diff --git a/saves-bloom-cosine/checkpoint-9480/config.json b/saves-bloom-cosine/checkpoint-9480/config.json new file mode 100644 index 0000000000000000000000000000000000000000..57266dc701ccdcb97654ab407a925e296c45c5b8 --- /dev/null +++ b/saves-bloom-cosine/checkpoint-9480/config.json @@ -0,0 +1,25 @@ +{ + "apply_residual_connection_post_layernorm": false, + "architectures": [ + "BloomForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "gelu", + "hidden_dropout": 0.0, + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 1024, + "layer_norm_epsilon": 1e-05, + "model_type": "bloom", + "n_head": 8, + "n_layer": 2, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "slow_but_exact": false, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-bloom-cosine/checkpoint-9480/generation_config.json b/saves-bloom-cosine/checkpoint-9480/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b2fc224da8a3685f78c733a0ef85e67242c17b5a --- /dev/null +++ b/saves-bloom-cosine/checkpoint-9480/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.42.4" +} diff --git a/saves-bloom-cosine/checkpoint-9480/model.safetensors b/saves-bloom-cosine/checkpoint-9480/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eaa0bfdb6e28de325acaf14eebd2714ba0e43911 --- /dev/null +++ b/saves-bloom-cosine/checkpoint-9480/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d93825cbb578680f828494c64549fb199ef64251a7ffb54729877458201c18d +size 8373336 diff --git a/saves-bloom-cosine/checkpoint-9480/optimizer.pt b/saves-bloom-cosine/checkpoint-9480/optimizer.pt new file mode 100644 index 
0000000000000000000000000000000000000000..6bc02a5ed8156c168e4c915e439aab4f57134e26 --- /dev/null +++ b/saves-bloom-cosine/checkpoint-9480/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:146a5df12627413cd1af645c180442ce37ccb8699ae75e3aebd4b4c7f9745f44 +size 16765063 diff --git a/saves-bloom-cosine/checkpoint-9480/rng_state.pth b/saves-bloom-cosine/checkpoint-9480/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f8291cd6ce87668b786a72f3e93d072fbe54902 --- /dev/null +++ b/saves-bloom-cosine/checkpoint-9480/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c917636c7a58af68a29056522a757e9f9b99005b776641aa157c536967817d +size 14244 diff --git a/saves-bloom-cosine/checkpoint-9480/scheduler.pt b/saves-bloom-cosine/checkpoint-9480/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..03c145297021546d40e130546440641e02059bcb --- /dev/null +++ b/saves-bloom-cosine/checkpoint-9480/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35fd617624c087e1a286ed7cf3fa38baa4a8815e49f107c3186b4c7c58e1adbb +size 1064 diff --git a/saves-bloom-cosine/checkpoint-9480/special_tokens_map.json b/saves-bloom-cosine/checkpoint-9480/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-bloom-cosine/checkpoint-9480/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-bloom-cosine/checkpoint-9480/tokenizer.json 
b/saves-bloom-cosine/checkpoint-9480/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-bloom-cosine/checkpoint-9480/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 
24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 
196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 
347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 
494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, 
+ "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 
785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 
926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + 
"人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + 
"ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 
1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 
1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 
1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + 
"Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + 
"Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, 
+ "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-bloom-cosine/checkpoint-9480/tokenizer_config.json b/saves-bloom-cosine/checkpoint-9480/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-bloom-cosine/checkpoint-9480/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + 
"eos_token": "<|im_end|>", + "errors": "replace", + "model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-bloom-cosine/checkpoint-9480/trainer_state.json b/saves-bloom-cosine/checkpoint-9480/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c4ab5638f772485a5886c4513f7345163384460f --- /dev/null +++ b/saves-bloom-cosine/checkpoint-9480/trainer_state.json @@ -0,0 +1,6669 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 9480, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0010548523206751054, + "grad_norm": 1.1361546516418457, + "learning_rate": 0.00015789473684210527, + "loss": 7.6497, + "step": 10 + }, + { + "epoch": 0.002109704641350211, + "grad_norm": 0.908168613910675, + "learning_rate": 0.00031578947368421053, + "loss": 7.2777, + "step": 20 + }, + { + "epoch": 0.0031645569620253164, + "grad_norm": 0.8880953192710876, + "learning_rate": 0.00047368421052631577, + "loss": 6.528, + "step": 30 + }, + { + "epoch": 0.004219409282700422, + "grad_norm": 0.6118085980415344, + "learning_rate": 0.0006315789473684211, + "loss": 5.8181, + "step": 40 + }, + { + "epoch": 0.005274261603375527, + "grad_norm": 0.33708152174949646, + "learning_rate": 0.0007894736842105263, + "loss": 5.3712, + "step": 50 + }, + { + "epoch": 0.006329113924050633, + "grad_norm": 0.24235396087169647, + "learning_rate": 0.0009473684210526315, + "loss": 4.9669, + "step": 60 + }, + { + "epoch": 0.007383966244725738, + "grad_norm": 0.17883867025375366, + "learning_rate": 0.0011052631578947368, + "loss": 4.6049, + "step": 70 + }, + { + "epoch": 0.008438818565400843, + "grad_norm": 0.34269165992736816, + "learning_rate": 0.0012631578947368421, + "loss": 4.3748, + "step": 80 + }, + { + "epoch": 0.00949367088607595, 
+ "grad_norm": 0.25559672713279724, + "learning_rate": 0.0014210526315789472, + "loss": 4.203, + "step": 90 + }, + { + "epoch": 0.010548523206751054, + "grad_norm": 0.2559225261211395, + "learning_rate": 0.0014999989494847376, + "loss": 4.0774, + "step": 100 + }, + { + "epoch": 0.011603375527426161, + "grad_norm": 0.24822884798049927, + "learning_rate": 0.0014999905453802946, + "loss": 3.9568, + "step": 110 + }, + { + "epoch": 0.012658227848101266, + "grad_norm": 0.4745056927204132, + "learning_rate": 0.0014999737372655805, + "loss": 3.8672, + "step": 120 + }, + { + "epoch": 0.013713080168776372, + "grad_norm": 0.2829824984073639, + "learning_rate": 0.0014999485253289388, + "loss": 3.7831, + "step": 130 + }, + { + "epoch": 0.014767932489451477, + "grad_norm": 0.3547530174255371, + "learning_rate": 0.0014999149098528814, + "loss": 3.7153, + "step": 140 + }, + { + "epoch": 0.015822784810126583, + "grad_norm": 0.32553762197494507, + "learning_rate": 0.0014998728912140862, + "loss": 3.6379, + "step": 150 + }, + { + "epoch": 0.016877637130801686, + "grad_norm": 0.29201358556747437, + "learning_rate": 0.0014998224698833922, + "loss": 3.5747, + "step": 160 + }, + { + "epoch": 0.017932489451476793, + "grad_norm": 0.3410353660583496, + "learning_rate": 0.0014997636464257956, + "loss": 3.5088, + "step": 170 + }, + { + "epoch": 0.0189873417721519, + "grad_norm": 0.5075426697731018, + "learning_rate": 0.0014996964215004416, + "loss": 3.473, + "step": 180 + }, + { + "epoch": 0.020042194092827006, + "grad_norm": 0.3382231593132019, + "learning_rate": 0.0014996207958606182, + "loss": 3.4133, + "step": 190 + }, + { + "epoch": 0.02109704641350211, + "grad_norm": 0.36965426802635193, + "learning_rate": 0.001499536770353748, + "loss": 3.3602, + "step": 200 + }, + { + "epoch": 0.022151898734177215, + "grad_norm": 0.49166449904441833, + "learning_rate": 0.0014994443459213774, + "loss": 3.3143, + "step": 210 + }, + { + "epoch": 0.023206751054852322, + "grad_norm": 0.34380143880844116, + 
"learning_rate": 0.001499343523599168, + "loss": 3.2807, + "step": 220 + }, + { + "epoch": 0.024261603375527425, + "grad_norm": 0.5123799443244934, + "learning_rate": 0.0014992343045168823, + "loss": 3.2242, + "step": 230 + }, + { + "epoch": 0.02531645569620253, + "grad_norm": 0.4475407004356384, + "learning_rate": 0.0014991166898983739, + "loss": 3.1804, + "step": 240 + }, + { + "epoch": 0.026371308016877638, + "grad_norm": 0.6833670735359192, + "learning_rate": 0.001498990681061572, + "loss": 3.1403, + "step": 250 + }, + { + "epoch": 0.027426160337552744, + "grad_norm": 0.5144447088241577, + "learning_rate": 0.001498856279418467, + "loss": 3.096, + "step": 260 + }, + { + "epoch": 0.028481012658227847, + "grad_norm": 0.42934003472328186, + "learning_rate": 0.0014987134864750948, + "loss": 3.0594, + "step": 270 + }, + { + "epoch": 0.029535864978902954, + "grad_norm": 0.5111614465713501, + "learning_rate": 0.0014985623038315206, + "loss": 3.0117, + "step": 280 + }, + { + "epoch": 0.03059071729957806, + "grad_norm": 0.5440753102302551, + "learning_rate": 0.0014984027331818193, + "loss": 2.9799, + "step": 290 + }, + { + "epoch": 0.03164556962025317, + "grad_norm": 0.5430408716201782, + "learning_rate": 0.0014982347763140584, + "loss": 2.9458, + "step": 300 + }, + { + "epoch": 0.03270042194092827, + "grad_norm": 0.8532850742340088, + "learning_rate": 0.0014980584351102762, + "loss": 2.9085, + "step": 310 + }, + { + "epoch": 0.03375527426160337, + "grad_norm": 0.48789384961128235, + "learning_rate": 0.001497873711546462, + "loss": 2.8788, + "step": 320 + }, + { + "epoch": 0.03481012658227848, + "grad_norm": 0.46921592950820923, + "learning_rate": 0.0014976806076925334, + "loss": 2.8617, + "step": 330 + }, + { + "epoch": 0.035864978902953586, + "grad_norm": 0.40585583448410034, + "learning_rate": 0.0014974791257123137, + "loss": 2.8296, + "step": 340 + }, + { + "epoch": 0.03691983122362869, + "grad_norm": 0.8285185694694519, + "learning_rate": 0.001497269267863507, + 
"loss": 2.7758, + "step": 350 + }, + { + "epoch": 0.0379746835443038, + "grad_norm": 0.5533679127693176, + "learning_rate": 0.0014970510364976724, + "loss": 2.7506, + "step": 360 + }, + { + "epoch": 0.039029535864978905, + "grad_norm": 0.8483739495277405, + "learning_rate": 0.0014968244340601996, + "loss": 2.736, + "step": 370 + }, + { + "epoch": 0.04008438818565401, + "grad_norm": 0.7598241567611694, + "learning_rate": 0.001496589463090279, + "loss": 2.7141, + "step": 380 + }, + { + "epoch": 0.04113924050632911, + "grad_norm": 0.738063633441925, + "learning_rate": 0.001496346126220875, + "loss": 2.6895, + "step": 390 + }, + { + "epoch": 0.04219409282700422, + "grad_norm": 0.6263159513473511, + "learning_rate": 0.0014960944261786966, + "loss": 2.6634, + "step": 400 + }, + { + "epoch": 0.043248945147679324, + "grad_norm": 0.4596900939941406, + "learning_rate": 0.0014958343657841655, + "loss": 2.6408, + "step": 410 + }, + { + "epoch": 0.04430379746835443, + "grad_norm": 0.4793330729007721, + "learning_rate": 0.001495565947951385, + "loss": 2.6084, + "step": 420 + }, + { + "epoch": 0.04535864978902954, + "grad_norm": 0.7548810243606567, + "learning_rate": 0.0014952891756881085, + "loss": 2.5838, + "step": 430 + }, + { + "epoch": 0.046413502109704644, + "grad_norm": 0.8852179050445557, + "learning_rate": 0.0014950040520957037, + "loss": 2.5606, + "step": 440 + }, + { + "epoch": 0.04746835443037975, + "grad_norm": 0.5558875203132629, + "learning_rate": 0.0014947105803691204, + "loss": 2.5508, + "step": 450 + }, + { + "epoch": 0.04852320675105485, + "grad_norm": 0.4711481034755707, + "learning_rate": 0.0014944087637968522, + "loss": 2.5245, + "step": 460 + }, + { + "epoch": 0.049578059071729956, + "grad_norm": 0.510650098323822, + "learning_rate": 0.0014940986057609012, + "loss": 2.4975, + "step": 470 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 0.8742207884788513, + "learning_rate": 0.0014937801097367396, + "loss": 2.499, + "step": 480 + }, + { + "epoch": 
0.05168776371308017, + "grad_norm": 0.7497596144676208, + "learning_rate": 0.001493453279293271, + "loss": 2.4649, + "step": 490 + }, + { + "epoch": 0.052742616033755275, + "grad_norm": 0.4843575954437256, + "learning_rate": 0.0014931181180927902, + "loss": 2.4477, + "step": 500 + }, + { + "epoch": 0.05379746835443038, + "grad_norm": 0.7055267095565796, + "learning_rate": 0.001492774629890942, + "loss": 2.4372, + "step": 510 + }, + { + "epoch": 0.05485232067510549, + "grad_norm": 1.0991405248641968, + "learning_rate": 0.001492422818536679, + "loss": 2.4239, + "step": 520 + }, + { + "epoch": 0.05590717299578059, + "grad_norm": 0.6026334762573242, + "learning_rate": 0.00149206268797222, + "loss": 2.4042, + "step": 530 + }, + { + "epoch": 0.056962025316455694, + "grad_norm": 0.9058926701545715, + "learning_rate": 0.0014916942422330032, + "loss": 2.3768, + "step": 540 + }, + { + "epoch": 0.0580168776371308, + "grad_norm": 0.5138038992881775, + "learning_rate": 0.001491317485447643, + "loss": 2.3767, + "step": 550 + }, + { + "epoch": 0.05907172995780591, + "grad_norm": 0.5587601065635681, + "learning_rate": 0.0014909324218378838, + "loss": 2.3387, + "step": 560 + }, + { + "epoch": 0.060126582278481014, + "grad_norm": 0.6543309092521667, + "learning_rate": 0.0014905390557185508, + "loss": 2.3423, + "step": 570 + }, + { + "epoch": 0.06118143459915612, + "grad_norm": 0.6026796102523804, + "learning_rate": 0.0014901373914975036, + "loss": 2.3378, + "step": 580 + }, + { + "epoch": 0.06223628691983123, + "grad_norm": 0.8843022584915161, + "learning_rate": 0.0014897274336755856, + "loss": 2.3148, + "step": 590 + }, + { + "epoch": 0.06329113924050633, + "grad_norm": 0.7628885507583618, + "learning_rate": 0.001489309186846575, + "loss": 2.2917, + "step": 600 + }, + { + "epoch": 0.06434599156118144, + "grad_norm": 0.5476912260055542, + "learning_rate": 0.0014888826556971313, + "loss": 2.2892, + "step": 610 + }, + { + "epoch": 0.06540084388185655, + "grad_norm": 
0.42697277665138245, + "learning_rate": 0.0014884478450067444, + "loss": 2.2752, + "step": 620 + }, + { + "epoch": 0.06645569620253164, + "grad_norm": 0.4697224795818329, + "learning_rate": 0.0014880047596476807, + "loss": 2.2573, + "step": 630 + }, + { + "epoch": 0.06751054852320675, + "grad_norm": 0.4894421398639679, + "learning_rate": 0.0014875534045849274, + "loss": 2.2651, + "step": 640 + }, + { + "epoch": 0.06856540084388185, + "grad_norm": 0.6345561742782593, + "learning_rate": 0.0014870937848761388, + "loss": 2.2555, + "step": 650 + }, + { + "epoch": 0.06962025316455696, + "grad_norm": 0.49453651905059814, + "learning_rate": 0.001486625905671578, + "loss": 2.2441, + "step": 660 + }, + { + "epoch": 0.07067510548523206, + "grad_norm": 0.6118682026863098, + "learning_rate": 0.00148614977221406, + "loss": 2.2208, + "step": 670 + }, + { + "epoch": 0.07172995780590717, + "grad_norm": 0.4619838297367096, + "learning_rate": 0.0014856653898388927, + "loss": 2.2133, + "step": 680 + }, + { + "epoch": 0.07278481012658228, + "grad_norm": 0.5305221676826477, + "learning_rate": 0.001485172763973817, + "loss": 2.2258, + "step": 690 + }, + { + "epoch": 0.07383966244725738, + "grad_norm": 0.47047242522239685, + "learning_rate": 0.0014846719001389466, + "loss": 2.1992, + "step": 700 + }, + { + "epoch": 0.07489451476793249, + "grad_norm": 1.0235542058944702, + "learning_rate": 0.001484162803946705, + "loss": 2.1917, + "step": 710 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 0.56936115026474, + "learning_rate": 0.0014836454811017635, + "loss": 2.1703, + "step": 720 + }, + { + "epoch": 0.0770042194092827, + "grad_norm": 0.6495599746704102, + "learning_rate": 0.0014831199374009778, + "loss": 2.1679, + "step": 730 + }, + { + "epoch": 0.07805907172995781, + "grad_norm": 0.49974289536476135, + "learning_rate": 0.0014825861787333208, + "loss": 2.1746, + "step": 740 + }, + { + "epoch": 0.07911392405063292, + "grad_norm": 0.5245474576950073, + "learning_rate": 
0.0014820442110798197, + "loss": 2.1507, + "step": 750 + }, + { + "epoch": 0.08016877637130802, + "grad_norm": 0.5185036063194275, + "learning_rate": 0.0014814940405134865, + "loss": 2.1499, + "step": 760 + }, + { + "epoch": 0.08122362869198312, + "grad_norm": 0.5844479203224182, + "learning_rate": 0.001480935673199251, + "loss": 2.1403, + "step": 770 + }, + { + "epoch": 0.08227848101265822, + "grad_norm": 0.48124414682388306, + "learning_rate": 0.0014803691153938915, + "loss": 2.1389, + "step": 780 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 0.7114575505256653, + "learning_rate": 0.0014797943734459653, + "loss": 2.1425, + "step": 790 + }, + { + "epoch": 0.08438818565400844, + "grad_norm": 0.4797729551792145, + "learning_rate": 0.001479211453795736, + "loss": 2.1121, + "step": 800 + }, + { + "epoch": 0.08544303797468354, + "grad_norm": 0.4670267105102539, + "learning_rate": 0.0014786203629751033, + "loss": 2.0947, + "step": 810 + }, + { + "epoch": 0.08649789029535865, + "grad_norm": 0.447757363319397, + "learning_rate": 0.0014780211076075279, + "loss": 2.1016, + "step": 820 + }, + { + "epoch": 0.08755274261603375, + "grad_norm": 0.5141759514808655, + "learning_rate": 0.0014774136944079594, + "loss": 2.1068, + "step": 830 + }, + { + "epoch": 0.08860759493670886, + "grad_norm": 0.7842695713043213, + "learning_rate": 0.0014767981301827592, + "loss": 2.0852, + "step": 840 + }, + { + "epoch": 0.08966244725738397, + "grad_norm": 0.48477619886398315, + "learning_rate": 0.0014761744218296249, + "loss": 2.0852, + "step": 850 + }, + { + "epoch": 0.09071729957805907, + "grad_norm": 0.4969441294670105, + "learning_rate": 0.001475542576337513, + "loss": 2.0816, + "step": 860 + }, + { + "epoch": 0.09177215189873418, + "grad_norm": 0.6644434332847595, + "learning_rate": 0.001474902600786561, + "loss": 2.0758, + "step": 870 + }, + { + "epoch": 0.09282700421940929, + "grad_norm": 0.5549584627151489, + "learning_rate": 0.0014742545023480075, + "loss": 2.0705, + "step": 
880 + }, + { + "epoch": 0.0938818565400844, + "grad_norm": 0.44507768750190735, + "learning_rate": 0.0014735982882841117, + "loss": 2.0548, + "step": 890 + }, + { + "epoch": 0.0949367088607595, + "grad_norm": 0.5654860138893127, + "learning_rate": 0.0014729339659480727, + "loss": 2.0569, + "step": 900 + }, + { + "epoch": 0.09599156118143459, + "grad_norm": 0.4581911563873291, + "learning_rate": 0.0014722615427839468, + "loss": 2.0635, + "step": 910 + }, + { + "epoch": 0.0970464135021097, + "grad_norm": 0.6122888922691345, + "learning_rate": 0.0014715810263265633, + "loss": 2.0475, + "step": 920 + }, + { + "epoch": 0.0981012658227848, + "grad_norm": 0.7882494330406189, + "learning_rate": 0.0014708924242014423, + "loss": 2.0373, + "step": 930 + }, + { + "epoch": 0.09915611814345991, + "grad_norm": 0.7913827896118164, + "learning_rate": 0.0014701957441247064, + "loss": 2.0423, + "step": 940 + }, + { + "epoch": 0.10021097046413502, + "grad_norm": 0.6759977340698242, + "learning_rate": 0.0014694909939029959, + "loss": 2.024, + "step": 950 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 0.4497184753417969, + "learning_rate": 0.0014687781814333814, + "loss": 2.0273, + "step": 960 + }, + { + "epoch": 0.10232067510548523, + "grad_norm": 0.9482643008232117, + "learning_rate": 0.0014680573147032746, + "loss": 2.0239, + "step": 970 + }, + { + "epoch": 0.10337552742616034, + "grad_norm": 0.5979325175285339, + "learning_rate": 0.0014673284017903392, + "loss": 2.0078, + "step": 980 + }, + { + "epoch": 0.10443037974683544, + "grad_norm": 0.4397555887699127, + "learning_rate": 0.0014665914508624, + "loss": 2.0031, + "step": 990 + }, + { + "epoch": 0.10548523206751055, + "grad_norm": 0.9002691507339478, + "learning_rate": 0.0014658464701773526, + "loss": 2.0123, + "step": 1000 + }, + { + "epoch": 0.10654008438818566, + "grad_norm": 0.9311442375183105, + "learning_rate": 0.0014650934680830688, + "loss": 2.0061, + "step": 1010 + }, + { + "epoch": 0.10759493670886076, + 
"grad_norm": 0.6002566814422607, + "learning_rate": 0.0014643324530173051, + "loss": 1.9991, + "step": 1020 + }, + { + "epoch": 0.10864978902953587, + "grad_norm": 0.6356918811798096, + "learning_rate": 0.0014635634335076067, + "loss": 1.989, + "step": 1030 + }, + { + "epoch": 0.10970464135021098, + "grad_norm": 0.6467710733413696, + "learning_rate": 0.001462786418171213, + "loss": 1.985, + "step": 1040 + }, + { + "epoch": 0.11075949367088607, + "grad_norm": 1.0886985063552856, + "learning_rate": 0.0014620014157149597, + "loss": 1.9903, + "step": 1050 + }, + { + "epoch": 0.11181434599156118, + "grad_norm": 0.7051685452461243, + "learning_rate": 0.001461208434935183, + "loss": 1.9818, + "step": 1060 + }, + { + "epoch": 0.11286919831223628, + "grad_norm": 0.4662516713142395, + "learning_rate": 0.0014604074847176197, + "loss": 1.9642, + "step": 1070 + }, + { + "epoch": 0.11392405063291139, + "grad_norm": 0.515107274055481, + "learning_rate": 0.0014595985740373082, + "loss": 1.9665, + "step": 1080 + }, + { + "epoch": 0.1149789029535865, + "grad_norm": 0.566654622554779, + "learning_rate": 0.0014587817119584873, + "loss": 1.9749, + "step": 1090 + }, + { + "epoch": 0.1160337552742616, + "grad_norm": 0.5290713310241699, + "learning_rate": 0.001457956907634496, + "loss": 1.9587, + "step": 1100 + }, + { + "epoch": 0.11708860759493671, + "grad_norm": 0.5899307727813721, + "learning_rate": 0.0014571241703076692, + "loss": 1.9641, + "step": 1110 + }, + { + "epoch": 0.11814345991561181, + "grad_norm": 0.4900335967540741, + "learning_rate": 0.0014562835093092348, + "loss": 1.9549, + "step": 1120 + }, + { + "epoch": 0.11919831223628692, + "grad_norm": 1.0779500007629395, + "learning_rate": 0.0014554349340592104, + "loss": 1.9446, + "step": 1130 + }, + { + "epoch": 0.12025316455696203, + "grad_norm": 0.7757524251937866, + "learning_rate": 0.001454578454066296, + "loss": 1.9566, + "step": 1140 + }, + { + "epoch": 0.12130801687763713, + "grad_norm": 0.7656589150428772, + 
"learning_rate": 0.0014537140789277678, + "loss": 1.94, + "step": 1150 + }, + { + "epoch": 0.12236286919831224, + "grad_norm": 0.5552901029586792, + "learning_rate": 0.0014528418183293716, + "loss": 1.9486, + "step": 1160 + }, + { + "epoch": 0.12341772151898735, + "grad_norm": 0.546013593673706, + "learning_rate": 0.001451961682045213, + "loss": 1.9385, + "step": 1170 + }, + { + "epoch": 0.12447257383966245, + "grad_norm": 0.4671438932418823, + "learning_rate": 0.001451073679937649, + "loss": 1.9205, + "step": 1180 + }, + { + "epoch": 0.12552742616033755, + "grad_norm": 0.9370329976081848, + "learning_rate": 0.0014501778219571766, + "loss": 1.9231, + "step": 1190 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 0.4605046510696411, + "learning_rate": 0.0014492741181423225, + "loss": 1.9354, + "step": 1200 + }, + { + "epoch": 0.12763713080168776, + "grad_norm": 0.4588429927825928, + "learning_rate": 0.0014483625786195285, + "loss": 1.93, + "step": 1210 + }, + { + "epoch": 0.12869198312236288, + "grad_norm": 0.8734654784202576, + "learning_rate": 0.0014474432136030405, + "loss": 1.9162, + "step": 1220 + }, + { + "epoch": 0.12974683544303797, + "grad_norm": 0.8143278956413269, + "learning_rate": 0.0014465160333947923, + "loss": 1.9052, + "step": 1230 + }, + { + "epoch": 0.1308016877637131, + "grad_norm": 0.5360792875289917, + "learning_rate": 0.0014455810483842908, + "loss": 1.919, + "step": 1240 + }, + { + "epoch": 0.13185654008438819, + "grad_norm": 0.6346741914749146, + "learning_rate": 0.0014446382690484997, + "loss": 1.9187, + "step": 1250 + }, + { + "epoch": 0.13291139240506328, + "grad_norm": 0.774360179901123, + "learning_rate": 0.0014436877059517215, + "loss": 1.9048, + "step": 1260 + }, + { + "epoch": 0.1339662447257384, + "grad_norm": 0.5729191899299622, + "learning_rate": 0.0014427293697454803, + "loss": 1.9038, + "step": 1270 + }, + { + "epoch": 0.1350210970464135, + "grad_norm": 0.48271700739860535, + "learning_rate": 0.001441763271168401, + 
"loss": 1.9108, + "step": 1280 + }, + { + "epoch": 0.1360759493670886, + "grad_norm": 0.5652008056640625, + "learning_rate": 0.00144078942104609, + "loss": 1.8999, + "step": 1290 + }, + { + "epoch": 0.1371308016877637, + "grad_norm": 0.5593475103378296, + "learning_rate": 0.001439807830291013, + "loss": 1.8875, + "step": 1300 + }, + { + "epoch": 0.13818565400843882, + "grad_norm": 0.4523153305053711, + "learning_rate": 0.0014388185099023744, + "loss": 1.8913, + "step": 1310 + }, + { + "epoch": 0.13924050632911392, + "grad_norm": 0.9170459508895874, + "learning_rate": 0.0014378214709659916, + "loss": 1.8905, + "step": 1320 + }, + { + "epoch": 0.14029535864978904, + "grad_norm": 0.7784819602966309, + "learning_rate": 0.0014368167246541733, + "loss": 1.8853, + "step": 1330 + }, + { + "epoch": 0.14135021097046413, + "grad_norm": 0.5773763060569763, + "learning_rate": 0.0014358042822255918, + "loss": 1.8851, + "step": 1340 + }, + { + "epoch": 0.14240506329113925, + "grad_norm": 0.8327829837799072, + "learning_rate": 0.0014347841550251597, + "loss": 1.8909, + "step": 1350 + }, + { + "epoch": 0.14345991561181434, + "grad_norm": 0.6381543874740601, + "learning_rate": 0.0014337563544838997, + "loss": 1.8805, + "step": 1360 + }, + { + "epoch": 0.14451476793248946, + "grad_norm": 0.5755815505981445, + "learning_rate": 0.001432720892118819, + "loss": 1.8754, + "step": 1370 + }, + { + "epoch": 0.14556962025316456, + "grad_norm": 0.6887590885162354, + "learning_rate": 0.0014316777795327794, + "loss": 1.8649, + "step": 1380 + }, + { + "epoch": 0.14662447257383968, + "grad_norm": 0.5535954833030701, + "learning_rate": 0.001430627028414366, + "loss": 1.8785, + "step": 1390 + }, + { + "epoch": 0.14767932489451477, + "grad_norm": 0.5511561036109924, + "learning_rate": 0.0014295686505377586, + "loss": 1.8601, + "step": 1400 + }, + { + "epoch": 0.14873417721518986, + "grad_norm": 0.6784546375274658, + "learning_rate": 0.0014285026577625982, + "loss": 1.8624, + "step": 1410 + }, + { + 
"epoch": 0.14978902953586498, + "grad_norm": 0.46250203251838684, + "learning_rate": 0.0014274290620338542, + "loss": 1.8663, + "step": 1420 + }, + { + "epoch": 0.15084388185654007, + "grad_norm": 0.7906670570373535, + "learning_rate": 0.0014263478753816906, + "loss": 1.8584, + "step": 1430 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 0.5146434903144836, + "learning_rate": 0.0014252591099213326, + "loss": 1.8554, + "step": 1440 + }, + { + "epoch": 0.1529535864978903, + "grad_norm": 1.0950006246566772, + "learning_rate": 0.001424162777852928, + "loss": 1.8564, + "step": 1450 + }, + { + "epoch": 0.1540084388185654, + "grad_norm": 0.5953426361083984, + "learning_rate": 0.0014230588914614134, + "loss": 1.8485, + "step": 1460 + }, + { + "epoch": 0.1550632911392405, + "grad_norm": 0.5193158388137817, + "learning_rate": 0.0014219474631163745, + "loss": 1.8437, + "step": 1470 + }, + { + "epoch": 0.15611814345991562, + "grad_norm": 0.6556932330131531, + "learning_rate": 0.001420828505271909, + "loss": 1.8488, + "step": 1480 + }, + { + "epoch": 0.1571729957805907, + "grad_norm": 0.5962861776351929, + "learning_rate": 0.0014197020304664856, + "loss": 1.8471, + "step": 1490 + }, + { + "epoch": 0.15822784810126583, + "grad_norm": 0.46357786655426025, + "learning_rate": 0.0014185680513228048, + "loss": 1.8467, + "step": 1500 + }, + { + "epoch": 0.15928270042194093, + "grad_norm": 0.4353756010532379, + "learning_rate": 0.0014174265805476564, + "loss": 1.841, + "step": 1510 + }, + { + "epoch": 0.16033755274261605, + "grad_norm": 0.4645099639892578, + "learning_rate": 0.0014162776309317778, + "loss": 1.8449, + "step": 1520 + }, + { + "epoch": 0.16139240506329114, + "grad_norm": 0.5141316652297974, + "learning_rate": 0.0014151212153497108, + "loss": 1.8296, + "step": 1530 + }, + { + "epoch": 0.16244725738396623, + "grad_norm": 0.6246196031570435, + "learning_rate": 0.0014139573467596561, + "loss": 1.8191, + "step": 1540 + }, + { + "epoch": 0.16350210970464135, + 
"grad_norm": 0.5215326547622681, + "learning_rate": 0.00141278603820333, + "loss": 1.8196, + "step": 1550 + }, + { + "epoch": 0.16455696202531644, + "grad_norm": 0.5152313113212585, + "learning_rate": 0.0014116073028058165, + "loss": 1.8215, + "step": 1560 + }, + { + "epoch": 0.16561181434599156, + "grad_norm": 0.5511691570281982, + "learning_rate": 0.0014104211537754217, + "loss": 1.811, + "step": 1570 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 0.6224981546401978, + "learning_rate": 0.001409227604403524, + "loss": 1.8241, + "step": 1580 + }, + { + "epoch": 0.16772151898734178, + "grad_norm": 0.5345869064331055, + "learning_rate": 0.0014080266680644277, + "loss": 1.8219, + "step": 1590 + }, + { + "epoch": 0.16877637130801687, + "grad_norm": 0.5171884298324585, + "learning_rate": 0.0014068183582152103, + "loss": 1.8211, + "step": 1600 + }, + { + "epoch": 0.169831223628692, + "grad_norm": 0.548721432685852, + "learning_rate": 0.001405602688395574, + "loss": 1.8192, + "step": 1610 + }, + { + "epoch": 0.17088607594936708, + "grad_norm": 0.7188496589660645, + "learning_rate": 0.0014043796722276924, + "loss": 1.7978, + "step": 1620 + }, + { + "epoch": 0.1719409282700422, + "grad_norm": 1.0115805864334106, + "learning_rate": 0.0014031493234160591, + "loss": 1.8066, + "step": 1630 + }, + { + "epoch": 0.1729957805907173, + "grad_norm": 0.9728096723556519, + "learning_rate": 0.0014019116557473332, + "loss": 1.8252, + "step": 1640 + }, + { + "epoch": 0.17405063291139242, + "grad_norm": 0.5120858550071716, + "learning_rate": 0.0014006666830901854, + "loss": 1.8007, + "step": 1650 + }, + { + "epoch": 0.1751054852320675, + "grad_norm": 0.6093056201934814, + "learning_rate": 0.001399414419395142, + "loss": 1.794, + "step": 1660 + }, + { + "epoch": 0.17616033755274263, + "grad_norm": 0.7036264538764954, + "learning_rate": 0.0013981548786944293, + "loss": 1.8054, + "step": 1670 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 0.49233219027519226, + 
"learning_rate": 0.0013968880751018158, + "loss": 1.7936, + "step": 1680 + }, + { + "epoch": 0.17827004219409281, + "grad_norm": 0.48756009340286255, + "learning_rate": 0.0013956140228124545, + "loss": 1.7855, + "step": 1690 + }, + { + "epoch": 0.17932489451476794, + "grad_norm": 0.8501176238059998, + "learning_rate": 0.0013943327361027231, + "loss": 1.8065, + "step": 1700 + }, + { + "epoch": 0.18037974683544303, + "grad_norm": 0.4327588975429535, + "learning_rate": 0.0013930442293300649, + "loss": 1.7812, + "step": 1710 + }, + { + "epoch": 0.18143459915611815, + "grad_norm": 0.5454190373420715, + "learning_rate": 0.0013917485169328279, + "loss": 1.7814, + "step": 1720 + }, + { + "epoch": 0.18248945147679324, + "grad_norm": 0.5129348039627075, + "learning_rate": 0.0013904456134301016, + "loss": 1.7804, + "step": 1730 + }, + { + "epoch": 0.18354430379746836, + "grad_norm": 0.5207455158233643, + "learning_rate": 0.0013891355334215562, + "loss": 1.7872, + "step": 1740 + }, + { + "epoch": 0.18459915611814345, + "grad_norm": 0.6900990605354309, + "learning_rate": 0.0013878182915872776, + "loss": 1.808, + "step": 1750 + }, + { + "epoch": 0.18565400843881857, + "grad_norm": 0.44003963470458984, + "learning_rate": 0.001386493902687604, + "loss": 1.7829, + "step": 1760 + }, + { + "epoch": 0.18670886075949367, + "grad_norm": 0.43902549147605896, + "learning_rate": 0.00138516238156296, + "loss": 1.7846, + "step": 1770 + }, + { + "epoch": 0.1877637130801688, + "grad_norm": 0.5925940275192261, + "learning_rate": 0.0013838237431336895, + "loss": 1.7847, + "step": 1780 + }, + { + "epoch": 0.18881856540084388, + "grad_norm": 0.5813276767730713, + "learning_rate": 0.0013824780023998899, + "loss": 1.7833, + "step": 1790 + }, + { + "epoch": 0.189873417721519, + "grad_norm": 0.9307818412780762, + "learning_rate": 0.0013811251744412431, + "loss": 1.7816, + "step": 1800 + }, + { + "epoch": 0.1909282700421941, + "grad_norm": 0.7600083351135254, + "learning_rate": 0.0013797652744168473, + 
"loss": 1.7716, + "step": 1810 + }, + { + "epoch": 0.19198312236286919, + "grad_norm": 0.5846881866455078, + "learning_rate": 0.0013783983175650457, + "loss": 1.7686, + "step": 1820 + }, + { + "epoch": 0.1930379746835443, + "grad_norm": 0.5641414523124695, + "learning_rate": 0.0013770243192032581, + "loss": 1.7659, + "step": 1830 + }, + { + "epoch": 0.1940928270042194, + "grad_norm": 0.5104548931121826, + "learning_rate": 0.0013756432947278064, + "loss": 1.7637, + "step": 1840 + }, + { + "epoch": 0.19514767932489452, + "grad_norm": 0.7146143317222595, + "learning_rate": 0.0013742552596137444, + "loss": 1.7683, + "step": 1850 + }, + { + "epoch": 0.1962025316455696, + "grad_norm": 0.6184601783752441, + "learning_rate": 0.0013728602294146833, + "loss": 1.7711, + "step": 1860 + }, + { + "epoch": 0.19725738396624473, + "grad_norm": 0.8336368799209595, + "learning_rate": 0.0013714582197626175, + "loss": 1.7746, + "step": 1870 + }, + { + "epoch": 0.19831223628691982, + "grad_norm": 0.46143412590026855, + "learning_rate": 0.0013700492463677501, + "loss": 1.7564, + "step": 1880 + }, + { + "epoch": 0.19936708860759494, + "grad_norm": 0.532975971698761, + "learning_rate": 0.0013686333250183154, + "loss": 1.7602, + "step": 1890 + }, + { + "epoch": 0.20042194092827004, + "grad_norm": 0.79651939868927, + "learning_rate": 0.001367210471580404, + "loss": 1.7577, + "step": 1900 + }, + { + "epoch": 0.20147679324894516, + "grad_norm": 0.48664984107017517, + "learning_rate": 0.0013657807019977835, + "loss": 1.758, + "step": 1910 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 0.6820895671844482, + "learning_rate": 0.0013643440322917198, + "loss": 1.7565, + "step": 1920 + }, + { + "epoch": 0.20358649789029537, + "grad_norm": 0.9615606665611267, + "learning_rate": 0.0013629004785607989, + "loss": 1.7572, + "step": 1930 + }, + { + "epoch": 0.20464135021097046, + "grad_norm": 0.4524003565311432, + "learning_rate": 0.0013614500569807445, + "loss": 1.7452, + "step": 1940 + }, + { + 
"epoch": 0.20569620253164558, + "grad_norm": 0.49840429425239563, + "learning_rate": 0.0013599927838042394, + "loss": 1.7489, + "step": 1950 + }, + { + "epoch": 0.20675105485232068, + "grad_norm": 1.0147011280059814, + "learning_rate": 0.0013585286753607408, + "loss": 1.7504, + "step": 1960 + }, + { + "epoch": 0.20780590717299577, + "grad_norm": 0.483129620552063, + "learning_rate": 0.0013570577480562986, + "loss": 1.7596, + "step": 1970 + }, + { + "epoch": 0.2088607594936709, + "grad_norm": 0.5301021337509155, + "learning_rate": 0.0013555800183733717, + "loss": 1.7408, + "step": 1980 + }, + { + "epoch": 0.20991561181434598, + "grad_norm": 0.5274137258529663, + "learning_rate": 0.0013540955028706425, + "loss": 1.7372, + "step": 1990 + }, + { + "epoch": 0.2109704641350211, + "grad_norm": 0.5680530071258545, + "learning_rate": 0.0013526042181828324, + "loss": 1.7373, + "step": 2000 + }, + { + "epoch": 0.2120253164556962, + "grad_norm": 0.69745272397995, + "learning_rate": 0.0013511061810205143, + "loss": 1.744, + "step": 2010 + }, + { + "epoch": 0.21308016877637131, + "grad_norm": 0.775166392326355, + "learning_rate": 0.001349601408169926, + "loss": 1.7526, + "step": 2020 + }, + { + "epoch": 0.2141350210970464, + "grad_norm": 0.5244166254997253, + "learning_rate": 0.0013480899164927823, + "loss": 1.7365, + "step": 2030 + }, + { + "epoch": 0.21518987341772153, + "grad_norm": 0.5409502983093262, + "learning_rate": 0.0013465717229260853, + "loss": 1.7379, + "step": 2040 + }, + { + "epoch": 0.21624472573839662, + "grad_norm": 0.5058172941207886, + "learning_rate": 0.001345046844481935, + "loss": 1.7349, + "step": 2050 + }, + { + "epoch": 0.21729957805907174, + "grad_norm": 0.5646553039550781, + "learning_rate": 0.0013435152982473396, + "loss": 1.7276, + "step": 2060 + }, + { + "epoch": 0.21835443037974683, + "grad_norm": 0.7753597497940063, + "learning_rate": 0.0013419771013840217, + "loss": 1.729, + "step": 2070 + }, + { + "epoch": 0.21940928270042195, + "grad_norm": 
0.7563711404800415, + "learning_rate": 0.001340432271128229, + "loss": 1.733, + "step": 2080 + }, + { + "epoch": 0.22046413502109705, + "grad_norm": 0.6744264960289001, + "learning_rate": 0.0013388808247905381, + "loss": 1.7233, + "step": 2090 + }, + { + "epoch": 0.22151898734177214, + "grad_norm": 0.5431970357894897, + "learning_rate": 0.0013373227797556634, + "loss": 1.7336, + "step": 2100 + }, + { + "epoch": 0.22257383966244726, + "grad_norm": 0.5262954831123352, + "learning_rate": 0.00133575815348226, + "loss": 1.7207, + "step": 2110 + }, + { + "epoch": 0.22362869198312235, + "grad_norm": 0.448763906955719, + "learning_rate": 0.0013341869635027292, + "loss": 1.7204, + "step": 2120 + }, + { + "epoch": 0.22468354430379747, + "grad_norm": 0.5699217915534973, + "learning_rate": 0.001332609227423022, + "loss": 1.7231, + "step": 2130 + }, + { + "epoch": 0.22573839662447256, + "grad_norm": 0.8906753659248352, + "learning_rate": 0.0013310249629224417, + "loss": 1.7173, + "step": 2140 + }, + { + "epoch": 0.22679324894514769, + "grad_norm": 0.4379415810108185, + "learning_rate": 0.0013294341877534454, + "loss": 1.7231, + "step": 2150 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 0.4739811420440674, + "learning_rate": 0.0013278369197414458, + "loss": 1.7344, + "step": 2160 + }, + { + "epoch": 0.2289029535864979, + "grad_norm": 0.4546789824962616, + "learning_rate": 0.0013262331767846104, + "loss": 1.7187, + "step": 2170 + }, + { + "epoch": 0.229957805907173, + "grad_norm": 0.47932279109954834, + "learning_rate": 0.0013246229768536628, + "loss": 1.7022, + "step": 2180 + }, + { + "epoch": 0.2310126582278481, + "grad_norm": 0.5030372142791748, + "learning_rate": 0.001323006337991679, + "loss": 1.7183, + "step": 2190 + }, + { + "epoch": 0.2320675105485232, + "grad_norm": 0.5990695357322693, + "learning_rate": 0.0013213832783138873, + "loss": 1.7157, + "step": 2200 + }, + { + "epoch": 0.23312236286919832, + "grad_norm": 0.6022274494171143, + "learning_rate": 
0.0013197538160074633, + "loss": 1.7025, + "step": 2210 + }, + { + "epoch": 0.23417721518987342, + "grad_norm": 0.7774659395217896, + "learning_rate": 0.0013181179693313283, + "loss": 1.7191, + "step": 2220 + }, + { + "epoch": 0.23523206751054854, + "grad_norm": 0.863082766532898, + "learning_rate": 0.0013164757566159428, + "loss": 1.7104, + "step": 2230 + }, + { + "epoch": 0.23628691983122363, + "grad_norm": 0.8202386498451233, + "learning_rate": 0.001314827196263102, + "loss": 1.7061, + "step": 2240 + }, + { + "epoch": 0.23734177215189872, + "grad_norm": 0.9174307584762573, + "learning_rate": 0.0013131723067457302, + "loss": 1.7049, + "step": 2250 + }, + { + "epoch": 0.23839662447257384, + "grad_norm": 0.5396402478218079, + "learning_rate": 0.0013115111066076721, + "loss": 1.6945, + "step": 2260 + }, + { + "epoch": 0.23945147679324894, + "grad_norm": 0.541278064250946, + "learning_rate": 0.0013098436144634862, + "loss": 1.7272, + "step": 2270 + }, + { + "epoch": 0.24050632911392406, + "grad_norm": 0.6107963919639587, + "learning_rate": 0.0013081698489982364, + "loss": 1.7114, + "step": 2280 + }, + { + "epoch": 0.24156118143459915, + "grad_norm": 0.6348549127578735, + "learning_rate": 0.001306489828967282, + "loss": 1.7009, + "step": 2290 + }, + { + "epoch": 0.24261603375527427, + "grad_norm": 0.5911619663238525, + "learning_rate": 0.0013048035731960679, + "loss": 1.6979, + "step": 2300 + }, + { + "epoch": 0.24367088607594936, + "grad_norm": 0.5971229076385498, + "learning_rate": 0.0013031111005799133, + "loss": 1.6995, + "step": 2310 + }, + { + "epoch": 0.24472573839662448, + "grad_norm": 0.4567926824092865, + "learning_rate": 0.0013014124300838004, + "loss": 1.7054, + "step": 2320 + }, + { + "epoch": 0.24578059071729957, + "grad_norm": 0.4713601768016815, + "learning_rate": 0.0012997075807421612, + "loss": 1.6876, + "step": 2330 + }, + { + "epoch": 0.2468354430379747, + "grad_norm": 0.5646883249282837, + "learning_rate": 0.0012979965716586653, + "loss": 1.686, + 
"step": 2340 + }, + { + "epoch": 0.2478902953586498, + "grad_norm": 0.9336428642272949, + "learning_rate": 0.0012962794220060048, + "loss": 1.6851, + "step": 2350 + }, + { + "epoch": 0.2489451476793249, + "grad_norm": 0.5376665592193604, + "learning_rate": 0.0012945561510256801, + "loss": 1.6934, + "step": 2360 + }, + { + "epoch": 0.25, + "grad_norm": 0.8239842653274536, + "learning_rate": 0.001292826778027784, + "loss": 1.6911, + "step": 2370 + }, + { + "epoch": 0.2510548523206751, + "grad_norm": 0.7588313817977905, + "learning_rate": 0.0012910913223907856, + "loss": 1.6883, + "step": 2380 + }, + { + "epoch": 0.2521097046413502, + "grad_norm": 0.8761573433876038, + "learning_rate": 0.0012893498035613123, + "loss": 1.6887, + "step": 2390 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 0.7116152048110962, + "learning_rate": 0.001287602241053933, + "loss": 1.6884, + "step": 2400 + }, + { + "epoch": 0.2542194092827004, + "grad_norm": 0.4818405210971832, + "learning_rate": 0.0012858486544509392, + "loss": 1.6917, + "step": 2410 + }, + { + "epoch": 0.2552742616033755, + "grad_norm": 0.7458882331848145, + "learning_rate": 0.0012840890634021249, + "loss": 1.6826, + "step": 2420 + }, + { + "epoch": 0.2563291139240506, + "grad_norm": 0.6218952536582947, + "learning_rate": 0.0012823234876245667, + "loss": 1.6809, + "step": 2430 + }, + { + "epoch": 0.25738396624472576, + "grad_norm": 0.8475202322006226, + "learning_rate": 0.0012805519469024035, + "loss": 1.6909, + "step": 2440 + }, + { + "epoch": 0.25843881856540085, + "grad_norm": 0.6821154356002808, + "learning_rate": 0.0012787744610866143, + "loss": 1.6719, + "step": 2450 + }, + { + "epoch": 0.25949367088607594, + "grad_norm": 0.5371242165565491, + "learning_rate": 0.0012769910500947954, + "loss": 1.6878, + "step": 2460 + }, + { + "epoch": 0.26054852320675104, + "grad_norm": 0.5225316882133484, + "learning_rate": 0.0012752017339109376, + "loss": 1.6784, + "step": 2470 + }, + { + "epoch": 0.2616033755274262, + 
"grad_norm": 0.5158612728118896, + "learning_rate": 0.0012734065325852029, + "loss": 1.6814, + "step": 2480 + }, + { + "epoch": 0.2626582278481013, + "grad_norm": 0.5762125849723816, + "learning_rate": 0.0012716054662336987, + "loss": 1.6703, + "step": 2490 + }, + { + "epoch": 0.26371308016877637, + "grad_norm": 0.4870578348636627, + "learning_rate": 0.001269798555038252, + "loss": 1.6748, + "step": 2500 + }, + { + "epoch": 0.26476793248945146, + "grad_norm": 0.47660568356513977, + "learning_rate": 0.0012679858192461864, + "loss": 1.6742, + "step": 2510 + }, + { + "epoch": 0.26582278481012656, + "grad_norm": 0.6045103073120117, + "learning_rate": 0.0012661672791700906, + "loss": 1.6789, + "step": 2520 + }, + { + "epoch": 0.2668776371308017, + "grad_norm": 0.6843808889389038, + "learning_rate": 0.0012643429551875945, + "loss": 1.6716, + "step": 2530 + }, + { + "epoch": 0.2679324894514768, + "grad_norm": 0.5017849802970886, + "learning_rate": 0.0012625128677411388, + "loss": 1.6752, + "step": 2540 + }, + { + "epoch": 0.2689873417721519, + "grad_norm": 0.47625529766082764, + "learning_rate": 0.0012606770373377475, + "loss": 1.6705, + "step": 2550 + }, + { + "epoch": 0.270042194092827, + "grad_norm": 0.5121369957923889, + "learning_rate": 0.0012588354845487959, + "loss": 1.6759, + "step": 2560 + }, + { + "epoch": 0.27109704641350213, + "grad_norm": 0.6447870135307312, + "learning_rate": 0.001256988230009783, + "loss": 1.6679, + "step": 2570 + }, + { + "epoch": 0.2721518987341772, + "grad_norm": 0.46388521790504456, + "learning_rate": 0.0012551352944200976, + "loss": 1.6661, + "step": 2580 + }, + { + "epoch": 0.2732067510548523, + "grad_norm": 0.6746838688850403, + "learning_rate": 0.0012532766985427874, + "loss": 1.6722, + "step": 2590 + }, + { + "epoch": 0.2742616033755274, + "grad_norm": 0.5236440300941467, + "learning_rate": 0.0012514124632043272, + "loss": 1.6728, + "step": 2600 + }, + { + "epoch": 0.27531645569620256, + "grad_norm": 0.6455425024032593, + 
"learning_rate": 0.0012495426092943842, + "loss": 1.6698, + "step": 2610 + }, + { + "epoch": 0.27637130801687765, + "grad_norm": 0.8995099067687988, + "learning_rate": 0.0012476671577655845, + "loss": 1.6676, + "step": 2620 + }, + { + "epoch": 0.27742616033755274, + "grad_norm": 1.1469542980194092, + "learning_rate": 0.0012457861296332774, + "loss": 1.6603, + "step": 2630 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 0.7620439529418945, + "learning_rate": 0.001243899545975303, + "loss": 1.6716, + "step": 2640 + }, + { + "epoch": 0.2795358649789029, + "grad_norm": 0.5949313640594482, + "learning_rate": 0.0012420074279317515, + "loss": 1.6606, + "step": 2650 + }, + { + "epoch": 0.2805907172995781, + "grad_norm": 0.4743097424507141, + "learning_rate": 0.0012401097967047298, + "loss": 1.654, + "step": 2660 + }, + { + "epoch": 0.28164556962025317, + "grad_norm": 0.47151774168014526, + "learning_rate": 0.001238206673558122, + "loss": 1.6552, + "step": 2670 + }, + { + "epoch": 0.28270042194092826, + "grad_norm": 0.5160755515098572, + "learning_rate": 0.0012362980798173526, + "loss": 1.6547, + "step": 2680 + }, + { + "epoch": 0.28375527426160335, + "grad_norm": 0.5096105337142944, + "learning_rate": 0.0012343840368691462, + "loss": 1.6557, + "step": 2690 + }, + { + "epoch": 0.2848101265822785, + "grad_norm": 0.6104464530944824, + "learning_rate": 0.0012324645661612886, + "loss": 1.6492, + "step": 2700 + }, + { + "epoch": 0.2858649789029536, + "grad_norm": 0.855845034122467, + "learning_rate": 0.0012305396892023867, + "loss": 1.6585, + "step": 2710 + }, + { + "epoch": 0.2869198312236287, + "grad_norm": 0.424182653427124, + "learning_rate": 0.0012286094275616264, + "loss": 1.6553, + "step": 2720 + }, + { + "epoch": 0.2879746835443038, + "grad_norm": 0.5019987225532532, + "learning_rate": 0.0012266738028685318, + "loss": 1.6343, + "step": 2730 + }, + { + "epoch": 0.2890295358649789, + "grad_norm": 1.1053872108459473, + "learning_rate": 0.001224732836812723, + 
"loss": 1.6509, + "step": 2740 + }, + { + "epoch": 0.290084388185654, + "grad_norm": 0.7308480143547058, + "learning_rate": 0.0012227865511436724, + "loss": 1.6543, + "step": 2750 + }, + { + "epoch": 0.2911392405063291, + "grad_norm": 0.451983779668808, + "learning_rate": 0.001220834967670461, + "loss": 1.6637, + "step": 2760 + }, + { + "epoch": 0.2921940928270042, + "grad_norm": 0.4617938995361328, + "learning_rate": 0.0012188781082615346, + "loss": 1.6566, + "step": 2770 + }, + { + "epoch": 0.29324894514767935, + "grad_norm": 0.5226640105247498, + "learning_rate": 0.0012169159948444588, + "loss": 1.6494, + "step": 2780 + }, + { + "epoch": 0.29430379746835444, + "grad_norm": 0.6613280177116394, + "learning_rate": 0.001214948649405672, + "loss": 1.6504, + "step": 2790 + }, + { + "epoch": 0.29535864978902954, + "grad_norm": 0.5911545753479004, + "learning_rate": 0.0012129760939902407, + "loss": 1.6513, + "step": 2800 + }, + { + "epoch": 0.29641350210970463, + "grad_norm": 0.53304123878479, + "learning_rate": 0.0012109983507016114, + "loss": 1.6499, + "step": 2810 + }, + { + "epoch": 0.2974683544303797, + "grad_norm": 0.6235332489013672, + "learning_rate": 0.0012090154417013636, + "loss": 1.6537, + "step": 2820 + }, + { + "epoch": 0.29852320675105487, + "grad_norm": 1.1931818723678589, + "learning_rate": 0.0012070273892089605, + "loss": 1.6297, + "step": 2830 + }, + { + "epoch": 0.29957805907172996, + "grad_norm": 0.9852569699287415, + "learning_rate": 0.0012050342155015012, + "loss": 1.6325, + "step": 2840 + }, + { + "epoch": 0.30063291139240506, + "grad_norm": 0.5030422806739807, + "learning_rate": 0.0012030359429134707, + "loss": 1.6376, + "step": 2850 + }, + { + "epoch": 0.30168776371308015, + "grad_norm": 0.6530547142028809, + "learning_rate": 0.0012010325938364883, + "loss": 1.6404, + "step": 2860 + }, + { + "epoch": 0.3027426160337553, + "grad_norm": 1.1742103099822998, + "learning_rate": 0.0011990241907190592, + "loss": 1.6437, + "step": 2870 + }, + { + 
"epoch": 0.3037974683544304, + "grad_norm": 0.574685275554657, + "learning_rate": 0.001197010756066321, + "loss": 1.626, + "step": 2880 + }, + { + "epoch": 0.3048523206751055, + "grad_norm": 0.5830341577529907, + "learning_rate": 0.0011949923124397917, + "loss": 1.6341, + "step": 2890 + }, + { + "epoch": 0.3059071729957806, + "grad_norm": 0.484795480966568, + "learning_rate": 0.001192968882457118, + "loss": 1.6394, + "step": 2900 + }, + { + "epoch": 0.3069620253164557, + "grad_norm": 1.126344919204712, + "learning_rate": 0.001190940488791821, + "loss": 1.6474, + "step": 2910 + }, + { + "epoch": 0.3080168776371308, + "grad_norm": 0.5768723487854004, + "learning_rate": 0.0011889071541730419, + "loss": 1.6351, + "step": 2920 + }, + { + "epoch": 0.3090717299578059, + "grad_norm": 0.5678271651268005, + "learning_rate": 0.001186868901385288, + "loss": 1.6264, + "step": 2930 + }, + { + "epoch": 0.310126582278481, + "grad_norm": 0.49463793635368347, + "learning_rate": 0.001184825753268177, + "loss": 1.6308, + "step": 2940 + }, + { + "epoch": 0.3111814345991561, + "grad_norm": 0.9706162810325623, + "learning_rate": 0.0011827777327161814, + "loss": 1.6408, + "step": 2950 + }, + { + "epoch": 0.31223628691983124, + "grad_norm": 0.9023301601409912, + "learning_rate": 0.0011807248626783714, + "loss": 1.6304, + "step": 2960 + }, + { + "epoch": 0.31329113924050633, + "grad_norm": 0.5531619191169739, + "learning_rate": 0.0011786671661581584, + "loss": 1.6191, + "step": 2970 + }, + { + "epoch": 0.3143459915611814, + "grad_norm": 0.5522653460502625, + "learning_rate": 0.001176604666213036, + "loss": 1.6213, + "step": 2980 + }, + { + "epoch": 0.3154008438818565, + "grad_norm": 0.713241457939148, + "learning_rate": 0.0011745373859543236, + "loss": 1.639, + "step": 2990 + }, + { + "epoch": 0.31645569620253167, + "grad_norm": 0.4535176753997803, + "learning_rate": 0.0011724653485469063, + "loss": 1.6168, + "step": 3000 + }, + { + "epoch": 0.31751054852320676, + "grad_norm": 
0.6761485934257507, + "learning_rate": 0.0011703885772089743, + "loss": 1.6242, + "step": 3010 + }, + { + "epoch": 0.31856540084388185, + "grad_norm": 0.970213770866394, + "learning_rate": 0.0011683070952117646, + "loss": 1.624, + "step": 3020 + }, + { + "epoch": 0.31962025316455694, + "grad_norm": 0.5876306295394897, + "learning_rate": 0.0011662209258792998, + "loss": 1.6137, + "step": 3030 + }, + { + "epoch": 0.3206751054852321, + "grad_norm": 0.5551946759223938, + "learning_rate": 0.0011641300925881257, + "loss": 1.6243, + "step": 3040 + }, + { + "epoch": 0.3217299578059072, + "grad_norm": 0.5187597870826721, + "learning_rate": 0.0011620346187670501, + "loss": 1.6191, + "step": 3050 + }, + { + "epoch": 0.3227848101265823, + "grad_norm": 0.5157437920570374, + "learning_rate": 0.0011599345278968806, + "loss": 1.6302, + "step": 3060 + }, + { + "epoch": 0.32383966244725737, + "grad_norm": 0.47498974204063416, + "learning_rate": 0.0011578298435101604, + "loss": 1.6186, + "step": 3070 + }, + { + "epoch": 0.32489451476793246, + "grad_norm": 1.1298749446868896, + "learning_rate": 0.0011557205891909062, + "loss": 1.607, + "step": 3080 + }, + { + "epoch": 0.3259493670886076, + "grad_norm": 0.46439072489738464, + "learning_rate": 0.0011536067885743423, + "loss": 1.6239, + "step": 3090 + }, + { + "epoch": 0.3270042194092827, + "grad_norm": 0.5722910165786743, + "learning_rate": 0.001151488465346637, + "loss": 1.6094, + "step": 3100 + }, + { + "epoch": 0.3280590717299578, + "grad_norm": 0.6602784395217896, + "learning_rate": 0.0011493656432446362, + "loss": 1.6138, + "step": 3110 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 0.49073052406311035, + "learning_rate": 0.0011472383460555983, + "loss": 1.6163, + "step": 3120 + }, + { + "epoch": 0.33016877637130804, + "grad_norm": 0.6607328653335571, + "learning_rate": 0.001145106597616927, + "loss": 1.6252, + "step": 3130 + }, + { + "epoch": 0.33122362869198313, + "grad_norm": 0.9633255004882812, + "learning_rate": 
0.001142970421815904, + "loss": 1.6057, + "step": 3140 + }, + { + "epoch": 0.3322784810126582, + "grad_norm": 0.437183678150177, + "learning_rate": 0.0011408298425894226, + "loss": 1.6151, + "step": 3150 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.46504244208335876, + "learning_rate": 0.0011386848839237186, + "loss": 1.6137, + "step": 3160 + }, + { + "epoch": 0.33438818565400846, + "grad_norm": 0.5309893488883972, + "learning_rate": 0.0011365355698541005, + "loss": 1.6169, + "step": 3170 + }, + { + "epoch": 0.33544303797468356, + "grad_norm": 0.5407806038856506, + "learning_rate": 0.0011343819244646824, + "loss": 1.6134, + "step": 3180 + }, + { + "epoch": 0.33649789029535865, + "grad_norm": 0.4959893524646759, + "learning_rate": 0.001132223971888112, + "loss": 1.612, + "step": 3190 + }, + { + "epoch": 0.33755274261603374, + "grad_norm": 0.6287081837654114, + "learning_rate": 0.0011300617363053024, + "loss": 1.6025, + "step": 3200 + }, + { + "epoch": 0.33860759493670883, + "grad_norm": 0.7256467342376709, + "learning_rate": 0.0011278952419451586, + "loss": 1.6203, + "step": 3210 + }, + { + "epoch": 0.339662447257384, + "grad_norm": 0.5274771451950073, + "learning_rate": 0.0011257245130843077, + "loss": 1.611, + "step": 3220 + }, + { + "epoch": 0.3407172995780591, + "grad_norm": 0.6178369522094727, + "learning_rate": 0.0011235495740468265, + "loss": 1.6042, + "step": 3230 + }, + { + "epoch": 0.34177215189873417, + "grad_norm": 0.4739209711551666, + "learning_rate": 0.0011213704492039694, + "loss": 1.5843, + "step": 3240 + }, + { + "epoch": 0.34282700421940926, + "grad_norm": 0.4660987854003906, + "learning_rate": 0.001119187162973894, + "loss": 1.6075, + "step": 3250 + }, + { + "epoch": 0.3438818565400844, + "grad_norm": 0.4897654056549072, + "learning_rate": 0.001116999739821388, + "loss": 1.5964, + "step": 3260 + }, + { + "epoch": 0.3449367088607595, + "grad_norm": 0.5139145851135254, + "learning_rate": 0.0011148082042575968, + "loss": 1.6137, + 
"step": 3270 + }, + { + "epoch": 0.3459915611814346, + "grad_norm": 0.6091996431350708, + "learning_rate": 0.0011126125808397461, + "loss": 1.6136, + "step": 3280 + }, + { + "epoch": 0.3470464135021097, + "grad_norm": 0.4915454387664795, + "learning_rate": 0.0011104128941708683, + "loss": 1.5932, + "step": 3290 + }, + { + "epoch": 0.34810126582278483, + "grad_norm": 0.8600702881813049, + "learning_rate": 0.001108209168899527, + "loss": 1.6074, + "step": 3300 + }, + { + "epoch": 0.3491561181434599, + "grad_norm": 0.6012086868286133, + "learning_rate": 0.0011060014297195396, + "loss": 1.6054, + "step": 3310 + }, + { + "epoch": 0.350210970464135, + "grad_norm": 0.7052494883537292, + "learning_rate": 0.0011037897013697015, + "loss": 1.6105, + "step": 3320 + }, + { + "epoch": 0.3512658227848101, + "grad_norm": 0.6381321549415588, + "learning_rate": 0.0011015740086335092, + "loss": 1.6029, + "step": 3330 + }, + { + "epoch": 0.35232067510548526, + "grad_norm": 0.5391980409622192, + "learning_rate": 0.0010993543763388814, + "loss": 1.6018, + "step": 3340 + }, + { + "epoch": 0.35337552742616035, + "grad_norm": 0.47316044569015503, + "learning_rate": 0.0010971308293578814, + "loss": 1.5906, + "step": 3350 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 0.4353296160697937, + "learning_rate": 0.0010949033926064397, + "loss": 1.5893, + "step": 3360 + }, + { + "epoch": 0.35548523206751054, + "grad_norm": 0.4708898365497589, + "learning_rate": 0.0010926720910440725, + "loss": 1.6018, + "step": 3370 + }, + { + "epoch": 0.35654008438818563, + "grad_norm": 0.5538137555122375, + "learning_rate": 0.001090436949673603, + "loss": 1.5953, + "step": 3380 + }, + { + "epoch": 0.3575949367088608, + "grad_norm": 0.4721989035606384, + "learning_rate": 0.0010881979935408815, + "loss": 1.5876, + "step": 3390 + }, + { + "epoch": 0.35864978902953587, + "grad_norm": 0.766671895980835, + "learning_rate": 0.0010859552477345052, + "loss": 1.6006, + "step": 3400 + }, + { + "epoch": 
0.35970464135021096, + "grad_norm": 0.5119830369949341, + "learning_rate": 0.001083708737385536, + "loss": 1.5961, + "step": 3410 + }, + { + "epoch": 0.36075949367088606, + "grad_norm": 0.5428466796875, + "learning_rate": 0.0010814584876672187, + "loss": 1.5788, + "step": 3420 + }, + { + "epoch": 0.3618143459915612, + "grad_norm": 0.6007835865020752, + "learning_rate": 0.0010792045237947008, + "loss": 1.5947, + "step": 3430 + }, + { + "epoch": 0.3628691983122363, + "grad_norm": 0.6104546785354614, + "learning_rate": 0.0010769468710247478, + "loss": 1.59, + "step": 3440 + }, + { + "epoch": 0.3639240506329114, + "grad_norm": 0.4558543264865875, + "learning_rate": 0.0010746855546554612, + "loss": 1.5862, + "step": 3450 + }, + { + "epoch": 0.3649789029535865, + "grad_norm": 0.49403393268585205, + "learning_rate": 0.0010724206000259954, + "loss": 1.5795, + "step": 3460 + }, + { + "epoch": 0.36603375527426163, + "grad_norm": 0.547333836555481, + "learning_rate": 0.0010701520325162727, + "loss": 1.5939, + "step": 3470 + }, + { + "epoch": 0.3670886075949367, + "grad_norm": 0.4920315444469452, + "learning_rate": 0.0010678798775467001, + "loss": 1.5966, + "step": 3480 + }, + { + "epoch": 0.3681434599156118, + "grad_norm": 1.1386079788208008, + "learning_rate": 0.0010656041605778832, + "loss": 1.5955, + "step": 3490 + }, + { + "epoch": 0.3691983122362869, + "grad_norm": 0.5450473427772522, + "learning_rate": 0.001063324907110342, + "loss": 1.5729, + "step": 3500 + }, + { + "epoch": 0.370253164556962, + "grad_norm": 0.5650832653045654, + "learning_rate": 0.0010610421426842241, + "loss": 1.5917, + "step": 3510 + }, + { + "epoch": 0.37130801687763715, + "grad_norm": 0.5141381621360779, + "learning_rate": 0.00105875589287902, + "loss": 1.5911, + "step": 3520 + }, + { + "epoch": 0.37236286919831224, + "grad_norm": 0.910280168056488, + "learning_rate": 0.0010564661833132752, + "loss": 1.5994, + "step": 3530 + }, + { + "epoch": 0.37341772151898733, + "grad_norm": 0.9338151812553406, 
+ "learning_rate": 0.001054173039644303, + "loss": 1.5933, + "step": 3540 + }, + { + "epoch": 0.3744725738396624, + "grad_norm": 0.5695720911026001, + "learning_rate": 0.0010518764875678981, + "loss": 1.5809, + "step": 3550 + }, + { + "epoch": 0.3755274261603376, + "grad_norm": 0.6266768574714661, + "learning_rate": 0.001049576552818048, + "loss": 1.5838, + "step": 3560 + }, + { + "epoch": 0.37658227848101267, + "grad_norm": 0.6025685667991638, + "learning_rate": 0.0010472732611666448, + "loss": 1.5797, + "step": 3570 + }, + { + "epoch": 0.37763713080168776, + "grad_norm": 0.6261733770370483, + "learning_rate": 0.0010449666384231954, + "loss": 1.5781, + "step": 3580 + }, + { + "epoch": 0.37869198312236285, + "grad_norm": 0.48340892791748047, + "learning_rate": 0.0010426567104345346, + "loss": 1.5916, + "step": 3590 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 0.49008363485336304, + "learning_rate": 0.0010403435030845332, + "loss": 1.574, + "step": 3600 + }, + { + "epoch": 0.3808016877637131, + "grad_norm": 1.1611214876174927, + "learning_rate": 0.0010380270422938093, + "loss": 1.5838, + "step": 3610 + }, + { + "epoch": 0.3818565400843882, + "grad_norm": 0.9247241616249084, + "learning_rate": 0.0010357073540194362, + "loss": 1.573, + "step": 3620 + }, + { + "epoch": 0.3829113924050633, + "grad_norm": 0.6713014245033264, + "learning_rate": 0.001033384464254655, + "loss": 1.5775, + "step": 3630 + }, + { + "epoch": 0.38396624472573837, + "grad_norm": 0.5715181827545166, + "learning_rate": 0.001031058399028579, + "loss": 1.5749, + "step": 3640 + }, + { + "epoch": 0.3850210970464135, + "grad_norm": 0.5150052309036255, + "learning_rate": 0.001028729184405905, + "loss": 1.5846, + "step": 3650 + }, + { + "epoch": 0.3860759493670886, + "grad_norm": 0.5131828784942627, + "learning_rate": 0.0010263968464866201, + "loss": 1.5758, + "step": 3660 + }, + { + "epoch": 0.3871308016877637, + "grad_norm": 0.5704460740089417, + "learning_rate": 0.0010240614114057098, + "loss": 
1.5706, + "step": 3670 + }, + { + "epoch": 0.3881856540084388, + "grad_norm": 0.535331130027771, + "learning_rate": 0.001021722905332864, + "loss": 1.5751, + "step": 3680 + }, + { + "epoch": 0.38924050632911394, + "grad_norm": 0.6319772601127625, + "learning_rate": 0.0010193813544721855, + "loss": 1.5818, + "step": 3690 + }, + { + "epoch": 0.39029535864978904, + "grad_norm": 0.8062748312950134, + "learning_rate": 0.001017036785061895, + "loss": 1.586, + "step": 3700 + }, + { + "epoch": 0.39135021097046413, + "grad_norm": 0.49930211901664734, + "learning_rate": 0.0010146892233740376, + "loss": 1.5723, + "step": 3710 + }, + { + "epoch": 0.3924050632911392, + "grad_norm": 0.4763012230396271, + "learning_rate": 0.0010123386957141883, + "loss": 1.5535, + "step": 3720 + }, + { + "epoch": 0.39345991561181437, + "grad_norm": 0.86577969789505, + "learning_rate": 0.0010099852284211573, + "loss": 1.5716, + "step": 3730 + }, + { + "epoch": 0.39451476793248946, + "grad_norm": 0.5172073841094971, + "learning_rate": 0.0010076288478666944, + "loss": 1.5707, + "step": 3740 + }, + { + "epoch": 0.39556962025316456, + "grad_norm": 0.5007083415985107, + "learning_rate": 0.0010052695804551946, + "loss": 1.5751, + "step": 3750 + }, + { + "epoch": 0.39662447257383965, + "grad_norm": 0.6111957430839539, + "learning_rate": 0.0010029074526234014, + "loss": 1.5652, + "step": 3760 + }, + { + "epoch": 0.39767932489451474, + "grad_norm": 0.6261873841285706, + "learning_rate": 0.0010005424908401104, + "loss": 1.5806, + "step": 3770 + }, + { + "epoch": 0.3987341772151899, + "grad_norm": 0.5061927437782288, + "learning_rate": 0.0009981747216058728, + "loss": 1.5665, + "step": 3780 + }, + { + "epoch": 0.399789029535865, + "grad_norm": 0.49627190828323364, + "learning_rate": 0.0009958041714526998, + "loss": 1.5696, + "step": 3790 + }, + { + "epoch": 0.4008438818565401, + "grad_norm": 0.6029568314552307, + "learning_rate": 0.0009934308669437627, + "loss": 1.5745, + "step": 3800 + }, + { + "epoch": 
0.40189873417721517, + "grad_norm": 0.47557276487350464, + "learning_rate": 0.0009910548346730972, + "loss": 1.5624, + "step": 3810 + }, + { + "epoch": 0.4029535864978903, + "grad_norm": 0.5486270785331726, + "learning_rate": 0.0009886761012653062, + "loss": 1.552, + "step": 3820 + }, + { + "epoch": 0.4040084388185654, + "grad_norm": 0.6182465553283691, + "learning_rate": 0.000986294693375258, + "loss": 1.5581, + "step": 3830 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 0.47843965888023376, + "learning_rate": 0.000983910637687791, + "loss": 1.574, + "step": 3840 + }, + { + "epoch": 0.4061181434599156, + "grad_norm": 0.5410100221633911, + "learning_rate": 0.0009815239609174138, + "loss": 1.5578, + "step": 3850 + }, + { + "epoch": 0.40717299578059074, + "grad_norm": 0.9322824478149414, + "learning_rate": 0.0009791346898080043, + "loss": 1.5709, + "step": 3860 + }, + { + "epoch": 0.40822784810126583, + "grad_norm": 0.5444523692131042, + "learning_rate": 0.0009767428511325122, + "loss": 1.5607, + "step": 3870 + }, + { + "epoch": 0.4092827004219409, + "grad_norm": 0.48717835545539856, + "learning_rate": 0.0009743484716926576, + "loss": 1.5495, + "step": 3880 + }, + { + "epoch": 0.410337552742616, + "grad_norm": 0.6598711013793945, + "learning_rate": 0.0009719515783186319, + "loss": 1.5493, + "step": 3890 + }, + { + "epoch": 0.41139240506329117, + "grad_norm": 0.6580759882926941, + "learning_rate": 0.0009695521978687951, + "loss": 1.5531, + "step": 3900 + }, + { + "epoch": 0.41244725738396626, + "grad_norm": 0.5247423648834229, + "learning_rate": 0.0009671503572293767, + "loss": 1.5563, + "step": 3910 + }, + { + "epoch": 0.41350210970464135, + "grad_norm": 0.5178701877593994, + "learning_rate": 0.0009647460833141742, + "loss": 1.5511, + "step": 3920 + }, + { + "epoch": 0.41455696202531644, + "grad_norm": 0.5494204759597778, + "learning_rate": 0.0009623394030642507, + "loss": 1.5573, + "step": 3930 + }, + { + "epoch": 0.41561181434599154, + "grad_norm": 
0.7308394908905029, + "learning_rate": 0.0009599303434476334, + "loss": 1.5499, + "step": 3940 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 1.116909146308899, + "learning_rate": 0.0009575189314590118, + "loss": 1.5572, + "step": 3950 + }, + { + "epoch": 0.4177215189873418, + "grad_norm": 0.5184165835380554, + "learning_rate": 0.0009551051941194346, + "loss": 1.5589, + "step": 3960 + }, + { + "epoch": 0.41877637130801687, + "grad_norm": 0.6424084305763245, + "learning_rate": 0.0009526891584760071, + "loss": 1.5447, + "step": 3970 + }, + { + "epoch": 0.41983122362869196, + "grad_norm": 0.46564581990242004, + "learning_rate": 0.0009502708516015889, + "loss": 1.5557, + "step": 3980 + }, + { + "epoch": 0.4208860759493671, + "grad_norm": 0.5686947703361511, + "learning_rate": 0.0009478503005944888, + "loss": 1.5433, + "step": 3990 + }, + { + "epoch": 0.4219409282700422, + "grad_norm": 0.5193183422088623, + "learning_rate": 0.0009454275325781632, + "loss": 1.5623, + "step": 4000 + }, + { + "epoch": 0.4229957805907173, + "grad_norm": 0.5051531791687012, + "learning_rate": 0.0009430025747009104, + "loss": 1.5481, + "step": 4010 + }, + { + "epoch": 0.4240506329113924, + "grad_norm": 0.48578837513923645, + "learning_rate": 0.0009405754541355677, + "loss": 1.5489, + "step": 4020 + }, + { + "epoch": 0.42510548523206754, + "grad_norm": 0.48167306184768677, + "learning_rate": 0.0009381461980792061, + "loss": 1.5411, + "step": 4030 + }, + { + "epoch": 0.42616033755274263, + "grad_norm": 0.6252226233482361, + "learning_rate": 0.0009357148337528256, + "loss": 1.5525, + "step": 4040 + }, + { + "epoch": 0.4272151898734177, + "grad_norm": 0.6204479336738586, + "learning_rate": 0.0009332813884010511, + "loss": 1.5524, + "step": 4050 + }, + { + "epoch": 0.4282700421940928, + "grad_norm": 0.4954337775707245, + "learning_rate": 0.0009308458892918259, + "loss": 1.5567, + "step": 4060 + }, + { + "epoch": 0.4293248945147679, + "grad_norm": 0.625588595867157, + "learning_rate": 
0.0009284083637161064, + "loss": 1.5512, + "step": 4070 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 0.5604734420776367, + "learning_rate": 0.0009259688389875574, + "loss": 1.5569, + "step": 4080 + }, + { + "epoch": 0.43143459915611815, + "grad_norm": 0.521360456943512, + "learning_rate": 0.0009235273424422442, + "loss": 1.5519, + "step": 4090 + }, + { + "epoch": 0.43248945147679324, + "grad_norm": 0.4531897008419037, + "learning_rate": 0.0009210839014383282, + "loss": 1.5469, + "step": 4100 + }, + { + "epoch": 0.43354430379746833, + "grad_norm": 0.47998490929603577, + "learning_rate": 0.0009186385433557584, + "loss": 1.5399, + "step": 4110 + }, + { + "epoch": 0.4345991561181435, + "grad_norm": 0.8988192081451416, + "learning_rate": 0.0009161912955959668, + "loss": 1.5473, + "step": 4120 + }, + { + "epoch": 0.4356540084388186, + "grad_norm": 0.5566585063934326, + "learning_rate": 0.000913742185581559, + "loss": 1.5383, + "step": 4130 + }, + { + "epoch": 0.43670886075949367, + "grad_norm": 0.6921567320823669, + "learning_rate": 0.0009112912407560086, + "loss": 1.5394, + "step": 4140 + }, + { + "epoch": 0.43776371308016876, + "grad_norm": 0.5351143479347229, + "learning_rate": 0.0009088384885833495, + "loss": 1.5453, + "step": 4150 + }, + { + "epoch": 0.4388185654008439, + "grad_norm": 0.5496554374694824, + "learning_rate": 0.000906383956547867, + "loss": 1.5375, + "step": 4160 + }, + { + "epoch": 0.439873417721519, + "grad_norm": 0.47633862495422363, + "learning_rate": 0.0009039276721537915, + "loss": 1.5394, + "step": 4170 + }, + { + "epoch": 0.4409282700421941, + "grad_norm": 0.5072779059410095, + "learning_rate": 0.0009014696629249886, + "loss": 1.5341, + "step": 4180 + }, + { + "epoch": 0.4419831223628692, + "grad_norm": 0.7401310205459595, + "learning_rate": 0.0008990099564046522, + "loss": 1.5441, + "step": 4190 + }, + { + "epoch": 0.4430379746835443, + "grad_norm": 0.5734917521476746, + "learning_rate": 0.0008965485801549946, + "loss": 1.5435, + 
"step": 4200 + }, + { + "epoch": 0.4440928270042194, + "grad_norm": 0.4642139971256256, + "learning_rate": 0.000894085561756939, + "loss": 1.5331, + "step": 4210 + }, + { + "epoch": 0.4451476793248945, + "grad_norm": 0.7488202452659607, + "learning_rate": 0.0008916209288098088, + "loss": 1.5402, + "step": 4220 + }, + { + "epoch": 0.4462025316455696, + "grad_norm": 0.5966841578483582, + "learning_rate": 0.0008891547089310198, + "loss": 1.5368, + "step": 4230 + }, + { + "epoch": 0.4472573839662447, + "grad_norm": 0.6754875183105469, + "learning_rate": 0.0008866869297557699, + "loss": 1.5364, + "step": 4240 + }, + { + "epoch": 0.44831223628691985, + "grad_norm": 0.5848161578178406, + "learning_rate": 0.0008842176189367299, + "loss": 1.5403, + "step": 4250 + }, + { + "epoch": 0.44936708860759494, + "grad_norm": 0.7788020372390747, + "learning_rate": 0.0008817468041437329, + "loss": 1.5455, + "step": 4260 + }, + { + "epoch": 0.45042194092827004, + "grad_norm": 0.46313008666038513, + "learning_rate": 0.0008792745130634654, + "loss": 1.5328, + "step": 4270 + }, + { + "epoch": 0.45147679324894513, + "grad_norm": 0.5829064846038818, + "learning_rate": 0.0008768007733991561, + "loss": 1.5294, + "step": 4280 + }, + { + "epoch": 0.4525316455696203, + "grad_norm": 0.4733564257621765, + "learning_rate": 0.0008743256128702658, + "loss": 1.5399, + "step": 4290 + }, + { + "epoch": 0.45358649789029537, + "grad_norm": 0.6273133158683777, + "learning_rate": 0.0008718490592121768, + "loss": 1.5368, + "step": 4300 + }, + { + "epoch": 0.45464135021097046, + "grad_norm": 0.45090365409851074, + "learning_rate": 0.0008693711401758822, + "loss": 1.5354, + "step": 4310 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 1.128738522529602, + "learning_rate": 0.0008668918835276747, + "loss": 1.5302, + "step": 4320 + }, + { + "epoch": 0.45675105485232065, + "grad_norm": 0.5392206311225891, + "learning_rate": 0.0008644113170488355, + "loss": 1.5375, + "step": 4330 + }, + { + "epoch": 
0.4578059071729958, + "grad_norm": 0.6845002174377441, + "learning_rate": 0.0008619294685353235, + "loss": 1.5274, + "step": 4340 + }, + { + "epoch": 0.4588607594936709, + "grad_norm": 0.8332899808883667, + "learning_rate": 0.0008594463657974627, + "loss": 1.5286, + "step": 4350 + }, + { + "epoch": 0.459915611814346, + "grad_norm": 0.7652190327644348, + "learning_rate": 0.0008569620366596322, + "loss": 1.5463, + "step": 4360 + }, + { + "epoch": 0.4609704641350211, + "grad_norm": 0.6484683752059937, + "learning_rate": 0.000854476508959953, + "loss": 1.5245, + "step": 4370 + }, + { + "epoch": 0.4620253164556962, + "grad_norm": 0.519527792930603, + "learning_rate": 0.0008519898105499762, + "loss": 1.5303, + "step": 4380 + }, + { + "epoch": 0.4630801687763713, + "grad_norm": 0.4845265746116638, + "learning_rate": 0.0008495019692943721, + "loss": 1.5235, + "step": 4390 + }, + { + "epoch": 0.4641350210970464, + "grad_norm": 0.48636671900749207, + "learning_rate": 0.0008470130130706166, + "loss": 1.529, + "step": 4400 + }, + { + "epoch": 0.4651898734177215, + "grad_norm": 0.8035237193107605, + "learning_rate": 0.0008445229697686795, + "loss": 1.5423, + "step": 4410 + }, + { + "epoch": 0.46624472573839665, + "grad_norm": 0.5630937814712524, + "learning_rate": 0.0008420318672907119, + "loss": 1.5273, + "step": 4420 + }, + { + "epoch": 0.46729957805907174, + "grad_norm": 0.7922337055206299, + "learning_rate": 0.0008395397335507334, + "loss": 1.5282, + "step": 4430 + }, + { + "epoch": 0.46835443037974683, + "grad_norm": 0.48462677001953125, + "learning_rate": 0.0008370465964743196, + "loss": 1.526, + "step": 4440 + }, + { + "epoch": 0.4694092827004219, + "grad_norm": 0.49322086572647095, + "learning_rate": 0.0008345524839982886, + "loss": 1.533, + "step": 4450 + }, + { + "epoch": 0.4704641350210971, + "grad_norm": 0.47821831703186035, + "learning_rate": 0.0008320574240703886, + "loss": 1.5191, + "step": 4460 + }, + { + "epoch": 0.47151898734177217, + "grad_norm": 
0.4673146903514862, + "learning_rate": 0.0008295614446489842, + "loss": 1.5279, + "step": 4470 + }, + { + "epoch": 0.47257383966244726, + "grad_norm": 0.5318535566329956, + "learning_rate": 0.0008270645737027441, + "loss": 1.531, + "step": 4480 + }, + { + "epoch": 0.47362869198312235, + "grad_norm": 0.7749707698822021, + "learning_rate": 0.0008245668392103259, + "loss": 1.5219, + "step": 4490 + }, + { + "epoch": 0.47468354430379744, + "grad_norm": 0.7400705218315125, + "learning_rate": 0.0008220682691600645, + "loss": 1.5198, + "step": 4500 + }, + { + "epoch": 0.4757383966244726, + "grad_norm": 0.4604952335357666, + "learning_rate": 0.0008195688915496571, + "loss": 1.5253, + "step": 4510 + }, + { + "epoch": 0.4767932489451477, + "grad_norm": 0.6762343049049377, + "learning_rate": 0.0008170687343858506, + "loss": 1.5081, + "step": 4520 + }, + { + "epoch": 0.4778481012658228, + "grad_norm": 0.4940911531448364, + "learning_rate": 0.0008145678256841265, + "loss": 1.5261, + "step": 4530 + }, + { + "epoch": 0.47890295358649787, + "grad_norm": 0.5079637169837952, + "learning_rate": 0.0008120661934683879, + "loss": 1.5261, + "step": 4540 + }, + { + "epoch": 0.479957805907173, + "grad_norm": 0.6899538636207581, + "learning_rate": 0.0008095638657706456, + "loss": 1.524, + "step": 4550 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 0.7249855995178223, + "learning_rate": 0.000807060870630703, + "loss": 1.5169, + "step": 4560 + }, + { + "epoch": 0.4820675105485232, + "grad_norm": 0.5128796100616455, + "learning_rate": 0.000804557236095843, + "loss": 1.4958, + "step": 4570 + }, + { + "epoch": 0.4831223628691983, + "grad_norm": 0.5689982771873474, + "learning_rate": 0.0008020529902205129, + "loss": 1.5212, + "step": 4580 + }, + { + "epoch": 0.48417721518987344, + "grad_norm": 0.48652711510658264, + "learning_rate": 0.0007995481610660108, + "loss": 1.5055, + "step": 4590 + }, + { + "epoch": 0.48523206751054854, + "grad_norm": 0.6480730175971985, + "learning_rate": 
0.0007970427767001702, + "loss": 1.523, + "step": 4600 + }, + { + "epoch": 0.48628691983122363, + "grad_norm": 0.6398733854293823, + "learning_rate": 0.0007945368651970464, + "loss": 1.525, + "step": 4610 + }, + { + "epoch": 0.4873417721518987, + "grad_norm": 0.4856146574020386, + "learning_rate": 0.0007920304546366013, + "loss": 1.5048, + "step": 4620 + }, + { + "epoch": 0.4883966244725738, + "grad_norm": 0.5048703551292419, + "learning_rate": 0.000789523573104389, + "loss": 1.519, + "step": 4630 + }, + { + "epoch": 0.48945147679324896, + "grad_norm": 0.4923306107521057, + "learning_rate": 0.0007870162486912414, + "loss": 1.5219, + "step": 4640 + }, + { + "epoch": 0.49050632911392406, + "grad_norm": 0.502138078212738, + "learning_rate": 0.0007845085094929527, + "loss": 1.523, + "step": 4650 + }, + { + "epoch": 0.49156118143459915, + "grad_norm": 0.5037270188331604, + "learning_rate": 0.0007820003836099649, + "loss": 1.5126, + "step": 4660 + }, + { + "epoch": 0.49261603375527424, + "grad_norm": 0.5258697271347046, + "learning_rate": 0.0007794918991470537, + "loss": 1.5105, + "step": 4670 + }, + { + "epoch": 0.4936708860759494, + "grad_norm": 0.5824125409126282, + "learning_rate": 0.0007769830842130119, + "loss": 1.507, + "step": 4680 + }, + { + "epoch": 0.4947257383966245, + "grad_norm": 0.6700602173805237, + "learning_rate": 0.0007744739669203361, + "loss": 1.505, + "step": 4690 + }, + { + "epoch": 0.4957805907172996, + "grad_norm": 0.5182927846908569, + "learning_rate": 0.0007719645753849108, + "loss": 1.5141, + "step": 4700 + }, + { + "epoch": 0.49683544303797467, + "grad_norm": 0.463286429643631, + "learning_rate": 0.0007694549377256932, + "loss": 1.5154, + "step": 4710 + }, + { + "epoch": 0.4978902953586498, + "grad_norm": 0.9340118169784546, + "learning_rate": 0.0007669450820643987, + "loss": 1.5101, + "step": 4720 + }, + { + "epoch": 0.4989451476793249, + "grad_norm": 0.5239022970199585, + "learning_rate": 0.0007644350365251855, + "loss": 1.5085, + "step": 
4730 + }, + { + "epoch": 0.5, + "grad_norm": 0.531623363494873, + "learning_rate": 0.0007619248292343399, + "loss": 1.5059, + "step": 4740 + }, + { + "epoch": 0.5010548523206751, + "grad_norm": 0.5866503715515137, + "learning_rate": 0.0007594144883199599, + "loss": 1.5241, + "step": 4750 + }, + { + "epoch": 0.5021097046413502, + "grad_norm": 0.551724910736084, + "learning_rate": 0.0007569040419116413, + "loss": 1.508, + "step": 4760 + }, + { + "epoch": 0.5031645569620253, + "grad_norm": 0.5427130460739136, + "learning_rate": 0.000754393518140162, + "loss": 1.5053, + "step": 4770 + }, + { + "epoch": 0.5042194092827004, + "grad_norm": 0.4989750385284424, + "learning_rate": 0.0007518829451371665, + "loss": 1.5182, + "step": 4780 + }, + { + "epoch": 0.5052742616033755, + "grad_norm": 0.470165878534317, + "learning_rate": 0.0007493723510348516, + "loss": 1.504, + "step": 4790 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 0.7496804594993591, + "learning_rate": 0.0007468617639656496, + "loss": 1.5051, + "step": 4800 + }, + { + "epoch": 0.5073839662447257, + "grad_norm": 0.5750884413719177, + "learning_rate": 0.0007443512120619144, + "loss": 1.5032, + "step": 4810 + }, + { + "epoch": 0.5084388185654009, + "grad_norm": 0.5860179662704468, + "learning_rate": 0.0007418407234556067, + "loss": 1.5243, + "step": 4820 + }, + { + "epoch": 0.509493670886076, + "grad_norm": 0.581038773059845, + "learning_rate": 0.0007393303262779767, + "loss": 1.5039, + "step": 4830 + }, + { + "epoch": 0.510548523206751, + "grad_norm": 0.47394222021102905, + "learning_rate": 0.0007368200486592507, + "loss": 1.4983, + "step": 4840 + }, + { + "epoch": 0.5116033755274262, + "grad_norm": 0.5796689391136169, + "learning_rate": 0.0007343099187283149, + "loss": 1.515, + "step": 4850 + }, + { + "epoch": 0.5126582278481012, + "grad_norm": 0.6724705696105957, + "learning_rate": 0.0007317999646124011, + "loss": 1.5008, + "step": 4860 + }, + { + "epoch": 0.5137130801687764, + "grad_norm": 
0.6664035320281982, + "learning_rate": 0.0007292902144367704, + "loss": 1.493, + "step": 4870 + }, + { + "epoch": 0.5147679324894515, + "grad_norm": 0.9945948719978333, + "learning_rate": 0.0007267806963243995, + "loss": 1.5157, + "step": 4880 + }, + { + "epoch": 0.5158227848101266, + "grad_norm": 0.8466383218765259, + "learning_rate": 0.0007242714383956639, + "loss": 1.5047, + "step": 4890 + }, + { + "epoch": 0.5168776371308017, + "grad_norm": 0.48031893372535706, + "learning_rate": 0.000721762468768024, + "loss": 1.5102, + "step": 4900 + }, + { + "epoch": 0.5179324894514767, + "grad_norm": 0.5278339385986328, + "learning_rate": 0.0007192538155557094, + "loss": 1.5052, + "step": 4910 + }, + { + "epoch": 0.5189873417721519, + "grad_norm": 0.7497695088386536, + "learning_rate": 0.0007167455068694046, + "loss": 1.4956, + "step": 4920 + }, + { + "epoch": 0.520042194092827, + "grad_norm": 0.7196361422538757, + "learning_rate": 0.000714237570815933, + "loss": 1.5035, + "step": 4930 + }, + { + "epoch": 0.5210970464135021, + "grad_norm": 0.49846696853637695, + "learning_rate": 0.0007117300354979423, + "loss": 1.5002, + "step": 4940 + }, + { + "epoch": 0.5221518987341772, + "grad_norm": 0.5527176856994629, + "learning_rate": 0.000709222929013591, + "loss": 1.5091, + "step": 4950 + }, + { + "epoch": 0.5232067510548524, + "grad_norm": 0.5904789566993713, + "learning_rate": 0.0007067162794562309, + "loss": 1.4891, + "step": 4960 + }, + { + "epoch": 0.5242616033755274, + "grad_norm": 0.7089250683784485, + "learning_rate": 0.0007042101149140943, + "loss": 1.4979, + "step": 4970 + }, + { + "epoch": 0.5253164556962026, + "grad_norm": 0.5322713851928711, + "learning_rate": 0.0007017044634699787, + "loss": 1.4927, + "step": 4980 + }, + { + "epoch": 0.5263713080168776, + "grad_norm": 0.6182569861412048, + "learning_rate": 0.0006991993532009319, + "loss": 1.4916, + "step": 4990 + }, + { + "epoch": 0.5274261603375527, + "grad_norm": 0.5848808884620667, + "learning_rate": 
0.0006966948121779378, + "loss": 1.5061, + "step": 5000 + }, + { + "epoch": 0.5284810126582279, + "grad_norm": 0.4910695552825928, + "learning_rate": 0.000694190868465601, + "loss": 1.4983, + "step": 5010 + }, + { + "epoch": 0.5295358649789029, + "grad_norm": 0.5071682929992676, + "learning_rate": 0.0006916875501218343, + "loss": 1.4931, + "step": 5020 + }, + { + "epoch": 0.5305907172995781, + "grad_norm": 0.4766605496406555, + "learning_rate": 0.0006891848851975416, + "loss": 1.4752, + "step": 5030 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 0.45955702662467957, + "learning_rate": 0.0006866829017363054, + "loss": 1.5053, + "step": 5040 + }, + { + "epoch": 0.5327004219409283, + "grad_norm": 0.501193642616272, + "learning_rate": 0.0006841816277740722, + "loss": 1.4923, + "step": 5050 + }, + { + "epoch": 0.5337552742616034, + "grad_norm": 0.6511161923408508, + "learning_rate": 0.0006816810913388379, + "loss": 1.5066, + "step": 5060 + }, + { + "epoch": 0.5348101265822784, + "grad_norm": 0.5442824363708496, + "learning_rate": 0.0006791813204503342, + "loss": 1.4916, + "step": 5070 + }, + { + "epoch": 0.5358649789029536, + "grad_norm": 0.5160462856292725, + "learning_rate": 0.0006766823431197147, + "loss": 1.5036, + "step": 5080 + }, + { + "epoch": 0.5369198312236287, + "grad_norm": 0.5459616780281067, + "learning_rate": 0.0006741841873492406, + "loss": 1.4942, + "step": 5090 + }, + { + "epoch": 0.5379746835443038, + "grad_norm": 1.007577896118164, + "learning_rate": 0.0006716868811319671, + "loss": 1.4822, + "step": 5100 + }, + { + "epoch": 0.5390295358649789, + "grad_norm": 0.9401910901069641, + "learning_rate": 0.0006691904524514297, + "loss": 1.4961, + "step": 5110 + }, + { + "epoch": 0.540084388185654, + "grad_norm": 0.8771179914474487, + "learning_rate": 0.0006666949292813306, + "loss": 1.4882, + "step": 5120 + }, + { + "epoch": 0.5411392405063291, + "grad_norm": 0.70003342628479, + "learning_rate": 0.0006642003395852258, + "loss": 1.4858, + "step": 
5130 + }, + { + "epoch": 0.5421940928270043, + "grad_norm": 0.5451027154922485, + "learning_rate": 0.0006617067113162103, + "loss": 1.5005, + "step": 5140 + }, + { + "epoch": 0.5432489451476793, + "grad_norm": 0.4791337251663208, + "learning_rate": 0.0006592140724166073, + "loss": 1.4988, + "step": 5150 + }, + { + "epoch": 0.5443037974683544, + "grad_norm": 0.5083988308906555, + "learning_rate": 0.0006567224508176523, + "loss": 1.4902, + "step": 5160 + }, + { + "epoch": 0.5453586497890295, + "grad_norm": 0.5682942271232605, + "learning_rate": 0.0006542318744391821, + "loss": 1.4896, + "step": 5170 + }, + { + "epoch": 0.5464135021097046, + "grad_norm": 0.6583775877952576, + "learning_rate": 0.0006517423711893209, + "loss": 1.5011, + "step": 5180 + }, + { + "epoch": 0.5474683544303798, + "grad_norm": 0.5080510377883911, + "learning_rate": 0.0006492539689641685, + "loss": 1.4832, + "step": 5190 + }, + { + "epoch": 0.5485232067510548, + "grad_norm": 0.8768990635871887, + "learning_rate": 0.0006467666956474865, + "loss": 1.4948, + "step": 5200 + }, + { + "epoch": 0.54957805907173, + "grad_norm": 0.7748043537139893, + "learning_rate": 0.0006442805791103873, + "loss": 1.4788, + "step": 5210 + }, + { + "epoch": 0.5506329113924051, + "grad_norm": 0.45537272095680237, + "learning_rate": 0.0006417956472110205, + "loss": 1.4894, + "step": 5220 + }, + { + "epoch": 0.5516877637130801, + "grad_norm": 0.7094871401786804, + "learning_rate": 0.0006393119277942614, + "loss": 1.4837, + "step": 5230 + }, + { + "epoch": 0.5527426160337553, + "grad_norm": 0.5354472398757935, + "learning_rate": 0.0006368294486913987, + "loss": 1.491, + "step": 5240 + }, + { + "epoch": 0.5537974683544303, + "grad_norm": 0.4849853217601776, + "learning_rate": 0.0006343482377198232, + "loss": 1.4821, + "step": 5250 + }, + { + "epoch": 0.5548523206751055, + "grad_norm": 0.4794313609600067, + "learning_rate": 0.0006318683226827151, + "loss": 1.4897, + "step": 5260 + }, + { + "epoch": 0.5559071729957806, + 
"grad_norm": 0.6370975375175476, + "learning_rate": 0.0006293897313687331, + "loss": 1.4967, + "step": 5270 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 0.6387209892272949, + "learning_rate": 0.0006269124915517037, + "loss": 1.486, + "step": 5280 + }, + { + "epoch": 0.5580168776371308, + "grad_norm": 0.5333986282348633, + "learning_rate": 0.0006244366309903084, + "loss": 1.4849, + "step": 5290 + }, + { + "epoch": 0.5590717299578059, + "grad_norm": 0.5619298815727234, + "learning_rate": 0.0006219621774277737, + "loss": 1.4897, + "step": 5300 + }, + { + "epoch": 0.560126582278481, + "grad_norm": 0.5637062191963196, + "learning_rate": 0.00061948915859156, + "loss": 1.4763, + "step": 5310 + }, + { + "epoch": 0.5611814345991561, + "grad_norm": 0.5141758918762207, + "learning_rate": 0.0006170176021930509, + "loss": 1.4759, + "step": 5320 + }, + { + "epoch": 0.5622362869198312, + "grad_norm": 0.490723192691803, + "learning_rate": 0.0006145475359272424, + "loss": 1.484, + "step": 5330 + }, + { + "epoch": 0.5632911392405063, + "grad_norm": 0.5165253281593323, + "learning_rate": 0.0006120789874724336, + "loss": 1.48, + "step": 5340 + }, + { + "epoch": 0.5643459915611815, + "grad_norm": 0.5606610178947449, + "learning_rate": 0.0006096119844899151, + "loss": 1.4716, + "step": 5350 + }, + { + "epoch": 0.5654008438818565, + "grad_norm": 0.4859774708747864, + "learning_rate": 0.0006071465546236601, + "loss": 1.481, + "step": 5360 + }, + { + "epoch": 0.5664556962025317, + "grad_norm": 0.6043248772621155, + "learning_rate": 0.0006046827255000135, + "loss": 1.4837, + "step": 5370 + }, + { + "epoch": 0.5675105485232067, + "grad_norm": 0.5425053238868713, + "learning_rate": 0.0006022205247273845, + "loss": 1.4712, + "step": 5380 + }, + { + "epoch": 0.5685654008438819, + "grad_norm": 0.5926060080528259, + "learning_rate": 0.0005997599798959343, + "loss": 1.4746, + "step": 5390 + }, + { + "epoch": 0.569620253164557, + "grad_norm": 0.5091972947120667, + "learning_rate": 
0.0005973011185772694, + "loss": 1.4755, + "step": 5400 + }, + { + "epoch": 0.570675105485232, + "grad_norm": 0.4799787104129791, + "learning_rate": 0.0005948439683241318, + "loss": 1.4782, + "step": 5410 + }, + { + "epoch": 0.5717299578059072, + "grad_norm": 0.46803295612335205, + "learning_rate": 0.0005923885566700896, + "loss": 1.473, + "step": 5420 + }, + { + "epoch": 0.5727848101265823, + "grad_norm": 0.5348048210144043, + "learning_rate": 0.0005899349111292293, + "loss": 1.4695, + "step": 5430 + }, + { + "epoch": 0.5738396624472574, + "grad_norm": 0.5067234635353088, + "learning_rate": 0.0005874830591958474, + "loss": 1.469, + "step": 5440 + }, + { + "epoch": 0.5748945147679325, + "grad_norm": 0.5505863428115845, + "learning_rate": 0.000585033028344142, + "loss": 1.4743, + "step": 5450 + }, + { + "epoch": 0.5759493670886076, + "grad_norm": 0.49517422914505005, + "learning_rate": 0.0005825848460279048, + "loss": 1.4754, + "step": 5460 + }, + { + "epoch": 0.5770042194092827, + "grad_norm": 0.5773853659629822, + "learning_rate": 0.0005801385396802146, + "loss": 1.477, + "step": 5470 + }, + { + "epoch": 0.5780590717299579, + "grad_norm": 0.7006990313529968, + "learning_rate": 0.0005776941367131282, + "loss": 1.4793, + "step": 5480 + }, + { + "epoch": 0.5791139240506329, + "grad_norm": 0.7622592449188232, + "learning_rate": 0.0005752516645173745, + "loss": 1.482, + "step": 5490 + }, + { + "epoch": 0.580168776371308, + "grad_norm": 0.528407871723175, + "learning_rate": 0.0005728111504620472, + "loss": 1.4717, + "step": 5500 + }, + { + "epoch": 0.5812236286919831, + "grad_norm": 0.48450398445129395, + "learning_rate": 0.0005703726218942976, + "loss": 1.4793, + "step": 5510 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 0.6169544458389282, + "learning_rate": 0.0005679361061390295, + "loss": 1.4735, + "step": 5520 + }, + { + "epoch": 0.5833333333333334, + "grad_norm": 0.5328255891799927, + "learning_rate": 0.0005655016304985908, + "loss": 1.4632, + "step": 
5530 + }, + { + "epoch": 0.5843881856540084, + "grad_norm": 0.5129302740097046, + "learning_rate": 0.0005630692222524709, + "loss": 1.464, + "step": 5540 + }, + { + "epoch": 0.5854430379746836, + "grad_norm": 0.5741155743598938, + "learning_rate": 0.0005606389086569911, + "loss": 1.4801, + "step": 5550 + }, + { + "epoch": 0.5864978902953587, + "grad_norm": 0.48890742659568787, + "learning_rate": 0.0005582107169450023, + "loss": 1.4838, + "step": 5560 + }, + { + "epoch": 0.5875527426160337, + "grad_norm": 0.5709759593009949, + "learning_rate": 0.0005557846743255783, + "loss": 1.4668, + "step": 5570 + }, + { + "epoch": 0.5886075949367089, + "grad_norm": 0.5346205830574036, + "learning_rate": 0.0005533608079837109, + "loss": 1.4657, + "step": 5580 + }, + { + "epoch": 0.5896624472573839, + "grad_norm": 0.6232300996780396, + "learning_rate": 0.0005509391450800061, + "loss": 1.4701, + "step": 5590 + }, + { + "epoch": 0.5907172995780591, + "grad_norm": 0.5745095014572144, + "learning_rate": 0.0005485197127503795, + "loss": 1.4647, + "step": 5600 + }, + { + "epoch": 0.5917721518987342, + "grad_norm": 0.58150714635849, + "learning_rate": 0.0005461025381057516, + "loss": 1.4695, + "step": 5610 + }, + { + "epoch": 0.5928270042194093, + "grad_norm": 0.9152214527130127, + "learning_rate": 0.0005436876482317444, + "loss": 1.4783, + "step": 5620 + }, + { + "epoch": 0.5938818565400844, + "grad_norm": 0.49668022990226746, + "learning_rate": 0.0005412750701883782, + "loss": 1.4727, + "step": 5630 + }, + { + "epoch": 0.5949367088607594, + "grad_norm": 0.5094896554946899, + "learning_rate": 0.0005388648310097682, + "loss": 1.4798, + "step": 5640 + }, + { + "epoch": 0.5959915611814346, + "grad_norm": 0.6089109182357788, + "learning_rate": 0.000536456957703821, + "loss": 1.4738, + "step": 5650 + }, + { + "epoch": 0.5970464135021097, + "grad_norm": 0.6121323108673096, + "learning_rate": 0.0005340514772519324, + "loss": 1.4677, + "step": 5660 + }, + { + "epoch": 0.5981012658227848, + 
"grad_norm": 0.51023930311203, + "learning_rate": 0.0005316484166086863, + "loss": 1.4761, + "step": 5670 + }, + { + "epoch": 0.5991561181434599, + "grad_norm": 0.4947879910469055, + "learning_rate": 0.00052924780270155, + "loss": 1.478, + "step": 5680 + }, + { + "epoch": 0.6002109704641351, + "grad_norm": 0.6367293000221252, + "learning_rate": 0.0005268496624305747, + "loss": 1.4539, + "step": 5690 + }, + { + "epoch": 0.6012658227848101, + "grad_norm": 0.512224018573761, + "learning_rate": 0.0005244540226680931, + "loss": 1.4594, + "step": 5700 + }, + { + "epoch": 0.6023206751054853, + "grad_norm": 0.5635572075843811, + "learning_rate": 0.0005220609102584185, + "loss": 1.4645, + "step": 5710 + }, + { + "epoch": 0.6033755274261603, + "grad_norm": 0.5069919228553772, + "learning_rate": 0.0005196703520175437, + "loss": 1.4672, + "step": 5720 + }, + { + "epoch": 0.6044303797468354, + "grad_norm": 0.5056061744689941, + "learning_rate": 0.0005172823747328415, + "loss": 1.4643, + "step": 5730 + }, + { + "epoch": 0.6054852320675106, + "grad_norm": 0.5348817110061646, + "learning_rate": 0.0005148970051627632, + "loss": 1.462, + "step": 5740 + }, + { + "epoch": 0.6065400843881856, + "grad_norm": 0.5328285098075867, + "learning_rate": 0.0005125142700365394, + "loss": 1.437, + "step": 5750 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 0.48856961727142334, + "learning_rate": 0.000510134196053881, + "loss": 1.4457, + "step": 5760 + }, + { + "epoch": 0.6086497890295358, + "grad_norm": 0.6688364148139954, + "learning_rate": 0.0005077568098846789, + "loss": 1.4514, + "step": 5770 + }, + { + "epoch": 0.609704641350211, + "grad_norm": 0.47691768407821655, + "learning_rate": 0.000505382138168706, + "loss": 1.4585, + "step": 5780 + }, + { + "epoch": 0.6107594936708861, + "grad_norm": 0.4656788408756256, + "learning_rate": 0.0005030102075153181, + "loss": 1.4551, + "step": 5790 + }, + { + "epoch": 0.6118143459915611, + "grad_norm": 0.5498069524765015, + "learning_rate": 
0.0005006410445031569, + "loss": 1.4583, + "step": 5800 + }, + { + "epoch": 0.6128691983122363, + "grad_norm": 0.632783830165863, + "learning_rate": 0.0004982746756798507, + "loss": 1.4639, + "step": 5810 + }, + { + "epoch": 0.6139240506329114, + "grad_norm": 0.5301288962364197, + "learning_rate": 0.0004959111275617174, + "loss": 1.4778, + "step": 5820 + }, + { + "epoch": 0.6149789029535865, + "grad_norm": 0.4994509220123291, + "learning_rate": 0.0004935504266334677, + "loss": 1.4593, + "step": 5830 + }, + { + "epoch": 0.6160337552742616, + "grad_norm": 0.5345269441604614, + "learning_rate": 0.0004911925993479085, + "loss": 1.4705, + "step": 5840 + }, + { + "epoch": 0.6170886075949367, + "grad_norm": 0.5767863988876343, + "learning_rate": 0.0004888376721256456, + "loss": 1.4752, + "step": 5850 + }, + { + "epoch": 0.6181434599156118, + "grad_norm": 0.7049380540847778, + "learning_rate": 0.00048648567135478805, + "loss": 1.4632, + "step": 5860 + }, + { + "epoch": 0.619198312236287, + "grad_norm": 0.49564504623413086, + "learning_rate": 0.0004841366233906538, + "loss": 1.4551, + "step": 5870 + }, + { + "epoch": 0.620253164556962, + "grad_norm": 0.4838204085826874, + "learning_rate": 0.0004817905545554717, + "loss": 1.4668, + "step": 5880 + }, + { + "epoch": 0.6213080168776371, + "grad_norm": 0.7612228393554688, + "learning_rate": 0.00047944749113808884, + "loss": 1.4552, + "step": 5890 + }, + { + "epoch": 0.6223628691983122, + "grad_norm": 0.4583361744880676, + "learning_rate": 0.00047710745939367474, + "loss": 1.4561, + "step": 5900 + }, + { + "epoch": 0.6234177215189873, + "grad_norm": 0.5316163301467896, + "learning_rate": 0.0004747704855434278, + "loss": 1.4491, + "step": 5910 + }, + { + "epoch": 0.6244725738396625, + "grad_norm": 0.4847448468208313, + "learning_rate": 0.0004724365957742809, + "loss": 1.4476, + "step": 5920 + }, + { + "epoch": 0.6255274261603375, + "grad_norm": 1.0385831594467163, + "learning_rate": 0.00047010581623860883, + "loss": 1.4501, + 
"step": 5930 + }, + { + "epoch": 0.6265822784810127, + "grad_norm": 0.829448401927948, + "learning_rate": 0.0004677781730539342, + "loss": 1.4587, + "step": 5940 + }, + { + "epoch": 0.6276371308016878, + "grad_norm": 0.5561820268630981, + "learning_rate": 0.0004654536923026356, + "loss": 1.4479, + "step": 5950 + }, + { + "epoch": 0.6286919831223629, + "grad_norm": 0.5981544852256775, + "learning_rate": 0.00046313240003165466, + "loss": 1.4546, + "step": 5960 + }, + { + "epoch": 0.629746835443038, + "grad_norm": 0.4896555244922638, + "learning_rate": 0.0004608143222522048, + "loss": 1.4547, + "step": 5970 + }, + { + "epoch": 0.630801687763713, + "grad_norm": 0.5490660071372986, + "learning_rate": 0.0004584994849394795, + "loss": 1.4384, + "step": 5980 + }, + { + "epoch": 0.6318565400843882, + "grad_norm": 0.634878933429718, + "learning_rate": 0.0004561879140323607, + "loss": 1.4575, + "step": 5990 + }, + { + "epoch": 0.6329113924050633, + "grad_norm": 0.5208130478858948, + "learning_rate": 0.0004538796354331298, + "loss": 1.4629, + "step": 6000 + }, + { + "epoch": 0.6339662447257384, + "grad_norm": 0.48796504735946655, + "learning_rate": 0.0004515746750071754, + "loss": 1.4502, + "step": 6010 + }, + { + "epoch": 0.6350210970464135, + "grad_norm": 0.7172197699546814, + "learning_rate": 0.0004492730585827046, + "loss": 1.4566, + "step": 6020 + }, + { + "epoch": 0.6360759493670886, + "grad_norm": 0.5025409460067749, + "learning_rate": 0.0004469748119504529, + "loss": 1.4372, + "step": 6030 + }, + { + "epoch": 0.6371308016877637, + "grad_norm": 0.545889675617218, + "learning_rate": 0.0004446799608633964, + "loss": 1.4583, + "step": 6040 + }, + { + "epoch": 0.6381856540084389, + "grad_norm": 0.4706031382083893, + "learning_rate": 0.00044238853103646154, + "loss": 1.4407, + "step": 6050 + }, + { + "epoch": 0.6392405063291139, + "grad_norm": 0.5463115572929382, + "learning_rate": 0.00044010054814623925, + "loss": 1.4597, + "step": 6060 + }, + { + "epoch": 
0.640295358649789, + "grad_norm": 0.515608012676239, + "learning_rate": 0.0004378160378306944, + "loss": 1.4488, + "step": 6070 + }, + { + "epoch": 0.6413502109704642, + "grad_norm": 0.49624624848365784, + "learning_rate": 0.00043553502568888095, + "loss": 1.4455, + "step": 6080 + }, + { + "epoch": 0.6424050632911392, + "grad_norm": 0.5415939688682556, + "learning_rate": 0.0004332575372806534, + "loss": 1.4438, + "step": 6090 + }, + { + "epoch": 0.6434599156118144, + "grad_norm": 0.49219149351119995, + "learning_rate": 0.00043098359812638145, + "loss": 1.4373, + "step": 6100 + }, + { + "epoch": 0.6445147679324894, + "grad_norm": 0.4915757477283478, + "learning_rate": 0.00042871323370666383, + "loss": 1.4512, + "step": 6110 + }, + { + "epoch": 0.6455696202531646, + "grad_norm": 0.5928741097450256, + "learning_rate": 0.0004264464694620421, + "loss": 1.4478, + "step": 6120 + }, + { + "epoch": 0.6466244725738397, + "grad_norm": 0.48575419187545776, + "learning_rate": 0.000424183330792717, + "loss": 1.4447, + "step": 6130 + }, + { + "epoch": 0.6476793248945147, + "grad_norm": 0.507031261920929, + "learning_rate": 0.0004219238430582621, + "loss": 1.4469, + "step": 6140 + }, + { + "epoch": 0.6487341772151899, + "grad_norm": 0.4940553903579712, + "learning_rate": 0.0004196680315773408, + "loss": 1.4555, + "step": 6150 + }, + { + "epoch": 0.6497890295358649, + "grad_norm": 0.4866984486579895, + "learning_rate": 0.00041741592162742214, + "loss": 1.4506, + "step": 6160 + }, + { + "epoch": 0.6508438818565401, + "grad_norm": 0.5252698659896851, + "learning_rate": 0.0004151675384444978, + "loss": 1.4264, + "step": 6170 + }, + { + "epoch": 0.6518987341772152, + "grad_norm": 0.5911986231803894, + "learning_rate": 0.00041292290722279914, + "loss": 1.4483, + "step": 6180 + }, + { + "epoch": 0.6529535864978903, + "grad_norm": 0.7079945206642151, + "learning_rate": 0.00041068205311451517, + "loss": 1.4541, + "step": 6190 + }, + { + "epoch": 0.6540084388185654, + "grad_norm": 
0.5665788054466248, + "learning_rate": 0.00040844500122951026, + "loss": 1.4467, + "step": 6200 + }, + { + "epoch": 0.6550632911392406, + "grad_norm": 1.0949095487594604, + "learning_rate": 0.00040621177663504313, + "loss": 1.4421, + "step": 6210 + }, + { + "epoch": 0.6561181434599156, + "grad_norm": 0.7985562086105347, + "learning_rate": 0.00040398240435548583, + "loss": 1.4485, + "step": 6220 + }, + { + "epoch": 0.6571729957805907, + "grad_norm": 0.4975112974643707, + "learning_rate": 0.00040175690937204324, + "loss": 1.4378, + "step": 6230 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 0.48909488320350647, + "learning_rate": 0.00039953531662247343, + "loss": 1.4427, + "step": 6240 + }, + { + "epoch": 0.6592827004219409, + "grad_norm": 0.7844357490539551, + "learning_rate": 0.0003973176510008075, + "loss": 1.4488, + "step": 6250 + }, + { + "epoch": 0.6603375527426161, + "grad_norm": 0.5498688220977783, + "learning_rate": 0.00039510393735707233, + "loss": 1.4412, + "step": 6260 + }, + { + "epoch": 0.6613924050632911, + "grad_norm": 0.507627010345459, + "learning_rate": 0.00039289420049700986, + "loss": 1.4504, + "step": 6270 + }, + { + "epoch": 0.6624472573839663, + "grad_norm": 0.5005232095718384, + "learning_rate": 0.0003906884651818006, + "loss": 1.4465, + "step": 6280 + }, + { + "epoch": 0.6635021097046413, + "grad_norm": 0.5058642029762268, + "learning_rate": 0.00038848675612778577, + "loss": 1.4441, + "step": 6290 + }, + { + "epoch": 0.6645569620253164, + "grad_norm": 0.6778172850608826, + "learning_rate": 0.00038628909800619046, + "loss": 1.4385, + "step": 6300 + }, + { + "epoch": 0.6656118143459916, + "grad_norm": 0.49329179525375366, + "learning_rate": 0.0003840955154428467, + "loss": 1.4373, + "step": 6310 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.8938937783241272, + "learning_rate": 0.00038190603301791864, + "loss": 1.4336, + "step": 6320 + }, + { + "epoch": 0.6677215189873418, + "grad_norm": 0.5498088002204895, + "learning_rate": 
0.0003797206752656258, + "loss": 1.4439, + "step": 6330 + }, + { + "epoch": 0.6687763713080169, + "grad_norm": 0.5045040845870972, + "learning_rate": 0.0003775394666739688, + "loss": 1.4363, + "step": 6340 + }, + { + "epoch": 0.669831223628692, + "grad_norm": 0.6053415536880493, + "learning_rate": 0.00037536243168445507, + "loss": 1.4424, + "step": 6350 + }, + { + "epoch": 0.6708860759493671, + "grad_norm": 0.49733415246009827, + "learning_rate": 0.0003731895946918246, + "loss": 1.4403, + "step": 6360 + }, + { + "epoch": 0.6719409282700421, + "grad_norm": 0.5580061078071594, + "learning_rate": 0.0003710209800437769, + "loss": 1.4208, + "step": 6370 + }, + { + "epoch": 0.6729957805907173, + "grad_norm": 0.5783162713050842, + "learning_rate": 0.00036885661204069767, + "loss": 1.4273, + "step": 6380 + }, + { + "epoch": 0.6740506329113924, + "grad_norm": 0.5714283585548401, + "learning_rate": 0.0003666965149353878, + "loss": 1.4445, + "step": 6390 + }, + { + "epoch": 0.6751054852320675, + "grad_norm": 0.5972985625267029, + "learning_rate": 0.0003645407129327898, + "loss": 1.4412, + "step": 6400 + }, + { + "epoch": 0.6761603375527426, + "grad_norm": 0.4876542389392853, + "learning_rate": 0.00036238923018971783, + "loss": 1.4358, + "step": 6410 + }, + { + "epoch": 0.6772151898734177, + "grad_norm": 0.5544925928115845, + "learning_rate": 0.0003602420908145865, + "loss": 1.428, + "step": 6420 + }, + { + "epoch": 0.6782700421940928, + "grad_norm": 0.6289705038070679, + "learning_rate": 0.00035809931886714093, + "loss": 1.437, + "step": 6430 + }, + { + "epoch": 0.679324894514768, + "grad_norm": 0.5447190999984741, + "learning_rate": 0.00035596093835818683, + "loss": 1.4212, + "step": 6440 + }, + { + "epoch": 0.680379746835443, + "grad_norm": 0.6949787139892578, + "learning_rate": 0.00035382697324932245, + "loss": 1.4266, + "step": 6450 + }, + { + "epoch": 0.6814345991561181, + "grad_norm": 0.8234121203422546, + "learning_rate": 0.00035169744745266866, + "loss": 1.44, + 
"step": 6460 + }, + { + "epoch": 0.6824894514767933, + "grad_norm": 0.6068360209465027, + "learning_rate": 0.0003495723848306017, + "loss": 1.4319, + "step": 6470 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 0.6669226884841919, + "learning_rate": 0.0003474518091954859, + "loss": 1.4407, + "step": 6480 + }, + { + "epoch": 0.6845991561181435, + "grad_norm": 0.5997419953346252, + "learning_rate": 0.0003453357443094068, + "loss": 1.4333, + "step": 6490 + }, + { + "epoch": 0.6856540084388185, + "grad_norm": 0.5909753441810608, + "learning_rate": 0.00034322421388390456, + "loss": 1.4512, + "step": 6500 + }, + { + "epoch": 0.6867088607594937, + "grad_norm": 0.694766104221344, + "learning_rate": 0.0003411172415797087, + "loss": 1.4497, + "step": 6510 + }, + { + "epoch": 0.6877637130801688, + "grad_norm": 0.5000135898590088, + "learning_rate": 0.0003390148510064727, + "loss": 1.4329, + "step": 6520 + }, + { + "epoch": 0.6888185654008439, + "grad_norm": 0.5609813928604126, + "learning_rate": 0.0003369170657225094, + "loss": 1.4239, + "step": 6530 + }, + { + "epoch": 0.689873417721519, + "grad_norm": 0.5548239350318909, + "learning_rate": 0.0003348239092345275, + "loss": 1.435, + "step": 6540 + }, + { + "epoch": 0.6909282700421941, + "grad_norm": 0.6655601263046265, + "learning_rate": 0.0003327354049973672, + "loss": 1.4325, + "step": 6550 + }, + { + "epoch": 0.6919831223628692, + "grad_norm": 0.9289987683296204, + "learning_rate": 0.00033065157641373847, + "loss": 1.4305, + "step": 6560 + }, + { + "epoch": 0.6930379746835443, + "grad_norm": 0.7574074864387512, + "learning_rate": 0.0003285724468339576, + "loss": 1.4293, + "step": 6570 + }, + { + "epoch": 0.6940928270042194, + "grad_norm": 0.6782986521720886, + "learning_rate": 0.00032649803955568755, + "loss": 1.4279, + "step": 6580 + }, + { + "epoch": 0.6951476793248945, + "grad_norm": 0.7088582515716553, + "learning_rate": 0.00032442837782367434, + "loss": 1.431, + "step": 6590 + }, + { + "epoch": 
0.6962025316455697, + "grad_norm": 0.5320621132850647, + "learning_rate": 0.0003223634848294883, + "loss": 1.4373, + "step": 6600 + }, + { + "epoch": 0.6972573839662447, + "grad_norm": 0.5401026010513306, + "learning_rate": 0.00032030338371126374, + "loss": 1.4244, + "step": 6610 + }, + { + "epoch": 0.6983122362869199, + "grad_norm": 0.5319799184799194, + "learning_rate": 0.0003182480975534395, + "loss": 1.4287, + "step": 6620 + }, + { + "epoch": 0.6993670886075949, + "grad_norm": 0.5029904842376709, + "learning_rate": 0.00031619764938650057, + "loss": 1.4247, + "step": 6630 + }, + { + "epoch": 0.70042194092827, + "grad_norm": 0.5175420045852661, + "learning_rate": 0.0003141520621867197, + "loss": 1.4276, + "step": 6640 + }, + { + "epoch": 0.7014767932489452, + "grad_norm": 0.611236572265625, + "learning_rate": 0.00031211135887590074, + "loss": 1.4243, + "step": 6650 + }, + { + "epoch": 0.7025316455696202, + "grad_norm": 0.5167410969734192, + "learning_rate": 0.0003100755623211205, + "loss": 1.4261, + "step": 6660 + }, + { + "epoch": 0.7035864978902954, + "grad_norm": 0.47252219915390015, + "learning_rate": 0.0003080446953344735, + "loss": 1.4251, + "step": 6670 + }, + { + "epoch": 0.7046413502109705, + "grad_norm": 0.5216288566589355, + "learning_rate": 0.00030601878067281575, + "loss": 1.4296, + "step": 6680 + }, + { + "epoch": 0.7056962025316456, + "grad_norm": 0.554894745349884, + "learning_rate": 0.00030399784103751044, + "loss": 1.4218, + "step": 6690 + }, + { + "epoch": 0.7067510548523207, + "grad_norm": 0.5862643122673035, + "learning_rate": 0.000301981899074173, + "loss": 1.4237, + "step": 6700 + }, + { + "epoch": 0.7078059071729957, + "grad_norm": 0.5114737153053284, + "learning_rate": 0.0002999709773724171, + "loss": 1.4253, + "step": 6710 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 0.8156765103340149, + "learning_rate": 0.00029796509846560294, + "loss": 1.4159, + "step": 6720 + }, + { + "epoch": 0.709915611814346, + "grad_norm": 
0.4957371950149536, + "learning_rate": 0.0002959642848305828, + "loss": 1.424, + "step": 6730 + }, + { + "epoch": 0.7109704641350211, + "grad_norm": 0.6011149883270264, + "learning_rate": 0.00029396855888745045, + "loss": 1.4256, + "step": 6740 + }, + { + "epoch": 0.7120253164556962, + "grad_norm": 0.49337509274482727, + "learning_rate": 0.0002919779429992895, + "loss": 1.4291, + "step": 6750 + }, + { + "epoch": 0.7130801687763713, + "grad_norm": 0.5435550212860107, + "learning_rate": 0.0002899924594719231, + "loss": 1.4248, + "step": 6760 + }, + { + "epoch": 0.7141350210970464, + "grad_norm": 0.5038012862205505, + "learning_rate": 0.00028801213055366335, + "loss": 1.4277, + "step": 6770 + }, + { + "epoch": 0.7151898734177216, + "grad_norm": 0.5369750261306763, + "learning_rate": 0.00028603697843506315, + "loss": 1.4218, + "step": 6780 + }, + { + "epoch": 0.7162447257383966, + "grad_norm": 0.5309962034225464, + "learning_rate": 0.0002840670252486662, + "loss": 1.4316, + "step": 6790 + }, + { + "epoch": 0.7172995780590717, + "grad_norm": 0.49596792459487915, + "learning_rate": 0.00028210229306876, + "loss": 1.4156, + "step": 6800 + }, + { + "epoch": 0.7183544303797469, + "grad_norm": 0.6298664808273315, + "learning_rate": 0.0002801428039111279, + "loss": 1.4216, + "step": 6810 + }, + { + "epoch": 0.7194092827004219, + "grad_norm": 0.4989502727985382, + "learning_rate": 0.00027818857973280274, + "loss": 1.4235, + "step": 6820 + }, + { + "epoch": 0.7204641350210971, + "grad_norm": 0.6086215376853943, + "learning_rate": 0.0002762396424318206, + "loss": 1.4186, + "step": 6830 + }, + { + "epoch": 0.7215189873417721, + "grad_norm": 0.504088819026947, + "learning_rate": 0.00027429601384697526, + "loss": 1.4104, + "step": 6840 + }, + { + "epoch": 0.7225738396624473, + "grad_norm": 0.517050564289093, + "learning_rate": 0.00027235771575757466, + "loss": 1.4184, + "step": 6850 + }, + { + "epoch": 0.7236286919831224, + "grad_norm": 0.6705247759819031, + "learning_rate": 
0.0002704247698831951, + "loss": 1.4165, + "step": 6860 + }, + { + "epoch": 0.7246835443037974, + "grad_norm": 0.5113469958305359, + "learning_rate": 0.0002684971978834389, + "loss": 1.4125, + "step": 6870 + }, + { + "epoch": 0.7257383966244726, + "grad_norm": 0.5772517323493958, + "learning_rate": 0.0002665750213576914, + "loss": 1.4188, + "step": 6880 + }, + { + "epoch": 0.7267932489451476, + "grad_norm": 0.6171859502792358, + "learning_rate": 0.0002646582618448794, + "loss": 1.4113, + "step": 6890 + }, + { + "epoch": 0.7278481012658228, + "grad_norm": 0.5865874290466309, + "learning_rate": 0.00026274694082322896, + "loss": 1.4122, + "step": 6900 + }, + { + "epoch": 0.7289029535864979, + "grad_norm": 0.48441967368125916, + "learning_rate": 0.0002608410797100255, + "loss": 1.4378, + "step": 6910 + }, + { + "epoch": 0.729957805907173, + "grad_norm": 0.6300725340843201, + "learning_rate": 0.0002589406998613733, + "loss": 1.4158, + "step": 6920 + }, + { + "epoch": 0.7310126582278481, + "grad_norm": 0.5188561677932739, + "learning_rate": 0.0002570458225719567, + "loss": 1.4208, + "step": 6930 + }, + { + "epoch": 0.7320675105485233, + "grad_norm": 0.512396514415741, + "learning_rate": 0.00025515646907480074, + "loss": 1.4254, + "step": 6940 + }, + { + "epoch": 0.7331223628691983, + "grad_norm": 0.5857425332069397, + "learning_rate": 0.00025327266054103395, + "loss": 1.4213, + "step": 6950 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 0.47508955001831055, + "learning_rate": 0.0002513944180796509, + "loss": 1.4111, + "step": 6960 + }, + { + "epoch": 0.7352320675105485, + "grad_norm": 0.48741745948791504, + "learning_rate": 0.0002495217627372752, + "loss": 1.4074, + "step": 6970 + }, + { + "epoch": 0.7362869198312236, + "grad_norm": 0.4814155697822571, + "learning_rate": 0.0002476547154979248, + "loss": 1.4098, + "step": 6980 + }, + { + "epoch": 0.7373417721518988, + "grad_norm": 0.5336549878120422, + "learning_rate": 0.00024579329728277534, + "loss": 1.4127, + 
"step": 6990 + }, + { + "epoch": 0.7383966244725738, + "grad_norm": 0.5137410163879395, + "learning_rate": 0.00024393752894992708, + "loss": 1.4231, + "step": 7000 + }, + { + "epoch": 0.739451476793249, + "grad_norm": 0.47526130080223083, + "learning_rate": 0.00024208743129417004, + "loss": 1.3962, + "step": 7010 + }, + { + "epoch": 0.740506329113924, + "grad_norm": 0.8842220902442932, + "learning_rate": 0.00024024302504675206, + "loss": 1.4106, + "step": 7020 + }, + { + "epoch": 0.7415611814345991, + "grad_norm": 0.5335461497306824, + "learning_rate": 0.0002384043308751454, + "loss": 1.4144, + "step": 7030 + }, + { + "epoch": 0.7426160337552743, + "grad_norm": 0.5184959769248962, + "learning_rate": 0.00023657136938281653, + "loss": 1.4188, + "step": 7040 + }, + { + "epoch": 0.7436708860759493, + "grad_norm": 0.4702299237251282, + "learning_rate": 0.00023474416110899377, + "loss": 1.407, + "step": 7050 + }, + { + "epoch": 0.7447257383966245, + "grad_norm": 0.4945663511753082, + "learning_rate": 0.00023292272652843807, + "loss": 1.4127, + "step": 7060 + }, + { + "epoch": 0.7457805907172996, + "grad_norm": 0.4893150329589844, + "learning_rate": 0.00023110708605121317, + "loss": 1.4206, + "step": 7070 + }, + { + "epoch": 0.7468354430379747, + "grad_norm": 0.5535306334495544, + "learning_rate": 0.00022929726002245728, + "loss": 1.4136, + "step": 7080 + }, + { + "epoch": 0.7478902953586498, + "grad_norm": 0.502993106842041, + "learning_rate": 0.00022749326872215472, + "loss": 1.4129, + "step": 7090 + }, + { + "epoch": 0.7489451476793249, + "grad_norm": 0.5133369565010071, + "learning_rate": 0.0002256951323649087, + "loss": 1.4093, + "step": 7100 + }, + { + "epoch": 0.75, + "grad_norm": 0.6289997696876526, + "learning_rate": 0.00022390287109971547, + "loss": 1.4204, + "step": 7110 + }, + { + "epoch": 0.7510548523206751, + "grad_norm": 0.605743408203125, + "learning_rate": 0.00022211650500973746, + "loss": 1.4171, + "step": 7120 + }, + { + "epoch": 0.7521097046413502, + 
"grad_norm": 0.55927574634552, + "learning_rate": 0.0002203360541120789, + "loss": 1.4226, + "step": 7130 + }, + { + "epoch": 0.7531645569620253, + "grad_norm": 0.4868223965167999, + "learning_rate": 0.00021856153835756164, + "loss": 1.4065, + "step": 7140 + }, + { + "epoch": 0.7542194092827004, + "grad_norm": 0.7868787050247192, + "learning_rate": 0.00021679297763050104, + "loss": 1.4025, + "step": 7150 + }, + { + "epoch": 0.7552742616033755, + "grad_norm": 0.5276433229446411, + "learning_rate": 0.0002150303917484834, + "loss": 1.4139, + "step": 7160 + }, + { + "epoch": 0.7563291139240507, + "grad_norm": 0.8709946274757385, + "learning_rate": 0.0002132738004621446, + "loss": 1.4194, + "step": 7170 + }, + { + "epoch": 0.7573839662447257, + "grad_norm": 0.4975385367870331, + "learning_rate": 0.00021152322345494763, + "loss": 1.4005, + "step": 7180 + }, + { + "epoch": 0.7584388185654009, + "grad_norm": 0.782618522644043, + "learning_rate": 0.00020977868034296253, + "loss": 1.4013, + "step": 7190 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 0.6031981706619263, + "learning_rate": 0.00020804019067464667, + "loss": 1.4081, + "step": 7200 + }, + { + "epoch": 0.760548523206751, + "grad_norm": 0.47875404357910156, + "learning_rate": 0.00020630777393062575, + "loss": 1.4112, + "step": 7210 + }, + { + "epoch": 0.7616033755274262, + "grad_norm": 0.5568411350250244, + "learning_rate": 0.00020458144952347523, + "loss": 1.4234, + "step": 7220 + }, + { + "epoch": 0.7626582278481012, + "grad_norm": 0.4957244098186493, + "learning_rate": 0.00020286123679750314, + "loss": 1.4181, + "step": 7230 + }, + { + "epoch": 0.7637130801687764, + "grad_norm": 0.5013930797576904, + "learning_rate": 0.00020114715502853292, + "loss": 1.417, + "step": 7240 + }, + { + "epoch": 0.7647679324894515, + "grad_norm": 0.742794930934906, + "learning_rate": 0.0001994392234236878, + "loss": 1.4109, + "step": 7250 + }, + { + "epoch": 0.7658227848101266, + "grad_norm": 0.5725691914558411, + 
"learning_rate": 0.0001977374611211754, + "loss": 1.4096, + "step": 7260 + }, + { + "epoch": 0.7668776371308017, + "grad_norm": 0.5211153030395508, + "learning_rate": 0.00019604188719007313, + "loss": 1.4062, + "step": 7270 + }, + { + "epoch": 0.7679324894514767, + "grad_norm": 0.5076520442962646, + "learning_rate": 0.00019435252063011504, + "loss": 1.4084, + "step": 7280 + }, + { + "epoch": 0.7689873417721519, + "grad_norm": 0.5510384440422058, + "learning_rate": 0.0001926693803714779, + "loss": 1.4192, + "step": 7290 + }, + { + "epoch": 0.770042194092827, + "grad_norm": 0.5101417303085327, + "learning_rate": 0.00019099248527457068, + "loss": 1.4254, + "step": 7300 + }, + { + "epoch": 0.7710970464135021, + "grad_norm": 0.4743573069572449, + "learning_rate": 0.0001893218541298216, + "loss": 1.4035, + "step": 7310 + }, + { + "epoch": 0.7721518987341772, + "grad_norm": 0.6846520900726318, + "learning_rate": 0.00018765750565746827, + "loss": 1.3998, + "step": 7320 + }, + { + "epoch": 0.7732067510548524, + "grad_norm": 0.5854378938674927, + "learning_rate": 0.00018599945850734812, + "loss": 1.4054, + "step": 7330 + }, + { + "epoch": 0.7742616033755274, + "grad_norm": 0.7829344272613525, + "learning_rate": 0.00018434773125868895, + "loss": 1.4029, + "step": 7340 + }, + { + "epoch": 0.7753164556962026, + "grad_norm": 0.49994421005249023, + "learning_rate": 0.00018270234241990108, + "loss": 1.3987, + "step": 7350 + }, + { + "epoch": 0.7763713080168776, + "grad_norm": 0.5216307640075684, + "learning_rate": 0.0001810633104283698, + "loss": 1.4063, + "step": 7360 + }, + { + "epoch": 0.7774261603375527, + "grad_norm": 0.4907887279987335, + "learning_rate": 0.0001794306536502492, + "loss": 1.4049, + "step": 7370 + }, + { + "epoch": 0.7784810126582279, + "grad_norm": 0.5378735065460205, + "learning_rate": 0.0001778043903802555, + "loss": 1.3946, + "step": 7380 + }, + { + "epoch": 0.7795358649789029, + "grad_norm": 0.48792096972465515, + "learning_rate": 0.0001761845388414627, + 
"loss": 1.4063, + "step": 7390 + }, + { + "epoch": 0.7805907172995781, + "grad_norm": 0.5851631164550781, + "learning_rate": 0.00017457111718509831, + "loss": 1.4133, + "step": 7400 + }, + { + "epoch": 0.7816455696202531, + "grad_norm": 0.5007026195526123, + "learning_rate": 0.00017296414349033976, + "loss": 1.3926, + "step": 7410 + }, + { + "epoch": 0.7827004219409283, + "grad_norm": 0.5991578698158264, + "learning_rate": 0.00017136363576411172, + "loss": 1.3987, + "step": 7420 + }, + { + "epoch": 0.7837552742616034, + "grad_norm": 0.5213151574134827, + "learning_rate": 0.00016976961194088526, + "loss": 1.3861, + "step": 7430 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 0.5028907060623169, + "learning_rate": 0.00016818208988247533, + "loss": 1.3906, + "step": 7440 + }, + { + "epoch": 0.7858649789029536, + "grad_norm": 0.5429515242576599, + "learning_rate": 0.0001666010873778419, + "loss": 1.3933, + "step": 7450 + }, + { + "epoch": 0.7869198312236287, + "grad_norm": 0.501041054725647, + "learning_rate": 0.00016502662214289, + "loss": 1.4043, + "step": 7460 + }, + { + "epoch": 0.7879746835443038, + "grad_norm": 0.5604754090309143, + "learning_rate": 0.00016345871182027124, + "loss": 1.3939, + "step": 7470 + }, + { + "epoch": 0.7890295358649789, + "grad_norm": 0.7888116240501404, + "learning_rate": 0.00016189737397918653, + "loss": 1.4003, + "step": 7480 + }, + { + "epoch": 0.790084388185654, + "grad_norm": 0.4839627146720886, + "learning_rate": 0.0001603426261151884, + "loss": 1.4093, + "step": 7490 + }, + { + "epoch": 0.7911392405063291, + "grad_norm": 0.49811026453971863, + "learning_rate": 0.00015879448564998648, + "loss": 1.4049, + "step": 7500 + }, + { + "epoch": 0.7921940928270043, + "grad_norm": 0.47760209441185, + "learning_rate": 0.0001572529699312501, + "loss": 1.4091, + "step": 7510 + }, + { + "epoch": 0.7932489451476793, + "grad_norm": 0.5026172995567322, + "learning_rate": 0.0001557180962324158, + "loss": 1.3937, + "step": 7520 + }, + { + 
"epoch": 0.7943037974683544, + "grad_norm": 0.5351853966712952, + "learning_rate": 0.00015418988175249282, + "loss": 1.3954, + "step": 7530 + }, + { + "epoch": 0.7953586497890295, + "grad_norm": 0.48242607712745667, + "learning_rate": 0.00015266834361587063, + "loss": 1.3946, + "step": 7540 + }, + { + "epoch": 0.7964135021097046, + "grad_norm": 0.48497846722602844, + "learning_rate": 0.00015115349887212678, + "loss": 1.3898, + "step": 7550 + }, + { + "epoch": 0.7974683544303798, + "grad_norm": 0.579359769821167, + "learning_rate": 0.00014964536449583657, + "loss": 1.3918, + "step": 7560 + }, + { + "epoch": 0.7985232067510548, + "grad_norm": 0.5208415389060974, + "learning_rate": 0.00014814395738638195, + "loss": 1.405, + "step": 7570 + }, + { + "epoch": 0.79957805907173, + "grad_norm": 0.5188043713569641, + "learning_rate": 0.00014664929436776278, + "loss": 1.4002, + "step": 7580 + }, + { + "epoch": 0.8006329113924051, + "grad_norm": 0.49395835399627686, + "learning_rate": 0.00014516139218840788, + "loss": 1.3942, + "step": 7590 + }, + { + "epoch": 0.8016877637130801, + "grad_norm": 0.4898037910461426, + "learning_rate": 0.00014368026752098782, + "loss": 1.394, + "step": 7600 + }, + { + "epoch": 0.8027426160337553, + "grad_norm": 0.5861129760742188, + "learning_rate": 0.00014220593696222768, + "loss": 1.4089, + "step": 7610 + }, + { + "epoch": 0.8037974683544303, + "grad_norm": 0.49235138297080994, + "learning_rate": 0.00014073841703272092, + "loss": 1.3795, + "step": 7620 + }, + { + "epoch": 0.8048523206751055, + "grad_norm": 0.49516093730926514, + "learning_rate": 0.00013927772417674558, + "loss": 1.4054, + "step": 7630 + }, + { + "epoch": 0.8059071729957806, + "grad_norm": 0.48757612705230713, + "learning_rate": 0.00013782387476207788, + "loss": 1.3962, + "step": 7640 + }, + { + "epoch": 0.8069620253164557, + "grad_norm": 0.47646474838256836, + "learning_rate": 0.00013637688507981064, + "loss": 1.4117, + "step": 7650 + }, + { + "epoch": 0.8080168776371308, + 
"grad_norm": 0.4907473027706146, + "learning_rate": 0.0001349367713441697, + "loss": 1.3903, + "step": 7660 + }, + { + "epoch": 0.8090717299578059, + "grad_norm": 0.47686466574668884, + "learning_rate": 0.0001335035496923326, + "loss": 1.3957, + "step": 7670 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 0.5995292067527771, + "learning_rate": 0.0001320772361842478, + "loss": 1.4121, + "step": 7680 + }, + { + "epoch": 0.8111814345991561, + "grad_norm": 0.46882882714271545, + "learning_rate": 0.00013065784680245442, + "loss": 1.3854, + "step": 7690 + }, + { + "epoch": 0.8122362869198312, + "grad_norm": 0.4745437800884247, + "learning_rate": 0.00012924539745190402, + "loss": 1.3952, + "step": 7700 + }, + { + "epoch": 0.8132911392405063, + "grad_norm": 0.5046836137771606, + "learning_rate": 0.0001278399039597809, + "loss": 1.3924, + "step": 7710 + }, + { + "epoch": 0.8143459915611815, + "grad_norm": 0.5603382587432861, + "learning_rate": 0.0001264413820753261, + "loss": 1.3962, + "step": 7720 + }, + { + "epoch": 0.8154008438818565, + "grad_norm": 0.5876381397247314, + "learning_rate": 0.00012504984746966003, + "loss": 1.4011, + "step": 7730 + }, + { + "epoch": 0.8164556962025317, + "grad_norm": 0.4754839241504669, + "learning_rate": 0.00012366531573560754, + "loss": 1.4011, + "step": 7740 + }, + { + "epoch": 0.8175105485232067, + "grad_norm": 0.48102545738220215, + "learning_rate": 0.00012228780238752264, + "loss": 1.3875, + "step": 7750 + }, + { + "epoch": 0.8185654008438819, + "grad_norm": 0.634244978427887, + "learning_rate": 0.00012091732286111514, + "loss": 1.388, + "step": 7760 + }, + { + "epoch": 0.819620253164557, + "grad_norm": 0.5213733911514282, + "learning_rate": 0.00011955389251327737, + "loss": 1.3971, + "step": 7770 + }, + { + "epoch": 0.820675105485232, + "grad_norm": 0.6531879901885986, + "learning_rate": 0.00011819752662191197, + "loss": 1.3887, + "step": 7780 + }, + { + "epoch": 0.8217299578059072, + "grad_norm": 0.539393961429596, + 
"learning_rate": 0.00011684824038576115, + "loss": 1.4077, + "step": 7790 + }, + { + "epoch": 0.8227848101265823, + "grad_norm": 0.47519391775131226, + "learning_rate": 0.00011550604892423593, + "loss": 1.3923, + "step": 7800 + }, + { + "epoch": 0.8238396624472574, + "grad_norm": 0.5234396457672119, + "learning_rate": 0.0001141709672772471, + "loss": 1.3932, + "step": 7810 + }, + { + "epoch": 0.8248945147679325, + "grad_norm": 0.48334819078445435, + "learning_rate": 0.00011284301040503625, + "loss": 1.3999, + "step": 7820 + }, + { + "epoch": 0.8259493670886076, + "grad_norm": 0.4964877665042877, + "learning_rate": 0.0001115221931880088, + "loss": 1.3906, + "step": 7830 + }, + { + "epoch": 0.8270042194092827, + "grad_norm": 0.499929815530777, + "learning_rate": 0.00011020853042656648, + "loss": 1.3781, + "step": 7840 + }, + { + "epoch": 0.8280590717299579, + "grad_norm": 0.47219541668891907, + "learning_rate": 0.000108902036840942, + "loss": 1.3864, + "step": 7850 + }, + { + "epoch": 0.8291139240506329, + "grad_norm": 0.5601077675819397, + "learning_rate": 0.00010760272707103389, + "loss": 1.3749, + "step": 7860 + }, + { + "epoch": 0.830168776371308, + "grad_norm": 0.5578227639198303, + "learning_rate": 0.00010631061567624259, + "loss": 1.3898, + "step": 7870 + }, + { + "epoch": 0.8312236286919831, + "grad_norm": 0.5889272093772888, + "learning_rate": 0.00010502571713530706, + "loss": 1.4064, + "step": 7880 + }, + { + "epoch": 0.8322784810126582, + "grad_norm": 0.49493587017059326, + "learning_rate": 0.00010374804584614308, + "loss": 1.402, + "step": 7890 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.4760115146636963, + "learning_rate": 0.00010247761612568129, + "loss": 1.3848, + "step": 7900 + }, + { + "epoch": 0.8343881856540084, + "grad_norm": 0.7417068481445312, + "learning_rate": 0.0001012144422097069, + "loss": 1.393, + "step": 7910 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 0.49573588371276855, + "learning_rate": 9.995853825270052e-05, 
+ "loss": 1.392, + "step": 7920 + }, + { + "epoch": 0.8364978902953587, + "grad_norm": 0.5548797249794006, + "learning_rate": 9.870991832767919e-05, + "loss": 1.4008, + "step": 7930 + }, + { + "epoch": 0.8375527426160337, + "grad_norm": 0.4964168965816498, + "learning_rate": 9.746859642603884e-05, + "loss": 1.3946, + "step": 7940 + }, + { + "epoch": 0.8386075949367089, + "grad_norm": 0.47221243381500244, + "learning_rate": 9.623458645739755e-05, + "loss": 1.396, + "step": 7950 + }, + { + "epoch": 0.8396624472573839, + "grad_norm": 0.4813820421695709, + "learning_rate": 9.50079022494395e-05, + "loss": 1.3842, + "step": 7960 + }, + { + "epoch": 0.8407172995780591, + "grad_norm": 0.5066344141960144, + "learning_rate": 9.378855754776028e-05, + "loss": 1.3839, + "step": 7970 + }, + { + "epoch": 0.8417721518987342, + "grad_norm": 0.4867765009403229, + "learning_rate": 9.257656601571266e-05, + "loss": 1.3828, + "step": 7980 + }, + { + "epoch": 0.8428270042194093, + "grad_norm": 0.5355703234672546, + "learning_rate": 9.137194123425349e-05, + "loss": 1.3944, + "step": 7990 + }, + { + "epoch": 0.8438818565400844, + "grad_norm": 0.5081289410591125, + "learning_rate": 9.017469670179168e-05, + "loss": 1.3902, + "step": 8000 + }, + { + "epoch": 0.8449367088607594, + "grad_norm": 0.4760856032371521, + "learning_rate": 8.898484583403668e-05, + "loss": 1.394, + "step": 8010 + }, + { + "epoch": 0.8459915611814346, + "grad_norm": 0.4752003848552704, + "learning_rate": 8.780240196384873e-05, + "loss": 1.3862, + "step": 8020 + }, + { + "epoch": 0.8470464135021097, + "grad_norm": 0.48132503032684326, + "learning_rate": 8.662737834108861e-05, + "loss": 1.3867, + "step": 8030 + }, + { + "epoch": 0.8481012658227848, + "grad_norm": 0.48374220728874207, + "learning_rate": 8.545978813246987e-05, + "loss": 1.4, + "step": 8040 + }, + { + "epoch": 0.8491561181434599, + "grad_norm": 0.49021220207214355, + "learning_rate": 8.429964442141072e-05, + "loss": 1.3774, + "step": 8050 + }, + { + "epoch": 
0.8502109704641351, + "grad_norm": 0.5363766551017761, + "learning_rate": 8.314696020788806e-05, + "loss": 1.3857, + "step": 8060 + }, + { + "epoch": 0.8512658227848101, + "grad_norm": 0.5091484785079956, + "learning_rate": 8.200174840829136e-05, + "loss": 1.394, + "step": 8070 + }, + { + "epoch": 0.8523206751054853, + "grad_norm": 0.48978227376937866, + "learning_rate": 8.08640218552778e-05, + "loss": 1.3967, + "step": 8080 + }, + { + "epoch": 0.8533755274261603, + "grad_norm": 0.48981067538261414, + "learning_rate": 7.973379329762925e-05, + "loss": 1.3842, + "step": 8090 + }, + { + "epoch": 0.8544303797468354, + "grad_norm": 0.5715848803520203, + "learning_rate": 7.861107540010845e-05, + "loss": 1.3759, + "step": 8100 + }, + { + "epoch": 0.8554852320675106, + "grad_norm": 0.4878443777561188, + "learning_rate": 7.749588074331762e-05, + "loss": 1.3957, + "step": 8110 + }, + { + "epoch": 0.8565400843881856, + "grad_norm": 0.495976060628891, + "learning_rate": 7.63882218235575e-05, + "loss": 1.3845, + "step": 8120 + }, + { + "epoch": 0.8575949367088608, + "grad_norm": 0.48120617866516113, + "learning_rate": 7.528811105268699e-05, + "loss": 1.3869, + "step": 8130 + }, + { + "epoch": 0.8586497890295358, + "grad_norm": 0.4755679965019226, + "learning_rate": 7.41955607579845e-05, + "loss": 1.3815, + "step": 8140 + }, + { + "epoch": 0.859704641350211, + "grad_norm": 0.4891829490661621, + "learning_rate": 7.311058318200969e-05, + "loss": 1.3787, + "step": 8150 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 0.5429104566574097, + "learning_rate": 7.203319048246599e-05, + "loss": 1.393, + "step": 8160 + }, + { + "epoch": 0.8618143459915611, + "grad_norm": 0.5007178783416748, + "learning_rate": 7.096339473206471e-05, + "loss": 1.3828, + "step": 8170 + }, + { + "epoch": 0.8628691983122363, + "grad_norm": 0.504002034664154, + "learning_rate": 6.990120791838953e-05, + "loss": 1.3925, + "step": 8180 + }, + { + "epoch": 0.8639240506329114, + "grad_norm": 0.6871325969696045, 
+ "learning_rate": 6.884664194376233e-05, + "loss": 1.3841, + "step": 8190 + }, + { + "epoch": 0.8649789029535865, + "grad_norm": 0.49031054973602295, + "learning_rate": 6.779970862510989e-05, + "loss": 1.3939, + "step": 8200 + }, + { + "epoch": 0.8660337552742616, + "grad_norm": 0.5044110417366028, + "learning_rate": 6.676041969383107e-05, + "loss": 1.3852, + "step": 8210 + }, + { + "epoch": 0.8670886075949367, + "grad_norm": 0.5476711988449097, + "learning_rate": 6.572878679566605e-05, + "loss": 1.3932, + "step": 8220 + }, + { + "epoch": 0.8681434599156118, + "grad_norm": 0.5918845534324646, + "learning_rate": 6.470482149056509e-05, + "loss": 1.3891, + "step": 8230 + }, + { + "epoch": 0.869198312236287, + "grad_norm": 0.4630289673805237, + "learning_rate": 6.368853525255942e-05, + "loss": 1.3872, + "step": 8240 + }, + { + "epoch": 0.870253164556962, + "grad_norm": 0.5118920207023621, + "learning_rate": 6.267993946963249e-05, + "loss": 1.4014, + "step": 8250 + }, + { + "epoch": 0.8713080168776371, + "grad_norm": 0.47048085927963257, + "learning_rate": 6.167904544359265e-05, + "loss": 1.3924, + "step": 8260 + }, + { + "epoch": 0.8723628691983122, + "grad_norm": 0.48211798071861267, + "learning_rate": 6.068586438994617e-05, + "loss": 1.389, + "step": 8270 + }, + { + "epoch": 0.8734177215189873, + "grad_norm": 0.4633164405822754, + "learning_rate": 5.970040743777161e-05, + "loss": 1.3709, + "step": 8280 + }, + { + "epoch": 0.8744725738396625, + "grad_norm": 0.4633491039276123, + "learning_rate": 5.8722685629595454e-05, + "loss": 1.3727, + "step": 8290 + }, + { + "epoch": 0.8755274261603375, + "grad_norm": 0.46621644496917725, + "learning_rate": 5.7752709921267855e-05, + "loss": 1.3976, + "step": 8300 + }, + { + "epoch": 0.8765822784810127, + "grad_norm": 0.5643050074577332, + "learning_rate": 5.6790491181840294e-05, + "loss": 1.376, + "step": 8310 + }, + { + "epoch": 0.8776371308016878, + "grad_norm": 0.5608354210853577, + "learning_rate": 5.583604019344354e-05, + 
"loss": 1.3941, + "step": 8320 + }, + { + "epoch": 0.8786919831223629, + "grad_norm": 0.5177557468414307, + "learning_rate": 5.4889367651167007e-05, + "loss": 1.3904, + "step": 8330 + }, + { + "epoch": 0.879746835443038, + "grad_norm": 0.4648410677909851, + "learning_rate": 5.3950484162938714e-05, + "loss": 1.3766, + "step": 8340 + }, + { + "epoch": 0.880801687763713, + "grad_norm": 0.47074922919273376, + "learning_rate": 5.3019400249406686e-05, + "loss": 1.3796, + "step": 8350 + }, + { + "epoch": 0.8818565400843882, + "grad_norm": 0.4819316864013672, + "learning_rate": 5.209612634382077e-05, + "loss": 1.378, + "step": 8360 + }, + { + "epoch": 0.8829113924050633, + "grad_norm": 0.5172094106674194, + "learning_rate": 5.118067279191599e-05, + "loss": 1.385, + "step": 8370 + }, + { + "epoch": 0.8839662447257384, + "grad_norm": 0.46945855021476746, + "learning_rate": 5.0273049851796205e-05, + "loss": 1.3923, + "step": 8380 + }, + { + "epoch": 0.8850210970464135, + "grad_norm": 0.47022953629493713, + "learning_rate": 4.9373267693819805e-05, + "loss": 1.381, + "step": 8390 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 0.4940224587917328, + "learning_rate": 4.848133640048513e-05, + "loss": 1.381, + "step": 8400 + }, + { + "epoch": 0.8871308016877637, + "grad_norm": 0.5979254245758057, + "learning_rate": 4.75972659663178e-05, + "loss": 1.4, + "step": 8410 + }, + { + "epoch": 0.8881856540084389, + "grad_norm": 0.48716863989830017, + "learning_rate": 4.672106629775882e-05, + "loss": 1.3876, + "step": 8420 + }, + { + "epoch": 0.8892405063291139, + "grad_norm": 0.4779558777809143, + "learning_rate": 4.585274721305333e-05, + "loss": 1.3826, + "step": 8430 + }, + { + "epoch": 0.890295358649789, + "grad_norm": 0.48549434542655945, + "learning_rate": 4.4992318442140575e-05, + "loss": 1.3785, + "step": 8440 + }, + { + "epoch": 0.8913502109704642, + "grad_norm": 0.5071036219596863, + "learning_rate": 4.413978962654508e-05, + "loss": 1.3876, + "step": 8450 + }, + { + 
"epoch": 0.8924050632911392, + "grad_norm": 0.4944823980331421, + "learning_rate": 4.3295170319268554e-05, + "loss": 1.3839, + "step": 8460 + }, + { + "epoch": 0.8934599156118144, + "grad_norm": 0.4969879984855652, + "learning_rate": 4.245846998468261e-05, + "loss": 1.3809, + "step": 8470 + }, + { + "epoch": 0.8945147679324894, + "grad_norm": 0.47751787304878235, + "learning_rate": 4.16296979984232e-05, + "loss": 1.3748, + "step": 8480 + }, + { + "epoch": 0.8955696202531646, + "grad_norm": 0.5304287672042847, + "learning_rate": 4.080886364728506e-05, + "loss": 1.3839, + "step": 8490 + }, + { + "epoch": 0.8966244725738397, + "grad_norm": 0.48898303508758545, + "learning_rate": 3.999597612911793e-05, + "loss": 1.3719, + "step": 8500 + }, + { + "epoch": 0.8976793248945147, + "grad_norm": 0.4708358347415924, + "learning_rate": 3.9191044552723345e-05, + "loss": 1.385, + "step": 8510 + }, + { + "epoch": 0.8987341772151899, + "grad_norm": 0.4926777482032776, + "learning_rate": 3.839407793775268e-05, + "loss": 1.3802, + "step": 8520 + }, + { + "epoch": 0.8997890295358649, + "grad_norm": 0.4793872833251953, + "learning_rate": 3.760508521460584e-05, + "loss": 1.3966, + "step": 8530 + }, + { + "epoch": 0.9008438818565401, + "grad_norm": 0.4818679988384247, + "learning_rate": 3.682407522433173e-05, + "loss": 1.3911, + "step": 8540 + }, + { + "epoch": 0.9018987341772152, + "grad_norm": 0.48742154240608215, + "learning_rate": 3.605105671852854e-05, + "loss": 1.3805, + "step": 8550 + }, + { + "epoch": 0.9029535864978903, + "grad_norm": 0.4817183017730713, + "learning_rate": 3.528603835924626e-05, + "loss": 1.3743, + "step": 8560 + }, + { + "epoch": 0.9040084388185654, + "grad_norm": 0.5547499656677246, + "learning_rate": 3.4529028718888935e-05, + "loss": 1.3931, + "step": 8570 + }, + { + "epoch": 0.9050632911392406, + "grad_norm": 0.5378564596176147, + "learning_rate": 3.378003628011938e-05, + "loss": 1.3838, + "step": 8580 + }, + { + "epoch": 0.9061181434599156, + "grad_norm": 
0.648749589920044, + "learning_rate": 3.303906943576346e-05, + "loss": 1.387, + "step": 8590 + }, + { + "epoch": 0.9071729957805907, + "grad_norm": 0.5041891932487488, + "learning_rate": 3.230613648871661e-05, + "loss": 1.3801, + "step": 8600 + }, + { + "epoch": 0.9082278481012658, + "grad_norm": 0.5527295470237732, + "learning_rate": 3.158124565185022e-05, + "loss": 1.3801, + "step": 8610 + }, + { + "epoch": 0.9092827004219409, + "grad_norm": 0.47023510932922363, + "learning_rate": 3.086440504792026e-05, + "loss": 1.378, + "step": 8620 + }, + { + "epoch": 0.9103375527426161, + "grad_norm": 0.5023501515388489, + "learning_rate": 3.015562270947553e-05, + "loss": 1.3898, + "step": 8630 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 0.5792856216430664, + "learning_rate": 2.945490657876837e-05, + "loss": 1.3669, + "step": 8640 + }, + { + "epoch": 0.9124472573839663, + "grad_norm": 0.48649540543556213, + "learning_rate": 2.8762264507665113e-05, + "loss": 1.3743, + "step": 8650 + }, + { + "epoch": 0.9135021097046413, + "grad_norm": 0.5258405804634094, + "learning_rate": 2.807770425755829e-05, + "loss": 1.3763, + "step": 8660 + }, + { + "epoch": 0.9145569620253164, + "grad_norm": 0.4875074028968811, + "learning_rate": 2.7401233499279866e-05, + "loss": 1.382, + "step": 8670 + }, + { + "epoch": 0.9156118143459916, + "grad_norm": 0.47191861271858215, + "learning_rate": 2.6732859813014987e-05, + "loss": 1.3905, + "step": 8680 + }, + { + "epoch": 0.9166666666666666, + "grad_norm": 0.49229896068573, + "learning_rate": 2.607259068821721e-05, + "loss": 1.3829, + "step": 8690 + }, + { + "epoch": 0.9177215189873418, + "grad_norm": 0.46301206946372986, + "learning_rate": 2.5420433523524493e-05, + "loss": 1.382, + "step": 8700 + }, + { + "epoch": 0.9187763713080169, + "grad_norm": 0.4987659454345703, + "learning_rate": 2.4776395626676162e-05, + "loss": 1.38, + "step": 8710 + }, + { + "epoch": 0.919831223628692, + "grad_norm": 0.5030317306518555, + "learning_rate": 
2.414048421443141e-05, + "loss": 1.3761, + "step": 8720 + }, + { + "epoch": 0.9208860759493671, + "grad_norm": 0.4857030212879181, + "learning_rate": 2.3512706412488012e-05, + "loss": 1.3881, + "step": 8730 + }, + { + "epoch": 0.9219409282700421, + "grad_norm": 0.4724518656730652, + "learning_rate": 2.2893069255402993e-05, + "loss": 1.3764, + "step": 8740 + }, + { + "epoch": 0.9229957805907173, + "grad_norm": 0.4726200997829437, + "learning_rate": 2.2281579686513176e-05, + "loss": 1.3732, + "step": 8750 + }, + { + "epoch": 0.9240506329113924, + "grad_norm": 0.5828139185905457, + "learning_rate": 2.1678244557857663e-05, + "loss": 1.3622, + "step": 8760 + }, + { + "epoch": 0.9251054852320675, + "grad_norm": 0.4617006778717041, + "learning_rate": 2.1083070630101232e-05, + "loss": 1.3738, + "step": 8770 + }, + { + "epoch": 0.9261603375527426, + "grad_norm": 0.4608205258846283, + "learning_rate": 2.0496064572458395e-05, + "loss": 1.3848, + "step": 8780 + }, + { + "epoch": 0.9272151898734177, + "grad_norm": 0.46868443489074707, + "learning_rate": 1.991723296261863e-05, + "loss": 1.3765, + "step": 8790 + }, + { + "epoch": 0.9282700421940928, + "grad_norm": 0.4702487289905548, + "learning_rate": 1.9346582286672686e-05, + "loss": 1.3744, + "step": 8800 + }, + { + "epoch": 0.929324894514768, + "grad_norm": 0.46541425585746765, + "learning_rate": 1.878411893904014e-05, + "loss": 1.3908, + "step": 8810 + }, + { + "epoch": 0.930379746835443, + "grad_norm": 0.467693030834198, + "learning_rate": 1.822984922239737e-05, + "loss": 1.3794, + "step": 8820 + }, + { + "epoch": 0.9314345991561181, + "grad_norm": 0.48465168476104736, + "learning_rate": 1.7683779347607286e-05, + "loss": 1.3927, + "step": 8830 + }, + { + "epoch": 0.9324894514767933, + "grad_norm": 0.4728999733924866, + "learning_rate": 1.714591543364938e-05, + "loss": 1.3811, + "step": 8840 + }, + { + "epoch": 0.9335443037974683, + "grad_norm": 0.5732617974281311, + "learning_rate": 1.6616263507551437e-05, + "loss": 1.3769, 
+ "step": 8850 + }, + { + "epoch": 0.9345991561181435, + "grad_norm": 0.5440745949745178, + "learning_rate": 1.609482950432195e-05, + "loss": 1.3782, + "step": 8860 + }, + { + "epoch": 0.9356540084388185, + "grad_norm": 0.47789183259010315, + "learning_rate": 1.5581619266883563e-05, + "loss": 1.3821, + "step": 8870 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 0.49290019273757935, + "learning_rate": 1.5076638546007548e-05, + "loss": 1.3851, + "step": 8880 + }, + { + "epoch": 0.9377637130801688, + "grad_norm": 0.4680052697658539, + "learning_rate": 1.457989300024945e-05, + "loss": 1.3843, + "step": 8890 + }, + { + "epoch": 0.9388185654008439, + "grad_norm": 0.4927351176738739, + "learning_rate": 1.4091388195885625e-05, + "loss": 1.3744, + "step": 8900 + }, + { + "epoch": 0.939873417721519, + "grad_norm": 0.5219542980194092, + "learning_rate": 1.3611129606851041e-05, + "loss": 1.3729, + "step": 8910 + }, + { + "epoch": 0.9409282700421941, + "grad_norm": 0.459949254989624, + "learning_rate": 1.313912261467759e-05, + "loss": 1.381, + "step": 8920 + }, + { + "epoch": 0.9419831223628692, + "grad_norm": 0.4824971854686737, + "learning_rate": 1.267537250843412e-05, + "loss": 1.3766, + "step": 8930 + }, + { + "epoch": 0.9430379746835443, + "grad_norm": 0.4652012288570404, + "learning_rate": 1.2219884484667071e-05, + "loss": 1.378, + "step": 8940 + }, + { + "epoch": 0.9440928270042194, + "grad_norm": 0.45781955122947693, + "learning_rate": 1.1772663647341947e-05, + "loss": 1.3801, + "step": 8950 + }, + { + "epoch": 0.9451476793248945, + "grad_norm": 0.4768141508102417, + "learning_rate": 1.1333715007786932e-05, + "loss": 1.384, + "step": 8960 + }, + { + "epoch": 0.9462025316455697, + "grad_norm": 0.48779961466789246, + "learning_rate": 1.0903043484635694e-05, + "loss": 1.3817, + "step": 8970 + }, + { + "epoch": 0.9472573839662447, + "grad_norm": 0.5315613150596619, + "learning_rate": 1.0480653903772924e-05, + "loss": 1.3762, + "step": 8980 + }, + { + "epoch": 
0.9483122362869199, + "grad_norm": 0.4680369794368744, + "learning_rate": 1.0066550998280132e-05, + "loss": 1.3828, + "step": 8990 + }, + { + "epoch": 0.9493670886075949, + "grad_norm": 0.4570881128311157, + "learning_rate": 9.660739408382608e-06, + "loss": 1.3783, + "step": 9000 + }, + { + "epoch": 0.95042194092827, + "grad_norm": 0.4943011701107025, + "learning_rate": 9.26322368139737e-06, + "loss": 1.3737, + "step": 9010 + }, + { + "epoch": 0.9514767932489452, + "grad_norm": 0.4810599982738495, + "learning_rate": 8.874008271682222e-06, + "loss": 1.3761, + "step": 9020 + }, + { + "epoch": 0.9525316455696202, + "grad_norm": 0.4643784463405609, + "learning_rate": 8.493097540585775e-06, + "loss": 1.3928, + "step": 9030 + }, + { + "epoch": 0.9535864978902954, + "grad_norm": 0.4680810570716858, + "learning_rate": 8.120495756399005e-06, + "loss": 1.371, + "step": 9040 + }, + { + "epoch": 0.9546413502109705, + "grad_norm": 0.47044894099235535, + "learning_rate": 7.756207094306605e-06, + "loss": 1.3828, + "step": 9050 + }, + { + "epoch": 0.9556962025316456, + "grad_norm": 0.46625006198883057, + "learning_rate": 7.400235636340957e-06, + "loss": 1.3823, + "step": 9060 + }, + { + "epoch": 0.9567510548523207, + "grad_norm": 0.4695632755756378, + "learning_rate": 7.0525853713362395e-06, + "loss": 1.3848, + "step": 9070 + }, + { + "epoch": 0.9578059071729957, + "grad_norm": 0.5059646964073181, + "learning_rate": 6.71326019488322e-06, + "loss": 1.3835, + "step": 9080 + }, + { + "epoch": 0.9588607594936709, + "grad_norm": 0.45554956793785095, + "learning_rate": 6.3822639092862846e-06, + "loss": 1.3841, + "step": 9090 + }, + { + "epoch": 0.959915611814346, + "grad_norm": 0.46608665585517883, + "learning_rate": 6.059600223520478e-06, + "loss": 1.3633, + "step": 9100 + }, + { + "epoch": 0.9609704641350211, + "grad_norm": 0.4648936986923218, + "learning_rate": 5.745272753189784e-06, + "loss": 1.3776, + "step": 9110 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 
0.4707690477371216, + "learning_rate": 5.439285020487156e-06, + "loss": 1.3837, + "step": 9120 + }, + { + "epoch": 0.9630801687763713, + "grad_norm": 0.4885684847831726, + "learning_rate": 5.141640454154467e-06, + "loss": 1.3691, + "step": 9130 + }, + { + "epoch": 0.9641350210970464, + "grad_norm": 0.47174009680747986, + "learning_rate": 4.852342389444458e-06, + "loss": 1.3909, + "step": 9140 + }, + { + "epoch": 0.9651898734177216, + "grad_norm": 0.48270609974861145, + "learning_rate": 4.571394068083185e-06, + "loss": 1.3775, + "step": 9150 + }, + { + "epoch": 0.9662447257383966, + "grad_norm": 0.4584430456161499, + "learning_rate": 4.298798638233709e-06, + "loss": 1.3858, + "step": 9160 + }, + { + "epoch": 0.9672995780590717, + "grad_norm": 0.4954625368118286, + "learning_rate": 4.034559154461049e-06, + "loss": 1.3832, + "step": 9170 + }, + { + "epoch": 0.9683544303797469, + "grad_norm": 0.45214712619781494, + "learning_rate": 3.7786785776976198e-06, + "loss": 1.3774, + "step": 9180 + }, + { + "epoch": 0.9694092827004219, + "grad_norm": 0.46736812591552734, + "learning_rate": 3.5311597752100964e-06, + "loss": 1.3735, + "step": 9190 + }, + { + "epoch": 0.9704641350210971, + "grad_norm": 0.4651713967323303, + "learning_rate": 3.2920055205676867e-06, + "loss": 1.3762, + "step": 9200 + }, + { + "epoch": 0.9715189873417721, + "grad_norm": 0.46557387709617615, + "learning_rate": 3.06121849361049e-06, + "loss": 1.3785, + "step": 9210 + }, + { + "epoch": 0.9725738396624473, + "grad_norm": 0.47982534766197205, + "learning_rate": 2.838801280419856e-06, + "loss": 1.3752, + "step": 9220 + }, + { + "epoch": 0.9736286919831224, + "grad_norm": 0.46053677797317505, + "learning_rate": 2.624756373289322e-06, + "loss": 1.3676, + "step": 9230 + }, + { + "epoch": 0.9746835443037974, + "grad_norm": 0.4777507483959198, + "learning_rate": 2.419086170696472e-06, + "loss": 1.3632, + "step": 9240 + }, + { + "epoch": 0.9757383966244726, + "grad_norm": 0.45830753445625305, + "learning_rate": 
2.2217929772764545e-06, + "loss": 1.3738, + "step": 9250 + }, + { + "epoch": 0.9767932489451476, + "grad_norm": 0.460542768239975, + "learning_rate": 2.0328790037957568e-06, + "loss": 1.372, + "step": 9260 + }, + { + "epoch": 0.9778481012658228, + "grad_norm": 0.45632460713386536, + "learning_rate": 1.8523463671278052e-06, + "loss": 1.3822, + "step": 9270 + }, + { + "epoch": 0.9789029535864979, + "grad_norm": 0.46475285291671753, + "learning_rate": 1.6801970902288188e-06, + "loss": 1.3698, + "step": 9280 + }, + { + "epoch": 0.979957805907173, + "grad_norm": 0.4633490741252899, + "learning_rate": 1.5164331021155774e-06, + "loss": 1.3782, + "step": 9290 + }, + { + "epoch": 0.9810126582278481, + "grad_norm": 0.4666520357131958, + "learning_rate": 1.3610562378435221e-06, + "loss": 1.3754, + "step": 9300 + }, + { + "epoch": 0.9820675105485233, + "grad_norm": 0.48110079765319824, + "learning_rate": 1.2140682384862712e-06, + "loss": 1.3764, + "step": 9310 + }, + { + "epoch": 0.9831223628691983, + "grad_norm": 0.4777778089046478, + "learning_rate": 1.0754707511161365e-06, + "loss": 1.3712, + "step": 9320 + }, + { + "epoch": 0.9841772151898734, + "grad_norm": 0.46745941042900085, + "learning_rate": 9.452653287856383e-07, + "loss": 1.3867, + "step": 9330 + }, + { + "epoch": 0.9852320675105485, + "grad_norm": 0.4661557972431183, + "learning_rate": 8.234534305101015e-07, + "loss": 1.3805, + "step": 9340 + }, + { + "epoch": 0.9862869198312236, + "grad_norm": 0.4665854573249817, + "learning_rate": 7.100364212513367e-07, + "loss": 1.3928, + "step": 9350 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 0.47986653447151184, + "learning_rate": 6.050155719023176e-07, + "loss": 1.3842, + "step": 9360 + }, + { + "epoch": 0.9883966244725738, + "grad_norm": 0.4748641550540924, + "learning_rate": 5.08392059272944e-07, + "loss": 1.3801, + "step": 9370 + }, + { + "epoch": 0.989451476793249, + "grad_norm": 0.48322656750679016, + "learning_rate": 4.2016696607680147e-07, + "loss": 
1.3675, + "step": 9380 + }, + { + "epoch": 0.990506329113924, + "grad_norm": 0.477649986743927, + "learning_rate": 3.4034128091917085e-07, + "loss": 1.3728, + "step": 9390 + }, + { + "epoch": 0.9915611814345991, + "grad_norm": 0.47399818897247314, + "learning_rate": 2.689158982859541e-07, + "loss": 1.389, + "step": 9400 + }, + { + "epoch": 0.9926160337552743, + "grad_norm": 0.49044132232666016, + "learning_rate": 2.05891618533266e-07, + "loss": 1.3623, + "step": 9410 + }, + { + "epoch": 0.9936708860759493, + "grad_norm": 0.47090208530426025, + "learning_rate": 1.5126914787894074e-07, + "loss": 1.3804, + "step": 9420 + }, + { + "epoch": 0.9947257383966245, + "grad_norm": 0.4704456329345703, + "learning_rate": 1.0504909839462173e-07, + "loss": 1.3817, + "step": 9430 + }, + { + "epoch": 0.9957805907172996, + "grad_norm": 0.4629286229610443, + "learning_rate": 6.723198799826746e-08, + "loss": 1.3774, + "step": 9440 + }, + { + "epoch": 0.9968354430379747, + "grad_norm": 0.46754178404808044, + "learning_rate": 3.781824044932214e-08, + "loss": 1.3866, + "step": 9450 + }, + { + "epoch": 0.9978902953586498, + "grad_norm": 0.46161213517189026, + "learning_rate": 1.6808185342970238e-08, + "loss": 1.3682, + "step": 9460 + }, + { + "epoch": 0.9989451476793249, + "grad_norm": 0.4677896797657013, + "learning_rate": 4.202058107305451e-09, + "loss": 1.3826, + "step": 9470 + }, + { + "epoch": 1.0, + "grad_norm": 1.379104733467102, + "learning_rate": 0.0, + "loss": 1.3731, + "step": 9480 + } + ], + "logging_steps": 10, + "max_steps": 9480, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.839757817279693e+16, + "train_batch_size": 1024, + "trial_name": null, + "trial_params": null +} diff --git 
a/saves-bloom-cosine/checkpoint-9480/training_args.bin b/saves-bloom-cosine/checkpoint-9480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..cf8c6f95ef3c5aca0723374ac26978a62365984e --- /dev/null +++ b/saves-bloom-cosine/checkpoint-9480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa09df16e59dcbb74cf316e7f7af7ee6e0353ab3e97f5d5433a5a66816d59606 +size 5176 diff --git a/saves-bloom-cosine/config.json b/saves-bloom-cosine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..57266dc701ccdcb97654ab407a925e296c45c5b8 --- /dev/null +++ b/saves-bloom-cosine/config.json @@ -0,0 +1,25 @@ +{ + "apply_residual_connection_post_layernorm": false, + "architectures": [ + "BloomForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "gelu", + "hidden_dropout": 0.0, + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 1024, + "layer_norm_epsilon": 1e-05, + "model_type": "bloom", + "n_head": 8, + "n_layer": 2, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "slow_but_exact": false, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-bloom-cosine/generation_config.json b/saves-bloom-cosine/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b2fc224da8a3685f78c733a0ef85e67242c17b5a --- /dev/null +++ b/saves-bloom-cosine/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.42.4" +} diff --git a/saves-bloom-cosine/model.safetensors b/saves-bloom-cosine/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eaa0bfdb6e28de325acaf14eebd2714ba0e43911 --- /dev/null +++ b/saves-bloom-cosine/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:0d93825cbb578680f828494c64549fb199ef64251a7ffb54729877458201c18d +size 8373336 diff --git a/saves-bloom-cosine/result.log b/saves-bloom-cosine/result.log new file mode 100644 index 0000000000000000000000000000000000000000..44a8399064354f37ca8e251439e38e3e04ad8fdf --- /dev/null +++ b/saves-bloom-cosine/result.log @@ -0,0 +1 @@ +{'train_runtime': 2850.4529, 'train_samples_per_second': 3405.285, 'train_steps_per_second': 3.326, 'train_loss': 1.6703774555825985, 'epoch': 1.0} \ No newline at end of file diff --git a/saves-bloom-cosine/special_tokens_map.json b/saves-bloom-cosine/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-bloom-cosine/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-bloom-cosine/tokenizer.json b/saves-bloom-cosine/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-bloom-cosine/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + 
"|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + 
"æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + 
"è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + 
"um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + 
"Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + 
"du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + 
"ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, 
+ "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 
1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 
1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 
1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 
1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 
1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + 
"Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + 
"Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ 
ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", + "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-bloom-cosine/tokenizer_config.json b/saves-bloom-cosine/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-bloom-cosine/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + 
"model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-bloom/checkpoint-9480/config.json b/saves-bloom/checkpoint-9480/config.json new file mode 100644 index 0000000000000000000000000000000000000000..57266dc701ccdcb97654ab407a925e296c45c5b8 --- /dev/null +++ b/saves-bloom/checkpoint-9480/config.json @@ -0,0 +1,25 @@ +{ + "apply_residual_connection_post_layernorm": false, + "architectures": [ + "BloomForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "gelu", + "hidden_dropout": 0.0, + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 1024, + "layer_norm_epsilon": 1e-05, + "model_type": "bloom", + "n_head": 8, + "n_layer": 2, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "slow_but_exact": false, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-bloom/checkpoint-9480/generation_config.json b/saves-bloom/checkpoint-9480/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b2fc224da8a3685f78c733a0ef85e67242c17b5a --- /dev/null +++ b/saves-bloom/checkpoint-9480/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.42.4" +} diff --git a/saves-bloom/checkpoint-9480/model.safetensors b/saves-bloom/checkpoint-9480/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1d5125393ea05cba92d59f823bab9d9d696a9361 --- /dev/null +++ b/saves-bloom/checkpoint-9480/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb05b64ed9e1624ccea38fc90673c2a407026e35048f7394f6a82dab4a6aa657 +size 8373336 diff --git a/saves-bloom/checkpoint-9480/optimizer.pt b/saves-bloom/checkpoint-9480/optimizer.pt new file mode 100644 index 
0000000000000000000000000000000000000000..ed725decf730e635947f51cfbaea85d5211ea709 --- /dev/null +++ b/saves-bloom/checkpoint-9480/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:258b1808ba3519c37e92d9878d27bbeee5da2a1fc4da35c316ba8ffdbff77eff +size 16764871 diff --git a/saves-bloom/checkpoint-9480/rng_state.pth b/saves-bloom/checkpoint-9480/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f8291cd6ce87668b786a72f3e93d072fbe54902 --- /dev/null +++ b/saves-bloom/checkpoint-9480/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c917636c7a58af68a29056522a757e9f9b99005b776641aa157c536967817d +size 14244 diff --git a/saves-bloom/checkpoint-9480/scheduler.pt b/saves-bloom/checkpoint-9480/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..63473f23a031ab0f869bb406d5cf89839262f03d --- /dev/null +++ b/saves-bloom/checkpoint-9480/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbb2bea2f7536b844ad9bb1bf6c3877fce0b1eb4d96764e140560dbf207ce6aa +size 1064 diff --git a/saves-bloom/checkpoint-9480/special_tokens_map.json b/saves-bloom/checkpoint-9480/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-bloom/checkpoint-9480/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-bloom/checkpoint-9480/tokenizer.json b/saves-bloom/checkpoint-9480/tokenizer.json new file mode 100644 index 
0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-bloom/checkpoint-9480/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + 
">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 
202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 
353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 
500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 
646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + 
"åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + 
"Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + 
"con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 
1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 
1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 
1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + 
"Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + 
"èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, 
+ "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 
1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-bloom/checkpoint-9480/tokenizer_config.json b/saves-bloom/checkpoint-9480/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-bloom/checkpoint-9480/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", 
+ "errors": "replace", + "model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-bloom/checkpoint-9480/trainer_state.json b/saves-bloom/checkpoint-9480/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e47a3bd5279976040ae6bca15268c25a64ff6af1 --- /dev/null +++ b/saves-bloom/checkpoint-9480/trainer_state.json @@ -0,0 +1,6669 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 9480, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0010548523206751054, + "grad_norm": 1.150813341140747, + "learning_rate": 0.00015822784810126583, + "loss": 7.6386, + "step": 10 + }, + { + "epoch": 0.002109704641350211, + "grad_norm": 0.9222255349159241, + "learning_rate": 0.00031645569620253165, + "loss": 7.2705, + "step": 20 + }, + { + "epoch": 0.0031645569620253164, + "grad_norm": 0.8442890644073486, + "learning_rate": 0.00047468354430379745, + "loss": 6.5303, + "step": 30 + }, + { + "epoch": 0.004219409282700422, + "grad_norm": 0.6285983324050903, + "learning_rate": 0.0006329113924050633, + "loss": 5.8289, + "step": 40 + }, + { + "epoch": 0.005274261603375527, + "grad_norm": 0.3649296462535858, + "learning_rate": 0.0007911392405063291, + "loss": 5.3788, + "step": 50 + }, + { + "epoch": 0.006329113924050633, + "grad_norm": 0.243421733379364, + "learning_rate": 0.0009493670886075949, + "loss": 4.9696, + "step": 60 + }, + { + "epoch": 0.007383966244725738, + "grad_norm": 0.21124279499053955, + "learning_rate": 0.0011075949367088608, + "loss": 4.6049, + "step": 70 + }, + { + "epoch": 0.008438818565400843, + "grad_norm": 0.3619547486305237, + "learning_rate": 0.0012658227848101266, + "loss": 4.3648, + "step": 80 + }, + { + "epoch": 0.00949367088607595, + "grad_norm": 0.3652024269104004, + 
"learning_rate": 0.0014240506329113926, + "loss": 4.1957, + "step": 90 + }, + { + "epoch": 0.010548523206751054, + "grad_norm": 0.2144092321395874, + "learning_rate": 0.0015, + "loss": 4.0701, + "step": 100 + }, + { + "epoch": 0.011603375527426161, + "grad_norm": 0.3157788813114166, + "learning_rate": 0.0015, + "loss": 3.9488, + "step": 110 + }, + { + "epoch": 0.012658227848101266, + "grad_norm": 0.3224923610687256, + "learning_rate": 0.0015, + "loss": 3.8612, + "step": 120 + }, + { + "epoch": 0.013713080168776372, + "grad_norm": 0.26904231309890747, + "learning_rate": 0.0015, + "loss": 3.7804, + "step": 130 + }, + { + "epoch": 0.014767932489451477, + "grad_norm": 0.2192022055387497, + "learning_rate": 0.0015, + "loss": 3.6944, + "step": 140 + }, + { + "epoch": 0.015822784810126583, + "grad_norm": 0.5886008739471436, + "learning_rate": 0.0015, + "loss": 3.6382, + "step": 150 + }, + { + "epoch": 0.016877637130801686, + "grad_norm": 0.2867976725101471, + "learning_rate": 0.0015, + "loss": 3.579, + "step": 160 + }, + { + "epoch": 0.017932489451476793, + "grad_norm": 0.2692849636077881, + "learning_rate": 0.0015, + "loss": 3.5003, + "step": 170 + }, + { + "epoch": 0.0189873417721519, + "grad_norm": 0.31116002798080444, + "learning_rate": 0.0015, + "loss": 3.4577, + "step": 180 + }, + { + "epoch": 0.020042194092827006, + "grad_norm": 0.617526650428772, + "learning_rate": 0.0015, + "loss": 3.4048, + "step": 190 + }, + { + "epoch": 0.02109704641350211, + "grad_norm": 0.31083545088768005, + "learning_rate": 0.0015, + "loss": 3.3565, + "step": 200 + }, + { + "epoch": 0.022151898734177215, + "grad_norm": 0.33985719084739685, + "learning_rate": 0.0015, + "loss": 3.3076, + "step": 210 + }, + { + "epoch": 0.023206751054852322, + "grad_norm": 0.42189735174179077, + "learning_rate": 0.0015, + "loss": 3.2698, + "step": 220 + }, + { + "epoch": 0.024261603375527425, + "grad_norm": 0.3632897138595581, + "learning_rate": 0.0015, + "loss": 3.2174, + "step": 230 + }, + { + "epoch": 
0.02531645569620253, + "grad_norm": 0.5148869752883911, + "learning_rate": 0.0015, + "loss": 3.1702, + "step": 240 + }, + { + "epoch": 0.026371308016877638, + "grad_norm": 0.558694064617157, + "learning_rate": 0.0015, + "loss": 3.1347, + "step": 250 + }, + { + "epoch": 0.027426160337552744, + "grad_norm": 0.3564925789833069, + "learning_rate": 0.0015, + "loss": 3.0917, + "step": 260 + }, + { + "epoch": 0.028481012658227847, + "grad_norm": 0.5106367468833923, + "learning_rate": 0.0015, + "loss": 3.0564, + "step": 270 + }, + { + "epoch": 0.029535864978902954, + "grad_norm": 0.5460816025733948, + "learning_rate": 0.0015, + "loss": 3.0089, + "step": 280 + }, + { + "epoch": 0.03059071729957806, + "grad_norm": 0.4177979826927185, + "learning_rate": 0.0015, + "loss": 2.9935, + "step": 290 + }, + { + "epoch": 0.03164556962025317, + "grad_norm": 0.4817129075527191, + "learning_rate": 0.0015, + "loss": 2.9533, + "step": 300 + }, + { + "epoch": 0.03270042194092827, + "grad_norm": 0.45403867959976196, + "learning_rate": 0.0015, + "loss": 2.9139, + "step": 310 + }, + { + "epoch": 0.03375527426160337, + "grad_norm": 0.5642548203468323, + "learning_rate": 0.0015, + "loss": 2.8754, + "step": 320 + }, + { + "epoch": 0.03481012658227848, + "grad_norm": 0.3790704905986786, + "learning_rate": 0.0015, + "loss": 2.8677, + "step": 330 + }, + { + "epoch": 0.035864978902953586, + "grad_norm": 0.6472330689430237, + "learning_rate": 0.0015, + "loss": 2.8357, + "step": 340 + }, + { + "epoch": 0.03691983122362869, + "grad_norm": 0.6580318212509155, + "learning_rate": 0.0015, + "loss": 2.7878, + "step": 350 + }, + { + "epoch": 0.0379746835443038, + "grad_norm": 0.6148264408111572, + "learning_rate": 0.0015, + "loss": 2.7579, + "step": 360 + }, + { + "epoch": 0.039029535864978905, + "grad_norm": 0.5459489226341248, + "learning_rate": 0.0015, + "loss": 2.7384, + "step": 370 + }, + { + "epoch": 0.04008438818565401, + "grad_norm": 0.38705775141716003, + "learning_rate": 0.0015, + "loss": 2.7189, + 
"step": 380 + }, + { + "epoch": 0.04113924050632911, + "grad_norm": 0.7032163143157959, + "learning_rate": 0.0015, + "loss": 2.6934, + "step": 390 + }, + { + "epoch": 0.04219409282700422, + "grad_norm": 0.5982773900032043, + "learning_rate": 0.0015, + "loss": 2.6665, + "step": 400 + }, + { + "epoch": 0.043248945147679324, + "grad_norm": 0.568385899066925, + "learning_rate": 0.0015, + "loss": 2.6469, + "step": 410 + }, + { + "epoch": 0.04430379746835443, + "grad_norm": 0.602777898311615, + "learning_rate": 0.0015, + "loss": 2.6266, + "step": 420 + }, + { + "epoch": 0.04535864978902954, + "grad_norm": 0.5950765609741211, + "learning_rate": 0.0015, + "loss": 2.5922, + "step": 430 + }, + { + "epoch": 0.046413502109704644, + "grad_norm": 0.5538167953491211, + "learning_rate": 0.0015, + "loss": 2.5684, + "step": 440 + }, + { + "epoch": 0.04746835443037975, + "grad_norm": 0.5699306130409241, + "learning_rate": 0.0015, + "loss": 2.5589, + "step": 450 + }, + { + "epoch": 0.04852320675105485, + "grad_norm": 1.100414514541626, + "learning_rate": 0.0015, + "loss": 2.5376, + "step": 460 + }, + { + "epoch": 0.049578059071729956, + "grad_norm": 0.555873453617096, + "learning_rate": 0.0015, + "loss": 2.5088, + "step": 470 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 0.44411933422088623, + "learning_rate": 0.0015, + "loss": 2.5043, + "step": 480 + }, + { + "epoch": 0.05168776371308017, + "grad_norm": 0.6461852788925171, + "learning_rate": 0.0015, + "loss": 2.4712, + "step": 490 + }, + { + "epoch": 0.052742616033755275, + "grad_norm": 0.4979498088359833, + "learning_rate": 0.0015, + "loss": 2.463, + "step": 500 + }, + { + "epoch": 0.05379746835443038, + "grad_norm": 0.6500505208969116, + "learning_rate": 0.0015, + "loss": 2.4463, + "step": 510 + }, + { + "epoch": 0.05485232067510549, + "grad_norm": 0.5891791582107544, + "learning_rate": 0.0015, + "loss": 2.4269, + "step": 520 + }, + { + "epoch": 0.05590717299578059, + "grad_norm": 0.5033569931983948, + "learning_rate": 
0.0015, + "loss": 2.4104, + "step": 530 + }, + { + "epoch": 0.056962025316455694, + "grad_norm": 1.0817065238952637, + "learning_rate": 0.0015, + "loss": 2.3933, + "step": 540 + }, + { + "epoch": 0.0580168776371308, + "grad_norm": 0.8942646384239197, + "learning_rate": 0.0015, + "loss": 2.3918, + "step": 550 + }, + { + "epoch": 0.05907172995780591, + "grad_norm": 0.4584704041481018, + "learning_rate": 0.0015, + "loss": 2.3453, + "step": 560 + }, + { + "epoch": 0.060126582278481014, + "grad_norm": 0.4871450960636139, + "learning_rate": 0.0015, + "loss": 2.3442, + "step": 570 + }, + { + "epoch": 0.06118143459915612, + "grad_norm": 0.8289301991462708, + "learning_rate": 0.0015, + "loss": 2.3445, + "step": 580 + }, + { + "epoch": 0.06223628691983123, + "grad_norm": 0.72162926197052, + "learning_rate": 0.0015, + "loss": 2.3166, + "step": 590 + }, + { + "epoch": 0.06329113924050633, + "grad_norm": 0.40924233198165894, + "learning_rate": 0.0015, + "loss": 2.3003, + "step": 600 + }, + { + "epoch": 0.06434599156118144, + "grad_norm": 0.4829574525356293, + "learning_rate": 0.0015, + "loss": 2.2965, + "step": 610 + }, + { + "epoch": 0.06540084388185655, + "grad_norm": 0.49946269392967224, + "learning_rate": 0.0015, + "loss": 2.2814, + "step": 620 + }, + { + "epoch": 0.06645569620253164, + "grad_norm": 0.5377524495124817, + "learning_rate": 0.0015, + "loss": 2.2663, + "step": 630 + }, + { + "epoch": 0.06751054852320675, + "grad_norm": 0.39217859506607056, + "learning_rate": 0.0015, + "loss": 2.2744, + "step": 640 + }, + { + "epoch": 0.06856540084388185, + "grad_norm": 0.4565280079841614, + "learning_rate": 0.0015, + "loss": 2.2613, + "step": 650 + }, + { + "epoch": 0.06962025316455696, + "grad_norm": 0.639655590057373, + "learning_rate": 0.0015, + "loss": 2.2461, + "step": 660 + }, + { + "epoch": 0.07067510548523206, + "grad_norm": 0.44999587535858154, + "learning_rate": 0.0015, + "loss": 2.2282, + "step": 670 + }, + { + "epoch": 0.07172995780590717, + "grad_norm": 
0.48550304770469666, + "learning_rate": 0.0015, + "loss": 2.2224, + "step": 680 + }, + { + "epoch": 0.07278481012658228, + "grad_norm": 0.47652631998062134, + "learning_rate": 0.0015, + "loss": 2.2308, + "step": 690 + }, + { + "epoch": 0.07383966244725738, + "grad_norm": 0.4794254004955292, + "learning_rate": 0.0015, + "loss": 2.2026, + "step": 700 + }, + { + "epoch": 0.07489451476793249, + "grad_norm": 0.5449780225753784, + "learning_rate": 0.0015, + "loss": 2.1991, + "step": 710 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 0.7790537476539612, + "learning_rate": 0.0015, + "loss": 2.1739, + "step": 720 + }, + { + "epoch": 0.0770042194092827, + "grad_norm": 0.9798956513404846, + "learning_rate": 0.0015, + "loss": 2.1706, + "step": 730 + }, + { + "epoch": 0.07805907172995781, + "grad_norm": 0.49852877855300903, + "learning_rate": 0.0015, + "loss": 2.1797, + "step": 740 + }, + { + "epoch": 0.07911392405063292, + "grad_norm": 0.6039758324623108, + "learning_rate": 0.0015, + "loss": 2.1567, + "step": 750 + }, + { + "epoch": 0.08016877637130802, + "grad_norm": 0.7530763745307922, + "learning_rate": 0.0015, + "loss": 2.1565, + "step": 760 + }, + { + "epoch": 0.08122362869198312, + "grad_norm": 0.9843342304229736, + "learning_rate": 0.0015, + "loss": 2.1471, + "step": 770 + }, + { + "epoch": 0.08227848101265822, + "grad_norm": 0.5783156752586365, + "learning_rate": 0.0015, + "loss": 2.1435, + "step": 780 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 0.44985154271125793, + "learning_rate": 0.0015, + "loss": 2.144, + "step": 790 + }, + { + "epoch": 0.08438818565400844, + "grad_norm": 0.539827287197113, + "learning_rate": 0.0015, + "loss": 2.1162, + "step": 800 + }, + { + "epoch": 0.08544303797468354, + "grad_norm": 0.4987821578979492, + "learning_rate": 0.0015, + "loss": 2.1021, + "step": 810 + }, + { + "epoch": 0.08649789029535865, + "grad_norm": 0.4859192371368408, + "learning_rate": 0.0015, + "loss": 2.1101, + "step": 820 + }, + { + "epoch": 
0.08755274261603375, + "grad_norm": 0.41457563638687134, + "learning_rate": 0.0015, + "loss": 2.1076, + "step": 830 + }, + { + "epoch": 0.08860759493670886, + "grad_norm": 1.0149922370910645, + "learning_rate": 0.0015, + "loss": 2.0877, + "step": 840 + }, + { + "epoch": 0.08966244725738397, + "grad_norm": 0.49760764837265015, + "learning_rate": 0.0015, + "loss": 2.0949, + "step": 850 + }, + { + "epoch": 0.09071729957805907, + "grad_norm": 0.43229302763938904, + "learning_rate": 0.0015, + "loss": 2.0843, + "step": 860 + }, + { + "epoch": 0.09177215189873418, + "grad_norm": 0.6546627283096313, + "learning_rate": 0.0015, + "loss": 2.0812, + "step": 870 + }, + { + "epoch": 0.09282700421940929, + "grad_norm": 0.575627863407135, + "learning_rate": 0.0015, + "loss": 2.0758, + "step": 880 + }, + { + "epoch": 0.0938818565400844, + "grad_norm": 0.47545164823532104, + "learning_rate": 0.0015, + "loss": 2.0634, + "step": 890 + }, + { + "epoch": 0.0949367088607595, + "grad_norm": 0.4198952913284302, + "learning_rate": 0.0015, + "loss": 2.0588, + "step": 900 + }, + { + "epoch": 0.09599156118143459, + "grad_norm": 0.521074652671814, + "learning_rate": 0.0015, + "loss": 2.0657, + "step": 910 + }, + { + "epoch": 0.0970464135021097, + "grad_norm": 0.6115285158157349, + "learning_rate": 0.0015, + "loss": 2.0513, + "step": 920 + }, + { + "epoch": 0.0981012658227848, + "grad_norm": 0.6493558287620544, + "learning_rate": 0.0015, + "loss": 2.0448, + "step": 930 + }, + { + "epoch": 0.09915611814345991, + "grad_norm": 0.8226097226142883, + "learning_rate": 0.0015, + "loss": 2.0504, + "step": 940 + }, + { + "epoch": 0.10021097046413502, + "grad_norm": 0.6946693658828735, + "learning_rate": 0.0015, + "loss": 2.0276, + "step": 950 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 0.5198275446891785, + "learning_rate": 0.0015, + "loss": 2.0359, + "step": 960 + }, + { + "epoch": 0.10232067510548523, + "grad_norm": 0.7822059988975525, + "learning_rate": 0.0015, + "loss": 2.0298, + "step": 
970 + }, + { + "epoch": 0.10337552742616034, + "grad_norm": 0.6713688373565674, + "learning_rate": 0.0015, + "loss": 2.0114, + "step": 980 + }, + { + "epoch": 0.10443037974683544, + "grad_norm": 0.5232925415039062, + "learning_rate": 0.0015, + "loss": 2.0095, + "step": 990 + }, + { + "epoch": 0.10548523206751055, + "grad_norm": 0.7252864837646484, + "learning_rate": 0.0015, + "loss": 2.0188, + "step": 1000 + }, + { + "epoch": 0.10654008438818566, + "grad_norm": 0.6016257405281067, + "learning_rate": 0.0015, + "loss": 2.0124, + "step": 1010 + }, + { + "epoch": 0.10759493670886076, + "grad_norm": 0.6161438822746277, + "learning_rate": 0.0015, + "loss": 1.9974, + "step": 1020 + }, + { + "epoch": 0.10864978902953587, + "grad_norm": 0.6386392116546631, + "learning_rate": 0.0015, + "loss": 1.9978, + "step": 1030 + }, + { + "epoch": 0.10970464135021098, + "grad_norm": 0.4686180651187897, + "learning_rate": 0.0015, + "loss": 1.9909, + "step": 1040 + }, + { + "epoch": 0.11075949367088607, + "grad_norm": 0.8074453473091125, + "learning_rate": 0.0015, + "loss": 1.9951, + "step": 1050 + }, + { + "epoch": 0.11181434599156118, + "grad_norm": 0.6009097099304199, + "learning_rate": 0.0015, + "loss": 1.9872, + "step": 1060 + }, + { + "epoch": 0.11286919831223628, + "grad_norm": 0.5426019430160522, + "learning_rate": 0.0015, + "loss": 1.9725, + "step": 1070 + }, + { + "epoch": 0.11392405063291139, + "grad_norm": 0.6576230525970459, + "learning_rate": 0.0015, + "loss": 1.9745, + "step": 1080 + }, + { + "epoch": 0.1149789029535865, + "grad_norm": 0.4691566824913025, + "learning_rate": 0.0015, + "loss": 1.9826, + "step": 1090 + }, + { + "epoch": 0.1160337552742616, + "grad_norm": 0.46580421924591064, + "learning_rate": 0.0015, + "loss": 1.9651, + "step": 1100 + }, + { + "epoch": 0.11708860759493671, + "grad_norm": 0.47598549723625183, + "learning_rate": 0.0015, + "loss": 1.9696, + "step": 1110 + }, + { + "epoch": 0.11814345991561181, + "grad_norm": 0.4900604784488678, + 
"learning_rate": 0.0015, + "loss": 1.962, + "step": 1120 + }, + { + "epoch": 0.11919831223628692, + "grad_norm": 1.0558894872665405, + "learning_rate": 0.0015, + "loss": 1.9495, + "step": 1130 + }, + { + "epoch": 0.12025316455696203, + "grad_norm": 0.4379860460758209, + "learning_rate": 0.0015, + "loss": 1.9671, + "step": 1140 + }, + { + "epoch": 0.12130801687763713, + "grad_norm": 0.86305832862854, + "learning_rate": 0.0015, + "loss": 1.9446, + "step": 1150 + }, + { + "epoch": 0.12236286919831224, + "grad_norm": 0.5544790029525757, + "learning_rate": 0.0015, + "loss": 1.9562, + "step": 1160 + }, + { + "epoch": 0.12341772151898735, + "grad_norm": 0.6956931948661804, + "learning_rate": 0.0015, + "loss": 1.9462, + "step": 1170 + }, + { + "epoch": 0.12447257383966245, + "grad_norm": 0.472929447889328, + "learning_rate": 0.0015, + "loss": 1.9301, + "step": 1180 + }, + { + "epoch": 0.12552742616033755, + "grad_norm": 0.4986661970615387, + "learning_rate": 0.0015, + "loss": 1.9298, + "step": 1190 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 0.6288496851921082, + "learning_rate": 0.0015, + "loss": 1.9431, + "step": 1200 + }, + { + "epoch": 0.12763713080168776, + "grad_norm": 0.4661789536476135, + "learning_rate": 0.0015, + "loss": 1.9342, + "step": 1210 + }, + { + "epoch": 0.12869198312236288, + "grad_norm": 0.4968653619289398, + "learning_rate": 0.0015, + "loss": 1.9187, + "step": 1220 + }, + { + "epoch": 0.12974683544303797, + "grad_norm": 0.7806333303451538, + "learning_rate": 0.0015, + "loss": 1.9089, + "step": 1230 + }, + { + "epoch": 0.1308016877637131, + "grad_norm": 0.4545213282108307, + "learning_rate": 0.0015, + "loss": 1.9265, + "step": 1240 + }, + { + "epoch": 0.13185654008438819, + "grad_norm": 0.4266795516014099, + "learning_rate": 0.0015, + "loss": 1.9248, + "step": 1250 + }, + { + "epoch": 0.13291139240506328, + "grad_norm": 0.9585714936256409, + "learning_rate": 0.0015, + "loss": 1.9094, + "step": 1260 + }, + { + "epoch": 0.1339662447257384, + 
"grad_norm": 0.8499062061309814, + "learning_rate": 0.0015, + "loss": 1.9159, + "step": 1270 + }, + { + "epoch": 0.1350210970464135, + "grad_norm": 0.48624470829963684, + "learning_rate": 0.0015, + "loss": 1.9249, + "step": 1280 + }, + { + "epoch": 0.1360759493670886, + "grad_norm": 0.43314653635025024, + "learning_rate": 0.0015, + "loss": 1.9068, + "step": 1290 + }, + { + "epoch": 0.1371308016877637, + "grad_norm": 0.42100489139556885, + "learning_rate": 0.0015, + "loss": 1.8916, + "step": 1300 + }, + { + "epoch": 0.13818565400843882, + "grad_norm": 0.44472599029541016, + "learning_rate": 0.0015, + "loss": 1.8974, + "step": 1310 + }, + { + "epoch": 0.13924050632911392, + "grad_norm": 0.8936122059822083, + "learning_rate": 0.0015, + "loss": 1.8974, + "step": 1320 + }, + { + "epoch": 0.14029535864978904, + "grad_norm": 0.44905006885528564, + "learning_rate": 0.0015, + "loss": 1.8947, + "step": 1330 + }, + { + "epoch": 0.14135021097046413, + "grad_norm": 0.49804428219795227, + "learning_rate": 0.0015, + "loss": 1.89, + "step": 1340 + }, + { + "epoch": 0.14240506329113925, + "grad_norm": 0.5527216196060181, + "learning_rate": 0.0015, + "loss": 1.8954, + "step": 1350 + }, + { + "epoch": 0.14345991561181434, + "grad_norm": 0.5694833993911743, + "learning_rate": 0.0015, + "loss": 1.8879, + "step": 1360 + }, + { + "epoch": 0.14451476793248946, + "grad_norm": 0.5084961652755737, + "learning_rate": 0.0015, + "loss": 1.8835, + "step": 1370 + }, + { + "epoch": 0.14556962025316456, + "grad_norm": 0.541684091091156, + "learning_rate": 0.0015, + "loss": 1.8736, + "step": 1380 + }, + { + "epoch": 0.14662447257383968, + "grad_norm": 0.4399120509624481, + "learning_rate": 0.0015, + "loss": 1.8841, + "step": 1390 + }, + { + "epoch": 0.14767932489451477, + "grad_norm": 0.49186035990715027, + "learning_rate": 0.0015, + "loss": 1.8662, + "step": 1400 + }, + { + "epoch": 0.14873417721518986, + "grad_norm": 0.47912323474884033, + "learning_rate": 0.0015, + "loss": 1.8666, + "step": 1410 
+ }, + { + "epoch": 0.14978902953586498, + "grad_norm": 0.6098805069923401, + "learning_rate": 0.0015, + "loss": 1.8768, + "step": 1420 + }, + { + "epoch": 0.15084388185654007, + "grad_norm": 0.6260626316070557, + "learning_rate": 0.0015, + "loss": 1.8675, + "step": 1430 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 0.8695244789123535, + "learning_rate": 0.0015, + "loss": 1.8666, + "step": 1440 + }, + { + "epoch": 0.1529535864978903, + "grad_norm": 0.5537676215171814, + "learning_rate": 0.0015, + "loss": 1.8635, + "step": 1450 + }, + { + "epoch": 0.1540084388185654, + "grad_norm": 0.5429517030715942, + "learning_rate": 0.0015, + "loss": 1.8584, + "step": 1460 + }, + { + "epoch": 0.1550632911392405, + "grad_norm": 0.4902762770652771, + "learning_rate": 0.0015, + "loss": 1.8526, + "step": 1470 + }, + { + "epoch": 0.15611814345991562, + "grad_norm": 0.5063686966896057, + "learning_rate": 0.0015, + "loss": 1.8556, + "step": 1480 + }, + { + "epoch": 0.1571729957805907, + "grad_norm": 0.4521937966346741, + "learning_rate": 0.0015, + "loss": 1.8542, + "step": 1490 + }, + { + "epoch": 0.15822784810126583, + "grad_norm": 0.7287399172782898, + "learning_rate": 0.0015, + "loss": 1.8565, + "step": 1500 + }, + { + "epoch": 0.15928270042194093, + "grad_norm": 0.5698118805885315, + "learning_rate": 0.0015, + "loss": 1.854, + "step": 1510 + }, + { + "epoch": 0.16033755274261605, + "grad_norm": 0.5222446322441101, + "learning_rate": 0.0015, + "loss": 1.8559, + "step": 1520 + }, + { + "epoch": 0.16139240506329114, + "grad_norm": 0.5126030445098877, + "learning_rate": 0.0015, + "loss": 1.8343, + "step": 1530 + }, + { + "epoch": 0.16244725738396623, + "grad_norm": 0.6289701461791992, + "learning_rate": 0.0015, + "loss": 1.828, + "step": 1540 + }, + { + "epoch": 0.16350210970464135, + "grad_norm": 0.5062997341156006, + "learning_rate": 0.0015, + "loss": 1.8307, + "step": 1550 + }, + { + "epoch": 0.16455696202531644, + "grad_norm": 0.4616747498512268, + "learning_rate": 0.0015, 
+ "loss": 1.8323, + "step": 1560 + }, + { + "epoch": 0.16561181434599156, + "grad_norm": 0.6381959915161133, + "learning_rate": 0.0015, + "loss": 1.8252, + "step": 1570 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 0.8569875955581665, + "learning_rate": 0.0015, + "loss": 1.8402, + "step": 1580 + }, + { + "epoch": 0.16772151898734178, + "grad_norm": 0.4836009740829468, + "learning_rate": 0.0015, + "loss": 1.8276, + "step": 1590 + }, + { + "epoch": 0.16877637130801687, + "grad_norm": 0.5724925398826599, + "learning_rate": 0.0015, + "loss": 1.8315, + "step": 1600 + }, + { + "epoch": 0.169831223628692, + "grad_norm": 0.5199964642524719, + "learning_rate": 0.0015, + "loss": 1.8291, + "step": 1610 + }, + { + "epoch": 0.17088607594936708, + "grad_norm": 0.6030388474464417, + "learning_rate": 0.0015, + "loss": 1.8022, + "step": 1620 + }, + { + "epoch": 0.1719409282700422, + "grad_norm": 0.5144745707511902, + "learning_rate": 0.0015, + "loss": 1.8165, + "step": 1630 + }, + { + "epoch": 0.1729957805907173, + "grad_norm": 0.5104593634605408, + "learning_rate": 0.0015, + "loss": 1.8167, + "step": 1640 + }, + { + "epoch": 0.17405063291139242, + "grad_norm": 0.686623752117157, + "learning_rate": 0.0015, + "loss": 1.808, + "step": 1650 + }, + { + "epoch": 0.1751054852320675, + "grad_norm": 0.4637073278427124, + "learning_rate": 0.0015, + "loss": 1.8073, + "step": 1660 + }, + { + "epoch": 0.17616033755274263, + "grad_norm": 0.5601900815963745, + "learning_rate": 0.0015, + "loss": 1.8147, + "step": 1670 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 0.8115816116333008, + "learning_rate": 0.0015, + "loss": 1.8063, + "step": 1680 + }, + { + "epoch": 0.17827004219409281, + "grad_norm": 0.5719322562217712, + "learning_rate": 0.0015, + "loss": 1.7955, + "step": 1690 + }, + { + "epoch": 0.17932489451476794, + "grad_norm": 0.4701850712299347, + "learning_rate": 0.0015, + "loss": 1.8115, + "step": 1700 + }, + { + "epoch": 0.18037974683544303, + "grad_norm": 
0.535297155380249, + "learning_rate": 0.0015, + "loss": 1.7919, + "step": 1710 + }, + { + "epoch": 0.18143459915611815, + "grad_norm": 0.5622710585594177, + "learning_rate": 0.0015, + "loss": 1.7956, + "step": 1720 + }, + { + "epoch": 0.18248945147679324, + "grad_norm": 0.4849448502063751, + "learning_rate": 0.0015, + "loss": 1.7942, + "step": 1730 + }, + { + "epoch": 0.18354430379746836, + "grad_norm": 0.6260956525802612, + "learning_rate": 0.0015, + "loss": 1.7974, + "step": 1740 + }, + { + "epoch": 0.18459915611814345, + "grad_norm": 0.5183302164077759, + "learning_rate": 0.0015, + "loss": 1.8183, + "step": 1750 + }, + { + "epoch": 0.18565400843881857, + "grad_norm": 0.49603813886642456, + "learning_rate": 0.0015, + "loss": 1.7976, + "step": 1760 + }, + { + "epoch": 0.18670886075949367, + "grad_norm": 0.5160091519355774, + "learning_rate": 0.0015, + "loss": 1.7964, + "step": 1770 + }, + { + "epoch": 0.1877637130801688, + "grad_norm": 0.6302099227905273, + "learning_rate": 0.0015, + "loss": 1.7947, + "step": 1780 + }, + { + "epoch": 0.18881856540084388, + "grad_norm": 0.6066163778305054, + "learning_rate": 0.0015, + "loss": 1.7939, + "step": 1790 + }, + { + "epoch": 0.189873417721519, + "grad_norm": 0.8006702065467834, + "learning_rate": 0.0015, + "loss": 1.7888, + "step": 1800 + }, + { + "epoch": 0.1909282700421941, + "grad_norm": 0.44408902525901794, + "learning_rate": 0.0015, + "loss": 1.7812, + "step": 1810 + }, + { + "epoch": 0.19198312236286919, + "grad_norm": 0.6326849460601807, + "learning_rate": 0.0015, + "loss": 1.7835, + "step": 1820 + }, + { + "epoch": 0.1930379746835443, + "grad_norm": 0.5474056005477905, + "learning_rate": 0.0015, + "loss": 1.7788, + "step": 1830 + }, + { + "epoch": 0.1940928270042194, + "grad_norm": 0.4583626687526703, + "learning_rate": 0.0015, + "loss": 1.7756, + "step": 1840 + }, + { + "epoch": 0.19514767932489452, + "grad_norm": 0.5443903207778931, + "learning_rate": 0.0015, + "loss": 1.776, + "step": 1850 + }, + { + "epoch": 
0.1962025316455696, + "grad_norm": 0.6028438806533813, + "learning_rate": 0.0015, + "loss": 1.7816, + "step": 1860 + }, + { + "epoch": 0.19725738396624473, + "grad_norm": 0.8050011992454529, + "learning_rate": 0.0015, + "loss": 1.7832, + "step": 1870 + }, + { + "epoch": 0.19831223628691982, + "grad_norm": 0.5138239860534668, + "learning_rate": 0.0015, + "loss": 1.7663, + "step": 1880 + }, + { + "epoch": 0.19936708860759494, + "grad_norm": 0.5934531688690186, + "learning_rate": 0.0015, + "loss": 1.7715, + "step": 1890 + }, + { + "epoch": 0.20042194092827004, + "grad_norm": 0.5334857702255249, + "learning_rate": 0.0015, + "loss": 1.7711, + "step": 1900 + }, + { + "epoch": 0.20147679324894516, + "grad_norm": 0.6304555535316467, + "learning_rate": 0.0015, + "loss": 1.7701, + "step": 1910 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 0.5386019945144653, + "learning_rate": 0.0015, + "loss": 1.7676, + "step": 1920 + }, + { + "epoch": 0.20358649789029537, + "grad_norm": 0.8461247086524963, + "learning_rate": 0.0015, + "loss": 1.7696, + "step": 1930 + }, + { + "epoch": 0.20464135021097046, + "grad_norm": 0.5659627318382263, + "learning_rate": 0.0015, + "loss": 1.7576, + "step": 1940 + }, + { + "epoch": 0.20569620253164558, + "grad_norm": 0.45056530833244324, + "learning_rate": 0.0015, + "loss": 1.7592, + "step": 1950 + }, + { + "epoch": 0.20675105485232068, + "grad_norm": 0.5300774574279785, + "learning_rate": 0.0015, + "loss": 1.7616, + "step": 1960 + }, + { + "epoch": 0.20780590717299577, + "grad_norm": 0.4294365644454956, + "learning_rate": 0.0015, + "loss": 1.7708, + "step": 1970 + }, + { + "epoch": 0.2088607594936709, + "grad_norm": 0.7624651193618774, + "learning_rate": 0.0015, + "loss": 1.7542, + "step": 1980 + }, + { + "epoch": 0.20991561181434598, + "grad_norm": 0.8281710743904114, + "learning_rate": 0.0015, + "loss": 1.751, + "step": 1990 + }, + { + "epoch": 0.2109704641350211, + "grad_norm": 0.49018529057502747, + "learning_rate": 0.0015, + "loss": 
1.752, + "step": 2000 + }, + { + "epoch": 0.2120253164556962, + "grad_norm": 0.6264277696609497, + "learning_rate": 0.0015, + "loss": 1.7565, + "step": 2010 + }, + { + "epoch": 0.21308016877637131, + "grad_norm": 0.720066249370575, + "learning_rate": 0.0015, + "loss": 1.7686, + "step": 2020 + }, + { + "epoch": 0.2141350210970464, + "grad_norm": 0.48484906554222107, + "learning_rate": 0.0015, + "loss": 1.7502, + "step": 2030 + }, + { + "epoch": 0.21518987341772153, + "grad_norm": 0.4189296364784241, + "learning_rate": 0.0015, + "loss": 1.7496, + "step": 2040 + }, + { + "epoch": 0.21624472573839662, + "grad_norm": 0.43970799446105957, + "learning_rate": 0.0015, + "loss": 1.751, + "step": 2050 + }, + { + "epoch": 0.21729957805907174, + "grad_norm": 0.9812093377113342, + "learning_rate": 0.0015, + "loss": 1.7439, + "step": 2060 + }, + { + "epoch": 0.21835443037974683, + "grad_norm": 0.8057104349136353, + "learning_rate": 0.0015, + "loss": 1.7427, + "step": 2070 + }, + { + "epoch": 0.21940928270042195, + "grad_norm": 0.5278838872909546, + "learning_rate": 0.0015, + "loss": 1.7466, + "step": 2080 + }, + { + "epoch": 0.22046413502109705, + "grad_norm": 0.5162642598152161, + "learning_rate": 0.0015, + "loss": 1.7364, + "step": 2090 + }, + { + "epoch": 0.22151898734177214, + "grad_norm": 0.4243667721748352, + "learning_rate": 0.0015, + "loss": 1.7465, + "step": 2100 + }, + { + "epoch": 0.22257383966244726, + "grad_norm": 0.5826160907745361, + "learning_rate": 0.0015, + "loss": 1.7353, + "step": 2110 + }, + { + "epoch": 0.22362869198312235, + "grad_norm": 0.6756936311721802, + "learning_rate": 0.0015, + "loss": 1.7338, + "step": 2120 + }, + { + "epoch": 0.22468354430379747, + "grad_norm": 0.43055227398872375, + "learning_rate": 0.0015, + "loss": 1.7379, + "step": 2130 + }, + { + "epoch": 0.22573839662447256, + "grad_norm": 0.6738662719726562, + "learning_rate": 0.0015, + "loss": 1.7307, + "step": 2140 + }, + { + "epoch": 0.22679324894514769, + "grad_norm": 
0.5482190251350403, + "learning_rate": 0.0015, + "loss": 1.7379, + "step": 2150 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 0.8880805373191833, + "learning_rate": 0.0015, + "loss": 1.752, + "step": 2160 + }, + { + "epoch": 0.2289029535864979, + "grad_norm": 0.44593849778175354, + "learning_rate": 0.0015, + "loss": 1.7368, + "step": 2170 + }, + { + "epoch": 0.229957805907173, + "grad_norm": 0.5845939517021179, + "learning_rate": 0.0015, + "loss": 1.7188, + "step": 2180 + }, + { + "epoch": 0.2310126582278481, + "grad_norm": 0.4610976278781891, + "learning_rate": 0.0015, + "loss": 1.736, + "step": 2190 + }, + { + "epoch": 0.2320675105485232, + "grad_norm": 0.7984962463378906, + "learning_rate": 0.0015, + "loss": 1.735, + "step": 2200 + }, + { + "epoch": 0.23312236286919832, + "grad_norm": 0.479261189699173, + "learning_rate": 0.0015, + "loss": 1.7216, + "step": 2210 + }, + { + "epoch": 0.23417721518987342, + "grad_norm": 0.5528878569602966, + "learning_rate": 0.0015, + "loss": 1.7316, + "step": 2220 + }, + { + "epoch": 0.23523206751054854, + "grad_norm": 0.47382694482803345, + "learning_rate": 0.0015, + "loss": 1.7248, + "step": 2230 + }, + { + "epoch": 0.23628691983122363, + "grad_norm": 0.4474259912967682, + "learning_rate": 0.0015, + "loss": 1.7175, + "step": 2240 + }, + { + "epoch": 0.23734177215189872, + "grad_norm": 0.4632035791873932, + "learning_rate": 0.0015, + "loss": 1.7171, + "step": 2250 + }, + { + "epoch": 0.23839662447257384, + "grad_norm": 0.7560294270515442, + "learning_rate": 0.0015, + "loss": 1.7133, + "step": 2260 + }, + { + "epoch": 0.23945147679324894, + "grad_norm": 0.6277971863746643, + "learning_rate": 0.0015, + "loss": 1.7402, + "step": 2270 + }, + { + "epoch": 0.24050632911392406, + "grad_norm": 0.5490138530731201, + "learning_rate": 0.0015, + "loss": 1.7278, + "step": 2280 + }, + { + "epoch": 0.24156118143459915, + "grad_norm": 0.9364339113235474, + "learning_rate": 0.0015, + "loss": 1.7159, + "step": 2290 + }, + { + "epoch": 
0.24261603375527427, + "grad_norm": 0.5979941487312317, + "learning_rate": 0.0015, + "loss": 1.7124, + "step": 2300 + }, + { + "epoch": 0.24367088607594936, + "grad_norm": 0.8417498469352722, + "learning_rate": 0.0015, + "loss": 1.717, + "step": 2310 + }, + { + "epoch": 0.24472573839662448, + "grad_norm": 0.4808991551399231, + "learning_rate": 0.0015, + "loss": 1.7226, + "step": 2320 + }, + { + "epoch": 0.24578059071729957, + "grad_norm": 0.4588165879249573, + "learning_rate": 0.0015, + "loss": 1.7037, + "step": 2330 + }, + { + "epoch": 0.2468354430379747, + "grad_norm": 0.6004635095596313, + "learning_rate": 0.0015, + "loss": 1.7037, + "step": 2340 + }, + { + "epoch": 0.2478902953586498, + "grad_norm": 0.7591840028762817, + "learning_rate": 0.0015, + "loss": 1.6986, + "step": 2350 + }, + { + "epoch": 0.2489451476793249, + "grad_norm": 0.6121309995651245, + "learning_rate": 0.0015, + "loss": 1.7124, + "step": 2360 + }, + { + "epoch": 0.25, + "grad_norm": 0.5905430912971497, + "learning_rate": 0.0015, + "loss": 1.7085, + "step": 2370 + }, + { + "epoch": 0.2510548523206751, + "grad_norm": 0.8464624285697937, + "learning_rate": 0.0015, + "loss": 1.708, + "step": 2380 + }, + { + "epoch": 0.2521097046413502, + "grad_norm": 0.8337712287902832, + "learning_rate": 0.0015, + "loss": 1.7081, + "step": 2390 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 0.4713364839553833, + "learning_rate": 0.0015, + "loss": 1.7023, + "step": 2400 + }, + { + "epoch": 0.2542194092827004, + "grad_norm": 0.6810732483863831, + "learning_rate": 0.0015, + "loss": 1.7072, + "step": 2410 + }, + { + "epoch": 0.2552742616033755, + "grad_norm": 0.5745030045509338, + "learning_rate": 0.0015, + "loss": 1.7042, + "step": 2420 + }, + { + "epoch": 0.2563291139240506, + "grad_norm": 0.5170700550079346, + "learning_rate": 0.0015, + "loss": 1.7009, + "step": 2430 + }, + { + "epoch": 0.25738396624472576, + "grad_norm": 0.7049311399459839, + "learning_rate": 0.0015, + "loss": 1.7082, + "step": 2440 + }, 
+ { + "epoch": 0.25843881856540085, + "grad_norm": 0.5195064544677734, + "learning_rate": 0.0015, + "loss": 1.6935, + "step": 2450 + }, + { + "epoch": 0.25949367088607594, + "grad_norm": 0.46363475918769836, + "learning_rate": 0.0015, + "loss": 1.7066, + "step": 2460 + }, + { + "epoch": 0.26054852320675104, + "grad_norm": 0.4979170858860016, + "learning_rate": 0.0015, + "loss": 1.697, + "step": 2470 + }, + { + "epoch": 0.2616033755274262, + "grad_norm": 0.6032502055168152, + "learning_rate": 0.0015, + "loss": 1.7018, + "step": 2480 + }, + { + "epoch": 0.2626582278481013, + "grad_norm": 1.4362196922302246, + "learning_rate": 0.0015, + "loss": 1.6966, + "step": 2490 + }, + { + "epoch": 0.26371308016877637, + "grad_norm": 0.49109920859336853, + "learning_rate": 0.0015, + "loss": 1.6975, + "step": 2500 + }, + { + "epoch": 0.26476793248945146, + "grad_norm": 0.46409809589385986, + "learning_rate": 0.0015, + "loss": 1.6911, + "step": 2510 + }, + { + "epoch": 0.26582278481012656, + "grad_norm": 0.5044241547584534, + "learning_rate": 0.0015, + "loss": 1.6978, + "step": 2520 + }, + { + "epoch": 0.2668776371308017, + "grad_norm": 0.6585612297058105, + "learning_rate": 0.0015, + "loss": 1.688, + "step": 2530 + }, + { + "epoch": 0.2679324894514768, + "grad_norm": 0.5030466318130493, + "learning_rate": 0.0015, + "loss": 1.6959, + "step": 2540 + }, + { + "epoch": 0.2689873417721519, + "grad_norm": 0.44553372263908386, + "learning_rate": 0.0015, + "loss": 1.6851, + "step": 2550 + }, + { + "epoch": 0.270042194092827, + "grad_norm": 0.4382231831550598, + "learning_rate": 0.0015, + "loss": 1.6926, + "step": 2560 + }, + { + "epoch": 0.27109704641350213, + "grad_norm": 0.572163462638855, + "learning_rate": 0.0015, + "loss": 1.6882, + "step": 2570 + }, + { + "epoch": 0.2721518987341772, + "grad_norm": 0.4450538754463196, + "learning_rate": 0.0015, + "loss": 1.6888, + "step": 2580 + }, + { + "epoch": 0.2732067510548523, + "grad_norm": 0.46374884247779846, + "learning_rate": 0.0015, + 
"loss": 1.6928, + "step": 2590 + }, + { + "epoch": 0.2742616033755274, + "grad_norm": 0.5283554792404175, + "learning_rate": 0.0015, + "loss": 1.6939, + "step": 2600 + }, + { + "epoch": 0.27531645569620256, + "grad_norm": 0.6039606332778931, + "learning_rate": 0.0015, + "loss": 1.6931, + "step": 2610 + }, + { + "epoch": 0.27637130801687765, + "grad_norm": 0.8294399380683899, + "learning_rate": 0.0015, + "loss": 1.6863, + "step": 2620 + }, + { + "epoch": 0.27742616033755274, + "grad_norm": 0.8275244832038879, + "learning_rate": 0.0015, + "loss": 1.6808, + "step": 2630 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 0.5573651194572449, + "learning_rate": 0.0015, + "loss": 1.69, + "step": 2640 + }, + { + "epoch": 0.2795358649789029, + "grad_norm": 0.512090265750885, + "learning_rate": 0.0015, + "loss": 1.682, + "step": 2650 + }, + { + "epoch": 0.2805907172995781, + "grad_norm": 0.5057612657546997, + "learning_rate": 0.0015, + "loss": 1.6739, + "step": 2660 + }, + { + "epoch": 0.28164556962025317, + "grad_norm": 0.4737897217273712, + "learning_rate": 0.0015, + "loss": 1.6771, + "step": 2670 + }, + { + "epoch": 0.28270042194092826, + "grad_norm": 0.5719987154006958, + "learning_rate": 0.0015, + "loss": 1.6803, + "step": 2680 + }, + { + "epoch": 0.28375527426160335, + "grad_norm": 0.8795638680458069, + "learning_rate": 0.0015, + "loss": 1.6787, + "step": 2690 + }, + { + "epoch": 0.2848101265822785, + "grad_norm": 0.4607473313808441, + "learning_rate": 0.0015, + "loss": 1.6759, + "step": 2700 + }, + { + "epoch": 0.2858649789029536, + "grad_norm": 0.7981528639793396, + "learning_rate": 0.0015, + "loss": 1.6809, + "step": 2710 + }, + { + "epoch": 0.2869198312236287, + "grad_norm": 0.4333970546722412, + "learning_rate": 0.0015, + "loss": 1.6762, + "step": 2720 + }, + { + "epoch": 0.2879746835443038, + "grad_norm": 0.50799560546875, + "learning_rate": 0.0015, + "loss": 1.6593, + "step": 2730 + }, + { + "epoch": 0.2890295358649789, + "grad_norm": 0.9140480160713196, + 
"learning_rate": 0.0015, + "loss": 1.6704, + "step": 2740 + }, + { + "epoch": 0.290084388185654, + "grad_norm": 0.4954560697078705, + "learning_rate": 0.0015, + "loss": 1.6766, + "step": 2750 + }, + { + "epoch": 0.2911392405063291, + "grad_norm": 0.5669369697570801, + "learning_rate": 0.0015, + "loss": 1.6824, + "step": 2760 + }, + { + "epoch": 0.2921940928270042, + "grad_norm": 0.5241600275039673, + "learning_rate": 0.0015, + "loss": 1.6805, + "step": 2770 + }, + { + "epoch": 0.29324894514767935, + "grad_norm": 0.5225156545639038, + "learning_rate": 0.0015, + "loss": 1.6742, + "step": 2780 + }, + { + "epoch": 0.29430379746835444, + "grad_norm": 0.8474009037017822, + "learning_rate": 0.0015, + "loss": 1.6732, + "step": 2790 + }, + { + "epoch": 0.29535864978902954, + "grad_norm": 0.9528505802154541, + "learning_rate": 0.0015, + "loss": 1.6725, + "step": 2800 + }, + { + "epoch": 0.29641350210970463, + "grad_norm": 0.6440209746360779, + "learning_rate": 0.0015, + "loss": 1.6707, + "step": 2810 + }, + { + "epoch": 0.2974683544303797, + "grad_norm": 0.4494965374469757, + "learning_rate": 0.0015, + "loss": 1.6792, + "step": 2820 + }, + { + "epoch": 0.29852320675105487, + "grad_norm": 0.6342407464981079, + "learning_rate": 0.0015, + "loss": 1.6491, + "step": 2830 + }, + { + "epoch": 0.29957805907172996, + "grad_norm": 0.5306560397148132, + "learning_rate": 0.0015, + "loss": 1.6538, + "step": 2840 + }, + { + "epoch": 0.30063291139240506, + "grad_norm": 0.887423038482666, + "learning_rate": 0.0015, + "loss": 1.66, + "step": 2850 + }, + { + "epoch": 0.30168776371308015, + "grad_norm": 0.5342133045196533, + "learning_rate": 0.0015, + "loss": 1.6641, + "step": 2860 + }, + { + "epoch": 0.3027426160337553, + "grad_norm": 0.5690568089485168, + "learning_rate": 0.0015, + "loss": 1.665, + "step": 2870 + }, + { + "epoch": 0.3037974683544304, + "grad_norm": 0.9180155396461487, + "learning_rate": 0.0015, + "loss": 1.6522, + "step": 2880 + }, + { + "epoch": 0.3048523206751055, + 
"grad_norm": 0.49240967631340027, + "learning_rate": 0.0015, + "loss": 1.6577, + "step": 2890 + }, + { + "epoch": 0.3059071729957806, + "grad_norm": 0.5052701830863953, + "learning_rate": 0.0015, + "loss": 1.6636, + "step": 2900 + }, + { + "epoch": 0.3069620253164557, + "grad_norm": 0.5250066518783569, + "learning_rate": 0.0015, + "loss": 1.6677, + "step": 2910 + }, + { + "epoch": 0.3080168776371308, + "grad_norm": 0.6492303013801575, + "learning_rate": 0.0015, + "loss": 1.6564, + "step": 2920 + }, + { + "epoch": 0.3090717299578059, + "grad_norm": 0.739266574382782, + "learning_rate": 0.0015, + "loss": 1.6542, + "step": 2930 + }, + { + "epoch": 0.310126582278481, + "grad_norm": 0.4702005386352539, + "learning_rate": 0.0015, + "loss": 1.6595, + "step": 2940 + }, + { + "epoch": 0.3111814345991561, + "grad_norm": 0.6040682196617126, + "learning_rate": 0.0015, + "loss": 1.6675, + "step": 2950 + }, + { + "epoch": 0.31223628691983124, + "grad_norm": 0.6438997983932495, + "learning_rate": 0.0015, + "loss": 1.6472, + "step": 2960 + }, + { + "epoch": 0.31329113924050633, + "grad_norm": 0.5325720906257629, + "learning_rate": 0.0015, + "loss": 1.6406, + "step": 2970 + }, + { + "epoch": 0.3143459915611814, + "grad_norm": 0.6444641947746277, + "learning_rate": 0.0015, + "loss": 1.6481, + "step": 2980 + }, + { + "epoch": 0.3154008438818565, + "grad_norm": 0.4740985333919525, + "learning_rate": 0.0015, + "loss": 1.6659, + "step": 2990 + }, + { + "epoch": 0.31645569620253167, + "grad_norm": 0.4967401325702667, + "learning_rate": 0.0015, + "loss": 1.6432, + "step": 3000 + }, + { + "epoch": 0.31751054852320676, + "grad_norm": 1.3561769723892212, + "learning_rate": 0.0015, + "loss": 1.6535, + "step": 3010 + }, + { + "epoch": 0.31856540084388185, + "grad_norm": 0.9485900402069092, + "learning_rate": 0.0015, + "loss": 1.6623, + "step": 3020 + }, + { + "epoch": 0.31962025316455694, + "grad_norm": 0.46477529406547546, + "learning_rate": 0.0015, + "loss": 1.6381, + "step": 3030 + }, + { + 
"epoch": 0.3206751054852321, + "grad_norm": 0.5185746550559998, + "learning_rate": 0.0015, + "loss": 1.645, + "step": 3040 + }, + { + "epoch": 0.3217299578059072, + "grad_norm": 0.44782137870788574, + "learning_rate": 0.0015, + "loss": 1.6417, + "step": 3050 + }, + { + "epoch": 0.3227848101265823, + "grad_norm": 0.5505303740501404, + "learning_rate": 0.0015, + "loss": 1.6533, + "step": 3060 + }, + { + "epoch": 0.32383966244725737, + "grad_norm": 0.5244767069816589, + "learning_rate": 0.0015, + "loss": 1.6446, + "step": 3070 + }, + { + "epoch": 0.32489451476793246, + "grad_norm": 0.684838056564331, + "learning_rate": 0.0015, + "loss": 1.6319, + "step": 3080 + }, + { + "epoch": 0.3259493670886076, + "grad_norm": 0.6142449378967285, + "learning_rate": 0.0015, + "loss": 1.6519, + "step": 3090 + }, + { + "epoch": 0.3270042194092827, + "grad_norm": 0.8353508710861206, + "learning_rate": 0.0015, + "loss": 1.6386, + "step": 3100 + }, + { + "epoch": 0.3280590717299578, + "grad_norm": 0.44266948103904724, + "learning_rate": 0.0015, + "loss": 1.6413, + "step": 3110 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 0.5067423582077026, + "learning_rate": 0.0015, + "loss": 1.644, + "step": 3120 + }, + { + "epoch": 0.33016877637130804, + "grad_norm": 0.5164125561714172, + "learning_rate": 0.0015, + "loss": 1.656, + "step": 3130 + }, + { + "epoch": 0.33122362869198313, + "grad_norm": 1.0247501134872437, + "learning_rate": 0.0015, + "loss": 1.6339, + "step": 3140 + }, + { + "epoch": 0.3322784810126582, + "grad_norm": 0.5098726153373718, + "learning_rate": 0.0015, + "loss": 1.6439, + "step": 3150 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.5366957187652588, + "learning_rate": 0.0015, + "loss": 1.6428, + "step": 3160 + }, + { + "epoch": 0.33438818565400846, + "grad_norm": 0.5414708852767944, + "learning_rate": 0.0015, + "loss": 1.6438, + "step": 3170 + }, + { + "epoch": 0.33544303797468356, + "grad_norm": 0.5522865653038025, + "learning_rate": 0.0015, + "loss": 
1.6341, + "step": 3180 + }, + { + "epoch": 0.33649789029535865, + "grad_norm": 0.7166458964347839, + "learning_rate": 0.0015, + "loss": 1.6359, + "step": 3190 + }, + { + "epoch": 0.33755274261603374, + "grad_norm": 0.4831211268901825, + "learning_rate": 0.0015, + "loss": 1.6298, + "step": 3200 + }, + { + "epoch": 0.33860759493670883, + "grad_norm": 0.454694539308548, + "learning_rate": 0.0015, + "loss": 1.6484, + "step": 3210 + }, + { + "epoch": 0.339662447257384, + "grad_norm": 0.5077452659606934, + "learning_rate": 0.0015, + "loss": 1.6366, + "step": 3220 + }, + { + "epoch": 0.3407172995780591, + "grad_norm": 0.6335849165916443, + "learning_rate": 0.0015, + "loss": 1.6281, + "step": 3230 + }, + { + "epoch": 0.34177215189873417, + "grad_norm": 0.5139283537864685, + "learning_rate": 0.0015, + "loss": 1.6125, + "step": 3240 + }, + { + "epoch": 0.34282700421940926, + "grad_norm": 0.680316686630249, + "learning_rate": 0.0015, + "loss": 1.6342, + "step": 3250 + }, + { + "epoch": 0.3438818565400844, + "grad_norm": 0.5644792318344116, + "learning_rate": 0.0015, + "loss": 1.6269, + "step": 3260 + }, + { + "epoch": 0.3449367088607595, + "grad_norm": 0.4449726641178131, + "learning_rate": 0.0015, + "loss": 1.641, + "step": 3270 + }, + { + "epoch": 0.3459915611814346, + "grad_norm": 0.6795740723609924, + "learning_rate": 0.0015, + "loss": 1.6442, + "step": 3280 + }, + { + "epoch": 0.3470464135021097, + "grad_norm": 1.046998143196106, + "learning_rate": 0.0015, + "loss": 1.6308, + "step": 3290 + }, + { + "epoch": 0.34810126582278483, + "grad_norm": 0.5865319967269897, + "learning_rate": 0.0015, + "loss": 1.6334, + "step": 3300 + }, + { + "epoch": 0.3491561181434599, + "grad_norm": 0.4926215708255768, + "learning_rate": 0.0015, + "loss": 1.6301, + "step": 3310 + }, + { + "epoch": 0.350210970464135, + "grad_norm": 0.47046005725860596, + "learning_rate": 0.0015, + "loss": 1.6353, + "step": 3320 + }, + { + "epoch": 0.3512658227848101, + "grad_norm": 0.47757425904273987, + 
"learning_rate": 0.0015, + "loss": 1.6253, + "step": 3330 + }, + { + "epoch": 0.35232067510548526, + "grad_norm": 0.5514276623725891, + "learning_rate": 0.0015, + "loss": 1.627, + "step": 3340 + }, + { + "epoch": 0.35337552742616035, + "grad_norm": 0.7312896847724915, + "learning_rate": 0.0015, + "loss": 1.6243, + "step": 3350 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 0.4865380823612213, + "learning_rate": 0.0015, + "loss": 1.6234, + "step": 3360 + }, + { + "epoch": 0.35548523206751054, + "grad_norm": 0.47738945484161377, + "learning_rate": 0.0015, + "loss": 1.6317, + "step": 3370 + }, + { + "epoch": 0.35654008438818563, + "grad_norm": 0.456064373254776, + "learning_rate": 0.0015, + "loss": 1.6228, + "step": 3380 + }, + { + "epoch": 0.3575949367088608, + "grad_norm": 0.9585252404212952, + "learning_rate": 0.0015, + "loss": 1.6196, + "step": 3390 + }, + { + "epoch": 0.35864978902953587, + "grad_norm": 0.8172290921211243, + "learning_rate": 0.0015, + "loss": 1.6293, + "step": 3400 + }, + { + "epoch": 0.35970464135021096, + "grad_norm": 0.6002830862998962, + "learning_rate": 0.0015, + "loss": 1.6266, + "step": 3410 + }, + { + "epoch": 0.36075949367088606, + "grad_norm": 0.5298130512237549, + "learning_rate": 0.0015, + "loss": 1.6045, + "step": 3420 + }, + { + "epoch": 0.3618143459915612, + "grad_norm": 0.4257142245769501, + "learning_rate": 0.0015, + "loss": 1.6215, + "step": 3430 + }, + { + "epoch": 0.3628691983122363, + "grad_norm": 0.8374847769737244, + "learning_rate": 0.0015, + "loss": 1.619, + "step": 3440 + }, + { + "epoch": 0.3639240506329114, + "grad_norm": 0.4771282970905304, + "learning_rate": 0.0015, + "loss": 1.6136, + "step": 3450 + }, + { + "epoch": 0.3649789029535865, + "grad_norm": 0.6299778819084167, + "learning_rate": 0.0015, + "loss": 1.6117, + "step": 3460 + }, + { + "epoch": 0.36603375527426163, + "grad_norm": 0.5360429286956787, + "learning_rate": 0.0015, + "loss": 1.6264, + "step": 3470 + }, + { + "epoch": 0.3670886075949367, + 
"grad_norm": 0.49939456582069397, + "learning_rate": 0.0015, + "loss": 1.6315, + "step": 3480 + }, + { + "epoch": 0.3681434599156118, + "grad_norm": 1.1296309232711792, + "learning_rate": 0.0015, + "loss": 1.6277, + "step": 3490 + }, + { + "epoch": 0.3691983122362869, + "grad_norm": 0.5995752215385437, + "learning_rate": 0.0015, + "loss": 1.606, + "step": 3500 + }, + { + "epoch": 0.370253164556962, + "grad_norm": 0.5539703369140625, + "learning_rate": 0.0015, + "loss": 1.6215, + "step": 3510 + }, + { + "epoch": 0.37130801687763715, + "grad_norm": 0.4397030472755432, + "learning_rate": 0.0015, + "loss": 1.6184, + "step": 3520 + }, + { + "epoch": 0.37236286919831224, + "grad_norm": 0.7149756550788879, + "learning_rate": 0.0015, + "loss": 1.6267, + "step": 3530 + }, + { + "epoch": 0.37341772151898733, + "grad_norm": 0.5444366335868835, + "learning_rate": 0.0015, + "loss": 1.6232, + "step": 3540 + }, + { + "epoch": 0.3744725738396624, + "grad_norm": 0.5677608251571655, + "learning_rate": 0.0015, + "loss": 1.6142, + "step": 3550 + }, + { + "epoch": 0.3755274261603376, + "grad_norm": 0.5791630744934082, + "learning_rate": 0.0015, + "loss": 1.6148, + "step": 3560 + }, + { + "epoch": 0.37658227848101267, + "grad_norm": 0.5979585647583008, + "learning_rate": 0.0015, + "loss": 1.6111, + "step": 3570 + }, + { + "epoch": 0.37763713080168776, + "grad_norm": 0.4636039435863495, + "learning_rate": 0.0015, + "loss": 1.6126, + "step": 3580 + }, + { + "epoch": 0.37869198312236285, + "grad_norm": 0.4689747989177704, + "learning_rate": 0.0015, + "loss": 1.6212, + "step": 3590 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 1.0875957012176514, + "learning_rate": 0.0015, + "loss": 1.6053, + "step": 3600 + }, + { + "epoch": 0.3808016877637131, + "grad_norm": 0.8120474219322205, + "learning_rate": 0.0015, + "loss": 1.6089, + "step": 3610 + }, + { + "epoch": 0.3818565400843882, + "grad_norm": 0.5953245162963867, + "learning_rate": 0.0015, + "loss": 1.6054, + "step": 3620 + }, + { + 
"epoch": 0.3829113924050633, + "grad_norm": 0.8343967795372009, + "learning_rate": 0.0015, + "loss": 1.6097, + "step": 3630 + }, + { + "epoch": 0.38396624472573837, + "grad_norm": 0.50998854637146, + "learning_rate": 0.0015, + "loss": 1.6093, + "step": 3640 + }, + { + "epoch": 0.3850210970464135, + "grad_norm": 0.6544301509857178, + "learning_rate": 0.0015, + "loss": 1.6149, + "step": 3650 + }, + { + "epoch": 0.3860759493670886, + "grad_norm": 0.5464091300964355, + "learning_rate": 0.0015, + "loss": 1.6105, + "step": 3660 + }, + { + "epoch": 0.3871308016877637, + "grad_norm": 0.6660382151603699, + "learning_rate": 0.0015, + "loss": 1.6008, + "step": 3670 + }, + { + "epoch": 0.3881856540084388, + "grad_norm": 0.7359334826469421, + "learning_rate": 0.0015, + "loss": 1.6077, + "step": 3680 + }, + { + "epoch": 0.38924050632911394, + "grad_norm": 0.5046261548995972, + "learning_rate": 0.0015, + "loss": 1.6176, + "step": 3690 + }, + { + "epoch": 0.39029535864978904, + "grad_norm": 0.5216406583786011, + "learning_rate": 0.0015, + "loss": 1.62, + "step": 3700 + }, + { + "epoch": 0.39135021097046413, + "grad_norm": 0.6172910332679749, + "learning_rate": 0.0015, + "loss": 1.6043, + "step": 3710 + }, + { + "epoch": 0.3924050632911392, + "grad_norm": 0.849410355091095, + "learning_rate": 0.0015, + "loss": 1.5866, + "step": 3720 + }, + { + "epoch": 0.39345991561181437, + "grad_norm": 0.6826720833778381, + "learning_rate": 0.0015, + "loss": 1.6075, + "step": 3730 + }, + { + "epoch": 0.39451476793248946, + "grad_norm": 0.49372223019599915, + "learning_rate": 0.0015, + "loss": 1.6071, + "step": 3740 + }, + { + "epoch": 0.39556962025316456, + "grad_norm": 0.6839715838432312, + "learning_rate": 0.0015, + "loss": 1.6076, + "step": 3750 + }, + { + "epoch": 0.39662447257383965, + "grad_norm": 0.46212440729141235, + "learning_rate": 0.0015, + "loss": 1.5991, + "step": 3760 + }, + { + "epoch": 0.39767932489451474, + "grad_norm": 0.7888845205307007, + "learning_rate": 0.0015, + "loss": 
1.615, + "step": 3770 + }, + { + "epoch": 0.3987341772151899, + "grad_norm": 0.48591920733451843, + "learning_rate": 0.0015, + "loss": 1.6026, + "step": 3780 + }, + { + "epoch": 0.399789029535865, + "grad_norm": 0.46042948961257935, + "learning_rate": 0.0015, + "loss": 1.6032, + "step": 3790 + }, + { + "epoch": 0.4008438818565401, + "grad_norm": 0.5190149545669556, + "learning_rate": 0.0015, + "loss": 1.6085, + "step": 3800 + }, + { + "epoch": 0.40189873417721517, + "grad_norm": 0.7054757475852966, + "learning_rate": 0.0015, + "loss": 1.6004, + "step": 3810 + }, + { + "epoch": 0.4029535864978903, + "grad_norm": 0.5426971316337585, + "learning_rate": 0.0015, + "loss": 1.5902, + "step": 3820 + }, + { + "epoch": 0.4040084388185654, + "grad_norm": 0.49252286553382874, + "learning_rate": 0.0015, + "loss": 1.5953, + "step": 3830 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 0.4810856580734253, + "learning_rate": 0.0015, + "loss": 1.6094, + "step": 3840 + }, + { + "epoch": 0.4061181434599156, + "grad_norm": 0.5096778869628906, + "learning_rate": 0.0015, + "loss": 1.5915, + "step": 3850 + }, + { + "epoch": 0.40717299578059074, + "grad_norm": 0.4748266637325287, + "learning_rate": 0.0015, + "loss": 1.6084, + "step": 3860 + }, + { + "epoch": 0.40822784810126583, + "grad_norm": 1.0327509641647339, + "learning_rate": 0.0015, + "loss": 1.5977, + "step": 3870 + }, + { + "epoch": 0.4092827004219409, + "grad_norm": 0.6517208814620972, + "learning_rate": 0.0015, + "loss": 1.5859, + "step": 3880 + }, + { + "epoch": 0.410337552742616, + "grad_norm": 1.1022732257843018, + "learning_rate": 0.0015, + "loss": 1.5864, + "step": 3890 + }, + { + "epoch": 0.41139240506329117, + "grad_norm": 0.5637365579605103, + "learning_rate": 0.0015, + "loss": 1.5921, + "step": 3900 + }, + { + "epoch": 0.41244725738396626, + "grad_norm": 0.6358108520507812, + "learning_rate": 0.0015, + "loss": 1.5922, + "step": 3910 + }, + { + "epoch": 0.41350210970464135, + "grad_norm": 0.6844465136528015, + 
"learning_rate": 0.0015, + "loss": 1.5887, + "step": 3920 + }, + { + "epoch": 0.41455696202531644, + "grad_norm": 0.5282321572303772, + "learning_rate": 0.0015, + "loss": 1.5947, + "step": 3930 + }, + { + "epoch": 0.41561181434599154, + "grad_norm": 0.5245356559753418, + "learning_rate": 0.0015, + "loss": 1.5885, + "step": 3940 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 0.5230286121368408, + "learning_rate": 0.0015, + "loss": 1.5932, + "step": 3950 + }, + { + "epoch": 0.4177215189873418, + "grad_norm": 0.6528234481811523, + "learning_rate": 0.0015, + "loss": 1.5927, + "step": 3960 + }, + { + "epoch": 0.41877637130801687, + "grad_norm": 0.5511032938957214, + "learning_rate": 0.0015, + "loss": 1.5808, + "step": 3970 + }, + { + "epoch": 0.41983122362869196, + "grad_norm": 0.5456432104110718, + "learning_rate": 0.0015, + "loss": 1.5926, + "step": 3980 + }, + { + "epoch": 0.4208860759493671, + "grad_norm": 0.5152608156204224, + "learning_rate": 0.0015, + "loss": 1.5834, + "step": 3990 + }, + { + "epoch": 0.4219409282700422, + "grad_norm": 0.4645211696624756, + "learning_rate": 0.0015, + "loss": 1.5966, + "step": 4000 + }, + { + "epoch": 0.4229957805907173, + "grad_norm": 0.509320855140686, + "learning_rate": 0.0015, + "loss": 1.5838, + "step": 4010 + }, + { + "epoch": 0.4240506329113924, + "grad_norm": 0.48627740144729614, + "learning_rate": 0.0015, + "loss": 1.5858, + "step": 4020 + }, + { + "epoch": 0.42510548523206754, + "grad_norm": 0.48657694458961487, + "learning_rate": 0.0015, + "loss": 1.5801, + "step": 4030 + }, + { + "epoch": 0.42616033755274263, + "grad_norm": 0.5566064715385437, + "learning_rate": 0.0015, + "loss": 1.5885, + "step": 4040 + }, + { + "epoch": 0.4272151898734177, + "grad_norm": 0.7756157517433167, + "learning_rate": 0.0015, + "loss": 1.5908, + "step": 4050 + }, + { + "epoch": 0.4282700421940928, + "grad_norm": 0.8503503203392029, + "learning_rate": 0.0015, + "loss": 1.5986, + "step": 4060 + }, + { + "epoch": 0.4293248945147679, + 
"grad_norm": 0.6750513911247253, + "learning_rate": 0.0015, + "loss": 1.5913, + "step": 4070 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 0.4914165735244751, + "learning_rate": 0.0015, + "loss": 1.5943, + "step": 4080 + }, + { + "epoch": 0.43143459915611815, + "grad_norm": 0.48406654596328735, + "learning_rate": 0.0015, + "loss": 1.5843, + "step": 4090 + }, + { + "epoch": 0.43248945147679324, + "grad_norm": 0.5713526010513306, + "learning_rate": 0.0015, + "loss": 1.5858, + "step": 4100 + }, + { + "epoch": 0.43354430379746833, + "grad_norm": 0.5185654163360596, + "learning_rate": 0.0015, + "loss": 1.5794, + "step": 4110 + }, + { + "epoch": 0.4345991561181435, + "grad_norm": 0.6192831993103027, + "learning_rate": 0.0015, + "loss": 1.5816, + "step": 4120 + }, + { + "epoch": 0.4356540084388186, + "grad_norm": 0.5061731934547424, + "learning_rate": 0.0015, + "loss": 1.5766, + "step": 4130 + }, + { + "epoch": 0.43670886075949367, + "grad_norm": 0.9296662211418152, + "learning_rate": 0.0015, + "loss": 1.5838, + "step": 4140 + }, + { + "epoch": 0.43776371308016876, + "grad_norm": 0.8087186217308044, + "learning_rate": 0.0015, + "loss": 1.5884, + "step": 4150 + }, + { + "epoch": 0.4388185654008439, + "grad_norm": 0.5286755561828613, + "learning_rate": 0.0015, + "loss": 1.5772, + "step": 4160 + }, + { + "epoch": 0.439873417721519, + "grad_norm": 0.4991167187690735, + "learning_rate": 0.0015, + "loss": 1.5772, + "step": 4170 + }, + { + "epoch": 0.4409282700421941, + "grad_norm": 0.6365343928337097, + "learning_rate": 0.0015, + "loss": 1.5744, + "step": 4180 + }, + { + "epoch": 0.4419831223628692, + "grad_norm": 0.5686373114585876, + "learning_rate": 0.0015, + "loss": 1.5819, + "step": 4190 + }, + { + "epoch": 0.4430379746835443, + "grad_norm": 0.46001219749450684, + "learning_rate": 0.0015, + "loss": 1.5793, + "step": 4200 + }, + { + "epoch": 0.4440928270042194, + "grad_norm": 0.9407059550285339, + "learning_rate": 0.0015, + "loss": 1.5722, + "step": 4210 + }, + { 
+ "epoch": 0.4451476793248945, + "grad_norm": 0.5759108066558838, + "learning_rate": 0.0015, + "loss": 1.5795, + "step": 4220 + }, + { + "epoch": 0.4462025316455696, + "grad_norm": 0.5303200483322144, + "learning_rate": 0.0015, + "loss": 1.5784, + "step": 4230 + }, + { + "epoch": 0.4472573839662447, + "grad_norm": 0.8242844939231873, + "learning_rate": 0.0015, + "loss": 1.5766, + "step": 4240 + }, + { + "epoch": 0.44831223628691985, + "grad_norm": 0.6142957210540771, + "learning_rate": 0.0015, + "loss": 1.5806, + "step": 4250 + }, + { + "epoch": 0.44936708860759494, + "grad_norm": 1.1863654851913452, + "learning_rate": 0.0015, + "loss": 1.5896, + "step": 4260 + }, + { + "epoch": 0.45042194092827004, + "grad_norm": 1.060115098953247, + "learning_rate": 0.0015, + "loss": 1.5747, + "step": 4270 + }, + { + "epoch": 0.45147679324894513, + "grad_norm": 1.1346009969711304, + "learning_rate": 0.0015, + "loss": 1.5703, + "step": 4280 + }, + { + "epoch": 0.4525316455696203, + "grad_norm": 0.9194135665893555, + "learning_rate": 0.0015, + "loss": 1.581, + "step": 4290 + }, + { + "epoch": 0.45358649789029537, + "grad_norm": 0.6078530550003052, + "learning_rate": 0.0015, + "loss": 1.5789, + "step": 4300 + }, + { + "epoch": 0.45464135021097046, + "grad_norm": 0.47440075874328613, + "learning_rate": 0.0015, + "loss": 1.5752, + "step": 4310 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 0.4443087875843048, + "learning_rate": 0.0015, + "loss": 1.5714, + "step": 4320 + }, + { + "epoch": 0.45675105485232065, + "grad_norm": 0.5200234055519104, + "learning_rate": 0.0015, + "loss": 1.5812, + "step": 4330 + }, + { + "epoch": 0.4578059071729958, + "grad_norm": 0.5711065530776978, + "learning_rate": 0.0015, + "loss": 1.5692, + "step": 4340 + }, + { + "epoch": 0.4588607594936709, + "grad_norm": 0.674248218536377, + "learning_rate": 0.0015, + "loss": 1.5711, + "step": 4350 + }, + { + "epoch": 0.459915611814346, + "grad_norm": 0.7093497514724731, + "learning_rate": 0.0015, + "loss": 
1.5866, + "step": 4360 + }, + { + "epoch": 0.4609704641350211, + "grad_norm": 0.7658103704452515, + "learning_rate": 0.0015, + "loss": 1.5653, + "step": 4370 + }, + { + "epoch": 0.4620253164556962, + "grad_norm": 0.5601438283920288, + "learning_rate": 0.0015, + "loss": 1.5691, + "step": 4380 + }, + { + "epoch": 0.4630801687763713, + "grad_norm": 0.479261577129364, + "learning_rate": 0.0015, + "loss": 1.5663, + "step": 4390 + }, + { + "epoch": 0.4641350210970464, + "grad_norm": 0.4836485981941223, + "learning_rate": 0.0015, + "loss": 1.5732, + "step": 4400 + }, + { + "epoch": 0.4651898734177215, + "grad_norm": 0.6983446478843689, + "learning_rate": 0.0015, + "loss": 1.5847, + "step": 4410 + }, + { + "epoch": 0.46624472573839665, + "grad_norm": 0.49608248472213745, + "learning_rate": 0.0015, + "loss": 1.5705, + "step": 4420 + }, + { + "epoch": 0.46729957805907174, + "grad_norm": 0.8320929408073425, + "learning_rate": 0.0015, + "loss": 1.5751, + "step": 4430 + }, + { + "epoch": 0.46835443037974683, + "grad_norm": 0.6829028725624084, + "learning_rate": 0.0015, + "loss": 1.5726, + "step": 4440 + }, + { + "epoch": 0.4694092827004219, + "grad_norm": 0.6526251435279846, + "learning_rate": 0.0015, + "loss": 1.5789, + "step": 4450 + }, + { + "epoch": 0.4704641350210971, + "grad_norm": 0.5063625574111938, + "learning_rate": 0.0015, + "loss": 1.564, + "step": 4460 + }, + { + "epoch": 0.47151898734177217, + "grad_norm": 0.5352622866630554, + "learning_rate": 0.0015, + "loss": 1.5717, + "step": 4470 + }, + { + "epoch": 0.47257383966244726, + "grad_norm": 0.7437189817428589, + "learning_rate": 0.0015, + "loss": 1.5761, + "step": 4480 + }, + { + "epoch": 0.47362869198312235, + "grad_norm": 0.7176907658576965, + "learning_rate": 0.0015, + "loss": 1.5656, + "step": 4490 + }, + { + "epoch": 0.47468354430379744, + "grad_norm": 0.4548720419406891, + "learning_rate": 0.0015, + "loss": 1.5674, + "step": 4500 + }, + { + "epoch": 0.4757383966244726, + "grad_norm": 0.44671133160591125, + 
"learning_rate": 0.0015, + "loss": 1.5713, + "step": 4510 + }, + { + "epoch": 0.4767932489451477, + "grad_norm": 0.6885534524917603, + "learning_rate": 0.0015, + "loss": 1.553, + "step": 4520 + }, + { + "epoch": 0.4778481012658228, + "grad_norm": 0.7057598829269409, + "learning_rate": 0.0015, + "loss": 1.572, + "step": 4530 + }, + { + "epoch": 0.47890295358649787, + "grad_norm": 0.5296266078948975, + "learning_rate": 0.0015, + "loss": 1.5731, + "step": 4540 + }, + { + "epoch": 0.479957805907173, + "grad_norm": 0.4908561110496521, + "learning_rate": 0.0015, + "loss": 1.5682, + "step": 4550 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 0.5058237314224243, + "learning_rate": 0.0015, + "loss": 1.5601, + "step": 4560 + }, + { + "epoch": 0.4820675105485232, + "grad_norm": 0.5028045177459717, + "learning_rate": 0.0015, + "loss": 1.5414, + "step": 4570 + }, + { + "epoch": 0.4831223628691983, + "grad_norm": 0.5612614154815674, + "learning_rate": 0.0015, + "loss": 1.5652, + "step": 4580 + }, + { + "epoch": 0.48417721518987344, + "grad_norm": 0.5249614715576172, + "learning_rate": 0.0015, + "loss": 1.5504, + "step": 4590 + }, + { + "epoch": 0.48523206751054854, + "grad_norm": 0.8108306527137756, + "learning_rate": 0.0015, + "loss": 1.5664, + "step": 4600 + }, + { + "epoch": 0.48628691983122363, + "grad_norm": 0.8428634405136108, + "learning_rate": 0.0015, + "loss": 1.5711, + "step": 4610 + }, + { + "epoch": 0.4873417721518987, + "grad_norm": 0.47969523072242737, + "learning_rate": 0.0015, + "loss": 1.5482, + "step": 4620 + }, + { + "epoch": 0.4883966244725738, + "grad_norm": 0.5133702754974365, + "learning_rate": 0.0015, + "loss": 1.5648, + "step": 4630 + }, + { + "epoch": 0.48945147679324896, + "grad_norm": 0.6160013675689697, + "learning_rate": 0.0015, + "loss": 1.5682, + "step": 4640 + }, + { + "epoch": 0.49050632911392406, + "grad_norm": 0.5954915881156921, + "learning_rate": 0.0015, + "loss": 1.5704, + "step": 4650 + }, + { + "epoch": 0.49156118143459915, + 
"grad_norm": 0.5409538149833679, + "learning_rate": 0.0015, + "loss": 1.5567, + "step": 4660 + }, + { + "epoch": 0.49261603375527424, + "grad_norm": 0.5427380204200745, + "learning_rate": 0.0015, + "loss": 1.5589, + "step": 4670 + }, + { + "epoch": 0.4936708860759494, + "grad_norm": 0.5295004844665527, + "learning_rate": 0.0015, + "loss": 1.5561, + "step": 4680 + }, + { + "epoch": 0.4947257383966245, + "grad_norm": 0.7764521241188049, + "learning_rate": 0.0015, + "loss": 1.5526, + "step": 4690 + }, + { + "epoch": 0.4957805907172996, + "grad_norm": 0.7982751727104187, + "learning_rate": 0.0015, + "loss": 1.5621, + "step": 4700 + }, + { + "epoch": 0.49683544303797467, + "grad_norm": 0.47664445638656616, + "learning_rate": 0.0015, + "loss": 1.563, + "step": 4710 + }, + { + "epoch": 0.4978902953586498, + "grad_norm": 1.288316249847412, + "learning_rate": 0.0015, + "loss": 1.5592, + "step": 4720 + }, + { + "epoch": 0.4989451476793249, + "grad_norm": 0.724997341632843, + "learning_rate": 0.0015, + "loss": 1.5585, + "step": 4730 + }, + { + "epoch": 0.5, + "grad_norm": 0.5051091313362122, + "learning_rate": 0.0015, + "loss": 1.5522, + "step": 4740 + }, + { + "epoch": 0.5010548523206751, + "grad_norm": 0.67183518409729, + "learning_rate": 0.0015, + "loss": 1.5719, + "step": 4750 + }, + { + "epoch": 0.5021097046413502, + "grad_norm": 0.6073653101921082, + "learning_rate": 0.0015, + "loss": 1.5577, + "step": 4760 + }, + { + "epoch": 0.5031645569620253, + "grad_norm": 0.7339580655097961, + "learning_rate": 0.0015, + "loss": 1.5544, + "step": 4770 + }, + { + "epoch": 0.5042194092827004, + "grad_norm": 0.493580162525177, + "learning_rate": 0.0015, + "loss": 1.5672, + "step": 4780 + }, + { + "epoch": 0.5052742616033755, + "grad_norm": 0.7570888996124268, + "learning_rate": 0.0015, + "loss": 1.5516, + "step": 4790 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 0.4718903601169586, + "learning_rate": 0.0015, + "loss": 1.5537, + "step": 4800 + }, + { + "epoch": 
0.5073839662447257, + "grad_norm": 0.5226156115531921, + "learning_rate": 0.0015, + "loss": 1.5511, + "step": 4810 + }, + { + "epoch": 0.5084388185654009, + "grad_norm": 0.47913801670074463, + "learning_rate": 0.0015, + "loss": 1.5711, + "step": 4820 + }, + { + "epoch": 0.509493670886076, + "grad_norm": 0.8636896014213562, + "learning_rate": 0.0015, + "loss": 1.5537, + "step": 4830 + }, + { + "epoch": 0.510548523206751, + "grad_norm": 0.6331192255020142, + "learning_rate": 0.0015, + "loss": 1.5467, + "step": 4840 + }, + { + "epoch": 0.5116033755274262, + "grad_norm": 0.4399349093437195, + "learning_rate": 0.0015, + "loss": 1.5637, + "step": 4850 + }, + { + "epoch": 0.5126582278481012, + "grad_norm": 0.7091642022132874, + "learning_rate": 0.0015, + "loss": 1.5481, + "step": 4860 + }, + { + "epoch": 0.5137130801687764, + "grad_norm": 0.5377468466758728, + "learning_rate": 0.0015, + "loss": 1.5447, + "step": 4870 + }, + { + "epoch": 0.5147679324894515, + "grad_norm": 0.5458089113235474, + "learning_rate": 0.0015, + "loss": 1.5617, + "step": 4880 + }, + { + "epoch": 0.5158227848101266, + "grad_norm": 0.5292337536811829, + "learning_rate": 0.0015, + "loss": 1.5541, + "step": 4890 + }, + { + "epoch": 0.5168776371308017, + "grad_norm": 0.5375266671180725, + "learning_rate": 0.0015, + "loss": 1.5641, + "step": 4900 + }, + { + "epoch": 0.5179324894514767, + "grad_norm": 0.6999505758285522, + "learning_rate": 0.0015, + "loss": 1.5532, + "step": 4910 + }, + { + "epoch": 0.5189873417721519, + "grad_norm": 0.5864401459693909, + "learning_rate": 0.0015, + "loss": 1.5475, + "step": 4920 + }, + { + "epoch": 0.520042194092827, + "grad_norm": 0.5812068581581116, + "learning_rate": 0.0015, + "loss": 1.5536, + "step": 4930 + }, + { + "epoch": 0.5210970464135021, + "grad_norm": 0.8708783984184265, + "learning_rate": 0.0015, + "loss": 1.5544, + "step": 4940 + }, + { + "epoch": 0.5221518987341772, + "grad_norm": 0.7917665839195251, + "learning_rate": 0.0015, + "loss": 1.5648, + "step": 
4950 + }, + { + "epoch": 0.5232067510548524, + "grad_norm": 0.6949357390403748, + "learning_rate": 0.0015, + "loss": 1.5444, + "step": 4960 + }, + { + "epoch": 0.5242616033755274, + "grad_norm": 0.5002421140670776, + "learning_rate": 0.0015, + "loss": 1.549, + "step": 4970 + }, + { + "epoch": 0.5253164556962026, + "grad_norm": 0.6171126961708069, + "learning_rate": 0.0015, + "loss": 1.5442, + "step": 4980 + }, + { + "epoch": 0.5263713080168776, + "grad_norm": 0.5782960057258606, + "learning_rate": 0.0015, + "loss": 1.5465, + "step": 4990 + }, + { + "epoch": 0.5274261603375527, + "grad_norm": 0.6581275463104248, + "learning_rate": 0.0015, + "loss": 1.5573, + "step": 5000 + }, + { + "epoch": 0.5284810126582279, + "grad_norm": 0.5274462699890137, + "learning_rate": 0.0015, + "loss": 1.5497, + "step": 5010 + }, + { + "epoch": 0.5295358649789029, + "grad_norm": 0.5273053646087646, + "learning_rate": 0.0015, + "loss": 1.5455, + "step": 5020 + }, + { + "epoch": 0.5305907172995781, + "grad_norm": 0.6722688674926758, + "learning_rate": 0.0015, + "loss": 1.5301, + "step": 5030 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 0.6710203289985657, + "learning_rate": 0.0015, + "loss": 1.5565, + "step": 5040 + }, + { + "epoch": 0.5327004219409283, + "grad_norm": 0.6958735585212708, + "learning_rate": 0.0015, + "loss": 1.5458, + "step": 5050 + }, + { + "epoch": 0.5337552742616034, + "grad_norm": 0.6558091640472412, + "learning_rate": 0.0015, + "loss": 1.5572, + "step": 5060 + }, + { + "epoch": 0.5348101265822784, + "grad_norm": 0.679487943649292, + "learning_rate": 0.0015, + "loss": 1.5454, + "step": 5070 + }, + { + "epoch": 0.5358649789029536, + "grad_norm": 0.5793918371200562, + "learning_rate": 0.0015, + "loss": 1.5607, + "step": 5080 + }, + { + "epoch": 0.5369198312236287, + "grad_norm": 0.6777279376983643, + "learning_rate": 0.0015, + "loss": 1.5491, + "step": 5090 + }, + { + "epoch": 0.5379746835443038, + "grad_norm": 0.8515413999557495, + "learning_rate": 0.0015, + 
"loss": 1.5308, + "step": 5100 + }, + { + "epoch": 0.5390295358649789, + "grad_norm": 0.4885188937187195, + "learning_rate": 0.0015, + "loss": 1.5454, + "step": 5110 + }, + { + "epoch": 0.540084388185654, + "grad_norm": 0.9769313335418701, + "learning_rate": 0.0015, + "loss": 1.5474, + "step": 5120 + }, + { + "epoch": 0.5411392405063291, + "grad_norm": 0.5005014538764954, + "learning_rate": 0.0015, + "loss": 1.543, + "step": 5130 + }, + { + "epoch": 0.5421940928270043, + "grad_norm": 0.4980844259262085, + "learning_rate": 0.0015, + "loss": 1.5585, + "step": 5140 + }, + { + "epoch": 0.5432489451476793, + "grad_norm": 0.5155827403068542, + "learning_rate": 0.0015, + "loss": 1.5529, + "step": 5150 + }, + { + "epoch": 0.5443037974683544, + "grad_norm": 0.4385599195957184, + "learning_rate": 0.0015, + "loss": 1.5474, + "step": 5160 + }, + { + "epoch": 0.5453586497890295, + "grad_norm": 0.5264326333999634, + "learning_rate": 0.0015, + "loss": 1.5497, + "step": 5170 + }, + { + "epoch": 0.5464135021097046, + "grad_norm": 0.620452344417572, + "learning_rate": 0.0015, + "loss": 1.5556, + "step": 5180 + }, + { + "epoch": 0.5474683544303798, + "grad_norm": 0.7450008392333984, + "learning_rate": 0.0015, + "loss": 1.54, + "step": 5190 + }, + { + "epoch": 0.5485232067510548, + "grad_norm": 0.7327592372894287, + "learning_rate": 0.0015, + "loss": 1.5486, + "step": 5200 + }, + { + "epoch": 0.54957805907173, + "grad_norm": 0.8912865519523621, + "learning_rate": 0.0015, + "loss": 1.5328, + "step": 5210 + }, + { + "epoch": 0.5506329113924051, + "grad_norm": 0.5738736987113953, + "learning_rate": 0.0015, + "loss": 1.5458, + "step": 5220 + }, + { + "epoch": 0.5516877637130801, + "grad_norm": 0.4828597605228424, + "learning_rate": 0.0015, + "loss": 1.537, + "step": 5230 + }, + { + "epoch": 0.5527426160337553, + "grad_norm": 0.6224974393844604, + "learning_rate": 0.0015, + "loss": 1.5478, + "step": 5240 + }, + { + "epoch": 0.5537974683544303, + "grad_norm": 1.001340627670288, + 
"learning_rate": 0.0015, + "loss": 1.5381, + "step": 5250 + }, + { + "epoch": 0.5548523206751055, + "grad_norm": 0.5912613272666931, + "learning_rate": 0.0015, + "loss": 1.5467, + "step": 5260 + }, + { + "epoch": 0.5559071729957806, + "grad_norm": 0.5086495876312256, + "learning_rate": 0.0015, + "loss": 1.5539, + "step": 5270 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 0.5097530484199524, + "learning_rate": 0.0015, + "loss": 1.545, + "step": 5280 + }, + { + "epoch": 0.5580168776371308, + "grad_norm": 0.747732400894165, + "learning_rate": 0.0015, + "loss": 1.5443, + "step": 5290 + }, + { + "epoch": 0.5590717299578059, + "grad_norm": 0.595024585723877, + "learning_rate": 0.0015, + "loss": 1.5479, + "step": 5300 + }, + { + "epoch": 0.560126582278481, + "grad_norm": 0.4828443229198456, + "learning_rate": 0.0015, + "loss": 1.5348, + "step": 5310 + }, + { + "epoch": 0.5611814345991561, + "grad_norm": 0.6312142610549927, + "learning_rate": 0.0015, + "loss": 1.534, + "step": 5320 + }, + { + "epoch": 0.5622362869198312, + "grad_norm": 0.7723662257194519, + "learning_rate": 0.0015, + "loss": 1.5471, + "step": 5330 + }, + { + "epoch": 0.5632911392405063, + "grad_norm": 0.8001915812492371, + "learning_rate": 0.0015, + "loss": 1.5378, + "step": 5340 + }, + { + "epoch": 0.5643459915611815, + "grad_norm": 0.48704344034194946, + "learning_rate": 0.0015, + "loss": 1.5275, + "step": 5350 + }, + { + "epoch": 0.5654008438818565, + "grad_norm": 0.7280847430229187, + "learning_rate": 0.0015, + "loss": 1.5398, + "step": 5360 + }, + { + "epoch": 0.5664556962025317, + "grad_norm": 0.5214621424674988, + "learning_rate": 0.0015, + "loss": 1.5429, + "step": 5370 + }, + { + "epoch": 0.5675105485232067, + "grad_norm": 0.5062591433525085, + "learning_rate": 0.0015, + "loss": 1.5317, + "step": 5380 + }, + { + "epoch": 0.5685654008438819, + "grad_norm": 0.5541534423828125, + "learning_rate": 0.0015, + "loss": 1.5338, + "step": 5390 + }, + { + "epoch": 0.569620253164557, + "grad_norm": 
0.48535457253456116, + "learning_rate": 0.0015, + "loss": 1.5381, + "step": 5400 + }, + { + "epoch": 0.570675105485232, + "grad_norm": 0.6548792123794556, + "learning_rate": 0.0015, + "loss": 1.5399, + "step": 5410 + }, + { + "epoch": 0.5717299578059072, + "grad_norm": 0.4913672208786011, + "learning_rate": 0.0015, + "loss": 1.5294, + "step": 5420 + }, + { + "epoch": 0.5727848101265823, + "grad_norm": 0.6058489680290222, + "learning_rate": 0.0015, + "loss": 1.529, + "step": 5430 + }, + { + "epoch": 0.5738396624472574, + "grad_norm": 0.8538274168968201, + "learning_rate": 0.0015, + "loss": 1.5249, + "step": 5440 + }, + { + "epoch": 0.5748945147679325, + "grad_norm": 0.6668264269828796, + "learning_rate": 0.0015, + "loss": 1.5341, + "step": 5450 + }, + { + "epoch": 0.5759493670886076, + "grad_norm": 0.47492551803588867, + "learning_rate": 0.0015, + "loss": 1.5349, + "step": 5460 + }, + { + "epoch": 0.5770042194092827, + "grad_norm": 0.5480576157569885, + "learning_rate": 0.0015, + "loss": 1.534, + "step": 5470 + }, + { + "epoch": 0.5780590717299579, + "grad_norm": 0.7709435820579529, + "learning_rate": 0.0015, + "loss": 1.5384, + "step": 5480 + }, + { + "epoch": 0.5791139240506329, + "grad_norm": 0.5125923156738281, + "learning_rate": 0.0015, + "loss": 1.5391, + "step": 5490 + }, + { + "epoch": 0.580168776371308, + "grad_norm": 0.5231842994689941, + "learning_rate": 0.0015, + "loss": 1.5292, + "step": 5500 + }, + { + "epoch": 0.5812236286919831, + "grad_norm": 0.4863499402999878, + "learning_rate": 0.0015, + "loss": 1.5369, + "step": 5510 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 0.47522854804992676, + "learning_rate": 0.0015, + "loss": 1.532, + "step": 5520 + }, + { + "epoch": 0.5833333333333334, + "grad_norm": 0.5916144847869873, + "learning_rate": 0.0015, + "loss": 1.5247, + "step": 5530 + }, + { + "epoch": 0.5843881856540084, + "grad_norm": 0.6398744583129883, + "learning_rate": 0.0015, + "loss": 1.5251, + "step": 5540 + }, + { + "epoch": 
0.5854430379746836, + "grad_norm": 0.6109040975570679, + "learning_rate": 0.0015, + "loss": 1.541, + "step": 5550 + }, + { + "epoch": 0.5864978902953587, + "grad_norm": 0.5315248966217041, + "learning_rate": 0.0015, + "loss": 1.5444, + "step": 5560 + }, + { + "epoch": 0.5875527426160337, + "grad_norm": 0.7278485298156738, + "learning_rate": 0.0015, + "loss": 1.5283, + "step": 5570 + }, + { + "epoch": 0.5886075949367089, + "grad_norm": 0.4984261989593506, + "learning_rate": 0.0015, + "loss": 1.5282, + "step": 5580 + }, + { + "epoch": 0.5896624472573839, + "grad_norm": 0.6012659668922424, + "learning_rate": 0.0015, + "loss": 1.5306, + "step": 5590 + }, + { + "epoch": 0.5907172995780591, + "grad_norm": 0.612429141998291, + "learning_rate": 0.0015, + "loss": 1.5246, + "step": 5600 + }, + { + "epoch": 0.5917721518987342, + "grad_norm": 0.6125677824020386, + "learning_rate": 0.0015, + "loss": 1.5305, + "step": 5610 + }, + { + "epoch": 0.5928270042194093, + "grad_norm": 0.5053579807281494, + "learning_rate": 0.0015, + "loss": 1.5426, + "step": 5620 + }, + { + "epoch": 0.5938818565400844, + "grad_norm": 0.632543683052063, + "learning_rate": 0.0015, + "loss": 1.5334, + "step": 5630 + }, + { + "epoch": 0.5949367088607594, + "grad_norm": 0.6583037972450256, + "learning_rate": 0.0015, + "loss": 1.5465, + "step": 5640 + }, + { + "epoch": 0.5959915611814346, + "grad_norm": 0.5003215670585632, + "learning_rate": 0.0015, + "loss": 1.5416, + "step": 5650 + }, + { + "epoch": 0.5970464135021097, + "grad_norm": 0.8263731598854065, + "learning_rate": 0.0015, + "loss": 1.5368, + "step": 5660 + }, + { + "epoch": 0.5981012658227848, + "grad_norm": 0.6069696545600891, + "learning_rate": 0.0015, + "loss": 1.5423, + "step": 5670 + }, + { + "epoch": 0.5991561181434599, + "grad_norm": 0.5096368789672852, + "learning_rate": 0.0015, + "loss": 1.5429, + "step": 5680 + }, + { + "epoch": 0.6002109704641351, + "grad_norm": 0.5679385662078857, + "learning_rate": 0.0015, + "loss": 1.516, + "step": 
5690 + }, + { + "epoch": 0.6012658227848101, + "grad_norm": 1.0392451286315918, + "learning_rate": 0.0015, + "loss": 1.525, + "step": 5700 + }, + { + "epoch": 0.6023206751054853, + "grad_norm": 0.5319361686706543, + "learning_rate": 0.0015, + "loss": 1.5272, + "step": 5710 + }, + { + "epoch": 0.6033755274261603, + "grad_norm": 0.45275771617889404, + "learning_rate": 0.0015, + "loss": 1.5289, + "step": 5720 + }, + { + "epoch": 0.6044303797468354, + "grad_norm": 1.0679144859313965, + "learning_rate": 0.0015, + "loss": 1.5261, + "step": 5730 + }, + { + "epoch": 0.6054852320675106, + "grad_norm": 0.514147937297821, + "learning_rate": 0.0015, + "loss": 1.5268, + "step": 5740 + }, + { + "epoch": 0.6065400843881856, + "grad_norm": 0.6027361750602722, + "learning_rate": 0.0015, + "loss": 1.5008, + "step": 5750 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 0.5092279314994812, + "learning_rate": 0.0015, + "loss": 1.5128, + "step": 5760 + }, + { + "epoch": 0.6086497890295358, + "grad_norm": 0.5995198488235474, + "learning_rate": 0.0015, + "loss": 1.517, + "step": 5770 + }, + { + "epoch": 0.609704641350211, + "grad_norm": 0.7631956934928894, + "learning_rate": 0.0015, + "loss": 1.5262, + "step": 5780 + }, + { + "epoch": 0.6107594936708861, + "grad_norm": 0.5148959755897522, + "learning_rate": 0.0015, + "loss": 1.5172, + "step": 5790 + }, + { + "epoch": 0.6118143459915611, + "grad_norm": 0.5230889320373535, + "learning_rate": 0.0015, + "loss": 1.5247, + "step": 5800 + }, + { + "epoch": 0.6128691983122363, + "grad_norm": 0.5816003084182739, + "learning_rate": 0.0015, + "loss": 1.5316, + "step": 5810 + }, + { + "epoch": 0.6139240506329114, + "grad_norm": 0.9210548400878906, + "learning_rate": 0.0015, + "loss": 1.543, + "step": 5820 + }, + { + "epoch": 0.6149789029535865, + "grad_norm": 0.5702227354049683, + "learning_rate": 0.0015, + "loss": 1.5244, + "step": 5830 + }, + { + "epoch": 0.6160337552742616, + "grad_norm": 0.4419424533843994, + "learning_rate": 0.0015, + 
"loss": 1.5363, + "step": 5840 + }, + { + "epoch": 0.6170886075949367, + "grad_norm": 0.4318900406360626, + "learning_rate": 0.0015, + "loss": 1.5411, + "step": 5850 + }, + { + "epoch": 0.6181434599156118, + "grad_norm": 0.478669673204422, + "learning_rate": 0.0015, + "loss": 1.5248, + "step": 5860 + }, + { + "epoch": 0.619198312236287, + "grad_norm": 0.4842545688152313, + "learning_rate": 0.0015, + "loss": 1.5194, + "step": 5870 + }, + { + "epoch": 0.620253164556962, + "grad_norm": 0.6825987100601196, + "learning_rate": 0.0015, + "loss": 1.5313, + "step": 5880 + }, + { + "epoch": 0.6213080168776371, + "grad_norm": 0.7300772070884705, + "learning_rate": 0.0015, + "loss": 1.5223, + "step": 5890 + }, + { + "epoch": 0.6223628691983122, + "grad_norm": 0.47134003043174744, + "learning_rate": 0.0015, + "loss": 1.526, + "step": 5900 + }, + { + "epoch": 0.6234177215189873, + "grad_norm": 0.5139616131782532, + "learning_rate": 0.0015, + "loss": 1.5195, + "step": 5910 + }, + { + "epoch": 0.6244725738396625, + "grad_norm": 0.5042129755020142, + "learning_rate": 0.0015, + "loss": 1.5151, + "step": 5920 + }, + { + "epoch": 0.6255274261603375, + "grad_norm": 1.1311372518539429, + "learning_rate": 0.0015, + "loss": 1.5218, + "step": 5930 + }, + { + "epoch": 0.6265822784810127, + "grad_norm": 0.7219382524490356, + "learning_rate": 0.0015, + "loss": 1.5309, + "step": 5940 + }, + { + "epoch": 0.6276371308016878, + "grad_norm": 0.565650224685669, + "learning_rate": 0.0015, + "loss": 1.5184, + "step": 5950 + }, + { + "epoch": 0.6286919831223629, + "grad_norm": 0.5066986680030823, + "learning_rate": 0.0015, + "loss": 1.5221, + "step": 5960 + }, + { + "epoch": 0.629746835443038, + "grad_norm": 0.5538550615310669, + "learning_rate": 0.0015, + "loss": 1.5211, + "step": 5970 + }, + { + "epoch": 0.630801687763713, + "grad_norm": 0.5575450658798218, + "learning_rate": 0.0015, + "loss": 1.505, + "step": 5980 + }, + { + "epoch": 0.6318565400843882, + "grad_norm": 0.5579926371574402, + 
"learning_rate": 0.0015, + "loss": 1.529, + "step": 5990 + }, + { + "epoch": 0.6329113924050633, + "grad_norm": 0.5696278214454651, + "learning_rate": 0.0015, + "loss": 1.5369, + "step": 6000 + }, + { + "epoch": 0.6339662447257384, + "grad_norm": 0.7768396139144897, + "learning_rate": 0.0015, + "loss": 1.521, + "step": 6010 + }, + { + "epoch": 0.6350210970464135, + "grad_norm": 0.9120811820030212, + "learning_rate": 0.0015, + "loss": 1.5236, + "step": 6020 + }, + { + "epoch": 0.6360759493670886, + "grad_norm": 0.4908900260925293, + "learning_rate": 0.0015, + "loss": 1.51, + "step": 6030 + }, + { + "epoch": 0.6371308016877637, + "grad_norm": 0.7604925036430359, + "learning_rate": 0.0015, + "loss": 1.529, + "step": 6040 + }, + { + "epoch": 0.6381856540084389, + "grad_norm": 0.49551641941070557, + "learning_rate": 0.0015, + "loss": 1.5081, + "step": 6050 + }, + { + "epoch": 0.6392405063291139, + "grad_norm": 0.6577268242835999, + "learning_rate": 0.0015, + "loss": 1.526, + "step": 6060 + }, + { + "epoch": 0.640295358649789, + "grad_norm": 0.5374493598937988, + "learning_rate": 0.0015, + "loss": 1.5183, + "step": 6070 + }, + { + "epoch": 0.6413502109704642, + "grad_norm": 0.5176852345466614, + "learning_rate": 0.0015, + "loss": 1.5138, + "step": 6080 + }, + { + "epoch": 0.6424050632911392, + "grad_norm": 0.5398962497711182, + "learning_rate": 0.0015, + "loss": 1.5131, + "step": 6090 + }, + { + "epoch": 0.6434599156118144, + "grad_norm": 0.6885660886764526, + "learning_rate": 0.0015, + "loss": 1.5078, + "step": 6100 + }, + { + "epoch": 0.6445147679324894, + "grad_norm": 0.6670694351196289, + "learning_rate": 0.0015, + "loss": 1.5249, + "step": 6110 + }, + { + "epoch": 0.6455696202531646, + "grad_norm": 0.49489378929138184, + "learning_rate": 0.0015, + "loss": 1.5181, + "step": 6120 + }, + { + "epoch": 0.6466244725738397, + "grad_norm": 0.4911707639694214, + "learning_rate": 0.0015, + "loss": 1.5163, + "step": 6130 + }, + { + "epoch": 0.6476793248945147, + "grad_norm": 
0.524797260761261, + "learning_rate": 0.0015, + "loss": 1.5178, + "step": 6140 + }, + { + "epoch": 0.6487341772151899, + "grad_norm": 0.5137598514556885, + "learning_rate": 0.0015, + "loss": 1.5273, + "step": 6150 + }, + { + "epoch": 0.6497890295358649, + "grad_norm": 0.4799788296222687, + "learning_rate": 0.0015, + "loss": 1.522, + "step": 6160 + }, + { + "epoch": 0.6508438818565401, + "grad_norm": 0.4757637679576874, + "learning_rate": 0.0014834368975312174, + "loss": 1.498, + "step": 6170 + }, + { + "epoch": 0.6518987341772152, + "grad_norm": 0.680515468120575, + "learning_rate": 0.0014629899726345957, + "loss": 1.5224, + "step": 6180 + }, + { + "epoch": 0.6529535864978903, + "grad_norm": 0.6326233148574829, + "learning_rate": 0.0014428248775471316, + "loss": 1.5227, + "step": 6190 + }, + { + "epoch": 0.6540084388185654, + "grad_norm": 0.5564861297607422, + "learning_rate": 0.00142293772767289, + "loss": 1.5154, + "step": 6200 + }, + { + "epoch": 0.6550632911392406, + "grad_norm": 0.5272548198699951, + "learning_rate": 0.001403324691959192, + "loss": 1.5069, + "step": 6210 + }, + { + "epoch": 0.6561181434599156, + "grad_norm": 0.6546555161476135, + "learning_rate": 0.0013839819921586025, + "loss": 1.5155, + "step": 6220 + }, + { + "epoch": 0.6571729957805907, + "grad_norm": 0.4702341854572296, + "learning_rate": 0.0013649059021010894, + "loss": 1.5034, + "step": 6230 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 0.5010920763015747, + "learning_rate": 0.0013460927469762154, + "loss": 1.5067, + "step": 6240 + }, + { + "epoch": 0.6592827004219409, + "grad_norm": 0.564780592918396, + "learning_rate": 0.0013275389026252255, + "loss": 1.5091, + "step": 6250 + }, + { + "epoch": 0.6603375527426161, + "grad_norm": 0.482165664434433, + "learning_rate": 0.0013092407948428887, + "loss": 1.5, + "step": 6260 + }, + { + "epoch": 0.6613924050632911, + "grad_norm": 0.44305357336997986, + "learning_rate": 0.001291194898688966, + "loss": 1.5095, + "step": 6270 + }, + { + 
"epoch": 0.6624472573839663, + "grad_norm": 0.5847101211547852, + "learning_rate": 0.001273397737809166, + "loss": 1.5062, + "step": 6280 + }, + { + "epoch": 0.6635021097046413, + "grad_norm": 0.6253090500831604, + "learning_rate": 0.001255845883765463, + "loss": 1.5005, + "step": 6290 + }, + { + "epoch": 0.6645569620253164, + "grad_norm": 0.5310482978820801, + "learning_rate": 0.001238535955375642, + "loss": 1.4954, + "step": 6300 + }, + { + "epoch": 0.6656118143459916, + "grad_norm": 0.4950437545776367, + "learning_rate": 0.0012214646180619506, + "loss": 1.4917, + "step": 6310 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.5149294137954712, + "learning_rate": 0.001204628583208727, + "loss": 1.4887, + "step": 6320 + }, + { + "epoch": 0.6677215189873418, + "grad_norm": 0.6041187644004822, + "learning_rate": 0.0011880246075288827, + "loss": 1.4972, + "step": 6330 + }, + { + "epoch": 0.6687763713080169, + "grad_norm": 0.4914081394672394, + "learning_rate": 0.001171649492439115, + "loss": 1.4875, + "step": 6340 + }, + { + "epoch": 0.669831223628692, + "grad_norm": 0.6181724071502686, + "learning_rate": 0.0011555000834437364, + "loss": 1.4907, + "step": 6350 + }, + { + "epoch": 0.6708860759493671, + "grad_norm": 0.8684509992599487, + "learning_rate": 0.0011395732695269908, + "loss": 1.4908, + "step": 6360 + }, + { + "epoch": 0.6719409282700421, + "grad_norm": 0.7119426727294922, + "learning_rate": 0.0011238659825537505, + "loss": 1.4732, + "step": 6370 + }, + { + "epoch": 0.6729957805907173, + "grad_norm": 0.5087348222732544, + "learning_rate": 0.0011083751966784717, + "loss": 1.4737, + "step": 6380 + }, + { + "epoch": 0.6740506329113924, + "grad_norm": 0.8575738668441772, + "learning_rate": 0.0010930979277622953, + "loss": 1.4907, + "step": 6390 + }, + { + "epoch": 0.6751054852320675, + "grad_norm": 0.4962826073169708, + "learning_rate": 0.0010780312327981854, + "loss": 1.4884, + "step": 6400 + }, + { + "epoch": 0.6761603375527426, + "grad_norm": 
0.6311264038085938, + "learning_rate": 0.0010631722093439888, + "loss": 1.482, + "step": 6410 + }, + { + "epoch": 0.6772151898734177, + "grad_norm": 0.6116746664047241, + "learning_rate": 0.00104851799496331, + "loss": 1.4725, + "step": 6420 + }, + { + "epoch": 0.6782700421940928, + "grad_norm": 0.5390072464942932, + "learning_rate": 0.0010340657666740914, + "loss": 1.4821, + "step": 6430 + }, + { + "epoch": 0.679324894514768, + "grad_norm": 0.4817228615283966, + "learning_rate": 0.0010198127404047975, + "loss": 1.4635, + "step": 6440 + }, + { + "epoch": 0.680379746835443, + "grad_norm": 0.46289730072021484, + "learning_rate": 0.0010057561704580897, + "loss": 1.4678, + "step": 6450 + }, + { + "epoch": 0.6814345991561181, + "grad_norm": 0.4937571883201599, + "learning_rate": 0.0009918933489818985, + "loss": 1.4778, + "step": 6460 + }, + { + "epoch": 0.6824894514767933, + "grad_norm": 0.5377094149589539, + "learning_rate": 0.0009782216054477827, + "loss": 1.4709, + "step": 6470 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 0.49664416909217834, + "learning_rate": 0.0009647383061364801, + "loss": 1.4785, + "step": 6480 + }, + { + "epoch": 0.6845991561181435, + "grad_norm": 0.590204119682312, + "learning_rate": 0.0009514408536305495, + "loss": 1.4728, + "step": 6490 + }, + { + "epoch": 0.6856540084388185, + "grad_norm": 0.4623895585536957, + "learning_rate": 0.0009383266863140042, + "loss": 1.485, + "step": 6500 + }, + { + "epoch": 0.6867088607594937, + "grad_norm": 0.4693305194377899, + "learning_rate": 0.000925393277878844, + "loss": 1.4844, + "step": 6510 + }, + { + "epoch": 0.6877637130801688, + "grad_norm": 0.4652727544307709, + "learning_rate": 0.0009126381368383879, + "loss": 1.4676, + "step": 6520 + }, + { + "epoch": 0.6888185654008439, + "grad_norm": 0.46819135546684265, + "learning_rate": 0.0009000588060473156, + "loss": 1.46, + "step": 6530 + }, + { + "epoch": 0.689873417721519, + "grad_norm": 0.4959070086479187, + "learning_rate": 
0.0008876528622283235, + "loss": 1.4679, + "step": 6540 + }, + { + "epoch": 0.6909282700421941, + "grad_norm": 0.4959053695201874, + "learning_rate": 0.0008754179155053053, + "loss": 1.4645, + "step": 6550 + }, + { + "epoch": 0.6919831223628692, + "grad_norm": 0.5161389708518982, + "learning_rate": 0.0008633516089429683, + "loss": 1.4646, + "step": 6560 + }, + { + "epoch": 0.6930379746835443, + "grad_norm": 0.5014788508415222, + "learning_rate": 0.0008514516180927928, + "loss": 1.4604, + "step": 6570 + }, + { + "epoch": 0.6940928270042194, + "grad_norm": 0.523698091506958, + "learning_rate": 0.0008397156505452524, + "loss": 1.4589, + "step": 6580 + }, + { + "epoch": 0.6951476793248945, + "grad_norm": 0.4866982698440552, + "learning_rate": 0.0008281414454882051, + "loss": 1.4617, + "step": 6590 + }, + { + "epoch": 0.6962025316455697, + "grad_norm": 0.5741299986839294, + "learning_rate": 0.0008167267732713704, + "loss": 1.4661, + "step": 6600 + }, + { + "epoch": 0.6972573839662447, + "grad_norm": 0.45538657903671265, + "learning_rate": 0.0008054694349768117, + "loss": 1.4533, + "step": 6610 + }, + { + "epoch": 0.6983122362869199, + "grad_norm": 0.4680151641368866, + "learning_rate": 0.0007943672619953359, + "loss": 1.4581, + "step": 6620 + }, + { + "epoch": 0.6993670886075949, + "grad_norm": 0.49672621488571167, + "learning_rate": 0.0007834181156087356, + "loss": 1.4481, + "step": 6630 + }, + { + "epoch": 0.70042194092827, + "grad_norm": 0.5573347806930542, + "learning_rate": 0.0007726198865777852, + "loss": 1.4574, + "step": 6640 + }, + { + "epoch": 0.7014767932489452, + "grad_norm": 0.4367733895778656, + "learning_rate": 0.0007619704947359191, + "loss": 1.4494, + "step": 6650 + }, + { + "epoch": 0.7025316455696202, + "grad_norm": 0.4344213306903839, + "learning_rate": 0.0007514678885885087, + "loss": 1.4513, + "step": 6660 + }, + { + "epoch": 0.7035864978902954, + "grad_norm": 0.5328627824783325, + "learning_rate": 0.0007411100449176633, + "loss": 1.4513, + "step": 
6670 + }, + { + "epoch": 0.7046413502109705, + "grad_norm": 0.5584678649902344, + "learning_rate": 0.0007308949683924791, + "loss": 1.4539, + "step": 6680 + }, + { + "epoch": 0.7056962025316456, + "grad_norm": 0.5603947639465332, + "learning_rate": 0.000720820691184658, + "loss": 1.4487, + "step": 6690 + }, + { + "epoch": 0.7067510548523207, + "grad_norm": 0.4115470349788666, + "learning_rate": 0.0007108852725894269, + "loss": 1.4479, + "step": 6700 + }, + { + "epoch": 0.7078059071729957, + "grad_norm": 0.46082454919815063, + "learning_rate": 0.000701086798651681, + "loss": 1.447, + "step": 6710 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 0.44467219710350037, + "learning_rate": 0.0006914233817972798, + "loss": 1.4367, + "step": 6720 + }, + { + "epoch": 0.709915611814346, + "grad_norm": 0.5105217695236206, + "learning_rate": 0.0006818931604694261, + "loss": 1.4451, + "step": 6730 + }, + { + "epoch": 0.7109704641350211, + "grad_norm": 0.4288281500339508, + "learning_rate": 0.0006724942987700563, + "loss": 1.4483, + "step": 6740 + }, + { + "epoch": 0.7120253164556962, + "grad_norm": 0.46505606174468994, + "learning_rate": 0.0006632249861061732, + "loss": 1.4471, + "step": 6750 + }, + { + "epoch": 0.7130801687763713, + "grad_norm": 0.45374882221221924, + "learning_rate": 0.0006540834368410549, + "loss": 1.4445, + "step": 6760 + }, + { + "epoch": 0.7141350210970464, + "grad_norm": 0.4485350549221039, + "learning_rate": 0.0006450678899502701, + "loss": 1.4483, + "step": 6770 + }, + { + "epoch": 0.7151898734177216, + "grad_norm": 0.7305462956428528, + "learning_rate": 0.0006361766086824345, + "loss": 1.4414, + "step": 6780 + }, + { + "epoch": 0.7162447257383966, + "grad_norm": 0.7582378387451172, + "learning_rate": 0.000627407880224645, + "loss": 1.4491, + "step": 6790 + }, + { + "epoch": 0.7172995780590717, + "grad_norm": 0.51595538854599, + "learning_rate": 0.0006187600153725225, + "loss": 1.4358, + "step": 6800 + }, + { + "epoch": 0.7183544303797469, + 
"grad_norm": 0.7243607044219971, + "learning_rate": 0.0006102313482048055, + "loss": 1.4363, + "step": 6810 + }, + { + "epoch": 0.7194092827004219, + "grad_norm": 0.5584043860435486, + "learning_rate": 0.0006018202357624274, + "loss": 1.4404, + "step": 6820 + }, + { + "epoch": 0.7204641350210971, + "grad_norm": 0.8587073683738708, + "learning_rate": 0.0005935250577320168, + "loss": 1.4365, + "step": 6830 + }, + { + "epoch": 0.7215189873417721, + "grad_norm": 0.46687424182891846, + "learning_rate": 0.0005853442161337618, + "loss": 1.4277, + "step": 6840 + }, + { + "epoch": 0.7225738396624473, + "grad_norm": 0.4609874188899994, + "learning_rate": 0.0005772761350135759, + "loss": 1.4365, + "step": 6850 + }, + { + "epoch": 0.7236286919831224, + "grad_norm": 0.42891716957092285, + "learning_rate": 0.0005693192601395058, + "loss": 1.4314, + "step": 6860 + }, + { + "epoch": 0.7246835443037974, + "grad_norm": 0.5215572118759155, + "learning_rate": 0.000561472058702326, + "loss": 1.4267, + "step": 6870 + }, + { + "epoch": 0.7257383966244726, + "grad_norm": 0.4409372806549072, + "learning_rate": 0.000553733019020258, + "loss": 1.4355, + "step": 6880 + }, + { + "epoch": 0.7267932489451476, + "grad_norm": 0.45947423577308655, + "learning_rate": 0.0005461006502477612, + "loss": 1.4243, + "step": 6890 + }, + { + "epoch": 0.7278481012658228, + "grad_norm": 0.48462724685668945, + "learning_rate": 0.0005385734820883369, + "loss": 1.4232, + "step": 6900 + }, + { + "epoch": 0.7289029535864979, + "grad_norm": 0.5507255792617798, + "learning_rate": 0.0005311500645112907, + "loss": 1.4494, + "step": 6910 + }, + { + "epoch": 0.729957805907173, + "grad_norm": 0.44307002425193787, + "learning_rate": 0.0005238289674723993, + "loss": 1.4274, + "step": 6920 + }, + { + "epoch": 0.7310126582278481, + "grad_norm": 0.4315953552722931, + "learning_rate": 0.0005166087806384274, + "loss": 1.4332, + "step": 6930 + }, + { + "epoch": 0.7320675105485233, + "grad_norm": 0.47853460907936096, + 
"learning_rate": 0.0005094881131154418, + "loss": 1.4365, + "step": 6940 + }, + { + "epoch": 0.7331223628691983, + "grad_norm": 0.44437679648399353, + "learning_rate": 0.0005024655931808696, + "loss": 1.4315, + "step": 6950 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 0.48510003089904785, + "learning_rate": 0.0004955398680192508, + "loss": 1.4223, + "step": 6960 + }, + { + "epoch": 0.7352320675105485, + "grad_norm": 0.4431687891483307, + "learning_rate": 0.000488709603461632, + "loss": 1.4171, + "step": 6970 + }, + { + "epoch": 0.7362869198312236, + "grad_norm": 0.4383626878261566, + "learning_rate": 0.000481973483728553, + "loss": 1.4189, + "step": 6980 + }, + { + "epoch": 0.7373417721518988, + "grad_norm": 0.4964883327484131, + "learning_rate": 0.0004753302111765748, + "loss": 1.4192, + "step": 6990 + }, + { + "epoch": 0.7383966244725738, + "grad_norm": 0.4469358026981354, + "learning_rate": 0.0004687785060483032, + "loss": 1.4332, + "step": 7000 + }, + { + "epoch": 0.739451476793249, + "grad_norm": 0.4128322899341583, + "learning_rate": 0.0004623171062258558, + "loss": 1.4017, + "step": 7010 + }, + { + "epoch": 0.740506329113924, + "grad_norm": 0.5924244523048401, + "learning_rate": 0.0004559447669877288, + "loss": 1.4194, + "step": 7020 + }, + { + "epoch": 0.7415611814345991, + "grad_norm": 0.4384585916996002, + "learning_rate": 0.00044966026076901413, + "loss": 1.4236, + "step": 7030 + }, + { + "epoch": 0.7426160337552743, + "grad_norm": 0.47069668769836426, + "learning_rate": 0.00044346237692492177, + "loss": 1.4272, + "step": 7040 + }, + { + "epoch": 0.7436708860759493, + "grad_norm": 0.45984041690826416, + "learning_rate": 0.0004373499214975615, + "loss": 1.4135, + "step": 7050 + }, + { + "epoch": 0.7447257383966245, + "grad_norm": 0.4670739471912384, + "learning_rate": 0.0004313217169859396, + "loss": 1.4184, + "step": 7060 + }, + { + "epoch": 0.7457805907172996, + "grad_norm": 0.5275494456291199, + "learning_rate": 0.0004253766021191256, + 
"loss": 1.425, + "step": 7070 + }, + { + "epoch": 0.7468354430379747, + "grad_norm": 0.459009051322937, + "learning_rate": 0.00041951343163254497, + "loss": 1.4232, + "step": 7080 + }, + { + "epoch": 0.7478902953586498, + "grad_norm": 0.5222692489624023, + "learning_rate": 0.00041373107604735626, + "loss": 1.4203, + "step": 7090 + }, + { + "epoch": 0.7489451476793249, + "grad_norm": 0.44252029061317444, + "learning_rate": 0.0004080284214528687, + "loss": 1.4156, + "step": 7100 + }, + { + "epoch": 0.75, + "grad_norm": 0.595230221748352, + "learning_rate": 0.0004024043692919589, + "loss": 1.4238, + "step": 7110 + }, + { + "epoch": 0.7510548523206751, + "grad_norm": 0.4670724868774414, + "learning_rate": 0.0003968578361494449, + "loss": 1.4218, + "step": 7120 + }, + { + "epoch": 0.7521097046413502, + "grad_norm": 0.44240885972976685, + "learning_rate": 0.000391387753543378, + "loss": 1.4288, + "step": 7130 + }, + { + "epoch": 0.7531645569620253, + "grad_norm": 0.48701536655426025, + "learning_rate": 0.00038599306771921023, + "loss": 1.4099, + "step": 7140 + }, + { + "epoch": 0.7542194092827004, + "grad_norm": 0.6511821746826172, + "learning_rate": 0.0003806727394468004, + "loss": 1.4067, + "step": 7150 + }, + { + "epoch": 0.7552742616033755, + "grad_norm": 0.6622909307479858, + "learning_rate": 0.0003754257438202162, + "loss": 1.4164, + "step": 7160 + }, + { + "epoch": 0.7563291139240507, + "grad_norm": 0.5599359273910522, + "learning_rate": 0.0003702510700602974, + "loss": 1.4218, + "step": 7170 + }, + { + "epoch": 0.7573839662447257, + "grad_norm": 0.6296157836914062, + "learning_rate": 0.0003651477213199393, + "loss": 1.4009, + "step": 7180 + }, + { + "epoch": 0.7584388185654009, + "grad_norm": 0.48194336891174316, + "learning_rate": 0.000360114714492061, + "loss": 1.4026, + "step": 7190 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 0.4437825083732605, + "learning_rate": 0.0003551510800202195, + "loss": 1.4078, + "step": 7200 + }, + { + "epoch": 
0.760548523206751, + "grad_norm": 0.4649067223072052, + "learning_rate": 0.0003502558617118353, + "loss": 1.4127, + "step": 7210 + }, + { + "epoch": 0.7616033755274262, + "grad_norm": 0.4865807890892029, + "learning_rate": 0.0003454281165539914, + "loss": 1.4245, + "step": 7220 + }, + { + "epoch": 0.7626582278481012, + "grad_norm": 0.4343038499355316, + "learning_rate": 0.00034066691453177176, + "loss": 1.4223, + "step": 7230 + }, + { + "epoch": 0.7637130801687764, + "grad_norm": 0.4302985668182373, + "learning_rate": 0.0003359713384491037, + "loss": 1.4188, + "step": 7240 + }, + { + "epoch": 0.7647679324894515, + "grad_norm": 0.547858476638794, + "learning_rate": 0.00033134048375206944, + "loss": 1.4084, + "step": 7250 + }, + { + "epoch": 0.7658227848101266, + "grad_norm": 0.4173900783061981, + "learning_rate": 0.0003267734583546536, + "loss": 1.409, + "step": 7260 + }, + { + "epoch": 0.7668776371308017, + "grad_norm": 0.4922495186328888, + "learning_rate": 0.00032226938246689157, + "loss": 1.4061, + "step": 7270 + }, + { + "epoch": 0.7679324894514767, + "grad_norm": 0.48684826493263245, + "learning_rate": 0.0003178273884253874, + "loss": 1.4051, + "step": 7280 + }, + { + "epoch": 0.7689873417721519, + "grad_norm": 0.5038044452667236, + "learning_rate": 0.0003134466205261674, + "loss": 1.4208, + "step": 7290 + }, + { + "epoch": 0.770042194092827, + "grad_norm": 0.4553186893463135, + "learning_rate": 0.0003091262348598378, + "loss": 1.4231, + "step": 7300 + }, + { + "epoch": 0.7710970464135021, + "grad_norm": 0.42974764108657837, + "learning_rate": 0.0003048653991490141, + "loss": 1.4037, + "step": 7310 + }, + { + "epoch": 0.7721518987341772, + "grad_norm": 0.4593367278575897, + "learning_rate": 0.00030066329258799187, + "loss": 1.3998, + "step": 7320 + }, + { + "epoch": 0.7732067510548524, + "grad_norm": 0.5465492606163025, + "learning_rate": 0.0002965191056846266, + "loss": 1.4055, + "step": 7330 + }, + { + "epoch": 0.7742616033755274, + "grad_norm": 
0.7009845972061157, + "learning_rate": 0.000292432040104394, + "loss": 1.4044, + "step": 7340 + }, + { + "epoch": 0.7753164556962026, + "grad_norm": 0.5206081867218018, + "learning_rate": 0.00028840130851659853, + "loss": 1.3981, + "step": 7350 + }, + { + "epoch": 0.7763713080168776, + "grad_norm": 0.43879786133766174, + "learning_rate": 0.0002844261344427028, + "loss": 1.4068, + "step": 7360 + }, + { + "epoch": 0.7774261603375527, + "grad_norm": 0.43642282485961914, + "learning_rate": 0.0002805057521067471, + "loss": 1.4047, + "step": 7370 + }, + { + "epoch": 0.7784810126582279, + "grad_norm": 0.48048076033592224, + "learning_rate": 0.00027663940628783017, + "loss": 1.3936, + "step": 7380 + }, + { + "epoch": 0.7795358649789029, + "grad_norm": 0.4274750053882599, + "learning_rate": 0.00027282635217462393, + "loss": 1.405, + "step": 7390 + }, + { + "epoch": 0.7805907172995781, + "grad_norm": 0.4558883607387543, + "learning_rate": 0.0002690658552218937, + "loss": 1.4144, + "step": 7400 + }, + { + "epoch": 0.7816455696202531, + "grad_norm": 0.4264092743396759, + "learning_rate": 0.00026535719100899516, + "loss": 1.3914, + "step": 7410 + }, + { + "epoch": 0.7827004219409283, + "grad_norm": 0.5099205374717712, + "learning_rate": 0.00026169964510032245, + "loss": 1.3956, + "step": 7420 + }, + { + "epoch": 0.7837552742616034, + "grad_norm": 0.44471004605293274, + "learning_rate": 0.00025809251290767984, + "loss": 1.3827, + "step": 7430 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 0.4197140634059906, + "learning_rate": 0.00025453509955454957, + "loss": 1.3892, + "step": 7440 + }, + { + "epoch": 0.7858649789029536, + "grad_norm": 0.4443294107913971, + "learning_rate": 0.00025102671974223175, + "loss": 1.392, + "step": 7450 + }, + { + "epoch": 0.7869198312236287, + "grad_norm": 0.41616514325141907, + "learning_rate": 0.00024756669761782815, + "loss": 1.4034, + "step": 7460 + }, + { + "epoch": 0.7879746835443038, + "grad_norm": 0.4350857436656952, + "learning_rate": 
0.0002441543666440464, + "loss": 1.3926, + "step": 7470 + }, + { + "epoch": 0.7890295358649789, + "grad_norm": 0.6285386085510254, + "learning_rate": 0.00024078906947079878, + "loss": 1.3982, + "step": 7480 + }, + { + "epoch": 0.790084388185654, + "grad_norm": 0.3976116478443146, + "learning_rate": 0.00023747015780857005, + "loss": 1.4051, + "step": 7490 + }, + { + "epoch": 0.7911392405063291, + "grad_norm": 0.43629202246665955, + "learning_rate": 0.00023419699230353144, + "loss": 1.4026, + "step": 7500 + }, + { + "epoch": 0.7921940928270043, + "grad_norm": 0.4500335454940796, + "learning_rate": 0.00023096894241437586, + "loss": 1.4079, + "step": 7510 + }, + { + "epoch": 0.7932489451476793, + "grad_norm": 0.44755616784095764, + "learning_rate": 0.00022778538629085056, + "loss": 1.3917, + "step": 7520 + }, + { + "epoch": 0.7943037974683544, + "grad_norm": 0.4716295301914215, + "learning_rate": 0.00022464571065396427, + "loss": 1.3913, + "step": 7530 + }, + { + "epoch": 0.7953586497890295, + "grad_norm": 0.40633416175842285, + "learning_rate": 0.00022154931067784521, + "loss": 1.3893, + "step": 7540 + }, + { + "epoch": 0.7964135021097046, + "grad_norm": 0.5139057040214539, + "learning_rate": 0.00021849558987322782, + "loss": 1.3872, + "step": 7550 + }, + { + "epoch": 0.7974683544303798, + "grad_norm": 0.48982763290405273, + "learning_rate": 0.0002154839599725452, + "loss": 1.3852, + "step": 7560 + }, + { + "epoch": 0.7985232067510548, + "grad_norm": 0.4648490250110626, + "learning_rate": 0.00021251384081660544, + "loss": 1.4016, + "step": 7570 + }, + { + "epoch": 0.79957805907173, + "grad_norm": 0.46889105439186096, + "learning_rate": 0.0002095846602428303, + "loss": 1.3948, + "step": 7580 + }, + { + "epoch": 0.8006329113924051, + "grad_norm": 0.4210684299468994, + "learning_rate": 0.00020669585397503358, + "loss": 1.389, + "step": 7590 + }, + { + "epoch": 0.8016877637130801, + "grad_norm": 0.4468730092048645, + "learning_rate": 0.0002038468655147195, + "loss": 
1.392, + "step": 7600 + }, + { + "epoch": 0.8027426160337553, + "grad_norm": 0.5256270170211792, + "learning_rate": 0.00020103714603387894, + "loss": 1.4042, + "step": 7610 + }, + { + "epoch": 0.8037974683544303, + "grad_norm": 0.40575408935546875, + "learning_rate": 0.00019826615426926338, + "loss": 1.3776, + "step": 7620 + }, + { + "epoch": 0.8048523206751055, + "grad_norm": 0.4396148920059204, + "learning_rate": 0.00019553335641811625, + "loss": 1.4004, + "step": 7630 + }, + { + "epoch": 0.8059071729957806, + "grad_norm": 0.40318024158477783, + "learning_rate": 0.0001928382260353415, + "loss": 1.3912, + "step": 7640 + }, + { + "epoch": 0.8069620253164557, + "grad_norm": 0.4133901000022888, + "learning_rate": 0.00019018024393208902, + "loss": 1.4067, + "step": 7650 + }, + { + "epoch": 0.8080168776371308, + "grad_norm": 0.4269525408744812, + "learning_rate": 0.00018755889807573872, + "loss": 1.3841, + "step": 7660 + }, + { + "epoch": 0.8090717299578059, + "grad_norm": 0.3969095051288605, + "learning_rate": 0.00018497368349126262, + "loss": 1.3892, + "step": 7670 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 0.4360499978065491, + "learning_rate": 0.00018242410216394648, + "loss": 1.4075, + "step": 7680 + }, + { + "epoch": 0.8111814345991561, + "grad_norm": 0.4163929522037506, + "learning_rate": 0.0001799096629434529, + "loss": 1.3792, + "step": 7690 + }, + { + "epoch": 0.8122362869198312, + "grad_norm": 0.40780094265937805, + "learning_rate": 0.00017742988144920578, + "loss": 1.3891, + "step": 7700 + }, + { + "epoch": 0.8132911392405063, + "grad_norm": 0.42543014883995056, + "learning_rate": 0.00017498427997707976, + "loss": 1.3851, + "step": 7710 + }, + { + "epoch": 0.8143459915611815, + "grad_norm": 0.5237059593200684, + "learning_rate": 0.00017257238740737548, + "loss": 1.3897, + "step": 7720 + }, + { + "epoch": 0.8154008438818565, + "grad_norm": 0.46695801615715027, + "learning_rate": 0.00017019373911406307, + "loss": 1.3961, + "step": 7730 + }, + { + 
"epoch": 0.8164556962025317, + "grad_norm": 0.4350501596927643, + "learning_rate": 0.000167847876875277, + "loss": 1.3964, + "step": 7740 + }, + { + "epoch": 0.8175105485232067, + "grad_norm": 0.4048670828342438, + "learning_rate": 0.00016553434878504428, + "loss": 1.3802, + "step": 7750 + }, + { + "epoch": 0.8185654008438819, + "grad_norm": 0.4566376209259033, + "learning_rate": 0.00016325270916622947, + "loss": 1.3824, + "step": 7760 + }, + { + "epoch": 0.819620253164557, + "grad_norm": 0.42859938740730286, + "learning_rate": 0.00016100251848467966, + "loss": 1.3904, + "step": 7770 + }, + { + "epoch": 0.820675105485232, + "grad_norm": 0.5645683407783508, + "learning_rate": 0.0001587833432645528, + "loss": 1.3821, + "step": 7780 + }, + { + "epoch": 0.8217299578059072, + "grad_norm": 0.5286628603935242, + "learning_rate": 0.00015659475600481292, + "loss": 1.4003, + "step": 7790 + }, + { + "epoch": 0.8227848101265823, + "grad_norm": 0.4128339886665344, + "learning_rate": 0.00015443633509687688, + "loss": 1.3883, + "step": 7800 + }, + { + "epoch": 0.8238396624472574, + "grad_norm": 0.3946399986743927, + "learning_rate": 0.00015230766474339536, + "loss": 1.3856, + "step": 7810 + }, + { + "epoch": 0.8248945147679325, + "grad_norm": 0.4302919805049896, + "learning_rate": 0.00015020833487815416, + "loss": 1.3938, + "step": 7820 + }, + { + "epoch": 0.8259493670886076, + "grad_norm": 0.43755945563316345, + "learning_rate": 0.0001481379410870792, + "loss": 1.3839, + "step": 7830 + }, + { + "epoch": 0.8270042194092827, + "grad_norm": 0.4731377959251404, + "learning_rate": 0.00014609608453033013, + "loss": 1.3694, + "step": 7840 + }, + { + "epoch": 0.8280590717299579, + "grad_norm": 0.3955100476741791, + "learning_rate": 0.00014408237186546807, + "loss": 1.379, + "step": 7850 + }, + { + "epoch": 0.8291139240506329, + "grad_norm": 0.5021982789039612, + "learning_rate": 0.00014209641517168273, + "loss": 1.3686, + "step": 7860 + }, + { + "epoch": 0.830168776371308, + 
"grad_norm": 0.43806201219558716, + "learning_rate": 0.00014013783187506265, + "loss": 1.3822, + "step": 7870 + }, + { + "epoch": 0.8312236286919831, + "grad_norm": 0.4424991011619568, + "learning_rate": 0.00013820624467489697, + "loss": 1.4, + "step": 7880 + }, + { + "epoch": 0.8322784810126582, + "grad_norm": 0.4218648672103882, + "learning_rate": 0.00013630128147099213, + "loss": 1.3986, + "step": 7890 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.4457917809486389, + "learning_rate": 0.00013442257529199068, + "loss": 1.3781, + "step": 7900 + }, + { + "epoch": 0.8343881856540084, + "grad_norm": 0.4868776798248291, + "learning_rate": 0.00013256976422467803, + "loss": 1.3866, + "step": 7910 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 0.39558809995651245, + "learning_rate": 0.00013074249134426366, + "loss": 1.3851, + "step": 7920 + }, + { + "epoch": 0.8364978902953587, + "grad_norm": 0.4246436655521393, + "learning_rate": 0.0001289404046456233, + "loss": 1.395, + "step": 7930 + }, + { + "epoch": 0.8375527426160337, + "grad_norm": 0.4155665338039398, + "learning_rate": 0.0001271631569754887, + "loss": 1.3873, + "step": 7940 + }, + { + "epoch": 0.8386075949367089, + "grad_norm": 0.41908028721809387, + "learning_rate": 0.0001254104059655723, + "loss": 1.3876, + "step": 7950 + }, + { + "epoch": 0.8396624472573839, + "grad_norm": 0.39711275696754456, + "learning_rate": 0.00012368181396661337, + "loss": 1.3755, + "step": 7960 + }, + { + "epoch": 0.8407172995780591, + "grad_norm": 0.4529458284378052, + "learning_rate": 0.00012197704798333364, + "loss": 1.3784, + "step": 7970 + }, + { + "epoch": 0.8417721518987342, + "grad_norm": 0.4224666357040405, + "learning_rate": 0.00012029577961028894, + "loss": 1.3757, + "step": 7980 + }, + { + "epoch": 0.8428270042194093, + "grad_norm": 0.45166996121406555, + "learning_rate": 0.00011863768496860542, + "loss": 1.3883, + "step": 7990 + }, + { + "epoch": 0.8438818565400844, + "grad_norm": 0.4248290956020355, + 
"learning_rate": 0.00011700244464358777, + "loss": 1.3829, + "step": 8000 + }, + { + "epoch": 0.8449367088607594, + "grad_norm": 0.40860509872436523, + "learning_rate": 0.00011538974362318715, + "loss": 1.3879, + "step": 8010 + }, + { + "epoch": 0.8459915611814346, + "grad_norm": 0.42114412784576416, + "learning_rate": 0.00011379927123731737, + "loss": 1.3823, + "step": 8020 + }, + { + "epoch": 0.8470464135021097, + "grad_norm": 0.40649139881134033, + "learning_rate": 0.0001122307210980077, + "loss": 1.3805, + "step": 8030 + }, + { + "epoch": 0.8481012658227848, + "grad_norm": 0.42806771397590637, + "learning_rate": 0.00011068379104038026, + "loss": 1.3943, + "step": 8040 + }, + { + "epoch": 0.8491561181434599, + "grad_norm": 0.4196814000606537, + "learning_rate": 0.00010915818306444116, + "loss": 1.3726, + "step": 8050 + }, + { + "epoch": 0.8502109704641351, + "grad_norm": 0.4241830110549927, + "learning_rate": 0.00010765360327767384, + "loss": 1.3823, + "step": 8060 + }, + { + "epoch": 0.8512658227848101, + "grad_norm": 0.43294644355773926, + "learning_rate": 0.00010616976183842376, + "loss": 1.3876, + "step": 8070 + }, + { + "epoch": 0.8523206751054853, + "grad_norm": 0.4089681804180145, + "learning_rate": 0.00010470637290006365, + "loss": 1.3886, + "step": 8080 + }, + { + "epoch": 0.8533755274261603, + "grad_norm": 0.42662033438682556, + "learning_rate": 0.00010326315455592764, + "loss": 1.3767, + "step": 8090 + }, + { + "epoch": 0.8544303797468354, + "grad_norm": 0.45557141304016113, + "learning_rate": 0.0001018398287850053, + "loss": 1.3683, + "step": 8100 + }, + { + "epoch": 0.8554852320675106, + "grad_norm": 0.4296424388885498, + "learning_rate": 0.00010043612139838357, + "loss": 1.3888, + "step": 8110 + }, + { + "epoch": 0.8565400843881856, + "grad_norm": 0.4354464113712311, + "learning_rate": 9.905176198642719e-05, + "loss": 1.3785, + "step": 8120 + }, + { + "epoch": 0.8575949367088608, + "grad_norm": 0.41208481788635254, + "learning_rate": 
9.76864838666871e-05, + "loss": 1.3798, + "step": 8130 + }, + { + "epoch": 0.8586497890295358, + "grad_norm": 0.40287283062934875, + "learning_rate": 9.634002403252676e-05, + "loss": 1.3746, + "step": 8140 + }, + { + "epoch": 0.859704641350211, + "grad_norm": 0.41079938411712646, + "learning_rate": 9.501212310245681e-05, + "loss": 1.3717, + "step": 8150 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 0.4065066874027252, + "learning_rate": 9.370252527016777e-05, + "loss": 1.3829, + "step": 8160 + }, + { + "epoch": 0.8618143459915611, + "grad_norm": 0.4285900592803955, + "learning_rate": 9.241097825525163e-05, + "loss": 1.3764, + "step": 8170 + }, + { + "epoch": 0.8628691983122363, + "grad_norm": 0.4585988521575928, + "learning_rate": 9.113723325460276e-05, + "loss": 1.3844, + "step": 8180 + }, + { + "epoch": 0.8639240506329114, + "grad_norm": 0.4628828465938568, + "learning_rate": 8.988104489448849e-05, + "loss": 1.3768, + "step": 8190 + }, + { + "epoch": 0.8649789029535865, + "grad_norm": 0.43181532621383667, + "learning_rate": 8.864217118328042e-05, + "loss": 1.3857, + "step": 8200 + }, + { + "epoch": 0.8660337552742616, + "grad_norm": 0.43050840497016907, + "learning_rate": 8.742037346483729e-05, + "loss": 1.3775, + "step": 8210 + }, + { + "epoch": 0.8670886075949367, + "grad_norm": 0.45058807730674744, + "learning_rate": 8.62154163725303e-05, + "loss": 1.3881, + "step": 8220 + }, + { + "epoch": 0.8681434599156118, + "grad_norm": 0.4476744532585144, + "learning_rate": 8.502706778390219e-05, + "loss": 1.3836, + "step": 8230 + }, + { + "epoch": 0.869198312236287, + "grad_norm": 0.38526806235313416, + "learning_rate": 8.38550987759513e-05, + "loss": 1.3793, + "step": 8240 + }, + { + "epoch": 0.870253164556962, + "grad_norm": 0.4117099642753601, + "learning_rate": 8.269928358103191e-05, + "loss": 1.3951, + "step": 8250 + }, + { + "epoch": 0.8713080168776371, + "grad_norm": 0.4176345765590668, + "learning_rate": 8.155939954336243e-05, + "loss": 1.3857, + 
"step": 8260 + }, + { + "epoch": 0.8723628691983122, + "grad_norm": 0.4403482675552368, + "learning_rate": 8.043522707613312e-05, + "loss": 1.3821, + "step": 8270 + }, + { + "epoch": 0.8734177215189873, + "grad_norm": 0.3984360694885254, + "learning_rate": 7.932654961920486e-05, + "loss": 1.3666, + "step": 8280 + }, + { + "epoch": 0.8744725738396625, + "grad_norm": 0.41802310943603516, + "learning_rate": 7.823315359739135e-05, + "loss": 1.3669, + "step": 8290 + }, + { + "epoch": 0.8755274261603375, + "grad_norm": 0.4352799952030182, + "learning_rate": 7.715482837931577e-05, + "loss": 1.3935, + "step": 8300 + }, + { + "epoch": 0.8765822784810127, + "grad_norm": 0.4808656871318817, + "learning_rate": 7.6091366236835e-05, + "loss": 1.3694, + "step": 8310 + }, + { + "epoch": 0.8776371308016878, + "grad_norm": 0.6241698265075684, + "learning_rate": 7.504256230502289e-05, + "loss": 1.3899, + "step": 8320 + }, + { + "epoch": 0.8786919831223629, + "grad_norm": 0.4258475601673126, + "learning_rate": 7.400821454270524e-05, + "loss": 1.383, + "step": 8330 + }, + { + "epoch": 0.879746835443038, + "grad_norm": 0.391531378030777, + "learning_rate": 7.29881236935386e-05, + "loss": 1.3693, + "step": 8340 + }, + { + "epoch": 0.880801687763713, + "grad_norm": 0.40577179193496704, + "learning_rate": 7.198209324762562e-05, + "loss": 1.3727, + "step": 8350 + }, + { + "epoch": 0.8818565400843882, + "grad_norm": 0.4294425845146179, + "learning_rate": 7.098992940365946e-05, + "loss": 1.3679, + "step": 8360 + }, + { + "epoch": 0.8829113924050633, + "grad_norm": 0.41527122259140015, + "learning_rate": 7.001144103159e-05, + "loss": 1.3798, + "step": 8370 + }, + { + "epoch": 0.8839662447257384, + "grad_norm": 0.4196541905403137, + "learning_rate": 6.904643963580461e-05, + "loss": 1.3858, + "step": 8380 + }, + { + "epoch": 0.8850210970464135, + "grad_norm": 0.39948469400405884, + "learning_rate": 6.809473931881644e-05, + "loss": 1.3744, + "step": 8390 + }, + { + "epoch": 0.8860759493670886, + 
"grad_norm": 0.3883092403411865, + "learning_rate": 6.71561567454532e-05, + "loss": 1.3719, + "step": 8400 + }, + { + "epoch": 0.8871308016877637, + "grad_norm": 0.5183804035186768, + "learning_rate": 6.623051110753948e-05, + "loss": 1.3942, + "step": 8410 + }, + { + "epoch": 0.8881856540084389, + "grad_norm": 0.4039570689201355, + "learning_rate": 6.531762408906607e-05, + "loss": 1.3797, + "step": 8420 + }, + { + "epoch": 0.8892405063291139, + "grad_norm": 0.3927137553691864, + "learning_rate": 6.441731983183912e-05, + "loss": 1.3762, + "step": 8430 + }, + { + "epoch": 0.890295358649789, + "grad_norm": 0.4067143499851227, + "learning_rate": 6.352942490160292e-05, + "loss": 1.3733, + "step": 8440 + }, + { + "epoch": 0.8913502109704642, + "grad_norm": 0.4075525999069214, + "learning_rate": 6.265376825462966e-05, + "loss": 1.3816, + "step": 8450 + }, + { + "epoch": 0.8924050632911392, + "grad_norm": 0.44645389914512634, + "learning_rate": 6.179018120476945e-05, + "loss": 1.3761, + "step": 8460 + }, + { + "epoch": 0.8934599156118144, + "grad_norm": 0.4125687777996063, + "learning_rate": 6.0938497390954946e-05, + "loss": 1.376, + "step": 8470 + }, + { + "epoch": 0.8945147679324894, + "grad_norm": 0.4031289517879486, + "learning_rate": 6.009855274515339e-05, + "loss": 1.3706, + "step": 8480 + }, + { + "epoch": 0.8955696202531646, + "grad_norm": 0.4718487560749054, + "learning_rate": 5.9270185460760735e-05, + "loss": 1.3763, + "step": 8490 + }, + { + "epoch": 0.8966244725738397, + "grad_norm": 0.40616875886917114, + "learning_rate": 5.8453235961431225e-05, + "loss": 1.3667, + "step": 8500 + }, + { + "epoch": 0.8976793248945147, + "grad_norm": 0.4215424954891205, + "learning_rate": 5.764754687033678e-05, + "loss": 1.3789, + "step": 8510 + }, + { + "epoch": 0.8987341772151899, + "grad_norm": 0.41651642322540283, + "learning_rate": 5.6852962979849836e-05, + "loss": 1.3731, + "step": 8520 + }, + { + "epoch": 0.8997890295358649, + "grad_norm": 0.4115312099456787, + 
"learning_rate": 5.6069331221644284e-05, + "loss": 1.3873, + "step": 8530 + }, + { + "epoch": 0.9008438818565401, + "grad_norm": 0.4101486802101135, + "learning_rate": 5.529650063720842e-05, + "loss": 1.3839, + "step": 8540 + }, + { + "epoch": 0.9018987341772152, + "grad_norm": 0.40822911262512207, + "learning_rate": 5.453432234876445e-05, + "loss": 1.3754, + "step": 8550 + }, + { + "epoch": 0.9029535864978903, + "grad_norm": 0.4204903244972229, + "learning_rate": 5.37826495305886e-05, + "loss": 1.3666, + "step": 8560 + }, + { + "epoch": 0.9040084388185654, + "grad_norm": 0.4433811902999878, + "learning_rate": 5.304133738072674e-05, + "loss": 1.3862, + "step": 8570 + }, + { + "epoch": 0.9050632911392406, + "grad_norm": 0.49682924151420593, + "learning_rate": 5.2310243093099814e-05, + "loss": 1.3777, + "step": 8580 + }, + { + "epoch": 0.9061181434599156, + "grad_norm": 0.45827022194862366, + "learning_rate": 5.158922582999368e-05, + "loss": 1.3803, + "step": 8590 + }, + { + "epoch": 0.9071729957805907, + "grad_norm": 0.46369123458862305, + "learning_rate": 5.087814669492819e-05, + "loss": 1.373, + "step": 8600 + }, + { + "epoch": 0.9082278481012658, + "grad_norm": 0.4033257067203522, + "learning_rate": 5.017686870590028e-05, + "loss": 1.3736, + "step": 8610 + }, + { + "epoch": 0.9092827004219409, + "grad_norm": 0.405099093914032, + "learning_rate": 4.948525676899577e-05, + "loss": 1.3702, + "step": 8620 + }, + { + "epoch": 0.9103375527426161, + "grad_norm": 0.41203173995018005, + "learning_rate": 4.880317765236493e-05, + "loss": 1.3816, + "step": 8630 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 0.4495888650417328, + "learning_rate": 4.8130499960556755e-05, + "loss": 1.3636, + "step": 8640 + }, + { + "epoch": 0.9124472573839663, + "grad_norm": 0.427905797958374, + "learning_rate": 4.746709410920699e-05, + "loss": 1.3674, + "step": 8650 + }, + { + "epoch": 0.9135021097046413, + "grad_norm": 0.49238380789756775, + "learning_rate": 4.681283230007507e-05, + 
"loss": 1.3683, + "step": 8660 + }, + { + "epoch": 0.9145569620253164, + "grad_norm": 0.3994861841201782, + "learning_rate": 4.616758849642509e-05, + "loss": 1.3769, + "step": 8670 + }, + { + "epoch": 0.9156118143459916, + "grad_norm": 0.41125601530075073, + "learning_rate": 4.553123839874615e-05, + "loss": 1.3825, + "step": 8680 + }, + { + "epoch": 0.9166666666666666, + "grad_norm": 0.4175708591938019, + "learning_rate": 4.490365942080736e-05, + "loss": 1.3785, + "step": 8690 + }, + { + "epoch": 0.9177215189873418, + "grad_norm": 0.3910943269729614, + "learning_rate": 4.428473066604285e-05, + "loss": 1.3742, + "step": 8700 + }, + { + "epoch": 0.9187763713080169, + "grad_norm": 0.4074828624725342, + "learning_rate": 4.367433290426233e-05, + "loss": 1.3754, + "step": 8710 + }, + { + "epoch": 0.919831223628692, + "grad_norm": 0.42127725481987, + "learning_rate": 4.3072348548682595e-05, + "loss": 1.3683, + "step": 8720 + }, + { + "epoch": 0.9208860759493671, + "grad_norm": 0.4069540202617645, + "learning_rate": 4.247866163327575e-05, + "loss": 1.382, + "step": 8730 + }, + { + "epoch": 0.9219409282700421, + "grad_norm": 0.41554608941078186, + "learning_rate": 4.1893157790429404e-05, + "loss": 1.3706, + "step": 8740 + }, + { + "epoch": 0.9229957805907173, + "grad_norm": 0.38450121879577637, + "learning_rate": 4.1315724228915066e-05, + "loss": 1.3681, + "step": 8750 + }, + { + "epoch": 0.9240506329113924, + "grad_norm": 0.47636377811431885, + "learning_rate": 4.074624971216005e-05, + "loss": 1.3567, + "step": 8760 + }, + { + "epoch": 0.9251054852320675, + "grad_norm": 0.4643012285232544, + "learning_rate": 4.018462453681889e-05, + "loss": 1.3678, + "step": 8770 + }, + { + "epoch": 0.9261603375527426, + "grad_norm": 0.39813393354415894, + "learning_rate": 3.963074051164014e-05, + "loss": 1.376, + "step": 8780 + }, + { + "epoch": 0.9272151898734177, + "grad_norm": 0.39325088262557983, + "learning_rate": 3.908449093662446e-05, + "loss": 1.3689, + "step": 8790 + }, + { + 
"epoch": 0.9282700421940928, + "grad_norm": 0.39915817975997925, + "learning_rate": 3.854577058246998e-05, + "loss": 1.3668, + "step": 8800 + }, + { + "epoch": 0.929324894514768, + "grad_norm": 0.42343389987945557, + "learning_rate": 3.801447567030094e-05, + "loss": 1.3843, + "step": 8810 + }, + { + "epoch": 0.930379746835443, + "grad_norm": 0.39450734853744507, + "learning_rate": 3.7490503851675777e-05, + "loss": 1.369, + "step": 8820 + }, + { + "epoch": 0.9314345991561181, + "grad_norm": 0.401849627494812, + "learning_rate": 3.6973754188870806e-05, + "loss": 1.3879, + "step": 8830 + }, + { + "epoch": 0.9324894514767933, + "grad_norm": 0.39773768186569214, + "learning_rate": 3.6464127135435536e-05, + "loss": 1.3732, + "step": 8840 + }, + { + "epoch": 0.9335443037974683, + "grad_norm": 0.4499163031578064, + "learning_rate": 3.596152451701616e-05, + "loss": 1.3719, + "step": 8850 + }, + { + "epoch": 0.9345991561181435, + "grad_norm": 0.4330551028251648, + "learning_rate": 3.5465849512443226e-05, + "loss": 1.3714, + "step": 8860 + }, + { + "epoch": 0.9356540084388185, + "grad_norm": 0.4155457317829132, + "learning_rate": 3.4977006635080086e-05, + "loss": 1.3739, + "step": 8870 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 0.38983967900276184, + "learning_rate": 3.449490171442838e-05, + "loss": 1.3767, + "step": 8880 + }, + { + "epoch": 0.9377637130801688, + "grad_norm": 0.4134129583835602, + "learning_rate": 3.401944187798702e-05, + "loss": 1.3765, + "step": 8890 + }, + { + "epoch": 0.9388185654008439, + "grad_norm": 0.3873719274997711, + "learning_rate": 3.355053553336137e-05, + "loss": 1.3664, + "step": 8900 + }, + { + "epoch": 0.939873417721519, + "grad_norm": 0.4332026243209839, + "learning_rate": 3.308809235061882e-05, + "loss": 1.3679, + "step": 8910 + }, + { + "epoch": 0.9409282700421941, + "grad_norm": 0.4026283025741577, + "learning_rate": 3.263202324488772e-05, + "loss": 1.3738, + "step": 8920 + }, + { + "epoch": 0.9419831223628692, + "grad_norm": 
0.39853334426879883, + "learning_rate": 3.218224035919609e-05, + "loss": 1.3718, + "step": 8930 + }, + { + "epoch": 0.9430379746835443, + "grad_norm": 0.40445834398269653, + "learning_rate": 3.173865704754688e-05, + "loss": 1.3713, + "step": 8940 + }, + { + "epoch": 0.9440928270042194, + "grad_norm": 0.39231348037719727, + "learning_rate": 3.130118785822657e-05, + "loss": 1.3728, + "step": 8950 + }, + { + "epoch": 0.9451476793248945, + "grad_norm": 0.41678184270858765, + "learning_rate": 3.08697485173437e-05, + "loss": 1.3771, + "step": 8960 + }, + { + "epoch": 0.9462025316455697, + "grad_norm": 0.4322419762611389, + "learning_rate": 3.0444255912594442e-05, + "loss": 1.373, + "step": 8970 + }, + { + "epoch": 0.9472573839662447, + "grad_norm": 0.4527577757835388, + "learning_rate": 3.002462807725185e-05, + "loss": 1.3696, + "step": 8980 + }, + { + "epoch": 0.9483122362869199, + "grad_norm": 0.4193917512893677, + "learning_rate": 2.9610784174375868e-05, + "loss": 1.3765, + "step": 8990 + }, + { + "epoch": 0.9493670886075949, + "grad_norm": 0.3905143141746521, + "learning_rate": 2.920264448124087e-05, + "loss": 1.371, + "step": 9000 + }, + { + "epoch": 0.95042194092827, + "grad_norm": 0.48122960329055786, + "learning_rate": 2.8800130373977936e-05, + "loss": 1.3686, + "step": 9010 + }, + { + "epoch": 0.9514767932489452, + "grad_norm": 0.40320464968681335, + "learning_rate": 2.84031643124288e-05, + "loss": 1.3676, + "step": 9020 + }, + { + "epoch": 0.9525316455696202, + "grad_norm": 0.43494367599487305, + "learning_rate": 2.8011669825208517e-05, + "loss": 1.3892, + "step": 9030 + }, + { + "epoch": 0.9535864978902954, + "grad_norm": 0.38821348547935486, + "learning_rate": 2.762557149497405e-05, + "loss": 1.3635, + "step": 9040 + }, + { + "epoch": 0.9546413502109705, + "grad_norm": 0.3894213140010834, + "learning_rate": 2.724479494389592e-05, + "loss": 1.3751, + "step": 9050 + }, + { + "epoch": 0.9556962025316456, + "grad_norm": 0.3882591724395752, + "learning_rate": 
2.6869266819330058e-05, + "loss": 1.3764, + "step": 9060 + }, + { + "epoch": 0.9567510548523207, + "grad_norm": 0.402079313993454, + "learning_rate": 2.6498914779687228e-05, + "loss": 1.3762, + "step": 9070 + }, + { + "epoch": 0.9578059071729957, + "grad_norm": 0.3952219784259796, + "learning_rate": 2.6133667480497115e-05, + "loss": 1.3765, + "step": 9080 + }, + { + "epoch": 0.9588607594936709, + "grad_norm": 0.4140000641345978, + "learning_rate": 2.5773454560664597e-05, + "loss": 1.3779, + "step": 9090 + }, + { + "epoch": 0.959915611814346, + "grad_norm": 0.3977586030960083, + "learning_rate": 2.541820662891541e-05, + "loss": 1.3577, + "step": 9100 + }, + { + "epoch": 0.9609704641350211, + "grad_norm": 0.3956061601638794, + "learning_rate": 2.5067855250428616e-05, + "loss": 1.3683, + "step": 9110 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 0.41328442096710205, + "learning_rate": 2.472233293365335e-05, + "loss": 1.3782, + "step": 9120 + }, + { + "epoch": 0.9630801687763713, + "grad_norm": 0.42529335618019104, + "learning_rate": 2.4381573117307307e-05, + "loss": 1.3662, + "step": 9130 + }, + { + "epoch": 0.9641350210970464, + "grad_norm": 0.413034588098526, + "learning_rate": 2.4045510157554362e-05, + "loss": 1.3834, + "step": 9140 + }, + { + "epoch": 0.9651898734177216, + "grad_norm": 0.4039822816848755, + "learning_rate": 2.3714079315358985e-05, + "loss": 1.3703, + "step": 9150 + }, + { + "epoch": 0.9662447257383966, + "grad_norm": 0.4148334562778473, + "learning_rate": 2.338721674401494e-05, + "loss": 1.3787, + "step": 9160 + }, + { + "epoch": 0.9672995780590717, + "grad_norm": 0.4227692484855652, + "learning_rate": 2.30648594768459e-05, + "loss": 1.3752, + "step": 9170 + }, + { + "epoch": 0.9683544303797469, + "grad_norm": 0.40048426389694214, + "learning_rate": 2.2746945415075523e-05, + "loss": 1.3696, + "step": 9180 + }, + { + "epoch": 0.9694092827004219, + "grad_norm": 0.40551966428756714, + "learning_rate": 2.2433413315864803e-05, + "loss": 1.368, 
+ "step": 9190 + }, + { + "epoch": 0.9704641350210971, + "grad_norm": 0.40273284912109375, + "learning_rate": 2.2124202780514277e-05, + "loss": 1.371, + "step": 9200 + }, + { + "epoch": 0.9715189873417721, + "grad_norm": 0.396890789270401, + "learning_rate": 2.1819254242828815e-05, + "loss": 1.3713, + "step": 9210 + }, + { + "epoch": 0.9725738396624473, + "grad_norm": 0.41630643606185913, + "learning_rate": 2.151850895764285e-05, + "loss": 1.3697, + "step": 9220 + }, + { + "epoch": 0.9736286919831224, + "grad_norm": 0.38820475339889526, + "learning_rate": 2.12219089895037e-05, + "loss": 1.3586, + "step": 9230 + }, + { + "epoch": 0.9746835443037974, + "grad_norm": 0.3982078731060028, + "learning_rate": 2.092939720151092e-05, + "loss": 1.3547, + "step": 9240 + }, + { + "epoch": 0.9757383966244726, + "grad_norm": 0.4186069667339325, + "learning_rate": 2.064091724430947e-05, + "loss": 1.3669, + "step": 9250 + }, + { + "epoch": 0.9767932489451476, + "grad_norm": 0.4142046570777893, + "learning_rate": 2.0356413545234603e-05, + "loss": 1.3631, + "step": 9260 + }, + { + "epoch": 0.9778481012658228, + "grad_norm": 0.38553792238235474, + "learning_rate": 2.0075831297606357e-05, + "loss": 1.3746, + "step": 9270 + }, + { + "epoch": 0.9789029535864979, + "grad_norm": 0.39433085918426514, + "learning_rate": 1.9799116450171627e-05, + "loss": 1.3629, + "step": 9280 + }, + { + "epoch": 0.979957805907173, + "grad_norm": 0.39306923747062683, + "learning_rate": 1.952621569669175e-05, + "loss": 1.3693, + "step": 9290 + }, + { + "epoch": 0.9810126582278481, + "grad_norm": 0.41047433018684387, + "learning_rate": 1.9257076465673605e-05, + "loss": 1.3689, + "step": 9300 + }, + { + "epoch": 0.9820675105485233, + "grad_norm": 0.44673049449920654, + "learning_rate": 1.899164691024229e-05, + "loss": 1.3686, + "step": 9310 + }, + { + "epoch": 0.9831223628691983, + "grad_norm": 0.4185176193714142, + "learning_rate": 1.872987589815331e-05, + "loss": 1.3641, + "step": 9320 + }, + { + "epoch": 
0.9841772151898734, + "grad_norm": 0.39824265241622925, + "learning_rate": 1.8471713001942538e-05, + "loss": 1.3758, + "step": 9330 + }, + { + "epoch": 0.9852320675105485, + "grad_norm": 0.3979812562465668, + "learning_rate": 1.8217108489211845e-05, + "loss": 1.3734, + "step": 9340 + }, + { + "epoch": 0.9862869198312236, + "grad_norm": 0.38724735379219055, + "learning_rate": 1.7966013313048696e-05, + "loss": 1.3855, + "step": 9350 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 0.3822114169597626, + "learning_rate": 1.7718379102577752e-05, + "loss": 1.377, + "step": 9360 + }, + { + "epoch": 0.9883966244725738, + "grad_norm": 0.43143442273139954, + "learning_rate": 1.7474158153642745e-05, + "loss": 1.3738, + "step": 9370 + }, + { + "epoch": 0.989451476793249, + "grad_norm": 0.42795488238334656, + "learning_rate": 1.7233303419616745e-05, + "loss": 1.3594, + "step": 9380 + }, + { + "epoch": 0.990506329113924, + "grad_norm": 0.41713598370552063, + "learning_rate": 1.699576850233916e-05, + "loss": 1.3648, + "step": 9390 + }, + { + "epoch": 0.9915611814345991, + "grad_norm": 0.4310470521450043, + "learning_rate": 1.6761507643177553e-05, + "loss": 1.3777, + "step": 9400 + }, + { + "epoch": 0.9926160337552743, + "grad_norm": 0.39297470450401306, + "learning_rate": 1.6530475714212752e-05, + "loss": 1.3532, + "step": 9410 + }, + { + "epoch": 0.9936708860759493, + "grad_norm": 0.409742534160614, + "learning_rate": 1.6302628209545423e-05, + "loss": 1.3728, + "step": 9420 + }, + { + "epoch": 0.9947257383966245, + "grad_norm": 0.40209704637527466, + "learning_rate": 1.6077921236722464e-05, + "loss": 1.3732, + "step": 9430 + }, + { + "epoch": 0.9957805907172996, + "grad_norm": 0.399766743183136, + "learning_rate": 1.5856311508281594e-05, + "loss": 1.3689, + "step": 9440 + }, + { + "epoch": 0.9968354430379747, + "grad_norm": 0.3950158655643463, + "learning_rate": 1.5637756333412454e-05, + "loss": 1.3781, + "step": 9450 + }, + { + "epoch": 0.9978902953586498, + "grad_norm": 
0.41801828145980835, + "learning_rate": 1.542221360973268e-05, + "loss": 1.3632, + "step": 9460 + }, + { + "epoch": 0.9989451476793249, + "grad_norm": 0.3901999890804291, + "learning_rate": 1.5209641815177312e-05, + "loss": 1.3744, + "step": 9470 + }, + { + "epoch": 1.0, + "grad_norm": 1.1799888610839844, + "learning_rate": 1.5e-05, + "loss": 1.364, + "step": 9480 + } + ], + "logging_steps": 10, + "max_steps": 9480, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.839757817279693e+16, + "train_batch_size": 1024, + "trial_name": null, + "trial_params": null +} diff --git a/saves-bloom/checkpoint-9480/training_args.bin b/saves-bloom/checkpoint-9480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..646c31afe650dbb33a57749516530f50f31c2238 --- /dev/null +++ b/saves-bloom/checkpoint-9480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0478e1b1ce9b7b26ee9e7df73313a27c50f1fc95751c99beda3039e0c89ac84 +size 5112 diff --git a/saves-bloom/config.json b/saves-bloom/config.json new file mode 100644 index 0000000000000000000000000000000000000000..57266dc701ccdcb97654ab407a925e296c45c5b8 --- /dev/null +++ b/saves-bloom/config.json @@ -0,0 +1,25 @@ +{ + "apply_residual_connection_post_layernorm": false, + "architectures": [ + "BloomForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "gelu", + "hidden_dropout": 0.0, + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 1024, + "layer_norm_epsilon": 1e-05, + "model_type": "bloom", + "n_head": 8, + "n_layer": 2, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "slow_but_exact": false, + "torch_dtype": "float32", 
+ "transformers_version": "4.42.4", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-bloom/generation_config.json b/saves-bloom/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b2fc224da8a3685f78c733a0ef85e67242c17b5a --- /dev/null +++ b/saves-bloom/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.42.4" +} diff --git a/saves-bloom/model.safetensors b/saves-bloom/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1d5125393ea05cba92d59f823bab9d9d696a9361 --- /dev/null +++ b/saves-bloom/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb05b64ed9e1624ccea38fc90673c2a407026e35048f7394f6a82dab4a6aa657 +size 8373336 diff --git a/saves-bloom/result.log b/saves-bloom/result.log new file mode 100644 index 0000000000000000000000000000000000000000..dfe3c291b0bb50611e8d00cbd530f30854787b04 --- /dev/null +++ b/saves-bloom/result.log @@ -0,0 +1 @@ +{'train_runtime': 2849.7142, 'train_samples_per_second': 3406.168, 'train_steps_per_second': 3.327, 'train_loss': 1.6909881898622472, 'epoch': 1.0} \ No newline at end of file diff --git a/saves-bloom/special_tokens_map.json b/saves-bloom/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-bloom/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-bloom/tokenizer.json b/saves-bloom/tokenizer.json 
new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-bloom/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + 
"=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, 
+ "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + 
"å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + 
"ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + 
"åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 
790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 
931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 
1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + 
"çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + 
"åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + 
"ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 
1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 
1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + 
"éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 
1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-bloom/tokenizer_config.json b/saves-bloom/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-bloom/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "model_max_length": 
4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-cohere-bf16/checkpoint-9480/config.json b/saves-cohere-bf16/checkpoint-9480/config.json new file mode 100644 index 0000000000000000000000000000000000000000..259285235c35edc8a5995dd47b86f04d0d5e97d2 --- /dev/null +++ b/saves-cohere-bf16/checkpoint-9480/config.json @@ -0,0 +1,27 @@ +{ + "architectures": [ + "CohereForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 5, + "eos_token_id": 255001, + "hidden_act": "silu", + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 768, + "layer_norm_eps": 1e-05, + "logit_scale": 0.0625, + "max_position_embeddings": 8192, + "model_type": "cohere", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pad_token_id": 0, + "rope_theta": 10000.0, + "torch_dtype": "float32", + "transformers_version": "4.42.0", + "use_cache": true, + "use_qk_norm": false, + "vocab_size": 2000 +} diff --git a/saves-cohere-bf16/checkpoint-9480/generation_config.json b/saves-cohere-bf16/checkpoint-9480/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7f9ed94d5f4f18cd4865c82b517f1cf872d5b322 --- /dev/null +++ b/saves-cohere-bf16/checkpoint-9480/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 5, + "eos_token_id": 255001, + "pad_token_id": 0, + "transformers_version": "4.42.0" +} diff --git a/saves-cohere-bf16/checkpoint-9480/model.safetensors b/saves-cohere-bf16/checkpoint-9480/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0855996b8674a264ea883a50133d281d125133e6 --- /dev/null +++ b/saves-cohere-bf16/checkpoint-9480/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3845c0f8d5ba9a1405908d2522f82f19083de962011880b88292d4f2aaad80b9 +size 8344440 diff --git 
a/saves-cohere-bf16/checkpoint-9480/optimizer.pt b/saves-cohere-bf16/checkpoint-9480/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a286d745609bb704875e9c2c01b098d646a6b513 --- /dev/null +++ b/saves-cohere-bf16/checkpoint-9480/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79fa246d0c6b8281c9408526d5d88c8285bde33d334a96e84a1780c4b7981ac8 +size 16700648 diff --git a/saves-cohere-bf16/checkpoint-9480/rng_state.pth b/saves-cohere-bf16/checkpoint-9480/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f8291cd6ce87668b786a72f3e93d072fbe54902 --- /dev/null +++ b/saves-cohere-bf16/checkpoint-9480/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c917636c7a58af68a29056522a757e9f9b99005b776641aa157c536967817d +size 14244 diff --git a/saves-cohere-bf16/checkpoint-9480/scheduler.pt b/saves-cohere-bf16/checkpoint-9480/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..63473f23a031ab0f869bb406d5cf89839262f03d --- /dev/null +++ b/saves-cohere-bf16/checkpoint-9480/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbb2bea2f7536b844ad9bb1bf6c3877fce0b1eb4d96764e140560dbf207ce6aa +size 1064 diff --git a/saves-cohere-bf16/checkpoint-9480/special_tokens_map.json b/saves-cohere-bf16/checkpoint-9480/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-cohere-bf16/checkpoint-9480/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false + } +} diff --git a/saves-cohere-bf16/checkpoint-9480/tokenizer.json b/saves-cohere-bf16/checkpoint-9480/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-cohere-bf16/checkpoint-9480/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, 
+ "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + 
"Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + 
"ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 
486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 
632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + 
"次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, 
+ "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 
1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + 
"æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 
1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + 
"Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, 
+ "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + 
"å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 
1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 
1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-cohere-bf16/checkpoint-9480/tokenizer_config.json b/saves-cohere-bf16/checkpoint-9480/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-cohere-bf16/checkpoint-9480/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + 
"eos_token": "<|im_end|>", + "errors": "replace", + "model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-cohere-bf16/checkpoint-9480/trainer_state.json b/saves-cohere-bf16/checkpoint-9480/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4ce1897dae3c0573733629c4b910c07a9dac455c --- /dev/null +++ b/saves-cohere-bf16/checkpoint-9480/trainer_state.json @@ -0,0 +1,6669 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 9480, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0010548523206751054, + "grad_norm": 0.08000069111585617, + "learning_rate": 0.00015822784810126583, + "loss": 7.5887, + "step": 10 + }, + { + "epoch": 0.002109704641350211, + "grad_norm": 0.07872951030731201, + "learning_rate": 0.00031645569620253165, + "loss": 7.5464, + "step": 20 + }, + { + "epoch": 0.0031645569620253164, + "grad_norm": 0.07734671235084534, + "learning_rate": 0.00047468354430379745, + "loss": 7.4957, + "step": 30 + }, + { + "epoch": 0.004219409282700422, + "grad_norm": 0.08023999631404877, + "learning_rate": 0.0006329113924050633, + "loss": 7.4244, + "step": 40 + }, + { + "epoch": 0.005274261603375527, + "grad_norm": 0.08475606143474579, + "learning_rate": 0.0007911392405063291, + "loss": 7.3265, + "step": 50 + }, + { + "epoch": 0.006329113924050633, + "grad_norm": 0.10008183121681213, + "learning_rate": 0.0009493670886075949, + "loss": 7.2047, + "step": 60 + }, + { + "epoch": 0.007383966244725738, + "grad_norm": 0.09628511965274811, + "learning_rate": 0.0011075949367088608, + "loss": 7.0673, + "step": 70 + }, + { + "epoch": 0.008438818565400843, + "grad_norm": 0.21714860200881958, + "learning_rate": 0.0012658227848101266, + "loss": 6.9366, + "step": 80 + }, + { + "epoch": 
0.00949367088607595, + "grad_norm": 0.1056312844157219, + "learning_rate": 0.0014240506329113926, + "loss": 6.8047, + "step": 90 + }, + { + "epoch": 0.010548523206751054, + "grad_norm": 0.10105545073747635, + "learning_rate": 0.0015, + "loss": 6.6619, + "step": 100 + }, + { + "epoch": 0.011603375527426161, + "grad_norm": 0.10644520819187164, + "learning_rate": 0.0015, + "loss": 6.5215, + "step": 110 + }, + { + "epoch": 0.012658227848101266, + "grad_norm": 0.12805885076522827, + "learning_rate": 0.0015, + "loss": 6.3894, + "step": 120 + }, + { + "epoch": 0.013713080168776372, + "grad_norm": 0.11895852535963058, + "learning_rate": 0.0015, + "loss": 6.2576, + "step": 130 + }, + { + "epoch": 0.014767932489451477, + "grad_norm": 0.10951970517635345, + "learning_rate": 0.0015, + "loss": 6.1227, + "step": 140 + }, + { + "epoch": 0.015822784810126583, + "grad_norm": 0.10010507702827454, + "learning_rate": 0.0015, + "loss": 5.9806, + "step": 150 + }, + { + "epoch": 0.016877637130801686, + "grad_norm": 0.10386305302381516, + "learning_rate": 0.0015, + "loss": 5.8499, + "step": 160 + }, + { + "epoch": 0.017932489451476793, + "grad_norm": 0.17313894629478455, + "learning_rate": 0.0015, + "loss": 5.7197, + "step": 170 + }, + { + "epoch": 0.0189873417721519, + "grad_norm": 0.1267988085746765, + "learning_rate": 0.0015, + "loss": 5.5993, + "step": 180 + }, + { + "epoch": 0.020042194092827006, + "grad_norm": 0.18178296089172363, + "learning_rate": 0.0015, + "loss": 5.483, + "step": 190 + }, + { + "epoch": 0.02109704641350211, + "grad_norm": 0.17508190870285034, + "learning_rate": 0.0015, + "loss": 5.3645, + "step": 200 + }, + { + "epoch": 0.022151898734177215, + "grad_norm": 0.3925415575504303, + "learning_rate": 0.0015, + "loss": 5.2723, + "step": 210 + }, + { + "epoch": 0.023206751054852322, + "grad_norm": 0.1316889524459839, + "learning_rate": 0.0015, + "loss": 5.1758, + "step": 220 + }, + { + "epoch": 0.024261603375527425, + "grad_norm": 0.12996184825897217, + "learning_rate": 
0.0015, + "loss": 5.0767, + "step": 230 + }, + { + "epoch": 0.02531645569620253, + "grad_norm": 0.2191348820924759, + "learning_rate": 0.0015, + "loss": 4.9929, + "step": 240 + }, + { + "epoch": 0.026371308016877638, + "grad_norm": 0.16052663326263428, + "learning_rate": 0.0015, + "loss": 4.9128, + "step": 250 + }, + { + "epoch": 0.027426160337552744, + "grad_norm": 0.2407989203929901, + "learning_rate": 0.0015, + "loss": 4.8402, + "step": 260 + }, + { + "epoch": 0.028481012658227847, + "grad_norm": 0.15928399562835693, + "learning_rate": 0.0015, + "loss": 4.772, + "step": 270 + }, + { + "epoch": 0.029535864978902954, + "grad_norm": 0.18819557130336761, + "learning_rate": 0.0015, + "loss": 4.704, + "step": 280 + }, + { + "epoch": 0.03059071729957806, + "grad_norm": 0.3014315366744995, + "learning_rate": 0.0015, + "loss": 4.6373, + "step": 290 + }, + { + "epoch": 0.03164556962025317, + "grad_norm": 0.2249811440706253, + "learning_rate": 0.0015, + "loss": 4.5768, + "step": 300 + }, + { + "epoch": 0.03270042194092827, + "grad_norm": 0.2288593202829361, + "learning_rate": 0.0015, + "loss": 4.5108, + "step": 310 + }, + { + "epoch": 0.03375527426160337, + "grad_norm": 0.30202898383140564, + "learning_rate": 0.0015, + "loss": 4.4485, + "step": 320 + }, + { + "epoch": 0.03481012658227848, + "grad_norm": 0.2222922146320343, + "learning_rate": 0.0015, + "loss": 4.3979, + "step": 330 + }, + { + "epoch": 0.035864978902953586, + "grad_norm": 0.2275601029396057, + "learning_rate": 0.0015, + "loss": 4.3382, + "step": 340 + }, + { + "epoch": 0.03691983122362869, + "grad_norm": 0.44312891364097595, + "learning_rate": 0.0015, + "loss": 4.2705, + "step": 350 + }, + { + "epoch": 0.0379746835443038, + "grad_norm": 0.25240451097488403, + "learning_rate": 0.0015, + "loss": 4.2262, + "step": 360 + }, + { + "epoch": 0.039029535864978905, + "grad_norm": 0.26719385385513306, + "learning_rate": 0.0015, + "loss": 4.1729, + "step": 370 + }, + { + "epoch": 0.04008438818565401, + "grad_norm": 
0.3257826864719391, + "learning_rate": 0.0015, + "loss": 4.1246, + "step": 380 + }, + { + "epoch": 0.04113924050632911, + "grad_norm": 0.2367611527442932, + "learning_rate": 0.0015, + "loss": 4.0749, + "step": 390 + }, + { + "epoch": 0.04219409282700422, + "grad_norm": 0.3027140200138092, + "learning_rate": 0.0015, + "loss": 4.0161, + "step": 400 + }, + { + "epoch": 0.043248945147679324, + "grad_norm": 0.27771511673927307, + "learning_rate": 0.0015, + "loss": 3.9917, + "step": 410 + }, + { + "epoch": 0.04430379746835443, + "grad_norm": 0.43055254220962524, + "learning_rate": 0.0015, + "loss": 3.9399, + "step": 420 + }, + { + "epoch": 0.04535864978902954, + "grad_norm": 0.3237093389034271, + "learning_rate": 0.0015, + "loss": 3.8899, + "step": 430 + }, + { + "epoch": 0.046413502109704644, + "grad_norm": 0.37645670771598816, + "learning_rate": 0.0015, + "loss": 3.8477, + "step": 440 + }, + { + "epoch": 0.04746835443037975, + "grad_norm": 0.34314456582069397, + "learning_rate": 0.0015, + "loss": 3.8153, + "step": 450 + }, + { + "epoch": 0.04852320675105485, + "grad_norm": 0.38104280829429626, + "learning_rate": 0.0015, + "loss": 3.7744, + "step": 460 + }, + { + "epoch": 0.049578059071729956, + "grad_norm": 0.38231614232063293, + "learning_rate": 0.0015, + "loss": 3.7297, + "step": 470 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 0.333865761756897, + "learning_rate": 0.0015, + "loss": 3.7088, + "step": 480 + }, + { + "epoch": 0.05168776371308017, + "grad_norm": 0.4378509223461151, + "learning_rate": 0.0015, + "loss": 3.6681, + "step": 490 + }, + { + "epoch": 0.052742616033755275, + "grad_norm": 0.29420173168182373, + "learning_rate": 0.0015, + "loss": 3.6326, + "step": 500 + }, + { + "epoch": 0.05379746835443038, + "grad_norm": 0.2545853555202484, + "learning_rate": 0.0015, + "loss": 3.6121, + "step": 510 + }, + { + "epoch": 0.05485232067510549, + "grad_norm": 0.4205600619316101, + "learning_rate": 0.0015, + "loss": 3.5755, + "step": 520 + }, + { + "epoch": 
0.05590717299578059, + "grad_norm": 0.4887816309928894, + "learning_rate": 0.0015, + "loss": 3.5506, + "step": 530 + }, + { + "epoch": 0.056962025316455694, + "grad_norm": 0.3589875102043152, + "learning_rate": 0.0015, + "loss": 3.5253, + "step": 540 + }, + { + "epoch": 0.0580168776371308, + "grad_norm": 0.37651869654655457, + "learning_rate": 0.0015, + "loss": 3.5005, + "step": 550 + }, + { + "epoch": 0.05907172995780591, + "grad_norm": 0.4500958323478699, + "learning_rate": 0.0015, + "loss": 3.4524, + "step": 560 + }, + { + "epoch": 0.060126582278481014, + "grad_norm": 0.3502822518348694, + "learning_rate": 0.0015, + "loss": 3.4423, + "step": 570 + }, + { + "epoch": 0.06118143459915612, + "grad_norm": 0.5461118817329407, + "learning_rate": 0.0015, + "loss": 3.4262, + "step": 580 + }, + { + "epoch": 0.06223628691983123, + "grad_norm": 0.7087445855140686, + "learning_rate": 0.0015, + "loss": 3.3883, + "step": 590 + }, + { + "epoch": 0.06329113924050633, + "grad_norm": 0.38886040449142456, + "learning_rate": 0.0015, + "loss": 3.357, + "step": 600 + }, + { + "epoch": 0.06434599156118144, + "grad_norm": 0.38216376304626465, + "learning_rate": 0.0015, + "loss": 3.3421, + "step": 610 + }, + { + "epoch": 0.06540084388185655, + "grad_norm": 0.5458016395568848, + "learning_rate": 0.0015, + "loss": 3.3245, + "step": 620 + }, + { + "epoch": 0.06645569620253164, + "grad_norm": 0.40753501653671265, + "learning_rate": 0.0015, + "loss": 3.2963, + "step": 630 + }, + { + "epoch": 0.06751054852320675, + "grad_norm": 0.5089264512062073, + "learning_rate": 0.0015, + "loss": 3.2883, + "step": 640 + }, + { + "epoch": 0.06856540084388185, + "grad_norm": 0.49709829688072205, + "learning_rate": 0.0015, + "loss": 3.2718, + "step": 650 + }, + { + "epoch": 0.06962025316455696, + "grad_norm": 0.5032679438591003, + "learning_rate": 0.0015, + "loss": 3.2483, + "step": 660 + }, + { + "epoch": 0.07067510548523206, + "grad_norm": 0.4408726990222931, + "learning_rate": 0.0015, + "loss": 3.2231, + 
"step": 670 + }, + { + "epoch": 0.07172995780590717, + "grad_norm": 0.528001606464386, + "learning_rate": 0.0015, + "loss": 3.1996, + "step": 680 + }, + { + "epoch": 0.07278481012658228, + "grad_norm": 0.3725454807281494, + "learning_rate": 0.0015, + "loss": 3.2004, + "step": 690 + }, + { + "epoch": 0.07383966244725738, + "grad_norm": 0.43372929096221924, + "learning_rate": 0.0015, + "loss": 3.1707, + "step": 700 + }, + { + "epoch": 0.07489451476793249, + "grad_norm": 0.4604400098323822, + "learning_rate": 0.0015, + "loss": 3.1517, + "step": 710 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 0.463722825050354, + "learning_rate": 0.0015, + "loss": 3.1214, + "step": 720 + }, + { + "epoch": 0.0770042194092827, + "grad_norm": 0.812790036201477, + "learning_rate": 0.0015, + "loss": 3.1103, + "step": 730 + }, + { + "epoch": 0.07805907172995781, + "grad_norm": 0.5507075190544128, + "learning_rate": 0.0015, + "loss": 3.1113, + "step": 740 + }, + { + "epoch": 0.07911392405063292, + "grad_norm": 0.7195579409599304, + "learning_rate": 0.0015, + "loss": 3.0831, + "step": 750 + }, + { + "epoch": 0.08016877637130802, + "grad_norm": 0.46877631545066833, + "learning_rate": 0.0015, + "loss": 3.0686, + "step": 760 + }, + { + "epoch": 0.08122362869198312, + "grad_norm": 0.5957846641540527, + "learning_rate": 0.0015, + "loss": 3.0481, + "step": 770 + }, + { + "epoch": 0.08227848101265822, + "grad_norm": 0.5983896255493164, + "learning_rate": 0.0015, + "loss": 3.0327, + "step": 780 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 0.6489009857177734, + "learning_rate": 0.0015, + "loss": 3.0334, + "step": 790 + }, + { + "epoch": 0.08438818565400844, + "grad_norm": 0.622799813747406, + "learning_rate": 0.0015, + "loss": 3.0016, + "step": 800 + }, + { + "epoch": 0.08544303797468354, + "grad_norm": 0.49533048272132874, + "learning_rate": 0.0015, + "loss": 2.9787, + "step": 810 + }, + { + "epoch": 0.08649789029535865, + "grad_norm": 0.6578832268714905, + "learning_rate": 
0.0015, + "loss": 2.9771, + "step": 820 + }, + { + "epoch": 0.08755274261603375, + "grad_norm": 0.5393092036247253, + "learning_rate": 0.0015, + "loss": 2.9753, + "step": 830 + }, + { + "epoch": 0.08860759493670886, + "grad_norm": 0.4226798117160797, + "learning_rate": 0.0015, + "loss": 2.9433, + "step": 840 + }, + { + "epoch": 0.08966244725738397, + "grad_norm": 0.42221006751060486, + "learning_rate": 0.0015, + "loss": 2.9361, + "step": 850 + }, + { + "epoch": 0.09071729957805907, + "grad_norm": 0.7529399991035461, + "learning_rate": 0.0015, + "loss": 2.9224, + "step": 860 + }, + { + "epoch": 0.09177215189873418, + "grad_norm": 0.6478572487831116, + "learning_rate": 0.0015, + "loss": 2.9178, + "step": 870 + }, + { + "epoch": 0.09282700421940929, + "grad_norm": 0.5442308187484741, + "learning_rate": 0.0015, + "loss": 2.8998, + "step": 880 + }, + { + "epoch": 0.0938818565400844, + "grad_norm": 0.5076757073402405, + "learning_rate": 0.0015, + "loss": 2.8872, + "step": 890 + }, + { + "epoch": 0.0949367088607595, + "grad_norm": 0.5085638761520386, + "learning_rate": 0.0015, + "loss": 2.8723, + "step": 900 + }, + { + "epoch": 0.09599156118143459, + "grad_norm": 0.5724137425422668, + "learning_rate": 0.0015, + "loss": 2.8646, + "step": 910 + }, + { + "epoch": 0.0970464135021097, + "grad_norm": 0.5052751302719116, + "learning_rate": 0.0015, + "loss": 2.851, + "step": 920 + }, + { + "epoch": 0.0981012658227848, + "grad_norm": 0.45936843752861023, + "learning_rate": 0.0015, + "loss": 2.828, + "step": 930 + }, + { + "epoch": 0.09915611814345991, + "grad_norm": 0.6637619733810425, + "learning_rate": 0.0015, + "loss": 2.8232, + "step": 940 + }, + { + "epoch": 0.10021097046413502, + "grad_norm": 0.7366319894790649, + "learning_rate": 0.0015, + "loss": 2.8011, + "step": 950 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 0.6104623675346375, + "learning_rate": 0.0015, + "loss": 2.8028, + "step": 960 + }, + { + "epoch": 0.10232067510548523, + "grad_norm": 
0.640235424041748, + "learning_rate": 0.0015, + "loss": 2.794, + "step": 970 + }, + { + "epoch": 0.10337552742616034, + "grad_norm": 0.5153405666351318, + "learning_rate": 0.0015, + "loss": 2.7663, + "step": 980 + }, + { + "epoch": 0.10443037974683544, + "grad_norm": 0.5124087929725647, + "learning_rate": 0.0015, + "loss": 2.7623, + "step": 990 + }, + { + "epoch": 0.10548523206751055, + "grad_norm": 0.5819884538650513, + "learning_rate": 0.0015, + "loss": 2.7603, + "step": 1000 + }, + { + "epoch": 0.10654008438818566, + "grad_norm": 0.4700416922569275, + "learning_rate": 0.0015, + "loss": 2.7508, + "step": 1010 + }, + { + "epoch": 0.10759493670886076, + "grad_norm": 0.7776612639427185, + "learning_rate": 0.0015, + "loss": 2.7355, + "step": 1020 + }, + { + "epoch": 0.10864978902953587, + "grad_norm": 0.6290371417999268, + "learning_rate": 0.0015, + "loss": 2.722, + "step": 1030 + }, + { + "epoch": 0.10970464135021098, + "grad_norm": 0.6244022846221924, + "learning_rate": 0.0015, + "loss": 2.7161, + "step": 1040 + }, + { + "epoch": 0.11075949367088607, + "grad_norm": 0.6787243485450745, + "learning_rate": 0.0015, + "loss": 2.7142, + "step": 1050 + }, + { + "epoch": 0.11181434599156118, + "grad_norm": 0.5542663335800171, + "learning_rate": 0.0015, + "loss": 2.6974, + "step": 1060 + }, + { + "epoch": 0.11286919831223628, + "grad_norm": 0.39720186591148376, + "learning_rate": 0.0015, + "loss": 2.6765, + "step": 1070 + }, + { + "epoch": 0.11392405063291139, + "grad_norm": 0.4627390205860138, + "learning_rate": 0.0015, + "loss": 2.6721, + "step": 1080 + }, + { + "epoch": 0.1149789029535865, + "grad_norm": 0.5184245109558105, + "learning_rate": 0.0015, + "loss": 2.6663, + "step": 1090 + }, + { + "epoch": 0.1160337552742616, + "grad_norm": 0.6086922287940979, + "learning_rate": 0.0015, + "loss": 2.6502, + "step": 1100 + }, + { + "epoch": 0.11708860759493671, + "grad_norm": 0.48382633924484253, + "learning_rate": 0.0015, + "loss": 2.6494, + "step": 1110 + }, + { + "epoch": 
0.11814345991561181, + "grad_norm": 0.4573609232902527, + "learning_rate": 0.0015, + "loss": 2.6403, + "step": 1120 + }, + { + "epoch": 0.11919831223628692, + "grad_norm": 0.6410148739814758, + "learning_rate": 0.0015, + "loss": 2.6144, + "step": 1130 + }, + { + "epoch": 0.12025316455696203, + "grad_norm": 0.4648679494857788, + "learning_rate": 0.0015, + "loss": 2.6281, + "step": 1140 + }, + { + "epoch": 0.12130801687763713, + "grad_norm": 0.6489707827568054, + "learning_rate": 0.0015, + "loss": 2.609, + "step": 1150 + }, + { + "epoch": 0.12236286919831224, + "grad_norm": 0.39584651589393616, + "learning_rate": 0.0015, + "loss": 2.6147, + "step": 1160 + }, + { + "epoch": 0.12341772151898735, + "grad_norm": 0.7939093708992004, + "learning_rate": 0.0015, + "loss": 2.5909, + "step": 1170 + }, + { + "epoch": 0.12447257383966245, + "grad_norm": 0.43265780806541443, + "learning_rate": 0.0015, + "loss": 2.5774, + "step": 1180 + }, + { + "epoch": 0.12552742616033755, + "grad_norm": 0.5087862014770508, + "learning_rate": 0.0015, + "loss": 2.5746, + "step": 1190 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 0.49960434436798096, + "learning_rate": 0.0015, + "loss": 2.5792, + "step": 1200 + }, + { + "epoch": 0.12763713080168776, + "grad_norm": 0.4003382623195648, + "learning_rate": 0.0015, + "loss": 2.571, + "step": 1210 + }, + { + "epoch": 0.12869198312236288, + "grad_norm": 0.5398605465888977, + "learning_rate": 0.0015, + "loss": 2.5531, + "step": 1220 + }, + { + "epoch": 0.12974683544303797, + "grad_norm": 0.399098664522171, + "learning_rate": 0.0015, + "loss": 2.5338, + "step": 1230 + }, + { + "epoch": 0.1308016877637131, + "grad_norm": 0.4298166036605835, + "learning_rate": 0.0015, + "loss": 2.5455, + "step": 1240 + }, + { + "epoch": 0.13185654008438819, + "grad_norm": 0.5648788809776306, + "learning_rate": 0.0015, + "loss": 2.5423, + "step": 1250 + }, + { + "epoch": 0.13291139240506328, + "grad_norm": 0.6773810386657715, + "learning_rate": 0.0015, + "loss": 
2.5273, + "step": 1260 + }, + { + "epoch": 0.1339662447257384, + "grad_norm": 0.5596596598625183, + "learning_rate": 0.0015, + "loss": 2.5116, + "step": 1270 + }, + { + "epoch": 0.1350210970464135, + "grad_norm": 0.3979465067386627, + "learning_rate": 0.0015, + "loss": 2.521, + "step": 1280 + }, + { + "epoch": 0.1360759493670886, + "grad_norm": 0.7661407589912415, + "learning_rate": 0.0015, + "loss": 2.5034, + "step": 1290 + }, + { + "epoch": 0.1371308016877637, + "grad_norm": 0.5518456697463989, + "learning_rate": 0.0015, + "loss": 2.4884, + "step": 1300 + }, + { + "epoch": 0.13818565400843882, + "grad_norm": 0.42653220891952515, + "learning_rate": 0.0015, + "loss": 2.4881, + "step": 1310 + }, + { + "epoch": 0.13924050632911392, + "grad_norm": 0.4962894320487976, + "learning_rate": 0.0015, + "loss": 2.4886, + "step": 1320 + }, + { + "epoch": 0.14029535864978904, + "grad_norm": 0.7388845086097717, + "learning_rate": 0.0015, + "loss": 2.4763, + "step": 1330 + }, + { + "epoch": 0.14135021097046413, + "grad_norm": 0.4726186990737915, + "learning_rate": 0.0015, + "loss": 2.476, + "step": 1340 + }, + { + "epoch": 0.14240506329113925, + "grad_norm": 0.5966110825538635, + "learning_rate": 0.0015, + "loss": 2.4773, + "step": 1350 + }, + { + "epoch": 0.14345991561181434, + "grad_norm": 0.5548987984657288, + "learning_rate": 0.0015, + "loss": 2.4591, + "step": 1360 + }, + { + "epoch": 0.14451476793248946, + "grad_norm": 0.4708799123764038, + "learning_rate": 0.0015, + "loss": 2.4501, + "step": 1370 + }, + { + "epoch": 0.14556962025316456, + "grad_norm": 0.7105334997177124, + "learning_rate": 0.0015, + "loss": 2.4393, + "step": 1380 + }, + { + "epoch": 0.14662447257383968, + "grad_norm": 0.5323453545570374, + "learning_rate": 0.0015, + "loss": 2.4486, + "step": 1390 + }, + { + "epoch": 0.14767932489451477, + "grad_norm": 0.48286688327789307, + "learning_rate": 0.0015, + "loss": 2.4279, + "step": 1400 + }, + { + "epoch": 0.14873417721518986, + "grad_norm": 0.6947178840637207, 
+ "learning_rate": 0.0015, + "loss": 2.4243, + "step": 1410 + }, + { + "epoch": 0.14978902953586498, + "grad_norm": 0.5841905474662781, + "learning_rate": 0.0015, + "loss": 2.4296, + "step": 1420 + }, + { + "epoch": 0.15084388185654007, + "grad_norm": 0.4542180597782135, + "learning_rate": 0.0015, + "loss": 2.4216, + "step": 1430 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 0.4623643159866333, + "learning_rate": 0.0015, + "loss": 2.4138, + "step": 1440 + }, + { + "epoch": 0.1529535864978903, + "grad_norm": 0.4974883198738098, + "learning_rate": 0.0015, + "loss": 2.4089, + "step": 1450 + }, + { + "epoch": 0.1540084388185654, + "grad_norm": 0.46891847252845764, + "learning_rate": 0.0015, + "loss": 2.4007, + "step": 1460 + }, + { + "epoch": 0.1550632911392405, + "grad_norm": 0.5188739895820618, + "learning_rate": 0.0015, + "loss": 2.3914, + "step": 1470 + }, + { + "epoch": 0.15611814345991562, + "grad_norm": 0.4357692301273346, + "learning_rate": 0.0015, + "loss": 2.3912, + "step": 1480 + }, + { + "epoch": 0.1571729957805907, + "grad_norm": 0.43899163603782654, + "learning_rate": 0.0015, + "loss": 2.3869, + "step": 1490 + }, + { + "epoch": 0.15822784810126583, + "grad_norm": 0.4089808464050293, + "learning_rate": 0.0015, + "loss": 2.3873, + "step": 1500 + }, + { + "epoch": 0.15928270042194093, + "grad_norm": 0.48731446266174316, + "learning_rate": 0.0015, + "loss": 2.3791, + "step": 1510 + }, + { + "epoch": 0.16033755274261605, + "grad_norm": 0.45719215273857117, + "learning_rate": 0.0015, + "loss": 2.381, + "step": 1520 + }, + { + "epoch": 0.16139240506329114, + "grad_norm": 0.4243105351924896, + "learning_rate": 0.0015, + "loss": 2.3586, + "step": 1530 + }, + { + "epoch": 0.16244725738396623, + "grad_norm": 0.4893573224544525, + "learning_rate": 0.0015, + "loss": 2.3463, + "step": 1540 + }, + { + "epoch": 0.16350210970464135, + "grad_norm": 0.49471256136894226, + "learning_rate": 0.0015, + "loss": 2.3453, + "step": 1550 + }, + { + "epoch": 
0.16455696202531644, + "grad_norm": 0.4878668487071991, + "learning_rate": 0.0015, + "loss": 2.3488, + "step": 1560 + }, + { + "epoch": 0.16561181434599156, + "grad_norm": 0.4358295798301697, + "learning_rate": 0.0015, + "loss": 2.3381, + "step": 1570 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 0.5876789689064026, + "learning_rate": 0.0015, + "loss": 2.3466, + "step": 1580 + }, + { + "epoch": 0.16772151898734178, + "grad_norm": 0.6470997333526611, + "learning_rate": 0.0015, + "loss": 2.3396, + "step": 1590 + }, + { + "epoch": 0.16877637130801687, + "grad_norm": 0.4370270371437073, + "learning_rate": 0.0015, + "loss": 2.3365, + "step": 1600 + }, + { + "epoch": 0.169831223628692, + "grad_norm": 0.41289031505584717, + "learning_rate": 0.0015, + "loss": 2.3341, + "step": 1610 + }, + { + "epoch": 0.17088607594936708, + "grad_norm": 0.46754589676856995, + "learning_rate": 0.0015, + "loss": 2.3106, + "step": 1620 + }, + { + "epoch": 0.1719409282700422, + "grad_norm": 0.4858943819999695, + "learning_rate": 0.0015, + "loss": 2.3185, + "step": 1630 + }, + { + "epoch": 0.1729957805907173, + "grad_norm": 0.7317027449607849, + "learning_rate": 0.0015, + "loss": 2.3148, + "step": 1640 + }, + { + "epoch": 0.17405063291139242, + "grad_norm": 0.8671702146530151, + "learning_rate": 0.0015, + "loss": 2.3071, + "step": 1650 + }, + { + "epoch": 0.1751054852320675, + "grad_norm": 0.5114136338233948, + "learning_rate": 0.0015, + "loss": 2.3042, + "step": 1660 + }, + { + "epoch": 0.17616033755274263, + "grad_norm": 0.47680553793907166, + "learning_rate": 0.0015, + "loss": 2.3064, + "step": 1670 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 0.575048565864563, + "learning_rate": 0.0015, + "loss": 2.296, + "step": 1680 + }, + { + "epoch": 0.17827004219409281, + "grad_norm": 0.42194151878356934, + "learning_rate": 0.0015, + "loss": 2.2794, + "step": 1690 + }, + { + "epoch": 0.17932489451476794, + "grad_norm": 0.4296809732913971, + "learning_rate": 0.0015, + "loss": 
2.2959, + "step": 1700 + }, + { + "epoch": 0.18037974683544303, + "grad_norm": 0.4321148991584778, + "learning_rate": 0.0015, + "loss": 2.2824, + "step": 1710 + }, + { + "epoch": 0.18143459915611815, + "grad_norm": 0.40020737051963806, + "learning_rate": 0.0015, + "loss": 2.2751, + "step": 1720 + }, + { + "epoch": 0.18248945147679324, + "grad_norm": 0.418253093957901, + "learning_rate": 0.0015, + "loss": 2.271, + "step": 1730 + }, + { + "epoch": 0.18354430379746836, + "grad_norm": 0.4415479898452759, + "learning_rate": 0.0015, + "loss": 2.2784, + "step": 1740 + }, + { + "epoch": 0.18459915611814345, + "grad_norm": 0.4157296121120453, + "learning_rate": 0.0015, + "loss": 2.2925, + "step": 1750 + }, + { + "epoch": 0.18565400843881857, + "grad_norm": 0.5439921021461487, + "learning_rate": 0.0015, + "loss": 2.2681, + "step": 1760 + }, + { + "epoch": 0.18670886075949367, + "grad_norm": 0.5451791882514954, + "learning_rate": 0.0015, + "loss": 2.2698, + "step": 1770 + }, + { + "epoch": 0.1877637130801688, + "grad_norm": 0.3975878655910492, + "learning_rate": 0.0015, + "loss": 2.266, + "step": 1780 + }, + { + "epoch": 0.18881856540084388, + "grad_norm": 0.43305620551109314, + "learning_rate": 0.0015, + "loss": 2.2611, + "step": 1790 + }, + { + "epoch": 0.189873417721519, + "grad_norm": 0.3810420632362366, + "learning_rate": 0.0015, + "loss": 2.2613, + "step": 1800 + }, + { + "epoch": 0.1909282700421941, + "grad_norm": 0.4975600838661194, + "learning_rate": 0.0015, + "loss": 2.2484, + "step": 1810 + }, + { + "epoch": 0.19198312236286919, + "grad_norm": 0.4414883852005005, + "learning_rate": 0.0015, + "loss": 2.2422, + "step": 1820 + }, + { + "epoch": 0.1930379746835443, + "grad_norm": 0.4077140688896179, + "learning_rate": 0.0015, + "loss": 2.2372, + "step": 1830 + }, + { + "epoch": 0.1940928270042194, + "grad_norm": 0.4983878433704376, + "learning_rate": 0.0015, + "loss": 2.2342, + "step": 1840 + }, + { + "epoch": 0.19514767932489452, + "grad_norm": 0.618269145488739, + 
"learning_rate": 0.0015, + "loss": 2.2358, + "step": 1850 + }, + { + "epoch": 0.1962025316455696, + "grad_norm": 0.43015170097351074, + "learning_rate": 0.0015, + "loss": 2.2379, + "step": 1860 + }, + { + "epoch": 0.19725738396624473, + "grad_norm": 0.6842490434646606, + "learning_rate": 0.0015, + "loss": 2.2386, + "step": 1870 + }, + { + "epoch": 0.19831223628691982, + "grad_norm": 0.5212199091911316, + "learning_rate": 0.0015, + "loss": 2.2278, + "step": 1880 + }, + { + "epoch": 0.19936708860759494, + "grad_norm": 0.6083729267120361, + "learning_rate": 0.0015, + "loss": 2.2227, + "step": 1890 + }, + { + "epoch": 0.20042194092827004, + "grad_norm": 0.7251046299934387, + "learning_rate": 0.0015, + "loss": 2.2223, + "step": 1900 + }, + { + "epoch": 0.20147679324894516, + "grad_norm": 0.6426624655723572, + "learning_rate": 0.0015, + "loss": 2.2201, + "step": 1910 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 0.3915857672691345, + "learning_rate": 0.0015, + "loss": 2.2127, + "step": 1920 + }, + { + "epoch": 0.20358649789029537, + "grad_norm": 0.6851569414138794, + "learning_rate": 0.0015, + "loss": 2.2181, + "step": 1930 + }, + { + "epoch": 0.20464135021097046, + "grad_norm": 0.4217659533023834, + "learning_rate": 0.0015, + "loss": 2.2031, + "step": 1940 + }, + { + "epoch": 0.20569620253164558, + "grad_norm": 0.375531941652298, + "learning_rate": 0.0015, + "loss": 2.2003, + "step": 1950 + }, + { + "epoch": 0.20675105485232068, + "grad_norm": 0.4148560166358948, + "learning_rate": 0.0015, + "loss": 2.2086, + "step": 1960 + }, + { + "epoch": 0.20780590717299577, + "grad_norm": 0.3881523311138153, + "learning_rate": 0.0015, + "loss": 2.208, + "step": 1970 + }, + { + "epoch": 0.2088607594936709, + "grad_norm": 0.46804359555244446, + "learning_rate": 0.0015, + "loss": 2.1918, + "step": 1980 + }, + { + "epoch": 0.20991561181434598, + "grad_norm": 0.4381694793701172, + "learning_rate": 0.0015, + "loss": 2.1896, + "step": 1990 + }, + { + "epoch": 0.2109704641350211, 
+ "grad_norm": 0.43603023886680603, + "learning_rate": 0.0015, + "loss": 2.1871, + "step": 2000 + }, + { + "epoch": 0.2120253164556962, + "grad_norm": 0.43583643436431885, + "learning_rate": 0.0015, + "loss": 2.1919, + "step": 2010 + }, + { + "epoch": 0.21308016877637131, + "grad_norm": 0.48757272958755493, + "learning_rate": 0.0015, + "loss": 2.1995, + "step": 2020 + }, + { + "epoch": 0.2141350210970464, + "grad_norm": 0.4318925440311432, + "learning_rate": 0.0015, + "loss": 2.1852, + "step": 2030 + }, + { + "epoch": 0.21518987341772153, + "grad_norm": 0.4255896210670471, + "learning_rate": 0.0015, + "loss": 2.1781, + "step": 2040 + }, + { + "epoch": 0.21624472573839662, + "grad_norm": 0.4490632712841034, + "learning_rate": 0.0015, + "loss": 2.1808, + "step": 2050 + }, + { + "epoch": 0.21729957805907174, + "grad_norm": 0.3975178897380829, + "learning_rate": 0.0015, + "loss": 2.1692, + "step": 2060 + }, + { + "epoch": 0.21835443037974683, + "grad_norm": 0.3843049108982086, + "learning_rate": 0.0015, + "loss": 2.169, + "step": 2070 + }, + { + "epoch": 0.21940928270042195, + "grad_norm": 0.42467179894447327, + "learning_rate": 0.0015, + "loss": 2.1744, + "step": 2080 + }, + { + "epoch": 0.22046413502109705, + "grad_norm": 0.432029128074646, + "learning_rate": 0.0015, + "loss": 2.1619, + "step": 2090 + }, + { + "epoch": 0.22151898734177214, + "grad_norm": 0.357745885848999, + "learning_rate": 0.0015, + "loss": 2.1675, + "step": 2100 + }, + { + "epoch": 0.22257383966244726, + "grad_norm": 0.5511990189552307, + "learning_rate": 0.0015, + "loss": 2.1607, + "step": 2110 + }, + { + "epoch": 0.22362869198312235, + "grad_norm": 0.44558560848236084, + "learning_rate": 0.0015, + "loss": 2.1569, + "step": 2120 + }, + { + "epoch": 0.22468354430379747, + "grad_norm": 0.4494480788707733, + "learning_rate": 0.0015, + "loss": 2.157, + "step": 2130 + }, + { + "epoch": 0.22573839662447256, + "grad_norm": 0.4433281719684601, + "learning_rate": 0.0015, + "loss": 2.15, + "step": 2140 + 
}, + { + "epoch": 0.22679324894514769, + "grad_norm": 0.37084531784057617, + "learning_rate": 0.0015, + "loss": 2.1501, + "step": 2150 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 0.45150014758110046, + "learning_rate": 0.0015, + "loss": 2.1628, + "step": 2160 + }, + { + "epoch": 0.2289029535864979, + "grad_norm": 0.4242697060108185, + "learning_rate": 0.0015, + "loss": 2.1454, + "step": 2170 + }, + { + "epoch": 0.229957805907173, + "grad_norm": 0.3653409481048584, + "learning_rate": 0.0015, + "loss": 2.1307, + "step": 2180 + }, + { + "epoch": 0.2310126582278481, + "grad_norm": 0.41159456968307495, + "learning_rate": 0.0015, + "loss": 2.1424, + "step": 2190 + }, + { + "epoch": 0.2320675105485232, + "grad_norm": 0.39554381370544434, + "learning_rate": 0.0015, + "loss": 2.1415, + "step": 2200 + }, + { + "epoch": 0.23312236286919832, + "grad_norm": 0.41495802998542786, + "learning_rate": 0.0015, + "loss": 2.1285, + "step": 2210 + }, + { + "epoch": 0.23417721518987342, + "grad_norm": 0.49099722504615784, + "learning_rate": 0.0015, + "loss": 2.1425, + "step": 2220 + }, + { + "epoch": 0.23523206751054854, + "grad_norm": 0.3911990821361542, + "learning_rate": 0.0015, + "loss": 2.131, + "step": 2230 + }, + { + "epoch": 0.23628691983122363, + "grad_norm": 0.42251017689704895, + "learning_rate": 0.0015, + "loss": 2.1258, + "step": 2240 + }, + { + "epoch": 0.23734177215189872, + "grad_norm": 0.5026391744613647, + "learning_rate": 0.0015, + "loss": 2.1245, + "step": 2250 + }, + { + "epoch": 0.23839662447257384, + "grad_norm": 0.38387489318847656, + "learning_rate": 0.0015, + "loss": 2.1174, + "step": 2260 + }, + { + "epoch": 0.23945147679324894, + "grad_norm": 0.5047527551651001, + "learning_rate": 0.0015, + "loss": 2.1458, + "step": 2270 + }, + { + "epoch": 0.24050632911392406, + "grad_norm": 0.5866949558258057, + "learning_rate": 0.0015, + "loss": 2.1278, + "step": 2280 + }, + { + "epoch": 0.24156118143459915, + "grad_norm": 0.4067322313785553, + "learning_rate": 
0.0015, + "loss": 2.1112, + "step": 2290 + }, + { + "epoch": 0.24261603375527427, + "grad_norm": 0.4284001290798187, + "learning_rate": 0.0015, + "loss": 2.1127, + "step": 2300 + }, + { + "epoch": 0.24367088607594936, + "grad_norm": 0.5524792671203613, + "learning_rate": 0.0015, + "loss": 2.1171, + "step": 2310 + }, + { + "epoch": 0.24472573839662448, + "grad_norm": 0.4818218946456909, + "learning_rate": 0.0015, + "loss": 2.1173, + "step": 2320 + }, + { + "epoch": 0.24578059071729957, + "grad_norm": 0.4074719548225403, + "learning_rate": 0.0015, + "loss": 2.1013, + "step": 2330 + }, + { + "epoch": 0.2468354430379747, + "grad_norm": 0.4235811233520508, + "learning_rate": 0.0015, + "loss": 2.0984, + "step": 2340 + }, + { + "epoch": 0.2478902953586498, + "grad_norm": 0.6257115006446838, + "learning_rate": 0.0015, + "loss": 2.0958, + "step": 2350 + }, + { + "epoch": 0.2489451476793249, + "grad_norm": 0.6080753803253174, + "learning_rate": 0.0015, + "loss": 2.1054, + "step": 2360 + }, + { + "epoch": 0.25, + "grad_norm": 0.42847368121147156, + "learning_rate": 0.0015, + "loss": 2.1013, + "step": 2370 + }, + { + "epoch": 0.2510548523206751, + "grad_norm": 0.39735880494117737, + "learning_rate": 0.0015, + "loss": 2.0987, + "step": 2380 + }, + { + "epoch": 0.2521097046413502, + "grad_norm": 0.44733938574790955, + "learning_rate": 0.0015, + "loss": 2.0958, + "step": 2390 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 0.4103754758834839, + "learning_rate": 0.0015, + "loss": 2.0909, + "step": 2400 + }, + { + "epoch": 0.2542194092827004, + "grad_norm": 0.38021695613861084, + "learning_rate": 0.0015, + "loss": 2.0945, + "step": 2410 + }, + { + "epoch": 0.2552742616033755, + "grad_norm": 0.5292344093322754, + "learning_rate": 0.0015, + "loss": 2.0937, + "step": 2420 + }, + { + "epoch": 0.2563291139240506, + "grad_norm": 0.5250607132911682, + "learning_rate": 0.0015, + "loss": 2.0874, + "step": 2430 + }, + { + "epoch": 0.25738396624472576, + "grad_norm": 
0.5082991123199463, + "learning_rate": 0.0015, + "loss": 2.0963, + "step": 2440 + }, + { + "epoch": 0.25843881856540085, + "grad_norm": 0.3772115409374237, + "learning_rate": 0.0015, + "loss": 2.0782, + "step": 2450 + }, + { + "epoch": 0.25949367088607594, + "grad_norm": 0.4259469509124756, + "learning_rate": 0.0015, + "loss": 2.0904, + "step": 2460 + }, + { + "epoch": 0.26054852320675104, + "grad_norm": 0.6896575689315796, + "learning_rate": 0.0015, + "loss": 2.0818, + "step": 2470 + }, + { + "epoch": 0.2616033755274262, + "grad_norm": 0.563823938369751, + "learning_rate": 0.0015, + "loss": 2.0804, + "step": 2480 + }, + { + "epoch": 0.2626582278481013, + "grad_norm": 0.40345773100852966, + "learning_rate": 0.0015, + "loss": 2.0733, + "step": 2490 + }, + { + "epoch": 0.26371308016877637, + "grad_norm": 0.4209953844547272, + "learning_rate": 0.0015, + "loss": 2.0775, + "step": 2500 + }, + { + "epoch": 0.26476793248945146, + "grad_norm": 0.45934030413627625, + "learning_rate": 0.0015, + "loss": 2.0702, + "step": 2510 + }, + { + "epoch": 0.26582278481012656, + "grad_norm": 0.5836525559425354, + "learning_rate": 0.0015, + "loss": 2.072, + "step": 2520 + }, + { + "epoch": 0.2668776371308017, + "grad_norm": 0.4129602015018463, + "learning_rate": 0.0015, + "loss": 2.07, + "step": 2530 + }, + { + "epoch": 0.2679324894514768, + "grad_norm": 0.5341653823852539, + "learning_rate": 0.0015, + "loss": 2.0709, + "step": 2540 + }, + { + "epoch": 0.2689873417721519, + "grad_norm": 0.3971026539802551, + "learning_rate": 0.0015, + "loss": 2.0601, + "step": 2550 + }, + { + "epoch": 0.270042194092827, + "grad_norm": 0.4653588831424713, + "learning_rate": 0.0015, + "loss": 2.0669, + "step": 2560 + }, + { + "epoch": 0.27109704641350213, + "grad_norm": 0.4160066545009613, + "learning_rate": 0.0015, + "loss": 2.0639, + "step": 2570 + }, + { + "epoch": 0.2721518987341772, + "grad_norm": 0.3685113191604614, + "learning_rate": 0.0015, + "loss": 2.061, + "step": 2580 + }, + { + "epoch": 
0.2732067510548523, + "grad_norm": 0.43481722474098206, + "learning_rate": 0.0015, + "loss": 2.0654, + "step": 2590 + }, + { + "epoch": 0.2742616033755274, + "grad_norm": 0.4718077778816223, + "learning_rate": 0.0015, + "loss": 2.0626, + "step": 2600 + }, + { + "epoch": 0.27531645569620256, + "grad_norm": 0.43581950664520264, + "learning_rate": 0.0015, + "loss": 2.0605, + "step": 2610 + }, + { + "epoch": 0.27637130801687765, + "grad_norm": 0.3943164050579071, + "learning_rate": 0.0015, + "loss": 2.059, + "step": 2620 + }, + { + "epoch": 0.27742616033755274, + "grad_norm": 0.4394289255142212, + "learning_rate": 0.0015, + "loss": 2.0455, + "step": 2630 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 0.42878061532974243, + "learning_rate": 0.0015, + "loss": 2.0585, + "step": 2640 + }, + { + "epoch": 0.2795358649789029, + "grad_norm": 0.4528396427631378, + "learning_rate": 0.0015, + "loss": 2.0469, + "step": 2650 + }, + { + "epoch": 0.2805907172995781, + "grad_norm": 0.4021029770374298, + "learning_rate": 0.0015, + "loss": 2.0472, + "step": 2660 + }, + { + "epoch": 0.28164556962025317, + "grad_norm": 0.46599486470222473, + "learning_rate": 0.0015, + "loss": 2.0435, + "step": 2670 + }, + { + "epoch": 0.28270042194092826, + "grad_norm": 0.3904265761375427, + "learning_rate": 0.0015, + "loss": 2.0428, + "step": 2680 + }, + { + "epoch": 0.28375527426160335, + "grad_norm": 0.4055798351764679, + "learning_rate": 0.0015, + "loss": 2.0376, + "step": 2690 + }, + { + "epoch": 0.2848101265822785, + "grad_norm": 0.41211187839508057, + "learning_rate": 0.0015, + "loss": 2.0372, + "step": 2700 + }, + { + "epoch": 0.2858649789029536, + "grad_norm": 0.5562853813171387, + "learning_rate": 0.0015, + "loss": 2.043, + "step": 2710 + }, + { + "epoch": 0.2869198312236287, + "grad_norm": 0.38862988352775574, + "learning_rate": 0.0015, + "loss": 2.0409, + "step": 2720 + }, + { + "epoch": 0.2879746835443038, + "grad_norm": 0.4492087960243225, + "learning_rate": 0.0015, + "loss": 
2.0228, + "step": 2730 + }, + { + "epoch": 0.2890295358649789, + "grad_norm": 0.8009628057479858, + "learning_rate": 0.0015, + "loss": 2.0375, + "step": 2740 + }, + { + "epoch": 0.290084388185654, + "grad_norm": 0.5228288173675537, + "learning_rate": 0.0015, + "loss": 2.0364, + "step": 2750 + }, + { + "epoch": 0.2911392405063291, + "grad_norm": 0.5325400233268738, + "learning_rate": 0.0015, + "loss": 2.0416, + "step": 2760 + }, + { + "epoch": 0.2921940928270042, + "grad_norm": 0.3710283935070038, + "learning_rate": 0.0015, + "loss": 2.0345, + "step": 2770 + }, + { + "epoch": 0.29324894514767935, + "grad_norm": 0.4476848244667053, + "learning_rate": 0.0015, + "loss": 2.0345, + "step": 2780 + }, + { + "epoch": 0.29430379746835444, + "grad_norm": 0.34534865617752075, + "learning_rate": 0.0015, + "loss": 2.0275, + "step": 2790 + }, + { + "epoch": 0.29535864978902954, + "grad_norm": 0.4192986786365509, + "learning_rate": 0.0015, + "loss": 2.0254, + "step": 2800 + }, + { + "epoch": 0.29641350210970463, + "grad_norm": 0.42185741662979126, + "learning_rate": 0.0015, + "loss": 2.0255, + "step": 2810 + }, + { + "epoch": 0.2974683544303797, + "grad_norm": 0.40017443895339966, + "learning_rate": 0.0015, + "loss": 2.0309, + "step": 2820 + }, + { + "epoch": 0.29852320675105487, + "grad_norm": 1.0271852016448975, + "learning_rate": 0.0015, + "loss": 2.0036, + "step": 2830 + }, + { + "epoch": 0.29957805907172996, + "grad_norm": 0.5703707933425903, + "learning_rate": 0.0015, + "loss": 2.0065, + "step": 2840 + }, + { + "epoch": 0.30063291139240506, + "grad_norm": 0.5895113945007324, + "learning_rate": 0.0015, + "loss": 2.0161, + "step": 2850 + }, + { + "epoch": 0.30168776371308015, + "grad_norm": 0.38705241680145264, + "learning_rate": 0.0015, + "loss": 2.0152, + "step": 2860 + }, + { + "epoch": 0.3027426160337553, + "grad_norm": 0.5010685920715332, + "learning_rate": 0.0015, + "loss": 2.0153, + "step": 2870 + }, + { + "epoch": 0.3037974683544304, + "grad_norm": 0.48778650164604187, 
+ "learning_rate": 0.0015, + "loss": 2.0003, + "step": 2880 + }, + { + "epoch": 0.3048523206751055, + "grad_norm": 0.4624802768230438, + "learning_rate": 0.0015, + "loss": 2.0039, + "step": 2890 + }, + { + "epoch": 0.3059071729957806, + "grad_norm": 0.4715951979160309, + "learning_rate": 0.0015, + "loss": 2.0105, + "step": 2900 + }, + { + "epoch": 0.3069620253164557, + "grad_norm": 0.38887953758239746, + "learning_rate": 0.0015, + "loss": 2.0085, + "step": 2910 + }, + { + "epoch": 0.3080168776371308, + "grad_norm": 0.406602144241333, + "learning_rate": 0.0015, + "loss": 2.0014, + "step": 2920 + }, + { + "epoch": 0.3090717299578059, + "grad_norm": 0.4194795489311218, + "learning_rate": 0.0015, + "loss": 1.9979, + "step": 2930 + }, + { + "epoch": 0.310126582278481, + "grad_norm": 0.4231756031513214, + "learning_rate": 0.0015, + "loss": 2.0063, + "step": 2940 + }, + { + "epoch": 0.3111814345991561, + "grad_norm": 0.5089144706726074, + "learning_rate": 0.0015, + "loss": 2.0136, + "step": 2950 + }, + { + "epoch": 0.31223628691983124, + "grad_norm": 0.4470892548561096, + "learning_rate": 0.0015, + "loss": 1.9944, + "step": 2960 + }, + { + "epoch": 0.31329113924050633, + "grad_norm": 0.43263986706733704, + "learning_rate": 0.0015, + "loss": 1.9883, + "step": 2970 + }, + { + "epoch": 0.3143459915611814, + "grad_norm": 0.3462584316730499, + "learning_rate": 0.0015, + "loss": 1.9908, + "step": 2980 + }, + { + "epoch": 0.3154008438818565, + "grad_norm": 0.41245928406715393, + "learning_rate": 0.0015, + "loss": 2.0089, + "step": 2990 + }, + { + "epoch": 0.31645569620253167, + "grad_norm": 0.36029165983200073, + "learning_rate": 0.0015, + "loss": 1.9836, + "step": 3000 + }, + { + "epoch": 0.31751054852320676, + "grad_norm": 0.41074708104133606, + "learning_rate": 0.0015, + "loss": 1.9936, + "step": 3010 + }, + { + "epoch": 0.31856540084388185, + "grad_norm": 0.5694265961647034, + "learning_rate": 0.0015, + "loss": 1.9922, + "step": 3020 + }, + { + "epoch": 0.31962025316455694, 
+ "grad_norm": 0.4703502655029297, + "learning_rate": 0.0015, + "loss": 1.9798, + "step": 3030 + }, + { + "epoch": 0.3206751054852321, + "grad_norm": 0.3536970913410187, + "learning_rate": 0.0015, + "loss": 1.9869, + "step": 3040 + }, + { + "epoch": 0.3217299578059072, + "grad_norm": 0.3691686689853668, + "learning_rate": 0.0015, + "loss": 1.9823, + "step": 3050 + }, + { + "epoch": 0.3227848101265823, + "grad_norm": 0.37487566471099854, + "learning_rate": 0.0015, + "loss": 1.9909, + "step": 3060 + }, + { + "epoch": 0.32383966244725737, + "grad_norm": 0.3542984127998352, + "learning_rate": 0.0015, + "loss": 1.9863, + "step": 3070 + }, + { + "epoch": 0.32489451476793246, + "grad_norm": 0.43098732829093933, + "learning_rate": 0.0015, + "loss": 1.9696, + "step": 3080 + }, + { + "epoch": 0.3259493670886076, + "grad_norm": 0.4170604646205902, + "learning_rate": 0.0015, + "loss": 1.983, + "step": 3090 + }, + { + "epoch": 0.3270042194092827, + "grad_norm": 0.5045300126075745, + "learning_rate": 0.0015, + "loss": 1.9706, + "step": 3100 + }, + { + "epoch": 0.3280590717299578, + "grad_norm": 0.504026472568512, + "learning_rate": 0.0015, + "loss": 1.9728, + "step": 3110 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 0.3979000449180603, + "learning_rate": 0.0015, + "loss": 1.9751, + "step": 3120 + }, + { + "epoch": 0.33016877637130804, + "grad_norm": 0.3737519681453705, + "learning_rate": 0.0015, + "loss": 1.9864, + "step": 3130 + }, + { + "epoch": 0.33122362869198313, + "grad_norm": 0.5472099781036377, + "learning_rate": 0.0015, + "loss": 1.9619, + "step": 3140 + }, + { + "epoch": 0.3322784810126582, + "grad_norm": 0.4074109196662903, + "learning_rate": 0.0015, + "loss": 1.9723, + "step": 3150 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.40785884857177734, + "learning_rate": 0.0015, + "loss": 1.9678, + "step": 3160 + }, + { + "epoch": 0.33438818565400846, + "grad_norm": 0.40700992941856384, + "learning_rate": 0.0015, + "loss": 1.9736, + "step": 3170 + }, + 
{ + "epoch": 0.33544303797468356, + "grad_norm": 0.3770516514778137, + "learning_rate": 0.0015, + "loss": 1.9664, + "step": 3180 + }, + { + "epoch": 0.33649789029535865, + "grad_norm": 0.6191193461418152, + "learning_rate": 0.0015, + "loss": 1.9644, + "step": 3190 + }, + { + "epoch": 0.33755274261603374, + "grad_norm": 0.42728057503700256, + "learning_rate": 0.0015, + "loss": 1.9606, + "step": 3200 + }, + { + "epoch": 0.33860759493670883, + "grad_norm": 0.6794872283935547, + "learning_rate": 0.0015, + "loss": 1.9739, + "step": 3210 + }, + { + "epoch": 0.339662447257384, + "grad_norm": 0.4524136483669281, + "learning_rate": 0.0015, + "loss": 1.9636, + "step": 3220 + }, + { + "epoch": 0.3407172995780591, + "grad_norm": 0.6325470209121704, + "learning_rate": 0.0015, + "loss": 1.9601, + "step": 3230 + }, + { + "epoch": 0.34177215189873417, + "grad_norm": 0.34216609597206116, + "learning_rate": 0.0015, + "loss": 1.9399, + "step": 3240 + }, + { + "epoch": 0.34282700421940926, + "grad_norm": 0.39196503162384033, + "learning_rate": 0.0015, + "loss": 1.9611, + "step": 3250 + }, + { + "epoch": 0.3438818565400844, + "grad_norm": 0.4126991331577301, + "learning_rate": 0.0015, + "loss": 1.9502, + "step": 3260 + }, + { + "epoch": 0.3449367088607595, + "grad_norm": 0.38933661580085754, + "learning_rate": 0.0015, + "loss": 1.9683, + "step": 3270 + }, + { + "epoch": 0.3459915611814346, + "grad_norm": 0.45866304636001587, + "learning_rate": 0.0015, + "loss": 1.9669, + "step": 3280 + }, + { + "epoch": 0.3470464135021097, + "grad_norm": 0.6163985133171082, + "learning_rate": 0.0015, + "loss": 1.9498, + "step": 3290 + }, + { + "epoch": 0.34810126582278483, + "grad_norm": 0.39485007524490356, + "learning_rate": 0.0015, + "loss": 1.9597, + "step": 3300 + }, + { + "epoch": 0.3491561181434599, + "grad_norm": 0.3309183716773987, + "learning_rate": 0.0015, + "loss": 1.961, + "step": 3310 + }, + { + "epoch": 0.350210970464135, + "grad_norm": 0.38850241899490356, + "learning_rate": 0.0015, + 
"loss": 1.9595, + "step": 3320 + }, + { + "epoch": 0.3512658227848101, + "grad_norm": 0.4465595781803131, + "learning_rate": 0.0015, + "loss": 1.9516, + "step": 3330 + }, + { + "epoch": 0.35232067510548526, + "grad_norm": 0.344111829996109, + "learning_rate": 0.0015, + "loss": 1.9541, + "step": 3340 + }, + { + "epoch": 0.35337552742616035, + "grad_norm": 0.40312331914901733, + "learning_rate": 0.0015, + "loss": 1.9401, + "step": 3350 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 0.3763878345489502, + "learning_rate": 0.0015, + "loss": 1.9389, + "step": 3360 + }, + { + "epoch": 0.35548523206751054, + "grad_norm": 0.3751490116119385, + "learning_rate": 0.0015, + "loss": 1.9512, + "step": 3370 + }, + { + "epoch": 0.35654008438818563, + "grad_norm": 0.3285217583179474, + "learning_rate": 0.0015, + "loss": 1.9439, + "step": 3380 + }, + { + "epoch": 0.3575949367088608, + "grad_norm": 0.36917683482170105, + "learning_rate": 0.0015, + "loss": 1.938, + "step": 3390 + }, + { + "epoch": 0.35864978902953587, + "grad_norm": 0.517072319984436, + "learning_rate": 0.0015, + "loss": 1.9517, + "step": 3400 + }, + { + "epoch": 0.35970464135021096, + "grad_norm": 0.3615550994873047, + "learning_rate": 0.0015, + "loss": 1.9434, + "step": 3410 + }, + { + "epoch": 0.36075949367088606, + "grad_norm": 0.46556949615478516, + "learning_rate": 0.0015, + "loss": 1.9236, + "step": 3420 + }, + { + "epoch": 0.3618143459915612, + "grad_norm": 0.3592410981655121, + "learning_rate": 0.0015, + "loss": 1.9376, + "step": 3430 + }, + { + "epoch": 0.3628691983122363, + "grad_norm": 0.3330400288105011, + "learning_rate": 0.0015, + "loss": 1.9317, + "step": 3440 + }, + { + "epoch": 0.3639240506329114, + "grad_norm": 0.41959577798843384, + "learning_rate": 0.0015, + "loss": 1.9335, + "step": 3450 + }, + { + "epoch": 0.3649789029535865, + "grad_norm": 0.43094608187675476, + "learning_rate": 0.0015, + "loss": 1.9254, + "step": 3460 + }, + { + "epoch": 0.36603375527426163, + "grad_norm": 
0.40564459562301636, + "learning_rate": 0.0015, + "loss": 1.9422, + "step": 3470 + }, + { + "epoch": 0.3670886075949367, + "grad_norm": 0.48330584168434143, + "learning_rate": 0.0015, + "loss": 1.9441, + "step": 3480 + }, + { + "epoch": 0.3681434599156118, + "grad_norm": 0.34685322642326355, + "learning_rate": 0.0015, + "loss": 1.9377, + "step": 3490 + }, + { + "epoch": 0.3691983122362869, + "grad_norm": 0.35264283418655396, + "learning_rate": 0.0015, + "loss": 1.9185, + "step": 3500 + }, + { + "epoch": 0.370253164556962, + "grad_norm": 0.4105161130428314, + "learning_rate": 0.0015, + "loss": 1.9336, + "step": 3510 + }, + { + "epoch": 0.37130801687763715, + "grad_norm": 0.3324391543865204, + "learning_rate": 0.0015, + "loss": 1.9297, + "step": 3520 + }, + { + "epoch": 0.37236286919831224, + "grad_norm": 0.43434691429138184, + "learning_rate": 0.0015, + "loss": 1.9406, + "step": 3530 + }, + { + "epoch": 0.37341772151898733, + "grad_norm": 0.595516562461853, + "learning_rate": 0.0015, + "loss": 1.9381, + "step": 3540 + }, + { + "epoch": 0.3744725738396624, + "grad_norm": 0.44567352533340454, + "learning_rate": 0.0015, + "loss": 1.9256, + "step": 3550 + }, + { + "epoch": 0.3755274261603376, + "grad_norm": 0.48279255628585815, + "learning_rate": 0.0015, + "loss": 1.9252, + "step": 3560 + }, + { + "epoch": 0.37658227848101267, + "grad_norm": 0.5019918084144592, + "learning_rate": 0.0015, + "loss": 1.923, + "step": 3570 + }, + { + "epoch": 0.37763713080168776, + "grad_norm": 0.33855754137039185, + "learning_rate": 0.0015, + "loss": 1.9192, + "step": 3580 + }, + { + "epoch": 0.37869198312236285, + "grad_norm": 0.36431026458740234, + "learning_rate": 0.0015, + "loss": 1.9305, + "step": 3590 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 0.4728095829486847, + "learning_rate": 0.0015, + "loss": 1.9109, + "step": 3600 + }, + { + "epoch": 0.3808016877637131, + "grad_norm": 0.38402220606803894, + "learning_rate": 0.0015, + "loss": 1.9179, + "step": 3610 + }, + { + 
"epoch": 0.3818565400843882, + "grad_norm": 0.3763788044452667, + "learning_rate": 0.0015, + "loss": 1.9174, + "step": 3620 + }, + { + "epoch": 0.3829113924050633, + "grad_norm": 0.3789425492286682, + "learning_rate": 0.0015, + "loss": 1.9161, + "step": 3630 + }, + { + "epoch": 0.38396624472573837, + "grad_norm": 0.4420619308948517, + "learning_rate": 0.0015, + "loss": 1.916, + "step": 3640 + }, + { + "epoch": 0.3850210970464135, + "grad_norm": 0.47589001059532166, + "learning_rate": 0.0015, + "loss": 1.9235, + "step": 3650 + }, + { + "epoch": 0.3860759493670886, + "grad_norm": 0.5456047654151917, + "learning_rate": 0.0015, + "loss": 1.9165, + "step": 3660 + }, + { + "epoch": 0.3871308016877637, + "grad_norm": 0.42406487464904785, + "learning_rate": 0.0015, + "loss": 1.9096, + "step": 3670 + }, + { + "epoch": 0.3881856540084388, + "grad_norm": 0.45331519842147827, + "learning_rate": 0.0015, + "loss": 1.9109, + "step": 3680 + }, + { + "epoch": 0.38924050632911394, + "grad_norm": 0.3964917063713074, + "learning_rate": 0.0015, + "loss": 1.9232, + "step": 3690 + }, + { + "epoch": 0.39029535864978904, + "grad_norm": 0.4842318594455719, + "learning_rate": 0.0015, + "loss": 1.9242, + "step": 3700 + }, + { + "epoch": 0.39135021097046413, + "grad_norm": 0.32077768445014954, + "learning_rate": 0.0015, + "loss": 1.9126, + "step": 3710 + }, + { + "epoch": 0.3924050632911392, + "grad_norm": 0.38983920216560364, + "learning_rate": 0.0015, + "loss": 1.8915, + "step": 3720 + }, + { + "epoch": 0.39345991561181437, + "grad_norm": 0.43065422773361206, + "learning_rate": 0.0015, + "loss": 1.9107, + "step": 3730 + }, + { + "epoch": 0.39451476793248946, + "grad_norm": 0.3556627035140991, + "learning_rate": 0.0015, + "loss": 1.9111, + "step": 3740 + }, + { + "epoch": 0.39556962025316456, + "grad_norm": 0.39901190996170044, + "learning_rate": 0.0015, + "loss": 1.9104, + "step": 3750 + }, + { + "epoch": 0.39662447257383965, + "grad_norm": 0.5182483792304993, + "learning_rate": 0.0015, + 
"loss": 1.9053, + "step": 3760 + }, + { + "epoch": 0.39767932489451474, + "grad_norm": 0.38392820954322815, + "learning_rate": 0.0015, + "loss": 1.9156, + "step": 3770 + }, + { + "epoch": 0.3987341772151899, + "grad_norm": 0.3845175802707672, + "learning_rate": 0.0015, + "loss": 1.8999, + "step": 3780 + }, + { + "epoch": 0.399789029535865, + "grad_norm": 0.4916810691356659, + "learning_rate": 0.0015, + "loss": 1.9056, + "step": 3790 + }, + { + "epoch": 0.4008438818565401, + "grad_norm": 0.4338628351688385, + "learning_rate": 0.0015, + "loss": 1.908, + "step": 3800 + }, + { + "epoch": 0.40189873417721517, + "grad_norm": 0.38228607177734375, + "learning_rate": 0.0015, + "loss": 1.9014, + "step": 3810 + }, + { + "epoch": 0.4029535864978903, + "grad_norm": 0.3408377766609192, + "learning_rate": 0.0015, + "loss": 1.8864, + "step": 3820 + }, + { + "epoch": 0.4040084388185654, + "grad_norm": 0.4755379557609558, + "learning_rate": 0.0015, + "loss": 1.8895, + "step": 3830 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 0.38737741112709045, + "learning_rate": 0.0015, + "loss": 1.9104, + "step": 3840 + }, + { + "epoch": 0.4061181434599156, + "grad_norm": 0.5964657068252563, + "learning_rate": 0.0015, + "loss": 1.8888, + "step": 3850 + }, + { + "epoch": 0.40717299578059074, + "grad_norm": 0.5963073968887329, + "learning_rate": 0.0015, + "loss": 1.9074, + "step": 3860 + }, + { + "epoch": 0.40822784810126583, + "grad_norm": 0.3991047143936157, + "learning_rate": 0.0015, + "loss": 1.8924, + "step": 3870 + }, + { + "epoch": 0.4092827004219409, + "grad_norm": 0.3681778609752655, + "learning_rate": 0.0015, + "loss": 1.8804, + "step": 3880 + }, + { + "epoch": 0.410337552742616, + "grad_norm": 0.44637757539749146, + "learning_rate": 0.0015, + "loss": 1.8849, + "step": 3890 + }, + { + "epoch": 0.41139240506329117, + "grad_norm": 0.6122761368751526, + "learning_rate": 0.0015, + "loss": 1.8876, + "step": 3900 + }, + { + "epoch": 0.41244725738396626, + "grad_norm": 
0.41687506437301636, + "learning_rate": 0.0015, + "loss": 1.8859, + "step": 3910 + }, + { + "epoch": 0.41350210970464135, + "grad_norm": 0.35810354351997375, + "learning_rate": 0.0015, + "loss": 1.8841, + "step": 3920 + }, + { + "epoch": 0.41455696202531644, + "grad_norm": 0.31421080231666565, + "learning_rate": 0.0015, + "loss": 1.8868, + "step": 3930 + }, + { + "epoch": 0.41561181434599154, + "grad_norm": 0.47998908162117004, + "learning_rate": 0.0015, + "loss": 1.8815, + "step": 3940 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 0.45613759756088257, + "learning_rate": 0.0015, + "loss": 1.8862, + "step": 3950 + }, + { + "epoch": 0.4177215189873418, + "grad_norm": 0.3200185298919678, + "learning_rate": 0.0015, + "loss": 1.8875, + "step": 3960 + }, + { + "epoch": 0.41877637130801687, + "grad_norm": 0.3921988904476166, + "learning_rate": 0.0015, + "loss": 1.8733, + "step": 3970 + }, + { + "epoch": 0.41983122362869196, + "grad_norm": 0.37021327018737793, + "learning_rate": 0.0015, + "loss": 1.8852, + "step": 3980 + }, + { + "epoch": 0.4208860759493671, + "grad_norm": 0.3745802342891693, + "learning_rate": 0.0015, + "loss": 1.8753, + "step": 3990 + }, + { + "epoch": 0.4219409282700422, + "grad_norm": 0.3210016191005707, + "learning_rate": 0.0015, + "loss": 1.8905, + "step": 4000 + }, + { + "epoch": 0.4229957805907173, + "grad_norm": 0.3362002670764923, + "learning_rate": 0.0015, + "loss": 1.8762, + "step": 4010 + }, + { + "epoch": 0.4240506329113924, + "grad_norm": 0.3748522698879242, + "learning_rate": 0.0015, + "loss": 1.8782, + "step": 4020 + }, + { + "epoch": 0.42510548523206754, + "grad_norm": 0.4886162579059601, + "learning_rate": 0.0015, + "loss": 1.8709, + "step": 4030 + }, + { + "epoch": 0.42616033755274263, + "grad_norm": 0.36939969658851624, + "learning_rate": 0.0015, + "loss": 1.8798, + "step": 4040 + }, + { + "epoch": 0.4272151898734177, + "grad_norm": 0.35972991585731506, + "learning_rate": 0.0015, + "loss": 1.8793, + "step": 4050 + }, + { + 
"epoch": 0.4282700421940928, + "grad_norm": 0.37946438789367676, + "learning_rate": 0.0015, + "loss": 1.8841, + "step": 4060 + }, + { + "epoch": 0.4293248945147679, + "grad_norm": 0.36330369114875793, + "learning_rate": 0.0015, + "loss": 1.8775, + "step": 4070 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 0.4276699125766754, + "learning_rate": 0.0015, + "loss": 1.8858, + "step": 4080 + }, + { + "epoch": 0.43143459915611815, + "grad_norm": 0.38324111700057983, + "learning_rate": 0.0015, + "loss": 1.8774, + "step": 4090 + }, + { + "epoch": 0.43248945147679324, + "grad_norm": 0.42404502630233765, + "learning_rate": 0.0015, + "loss": 1.8764, + "step": 4100 + }, + { + "epoch": 0.43354430379746833, + "grad_norm": 0.5729767680168152, + "learning_rate": 0.0015, + "loss": 1.87, + "step": 4110 + }, + { + "epoch": 0.4345991561181435, + "grad_norm": 0.47413209080696106, + "learning_rate": 0.0015, + "loss": 1.8695, + "step": 4120 + }, + { + "epoch": 0.4356540084388186, + "grad_norm": 0.4307495355606079, + "learning_rate": 0.0015, + "loss": 1.8681, + "step": 4130 + }, + { + "epoch": 0.43670886075949367, + "grad_norm": 0.37268444895744324, + "learning_rate": 0.0015, + "loss": 1.8679, + "step": 4140 + }, + { + "epoch": 0.43776371308016876, + "grad_norm": 0.3241785168647766, + "learning_rate": 0.0015, + "loss": 1.8748, + "step": 4150 + }, + { + "epoch": 0.4388185654008439, + "grad_norm": 0.35492029786109924, + "learning_rate": 0.0015, + "loss": 1.8654, + "step": 4160 + }, + { + "epoch": 0.439873417721519, + "grad_norm": 0.31254345178604126, + "learning_rate": 0.0015, + "loss": 1.8669, + "step": 4170 + }, + { + "epoch": 0.4409282700421941, + "grad_norm": 0.3164255917072296, + "learning_rate": 0.0015, + "loss": 1.8599, + "step": 4180 + }, + { + "epoch": 0.4419831223628692, + "grad_norm": 0.3364178538322449, + "learning_rate": 0.0015, + "loss": 1.8705, + "step": 4190 + }, + { + "epoch": 0.4430379746835443, + "grad_norm": 0.4431174099445343, + "learning_rate": 0.0015, + 
"loss": 1.8666, + "step": 4200 + }, + { + "epoch": 0.4440928270042194, + "grad_norm": 0.45099061727523804, + "learning_rate": 0.0015, + "loss": 1.857, + "step": 4210 + }, + { + "epoch": 0.4451476793248945, + "grad_norm": 0.32387441396713257, + "learning_rate": 0.0015, + "loss": 1.8637, + "step": 4220 + }, + { + "epoch": 0.4462025316455696, + "grad_norm": 0.3952908515930176, + "learning_rate": 0.0015, + "loss": 1.8636, + "step": 4230 + }, + { + "epoch": 0.4472573839662447, + "grad_norm": 0.38458308577537537, + "learning_rate": 0.0015, + "loss": 1.864, + "step": 4240 + }, + { + "epoch": 0.44831223628691985, + "grad_norm": 0.40344688296318054, + "learning_rate": 0.0015, + "loss": 1.8651, + "step": 4250 + }, + { + "epoch": 0.44936708860759494, + "grad_norm": 0.38940712809562683, + "learning_rate": 0.0015, + "loss": 1.869, + "step": 4260 + }, + { + "epoch": 0.45042194092827004, + "grad_norm": 0.39709579944610596, + "learning_rate": 0.0015, + "loss": 1.8588, + "step": 4270 + }, + { + "epoch": 0.45147679324894513, + "grad_norm": 0.331944078207016, + "learning_rate": 0.0015, + "loss": 1.8519, + "step": 4280 + }, + { + "epoch": 0.4525316455696203, + "grad_norm": 0.33285272121429443, + "learning_rate": 0.0015, + "loss": 1.8629, + "step": 4290 + }, + { + "epoch": 0.45358649789029537, + "grad_norm": 0.4516183137893677, + "learning_rate": 0.0015, + "loss": 1.8609, + "step": 4300 + }, + { + "epoch": 0.45464135021097046, + "grad_norm": 0.34201303124427795, + "learning_rate": 0.0015, + "loss": 1.8579, + "step": 4310 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 0.4359893202781677, + "learning_rate": 0.0015, + "loss": 1.8483, + "step": 4320 + }, + { + "epoch": 0.45675105485232065, + "grad_norm": 0.3476506769657135, + "learning_rate": 0.0015, + "loss": 1.859, + "step": 4330 + }, + { + "epoch": 0.4578059071729958, + "grad_norm": 0.43458154797554016, + "learning_rate": 0.0015, + "loss": 1.8486, + "step": 4340 + }, + { + "epoch": 0.4588607594936709, + "grad_norm": 
0.36237064003944397, + "learning_rate": 0.0015, + "loss": 1.8542, + "step": 4350 + }, + { + "epoch": 0.459915611814346, + "grad_norm": 0.38242867588996887, + "learning_rate": 0.0015, + "loss": 1.8674, + "step": 4360 + }, + { + "epoch": 0.4609704641350211, + "grad_norm": 0.3427712321281433, + "learning_rate": 0.0015, + "loss": 1.8416, + "step": 4370 + }, + { + "epoch": 0.4620253164556962, + "grad_norm": 0.4015636742115021, + "learning_rate": 0.0015, + "loss": 1.8489, + "step": 4380 + }, + { + "epoch": 0.4630801687763713, + "grad_norm": 0.34853026270866394, + "learning_rate": 0.0015, + "loss": 1.8414, + "step": 4390 + }, + { + "epoch": 0.4641350210970464, + "grad_norm": 0.5962074995040894, + "learning_rate": 0.0015, + "loss": 1.8505, + "step": 4400 + }, + { + "epoch": 0.4651898734177215, + "grad_norm": 0.5702231526374817, + "learning_rate": 0.0015, + "loss": 1.8636, + "step": 4410 + }, + { + "epoch": 0.46624472573839665, + "grad_norm": 0.3360097408294678, + "learning_rate": 0.0015, + "loss": 1.8478, + "step": 4420 + }, + { + "epoch": 0.46729957805907174, + "grad_norm": 0.5092244744300842, + "learning_rate": 0.0015, + "loss": 1.8496, + "step": 4430 + }, + { + "epoch": 0.46835443037974683, + "grad_norm": 0.3331068456172943, + "learning_rate": 0.0015, + "loss": 1.8475, + "step": 4440 + }, + { + "epoch": 0.4694092827004219, + "grad_norm": 0.38931623101234436, + "learning_rate": 0.0015, + "loss": 1.8511, + "step": 4450 + }, + { + "epoch": 0.4704641350210971, + "grad_norm": 0.3346966505050659, + "learning_rate": 0.0015, + "loss": 1.8379, + "step": 4460 + }, + { + "epoch": 0.47151898734177217, + "grad_norm": 0.3365168273448944, + "learning_rate": 0.0015, + "loss": 1.8518, + "step": 4470 + }, + { + "epoch": 0.47257383966244726, + "grad_norm": 0.3916284739971161, + "learning_rate": 0.0015, + "loss": 1.8553, + "step": 4480 + }, + { + "epoch": 0.47362869198312235, + "grad_norm": 0.42281103134155273, + "learning_rate": 0.0015, + "loss": 1.8427, + "step": 4490 + }, + { + "epoch": 
0.47468354430379744, + "grad_norm": 0.3276952803134918, + "learning_rate": 0.0015, + "loss": 1.8412, + "step": 4500 + }, + { + "epoch": 0.4757383966244726, + "grad_norm": 0.3229634463787079, + "learning_rate": 0.0015, + "loss": 1.8458, + "step": 4510 + }, + { + "epoch": 0.4767932489451477, + "grad_norm": 0.41583219170570374, + "learning_rate": 0.0015, + "loss": 1.8278, + "step": 4520 + }, + { + "epoch": 0.4778481012658228, + "grad_norm": 0.42357075214385986, + "learning_rate": 0.0015, + "loss": 1.8453, + "step": 4530 + }, + { + "epoch": 0.47890295358649787, + "grad_norm": 0.3458639085292816, + "learning_rate": 0.0015, + "loss": 1.8467, + "step": 4540 + }, + { + "epoch": 0.479957805907173, + "grad_norm": 0.35474738478660583, + "learning_rate": 0.0015, + "loss": 1.8425, + "step": 4550 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 0.4535824656486511, + "learning_rate": 0.0015, + "loss": 1.8337, + "step": 4560 + }, + { + "epoch": 0.4820675105485232, + "grad_norm": 0.4138074815273285, + "learning_rate": 0.0015, + "loss": 1.8174, + "step": 4570 + }, + { + "epoch": 0.4831223628691983, + "grad_norm": 0.4137820601463318, + "learning_rate": 0.0015, + "loss": 1.841, + "step": 4580 + }, + { + "epoch": 0.48417721518987344, + "grad_norm": 0.39168211817741394, + "learning_rate": 0.0015, + "loss": 1.8253, + "step": 4590 + }, + { + "epoch": 0.48523206751054854, + "grad_norm": 0.5041007399559021, + "learning_rate": 0.0015, + "loss": 1.8431, + "step": 4600 + }, + { + "epoch": 0.48628691983122363, + "grad_norm": 0.5302157998085022, + "learning_rate": 0.0015, + "loss": 1.8431, + "step": 4610 + }, + { + "epoch": 0.4873417721518987, + "grad_norm": 0.44530072808265686, + "learning_rate": 0.0015, + "loss": 1.8221, + "step": 4620 + }, + { + "epoch": 0.4883966244725738, + "grad_norm": 0.366564005613327, + "learning_rate": 0.0015, + "loss": 1.84, + "step": 4630 + }, + { + "epoch": 0.48945147679324896, + "grad_norm": 0.39872315526008606, + "learning_rate": 0.0015, + "loss": 1.8403, + 
"step": 4640 + }, + { + "epoch": 0.49050632911392406, + "grad_norm": 0.35998818278312683, + "learning_rate": 0.0015, + "loss": 1.8392, + "step": 4650 + }, + { + "epoch": 0.49156118143459915, + "grad_norm": 0.3405250608921051, + "learning_rate": 0.0015, + "loss": 1.8291, + "step": 4660 + }, + { + "epoch": 0.49261603375527424, + "grad_norm": 0.406040221452713, + "learning_rate": 0.0015, + "loss": 1.8288, + "step": 4670 + }, + { + "epoch": 0.4936708860759494, + "grad_norm": 0.3696703314781189, + "learning_rate": 0.0015, + "loss": 1.826, + "step": 4680 + }, + { + "epoch": 0.4947257383966245, + "grad_norm": 0.4865450859069824, + "learning_rate": 0.0015, + "loss": 1.8207, + "step": 4690 + }, + { + "epoch": 0.4957805907172996, + "grad_norm": 0.3739243745803833, + "learning_rate": 0.0015, + "loss": 1.8354, + "step": 4700 + }, + { + "epoch": 0.49683544303797467, + "grad_norm": 0.3547900319099426, + "learning_rate": 0.0015, + "loss": 1.8347, + "step": 4710 + }, + { + "epoch": 0.4978902953586498, + "grad_norm": 0.4278034567832947, + "learning_rate": 0.0015, + "loss": 1.828, + "step": 4720 + }, + { + "epoch": 0.4989451476793249, + "grad_norm": 0.32390764355659485, + "learning_rate": 0.0015, + "loss": 1.8265, + "step": 4730 + }, + { + "epoch": 0.5, + "grad_norm": 0.3499670624732971, + "learning_rate": 0.0015, + "loss": 1.8229, + "step": 4740 + }, + { + "epoch": 0.5010548523206751, + "grad_norm": 0.355530709028244, + "learning_rate": 0.0015, + "loss": 1.8397, + "step": 4750 + }, + { + "epoch": 0.5021097046413502, + "grad_norm": 0.3372103273868561, + "learning_rate": 0.0015, + "loss": 1.8254, + "step": 4760 + }, + { + "epoch": 0.5031645569620253, + "grad_norm": 0.48468029499053955, + "learning_rate": 0.0015, + "loss": 1.8223, + "step": 4770 + }, + { + "epoch": 0.5042194092827004, + "grad_norm": 0.4034891724586487, + "learning_rate": 0.0015, + "loss": 1.8334, + "step": 4780 + }, + { + "epoch": 0.5052742616033755, + "grad_norm": 0.4861879348754883, + "learning_rate": 0.0015, + 
"loss": 1.8166, + "step": 4790 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 0.3496376872062683, + "learning_rate": 0.0015, + "loss": 1.8208, + "step": 4800 + }, + { + "epoch": 0.5073839662447257, + "grad_norm": 0.3765081763267517, + "learning_rate": 0.0015, + "loss": 1.8206, + "step": 4810 + }, + { + "epoch": 0.5084388185654009, + "grad_norm": 0.46853533387184143, + "learning_rate": 0.0015, + "loss": 1.8404, + "step": 4820 + }, + { + "epoch": 0.509493670886076, + "grad_norm": 0.3622114658355713, + "learning_rate": 0.0015, + "loss": 1.8199, + "step": 4830 + }, + { + "epoch": 0.510548523206751, + "grad_norm": 0.40379026532173157, + "learning_rate": 0.0015, + "loss": 1.8145, + "step": 4840 + }, + { + "epoch": 0.5116033755274262, + "grad_norm": 0.3473694324493408, + "learning_rate": 0.0015, + "loss": 1.8296, + "step": 4850 + }, + { + "epoch": 0.5126582278481012, + "grad_norm": 0.3614269196987152, + "learning_rate": 0.0015, + "loss": 1.8146, + "step": 4860 + }, + { + "epoch": 0.5137130801687764, + "grad_norm": 0.37687039375305176, + "learning_rate": 0.0015, + "loss": 1.8068, + "step": 4870 + }, + { + "epoch": 0.5147679324894515, + "grad_norm": 0.3430388271808624, + "learning_rate": 0.0015, + "loss": 1.8276, + "step": 4880 + }, + { + "epoch": 0.5158227848101266, + "grad_norm": 0.3632057011127472, + "learning_rate": 0.0015, + "loss": 1.819, + "step": 4890 + }, + { + "epoch": 0.5168776371308017, + "grad_norm": 0.32978373765945435, + "learning_rate": 0.0015, + "loss": 1.8256, + "step": 4900 + }, + { + "epoch": 0.5179324894514767, + "grad_norm": 0.3529827296733856, + "learning_rate": 0.0015, + "loss": 1.8186, + "step": 4910 + }, + { + "epoch": 0.5189873417721519, + "grad_norm": 0.37453562021255493, + "learning_rate": 0.0015, + "loss": 1.8082, + "step": 4920 + }, + { + "epoch": 0.520042194092827, + "grad_norm": 0.4753303527832031, + "learning_rate": 0.0015, + "loss": 1.8193, + "step": 4930 + }, + { + "epoch": 0.5210970464135021, + "grad_norm": 0.3357962369918823, + 
"learning_rate": 0.0015, + "loss": 1.8147, + "step": 4940 + }, + { + "epoch": 0.5221518987341772, + "grad_norm": 0.34558114409446716, + "learning_rate": 0.0015, + "loss": 1.823, + "step": 4950 + }, + { + "epoch": 0.5232067510548524, + "grad_norm": 0.2897375822067261, + "learning_rate": 0.0015, + "loss": 1.8025, + "step": 4960 + }, + { + "epoch": 0.5242616033755274, + "grad_norm": 0.4054597318172455, + "learning_rate": 0.0015, + "loss": 1.8128, + "step": 4970 + }, + { + "epoch": 0.5253164556962026, + "grad_norm": 0.36653947830200195, + "learning_rate": 0.0015, + "loss": 1.8072, + "step": 4980 + }, + { + "epoch": 0.5263713080168776, + "grad_norm": 0.3881050944328308, + "learning_rate": 0.0015, + "loss": 1.8086, + "step": 4990 + }, + { + "epoch": 0.5274261603375527, + "grad_norm": 0.35539186000823975, + "learning_rate": 0.0015, + "loss": 1.822, + "step": 5000 + }, + { + "epoch": 0.5284810126582279, + "grad_norm": 0.3445149064064026, + "learning_rate": 0.0015, + "loss": 1.8106, + "step": 5010 + }, + { + "epoch": 0.5295358649789029, + "grad_norm": 0.3262072801589966, + "learning_rate": 0.0015, + "loss": 1.8036, + "step": 5020 + }, + { + "epoch": 0.5305907172995781, + "grad_norm": 0.3699089288711548, + "learning_rate": 0.0015, + "loss": 1.7894, + "step": 5030 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 0.3256937861442566, + "learning_rate": 0.0015, + "loss": 1.8152, + "step": 5040 + }, + { + "epoch": 0.5327004219409283, + "grad_norm": 0.4292232096195221, + "learning_rate": 0.0015, + "loss": 1.8022, + "step": 5050 + }, + { + "epoch": 0.5337552742616034, + "grad_norm": 0.3788437247276306, + "learning_rate": 0.0015, + "loss": 1.8171, + "step": 5060 + }, + { + "epoch": 0.5348101265822784, + "grad_norm": 0.3824426829814911, + "learning_rate": 0.0015, + "loss": 1.8043, + "step": 5070 + }, + { + "epoch": 0.5358649789029536, + "grad_norm": 0.43619370460510254, + "learning_rate": 0.0015, + "loss": 1.8192, + "step": 5080 + }, + { + "epoch": 0.5369198312236287, + 
"grad_norm": 0.4108010232448578, + "learning_rate": 0.0015, + "loss": 1.8107, + "step": 5090 + }, + { + "epoch": 0.5379746835443038, + "grad_norm": 0.36239510774612427, + "learning_rate": 0.0015, + "loss": 1.7934, + "step": 5100 + }, + { + "epoch": 0.5390295358649789, + "grad_norm": 0.40039804577827454, + "learning_rate": 0.0015, + "loss": 1.8024, + "step": 5110 + }, + { + "epoch": 0.540084388185654, + "grad_norm": 0.39933285117149353, + "learning_rate": 0.0015, + "loss": 1.8031, + "step": 5120 + }, + { + "epoch": 0.5411392405063291, + "grad_norm": 0.3655042350292206, + "learning_rate": 0.0015, + "loss": 1.7959, + "step": 5130 + }, + { + "epoch": 0.5421940928270043, + "grad_norm": 0.4802871644496918, + "learning_rate": 0.0015, + "loss": 1.8141, + "step": 5140 + }, + { + "epoch": 0.5432489451476793, + "grad_norm": 0.3769666850566864, + "learning_rate": 0.0015, + "loss": 1.8118, + "step": 5150 + }, + { + "epoch": 0.5443037974683544, + "grad_norm": 0.3966495096683502, + "learning_rate": 0.0015, + "loss": 1.7982, + "step": 5160 + }, + { + "epoch": 0.5453586497890295, + "grad_norm": 0.35799336433410645, + "learning_rate": 0.0015, + "loss": 1.8021, + "step": 5170 + }, + { + "epoch": 0.5464135021097046, + "grad_norm": 0.3347603380680084, + "learning_rate": 0.0015, + "loss": 1.8138, + "step": 5180 + }, + { + "epoch": 0.5474683544303798, + "grad_norm": 0.4724527597427368, + "learning_rate": 0.0015, + "loss": 1.7948, + "step": 5190 + }, + { + "epoch": 0.5485232067510548, + "grad_norm": 0.422674298286438, + "learning_rate": 0.0015, + "loss": 1.8081, + "step": 5200 + }, + { + "epoch": 0.54957805907173, + "grad_norm": 0.36212271451950073, + "learning_rate": 0.0015, + "loss": 1.7883, + "step": 5210 + }, + { + "epoch": 0.5506329113924051, + "grad_norm": 0.37394046783447266, + "learning_rate": 0.0015, + "loss": 1.803, + "step": 5220 + }, + { + "epoch": 0.5516877637130801, + "grad_norm": 0.391477108001709, + "learning_rate": 0.0015, + "loss": 1.794, + "step": 5230 + }, + { + 
"epoch": 0.5527426160337553, + "grad_norm": 0.33054593205451965, + "learning_rate": 0.0015, + "loss": 1.8004, + "step": 5240 + }, + { + "epoch": 0.5537974683544303, + "grad_norm": 0.5764470100402832, + "learning_rate": 0.0015, + "loss": 1.7946, + "step": 5250 + }, + { + "epoch": 0.5548523206751055, + "grad_norm": 0.32538744807243347, + "learning_rate": 0.0015, + "loss": 1.7988, + "step": 5260 + }, + { + "epoch": 0.5559071729957806, + "grad_norm": 0.4617023468017578, + "learning_rate": 0.0015, + "loss": 1.8032, + "step": 5270 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 0.5663644075393677, + "learning_rate": 0.0015, + "loss": 1.7951, + "step": 5280 + }, + { + "epoch": 0.5580168776371308, + "grad_norm": 0.34853243827819824, + "learning_rate": 0.0015, + "loss": 1.7936, + "step": 5290 + }, + { + "epoch": 0.5590717299578059, + "grad_norm": 0.3220527470111847, + "learning_rate": 0.0015, + "loss": 1.7991, + "step": 5300 + }, + { + "epoch": 0.560126582278481, + "grad_norm": 0.48532089591026306, + "learning_rate": 0.0015, + "loss": 1.7878, + "step": 5310 + }, + { + "epoch": 0.5611814345991561, + "grad_norm": 0.40416234731674194, + "learning_rate": 0.0015, + "loss": 1.7866, + "step": 5320 + }, + { + "epoch": 0.5622362869198312, + "grad_norm": 0.3604409098625183, + "learning_rate": 0.0015, + "loss": 1.7975, + "step": 5330 + }, + { + "epoch": 0.5632911392405063, + "grad_norm": 0.3480297327041626, + "learning_rate": 0.0015, + "loss": 1.7922, + "step": 5340 + }, + { + "epoch": 0.5643459915611815, + "grad_norm": 0.36295345425605774, + "learning_rate": 0.0015, + "loss": 1.7846, + "step": 5350 + }, + { + "epoch": 0.5654008438818565, + "grad_norm": 0.43485522270202637, + "learning_rate": 0.0015, + "loss": 1.7904, + "step": 5360 + }, + { + "epoch": 0.5664556962025317, + "grad_norm": 0.3537534773349762, + "learning_rate": 0.0015, + "loss": 1.7939, + "step": 5370 + }, + { + "epoch": 0.5675105485232067, + "grad_norm": 0.43174123764038086, + "learning_rate": 0.0015, + "loss": 
1.7833, + "step": 5380 + }, + { + "epoch": 0.5685654008438819, + "grad_norm": 0.5191883444786072, + "learning_rate": 0.0015, + "loss": 1.7866, + "step": 5390 + }, + { + "epoch": 0.569620253164557, + "grad_norm": 0.3696303367614746, + "learning_rate": 0.0015, + "loss": 1.7859, + "step": 5400 + }, + { + "epoch": 0.570675105485232, + "grad_norm": 0.3290150761604309, + "learning_rate": 0.0015, + "loss": 1.7854, + "step": 5410 + }, + { + "epoch": 0.5717299578059072, + "grad_norm": 0.4146673083305359, + "learning_rate": 0.0015, + "loss": 1.783, + "step": 5420 + }, + { + "epoch": 0.5727848101265823, + "grad_norm": 0.3947039544582367, + "learning_rate": 0.0015, + "loss": 1.7813, + "step": 5430 + }, + { + "epoch": 0.5738396624472574, + "grad_norm": 0.3319171071052551, + "learning_rate": 0.0015, + "loss": 1.781, + "step": 5440 + }, + { + "epoch": 0.5748945147679325, + "grad_norm": 0.33405378460884094, + "learning_rate": 0.0015, + "loss": 1.7876, + "step": 5450 + }, + { + "epoch": 0.5759493670886076, + "grad_norm": 0.32065099477767944, + "learning_rate": 0.0015, + "loss": 1.7869, + "step": 5460 + }, + { + "epoch": 0.5770042194092827, + "grad_norm": 0.3278934359550476, + "learning_rate": 0.0015, + "loss": 1.7825, + "step": 5470 + }, + { + "epoch": 0.5780590717299579, + "grad_norm": 0.4991150200366974, + "learning_rate": 0.0015, + "loss": 1.7875, + "step": 5480 + }, + { + "epoch": 0.5791139240506329, + "grad_norm": 0.3259681463241577, + "learning_rate": 0.0015, + "loss": 1.7879, + "step": 5490 + }, + { + "epoch": 0.580168776371308, + "grad_norm": 0.39956098794937134, + "learning_rate": 0.0015, + "loss": 1.7816, + "step": 5500 + }, + { + "epoch": 0.5812236286919831, + "grad_norm": 0.40146827697753906, + "learning_rate": 0.0015, + "loss": 1.7878, + "step": 5510 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 0.3431818187236786, + "learning_rate": 0.0015, + "loss": 1.7805, + "step": 5520 + }, + { + "epoch": 0.5833333333333334, + "grad_norm": 0.4219339191913605, + 
"learning_rate": 0.0015, + "loss": 1.7761, + "step": 5530 + }, + { + "epoch": 0.5843881856540084, + "grad_norm": 0.3600873649120331, + "learning_rate": 0.0015, + "loss": 1.7715, + "step": 5540 + }, + { + "epoch": 0.5854430379746836, + "grad_norm": 0.29148736596107483, + "learning_rate": 0.0015, + "loss": 1.789, + "step": 5550 + }, + { + "epoch": 0.5864978902953587, + "grad_norm": 0.3306005001068115, + "learning_rate": 0.0015, + "loss": 1.7917, + "step": 5560 + }, + { + "epoch": 0.5875527426160337, + "grad_norm": 0.3504825532436371, + "learning_rate": 0.0015, + "loss": 1.777, + "step": 5570 + }, + { + "epoch": 0.5886075949367089, + "grad_norm": 0.3031824231147766, + "learning_rate": 0.0015, + "loss": 1.7768, + "step": 5580 + }, + { + "epoch": 0.5896624472573839, + "grad_norm": 0.4476396143436432, + "learning_rate": 0.0015, + "loss": 1.7805, + "step": 5590 + }, + { + "epoch": 0.5907172995780591, + "grad_norm": 0.4386860728263855, + "learning_rate": 0.0015, + "loss": 1.7751, + "step": 5600 + }, + { + "epoch": 0.5917721518987342, + "grad_norm": 0.3248020112514496, + "learning_rate": 0.0015, + "loss": 1.7773, + "step": 5610 + }, + { + "epoch": 0.5928270042194093, + "grad_norm": 0.35110512375831604, + "learning_rate": 0.0015, + "loss": 1.7883, + "step": 5620 + }, + { + "epoch": 0.5938818565400844, + "grad_norm": 0.5061001777648926, + "learning_rate": 0.0015, + "loss": 1.7785, + "step": 5630 + }, + { + "epoch": 0.5949367088607594, + "grad_norm": 0.39553189277648926, + "learning_rate": 0.0015, + "loss": 1.7914, + "step": 5640 + }, + { + "epoch": 0.5959915611814346, + "grad_norm": 0.2968481183052063, + "learning_rate": 0.0015, + "loss": 1.7868, + "step": 5650 + }, + { + "epoch": 0.5970464135021097, + "grad_norm": 0.3996788263320923, + "learning_rate": 0.0015, + "loss": 1.7782, + "step": 5660 + }, + { + "epoch": 0.5981012658227848, + "grad_norm": 0.3071383535861969, + "learning_rate": 0.0015, + "loss": 1.7863, + "step": 5670 + }, + { + "epoch": 0.5991561181434599, + 
"grad_norm": 0.4471006393432617, + "learning_rate": 0.0015, + "loss": 1.784, + "step": 5680 + }, + { + "epoch": 0.6002109704641351, + "grad_norm": 0.33473655581474304, + "learning_rate": 0.0015, + "loss": 1.7667, + "step": 5690 + }, + { + "epoch": 0.6012658227848101, + "grad_norm": 0.36823636293411255, + "learning_rate": 0.0015, + "loss": 1.7688, + "step": 5700 + }, + { + "epoch": 0.6023206751054853, + "grad_norm": 0.39860105514526367, + "learning_rate": 0.0015, + "loss": 1.7709, + "step": 5710 + }, + { + "epoch": 0.6033755274261603, + "grad_norm": 0.3454592227935791, + "learning_rate": 0.0015, + "loss": 1.7741, + "step": 5720 + }, + { + "epoch": 0.6044303797468354, + "grad_norm": 0.37099096179008484, + "learning_rate": 0.0015, + "loss": 1.7725, + "step": 5730 + }, + { + "epoch": 0.6054852320675106, + "grad_norm": 0.3275086581707001, + "learning_rate": 0.0015, + "loss": 1.7738, + "step": 5740 + }, + { + "epoch": 0.6065400843881856, + "grad_norm": 0.4156765639781952, + "learning_rate": 0.0015, + "loss": 1.7455, + "step": 5750 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 0.3556298613548279, + "learning_rate": 0.0015, + "loss": 1.7578, + "step": 5760 + }, + { + "epoch": 0.6086497890295358, + "grad_norm": 0.3785768747329712, + "learning_rate": 0.0015, + "loss": 1.76, + "step": 5770 + }, + { + "epoch": 0.609704641350211, + "grad_norm": 0.3101571202278137, + "learning_rate": 0.0015, + "loss": 1.766, + "step": 5780 + }, + { + "epoch": 0.6107594936708861, + "grad_norm": 0.4646480977535248, + "learning_rate": 0.0015, + "loss": 1.7609, + "step": 5790 + }, + { + "epoch": 0.6118143459915611, + "grad_norm": 0.39919760823249817, + "learning_rate": 0.0015, + "loss": 1.7695, + "step": 5800 + }, + { + "epoch": 0.6128691983122363, + "grad_norm": 0.37033745646476746, + "learning_rate": 0.0015, + "loss": 1.7716, + "step": 5810 + }, + { + "epoch": 0.6139240506329114, + "grad_norm": 0.3841451406478882, + "learning_rate": 0.0015, + "loss": 1.7837, + "step": 5820 + }, + { + 
"epoch": 0.6149789029535865, + "grad_norm": 0.3592260777950287, + "learning_rate": 0.0015, + "loss": 1.7632, + "step": 5830 + }, + { + "epoch": 0.6160337552742616, + "grad_norm": 0.3078693747520447, + "learning_rate": 0.0015, + "loss": 1.7792, + "step": 5840 + }, + { + "epoch": 0.6170886075949367, + "grad_norm": 0.313323974609375, + "learning_rate": 0.0015, + "loss": 1.7834, + "step": 5850 + }, + { + "epoch": 0.6181434599156118, + "grad_norm": 0.3674357235431671, + "learning_rate": 0.0015, + "loss": 1.7706, + "step": 5860 + }, + { + "epoch": 0.619198312236287, + "grad_norm": 0.43470507860183716, + "learning_rate": 0.0015, + "loss": 1.7627, + "step": 5870 + }, + { + "epoch": 0.620253164556962, + "grad_norm": 0.32323190569877625, + "learning_rate": 0.0015, + "loss": 1.772, + "step": 5880 + }, + { + "epoch": 0.6213080168776371, + "grad_norm": 0.4054836928844452, + "learning_rate": 0.0015, + "loss": 1.7626, + "step": 5890 + }, + { + "epoch": 0.6223628691983122, + "grad_norm": 0.3744523525238037, + "learning_rate": 0.0015, + "loss": 1.7659, + "step": 5900 + }, + { + "epoch": 0.6234177215189873, + "grad_norm": 0.4974427819252014, + "learning_rate": 0.0015, + "loss": 1.7611, + "step": 5910 + }, + { + "epoch": 0.6244725738396625, + "grad_norm": 0.3532814383506775, + "learning_rate": 0.0015, + "loss": 1.7569, + "step": 5920 + }, + { + "epoch": 0.6255274261603375, + "grad_norm": 0.4499043822288513, + "learning_rate": 0.0015, + "loss": 1.7626, + "step": 5930 + }, + { + "epoch": 0.6265822784810127, + "grad_norm": 0.34281599521636963, + "learning_rate": 0.0015, + "loss": 1.7682, + "step": 5940 + }, + { + "epoch": 0.6276371308016878, + "grad_norm": 0.37006106972694397, + "learning_rate": 0.0015, + "loss": 1.7558, + "step": 5950 + }, + { + "epoch": 0.6286919831223629, + "grad_norm": 0.37030336260795593, + "learning_rate": 0.0015, + "loss": 1.7623, + "step": 5960 + }, + { + "epoch": 0.629746835443038, + "grad_norm": 0.381357342004776, + "learning_rate": 0.0015, + "loss": 1.7642, + 
"step": 5970 + }, + { + "epoch": 0.630801687763713, + "grad_norm": 0.3766602575778961, + "learning_rate": 0.0015, + "loss": 1.7486, + "step": 5980 + }, + { + "epoch": 0.6318565400843882, + "grad_norm": 0.32820066809654236, + "learning_rate": 0.0015, + "loss": 1.7666, + "step": 5990 + }, + { + "epoch": 0.6329113924050633, + "grad_norm": 0.3488951623439789, + "learning_rate": 0.0015, + "loss": 1.7731, + "step": 6000 + }, + { + "epoch": 0.6339662447257384, + "grad_norm": 0.3223486542701721, + "learning_rate": 0.0015, + "loss": 1.7626, + "step": 6010 + }, + { + "epoch": 0.6350210970464135, + "grad_norm": 0.3566620349884033, + "learning_rate": 0.0015, + "loss": 1.7668, + "step": 6020 + }, + { + "epoch": 0.6360759493670886, + "grad_norm": 0.33811134099960327, + "learning_rate": 0.0015, + "loss": 1.7483, + "step": 6030 + }, + { + "epoch": 0.6371308016877637, + "grad_norm": 0.44937440752983093, + "learning_rate": 0.0015, + "loss": 1.765, + "step": 6040 + }, + { + "epoch": 0.6381856540084389, + "grad_norm": 0.36752060055732727, + "learning_rate": 0.0015, + "loss": 1.7468, + "step": 6050 + }, + { + "epoch": 0.6392405063291139, + "grad_norm": 0.31580060720443726, + "learning_rate": 0.0015, + "loss": 1.7644, + "step": 6060 + }, + { + "epoch": 0.640295358649789, + "grad_norm": 0.4360526502132416, + "learning_rate": 0.0015, + "loss": 1.7551, + "step": 6070 + }, + { + "epoch": 0.6413502109704642, + "grad_norm": 0.32985156774520874, + "learning_rate": 0.0015, + "loss": 1.7556, + "step": 6080 + }, + { + "epoch": 0.6424050632911392, + "grad_norm": 0.32723644375801086, + "learning_rate": 0.0015, + "loss": 1.7575, + "step": 6090 + }, + { + "epoch": 0.6434599156118144, + "grad_norm": 0.49019789695739746, + "learning_rate": 0.0015, + "loss": 1.7477, + "step": 6100 + }, + { + "epoch": 0.6445147679324894, + "grad_norm": 0.3593115508556366, + "learning_rate": 0.0015, + "loss": 1.7604, + "step": 6110 + }, + { + "epoch": 0.6455696202531646, + "grad_norm": 0.3175909221172333, + 
"learning_rate": 0.0015, + "loss": 1.7563, + "step": 6120 + }, + { + "epoch": 0.6466244725738397, + "grad_norm": 0.384500116109848, + "learning_rate": 0.0015, + "loss": 1.7514, + "step": 6130 + }, + { + "epoch": 0.6476793248945147, + "grad_norm": 0.424191951751709, + "learning_rate": 0.0015, + "loss": 1.7558, + "step": 6140 + }, + { + "epoch": 0.6487341772151899, + "grad_norm": 0.396973192691803, + "learning_rate": 0.0015, + "loss": 1.7589, + "step": 6150 + }, + { + "epoch": 0.6497890295358649, + "grad_norm": 0.32909175753593445, + "learning_rate": 0.0015, + "loss": 1.7585, + "step": 6160 + }, + { + "epoch": 0.6508438818565401, + "grad_norm": 0.35591256618499756, + "learning_rate": 0.0014834368975312174, + "loss": 1.7344, + "step": 6170 + }, + { + "epoch": 0.6518987341772152, + "grad_norm": 0.5259692072868347, + "learning_rate": 0.0014629899726345957, + "loss": 1.7568, + "step": 6180 + }, + { + "epoch": 0.6529535864978903, + "grad_norm": 0.3114689588546753, + "learning_rate": 0.0014428248775471316, + "loss": 1.7578, + "step": 6190 + }, + { + "epoch": 0.6540084388185654, + "grad_norm": 0.5166715383529663, + "learning_rate": 0.00142293772767289, + "loss": 1.75, + "step": 6200 + }, + { + "epoch": 0.6550632911392406, + "grad_norm": 0.39521825313568115, + "learning_rate": 0.001403324691959192, + "loss": 1.7453, + "step": 6210 + }, + { + "epoch": 0.6561181434599156, + "grad_norm": 0.35881662368774414, + "learning_rate": 0.0013839819921586025, + "loss": 1.7513, + "step": 6220 + }, + { + "epoch": 0.6571729957805907, + "grad_norm": 0.30511415004730225, + "learning_rate": 0.0013649059021010894, + "loss": 1.741, + "step": 6230 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 0.4092840254306793, + "learning_rate": 0.0013460927469762154, + "loss": 1.7434, + "step": 6240 + }, + { + "epoch": 0.6592827004219409, + "grad_norm": 0.35713300108909607, + "learning_rate": 0.0013275389026252255, + "loss": 1.7531, + "step": 6250 + }, + { + "epoch": 0.6603375527426161, + "grad_norm": 
0.3668721318244934, + "learning_rate": 0.0013092407948428887, + "loss": 1.7419, + "step": 6260 + }, + { + "epoch": 0.6613924050632911, + "grad_norm": 0.33162111043930054, + "learning_rate": 0.001291194898688966, + "loss": 1.7525, + "step": 6270 + }, + { + "epoch": 0.6624472573839663, + "grad_norm": 0.4238397181034088, + "learning_rate": 0.001273397737809166, + "loss": 1.7486, + "step": 6280 + }, + { + "epoch": 0.6635021097046413, + "grad_norm": 0.3315000534057617, + "learning_rate": 0.001255845883765463, + "loss": 1.7426, + "step": 6290 + }, + { + "epoch": 0.6645569620253164, + "grad_norm": 0.40429437160491943, + "learning_rate": 0.001238535955375642, + "loss": 1.736, + "step": 6300 + }, + { + "epoch": 0.6656118143459916, + "grad_norm": 0.3680913746356964, + "learning_rate": 0.0012214646180619506, + "loss": 1.7336, + "step": 6310 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.3675517737865448, + "learning_rate": 0.001204628583208727, + "loss": 1.7308, + "step": 6320 + }, + { + "epoch": 0.6677215189873418, + "grad_norm": 0.3261108100414276, + "learning_rate": 0.0011880246075288827, + "loss": 1.7354, + "step": 6330 + }, + { + "epoch": 0.6687763713080169, + "grad_norm": 0.4246390759944916, + "learning_rate": 0.001171649492439115, + "loss": 1.7295, + "step": 6340 + }, + { + "epoch": 0.669831223628692, + "grad_norm": 0.316724956035614, + "learning_rate": 0.0011555000834437364, + "loss": 1.7349, + "step": 6350 + }, + { + "epoch": 0.6708860759493671, + "grad_norm": 0.3159819543361664, + "learning_rate": 0.0011395732695269908, + "loss": 1.7302, + "step": 6360 + }, + { + "epoch": 0.6719409282700421, + "grad_norm": 0.31476807594299316, + "learning_rate": 0.0011238659825537505, + "loss": 1.7162, + "step": 6370 + }, + { + "epoch": 0.6729957805907173, + "grad_norm": 0.31384891271591187, + "learning_rate": 0.0011083751966784717, + "loss": 1.7187, + "step": 6380 + }, + { + "epoch": 0.6740506329113924, + "grad_norm": 0.36711472272872925, + "learning_rate": 
0.0010930979277622953, + "loss": 1.7335, + "step": 6390 + }, + { + "epoch": 0.6751054852320675, + "grad_norm": 0.4441031813621521, + "learning_rate": 0.0010780312327981854, + "loss": 1.7301, + "step": 6400 + }, + { + "epoch": 0.6761603375527426, + "grad_norm": 0.3344942033290863, + "learning_rate": 0.0010631722093439888, + "loss": 1.7261, + "step": 6410 + }, + { + "epoch": 0.6772151898734177, + "grad_norm": 0.3050030767917633, + "learning_rate": 0.00104851799496331, + "loss": 1.7166, + "step": 6420 + }, + { + "epoch": 0.6782700421940928, + "grad_norm": 0.3217354714870453, + "learning_rate": 0.0010340657666740914, + "loss": 1.7236, + "step": 6430 + }, + { + "epoch": 0.679324894514768, + "grad_norm": 0.3525526523590088, + "learning_rate": 0.0010198127404047975, + "loss": 1.7084, + "step": 6440 + }, + { + "epoch": 0.680379746835443, + "grad_norm": 0.39753952622413635, + "learning_rate": 0.0010057561704580897, + "loss": 1.7103, + "step": 6450 + }, + { + "epoch": 0.6814345991561181, + "grad_norm": 0.32323288917541504, + "learning_rate": 0.0009918933489818985, + "loss": 1.7278, + "step": 6460 + }, + { + "epoch": 0.6824894514767933, + "grad_norm": 0.31371453404426575, + "learning_rate": 0.0009782216054477827, + "loss": 1.717, + "step": 6470 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 0.3848041892051697, + "learning_rate": 0.0009647383061364801, + "loss": 1.7239, + "step": 6480 + }, + { + "epoch": 0.6845991561181435, + "grad_norm": 0.3419899344444275, + "learning_rate": 0.0009514408536305495, + "loss": 1.7175, + "step": 6490 + }, + { + "epoch": 0.6856540084388185, + "grad_norm": 0.3177018165588379, + "learning_rate": 0.0009383266863140042, + "loss": 1.7338, + "step": 6500 + }, + { + "epoch": 0.6867088607594937, + "grad_norm": 0.34837961196899414, + "learning_rate": 0.000925393277878844, + "loss": 1.7277, + "step": 6510 + }, + { + "epoch": 0.6877637130801688, + "grad_norm": 0.30670979619026184, + "learning_rate": 0.0009126381368383879, + "loss": 1.7142, + "step": 
6520 + }, + { + "epoch": 0.6888185654008439, + "grad_norm": 0.3753418028354645, + "learning_rate": 0.0009000588060473156, + "loss": 1.7094, + "step": 6530 + }, + { + "epoch": 0.689873417721519, + "grad_norm": 0.32511377334594727, + "learning_rate": 0.0008876528622283235, + "loss": 1.7186, + "step": 6540 + }, + { + "epoch": 0.6909282700421941, + "grad_norm": 0.29808443784713745, + "learning_rate": 0.0008754179155053053, + "loss": 1.709, + "step": 6550 + }, + { + "epoch": 0.6919831223628692, + "grad_norm": 0.5224196314811707, + "learning_rate": 0.0008633516089429683, + "loss": 1.7118, + "step": 6560 + }, + { + "epoch": 0.6930379746835443, + "grad_norm": 0.3130415380001068, + "learning_rate": 0.0008514516180927928, + "loss": 1.7066, + "step": 6570 + }, + { + "epoch": 0.6940928270042194, + "grad_norm": 0.3112214505672455, + "learning_rate": 0.0008397156505452524, + "loss": 1.7042, + "step": 6580 + }, + { + "epoch": 0.6951476793248945, + "grad_norm": 0.34768146276474, + "learning_rate": 0.0008281414454882051, + "loss": 1.7114, + "step": 6590 + }, + { + "epoch": 0.6962025316455697, + "grad_norm": 0.2983977496623993, + "learning_rate": 0.0008167267732713704, + "loss": 1.7138, + "step": 6600 + }, + { + "epoch": 0.6972573839662447, + "grad_norm": 0.30299532413482666, + "learning_rate": 0.0008054694349768117, + "loss": 1.7011, + "step": 6610 + }, + { + "epoch": 0.6983122362869199, + "grad_norm": 0.37259745597839355, + "learning_rate": 0.0007943672619953359, + "loss": 1.7063, + "step": 6620 + }, + { + "epoch": 0.6993670886075949, + "grad_norm": 0.37842774391174316, + "learning_rate": 0.0007834181156087356, + "loss": 1.7004, + "step": 6630 + }, + { + "epoch": 0.70042194092827, + "grad_norm": 0.3643161356449127, + "learning_rate": 0.0007726198865777852, + "loss": 1.7062, + "step": 6640 + }, + { + "epoch": 0.7014767932489452, + "grad_norm": 0.29314523935317993, + "learning_rate": 0.0007619704947359191, + "loss": 1.6992, + "step": 6650 + }, + { + "epoch": 0.7025316455696202, + 
"grad_norm": 0.3162665069103241, + "learning_rate": 0.0007514678885885087, + "loss": 1.6979, + "step": 6660 + }, + { + "epoch": 0.7035864978902954, + "grad_norm": 0.289482057094574, + "learning_rate": 0.0007411100449176633, + "loss": 1.6977, + "step": 6670 + }, + { + "epoch": 0.7046413502109705, + "grad_norm": 0.29521444439888, + "learning_rate": 0.0007308949683924791, + "loss": 1.7021, + "step": 6680 + }, + { + "epoch": 0.7056962025316456, + "grad_norm": 0.3438432514667511, + "learning_rate": 0.000720820691184658, + "loss": 1.6949, + "step": 6690 + }, + { + "epoch": 0.7067510548523207, + "grad_norm": 0.3035603165626526, + "learning_rate": 0.0007108852725894269, + "loss": 1.6961, + "step": 6700 + }, + { + "epoch": 0.7078059071729957, + "grad_norm": 0.2915734648704529, + "learning_rate": 0.000701086798651681, + "loss": 1.6963, + "step": 6710 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 0.39023539423942566, + "learning_rate": 0.0006914233817972798, + "loss": 1.687, + "step": 6720 + }, + { + "epoch": 0.709915611814346, + "grad_norm": 0.30199670791625977, + "learning_rate": 0.0006818931604694261, + "loss": 1.6969, + "step": 6730 + }, + { + "epoch": 0.7109704641350211, + "grad_norm": 0.3577688932418823, + "learning_rate": 0.0006724942987700563, + "loss": 1.7013, + "step": 6740 + }, + { + "epoch": 0.7120253164556962, + "grad_norm": 0.3057284355163574, + "learning_rate": 0.0006632249861061732, + "loss": 1.7006, + "step": 6750 + }, + { + "epoch": 0.7130801687763713, + "grad_norm": 0.3916916251182556, + "learning_rate": 0.0006540834368410549, + "loss": 1.6944, + "step": 6760 + }, + { + "epoch": 0.7141350210970464, + "grad_norm": 0.3180960416793823, + "learning_rate": 0.0006450678899502701, + "loss": 1.6967, + "step": 6770 + }, + { + "epoch": 0.7151898734177216, + "grad_norm": 0.2974584698677063, + "learning_rate": 0.0006361766086824345, + "loss": 1.6934, + "step": 6780 + }, + { + "epoch": 0.7162447257383966, + "grad_norm": 0.3089200258255005, + "learning_rate": 
0.000627407880224645, + "loss": 1.6984, + "step": 6790 + }, + { + "epoch": 0.7172995780590717, + "grad_norm": 0.30943137407302856, + "learning_rate": 0.0006187600153725225, + "loss": 1.6857, + "step": 6800 + }, + { + "epoch": 0.7183544303797469, + "grad_norm": 0.39003410935401917, + "learning_rate": 0.0006102313482048055, + "loss": 1.6891, + "step": 6810 + }, + { + "epoch": 0.7194092827004219, + "grad_norm": 0.32572123408317566, + "learning_rate": 0.0006018202357624274, + "loss": 1.6908, + "step": 6820 + }, + { + "epoch": 0.7204641350210971, + "grad_norm": 0.4509422183036804, + "learning_rate": 0.0005935250577320168, + "loss": 1.6865, + "step": 6830 + }, + { + "epoch": 0.7215189873417721, + "grad_norm": 0.35671916604042053, + "learning_rate": 0.0005853442161337618, + "loss": 1.6787, + "step": 6840 + }, + { + "epoch": 0.7225738396624473, + "grad_norm": 0.2916567623615265, + "learning_rate": 0.0005772761350135759, + "loss": 1.6856, + "step": 6850 + }, + { + "epoch": 0.7236286919831224, + "grad_norm": 0.3454425036907196, + "learning_rate": 0.0005693192601395058, + "loss": 1.6833, + "step": 6860 + }, + { + "epoch": 0.7246835443037974, + "grad_norm": 0.33079248666763306, + "learning_rate": 0.000561472058702326, + "loss": 1.6813, + "step": 6870 + }, + { + "epoch": 0.7257383966244726, + "grad_norm": 0.3648410737514496, + "learning_rate": 0.000553733019020258, + "loss": 1.6863, + "step": 6880 + }, + { + "epoch": 0.7267932489451476, + "grad_norm": 0.321341335773468, + "learning_rate": 0.0005461006502477612, + "loss": 1.6749, + "step": 6890 + }, + { + "epoch": 0.7278481012658228, + "grad_norm": 0.3055427074432373, + "learning_rate": 0.0005385734820883369, + "loss": 1.6789, + "step": 6900 + }, + { + "epoch": 0.7289029535864979, + "grad_norm": 0.29323190450668335, + "learning_rate": 0.0005311500645112907, + "loss": 1.7028, + "step": 6910 + }, + { + "epoch": 0.729957805907173, + "grad_norm": 0.32127928733825684, + "learning_rate": 0.0005238289674723993, + "loss": 1.683, + 
"step": 6920 + }, + { + "epoch": 0.7310126582278481, + "grad_norm": 0.350521981716156, + "learning_rate": 0.0005166087806384274, + "loss": 1.6902, + "step": 6930 + }, + { + "epoch": 0.7320675105485233, + "grad_norm": 0.3235044777393341, + "learning_rate": 0.0005094881131154418, + "loss": 1.6901, + "step": 6940 + }, + { + "epoch": 0.7331223628691983, + "grad_norm": 0.32942715287208557, + "learning_rate": 0.0005024655931808696, + "loss": 1.6889, + "step": 6950 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 0.3716394603252411, + "learning_rate": 0.0004955398680192508, + "loss": 1.6761, + "step": 6960 + }, + { + "epoch": 0.7352320675105485, + "grad_norm": 0.327116459608078, + "learning_rate": 0.000488709603461632, + "loss": 1.6714, + "step": 6970 + }, + { + "epoch": 0.7362869198312236, + "grad_norm": 0.29985353350639343, + "learning_rate": 0.000481973483728553, + "loss": 1.6762, + "step": 6980 + }, + { + "epoch": 0.7373417721518988, + "grad_norm": 0.31413426995277405, + "learning_rate": 0.0004753302111765748, + "loss": 1.6767, + "step": 6990 + }, + { + "epoch": 0.7383966244725738, + "grad_norm": 0.32647716999053955, + "learning_rate": 0.0004687785060483032, + "loss": 1.6845, + "step": 7000 + }, + { + "epoch": 0.739451476793249, + "grad_norm": 0.3025463819503784, + "learning_rate": 0.0004623171062258558, + "loss": 1.6569, + "step": 7010 + }, + { + "epoch": 0.740506329113924, + "grad_norm": 0.38400498032569885, + "learning_rate": 0.0004559447669877288, + "loss": 1.6726, + "step": 7020 + }, + { + "epoch": 0.7415611814345991, + "grad_norm": 0.34344202280044556, + "learning_rate": 0.00044966026076901413, + "loss": 1.6774, + "step": 7030 + }, + { + "epoch": 0.7426160337552743, + "grad_norm": 0.39909932017326355, + "learning_rate": 0.00044346237692492177, + "loss": 1.6812, + "step": 7040 + }, + { + "epoch": 0.7436708860759493, + "grad_norm": 0.30984315276145935, + "learning_rate": 0.0004373499214975615, + "loss": 1.6701, + "step": 7050 + }, + { + "epoch": 
0.7447257383966245, + "grad_norm": 0.3168264329433441, + "learning_rate": 0.0004313217169859396, + "loss": 1.674, + "step": 7060 + }, + { + "epoch": 0.7457805907172996, + "grad_norm": 0.2879488170146942, + "learning_rate": 0.0004253766021191256, + "loss": 1.6811, + "step": 7070 + }, + { + "epoch": 0.7468354430379747, + "grad_norm": 0.29310518503189087, + "learning_rate": 0.00041951343163254497, + "loss": 1.6773, + "step": 7080 + }, + { + "epoch": 0.7478902953586498, + "grad_norm": 0.3639557957649231, + "learning_rate": 0.00041373107604735626, + "loss": 1.6759, + "step": 7090 + }, + { + "epoch": 0.7489451476793249, + "grad_norm": 0.29705002903938293, + "learning_rate": 0.0004080284214528687, + "loss": 1.6738, + "step": 7100 + }, + { + "epoch": 0.75, + "grad_norm": 0.32136496901512146, + "learning_rate": 0.0004024043692919589, + "loss": 1.6807, + "step": 7110 + }, + { + "epoch": 0.7510548523206751, + "grad_norm": 0.34039151668548584, + "learning_rate": 0.0003968578361494449, + "loss": 1.6791, + "step": 7120 + }, + { + "epoch": 0.7521097046413502, + "grad_norm": 0.3102995455265045, + "learning_rate": 0.000391387753543378, + "loss": 1.6843, + "step": 7130 + }, + { + "epoch": 0.7531645569620253, + "grad_norm": 0.29857951402664185, + "learning_rate": 0.00038599306771921023, + "loss": 1.6672, + "step": 7140 + }, + { + "epoch": 0.7542194092827004, + "grad_norm": 0.32703617215156555, + "learning_rate": 0.0003806727394468004, + "loss": 1.664, + "step": 7150 + }, + { + "epoch": 0.7552742616033755, + "grad_norm": 0.3137376010417938, + "learning_rate": 0.0003754257438202162, + "loss": 1.6733, + "step": 7160 + }, + { + "epoch": 0.7563291139240507, + "grad_norm": 0.4127141535282135, + "learning_rate": 0.0003702510700602974, + "loss": 1.6804, + "step": 7170 + }, + { + "epoch": 0.7573839662447257, + "grad_norm": 0.34177953004837036, + "learning_rate": 0.0003651477213199393, + "loss": 1.6594, + "step": 7180 + }, + { + "epoch": 0.7584388185654009, + "grad_norm": 0.3109987676143646, + 
"learning_rate": 0.000360114714492061, + "loss": 1.661, + "step": 7190 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 0.3200550675392151, + "learning_rate": 0.0003551510800202195, + "loss": 1.6686, + "step": 7200 + }, + { + "epoch": 0.760548523206751, + "grad_norm": 0.31464260816574097, + "learning_rate": 0.0003502558617118353, + "loss": 1.6702, + "step": 7210 + }, + { + "epoch": 0.7616033755274262, + "grad_norm": 0.30083632469177246, + "learning_rate": 0.0003454281165539914, + "loss": 1.6816, + "step": 7220 + }, + { + "epoch": 0.7626582278481012, + "grad_norm": 0.3156372606754303, + "learning_rate": 0.00034066691453177176, + "loss": 1.6758, + "step": 7230 + }, + { + "epoch": 0.7637130801687764, + "grad_norm": 0.31183063983917236, + "learning_rate": 0.0003359713384491037, + "loss": 1.6744, + "step": 7240 + }, + { + "epoch": 0.7647679324894515, + "grad_norm": 0.33335936069488525, + "learning_rate": 0.00033134048375206944, + "loss": 1.6705, + "step": 7250 + }, + { + "epoch": 0.7658227848101266, + "grad_norm": 0.2791520655155182, + "learning_rate": 0.0003267734583546536, + "loss": 1.6662, + "step": 7260 + }, + { + "epoch": 0.7668776371308017, + "grad_norm": 0.291107177734375, + "learning_rate": 0.00032226938246689157, + "loss": 1.662, + "step": 7270 + }, + { + "epoch": 0.7679324894514767, + "grad_norm": 0.28034839034080505, + "learning_rate": 0.0003178273884253874, + "loss": 1.6669, + "step": 7280 + }, + { + "epoch": 0.7689873417721519, + "grad_norm": 0.29026344418525696, + "learning_rate": 0.0003134466205261674, + "loss": 1.6756, + "step": 7290 + }, + { + "epoch": 0.770042194092827, + "grad_norm": 0.3160174489021301, + "learning_rate": 0.0003091262348598378, + "loss": 1.6813, + "step": 7300 + }, + { + "epoch": 0.7710970464135021, + "grad_norm": 0.31982719898223877, + "learning_rate": 0.0003048653991490141, + "loss": 1.6621, + "step": 7310 + }, + { + "epoch": 0.7721518987341772, + "grad_norm": 0.3359350264072418, + "learning_rate": 0.00030066329258799187, + 
"loss": 1.6584, + "step": 7320 + }, + { + "epoch": 0.7732067510548524, + "grad_norm": 0.315728098154068, + "learning_rate": 0.0002965191056846266, + "loss": 1.6646, + "step": 7330 + }, + { + "epoch": 0.7742616033755274, + "grad_norm": 0.3030148148536682, + "learning_rate": 0.000292432040104394, + "loss": 1.6624, + "step": 7340 + }, + { + "epoch": 0.7753164556962026, + "grad_norm": 0.3142611086368561, + "learning_rate": 0.00028840130851659853, + "loss": 1.6557, + "step": 7350 + }, + { + "epoch": 0.7763713080168776, + "grad_norm": 0.2870323061943054, + "learning_rate": 0.0002844261344427028, + "loss": 1.6655, + "step": 7360 + }, + { + "epoch": 0.7774261603375527, + "grad_norm": 0.28609779477119446, + "learning_rate": 0.0002805057521067471, + "loss": 1.6652, + "step": 7370 + }, + { + "epoch": 0.7784810126582279, + "grad_norm": 0.28156542778015137, + "learning_rate": 0.00027663940628783017, + "loss": 1.656, + "step": 7380 + }, + { + "epoch": 0.7795358649789029, + "grad_norm": 0.2975054085254669, + "learning_rate": 0.00027282635217462393, + "loss": 1.6617, + "step": 7390 + }, + { + "epoch": 0.7805907172995781, + "grad_norm": 0.313997358083725, + "learning_rate": 0.0002690658552218937, + "loss": 1.6709, + "step": 7400 + }, + { + "epoch": 0.7816455696202531, + "grad_norm": 0.29864105582237244, + "learning_rate": 0.00026535719100899516, + "loss": 1.6514, + "step": 7410 + }, + { + "epoch": 0.7827004219409283, + "grad_norm": 0.294587105512619, + "learning_rate": 0.00026169964510032245, + "loss": 1.6578, + "step": 7420 + }, + { + "epoch": 0.7837552742616034, + "grad_norm": 0.2903492748737335, + "learning_rate": 0.00025809251290767984, + "loss": 1.6442, + "step": 7430 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 0.2977008819580078, + "learning_rate": 0.00025453509955454957, + "loss": 1.6489, + "step": 7440 + }, + { + "epoch": 0.7858649789029536, + "grad_norm": 0.2864299714565277, + "learning_rate": 0.00025102671974223175, + "loss": 1.6509, + "step": 7450 + }, + { + 
"epoch": 0.7869198312236287, + "grad_norm": 0.2841869592666626, + "learning_rate": 0.00024756669761782815, + "loss": 1.6621, + "step": 7460 + }, + { + "epoch": 0.7879746835443038, + "grad_norm": 0.3404243290424347, + "learning_rate": 0.0002441543666440464, + "loss": 1.6505, + "step": 7470 + }, + { + "epoch": 0.7890295358649789, + "grad_norm": 0.3151744604110718, + "learning_rate": 0.00024078906947079878, + "loss": 1.6567, + "step": 7480 + }, + { + "epoch": 0.790084388185654, + "grad_norm": 0.2896600365638733, + "learning_rate": 0.00023747015780857005, + "loss": 1.6646, + "step": 7490 + }, + { + "epoch": 0.7911392405063291, + "grad_norm": 0.2963440716266632, + "learning_rate": 0.00023419699230353144, + "loss": 1.6582, + "step": 7500 + }, + { + "epoch": 0.7921940928270043, + "grad_norm": 0.3287936747074127, + "learning_rate": 0.00023096894241437586, + "loss": 1.6669, + "step": 7510 + }, + { + "epoch": 0.7932489451476793, + "grad_norm": 0.29270681738853455, + "learning_rate": 0.00022778538629085056, + "loss": 1.6494, + "step": 7520 + }, + { + "epoch": 0.7943037974683544, + "grad_norm": 0.28976690769195557, + "learning_rate": 0.00022464571065396427, + "loss": 1.6516, + "step": 7530 + }, + { + "epoch": 0.7953586497890295, + "grad_norm": 0.30210551619529724, + "learning_rate": 0.00022154931067784521, + "loss": 1.6495, + "step": 7540 + }, + { + "epoch": 0.7964135021097046, + "grad_norm": 0.2862395942211151, + "learning_rate": 0.00021849558987322782, + "loss": 1.6462, + "step": 7550 + }, + { + "epoch": 0.7974683544303798, + "grad_norm": 0.3069690465927124, + "learning_rate": 0.0002154839599725452, + "loss": 1.6478, + "step": 7560 + }, + { + "epoch": 0.7985232067510548, + "grad_norm": 0.3010164201259613, + "learning_rate": 0.00021251384081660544, + "loss": 1.6609, + "step": 7570 + }, + { + "epoch": 0.79957805907173, + "grad_norm": 0.286625474691391, + "learning_rate": 0.0002095846602428303, + "loss": 1.6562, + "step": 7580 + }, + { + "epoch": 0.8006329113924051, + 
"grad_norm": 0.29061058163642883, + "learning_rate": 0.00020669585397503358, + "loss": 1.6505, + "step": 7590 + }, + { + "epoch": 0.8016877637130801, + "grad_norm": 0.30739066004753113, + "learning_rate": 0.0002038468655147195, + "loss": 1.651, + "step": 7600 + }, + { + "epoch": 0.8027426160337553, + "grad_norm": 0.29613780975341797, + "learning_rate": 0.00020103714603387894, + "loss": 1.6672, + "step": 7610 + }, + { + "epoch": 0.8037974683544303, + "grad_norm": 0.3332805037498474, + "learning_rate": 0.00019826615426926338, + "loss": 1.6368, + "step": 7620 + }, + { + "epoch": 0.8048523206751055, + "grad_norm": 0.2942625880241394, + "learning_rate": 0.00019553335641811625, + "loss": 1.659, + "step": 7630 + }, + { + "epoch": 0.8059071729957806, + "grad_norm": 0.30071958899497986, + "learning_rate": 0.0001928382260353415, + "loss": 1.6528, + "step": 7640 + }, + { + "epoch": 0.8069620253164557, + "grad_norm": 0.27827274799346924, + "learning_rate": 0.00019018024393208902, + "loss": 1.6662, + "step": 7650 + }, + { + "epoch": 0.8080168776371308, + "grad_norm": 0.2844911813735962, + "learning_rate": 0.00018755889807573872, + "loss": 1.6455, + "step": 7660 + }, + { + "epoch": 0.8090717299578059, + "grad_norm": 0.2892758548259735, + "learning_rate": 0.00018497368349126262, + "loss": 1.6504, + "step": 7670 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 0.33394187688827515, + "learning_rate": 0.00018242410216394648, + "loss": 1.6664, + "step": 7680 + }, + { + "epoch": 0.8111814345991561, + "grad_norm": 0.3627508580684662, + "learning_rate": 0.0001799096629434529, + "loss": 1.6416, + "step": 7690 + }, + { + "epoch": 0.8122362869198312, + "grad_norm": 0.3369033634662628, + "learning_rate": 0.00017742988144920578, + "loss": 1.6495, + "step": 7700 + }, + { + "epoch": 0.8132911392405063, + "grad_norm": 0.3026972711086273, + "learning_rate": 0.00017498427997707976, + "loss": 1.6489, + "step": 7710 + }, + { + "epoch": 0.8143459915611815, + "grad_norm": 0.2892075479030609, + 
"learning_rate": 0.00017257238740737548, + "loss": 1.6511, + "step": 7720 + }, + { + "epoch": 0.8154008438818565, + "grad_norm": 0.31418395042419434, + "learning_rate": 0.00017019373911406307, + "loss": 1.6581, + "step": 7730 + }, + { + "epoch": 0.8164556962025317, + "grad_norm": 0.2813819348812103, + "learning_rate": 0.000167847876875277, + "loss": 1.6565, + "step": 7740 + }, + { + "epoch": 0.8175105485232067, + "grad_norm": 0.35128098726272583, + "learning_rate": 0.00016553434878504428, + "loss": 1.6409, + "step": 7750 + }, + { + "epoch": 0.8185654008438819, + "grad_norm": 0.40352514386177063, + "learning_rate": 0.00016325270916622947, + "loss": 1.6425, + "step": 7760 + }, + { + "epoch": 0.819620253164557, + "grad_norm": 0.3125883638858795, + "learning_rate": 0.00016100251848467966, + "loss": 1.6529, + "step": 7770 + }, + { + "epoch": 0.820675105485232, + "grad_norm": 0.3259755074977875, + "learning_rate": 0.0001587833432645528, + "loss": 1.6431, + "step": 7780 + }, + { + "epoch": 0.8217299578059072, + "grad_norm": 0.32143619656562805, + "learning_rate": 0.00015659475600481292, + "loss": 1.6624, + "step": 7790 + }, + { + "epoch": 0.8227848101265823, + "grad_norm": 0.2872926890850067, + "learning_rate": 0.00015443633509687688, + "loss": 1.6482, + "step": 7800 + }, + { + "epoch": 0.8238396624472574, + "grad_norm": 0.334501713514328, + "learning_rate": 0.00015230766474339536, + "loss": 1.6481, + "step": 7810 + }, + { + "epoch": 0.8248945147679325, + "grad_norm": 0.2882138192653656, + "learning_rate": 0.00015020833487815416, + "loss": 1.6549, + "step": 7820 + }, + { + "epoch": 0.8259493670886076, + "grad_norm": 0.30133500695228577, + "learning_rate": 0.0001481379410870792, + "loss": 1.6466, + "step": 7830 + }, + { + "epoch": 0.8270042194092827, + "grad_norm": 0.3030688762664795, + "learning_rate": 0.00014609608453033013, + "loss": 1.6337, + "step": 7840 + }, + { + "epoch": 0.8280590717299579, + "grad_norm": 0.28669485449790955, + "learning_rate": 
0.00014408237186546807, + "loss": 1.6418, + "step": 7850 + }, + { + "epoch": 0.8291139240506329, + "grad_norm": 0.2894490957260132, + "learning_rate": 0.00014209641517168273, + "loss": 1.6302, + "step": 7860 + }, + { + "epoch": 0.830168776371308, + "grad_norm": 0.30199211835861206, + "learning_rate": 0.00014013783187506265, + "loss": 1.6441, + "step": 7870 + }, + { + "epoch": 0.8312236286919831, + "grad_norm": 0.3222101032733917, + "learning_rate": 0.00013820624467489697, + "loss": 1.6625, + "step": 7880 + }, + { + "epoch": 0.8322784810126582, + "grad_norm": 0.2813795804977417, + "learning_rate": 0.00013630128147099213, + "loss": 1.6602, + "step": 7890 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.2913859784603119, + "learning_rate": 0.00013442257529199068, + "loss": 1.641, + "step": 7900 + }, + { + "epoch": 0.8343881856540084, + "grad_norm": 0.30320343375205994, + "learning_rate": 0.00013256976422467803, + "loss": 1.6484, + "step": 7910 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 0.3305092751979828, + "learning_rate": 0.00013074249134426366, + "loss": 1.6472, + "step": 7920 + }, + { + "epoch": 0.8364978902953587, + "grad_norm": 0.28237384557724, + "learning_rate": 0.0001289404046456233, + "loss": 1.6571, + "step": 7930 + }, + { + "epoch": 0.8375527426160337, + "grad_norm": 0.29093295335769653, + "learning_rate": 0.0001271631569754887, + "loss": 1.6491, + "step": 7940 + }, + { + "epoch": 0.8386075949367089, + "grad_norm": 0.2733587324619293, + "learning_rate": 0.0001254104059655723, + "loss": 1.6536, + "step": 7950 + }, + { + "epoch": 0.8396624472573839, + "grad_norm": 0.28348734974861145, + "learning_rate": 0.00012368181396661337, + "loss": 1.6359, + "step": 7960 + }, + { + "epoch": 0.8407172995780591, + "grad_norm": 0.3097951412200928, + "learning_rate": 0.00012197704798333364, + "loss": 1.6398, + "step": 7970 + }, + { + "epoch": 0.8417721518987342, + "grad_norm": 0.29911431670188904, + "learning_rate": 0.00012029577961028894, + "loss": 
1.6375, + "step": 7980 + }, + { + "epoch": 0.8428270042194093, + "grad_norm": 0.3036710321903229, + "learning_rate": 0.00011863768496860542, + "loss": 1.6509, + "step": 7990 + }, + { + "epoch": 0.8438818565400844, + "grad_norm": 0.28275567293167114, + "learning_rate": 0.00011700244464358777, + "loss": 1.6423, + "step": 8000 + }, + { + "epoch": 0.8449367088607594, + "grad_norm": 0.28171199560165405, + "learning_rate": 0.00011538974362318715, + "loss": 1.6516, + "step": 8010 + }, + { + "epoch": 0.8459915611814346, + "grad_norm": 0.2995152771472931, + "learning_rate": 0.00011379927123731737, + "loss": 1.6406, + "step": 8020 + }, + { + "epoch": 0.8470464135021097, + "grad_norm": 0.28432291746139526, + "learning_rate": 0.0001122307210980077, + "loss": 1.6411, + "step": 8030 + }, + { + "epoch": 0.8481012658227848, + "grad_norm": 0.2907130718231201, + "learning_rate": 0.00011068379104038026, + "loss": 1.6548, + "step": 8040 + }, + { + "epoch": 0.8491561181434599, + "grad_norm": 0.3092287480831146, + "learning_rate": 0.00010915818306444116, + "loss": 1.6366, + "step": 8050 + }, + { + "epoch": 0.8502109704641351, + "grad_norm": 0.3048560619354248, + "learning_rate": 0.00010765360327767384, + "loss": 1.6422, + "step": 8060 + }, + { + "epoch": 0.8512658227848101, + "grad_norm": 0.2986976206302643, + "learning_rate": 0.00010616976183842376, + "loss": 1.6479, + "step": 8070 + }, + { + "epoch": 0.8523206751054853, + "grad_norm": 0.3488602936267853, + "learning_rate": 0.00010470637290006365, + "loss": 1.652, + "step": 8080 + }, + { + "epoch": 0.8533755274261603, + "grad_norm": 0.3072564899921417, + "learning_rate": 0.00010326315455592764, + "loss": 1.6414, + "step": 8090 + }, + { + "epoch": 0.8544303797468354, + "grad_norm": 0.3198103904724121, + "learning_rate": 0.0001018398287850053, + "loss": 1.6323, + "step": 8100 + }, + { + "epoch": 0.8554852320675106, + "grad_norm": 0.3057430386543274, + "learning_rate": 0.00010043612139838357, + "loss": 1.653, + "step": 8110 + }, + { + 
"epoch": 0.8565400843881856, + "grad_norm": 0.2905876338481903, + "learning_rate": 9.905176198642719e-05, + "loss": 1.6391, + "step": 8120 + }, + { + "epoch": 0.8575949367088608, + "grad_norm": 0.31224319338798523, + "learning_rate": 9.76864838666871e-05, + "loss": 1.642, + "step": 8130 + }, + { + "epoch": 0.8586497890295358, + "grad_norm": 0.2979212701320648, + "learning_rate": 9.634002403252676e-05, + "loss": 1.6387, + "step": 8140 + }, + { + "epoch": 0.859704641350211, + "grad_norm": 0.2785697281360626, + "learning_rate": 9.501212310245681e-05, + "loss": 1.636, + "step": 8150 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 0.30655381083488464, + "learning_rate": 9.370252527016777e-05, + "loss": 1.6483, + "step": 8160 + }, + { + "epoch": 0.8618143459915611, + "grad_norm": 0.31941476464271545, + "learning_rate": 9.241097825525163e-05, + "loss": 1.6379, + "step": 8170 + }, + { + "epoch": 0.8628691983122363, + "grad_norm": 0.2967509925365448, + "learning_rate": 9.113723325460276e-05, + "loss": 1.6462, + "step": 8180 + }, + { + "epoch": 0.8639240506329114, + "grad_norm": 0.31797289848327637, + "learning_rate": 8.988104489448849e-05, + "loss": 1.6416, + "step": 8190 + }, + { + "epoch": 0.8649789029535865, + "grad_norm": 0.3229648470878601, + "learning_rate": 8.864217118328042e-05, + "loss": 1.6499, + "step": 8200 + }, + { + "epoch": 0.8660337552742616, + "grad_norm": 0.28441721200942993, + "learning_rate": 8.742037346483729e-05, + "loss": 1.6445, + "step": 8210 + }, + { + "epoch": 0.8670886075949367, + "grad_norm": 0.32493698596954346, + "learning_rate": 8.62154163725303e-05, + "loss": 1.6502, + "step": 8220 + }, + { + "epoch": 0.8681434599156118, + "grad_norm": 0.29117193818092346, + "learning_rate": 8.502706778390219e-05, + "loss": 1.6466, + "step": 8230 + }, + { + "epoch": 0.869198312236287, + "grad_norm": 0.319476842880249, + "learning_rate": 8.38550987759513e-05, + "loss": 1.6434, + "step": 8240 + }, + { + "epoch": 0.870253164556962, + "grad_norm": 
0.33274680376052856, + "learning_rate": 8.269928358103191e-05, + "loss": 1.6572, + "step": 8250 + }, + { + "epoch": 0.8713080168776371, + "grad_norm": 0.30592653155326843, + "learning_rate": 8.155939954336243e-05, + "loss": 1.6482, + "step": 8260 + }, + { + "epoch": 0.8723628691983122, + "grad_norm": 0.28294315934181213, + "learning_rate": 8.043522707613312e-05, + "loss": 1.6424, + "step": 8270 + }, + { + "epoch": 0.8734177215189873, + "grad_norm": 0.27652573585510254, + "learning_rate": 7.932654961920486e-05, + "loss": 1.6286, + "step": 8280 + }, + { + "epoch": 0.8744725738396625, + "grad_norm": 0.30717819929122925, + "learning_rate": 7.823315359739135e-05, + "loss": 1.6298, + "step": 8290 + }, + { + "epoch": 0.8755274261603375, + "grad_norm": 0.28261104226112366, + "learning_rate": 7.715482837931577e-05, + "loss": 1.6554, + "step": 8300 + }, + { + "epoch": 0.8765822784810127, + "grad_norm": 0.32058534026145935, + "learning_rate": 7.6091366236835e-05, + "loss": 1.6332, + "step": 8310 + }, + { + "epoch": 0.8776371308016878, + "grad_norm": 0.28376415371894836, + "learning_rate": 7.504256230502289e-05, + "loss": 1.6485, + "step": 8320 + }, + { + "epoch": 0.8786919831223629, + "grad_norm": 0.26899126172065735, + "learning_rate": 7.400821454270524e-05, + "loss": 1.6464, + "step": 8330 + }, + { + "epoch": 0.879746835443038, + "grad_norm": 0.2815333306789398, + "learning_rate": 7.29881236935386e-05, + "loss": 1.6327, + "step": 8340 + }, + { + "epoch": 0.880801687763713, + "grad_norm": 0.29519298672676086, + "learning_rate": 7.198209324762562e-05, + "loss": 1.6352, + "step": 8350 + }, + { + "epoch": 0.8818565400843882, + "grad_norm": 0.2897711992263794, + "learning_rate": 7.098992940365946e-05, + "loss": 1.6325, + "step": 8360 + }, + { + "epoch": 0.8829113924050633, + "grad_norm": 0.27384185791015625, + "learning_rate": 7.001144103159e-05, + "loss": 1.6415, + "step": 8370 + }, + { + "epoch": 0.8839662447257384, + "grad_norm": 0.2890484035015106, + "learning_rate": 
6.904643963580461e-05, + "loss": 1.6485, + "step": 8380 + }, + { + "epoch": 0.8850210970464135, + "grad_norm": 0.3331020176410675, + "learning_rate": 6.809473931881644e-05, + "loss": 1.6375, + "step": 8390 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 0.3167933523654938, + "learning_rate": 6.71561567454532e-05, + "loss": 1.6406, + "step": 8400 + }, + { + "epoch": 0.8871308016877637, + "grad_norm": 0.3101309835910797, + "learning_rate": 6.623051110753948e-05, + "loss": 1.6576, + "step": 8410 + }, + { + "epoch": 0.8881856540084389, + "grad_norm": 0.282186895608902, + "learning_rate": 6.531762408906607e-05, + "loss": 1.6451, + "step": 8420 + }, + { + "epoch": 0.8892405063291139, + "grad_norm": 0.27861738204956055, + "learning_rate": 6.441731983183912e-05, + "loss": 1.6379, + "step": 8430 + }, + { + "epoch": 0.890295358649789, + "grad_norm": 0.31566575169563293, + "learning_rate": 6.352942490160292e-05, + "loss": 1.6353, + "step": 8440 + }, + { + "epoch": 0.8913502109704642, + "grad_norm": 0.28727439045906067, + "learning_rate": 6.265376825462966e-05, + "loss": 1.6467, + "step": 8450 + }, + { + "epoch": 0.8924050632911392, + "grad_norm": 0.2992555499076843, + "learning_rate": 6.179018120476945e-05, + "loss": 1.6408, + "step": 8460 + }, + { + "epoch": 0.8934599156118144, + "grad_norm": 0.28516122698783875, + "learning_rate": 6.0938497390954946e-05, + "loss": 1.6403, + "step": 8470 + }, + { + "epoch": 0.8945147679324894, + "grad_norm": 0.2922123968601227, + "learning_rate": 6.009855274515339e-05, + "loss": 1.6322, + "step": 8480 + }, + { + "epoch": 0.8955696202531646, + "grad_norm": 0.3001478910446167, + "learning_rate": 5.9270185460760735e-05, + "loss": 1.6425, + "step": 8490 + }, + { + "epoch": 0.8966244725738397, + "grad_norm": 0.29792818427085876, + "learning_rate": 5.8453235961431225e-05, + "loss": 1.6293, + "step": 8500 + }, + { + "epoch": 0.8976793248945147, + "grad_norm": 0.2816435396671295, + "learning_rate": 5.764754687033678e-05, + "loss": 1.6408, + 
"step": 8510 + }, + { + "epoch": 0.8987341772151899, + "grad_norm": 0.30157747864723206, + "learning_rate": 5.6852962979849836e-05, + "loss": 1.6388, + "step": 8520 + }, + { + "epoch": 0.8997890295358649, + "grad_norm": 0.27947813272476196, + "learning_rate": 5.6069331221644284e-05, + "loss": 1.6516, + "step": 8530 + }, + { + "epoch": 0.9008438818565401, + "grad_norm": 0.28350022435188293, + "learning_rate": 5.529650063720842e-05, + "loss": 1.649, + "step": 8540 + }, + { + "epoch": 0.9018987341772152, + "grad_norm": 0.2897917628288269, + "learning_rate": 5.453432234876445e-05, + "loss": 1.6379, + "step": 8550 + }, + { + "epoch": 0.9029535864978903, + "grad_norm": 0.2812824249267578, + "learning_rate": 5.37826495305886e-05, + "loss": 1.6284, + "step": 8560 + }, + { + "epoch": 0.9040084388185654, + "grad_norm": 0.31812164187431335, + "learning_rate": 5.304133738072674e-05, + "loss": 1.647, + "step": 8570 + }, + { + "epoch": 0.9050632911392406, + "grad_norm": 0.26702648401260376, + "learning_rate": 5.2310243093099814e-05, + "loss": 1.6428, + "step": 8580 + }, + { + "epoch": 0.9061181434599156, + "grad_norm": 0.3153185546398163, + "learning_rate": 5.158922582999368e-05, + "loss": 1.6443, + "step": 8590 + }, + { + "epoch": 0.9071729957805907, + "grad_norm": 0.282937616109848, + "learning_rate": 5.087814669492819e-05, + "loss": 1.6386, + "step": 8600 + }, + { + "epoch": 0.9082278481012658, + "grad_norm": 0.31850728392601013, + "learning_rate": 5.017686870590028e-05, + "loss": 1.6368, + "step": 8610 + }, + { + "epoch": 0.9092827004219409, + "grad_norm": 0.27971863746643066, + "learning_rate": 4.948525676899577e-05, + "loss": 1.634, + "step": 8620 + }, + { + "epoch": 0.9103375527426161, + "grad_norm": 0.2911681830883026, + "learning_rate": 4.880317765236493e-05, + "loss": 1.6432, + "step": 8630 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 0.2851537764072418, + "learning_rate": 4.8130499960556755e-05, + "loss": 1.6238, + "step": 8640 + }, + { + "epoch": 
0.9124472573839663, + "grad_norm": 0.29987117648124695, + "learning_rate": 4.746709410920699e-05, + "loss": 1.6327, + "step": 8650 + }, + { + "epoch": 0.9135021097046413, + "grad_norm": 0.3069855272769928, + "learning_rate": 4.681283230007507e-05, + "loss": 1.6331, + "step": 8660 + }, + { + "epoch": 0.9145569620253164, + "grad_norm": 0.2873881161212921, + "learning_rate": 4.616758849642509e-05, + "loss": 1.6395, + "step": 8670 + }, + { + "epoch": 0.9156118143459916, + "grad_norm": 0.27439984679222107, + "learning_rate": 4.553123839874615e-05, + "loss": 1.6483, + "step": 8680 + }, + { + "epoch": 0.9166666666666666, + "grad_norm": 0.2946086823940277, + "learning_rate": 4.490365942080736e-05, + "loss": 1.6417, + "step": 8690 + }, + { + "epoch": 0.9177215189873418, + "grad_norm": 0.29139867424964905, + "learning_rate": 4.428473066604285e-05, + "loss": 1.6411, + "step": 8700 + }, + { + "epoch": 0.9187763713080169, + "grad_norm": 0.2842201590538025, + "learning_rate": 4.367433290426233e-05, + "loss": 1.639, + "step": 8710 + }, + { + "epoch": 0.919831223628692, + "grad_norm": 0.2875899374485016, + "learning_rate": 4.3072348548682595e-05, + "loss": 1.6334, + "step": 8720 + }, + { + "epoch": 0.9208860759493671, + "grad_norm": 0.29709094762802124, + "learning_rate": 4.247866163327575e-05, + "loss": 1.6456, + "step": 8730 + }, + { + "epoch": 0.9219409282700421, + "grad_norm": 0.29496219754219055, + "learning_rate": 4.1893157790429404e-05, + "loss": 1.6311, + "step": 8740 + }, + { + "epoch": 0.9229957805907173, + "grad_norm": 0.28014180064201355, + "learning_rate": 4.1315724228915066e-05, + "loss": 1.6318, + "step": 8750 + }, + { + "epoch": 0.9240506329113924, + "grad_norm": 0.3066428601741791, + "learning_rate": 4.074624971216005e-05, + "loss": 1.6208, + "step": 8760 + }, + { + "epoch": 0.9251054852320675, + "grad_norm": 0.28945207595825195, + "learning_rate": 4.018462453681889e-05, + "loss": 1.6345, + "step": 8770 + }, + { + "epoch": 0.9261603375527426, + "grad_norm": 
0.2926965057849884, + "learning_rate": 3.963074051164014e-05, + "loss": 1.6389, + "step": 8780 + }, + { + "epoch": 0.9272151898734177, + "grad_norm": 0.2945704460144043, + "learning_rate": 3.908449093662446e-05, + "loss": 1.6349, + "step": 8790 + }, + { + "epoch": 0.9282700421940928, + "grad_norm": 0.2938326895236969, + "learning_rate": 3.854577058246998e-05, + "loss": 1.6323, + "step": 8800 + }, + { + "epoch": 0.929324894514768, + "grad_norm": 0.28471556305885315, + "learning_rate": 3.801447567030094e-05, + "loss": 1.6477, + "step": 8810 + }, + { + "epoch": 0.930379746835443, + "grad_norm": 0.2794736921787262, + "learning_rate": 3.7490503851675777e-05, + "loss": 1.6367, + "step": 8820 + }, + { + "epoch": 0.9314345991561181, + "grad_norm": 0.28662699460983276, + "learning_rate": 3.6973754188870806e-05, + "loss": 1.6505, + "step": 8830 + }, + { + "epoch": 0.9324894514767933, + "grad_norm": 0.2798454761505127, + "learning_rate": 3.6464127135435536e-05, + "loss": 1.6358, + "step": 8840 + }, + { + "epoch": 0.9335443037974683, + "grad_norm": 0.30066630244255066, + "learning_rate": 3.596152451701616e-05, + "loss": 1.6345, + "step": 8850 + }, + { + "epoch": 0.9345991561181435, + "grad_norm": 0.29603078961372375, + "learning_rate": 3.5465849512443226e-05, + "loss": 1.6346, + "step": 8860 + }, + { + "epoch": 0.9356540084388185, + "grad_norm": 0.29392972588539124, + "learning_rate": 3.4977006635080086e-05, + "loss": 1.6395, + "step": 8870 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 0.286011666059494, + "learning_rate": 3.449490171442838e-05, + "loss": 1.6419, + "step": 8880 + }, + { + "epoch": 0.9377637130801688, + "grad_norm": 0.2774491310119629, + "learning_rate": 3.401944187798702e-05, + "loss": 1.6415, + "step": 8890 + }, + { + "epoch": 0.9388185654008439, + "grad_norm": 0.2881662845611572, + "learning_rate": 3.355053553336137e-05, + "loss": 1.6307, + "step": 8900 + }, + { + "epoch": 0.939873417721519, + "grad_norm": 0.31947827339172363, + "learning_rate": 
3.308809235061882e-05, + "loss": 1.6322, + "step": 8910 + }, + { + "epoch": 0.9409282700421941, + "grad_norm": 0.302874356508255, + "learning_rate": 3.263202324488772e-05, + "loss": 1.637, + "step": 8920 + }, + { + "epoch": 0.9419831223628692, + "grad_norm": 0.2747725546360016, + "learning_rate": 3.218224035919609e-05, + "loss": 1.6358, + "step": 8930 + }, + { + "epoch": 0.9430379746835443, + "grad_norm": 0.28067922592163086, + "learning_rate": 3.173865704754688e-05, + "loss": 1.6351, + "step": 8940 + }, + { + "epoch": 0.9440928270042194, + "grad_norm": 0.27929338812828064, + "learning_rate": 3.130118785822657e-05, + "loss": 1.6372, + "step": 8950 + }, + { + "epoch": 0.9451476793248945, + "grad_norm": 0.3006559908390045, + "learning_rate": 3.08697485173437e-05, + "loss": 1.6431, + "step": 8960 + }, + { + "epoch": 0.9462025316455697, + "grad_norm": 0.2935165464878082, + "learning_rate": 3.0444255912594442e-05, + "loss": 1.6393, + "step": 8970 + }, + { + "epoch": 0.9472573839662447, + "grad_norm": 0.3300228416919708, + "learning_rate": 3.002462807725185e-05, + "loss": 1.6351, + "step": 8980 + }, + { + "epoch": 0.9483122362869199, + "grad_norm": 0.28363847732543945, + "learning_rate": 2.9610784174375868e-05, + "loss": 1.6407, + "step": 8990 + }, + { + "epoch": 0.9493670886075949, + "grad_norm": 0.28174105286598206, + "learning_rate": 2.920264448124087e-05, + "loss": 1.634, + "step": 9000 + }, + { + "epoch": 0.95042194092827, + "grad_norm": 0.3101724088191986, + "learning_rate": 2.8800130373977936e-05, + "loss": 1.6322, + "step": 9010 + }, + { + "epoch": 0.9514767932489452, + "grad_norm": 0.30244192481040955, + "learning_rate": 2.84031643124288e-05, + "loss": 1.6343, + "step": 9020 + }, + { + "epoch": 0.9525316455696202, + "grad_norm": 0.2945556640625, + "learning_rate": 2.8011669825208517e-05, + "loss": 1.6576, + "step": 9030 + }, + { + "epoch": 0.9535864978902954, + "grad_norm": 0.29157695174217224, + "learning_rate": 2.762557149497405e-05, + "loss": 1.6278, + 
"step": 9040 + }, + { + "epoch": 0.9546413502109705, + "grad_norm": 0.2839052081108093, + "learning_rate": 2.724479494389592e-05, + "loss": 1.6388, + "step": 9050 + }, + { + "epoch": 0.9556962025316456, + "grad_norm": 0.28519225120544434, + "learning_rate": 2.6869266819330058e-05, + "loss": 1.6412, + "step": 9060 + }, + { + "epoch": 0.9567510548523207, + "grad_norm": 0.30681735277175903, + "learning_rate": 2.6498914779687228e-05, + "loss": 1.6399, + "step": 9070 + }, + { + "epoch": 0.9578059071729957, + "grad_norm": 0.27808111906051636, + "learning_rate": 2.6133667480497115e-05, + "loss": 1.6389, + "step": 9080 + }, + { + "epoch": 0.9588607594936709, + "grad_norm": 0.29020893573760986, + "learning_rate": 2.5773454560664597e-05, + "loss": 1.6443, + "step": 9090 + }, + { + "epoch": 0.959915611814346, + "grad_norm": 0.26861000061035156, + "learning_rate": 2.541820662891541e-05, + "loss": 1.62, + "step": 9100 + }, + { + "epoch": 0.9609704641350211, + "grad_norm": 0.28837451338768005, + "learning_rate": 2.5067855250428616e-05, + "loss": 1.6338, + "step": 9110 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 0.3207913041114807, + "learning_rate": 2.472233293365335e-05, + "loss": 1.6394, + "step": 9120 + }, + { + "epoch": 0.9630801687763713, + "grad_norm": 0.2857476472854614, + "learning_rate": 2.4381573117307307e-05, + "loss": 1.6289, + "step": 9130 + }, + { + "epoch": 0.9641350210970464, + "grad_norm": 0.30220910906791687, + "learning_rate": 2.4045510157554362e-05, + "loss": 1.6498, + "step": 9140 + }, + { + "epoch": 0.9651898734177216, + "grad_norm": 0.283668577671051, + "learning_rate": 2.3714079315358985e-05, + "loss": 1.6349, + "step": 9150 + }, + { + "epoch": 0.9662447257383966, + "grad_norm": 0.28030630946159363, + "learning_rate": 2.338721674401494e-05, + "loss": 1.6387, + "step": 9160 + }, + { + "epoch": 0.9672995780590717, + "grad_norm": 0.3106326758861542, + "learning_rate": 2.30648594768459e-05, + "loss": 1.6387, + "step": 9170 + }, + { + "epoch": 
0.9683544303797469, + "grad_norm": 0.28013819456100464, + "learning_rate": 2.2746945415075523e-05, + "loss": 1.6359, + "step": 9180 + }, + { + "epoch": 0.9694092827004219, + "grad_norm": 0.3039085268974304, + "learning_rate": 2.2433413315864803e-05, + "loss": 1.6322, + "step": 9190 + }, + { + "epoch": 0.9704641350210971, + "grad_norm": 0.28467583656311035, + "learning_rate": 2.2124202780514277e-05, + "loss": 1.6338, + "step": 9200 + }, + { + "epoch": 0.9715189873417721, + "grad_norm": 0.30532944202423096, + "learning_rate": 2.1819254242828815e-05, + "loss": 1.637, + "step": 9210 + }, + { + "epoch": 0.9725738396624473, + "grad_norm": 0.2894130051136017, + "learning_rate": 2.151850895764285e-05, + "loss": 1.6342, + "step": 9220 + }, + { + "epoch": 0.9736286919831224, + "grad_norm": 0.2728683352470398, + "learning_rate": 2.12219089895037e-05, + "loss": 1.625, + "step": 9230 + }, + { + "epoch": 0.9746835443037974, + "grad_norm": 0.2716827988624573, + "learning_rate": 2.092939720151092e-05, + "loss": 1.618, + "step": 9240 + }, + { + "epoch": 0.9757383966244726, + "grad_norm": 0.2797609865665436, + "learning_rate": 2.064091724430947e-05, + "loss": 1.6327, + "step": 9250 + }, + { + "epoch": 0.9767932489451476, + "grad_norm": 0.28480952978134155, + "learning_rate": 2.0356413545234603e-05, + "loss": 1.6281, + "step": 9260 + }, + { + "epoch": 0.9778481012658228, + "grad_norm": 0.2917044758796692, + "learning_rate": 2.0075831297606357e-05, + "loss": 1.6419, + "step": 9270 + }, + { + "epoch": 0.9789029535864979, + "grad_norm": 0.30042940378189087, + "learning_rate": 1.9799116450171627e-05, + "loss": 1.6293, + "step": 9280 + }, + { + "epoch": 0.979957805907173, + "grad_norm": 0.27563101053237915, + "learning_rate": 1.952621569669175e-05, + "loss": 1.6355, + "step": 9290 + }, + { + "epoch": 0.9810126582278481, + "grad_norm": 0.29096660017967224, + "learning_rate": 1.9257076465673605e-05, + "loss": 1.6336, + "step": 9300 + }, + { + "epoch": 0.9820675105485233, + "grad_norm": 
0.29261574149131775, + "learning_rate": 1.899164691024229e-05, + "loss": 1.6336, + "step": 9310 + }, + { + "epoch": 0.9831223628691983, + "grad_norm": 0.29520437121391296, + "learning_rate": 1.872987589815331e-05, + "loss": 1.6292, + "step": 9320 + }, + { + "epoch": 0.9841772151898734, + "grad_norm": 0.3058977723121643, + "learning_rate": 1.8471713001942538e-05, + "loss": 1.6416, + "step": 9330 + }, + { + "epoch": 0.9852320675105485, + "grad_norm": 0.28372088074684143, + "learning_rate": 1.8217108489211845e-05, + "loss": 1.6388, + "step": 9340 + }, + { + "epoch": 0.9862869198312236, + "grad_norm": 0.2728956341743469, + "learning_rate": 1.7966013313048696e-05, + "loss": 1.6498, + "step": 9350 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 0.29871150851249695, + "learning_rate": 1.7718379102577752e-05, + "loss": 1.6444, + "step": 9360 + }, + { + "epoch": 0.9883966244725738, + "grad_norm": 0.2838023602962494, + "learning_rate": 1.7474158153642745e-05, + "loss": 1.6413, + "step": 9370 + }, + { + "epoch": 0.989451476793249, + "grad_norm": 0.2850436568260193, + "learning_rate": 1.7233303419616745e-05, + "loss": 1.6275, + "step": 9380 + }, + { + "epoch": 0.990506329113924, + "grad_norm": 0.28575748205184937, + "learning_rate": 1.699576850233916e-05, + "loss": 1.6324, + "step": 9390 + }, + { + "epoch": 0.9915611814345991, + "grad_norm": 0.2765159010887146, + "learning_rate": 1.6761507643177553e-05, + "loss": 1.6433, + "step": 9400 + }, + { + "epoch": 0.9926160337552743, + "grad_norm": 0.2996973395347595, + "learning_rate": 1.6530475714212752e-05, + "loss": 1.6162, + "step": 9410 + }, + { + "epoch": 0.9936708860759493, + "grad_norm": 0.28517743945121765, + "learning_rate": 1.6302628209545423e-05, + "loss": 1.6353, + "step": 9420 + }, + { + "epoch": 0.9947257383966245, + "grad_norm": 0.27998846769332886, + "learning_rate": 1.6077921236722464e-05, + "loss": 1.6418, + "step": 9430 + }, + { + "epoch": 0.9957805907172996, + "grad_norm": 0.2945730984210968, + 
"learning_rate": 1.5856311508281594e-05, + "loss": 1.6342, + "step": 9440 + }, + { + "epoch": 0.9968354430379747, + "grad_norm": 0.2894095778465271, + "learning_rate": 1.5637756333412454e-05, + "loss": 1.6449, + "step": 9450 + }, + { + "epoch": 0.9978902953586498, + "grad_norm": 0.2997286021709442, + "learning_rate": 1.542221360973268e-05, + "loss": 1.6283, + "step": 9460 + }, + { + "epoch": 0.9989451476793249, + "grad_norm": 0.2845168709754944, + "learning_rate": 1.5209641815177312e-05, + "loss": 1.6413, + "step": 9470 + }, + { + "epoch": 1.0, + "grad_norm": 0.8419556021690369, + "learning_rate": 1.5e-05, + "loss": 1.6288, + "step": 9480 + } + ], + "logging_steps": 10, + "max_steps": 9480, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.8317122291574784e+16, + "train_batch_size": 1024, + "trial_name": null, + "trial_params": null +} diff --git a/saves-cohere-bf16/checkpoint-9480/training_args.bin b/saves-cohere-bf16/checkpoint-9480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bc8d241f0bd32918fca1e63ecd2bad99983c17e4 --- /dev/null +++ b/saves-cohere-bf16/checkpoint-9480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29aa654b41a8f8eb43ae02c19ecc8dc65b4af1aca0e587451fd7714909171adf +size 5112 diff --git a/saves-cohere-bf16/config.json b/saves-cohere-bf16/config.json new file mode 100644 index 0000000000000000000000000000000000000000..259285235c35edc8a5995dd47b86f04d0d5e97d2 --- /dev/null +++ b/saves-cohere-bf16/config.json @@ -0,0 +1,27 @@ +{ + "architectures": [ + "CohereForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 5, + "eos_token_id": 255001, + "hidden_act": "silu", 
+ "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 768, + "layer_norm_eps": 1e-05, + "logit_scale": 0.0625, + "max_position_embeddings": 8192, + "model_type": "cohere", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pad_token_id": 0, + "rope_theta": 10000.0, + "torch_dtype": "float32", + "transformers_version": "4.42.0", + "use_cache": true, + "use_qk_norm": false, + "vocab_size": 2000 +} diff --git a/saves-cohere-bf16/generation_config.json b/saves-cohere-bf16/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..7f9ed94d5f4f18cd4865c82b517f1cf872d5b322 --- /dev/null +++ b/saves-cohere-bf16/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 5, + "eos_token_id": 255001, + "pad_token_id": 0, + "transformers_version": "4.42.0" +} diff --git a/saves-cohere-bf16/model.safetensors b/saves-cohere-bf16/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0855996b8674a264ea883a50133d281d125133e6 --- /dev/null +++ b/saves-cohere-bf16/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3845c0f8d5ba9a1405908d2522f82f19083de962011880b88292d4f2aaad80b9 +size 8344440 diff --git a/saves-cohere-bf16/result.log b/saves-cohere-bf16/result.log new file mode 100644 index 0000000000000000000000000000000000000000..ab40f5a0c9898bea1892118428a45ae46d0bc7fb --- /dev/null +++ b/saves-cohere-bf16/result.log @@ -0,0 +1 @@ +{'train_runtime': 5444.4328, 'train_samples_per_second': 1782.849, 'train_steps_per_second': 1.741, 'train_loss': 2.1112370515171484, 'epoch': 1.0} \ No newline at end of file diff --git a/saves-cohere-bf16/special_tokens_map.json b/saves-cohere-bf16/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-cohere-bf16/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + 
"bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-cohere-bf16/tokenizer.json b/saves-cohere-bf16/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-cohere-bf16/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": 
null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 
162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, 
+ "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 
464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + 
"è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 
754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 
897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + 
"ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 
1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + 
"åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 
1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + 
"æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + 
"ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + 
"å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + 
".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-cohere-bf16/tokenizer_config.json b/saves-cohere-bf16/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-cohere-bf16/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + 
"model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-cohere-cosine/checkpoint-9480/config.json b/saves-cohere-cosine/checkpoint-9480/config.json new file mode 100644 index 0000000000000000000000000000000000000000..df4858fb7c269ef7a90d58d11e57ba3dd9f2ef21 --- /dev/null +++ b/saves-cohere-cosine/checkpoint-9480/config.json @@ -0,0 +1,27 @@ +{ + "architectures": [ + "CohereForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 5, + "eos_token_id": 255001, + "hidden_act": "silu", + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 768, + "layer_norm_eps": 1e-05, + "logit_scale": 0.0625, + "max_position_embeddings": 8192, + "model_type": "cohere", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pad_token_id": 0, + "rope_theta": 10000.0, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "use_qk_norm": false, + "vocab_size": 2000 +} diff --git a/saves-cohere-cosine/checkpoint-9480/generation_config.json b/saves-cohere-cosine/checkpoint-9480/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9c41a4de69f546b74395520ae8afc0771ed6b49a --- /dev/null +++ b/saves-cohere-cosine/checkpoint-9480/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 5, + "eos_token_id": 255001, + "pad_token_id": 0, + "transformers_version": "4.42.4" +} diff --git a/saves-cohere-cosine/checkpoint-9480/model.safetensors b/saves-cohere-cosine/checkpoint-9480/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5e850f755abd2c21a4bc0db6498d65e6300c1103 --- /dev/null +++ b/saves-cohere-cosine/checkpoint-9480/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c437a62079def81d398a279682408ea6cb49a564d010af50b45e9ccdfae96c5 
+size 8344440 diff --git a/saves-cohere-cosine/checkpoint-9480/optimizer.pt b/saves-cohere-cosine/checkpoint-9480/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..072b0715962d026d24071e8347f37079d84919d6 --- /dev/null +++ b/saves-cohere-cosine/checkpoint-9480/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:610a0c044908908f227e983fd26ec452dd7371f51b6469ee7234fbef5a76e68e +size 16700776 diff --git a/saves-cohere-cosine/checkpoint-9480/rng_state.pth b/saves-cohere-cosine/checkpoint-9480/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f8291cd6ce87668b786a72f3e93d072fbe54902 --- /dev/null +++ b/saves-cohere-cosine/checkpoint-9480/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c917636c7a58af68a29056522a757e9f9b99005b776641aa157c536967817d +size 14244 diff --git a/saves-cohere-cosine/checkpoint-9480/scheduler.pt b/saves-cohere-cosine/checkpoint-9480/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..03c145297021546d40e130546440641e02059bcb --- /dev/null +++ b/saves-cohere-cosine/checkpoint-9480/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35fd617624c087e1a286ed7cf3fa38baa4a8815e49f107c3186b4c7c58e1adbb +size 1064 diff --git a/saves-cohere-cosine/checkpoint-9480/special_tokens_map.json b/saves-cohere-cosine/checkpoint-9480/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-cohere-cosine/checkpoint-9480/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": 
"<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-cohere-cosine/checkpoint-9480/tokenizer.json b/saves-cohere-cosine/checkpoint-9480/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-cohere-cosine/checkpoint-9480/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 
8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 
182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + 
"è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 
481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 
627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + 
"one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, 
+ "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 
1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + 
"åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + 
"å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 
1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, 
+ "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + 
"ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + 
"ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 
1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-cohere-cosine/checkpoint-9480/tokenizer_config.json b/saves-cohere-cosine/checkpoint-9480/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-cohere-cosine/checkpoint-9480/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + 
"eos_token": "<|im_end|>", + "errors": "replace", + "model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-cohere-cosine/checkpoint-9480/trainer_state.json b/saves-cohere-cosine/checkpoint-9480/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..38a58fde075f04204bd828773376214418c6e915 --- /dev/null +++ b/saves-cohere-cosine/checkpoint-9480/trainer_state.json @@ -0,0 +1,6669 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 9480, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0010548523206751054, + "grad_norm": 0.07807300239801407, + "learning_rate": 0.00015789473684210527, + "loss": 7.5899, + "step": 10 + }, + { + "epoch": 0.002109704641350211, + "grad_norm": 0.07809121161699295, + "learning_rate": 0.00031578947368421053, + "loss": 7.5481, + "step": 20 + }, + { + "epoch": 0.0031645569620253164, + "grad_norm": 0.07776851207017899, + "learning_rate": 0.00047368421052631577, + "loss": 7.4978, + "step": 30 + }, + { + "epoch": 0.004219409282700422, + "grad_norm": 0.08031152933835983, + "learning_rate": 0.0006315789473684211, + "loss": 7.4263, + "step": 40 + }, + { + "epoch": 0.005274261603375527, + "grad_norm": 0.08201228827238083, + "learning_rate": 0.0007894736842105263, + "loss": 7.3279, + "step": 50 + }, + { + "epoch": 0.006329113924050633, + "grad_norm": 0.09547655284404755, + "learning_rate": 0.0009473684210526315, + "loss": 7.2064, + "step": 60 + }, + { + "epoch": 0.007383966244725738, + "grad_norm": 0.09488283842802048, + "learning_rate": 0.0011052631578947368, + "loss": 7.0686, + "step": 70 + }, + { + "epoch": 0.008438818565400843, + "grad_norm": 0.10618791729211807, + "learning_rate": 0.0012631578947368421, + "loss": 6.9267, + "step": 80 + }, + { + "epoch": 
0.00949367088607595, + "grad_norm": 0.21817465126514435, + "learning_rate": 0.0014210526315789472, + "loss": 6.7949, + "step": 90 + }, + { + "epoch": 0.010548523206751054, + "grad_norm": 0.14541859924793243, + "learning_rate": 0.0014999989494847376, + "loss": 6.6908, + "step": 100 + }, + { + "epoch": 0.011603375527426161, + "grad_norm": 0.18593978881835938, + "learning_rate": 0.0014999905453802946, + "loss": 6.5501, + "step": 110 + }, + { + "epoch": 0.012658227848101266, + "grad_norm": 0.09984356164932251, + "learning_rate": 0.0014999737372655805, + "loss": 6.4102, + "step": 120 + }, + { + "epoch": 0.013713080168776372, + "grad_norm": 0.09693623334169388, + "learning_rate": 0.0014999485253289388, + "loss": 6.2762, + "step": 130 + }, + { + "epoch": 0.014767932489451477, + "grad_norm": 0.11481635272502899, + "learning_rate": 0.0014999149098528814, + "loss": 6.14, + "step": 140 + }, + { + "epoch": 0.015822784810126583, + "grad_norm": 0.11782331764698029, + "learning_rate": 0.0014998728912140862, + "loss": 6.0049, + "step": 150 + }, + { + "epoch": 0.016877637130801686, + "grad_norm": 0.1004800945520401, + "learning_rate": 0.0014998224698833922, + "loss": 5.8765, + "step": 160 + }, + { + "epoch": 0.017932489451476793, + "grad_norm": 0.13111692667007446, + "learning_rate": 0.0014997636464257956, + "loss": 5.7522, + "step": 170 + }, + { + "epoch": 0.0189873417721519, + "grad_norm": 0.11795386672019958, + "learning_rate": 0.0014996964215004416, + "loss": 5.6449, + "step": 180 + }, + { + "epoch": 0.020042194092827006, + "grad_norm": 0.21251964569091797, + "learning_rate": 0.0014996207958606182, + "loss": 5.5375, + "step": 190 + }, + { + "epoch": 0.02109704641350211, + "grad_norm": 0.12110015004873276, + "learning_rate": 0.001499536770353748, + "loss": 5.4265, + "step": 200 + }, + { + "epoch": 0.022151898734177215, + "grad_norm": 0.2576540410518646, + "learning_rate": 0.0014994443459213774, + "loss": 5.3298, + "step": 210 + }, + { + "epoch": 0.023206751054852322, + 
"grad_norm": 0.13740238547325134, + "learning_rate": 0.001499343523599168, + "loss": 5.2426, + "step": 220 + }, + { + "epoch": 0.024261603375527425, + "grad_norm": 0.11119973659515381, + "learning_rate": 0.0014992343045168823, + "loss": 5.1511, + "step": 230 + }, + { + "epoch": 0.02531645569620253, + "grad_norm": 0.2025327980518341, + "learning_rate": 0.0014991166898983739, + "loss": 5.0695, + "step": 240 + }, + { + "epoch": 0.026371308016877638, + "grad_norm": 0.1443842649459839, + "learning_rate": 0.001498990681061572, + "loss": 5.0083, + "step": 250 + }, + { + "epoch": 0.027426160337552744, + "grad_norm": 0.15811724960803986, + "learning_rate": 0.001498856279418467, + "loss": 4.9323, + "step": 260 + }, + { + "epoch": 0.028481012658227847, + "grad_norm": 0.21502356231212616, + "learning_rate": 0.0014987134864750948, + "loss": 4.879, + "step": 270 + }, + { + "epoch": 0.029535864978902954, + "grad_norm": 0.1494235247373581, + "learning_rate": 0.0014985623038315206, + "loss": 4.8082, + "step": 280 + }, + { + "epoch": 0.03059071729957806, + "grad_norm": 0.2371232658624649, + "learning_rate": 0.0014984027331818193, + "loss": 4.7491, + "step": 290 + }, + { + "epoch": 0.03164556962025317, + "grad_norm": 0.1952720731496811, + "learning_rate": 0.0014982347763140584, + "loss": 4.6992, + "step": 300 + }, + { + "epoch": 0.03270042194092827, + "grad_norm": 0.23239083588123322, + "learning_rate": 0.0014980584351102762, + "loss": 4.6307, + "step": 310 + }, + { + "epoch": 0.03375527426160337, + "grad_norm": 0.173756405711174, + "learning_rate": 0.001497873711546462, + "loss": 4.5661, + "step": 320 + }, + { + "epoch": 0.03481012658227848, + "grad_norm": 0.2377420961856842, + "learning_rate": 0.0014976806076925334, + "loss": 4.5182, + "step": 330 + }, + { + "epoch": 0.035864978902953586, + "grad_norm": 0.26965397596359253, + "learning_rate": 0.0014974791257123137, + "loss": 4.4655, + "step": 340 + }, + { + "epoch": 0.03691983122362869, + "grad_norm": 0.4811736047267914, + 
"learning_rate": 0.001497269267863507, + "loss": 4.4021, + "step": 350 + }, + { + "epoch": 0.0379746835443038, + "grad_norm": 0.2100367546081543, + "learning_rate": 0.0014970510364976724, + "loss": 4.3501, + "step": 360 + }, + { + "epoch": 0.039029535864978905, + "grad_norm": 0.27891844511032104, + "learning_rate": 0.0014968244340601996, + "loss": 4.2987, + "step": 370 + }, + { + "epoch": 0.04008438818565401, + "grad_norm": 0.26772335171699524, + "learning_rate": 0.001496589463090279, + "loss": 4.2511, + "step": 380 + }, + { + "epoch": 0.04113924050632911, + "grad_norm": 0.32049456238746643, + "learning_rate": 0.001496346126220875, + "loss": 4.2007, + "step": 390 + }, + { + "epoch": 0.04219409282700422, + "grad_norm": 0.24359415471553802, + "learning_rate": 0.0014960944261786966, + "loss": 4.1481, + "step": 400 + }, + { + "epoch": 0.043248945147679324, + "grad_norm": 0.26938995718955994, + "learning_rate": 0.0014958343657841655, + "loss": 4.1003, + "step": 410 + }, + { + "epoch": 0.04430379746835443, + "grad_norm": 0.4620974659919739, + "learning_rate": 0.001495565947951385, + "loss": 4.0614, + "step": 420 + }, + { + "epoch": 0.04535864978902954, + "grad_norm": 0.27880045771598816, + "learning_rate": 0.0014952891756881085, + "loss": 4.0118, + "step": 430 + }, + { + "epoch": 0.046413502109704644, + "grad_norm": 0.341079443693161, + "learning_rate": 0.0014950040520957037, + "loss": 3.9705, + "step": 440 + }, + { + "epoch": 0.04746835443037975, + "grad_norm": 0.26108410954475403, + "learning_rate": 0.0014947105803691204, + "loss": 3.935, + "step": 450 + }, + { + "epoch": 0.04852320675105485, + "grad_norm": 0.3294716477394104, + "learning_rate": 0.0014944087637968522, + "loss": 3.8919, + "step": 460 + }, + { + "epoch": 0.049578059071729956, + "grad_norm": 0.3282017707824707, + "learning_rate": 0.0014940986057609012, + "loss": 3.8439, + "step": 470 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 0.39483383297920227, + "learning_rate": 0.0014937801097367396, + 
"loss": 3.8196, + "step": 480 + }, + { + "epoch": 0.05168776371308017, + "grad_norm": 0.36017632484436035, + "learning_rate": 0.001493453279293271, + "loss": 3.7783, + "step": 490 + }, + { + "epoch": 0.052742616033755275, + "grad_norm": 0.24677737057209015, + "learning_rate": 0.0014931181180927902, + "loss": 3.7407, + "step": 500 + }, + { + "epoch": 0.05379746835443038, + "grad_norm": 0.4259503483772278, + "learning_rate": 0.001492774629890942, + "loss": 3.7162, + "step": 510 + }, + { + "epoch": 0.05485232067510549, + "grad_norm": 0.3824036419391632, + "learning_rate": 0.001492422818536679, + "loss": 3.6728, + "step": 520 + }, + { + "epoch": 0.05590717299578059, + "grad_norm": 0.3404929041862488, + "learning_rate": 0.00149206268797222, + "loss": 3.6416, + "step": 530 + }, + { + "epoch": 0.056962025316455694, + "grad_norm": 0.3507385849952698, + "learning_rate": 0.0014916942422330032, + "loss": 3.6132, + "step": 540 + }, + { + "epoch": 0.0580168776371308, + "grad_norm": 0.3499310612678528, + "learning_rate": 0.001491317485447643, + "loss": 3.5961, + "step": 550 + }, + { + "epoch": 0.05907172995780591, + "grad_norm": 0.4159896969795227, + "learning_rate": 0.0014909324218378838, + "loss": 3.5489, + "step": 560 + }, + { + "epoch": 0.060126582278481014, + "grad_norm": 0.3746490478515625, + "learning_rate": 0.0014905390557185508, + "loss": 3.5348, + "step": 570 + }, + { + "epoch": 0.06118143459915612, + "grad_norm": 0.43461933732032776, + "learning_rate": 0.0014901373914975036, + "loss": 3.5157, + "step": 580 + }, + { + "epoch": 0.06223628691983123, + "grad_norm": 0.34867915511131287, + "learning_rate": 0.0014897274336755856, + "loss": 3.4752, + "step": 590 + }, + { + "epoch": 0.06329113924050633, + "grad_norm": 0.46633657813072205, + "learning_rate": 0.001489309186846575, + "loss": 3.4448, + "step": 600 + }, + { + "epoch": 0.06434599156118144, + "grad_norm": 0.44998404383659363, + "learning_rate": 0.0014888826556971313, + "loss": 3.4318, + "step": 610 + }, + { + 
"epoch": 0.06540084388185655, + "grad_norm": 0.7173436284065247, + "learning_rate": 0.0014884478450067444, + "loss": 3.4136, + "step": 620 + }, + { + "epoch": 0.06645569620253164, + "grad_norm": 0.4291818141937256, + "learning_rate": 0.0014880047596476807, + "loss": 3.3894, + "step": 630 + }, + { + "epoch": 0.06751054852320675, + "grad_norm": 0.5226755142211914, + "learning_rate": 0.0014875534045849274, + "loss": 3.3734, + "step": 640 + }, + { + "epoch": 0.06856540084388185, + "grad_norm": 0.4069409668445587, + "learning_rate": 0.0014870937848761388, + "loss": 3.3558, + "step": 650 + }, + { + "epoch": 0.06962025316455696, + "grad_norm": 0.4315668046474457, + "learning_rate": 0.001486625905671578, + "loss": 3.3308, + "step": 660 + }, + { + "epoch": 0.07067510548523206, + "grad_norm": 0.3704533576965332, + "learning_rate": 0.00148614977221406, + "loss": 3.3015, + "step": 670 + }, + { + "epoch": 0.07172995780590717, + "grad_norm": 0.39166080951690674, + "learning_rate": 0.0014856653898388927, + "loss": 3.2815, + "step": 680 + }, + { + "epoch": 0.07278481012658228, + "grad_norm": 0.550244927406311, + "learning_rate": 0.001485172763973817, + "loss": 3.2773, + "step": 690 + }, + { + "epoch": 0.07383966244725738, + "grad_norm": 0.3981841206550598, + "learning_rate": 0.0014846719001389466, + "loss": 3.2508, + "step": 700 + }, + { + "epoch": 0.07489451476793249, + "grad_norm": 0.6950619220733643, + "learning_rate": 0.001484162803946705, + "loss": 3.2316, + "step": 710 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 0.46911871433258057, + "learning_rate": 0.0014836454811017635, + "loss": 3.2057, + "step": 720 + }, + { + "epoch": 0.0770042194092827, + "grad_norm": 0.5388689637184143, + "learning_rate": 0.0014831199374009778, + "loss": 3.1851, + "step": 730 + }, + { + "epoch": 0.07805907172995781, + "grad_norm": 0.3875828981399536, + "learning_rate": 0.0014825861787333208, + "loss": 3.1874, + "step": 740 + }, + { + "epoch": 0.07911392405063292, + "grad_norm": 
0.6200729608535767, + "learning_rate": 0.0014820442110798197, + "loss": 3.1572, + "step": 750 + }, + { + "epoch": 0.08016877637130802, + "grad_norm": 0.6055846214294434, + "learning_rate": 0.0014814940405134865, + "loss": 3.1475, + "step": 760 + }, + { + "epoch": 0.08122362869198312, + "grad_norm": 0.5079436302185059, + "learning_rate": 0.001480935673199251, + "loss": 3.1237, + "step": 770 + }, + { + "epoch": 0.08227848101265822, + "grad_norm": 0.4132385849952698, + "learning_rate": 0.0014803691153938915, + "loss": 3.1075, + "step": 780 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 0.5488369464874268, + "learning_rate": 0.0014797943734459653, + "loss": 3.1026, + "step": 790 + }, + { + "epoch": 0.08438818565400844, + "grad_norm": 0.8777981400489807, + "learning_rate": 0.001479211453795736, + "loss": 3.0738, + "step": 800 + }, + { + "epoch": 0.08544303797468354, + "grad_norm": 0.5064425468444824, + "learning_rate": 0.0014786203629751033, + "loss": 3.0545, + "step": 810 + }, + { + "epoch": 0.08649789029535865, + "grad_norm": 0.4576670825481415, + "learning_rate": 0.0014780211076075279, + "loss": 3.0489, + "step": 820 + }, + { + "epoch": 0.08755274261603375, + "grad_norm": 0.5025016069412231, + "learning_rate": 0.0014774136944079594, + "loss": 3.0435, + "step": 830 + }, + { + "epoch": 0.08860759493670886, + "grad_norm": 0.44218751788139343, + "learning_rate": 0.0014767981301827592, + "loss": 3.0119, + "step": 840 + }, + { + "epoch": 0.08966244725738397, + "grad_norm": 0.4247269630432129, + "learning_rate": 0.0014761744218296249, + "loss": 3.0028, + "step": 850 + }, + { + "epoch": 0.09071729957805907, + "grad_norm": 0.6576159000396729, + "learning_rate": 0.001475542576337513, + "loss": 2.9943, + "step": 860 + }, + { + "epoch": 0.09177215189873418, + "grad_norm": 0.4416460394859314, + "learning_rate": 0.001474902600786561, + "loss": 2.979, + "step": 870 + }, + { + "epoch": 0.09282700421940929, + "grad_norm": 0.4827321767807007, + "learning_rate": 
0.0014742545023480075, + "loss": 2.9629, + "step": 880 + }, + { + "epoch": 0.0938818565400844, + "grad_norm": 0.6779020428657532, + "learning_rate": 0.0014735982882841117, + "loss": 2.9522, + "step": 890 + }, + { + "epoch": 0.0949367088607595, + "grad_norm": 0.5602276921272278, + "learning_rate": 0.0014729339659480727, + "loss": 2.9332, + "step": 900 + }, + { + "epoch": 0.09599156118143459, + "grad_norm": 0.5276526212692261, + "learning_rate": 0.0014722615427839468, + "loss": 2.9271, + "step": 910 + }, + { + "epoch": 0.0970464135021097, + "grad_norm": 0.5293432474136353, + "learning_rate": 0.0014715810263265633, + "loss": 2.9114, + "step": 920 + }, + { + "epoch": 0.0981012658227848, + "grad_norm": 0.4754944443702698, + "learning_rate": 0.0014708924242014423, + "loss": 2.8915, + "step": 930 + }, + { + "epoch": 0.09915611814345991, + "grad_norm": 0.5450716614723206, + "learning_rate": 0.0014701957441247064, + "loss": 2.8818, + "step": 940 + }, + { + "epoch": 0.10021097046413502, + "grad_norm": 0.5438559651374817, + "learning_rate": 0.0014694909939029959, + "loss": 2.8563, + "step": 950 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 0.7445106506347656, + "learning_rate": 0.0014687781814333814, + "loss": 2.8606, + "step": 960 + }, + { + "epoch": 0.10232067510548523, + "grad_norm": 0.6928383111953735, + "learning_rate": 0.0014680573147032746, + "loss": 2.8508, + "step": 970 + }, + { + "epoch": 0.10337552742616034, + "grad_norm": 0.5556101202964783, + "learning_rate": 0.0014673284017903392, + "loss": 2.8195, + "step": 980 + }, + { + "epoch": 0.10443037974683544, + "grad_norm": 0.7667347192764282, + "learning_rate": 0.0014665914508624, + "loss": 2.8207, + "step": 990 + }, + { + "epoch": 0.10548523206751055, + "grad_norm": 0.5010865926742554, + "learning_rate": 0.0014658464701773526, + "loss": 2.8151, + "step": 1000 + }, + { + "epoch": 0.10654008438818566, + "grad_norm": 0.5673621296882629, + "learning_rate": 0.0014650934680830688, + "loss": 2.8003, + "step": 1010 
+ }, + { + "epoch": 0.10759493670886076, + "grad_norm": 0.5368123054504395, + "learning_rate": 0.0014643324530173051, + "loss": 2.7869, + "step": 1020 + }, + { + "epoch": 0.10864978902953587, + "grad_norm": 0.39886289834976196, + "learning_rate": 0.0014635634335076067, + "loss": 2.7697, + "step": 1030 + }, + { + "epoch": 0.10970464135021098, + "grad_norm": 0.4114692807197571, + "learning_rate": 0.001462786418171213, + "loss": 2.7573, + "step": 1040 + }, + { + "epoch": 0.11075949367088607, + "grad_norm": 0.6538660526275635, + "learning_rate": 0.0014620014157149597, + "loss": 2.7617, + "step": 1050 + }, + { + "epoch": 0.11181434599156118, + "grad_norm": 0.5379409193992615, + "learning_rate": 0.001461208434935183, + "loss": 2.7418, + "step": 1060 + }, + { + "epoch": 0.11286919831223628, + "grad_norm": 0.38140618801116943, + "learning_rate": 0.0014604074847176197, + "loss": 2.7181, + "step": 1070 + }, + { + "epoch": 0.11392405063291139, + "grad_norm": 0.6248795986175537, + "learning_rate": 0.0014595985740373082, + "loss": 2.7171, + "step": 1080 + }, + { + "epoch": 0.1149789029535865, + "grad_norm": 0.5874338150024414, + "learning_rate": 0.0014587817119584873, + "loss": 2.7091, + "step": 1090 + }, + { + "epoch": 0.1160337552742616, + "grad_norm": 0.637435793876648, + "learning_rate": 0.001457956907634496, + "loss": 2.6927, + "step": 1100 + }, + { + "epoch": 0.11708860759493671, + "grad_norm": 0.6604284048080444, + "learning_rate": 0.0014571241703076692, + "loss": 2.6906, + "step": 1110 + }, + { + "epoch": 0.11814345991561181, + "grad_norm": 0.6395125389099121, + "learning_rate": 0.0014562835093092348, + "loss": 2.6806, + "step": 1120 + }, + { + "epoch": 0.11919831223628692, + "grad_norm": 0.8312624096870422, + "learning_rate": 0.0014554349340592104, + "loss": 2.6536, + "step": 1130 + }, + { + "epoch": 0.12025316455696203, + "grad_norm": 0.4435734450817108, + "learning_rate": 0.001454578454066296, + "loss": 2.6671, + "step": 1140 + }, + { + "epoch": 0.12130801687763713, 
+ "grad_norm": 0.5201511383056641, + "learning_rate": 0.0014537140789277678, + "loss": 2.6435, + "step": 1150 + }, + { + "epoch": 0.12236286919831224, + "grad_norm": 0.5449000000953674, + "learning_rate": 0.0014528418183293716, + "loss": 2.6466, + "step": 1160 + }, + { + "epoch": 0.12341772151898735, + "grad_norm": 0.573413610458374, + "learning_rate": 0.001451961682045213, + "loss": 2.622, + "step": 1170 + }, + { + "epoch": 0.12447257383966245, + "grad_norm": 0.5475837588310242, + "learning_rate": 0.001451073679937649, + "loss": 2.6095, + "step": 1180 + }, + { + "epoch": 0.12552742616033755, + "grad_norm": 0.4918457269668579, + "learning_rate": 0.0014501778219571766, + "loss": 2.6091, + "step": 1190 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 0.5365713238716125, + "learning_rate": 0.0014492741181423225, + "loss": 2.6083, + "step": 1200 + }, + { + "epoch": 0.12763713080168776, + "grad_norm": 0.5636585354804993, + "learning_rate": 0.0014483625786195285, + "loss": 2.5983, + "step": 1210 + }, + { + "epoch": 0.12869198312236288, + "grad_norm": 0.5831143856048584, + "learning_rate": 0.0014474432136030405, + "loss": 2.58, + "step": 1220 + }, + { + "epoch": 0.12974683544303797, + "grad_norm": 0.5196304321289062, + "learning_rate": 0.0014465160333947923, + "loss": 2.5637, + "step": 1230 + }, + { + "epoch": 0.1308016877637131, + "grad_norm": 0.6028929948806763, + "learning_rate": 0.0014455810483842908, + "loss": 2.5733, + "step": 1240 + }, + { + "epoch": 0.13185654008438819, + "grad_norm": 0.4952998161315918, + "learning_rate": 0.0014446382690484997, + "loss": 2.5681, + "step": 1250 + }, + { + "epoch": 0.13291139240506328, + "grad_norm": 0.6270487904548645, + "learning_rate": 0.0014436877059517215, + "loss": 2.5497, + "step": 1260 + }, + { + "epoch": 0.1339662447257384, + "grad_norm": 0.6462759971618652, + "learning_rate": 0.0014427293697454803, + "loss": 2.5362, + "step": 1270 + }, + { + "epoch": 0.1350210970464135, + "grad_norm": 0.41916537284851074, + 
"learning_rate": 0.001441763271168401, + "loss": 2.5433, + "step": 1280 + }, + { + "epoch": 0.1360759493670886, + "grad_norm": 0.426224023103714, + "learning_rate": 0.00144078942104609, + "loss": 2.5231, + "step": 1290 + }, + { + "epoch": 0.1371308016877637, + "grad_norm": 0.843202531337738, + "learning_rate": 0.001439807830291013, + "loss": 2.5085, + "step": 1300 + }, + { + "epoch": 0.13818565400843882, + "grad_norm": 0.562146008014679, + "learning_rate": 0.0014388185099023744, + "loss": 2.5073, + "step": 1310 + }, + { + "epoch": 0.13924050632911392, + "grad_norm": 0.6429724097251892, + "learning_rate": 0.0014378214709659916, + "loss": 2.5087, + "step": 1320 + }, + { + "epoch": 0.14029535864978904, + "grad_norm": 1.073501467704773, + "learning_rate": 0.0014368167246541733, + "loss": 2.4995, + "step": 1330 + }, + { + "epoch": 0.14135021097046413, + "grad_norm": 0.7359769940376282, + "learning_rate": 0.0014358042822255918, + "loss": 2.4968, + "step": 1340 + }, + { + "epoch": 0.14240506329113925, + "grad_norm": 0.5174380540847778, + "learning_rate": 0.0014347841550251597, + "loss": 2.4931, + "step": 1350 + }, + { + "epoch": 0.14345991561181434, + "grad_norm": 0.5859662890434265, + "learning_rate": 0.0014337563544838997, + "loss": 2.4746, + "step": 1360 + }, + { + "epoch": 0.14451476793248946, + "grad_norm": 0.576949954032898, + "learning_rate": 0.001432720892118819, + "loss": 2.4683, + "step": 1370 + }, + { + "epoch": 0.14556962025316456, + "grad_norm": 0.5304884314537048, + "learning_rate": 0.0014316777795327794, + "loss": 2.4512, + "step": 1380 + }, + { + "epoch": 0.14662447257383968, + "grad_norm": 0.5506976246833801, + "learning_rate": 0.001430627028414366, + "loss": 2.4631, + "step": 1390 + }, + { + "epoch": 0.14767932489451477, + "grad_norm": 0.5631604194641113, + "learning_rate": 0.0014295686505377586, + "loss": 2.444, + "step": 1400 + }, + { + "epoch": 0.14873417721518986, + "grad_norm": 0.6325361728668213, + "learning_rate": 0.0014285026577625982, + "loss": 
2.4343, + "step": 1410 + }, + { + "epoch": 0.14978902953586498, + "grad_norm": 0.5105499625205994, + "learning_rate": 0.0014274290620338542, + "loss": 2.4414, + "step": 1420 + }, + { + "epoch": 0.15084388185654007, + "grad_norm": 0.6928423047065735, + "learning_rate": 0.0014263478753816906, + "loss": 2.4307, + "step": 1430 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 0.6486406326293945, + "learning_rate": 0.0014252591099213326, + "loss": 2.4226, + "step": 1440 + }, + { + "epoch": 0.1529535864978903, + "grad_norm": 0.5129737854003906, + "learning_rate": 0.001424162777852928, + "loss": 2.4172, + "step": 1450 + }, + { + "epoch": 0.1540084388185654, + "grad_norm": 0.5821391344070435, + "learning_rate": 0.0014230588914614134, + "loss": 2.4088, + "step": 1460 + }, + { + "epoch": 0.1550632911392405, + "grad_norm": 1.0022125244140625, + "learning_rate": 0.0014219474631163745, + "loss": 2.4059, + "step": 1470 + }, + { + "epoch": 0.15611814345991562, + "grad_norm": 0.4469664394855499, + "learning_rate": 0.001420828505271909, + "loss": 2.4011, + "step": 1480 + }, + { + "epoch": 0.1571729957805907, + "grad_norm": 0.43791458010673523, + "learning_rate": 0.0014197020304664856, + "loss": 2.3924, + "step": 1490 + }, + { + "epoch": 0.15822784810126583, + "grad_norm": 0.4403422176837921, + "learning_rate": 0.0014185680513228048, + "loss": 2.3932, + "step": 1500 + }, + { + "epoch": 0.15928270042194093, + "grad_norm": 0.5889863967895508, + "learning_rate": 0.0014174265805476564, + "loss": 2.3835, + "step": 1510 + }, + { + "epoch": 0.16033755274261605, + "grad_norm": 0.46867305040359497, + "learning_rate": 0.0014162776309317778, + "loss": 2.3852, + "step": 1520 + }, + { + "epoch": 0.16139240506329114, + "grad_norm": 0.5346658825874329, + "learning_rate": 0.0014151212153497108, + "loss": 2.366, + "step": 1530 + }, + { + "epoch": 0.16244725738396623, + "grad_norm": 0.42307308316230774, + "learning_rate": 0.0014139573467596561, + "loss": 2.3514, + "step": 1540 + }, + { + 
"epoch": 0.16350210970464135, + "grad_norm": 0.5532405972480774, + "learning_rate": 0.00141278603820333, + "loss": 2.347, + "step": 1550 + }, + { + "epoch": 0.16455696202531644, + "grad_norm": 0.5887147784233093, + "learning_rate": 0.0014116073028058165, + "loss": 2.3517, + "step": 1560 + }, + { + "epoch": 0.16561181434599156, + "grad_norm": 0.4708069860935211, + "learning_rate": 0.0014104211537754217, + "loss": 2.3384, + "step": 1570 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 0.627088189125061, + "learning_rate": 0.001409227604403524, + "loss": 2.3447, + "step": 1580 + }, + { + "epoch": 0.16772151898734178, + "grad_norm": 0.5353384017944336, + "learning_rate": 0.0014080266680644277, + "loss": 2.3429, + "step": 1590 + }, + { + "epoch": 0.16877637130801687, + "grad_norm": 0.5134146809577942, + "learning_rate": 0.0014068183582152103, + "loss": 2.3403, + "step": 1600 + }, + { + "epoch": 0.169831223628692, + "grad_norm": 0.5013100504875183, + "learning_rate": 0.001405602688395574, + "loss": 2.3347, + "step": 1610 + }, + { + "epoch": 0.17088607594936708, + "grad_norm": 0.4874526858329773, + "learning_rate": 0.0014043796722276924, + "loss": 2.3076, + "step": 1620 + }, + { + "epoch": 0.1719409282700422, + "grad_norm": 0.5575470924377441, + "learning_rate": 0.0014031493234160591, + "loss": 2.32, + "step": 1630 + }, + { + "epoch": 0.1729957805907173, + "grad_norm": 0.5718533992767334, + "learning_rate": 0.0014019116557473332, + "loss": 2.3128, + "step": 1640 + }, + { + "epoch": 0.17405063291139242, + "grad_norm": 0.8238292932510376, + "learning_rate": 0.0014006666830901854, + "loss": 2.3031, + "step": 1650 + }, + { + "epoch": 0.1751054852320675, + "grad_norm": 0.4589861333370209, + "learning_rate": 0.001399414419395142, + "loss": 2.2985, + "step": 1660 + }, + { + "epoch": 0.17616033755274263, + "grad_norm": 0.5330724716186523, + "learning_rate": 0.0013981548786944293, + "loss": 2.3021, + "step": 1670 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 
0.4716106653213501, + "learning_rate": 0.0013968880751018158, + "loss": 2.2899, + "step": 1680 + }, + { + "epoch": 0.17827004219409281, + "grad_norm": 0.6494364142417908, + "learning_rate": 0.0013956140228124545, + "loss": 2.2758, + "step": 1690 + }, + { + "epoch": 0.17932489451476794, + "grad_norm": 0.667860746383667, + "learning_rate": 0.0013943327361027231, + "loss": 2.2914, + "step": 1700 + }, + { + "epoch": 0.18037974683544303, + "grad_norm": 0.5189117789268494, + "learning_rate": 0.0013930442293300649, + "loss": 2.2801, + "step": 1710 + }, + { + "epoch": 0.18143459915611815, + "grad_norm": 0.4977673888206482, + "learning_rate": 0.0013917485169328279, + "loss": 2.2684, + "step": 1720 + }, + { + "epoch": 0.18248945147679324, + "grad_norm": 0.5525290369987488, + "learning_rate": 0.0013904456134301016, + "loss": 2.269, + "step": 1730 + }, + { + "epoch": 0.18354430379746836, + "grad_norm": 0.6430198550224304, + "learning_rate": 0.0013891355334215562, + "loss": 2.2695, + "step": 1740 + }, + { + "epoch": 0.18459915611814345, + "grad_norm": 0.4999960660934448, + "learning_rate": 0.0013878182915872776, + "loss": 2.2866, + "step": 1750 + }, + { + "epoch": 0.18565400843881857, + "grad_norm": 0.5367920994758606, + "learning_rate": 0.001386493902687604, + "loss": 2.2647, + "step": 1760 + }, + { + "epoch": 0.18670886075949367, + "grad_norm": 0.4812481701374054, + "learning_rate": 0.00138516238156296, + "loss": 2.262, + "step": 1770 + }, + { + "epoch": 0.1877637130801688, + "grad_norm": 0.44139522314071655, + "learning_rate": 0.0013838237431336895, + "loss": 2.2587, + "step": 1780 + }, + { + "epoch": 0.18881856540084388, + "grad_norm": 0.47555893659591675, + "learning_rate": 0.0013824780023998899, + "loss": 2.255, + "step": 1790 + }, + { + "epoch": 0.189873417721519, + "grad_norm": 0.5199758410453796, + "learning_rate": 0.0013811251744412431, + "loss": 2.2534, + "step": 1800 + }, + { + "epoch": 0.1909282700421941, + "grad_norm": 0.4588499665260315, + "learning_rate": 
0.0013797652744168473, + "loss": 2.2361, + "step": 1810 + }, + { + "epoch": 0.19198312236286919, + "grad_norm": 0.5112881064414978, + "learning_rate": 0.0013783983175650457, + "loss": 2.2297, + "step": 1820 + }, + { + "epoch": 0.1930379746835443, + "grad_norm": 0.5059300661087036, + "learning_rate": 0.0013770243192032581, + "loss": 2.2274, + "step": 1830 + }, + { + "epoch": 0.1940928270042194, + "grad_norm": 0.5563004016876221, + "learning_rate": 0.0013756432947278064, + "loss": 2.224, + "step": 1840 + }, + { + "epoch": 0.19514767932489452, + "grad_norm": 0.7133592963218689, + "learning_rate": 0.0013742552596137444, + "loss": 2.2245, + "step": 1850 + }, + { + "epoch": 0.1962025316455696, + "grad_norm": 0.46903708577156067, + "learning_rate": 0.0013728602294146833, + "loss": 2.2295, + "step": 1860 + }, + { + "epoch": 0.19725738396624473, + "grad_norm": 0.9044933915138245, + "learning_rate": 0.0013714582197626175, + "loss": 2.2248, + "step": 1870 + }, + { + "epoch": 0.19831223628691982, + "grad_norm": 0.6611828804016113, + "learning_rate": 0.0013700492463677501, + "loss": 2.2138, + "step": 1880 + }, + { + "epoch": 0.19936708860759494, + "grad_norm": 0.5141692757606506, + "learning_rate": 0.0013686333250183154, + "loss": 2.2139, + "step": 1890 + }, + { + "epoch": 0.20042194092827004, + "grad_norm": 0.7123673558235168, + "learning_rate": 0.001367210471580404, + "loss": 2.2109, + "step": 1900 + }, + { + "epoch": 0.20147679324894516, + "grad_norm": 0.5838491320610046, + "learning_rate": 0.0013657807019977835, + "loss": 2.2087, + "step": 1910 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 0.5097541809082031, + "learning_rate": 0.0013643440322917198, + "loss": 2.2021, + "step": 1920 + }, + { + "epoch": 0.20358649789029537, + "grad_norm": 0.7783833146095276, + "learning_rate": 0.0013629004785607989, + "loss": 2.2035, + "step": 1930 + }, + { + "epoch": 0.20464135021097046, + "grad_norm": 0.46466729044914246, + "learning_rate": 0.0013614500569807445, + "loss": 
2.1894, + "step": 1940 + }, + { + "epoch": 0.20569620253164558, + "grad_norm": 0.4604887366294861, + "learning_rate": 0.0013599927838042394, + "loss": 2.189, + "step": 1950 + }, + { + "epoch": 0.20675105485232068, + "grad_norm": 0.5109618902206421, + "learning_rate": 0.0013585286753607408, + "loss": 2.1915, + "step": 1960 + }, + { + "epoch": 0.20780590717299577, + "grad_norm": 0.5166174173355103, + "learning_rate": 0.0013570577480562986, + "loss": 2.197, + "step": 1970 + }, + { + "epoch": 0.2088607594936709, + "grad_norm": 0.518307626247406, + "learning_rate": 0.0013555800183733717, + "loss": 2.1803, + "step": 1980 + }, + { + "epoch": 0.20991561181434598, + "grad_norm": 0.48239415884017944, + "learning_rate": 0.0013540955028706425, + "loss": 2.1743, + "step": 1990 + }, + { + "epoch": 0.2109704641350211, + "grad_norm": 0.46826907992362976, + "learning_rate": 0.0013526042181828324, + "loss": 2.1725, + "step": 2000 + }, + { + "epoch": 0.2120253164556962, + "grad_norm": 0.741210401058197, + "learning_rate": 0.0013511061810205143, + "loss": 2.1766, + "step": 2010 + }, + { + "epoch": 0.21308016877637131, + "grad_norm": 0.7357885837554932, + "learning_rate": 0.001349601408169926, + "loss": 2.1869, + "step": 2020 + }, + { + "epoch": 0.2141350210970464, + "grad_norm": 0.5517560839653015, + "learning_rate": 0.0013480899164927823, + "loss": 2.1703, + "step": 2030 + }, + { + "epoch": 0.21518987341772153, + "grad_norm": 0.5097281336784363, + "learning_rate": 0.0013465717229260853, + "loss": 2.1633, + "step": 2040 + }, + { + "epoch": 0.21624472573839662, + "grad_norm": 0.536267876625061, + "learning_rate": 0.001345046844481935, + "loss": 2.1657, + "step": 2050 + }, + { + "epoch": 0.21729957805907174, + "grad_norm": 0.5107452869415283, + "learning_rate": 0.0013435152982473396, + "loss": 2.1499, + "step": 2060 + }, + { + "epoch": 0.21835443037974683, + "grad_norm": 0.6170417666435242, + "learning_rate": 0.0013419771013840217, + "loss": 2.1503, + "step": 2070 + }, + { + "epoch": 
0.21940928270042195, + "grad_norm": 0.5445805788040161, + "learning_rate": 0.001340432271128229, + "loss": 2.1598, + "step": 2080 + }, + { + "epoch": 0.22046413502109705, + "grad_norm": 0.45438647270202637, + "learning_rate": 0.0013388808247905381, + "loss": 2.1451, + "step": 2090 + }, + { + "epoch": 0.22151898734177214, + "grad_norm": 0.49706727266311646, + "learning_rate": 0.0013373227797556634, + "loss": 2.1512, + "step": 2100 + }, + { + "epoch": 0.22257383966244726, + "grad_norm": 0.8206738829612732, + "learning_rate": 0.00133575815348226, + "loss": 2.1425, + "step": 2110 + }, + { + "epoch": 0.22362869198312235, + "grad_norm": 0.5492030382156372, + "learning_rate": 0.0013341869635027292, + "loss": 2.1386, + "step": 2120 + }, + { + "epoch": 0.22468354430379747, + "grad_norm": 0.4679706394672394, + "learning_rate": 0.001332609227423022, + "loss": 2.1393, + "step": 2130 + }, + { + "epoch": 0.22573839662447256, + "grad_norm": 0.4610307216644287, + "learning_rate": 0.0013310249629224417, + "loss": 2.1313, + "step": 2140 + }, + { + "epoch": 0.22679324894514769, + "grad_norm": 0.5091895461082458, + "learning_rate": 0.0013294341877534454, + "loss": 2.1353, + "step": 2150 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 0.49631762504577637, + "learning_rate": 0.0013278369197414458, + "loss": 2.1437, + "step": 2160 + }, + { + "epoch": 0.2289029535864979, + "grad_norm": 0.5235090851783752, + "learning_rate": 0.0013262331767846104, + "loss": 2.1277, + "step": 2170 + }, + { + "epoch": 0.229957805907173, + "grad_norm": 0.46336719393730164, + "learning_rate": 0.0013246229768536628, + "loss": 2.1147, + "step": 2180 + }, + { + "epoch": 0.2310126582278481, + "grad_norm": 0.4158889651298523, + "learning_rate": 0.001323006337991679, + "loss": 2.1252, + "step": 2190 + }, + { + "epoch": 0.2320675105485232, + "grad_norm": 0.47239384055137634, + "learning_rate": 0.0013213832783138873, + "loss": 2.124, + "step": 2200 + }, + { + "epoch": 0.23312236286919832, + "grad_norm": 
0.4389095902442932, + "learning_rate": 0.0013197538160074633, + "loss": 2.1132, + "step": 2210 + }, + { + "epoch": 0.23417721518987342, + "grad_norm": 0.4863034784793854, + "learning_rate": 0.0013181179693313283, + "loss": 2.1211, + "step": 2220 + }, + { + "epoch": 0.23523206751054854, + "grad_norm": 0.7975693941116333, + "learning_rate": 0.0013164757566159428, + "loss": 2.1123, + "step": 2230 + }, + { + "epoch": 0.23628691983122363, + "grad_norm": 0.6992659568786621, + "learning_rate": 0.001314827196263102, + "loss": 2.11, + "step": 2240 + }, + { + "epoch": 0.23734177215189872, + "grad_norm": 0.6842637658119202, + "learning_rate": 0.0013131723067457302, + "loss": 2.1087, + "step": 2250 + }, + { + "epoch": 0.23839662447257384, + "grad_norm": 0.4873306453227997, + "learning_rate": 0.0013115111066076721, + "loss": 2.0991, + "step": 2260 + }, + { + "epoch": 0.23945147679324894, + "grad_norm": 0.5303446054458618, + "learning_rate": 0.0013098436144634862, + "loss": 2.1285, + "step": 2270 + }, + { + "epoch": 0.24050632911392406, + "grad_norm": 0.5792036652565002, + "learning_rate": 0.0013081698489982364, + "loss": 2.1073, + "step": 2280 + }, + { + "epoch": 0.24156118143459915, + "grad_norm": 0.49649012088775635, + "learning_rate": 0.001306489828967282, + "loss": 2.0944, + "step": 2290 + }, + { + "epoch": 0.24261603375527427, + "grad_norm": 0.5122395157814026, + "learning_rate": 0.0013048035731960679, + "loss": 2.0938, + "step": 2300 + }, + { + "epoch": 0.24367088607594936, + "grad_norm": 0.6259952187538147, + "learning_rate": 0.0013031111005799133, + "loss": 2.0968, + "step": 2310 + }, + { + "epoch": 0.24472573839662448, + "grad_norm": 0.5403408408164978, + "learning_rate": 0.0013014124300838004, + "loss": 2.0993, + "step": 2320 + }, + { + "epoch": 0.24578059071729957, + "grad_norm": 0.565324068069458, + "learning_rate": 0.0012997075807421612, + "loss": 2.08, + "step": 2330 + }, + { + "epoch": 0.2468354430379747, + "grad_norm": 0.5294456481933594, + "learning_rate": 
0.0012979965716586653, + "loss": 2.0787, + "step": 2340 + }, + { + "epoch": 0.2478902953586498, + "grad_norm": 0.7323079109191895, + "learning_rate": 0.0012962794220060048, + "loss": 2.079, + "step": 2350 + }, + { + "epoch": 0.2489451476793249, + "grad_norm": 0.5798640251159668, + "learning_rate": 0.0012945561510256801, + "loss": 2.0855, + "step": 2360 + }, + { + "epoch": 0.25, + "grad_norm": 0.45997607707977295, + "learning_rate": 0.001292826778027784, + "loss": 2.0806, + "step": 2370 + }, + { + "epoch": 0.2510548523206751, + "grad_norm": 0.5164095759391785, + "learning_rate": 0.0012910913223907856, + "loss": 2.0794, + "step": 2380 + }, + { + "epoch": 0.2521097046413502, + "grad_norm": 0.5138576626777649, + "learning_rate": 0.0012893498035613123, + "loss": 2.0766, + "step": 2390 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 0.4967941641807556, + "learning_rate": 0.001287602241053933, + "loss": 2.0715, + "step": 2400 + }, + { + "epoch": 0.2542194092827004, + "grad_norm": 0.5357160568237305, + "learning_rate": 0.0012858486544509392, + "loss": 2.0711, + "step": 2410 + }, + { + "epoch": 0.2552742616033755, + "grad_norm": 0.5568988919258118, + "learning_rate": 0.0012840890634021249, + "loss": 2.0714, + "step": 2420 + }, + { + "epoch": 0.2563291139240506, + "grad_norm": 0.44154462218284607, + "learning_rate": 0.0012823234876245667, + "loss": 2.0663, + "step": 2430 + }, + { + "epoch": 0.25738396624472576, + "grad_norm": 0.5281570553779602, + "learning_rate": 0.0012805519469024035, + "loss": 2.0741, + "step": 2440 + }, + { + "epoch": 0.25843881856540085, + "grad_norm": 0.5093416571617126, + "learning_rate": 0.0012787744610866143, + "loss": 2.058, + "step": 2450 + }, + { + "epoch": 0.25949367088607594, + "grad_norm": 0.49353864789009094, + "learning_rate": 0.0012769910500947954, + "loss": 2.0686, + "step": 2460 + }, + { + "epoch": 0.26054852320675104, + "grad_norm": 0.590641438961029, + "learning_rate": 0.0012752017339109376, + "loss": 2.0592, + "step": 2470 + }, 
+ { + "epoch": 0.2616033755274262, + "grad_norm": 0.6636717319488525, + "learning_rate": 0.0012734065325852029, + "loss": 2.0582, + "step": 2480 + }, + { + "epoch": 0.2626582278481013, + "grad_norm": 0.5236597657203674, + "learning_rate": 0.0012716054662336987, + "loss": 2.0466, + "step": 2490 + }, + { + "epoch": 0.26371308016877637, + "grad_norm": 0.5581481456756592, + "learning_rate": 0.001269798555038252, + "loss": 2.0537, + "step": 2500 + }, + { + "epoch": 0.26476793248945146, + "grad_norm": 0.42993542551994324, + "learning_rate": 0.0012679858192461864, + "loss": 2.048, + "step": 2510 + }, + { + "epoch": 0.26582278481012656, + "grad_norm": 0.8275519013404846, + "learning_rate": 0.0012661672791700906, + "loss": 2.051, + "step": 2520 + }, + { + "epoch": 0.2668776371308017, + "grad_norm": 0.5785121321678162, + "learning_rate": 0.0012643429551875945, + "loss": 2.0472, + "step": 2530 + }, + { + "epoch": 0.2679324894514768, + "grad_norm": 0.5842779278755188, + "learning_rate": 0.0012625128677411388, + "loss": 2.0488, + "step": 2540 + }, + { + "epoch": 0.2689873417721519, + "grad_norm": 0.4994587004184723, + "learning_rate": 0.0012606770373377475, + "loss": 2.0399, + "step": 2550 + }, + { + "epoch": 0.270042194092827, + "grad_norm": 0.6527187824249268, + "learning_rate": 0.0012588354845487959, + "loss": 2.0465, + "step": 2560 + }, + { + "epoch": 0.27109704641350213, + "grad_norm": 0.48334649205207825, + "learning_rate": 0.001256988230009783, + "loss": 2.0426, + "step": 2570 + }, + { + "epoch": 0.2721518987341772, + "grad_norm": 0.47088199853897095, + "learning_rate": 0.0012551352944200976, + "loss": 2.0399, + "step": 2580 + }, + { + "epoch": 0.2732067510548523, + "grad_norm": 0.4740578532218933, + "learning_rate": 0.0012532766985427874, + "loss": 2.044, + "step": 2590 + }, + { + "epoch": 0.2742616033755274, + "grad_norm": 0.4745907485485077, + "learning_rate": 0.0012514124632043272, + "loss": 2.0418, + "step": 2600 + }, + { + "epoch": 0.27531645569620256, + 
"grad_norm": 0.45381197333335876, + "learning_rate": 0.0012495426092943842, + "loss": 2.0402, + "step": 2610 + }, + { + "epoch": 0.27637130801687765, + "grad_norm": 0.47148647904396057, + "learning_rate": 0.0012476671577655845, + "loss": 2.0358, + "step": 2620 + }, + { + "epoch": 0.27742616033755274, + "grad_norm": 0.4941268563270569, + "learning_rate": 0.0012457861296332774, + "loss": 2.0228, + "step": 2630 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 0.45439717173576355, + "learning_rate": 0.001243899545975303, + "loss": 2.0355, + "step": 2640 + }, + { + "epoch": 0.2795358649789029, + "grad_norm": 0.5627492666244507, + "learning_rate": 0.0012420074279317515, + "loss": 2.0224, + "step": 2650 + }, + { + "epoch": 0.2805907172995781, + "grad_norm": 0.42299702763557434, + "learning_rate": 0.0012401097967047298, + "loss": 2.0208, + "step": 2660 + }, + { + "epoch": 0.28164556962025317, + "grad_norm": 0.6138981580734253, + "learning_rate": 0.001238206673558122, + "loss": 2.018, + "step": 2670 + }, + { + "epoch": 0.28270042194092826, + "grad_norm": 0.537173867225647, + "learning_rate": 0.0012362980798173526, + "loss": 2.0206, + "step": 2680 + }, + { + "epoch": 0.28375527426160335, + "grad_norm": 0.5888779163360596, + "learning_rate": 0.0012343840368691462, + "loss": 2.0169, + "step": 2690 + }, + { + "epoch": 0.2848101265822785, + "grad_norm": 0.49321460723876953, + "learning_rate": 0.0012324645661612886, + "loss": 2.0141, + "step": 2700 + }, + { + "epoch": 0.2858649789029536, + "grad_norm": 0.7142887711524963, + "learning_rate": 0.0012305396892023867, + "loss": 2.0183, + "step": 2710 + }, + { + "epoch": 0.2869198312236287, + "grad_norm": 0.4920755922794342, + "learning_rate": 0.0012286094275616264, + "loss": 2.02, + "step": 2720 + }, + { + "epoch": 0.2879746835443038, + "grad_norm": 0.5029653906822205, + "learning_rate": 0.0012266738028685318, + "loss": 1.9965, + "step": 2730 + }, + { + "epoch": 0.2890295358649789, + "grad_norm": 0.6936984062194824, + 
"learning_rate": 0.001224732836812723, + "loss": 2.0109, + "step": 2740 + }, + { + "epoch": 0.290084388185654, + "grad_norm": 0.4375728666782379, + "learning_rate": 0.0012227865511436724, + "loss": 2.0122, + "step": 2750 + }, + { + "epoch": 0.2911392405063291, + "grad_norm": 0.5087941884994507, + "learning_rate": 0.001220834967670461, + "loss": 2.0177, + "step": 2760 + }, + { + "epoch": 0.2921940928270042, + "grad_norm": 0.45212846994400024, + "learning_rate": 0.0012188781082615346, + "loss": 2.0123, + "step": 2770 + }, + { + "epoch": 0.29324894514767935, + "grad_norm": 0.5283817648887634, + "learning_rate": 0.0012169159948444588, + "loss": 2.0103, + "step": 2780 + }, + { + "epoch": 0.29430379746835444, + "grad_norm": 0.44647935032844543, + "learning_rate": 0.001214948649405672, + "loss": 2.0038, + "step": 2790 + }, + { + "epoch": 0.29535864978902954, + "grad_norm": 0.5136517286300659, + "learning_rate": 0.0012129760939902407, + "loss": 2.0033, + "step": 2800 + }, + { + "epoch": 0.29641350210970463, + "grad_norm": 0.5517586469650269, + "learning_rate": 0.0012109983507016114, + "loss": 2.0024, + "step": 2810 + }, + { + "epoch": 0.2974683544303797, + "grad_norm": 0.44700121879577637, + "learning_rate": 0.0012090154417013636, + "loss": 2.0079, + "step": 2820 + }, + { + "epoch": 0.29852320675105487, + "grad_norm": 0.8851162195205688, + "learning_rate": 0.0012070273892089605, + "loss": 1.9802, + "step": 2830 + }, + { + "epoch": 0.29957805907172996, + "grad_norm": 0.8499312996864319, + "learning_rate": 0.0012050342155015012, + "loss": 1.9846, + "step": 2840 + }, + { + "epoch": 0.30063291139240506, + "grad_norm": 0.5142639875411987, + "learning_rate": 0.0012030359429134707, + "loss": 1.9907, + "step": 2850 + }, + { + "epoch": 0.30168776371308015, + "grad_norm": 0.4491482079029083, + "learning_rate": 0.0012010325938364883, + "loss": 1.9906, + "step": 2860 + }, + { + "epoch": 0.3027426160337553, + "grad_norm": 0.5181399583816528, + "learning_rate": 0.0011990241907190592, + 
"loss": 1.9888, + "step": 2870 + }, + { + "epoch": 0.3037974683544304, + "grad_norm": 0.5678601264953613, + "learning_rate": 0.001197010756066321, + "loss": 1.9771, + "step": 2880 + }, + { + "epoch": 0.3048523206751055, + "grad_norm": 0.4797880947589874, + "learning_rate": 0.0011949923124397917, + "loss": 1.9809, + "step": 2890 + }, + { + "epoch": 0.3059071729957806, + "grad_norm": 0.5970410704612732, + "learning_rate": 0.001192968882457118, + "loss": 1.9881, + "step": 2900 + }, + { + "epoch": 0.3069620253164557, + "grad_norm": 0.6792855262756348, + "learning_rate": 0.001190940488791821, + "loss": 1.9865, + "step": 2910 + }, + { + "epoch": 0.3080168776371308, + "grad_norm": 0.6052667498588562, + "learning_rate": 0.0011889071541730419, + "loss": 1.9811, + "step": 2920 + }, + { + "epoch": 0.3090717299578059, + "grad_norm": 0.4717925786972046, + "learning_rate": 0.001186868901385288, + "loss": 1.9749, + "step": 2930 + }, + { + "epoch": 0.310126582278481, + "grad_norm": 0.4459092617034912, + "learning_rate": 0.001184825753268177, + "loss": 1.9806, + "step": 2940 + }, + { + "epoch": 0.3111814345991561, + "grad_norm": 0.5120673775672913, + "learning_rate": 0.0011827777327161814, + "loss": 1.9866, + "step": 2950 + }, + { + "epoch": 0.31223628691983124, + "grad_norm": 0.6461082100868225, + "learning_rate": 0.0011807248626783714, + "loss": 1.9684, + "step": 2960 + }, + { + "epoch": 0.31329113924050633, + "grad_norm": 0.7134902477264404, + "learning_rate": 0.0011786671661581584, + "loss": 1.9658, + "step": 2970 + }, + { + "epoch": 0.3143459915611814, + "grad_norm": 0.6556893587112427, + "learning_rate": 0.001176604666213036, + "loss": 1.9704, + "step": 2980 + }, + { + "epoch": 0.3154008438818565, + "grad_norm": 0.5367307662963867, + "learning_rate": 0.0011745373859543236, + "loss": 1.9844, + "step": 2990 + }, + { + "epoch": 0.31645569620253167, + "grad_norm": 0.48330724239349365, + "learning_rate": 0.0011724653485469063, + "loss": 1.9598, + "step": 3000 + }, + { + "epoch": 
0.31751054852320676, + "grad_norm": 0.4575100839138031, + "learning_rate": 0.0011703885772089743, + "loss": 1.9682, + "step": 3010 + }, + { + "epoch": 0.31856540084388185, + "grad_norm": 0.46918046474456787, + "learning_rate": 0.0011683070952117646, + "loss": 1.965, + "step": 3020 + }, + { + "epoch": 0.31962025316455694, + "grad_norm": 0.5122446417808533, + "learning_rate": 0.0011662209258792998, + "loss": 1.957, + "step": 3030 + }, + { + "epoch": 0.3206751054852321, + "grad_norm": 0.49281787872314453, + "learning_rate": 0.0011641300925881257, + "loss": 1.9639, + "step": 3040 + }, + { + "epoch": 0.3217299578059072, + "grad_norm": 0.5307649970054626, + "learning_rate": 0.0011620346187670501, + "loss": 1.9588, + "step": 3050 + }, + { + "epoch": 0.3227848101265823, + "grad_norm": 0.5202289819717407, + "learning_rate": 0.0011599345278968806, + "loss": 1.9665, + "step": 3060 + }, + { + "epoch": 0.32383966244725737, + "grad_norm": 0.44670790433883667, + "learning_rate": 0.0011578298435101604, + "loss": 1.9587, + "step": 3070 + }, + { + "epoch": 0.32489451476793246, + "grad_norm": 0.511365532875061, + "learning_rate": 0.0011557205891909062, + "loss": 1.946, + "step": 3080 + }, + { + "epoch": 0.3259493670886076, + "grad_norm": 0.5145044326782227, + "learning_rate": 0.0011536067885743423, + "loss": 1.9588, + "step": 3090 + }, + { + "epoch": 0.3270042194092827, + "grad_norm": 0.5643377900123596, + "learning_rate": 0.001151488465346637, + "loss": 1.943, + "step": 3100 + }, + { + "epoch": 0.3280590717299578, + "grad_norm": 0.4689241051673889, + "learning_rate": 0.0011493656432446362, + "loss": 1.9542, + "step": 3110 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 0.5530062913894653, + "learning_rate": 0.0011472383460555983, + "loss": 1.9536, + "step": 3120 + }, + { + "epoch": 0.33016877637130804, + "grad_norm": 0.6001288890838623, + "learning_rate": 0.001145106597616927, + "loss": 1.9613, + "step": 3130 + }, + { + "epoch": 0.33122362869198313, + "grad_norm": 
0.6501178741455078, + "learning_rate": 0.001142970421815904, + "loss": 1.9375, + "step": 3140 + }, + { + "epoch": 0.3322784810126582, + "grad_norm": 0.46388620138168335, + "learning_rate": 0.0011408298425894226, + "loss": 1.9478, + "step": 3150 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.4548287093639374, + "learning_rate": 0.0011386848839237186, + "loss": 1.9474, + "step": 3160 + }, + { + "epoch": 0.33438818565400846, + "grad_norm": 0.5390093326568604, + "learning_rate": 0.0011365355698541005, + "loss": 1.9499, + "step": 3170 + }, + { + "epoch": 0.33544303797468356, + "grad_norm": 0.5916829705238342, + "learning_rate": 0.0011343819244646824, + "loss": 1.9442, + "step": 3180 + }, + { + "epoch": 0.33649789029535865, + "grad_norm": 0.6224778890609741, + "learning_rate": 0.001132223971888112, + "loss": 1.9393, + "step": 3190 + }, + { + "epoch": 0.33755274261603374, + "grad_norm": 0.5904368758201599, + "learning_rate": 0.0011300617363053024, + "loss": 1.9367, + "step": 3200 + }, + { + "epoch": 0.33860759493670883, + "grad_norm": 0.5531172752380371, + "learning_rate": 0.0011278952419451586, + "loss": 1.9493, + "step": 3210 + }, + { + "epoch": 0.339662447257384, + "grad_norm": 0.5716174840927124, + "learning_rate": 0.0011257245130843077, + "loss": 1.9378, + "step": 3220 + }, + { + "epoch": 0.3407172995780591, + "grad_norm": 0.6984868049621582, + "learning_rate": 0.0011235495740468265, + "loss": 1.9344, + "step": 3230 + }, + { + "epoch": 0.34177215189873417, + "grad_norm": 0.46913743019104004, + "learning_rate": 0.0011213704492039694, + "loss": 1.9134, + "step": 3240 + }, + { + "epoch": 0.34282700421940926, + "grad_norm": 0.5647366046905518, + "learning_rate": 0.001119187162973894, + "loss": 1.9363, + "step": 3250 + }, + { + "epoch": 0.3438818565400844, + "grad_norm": 0.5404950976371765, + "learning_rate": 0.001116999739821388, + "loss": 1.9263, + "step": 3260 + }, + { + "epoch": 0.3449367088607595, + "grad_norm": 0.49418604373931885, + "learning_rate": 
0.0011148082042575968, + "loss": 1.9449, + "step": 3270 + }, + { + "epoch": 0.3459915611814346, + "grad_norm": 0.5184170603752136, + "learning_rate": 0.0011126125808397461, + "loss": 1.9409, + "step": 3280 + }, + { + "epoch": 0.3470464135021097, + "grad_norm": 0.8782699108123779, + "learning_rate": 0.0011104128941708683, + "loss": 1.9222, + "step": 3290 + }, + { + "epoch": 0.34810126582278483, + "grad_norm": 0.4868786036968231, + "learning_rate": 0.001108209168899527, + "loss": 1.9323, + "step": 3300 + }, + { + "epoch": 0.3491561181434599, + "grad_norm": 0.4536048471927643, + "learning_rate": 0.0011060014297195396, + "loss": 1.9358, + "step": 3310 + }, + { + "epoch": 0.350210970464135, + "grad_norm": 0.5090391635894775, + "learning_rate": 0.0011037897013697015, + "loss": 1.9359, + "step": 3320 + }, + { + "epoch": 0.3512658227848101, + "grad_norm": 0.4594990611076355, + "learning_rate": 0.0011015740086335092, + "loss": 1.9246, + "step": 3330 + }, + { + "epoch": 0.35232067510548526, + "grad_norm": 0.47361326217651367, + "learning_rate": 0.0010993543763388814, + "loss": 1.9265, + "step": 3340 + }, + { + "epoch": 0.35337552742616035, + "grad_norm": 0.5177754759788513, + "learning_rate": 0.0010971308293578814, + "loss": 1.9119, + "step": 3350 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 0.4772936999797821, + "learning_rate": 0.0010949033926064397, + "loss": 1.9119, + "step": 3360 + }, + { + "epoch": 0.35548523206751054, + "grad_norm": 0.47741714119911194, + "learning_rate": 0.0010926720910440725, + "loss": 1.9234, + "step": 3370 + }, + { + "epoch": 0.35654008438818563, + "grad_norm": 0.47953641414642334, + "learning_rate": 0.001090436949673603, + "loss": 1.9189, + "step": 3380 + }, + { + "epoch": 0.3575949367088608, + "grad_norm": 0.4944550395011902, + "learning_rate": 0.0010881979935408815, + "loss": 1.9128, + "step": 3390 + }, + { + "epoch": 0.35864978902953587, + "grad_norm": 0.5210838913917542, + "learning_rate": 0.0010859552477345052, + "loss": 1.9261, + 
"step": 3400 + }, + { + "epoch": 0.35970464135021096, + "grad_norm": 0.47992566227912903, + "learning_rate": 0.001083708737385536, + "loss": 1.915, + "step": 3410 + }, + { + "epoch": 0.36075949367088606, + "grad_norm": 0.687235414981842, + "learning_rate": 0.0010814584876672187, + "loss": 1.9021, + "step": 3420 + }, + { + "epoch": 0.3618143459915612, + "grad_norm": 0.5429995656013489, + "learning_rate": 0.0010792045237947008, + "loss": 1.9158, + "step": 3430 + }, + { + "epoch": 0.3628691983122363, + "grad_norm": 0.48844775557518005, + "learning_rate": 0.0010769468710247478, + "loss": 1.9076, + "step": 3440 + }, + { + "epoch": 0.3639240506329114, + "grad_norm": 0.496257483959198, + "learning_rate": 0.0010746855546554612, + "loss": 1.9093, + "step": 3450 + }, + { + "epoch": 0.3649789029535865, + "grad_norm": 0.672530472278595, + "learning_rate": 0.0010724206000259954, + "loss": 1.8992, + "step": 3460 + }, + { + "epoch": 0.36603375527426163, + "grad_norm": 0.46270009875297546, + "learning_rate": 0.0010701520325162727, + "loss": 1.9147, + "step": 3470 + }, + { + "epoch": 0.3670886075949367, + "grad_norm": 0.573235034942627, + "learning_rate": 0.0010678798775467001, + "loss": 1.9172, + "step": 3480 + }, + { + "epoch": 0.3681434599156118, + "grad_norm": 0.4940800666809082, + "learning_rate": 0.0010656041605778832, + "loss": 1.9126, + "step": 3490 + }, + { + "epoch": 0.3691983122362869, + "grad_norm": 0.46269842982292175, + "learning_rate": 0.001063324907110342, + "loss": 1.8947, + "step": 3500 + }, + { + "epoch": 0.370253164556962, + "grad_norm": 0.6589504480361938, + "learning_rate": 0.0010610421426842241, + "loss": 1.9126, + "step": 3510 + }, + { + "epoch": 0.37130801687763715, + "grad_norm": 0.592200517654419, + "learning_rate": 0.00105875589287902, + "loss": 1.9055, + "step": 3520 + }, + { + "epoch": 0.37236286919831224, + "grad_norm": 0.5242502093315125, + "learning_rate": 0.0010564661833132752, + "loss": 1.9153, + "step": 3530 + }, + { + "epoch": 
0.37341772151898733, + "grad_norm": 0.633454442024231, + "learning_rate": 0.001054173039644303, + "loss": 1.9123, + "step": 3540 + }, + { + "epoch": 0.3744725738396624, + "grad_norm": 0.43604227900505066, + "learning_rate": 0.0010518764875678981, + "loss": 1.896, + "step": 3550 + }, + { + "epoch": 0.3755274261603376, + "grad_norm": 0.46865442395210266, + "learning_rate": 0.001049576552818048, + "loss": 1.8965, + "step": 3560 + }, + { + "epoch": 0.37658227848101267, + "grad_norm": 0.5873276591300964, + "learning_rate": 0.0010472732611666448, + "loss": 1.8955, + "step": 3570 + }, + { + "epoch": 0.37763713080168776, + "grad_norm": 0.5167538523674011, + "learning_rate": 0.0010449666384231954, + "loss": 1.8956, + "step": 3580 + }, + { + "epoch": 0.37869198312236285, + "grad_norm": 0.5381276607513428, + "learning_rate": 0.0010426567104345346, + "loss": 1.9053, + "step": 3590 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 0.5202476978302002, + "learning_rate": 0.0010403435030845332, + "loss": 1.8865, + "step": 3600 + }, + { + "epoch": 0.3808016877637131, + "grad_norm": 0.5648481249809265, + "learning_rate": 0.0010380270422938093, + "loss": 1.8932, + "step": 3610 + }, + { + "epoch": 0.3818565400843882, + "grad_norm": 0.5549129247665405, + "learning_rate": 0.0010357073540194362, + "loss": 1.8921, + "step": 3620 + }, + { + "epoch": 0.3829113924050633, + "grad_norm": 0.5572099685668945, + "learning_rate": 0.001033384464254655, + "loss": 1.8915, + "step": 3630 + }, + { + "epoch": 0.38396624472573837, + "grad_norm": 0.5873011946678162, + "learning_rate": 0.001031058399028579, + "loss": 1.8903, + "step": 3640 + }, + { + "epoch": 0.3850210970464135, + "grad_norm": 0.570690393447876, + "learning_rate": 0.001028729184405905, + "loss": 1.8956, + "step": 3650 + }, + { + "epoch": 0.3860759493670886, + "grad_norm": 0.5420499444007874, + "learning_rate": 0.0010263968464866201, + "loss": 1.8896, + "step": 3660 + }, + { + "epoch": 0.3871308016877637, + "grad_norm": 
0.5106627941131592, + "learning_rate": 0.0010240614114057098, + "loss": 1.881, + "step": 3670 + }, + { + "epoch": 0.3881856540084388, + "grad_norm": 0.6274330019950867, + "learning_rate": 0.001021722905332864, + "loss": 1.8836, + "step": 3680 + }, + { + "epoch": 0.38924050632911394, + "grad_norm": 0.5878546833992004, + "learning_rate": 0.0010193813544721855, + "loss": 1.8969, + "step": 3690 + }, + { + "epoch": 0.39029535864978904, + "grad_norm": 0.6412076354026794, + "learning_rate": 0.001017036785061895, + "loss": 1.8979, + "step": 3700 + }, + { + "epoch": 0.39135021097046413, + "grad_norm": 0.6662435531616211, + "learning_rate": 0.0010146892233740376, + "loss": 1.8855, + "step": 3710 + }, + { + "epoch": 0.3924050632911392, + "grad_norm": 0.5689896941184998, + "learning_rate": 0.0010123386957141883, + "loss": 1.8642, + "step": 3720 + }, + { + "epoch": 0.39345991561181437, + "grad_norm": 0.6295759081840515, + "learning_rate": 0.0010099852284211573, + "loss": 1.8843, + "step": 3730 + }, + { + "epoch": 0.39451476793248946, + "grad_norm": 0.5139994025230408, + "learning_rate": 0.0010076288478666944, + "loss": 1.8808, + "step": 3740 + }, + { + "epoch": 0.39556962025316456, + "grad_norm": 0.47630658745765686, + "learning_rate": 0.0010052695804551946, + "loss": 1.886, + "step": 3750 + }, + { + "epoch": 0.39662447257383965, + "grad_norm": 0.5225178003311157, + "learning_rate": 0.0010029074526234014, + "loss": 1.8788, + "step": 3760 + }, + { + "epoch": 0.39767932489451474, + "grad_norm": 0.4731677174568176, + "learning_rate": 0.0010005424908401104, + "loss": 1.8908, + "step": 3770 + }, + { + "epoch": 0.3987341772151899, + "grad_norm": 0.5094460844993591, + "learning_rate": 0.0009981747216058728, + "loss": 1.8734, + "step": 3780 + }, + { + "epoch": 0.399789029535865, + "grad_norm": 0.5344535708427429, + "learning_rate": 0.0009958041714526998, + "loss": 1.8765, + "step": 3790 + }, + { + "epoch": 0.4008438818565401, + "grad_norm": 0.5178588032722473, + "learning_rate": 
0.0009934308669437627, + "loss": 1.8819, + "step": 3800 + }, + { + "epoch": 0.40189873417721517, + "grad_norm": 0.5012313723564148, + "learning_rate": 0.0009910548346730972, + "loss": 1.874, + "step": 3810 + }, + { + "epoch": 0.4029535864978903, + "grad_norm": 0.49011969566345215, + "learning_rate": 0.0009886761012653062, + "loss": 1.8612, + "step": 3820 + }, + { + "epoch": 0.4040084388185654, + "grad_norm": 0.5833190679550171, + "learning_rate": 0.000986294693375258, + "loss": 1.8628, + "step": 3830 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 0.5859459042549133, + "learning_rate": 0.000983910637687791, + "loss": 1.8845, + "step": 3840 + }, + { + "epoch": 0.4061181434599156, + "grad_norm": 0.4532450735569, + "learning_rate": 0.0009815239609174138, + "loss": 1.8644, + "step": 3850 + }, + { + "epoch": 0.40717299578059074, + "grad_norm": 0.550556480884552, + "learning_rate": 0.0009791346898080043, + "loss": 1.8788, + "step": 3860 + }, + { + "epoch": 0.40822784810126583, + "grad_norm": 0.4845116436481476, + "learning_rate": 0.0009767428511325122, + "loss": 1.8647, + "step": 3870 + }, + { + "epoch": 0.4092827004219409, + "grad_norm": 0.48399507999420166, + "learning_rate": 0.0009743484716926576, + "loss": 1.8534, + "step": 3880 + }, + { + "epoch": 0.410337552742616, + "grad_norm": 0.4463885426521301, + "learning_rate": 0.0009719515783186319, + "loss": 1.8577, + "step": 3890 + }, + { + "epoch": 0.41139240506329117, + "grad_norm": 0.9311710000038147, + "learning_rate": 0.0009695521978687951, + "loss": 1.8607, + "step": 3900 + }, + { + "epoch": 0.41244725738396626, + "grad_norm": 0.4749062955379486, + "learning_rate": 0.0009671503572293767, + "loss": 1.8601, + "step": 3910 + }, + { + "epoch": 0.41350210970464135, + "grad_norm": 0.4193228483200073, + "learning_rate": 0.0009647460833141742, + "loss": 1.8577, + "step": 3920 + }, + { + "epoch": 0.41455696202531644, + "grad_norm": 0.4457775354385376, + "learning_rate": 0.0009623394030642507, + "loss": 1.8589, + 
"step": 3930 + }, + { + "epoch": 0.41561181434599154, + "grad_norm": 0.48866182565689087, + "learning_rate": 0.0009599303434476334, + "loss": 1.8564, + "step": 3940 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 0.5462575554847717, + "learning_rate": 0.0009575189314590118, + "loss": 1.8617, + "step": 3950 + }, + { + "epoch": 0.4177215189873418, + "grad_norm": 0.4707881212234497, + "learning_rate": 0.0009551051941194346, + "loss": 1.8603, + "step": 3960 + }, + { + "epoch": 0.41877637130801687, + "grad_norm": 0.47637778520584106, + "learning_rate": 0.0009526891584760071, + "loss": 1.8488, + "step": 3970 + }, + { + "epoch": 0.41983122362869196, + "grad_norm": 0.46638888120651245, + "learning_rate": 0.0009502708516015889, + "loss": 1.8587, + "step": 3980 + }, + { + "epoch": 0.4208860759493671, + "grad_norm": 0.4925709664821625, + "learning_rate": 0.0009478503005944888, + "loss": 1.8499, + "step": 3990 + }, + { + "epoch": 0.4219409282700422, + "grad_norm": 0.5838210582733154, + "learning_rate": 0.0009454275325781632, + "loss": 1.865, + "step": 4000 + }, + { + "epoch": 0.4229957805907173, + "grad_norm": 0.5607469081878662, + "learning_rate": 0.0009430025747009104, + "loss": 1.8473, + "step": 4010 + }, + { + "epoch": 0.4240506329113924, + "grad_norm": 0.43578922748565674, + "learning_rate": 0.0009405754541355677, + "loss": 1.8516, + "step": 4020 + }, + { + "epoch": 0.42510548523206754, + "grad_norm": 0.49802809953689575, + "learning_rate": 0.0009381461980792061, + "loss": 1.8442, + "step": 4030 + }, + { + "epoch": 0.42616033755274263, + "grad_norm": 0.4359084665775299, + "learning_rate": 0.0009357148337528256, + "loss": 1.852, + "step": 4040 + }, + { + "epoch": 0.4272151898734177, + "grad_norm": 0.5608870983123779, + "learning_rate": 0.0009332813884010511, + "loss": 1.853, + "step": 4050 + }, + { + "epoch": 0.4282700421940928, + "grad_norm": 0.4953092038631439, + "learning_rate": 0.0009308458892918259, + "loss": 1.8593, + "step": 4060 + }, + { + "epoch": 
0.4293248945147679, + "grad_norm": 0.49968084692955017, + "learning_rate": 0.0009284083637161064, + "loss": 1.8536, + "step": 4070 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 0.7764460444450378, + "learning_rate": 0.0009259688389875574, + "loss": 1.8595, + "step": 4080 + }, + { + "epoch": 0.43143459915611815, + "grad_norm": 0.55796217918396, + "learning_rate": 0.0009235273424422442, + "loss": 1.8499, + "step": 4090 + }, + { + "epoch": 0.43248945147679324, + "grad_norm": 0.5775794982910156, + "learning_rate": 0.0009210839014383282, + "loss": 1.8492, + "step": 4100 + }, + { + "epoch": 0.43354430379746833, + "grad_norm": 0.6775500774383545, + "learning_rate": 0.0009186385433557584, + "loss": 1.8429, + "step": 4110 + }, + { + "epoch": 0.4345991561181435, + "grad_norm": 0.4885218143463135, + "learning_rate": 0.0009161912955959668, + "loss": 1.8456, + "step": 4120 + }, + { + "epoch": 0.4356540084388186, + "grad_norm": 0.5525714159011841, + "learning_rate": 0.000913742185581559, + "loss": 1.8395, + "step": 4130 + }, + { + "epoch": 0.43670886075949367, + "grad_norm": 0.46920889616012573, + "learning_rate": 0.0009112912407560086, + "loss": 1.8389, + "step": 4140 + }, + { + "epoch": 0.43776371308016876, + "grad_norm": 0.5018758773803711, + "learning_rate": 0.0009088384885833495, + "loss": 1.8504, + "step": 4150 + }, + { + "epoch": 0.4388185654008439, + "grad_norm": 0.4722701907157898, + "learning_rate": 0.000906383956547867, + "loss": 1.8376, + "step": 4160 + }, + { + "epoch": 0.439873417721519, + "grad_norm": 0.502614438533783, + "learning_rate": 0.0009039276721537915, + "loss": 1.8411, + "step": 4170 + }, + { + "epoch": 0.4409282700421941, + "grad_norm": 0.43983814120292664, + "learning_rate": 0.0009014696629249886, + "loss": 1.8349, + "step": 4180 + }, + { + "epoch": 0.4419831223628692, + "grad_norm": 0.4827621877193451, + "learning_rate": 0.0008990099564046522, + "loss": 1.8435, + "step": 4190 + }, + { + "epoch": 0.4430379746835443, + "grad_norm": 
0.5437732934951782, + "learning_rate": 0.0008965485801549946, + "loss": 1.8411, + "step": 4200 + }, + { + "epoch": 0.4440928270042194, + "grad_norm": 0.4678134620189667, + "learning_rate": 0.000894085561756939, + "loss": 1.8277, + "step": 4210 + }, + { + "epoch": 0.4451476793248945, + "grad_norm": 0.5156684517860413, + "learning_rate": 0.0008916209288098088, + "loss": 1.8383, + "step": 4220 + }, + { + "epoch": 0.4462025316455696, + "grad_norm": 0.4900006949901581, + "learning_rate": 0.0008891547089310198, + "loss": 1.8354, + "step": 4230 + }, + { + "epoch": 0.4472573839662447, + "grad_norm": 0.467733234167099, + "learning_rate": 0.0008866869297557699, + "loss": 1.8359, + "step": 4240 + }, + { + "epoch": 0.44831223628691985, + "grad_norm": 0.6935895681381226, + "learning_rate": 0.0008842176189367299, + "loss": 1.8385, + "step": 4250 + }, + { + "epoch": 0.44936708860759494, + "grad_norm": 0.6533648371696472, + "learning_rate": 0.0008817468041437329, + "loss": 1.8424, + "step": 4260 + }, + { + "epoch": 0.45042194092827004, + "grad_norm": 0.46863996982574463, + "learning_rate": 0.0008792745130634654, + "loss": 1.8306, + "step": 4270 + }, + { + "epoch": 0.45147679324894513, + "grad_norm": 0.5071920156478882, + "learning_rate": 0.0008768007733991561, + "loss": 1.8244, + "step": 4280 + }, + { + "epoch": 0.4525316455696203, + "grad_norm": 0.4357513189315796, + "learning_rate": 0.0008743256128702658, + "loss": 1.8344, + "step": 4290 + }, + { + "epoch": 0.45358649789029537, + "grad_norm": 0.47574949264526367, + "learning_rate": 0.0008718490592121768, + "loss": 1.8356, + "step": 4300 + }, + { + "epoch": 0.45464135021097046, + "grad_norm": 0.5646613836288452, + "learning_rate": 0.0008693711401758822, + "loss": 1.8301, + "step": 4310 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 0.4963986575603485, + "learning_rate": 0.0008668918835276747, + "loss": 1.8215, + "step": 4320 + }, + { + "epoch": 0.45675105485232065, + "grad_norm": 0.4104016125202179, + "learning_rate": 
0.0008644113170488355, + "loss": 1.8329, + "step": 4330 + }, + { + "epoch": 0.4578059071729958, + "grad_norm": 0.5686266422271729, + "learning_rate": 0.0008619294685353235, + "loss": 1.8215, + "step": 4340 + }, + { + "epoch": 0.4588607594936709, + "grad_norm": 0.5064616203308105, + "learning_rate": 0.0008594463657974627, + "loss": 1.8277, + "step": 4350 + }, + { + "epoch": 0.459915611814346, + "grad_norm": 0.4528553783893585, + "learning_rate": 0.0008569620366596322, + "loss": 1.8407, + "step": 4360 + }, + { + "epoch": 0.4609704641350211, + "grad_norm": 0.44042637944221497, + "learning_rate": 0.000854476508959953, + "loss": 1.8162, + "step": 4370 + }, + { + "epoch": 0.4620253164556962, + "grad_norm": 0.5440728664398193, + "learning_rate": 0.0008519898105499762, + "loss": 1.8218, + "step": 4380 + }, + { + "epoch": 0.4630801687763713, + "grad_norm": 0.5407334566116333, + "learning_rate": 0.0008495019692943721, + "loss": 1.8177, + "step": 4390 + }, + { + "epoch": 0.4641350210970464, + "grad_norm": 0.5743148326873779, + "learning_rate": 0.0008470130130706166, + "loss": 1.8236, + "step": 4400 + }, + { + "epoch": 0.4651898734177215, + "grad_norm": 0.5018296241760254, + "learning_rate": 0.0008445229697686795, + "loss": 1.8361, + "step": 4410 + }, + { + "epoch": 0.46624472573839665, + "grad_norm": 0.42730623483657837, + "learning_rate": 0.0008420318672907119, + "loss": 1.8184, + "step": 4420 + }, + { + "epoch": 0.46729957805907174, + "grad_norm": 0.582163393497467, + "learning_rate": 0.0008395397335507334, + "loss": 1.8233, + "step": 4430 + }, + { + "epoch": 0.46835443037974683, + "grad_norm": 0.5220428109169006, + "learning_rate": 0.0008370465964743196, + "loss": 1.8195, + "step": 4440 + }, + { + "epoch": 0.4694092827004219, + "grad_norm": 0.4716550409793854, + "learning_rate": 0.0008345524839982886, + "loss": 1.8261, + "step": 4450 + }, + { + "epoch": 0.4704641350210971, + "grad_norm": 0.5104169845581055, + "learning_rate": 0.0008320574240703886, + "loss": 1.8132, + 
"step": 4460 + }, + { + "epoch": 0.47151898734177217, + "grad_norm": 0.46525123715400696, + "learning_rate": 0.0008295614446489842, + "loss": 1.8241, + "step": 4470 + }, + { + "epoch": 0.47257383966244726, + "grad_norm": 0.6402307748794556, + "learning_rate": 0.0008270645737027441, + "loss": 1.8289, + "step": 4480 + }, + { + "epoch": 0.47362869198312235, + "grad_norm": 0.46442320942878723, + "learning_rate": 0.0008245668392103259, + "loss": 1.813, + "step": 4490 + }, + { + "epoch": 0.47468354430379744, + "grad_norm": 0.5230414867401123, + "learning_rate": 0.0008220682691600645, + "loss": 1.8139, + "step": 4500 + }, + { + "epoch": 0.4757383966244726, + "grad_norm": 0.505630612373352, + "learning_rate": 0.0008195688915496571, + "loss": 1.8185, + "step": 4510 + }, + { + "epoch": 0.4767932489451477, + "grad_norm": 0.6342094540596008, + "learning_rate": 0.0008170687343858506, + "loss": 1.8026, + "step": 4520 + }, + { + "epoch": 0.4778481012658228, + "grad_norm": 0.42528700828552246, + "learning_rate": 0.0008145678256841265, + "loss": 1.8191, + "step": 4530 + }, + { + "epoch": 0.47890295358649787, + "grad_norm": 0.5394435524940491, + "learning_rate": 0.0008120661934683879, + "loss": 1.8214, + "step": 4540 + }, + { + "epoch": 0.479957805907173, + "grad_norm": 0.65519118309021, + "learning_rate": 0.0008095638657706456, + "loss": 1.8189, + "step": 4550 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 0.521561324596405, + "learning_rate": 0.000807060870630703, + "loss": 1.804, + "step": 4560 + }, + { + "epoch": 0.4820675105485232, + "grad_norm": 0.5343124270439148, + "learning_rate": 0.000804557236095843, + "loss": 1.7861, + "step": 4570 + }, + { + "epoch": 0.4831223628691983, + "grad_norm": 0.56565922498703, + "learning_rate": 0.0008020529902205129, + "loss": 1.8125, + "step": 4580 + }, + { + "epoch": 0.48417721518987344, + "grad_norm": 0.4513322710990906, + "learning_rate": 0.0007995481610660108, + "loss": 1.7996, + "step": 4590 + }, + { + "epoch": 
0.48523206751054854, + "grad_norm": 0.4842698872089386, + "learning_rate": 0.0007970427767001702, + "loss": 1.8162, + "step": 4600 + }, + { + "epoch": 0.48628691983122363, + "grad_norm": 0.561750054359436, + "learning_rate": 0.0007945368651970464, + "loss": 1.8209, + "step": 4610 + }, + { + "epoch": 0.4873417721518987, + "grad_norm": 0.44747141003608704, + "learning_rate": 0.0007920304546366013, + "loss": 1.7924, + "step": 4620 + }, + { + "epoch": 0.4883966244725738, + "grad_norm": 0.5409219861030579, + "learning_rate": 0.000789523573104389, + "loss": 1.8131, + "step": 4630 + }, + { + "epoch": 0.48945147679324896, + "grad_norm": 0.6380155086517334, + "learning_rate": 0.0007870162486912414, + "loss": 1.8139, + "step": 4640 + }, + { + "epoch": 0.49050632911392406, + "grad_norm": 0.4663771986961365, + "learning_rate": 0.0007845085094929527, + "loss": 1.8126, + "step": 4650 + }, + { + "epoch": 0.49156118143459915, + "grad_norm": 0.529172420501709, + "learning_rate": 0.0007820003836099649, + "loss": 1.8011, + "step": 4660 + }, + { + "epoch": 0.49261603375527424, + "grad_norm": 0.47315338253974915, + "learning_rate": 0.0007794918991470537, + "loss": 1.8011, + "step": 4670 + }, + { + "epoch": 0.4936708860759494, + "grad_norm": 0.48217272758483887, + "learning_rate": 0.0007769830842130119, + "loss": 1.796, + "step": 4680 + }, + { + "epoch": 0.4947257383966245, + "grad_norm": 0.5863925218582153, + "learning_rate": 0.0007744739669203361, + "loss": 1.7949, + "step": 4690 + }, + { + "epoch": 0.4957805907172996, + "grad_norm": 0.48721200227737427, + "learning_rate": 0.0007719645753849108, + "loss": 1.8066, + "step": 4700 + }, + { + "epoch": 0.49683544303797467, + "grad_norm": 0.49619707465171814, + "learning_rate": 0.0007694549377256932, + "loss": 1.8053, + "step": 4710 + }, + { + "epoch": 0.4978902953586498, + "grad_norm": 0.5473717451095581, + "learning_rate": 0.0007669450820643987, + "loss": 1.8003, + "step": 4720 + }, + { + "epoch": 0.4989451476793249, + "grad_norm": 
0.5889704823493958, + "learning_rate": 0.0007644350365251855, + "loss": 1.799, + "step": 4730 + }, + { + "epoch": 0.5, + "grad_norm": 0.6119542121887207, + "learning_rate": 0.0007619248292343399, + "loss": 1.7994, + "step": 4740 + }, + { + "epoch": 0.5010548523206751, + "grad_norm": 0.8032838106155396, + "learning_rate": 0.0007594144883199599, + "loss": 1.8145, + "step": 4750 + }, + { + "epoch": 0.5021097046413502, + "grad_norm": 0.5136709213256836, + "learning_rate": 0.0007569040419116413, + "loss": 1.7994, + "step": 4760 + }, + { + "epoch": 0.5031645569620253, + "grad_norm": 0.5281930565834045, + "learning_rate": 0.000754393518140162, + "loss": 1.7979, + "step": 4770 + }, + { + "epoch": 0.5042194092827004, + "grad_norm": 0.5089260339736938, + "learning_rate": 0.0007518829451371665, + "loss": 1.8073, + "step": 4780 + }, + { + "epoch": 0.5052742616033755, + "grad_norm": 0.4739396572113037, + "learning_rate": 0.0007493723510348516, + "loss": 1.7912, + "step": 4790 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 0.4867638051509857, + "learning_rate": 0.0007468617639656496, + "loss": 1.7948, + "step": 4800 + }, + { + "epoch": 0.5073839662447257, + "grad_norm": 0.5881825685501099, + "learning_rate": 0.0007443512120619144, + "loss": 1.7914, + "step": 4810 + }, + { + "epoch": 0.5084388185654009, + "grad_norm": 0.5565566420555115, + "learning_rate": 0.0007418407234556067, + "loss": 1.8134, + "step": 4820 + }, + { + "epoch": 0.509493670886076, + "grad_norm": 0.43923911452293396, + "learning_rate": 0.0007393303262779767, + "loss": 1.7947, + "step": 4830 + }, + { + "epoch": 0.510548523206751, + "grad_norm": 0.5792461037635803, + "learning_rate": 0.0007368200486592507, + "loss": 1.7887, + "step": 4840 + }, + { + "epoch": 0.5116033755274262, + "grad_norm": 0.6220319271087646, + "learning_rate": 0.0007343099187283149, + "loss": 1.801, + "step": 4850 + }, + { + "epoch": 0.5126582278481012, + "grad_norm": 0.5463800430297852, + "learning_rate": 0.0007317999646124011, + 
"loss": 1.7868, + "step": 4860 + }, + { + "epoch": 0.5137130801687764, + "grad_norm": 0.500849723815918, + "learning_rate": 0.0007292902144367704, + "loss": 1.7793, + "step": 4870 + }, + { + "epoch": 0.5147679324894515, + "grad_norm": 0.4989151954650879, + "learning_rate": 0.0007267806963243995, + "loss": 1.8028, + "step": 4880 + }, + { + "epoch": 0.5158227848101266, + "grad_norm": 0.6245041489601135, + "learning_rate": 0.0007242714383956639, + "loss": 1.7899, + "step": 4890 + }, + { + "epoch": 0.5168776371308017, + "grad_norm": 0.46404677629470825, + "learning_rate": 0.000721762468768024, + "loss": 1.801, + "step": 4900 + }, + { + "epoch": 0.5179324894514767, + "grad_norm": 0.5411932468414307, + "learning_rate": 0.0007192538155557094, + "loss": 1.7915, + "step": 4910 + }, + { + "epoch": 0.5189873417721519, + "grad_norm": 0.5335944890975952, + "learning_rate": 0.0007167455068694046, + "loss": 1.7814, + "step": 4920 + }, + { + "epoch": 0.520042194092827, + "grad_norm": 0.525510311126709, + "learning_rate": 0.000714237570815933, + "loss": 1.7905, + "step": 4930 + }, + { + "epoch": 0.5210970464135021, + "grad_norm": 0.5860073566436768, + "learning_rate": 0.0007117300354979423, + "loss": 1.7859, + "step": 4940 + }, + { + "epoch": 0.5221518987341772, + "grad_norm": 0.5564677119255066, + "learning_rate": 0.000709222929013591, + "loss": 1.7956, + "step": 4950 + }, + { + "epoch": 0.5232067510548524, + "grad_norm": 0.4903349280357361, + "learning_rate": 0.0007067162794562309, + "loss": 1.7763, + "step": 4960 + }, + { + "epoch": 0.5242616033755274, + "grad_norm": 0.7321206331253052, + "learning_rate": 0.0007042101149140943, + "loss": 1.789, + "step": 4970 + }, + { + "epoch": 0.5253164556962026, + "grad_norm": 0.5303898453712463, + "learning_rate": 0.0007017044634699787, + "loss": 1.7804, + "step": 4980 + }, + { + "epoch": 0.5263713080168776, + "grad_norm": 0.5499765872955322, + "learning_rate": 0.0006991993532009319, + "loss": 1.7783, + "step": 4990 + }, + { + "epoch": 
0.5274261603375527, + "grad_norm": 0.4960661232471466, + "learning_rate": 0.0006966948121779378, + "loss": 1.793, + "step": 5000 + }, + { + "epoch": 0.5284810126582279, + "grad_norm": 0.5083739757537842, + "learning_rate": 0.000694190868465601, + "loss": 1.7841, + "step": 5010 + }, + { + "epoch": 0.5295358649789029, + "grad_norm": 0.5216884016990662, + "learning_rate": 0.0006916875501218343, + "loss": 1.7788, + "step": 5020 + }, + { + "epoch": 0.5305907172995781, + "grad_norm": 0.45407798886299133, + "learning_rate": 0.0006891848851975416, + "loss": 1.7629, + "step": 5030 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 0.5876054763793945, + "learning_rate": 0.0006866829017363054, + "loss": 1.7899, + "step": 5040 + }, + { + "epoch": 0.5327004219409283, + "grad_norm": 0.4479530453681946, + "learning_rate": 0.0006841816277740722, + "loss": 1.7771, + "step": 5050 + }, + { + "epoch": 0.5337552742616034, + "grad_norm": 0.45906341075897217, + "learning_rate": 0.0006816810913388379, + "loss": 1.7896, + "step": 5060 + }, + { + "epoch": 0.5348101265822784, + "grad_norm": 0.6370983123779297, + "learning_rate": 0.0006791813204503342, + "loss": 1.7769, + "step": 5070 + }, + { + "epoch": 0.5358649789029536, + "grad_norm": 0.6594470739364624, + "learning_rate": 0.0006766823431197147, + "loss": 1.792, + "step": 5080 + }, + { + "epoch": 0.5369198312236287, + "grad_norm": 0.6397828459739685, + "learning_rate": 0.0006741841873492406, + "loss": 1.7811, + "step": 5090 + }, + { + "epoch": 0.5379746835443038, + "grad_norm": 0.48662564158439636, + "learning_rate": 0.0006716868811319671, + "loss": 1.7634, + "step": 5100 + }, + { + "epoch": 0.5390295358649789, + "grad_norm": 0.5116204023361206, + "learning_rate": 0.0006691904524514297, + "loss": 1.7772, + "step": 5110 + }, + { + "epoch": 0.540084388185654, + "grad_norm": 0.5872006416320801, + "learning_rate": 0.0006666949292813306, + "loss": 1.7744, + "step": 5120 + }, + { + "epoch": 0.5411392405063291, + "grad_norm": 
0.5688655972480774, + "learning_rate": 0.0006642003395852258, + "loss": 1.7723, + "step": 5130 + }, + { + "epoch": 0.5421940928270043, + "grad_norm": 0.5511744618415833, + "learning_rate": 0.0006617067113162103, + "loss": 1.7901, + "step": 5140 + }, + { + "epoch": 0.5432489451476793, + "grad_norm": 0.5184271335601807, + "learning_rate": 0.0006592140724166073, + "loss": 1.7852, + "step": 5150 + }, + { + "epoch": 0.5443037974683544, + "grad_norm": 0.6910237669944763, + "learning_rate": 0.0006567224508176523, + "loss": 1.7741, + "step": 5160 + }, + { + "epoch": 0.5453586497890295, + "grad_norm": 0.47844454646110535, + "learning_rate": 0.0006542318744391821, + "loss": 1.7761, + "step": 5170 + }, + { + "epoch": 0.5464135021097046, + "grad_norm": 0.5863401889801025, + "learning_rate": 0.0006517423711893209, + "loss": 1.7888, + "step": 5180 + }, + { + "epoch": 0.5474683544303798, + "grad_norm": 0.48753347992897034, + "learning_rate": 0.0006492539689641685, + "loss": 1.7666, + "step": 5190 + }, + { + "epoch": 0.5485232067510548, + "grad_norm": 0.5974211096763611, + "learning_rate": 0.0006467666956474865, + "loss": 1.7781, + "step": 5200 + }, + { + "epoch": 0.54957805907173, + "grad_norm": 0.5658529996871948, + "learning_rate": 0.0006442805791103873, + "loss": 1.7635, + "step": 5210 + }, + { + "epoch": 0.5506329113924051, + "grad_norm": 0.5463010668754578, + "learning_rate": 0.0006417956472110205, + "loss": 1.7755, + "step": 5220 + }, + { + "epoch": 0.5516877637130801, + "grad_norm": 0.5274777412414551, + "learning_rate": 0.0006393119277942614, + "loss": 1.7666, + "step": 5230 + }, + { + "epoch": 0.5527426160337553, + "grad_norm": 0.47971370816230774, + "learning_rate": 0.0006368294486913987, + "loss": 1.774, + "step": 5240 + }, + { + "epoch": 0.5537974683544303, + "grad_norm": 0.5172078609466553, + "learning_rate": 0.0006343482377198232, + "loss": 1.7682, + "step": 5250 + }, + { + "epoch": 0.5548523206751055, + "grad_norm": 0.4493597745895386, + "learning_rate": 
0.0006318683226827151, + "loss": 1.7735, + "step": 5260 + }, + { + "epoch": 0.5559071729957806, + "grad_norm": 0.5698638558387756, + "learning_rate": 0.0006293897313687331, + "loss": 1.7794, + "step": 5270 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 0.5641869306564331, + "learning_rate": 0.0006269124915517037, + "loss": 1.7659, + "step": 5280 + }, + { + "epoch": 0.5580168776371308, + "grad_norm": 0.4630112051963806, + "learning_rate": 0.0006244366309903084, + "loss": 1.7702, + "step": 5290 + }, + { + "epoch": 0.5590717299578059, + "grad_norm": 0.5254722833633423, + "learning_rate": 0.0006219621774277737, + "loss": 1.7722, + "step": 5300 + }, + { + "epoch": 0.560126582278481, + "grad_norm": 0.5389178395271301, + "learning_rate": 0.00061948915859156, + "loss": 1.7627, + "step": 5310 + }, + { + "epoch": 0.5611814345991561, + "grad_norm": 0.5503820776939392, + "learning_rate": 0.0006170176021930509, + "loss": 1.7573, + "step": 5320 + }, + { + "epoch": 0.5622362869198312, + "grad_norm": 0.4588729441165924, + "learning_rate": 0.0006145475359272424, + "loss": 1.7715, + "step": 5330 + }, + { + "epoch": 0.5632911392405063, + "grad_norm": 0.5164360404014587, + "learning_rate": 0.0006120789874724336, + "loss": 1.7646, + "step": 5340 + }, + { + "epoch": 0.5643459915611815, + "grad_norm": 0.48111769556999207, + "learning_rate": 0.0006096119844899151, + "loss": 1.7574, + "step": 5350 + }, + { + "epoch": 0.5654008438818565, + "grad_norm": 0.6208154559135437, + "learning_rate": 0.0006071465546236601, + "loss": 1.7652, + "step": 5360 + }, + { + "epoch": 0.5664556962025317, + "grad_norm": 0.5331301689147949, + "learning_rate": 0.0006046827255000135, + "loss": 1.7653, + "step": 5370 + }, + { + "epoch": 0.5675105485232067, + "grad_norm": 0.45333394408226013, + "learning_rate": 0.0006022205247273845, + "loss": 1.7567, + "step": 5380 + }, + { + "epoch": 0.5685654008438819, + "grad_norm": 0.5300566554069519, + "learning_rate": 0.0005997599798959343, + "loss": 1.7575, + "step": 
5390 + }, + { + "epoch": 0.569620253164557, + "grad_norm": 0.5371107459068298, + "learning_rate": 0.0005973011185772694, + "loss": 1.757, + "step": 5400 + }, + { + "epoch": 0.570675105485232, + "grad_norm": 0.45429641008377075, + "learning_rate": 0.0005948439683241318, + "loss": 1.7605, + "step": 5410 + }, + { + "epoch": 0.5717299578059072, + "grad_norm": 0.4869500994682312, + "learning_rate": 0.0005923885566700896, + "loss": 1.7563, + "step": 5420 + }, + { + "epoch": 0.5727848101265823, + "grad_norm": 0.4698178172111511, + "learning_rate": 0.0005899349111292293, + "loss": 1.7515, + "step": 5430 + }, + { + "epoch": 0.5738396624472574, + "grad_norm": 0.47412389516830444, + "learning_rate": 0.0005874830591958474, + "loss": 1.754, + "step": 5440 + }, + { + "epoch": 0.5748945147679325, + "grad_norm": 0.5239652395248413, + "learning_rate": 0.000585033028344142, + "loss": 1.759, + "step": 5450 + }, + { + "epoch": 0.5759493670886076, + "grad_norm": 0.46489399671554565, + "learning_rate": 0.0005825848460279048, + "loss": 1.7573, + "step": 5460 + }, + { + "epoch": 0.5770042194092827, + "grad_norm": 0.5264482498168945, + "learning_rate": 0.0005801385396802146, + "loss": 1.7572, + "step": 5470 + }, + { + "epoch": 0.5780590717299579, + "grad_norm": 0.48353108763694763, + "learning_rate": 0.0005776941367131282, + "loss": 1.7593, + "step": 5480 + }, + { + "epoch": 0.5791139240506329, + "grad_norm": 0.47418880462646484, + "learning_rate": 0.0005752516645173745, + "loss": 1.759, + "step": 5490 + }, + { + "epoch": 0.580168776371308, + "grad_norm": 0.45828309655189514, + "learning_rate": 0.0005728111504620472, + "loss": 1.7556, + "step": 5500 + }, + { + "epoch": 0.5812236286919831, + "grad_norm": 0.5585699677467346, + "learning_rate": 0.0005703726218942976, + "loss": 1.7602, + "step": 5510 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 0.5797269344329834, + "learning_rate": 0.0005679361061390295, + "loss": 1.7525, + "step": 5520 + }, + { + "epoch": 0.5833333333333334, + 
"grad_norm": 0.5022396445274353, + "learning_rate": 0.0005655016304985908, + "loss": 1.7468, + "step": 5530 + }, + { + "epoch": 0.5843881856540084, + "grad_norm": 0.5571432709693909, + "learning_rate": 0.0005630692222524709, + "loss": 1.7451, + "step": 5540 + }, + { + "epoch": 0.5854430379746836, + "grad_norm": 0.5242838263511658, + "learning_rate": 0.0005606389086569911, + "loss": 1.7636, + "step": 5550 + }, + { + "epoch": 0.5864978902953587, + "grad_norm": 0.5125951766967773, + "learning_rate": 0.0005582107169450023, + "loss": 1.7661, + "step": 5560 + }, + { + "epoch": 0.5875527426160337, + "grad_norm": 0.5367953181266785, + "learning_rate": 0.0005557846743255783, + "loss": 1.7541, + "step": 5570 + }, + { + "epoch": 0.5886075949367089, + "grad_norm": 0.6833013892173767, + "learning_rate": 0.0005533608079837109, + "loss": 1.7481, + "step": 5580 + }, + { + "epoch": 0.5896624472573839, + "grad_norm": 0.5281261801719666, + "learning_rate": 0.0005509391450800061, + "loss": 1.7515, + "step": 5590 + }, + { + "epoch": 0.5907172995780591, + "grad_norm": 0.4542435109615326, + "learning_rate": 0.0005485197127503795, + "loss": 1.7484, + "step": 5600 + }, + { + "epoch": 0.5917721518987342, + "grad_norm": 0.48426374793052673, + "learning_rate": 0.0005461025381057516, + "loss": 1.7513, + "step": 5610 + }, + { + "epoch": 0.5928270042194093, + "grad_norm": 0.44537362456321716, + "learning_rate": 0.0005436876482317444, + "loss": 1.7602, + "step": 5620 + }, + { + "epoch": 0.5938818565400844, + "grad_norm": 0.5144262909889221, + "learning_rate": 0.0005412750701883782, + "loss": 1.7538, + "step": 5630 + }, + { + "epoch": 0.5949367088607594, + "grad_norm": 0.6181552410125732, + "learning_rate": 0.0005388648310097682, + "loss": 1.7631, + "step": 5640 + }, + { + "epoch": 0.5959915611814346, + "grad_norm": 0.5603669881820679, + "learning_rate": 0.000536456957703821, + "loss": 1.7559, + "step": 5650 + }, + { + "epoch": 0.5970464135021097, + "grad_norm": 0.5585688352584839, + 
"learning_rate": 0.0005340514772519324, + "loss": 1.7509, + "step": 5660 + }, + { + "epoch": 0.5981012658227848, + "grad_norm": 0.6304674744606018, + "learning_rate": 0.0005316484166086863, + "loss": 1.7598, + "step": 5670 + }, + { + "epoch": 0.5991561181434599, + "grad_norm": 0.47473809123039246, + "learning_rate": 0.00052924780270155, + "loss": 1.7575, + "step": 5680 + }, + { + "epoch": 0.6002109704641351, + "grad_norm": 0.5999768972396851, + "learning_rate": 0.0005268496624305747, + "loss": 1.7359, + "step": 5690 + }, + { + "epoch": 0.6012658227848101, + "grad_norm": 0.6290451884269714, + "learning_rate": 0.0005244540226680931, + "loss": 1.744, + "step": 5700 + }, + { + "epoch": 0.6023206751054853, + "grad_norm": 0.5616927742958069, + "learning_rate": 0.0005220609102584185, + "loss": 1.7458, + "step": 5710 + }, + { + "epoch": 0.6033755274261603, + "grad_norm": 0.5754327178001404, + "learning_rate": 0.0005196703520175437, + "loss": 1.7488, + "step": 5720 + }, + { + "epoch": 0.6044303797468354, + "grad_norm": 0.4909049868583679, + "learning_rate": 0.0005172823747328415, + "loss": 1.7419, + "step": 5730 + }, + { + "epoch": 0.6054852320675106, + "grad_norm": 0.477825790643692, + "learning_rate": 0.0005148970051627632, + "loss": 1.7456, + "step": 5740 + }, + { + "epoch": 0.6065400843881856, + "grad_norm": 0.5997064113616943, + "learning_rate": 0.0005125142700365394, + "loss": 1.719, + "step": 5750 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 0.5262393951416016, + "learning_rate": 0.000510134196053881, + "loss": 1.7309, + "step": 5760 + }, + { + "epoch": 0.6086497890295358, + "grad_norm": 0.6012929677963257, + "learning_rate": 0.0005077568098846789, + "loss": 1.7351, + "step": 5770 + }, + { + "epoch": 0.609704641350211, + "grad_norm": 0.5543552041053772, + "learning_rate": 0.000505382138168706, + "loss": 1.7407, + "step": 5780 + }, + { + "epoch": 0.6107594936708861, + "grad_norm": 0.5643839836120605, + "learning_rate": 0.0005030102075153181, + "loss": 
1.7348, + "step": 5790 + }, + { + "epoch": 0.6118143459915611, + "grad_norm": 0.6065688729286194, + "learning_rate": 0.0005006410445031569, + "loss": 1.7426, + "step": 5800 + }, + { + "epoch": 0.6128691983122363, + "grad_norm": 0.4972877502441406, + "learning_rate": 0.0004982746756798507, + "loss": 1.7446, + "step": 5810 + }, + { + "epoch": 0.6139240506329114, + "grad_norm": 0.6370704770088196, + "learning_rate": 0.0004959111275617174, + "loss": 1.7549, + "step": 5820 + }, + { + "epoch": 0.6149789029535865, + "grad_norm": 0.5200682878494263, + "learning_rate": 0.0004935504266334677, + "loss": 1.739, + "step": 5830 + }, + { + "epoch": 0.6160337552742616, + "grad_norm": 0.5959188938140869, + "learning_rate": 0.0004911925993479085, + "loss": 1.7518, + "step": 5840 + }, + { + "epoch": 0.6170886075949367, + "grad_norm": 0.5291982293128967, + "learning_rate": 0.0004888376721256456, + "loss": 1.7543, + "step": 5850 + }, + { + "epoch": 0.6181434599156118, + "grad_norm": 0.5626165866851807, + "learning_rate": 0.00048648567135478805, + "loss": 1.7423, + "step": 5860 + }, + { + "epoch": 0.619198312236287, + "grad_norm": 0.49730008840560913, + "learning_rate": 0.0004841366233906538, + "loss": 1.7357, + "step": 5870 + }, + { + "epoch": 0.620253164556962, + "grad_norm": 0.47133833169937134, + "learning_rate": 0.0004817905545554717, + "loss": 1.7448, + "step": 5880 + }, + { + "epoch": 0.6213080168776371, + "grad_norm": 0.562198281288147, + "learning_rate": 0.00047944749113808884, + "loss": 1.7372, + "step": 5890 + }, + { + "epoch": 0.6223628691983122, + "grad_norm": 0.5078467726707458, + "learning_rate": 0.00047710745939367474, + "loss": 1.7398, + "step": 5900 + }, + { + "epoch": 0.6234177215189873, + "grad_norm": 0.7159501314163208, + "learning_rate": 0.0004747704855434278, + "loss": 1.7326, + "step": 5910 + }, + { + "epoch": 0.6244725738396625, + "grad_norm": 0.717029333114624, + "learning_rate": 0.0004724365957742809, + "loss": 1.7273, + "step": 5920 + }, + { + "epoch": 
0.6255274261603375, + "grad_norm": 0.4691425859928131, + "learning_rate": 0.00047010581623860883, + "loss": 1.7342, + "step": 5930 + }, + { + "epoch": 0.6265822784810127, + "grad_norm": 0.5486435294151306, + "learning_rate": 0.0004677781730539342, + "loss": 1.7412, + "step": 5940 + }, + { + "epoch": 0.6276371308016878, + "grad_norm": 0.46719151735305786, + "learning_rate": 0.0004654536923026356, + "loss": 1.7278, + "step": 5950 + }, + { + "epoch": 0.6286919831223629, + "grad_norm": 0.6049476861953735, + "learning_rate": 0.00046313240003165466, + "loss": 1.7346, + "step": 5960 + }, + { + "epoch": 0.629746835443038, + "grad_norm": 0.5224735736846924, + "learning_rate": 0.0004608143222522048, + "loss": 1.7354, + "step": 5970 + }, + { + "epoch": 0.630801687763713, + "grad_norm": 0.5320291519165039, + "learning_rate": 0.0004584994849394795, + "loss": 1.719, + "step": 5980 + }, + { + "epoch": 0.6318565400843882, + "grad_norm": 0.53509122133255, + "learning_rate": 0.0004561879140323607, + "loss": 1.7405, + "step": 5990 + }, + { + "epoch": 0.6329113924050633, + "grad_norm": 0.5025362372398376, + "learning_rate": 0.0004538796354331298, + "loss": 1.7431, + "step": 6000 + }, + { + "epoch": 0.6339662447257384, + "grad_norm": 0.5328069925308228, + "learning_rate": 0.0004515746750071754, + "loss": 1.7366, + "step": 6010 + }, + { + "epoch": 0.6350210970464135, + "grad_norm": 0.49007970094680786, + "learning_rate": 0.0004492730585827046, + "loss": 1.7388, + "step": 6020 + }, + { + "epoch": 0.6360759493670886, + "grad_norm": 0.49502864480018616, + "learning_rate": 0.0004469748119504529, + "loss": 1.7213, + "step": 6030 + }, + { + "epoch": 0.6371308016877637, + "grad_norm": 0.6279551982879639, + "learning_rate": 0.0004446799608633964, + "loss": 1.7361, + "step": 6040 + }, + { + "epoch": 0.6381856540084389, + "grad_norm": 0.5425928831100464, + "learning_rate": 0.00044238853103646154, + "loss": 1.72, + "step": 6050 + }, + { + "epoch": 0.6392405063291139, + "grad_norm": 
0.6600133776664734, + "learning_rate": 0.00044010054814623925, + "loss": 1.7376, + "step": 6060 + }, + { + "epoch": 0.640295358649789, + "grad_norm": 0.49875983595848083, + "learning_rate": 0.0004378160378306944, + "loss": 1.729, + "step": 6070 + }, + { + "epoch": 0.6413502109704642, + "grad_norm": 0.6602660417556763, + "learning_rate": 0.00043553502568888095, + "loss": 1.7278, + "step": 6080 + }, + { + "epoch": 0.6424050632911392, + "grad_norm": 0.471870481967926, + "learning_rate": 0.0004332575372806534, + "loss": 1.7281, + "step": 6090 + }, + { + "epoch": 0.6434599156118144, + "grad_norm": 0.6140177249908447, + "learning_rate": 0.00043098359812638145, + "loss": 1.7199, + "step": 6100 + }, + { + "epoch": 0.6445147679324894, + "grad_norm": 0.4695698916912079, + "learning_rate": 0.00042871323370666383, + "loss": 1.7329, + "step": 6110 + }, + { + "epoch": 0.6455696202531646, + "grad_norm": 0.5330074429512024, + "learning_rate": 0.0004264464694620421, + "loss": 1.728, + "step": 6120 + }, + { + "epoch": 0.6466244725738397, + "grad_norm": 0.5303173065185547, + "learning_rate": 0.000424183330792717, + "loss": 1.7254, + "step": 6130 + }, + { + "epoch": 0.6476793248945147, + "grad_norm": 0.4868159890174866, + "learning_rate": 0.0004219238430582621, + "loss": 1.7284, + "step": 6140 + }, + { + "epoch": 0.6487341772151899, + "grad_norm": 0.4689774811267853, + "learning_rate": 0.0004196680315773408, + "loss": 1.7327, + "step": 6150 + }, + { + "epoch": 0.6497890295358649, + "grad_norm": 0.4770808815956116, + "learning_rate": 0.00041741592162742214, + "loss": 1.7261, + "step": 6160 + }, + { + "epoch": 0.6508438818565401, + "grad_norm": 0.5373700261116028, + "learning_rate": 0.0004151675384444978, + "loss": 1.7061, + "step": 6170 + }, + { + "epoch": 0.6518987341772152, + "grad_norm": 0.4945913851261139, + "learning_rate": 0.00041292290722279914, + "loss": 1.7298, + "step": 6180 + }, + { + "epoch": 0.6529535864978903, + "grad_norm": 0.600724995136261, + "learning_rate": 
0.00041068205311451517, + "loss": 1.7333, + "step": 6190 + }, + { + "epoch": 0.6540084388185654, + "grad_norm": 0.5088898539543152, + "learning_rate": 0.00040844500122951026, + "loss": 1.7247, + "step": 6200 + }, + { + "epoch": 0.6550632911392406, + "grad_norm": 0.5828891396522522, + "learning_rate": 0.00040621177663504313, + "loss": 1.7224, + "step": 6210 + }, + { + "epoch": 0.6561181434599156, + "grad_norm": 0.4740540087223053, + "learning_rate": 0.00040398240435548583, + "loss": 1.7276, + "step": 6220 + }, + { + "epoch": 0.6571729957805907, + "grad_norm": 0.4641067683696747, + "learning_rate": 0.00040175690937204324, + "loss": 1.7165, + "step": 6230 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 0.5898221731185913, + "learning_rate": 0.00039953531662247343, + "loss": 1.7198, + "step": 6240 + }, + { + "epoch": 0.6592827004219409, + "grad_norm": 0.5704010128974915, + "learning_rate": 0.0003973176510008075, + "loss": 1.7304, + "step": 6250 + }, + { + "epoch": 0.6603375527426161, + "grad_norm": 0.5211365818977356, + "learning_rate": 0.00039510393735707233, + "loss": 1.723, + "step": 6260 + }, + { + "epoch": 0.6613924050632911, + "grad_norm": 0.47254467010498047, + "learning_rate": 0.00039289420049700986, + "loss": 1.7305, + "step": 6270 + }, + { + "epoch": 0.6624472573839663, + "grad_norm": 0.49432697892189026, + "learning_rate": 0.0003906884651818006, + "loss": 1.7283, + "step": 6280 + }, + { + "epoch": 0.6635021097046413, + "grad_norm": 0.5214028358459473, + "learning_rate": 0.00038848675612778577, + "loss": 1.7236, + "step": 6290 + }, + { + "epoch": 0.6645569620253164, + "grad_norm": 0.46076950430870056, + "learning_rate": 0.00038628909800619046, + "loss": 1.7165, + "step": 6300 + }, + { + "epoch": 0.6656118143459916, + "grad_norm": 0.5042637586593628, + "learning_rate": 0.0003840955154428467, + "loss": 1.7162, + "step": 6310 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.5157408714294434, + "learning_rate": 0.00038190603301791864, + "loss": 
1.7147, + "step": 6320 + }, + { + "epoch": 0.6677215189873418, + "grad_norm": 0.5365900993347168, + "learning_rate": 0.0003797206752656258, + "loss": 1.7205, + "step": 6330 + }, + { + "epoch": 0.6687763713080169, + "grad_norm": 0.5030364990234375, + "learning_rate": 0.0003775394666739688, + "loss": 1.717, + "step": 6340 + }, + { + "epoch": 0.669831223628692, + "grad_norm": 0.5168613791465759, + "learning_rate": 0.00037536243168445507, + "loss": 1.7228, + "step": 6350 + }, + { + "epoch": 0.6708860759493671, + "grad_norm": 0.45606762170791626, + "learning_rate": 0.0003731895946918246, + "loss": 1.7164, + "step": 6360 + }, + { + "epoch": 0.6719409282700421, + "grad_norm": 0.49707797169685364, + "learning_rate": 0.0003710209800437769, + "loss": 1.7021, + "step": 6370 + }, + { + "epoch": 0.6729957805907173, + "grad_norm": 0.5001360177993774, + "learning_rate": 0.00036885661204069767, + "loss": 1.7075, + "step": 6380 + }, + { + "epoch": 0.6740506329113924, + "grad_norm": 0.5102643966674805, + "learning_rate": 0.0003666965149353878, + "loss": 1.7225, + "step": 6390 + }, + { + "epoch": 0.6751054852320675, + "grad_norm": 0.5698412656784058, + "learning_rate": 0.0003645407129327898, + "loss": 1.7194, + "step": 6400 + }, + { + "epoch": 0.6761603375527426, + "grad_norm": 0.6141307353973389, + "learning_rate": 0.00036238923018971783, + "loss": 1.715, + "step": 6410 + }, + { + "epoch": 0.6772151898734177, + "grad_norm": 0.4701574146747589, + "learning_rate": 0.0003602420908145865, + "loss": 1.7094, + "step": 6420 + }, + { + "epoch": 0.6782700421940928, + "grad_norm": 0.5332522392272949, + "learning_rate": 0.00035809931886714093, + "loss": 1.7143, + "step": 6430 + }, + { + "epoch": 0.679324894514768, + "grad_norm": 0.5474719405174255, + "learning_rate": 0.00035596093835818683, + "loss": 1.7025, + "step": 6440 + }, + { + "epoch": 0.680379746835443, + "grad_norm": 0.5008084774017334, + "learning_rate": 0.00035382697324932245, + "loss": 1.7043, + "step": 6450 + }, + { + "epoch": 
0.6814345991561181, + "grad_norm": 0.6357883810997009, + "learning_rate": 0.00035169744745266866, + "loss": 1.7175, + "step": 6460 + }, + { + "epoch": 0.6824894514767933, + "grad_norm": 0.48195335268974304, + "learning_rate": 0.0003495723848306017, + "loss": 1.7115, + "step": 6470 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 0.6697856187820435, + "learning_rate": 0.0003474518091954859, + "loss": 1.7206, + "step": 6480 + }, + { + "epoch": 0.6845991561181435, + "grad_norm": 0.5895291566848755, + "learning_rate": 0.0003453357443094068, + "loss": 1.7134, + "step": 6490 + }, + { + "epoch": 0.6856540084388185, + "grad_norm": 0.46835434436798096, + "learning_rate": 0.00034322421388390456, + "loss": 1.7298, + "step": 6500 + }, + { + "epoch": 0.6867088607594937, + "grad_norm": 0.4884438216686249, + "learning_rate": 0.0003411172415797087, + "loss": 1.7295, + "step": 6510 + }, + { + "epoch": 0.6877637130801688, + "grad_norm": 0.5598358511924744, + "learning_rate": 0.0003390148510064727, + "loss": 1.711, + "step": 6520 + }, + { + "epoch": 0.6888185654008439, + "grad_norm": 0.49677544832229614, + "learning_rate": 0.0003369170657225094, + "loss": 1.7073, + "step": 6530 + }, + { + "epoch": 0.689873417721519, + "grad_norm": 0.48161962628364563, + "learning_rate": 0.0003348239092345275, + "loss": 1.7152, + "step": 6540 + }, + { + "epoch": 0.6909282700421941, + "grad_norm": 0.5053879022598267, + "learning_rate": 0.0003327354049973672, + "loss": 1.7094, + "step": 6550 + }, + { + "epoch": 0.6919831223628692, + "grad_norm": 0.8189768195152283, + "learning_rate": 0.00033065157641373847, + "loss": 1.7116, + "step": 6560 + }, + { + "epoch": 0.6930379746835443, + "grad_norm": 0.6714488863945007, + "learning_rate": 0.0003285724468339576, + "loss": 1.7087, + "step": 6570 + }, + { + "epoch": 0.6940928270042194, + "grad_norm": 0.5526439547538757, + "learning_rate": 0.00032649803955568755, + "loss": 1.7057, + "step": 6580 + }, + { + "epoch": 0.6951476793248945, + "grad_norm": 
0.5769382119178772, + "learning_rate": 0.00032442837782367434, + "loss": 1.71, + "step": 6590 + }, + { + "epoch": 0.6962025316455697, + "grad_norm": 0.4643363058567047, + "learning_rate": 0.0003223634848294883, + "loss": 1.7161, + "step": 6600 + }, + { + "epoch": 0.6972573839662447, + "grad_norm": 0.5053367614746094, + "learning_rate": 0.00032030338371126374, + "loss": 1.7029, + "step": 6610 + }, + { + "epoch": 0.6983122362869199, + "grad_norm": 0.5323095917701721, + "learning_rate": 0.0003182480975534395, + "loss": 1.7095, + "step": 6620 + }, + { + "epoch": 0.6993670886075949, + "grad_norm": 0.46482717990875244, + "learning_rate": 0.00031619764938650057, + "loss": 1.7013, + "step": 6630 + }, + { + "epoch": 0.70042194092827, + "grad_norm": 0.4698280990123749, + "learning_rate": 0.0003141520621867197, + "loss": 1.7098, + "step": 6640 + }, + { + "epoch": 0.7014767932489452, + "grad_norm": 0.5259807109832764, + "learning_rate": 0.00031211135887590074, + "loss": 1.7018, + "step": 6650 + }, + { + "epoch": 0.7025316455696202, + "grad_norm": 0.5317603349685669, + "learning_rate": 0.0003100755623211205, + "loss": 1.7027, + "step": 6660 + }, + { + "epoch": 0.7035864978902954, + "grad_norm": 0.4953657388687134, + "learning_rate": 0.0003080446953344735, + "loss": 1.7029, + "step": 6670 + }, + { + "epoch": 0.7046413502109705, + "grad_norm": 0.4898616671562195, + "learning_rate": 0.00030601878067281575, + "loss": 1.7081, + "step": 6680 + }, + { + "epoch": 0.7056962025316456, + "grad_norm": 0.6255486011505127, + "learning_rate": 0.00030399784103751044, + "loss": 1.6997, + "step": 6690 + }, + { + "epoch": 0.7067510548523207, + "grad_norm": 0.6013385057449341, + "learning_rate": 0.000301981899074173, + "loss": 1.7047, + "step": 6700 + }, + { + "epoch": 0.7078059071729957, + "grad_norm": 0.4551595151424408, + "learning_rate": 0.0002999709773724171, + "loss": 1.6994, + "step": 6710 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 0.5211358666419983, + "learning_rate": 
0.00029796509846560294, + "loss": 1.6943, + "step": 6720 + }, + { + "epoch": 0.709915611814346, + "grad_norm": 0.5091364979743958, + "learning_rate": 0.0002959642848305828, + "loss": 1.7051, + "step": 6730 + }, + { + "epoch": 0.7109704641350211, + "grad_norm": 0.4712308943271637, + "learning_rate": 0.00029396855888745045, + "loss": 1.7098, + "step": 6740 + }, + { + "epoch": 0.7120253164556962, + "grad_norm": 0.5141660571098328, + "learning_rate": 0.0002919779429992895, + "loss": 1.7086, + "step": 6750 + }, + { + "epoch": 0.7130801687763713, + "grad_norm": 0.4470996558666229, + "learning_rate": 0.0002899924594719231, + "loss": 1.7033, + "step": 6760 + }, + { + "epoch": 0.7141350210970464, + "grad_norm": 0.44448748230934143, + "learning_rate": 0.00028801213055366335, + "loss": 1.7076, + "step": 6770 + }, + { + "epoch": 0.7151898734177216, + "grad_norm": 0.6011311411857605, + "learning_rate": 0.00028603697843506315, + "loss": 1.7038, + "step": 6780 + }, + { + "epoch": 0.7162447257383966, + "grad_norm": 0.4565386474132538, + "learning_rate": 0.0002840670252486662, + "loss": 1.7093, + "step": 6790 + }, + { + "epoch": 0.7172995780590717, + "grad_norm": 0.4851883351802826, + "learning_rate": 0.00028210229306876, + "loss": 1.6945, + "step": 6800 + }, + { + "epoch": 0.7183544303797469, + "grad_norm": 0.5371813178062439, + "learning_rate": 0.0002801428039111279, + "loss": 1.6996, + "step": 6810 + }, + { + "epoch": 0.7194092827004219, + "grad_norm": 0.5035656690597534, + "learning_rate": 0.00027818857973280274, + "loss": 1.7022, + "step": 6820 + }, + { + "epoch": 0.7204641350210971, + "grad_norm": 0.4859152138233185, + "learning_rate": 0.0002762396424318206, + "loss": 1.6969, + "step": 6830 + }, + { + "epoch": 0.7215189873417721, + "grad_norm": 0.6100954413414001, + "learning_rate": 0.00027429601384697526, + "loss": 1.6899, + "step": 6840 + }, + { + "epoch": 0.7225738396624473, + "grad_norm": 0.506010115146637, + "learning_rate": 0.00027235771575757466, + "loss": 1.6987, + 
"step": 6850 + }, + { + "epoch": 0.7236286919831224, + "grad_norm": 0.5374642014503479, + "learning_rate": 0.0002704247698831951, + "loss": 1.6957, + "step": 6860 + }, + { + "epoch": 0.7246835443037974, + "grad_norm": 0.5141903162002563, + "learning_rate": 0.0002684971978834389, + "loss": 1.6918, + "step": 6870 + }, + { + "epoch": 0.7257383966244726, + "grad_norm": 0.4554612934589386, + "learning_rate": 0.0002665750213576914, + "loss": 1.7006, + "step": 6880 + }, + { + "epoch": 0.7267932489451476, + "grad_norm": 0.5189086198806763, + "learning_rate": 0.0002646582618448794, + "loss": 1.687, + "step": 6890 + }, + { + "epoch": 0.7278481012658228, + "grad_norm": 0.5333877205848694, + "learning_rate": 0.00026274694082322896, + "loss": 1.6944, + "step": 6900 + }, + { + "epoch": 0.7289029535864979, + "grad_norm": 0.4992164373397827, + "learning_rate": 0.0002608410797100255, + "loss": 1.7171, + "step": 6910 + }, + { + "epoch": 0.729957805907173, + "grad_norm": 0.5430211424827576, + "learning_rate": 0.0002589406998613733, + "loss": 1.6957, + "step": 6920 + }, + { + "epoch": 0.7310126582278481, + "grad_norm": 0.5532400012016296, + "learning_rate": 0.0002570458225719567, + "loss": 1.7033, + "step": 6930 + }, + { + "epoch": 0.7320675105485233, + "grad_norm": 0.5315171480178833, + "learning_rate": 0.00025515646907480074, + "loss": 1.7068, + "step": 6940 + }, + { + "epoch": 0.7331223628691983, + "grad_norm": 0.5123645663261414, + "learning_rate": 0.00025327266054103395, + "loss": 1.7042, + "step": 6950 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 0.5465161800384521, + "learning_rate": 0.0002513944180796509, + "loss": 1.6898, + "step": 6960 + }, + { + "epoch": 0.7352320675105485, + "grad_norm": 0.46384114027023315, + "learning_rate": 0.0002495217627372752, + "loss": 1.6874, + "step": 6970 + }, + { + "epoch": 0.7362869198312236, + "grad_norm": 0.5511296987533569, + "learning_rate": 0.0002476547154979248, + "loss": 1.6899, + "step": 6980 + }, + { + "epoch": 
0.7373417721518988, + "grad_norm": 0.5181025862693787, + "learning_rate": 0.00024579329728277534, + "loss": 1.6928, + "step": 6990 + }, + { + "epoch": 0.7383966244725738, + "grad_norm": 0.551807701587677, + "learning_rate": 0.00024393752894992708, + "loss": 1.7024, + "step": 7000 + }, + { + "epoch": 0.739451476793249, + "grad_norm": 0.5231742262840271, + "learning_rate": 0.00024208743129417004, + "loss": 1.6739, + "step": 7010 + }, + { + "epoch": 0.740506329113924, + "grad_norm": 0.6735913157463074, + "learning_rate": 0.00024024302504675206, + "loss": 1.6903, + "step": 7020 + }, + { + "epoch": 0.7415611814345991, + "grad_norm": 0.4833276569843292, + "learning_rate": 0.0002384043308751454, + "loss": 1.6913, + "step": 7030 + }, + { + "epoch": 0.7426160337552743, + "grad_norm": 0.722612738609314, + "learning_rate": 0.00023657136938281653, + "loss": 1.6985, + "step": 7040 + }, + { + "epoch": 0.7436708860759493, + "grad_norm": 0.5836794376373291, + "learning_rate": 0.00023474416110899377, + "loss": 1.6868, + "step": 7050 + }, + { + "epoch": 0.7447257383966245, + "grad_norm": 0.515323281288147, + "learning_rate": 0.00023292272652843807, + "loss": 1.6935, + "step": 7060 + }, + { + "epoch": 0.7457805907172996, + "grad_norm": 0.49344393610954285, + "learning_rate": 0.00023110708605121317, + "loss": 1.6969, + "step": 7070 + }, + { + "epoch": 0.7468354430379747, + "grad_norm": 0.45984581112861633, + "learning_rate": 0.00022929726002245728, + "loss": 1.6959, + "step": 7080 + }, + { + "epoch": 0.7478902953586498, + "grad_norm": 0.49989140033721924, + "learning_rate": 0.00022749326872215472, + "loss": 1.6932, + "step": 7090 + }, + { + "epoch": 0.7489451476793249, + "grad_norm": 0.48957696557044983, + "learning_rate": 0.0002256951323649087, + "loss": 1.6895, + "step": 7100 + }, + { + "epoch": 0.75, + "grad_norm": 0.5280831456184387, + "learning_rate": 0.00022390287109971547, + "loss": 1.6991, + "step": 7110 + }, + { + "epoch": 0.7510548523206751, + "grad_norm": 0.666061282157898, 
+ "learning_rate": 0.00022211650500973746, + "loss": 1.6987, + "step": 7120 + }, + { + "epoch": 0.7521097046413502, + "grad_norm": 0.5271697640419006, + "learning_rate": 0.0002203360541120789, + "loss": 1.7051, + "step": 7130 + }, + { + "epoch": 0.7531645569620253, + "grad_norm": 0.51167231798172, + "learning_rate": 0.00021856153835756164, + "loss": 1.6861, + "step": 7140 + }, + { + "epoch": 0.7542194092827004, + "grad_norm": 0.4741172194480896, + "learning_rate": 0.00021679297763050104, + "loss": 1.6811, + "step": 7150 + }, + { + "epoch": 0.7552742616033755, + "grad_norm": 0.43764370679855347, + "learning_rate": 0.0002150303917484834, + "loss": 1.6942, + "step": 7160 + }, + { + "epoch": 0.7563291139240507, + "grad_norm": 0.5659904479980469, + "learning_rate": 0.0002132738004621446, + "loss": 1.6976, + "step": 7170 + }, + { + "epoch": 0.7573839662447257, + "grad_norm": 0.49589625000953674, + "learning_rate": 0.00021152322345494763, + "loss": 1.6786, + "step": 7180 + }, + { + "epoch": 0.7584388185654009, + "grad_norm": 0.4730931520462036, + "learning_rate": 0.00020977868034296253, + "loss": 1.6803, + "step": 7190 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 0.541373074054718, + "learning_rate": 0.00020804019067464667, + "loss": 1.6889, + "step": 7200 + }, + { + "epoch": 0.760548523206751, + "grad_norm": 0.4999972879886627, + "learning_rate": 0.00020630777393062575, + "loss": 1.6896, + "step": 7210 + }, + { + "epoch": 0.7616033755274262, + "grad_norm": 0.4998817443847656, + "learning_rate": 0.00020458144952347523, + "loss": 1.7026, + "step": 7220 + }, + { + "epoch": 0.7626582278481012, + "grad_norm": 0.541976809501648, + "learning_rate": 0.00020286123679750314, + "loss": 1.6978, + "step": 7230 + }, + { + "epoch": 0.7637130801687764, + "grad_norm": 0.49866101145744324, + "learning_rate": 0.00020114715502853292, + "loss": 1.6962, + "step": 7240 + }, + { + "epoch": 0.7647679324894515, + "grad_norm": 0.47669893503189087, + "learning_rate": 0.0001994392234236878, 
+ "loss": 1.6908, + "step": 7250 + }, + { + "epoch": 0.7658227848101266, + "grad_norm": 0.4440383017063141, + "learning_rate": 0.0001977374611211754, + "loss": 1.6863, + "step": 7260 + }, + { + "epoch": 0.7668776371308017, + "grad_norm": 0.5134697556495667, + "learning_rate": 0.00019604188719007313, + "loss": 1.6867, + "step": 7270 + }, + { + "epoch": 0.7679324894514767, + "grad_norm": 0.5474386811256409, + "learning_rate": 0.00019435252063011504, + "loss": 1.6877, + "step": 7280 + }, + { + "epoch": 0.7689873417721519, + "grad_norm": 0.4370889663696289, + "learning_rate": 0.0001926693803714779, + "loss": 1.6968, + "step": 7290 + }, + { + "epoch": 0.770042194092827, + "grad_norm": 0.5029585361480713, + "learning_rate": 0.00019099248527457068, + "loss": 1.7049, + "step": 7300 + }, + { + "epoch": 0.7710970464135021, + "grad_norm": 0.45806601643562317, + "learning_rate": 0.0001893218541298216, + "loss": 1.6802, + "step": 7310 + }, + { + "epoch": 0.7721518987341772, + "grad_norm": 0.47307705879211426, + "learning_rate": 0.00018765750565746827, + "loss": 1.6782, + "step": 7320 + }, + { + "epoch": 0.7732067510548524, + "grad_norm": 0.51424640417099, + "learning_rate": 0.00018599945850734812, + "loss": 1.6841, + "step": 7330 + }, + { + "epoch": 0.7742616033755274, + "grad_norm": 0.5089414715766907, + "learning_rate": 0.00018434773125868895, + "loss": 1.6846, + "step": 7340 + }, + { + "epoch": 0.7753164556962026, + "grad_norm": 0.4981805980205536, + "learning_rate": 0.00018270234241990108, + "loss": 1.6773, + "step": 7350 + }, + { + "epoch": 0.7763713080168776, + "grad_norm": 0.4701639711856842, + "learning_rate": 0.0001810633104283698, + "loss": 1.6872, + "step": 7360 + }, + { + "epoch": 0.7774261603375527, + "grad_norm": 0.5209629535675049, + "learning_rate": 0.0001794306536502492, + "loss": 1.6866, + "step": 7370 + }, + { + "epoch": 0.7784810126582279, + "grad_norm": 0.5327766537666321, + "learning_rate": 0.0001778043903802555, + "loss": 1.6775, + "step": 7380 + }, + { + 
"epoch": 0.7795358649789029, + "grad_norm": 0.4828507602214813, + "learning_rate": 0.0001761845388414627, + "loss": 1.6837, + "step": 7390 + }, + { + "epoch": 0.7805907172995781, + "grad_norm": 0.5321535468101501, + "learning_rate": 0.00017457111718509831, + "loss": 1.6941, + "step": 7400 + }, + { + "epoch": 0.7816455696202531, + "grad_norm": 0.5002284646034241, + "learning_rate": 0.00017296414349033976, + "loss": 1.6745, + "step": 7410 + }, + { + "epoch": 0.7827004219409283, + "grad_norm": 0.4460604190826416, + "learning_rate": 0.00017136363576411172, + "loss": 1.6815, + "step": 7420 + }, + { + "epoch": 0.7837552742616034, + "grad_norm": 0.4390539228916168, + "learning_rate": 0.00016976961194088526, + "loss": 1.6672, + "step": 7430 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 0.5192734003067017, + "learning_rate": 0.00016818208988247533, + "loss": 1.6721, + "step": 7440 + }, + { + "epoch": 0.7858649789029536, + "grad_norm": 0.49038299918174744, + "learning_rate": 0.0001666010873778419, + "loss": 1.6721, + "step": 7450 + }, + { + "epoch": 0.7869198312236287, + "grad_norm": 0.4575085937976837, + "learning_rate": 0.00016502662214289, + "loss": 1.6849, + "step": 7460 + }, + { + "epoch": 0.7879746835443038, + "grad_norm": 0.5796797275543213, + "learning_rate": 0.00016345871182027124, + "loss": 1.6734, + "step": 7470 + }, + { + "epoch": 0.7890295358649789, + "grad_norm": 0.4989880621433258, + "learning_rate": 0.00016189737397918653, + "loss": 1.681, + "step": 7480 + }, + { + "epoch": 0.790084388185654, + "grad_norm": 0.44982436299324036, + "learning_rate": 0.0001603426261151884, + "loss": 1.689, + "step": 7490 + }, + { + "epoch": 0.7911392405063291, + "grad_norm": 0.4690608084201813, + "learning_rate": 0.00015879448564998648, + "loss": 1.6828, + "step": 7500 + }, + { + "epoch": 0.7921940928270043, + "grad_norm": 0.4908209443092346, + "learning_rate": 0.0001572529699312501, + "loss": 1.69, + "step": 7510 + }, + { + "epoch": 0.7932489451476793, + "grad_norm": 
0.4658385217189789, + "learning_rate": 0.0001557180962324158, + "loss": 1.6729, + "step": 7520 + }, + { + "epoch": 0.7943037974683544, + "grad_norm": 0.4762396812438965, + "learning_rate": 0.00015418988175249282, + "loss": 1.6768, + "step": 7530 + }, + { + "epoch": 0.7953586497890295, + "grad_norm": 0.5417431592941284, + "learning_rate": 0.00015266834361587063, + "loss": 1.6755, + "step": 7540 + }, + { + "epoch": 0.7964135021097046, + "grad_norm": 0.4358850121498108, + "learning_rate": 0.00015115349887212678, + "loss": 1.6695, + "step": 7550 + }, + { + "epoch": 0.7974683544303798, + "grad_norm": 0.5909740924835205, + "learning_rate": 0.00014964536449583657, + "loss": 1.6708, + "step": 7560 + }, + { + "epoch": 0.7985232067510548, + "grad_norm": 0.6127946972846985, + "learning_rate": 0.00014814395738638195, + "loss": 1.686, + "step": 7570 + }, + { + "epoch": 0.79957805907173, + "grad_norm": 0.48377683758735657, + "learning_rate": 0.00014664929436776278, + "loss": 1.6816, + "step": 7580 + }, + { + "epoch": 0.8006329113924051, + "grad_norm": 0.4370257258415222, + "learning_rate": 0.00014516139218840788, + "loss": 1.6748, + "step": 7590 + }, + { + "epoch": 0.8016877637130801, + "grad_norm": 0.4921756684780121, + "learning_rate": 0.00014368026752098782, + "loss": 1.6755, + "step": 7600 + }, + { + "epoch": 0.8027426160337553, + "grad_norm": 0.544208824634552, + "learning_rate": 0.00014220593696222768, + "loss": 1.6912, + "step": 7610 + }, + { + "epoch": 0.8037974683544303, + "grad_norm": 0.49705615639686584, + "learning_rate": 0.00014073841703272092, + "loss": 1.66, + "step": 7620 + }, + { + "epoch": 0.8048523206751055, + "grad_norm": 0.468503475189209, + "learning_rate": 0.00013927772417674558, + "loss": 1.6851, + "step": 7630 + }, + { + "epoch": 0.8059071729957806, + "grad_norm": 0.46158406138420105, + "learning_rate": 0.00013782387476207788, + "loss": 1.6753, + "step": 7640 + }, + { + "epoch": 0.8069620253164557, + "grad_norm": 0.4895174205303192, + "learning_rate": 
0.00013637688507981064, + "loss": 1.691, + "step": 7650 + }, + { + "epoch": 0.8080168776371308, + "grad_norm": 0.46479442715644836, + "learning_rate": 0.0001349367713441697, + "loss": 1.6704, + "step": 7660 + }, + { + "epoch": 0.8090717299578059, + "grad_norm": 0.512269139289856, + "learning_rate": 0.0001335035496923326, + "loss": 1.6759, + "step": 7670 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 0.4911366105079651, + "learning_rate": 0.0001320772361842478, + "loss": 1.6911, + "step": 7680 + }, + { + "epoch": 0.8111814345991561, + "grad_norm": 0.4673351049423218, + "learning_rate": 0.00013065784680245442, + "loss": 1.6663, + "step": 7690 + }, + { + "epoch": 0.8122362869198312, + "grad_norm": 0.432595431804657, + "learning_rate": 0.00012924539745190402, + "loss": 1.6746, + "step": 7700 + }, + { + "epoch": 0.8132911392405063, + "grad_norm": 0.6019179821014404, + "learning_rate": 0.0001278399039597809, + "loss": 1.6722, + "step": 7710 + }, + { + "epoch": 0.8143459915611815, + "grad_norm": 0.4442385733127594, + "learning_rate": 0.0001264413820753261, + "loss": 1.677, + "step": 7720 + }, + { + "epoch": 0.8154008438818565, + "grad_norm": 0.4725878834724426, + "learning_rate": 0.00012504984746966003, + "loss": 1.6817, + "step": 7730 + }, + { + "epoch": 0.8164556962025317, + "grad_norm": 0.4854767322540283, + "learning_rate": 0.00012366531573560754, + "loss": 1.6811, + "step": 7740 + }, + { + "epoch": 0.8175105485232067, + "grad_norm": 0.49454352259635925, + "learning_rate": 0.00012228780238752264, + "loss": 1.6643, + "step": 7750 + }, + { + "epoch": 0.8185654008438819, + "grad_norm": 0.48609641194343567, + "learning_rate": 0.00012091732286111514, + "loss": 1.6681, + "step": 7760 + }, + { + "epoch": 0.819620253164557, + "grad_norm": 0.4803316295146942, + "learning_rate": 0.00011955389251327737, + "loss": 1.6785, + "step": 7770 + }, + { + "epoch": 0.820675105485232, + "grad_norm": 0.5596554279327393, + "learning_rate": 0.00011819752662191197, + "loss": 1.6699, + 
"step": 7780 + }, + { + "epoch": 0.8217299578059072, + "grad_norm": 0.5480920672416687, + "learning_rate": 0.00011684824038576115, + "loss": 1.6882, + "step": 7790 + }, + { + "epoch": 0.8227848101265823, + "grad_norm": 0.43406906723976135, + "learning_rate": 0.00011550604892423593, + "loss": 1.6737, + "step": 7800 + }, + { + "epoch": 0.8238396624472574, + "grad_norm": 0.4492756128311157, + "learning_rate": 0.0001141709672772471, + "loss": 1.6734, + "step": 7810 + }, + { + "epoch": 0.8248945147679325, + "grad_norm": 0.5107495784759521, + "learning_rate": 0.00011284301040503625, + "loss": 1.6815, + "step": 7820 + }, + { + "epoch": 0.8259493670886076, + "grad_norm": 0.5216100811958313, + "learning_rate": 0.0001115221931880088, + "loss": 1.6712, + "step": 7830 + }, + { + "epoch": 0.8270042194092827, + "grad_norm": 0.461012065410614, + "learning_rate": 0.00011020853042656648, + "loss": 1.6575, + "step": 7840 + }, + { + "epoch": 0.8280590717299579, + "grad_norm": 0.47651466727256775, + "learning_rate": 0.000108902036840942, + "loss": 1.6678, + "step": 7850 + }, + { + "epoch": 0.8291139240506329, + "grad_norm": 0.4415021538734436, + "learning_rate": 0.00010760272707103389, + "loss": 1.6561, + "step": 7860 + }, + { + "epoch": 0.830168776371308, + "grad_norm": 0.4723900258541107, + "learning_rate": 0.00010631061567624259, + "loss": 1.6694, + "step": 7870 + }, + { + "epoch": 0.8312236286919831, + "grad_norm": 0.5299960374832153, + "learning_rate": 0.00010502571713530706, + "loss": 1.6862, + "step": 7880 + }, + { + "epoch": 0.8322784810126582, + "grad_norm": 0.46111688017845154, + "learning_rate": 0.00010374804584614308, + "loss": 1.6848, + "step": 7890 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.5112030506134033, + "learning_rate": 0.00010247761612568129, + "loss": 1.6646, + "step": 7900 + }, + { + "epoch": 0.8343881856540084, + "grad_norm": 0.5119314789772034, + "learning_rate": 0.0001012144422097069, + "loss": 1.6729, + "step": 7910 + }, + { + "epoch": 
0.8354430379746836, + "grad_norm": 0.5535699129104614, + "learning_rate": 9.995853825270052e-05, + "loss": 1.672, + "step": 7920 + }, + { + "epoch": 0.8364978902953587, + "grad_norm": 0.4741092324256897, + "learning_rate": 9.870991832767919e-05, + "loss": 1.6807, + "step": 7930 + }, + { + "epoch": 0.8375527426160337, + "grad_norm": 0.4959404468536377, + "learning_rate": 9.746859642603884e-05, + "loss": 1.6738, + "step": 7940 + }, + { + "epoch": 0.8386075949367089, + "grad_norm": 0.4929347634315491, + "learning_rate": 9.623458645739755e-05, + "loss": 1.6768, + "step": 7950 + }, + { + "epoch": 0.8396624472573839, + "grad_norm": 0.44864967465400696, + "learning_rate": 9.50079022494395e-05, + "loss": 1.6603, + "step": 7960 + }, + { + "epoch": 0.8407172995780591, + "grad_norm": 0.47165539860725403, + "learning_rate": 9.378855754776028e-05, + "loss": 1.664, + "step": 7970 + }, + { + "epoch": 0.8417721518987342, + "grad_norm": 0.507309079170227, + "learning_rate": 9.257656601571266e-05, + "loss": 1.662, + "step": 7980 + }, + { + "epoch": 0.8428270042194093, + "grad_norm": 0.45784810185432434, + "learning_rate": 9.137194123425349e-05, + "loss": 1.675, + "step": 7990 + }, + { + "epoch": 0.8438818565400844, + "grad_norm": 0.5117497444152832, + "learning_rate": 9.017469670179168e-05, + "loss": 1.6666, + "step": 8000 + }, + { + "epoch": 0.8449367088607594, + "grad_norm": 0.48904961347579956, + "learning_rate": 8.898484583403668e-05, + "loss": 1.6759, + "step": 8010 + }, + { + "epoch": 0.8459915611814346, + "grad_norm": 0.4867279529571533, + "learning_rate": 8.780240196384873e-05, + "loss": 1.6682, + "step": 8020 + }, + { + "epoch": 0.8470464135021097, + "grad_norm": 0.4557291567325592, + "learning_rate": 8.662737834108861e-05, + "loss": 1.6669, + "step": 8030 + }, + { + "epoch": 0.8481012658227848, + "grad_norm": 0.4508359134197235, + "learning_rate": 8.545978813246987e-05, + "loss": 1.6823, + "step": 8040 + }, + { + "epoch": 0.8491561181434599, + "grad_norm": 
0.4927974045276642, + "learning_rate": 8.429964442141072e-05, + "loss": 1.6619, + "step": 8050 + }, + { + "epoch": 0.8502109704641351, + "grad_norm": 0.47224199771881104, + "learning_rate": 8.314696020788806e-05, + "loss": 1.669, + "step": 8060 + }, + { + "epoch": 0.8512658227848101, + "grad_norm": 0.4597434103488922, + "learning_rate": 8.200174840829136e-05, + "loss": 1.6717, + "step": 8070 + }, + { + "epoch": 0.8523206751054853, + "grad_norm": 0.4617033004760742, + "learning_rate": 8.08640218552778e-05, + "loss": 1.6759, + "step": 8080 + }, + { + "epoch": 0.8533755274261603, + "grad_norm": 0.5356170535087585, + "learning_rate": 7.973379329762925e-05, + "loss": 1.6652, + "step": 8090 + }, + { + "epoch": 0.8544303797468354, + "grad_norm": 0.5260666012763977, + "learning_rate": 7.861107540010845e-05, + "loss": 1.6571, + "step": 8100 + }, + { + "epoch": 0.8554852320675106, + "grad_norm": 0.4889324903488159, + "learning_rate": 7.749588074331762e-05, + "loss": 1.6776, + "step": 8110 + }, + { + "epoch": 0.8565400843881856, + "grad_norm": 0.4346250891685486, + "learning_rate": 7.63882218235575e-05, + "loss": 1.6669, + "step": 8120 + }, + { + "epoch": 0.8575949367088608, + "grad_norm": 0.4669164717197418, + "learning_rate": 7.528811105268699e-05, + "loss": 1.6657, + "step": 8130 + }, + { + "epoch": 0.8586497890295358, + "grad_norm": 0.5097572207450867, + "learning_rate": 7.41955607579845e-05, + "loss": 1.6659, + "step": 8140 + }, + { + "epoch": 0.859704641350211, + "grad_norm": 0.4442615509033203, + "learning_rate": 7.311058318200969e-05, + "loss": 1.6611, + "step": 8150 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 0.44552934169769287, + "learning_rate": 7.203319048246599e-05, + "loss": 1.6739, + "step": 8160 + }, + { + "epoch": 0.8618143459915611, + "grad_norm": 0.4795217514038086, + "learning_rate": 7.096339473206471e-05, + "loss": 1.6612, + "step": 8170 + }, + { + "epoch": 0.8628691983122363, + "grad_norm": 0.49375104904174805, + "learning_rate": 
6.990120791838953e-05, + "loss": 1.6741, + "step": 8180 + }, + { + "epoch": 0.8639240506329114, + "grad_norm": 0.45526301860809326, + "learning_rate": 6.884664194376233e-05, + "loss": 1.6661, + "step": 8190 + }, + { + "epoch": 0.8649789029535865, + "grad_norm": 0.44551849365234375, + "learning_rate": 6.779970862510989e-05, + "loss": 1.6757, + "step": 8200 + }, + { + "epoch": 0.8660337552742616, + "grad_norm": 0.49550458788871765, + "learning_rate": 6.676041969383107e-05, + "loss": 1.6675, + "step": 8210 + }, + { + "epoch": 0.8670886075949367, + "grad_norm": 0.5176491141319275, + "learning_rate": 6.572878679566605e-05, + "loss": 1.6746, + "step": 8220 + }, + { + "epoch": 0.8681434599156118, + "grad_norm": 0.505177915096283, + "learning_rate": 6.470482149056509e-05, + "loss": 1.6701, + "step": 8230 + }, + { + "epoch": 0.869198312236287, + "grad_norm": 0.4305630922317505, + "learning_rate": 6.368853525255942e-05, + "loss": 1.6666, + "step": 8240 + }, + { + "epoch": 0.870253164556962, + "grad_norm": 0.4531860053539276, + "learning_rate": 6.267993946963249e-05, + "loss": 1.6826, + "step": 8250 + }, + { + "epoch": 0.8713080168776371, + "grad_norm": 0.5137802362442017, + "learning_rate": 6.167904544359265e-05, + "loss": 1.6746, + "step": 8260 + }, + { + "epoch": 0.8723628691983122, + "grad_norm": 0.45894256234169006, + "learning_rate": 6.068586438994617e-05, + "loss": 1.6669, + "step": 8270 + }, + { + "epoch": 0.8734177215189873, + "grad_norm": 0.4276801347732544, + "learning_rate": 5.970040743777161e-05, + "loss": 1.6546, + "step": 8280 + }, + { + "epoch": 0.8744725738396625, + "grad_norm": 0.4355577826499939, + "learning_rate": 5.8722685629595454e-05, + "loss": 1.6531, + "step": 8290 + }, + { + "epoch": 0.8755274261603375, + "grad_norm": 0.475751131772995, + "learning_rate": 5.7752709921267855e-05, + "loss": 1.6765, + "step": 8300 + }, + { + "epoch": 0.8765822784810127, + "grad_norm": 0.4657628834247589, + "learning_rate": 5.6790491181840294e-05, + "loss": 1.659, + 
"step": 8310 + }, + { + "epoch": 0.8776371308016878, + "grad_norm": 0.4674154222011566, + "learning_rate": 5.583604019344354e-05, + "loss": 1.6725, + "step": 8320 + }, + { + "epoch": 0.8786919831223629, + "grad_norm": 0.47468939423561096, + "learning_rate": 5.4889367651167007e-05, + "loss": 1.6727, + "step": 8330 + }, + { + "epoch": 0.879746835443038, + "grad_norm": 0.4702012538909912, + "learning_rate": 5.3950484162938714e-05, + "loss": 1.6573, + "step": 8340 + }, + { + "epoch": 0.880801687763713, + "grad_norm": 0.45527184009552, + "learning_rate": 5.3019400249406686e-05, + "loss": 1.6609, + "step": 8350 + }, + { + "epoch": 0.8818565400843882, + "grad_norm": 0.47363999485969543, + "learning_rate": 5.209612634382077e-05, + "loss": 1.658, + "step": 8360 + }, + { + "epoch": 0.8829113924050633, + "grad_norm": 0.46629542112350464, + "learning_rate": 5.118067279191599e-05, + "loss": 1.6636, + "step": 8370 + }, + { + "epoch": 0.8839662447257384, + "grad_norm": 0.49402838945388794, + "learning_rate": 5.0273049851796205e-05, + "loss": 1.6719, + "step": 8380 + }, + { + "epoch": 0.8850210970464135, + "grad_norm": 0.46080490946769714, + "learning_rate": 4.9373267693819805e-05, + "loss": 1.6642, + "step": 8390 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 0.539513111114502, + "learning_rate": 4.848133640048513e-05, + "loss": 1.6646, + "step": 8400 + }, + { + "epoch": 0.8871308016877637, + "grad_norm": 0.5160200595855713, + "learning_rate": 4.75972659663178e-05, + "loss": 1.6814, + "step": 8410 + }, + { + "epoch": 0.8881856540084389, + "grad_norm": 0.43634530901908875, + "learning_rate": 4.672106629775882e-05, + "loss": 1.6682, + "step": 8420 + }, + { + "epoch": 0.8892405063291139, + "grad_norm": 0.44822850823402405, + "learning_rate": 4.585274721305333e-05, + "loss": 1.6651, + "step": 8430 + }, + { + "epoch": 0.890295358649789, + "grad_norm": 0.4662959575653076, + "learning_rate": 4.4992318442140575e-05, + "loss": 1.6597, + "step": 8440 + }, + { + "epoch": 
0.8913502109704642, + "grad_norm": 0.4416350722312927, + "learning_rate": 4.413978962654508e-05, + "loss": 1.6701, + "step": 8450 + }, + { + "epoch": 0.8924050632911392, + "grad_norm": 0.5056466460227966, + "learning_rate": 4.3295170319268554e-05, + "loss": 1.6652, + "step": 8460 + }, + { + "epoch": 0.8934599156118144, + "grad_norm": 0.42820656299591064, + "learning_rate": 4.245846998468261e-05, + "loss": 1.6646, + "step": 8470 + }, + { + "epoch": 0.8945147679324894, + "grad_norm": 0.42281100153923035, + "learning_rate": 4.16296979984232e-05, + "loss": 1.6583, + "step": 8480 + }, + { + "epoch": 0.8955696202531646, + "grad_norm": 0.45958805084228516, + "learning_rate": 4.080886364728506e-05, + "loss": 1.6673, + "step": 8490 + }, + { + "epoch": 0.8966244725738397, + "grad_norm": 0.4433458745479584, + "learning_rate": 3.999597612911793e-05, + "loss": 1.654, + "step": 8500 + }, + { + "epoch": 0.8976793248945147, + "grad_norm": 0.43281736969947815, + "learning_rate": 3.9191044552723345e-05, + "loss": 1.6652, + "step": 8510 + }, + { + "epoch": 0.8987341772151899, + "grad_norm": 0.45923444628715515, + "learning_rate": 3.839407793775268e-05, + "loss": 1.6613, + "step": 8520 + }, + { + "epoch": 0.8997890295358649, + "grad_norm": 0.4472077488899231, + "learning_rate": 3.760508521460584e-05, + "loss": 1.6748, + "step": 8530 + }, + { + "epoch": 0.9008438818565401, + "grad_norm": 0.41644978523254395, + "learning_rate": 3.682407522433173e-05, + "loss": 1.6759, + "step": 8540 + }, + { + "epoch": 0.9018987341772152, + "grad_norm": 0.4404599368572235, + "learning_rate": 3.605105671852854e-05, + "loss": 1.6611, + "step": 8550 + }, + { + "epoch": 0.9029535864978903, + "grad_norm": 0.4719579219818115, + "learning_rate": 3.528603835924626e-05, + "loss": 1.6521, + "step": 8560 + }, + { + "epoch": 0.9040084388185654, + "grad_norm": 0.5136392712593079, + "learning_rate": 3.4529028718888935e-05, + "loss": 1.673, + "step": 8570 + }, + { + "epoch": 0.9050632911392406, + "grad_norm": 
0.4537601172924042, + "learning_rate": 3.378003628011938e-05, + "loss": 1.667, + "step": 8580 + }, + { + "epoch": 0.9061181434599156, + "grad_norm": 0.49940335750579834, + "learning_rate": 3.303906943576346e-05, + "loss": 1.6701, + "step": 8590 + }, + { + "epoch": 0.9071729957805907, + "grad_norm": 0.4163913428783417, + "learning_rate": 3.230613648871661e-05, + "loss": 1.6632, + "step": 8600 + }, + { + "epoch": 0.9082278481012658, + "grad_norm": 0.43560993671417236, + "learning_rate": 3.158124565185022e-05, + "loss": 1.6615, + "step": 8610 + }, + { + "epoch": 0.9092827004219409, + "grad_norm": 0.4907214045524597, + "learning_rate": 3.086440504792026e-05, + "loss": 1.6599, + "step": 8620 + }, + { + "epoch": 0.9103375527426161, + "grad_norm": 0.4473698139190674, + "learning_rate": 3.015562270947553e-05, + "loss": 1.6677, + "step": 8630 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 0.4903205335140228, + "learning_rate": 2.945490657876837e-05, + "loss": 1.6479, + "step": 8640 + }, + { + "epoch": 0.9124472573839663, + "grad_norm": 0.4705663323402405, + "learning_rate": 2.8762264507665113e-05, + "loss": 1.6563, + "step": 8650 + }, + { + "epoch": 0.9135021097046413, + "grad_norm": 0.4377981126308441, + "learning_rate": 2.807770425755829e-05, + "loss": 1.6589, + "step": 8660 + }, + { + "epoch": 0.9145569620253164, + "grad_norm": 0.41801419854164124, + "learning_rate": 2.7401233499279866e-05, + "loss": 1.6648, + "step": 8670 + }, + { + "epoch": 0.9156118143459916, + "grad_norm": 0.44523268938064575, + "learning_rate": 2.6732859813014987e-05, + "loss": 1.6711, + "step": 8680 + }, + { + "epoch": 0.9166666666666666, + "grad_norm": 0.4396098256111145, + "learning_rate": 2.607259068821721e-05, + "loss": 1.6676, + "step": 8690 + }, + { + "epoch": 0.9177215189873418, + "grad_norm": 0.423370897769928, + "learning_rate": 2.5420433523524493e-05, + "loss": 1.6657, + "step": 8700 + }, + { + "epoch": 0.9187763713080169, + "grad_norm": 0.46848398447036743, + "learning_rate": 
2.4776395626676162e-05, + "loss": 1.6616, + "step": 8710 + }, + { + "epoch": 0.919831223628692, + "grad_norm": 0.4565613865852356, + "learning_rate": 2.414048421443141e-05, + "loss": 1.6556, + "step": 8720 + }, + { + "epoch": 0.9208860759493671, + "grad_norm": 0.4504757821559906, + "learning_rate": 2.3512706412488012e-05, + "loss": 1.669, + "step": 8730 + }, + { + "epoch": 0.9219409282700421, + "grad_norm": 0.43590018153190613, + "learning_rate": 2.2893069255402993e-05, + "loss": 1.6554, + "step": 8740 + }, + { + "epoch": 0.9229957805907173, + "grad_norm": 0.4715381860733032, + "learning_rate": 2.2281579686513176e-05, + "loss": 1.6578, + "step": 8750 + }, + { + "epoch": 0.9240506329113924, + "grad_norm": 0.458272784948349, + "learning_rate": 2.1678244557857663e-05, + "loss": 1.646, + "step": 8760 + }, + { + "epoch": 0.9251054852320675, + "grad_norm": 0.4373680353164673, + "learning_rate": 2.1083070630101232e-05, + "loss": 1.6565, + "step": 8770 + }, + { + "epoch": 0.9261603375527426, + "grad_norm": 0.4109298586845398, + "learning_rate": 2.0496064572458395e-05, + "loss": 1.665, + "step": 8780 + }, + { + "epoch": 0.9272151898734177, + "grad_norm": 0.4232487678527832, + "learning_rate": 1.991723296261863e-05, + "loss": 1.6611, + "step": 8790 + }, + { + "epoch": 0.9282700421940928, + "grad_norm": 0.4461933970451355, + "learning_rate": 1.9346582286672686e-05, + "loss": 1.6559, + "step": 8800 + }, + { + "epoch": 0.929324894514768, + "grad_norm": 0.47736912965774536, + "learning_rate": 1.878411893904014e-05, + "loss": 1.674, + "step": 8810 + }, + { + "epoch": 0.930379746835443, + "grad_norm": 0.4359014332294464, + "learning_rate": 1.822984922239737e-05, + "loss": 1.662, + "step": 8820 + }, + { + "epoch": 0.9314345991561181, + "grad_norm": 0.43752020597457886, + "learning_rate": 1.7683779347607286e-05, + "loss": 1.6745, + "step": 8830 + }, + { + "epoch": 0.9324894514767933, + "grad_norm": 0.43497052788734436, + "learning_rate": 1.714591543364938e-05, + "loss": 1.6593, + 
"step": 8840 + }, + { + "epoch": 0.9335443037974683, + "grad_norm": 0.4512965679168701, + "learning_rate": 1.6616263507551437e-05, + "loss": 1.6586, + "step": 8850 + }, + { + "epoch": 0.9345991561181435, + "grad_norm": 0.4612230956554413, + "learning_rate": 1.609482950432195e-05, + "loss": 1.6599, + "step": 8860 + }, + { + "epoch": 0.9356540084388185, + "grad_norm": 0.4260503351688385, + "learning_rate": 1.5581619266883563e-05, + "loss": 1.6634, + "step": 8870 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 0.4700492322444916, + "learning_rate": 1.5076638546007548e-05, + "loss": 1.6667, + "step": 8880 + }, + { + "epoch": 0.9377637130801688, + "grad_norm": 0.448140949010849, + "learning_rate": 1.457989300024945e-05, + "loss": 1.664, + "step": 8890 + }, + { + "epoch": 0.9388185654008439, + "grad_norm": 0.4292459785938263, + "learning_rate": 1.4091388195885625e-05, + "loss": 1.6572, + "step": 8900 + }, + { + "epoch": 0.939873417721519, + "grad_norm": 0.4592759311199188, + "learning_rate": 1.3611129606851041e-05, + "loss": 1.6579, + "step": 8910 + }, + { + "epoch": 0.9409282700421941, + "grad_norm": 0.4543823301792145, + "learning_rate": 1.313912261467759e-05, + "loss": 1.66, + "step": 8920 + }, + { + "epoch": 0.9419831223628692, + "grad_norm": 0.4255363643169403, + "learning_rate": 1.267537250843412e-05, + "loss": 1.6608, + "step": 8930 + }, + { + "epoch": 0.9430379746835443, + "grad_norm": 0.44835999608039856, + "learning_rate": 1.2219884484667071e-05, + "loss": 1.6615, + "step": 8940 + }, + { + "epoch": 0.9440928270042194, + "grad_norm": 0.4233814477920532, + "learning_rate": 1.1772663647341947e-05, + "loss": 1.6625, + "step": 8950 + }, + { + "epoch": 0.9451476793248945, + "grad_norm": 0.4282788634300232, + "learning_rate": 1.1333715007786932e-05, + "loss": 1.6657, + "step": 8960 + }, + { + "epoch": 0.9462025316455697, + "grad_norm": 0.4206037223339081, + "learning_rate": 1.0903043484635694e-05, + "loss": 1.665, + "step": 8970 + }, + { + "epoch": 
0.9472573839662447, + "grad_norm": 0.49093425273895264, + "learning_rate": 1.0480653903772924e-05, + "loss": 1.6593, + "step": 8980 + }, + { + "epoch": 0.9483122362869199, + "grad_norm": 0.4121231436729431, + "learning_rate": 1.0066550998280132e-05, + "loss": 1.6657, + "step": 8990 + }, + { + "epoch": 0.9493670886075949, + "grad_norm": 0.42613083124160767, + "learning_rate": 9.660739408382608e-06, + "loss": 1.6592, + "step": 9000 + }, + { + "epoch": 0.95042194092827, + "grad_norm": 0.4518212676048279, + "learning_rate": 9.26322368139737e-06, + "loss": 1.6567, + "step": 9010 + }, + { + "epoch": 0.9514767932489452, + "grad_norm": 0.46585017442703247, + "learning_rate": 8.874008271682222e-06, + "loss": 1.6563, + "step": 9020 + }, + { + "epoch": 0.9525316455696202, + "grad_norm": 0.42755186557769775, + "learning_rate": 8.493097540585775e-06, + "loss": 1.6838, + "step": 9030 + }, + { + "epoch": 0.9535864978902954, + "grad_norm": 0.4405651092529297, + "learning_rate": 8.120495756399005e-06, + "loss": 1.6521, + "step": 9040 + }, + { + "epoch": 0.9546413502109705, + "grad_norm": 0.4298318922519684, + "learning_rate": 7.756207094306605e-06, + "loss": 1.6641, + "step": 9050 + }, + { + "epoch": 0.9556962025316456, + "grad_norm": 0.4491939842700958, + "learning_rate": 7.400235636340957e-06, + "loss": 1.666, + "step": 9060 + }, + { + "epoch": 0.9567510548523207, + "grad_norm": 0.44178301095962524, + "learning_rate": 7.0525853713362395e-06, + "loss": 1.665, + "step": 9070 + }, + { + "epoch": 0.9578059071729957, + "grad_norm": 0.42622971534729004, + "learning_rate": 6.71326019488322e-06, + "loss": 1.6634, + "step": 9080 + }, + { + "epoch": 0.9588607594936709, + "grad_norm": 0.4461546540260315, + "learning_rate": 6.3822639092862846e-06, + "loss": 1.67, + "step": 9090 + }, + { + "epoch": 0.959915611814346, + "grad_norm": 0.43019208312034607, + "learning_rate": 6.059600223520478e-06, + "loss": 1.6463, + "step": 9100 + }, + { + "epoch": 0.9609704641350211, + "grad_norm": 
0.44663575291633606, + "learning_rate": 5.745272753189784e-06, + "loss": 1.6588, + "step": 9110 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 0.4323757290840149, + "learning_rate": 5.439285020487156e-06, + "loss": 1.667, + "step": 9120 + }, + { + "epoch": 0.9630801687763713, + "grad_norm": 0.44403767585754395, + "learning_rate": 5.141640454154467e-06, + "loss": 1.6543, + "step": 9130 + }, + { + "epoch": 0.9641350210970464, + "grad_norm": 0.4845617711544037, + "learning_rate": 4.852342389444458e-06, + "loss": 1.6736, + "step": 9140 + }, + { + "epoch": 0.9651898734177216, + "grad_norm": 0.4464190900325775, + "learning_rate": 4.571394068083185e-06, + "loss": 1.6597, + "step": 9150 + }, + { + "epoch": 0.9662447257383966, + "grad_norm": 0.41682007908821106, + "learning_rate": 4.298798638233709e-06, + "loss": 1.6649, + "step": 9160 + }, + { + "epoch": 0.9672995780590717, + "grad_norm": 0.46006670594215393, + "learning_rate": 4.034559154461049e-06, + "loss": 1.6653, + "step": 9170 + }, + { + "epoch": 0.9683544303797469, + "grad_norm": 0.4145854413509369, + "learning_rate": 3.7786785776976198e-06, + "loss": 1.6591, + "step": 9180 + }, + { + "epoch": 0.9694092827004219, + "grad_norm": 0.432424932718277, + "learning_rate": 3.5311597752100964e-06, + "loss": 1.6582, + "step": 9190 + }, + { + "epoch": 0.9704641350210971, + "grad_norm": 0.44872480630874634, + "learning_rate": 3.2920055205676867e-06, + "loss": 1.6587, + "step": 9200 + }, + { + "epoch": 0.9715189873417721, + "grad_norm": 0.4471467137336731, + "learning_rate": 3.06121849361049e-06, + "loss": 1.6615, + "step": 9210 + }, + { + "epoch": 0.9725738396624473, + "grad_norm": 0.43514835834503174, + "learning_rate": 2.838801280419856e-06, + "loss": 1.6589, + "step": 9220 + }, + { + "epoch": 0.9736286919831224, + "grad_norm": 0.44895514845848083, + "learning_rate": 2.624756373289322e-06, + "loss": 1.6507, + "step": 9230 + }, + { + "epoch": 0.9746835443037974, + "grad_norm": 0.4346788823604584, + "learning_rate": 
2.419086170696472e-06, + "loss": 1.6428, + "step": 9240 + }, + { + "epoch": 0.9757383966244726, + "grad_norm": 0.41907569766044617, + "learning_rate": 2.2217929772764545e-06, + "loss": 1.6574, + "step": 9250 + }, + { + "epoch": 0.9767932489451476, + "grad_norm": 0.43942931294441223, + "learning_rate": 2.0328790037957568e-06, + "loss": 1.6529, + "step": 9260 + }, + { + "epoch": 0.9778481012658228, + "grad_norm": 0.4250134229660034, + "learning_rate": 1.8523463671278052e-06, + "loss": 1.6669, + "step": 9270 + }, + { + "epoch": 0.9789029535864979, + "grad_norm": 0.4357467591762543, + "learning_rate": 1.6801970902288188e-06, + "loss": 1.6544, + "step": 9280 + }, + { + "epoch": 0.979957805907173, + "grad_norm": 0.43509748578071594, + "learning_rate": 1.5164331021155774e-06, + "loss": 1.6564, + "step": 9290 + }, + { + "epoch": 0.9810126582278481, + "grad_norm": 0.4471875727176666, + "learning_rate": 1.3610562378435221e-06, + "loss": 1.66, + "step": 9300 + }, + { + "epoch": 0.9820675105485233, + "grad_norm": 0.419119268655777, + "learning_rate": 1.2140682384862712e-06, + "loss": 1.659, + "step": 9310 + }, + { + "epoch": 0.9831223628691983, + "grad_norm": 0.4330718517303467, + "learning_rate": 1.0754707511161365e-06, + "loss": 1.6545, + "step": 9320 + }, + { + "epoch": 0.9841772151898734, + "grad_norm": 0.42688918113708496, + "learning_rate": 9.452653287856383e-07, + "loss": 1.6636, + "step": 9330 + }, + { + "epoch": 0.9852320675105485, + "grad_norm": 0.4096972942352295, + "learning_rate": 8.234534305101015e-07, + "loss": 1.6614, + "step": 9340 + }, + { + "epoch": 0.9862869198312236, + "grad_norm": 0.4194006621837616, + "learning_rate": 7.100364212513367e-07, + "loss": 1.6756, + "step": 9350 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 0.43339255452156067, + "learning_rate": 6.050155719023176e-07, + "loss": 1.6692, + "step": 9360 + }, + { + "epoch": 0.9883966244725738, + "grad_norm": 0.4299978017807007, + "learning_rate": 5.08392059272944e-07, + "loss": 1.6655, + 
"step": 9370 + }, + { + "epoch": 0.989451476793249, + "grad_norm": 0.46044686436653137, + "learning_rate": 4.2016696607680147e-07, + "loss": 1.652, + "step": 9380 + }, + { + "epoch": 0.990506329113924, + "grad_norm": 0.4418999254703522, + "learning_rate": 3.4034128091917085e-07, + "loss": 1.6569, + "step": 9390 + }, + { + "epoch": 0.9915611814345991, + "grad_norm": 0.41349098086357117, + "learning_rate": 2.689158982859541e-07, + "loss": 1.6671, + "step": 9400 + }, + { + "epoch": 0.9926160337552743, + "grad_norm": 0.46896985173225403, + "learning_rate": 2.05891618533266e-07, + "loss": 1.6409, + "step": 9410 + }, + { + "epoch": 0.9936708860759493, + "grad_norm": 0.44133302569389343, + "learning_rate": 1.5126914787894074e-07, + "loss": 1.6608, + "step": 9420 + }, + { + "epoch": 0.9947257383966245, + "grad_norm": 0.4281478226184845, + "learning_rate": 1.0504909839462173e-07, + "loss": 1.6668, + "step": 9430 + }, + { + "epoch": 0.9957805907172996, + "grad_norm": 0.44644874334335327, + "learning_rate": 6.723198799826746e-08, + "loss": 1.6583, + "step": 9440 + }, + { + "epoch": 0.9968354430379747, + "grad_norm": 0.4268466532230377, + "learning_rate": 3.781824044932214e-08, + "loss": 1.6689, + "step": 9450 + }, + { + "epoch": 0.9978902953586498, + "grad_norm": 0.4531896412372589, + "learning_rate": 1.6808185342970238e-08, + "loss": 1.6551, + "step": 9460 + }, + { + "epoch": 0.9989451476793249, + "grad_norm": 0.43237465620040894, + "learning_rate": 4.202058107305451e-09, + "loss": 1.6634, + "step": 9470 + }, + { + "epoch": 1.0, + "grad_norm": 1.2764161825180054, + "learning_rate": 0.0, + "loss": 1.6546, + "step": 9480 + } + ], + "logging_steps": 10, + "max_steps": 9480, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + 
"total_flos": 1.8317122291574784e+16, + "train_batch_size": 1024, + "trial_name": null, + "trial_params": null +} diff --git a/saves-cohere-cosine/checkpoint-9480/training_args.bin b/saves-cohere-cosine/checkpoint-9480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..959b13dd99c3447ac01dda3a7afd99f8be2e21c6 --- /dev/null +++ b/saves-cohere-cosine/checkpoint-9480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67e7c83d9e9aec325409b6c944f4cca234837bfa01c0f4edc1971f7886c4ef10 +size 5176 diff --git a/saves-cohere-cosine/config.json b/saves-cohere-cosine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..df4858fb7c269ef7a90d58d11e57ba3dd9f2ef21 --- /dev/null +++ b/saves-cohere-cosine/config.json @@ -0,0 +1,27 @@ +{ + "architectures": [ + "CohereForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 5, + "eos_token_id": 255001, + "hidden_act": "silu", + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 768, + "layer_norm_eps": 1e-05, + "logit_scale": 0.0625, + "max_position_embeddings": 8192, + "model_type": "cohere", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pad_token_id": 0, + "rope_theta": 10000.0, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "use_qk_norm": false, + "vocab_size": 2000 +} diff --git a/saves-cohere-cosine/generation_config.json b/saves-cohere-cosine/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9c41a4de69f546b74395520ae8afc0771ed6b49a --- /dev/null +++ b/saves-cohere-cosine/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 5, + "eos_token_id": 255001, + "pad_token_id": 0, + "transformers_version": "4.42.4" +} diff --git a/saves-cohere-cosine/model.safetensors b/saves-cohere-cosine/model.safetensors new file mode 100644 
index 0000000000000000000000000000000000000000..5e850f755abd2c21a4bc0db6498d65e6300c1103 --- /dev/null +++ b/saves-cohere-cosine/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c437a62079def81d398a279682408ea6cb49a564d010af50b45e9ccdfae96c5 +size 8344440 diff --git a/saves-cohere-cosine/special_tokens_map.json b/saves-cohere-cosine/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-cohere-cosine/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-cohere-cosine/tokenizer.json b/saves-cohere-cosine/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-cohere-cosine/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": 
"(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 
109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, 
+ "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 
417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + 
"Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + 
"Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 
852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + 
"ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + 
"注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 
1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + 
"Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + 
"ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + 
"çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 
1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, 
+ ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", 
+ "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + 
"æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", + "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-cohere-cosine/tokenizer_config.json b/saves-cohere-cosine/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-cohere-cosine/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + 
"model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-cohere/checkpoint-9480/config.json b/saves-cohere/checkpoint-9480/config.json new file mode 100644 index 0000000000000000000000000000000000000000..df4858fb7c269ef7a90d58d11e57ba3dd9f2ef21 --- /dev/null +++ b/saves-cohere/checkpoint-9480/config.json @@ -0,0 +1,27 @@ +{ + "architectures": [ + "CohereForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 5, + "eos_token_id": 255001, + "hidden_act": "silu", + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 768, + "layer_norm_eps": 1e-05, + "logit_scale": 0.0625, + "max_position_embeddings": 8192, + "model_type": "cohere", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pad_token_id": 0, + "rope_theta": 10000.0, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "use_qk_norm": false, + "vocab_size": 2000 +} diff --git a/saves-cohere/checkpoint-9480/generation_config.json b/saves-cohere/checkpoint-9480/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9c41a4de69f546b74395520ae8afc0771ed6b49a --- /dev/null +++ b/saves-cohere/checkpoint-9480/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 5, + "eos_token_id": 255001, + "pad_token_id": 0, + "transformers_version": "4.42.4" +} diff --git a/saves-cohere/checkpoint-9480/model.safetensors b/saves-cohere/checkpoint-9480/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e7b9607651dc517ddc9fae9614d8a192237687bc --- /dev/null +++ b/saves-cohere/checkpoint-9480/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b79be30f1ea1ff29b17ca323353282cb348e94ed35effe8555d8ad3083e53133 +size 8344440 diff --git 
a/saves-cohere/checkpoint-9480/optimizer.pt b/saves-cohere/checkpoint-9480/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4402b10ff992cf316c27ee57dda78444f3fbc484 --- /dev/null +++ b/saves-cohere/checkpoint-9480/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c69f4abfe985cf97b62bd97e08a715313fbf47070a7a969fc81988a016f23a0 +size 16700648 diff --git a/saves-cohere/checkpoint-9480/rng_state.pth b/saves-cohere/checkpoint-9480/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f8291cd6ce87668b786a72f3e93d072fbe54902 --- /dev/null +++ b/saves-cohere/checkpoint-9480/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c917636c7a58af68a29056522a757e9f9b99005b776641aa157c536967817d +size 14244 diff --git a/saves-cohere/checkpoint-9480/scheduler.pt b/saves-cohere/checkpoint-9480/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..63473f23a031ab0f869bb406d5cf89839262f03d --- /dev/null +++ b/saves-cohere/checkpoint-9480/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbb2bea2f7536b844ad9bb1bf6c3877fce0b1eb4d96764e140560dbf207ce6aa +size 1064 diff --git a/saves-cohere/checkpoint-9480/special_tokens_map.json b/saves-cohere/checkpoint-9480/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-cohere/checkpoint-9480/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git 
a/saves-cohere/checkpoint-9480/tokenizer.json b/saves-cohere/checkpoint-9480/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-cohere/checkpoint-9480/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 
21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + 
"re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + 
"ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + 
"res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + 
"被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 
782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + 
"Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + 
"ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 
1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + 
"çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + 
"Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + 
"Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 
1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 
1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 
1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-cohere/checkpoint-9480/tokenizer_config.json b/saves-cohere/checkpoint-9480/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-cohere/checkpoint-9480/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": 
"<|im_end|>", + "errors": "replace", + "model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-cohere/checkpoint-9480/trainer_state.json b/saves-cohere/checkpoint-9480/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4046f6998ace3df9536aac4ba2c93555e98bf1f3 --- /dev/null +++ b/saves-cohere/checkpoint-9480/trainer_state.json @@ -0,0 +1,6669 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 9480, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0010548523206751054, + "grad_norm": 0.07923071086406708, + "learning_rate": 0.00015822784810126583, + "loss": 7.5889, + "step": 10 + }, + { + "epoch": 0.002109704641350211, + "grad_norm": 0.07893538475036621, + "learning_rate": 0.00031645569620253165, + "loss": 7.5463, + "step": 20 + }, + { + "epoch": 0.0031645569620253164, + "grad_norm": 0.07745599746704102, + "learning_rate": 0.00047468354430379745, + "loss": 7.4957, + "step": 30 + }, + { + "epoch": 0.004219409282700422, + "grad_norm": 0.08022584021091461, + "learning_rate": 0.0006329113924050633, + "loss": 7.4237, + "step": 40 + }, + { + "epoch": 0.005274261603375527, + "grad_norm": 0.08301162719726562, + "learning_rate": 0.0007911392405063291, + "loss": 7.3248, + "step": 50 + }, + { + "epoch": 0.006329113924050633, + "grad_norm": 0.09766305238008499, + "learning_rate": 0.0009493670886075949, + "loss": 7.2024, + "step": 60 + }, + { + "epoch": 0.007383966244725738, + "grad_norm": 0.10538820177316666, + "learning_rate": 0.0011075949367088608, + "loss": 7.0652, + "step": 70 + }, + { + "epoch": 0.008438818565400843, + "grad_norm": 0.1338197886943817, + "learning_rate": 0.0012658227848101266, + "loss": 6.9254, + "step": 80 + }, + { + "epoch": 0.00949367088607595, + "grad_norm": 
0.1701158881187439, + "learning_rate": 0.0014240506329113926, + "loss": 6.8086, + "step": 90 + }, + { + "epoch": 0.010548523206751054, + "grad_norm": 0.09935522824525833, + "learning_rate": 0.0015, + "loss": 6.6879, + "step": 100 + }, + { + "epoch": 0.011603375527426161, + "grad_norm": 0.09172353148460388, + "learning_rate": 0.0015, + "loss": 6.5452, + "step": 110 + }, + { + "epoch": 0.012658227848101266, + "grad_norm": 0.10350140184164047, + "learning_rate": 0.0015, + "loss": 6.4164, + "step": 120 + }, + { + "epoch": 0.013713080168776372, + "grad_norm": 0.13510945439338684, + "learning_rate": 0.0015, + "loss": 6.2871, + "step": 130 + }, + { + "epoch": 0.014767932489451477, + "grad_norm": 0.11754976212978363, + "learning_rate": 0.0015, + "loss": 6.1499, + "step": 140 + }, + { + "epoch": 0.015822784810126583, + "grad_norm": 0.11622276902198792, + "learning_rate": 0.0015, + "loss": 6.0149, + "step": 150 + }, + { + "epoch": 0.016877637130801686, + "grad_norm": 0.12036944925785065, + "learning_rate": 0.0015, + "loss": 5.8896, + "step": 160 + }, + { + "epoch": 0.017932489451476793, + "grad_norm": 0.18034903705120087, + "learning_rate": 0.0015, + "loss": 5.7667, + "step": 170 + }, + { + "epoch": 0.0189873417721519, + "grad_norm": 0.11490294337272644, + "learning_rate": 0.0015, + "loss": 5.6558, + "step": 180 + }, + { + "epoch": 0.020042194092827006, + "grad_norm": 0.1969427466392517, + "learning_rate": 0.0015, + "loss": 5.5462, + "step": 190 + }, + { + "epoch": 0.02109704641350211, + "grad_norm": 0.13829076290130615, + "learning_rate": 0.0015, + "loss": 5.4374, + "step": 200 + }, + { + "epoch": 0.022151898734177215, + "grad_norm": 0.3710581660270691, + "learning_rate": 0.0015, + "loss": 5.3443, + "step": 210 + }, + { + "epoch": 0.023206751054852322, + "grad_norm": 0.12660042941570282, + "learning_rate": 0.0015, + "loss": 5.2585, + "step": 220 + }, + { + "epoch": 0.024261603375527425, + "grad_norm": 0.13207125663757324, + "learning_rate": 0.0015, + "loss": 5.1701, + 
"step": 230 + }, + { + "epoch": 0.02531645569620253, + "grad_norm": 0.14921557903289795, + "learning_rate": 0.0015, + "loss": 5.0899, + "step": 240 + }, + { + "epoch": 0.026371308016877638, + "grad_norm": 0.1586378961801529, + "learning_rate": 0.0015, + "loss": 5.0215, + "step": 250 + }, + { + "epoch": 0.027426160337552744, + "grad_norm": 0.19645819067955017, + "learning_rate": 0.0015, + "loss": 4.9815, + "step": 260 + }, + { + "epoch": 0.028481012658227847, + "grad_norm": 0.12963224947452545, + "learning_rate": 0.0015, + "loss": 4.9222, + "step": 270 + }, + { + "epoch": 0.029535864978902954, + "grad_norm": 0.14293646812438965, + "learning_rate": 0.0015, + "loss": 4.8489, + "step": 280 + }, + { + "epoch": 0.03059071729957806, + "grad_norm": 0.23253677785396576, + "learning_rate": 0.0015, + "loss": 4.7926, + "step": 290 + }, + { + "epoch": 0.03164556962025317, + "grad_norm": 0.18617752194404602, + "learning_rate": 0.0015, + "loss": 4.7431, + "step": 300 + }, + { + "epoch": 0.03270042194092827, + "grad_norm": 0.15038111805915833, + "learning_rate": 0.0015, + "loss": 4.6854, + "step": 310 + }, + { + "epoch": 0.03375527426160337, + "grad_norm": 0.1934233456850052, + "learning_rate": 0.0015, + "loss": 4.6224, + "step": 320 + }, + { + "epoch": 0.03481012658227848, + "grad_norm": 0.42051437497138977, + "learning_rate": 0.0015, + "loss": 4.5863, + "step": 330 + }, + { + "epoch": 0.035864978902953586, + "grad_norm": 0.16603535413742065, + "learning_rate": 0.0015, + "loss": 4.5376, + "step": 340 + }, + { + "epoch": 0.03691983122362869, + "grad_norm": 0.14271578192710876, + "learning_rate": 0.0015, + "loss": 4.4682, + "step": 350 + }, + { + "epoch": 0.0379746835443038, + "grad_norm": 0.2038530558347702, + "learning_rate": 0.0015, + "loss": 4.4145, + "step": 360 + }, + { + "epoch": 0.039029535864978905, + "grad_norm": 0.430816113948822, + "learning_rate": 0.0015, + "loss": 4.3704, + "step": 370 + }, + { + "epoch": 0.04008438818565401, + "grad_norm": 0.2535029351711273, + 
"learning_rate": 0.0015, + "loss": 4.3287, + "step": 380 + }, + { + "epoch": 0.04113924050632911, + "grad_norm": 0.2236170619726181, + "learning_rate": 0.0015, + "loss": 4.2772, + "step": 390 + }, + { + "epoch": 0.04219409282700422, + "grad_norm": 0.24314047396183014, + "learning_rate": 0.0015, + "loss": 4.2293, + "step": 400 + }, + { + "epoch": 0.043248945147679324, + "grad_norm": 0.42784205079078674, + "learning_rate": 0.0015, + "loss": 4.1828, + "step": 410 + }, + { + "epoch": 0.04430379746835443, + "grad_norm": 0.34147414565086365, + "learning_rate": 0.0015, + "loss": 4.1358, + "step": 420 + }, + { + "epoch": 0.04535864978902954, + "grad_norm": 0.30154019594192505, + "learning_rate": 0.0015, + "loss": 4.0868, + "step": 430 + }, + { + "epoch": 0.046413502109704644, + "grad_norm": 0.29003316164016724, + "learning_rate": 0.0015, + "loss": 4.0516, + "step": 440 + }, + { + "epoch": 0.04746835443037975, + "grad_norm": 0.24404658377170563, + "learning_rate": 0.0015, + "loss": 4.0164, + "step": 450 + }, + { + "epoch": 0.04852320675105485, + "grad_norm": 0.357685387134552, + "learning_rate": 0.0015, + "loss": 3.9685, + "step": 460 + }, + { + "epoch": 0.049578059071729956, + "grad_norm": 0.2554370164871216, + "learning_rate": 0.0015, + "loss": 3.9235, + "step": 470 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 0.40063291788101196, + "learning_rate": 0.0015, + "loss": 3.8991, + "step": 480 + }, + { + "epoch": 0.05168776371308017, + "grad_norm": 0.3551327586174011, + "learning_rate": 0.0015, + "loss": 3.8559, + "step": 490 + }, + { + "epoch": 0.052742616033755275, + "grad_norm": 0.26288431882858276, + "learning_rate": 0.0015, + "loss": 3.818, + "step": 500 + }, + { + "epoch": 0.05379746835443038, + "grad_norm": 0.3543540835380554, + "learning_rate": 0.0015, + "loss": 3.7969, + "step": 510 + }, + { + "epoch": 0.05485232067510549, + "grad_norm": 0.28771013021469116, + "learning_rate": 0.0015, + "loss": 3.7607, + "step": 520 + }, + { + "epoch": 0.05590717299578059, 
+ "grad_norm": 0.2961564362049103, + "learning_rate": 0.0015, + "loss": 3.7202, + "step": 530 + }, + { + "epoch": 0.056962025316455694, + "grad_norm": 0.24359576404094696, + "learning_rate": 0.0015, + "loss": 3.6904, + "step": 540 + }, + { + "epoch": 0.0580168776371308, + "grad_norm": 0.29026246070861816, + "learning_rate": 0.0015, + "loss": 3.6703, + "step": 550 + }, + { + "epoch": 0.05907172995780591, + "grad_norm": 0.3167600631713867, + "learning_rate": 0.0015, + "loss": 3.6256, + "step": 560 + }, + { + "epoch": 0.060126582278481014, + "grad_norm": 0.41890257596969604, + "learning_rate": 0.0015, + "loss": 3.6055, + "step": 570 + }, + { + "epoch": 0.06118143459915612, + "grad_norm": 0.3548174202442169, + "learning_rate": 0.0015, + "loss": 3.5849, + "step": 580 + }, + { + "epoch": 0.06223628691983123, + "grad_norm": 0.3154391944408417, + "learning_rate": 0.0015, + "loss": 3.5436, + "step": 590 + }, + { + "epoch": 0.06329113924050633, + "grad_norm": 0.40213409066200256, + "learning_rate": 0.0015, + "loss": 3.5162, + "step": 600 + }, + { + "epoch": 0.06434599156118144, + "grad_norm": 0.37434232234954834, + "learning_rate": 0.0015, + "loss": 3.5027, + "step": 610 + }, + { + "epoch": 0.06540084388185655, + "grad_norm": 0.34982094168663025, + "learning_rate": 0.0015, + "loss": 3.4732, + "step": 620 + }, + { + "epoch": 0.06645569620253164, + "grad_norm": 0.3418155908584595, + "learning_rate": 0.0015, + "loss": 3.4474, + "step": 630 + }, + { + "epoch": 0.06751054852320675, + "grad_norm": 0.49373704195022583, + "learning_rate": 0.0015, + "loss": 3.4428, + "step": 640 + }, + { + "epoch": 0.06856540084388185, + "grad_norm": 0.3478853702545166, + "learning_rate": 0.0015, + "loss": 3.4185, + "step": 650 + }, + { + "epoch": 0.06962025316455696, + "grad_norm": 0.4067620635032654, + "learning_rate": 0.0015, + "loss": 3.3923, + "step": 660 + }, + { + "epoch": 0.07067510548523206, + "grad_norm": 0.5346623659133911, + "learning_rate": 0.0015, + "loss": 3.3652, + "step": 670 + }, + 
{ + "epoch": 0.07172995780590717, + "grad_norm": 0.40070468187332153, + "learning_rate": 0.0015, + "loss": 3.3437, + "step": 680 + }, + { + "epoch": 0.07278481012658228, + "grad_norm": 0.35132190585136414, + "learning_rate": 0.0015, + "loss": 3.3355, + "step": 690 + }, + { + "epoch": 0.07383966244725738, + "grad_norm": 0.45310819149017334, + "learning_rate": 0.0015, + "loss": 3.3098, + "step": 700 + }, + { + "epoch": 0.07489451476793249, + "grad_norm": 0.4345400631427765, + "learning_rate": 0.0015, + "loss": 3.2988, + "step": 710 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 0.37969064712524414, + "learning_rate": 0.0015, + "loss": 3.2653, + "step": 720 + }, + { + "epoch": 0.0770042194092827, + "grad_norm": 0.3894875645637512, + "learning_rate": 0.0015, + "loss": 3.2472, + "step": 730 + }, + { + "epoch": 0.07805907172995781, + "grad_norm": 0.33336156606674194, + "learning_rate": 0.0015, + "loss": 3.248, + "step": 740 + }, + { + "epoch": 0.07911392405063292, + "grad_norm": 0.4870315194129944, + "learning_rate": 0.0015, + "loss": 3.2186, + "step": 750 + }, + { + "epoch": 0.08016877637130802, + "grad_norm": 0.4258863627910614, + "learning_rate": 0.0015, + "loss": 3.2, + "step": 760 + }, + { + "epoch": 0.08122362869198312, + "grad_norm": 0.481924444437027, + "learning_rate": 0.0015, + "loss": 3.1853, + "step": 770 + }, + { + "epoch": 0.08227848101265822, + "grad_norm": 0.6292198896408081, + "learning_rate": 0.0015, + "loss": 3.1654, + "step": 780 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 0.5782979726791382, + "learning_rate": 0.0015, + "loss": 3.1672, + "step": 790 + }, + { + "epoch": 0.08438818565400844, + "grad_norm": 0.43748995661735535, + "learning_rate": 0.0015, + "loss": 3.1334, + "step": 800 + }, + { + "epoch": 0.08544303797468354, + "grad_norm": 0.5902690291404724, + "learning_rate": 0.0015, + "loss": 3.1105, + "step": 810 + }, + { + "epoch": 0.08649789029535865, + "grad_norm": 0.45668601989746094, + "learning_rate": 0.0015, + "loss": 
3.1166, + "step": 820 + }, + { + "epoch": 0.08755274261603375, + "grad_norm": 0.6151666045188904, + "learning_rate": 0.0015, + "loss": 3.1117, + "step": 830 + }, + { + "epoch": 0.08860759493670886, + "grad_norm": 0.43925824761390686, + "learning_rate": 0.0015, + "loss": 3.0727, + "step": 840 + }, + { + "epoch": 0.08966244725738397, + "grad_norm": 0.7277379631996155, + "learning_rate": 0.0015, + "loss": 3.0682, + "step": 850 + }, + { + "epoch": 0.09071729957805907, + "grad_norm": 0.46423545479774475, + "learning_rate": 0.0015, + "loss": 3.0532, + "step": 860 + }, + { + "epoch": 0.09177215189873418, + "grad_norm": 0.38785454630851746, + "learning_rate": 0.0015, + "loss": 3.0437, + "step": 870 + }, + { + "epoch": 0.09282700421940929, + "grad_norm": 0.4553026854991913, + "learning_rate": 0.0015, + "loss": 3.0269, + "step": 880 + }, + { + "epoch": 0.0938818565400844, + "grad_norm": 0.6062794327735901, + "learning_rate": 0.0015, + "loss": 3.0163, + "step": 890 + }, + { + "epoch": 0.0949367088607595, + "grad_norm": 0.8279756903648376, + "learning_rate": 0.0015, + "loss": 3.0007, + "step": 900 + }, + { + "epoch": 0.09599156118143459, + "grad_norm": 0.4567737281322479, + "learning_rate": 0.0015, + "loss": 2.9923, + "step": 910 + }, + { + "epoch": 0.0970464135021097, + "grad_norm": 0.4688446521759033, + "learning_rate": 0.0015, + "loss": 2.978, + "step": 920 + }, + { + "epoch": 0.0981012658227848, + "grad_norm": 0.5172986388206482, + "learning_rate": 0.0015, + "loss": 2.9599, + "step": 930 + }, + { + "epoch": 0.09915611814345991, + "grad_norm": 0.5056201219558716, + "learning_rate": 0.0015, + "loss": 2.9491, + "step": 940 + }, + { + "epoch": 0.10021097046413502, + "grad_norm": 0.5157880187034607, + "learning_rate": 0.0015, + "loss": 2.927, + "step": 950 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 0.7345868349075317, + "learning_rate": 0.0015, + "loss": 2.932, + "step": 960 + }, + { + "epoch": 0.10232067510548523, + "grad_norm": 0.5228882431983948, + 
"learning_rate": 0.0015, + "loss": 2.9211, + "step": 970 + }, + { + "epoch": 0.10337552742616034, + "grad_norm": 0.807361364364624, + "learning_rate": 0.0015, + "loss": 2.892, + "step": 980 + }, + { + "epoch": 0.10443037974683544, + "grad_norm": 0.46052291989326477, + "learning_rate": 0.0015, + "loss": 2.8876, + "step": 990 + }, + { + "epoch": 0.10548523206751055, + "grad_norm": 0.5747856497764587, + "learning_rate": 0.0015, + "loss": 2.8825, + "step": 1000 + }, + { + "epoch": 0.10654008438818566, + "grad_norm": 0.5301325917243958, + "learning_rate": 0.0015, + "loss": 2.8717, + "step": 1010 + }, + { + "epoch": 0.10759493670886076, + "grad_norm": 0.6366458535194397, + "learning_rate": 0.0015, + "loss": 2.8571, + "step": 1020 + }, + { + "epoch": 0.10864978902953587, + "grad_norm": 0.49421897530555725, + "learning_rate": 0.0015, + "loss": 2.8442, + "step": 1030 + }, + { + "epoch": 0.10970464135021098, + "grad_norm": 0.5443897843360901, + "learning_rate": 0.0015, + "loss": 2.8339, + "step": 1040 + }, + { + "epoch": 0.11075949367088607, + "grad_norm": 0.5442678928375244, + "learning_rate": 0.0015, + "loss": 2.8315, + "step": 1050 + }, + { + "epoch": 0.11181434599156118, + "grad_norm": 0.4455268383026123, + "learning_rate": 0.0015, + "loss": 2.8147, + "step": 1060 + }, + { + "epoch": 0.11286919831223628, + "grad_norm": 0.435916543006897, + "learning_rate": 0.0015, + "loss": 2.7987, + "step": 1070 + }, + { + "epoch": 0.11392405063291139, + "grad_norm": 0.5206997394561768, + "learning_rate": 0.0015, + "loss": 2.7904, + "step": 1080 + }, + { + "epoch": 0.1149789029535865, + "grad_norm": 0.7274598479270935, + "learning_rate": 0.0015, + "loss": 2.7836, + "step": 1090 + }, + { + "epoch": 0.1160337552742616, + "grad_norm": 0.5128963589668274, + "learning_rate": 0.0015, + "loss": 2.7675, + "step": 1100 + }, + { + "epoch": 0.11708860759493671, + "grad_norm": 0.5600806474685669, + "learning_rate": 0.0015, + "loss": 2.7651, + "step": 1110 + }, + { + "epoch": 0.11814345991561181, + 
"grad_norm": 0.489555299282074, + "learning_rate": 0.0015, + "loss": 2.7562, + "step": 1120 + }, + { + "epoch": 0.11919831223628692, + "grad_norm": 0.6913049221038818, + "learning_rate": 0.0015, + "loss": 2.7317, + "step": 1130 + }, + { + "epoch": 0.12025316455696203, + "grad_norm": 0.5356138348579407, + "learning_rate": 0.0015, + "loss": 2.7474, + "step": 1140 + }, + { + "epoch": 0.12130801687763713, + "grad_norm": 0.4805282652378082, + "learning_rate": 0.0015, + "loss": 2.7238, + "step": 1150 + }, + { + "epoch": 0.12236286919831224, + "grad_norm": 0.48950088024139404, + "learning_rate": 0.0015, + "loss": 2.727, + "step": 1160 + }, + { + "epoch": 0.12341772151898735, + "grad_norm": 0.44867202639579773, + "learning_rate": 0.0015, + "loss": 2.7014, + "step": 1170 + }, + { + "epoch": 0.12447257383966245, + "grad_norm": 0.5401803851127625, + "learning_rate": 0.0015, + "loss": 2.6872, + "step": 1180 + }, + { + "epoch": 0.12552742616033755, + "grad_norm": 0.5483376979827881, + "learning_rate": 0.0015, + "loss": 2.6868, + "step": 1190 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 0.671482264995575, + "learning_rate": 0.0015, + "loss": 2.6909, + "step": 1200 + }, + { + "epoch": 0.12763713080168776, + "grad_norm": 0.4312852621078491, + "learning_rate": 0.0015, + "loss": 2.6814, + "step": 1210 + }, + { + "epoch": 0.12869198312236288, + "grad_norm": 0.5126764178276062, + "learning_rate": 0.0015, + "loss": 2.6575, + "step": 1220 + }, + { + "epoch": 0.12974683544303797, + "grad_norm": 0.5414651036262512, + "learning_rate": 0.0015, + "loss": 2.6424, + "step": 1230 + }, + { + "epoch": 0.1308016877637131, + "grad_norm": 0.42504602670669556, + "learning_rate": 0.0015, + "loss": 2.6525, + "step": 1240 + }, + { + "epoch": 0.13185654008438819, + "grad_norm": 0.436250776052475, + "learning_rate": 0.0015, + "loss": 2.6448, + "step": 1250 + }, + { + "epoch": 0.13291139240506328, + "grad_norm": 0.7625768780708313, + "learning_rate": 0.0015, + "loss": 2.6297, + "step": 1260 + }, 
+ { + "epoch": 0.1339662447257384, + "grad_norm": 0.576604962348938, + "learning_rate": 0.0015, + "loss": 2.6147, + "step": 1270 + }, + { + "epoch": 0.1350210970464135, + "grad_norm": 0.5628520250320435, + "learning_rate": 0.0015, + "loss": 2.6264, + "step": 1280 + }, + { + "epoch": 0.1360759493670886, + "grad_norm": 0.49781346321105957, + "learning_rate": 0.0015, + "loss": 2.6046, + "step": 1290 + }, + { + "epoch": 0.1371308016877637, + "grad_norm": 0.6295763254165649, + "learning_rate": 0.0015, + "loss": 2.5877, + "step": 1300 + }, + { + "epoch": 0.13818565400843882, + "grad_norm": 0.4855346381664276, + "learning_rate": 0.0015, + "loss": 2.5878, + "step": 1310 + }, + { + "epoch": 0.13924050632911392, + "grad_norm": 0.4699518084526062, + "learning_rate": 0.0015, + "loss": 2.587, + "step": 1320 + }, + { + "epoch": 0.14029535864978904, + "grad_norm": 0.9219175577163696, + "learning_rate": 0.0015, + "loss": 2.5741, + "step": 1330 + }, + { + "epoch": 0.14135021097046413, + "grad_norm": 0.5130273103713989, + "learning_rate": 0.0015, + "loss": 2.5723, + "step": 1340 + }, + { + "epoch": 0.14240506329113925, + "grad_norm": 0.4627608358860016, + "learning_rate": 0.0015, + "loss": 2.5699, + "step": 1350 + }, + { + "epoch": 0.14345991561181434, + "grad_norm": 0.8430650234222412, + "learning_rate": 0.0015, + "loss": 2.5528, + "step": 1360 + }, + { + "epoch": 0.14451476793248946, + "grad_norm": 0.4903288185596466, + "learning_rate": 0.0015, + "loss": 2.5461, + "step": 1370 + }, + { + "epoch": 0.14556962025316456, + "grad_norm": 0.5185163021087646, + "learning_rate": 0.0015, + "loss": 2.5285, + "step": 1380 + }, + { + "epoch": 0.14662447257383968, + "grad_norm": 0.5127776861190796, + "learning_rate": 0.0015, + "loss": 2.5405, + "step": 1390 + }, + { + "epoch": 0.14767932489451477, + "grad_norm": 0.49610579013824463, + "learning_rate": 0.0015, + "loss": 2.5216, + "step": 1400 + }, + { + "epoch": 0.14873417721518986, + "grad_norm": 0.554472029209137, + "learning_rate": 0.0015, + 
"loss": 2.5165, + "step": 1410 + }, + { + "epoch": 0.14978902953586498, + "grad_norm": 0.5494760870933533, + "learning_rate": 0.0015, + "loss": 2.5176, + "step": 1420 + }, + { + "epoch": 0.15084388185654007, + "grad_norm": 0.9152542948722839, + "learning_rate": 0.0015, + "loss": 2.5138, + "step": 1430 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 0.5785524249076843, + "learning_rate": 0.0015, + "loss": 2.504, + "step": 1440 + }, + { + "epoch": 0.1529535864978903, + "grad_norm": 0.4721471965312958, + "learning_rate": 0.0015, + "loss": 2.4994, + "step": 1450 + }, + { + "epoch": 0.1540084388185654, + "grad_norm": 0.43738165497779846, + "learning_rate": 0.0015, + "loss": 2.4888, + "step": 1460 + }, + { + "epoch": 0.1550632911392405, + "grad_norm": 0.5966213941574097, + "learning_rate": 0.0015, + "loss": 2.4794, + "step": 1470 + }, + { + "epoch": 0.15611814345991562, + "grad_norm": 0.5104870200157166, + "learning_rate": 0.0015, + "loss": 2.4791, + "step": 1480 + }, + { + "epoch": 0.1571729957805907, + "grad_norm": 0.40664082765579224, + "learning_rate": 0.0015, + "loss": 2.47, + "step": 1490 + }, + { + "epoch": 0.15822784810126583, + "grad_norm": 0.49904534220695496, + "learning_rate": 0.0015, + "loss": 2.4711, + "step": 1500 + }, + { + "epoch": 0.15928270042194093, + "grad_norm": 0.5104140043258667, + "learning_rate": 0.0015, + "loss": 2.4609, + "step": 1510 + }, + { + "epoch": 0.16033755274261605, + "grad_norm": 0.5398507118225098, + "learning_rate": 0.0015, + "loss": 2.4608, + "step": 1520 + }, + { + "epoch": 0.16139240506329114, + "grad_norm": 0.6123701930046082, + "learning_rate": 0.0015, + "loss": 2.4409, + "step": 1530 + }, + { + "epoch": 0.16244725738396623, + "grad_norm": 0.3854086399078369, + "learning_rate": 0.0015, + "loss": 2.4274, + "step": 1540 + }, + { + "epoch": 0.16350210970464135, + "grad_norm": 0.4420521855354309, + "learning_rate": 0.0015, + "loss": 2.4235, + "step": 1550 + }, + { + "epoch": 0.16455696202531644, + "grad_norm": 
0.6921679973602295, + "learning_rate": 0.0015, + "loss": 2.4282, + "step": 1560 + }, + { + "epoch": 0.16561181434599156, + "grad_norm": 0.6027482748031616, + "learning_rate": 0.0015, + "loss": 2.4143, + "step": 1570 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 0.4941210150718689, + "learning_rate": 0.0015, + "loss": 2.4219, + "step": 1580 + }, + { + "epoch": 0.16772151898734178, + "grad_norm": 0.7488183975219727, + "learning_rate": 0.0015, + "loss": 2.4175, + "step": 1590 + }, + { + "epoch": 0.16877637130801687, + "grad_norm": 0.6008018255233765, + "learning_rate": 0.0015, + "loss": 2.413, + "step": 1600 + }, + { + "epoch": 0.169831223628692, + "grad_norm": 0.5070136189460754, + "learning_rate": 0.0015, + "loss": 2.4067, + "step": 1610 + }, + { + "epoch": 0.17088607594936708, + "grad_norm": 0.6918146014213562, + "learning_rate": 0.0015, + "loss": 2.3832, + "step": 1620 + }, + { + "epoch": 0.1719409282700422, + "grad_norm": 0.4650106728076935, + "learning_rate": 0.0015, + "loss": 2.3914, + "step": 1630 + }, + { + "epoch": 0.1729957805907173, + "grad_norm": 0.4868183732032776, + "learning_rate": 0.0015, + "loss": 2.3844, + "step": 1640 + }, + { + "epoch": 0.17405063291139242, + "grad_norm": 0.879161536693573, + "learning_rate": 0.0015, + "loss": 2.3771, + "step": 1650 + }, + { + "epoch": 0.1751054852320675, + "grad_norm": 0.4290091395378113, + "learning_rate": 0.0015, + "loss": 2.3763, + "step": 1660 + }, + { + "epoch": 0.17616033755274263, + "grad_norm": 0.5047755241394043, + "learning_rate": 0.0015, + "loss": 2.3779, + "step": 1670 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 0.4913913309574127, + "learning_rate": 0.0015, + "loss": 2.3635, + "step": 1680 + }, + { + "epoch": 0.17827004219409281, + "grad_norm": 0.5842776894569397, + "learning_rate": 0.0015, + "loss": 2.3465, + "step": 1690 + }, + { + "epoch": 0.17932489451476794, + "grad_norm": 0.5359498858451843, + "learning_rate": 0.0015, + "loss": 2.3633, + "step": 1700 + }, + { + "epoch": 
0.18037974683544303, + "grad_norm": 0.44172248244285583, + "learning_rate": 0.0015, + "loss": 2.3456, + "step": 1710 + }, + { + "epoch": 0.18143459915611815, + "grad_norm": 0.42451030015945435, + "learning_rate": 0.0015, + "loss": 2.3388, + "step": 1720 + }, + { + "epoch": 0.18248945147679324, + "grad_norm": 0.601703941822052, + "learning_rate": 0.0015, + "loss": 2.337, + "step": 1730 + }, + { + "epoch": 0.18354430379746836, + "grad_norm": 0.610880970954895, + "learning_rate": 0.0015, + "loss": 2.3406, + "step": 1740 + }, + { + "epoch": 0.18459915611814345, + "grad_norm": 0.46340212225914, + "learning_rate": 0.0015, + "loss": 2.3544, + "step": 1750 + }, + { + "epoch": 0.18565400843881857, + "grad_norm": 0.5473008751869202, + "learning_rate": 0.0015, + "loss": 2.3316, + "step": 1760 + }, + { + "epoch": 0.18670886075949367, + "grad_norm": 0.5524681210517883, + "learning_rate": 0.0015, + "loss": 2.3303, + "step": 1770 + }, + { + "epoch": 0.1877637130801688, + "grad_norm": 0.437361478805542, + "learning_rate": 0.0015, + "loss": 2.3244, + "step": 1780 + }, + { + "epoch": 0.18881856540084388, + "grad_norm": 0.4375350773334503, + "learning_rate": 0.0015, + "loss": 2.3209, + "step": 1790 + }, + { + "epoch": 0.189873417721519, + "grad_norm": 0.38176682591438293, + "learning_rate": 0.0015, + "loss": 2.3201, + "step": 1800 + }, + { + "epoch": 0.1909282700421941, + "grad_norm": 0.5863919258117676, + "learning_rate": 0.0015, + "loss": 2.3063, + "step": 1810 + }, + { + "epoch": 0.19198312236286919, + "grad_norm": 0.4978156089782715, + "learning_rate": 0.0015, + "loss": 2.2995, + "step": 1820 + }, + { + "epoch": 0.1930379746835443, + "grad_norm": 0.46071624755859375, + "learning_rate": 0.0015, + "loss": 2.2958, + "step": 1830 + }, + { + "epoch": 0.1940928270042194, + "grad_norm": 0.44883671402931213, + "learning_rate": 0.0015, + "loss": 2.293, + "step": 1840 + }, + { + "epoch": 0.19514767932489452, + "grad_norm": 0.6156477332115173, + "learning_rate": 0.0015, + "loss": 2.2933, + 
"step": 1850 + }, + { + "epoch": 0.1962025316455696, + "grad_norm": 0.4380180537700653, + "learning_rate": 0.0015, + "loss": 2.2938, + "step": 1860 + }, + { + "epoch": 0.19725738396624473, + "grad_norm": 0.6051855087280273, + "learning_rate": 0.0015, + "loss": 2.2932, + "step": 1870 + }, + { + "epoch": 0.19831223628691982, + "grad_norm": 0.45070695877075195, + "learning_rate": 0.0015, + "loss": 2.2789, + "step": 1880 + }, + { + "epoch": 0.19936708860759494, + "grad_norm": 0.410013347864151, + "learning_rate": 0.0015, + "loss": 2.2736, + "step": 1890 + }, + { + "epoch": 0.20042194092827004, + "grad_norm": 0.6564710140228271, + "learning_rate": 0.0015, + "loss": 2.2751, + "step": 1900 + }, + { + "epoch": 0.20147679324894516, + "grad_norm": 0.5580166578292847, + "learning_rate": 0.0015, + "loss": 2.2727, + "step": 1910 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 0.5010768175125122, + "learning_rate": 0.0015, + "loss": 2.265, + "step": 1920 + }, + { + "epoch": 0.20358649789029537, + "grad_norm": 0.778263509273529, + "learning_rate": 0.0015, + "loss": 2.271, + "step": 1930 + }, + { + "epoch": 0.20464135021097046, + "grad_norm": 0.40524497628211975, + "learning_rate": 0.0015, + "loss": 2.2536, + "step": 1940 + }, + { + "epoch": 0.20569620253164558, + "grad_norm": 0.4233589470386505, + "learning_rate": 0.0015, + "loss": 2.2531, + "step": 1950 + }, + { + "epoch": 0.20675105485232068, + "grad_norm": 0.4691861867904663, + "learning_rate": 0.0015, + "loss": 2.2573, + "step": 1960 + }, + { + "epoch": 0.20780590717299577, + "grad_norm": 0.45676785707473755, + "learning_rate": 0.0015, + "loss": 2.2578, + "step": 1970 + }, + { + "epoch": 0.2088607594936709, + "grad_norm": 0.5967681407928467, + "learning_rate": 0.0015, + "loss": 2.2415, + "step": 1980 + }, + { + "epoch": 0.20991561181434598, + "grad_norm": 0.502697765827179, + "learning_rate": 0.0015, + "loss": 2.2372, + "step": 1990 + }, + { + "epoch": 0.2109704641350211, + "grad_norm": 0.6226141452789307, + 
"learning_rate": 0.0015, + "loss": 2.2343, + "step": 2000 + }, + { + "epoch": 0.2120253164556962, + "grad_norm": 0.4593786299228668, + "learning_rate": 0.0015, + "loss": 2.2427, + "step": 2010 + }, + { + "epoch": 0.21308016877637131, + "grad_norm": 0.4388757050037384, + "learning_rate": 0.0015, + "loss": 2.2475, + "step": 2020 + }, + { + "epoch": 0.2141350210970464, + "grad_norm": 0.5371217727661133, + "learning_rate": 0.0015, + "loss": 2.2326, + "step": 2030 + }, + { + "epoch": 0.21518987341772153, + "grad_norm": 0.5672159194946289, + "learning_rate": 0.0015, + "loss": 2.2277, + "step": 2040 + }, + { + "epoch": 0.21624472573839662, + "grad_norm": 0.49030035734176636, + "learning_rate": 0.0015, + "loss": 2.2268, + "step": 2050 + }, + { + "epoch": 0.21729957805907174, + "grad_norm": 0.4842599034309387, + "learning_rate": 0.0015, + "loss": 2.2142, + "step": 2060 + }, + { + "epoch": 0.21835443037974683, + "grad_norm": 0.5450977683067322, + "learning_rate": 0.0015, + "loss": 2.213, + "step": 2070 + }, + { + "epoch": 0.21940928270042195, + "grad_norm": 0.4474094808101654, + "learning_rate": 0.0015, + "loss": 2.2203, + "step": 2080 + }, + { + "epoch": 0.22046413502109705, + "grad_norm": 0.43218910694122314, + "learning_rate": 0.0015, + "loss": 2.2045, + "step": 2090 + }, + { + "epoch": 0.22151898734177214, + "grad_norm": 0.40497711300849915, + "learning_rate": 0.0015, + "loss": 2.2106, + "step": 2100 + }, + { + "epoch": 0.22257383966244726, + "grad_norm": 0.6158599853515625, + "learning_rate": 0.0015, + "loss": 2.2039, + "step": 2110 + }, + { + "epoch": 0.22362869198312235, + "grad_norm": 0.4341694116592407, + "learning_rate": 0.0015, + "loss": 2.1958, + "step": 2120 + }, + { + "epoch": 0.22468354430379747, + "grad_norm": 0.5063780546188354, + "learning_rate": 0.0015, + "loss": 2.2014, + "step": 2130 + }, + { + "epoch": 0.22573839662447256, + "grad_norm": 0.4195190668106079, + "learning_rate": 0.0015, + "loss": 2.1905, + "step": 2140 + }, + { + "epoch": 
0.22679324894514769, + "grad_norm": 0.3930788040161133, + "learning_rate": 0.0015, + "loss": 2.1928, + "step": 2150 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 0.48034796118736267, + "learning_rate": 0.0015, + "loss": 2.2006, + "step": 2160 + }, + { + "epoch": 0.2289029535864979, + "grad_norm": 0.44854801893234253, + "learning_rate": 0.0015, + "loss": 2.1841, + "step": 2170 + }, + { + "epoch": 0.229957805907173, + "grad_norm": 0.41202056407928467, + "learning_rate": 0.0015, + "loss": 2.1671, + "step": 2180 + }, + { + "epoch": 0.2310126582278481, + "grad_norm": 0.5261996984481812, + "learning_rate": 0.0015, + "loss": 2.1838, + "step": 2190 + }, + { + "epoch": 0.2320675105485232, + "grad_norm": 0.42454469203948975, + "learning_rate": 0.0015, + "loss": 2.1802, + "step": 2200 + }, + { + "epoch": 0.23312236286919832, + "grad_norm": 0.37238433957099915, + "learning_rate": 0.0015, + "loss": 2.1664, + "step": 2210 + }, + { + "epoch": 0.23417721518987342, + "grad_norm": 0.46407178044319153, + "learning_rate": 0.0015, + "loss": 2.1756, + "step": 2220 + }, + { + "epoch": 0.23523206751054854, + "grad_norm": 0.546927809715271, + "learning_rate": 0.0015, + "loss": 2.1658, + "step": 2230 + }, + { + "epoch": 0.23628691983122363, + "grad_norm": 0.4710257649421692, + "learning_rate": 0.0015, + "loss": 2.1611, + "step": 2240 + }, + { + "epoch": 0.23734177215189872, + "grad_norm": 0.5447347164154053, + "learning_rate": 0.0015, + "loss": 2.161, + "step": 2250 + }, + { + "epoch": 0.23839662447257384, + "grad_norm": 0.4184820353984833, + "learning_rate": 0.0015, + "loss": 2.152, + "step": 2260 + }, + { + "epoch": 0.23945147679324894, + "grad_norm": 0.4922381341457367, + "learning_rate": 0.0015, + "loss": 2.1782, + "step": 2270 + }, + { + "epoch": 0.24050632911392406, + "grad_norm": 0.5470576882362366, + "learning_rate": 0.0015, + "loss": 2.1621, + "step": 2280 + }, + { + "epoch": 0.24156118143459915, + "grad_norm": 0.5045710802078247, + "learning_rate": 0.0015, + "loss": 
2.1477, + "step": 2290 + }, + { + "epoch": 0.24261603375527427, + "grad_norm": 0.49984458088874817, + "learning_rate": 0.0015, + "loss": 2.1462, + "step": 2300 + }, + { + "epoch": 0.24367088607594936, + "grad_norm": 0.483463317155838, + "learning_rate": 0.0015, + "loss": 2.1517, + "step": 2310 + }, + { + "epoch": 0.24472573839662448, + "grad_norm": 0.3996365964412689, + "learning_rate": 0.0015, + "loss": 2.1513, + "step": 2320 + }, + { + "epoch": 0.24578059071729957, + "grad_norm": 0.5950316190719604, + "learning_rate": 0.0015, + "loss": 2.1338, + "step": 2330 + }, + { + "epoch": 0.2468354430379747, + "grad_norm": 0.39514636993408203, + "learning_rate": 0.0015, + "loss": 2.1311, + "step": 2340 + }, + { + "epoch": 0.2478902953586498, + "grad_norm": 0.6214424967765808, + "learning_rate": 0.0015, + "loss": 2.1275, + "step": 2350 + }, + { + "epoch": 0.2489451476793249, + "grad_norm": 0.5175051689147949, + "learning_rate": 0.0015, + "loss": 2.135, + "step": 2360 + }, + { + "epoch": 0.25, + "grad_norm": 0.43594011664390564, + "learning_rate": 0.0015, + "loss": 2.1315, + "step": 2370 + }, + { + "epoch": 0.2510548523206751, + "grad_norm": 0.3934478163719177, + "learning_rate": 0.0015, + "loss": 2.1282, + "step": 2380 + }, + { + "epoch": 0.2521097046413502, + "grad_norm": 0.4840888977050781, + "learning_rate": 0.0015, + "loss": 2.1241, + "step": 2390 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 0.4207134246826172, + "learning_rate": 0.0015, + "loss": 2.1208, + "step": 2400 + }, + { + "epoch": 0.2542194092827004, + "grad_norm": 0.48744526505470276, + "learning_rate": 0.0015, + "loss": 2.123, + "step": 2410 + }, + { + "epoch": 0.2552742616033755, + "grad_norm": 0.5121942758560181, + "learning_rate": 0.0015, + "loss": 2.121, + "step": 2420 + }, + { + "epoch": 0.2563291139240506, + "grad_norm": 0.34975388646125793, + "learning_rate": 0.0015, + "loss": 2.1147, + "step": 2430 + }, + { + "epoch": 0.25738396624472576, + "grad_norm": 0.37598225474357605, + 
"learning_rate": 0.0015, + "loss": 2.1197, + "step": 2440 + }, + { + "epoch": 0.25843881856540085, + "grad_norm": 0.4468732476234436, + "learning_rate": 0.0015, + "loss": 2.1057, + "step": 2450 + }, + { + "epoch": 0.25949367088607594, + "grad_norm": 0.4851619005203247, + "learning_rate": 0.0015, + "loss": 2.1172, + "step": 2460 + }, + { + "epoch": 0.26054852320675104, + "grad_norm": 0.49839311838150024, + "learning_rate": 0.0015, + "loss": 2.1081, + "step": 2470 + }, + { + "epoch": 0.2616033755274262, + "grad_norm": 0.4657372534275055, + "learning_rate": 0.0015, + "loss": 2.1071, + "step": 2480 + }, + { + "epoch": 0.2626582278481013, + "grad_norm": 0.4407047927379608, + "learning_rate": 0.0015, + "loss": 2.0958, + "step": 2490 + }, + { + "epoch": 0.26371308016877637, + "grad_norm": 0.4567389488220215, + "learning_rate": 0.0015, + "loss": 2.1025, + "step": 2500 + }, + { + "epoch": 0.26476793248945146, + "grad_norm": 0.4023986756801605, + "learning_rate": 0.0015, + "loss": 2.0964, + "step": 2510 + }, + { + "epoch": 0.26582278481012656, + "grad_norm": 0.5103545188903809, + "learning_rate": 0.0015, + "loss": 2.0986, + "step": 2520 + }, + { + "epoch": 0.2668776371308017, + "grad_norm": 0.4978145956993103, + "learning_rate": 0.0015, + "loss": 2.094, + "step": 2530 + }, + { + "epoch": 0.2679324894514768, + "grad_norm": 0.4225064516067505, + "learning_rate": 0.0015, + "loss": 2.0965, + "step": 2540 + }, + { + "epoch": 0.2689873417721519, + "grad_norm": 0.40300053358078003, + "learning_rate": 0.0015, + "loss": 2.085, + "step": 2550 + }, + { + "epoch": 0.270042194092827, + "grad_norm": 0.5581527352333069, + "learning_rate": 0.0015, + "loss": 2.0899, + "step": 2560 + }, + { + "epoch": 0.27109704641350213, + "grad_norm": 0.5644493103027344, + "learning_rate": 0.0015, + "loss": 2.0859, + "step": 2570 + }, + { + "epoch": 0.2721518987341772, + "grad_norm": 0.4028775691986084, + "learning_rate": 0.0015, + "loss": 2.0844, + "step": 2580 + }, + { + "epoch": 0.2732067510548523, + 
"grad_norm": 0.4478062391281128, + "learning_rate": 0.0015, + "loss": 2.0866, + "step": 2590 + }, + { + "epoch": 0.2742616033755274, + "grad_norm": 0.4071362316608429, + "learning_rate": 0.0015, + "loss": 2.0833, + "step": 2600 + }, + { + "epoch": 0.27531645569620256, + "grad_norm": 0.5723115801811218, + "learning_rate": 0.0015, + "loss": 2.0821, + "step": 2610 + }, + { + "epoch": 0.27637130801687765, + "grad_norm": 0.3730677366256714, + "learning_rate": 0.0015, + "loss": 2.0795, + "step": 2620 + }, + { + "epoch": 0.27742616033755274, + "grad_norm": 0.5798608660697937, + "learning_rate": 0.0015, + "loss": 2.0647, + "step": 2630 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 0.5718798041343689, + "learning_rate": 0.0015, + "loss": 2.0795, + "step": 2640 + }, + { + "epoch": 0.2795358649789029, + "grad_norm": 0.49430182576179504, + "learning_rate": 0.0015, + "loss": 2.0699, + "step": 2650 + }, + { + "epoch": 0.2805907172995781, + "grad_norm": 0.43516209721565247, + "learning_rate": 0.0015, + "loss": 2.0673, + "step": 2660 + }, + { + "epoch": 0.28164556962025317, + "grad_norm": 0.494834840297699, + "learning_rate": 0.0015, + "loss": 2.0629, + "step": 2670 + }, + { + "epoch": 0.28270042194092826, + "grad_norm": 0.5274789333343506, + "learning_rate": 0.0015, + "loss": 2.0645, + "step": 2680 + }, + { + "epoch": 0.28375527426160335, + "grad_norm": 0.5654980540275574, + "learning_rate": 0.0015, + "loss": 2.0609, + "step": 2690 + }, + { + "epoch": 0.2848101265822785, + "grad_norm": 0.4479534327983856, + "learning_rate": 0.0015, + "loss": 2.0578, + "step": 2700 + }, + { + "epoch": 0.2858649789029536, + "grad_norm": 0.5492680668830872, + "learning_rate": 0.0015, + "loss": 2.0606, + "step": 2710 + }, + { + "epoch": 0.2869198312236287, + "grad_norm": 0.43843865394592285, + "learning_rate": 0.0015, + "loss": 2.0619, + "step": 2720 + }, + { + "epoch": 0.2879746835443038, + "grad_norm": 0.4003993570804596, + "learning_rate": 0.0015, + "loss": 2.0393, + "step": 2730 + }, + 
{ + "epoch": 0.2890295358649789, + "grad_norm": 0.7665315270423889, + "learning_rate": 0.0015, + "loss": 2.0548, + "step": 2740 + }, + { + "epoch": 0.290084388185654, + "grad_norm": 0.42583227157592773, + "learning_rate": 0.0015, + "loss": 2.0538, + "step": 2750 + }, + { + "epoch": 0.2911392405063291, + "grad_norm": 0.39912813901901245, + "learning_rate": 0.0015, + "loss": 2.0579, + "step": 2760 + }, + { + "epoch": 0.2921940928270042, + "grad_norm": 0.4089771807193756, + "learning_rate": 0.0015, + "loss": 2.0517, + "step": 2770 + }, + { + "epoch": 0.29324894514767935, + "grad_norm": 0.4719853699207306, + "learning_rate": 0.0015, + "loss": 2.0505, + "step": 2780 + }, + { + "epoch": 0.29430379746835444, + "grad_norm": 0.4800323247909546, + "learning_rate": 0.0015, + "loss": 2.0453, + "step": 2790 + }, + { + "epoch": 0.29535864978902954, + "grad_norm": 0.4587271511554718, + "learning_rate": 0.0015, + "loss": 2.0442, + "step": 2800 + }, + { + "epoch": 0.29641350210970463, + "grad_norm": 0.4211869239807129, + "learning_rate": 0.0015, + "loss": 2.0433, + "step": 2810 + }, + { + "epoch": 0.2974683544303797, + "grad_norm": 0.39003247022628784, + "learning_rate": 0.0015, + "loss": 2.0481, + "step": 2820 + }, + { + "epoch": 0.29852320675105487, + "grad_norm": 0.7108922004699707, + "learning_rate": 0.0015, + "loss": 2.019, + "step": 2830 + }, + { + "epoch": 0.29957805907172996, + "grad_norm": 0.4657760560512543, + "learning_rate": 0.0015, + "loss": 2.0245, + "step": 2840 + }, + { + "epoch": 0.30063291139240506, + "grad_norm": 0.49277517199516296, + "learning_rate": 0.0015, + "loss": 2.0318, + "step": 2850 + }, + { + "epoch": 0.30168776371308015, + "grad_norm": 0.4489877223968506, + "learning_rate": 0.0015, + "loss": 2.0344, + "step": 2860 + }, + { + "epoch": 0.3027426160337553, + "grad_norm": 0.44397681951522827, + "learning_rate": 0.0015, + "loss": 2.0297, + "step": 2870 + }, + { + "epoch": 0.3037974683544304, + "grad_norm": 0.4459514915943146, + "learning_rate": 0.0015, + 
"loss": 2.0158, + "step": 2880 + }, + { + "epoch": 0.3048523206751055, + "grad_norm": 0.38398468494415283, + "learning_rate": 0.0015, + "loss": 2.0197, + "step": 2890 + }, + { + "epoch": 0.3059071729957806, + "grad_norm": 0.5393858551979065, + "learning_rate": 0.0015, + "loss": 2.0257, + "step": 2900 + }, + { + "epoch": 0.3069620253164557, + "grad_norm": 0.6433526873588562, + "learning_rate": 0.0015, + "loss": 2.0245, + "step": 2910 + }, + { + "epoch": 0.3080168776371308, + "grad_norm": 0.47844600677490234, + "learning_rate": 0.0015, + "loss": 2.0168, + "step": 2920 + }, + { + "epoch": 0.3090717299578059, + "grad_norm": 0.4422524571418762, + "learning_rate": 0.0015, + "loss": 2.0113, + "step": 2930 + }, + { + "epoch": 0.310126582278481, + "grad_norm": 0.4225659668445587, + "learning_rate": 0.0015, + "loss": 2.0184, + "step": 2940 + }, + { + "epoch": 0.3111814345991561, + "grad_norm": 0.6223881840705872, + "learning_rate": 0.0015, + "loss": 2.0275, + "step": 2950 + }, + { + "epoch": 0.31223628691983124, + "grad_norm": 0.4961652457714081, + "learning_rate": 0.0015, + "loss": 2.0061, + "step": 2960 + }, + { + "epoch": 0.31329113924050633, + "grad_norm": 0.648638904094696, + "learning_rate": 0.0015, + "loss": 2.0015, + "step": 2970 + }, + { + "epoch": 0.3143459915611814, + "grad_norm": 0.4812861680984497, + "learning_rate": 0.0015, + "loss": 2.0064, + "step": 2980 + }, + { + "epoch": 0.3154008438818565, + "grad_norm": 0.4108127951622009, + "learning_rate": 0.0015, + "loss": 2.023, + "step": 2990 + }, + { + "epoch": 0.31645569620253167, + "grad_norm": 0.44272148609161377, + "learning_rate": 0.0015, + "loss": 1.9981, + "step": 3000 + }, + { + "epoch": 0.31751054852320676, + "grad_norm": 0.42668408155441284, + "learning_rate": 0.0015, + "loss": 2.0055, + "step": 3010 + }, + { + "epoch": 0.31856540084388185, + "grad_norm": 0.5618040561676025, + "learning_rate": 0.0015, + "loss": 2.0039, + "step": 3020 + }, + { + "epoch": 0.31962025316455694, + "grad_norm": 
0.4394238591194153, + "learning_rate": 0.0015, + "loss": 1.9933, + "step": 3030 + }, + { + "epoch": 0.3206751054852321, + "grad_norm": 0.3770120143890381, + "learning_rate": 0.0015, + "loss": 2.001, + "step": 3040 + }, + { + "epoch": 0.3217299578059072, + "grad_norm": 0.36212030053138733, + "learning_rate": 0.0015, + "loss": 1.9949, + "step": 3050 + }, + { + "epoch": 0.3227848101265823, + "grad_norm": 0.3956080973148346, + "learning_rate": 0.0015, + "loss": 2.002, + "step": 3060 + }, + { + "epoch": 0.32383966244725737, + "grad_norm": 0.37893229722976685, + "learning_rate": 0.0015, + "loss": 1.9924, + "step": 3070 + }, + { + "epoch": 0.32489451476793246, + "grad_norm": 0.41634806990623474, + "learning_rate": 0.0015, + "loss": 1.9796, + "step": 3080 + }, + { + "epoch": 0.3259493670886076, + "grad_norm": 0.39234060049057007, + "learning_rate": 0.0015, + "loss": 1.9944, + "step": 3090 + }, + { + "epoch": 0.3270042194092827, + "grad_norm": 0.4398508369922638, + "learning_rate": 0.0015, + "loss": 1.982, + "step": 3100 + }, + { + "epoch": 0.3280590717299578, + "grad_norm": 0.3770347833633423, + "learning_rate": 0.0015, + "loss": 1.9875, + "step": 3110 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 0.4625287353992462, + "learning_rate": 0.0015, + "loss": 1.9874, + "step": 3120 + }, + { + "epoch": 0.33016877637130804, + "grad_norm": 0.4447138011455536, + "learning_rate": 0.0015, + "loss": 1.9978, + "step": 3130 + }, + { + "epoch": 0.33122362869198313, + "grad_norm": 0.6257829070091248, + "learning_rate": 0.0015, + "loss": 1.9743, + "step": 3140 + }, + { + "epoch": 0.3322784810126582, + "grad_norm": 0.4770488440990448, + "learning_rate": 0.0015, + "loss": 1.9829, + "step": 3150 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.41480571031570435, + "learning_rate": 0.0015, + "loss": 1.983, + "step": 3160 + }, + { + "epoch": 0.33438818565400846, + "grad_norm": 0.3980404734611511, + "learning_rate": 0.0015, + "loss": 1.9847, + "step": 3170 + }, + { + "epoch": 
0.33544303797468356, + "grad_norm": 0.5104202032089233, + "learning_rate": 0.0015, + "loss": 1.9807, + "step": 3180 + }, + { + "epoch": 0.33649789029535865, + "grad_norm": 0.6659597158432007, + "learning_rate": 0.0015, + "loss": 1.9757, + "step": 3190 + }, + { + "epoch": 0.33755274261603374, + "grad_norm": 0.45003846287727356, + "learning_rate": 0.0015, + "loss": 1.9712, + "step": 3200 + }, + { + "epoch": 0.33860759493670883, + "grad_norm": 0.6258270144462585, + "learning_rate": 0.0015, + "loss": 1.9852, + "step": 3210 + }, + { + "epoch": 0.339662447257384, + "grad_norm": 0.5980298519134521, + "learning_rate": 0.0015, + "loss": 1.9717, + "step": 3220 + }, + { + "epoch": 0.3407172995780591, + "grad_norm": 0.37435078620910645, + "learning_rate": 0.0015, + "loss": 1.9667, + "step": 3230 + }, + { + "epoch": 0.34177215189873417, + "grad_norm": 0.3714063763618469, + "learning_rate": 0.0015, + "loss": 1.9484, + "step": 3240 + }, + { + "epoch": 0.34282700421940926, + "grad_norm": 0.38705742359161377, + "learning_rate": 0.0015, + "loss": 1.9717, + "step": 3250 + }, + { + "epoch": 0.3438818565400844, + "grad_norm": 0.43854257464408875, + "learning_rate": 0.0015, + "loss": 1.9614, + "step": 3260 + }, + { + "epoch": 0.3449367088607595, + "grad_norm": 0.4405410587787628, + "learning_rate": 0.0015, + "loss": 1.9745, + "step": 3270 + }, + { + "epoch": 0.3459915611814346, + "grad_norm": 0.45475882291793823, + "learning_rate": 0.0015, + "loss": 1.9725, + "step": 3280 + }, + { + "epoch": 0.3470464135021097, + "grad_norm": 0.7493352890014648, + "learning_rate": 0.0015, + "loss": 1.9561, + "step": 3290 + }, + { + "epoch": 0.34810126582278483, + "grad_norm": 0.4059728682041168, + "learning_rate": 0.0015, + "loss": 1.9673, + "step": 3300 + }, + { + "epoch": 0.3491561181434599, + "grad_norm": 0.42357611656188965, + "learning_rate": 0.0015, + "loss": 1.9696, + "step": 3310 + }, + { + "epoch": 0.350210970464135, + "grad_norm": 0.4446803629398346, + "learning_rate": 0.0015, + "loss": 
1.9672, + "step": 3320 + }, + { + "epoch": 0.3512658227848101, + "grad_norm": 0.5391449332237244, + "learning_rate": 0.0015, + "loss": 1.9568, + "step": 3330 + }, + { + "epoch": 0.35232067510548526, + "grad_norm": 0.37968334555625916, + "learning_rate": 0.0015, + "loss": 1.9557, + "step": 3340 + }, + { + "epoch": 0.35337552742616035, + "grad_norm": 0.44053348898887634, + "learning_rate": 0.0015, + "loss": 1.9446, + "step": 3350 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 0.35404255986213684, + "learning_rate": 0.0015, + "loss": 1.9492, + "step": 3360 + }, + { + "epoch": 0.35548523206751054, + "grad_norm": 0.4087972640991211, + "learning_rate": 0.0015, + "loss": 1.9548, + "step": 3370 + }, + { + "epoch": 0.35654008438818563, + "grad_norm": 0.40248411893844604, + "learning_rate": 0.0015, + "loss": 1.9502, + "step": 3380 + }, + { + "epoch": 0.3575949367088608, + "grad_norm": 0.3695880174636841, + "learning_rate": 0.0015, + "loss": 1.9448, + "step": 3390 + }, + { + "epoch": 0.35864978902953587, + "grad_norm": 0.4488960802555084, + "learning_rate": 0.0015, + "loss": 1.9578, + "step": 3400 + }, + { + "epoch": 0.35970464135021096, + "grad_norm": 0.3623393476009369, + "learning_rate": 0.0015, + "loss": 1.9479, + "step": 3410 + }, + { + "epoch": 0.36075949367088606, + "grad_norm": 0.6637060642242432, + "learning_rate": 0.0015, + "loss": 1.932, + "step": 3420 + }, + { + "epoch": 0.3618143459915612, + "grad_norm": 0.5007205605506897, + "learning_rate": 0.0015, + "loss": 1.9479, + "step": 3430 + }, + { + "epoch": 0.3628691983122363, + "grad_norm": 0.4037579894065857, + "learning_rate": 0.0015, + "loss": 1.9404, + "step": 3440 + }, + { + "epoch": 0.3639240506329114, + "grad_norm": 0.39390191435813904, + "learning_rate": 0.0015, + "loss": 1.9387, + "step": 3450 + }, + { + "epoch": 0.3649789029535865, + "grad_norm": 0.4142867922782898, + "learning_rate": 0.0015, + "loss": 1.9292, + "step": 3460 + }, + { + "epoch": 0.36603375527426163, + "grad_norm": 
0.4093887209892273, + "learning_rate": 0.0015, + "loss": 1.9475, + "step": 3470 + }, + { + "epoch": 0.3670886075949367, + "grad_norm": 0.6525499224662781, + "learning_rate": 0.0015, + "loss": 1.9486, + "step": 3480 + }, + { + "epoch": 0.3681434599156118, + "grad_norm": 0.4268273413181305, + "learning_rate": 0.0015, + "loss": 1.9442, + "step": 3490 + }, + { + "epoch": 0.3691983122362869, + "grad_norm": 0.39740699529647827, + "learning_rate": 0.0015, + "loss": 1.9233, + "step": 3500 + }, + { + "epoch": 0.370253164556962, + "grad_norm": 0.43190065026283264, + "learning_rate": 0.0015, + "loss": 1.9401, + "step": 3510 + }, + { + "epoch": 0.37130801687763715, + "grad_norm": 0.35705068707466125, + "learning_rate": 0.0015, + "loss": 1.9359, + "step": 3520 + }, + { + "epoch": 0.37236286919831224, + "grad_norm": 0.4013505280017853, + "learning_rate": 0.0015, + "loss": 1.9446, + "step": 3530 + }, + { + "epoch": 0.37341772151898733, + "grad_norm": 0.5907027721405029, + "learning_rate": 0.0015, + "loss": 1.9432, + "step": 3540 + }, + { + "epoch": 0.3744725738396624, + "grad_norm": 0.42904919385910034, + "learning_rate": 0.0015, + "loss": 1.9287, + "step": 3550 + }, + { + "epoch": 0.3755274261603376, + "grad_norm": 0.398319274187088, + "learning_rate": 0.0015, + "loss": 1.9249, + "step": 3560 + }, + { + "epoch": 0.37658227848101267, + "grad_norm": 0.5392965078353882, + "learning_rate": 0.0015, + "loss": 1.9263, + "step": 3570 + }, + { + "epoch": 0.37763713080168776, + "grad_norm": 0.366304874420166, + "learning_rate": 0.0015, + "loss": 1.9268, + "step": 3580 + }, + { + "epoch": 0.37869198312236285, + "grad_norm": 0.3998924791812897, + "learning_rate": 0.0015, + "loss": 1.9367, + "step": 3590 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 0.4773123860359192, + "learning_rate": 0.0015, + "loss": 1.9143, + "step": 3600 + }, + { + "epoch": 0.3808016877637131, + "grad_norm": 0.4067232012748718, + "learning_rate": 0.0015, + "loss": 1.9202, + "step": 3610 + }, + { + "epoch": 
0.3818565400843882, + "grad_norm": 0.3973858654499054, + "learning_rate": 0.0015, + "loss": 1.9205, + "step": 3620 + }, + { + "epoch": 0.3829113924050633, + "grad_norm": 0.5471634864807129, + "learning_rate": 0.0015, + "loss": 1.9178, + "step": 3630 + }, + { + "epoch": 0.38396624472573837, + "grad_norm": 0.4045250713825226, + "learning_rate": 0.0015, + "loss": 1.9183, + "step": 3640 + }, + { + "epoch": 0.3850210970464135, + "grad_norm": 0.43062058091163635, + "learning_rate": 0.0015, + "loss": 1.9261, + "step": 3650 + }, + { + "epoch": 0.3860759493670886, + "grad_norm": 0.5406143069267273, + "learning_rate": 0.0015, + "loss": 1.9183, + "step": 3660 + }, + { + "epoch": 0.3871308016877637, + "grad_norm": 0.3599720597267151, + "learning_rate": 0.0015, + "loss": 1.9096, + "step": 3670 + }, + { + "epoch": 0.3881856540084388, + "grad_norm": 0.6880010962486267, + "learning_rate": 0.0015, + "loss": 1.9157, + "step": 3680 + }, + { + "epoch": 0.38924050632911394, + "grad_norm": 0.4490262269973755, + "learning_rate": 0.0015, + "loss": 1.9237, + "step": 3690 + }, + { + "epoch": 0.39029535864978904, + "grad_norm": 0.43009379506111145, + "learning_rate": 0.0015, + "loss": 1.9264, + "step": 3700 + }, + { + "epoch": 0.39135021097046413, + "grad_norm": 0.42287343740463257, + "learning_rate": 0.0015, + "loss": 1.9158, + "step": 3710 + }, + { + "epoch": 0.3924050632911392, + "grad_norm": 0.40586552023887634, + "learning_rate": 0.0015, + "loss": 1.8928, + "step": 3720 + }, + { + "epoch": 0.39345991561181437, + "grad_norm": 0.5091451406478882, + "learning_rate": 0.0015, + "loss": 1.9097, + "step": 3730 + }, + { + "epoch": 0.39451476793248946, + "grad_norm": 0.40897443890571594, + "learning_rate": 0.0015, + "loss": 1.9108, + "step": 3740 + }, + { + "epoch": 0.39556962025316456, + "grad_norm": 0.5361913442611694, + "learning_rate": 0.0015, + "loss": 1.9139, + "step": 3750 + }, + { + "epoch": 0.39662447257383965, + "grad_norm": 0.5182894468307495, + "learning_rate": 0.0015, + "loss": 
1.9046, + "step": 3760 + }, + { + "epoch": 0.39767932489451474, + "grad_norm": 0.3900398015975952, + "learning_rate": 0.0015, + "loss": 1.9175, + "step": 3770 + }, + { + "epoch": 0.3987341772151899, + "grad_norm": 0.3570542335510254, + "learning_rate": 0.0015, + "loss": 1.902, + "step": 3780 + }, + { + "epoch": 0.399789029535865, + "grad_norm": 0.421865314245224, + "learning_rate": 0.0015, + "loss": 1.9076, + "step": 3790 + }, + { + "epoch": 0.4008438818565401, + "grad_norm": 0.429984450340271, + "learning_rate": 0.0015, + "loss": 1.9115, + "step": 3800 + }, + { + "epoch": 0.40189873417721517, + "grad_norm": 0.3910616636276245, + "learning_rate": 0.0015, + "loss": 1.9016, + "step": 3810 + }, + { + "epoch": 0.4029535864978903, + "grad_norm": 0.39357802271842957, + "learning_rate": 0.0015, + "loss": 1.887, + "step": 3820 + }, + { + "epoch": 0.4040084388185654, + "grad_norm": 0.3555103838443756, + "learning_rate": 0.0015, + "loss": 1.8912, + "step": 3830 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 0.38038474321365356, + "learning_rate": 0.0015, + "loss": 1.9123, + "step": 3840 + }, + { + "epoch": 0.4061181434599156, + "grad_norm": 0.37897661328315735, + "learning_rate": 0.0015, + "loss": 1.8912, + "step": 3850 + }, + { + "epoch": 0.40717299578059074, + "grad_norm": 0.6943684816360474, + "learning_rate": 0.0015, + "loss": 1.9076, + "step": 3860 + }, + { + "epoch": 0.40822784810126583, + "grad_norm": 0.43240222334861755, + "learning_rate": 0.0015, + "loss": 1.8944, + "step": 3870 + }, + { + "epoch": 0.4092827004219409, + "grad_norm": 0.4312829375267029, + "learning_rate": 0.0015, + "loss": 1.8821, + "step": 3880 + }, + { + "epoch": 0.410337552742616, + "grad_norm": 0.4150441586971283, + "learning_rate": 0.0015, + "loss": 1.8863, + "step": 3890 + }, + { + "epoch": 0.41139240506329117, + "grad_norm": 0.5343963503837585, + "learning_rate": 0.0015, + "loss": 1.8869, + "step": 3900 + }, + { + "epoch": 0.41244725738396626, + "grad_norm": 0.3959296941757202, + 
"learning_rate": 0.0015, + "loss": 1.8858, + "step": 3910 + }, + { + "epoch": 0.41350210970464135, + "grad_norm": 0.3625887334346771, + "learning_rate": 0.0015, + "loss": 1.8836, + "step": 3920 + }, + { + "epoch": 0.41455696202531644, + "grad_norm": 0.35107797384262085, + "learning_rate": 0.0015, + "loss": 1.8861, + "step": 3930 + }, + { + "epoch": 0.41561181434599154, + "grad_norm": 0.4718173146247864, + "learning_rate": 0.0015, + "loss": 1.8847, + "step": 3940 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 0.49111998081207275, + "learning_rate": 0.0015, + "loss": 1.8882, + "step": 3950 + }, + { + "epoch": 0.4177215189873418, + "grad_norm": 0.43096768856048584, + "learning_rate": 0.0015, + "loss": 1.8858, + "step": 3960 + }, + { + "epoch": 0.41877637130801687, + "grad_norm": 0.41032716631889343, + "learning_rate": 0.0015, + "loss": 1.8751, + "step": 3970 + }, + { + "epoch": 0.41983122362869196, + "grad_norm": 0.4288388788700104, + "learning_rate": 0.0015, + "loss": 1.8859, + "step": 3980 + }, + { + "epoch": 0.4208860759493671, + "grad_norm": 0.40460100769996643, + "learning_rate": 0.0015, + "loss": 1.8767, + "step": 3990 + }, + { + "epoch": 0.4219409282700422, + "grad_norm": 0.4012301564216614, + "learning_rate": 0.0015, + "loss": 1.8898, + "step": 4000 + }, + { + "epoch": 0.4229957805907173, + "grad_norm": 0.38782012462615967, + "learning_rate": 0.0015, + "loss": 1.8769, + "step": 4010 + }, + { + "epoch": 0.4240506329113924, + "grad_norm": 0.40569114685058594, + "learning_rate": 0.0015, + "loss": 1.8784, + "step": 4020 + }, + { + "epoch": 0.42510548523206754, + "grad_norm": 0.42616739869117737, + "learning_rate": 0.0015, + "loss": 1.8711, + "step": 4030 + }, + { + "epoch": 0.42616033755274263, + "grad_norm": 0.3604695796966553, + "learning_rate": 0.0015, + "loss": 1.8802, + "step": 4040 + }, + { + "epoch": 0.4272151898734177, + "grad_norm": 0.4237070381641388, + "learning_rate": 0.0015, + "loss": 1.8783, + "step": 4050 + }, + { + "epoch": 
0.4282700421940928, + "grad_norm": 0.3822304606437683, + "learning_rate": 0.0015, + "loss": 1.8851, + "step": 4060 + }, + { + "epoch": 0.4293248945147679, + "grad_norm": 0.4372658133506775, + "learning_rate": 0.0015, + "loss": 1.8797, + "step": 4070 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 0.41478264331817627, + "learning_rate": 0.0015, + "loss": 1.8844, + "step": 4080 + }, + { + "epoch": 0.43143459915611815, + "grad_norm": 0.3885349929332733, + "learning_rate": 0.0015, + "loss": 1.8784, + "step": 4090 + }, + { + "epoch": 0.43248945147679324, + "grad_norm": 0.4131007194519043, + "learning_rate": 0.0015, + "loss": 1.8738, + "step": 4100 + }, + { + "epoch": 0.43354430379746833, + "grad_norm": 0.3749596178531647, + "learning_rate": 0.0015, + "loss": 1.8688, + "step": 4110 + }, + { + "epoch": 0.4345991561181435, + "grad_norm": 0.49073001742362976, + "learning_rate": 0.0015, + "loss": 1.867, + "step": 4120 + }, + { + "epoch": 0.4356540084388186, + "grad_norm": 0.5004504323005676, + "learning_rate": 0.0015, + "loss": 1.8677, + "step": 4130 + }, + { + "epoch": 0.43670886075949367, + "grad_norm": 0.45799756050109863, + "learning_rate": 0.0015, + "loss": 1.867, + "step": 4140 + }, + { + "epoch": 0.43776371308016876, + "grad_norm": 0.46326014399528503, + "learning_rate": 0.0015, + "loss": 1.8749, + "step": 4150 + }, + { + "epoch": 0.4388185654008439, + "grad_norm": 0.3633853793144226, + "learning_rate": 0.0015, + "loss": 1.8668, + "step": 4160 + }, + { + "epoch": 0.439873417721519, + "grad_norm": 0.38705265522003174, + "learning_rate": 0.0015, + "loss": 1.8687, + "step": 4170 + }, + { + "epoch": 0.4409282700421941, + "grad_norm": 0.33722713589668274, + "learning_rate": 0.0015, + "loss": 1.8615, + "step": 4180 + }, + { + "epoch": 0.4419831223628692, + "grad_norm": 0.361875981092453, + "learning_rate": 0.0015, + "loss": 1.8692, + "step": 4190 + }, + { + "epoch": 0.4430379746835443, + "grad_norm": 0.3760627508163452, + "learning_rate": 0.0015, + "loss": 1.8642, + 
"step": 4200 + }, + { + "epoch": 0.4440928270042194, + "grad_norm": 0.5219722390174866, + "learning_rate": 0.0015, + "loss": 1.8544, + "step": 4210 + }, + { + "epoch": 0.4451476793248945, + "grad_norm": 0.5878645777702332, + "learning_rate": 0.0015, + "loss": 1.8621, + "step": 4220 + }, + { + "epoch": 0.4462025316455696, + "grad_norm": 0.6367361545562744, + "learning_rate": 0.0015, + "loss": 1.8602, + "step": 4230 + }, + { + "epoch": 0.4472573839662447, + "grad_norm": 0.40897583961486816, + "learning_rate": 0.0015, + "loss": 1.8593, + "step": 4240 + }, + { + "epoch": 0.44831223628691985, + "grad_norm": 0.3774475157260895, + "learning_rate": 0.0015, + "loss": 1.8624, + "step": 4250 + }, + { + "epoch": 0.44936708860759494, + "grad_norm": 0.4458273649215698, + "learning_rate": 0.0015, + "loss": 1.8677, + "step": 4260 + }, + { + "epoch": 0.45042194092827004, + "grad_norm": 0.36084410548210144, + "learning_rate": 0.0015, + "loss": 1.8549, + "step": 4270 + }, + { + "epoch": 0.45147679324894513, + "grad_norm": 0.4049150049686432, + "learning_rate": 0.0015, + "loss": 1.8479, + "step": 4280 + }, + { + "epoch": 0.4525316455696203, + "grad_norm": 0.5080804228782654, + "learning_rate": 0.0015, + "loss": 1.8609, + "step": 4290 + }, + { + "epoch": 0.45358649789029537, + "grad_norm": 0.656667172908783, + "learning_rate": 0.0015, + "loss": 1.8577, + "step": 4300 + }, + { + "epoch": 0.45464135021097046, + "grad_norm": 0.6060461401939392, + "learning_rate": 0.0015, + "loss": 1.8586, + "step": 4310 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 0.5427998900413513, + "learning_rate": 0.0015, + "loss": 1.8501, + "step": 4320 + }, + { + "epoch": 0.45675105485232065, + "grad_norm": 0.3466227650642395, + "learning_rate": 0.0015, + "loss": 1.8577, + "step": 4330 + }, + { + "epoch": 0.4578059071729958, + "grad_norm": 0.4191066026687622, + "learning_rate": 0.0015, + "loss": 1.8464, + "step": 4340 + }, + { + "epoch": 0.4588607594936709, + "grad_norm": 0.48094719648361206, + 
"learning_rate": 0.0015, + "loss": 1.849, + "step": 4350 + }, + { + "epoch": 0.459915611814346, + "grad_norm": 0.49614375829696655, + "learning_rate": 0.0015, + "loss": 1.8631, + "step": 4360 + }, + { + "epoch": 0.4609704641350211, + "grad_norm": 0.38866937160491943, + "learning_rate": 0.0015, + "loss": 1.8404, + "step": 4370 + }, + { + "epoch": 0.4620253164556962, + "grad_norm": 0.4107779562473297, + "learning_rate": 0.0015, + "loss": 1.8469, + "step": 4380 + }, + { + "epoch": 0.4630801687763713, + "grad_norm": 0.3650112450122833, + "learning_rate": 0.0015, + "loss": 1.8416, + "step": 4390 + }, + { + "epoch": 0.4641350210970464, + "grad_norm": 0.6362186074256897, + "learning_rate": 0.0015, + "loss": 1.8493, + "step": 4400 + }, + { + "epoch": 0.4651898734177215, + "grad_norm": 0.4842089116573334, + "learning_rate": 0.0015, + "loss": 1.8624, + "step": 4410 + }, + { + "epoch": 0.46624472573839665, + "grad_norm": 0.43159064650535583, + "learning_rate": 0.0015, + "loss": 1.8435, + "step": 4420 + }, + { + "epoch": 0.46729957805907174, + "grad_norm": 0.49956589937210083, + "learning_rate": 0.0015, + "loss": 1.848, + "step": 4430 + }, + { + "epoch": 0.46835443037974683, + "grad_norm": 0.3889283537864685, + "learning_rate": 0.0015, + "loss": 1.8457, + "step": 4440 + }, + { + "epoch": 0.4694092827004219, + "grad_norm": 0.420112282037735, + "learning_rate": 0.0015, + "loss": 1.8485, + "step": 4450 + }, + { + "epoch": 0.4704641350210971, + "grad_norm": 0.3811125159263611, + "learning_rate": 0.0015, + "loss": 1.837, + "step": 4460 + }, + { + "epoch": 0.47151898734177217, + "grad_norm": 0.3702952563762665, + "learning_rate": 0.0015, + "loss": 1.8464, + "step": 4470 + }, + { + "epoch": 0.47257383966244726, + "grad_norm": 0.41521361470222473, + "learning_rate": 0.0015, + "loss": 1.8503, + "step": 4480 + }, + { + "epoch": 0.47362869198312235, + "grad_norm": 0.4480724036693573, + "learning_rate": 0.0015, + "loss": 1.8366, + "step": 4490 + }, + { + "epoch": 0.47468354430379744, + 
"grad_norm": 0.38207632303237915, + "learning_rate": 0.0015, + "loss": 1.8382, + "step": 4500 + }, + { + "epoch": 0.4757383966244726, + "grad_norm": 0.4234530031681061, + "learning_rate": 0.0015, + "loss": 1.8433, + "step": 4510 + }, + { + "epoch": 0.4767932489451477, + "grad_norm": 0.3913861811161041, + "learning_rate": 0.0015, + "loss": 1.8248, + "step": 4520 + }, + { + "epoch": 0.4778481012658228, + "grad_norm": 0.35109612345695496, + "learning_rate": 0.0015, + "loss": 1.8427, + "step": 4530 + }, + { + "epoch": 0.47890295358649787, + "grad_norm": 0.37936994433403015, + "learning_rate": 0.0015, + "loss": 1.843, + "step": 4540 + }, + { + "epoch": 0.479957805907173, + "grad_norm": 0.472234845161438, + "learning_rate": 0.0015, + "loss": 1.8402, + "step": 4550 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 0.4550206661224365, + "learning_rate": 0.0015, + "loss": 1.8303, + "step": 4560 + }, + { + "epoch": 0.4820675105485232, + "grad_norm": 0.3389590084552765, + "learning_rate": 0.0015, + "loss": 1.8115, + "step": 4570 + }, + { + "epoch": 0.4831223628691983, + "grad_norm": 0.523423969745636, + "learning_rate": 0.0015, + "loss": 1.8377, + "step": 4580 + }, + { + "epoch": 0.48417721518987344, + "grad_norm": 0.43263402581214905, + "learning_rate": 0.0015, + "loss": 1.8212, + "step": 4590 + }, + { + "epoch": 0.48523206751054854, + "grad_norm": 0.4154493510723114, + "learning_rate": 0.0015, + "loss": 1.8394, + "step": 4600 + }, + { + "epoch": 0.48628691983122363, + "grad_norm": 0.39631569385528564, + "learning_rate": 0.0015, + "loss": 1.841, + "step": 4610 + }, + { + "epoch": 0.4873417721518987, + "grad_norm": 0.38108357787132263, + "learning_rate": 0.0015, + "loss": 1.8171, + "step": 4620 + }, + { + "epoch": 0.4883966244725738, + "grad_norm": 0.37537944316864014, + "learning_rate": 0.0015, + "loss": 1.8367, + "step": 4630 + }, + { + "epoch": 0.48945147679324896, + "grad_norm": 0.3624442219734192, + "learning_rate": 0.0015, + "loss": 1.8361, + "step": 4640 + }, + { 
+ "epoch": 0.49050632911392406, + "grad_norm": 0.4107973277568817, + "learning_rate": 0.0015, + "loss": 1.8359, + "step": 4650 + }, + { + "epoch": 0.49156118143459915, + "grad_norm": 0.38114675879478455, + "learning_rate": 0.0015, + "loss": 1.8237, + "step": 4660 + }, + { + "epoch": 0.49261603375527424, + "grad_norm": 0.5181953310966492, + "learning_rate": 0.0015, + "loss": 1.8222, + "step": 4670 + }, + { + "epoch": 0.4936708860759494, + "grad_norm": 0.36573171615600586, + "learning_rate": 0.0015, + "loss": 1.8196, + "step": 4680 + }, + { + "epoch": 0.4947257383966245, + "grad_norm": 0.4009116291999817, + "learning_rate": 0.0015, + "loss": 1.8179, + "step": 4690 + }, + { + "epoch": 0.4957805907172996, + "grad_norm": 0.3602575957775116, + "learning_rate": 0.0015, + "loss": 1.8309, + "step": 4700 + }, + { + "epoch": 0.49683544303797467, + "grad_norm": 0.37840357422828674, + "learning_rate": 0.0015, + "loss": 1.8278, + "step": 4710 + }, + { + "epoch": 0.4978902953586498, + "grad_norm": 0.5219160914421082, + "learning_rate": 0.0015, + "loss": 1.8205, + "step": 4720 + }, + { + "epoch": 0.4989451476793249, + "grad_norm": 0.3847928047180176, + "learning_rate": 0.0015, + "loss": 1.8194, + "step": 4730 + }, + { + "epoch": 0.5, + "grad_norm": 0.41638249158859253, + "learning_rate": 0.0015, + "loss": 1.8208, + "step": 4740 + }, + { + "epoch": 0.5010548523206751, + "grad_norm": 0.3851413428783417, + "learning_rate": 0.0015, + "loss": 1.836, + "step": 4750 + }, + { + "epoch": 0.5021097046413502, + "grad_norm": 0.3669206500053406, + "learning_rate": 0.0015, + "loss": 1.8223, + "step": 4760 + }, + { + "epoch": 0.5031645569620253, + "grad_norm": 0.36767563223838806, + "learning_rate": 0.0015, + "loss": 1.8183, + "step": 4770 + }, + { + "epoch": 0.5042194092827004, + "grad_norm": 0.43759578466415405, + "learning_rate": 0.0015, + "loss": 1.8272, + "step": 4780 + }, + { + "epoch": 0.5052742616033755, + "grad_norm": 0.3198586702346802, + "learning_rate": 0.0015, + "loss": 1.8111, + 
"step": 4790 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 0.37309959530830383, + "learning_rate": 0.0015, + "loss": 1.8154, + "step": 4800 + }, + { + "epoch": 0.5073839662447257, + "grad_norm": 0.37838244438171387, + "learning_rate": 0.0015, + "loss": 1.8156, + "step": 4810 + }, + { + "epoch": 0.5084388185654009, + "grad_norm": 0.4710870087146759, + "learning_rate": 0.0015, + "loss": 1.8348, + "step": 4820 + }, + { + "epoch": 0.509493670886076, + "grad_norm": 0.43118733167648315, + "learning_rate": 0.0015, + "loss": 1.8149, + "step": 4830 + }, + { + "epoch": 0.510548523206751, + "grad_norm": 0.3798106014728546, + "learning_rate": 0.0015, + "loss": 1.8123, + "step": 4840 + }, + { + "epoch": 0.5116033755274262, + "grad_norm": 0.4070100784301758, + "learning_rate": 0.0015, + "loss": 1.8234, + "step": 4850 + }, + { + "epoch": 0.5126582278481012, + "grad_norm": 0.44036611914634705, + "learning_rate": 0.0015, + "loss": 1.8085, + "step": 4860 + }, + { + "epoch": 0.5137130801687764, + "grad_norm": 0.6049620509147644, + "learning_rate": 0.0015, + "loss": 1.8013, + "step": 4870 + }, + { + "epoch": 0.5147679324894515, + "grad_norm": 0.4063786566257477, + "learning_rate": 0.0015, + "loss": 1.826, + "step": 4880 + }, + { + "epoch": 0.5158227848101266, + "grad_norm": 0.37625035643577576, + "learning_rate": 0.0015, + "loss": 1.8147, + "step": 4890 + }, + { + "epoch": 0.5168776371308017, + "grad_norm": 0.3552354872226715, + "learning_rate": 0.0015, + "loss": 1.8203, + "step": 4900 + }, + { + "epoch": 0.5179324894514767, + "grad_norm": 0.4841260313987732, + "learning_rate": 0.0015, + "loss": 1.8142, + "step": 4910 + }, + { + "epoch": 0.5189873417721519, + "grad_norm": 0.3970704674720764, + "learning_rate": 0.0015, + "loss": 1.8063, + "step": 4920 + }, + { + "epoch": 0.520042194092827, + "grad_norm": 0.4679521322250366, + "learning_rate": 0.0015, + "loss": 1.8108, + "step": 4930 + }, + { + "epoch": 0.5210970464135021, + "grad_norm": 0.36642470955848694, + "learning_rate": 
0.0015, + "loss": 1.8106, + "step": 4940 + }, + { + "epoch": 0.5221518987341772, + "grad_norm": 0.4319462776184082, + "learning_rate": 0.0015, + "loss": 1.8189, + "step": 4950 + }, + { + "epoch": 0.5232067510548524, + "grad_norm": 0.514441192150116, + "learning_rate": 0.0015, + "loss": 1.799, + "step": 4960 + }, + { + "epoch": 0.5242616033755274, + "grad_norm": 0.34872546792030334, + "learning_rate": 0.0015, + "loss": 1.8075, + "step": 4970 + }, + { + "epoch": 0.5253164556962026, + "grad_norm": 0.42705288529396057, + "learning_rate": 0.0015, + "loss": 1.8026, + "step": 4980 + }, + { + "epoch": 0.5263713080168776, + "grad_norm": 0.6406503319740295, + "learning_rate": 0.0015, + "loss": 1.8024, + "step": 4990 + }, + { + "epoch": 0.5274261603375527, + "grad_norm": 0.4056425392627716, + "learning_rate": 0.0015, + "loss": 1.8168, + "step": 5000 + }, + { + "epoch": 0.5284810126582279, + "grad_norm": 0.4114770293235779, + "learning_rate": 0.0015, + "loss": 1.808, + "step": 5010 + }, + { + "epoch": 0.5295358649789029, + "grad_norm": 0.39436665177345276, + "learning_rate": 0.0015, + "loss": 1.7985, + "step": 5020 + }, + { + "epoch": 0.5305907172995781, + "grad_norm": 0.40641167759895325, + "learning_rate": 0.0015, + "loss": 1.7854, + "step": 5030 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 0.4375355541706085, + "learning_rate": 0.0015, + "loss": 1.8127, + "step": 5040 + }, + { + "epoch": 0.5327004219409283, + "grad_norm": 0.414604514837265, + "learning_rate": 0.0015, + "loss": 1.7997, + "step": 5050 + }, + { + "epoch": 0.5337552742616034, + "grad_norm": 0.3938453197479248, + "learning_rate": 0.0015, + "loss": 1.8142, + "step": 5060 + }, + { + "epoch": 0.5348101265822784, + "grad_norm": 0.5752911567687988, + "learning_rate": 0.0015, + "loss": 1.8006, + "step": 5070 + }, + { + "epoch": 0.5358649789029536, + "grad_norm": 0.37712499499320984, + "learning_rate": 0.0015, + "loss": 1.8127, + "step": 5080 + }, + { + "epoch": 0.5369198312236287, + "grad_norm": 
0.4018497169017792, + "learning_rate": 0.0015, + "loss": 1.8048, + "step": 5090 + }, + { + "epoch": 0.5379746835443038, + "grad_norm": 0.37279269099235535, + "learning_rate": 0.0015, + "loss": 1.7874, + "step": 5100 + }, + { + "epoch": 0.5390295358649789, + "grad_norm": 0.48174726963043213, + "learning_rate": 0.0015, + "loss": 1.7995, + "step": 5110 + }, + { + "epoch": 0.540084388185654, + "grad_norm": 0.39854592084884644, + "learning_rate": 0.0015, + "loss": 1.7983, + "step": 5120 + }, + { + "epoch": 0.5411392405063291, + "grad_norm": 0.3552011251449585, + "learning_rate": 0.0015, + "loss": 1.7932, + "step": 5130 + }, + { + "epoch": 0.5421940928270043, + "grad_norm": 0.38018688559532166, + "learning_rate": 0.0015, + "loss": 1.8094, + "step": 5140 + }, + { + "epoch": 0.5432489451476793, + "grad_norm": 0.3637646436691284, + "learning_rate": 0.0015, + "loss": 1.803, + "step": 5150 + }, + { + "epoch": 0.5443037974683544, + "grad_norm": 0.5112683176994324, + "learning_rate": 0.0015, + "loss": 1.7935, + "step": 5160 + }, + { + "epoch": 0.5453586497890295, + "grad_norm": 0.36904454231262207, + "learning_rate": 0.0015, + "loss": 1.7989, + "step": 5170 + }, + { + "epoch": 0.5464135021097046, + "grad_norm": 0.4275411069393158, + "learning_rate": 0.0015, + "loss": 1.8105, + "step": 5180 + }, + { + "epoch": 0.5474683544303798, + "grad_norm": 0.4094441831111908, + "learning_rate": 0.0015, + "loss": 1.7898, + "step": 5190 + }, + { + "epoch": 0.5485232067510548, + "grad_norm": 0.5441651344299316, + "learning_rate": 0.0015, + "loss": 1.8015, + "step": 5200 + }, + { + "epoch": 0.54957805907173, + "grad_norm": 0.3846557140350342, + "learning_rate": 0.0015, + "loss": 1.7817, + "step": 5210 + }, + { + "epoch": 0.5506329113924051, + "grad_norm": 0.4060090184211731, + "learning_rate": 0.0015, + "loss": 1.799, + "step": 5220 + }, + { + "epoch": 0.5516877637130801, + "grad_norm": 0.4017479419708252, + "learning_rate": 0.0015, + "loss": 1.7886, + "step": 5230 + }, + { + "epoch": 
0.5527426160337553, + "grad_norm": 0.40392205119132996, + "learning_rate": 0.0015, + "loss": 1.7931, + "step": 5240 + }, + { + "epoch": 0.5537974683544303, + "grad_norm": 0.6735473871231079, + "learning_rate": 0.0015, + "loss": 1.7891, + "step": 5250 + }, + { + "epoch": 0.5548523206751055, + "grad_norm": 0.42865750193595886, + "learning_rate": 0.0015, + "loss": 1.7934, + "step": 5260 + }, + { + "epoch": 0.5559071729957806, + "grad_norm": 0.5680474042892456, + "learning_rate": 0.0015, + "loss": 1.7996, + "step": 5270 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 0.6624480485916138, + "learning_rate": 0.0015, + "loss": 1.7887, + "step": 5280 + }, + { + "epoch": 0.5580168776371308, + "grad_norm": 0.3647904694080353, + "learning_rate": 0.0015, + "loss": 1.7904, + "step": 5290 + }, + { + "epoch": 0.5590717299578059, + "grad_norm": 0.40777212381362915, + "learning_rate": 0.0015, + "loss": 1.7933, + "step": 5300 + }, + { + "epoch": 0.560126582278481, + "grad_norm": 0.5746924877166748, + "learning_rate": 0.0015, + "loss": 1.7849, + "step": 5310 + }, + { + "epoch": 0.5611814345991561, + "grad_norm": 0.36260008811950684, + "learning_rate": 0.0015, + "loss": 1.7794, + "step": 5320 + }, + { + "epoch": 0.5622362869198312, + "grad_norm": 0.3997408151626587, + "learning_rate": 0.0015, + "loss": 1.7923, + "step": 5330 + }, + { + "epoch": 0.5632911392405063, + "grad_norm": 0.3587180972099304, + "learning_rate": 0.0015, + "loss": 1.784, + "step": 5340 + }, + { + "epoch": 0.5643459915611815, + "grad_norm": 0.39205724000930786, + "learning_rate": 0.0015, + "loss": 1.7773, + "step": 5350 + }, + { + "epoch": 0.5654008438818565, + "grad_norm": 0.5115182995796204, + "learning_rate": 0.0015, + "loss": 1.7837, + "step": 5360 + }, + { + "epoch": 0.5664556962025317, + "grad_norm": 0.5049818754196167, + "learning_rate": 0.0015, + "loss": 1.7878, + "step": 5370 + }, + { + "epoch": 0.5675105485232067, + "grad_norm": 0.4765077829360962, + "learning_rate": 0.0015, + "loss": 1.7785, + 
"step": 5380 + }, + { + "epoch": 0.5685654008438819, + "grad_norm": 0.4631378948688507, + "learning_rate": 0.0015, + "loss": 1.7795, + "step": 5390 + }, + { + "epoch": 0.569620253164557, + "grad_norm": 0.3704979121685028, + "learning_rate": 0.0015, + "loss": 1.7796, + "step": 5400 + }, + { + "epoch": 0.570675105485232, + "grad_norm": 0.36718788743019104, + "learning_rate": 0.0015, + "loss": 1.782, + "step": 5410 + }, + { + "epoch": 0.5717299578059072, + "grad_norm": 0.3675275444984436, + "learning_rate": 0.0015, + "loss": 1.7741, + "step": 5420 + }, + { + "epoch": 0.5727848101265823, + "grad_norm": 0.3529714345932007, + "learning_rate": 0.0015, + "loss": 1.7724, + "step": 5430 + }, + { + "epoch": 0.5738396624472574, + "grad_norm": 0.448056697845459, + "learning_rate": 0.0015, + "loss": 1.7739, + "step": 5440 + }, + { + "epoch": 0.5748945147679325, + "grad_norm": 0.41675782203674316, + "learning_rate": 0.0015, + "loss": 1.7795, + "step": 5450 + }, + { + "epoch": 0.5759493670886076, + "grad_norm": 0.4032045304775238, + "learning_rate": 0.0015, + "loss": 1.7818, + "step": 5460 + }, + { + "epoch": 0.5770042194092827, + "grad_norm": 0.35200566053390503, + "learning_rate": 0.0015, + "loss": 1.7761, + "step": 5470 + }, + { + "epoch": 0.5780590717299579, + "grad_norm": 0.5089764595031738, + "learning_rate": 0.0015, + "loss": 1.7819, + "step": 5480 + }, + { + "epoch": 0.5791139240506329, + "grad_norm": 0.48288729786872864, + "learning_rate": 0.0015, + "loss": 1.7841, + "step": 5490 + }, + { + "epoch": 0.580168776371308, + "grad_norm": 0.44587039947509766, + "learning_rate": 0.0015, + "loss": 1.7753, + "step": 5500 + }, + { + "epoch": 0.5812236286919831, + "grad_norm": 0.5870866775512695, + "learning_rate": 0.0015, + "loss": 1.7841, + "step": 5510 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 0.38041436672210693, + "learning_rate": 0.0015, + "loss": 1.7753, + "step": 5520 + }, + { + "epoch": 0.5833333333333334, + "grad_norm": 0.4354185163974762, + "learning_rate": 
0.0015, + "loss": 1.7683, + "step": 5530 + }, + { + "epoch": 0.5843881856540084, + "grad_norm": 0.36650848388671875, + "learning_rate": 0.0015, + "loss": 1.7663, + "step": 5540 + }, + { + "epoch": 0.5854430379746836, + "grad_norm": 0.385189414024353, + "learning_rate": 0.0015, + "loss": 1.7835, + "step": 5550 + }, + { + "epoch": 0.5864978902953587, + "grad_norm": 0.3649611175060272, + "learning_rate": 0.0015, + "loss": 1.788, + "step": 5560 + }, + { + "epoch": 0.5875527426160337, + "grad_norm": 0.34608882665634155, + "learning_rate": 0.0015, + "loss": 1.7702, + "step": 5570 + }, + { + "epoch": 0.5886075949367089, + "grad_norm": 0.3545205295085907, + "learning_rate": 0.0015, + "loss": 1.7686, + "step": 5580 + }, + { + "epoch": 0.5896624472573839, + "grad_norm": 0.3859558403491974, + "learning_rate": 0.0015, + "loss": 1.7739, + "step": 5590 + }, + { + "epoch": 0.5907172995780591, + "grad_norm": 0.6199972033500671, + "learning_rate": 0.0015, + "loss": 1.77, + "step": 5600 + }, + { + "epoch": 0.5917721518987342, + "grad_norm": 0.3931775689125061, + "learning_rate": 0.0015, + "loss": 1.7738, + "step": 5610 + }, + { + "epoch": 0.5928270042194093, + "grad_norm": 0.4120345115661621, + "learning_rate": 0.0015, + "loss": 1.7829, + "step": 5620 + }, + { + "epoch": 0.5938818565400844, + "grad_norm": 0.5041189193725586, + "learning_rate": 0.0015, + "loss": 1.7718, + "step": 5630 + }, + { + "epoch": 0.5949367088607594, + "grad_norm": 0.39991727471351624, + "learning_rate": 0.0015, + "loss": 1.7832, + "step": 5640 + }, + { + "epoch": 0.5959915611814346, + "grad_norm": 0.39618390798568726, + "learning_rate": 0.0015, + "loss": 1.7777, + "step": 5650 + }, + { + "epoch": 0.5970464135021097, + "grad_norm": 0.4246659278869629, + "learning_rate": 0.0015, + "loss": 1.7728, + "step": 5660 + }, + { + "epoch": 0.5981012658227848, + "grad_norm": 0.4983978867530823, + "learning_rate": 0.0015, + "loss": 1.7812, + "step": 5670 + }, + { + "epoch": 0.5991561181434599, + "grad_norm": 
0.38729602098464966, + "learning_rate": 0.0015, + "loss": 1.7803, + "step": 5680 + }, + { + "epoch": 0.6002109704641351, + "grad_norm": 0.44268617033958435, + "learning_rate": 0.0015, + "loss": 1.7586, + "step": 5690 + }, + { + "epoch": 0.6012658227848101, + "grad_norm": 0.44333615899086, + "learning_rate": 0.0015, + "loss": 1.765, + "step": 5700 + }, + { + "epoch": 0.6023206751054853, + "grad_norm": 0.34337031841278076, + "learning_rate": 0.0015, + "loss": 1.7647, + "step": 5710 + }, + { + "epoch": 0.6033755274261603, + "grad_norm": 0.3607620894908905, + "learning_rate": 0.0015, + "loss": 1.7676, + "step": 5720 + }, + { + "epoch": 0.6044303797468354, + "grad_norm": 0.42472901940345764, + "learning_rate": 0.0015, + "loss": 1.7652, + "step": 5730 + }, + { + "epoch": 0.6054852320675106, + "grad_norm": 0.3862784206867218, + "learning_rate": 0.0015, + "loss": 1.7662, + "step": 5740 + }, + { + "epoch": 0.6065400843881856, + "grad_norm": 0.5129724740982056, + "learning_rate": 0.0015, + "loss": 1.7398, + "step": 5750 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 0.3929223120212555, + "learning_rate": 0.0015, + "loss": 1.7518, + "step": 5760 + }, + { + "epoch": 0.6086497890295358, + "grad_norm": 0.44849100708961487, + "learning_rate": 0.0015, + "loss": 1.7549, + "step": 5770 + }, + { + "epoch": 0.609704641350211, + "grad_norm": 0.3359050452709198, + "learning_rate": 0.0015, + "loss": 1.7632, + "step": 5780 + }, + { + "epoch": 0.6107594936708861, + "grad_norm": 0.40562015771865845, + "learning_rate": 0.0015, + "loss": 1.7571, + "step": 5790 + }, + { + "epoch": 0.6118143459915611, + "grad_norm": 0.40842700004577637, + "learning_rate": 0.0015, + "loss": 1.761, + "step": 5800 + }, + { + "epoch": 0.6128691983122363, + "grad_norm": 0.3713739216327667, + "learning_rate": 0.0015, + "loss": 1.7648, + "step": 5810 + }, + { + "epoch": 0.6139240506329114, + "grad_norm": 0.4099365472793579, + "learning_rate": 0.0015, + "loss": 1.7758, + "step": 5820 + }, + { + "epoch": 
0.6149789029535865, + "grad_norm": 0.3733523488044739, + "learning_rate": 0.0015, + "loss": 1.7602, + "step": 5830 + }, + { + "epoch": 0.6160337552742616, + "grad_norm": 0.3665669560432434, + "learning_rate": 0.0015, + "loss": 1.7742, + "step": 5840 + }, + { + "epoch": 0.6170886075949367, + "grad_norm": 0.4490165114402771, + "learning_rate": 0.0015, + "loss": 1.7763, + "step": 5850 + }, + { + "epoch": 0.6181434599156118, + "grad_norm": 0.4284886121749878, + "learning_rate": 0.0015, + "loss": 1.7648, + "step": 5860 + }, + { + "epoch": 0.619198312236287, + "grad_norm": 0.5306150913238525, + "learning_rate": 0.0015, + "loss": 1.7582, + "step": 5870 + }, + { + "epoch": 0.620253164556962, + "grad_norm": 0.3589763939380646, + "learning_rate": 0.0015, + "loss": 1.7677, + "step": 5880 + }, + { + "epoch": 0.6213080168776371, + "grad_norm": 0.5833979845046997, + "learning_rate": 0.0015, + "loss": 1.7564, + "step": 5890 + }, + { + "epoch": 0.6223628691983122, + "grad_norm": 0.4654962420463562, + "learning_rate": 0.0015, + "loss": 1.7598, + "step": 5900 + }, + { + "epoch": 0.6234177215189873, + "grad_norm": 0.7403870820999146, + "learning_rate": 0.0015, + "loss": 1.7561, + "step": 5910 + }, + { + "epoch": 0.6244725738396625, + "grad_norm": 0.4152665436267853, + "learning_rate": 0.0015, + "loss": 1.7513, + "step": 5920 + }, + { + "epoch": 0.6255274261603375, + "grad_norm": 0.39639317989349365, + "learning_rate": 0.0015, + "loss": 1.7568, + "step": 5930 + }, + { + "epoch": 0.6265822784810127, + "grad_norm": 0.40477094054222107, + "learning_rate": 0.0015, + "loss": 1.7613, + "step": 5940 + }, + { + "epoch": 0.6276371308016878, + "grad_norm": 0.4371580481529236, + "learning_rate": 0.0015, + "loss": 1.7491, + "step": 5950 + }, + { + "epoch": 0.6286919831223629, + "grad_norm": 0.38728776574134827, + "learning_rate": 0.0015, + "loss": 1.7556, + "step": 5960 + }, + { + "epoch": 0.629746835443038, + "grad_norm": 0.39925897121429443, + "learning_rate": 0.0015, + "loss": 1.7564, + 
"step": 5970 + }, + { + "epoch": 0.630801687763713, + "grad_norm": 0.3748125433921814, + "learning_rate": 0.0015, + "loss": 1.7424, + "step": 5980 + }, + { + "epoch": 0.6318565400843882, + "grad_norm": 0.39767175912857056, + "learning_rate": 0.0015, + "loss": 1.76, + "step": 5990 + }, + { + "epoch": 0.6329113924050633, + "grad_norm": 0.43334314227104187, + "learning_rate": 0.0015, + "loss": 1.7668, + "step": 6000 + }, + { + "epoch": 0.6339662447257384, + "grad_norm": 0.44917285442352295, + "learning_rate": 0.0015, + "loss": 1.7578, + "step": 6010 + }, + { + "epoch": 0.6350210970464135, + "grad_norm": 0.4493657946586609, + "learning_rate": 0.0015, + "loss": 1.7604, + "step": 6020 + }, + { + "epoch": 0.6360759493670886, + "grad_norm": 0.3646076023578644, + "learning_rate": 0.0015, + "loss": 1.7412, + "step": 6030 + }, + { + "epoch": 0.6371308016877637, + "grad_norm": 0.3971148729324341, + "learning_rate": 0.0015, + "loss": 1.759, + "step": 6040 + }, + { + "epoch": 0.6381856540084389, + "grad_norm": 0.4294765293598175, + "learning_rate": 0.0015, + "loss": 1.7406, + "step": 6050 + }, + { + "epoch": 0.6392405063291139, + "grad_norm": 0.3631121814250946, + "learning_rate": 0.0015, + "loss": 1.7626, + "step": 6060 + }, + { + "epoch": 0.640295358649789, + "grad_norm": 0.41278743743896484, + "learning_rate": 0.0015, + "loss": 1.7493, + "step": 6070 + }, + { + "epoch": 0.6413502109704642, + "grad_norm": 0.4470178782939911, + "learning_rate": 0.0015, + "loss": 1.7488, + "step": 6080 + }, + { + "epoch": 0.6424050632911392, + "grad_norm": 0.4392800033092499, + "learning_rate": 0.0015, + "loss": 1.7487, + "step": 6090 + }, + { + "epoch": 0.6434599156118144, + "grad_norm": 0.3853001892566681, + "learning_rate": 0.0015, + "loss": 1.7424, + "step": 6100 + }, + { + "epoch": 0.6445147679324894, + "grad_norm": 0.40034884214401245, + "learning_rate": 0.0015, + "loss": 1.7523, + "step": 6110 + }, + { + "epoch": 0.6455696202531646, + "grad_norm": 0.40867364406585693, + "learning_rate": 
0.0015, + "loss": 1.7467, + "step": 6120 + }, + { + "epoch": 0.6466244725738397, + "grad_norm": 0.3602948486804962, + "learning_rate": 0.0015, + "loss": 1.7442, + "step": 6130 + }, + { + "epoch": 0.6476793248945147, + "grad_norm": 0.4847540557384491, + "learning_rate": 0.0015, + "loss": 1.7478, + "step": 6140 + }, + { + "epoch": 0.6487341772151899, + "grad_norm": 0.37840238213539124, + "learning_rate": 0.0015, + "loss": 1.7545, + "step": 6150 + }, + { + "epoch": 0.6497890295358649, + "grad_norm": 0.4093344807624817, + "learning_rate": 0.0015, + "loss": 1.7505, + "step": 6160 + }, + { + "epoch": 0.6508438818565401, + "grad_norm": 0.4945821166038513, + "learning_rate": 0.0014834368975312174, + "loss": 1.729, + "step": 6170 + }, + { + "epoch": 0.6518987341772152, + "grad_norm": 0.4628181457519531, + "learning_rate": 0.0014629899726345957, + "loss": 1.7491, + "step": 6180 + }, + { + "epoch": 0.6529535864978903, + "grad_norm": 0.3663456439971924, + "learning_rate": 0.0014428248775471316, + "loss": 1.7527, + "step": 6190 + }, + { + "epoch": 0.6540084388185654, + "grad_norm": 0.4029582142829895, + "learning_rate": 0.00142293772767289, + "loss": 1.7429, + "step": 6200 + }, + { + "epoch": 0.6550632911392406, + "grad_norm": 0.4698050916194916, + "learning_rate": 0.001403324691959192, + "loss": 1.7405, + "step": 6210 + }, + { + "epoch": 0.6561181434599156, + "grad_norm": 0.32272350788116455, + "learning_rate": 0.0013839819921586025, + "loss": 1.7437, + "step": 6220 + }, + { + "epoch": 0.6571729957805907, + "grad_norm": 0.36582040786743164, + "learning_rate": 0.0013649059021010894, + "loss": 1.7318, + "step": 6230 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 0.45245763659477234, + "learning_rate": 0.0013460927469762154, + "loss": 1.7366, + "step": 6240 + }, + { + "epoch": 0.6592827004219409, + "grad_norm": 0.3887353837490082, + "learning_rate": 0.0013275389026252255, + "loss": 1.7426, + "step": 6250 + }, + { + "epoch": 0.6603375527426161, + "grad_norm": 
0.3597774803638458, + "learning_rate": 0.0013092407948428887, + "loss": 1.7327, + "step": 6260 + }, + { + "epoch": 0.6613924050632911, + "grad_norm": 0.37551334500312805, + "learning_rate": 0.001291194898688966, + "loss": 1.7436, + "step": 6270 + }, + { + "epoch": 0.6624472573839663, + "grad_norm": 0.31163686513900757, + "learning_rate": 0.001273397737809166, + "loss": 1.7401, + "step": 6280 + }, + { + "epoch": 0.6635021097046413, + "grad_norm": 0.33886754512786865, + "learning_rate": 0.001255845883765463, + "loss": 1.7327, + "step": 6290 + }, + { + "epoch": 0.6645569620253164, + "grad_norm": 0.4385211169719696, + "learning_rate": 0.001238535955375642, + "loss": 1.7266, + "step": 6300 + }, + { + "epoch": 0.6656118143459916, + "grad_norm": 0.4503138065338135, + "learning_rate": 0.0012214646180619506, + "loss": 1.7246, + "step": 6310 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.41475728154182434, + "learning_rate": 0.001204628583208727, + "loss": 1.7241, + "step": 6320 + }, + { + "epoch": 0.6677215189873418, + "grad_norm": 0.49436885118484497, + "learning_rate": 0.0011880246075288827, + "loss": 1.7305, + "step": 6330 + }, + { + "epoch": 0.6687763713080169, + "grad_norm": 0.38352593779563904, + "learning_rate": 0.001171649492439115, + "loss": 1.7255, + "step": 6340 + }, + { + "epoch": 0.669831223628692, + "grad_norm": 0.3476194441318512, + "learning_rate": 0.0011555000834437364, + "loss": 1.7279, + "step": 6350 + }, + { + "epoch": 0.6708860759493671, + "grad_norm": 0.3866131007671356, + "learning_rate": 0.0011395732695269908, + "loss": 1.7241, + "step": 6360 + }, + { + "epoch": 0.6719409282700421, + "grad_norm": 0.3425932824611664, + "learning_rate": 0.0011238659825537505, + "loss": 1.7065, + "step": 6370 + }, + { + "epoch": 0.6729957805907173, + "grad_norm": 0.35537442564964294, + "learning_rate": 0.0011083751966784717, + "loss": 1.7097, + "step": 6380 + }, + { + "epoch": 0.6740506329113924, + "grad_norm": 0.42356932163238525, + "learning_rate": 
0.0010930979277622953, + "loss": 1.7253, + "step": 6390 + }, + { + "epoch": 0.6751054852320675, + "grad_norm": 0.4219104051589966, + "learning_rate": 0.0010780312327981854, + "loss": 1.7243, + "step": 6400 + }, + { + "epoch": 0.6761603375527426, + "grad_norm": 0.3380492329597473, + "learning_rate": 0.0010631722093439888, + "loss": 1.7192, + "step": 6410 + }, + { + "epoch": 0.6772151898734177, + "grad_norm": 0.35530468821525574, + "learning_rate": 0.00104851799496331, + "loss": 1.7107, + "step": 6420 + }, + { + "epoch": 0.6782700421940928, + "grad_norm": 0.3714942932128906, + "learning_rate": 0.0010340657666740914, + "loss": 1.7162, + "step": 6430 + }, + { + "epoch": 0.679324894514768, + "grad_norm": 0.38263872265815735, + "learning_rate": 0.0010198127404047975, + "loss": 1.7035, + "step": 6440 + }, + { + "epoch": 0.680379746835443, + "grad_norm": 0.43177831172943115, + "learning_rate": 0.0010057561704580897, + "loss": 1.7051, + "step": 6450 + }, + { + "epoch": 0.6814345991561181, + "grad_norm": 0.3453059792518616, + "learning_rate": 0.0009918933489818985, + "loss": 1.7189, + "step": 6460 + }, + { + "epoch": 0.6824894514767933, + "grad_norm": 0.325992226600647, + "learning_rate": 0.0009782216054477827, + "loss": 1.7092, + "step": 6470 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 0.4558759033679962, + "learning_rate": 0.0009647383061364801, + "loss": 1.7174, + "step": 6480 + }, + { + "epoch": 0.6845991561181435, + "grad_norm": 0.37190869450569153, + "learning_rate": 0.0009514408536305495, + "loss": 1.7095, + "step": 6490 + }, + { + "epoch": 0.6856540084388185, + "grad_norm": 0.32614776492118835, + "learning_rate": 0.0009383266863140042, + "loss": 1.7289, + "step": 6500 + }, + { + "epoch": 0.6867088607594937, + "grad_norm": 0.3383933901786804, + "learning_rate": 0.000925393277878844, + "loss": 1.7239, + "step": 6510 + }, + { + "epoch": 0.6877637130801688, + "grad_norm": 0.3237789571285248, + "learning_rate": 0.0009126381368383879, + "loss": 1.7059, + "step": 
6520 + }, + { + "epoch": 0.6888185654008439, + "grad_norm": 0.3540682792663574, + "learning_rate": 0.0009000588060473156, + "loss": 1.7023, + "step": 6530 + }, + { + "epoch": 0.689873417721519, + "grad_norm": 0.3900008499622345, + "learning_rate": 0.0008876528622283235, + "loss": 1.7103, + "step": 6540 + }, + { + "epoch": 0.6909282700421941, + "grad_norm": 0.33319854736328125, + "learning_rate": 0.0008754179155053053, + "loss": 1.7032, + "step": 6550 + }, + { + "epoch": 0.6919831223628692, + "grad_norm": 0.48551586270332336, + "learning_rate": 0.0008633516089429683, + "loss": 1.7021, + "step": 6560 + }, + { + "epoch": 0.6930379746835443, + "grad_norm": 0.34750625491142273, + "learning_rate": 0.0008514516180927928, + "loss": 1.7, + "step": 6570 + }, + { + "epoch": 0.6940928270042194, + "grad_norm": 0.34169715642929077, + "learning_rate": 0.0008397156505452524, + "loss": 1.6951, + "step": 6580 + }, + { + "epoch": 0.6951476793248945, + "grad_norm": 0.40248948335647583, + "learning_rate": 0.0008281414454882051, + "loss": 1.7037, + "step": 6590 + }, + { + "epoch": 0.6962025316455697, + "grad_norm": 0.35692939162254333, + "learning_rate": 0.0008167267732713704, + "loss": 1.7056, + "step": 6600 + }, + { + "epoch": 0.6972573839662447, + "grad_norm": 0.4143449068069458, + "learning_rate": 0.0008054694349768117, + "loss": 1.6946, + "step": 6610 + }, + { + "epoch": 0.6983122362869199, + "grad_norm": 0.3523157238960266, + "learning_rate": 0.0007943672619953359, + "loss": 1.6976, + "step": 6620 + }, + { + "epoch": 0.6993670886075949, + "grad_norm": 0.3302171230316162, + "learning_rate": 0.0007834181156087356, + "loss": 1.6922, + "step": 6630 + }, + { + "epoch": 0.70042194092827, + "grad_norm": 0.378368079662323, + "learning_rate": 0.0007726198865777852, + "loss": 1.6981, + "step": 6640 + }, + { + "epoch": 0.7014767932489452, + "grad_norm": 0.38149553537368774, + "learning_rate": 0.0007619704947359191, + "loss": 1.6906, + "step": 6650 + }, + { + "epoch": 0.7025316455696202, + 
"grad_norm": 0.3489941954612732, + "learning_rate": 0.0007514678885885087, + "loss": 1.6911, + "step": 6660 + }, + { + "epoch": 0.7035864978902954, + "grad_norm": 0.40645143389701843, + "learning_rate": 0.0007411100449176633, + "loss": 1.69, + "step": 6670 + }, + { + "epoch": 0.7046413502109705, + "grad_norm": 0.37549859285354614, + "learning_rate": 0.0007308949683924791, + "loss": 1.6947, + "step": 6680 + }, + { + "epoch": 0.7056962025316456, + "grad_norm": 0.5234854817390442, + "learning_rate": 0.000720820691184658, + "loss": 1.6876, + "step": 6690 + }, + { + "epoch": 0.7067510548523207, + "grad_norm": 0.41648849844932556, + "learning_rate": 0.0007108852725894269, + "loss": 1.6871, + "step": 6700 + }, + { + "epoch": 0.7078059071729957, + "grad_norm": 0.38930535316467285, + "learning_rate": 0.000701086798651681, + "loss": 1.6865, + "step": 6710 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 0.47343021631240845, + "learning_rate": 0.0006914233817972798, + "loss": 1.6788, + "step": 6720 + }, + { + "epoch": 0.709915611814346, + "grad_norm": 0.32668837904930115, + "learning_rate": 0.0006818931604694261, + "loss": 1.6883, + "step": 6730 + }, + { + "epoch": 0.7109704641350211, + "grad_norm": 0.3579895496368408, + "learning_rate": 0.0006724942987700563, + "loss": 1.6929, + "step": 6740 + }, + { + "epoch": 0.7120253164556962, + "grad_norm": 0.3305911421775818, + "learning_rate": 0.0006632249861061732, + "loss": 1.6922, + "step": 6750 + }, + { + "epoch": 0.7130801687763713, + "grad_norm": 0.46947893500328064, + "learning_rate": 0.0006540834368410549, + "loss": 1.6854, + "step": 6760 + }, + { + "epoch": 0.7141350210970464, + "grad_norm": 0.35277026891708374, + "learning_rate": 0.0006450678899502701, + "loss": 1.6899, + "step": 6770 + }, + { + "epoch": 0.7151898734177216, + "grad_norm": 0.4404209554195404, + "learning_rate": 0.0006361766086824345, + "loss": 1.6866, + "step": 6780 + }, + { + "epoch": 0.7162447257383966, + "grad_norm": 0.3640693128108978, + 
"learning_rate": 0.000627407880224645, + "loss": 1.6896, + "step": 6790 + }, + { + "epoch": 0.7172995780590717, + "grad_norm": 0.381225049495697, + "learning_rate": 0.0006187600153725225, + "loss": 1.6765, + "step": 6800 + }, + { + "epoch": 0.7183544303797469, + "grad_norm": 0.5710827112197876, + "learning_rate": 0.0006102313482048055, + "loss": 1.6805, + "step": 6810 + }, + { + "epoch": 0.7194092827004219, + "grad_norm": 0.36791691184043884, + "learning_rate": 0.0006018202357624274, + "loss": 1.6841, + "step": 6820 + }, + { + "epoch": 0.7204641350210971, + "grad_norm": 0.3910648822784424, + "learning_rate": 0.0005935250577320168, + "loss": 1.6789, + "step": 6830 + }, + { + "epoch": 0.7215189873417721, + "grad_norm": 0.39405199885368347, + "learning_rate": 0.0005853442161337618, + "loss": 1.6697, + "step": 6840 + }, + { + "epoch": 0.7225738396624473, + "grad_norm": 0.372340589761734, + "learning_rate": 0.0005772761350135759, + "loss": 1.6779, + "step": 6850 + }, + { + "epoch": 0.7236286919831224, + "grad_norm": 0.33880114555358887, + "learning_rate": 0.0005693192601395058, + "loss": 1.6753, + "step": 6860 + }, + { + "epoch": 0.7246835443037974, + "grad_norm": 0.34514468908309937, + "learning_rate": 0.000561472058702326, + "loss": 1.6711, + "step": 6870 + }, + { + "epoch": 0.7257383966244726, + "grad_norm": 0.36651530861854553, + "learning_rate": 0.000553733019020258, + "loss": 1.6799, + "step": 6880 + }, + { + "epoch": 0.7267932489451476, + "grad_norm": 0.42948848009109497, + "learning_rate": 0.0005461006502477612, + "loss": 1.6677, + "step": 6890 + }, + { + "epoch": 0.7278481012658228, + "grad_norm": 0.3390848934650421, + "learning_rate": 0.0005385734820883369, + "loss": 1.6728, + "step": 6900 + }, + { + "epoch": 0.7289029535864979, + "grad_norm": 0.33833780884742737, + "learning_rate": 0.0005311500645112907, + "loss": 1.6946, + "step": 6910 + }, + { + "epoch": 0.729957805907173, + "grad_norm": 0.3522641360759735, + "learning_rate": 0.0005238289674723993, + 
"loss": 1.6748, + "step": 6920 + }, + { + "epoch": 0.7310126582278481, + "grad_norm": 0.3377450704574585, + "learning_rate": 0.0005166087806384274, + "loss": 1.6803, + "step": 6930 + }, + { + "epoch": 0.7320675105485233, + "grad_norm": 0.4707922637462616, + "learning_rate": 0.0005094881131154418, + "loss": 1.6834, + "step": 6940 + }, + { + "epoch": 0.7331223628691983, + "grad_norm": 0.3639419376850128, + "learning_rate": 0.0005024655931808696, + "loss": 1.6812, + "step": 6950 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 0.3634297251701355, + "learning_rate": 0.0004955398680192508, + "loss": 1.6672, + "step": 6960 + }, + { + "epoch": 0.7352320675105485, + "grad_norm": 0.33942192792892456, + "learning_rate": 0.000488709603461632, + "loss": 1.6648, + "step": 6970 + }, + { + "epoch": 0.7362869198312236, + "grad_norm": 0.3213030993938446, + "learning_rate": 0.000481973483728553, + "loss": 1.6672, + "step": 6980 + }, + { + "epoch": 0.7373417721518988, + "grad_norm": 0.3237278461456299, + "learning_rate": 0.0004753302111765748, + "loss": 1.6675, + "step": 6990 + }, + { + "epoch": 0.7383966244725738, + "grad_norm": 0.39589565992355347, + "learning_rate": 0.0004687785060483032, + "loss": 1.6787, + "step": 7000 + }, + { + "epoch": 0.739451476793249, + "grad_norm": 0.31808146834373474, + "learning_rate": 0.0004623171062258558, + "loss": 1.6484, + "step": 7010 + }, + { + "epoch": 0.740506329113924, + "grad_norm": 0.5088188648223877, + "learning_rate": 0.0004559447669877288, + "loss": 1.6667, + "step": 7020 + }, + { + "epoch": 0.7415611814345991, + "grad_norm": 0.3624919354915619, + "learning_rate": 0.00044966026076901413, + "loss": 1.6689, + "step": 7030 + }, + { + "epoch": 0.7426160337552743, + "grad_norm": 0.4639842212200165, + "learning_rate": 0.00044346237692492177, + "loss": 1.6722, + "step": 7040 + }, + { + "epoch": 0.7436708860759493, + "grad_norm": 0.3604246973991394, + "learning_rate": 0.0004373499214975615, + "loss": 1.6617, + "step": 7050 + }, + { + 
"epoch": 0.7447257383966245, + "grad_norm": 0.39077281951904297, + "learning_rate": 0.0004313217169859396, + "loss": 1.668, + "step": 7060 + }, + { + "epoch": 0.7457805907172996, + "grad_norm": 0.34318608045578003, + "learning_rate": 0.0004253766021191256, + "loss": 1.6726, + "step": 7070 + }, + { + "epoch": 0.7468354430379747, + "grad_norm": 0.3334824740886688, + "learning_rate": 0.00041951343163254497, + "loss": 1.6703, + "step": 7080 + }, + { + "epoch": 0.7478902953586498, + "grad_norm": 0.3301604986190796, + "learning_rate": 0.00041373107604735626, + "loss": 1.667, + "step": 7090 + }, + { + "epoch": 0.7489451476793249, + "grad_norm": 0.3273673951625824, + "learning_rate": 0.0004080284214528687, + "loss": 1.6647, + "step": 7100 + }, + { + "epoch": 0.75, + "grad_norm": 0.37167975306510925, + "learning_rate": 0.0004024043692919589, + "loss": 1.6733, + "step": 7110 + }, + { + "epoch": 0.7510548523206751, + "grad_norm": 0.4096986949443817, + "learning_rate": 0.0003968578361494449, + "loss": 1.6704, + "step": 7120 + }, + { + "epoch": 0.7521097046413502, + "grad_norm": 0.3329966366291046, + "learning_rate": 0.000391387753543378, + "loss": 1.6774, + "step": 7130 + }, + { + "epoch": 0.7531645569620253, + "grad_norm": 0.3384993374347687, + "learning_rate": 0.00038599306771921023, + "loss": 1.6572, + "step": 7140 + }, + { + "epoch": 0.7542194092827004, + "grad_norm": 0.3946463465690613, + "learning_rate": 0.0003806727394468004, + "loss": 1.6532, + "step": 7150 + }, + { + "epoch": 0.7552742616033755, + "grad_norm": 0.34576791524887085, + "learning_rate": 0.0003754257438202162, + "loss": 1.6653, + "step": 7160 + }, + { + "epoch": 0.7563291139240507, + "grad_norm": 0.43008339405059814, + "learning_rate": 0.0003702510700602974, + "loss": 1.6712, + "step": 7170 + }, + { + "epoch": 0.7573839662447257, + "grad_norm": 0.37148287892341614, + "learning_rate": 0.0003651477213199393, + "loss": 1.6487, + "step": 7180 + }, + { + "epoch": 0.7584388185654009, + "grad_norm": 
0.3486021161079407, + "learning_rate": 0.000360114714492061, + "loss": 1.651, + "step": 7190 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 0.35204851627349854, + "learning_rate": 0.0003551510800202195, + "loss": 1.6588, + "step": 7200 + }, + { + "epoch": 0.760548523206751, + "grad_norm": 0.4431720972061157, + "learning_rate": 0.0003502558617118353, + "loss": 1.6618, + "step": 7210 + }, + { + "epoch": 0.7616033755274262, + "grad_norm": 0.3386177718639374, + "learning_rate": 0.0003454281165539914, + "loss": 1.6746, + "step": 7220 + }, + { + "epoch": 0.7626582278481012, + "grad_norm": 0.33008381724357605, + "learning_rate": 0.00034066691453177176, + "loss": 1.6686, + "step": 7230 + }, + { + "epoch": 0.7637130801687764, + "grad_norm": 0.348519891500473, + "learning_rate": 0.0003359713384491037, + "loss": 1.6647, + "step": 7240 + }, + { + "epoch": 0.7647679324894515, + "grad_norm": 0.3155626058578491, + "learning_rate": 0.00033134048375206944, + "loss": 1.6623, + "step": 7250 + }, + { + "epoch": 0.7658227848101266, + "grad_norm": 0.33235377073287964, + "learning_rate": 0.0003267734583546536, + "loss": 1.6565, + "step": 7260 + }, + { + "epoch": 0.7668776371308017, + "grad_norm": 0.34811490774154663, + "learning_rate": 0.00032226938246689157, + "loss": 1.6545, + "step": 7270 + }, + { + "epoch": 0.7679324894514767, + "grad_norm": 0.3645946979522705, + "learning_rate": 0.0003178273884253874, + "loss": 1.66, + "step": 7280 + }, + { + "epoch": 0.7689873417721519, + "grad_norm": 0.31685009598731995, + "learning_rate": 0.0003134466205261674, + "loss": 1.6701, + "step": 7290 + }, + { + "epoch": 0.770042194092827, + "grad_norm": 0.3582608699798584, + "learning_rate": 0.0003091262348598378, + "loss": 1.6737, + "step": 7300 + }, + { + "epoch": 0.7710970464135021, + "grad_norm": 0.3385883569717407, + "learning_rate": 0.0003048653991490141, + "loss": 1.6523, + "step": 7310 + }, + { + "epoch": 0.7721518987341772, + "grad_norm": 0.35297125577926636, + "learning_rate": 
0.00030066329258799187, + "loss": 1.6472, + "step": 7320 + }, + { + "epoch": 0.7732067510548524, + "grad_norm": 0.3489353060722351, + "learning_rate": 0.0002965191056846266, + "loss": 1.6546, + "step": 7330 + }, + { + "epoch": 0.7742616033755274, + "grad_norm": 0.3470304310321808, + "learning_rate": 0.000292432040104394, + "loss": 1.6514, + "step": 7340 + }, + { + "epoch": 0.7753164556962026, + "grad_norm": 0.32428672909736633, + "learning_rate": 0.00028840130851659853, + "loss": 1.6488, + "step": 7350 + }, + { + "epoch": 0.7763713080168776, + "grad_norm": 0.3145506680011749, + "learning_rate": 0.0002844261344427028, + "loss": 1.6574, + "step": 7360 + }, + { + "epoch": 0.7774261603375527, + "grad_norm": 0.3551109731197357, + "learning_rate": 0.0002805057521067471, + "loss": 1.6546, + "step": 7370 + }, + { + "epoch": 0.7784810126582279, + "grad_norm": 0.3797190487384796, + "learning_rate": 0.00027663940628783017, + "loss": 1.6466, + "step": 7380 + }, + { + "epoch": 0.7795358649789029, + "grad_norm": 0.3197629153728485, + "learning_rate": 0.00027282635217462393, + "loss": 1.6532, + "step": 7390 + }, + { + "epoch": 0.7805907172995781, + "grad_norm": 0.3569023013114929, + "learning_rate": 0.0002690658552218937, + "loss": 1.6621, + "step": 7400 + }, + { + "epoch": 0.7816455696202531, + "grad_norm": 0.31288769841194153, + "learning_rate": 0.00026535719100899516, + "loss": 1.6412, + "step": 7410 + }, + { + "epoch": 0.7827004219409283, + "grad_norm": 0.34753257036209106, + "learning_rate": 0.00026169964510032245, + "loss": 1.6498, + "step": 7420 + }, + { + "epoch": 0.7837552742616034, + "grad_norm": 0.30654725432395935, + "learning_rate": 0.00025809251290767984, + "loss": 1.6362, + "step": 7430 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 0.3653108477592468, + "learning_rate": 0.00025453509955454957, + "loss": 1.6408, + "step": 7440 + }, + { + "epoch": 0.7858649789029536, + "grad_norm": 0.34892016649246216, + "learning_rate": 0.00025102671974223175, + "loss": 
1.6413, + "step": 7450 + }, + { + "epoch": 0.7869198312236287, + "grad_norm": 0.29282325506210327, + "learning_rate": 0.00024756669761782815, + "loss": 1.6527, + "step": 7460 + }, + { + "epoch": 0.7879746835443038, + "grad_norm": 0.3693530261516571, + "learning_rate": 0.0002441543666440464, + "loss": 1.6422, + "step": 7470 + }, + { + "epoch": 0.7890295358649789, + "grad_norm": 0.3493598997592926, + "learning_rate": 0.00024078906947079878, + "loss": 1.6467, + "step": 7480 + }, + { + "epoch": 0.790084388185654, + "grad_norm": 0.36602526903152466, + "learning_rate": 0.00023747015780857005, + "loss": 1.6565, + "step": 7490 + }, + { + "epoch": 0.7911392405063291, + "grad_norm": 0.3195732533931732, + "learning_rate": 0.00023419699230353144, + "loss": 1.6509, + "step": 7500 + }, + { + "epoch": 0.7921940928270043, + "grad_norm": 0.41372406482696533, + "learning_rate": 0.00023096894241437586, + "loss": 1.6579, + "step": 7510 + }, + { + "epoch": 0.7932489451476793, + "grad_norm": 0.33621683716773987, + "learning_rate": 0.00022778538629085056, + "loss": 1.6411, + "step": 7520 + }, + { + "epoch": 0.7943037974683544, + "grad_norm": 0.3519098460674286, + "learning_rate": 0.00022464571065396427, + "loss": 1.6445, + "step": 7530 + }, + { + "epoch": 0.7953586497890295, + "grad_norm": 0.36613279581069946, + "learning_rate": 0.00022154931067784521, + "loss": 1.6424, + "step": 7540 + }, + { + "epoch": 0.7964135021097046, + "grad_norm": 0.31610196828842163, + "learning_rate": 0.00021849558987322782, + "loss": 1.6378, + "step": 7550 + }, + { + "epoch": 0.7974683544303798, + "grad_norm": 0.39142224192619324, + "learning_rate": 0.0002154839599725452, + "loss": 1.6378, + "step": 7560 + }, + { + "epoch": 0.7985232067510548, + "grad_norm": 0.358140230178833, + "learning_rate": 0.00021251384081660544, + "loss": 1.652, + "step": 7570 + }, + { + "epoch": 0.79957805907173, + "grad_norm": 0.3370223343372345, + "learning_rate": 0.0002095846602428303, + "loss": 1.6475, + "step": 7580 + }, + { + 
"epoch": 0.8006329113924051, + "grad_norm": 0.32540133595466614, + "learning_rate": 0.00020669585397503358, + "loss": 1.6389, + "step": 7590 + }, + { + "epoch": 0.8016877637130801, + "grad_norm": 0.3694852292537689, + "learning_rate": 0.0002038468655147195, + "loss": 1.642, + "step": 7600 + }, + { + "epoch": 0.8027426160337553, + "grad_norm": 0.3504529297351837, + "learning_rate": 0.00020103714603387894, + "loss": 1.6553, + "step": 7610 + }, + { + "epoch": 0.8037974683544303, + "grad_norm": 0.3778478503227234, + "learning_rate": 0.00019826615426926338, + "loss": 1.6271, + "step": 7620 + }, + { + "epoch": 0.8048523206751055, + "grad_norm": 0.3270431458950043, + "learning_rate": 0.00019553335641811625, + "loss": 1.6519, + "step": 7630 + }, + { + "epoch": 0.8059071729957806, + "grad_norm": 0.34093835949897766, + "learning_rate": 0.0001928382260353415, + "loss": 1.6422, + "step": 7640 + }, + { + "epoch": 0.8069620253164557, + "grad_norm": 0.3709118962287903, + "learning_rate": 0.00019018024393208902, + "loss": 1.6569, + "step": 7650 + }, + { + "epoch": 0.8080168776371308, + "grad_norm": 0.30881646275520325, + "learning_rate": 0.00018755889807573872, + "loss": 1.6381, + "step": 7660 + }, + { + "epoch": 0.8090717299578059, + "grad_norm": 0.3335847854614258, + "learning_rate": 0.00018497368349126262, + "loss": 1.6403, + "step": 7670 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 0.3539847731590271, + "learning_rate": 0.00018242410216394648, + "loss": 1.6587, + "step": 7680 + }, + { + "epoch": 0.8111814345991561, + "grad_norm": 0.3978801667690277, + "learning_rate": 0.0001799096629434529, + "loss": 1.633, + "step": 7690 + }, + { + "epoch": 0.8122362869198312, + "grad_norm": 0.34407463669776917, + "learning_rate": 0.00017742988144920578, + "loss": 1.6393, + "step": 7700 + }, + { + "epoch": 0.8132911392405063, + "grad_norm": 0.41472217440605164, + "learning_rate": 0.00017498427997707976, + "loss": 1.6392, + "step": 7710 + }, + { + "epoch": 0.8143459915611815, + 
"grad_norm": 0.316667765378952, + "learning_rate": 0.00017257238740737548, + "loss": 1.6413, + "step": 7720 + }, + { + "epoch": 0.8154008438818565, + "grad_norm": 0.35037022829055786, + "learning_rate": 0.00017019373911406307, + "loss": 1.6477, + "step": 7730 + }, + { + "epoch": 0.8164556962025317, + "grad_norm": 0.29755109548568726, + "learning_rate": 0.000167847876875277, + "loss": 1.6454, + "step": 7740 + }, + { + "epoch": 0.8175105485232067, + "grad_norm": 0.3720715045928955, + "learning_rate": 0.00016553434878504428, + "loss": 1.6298, + "step": 7750 + }, + { + "epoch": 0.8185654008438819, + "grad_norm": 0.3846959173679352, + "learning_rate": 0.00016325270916622947, + "loss": 1.6325, + "step": 7760 + }, + { + "epoch": 0.819620253164557, + "grad_norm": 0.35193881392478943, + "learning_rate": 0.00016100251848467966, + "loss": 1.6447, + "step": 7770 + }, + { + "epoch": 0.820675105485232, + "grad_norm": 0.32953768968582153, + "learning_rate": 0.0001587833432645528, + "loss": 1.6348, + "step": 7780 + }, + { + "epoch": 0.8217299578059072, + "grad_norm": 0.36382409930229187, + "learning_rate": 0.00015659475600481292, + "loss": 1.6534, + "step": 7790 + }, + { + "epoch": 0.8227848101265823, + "grad_norm": 0.3171437978744507, + "learning_rate": 0.00015443633509687688, + "loss": 1.6386, + "step": 7800 + }, + { + "epoch": 0.8238396624472574, + "grad_norm": 0.32451412081718445, + "learning_rate": 0.00015230766474339536, + "loss": 1.6389, + "step": 7810 + }, + { + "epoch": 0.8248945147679325, + "grad_norm": 0.33827903866767883, + "learning_rate": 0.00015020833487815416, + "loss": 1.6477, + "step": 7820 + }, + { + "epoch": 0.8259493670886076, + "grad_norm": 0.33603426814079285, + "learning_rate": 0.0001481379410870792, + "loss": 1.6365, + "step": 7830 + }, + { + "epoch": 0.8270042194092827, + "grad_norm": 0.3055046498775482, + "learning_rate": 0.00014609608453033013, + "loss": 1.6227, + "step": 7840 + }, + { + "epoch": 0.8280590717299579, + "grad_norm": 0.32990390062332153, + 
"learning_rate": 0.00014408237186546807, + "loss": 1.6325, + "step": 7850 + }, + { + "epoch": 0.8291139240506329, + "grad_norm": 0.31218257546424866, + "learning_rate": 0.00014209641517168273, + "loss": 1.6218, + "step": 7860 + }, + { + "epoch": 0.830168776371308, + "grad_norm": 0.3420766294002533, + "learning_rate": 0.00014013783187506265, + "loss": 1.6346, + "step": 7870 + }, + { + "epoch": 0.8312236286919831, + "grad_norm": 0.3293689787387848, + "learning_rate": 0.00013820624467489697, + "loss": 1.6527, + "step": 7880 + }, + { + "epoch": 0.8322784810126582, + "grad_norm": 0.3033985495567322, + "learning_rate": 0.00013630128147099213, + "loss": 1.6494, + "step": 7890 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.322937548160553, + "learning_rate": 0.00013442257529199068, + "loss": 1.6307, + "step": 7900 + }, + { + "epoch": 0.8343881856540084, + "grad_norm": 0.31438392400741577, + "learning_rate": 0.00013256976422467803, + "loss": 1.64, + "step": 7910 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 0.37883907556533813, + "learning_rate": 0.00013074249134426366, + "loss": 1.6393, + "step": 7920 + }, + { + "epoch": 0.8364978902953587, + "grad_norm": 0.3531222939491272, + "learning_rate": 0.0001289404046456233, + "loss": 1.6476, + "step": 7930 + }, + { + "epoch": 0.8375527426160337, + "grad_norm": 0.3302582800388336, + "learning_rate": 0.0001271631569754887, + "loss": 1.639, + "step": 7940 + }, + { + "epoch": 0.8386075949367089, + "grad_norm": 0.3109946846961975, + "learning_rate": 0.0001254104059655723, + "loss": 1.6431, + "step": 7950 + }, + { + "epoch": 0.8396624472573839, + "grad_norm": 0.33854833245277405, + "learning_rate": 0.00012368181396661337, + "loss": 1.6272, + "step": 7960 + }, + { + "epoch": 0.8407172995780591, + "grad_norm": 0.3203161954879761, + "learning_rate": 0.00012197704798333364, + "loss": 1.6301, + "step": 7970 + }, + { + "epoch": 0.8417721518987342, + "grad_norm": 0.33319008350372314, + "learning_rate": 
0.00012029577961028894, + "loss": 1.6274, + "step": 7980 + }, + { + "epoch": 0.8428270042194093, + "grad_norm": 0.33095791935920715, + "learning_rate": 0.00011863768496860542, + "loss": 1.6412, + "step": 7990 + }, + { + "epoch": 0.8438818565400844, + "grad_norm": 0.3395090401172638, + "learning_rate": 0.00011700244464358777, + "loss": 1.6333, + "step": 8000 + }, + { + "epoch": 0.8449367088607594, + "grad_norm": 0.3152465522289276, + "learning_rate": 0.00011538974362318715, + "loss": 1.6405, + "step": 8010 + }, + { + "epoch": 0.8459915611814346, + "grad_norm": 0.3196256756782532, + "learning_rate": 0.00011379927123731737, + "loss": 1.6341, + "step": 8020 + }, + { + "epoch": 0.8470464135021097, + "grad_norm": 0.3656620979309082, + "learning_rate": 0.0001122307210980077, + "loss": 1.6329, + "step": 8030 + }, + { + "epoch": 0.8481012658227848, + "grad_norm": 0.30399253964424133, + "learning_rate": 0.00011068379104038026, + "loss": 1.645, + "step": 8040 + }, + { + "epoch": 0.8491561181434599, + "grad_norm": 0.32685747742652893, + "learning_rate": 0.00010915818306444116, + "loss": 1.6267, + "step": 8050 + }, + { + "epoch": 0.8502109704641351, + "grad_norm": 0.31762781739234924, + "learning_rate": 0.00010765360327767384, + "loss": 1.6341, + "step": 8060 + }, + { + "epoch": 0.8512658227848101, + "grad_norm": 0.32280975580215454, + "learning_rate": 0.00010616976183842376, + "loss": 1.6384, + "step": 8070 + }, + { + "epoch": 0.8523206751054853, + "grad_norm": 0.31651660799980164, + "learning_rate": 0.00010470637290006365, + "loss": 1.6409, + "step": 8080 + }, + { + "epoch": 0.8533755274261603, + "grad_norm": 0.33551445603370667, + "learning_rate": 0.00010326315455592764, + "loss": 1.6303, + "step": 8090 + }, + { + "epoch": 0.8544303797468354, + "grad_norm": 0.34469228982925415, + "learning_rate": 0.0001018398287850053, + "loss": 1.6218, + "step": 8100 + }, + { + "epoch": 0.8554852320675106, + "grad_norm": 0.3637249171733856, + "learning_rate": 0.00010043612139838357, + 
"loss": 1.6428, + "step": 8110 + }, + { + "epoch": 0.8565400843881856, + "grad_norm": 0.30862969160079956, + "learning_rate": 9.905176198642719e-05, + "loss": 1.6317, + "step": 8120 + }, + { + "epoch": 0.8575949367088608, + "grad_norm": 0.3249523937702179, + "learning_rate": 9.76864838666871e-05, + "loss": 1.6335, + "step": 8130 + }, + { + "epoch": 0.8586497890295358, + "grad_norm": 0.3195851743221283, + "learning_rate": 9.634002403252676e-05, + "loss": 1.629, + "step": 8140 + }, + { + "epoch": 0.859704641350211, + "grad_norm": 0.3234538733959198, + "learning_rate": 9.501212310245681e-05, + "loss": 1.6266, + "step": 8150 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 0.33161020278930664, + "learning_rate": 9.370252527016777e-05, + "loss": 1.6373, + "step": 8160 + }, + { + "epoch": 0.8618143459915611, + "grad_norm": 0.3949597477912903, + "learning_rate": 9.241097825525163e-05, + "loss": 1.6268, + "step": 8170 + }, + { + "epoch": 0.8628691983122363, + "grad_norm": 0.34694796800613403, + "learning_rate": 9.113723325460276e-05, + "loss": 1.6376, + "step": 8180 + }, + { + "epoch": 0.8639240506329114, + "grad_norm": 0.33663344383239746, + "learning_rate": 8.988104489448849e-05, + "loss": 1.6308, + "step": 8190 + }, + { + "epoch": 0.8649789029535865, + "grad_norm": 0.3222263753414154, + "learning_rate": 8.864217118328042e-05, + "loss": 1.6405, + "step": 8200 + }, + { + "epoch": 0.8660337552742616, + "grad_norm": 0.3217048943042755, + "learning_rate": 8.742037346483729e-05, + "loss": 1.6315, + "step": 8210 + }, + { + "epoch": 0.8670886075949367, + "grad_norm": 0.3333226442337036, + "learning_rate": 8.62154163725303e-05, + "loss": 1.6418, + "step": 8220 + }, + { + "epoch": 0.8681434599156118, + "grad_norm": 0.32743752002716064, + "learning_rate": 8.502706778390219e-05, + "loss": 1.6377, + "step": 8230 + }, + { + "epoch": 0.869198312236287, + "grad_norm": 0.34206369519233704, + "learning_rate": 8.38550987759513e-05, + "loss": 1.6335, + "step": 8240 + }, + { + 
"epoch": 0.870253164556962, + "grad_norm": 0.31393253803253174, + "learning_rate": 8.269928358103191e-05, + "loss": 1.6497, + "step": 8250 + }, + { + "epoch": 0.8713080168776371, + "grad_norm": 0.33991745114326477, + "learning_rate": 8.155939954336243e-05, + "loss": 1.6397, + "step": 8260 + }, + { + "epoch": 0.8723628691983122, + "grad_norm": 0.33921822905540466, + "learning_rate": 8.043522707613312e-05, + "loss": 1.6336, + "step": 8270 + }, + { + "epoch": 0.8734177215189873, + "grad_norm": 0.2952041029930115, + "learning_rate": 7.932654961920486e-05, + "loss": 1.6183, + "step": 8280 + }, + { + "epoch": 0.8744725738396625, + "grad_norm": 0.3467554450035095, + "learning_rate": 7.823315359739135e-05, + "loss": 1.6199, + "step": 8290 + }, + { + "epoch": 0.8755274261603375, + "grad_norm": 0.3225209414958954, + "learning_rate": 7.715482837931577e-05, + "loss": 1.6467, + "step": 8300 + }, + { + "epoch": 0.8765822784810127, + "grad_norm": 0.33101144433021545, + "learning_rate": 7.6091366236835e-05, + "loss": 1.6241, + "step": 8310 + }, + { + "epoch": 0.8776371308016878, + "grad_norm": 0.3513968288898468, + "learning_rate": 7.504256230502289e-05, + "loss": 1.6411, + "step": 8320 + }, + { + "epoch": 0.8786919831223629, + "grad_norm": 0.29445794224739075, + "learning_rate": 7.400821454270524e-05, + "loss": 1.638, + "step": 8330 + }, + { + "epoch": 0.879746835443038, + "grad_norm": 0.33319979906082153, + "learning_rate": 7.29881236935386e-05, + "loss": 1.6229, + "step": 8340 + }, + { + "epoch": 0.880801687763713, + "grad_norm": 0.3071054518222809, + "learning_rate": 7.198209324762562e-05, + "loss": 1.6274, + "step": 8350 + }, + { + "epoch": 0.8818565400843882, + "grad_norm": 0.3454078733921051, + "learning_rate": 7.098992940365946e-05, + "loss": 1.6219, + "step": 8360 + }, + { + "epoch": 0.8829113924050633, + "grad_norm": 0.304419606924057, + "learning_rate": 7.001144103159e-05, + "loss": 1.6315, + "step": 8370 + }, + { + "epoch": 0.8839662447257384, + "grad_norm": 
0.3211307227611542, + "learning_rate": 6.904643963580461e-05, + "loss": 1.638, + "step": 8380 + }, + { + "epoch": 0.8850210970464135, + "grad_norm": 0.3396269977092743, + "learning_rate": 6.809473931881644e-05, + "loss": 1.6298, + "step": 8390 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 0.3511914312839508, + "learning_rate": 6.71561567454532e-05, + "loss": 1.6297, + "step": 8400 + }, + { + "epoch": 0.8871308016877637, + "grad_norm": 0.3462528586387634, + "learning_rate": 6.623051110753948e-05, + "loss": 1.6471, + "step": 8410 + }, + { + "epoch": 0.8881856540084389, + "grad_norm": 0.30597245693206787, + "learning_rate": 6.531762408906607e-05, + "loss": 1.6332, + "step": 8420 + }, + { + "epoch": 0.8892405063291139, + "grad_norm": 0.3081440031528473, + "learning_rate": 6.441731983183912e-05, + "loss": 1.6294, + "step": 8430 + }, + { + "epoch": 0.890295358649789, + "grad_norm": 0.3371829092502594, + "learning_rate": 6.352942490160292e-05, + "loss": 1.6258, + "step": 8440 + }, + { + "epoch": 0.8913502109704642, + "grad_norm": 0.32282984256744385, + "learning_rate": 6.265376825462966e-05, + "loss": 1.6386, + "step": 8450 + }, + { + "epoch": 0.8924050632911392, + "grad_norm": 0.361008882522583, + "learning_rate": 6.179018120476945e-05, + "loss": 1.6318, + "step": 8460 + }, + { + "epoch": 0.8934599156118144, + "grad_norm": 0.3117503225803375, + "learning_rate": 6.0938497390954946e-05, + "loss": 1.6318, + "step": 8470 + }, + { + "epoch": 0.8945147679324894, + "grad_norm": 0.31181588768959045, + "learning_rate": 6.009855274515339e-05, + "loss": 1.6237, + "step": 8480 + }, + { + "epoch": 0.8955696202531646, + "grad_norm": 0.3184158504009247, + "learning_rate": 5.9270185460760735e-05, + "loss": 1.6333, + "step": 8490 + }, + { + "epoch": 0.8966244725738397, + "grad_norm": 0.3234730064868927, + "learning_rate": 5.8453235961431225e-05, + "loss": 1.6205, + "step": 8500 + }, + { + "epoch": 0.8976793248945147, + "grad_norm": 0.3118648827075958, + "learning_rate": 
5.764754687033678e-05, + "loss": 1.6312, + "step": 8510 + }, + { + "epoch": 0.8987341772151899, + "grad_norm": 0.313286155462265, + "learning_rate": 5.6852962979849836e-05, + "loss": 1.6275, + "step": 8520 + }, + { + "epoch": 0.8997890295358649, + "grad_norm": 0.31303468346595764, + "learning_rate": 5.6069331221644284e-05, + "loss": 1.6408, + "step": 8530 + }, + { + "epoch": 0.9008438818565401, + "grad_norm": 0.2958471477031708, + "learning_rate": 5.529650063720842e-05, + "loss": 1.6426, + "step": 8540 + }, + { + "epoch": 0.9018987341772152, + "grad_norm": 0.3144227862358093, + "learning_rate": 5.453432234876445e-05, + "loss": 1.6267, + "step": 8550 + }, + { + "epoch": 0.9029535864978903, + "grad_norm": 0.31019237637519836, + "learning_rate": 5.37826495305886e-05, + "loss": 1.6183, + "step": 8560 + }, + { + "epoch": 0.9040084388185654, + "grad_norm": 0.3832738697528839, + "learning_rate": 5.304133738072674e-05, + "loss": 1.6393, + "step": 8570 + }, + { + "epoch": 0.9050632911392406, + "grad_norm": 0.3055766522884369, + "learning_rate": 5.2310243093099814e-05, + "loss": 1.6338, + "step": 8580 + }, + { + "epoch": 0.9061181434599156, + "grad_norm": 0.38660526275634766, + "learning_rate": 5.158922582999368e-05, + "loss": 1.6348, + "step": 8590 + }, + { + "epoch": 0.9071729957805907, + "grad_norm": 0.3092588484287262, + "learning_rate": 5.087814669492819e-05, + "loss": 1.6277, + "step": 8600 + }, + { + "epoch": 0.9082278481012658, + "grad_norm": 0.32875898480415344, + "learning_rate": 5.017686870590028e-05, + "loss": 1.6297, + "step": 8610 + }, + { + "epoch": 0.9092827004219409, + "grad_norm": 0.3500961363315582, + "learning_rate": 4.948525676899577e-05, + "loss": 1.6276, + "step": 8620 + }, + { + "epoch": 0.9103375527426161, + "grad_norm": 0.349721223115921, + "learning_rate": 4.880317765236493e-05, + "loss": 1.6332, + "step": 8630 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 0.3664739727973938, + "learning_rate": 4.8130499960556755e-05, + "loss": 1.6159, + 
"step": 8640 + }, + { + "epoch": 0.9124472573839663, + "grad_norm": 0.3233999013900757, + "learning_rate": 4.746709410920699e-05, + "loss": 1.6216, + "step": 8650 + }, + { + "epoch": 0.9135021097046413, + "grad_norm": 0.34658798575401306, + "learning_rate": 4.681283230007507e-05, + "loss": 1.624, + "step": 8660 + }, + { + "epoch": 0.9145569620253164, + "grad_norm": 0.31716668605804443, + "learning_rate": 4.616758849642509e-05, + "loss": 1.628, + "step": 8670 + }, + { + "epoch": 0.9156118143459916, + "grad_norm": 0.3040301501750946, + "learning_rate": 4.553123839874615e-05, + "loss": 1.6373, + "step": 8680 + }, + { + "epoch": 0.9166666666666666, + "grad_norm": 0.3055068254470825, + "learning_rate": 4.490365942080736e-05, + "loss": 1.6324, + "step": 8690 + }, + { + "epoch": 0.9177215189873418, + "grad_norm": 0.3031514585018158, + "learning_rate": 4.428473066604285e-05, + "loss": 1.6321, + "step": 8700 + }, + { + "epoch": 0.9187763713080169, + "grad_norm": 0.3240368366241455, + "learning_rate": 4.367433290426233e-05, + "loss": 1.6281, + "step": 8710 + }, + { + "epoch": 0.919831223628692, + "grad_norm": 0.37979379296302795, + "learning_rate": 4.3072348548682595e-05, + "loss": 1.6238, + "step": 8720 + }, + { + "epoch": 0.9208860759493671, + "grad_norm": 0.3117033839225769, + "learning_rate": 4.247866163327575e-05, + "loss": 1.6347, + "step": 8730 + }, + { + "epoch": 0.9219409282700421, + "grad_norm": 0.31456810235977173, + "learning_rate": 4.1893157790429404e-05, + "loss": 1.6229, + "step": 8740 + }, + { + "epoch": 0.9229957805907173, + "grad_norm": 0.31371352076530457, + "learning_rate": 4.1315724228915066e-05, + "loss": 1.6249, + "step": 8750 + }, + { + "epoch": 0.9240506329113924, + "grad_norm": 0.3305368721485138, + "learning_rate": 4.074624971216005e-05, + "loss": 1.6107, + "step": 8760 + }, + { + "epoch": 0.9251054852320675, + "grad_norm": 0.30146458745002747, + "learning_rate": 4.018462453681889e-05, + "loss": 1.6248, + "step": 8770 + }, + { + "epoch": 
0.9261603375527426, + "grad_norm": 0.2916974127292633, + "learning_rate": 3.963074051164014e-05, + "loss": 1.6302, + "step": 8780 + }, + { + "epoch": 0.9272151898734177, + "grad_norm": 0.3154279887676239, + "learning_rate": 3.908449093662446e-05, + "loss": 1.625, + "step": 8790 + }, + { + "epoch": 0.9282700421940928, + "grad_norm": 0.315110445022583, + "learning_rate": 3.854577058246998e-05, + "loss": 1.6217, + "step": 8800 + }, + { + "epoch": 0.929324894514768, + "grad_norm": 0.317649781703949, + "learning_rate": 3.801447567030094e-05, + "loss": 1.6391, + "step": 8810 + }, + { + "epoch": 0.930379746835443, + "grad_norm": 0.32514020800590515, + "learning_rate": 3.7490503851675777e-05, + "loss": 1.6261, + "step": 8820 + }, + { + "epoch": 0.9314345991561181, + "grad_norm": 0.30485421419143677, + "learning_rate": 3.6973754188870806e-05, + "loss": 1.64, + "step": 8830 + }, + { + "epoch": 0.9324894514767933, + "grad_norm": 0.3108402192592621, + "learning_rate": 3.6464127135435536e-05, + "loss": 1.6271, + "step": 8840 + }, + { + "epoch": 0.9335443037974683, + "grad_norm": 0.3125903010368347, + "learning_rate": 3.596152451701616e-05, + "loss": 1.6244, + "step": 8850 + }, + { + "epoch": 0.9345991561181435, + "grad_norm": 0.31470704078674316, + "learning_rate": 3.5465849512443226e-05, + "loss": 1.6253, + "step": 8860 + }, + { + "epoch": 0.9356540084388185, + "grad_norm": 0.3080314099788666, + "learning_rate": 3.4977006635080086e-05, + "loss": 1.6277, + "step": 8870 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 0.3424152433872223, + "learning_rate": 3.449490171442838e-05, + "loss": 1.6313, + "step": 8880 + }, + { + "epoch": 0.9377637130801688, + "grad_norm": 0.3180200159549713, + "learning_rate": 3.401944187798702e-05, + "loss": 1.632, + "step": 8890 + }, + { + "epoch": 0.9388185654008439, + "grad_norm": 0.3120277523994446, + "learning_rate": 3.355053553336137e-05, + "loss": 1.6231, + "step": 8900 + }, + { + "epoch": 0.939873417721519, + "grad_norm": 
0.3785941004753113, + "learning_rate": 3.308809235061882e-05, + "loss": 1.6209, + "step": 8910 + }, + { + "epoch": 0.9409282700421941, + "grad_norm": 0.3134934902191162, + "learning_rate": 3.263202324488772e-05, + "loss": 1.6269, + "step": 8920 + }, + { + "epoch": 0.9419831223628692, + "grad_norm": 0.3198505640029907, + "learning_rate": 3.218224035919609e-05, + "loss": 1.6252, + "step": 8930 + }, + { + "epoch": 0.9430379746835443, + "grad_norm": 0.33399489521980286, + "learning_rate": 3.173865704754688e-05, + "loss": 1.6273, + "step": 8940 + }, + { + "epoch": 0.9440928270042194, + "grad_norm": 0.30771005153656006, + "learning_rate": 3.130118785822657e-05, + "loss": 1.6285, + "step": 8950 + }, + { + "epoch": 0.9451476793248945, + "grad_norm": 0.3084212839603424, + "learning_rate": 3.08697485173437e-05, + "loss": 1.6323, + "step": 8960 + }, + { + "epoch": 0.9462025316455697, + "grad_norm": 0.3333616256713867, + "learning_rate": 3.0444255912594442e-05, + "loss": 1.6313, + "step": 8970 + }, + { + "epoch": 0.9472573839662447, + "grad_norm": 0.35332056879997253, + "learning_rate": 3.002462807725185e-05, + "loss": 1.6235, + "step": 8980 + }, + { + "epoch": 0.9483122362869199, + "grad_norm": 0.31455057859420776, + "learning_rate": 2.9610784174375868e-05, + "loss": 1.6322, + "step": 8990 + }, + { + "epoch": 0.9493670886075949, + "grad_norm": 0.3195644021034241, + "learning_rate": 2.920264448124087e-05, + "loss": 1.6251, + "step": 9000 + }, + { + "epoch": 0.95042194092827, + "grad_norm": 0.3129481077194214, + "learning_rate": 2.8800130373977936e-05, + "loss": 1.6236, + "step": 9010 + }, + { + "epoch": 0.9514767932489452, + "grad_norm": 0.31001561880111694, + "learning_rate": 2.84031643124288e-05, + "loss": 1.6233, + "step": 9020 + }, + { + "epoch": 0.9525316455696202, + "grad_norm": 0.3185345232486725, + "learning_rate": 2.8011669825208517e-05, + "loss": 1.6461, + "step": 9030 + }, + { + "epoch": 0.9535864978902954, + "grad_norm": 0.3156781792640686, + "learning_rate": 
2.762557149497405e-05, + "loss": 1.6192, + "step": 9040 + }, + { + "epoch": 0.9546413502109705, + "grad_norm": 0.31877610087394714, + "learning_rate": 2.724479494389592e-05, + "loss": 1.6296, + "step": 9050 + }, + { + "epoch": 0.9556962025316456, + "grad_norm": 0.30050691962242126, + "learning_rate": 2.6869266819330058e-05, + "loss": 1.6318, + "step": 9060 + }, + { + "epoch": 0.9567510548523207, + "grad_norm": 0.3098798394203186, + "learning_rate": 2.6498914779687228e-05, + "loss": 1.6313, + "step": 9070 + }, + { + "epoch": 0.9578059071729957, + "grad_norm": 0.31012141704559326, + "learning_rate": 2.6133667480497115e-05, + "loss": 1.6306, + "step": 9080 + }, + { + "epoch": 0.9588607594936709, + "grad_norm": 0.30869683623313904, + "learning_rate": 2.5773454560664597e-05, + "loss": 1.6374, + "step": 9090 + }, + { + "epoch": 0.959915611814346, + "grad_norm": 0.3007502555847168, + "learning_rate": 2.541820662891541e-05, + "loss": 1.6095, + "step": 9100 + }, + { + "epoch": 0.9609704641350211, + "grad_norm": 0.32197049260139465, + "learning_rate": 2.5067855250428616e-05, + "loss": 1.6261, + "step": 9110 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 0.31294873356819153, + "learning_rate": 2.472233293365335e-05, + "loss": 1.6322, + "step": 9120 + }, + { + "epoch": 0.9630801687763713, + "grad_norm": 0.3226049840450287, + "learning_rate": 2.4381573117307307e-05, + "loss": 1.6198, + "step": 9130 + }, + { + "epoch": 0.9641350210970464, + "grad_norm": 0.3473133146762848, + "learning_rate": 2.4045510157554362e-05, + "loss": 1.6391, + "step": 9140 + }, + { + "epoch": 0.9651898734177216, + "grad_norm": 0.3145406246185303, + "learning_rate": 2.3714079315358985e-05, + "loss": 1.6263, + "step": 9150 + }, + { + "epoch": 0.9662447257383966, + "grad_norm": 0.299172580242157, + "learning_rate": 2.338721674401494e-05, + "loss": 1.6308, + "step": 9160 + }, + { + "epoch": 0.9672995780590717, + "grad_norm": 0.31881415843963623, + "learning_rate": 2.30648594768459e-05, + "loss": 
1.6305, + "step": 9170 + }, + { + "epoch": 0.9683544303797469, + "grad_norm": 0.30427101254463196, + "learning_rate": 2.2746945415075523e-05, + "loss": 1.625, + "step": 9180 + }, + { + "epoch": 0.9694092827004219, + "grad_norm": 0.31671831011772156, + "learning_rate": 2.2433413315864803e-05, + "loss": 1.6229, + "step": 9190 + }, + { + "epoch": 0.9704641350210971, + "grad_norm": 0.30545756220817566, + "learning_rate": 2.2124202780514277e-05, + "loss": 1.6238, + "step": 9200 + }, + { + "epoch": 0.9715189873417721, + "grad_norm": 0.32323944568634033, + "learning_rate": 2.1819254242828815e-05, + "loss": 1.6261, + "step": 9210 + }, + { + "epoch": 0.9725738396624473, + "grad_norm": 0.3047470450401306, + "learning_rate": 2.151850895764285e-05, + "loss": 1.6248, + "step": 9220 + }, + { + "epoch": 0.9736286919831224, + "grad_norm": 0.28955554962158203, + "learning_rate": 2.12219089895037e-05, + "loss": 1.614, + "step": 9230 + }, + { + "epoch": 0.9746835443037974, + "grad_norm": 0.3030565083026886, + "learning_rate": 2.092939720151092e-05, + "loss": 1.6092, + "step": 9240 + }, + { + "epoch": 0.9757383966244726, + "grad_norm": 0.29650411009788513, + "learning_rate": 2.064091724430947e-05, + "loss": 1.6223, + "step": 9250 + }, + { + "epoch": 0.9767932489451476, + "grad_norm": 0.34423649311065674, + "learning_rate": 2.0356413545234603e-05, + "loss": 1.6167, + "step": 9260 + }, + { + "epoch": 0.9778481012658228, + "grad_norm": 0.3077792227268219, + "learning_rate": 2.0075831297606357e-05, + "loss": 1.6313, + "step": 9270 + }, + { + "epoch": 0.9789029535864979, + "grad_norm": 0.3088832497596741, + "learning_rate": 1.9799116450171627e-05, + "loss": 1.619, + "step": 9280 + }, + { + "epoch": 0.979957805907173, + "grad_norm": 0.31217217445373535, + "learning_rate": 1.952621569669175e-05, + "loss": 1.6231, + "step": 9290 + }, + { + "epoch": 0.9810126582278481, + "grad_norm": 0.30523139238357544, + "learning_rate": 1.9257076465673605e-05, + "loss": 1.625, + "step": 9300 + }, + { + 
"epoch": 0.9820675105485233, + "grad_norm": 0.3273441195487976, + "learning_rate": 1.899164691024229e-05, + "loss": 1.6241, + "step": 9310 + }, + { + "epoch": 0.9831223628691983, + "grad_norm": 0.31631141901016235, + "learning_rate": 1.872987589815331e-05, + "loss": 1.6189, + "step": 9320 + }, + { + "epoch": 0.9841772151898734, + "grad_norm": 0.30482521653175354, + "learning_rate": 1.8471713001942538e-05, + "loss": 1.6292, + "step": 9330 + }, + { + "epoch": 0.9852320675105485, + "grad_norm": 0.2906980514526367, + "learning_rate": 1.8217108489211845e-05, + "loss": 1.6299, + "step": 9340 + }, + { + "epoch": 0.9862869198312236, + "grad_norm": 0.2948012053966522, + "learning_rate": 1.7966013313048696e-05, + "loss": 1.6412, + "step": 9350 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 0.3058041036128998, + "learning_rate": 1.7718379102577752e-05, + "loss": 1.6343, + "step": 9360 + }, + { + "epoch": 0.9883966244725738, + "grad_norm": 0.30109626054763794, + "learning_rate": 1.7474158153642745e-05, + "loss": 1.6311, + "step": 9370 + }, + { + "epoch": 0.989451476793249, + "grad_norm": 0.3133740723133087, + "learning_rate": 1.7233303419616745e-05, + "loss": 1.6187, + "step": 9380 + }, + { + "epoch": 0.990506329113924, + "grad_norm": 0.3518986999988556, + "learning_rate": 1.699576850233916e-05, + "loss": 1.6218, + "step": 9390 + }, + { + "epoch": 0.9915611814345991, + "grad_norm": 0.3165031373500824, + "learning_rate": 1.6761507643177553e-05, + "loss": 1.6323, + "step": 9400 + }, + { + "epoch": 0.9926160337552743, + "grad_norm": 0.3171529471874237, + "learning_rate": 1.6530475714212752e-05, + "loss": 1.6069, + "step": 9410 + }, + { + "epoch": 0.9936708860759493, + "grad_norm": 0.3226306438446045, + "learning_rate": 1.6302628209545423e-05, + "loss": 1.6259, + "step": 9420 + }, + { + "epoch": 0.9947257383966245, + "grad_norm": 0.30792197585105896, + "learning_rate": 1.6077921236722464e-05, + "loss": 1.63, + "step": 9430 + }, + { + "epoch": 0.9957805907172996, + 
"grad_norm": 0.30410683155059814, + "learning_rate": 1.5856311508281594e-05, + "loss": 1.6242, + "step": 9440 + }, + { + "epoch": 0.9968354430379747, + "grad_norm": 0.3058238625526428, + "learning_rate": 1.5637756333412454e-05, + "loss": 1.6349, + "step": 9450 + }, + { + "epoch": 0.9978902953586498, + "grad_norm": 0.33604955673217773, + "learning_rate": 1.542221360973268e-05, + "loss": 1.6177, + "step": 9460 + }, + { + "epoch": 0.9989451476793249, + "grad_norm": 0.3306424915790558, + "learning_rate": 1.5209641815177312e-05, + "loss": 1.6297, + "step": 9470 + }, + { + "epoch": 1.0, + "grad_norm": 0.9350292682647705, + "learning_rate": 1.5e-05, + "loss": 1.6205, + "step": 9480 + } + ], + "logging_steps": 10, + "max_steps": 9480, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.8317122291574784e+16, + "train_batch_size": 1024, + "trial_name": null, + "trial_params": null +} diff --git a/saves-cohere/checkpoint-9480/training_args.bin b/saves-cohere/checkpoint-9480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..40e00139bd817094ffb615fc3c3639865807fc5e --- /dev/null +++ b/saves-cohere/checkpoint-9480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79576cca5d581954b59b5349be79056c32495d3b2ed8a8755667dd2d1e5bc09e +size 5112 diff --git a/saves-cohere/config.json b/saves-cohere/config.json new file mode 100644 index 0000000000000000000000000000000000000000..df4858fb7c269ef7a90d58d11e57ba3dd9f2ef21 --- /dev/null +++ b/saves-cohere/config.json @@ -0,0 +1,27 @@ +{ + "architectures": [ + "CohereForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 5, + "eos_token_id": 255001, + "hidden_act": 
"silu", + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 768, + "layer_norm_eps": 1e-05, + "logit_scale": 0.0625, + "max_position_embeddings": 8192, + "model_type": "cohere", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pad_token_id": 0, + "rope_theta": 10000.0, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "use_qk_norm": false, + "vocab_size": 2000 +} diff --git a/saves-cohere/generation_config.json b/saves-cohere/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9c41a4de69f546b74395520ae8afc0771ed6b49a --- /dev/null +++ b/saves-cohere/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 5, + "eos_token_id": 255001, + "pad_token_id": 0, + "transformers_version": "4.42.4" +} diff --git a/saves-cohere/model.safetensors b/saves-cohere/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e7b9607651dc517ddc9fae9614d8a192237687bc --- /dev/null +++ b/saves-cohere/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b79be30f1ea1ff29b17ca323353282cb348e94ed35effe8555d8ad3083e53133 +size 8344440 diff --git a/saves-cohere/result.log b/saves-cohere/result.log new file mode 100644 index 0000000000000000000000000000000000000000..d23bf6d3e1607d23c2d814cb0f538aa698e5dec9 --- /dev/null +++ b/saves-cohere/result.log @@ -0,0 +1 @@ +{'train_runtime': 2344.4408, 'train_samples_per_second': 4140.264, 'train_steps_per_second': 4.044, 'train_loss': 2.132681980414733, 'epoch': 1.0} \ No newline at end of file diff --git a/saves-cohere/special_tokens_map.json b/saves-cohere/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-cohere/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-cohere/tokenizer.json b/saves-cohere/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-cohere/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + 
"byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 
167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 
320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, 
+ "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 
614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + 
".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + 
"Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + 
"åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 
1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + 
"åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 
1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 
1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + 
"ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + 
"Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + 
"è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-cohere/tokenizer_config.json b/saves-cohere/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-cohere/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "model_max_length": 
4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-gemma-cosine/checkpoint-9480/config.json b/saves-gemma-cosine/checkpoint-9480/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d48ca7aeb27c97cf10a10143d9cee52b66b74ab3 --- /dev/null +++ b/saves-gemma-cosine/checkpoint-9480/config.json @@ -0,0 +1,27 @@ +{ + "architectures": [ + "GemmaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 2, + "eos_token_id": 1, + "head_dim": 256, + "hidden_act": "gelu_pytorch_tanh", + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 768, + "max_position_embeddings": 8192, + "model_type": "gemma", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pad_token_id": 0, + "rms_norm_eps": 1e-06, + "rope_theta": 10000.0, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-gemma-cosine/checkpoint-9480/generation_config.json b/saves-gemma-cosine/checkpoint-9480/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c20913bfa6d3576264545acb67eae5f4818d0d32 --- /dev/null +++ b/saves-gemma-cosine/checkpoint-9480/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 2, + "eos_token_id": 1, + "pad_token_id": 0, + "transformers_version": "4.42.4" +} diff --git a/saves-gemma-cosine/checkpoint-9480/model.safetensors b/saves-gemma-cosine/checkpoint-9480/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aeb29d1abc45aff8c92d5cc22caed1034e8bd856 --- /dev/null +++ b/saves-gemma-cosine/checkpoint-9480/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffac0325ea267d46004617d550c5834c1940f02892c6b431368487b7d82ce2d5 +size 19356792 
diff --git a/saves-gemma-cosine/checkpoint-9480/optimizer.pt b/saves-gemma-cosine/checkpoint-9480/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf7f6357b4a3b39bdda174e70454e24bff81dce7 --- /dev/null +++ b/saves-gemma-cosine/checkpoint-9480/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34bfc4efd67be5b9fe906b79bcaf46f0873515bf14cc4b179a30ee084c1cb134 +size 38726626 diff --git a/saves-gemma-cosine/checkpoint-9480/rng_state.pth b/saves-gemma-cosine/checkpoint-9480/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f8291cd6ce87668b786a72f3e93d072fbe54902 --- /dev/null +++ b/saves-gemma-cosine/checkpoint-9480/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c917636c7a58af68a29056522a757e9f9b99005b776641aa157c536967817d +size 14244 diff --git a/saves-gemma-cosine/checkpoint-9480/scheduler.pt b/saves-gemma-cosine/checkpoint-9480/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..03c145297021546d40e130546440641e02059bcb --- /dev/null +++ b/saves-gemma-cosine/checkpoint-9480/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35fd617624c087e1a286ed7cf3fa38baa4a8815e49f107c3186b4c7c58e1adbb +size 1064 diff --git a/saves-gemma-cosine/checkpoint-9480/special_tokens_map.json b/saves-gemma-cosine/checkpoint-9480/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-gemma-cosine/checkpoint-9480/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-gemma-cosine/checkpoint-9480/tokenizer.json b/saves-gemma-cosine/checkpoint-9480/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-gemma-cosine/checkpoint-9480/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 
12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + 
"Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 
337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + 
"æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + 
"ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 
775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 
917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + 
"åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 
1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + 
"sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + 
"Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, 
+ "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + 
"è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 
1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 
1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-gemma-cosine/checkpoint-9480/tokenizer_config.json b/saves-gemma-cosine/checkpoint-9480/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-gemma-cosine/checkpoint-9480/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + 
"eos_token": "<|im_end|>", + "errors": "replace", + "model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-gemma-cosine/checkpoint-9480/trainer_state.json b/saves-gemma-cosine/checkpoint-9480/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3f5365e1666a20dfa617b3560faf051c457489a2 --- /dev/null +++ b/saves-gemma-cosine/checkpoint-9480/trainer_state.json @@ -0,0 +1,6669 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 9480, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0010548523206751054, + "grad_norm": 1.224403738975525, + "learning_rate": 0.00015789473684210527, + "loss": 7.6018, + "step": 10 + }, + { + "epoch": 0.002109704641350211, + "grad_norm": 1.0830174684524536, + "learning_rate": 0.00031578947368421053, + "loss": 7.0526, + "step": 20 + }, + { + "epoch": 0.0031645569620253164, + "grad_norm": 0.8968034982681274, + "learning_rate": 0.00047368421052631577, + "loss": 6.4082, + "step": 30 + }, + { + "epoch": 0.004219409282700422, + "grad_norm": 0.6159543395042419, + "learning_rate": 0.0006315789473684211, + "loss": 5.8731, + "step": 40 + }, + { + "epoch": 0.005274261603375527, + "grad_norm": 0.39616236090660095, + "learning_rate": 0.0007894736842105263, + "loss": 5.4938, + "step": 50 + }, + { + "epoch": 0.006329113924050633, + "grad_norm": 0.6134156584739685, + "learning_rate": 0.0009473684210526315, + "loss": 5.0902, + "step": 60 + }, + { + "epoch": 0.007383966244725738, + "grad_norm": 0.33243003487586975, + "learning_rate": 0.0011052631578947368, + "loss": 4.6618, + "step": 70 + }, + { + "epoch": 0.008438818565400843, + "grad_norm": 0.23324701189994812, + "learning_rate": 0.0012631578947368421, + "loss": 4.339, + "step": 80 + }, + { + "epoch": 0.00949367088607595, 
+ "grad_norm": 0.4921824336051941, + "learning_rate": 0.0014210526315789472, + "loss": 4.1184, + "step": 90 + }, + { + "epoch": 0.010548523206751054, + "grad_norm": 0.3369714319705963, + "learning_rate": 0.0014999989494847376, + "loss": 3.9508, + "step": 100 + }, + { + "epoch": 0.011603375527426161, + "grad_norm": 0.27228906750679016, + "learning_rate": 0.0014999905453802946, + "loss": 3.8005, + "step": 110 + }, + { + "epoch": 0.012658227848101266, + "grad_norm": 0.2569299340248108, + "learning_rate": 0.0014999737372655805, + "loss": 3.6694, + "step": 120 + }, + { + "epoch": 0.013713080168776372, + "grad_norm": 0.6321362257003784, + "learning_rate": 0.0014999485253289388, + "loss": 3.5732, + "step": 130 + }, + { + "epoch": 0.014767932489451477, + "grad_norm": 0.265931636095047, + "learning_rate": 0.0014999149098528814, + "loss": 3.4612, + "step": 140 + }, + { + "epoch": 0.015822784810126583, + "grad_norm": 0.36976251006126404, + "learning_rate": 0.0014998728912140862, + "loss": 3.3647, + "step": 150 + }, + { + "epoch": 0.016877637130801686, + "grad_norm": 0.29034847021102905, + "learning_rate": 0.0014998224698833922, + "loss": 3.2909, + "step": 160 + }, + { + "epoch": 0.017932489451476793, + "grad_norm": 0.31680262088775635, + "learning_rate": 0.0014997636464257956, + "loss": 3.2053, + "step": 170 + }, + { + "epoch": 0.0189873417721519, + "grad_norm": 0.3954222500324249, + "learning_rate": 0.0014996964215004416, + "loss": 3.1432, + "step": 180 + }, + { + "epoch": 0.020042194092827006, + "grad_norm": 0.40973424911499023, + "learning_rate": 0.0014996207958606182, + "loss": 3.0757, + "step": 190 + }, + { + "epoch": 0.02109704641350211, + "grad_norm": 0.457225501537323, + "learning_rate": 0.001499536770353748, + "loss": 3.0013, + "step": 200 + }, + { + "epoch": 0.022151898734177215, + "grad_norm": 0.5260195732116699, + "learning_rate": 0.0014994443459213774, + "loss": 2.9466, + "step": 210 + }, + { + "epoch": 0.023206751054852322, + "grad_norm": 0.33434826135635376, + 
"learning_rate": 0.001499343523599168, + "loss": 2.8918, + "step": 220 + }, + { + "epoch": 0.024261603375527425, + "grad_norm": 0.3714075982570648, + "learning_rate": 0.0014992343045168823, + "loss": 2.8235, + "step": 230 + }, + { + "epoch": 0.02531645569620253, + "grad_norm": 0.3708628714084625, + "learning_rate": 0.0014991166898983739, + "loss": 2.769, + "step": 240 + }, + { + "epoch": 0.026371308016877638, + "grad_norm": 0.37826642394065857, + "learning_rate": 0.001498990681061572, + "loss": 2.7176, + "step": 250 + }, + { + "epoch": 0.027426160337552744, + "grad_norm": 0.3962521255016327, + "learning_rate": 0.001498856279418467, + "loss": 2.6607, + "step": 260 + }, + { + "epoch": 0.028481012658227847, + "grad_norm": 0.4650373160839081, + "learning_rate": 0.0014987134864750948, + "loss": 2.6222, + "step": 270 + }, + { + "epoch": 0.029535864978902954, + "grad_norm": 0.36165574193000793, + "learning_rate": 0.0014985623038315206, + "loss": 2.571, + "step": 280 + }, + { + "epoch": 0.03059071729957806, + "grad_norm": 0.4171614944934845, + "learning_rate": 0.0014984027331818193, + "loss": 2.5323, + "step": 290 + }, + { + "epoch": 0.03164556962025317, + "grad_norm": 0.5617461204528809, + "learning_rate": 0.0014982347763140584, + "loss": 2.4971, + "step": 300 + }, + { + "epoch": 0.03270042194092827, + "grad_norm": 0.6129226684570312, + "learning_rate": 0.0014980584351102762, + "loss": 2.4632, + "step": 310 + }, + { + "epoch": 0.03375527426160337, + "grad_norm": 0.46741676330566406, + "learning_rate": 0.001497873711546462, + "loss": 2.4185, + "step": 320 + }, + { + "epoch": 0.03481012658227848, + "grad_norm": 0.451256662607193, + "learning_rate": 0.0014976806076925334, + "loss": 2.3998, + "step": 330 + }, + { + "epoch": 0.035864978902953586, + "grad_norm": 0.43502041697502136, + "learning_rate": 0.0014974791257123137, + "loss": 2.367, + "step": 340 + }, + { + "epoch": 0.03691983122362869, + "grad_norm": 0.3662364184856415, + "learning_rate": 0.001497269267863507, + 
"loss": 2.3158, + "step": 350 + }, + { + "epoch": 0.0379746835443038, + "grad_norm": 0.4172323942184448, + "learning_rate": 0.0014970510364976724, + "loss": 2.2855, + "step": 360 + }, + { + "epoch": 0.039029535864978905, + "grad_norm": 0.3845485746860504, + "learning_rate": 0.0014968244340601996, + "loss": 2.2709, + "step": 370 + }, + { + "epoch": 0.04008438818565401, + "grad_norm": 0.5006797313690186, + "learning_rate": 0.001496589463090279, + "loss": 2.2583, + "step": 380 + }, + { + "epoch": 0.04113924050632911, + "grad_norm": 0.6061115264892578, + "learning_rate": 0.001496346126220875, + "loss": 2.2285, + "step": 390 + }, + { + "epoch": 0.04219409282700422, + "grad_norm": 0.36179161071777344, + "learning_rate": 0.0014960944261786966, + "loss": 2.1955, + "step": 400 + }, + { + "epoch": 0.043248945147679324, + "grad_norm": 0.3911934792995453, + "learning_rate": 0.0014958343657841655, + "loss": 2.1831, + "step": 410 + }, + { + "epoch": 0.04430379746835443, + "grad_norm": 0.4641297459602356, + "learning_rate": 0.001495565947951385, + "loss": 2.1568, + "step": 420 + }, + { + "epoch": 0.04535864978902954, + "grad_norm": 0.3561594486236572, + "learning_rate": 0.0014952891756881085, + "loss": 2.1319, + "step": 430 + }, + { + "epoch": 0.046413502109704644, + "grad_norm": 0.3676946759223938, + "learning_rate": 0.0014950040520957037, + "loss": 2.1113, + "step": 440 + }, + { + "epoch": 0.04746835443037975, + "grad_norm": 0.397060364484787, + "learning_rate": 0.0014947105803691204, + "loss": 2.1096, + "step": 450 + }, + { + "epoch": 0.04852320675105485, + "grad_norm": 0.5825448036193848, + "learning_rate": 0.0014944087637968522, + "loss": 2.0875, + "step": 460 + }, + { + "epoch": 0.049578059071729956, + "grad_norm": 0.3827154040336609, + "learning_rate": 0.0014940986057609012, + "loss": 2.0607, + "step": 470 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 0.44185370206832886, + "learning_rate": 0.0014937801097367396, + "loss": 2.0567, + "step": 480 + }, + { + 
"epoch": 0.05168776371308017, + "grad_norm": 0.37032192945480347, + "learning_rate": 0.001493453279293271, + "loss": 2.0288, + "step": 490 + }, + { + "epoch": 0.052742616033755275, + "grad_norm": 0.4027710556983948, + "learning_rate": 0.0014931181180927902, + "loss": 2.0196, + "step": 500 + }, + { + "epoch": 0.05379746835443038, + "grad_norm": 0.4590783715248108, + "learning_rate": 0.001492774629890942, + "loss": 2.0054, + "step": 510 + }, + { + "epoch": 0.05485232067510549, + "grad_norm": 0.43571019172668457, + "learning_rate": 0.001492422818536679, + "loss": 1.9931, + "step": 520 + }, + { + "epoch": 0.05590717299578059, + "grad_norm": 0.36802706122398376, + "learning_rate": 0.00149206268797222, + "loss": 1.9784, + "step": 530 + }, + { + "epoch": 0.056962025316455694, + "grad_norm": 0.42650750279426575, + "learning_rate": 0.0014916942422330032, + "loss": 1.9588, + "step": 540 + }, + { + "epoch": 0.0580168776371308, + "grad_norm": 0.3881192207336426, + "learning_rate": 0.001491317485447643, + "loss": 1.9576, + "step": 550 + }, + { + "epoch": 0.05907172995780591, + "grad_norm": 0.37164539098739624, + "learning_rate": 0.0014909324218378838, + "loss": 1.9199, + "step": 560 + }, + { + "epoch": 0.060126582278481014, + "grad_norm": 0.4603891372680664, + "learning_rate": 0.0014905390557185508, + "loss": 1.9272, + "step": 570 + }, + { + "epoch": 0.06118143459915612, + "grad_norm": 0.4279107451438904, + "learning_rate": 0.0014901373914975036, + "loss": 1.9275, + "step": 580 + }, + { + "epoch": 0.06223628691983123, + "grad_norm": 0.4203340411186218, + "learning_rate": 0.0014897274336755856, + "loss": 1.9022, + "step": 590 + }, + { + "epoch": 0.06329113924050633, + "grad_norm": 0.47206705808639526, + "learning_rate": 0.001489309186846575, + "loss": 1.8864, + "step": 600 + }, + { + "epoch": 0.06434599156118144, + "grad_norm": 0.46364250779151917, + "learning_rate": 0.0014888826556971313, + "loss": 1.8886, + "step": 610 + }, + { + "epoch": 0.06540084388185655, + "grad_norm": 
0.41355302929878235, + "learning_rate": 0.0014884478450067444, + "loss": 1.8716, + "step": 620 + }, + { + "epoch": 0.06645569620253164, + "grad_norm": 0.5133219957351685, + "learning_rate": 0.0014880047596476807, + "loss": 1.8572, + "step": 630 + }, + { + "epoch": 0.06751054852320675, + "grad_norm": 0.5945013761520386, + "learning_rate": 0.0014875534045849274, + "loss": 1.8732, + "step": 640 + }, + { + "epoch": 0.06856540084388185, + "grad_norm": 0.37314435839653015, + "learning_rate": 0.0014870937848761388, + "loss": 1.8569, + "step": 650 + }, + { + "epoch": 0.06962025316455696, + "grad_norm": 0.36437222361564636, + "learning_rate": 0.001486625905671578, + "loss": 1.8432, + "step": 660 + }, + { + "epoch": 0.07067510548523206, + "grad_norm": 0.4192541539669037, + "learning_rate": 0.00148614977221406, + "loss": 1.8279, + "step": 670 + }, + { + "epoch": 0.07172995780590717, + "grad_norm": 0.35485246777534485, + "learning_rate": 0.0014856653898388927, + "loss": 1.8186, + "step": 680 + }, + { + "epoch": 0.07278481012658228, + "grad_norm": 0.43379876017570496, + "learning_rate": 0.001485172763973817, + "loss": 1.8302, + "step": 690 + }, + { + "epoch": 0.07383966244725738, + "grad_norm": 0.36517855525016785, + "learning_rate": 0.0014846719001389466, + "loss": 1.8109, + "step": 700 + }, + { + "epoch": 0.07489451476793249, + "grad_norm": 0.3900572955608368, + "learning_rate": 0.001484162803946705, + "loss": 1.8012, + "step": 710 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 0.5315179824829102, + "learning_rate": 0.0014836454811017635, + "loss": 1.787, + "step": 720 + }, + { + "epoch": 0.0770042194092827, + "grad_norm": 0.3824789524078369, + "learning_rate": 0.0014831199374009778, + "loss": 1.7798, + "step": 730 + }, + { + "epoch": 0.07805907172995781, + "grad_norm": 0.3980269730091095, + "learning_rate": 0.0014825861787333208, + "loss": 1.7912, + "step": 740 + }, + { + "epoch": 0.07911392405063292, + "grad_norm": 0.4327452778816223, + "learning_rate": 
0.0014820442110798197, + "loss": 1.7699, + "step": 750 + }, + { + "epoch": 0.08016877637130802, + "grad_norm": 0.4212438762187958, + "learning_rate": 0.0014814940405134865, + "loss": 1.7656, + "step": 760 + }, + { + "epoch": 0.08122362869198312, + "grad_norm": 0.44190293550491333, + "learning_rate": 0.001480935673199251, + "loss": 1.7593, + "step": 770 + }, + { + "epoch": 0.08227848101265822, + "grad_norm": 0.45607924461364746, + "learning_rate": 0.0014803691153938915, + "loss": 1.7574, + "step": 780 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 0.37207379937171936, + "learning_rate": 0.0014797943734459653, + "loss": 1.758, + "step": 790 + }, + { + "epoch": 0.08438818565400844, + "grad_norm": 0.49604687094688416, + "learning_rate": 0.001479211453795736, + "loss": 1.7343, + "step": 800 + }, + { + "epoch": 0.08544303797468354, + "grad_norm": 0.41798657178878784, + "learning_rate": 0.0014786203629751033, + "loss": 1.72, + "step": 810 + }, + { + "epoch": 0.08649789029535865, + "grad_norm": 0.5231151580810547, + "learning_rate": 0.0014780211076075279, + "loss": 1.7314, + "step": 820 + }, + { + "epoch": 0.08755274261603375, + "grad_norm": 0.3442583978176117, + "learning_rate": 0.0014774136944079594, + "loss": 1.7333, + "step": 830 + }, + { + "epoch": 0.08860759493670886, + "grad_norm": 0.3477618992328644, + "learning_rate": 0.0014767981301827592, + "loss": 1.7091, + "step": 840 + }, + { + "epoch": 0.08966244725738397, + "grad_norm": 0.3565273582935333, + "learning_rate": 0.0014761744218296249, + "loss": 1.7073, + "step": 850 + }, + { + "epoch": 0.09071729957805907, + "grad_norm": 0.3818325102329254, + "learning_rate": 0.001475542576337513, + "loss": 1.7096, + "step": 860 + }, + { + "epoch": 0.09177215189873418, + "grad_norm": 0.3839077055454254, + "learning_rate": 0.001474902600786561, + "loss": 1.7062, + "step": 870 + }, + { + "epoch": 0.09282700421940929, + "grad_norm": 0.5717847347259521, + "learning_rate": 0.0014742545023480075, + "loss": 1.7008, + "step": 
880 + }, + { + "epoch": 0.0938818565400844, + "grad_norm": 0.361653596162796, + "learning_rate": 0.0014735982882841117, + "loss": 1.6898, + "step": 890 + }, + { + "epoch": 0.0949367088607595, + "grad_norm": 0.34179946780204773, + "learning_rate": 0.0014729339659480727, + "loss": 1.6894, + "step": 900 + }, + { + "epoch": 0.09599156118143459, + "grad_norm": 0.37158748507499695, + "learning_rate": 0.0014722615427839468, + "loss": 1.6956, + "step": 910 + }, + { + "epoch": 0.0970464135021097, + "grad_norm": 0.36093300580978394, + "learning_rate": 0.0014715810263265633, + "loss": 1.6844, + "step": 920 + }, + { + "epoch": 0.0981012658227848, + "grad_norm": 0.3625962436199188, + "learning_rate": 0.0014708924242014423, + "loss": 1.6677, + "step": 930 + }, + { + "epoch": 0.09915611814345991, + "grad_norm": 0.39460405707359314, + "learning_rate": 0.0014701957441247064, + "loss": 1.6769, + "step": 940 + }, + { + "epoch": 0.10021097046413502, + "grad_norm": 0.4288382828235626, + "learning_rate": 0.0014694909939029959, + "loss": 1.6593, + "step": 950 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 0.4599696397781372, + "learning_rate": 0.0014687781814333814, + "loss": 1.6695, + "step": 960 + }, + { + "epoch": 0.10232067510548523, + "grad_norm": 0.34559813141822815, + "learning_rate": 0.0014680573147032746, + "loss": 1.6685, + "step": 970 + }, + { + "epoch": 0.10337552742616034, + "grad_norm": 0.4324914515018463, + "learning_rate": 0.0014673284017903392, + "loss": 1.647, + "step": 980 + }, + { + "epoch": 0.10443037974683544, + "grad_norm": 0.3713063895702362, + "learning_rate": 0.0014665914508624, + "loss": 1.6466, + "step": 990 + }, + { + "epoch": 0.10548523206751055, + "grad_norm": 0.36441338062286377, + "learning_rate": 0.0014658464701773526, + "loss": 1.6539, + "step": 1000 + }, + { + "epoch": 0.10654008438818566, + "grad_norm": 0.3947252929210663, + "learning_rate": 0.0014650934680830688, + "loss": 1.644, + "step": 1010 + }, + { + "epoch": 0.10759493670886076, + 
"grad_norm": 0.39086753129959106, + "learning_rate": 0.0014643324530173051, + "loss": 1.6356, + "step": 1020 + }, + { + "epoch": 0.10864978902953587, + "grad_norm": 0.4416680932044983, + "learning_rate": 0.0014635634335076067, + "loss": 1.6387, + "step": 1030 + }, + { + "epoch": 0.10970464135021098, + "grad_norm": 0.39951401948928833, + "learning_rate": 0.001462786418171213, + "loss": 1.6317, + "step": 1040 + }, + { + "epoch": 0.11075949367088607, + "grad_norm": 0.33045339584350586, + "learning_rate": 0.0014620014157149597, + "loss": 1.6345, + "step": 1050 + }, + { + "epoch": 0.11181434599156118, + "grad_norm": 0.3590207099914551, + "learning_rate": 0.001461208434935183, + "loss": 1.6198, + "step": 1060 + }, + { + "epoch": 0.11286919831223628, + "grad_norm": 0.38089054822921753, + "learning_rate": 0.0014604074847176197, + "loss": 1.6115, + "step": 1070 + }, + { + "epoch": 0.11392405063291139, + "grad_norm": 0.3784695863723755, + "learning_rate": 0.0014595985740373082, + "loss": 1.6124, + "step": 1080 + }, + { + "epoch": 0.1149789029535865, + "grad_norm": 0.46172553300857544, + "learning_rate": 0.0014587817119584873, + "loss": 1.6223, + "step": 1090 + }, + { + "epoch": 0.1160337552742616, + "grad_norm": 0.41999849677085876, + "learning_rate": 0.001457956907634496, + "loss": 1.6097, + "step": 1100 + }, + { + "epoch": 0.11708860759493671, + "grad_norm": 0.32970911264419556, + "learning_rate": 0.0014571241703076692, + "loss": 1.6125, + "step": 1110 + }, + { + "epoch": 0.11814345991561181, + "grad_norm": 0.34392091631889343, + "learning_rate": 0.0014562835093092348, + "loss": 1.6047, + "step": 1120 + }, + { + "epoch": 0.11919831223628692, + "grad_norm": 0.4005463421344757, + "learning_rate": 0.0014554349340592104, + "loss": 1.5932, + "step": 1130 + }, + { + "epoch": 0.12025316455696203, + "grad_norm": 0.4000810980796814, + "learning_rate": 0.001454578454066296, + "loss": 1.6066, + "step": 1140 + }, + { + "epoch": 0.12130801687763713, + "grad_norm": 0.5581340789794922, + 
"learning_rate": 0.0014537140789277678, + "loss": 1.5937, + "step": 1150 + }, + { + "epoch": 0.12236286919831224, + "grad_norm": 0.3587999641895294, + "learning_rate": 0.0014528418183293716, + "loss": 1.5998, + "step": 1160 + }, + { + "epoch": 0.12341772151898735, + "grad_norm": 0.3587309718132019, + "learning_rate": 0.001451961682045213, + "loss": 1.5899, + "step": 1170 + }, + { + "epoch": 0.12447257383966245, + "grad_norm": 0.38655397295951843, + "learning_rate": 0.001451073679937649, + "loss": 1.5741, + "step": 1180 + }, + { + "epoch": 0.12552742616033755, + "grad_norm": 0.36412402987480164, + "learning_rate": 0.0014501778219571766, + "loss": 1.5749, + "step": 1190 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 0.3607521653175354, + "learning_rate": 0.0014492741181423225, + "loss": 1.5859, + "step": 1200 + }, + { + "epoch": 0.12763713080168776, + "grad_norm": 0.3544326722621918, + "learning_rate": 0.0014483625786195285, + "loss": 1.5767, + "step": 1210 + }, + { + "epoch": 0.12869198312236288, + "grad_norm": 0.3618502616882324, + "learning_rate": 0.0014474432136030405, + "loss": 1.5659, + "step": 1220 + }, + { + "epoch": 0.12974683544303797, + "grad_norm": 0.3665081858634949, + "learning_rate": 0.0014465160333947923, + "loss": 1.5577, + "step": 1230 + }, + { + "epoch": 0.1308016877637131, + "grad_norm": 0.3543892502784729, + "learning_rate": 0.0014455810483842908, + "loss": 1.5737, + "step": 1240 + }, + { + "epoch": 0.13185654008438819, + "grad_norm": 0.435255229473114, + "learning_rate": 0.0014446382690484997, + "loss": 1.5761, + "step": 1250 + }, + { + "epoch": 0.13291139240506328, + "grad_norm": 0.3356614410877228, + "learning_rate": 0.0014436877059517215, + "loss": 1.5561, + "step": 1260 + }, + { + "epoch": 0.1339662447257384, + "grad_norm": 0.32167506217956543, + "learning_rate": 0.0014427293697454803, + "loss": 1.5594, + "step": 1270 + }, + { + "epoch": 0.1350210970464135, + "grad_norm": 0.34562185406684875, + "learning_rate": 0.001441763271168401, 
+ "loss": 1.5668, + "step": 1280 + }, + { + "epoch": 0.1360759493670886, + "grad_norm": 0.3838872015476227, + "learning_rate": 0.00144078942104609, + "loss": 1.5561, + "step": 1290 + }, + { + "epoch": 0.1371308016877637, + "grad_norm": 0.3552131652832031, + "learning_rate": 0.001439807830291013, + "loss": 1.5432, + "step": 1300 + }, + { + "epoch": 0.13818565400843882, + "grad_norm": 0.3455946743488312, + "learning_rate": 0.0014388185099023744, + "loss": 1.55, + "step": 1310 + }, + { + "epoch": 0.13924050632911392, + "grad_norm": 0.3956320583820343, + "learning_rate": 0.0014378214709659916, + "loss": 1.5514, + "step": 1320 + }, + { + "epoch": 0.14029535864978904, + "grad_norm": 0.4718681573867798, + "learning_rate": 0.0014368167246541733, + "loss": 1.5419, + "step": 1330 + }, + { + "epoch": 0.14135021097046413, + "grad_norm": 0.37479954957962036, + "learning_rate": 0.0014358042822255918, + "loss": 1.5427, + "step": 1340 + }, + { + "epoch": 0.14240506329113925, + "grad_norm": 0.384405255317688, + "learning_rate": 0.0014347841550251597, + "loss": 1.5463, + "step": 1350 + }, + { + "epoch": 0.14345991561181434, + "grad_norm": 0.41001036763191223, + "learning_rate": 0.0014337563544838997, + "loss": 1.5386, + "step": 1360 + }, + { + "epoch": 0.14451476793248946, + "grad_norm": 0.4328341782093048, + "learning_rate": 0.001432720892118819, + "loss": 1.5383, + "step": 1370 + }, + { + "epoch": 0.14556962025316456, + "grad_norm": 0.4074663519859314, + "learning_rate": 0.0014316777795327794, + "loss": 1.5296, + "step": 1380 + }, + { + "epoch": 0.14662447257383968, + "grad_norm": 0.45945727825164795, + "learning_rate": 0.001430627028414366, + "loss": 1.5394, + "step": 1390 + }, + { + "epoch": 0.14767932489451477, + "grad_norm": 0.582301676273346, + "learning_rate": 0.0014295686505377586, + "loss": 1.5195, + "step": 1400 + }, + { + "epoch": 0.14873417721518986, + "grad_norm": 0.42094552516937256, + "learning_rate": 0.0014285026577625982, + "loss": 1.5244, + "step": 1410 + }, + { + 
"epoch": 0.14978902953586498, + "grad_norm": 0.36885884404182434, + "learning_rate": 0.0014274290620338542, + "loss": 1.5288, + "step": 1420 + }, + { + "epoch": 0.15084388185654007, + "grad_norm": 0.3604186773300171, + "learning_rate": 0.0014263478753816906, + "loss": 1.521, + "step": 1430 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 0.35973232984542847, + "learning_rate": 0.0014252591099213326, + "loss": 1.5165, + "step": 1440 + }, + { + "epoch": 0.1529535864978903, + "grad_norm": 0.3624361455440521, + "learning_rate": 0.001424162777852928, + "loss": 1.5175, + "step": 1450 + }, + { + "epoch": 0.1540084388185654, + "grad_norm": 0.35657012462615967, + "learning_rate": 0.0014230588914614134, + "loss": 1.5148, + "step": 1460 + }, + { + "epoch": 0.1550632911392405, + "grad_norm": 0.44882655143737793, + "learning_rate": 0.0014219474631163745, + "loss": 1.5112, + "step": 1470 + }, + { + "epoch": 0.15611814345991562, + "grad_norm": 0.4358622133731842, + "learning_rate": 0.001420828505271909, + "loss": 1.5153, + "step": 1480 + }, + { + "epoch": 0.1571729957805907, + "grad_norm": 0.42174819111824036, + "learning_rate": 0.0014197020304664856, + "loss": 1.5155, + "step": 1490 + }, + { + "epoch": 0.15822784810126583, + "grad_norm": 0.40337657928466797, + "learning_rate": 0.0014185680513228048, + "loss": 1.5093, + "step": 1500 + }, + { + "epoch": 0.15928270042194093, + "grad_norm": 0.43946516513824463, + "learning_rate": 0.0014174265805476564, + "loss": 1.5124, + "step": 1510 + }, + { + "epoch": 0.16033755274261605, + "grad_norm": 0.5411512851715088, + "learning_rate": 0.0014162776309317778, + "loss": 1.5142, + "step": 1520 + }, + { + "epoch": 0.16139240506329114, + "grad_norm": 0.3460575044155121, + "learning_rate": 0.0014151212153497108, + "loss": 1.4959, + "step": 1530 + }, + { + "epoch": 0.16244725738396623, + "grad_norm": 0.4668763279914856, + "learning_rate": 0.0014139573467596561, + "loss": 1.4874, + "step": 1540 + }, + { + "epoch": 0.16350210970464135, + 
"grad_norm": 0.3798237144947052, + "learning_rate": 0.00141278603820333, + "loss": 1.4891, + "step": 1550 + }, + { + "epoch": 0.16455696202531644, + "grad_norm": 0.3989150822162628, + "learning_rate": 0.0014116073028058165, + "loss": 1.4922, + "step": 1560 + }, + { + "epoch": 0.16561181434599156, + "grad_norm": 0.3972000181674957, + "learning_rate": 0.0014104211537754217, + "loss": 1.4833, + "step": 1570 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 0.4881563186645508, + "learning_rate": 0.001409227604403524, + "loss": 1.4954, + "step": 1580 + }, + { + "epoch": 0.16772151898734178, + "grad_norm": 0.40641921758651733, + "learning_rate": 0.0014080266680644277, + "loss": 1.49, + "step": 1590 + }, + { + "epoch": 0.16877637130801687, + "grad_norm": 0.3936208188533783, + "learning_rate": 0.0014068183582152103, + "loss": 1.4916, + "step": 1600 + }, + { + "epoch": 0.169831223628692, + "grad_norm": 0.3647351562976837, + "learning_rate": 0.001405602688395574, + "loss": 1.4895, + "step": 1610 + }, + { + "epoch": 0.17088607594936708, + "grad_norm": 0.4019123613834381, + "learning_rate": 0.0014043796722276924, + "loss": 1.4603, + "step": 1620 + }, + { + "epoch": 0.1719409282700422, + "grad_norm": 0.39883333444595337, + "learning_rate": 0.0014031493234160591, + "loss": 1.4766, + "step": 1630 + }, + { + "epoch": 0.1729957805907173, + "grad_norm": 0.36587414145469666, + "learning_rate": 0.0014019116557473332, + "loss": 1.4798, + "step": 1640 + }, + { + "epoch": 0.17405063291139242, + "grad_norm": 0.44427385926246643, + "learning_rate": 0.0014006666830901854, + "loss": 1.4702, + "step": 1650 + }, + { + "epoch": 0.1751054852320675, + "grad_norm": 0.34642136096954346, + "learning_rate": 0.001399414419395142, + "loss": 1.4708, + "step": 1660 + }, + { + "epoch": 0.17616033755274263, + "grad_norm": 0.4188520014286041, + "learning_rate": 0.0013981548786944293, + "loss": 1.4838, + "step": 1670 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 0.4440024495124817, + 
"learning_rate": 0.0013968880751018158, + "loss": 1.4674, + "step": 1680 + }, + { + "epoch": 0.17827004219409281, + "grad_norm": 0.5161822438240051, + "learning_rate": 0.0013956140228124545, + "loss": 1.4627, + "step": 1690 + }, + { + "epoch": 0.17932489451476794, + "grad_norm": 0.43483030796051025, + "learning_rate": 0.0013943327361027231, + "loss": 1.482, + "step": 1700 + }, + { + "epoch": 0.18037974683544303, + "grad_norm": 0.3826151490211487, + "learning_rate": 0.0013930442293300649, + "loss": 1.4594, + "step": 1710 + }, + { + "epoch": 0.18143459915611815, + "grad_norm": 0.351190984249115, + "learning_rate": 0.0013917485169328279, + "loss": 1.4576, + "step": 1720 + }, + { + "epoch": 0.18248945147679324, + "grad_norm": 0.38680753111839294, + "learning_rate": 0.0013904456134301016, + "loss": 1.4569, + "step": 1730 + }, + { + "epoch": 0.18354430379746836, + "grad_norm": 0.34136396646499634, + "learning_rate": 0.0013891355334215562, + "loss": 1.459, + "step": 1740 + }, + { + "epoch": 0.18459915611814345, + "grad_norm": 0.35594695806503296, + "learning_rate": 0.0013878182915872776, + "loss": 1.4808, + "step": 1750 + }, + { + "epoch": 0.18565400843881857, + "grad_norm": 0.3368285596370697, + "learning_rate": 0.001386493902687604, + "loss": 1.4578, + "step": 1760 + }, + { + "epoch": 0.18670886075949367, + "grad_norm": 0.33792853355407715, + "learning_rate": 0.00138516238156296, + "loss": 1.4599, + "step": 1770 + }, + { + "epoch": 0.1877637130801688, + "grad_norm": 0.35535696148872375, + "learning_rate": 0.0013838237431336895, + "loss": 1.4613, + "step": 1780 + }, + { + "epoch": 0.18881856540084388, + "grad_norm": 0.3883817791938782, + "learning_rate": 0.0013824780023998899, + "loss": 1.463, + "step": 1790 + }, + { + "epoch": 0.189873417721519, + "grad_norm": 0.3484611511230469, + "learning_rate": 0.0013811251744412431, + "loss": 1.4543, + "step": 1800 + }, + { + "epoch": 0.1909282700421941, + "grad_norm": 0.3812905550003052, + "learning_rate": 0.0013797652744168473, + 
"loss": 1.4457, + "step": 1810 + }, + { + "epoch": 0.19198312236286919, + "grad_norm": 0.3451510965824127, + "learning_rate": 0.0013783983175650457, + "loss": 1.4416, + "step": 1820 + }, + { + "epoch": 0.1930379746835443, + "grad_norm": 0.391886442899704, + "learning_rate": 0.0013770243192032581, + "loss": 1.4458, + "step": 1830 + }, + { + "epoch": 0.1940928270042194, + "grad_norm": 0.3714632987976074, + "learning_rate": 0.0013756432947278064, + "loss": 1.447, + "step": 1840 + }, + { + "epoch": 0.19514767932489452, + "grad_norm": 0.35794293880462646, + "learning_rate": 0.0013742552596137444, + "loss": 1.4432, + "step": 1850 + }, + { + "epoch": 0.1962025316455696, + "grad_norm": 0.350976824760437, + "learning_rate": 0.0013728602294146833, + "loss": 1.4472, + "step": 1860 + }, + { + "epoch": 0.19725738396624473, + "grad_norm": 0.40909984707832336, + "learning_rate": 0.0013714582197626175, + "loss": 1.4506, + "step": 1870 + }, + { + "epoch": 0.19831223628691982, + "grad_norm": 0.3741796612739563, + "learning_rate": 0.0013700492463677501, + "loss": 1.4352, + "step": 1880 + }, + { + "epoch": 0.19936708860759494, + "grad_norm": 0.36106088757514954, + "learning_rate": 0.0013686333250183154, + "loss": 1.4426, + "step": 1890 + }, + { + "epoch": 0.20042194092827004, + "grad_norm": 0.3645462095737457, + "learning_rate": 0.001367210471580404, + "loss": 1.437, + "step": 1900 + }, + { + "epoch": 0.20147679324894516, + "grad_norm": 0.39476481080055237, + "learning_rate": 0.0013657807019977835, + "loss": 1.4379, + "step": 1910 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 0.3888195753097534, + "learning_rate": 0.0013643440322917198, + "loss": 1.4382, + "step": 1920 + }, + { + "epoch": 0.20358649789029537, + "grad_norm": 0.37703272700309753, + "learning_rate": 0.0013629004785607989, + "loss": 1.4363, + "step": 1930 + }, + { + "epoch": 0.20464135021097046, + "grad_norm": 0.3618707060813904, + "learning_rate": 0.0013614500569807445, + "loss": 1.4306, + "step": 1940 + }, + { 
+ "epoch": 0.20569620253164558, + "grad_norm": 0.39237603545188904, + "learning_rate": 0.0013599927838042394, + "loss": 1.4269, + "step": 1950 + }, + { + "epoch": 0.20675105485232068, + "grad_norm": 0.3404817283153534, + "learning_rate": 0.0013585286753607408, + "loss": 1.4289, + "step": 1960 + }, + { + "epoch": 0.20780590717299577, + "grad_norm": 0.37156736850738525, + "learning_rate": 0.0013570577480562986, + "loss": 1.44, + "step": 1970 + }, + { + "epoch": 0.2088607594936709, + "grad_norm": 0.3893108069896698, + "learning_rate": 0.0013555800183733717, + "loss": 1.4195, + "step": 1980 + }, + { + "epoch": 0.20991561181434598, + "grad_norm": 0.3751927316188812, + "learning_rate": 0.0013540955028706425, + "loss": 1.418, + "step": 1990 + }, + { + "epoch": 0.2109704641350211, + "grad_norm": 0.35814914107322693, + "learning_rate": 0.0013526042181828324, + "loss": 1.4193, + "step": 2000 + }, + { + "epoch": 0.2120253164556962, + "grad_norm": 0.36299997568130493, + "learning_rate": 0.0013511061810205143, + "loss": 1.4257, + "step": 2010 + }, + { + "epoch": 0.21308016877637131, + "grad_norm": 0.4335196614265442, + "learning_rate": 0.001349601408169926, + "loss": 1.4377, + "step": 2020 + }, + { + "epoch": 0.2141350210970464, + "grad_norm": 0.41183096170425415, + "learning_rate": 0.0013480899164927823, + "loss": 1.4227, + "step": 2030 + }, + { + "epoch": 0.21518987341772153, + "grad_norm": 0.3383892774581909, + "learning_rate": 0.0013465717229260853, + "loss": 1.4217, + "step": 2040 + }, + { + "epoch": 0.21624472573839662, + "grad_norm": 0.4018089175224304, + "learning_rate": 0.001345046844481935, + "loss": 1.418, + "step": 2050 + }, + { + "epoch": 0.21729957805907174, + "grad_norm": 0.3578946590423584, + "learning_rate": 0.0013435152982473396, + "loss": 1.4104, + "step": 2060 + }, + { + "epoch": 0.21835443037974683, + "grad_norm": 0.35767829418182373, + "learning_rate": 0.0013419771013840217, + "loss": 1.4171, + "step": 2070 + }, + { + "epoch": 0.21940928270042195, + 
"grad_norm": 0.37678778171539307, + "learning_rate": 0.001340432271128229, + "loss": 1.421, + "step": 2080 + }, + { + "epoch": 0.22046413502109705, + "grad_norm": 0.3800208866596222, + "learning_rate": 0.0013388808247905381, + "loss": 1.4097, + "step": 2090 + }, + { + "epoch": 0.22151898734177214, + "grad_norm": 0.41718804836273193, + "learning_rate": 0.0013373227797556634, + "loss": 1.4164, + "step": 2100 + }, + { + "epoch": 0.22257383966244726, + "grad_norm": 0.39602622389793396, + "learning_rate": 0.00133575815348226, + "loss": 1.4052, + "step": 2110 + }, + { + "epoch": 0.22362869198312235, + "grad_norm": 0.33915984630584717, + "learning_rate": 0.0013341869635027292, + "loss": 1.405, + "step": 2120 + }, + { + "epoch": 0.22468354430379747, + "grad_norm": 0.387729674577713, + "learning_rate": 0.001332609227423022, + "loss": 1.4054, + "step": 2130 + }, + { + "epoch": 0.22573839662447256, + "grad_norm": 0.37279507517814636, + "learning_rate": 0.0013310249629224417, + "loss": 1.4021, + "step": 2140 + }, + { + "epoch": 0.22679324894514769, + "grad_norm": 0.43018755316734314, + "learning_rate": 0.0013294341877534454, + "loss": 1.4107, + "step": 2150 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 0.36134418845176697, + "learning_rate": 0.0013278369197414458, + "loss": 1.4214, + "step": 2160 + }, + { + "epoch": 0.2289029535864979, + "grad_norm": 0.34999746084213257, + "learning_rate": 0.0013262331767846104, + "loss": 1.4059, + "step": 2170 + }, + { + "epoch": 0.229957805907173, + "grad_norm": 0.3411415219306946, + "learning_rate": 0.0013246229768536628, + "loss": 1.3894, + "step": 2180 + }, + { + "epoch": 0.2310126582278481, + "grad_norm": 0.48818546533584595, + "learning_rate": 0.001323006337991679, + "loss": 1.4056, + "step": 2190 + }, + { + "epoch": 0.2320675105485232, + "grad_norm": 0.3689866364002228, + "learning_rate": 0.0013213832783138873, + "loss": 1.4079, + "step": 2200 + }, + { + "epoch": 0.23312236286919832, + "grad_norm": 0.47169798612594604, + 
"learning_rate": 0.0013197538160074633, + "loss": 1.3925, + "step": 2210 + }, + { + "epoch": 0.23417721518987342, + "grad_norm": 0.36988016963005066, + "learning_rate": 0.0013181179693313283, + "loss": 1.4046, + "step": 2220 + }, + { + "epoch": 0.23523206751054854, + "grad_norm": 0.3319711983203888, + "learning_rate": 0.0013164757566159428, + "loss": 1.397, + "step": 2230 + }, + { + "epoch": 0.23628691983122363, + "grad_norm": 0.3849964141845703, + "learning_rate": 0.001314827196263102, + "loss": 1.3931, + "step": 2240 + }, + { + "epoch": 0.23734177215189872, + "grad_norm": 0.355008602142334, + "learning_rate": 0.0013131723067457302, + "loss": 1.3916, + "step": 2250 + }, + { + "epoch": 0.23839662447257384, + "grad_norm": 0.40703532099723816, + "learning_rate": 0.0013115111066076721, + "loss": 1.3824, + "step": 2260 + }, + { + "epoch": 0.23945147679324894, + "grad_norm": 0.36470040678977966, + "learning_rate": 0.0013098436144634862, + "loss": 1.4164, + "step": 2270 + }, + { + "epoch": 0.24050632911392406, + "grad_norm": 0.3571237623691559, + "learning_rate": 0.0013081698489982364, + "loss": 1.402, + "step": 2280 + }, + { + "epoch": 0.24156118143459915, + "grad_norm": 0.39680442214012146, + "learning_rate": 0.001306489828967282, + "loss": 1.3905, + "step": 2290 + }, + { + "epoch": 0.24261603375527427, + "grad_norm": 0.39895832538604736, + "learning_rate": 0.0013048035731960679, + "loss": 1.3893, + "step": 2300 + }, + { + "epoch": 0.24367088607594936, + "grad_norm": 0.373119980096817, + "learning_rate": 0.0013031111005799133, + "loss": 1.3882, + "step": 2310 + }, + { + "epoch": 0.24472573839662448, + "grad_norm": 0.3598041534423828, + "learning_rate": 0.0013014124300838004, + "loss": 1.3977, + "step": 2320 + }, + { + "epoch": 0.24578059071729957, + "grad_norm": 0.3711779713630676, + "learning_rate": 0.0012997075807421612, + "loss": 1.3823, + "step": 2330 + }, + { + "epoch": 0.2468354430379747, + "grad_norm": 0.34823834896087646, + "learning_rate": 
0.0012979965716586653, + "loss": 1.3766, + "step": 2340 + }, + { + "epoch": 0.2478902953586498, + "grad_norm": 0.3646671175956726, + "learning_rate": 0.0012962794220060048, + "loss": 1.3777, + "step": 2350 + }, + { + "epoch": 0.2489451476793249, + "grad_norm": 0.43606895208358765, + "learning_rate": 0.0012945561510256801, + "loss": 1.3853, + "step": 2360 + }, + { + "epoch": 0.25, + "grad_norm": 0.3553369641304016, + "learning_rate": 0.001292826778027784, + "loss": 1.3832, + "step": 2370 + }, + { + "epoch": 0.2510548523206751, + "grad_norm": 0.33519911766052246, + "learning_rate": 0.0012910913223907856, + "loss": 1.3786, + "step": 2380 + }, + { + "epoch": 0.2521097046413502, + "grad_norm": 0.3543609082698822, + "learning_rate": 0.0012893498035613123, + "loss": 1.3813, + "step": 2390 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 0.35886815190315247, + "learning_rate": 0.001287602241053933, + "loss": 1.381, + "step": 2400 + }, + { + "epoch": 0.2542194092827004, + "grad_norm": 0.36419469118118286, + "learning_rate": 0.0012858486544509392, + "loss": 1.3838, + "step": 2410 + }, + { + "epoch": 0.2552742616033755, + "grad_norm": 0.377615749835968, + "learning_rate": 0.0012840890634021249, + "loss": 1.377, + "step": 2420 + }, + { + "epoch": 0.2563291139240506, + "grad_norm": 0.34815406799316406, + "learning_rate": 0.0012823234876245667, + "loss": 1.3734, + "step": 2430 + }, + { + "epoch": 0.25738396624472576, + "grad_norm": 0.3714103698730469, + "learning_rate": 0.0012805519469024035, + "loss": 1.3823, + "step": 2440 + }, + { + "epoch": 0.25843881856540085, + "grad_norm": 0.395673930644989, + "learning_rate": 0.0012787744610866143, + "loss": 1.3688, + "step": 2450 + }, + { + "epoch": 0.25949367088607594, + "grad_norm": 0.34790366888046265, + "learning_rate": 0.0012769910500947954, + "loss": 1.3829, + "step": 2460 + }, + { + "epoch": 0.26054852320675104, + "grad_norm": 0.34407252073287964, + "learning_rate": 0.0012752017339109376, + "loss": 1.3743, + "step": 2470 + 
}, + { + "epoch": 0.2616033755274262, + "grad_norm": 0.3564300835132599, + "learning_rate": 0.0012734065325852029, + "loss": 1.3773, + "step": 2480 + }, + { + "epoch": 0.2626582278481013, + "grad_norm": 0.3561383783817291, + "learning_rate": 0.0012716054662336987, + "loss": 1.3661, + "step": 2490 + }, + { + "epoch": 0.26371308016877637, + "grad_norm": 0.46398332715034485, + "learning_rate": 0.001269798555038252, + "loss": 1.3706, + "step": 2500 + }, + { + "epoch": 0.26476793248945146, + "grad_norm": 0.34344884753227234, + "learning_rate": 0.0012679858192461864, + "loss": 1.3699, + "step": 2510 + }, + { + "epoch": 0.26582278481012656, + "grad_norm": 0.40484264492988586, + "learning_rate": 0.0012661672791700906, + "loss": 1.3746, + "step": 2520 + }, + { + "epoch": 0.2668776371308017, + "grad_norm": 0.3783828318119049, + "learning_rate": 0.0012643429551875945, + "loss": 1.3701, + "step": 2530 + }, + { + "epoch": 0.2679324894514768, + "grad_norm": 0.4477307200431824, + "learning_rate": 0.0012625128677411388, + "loss": 1.3695, + "step": 2540 + }, + { + "epoch": 0.2689873417721519, + "grad_norm": 0.47914379835128784, + "learning_rate": 0.0012606770373377475, + "loss": 1.3659, + "step": 2550 + }, + { + "epoch": 0.270042194092827, + "grad_norm": 0.35758259892463684, + "learning_rate": 0.0012588354845487959, + "loss": 1.3724, + "step": 2560 + }, + { + "epoch": 0.27109704641350213, + "grad_norm": 0.3747115135192871, + "learning_rate": 0.001256988230009783, + "loss": 1.3646, + "step": 2570 + }, + { + "epoch": 0.2721518987341772, + "grad_norm": 0.3435615301132202, + "learning_rate": 0.0012551352944200976, + "loss": 1.3652, + "step": 2580 + }, + { + "epoch": 0.2732067510548523, + "grad_norm": 0.3976442217826843, + "learning_rate": 0.0012532766985427874, + "loss": 1.3688, + "step": 2590 + }, + { + "epoch": 0.2742616033755274, + "grad_norm": 0.3415212631225586, + "learning_rate": 0.0012514124632043272, + "loss": 1.3703, + "step": 2600 + }, + { + "epoch": 0.27531645569620256, + 
"grad_norm": 0.3382601737976074, + "learning_rate": 0.0012495426092943842, + "loss": 1.3649, + "step": 2610 + }, + { + "epoch": 0.27637130801687765, + "grad_norm": 0.35768750309944153, + "learning_rate": 0.0012476671577655845, + "loss": 1.3642, + "step": 2620 + }, + { + "epoch": 0.27742616033755274, + "grad_norm": 0.3365379869937897, + "learning_rate": 0.0012457861296332774, + "loss": 1.3542, + "step": 2630 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 0.363199383020401, + "learning_rate": 0.001243899545975303, + "loss": 1.3624, + "step": 2640 + }, + { + "epoch": 0.2795358649789029, + "grad_norm": 0.3533865213394165, + "learning_rate": 0.0012420074279317515, + "loss": 1.3562, + "step": 2650 + }, + { + "epoch": 0.2805907172995781, + "grad_norm": 0.44423454999923706, + "learning_rate": 0.0012401097967047298, + "loss": 1.3517, + "step": 2660 + }, + { + "epoch": 0.28164556962025317, + "grad_norm": 0.36125123500823975, + "learning_rate": 0.001238206673558122, + "loss": 1.3569, + "step": 2670 + }, + { + "epoch": 0.28270042194092826, + "grad_norm": 0.35034412145614624, + "learning_rate": 0.0012362980798173526, + "loss": 1.3548, + "step": 2680 + }, + { + "epoch": 0.28375527426160335, + "grad_norm": 0.3607058823108673, + "learning_rate": 0.0012343840368691462, + "loss": 1.3552, + "step": 2690 + }, + { + "epoch": 0.2848101265822785, + "grad_norm": 0.36174681782722473, + "learning_rate": 0.0012324645661612886, + "loss": 1.3468, + "step": 2700 + }, + { + "epoch": 0.2858649789029536, + "grad_norm": 0.37004148960113525, + "learning_rate": 0.0012305396892023867, + "loss": 1.356, + "step": 2710 + }, + { + "epoch": 0.2869198312236287, + "grad_norm": 0.34047403931617737, + "learning_rate": 0.0012286094275616264, + "loss": 1.3549, + "step": 2720 + }, + { + "epoch": 0.2879746835443038, + "grad_norm": 0.3488231301307678, + "learning_rate": 0.0012266738028685318, + "loss": 1.3395, + "step": 2730 + }, + { + "epoch": 0.2890295358649789, + "grad_norm": 0.3499659597873688, + 
"learning_rate": 0.001224732836812723, + "loss": 1.3459, + "step": 2740 + }, + { + "epoch": 0.290084388185654, + "grad_norm": 0.3290250599384308, + "learning_rate": 0.0012227865511436724, + "loss": 1.3501, + "step": 2750 + }, + { + "epoch": 0.2911392405063291, + "grad_norm": 0.4917152523994446, + "learning_rate": 0.001220834967670461, + "loss": 1.3623, + "step": 2760 + }, + { + "epoch": 0.2921940928270042, + "grad_norm": 0.3639252185821533, + "learning_rate": 0.0012188781082615346, + "loss": 1.3574, + "step": 2770 + }, + { + "epoch": 0.29324894514767935, + "grad_norm": 0.34025079011917114, + "learning_rate": 0.0012169159948444588, + "loss": 1.3537, + "step": 2780 + }, + { + "epoch": 0.29430379746835444, + "grad_norm": 0.342074453830719, + "learning_rate": 0.001214948649405672, + "loss": 1.3491, + "step": 2790 + }, + { + "epoch": 0.29535864978902954, + "grad_norm": 0.3434010446071625, + "learning_rate": 0.0012129760939902407, + "loss": 1.3473, + "step": 2800 + }, + { + "epoch": 0.29641350210970463, + "grad_norm": 0.37486496567726135, + "learning_rate": 0.0012109983507016114, + "loss": 1.3492, + "step": 2810 + }, + { + "epoch": 0.2974683544303797, + "grad_norm": 0.3642338812351227, + "learning_rate": 0.0012090154417013636, + "loss": 1.3532, + "step": 2820 + }, + { + "epoch": 0.29852320675105487, + "grad_norm": 0.31444185972213745, + "learning_rate": 0.0012070273892089605, + "loss": 1.3255, + "step": 2830 + }, + { + "epoch": 0.29957805907172996, + "grad_norm": 0.3436901271343231, + "learning_rate": 0.0012050342155015012, + "loss": 1.3333, + "step": 2840 + }, + { + "epoch": 0.30063291139240506, + "grad_norm": 0.3226953148841858, + "learning_rate": 0.0012030359429134707, + "loss": 1.3377, + "step": 2850 + }, + { + "epoch": 0.30168776371308015, + "grad_norm": 0.32987070083618164, + "learning_rate": 0.0012010325938364883, + "loss": 1.3423, + "step": 2860 + }, + { + "epoch": 0.3027426160337553, + "grad_norm": 0.3697974979877472, + "learning_rate": 0.0011990241907190592, + 
"loss": 1.3434, + "step": 2870 + }, + { + "epoch": 0.3037974683544304, + "grad_norm": 0.35540589690208435, + "learning_rate": 0.001197010756066321, + "loss": 1.3316, + "step": 2880 + }, + { + "epoch": 0.3048523206751055, + "grad_norm": 0.3480130434036255, + "learning_rate": 0.0011949923124397917, + "loss": 1.3328, + "step": 2890 + }, + { + "epoch": 0.3059071729957806, + "grad_norm": 0.3540296256542206, + "learning_rate": 0.001192968882457118, + "loss": 1.3406, + "step": 2900 + }, + { + "epoch": 0.3069620253164557, + "grad_norm": 0.351339727640152, + "learning_rate": 0.001190940488791821, + "loss": 1.3406, + "step": 2910 + }, + { + "epoch": 0.3080168776371308, + "grad_norm": 0.393935889005661, + "learning_rate": 0.0011889071541730419, + "loss": 1.3353, + "step": 2920 + }, + { + "epoch": 0.3090717299578059, + "grad_norm": 0.34540510177612305, + "learning_rate": 0.001186868901385288, + "loss": 1.3329, + "step": 2930 + }, + { + "epoch": 0.310126582278481, + "grad_norm": 0.36295583844184875, + "learning_rate": 0.001184825753268177, + "loss": 1.3359, + "step": 2940 + }, + { + "epoch": 0.3111814345991561, + "grad_norm": 0.33286401629447937, + "learning_rate": 0.0011827777327161814, + "loss": 1.3453, + "step": 2950 + }, + { + "epoch": 0.31223628691983124, + "grad_norm": 0.36974620819091797, + "learning_rate": 0.0011807248626783714, + "loss": 1.3248, + "step": 2960 + }, + { + "epoch": 0.31329113924050633, + "grad_norm": 0.3501949608325958, + "learning_rate": 0.0011786671661581584, + "loss": 1.3218, + "step": 2970 + }, + { + "epoch": 0.3143459915611814, + "grad_norm": 0.3529147207736969, + "learning_rate": 0.001176604666213036, + "loss": 1.3278, + "step": 2980 + }, + { + "epoch": 0.3154008438818565, + "grad_norm": 0.33370137214660645, + "learning_rate": 0.0011745373859543236, + "loss": 1.3411, + "step": 2990 + }, + { + "epoch": 0.31645569620253167, + "grad_norm": 0.3584390878677368, + "learning_rate": 0.0011724653485469063, + "loss": 1.3221, + "step": 3000 + }, + { + 
"epoch": 0.31751054852320676, + "grad_norm": 0.3395592272281647, + "learning_rate": 0.0011703885772089743, + "loss": 1.3285, + "step": 3010 + }, + { + "epoch": 0.31856540084388185, + "grad_norm": 0.33367520570755005, + "learning_rate": 0.0011683070952117646, + "loss": 1.3291, + "step": 3020 + }, + { + "epoch": 0.31962025316455694, + "grad_norm": 0.4275783896446228, + "learning_rate": 0.0011662209258792998, + "loss": 1.3179, + "step": 3030 + }, + { + "epoch": 0.3206751054852321, + "grad_norm": 0.385898619890213, + "learning_rate": 0.0011641300925881257, + "loss": 1.3256, + "step": 3040 + }, + { + "epoch": 0.3217299578059072, + "grad_norm": 0.3597271144390106, + "learning_rate": 0.0011620346187670501, + "loss": 1.3219, + "step": 3050 + }, + { + "epoch": 0.3227848101265823, + "grad_norm": 0.34854650497436523, + "learning_rate": 0.0011599345278968806, + "loss": 1.3354, + "step": 3060 + }, + { + "epoch": 0.32383966244725737, + "grad_norm": 0.34655341506004333, + "learning_rate": 0.0011578298435101604, + "loss": 1.3218, + "step": 3070 + }, + { + "epoch": 0.32489451476793246, + "grad_norm": 0.34645092487335205, + "learning_rate": 0.0011557205891909062, + "loss": 1.3141, + "step": 3080 + }, + { + "epoch": 0.3259493670886076, + "grad_norm": 0.338767945766449, + "learning_rate": 0.0011536067885743423, + "loss": 1.3299, + "step": 3090 + }, + { + "epoch": 0.3270042194092827, + "grad_norm": 0.34491071105003357, + "learning_rate": 0.001151488465346637, + "loss": 1.3169, + "step": 3100 + }, + { + "epoch": 0.3280590717299578, + "grad_norm": 0.3524955213069916, + "learning_rate": 0.0011493656432446362, + "loss": 1.3186, + "step": 3110 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 0.37061384320259094, + "learning_rate": 0.0011472383460555983, + "loss": 1.3234, + "step": 3120 + }, + { + "epoch": 0.33016877637130804, + "grad_norm": 0.36625683307647705, + "learning_rate": 0.001145106597616927, + "loss": 1.3338, + "step": 3130 + }, + { + "epoch": 0.33122362869198313, + 
"grad_norm": 0.384627103805542, + "learning_rate": 0.001142970421815904, + "loss": 1.3098, + "step": 3140 + }, + { + "epoch": 0.3322784810126582, + "grad_norm": 0.3396291136741638, + "learning_rate": 0.0011408298425894226, + "loss": 1.3219, + "step": 3150 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.3925064206123352, + "learning_rate": 0.0011386848839237186, + "loss": 1.3212, + "step": 3160 + }, + { + "epoch": 0.33438818565400846, + "grad_norm": 0.3743514120578766, + "learning_rate": 0.0011365355698541005, + "loss": 1.3204, + "step": 3170 + }, + { + "epoch": 0.33544303797468356, + "grad_norm": 0.37474945187568665, + "learning_rate": 0.0011343819244646824, + "loss": 1.3147, + "step": 3180 + }, + { + "epoch": 0.33649789029535865, + "grad_norm": 0.41234973073005676, + "learning_rate": 0.001132223971888112, + "loss": 1.3149, + "step": 3190 + }, + { + "epoch": 0.33755274261603374, + "grad_norm": 0.3496325612068176, + "learning_rate": 0.0011300617363053024, + "loss": 1.31, + "step": 3200 + }, + { + "epoch": 0.33860759493670883, + "grad_norm": 0.3794001042842865, + "learning_rate": 0.0011278952419451586, + "loss": 1.3232, + "step": 3210 + }, + { + "epoch": 0.339662447257384, + "grad_norm": 0.38375622034072876, + "learning_rate": 0.0011257245130843077, + "loss": 1.3148, + "step": 3220 + }, + { + "epoch": 0.3407172995780591, + "grad_norm": 0.3591216802597046, + "learning_rate": 0.0011235495740468265, + "loss": 1.3092, + "step": 3230 + }, + { + "epoch": 0.34177215189873417, + "grad_norm": 0.3375595211982727, + "learning_rate": 0.0011213704492039694, + "loss": 1.29, + "step": 3240 + }, + { + "epoch": 0.34282700421940926, + "grad_norm": 0.37955981492996216, + "learning_rate": 0.001119187162973894, + "loss": 1.3161, + "step": 3250 + }, + { + "epoch": 0.3438818565400844, + "grad_norm": 0.35981568694114685, + "learning_rate": 0.001116999739821388, + "loss": 1.3048, + "step": 3260 + }, + { + "epoch": 0.3449367088607595, + "grad_norm": 0.35022369027137756, + 
"learning_rate": 0.0011148082042575968, + "loss": 1.3239, + "step": 3270 + }, + { + "epoch": 0.3459915611814346, + "grad_norm": 0.3633371889591217, + "learning_rate": 0.0011126125808397461, + "loss": 1.322, + "step": 3280 + }, + { + "epoch": 0.3470464135021097, + "grad_norm": 0.4322781264781952, + "learning_rate": 0.0011104128941708683, + "loss": 1.3041, + "step": 3290 + }, + { + "epoch": 0.34810126582278483, + "grad_norm": 0.34436267614364624, + "learning_rate": 0.001108209168899527, + "loss": 1.3112, + "step": 3300 + }, + { + "epoch": 0.3491561181434599, + "grad_norm": 0.3594895601272583, + "learning_rate": 0.0011060014297195396, + "loss": 1.3094, + "step": 3310 + }, + { + "epoch": 0.350210970464135, + "grad_norm": 0.35558730363845825, + "learning_rate": 0.0011037897013697015, + "loss": 1.3153, + "step": 3320 + }, + { + "epoch": 0.3512658227848101, + "grad_norm": 0.3509814441204071, + "learning_rate": 0.0011015740086335092, + "loss": 1.303, + "step": 3330 + }, + { + "epoch": 0.35232067510548526, + "grad_norm": 0.39170390367507935, + "learning_rate": 0.0010993543763388814, + "loss": 1.3075, + "step": 3340 + }, + { + "epoch": 0.35337552742616035, + "grad_norm": 0.38959237933158875, + "learning_rate": 0.0010971308293578814, + "loss": 1.3028, + "step": 3350 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 0.4082445204257965, + "learning_rate": 0.0010949033926064397, + "loss": 1.3028, + "step": 3360 + }, + { + "epoch": 0.35548523206751054, + "grad_norm": 0.45328179001808167, + "learning_rate": 0.0010926720910440725, + "loss": 1.3113, + "step": 3370 + }, + { + "epoch": 0.35654008438818563, + "grad_norm": 0.40795767307281494, + "learning_rate": 0.001090436949673603, + "loss": 1.3048, + "step": 3380 + }, + { + "epoch": 0.3575949367088608, + "grad_norm": 0.34642016887664795, + "learning_rate": 0.0010881979935408815, + "loss": 1.2998, + "step": 3390 + }, + { + "epoch": 0.35864978902953587, + "grad_norm": 0.4289262592792511, + "learning_rate": 0.0010859552477345052, 
+ "loss": 1.3076, + "step": 3400 + }, + { + "epoch": 0.35970464135021096, + "grad_norm": 0.3674197494983673, + "learning_rate": 0.001083708737385536, + "loss": 1.3056, + "step": 3410 + }, + { + "epoch": 0.36075949367088606, + "grad_norm": 0.3692392110824585, + "learning_rate": 0.0010814584876672187, + "loss": 1.2879, + "step": 3420 + }, + { + "epoch": 0.3618143459915612, + "grad_norm": 0.3641003966331482, + "learning_rate": 0.0010792045237947008, + "loss": 1.2997, + "step": 3430 + }, + { + "epoch": 0.3628691983122363, + "grad_norm": 0.3449475169181824, + "learning_rate": 0.0010769468710247478, + "loss": 1.3001, + "step": 3440 + }, + { + "epoch": 0.3639240506329114, + "grad_norm": 0.3534505367279053, + "learning_rate": 0.0010746855546554612, + "loss": 1.2934, + "step": 3450 + }, + { + "epoch": 0.3649789029535865, + "grad_norm": 0.383791983127594, + "learning_rate": 0.0010724206000259954, + "loss": 1.2894, + "step": 3460 + }, + { + "epoch": 0.36603375527426163, + "grad_norm": 0.3717612028121948, + "learning_rate": 0.0010701520325162727, + "loss": 1.3072, + "step": 3470 + }, + { + "epoch": 0.3670886075949367, + "grad_norm": 0.3450316786766052, + "learning_rate": 0.0010678798775467001, + "loss": 1.3092, + "step": 3480 + }, + { + "epoch": 0.3681434599156118, + "grad_norm": 0.3202390670776367, + "learning_rate": 0.0010656041605778832, + "loss": 1.3014, + "step": 3490 + }, + { + "epoch": 0.3691983122362869, + "grad_norm": 0.3394745886325836, + "learning_rate": 0.001063324907110342, + "loss": 1.2814, + "step": 3500 + }, + { + "epoch": 0.370253164556962, + "grad_norm": 0.3754570782184601, + "learning_rate": 0.0010610421426842241, + "loss": 1.3015, + "step": 3510 + }, + { + "epoch": 0.37130801687763715, + "grad_norm": 0.367051899433136, + "learning_rate": 0.00105875589287902, + "loss": 1.2982, + "step": 3520 + }, + { + "epoch": 0.37236286919831224, + "grad_norm": 0.39652103185653687, + "learning_rate": 0.0010564661833132752, + "loss": 1.3068, + "step": 3530 + }, + { + 
"epoch": 0.37341772151898733, + "grad_norm": 0.35056713223457336, + "learning_rate": 0.001054173039644303, + "loss": 1.2984, + "step": 3540 + }, + { + "epoch": 0.3744725738396624, + "grad_norm": 0.37702634930610657, + "learning_rate": 0.0010518764875678981, + "loss": 1.2922, + "step": 3550 + }, + { + "epoch": 0.3755274261603376, + "grad_norm": 0.35276398062705994, + "learning_rate": 0.001049576552818048, + "loss": 1.2941, + "step": 3560 + }, + { + "epoch": 0.37658227848101267, + "grad_norm": 0.3581322729587555, + "learning_rate": 0.0010472732611666448, + "loss": 1.2877, + "step": 3570 + }, + { + "epoch": 0.37763713080168776, + "grad_norm": 0.4018959701061249, + "learning_rate": 0.0010449666384231954, + "loss": 1.2892, + "step": 3580 + }, + { + "epoch": 0.37869198312236285, + "grad_norm": 0.382752925157547, + "learning_rate": 0.0010426567104345346, + "loss": 1.3005, + "step": 3590 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 0.37618669867515564, + "learning_rate": 0.0010403435030845332, + "loss": 1.2861, + "step": 3600 + }, + { + "epoch": 0.3808016877637131, + "grad_norm": 0.35512155294418335, + "learning_rate": 0.0010380270422938093, + "loss": 1.284, + "step": 3610 + }, + { + "epoch": 0.3818565400843882, + "grad_norm": 0.47898605465888977, + "learning_rate": 0.0010357073540194362, + "loss": 1.2887, + "step": 3620 + }, + { + "epoch": 0.3829113924050633, + "grad_norm": 0.3588807284832001, + "learning_rate": 0.001033384464254655, + "loss": 1.2857, + "step": 3630 + }, + { + "epoch": 0.38396624472573837, + "grad_norm": 0.3950936198234558, + "learning_rate": 0.001031058399028579, + "loss": 1.2873, + "step": 3640 + }, + { + "epoch": 0.3850210970464135, + "grad_norm": 0.3477298319339752, + "learning_rate": 0.001028729184405905, + "loss": 1.2954, + "step": 3650 + }, + { + "epoch": 0.3860759493670886, + "grad_norm": 0.36207327246665955, + "learning_rate": 0.0010263968464866201, + "loss": 1.2892, + "step": 3660 + }, + { + "epoch": 0.3871308016877637, + "grad_norm": 
0.44805607199668884, + "learning_rate": 0.0010240614114057098, + "loss": 1.2815, + "step": 3670 + }, + { + "epoch": 0.3881856540084388, + "grad_norm": 0.43384575843811035, + "learning_rate": 0.001021722905332864, + "loss": 1.2851, + "step": 3680 + }, + { + "epoch": 0.38924050632911394, + "grad_norm": 0.35028716921806335, + "learning_rate": 0.0010193813544721855, + "loss": 1.2895, + "step": 3690 + }, + { + "epoch": 0.39029535864978904, + "grad_norm": 0.3653584122657776, + "learning_rate": 0.001017036785061895, + "loss": 1.2941, + "step": 3700 + }, + { + "epoch": 0.39135021097046413, + "grad_norm": 0.34270143508911133, + "learning_rate": 0.0010146892233740376, + "loss": 1.2841, + "step": 3710 + }, + { + "epoch": 0.3924050632911392, + "grad_norm": 0.41713616251945496, + "learning_rate": 0.0010123386957141883, + "loss": 1.2662, + "step": 3720 + }, + { + "epoch": 0.39345991561181437, + "grad_norm": 0.3545433282852173, + "learning_rate": 0.0010099852284211573, + "loss": 1.2843, + "step": 3730 + }, + { + "epoch": 0.39451476793248946, + "grad_norm": 0.3524881601333618, + "learning_rate": 0.0010076288478666944, + "loss": 1.2817, + "step": 3740 + }, + { + "epoch": 0.39556962025316456, + "grad_norm": 0.3451279103755951, + "learning_rate": 0.0010052695804551946, + "loss": 1.2877, + "step": 3750 + }, + { + "epoch": 0.39662447257383965, + "grad_norm": 0.36001715064048767, + "learning_rate": 0.0010029074526234014, + "loss": 1.275, + "step": 3760 + }, + { + "epoch": 0.39767932489451474, + "grad_norm": 0.3992428183555603, + "learning_rate": 0.0010005424908401104, + "loss": 1.2909, + "step": 3770 + }, + { + "epoch": 0.3987341772151899, + "grad_norm": 0.3687851130962372, + "learning_rate": 0.0009981747216058728, + "loss": 1.2761, + "step": 3780 + }, + { + "epoch": 0.399789029535865, + "grad_norm": 0.39275357127189636, + "learning_rate": 0.0009958041714526998, + "loss": 1.2806, + "step": 3790 + }, + { + "epoch": 0.4008438818565401, + "grad_norm": 0.3487364947795868, + "learning_rate": 
0.0009934308669437627, + "loss": 1.2866, + "step": 3800 + }, + { + "epoch": 0.40189873417721517, + "grad_norm": 0.3634994924068451, + "learning_rate": 0.0009910548346730972, + "loss": 1.2745, + "step": 3810 + }, + { + "epoch": 0.4029535864978903, + "grad_norm": 0.35668596625328064, + "learning_rate": 0.0009886761012653062, + "loss": 1.2684, + "step": 3820 + }, + { + "epoch": 0.4040084388185654, + "grad_norm": 0.3821624517440796, + "learning_rate": 0.000986294693375258, + "loss": 1.2716, + "step": 3830 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 0.3544701039791107, + "learning_rate": 0.000983910637687791, + "loss": 1.2849, + "step": 3840 + }, + { + "epoch": 0.4061181434599156, + "grad_norm": 0.3593977987766266, + "learning_rate": 0.0009815239609174138, + "loss": 1.268, + "step": 3850 + }, + { + "epoch": 0.40717299578059074, + "grad_norm": 0.3400956988334656, + "learning_rate": 0.0009791346898080043, + "loss": 1.28, + "step": 3860 + }, + { + "epoch": 0.40822784810126583, + "grad_norm": 0.3652053475379944, + "learning_rate": 0.0009767428511325122, + "loss": 1.2733, + "step": 3870 + }, + { + "epoch": 0.4092827004219409, + "grad_norm": 0.37829872965812683, + "learning_rate": 0.0009743484716926576, + "loss": 1.2622, + "step": 3880 + }, + { + "epoch": 0.410337552742616, + "grad_norm": 0.36608776450157166, + "learning_rate": 0.0009719515783186319, + "loss": 1.2642, + "step": 3890 + }, + { + "epoch": 0.41139240506329117, + "grad_norm": 0.3877875506877899, + "learning_rate": 0.0009695521978687951, + "loss": 1.2656, + "step": 3900 + }, + { + "epoch": 0.41244725738396626, + "grad_norm": 0.37717291712760925, + "learning_rate": 0.0009671503572293767, + "loss": 1.2705, + "step": 3910 + }, + { + "epoch": 0.41350210970464135, + "grad_norm": 0.3739968538284302, + "learning_rate": 0.0009647460833141742, + "loss": 1.2633, + "step": 3920 + }, + { + "epoch": 0.41455696202531644, + "grad_norm": 0.3548559546470642, + "learning_rate": 0.0009623394030642507, + "loss": 1.2697, + 
"step": 3930 + }, + { + "epoch": 0.41561181434599154, + "grad_norm": 0.3345405161380768, + "learning_rate": 0.0009599303434476334, + "loss": 1.2639, + "step": 3940 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 0.3435943126678467, + "learning_rate": 0.0009575189314590118, + "loss": 1.2698, + "step": 3950 + }, + { + "epoch": 0.4177215189873418, + "grad_norm": 0.3582841157913208, + "learning_rate": 0.0009551051941194346, + "loss": 1.2699, + "step": 3960 + }, + { + "epoch": 0.41877637130801687, + "grad_norm": 0.39052221179008484, + "learning_rate": 0.0009526891584760071, + "loss": 1.2617, + "step": 3970 + }, + { + "epoch": 0.41983122362869196, + "grad_norm": 0.34528857469558716, + "learning_rate": 0.0009502708516015889, + "loss": 1.2704, + "step": 3980 + }, + { + "epoch": 0.4208860759493671, + "grad_norm": 0.35940083861351013, + "learning_rate": 0.0009478503005944888, + "loss": 1.2612, + "step": 3990 + }, + { + "epoch": 0.4219409282700422, + "grad_norm": 0.4698091447353363, + "learning_rate": 0.0009454275325781632, + "loss": 1.2752, + "step": 4000 + }, + { + "epoch": 0.4229957805907173, + "grad_norm": 0.35269781947135925, + "learning_rate": 0.0009430025747009104, + "loss": 1.2627, + "step": 4010 + }, + { + "epoch": 0.4240506329113924, + "grad_norm": 0.35480797290802, + "learning_rate": 0.0009405754541355677, + "loss": 1.2644, + "step": 4020 + }, + { + "epoch": 0.42510548523206754, + "grad_norm": 0.35654541850090027, + "learning_rate": 0.0009381461980792061, + "loss": 1.2581, + "step": 4030 + }, + { + "epoch": 0.42616033755274263, + "grad_norm": 0.4350382387638092, + "learning_rate": 0.0009357148337528256, + "loss": 1.265, + "step": 4040 + }, + { + "epoch": 0.4272151898734177, + "grad_norm": 0.43560877442359924, + "learning_rate": 0.0009332813884010511, + "loss": 1.2698, + "step": 4050 + }, + { + "epoch": 0.4282700421940928, + "grad_norm": 0.3371978998184204, + "learning_rate": 0.0009308458892918259, + "loss": 1.271, + "step": 4060 + }, + { + "epoch": 
0.4293248945147679, + "grad_norm": 0.3576991856098175, + "learning_rate": 0.0009284083637161064, + "loss": 1.2678, + "step": 4070 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 0.39786258339881897, + "learning_rate": 0.0009259688389875574, + "loss": 1.2712, + "step": 4080 + }, + { + "epoch": 0.43143459915611815, + "grad_norm": 0.40100061893463135, + "learning_rate": 0.0009235273424422442, + "loss": 1.264, + "step": 4090 + }, + { + "epoch": 0.43248945147679324, + "grad_norm": 0.3555314242839813, + "learning_rate": 0.0009210839014383282, + "loss": 1.2598, + "step": 4100 + }, + { + "epoch": 0.43354430379746833, + "grad_norm": 0.38855957984924316, + "learning_rate": 0.0009186385433557584, + "loss": 1.2577, + "step": 4110 + }, + { + "epoch": 0.4345991561181435, + "grad_norm": 0.42936259508132935, + "learning_rate": 0.0009161912955959668, + "loss": 1.2618, + "step": 4120 + }, + { + "epoch": 0.4356540084388186, + "grad_norm": 0.3469286561012268, + "learning_rate": 0.000913742185581559, + "loss": 1.2507, + "step": 4130 + }, + { + "epoch": 0.43670886075949367, + "grad_norm": 0.338794082403183, + "learning_rate": 0.0009112912407560086, + "loss": 1.2559, + "step": 4140 + }, + { + "epoch": 0.43776371308016876, + "grad_norm": 0.38378769159317017, + "learning_rate": 0.0009088384885833495, + "loss": 1.2606, + "step": 4150 + }, + { + "epoch": 0.4388185654008439, + "grad_norm": 0.37784048914909363, + "learning_rate": 0.000906383956547867, + "loss": 1.2539, + "step": 4160 + }, + { + "epoch": 0.439873417721519, + "grad_norm": 0.3505966067314148, + "learning_rate": 0.0009039276721537915, + "loss": 1.2546, + "step": 4170 + }, + { + "epoch": 0.4409282700421941, + "grad_norm": 0.37530627846717834, + "learning_rate": 0.0009014696629249886, + "loss": 1.253, + "step": 4180 + }, + { + "epoch": 0.4419831223628692, + "grad_norm": 0.37926673889160156, + "learning_rate": 0.0008990099564046522, + "loss": 1.258, + "step": 4190 + }, + { + "epoch": 0.4430379746835443, + "grad_norm": 
0.3719365894794464, + "learning_rate": 0.0008965485801549946, + "loss": 1.2585, + "step": 4200 + }, + { + "epoch": 0.4440928270042194, + "grad_norm": 0.34479984641075134, + "learning_rate": 0.000894085561756939, + "loss": 1.2487, + "step": 4210 + }, + { + "epoch": 0.4451476793248945, + "grad_norm": 0.3615456521511078, + "learning_rate": 0.0008916209288098088, + "loss": 1.257, + "step": 4220 + }, + { + "epoch": 0.4462025316455696, + "grad_norm": 0.3427673280239105, + "learning_rate": 0.0008891547089310198, + "loss": 1.2533, + "step": 4230 + }, + { + "epoch": 0.4472573839662447, + "grad_norm": 0.34341564774513245, + "learning_rate": 0.0008866869297557699, + "loss": 1.2475, + "step": 4240 + }, + { + "epoch": 0.44831223628691985, + "grad_norm": 0.34663861989974976, + "learning_rate": 0.0008842176189367299, + "loss": 1.254, + "step": 4250 + }, + { + "epoch": 0.44936708860759494, + "grad_norm": 0.35745692253112793, + "learning_rate": 0.0008817468041437329, + "loss": 1.2599, + "step": 4260 + }, + { + "epoch": 0.45042194092827004, + "grad_norm": 0.35569432377815247, + "learning_rate": 0.0008792745130634654, + "loss": 1.2498, + "step": 4270 + }, + { + "epoch": 0.45147679324894513, + "grad_norm": 0.34108027815818787, + "learning_rate": 0.0008768007733991561, + "loss": 1.2439, + "step": 4280 + }, + { + "epoch": 0.4525316455696203, + "grad_norm": 0.3217419385910034, + "learning_rate": 0.0008743256128702658, + "loss": 1.255, + "step": 4290 + }, + { + "epoch": 0.45358649789029537, + "grad_norm": 0.39865434169769287, + "learning_rate": 0.0008718490592121768, + "loss": 1.2542, + "step": 4300 + }, + { + "epoch": 0.45464135021097046, + "grad_norm": 0.3799431025981903, + "learning_rate": 0.0008693711401758822, + "loss": 1.2478, + "step": 4310 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 0.3823153078556061, + "learning_rate": 0.0008668918835276747, + "loss": 1.2481, + "step": 4320 + }, + { + "epoch": 0.45675105485232065, + "grad_norm": 0.3550722599029541, + "learning_rate": 
0.0008644113170488355, + "loss": 1.2545, + "step": 4330 + }, + { + "epoch": 0.4578059071729958, + "grad_norm": 0.3509629964828491, + "learning_rate": 0.0008619294685353235, + "loss": 1.2467, + "step": 4340 + }, + { + "epoch": 0.4588607594936709, + "grad_norm": 0.36587169766426086, + "learning_rate": 0.0008594463657974627, + "loss": 1.2447, + "step": 4350 + }, + { + "epoch": 0.459915611814346, + "grad_norm": 0.3689899146556854, + "learning_rate": 0.0008569620366596322, + "loss": 1.2618, + "step": 4360 + }, + { + "epoch": 0.4609704641350211, + "grad_norm": 0.3530426621437073, + "learning_rate": 0.000854476508959953, + "loss": 1.2376, + "step": 4370 + }, + { + "epoch": 0.4620253164556962, + "grad_norm": 0.35614463686943054, + "learning_rate": 0.0008519898105499762, + "loss": 1.2429, + "step": 4380 + }, + { + "epoch": 0.4630801687763713, + "grad_norm": 0.351806104183197, + "learning_rate": 0.0008495019692943721, + "loss": 1.2407, + "step": 4390 + }, + { + "epoch": 0.4641350210970464, + "grad_norm": 0.37533193826675415, + "learning_rate": 0.0008470130130706166, + "loss": 1.2446, + "step": 4400 + }, + { + "epoch": 0.4651898734177215, + "grad_norm": 0.3407420814037323, + "learning_rate": 0.0008445229697686795, + "loss": 1.259, + "step": 4410 + }, + { + "epoch": 0.46624472573839665, + "grad_norm": 0.3381921648979187, + "learning_rate": 0.0008420318672907119, + "loss": 1.2444, + "step": 4420 + }, + { + "epoch": 0.46729957805907174, + "grad_norm": 0.33693447709083557, + "learning_rate": 0.0008395397335507334, + "loss": 1.2468, + "step": 4430 + }, + { + "epoch": 0.46835443037974683, + "grad_norm": 0.39592495560646057, + "learning_rate": 0.0008370465964743196, + "loss": 1.2399, + "step": 4440 + }, + { + "epoch": 0.4694092827004219, + "grad_norm": 0.3851742446422577, + "learning_rate": 0.0008345524839982886, + "loss": 1.2503, + "step": 4450 + }, + { + "epoch": 0.4704641350210971, + "grad_norm": 0.3582525849342346, + "learning_rate": 0.0008320574240703886, + "loss": 1.2354, + 
"step": 4460 + }, + { + "epoch": 0.47151898734177217, + "grad_norm": 0.3568393290042877, + "learning_rate": 0.0008295614446489842, + "loss": 1.2437, + "step": 4470 + }, + { + "epoch": 0.47257383966244726, + "grad_norm": 0.3440687656402588, + "learning_rate": 0.0008270645737027441, + "loss": 1.2495, + "step": 4480 + }, + { + "epoch": 0.47362869198312235, + "grad_norm": 0.3949049711227417, + "learning_rate": 0.0008245668392103259, + "loss": 1.2386, + "step": 4490 + }, + { + "epoch": 0.47468354430379744, + "grad_norm": 0.33692407608032227, + "learning_rate": 0.0008220682691600645, + "loss": 1.2374, + "step": 4500 + }, + { + "epoch": 0.4757383966244726, + "grad_norm": 0.3463355600833893, + "learning_rate": 0.0008195688915496571, + "loss": 1.2433, + "step": 4510 + }, + { + "epoch": 0.4767932489451477, + "grad_norm": 0.34808549284935, + "learning_rate": 0.0008170687343858506, + "loss": 1.2243, + "step": 4520 + }, + { + "epoch": 0.4778481012658228, + "grad_norm": 0.49410006403923035, + "learning_rate": 0.0008145678256841265, + "loss": 1.2453, + "step": 4530 + }, + { + "epoch": 0.47890295358649787, + "grad_norm": 0.3992741107940674, + "learning_rate": 0.0008120661934683879, + "loss": 1.2427, + "step": 4540 + }, + { + "epoch": 0.479957805907173, + "grad_norm": 0.34049564599990845, + "learning_rate": 0.0008095638657706456, + "loss": 1.2424, + "step": 4550 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 0.33464476466178894, + "learning_rate": 0.000807060870630703, + "loss": 1.2317, + "step": 4560 + }, + { + "epoch": 0.4820675105485232, + "grad_norm": 0.356582909822464, + "learning_rate": 0.000804557236095843, + "loss": 1.2142, + "step": 4570 + }, + { + "epoch": 0.4831223628691983, + "grad_norm": 0.39746856689453125, + "learning_rate": 0.0008020529902205129, + "loss": 1.2384, + "step": 4580 + }, + { + "epoch": 0.48417721518987344, + "grad_norm": 0.36983558535575867, + "learning_rate": 0.0007995481610660108, + "loss": 1.2247, + "step": 4590 + }, + { + "epoch": 
0.48523206751054854, + "grad_norm": 0.3777245581150055, + "learning_rate": 0.0007970427767001702, + "loss": 1.241, + "step": 4600 + }, + { + "epoch": 0.48628691983122363, + "grad_norm": 0.35135287046432495, + "learning_rate": 0.0007945368651970464, + "loss": 1.2422, + "step": 4610 + }, + { + "epoch": 0.4873417721518987, + "grad_norm": 0.3400181829929352, + "learning_rate": 0.0007920304546366013, + "loss": 1.2234, + "step": 4620 + }, + { + "epoch": 0.4883966244725738, + "grad_norm": 0.41734570264816284, + "learning_rate": 0.000789523573104389, + "loss": 1.2398, + "step": 4630 + }, + { + "epoch": 0.48945147679324896, + "grad_norm": 0.3581525385379791, + "learning_rate": 0.0007870162486912414, + "loss": 1.2394, + "step": 4640 + }, + { + "epoch": 0.49050632911392406, + "grad_norm": 0.35023796558380127, + "learning_rate": 0.0007845085094929527, + "loss": 1.2366, + "step": 4650 + }, + { + "epoch": 0.49156118143459915, + "grad_norm": 0.39396238327026367, + "learning_rate": 0.0007820003836099649, + "loss": 1.232, + "step": 4660 + }, + { + "epoch": 0.49261603375527424, + "grad_norm": 0.38713324069976807, + "learning_rate": 0.0007794918991470537, + "loss": 1.2324, + "step": 4670 + }, + { + "epoch": 0.4936708860759494, + "grad_norm": 0.3926469385623932, + "learning_rate": 0.0007769830842130119, + "loss": 1.2275, + "step": 4680 + }, + { + "epoch": 0.4947257383966245, + "grad_norm": 0.32987460494041443, + "learning_rate": 0.0007744739669203361, + "loss": 1.2215, + "step": 4690 + }, + { + "epoch": 0.4957805907172996, + "grad_norm": 0.37924712896347046, + "learning_rate": 0.0007719645753849108, + "loss": 1.2297, + "step": 4700 + }, + { + "epoch": 0.49683544303797467, + "grad_norm": 0.3345460593700409, + "learning_rate": 0.0007694549377256932, + "loss": 1.2321, + "step": 4710 + }, + { + "epoch": 0.4978902953586498, + "grad_norm": 0.4350821077823639, + "learning_rate": 0.0007669450820643987, + "loss": 1.2302, + "step": 4720 + }, + { + "epoch": 0.4989451476793249, + "grad_norm": 
0.36744949221611023, + "learning_rate": 0.0007644350365251855, + "loss": 1.2244, + "step": 4730 + }, + { + "epoch": 0.5, + "grad_norm": 0.35269591212272644, + "learning_rate": 0.0007619248292343399, + "loss": 1.2237, + "step": 4740 + }, + { + "epoch": 0.5010548523206751, + "grad_norm": 0.3423689603805542, + "learning_rate": 0.0007594144883199599, + "loss": 1.2441, + "step": 4750 + }, + { + "epoch": 0.5021097046413502, + "grad_norm": 0.33250823616981506, + "learning_rate": 0.0007569040419116413, + "loss": 1.2274, + "step": 4760 + }, + { + "epoch": 0.5031645569620253, + "grad_norm": 0.4062730073928833, + "learning_rate": 0.000754393518140162, + "loss": 1.2284, + "step": 4770 + }, + { + "epoch": 0.5042194092827004, + "grad_norm": 0.33738499879837036, + "learning_rate": 0.0007518829451371665, + "loss": 1.2336, + "step": 4780 + }, + { + "epoch": 0.5052742616033755, + "grad_norm": 0.3440134525299072, + "learning_rate": 0.0007493723510348516, + "loss": 1.22, + "step": 4790 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 0.35827070474624634, + "learning_rate": 0.0007468617639656496, + "loss": 1.2188, + "step": 4800 + }, + { + "epoch": 0.5073839662447257, + "grad_norm": 0.3508043885231018, + "learning_rate": 0.0007443512120619144, + "loss": 1.2208, + "step": 4810 + }, + { + "epoch": 0.5084388185654009, + "grad_norm": 0.3430309593677521, + "learning_rate": 0.0007418407234556067, + "loss": 1.2432, + "step": 4820 + }, + { + "epoch": 0.509493670886076, + "grad_norm": 0.43787845969200134, + "learning_rate": 0.0007393303262779767, + "loss": 1.2253, + "step": 4830 + }, + { + "epoch": 0.510548523206751, + "grad_norm": 0.39787721633911133, + "learning_rate": 0.0007368200486592507, + "loss": 1.2201, + "step": 4840 + }, + { + "epoch": 0.5116033755274262, + "grad_norm": 0.40354400873184204, + "learning_rate": 0.0007343099187283149, + "loss": 1.2322, + "step": 4850 + }, + { + "epoch": 0.5126582278481012, + "grad_norm": 0.4470100402832031, + "learning_rate": 0.0007317999646124011, 
+ "loss": 1.2214, + "step": 4860 + }, + { + "epoch": 0.5137130801687764, + "grad_norm": 0.3791539967060089, + "learning_rate": 0.0007292902144367704, + "loss": 1.2157, + "step": 4870 + }, + { + "epoch": 0.5147679324894515, + "grad_norm": 0.3646821081638336, + "learning_rate": 0.0007267806963243995, + "loss": 1.2323, + "step": 4880 + }, + { + "epoch": 0.5158227848101266, + "grad_norm": 0.34422731399536133, + "learning_rate": 0.0007242714383956639, + "loss": 1.2224, + "step": 4890 + }, + { + "epoch": 0.5168776371308017, + "grad_norm": 0.3907710313796997, + "learning_rate": 0.000721762468768024, + "loss": 1.2291, + "step": 4900 + }, + { + "epoch": 0.5179324894514767, + "grad_norm": 0.37104958295822144, + "learning_rate": 0.0007192538155557094, + "loss": 1.2241, + "step": 4910 + }, + { + "epoch": 0.5189873417721519, + "grad_norm": 0.4097077250480652, + "learning_rate": 0.0007167455068694046, + "loss": 1.2126, + "step": 4920 + }, + { + "epoch": 0.520042194092827, + "grad_norm": 0.3646121919155121, + "learning_rate": 0.000714237570815933, + "loss": 1.2246, + "step": 4930 + }, + { + "epoch": 0.5210970464135021, + "grad_norm": 0.39202240109443665, + "learning_rate": 0.0007117300354979423, + "loss": 1.2217, + "step": 4940 + }, + { + "epoch": 0.5221518987341772, + "grad_norm": 0.35184553265571594, + "learning_rate": 0.000709222929013591, + "loss": 1.2289, + "step": 4950 + }, + { + "epoch": 0.5232067510548524, + "grad_norm": 0.39552363753318787, + "learning_rate": 0.0007067162794562309, + "loss": 1.212, + "step": 4960 + }, + { + "epoch": 0.5242616033755274, + "grad_norm": 0.3939037024974823, + "learning_rate": 0.0007042101149140943, + "loss": 1.219, + "step": 4970 + }, + { + "epoch": 0.5253164556962026, + "grad_norm": 0.3670158088207245, + "learning_rate": 0.0007017044634699787, + "loss": 1.2112, + "step": 4980 + }, + { + "epoch": 0.5263713080168776, + "grad_norm": 0.3672795295715332, + "learning_rate": 0.0006991993532009319, + "loss": 1.2155, + "step": 4990 + }, + { + 
"epoch": 0.5274261603375527, + "grad_norm": 0.37238597869873047, + "learning_rate": 0.0006966948121779378, + "loss": 1.2237, + "step": 5000 + }, + { + "epoch": 0.5284810126582279, + "grad_norm": 0.34321847558021545, + "learning_rate": 0.000694190868465601, + "loss": 1.2167, + "step": 5010 + }, + { + "epoch": 0.5295358649789029, + "grad_norm": 0.35932210087776184, + "learning_rate": 0.0006916875501218343, + "loss": 1.2118, + "step": 5020 + }, + { + "epoch": 0.5305907172995781, + "grad_norm": 0.33948570489883423, + "learning_rate": 0.0006891848851975416, + "loss": 1.197, + "step": 5030 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 0.3529501259326935, + "learning_rate": 0.0006866829017363054, + "loss": 1.2252, + "step": 5040 + }, + { + "epoch": 0.5327004219409283, + "grad_norm": 0.3476143479347229, + "learning_rate": 0.0006841816277740722, + "loss": 1.2125, + "step": 5050 + }, + { + "epoch": 0.5337552742616034, + "grad_norm": 0.38103732466697693, + "learning_rate": 0.0006816810913388379, + "loss": 1.2257, + "step": 5060 + }, + { + "epoch": 0.5348101265822784, + "grad_norm": 0.3743418753147125, + "learning_rate": 0.0006791813204503342, + "loss": 1.2119, + "step": 5070 + }, + { + "epoch": 0.5358649789029536, + "grad_norm": 0.3913949131965637, + "learning_rate": 0.0006766823431197147, + "loss": 1.2239, + "step": 5080 + }, + { + "epoch": 0.5369198312236287, + "grad_norm": 0.36954477429389954, + "learning_rate": 0.0006741841873492406, + "loss": 1.215, + "step": 5090 + }, + { + "epoch": 0.5379746835443038, + "grad_norm": 0.3766242563724518, + "learning_rate": 0.0006716868811319671, + "loss": 1.2004, + "step": 5100 + }, + { + "epoch": 0.5390295358649789, + "grad_norm": 0.36319437623023987, + "learning_rate": 0.0006691904524514297, + "loss": 1.2125, + "step": 5110 + }, + { + "epoch": 0.540084388185654, + "grad_norm": 0.35376471281051636, + "learning_rate": 0.0006666949292813306, + "loss": 1.2082, + "step": 5120 + }, + { + "epoch": 0.5411392405063291, + "grad_norm": 
0.3592052757740021, + "learning_rate": 0.0006642003395852258, + "loss": 1.2081, + "step": 5130 + }, + { + "epoch": 0.5421940928270043, + "grad_norm": 0.3607555031776428, + "learning_rate": 0.0006617067113162103, + "loss": 1.2217, + "step": 5140 + }, + { + "epoch": 0.5432489451476793, + "grad_norm": 0.34854963421821594, + "learning_rate": 0.0006592140724166073, + "loss": 1.2167, + "step": 5150 + }, + { + "epoch": 0.5443037974683544, + "grad_norm": 0.3794786036014557, + "learning_rate": 0.0006567224508176523, + "loss": 1.2116, + "step": 5160 + }, + { + "epoch": 0.5453586497890295, + "grad_norm": 0.352797269821167, + "learning_rate": 0.0006542318744391821, + "loss": 1.2106, + "step": 5170 + }, + { + "epoch": 0.5464135021097046, + "grad_norm": 0.3626922369003296, + "learning_rate": 0.0006517423711893209, + "loss": 1.2181, + "step": 5180 + }, + { + "epoch": 0.5474683544303798, + "grad_norm": 0.3714168667793274, + "learning_rate": 0.0006492539689641685, + "loss": 1.2022, + "step": 5190 + }, + { + "epoch": 0.5485232067510548, + "grad_norm": 0.39416635036468506, + "learning_rate": 0.0006467666956474865, + "loss": 1.2142, + "step": 5200 + }, + { + "epoch": 0.54957805907173, + "grad_norm": 0.36706307530403137, + "learning_rate": 0.0006442805791103873, + "loss": 1.2016, + "step": 5210 + }, + { + "epoch": 0.5506329113924051, + "grad_norm": 0.3452375829219818, + "learning_rate": 0.0006417956472110205, + "loss": 1.2108, + "step": 5220 + }, + { + "epoch": 0.5516877637130801, + "grad_norm": 0.4015010893344879, + "learning_rate": 0.0006393119277942614, + "loss": 1.2049, + "step": 5230 + }, + { + "epoch": 0.5527426160337553, + "grad_norm": 0.42108821868896484, + "learning_rate": 0.0006368294486913987, + "loss": 1.209, + "step": 5240 + }, + { + "epoch": 0.5537974683544303, + "grad_norm": 0.3588517904281616, + "learning_rate": 0.0006343482377198232, + "loss": 1.2027, + "step": 5250 + }, + { + "epoch": 0.5548523206751055, + "grad_norm": 0.3523201048374176, + "learning_rate": 
0.0006318683226827151, + "loss": 1.2083, + "step": 5260 + }, + { + "epoch": 0.5559071729957806, + "grad_norm": 0.38106194138526917, + "learning_rate": 0.0006293897313687331, + "loss": 1.2161, + "step": 5270 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 0.38883480429649353, + "learning_rate": 0.0006269124915517037, + "loss": 1.2054, + "step": 5280 + }, + { + "epoch": 0.5580168776371308, + "grad_norm": 0.3482867181301117, + "learning_rate": 0.0006244366309903084, + "loss": 1.2067, + "step": 5290 + }, + { + "epoch": 0.5590717299578059, + "grad_norm": 0.3491329848766327, + "learning_rate": 0.0006219621774277737, + "loss": 1.2112, + "step": 5300 + }, + { + "epoch": 0.560126582278481, + "grad_norm": 0.35099756717681885, + "learning_rate": 0.00061948915859156, + "loss": 1.1956, + "step": 5310 + }, + { + "epoch": 0.5611814345991561, + "grad_norm": 0.3603163957595825, + "learning_rate": 0.0006170176021930509, + "loss": 1.2005, + "step": 5320 + }, + { + "epoch": 0.5622362869198312, + "grad_norm": 0.3842976689338684, + "learning_rate": 0.0006145475359272424, + "loss": 1.207, + "step": 5330 + }, + { + "epoch": 0.5632911392405063, + "grad_norm": 0.35818201303482056, + "learning_rate": 0.0006120789874724336, + "loss": 1.1974, + "step": 5340 + }, + { + "epoch": 0.5643459915611815, + "grad_norm": 0.34876447916030884, + "learning_rate": 0.0006096119844899151, + "loss": 1.1943, + "step": 5350 + }, + { + "epoch": 0.5654008438818565, + "grad_norm": 0.3554922938346863, + "learning_rate": 0.0006071465546236601, + "loss": 1.2042, + "step": 5360 + }, + { + "epoch": 0.5664556962025317, + "grad_norm": 0.3837421238422394, + "learning_rate": 0.0006046827255000135, + "loss": 1.2043, + "step": 5370 + }, + { + "epoch": 0.5675105485232067, + "grad_norm": 0.4092616140842438, + "learning_rate": 0.0006022205247273845, + "loss": 1.1952, + "step": 5380 + }, + { + "epoch": 0.5685654008438819, + "grad_norm": 0.37640655040740967, + "learning_rate": 0.0005997599798959343, + "loss": 1.1963, + 
"step": 5390 + }, + { + "epoch": 0.569620253164557, + "grad_norm": 0.35962364077568054, + "learning_rate": 0.0005973011185772694, + "loss": 1.1974, + "step": 5400 + }, + { + "epoch": 0.570675105485232, + "grad_norm": 0.37594228982925415, + "learning_rate": 0.0005948439683241318, + "loss": 1.2018, + "step": 5410 + }, + { + "epoch": 0.5717299578059072, + "grad_norm": 0.36223894357681274, + "learning_rate": 0.0005923885566700896, + "loss": 1.1931, + "step": 5420 + }, + { + "epoch": 0.5727848101265823, + "grad_norm": 0.3633806109428406, + "learning_rate": 0.0005899349111292293, + "loss": 1.1905, + "step": 5430 + }, + { + "epoch": 0.5738396624472574, + "grad_norm": 0.38978320360183716, + "learning_rate": 0.0005874830591958474, + "loss": 1.1868, + "step": 5440 + }, + { + "epoch": 0.5748945147679325, + "grad_norm": 0.35502946376800537, + "learning_rate": 0.000585033028344142, + "loss": 1.1953, + "step": 5450 + }, + { + "epoch": 0.5759493670886076, + "grad_norm": 0.3547123670578003, + "learning_rate": 0.0005825848460279048, + "loss": 1.1962, + "step": 5460 + }, + { + "epoch": 0.5770042194092827, + "grad_norm": 0.3606116473674774, + "learning_rate": 0.0005801385396802146, + "loss": 1.1962, + "step": 5470 + }, + { + "epoch": 0.5780590717299579, + "grad_norm": 0.3635554313659668, + "learning_rate": 0.0005776941367131282, + "loss": 1.1984, + "step": 5480 + }, + { + "epoch": 0.5791139240506329, + "grad_norm": 0.4081866145133972, + "learning_rate": 0.0005752516645173745, + "loss": 1.2017, + "step": 5490 + }, + { + "epoch": 0.580168776371308, + "grad_norm": 0.3537876009941101, + "learning_rate": 0.0005728111504620472, + "loss": 1.1939, + "step": 5500 + }, + { + "epoch": 0.5812236286919831, + "grad_norm": 0.40251171588897705, + "learning_rate": 0.0005703726218942976, + "loss": 1.1996, + "step": 5510 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 0.4112827777862549, + "learning_rate": 0.0005679361061390295, + "loss": 1.192, + "step": 5520 + }, + { + "epoch": 
0.5833333333333334, + "grad_norm": 0.39286091923713684, + "learning_rate": 0.0005655016304985908, + "loss": 1.1874, + "step": 5530 + }, + { + "epoch": 0.5843881856540084, + "grad_norm": 0.3851916193962097, + "learning_rate": 0.0005630692222524709, + "loss": 1.1848, + "step": 5540 + }, + { + "epoch": 0.5854430379746836, + "grad_norm": 0.3481987416744232, + "learning_rate": 0.0005606389086569911, + "loss": 1.1994, + "step": 5550 + }, + { + "epoch": 0.5864978902953587, + "grad_norm": 0.36745893955230713, + "learning_rate": 0.0005582107169450023, + "loss": 1.2026, + "step": 5560 + }, + { + "epoch": 0.5875527426160337, + "grad_norm": 0.3689424991607666, + "learning_rate": 0.0005557846743255783, + "loss": 1.1887, + "step": 5570 + }, + { + "epoch": 0.5886075949367089, + "grad_norm": 0.3701568841934204, + "learning_rate": 0.0005533608079837109, + "loss": 1.1889, + "step": 5580 + }, + { + "epoch": 0.5896624472573839, + "grad_norm": 0.35756173729896545, + "learning_rate": 0.0005509391450800061, + "loss": 1.1906, + "step": 5590 + }, + { + "epoch": 0.5907172995780591, + "grad_norm": 0.38948529958724976, + "learning_rate": 0.0005485197127503795, + "loss": 1.1877, + "step": 5600 + }, + { + "epoch": 0.5917721518987342, + "grad_norm": 0.401199072599411, + "learning_rate": 0.0005461025381057516, + "loss": 1.1901, + "step": 5610 + }, + { + "epoch": 0.5928270042194093, + "grad_norm": 0.3774757385253906, + "learning_rate": 0.0005436876482317444, + "loss": 1.1974, + "step": 5620 + }, + { + "epoch": 0.5938818565400844, + "grad_norm": 0.3717878460884094, + "learning_rate": 0.0005412750701883782, + "loss": 1.1923, + "step": 5630 + }, + { + "epoch": 0.5949367088607594, + "grad_norm": 0.4294511675834656, + "learning_rate": 0.0005388648310097682, + "loss": 1.2008, + "step": 5640 + }, + { + "epoch": 0.5959915611814346, + "grad_norm": 0.36430466175079346, + "learning_rate": 0.000536456957703821, + "loss": 1.1945, + "step": 5650 + }, + { + "epoch": 0.5970464135021097, + "grad_norm": 
0.3893060088157654, + "learning_rate": 0.0005340514772519324, + "loss": 1.1896, + "step": 5660 + }, + { + "epoch": 0.5981012658227848, + "grad_norm": 0.3386535942554474, + "learning_rate": 0.0005316484166086863, + "loss": 1.1962, + "step": 5670 + }, + { + "epoch": 0.5991561181434599, + "grad_norm": 0.372712105512619, + "learning_rate": 0.00052924780270155, + "loss": 1.1963, + "step": 5680 + }, + { + "epoch": 0.6002109704641351, + "grad_norm": 0.36947306990623474, + "learning_rate": 0.0005268496624305747, + "loss": 1.1764, + "step": 5690 + }, + { + "epoch": 0.6012658227848101, + "grad_norm": 0.35860103368759155, + "learning_rate": 0.0005244540226680931, + "loss": 1.182, + "step": 5700 + }, + { + "epoch": 0.6023206751054853, + "grad_norm": 0.3623606562614441, + "learning_rate": 0.0005220609102584185, + "loss": 1.1873, + "step": 5710 + }, + { + "epoch": 0.6033755274261603, + "grad_norm": 0.3721083998680115, + "learning_rate": 0.0005196703520175437, + "loss": 1.1913, + "step": 5720 + }, + { + "epoch": 0.6044303797468354, + "grad_norm": 0.40760260820388794, + "learning_rate": 0.0005172823747328415, + "loss": 1.1886, + "step": 5730 + }, + { + "epoch": 0.6054852320675106, + "grad_norm": 0.35366445779800415, + "learning_rate": 0.0005148970051627632, + "loss": 1.185, + "step": 5740 + }, + { + "epoch": 0.6065400843881856, + "grad_norm": 0.3632625937461853, + "learning_rate": 0.0005125142700365394, + "loss": 1.159, + "step": 5750 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 0.40271735191345215, + "learning_rate": 0.000510134196053881, + "loss": 1.1697, + "step": 5760 + }, + { + "epoch": 0.6086497890295358, + "grad_norm": 0.37240713834762573, + "learning_rate": 0.0005077568098846789, + "loss": 1.1709, + "step": 5770 + }, + { + "epoch": 0.609704641350211, + "grad_norm": 0.40513280034065247, + "learning_rate": 0.000505382138168706, + "loss": 1.1806, + "step": 5780 + }, + { + "epoch": 0.6107594936708861, + "grad_norm": 0.3707410991191864, + "learning_rate": 
0.0005030102075153181, + "loss": 1.1779, + "step": 5790 + }, + { + "epoch": 0.6118143459915611, + "grad_norm": 0.39285629987716675, + "learning_rate": 0.0005006410445031569, + "loss": 1.1814, + "step": 5800 + }, + { + "epoch": 0.6128691983122363, + "grad_norm": 0.36199751496315, + "learning_rate": 0.0004982746756798507, + "loss": 1.184, + "step": 5810 + }, + { + "epoch": 0.6139240506329114, + "grad_norm": 0.36564263701438904, + "learning_rate": 0.0004959111275617174, + "loss": 1.1968, + "step": 5820 + }, + { + "epoch": 0.6149789029535865, + "grad_norm": 0.3686883747577667, + "learning_rate": 0.0004935504266334677, + "loss": 1.1806, + "step": 5830 + }, + { + "epoch": 0.6160337552742616, + "grad_norm": 0.3595767617225647, + "learning_rate": 0.0004911925993479085, + "loss": 1.1911, + "step": 5840 + }, + { + "epoch": 0.6170886075949367, + "grad_norm": 0.380021870136261, + "learning_rate": 0.0004888376721256456, + "loss": 1.1966, + "step": 5850 + }, + { + "epoch": 0.6181434599156118, + "grad_norm": 0.3993105888366699, + "learning_rate": 0.00048648567135478805, + "loss": 1.1838, + "step": 5860 + }, + { + "epoch": 0.619198312236287, + "grad_norm": 0.3877832889556885, + "learning_rate": 0.0004841366233906538, + "loss": 1.1741, + "step": 5870 + }, + { + "epoch": 0.620253164556962, + "grad_norm": 0.3675520122051239, + "learning_rate": 0.0004817905545554717, + "loss": 1.1869, + "step": 5880 + }, + { + "epoch": 0.6213080168776371, + "grad_norm": 0.365359365940094, + "learning_rate": 0.00047944749113808884, + "loss": 1.1756, + "step": 5890 + }, + { + "epoch": 0.6223628691983122, + "grad_norm": 0.34352290630340576, + "learning_rate": 0.00047710745939367474, + "loss": 1.1786, + "step": 5900 + }, + { + "epoch": 0.6234177215189873, + "grad_norm": 0.3574180006980896, + "learning_rate": 0.0004747704855434278, + "loss": 1.1714, + "step": 5910 + }, + { + "epoch": 0.6244725738396625, + "grad_norm": 0.3392818868160248, + "learning_rate": 0.0004724365957742809, + "loss": 1.1683, + "step": 
5920 + }, + { + "epoch": 0.6255274261603375, + "grad_norm": 0.33924534916877747, + "learning_rate": 0.00047010581623860883, + "loss": 1.1744, + "step": 5930 + }, + { + "epoch": 0.6265822784810127, + "grad_norm": 0.36496856808662415, + "learning_rate": 0.0004677781730539342, + "loss": 1.1808, + "step": 5940 + }, + { + "epoch": 0.6276371308016878, + "grad_norm": 0.35622742772102356, + "learning_rate": 0.0004654536923026356, + "loss": 1.172, + "step": 5950 + }, + { + "epoch": 0.6286919831223629, + "grad_norm": 0.35895973443984985, + "learning_rate": 0.00046313240003165466, + "loss": 1.1776, + "step": 5960 + }, + { + "epoch": 0.629746835443038, + "grad_norm": 0.3521154224872589, + "learning_rate": 0.0004608143222522048, + "loss": 1.1763, + "step": 5970 + }, + { + "epoch": 0.630801687763713, + "grad_norm": 0.34627509117126465, + "learning_rate": 0.0004584994849394795, + "loss": 1.1611, + "step": 5980 + }, + { + "epoch": 0.6318565400843882, + "grad_norm": 0.38166508078575134, + "learning_rate": 0.0004561879140323607, + "loss": 1.1793, + "step": 5990 + }, + { + "epoch": 0.6329113924050633, + "grad_norm": 0.37901800870895386, + "learning_rate": 0.0004538796354331298, + "loss": 1.1864, + "step": 6000 + }, + { + "epoch": 0.6339662447257384, + "grad_norm": 0.4082631766796112, + "learning_rate": 0.0004515746750071754, + "loss": 1.1753, + "step": 6010 + }, + { + "epoch": 0.6350210970464135, + "grad_norm": 0.3545146882534027, + "learning_rate": 0.0004492730585827046, + "loss": 1.1744, + "step": 6020 + }, + { + "epoch": 0.6360759493670886, + "grad_norm": 0.3988998830318451, + "learning_rate": 0.0004469748119504529, + "loss": 1.1594, + "step": 6030 + }, + { + "epoch": 0.6371308016877637, + "grad_norm": 0.38194578886032104, + "learning_rate": 0.0004446799608633964, + "loss": 1.1786, + "step": 6040 + }, + { + "epoch": 0.6381856540084389, + "grad_norm": 0.37773770093917847, + "learning_rate": 0.00044238853103646154, + "loss": 1.1644, + "step": 6050 + }, + { + "epoch": 
0.6392405063291139, + "grad_norm": 0.39295461773872375, + "learning_rate": 0.00044010054814623925, + "loss": 1.18, + "step": 6060 + }, + { + "epoch": 0.640295358649789, + "grad_norm": 0.3779560327529907, + "learning_rate": 0.0004378160378306944, + "loss": 1.171, + "step": 6070 + }, + { + "epoch": 0.6413502109704642, + "grad_norm": 0.36832717061042786, + "learning_rate": 0.00043553502568888095, + "loss": 1.1697, + "step": 6080 + }, + { + "epoch": 0.6424050632911392, + "grad_norm": 0.3654510974884033, + "learning_rate": 0.0004332575372806534, + "loss": 1.1664, + "step": 6090 + }, + { + "epoch": 0.6434599156118144, + "grad_norm": 0.4271853268146515, + "learning_rate": 0.00043098359812638145, + "loss": 1.1625, + "step": 6100 + }, + { + "epoch": 0.6445147679324894, + "grad_norm": 0.3969768285751343, + "learning_rate": 0.00042871323370666383, + "loss": 1.1747, + "step": 6110 + }, + { + "epoch": 0.6455696202531646, + "grad_norm": 0.3841383755207062, + "learning_rate": 0.0004264464694620421, + "loss": 1.1684, + "step": 6120 + }, + { + "epoch": 0.6466244725738397, + "grad_norm": 0.3560936450958252, + "learning_rate": 0.000424183330792717, + "loss": 1.1723, + "step": 6130 + }, + { + "epoch": 0.6476793248945147, + "grad_norm": 0.48002129793167114, + "learning_rate": 0.0004219238430582621, + "loss": 1.1676, + "step": 6140 + }, + { + "epoch": 0.6487341772151899, + "grad_norm": 0.3844591975212097, + "learning_rate": 0.0004196680315773408, + "loss": 1.1788, + "step": 6150 + }, + { + "epoch": 0.6497890295358649, + "grad_norm": 0.3658396899700165, + "learning_rate": 0.00041741592162742214, + "loss": 1.1715, + "step": 6160 + }, + { + "epoch": 0.6508438818565401, + "grad_norm": 0.34545275568962097, + "learning_rate": 0.0004151675384444978, + "loss": 1.1503, + "step": 6170 + }, + { + "epoch": 0.6518987341772152, + "grad_norm": 0.3446422219276428, + "learning_rate": 0.00041292290722279914, + "loss": 1.1725, + "step": 6180 + }, + { + "epoch": 0.6529535864978903, + "grad_norm": 
0.3521484434604645, + "learning_rate": 0.00041068205311451517, + "loss": 1.174, + "step": 6190 + }, + { + "epoch": 0.6540084388185654, + "grad_norm": 0.3657844662666321, + "learning_rate": 0.00040844500122951026, + "loss": 1.1691, + "step": 6200 + }, + { + "epoch": 0.6550632911392406, + "grad_norm": 0.38760536909103394, + "learning_rate": 0.00040621177663504313, + "loss": 1.1634, + "step": 6210 + }, + { + "epoch": 0.6561181434599156, + "grad_norm": 0.4080621898174286, + "learning_rate": 0.00040398240435548583, + "loss": 1.1728, + "step": 6220 + }, + { + "epoch": 0.6571729957805907, + "grad_norm": 0.37887656688690186, + "learning_rate": 0.00040175690937204324, + "loss": 1.1616, + "step": 6230 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 0.3961949050426483, + "learning_rate": 0.00039953531662247343, + "loss": 1.1635, + "step": 6240 + }, + { + "epoch": 0.6592827004219409, + "grad_norm": 0.36524301767349243, + "learning_rate": 0.0003973176510008075, + "loss": 1.1672, + "step": 6250 + }, + { + "epoch": 0.6603375527426161, + "grad_norm": 0.393848717212677, + "learning_rate": 0.00039510393735707233, + "loss": 1.1622, + "step": 6260 + }, + { + "epoch": 0.6613924050632911, + "grad_norm": 0.36277323961257935, + "learning_rate": 0.00039289420049700986, + "loss": 1.1707, + "step": 6270 + }, + { + "epoch": 0.6624472573839663, + "grad_norm": 0.3623575270175934, + "learning_rate": 0.0003906884651818006, + "loss": 1.1691, + "step": 6280 + }, + { + "epoch": 0.6635021097046413, + "grad_norm": 0.4982074797153473, + "learning_rate": 0.00038848675612778577, + "loss": 1.1671, + "step": 6290 + }, + { + "epoch": 0.6645569620253164, + "grad_norm": 0.35465696454048157, + "learning_rate": 0.00038628909800619046, + "loss": 1.1594, + "step": 6300 + }, + { + "epoch": 0.6656118143459916, + "grad_norm": 0.37563908100128174, + "learning_rate": 0.0003840955154428467, + "loss": 1.1604, + "step": 6310 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.3676312565803528, + 
"learning_rate": 0.00038190603301791864, + "loss": 1.1569, + "step": 6320 + }, + { + "epoch": 0.6677215189873418, + "grad_norm": 0.42015132308006287, + "learning_rate": 0.0003797206752656258, + "loss": 1.1656, + "step": 6330 + }, + { + "epoch": 0.6687763713080169, + "grad_norm": 0.36727169156074524, + "learning_rate": 0.0003775394666739688, + "loss": 1.1577, + "step": 6340 + }, + { + "epoch": 0.669831223628692, + "grad_norm": 0.37604963779449463, + "learning_rate": 0.00037536243168445507, + "loss": 1.1656, + "step": 6350 + }, + { + "epoch": 0.6708860759493671, + "grad_norm": 0.3591715097427368, + "learning_rate": 0.0003731895946918246, + "loss": 1.1637, + "step": 6360 + }, + { + "epoch": 0.6719409282700421, + "grad_norm": 0.4027197062969208, + "learning_rate": 0.0003710209800437769, + "loss": 1.1454, + "step": 6370 + }, + { + "epoch": 0.6729957805907173, + "grad_norm": 0.34489601850509644, + "learning_rate": 0.00036885661204069767, + "loss": 1.1495, + "step": 6380 + }, + { + "epoch": 0.6740506329113924, + "grad_norm": 0.3564709722995758, + "learning_rate": 0.0003666965149353878, + "loss": 1.1643, + "step": 6390 + }, + { + "epoch": 0.6751054852320675, + "grad_norm": 0.4152984917163849, + "learning_rate": 0.0003645407129327898, + "loss": 1.1621, + "step": 6400 + }, + { + "epoch": 0.6761603375527426, + "grad_norm": 0.407699316740036, + "learning_rate": 0.00036238923018971783, + "loss": 1.1588, + "step": 6410 + }, + { + "epoch": 0.6772151898734177, + "grad_norm": 0.3636753261089325, + "learning_rate": 0.0003602420908145865, + "loss": 1.1499, + "step": 6420 + }, + { + "epoch": 0.6782700421940928, + "grad_norm": 0.36881223320961, + "learning_rate": 0.00035809931886714093, + "loss": 1.1594, + "step": 6430 + }, + { + "epoch": 0.679324894514768, + "grad_norm": 0.36326852440834045, + "learning_rate": 0.00035596093835818683, + "loss": 1.1439, + "step": 6440 + }, + { + "epoch": 0.680379746835443, + "grad_norm": 0.35837841033935547, + "learning_rate": 0.00035382697324932245, + 
"loss": 1.1491, + "step": 6450 + }, + { + "epoch": 0.6814345991561181, + "grad_norm": 0.43497446179389954, + "learning_rate": 0.00035169744745266866, + "loss": 1.1627, + "step": 6460 + }, + { + "epoch": 0.6824894514767933, + "grad_norm": 0.379119336605072, + "learning_rate": 0.0003495723848306017, + "loss": 1.1577, + "step": 6470 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 0.3709147274494171, + "learning_rate": 0.0003474518091954859, + "loss": 1.1617, + "step": 6480 + }, + { + "epoch": 0.6845991561181435, + "grad_norm": 0.36736902594566345, + "learning_rate": 0.0003453357443094068, + "loss": 1.1582, + "step": 6490 + }, + { + "epoch": 0.6856540084388185, + "grad_norm": 0.391516774892807, + "learning_rate": 0.00034322421388390456, + "loss": 1.171, + "step": 6500 + }, + { + "epoch": 0.6867088607594937, + "grad_norm": 0.3488520681858063, + "learning_rate": 0.0003411172415797087, + "loss": 1.1706, + "step": 6510 + }, + { + "epoch": 0.6877637130801688, + "grad_norm": 0.36531731486320496, + "learning_rate": 0.0003390148510064727, + "loss": 1.1581, + "step": 6520 + }, + { + "epoch": 0.6888185654008439, + "grad_norm": 0.37665581703186035, + "learning_rate": 0.0003369170657225094, + "loss": 1.1484, + "step": 6530 + }, + { + "epoch": 0.689873417721519, + "grad_norm": 0.37849798798561096, + "learning_rate": 0.0003348239092345275, + "loss": 1.1588, + "step": 6540 + }, + { + "epoch": 0.6909282700421941, + "grad_norm": 0.38719886541366577, + "learning_rate": 0.0003327354049973672, + "loss": 1.1545, + "step": 6550 + }, + { + "epoch": 0.6919831223628692, + "grad_norm": 0.41207027435302734, + "learning_rate": 0.00033065157641373847, + "loss": 1.1541, + "step": 6560 + }, + { + "epoch": 0.6930379746835443, + "grad_norm": 0.42637428641319275, + "learning_rate": 0.0003285724468339576, + "loss": 1.1508, + "step": 6570 + }, + { + "epoch": 0.6940928270042194, + "grad_norm": 0.3876138925552368, + "learning_rate": 0.00032649803955568755, + "loss": 1.1502, + "step": 6580 + }, + { + 
"epoch": 0.6951476793248945, + "grad_norm": 0.3820565938949585, + "learning_rate": 0.00032442837782367434, + "loss": 1.1524, + "step": 6590 + }, + { + "epoch": 0.6962025316455697, + "grad_norm": 0.3652574419975281, + "learning_rate": 0.0003223634848294883, + "loss": 1.1583, + "step": 6600 + }, + { + "epoch": 0.6972573839662447, + "grad_norm": 0.4173029363155365, + "learning_rate": 0.00032030338371126374, + "loss": 1.1439, + "step": 6610 + }, + { + "epoch": 0.6983122362869199, + "grad_norm": 0.362628310918808, + "learning_rate": 0.0003182480975534395, + "loss": 1.1546, + "step": 6620 + }, + { + "epoch": 0.6993670886075949, + "grad_norm": 0.4287622272968292, + "learning_rate": 0.00031619764938650057, + "loss": 1.1467, + "step": 6630 + }, + { + "epoch": 0.70042194092827, + "grad_norm": 0.3689073324203491, + "learning_rate": 0.0003141520621867197, + "loss": 1.1522, + "step": 6640 + }, + { + "epoch": 0.7014767932489452, + "grad_norm": 0.38702285289764404, + "learning_rate": 0.00031211135887590074, + "loss": 1.1454, + "step": 6650 + }, + { + "epoch": 0.7025316455696202, + "grad_norm": 0.3564895689487457, + "learning_rate": 0.0003100755623211205, + "loss": 1.1472, + "step": 6660 + }, + { + "epoch": 0.7035864978902954, + "grad_norm": 0.36778292059898376, + "learning_rate": 0.0003080446953344735, + "loss": 1.1505, + "step": 6670 + }, + { + "epoch": 0.7046413502109705, + "grad_norm": 0.35331690311431885, + "learning_rate": 0.00030601878067281575, + "loss": 1.1511, + "step": 6680 + }, + { + "epoch": 0.7056962025316456, + "grad_norm": 0.36919105052948, + "learning_rate": 0.00030399784103751044, + "loss": 1.144, + "step": 6690 + }, + { + "epoch": 0.7067510548523207, + "grad_norm": 0.35663625597953796, + "learning_rate": 0.000301981899074173, + "loss": 1.1471, + "step": 6700 + }, + { + "epoch": 0.7078059071729957, + "grad_norm": 0.3756558299064636, + "learning_rate": 0.0002999709773724171, + "loss": 1.1485, + "step": 6710 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 
0.3560740053653717, + "learning_rate": 0.00029796509846560294, + "loss": 1.1377, + "step": 6720 + }, + { + "epoch": 0.709915611814346, + "grad_norm": 0.3501749336719513, + "learning_rate": 0.0002959642848305828, + "loss": 1.1435, + "step": 6730 + }, + { + "epoch": 0.7109704641350211, + "grad_norm": 0.36983656883239746, + "learning_rate": 0.00029396855888745045, + "loss": 1.15, + "step": 6740 + }, + { + "epoch": 0.7120253164556962, + "grad_norm": 0.4085451662540436, + "learning_rate": 0.0002919779429992895, + "loss": 1.1543, + "step": 6750 + }, + { + "epoch": 0.7130801687763713, + "grad_norm": 0.37220215797424316, + "learning_rate": 0.0002899924594719231, + "loss": 1.1488, + "step": 6760 + }, + { + "epoch": 0.7141350210970464, + "grad_norm": 0.3557329475879669, + "learning_rate": 0.00028801213055366335, + "loss": 1.1492, + "step": 6770 + }, + { + "epoch": 0.7151898734177216, + "grad_norm": 0.37931346893310547, + "learning_rate": 0.00028603697843506315, + "loss": 1.1465, + "step": 6780 + }, + { + "epoch": 0.7162447257383966, + "grad_norm": 0.37470945715904236, + "learning_rate": 0.0002840670252486662, + "loss": 1.1506, + "step": 6790 + }, + { + "epoch": 0.7172995780590717, + "grad_norm": 0.37293675541877747, + "learning_rate": 0.00028210229306876, + "loss": 1.1384, + "step": 6800 + }, + { + "epoch": 0.7183544303797469, + "grad_norm": 0.3550741374492645, + "learning_rate": 0.0002801428039111279, + "loss": 1.1451, + "step": 6810 + }, + { + "epoch": 0.7194092827004219, + "grad_norm": 0.37655240297317505, + "learning_rate": 0.00027818857973280274, + "loss": 1.1438, + "step": 6820 + }, + { + "epoch": 0.7204641350210971, + "grad_norm": 0.3859023451805115, + "learning_rate": 0.0002762396424318206, + "loss": 1.1437, + "step": 6830 + }, + { + "epoch": 0.7215189873417721, + "grad_norm": 0.36085501313209534, + "learning_rate": 0.00027429601384697526, + "loss": 1.1343, + "step": 6840 + }, + { + "epoch": 0.7225738396624473, + "grad_norm": 0.37223494052886963, + "learning_rate": 
0.00027235771575757466, + "loss": 1.1437, + "step": 6850 + }, + { + "epoch": 0.7236286919831224, + "grad_norm": 0.38057178258895874, + "learning_rate": 0.0002704247698831951, + "loss": 1.1365, + "step": 6860 + }, + { + "epoch": 0.7246835443037974, + "grad_norm": 0.36014029383659363, + "learning_rate": 0.0002684971978834389, + "loss": 1.135, + "step": 6870 + }, + { + "epoch": 0.7257383966244726, + "grad_norm": 0.34835484623908997, + "learning_rate": 0.0002665750213576914, + "loss": 1.1451, + "step": 6880 + }, + { + "epoch": 0.7267932489451476, + "grad_norm": 0.36837393045425415, + "learning_rate": 0.0002646582618448794, + "loss": 1.1347, + "step": 6890 + }, + { + "epoch": 0.7278481012658228, + "grad_norm": 0.37352582812309265, + "learning_rate": 0.00026274694082322896, + "loss": 1.1332, + "step": 6900 + }, + { + "epoch": 0.7289029535864979, + "grad_norm": 0.42627763748168945, + "learning_rate": 0.0002608410797100255, + "loss": 1.1583, + "step": 6910 + }, + { + "epoch": 0.729957805907173, + "grad_norm": 0.41890236735343933, + "learning_rate": 0.0002589406998613733, + "loss": 1.1393, + "step": 6920 + }, + { + "epoch": 0.7310126582278481, + "grad_norm": 0.3742920756340027, + "learning_rate": 0.0002570458225719567, + "loss": 1.1431, + "step": 6930 + }, + { + "epoch": 0.7320675105485233, + "grad_norm": 0.38046887516975403, + "learning_rate": 0.00025515646907480074, + "loss": 1.1469, + "step": 6940 + }, + { + "epoch": 0.7331223628691983, + "grad_norm": 0.3562321364879608, + "learning_rate": 0.00025327266054103395, + "loss": 1.1443, + "step": 6950 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 0.35831841826438904, + "learning_rate": 0.0002513944180796509, + "loss": 1.1379, + "step": 6960 + }, + { + "epoch": 0.7352320675105485, + "grad_norm": 0.3924483358860016, + "learning_rate": 0.0002495217627372752, + "loss": 1.1327, + "step": 6970 + }, + { + "epoch": 0.7362869198312236, + "grad_norm": 0.36096253991127014, + "learning_rate": 0.0002476547154979248, + "loss": 
1.1359, + "step": 6980 + }, + { + "epoch": 0.7373417721518988, + "grad_norm": 0.37537628412246704, + "learning_rate": 0.00024579329728277534, + "loss": 1.1359, + "step": 6990 + }, + { + "epoch": 0.7383966244725738, + "grad_norm": 0.37565308809280396, + "learning_rate": 0.00024393752894992708, + "loss": 1.1476, + "step": 7000 + }, + { + "epoch": 0.739451476793249, + "grad_norm": 0.35901403427124023, + "learning_rate": 0.00024208743129417004, + "loss": 1.1208, + "step": 7010 + }, + { + "epoch": 0.740506329113924, + "grad_norm": 0.39581504464149475, + "learning_rate": 0.00024024302504675206, + "loss": 1.1346, + "step": 7020 + }, + { + "epoch": 0.7415611814345991, + "grad_norm": 0.3442055881023407, + "learning_rate": 0.0002384043308751454, + "loss": 1.1373, + "step": 7030 + }, + { + "epoch": 0.7426160337552743, + "grad_norm": 0.3714121878147125, + "learning_rate": 0.00023657136938281653, + "loss": 1.1447, + "step": 7040 + }, + { + "epoch": 0.7436708860759493, + "grad_norm": 0.38861745595932007, + "learning_rate": 0.00023474416110899377, + "loss": 1.1352, + "step": 7050 + }, + { + "epoch": 0.7447257383966245, + "grad_norm": 0.36330005526542664, + "learning_rate": 0.00023292272652843807, + "loss": 1.1367, + "step": 7060 + }, + { + "epoch": 0.7457805907172996, + "grad_norm": 0.3554536998271942, + "learning_rate": 0.00023110708605121317, + "loss": 1.143, + "step": 7070 + }, + { + "epoch": 0.7468354430379747, + "grad_norm": 0.37097129225730896, + "learning_rate": 0.00022929726002245728, + "loss": 1.1393, + "step": 7080 + }, + { + "epoch": 0.7478902953586498, + "grad_norm": 0.38714399933815, + "learning_rate": 0.00022749326872215472, + "loss": 1.1365, + "step": 7090 + }, + { + "epoch": 0.7489451476793249, + "grad_norm": 0.3776571452617645, + "learning_rate": 0.0002256951323649087, + "loss": 1.1327, + "step": 7100 + }, + { + "epoch": 0.75, + "grad_norm": 0.3625503182411194, + "learning_rate": 0.00022390287109971547, + "loss": 1.145, + "step": 7110 + }, + { + "epoch": 
0.7510548523206751, + "grad_norm": 0.41759389638900757, + "learning_rate": 0.00022211650500973746, + "loss": 1.1381, + "step": 7120 + }, + { + "epoch": 0.7521097046413502, + "grad_norm": 0.3729211390018463, + "learning_rate": 0.0002203360541120789, + "loss": 1.1454, + "step": 7130 + }, + { + "epoch": 0.7531645569620253, + "grad_norm": 0.3729153573513031, + "learning_rate": 0.00021856153835756164, + "loss": 1.1291, + "step": 7140 + }, + { + "epoch": 0.7542194092827004, + "grad_norm": 0.39670905470848083, + "learning_rate": 0.00021679297763050104, + "loss": 1.1275, + "step": 7150 + }, + { + "epoch": 0.7552742616033755, + "grad_norm": 0.3594678044319153, + "learning_rate": 0.0002150303917484834, + "loss": 1.1357, + "step": 7160 + }, + { + "epoch": 0.7563291139240507, + "grad_norm": 0.40017199516296387, + "learning_rate": 0.0002132738004621446, + "loss": 1.1439, + "step": 7170 + }, + { + "epoch": 0.7573839662447257, + "grad_norm": 0.3732672333717346, + "learning_rate": 0.00021152322345494763, + "loss": 1.1231, + "step": 7180 + }, + { + "epoch": 0.7584388185654009, + "grad_norm": 0.39866897463798523, + "learning_rate": 0.00020977868034296253, + "loss": 1.1264, + "step": 7190 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 0.35106202960014343, + "learning_rate": 0.00020804019067464667, + "loss": 1.1298, + "step": 7200 + }, + { + "epoch": 0.760548523206751, + "grad_norm": 0.4421609342098236, + "learning_rate": 0.00020630777393062575, + "loss": 1.134, + "step": 7210 + }, + { + "epoch": 0.7616033755274262, + "grad_norm": 0.40509048104286194, + "learning_rate": 0.00020458144952347523, + "loss": 1.1451, + "step": 7220 + }, + { + "epoch": 0.7626582278481012, + "grad_norm": 0.3772280514240265, + "learning_rate": 0.00020286123679750314, + "loss": 1.1437, + "step": 7230 + }, + { + "epoch": 0.7637130801687764, + "grad_norm": 0.3716863691806793, + "learning_rate": 0.00020114715502853292, + "loss": 1.1409, + "step": 7240 + }, + { + "epoch": 0.7647679324894515, + "grad_norm": 
0.41956421732902527, + "learning_rate": 0.0001994392234236878, + "loss": 1.1332, + "step": 7250 + }, + { + "epoch": 0.7658227848101266, + "grad_norm": 0.3653375804424286, + "learning_rate": 0.0001977374611211754, + "loss": 1.1337, + "step": 7260 + }, + { + "epoch": 0.7668776371308017, + "grad_norm": 0.3775090277194977, + "learning_rate": 0.00019604188719007313, + "loss": 1.1317, + "step": 7270 + }, + { + "epoch": 0.7679324894514767, + "grad_norm": 0.3718429505825043, + "learning_rate": 0.00019435252063011504, + "loss": 1.133, + "step": 7280 + }, + { + "epoch": 0.7689873417721519, + "grad_norm": 0.37331250309944153, + "learning_rate": 0.0001926693803714779, + "loss": 1.1447, + "step": 7290 + }, + { + "epoch": 0.770042194092827, + "grad_norm": 0.3802056312561035, + "learning_rate": 0.00019099248527457068, + "loss": 1.1459, + "step": 7300 + }, + { + "epoch": 0.7710970464135021, + "grad_norm": 0.36767441034317017, + "learning_rate": 0.0001893218541298216, + "loss": 1.1268, + "step": 7310 + }, + { + "epoch": 0.7721518987341772, + "grad_norm": 0.34967729449272156, + "learning_rate": 0.00018765750565746827, + "loss": 1.1214, + "step": 7320 + }, + { + "epoch": 0.7732067510548524, + "grad_norm": 0.3683644235134125, + "learning_rate": 0.00018599945850734812, + "loss": 1.1291, + "step": 7330 + }, + { + "epoch": 0.7742616033755274, + "grad_norm": 0.3712475001811981, + "learning_rate": 0.00018434773125868895, + "loss": 1.1222, + "step": 7340 + }, + { + "epoch": 0.7753164556962026, + "grad_norm": 0.3514195680618286, + "learning_rate": 0.00018270234241990108, + "loss": 1.1235, + "step": 7350 + }, + { + "epoch": 0.7763713080168776, + "grad_norm": 0.37643975019454956, + "learning_rate": 0.0001810633104283698, + "loss": 1.1306, + "step": 7360 + }, + { + "epoch": 0.7774261603375527, + "grad_norm": 0.38709020614624023, + "learning_rate": 0.0001794306536502492, + "loss": 1.1333, + "step": 7370 + }, + { + "epoch": 0.7784810126582279, + "grad_norm": 0.3852018117904663, + "learning_rate": 
0.0001778043903802555, + "loss": 1.1191, + "step": 7380 + }, + { + "epoch": 0.7795358649789029, + "grad_norm": 0.36321789026260376, + "learning_rate": 0.0001761845388414627, + "loss": 1.128, + "step": 7390 + }, + { + "epoch": 0.7805907172995781, + "grad_norm": 0.3746873140335083, + "learning_rate": 0.00017457111718509831, + "loss": 1.1372, + "step": 7400 + }, + { + "epoch": 0.7816455696202531, + "grad_norm": 0.3483794033527374, + "learning_rate": 0.00017296414349033976, + "loss": 1.1182, + "step": 7410 + }, + { + "epoch": 0.7827004219409283, + "grad_norm": 0.35910797119140625, + "learning_rate": 0.00017136363576411172, + "loss": 1.1232, + "step": 7420 + }, + { + "epoch": 0.7837552742616034, + "grad_norm": 0.3550094664096832, + "learning_rate": 0.00016976961194088526, + "loss": 1.1108, + "step": 7430 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 0.3682705760002136, + "learning_rate": 0.00016818208988247533, + "loss": 1.1147, + "step": 7440 + }, + { + "epoch": 0.7858649789029536, + "grad_norm": 0.35992518067359924, + "learning_rate": 0.0001666010873778419, + "loss": 1.1181, + "step": 7450 + }, + { + "epoch": 0.7869198312236287, + "grad_norm": 0.3707635998725891, + "learning_rate": 0.00016502662214289, + "loss": 1.1287, + "step": 7460 + }, + { + "epoch": 0.7879746835443038, + "grad_norm": 0.3830517530441284, + "learning_rate": 0.00016345871182027124, + "loss": 1.1204, + "step": 7470 + }, + { + "epoch": 0.7890295358649789, + "grad_norm": 0.36266350746154785, + "learning_rate": 0.00016189737397918653, + "loss": 1.1258, + "step": 7480 + }, + { + "epoch": 0.790084388185654, + "grad_norm": 0.363208144903183, + "learning_rate": 0.0001603426261151884, + "loss": 1.1323, + "step": 7490 + }, + { + "epoch": 0.7911392405063291, + "grad_norm": 0.3821084201335907, + "learning_rate": 0.00015879448564998648, + "loss": 1.1273, + "step": 7500 + }, + { + "epoch": 0.7921940928270043, + "grad_norm": 0.3608359396457672, + "learning_rate": 0.0001572529699312501, + "loss": 1.133, + 
"step": 7510 + }, + { + "epoch": 0.7932489451476793, + "grad_norm": 0.3614010810852051, + "learning_rate": 0.0001557180962324158, + "loss": 1.1172, + "step": 7520 + }, + { + "epoch": 0.7943037974683544, + "grad_norm": 0.3900236487388611, + "learning_rate": 0.00015418988175249282, + "loss": 1.1198, + "step": 7530 + }, + { + "epoch": 0.7953586497890295, + "grad_norm": 0.40939193964004517, + "learning_rate": 0.00015266834361587063, + "loss": 1.1163, + "step": 7540 + }, + { + "epoch": 0.7964135021097046, + "grad_norm": 0.3722957670688629, + "learning_rate": 0.00015115349887212678, + "loss": 1.1139, + "step": 7550 + }, + { + "epoch": 0.7974683544303798, + "grad_norm": 0.3719434440135956, + "learning_rate": 0.00014964536449583657, + "loss": 1.119, + "step": 7560 + }, + { + "epoch": 0.7985232067510548, + "grad_norm": 0.37369686365127563, + "learning_rate": 0.00014814395738638195, + "loss": 1.1259, + "step": 7570 + }, + { + "epoch": 0.79957805907173, + "grad_norm": 0.3760931193828583, + "learning_rate": 0.00014664929436776278, + "loss": 1.1248, + "step": 7580 + }, + { + "epoch": 0.8006329113924051, + "grad_norm": 0.3629474937915802, + "learning_rate": 0.00014516139218840788, + "loss": 1.1123, + "step": 7590 + }, + { + "epoch": 0.8016877637130801, + "grad_norm": 0.3516145348548889, + "learning_rate": 0.00014368026752098782, + "loss": 1.1198, + "step": 7600 + }, + { + "epoch": 0.8027426160337553, + "grad_norm": 0.36271005868911743, + "learning_rate": 0.00014220593696222768, + "loss": 1.132, + "step": 7610 + }, + { + "epoch": 0.8037974683544303, + "grad_norm": 0.34938865900039673, + "learning_rate": 0.00014073841703272092, + "loss": 1.1069, + "step": 7620 + }, + { + "epoch": 0.8048523206751055, + "grad_norm": 0.3703489601612091, + "learning_rate": 0.00013927772417674558, + "loss": 1.1308, + "step": 7630 + }, + { + "epoch": 0.8059071729957806, + "grad_norm": 0.3835078477859497, + "learning_rate": 0.00013782387476207788, + "loss": 1.1225, + "step": 7640 + }, + { + "epoch": 
0.8069620253164557, + "grad_norm": 0.3790573477745056, + "learning_rate": 0.00013637688507981064, + "loss": 1.1379, + "step": 7650 + }, + { + "epoch": 0.8080168776371308, + "grad_norm": 0.3718129098415375, + "learning_rate": 0.0001349367713441697, + "loss": 1.1152, + "step": 7660 + }, + { + "epoch": 0.8090717299578059, + "grad_norm": 0.36849159002304077, + "learning_rate": 0.0001335035496923326, + "loss": 1.122, + "step": 7670 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 0.3801332116127014, + "learning_rate": 0.0001320772361842478, + "loss": 1.1326, + "step": 7680 + }, + { + "epoch": 0.8111814345991561, + "grad_norm": 0.3597279191017151, + "learning_rate": 0.00013065784680245442, + "loss": 1.1092, + "step": 7690 + }, + { + "epoch": 0.8122362869198312, + "grad_norm": 0.3624008893966675, + "learning_rate": 0.00012924539745190402, + "loss": 1.1181, + "step": 7700 + }, + { + "epoch": 0.8132911392405063, + "grad_norm": 0.38765400648117065, + "learning_rate": 0.0001278399039597809, + "loss": 1.1156, + "step": 7710 + }, + { + "epoch": 0.8143459915611815, + "grad_norm": 0.3619614243507385, + "learning_rate": 0.0001264413820753261, + "loss": 1.1205, + "step": 7720 + }, + { + "epoch": 0.8154008438818565, + "grad_norm": 0.36417073011398315, + "learning_rate": 0.00012504984746966003, + "loss": 1.126, + "step": 7730 + }, + { + "epoch": 0.8164556962025317, + "grad_norm": 0.35633671283721924, + "learning_rate": 0.00012366531573560754, + "loss": 1.1255, + "step": 7740 + }, + { + "epoch": 0.8175105485232067, + "grad_norm": 0.3816451132297516, + "learning_rate": 0.00012228780238752264, + "loss": 1.1127, + "step": 7750 + }, + { + "epoch": 0.8185654008438819, + "grad_norm": 0.3993898630142212, + "learning_rate": 0.00012091732286111514, + "loss": 1.1123, + "step": 7760 + }, + { + "epoch": 0.819620253164557, + "grad_norm": 0.36348336935043335, + "learning_rate": 0.00011955389251327737, + "loss": 1.1179, + "step": 7770 + }, + { + "epoch": 0.820675105485232, + "grad_norm": 
0.3756120502948761, + "learning_rate": 0.00011819752662191197, + "loss": 1.1125, + "step": 7780 + }, + { + "epoch": 0.8217299578059072, + "grad_norm": 0.3802875876426697, + "learning_rate": 0.00011684824038576115, + "loss": 1.1294, + "step": 7790 + }, + { + "epoch": 0.8227848101265823, + "grad_norm": 0.3661904036998749, + "learning_rate": 0.00011550604892423593, + "loss": 1.1176, + "step": 7800 + }, + { + "epoch": 0.8238396624472574, + "grad_norm": 0.3550266623497009, + "learning_rate": 0.0001141709672772471, + "loss": 1.1166, + "step": 7810 + }, + { + "epoch": 0.8248945147679325, + "grad_norm": 0.36318251490592957, + "learning_rate": 0.00011284301040503625, + "loss": 1.1232, + "step": 7820 + }, + { + "epoch": 0.8259493670886076, + "grad_norm": 0.35563451051712036, + "learning_rate": 0.0001115221931880088, + "loss": 1.116, + "step": 7830 + }, + { + "epoch": 0.8270042194092827, + "grad_norm": 0.3556043803691864, + "learning_rate": 0.00011020853042656648, + "loss": 1.0999, + "step": 7840 + }, + { + "epoch": 0.8280590717299579, + "grad_norm": 0.36785390973091125, + "learning_rate": 0.000108902036840942, + "loss": 1.1088, + "step": 7850 + }, + { + "epoch": 0.8291139240506329, + "grad_norm": 0.34999290108680725, + "learning_rate": 0.00010760272707103389, + "loss": 1.0979, + "step": 7860 + }, + { + "epoch": 0.830168776371308, + "grad_norm": 0.3790420889854431, + "learning_rate": 0.00010631061567624259, + "loss": 1.1117, + "step": 7870 + }, + { + "epoch": 0.8312236286919831, + "grad_norm": 0.3649984896183014, + "learning_rate": 0.00010502571713530706, + "loss": 1.1302, + "step": 7880 + }, + { + "epoch": 0.8322784810126582, + "grad_norm": 0.3923533260822296, + "learning_rate": 0.00010374804584614308, + "loss": 1.1276, + "step": 7890 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.38636764883995056, + "learning_rate": 0.00010247761612568129, + "loss": 1.1056, + "step": 7900 + }, + { + "epoch": 0.8343881856540084, + "grad_norm": 0.38178226351737976, + 
"learning_rate": 0.0001012144422097069, + "loss": 1.1166, + "step": 7910 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 0.38185352087020874, + "learning_rate": 9.995853825270052e-05, + "loss": 1.1147, + "step": 7920 + }, + { + "epoch": 0.8364978902953587, + "grad_norm": 0.3619385063648224, + "learning_rate": 9.870991832767919e-05, + "loss": 1.1225, + "step": 7930 + }, + { + "epoch": 0.8375527426160337, + "grad_norm": 0.3500303328037262, + "learning_rate": 9.746859642603884e-05, + "loss": 1.1175, + "step": 7940 + }, + { + "epoch": 0.8386075949367089, + "grad_norm": 0.3563097417354584, + "learning_rate": 9.623458645739755e-05, + "loss": 1.1219, + "step": 7950 + }, + { + "epoch": 0.8396624472573839, + "grad_norm": 0.35280612111091614, + "learning_rate": 9.50079022494395e-05, + "loss": 1.1071, + "step": 7960 + }, + { + "epoch": 0.8407172995780591, + "grad_norm": 0.3701493442058563, + "learning_rate": 9.378855754776028e-05, + "loss": 1.1076, + "step": 7970 + }, + { + "epoch": 0.8417721518987342, + "grad_norm": 0.36128586530685425, + "learning_rate": 9.257656601571266e-05, + "loss": 1.1075, + "step": 7980 + }, + { + "epoch": 0.8428270042194093, + "grad_norm": 0.3913003206253052, + "learning_rate": 9.137194123425349e-05, + "loss": 1.1176, + "step": 7990 + }, + { + "epoch": 0.8438818565400844, + "grad_norm": 0.3562246859073639, + "learning_rate": 9.017469670179168e-05, + "loss": 1.1125, + "step": 8000 + }, + { + "epoch": 0.8449367088607594, + "grad_norm": 0.38052770495414734, + "learning_rate": 8.898484583403668e-05, + "loss": 1.1189, + "step": 8010 + }, + { + "epoch": 0.8459915611814346, + "grad_norm": 0.36378705501556396, + "learning_rate": 8.780240196384873e-05, + "loss": 1.1116, + "step": 8020 + }, + { + "epoch": 0.8470464135021097, + "grad_norm": 0.3562518358230591, + "learning_rate": 8.662737834108861e-05, + "loss": 1.1104, + "step": 8030 + }, + { + "epoch": 0.8481012658227848, + "grad_norm": 0.37795576453208923, + "learning_rate": 8.545978813246987e-05, + 
"loss": 1.1219, + "step": 8040 + }, + { + "epoch": 0.8491561181434599, + "grad_norm": 0.3587080240249634, + "learning_rate": 8.429964442141072e-05, + "loss": 1.1039, + "step": 8050 + }, + { + "epoch": 0.8502109704641351, + "grad_norm": 0.35426101088523865, + "learning_rate": 8.314696020788806e-05, + "loss": 1.1127, + "step": 8060 + }, + { + "epoch": 0.8512658227848101, + "grad_norm": 0.3638628423213959, + "learning_rate": 8.200174840829136e-05, + "loss": 1.1169, + "step": 8070 + }, + { + "epoch": 0.8523206751054853, + "grad_norm": 0.3730154037475586, + "learning_rate": 8.08640218552778e-05, + "loss": 1.119, + "step": 8080 + }, + { + "epoch": 0.8533755274261603, + "grad_norm": 0.3520881235599518, + "learning_rate": 7.973379329762925e-05, + "loss": 1.1072, + "step": 8090 + }, + { + "epoch": 0.8544303797468354, + "grad_norm": 0.3606489300727844, + "learning_rate": 7.861107540010845e-05, + "loss": 1.1025, + "step": 8100 + }, + { + "epoch": 0.8554852320675106, + "grad_norm": 0.40547868609428406, + "learning_rate": 7.749588074331762e-05, + "loss": 1.118, + "step": 8110 + }, + { + "epoch": 0.8565400843881856, + "grad_norm": 0.36077240109443665, + "learning_rate": 7.63882218235575e-05, + "loss": 1.1092, + "step": 8120 + }, + { + "epoch": 0.8575949367088608, + "grad_norm": 0.3992495834827423, + "learning_rate": 7.528811105268699e-05, + "loss": 1.1092, + "step": 8130 + }, + { + "epoch": 0.8586497890295358, + "grad_norm": 0.3654575049877167, + "learning_rate": 7.41955607579845e-05, + "loss": 1.108, + "step": 8140 + }, + { + "epoch": 0.859704641350211, + "grad_norm": 0.3554239869117737, + "learning_rate": 7.311058318200969e-05, + "loss": 1.1055, + "step": 8150 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 0.37257564067840576, + "learning_rate": 7.203319048246599e-05, + "loss": 1.1156, + "step": 8160 + }, + { + "epoch": 0.8618143459915611, + "grad_norm": 0.36394527554512024, + "learning_rate": 7.096339473206471e-05, + "loss": 1.1073, + "step": 8170 + }, + { + "epoch": 
0.8628691983122363, + "grad_norm": 0.37236538529396057, + "learning_rate": 6.990120791838953e-05, + "loss": 1.1135, + "step": 8180 + }, + { + "epoch": 0.8639240506329114, + "grad_norm": 0.38898542523384094, + "learning_rate": 6.884664194376233e-05, + "loss": 1.106, + "step": 8190 + }, + { + "epoch": 0.8649789029535865, + "grad_norm": 0.36607620120048523, + "learning_rate": 6.779970862510989e-05, + "loss": 1.1171, + "step": 8200 + }, + { + "epoch": 0.8660337552742616, + "grad_norm": 0.36071106791496277, + "learning_rate": 6.676041969383107e-05, + "loss": 1.1126, + "step": 8210 + }, + { + "epoch": 0.8670886075949367, + "grad_norm": 0.35975003242492676, + "learning_rate": 6.572878679566605e-05, + "loss": 1.1197, + "step": 8220 + }, + { + "epoch": 0.8681434599156118, + "grad_norm": 0.3584916889667511, + "learning_rate": 6.470482149056509e-05, + "loss": 1.1115, + "step": 8230 + }, + { + "epoch": 0.869198312236287, + "grad_norm": 0.3623376488685608, + "learning_rate": 6.368853525255942e-05, + "loss": 1.1084, + "step": 8240 + }, + { + "epoch": 0.870253164556962, + "grad_norm": 0.36445266008377075, + "learning_rate": 6.267993946963249e-05, + "loss": 1.1233, + "step": 8250 + }, + { + "epoch": 0.8713080168776371, + "grad_norm": 0.37702205777168274, + "learning_rate": 6.167904544359265e-05, + "loss": 1.1153, + "step": 8260 + }, + { + "epoch": 0.8723628691983122, + "grad_norm": 0.36750108003616333, + "learning_rate": 6.068586438994617e-05, + "loss": 1.1131, + "step": 8270 + }, + { + "epoch": 0.8734177215189873, + "grad_norm": 0.3449283242225647, + "learning_rate": 5.970040743777161e-05, + "loss": 1.0963, + "step": 8280 + }, + { + "epoch": 0.8744725738396625, + "grad_norm": 0.36028075218200684, + "learning_rate": 5.8722685629595454e-05, + "loss": 1.0979, + "step": 8290 + }, + { + "epoch": 0.8755274261603375, + "grad_norm": 0.35771775245666504, + "learning_rate": 5.7752709921267855e-05, + "loss": 1.1218, + "step": 8300 + }, + { + "epoch": 0.8765822784810127, + "grad_norm": 
0.3550027012825012, + "learning_rate": 5.6790491181840294e-05, + "loss": 1.1001, + "step": 8310 + }, + { + "epoch": 0.8776371308016878, + "grad_norm": 0.36124446988105774, + "learning_rate": 5.583604019344354e-05, + "loss": 1.1182, + "step": 8320 + }, + { + "epoch": 0.8786919831223629, + "grad_norm": 0.3725493252277374, + "learning_rate": 5.4889367651167007e-05, + "loss": 1.1114, + "step": 8330 + }, + { + "epoch": 0.879746835443038, + "grad_norm": 0.34998440742492676, + "learning_rate": 5.3950484162938714e-05, + "loss": 1.101, + "step": 8340 + }, + { + "epoch": 0.880801687763713, + "grad_norm": 0.36161261796951294, + "learning_rate": 5.3019400249406686e-05, + "loss": 1.1022, + "step": 8350 + }, + { + "epoch": 0.8818565400843882, + "grad_norm": 0.36241891980171204, + "learning_rate": 5.209612634382077e-05, + "loss": 1.1013, + "step": 8360 + }, + { + "epoch": 0.8829113924050633, + "grad_norm": 0.35523760318756104, + "learning_rate": 5.118067279191599e-05, + "loss": 1.1102, + "step": 8370 + }, + { + "epoch": 0.8839662447257384, + "grad_norm": 0.3642718195915222, + "learning_rate": 5.0273049851796205e-05, + "loss": 1.1132, + "step": 8380 + }, + { + "epoch": 0.8850210970464135, + "grad_norm": 0.36435121297836304, + "learning_rate": 4.9373267693819805e-05, + "loss": 1.105, + "step": 8390 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 0.3773235082626343, + "learning_rate": 4.848133640048513e-05, + "loss": 1.1051, + "step": 8400 + }, + { + "epoch": 0.8871308016877637, + "grad_norm": 0.3574071228504181, + "learning_rate": 4.75972659663178e-05, + "loss": 1.1247, + "step": 8410 + }, + { + "epoch": 0.8881856540084389, + "grad_norm": 0.3590472638607025, + "learning_rate": 4.672106629775882e-05, + "loss": 1.1096, + "step": 8420 + }, + { + "epoch": 0.8892405063291139, + "grad_norm": 0.3647381365299225, + "learning_rate": 4.585274721305333e-05, + "loss": 1.1088, + "step": 8430 + }, + { + "epoch": 0.890295358649789, + "grad_norm": 0.3700782060623169, + "learning_rate": 
4.4992318442140575e-05, + "loss": 1.1025, + "step": 8440 + }, + { + "epoch": 0.8913502109704642, + "grad_norm": 0.3659191429615021, + "learning_rate": 4.413978962654508e-05, + "loss": 1.1094, + "step": 8450 + }, + { + "epoch": 0.8924050632911392, + "grad_norm": 0.3675118684768677, + "learning_rate": 4.3295170319268554e-05, + "loss": 1.1051, + "step": 8460 + }, + { + "epoch": 0.8934599156118144, + "grad_norm": 0.3624016344547272, + "learning_rate": 4.245846998468261e-05, + "loss": 1.1058, + "step": 8470 + }, + { + "epoch": 0.8945147679324894, + "grad_norm": 0.3543236553668976, + "learning_rate": 4.16296979984232e-05, + "loss": 1.1013, + "step": 8480 + }, + { + "epoch": 0.8955696202531646, + "grad_norm": 0.34951353073120117, + "learning_rate": 4.080886364728506e-05, + "loss": 1.1069, + "step": 8490 + }, + { + "epoch": 0.8966244725738397, + "grad_norm": 0.38264498114585876, + "learning_rate": 3.999597612911793e-05, + "loss": 1.0967, + "step": 8500 + }, + { + "epoch": 0.8976793248945147, + "grad_norm": 0.37308940291404724, + "learning_rate": 3.9191044552723345e-05, + "loss": 1.1087, + "step": 8510 + }, + { + "epoch": 0.8987341772151899, + "grad_norm": 0.38366225361824036, + "learning_rate": 3.839407793775268e-05, + "loss": 1.1049, + "step": 8520 + }, + { + "epoch": 0.8997890295358649, + "grad_norm": 0.36603066325187683, + "learning_rate": 3.760508521460584e-05, + "loss": 1.1171, + "step": 8530 + }, + { + "epoch": 0.9008438818565401, + "grad_norm": 0.35585013031959534, + "learning_rate": 3.682407522433173e-05, + "loss": 1.1129, + "step": 8540 + }, + { + "epoch": 0.9018987341772152, + "grad_norm": 0.38468706607818604, + "learning_rate": 3.605105671852854e-05, + "loss": 1.105, + "step": 8550 + }, + { + "epoch": 0.9029535864978903, + "grad_norm": 0.3588978350162506, + "learning_rate": 3.528603835924626e-05, + "loss": 1.0985, + "step": 8560 + }, + { + "epoch": 0.9040084388185654, + "grad_norm": 0.39159154891967773, + "learning_rate": 3.4529028718888935e-05, + "loss": 
1.1147, + "step": 8570 + }, + { + "epoch": 0.9050632911392406, + "grad_norm": 0.3527233898639679, + "learning_rate": 3.378003628011938e-05, + "loss": 1.1063, + "step": 8580 + }, + { + "epoch": 0.9061181434599156, + "grad_norm": 0.3757486343383789, + "learning_rate": 3.303906943576346e-05, + "loss": 1.1108, + "step": 8590 + }, + { + "epoch": 0.9071729957805907, + "grad_norm": 0.3887219727039337, + "learning_rate": 3.230613648871661e-05, + "loss": 1.1016, + "step": 8600 + }, + { + "epoch": 0.9082278481012658, + "grad_norm": 0.3607241213321686, + "learning_rate": 3.158124565185022e-05, + "loss": 1.107, + "step": 8610 + }, + { + "epoch": 0.9092827004219409, + "grad_norm": 0.3600178360939026, + "learning_rate": 3.086440504792026e-05, + "loss": 1.1031, + "step": 8620 + }, + { + "epoch": 0.9103375527426161, + "grad_norm": 0.38059672713279724, + "learning_rate": 3.015562270947553e-05, + "loss": 1.1108, + "step": 8630 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 0.3696359097957611, + "learning_rate": 2.945490657876837e-05, + "loss": 1.0924, + "step": 8640 + }, + { + "epoch": 0.9124472573839663, + "grad_norm": 0.36051735281944275, + "learning_rate": 2.8762264507665113e-05, + "loss": 1.098, + "step": 8650 + }, + { + "epoch": 0.9135021097046413, + "grad_norm": 0.37566593289375305, + "learning_rate": 2.807770425755829e-05, + "loss": 1.0973, + "step": 8660 + }, + { + "epoch": 0.9145569620253164, + "grad_norm": 0.35185009241104126, + "learning_rate": 2.7401233499279866e-05, + "loss": 1.1077, + "step": 8670 + }, + { + "epoch": 0.9156118143459916, + "grad_norm": 0.36478352546691895, + "learning_rate": 2.6732859813014987e-05, + "loss": 1.1157, + "step": 8680 + }, + { + "epoch": 0.9166666666666666, + "grad_norm": 0.3797094225883484, + "learning_rate": 2.607259068821721e-05, + "loss": 1.1092, + "step": 8690 + }, + { + "epoch": 0.9177215189873418, + "grad_norm": 0.3498828113079071, + "learning_rate": 2.5420433523524493e-05, + "loss": 1.1054, + "step": 8700 + }, + { + "epoch": 
0.9187763713080169, + "grad_norm": 0.36742252111434937, + "learning_rate": 2.4776395626676162e-05, + "loss": 1.1059, + "step": 8710 + }, + { + "epoch": 0.919831223628692, + "grad_norm": 0.3539731204509735, + "learning_rate": 2.414048421443141e-05, + "loss": 1.0994, + "step": 8720 + }, + { + "epoch": 0.9208860759493671, + "grad_norm": 0.36758720874786377, + "learning_rate": 2.3512706412488012e-05, + "loss": 1.1128, + "step": 8730 + }, + { + "epoch": 0.9219409282700421, + "grad_norm": 0.3628343343734741, + "learning_rate": 2.2893069255402993e-05, + "loss": 1.1013, + "step": 8740 + }, + { + "epoch": 0.9229957805907173, + "grad_norm": 0.3503541052341461, + "learning_rate": 2.2281579686513176e-05, + "loss": 1.0978, + "step": 8750 + }, + { + "epoch": 0.9240506329113924, + "grad_norm": 0.3604075014591217, + "learning_rate": 2.1678244557857663e-05, + "loss": 1.088, + "step": 8760 + }, + { + "epoch": 0.9251054852320675, + "grad_norm": 0.34871819615364075, + "learning_rate": 2.1083070630101232e-05, + "loss": 1.0962, + "step": 8770 + }, + { + "epoch": 0.9261603375527426, + "grad_norm": 0.35838931798934937, + "learning_rate": 2.0496064572458395e-05, + "loss": 1.1052, + "step": 8780 + }, + { + "epoch": 0.9272151898734177, + "grad_norm": 0.3570609986782074, + "learning_rate": 1.991723296261863e-05, + "loss": 1.0995, + "step": 8790 + }, + { + "epoch": 0.9282700421940928, + "grad_norm": 0.35785019397735596, + "learning_rate": 1.9346582286672686e-05, + "loss": 1.0957, + "step": 8800 + }, + { + "epoch": 0.929324894514768, + "grad_norm": 0.3643856644630432, + "learning_rate": 1.878411893904014e-05, + "loss": 1.1149, + "step": 8810 + }, + { + "epoch": 0.930379746835443, + "grad_norm": 0.3485681712627411, + "learning_rate": 1.822984922239737e-05, + "loss": 1.1, + "step": 8820 + }, + { + "epoch": 0.9314345991561181, + "grad_norm": 0.3704765737056732, + "learning_rate": 1.7683779347607286e-05, + "loss": 1.1168, + "step": 8830 + }, + { + "epoch": 0.9324894514767933, + "grad_norm": 
0.35022515058517456, + "learning_rate": 1.714591543364938e-05, + "loss": 1.1045, + "step": 8840 + }, + { + "epoch": 0.9335443037974683, + "grad_norm": 0.37648147344589233, + "learning_rate": 1.6616263507551437e-05, + "loss": 1.1046, + "step": 8850 + }, + { + "epoch": 0.9345991561181435, + "grad_norm": 0.3764314651489258, + "learning_rate": 1.609482950432195e-05, + "loss": 1.1033, + "step": 8860 + }, + { + "epoch": 0.9356540084388185, + "grad_norm": 0.36434733867645264, + "learning_rate": 1.5581619266883563e-05, + "loss": 1.1028, + "step": 8870 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 0.37274083495140076, + "learning_rate": 1.5076638546007548e-05, + "loss": 1.1092, + "step": 8880 + }, + { + "epoch": 0.9377637130801688, + "grad_norm": 0.361558198928833, + "learning_rate": 1.457989300024945e-05, + "loss": 1.1047, + "step": 8890 + }, + { + "epoch": 0.9388185654008439, + "grad_norm": 0.3575560748577118, + "learning_rate": 1.4091388195885625e-05, + "loss": 1.0988, + "step": 8900 + }, + { + "epoch": 0.939873417721519, + "grad_norm": 0.36193081736564636, + "learning_rate": 1.3611129606851041e-05, + "loss": 1.098, + "step": 8910 + }, + { + "epoch": 0.9409282700421941, + "grad_norm": 0.3471304476261139, + "learning_rate": 1.313912261467759e-05, + "loss": 1.1043, + "step": 8920 + }, + { + "epoch": 0.9419831223628692, + "grad_norm": 0.35767582058906555, + "learning_rate": 1.267537250843412e-05, + "loss": 1.1029, + "step": 8930 + }, + { + "epoch": 0.9430379746835443, + "grad_norm": 0.3535594046115875, + "learning_rate": 1.2219884484667071e-05, + "loss": 1.1013, + "step": 8940 + }, + { + "epoch": 0.9440928270042194, + "grad_norm": 0.35520821809768677, + "learning_rate": 1.1772663647341947e-05, + "loss": 1.1031, + "step": 8950 + }, + { + "epoch": 0.9451476793248945, + "grad_norm": 0.35638466477394104, + "learning_rate": 1.1333715007786932e-05, + "loss": 1.1059, + "step": 8960 + }, + { + "epoch": 0.9462025316455697, + "grad_norm": 0.36158987879753113, + 
"learning_rate": 1.0903043484635694e-05, + "loss": 1.1043, + "step": 8970 + }, + { + "epoch": 0.9472573839662447, + "grad_norm": 0.37424609065055847, + "learning_rate": 1.0480653903772924e-05, + "loss": 1.0999, + "step": 8980 + }, + { + "epoch": 0.9483122362869199, + "grad_norm": 0.35378211736679077, + "learning_rate": 1.0066550998280132e-05, + "loss": 1.1059, + "step": 8990 + }, + { + "epoch": 0.9493670886075949, + "grad_norm": 0.3605569303035736, + "learning_rate": 9.660739408382608e-06, + "loss": 1.1022, + "step": 9000 + }, + { + "epoch": 0.95042194092827, + "grad_norm": 0.3631739020347595, + "learning_rate": 9.26322368139737e-06, + "loss": 1.0971, + "step": 9010 + }, + { + "epoch": 0.9514767932489452, + "grad_norm": 0.3738535940647125, + "learning_rate": 8.874008271682222e-06, + "loss": 1.0982, + "step": 9020 + }, + { + "epoch": 0.9525316455696202, + "grad_norm": 0.3611426055431366, + "learning_rate": 8.493097540585775e-06, + "loss": 1.117, + "step": 9030 + }, + { + "epoch": 0.9535864978902954, + "grad_norm": 0.36006152629852295, + "learning_rate": 8.120495756399005e-06, + "loss": 1.0943, + "step": 9040 + }, + { + "epoch": 0.9546413502109705, + "grad_norm": 0.3576846718788147, + "learning_rate": 7.756207094306605e-06, + "loss": 1.1044, + "step": 9050 + }, + { + "epoch": 0.9556962025316456, + "grad_norm": 0.36890554428100586, + "learning_rate": 7.400235636340957e-06, + "loss": 1.1062, + "step": 9060 + }, + { + "epoch": 0.9567510548523207, + "grad_norm": 0.3569638729095459, + "learning_rate": 7.0525853713362395e-06, + "loss": 1.1074, + "step": 9070 + }, + { + "epoch": 0.9578059071729957, + "grad_norm": 0.35618263483047485, + "learning_rate": 6.71326019488322e-06, + "loss": 1.1086, + "step": 9080 + }, + { + "epoch": 0.9588607594936709, + "grad_norm": 0.37716180086135864, + "learning_rate": 6.3822639092862846e-06, + "loss": 1.1082, + "step": 9090 + }, + { + "epoch": 0.959915611814346, + "grad_norm": 0.3503626883029938, + "learning_rate": 6.059600223520478e-06, + 
"loss": 1.0881, + "step": 9100 + }, + { + "epoch": 0.9609704641350211, + "grad_norm": 0.36392760276794434, + "learning_rate": 5.745272753189784e-06, + "loss": 1.0974, + "step": 9110 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 0.35463055968284607, + "learning_rate": 5.439285020487156e-06, + "loss": 1.1097, + "step": 9120 + }, + { + "epoch": 0.9630801687763713, + "grad_norm": 0.35742634534835815, + "learning_rate": 5.141640454154467e-06, + "loss": 1.0939, + "step": 9130 + }, + { + "epoch": 0.9641350210970464, + "grad_norm": 0.36812102794647217, + "learning_rate": 4.852342389444458e-06, + "loss": 1.1132, + "step": 9140 + }, + { + "epoch": 0.9651898734177216, + "grad_norm": 0.37311768531799316, + "learning_rate": 4.571394068083185e-06, + "loss": 1.1018, + "step": 9150 + }, + { + "epoch": 0.9662447257383966, + "grad_norm": 0.3573930263519287, + "learning_rate": 4.298798638233709e-06, + "loss": 1.1089, + "step": 9160 + }, + { + "epoch": 0.9672995780590717, + "grad_norm": 0.3686988949775696, + "learning_rate": 4.034559154461049e-06, + "loss": 1.1077, + "step": 9170 + }, + { + "epoch": 0.9683544303797469, + "grad_norm": 0.34888580441474915, + "learning_rate": 3.7786785776976198e-06, + "loss": 1.0993, + "step": 9180 + }, + { + "epoch": 0.9694092827004219, + "grad_norm": 0.371014803647995, + "learning_rate": 3.5311597752100964e-06, + "loss": 1.0974, + "step": 9190 + }, + { + "epoch": 0.9704641350210971, + "grad_norm": 0.36439818143844604, + "learning_rate": 3.2920055205676867e-06, + "loss": 1.1026, + "step": 9200 + }, + { + "epoch": 0.9715189873417721, + "grad_norm": 0.34828996658325195, + "learning_rate": 3.06121849361049e-06, + "loss": 1.1013, + "step": 9210 + }, + { + "epoch": 0.9725738396624473, + "grad_norm": 0.3600952625274658, + "learning_rate": 2.838801280419856e-06, + "loss": 1.1016, + "step": 9220 + }, + { + "epoch": 0.9736286919831224, + "grad_norm": 0.35362669825553894, + "learning_rate": 2.624756373289322e-06, + "loss": 1.0932, + "step": 9230 + }, + { 
+ "epoch": 0.9746835443037974, + "grad_norm": 0.35408636927604675, + "learning_rate": 2.419086170696472e-06, + "loss": 1.0874, + "step": 9240 + }, + { + "epoch": 0.9757383966244726, + "grad_norm": 0.35020846128463745, + "learning_rate": 2.2217929772764545e-06, + "loss": 1.0993, + "step": 9250 + }, + { + "epoch": 0.9767932489451476, + "grad_norm": 0.3536199629306793, + "learning_rate": 2.0328790037957568e-06, + "loss": 1.0937, + "step": 9260 + }, + { + "epoch": 0.9778481012658228, + "grad_norm": 0.35778024792671204, + "learning_rate": 1.8523463671278052e-06, + "loss": 1.1041, + "step": 9270 + }, + { + "epoch": 0.9789029535864979, + "grad_norm": 0.34945109486579895, + "learning_rate": 1.6801970902288188e-06, + "loss": 1.0929, + "step": 9280 + }, + { + "epoch": 0.979957805907173, + "grad_norm": 0.36028343439102173, + "learning_rate": 1.5164331021155774e-06, + "loss": 1.1024, + "step": 9290 + }, + { + "epoch": 0.9810126582278481, + "grad_norm": 0.3585197329521179, + "learning_rate": 1.3610562378435221e-06, + "loss": 1.0997, + "step": 9300 + }, + { + "epoch": 0.9820675105485233, + "grad_norm": 0.3567984998226166, + "learning_rate": 1.2140682384862712e-06, + "loss": 1.1006, + "step": 9310 + }, + { + "epoch": 0.9831223628691983, + "grad_norm": 0.3587944209575653, + "learning_rate": 1.0754707511161365e-06, + "loss": 1.0956, + "step": 9320 + }, + { + "epoch": 0.9841772151898734, + "grad_norm": 0.360120564699173, + "learning_rate": 9.452653287856383e-07, + "loss": 1.1082, + "step": 9330 + }, + { + "epoch": 0.9852320675105485, + "grad_norm": 0.36455395817756653, + "learning_rate": 8.234534305101015e-07, + "loss": 1.105, + "step": 9340 + }, + { + "epoch": 0.9862869198312236, + "grad_norm": 0.3427172303199768, + "learning_rate": 7.100364212513367e-07, + "loss": 1.116, + "step": 9350 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 0.3511948585510254, + "learning_rate": 6.050155719023176e-07, + "loss": 1.1072, + "step": 9360 + }, + { + "epoch": 0.9883966244725738, + 
"grad_norm": 0.3564155399799347, + "learning_rate": 5.08392059272944e-07, + "loss": 1.1053, + "step": 9370 + }, + { + "epoch": 0.989451476793249, + "grad_norm": 0.3686405420303345, + "learning_rate": 4.2016696607680147e-07, + "loss": 1.0926, + "step": 9380 + }, + { + "epoch": 0.990506329113924, + "grad_norm": 0.3639363646507263, + "learning_rate": 3.4034128091917085e-07, + "loss": 1.0972, + "step": 9390 + }, + { + "epoch": 0.9915611814345991, + "grad_norm": 0.35043707489967346, + "learning_rate": 2.689158982859541e-07, + "loss": 1.1099, + "step": 9400 + }, + { + "epoch": 0.9926160337552743, + "grad_norm": 0.35883840918540955, + "learning_rate": 2.05891618533266e-07, + "loss": 1.0861, + "step": 9410 + }, + { + "epoch": 0.9936708860759493, + "grad_norm": 0.3705896735191345, + "learning_rate": 1.5126914787894074e-07, + "loss": 1.1049, + "step": 9420 + }, + { + "epoch": 0.9947257383966245, + "grad_norm": 0.34956392645835876, + "learning_rate": 1.0504909839462173e-07, + "loss": 1.1043, + "step": 9430 + }, + { + "epoch": 0.9957805907172996, + "grad_norm": 0.3487434983253479, + "learning_rate": 6.723198799826746e-08, + "loss": 1.1014, + "step": 9440 + }, + { + "epoch": 0.9968354430379747, + "grad_norm": 0.34953734278678894, + "learning_rate": 3.781824044932214e-08, + "loss": 1.1097, + "step": 9450 + }, + { + "epoch": 0.9978902953586498, + "grad_norm": 0.3557943105697632, + "learning_rate": 1.6808185342970238e-08, + "loss": 1.0945, + "step": 9460 + }, + { + "epoch": 0.9989451476793249, + "grad_norm": 0.3526889979839325, + "learning_rate": 4.202058107305451e-09, + "loss": 1.1073, + "step": 9470 + }, + { + "epoch": 1.0, + "grad_norm": 1.046021819114685, + "learning_rate": 0.0, + "loss": 1.0947, + "step": 9480 + } + ], + "logging_steps": 10, + "max_steps": 9480, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, 
+ "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 5.036240179760947e+16, + "train_batch_size": 1024, + "trial_name": null, + "trial_params": null +} diff --git a/saves-gemma-cosine/checkpoint-9480/training_args.bin b/saves-gemma-cosine/checkpoint-9480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..41607e90c9a40abfa49538b226ce49a3be196ff3 --- /dev/null +++ b/saves-gemma-cosine/checkpoint-9480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfbfdc263ee43b34afb61a5d0b023439dc5ff9d1218afb048f8106c2cc4b524e +size 5176 diff --git a/saves-gemma-cosine/config.json b/saves-gemma-cosine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d48ca7aeb27c97cf10a10143d9cee52b66b74ab3 --- /dev/null +++ b/saves-gemma-cosine/config.json @@ -0,0 +1,27 @@ +{ + "architectures": [ + "GemmaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 2, + "eos_token_id": 1, + "head_dim": 256, + "hidden_act": "gelu_pytorch_tanh", + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 768, + "max_position_embeddings": 8192, + "model_type": "gemma", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pad_token_id": 0, + "rms_norm_eps": 1e-06, + "rope_theta": 10000.0, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-gemma-cosine/generation_config.json b/saves-gemma-cosine/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c20913bfa6d3576264545acb67eae5f4818d0d32 --- /dev/null +++ b/saves-gemma-cosine/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 2, + "eos_token_id": 1, + "pad_token_id": 0, + "transformers_version": "4.42.4" +} diff --git 
a/saves-gemma-cosine/model.safetensors b/saves-gemma-cosine/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aeb29d1abc45aff8c92d5cc22caed1034e8bd856 --- /dev/null +++ b/saves-gemma-cosine/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffac0325ea267d46004617d550c5834c1940f02892c6b431368487b7d82ce2d5 +size 19356792 diff --git a/saves-gemma-cosine/result.log b/saves-gemma-cosine/result.log new file mode 100644 index 0000000000000000000000000000000000000000..fbbc28899f14f067e86c0787795235a9e729c5f0 --- /dev/null +++ b/saves-gemma-cosine/result.log @@ -0,0 +1 @@ +{'train_runtime': 5289.8615, 'train_samples_per_second': 1834.945, 'train_steps_per_second': 1.792, 'train_loss': 1.3737484057744345, 'epoch': 1.0} \ No newline at end of file diff --git a/saves-gemma-cosine/special_tokens_map.json b/saves-gemma-cosine/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-gemma-cosine/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-gemma-cosine/tokenizer.json b/saves-gemma-cosine/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-gemma-cosine/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": 
false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + 
"`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + 
"Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + 
"è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + 
"ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 
673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + 
"é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + 
"Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 
1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + 
"Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 
1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 
1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + 
"段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + 
"Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + 
"éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + 
"Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + 
"å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", + "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + 
"æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". .", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-gemma-cosine/tokenizer_config.json b/saves-gemma-cosine/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-gemma-cosine/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + 
"model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-gemma/checkpoint-9480/config.json b/saves-gemma/checkpoint-9480/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d48ca7aeb27c97cf10a10143d9cee52b66b74ab3 --- /dev/null +++ b/saves-gemma/checkpoint-9480/config.json @@ -0,0 +1,27 @@ +{ + "architectures": [ + "GemmaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 2, + "eos_token_id": 1, + "head_dim": 256, + "hidden_act": "gelu_pytorch_tanh", + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 768, + "max_position_embeddings": 8192, + "model_type": "gemma", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pad_token_id": 0, + "rms_norm_eps": 1e-06, + "rope_theta": 10000.0, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-gemma/checkpoint-9480/generation_config.json b/saves-gemma/checkpoint-9480/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c20913bfa6d3576264545acb67eae5f4818d0d32 --- /dev/null +++ b/saves-gemma/checkpoint-9480/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 2, + "eos_token_id": 1, + "pad_token_id": 0, + "transformers_version": "4.42.4" +} diff --git a/saves-gemma/checkpoint-9480/model.safetensors b/saves-gemma/checkpoint-9480/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4a5554046839f7cf6efcc313d9e0248e0ba82c20 --- /dev/null +++ b/saves-gemma/checkpoint-9480/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:228b9e625e6010052454dd32e91d5dff4dc5cb37dc5e42def029775a7f9d945d +size 19356792 diff --git 
a/saves-gemma/checkpoint-9480/optimizer.pt b/saves-gemma/checkpoint-9480/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f583351c437f33db9b7d084a9a45823ac9dd8a5c --- /dev/null +++ b/saves-gemma/checkpoint-9480/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64afebc75e6f06da3eba985ed0498d5f7701be67a98a1fd0d976d00e3103a387 +size 38726498 diff --git a/saves-gemma/checkpoint-9480/rng_state.pth b/saves-gemma/checkpoint-9480/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f8291cd6ce87668b786a72f3e93d072fbe54902 --- /dev/null +++ b/saves-gemma/checkpoint-9480/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c917636c7a58af68a29056522a757e9f9b99005b776641aa157c536967817d +size 14244 diff --git a/saves-gemma/checkpoint-9480/scheduler.pt b/saves-gemma/checkpoint-9480/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..63473f23a031ab0f869bb406d5cf89839262f03d --- /dev/null +++ b/saves-gemma/checkpoint-9480/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbb2bea2f7536b844ad9bb1bf6c3877fce0b1eb4d96764e140560dbf207ce6aa +size 1064 diff --git a/saves-gemma/checkpoint-9480/special_tokens_map.json b/saves-gemma/checkpoint-9480/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-gemma/checkpoint-9480/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git 
a/saves-gemma/checkpoint-9480/tokenizer.json b/saves-gemma/checkpoint-9480/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-gemma/checkpoint-9480/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 
21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + 
"re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + 
"ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + 
"res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + 
"被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 
782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + 
"Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + 
"ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 
1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + 
"çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + 
"Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + 
"Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 
1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 
1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 
1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-gemma/checkpoint-9480/tokenizer_config.json b/saves-gemma/checkpoint-9480/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-gemma/checkpoint-9480/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", 
+ "errors": "replace", + "model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-gemma/checkpoint-9480/trainer_state.json b/saves-gemma/checkpoint-9480/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6c2d30e38955bf7edd325344d994b7a89d02d7ad --- /dev/null +++ b/saves-gemma/checkpoint-9480/trainer_state.json @@ -0,0 +1,6669 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 9480, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0010548523206751054, + "grad_norm": 1.2371647357940674, + "learning_rate": 0.00015822784810126583, + "loss": 7.5911, + "step": 10 + }, + { + "epoch": 0.002109704641350211, + "grad_norm": 1.0724656581878662, + "learning_rate": 0.00031645569620253165, + "loss": 7.0366, + "step": 20 + }, + { + "epoch": 0.0031645569620253164, + "grad_norm": 0.8614045977592468, + "learning_rate": 0.00047468354430379745, + "loss": 6.3984, + "step": 30 + }, + { + "epoch": 0.004219409282700422, + "grad_norm": 0.6243458390235901, + "learning_rate": 0.0006329113924050633, + "loss": 5.8768, + "step": 40 + }, + { + "epoch": 0.005274261603375527, + "grad_norm": 0.4496183395385742, + "learning_rate": 0.0007911392405063291, + "loss": 5.5034, + "step": 50 + }, + { + "epoch": 0.006329113924050633, + "grad_norm": 0.3917015790939331, + "learning_rate": 0.0009493670886075949, + "loss": 5.0734, + "step": 60 + }, + { + "epoch": 0.007383966244725738, + "grad_norm": 0.2791913151741028, + "learning_rate": 0.0011075949367088608, + "loss": 4.664, + "step": 70 + }, + { + "epoch": 0.008438818565400843, + "grad_norm": 0.3657084107398987, + "learning_rate": 0.0012658227848101266, + "loss": 4.339, + "step": 80 + }, + { + "epoch": 0.00949367088607595, + "grad_norm": 0.44374170899391174, + 
"learning_rate": 0.0014240506329113926, + "loss": 4.0971, + "step": 90 + }, + { + "epoch": 0.010548523206751054, + "grad_norm": 0.5566601753234863, + "learning_rate": 0.0015, + "loss": 3.9403, + "step": 100 + }, + { + "epoch": 0.011603375527426161, + "grad_norm": 0.3145308792591095, + "learning_rate": 0.0015, + "loss": 3.7763, + "step": 110 + }, + { + "epoch": 0.012658227848101266, + "grad_norm": 0.3487795889377594, + "learning_rate": 0.0015, + "loss": 3.6478, + "step": 120 + }, + { + "epoch": 0.013713080168776372, + "grad_norm": 0.47254374623298645, + "learning_rate": 0.0015, + "loss": 3.5402, + "step": 130 + }, + { + "epoch": 0.014767932489451477, + "grad_norm": 0.3688655495643616, + "learning_rate": 0.0015, + "loss": 3.4295, + "step": 140 + }, + { + "epoch": 0.015822784810126583, + "grad_norm": 0.6684237718582153, + "learning_rate": 0.0015, + "loss": 3.3361, + "step": 150 + }, + { + "epoch": 0.016877637130801686, + "grad_norm": 0.31444546580314636, + "learning_rate": 0.0015, + "loss": 3.2668, + "step": 160 + }, + { + "epoch": 0.017932489451476793, + "grad_norm": 0.6808280944824219, + "learning_rate": 0.0015, + "loss": 3.1863, + "step": 170 + }, + { + "epoch": 0.0189873417721519, + "grad_norm": 0.35720598697662354, + "learning_rate": 0.0015, + "loss": 3.1343, + "step": 180 + }, + { + "epoch": 0.020042194092827006, + "grad_norm": 0.47156453132629395, + "learning_rate": 0.0015, + "loss": 3.0545, + "step": 190 + }, + { + "epoch": 0.02109704641350211, + "grad_norm": 0.39283475279808044, + "learning_rate": 0.0015, + "loss": 2.9937, + "step": 200 + }, + { + "epoch": 0.022151898734177215, + "grad_norm": 0.413607656955719, + "learning_rate": 0.0015, + "loss": 2.931, + "step": 210 + }, + { + "epoch": 0.023206751054852322, + "grad_norm": 0.39332082867622375, + "learning_rate": 0.0015, + "loss": 2.8887, + "step": 220 + }, + { + "epoch": 0.024261603375527425, + "grad_norm": 0.4956569969654083, + "learning_rate": 0.0015, + "loss": 2.8105, + "step": 230 + }, + { + "epoch": 
0.02531645569620253, + "grad_norm": 0.34334513545036316, + "learning_rate": 0.0015, + "loss": 2.7606, + "step": 240 + }, + { + "epoch": 0.026371308016877638, + "grad_norm": 0.4167127311229706, + "learning_rate": 0.0015, + "loss": 2.7061, + "step": 250 + }, + { + "epoch": 0.027426160337552744, + "grad_norm": 0.3620551824569702, + "learning_rate": 0.0015, + "loss": 2.6471, + "step": 260 + }, + { + "epoch": 0.028481012658227847, + "grad_norm": 0.45425307750701904, + "learning_rate": 0.0015, + "loss": 2.61, + "step": 270 + }, + { + "epoch": 0.029535864978902954, + "grad_norm": 0.5003123879432678, + "learning_rate": 0.0015, + "loss": 2.5594, + "step": 280 + }, + { + "epoch": 0.03059071729957806, + "grad_norm": 0.4262688457965851, + "learning_rate": 0.0015, + "loss": 2.5234, + "step": 290 + }, + { + "epoch": 0.03164556962025317, + "grad_norm": 0.3864128291606903, + "learning_rate": 0.0015, + "loss": 2.487, + "step": 300 + }, + { + "epoch": 0.03270042194092827, + "grad_norm": 0.49673473834991455, + "learning_rate": 0.0015, + "loss": 2.4486, + "step": 310 + }, + { + "epoch": 0.03375527426160337, + "grad_norm": 0.43513303995132446, + "learning_rate": 0.0015, + "loss": 2.413, + "step": 320 + }, + { + "epoch": 0.03481012658227848, + "grad_norm": 0.41726741194725037, + "learning_rate": 0.0015, + "loss": 2.3925, + "step": 330 + }, + { + "epoch": 0.035864978902953586, + "grad_norm": 0.4520697593688965, + "learning_rate": 0.0015, + "loss": 2.3633, + "step": 340 + }, + { + "epoch": 0.03691983122362869, + "grad_norm": 0.36444756388664246, + "learning_rate": 0.0015, + "loss": 2.31, + "step": 350 + }, + { + "epoch": 0.0379746835443038, + "grad_norm": 0.45437198877334595, + "learning_rate": 0.0015, + "loss": 2.2836, + "step": 360 + }, + { + "epoch": 0.039029535864978905, + "grad_norm": 0.45183318853378296, + "learning_rate": 0.0015, + "loss": 2.2729, + "step": 370 + }, + { + "epoch": 0.04008438818565401, + "grad_norm": 0.3789414167404175, + "learning_rate": 0.0015, + "loss": 2.2489, + 
"step": 380 + }, + { + "epoch": 0.04113924050632911, + "grad_norm": 0.5659546852111816, + "learning_rate": 0.0015, + "loss": 2.2257, + "step": 390 + }, + { + "epoch": 0.04219409282700422, + "grad_norm": 0.7111340761184692, + "learning_rate": 0.0015, + "loss": 2.2001, + "step": 400 + }, + { + "epoch": 0.043248945147679324, + "grad_norm": 0.3687579333782196, + "learning_rate": 0.0015, + "loss": 2.1839, + "step": 410 + }, + { + "epoch": 0.04430379746835443, + "grad_norm": 0.39448997378349304, + "learning_rate": 0.0015, + "loss": 2.1582, + "step": 420 + }, + { + "epoch": 0.04535864978902954, + "grad_norm": 0.41239675879478455, + "learning_rate": 0.0015, + "loss": 2.1346, + "step": 430 + }, + { + "epoch": 0.046413502109704644, + "grad_norm": 0.34442898631095886, + "learning_rate": 0.0015, + "loss": 2.1145, + "step": 440 + }, + { + "epoch": 0.04746835443037975, + "grad_norm": 0.4204893112182617, + "learning_rate": 0.0015, + "loss": 2.1103, + "step": 450 + }, + { + "epoch": 0.04852320675105485, + "grad_norm": 0.43915101885795593, + "learning_rate": 0.0015, + "loss": 2.0917, + "step": 460 + }, + { + "epoch": 0.049578059071729956, + "grad_norm": 0.36875662207603455, + "learning_rate": 0.0015, + "loss": 2.0652, + "step": 470 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 0.4595106840133667, + "learning_rate": 0.0015, + "loss": 2.0615, + "step": 480 + }, + { + "epoch": 0.05168776371308017, + "grad_norm": 0.3633001148700714, + "learning_rate": 0.0015, + "loss": 2.0351, + "step": 490 + }, + { + "epoch": 0.052742616033755275, + "grad_norm": 0.39989691972732544, + "learning_rate": 0.0015, + "loss": 2.0241, + "step": 500 + }, + { + "epoch": 0.05379746835443038, + "grad_norm": 0.38117069005966187, + "learning_rate": 0.0015, + "loss": 2.0121, + "step": 510 + }, + { + "epoch": 0.05485232067510549, + "grad_norm": 0.36800599098205566, + "learning_rate": 0.0015, + "loss": 1.9986, + "step": 520 + }, + { + "epoch": 0.05590717299578059, + "grad_norm": 0.3794243335723877, + 
"learning_rate": 0.0015, + "loss": 1.9862, + "step": 530 + }, + { + "epoch": 0.056962025316455694, + "grad_norm": 0.5420165061950684, + "learning_rate": 0.0015, + "loss": 1.9672, + "step": 540 + }, + { + "epoch": 0.0580168776371308, + "grad_norm": 0.421758234500885, + "learning_rate": 0.0015, + "loss": 1.9709, + "step": 550 + }, + { + "epoch": 0.05907172995780591, + "grad_norm": 0.3748304545879364, + "learning_rate": 0.0015, + "loss": 1.9287, + "step": 560 + }, + { + "epoch": 0.060126582278481014, + "grad_norm": 0.38089877367019653, + "learning_rate": 0.0015, + "loss": 1.9324, + "step": 570 + }, + { + "epoch": 0.06118143459915612, + "grad_norm": 0.6355550289154053, + "learning_rate": 0.0015, + "loss": 1.9337, + "step": 580 + }, + { + "epoch": 0.06223628691983123, + "grad_norm": 0.5314757823944092, + "learning_rate": 0.0015, + "loss": 1.9149, + "step": 590 + }, + { + "epoch": 0.06329113924050633, + "grad_norm": 0.5225743055343628, + "learning_rate": 0.0015, + "loss": 1.8945, + "step": 600 + }, + { + "epoch": 0.06434599156118144, + "grad_norm": 0.40494346618652344, + "learning_rate": 0.0015, + "loss": 1.8943, + "step": 610 + }, + { + "epoch": 0.06540084388185655, + "grad_norm": 0.46508485078811646, + "learning_rate": 0.0015, + "loss": 1.8832, + "step": 620 + }, + { + "epoch": 0.06645569620253164, + "grad_norm": 0.4040267765522003, + "learning_rate": 0.0015, + "loss": 1.8634, + "step": 630 + }, + { + "epoch": 0.06751054852320675, + "grad_norm": 0.4464590549468994, + "learning_rate": 0.0015, + "loss": 1.8795, + "step": 640 + }, + { + "epoch": 0.06856540084388185, + "grad_norm": 0.3856465518474579, + "learning_rate": 0.0015, + "loss": 1.8662, + "step": 650 + }, + { + "epoch": 0.06962025316455696, + "grad_norm": 0.39368781447410583, + "learning_rate": 0.0015, + "loss": 1.8503, + "step": 660 + }, + { + "epoch": 0.07067510548523206, + "grad_norm": 0.3902086615562439, + "learning_rate": 0.0015, + "loss": 1.8364, + "step": 670 + }, + { + "epoch": 0.07172995780590717, + 
"grad_norm": 0.3802025020122528, + "learning_rate": 0.0015, + "loss": 1.8278, + "step": 680 + }, + { + "epoch": 0.07278481012658228, + "grad_norm": 0.5012637376785278, + "learning_rate": 0.0015, + "loss": 1.8388, + "step": 690 + }, + { + "epoch": 0.07383966244725738, + "grad_norm": 0.3923953175544739, + "learning_rate": 0.0015, + "loss": 1.8209, + "step": 700 + }, + { + "epoch": 0.07489451476793249, + "grad_norm": 0.35562559962272644, + "learning_rate": 0.0015, + "loss": 1.8087, + "step": 710 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 0.43287137150764465, + "learning_rate": 0.0015, + "loss": 1.7948, + "step": 720 + }, + { + "epoch": 0.0770042194092827, + "grad_norm": 0.41877642273902893, + "learning_rate": 0.0015, + "loss": 1.7858, + "step": 730 + }, + { + "epoch": 0.07805907172995781, + "grad_norm": 0.40727561712265015, + "learning_rate": 0.0015, + "loss": 1.7967, + "step": 740 + }, + { + "epoch": 0.07911392405063292, + "grad_norm": 0.4644089937210083, + "learning_rate": 0.0015, + "loss": 1.7794, + "step": 750 + }, + { + "epoch": 0.08016877637130802, + "grad_norm": 0.5176643133163452, + "learning_rate": 0.0015, + "loss": 1.7787, + "step": 760 + }, + { + "epoch": 0.08122362869198312, + "grad_norm": 0.421298086643219, + "learning_rate": 0.0015, + "loss": 1.7685, + "step": 770 + }, + { + "epoch": 0.08227848101265822, + "grad_norm": 0.3996811509132385, + "learning_rate": 0.0015, + "loss": 1.7671, + "step": 780 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 0.37680113315582275, + "learning_rate": 0.0015, + "loss": 1.7646, + "step": 790 + }, + { + "epoch": 0.08438818565400844, + "grad_norm": 0.4056326746940613, + "learning_rate": 0.0015, + "loss": 1.7434, + "step": 800 + }, + { + "epoch": 0.08544303797468354, + "grad_norm": 0.45371413230895996, + "learning_rate": 0.0015, + "loss": 1.7312, + "step": 810 + }, + { + "epoch": 0.08649789029535865, + "grad_norm": 0.38185834884643555, + "learning_rate": 0.0015, + "loss": 1.7392, + "step": 820 + }, + { + 
"epoch": 0.08755274261603375, + "grad_norm": 0.3576831817626953, + "learning_rate": 0.0015, + "loss": 1.7431, + "step": 830 + }, + { + "epoch": 0.08860759493670886, + "grad_norm": 0.3754999339580536, + "learning_rate": 0.0015, + "loss": 1.7192, + "step": 840 + }, + { + "epoch": 0.08966244725738397, + "grad_norm": 0.3862115740776062, + "learning_rate": 0.0015, + "loss": 1.7178, + "step": 850 + }, + { + "epoch": 0.09071729957805907, + "grad_norm": 0.44392654299736023, + "learning_rate": 0.0015, + "loss": 1.7194, + "step": 860 + }, + { + "epoch": 0.09177215189873418, + "grad_norm": 0.3966352045536041, + "learning_rate": 0.0015, + "loss": 1.7145, + "step": 870 + }, + { + "epoch": 0.09282700421940929, + "grad_norm": 0.48476067185401917, + "learning_rate": 0.0015, + "loss": 1.7128, + "step": 880 + }, + { + "epoch": 0.0938818565400844, + "grad_norm": 0.4622691571712494, + "learning_rate": 0.0015, + "loss": 1.6989, + "step": 890 + }, + { + "epoch": 0.0949367088607595, + "grad_norm": 0.4104483723640442, + "learning_rate": 0.0015, + "loss": 1.6971, + "step": 900 + }, + { + "epoch": 0.09599156118143459, + "grad_norm": 0.39348629117012024, + "learning_rate": 0.0015, + "loss": 1.7058, + "step": 910 + }, + { + "epoch": 0.0970464135021097, + "grad_norm": 0.4345850646495819, + "learning_rate": 0.0015, + "loss": 1.6917, + "step": 920 + }, + { + "epoch": 0.0981012658227848, + "grad_norm": 0.3856751620769501, + "learning_rate": 0.0015, + "loss": 1.6779, + "step": 930 + }, + { + "epoch": 0.09915611814345991, + "grad_norm": 0.3952021896839142, + "learning_rate": 0.0015, + "loss": 1.6886, + "step": 940 + }, + { + "epoch": 0.10021097046413502, + "grad_norm": 0.36113595962524414, + "learning_rate": 0.0015, + "loss": 1.6677, + "step": 950 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 0.44029003381729126, + "learning_rate": 0.0015, + "loss": 1.674, + "step": 960 + }, + { + "epoch": 0.10232067510548523, + "grad_norm": 0.42792683839797974, + "learning_rate": 0.0015, + "loss": 
1.6752, + "step": 970 + }, + { + "epoch": 0.10337552742616034, + "grad_norm": 0.39351779222488403, + "learning_rate": 0.0015, + "loss": 1.6577, + "step": 980 + }, + { + "epoch": 0.10443037974683544, + "grad_norm": 0.3800772726535797, + "learning_rate": 0.0015, + "loss": 1.6539, + "step": 990 + }, + { + "epoch": 0.10548523206751055, + "grad_norm": 0.3503066301345825, + "learning_rate": 0.0015, + "loss": 1.6618, + "step": 1000 + }, + { + "epoch": 0.10654008438818566, + "grad_norm": 0.4740210771560669, + "learning_rate": 0.0015, + "loss": 1.6535, + "step": 1010 + }, + { + "epoch": 0.10759493670886076, + "grad_norm": 0.40961724519729614, + "learning_rate": 0.0015, + "loss": 1.6449, + "step": 1020 + }, + { + "epoch": 0.10864978902953587, + "grad_norm": 0.38675740361213684, + "learning_rate": 0.0015, + "loss": 1.6497, + "step": 1030 + }, + { + "epoch": 0.10970464135021098, + "grad_norm": 0.3638473451137543, + "learning_rate": 0.0015, + "loss": 1.6401, + "step": 1040 + }, + { + "epoch": 0.11075949367088607, + "grad_norm": 0.346079021692276, + "learning_rate": 0.0015, + "loss": 1.6389, + "step": 1050 + }, + { + "epoch": 0.11181434599156118, + "grad_norm": 0.3508372902870178, + "learning_rate": 0.0015, + "loss": 1.6295, + "step": 1060 + }, + { + "epoch": 0.11286919831223628, + "grad_norm": 0.3539598286151886, + "learning_rate": 0.0015, + "loss": 1.6167, + "step": 1070 + }, + { + "epoch": 0.11392405063291139, + "grad_norm": 0.3764999806880951, + "learning_rate": 0.0015, + "loss": 1.6202, + "step": 1080 + }, + { + "epoch": 0.1149789029535865, + "grad_norm": 0.46206948161125183, + "learning_rate": 0.0015, + "loss": 1.6285, + "step": 1090 + }, + { + "epoch": 0.1160337552742616, + "grad_norm": 0.452450692653656, + "learning_rate": 0.0015, + "loss": 1.6173, + "step": 1100 + }, + { + "epoch": 0.11708860759493671, + "grad_norm": 0.3777218759059906, + "learning_rate": 0.0015, + "loss": 1.6223, + "step": 1110 + }, + { + "epoch": 0.11814345991561181, + "grad_norm": 0.3499910533428192, 
+ "learning_rate": 0.0015, + "loss": 1.6123, + "step": 1120 + }, + { + "epoch": 0.11919831223628692, + "grad_norm": 0.5509498715400696, + "learning_rate": 0.0015, + "loss": 1.603, + "step": 1130 + }, + { + "epoch": 0.12025316455696203, + "grad_norm": 0.454050213098526, + "learning_rate": 0.0015, + "loss": 1.6144, + "step": 1140 + }, + { + "epoch": 0.12130801687763713, + "grad_norm": 0.5862528085708618, + "learning_rate": 0.0015, + "loss": 1.6041, + "step": 1150 + }, + { + "epoch": 0.12236286919831224, + "grad_norm": 0.3834332525730133, + "learning_rate": 0.0015, + "loss": 1.6053, + "step": 1160 + }, + { + "epoch": 0.12341772151898735, + "grad_norm": 0.379902720451355, + "learning_rate": 0.0015, + "loss": 1.5993, + "step": 1170 + }, + { + "epoch": 0.12447257383966245, + "grad_norm": 0.39323538541793823, + "learning_rate": 0.0015, + "loss": 1.5846, + "step": 1180 + }, + { + "epoch": 0.12552742616033755, + "grad_norm": 0.34747618436813354, + "learning_rate": 0.0015, + "loss": 1.5844, + "step": 1190 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 0.4111934304237366, + "learning_rate": 0.0015, + "loss": 1.5977, + "step": 1200 + }, + { + "epoch": 0.12763713080168776, + "grad_norm": 0.3951559066772461, + "learning_rate": 0.0015, + "loss": 1.5886, + "step": 1210 + }, + { + "epoch": 0.12869198312236288, + "grad_norm": 0.3657253682613373, + "learning_rate": 0.0015, + "loss": 1.5738, + "step": 1220 + }, + { + "epoch": 0.12974683544303797, + "grad_norm": 0.36329299211502075, + "learning_rate": 0.0015, + "loss": 1.5659, + "step": 1230 + }, + { + "epoch": 0.1308016877637131, + "grad_norm": 0.38337430357933044, + "learning_rate": 0.0015, + "loss": 1.5815, + "step": 1240 + }, + { + "epoch": 0.13185654008438819, + "grad_norm": 0.480984628200531, + "learning_rate": 0.0015, + "loss": 1.5826, + "step": 1250 + }, + { + "epoch": 0.13291139240506328, + "grad_norm": 0.37974998354911804, + "learning_rate": 0.0015, + "loss": 1.5664, + "step": 1260 + }, + { + "epoch": 
0.1339662447257384, + "grad_norm": 0.3430846333503723, + "learning_rate": 0.0015, + "loss": 1.5652, + "step": 1270 + }, + { + "epoch": 0.1350210970464135, + "grad_norm": 0.3568759560585022, + "learning_rate": 0.0015, + "loss": 1.5765, + "step": 1280 + }, + { + "epoch": 0.1360759493670886, + "grad_norm": 0.3612004518508911, + "learning_rate": 0.0015, + "loss": 1.5674, + "step": 1290 + }, + { + "epoch": 0.1371308016877637, + "grad_norm": 0.34362566471099854, + "learning_rate": 0.0015, + "loss": 1.5532, + "step": 1300 + }, + { + "epoch": 0.13818565400843882, + "grad_norm": 0.36927589774131775, + "learning_rate": 0.0015, + "loss": 1.5584, + "step": 1310 + }, + { + "epoch": 0.13924050632911392, + "grad_norm": 0.40451106429100037, + "learning_rate": 0.0015, + "loss": 1.5604, + "step": 1320 + }, + { + "epoch": 0.14029535864978904, + "grad_norm": 0.4525816738605499, + "learning_rate": 0.0015, + "loss": 1.5492, + "step": 1330 + }, + { + "epoch": 0.14135021097046413, + "grad_norm": 0.4472123086452484, + "learning_rate": 0.0015, + "loss": 1.5516, + "step": 1340 + }, + { + "epoch": 0.14240506329113925, + "grad_norm": 0.3997417390346527, + "learning_rate": 0.0015, + "loss": 1.555, + "step": 1350 + }, + { + "epoch": 0.14345991561181434, + "grad_norm": 0.3727695047855377, + "learning_rate": 0.0015, + "loss": 1.5489, + "step": 1360 + }, + { + "epoch": 0.14451476793248946, + "grad_norm": 0.4645366072654724, + "learning_rate": 0.0015, + "loss": 1.5449, + "step": 1370 + }, + { + "epoch": 0.14556962025316456, + "grad_norm": 0.3957315981388092, + "learning_rate": 0.0015, + "loss": 1.5384, + "step": 1380 + }, + { + "epoch": 0.14662447257383968, + "grad_norm": 0.39273178577423096, + "learning_rate": 0.0015, + "loss": 1.5496, + "step": 1390 + }, + { + "epoch": 0.14767932489451477, + "grad_norm": 0.4525180160999298, + "learning_rate": 0.0015, + "loss": 1.5281, + "step": 1400 + }, + { + "epoch": 0.14873417721518986, + "grad_norm": 0.40496838092803955, + "learning_rate": 0.0015, + "loss": 
1.5339, + "step": 1410 + }, + { + "epoch": 0.14978902953586498, + "grad_norm": 0.33585745096206665, + "learning_rate": 0.0015, + "loss": 1.5359, + "step": 1420 + }, + { + "epoch": 0.15084388185654007, + "grad_norm": 0.33285295963287354, + "learning_rate": 0.0015, + "loss": 1.5294, + "step": 1430 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 0.34191590547561646, + "learning_rate": 0.0015, + "loss": 1.523, + "step": 1440 + }, + { + "epoch": 0.1529535864978903, + "grad_norm": 0.3581468164920807, + "learning_rate": 0.0015, + "loss": 1.5225, + "step": 1450 + }, + { + "epoch": 0.1540084388185654, + "grad_norm": 0.3397340476512909, + "learning_rate": 0.0015, + "loss": 1.5222, + "step": 1460 + }, + { + "epoch": 0.1550632911392405, + "grad_norm": 0.3838866353034973, + "learning_rate": 0.0015, + "loss": 1.5217, + "step": 1470 + }, + { + "epoch": 0.15611814345991562, + "grad_norm": 0.4081639051437378, + "learning_rate": 0.0015, + "loss": 1.5245, + "step": 1480 + }, + { + "epoch": 0.1571729957805907, + "grad_norm": 0.3484359085559845, + "learning_rate": 0.0015, + "loss": 1.5237, + "step": 1490 + }, + { + "epoch": 0.15822784810126583, + "grad_norm": 0.3435753285884857, + "learning_rate": 0.0015, + "loss": 1.5168, + "step": 1500 + }, + { + "epoch": 0.15928270042194093, + "grad_norm": 0.3409862518310547, + "learning_rate": 0.0015, + "loss": 1.5182, + "step": 1510 + }, + { + "epoch": 0.16033755274261605, + "grad_norm": 0.35892271995544434, + "learning_rate": 0.0015, + "loss": 1.5208, + "step": 1520 + }, + { + "epoch": 0.16139240506329114, + "grad_norm": 0.3690599501132965, + "learning_rate": 0.0015, + "loss": 1.5023, + "step": 1530 + }, + { + "epoch": 0.16244725738396623, + "grad_norm": 0.3629121482372284, + "learning_rate": 0.0015, + "loss": 1.4942, + "step": 1540 + }, + { + "epoch": 0.16350210970464135, + "grad_norm": 0.3664407730102539, + "learning_rate": 0.0015, + "loss": 1.4971, + "step": 1550 + }, + { + "epoch": 0.16455696202531644, + "grad_norm": 
0.40004992485046387, + "learning_rate": 0.0015, + "loss": 1.5002, + "step": 1560 + }, + { + "epoch": 0.16561181434599156, + "grad_norm": 0.3557882308959961, + "learning_rate": 0.0015, + "loss": 1.4924, + "step": 1570 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 0.3498414158821106, + "learning_rate": 0.0015, + "loss": 1.5023, + "step": 1580 + }, + { + "epoch": 0.16772151898734178, + "grad_norm": 0.369983971118927, + "learning_rate": 0.0015, + "loss": 1.5012, + "step": 1590 + }, + { + "epoch": 0.16877637130801687, + "grad_norm": 0.3442850410938263, + "learning_rate": 0.0015, + "loss": 1.5014, + "step": 1600 + }, + { + "epoch": 0.169831223628692, + "grad_norm": 0.4010471701622009, + "learning_rate": 0.0015, + "loss": 1.4968, + "step": 1610 + }, + { + "epoch": 0.17088607594936708, + "grad_norm": 0.34040263295173645, + "learning_rate": 0.0015, + "loss": 1.4674, + "step": 1620 + }, + { + "epoch": 0.1719409282700422, + "grad_norm": 0.3703126013278961, + "learning_rate": 0.0015, + "loss": 1.4828, + "step": 1630 + }, + { + "epoch": 0.1729957805907173, + "grad_norm": 0.34696683287620544, + "learning_rate": 0.0015, + "loss": 1.4879, + "step": 1640 + }, + { + "epoch": 0.17405063291139242, + "grad_norm": 0.35845789313316345, + "learning_rate": 0.0015, + "loss": 1.4797, + "step": 1650 + }, + { + "epoch": 0.1751054852320675, + "grad_norm": 0.3321089744567871, + "learning_rate": 0.0015, + "loss": 1.4759, + "step": 1660 + }, + { + "epoch": 0.17616033755274263, + "grad_norm": 0.39269906282424927, + "learning_rate": 0.0015, + "loss": 1.4879, + "step": 1670 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 0.46313339471817017, + "learning_rate": 0.0015, + "loss": 1.4745, + "step": 1680 + }, + { + "epoch": 0.17827004219409281, + "grad_norm": 0.4027881324291229, + "learning_rate": 0.0015, + "loss": 1.4694, + "step": 1690 + }, + { + "epoch": 0.17932489451476794, + "grad_norm": 0.4224802851676941, + "learning_rate": 0.0015, + "loss": 1.4858, + "step": 1700 + }, + { + 
"epoch": 0.18037974683544303, + "grad_norm": 0.3397369384765625, + "learning_rate": 0.0015, + "loss": 1.461, + "step": 1710 + }, + { + "epoch": 0.18143459915611815, + "grad_norm": 0.3592316508293152, + "learning_rate": 0.0015, + "loss": 1.4617, + "step": 1720 + }, + { + "epoch": 0.18248945147679324, + "grad_norm": 0.3517380356788635, + "learning_rate": 0.0015, + "loss": 1.4641, + "step": 1730 + }, + { + "epoch": 0.18354430379746836, + "grad_norm": 0.37348607182502747, + "learning_rate": 0.0015, + "loss": 1.467, + "step": 1740 + }, + { + "epoch": 0.18459915611814345, + "grad_norm": 0.3547249138355255, + "learning_rate": 0.0015, + "loss": 1.4893, + "step": 1750 + }, + { + "epoch": 0.18565400843881857, + "grad_norm": 0.34048789739608765, + "learning_rate": 0.0015, + "loss": 1.4669, + "step": 1760 + }, + { + "epoch": 0.18670886075949367, + "grad_norm": 0.3326258063316345, + "learning_rate": 0.0015, + "loss": 1.4703, + "step": 1770 + }, + { + "epoch": 0.1877637130801688, + "grad_norm": 0.3598375618457794, + "learning_rate": 0.0015, + "loss": 1.4683, + "step": 1780 + }, + { + "epoch": 0.18881856540084388, + "grad_norm": 0.35684293508529663, + "learning_rate": 0.0015, + "loss": 1.4708, + "step": 1790 + }, + { + "epoch": 0.189873417721519, + "grad_norm": 0.3906477689743042, + "learning_rate": 0.0015, + "loss": 1.4657, + "step": 1800 + }, + { + "epoch": 0.1909282700421941, + "grad_norm": 0.37610185146331787, + "learning_rate": 0.0015, + "loss": 1.454, + "step": 1810 + }, + { + "epoch": 0.19198312236286919, + "grad_norm": 0.37533876299858093, + "learning_rate": 0.0015, + "loss": 1.4522, + "step": 1820 + }, + { + "epoch": 0.1930379746835443, + "grad_norm": 0.3618168830871582, + "learning_rate": 0.0015, + "loss": 1.453, + "step": 1830 + }, + { + "epoch": 0.1940928270042194, + "grad_norm": 0.3646780550479889, + "learning_rate": 0.0015, + "loss": 1.4548, + "step": 1840 + }, + { + "epoch": 0.19514767932489452, + "grad_norm": 0.3709734380245209, + "learning_rate": 0.0015, + 
"loss": 1.4526, + "step": 1850 + }, + { + "epoch": 0.1962025316455696, + "grad_norm": 0.36328479647636414, + "learning_rate": 0.0015, + "loss": 1.4575, + "step": 1860 + }, + { + "epoch": 0.19725738396624473, + "grad_norm": 0.35170361399650574, + "learning_rate": 0.0015, + "loss": 1.4564, + "step": 1870 + }, + { + "epoch": 0.19831223628691982, + "grad_norm": 0.3583771586418152, + "learning_rate": 0.0015, + "loss": 1.4424, + "step": 1880 + }, + { + "epoch": 0.19936708860759494, + "grad_norm": 0.3604058623313904, + "learning_rate": 0.0015, + "loss": 1.4509, + "step": 1890 + }, + { + "epoch": 0.20042194092827004, + "grad_norm": 0.38244473934173584, + "learning_rate": 0.0015, + "loss": 1.4478, + "step": 1900 + }, + { + "epoch": 0.20147679324894516, + "grad_norm": 0.47545963525772095, + "learning_rate": 0.0015, + "loss": 1.4484, + "step": 1910 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 0.37117186188697815, + "learning_rate": 0.0015, + "loss": 1.4483, + "step": 1920 + }, + { + "epoch": 0.20358649789029537, + "grad_norm": 0.3921581208705902, + "learning_rate": 0.0015, + "loss": 1.4501, + "step": 1930 + }, + { + "epoch": 0.20464135021097046, + "grad_norm": 0.3382081985473633, + "learning_rate": 0.0015, + "loss": 1.439, + "step": 1940 + }, + { + "epoch": 0.20569620253164558, + "grad_norm": 0.364330530166626, + "learning_rate": 0.0015, + "loss": 1.4374, + "step": 1950 + }, + { + "epoch": 0.20675105485232068, + "grad_norm": 0.3328717052936554, + "learning_rate": 0.0015, + "loss": 1.4362, + "step": 1960 + }, + { + "epoch": 0.20780590717299577, + "grad_norm": 0.3981072008609772, + "learning_rate": 0.0015, + "loss": 1.447, + "step": 1970 + }, + { + "epoch": 0.2088607594936709, + "grad_norm": 0.3473067283630371, + "learning_rate": 0.0015, + "loss": 1.4267, + "step": 1980 + }, + { + "epoch": 0.20991561181434598, + "grad_norm": 0.3436720073223114, + "learning_rate": 0.0015, + "loss": 1.428, + "step": 1990 + }, + { + "epoch": 0.2109704641350211, + "grad_norm": 
0.3526890277862549, + "learning_rate": 0.0015, + "loss": 1.4286, + "step": 2000 + }, + { + "epoch": 0.2120253164556962, + "grad_norm": 0.364200621843338, + "learning_rate": 0.0015, + "loss": 1.4363, + "step": 2010 + }, + { + "epoch": 0.21308016877637131, + "grad_norm": 0.4152698814868927, + "learning_rate": 0.0015, + "loss": 1.448, + "step": 2020 + }, + { + "epoch": 0.2141350210970464, + "grad_norm": 0.4520832896232605, + "learning_rate": 0.0015, + "loss": 1.4318, + "step": 2030 + }, + { + "epoch": 0.21518987341772153, + "grad_norm": 0.39879027009010315, + "learning_rate": 0.0015, + "loss": 1.4327, + "step": 2040 + }, + { + "epoch": 0.21624472573839662, + "grad_norm": 0.3920576274394989, + "learning_rate": 0.0015, + "loss": 1.4296, + "step": 2050 + }, + { + "epoch": 0.21729957805907174, + "grad_norm": 0.33325308561325073, + "learning_rate": 0.0015, + "loss": 1.4205, + "step": 2060 + }, + { + "epoch": 0.21835443037974683, + "grad_norm": 0.41690853238105774, + "learning_rate": 0.0015, + "loss": 1.426, + "step": 2070 + }, + { + "epoch": 0.21940928270042195, + "grad_norm": 0.37545451521873474, + "learning_rate": 0.0015, + "loss": 1.4286, + "step": 2080 + }, + { + "epoch": 0.22046413502109705, + "grad_norm": 0.3608053922653198, + "learning_rate": 0.0015, + "loss": 1.4183, + "step": 2090 + }, + { + "epoch": 0.22151898734177214, + "grad_norm": 0.3502177596092224, + "learning_rate": 0.0015, + "loss": 1.4256, + "step": 2100 + }, + { + "epoch": 0.22257383966244726, + "grad_norm": 0.37644052505493164, + "learning_rate": 0.0015, + "loss": 1.4128, + "step": 2110 + }, + { + "epoch": 0.22362869198312235, + "grad_norm": 0.3824097514152527, + "learning_rate": 0.0015, + "loss": 1.4146, + "step": 2120 + }, + { + "epoch": 0.22468354430379747, + "grad_norm": 0.3496038317680359, + "learning_rate": 0.0015, + "loss": 1.4162, + "step": 2130 + }, + { + "epoch": 0.22573839662447256, + "grad_norm": 0.3546043038368225, + "learning_rate": 0.0015, + "loss": 1.4142, + "step": 2140 + }, + { + 
"epoch": 0.22679324894514769, + "grad_norm": 0.37273067235946655, + "learning_rate": 0.0015, + "loss": 1.4209, + "step": 2150 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 0.46220526099205017, + "learning_rate": 0.0015, + "loss": 1.4323, + "step": 2160 + }, + { + "epoch": 0.2289029535864979, + "grad_norm": 0.3861055374145508, + "learning_rate": 0.0015, + "loss": 1.4159, + "step": 2170 + }, + { + "epoch": 0.229957805907173, + "grad_norm": 0.35327857732772827, + "learning_rate": 0.0015, + "loss": 1.4006, + "step": 2180 + }, + { + "epoch": 0.2310126582278481, + "grad_norm": 0.49925553798675537, + "learning_rate": 0.0015, + "loss": 1.4182, + "step": 2190 + }, + { + "epoch": 0.2320675105485232, + "grad_norm": 0.4116460978984833, + "learning_rate": 0.0015, + "loss": 1.4206, + "step": 2200 + }, + { + "epoch": 0.23312236286919832, + "grad_norm": 0.3701082170009613, + "learning_rate": 0.0015, + "loss": 1.4007, + "step": 2210 + }, + { + "epoch": 0.23417721518987342, + "grad_norm": 0.4048565924167633, + "learning_rate": 0.0015, + "loss": 1.4166, + "step": 2220 + }, + { + "epoch": 0.23523206751054854, + "grad_norm": 0.34562838077545166, + "learning_rate": 0.0015, + "loss": 1.4059, + "step": 2230 + }, + { + "epoch": 0.23628691983122363, + "grad_norm": 0.37388190627098083, + "learning_rate": 0.0015, + "loss": 1.4033, + "step": 2240 + }, + { + "epoch": 0.23734177215189872, + "grad_norm": 0.3642711341381073, + "learning_rate": 0.0015, + "loss": 1.4046, + "step": 2250 + }, + { + "epoch": 0.23839662447257384, + "grad_norm": 0.4138195514678955, + "learning_rate": 0.0015, + "loss": 1.3919, + "step": 2260 + }, + { + "epoch": 0.23945147679324894, + "grad_norm": 0.3694874048233032, + "learning_rate": 0.0015, + "loss": 1.4251, + "step": 2270 + }, + { + "epoch": 0.24050632911392406, + "grad_norm": 0.33054301142692566, + "learning_rate": 0.0015, + "loss": 1.4104, + "step": 2280 + }, + { + "epoch": 0.24156118143459915, + "grad_norm": 0.35614264011383057, + "learning_rate": 0.0015, 
+ "loss": 1.3991, + "step": 2290 + }, + { + "epoch": 0.24261603375527427, + "grad_norm": 0.3298373520374298, + "learning_rate": 0.0015, + "loss": 1.3965, + "step": 2300 + }, + { + "epoch": 0.24367088607594936, + "grad_norm": 0.3741556704044342, + "learning_rate": 0.0015, + "loss": 1.3957, + "step": 2310 + }, + { + "epoch": 0.24472573839662448, + "grad_norm": 0.3534122705459595, + "learning_rate": 0.0015, + "loss": 1.4096, + "step": 2320 + }, + { + "epoch": 0.24578059071729957, + "grad_norm": 0.3374796211719513, + "learning_rate": 0.0015, + "loss": 1.3925, + "step": 2330 + }, + { + "epoch": 0.2468354430379747, + "grad_norm": 0.3883809745311737, + "learning_rate": 0.0015, + "loss": 1.3853, + "step": 2340 + }, + { + "epoch": 0.2478902953586498, + "grad_norm": 0.4112820625305176, + "learning_rate": 0.0015, + "loss": 1.3886, + "step": 2350 + }, + { + "epoch": 0.2489451476793249, + "grad_norm": 0.4407918155193329, + "learning_rate": 0.0015, + "loss": 1.4009, + "step": 2360 + }, + { + "epoch": 0.25, + "grad_norm": 0.38165411353111267, + "learning_rate": 0.0015, + "loss": 1.3963, + "step": 2370 + }, + { + "epoch": 0.2510548523206751, + "grad_norm": 0.3438486158847809, + "learning_rate": 0.0015, + "loss": 1.3921, + "step": 2380 + }, + { + "epoch": 0.2521097046413502, + "grad_norm": 0.38707923889160156, + "learning_rate": 0.0015, + "loss": 1.3906, + "step": 2390 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 0.35939821600914, + "learning_rate": 0.0015, + "loss": 1.3911, + "step": 2400 + }, + { + "epoch": 0.2542194092827004, + "grad_norm": 0.3938080072402954, + "learning_rate": 0.0015, + "loss": 1.3951, + "step": 2410 + }, + { + "epoch": 0.2552742616033755, + "grad_norm": 0.37135642766952515, + "learning_rate": 0.0015, + "loss": 1.3871, + "step": 2420 + }, + { + "epoch": 0.2563291139240506, + "grad_norm": 0.41192826628685, + "learning_rate": 0.0015, + "loss": 1.3839, + "step": 2430 + }, + { + "epoch": 0.25738396624472576, + "grad_norm": 0.37276938557624817, + 
"learning_rate": 0.0015, + "loss": 1.3946, + "step": 2440 + }, + { + "epoch": 0.25843881856540085, + "grad_norm": 0.3469187915325165, + "learning_rate": 0.0015, + "loss": 1.3788, + "step": 2450 + }, + { + "epoch": 0.25949367088607594, + "grad_norm": 0.3916131556034088, + "learning_rate": 0.0015, + "loss": 1.3964, + "step": 2460 + }, + { + "epoch": 0.26054852320675104, + "grad_norm": 0.35671016573905945, + "learning_rate": 0.0015, + "loss": 1.3874, + "step": 2470 + }, + { + "epoch": 0.2616033755274262, + "grad_norm": 0.37222930788993835, + "learning_rate": 0.0015, + "loss": 1.3916, + "step": 2480 + }, + { + "epoch": 0.2626582278481013, + "grad_norm": 0.3410083055496216, + "learning_rate": 0.0015, + "loss": 1.3775, + "step": 2490 + }, + { + "epoch": 0.26371308016877637, + "grad_norm": 0.39879265427589417, + "learning_rate": 0.0015, + "loss": 1.3804, + "step": 2500 + }, + { + "epoch": 0.26476793248945146, + "grad_norm": 0.3476262092590332, + "learning_rate": 0.0015, + "loss": 1.3803, + "step": 2510 + }, + { + "epoch": 0.26582278481012656, + "grad_norm": 0.38001757860183716, + "learning_rate": 0.0015, + "loss": 1.3861, + "step": 2520 + }, + { + "epoch": 0.2668776371308017, + "grad_norm": 0.39220911264419556, + "learning_rate": 0.0015, + "loss": 1.3812, + "step": 2530 + }, + { + "epoch": 0.2679324894514768, + "grad_norm": 0.4335632026195526, + "learning_rate": 0.0015, + "loss": 1.3804, + "step": 2540 + }, + { + "epoch": 0.2689873417721519, + "grad_norm": 0.3345543444156647, + "learning_rate": 0.0015, + "loss": 1.3789, + "step": 2550 + }, + { + "epoch": 0.270042194092827, + "grad_norm": 0.3646849989891052, + "learning_rate": 0.0015, + "loss": 1.3853, + "step": 2560 + }, + { + "epoch": 0.27109704641350213, + "grad_norm": 0.3379766345024109, + "learning_rate": 0.0015, + "loss": 1.3798, + "step": 2570 + }, + { + "epoch": 0.2721518987341772, + "grad_norm": 0.3333427309989929, + "learning_rate": 0.0015, + "loss": 1.3782, + "step": 2580 + }, + { + "epoch": 0.2732067510548523, 
+ "grad_norm": 0.3338565528392792, + "learning_rate": 0.0015, + "loss": 1.3819, + "step": 2590 + }, + { + "epoch": 0.2742616033755274, + "grad_norm": 0.3742698132991791, + "learning_rate": 0.0015, + "loss": 1.3804, + "step": 2600 + }, + { + "epoch": 0.27531645569620256, + "grad_norm": 0.38040274381637573, + "learning_rate": 0.0015, + "loss": 1.3784, + "step": 2610 + }, + { + "epoch": 0.27637130801687765, + "grad_norm": 0.4038099944591522, + "learning_rate": 0.0015, + "loss": 1.3782, + "step": 2620 + }, + { + "epoch": 0.27742616033755274, + "grad_norm": 0.3927282691001892, + "learning_rate": 0.0015, + "loss": 1.3669, + "step": 2630 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 0.39924538135528564, + "learning_rate": 0.0015, + "loss": 1.3796, + "step": 2640 + }, + { + "epoch": 0.2795358649789029, + "grad_norm": 0.35913312435150146, + "learning_rate": 0.0015, + "loss": 1.3726, + "step": 2650 + }, + { + "epoch": 0.2805907172995781, + "grad_norm": 0.3561171293258667, + "learning_rate": 0.0015, + "loss": 1.3661, + "step": 2660 + }, + { + "epoch": 0.28164556962025317, + "grad_norm": 0.3356326222419739, + "learning_rate": 0.0015, + "loss": 1.3696, + "step": 2670 + }, + { + "epoch": 0.28270042194092826, + "grad_norm": 0.3672836422920227, + "learning_rate": 0.0015, + "loss": 1.3672, + "step": 2680 + }, + { + "epoch": 0.28375527426160335, + "grad_norm": 0.3602263629436493, + "learning_rate": 0.0015, + "loss": 1.3699, + "step": 2690 + }, + { + "epoch": 0.2848101265822785, + "grad_norm": 0.37418046593666077, + "learning_rate": 0.0015, + "loss": 1.3623, + "step": 2700 + }, + { + "epoch": 0.2858649789029536, + "grad_norm": 0.3903089463710785, + "learning_rate": 0.0015, + "loss": 1.3716, + "step": 2710 + }, + { + "epoch": 0.2869198312236287, + "grad_norm": 0.361920565366745, + "learning_rate": 0.0015, + "loss": 1.3704, + "step": 2720 + }, + { + "epoch": 0.2879746835443038, + "grad_norm": 0.33929145336151123, + "learning_rate": 0.0015, + "loss": 1.3527, + "step": 2730 + 
}, + { + "epoch": 0.2890295358649789, + "grad_norm": 0.3812505006790161, + "learning_rate": 0.0015, + "loss": 1.3633, + "step": 2740 + }, + { + "epoch": 0.290084388185654, + "grad_norm": 0.348438024520874, + "learning_rate": 0.0015, + "loss": 1.3679, + "step": 2750 + }, + { + "epoch": 0.2911392405063291, + "grad_norm": 0.3531169593334198, + "learning_rate": 0.0015, + "loss": 1.3743, + "step": 2760 + }, + { + "epoch": 0.2921940928270042, + "grad_norm": 0.34419044852256775, + "learning_rate": 0.0015, + "loss": 1.372, + "step": 2770 + }, + { + "epoch": 0.29324894514767935, + "grad_norm": 0.3198499381542206, + "learning_rate": 0.0015, + "loss": 1.3674, + "step": 2780 + }, + { + "epoch": 0.29430379746835444, + "grad_norm": 0.32899436354637146, + "learning_rate": 0.0015, + "loss": 1.3651, + "step": 2790 + }, + { + "epoch": 0.29535864978902954, + "grad_norm": 0.36924058198928833, + "learning_rate": 0.0015, + "loss": 1.3639, + "step": 2800 + }, + { + "epoch": 0.29641350210970463, + "grad_norm": 0.3184834122657776, + "learning_rate": 0.0015, + "loss": 1.3648, + "step": 2810 + }, + { + "epoch": 0.2974683544303797, + "grad_norm": 0.4172210097312927, + "learning_rate": 0.0015, + "loss": 1.3676, + "step": 2820 + }, + { + "epoch": 0.29852320675105487, + "grad_norm": 0.3834500312805176, + "learning_rate": 0.0015, + "loss": 1.3455, + "step": 2830 + }, + { + "epoch": 0.29957805907172996, + "grad_norm": 0.3521396517753601, + "learning_rate": 0.0015, + "loss": 1.3502, + "step": 2840 + }, + { + "epoch": 0.30063291139240506, + "grad_norm": 0.31845158338546753, + "learning_rate": 0.0015, + "loss": 1.3533, + "step": 2850 + }, + { + "epoch": 0.30168776371308015, + "grad_norm": 0.3387175500392914, + "learning_rate": 0.0015, + "loss": 1.3583, + "step": 2860 + }, + { + "epoch": 0.3027426160337553, + "grad_norm": 0.36378470063209534, + "learning_rate": 0.0015, + "loss": 1.3588, + "step": 2870 + }, + { + "epoch": 0.3037974683544304, + "grad_norm": 0.3735678791999817, + "learning_rate": 0.0015, 
+ "loss": 1.3471, + "step": 2880 + }, + { + "epoch": 0.3048523206751055, + "grad_norm": 0.3498466908931732, + "learning_rate": 0.0015, + "loss": 1.3488, + "step": 2890 + }, + { + "epoch": 0.3059071729957806, + "grad_norm": 0.3306233882904053, + "learning_rate": 0.0015, + "loss": 1.3528, + "step": 2900 + }, + { + "epoch": 0.3069620253164557, + "grad_norm": 0.35176217555999756, + "learning_rate": 0.0015, + "loss": 1.3582, + "step": 2910 + }, + { + "epoch": 0.3080168776371308, + "grad_norm": 0.36599618196487427, + "learning_rate": 0.0015, + "loss": 1.3515, + "step": 2920 + }, + { + "epoch": 0.3090717299578059, + "grad_norm": 0.3750476539134979, + "learning_rate": 0.0015, + "loss": 1.3497, + "step": 2930 + }, + { + "epoch": 0.310126582278481, + "grad_norm": 0.36336538195610046, + "learning_rate": 0.0015, + "loss": 1.3542, + "step": 2940 + }, + { + "epoch": 0.3111814345991561, + "grad_norm": 0.36796873807907104, + "learning_rate": 0.0015, + "loss": 1.3614, + "step": 2950 + }, + { + "epoch": 0.31223628691983124, + "grad_norm": 0.4234572649002075, + "learning_rate": 0.0015, + "loss": 1.3459, + "step": 2960 + }, + { + "epoch": 0.31329113924050633, + "grad_norm": 0.3786514401435852, + "learning_rate": 0.0015, + "loss": 1.3409, + "step": 2970 + }, + { + "epoch": 0.3143459915611814, + "grad_norm": 0.3337559401988983, + "learning_rate": 0.0015, + "loss": 1.3446, + "step": 2980 + }, + { + "epoch": 0.3154008438818565, + "grad_norm": 0.3283361792564392, + "learning_rate": 0.0015, + "loss": 1.3599, + "step": 2990 + }, + { + "epoch": 0.31645569620253167, + "grad_norm": 0.3510412871837616, + "learning_rate": 0.0015, + "loss": 1.3363, + "step": 3000 + }, + { + "epoch": 0.31751054852320676, + "grad_norm": 0.34402918815612793, + "learning_rate": 0.0015, + "loss": 1.3446, + "step": 3010 + }, + { + "epoch": 0.31856540084388185, + "grad_norm": 0.34152328968048096, + "learning_rate": 0.0015, + "loss": 1.3479, + "step": 3020 + }, + { + "epoch": 0.31962025316455694, + "grad_norm": 
0.3709448575973511, + "learning_rate": 0.0015, + "loss": 1.3391, + "step": 3030 + }, + { + "epoch": 0.3206751054852321, + "grad_norm": 0.4077426791191101, + "learning_rate": 0.0015, + "loss": 1.3466, + "step": 3040 + }, + { + "epoch": 0.3217299578059072, + "grad_norm": 0.3553972840309143, + "learning_rate": 0.0015, + "loss": 1.3401, + "step": 3050 + }, + { + "epoch": 0.3227848101265823, + "grad_norm": 0.32798343896865845, + "learning_rate": 0.0015, + "loss": 1.3504, + "step": 3060 + }, + { + "epoch": 0.32383966244725737, + "grad_norm": 0.3771567642688751, + "learning_rate": 0.0015, + "loss": 1.3409, + "step": 3070 + }, + { + "epoch": 0.32489451476793246, + "grad_norm": 0.3586834967136383, + "learning_rate": 0.0015, + "loss": 1.3315, + "step": 3080 + }, + { + "epoch": 0.3259493670886076, + "grad_norm": 0.32795462012290955, + "learning_rate": 0.0015, + "loss": 1.3485, + "step": 3090 + }, + { + "epoch": 0.3270042194092827, + "grad_norm": 0.36836713552474976, + "learning_rate": 0.0015, + "loss": 1.3378, + "step": 3100 + }, + { + "epoch": 0.3280590717299578, + "grad_norm": 0.41258755326271057, + "learning_rate": 0.0015, + "loss": 1.3385, + "step": 3110 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 0.3700130879878998, + "learning_rate": 0.0015, + "loss": 1.3392, + "step": 3120 + }, + { + "epoch": 0.33016877637130804, + "grad_norm": 0.35686036944389343, + "learning_rate": 0.0015, + "loss": 1.3516, + "step": 3130 + }, + { + "epoch": 0.33122362869198313, + "grad_norm": 0.4040427505970001, + "learning_rate": 0.0015, + "loss": 1.3291, + "step": 3140 + }, + { + "epoch": 0.3322784810126582, + "grad_norm": 0.3243670165538788, + "learning_rate": 0.0015, + "loss": 1.3382, + "step": 3150 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.3412677049636841, + "learning_rate": 0.0015, + "loss": 1.3403, + "step": 3160 + }, + { + "epoch": 0.33438818565400846, + "grad_norm": 0.3531607985496521, + "learning_rate": 0.0015, + "loss": 1.3417, + "step": 3170 + }, + { + "epoch": 
0.33544303797468356, + "grad_norm": 0.3264332115650177, + "learning_rate": 0.0015, + "loss": 1.3345, + "step": 3180 + }, + { + "epoch": 0.33649789029535865, + "grad_norm": 0.38159504532814026, + "learning_rate": 0.0015, + "loss": 1.3335, + "step": 3190 + }, + { + "epoch": 0.33755274261603374, + "grad_norm": 0.34057164192199707, + "learning_rate": 0.0015, + "loss": 1.3303, + "step": 3200 + }, + { + "epoch": 0.33860759493670883, + "grad_norm": 0.4352438151836395, + "learning_rate": 0.0015, + "loss": 1.3452, + "step": 3210 + }, + { + "epoch": 0.339662447257384, + "grad_norm": 0.3315022587776184, + "learning_rate": 0.0015, + "loss": 1.3366, + "step": 3220 + }, + { + "epoch": 0.3407172995780591, + "grad_norm": 0.33373403549194336, + "learning_rate": 0.0015, + "loss": 1.3275, + "step": 3230 + }, + { + "epoch": 0.34177215189873417, + "grad_norm": 0.33522918820381165, + "learning_rate": 0.0015, + "loss": 1.3095, + "step": 3240 + }, + { + "epoch": 0.34282700421940926, + "grad_norm": 0.32375264167785645, + "learning_rate": 0.0015, + "loss": 1.3337, + "step": 3250 + }, + { + "epoch": 0.3438818565400844, + "grad_norm": 0.33494749665260315, + "learning_rate": 0.0015, + "loss": 1.3273, + "step": 3260 + }, + { + "epoch": 0.3449367088607595, + "grad_norm": 0.34836721420288086, + "learning_rate": 0.0015, + "loss": 1.3428, + "step": 3270 + }, + { + "epoch": 0.3459915611814346, + "grad_norm": 0.32541200518608093, + "learning_rate": 0.0015, + "loss": 1.3393, + "step": 3280 + }, + { + "epoch": 0.3470464135021097, + "grad_norm": 0.3796813189983368, + "learning_rate": 0.0015, + "loss": 1.3199, + "step": 3290 + }, + { + "epoch": 0.34810126582278483, + "grad_norm": 0.3402756154537201, + "learning_rate": 0.0015, + "loss": 1.3315, + "step": 3300 + }, + { + "epoch": 0.3491561181434599, + "grad_norm": 0.34173041582107544, + "learning_rate": 0.0015, + "loss": 1.3347, + "step": 3310 + }, + { + "epoch": 0.350210970464135, + "grad_norm": 0.33362719416618347, + "learning_rate": 0.0015, + "loss": 
1.3397, + "step": 3320 + }, + { + "epoch": 0.3512658227848101, + "grad_norm": 0.345304012298584, + "learning_rate": 0.0015, + "loss": 1.3266, + "step": 3330 + }, + { + "epoch": 0.35232067510548526, + "grad_norm": 0.36389777064323425, + "learning_rate": 0.0015, + "loss": 1.3297, + "step": 3340 + }, + { + "epoch": 0.35337552742616035, + "grad_norm": 0.37752678990364075, + "learning_rate": 0.0015, + "loss": 1.3234, + "step": 3350 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 0.33431223034858704, + "learning_rate": 0.0015, + "loss": 1.3252, + "step": 3360 + }, + { + "epoch": 0.35548523206751054, + "grad_norm": 0.4039674699306488, + "learning_rate": 0.0015, + "loss": 1.3316, + "step": 3370 + }, + { + "epoch": 0.35654008438818563, + "grad_norm": 0.3299323320388794, + "learning_rate": 0.0015, + "loss": 1.3242, + "step": 3380 + }, + { + "epoch": 0.3575949367088608, + "grad_norm": 0.3494005799293518, + "learning_rate": 0.0015, + "loss": 1.3218, + "step": 3390 + }, + { + "epoch": 0.35864978902953587, + "grad_norm": 0.3371613025665283, + "learning_rate": 0.0015, + "loss": 1.3285, + "step": 3400 + }, + { + "epoch": 0.35970464135021096, + "grad_norm": 0.3141007125377655, + "learning_rate": 0.0015, + "loss": 1.3247, + "step": 3410 + }, + { + "epoch": 0.36075949367088606, + "grad_norm": 0.3440660536289215, + "learning_rate": 0.0015, + "loss": 1.3065, + "step": 3420 + }, + { + "epoch": 0.3618143459915612, + "grad_norm": 0.3393572270870209, + "learning_rate": 0.0015, + "loss": 1.3221, + "step": 3430 + }, + { + "epoch": 0.3628691983122363, + "grad_norm": 0.35718950629234314, + "learning_rate": 0.0015, + "loss": 1.3221, + "step": 3440 + }, + { + "epoch": 0.3639240506329114, + "grad_norm": 0.41040876507759094, + "learning_rate": 0.0015, + "loss": 1.3196, + "step": 3450 + }, + { + "epoch": 0.3649789029535865, + "grad_norm": 0.35438480973243713, + "learning_rate": 0.0015, + "loss": 1.3131, + "step": 3460 + }, + { + "epoch": 0.36603375527426163, + "grad_norm": 
0.35374361276626587, + "learning_rate": 0.0015, + "loss": 1.3287, + "step": 3470 + }, + { + "epoch": 0.3670886075949367, + "grad_norm": 0.36048999428749084, + "learning_rate": 0.0015, + "loss": 1.3339, + "step": 3480 + }, + { + "epoch": 0.3681434599156118, + "grad_norm": 0.3104354441165924, + "learning_rate": 0.0015, + "loss": 1.3235, + "step": 3490 + }, + { + "epoch": 0.3691983122362869, + "grad_norm": 0.3356916606426239, + "learning_rate": 0.0015, + "loss": 1.3059, + "step": 3500 + }, + { + "epoch": 0.370253164556962, + "grad_norm": 0.3305603563785553, + "learning_rate": 0.0015, + "loss": 1.3261, + "step": 3510 + }, + { + "epoch": 0.37130801687763715, + "grad_norm": 0.35006171464920044, + "learning_rate": 0.0015, + "loss": 1.3231, + "step": 3520 + }, + { + "epoch": 0.37236286919831224, + "grad_norm": 0.3865632116794586, + "learning_rate": 0.0015, + "loss": 1.3328, + "step": 3530 + }, + { + "epoch": 0.37341772151898733, + "grad_norm": 0.3990705609321594, + "learning_rate": 0.0015, + "loss": 1.3248, + "step": 3540 + }, + { + "epoch": 0.3744725738396624, + "grad_norm": 0.32694146037101746, + "learning_rate": 0.0015, + "loss": 1.3134, + "step": 3550 + }, + { + "epoch": 0.3755274261603376, + "grad_norm": 0.36591851711273193, + "learning_rate": 0.0015, + "loss": 1.3184, + "step": 3560 + }, + { + "epoch": 0.37658227848101267, + "grad_norm": 0.33968791365623474, + "learning_rate": 0.0015, + "loss": 1.3137, + "step": 3570 + }, + { + "epoch": 0.37763713080168776, + "grad_norm": 0.4435545802116394, + "learning_rate": 0.0015, + "loss": 1.3159, + "step": 3580 + }, + { + "epoch": 0.37869198312236285, + "grad_norm": 0.3401292562484741, + "learning_rate": 0.0015, + "loss": 1.326, + "step": 3590 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 0.3403913378715515, + "learning_rate": 0.0015, + "loss": 1.3122, + "step": 3600 + }, + { + "epoch": 0.3808016877637131, + "grad_norm": 0.3234756290912628, + "learning_rate": 0.0015, + "loss": 1.3101, + "step": 3610 + }, + { + "epoch": 
0.3818565400843882, + "grad_norm": 0.33335044980049133, + "learning_rate": 0.0015, + "loss": 1.3139, + "step": 3620 + }, + { + "epoch": 0.3829113924050633, + "grad_norm": 0.33656445145606995, + "learning_rate": 0.0015, + "loss": 1.3098, + "step": 3630 + }, + { + "epoch": 0.38396624472573837, + "grad_norm": 0.39324823021888733, + "learning_rate": 0.0015, + "loss": 1.3116, + "step": 3640 + }, + { + "epoch": 0.3850210970464135, + "grad_norm": 0.3268013894557953, + "learning_rate": 0.0015, + "loss": 1.3187, + "step": 3650 + }, + { + "epoch": 0.3860759493670886, + "grad_norm": 0.34896695613861084, + "learning_rate": 0.0015, + "loss": 1.3125, + "step": 3660 + }, + { + "epoch": 0.3871308016877637, + "grad_norm": 0.34891918301582336, + "learning_rate": 0.0015, + "loss": 1.3068, + "step": 3670 + }, + { + "epoch": 0.3881856540084388, + "grad_norm": 0.48617294430732727, + "learning_rate": 0.0015, + "loss": 1.3129, + "step": 3680 + }, + { + "epoch": 0.38924050632911394, + "grad_norm": 0.3484684228897095, + "learning_rate": 0.0015, + "loss": 1.3198, + "step": 3690 + }, + { + "epoch": 0.39029535864978904, + "grad_norm": 0.40289464592933655, + "learning_rate": 0.0015, + "loss": 1.3212, + "step": 3700 + }, + { + "epoch": 0.39135021097046413, + "grad_norm": 0.3295171856880188, + "learning_rate": 0.0015, + "loss": 1.3115, + "step": 3710 + }, + { + "epoch": 0.3924050632911392, + "grad_norm": 0.4097455143928528, + "learning_rate": 0.0015, + "loss": 1.2935, + "step": 3720 + }, + { + "epoch": 0.39345991561181437, + "grad_norm": 0.34945106506347656, + "learning_rate": 0.0015, + "loss": 1.3109, + "step": 3730 + }, + { + "epoch": 0.39451476793248946, + "grad_norm": 0.368396520614624, + "learning_rate": 0.0015, + "loss": 1.3092, + "step": 3740 + }, + { + "epoch": 0.39556962025316456, + "grad_norm": 0.39238911867141724, + "learning_rate": 0.0015, + "loss": 1.3136, + "step": 3750 + }, + { + "epoch": 0.39662447257383965, + "grad_norm": 0.3448035717010498, + "learning_rate": 0.0015, + "loss": 
1.3017, + "step": 3760 + }, + { + "epoch": 0.39767932489451474, + "grad_norm": 0.3241119980812073, + "learning_rate": 0.0015, + "loss": 1.3197, + "step": 3770 + }, + { + "epoch": 0.3987341772151899, + "grad_norm": 0.3216939866542816, + "learning_rate": 0.0015, + "loss": 1.3042, + "step": 3780 + }, + { + "epoch": 0.399789029535865, + "grad_norm": 0.4088415503501892, + "learning_rate": 0.0015, + "loss": 1.3082, + "step": 3790 + }, + { + "epoch": 0.4008438818565401, + "grad_norm": 0.3490767478942871, + "learning_rate": 0.0015, + "loss": 1.3148, + "step": 3800 + }, + { + "epoch": 0.40189873417721517, + "grad_norm": 0.34655386209487915, + "learning_rate": 0.0015, + "loss": 1.3002, + "step": 3810 + }, + { + "epoch": 0.4029535864978903, + "grad_norm": 0.31820032000541687, + "learning_rate": 0.0015, + "loss": 1.2961, + "step": 3820 + }, + { + "epoch": 0.4040084388185654, + "grad_norm": 0.344877690076828, + "learning_rate": 0.0015, + "loss": 1.3002, + "step": 3830 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 0.3609689176082611, + "learning_rate": 0.0015, + "loss": 1.3139, + "step": 3840 + }, + { + "epoch": 0.4061181434599156, + "grad_norm": 0.36878663301467896, + "learning_rate": 0.0015, + "loss": 1.3012, + "step": 3850 + }, + { + "epoch": 0.40717299578059074, + "grad_norm": 0.36532774567604065, + "learning_rate": 0.0015, + "loss": 1.3117, + "step": 3860 + }, + { + "epoch": 0.40822784810126583, + "grad_norm": 0.3955930769443512, + "learning_rate": 0.0015, + "loss": 1.3018, + "step": 3870 + }, + { + "epoch": 0.4092827004219409, + "grad_norm": 0.331284761428833, + "learning_rate": 0.0015, + "loss": 1.293, + "step": 3880 + }, + { + "epoch": 0.410337552742616, + "grad_norm": 0.3166908323764801, + "learning_rate": 0.0015, + "loss": 1.2926, + "step": 3890 + }, + { + "epoch": 0.41139240506329117, + "grad_norm": 0.4257475733757019, + "learning_rate": 0.0015, + "loss": 1.2952, + "step": 3900 + }, + { + "epoch": 0.41244725738396626, + "grad_norm": 0.3384430706501007, + 
"learning_rate": 0.0015, + "loss": 1.3007, + "step": 3910 + }, + { + "epoch": 0.41350210970464135, + "grad_norm": 0.3213377892971039, + "learning_rate": 0.0015, + "loss": 1.2939, + "step": 3920 + }, + { + "epoch": 0.41455696202531644, + "grad_norm": 0.33240780234336853, + "learning_rate": 0.0015, + "loss": 1.2996, + "step": 3930 + }, + { + "epoch": 0.41561181434599154, + "grad_norm": 0.3315512239933014, + "learning_rate": 0.0015, + "loss": 1.2951, + "step": 3940 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 0.31810611486434937, + "learning_rate": 0.0015, + "loss": 1.2988, + "step": 3950 + }, + { + "epoch": 0.4177215189873418, + "grad_norm": 0.3297857940196991, + "learning_rate": 0.0015, + "loss": 1.3009, + "step": 3960 + }, + { + "epoch": 0.41877637130801687, + "grad_norm": 0.43589648604393005, + "learning_rate": 0.0015, + "loss": 1.2905, + "step": 3970 + }, + { + "epoch": 0.41983122362869196, + "grad_norm": 0.33363038301467896, + "learning_rate": 0.0015, + "loss": 1.3039, + "step": 3980 + }, + { + "epoch": 0.4208860759493671, + "grad_norm": 0.3441077172756195, + "learning_rate": 0.0015, + "loss": 1.2928, + "step": 3990 + }, + { + "epoch": 0.4219409282700422, + "grad_norm": 0.318351149559021, + "learning_rate": 0.0015, + "loss": 1.3061, + "step": 4000 + }, + { + "epoch": 0.4229957805907173, + "grad_norm": 0.367177814245224, + "learning_rate": 0.0015, + "loss": 1.295, + "step": 4010 + }, + { + "epoch": 0.4240506329113924, + "grad_norm": 0.3348611891269684, + "learning_rate": 0.0015, + "loss": 1.2949, + "step": 4020 + }, + { + "epoch": 0.42510548523206754, + "grad_norm": 0.32858869433403015, + "learning_rate": 0.0015, + "loss": 1.2884, + "step": 4030 + }, + { + "epoch": 0.42616033755274263, + "grad_norm": 0.32444247603416443, + "learning_rate": 0.0015, + "loss": 1.2979, + "step": 4040 + }, + { + "epoch": 0.4272151898734177, + "grad_norm": 0.34852734208106995, + "learning_rate": 0.0015, + "loss": 1.3015, + "step": 4050 + }, + { + "epoch": 0.4282700421940928, 
+ "grad_norm": 0.33892375230789185, + "learning_rate": 0.0015, + "loss": 1.2994, + "step": 4060 + }, + { + "epoch": 0.4293248945147679, + "grad_norm": 0.3241480886936188, + "learning_rate": 0.0015, + "loss": 1.2973, + "step": 4070 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 0.4842641353607178, + "learning_rate": 0.0015, + "loss": 1.3032, + "step": 4080 + }, + { + "epoch": 0.43143459915611815, + "grad_norm": 0.3711332082748413, + "learning_rate": 0.0015, + "loss": 1.2966, + "step": 4090 + }, + { + "epoch": 0.43248945147679324, + "grad_norm": 0.3361760377883911, + "learning_rate": 0.0015, + "loss": 1.2948, + "step": 4100 + }, + { + "epoch": 0.43354430379746833, + "grad_norm": 0.3823705315589905, + "learning_rate": 0.0015, + "loss": 1.2865, + "step": 4110 + }, + { + "epoch": 0.4345991561181435, + "grad_norm": 0.33324065804481506, + "learning_rate": 0.0015, + "loss": 1.2931, + "step": 4120 + }, + { + "epoch": 0.4356540084388186, + "grad_norm": 0.3460291922092438, + "learning_rate": 0.0015, + "loss": 1.2852, + "step": 4130 + }, + { + "epoch": 0.43670886075949367, + "grad_norm": 0.3603265583515167, + "learning_rate": 0.0015, + "loss": 1.2882, + "step": 4140 + }, + { + "epoch": 0.43776371308016876, + "grad_norm": 0.3887493312358856, + "learning_rate": 0.0015, + "loss": 1.2957, + "step": 4150 + }, + { + "epoch": 0.4388185654008439, + "grad_norm": 0.3834715783596039, + "learning_rate": 0.0015, + "loss": 1.288, + "step": 4160 + }, + { + "epoch": 0.439873417721519, + "grad_norm": 0.375080943107605, + "learning_rate": 0.0015, + "loss": 1.2877, + "step": 4170 + }, + { + "epoch": 0.4409282700421941, + "grad_norm": 0.32457664608955383, + "learning_rate": 0.0015, + "loss": 1.2849, + "step": 4180 + }, + { + "epoch": 0.4419831223628692, + "grad_norm": 0.3321995437145233, + "learning_rate": 0.0015, + "loss": 1.2886, + "step": 4190 + }, + { + "epoch": 0.4430379746835443, + "grad_norm": 0.3851778209209442, + "learning_rate": 0.0015, + "loss": 1.2932, + "step": 4200 + }, + { 
+ "epoch": 0.4440928270042194, + "grad_norm": 0.3327183425426483, + "learning_rate": 0.0015, + "loss": 1.2827, + "step": 4210 + }, + { + "epoch": 0.4451476793248945, + "grad_norm": 0.315941721200943, + "learning_rate": 0.0015, + "loss": 1.2908, + "step": 4220 + }, + { + "epoch": 0.4462025316455696, + "grad_norm": 0.3883848488330841, + "learning_rate": 0.0015, + "loss": 1.2886, + "step": 4230 + }, + { + "epoch": 0.4472573839662447, + "grad_norm": 0.35884931683540344, + "learning_rate": 0.0015, + "loss": 1.2842, + "step": 4240 + }, + { + "epoch": 0.44831223628691985, + "grad_norm": 0.41224420070648193, + "learning_rate": 0.0015, + "loss": 1.2897, + "step": 4250 + }, + { + "epoch": 0.44936708860759494, + "grad_norm": 0.34621503949165344, + "learning_rate": 0.0015, + "loss": 1.2967, + "step": 4260 + }, + { + "epoch": 0.45042194092827004, + "grad_norm": 0.310474157333374, + "learning_rate": 0.0015, + "loss": 1.2854, + "step": 4270 + }, + { + "epoch": 0.45147679324894513, + "grad_norm": 0.36023226380348206, + "learning_rate": 0.0015, + "loss": 1.2778, + "step": 4280 + }, + { + "epoch": 0.4525316455696203, + "grad_norm": 0.33774444460868835, + "learning_rate": 0.0015, + "loss": 1.2888, + "step": 4290 + }, + { + "epoch": 0.45358649789029537, + "grad_norm": 0.3492637574672699, + "learning_rate": 0.0015, + "loss": 1.2899, + "step": 4300 + }, + { + "epoch": 0.45464135021097046, + "grad_norm": 0.3130638301372528, + "learning_rate": 0.0015, + "loss": 1.2858, + "step": 4310 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 0.34976881742477417, + "learning_rate": 0.0015, + "loss": 1.2814, + "step": 4320 + }, + { + "epoch": 0.45675105485232065, + "grad_norm": 0.3387364149093628, + "learning_rate": 0.0015, + "loss": 1.2885, + "step": 4330 + }, + { + "epoch": 0.4578059071729958, + "grad_norm": 0.37869808077812195, + "learning_rate": 0.0015, + "loss": 1.2848, + "step": 4340 + }, + { + "epoch": 0.4588607594936709, + "grad_norm": 0.367775559425354, + "learning_rate": 0.0015, + 
"loss": 1.2837, + "step": 4350 + }, + { + "epoch": 0.459915611814346, + "grad_norm": 0.35876554250717163, + "learning_rate": 0.0015, + "loss": 1.2969, + "step": 4360 + }, + { + "epoch": 0.4609704641350211, + "grad_norm": 0.33714690804481506, + "learning_rate": 0.0015, + "loss": 1.2769, + "step": 4370 + }, + { + "epoch": 0.4620253164556962, + "grad_norm": 0.43783363699913025, + "learning_rate": 0.0015, + "loss": 1.2823, + "step": 4380 + }, + { + "epoch": 0.4630801687763713, + "grad_norm": 0.3282812535762787, + "learning_rate": 0.0015, + "loss": 1.2804, + "step": 4390 + }, + { + "epoch": 0.4641350210970464, + "grad_norm": 0.314325213432312, + "learning_rate": 0.0015, + "loss": 1.2849, + "step": 4400 + }, + { + "epoch": 0.4651898734177215, + "grad_norm": 0.3292233347892761, + "learning_rate": 0.0015, + "loss": 1.2959, + "step": 4410 + }, + { + "epoch": 0.46624472573839665, + "grad_norm": 0.33296987414360046, + "learning_rate": 0.0015, + "loss": 1.281, + "step": 4420 + }, + { + "epoch": 0.46729957805907174, + "grad_norm": 0.35254812240600586, + "learning_rate": 0.0015, + "loss": 1.2845, + "step": 4430 + }, + { + "epoch": 0.46835443037974683, + "grad_norm": 0.3571605980396271, + "learning_rate": 0.0015, + "loss": 1.2818, + "step": 4440 + }, + { + "epoch": 0.4694092827004219, + "grad_norm": 0.3396008312702179, + "learning_rate": 0.0015, + "loss": 1.2875, + "step": 4450 + }, + { + "epoch": 0.4704641350210971, + "grad_norm": 0.3483985662460327, + "learning_rate": 0.0015, + "loss": 1.275, + "step": 4460 + }, + { + "epoch": 0.47151898734177217, + "grad_norm": 0.36943528056144714, + "learning_rate": 0.0015, + "loss": 1.2838, + "step": 4470 + }, + { + "epoch": 0.47257383966244726, + "grad_norm": 0.3278040885925293, + "learning_rate": 0.0015, + "loss": 1.2895, + "step": 4480 + }, + { + "epoch": 0.47362869198312235, + "grad_norm": 0.34484514594078064, + "learning_rate": 0.0015, + "loss": 1.2773, + "step": 4490 + }, + { + "epoch": 0.47468354430379744, + "grad_norm": 
0.4180009365081787, + "learning_rate": 0.0015, + "loss": 1.2789, + "step": 4500 + }, + { + "epoch": 0.4757383966244726, + "grad_norm": 0.3349873125553131, + "learning_rate": 0.0015, + "loss": 1.2832, + "step": 4510 + }, + { + "epoch": 0.4767932489451477, + "grad_norm": 0.32025009393692017, + "learning_rate": 0.0015, + "loss": 1.2658, + "step": 4520 + }, + { + "epoch": 0.4778481012658228, + "grad_norm": 0.35468825697898865, + "learning_rate": 0.0015, + "loss": 1.2846, + "step": 4530 + }, + { + "epoch": 0.47890295358649787, + "grad_norm": 0.338003545999527, + "learning_rate": 0.0015, + "loss": 1.2838, + "step": 4540 + }, + { + "epoch": 0.479957805907173, + "grad_norm": 0.41642409563064575, + "learning_rate": 0.0015, + "loss": 1.2839, + "step": 4550 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 0.3109353184700012, + "learning_rate": 0.0015, + "loss": 1.2749, + "step": 4560 + }, + { + "epoch": 0.4820675105485232, + "grad_norm": 0.35001808404922485, + "learning_rate": 0.0015, + "loss": 1.2547, + "step": 4570 + }, + { + "epoch": 0.4831223628691983, + "grad_norm": 0.3922937512397766, + "learning_rate": 0.0015, + "loss": 1.2803, + "step": 4580 + }, + { + "epoch": 0.48417721518987344, + "grad_norm": 0.3586471974849701, + "learning_rate": 0.0015, + "loss": 1.2677, + "step": 4590 + }, + { + "epoch": 0.48523206751054854, + "grad_norm": 0.34954914450645447, + "learning_rate": 0.0015, + "loss": 1.2831, + "step": 4600 + }, + { + "epoch": 0.48628691983122363, + "grad_norm": 0.3975285291671753, + "learning_rate": 0.0015, + "loss": 1.2866, + "step": 4610 + }, + { + "epoch": 0.4873417721518987, + "grad_norm": 0.33980458974838257, + "learning_rate": 0.0015, + "loss": 1.2652, + "step": 4620 + }, + { + "epoch": 0.4883966244725738, + "grad_norm": 0.3447878360748291, + "learning_rate": 0.0015, + "loss": 1.2797, + "step": 4630 + }, + { + "epoch": 0.48945147679324896, + "grad_norm": 0.3304397165775299, + "learning_rate": 0.0015, + "loss": 1.2827, + "step": 4640 + }, + { + "epoch": 
0.49050632911392406, + "grad_norm": 0.33216533064842224, + "learning_rate": 0.0015, + "loss": 1.2829, + "step": 4650 + }, + { + "epoch": 0.49156118143459915, + "grad_norm": 0.3622681200504303, + "learning_rate": 0.0015, + "loss": 1.2697, + "step": 4660 + }, + { + "epoch": 0.49261603375527424, + "grad_norm": 0.3417949676513672, + "learning_rate": 0.0015, + "loss": 1.2715, + "step": 4670 + }, + { + "epoch": 0.4936708860759494, + "grad_norm": 0.30141401290893555, + "learning_rate": 0.0015, + "loss": 1.2669, + "step": 4680 + }, + { + "epoch": 0.4947257383966245, + "grad_norm": 0.3838488459587097, + "learning_rate": 0.0015, + "loss": 1.2673, + "step": 4690 + }, + { + "epoch": 0.4957805907172996, + "grad_norm": 0.3914671838283539, + "learning_rate": 0.0015, + "loss": 1.275, + "step": 4700 + }, + { + "epoch": 0.49683544303797467, + "grad_norm": 0.34327882528305054, + "learning_rate": 0.0015, + "loss": 1.2759, + "step": 4710 + }, + { + "epoch": 0.4978902953586498, + "grad_norm": 0.41521546244621277, + "learning_rate": 0.0015, + "loss": 1.2751, + "step": 4720 + }, + { + "epoch": 0.4989451476793249, + "grad_norm": 0.3268080949783325, + "learning_rate": 0.0015, + "loss": 1.2678, + "step": 4730 + }, + { + "epoch": 0.5, + "grad_norm": 0.34257772564888, + "learning_rate": 0.0015, + "loss": 1.2655, + "step": 4740 + }, + { + "epoch": 0.5010548523206751, + "grad_norm": 0.3677382171154022, + "learning_rate": 0.0015, + "loss": 1.2863, + "step": 4750 + }, + { + "epoch": 0.5021097046413502, + "grad_norm": 0.3203747868537903, + "learning_rate": 0.0015, + "loss": 1.2714, + "step": 4760 + }, + { + "epoch": 0.5031645569620253, + "grad_norm": 0.30679064989089966, + "learning_rate": 0.0015, + "loss": 1.2727, + "step": 4770 + }, + { + "epoch": 0.5042194092827004, + "grad_norm": 0.31779491901397705, + "learning_rate": 0.0015, + "loss": 1.2805, + "step": 4780 + }, + { + "epoch": 0.5052742616033755, + "grad_norm": 0.4639074504375458, + "learning_rate": 0.0015, + "loss": 1.2648, + "step": 4790 + 
}, + { + "epoch": 0.5063291139240507, + "grad_norm": 0.35582196712493896, + "learning_rate": 0.0015, + "loss": 1.2671, + "step": 4800 + }, + { + "epoch": 0.5073839662447257, + "grad_norm": 0.32101449370384216, + "learning_rate": 0.0015, + "loss": 1.2664, + "step": 4810 + }, + { + "epoch": 0.5084388185654009, + "grad_norm": 0.31420451402664185, + "learning_rate": 0.0015, + "loss": 1.2863, + "step": 4820 + }, + { + "epoch": 0.509493670886076, + "grad_norm": 0.3510427474975586, + "learning_rate": 0.0015, + "loss": 1.2685, + "step": 4830 + }, + { + "epoch": 0.510548523206751, + "grad_norm": 0.3493404984474182, + "learning_rate": 0.0015, + "loss": 1.2643, + "step": 4840 + }, + { + "epoch": 0.5116033755274262, + "grad_norm": 0.33023601770401, + "learning_rate": 0.0015, + "loss": 1.2788, + "step": 4850 + }, + { + "epoch": 0.5126582278481012, + "grad_norm": 0.33449527621269226, + "learning_rate": 0.0015, + "loss": 1.2657, + "step": 4860 + }, + { + "epoch": 0.5137130801687764, + "grad_norm": 0.3172394037246704, + "learning_rate": 0.0015, + "loss": 1.258, + "step": 4870 + }, + { + "epoch": 0.5147679324894515, + "grad_norm": 0.3701837956905365, + "learning_rate": 0.0015, + "loss": 1.2801, + "step": 4880 + }, + { + "epoch": 0.5158227848101266, + "grad_norm": 0.34527572989463806, + "learning_rate": 0.0015, + "loss": 1.2679, + "step": 4890 + }, + { + "epoch": 0.5168776371308017, + "grad_norm": 0.3702089786529541, + "learning_rate": 0.0015, + "loss": 1.2764, + "step": 4900 + }, + { + "epoch": 0.5179324894514767, + "grad_norm": 0.36963438987731934, + "learning_rate": 0.0015, + "loss": 1.2698, + "step": 4910 + }, + { + "epoch": 0.5189873417721519, + "grad_norm": 0.3635721206665039, + "learning_rate": 0.0015, + "loss": 1.2623, + "step": 4920 + }, + { + "epoch": 0.520042194092827, + "grad_norm": 0.3239406943321228, + "learning_rate": 0.0015, + "loss": 1.2705, + "step": 4930 + }, + { + "epoch": 0.5210970464135021, + "grad_norm": 0.34057164192199707, + "learning_rate": 0.0015, + 
"loss": 1.2689, + "step": 4940 + }, + { + "epoch": 0.5221518987341772, + "grad_norm": 0.35554683208465576, + "learning_rate": 0.0015, + "loss": 1.276, + "step": 4950 + }, + { + "epoch": 0.5232067510548524, + "grad_norm": 0.3254257142543793, + "learning_rate": 0.0015, + "loss": 1.258, + "step": 4960 + }, + { + "epoch": 0.5242616033755274, + "grad_norm": 0.36377695202827454, + "learning_rate": 0.0015, + "loss": 1.2638, + "step": 4970 + }, + { + "epoch": 0.5253164556962026, + "grad_norm": 0.3804371654987335, + "learning_rate": 0.0015, + "loss": 1.2603, + "step": 4980 + }, + { + "epoch": 0.5263713080168776, + "grad_norm": 0.34835729002952576, + "learning_rate": 0.0015, + "loss": 1.2637, + "step": 4990 + }, + { + "epoch": 0.5274261603375527, + "grad_norm": 0.333615779876709, + "learning_rate": 0.0015, + "loss": 1.273, + "step": 5000 + }, + { + "epoch": 0.5284810126582279, + "grad_norm": 0.3416522741317749, + "learning_rate": 0.0015, + "loss": 1.2677, + "step": 5010 + }, + { + "epoch": 0.5295358649789029, + "grad_norm": 0.33487850427627563, + "learning_rate": 0.0015, + "loss": 1.2629, + "step": 5020 + }, + { + "epoch": 0.5305907172995781, + "grad_norm": 0.32856062054634094, + "learning_rate": 0.0015, + "loss": 1.2473, + "step": 5030 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 0.32477837800979614, + "learning_rate": 0.0015, + "loss": 1.2735, + "step": 5040 + }, + { + "epoch": 0.5327004219409283, + "grad_norm": 0.31196415424346924, + "learning_rate": 0.0015, + "loss": 1.2604, + "step": 5050 + }, + { + "epoch": 0.5337552742616034, + "grad_norm": 0.3626239001750946, + "learning_rate": 0.0015, + "loss": 1.2754, + "step": 5060 + }, + { + "epoch": 0.5348101265822784, + "grad_norm": 0.347139835357666, + "learning_rate": 0.0015, + "loss": 1.2616, + "step": 5070 + }, + { + "epoch": 0.5358649789029536, + "grad_norm": 0.332815557718277, + "learning_rate": 0.0015, + "loss": 1.2756, + "step": 5080 + }, + { + "epoch": 0.5369198312236287, + "grad_norm": 0.32809755206108093, + 
"learning_rate": 0.0015, + "loss": 1.2651, + "step": 5090 + }, + { + "epoch": 0.5379746835443038, + "grad_norm": 0.35669374465942383, + "learning_rate": 0.0015, + "loss": 1.2478, + "step": 5100 + }, + { + "epoch": 0.5390295358649789, + "grad_norm": 0.3932604193687439, + "learning_rate": 0.0015, + "loss": 1.2627, + "step": 5110 + }, + { + "epoch": 0.540084388185654, + "grad_norm": 0.3809468746185303, + "learning_rate": 0.0015, + "loss": 1.2614, + "step": 5120 + }, + { + "epoch": 0.5411392405063291, + "grad_norm": 0.3322828710079193, + "learning_rate": 0.0015, + "loss": 1.2611, + "step": 5130 + }, + { + "epoch": 0.5421940928270043, + "grad_norm": 0.30540966987609863, + "learning_rate": 0.0015, + "loss": 1.2751, + "step": 5140 + }, + { + "epoch": 0.5432489451476793, + "grad_norm": 0.3536911904811859, + "learning_rate": 0.0015, + "loss": 1.2687, + "step": 5150 + }, + { + "epoch": 0.5443037974683544, + "grad_norm": 0.37020963430404663, + "learning_rate": 0.0015, + "loss": 1.2653, + "step": 5160 + }, + { + "epoch": 0.5453586497890295, + "grad_norm": 0.3286992013454437, + "learning_rate": 0.0015, + "loss": 1.2642, + "step": 5170 + }, + { + "epoch": 0.5464135021097046, + "grad_norm": 0.4192366600036621, + "learning_rate": 0.0015, + "loss": 1.2745, + "step": 5180 + }, + { + "epoch": 0.5474683544303798, + "grad_norm": 0.32625076174736023, + "learning_rate": 0.0015, + "loss": 1.2572, + "step": 5190 + }, + { + "epoch": 0.5485232067510548, + "grad_norm": 0.3306078314781189, + "learning_rate": 0.0015, + "loss": 1.2675, + "step": 5200 + }, + { + "epoch": 0.54957805907173, + "grad_norm": 0.33644092082977295, + "learning_rate": 0.0015, + "loss": 1.2556, + "step": 5210 + }, + { + "epoch": 0.5506329113924051, + "grad_norm": 0.3233281373977661, + "learning_rate": 0.0015, + "loss": 1.2636, + "step": 5220 + }, + { + "epoch": 0.5516877637130801, + "grad_norm": 0.34870925545692444, + "learning_rate": 0.0015, + "loss": 1.2567, + "step": 5230 + }, + { + "epoch": 0.5527426160337553, + 
"grad_norm": 0.3786031901836395, + "learning_rate": 0.0015, + "loss": 1.2648, + "step": 5240 + }, + { + "epoch": 0.5537974683544303, + "grad_norm": 0.3718455731868744, + "learning_rate": 0.0015, + "loss": 1.2562, + "step": 5250 + }, + { + "epoch": 0.5548523206751055, + "grad_norm": 0.40321359038352966, + "learning_rate": 0.0015, + "loss": 1.263, + "step": 5260 + }, + { + "epoch": 0.5559071729957806, + "grad_norm": 0.44219493865966797, + "learning_rate": 0.0015, + "loss": 1.2704, + "step": 5270 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 0.326164186000824, + "learning_rate": 0.0015, + "loss": 1.2592, + "step": 5280 + }, + { + "epoch": 0.5580168776371308, + "grad_norm": 0.34893083572387695, + "learning_rate": 0.0015, + "loss": 1.2584, + "step": 5290 + }, + { + "epoch": 0.5590717299578059, + "grad_norm": 0.35736313462257385, + "learning_rate": 0.0015, + "loss": 1.2661, + "step": 5300 + }, + { + "epoch": 0.560126582278481, + "grad_norm": 0.39100855588912964, + "learning_rate": 0.0015, + "loss": 1.2524, + "step": 5310 + }, + { + "epoch": 0.5611814345991561, + "grad_norm": 0.32789599895477295, + "learning_rate": 0.0015, + "loss": 1.2548, + "step": 5320 + }, + { + "epoch": 0.5622362869198312, + "grad_norm": 0.33129817247390747, + "learning_rate": 0.0015, + "loss": 1.2625, + "step": 5330 + }, + { + "epoch": 0.5632911392405063, + "grad_norm": 0.32572874426841736, + "learning_rate": 0.0015, + "loss": 1.2548, + "step": 5340 + }, + { + "epoch": 0.5643459915611815, + "grad_norm": 0.3237755000591278, + "learning_rate": 0.0015, + "loss": 1.2515, + "step": 5350 + }, + { + "epoch": 0.5654008438818565, + "grad_norm": 0.355275958776474, + "learning_rate": 0.0015, + "loss": 1.257, + "step": 5360 + }, + { + "epoch": 0.5664556962025317, + "grad_norm": 0.3404082953929901, + "learning_rate": 0.0015, + "loss": 1.2593, + "step": 5370 + }, + { + "epoch": 0.5675105485232067, + "grad_norm": 0.3304276466369629, + "learning_rate": 0.0015, + "loss": 1.2517, + "step": 5380 + }, + { + 
"epoch": 0.5685654008438819, + "grad_norm": 0.33989864587783813, + "learning_rate": 0.0015, + "loss": 1.2522, + "step": 5390 + }, + { + "epoch": 0.569620253164557, + "grad_norm": 0.341671884059906, + "learning_rate": 0.0015, + "loss": 1.2519, + "step": 5400 + }, + { + "epoch": 0.570675105485232, + "grad_norm": 0.3551168143749237, + "learning_rate": 0.0015, + "loss": 1.2572, + "step": 5410 + }, + { + "epoch": 0.5717299578059072, + "grad_norm": 0.346642404794693, + "learning_rate": 0.0015, + "loss": 1.2501, + "step": 5420 + }, + { + "epoch": 0.5727848101265823, + "grad_norm": 0.35103023052215576, + "learning_rate": 0.0015, + "loss": 1.2483, + "step": 5430 + }, + { + "epoch": 0.5738396624472574, + "grad_norm": 0.3185129761695862, + "learning_rate": 0.0015, + "loss": 1.2482, + "step": 5440 + }, + { + "epoch": 0.5748945147679325, + "grad_norm": 0.3302600383758545, + "learning_rate": 0.0015, + "loss": 1.2504, + "step": 5450 + }, + { + "epoch": 0.5759493670886076, + "grad_norm": 0.32768237590789795, + "learning_rate": 0.0015, + "loss": 1.2538, + "step": 5460 + }, + { + "epoch": 0.5770042194092827, + "grad_norm": 0.35727646946907043, + "learning_rate": 0.0015, + "loss": 1.254, + "step": 5470 + }, + { + "epoch": 0.5780590717299579, + "grad_norm": 0.367353230714798, + "learning_rate": 0.0015, + "loss": 1.2548, + "step": 5480 + }, + { + "epoch": 0.5791139240506329, + "grad_norm": 0.3531593978404999, + "learning_rate": 0.0015, + "loss": 1.2631, + "step": 5490 + }, + { + "epoch": 0.580168776371308, + "grad_norm": 0.32804006338119507, + "learning_rate": 0.0015, + "loss": 1.251, + "step": 5500 + }, + { + "epoch": 0.5812236286919831, + "grad_norm": 0.31846901774406433, + "learning_rate": 0.0015, + "loss": 1.2583, + "step": 5510 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 0.36841240525245667, + "learning_rate": 0.0015, + "loss": 1.253, + "step": 5520 + }, + { + "epoch": 0.5833333333333334, + "grad_norm": 0.46265220642089844, + "learning_rate": 0.0015, + "loss": 1.2458, + 
"step": 5530 + }, + { + "epoch": 0.5843881856540084, + "grad_norm": 0.32472917437553406, + "learning_rate": 0.0015, + "loss": 1.2434, + "step": 5540 + }, + { + "epoch": 0.5854430379746836, + "grad_norm": 0.33125030994415283, + "learning_rate": 0.0015, + "loss": 1.2626, + "step": 5550 + }, + { + "epoch": 0.5864978902953587, + "grad_norm": 0.3427261710166931, + "learning_rate": 0.0015, + "loss": 1.265, + "step": 5560 + }, + { + "epoch": 0.5875527426160337, + "grad_norm": 0.32336708903312683, + "learning_rate": 0.0015, + "loss": 1.2505, + "step": 5570 + }, + { + "epoch": 0.5886075949367089, + "grad_norm": 0.31386277079582214, + "learning_rate": 0.0015, + "loss": 1.2485, + "step": 5580 + }, + { + "epoch": 0.5896624472573839, + "grad_norm": 0.32289138436317444, + "learning_rate": 0.0015, + "loss": 1.2529, + "step": 5590 + }, + { + "epoch": 0.5907172995780591, + "grad_norm": 0.33690229058265686, + "learning_rate": 0.0015, + "loss": 1.2475, + "step": 5600 + }, + { + "epoch": 0.5917721518987342, + "grad_norm": 0.36602291464805603, + "learning_rate": 0.0015, + "loss": 1.2519, + "step": 5610 + }, + { + "epoch": 0.5928270042194093, + "grad_norm": 0.35524094104766846, + "learning_rate": 0.0015, + "loss": 1.2586, + "step": 5620 + }, + { + "epoch": 0.5938818565400844, + "grad_norm": 0.3474019467830658, + "learning_rate": 0.0015, + "loss": 1.2555, + "step": 5630 + }, + { + "epoch": 0.5949367088607594, + "grad_norm": 0.3641188144683838, + "learning_rate": 0.0015, + "loss": 1.2621, + "step": 5640 + }, + { + "epoch": 0.5959915611814346, + "grad_norm": 0.325612872838974, + "learning_rate": 0.0015, + "loss": 1.2555, + "step": 5650 + }, + { + "epoch": 0.5970464135021097, + "grad_norm": 0.4651053845882416, + "learning_rate": 0.0015, + "loss": 1.2517, + "step": 5660 + }, + { + "epoch": 0.5981012658227848, + "grad_norm": 0.32629069685935974, + "learning_rate": 0.0015, + "loss": 1.2581, + "step": 5670 + }, + { + "epoch": 0.5991561181434599, + "grad_norm": 0.3221701681613922, + 
"learning_rate": 0.0015, + "loss": 1.2586, + "step": 5680 + }, + { + "epoch": 0.6002109704641351, + "grad_norm": 0.33370065689086914, + "learning_rate": 0.0015, + "loss": 1.2393, + "step": 5690 + }, + { + "epoch": 0.6012658227848101, + "grad_norm": 0.35515671968460083, + "learning_rate": 0.0015, + "loss": 1.2423, + "step": 5700 + }, + { + "epoch": 0.6023206751054853, + "grad_norm": 0.36871209740638733, + "learning_rate": 0.0015, + "loss": 1.249, + "step": 5710 + }, + { + "epoch": 0.6033755274261603, + "grad_norm": 0.3117012083530426, + "learning_rate": 0.0015, + "loss": 1.2526, + "step": 5720 + }, + { + "epoch": 0.6044303797468354, + "grad_norm": 0.3552721440792084, + "learning_rate": 0.0015, + "loss": 1.2502, + "step": 5730 + }, + { + "epoch": 0.6054852320675106, + "grad_norm": 0.33427029848098755, + "learning_rate": 0.0015, + "loss": 1.2486, + "step": 5740 + }, + { + "epoch": 0.6065400843881856, + "grad_norm": 0.34898853302001953, + "learning_rate": 0.0015, + "loss": 1.2244, + "step": 5750 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 0.33162492513656616, + "learning_rate": 0.0015, + "loss": 1.2322, + "step": 5760 + }, + { + "epoch": 0.6086497890295358, + "grad_norm": 0.36996203660964966, + "learning_rate": 0.0015, + "loss": 1.2348, + "step": 5770 + }, + { + "epoch": 0.609704641350211, + "grad_norm": 0.30883604288101196, + "learning_rate": 0.0015, + "loss": 1.2451, + "step": 5780 + }, + { + "epoch": 0.6107594936708861, + "grad_norm": 0.3355100750923157, + "learning_rate": 0.0015, + "loss": 1.2441, + "step": 5790 + }, + { + "epoch": 0.6118143459915611, + "grad_norm": 0.3257335424423218, + "learning_rate": 0.0015, + "loss": 1.2429, + "step": 5800 + }, + { + "epoch": 0.6128691983122363, + "grad_norm": 0.349295049905777, + "learning_rate": 0.0015, + "loss": 1.2509, + "step": 5810 + }, + { + "epoch": 0.6139240506329114, + "grad_norm": 0.34802842140197754, + "learning_rate": 0.0015, + "loss": 1.2627, + "step": 5820 + }, + { + "epoch": 0.6149789029535865, + 
"grad_norm": 0.36794769763946533, + "learning_rate": 0.0015, + "loss": 1.2449, + "step": 5830 + }, + { + "epoch": 0.6160337552742616, + "grad_norm": 0.33518537878990173, + "learning_rate": 0.0015, + "loss": 1.2575, + "step": 5840 + }, + { + "epoch": 0.6170886075949367, + "grad_norm": 0.3931816518306732, + "learning_rate": 0.0015, + "loss": 1.2623, + "step": 5850 + }, + { + "epoch": 0.6181434599156118, + "grad_norm": 0.30917906761169434, + "learning_rate": 0.0015, + "loss": 1.2488, + "step": 5860 + }, + { + "epoch": 0.619198312236287, + "grad_norm": 0.3136212229728699, + "learning_rate": 0.0015, + "loss": 1.2413, + "step": 5870 + }, + { + "epoch": 0.620253164556962, + "grad_norm": 0.32720673084259033, + "learning_rate": 0.0015, + "loss": 1.2528, + "step": 5880 + }, + { + "epoch": 0.6213080168776371, + "grad_norm": 0.3788354694843292, + "learning_rate": 0.0015, + "loss": 1.2428, + "step": 5890 + }, + { + "epoch": 0.6223628691983122, + "grad_norm": 0.358934223651886, + "learning_rate": 0.0015, + "loss": 1.2455, + "step": 5900 + }, + { + "epoch": 0.6234177215189873, + "grad_norm": 0.387750506401062, + "learning_rate": 0.0015, + "loss": 1.2411, + "step": 5910 + }, + { + "epoch": 0.6244725738396625, + "grad_norm": 0.33726221323013306, + "learning_rate": 0.0015, + "loss": 1.2346, + "step": 5920 + }, + { + "epoch": 0.6255274261603375, + "grad_norm": 0.43859201669692993, + "learning_rate": 0.0015, + "loss": 1.2397, + "step": 5930 + }, + { + "epoch": 0.6265822784810127, + "grad_norm": 0.36601218581199646, + "learning_rate": 0.0015, + "loss": 1.2478, + "step": 5940 + }, + { + "epoch": 0.6276371308016878, + "grad_norm": 0.3264256715774536, + "learning_rate": 0.0015, + "loss": 1.2368, + "step": 5950 + }, + { + "epoch": 0.6286919831223629, + "grad_norm": 0.3220791220664978, + "learning_rate": 0.0015, + "loss": 1.248, + "step": 5960 + }, + { + "epoch": 0.629746835443038, + "grad_norm": 0.35854560136795044, + "learning_rate": 0.0015, + "loss": 1.2464, + "step": 5970 + }, + { + 
"epoch": 0.630801687763713, + "grad_norm": 0.3482915759086609, + "learning_rate": 0.0015, + "loss": 1.2305, + "step": 5980 + }, + { + "epoch": 0.6318565400843882, + "grad_norm": 0.39330121874809265, + "learning_rate": 0.0015, + "loss": 1.2477, + "step": 5990 + }, + { + "epoch": 0.6329113924050633, + "grad_norm": 0.36969903111457825, + "learning_rate": 0.0015, + "loss": 1.2547, + "step": 6000 + }, + { + "epoch": 0.6339662447257384, + "grad_norm": 0.31875529885292053, + "learning_rate": 0.0015, + "loss": 1.2408, + "step": 6010 + }, + { + "epoch": 0.6350210970464135, + "grad_norm": 0.33607614040374756, + "learning_rate": 0.0015, + "loss": 1.2444, + "step": 6020 + }, + { + "epoch": 0.6360759493670886, + "grad_norm": 0.3412076532840729, + "learning_rate": 0.0015, + "loss": 1.2279, + "step": 6030 + }, + { + "epoch": 0.6371308016877637, + "grad_norm": 0.35197487473487854, + "learning_rate": 0.0015, + "loss": 1.2474, + "step": 6040 + }, + { + "epoch": 0.6381856540084389, + "grad_norm": 0.3313758373260498, + "learning_rate": 0.0015, + "loss": 1.2353, + "step": 6050 + }, + { + "epoch": 0.6392405063291139, + "grad_norm": 0.38707274198532104, + "learning_rate": 0.0015, + "loss": 1.2499, + "step": 6060 + }, + { + "epoch": 0.640295358649789, + "grad_norm": 0.36934447288513184, + "learning_rate": 0.0015, + "loss": 1.2423, + "step": 6070 + }, + { + "epoch": 0.6413502109704642, + "grad_norm": 0.33539143204689026, + "learning_rate": 0.0015, + "loss": 1.2413, + "step": 6080 + }, + { + "epoch": 0.6424050632911392, + "grad_norm": 0.3044580817222595, + "learning_rate": 0.0015, + "loss": 1.2364, + "step": 6090 + }, + { + "epoch": 0.6434599156118144, + "grad_norm": 0.34725913405418396, + "learning_rate": 0.0015, + "loss": 1.2338, + "step": 6100 + }, + { + "epoch": 0.6445147679324894, + "grad_norm": 0.32453039288520813, + "learning_rate": 0.0015, + "loss": 1.2468, + "step": 6110 + }, + { + "epoch": 0.6455696202531646, + "grad_norm": 0.31439530849456787, + "learning_rate": 0.0015, + "loss": 
1.2395, + "step": 6120 + }, + { + "epoch": 0.6466244725738397, + "grad_norm": 0.3386889696121216, + "learning_rate": 0.0015, + "loss": 1.2394, + "step": 6130 + }, + { + "epoch": 0.6476793248945147, + "grad_norm": 0.39667123556137085, + "learning_rate": 0.0015, + "loss": 1.2369, + "step": 6140 + }, + { + "epoch": 0.6487341772151899, + "grad_norm": 0.35717153549194336, + "learning_rate": 0.0015, + "loss": 1.2481, + "step": 6150 + }, + { + "epoch": 0.6497890295358649, + "grad_norm": 0.4444461464881897, + "learning_rate": 0.0015, + "loss": 1.2463, + "step": 6160 + }, + { + "epoch": 0.6508438818565401, + "grad_norm": 0.4026486277580261, + "learning_rate": 0.0014834368975312174, + "loss": 1.2226, + "step": 6170 + }, + { + "epoch": 0.6518987341772152, + "grad_norm": 0.3683646321296692, + "learning_rate": 0.0014629899726345957, + "loss": 1.2439, + "step": 6180 + }, + { + "epoch": 0.6529535864978903, + "grad_norm": 0.3451366722583771, + "learning_rate": 0.0014428248775471316, + "loss": 1.2444, + "step": 6190 + }, + { + "epoch": 0.6540084388185654, + "grad_norm": 0.38098204135894775, + "learning_rate": 0.00142293772767289, + "loss": 1.2358, + "step": 6200 + }, + { + "epoch": 0.6550632911392406, + "grad_norm": 0.3486824631690979, + "learning_rate": 0.001403324691959192, + "loss": 1.2296, + "step": 6210 + }, + { + "epoch": 0.6561181434599156, + "grad_norm": 0.33603349328041077, + "learning_rate": 0.0013839819921586025, + "loss": 1.239, + "step": 6220 + }, + { + "epoch": 0.6571729957805907, + "grad_norm": 0.344035267829895, + "learning_rate": 0.0013649059021010894, + "loss": 1.2299, + "step": 6230 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 0.3314090073108673, + "learning_rate": 0.0013460927469762154, + "loss": 1.2303, + "step": 6240 + }, + { + "epoch": 0.6592827004219409, + "grad_norm": 0.3103720545768738, + "learning_rate": 0.0013275389026252255, + "loss": 1.2322, + "step": 6250 + }, + { + "epoch": 0.6603375527426161, + "grad_norm": 0.32174864411354065, + 
"learning_rate": 0.0013092407948428887, + "loss": 1.2229, + "step": 6260 + }, + { + "epoch": 0.6613924050632911, + "grad_norm": 0.3075444996356964, + "learning_rate": 0.001291194898688966, + "loss": 1.2325, + "step": 6270 + }, + { + "epoch": 0.6624472573839663, + "grad_norm": 0.32746440172195435, + "learning_rate": 0.001273397737809166, + "loss": 1.2281, + "step": 6280 + }, + { + "epoch": 0.6635021097046413, + "grad_norm": 0.3113197684288025, + "learning_rate": 0.001255845883765463, + "loss": 1.2253, + "step": 6290 + }, + { + "epoch": 0.6645569620253164, + "grad_norm": 0.3303135633468628, + "learning_rate": 0.001238535955375642, + "loss": 1.2178, + "step": 6300 + }, + { + "epoch": 0.6656118143459916, + "grad_norm": 0.30665114521980286, + "learning_rate": 0.0012214646180619506, + "loss": 1.2162, + "step": 6310 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.35169586539268494, + "learning_rate": 0.001204628583208727, + "loss": 1.2104, + "step": 6320 + }, + { + "epoch": 0.6677215189873418, + "grad_norm": 0.3337899446487427, + "learning_rate": 0.0011880246075288827, + "loss": 1.2201, + "step": 6330 + }, + { + "epoch": 0.6687763713080169, + "grad_norm": 0.30616530776023865, + "learning_rate": 0.001171649492439115, + "loss": 1.2112, + "step": 6340 + }, + { + "epoch": 0.669831223628692, + "grad_norm": 0.3198193311691284, + "learning_rate": 0.0011555000834437364, + "loss": 1.2179, + "step": 6350 + }, + { + "epoch": 0.6708860759493671, + "grad_norm": 0.3075357675552368, + "learning_rate": 0.0011395732695269908, + "loss": 1.2106, + "step": 6360 + }, + { + "epoch": 0.6719409282700421, + "grad_norm": 0.3103102147579193, + "learning_rate": 0.0011238659825537505, + "loss": 1.1934, + "step": 6370 + }, + { + "epoch": 0.6729957805907173, + "grad_norm": 0.35869720578193665, + "learning_rate": 0.0011083751966784717, + "loss": 1.1977, + "step": 6380 + }, + { + "epoch": 0.6740506329113924, + "grad_norm": 0.33826592564582825, + "learning_rate": 0.0010930979277622953, + "loss": 
1.2129, + "step": 6390 + }, + { + "epoch": 0.6751054852320675, + "grad_norm": 0.31560054421424866, + "learning_rate": 0.0010780312327981854, + "loss": 1.2108, + "step": 6400 + }, + { + "epoch": 0.6761603375527426, + "grad_norm": 0.31668636202812195, + "learning_rate": 0.0010631722093439888, + "loss": 1.2027, + "step": 6410 + }, + { + "epoch": 0.6772151898734177, + "grad_norm": 0.32392340898513794, + "learning_rate": 0.00104851799496331, + "loss": 1.1943, + "step": 6420 + }, + { + "epoch": 0.6782700421940928, + "grad_norm": 0.35339978337287903, + "learning_rate": 0.0010340657666740914, + "loss": 1.2038, + "step": 6430 + }, + { + "epoch": 0.679324894514768, + "grad_norm": 0.35348406434059143, + "learning_rate": 0.0010198127404047975, + "loss": 1.184, + "step": 6440 + }, + { + "epoch": 0.680379746835443, + "grad_norm": 0.33255070447921753, + "learning_rate": 0.0010057561704580897, + "loss": 1.1903, + "step": 6450 + }, + { + "epoch": 0.6814345991561181, + "grad_norm": 0.3151698410511017, + "learning_rate": 0.0009918933489818985, + "loss": 1.1994, + "step": 6460 + }, + { + "epoch": 0.6824894514767933, + "grad_norm": 0.3220020830631256, + "learning_rate": 0.0009782216054477827, + "loss": 1.1953, + "step": 6470 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 0.33605191111564636, + "learning_rate": 0.0009647383061364801, + "loss": 1.1995, + "step": 6480 + }, + { + "epoch": 0.6845991561181435, + "grad_norm": 0.3029930889606476, + "learning_rate": 0.0009514408536305495, + "loss": 1.1926, + "step": 6490 + }, + { + "epoch": 0.6856540084388185, + "grad_norm": 0.29145169258117676, + "learning_rate": 0.0009383266863140042, + "loss": 1.2068, + "step": 6500 + }, + { + "epoch": 0.6867088607594937, + "grad_norm": 0.35904476046562195, + "learning_rate": 0.000925393277878844, + "loss": 1.2043, + "step": 6510 + }, + { + "epoch": 0.6877637130801688, + "grad_norm": 0.3455471098423004, + "learning_rate": 0.0009126381368383879, + "loss": 1.1899, + "step": 6520 + }, + { + "epoch": 
0.6888185654008439, + "grad_norm": 0.3116503357887268, + "learning_rate": 0.0009000588060473156, + "loss": 1.1791, + "step": 6530 + }, + { + "epoch": 0.689873417721519, + "grad_norm": 0.31568071246147156, + "learning_rate": 0.0008876528622283235, + "loss": 1.1899, + "step": 6540 + }, + { + "epoch": 0.6909282700421941, + "grad_norm": 0.35425931215286255, + "learning_rate": 0.0008754179155053053, + "loss": 1.1867, + "step": 6550 + }, + { + "epoch": 0.6919831223628692, + "grad_norm": 0.3980303704738617, + "learning_rate": 0.0008633516089429683, + "loss": 1.1843, + "step": 6560 + }, + { + "epoch": 0.6930379746835443, + "grad_norm": 0.36593008041381836, + "learning_rate": 0.0008514516180927928, + "loss": 1.1812, + "step": 6570 + }, + { + "epoch": 0.6940928270042194, + "grad_norm": 0.32517319917678833, + "learning_rate": 0.0008397156505452524, + "loss": 1.1772, + "step": 6580 + }, + { + "epoch": 0.6951476793248945, + "grad_norm": 0.325745552778244, + "learning_rate": 0.0008281414454882051, + "loss": 1.1787, + "step": 6590 + }, + { + "epoch": 0.6962025316455697, + "grad_norm": 0.2857639193534851, + "learning_rate": 0.0008167267732713704, + "loss": 1.1855, + "step": 6600 + }, + { + "epoch": 0.6972573839662447, + "grad_norm": 0.3474614918231964, + "learning_rate": 0.0008054694349768117, + "loss": 1.1699, + "step": 6610 + }, + { + "epoch": 0.6983122362869199, + "grad_norm": 0.3178200125694275, + "learning_rate": 0.0007943672619953359, + "loss": 1.1795, + "step": 6620 + }, + { + "epoch": 0.6993670886075949, + "grad_norm": 0.3442220389842987, + "learning_rate": 0.0007834181156087356, + "loss": 1.1726, + "step": 6630 + }, + { + "epoch": 0.70042194092827, + "grad_norm": 0.32902151346206665, + "learning_rate": 0.0007726198865777852, + "loss": 1.1736, + "step": 6640 + }, + { + "epoch": 0.7014767932489452, + "grad_norm": 0.3253075182437897, + "learning_rate": 0.0007619704947359191, + "loss": 1.1703, + "step": 6650 + }, + { + "epoch": 0.7025316455696202, + "grad_norm": 
0.31004899740219116, + "learning_rate": 0.0007514678885885087, + "loss": 1.1713, + "step": 6660 + }, + { + "epoch": 0.7035864978902954, + "grad_norm": 0.32050368189811707, + "learning_rate": 0.0007411100449176633, + "loss": 1.1705, + "step": 6670 + }, + { + "epoch": 0.7046413502109705, + "grad_norm": 0.296805202960968, + "learning_rate": 0.0007308949683924791, + "loss": 1.1722, + "step": 6680 + }, + { + "epoch": 0.7056962025316456, + "grad_norm": 0.3058995008468628, + "learning_rate": 0.000720820691184658, + "loss": 1.1659, + "step": 6690 + }, + { + "epoch": 0.7067510548523207, + "grad_norm": 0.31293025612831116, + "learning_rate": 0.0007108852725894269, + "loss": 1.1655, + "step": 6700 + }, + { + "epoch": 0.7078059071729957, + "grad_norm": 0.2998400926589966, + "learning_rate": 0.000701086798651681, + "loss": 1.1686, + "step": 6710 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 0.30473223328590393, + "learning_rate": 0.0006914233817972798, + "loss": 1.158, + "step": 6720 + }, + { + "epoch": 0.709915611814346, + "grad_norm": 0.32161760330200195, + "learning_rate": 0.0006818931604694261, + "loss": 1.1622, + "step": 6730 + }, + { + "epoch": 0.7109704641350211, + "grad_norm": 0.30090296268463135, + "learning_rate": 0.0006724942987700563, + "loss": 1.1657, + "step": 6740 + }, + { + "epoch": 0.7120253164556962, + "grad_norm": 0.32431352138519287, + "learning_rate": 0.0006632249861061732, + "loss": 1.1663, + "step": 6750 + }, + { + "epoch": 0.7130801687763713, + "grad_norm": 0.3277837038040161, + "learning_rate": 0.0006540834368410549, + "loss": 1.1631, + "step": 6760 + }, + { + "epoch": 0.7141350210970464, + "grad_norm": 0.3163105249404907, + "learning_rate": 0.0006450678899502701, + "loss": 1.1637, + "step": 6770 + }, + { + "epoch": 0.7151898734177216, + "grad_norm": 0.3263886868953705, + "learning_rate": 0.0006361766086824345, + "loss": 1.1617, + "step": 6780 + }, + { + "epoch": 0.7162447257383966, + "grad_norm": 0.3312484622001648, + "learning_rate": 
0.000627407880224645, + "loss": 1.1657, + "step": 6790 + }, + { + "epoch": 0.7172995780590717, + "grad_norm": 0.30463480949401855, + "learning_rate": 0.0006187600153725225, + "loss": 1.1532, + "step": 6800 + }, + { + "epoch": 0.7183544303797469, + "grad_norm": 0.3175445795059204, + "learning_rate": 0.0006102313482048055, + "loss": 1.1563, + "step": 6810 + }, + { + "epoch": 0.7194092827004219, + "grad_norm": 0.32396236062049866, + "learning_rate": 0.0006018202357624274, + "loss": 1.1583, + "step": 6820 + }, + { + "epoch": 0.7204641350210971, + "grad_norm": 0.29472267627716064, + "learning_rate": 0.0005935250577320168, + "loss": 1.1567, + "step": 6830 + }, + { + "epoch": 0.7215189873417721, + "grad_norm": 0.33541539311408997, + "learning_rate": 0.0005853442161337618, + "loss": 1.144, + "step": 6840 + }, + { + "epoch": 0.7225738396624473, + "grad_norm": 0.29096779227256775, + "learning_rate": 0.0005772761350135759, + "loss": 1.154, + "step": 6850 + }, + { + "epoch": 0.7236286919831224, + "grad_norm": 0.31761106848716736, + "learning_rate": 0.0005693192601395058, + "loss": 1.1484, + "step": 6860 + }, + { + "epoch": 0.7246835443037974, + "grad_norm": 0.3081628084182739, + "learning_rate": 0.000561472058702326, + "loss": 1.1436, + "step": 6870 + }, + { + "epoch": 0.7257383966244726, + "grad_norm": 0.30715107917785645, + "learning_rate": 0.000553733019020258, + "loss": 1.1543, + "step": 6880 + }, + { + "epoch": 0.7267932489451476, + "grad_norm": 0.30482837557792664, + "learning_rate": 0.0005461006502477612, + "loss": 1.1422, + "step": 6890 + }, + { + "epoch": 0.7278481012658228, + "grad_norm": 0.3131371736526489, + "learning_rate": 0.0005385734820883369, + "loss": 1.1417, + "step": 6900 + }, + { + "epoch": 0.7289029535864979, + "grad_norm": 0.30512696504592896, + "learning_rate": 0.0005311500645112907, + "loss": 1.1661, + "step": 6910 + }, + { + "epoch": 0.729957805907173, + "grad_norm": 0.3093601167201996, + "learning_rate": 0.0005238289674723993, + "loss": 1.1446, + 
"step": 6920 + }, + { + "epoch": 0.7310126582278481, + "grad_norm": 0.3076038658618927, + "learning_rate": 0.0005166087806384274, + "loss": 1.1486, + "step": 6930 + }, + { + "epoch": 0.7320675105485233, + "grad_norm": 0.305584192276001, + "learning_rate": 0.0005094881131154418, + "loss": 1.1522, + "step": 6940 + }, + { + "epoch": 0.7331223628691983, + "grad_norm": 0.31308478116989136, + "learning_rate": 0.0005024655931808696, + "loss": 1.1483, + "step": 6950 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 0.3094158470630646, + "learning_rate": 0.0004955398680192508, + "loss": 1.1419, + "step": 6960 + }, + { + "epoch": 0.7352320675105485, + "grad_norm": 0.29979753494262695, + "learning_rate": 0.000488709603461632, + "loss": 1.1373, + "step": 6970 + }, + { + "epoch": 0.7362869198312236, + "grad_norm": 0.2946663200855255, + "learning_rate": 0.000481973483728553, + "loss": 1.1424, + "step": 6980 + }, + { + "epoch": 0.7373417721518988, + "grad_norm": 0.30663517117500305, + "learning_rate": 0.0004753302111765748, + "loss": 1.14, + "step": 6990 + }, + { + "epoch": 0.7383966244725738, + "grad_norm": 0.3226993680000305, + "learning_rate": 0.0004687785060483032, + "loss": 1.1494, + "step": 7000 + }, + { + "epoch": 0.739451476793249, + "grad_norm": 0.2898540198802948, + "learning_rate": 0.0004623171062258558, + "loss": 1.123, + "step": 7010 + }, + { + "epoch": 0.740506329113924, + "grad_norm": 0.3063409626483917, + "learning_rate": 0.0004559447669877288, + "loss": 1.1363, + "step": 7020 + }, + { + "epoch": 0.7415611814345991, + "grad_norm": 0.2905513346195221, + "learning_rate": 0.00044966026076901413, + "loss": 1.1411, + "step": 7030 + }, + { + "epoch": 0.7426160337552743, + "grad_norm": 0.34559938311576843, + "learning_rate": 0.00044346237692492177, + "loss": 1.1461, + "step": 7040 + }, + { + "epoch": 0.7436708860759493, + "grad_norm": 0.3467509150505066, + "learning_rate": 0.0004373499214975615, + "loss": 1.1328, + "step": 7050 + }, + { + "epoch": 
0.7447257383966245, + "grad_norm": 0.3333624601364136, + "learning_rate": 0.0004313217169859396, + "loss": 1.1366, + "step": 7060 + }, + { + "epoch": 0.7457805907172996, + "grad_norm": 0.3122444748878479, + "learning_rate": 0.0004253766021191256, + "loss": 1.1423, + "step": 7070 + }, + { + "epoch": 0.7468354430379747, + "grad_norm": 0.3039610683917999, + "learning_rate": 0.00041951343163254497, + "loss": 1.1379, + "step": 7080 + }, + { + "epoch": 0.7478902953586498, + "grad_norm": 0.301398903131485, + "learning_rate": 0.00041373107604735626, + "loss": 1.1351, + "step": 7090 + }, + { + "epoch": 0.7489451476793249, + "grad_norm": 0.3017662465572357, + "learning_rate": 0.0004080284214528687, + "loss": 1.1331, + "step": 7100 + }, + { + "epoch": 0.75, + "grad_norm": 0.3178175687789917, + "learning_rate": 0.0004024043692919589, + "loss": 1.1421, + "step": 7110 + }, + { + "epoch": 0.7510548523206751, + "grad_norm": 0.34835416078567505, + "learning_rate": 0.0003968578361494449, + "loss": 1.1341, + "step": 7120 + }, + { + "epoch": 0.7521097046413502, + "grad_norm": 0.32286974787712097, + "learning_rate": 0.000391387753543378, + "loss": 1.1443, + "step": 7130 + }, + { + "epoch": 0.7531645569620253, + "grad_norm": 0.3165818750858307, + "learning_rate": 0.00038599306771921023, + "loss": 1.1267, + "step": 7140 + }, + { + "epoch": 0.7542194092827004, + "grad_norm": 0.32807934284210205, + "learning_rate": 0.0003806727394468004, + "loss": 1.1253, + "step": 7150 + }, + { + "epoch": 0.7552742616033755, + "grad_norm": 0.30154094099998474, + "learning_rate": 0.0003754257438202162, + "loss": 1.1331, + "step": 7160 + }, + { + "epoch": 0.7563291139240507, + "grad_norm": 0.3244779407978058, + "learning_rate": 0.0003702510700602974, + "loss": 1.1407, + "step": 7170 + }, + { + "epoch": 0.7573839662447257, + "grad_norm": 0.35364311933517456, + "learning_rate": 0.0003651477213199393, + "loss": 1.1187, + "step": 7180 + }, + { + "epoch": 0.7584388185654009, + "grad_norm": 0.31307119131088257, + 
"learning_rate": 0.000360114714492061, + "loss": 1.1218, + "step": 7190 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 0.3015627861022949, + "learning_rate": 0.0003551510800202195, + "loss": 1.1251, + "step": 7200 + }, + { + "epoch": 0.760548523206751, + "grad_norm": 0.37355753779411316, + "learning_rate": 0.0003502558617118353, + "loss": 1.1302, + "step": 7210 + }, + { + "epoch": 0.7616033755274262, + "grad_norm": 0.30779457092285156, + "learning_rate": 0.0003454281165539914, + "loss": 1.1422, + "step": 7220 + }, + { + "epoch": 0.7626582278481012, + "grad_norm": 0.31253111362457275, + "learning_rate": 0.00034066691453177176, + "loss": 1.139, + "step": 7230 + }, + { + "epoch": 0.7637130801687764, + "grad_norm": 0.29553452134132385, + "learning_rate": 0.0003359713384491037, + "loss": 1.1353, + "step": 7240 + }, + { + "epoch": 0.7647679324894515, + "grad_norm": 0.3091430366039276, + "learning_rate": 0.00033134048375206944, + "loss": 1.1296, + "step": 7250 + }, + { + "epoch": 0.7658227848101266, + "grad_norm": 0.35152044892311096, + "learning_rate": 0.0003267734583546536, + "loss": 1.1285, + "step": 7260 + }, + { + "epoch": 0.7668776371308017, + "grad_norm": 0.30567291378974915, + "learning_rate": 0.00032226938246689157, + "loss": 1.1239, + "step": 7270 + }, + { + "epoch": 0.7679324894514767, + "grad_norm": 0.3052709400653839, + "learning_rate": 0.0003178273884253874, + "loss": 1.1263, + "step": 7280 + }, + { + "epoch": 0.7689873417721519, + "grad_norm": 0.3217529058456421, + "learning_rate": 0.0003134466205261674, + "loss": 1.1366, + "step": 7290 + }, + { + "epoch": 0.770042194092827, + "grad_norm": 0.29919829964637756, + "learning_rate": 0.0003091262348598378, + "loss": 1.1403, + "step": 7300 + }, + { + "epoch": 0.7710970464135021, + "grad_norm": 0.29513537883758545, + "learning_rate": 0.0003048653991490141, + "loss": 1.1203, + "step": 7310 + }, + { + "epoch": 0.7721518987341772, + "grad_norm": 0.3004360795021057, + "learning_rate": 0.00030066329258799187, + 
"loss": 1.1179, + "step": 7320 + }, + { + "epoch": 0.7732067510548524, + "grad_norm": 0.3202657401561737, + "learning_rate": 0.0002965191056846266, + "loss": 1.1212, + "step": 7330 + }, + { + "epoch": 0.7742616033755274, + "grad_norm": 0.3142313063144684, + "learning_rate": 0.000292432040104394, + "loss": 1.1143, + "step": 7340 + }, + { + "epoch": 0.7753164556962026, + "grad_norm": 0.3124704957008362, + "learning_rate": 0.00028840130851659853, + "loss": 1.1158, + "step": 7350 + }, + { + "epoch": 0.7763713080168776, + "grad_norm": 0.3039351999759674, + "learning_rate": 0.0002844261344427028, + "loss": 1.1249, + "step": 7360 + }, + { + "epoch": 0.7774261603375527, + "grad_norm": 0.3144666254520416, + "learning_rate": 0.0002805057521067471, + "loss": 1.1244, + "step": 7370 + }, + { + "epoch": 0.7784810126582279, + "grad_norm": 0.29907071590423584, + "learning_rate": 0.00027663940628783017, + "loss": 1.1107, + "step": 7380 + }, + { + "epoch": 0.7795358649789029, + "grad_norm": 0.30688223242759705, + "learning_rate": 0.00027282635217462393, + "loss": 1.1208, + "step": 7390 + }, + { + "epoch": 0.7805907172995781, + "grad_norm": 0.31314346194267273, + "learning_rate": 0.0002690658552218937, + "loss": 1.1282, + "step": 7400 + }, + { + "epoch": 0.7816455696202531, + "grad_norm": 0.2993510663509369, + "learning_rate": 0.00026535719100899516, + "loss": 1.1084, + "step": 7410 + }, + { + "epoch": 0.7827004219409283, + "grad_norm": 0.31194618344306946, + "learning_rate": 0.00026169964510032245, + "loss": 1.1114, + "step": 7420 + }, + { + "epoch": 0.7837552742616034, + "grad_norm": 0.2804999053478241, + "learning_rate": 0.00025809251290767984, + "loss": 1.1028, + "step": 7430 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 0.29775840044021606, + "learning_rate": 0.00025453509955454957, + "loss": 1.1051, + "step": 7440 + }, + { + "epoch": 0.7858649789029536, + "grad_norm": 0.3113470673561096, + "learning_rate": 0.00025102671974223175, + "loss": 1.1086, + "step": 7450 + }, + 
{ + "epoch": 0.7869198312236287, + "grad_norm": 0.30034735798835754, + "learning_rate": 0.00024756669761782815, + "loss": 1.1175, + "step": 7460 + }, + { + "epoch": 0.7879746835443038, + "grad_norm": 0.31758636236190796, + "learning_rate": 0.0002441543666440464, + "loss": 1.1079, + "step": 7470 + }, + { + "epoch": 0.7890295358649789, + "grad_norm": 0.29403379559516907, + "learning_rate": 0.00024078906947079878, + "loss": 1.1159, + "step": 7480 + }, + { + "epoch": 0.790084388185654, + "grad_norm": 0.30131855607032776, + "learning_rate": 0.00023747015780857005, + "loss": 1.1213, + "step": 7490 + }, + { + "epoch": 0.7911392405063291, + "grad_norm": 0.3080956041812897, + "learning_rate": 0.00023419699230353144, + "loss": 1.1184, + "step": 7500 + }, + { + "epoch": 0.7921940928270043, + "grad_norm": 0.3078226149082184, + "learning_rate": 0.00023096894241437586, + "loss": 1.1216, + "step": 7510 + }, + { + "epoch": 0.7932489451476793, + "grad_norm": 0.2988618016242981, + "learning_rate": 0.00022778538629085056, + "loss": 1.1068, + "step": 7520 + }, + { + "epoch": 0.7943037974683544, + "grad_norm": 0.2947309613227844, + "learning_rate": 0.00022464571065396427, + "loss": 1.1092, + "step": 7530 + }, + { + "epoch": 0.7953586497890295, + "grad_norm": 0.330911785364151, + "learning_rate": 0.00022154931067784521, + "loss": 1.104, + "step": 7540 + }, + { + "epoch": 0.7964135021097046, + "grad_norm": 0.3285962641239166, + "learning_rate": 0.00021849558987322782, + "loss": 1.1035, + "step": 7550 + }, + { + "epoch": 0.7974683544303798, + "grad_norm": 0.3116293251514435, + "learning_rate": 0.0002154839599725452, + "loss": 1.1075, + "step": 7560 + }, + { + "epoch": 0.7985232067510548, + "grad_norm": 0.3200410008430481, + "learning_rate": 0.00021251384081660544, + "loss": 1.1139, + "step": 7570 + }, + { + "epoch": 0.79957805907173, + "grad_norm": 0.2930144667625427, + "learning_rate": 0.0002095846602428303, + "loss": 1.1111, + "step": 7580 + }, + { + "epoch": 0.8006329113924051, + 
"grad_norm": 0.29921388626098633, + "learning_rate": 0.00020669585397503358, + "loss": 1.1022, + "step": 7590 + }, + { + "epoch": 0.8016877637130801, + "grad_norm": 0.2902184724807739, + "learning_rate": 0.0002038468655147195, + "loss": 1.1076, + "step": 7600 + }, + { + "epoch": 0.8027426160337553, + "grad_norm": 0.2966500520706177, + "learning_rate": 0.00020103714603387894, + "loss": 1.1203, + "step": 7610 + }, + { + "epoch": 0.8037974683544303, + "grad_norm": 0.28531065583229065, + "learning_rate": 0.00019826615426926338, + "loss": 1.097, + "step": 7620 + }, + { + "epoch": 0.8048523206751055, + "grad_norm": 0.2880052328109741, + "learning_rate": 0.00019553335641811625, + "loss": 1.1186, + "step": 7630 + }, + { + "epoch": 0.8059071729957806, + "grad_norm": 0.3241575360298157, + "learning_rate": 0.0001928382260353415, + "loss": 1.1088, + "step": 7640 + }, + { + "epoch": 0.8069620253164557, + "grad_norm": 0.29371729493141174, + "learning_rate": 0.00019018024393208902, + "loss": 1.1235, + "step": 7650 + }, + { + "epoch": 0.8080168776371308, + "grad_norm": 0.2925833761692047, + "learning_rate": 0.00018755889807573872, + "loss": 1.101, + "step": 7660 + }, + { + "epoch": 0.8090717299578059, + "grad_norm": 0.29287418723106384, + "learning_rate": 0.00018497368349126262, + "loss": 1.1088, + "step": 7670 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 0.3102490305900574, + "learning_rate": 0.00018242410216394648, + "loss": 1.1191, + "step": 7680 + }, + { + "epoch": 0.8111814345991561, + "grad_norm": 0.3129653334617615, + "learning_rate": 0.0001799096629434529, + "loss": 1.0939, + "step": 7690 + }, + { + "epoch": 0.8122362869198312, + "grad_norm": 0.3090495467185974, + "learning_rate": 0.00017742988144920578, + "loss": 1.1038, + "step": 7700 + }, + { + "epoch": 0.8132911392405063, + "grad_norm": 0.2979944050312042, + "learning_rate": 0.00017498427997707976, + "loss": 1.1031, + "step": 7710 + }, + { + "epoch": 0.8143459915611815, + "grad_norm": 0.2943827211856842, + 
"learning_rate": 0.00017257238740737548, + "loss": 1.1057, + "step": 7720 + }, + { + "epoch": 0.8154008438818565, + "grad_norm": 0.3144172132015228, + "learning_rate": 0.00017019373911406307, + "loss": 1.1136, + "step": 7730 + }, + { + "epoch": 0.8164556962025317, + "grad_norm": 0.30515575408935547, + "learning_rate": 0.000167847876875277, + "loss": 1.1112, + "step": 7740 + }, + { + "epoch": 0.8175105485232067, + "grad_norm": 0.30309703946113586, + "learning_rate": 0.00016553434878504428, + "loss": 1.0985, + "step": 7750 + }, + { + "epoch": 0.8185654008438819, + "grad_norm": 0.3171931505203247, + "learning_rate": 0.00016325270916622947, + "loss": 1.0964, + "step": 7760 + }, + { + "epoch": 0.819620253164557, + "grad_norm": 0.30823394656181335, + "learning_rate": 0.00016100251848467966, + "loss": 1.1039, + "step": 7770 + }, + { + "epoch": 0.820675105485232, + "grad_norm": 0.30423763394355774, + "learning_rate": 0.0001587833432645528, + "loss": 1.0993, + "step": 7780 + }, + { + "epoch": 0.8217299578059072, + "grad_norm": 0.31251442432403564, + "learning_rate": 0.00015659475600481292, + "loss": 1.1133, + "step": 7790 + }, + { + "epoch": 0.8227848101265823, + "grad_norm": 0.28880876302719116, + "learning_rate": 0.00015443633509687688, + "loss": 1.1037, + "step": 7800 + }, + { + "epoch": 0.8238396624472574, + "grad_norm": 0.2903044819831848, + "learning_rate": 0.00015230766474339536, + "loss": 1.1018, + "step": 7810 + }, + { + "epoch": 0.8248945147679325, + "grad_norm": 0.29660487174987793, + "learning_rate": 0.00015020833487815416, + "loss": 1.1117, + "step": 7820 + }, + { + "epoch": 0.8259493670886076, + "grad_norm": 0.30001166462898254, + "learning_rate": 0.0001481379410870792, + "loss": 1.1012, + "step": 7830 + }, + { + "epoch": 0.8270042194092827, + "grad_norm": 0.29828891158103943, + "learning_rate": 0.00014609608453033013, + "loss": 1.0863, + "step": 7840 + }, + { + "epoch": 0.8280590717299579, + "grad_norm": 0.29176872968673706, + "learning_rate": 
0.00014408237186546807, + "loss": 1.0958, + "step": 7850 + }, + { + "epoch": 0.8291139240506329, + "grad_norm": 0.2964164614677429, + "learning_rate": 0.00014209641517168273, + "loss": 1.0823, + "step": 7860 + }, + { + "epoch": 0.830168776371308, + "grad_norm": 0.33330321311950684, + "learning_rate": 0.00014013783187506265, + "loss": 1.0987, + "step": 7870 + }, + { + "epoch": 0.8312236286919831, + "grad_norm": 0.30649515986442566, + "learning_rate": 0.00013820624467489697, + "loss": 1.1167, + "step": 7880 + }, + { + "epoch": 0.8322784810126582, + "grad_norm": 0.29155051708221436, + "learning_rate": 0.00013630128147099213, + "loss": 1.1118, + "step": 7890 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.3033908009529114, + "learning_rate": 0.00013442257529199068, + "loss": 1.0933, + "step": 7900 + }, + { + "epoch": 0.8343881856540084, + "grad_norm": 0.30457115173339844, + "learning_rate": 0.00013256976422467803, + "loss": 1.1022, + "step": 7910 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 0.29595619440078735, + "learning_rate": 0.00013074249134426366, + "loss": 1.0985, + "step": 7920 + }, + { + "epoch": 0.8364978902953587, + "grad_norm": 0.3050054609775543, + "learning_rate": 0.0001289404046456233, + "loss": 1.1071, + "step": 7930 + }, + { + "epoch": 0.8375527426160337, + "grad_norm": 0.2907356023788452, + "learning_rate": 0.0001271631569754887, + "loss": 1.1019, + "step": 7940 + }, + { + "epoch": 0.8386075949367089, + "grad_norm": 0.32138654589653015, + "learning_rate": 0.0001254104059655723, + "loss": 1.1048, + "step": 7950 + }, + { + "epoch": 0.8396624472573839, + "grad_norm": 0.29230400919914246, + "learning_rate": 0.00012368181396661337, + "loss": 1.0924, + "step": 7960 + }, + { + "epoch": 0.8407172995780591, + "grad_norm": 0.3035050928592682, + "learning_rate": 0.00012197704798333364, + "loss": 1.0923, + "step": 7970 + }, + { + "epoch": 0.8417721518987342, + "grad_norm": 0.294167160987854, + "learning_rate": 0.00012029577961028894, + "loss": 
1.092, + "step": 7980 + }, + { + "epoch": 0.8428270042194093, + "grad_norm": 0.3287489414215088, + "learning_rate": 0.00011863768496860542, + "loss": 1.1029, + "step": 7990 + }, + { + "epoch": 0.8438818565400844, + "grad_norm": 0.2865371108055115, + "learning_rate": 0.00011700244464358777, + "loss": 1.0994, + "step": 8000 + }, + { + "epoch": 0.8449367088607594, + "grad_norm": 0.293733149766922, + "learning_rate": 0.00011538974362318715, + "loss": 1.102, + "step": 8010 + }, + { + "epoch": 0.8459915611814346, + "grad_norm": 0.2847878038883209, + "learning_rate": 0.00011379927123731737, + "loss": 1.0974, + "step": 8020 + }, + { + "epoch": 0.8470464135021097, + "grad_norm": 0.29861894249916077, + "learning_rate": 0.0001122307210980077, + "loss": 1.0946, + "step": 8030 + }, + { + "epoch": 0.8481012658227848, + "grad_norm": 0.28597918152809143, + "learning_rate": 0.00011068379104038026, + "loss": 1.1079, + "step": 8040 + }, + { + "epoch": 0.8491561181434599, + "grad_norm": 0.3035583198070526, + "learning_rate": 0.00010915818306444116, + "loss": 1.0885, + "step": 8050 + }, + { + "epoch": 0.8502109704641351, + "grad_norm": 0.2900126278400421, + "learning_rate": 0.00010765360327767384, + "loss": 1.0977, + "step": 8060 + }, + { + "epoch": 0.8512658227848101, + "grad_norm": 0.2991573214530945, + "learning_rate": 0.00010616976183842376, + "loss": 1.1008, + "step": 8070 + }, + { + "epoch": 0.8523206751054853, + "grad_norm": 0.29756081104278564, + "learning_rate": 0.00010470637290006365, + "loss": 1.1022, + "step": 8080 + }, + { + "epoch": 0.8533755274261603, + "grad_norm": 0.2888975441455841, + "learning_rate": 0.00010326315455592764, + "loss": 1.093, + "step": 8090 + }, + { + "epoch": 0.8544303797468354, + "grad_norm": 0.28867655992507935, + "learning_rate": 0.0001018398287850053, + "loss": 1.089, + "step": 8100 + }, + { + "epoch": 0.8554852320675106, + "grad_norm": 0.3160809278488159, + "learning_rate": 0.00010043612139838357, + "loss": 1.1028, + "step": 8110 + }, + { + 
"epoch": 0.8565400843881856, + "grad_norm": 0.29105138778686523, + "learning_rate": 9.905176198642719e-05, + "loss": 1.0934, + "step": 8120 + }, + { + "epoch": 0.8575949367088608, + "grad_norm": 0.3039015829563141, + "learning_rate": 9.76864838666871e-05, + "loss": 1.095, + "step": 8130 + }, + { + "epoch": 0.8586497890295358, + "grad_norm": 0.3249395489692688, + "learning_rate": 9.634002403252676e-05, + "loss": 1.0907, + "step": 8140 + }, + { + "epoch": 0.859704641350211, + "grad_norm": 0.30835098028182983, + "learning_rate": 9.501212310245681e-05, + "loss": 1.0901, + "step": 8150 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 0.3204496502876282, + "learning_rate": 9.370252527016777e-05, + "loss": 1.0998, + "step": 8160 + }, + { + "epoch": 0.8618143459915611, + "grad_norm": 0.29987505078315735, + "learning_rate": 9.241097825525163e-05, + "loss": 1.0899, + "step": 8170 + }, + { + "epoch": 0.8628691983122363, + "grad_norm": 0.30191531777381897, + "learning_rate": 9.113723325460276e-05, + "loss": 1.0984, + "step": 8180 + }, + { + "epoch": 0.8639240506329114, + "grad_norm": 0.305660218000412, + "learning_rate": 8.988104489448849e-05, + "loss": 1.0915, + "step": 8190 + }, + { + "epoch": 0.8649789029535865, + "grad_norm": 0.30820146203041077, + "learning_rate": 8.864217118328042e-05, + "loss": 1.1043, + "step": 8200 + }, + { + "epoch": 0.8660337552742616, + "grad_norm": 0.3056180775165558, + "learning_rate": 8.742037346483729e-05, + "loss": 1.0953, + "step": 8210 + }, + { + "epoch": 0.8670886075949367, + "grad_norm": 0.3009982109069824, + "learning_rate": 8.62154163725303e-05, + "loss": 1.1028, + "step": 8220 + }, + { + "epoch": 0.8681434599156118, + "grad_norm": 0.2943590581417084, + "learning_rate": 8.502706778390219e-05, + "loss": 1.098, + "step": 8230 + }, + { + "epoch": 0.869198312236287, + "grad_norm": 0.2908317446708679, + "learning_rate": 8.38550987759513e-05, + "loss": 1.0953, + "step": 8240 + }, + { + "epoch": 0.870253164556962, + "grad_norm": 
0.2977442145347595, + "learning_rate": 8.269928358103191e-05, + "loss": 1.1083, + "step": 8250 + }, + { + "epoch": 0.8713080168776371, + "grad_norm": 0.30347803235054016, + "learning_rate": 8.155939954336243e-05, + "loss": 1.0993, + "step": 8260 + }, + { + "epoch": 0.8723628691983122, + "grad_norm": 0.2959662675857544, + "learning_rate": 8.043522707613312e-05, + "loss": 1.0974, + "step": 8270 + }, + { + "epoch": 0.8734177215189873, + "grad_norm": 0.2978464365005493, + "learning_rate": 7.932654961920486e-05, + "loss": 1.081, + "step": 8280 + }, + { + "epoch": 0.8744725738396625, + "grad_norm": 0.30118417739868164, + "learning_rate": 7.823315359739135e-05, + "loss": 1.0832, + "step": 8290 + }, + { + "epoch": 0.8755274261603375, + "grad_norm": 0.29306381940841675, + "learning_rate": 7.715482837931577e-05, + "loss": 1.1044, + "step": 8300 + }, + { + "epoch": 0.8765822784810127, + "grad_norm": 0.30623766779899597, + "learning_rate": 7.6091366236835e-05, + "loss": 1.0839, + "step": 8310 + }, + { + "epoch": 0.8776371308016878, + "grad_norm": 0.2869498133659363, + "learning_rate": 7.504256230502289e-05, + "loss": 1.1019, + "step": 8320 + }, + { + "epoch": 0.8786919831223629, + "grad_norm": 0.2981465756893158, + "learning_rate": 7.400821454270524e-05, + "loss": 1.0953, + "step": 8330 + }, + { + "epoch": 0.879746835443038, + "grad_norm": 0.29135996103286743, + "learning_rate": 7.29881236935386e-05, + "loss": 1.0858, + "step": 8340 + }, + { + "epoch": 0.880801687763713, + "grad_norm": 0.2871904969215393, + "learning_rate": 7.198209324762562e-05, + "loss": 1.0872, + "step": 8350 + }, + { + "epoch": 0.8818565400843882, + "grad_norm": 0.2934054732322693, + "learning_rate": 7.098992940365946e-05, + "loss": 1.0869, + "step": 8360 + }, + { + "epoch": 0.8829113924050633, + "grad_norm": 0.2883404791355133, + "learning_rate": 7.001144103159e-05, + "loss": 1.0924, + "step": 8370 + }, + { + "epoch": 0.8839662447257384, + "grad_norm": 0.2962549924850464, + "learning_rate": 
6.904643963580461e-05, + "loss": 1.0977, + "step": 8380 + }, + { + "epoch": 0.8850210970464135, + "grad_norm": 0.3258002996444702, + "learning_rate": 6.809473931881644e-05, + "loss": 1.0899, + "step": 8390 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 0.2951005697250366, + "learning_rate": 6.71561567454532e-05, + "loss": 1.0893, + "step": 8400 + }, + { + "epoch": 0.8871308016877637, + "grad_norm": 0.29630401730537415, + "learning_rate": 6.623051110753948e-05, + "loss": 1.106, + "step": 8410 + }, + { + "epoch": 0.8881856540084389, + "grad_norm": 0.2885693311691284, + "learning_rate": 6.531762408906607e-05, + "loss": 1.096, + "step": 8420 + }, + { + "epoch": 0.8892405063291139, + "grad_norm": 0.2941160500049591, + "learning_rate": 6.441731983183912e-05, + "loss": 1.0941, + "step": 8430 + }, + { + "epoch": 0.890295358649789, + "grad_norm": 0.30166760087013245, + "learning_rate": 6.352942490160292e-05, + "loss": 1.0857, + "step": 8440 + }, + { + "epoch": 0.8913502109704642, + "grad_norm": 0.3127710521221161, + "learning_rate": 6.265376825462966e-05, + "loss": 1.0941, + "step": 8450 + }, + { + "epoch": 0.8924050632911392, + "grad_norm": 0.29389771819114685, + "learning_rate": 6.179018120476945e-05, + "loss": 1.0892, + "step": 8460 + }, + { + "epoch": 0.8934599156118144, + "grad_norm": 0.29795289039611816, + "learning_rate": 6.0938497390954946e-05, + "loss": 1.091, + "step": 8470 + }, + { + "epoch": 0.8945147679324894, + "grad_norm": 0.2943289577960968, + "learning_rate": 6.009855274515339e-05, + "loss": 1.0861, + "step": 8480 + }, + { + "epoch": 0.8955696202531646, + "grad_norm": 0.2887933850288391, + "learning_rate": 5.9270185460760735e-05, + "loss": 1.0902, + "step": 8490 + }, + { + "epoch": 0.8966244725738397, + "grad_norm": 0.2993631660938263, + "learning_rate": 5.8453235961431225e-05, + "loss": 1.0819, + "step": 8500 + }, + { + "epoch": 0.8976793248945147, + "grad_norm": 0.28679993748664856, + "learning_rate": 5.764754687033678e-05, + "loss": 1.0915, + 
"step": 8510 + }, + { + "epoch": 0.8987341772151899, + "grad_norm": 0.29990583658218384, + "learning_rate": 5.6852962979849836e-05, + "loss": 1.0873, + "step": 8520 + }, + { + "epoch": 0.8997890295358649, + "grad_norm": 0.29532426595687866, + "learning_rate": 5.6069331221644284e-05, + "loss": 1.1018, + "step": 8530 + }, + { + "epoch": 0.9008438818565401, + "grad_norm": 0.2920057475566864, + "learning_rate": 5.529650063720842e-05, + "loss": 1.0974, + "step": 8540 + }, + { + "epoch": 0.9018987341772152, + "grad_norm": 0.30257919430732727, + "learning_rate": 5.453432234876445e-05, + "loss": 1.0905, + "step": 8550 + }, + { + "epoch": 0.9029535864978903, + "grad_norm": 0.2978675663471222, + "learning_rate": 5.37826495305886e-05, + "loss": 1.0806, + "step": 8560 + }, + { + "epoch": 0.9040084388185654, + "grad_norm": 0.3190942406654358, + "learning_rate": 5.304133738072674e-05, + "loss": 1.0998, + "step": 8570 + }, + { + "epoch": 0.9050632911392406, + "grad_norm": 0.2860528826713562, + "learning_rate": 5.2310243093099814e-05, + "loss": 1.0911, + "step": 8580 + }, + { + "epoch": 0.9061181434599156, + "grad_norm": 0.3035094738006592, + "learning_rate": 5.158922582999368e-05, + "loss": 1.094, + "step": 8590 + }, + { + "epoch": 0.9071729957805907, + "grad_norm": 0.2932252287864685, + "learning_rate": 5.087814669492819e-05, + "loss": 1.0858, + "step": 8600 + }, + { + "epoch": 0.9082278481012658, + "grad_norm": 0.3255833685398102, + "learning_rate": 5.017686870590028e-05, + "loss": 1.0896, + "step": 8610 + }, + { + "epoch": 0.9092827004219409, + "grad_norm": 0.2977413237094879, + "learning_rate": 4.948525676899577e-05, + "loss": 1.0875, + "step": 8620 + }, + { + "epoch": 0.9103375527426161, + "grad_norm": 0.31040334701538086, + "learning_rate": 4.880317765236493e-05, + "loss": 1.0948, + "step": 8630 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 0.31936654448509216, + "learning_rate": 4.8130499960556755e-05, + "loss": 1.0764, + "step": 8640 + }, + { + "epoch": 
0.9124472573839663, + "grad_norm": 0.3022034764289856, + "learning_rate": 4.746709410920699e-05, + "loss": 1.0828, + "step": 8650 + }, + { + "epoch": 0.9135021097046413, + "grad_norm": 0.29438066482543945, + "learning_rate": 4.681283230007507e-05, + "loss": 1.0821, + "step": 8660 + }, + { + "epoch": 0.9145569620253164, + "grad_norm": 0.2978823184967041, + "learning_rate": 4.616758849642509e-05, + "loss": 1.094, + "step": 8670 + }, + { + "epoch": 0.9156118143459916, + "grad_norm": 0.28286972641944885, + "learning_rate": 4.553123839874615e-05, + "loss": 1.0975, + "step": 8680 + }, + { + "epoch": 0.9166666666666666, + "grad_norm": 0.2875301241874695, + "learning_rate": 4.490365942080736e-05, + "loss": 1.0921, + "step": 8690 + }, + { + "epoch": 0.9177215189873418, + "grad_norm": 0.28754597902297974, + "learning_rate": 4.428473066604285e-05, + "loss": 1.0898, + "step": 8700 + }, + { + "epoch": 0.9187763713080169, + "grad_norm": 0.3019983172416687, + "learning_rate": 4.367433290426233e-05, + "loss": 1.0886, + "step": 8710 + }, + { + "epoch": 0.919831223628692, + "grad_norm": 0.3068964183330536, + "learning_rate": 4.3072348548682595e-05, + "loss": 1.0845, + "step": 8720 + }, + { + "epoch": 0.9208860759493671, + "grad_norm": 0.3060677647590637, + "learning_rate": 4.247866163327575e-05, + "loss": 1.0976, + "step": 8730 + }, + { + "epoch": 0.9219409282700421, + "grad_norm": 0.30172476172447205, + "learning_rate": 4.1893157790429404e-05, + "loss": 1.0873, + "step": 8740 + }, + { + "epoch": 0.9229957805907173, + "grad_norm": 0.30481332540512085, + "learning_rate": 4.1315724228915066e-05, + "loss": 1.0837, + "step": 8750 + }, + { + "epoch": 0.9240506329113924, + "grad_norm": 0.2977677583694458, + "learning_rate": 4.074624971216005e-05, + "loss": 1.0734, + "step": 8760 + }, + { + "epoch": 0.9251054852320675, + "grad_norm": 0.28608590364456177, + "learning_rate": 4.018462453681889e-05, + "loss": 1.0808, + "step": 8770 + }, + { + "epoch": 0.9261603375527426, + "grad_norm": 
0.2980504035949707, + "learning_rate": 3.963074051164014e-05, + "loss": 1.0892, + "step": 8780 + }, + { + "epoch": 0.9272151898734177, + "grad_norm": 0.30341869592666626, + "learning_rate": 3.908449093662446e-05, + "loss": 1.0849, + "step": 8790 + }, + { + "epoch": 0.9282700421940928, + "grad_norm": 0.2970033884048462, + "learning_rate": 3.854577058246998e-05, + "loss": 1.0791, + "step": 8800 + }, + { + "epoch": 0.929324894514768, + "grad_norm": 0.2947596609592438, + "learning_rate": 3.801447567030094e-05, + "loss": 1.0989, + "step": 8810 + }, + { + "epoch": 0.930379746835443, + "grad_norm": 0.28587204217910767, + "learning_rate": 3.7490503851675777e-05, + "loss": 1.0862, + "step": 8820 + }, + { + "epoch": 0.9314345991561181, + "grad_norm": 0.30299025774002075, + "learning_rate": 3.6973754188870806e-05, + "loss": 1.1024, + "step": 8830 + }, + { + "epoch": 0.9324894514767933, + "grad_norm": 0.28645822405815125, + "learning_rate": 3.6464127135435536e-05, + "loss": 1.0886, + "step": 8840 + }, + { + "epoch": 0.9335443037974683, + "grad_norm": 0.3179738223552704, + "learning_rate": 3.596152451701616e-05, + "loss": 1.0871, + "step": 8850 + }, + { + "epoch": 0.9345991561181435, + "grad_norm": 0.3064330518245697, + "learning_rate": 3.5465849512443226e-05, + "loss": 1.0874, + "step": 8860 + }, + { + "epoch": 0.9356540084388185, + "grad_norm": 0.2956417500972748, + "learning_rate": 3.4977006635080086e-05, + "loss": 1.0883, + "step": 8870 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 0.29473647475242615, + "learning_rate": 3.449490171442838e-05, + "loss": 1.0936, + "step": 8880 + }, + { + "epoch": 0.9377637130801688, + "grad_norm": 0.29144287109375, + "learning_rate": 3.401944187798702e-05, + "loss": 1.091, + "step": 8890 + }, + { + "epoch": 0.9388185654008439, + "grad_norm": 0.2938981354236603, + "learning_rate": 3.355053553336137e-05, + "loss": 1.0807, + "step": 8900 + }, + { + "epoch": 0.939873417721519, + "grad_norm": 0.3047492504119873, + "learning_rate": 
3.308809235061882e-05, + "loss": 1.0822, + "step": 8910 + }, + { + "epoch": 0.9409282700421941, + "grad_norm": 0.29885032773017883, + "learning_rate": 3.263202324488772e-05, + "loss": 1.0891, + "step": 8920 + }, + { + "epoch": 0.9419831223628692, + "grad_norm": 0.28590863943099976, + "learning_rate": 3.218224035919609e-05, + "loss": 1.0853, + "step": 8930 + }, + { + "epoch": 0.9430379746835443, + "grad_norm": 0.29893019795417786, + "learning_rate": 3.173865704754688e-05, + "loss": 1.084, + "step": 8940 + }, + { + "epoch": 0.9440928270042194, + "grad_norm": 0.28867775201797485, + "learning_rate": 3.130118785822657e-05, + "loss": 1.0881, + "step": 8950 + }, + { + "epoch": 0.9451476793248945, + "grad_norm": 0.30242010951042175, + "learning_rate": 3.08697485173437e-05, + "loss": 1.0897, + "step": 8960 + }, + { + "epoch": 0.9462025316455697, + "grad_norm": 0.30233702063560486, + "learning_rate": 3.0444255912594442e-05, + "loss": 1.0869, + "step": 8970 + }, + { + "epoch": 0.9472573839662447, + "grad_norm": 0.30981701612472534, + "learning_rate": 3.002462807725185e-05, + "loss": 1.0843, + "step": 8980 + }, + { + "epoch": 0.9483122362869199, + "grad_norm": 0.3049086630344391, + "learning_rate": 2.9610784174375868e-05, + "loss": 1.0892, + "step": 8990 + }, + { + "epoch": 0.9493670886075949, + "grad_norm": 0.2946826219558716, + "learning_rate": 2.920264448124087e-05, + "loss": 1.0872, + "step": 9000 + }, + { + "epoch": 0.95042194092827, + "grad_norm": 0.30069807171821594, + "learning_rate": 2.8800130373977936e-05, + "loss": 1.081, + "step": 9010 + }, + { + "epoch": 0.9514767932489452, + "grad_norm": 0.28993991017341614, + "learning_rate": 2.84031643124288e-05, + "loss": 1.0833, + "step": 9020 + }, + { + "epoch": 0.9525316455696202, + "grad_norm": 0.3197329342365265, + "learning_rate": 2.8011669825208517e-05, + "loss": 1.1017, + "step": 9030 + }, + { + "epoch": 0.9535864978902954, + "grad_norm": 0.30109071731567383, + "learning_rate": 2.762557149497405e-05, + "loss": 1.0805, 
+ "step": 9040 + }, + { + "epoch": 0.9546413502109705, + "grad_norm": 0.2971667945384979, + "learning_rate": 2.724479494389592e-05, + "loss": 1.0882, + "step": 9050 + }, + { + "epoch": 0.9556962025316456, + "grad_norm": 0.2833009362220764, + "learning_rate": 2.6869266819330058e-05, + "loss": 1.0903, + "step": 9060 + }, + { + "epoch": 0.9567510548523207, + "grad_norm": 0.2999628186225891, + "learning_rate": 2.6498914779687228e-05, + "loss": 1.0894, + "step": 9070 + }, + { + "epoch": 0.9578059071729957, + "grad_norm": 0.29329007863998413, + "learning_rate": 2.6133667480497115e-05, + "loss": 1.0934, + "step": 9080 + }, + { + "epoch": 0.9588607594936709, + "grad_norm": 0.3185789883136749, + "learning_rate": 2.5773454560664597e-05, + "loss": 1.0925, + "step": 9090 + }, + { + "epoch": 0.959915611814346, + "grad_norm": 0.3005650043487549, + "learning_rate": 2.541820662891541e-05, + "loss": 1.0723, + "step": 9100 + }, + { + "epoch": 0.9609704641350211, + "grad_norm": 0.2987126410007477, + "learning_rate": 2.5067855250428616e-05, + "loss": 1.082, + "step": 9110 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 0.28658491373062134, + "learning_rate": 2.472233293365335e-05, + "loss": 1.0943, + "step": 9120 + }, + { + "epoch": 0.9630801687763713, + "grad_norm": 0.2930833697319031, + "learning_rate": 2.4381573117307307e-05, + "loss": 1.0793, + "step": 9130 + }, + { + "epoch": 0.9641350210970464, + "grad_norm": 0.31947436928749084, + "learning_rate": 2.4045510157554362e-05, + "loss": 1.0969, + "step": 9140 + }, + { + "epoch": 0.9651898734177216, + "grad_norm": 0.3002609312534332, + "learning_rate": 2.3714079315358985e-05, + "loss": 1.0834, + "step": 9150 + }, + { + "epoch": 0.9662447257383966, + "grad_norm": 0.29156044125556946, + "learning_rate": 2.338721674401494e-05, + "loss": 1.091, + "step": 9160 + }, + { + "epoch": 0.9672995780590717, + "grad_norm": 0.30455270409584045, + "learning_rate": 2.30648594768459e-05, + "loss": 1.0899, + "step": 9170 + }, + { + "epoch": 
0.9683544303797469, + "grad_norm": 0.2864668667316437, + "learning_rate": 2.2746945415075523e-05, + "loss": 1.0872, + "step": 9180 + }, + { + "epoch": 0.9694092827004219, + "grad_norm": 0.3029510974884033, + "learning_rate": 2.2433413315864803e-05, + "loss": 1.083, + "step": 9190 + }, + { + "epoch": 0.9704641350210971, + "grad_norm": 0.2998829483985901, + "learning_rate": 2.2124202780514277e-05, + "loss": 1.0851, + "step": 9200 + }, + { + "epoch": 0.9715189873417721, + "grad_norm": 0.2927697002887726, + "learning_rate": 2.1819254242828815e-05, + "loss": 1.0858, + "step": 9210 + }, + { + "epoch": 0.9725738396624473, + "grad_norm": 0.29847201704978943, + "learning_rate": 2.151850895764285e-05, + "loss": 1.0816, + "step": 9220 + }, + { + "epoch": 0.9736286919831224, + "grad_norm": 0.291204035282135, + "learning_rate": 2.12219089895037e-05, + "loss": 1.0765, + "step": 9230 + }, + { + "epoch": 0.9746835443037974, + "grad_norm": 0.3079606592655182, + "learning_rate": 2.092939720151092e-05, + "loss": 1.0721, + "step": 9240 + }, + { + "epoch": 0.9757383966244726, + "grad_norm": 0.3021531403064728, + "learning_rate": 2.064091724430947e-05, + "loss": 1.0819, + "step": 9250 + }, + { + "epoch": 0.9767932489451476, + "grad_norm": 0.2918916642665863, + "learning_rate": 2.0356413545234603e-05, + "loss": 1.0785, + "step": 9260 + }, + { + "epoch": 0.9778481012658228, + "grad_norm": 0.30379199981689453, + "learning_rate": 2.0075831297606357e-05, + "loss": 1.0873, + "step": 9270 + }, + { + "epoch": 0.9789029535864979, + "grad_norm": 0.29026755690574646, + "learning_rate": 1.9799116450171627e-05, + "loss": 1.0746, + "step": 9280 + }, + { + "epoch": 0.979957805907173, + "grad_norm": 0.28747591376304626, + "learning_rate": 1.952621569669175e-05, + "loss": 1.0864, + "step": 9290 + }, + { + "epoch": 0.9810126582278481, + "grad_norm": 0.2957964539527893, + "learning_rate": 1.9257076465673605e-05, + "loss": 1.0824, + "step": 9300 + }, + { + "epoch": 0.9820675105485233, + "grad_norm": 
0.2994972765445709, + "learning_rate": 1.899164691024229e-05, + "loss": 1.0846, + "step": 9310 + }, + { + "epoch": 0.9831223628691983, + "grad_norm": 0.30032384395599365, + "learning_rate": 1.872987589815331e-05, + "loss": 1.0809, + "step": 9320 + }, + { + "epoch": 0.9841772151898734, + "grad_norm": 0.31126585602760315, + "learning_rate": 1.8471713001942538e-05, + "loss": 1.0893, + "step": 9330 + }, + { + "epoch": 0.9852320675105485, + "grad_norm": 0.30770644545555115, + "learning_rate": 1.8217108489211845e-05, + "loss": 1.0888, + "step": 9340 + }, + { + "epoch": 0.9862869198312236, + "grad_norm": 0.2887580990791321, + "learning_rate": 1.7966013313048696e-05, + "loss": 1.0989, + "step": 9350 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 0.299443781375885, + "learning_rate": 1.7718379102577752e-05, + "loss": 1.0882, + "step": 9360 + }, + { + "epoch": 0.9883966244725738, + "grad_norm": 0.29414528608322144, + "learning_rate": 1.7474158153642745e-05, + "loss": 1.087, + "step": 9370 + }, + { + "epoch": 0.989451476793249, + "grad_norm": 0.2917827069759369, + "learning_rate": 1.7233303419616745e-05, + "loss": 1.0745, + "step": 9380 + }, + { + "epoch": 0.990506329113924, + "grad_norm": 0.3026796877384186, + "learning_rate": 1.699576850233916e-05, + "loss": 1.0796, + "step": 9390 + }, + { + "epoch": 0.9915611814345991, + "grad_norm": 0.3261394798755646, + "learning_rate": 1.6761507643177553e-05, + "loss": 1.094, + "step": 9400 + }, + { + "epoch": 0.9926160337552743, + "grad_norm": 0.29815807938575745, + "learning_rate": 1.6530475714212752e-05, + "loss": 1.0693, + "step": 9410 + }, + { + "epoch": 0.9936708860759493, + "grad_norm": 0.29984185099601746, + "learning_rate": 1.6302628209545423e-05, + "loss": 1.0877, + "step": 9420 + }, + { + "epoch": 0.9947257383966245, + "grad_norm": 0.30411556363105774, + "learning_rate": 1.6077921236722464e-05, + "loss": 1.0886, + "step": 9430 + }, + { + "epoch": 0.9957805907172996, + "grad_norm": 0.28755223751068115, + 
"learning_rate": 1.5856311508281594e-05, + "loss": 1.0829, + "step": 9440 + }, + { + "epoch": 0.9968354430379747, + "grad_norm": 0.2939528226852417, + "learning_rate": 1.5637756333412454e-05, + "loss": 1.0933, + "step": 9450 + }, + { + "epoch": 0.9978902953586498, + "grad_norm": 0.2983037233352661, + "learning_rate": 1.542221360973268e-05, + "loss": 1.0775, + "step": 9460 + }, + { + "epoch": 0.9989451476793249, + "grad_norm": 0.29906195402145386, + "learning_rate": 1.5209641815177312e-05, + "loss": 1.09, + "step": 9470 + }, + { + "epoch": 1.0, + "grad_norm": 0.8255208134651184, + "learning_rate": 1.5e-05, + "loss": 1.0781, + "step": 9480 + } + ], + "logging_steps": 10, + "max_steps": 9480, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 5.036240179760947e+16, + "train_batch_size": 1024, + "trial_name": null, + "trial_params": null +} diff --git a/saves-gemma/checkpoint-9480/training_args.bin b/saves-gemma/checkpoint-9480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..9c2d4b274232207f6533d0693dc95413b0544003 --- /dev/null +++ b/saves-gemma/checkpoint-9480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07713313038530030bc669d3aab955aa94344a30a3b3e3b1e81ee541c7585cfd +size 5112 diff --git a/saves-gemma/config.json b/saves-gemma/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d48ca7aeb27c97cf10a10143d9cee52b66b74ab3 --- /dev/null +++ b/saves-gemma/config.json @@ -0,0 +1,27 @@ +{ + "architectures": [ + "GemmaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 2, + "eos_token_id": 1, + "head_dim": 256, + "hidden_act": "gelu_pytorch_tanh", + 
"hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 768, + "max_position_embeddings": 8192, + "model_type": "gemma", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pad_token_id": 0, + "rms_norm_eps": 1e-06, + "rope_theta": 10000.0, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-gemma/generation_config.json b/saves-gemma/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c20913bfa6d3576264545acb67eae5f4818d0d32 --- /dev/null +++ b/saves-gemma/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 2, + "eos_token_id": 1, + "pad_token_id": 0, + "transformers_version": "4.42.4" +} diff --git a/saves-gemma/model.safetensors b/saves-gemma/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4a5554046839f7cf6efcc313d9e0248e0ba82c20 --- /dev/null +++ b/saves-gemma/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:228b9e625e6010052454dd32e91d5dff4dc5cb37dc5e42def029775a7f9d945d +size 19356792 diff --git a/saves-gemma/result.log b/saves-gemma/result.log new file mode 100644 index 0000000000000000000000000000000000000000..fd475367f80eb39584ec47fcc5ea5da6125b9b0d --- /dev/null +++ b/saves-gemma/result.log @@ -0,0 +1 @@ +{'train_runtime': 5286.3999, 'train_samples_per_second': 1836.146, 'train_steps_per_second': 1.793, 'train_loss': 1.3894349830563058, 'epoch': 1.0} \ No newline at end of file diff --git a/saves-gemma/special_tokens_map.json b/saves-gemma/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-gemma/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-gemma/tokenizer.json b/saves-gemma/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-gemma/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": 
false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + 
"IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 
323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, 
+ "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 
617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + 
"çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + 
"èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 
1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + 
"Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 
1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + 
"Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 
1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, 
+ "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 
1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + 
"éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-gemma/tokenizer_config.json b/saves-gemma/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-gemma/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "model_max_length": 
4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-gemma2-cosine/checkpoint-9480/config.json b/saves-gemma2-cosine/checkpoint-9480/config.json new file mode 100644 index 0000000000000000000000000000000000000000..96bfe46f54281277b27d9690dc209d86a9da6b69 --- /dev/null +++ b/saves-gemma2-cosine/checkpoint-9480/config.json @@ -0,0 +1,31 @@ +{ + "architectures": [ + "Gemma2ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": 50.0, + "bos_token_id": 2, + "cache_implementation": "hybrid", + "eos_token_id": 1, + "final_logit_softcapping": 30.0, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 768, + "max_position_embeddings": 8192, + "model_type": "gemma2", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pad_token_id": 0, + "query_pre_attn_scalar": 224, + "rms_norm_eps": 1e-06, + "rope_theta": 10000.0, + "sliding_window": 4096, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-gemma2-cosine/checkpoint-9480/generation_config.json b/saves-gemma2-cosine/checkpoint-9480/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e9f2b419d37547ea30e2f193ff04443472c78cba --- /dev/null +++ b/saves-gemma2-cosine/checkpoint-9480/generation_config.json @@ -0,0 +1,8 @@ +{ + "_from_model_config": true, + "bos_token_id": 2, + "cache_implementation": "hybrid", + "eos_token_id": 1, + "pad_token_id": 0, + "transformers_version": "4.42.4" +} diff --git a/saves-gemma2-cosine/checkpoint-9480/model.safetensors b/saves-gemma2-cosine/checkpoint-9480/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..78988c1005749af407d2cf7995af5c09edc9060a --- /dev/null +++ 
b/saves-gemma2-cosine/checkpoint-9480/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a81076713bdcac9494c693701b3378c2a10e2e3cb4895841da92d04fe7f8a65 +size 19361344 diff --git a/saves-gemma2-cosine/checkpoint-9480/optimizer.pt b/saves-gemma2-cosine/checkpoint-9480/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0202dd699ee8714b9c0d2fa7b3348fc2d24c7896 --- /dev/null +++ b/saves-gemma2-cosine/checkpoint-9480/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6aea34c6109bdeb16958cc17e6ad3185924f905fdc62eb22814f48f0f8bbbbde +size 38738134 diff --git a/saves-gemma2-cosine/checkpoint-9480/rng_state.pth b/saves-gemma2-cosine/checkpoint-9480/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f8291cd6ce87668b786a72f3e93d072fbe54902 --- /dev/null +++ b/saves-gemma2-cosine/checkpoint-9480/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c917636c7a58af68a29056522a757e9f9b99005b776641aa157c536967817d +size 14244 diff --git a/saves-gemma2-cosine/checkpoint-9480/scheduler.pt b/saves-gemma2-cosine/checkpoint-9480/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..03c145297021546d40e130546440641e02059bcb --- /dev/null +++ b/saves-gemma2-cosine/checkpoint-9480/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35fd617624c087e1a286ed7cf3fa38baa4a8815e49f107c3186b4c7c58e1adbb +size 1064 diff --git a/saves-gemma2-cosine/checkpoint-9480/special_tokens_map.json b/saves-gemma2-cosine/checkpoint-9480/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-gemma2-cosine/checkpoint-9480/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-gemma2-cosine/checkpoint-9480/tokenizer.json b/saves-gemma2-cosine/checkpoint-9480/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-gemma2-cosine/checkpoint-9480/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": 
false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, 
+ "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + 
"å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 
468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + 
"éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 
758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 
901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, 
+ "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 
1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + 
"åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 
1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 
1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + 
"ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + 
"Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + 
"è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-gemma2-cosine/checkpoint-9480/tokenizer_config.json b/saves-gemma2-cosine/checkpoint-9480/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-gemma2-cosine/checkpoint-9480/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + 
"eos_token": "<|im_end|>", + "errors": "replace", + "model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-gemma2-cosine/checkpoint-9480/trainer_state.json b/saves-gemma2-cosine/checkpoint-9480/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2d0e75cbd3e24135902ab2a4b54bdf59361093bb --- /dev/null +++ b/saves-gemma2-cosine/checkpoint-9480/trainer_state.json @@ -0,0 +1,6669 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 9480, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0010548523206751054, + "grad_norm": 1.1786375045776367, + "learning_rate": 0.00015789473684210527, + "loss": 7.5005, + "step": 10 + }, + { + "epoch": 0.002109704641350211, + "grad_norm": 1.1283082962036133, + "learning_rate": 0.00031578947368421053, + "loss": 6.9543, + "step": 20 + }, + { + "epoch": 0.0031645569620253164, + "grad_norm": 0.8450711965560913, + "learning_rate": 0.00047368421052631577, + "loss": 6.2966, + "step": 30 + }, + { + "epoch": 0.004219409282700422, + "grad_norm": 0.86847984790802, + "learning_rate": 0.0006315789473684211, + "loss": 5.8563, + "step": 40 + }, + { + "epoch": 0.005274261603375527, + "grad_norm": 3.4591453075408936, + "learning_rate": 0.0007894736842105263, + "loss": 5.5547, + "step": 50 + }, + { + "epoch": 0.006329113924050633, + "grad_norm": 2.574970245361328, + "learning_rate": 0.0009473684210526315, + "loss": 5.1923, + "step": 60 + }, + { + "epoch": 0.007383966244725738, + "grad_norm": 1.2372632026672363, + "learning_rate": 0.0011052631578947368, + "loss": 4.8056, + "step": 70 + }, + { + "epoch": 0.008438818565400843, + "grad_norm": 1.2699588537216187, + "learning_rate": 0.0012631578947368421, + "loss": 4.4742, + "step": 80 + }, + { + "epoch": 0.00949367088607595, + 
"grad_norm": 0.9223687052726746, + "learning_rate": 0.0014210526315789472, + "loss": 4.1948, + "step": 90 + }, + { + "epoch": 0.010548523206751054, + "grad_norm": 1.0993579626083374, + "learning_rate": 0.0014999989494847376, + "loss": 3.9927, + "step": 100 + }, + { + "epoch": 0.011603375527426161, + "grad_norm": 0.6931979060173035, + "learning_rate": 0.0014999905453802946, + "loss": 3.7924, + "step": 110 + }, + { + "epoch": 0.012658227848101266, + "grad_norm": 0.6839775443077087, + "learning_rate": 0.0014999737372655805, + "loss": 3.6497, + "step": 120 + }, + { + "epoch": 0.013713080168776372, + "grad_norm": 0.43988367915153503, + "learning_rate": 0.0014999485253289388, + "loss": 3.5165, + "step": 130 + }, + { + "epoch": 0.014767932489451477, + "grad_norm": 0.8311160802841187, + "learning_rate": 0.0014999149098528814, + "loss": 3.4126, + "step": 140 + }, + { + "epoch": 0.015822784810126583, + "grad_norm": 0.5584062933921814, + "learning_rate": 0.0014998728912140862, + "loss": 3.3212, + "step": 150 + }, + { + "epoch": 0.016877637130801686, + "grad_norm": 0.9340130686759949, + "learning_rate": 0.0014998224698833922, + "loss": 3.2332, + "step": 160 + }, + { + "epoch": 0.017932489451476793, + "grad_norm": 0.5206780433654785, + "learning_rate": 0.0014997636464257956, + "loss": 3.1616, + "step": 170 + }, + { + "epoch": 0.0189873417721519, + "grad_norm": 0.637313961982727, + "learning_rate": 0.0014996964215004416, + "loss": 3.1068, + "step": 180 + }, + { + "epoch": 0.020042194092827006, + "grad_norm": 0.4834980368614197, + "learning_rate": 0.0014996207958606182, + "loss": 3.0324, + "step": 190 + }, + { + "epoch": 0.02109704641350211, + "grad_norm": 0.5396218299865723, + "learning_rate": 0.001499536770353748, + "loss": 2.9795, + "step": 200 + }, + { + "epoch": 0.022151898734177215, + "grad_norm": 0.4954635798931122, + "learning_rate": 0.0014994443459213774, + "loss": 2.9332, + "step": 210 + }, + { + "epoch": 0.023206751054852322, + "grad_norm": 0.49574777483940125, + 
"learning_rate": 0.001499343523599168, + "loss": 2.9044, + "step": 220 + }, + { + "epoch": 0.024261603375527425, + "grad_norm": 0.5420284867286682, + "learning_rate": 0.0014992343045168823, + "loss": 2.8437, + "step": 230 + }, + { + "epoch": 0.02531645569620253, + "grad_norm": 0.5483509302139282, + "learning_rate": 0.0014991166898983739, + "loss": 2.8051, + "step": 240 + }, + { + "epoch": 0.026371308016877638, + "grad_norm": 0.6089008450508118, + "learning_rate": 0.001498990681061572, + "loss": 2.7672, + "step": 250 + }, + { + "epoch": 0.027426160337552744, + "grad_norm": 0.46545055508613586, + "learning_rate": 0.001498856279418467, + "loss": 2.7339, + "step": 260 + }, + { + "epoch": 0.028481012658227847, + "grad_norm": 0.7093607187271118, + "learning_rate": 0.0014987134864750948, + "loss": 2.7015, + "step": 270 + }, + { + "epoch": 0.029535864978902954, + "grad_norm": 0.5162585377693176, + "learning_rate": 0.0014985623038315206, + "loss": 2.6582, + "step": 280 + }, + { + "epoch": 0.03059071729957806, + "grad_norm": 0.5473024845123291, + "learning_rate": 0.0014984027331818193, + "loss": 2.6274, + "step": 290 + }, + { + "epoch": 0.03164556962025317, + "grad_norm": 0.65415358543396, + "learning_rate": 0.0014982347763140584, + "loss": 2.6029, + "step": 300 + }, + { + "epoch": 0.03270042194092827, + "grad_norm": 0.6702428460121155, + "learning_rate": 0.0014980584351102762, + "loss": 2.5708, + "step": 310 + }, + { + "epoch": 0.03375527426160337, + "grad_norm": 0.4799017012119293, + "learning_rate": 0.001497873711546462, + "loss": 2.5236, + "step": 320 + }, + { + "epoch": 0.03481012658227848, + "grad_norm": 0.5816959142684937, + "learning_rate": 0.0014976806076925334, + "loss": 2.5104, + "step": 330 + }, + { + "epoch": 0.035864978902953586, + "grad_norm": 0.5880727171897888, + "learning_rate": 0.0014974791257123137, + "loss": 2.4743, + "step": 340 + }, + { + "epoch": 0.03691983122362869, + "grad_norm": 0.5146706104278564, + "learning_rate": 0.001497269267863507, + "loss": 
2.4224, + "step": 350 + }, + { + "epoch": 0.0379746835443038, + "grad_norm": 0.5553146004676819, + "learning_rate": 0.0014970510364976724, + "loss": 2.3964, + "step": 360 + }, + { + "epoch": 0.039029535864978905, + "grad_norm": 0.5755693912506104, + "learning_rate": 0.0014968244340601996, + "loss": 2.3727, + "step": 370 + }, + { + "epoch": 0.04008438818565401, + "grad_norm": 0.4807509779930115, + "learning_rate": 0.001496589463090279, + "loss": 2.3604, + "step": 380 + }, + { + "epoch": 0.04113924050632911, + "grad_norm": 0.5620039701461792, + "learning_rate": 0.001496346126220875, + "loss": 2.3263, + "step": 390 + }, + { + "epoch": 0.04219409282700422, + "grad_norm": 0.5503111481666565, + "learning_rate": 0.0014960944261786966, + "loss": 2.2942, + "step": 400 + }, + { + "epoch": 0.043248945147679324, + "grad_norm": 0.6013765931129456, + "learning_rate": 0.0014958343657841655, + "loss": 2.2798, + "step": 410 + }, + { + "epoch": 0.04430379746835443, + "grad_norm": 0.5729621052742004, + "learning_rate": 0.001495565947951385, + "loss": 2.2528, + "step": 420 + }, + { + "epoch": 0.04535864978902954, + "grad_norm": 0.49604207277297974, + "learning_rate": 0.0014952891756881085, + "loss": 2.2176, + "step": 430 + }, + { + "epoch": 0.046413502109704644, + "grad_norm": 0.5452326536178589, + "learning_rate": 0.0014950040520957037, + "loss": 2.1945, + "step": 440 + }, + { + "epoch": 0.04746835443037975, + "grad_norm": 0.6180269718170166, + "learning_rate": 0.0014947105803691204, + "loss": 2.1934, + "step": 450 + }, + { + "epoch": 0.04852320675105485, + "grad_norm": 0.5523627400398254, + "learning_rate": 0.0014944087637968522, + "loss": 2.1666, + "step": 460 + }, + { + "epoch": 0.049578059071729956, + "grad_norm": 0.5989572405815125, + "learning_rate": 0.0014940986057609012, + "loss": 2.1449, + "step": 470 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 0.5024518966674805, + "learning_rate": 0.0014937801097367396, + "loss": 2.1319, + "step": 480 + }, + { + "epoch": 
0.05168776371308017, + "grad_norm": 0.5460103154182434, + "learning_rate": 0.001493453279293271, + "loss": 2.1052, + "step": 490 + }, + { + "epoch": 0.052742616033755275, + "grad_norm": 0.5047776103019714, + "learning_rate": 0.0014931181180927902, + "loss": 2.0904, + "step": 500 + }, + { + "epoch": 0.05379746835443038, + "grad_norm": 0.527693510055542, + "learning_rate": 0.001492774629890942, + "loss": 2.0735, + "step": 510 + }, + { + "epoch": 0.05485232067510549, + "grad_norm": 0.5165040493011475, + "learning_rate": 0.001492422818536679, + "loss": 2.0585, + "step": 520 + }, + { + "epoch": 0.05590717299578059, + "grad_norm": 0.47094449400901794, + "learning_rate": 0.00149206268797222, + "loss": 2.0408, + "step": 530 + }, + { + "epoch": 0.056962025316455694, + "grad_norm": 0.6123978495597839, + "learning_rate": 0.0014916942422330032, + "loss": 2.0181, + "step": 540 + }, + { + "epoch": 0.0580168776371308, + "grad_norm": 0.4959682822227478, + "learning_rate": 0.001491317485447643, + "loss": 2.0153, + "step": 550 + }, + { + "epoch": 0.05907172995780591, + "grad_norm": 0.4704259932041168, + "learning_rate": 0.0014909324218378838, + "loss": 1.9735, + "step": 560 + }, + { + "epoch": 0.060126582278481014, + "grad_norm": 0.5504405498504639, + "learning_rate": 0.0014905390557185508, + "loss": 1.9783, + "step": 570 + }, + { + "epoch": 0.06118143459915612, + "grad_norm": 0.5816100835800171, + "learning_rate": 0.0014901373914975036, + "loss": 1.9767, + "step": 580 + }, + { + "epoch": 0.06223628691983123, + "grad_norm": 0.5452632904052734, + "learning_rate": 0.0014897274336755856, + "loss": 1.9497, + "step": 590 + }, + { + "epoch": 0.06329113924050633, + "grad_norm": 0.728222131729126, + "learning_rate": 0.001489309186846575, + "loss": 1.9298, + "step": 600 + }, + { + "epoch": 0.06434599156118144, + "grad_norm": 0.5034846067428589, + "learning_rate": 0.0014888826556971313, + "loss": 1.9288, + "step": 610 + }, + { + "epoch": 0.06540084388185655, + "grad_norm": 0.5513783693313599, 
+ "learning_rate": 0.0014884478450067444, + "loss": 1.9089, + "step": 620 + }, + { + "epoch": 0.06645569620253164, + "grad_norm": 0.47766217589378357, + "learning_rate": 0.0014880047596476807, + "loss": 1.8931, + "step": 630 + }, + { + "epoch": 0.06751054852320675, + "grad_norm": 0.44135981798171997, + "learning_rate": 0.0014875534045849274, + "loss": 1.9058, + "step": 640 + }, + { + "epoch": 0.06856540084388185, + "grad_norm": 0.46024608612060547, + "learning_rate": 0.0014870937848761388, + "loss": 1.89, + "step": 650 + }, + { + "epoch": 0.06962025316455696, + "grad_norm": 0.5603986382484436, + "learning_rate": 0.001486625905671578, + "loss": 1.8777, + "step": 660 + }, + { + "epoch": 0.07067510548523206, + "grad_norm": 0.5151029825210571, + "learning_rate": 0.00148614977221406, + "loss": 1.8615, + "step": 670 + }, + { + "epoch": 0.07172995780590717, + "grad_norm": 0.5190609097480774, + "learning_rate": 0.0014856653898388927, + "loss": 1.8535, + "step": 680 + }, + { + "epoch": 0.07278481012658228, + "grad_norm": 0.5474860668182373, + "learning_rate": 0.001485172763973817, + "loss": 1.8615, + "step": 690 + }, + { + "epoch": 0.07383966244725738, + "grad_norm": 0.48657894134521484, + "learning_rate": 0.0014846719001389466, + "loss": 1.8398, + "step": 700 + }, + { + "epoch": 0.07489451476793249, + "grad_norm": 0.5151002407073975, + "learning_rate": 0.001484162803946705, + "loss": 1.8268, + "step": 710 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 0.4923333525657654, + "learning_rate": 0.0014836454811017635, + "loss": 1.8079, + "step": 720 + }, + { + "epoch": 0.0770042194092827, + "grad_norm": 0.5203661918640137, + "learning_rate": 0.0014831199374009778, + "loss": 1.799, + "step": 730 + }, + { + "epoch": 0.07805907172995781, + "grad_norm": 0.5483428239822388, + "learning_rate": 0.0014825861787333208, + "loss": 1.8083, + "step": 740 + }, + { + "epoch": 0.07911392405063292, + "grad_norm": 0.6901031732559204, + "learning_rate": 0.0014820442110798197, + "loss": 
1.794, + "step": 750 + }, + { + "epoch": 0.08016877637130802, + "grad_norm": 0.61411052942276, + "learning_rate": 0.0014814940405134865, + "loss": 1.7867, + "step": 760 + }, + { + "epoch": 0.08122362869198312, + "grad_norm": 0.48286744952201843, + "learning_rate": 0.001480935673199251, + "loss": 1.7777, + "step": 770 + }, + { + "epoch": 0.08227848101265822, + "grad_norm": 0.5010297894477844, + "learning_rate": 0.0014803691153938915, + "loss": 1.7738, + "step": 780 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 0.46501874923706055, + "learning_rate": 0.0014797943734459653, + "loss": 1.7728, + "step": 790 + }, + { + "epoch": 0.08438818565400844, + "grad_norm": 0.4639991521835327, + "learning_rate": 0.001479211453795736, + "loss": 1.7507, + "step": 800 + }, + { + "epoch": 0.08544303797468354, + "grad_norm": 0.46044954657554626, + "learning_rate": 0.0014786203629751033, + "loss": 1.7363, + "step": 810 + }, + { + "epoch": 0.08649789029535865, + "grad_norm": 0.5332083106040955, + "learning_rate": 0.0014780211076075279, + "loss": 1.7459, + "step": 820 + }, + { + "epoch": 0.08755274261603375, + "grad_norm": 0.450283944606781, + "learning_rate": 0.0014774136944079594, + "loss": 1.7486, + "step": 830 + }, + { + "epoch": 0.08860759493670886, + "grad_norm": 0.49115118384361267, + "learning_rate": 0.0014767981301827592, + "loss": 1.7225, + "step": 840 + }, + { + "epoch": 0.08966244725738397, + "grad_norm": 0.46080419421195984, + "learning_rate": 0.0014761744218296249, + "loss": 1.7214, + "step": 850 + }, + { + "epoch": 0.09071729957805907, + "grad_norm": 0.4616183340549469, + "learning_rate": 0.001475542576337513, + "loss": 1.7175, + "step": 860 + }, + { + "epoch": 0.09177215189873418, + "grad_norm": 0.46727532148361206, + "learning_rate": 0.001474902600786561, + "loss": 1.7143, + "step": 870 + }, + { + "epoch": 0.09282700421940929, + "grad_norm": 0.5013511180877686, + "learning_rate": 0.0014742545023480075, + "loss": 1.7104, + "step": 880 + }, + { + "epoch": 
0.0938818565400844, + "grad_norm": 0.4666815996170044, + "learning_rate": 0.0014735982882841117, + "loss": 1.6977, + "step": 890 + }, + { + "epoch": 0.0949367088607595, + "grad_norm": 0.4509837031364441, + "learning_rate": 0.0014729339659480727, + "loss": 1.6958, + "step": 900 + }, + { + "epoch": 0.09599156118143459, + "grad_norm": 0.4511234760284424, + "learning_rate": 0.0014722615427839468, + "loss": 1.7013, + "step": 910 + }, + { + "epoch": 0.0970464135021097, + "grad_norm": 0.45813870429992676, + "learning_rate": 0.0014715810263265633, + "loss": 1.6905, + "step": 920 + }, + { + "epoch": 0.0981012658227848, + "grad_norm": 0.43483203649520874, + "learning_rate": 0.0014708924242014423, + "loss": 1.6723, + "step": 930 + }, + { + "epoch": 0.09915611814345991, + "grad_norm": 0.44111964106559753, + "learning_rate": 0.0014701957441247064, + "loss": 1.6822, + "step": 940 + }, + { + "epoch": 0.10021097046413502, + "grad_norm": 0.4732177257537842, + "learning_rate": 0.0014694909939029959, + "loss": 1.6606, + "step": 950 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 0.472810298204422, + "learning_rate": 0.0014687781814333814, + "loss": 1.6676, + "step": 960 + }, + { + "epoch": 0.10232067510548523, + "grad_norm": 0.6334882378578186, + "learning_rate": 0.0014680573147032746, + "loss": 1.6724, + "step": 970 + }, + { + "epoch": 0.10337552742616034, + "grad_norm": 0.4691200852394104, + "learning_rate": 0.0014673284017903392, + "loss": 1.6489, + "step": 980 + }, + { + "epoch": 0.10443037974683544, + "grad_norm": 0.4657635986804962, + "learning_rate": 0.0014665914508624, + "loss": 1.6474, + "step": 990 + }, + { + "epoch": 0.10548523206751055, + "grad_norm": 0.4358479082584381, + "learning_rate": 0.0014658464701773526, + "loss": 1.6562, + "step": 1000 + }, + { + "epoch": 0.10654008438818566, + "grad_norm": 0.4507771134376526, + "learning_rate": 0.0014650934680830688, + "loss": 1.6411, + "step": 1010 + }, + { + "epoch": 0.10759493670886076, + "grad_norm": 
0.5386626720428467, + "learning_rate": 0.0014643324530173051, + "loss": 1.6365, + "step": 1020 + }, + { + "epoch": 0.10864978902953587, + "grad_norm": 0.45840373635292053, + "learning_rate": 0.0014635634335076067, + "loss": 1.6375, + "step": 1030 + }, + { + "epoch": 0.10970464135021098, + "grad_norm": 0.43903160095214844, + "learning_rate": 0.001462786418171213, + "loss": 1.6301, + "step": 1040 + }, + { + "epoch": 0.11075949367088607, + "grad_norm": 0.5652233958244324, + "learning_rate": 0.0014620014157149597, + "loss": 1.6305, + "step": 1050 + }, + { + "epoch": 0.11181434599156118, + "grad_norm": 0.49068471789360046, + "learning_rate": 0.001461208434935183, + "loss": 1.6226, + "step": 1060 + }, + { + "epoch": 0.11286919831223628, + "grad_norm": 0.43248870968818665, + "learning_rate": 0.0014604074847176197, + "loss": 1.6119, + "step": 1070 + }, + { + "epoch": 0.11392405063291139, + "grad_norm": 0.453948050737381, + "learning_rate": 0.0014595985740373082, + "loss": 1.6082, + "step": 1080 + }, + { + "epoch": 0.1149789029535865, + "grad_norm": 0.5646439790725708, + "learning_rate": 0.0014587817119584873, + "loss": 1.6192, + "step": 1090 + }, + { + "epoch": 0.1160337552742616, + "grad_norm": 0.45730093121528625, + "learning_rate": 0.001457956907634496, + "loss": 1.6074, + "step": 1100 + }, + { + "epoch": 0.11708860759493671, + "grad_norm": 0.47897714376449585, + "learning_rate": 0.0014571241703076692, + "loss": 1.6122, + "step": 1110 + }, + { + "epoch": 0.11814345991561181, + "grad_norm": 0.46311941742897034, + "learning_rate": 0.0014562835093092348, + "loss": 1.6018, + "step": 1120 + }, + { + "epoch": 0.11919831223628692, + "grad_norm": 0.4735914468765259, + "learning_rate": 0.0014554349340592104, + "loss": 1.5881, + "step": 1130 + }, + { + "epoch": 0.12025316455696203, + "grad_norm": 0.4411863386631012, + "learning_rate": 0.001454578454066296, + "loss": 1.6022, + "step": 1140 + }, + { + "epoch": 0.12130801687763713, + "grad_norm": 0.4954151213169098, + 
"learning_rate": 0.0014537140789277678, + "loss": 1.5873, + "step": 1150 + }, + { + "epoch": 0.12236286919831224, + "grad_norm": 0.44517555832862854, + "learning_rate": 0.0014528418183293716, + "loss": 1.5929, + "step": 1160 + }, + { + "epoch": 0.12341772151898735, + "grad_norm": 0.48129817843437195, + "learning_rate": 0.001451961682045213, + "loss": 1.5839, + "step": 1170 + }, + { + "epoch": 0.12447257383966245, + "grad_norm": 0.4071759283542633, + "learning_rate": 0.001451073679937649, + "loss": 1.5691, + "step": 1180 + }, + { + "epoch": 0.12552742616033755, + "grad_norm": 0.5022065043449402, + "learning_rate": 0.0014501778219571766, + "loss": 1.5718, + "step": 1190 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 0.46575772762298584, + "learning_rate": 0.0014492741181423225, + "loss": 1.5824, + "step": 1200 + }, + { + "epoch": 0.12763713080168776, + "grad_norm": 0.4444436728954315, + "learning_rate": 0.0014483625786195285, + "loss": 1.575, + "step": 1210 + }, + { + "epoch": 0.12869198312236288, + "grad_norm": 0.4218766987323761, + "learning_rate": 0.0014474432136030405, + "loss": 1.5606, + "step": 1220 + }, + { + "epoch": 0.12974683544303797, + "grad_norm": 0.47626185417175293, + "learning_rate": 0.0014465160333947923, + "loss": 1.5525, + "step": 1230 + }, + { + "epoch": 0.1308016877637131, + "grad_norm": 0.4337499737739563, + "learning_rate": 0.0014455810483842908, + "loss": 1.5654, + "step": 1240 + }, + { + "epoch": 0.13185654008438819, + "grad_norm": 0.4665796160697937, + "learning_rate": 0.0014446382690484997, + "loss": 1.5663, + "step": 1250 + }, + { + "epoch": 0.13291139240506328, + "grad_norm": 0.491285115480423, + "learning_rate": 0.0014436877059517215, + "loss": 1.5472, + "step": 1260 + }, + { + "epoch": 0.1339662447257384, + "grad_norm": 0.4332406520843506, + "learning_rate": 0.0014427293697454803, + "loss": 1.5506, + "step": 1270 + }, + { + "epoch": 0.1350210970464135, + "grad_norm": 0.4447202980518341, + "learning_rate": 0.001441763271168401, 
+ "loss": 1.5595, + "step": 1280 + }, + { + "epoch": 0.1360759493670886, + "grad_norm": 0.4293375611305237, + "learning_rate": 0.00144078942104609, + "loss": 1.5496, + "step": 1290 + }, + { + "epoch": 0.1371308016877637, + "grad_norm": 0.4669581353664398, + "learning_rate": 0.001439807830291013, + "loss": 1.5358, + "step": 1300 + }, + { + "epoch": 0.13818565400843882, + "grad_norm": 0.41919976472854614, + "learning_rate": 0.0014388185099023744, + "loss": 1.5401, + "step": 1310 + }, + { + "epoch": 0.13924050632911392, + "grad_norm": 0.4593247175216675, + "learning_rate": 0.0014378214709659916, + "loss": 1.5404, + "step": 1320 + }, + { + "epoch": 0.14029535864978904, + "grad_norm": 0.47923025488853455, + "learning_rate": 0.0014368167246541733, + "loss": 1.5336, + "step": 1330 + }, + { + "epoch": 0.14135021097046413, + "grad_norm": 0.49659043550491333, + "learning_rate": 0.0014358042822255918, + "loss": 1.5332, + "step": 1340 + }, + { + "epoch": 0.14240506329113925, + "grad_norm": 0.45621344447135925, + "learning_rate": 0.0014347841550251597, + "loss": 1.5366, + "step": 1350 + }, + { + "epoch": 0.14345991561181434, + "grad_norm": 0.45655909180641174, + "learning_rate": 0.0014337563544838997, + "loss": 1.5309, + "step": 1360 + }, + { + "epoch": 0.14451476793248946, + "grad_norm": 0.5150895118713379, + "learning_rate": 0.001432720892118819, + "loss": 1.5276, + "step": 1370 + }, + { + "epoch": 0.14556962025316456, + "grad_norm": 0.45793405175209045, + "learning_rate": 0.0014316777795327794, + "loss": 1.5171, + "step": 1380 + }, + { + "epoch": 0.14662447257383968, + "grad_norm": 0.5041676759719849, + "learning_rate": 0.001430627028414366, + "loss": 1.5294, + "step": 1390 + }, + { + "epoch": 0.14767932489451477, + "grad_norm": 0.4763033390045166, + "learning_rate": 0.0014295686505377586, + "loss": 1.5124, + "step": 1400 + }, + { + "epoch": 0.14873417721518986, + "grad_norm": 0.4887363910675049, + "learning_rate": 0.0014285026577625982, + "loss": 1.5134, + "step": 1410 + }, 
+ { + "epoch": 0.14978902953586498, + "grad_norm": 0.4866980016231537, + "learning_rate": 0.0014274290620338542, + "loss": 1.5165, + "step": 1420 + }, + { + "epoch": 0.15084388185654007, + "grad_norm": 0.4552108645439148, + "learning_rate": 0.0014263478753816906, + "loss": 1.511, + "step": 1430 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 0.40933629870414734, + "learning_rate": 0.0014252591099213326, + "loss": 1.5074, + "step": 1440 + }, + { + "epoch": 0.1529535864978903, + "grad_norm": 0.3994167745113373, + "learning_rate": 0.001424162777852928, + "loss": 1.505, + "step": 1450 + }, + { + "epoch": 0.1540084388185654, + "grad_norm": 0.4502972662448883, + "learning_rate": 0.0014230588914614134, + "loss": 1.4982, + "step": 1460 + }, + { + "epoch": 0.1550632911392405, + "grad_norm": 0.43475016951560974, + "learning_rate": 0.0014219474631163745, + "loss": 1.4998, + "step": 1470 + }, + { + "epoch": 0.15611814345991562, + "grad_norm": 0.52269047498703, + "learning_rate": 0.001420828505271909, + "loss": 1.5072, + "step": 1480 + }, + { + "epoch": 0.1571729957805907, + "grad_norm": 0.4271791875362396, + "learning_rate": 0.0014197020304664856, + "loss": 1.5025, + "step": 1490 + }, + { + "epoch": 0.15822784810126583, + "grad_norm": 0.4259066879749298, + "learning_rate": 0.0014185680513228048, + "loss": 1.4983, + "step": 1500 + }, + { + "epoch": 0.15928270042194093, + "grad_norm": 0.4236868917942047, + "learning_rate": 0.0014174265805476564, + "loss": 1.5005, + "step": 1510 + }, + { + "epoch": 0.16033755274261605, + "grad_norm": 0.4510207176208496, + "learning_rate": 0.0014162776309317778, + "loss": 1.5005, + "step": 1520 + }, + { + "epoch": 0.16139240506329114, + "grad_norm": 0.4019942283630371, + "learning_rate": 0.0014151212153497108, + "loss": 1.4823, + "step": 1530 + }, + { + "epoch": 0.16244725738396623, + "grad_norm": 0.4121156334877014, + "learning_rate": 0.0014139573467596561, + "loss": 1.4716, + "step": 1540 + }, + { + "epoch": 0.16350210970464135, + 
"grad_norm": 0.4612344801425934, + "learning_rate": 0.00141278603820333, + "loss": 1.4753, + "step": 1550 + }, + { + "epoch": 0.16455696202531644, + "grad_norm": 0.4241642355918884, + "learning_rate": 0.0014116073028058165, + "loss": 1.4793, + "step": 1560 + }, + { + "epoch": 0.16561181434599156, + "grad_norm": 0.48224085569381714, + "learning_rate": 0.0014104211537754217, + "loss": 1.4698, + "step": 1570 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 0.4202425181865692, + "learning_rate": 0.001409227604403524, + "loss": 1.4792, + "step": 1580 + }, + { + "epoch": 0.16772151898734178, + "grad_norm": 0.4475680887699127, + "learning_rate": 0.0014080266680644277, + "loss": 1.4763, + "step": 1590 + }, + { + "epoch": 0.16877637130801687, + "grad_norm": 0.43157926201820374, + "learning_rate": 0.0014068183582152103, + "loss": 1.4784, + "step": 1600 + }, + { + "epoch": 0.169831223628692, + "grad_norm": 0.4209076166152954, + "learning_rate": 0.001405602688395574, + "loss": 1.475, + "step": 1610 + }, + { + "epoch": 0.17088607594936708, + "grad_norm": 0.434438019990921, + "learning_rate": 0.0014043796722276924, + "loss": 1.4488, + "step": 1620 + }, + { + "epoch": 0.1719409282700422, + "grad_norm": 0.4991225600242615, + "learning_rate": 0.0014031493234160591, + "loss": 1.4619, + "step": 1630 + }, + { + "epoch": 0.1729957805907173, + "grad_norm": 0.4300524890422821, + "learning_rate": 0.0014019116557473332, + "loss": 1.4645, + "step": 1640 + }, + { + "epoch": 0.17405063291139242, + "grad_norm": 0.5056980848312378, + "learning_rate": 0.0014006666830901854, + "loss": 1.4578, + "step": 1650 + }, + { + "epoch": 0.1751054852320675, + "grad_norm": 0.40890949964523315, + "learning_rate": 0.001399414419395142, + "loss": 1.4563, + "step": 1660 + }, + { + "epoch": 0.17616033755274263, + "grad_norm": 0.4789458215236664, + "learning_rate": 0.0013981548786944293, + "loss": 1.4693, + "step": 1670 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 0.5433962345123291, + 
"learning_rate": 0.0013968880751018158, + "loss": 1.4518, + "step": 1680 + }, + { + "epoch": 0.17827004219409281, + "grad_norm": 0.438131719827652, + "learning_rate": 0.0013956140228124545, + "loss": 1.4474, + "step": 1690 + }, + { + "epoch": 0.17932489451476794, + "grad_norm": 0.41799402236938477, + "learning_rate": 0.0013943327361027231, + "loss": 1.4627, + "step": 1700 + }, + { + "epoch": 0.18037974683544303, + "grad_norm": 0.43579208850860596, + "learning_rate": 0.0013930442293300649, + "loss": 1.4419, + "step": 1710 + }, + { + "epoch": 0.18143459915611815, + "grad_norm": 0.39426130056381226, + "learning_rate": 0.0013917485169328279, + "loss": 1.4403, + "step": 1720 + }, + { + "epoch": 0.18248945147679324, + "grad_norm": 0.44635093212127686, + "learning_rate": 0.0013904456134301016, + "loss": 1.4416, + "step": 1730 + }, + { + "epoch": 0.18354430379746836, + "grad_norm": 0.4294600486755371, + "learning_rate": 0.0013891355334215562, + "loss": 1.4437, + "step": 1740 + }, + { + "epoch": 0.18459915611814345, + "grad_norm": 0.41782107949256897, + "learning_rate": 0.0013878182915872776, + "loss": 1.4648, + "step": 1750 + }, + { + "epoch": 0.18565400843881857, + "grad_norm": 0.39697811007499695, + "learning_rate": 0.001386493902687604, + "loss": 1.4455, + "step": 1760 + }, + { + "epoch": 0.18670886075949367, + "grad_norm": 0.42468470335006714, + "learning_rate": 0.00138516238156296, + "loss": 1.4453, + "step": 1770 + }, + { + "epoch": 0.1877637130801688, + "grad_norm": 0.4047176241874695, + "learning_rate": 0.0013838237431336895, + "loss": 1.4473, + "step": 1780 + }, + { + "epoch": 0.18881856540084388, + "grad_norm": 0.49719178676605225, + "learning_rate": 0.0013824780023998899, + "loss": 1.448, + "step": 1790 + }, + { + "epoch": 0.189873417721519, + "grad_norm": 0.4322167634963989, + "learning_rate": 0.0013811251744412431, + "loss": 1.4436, + "step": 1800 + }, + { + "epoch": 0.1909282700421941, + "grad_norm": 0.40722331404685974, + "learning_rate": 
0.0013797652744168473, + "loss": 1.4309, + "step": 1810 + }, + { + "epoch": 0.19198312236286919, + "grad_norm": 0.4781511723995209, + "learning_rate": 0.0013783983175650457, + "loss": 1.4278, + "step": 1820 + }, + { + "epoch": 0.1930379746835443, + "grad_norm": 0.4903255105018616, + "learning_rate": 0.0013770243192032581, + "loss": 1.4303, + "step": 1830 + }, + { + "epoch": 0.1940928270042194, + "grad_norm": 0.41799622774124146, + "learning_rate": 0.0013756432947278064, + "loss": 1.4271, + "step": 1840 + }, + { + "epoch": 0.19514767932489452, + "grad_norm": 0.3908631503582001, + "learning_rate": 0.0013742552596137444, + "loss": 1.4278, + "step": 1850 + }, + { + "epoch": 0.1962025316455696, + "grad_norm": 0.4002251923084259, + "learning_rate": 0.0013728602294146833, + "loss": 1.4315, + "step": 1860 + }, + { + "epoch": 0.19725738396624473, + "grad_norm": 0.3998194932937622, + "learning_rate": 0.0013714582197626175, + "loss": 1.433, + "step": 1870 + }, + { + "epoch": 0.19831223628691982, + "grad_norm": 0.41368791460990906, + "learning_rate": 0.0013700492463677501, + "loss": 1.4203, + "step": 1880 + }, + { + "epoch": 0.19936708860759494, + "grad_norm": 0.40817055106163025, + "learning_rate": 0.0013686333250183154, + "loss": 1.4297, + "step": 1890 + }, + { + "epoch": 0.20042194092827004, + "grad_norm": 0.4214288890361786, + "learning_rate": 0.001367210471580404, + "loss": 1.4227, + "step": 1900 + }, + { + "epoch": 0.20147679324894516, + "grad_norm": 0.4761678874492645, + "learning_rate": 0.0013657807019977835, + "loss": 1.426, + "step": 1910 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 0.4355992376804352, + "learning_rate": 0.0013643440322917198, + "loss": 1.4211, + "step": 1920 + }, + { + "epoch": 0.20358649789029537, + "grad_norm": 0.427737295627594, + "learning_rate": 0.0013629004785607989, + "loss": 1.4219, + "step": 1930 + }, + { + "epoch": 0.20464135021097046, + "grad_norm": 0.38655394315719604, + "learning_rate": 0.0013614500569807445, + "loss": 
1.4143, + "step": 1940 + }, + { + "epoch": 0.20569620253164558, + "grad_norm": 0.4236430823802948, + "learning_rate": 0.0013599927838042394, + "loss": 1.4095, + "step": 1950 + }, + { + "epoch": 0.20675105485232068, + "grad_norm": 0.40793803334236145, + "learning_rate": 0.0013585286753607408, + "loss": 1.4139, + "step": 1960 + }, + { + "epoch": 0.20780590717299577, + "grad_norm": 0.3905400335788727, + "learning_rate": 0.0013570577480562986, + "loss": 1.4225, + "step": 1970 + }, + { + "epoch": 0.2088607594936709, + "grad_norm": 0.4371512830257416, + "learning_rate": 0.0013555800183733717, + "loss": 1.4011, + "step": 1980 + }, + { + "epoch": 0.20991561181434598, + "grad_norm": 0.3963046967983246, + "learning_rate": 0.0013540955028706425, + "loss": 1.4037, + "step": 1990 + }, + { + "epoch": 0.2109704641350211, + "grad_norm": 0.4001469612121582, + "learning_rate": 0.0013526042181828324, + "loss": 1.4001, + "step": 2000 + }, + { + "epoch": 0.2120253164556962, + "grad_norm": 0.374077171087265, + "learning_rate": 0.0013511061810205143, + "loss": 1.4085, + "step": 2010 + }, + { + "epoch": 0.21308016877637131, + "grad_norm": 0.40897613763809204, + "learning_rate": 0.001349601408169926, + "loss": 1.4194, + "step": 2020 + }, + { + "epoch": 0.2141350210970464, + "grad_norm": 0.40229299664497375, + "learning_rate": 0.0013480899164927823, + "loss": 1.4057, + "step": 2030 + }, + { + "epoch": 0.21518987341772153, + "grad_norm": 0.3836434781551361, + "learning_rate": 0.0013465717229260853, + "loss": 1.402, + "step": 2040 + }, + { + "epoch": 0.21624472573839662, + "grad_norm": 0.43161365389823914, + "learning_rate": 0.001345046844481935, + "loss": 1.4009, + "step": 2050 + }, + { + "epoch": 0.21729957805907174, + "grad_norm": 0.43926313519477844, + "learning_rate": 0.0013435152982473396, + "loss": 1.395, + "step": 2060 + }, + { + "epoch": 0.21835443037974683, + "grad_norm": 0.4069500267505646, + "learning_rate": 0.0013419771013840217, + "loss": 1.4004, + "step": 2070 + }, + { + 
"epoch": 0.21940928270042195, + "grad_norm": 0.41078418493270874, + "learning_rate": 0.001340432271128229, + "loss": 1.4011, + "step": 2080 + }, + { + "epoch": 0.22046413502109705, + "grad_norm": 0.431235671043396, + "learning_rate": 0.0013388808247905381, + "loss": 1.3927, + "step": 2090 + }, + { + "epoch": 0.22151898734177214, + "grad_norm": 0.4648006856441498, + "learning_rate": 0.0013373227797556634, + "loss": 1.3989, + "step": 2100 + }, + { + "epoch": 0.22257383966244726, + "grad_norm": 0.40733999013900757, + "learning_rate": 0.00133575815348226, + "loss": 1.39, + "step": 2110 + }, + { + "epoch": 0.22362869198312235, + "grad_norm": 0.3789640963077545, + "learning_rate": 0.0013341869635027292, + "loss": 1.3878, + "step": 2120 + }, + { + "epoch": 0.22468354430379747, + "grad_norm": 0.40259355306625366, + "learning_rate": 0.001332609227423022, + "loss": 1.3901, + "step": 2130 + }, + { + "epoch": 0.22573839662447256, + "grad_norm": 0.3735804855823517, + "learning_rate": 0.0013310249629224417, + "loss": 1.3846, + "step": 2140 + }, + { + "epoch": 0.22679324894514769, + "grad_norm": 0.39388445019721985, + "learning_rate": 0.0013294341877534454, + "loss": 1.3927, + "step": 2150 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 0.5024318099021912, + "learning_rate": 0.0013278369197414458, + "loss": 1.404, + "step": 2160 + }, + { + "epoch": 0.2289029535864979, + "grad_norm": 0.4365748167037964, + "learning_rate": 0.0013262331767846104, + "loss": 1.3889, + "step": 2170 + }, + { + "epoch": 0.229957805907173, + "grad_norm": 0.38077864050865173, + "learning_rate": 0.0013246229768536628, + "loss": 1.3716, + "step": 2180 + }, + { + "epoch": 0.2310126582278481, + "grad_norm": 0.42655134201049805, + "learning_rate": 0.001323006337991679, + "loss": 1.3876, + "step": 2190 + }, + { + "epoch": 0.2320675105485232, + "grad_norm": 0.47345173358917236, + "learning_rate": 0.0013213832783138873, + "loss": 1.3899, + "step": 2200 + }, + { + "epoch": 0.23312236286919832, + 
"grad_norm": 0.387643426656723, + "learning_rate": 0.0013197538160074633, + "loss": 1.3737, + "step": 2210 + }, + { + "epoch": 0.23417721518987342, + "grad_norm": 0.3712332546710968, + "learning_rate": 0.0013181179693313283, + "loss": 1.3877, + "step": 2220 + }, + { + "epoch": 0.23523206751054854, + "grad_norm": 0.4834437966346741, + "learning_rate": 0.0013164757566159428, + "loss": 1.3795, + "step": 2230 + }, + { + "epoch": 0.23628691983122363, + "grad_norm": 0.40131646394729614, + "learning_rate": 0.001314827196263102, + "loss": 1.3764, + "step": 2240 + }, + { + "epoch": 0.23734177215189872, + "grad_norm": 0.426821231842041, + "learning_rate": 0.0013131723067457302, + "loss": 1.3734, + "step": 2250 + }, + { + "epoch": 0.23839662447257384, + "grad_norm": 0.5108863711357117, + "learning_rate": 0.0013115111066076721, + "loss": 1.364, + "step": 2260 + }, + { + "epoch": 0.23945147679324894, + "grad_norm": 0.4811517000198364, + "learning_rate": 0.0013098436144634862, + "loss": 1.3987, + "step": 2270 + }, + { + "epoch": 0.24050632911392406, + "grad_norm": 0.42388418316841125, + "learning_rate": 0.0013081698489982364, + "loss": 1.3835, + "step": 2280 + }, + { + "epoch": 0.24156118143459915, + "grad_norm": 0.4197463393211365, + "learning_rate": 0.001306489828967282, + "loss": 1.3724, + "step": 2290 + }, + { + "epoch": 0.24261603375527427, + "grad_norm": 0.39482253789901733, + "learning_rate": 0.0013048035731960679, + "loss": 1.3712, + "step": 2300 + }, + { + "epoch": 0.24367088607594936, + "grad_norm": 0.38612571358680725, + "learning_rate": 0.0013031111005799133, + "loss": 1.3679, + "step": 2310 + }, + { + "epoch": 0.24472573839662448, + "grad_norm": 0.3846849203109741, + "learning_rate": 0.0013014124300838004, + "loss": 1.3796, + "step": 2320 + }, + { + "epoch": 0.24578059071729957, + "grad_norm": 0.3670080304145813, + "learning_rate": 0.0012997075807421612, + "loss": 1.3627, + "step": 2330 + }, + { + "epoch": 0.2468354430379747, + "grad_norm": 0.4192656874656677, + 
"learning_rate": 0.0012979965716586653, + "loss": 1.3584, + "step": 2340 + }, + { + "epoch": 0.2478902953586498, + "grad_norm": 0.3855079114437103, + "learning_rate": 0.0012962794220060048, + "loss": 1.3585, + "step": 2350 + }, + { + "epoch": 0.2489451476793249, + "grad_norm": 0.4666673541069031, + "learning_rate": 0.0012945561510256801, + "loss": 1.371, + "step": 2360 + }, + { + "epoch": 0.25, + "grad_norm": 0.5110845565795898, + "learning_rate": 0.001292826778027784, + "loss": 1.3675, + "step": 2370 + }, + { + "epoch": 0.2510548523206751, + "grad_norm": 0.3944718837738037, + "learning_rate": 0.0012910913223907856, + "loss": 1.3622, + "step": 2380 + }, + { + "epoch": 0.2521097046413502, + "grad_norm": 0.4182351529598236, + "learning_rate": 0.0012893498035613123, + "loss": 1.3628, + "step": 2390 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 0.38452470302581787, + "learning_rate": 0.001287602241053933, + "loss": 1.3632, + "step": 2400 + }, + { + "epoch": 0.2542194092827004, + "grad_norm": 0.40802836418151855, + "learning_rate": 0.0012858486544509392, + "loss": 1.365, + "step": 2410 + }, + { + "epoch": 0.2552742616033755, + "grad_norm": 0.431040495634079, + "learning_rate": 0.0012840890634021249, + "loss": 1.358, + "step": 2420 + }, + { + "epoch": 0.2563291139240506, + "grad_norm": 0.4450365900993347, + "learning_rate": 0.0012823234876245667, + "loss": 1.3575, + "step": 2430 + }, + { + "epoch": 0.25738396624472576, + "grad_norm": 0.40130454301834106, + "learning_rate": 0.0012805519469024035, + "loss": 1.367, + "step": 2440 + }, + { + "epoch": 0.25843881856540085, + "grad_norm": 0.3889053761959076, + "learning_rate": 0.0012787744610866143, + "loss": 1.3522, + "step": 2450 + }, + { + "epoch": 0.25949367088607594, + "grad_norm": 0.37585121393203735, + "learning_rate": 0.0012769910500947954, + "loss": 1.364, + "step": 2460 + }, + { + "epoch": 0.26054852320675104, + "grad_norm": 0.41503727436065674, + "learning_rate": 0.0012752017339109376, + "loss": 1.3542, + 
"step": 2470 + }, + { + "epoch": 0.2616033755274262, + "grad_norm": 0.3692167401313782, + "learning_rate": 0.0012734065325852029, + "loss": 1.3613, + "step": 2480 + }, + { + "epoch": 0.2626582278481013, + "grad_norm": 0.41024911403656006, + "learning_rate": 0.0012716054662336987, + "loss": 1.3469, + "step": 2490 + }, + { + "epoch": 0.26371308016877637, + "grad_norm": 0.42239823937416077, + "learning_rate": 0.001269798555038252, + "loss": 1.3528, + "step": 2500 + }, + { + "epoch": 0.26476793248945146, + "grad_norm": 0.40146514773368835, + "learning_rate": 0.0012679858192461864, + "loss": 1.3523, + "step": 2510 + }, + { + "epoch": 0.26582278481012656, + "grad_norm": 0.3812294602394104, + "learning_rate": 0.0012661672791700906, + "loss": 1.3541, + "step": 2520 + }, + { + "epoch": 0.2668776371308017, + "grad_norm": 0.39357152581214905, + "learning_rate": 0.0012643429551875945, + "loss": 1.3522, + "step": 2530 + }, + { + "epoch": 0.2679324894514768, + "grad_norm": 0.5319991111755371, + "learning_rate": 0.0012625128677411388, + "loss": 1.3514, + "step": 2540 + }, + { + "epoch": 0.2689873417721519, + "grad_norm": 0.39748305082321167, + "learning_rate": 0.0012606770373377475, + "loss": 1.3504, + "step": 2550 + }, + { + "epoch": 0.270042194092827, + "grad_norm": 0.3796849250793457, + "learning_rate": 0.0012588354845487959, + "loss": 1.3532, + "step": 2560 + }, + { + "epoch": 0.27109704641350213, + "grad_norm": 0.41927945613861084, + "learning_rate": 0.001256988230009783, + "loss": 1.347, + "step": 2570 + }, + { + "epoch": 0.2721518987341772, + "grad_norm": 0.37476640939712524, + "learning_rate": 0.0012551352944200976, + "loss": 1.347, + "step": 2580 + }, + { + "epoch": 0.2732067510548523, + "grad_norm": 0.3877147436141968, + "learning_rate": 0.0012532766985427874, + "loss": 1.3515, + "step": 2590 + }, + { + "epoch": 0.2742616033755274, + "grad_norm": 0.38439762592315674, + "learning_rate": 0.0012514124632043272, + "loss": 1.352, + "step": 2600 + }, + { + "epoch": 
0.27531645569620256, + "grad_norm": 0.47190430760383606, + "learning_rate": 0.0012495426092943842, + "loss": 1.3465, + "step": 2610 + }, + { + "epoch": 0.27637130801687765, + "grad_norm": 0.4592326879501343, + "learning_rate": 0.0012476671577655845, + "loss": 1.348, + "step": 2620 + }, + { + "epoch": 0.27742616033755274, + "grad_norm": 0.38836050033569336, + "learning_rate": 0.0012457861296332774, + "loss": 1.3344, + "step": 2630 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 0.4069218337535858, + "learning_rate": 0.001243899545975303, + "loss": 1.3479, + "step": 2640 + }, + { + "epoch": 0.2795358649789029, + "grad_norm": 0.42172059416770935, + "learning_rate": 0.0012420074279317515, + "loss": 1.3405, + "step": 2650 + }, + { + "epoch": 0.2805907172995781, + "grad_norm": 0.40382683277130127, + "learning_rate": 0.0012401097967047298, + "loss": 1.3349, + "step": 2660 + }, + { + "epoch": 0.28164556962025317, + "grad_norm": 0.3661898672580719, + "learning_rate": 0.001238206673558122, + "loss": 1.3365, + "step": 2670 + }, + { + "epoch": 0.28270042194092826, + "grad_norm": 0.4041979908943176, + "learning_rate": 0.0012362980798173526, + "loss": 1.3363, + "step": 2680 + }, + { + "epoch": 0.28375527426160335, + "grad_norm": 0.3893468976020813, + "learning_rate": 0.0012343840368691462, + "loss": 1.3352, + "step": 2690 + }, + { + "epoch": 0.2848101265822785, + "grad_norm": 0.38205385208129883, + "learning_rate": 0.0012324645661612886, + "loss": 1.3307, + "step": 2700 + }, + { + "epoch": 0.2858649789029536, + "grad_norm": 0.4036223292350769, + "learning_rate": 0.0012305396892023867, + "loss": 1.3373, + "step": 2710 + }, + { + "epoch": 0.2869198312236287, + "grad_norm": 0.3751426339149475, + "learning_rate": 0.0012286094275616264, + "loss": 1.3348, + "step": 2720 + }, + { + "epoch": 0.2879746835443038, + "grad_norm": 0.3788963854312897, + "learning_rate": 0.0012266738028685318, + "loss": 1.3195, + "step": 2730 + }, + { + "epoch": 0.2890295358649789, + "grad_norm": 
0.45994821190834045, + "learning_rate": 0.001224732836812723, + "loss": 1.3288, + "step": 2740 + }, + { + "epoch": 0.290084388185654, + "grad_norm": 0.38628584146499634, + "learning_rate": 0.0012227865511436724, + "loss": 1.3349, + "step": 2750 + }, + { + "epoch": 0.2911392405063291, + "grad_norm": 0.3824787139892578, + "learning_rate": 0.001220834967670461, + "loss": 1.3428, + "step": 2760 + }, + { + "epoch": 0.2921940928270042, + "grad_norm": 0.35537394881248474, + "learning_rate": 0.0012188781082615346, + "loss": 1.3379, + "step": 2770 + }, + { + "epoch": 0.29324894514767935, + "grad_norm": 0.3919638991355896, + "learning_rate": 0.0012169159948444588, + "loss": 1.3341, + "step": 2780 + }, + { + "epoch": 0.29430379746835444, + "grad_norm": 0.3717555105686188, + "learning_rate": 0.001214948649405672, + "loss": 1.3286, + "step": 2790 + }, + { + "epoch": 0.29535864978902954, + "grad_norm": 0.37457147240638733, + "learning_rate": 0.0012129760939902407, + "loss": 1.3277, + "step": 2800 + }, + { + "epoch": 0.29641350210970463, + "grad_norm": 0.3967638909816742, + "learning_rate": 0.0012109983507016114, + "loss": 1.3304, + "step": 2810 + }, + { + "epoch": 0.2974683544303797, + "grad_norm": 0.3726331889629364, + "learning_rate": 0.0012090154417013636, + "loss": 1.3341, + "step": 2820 + }, + { + "epoch": 0.29852320675105487, + "grad_norm": 0.3776319921016693, + "learning_rate": 0.0012070273892089605, + "loss": 1.3102, + "step": 2830 + }, + { + "epoch": 0.29957805907172996, + "grad_norm": 0.3855263888835907, + "learning_rate": 0.0012050342155015012, + "loss": 1.3159, + "step": 2840 + }, + { + "epoch": 0.30063291139240506, + "grad_norm": 0.38862308859825134, + "learning_rate": 0.0012030359429134707, + "loss": 1.3191, + "step": 2850 + }, + { + "epoch": 0.30168776371308015, + "grad_norm": 0.40104252099990845, + "learning_rate": 0.0012010325938364883, + "loss": 1.3232, + "step": 2860 + }, + { + "epoch": 0.3027426160337553, + "grad_norm": 0.3975936770439148, + "learning_rate": 
0.0011990241907190592, + "loss": 1.3262, + "step": 2870 + }, + { + "epoch": 0.3037974683544304, + "grad_norm": 0.3963460326194763, + "learning_rate": 0.001197010756066321, + "loss": 1.3127, + "step": 2880 + }, + { + "epoch": 0.3048523206751055, + "grad_norm": 0.39736154675483704, + "learning_rate": 0.0011949923124397917, + "loss": 1.315, + "step": 2890 + }, + { + "epoch": 0.3059071729957806, + "grad_norm": 0.43555042147636414, + "learning_rate": 0.001192968882457118, + "loss": 1.3208, + "step": 2900 + }, + { + "epoch": 0.3069620253164557, + "grad_norm": 0.36673370003700256, + "learning_rate": 0.001190940488791821, + "loss": 1.3232, + "step": 2910 + }, + { + "epoch": 0.3080168776371308, + "grad_norm": 0.41171562671661377, + "learning_rate": 0.0011889071541730419, + "loss": 1.3157, + "step": 2920 + }, + { + "epoch": 0.3090717299578059, + "grad_norm": 0.37587830424308777, + "learning_rate": 0.001186868901385288, + "loss": 1.3134, + "step": 2930 + }, + { + "epoch": 0.310126582278481, + "grad_norm": 0.3840104937553406, + "learning_rate": 0.001184825753268177, + "loss": 1.3186, + "step": 2940 + }, + { + "epoch": 0.3111814345991561, + "grad_norm": 0.37635836005210876, + "learning_rate": 0.0011827777327161814, + "loss": 1.3257, + "step": 2950 + }, + { + "epoch": 0.31223628691983124, + "grad_norm": 0.4561614394187927, + "learning_rate": 0.0011807248626783714, + "loss": 1.3071, + "step": 2960 + }, + { + "epoch": 0.31329113924050633, + "grad_norm": 0.4184989929199219, + "learning_rate": 0.0011786671661581584, + "loss": 1.3059, + "step": 2970 + }, + { + "epoch": 0.3143459915611814, + "grad_norm": 0.3946536183357239, + "learning_rate": 0.001176604666213036, + "loss": 1.3105, + "step": 2980 + }, + { + "epoch": 0.3154008438818565, + "grad_norm": 0.37633609771728516, + "learning_rate": 0.0011745373859543236, + "loss": 1.3226, + "step": 2990 + }, + { + "epoch": 0.31645569620253167, + "grad_norm": 0.3885693848133087, + "learning_rate": 0.0011724653485469063, + "loss": 1.3031, + 
"step": 3000 + }, + { + "epoch": 0.31751054852320676, + "grad_norm": 0.3826829791069031, + "learning_rate": 0.0011703885772089743, + "loss": 1.3105, + "step": 3010 + }, + { + "epoch": 0.31856540084388185, + "grad_norm": 0.38195210695266724, + "learning_rate": 0.0011683070952117646, + "loss": 1.3114, + "step": 3020 + }, + { + "epoch": 0.31962025316455694, + "grad_norm": 0.42037537693977356, + "learning_rate": 0.0011662209258792998, + "loss": 1.2996, + "step": 3030 + }, + { + "epoch": 0.3206751054852321, + "grad_norm": 0.4133008122444153, + "learning_rate": 0.0011641300925881257, + "loss": 1.311, + "step": 3040 + }, + { + "epoch": 0.3217299578059072, + "grad_norm": 0.41504186391830444, + "learning_rate": 0.0011620346187670501, + "loss": 1.3043, + "step": 3050 + }, + { + "epoch": 0.3227848101265823, + "grad_norm": 0.3791818916797638, + "learning_rate": 0.0011599345278968806, + "loss": 1.3171, + "step": 3060 + }, + { + "epoch": 0.32383966244725737, + "grad_norm": 0.393568217754364, + "learning_rate": 0.0011578298435101604, + "loss": 1.3047, + "step": 3070 + }, + { + "epoch": 0.32489451476793246, + "grad_norm": 0.36910107731819153, + "learning_rate": 0.0011557205891909062, + "loss": 1.2966, + "step": 3080 + }, + { + "epoch": 0.3259493670886076, + "grad_norm": 0.3907497525215149, + "learning_rate": 0.0011536067885743423, + "loss": 1.3135, + "step": 3090 + }, + { + "epoch": 0.3270042194092827, + "grad_norm": 0.36680251359939575, + "learning_rate": 0.001151488465346637, + "loss": 1.2971, + "step": 3100 + }, + { + "epoch": 0.3280590717299578, + "grad_norm": 0.3890164792537689, + "learning_rate": 0.0011493656432446362, + "loss": 1.301, + "step": 3110 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 0.38398292660713196, + "learning_rate": 0.0011472383460555983, + "loss": 1.3044, + "step": 3120 + }, + { + "epoch": 0.33016877637130804, + "grad_norm": 0.4149441719055176, + "learning_rate": 0.001145106597616927, + "loss": 1.3168, + "step": 3130 + }, + { + "epoch": 
0.33122362869198313, + "grad_norm": 0.399883896112442, + "learning_rate": 0.001142970421815904, + "loss": 1.291, + "step": 3140 + }, + { + "epoch": 0.3322784810126582, + "grad_norm": 0.3905462324619293, + "learning_rate": 0.0011408298425894226, + "loss": 1.3015, + "step": 3150 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.38555899262428284, + "learning_rate": 0.0011386848839237186, + "loss": 1.3039, + "step": 3160 + }, + { + "epoch": 0.33438818565400846, + "grad_norm": 0.4734407365322113, + "learning_rate": 0.0011365355698541005, + "loss": 1.3036, + "step": 3170 + }, + { + "epoch": 0.33544303797468356, + "grad_norm": 0.3684196174144745, + "learning_rate": 0.0011343819244646824, + "loss": 1.2984, + "step": 3180 + }, + { + "epoch": 0.33649789029535865, + "grad_norm": 0.5024691820144653, + "learning_rate": 0.001132223971888112, + "loss": 1.2987, + "step": 3190 + }, + { + "epoch": 0.33755274261603374, + "grad_norm": 0.38258564472198486, + "learning_rate": 0.0011300617363053024, + "loss": 1.2977, + "step": 3200 + }, + { + "epoch": 0.33860759493670883, + "grad_norm": 0.40295499563217163, + "learning_rate": 0.0011278952419451586, + "loss": 1.3071, + "step": 3210 + }, + { + "epoch": 0.339662447257384, + "grad_norm": 0.39888668060302734, + "learning_rate": 0.0011257245130843077, + "loss": 1.2978, + "step": 3220 + }, + { + "epoch": 0.3407172995780591, + "grad_norm": 0.385440468788147, + "learning_rate": 0.0011235495740468265, + "loss": 1.2906, + "step": 3230 + }, + { + "epoch": 0.34177215189873417, + "grad_norm": 0.3719131052494049, + "learning_rate": 0.0011213704492039694, + "loss": 1.2729, + "step": 3240 + }, + { + "epoch": 0.34282700421940926, + "grad_norm": 0.3902001976966858, + "learning_rate": 0.001119187162973894, + "loss": 1.2965, + "step": 3250 + }, + { + "epoch": 0.3438818565400844, + "grad_norm": 0.3754209280014038, + "learning_rate": 0.001116999739821388, + "loss": 1.287, + "step": 3260 + }, + { + "epoch": 0.3449367088607595, + "grad_norm": 
0.40873944759368896, + "learning_rate": 0.0011148082042575968, + "loss": 1.3031, + "step": 3270 + }, + { + "epoch": 0.3459915611814346, + "grad_norm": 0.4269246459007263, + "learning_rate": 0.0011126125808397461, + "loss": 1.3014, + "step": 3280 + }, + { + "epoch": 0.3470464135021097, + "grad_norm": 0.3669773042201996, + "learning_rate": 0.0011104128941708683, + "loss": 1.2823, + "step": 3290 + }, + { + "epoch": 0.34810126582278483, + "grad_norm": 0.35514721274375916, + "learning_rate": 0.001108209168899527, + "loss": 1.2941, + "step": 3300 + }, + { + "epoch": 0.3491561181434599, + "grad_norm": 0.37238529324531555, + "learning_rate": 0.0011060014297195396, + "loss": 1.2912, + "step": 3310 + }, + { + "epoch": 0.350210970464135, + "grad_norm": 0.39685437083244324, + "learning_rate": 0.0011037897013697015, + "loss": 1.2974, + "step": 3320 + }, + { + "epoch": 0.3512658227848101, + "grad_norm": 0.41031378507614136, + "learning_rate": 0.0011015740086335092, + "loss": 1.2882, + "step": 3330 + }, + { + "epoch": 0.35232067510548526, + "grad_norm": 0.4028247892856598, + "learning_rate": 0.0010993543763388814, + "loss": 1.289, + "step": 3340 + }, + { + "epoch": 0.35337552742616035, + "grad_norm": 0.3803919851779938, + "learning_rate": 0.0010971308293578814, + "loss": 1.286, + "step": 3350 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 0.3852975368499756, + "learning_rate": 0.0010949033926064397, + "loss": 1.2844, + "step": 3360 + }, + { + "epoch": 0.35548523206751054, + "grad_norm": 0.42593517899513245, + "learning_rate": 0.0010926720910440725, + "loss": 1.2915, + "step": 3370 + }, + { + "epoch": 0.35654008438818563, + "grad_norm": 0.36477574706077576, + "learning_rate": 0.001090436949673603, + "loss": 1.2862, + "step": 3380 + }, + { + "epoch": 0.3575949367088608, + "grad_norm": 0.38376858830451965, + "learning_rate": 0.0010881979935408815, + "loss": 1.2818, + "step": 3390 + }, + { + "epoch": 0.35864978902953587, + "grad_norm": 0.4072783589363098, + "learning_rate": 
0.0010859552477345052, + "loss": 1.286, + "step": 3400 + }, + { + "epoch": 0.35970464135021096, + "grad_norm": 0.3825346529483795, + "learning_rate": 0.001083708737385536, + "loss": 1.2843, + "step": 3410 + }, + { + "epoch": 0.36075949367088606, + "grad_norm": 0.3997657597064972, + "learning_rate": 0.0010814584876672187, + "loss": 1.2645, + "step": 3420 + }, + { + "epoch": 0.3618143459915612, + "grad_norm": 0.37019675970077515, + "learning_rate": 0.0010792045237947008, + "loss": 1.2816, + "step": 3430 + }, + { + "epoch": 0.3628691983122363, + "grad_norm": 0.36619246006011963, + "learning_rate": 0.0010769468710247478, + "loss": 1.2814, + "step": 3440 + }, + { + "epoch": 0.3639240506329114, + "grad_norm": 0.36794304847717285, + "learning_rate": 0.0010746855546554612, + "loss": 1.278, + "step": 3450 + }, + { + "epoch": 0.3649789029535865, + "grad_norm": 0.4145756661891937, + "learning_rate": 0.0010724206000259954, + "loss": 1.2696, + "step": 3460 + }, + { + "epoch": 0.36603375527426163, + "grad_norm": 0.3920341432094574, + "learning_rate": 0.0010701520325162727, + "loss": 1.2864, + "step": 3470 + }, + { + "epoch": 0.3670886075949367, + "grad_norm": 0.37932780385017395, + "learning_rate": 0.0010678798775467001, + "loss": 1.2901, + "step": 3480 + }, + { + "epoch": 0.3681434599156118, + "grad_norm": 0.36111539602279663, + "learning_rate": 0.0010656041605778832, + "loss": 1.2803, + "step": 3490 + }, + { + "epoch": 0.3691983122362869, + "grad_norm": 0.36630406975746155, + "learning_rate": 0.001063324907110342, + "loss": 1.2633, + "step": 3500 + }, + { + "epoch": 0.370253164556962, + "grad_norm": 0.40321099758148193, + "learning_rate": 0.0010610421426842241, + "loss": 1.2839, + "step": 3510 + }, + { + "epoch": 0.37130801687763715, + "grad_norm": 0.3892676532268524, + "learning_rate": 0.00105875589287902, + "loss": 1.2814, + "step": 3520 + }, + { + "epoch": 0.37236286919831224, + "grad_norm": 0.40748170018196106, + "learning_rate": 0.0010564661833132752, + "loss": 1.2866, + 
"step": 3530 + }, + { + "epoch": 0.37341772151898733, + "grad_norm": 0.36532923579216003, + "learning_rate": 0.001054173039644303, + "loss": 1.2805, + "step": 3540 + }, + { + "epoch": 0.3744725738396624, + "grad_norm": 0.40630143880844116, + "learning_rate": 0.0010518764875678981, + "loss": 1.271, + "step": 3550 + }, + { + "epoch": 0.3755274261603376, + "grad_norm": 0.3710717558860779, + "learning_rate": 0.001049576552818048, + "loss": 1.2737, + "step": 3560 + }, + { + "epoch": 0.37658227848101267, + "grad_norm": 0.41121768951416016, + "learning_rate": 0.0010472732611666448, + "loss": 1.269, + "step": 3570 + }, + { + "epoch": 0.37763713080168776, + "grad_norm": 0.4118445813655853, + "learning_rate": 0.0010449666384231954, + "loss": 1.2714, + "step": 3580 + }, + { + "epoch": 0.37869198312236285, + "grad_norm": 0.3894495368003845, + "learning_rate": 0.0010426567104345346, + "loss": 1.281, + "step": 3590 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 0.36830511689186096, + "learning_rate": 0.0010403435030845332, + "loss": 1.2681, + "step": 3600 + }, + { + "epoch": 0.3808016877637131, + "grad_norm": 0.47690558433532715, + "learning_rate": 0.0010380270422938093, + "loss": 1.2685, + "step": 3610 + }, + { + "epoch": 0.3818565400843882, + "grad_norm": 0.4113521873950958, + "learning_rate": 0.0010357073540194362, + "loss": 1.2699, + "step": 3620 + }, + { + "epoch": 0.3829113924050633, + "grad_norm": 0.349385529756546, + "learning_rate": 0.001033384464254655, + "loss": 1.2692, + "step": 3630 + }, + { + "epoch": 0.38396624472573837, + "grad_norm": 0.41328349709510803, + "learning_rate": 0.001031058399028579, + "loss": 1.269, + "step": 3640 + }, + { + "epoch": 0.3850210970464135, + "grad_norm": 0.4065342843532562, + "learning_rate": 0.001028729184405905, + "loss": 1.2768, + "step": 3650 + }, + { + "epoch": 0.3860759493670886, + "grad_norm": 0.38365989923477173, + "learning_rate": 0.0010263968464866201, + "loss": 1.2692, + "step": 3660 + }, + { + "epoch": 
0.3871308016877637, + "grad_norm": 0.4516963064670563, + "learning_rate": 0.0010240614114057098, + "loss": 1.2631, + "step": 3670 + }, + { + "epoch": 0.3881856540084388, + "grad_norm": 0.4571594297885895, + "learning_rate": 0.001021722905332864, + "loss": 1.2658, + "step": 3680 + }, + { + "epoch": 0.38924050632911394, + "grad_norm": 0.38203710317611694, + "learning_rate": 0.0010193813544721855, + "loss": 1.2731, + "step": 3690 + }, + { + "epoch": 0.39029535864978904, + "grad_norm": 0.41252169013023376, + "learning_rate": 0.001017036785061895, + "loss": 1.2744, + "step": 3700 + }, + { + "epoch": 0.39135021097046413, + "grad_norm": 0.44035404920578003, + "learning_rate": 0.0010146892233740376, + "loss": 1.2639, + "step": 3710 + }, + { + "epoch": 0.3924050632911392, + "grad_norm": 0.3752801716327667, + "learning_rate": 0.0010123386957141883, + "loss": 1.248, + "step": 3720 + }, + { + "epoch": 0.39345991561181437, + "grad_norm": 0.39190322160720825, + "learning_rate": 0.0010099852284211573, + "loss": 1.2657, + "step": 3730 + }, + { + "epoch": 0.39451476793248946, + "grad_norm": 0.3747861981391907, + "learning_rate": 0.0010076288478666944, + "loss": 1.2631, + "step": 3740 + }, + { + "epoch": 0.39556962025316456, + "grad_norm": 0.3933667242527008, + "learning_rate": 0.0010052695804551946, + "loss": 1.2673, + "step": 3750 + }, + { + "epoch": 0.39662447257383965, + "grad_norm": 0.4008212387561798, + "learning_rate": 0.0010029074526234014, + "loss": 1.2595, + "step": 3760 + }, + { + "epoch": 0.39767932489451474, + "grad_norm": 0.35297682881355286, + "learning_rate": 0.0010005424908401104, + "loss": 1.273, + "step": 3770 + }, + { + "epoch": 0.3987341772151899, + "grad_norm": 0.41620758175849915, + "learning_rate": 0.0009981747216058728, + "loss": 1.258, + "step": 3780 + }, + { + "epoch": 0.399789029535865, + "grad_norm": 0.3613898754119873, + "learning_rate": 0.0009958041714526998, + "loss": 1.2639, + "step": 3790 + }, + { + "epoch": 0.4008438818565401, + "grad_norm": 
0.38978859782218933, + "learning_rate": 0.0009934308669437627, + "loss": 1.2663, + "step": 3800 + }, + { + "epoch": 0.40189873417721517, + "grad_norm": 0.3745984137058258, + "learning_rate": 0.0009910548346730972, + "loss": 1.2546, + "step": 3810 + }, + { + "epoch": 0.4029535864978903, + "grad_norm": 0.352555513381958, + "learning_rate": 0.0009886761012653062, + "loss": 1.2498, + "step": 3820 + }, + { + "epoch": 0.4040084388185654, + "grad_norm": 0.3941967487335205, + "learning_rate": 0.000986294693375258, + "loss": 1.2552, + "step": 3830 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 0.3956855535507202, + "learning_rate": 0.000983910637687791, + "loss": 1.2684, + "step": 3840 + }, + { + "epoch": 0.4061181434599156, + "grad_norm": 0.3981940746307373, + "learning_rate": 0.0009815239609174138, + "loss": 1.2514, + "step": 3850 + }, + { + "epoch": 0.40717299578059074, + "grad_norm": 0.43054890632629395, + "learning_rate": 0.0009791346898080043, + "loss": 1.2638, + "step": 3860 + }, + { + "epoch": 0.40822784810126583, + "grad_norm": 0.39159584045410156, + "learning_rate": 0.0009767428511325122, + "loss": 1.2539, + "step": 3870 + }, + { + "epoch": 0.4092827004219409, + "grad_norm": 0.3620276153087616, + "learning_rate": 0.0009743484716926576, + "loss": 1.2466, + "step": 3880 + }, + { + "epoch": 0.410337552742616, + "grad_norm": 0.35611382126808167, + "learning_rate": 0.0009719515783186319, + "loss": 1.245, + "step": 3890 + }, + { + "epoch": 0.41139240506329117, + "grad_norm": 0.380669504404068, + "learning_rate": 0.0009695521978687951, + "loss": 1.2475, + "step": 3900 + }, + { + "epoch": 0.41244725738396626, + "grad_norm": 0.37039485573768616, + "learning_rate": 0.0009671503572293767, + "loss": 1.2487, + "step": 3910 + }, + { + "epoch": 0.41350210970464135, + "grad_norm": 0.37815698981285095, + "learning_rate": 0.0009647460833141742, + "loss": 1.2456, + "step": 3920 + }, + { + "epoch": 0.41455696202531644, + "grad_norm": 0.37887251377105713, + "learning_rate": 
0.0009623394030642507, + "loss": 1.2501, + "step": 3930 + }, + { + "epoch": 0.41561181434599154, + "grad_norm": 0.35127684473991394, + "learning_rate": 0.0009599303434476334, + "loss": 1.2466, + "step": 3940 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 0.37340179085731506, + "learning_rate": 0.0009575189314590118, + "loss": 1.2514, + "step": 3950 + }, + { + "epoch": 0.4177215189873418, + "grad_norm": 0.3654627203941345, + "learning_rate": 0.0009551051941194346, + "loss": 1.2511, + "step": 3960 + }, + { + "epoch": 0.41877637130801687, + "grad_norm": 0.4705713987350464, + "learning_rate": 0.0009526891584760071, + "loss": 1.2436, + "step": 3970 + }, + { + "epoch": 0.41983122362869196, + "grad_norm": 0.4308883845806122, + "learning_rate": 0.0009502708516015889, + "loss": 1.2555, + "step": 3980 + }, + { + "epoch": 0.4208860759493671, + "grad_norm": 0.38249096274375916, + "learning_rate": 0.0009478503005944888, + "loss": 1.2443, + "step": 3990 + }, + { + "epoch": 0.4219409282700422, + "grad_norm": 0.4328560531139374, + "learning_rate": 0.0009454275325781632, + "loss": 1.2566, + "step": 4000 + }, + { + "epoch": 0.4229957805907173, + "grad_norm": 0.3712156116962433, + "learning_rate": 0.0009430025747009104, + "loss": 1.2422, + "step": 4010 + }, + { + "epoch": 0.4240506329113924, + "grad_norm": 0.3735974133014679, + "learning_rate": 0.0009405754541355677, + "loss": 1.248, + "step": 4020 + }, + { + "epoch": 0.42510548523206754, + "grad_norm": 0.3643399178981781, + "learning_rate": 0.0009381461980792061, + "loss": 1.239, + "step": 4030 + }, + { + "epoch": 0.42616033755274263, + "grad_norm": 0.3779797852039337, + "learning_rate": 0.0009357148337528256, + "loss": 1.2467, + "step": 4040 + }, + { + "epoch": 0.4272151898734177, + "grad_norm": 0.39614400267601013, + "learning_rate": 0.0009332813884010511, + "loss": 1.2496, + "step": 4050 + }, + { + "epoch": 0.4282700421940928, + "grad_norm": 0.3860389292240143, + "learning_rate": 0.0009308458892918259, + "loss": 1.2507, + 
"step": 4060 + }, + { + "epoch": 0.4293248945147679, + "grad_norm": 0.3860090374946594, + "learning_rate": 0.0009284083637161064, + "loss": 1.2449, + "step": 4070 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 0.42898666858673096, + "learning_rate": 0.0009259688389875574, + "loss": 1.2499, + "step": 4080 + }, + { + "epoch": 0.43143459915611815, + "grad_norm": 0.37146082520484924, + "learning_rate": 0.0009235273424422442, + "loss": 1.2461, + "step": 4090 + }, + { + "epoch": 0.43248945147679324, + "grad_norm": 0.38286641240119934, + "learning_rate": 0.0009210839014383282, + "loss": 1.2421, + "step": 4100 + }, + { + "epoch": 0.43354430379746833, + "grad_norm": 0.4748443067073822, + "learning_rate": 0.0009186385433557584, + "loss": 1.2383, + "step": 4110 + }, + { + "epoch": 0.4345991561181435, + "grad_norm": 0.39287233352661133, + "learning_rate": 0.0009161912955959668, + "loss": 1.2422, + "step": 4120 + }, + { + "epoch": 0.4356540084388186, + "grad_norm": 0.43116647005081177, + "learning_rate": 0.000913742185581559, + "loss": 1.232, + "step": 4130 + }, + { + "epoch": 0.43670886075949367, + "grad_norm": 0.3863379657268524, + "learning_rate": 0.0009112912407560086, + "loss": 1.2359, + "step": 4140 + }, + { + "epoch": 0.43776371308016876, + "grad_norm": 0.3846172094345093, + "learning_rate": 0.0009088384885833495, + "loss": 1.2397, + "step": 4150 + }, + { + "epoch": 0.4388185654008439, + "grad_norm": 0.42868921160697937, + "learning_rate": 0.000906383956547867, + "loss": 1.2353, + "step": 4160 + }, + { + "epoch": 0.439873417721519, + "grad_norm": 0.4204123616218567, + "learning_rate": 0.0009039276721537915, + "loss": 1.2363, + "step": 4170 + }, + { + "epoch": 0.4409282700421941, + "grad_norm": 0.3695426285266876, + "learning_rate": 0.0009014696629249886, + "loss": 1.2331, + "step": 4180 + }, + { + "epoch": 0.4419831223628692, + "grad_norm": 0.3634350597858429, + "learning_rate": 0.0008990099564046522, + "loss": 1.2363, + "step": 4190 + }, + { + "epoch": 
0.4430379746835443, + "grad_norm": 0.407058447599411, + "learning_rate": 0.0008965485801549946, + "loss": 1.2381, + "step": 4200 + }, + { + "epoch": 0.4440928270042194, + "grad_norm": 0.39242419600486755, + "learning_rate": 0.000894085561756939, + "loss": 1.2307, + "step": 4210 + }, + { + "epoch": 0.4451476793248945, + "grad_norm": 0.37065503001213074, + "learning_rate": 0.0008916209288098088, + "loss": 1.2389, + "step": 4220 + }, + { + "epoch": 0.4462025316455696, + "grad_norm": 0.38749197125434875, + "learning_rate": 0.0008891547089310198, + "loss": 1.2335, + "step": 4230 + }, + { + "epoch": 0.4472573839662447, + "grad_norm": 0.36509665846824646, + "learning_rate": 0.0008866869297557699, + "loss": 1.2298, + "step": 4240 + }, + { + "epoch": 0.44831223628691985, + "grad_norm": 0.4026740491390228, + "learning_rate": 0.0008842176189367299, + "loss": 1.2349, + "step": 4250 + }, + { + "epoch": 0.44936708860759494, + "grad_norm": 0.3837262690067291, + "learning_rate": 0.0008817468041437329, + "loss": 1.2414, + "step": 4260 + }, + { + "epoch": 0.45042194092827004, + "grad_norm": 0.37404435873031616, + "learning_rate": 0.0008792745130634654, + "loss": 1.2323, + "step": 4270 + }, + { + "epoch": 0.45147679324894513, + "grad_norm": 0.37415874004364014, + "learning_rate": 0.0008768007733991561, + "loss": 1.2262, + "step": 4280 + }, + { + "epoch": 0.4525316455696203, + "grad_norm": 0.3748627305030823, + "learning_rate": 0.0008743256128702658, + "loss": 1.2367, + "step": 4290 + }, + { + "epoch": 0.45358649789029537, + "grad_norm": 0.42322036623954773, + "learning_rate": 0.0008718490592121768, + "loss": 1.2357, + "step": 4300 + }, + { + "epoch": 0.45464135021097046, + "grad_norm": 0.3685034513473511, + "learning_rate": 0.0008693711401758822, + "loss": 1.2287, + "step": 4310 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 0.36822935938835144, + "learning_rate": 0.0008668918835276747, + "loss": 1.2293, + "step": 4320 + }, + { + "epoch": 0.45675105485232065, + "grad_norm": 
0.3825782239437103, + "learning_rate": 0.0008644113170488355, + "loss": 1.2369, + "step": 4330 + }, + { + "epoch": 0.4578059071729958, + "grad_norm": 0.3803617060184479, + "learning_rate": 0.0008619294685353235, + "loss": 1.2288, + "step": 4340 + }, + { + "epoch": 0.4588607594936709, + "grad_norm": 0.36820366978645325, + "learning_rate": 0.0008594463657974627, + "loss": 1.2272, + "step": 4350 + }, + { + "epoch": 0.459915611814346, + "grad_norm": 0.4030225872993469, + "learning_rate": 0.0008569620366596322, + "loss": 1.2401, + "step": 4360 + }, + { + "epoch": 0.4609704641350211, + "grad_norm": 0.39233386516571045, + "learning_rate": 0.000854476508959953, + "loss": 1.2186, + "step": 4370 + }, + { + "epoch": 0.4620253164556962, + "grad_norm": 0.35890206694602966, + "learning_rate": 0.0008519898105499762, + "loss": 1.2274, + "step": 4380 + }, + { + "epoch": 0.4630801687763713, + "grad_norm": 0.38069260120391846, + "learning_rate": 0.0008495019692943721, + "loss": 1.222, + "step": 4390 + }, + { + "epoch": 0.4641350210970464, + "grad_norm": 0.38441646099090576, + "learning_rate": 0.0008470130130706166, + "loss": 1.2283, + "step": 4400 + }, + { + "epoch": 0.4651898734177215, + "grad_norm": 0.3931821584701538, + "learning_rate": 0.0008445229697686795, + "loss": 1.2405, + "step": 4410 + }, + { + "epoch": 0.46624472573839665, + "grad_norm": 0.3823792338371277, + "learning_rate": 0.0008420318672907119, + "loss": 1.2263, + "step": 4420 + }, + { + "epoch": 0.46729957805907174, + "grad_norm": 0.3678940236568451, + "learning_rate": 0.0008395397335507334, + "loss": 1.226, + "step": 4430 + }, + { + "epoch": 0.46835443037974683, + "grad_norm": 0.3954598009586334, + "learning_rate": 0.0008370465964743196, + "loss": 1.2202, + "step": 4440 + }, + { + "epoch": 0.4694092827004219, + "grad_norm": 0.3614806532859802, + "learning_rate": 0.0008345524839982886, + "loss": 1.2318, + "step": 4450 + }, + { + "epoch": 0.4704641350210971, + "grad_norm": 0.38190943002700806, + "learning_rate": 
0.0008320574240703886, + "loss": 1.2171, + "step": 4460 + }, + { + "epoch": 0.47151898734177217, + "grad_norm": 0.38935908675193787, + "learning_rate": 0.0008295614446489842, + "loss": 1.2253, + "step": 4470 + }, + { + "epoch": 0.47257383966244726, + "grad_norm": 0.38194459676742554, + "learning_rate": 0.0008270645737027441, + "loss": 1.2315, + "step": 4480 + }, + { + "epoch": 0.47362869198312235, + "grad_norm": 0.4150732755661011, + "learning_rate": 0.0008245668392103259, + "loss": 1.2193, + "step": 4490 + }, + { + "epoch": 0.47468354430379744, + "grad_norm": 0.37971562147140503, + "learning_rate": 0.0008220682691600645, + "loss": 1.2182, + "step": 4500 + }, + { + "epoch": 0.4757383966244726, + "grad_norm": 0.38130632042884827, + "learning_rate": 0.0008195688915496571, + "loss": 1.2266, + "step": 4510 + }, + { + "epoch": 0.4767932489451477, + "grad_norm": 0.37892699241638184, + "learning_rate": 0.0008170687343858506, + "loss": 1.2069, + "step": 4520 + }, + { + "epoch": 0.4778481012658228, + "grad_norm": 0.5107613801956177, + "learning_rate": 0.0008145678256841265, + "loss": 1.2275, + "step": 4530 + }, + { + "epoch": 0.47890295358649787, + "grad_norm": 0.39866533875465393, + "learning_rate": 0.0008120661934683879, + "loss": 1.2238, + "step": 4540 + }, + { + "epoch": 0.479957805907173, + "grad_norm": 0.36257508397102356, + "learning_rate": 0.0008095638657706456, + "loss": 1.22, + "step": 4550 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 0.3732055723667145, + "learning_rate": 0.000807060870630703, + "loss": 1.2113, + "step": 4560 + }, + { + "epoch": 0.4820675105485232, + "grad_norm": 0.3906533420085907, + "learning_rate": 0.000804557236095843, + "loss": 1.1966, + "step": 4570 + }, + { + "epoch": 0.4831223628691983, + "grad_norm": 0.4269588887691498, + "learning_rate": 0.0008020529902205129, + "loss": 1.2175, + "step": 4580 + }, + { + "epoch": 0.48417721518987344, + "grad_norm": 0.3742537498474121, + "learning_rate": 0.0007995481610660108, + "loss": 1.2052, 
+ "step": 4590 + }, + { + "epoch": 0.48523206751054854, + "grad_norm": 0.4252793788909912, + "learning_rate": 0.0007970427767001702, + "loss": 1.2203, + "step": 4600 + }, + { + "epoch": 0.48628691983122363, + "grad_norm": 0.3769088387489319, + "learning_rate": 0.0007945368651970464, + "loss": 1.2248, + "step": 4610 + }, + { + "epoch": 0.4873417721518987, + "grad_norm": 0.3712232708930969, + "learning_rate": 0.0007920304546366013, + "loss": 1.204, + "step": 4620 + }, + { + "epoch": 0.4883966244725738, + "grad_norm": 0.4090394675731659, + "learning_rate": 0.000789523573104389, + "loss": 1.216, + "step": 4630 + }, + { + "epoch": 0.48945147679324896, + "grad_norm": 0.3830975592136383, + "learning_rate": 0.0007870162486912414, + "loss": 1.22, + "step": 4640 + }, + { + "epoch": 0.49050632911392406, + "grad_norm": 0.37491726875305176, + "learning_rate": 0.0007845085094929527, + "loss": 1.2204, + "step": 4650 + }, + { + "epoch": 0.49156118143459915, + "grad_norm": 0.37113234400749207, + "learning_rate": 0.0007820003836099649, + "loss": 1.2099, + "step": 4660 + }, + { + "epoch": 0.49261603375527424, + "grad_norm": 0.3921297788619995, + "learning_rate": 0.0007794918991470537, + "loss": 1.2109, + "step": 4670 + }, + { + "epoch": 0.4936708860759494, + "grad_norm": 0.37305620312690735, + "learning_rate": 0.0007769830842130119, + "loss": 1.2071, + "step": 4680 + }, + { + "epoch": 0.4947257383966245, + "grad_norm": 0.3612220585346222, + "learning_rate": 0.0007744739669203361, + "loss": 1.2034, + "step": 4690 + }, + { + "epoch": 0.4957805907172996, + "grad_norm": 0.43079039454460144, + "learning_rate": 0.0007719645753849108, + "loss": 1.2096, + "step": 4700 + }, + { + "epoch": 0.49683544303797467, + "grad_norm": 0.3941101133823395, + "learning_rate": 0.0007694549377256932, + "loss": 1.2135, + "step": 4710 + }, + { + "epoch": 0.4978902953586498, + "grad_norm": 0.431941419839859, + "learning_rate": 0.0007669450820643987, + "loss": 1.2108, + "step": 4720 + }, + { + "epoch": 
0.4989451476793249, + "grad_norm": 0.4714750647544861, + "learning_rate": 0.0007644350365251855, + "loss": 1.2073, + "step": 4730 + }, + { + "epoch": 0.5, + "grad_norm": 0.3858397603034973, + "learning_rate": 0.0007619248292343399, + "loss": 1.2047, + "step": 4740 + }, + { + "epoch": 0.5010548523206751, + "grad_norm": 0.40068912506103516, + "learning_rate": 0.0007594144883199599, + "loss": 1.2238, + "step": 4750 + }, + { + "epoch": 0.5021097046413502, + "grad_norm": 0.36579346656799316, + "learning_rate": 0.0007569040419116413, + "loss": 1.2086, + "step": 4760 + }, + { + "epoch": 0.5031645569620253, + "grad_norm": 0.3545038402080536, + "learning_rate": 0.000754393518140162, + "loss": 1.2067, + "step": 4770 + }, + { + "epoch": 0.5042194092827004, + "grad_norm": 0.3704482316970825, + "learning_rate": 0.0007518829451371665, + "loss": 1.2166, + "step": 4780 + }, + { + "epoch": 0.5052742616033755, + "grad_norm": 0.40773773193359375, + "learning_rate": 0.0007493723510348516, + "loss": 1.2017, + "step": 4790 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 0.3698502779006958, + "learning_rate": 0.0007468617639656496, + "loss": 1.2008, + "step": 4800 + }, + { + "epoch": 0.5073839662447257, + "grad_norm": 0.3846557140350342, + "learning_rate": 0.0007443512120619144, + "loss": 1.2026, + "step": 4810 + }, + { + "epoch": 0.5084388185654009, + "grad_norm": 0.40047094225883484, + "learning_rate": 0.0007418407234556067, + "loss": 1.221, + "step": 4820 + }, + { + "epoch": 0.509493670886076, + "grad_norm": 0.472100168466568, + "learning_rate": 0.0007393303262779767, + "loss": 1.2066, + "step": 4830 + }, + { + "epoch": 0.510548523206751, + "grad_norm": 0.4696345925331116, + "learning_rate": 0.0007368200486592507, + "loss": 1.2019, + "step": 4840 + }, + { + "epoch": 0.5116033755274262, + "grad_norm": 0.38359326124191284, + "learning_rate": 0.0007343099187283149, + "loss": 1.2139, + "step": 4850 + }, + { + "epoch": 0.5126582278481012, + "grad_norm": 0.42313656210899353, + 
"learning_rate": 0.0007317999646124011, + "loss": 1.2013, + "step": 4860 + }, + { + "epoch": 0.5137130801687764, + "grad_norm": 0.3871127665042877, + "learning_rate": 0.0007292902144367704, + "loss": 1.193, + "step": 4870 + }, + { + "epoch": 0.5147679324894515, + "grad_norm": 0.38251253962516785, + "learning_rate": 0.0007267806963243995, + "loss": 1.2112, + "step": 4880 + }, + { + "epoch": 0.5158227848101266, + "grad_norm": 0.39091378450393677, + "learning_rate": 0.0007242714383956639, + "loss": 1.203, + "step": 4890 + }, + { + "epoch": 0.5168776371308017, + "grad_norm": 0.38012486696243286, + "learning_rate": 0.000721762468768024, + "loss": 1.2102, + "step": 4900 + }, + { + "epoch": 0.5179324894514767, + "grad_norm": 0.38264596462249756, + "learning_rate": 0.0007192538155557094, + "loss": 1.2019, + "step": 4910 + }, + { + "epoch": 0.5189873417721519, + "grad_norm": 0.3756820559501648, + "learning_rate": 0.0007167455068694046, + "loss": 1.1943, + "step": 4920 + }, + { + "epoch": 0.520042194092827, + "grad_norm": 0.48044905066490173, + "learning_rate": 0.000714237570815933, + "loss": 1.2043, + "step": 4930 + }, + { + "epoch": 0.5210970464135021, + "grad_norm": 0.3843831419944763, + "learning_rate": 0.0007117300354979423, + "loss": 1.2022, + "step": 4940 + }, + { + "epoch": 0.5221518987341772, + "grad_norm": 0.40780699253082275, + "learning_rate": 0.000709222929013591, + "loss": 1.209, + "step": 4950 + }, + { + "epoch": 0.5232067510548524, + "grad_norm": 0.38078269362449646, + "learning_rate": 0.0007067162794562309, + "loss": 1.1919, + "step": 4960 + }, + { + "epoch": 0.5242616033755274, + "grad_norm": 0.36517396569252014, + "learning_rate": 0.0007042101149140943, + "loss": 1.1982, + "step": 4970 + }, + { + "epoch": 0.5253164556962026, + "grad_norm": 0.36904171109199524, + "learning_rate": 0.0007017044634699787, + "loss": 1.1896, + "step": 4980 + }, + { + "epoch": 0.5263713080168776, + "grad_norm": 0.365265816450119, + "learning_rate": 0.0006991993532009319, + 
"loss": 1.1932, + "step": 4990 + }, + { + "epoch": 0.5274261603375527, + "grad_norm": 0.3909822106361389, + "learning_rate": 0.0006966948121779378, + "loss": 1.2048, + "step": 5000 + }, + { + "epoch": 0.5284810126582279, + "grad_norm": 0.3858942687511444, + "learning_rate": 0.000694190868465601, + "loss": 1.1991, + "step": 5010 + }, + { + "epoch": 0.5295358649789029, + "grad_norm": 0.3741839826107025, + "learning_rate": 0.0006916875501218343, + "loss": 1.1934, + "step": 5020 + }, + { + "epoch": 0.5305907172995781, + "grad_norm": 0.41621774435043335, + "learning_rate": 0.0006891848851975416, + "loss": 1.1798, + "step": 5030 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 0.370693176984787, + "learning_rate": 0.0006866829017363054, + "loss": 1.2033, + "step": 5040 + }, + { + "epoch": 0.5327004219409283, + "grad_norm": 0.36000174283981323, + "learning_rate": 0.0006841816277740722, + "loss": 1.1943, + "step": 5050 + }, + { + "epoch": 0.5337552742616034, + "grad_norm": 0.4014742374420166, + "learning_rate": 0.0006816810913388379, + "loss": 1.204, + "step": 5060 + }, + { + "epoch": 0.5348101265822784, + "grad_norm": 0.38886064291000366, + "learning_rate": 0.0006791813204503342, + "loss": 1.194, + "step": 5070 + }, + { + "epoch": 0.5358649789029536, + "grad_norm": 0.38861414790153503, + "learning_rate": 0.0006766823431197147, + "loss": 1.2051, + "step": 5080 + }, + { + "epoch": 0.5369198312236287, + "grad_norm": 0.3751736581325531, + "learning_rate": 0.0006741841873492406, + "loss": 1.1963, + "step": 5090 + }, + { + "epoch": 0.5379746835443038, + "grad_norm": 0.36994561553001404, + "learning_rate": 0.0006716868811319671, + "loss": 1.1801, + "step": 5100 + }, + { + "epoch": 0.5390295358649789, + "grad_norm": 0.3818756341934204, + "learning_rate": 0.0006691904524514297, + "loss": 1.1906, + "step": 5110 + }, + { + "epoch": 0.540084388185654, + "grad_norm": 0.3850463628768921, + "learning_rate": 0.0006666949292813306, + "loss": 1.1904, + "step": 5120 + }, + { + 
"epoch": 0.5411392405063291, + "grad_norm": 0.4083471894264221, + "learning_rate": 0.0006642003395852258, + "loss": 1.1894, + "step": 5130 + }, + { + "epoch": 0.5421940928270043, + "grad_norm": 0.3919425308704376, + "learning_rate": 0.0006617067113162103, + "loss": 1.2007, + "step": 5140 + }, + { + "epoch": 0.5432489451476793, + "grad_norm": 0.39157313108444214, + "learning_rate": 0.0006592140724166073, + "loss": 1.1992, + "step": 5150 + }, + { + "epoch": 0.5443037974683544, + "grad_norm": 0.39225631952285767, + "learning_rate": 0.0006567224508176523, + "loss": 1.1919, + "step": 5160 + }, + { + "epoch": 0.5453586497890295, + "grad_norm": 0.3641131818294525, + "learning_rate": 0.0006542318744391821, + "loss": 1.1923, + "step": 5170 + }, + { + "epoch": 0.5464135021097046, + "grad_norm": 0.3905319571495056, + "learning_rate": 0.0006517423711893209, + "loss": 1.1993, + "step": 5180 + }, + { + "epoch": 0.5474683544303798, + "grad_norm": 0.3933909833431244, + "learning_rate": 0.0006492539689641685, + "loss": 1.1844, + "step": 5190 + }, + { + "epoch": 0.5485232067510548, + "grad_norm": 0.3606071174144745, + "learning_rate": 0.0006467666956474865, + "loss": 1.1931, + "step": 5200 + }, + { + "epoch": 0.54957805907173, + "grad_norm": 0.39870119094848633, + "learning_rate": 0.0006442805791103873, + "loss": 1.1827, + "step": 5210 + }, + { + "epoch": 0.5506329113924051, + "grad_norm": 0.3785361647605896, + "learning_rate": 0.0006417956472110205, + "loss": 1.1917, + "step": 5220 + }, + { + "epoch": 0.5516877637130801, + "grad_norm": 0.47369471192359924, + "learning_rate": 0.0006393119277942614, + "loss": 1.1863, + "step": 5230 + }, + { + "epoch": 0.5527426160337553, + "grad_norm": 0.4398342967033386, + "learning_rate": 0.0006368294486913987, + "loss": 1.1914, + "step": 5240 + }, + { + "epoch": 0.5537974683544303, + "grad_norm": 0.39846035838127136, + "learning_rate": 0.0006343482377198232, + "loss": 1.1851, + "step": 5250 + }, + { + "epoch": 0.5548523206751055, + "grad_norm": 
0.3756946921348572, + "learning_rate": 0.0006318683226827151, + "loss": 1.189, + "step": 5260 + }, + { + "epoch": 0.5559071729957806, + "grad_norm": 0.4042571783065796, + "learning_rate": 0.0006293897313687331, + "loss": 1.1941, + "step": 5270 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 0.4339440166950226, + "learning_rate": 0.0006269124915517037, + "loss": 1.1866, + "step": 5280 + }, + { + "epoch": 0.5580168776371308, + "grad_norm": 0.42624154686927795, + "learning_rate": 0.0006244366309903084, + "loss": 1.1872, + "step": 5290 + }, + { + "epoch": 0.5590717299578059, + "grad_norm": 0.42734599113464355, + "learning_rate": 0.0006219621774277737, + "loss": 1.1931, + "step": 5300 + }, + { + "epoch": 0.560126582278481, + "grad_norm": 0.3782985508441925, + "learning_rate": 0.00061948915859156, + "loss": 1.1778, + "step": 5310 + }, + { + "epoch": 0.5611814345991561, + "grad_norm": 0.43499428033828735, + "learning_rate": 0.0006170176021930509, + "loss": 1.1813, + "step": 5320 + }, + { + "epoch": 0.5622362869198312, + "grad_norm": 0.3867819309234619, + "learning_rate": 0.0006145475359272424, + "loss": 1.1886, + "step": 5330 + }, + { + "epoch": 0.5632911392405063, + "grad_norm": 0.3781413435935974, + "learning_rate": 0.0006120789874724336, + "loss": 1.1803, + "step": 5340 + }, + { + "epoch": 0.5643459915611815, + "grad_norm": 0.41494444012641907, + "learning_rate": 0.0006096119844899151, + "loss": 1.1762, + "step": 5350 + }, + { + "epoch": 0.5654008438818565, + "grad_norm": 0.387002557516098, + "learning_rate": 0.0006071465546236601, + "loss": 1.183, + "step": 5360 + }, + { + "epoch": 0.5664556962025317, + "grad_norm": 0.41137585043907166, + "learning_rate": 0.0006046827255000135, + "loss": 1.1829, + "step": 5370 + }, + { + "epoch": 0.5675105485232067, + "grad_norm": 0.39539626240730286, + "learning_rate": 0.0006022205247273845, + "loss": 1.1749, + "step": 5380 + }, + { + "epoch": 0.5685654008438819, + "grad_norm": 0.38159656524658203, + "learning_rate": 
0.0005997599798959343, + "loss": 1.1772, + "step": 5390 + }, + { + "epoch": 0.569620253164557, + "grad_norm": 0.3987438976764679, + "learning_rate": 0.0005973011185772694, + "loss": 1.1766, + "step": 5400 + }, + { + "epoch": 0.570675105485232, + "grad_norm": 0.3730505406856537, + "learning_rate": 0.0005948439683241318, + "loss": 1.1802, + "step": 5410 + }, + { + "epoch": 0.5717299578059072, + "grad_norm": 0.3681482970714569, + "learning_rate": 0.0005923885566700896, + "loss": 1.1746, + "step": 5420 + }, + { + "epoch": 0.5727848101265823, + "grad_norm": 0.38157281279563904, + "learning_rate": 0.0005899349111292293, + "loss": 1.1718, + "step": 5430 + }, + { + "epoch": 0.5738396624472574, + "grad_norm": 0.42166829109191895, + "learning_rate": 0.0005874830591958474, + "loss": 1.1684, + "step": 5440 + }, + { + "epoch": 0.5748945147679325, + "grad_norm": 0.39283323287963867, + "learning_rate": 0.000585033028344142, + "loss": 1.1757, + "step": 5450 + }, + { + "epoch": 0.5759493670886076, + "grad_norm": 0.37957999110221863, + "learning_rate": 0.0005825848460279048, + "loss": 1.1779, + "step": 5460 + }, + { + "epoch": 0.5770042194092827, + "grad_norm": 0.4186517894268036, + "learning_rate": 0.0005801385396802146, + "loss": 1.1765, + "step": 5470 + }, + { + "epoch": 0.5780590717299579, + "grad_norm": 0.4113667905330658, + "learning_rate": 0.0005776941367131282, + "loss": 1.1783, + "step": 5480 + }, + { + "epoch": 0.5791139240506329, + "grad_norm": 0.3858949840068817, + "learning_rate": 0.0005752516645173745, + "loss": 1.1837, + "step": 5490 + }, + { + "epoch": 0.580168776371308, + "grad_norm": 0.36408531665802, + "learning_rate": 0.0005728111504620472, + "loss": 1.1726, + "step": 5500 + }, + { + "epoch": 0.5812236286919831, + "grad_norm": 0.43086349964141846, + "learning_rate": 0.0005703726218942976, + "loss": 1.1783, + "step": 5510 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 0.40186476707458496, + "learning_rate": 0.0005679361061390295, + "loss": 1.1719, + 
"step": 5520 + }, + { + "epoch": 0.5833333333333334, + "grad_norm": 0.41844937205314636, + "learning_rate": 0.0005655016304985908, + "loss": 1.166, + "step": 5530 + }, + { + "epoch": 0.5843881856540084, + "grad_norm": 0.4287249743938446, + "learning_rate": 0.0005630692222524709, + "loss": 1.1667, + "step": 5540 + }, + { + "epoch": 0.5854430379746836, + "grad_norm": 0.38259413838386536, + "learning_rate": 0.0005606389086569911, + "loss": 1.1789, + "step": 5550 + }, + { + "epoch": 0.5864978902953587, + "grad_norm": 0.41747674345970154, + "learning_rate": 0.0005582107169450023, + "loss": 1.184, + "step": 5560 + }, + { + "epoch": 0.5875527426160337, + "grad_norm": 0.3860149085521698, + "learning_rate": 0.0005557846743255783, + "loss": 1.1692, + "step": 5570 + }, + { + "epoch": 0.5886075949367089, + "grad_norm": 0.392096608877182, + "learning_rate": 0.0005533608079837109, + "loss": 1.169, + "step": 5580 + }, + { + "epoch": 0.5896624472573839, + "grad_norm": 0.3814886808395386, + "learning_rate": 0.0005509391450800061, + "loss": 1.1696, + "step": 5590 + }, + { + "epoch": 0.5907172995780591, + "grad_norm": 0.3786362409591675, + "learning_rate": 0.0005485197127503795, + "loss": 1.1663, + "step": 5600 + }, + { + "epoch": 0.5917721518987342, + "grad_norm": 0.37666597962379456, + "learning_rate": 0.0005461025381057516, + "loss": 1.1722, + "step": 5610 + }, + { + "epoch": 0.5928270042194093, + "grad_norm": 0.3780205249786377, + "learning_rate": 0.0005436876482317444, + "loss": 1.1779, + "step": 5620 + }, + { + "epoch": 0.5938818565400844, + "grad_norm": 0.38228151202201843, + "learning_rate": 0.0005412750701883782, + "loss": 1.172, + "step": 5630 + }, + { + "epoch": 0.5949367088607594, + "grad_norm": 0.42714548110961914, + "learning_rate": 0.0005388648310097682, + "loss": 1.1817, + "step": 5640 + }, + { + "epoch": 0.5959915611814346, + "grad_norm": 0.3644270896911621, + "learning_rate": 0.000536456957703821, + "loss": 1.1715, + "step": 5650 + }, + { + "epoch": 
0.5970464135021097, + "grad_norm": 0.40454643964767456, + "learning_rate": 0.0005340514772519324, + "loss": 1.1703, + "step": 5660 + }, + { + "epoch": 0.5981012658227848, + "grad_norm": 0.3972594738006592, + "learning_rate": 0.0005316484166086863, + "loss": 1.1758, + "step": 5670 + }, + { + "epoch": 0.5991561181434599, + "grad_norm": 0.4074248671531677, + "learning_rate": 0.00052924780270155, + "loss": 1.1776, + "step": 5680 + }, + { + "epoch": 0.6002109704641351, + "grad_norm": 0.3822266161441803, + "learning_rate": 0.0005268496624305747, + "loss": 1.1539, + "step": 5690 + }, + { + "epoch": 0.6012658227848101, + "grad_norm": 0.4105076193809509, + "learning_rate": 0.0005244540226680931, + "loss": 1.1626, + "step": 5700 + }, + { + "epoch": 0.6023206751054853, + "grad_norm": 0.3894893229007721, + "learning_rate": 0.0005220609102584185, + "loss": 1.1685, + "step": 5710 + }, + { + "epoch": 0.6033755274261603, + "grad_norm": 0.3778201639652252, + "learning_rate": 0.0005196703520175437, + "loss": 1.1703, + "step": 5720 + }, + { + "epoch": 0.6044303797468354, + "grad_norm": 0.414444237947464, + "learning_rate": 0.0005172823747328415, + "loss": 1.1664, + "step": 5730 + }, + { + "epoch": 0.6054852320675106, + "grad_norm": 0.37120187282562256, + "learning_rate": 0.0005148970051627632, + "loss": 1.1657, + "step": 5740 + }, + { + "epoch": 0.6065400843881856, + "grad_norm": 0.38689517974853516, + "learning_rate": 0.0005125142700365394, + "loss": 1.1415, + "step": 5750 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 0.38950467109680176, + "learning_rate": 0.000510134196053881, + "loss": 1.1506, + "step": 5760 + }, + { + "epoch": 0.6086497890295358, + "grad_norm": 0.4142420291900635, + "learning_rate": 0.0005077568098846789, + "loss": 1.1499, + "step": 5770 + }, + { + "epoch": 0.609704641350211, + "grad_norm": 0.43301883339881897, + "learning_rate": 0.000505382138168706, + "loss": 1.1606, + "step": 5780 + }, + { + "epoch": 0.6107594936708861, + "grad_norm": 
0.3984551727771759, + "learning_rate": 0.0005030102075153181, + "loss": 1.1584, + "step": 5790 + }, + { + "epoch": 0.6118143459915611, + "grad_norm": 0.3981073796749115, + "learning_rate": 0.0005006410445031569, + "loss": 1.1612, + "step": 5800 + }, + { + "epoch": 0.6128691983122363, + "grad_norm": 0.4081515669822693, + "learning_rate": 0.0004982746756798507, + "loss": 1.1653, + "step": 5810 + }, + { + "epoch": 0.6139240506329114, + "grad_norm": 0.37384581565856934, + "learning_rate": 0.0004959111275617174, + "loss": 1.1771, + "step": 5820 + }, + { + "epoch": 0.6149789029535865, + "grad_norm": 0.3793800473213196, + "learning_rate": 0.0004935504266334677, + "loss": 1.1588, + "step": 5830 + }, + { + "epoch": 0.6160337552742616, + "grad_norm": 0.375134140253067, + "learning_rate": 0.0004911925993479085, + "loss": 1.1712, + "step": 5840 + }, + { + "epoch": 0.6170886075949367, + "grad_norm": 0.3785207271575928, + "learning_rate": 0.0004888376721256456, + "loss": 1.1766, + "step": 5850 + }, + { + "epoch": 0.6181434599156118, + "grad_norm": 0.37380337715148926, + "learning_rate": 0.00048648567135478805, + "loss": 1.1616, + "step": 5860 + }, + { + "epoch": 0.619198312236287, + "grad_norm": 0.38806968927383423, + "learning_rate": 0.0004841366233906538, + "loss": 1.1537, + "step": 5870 + }, + { + "epoch": 0.620253164556962, + "grad_norm": 0.38425275683403015, + "learning_rate": 0.0004817905545554717, + "loss": 1.1655, + "step": 5880 + }, + { + "epoch": 0.6213080168776371, + "grad_norm": 0.41830435395240784, + "learning_rate": 0.00047944749113808884, + "loss": 1.1576, + "step": 5890 + }, + { + "epoch": 0.6223628691983122, + "grad_norm": 0.3807017207145691, + "learning_rate": 0.00047710745939367474, + "loss": 1.1569, + "step": 5900 + }, + { + "epoch": 0.6234177215189873, + "grad_norm": 0.3918316960334778, + "learning_rate": 0.0004747704855434278, + "loss": 1.1536, + "step": 5910 + }, + { + "epoch": 0.6244725738396625, + "grad_norm": 0.39770522713661194, + "learning_rate": 
0.0004724365957742809, + "loss": 1.1489, + "step": 5920 + }, + { + "epoch": 0.6255274261603375, + "grad_norm": 0.3696104884147644, + "learning_rate": 0.00047010581623860883, + "loss": 1.1543, + "step": 5930 + }, + { + "epoch": 0.6265822784810127, + "grad_norm": 0.4291442334651947, + "learning_rate": 0.0004677781730539342, + "loss": 1.1589, + "step": 5940 + }, + { + "epoch": 0.6276371308016878, + "grad_norm": 0.39524322748184204, + "learning_rate": 0.0004654536923026356, + "loss": 1.1515, + "step": 5950 + }, + { + "epoch": 0.6286919831223629, + "grad_norm": 0.3784046471118927, + "learning_rate": 0.00046313240003165466, + "loss": 1.1584, + "step": 5960 + }, + { + "epoch": 0.629746835443038, + "grad_norm": 0.48262056708335876, + "learning_rate": 0.0004608143222522048, + "loss": 1.1566, + "step": 5970 + }, + { + "epoch": 0.630801687763713, + "grad_norm": 0.40012893080711365, + "learning_rate": 0.0004584994849394795, + "loss": 1.1403, + "step": 5980 + }, + { + "epoch": 0.6318565400843882, + "grad_norm": 0.3889144957065582, + "learning_rate": 0.0004561879140323607, + "loss": 1.1574, + "step": 5990 + }, + { + "epoch": 0.6329113924050633, + "grad_norm": 0.4002501666545868, + "learning_rate": 0.0004538796354331298, + "loss": 1.1649, + "step": 6000 + }, + { + "epoch": 0.6339662447257384, + "grad_norm": 0.40692681074142456, + "learning_rate": 0.0004515746750071754, + "loss": 1.1529, + "step": 6010 + }, + { + "epoch": 0.6350210970464135, + "grad_norm": 0.4026417136192322, + "learning_rate": 0.0004492730585827046, + "loss": 1.1547, + "step": 6020 + }, + { + "epoch": 0.6360759493670886, + "grad_norm": 0.40128716826438904, + "learning_rate": 0.0004469748119504529, + "loss": 1.1381, + "step": 6030 + }, + { + "epoch": 0.6371308016877637, + "grad_norm": 0.38008439540863037, + "learning_rate": 0.0004446799608633964, + "loss": 1.1565, + "step": 6040 + }, + { + "epoch": 0.6381856540084389, + "grad_norm": 0.3946179151535034, + "learning_rate": 0.00044238853103646154, + "loss": 1.1432, + 
"step": 6050 + }, + { + "epoch": 0.6392405063291139, + "grad_norm": 0.41926196217536926, + "learning_rate": 0.00044010054814623925, + "loss": 1.1572, + "step": 6060 + }, + { + "epoch": 0.640295358649789, + "grad_norm": 0.3924640715122223, + "learning_rate": 0.0004378160378306944, + "loss": 1.1499, + "step": 6070 + }, + { + "epoch": 0.6413502109704642, + "grad_norm": 0.3780492842197418, + "learning_rate": 0.00043553502568888095, + "loss": 1.151, + "step": 6080 + }, + { + "epoch": 0.6424050632911392, + "grad_norm": 0.3667239546775818, + "learning_rate": 0.0004332575372806534, + "loss": 1.1461, + "step": 6090 + }, + { + "epoch": 0.6434599156118144, + "grad_norm": 0.41244643926620483, + "learning_rate": 0.00043098359812638145, + "loss": 1.1418, + "step": 6100 + }, + { + "epoch": 0.6445147679324894, + "grad_norm": 0.38392534852027893, + "learning_rate": 0.00042871323370666383, + "loss": 1.1542, + "step": 6110 + }, + { + "epoch": 0.6455696202531646, + "grad_norm": 0.3710794746875763, + "learning_rate": 0.0004264464694620421, + "loss": 1.149, + "step": 6120 + }, + { + "epoch": 0.6466244725738397, + "grad_norm": 0.38680675625801086, + "learning_rate": 0.000424183330792717, + "loss": 1.1486, + "step": 6130 + }, + { + "epoch": 0.6476793248945147, + "grad_norm": 0.4415585398674011, + "learning_rate": 0.0004219238430582621, + "loss": 1.148, + "step": 6140 + }, + { + "epoch": 0.6487341772151899, + "grad_norm": 0.4106017053127289, + "learning_rate": 0.0004196680315773408, + "loss": 1.1571, + "step": 6150 + }, + { + "epoch": 0.6497890295358649, + "grad_norm": 0.3810531198978424, + "learning_rate": 0.00041741592162742214, + "loss": 1.1508, + "step": 6160 + }, + { + "epoch": 0.6508438818565401, + "grad_norm": 0.3825860917568207, + "learning_rate": 0.0004151675384444978, + "loss": 1.128, + "step": 6170 + }, + { + "epoch": 0.6518987341772152, + "grad_norm": 0.3765477240085602, + "learning_rate": 0.00041292290722279914, + "loss": 1.1513, + "step": 6180 + }, + { + "epoch": 
0.6529535864978903, + "grad_norm": 0.4092850685119629, + "learning_rate": 0.00041068205311451517, + "loss": 1.153, + "step": 6190 + }, + { + "epoch": 0.6540084388185654, + "grad_norm": 0.3921700119972229, + "learning_rate": 0.00040844500122951026, + "loss": 1.1464, + "step": 6200 + }, + { + "epoch": 0.6550632911392406, + "grad_norm": 0.4049195647239685, + "learning_rate": 0.00040621177663504313, + "loss": 1.1423, + "step": 6210 + }, + { + "epoch": 0.6561181434599156, + "grad_norm": 0.38116928935050964, + "learning_rate": 0.00040398240435548583, + "loss": 1.1514, + "step": 6220 + }, + { + "epoch": 0.6571729957805907, + "grad_norm": 0.41330486536026, + "learning_rate": 0.00040175690937204324, + "loss": 1.1408, + "step": 6230 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 0.39786624908447266, + "learning_rate": 0.00039953531662247343, + "loss": 1.1422, + "step": 6240 + }, + { + "epoch": 0.6592827004219409, + "grad_norm": 0.38697049021720886, + "learning_rate": 0.0003973176510008075, + "loss": 1.1505, + "step": 6250 + }, + { + "epoch": 0.6603375527426161, + "grad_norm": 0.38893020153045654, + "learning_rate": 0.00039510393735707233, + "loss": 1.1414, + "step": 6260 + }, + { + "epoch": 0.6613924050632911, + "grad_norm": 0.36455008387565613, + "learning_rate": 0.00039289420049700986, + "loss": 1.1506, + "step": 6270 + }, + { + "epoch": 0.6624472573839663, + "grad_norm": 0.3951217234134674, + "learning_rate": 0.0003906884651818006, + "loss": 1.1481, + "step": 6280 + }, + { + "epoch": 0.6635021097046413, + "grad_norm": 0.4891367256641388, + "learning_rate": 0.00038848675612778577, + "loss": 1.1463, + "step": 6290 + }, + { + "epoch": 0.6645569620253164, + "grad_norm": 0.410031795501709, + "learning_rate": 0.00038628909800619046, + "loss": 1.1381, + "step": 6300 + }, + { + "epoch": 0.6656118143459916, + "grad_norm": 0.3960745334625244, + "learning_rate": 0.0003840955154428467, + "loss": 1.142, + "step": 6310 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 
0.389371782541275, + "learning_rate": 0.00038190603301791864, + "loss": 1.136, + "step": 6320 + }, + { + "epoch": 0.6677215189873418, + "grad_norm": 0.38817137479782104, + "learning_rate": 0.0003797206752656258, + "loss": 1.1455, + "step": 6330 + }, + { + "epoch": 0.6687763713080169, + "grad_norm": 0.38712912797927856, + "learning_rate": 0.0003775394666739688, + "loss": 1.1372, + "step": 6340 + }, + { + "epoch": 0.669831223628692, + "grad_norm": 0.39310339093208313, + "learning_rate": 0.00037536243168445507, + "loss": 1.1432, + "step": 6350 + }, + { + "epoch": 0.6708860759493671, + "grad_norm": 0.3820338845252991, + "learning_rate": 0.0003731895946918246, + "loss": 1.1416, + "step": 6360 + }, + { + "epoch": 0.6719409282700421, + "grad_norm": 0.43790203332901, + "learning_rate": 0.0003710209800437769, + "loss": 1.1227, + "step": 6370 + }, + { + "epoch": 0.6729957805907173, + "grad_norm": 0.37873557209968567, + "learning_rate": 0.00036885661204069767, + "loss": 1.1278, + "step": 6380 + }, + { + "epoch": 0.6740506329113924, + "grad_norm": 0.422167032957077, + "learning_rate": 0.0003666965149353878, + "loss": 1.1426, + "step": 6390 + }, + { + "epoch": 0.6751054852320675, + "grad_norm": 0.40716055035591125, + "learning_rate": 0.0003645407129327898, + "loss": 1.1425, + "step": 6400 + }, + { + "epoch": 0.6761603375527426, + "grad_norm": 0.4014732837677002, + "learning_rate": 0.00036238923018971783, + "loss": 1.1365, + "step": 6410 + }, + { + "epoch": 0.6772151898734177, + "grad_norm": 0.389970988035202, + "learning_rate": 0.0003602420908145865, + "loss": 1.131, + "step": 6420 + }, + { + "epoch": 0.6782700421940928, + "grad_norm": 0.3878970742225647, + "learning_rate": 0.00035809931886714093, + "loss": 1.1405, + "step": 6430 + }, + { + "epoch": 0.679324894514768, + "grad_norm": 0.40258193016052246, + "learning_rate": 0.00035596093835818683, + "loss": 1.1245, + "step": 6440 + }, + { + "epoch": 0.680379746835443, + "grad_norm": 0.3841509521007538, + "learning_rate": 
0.00035382697324932245, + "loss": 1.128, + "step": 6450 + }, + { + "epoch": 0.6814345991561181, + "grad_norm": 0.47868505120277405, + "learning_rate": 0.00035169744745266866, + "loss": 1.139, + "step": 6460 + }, + { + "epoch": 0.6824894514767933, + "grad_norm": 0.3992028832435608, + "learning_rate": 0.0003495723848306017, + "loss": 1.1346, + "step": 6470 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 0.45554426312446594, + "learning_rate": 0.0003474518091954859, + "loss": 1.1403, + "step": 6480 + }, + { + "epoch": 0.6845991561181435, + "grad_norm": 0.407979279756546, + "learning_rate": 0.0003453357443094068, + "loss": 1.1377, + "step": 6490 + }, + { + "epoch": 0.6856540084388185, + "grad_norm": 0.3960586190223694, + "learning_rate": 0.00034322421388390456, + "loss": 1.1533, + "step": 6500 + }, + { + "epoch": 0.6867088607594937, + "grad_norm": 0.4038563668727875, + "learning_rate": 0.0003411172415797087, + "loss": 1.1465, + "step": 6510 + }, + { + "epoch": 0.6877637130801688, + "grad_norm": 0.3982536494731903, + "learning_rate": 0.0003390148510064727, + "loss": 1.1368, + "step": 6520 + }, + { + "epoch": 0.6888185654008439, + "grad_norm": 0.38412824273109436, + "learning_rate": 0.0003369170657225094, + "loss": 1.1268, + "step": 6530 + }, + { + "epoch": 0.689873417721519, + "grad_norm": 0.4190025329589844, + "learning_rate": 0.0003348239092345275, + "loss": 1.1347, + "step": 6540 + }, + { + "epoch": 0.6909282700421941, + "grad_norm": 0.43112343549728394, + "learning_rate": 0.0003327354049973672, + "loss": 1.1339, + "step": 6550 + }, + { + "epoch": 0.6919831223628692, + "grad_norm": 0.47184449434280396, + "learning_rate": 0.00033065157641373847, + "loss": 1.1336, + "step": 6560 + }, + { + "epoch": 0.6930379746835443, + "grad_norm": 0.44842779636383057, + "learning_rate": 0.0003285724468339576, + "loss": 1.131, + "step": 6570 + }, + { + "epoch": 0.6940928270042194, + "grad_norm": 0.39234837889671326, + "learning_rate": 0.00032649803955568755, + "loss": 1.1276, + 
"step": 6580 + }, + { + "epoch": 0.6951476793248945, + "grad_norm": 0.4204063415527344, + "learning_rate": 0.00032442837782367434, + "loss": 1.1301, + "step": 6590 + }, + { + "epoch": 0.6962025316455697, + "grad_norm": 0.38703998923301697, + "learning_rate": 0.0003223634848294883, + "loss": 1.1364, + "step": 6600 + }, + { + "epoch": 0.6972573839662447, + "grad_norm": 0.5222501754760742, + "learning_rate": 0.00032030338371126374, + "loss": 1.1223, + "step": 6610 + }, + { + "epoch": 0.6983122362869199, + "grad_norm": 0.4259988069534302, + "learning_rate": 0.0003182480975534395, + "loss": 1.1344, + "step": 6620 + }, + { + "epoch": 0.6993670886075949, + "grad_norm": 0.4191349148750305, + "learning_rate": 0.00031619764938650057, + "loss": 1.1274, + "step": 6630 + }, + { + "epoch": 0.70042194092827, + "grad_norm": 0.38727352023124695, + "learning_rate": 0.0003141520621867197, + "loss": 1.1281, + "step": 6640 + }, + { + "epoch": 0.7014767932489452, + "grad_norm": 0.43056294322013855, + "learning_rate": 0.00031211135887590074, + "loss": 1.1259, + "step": 6650 + }, + { + "epoch": 0.7025316455696202, + "grad_norm": 0.3930073380470276, + "learning_rate": 0.0003100755623211205, + "loss": 1.1271, + "step": 6660 + }, + { + "epoch": 0.7035864978902954, + "grad_norm": 0.3948570787906647, + "learning_rate": 0.0003080446953344735, + "loss": 1.1275, + "step": 6670 + }, + { + "epoch": 0.7046413502109705, + "grad_norm": 0.38179853558540344, + "learning_rate": 0.00030601878067281575, + "loss": 1.1312, + "step": 6680 + }, + { + "epoch": 0.7056962025316456, + "grad_norm": 0.40962478518486023, + "learning_rate": 0.00030399784103751044, + "loss": 1.1225, + "step": 6690 + }, + { + "epoch": 0.7067510548523207, + "grad_norm": 0.3918619751930237, + "learning_rate": 0.000301981899074173, + "loss": 1.1244, + "step": 6700 + }, + { + "epoch": 0.7078059071729957, + "grad_norm": 0.4068445861339569, + "learning_rate": 0.0002999709773724171, + "loss": 1.1276, + "step": 6710 + }, + { + "epoch": 
0.7088607594936709, + "grad_norm": 0.40518423914909363, + "learning_rate": 0.00029796509846560294, + "loss": 1.1168, + "step": 6720 + }, + { + "epoch": 0.709915611814346, + "grad_norm": 0.37731868028640747, + "learning_rate": 0.0002959642848305828, + "loss": 1.1226, + "step": 6730 + }, + { + "epoch": 0.7109704641350211, + "grad_norm": 0.3894304633140564, + "learning_rate": 0.00029396855888745045, + "loss": 1.126, + "step": 6740 + }, + { + "epoch": 0.7120253164556962, + "grad_norm": 0.39714908599853516, + "learning_rate": 0.0002919779429992895, + "loss": 1.1292, + "step": 6750 + }, + { + "epoch": 0.7130801687763713, + "grad_norm": 0.3878750205039978, + "learning_rate": 0.0002899924594719231, + "loss": 1.1273, + "step": 6760 + }, + { + "epoch": 0.7141350210970464, + "grad_norm": 0.393411248922348, + "learning_rate": 0.00028801213055366335, + "loss": 1.1266, + "step": 6770 + }, + { + "epoch": 0.7151898734177216, + "grad_norm": 0.40545380115509033, + "learning_rate": 0.00028603697843506315, + "loss": 1.1287, + "step": 6780 + }, + { + "epoch": 0.7162447257383966, + "grad_norm": 0.40277689695358276, + "learning_rate": 0.0002840670252486662, + "loss": 1.1314, + "step": 6790 + }, + { + "epoch": 0.7172995780590717, + "grad_norm": 0.456635445356369, + "learning_rate": 0.00028210229306876, + "loss": 1.1187, + "step": 6800 + }, + { + "epoch": 0.7183544303797469, + "grad_norm": 0.3888854384422302, + "learning_rate": 0.0002801428039111279, + "loss": 1.1235, + "step": 6810 + }, + { + "epoch": 0.7194092827004219, + "grad_norm": 0.4069770276546478, + "learning_rate": 0.00027818857973280274, + "loss": 1.1236, + "step": 6820 + }, + { + "epoch": 0.7204641350210971, + "grad_norm": 0.4361964166164398, + "learning_rate": 0.0002762396424318206, + "loss": 1.121, + "step": 6830 + }, + { + "epoch": 0.7215189873417721, + "grad_norm": 0.381592720746994, + "learning_rate": 0.00027429601384697526, + "loss": 1.112, + "step": 6840 + }, + { + "epoch": 0.7225738396624473, + "grad_norm": 
0.37498369812965393, + "learning_rate": 0.00027235771575757466, + "loss": 1.1213, + "step": 6850 + }, + { + "epoch": 0.7236286919831224, + "grad_norm": 0.406649112701416, + "learning_rate": 0.0002704247698831951, + "loss": 1.1154, + "step": 6860 + }, + { + "epoch": 0.7246835443037974, + "grad_norm": 0.38952797651290894, + "learning_rate": 0.0002684971978834389, + "loss": 1.1124, + "step": 6870 + }, + { + "epoch": 0.7257383966244726, + "grad_norm": 0.3850283920764923, + "learning_rate": 0.0002665750213576914, + "loss": 1.1221, + "step": 6880 + }, + { + "epoch": 0.7267932489451476, + "grad_norm": 0.38290688395500183, + "learning_rate": 0.0002646582618448794, + "loss": 1.1142, + "step": 6890 + }, + { + "epoch": 0.7278481012658228, + "grad_norm": 0.40484410524368286, + "learning_rate": 0.00026274694082322896, + "loss": 1.112, + "step": 6900 + }, + { + "epoch": 0.7289029535864979, + "grad_norm": 0.4301913380622864, + "learning_rate": 0.0002608410797100255, + "loss": 1.1367, + "step": 6910 + }, + { + "epoch": 0.729957805907173, + "grad_norm": 0.3835858404636383, + "learning_rate": 0.0002589406998613733, + "loss": 1.115, + "step": 6920 + }, + { + "epoch": 0.7310126582278481, + "grad_norm": 0.4031292498111725, + "learning_rate": 0.0002570458225719567, + "loss": 1.1203, + "step": 6930 + }, + { + "epoch": 0.7320675105485233, + "grad_norm": 0.3919115960597992, + "learning_rate": 0.00025515646907480074, + "loss": 1.124, + "step": 6940 + }, + { + "epoch": 0.7331223628691983, + "grad_norm": 0.414682537317276, + "learning_rate": 0.00025327266054103395, + "loss": 1.1223, + "step": 6950 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 0.39841046929359436, + "learning_rate": 0.0002513944180796509, + "loss": 1.1173, + "step": 6960 + }, + { + "epoch": 0.7352320675105485, + "grad_norm": 0.42326265573501587, + "learning_rate": 0.0002495217627372752, + "loss": 1.1117, + "step": 6970 + }, + { + "epoch": 0.7362869198312236, + "grad_norm": 0.3831874132156372, + "learning_rate": 
0.0002476547154979248, + "loss": 1.1133, + "step": 6980 + }, + { + "epoch": 0.7373417721518988, + "grad_norm": 0.4218146800994873, + "learning_rate": 0.00024579329728277534, + "loss": 1.1151, + "step": 6990 + }, + { + "epoch": 0.7383966244725738, + "grad_norm": 0.40372854471206665, + "learning_rate": 0.00024393752894992708, + "loss": 1.125, + "step": 7000 + }, + { + "epoch": 0.739451476793249, + "grad_norm": 0.38364744186401367, + "learning_rate": 0.00024208743129417004, + "loss": 1.0974, + "step": 7010 + }, + { + "epoch": 0.740506329113924, + "grad_norm": 0.4064615070819855, + "learning_rate": 0.00024024302504675206, + "loss": 1.1126, + "step": 7020 + }, + { + "epoch": 0.7415611814345991, + "grad_norm": 0.3830021023750305, + "learning_rate": 0.0002384043308751454, + "loss": 1.1154, + "step": 7030 + }, + { + "epoch": 0.7426160337552743, + "grad_norm": 0.4404035806655884, + "learning_rate": 0.00023657136938281653, + "loss": 1.121, + "step": 7040 + }, + { + "epoch": 0.7436708860759493, + "grad_norm": 0.42605167627334595, + "learning_rate": 0.00023474416110899377, + "loss": 1.1102, + "step": 7050 + }, + { + "epoch": 0.7447257383966245, + "grad_norm": 0.44118231534957886, + "learning_rate": 0.00023292272652843807, + "loss": 1.1152, + "step": 7060 + }, + { + "epoch": 0.7457805907172996, + "grad_norm": 0.4080129563808441, + "learning_rate": 0.00023110708605121317, + "loss": 1.1201, + "step": 7070 + }, + { + "epoch": 0.7468354430379747, + "grad_norm": 0.40057724714279175, + "learning_rate": 0.00022929726002245728, + "loss": 1.1161, + "step": 7080 + }, + { + "epoch": 0.7478902953586498, + "grad_norm": 0.3797246217727661, + "learning_rate": 0.00022749326872215472, + "loss": 1.1125, + "step": 7090 + }, + { + "epoch": 0.7489451476793249, + "grad_norm": 0.3935549259185791, + "learning_rate": 0.0002256951323649087, + "loss": 1.1099, + "step": 7100 + }, + { + "epoch": 0.75, + "grad_norm": 0.3947845697402954, + "learning_rate": 0.00022390287109971547, + "loss": 1.1215, + "step": 
7110 + }, + { + "epoch": 0.7510548523206751, + "grad_norm": 0.4323355257511139, + "learning_rate": 0.00022211650500973746, + "loss": 1.113, + "step": 7120 + }, + { + "epoch": 0.7521097046413502, + "grad_norm": 0.42935505509376526, + "learning_rate": 0.0002203360541120789, + "loss": 1.1221, + "step": 7130 + }, + { + "epoch": 0.7531645569620253, + "grad_norm": 0.4154747426509857, + "learning_rate": 0.00021856153835756164, + "loss": 1.1073, + "step": 7140 + }, + { + "epoch": 0.7542194092827004, + "grad_norm": 0.4245680272579193, + "learning_rate": 0.00021679297763050104, + "loss": 1.1023, + "step": 7150 + }, + { + "epoch": 0.7552742616033755, + "grad_norm": 0.40381312370300293, + "learning_rate": 0.0002150303917484834, + "loss": 1.1127, + "step": 7160 + }, + { + "epoch": 0.7563291139240507, + "grad_norm": 0.4075297713279724, + "learning_rate": 0.0002132738004621446, + "loss": 1.1206, + "step": 7170 + }, + { + "epoch": 0.7573839662447257, + "grad_norm": 0.42460012435913086, + "learning_rate": 0.00021152322345494763, + "loss": 1.0981, + "step": 7180 + }, + { + "epoch": 0.7584388185654009, + "grad_norm": 0.41856849193573, + "learning_rate": 0.00020977868034296253, + "loss": 1.1033, + "step": 7190 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 0.41449931263923645, + "learning_rate": 0.00020804019067464667, + "loss": 1.1079, + "step": 7200 + }, + { + "epoch": 0.760548523206751, + "grad_norm": 0.4799036979675293, + "learning_rate": 0.00020630777393062575, + "loss": 1.1102, + "step": 7210 + }, + { + "epoch": 0.7616033755274262, + "grad_norm": 0.387323260307312, + "learning_rate": 0.00020458144952347523, + "loss": 1.1204, + "step": 7220 + }, + { + "epoch": 0.7626582278481012, + "grad_norm": 0.4218441843986511, + "learning_rate": 0.00020286123679750314, + "loss": 1.1199, + "step": 7230 + }, + { + "epoch": 0.7637130801687764, + "grad_norm": 0.3954945206642151, + "learning_rate": 0.00020114715502853292, + "loss": 1.117, + "step": 7240 + }, + { + "epoch": 
0.7647679324894515, + "grad_norm": 0.42326819896698, + "learning_rate": 0.0001994392234236878, + "loss": 1.1119, + "step": 7250 + }, + { + "epoch": 0.7658227848101266, + "grad_norm": 0.38280928134918213, + "learning_rate": 0.0001977374611211754, + "loss": 1.1115, + "step": 7260 + }, + { + "epoch": 0.7668776371308017, + "grad_norm": 0.4045434594154358, + "learning_rate": 0.00019604188719007313, + "loss": 1.1092, + "step": 7270 + }, + { + "epoch": 0.7679324894514767, + "grad_norm": 0.40801259875297546, + "learning_rate": 0.00019435252063011504, + "loss": 1.1117, + "step": 7280 + }, + { + "epoch": 0.7689873417721519, + "grad_norm": 0.3943946063518524, + "learning_rate": 0.0001926693803714779, + "loss": 1.1201, + "step": 7290 + }, + { + "epoch": 0.770042194092827, + "grad_norm": 0.4292197823524475, + "learning_rate": 0.00019099248527457068, + "loss": 1.1238, + "step": 7300 + }, + { + "epoch": 0.7710970464135021, + "grad_norm": 0.3949521481990814, + "learning_rate": 0.0001893218541298216, + "loss": 1.1048, + "step": 7310 + }, + { + "epoch": 0.7721518987341772, + "grad_norm": 0.3822163939476013, + "learning_rate": 0.00018765750565746827, + "loss": 1.1039, + "step": 7320 + }, + { + "epoch": 0.7732067510548524, + "grad_norm": 0.40053272247314453, + "learning_rate": 0.00018599945850734812, + "loss": 1.107, + "step": 7330 + }, + { + "epoch": 0.7742616033755274, + "grad_norm": 0.3973643481731415, + "learning_rate": 0.00018434773125868895, + "loss": 1.1002, + "step": 7340 + }, + { + "epoch": 0.7753164556962026, + "grad_norm": 0.3798317313194275, + "learning_rate": 0.00018270234241990108, + "loss": 1.0997, + "step": 7350 + }, + { + "epoch": 0.7763713080168776, + "grad_norm": 0.3852475583553314, + "learning_rate": 0.0001810633104283698, + "loss": 1.108, + "step": 7360 + }, + { + "epoch": 0.7774261603375527, + "grad_norm": 0.3998146057128906, + "learning_rate": 0.0001794306536502492, + "loss": 1.1089, + "step": 7370 + }, + { + "epoch": 0.7784810126582279, + "grad_norm": 
0.3986417353153229, + "learning_rate": 0.0001778043903802555, + "loss": 1.0969, + "step": 7380 + }, + { + "epoch": 0.7795358649789029, + "grad_norm": 0.38809576630592346, + "learning_rate": 0.0001761845388414627, + "loss": 1.1069, + "step": 7390 + }, + { + "epoch": 0.7805907172995781, + "grad_norm": 0.39405661821365356, + "learning_rate": 0.00017457111718509831, + "loss": 1.1133, + "step": 7400 + }, + { + "epoch": 0.7816455696202531, + "grad_norm": 0.3776385486125946, + "learning_rate": 0.00017296414349033976, + "loss": 1.0934, + "step": 7410 + }, + { + "epoch": 0.7827004219409283, + "grad_norm": 0.3878311812877655, + "learning_rate": 0.00017136363576411172, + "loss": 1.1012, + "step": 7420 + }, + { + "epoch": 0.7837552742616034, + "grad_norm": 0.38258445262908936, + "learning_rate": 0.00016976961194088526, + "loss": 1.0889, + "step": 7430 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 0.3982473313808441, + "learning_rate": 0.00016818208988247533, + "loss": 1.0904, + "step": 7440 + }, + { + "epoch": 0.7858649789029536, + "grad_norm": 0.3993515372276306, + "learning_rate": 0.0001666010873778419, + "loss": 1.0948, + "step": 7450 + }, + { + "epoch": 0.7869198312236287, + "grad_norm": 0.3961378335952759, + "learning_rate": 0.00016502662214289, + "loss": 1.1057, + "step": 7460 + }, + { + "epoch": 0.7879746835443038, + "grad_norm": 0.43822330236434937, + "learning_rate": 0.00016345871182027124, + "loss": 1.097, + "step": 7470 + }, + { + "epoch": 0.7890295358649789, + "grad_norm": 0.391075998544693, + "learning_rate": 0.00016189737397918653, + "loss": 1.1032, + "step": 7480 + }, + { + "epoch": 0.790084388185654, + "grad_norm": 0.3861009180545807, + "learning_rate": 0.0001603426261151884, + "loss": 1.1075, + "step": 7490 + }, + { + "epoch": 0.7911392405063291, + "grad_norm": 0.40231236815452576, + "learning_rate": 0.00015879448564998648, + "loss": 1.106, + "step": 7500 + }, + { + "epoch": 0.7921940928270043, + "grad_norm": 0.39477503299713135, + "learning_rate": 
0.0001572529699312501, + "loss": 1.1088, + "step": 7510 + }, + { + "epoch": 0.7932489451476793, + "grad_norm": 0.38415929675102234, + "learning_rate": 0.0001557180962324158, + "loss": 1.0951, + "step": 7520 + }, + { + "epoch": 0.7943037974683544, + "grad_norm": 0.39178451895713806, + "learning_rate": 0.00015418988175249282, + "loss": 1.0972, + "step": 7530 + }, + { + "epoch": 0.7953586497890295, + "grad_norm": 0.4139266312122345, + "learning_rate": 0.00015266834361587063, + "loss": 1.094, + "step": 7540 + }, + { + "epoch": 0.7964135021097046, + "grad_norm": 0.39023134112358093, + "learning_rate": 0.00015115349887212678, + "loss": 1.0903, + "step": 7550 + }, + { + "epoch": 0.7974683544303798, + "grad_norm": 0.4003297984600067, + "learning_rate": 0.00014964536449583657, + "loss": 1.0953, + "step": 7560 + }, + { + "epoch": 0.7985232067510548, + "grad_norm": 0.41940462589263916, + "learning_rate": 0.00014814395738638195, + "loss": 1.1028, + "step": 7570 + }, + { + "epoch": 0.79957805907173, + "grad_norm": 0.39905744791030884, + "learning_rate": 0.00014664929436776278, + "loss": 1.1001, + "step": 7580 + }, + { + "epoch": 0.8006329113924051, + "grad_norm": 0.3942812979221344, + "learning_rate": 0.00014516139218840788, + "loss": 1.0895, + "step": 7590 + }, + { + "epoch": 0.8016877637130801, + "grad_norm": 0.39487162232398987, + "learning_rate": 0.00014368026752098782, + "loss": 1.098, + "step": 7600 + }, + { + "epoch": 0.8027426160337553, + "grad_norm": 0.4070006310939789, + "learning_rate": 0.00014220593696222768, + "loss": 1.1101, + "step": 7610 + }, + { + "epoch": 0.8037974683544303, + "grad_norm": 0.3753879964351654, + "learning_rate": 0.00014073841703272092, + "loss": 1.0851, + "step": 7620 + }, + { + "epoch": 0.8048523206751055, + "grad_norm": 0.40898725390434265, + "learning_rate": 0.00013927772417674558, + "loss": 1.1068, + "step": 7630 + }, + { + "epoch": 0.8059071729957806, + "grad_norm": 0.4121878743171692, + "learning_rate": 0.00013782387476207788, + "loss": 
1.0979, + "step": 7640 + }, + { + "epoch": 0.8069620253164557, + "grad_norm": 0.40831995010375977, + "learning_rate": 0.00013637688507981064, + "loss": 1.1129, + "step": 7650 + }, + { + "epoch": 0.8080168776371308, + "grad_norm": 0.37886929512023926, + "learning_rate": 0.0001349367713441697, + "loss": 1.0895, + "step": 7660 + }, + { + "epoch": 0.8090717299578059, + "grad_norm": 0.39507555961608887, + "learning_rate": 0.0001335035496923326, + "loss": 1.0959, + "step": 7670 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 0.44594720005989075, + "learning_rate": 0.0001320772361842478, + "loss": 1.1093, + "step": 7680 + }, + { + "epoch": 0.8111814345991561, + "grad_norm": 0.4449687898159027, + "learning_rate": 0.00013065784680245442, + "loss": 1.084, + "step": 7690 + }, + { + "epoch": 0.8122362869198312, + "grad_norm": 0.42952996492385864, + "learning_rate": 0.00012924539745190402, + "loss": 1.096, + "step": 7700 + }, + { + "epoch": 0.8132911392405063, + "grad_norm": 0.38639920949935913, + "learning_rate": 0.0001278399039597809, + "loss": 1.0901, + "step": 7710 + }, + { + "epoch": 0.8143459915611815, + "grad_norm": 0.4121502935886383, + "learning_rate": 0.0001264413820753261, + "loss": 1.0952, + "step": 7720 + }, + { + "epoch": 0.8154008438818565, + "grad_norm": 0.4200342297554016, + "learning_rate": 0.00012504984746966003, + "loss": 1.1035, + "step": 7730 + }, + { + "epoch": 0.8164556962025317, + "grad_norm": 0.39154133200645447, + "learning_rate": 0.00012366531573560754, + "loss": 1.1042, + "step": 7740 + }, + { + "epoch": 0.8175105485232067, + "grad_norm": 0.3894316256046295, + "learning_rate": 0.00012228780238752264, + "loss": 1.0885, + "step": 7750 + }, + { + "epoch": 0.8185654008438819, + "grad_norm": 0.42531612515449524, + "learning_rate": 0.00012091732286111514, + "loss": 1.0881, + "step": 7760 + }, + { + "epoch": 0.819620253164557, + "grad_norm": 0.3934972584247589, + "learning_rate": 0.00011955389251327737, + "loss": 1.0937, + "step": 7770 + }, + { + 
"epoch": 0.820675105485232, + "grad_norm": 0.41455936431884766, + "learning_rate": 0.00011819752662191197, + "loss": 1.0875, + "step": 7780 + }, + { + "epoch": 0.8217299578059072, + "grad_norm": 0.40324583649635315, + "learning_rate": 0.00011684824038576115, + "loss": 1.1047, + "step": 7790 + }, + { + "epoch": 0.8227848101265823, + "grad_norm": 0.3860013484954834, + "learning_rate": 0.00011550604892423593, + "loss": 1.0912, + "step": 7800 + }, + { + "epoch": 0.8238396624472574, + "grad_norm": 0.4025815725326538, + "learning_rate": 0.0001141709672772471, + "loss": 1.0934, + "step": 7810 + }, + { + "epoch": 0.8248945147679325, + "grad_norm": 0.39778342843055725, + "learning_rate": 0.00011284301040503625, + "loss": 1.0992, + "step": 7820 + }, + { + "epoch": 0.8259493670886076, + "grad_norm": 0.3826282322406769, + "learning_rate": 0.0001115221931880088, + "loss": 1.0923, + "step": 7830 + }, + { + "epoch": 0.8270042194092827, + "grad_norm": 0.3833996057510376, + "learning_rate": 0.00011020853042656648, + "loss": 1.0762, + "step": 7840 + }, + { + "epoch": 0.8280590717299579, + "grad_norm": 0.39292940497398376, + "learning_rate": 0.000108902036840942, + "loss": 1.0849, + "step": 7850 + }, + { + "epoch": 0.8291139240506329, + "grad_norm": 0.37741851806640625, + "learning_rate": 0.00010760272707103389, + "loss": 1.0724, + "step": 7860 + }, + { + "epoch": 0.830168776371308, + "grad_norm": 0.3965178430080414, + "learning_rate": 0.00010631061567624259, + "loss": 1.0891, + "step": 7870 + }, + { + "epoch": 0.8312236286919831, + "grad_norm": 0.39103081822395325, + "learning_rate": 0.00010502571713530706, + "loss": 1.1079, + "step": 7880 + }, + { + "epoch": 0.8322784810126582, + "grad_norm": 0.3876722455024719, + "learning_rate": 0.00010374804584614308, + "loss": 1.1029, + "step": 7890 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.399039626121521, + "learning_rate": 0.00010247761612568129, + "loss": 1.0834, + "step": 7900 + }, + { + "epoch": 0.8343881856540084, + 
"grad_norm": 0.4021260440349579, + "learning_rate": 0.0001012144422097069, + "loss": 1.0935, + "step": 7910 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 0.40740182995796204, + "learning_rate": 9.995853825270052e-05, + "loss": 1.0881, + "step": 7920 + }, + { + "epoch": 0.8364978902953587, + "grad_norm": 0.40316537022590637, + "learning_rate": 9.870991832767919e-05, + "loss": 1.0975, + "step": 7930 + }, + { + "epoch": 0.8375527426160337, + "grad_norm": 0.38476547598838806, + "learning_rate": 9.746859642603884e-05, + "loss": 1.0931, + "step": 7940 + }, + { + "epoch": 0.8386075949367089, + "grad_norm": 0.38479694724082947, + "learning_rate": 9.623458645739755e-05, + "loss": 1.0971, + "step": 7950 + }, + { + "epoch": 0.8396624472573839, + "grad_norm": 0.37773653864860535, + "learning_rate": 9.50079022494395e-05, + "loss": 1.0838, + "step": 7960 + }, + { + "epoch": 0.8407172995780591, + "grad_norm": 0.39508056640625, + "learning_rate": 9.378855754776028e-05, + "loss": 1.0829, + "step": 7970 + }, + { + "epoch": 0.8417721518987342, + "grad_norm": 0.3902425169944763, + "learning_rate": 9.257656601571266e-05, + "loss": 1.0818, + "step": 7980 + }, + { + "epoch": 0.8428270042194093, + "grad_norm": 0.41624459624290466, + "learning_rate": 9.137194123425349e-05, + "loss": 1.0921, + "step": 7990 + }, + { + "epoch": 0.8438818565400844, + "grad_norm": 0.38402268290519714, + "learning_rate": 9.017469670179168e-05, + "loss": 1.0874, + "step": 8000 + }, + { + "epoch": 0.8449367088607594, + "grad_norm": 0.40924087166786194, + "learning_rate": 8.898484583403668e-05, + "loss": 1.0924, + "step": 8010 + }, + { + "epoch": 0.8459915611814346, + "grad_norm": 0.3977222442626953, + "learning_rate": 8.780240196384873e-05, + "loss": 1.0883, + "step": 8020 + }, + { + "epoch": 0.8470464135021097, + "grad_norm": 0.39897042512893677, + "learning_rate": 8.662737834108861e-05, + "loss": 1.0878, + "step": 8030 + }, + { + "epoch": 0.8481012658227848, + "grad_norm": 0.4027484953403473, + 
"learning_rate": 8.545978813246987e-05, + "loss": 1.0977, + "step": 8040 + }, + { + "epoch": 0.8491561181434599, + "grad_norm": 0.3986610174179077, + "learning_rate": 8.429964442141072e-05, + "loss": 1.0752, + "step": 8050 + }, + { + "epoch": 0.8502109704641351, + "grad_norm": 0.39370623230934143, + "learning_rate": 8.314696020788806e-05, + "loss": 1.0894, + "step": 8060 + }, + { + "epoch": 0.8512658227848101, + "grad_norm": 0.4042566120624542, + "learning_rate": 8.200174840829136e-05, + "loss": 1.095, + "step": 8070 + }, + { + "epoch": 0.8523206751054853, + "grad_norm": 0.39300772547721863, + "learning_rate": 8.08640218552778e-05, + "loss": 1.093, + "step": 8080 + }, + { + "epoch": 0.8533755274261603, + "grad_norm": 0.3906431198120117, + "learning_rate": 7.973379329762925e-05, + "loss": 1.0817, + "step": 8090 + }, + { + "epoch": 0.8544303797468354, + "grad_norm": 0.39622342586517334, + "learning_rate": 7.861107540010845e-05, + "loss": 1.0786, + "step": 8100 + }, + { + "epoch": 0.8554852320675106, + "grad_norm": 0.44495585560798645, + "learning_rate": 7.749588074331762e-05, + "loss": 1.0932, + "step": 8110 + }, + { + "epoch": 0.8565400843881856, + "grad_norm": 0.4084097743034363, + "learning_rate": 7.63882218235575e-05, + "loss": 1.0855, + "step": 8120 + }, + { + "epoch": 0.8575949367088608, + "grad_norm": 0.3862929940223694, + "learning_rate": 7.528811105268699e-05, + "loss": 1.0837, + "step": 8130 + }, + { + "epoch": 0.8586497890295358, + "grad_norm": 0.39876341819763184, + "learning_rate": 7.41955607579845e-05, + "loss": 1.0834, + "step": 8140 + }, + { + "epoch": 0.859704641350211, + "grad_norm": 0.40218445658683777, + "learning_rate": 7.311058318200969e-05, + "loss": 1.0804, + "step": 8150 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 0.39800024032592773, + "learning_rate": 7.203319048246599e-05, + "loss": 1.0913, + "step": 8160 + }, + { + "epoch": 0.8618143459915611, + "grad_norm": 0.39610108733177185, + "learning_rate": 7.096339473206471e-05, + 
"loss": 1.0812, + "step": 8170 + }, + { + "epoch": 0.8628691983122363, + "grad_norm": 0.40927422046661377, + "learning_rate": 6.990120791838953e-05, + "loss": 1.0892, + "step": 8180 + }, + { + "epoch": 0.8639240506329114, + "grad_norm": 0.40633469820022583, + "learning_rate": 6.884664194376233e-05, + "loss": 1.0821, + "step": 8190 + }, + { + "epoch": 0.8649789029535865, + "grad_norm": 0.39570507407188416, + "learning_rate": 6.779970862510989e-05, + "loss": 1.0937, + "step": 8200 + }, + { + "epoch": 0.8660337552742616, + "grad_norm": 0.39803698658943176, + "learning_rate": 6.676041969383107e-05, + "loss": 1.0871, + "step": 8210 + }, + { + "epoch": 0.8670886075949367, + "grad_norm": 0.40148958563804626, + "learning_rate": 6.572878679566605e-05, + "loss": 1.0931, + "step": 8220 + }, + { + "epoch": 0.8681434599156118, + "grad_norm": 0.3799375593662262, + "learning_rate": 6.470482149056509e-05, + "loss": 1.0892, + "step": 8230 + }, + { + "epoch": 0.869198312236287, + "grad_norm": 0.38664311170578003, + "learning_rate": 6.368853525255942e-05, + "loss": 1.0847, + "step": 8240 + }, + { + "epoch": 0.870253164556962, + "grad_norm": 0.39490216970443726, + "learning_rate": 6.267993946963249e-05, + "loss": 1.0994, + "step": 8250 + }, + { + "epoch": 0.8713080168776371, + "grad_norm": 0.39728647470474243, + "learning_rate": 6.167904544359265e-05, + "loss": 1.0892, + "step": 8260 + }, + { + "epoch": 0.8723628691983122, + "grad_norm": 0.4031187891960144, + "learning_rate": 6.068586438994617e-05, + "loss": 1.0875, + "step": 8270 + }, + { + "epoch": 0.8734177215189873, + "grad_norm": 0.38711559772491455, + "learning_rate": 5.970040743777161e-05, + "loss": 1.0719, + "step": 8280 + }, + { + "epoch": 0.8744725738396625, + "grad_norm": 0.3881445825099945, + "learning_rate": 5.8722685629595454e-05, + "loss": 1.0742, + "step": 8290 + }, + { + "epoch": 0.8755274261603375, + "grad_norm": 0.3856261074542999, + "learning_rate": 5.7752709921267855e-05, + "loss": 1.0966, + "step": 8300 + }, + { 
+ "epoch": 0.8765822784810127, + "grad_norm": 0.3909825086593628, + "learning_rate": 5.6790491181840294e-05, + "loss": 1.0751, + "step": 8310 + }, + { + "epoch": 0.8776371308016878, + "grad_norm": 0.3837721645832062, + "learning_rate": 5.583604019344354e-05, + "loss": 1.0935, + "step": 8320 + }, + { + "epoch": 0.8786919831223629, + "grad_norm": 0.39072513580322266, + "learning_rate": 5.4889367651167007e-05, + "loss": 1.0842, + "step": 8330 + }, + { + "epoch": 0.879746835443038, + "grad_norm": 0.37632638216018677, + "learning_rate": 5.3950484162938714e-05, + "loss": 1.0743, + "step": 8340 + }, + { + "epoch": 0.880801687763713, + "grad_norm": 0.38079795241355896, + "learning_rate": 5.3019400249406686e-05, + "loss": 1.0784, + "step": 8350 + }, + { + "epoch": 0.8818565400843882, + "grad_norm": 0.38797900080680847, + "learning_rate": 5.209612634382077e-05, + "loss": 1.0747, + "step": 8360 + }, + { + "epoch": 0.8829113924050633, + "grad_norm": 0.38324832916259766, + "learning_rate": 5.118067279191599e-05, + "loss": 1.0829, + "step": 8370 + }, + { + "epoch": 0.8839662447257384, + "grad_norm": 0.3893651068210602, + "learning_rate": 5.0273049851796205e-05, + "loss": 1.0883, + "step": 8380 + }, + { + "epoch": 0.8850210970464135, + "grad_norm": 0.38790905475616455, + "learning_rate": 4.9373267693819805e-05, + "loss": 1.0807, + "step": 8390 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 0.38551226258277893, + "learning_rate": 4.848133640048513e-05, + "loss": 1.0813, + "step": 8400 + }, + { + "epoch": 0.8871308016877637, + "grad_norm": 0.38885149359703064, + "learning_rate": 4.75972659663178e-05, + "loss": 1.0976, + "step": 8410 + }, + { + "epoch": 0.8881856540084389, + "grad_norm": 0.3809530436992645, + "learning_rate": 4.672106629775882e-05, + "loss": 1.0861, + "step": 8420 + }, + { + "epoch": 0.8892405063291139, + "grad_norm": 0.38822048902511597, + "learning_rate": 4.585274721305333e-05, + "loss": 1.0848, + "step": 8430 + }, + { + "epoch": 0.890295358649789, + 
"grad_norm": 0.3988723158836365, + "learning_rate": 4.4992318442140575e-05, + "loss": 1.0757, + "step": 8440 + }, + { + "epoch": 0.8913502109704642, + "grad_norm": 0.39513498544692993, + "learning_rate": 4.413978962654508e-05, + "loss": 1.0847, + "step": 8450 + }, + { + "epoch": 0.8924050632911392, + "grad_norm": 0.4065006375312805, + "learning_rate": 4.3295170319268554e-05, + "loss": 1.0804, + "step": 8460 + }, + { + "epoch": 0.8934599156118144, + "grad_norm": 0.3936798870563507, + "learning_rate": 4.245846998468261e-05, + "loss": 1.0808, + "step": 8470 + }, + { + "epoch": 0.8945147679324894, + "grad_norm": 0.39284446835517883, + "learning_rate": 4.16296979984232e-05, + "loss": 1.0769, + "step": 8480 + }, + { + "epoch": 0.8955696202531646, + "grad_norm": 0.3854410946369171, + "learning_rate": 4.080886364728506e-05, + "loss": 1.0832, + "step": 8490 + }, + { + "epoch": 0.8966244725738397, + "grad_norm": 0.3905365765094757, + "learning_rate": 3.999597612911793e-05, + "loss": 1.0743, + "step": 8500 + }, + { + "epoch": 0.8976793248945147, + "grad_norm": 0.3856108486652374, + "learning_rate": 3.9191044552723345e-05, + "loss": 1.0833, + "step": 8510 + }, + { + "epoch": 0.8987341772151899, + "grad_norm": 0.395382285118103, + "learning_rate": 3.839407793775268e-05, + "loss": 1.081, + "step": 8520 + }, + { + "epoch": 0.8997890295358649, + "grad_norm": 0.38868480920791626, + "learning_rate": 3.760508521460584e-05, + "loss": 1.0915, + "step": 8530 + }, + { + "epoch": 0.9008438818565401, + "grad_norm": 0.38354727625846863, + "learning_rate": 3.682407522433173e-05, + "loss": 1.0871, + "step": 8540 + }, + { + "epoch": 0.9018987341772152, + "grad_norm": 0.3920823931694031, + "learning_rate": 3.605105671852854e-05, + "loss": 1.0796, + "step": 8550 + }, + { + "epoch": 0.9029535864978903, + "grad_norm": 0.38295450806617737, + "learning_rate": 3.528603835924626e-05, + "loss": 1.0715, + "step": 8560 + }, + { + "epoch": 0.9040084388185654, + "grad_norm": 0.4038940966129303, + 
"learning_rate": 3.4529028718888935e-05, + "loss": 1.0914, + "step": 8570 + }, + { + "epoch": 0.9050632911392406, + "grad_norm": 0.37392187118530273, + "learning_rate": 3.378003628011938e-05, + "loss": 1.0821, + "step": 8580 + }, + { + "epoch": 0.9061181434599156, + "grad_norm": 0.3976120948791504, + "learning_rate": 3.303906943576346e-05, + "loss": 1.087, + "step": 8590 + }, + { + "epoch": 0.9071729957805907, + "grad_norm": 0.3847948908805847, + "learning_rate": 3.230613648871661e-05, + "loss": 1.0781, + "step": 8600 + }, + { + "epoch": 0.9082278481012658, + "grad_norm": 0.3972248435020447, + "learning_rate": 3.158124565185022e-05, + "loss": 1.0792, + "step": 8610 + }, + { + "epoch": 0.9092827004219409, + "grad_norm": 0.3839012384414673, + "learning_rate": 3.086440504792026e-05, + "loss": 1.076, + "step": 8620 + }, + { + "epoch": 0.9103375527426161, + "grad_norm": 0.3970171809196472, + "learning_rate": 3.015562270947553e-05, + "loss": 1.0864, + "step": 8630 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 0.411268413066864, + "learning_rate": 2.945490657876837e-05, + "loss": 1.0662, + "step": 8640 + }, + { + "epoch": 0.9124472573839663, + "grad_norm": 0.3954361081123352, + "learning_rate": 2.8762264507665113e-05, + "loss": 1.0731, + "step": 8650 + }, + { + "epoch": 0.9135021097046413, + "grad_norm": 0.4060966968536377, + "learning_rate": 2.807770425755829e-05, + "loss": 1.072, + "step": 8660 + }, + { + "epoch": 0.9145569620253164, + "grad_norm": 0.38363638520240784, + "learning_rate": 2.7401233499279866e-05, + "loss": 1.0833, + "step": 8670 + }, + { + "epoch": 0.9156118143459916, + "grad_norm": 0.3846428692340851, + "learning_rate": 2.6732859813014987e-05, + "loss": 1.0899, + "step": 8680 + }, + { + "epoch": 0.9166666666666666, + "grad_norm": 0.3856589198112488, + "learning_rate": 2.607259068821721e-05, + "loss": 1.0814, + "step": 8690 + }, + { + "epoch": 0.9177215189873418, + "grad_norm": 0.37877020239830017, + "learning_rate": 2.5420433523524493e-05, + 
"loss": 1.0802, + "step": 8700 + }, + { + "epoch": 0.9187763713080169, + "grad_norm": 0.3949586749076843, + "learning_rate": 2.4776395626676162e-05, + "loss": 1.0807, + "step": 8710 + }, + { + "epoch": 0.919831223628692, + "grad_norm": 0.395811527967453, + "learning_rate": 2.414048421443141e-05, + "loss": 1.0743, + "step": 8720 + }, + { + "epoch": 0.9208860759493671, + "grad_norm": 0.37916049361228943, + "learning_rate": 2.3512706412488012e-05, + "loss": 1.0866, + "step": 8730 + }, + { + "epoch": 0.9219409282700421, + "grad_norm": 0.39117804169654846, + "learning_rate": 2.2893069255402993e-05, + "loss": 1.0741, + "step": 8740 + }, + { + "epoch": 0.9229957805907173, + "grad_norm": 0.3875597417354584, + "learning_rate": 2.2281579686513176e-05, + "loss": 1.0728, + "step": 8750 + }, + { + "epoch": 0.9240506329113924, + "grad_norm": 0.389176607131958, + "learning_rate": 2.1678244557857663e-05, + "loss": 1.0641, + "step": 8760 + }, + { + "epoch": 0.9251054852320675, + "grad_norm": 0.37417352199554443, + "learning_rate": 2.1083070630101232e-05, + "loss": 1.0715, + "step": 8770 + }, + { + "epoch": 0.9261603375527426, + "grad_norm": 0.3794352114200592, + "learning_rate": 2.0496064572458395e-05, + "loss": 1.0772, + "step": 8780 + }, + { + "epoch": 0.9272151898734177, + "grad_norm": 0.3930789828300476, + "learning_rate": 1.991723296261863e-05, + "loss": 1.0744, + "step": 8790 + }, + { + "epoch": 0.9282700421940928, + "grad_norm": 0.37813901901245117, + "learning_rate": 1.9346582286672686e-05, + "loss": 1.071, + "step": 8800 + }, + { + "epoch": 0.929324894514768, + "grad_norm": 0.3956584632396698, + "learning_rate": 1.878411893904014e-05, + "loss": 1.0886, + "step": 8810 + }, + { + "epoch": 0.930379746835443, + "grad_norm": 0.3814772665500641, + "learning_rate": 1.822984922239737e-05, + "loss": 1.0765, + "step": 8820 + }, + { + "epoch": 0.9314345991561181, + "grad_norm": 0.38081833720207214, + "learning_rate": 1.7683779347607286e-05, + "loss": 1.0939, + "step": 8830 + }, + { + 
"epoch": 0.9324894514767933, + "grad_norm": 0.37919020652770996, + "learning_rate": 1.714591543364938e-05, + "loss": 1.0802, + "step": 8840 + }, + { + "epoch": 0.9335443037974683, + "grad_norm": 0.3892782926559448, + "learning_rate": 1.6616263507551437e-05, + "loss": 1.0775, + "step": 8850 + }, + { + "epoch": 0.9345991561181435, + "grad_norm": 0.38628166913986206, + "learning_rate": 1.609482950432195e-05, + "loss": 1.0766, + "step": 8860 + }, + { + "epoch": 0.9356540084388185, + "grad_norm": 0.39751964807510376, + "learning_rate": 1.5581619266883563e-05, + "loss": 1.0795, + "step": 8870 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 0.40209832787513733, + "learning_rate": 1.5076638546007548e-05, + "loss": 1.0842, + "step": 8880 + }, + { + "epoch": 0.9377637130801688, + "grad_norm": 0.38394156098365784, + "learning_rate": 1.457989300024945e-05, + "loss": 1.0811, + "step": 8890 + }, + { + "epoch": 0.9388185654008439, + "grad_norm": 0.38769692182540894, + "learning_rate": 1.4091388195885625e-05, + "loss": 1.0715, + "step": 8900 + }, + { + "epoch": 0.939873417721519, + "grad_norm": 0.3908451795578003, + "learning_rate": 1.3611129606851041e-05, + "loss": 1.0736, + "step": 8910 + }, + { + "epoch": 0.9409282700421941, + "grad_norm": 0.38223668932914734, + "learning_rate": 1.313912261467759e-05, + "loss": 1.0811, + "step": 8920 + }, + { + "epoch": 0.9419831223628692, + "grad_norm": 0.3822036683559418, + "learning_rate": 1.267537250843412e-05, + "loss": 1.0757, + "step": 8930 + }, + { + "epoch": 0.9430379746835443, + "grad_norm": 0.3838770091533661, + "learning_rate": 1.2219884484667071e-05, + "loss": 1.0764, + "step": 8940 + }, + { + "epoch": 0.9440928270042194, + "grad_norm": 0.3876483142375946, + "learning_rate": 1.1772663647341947e-05, + "loss": 1.078, + "step": 8950 + }, + { + "epoch": 0.9451476793248945, + "grad_norm": 0.3883013129234314, + "learning_rate": 1.1333715007786932e-05, + "loss": 1.0796, + "step": 8960 + }, + { + "epoch": 0.9462025316455697, + 
"grad_norm": 0.38810646533966064, + "learning_rate": 1.0903043484635694e-05, + "loss": 1.0771, + "step": 8970 + }, + { + "epoch": 0.9472573839662447, + "grad_norm": 0.39272549748420715, + "learning_rate": 1.0480653903772924e-05, + "loss": 1.0745, + "step": 8980 + }, + { + "epoch": 0.9483122362869199, + "grad_norm": 0.36674994230270386, + "learning_rate": 1.0066550998280132e-05, + "loss": 1.0779, + "step": 8990 + }, + { + "epoch": 0.9493670886075949, + "grad_norm": 0.38119739294052124, + "learning_rate": 9.660739408382608e-06, + "loss": 1.0776, + "step": 9000 + }, + { + "epoch": 0.95042194092827, + "grad_norm": 0.3855500817298889, + "learning_rate": 9.26322368139737e-06, + "loss": 1.072, + "step": 9010 + }, + { + "epoch": 0.9514767932489452, + "grad_norm": 0.38145649433135986, + "learning_rate": 8.874008271682222e-06, + "loss": 1.0725, + "step": 9020 + }, + { + "epoch": 0.9525316455696202, + "grad_norm": 0.3881155252456665, + "learning_rate": 8.493097540585775e-06, + "loss": 1.0917, + "step": 9030 + }, + { + "epoch": 0.9535864978902954, + "grad_norm": 0.38377439975738525, + "learning_rate": 8.120495756399005e-06, + "loss": 1.068, + "step": 9040 + }, + { + "epoch": 0.9546413502109705, + "grad_norm": 0.387001097202301, + "learning_rate": 7.756207094306605e-06, + "loss": 1.0778, + "step": 9050 + }, + { + "epoch": 0.9556962025316456, + "grad_norm": 0.37688058614730835, + "learning_rate": 7.400235636340957e-06, + "loss": 1.0813, + "step": 9060 + }, + { + "epoch": 0.9567510548523207, + "grad_norm": 0.38394081592559814, + "learning_rate": 7.0525853713362395e-06, + "loss": 1.0808, + "step": 9070 + }, + { + "epoch": 0.9578059071729957, + "grad_norm": 0.3848443031311035, + "learning_rate": 6.71326019488322e-06, + "loss": 1.0854, + "step": 9080 + }, + { + "epoch": 0.9588607594936709, + "grad_norm": 0.38681983947753906, + "learning_rate": 6.3822639092862846e-06, + "loss": 1.0814, + "step": 9090 + }, + { + "epoch": 0.959915611814346, + "grad_norm": 0.38426393270492554, + 
"learning_rate": 6.059600223520478e-06, + "loss": 1.0619, + "step": 9100 + }, + { + "epoch": 0.9609704641350211, + "grad_norm": 0.38100704550743103, + "learning_rate": 5.745272753189784e-06, + "loss": 1.0736, + "step": 9110 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 0.38649922609329224, + "learning_rate": 5.439285020487156e-06, + "loss": 1.0848, + "step": 9120 + }, + { + "epoch": 0.9630801687763713, + "grad_norm": 0.3965057134628296, + "learning_rate": 5.141640454154467e-06, + "loss": 1.0687, + "step": 9130 + }, + { + "epoch": 0.9641350210970464, + "grad_norm": 0.3938376009464264, + "learning_rate": 4.852342389444458e-06, + "loss": 1.0836, + "step": 9140 + }, + { + "epoch": 0.9651898734177216, + "grad_norm": 0.3860808312892914, + "learning_rate": 4.571394068083185e-06, + "loss": 1.073, + "step": 9150 + }, + { + "epoch": 0.9662447257383966, + "grad_norm": 0.3864290118217468, + "learning_rate": 4.298798638233709e-06, + "loss": 1.0819, + "step": 9160 + }, + { + "epoch": 0.9672995780590717, + "grad_norm": 0.39091676473617554, + "learning_rate": 4.034559154461049e-06, + "loss": 1.0804, + "step": 9170 + }, + { + "epoch": 0.9683544303797469, + "grad_norm": 0.3859773278236389, + "learning_rate": 3.7786785776976198e-06, + "loss": 1.0761, + "step": 9180 + }, + { + "epoch": 0.9694092827004219, + "grad_norm": 0.3780228793621063, + "learning_rate": 3.5311597752100964e-06, + "loss": 1.0733, + "step": 9190 + }, + { + "epoch": 0.9704641350210971, + "grad_norm": 0.3880957365036011, + "learning_rate": 3.2920055205676867e-06, + "loss": 1.0749, + "step": 9200 + }, + { + "epoch": 0.9715189873417721, + "grad_norm": 0.3794536590576172, + "learning_rate": 3.06121849361049e-06, + "loss": 1.0754, + "step": 9210 + }, + { + "epoch": 0.9725738396624473, + "grad_norm": 0.3807520568370819, + "learning_rate": 2.838801280419856e-06, + "loss": 1.075, + "step": 9220 + }, + { + "epoch": 0.9736286919831224, + "grad_norm": 0.38176587224006653, + "learning_rate": 2.624756373289322e-06, + 
"loss": 1.0675, + "step": 9230 + }, + { + "epoch": 0.9746835443037974, + "grad_norm": 0.379975289106369, + "learning_rate": 2.419086170696472e-06, + "loss": 1.061, + "step": 9240 + }, + { + "epoch": 0.9757383966244726, + "grad_norm": 0.37389546632766724, + "learning_rate": 2.2217929772764545e-06, + "loss": 1.0718, + "step": 9250 + }, + { + "epoch": 0.9767932489451476, + "grad_norm": 0.3839819133281708, + "learning_rate": 2.0328790037957568e-06, + "loss": 1.0693, + "step": 9260 + }, + { + "epoch": 0.9778481012658228, + "grad_norm": 0.3840799629688263, + "learning_rate": 1.8523463671278052e-06, + "loss": 1.0797, + "step": 9270 + }, + { + "epoch": 0.9789029535864979, + "grad_norm": 0.3864159882068634, + "learning_rate": 1.6801970902288188e-06, + "loss": 1.0663, + "step": 9280 + }, + { + "epoch": 0.979957805907173, + "grad_norm": 0.3804856538772583, + "learning_rate": 1.5164331021155774e-06, + "loss": 1.0766, + "step": 9290 + }, + { + "epoch": 0.9810126582278481, + "grad_norm": 0.4023216664791107, + "learning_rate": 1.3610562378435221e-06, + "loss": 1.0727, + "step": 9300 + }, + { + "epoch": 0.9820675105485233, + "grad_norm": 0.3884046673774719, + "learning_rate": 1.2140682384862712e-06, + "loss": 1.0755, + "step": 9310 + }, + { + "epoch": 0.9831223628691983, + "grad_norm": 0.3878925144672394, + "learning_rate": 1.0754707511161365e-06, + "loss": 1.0713, + "step": 9320 + }, + { + "epoch": 0.9841772151898734, + "grad_norm": 0.38863489031791687, + "learning_rate": 9.452653287856383e-07, + "loss": 1.0824, + "step": 9330 + }, + { + "epoch": 0.9852320675105485, + "grad_norm": 0.3817732632160187, + "learning_rate": 8.234534305101015e-07, + "loss": 1.0807, + "step": 9340 + }, + { + "epoch": 0.9862869198312236, + "grad_norm": 0.3825710117816925, + "learning_rate": 7.100364212513367e-07, + "loss": 1.0889, + "step": 9350 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 0.3879372775554657, + "learning_rate": 6.050155719023176e-07, + "loss": 1.0802, + "step": 9360 + }, + { + 
"epoch": 0.9883966244725738, + "grad_norm": 0.387472003698349, + "learning_rate": 5.08392059272944e-07, + "loss": 1.0795, + "step": 9370 + }, + { + "epoch": 0.989451476793249, + "grad_norm": 0.3887854218482971, + "learning_rate": 4.2016696607680147e-07, + "loss": 1.0686, + "step": 9380 + }, + { + "epoch": 0.990506329113924, + "grad_norm": 0.3873176872730255, + "learning_rate": 3.4034128091917085e-07, + "loss": 1.0701, + "step": 9390 + }, + { + "epoch": 0.9915611814345991, + "grad_norm": 0.3858538568019867, + "learning_rate": 2.689158982859541e-07, + "loss": 1.0828, + "step": 9400 + }, + { + "epoch": 0.9926160337552743, + "grad_norm": 0.39077526330947876, + "learning_rate": 2.05891618533266e-07, + "loss": 1.0612, + "step": 9410 + }, + { + "epoch": 0.9936708860759493, + "grad_norm": 0.38713136315345764, + "learning_rate": 1.5126914787894074e-07, + "loss": 1.079, + "step": 9420 + }, + { + "epoch": 0.9947257383966245, + "grad_norm": 0.3799123167991638, + "learning_rate": 1.0504909839462173e-07, + "loss": 1.0778, + "step": 9430 + }, + { + "epoch": 0.9957805907172996, + "grad_norm": 0.3749011754989624, + "learning_rate": 6.723198799826746e-08, + "loss": 1.0727, + "step": 9440 + }, + { + "epoch": 0.9968354430379747, + "grad_norm": 0.3906822204589844, + "learning_rate": 3.781824044932214e-08, + "loss": 1.0852, + "step": 9450 + }, + { + "epoch": 0.9978902953586498, + "grad_norm": 0.3862172067165375, + "learning_rate": 1.6808185342970238e-08, + "loss": 1.0683, + "step": 9460 + }, + { + "epoch": 0.9989451476793249, + "grad_norm": 0.38431471586227417, + "learning_rate": 4.202058107305451e-09, + "loss": 1.0831, + "step": 9470 + }, + { + "epoch": 1.0, + "grad_norm": 1.132771611213684, + "learning_rate": 0.0, + "loss": 1.0715, + "step": 9480 + } + ], + "logging_steps": 10, + "max_steps": 9480, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": 
false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 5.037432118742016e+16, + "train_batch_size": 1024, + "trial_name": null, + "trial_params": null +} diff --git a/saves-gemma2-cosine/checkpoint-9480/training_args.bin b/saves-gemma2-cosine/checkpoint-9480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..1c4ba93cd0f25a381b536d5d60f472d81f9bcf89 --- /dev/null +++ b/saves-gemma2-cosine/checkpoint-9480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eaf77eaf842fa8a6bf6e29c5027969310d51b2882eeb37c8be62f4f40bcc402d +size 5176 diff --git a/saves-gemma2-cosine/config.json b/saves-gemma2-cosine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..96bfe46f54281277b27d9690dc209d86a9da6b69 --- /dev/null +++ b/saves-gemma2-cosine/config.json @@ -0,0 +1,31 @@ +{ + "architectures": [ + "Gemma2ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": 50.0, + "bos_token_id": 2, + "cache_implementation": "hybrid", + "eos_token_id": 1, + "final_logit_softcapping": 30.0, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 768, + "max_position_embeddings": 8192, + "model_type": "gemma2", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pad_token_id": 0, + "query_pre_attn_scalar": 224, + "rms_norm_eps": 1e-06, + "rope_theta": 10000.0, + "sliding_window": 4096, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-gemma2-cosine/generation_config.json b/saves-gemma2-cosine/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e9f2b419d37547ea30e2f193ff04443472c78cba --- /dev/null +++ b/saves-gemma2-cosine/generation_config.json @@ 
-0,0 +1,8 @@ +{ + "_from_model_config": true, + "bos_token_id": 2, + "cache_implementation": "hybrid", + "eos_token_id": 1, + "pad_token_id": 0, + "transformers_version": "4.42.4" +} diff --git a/saves-gemma2-cosine/model.safetensors b/saves-gemma2-cosine/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..78988c1005749af407d2cf7995af5c09edc9060a --- /dev/null +++ b/saves-gemma2-cosine/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a81076713bdcac9494c693701b3378c2a10e2e3cb4895841da92d04fe7f8a65 +size 19361344 diff --git a/saves-gemma2-cosine/result.log b/saves-gemma2-cosine/result.log new file mode 100644 index 0000000000000000000000000000000000000000..6871a34c71c4223c9a31e569bfa5f6e0b0454864 --- /dev/null +++ b/saves-gemma2-cosine/result.log @@ -0,0 +1 @@ +{'train_runtime': 5958.9449, 'train_samples_per_second': 1628.913, 'train_steps_per_second': 1.591, 'train_loss': 1.3597116293283456, 'epoch': 1.0} \ No newline at end of file diff --git a/saves-gemma2-cosine/special_tokens_map.json b/saves-gemma2-cosine/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-gemma2-cosine/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-gemma2-cosine/tokenizer.json b/saves-gemma2-cosine/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-gemma2-cosine/tokenizer.json @@ -0,0 +1,3893 @@ +{ 
+ "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 
47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 
215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + 
"her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 
513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 
658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 
802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + 
"èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 
1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 
1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 
1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + 
"Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + 
"Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + 
",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + 
"çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + 
"meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it 
h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", + "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + 
"æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". .", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-gemma2-cosine/tokenizer_config.json b/saves-gemma2-cosine/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-gemma2-cosine/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + 
"model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-gemma2/checkpoint-9480/config.json b/saves-gemma2/checkpoint-9480/config.json new file mode 100644 index 0000000000000000000000000000000000000000..96bfe46f54281277b27d9690dc209d86a9da6b69 --- /dev/null +++ b/saves-gemma2/checkpoint-9480/config.json @@ -0,0 +1,31 @@ +{ + "architectures": [ + "Gemma2ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": 50.0, + "bos_token_id": 2, + "cache_implementation": "hybrid", + "eos_token_id": 1, + "final_logit_softcapping": 30.0, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 768, + "max_position_embeddings": 8192, + "model_type": "gemma2", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pad_token_id": 0, + "query_pre_attn_scalar": 224, + "rms_norm_eps": 1e-06, + "rope_theta": 10000.0, + "sliding_window": 4096, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-gemma2/checkpoint-9480/generation_config.json b/saves-gemma2/checkpoint-9480/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e9f2b419d37547ea30e2f193ff04443472c78cba --- /dev/null +++ b/saves-gemma2/checkpoint-9480/generation_config.json @@ -0,0 +1,8 @@ +{ + "_from_model_config": true, + "bos_token_id": 2, + "cache_implementation": "hybrid", + "eos_token_id": 1, + "pad_token_id": 0, + "transformers_version": "4.42.4" +} diff --git a/saves-gemma2/checkpoint-9480/model.safetensors b/saves-gemma2/checkpoint-9480/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7bb52ba8e6de986062601eb20c05faaf7ffdc16d --- /dev/null +++ b/saves-gemma2/checkpoint-9480/model.safetensors @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:24a229e45aada410c4022c63a4b357009fa61bb23e436372765ddcb2a85e4ff2 +size 19361344 diff --git a/saves-gemma2/checkpoint-9480/optimizer.pt b/saves-gemma2/checkpoint-9480/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ef13afe741dfe824f4d036ab9c9843564b80980b --- /dev/null +++ b/saves-gemma2/checkpoint-9480/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9875d0c18d1696b5b80b00603f9717c53a2d9d473454af4be774158ef61d4a36 +size 38738006 diff --git a/saves-gemma2/checkpoint-9480/rng_state.pth b/saves-gemma2/checkpoint-9480/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f8291cd6ce87668b786a72f3e93d072fbe54902 --- /dev/null +++ b/saves-gemma2/checkpoint-9480/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c917636c7a58af68a29056522a757e9f9b99005b776641aa157c536967817d +size 14244 diff --git a/saves-gemma2/checkpoint-9480/scheduler.pt b/saves-gemma2/checkpoint-9480/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..63473f23a031ab0f869bb406d5cf89839262f03d --- /dev/null +++ b/saves-gemma2/checkpoint-9480/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbb2bea2f7536b844ad9bb1bf6c3877fce0b1eb4d96764e140560dbf207ce6aa +size 1064 diff --git a/saves-gemma2/checkpoint-9480/special_tokens_map.json b/saves-gemma2/checkpoint-9480/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-gemma2/checkpoint-9480/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": 
{ + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-gemma2/checkpoint-9480/tokenizer.json b/saves-gemma2/checkpoint-9480/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-gemma2/checkpoint-9480/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + 
"'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + 
"ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, 
+ "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 
482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 
628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + 
"Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "ä½¿": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, 
+ "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 
1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + 
"æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + 
"Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 
1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + 
"亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + 
",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 
1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 
1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-gemma2/checkpoint-9480/tokenizer_config.json b/saves-gemma2/checkpoint-9480/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-gemma2/checkpoint-9480/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": 
"<|im_end|>", + "errors": "replace", + "model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-gemma2/checkpoint-9480/trainer_state.json b/saves-gemma2/checkpoint-9480/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..68bc1f4d7742fa96d224a98e1303f5316413a300 --- /dev/null +++ b/saves-gemma2/checkpoint-9480/trainer_state.json @@ -0,0 +1,6669 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 9480, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0010548523206751054, + "grad_norm": 1.1911535263061523, + "learning_rate": 0.00015822784810126583, + "loss": 7.514, + "step": 10 + }, + { + "epoch": 0.002109704641350211, + "grad_norm": 1.1261857748031616, + "learning_rate": 0.00031645569620253165, + "loss": 6.971, + "step": 20 + }, + { + "epoch": 0.0031645569620253164, + "grad_norm": 0.8440089821815491, + "learning_rate": 0.00047468354430379745, + "loss": 6.3116, + "step": 30 + }, + { + "epoch": 0.004219409282700422, + "grad_norm": 1.437301754951477, + "learning_rate": 0.0006329113924050633, + "loss": 5.8497, + "step": 40 + }, + { + "epoch": 0.005274261603375527, + "grad_norm": 1.4225043058395386, + "learning_rate": 0.0007911392405063291, + "loss": 5.5173, + "step": 50 + }, + { + "epoch": 0.006329113924050633, + "grad_norm": 0.9975273013114929, + "learning_rate": 0.0009493670886075949, + "loss": 5.1521, + "step": 60 + }, + { + "epoch": 0.007383966244725738, + "grad_norm": 1.0305777788162231, + "learning_rate": 0.0011075949367088608, + "loss": 4.7579, + "step": 70 + }, + { + "epoch": 0.008438818565400843, + "grad_norm": 1.3440488576889038, + "learning_rate": 0.0012658227848101266, + "loss": 4.4427, + "step": 80 + }, + { + "epoch": 0.00949367088607595, + "grad_norm": 1.0169594287872314, 
+ "learning_rate": 0.0014240506329113926, + "loss": 4.1901, + "step": 90 + }, + { + "epoch": 0.010548523206751054, + "grad_norm": 0.8410079479217529, + "learning_rate": 0.0015, + "loss": 4.0012, + "step": 100 + }, + { + "epoch": 0.011603375527426161, + "grad_norm": 0.6870416402816772, + "learning_rate": 0.0015, + "loss": 3.8088, + "step": 110 + }, + { + "epoch": 0.012658227848101266, + "grad_norm": 0.6572973728179932, + "learning_rate": 0.0015, + "loss": 3.6725, + "step": 120 + }, + { + "epoch": 0.013713080168776372, + "grad_norm": 0.535599410533905, + "learning_rate": 0.0015, + "loss": 3.5343, + "step": 130 + }, + { + "epoch": 0.014767932489451477, + "grad_norm": 0.8405603766441345, + "learning_rate": 0.0015, + "loss": 3.4165, + "step": 140 + }, + { + "epoch": 0.015822784810126583, + "grad_norm": 0.5135706663131714, + "learning_rate": 0.0015, + "loss": 3.3249, + "step": 150 + }, + { + "epoch": 0.016877637130801686, + "grad_norm": 0.6049426794052124, + "learning_rate": 0.0015, + "loss": 3.2309, + "step": 160 + }, + { + "epoch": 0.017932489451476793, + "grad_norm": 0.6043658256530762, + "learning_rate": 0.0015, + "loss": 3.1535, + "step": 170 + }, + { + "epoch": 0.0189873417721519, + "grad_norm": 0.7351441383361816, + "learning_rate": 0.0015, + "loss": 3.1078, + "step": 180 + }, + { + "epoch": 0.020042194092827006, + "grad_norm": 0.5547571182250977, + "learning_rate": 0.0015, + "loss": 3.0352, + "step": 190 + }, + { + "epoch": 0.02109704641350211, + "grad_norm": 0.7150177955627441, + "learning_rate": 0.0015, + "loss": 2.9785, + "step": 200 + }, + { + "epoch": 0.022151898734177215, + "grad_norm": 0.5605180263519287, + "learning_rate": 0.0015, + "loss": 2.9324, + "step": 210 + }, + { + "epoch": 0.023206751054852322, + "grad_norm": 0.5501560568809509, + "learning_rate": 0.0015, + "loss": 2.892, + "step": 220 + }, + { + "epoch": 0.024261603375527425, + "grad_norm": 0.5405913591384888, + "learning_rate": 0.0015, + "loss": 2.8553, + "step": 230 + }, + { + "epoch": 
0.02531645569620253, + "grad_norm": 0.44567427039146423, + "learning_rate": 0.0015, + "loss": 2.8098, + "step": 240 + }, + { + "epoch": 0.026371308016877638, + "grad_norm": 0.5209677815437317, + "learning_rate": 0.0015, + "loss": 2.7686, + "step": 250 + }, + { + "epoch": 0.027426160337552744, + "grad_norm": 0.5224133133888245, + "learning_rate": 0.0015, + "loss": 2.7305, + "step": 260 + }, + { + "epoch": 0.028481012658227847, + "grad_norm": 0.561279296875, + "learning_rate": 0.0015, + "loss": 2.7045, + "step": 270 + }, + { + "epoch": 0.029535864978902954, + "grad_norm": 0.48504865169525146, + "learning_rate": 0.0015, + "loss": 2.6616, + "step": 280 + }, + { + "epoch": 0.03059071729957806, + "grad_norm": 0.5276456475257874, + "learning_rate": 0.0015, + "loss": 2.6373, + "step": 290 + }, + { + "epoch": 0.03164556962025317, + "grad_norm": 0.7674102783203125, + "learning_rate": 0.0015, + "loss": 2.6083, + "step": 300 + }, + { + "epoch": 0.03270042194092827, + "grad_norm": 0.5037146806716919, + "learning_rate": 0.0015, + "loss": 2.583, + "step": 310 + }, + { + "epoch": 0.03375527426160337, + "grad_norm": 0.4920331835746765, + "learning_rate": 0.0015, + "loss": 2.5358, + "step": 320 + }, + { + "epoch": 0.03481012658227848, + "grad_norm": 0.6064138412475586, + "learning_rate": 0.0015, + "loss": 2.5224, + "step": 330 + }, + { + "epoch": 0.035864978902953586, + "grad_norm": 0.5508156418800354, + "learning_rate": 0.0015, + "loss": 2.4869, + "step": 340 + }, + { + "epoch": 0.03691983122362869, + "grad_norm": 0.46913155913352966, + "learning_rate": 0.0015, + "loss": 2.4401, + "step": 350 + }, + { + "epoch": 0.0379746835443038, + "grad_norm": 0.6472739577293396, + "learning_rate": 0.0015, + "loss": 2.4113, + "step": 360 + }, + { + "epoch": 0.039029535864978905, + "grad_norm": 0.5063979029655457, + "learning_rate": 0.0015, + "loss": 2.3883, + "step": 370 + }, + { + "epoch": 0.04008438818565401, + "grad_norm": 0.5765858888626099, + "learning_rate": 0.0015, + "loss": 2.3691, + 
"step": 380 + }, + { + "epoch": 0.04113924050632911, + "grad_norm": 0.5593698620796204, + "learning_rate": 0.0015, + "loss": 2.3406, + "step": 390 + }, + { + "epoch": 0.04219409282700422, + "grad_norm": 0.7147346138954163, + "learning_rate": 0.0015, + "loss": 2.3005, + "step": 400 + }, + { + "epoch": 0.043248945147679324, + "grad_norm": 0.5318769812583923, + "learning_rate": 0.0015, + "loss": 2.2975, + "step": 410 + }, + { + "epoch": 0.04430379746835443, + "grad_norm": 0.5068933963775635, + "learning_rate": 0.0015, + "loss": 2.2618, + "step": 420 + }, + { + "epoch": 0.04535864978902954, + "grad_norm": 0.5029956102371216, + "learning_rate": 0.0015, + "loss": 2.2284, + "step": 430 + }, + { + "epoch": 0.046413502109704644, + "grad_norm": 0.5769665241241455, + "learning_rate": 0.0015, + "loss": 2.2023, + "step": 440 + }, + { + "epoch": 0.04746835443037975, + "grad_norm": 0.6128251552581787, + "learning_rate": 0.0015, + "loss": 2.2, + "step": 450 + }, + { + "epoch": 0.04852320675105485, + "grad_norm": 0.530529260635376, + "learning_rate": 0.0015, + "loss": 2.1738, + "step": 460 + }, + { + "epoch": 0.049578059071729956, + "grad_norm": 0.5000449419021606, + "learning_rate": 0.0015, + "loss": 2.1554, + "step": 470 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 0.7125113606452942, + "learning_rate": 0.0015, + "loss": 2.1395, + "step": 480 + }, + { + "epoch": 0.05168776371308017, + "grad_norm": 0.4939398467540741, + "learning_rate": 0.0015, + "loss": 2.105, + "step": 490 + }, + { + "epoch": 0.052742616033755275, + "grad_norm": 0.5103901028633118, + "learning_rate": 0.0015, + "loss": 2.0902, + "step": 500 + }, + { + "epoch": 0.05379746835443038, + "grad_norm": 0.4903666377067566, + "learning_rate": 0.0015, + "loss": 2.0743, + "step": 510 + }, + { + "epoch": 0.05485232067510549, + "grad_norm": 0.6215901374816895, + "learning_rate": 0.0015, + "loss": 2.0572, + "step": 520 + }, + { + "epoch": 0.05590717299578059, + "grad_norm": 0.5370311141014099, + "learning_rate": 
0.0015, + "loss": 2.0422, + "step": 530 + }, + { + "epoch": 0.056962025316455694, + "grad_norm": 0.583985447883606, + "learning_rate": 0.0015, + "loss": 2.0237, + "step": 540 + }, + { + "epoch": 0.0580168776371308, + "grad_norm": 0.48627397418022156, + "learning_rate": 0.0015, + "loss": 2.0133, + "step": 550 + }, + { + "epoch": 0.05907172995780591, + "grad_norm": 0.46253713965415955, + "learning_rate": 0.0015, + "loss": 1.9705, + "step": 560 + }, + { + "epoch": 0.060126582278481014, + "grad_norm": 0.5268188118934631, + "learning_rate": 0.0015, + "loss": 1.9772, + "step": 570 + }, + { + "epoch": 0.06118143459915612, + "grad_norm": 0.526195764541626, + "learning_rate": 0.0015, + "loss": 1.9743, + "step": 580 + }, + { + "epoch": 0.06223628691983123, + "grad_norm": 0.501920759677887, + "learning_rate": 0.0015, + "loss": 1.9446, + "step": 590 + }, + { + "epoch": 0.06329113924050633, + "grad_norm": 0.5201966166496277, + "learning_rate": 0.0015, + "loss": 1.9259, + "step": 600 + }, + { + "epoch": 0.06434599156118144, + "grad_norm": 0.49704989790916443, + "learning_rate": 0.0015, + "loss": 1.9233, + "step": 610 + }, + { + "epoch": 0.06540084388185655, + "grad_norm": 0.46477070450782776, + "learning_rate": 0.0015, + "loss": 1.9054, + "step": 620 + }, + { + "epoch": 0.06645569620253164, + "grad_norm": 0.5311816334724426, + "learning_rate": 0.0015, + "loss": 1.892, + "step": 630 + }, + { + "epoch": 0.06751054852320675, + "grad_norm": 0.6093870401382446, + "learning_rate": 0.0015, + "loss": 1.907, + "step": 640 + }, + { + "epoch": 0.06856540084388185, + "grad_norm": 0.5302767157554626, + "learning_rate": 0.0015, + "loss": 1.8872, + "step": 650 + }, + { + "epoch": 0.06962025316455696, + "grad_norm": 0.5349369049072266, + "learning_rate": 0.0015, + "loss": 1.8741, + "step": 660 + }, + { + "epoch": 0.07067510548523206, + "grad_norm": 0.5523291230201721, + "learning_rate": 0.0015, + "loss": 1.8571, + "step": 670 + }, + { + "epoch": 0.07172995780590717, + "grad_norm": 
0.45728522539138794, + "learning_rate": 0.0015, + "loss": 1.8468, + "step": 680 + }, + { + "epoch": 0.07278481012658228, + "grad_norm": 0.49975475668907166, + "learning_rate": 0.0015, + "loss": 1.8546, + "step": 690 + }, + { + "epoch": 0.07383966244725738, + "grad_norm": 0.5351147651672363, + "learning_rate": 0.0015, + "loss": 1.8344, + "step": 700 + }, + { + "epoch": 0.07489451476793249, + "grad_norm": 0.4953480362892151, + "learning_rate": 0.0015, + "loss": 1.8199, + "step": 710 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 0.5598252415657043, + "learning_rate": 0.0015, + "loss": 1.806, + "step": 720 + }, + { + "epoch": 0.0770042194092827, + "grad_norm": 0.636795699596405, + "learning_rate": 0.0015, + "loss": 1.795, + "step": 730 + }, + { + "epoch": 0.07805907172995781, + "grad_norm": 0.5506213903427124, + "learning_rate": 0.0015, + "loss": 1.8043, + "step": 740 + }, + { + "epoch": 0.07911392405063292, + "grad_norm": 0.5925042033195496, + "learning_rate": 0.0015, + "loss": 1.7829, + "step": 750 + }, + { + "epoch": 0.08016877637130802, + "grad_norm": 0.7445921301841736, + "learning_rate": 0.0015, + "loss": 1.7803, + "step": 760 + }, + { + "epoch": 0.08122362869198312, + "grad_norm": 0.48227658867836, + "learning_rate": 0.0015, + "loss": 1.7703, + "step": 770 + }, + { + "epoch": 0.08227848101265822, + "grad_norm": 0.5093647837638855, + "learning_rate": 0.0015, + "loss": 1.768, + "step": 780 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 0.4523041844367981, + "learning_rate": 0.0015, + "loss": 1.7647, + "step": 790 + }, + { + "epoch": 0.08438818565400844, + "grad_norm": 0.48087066411972046, + "learning_rate": 0.0015, + "loss": 1.743, + "step": 800 + }, + { + "epoch": 0.08544303797468354, + "grad_norm": 0.5096999406814575, + "learning_rate": 0.0015, + "loss": 1.7307, + "step": 810 + }, + { + "epoch": 0.08649789029535865, + "grad_norm": 0.557677149772644, + "learning_rate": 0.0015, + "loss": 1.7368, + "step": 820 + }, + { + "epoch": 
0.08755274261603375, + "grad_norm": 0.5189062356948853, + "learning_rate": 0.0015, + "loss": 1.7393, + "step": 830 + }, + { + "epoch": 0.08860759493670886, + "grad_norm": 0.48466941714286804, + "learning_rate": 0.0015, + "loss": 1.7153, + "step": 840 + }, + { + "epoch": 0.08966244725738397, + "grad_norm": 0.508627712726593, + "learning_rate": 0.0015, + "loss": 1.713, + "step": 850 + }, + { + "epoch": 0.09071729957805907, + "grad_norm": 0.44129225611686707, + "learning_rate": 0.0015, + "loss": 1.7096, + "step": 860 + }, + { + "epoch": 0.09177215189873418, + "grad_norm": 0.5102670788764954, + "learning_rate": 0.0015, + "loss": 1.7063, + "step": 870 + }, + { + "epoch": 0.09282700421940929, + "grad_norm": 0.5179425477981567, + "learning_rate": 0.0015, + "loss": 1.7001, + "step": 880 + }, + { + "epoch": 0.0938818565400844, + "grad_norm": 0.46534618735313416, + "learning_rate": 0.0015, + "loss": 1.6878, + "step": 890 + }, + { + "epoch": 0.0949367088607595, + "grad_norm": 0.4997912049293518, + "learning_rate": 0.0015, + "loss": 1.6861, + "step": 900 + }, + { + "epoch": 0.09599156118143459, + "grad_norm": 0.508382260799408, + "learning_rate": 0.0015, + "loss": 1.6949, + "step": 910 + }, + { + "epoch": 0.0970464135021097, + "grad_norm": 0.4784116744995117, + "learning_rate": 0.0015, + "loss": 1.6795, + "step": 920 + }, + { + "epoch": 0.0981012658227848, + "grad_norm": 0.4688781797885895, + "learning_rate": 0.0015, + "loss": 1.6661, + "step": 930 + }, + { + "epoch": 0.09915611814345991, + "grad_norm": 0.5456479787826538, + "learning_rate": 0.0015, + "loss": 1.6739, + "step": 940 + }, + { + "epoch": 0.10021097046413502, + "grad_norm": 0.5274268984794617, + "learning_rate": 0.0015, + "loss": 1.6537, + "step": 950 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 0.4864138960838318, + "learning_rate": 0.0015, + "loss": 1.6592, + "step": 960 + }, + { + "epoch": 0.10232067510548523, + "grad_norm": 0.5758424997329712, + "learning_rate": 0.0015, + "loss": 1.6613, + "step": 
970 + }, + { + "epoch": 0.10337552742616034, + "grad_norm": 0.4512311816215515, + "learning_rate": 0.0015, + "loss": 1.6414, + "step": 980 + }, + { + "epoch": 0.10443037974683544, + "grad_norm": 0.45962268114089966, + "learning_rate": 0.0015, + "loss": 1.6391, + "step": 990 + }, + { + "epoch": 0.10548523206751055, + "grad_norm": 0.5761123895645142, + "learning_rate": 0.0015, + "loss": 1.6495, + "step": 1000 + }, + { + "epoch": 0.10654008438818566, + "grad_norm": 0.4733625650405884, + "learning_rate": 0.0015, + "loss": 1.6366, + "step": 1010 + }, + { + "epoch": 0.10759493670886076, + "grad_norm": 0.5327010154724121, + "learning_rate": 0.0015, + "loss": 1.6276, + "step": 1020 + }, + { + "epoch": 0.10864978902953587, + "grad_norm": 0.49683907628059387, + "learning_rate": 0.0015, + "loss": 1.6303, + "step": 1030 + }, + { + "epoch": 0.10970464135021098, + "grad_norm": 0.5400471687316895, + "learning_rate": 0.0015, + "loss": 1.6251, + "step": 1040 + }, + { + "epoch": 0.11075949367088607, + "grad_norm": 0.48789605498313904, + "learning_rate": 0.0015, + "loss": 1.6224, + "step": 1050 + }, + { + "epoch": 0.11181434599156118, + "grad_norm": 0.45513924956321716, + "learning_rate": 0.0015, + "loss": 1.6116, + "step": 1060 + }, + { + "epoch": 0.11286919831223628, + "grad_norm": 0.46881577372550964, + "learning_rate": 0.0015, + "loss": 1.602, + "step": 1070 + }, + { + "epoch": 0.11392405063291139, + "grad_norm": 0.4709598124027252, + "learning_rate": 0.0015, + "loss": 1.5996, + "step": 1080 + }, + { + "epoch": 0.1149789029535865, + "grad_norm": 0.43881508708000183, + "learning_rate": 0.0015, + "loss": 1.6087, + "step": 1090 + }, + { + "epoch": 0.1160337552742616, + "grad_norm": 0.4861796796321869, + "learning_rate": 0.0015, + "loss": 1.5978, + "step": 1100 + }, + { + "epoch": 0.11708860759493671, + "grad_norm": 0.4598830044269562, + "learning_rate": 0.0015, + "loss": 1.6007, + "step": 1110 + }, + { + "epoch": 0.11814345991561181, + "grad_norm": 0.46872007846832275, + 
"learning_rate": 0.0015, + "loss": 1.5916, + "step": 1120 + }, + { + "epoch": 0.11919831223628692, + "grad_norm": 0.5027231574058533, + "learning_rate": 0.0015, + "loss": 1.5836, + "step": 1130 + }, + { + "epoch": 0.12025316455696203, + "grad_norm": 0.4448629319667816, + "learning_rate": 0.0015, + "loss": 1.5928, + "step": 1140 + }, + { + "epoch": 0.12130801687763713, + "grad_norm": 0.5146149396896362, + "learning_rate": 0.0015, + "loss": 1.5788, + "step": 1150 + }, + { + "epoch": 0.12236286919831224, + "grad_norm": 0.4398585557937622, + "learning_rate": 0.0015, + "loss": 1.5838, + "step": 1160 + }, + { + "epoch": 0.12341772151898735, + "grad_norm": 0.43229490518569946, + "learning_rate": 0.0015, + "loss": 1.5731, + "step": 1170 + }, + { + "epoch": 0.12447257383966245, + "grad_norm": 0.44356974959373474, + "learning_rate": 0.0015, + "loss": 1.5594, + "step": 1180 + }, + { + "epoch": 0.12552742616033755, + "grad_norm": 0.4866366684436798, + "learning_rate": 0.0015, + "loss": 1.5628, + "step": 1190 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 0.4466060698032379, + "learning_rate": 0.0015, + "loss": 1.5753, + "step": 1200 + }, + { + "epoch": 0.12763713080168776, + "grad_norm": 0.5837816596031189, + "learning_rate": 0.0015, + "loss": 1.5668, + "step": 1210 + }, + { + "epoch": 0.12869198312236288, + "grad_norm": 0.4354594647884369, + "learning_rate": 0.0015, + "loss": 1.5527, + "step": 1220 + }, + { + "epoch": 0.12974683544303797, + "grad_norm": 0.43021681904792786, + "learning_rate": 0.0015, + "loss": 1.5443, + "step": 1230 + }, + { + "epoch": 0.1308016877637131, + "grad_norm": 0.5192949771881104, + "learning_rate": 0.0015, + "loss": 1.5602, + "step": 1240 + }, + { + "epoch": 0.13185654008438819, + "grad_norm": 0.41557225584983826, + "learning_rate": 0.0015, + "loss": 1.5582, + "step": 1250 + }, + { + "epoch": 0.13291139240506328, + "grad_norm": 0.4372217655181885, + "learning_rate": 0.0015, + "loss": 1.5435, + "step": 1260 + }, + { + "epoch": 
0.1339662447257384, + "grad_norm": 0.4619849920272827, + "learning_rate": 0.0015, + "loss": 1.5426, + "step": 1270 + }, + { + "epoch": 0.1350210970464135, + "grad_norm": 0.5369654297828674, + "learning_rate": 0.0015, + "loss": 1.5554, + "step": 1280 + }, + { + "epoch": 0.1360759493670886, + "grad_norm": 0.43552911281585693, + "learning_rate": 0.0015, + "loss": 1.5421, + "step": 1290 + }, + { + "epoch": 0.1371308016877637, + "grad_norm": 0.4249727427959442, + "learning_rate": 0.0015, + "loss": 1.5318, + "step": 1300 + }, + { + "epoch": 0.13818565400843882, + "grad_norm": 0.4574583172798157, + "learning_rate": 0.0015, + "loss": 1.5332, + "step": 1310 + }, + { + "epoch": 0.13924050632911392, + "grad_norm": 0.4392021596431732, + "learning_rate": 0.0015, + "loss": 1.5371, + "step": 1320 + }, + { + "epoch": 0.14029535864978904, + "grad_norm": 0.6404235363006592, + "learning_rate": 0.0015, + "loss": 1.5272, + "step": 1330 + }, + { + "epoch": 0.14135021097046413, + "grad_norm": 0.4825304448604584, + "learning_rate": 0.0015, + "loss": 1.5276, + "step": 1340 + }, + { + "epoch": 0.14240506329113925, + "grad_norm": 0.4290938079357147, + "learning_rate": 0.0015, + "loss": 1.5289, + "step": 1350 + }, + { + "epoch": 0.14345991561181434, + "grad_norm": 0.42625662684440613, + "learning_rate": 0.0015, + "loss": 1.5239, + "step": 1360 + }, + { + "epoch": 0.14451476793248946, + "grad_norm": 0.5395568013191223, + "learning_rate": 0.0015, + "loss": 1.5224, + "step": 1370 + }, + { + "epoch": 0.14556962025316456, + "grad_norm": 0.4960879385471344, + "learning_rate": 0.0015, + "loss": 1.5122, + "step": 1380 + }, + { + "epoch": 0.14662447257383968, + "grad_norm": 0.4987901747226715, + "learning_rate": 0.0015, + "loss": 1.5219, + "step": 1390 + }, + { + "epoch": 0.14767932489451477, + "grad_norm": 0.5155091881752014, + "learning_rate": 0.0015, + "loss": 1.5035, + "step": 1400 + }, + { + "epoch": 0.14873417721518986, + "grad_norm": 0.4964143931865692, + "learning_rate": 0.0015, + "loss": 
1.5091, + "step": 1410 + }, + { + "epoch": 0.14978902953586498, + "grad_norm": 0.5313318967819214, + "learning_rate": 0.0015, + "loss": 1.5126, + "step": 1420 + }, + { + "epoch": 0.15084388185654007, + "grad_norm": 0.5072162747383118, + "learning_rate": 0.0015, + "loss": 1.5043, + "step": 1430 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 0.4389128088951111, + "learning_rate": 0.0015, + "loss": 1.5022, + "step": 1440 + }, + { + "epoch": 0.1529535864978903, + "grad_norm": 0.44489845633506775, + "learning_rate": 0.0015, + "loss": 1.5024, + "step": 1450 + }, + { + "epoch": 0.1540084388185654, + "grad_norm": 0.4190062880516052, + "learning_rate": 0.0015, + "loss": 1.4957, + "step": 1460 + }, + { + "epoch": 0.1550632911392405, + "grad_norm": 0.5346754789352417, + "learning_rate": 0.0015, + "loss": 1.4955, + "step": 1470 + }, + { + "epoch": 0.15611814345991562, + "grad_norm": 0.472139447927475, + "learning_rate": 0.0015, + "loss": 1.5004, + "step": 1480 + }, + { + "epoch": 0.1571729957805907, + "grad_norm": 0.4273566007614136, + "learning_rate": 0.0015, + "loss": 1.4983, + "step": 1490 + }, + { + "epoch": 0.15822784810126583, + "grad_norm": 0.5061330199241638, + "learning_rate": 0.0015, + "loss": 1.4967, + "step": 1500 + }, + { + "epoch": 0.15928270042194093, + "grad_norm": 0.4574885964393616, + "learning_rate": 0.0015, + "loss": 1.4999, + "step": 1510 + }, + { + "epoch": 0.16033755274261605, + "grad_norm": 0.5211601257324219, + "learning_rate": 0.0015, + "loss": 1.4977, + "step": 1520 + }, + { + "epoch": 0.16139240506329114, + "grad_norm": 0.44409823417663574, + "learning_rate": 0.0015, + "loss": 1.4798, + "step": 1530 + }, + { + "epoch": 0.16244725738396623, + "grad_norm": 0.4143390357494354, + "learning_rate": 0.0015, + "loss": 1.4666, + "step": 1540 + }, + { + "epoch": 0.16350210970464135, + "grad_norm": 0.45285868644714355, + "learning_rate": 0.0015, + "loss": 1.4724, + "step": 1550 + }, + { + "epoch": 0.16455696202531644, + "grad_norm": 
0.44844868779182434, + "learning_rate": 0.0015, + "loss": 1.4721, + "step": 1560 + }, + { + "epoch": 0.16561181434599156, + "grad_norm": 0.43528443574905396, + "learning_rate": 0.0015, + "loss": 1.4662, + "step": 1570 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 0.4710999131202698, + "learning_rate": 0.0015, + "loss": 1.4786, + "step": 1580 + }, + { + "epoch": 0.16772151898734178, + "grad_norm": 0.4777051508426666, + "learning_rate": 0.0015, + "loss": 1.4755, + "step": 1590 + }, + { + "epoch": 0.16877637130801687, + "grad_norm": 0.43375569581985474, + "learning_rate": 0.0015, + "loss": 1.4753, + "step": 1600 + }, + { + "epoch": 0.169831223628692, + "grad_norm": 0.44435209035873413, + "learning_rate": 0.0015, + "loss": 1.4761, + "step": 1610 + }, + { + "epoch": 0.17088607594936708, + "grad_norm": 0.4641205370426178, + "learning_rate": 0.0015, + "loss": 1.4454, + "step": 1620 + }, + { + "epoch": 0.1719409282700422, + "grad_norm": 0.43654873967170715, + "learning_rate": 0.0015, + "loss": 1.4578, + "step": 1630 + }, + { + "epoch": 0.1729957805907173, + "grad_norm": 0.43061891198158264, + "learning_rate": 0.0015, + "loss": 1.4605, + "step": 1640 + }, + { + "epoch": 0.17405063291139242, + "grad_norm": 0.4999637007713318, + "learning_rate": 0.0015, + "loss": 1.4506, + "step": 1650 + }, + { + "epoch": 0.1751054852320675, + "grad_norm": 0.4092038571834564, + "learning_rate": 0.0015, + "loss": 1.4538, + "step": 1660 + }, + { + "epoch": 0.17616033755274263, + "grad_norm": 0.4975314438343048, + "learning_rate": 0.0015, + "loss": 1.464, + "step": 1670 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 0.5646780133247375, + "learning_rate": 0.0015, + "loss": 1.4501, + "step": 1680 + }, + { + "epoch": 0.17827004219409281, + "grad_norm": 0.436535120010376, + "learning_rate": 0.0015, + "loss": 1.4464, + "step": 1690 + }, + { + "epoch": 0.17932489451476794, + "grad_norm": 0.43750905990600586, + "learning_rate": 0.0015, + "loss": 1.4623, + "step": 1700 + }, + { + 
"epoch": 0.18037974683544303, + "grad_norm": 0.4112912118434906, + "learning_rate": 0.0015, + "loss": 1.4389, + "step": 1710 + }, + { + "epoch": 0.18143459915611815, + "grad_norm": 0.39078304171562195, + "learning_rate": 0.0015, + "loss": 1.4371, + "step": 1720 + }, + { + "epoch": 0.18248945147679324, + "grad_norm": 0.4432763159275055, + "learning_rate": 0.0015, + "loss": 1.4397, + "step": 1730 + }, + { + "epoch": 0.18354430379746836, + "grad_norm": 0.42006704211235046, + "learning_rate": 0.0015, + "loss": 1.443, + "step": 1740 + }, + { + "epoch": 0.18459915611814345, + "grad_norm": 0.44937148690223694, + "learning_rate": 0.0015, + "loss": 1.4642, + "step": 1750 + }, + { + "epoch": 0.18565400843881857, + "grad_norm": 0.4456152617931366, + "learning_rate": 0.0015, + "loss": 1.4428, + "step": 1760 + }, + { + "epoch": 0.18670886075949367, + "grad_norm": 0.4034252464771271, + "learning_rate": 0.0015, + "loss": 1.4444, + "step": 1770 + }, + { + "epoch": 0.1877637130801688, + "grad_norm": 0.41375723481178284, + "learning_rate": 0.0015, + "loss": 1.4463, + "step": 1780 + }, + { + "epoch": 0.18881856540084388, + "grad_norm": 0.44860073924064636, + "learning_rate": 0.0015, + "loss": 1.4482, + "step": 1790 + }, + { + "epoch": 0.189873417721519, + "grad_norm": 0.4023696482181549, + "learning_rate": 0.0015, + "loss": 1.4437, + "step": 1800 + }, + { + "epoch": 0.1909282700421941, + "grad_norm": 0.42736566066741943, + "learning_rate": 0.0015, + "loss": 1.4281, + "step": 1810 + }, + { + "epoch": 0.19198312236286919, + "grad_norm": 0.48598966002464294, + "learning_rate": 0.0015, + "loss": 1.4277, + "step": 1820 + }, + { + "epoch": 0.1930379746835443, + "grad_norm": 0.45729580521583557, + "learning_rate": 0.0015, + "loss": 1.4284, + "step": 1830 + }, + { + "epoch": 0.1940928270042194, + "grad_norm": 0.41488245129585266, + "learning_rate": 0.0015, + "loss": 1.4288, + "step": 1840 + }, + { + "epoch": 0.19514767932489452, + "grad_norm": 0.4101181626319885, + "learning_rate": 0.0015, + 
"loss": 1.4296, + "step": 1850 + }, + { + "epoch": 0.1962025316455696, + "grad_norm": 0.3972269296646118, + "learning_rate": 0.0015, + "loss": 1.4295, + "step": 1860 + }, + { + "epoch": 0.19725738396624473, + "grad_norm": 0.4474700391292572, + "learning_rate": 0.0015, + "loss": 1.4327, + "step": 1870 + }, + { + "epoch": 0.19831223628691982, + "grad_norm": 0.4976608157157898, + "learning_rate": 0.0015, + "loss": 1.421, + "step": 1880 + }, + { + "epoch": 0.19936708860759494, + "grad_norm": 0.47018617391586304, + "learning_rate": 0.0015, + "loss": 1.4293, + "step": 1890 + }, + { + "epoch": 0.20042194092827004, + "grad_norm": 0.43353503942489624, + "learning_rate": 0.0015, + "loss": 1.4208, + "step": 1900 + }, + { + "epoch": 0.20147679324894516, + "grad_norm": 0.4655958116054535, + "learning_rate": 0.0015, + "loss": 1.4235, + "step": 1910 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 0.40534839034080505, + "learning_rate": 0.0015, + "loss": 1.4226, + "step": 1920 + }, + { + "epoch": 0.20358649789029537, + "grad_norm": 0.46445077657699585, + "learning_rate": 0.0015, + "loss": 1.4226, + "step": 1930 + }, + { + "epoch": 0.20464135021097046, + "grad_norm": 0.3819921314716339, + "learning_rate": 0.0015, + "loss": 1.4135, + "step": 1940 + }, + { + "epoch": 0.20569620253164558, + "grad_norm": 0.3935282528400421, + "learning_rate": 0.0015, + "loss": 1.4088, + "step": 1950 + }, + { + "epoch": 0.20675105485232068, + "grad_norm": 0.42892390489578247, + "learning_rate": 0.0015, + "loss": 1.4121, + "step": 1960 + }, + { + "epoch": 0.20780590717299577, + "grad_norm": 0.40014711022377014, + "learning_rate": 0.0015, + "loss": 1.4235, + "step": 1970 + }, + { + "epoch": 0.2088607594936709, + "grad_norm": 0.48264893889427185, + "learning_rate": 0.0015, + "loss": 1.405, + "step": 1980 + }, + { + "epoch": 0.20991561181434598, + "grad_norm": 0.4335213303565979, + "learning_rate": 0.0015, + "loss": 1.4061, + "step": 1990 + }, + { + "epoch": 0.2109704641350211, + "grad_norm": 
0.4485473036766052, + "learning_rate": 0.0015, + "loss": 1.4045, + "step": 2000 + }, + { + "epoch": 0.2120253164556962, + "grad_norm": 0.3801015317440033, + "learning_rate": 0.0015, + "loss": 1.4107, + "step": 2010 + }, + { + "epoch": 0.21308016877637131, + "grad_norm": 0.5835695862770081, + "learning_rate": 0.0015, + "loss": 1.4223, + "step": 2020 + }, + { + "epoch": 0.2141350210970464, + "grad_norm": 0.4383508861064911, + "learning_rate": 0.0015, + "loss": 1.4068, + "step": 2030 + }, + { + "epoch": 0.21518987341772153, + "grad_norm": 0.3987447917461395, + "learning_rate": 0.0015, + "loss": 1.4073, + "step": 2040 + }, + { + "epoch": 0.21624472573839662, + "grad_norm": 0.4258471727371216, + "learning_rate": 0.0015, + "loss": 1.4054, + "step": 2050 + }, + { + "epoch": 0.21729957805907174, + "grad_norm": 0.4014463424682617, + "learning_rate": 0.0015, + "loss": 1.3986, + "step": 2060 + }, + { + "epoch": 0.21835443037974683, + "grad_norm": 0.4350442588329315, + "learning_rate": 0.0015, + "loss": 1.4018, + "step": 2070 + }, + { + "epoch": 0.21940928270042195, + "grad_norm": 0.44266244769096375, + "learning_rate": 0.0015, + "loss": 1.4064, + "step": 2080 + }, + { + "epoch": 0.22046413502109705, + "grad_norm": 0.4371784031391144, + "learning_rate": 0.0015, + "loss": 1.3977, + "step": 2090 + }, + { + "epoch": 0.22151898734177214, + "grad_norm": 0.5309934020042419, + "learning_rate": 0.0015, + "loss": 1.4059, + "step": 2100 + }, + { + "epoch": 0.22257383966244726, + "grad_norm": 0.39169225096702576, + "learning_rate": 0.0015, + "loss": 1.3954, + "step": 2110 + }, + { + "epoch": 0.22362869198312235, + "grad_norm": 0.4109102189540863, + "learning_rate": 0.0015, + "loss": 1.3929, + "step": 2120 + }, + { + "epoch": 0.22468354430379747, + "grad_norm": 0.41619110107421875, + "learning_rate": 0.0015, + "loss": 1.395, + "step": 2130 + }, + { + "epoch": 0.22573839662447256, + "grad_norm": 0.38766422867774963, + "learning_rate": 0.0015, + "loss": 1.3884, + "step": 2140 + }, + { + 
"epoch": 0.22679324894514769, + "grad_norm": 0.4916270971298218, + "learning_rate": 0.0015, + "loss": 1.3973, + "step": 2150 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 0.5352882146835327, + "learning_rate": 0.0015, + "loss": 1.4078, + "step": 2160 + }, + { + "epoch": 0.2289029535864979, + "grad_norm": 0.4140731394290924, + "learning_rate": 0.0015, + "loss": 1.3921, + "step": 2170 + }, + { + "epoch": 0.229957805907173, + "grad_norm": 0.41106846928596497, + "learning_rate": 0.0015, + "loss": 1.3766, + "step": 2180 + }, + { + "epoch": 0.2310126582278481, + "grad_norm": 0.5240281224250793, + "learning_rate": 0.0015, + "loss": 1.3942, + "step": 2190 + }, + { + "epoch": 0.2320675105485232, + "grad_norm": 0.48931968212127686, + "learning_rate": 0.0015, + "loss": 1.3958, + "step": 2200 + }, + { + "epoch": 0.23312236286919832, + "grad_norm": 0.40602126717567444, + "learning_rate": 0.0015, + "loss": 1.3791, + "step": 2210 + }, + { + "epoch": 0.23417721518987342, + "grad_norm": 0.36141782999038696, + "learning_rate": 0.0015, + "loss": 1.3915, + "step": 2220 + }, + { + "epoch": 0.23523206751054854, + "grad_norm": 0.550554096698761, + "learning_rate": 0.0015, + "loss": 1.3864, + "step": 2230 + }, + { + "epoch": 0.23628691983122363, + "grad_norm": 0.4383642077445984, + "learning_rate": 0.0015, + "loss": 1.3855, + "step": 2240 + }, + { + "epoch": 0.23734177215189872, + "grad_norm": 0.4155581295490265, + "learning_rate": 0.0015, + "loss": 1.3821, + "step": 2250 + }, + { + "epoch": 0.23839662447257384, + "grad_norm": 0.5224567651748657, + "learning_rate": 0.0015, + "loss": 1.3723, + "step": 2260 + }, + { + "epoch": 0.23945147679324894, + "grad_norm": 0.40350115299224854, + "learning_rate": 0.0015, + "loss": 1.4061, + "step": 2270 + }, + { + "epoch": 0.24050632911392406, + "grad_norm": 0.4014328420162201, + "learning_rate": 0.0015, + "loss": 1.3902, + "step": 2280 + }, + { + "epoch": 0.24156118143459915, + "grad_norm": 0.45540615916252136, + "learning_rate": 0.0015, + 
"loss": 1.3805, + "step": 2290 + }, + { + "epoch": 0.24261603375527427, + "grad_norm": 0.39433974027633667, + "learning_rate": 0.0015, + "loss": 1.3777, + "step": 2300 + }, + { + "epoch": 0.24367088607594936, + "grad_norm": 0.3825417459011078, + "learning_rate": 0.0015, + "loss": 1.3742, + "step": 2310 + }, + { + "epoch": 0.24472573839662448, + "grad_norm": 0.4290764331817627, + "learning_rate": 0.0015, + "loss": 1.3864, + "step": 2320 + }, + { + "epoch": 0.24578059071729957, + "grad_norm": 0.3883923590183258, + "learning_rate": 0.0015, + "loss": 1.3689, + "step": 2330 + }, + { + "epoch": 0.2468354430379747, + "grad_norm": 0.43312767148017883, + "learning_rate": 0.0015, + "loss": 1.3621, + "step": 2340 + }, + { + "epoch": 0.2478902953586498, + "grad_norm": 0.4548530876636505, + "learning_rate": 0.0015, + "loss": 1.3653, + "step": 2350 + }, + { + "epoch": 0.2489451476793249, + "grad_norm": 0.5046688914299011, + "learning_rate": 0.0015, + "loss": 1.3775, + "step": 2360 + }, + { + "epoch": 0.25, + "grad_norm": 0.4298846125602722, + "learning_rate": 0.0015, + "loss": 1.3738, + "step": 2370 + }, + { + "epoch": 0.2510548523206751, + "grad_norm": 0.4356420636177063, + "learning_rate": 0.0015, + "loss": 1.3683, + "step": 2380 + }, + { + "epoch": 0.2521097046413502, + "grad_norm": 0.4656219184398651, + "learning_rate": 0.0015, + "loss": 1.3709, + "step": 2390 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 0.3807707130908966, + "learning_rate": 0.0015, + "loss": 1.369, + "step": 2400 + }, + { + "epoch": 0.2542194092827004, + "grad_norm": 0.4016205370426178, + "learning_rate": 0.0015, + "loss": 1.3717, + "step": 2410 + }, + { + "epoch": 0.2552742616033755, + "grad_norm": 0.3906775116920471, + "learning_rate": 0.0015, + "loss": 1.3662, + "step": 2420 + }, + { + "epoch": 0.2563291139240506, + "grad_norm": 0.4661619961261749, + "learning_rate": 0.0015, + "loss": 1.3614, + "step": 2430 + }, + { + "epoch": 0.25738396624472576, + "grad_norm": 0.42744624614715576, + 
"learning_rate": 0.0015, + "loss": 1.3732, + "step": 2440 + }, + { + "epoch": 0.25843881856540085, + "grad_norm": 0.3593474328517914, + "learning_rate": 0.0015, + "loss": 1.3596, + "step": 2450 + }, + { + "epoch": 0.25949367088607594, + "grad_norm": 0.36683934926986694, + "learning_rate": 0.0015, + "loss": 1.37, + "step": 2460 + }, + { + "epoch": 0.26054852320675104, + "grad_norm": 0.47187283635139465, + "learning_rate": 0.0015, + "loss": 1.3627, + "step": 2470 + }, + { + "epoch": 0.2616033755274262, + "grad_norm": 0.36955729126930237, + "learning_rate": 0.0015, + "loss": 1.367, + "step": 2480 + }, + { + "epoch": 0.2626582278481013, + "grad_norm": 0.4172874689102173, + "learning_rate": 0.0015, + "loss": 1.3564, + "step": 2490 + }, + { + "epoch": 0.26371308016877637, + "grad_norm": 0.42587655782699585, + "learning_rate": 0.0015, + "loss": 1.3608, + "step": 2500 + }, + { + "epoch": 0.26476793248945146, + "grad_norm": 0.3867819905281067, + "learning_rate": 0.0015, + "loss": 1.3616, + "step": 2510 + }, + { + "epoch": 0.26582278481012656, + "grad_norm": 0.4664280414581299, + "learning_rate": 0.0015, + "loss": 1.3648, + "step": 2520 + }, + { + "epoch": 0.2668776371308017, + "grad_norm": 0.43253934383392334, + "learning_rate": 0.0015, + "loss": 1.3612, + "step": 2530 + }, + { + "epoch": 0.2679324894514768, + "grad_norm": 0.5740087628364563, + "learning_rate": 0.0015, + "loss": 1.3582, + "step": 2540 + }, + { + "epoch": 0.2689873417721519, + "grad_norm": 0.4363726079463959, + "learning_rate": 0.0015, + "loss": 1.3555, + "step": 2550 + }, + { + "epoch": 0.270042194092827, + "grad_norm": 0.38443291187286377, + "learning_rate": 0.0015, + "loss": 1.3625, + "step": 2560 + }, + { + "epoch": 0.27109704641350213, + "grad_norm": 0.45384156703948975, + "learning_rate": 0.0015, + "loss": 1.3583, + "step": 2570 + }, + { + "epoch": 0.2721518987341772, + "grad_norm": 0.36326706409454346, + "learning_rate": 0.0015, + "loss": 1.3557, + "step": 2580 + }, + { + "epoch": 0.2732067510548523, 
+ "grad_norm": 0.37362417578697205, + "learning_rate": 0.0015, + "loss": 1.3612, + "step": 2590 + }, + { + "epoch": 0.2742616033755274, + "grad_norm": 0.36792299151420593, + "learning_rate": 0.0015, + "loss": 1.3639, + "step": 2600 + }, + { + "epoch": 0.27531645569620256, + "grad_norm": 0.39010557532310486, + "learning_rate": 0.0015, + "loss": 1.3571, + "step": 2610 + }, + { + "epoch": 0.27637130801687765, + "grad_norm": 0.4335007071495056, + "learning_rate": 0.0015, + "loss": 1.3581, + "step": 2620 + }, + { + "epoch": 0.27742616033755274, + "grad_norm": 0.4066050350666046, + "learning_rate": 0.0015, + "loss": 1.344, + "step": 2630 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 0.39231425523757935, + "learning_rate": 0.0015, + "loss": 1.3571, + "step": 2640 + }, + { + "epoch": 0.2795358649789029, + "grad_norm": 0.47960364818573, + "learning_rate": 0.0015, + "loss": 1.3528, + "step": 2650 + }, + { + "epoch": 0.2805907172995781, + "grad_norm": 0.4317481219768524, + "learning_rate": 0.0015, + "loss": 1.3449, + "step": 2660 + }, + { + "epoch": 0.28164556962025317, + "grad_norm": 0.3781212568283081, + "learning_rate": 0.0015, + "loss": 1.3504, + "step": 2670 + }, + { + "epoch": 0.28270042194092826, + "grad_norm": 0.38963305950164795, + "learning_rate": 0.0015, + "loss": 1.3496, + "step": 2680 + }, + { + "epoch": 0.28375527426160335, + "grad_norm": 0.4155701696872711, + "learning_rate": 0.0015, + "loss": 1.3472, + "step": 2690 + }, + { + "epoch": 0.2848101265822785, + "grad_norm": 0.38998645544052124, + "learning_rate": 0.0015, + "loss": 1.342, + "step": 2700 + }, + { + "epoch": 0.2858649789029536, + "grad_norm": 0.40188562870025635, + "learning_rate": 0.0015, + "loss": 1.3501, + "step": 2710 + }, + { + "epoch": 0.2869198312236287, + "grad_norm": 0.38737237453460693, + "learning_rate": 0.0015, + "loss": 1.3491, + "step": 2720 + }, + { + "epoch": 0.2879746835443038, + "grad_norm": 0.40319275856018066, + "learning_rate": 0.0015, + "loss": 1.3301, + "step": 2730 + 
}, + { + "epoch": 0.2890295358649789, + "grad_norm": 0.39979711174964905, + "learning_rate": 0.0015, + "loss": 1.3415, + "step": 2740 + }, + { + "epoch": 0.290084388185654, + "grad_norm": 0.4038510322570801, + "learning_rate": 0.0015, + "loss": 1.3488, + "step": 2750 + }, + { + "epoch": 0.2911392405063291, + "grad_norm": 0.5032958984375, + "learning_rate": 0.0015, + "loss": 1.3563, + "step": 2760 + }, + { + "epoch": 0.2921940928270042, + "grad_norm": 0.432725727558136, + "learning_rate": 0.0015, + "loss": 1.3524, + "step": 2770 + }, + { + "epoch": 0.29324894514767935, + "grad_norm": 0.4258490204811096, + "learning_rate": 0.0015, + "loss": 1.3465, + "step": 2780 + }, + { + "epoch": 0.29430379746835444, + "grad_norm": 0.370568186044693, + "learning_rate": 0.0015, + "loss": 1.3436, + "step": 2790 + }, + { + "epoch": 0.29535864978902954, + "grad_norm": 0.3846338093280792, + "learning_rate": 0.0015, + "loss": 1.3406, + "step": 2800 + }, + { + "epoch": 0.29641350210970463, + "grad_norm": 0.38603997230529785, + "learning_rate": 0.0015, + "loss": 1.3419, + "step": 2810 + }, + { + "epoch": 0.2974683544303797, + "grad_norm": 0.4352133870124817, + "learning_rate": 0.0015, + "loss": 1.3495, + "step": 2820 + }, + { + "epoch": 0.29852320675105487, + "grad_norm": 0.45890146493911743, + "learning_rate": 0.0015, + "loss": 1.3247, + "step": 2830 + }, + { + "epoch": 0.29957805907172996, + "grad_norm": 0.45596638321876526, + "learning_rate": 0.0015, + "loss": 1.3293, + "step": 2840 + }, + { + "epoch": 0.30063291139240506, + "grad_norm": 0.4347798526287079, + "learning_rate": 0.0015, + "loss": 1.3335, + "step": 2850 + }, + { + "epoch": 0.30168776371308015, + "grad_norm": 0.39894723892211914, + "learning_rate": 0.0015, + "loss": 1.3373, + "step": 2860 + }, + { + "epoch": 0.3027426160337553, + "grad_norm": 0.4256788194179535, + "learning_rate": 0.0015, + "loss": 1.3404, + "step": 2870 + }, + { + "epoch": 0.3037974683544304, + "grad_norm": 0.40725162625312805, + "learning_rate": 0.0015, + 
"loss": 1.326, + "step": 2880 + }, + { + "epoch": 0.3048523206751055, + "grad_norm": 0.3742321729660034, + "learning_rate": 0.0015, + "loss": 1.3286, + "step": 2890 + }, + { + "epoch": 0.3059071729957806, + "grad_norm": 0.4294082224369049, + "learning_rate": 0.0015, + "loss": 1.3371, + "step": 2900 + }, + { + "epoch": 0.3069620253164557, + "grad_norm": 0.38926073908805847, + "learning_rate": 0.0015, + "loss": 1.3369, + "step": 2910 + }, + { + "epoch": 0.3080168776371308, + "grad_norm": 0.3792055547237396, + "learning_rate": 0.0015, + "loss": 1.3318, + "step": 2920 + }, + { + "epoch": 0.3090717299578059, + "grad_norm": 0.3738500475883484, + "learning_rate": 0.0015, + "loss": 1.3283, + "step": 2930 + }, + { + "epoch": 0.310126582278481, + "grad_norm": 0.3772472143173218, + "learning_rate": 0.0015, + "loss": 1.33, + "step": 2940 + }, + { + "epoch": 0.3111814345991561, + "grad_norm": 0.35470521450042725, + "learning_rate": 0.0015, + "loss": 1.3417, + "step": 2950 + }, + { + "epoch": 0.31223628691983124, + "grad_norm": 0.4564476013183594, + "learning_rate": 0.0015, + "loss": 1.3269, + "step": 2960 + }, + { + "epoch": 0.31329113924050633, + "grad_norm": 0.42535507678985596, + "learning_rate": 0.0015, + "loss": 1.3218, + "step": 2970 + }, + { + "epoch": 0.3143459915611814, + "grad_norm": 0.4032561779022217, + "learning_rate": 0.0015, + "loss": 1.3237, + "step": 2980 + }, + { + "epoch": 0.3154008438818565, + "grad_norm": 0.39573147892951965, + "learning_rate": 0.0015, + "loss": 1.3367, + "step": 2990 + }, + { + "epoch": 0.31645569620253167, + "grad_norm": 0.369488924741745, + "learning_rate": 0.0015, + "loss": 1.3198, + "step": 3000 + }, + { + "epoch": 0.31751054852320676, + "grad_norm": 0.3686826527118683, + "learning_rate": 0.0015, + "loss": 1.3266, + "step": 3010 + }, + { + "epoch": 0.31856540084388185, + "grad_norm": 0.4304807484149933, + "learning_rate": 0.0015, + "loss": 1.3289, + "step": 3020 + }, + { + "epoch": 0.31962025316455694, + "grad_norm": 
0.4005890190601349, + "learning_rate": 0.0015, + "loss": 1.3159, + "step": 3030 + }, + { + "epoch": 0.3206751054852321, + "grad_norm": 0.4065329134464264, + "learning_rate": 0.0015, + "loss": 1.323, + "step": 3040 + }, + { + "epoch": 0.3217299578059072, + "grad_norm": 0.37223920226097107, + "learning_rate": 0.0015, + "loss": 1.3209, + "step": 3050 + }, + { + "epoch": 0.3227848101265823, + "grad_norm": 0.357913076877594, + "learning_rate": 0.0015, + "loss": 1.3307, + "step": 3060 + }, + { + "epoch": 0.32383966244725737, + "grad_norm": 0.4006459712982178, + "learning_rate": 0.0015, + "loss": 1.3211, + "step": 3070 + }, + { + "epoch": 0.32489451476793246, + "grad_norm": 0.37744468450546265, + "learning_rate": 0.0015, + "loss": 1.3126, + "step": 3080 + }, + { + "epoch": 0.3259493670886076, + "grad_norm": 0.3618320822715759, + "learning_rate": 0.0015, + "loss": 1.3311, + "step": 3090 + }, + { + "epoch": 0.3270042194092827, + "grad_norm": 0.38559070229530334, + "learning_rate": 0.0015, + "loss": 1.3167, + "step": 3100 + }, + { + "epoch": 0.3280590717299578, + "grad_norm": 0.4073399007320404, + "learning_rate": 0.0015, + "loss": 1.3192, + "step": 3110 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 0.3995159864425659, + "learning_rate": 0.0015, + "loss": 1.3211, + "step": 3120 + }, + { + "epoch": 0.33016877637130804, + "grad_norm": 0.40881553292274475, + "learning_rate": 0.0015, + "loss": 1.3339, + "step": 3130 + }, + { + "epoch": 0.33122362869198313, + "grad_norm": 0.4172428548336029, + "learning_rate": 0.0015, + "loss": 1.3081, + "step": 3140 + }, + { + "epoch": 0.3322784810126582, + "grad_norm": 0.38847237825393677, + "learning_rate": 0.0015, + "loss": 1.3214, + "step": 3150 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.3866778314113617, + "learning_rate": 0.0015, + "loss": 1.3199, + "step": 3160 + }, + { + "epoch": 0.33438818565400846, + "grad_norm": 0.3850679397583008, + "learning_rate": 0.0015, + "loss": 1.3249, + "step": 3170 + }, + { + "epoch": 
0.33544303797468356, + "grad_norm": 0.415362149477005, + "learning_rate": 0.0015, + "loss": 1.318, + "step": 3180 + }, + { + "epoch": 0.33649789029535865, + "grad_norm": 0.41383853554725647, + "learning_rate": 0.0015, + "loss": 1.3173, + "step": 3190 + }, + { + "epoch": 0.33755274261603374, + "grad_norm": 0.39325055480003357, + "learning_rate": 0.0015, + "loss": 1.3133, + "step": 3200 + }, + { + "epoch": 0.33860759493670883, + "grad_norm": 0.4082551598548889, + "learning_rate": 0.0015, + "loss": 1.3242, + "step": 3210 + }, + { + "epoch": 0.339662447257384, + "grad_norm": 0.3817073106765747, + "learning_rate": 0.0015, + "loss": 1.3168, + "step": 3220 + }, + { + "epoch": 0.3407172995780591, + "grad_norm": 0.3971477746963501, + "learning_rate": 0.0015, + "loss": 1.3096, + "step": 3230 + }, + { + "epoch": 0.34177215189873417, + "grad_norm": 0.3851396441459656, + "learning_rate": 0.0015, + "loss": 1.2907, + "step": 3240 + }, + { + "epoch": 0.34282700421940926, + "grad_norm": 0.36164918541908264, + "learning_rate": 0.0015, + "loss": 1.3179, + "step": 3250 + }, + { + "epoch": 0.3438818565400844, + "grad_norm": 0.3581262230873108, + "learning_rate": 0.0015, + "loss": 1.3071, + "step": 3260 + }, + { + "epoch": 0.3449367088607595, + "grad_norm": 0.4046621322631836, + "learning_rate": 0.0015, + "loss": 1.3231, + "step": 3270 + }, + { + "epoch": 0.3459915611814346, + "grad_norm": 0.45322754979133606, + "learning_rate": 0.0015, + "loss": 1.3222, + "step": 3280 + }, + { + "epoch": 0.3470464135021097, + "grad_norm": 0.47892409563064575, + "learning_rate": 0.0015, + "loss": 1.3055, + "step": 3290 + }, + { + "epoch": 0.34810126582278483, + "grad_norm": 0.3846244812011719, + "learning_rate": 0.0015, + "loss": 1.3126, + "step": 3300 + }, + { + "epoch": 0.3491561181434599, + "grad_norm": 0.36716458201408386, + "learning_rate": 0.0015, + "loss": 1.3135, + "step": 3310 + }, + { + "epoch": 0.350210970464135, + "grad_norm": 0.37936967611312866, + "learning_rate": 0.0015, + "loss": 1.3194, 
+ "step": 3320 + }, + { + "epoch": 0.3512658227848101, + "grad_norm": 0.36913326382637024, + "learning_rate": 0.0015, + "loss": 1.3094, + "step": 3330 + }, + { + "epoch": 0.35232067510548526, + "grad_norm": 0.40605461597442627, + "learning_rate": 0.0015, + "loss": 1.3109, + "step": 3340 + }, + { + "epoch": 0.35337552742616035, + "grad_norm": 0.3609521985054016, + "learning_rate": 0.0015, + "loss": 1.304, + "step": 3350 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 0.3889778256416321, + "learning_rate": 0.0015, + "loss": 1.3039, + "step": 3360 + }, + { + "epoch": 0.35548523206751054, + "grad_norm": 0.4098104238510132, + "learning_rate": 0.0015, + "loss": 1.3144, + "step": 3370 + }, + { + "epoch": 0.35654008438818563, + "grad_norm": 0.37357568740844727, + "learning_rate": 0.0015, + "loss": 1.3078, + "step": 3380 + }, + { + "epoch": 0.3575949367088608, + "grad_norm": 0.39619654417037964, + "learning_rate": 0.0015, + "loss": 1.3039, + "step": 3390 + }, + { + "epoch": 0.35864978902953587, + "grad_norm": 0.4084376394748688, + "learning_rate": 0.0015, + "loss": 1.3099, + "step": 3400 + }, + { + "epoch": 0.35970464135021096, + "grad_norm": 0.4111672043800354, + "learning_rate": 0.0015, + "loss": 1.3097, + "step": 3410 + }, + { + "epoch": 0.36075949367088606, + "grad_norm": 0.4097122550010681, + "learning_rate": 0.0015, + "loss": 1.2917, + "step": 3420 + }, + { + "epoch": 0.3618143459915612, + "grad_norm": 0.4278287887573242, + "learning_rate": 0.0015, + "loss": 1.3061, + "step": 3430 + }, + { + "epoch": 0.3628691983122363, + "grad_norm": 0.3902808725833893, + "learning_rate": 0.0015, + "loss": 1.3039, + "step": 3440 + }, + { + "epoch": 0.3639240506329114, + "grad_norm": 0.4041774868965149, + "learning_rate": 0.0015, + "loss": 1.3011, + "step": 3450 + }, + { + "epoch": 0.3649789029535865, + "grad_norm": 0.43103742599487305, + "learning_rate": 0.0015, + "loss": 1.2968, + "step": 3460 + }, + { + "epoch": 0.36603375527426163, + "grad_norm": 0.39118364453315735, + 
"learning_rate": 0.0015, + "loss": 1.3109, + "step": 3470 + }, + { + "epoch": 0.3670886075949367, + "grad_norm": 0.3834078013896942, + "learning_rate": 0.0015, + "loss": 1.3154, + "step": 3480 + }, + { + "epoch": 0.3681434599156118, + "grad_norm": 0.37766724824905396, + "learning_rate": 0.0015, + "loss": 1.3072, + "step": 3490 + }, + { + "epoch": 0.3691983122362869, + "grad_norm": 0.37089774012565613, + "learning_rate": 0.0015, + "loss": 1.2862, + "step": 3500 + }, + { + "epoch": 0.370253164556962, + "grad_norm": 0.39285215735435486, + "learning_rate": 0.0015, + "loss": 1.3097, + "step": 3510 + }, + { + "epoch": 0.37130801687763715, + "grad_norm": 0.4295338988304138, + "learning_rate": 0.0015, + "loss": 1.3058, + "step": 3520 + }, + { + "epoch": 0.37236286919831224, + "grad_norm": 0.38859865069389343, + "learning_rate": 0.0015, + "loss": 1.3126, + "step": 3530 + }, + { + "epoch": 0.37341772151898733, + "grad_norm": 0.40371015667915344, + "learning_rate": 0.0015, + "loss": 1.3021, + "step": 3540 + }, + { + "epoch": 0.3744725738396624, + "grad_norm": 0.3651468753814697, + "learning_rate": 0.0015, + "loss": 1.2968, + "step": 3550 + }, + { + "epoch": 0.3755274261603376, + "grad_norm": 0.36767125129699707, + "learning_rate": 0.0015, + "loss": 1.2971, + "step": 3560 + }, + { + "epoch": 0.37658227848101267, + "grad_norm": 0.4188666045665741, + "learning_rate": 0.0015, + "loss": 1.2942, + "step": 3570 + }, + { + "epoch": 0.37763713080168776, + "grad_norm": 0.481037437915802, + "learning_rate": 0.0015, + "loss": 1.2977, + "step": 3580 + }, + { + "epoch": 0.37869198312236285, + "grad_norm": 0.4443666636943817, + "learning_rate": 0.0015, + "loss": 1.3077, + "step": 3590 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 0.36201488971710205, + "learning_rate": 0.0015, + "loss": 1.2909, + "step": 3600 + }, + { + "epoch": 0.3808016877637131, + "grad_norm": 0.3435400426387787, + "learning_rate": 0.0015, + "loss": 1.2924, + "step": 3610 + }, + { + "epoch": 0.3818565400843882, + 
"grad_norm": 0.38419613242149353, + "learning_rate": 0.0015, + "loss": 1.2967, + "step": 3620 + }, + { + "epoch": 0.3829113924050633, + "grad_norm": 0.4411642253398895, + "learning_rate": 0.0015, + "loss": 1.2929, + "step": 3630 + }, + { + "epoch": 0.38396624472573837, + "grad_norm": 0.3975316882133484, + "learning_rate": 0.0015, + "loss": 1.2966, + "step": 3640 + }, + { + "epoch": 0.3850210970464135, + "grad_norm": 0.3638233244419098, + "learning_rate": 0.0015, + "loss": 1.3035, + "step": 3650 + }, + { + "epoch": 0.3860759493670886, + "grad_norm": 0.3786154091358185, + "learning_rate": 0.0015, + "loss": 1.2938, + "step": 3660 + }, + { + "epoch": 0.3871308016877637, + "grad_norm": 0.3656991422176361, + "learning_rate": 0.0015, + "loss": 1.2899, + "step": 3670 + }, + { + "epoch": 0.3881856540084388, + "grad_norm": 0.5419597029685974, + "learning_rate": 0.0015, + "loss": 1.2922, + "step": 3680 + }, + { + "epoch": 0.38924050632911394, + "grad_norm": 0.4152900278568268, + "learning_rate": 0.0015, + "loss": 1.3027, + "step": 3690 + }, + { + "epoch": 0.39029535864978904, + "grad_norm": 0.3799019455909729, + "learning_rate": 0.0015, + "loss": 1.3026, + "step": 3700 + }, + { + "epoch": 0.39135021097046413, + "grad_norm": 0.3744471073150635, + "learning_rate": 0.0015, + "loss": 1.2933, + "step": 3710 + }, + { + "epoch": 0.3924050632911392, + "grad_norm": 0.3584083020687103, + "learning_rate": 0.0015, + "loss": 1.2744, + "step": 3720 + }, + { + "epoch": 0.39345991561181437, + "grad_norm": 0.375611811876297, + "learning_rate": 0.0015, + "loss": 1.2936, + "step": 3730 + }, + { + "epoch": 0.39451476793248946, + "grad_norm": 0.3821758031845093, + "learning_rate": 0.0015, + "loss": 1.2904, + "step": 3740 + }, + { + "epoch": 0.39556962025316456, + "grad_norm": 0.3981541097164154, + "learning_rate": 0.0015, + "loss": 1.2987, + "step": 3750 + }, + { + "epoch": 0.39662447257383965, + "grad_norm": 0.3971460461616516, + "learning_rate": 0.0015, + "loss": 1.2851, + "step": 3760 + }, + { 
+ "epoch": 0.39767932489451474, + "grad_norm": 0.3641221225261688, + "learning_rate": 0.0015, + "loss": 1.3026, + "step": 3770 + }, + { + "epoch": 0.3987341772151899, + "grad_norm": 0.3688564896583557, + "learning_rate": 0.0015, + "loss": 1.2854, + "step": 3780 + }, + { + "epoch": 0.399789029535865, + "grad_norm": 0.4649963080883026, + "learning_rate": 0.0015, + "loss": 1.293, + "step": 3790 + }, + { + "epoch": 0.4008438818565401, + "grad_norm": 0.43506956100463867, + "learning_rate": 0.0015, + "loss": 1.2983, + "step": 3800 + }, + { + "epoch": 0.40189873417721517, + "grad_norm": 0.3600989282131195, + "learning_rate": 0.0015, + "loss": 1.2861, + "step": 3810 + }, + { + "epoch": 0.4029535864978903, + "grad_norm": 0.35615450143814087, + "learning_rate": 0.0015, + "loss": 1.2795, + "step": 3820 + }, + { + "epoch": 0.4040084388185654, + "grad_norm": 0.3604353368282318, + "learning_rate": 0.0015, + "loss": 1.284, + "step": 3830 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 0.37614142894744873, + "learning_rate": 0.0015, + "loss": 1.2985, + "step": 3840 + }, + { + "epoch": 0.4061181434599156, + "grad_norm": 0.3707175850868225, + "learning_rate": 0.0015, + "loss": 1.2803, + "step": 3850 + }, + { + "epoch": 0.40717299578059074, + "grad_norm": 0.40577250719070435, + "learning_rate": 0.0015, + "loss": 1.2921, + "step": 3860 + }, + { + "epoch": 0.40822784810126583, + "grad_norm": 0.38110923767089844, + "learning_rate": 0.0015, + "loss": 1.2858, + "step": 3870 + }, + { + "epoch": 0.4092827004219409, + "grad_norm": 0.3657989203929901, + "learning_rate": 0.0015, + "loss": 1.2752, + "step": 3880 + }, + { + "epoch": 0.410337552742616, + "grad_norm": 0.37295961380004883, + "learning_rate": 0.0015, + "loss": 1.2743, + "step": 3890 + }, + { + "epoch": 0.41139240506329117, + "grad_norm": 0.5426948070526123, + "learning_rate": 0.0015, + "loss": 1.2808, + "step": 3900 + }, + { + "epoch": 0.41244725738396626, + "grad_norm": 0.398045152425766, + "learning_rate": 0.0015, + "loss": 
1.2877, + "step": 3910 + }, + { + "epoch": 0.41350210970464135, + "grad_norm": 0.37474942207336426, + "learning_rate": 0.0015, + "loss": 1.2795, + "step": 3920 + }, + { + "epoch": 0.41455696202531644, + "grad_norm": 0.38445577025413513, + "learning_rate": 0.0015, + "loss": 1.2844, + "step": 3930 + }, + { + "epoch": 0.41561181434599154, + "grad_norm": 0.4166354835033417, + "learning_rate": 0.0015, + "loss": 1.2782, + "step": 3940 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 0.3484591543674469, + "learning_rate": 0.0015, + "loss": 1.2835, + "step": 3950 + }, + { + "epoch": 0.4177215189873418, + "grad_norm": 0.3584078252315521, + "learning_rate": 0.0015, + "loss": 1.2837, + "step": 3960 + }, + { + "epoch": 0.41877637130801687, + "grad_norm": 0.4460738003253937, + "learning_rate": 0.0015, + "loss": 1.2736, + "step": 3970 + }, + { + "epoch": 0.41983122362869196, + "grad_norm": 0.37689462304115295, + "learning_rate": 0.0015, + "loss": 1.2851, + "step": 3980 + }, + { + "epoch": 0.4208860759493671, + "grad_norm": 0.3761577904224396, + "learning_rate": 0.0015, + "loss": 1.276, + "step": 3990 + }, + { + "epoch": 0.4219409282700422, + "grad_norm": 0.35418757796287537, + "learning_rate": 0.0015, + "loss": 1.2882, + "step": 4000 + }, + { + "epoch": 0.4229957805907173, + "grad_norm": 0.4295203387737274, + "learning_rate": 0.0015, + "loss": 1.2772, + "step": 4010 + }, + { + "epoch": 0.4240506329113924, + "grad_norm": 0.37253305315971375, + "learning_rate": 0.0015, + "loss": 1.2799, + "step": 4020 + }, + { + "epoch": 0.42510548523206754, + "grad_norm": 0.36368170380592346, + "learning_rate": 0.0015, + "loss": 1.2734, + "step": 4030 + }, + { + "epoch": 0.42616033755274263, + "grad_norm": 0.37631523609161377, + "learning_rate": 0.0015, + "loss": 1.2793, + "step": 4040 + }, + { + "epoch": 0.4272151898734177, + "grad_norm": 0.36368027329444885, + "learning_rate": 0.0015, + "loss": 1.282, + "step": 4050 + }, + { + "epoch": 0.4282700421940928, + "grad_norm": 
0.4057527184486389, + "learning_rate": 0.0015, + "loss": 1.2864, + "step": 4060 + }, + { + "epoch": 0.4293248945147679, + "grad_norm": 0.34539783000946045, + "learning_rate": 0.0015, + "loss": 1.2814, + "step": 4070 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 0.5057688355445862, + "learning_rate": 0.0015, + "loss": 1.2873, + "step": 4080 + }, + { + "epoch": 0.43143459915611815, + "grad_norm": 0.430195689201355, + "learning_rate": 0.0015, + "loss": 1.2817, + "step": 4090 + }, + { + "epoch": 0.43248945147679324, + "grad_norm": 0.38058942556381226, + "learning_rate": 0.0015, + "loss": 1.2782, + "step": 4100 + }, + { + "epoch": 0.43354430379746833, + "grad_norm": 0.4111412763595581, + "learning_rate": 0.0015, + "loss": 1.27, + "step": 4110 + }, + { + "epoch": 0.4345991561181435, + "grad_norm": 0.38134413957595825, + "learning_rate": 0.0015, + "loss": 1.2816, + "step": 4120 + }, + { + "epoch": 0.4356540084388186, + "grad_norm": 0.37636449933052063, + "learning_rate": 0.0015, + "loss": 1.2669, + "step": 4130 + }, + { + "epoch": 0.43670886075949367, + "grad_norm": 0.3791978657245636, + "learning_rate": 0.0015, + "loss": 1.2718, + "step": 4140 + }, + { + "epoch": 0.43776371308016876, + "grad_norm": 0.37329211831092834, + "learning_rate": 0.0015, + "loss": 1.2758, + "step": 4150 + }, + { + "epoch": 0.4388185654008439, + "grad_norm": 0.37318155169487, + "learning_rate": 0.0015, + "loss": 1.2715, + "step": 4160 + }, + { + "epoch": 0.439873417721519, + "grad_norm": 0.3652787506580353, + "learning_rate": 0.0015, + "loss": 1.2693, + "step": 4170 + }, + { + "epoch": 0.4409282700421941, + "grad_norm": 0.3359285295009613, + "learning_rate": 0.0015, + "loss": 1.2671, + "step": 4180 + }, + { + "epoch": 0.4419831223628692, + "grad_norm": 0.3559111952781677, + "learning_rate": 0.0015, + "loss": 1.2723, + "step": 4190 + }, + { + "epoch": 0.4430379746835443, + "grad_norm": 0.3659558892250061, + "learning_rate": 0.0015, + "loss": 1.2766, + "step": 4200 + }, + { + "epoch": 
0.4440928270042194, + "grad_norm": 0.3589211404323578, + "learning_rate": 0.0015, + "loss": 1.2669, + "step": 4210 + }, + { + "epoch": 0.4451476793248945, + "grad_norm": 0.39184415340423584, + "learning_rate": 0.0015, + "loss": 1.2767, + "step": 4220 + }, + { + "epoch": 0.4462025316455696, + "grad_norm": 0.47252312302589417, + "learning_rate": 0.0015, + "loss": 1.2736, + "step": 4230 + }, + { + "epoch": 0.4472573839662447, + "grad_norm": 0.3448552191257477, + "learning_rate": 0.0015, + "loss": 1.2692, + "step": 4240 + }, + { + "epoch": 0.44831223628691985, + "grad_norm": 0.39452487230300903, + "learning_rate": 0.0015, + "loss": 1.2731, + "step": 4250 + }, + { + "epoch": 0.44936708860759494, + "grad_norm": 0.4135473072528839, + "learning_rate": 0.0015, + "loss": 1.2796, + "step": 4260 + }, + { + "epoch": 0.45042194092827004, + "grad_norm": 0.358565092086792, + "learning_rate": 0.0015, + "loss": 1.268, + "step": 4270 + }, + { + "epoch": 0.45147679324894513, + "grad_norm": 0.39048680663108826, + "learning_rate": 0.0015, + "loss": 1.2626, + "step": 4280 + }, + { + "epoch": 0.4525316455696203, + "grad_norm": 0.3444206416606903, + "learning_rate": 0.0015, + "loss": 1.2733, + "step": 4290 + }, + { + "epoch": 0.45358649789029537, + "grad_norm": 0.3811909556388855, + "learning_rate": 0.0015, + "loss": 1.2723, + "step": 4300 + }, + { + "epoch": 0.45464135021097046, + "grad_norm": 0.35479670763015747, + "learning_rate": 0.0015, + "loss": 1.2697, + "step": 4310 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 0.38825368881225586, + "learning_rate": 0.0015, + "loss": 1.2671, + "step": 4320 + }, + { + "epoch": 0.45675105485232065, + "grad_norm": 0.3594970405101776, + "learning_rate": 0.0015, + "loss": 1.2745, + "step": 4330 + }, + { + "epoch": 0.4578059071729958, + "grad_norm": 0.35297471284866333, + "learning_rate": 0.0015, + "loss": 1.2685, + "step": 4340 + }, + { + "epoch": 0.4588607594936709, + "grad_norm": 0.43033990263938904, + "learning_rate": 0.0015, + "loss": 
1.2673, + "step": 4350 + }, + { + "epoch": 0.459915611814346, + "grad_norm": 0.33862191438674927, + "learning_rate": 0.0015, + "loss": 1.2839, + "step": 4360 + }, + { + "epoch": 0.4609704641350211, + "grad_norm": 0.3441391587257385, + "learning_rate": 0.0015, + "loss": 1.2593, + "step": 4370 + }, + { + "epoch": 0.4620253164556962, + "grad_norm": 0.3779299855232239, + "learning_rate": 0.0015, + "loss": 1.2654, + "step": 4380 + }, + { + "epoch": 0.4630801687763713, + "grad_norm": 0.3549061119556427, + "learning_rate": 0.0015, + "loss": 1.2625, + "step": 4390 + }, + { + "epoch": 0.4641350210970464, + "grad_norm": 0.3566148281097412, + "learning_rate": 0.0015, + "loss": 1.2677, + "step": 4400 + }, + { + "epoch": 0.4651898734177215, + "grad_norm": 0.3833557367324829, + "learning_rate": 0.0015, + "loss": 1.2799, + "step": 4410 + }, + { + "epoch": 0.46624472573839665, + "grad_norm": 0.36007896065711975, + "learning_rate": 0.0015, + "loss": 1.2666, + "step": 4420 + }, + { + "epoch": 0.46729957805907174, + "grad_norm": 0.39347776770591736, + "learning_rate": 0.0015, + "loss": 1.2693, + "step": 4430 + }, + { + "epoch": 0.46835443037974683, + "grad_norm": 0.40295612812042236, + "learning_rate": 0.0015, + "loss": 1.2634, + "step": 4440 + }, + { + "epoch": 0.4694092827004219, + "grad_norm": 0.3890196979045868, + "learning_rate": 0.0015, + "loss": 1.2742, + "step": 4450 + }, + { + "epoch": 0.4704641350210971, + "grad_norm": 0.35265377163887024, + "learning_rate": 0.0015, + "loss": 1.258, + "step": 4460 + }, + { + "epoch": 0.47151898734177217, + "grad_norm": 0.373200923204422, + "learning_rate": 0.0015, + "loss": 1.2676, + "step": 4470 + }, + { + "epoch": 0.47257383966244726, + "grad_norm": 0.3729031980037689, + "learning_rate": 0.0015, + "loss": 1.2732, + "step": 4480 + }, + { + "epoch": 0.47362869198312235, + "grad_norm": 0.35276147723197937, + "learning_rate": 0.0015, + "loss": 1.2612, + "step": 4490 + }, + { + "epoch": 0.47468354430379744, + "grad_norm": 0.4667707085609436, + 
"learning_rate": 0.0015, + "loss": 1.2615, + "step": 4500 + }, + { + "epoch": 0.4757383966244726, + "grad_norm": 0.3366076648235321, + "learning_rate": 0.0015, + "loss": 1.2664, + "step": 4510 + }, + { + "epoch": 0.4767932489451477, + "grad_norm": 0.40432390570640564, + "learning_rate": 0.0015, + "loss": 1.2498, + "step": 4520 + }, + { + "epoch": 0.4778481012658228, + "grad_norm": 0.3696410655975342, + "learning_rate": 0.0015, + "loss": 1.2675, + "step": 4530 + }, + { + "epoch": 0.47890295358649787, + "grad_norm": 0.36451849341392517, + "learning_rate": 0.0015, + "loss": 1.268, + "step": 4540 + }, + { + "epoch": 0.479957805907173, + "grad_norm": 0.4523102343082428, + "learning_rate": 0.0015, + "loss": 1.2688, + "step": 4550 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 0.3984118103981018, + "learning_rate": 0.0015, + "loss": 1.2572, + "step": 4560 + }, + { + "epoch": 0.4820675105485232, + "grad_norm": 0.3475066125392914, + "learning_rate": 0.0015, + "loss": 1.2389, + "step": 4570 + }, + { + "epoch": 0.4831223628691983, + "grad_norm": 0.4492590129375458, + "learning_rate": 0.0015, + "loss": 1.2604, + "step": 4580 + }, + { + "epoch": 0.48417721518987344, + "grad_norm": 0.398572713136673, + "learning_rate": 0.0015, + "loss": 1.2507, + "step": 4590 + }, + { + "epoch": 0.48523206751054854, + "grad_norm": 0.3685084581375122, + "learning_rate": 0.0015, + "loss": 1.269, + "step": 4600 + }, + { + "epoch": 0.48628691983122363, + "grad_norm": 0.4924905598163605, + "learning_rate": 0.0015, + "loss": 1.2708, + "step": 4610 + }, + { + "epoch": 0.4873417721518987, + "grad_norm": 0.38425061106681824, + "learning_rate": 0.0015, + "loss": 1.2492, + "step": 4620 + }, + { + "epoch": 0.4883966244725738, + "grad_norm": 0.3547668755054474, + "learning_rate": 0.0015, + "loss": 1.264, + "step": 4630 + }, + { + "epoch": 0.48945147679324896, + "grad_norm": 0.3492022156715393, + "learning_rate": 0.0015, + "loss": 1.2669, + "step": 4640 + }, + { + "epoch": 0.49050632911392406, + 
"grad_norm": 0.35016942024230957, + "learning_rate": 0.0015, + "loss": 1.2664, + "step": 4650 + }, + { + "epoch": 0.49156118143459915, + "grad_norm": 0.3903471827507019, + "learning_rate": 0.0015, + "loss": 1.2573, + "step": 4660 + }, + { + "epoch": 0.49261603375527424, + "grad_norm": 0.37766721844673157, + "learning_rate": 0.0015, + "loss": 1.2571, + "step": 4670 + }, + { + "epoch": 0.4936708860759494, + "grad_norm": 0.35377028584480286, + "learning_rate": 0.0015, + "loss": 1.2515, + "step": 4680 + }, + { + "epoch": 0.4947257383966245, + "grad_norm": 0.47878357768058777, + "learning_rate": 0.0015, + "loss": 1.2506, + "step": 4690 + }, + { + "epoch": 0.4957805907172996, + "grad_norm": 0.3595276176929474, + "learning_rate": 0.0015, + "loss": 1.2587, + "step": 4700 + }, + { + "epoch": 0.49683544303797467, + "grad_norm": 0.37340301275253296, + "learning_rate": 0.0015, + "loss": 1.2588, + "step": 4710 + }, + { + "epoch": 0.4978902953586498, + "grad_norm": 0.4304148554801941, + "learning_rate": 0.0015, + "loss": 1.2574, + "step": 4720 + }, + { + "epoch": 0.4989451476793249, + "grad_norm": 0.3435005843639374, + "learning_rate": 0.0015, + "loss": 1.253, + "step": 4730 + }, + { + "epoch": 0.5, + "grad_norm": 0.36592572927474976, + "learning_rate": 0.0015, + "loss": 1.2501, + "step": 4740 + }, + { + "epoch": 0.5010548523206751, + "grad_norm": 0.371332585811615, + "learning_rate": 0.0015, + "loss": 1.2721, + "step": 4750 + }, + { + "epoch": 0.5021097046413502, + "grad_norm": 0.3589191734790802, + "learning_rate": 0.0015, + "loss": 1.2573, + "step": 4760 + }, + { + "epoch": 0.5031645569620253, + "grad_norm": 0.3282475769519806, + "learning_rate": 0.0015, + "loss": 1.2555, + "step": 4770 + }, + { + "epoch": 0.5042194092827004, + "grad_norm": 0.34423360228538513, + "learning_rate": 0.0015, + "loss": 1.2645, + "step": 4780 + }, + { + "epoch": 0.5052742616033755, + "grad_norm": 0.42229223251342773, + "learning_rate": 0.0015, + "loss": 1.2485, + "step": 4790 + }, + { + "epoch": 
0.5063291139240507, + "grad_norm": 0.36169490218162537, + "learning_rate": 0.0015, + "loss": 1.2495, + "step": 4800 + }, + { + "epoch": 0.5073839662447257, + "grad_norm": 0.3373130261898041, + "learning_rate": 0.0015, + "loss": 1.2513, + "step": 4810 + }, + { + "epoch": 0.5084388185654009, + "grad_norm": 0.35851961374282837, + "learning_rate": 0.0015, + "loss": 1.2716, + "step": 4820 + }, + { + "epoch": 0.509493670886076, + "grad_norm": 0.38497915863990784, + "learning_rate": 0.0015, + "loss": 1.2553, + "step": 4830 + }, + { + "epoch": 0.510548523206751, + "grad_norm": 0.3687344491481781, + "learning_rate": 0.0015, + "loss": 1.2483, + "step": 4840 + }, + { + "epoch": 0.5116033755274262, + "grad_norm": 0.3644031286239624, + "learning_rate": 0.0015, + "loss": 1.2622, + "step": 4850 + }, + { + "epoch": 0.5126582278481012, + "grad_norm": 0.38985204696655273, + "learning_rate": 0.0015, + "loss": 1.2502, + "step": 4860 + }, + { + "epoch": 0.5137130801687764, + "grad_norm": 0.4006396234035492, + "learning_rate": 0.0015, + "loss": 1.2447, + "step": 4870 + }, + { + "epoch": 0.5147679324894515, + "grad_norm": 0.36055195331573486, + "learning_rate": 0.0015, + "loss": 1.263, + "step": 4880 + }, + { + "epoch": 0.5158227848101266, + "grad_norm": 0.3749941289424896, + "learning_rate": 0.0015, + "loss": 1.2526, + "step": 4890 + }, + { + "epoch": 0.5168776371308017, + "grad_norm": 0.3642890751361847, + "learning_rate": 0.0015, + "loss": 1.2617, + "step": 4900 + }, + { + "epoch": 0.5179324894514767, + "grad_norm": 0.37513530254364014, + "learning_rate": 0.0015, + "loss": 1.2548, + "step": 4910 + }, + { + "epoch": 0.5189873417721519, + "grad_norm": 0.41637057065963745, + "learning_rate": 0.0015, + "loss": 1.2468, + "step": 4920 + }, + { + "epoch": 0.520042194092827, + "grad_norm": 0.4796326160430908, + "learning_rate": 0.0015, + "loss": 1.2583, + "step": 4930 + }, + { + "epoch": 0.5210970464135021, + "grad_norm": 0.35974326729774475, + "learning_rate": 0.0015, + "loss": 1.2535, + 
"step": 4940 + }, + { + "epoch": 0.5221518987341772, + "grad_norm": 0.37178534269332886, + "learning_rate": 0.0015, + "loss": 1.2599, + "step": 4950 + }, + { + "epoch": 0.5232067510548524, + "grad_norm": 0.349430650472641, + "learning_rate": 0.0015, + "loss": 1.2439, + "step": 4960 + }, + { + "epoch": 0.5242616033755274, + "grad_norm": 0.395673543214798, + "learning_rate": 0.0015, + "loss": 1.2508, + "step": 4970 + }, + { + "epoch": 0.5253164556962026, + "grad_norm": 0.40750932693481445, + "learning_rate": 0.0015, + "loss": 1.2449, + "step": 4980 + }, + { + "epoch": 0.5263713080168776, + "grad_norm": 0.3674182891845703, + "learning_rate": 0.0015, + "loss": 1.2476, + "step": 4990 + }, + { + "epoch": 0.5274261603375527, + "grad_norm": 0.3393429219722748, + "learning_rate": 0.0015, + "loss": 1.2561, + "step": 5000 + }, + { + "epoch": 0.5284810126582279, + "grad_norm": 0.33547094464302063, + "learning_rate": 0.0015, + "loss": 1.2501, + "step": 5010 + }, + { + "epoch": 0.5295358649789029, + "grad_norm": 0.36183106899261475, + "learning_rate": 0.0015, + "loss": 1.2452, + "step": 5020 + }, + { + "epoch": 0.5305907172995781, + "grad_norm": 0.3869287371635437, + "learning_rate": 0.0015, + "loss": 1.2315, + "step": 5030 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 0.36629679799079895, + "learning_rate": 0.0015, + "loss": 1.2587, + "step": 5040 + }, + { + "epoch": 0.5327004219409283, + "grad_norm": 0.349203884601593, + "learning_rate": 0.0015, + "loss": 1.2467, + "step": 5050 + }, + { + "epoch": 0.5337552742616034, + "grad_norm": 0.3906978666782379, + "learning_rate": 0.0015, + "loss": 1.2572, + "step": 5060 + }, + { + "epoch": 0.5348101265822784, + "grad_norm": 0.4080146253108978, + "learning_rate": 0.0015, + "loss": 1.2486, + "step": 5070 + }, + { + "epoch": 0.5358649789029536, + "grad_norm": 0.45458248257637024, + "learning_rate": 0.0015, + "loss": 1.2591, + "step": 5080 + }, + { + "epoch": 0.5369198312236287, + "grad_norm": 0.39543619751930237, + 
"learning_rate": 0.0015, + "loss": 1.2524, + "step": 5090 + }, + { + "epoch": 0.5379746835443038, + "grad_norm": 0.4424404799938202, + "learning_rate": 0.0015, + "loss": 1.2367, + "step": 5100 + }, + { + "epoch": 0.5390295358649789, + "grad_norm": 0.3873385488986969, + "learning_rate": 0.0015, + "loss": 1.2489, + "step": 5110 + }, + { + "epoch": 0.540084388185654, + "grad_norm": 0.33514106273651123, + "learning_rate": 0.0015, + "loss": 1.2461, + "step": 5120 + }, + { + "epoch": 0.5411392405063291, + "grad_norm": 0.38259291648864746, + "learning_rate": 0.0015, + "loss": 1.2454, + "step": 5130 + }, + { + "epoch": 0.5421940928270043, + "grad_norm": 0.3605073094367981, + "learning_rate": 0.0015, + "loss": 1.2585, + "step": 5140 + }, + { + "epoch": 0.5432489451476793, + "grad_norm": 0.40182405710220337, + "learning_rate": 0.0015, + "loss": 1.2523, + "step": 5150 + }, + { + "epoch": 0.5443037974683544, + "grad_norm": 0.4059754014015198, + "learning_rate": 0.0015, + "loss": 1.2488, + "step": 5160 + }, + { + "epoch": 0.5453586497890295, + "grad_norm": 0.3528928756713867, + "learning_rate": 0.0015, + "loss": 1.2469, + "step": 5170 + }, + { + "epoch": 0.5464135021097046, + "grad_norm": 0.4284350574016571, + "learning_rate": 0.0015, + "loss": 1.2564, + "step": 5180 + }, + { + "epoch": 0.5474683544303798, + "grad_norm": 0.3745574951171875, + "learning_rate": 0.0015, + "loss": 1.2419, + "step": 5190 + }, + { + "epoch": 0.5485232067510548, + "grad_norm": 0.3614684045314789, + "learning_rate": 0.0015, + "loss": 1.2527, + "step": 5200 + }, + { + "epoch": 0.54957805907173, + "grad_norm": 0.4084944427013397, + "learning_rate": 0.0015, + "loss": 1.2394, + "step": 5210 + }, + { + "epoch": 0.5506329113924051, + "grad_norm": 0.3417372405529022, + "learning_rate": 0.0015, + "loss": 1.248, + "step": 5220 + }, + { + "epoch": 0.5516877637130801, + "grad_norm": 0.35009732842445374, + "learning_rate": 0.0015, + "loss": 1.2431, + "step": 5230 + }, + { + "epoch": 0.5527426160337553, + 
"grad_norm": 0.37051039934158325, + "learning_rate": 0.0015, + "loss": 1.2475, + "step": 5240 + }, + { + "epoch": 0.5537974683544303, + "grad_norm": 0.44351595640182495, + "learning_rate": 0.0015, + "loss": 1.2384, + "step": 5250 + }, + { + "epoch": 0.5548523206751055, + "grad_norm": 0.3997798264026642, + "learning_rate": 0.0015, + "loss": 1.2459, + "step": 5260 + }, + { + "epoch": 0.5559071729957806, + "grad_norm": 0.4093949794769287, + "learning_rate": 0.0015, + "loss": 1.2505, + "step": 5270 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 0.457400381565094, + "learning_rate": 0.0015, + "loss": 1.2443, + "step": 5280 + }, + { + "epoch": 0.5580168776371308, + "grad_norm": 0.35332190990448, + "learning_rate": 0.0015, + "loss": 1.2468, + "step": 5290 + }, + { + "epoch": 0.5590717299578059, + "grad_norm": 0.3771302103996277, + "learning_rate": 0.0015, + "loss": 1.2499, + "step": 5300 + }, + { + "epoch": 0.560126582278481, + "grad_norm": 0.47953101992607117, + "learning_rate": 0.0015, + "loss": 1.2372, + "step": 5310 + }, + { + "epoch": 0.5611814345991561, + "grad_norm": 0.3351249098777771, + "learning_rate": 0.0015, + "loss": 1.2373, + "step": 5320 + }, + { + "epoch": 0.5622362869198312, + "grad_norm": 0.35240134596824646, + "learning_rate": 0.0015, + "loss": 1.2459, + "step": 5330 + }, + { + "epoch": 0.5632911392405063, + "grad_norm": 0.35376760363578796, + "learning_rate": 0.0015, + "loss": 1.2396, + "step": 5340 + }, + { + "epoch": 0.5643459915611815, + "grad_norm": 0.36500561237335205, + "learning_rate": 0.0015, + "loss": 1.2361, + "step": 5350 + }, + { + "epoch": 0.5654008438818565, + "grad_norm": 0.45256397128105164, + "learning_rate": 0.0015, + "loss": 1.2449, + "step": 5360 + }, + { + "epoch": 0.5664556962025317, + "grad_norm": 0.3955960273742676, + "learning_rate": 0.0015, + "loss": 1.2435, + "step": 5370 + }, + { + "epoch": 0.5675105485232067, + "grad_norm": 0.40095025300979614, + "learning_rate": 0.0015, + "loss": 1.2355, + "step": 5380 + }, + { + 
"epoch": 0.5685654008438819, + "grad_norm": 0.3567625880241394, + "learning_rate": 0.0015, + "loss": 1.2386, + "step": 5390 + }, + { + "epoch": 0.569620253164557, + "grad_norm": 0.36974769830703735, + "learning_rate": 0.0015, + "loss": 1.2374, + "step": 5400 + }, + { + "epoch": 0.570675105485232, + "grad_norm": 0.3549600839614868, + "learning_rate": 0.0015, + "loss": 1.2426, + "step": 5410 + }, + { + "epoch": 0.5717299578059072, + "grad_norm": 0.34809744358062744, + "learning_rate": 0.0015, + "loss": 1.236, + "step": 5420 + }, + { + "epoch": 0.5727848101265823, + "grad_norm": 0.3890848755836487, + "learning_rate": 0.0015, + "loss": 1.2304, + "step": 5430 + }, + { + "epoch": 0.5738396624472574, + "grad_norm": 0.3576195538043976, + "learning_rate": 0.0015, + "loss": 1.2281, + "step": 5440 + }, + { + "epoch": 0.5748945147679325, + "grad_norm": 0.4001421630382538, + "learning_rate": 0.0015, + "loss": 1.2365, + "step": 5450 + }, + { + "epoch": 0.5759493670886076, + "grad_norm": 0.36096686124801636, + "learning_rate": 0.0015, + "loss": 1.24, + "step": 5460 + }, + { + "epoch": 0.5770042194092827, + "grad_norm": 0.42172369360923767, + "learning_rate": 0.0015, + "loss": 1.2386, + "step": 5470 + }, + { + "epoch": 0.5780590717299579, + "grad_norm": 0.3547919690608978, + "learning_rate": 0.0015, + "loss": 1.241, + "step": 5480 + }, + { + "epoch": 0.5791139240506329, + "grad_norm": 0.3332633674144745, + "learning_rate": 0.0015, + "loss": 1.2467, + "step": 5490 + }, + { + "epoch": 0.580168776371308, + "grad_norm": 0.37135013937950134, + "learning_rate": 0.0015, + "loss": 1.2368, + "step": 5500 + }, + { + "epoch": 0.5812236286919831, + "grad_norm": 0.4235054850578308, + "learning_rate": 0.0015, + "loss": 1.2446, + "step": 5510 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 0.36324357986450195, + "learning_rate": 0.0015, + "loss": 1.2353, + "step": 5520 + }, + { + "epoch": 0.5833333333333334, + "grad_norm": 0.5144487023353577, + "learning_rate": 0.0015, + "loss": 1.2329, + 
"step": 5530 + }, + { + "epoch": 0.5843881856540084, + "grad_norm": 0.3375086784362793, + "learning_rate": 0.0015, + "loss": 1.2295, + "step": 5540 + }, + { + "epoch": 0.5854430379746836, + "grad_norm": 0.3670271337032318, + "learning_rate": 0.0015, + "loss": 1.2448, + "step": 5550 + }, + { + "epoch": 0.5864978902953587, + "grad_norm": 0.3790842890739441, + "learning_rate": 0.0015, + "loss": 1.2506, + "step": 5560 + }, + { + "epoch": 0.5875527426160337, + "grad_norm": 0.36140161752700806, + "learning_rate": 0.0015, + "loss": 1.2344, + "step": 5570 + }, + { + "epoch": 0.5886075949367089, + "grad_norm": 0.40632885694503784, + "learning_rate": 0.0015, + "loss": 1.2327, + "step": 5580 + }, + { + "epoch": 0.5896624472573839, + "grad_norm": 0.40224945545196533, + "learning_rate": 0.0015, + "loss": 1.2375, + "step": 5590 + }, + { + "epoch": 0.5907172995780591, + "grad_norm": 0.3521060347557068, + "learning_rate": 0.0015, + "loss": 1.2327, + "step": 5600 + }, + { + "epoch": 0.5917721518987342, + "grad_norm": 0.3723829686641693, + "learning_rate": 0.0015, + "loss": 1.2371, + "step": 5610 + }, + { + "epoch": 0.5928270042194093, + "grad_norm": 0.3640119731426239, + "learning_rate": 0.0015, + "loss": 1.2431, + "step": 5620 + }, + { + "epoch": 0.5938818565400844, + "grad_norm": 0.41383010149002075, + "learning_rate": 0.0015, + "loss": 1.2391, + "step": 5630 + }, + { + "epoch": 0.5949367088607594, + "grad_norm": 0.3435527980327606, + "learning_rate": 0.0015, + "loss": 1.2479, + "step": 5640 + }, + { + "epoch": 0.5959915611814346, + "grad_norm": 0.32942721247673035, + "learning_rate": 0.0015, + "loss": 1.2407, + "step": 5650 + }, + { + "epoch": 0.5970464135021097, + "grad_norm": 0.47213542461395264, + "learning_rate": 0.0015, + "loss": 1.239, + "step": 5660 + }, + { + "epoch": 0.5981012658227848, + "grad_norm": 0.34918421506881714, + "learning_rate": 0.0015, + "loss": 1.2427, + "step": 5670 + }, + { + "epoch": 0.5991561181434599, + "grad_norm": 0.34842443466186523, + 
"learning_rate": 0.0015, + "loss": 1.244, + "step": 5680 + }, + { + "epoch": 0.6002109704641351, + "grad_norm": 0.3522525727748871, + "learning_rate": 0.0015, + "loss": 1.2239, + "step": 5690 + }, + { + "epoch": 0.6012658227848101, + "grad_norm": 0.3676183223724365, + "learning_rate": 0.0015, + "loss": 1.2302, + "step": 5700 + }, + { + "epoch": 0.6023206751054853, + "grad_norm": 0.3557111322879791, + "learning_rate": 0.0015, + "loss": 1.2345, + "step": 5710 + }, + { + "epoch": 0.6033755274261603, + "grad_norm": 0.37497225403785706, + "learning_rate": 0.0015, + "loss": 1.2355, + "step": 5720 + }, + { + "epoch": 0.6044303797468354, + "grad_norm": 0.35429346561431885, + "learning_rate": 0.0015, + "loss": 1.2347, + "step": 5730 + }, + { + "epoch": 0.6054852320675106, + "grad_norm": 0.3666691780090332, + "learning_rate": 0.0015, + "loss": 1.2334, + "step": 5740 + }, + { + "epoch": 0.6065400843881856, + "grad_norm": 0.34536102414131165, + "learning_rate": 0.0015, + "loss": 1.2097, + "step": 5750 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 0.3289570212364197, + "learning_rate": 0.0015, + "loss": 1.2217, + "step": 5760 + }, + { + "epoch": 0.6086497890295358, + "grad_norm": 0.39452025294303894, + "learning_rate": 0.0015, + "loss": 1.2228, + "step": 5770 + }, + { + "epoch": 0.609704641350211, + "grad_norm": 0.3545337915420532, + "learning_rate": 0.0015, + "loss": 1.2295, + "step": 5780 + }, + { + "epoch": 0.6107594936708861, + "grad_norm": 0.3574298024177551, + "learning_rate": 0.0015, + "loss": 1.2271, + "step": 5790 + }, + { + "epoch": 0.6118143459915611, + "grad_norm": 0.3354833126068115, + "learning_rate": 0.0015, + "loss": 1.2293, + "step": 5800 + }, + { + "epoch": 0.6128691983122363, + "grad_norm": 0.35068947076797485, + "learning_rate": 0.0015, + "loss": 1.2348, + "step": 5810 + }, + { + "epoch": 0.6139240506329114, + "grad_norm": 0.38195034861564636, + "learning_rate": 0.0015, + "loss": 1.2494, + "step": 5820 + }, + { + "epoch": 0.6149789029535865, + 
"grad_norm": 0.3787460923194885, + "learning_rate": 0.0015, + "loss": 1.2297, + "step": 5830 + }, + { + "epoch": 0.6160337552742616, + "grad_norm": 0.38402631878852844, + "learning_rate": 0.0015, + "loss": 1.243, + "step": 5840 + }, + { + "epoch": 0.6170886075949367, + "grad_norm": 0.40364325046539307, + "learning_rate": 0.0015, + "loss": 1.2472, + "step": 5850 + }, + { + "epoch": 0.6181434599156118, + "grad_norm": 0.36070314049720764, + "learning_rate": 0.0015, + "loss": 1.2321, + "step": 5860 + }, + { + "epoch": 0.619198312236287, + "grad_norm": 0.4334326684474945, + "learning_rate": 0.0015, + "loss": 1.2288, + "step": 5870 + }, + { + "epoch": 0.620253164556962, + "grad_norm": 0.3435908257961273, + "learning_rate": 0.0015, + "loss": 1.2362, + "step": 5880 + }, + { + "epoch": 0.6213080168776371, + "grad_norm": 0.3393274247646332, + "learning_rate": 0.0015, + "loss": 1.2285, + "step": 5890 + }, + { + "epoch": 0.6223628691983122, + "grad_norm": 0.3442111015319824, + "learning_rate": 0.0015, + "loss": 1.23, + "step": 5900 + }, + { + "epoch": 0.6234177215189873, + "grad_norm": 0.3644740581512451, + "learning_rate": 0.0015, + "loss": 1.2248, + "step": 5910 + }, + { + "epoch": 0.6244725738396625, + "grad_norm": 0.3478515148162842, + "learning_rate": 0.0015, + "loss": 1.2182, + "step": 5920 + }, + { + "epoch": 0.6255274261603375, + "grad_norm": 0.47216373682022095, + "learning_rate": 0.0015, + "loss": 1.2252, + "step": 5930 + }, + { + "epoch": 0.6265822784810127, + "grad_norm": 0.3889217674732208, + "learning_rate": 0.0015, + "loss": 1.2331, + "step": 5940 + }, + { + "epoch": 0.6276371308016878, + "grad_norm": 0.3746896982192993, + "learning_rate": 0.0015, + "loss": 1.2218, + "step": 5950 + }, + { + "epoch": 0.6286919831223629, + "grad_norm": 0.337329238653183, + "learning_rate": 0.0015, + "loss": 1.2327, + "step": 5960 + }, + { + "epoch": 0.629746835443038, + "grad_norm": 0.45176035165786743, + "learning_rate": 0.0015, + "loss": 1.2321, + "step": 5970 + }, + { + 
"epoch": 0.630801687763713, + "grad_norm": 0.34400928020477295, + "learning_rate": 0.0015, + "loss": 1.216, + "step": 5980 + }, + { + "epoch": 0.6318565400843882, + "grad_norm": 0.4225354790687561, + "learning_rate": 0.0015, + "loss": 1.2325, + "step": 5990 + }, + { + "epoch": 0.6329113924050633, + "grad_norm": 0.3400496244430542, + "learning_rate": 0.0015, + "loss": 1.2411, + "step": 6000 + }, + { + "epoch": 0.6339662447257384, + "grad_norm": 0.3938896358013153, + "learning_rate": 0.0015, + "loss": 1.2299, + "step": 6010 + }, + { + "epoch": 0.6350210970464135, + "grad_norm": 0.3652991056442261, + "learning_rate": 0.0015, + "loss": 1.2299, + "step": 6020 + }, + { + "epoch": 0.6360759493670886, + "grad_norm": 0.38011661171913147, + "learning_rate": 0.0015, + "loss": 1.2129, + "step": 6030 + }, + { + "epoch": 0.6371308016877637, + "grad_norm": 0.34210604429244995, + "learning_rate": 0.0015, + "loss": 1.2329, + "step": 6040 + }, + { + "epoch": 0.6381856540084389, + "grad_norm": 0.352566123008728, + "learning_rate": 0.0015, + "loss": 1.2176, + "step": 6050 + }, + { + "epoch": 0.6392405063291139, + "grad_norm": 0.34896865487098694, + "learning_rate": 0.0015, + "loss": 1.233, + "step": 6060 + }, + { + "epoch": 0.640295358649789, + "grad_norm": 0.4462093710899353, + "learning_rate": 0.0015, + "loss": 1.2256, + "step": 6070 + }, + { + "epoch": 0.6413502109704642, + "grad_norm": 0.35512223839759827, + "learning_rate": 0.0015, + "loss": 1.2257, + "step": 6080 + }, + { + "epoch": 0.6424050632911392, + "grad_norm": 0.3445747196674347, + "learning_rate": 0.0015, + "loss": 1.2241, + "step": 6090 + }, + { + "epoch": 0.6434599156118144, + "grad_norm": 0.3473874628543854, + "learning_rate": 0.0015, + "loss": 1.2186, + "step": 6100 + }, + { + "epoch": 0.6445147679324894, + "grad_norm": 0.35191580653190613, + "learning_rate": 0.0015, + "loss": 1.2311, + "step": 6110 + }, + { + "epoch": 0.6455696202531646, + "grad_norm": 0.33353883028030396, + "learning_rate": 0.0015, + "loss": 
1.2265, + "step": 6120 + }, + { + "epoch": 0.6466244725738397, + "grad_norm": 0.44887515902519226, + "learning_rate": 0.0015, + "loss": 1.2262, + "step": 6130 + }, + { + "epoch": 0.6476793248945147, + "grad_norm": 0.39968711137771606, + "learning_rate": 0.0015, + "loss": 1.2253, + "step": 6140 + }, + { + "epoch": 0.6487341772151899, + "grad_norm": 0.3577297627925873, + "learning_rate": 0.0015, + "loss": 1.2358, + "step": 6150 + }, + { + "epoch": 0.6497890295358649, + "grad_norm": 0.4742908179759979, + "learning_rate": 0.0015, + "loss": 1.23, + "step": 6160 + }, + { + "epoch": 0.6508438818565401, + "grad_norm": 0.4291151463985443, + "learning_rate": 0.0014834368975312174, + "loss": 1.2079, + "step": 6170 + }, + { + "epoch": 0.6518987341772152, + "grad_norm": 0.38173770904541016, + "learning_rate": 0.0014629899726345957, + "loss": 1.2299, + "step": 6180 + }, + { + "epoch": 0.6529535864978903, + "grad_norm": 0.37623050808906555, + "learning_rate": 0.0014428248775471316, + "loss": 1.2305, + "step": 6190 + }, + { + "epoch": 0.6540084388185654, + "grad_norm": 0.38231006264686584, + "learning_rate": 0.00142293772767289, + "loss": 1.2213, + "step": 6200 + }, + { + "epoch": 0.6550632911392406, + "grad_norm": 0.3671601116657257, + "learning_rate": 0.001403324691959192, + "loss": 1.2156, + "step": 6210 + }, + { + "epoch": 0.6561181434599156, + "grad_norm": 0.3565635085105896, + "learning_rate": 0.0013839819921586025, + "loss": 1.2229, + "step": 6220 + }, + { + "epoch": 0.6571729957805907, + "grad_norm": 0.3567785918712616, + "learning_rate": 0.0013649059021010894, + "loss": 1.2105, + "step": 6230 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 0.4190845489501953, + "learning_rate": 0.0013460927469762154, + "loss": 1.2122, + "step": 6240 + }, + { + "epoch": 0.6592827004219409, + "grad_norm": 0.3603142201900482, + "learning_rate": 0.0013275389026252255, + "loss": 1.2185, + "step": 6250 + }, + { + "epoch": 0.6603375527426161, + "grad_norm": 0.3586927652359009, + 
"learning_rate": 0.0013092407948428887, + "loss": 1.2087, + "step": 6260 + }, + { + "epoch": 0.6613924050632911, + "grad_norm": 0.3322678804397583, + "learning_rate": 0.001291194898688966, + "loss": 1.2146, + "step": 6270 + }, + { + "epoch": 0.6624472573839663, + "grad_norm": 0.3324128985404968, + "learning_rate": 0.001273397737809166, + "loss": 1.2128, + "step": 6280 + }, + { + "epoch": 0.6635021097046413, + "grad_norm": 0.34206292033195496, + "learning_rate": 0.001255845883765463, + "loss": 1.21, + "step": 6290 + }, + { + "epoch": 0.6645569620253164, + "grad_norm": 0.33811911940574646, + "learning_rate": 0.001238535955375642, + "loss": 1.2004, + "step": 6300 + }, + { + "epoch": 0.6656118143459916, + "grad_norm": 0.357466459274292, + "learning_rate": 0.0012214646180619506, + "loss": 1.2015, + "step": 6310 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.34239062666893005, + "learning_rate": 0.001204628583208727, + "loss": 1.1962, + "step": 6320 + }, + { + "epoch": 0.6677215189873418, + "grad_norm": 0.34912315011024475, + "learning_rate": 0.0011880246075288827, + "loss": 1.2038, + "step": 6330 + }, + { + "epoch": 0.6687763713080169, + "grad_norm": 0.3250739872455597, + "learning_rate": 0.001171649492439115, + "loss": 1.1965, + "step": 6340 + }, + { + "epoch": 0.669831223628692, + "grad_norm": 0.335197776556015, + "learning_rate": 0.0011555000834437364, + "loss": 1.1989, + "step": 6350 + }, + { + "epoch": 0.6708860759493671, + "grad_norm": 0.3212112486362457, + "learning_rate": 0.0011395732695269908, + "loss": 1.1966, + "step": 6360 + }, + { + "epoch": 0.6719409282700421, + "grad_norm": 0.31406673789024353, + "learning_rate": 0.0011238659825537505, + "loss": 1.1773, + "step": 6370 + }, + { + "epoch": 0.6729957805907173, + "grad_norm": 0.37119054794311523, + "learning_rate": 0.0011083751966784717, + "loss": 1.18, + "step": 6380 + }, + { + "epoch": 0.6740506329113924, + "grad_norm": 0.4086954593658447, + "learning_rate": 0.0010930979277622953, + "loss": 
1.1948, + "step": 6390 + }, + { + "epoch": 0.6751054852320675, + "grad_norm": 0.40815266966819763, + "learning_rate": 0.0010780312327981854, + "loss": 1.1939, + "step": 6400 + }, + { + "epoch": 0.6761603375527426, + "grad_norm": 0.35664618015289307, + "learning_rate": 0.0010631722093439888, + "loss": 1.1888, + "step": 6410 + }, + { + "epoch": 0.6772151898734177, + "grad_norm": 0.3347793519496918, + "learning_rate": 0.00104851799496331, + "loss": 1.1786, + "step": 6420 + }, + { + "epoch": 0.6782700421940928, + "grad_norm": 0.33713704347610474, + "learning_rate": 0.0010340657666740914, + "loss": 1.1852, + "step": 6430 + }, + { + "epoch": 0.679324894514768, + "grad_norm": 0.40812811255455017, + "learning_rate": 0.0010198127404047975, + "loss": 1.1685, + "step": 6440 + }, + { + "epoch": 0.680379746835443, + "grad_norm": 0.32857853174209595, + "learning_rate": 0.0010057561704580897, + "loss": 1.1727, + "step": 6450 + }, + { + "epoch": 0.6814345991561181, + "grad_norm": 0.36213618516921997, + "learning_rate": 0.0009918933489818985, + "loss": 1.1856, + "step": 6460 + }, + { + "epoch": 0.6824894514767933, + "grad_norm": 0.3340655267238617, + "learning_rate": 0.0009782216054477827, + "loss": 1.178, + "step": 6470 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 0.36028996109962463, + "learning_rate": 0.0009647383061364801, + "loss": 1.1836, + "step": 6480 + }, + { + "epoch": 0.6845991561181435, + "grad_norm": 0.32589006423950195, + "learning_rate": 0.0009514408536305495, + "loss": 1.1763, + "step": 6490 + }, + { + "epoch": 0.6856540084388185, + "grad_norm": 0.36935076117515564, + "learning_rate": 0.0009383266863140042, + "loss": 1.1913, + "step": 6500 + }, + { + "epoch": 0.6867088607594937, + "grad_norm": 0.35536494851112366, + "learning_rate": 0.000925393277878844, + "loss": 1.1876, + "step": 6510 + }, + { + "epoch": 0.6877637130801688, + "grad_norm": 0.3353438079357147, + "learning_rate": 0.0009126381368383879, + "loss": 1.1738, + "step": 6520 + }, + { + "epoch": 
0.6888185654008439, + "grad_norm": 0.3449837267398834, + "learning_rate": 0.0009000588060473156, + "loss": 1.1637, + "step": 6530 + }, + { + "epoch": 0.689873417721519, + "grad_norm": 0.32303208112716675, + "learning_rate": 0.0008876528622283235, + "loss": 1.1715, + "step": 6540 + }, + { + "epoch": 0.6909282700421941, + "grad_norm": 0.3335954248905182, + "learning_rate": 0.0008754179155053053, + "loss": 1.1685, + "step": 6550 + }, + { + "epoch": 0.6919831223628692, + "grad_norm": 0.3782043755054474, + "learning_rate": 0.0008633516089429683, + "loss": 1.1681, + "step": 6560 + }, + { + "epoch": 0.6930379746835443, + "grad_norm": 0.35206338763237, + "learning_rate": 0.0008514516180927928, + "loss": 1.1643, + "step": 6570 + }, + { + "epoch": 0.6940928270042194, + "grad_norm": 0.3493340313434601, + "learning_rate": 0.0008397156505452524, + "loss": 1.1596, + "step": 6580 + }, + { + "epoch": 0.6951476793248945, + "grad_norm": 0.34636741876602173, + "learning_rate": 0.0008281414454882051, + "loss": 1.1601, + "step": 6590 + }, + { + "epoch": 0.6962025316455697, + "grad_norm": 0.3398618996143341, + "learning_rate": 0.0008167267732713704, + "loss": 1.1666, + "step": 6600 + }, + { + "epoch": 0.6972573839662447, + "grad_norm": 0.36243852972984314, + "learning_rate": 0.0008054694349768117, + "loss": 1.1518, + "step": 6610 + }, + { + "epoch": 0.6983122362869199, + "grad_norm": 0.34618276357650757, + "learning_rate": 0.0007943672619953359, + "loss": 1.163, + "step": 6620 + }, + { + "epoch": 0.6993670886075949, + "grad_norm": 0.3229001462459564, + "learning_rate": 0.0007834181156087356, + "loss": 1.1538, + "step": 6630 + }, + { + "epoch": 0.70042194092827, + "grad_norm": 0.3970085084438324, + "learning_rate": 0.0007726198865777852, + "loss": 1.1541, + "step": 6640 + }, + { + "epoch": 0.7014767932489452, + "grad_norm": 0.34208858013153076, + "learning_rate": 0.0007619704947359191, + "loss": 1.1508, + "step": 6650 + }, + { + "epoch": 0.7025316455696202, + "grad_norm": 
0.31160768866539, + "learning_rate": 0.0007514678885885087, + "loss": 1.1521, + "step": 6660 + }, + { + "epoch": 0.7035864978902954, + "grad_norm": 0.3307741582393646, + "learning_rate": 0.0007411100449176633, + "loss": 1.1528, + "step": 6670 + }, + { + "epoch": 0.7046413502109705, + "grad_norm": 0.32905980944633484, + "learning_rate": 0.0007308949683924791, + "loss": 1.1547, + "step": 6680 + }, + { + "epoch": 0.7056962025316456, + "grad_norm": 0.3278239667415619, + "learning_rate": 0.000720820691184658, + "loss": 1.147, + "step": 6690 + }, + { + "epoch": 0.7067510548523207, + "grad_norm": 0.32193559408187866, + "learning_rate": 0.0007108852725894269, + "loss": 1.1469, + "step": 6700 + }, + { + "epoch": 0.7078059071729957, + "grad_norm": 0.34123092889785767, + "learning_rate": 0.000701086798651681, + "loss": 1.1485, + "step": 6710 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 0.3501998484134674, + "learning_rate": 0.0006914233817972798, + "loss": 1.1398, + "step": 6720 + }, + { + "epoch": 0.709915611814346, + "grad_norm": 0.3438262939453125, + "learning_rate": 0.0006818931604694261, + "loss": 1.1436, + "step": 6730 + }, + { + "epoch": 0.7109704641350211, + "grad_norm": 0.3316311836242676, + "learning_rate": 0.0006724942987700563, + "loss": 1.1487, + "step": 6740 + }, + { + "epoch": 0.7120253164556962, + "grad_norm": 0.3469674289226532, + "learning_rate": 0.0006632249861061732, + "loss": 1.151, + "step": 6750 + }, + { + "epoch": 0.7130801687763713, + "grad_norm": 0.34641310572624207, + "learning_rate": 0.0006540834368410549, + "loss": 1.1452, + "step": 6760 + }, + { + "epoch": 0.7141350210970464, + "grad_norm": 0.33597779273986816, + "learning_rate": 0.0006450678899502701, + "loss": 1.1438, + "step": 6770 + }, + { + "epoch": 0.7151898734177216, + "grad_norm": 0.3230700194835663, + "learning_rate": 0.0006361766086824345, + "loss": 1.1433, + "step": 6780 + }, + { + "epoch": 0.7162447257383966, + "grad_norm": 0.32162338495254517, + "learning_rate": 
0.000627407880224645, + "loss": 1.1462, + "step": 6790 + }, + { + "epoch": 0.7172995780590717, + "grad_norm": 0.32030555605888367, + "learning_rate": 0.0006187600153725225, + "loss": 1.1339, + "step": 6800 + }, + { + "epoch": 0.7183544303797469, + "grad_norm": 0.3984506130218506, + "learning_rate": 0.0006102313482048055, + "loss": 1.1385, + "step": 6810 + }, + { + "epoch": 0.7194092827004219, + "grad_norm": 0.34766748547554016, + "learning_rate": 0.0006018202357624274, + "loss": 1.1386, + "step": 6820 + }, + { + "epoch": 0.7204641350210971, + "grad_norm": 0.3280739486217499, + "learning_rate": 0.0005935250577320168, + "loss": 1.1369, + "step": 6830 + }, + { + "epoch": 0.7215189873417721, + "grad_norm": 0.35947754979133606, + "learning_rate": 0.0005853442161337618, + "loss": 1.1279, + "step": 6840 + }, + { + "epoch": 0.7225738396624473, + "grad_norm": 0.3208995759487152, + "learning_rate": 0.0005772761350135759, + "loss": 1.1348, + "step": 6850 + }, + { + "epoch": 0.7236286919831224, + "grad_norm": 0.34295305609703064, + "learning_rate": 0.0005693192601395058, + "loss": 1.1285, + "step": 6860 + }, + { + "epoch": 0.7246835443037974, + "grad_norm": 0.3107556402683258, + "learning_rate": 0.000561472058702326, + "loss": 1.1269, + "step": 6870 + }, + { + "epoch": 0.7257383966244726, + "grad_norm": 0.31452247500419617, + "learning_rate": 0.000553733019020258, + "loss": 1.1349, + "step": 6880 + }, + { + "epoch": 0.7267932489451476, + "grad_norm": 0.3466354012489319, + "learning_rate": 0.0005461006502477612, + "loss": 1.1244, + "step": 6890 + }, + { + "epoch": 0.7278481012658228, + "grad_norm": 0.3526834547519684, + "learning_rate": 0.0005385734820883369, + "loss": 1.1238, + "step": 6900 + }, + { + "epoch": 0.7289029535864979, + "grad_norm": 0.33901646733283997, + "learning_rate": 0.0005311500645112907, + "loss": 1.1459, + "step": 6910 + }, + { + "epoch": 0.729957805907173, + "grad_norm": 0.3271549642086029, + "learning_rate": 0.0005238289674723993, + "loss": 1.1249, + 
"step": 6920 + }, + { + "epoch": 0.7310126582278481, + "grad_norm": 0.330404132604599, + "learning_rate": 0.0005166087806384274, + "loss": 1.1319, + "step": 6930 + }, + { + "epoch": 0.7320675105485233, + "grad_norm": 0.32270577549934387, + "learning_rate": 0.0005094881131154418, + "loss": 1.1339, + "step": 6940 + }, + { + "epoch": 0.7331223628691983, + "grad_norm": 0.34502968192100525, + "learning_rate": 0.0005024655931808696, + "loss": 1.1294, + "step": 6950 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 0.32282140851020813, + "learning_rate": 0.0004955398680192508, + "loss": 1.124, + "step": 6960 + }, + { + "epoch": 0.7352320675105485, + "grad_norm": 0.33826205134391785, + "learning_rate": 0.000488709603461632, + "loss": 1.1189, + "step": 6970 + }, + { + "epoch": 0.7362869198312236, + "grad_norm": 0.31764915585517883, + "learning_rate": 0.000481973483728553, + "loss": 1.1212, + "step": 6980 + }, + { + "epoch": 0.7373417721518988, + "grad_norm": 0.3235630989074707, + "learning_rate": 0.0004753302111765748, + "loss": 1.1206, + "step": 6990 + }, + { + "epoch": 0.7383966244725738, + "grad_norm": 0.3392946422100067, + "learning_rate": 0.0004687785060483032, + "loss": 1.1296, + "step": 7000 + }, + { + "epoch": 0.739451476793249, + "grad_norm": 0.3291850984096527, + "learning_rate": 0.0004623171062258558, + "loss": 1.103, + "step": 7010 + }, + { + "epoch": 0.740506329113924, + "grad_norm": 0.34219229221343994, + "learning_rate": 0.0004559447669877288, + "loss": 1.1152, + "step": 7020 + }, + { + "epoch": 0.7415611814345991, + "grad_norm": 0.32017433643341064, + "learning_rate": 0.00044966026076901413, + "loss": 1.1238, + "step": 7030 + }, + { + "epoch": 0.7426160337552743, + "grad_norm": 0.41779911518096924, + "learning_rate": 0.00044346237692492177, + "loss": 1.1272, + "step": 7040 + }, + { + "epoch": 0.7436708860759493, + "grad_norm": 0.37291568517684937, + "learning_rate": 0.0004373499214975615, + "loss": 1.111, + "step": 7050 + }, + { + "epoch": 
0.7447257383966245, + "grad_norm": 0.4117240309715271, + "learning_rate": 0.0004313217169859396, + "loss": 1.1179, + "step": 7060 + }, + { + "epoch": 0.7457805907172996, + "grad_norm": 0.3707019090652466, + "learning_rate": 0.0004253766021191256, + "loss": 1.1227, + "step": 7070 + }, + { + "epoch": 0.7468354430379747, + "grad_norm": 0.3181743621826172, + "learning_rate": 0.00041951343163254497, + "loss": 1.1184, + "step": 7080 + }, + { + "epoch": 0.7478902953586498, + "grad_norm": 0.33088555932044983, + "learning_rate": 0.00041373107604735626, + "loss": 1.1144, + "step": 7090 + }, + { + "epoch": 0.7489451476793249, + "grad_norm": 0.3327450752258301, + "learning_rate": 0.0004080284214528687, + "loss": 1.1118, + "step": 7100 + }, + { + "epoch": 0.75, + "grad_norm": 0.3445628881454468, + "learning_rate": 0.0004024043692919589, + "loss": 1.124, + "step": 7110 + }, + { + "epoch": 0.7510548523206751, + "grad_norm": 0.36757397651672363, + "learning_rate": 0.0003968578361494449, + "loss": 1.1146, + "step": 7120 + }, + { + "epoch": 0.7521097046413502, + "grad_norm": 0.3530139923095703, + "learning_rate": 0.000391387753543378, + "loss": 1.1237, + "step": 7130 + }, + { + "epoch": 0.7531645569620253, + "grad_norm": 0.33693066239356995, + "learning_rate": 0.00038599306771921023, + "loss": 1.109, + "step": 7140 + }, + { + "epoch": 0.7542194092827004, + "grad_norm": 0.3698440492153168, + "learning_rate": 0.0003806727394468004, + "loss": 1.1047, + "step": 7150 + }, + { + "epoch": 0.7552742616033755, + "grad_norm": 0.3456558883190155, + "learning_rate": 0.0003754257438202162, + "loss": 1.1114, + "step": 7160 + }, + { + "epoch": 0.7563291139240507, + "grad_norm": 0.3305872082710266, + "learning_rate": 0.0003702510700602974, + "loss": 1.1189, + "step": 7170 + }, + { + "epoch": 0.7573839662447257, + "grad_norm": 0.33361759781837463, + "learning_rate": 0.0003651477213199393, + "loss": 1.0986, + "step": 7180 + }, + { + "epoch": 0.7584388185654009, + "grad_norm": 0.32957959175109863, + 
"learning_rate": 0.000360114714492061, + "loss": 1.1006, + "step": 7190 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 0.3591114580631256, + "learning_rate": 0.0003551510800202195, + "loss": 1.1035, + "step": 7200 + }, + { + "epoch": 0.760548523206751, + "grad_norm": 0.39091235399246216, + "learning_rate": 0.0003502558617118353, + "loss": 1.1091, + "step": 7210 + }, + { + "epoch": 0.7616033755274262, + "grad_norm": 0.32055342197418213, + "learning_rate": 0.0003454281165539914, + "loss": 1.121, + "step": 7220 + }, + { + "epoch": 0.7626582278481012, + "grad_norm": 0.35132747888565063, + "learning_rate": 0.00034066691453177176, + "loss": 1.1166, + "step": 7230 + }, + { + "epoch": 0.7637130801687764, + "grad_norm": 0.31827104091644287, + "learning_rate": 0.0003359713384491037, + "loss": 1.1138, + "step": 7240 + }, + { + "epoch": 0.7647679324894515, + "grad_norm": 0.3282644748687744, + "learning_rate": 0.00033134048375206944, + "loss": 1.1073, + "step": 7250 + }, + { + "epoch": 0.7658227848101266, + "grad_norm": 0.32072553038597107, + "learning_rate": 0.0003267734583546536, + "loss": 1.1065, + "step": 7260 + }, + { + "epoch": 0.7668776371308017, + "grad_norm": 0.36000901460647583, + "learning_rate": 0.00032226938246689157, + "loss": 1.1056, + "step": 7270 + }, + { + "epoch": 0.7679324894514767, + "grad_norm": 0.33699488639831543, + "learning_rate": 0.0003178273884253874, + "loss": 1.1058, + "step": 7280 + }, + { + "epoch": 0.7689873417721519, + "grad_norm": 0.33975517749786377, + "learning_rate": 0.0003134466205261674, + "loss": 1.1156, + "step": 7290 + }, + { + "epoch": 0.770042194092827, + "grad_norm": 0.3478587567806244, + "learning_rate": 0.0003091262348598378, + "loss": 1.1191, + "step": 7300 + }, + { + "epoch": 0.7710970464135021, + "grad_norm": 0.32292914390563965, + "learning_rate": 0.0003048653991490141, + "loss": 1.0999, + "step": 7310 + }, + { + "epoch": 0.7721518987341772, + "grad_norm": 0.32401716709136963, + "learning_rate": 0.00030066329258799187, 
+ "loss": 1.0977, + "step": 7320 + }, + { + "epoch": 0.7732067510548524, + "grad_norm": 0.34817883372306824, + "learning_rate": 0.0002965191056846266, + "loss": 1.1029, + "step": 7330 + }, + { + "epoch": 0.7742616033755274, + "grad_norm": 0.3278037905693054, + "learning_rate": 0.000292432040104394, + "loss": 1.0939, + "step": 7340 + }, + { + "epoch": 0.7753164556962026, + "grad_norm": 0.3233949542045593, + "learning_rate": 0.00028840130851659853, + "loss": 1.0947, + "step": 7350 + }, + { + "epoch": 0.7763713080168776, + "grad_norm": 0.32060080766677856, + "learning_rate": 0.0002844261344427028, + "loss": 1.1021, + "step": 7360 + }, + { + "epoch": 0.7774261603375527, + "grad_norm": 0.33821195363998413, + "learning_rate": 0.0002805057521067471, + "loss": 1.1042, + "step": 7370 + }, + { + "epoch": 0.7784810126582279, + "grad_norm": 0.33443766832351685, + "learning_rate": 0.00027663940628783017, + "loss": 1.0892, + "step": 7380 + }, + { + "epoch": 0.7795358649789029, + "grad_norm": 0.34301623702049255, + "learning_rate": 0.00027282635217462393, + "loss": 1.1006, + "step": 7390 + }, + { + "epoch": 0.7805907172995781, + "grad_norm": 0.3341234028339386, + "learning_rate": 0.0002690658552218937, + "loss": 1.1055, + "step": 7400 + }, + { + "epoch": 0.7816455696202531, + "grad_norm": 0.31503307819366455, + "learning_rate": 0.00026535719100899516, + "loss": 1.0884, + "step": 7410 + }, + { + "epoch": 0.7827004219409283, + "grad_norm": 0.33604514598846436, + "learning_rate": 0.00026169964510032245, + "loss": 1.0909, + "step": 7420 + }, + { + "epoch": 0.7837552742616034, + "grad_norm": 0.3207562565803528, + "learning_rate": 0.00025809251290767984, + "loss": 1.0814, + "step": 7430 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 0.3265933394432068, + "learning_rate": 0.00025453509955454957, + "loss": 1.0837, + "step": 7440 + }, + { + "epoch": 0.7858649789029536, + "grad_norm": 0.3195817768573761, + "learning_rate": 0.00025102671974223175, + "loss": 1.0861, + "step": 7450 + 
}, + { + "epoch": 0.7869198312236287, + "grad_norm": 0.34826865792274475, + "learning_rate": 0.00024756669761782815, + "loss": 1.0951, + "step": 7460 + }, + { + "epoch": 0.7879746835443038, + "grad_norm": 0.3280653655529022, + "learning_rate": 0.0002441543666440464, + "loss": 1.0878, + "step": 7470 + }, + { + "epoch": 0.7890295358649789, + "grad_norm": 0.3230268657207489, + "learning_rate": 0.00024078906947079878, + "loss": 1.094, + "step": 7480 + }, + { + "epoch": 0.790084388185654, + "grad_norm": 0.3275749385356903, + "learning_rate": 0.00023747015780857005, + "loss": 1.0986, + "step": 7490 + }, + { + "epoch": 0.7911392405063291, + "grad_norm": 0.3295617401599884, + "learning_rate": 0.00023419699230353144, + "loss": 1.0968, + "step": 7500 + }, + { + "epoch": 0.7921940928270043, + "grad_norm": 0.3250296115875244, + "learning_rate": 0.00023096894241437586, + "loss": 1.1005, + "step": 7510 + }, + { + "epoch": 0.7932489451476793, + "grad_norm": 0.3130958676338196, + "learning_rate": 0.00022778538629085056, + "loss": 1.0857, + "step": 7520 + }, + { + "epoch": 0.7943037974683544, + "grad_norm": 0.32801106572151184, + "learning_rate": 0.00022464571065396427, + "loss": 1.0861, + "step": 7530 + }, + { + "epoch": 0.7953586497890295, + "grad_norm": 0.3705176115036011, + "learning_rate": 0.00022154931067784521, + "loss": 1.085, + "step": 7540 + }, + { + "epoch": 0.7964135021097046, + "grad_norm": 0.34427809715270996, + "learning_rate": 0.00021849558987322782, + "loss": 1.082, + "step": 7550 + }, + { + "epoch": 0.7974683544303798, + "grad_norm": 0.3411599397659302, + "learning_rate": 0.0002154839599725452, + "loss": 1.0857, + "step": 7560 + }, + { + "epoch": 0.7985232067510548, + "grad_norm": 0.3404884934425354, + "learning_rate": 0.00021251384081660544, + "loss": 1.0935, + "step": 7570 + }, + { + "epoch": 0.79957805907173, + "grad_norm": 0.31883886456489563, + "learning_rate": 0.0002095846602428303, + "loss": 1.0889, + "step": 7580 + }, + { + "epoch": 0.8006329113924051, + 
"grad_norm": 0.3307037949562073, + "learning_rate": 0.00020669585397503358, + "loss": 1.0802, + "step": 7590 + }, + { + "epoch": 0.8016877637130801, + "grad_norm": 0.3246137201786041, + "learning_rate": 0.0002038468655147195, + "loss": 1.0874, + "step": 7600 + }, + { + "epoch": 0.8027426160337553, + "grad_norm": 0.32273244857788086, + "learning_rate": 0.00020103714603387894, + "loss": 1.0986, + "step": 7610 + }, + { + "epoch": 0.8037974683544303, + "grad_norm": 0.31461453437805176, + "learning_rate": 0.00019826615426926338, + "loss": 1.0736, + "step": 7620 + }, + { + "epoch": 0.8048523206751055, + "grad_norm": 0.34340304136276245, + "learning_rate": 0.00019553335641811625, + "loss": 1.0947, + "step": 7630 + }, + { + "epoch": 0.8059071729957806, + "grad_norm": 0.3382534086704254, + "learning_rate": 0.0001928382260353415, + "loss": 1.0853, + "step": 7640 + }, + { + "epoch": 0.8069620253164557, + "grad_norm": 0.3437410891056061, + "learning_rate": 0.00019018024393208902, + "loss": 1.102, + "step": 7650 + }, + { + "epoch": 0.8080168776371308, + "grad_norm": 0.32129502296447754, + "learning_rate": 0.00018755889807573872, + "loss": 1.0771, + "step": 7660 + }, + { + "epoch": 0.8090717299578059, + "grad_norm": 0.32240381836891174, + "learning_rate": 0.00018497368349126262, + "loss": 1.0863, + "step": 7670 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 0.3478579819202423, + "learning_rate": 0.00018242410216394648, + "loss": 1.0964, + "step": 7680 + }, + { + "epoch": 0.8111814345991561, + "grad_norm": 0.3518509268760681, + "learning_rate": 0.0001799096629434529, + "loss": 1.0721, + "step": 7690 + }, + { + "epoch": 0.8122362869198312, + "grad_norm": 0.32270342111587524, + "learning_rate": 0.00017742988144920578, + "loss": 1.0828, + "step": 7700 + }, + { + "epoch": 0.8132911392405063, + "grad_norm": 0.323770135641098, + "learning_rate": 0.00017498427997707976, + "loss": 1.0796, + "step": 7710 + }, + { + "epoch": 0.8143459915611815, + "grad_norm": 0.33255451917648315, + 
"learning_rate": 0.00017257238740737548, + "loss": 1.0837, + "step": 7720 + }, + { + "epoch": 0.8154008438818565, + "grad_norm": 0.35608723759651184, + "learning_rate": 0.00017019373911406307, + "loss": 1.0903, + "step": 7730 + }, + { + "epoch": 0.8164556962025317, + "grad_norm": 0.31979310512542725, + "learning_rate": 0.000167847876875277, + "loss": 1.0904, + "step": 7740 + }, + { + "epoch": 0.8175105485232067, + "grad_norm": 0.35860174894332886, + "learning_rate": 0.00016553434878504428, + "loss": 1.078, + "step": 7750 + }, + { + "epoch": 0.8185654008438819, + "grad_norm": 0.3339579701423645, + "learning_rate": 0.00016325270916622947, + "loss": 1.074, + "step": 7760 + }, + { + "epoch": 0.819620253164557, + "grad_norm": 0.33170056343078613, + "learning_rate": 0.00016100251848467966, + "loss": 1.0821, + "step": 7770 + }, + { + "epoch": 0.820675105485232, + "grad_norm": 0.3272804021835327, + "learning_rate": 0.0001587833432645528, + "loss": 1.0775, + "step": 7780 + }, + { + "epoch": 0.8217299578059072, + "grad_norm": 0.3446336090564728, + "learning_rate": 0.00015659475600481292, + "loss": 1.0926, + "step": 7790 + }, + { + "epoch": 0.8227848101265823, + "grad_norm": 0.3152468502521515, + "learning_rate": 0.00015443633509687688, + "loss": 1.0805, + "step": 7800 + }, + { + "epoch": 0.8238396624472574, + "grad_norm": 0.3231417238712311, + "learning_rate": 0.00015230766474339536, + "loss": 1.0813, + "step": 7810 + }, + { + "epoch": 0.8248945147679325, + "grad_norm": 0.32221952080726624, + "learning_rate": 0.00015020833487815416, + "loss": 1.0871, + "step": 7820 + }, + { + "epoch": 0.8259493670886076, + "grad_norm": 0.3186984360218048, + "learning_rate": 0.0001481379410870792, + "loss": 1.079, + "step": 7830 + }, + { + "epoch": 0.8270042194092827, + "grad_norm": 0.3271889090538025, + "learning_rate": 0.00014609608453033013, + "loss": 1.0644, + "step": 7840 + }, + { + "epoch": 0.8280590717299579, + "grad_norm": 0.34038275480270386, + "learning_rate": 
0.00014408237186546807, + "loss": 1.072, + "step": 7850 + }, + { + "epoch": 0.8291139240506329, + "grad_norm": 0.3195663094520569, + "learning_rate": 0.00014209641517168273, + "loss": 1.0609, + "step": 7860 + }, + { + "epoch": 0.830168776371308, + "grad_norm": 0.3180614709854126, + "learning_rate": 0.00014013783187506265, + "loss": 1.0764, + "step": 7870 + }, + { + "epoch": 0.8312236286919831, + "grad_norm": 0.33012938499450684, + "learning_rate": 0.00013820624467489697, + "loss": 1.0924, + "step": 7880 + }, + { + "epoch": 0.8322784810126582, + "grad_norm": 0.3188948631286621, + "learning_rate": 0.00013630128147099213, + "loss": 1.0883, + "step": 7890 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.3357279896736145, + "learning_rate": 0.00013442257529199068, + "loss": 1.0711, + "step": 7900 + }, + { + "epoch": 0.8343881856540084, + "grad_norm": 0.32860061526298523, + "learning_rate": 0.00013256976422467803, + "loss": 1.0801, + "step": 7910 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 0.33959388732910156, + "learning_rate": 0.00013074249134426366, + "loss": 1.0766, + "step": 7920 + }, + { + "epoch": 0.8364978902953587, + "grad_norm": 0.323219895362854, + "learning_rate": 0.0001289404046456233, + "loss": 1.0863, + "step": 7930 + }, + { + "epoch": 0.8375527426160337, + "grad_norm": 0.32049325108528137, + "learning_rate": 0.0001271631569754887, + "loss": 1.0798, + "step": 7940 + }, + { + "epoch": 0.8386075949367089, + "grad_norm": 0.3425719141960144, + "learning_rate": 0.0001254104059655723, + "loss": 1.0843, + "step": 7950 + }, + { + "epoch": 0.8396624472573839, + "grad_norm": 0.31871697306632996, + "learning_rate": 0.00012368181396661337, + "loss": 1.0694, + "step": 7960 + }, + { + "epoch": 0.8407172995780591, + "grad_norm": 0.3283820152282715, + "learning_rate": 0.00012197704798333364, + "loss": 1.0705, + "step": 7970 + }, + { + "epoch": 0.8417721518987342, + "grad_norm": 0.31667330861091614, + "learning_rate": 0.00012029577961028894, + "loss": 
1.0704, + "step": 7980 + }, + { + "epoch": 0.8428270042194093, + "grad_norm": 0.36018213629722595, + "learning_rate": 0.00011863768496860542, + "loss": 1.0803, + "step": 7990 + }, + { + "epoch": 0.8438818565400844, + "grad_norm": 0.3193598985671997, + "learning_rate": 0.00011700244464358777, + "loss": 1.0763, + "step": 8000 + }, + { + "epoch": 0.8449367088607594, + "grad_norm": 0.33096808195114136, + "learning_rate": 0.00011538974362318715, + "loss": 1.0784, + "step": 8010 + }, + { + "epoch": 0.8459915611814346, + "grad_norm": 0.3247699737548828, + "learning_rate": 0.00011379927123731737, + "loss": 1.0745, + "step": 8020 + }, + { + "epoch": 0.8470464135021097, + "grad_norm": 0.33818915486335754, + "learning_rate": 0.0001122307210980077, + "loss": 1.0732, + "step": 8030 + }, + { + "epoch": 0.8481012658227848, + "grad_norm": 0.3458991050720215, + "learning_rate": 0.00011068379104038026, + "loss": 1.0837, + "step": 8040 + }, + { + "epoch": 0.8491561181434599, + "grad_norm": 0.35434862971305847, + "learning_rate": 0.00010915818306444116, + "loss": 1.0648, + "step": 8050 + }, + { + "epoch": 0.8502109704641351, + "grad_norm": 0.3178817331790924, + "learning_rate": 0.00010765360327767384, + "loss": 1.0742, + "step": 8060 + }, + { + "epoch": 0.8512658227848101, + "grad_norm": 0.3386595547199249, + "learning_rate": 0.00010616976183842376, + "loss": 1.078, + "step": 8070 + }, + { + "epoch": 0.8523206751054853, + "grad_norm": 0.333050400018692, + "learning_rate": 0.00010470637290006365, + "loss": 1.0806, + "step": 8080 + }, + { + "epoch": 0.8533755274261603, + "grad_norm": 0.3198642432689667, + "learning_rate": 0.00010326315455592764, + "loss": 1.0683, + "step": 8090 + }, + { + "epoch": 0.8544303797468354, + "grad_norm": 0.3254936933517456, + "learning_rate": 0.0001018398287850053, + "loss": 1.0654, + "step": 8100 + }, + { + "epoch": 0.8554852320675106, + "grad_norm": 0.3687814176082611, + "learning_rate": 0.00010043612139838357, + "loss": 1.0796, + "step": 8110 + }, + { + 
"epoch": 0.8565400843881856, + "grad_norm": 0.34864723682403564, + "learning_rate": 9.905176198642719e-05, + "loss": 1.0716, + "step": 8120 + }, + { + "epoch": 0.8575949367088608, + "grad_norm": 0.3522048890590668, + "learning_rate": 9.76864838666871e-05, + "loss": 1.0719, + "step": 8130 + }, + { + "epoch": 0.8586497890295358, + "grad_norm": 0.3265816867351532, + "learning_rate": 9.634002403252676e-05, + "loss": 1.0686, + "step": 8140 + }, + { + "epoch": 0.859704641350211, + "grad_norm": 0.3169366419315338, + "learning_rate": 9.501212310245681e-05, + "loss": 1.0684, + "step": 8150 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 0.33043038845062256, + "learning_rate": 9.370252527016777e-05, + "loss": 1.0774, + "step": 8160 + }, + { + "epoch": 0.8618143459915611, + "grad_norm": 0.3303966820240021, + "learning_rate": 9.241097825525163e-05, + "loss": 1.0683, + "step": 8170 + }, + { + "epoch": 0.8628691983122363, + "grad_norm": 0.33609864115715027, + "learning_rate": 9.113723325460276e-05, + "loss": 1.0735, + "step": 8180 + }, + { + "epoch": 0.8639240506329114, + "grad_norm": 0.3414512574672699, + "learning_rate": 8.988104489448849e-05, + "loss": 1.0685, + "step": 8190 + }, + { + "epoch": 0.8649789029535865, + "grad_norm": 0.31755390763282776, + "learning_rate": 8.864217118328042e-05, + "loss": 1.0803, + "step": 8200 + }, + { + "epoch": 0.8660337552742616, + "grad_norm": 0.3173295557498932, + "learning_rate": 8.742037346483729e-05, + "loss": 1.0733, + "step": 8210 + }, + { + "epoch": 0.8670886075949367, + "grad_norm": 0.3303743302822113, + "learning_rate": 8.62154163725303e-05, + "loss": 1.0794, + "step": 8220 + }, + { + "epoch": 0.8681434599156118, + "grad_norm": 0.3222305178642273, + "learning_rate": 8.502706778390219e-05, + "loss": 1.0764, + "step": 8230 + }, + { + "epoch": 0.869198312236287, + "grad_norm": 0.3168421685695648, + "learning_rate": 8.38550987759513e-05, + "loss": 1.0713, + "step": 8240 + }, + { + "epoch": 0.870253164556962, + "grad_norm": 
0.3277452290058136, + "learning_rate": 8.269928358103191e-05, + "loss": 1.0841, + "step": 8250 + }, + { + "epoch": 0.8713080168776371, + "grad_norm": 0.31893619894981384, + "learning_rate": 8.155939954336243e-05, + "loss": 1.0776, + "step": 8260 + }, + { + "epoch": 0.8723628691983122, + "grad_norm": 0.3293255865573883, + "learning_rate": 8.043522707613312e-05, + "loss": 1.0716, + "step": 8270 + }, + { + "epoch": 0.8734177215189873, + "grad_norm": 0.3259051740169525, + "learning_rate": 7.932654961920486e-05, + "loss": 1.058, + "step": 8280 + }, + { + "epoch": 0.8744725738396625, + "grad_norm": 0.34142494201660156, + "learning_rate": 7.823315359739135e-05, + "loss": 1.0615, + "step": 8290 + }, + { + "epoch": 0.8755274261603375, + "grad_norm": 0.31675177812576294, + "learning_rate": 7.715482837931577e-05, + "loss": 1.0818, + "step": 8300 + }, + { + "epoch": 0.8765822784810127, + "grad_norm": 0.32128244638442993, + "learning_rate": 7.6091366236835e-05, + "loss": 1.0615, + "step": 8310 + }, + { + "epoch": 0.8776371308016878, + "grad_norm": 0.33591708540916443, + "learning_rate": 7.504256230502289e-05, + "loss": 1.0801, + "step": 8320 + }, + { + "epoch": 0.8786919831223629, + "grad_norm": 0.3197396397590637, + "learning_rate": 7.400821454270524e-05, + "loss": 1.0713, + "step": 8330 + }, + { + "epoch": 0.879746835443038, + "grad_norm": 0.32733380794525146, + "learning_rate": 7.29881236935386e-05, + "loss": 1.0613, + "step": 8340 + }, + { + "epoch": 0.880801687763713, + "grad_norm": 0.3151637017726898, + "learning_rate": 7.198209324762562e-05, + "loss": 1.0643, + "step": 8350 + }, + { + "epoch": 0.8818565400843882, + "grad_norm": 0.3231916129589081, + "learning_rate": 7.098992940365946e-05, + "loss": 1.0613, + "step": 8360 + }, + { + "epoch": 0.8829113924050633, + "grad_norm": 0.3338322937488556, + "learning_rate": 7.001144103159e-05, + "loss": 1.0692, + "step": 8370 + }, + { + "epoch": 0.8839662447257384, + "grad_norm": 0.3360345661640167, + "learning_rate": 
6.904643963580461e-05, + "loss": 1.0743, + "step": 8380 + }, + { + "epoch": 0.8850210970464135, + "grad_norm": 0.3391522169113159, + "learning_rate": 6.809473931881644e-05, + "loss": 1.0661, + "step": 8390 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 0.32648056745529175, + "learning_rate": 6.71561567454532e-05, + "loss": 1.0661, + "step": 8400 + }, + { + "epoch": 0.8871308016877637, + "grad_norm": 0.3194596767425537, + "learning_rate": 6.623051110753948e-05, + "loss": 1.0827, + "step": 8410 + }, + { + "epoch": 0.8881856540084389, + "grad_norm": 0.31810277700424194, + "learning_rate": 6.531762408906607e-05, + "loss": 1.0726, + "step": 8420 + }, + { + "epoch": 0.8892405063291139, + "grad_norm": 0.3305910527706146, + "learning_rate": 6.441731983183912e-05, + "loss": 1.0709, + "step": 8430 + }, + { + "epoch": 0.890295358649789, + "grad_norm": 0.32806795835494995, + "learning_rate": 6.352942490160292e-05, + "loss": 1.0623, + "step": 8440 + }, + { + "epoch": 0.8913502109704642, + "grad_norm": 0.333575040102005, + "learning_rate": 6.265376825462966e-05, + "loss": 1.0708, + "step": 8450 + }, + { + "epoch": 0.8924050632911392, + "grad_norm": 0.35335201025009155, + "learning_rate": 6.179018120476945e-05, + "loss": 1.0659, + "step": 8460 + }, + { + "epoch": 0.8934599156118144, + "grad_norm": 0.3264673948287964, + "learning_rate": 6.0938497390954946e-05, + "loss": 1.0669, + "step": 8470 + }, + { + "epoch": 0.8945147679324894, + "grad_norm": 0.3392159938812256, + "learning_rate": 6.009855274515339e-05, + "loss": 1.0623, + "step": 8480 + }, + { + "epoch": 0.8955696202531646, + "grad_norm": 0.3224976360797882, + "learning_rate": 5.9270185460760735e-05, + "loss": 1.0693, + "step": 8490 + }, + { + "epoch": 0.8966244725738397, + "grad_norm": 0.33346402645111084, + "learning_rate": 5.8453235961431225e-05, + "loss": 1.0586, + "step": 8500 + }, + { + "epoch": 0.8976793248945147, + "grad_norm": 0.32544925808906555, + "learning_rate": 5.764754687033678e-05, + "loss": 1.0688, + 
"step": 8510 + }, + { + "epoch": 0.8987341772151899, + "grad_norm": 0.33190402388572693, + "learning_rate": 5.6852962979849836e-05, + "loss": 1.0658, + "step": 8520 + }, + { + "epoch": 0.8997890295358649, + "grad_norm": 0.3238281011581421, + "learning_rate": 5.6069331221644284e-05, + "loss": 1.0782, + "step": 8530 + }, + { + "epoch": 0.9008438818565401, + "grad_norm": 0.32378917932510376, + "learning_rate": 5.529650063720842e-05, + "loss": 1.0722, + "step": 8540 + }, + { + "epoch": 0.9018987341772152, + "grad_norm": 0.3402920663356781, + "learning_rate": 5.453432234876445e-05, + "loss": 1.066, + "step": 8550 + }, + { + "epoch": 0.9029535864978903, + "grad_norm": 0.32455769181251526, + "learning_rate": 5.37826495305886e-05, + "loss": 1.0591, + "step": 8560 + }, + { + "epoch": 0.9040084388185654, + "grad_norm": 0.3707723319530487, + "learning_rate": 5.304133738072674e-05, + "loss": 1.0771, + "step": 8570 + }, + { + "epoch": 0.9050632911392406, + "grad_norm": 0.3264203369617462, + "learning_rate": 5.2310243093099814e-05, + "loss": 1.0685, + "step": 8580 + }, + { + "epoch": 0.9061181434599156, + "grad_norm": 0.343057245016098, + "learning_rate": 5.158922582999368e-05, + "loss": 1.0724, + "step": 8590 + }, + { + "epoch": 0.9071729957805907, + "grad_norm": 0.32586669921875, + "learning_rate": 5.087814669492819e-05, + "loss": 1.0629, + "step": 8600 + }, + { + "epoch": 0.9082278481012658, + "grad_norm": 0.33987531065940857, + "learning_rate": 5.017686870590028e-05, + "loss": 1.0668, + "step": 8610 + }, + { + "epoch": 0.9092827004219409, + "grad_norm": 0.32024064660072327, + "learning_rate": 4.948525676899577e-05, + "loss": 1.0642, + "step": 8620 + }, + { + "epoch": 0.9103375527426161, + "grad_norm": 0.33926844596862793, + "learning_rate": 4.880317765236493e-05, + "loss": 1.0714, + "step": 8630 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 0.36164024472236633, + "learning_rate": 4.8130499960556755e-05, + "loss": 1.0544, + "step": 8640 + }, + { + "epoch": 
0.9124472573839663, + "grad_norm": 0.3245364725589752, + "learning_rate": 4.746709410920699e-05, + "loss": 1.0571, + "step": 8650 + }, + { + "epoch": 0.9135021097046413, + "grad_norm": 0.3439021408557892, + "learning_rate": 4.681283230007507e-05, + "loss": 1.0616, + "step": 8660 + }, + { + "epoch": 0.9145569620253164, + "grad_norm": 0.3390875458717346, + "learning_rate": 4.616758849642509e-05, + "loss": 1.0684, + "step": 8670 + }, + { + "epoch": 0.9156118143459916, + "grad_norm": 0.3170855641365051, + "learning_rate": 4.553123839874615e-05, + "loss": 1.076, + "step": 8680 + }, + { + "epoch": 0.9166666666666666, + "grad_norm": 0.324824720621109, + "learning_rate": 4.490365942080736e-05, + "loss": 1.0691, + "step": 8690 + }, + { + "epoch": 0.9177215189873418, + "grad_norm": 0.3236071467399597, + "learning_rate": 4.428473066604285e-05, + "loss": 1.0678, + "step": 8700 + }, + { + "epoch": 0.9187763713080169, + "grad_norm": 0.33034569025039673, + "learning_rate": 4.367433290426233e-05, + "loss": 1.0675, + "step": 8710 + }, + { + "epoch": 0.919831223628692, + "grad_norm": 0.3257054388523102, + "learning_rate": 4.3072348548682595e-05, + "loss": 1.0606, + "step": 8720 + }, + { + "epoch": 0.9208860759493671, + "grad_norm": 0.31744372844696045, + "learning_rate": 4.247866163327575e-05, + "loss": 1.0726, + "step": 8730 + }, + { + "epoch": 0.9219409282700421, + "grad_norm": 0.33041128516197205, + "learning_rate": 4.1893157790429404e-05, + "loss": 1.0634, + "step": 8740 + }, + { + "epoch": 0.9229957805907173, + "grad_norm": 0.3342299461364746, + "learning_rate": 4.1315724228915066e-05, + "loss": 1.0609, + "step": 8750 + }, + { + "epoch": 0.9240506329113924, + "grad_norm": 0.32655617594718933, + "learning_rate": 4.074624971216005e-05, + "loss": 1.0516, + "step": 8760 + }, + { + "epoch": 0.9251054852320675, + "grad_norm": 0.32039201259613037, + "learning_rate": 4.018462453681889e-05, + "loss": 1.0596, + "step": 8770 + }, + { + "epoch": 0.9261603375527426, + "grad_norm": 
0.33058446645736694, + "learning_rate": 3.963074051164014e-05, + "loss": 1.0648, + "step": 8780 + }, + { + "epoch": 0.9272151898734177, + "grad_norm": 0.32546499371528625, + "learning_rate": 3.908449093662446e-05, + "loss": 1.0598, + "step": 8790 + }, + { + "epoch": 0.9282700421940928, + "grad_norm": 0.3228848874568939, + "learning_rate": 3.854577058246998e-05, + "loss": 1.0557, + "step": 8800 + }, + { + "epoch": 0.929324894514768, + "grad_norm": 0.33862391114234924, + "learning_rate": 3.801447567030094e-05, + "loss": 1.076, + "step": 8810 + }, + { + "epoch": 0.930379746835443, + "grad_norm": 0.3227675259113312, + "learning_rate": 3.7490503851675777e-05, + "loss": 1.0605, + "step": 8820 + }, + { + "epoch": 0.9314345991561181, + "grad_norm": 0.3217218816280365, + "learning_rate": 3.6973754188870806e-05, + "loss": 1.0786, + "step": 8830 + }, + { + "epoch": 0.9324894514767933, + "grad_norm": 0.3212469220161438, + "learning_rate": 3.6464127135435536e-05, + "loss": 1.066, + "step": 8840 + }, + { + "epoch": 0.9335443037974683, + "grad_norm": 0.3378359377384186, + "learning_rate": 3.596152451701616e-05, + "loss": 1.0651, + "step": 8850 + }, + { + "epoch": 0.9345991561181435, + "grad_norm": 0.31813666224479675, + "learning_rate": 3.5465849512443226e-05, + "loss": 1.0633, + "step": 8860 + }, + { + "epoch": 0.9356540084388185, + "grad_norm": 0.32426491379737854, + "learning_rate": 3.4977006635080086e-05, + "loss": 1.064, + "step": 8870 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 0.34550321102142334, + "learning_rate": 3.449490171442838e-05, + "loss": 1.0705, + "step": 8880 + }, + { + "epoch": 0.9377637130801688, + "grad_norm": 0.3262031674385071, + "learning_rate": 3.401944187798702e-05, + "loss": 1.0669, + "step": 8890 + }, + { + "epoch": 0.9388185654008439, + "grad_norm": 0.31900161504745483, + "learning_rate": 3.355053553336137e-05, + "loss": 1.058, + "step": 8900 + }, + { + "epoch": 0.939873417721519, + "grad_norm": 0.3401828706264496, + "learning_rate": 
3.308809235061882e-05, + "loss": 1.0604, + "step": 8910 + }, + { + "epoch": 0.9409282700421941, + "grad_norm": 0.33225199580192566, + "learning_rate": 3.263202324488772e-05, + "loss": 1.0653, + "step": 8920 + }, + { + "epoch": 0.9419831223628692, + "grad_norm": 0.32263875007629395, + "learning_rate": 3.218224035919609e-05, + "loss": 1.0632, + "step": 8930 + }, + { + "epoch": 0.9430379746835443, + "grad_norm": 0.32119113206863403, + "learning_rate": 3.173865704754688e-05, + "loss": 1.0623, + "step": 8940 + }, + { + "epoch": 0.9440928270042194, + "grad_norm": 0.3211652338504791, + "learning_rate": 3.130118785822657e-05, + "loss": 1.0641, + "step": 8950 + }, + { + "epoch": 0.9451476793248945, + "grad_norm": 0.3391672372817993, + "learning_rate": 3.08697485173437e-05, + "loss": 1.0631, + "step": 8960 + }, + { + "epoch": 0.9462025316455697, + "grad_norm": 0.3661402761936188, + "learning_rate": 3.0444255912594442e-05, + "loss": 1.0639, + "step": 8970 + }, + { + "epoch": 0.9472573839662447, + "grad_norm": 0.35915690660476685, + "learning_rate": 3.002462807725185e-05, + "loss": 1.0599, + "step": 8980 + }, + { + "epoch": 0.9483122362869199, + "grad_norm": 0.32106488943099976, + "learning_rate": 2.9610784174375868e-05, + "loss": 1.0659, + "step": 8990 + }, + { + "epoch": 0.9493670886075949, + "grad_norm": 0.32112917304039, + "learning_rate": 2.920264448124087e-05, + "loss": 1.0639, + "step": 9000 + }, + { + "epoch": 0.95042194092827, + "grad_norm": 0.32968273758888245, + "learning_rate": 2.8800130373977936e-05, + "loss": 1.0543, + "step": 9010 + }, + { + "epoch": 0.9514767932489452, + "grad_norm": 0.338516503572464, + "learning_rate": 2.84031643124288e-05, + "loss": 1.0592, + "step": 9020 + }, + { + "epoch": 0.9525316455696202, + "grad_norm": 0.330265074968338, + "learning_rate": 2.8011669825208517e-05, + "loss": 1.0776, + "step": 9030 + }, + { + "epoch": 0.9535864978902954, + "grad_norm": 0.3306685984134674, + "learning_rate": 2.762557149497405e-05, + "loss": 1.0539, + 
"step": 9040 + }, + { + "epoch": 0.9546413502109705, + "grad_norm": 0.3367357850074768, + "learning_rate": 2.724479494389592e-05, + "loss": 1.063, + "step": 9050 + }, + { + "epoch": 0.9556962025316456, + "grad_norm": 0.31615322828292847, + "learning_rate": 2.6869266819330058e-05, + "loss": 1.0667, + "step": 9060 + }, + { + "epoch": 0.9567510548523207, + "grad_norm": 0.3239525556564331, + "learning_rate": 2.6498914779687228e-05, + "loss": 1.0685, + "step": 9070 + }, + { + "epoch": 0.9578059071729957, + "grad_norm": 0.33848342299461365, + "learning_rate": 2.6133667480497115e-05, + "loss": 1.0673, + "step": 9080 + }, + { + "epoch": 0.9588607594936709, + "grad_norm": 0.32886168360710144, + "learning_rate": 2.5773454560664597e-05, + "loss": 1.0668, + "step": 9090 + }, + { + "epoch": 0.959915611814346, + "grad_norm": 0.3179469704627991, + "learning_rate": 2.541820662891541e-05, + "loss": 1.0475, + "step": 9100 + }, + { + "epoch": 0.9609704641350211, + "grad_norm": 0.3251250982284546, + "learning_rate": 2.5067855250428616e-05, + "loss": 1.0579, + "step": 9110 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 0.3251715898513794, + "learning_rate": 2.472233293365335e-05, + "loss": 1.0719, + "step": 9120 + }, + { + "epoch": 0.9630801687763713, + "grad_norm": 0.33614784479141235, + "learning_rate": 2.4381573117307307e-05, + "loss": 1.0542, + "step": 9130 + }, + { + "epoch": 0.9641350210970464, + "grad_norm": 0.3413209021091461, + "learning_rate": 2.4045510157554362e-05, + "loss": 1.0709, + "step": 9140 + }, + { + "epoch": 0.9651898734177216, + "grad_norm": 0.3312832713127136, + "learning_rate": 2.3714079315358985e-05, + "loss": 1.0591, + "step": 9150 + }, + { + "epoch": 0.9662447257383966, + "grad_norm": 0.3249393701553345, + "learning_rate": 2.338721674401494e-05, + "loss": 1.0675, + "step": 9160 + }, + { + "epoch": 0.9672995780590717, + "grad_norm": 0.32825586199760437, + "learning_rate": 2.30648594768459e-05, + "loss": 1.0653, + "step": 9170 + }, + { + "epoch": 
0.9683544303797469, + "grad_norm": 0.3269410729408264, + "learning_rate": 2.2746945415075523e-05, + "loss": 1.0617, + "step": 9180 + }, + { + "epoch": 0.9694092827004219, + "grad_norm": 0.3209409713745117, + "learning_rate": 2.2433413315864803e-05, + "loss": 1.0578, + "step": 9190 + }, + { + "epoch": 0.9704641350210971, + "grad_norm": 0.3257068395614624, + "learning_rate": 2.2124202780514277e-05, + "loss": 1.0625, + "step": 9200 + }, + { + "epoch": 0.9715189873417721, + "grad_norm": 0.3229939043521881, + "learning_rate": 2.1819254242828815e-05, + "loss": 1.0616, + "step": 9210 + }, + { + "epoch": 0.9725738396624473, + "grad_norm": 0.33057811856269836, + "learning_rate": 2.151850895764285e-05, + "loss": 1.0591, + "step": 9220 + }, + { + "epoch": 0.9736286919831224, + "grad_norm": 0.305545449256897, + "learning_rate": 2.12219089895037e-05, + "loss": 1.0513, + "step": 9230 + }, + { + "epoch": 0.9746835443037974, + "grad_norm": 0.32736852765083313, + "learning_rate": 2.092939720151092e-05, + "loss": 1.0479, + "step": 9240 + }, + { + "epoch": 0.9757383966244726, + "grad_norm": 0.3169935643672943, + "learning_rate": 2.064091724430947e-05, + "loss": 1.0569, + "step": 9250 + }, + { + "epoch": 0.9767932489451476, + "grad_norm": 0.3271348774433136, + "learning_rate": 2.0356413545234603e-05, + "loss": 1.0539, + "step": 9260 + }, + { + "epoch": 0.9778481012658228, + "grad_norm": 0.31698349118232727, + "learning_rate": 2.0075831297606357e-05, + "loss": 1.0645, + "step": 9270 + }, + { + "epoch": 0.9789029535864979, + "grad_norm": 0.3189639449119568, + "learning_rate": 1.9799116450171627e-05, + "loss": 1.0515, + "step": 9280 + }, + { + "epoch": 0.979957805907173, + "grad_norm": 0.3284784257411957, + "learning_rate": 1.952621569669175e-05, + "loss": 1.0612, + "step": 9290 + }, + { + "epoch": 0.9810126582278481, + "grad_norm": 0.32952404022216797, + "learning_rate": 1.9257076465673605e-05, + "loss": 1.0593, + "step": 9300 + }, + { + "epoch": 0.9820675105485233, + "grad_norm": 
0.32908713817596436, + "learning_rate": 1.899164691024229e-05, + "loss": 1.0613, + "step": 9310 + }, + { + "epoch": 0.9831223628691983, + "grad_norm": 0.3283434808254242, + "learning_rate": 1.872987589815331e-05, + "loss": 1.0548, + "step": 9320 + }, + { + "epoch": 0.9841772151898734, + "grad_norm": 0.33627134561538696, + "learning_rate": 1.8471713001942538e-05, + "loss": 1.0674, + "step": 9330 + }, + { + "epoch": 0.9852320675105485, + "grad_norm": 0.3174121677875519, + "learning_rate": 1.8217108489211845e-05, + "loss": 1.065, + "step": 9340 + }, + { + "epoch": 0.9862869198312236, + "grad_norm": 0.3188031315803528, + "learning_rate": 1.7966013313048696e-05, + "loss": 1.074, + "step": 9350 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 0.3233587145805359, + "learning_rate": 1.7718379102577752e-05, + "loss": 1.0646, + "step": 9360 + }, + { + "epoch": 0.9883966244725738, + "grad_norm": 0.32843634486198425, + "learning_rate": 1.7474158153642745e-05, + "loss": 1.0637, + "step": 9370 + }, + { + "epoch": 0.989451476793249, + "grad_norm": 0.3318040668964386, + "learning_rate": 1.7233303419616745e-05, + "loss": 1.0532, + "step": 9380 + }, + { + "epoch": 0.990506329113924, + "grad_norm": 0.343174546957016, + "learning_rate": 1.699576850233916e-05, + "loss": 1.0554, + "step": 9390 + }, + { + "epoch": 0.9915611814345991, + "grad_norm": 0.3253111243247986, + "learning_rate": 1.6761507643177553e-05, + "loss": 1.0696, + "step": 9400 + }, + { + "epoch": 0.9926160337552743, + "grad_norm": 0.3279666602611542, + "learning_rate": 1.6530475714212752e-05, + "loss": 1.046, + "step": 9410 + }, + { + "epoch": 0.9936708860759493, + "grad_norm": 0.32378527522087097, + "learning_rate": 1.6302628209545423e-05, + "loss": 1.0631, + "step": 9420 + }, + { + "epoch": 0.9947257383966245, + "grad_norm": 0.31868770718574524, + "learning_rate": 1.6077921236722464e-05, + "loss": 1.063, + "step": 9430 + }, + { + "epoch": 0.9957805907172996, + "grad_norm": 0.323849618434906, + "learning_rate": 
1.5856311508281594e-05, + "loss": 1.0585, + "step": 9440 + }, + { + "epoch": 0.9968354430379747, + "grad_norm": 0.320055216550827, + "learning_rate": 1.5637756333412454e-05, + "loss": 1.0692, + "step": 9450 + }, + { + "epoch": 0.9978902953586498, + "grad_norm": 0.32591259479522705, + "learning_rate": 1.542221360973268e-05, + "loss": 1.0535, + "step": 9460 + }, + { + "epoch": 0.9989451476793249, + "grad_norm": 0.3283117413520813, + "learning_rate": 1.5209641815177312e-05, + "loss": 1.0675, + "step": 9470 + }, + { + "epoch": 1.0, + "grad_norm": 0.9648054838180542, + "learning_rate": 1.5e-05, + "loss": 1.0556, + "step": 9480 + } + ], + "logging_steps": 10, + "max_steps": 9480, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 5.037432118742016e+16, + "train_batch_size": 1024, + "trial_name": null, + "trial_params": null +} diff --git a/saves-gemma2/checkpoint-9480/training_args.bin b/saves-gemma2/checkpoint-9480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e86e3f06e3de919177ed98f1a156171cca2cb4e3 --- /dev/null +++ b/saves-gemma2/checkpoint-9480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ee83d29a94d8cb61098546c2348e9cc6026d65e525e8ce76eb476245cea4679 +size 5112 diff --git a/saves-gemma2/config.json b/saves-gemma2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..96bfe46f54281277b27d9690dc209d86a9da6b69 --- /dev/null +++ b/saves-gemma2/config.json @@ -0,0 +1,31 @@ +{ + "architectures": [ + "Gemma2ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": 50.0, + "bos_token_id": 2, + "cache_implementation": "hybrid", + "eos_token_id": 1, + 
"final_logit_softcapping": 30.0, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 768, + "max_position_embeddings": 8192, + "model_type": "gemma2", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pad_token_id": 0, + "query_pre_attn_scalar": 224, + "rms_norm_eps": 1e-06, + "rope_theta": 10000.0, + "sliding_window": 4096, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-gemma2/generation_config.json b/saves-gemma2/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c3d8a8a1a3f2c87f01943870313ce2d1558cefc7 --- /dev/null +++ b/saves-gemma2/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 0, + "eos_token_id": 2, + "pad_token_id": 0, + "transformers_version": "4.42.4" +} diff --git a/saves-gemma2/model.safetensors b/saves-gemma2/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7bb52ba8e6de986062601eb20c05faaf7ffdc16d --- /dev/null +++ b/saves-gemma2/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24a229e45aada410c4022c63a4b357009fa61bb23e436372765ddcb2a85e4ff2 +size 19361344 diff --git a/saves-gemma2/result.log b/saves-gemma2/result.log new file mode 100644 index 0000000000000000000000000000000000000000..ce53172abefd85c77979751868baf9ac9456241e --- /dev/null +++ b/saves-gemma2/result.log @@ -0,0 +1 @@ +{'train_runtime': 5957.6303, 'train_samples_per_second': 1629.273, 'train_steps_per_second': 1.591, 'train_loss': 1.3754059658774846, 'epoch': 1.0} \ No newline at end of file diff --git a/saves-gemma2/special_tokens_map.json b/saves-gemma2/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ 
b/saves-gemma2/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-gemma2/tokenizer.json b/saves-gemma2/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-gemma2/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + 
"unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 
158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + 
"ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 
460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 
606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + 
"èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + 
",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 
1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + 
").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 
1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + 
"è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, 
+ "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + 
"Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + 
"åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + 
"åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-gemma2/tokenizer_config.json b/saves-gemma2/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-gemma2/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "model_max_length": 
4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-gpt2-cosine/checkpoint-9480/config.json b/saves-gpt2-cosine/checkpoint-9480/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c41b5987863e5a36912ae874f26bce471afeca9f --- /dev/null +++ b/saves-gpt2-cosine/checkpoint-9480/config.json @@ -0,0 +1,34 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "hidden_act": "gelu", + "initializer_range": 0.02, + "intermediate_size": 1024, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_embd": 256, + "n_head": 8, + "n_inner": null, + "n_layer": 2, + "n_positions": 1024, + "num_key_value_heads": 8, + "reorder_and_upcast_attn": false, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": false, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.1, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-gpt2-cosine/checkpoint-9480/generation_config.json b/saves-gpt2-cosine/checkpoint-9480/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..fb2eba6018c75d5bca061373b0ddaa2abf0a1f68 --- /dev/null +++ b/saves-gpt2-cosine/checkpoint-9480/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 50256, + "eos_token_id": 50256, + "transformers_version": "4.42.4" +} diff --git a/saves-gpt2-cosine/checkpoint-9480/model.safetensors b/saves-gpt2-cosine/checkpoint-9480/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9066386d2ede1926402d1cde71e6c139d1bc76d1 --- /dev/null +++ 
b/saves-gpt2-cosine/checkpoint-9480/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:832abcac55ddf8fdf6b3ebb9ce8344fc6e06e7d661f47fec80c8c5eef377a7a1 +size 9419432 diff --git a/saves-gpt2-cosine/checkpoint-9480/optimizer.pt b/saves-gpt2-cosine/checkpoint-9480/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..581ebda76ce0f015872ce24013b2a2f752556903 --- /dev/null +++ b/saves-gpt2-cosine/checkpoint-9480/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33182be188226436462f04535d26c0334da8e0a777f2a5dfb520ee2e5b445b1e +size 18857290 diff --git a/saves-gpt2-cosine/checkpoint-9480/rng_state.pth b/saves-gpt2-cosine/checkpoint-9480/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0fed94df6e39f86d0f7d754b86da20d654a45801 --- /dev/null +++ b/saves-gpt2-cosine/checkpoint-9480/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f9442d8d7469c6d4ca45133aef3c8266faf068adc1a246d6dd77665a1a72f41 +size 14244 diff --git a/saves-gpt2-cosine/checkpoint-9480/scheduler.pt b/saves-gpt2-cosine/checkpoint-9480/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..03c145297021546d40e130546440641e02059bcb --- /dev/null +++ b/saves-gpt2-cosine/checkpoint-9480/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35fd617624c087e1a286ed7cf3fa38baa4a8815e49f107c3186b4c7c58e1adbb +size 1064 diff --git a/saves-gpt2-cosine/checkpoint-9480/special_tokens_map.json b/saves-gpt2-cosine/checkpoint-9480/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-gpt2-cosine/checkpoint-9480/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + 
"eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-gpt2-cosine/checkpoint-9480/tokenizer.json b/saves-gpt2-cosine/checkpoint-9480/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-gpt2-cosine/checkpoint-9480/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + 
"ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 
169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + 
"Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + 
"Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, 
+ "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 
760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + 
"ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 
1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + 
"æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 
1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + 
"hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + 
"ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 
1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 
1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + 
"æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-gpt2-cosine/checkpoint-9480/tokenizer_config.json b/saves-gpt2-cosine/checkpoint-9480/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-gpt2-cosine/checkpoint-9480/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + 
"eos_token": "<|im_end|>", + "errors": "replace", + "model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-gpt2-cosine/checkpoint-9480/trainer_state.json b/saves-gpt2-cosine/checkpoint-9480/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..81c49d695b958e0f6558b88e4c31ad283682b552 --- /dev/null +++ b/saves-gpt2-cosine/checkpoint-9480/trainer_state.json @@ -0,0 +1,6669 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 9480, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0010548523206751054, + "grad_norm": 1.2747584581375122, + "learning_rate": 0.00015789473684210527, + "loss": 7.4312, + "step": 10 + }, + { + "epoch": 0.002109704641350211, + "grad_norm": 1.0849422216415405, + "learning_rate": 0.00031578947368421053, + "loss": 6.7812, + "step": 20 + }, + { + "epoch": 0.0031645569620253164, + "grad_norm": 0.7873295545578003, + "learning_rate": 0.00047368421052631577, + "loss": 6.1889, + "step": 30 + }, + { + "epoch": 0.004219409282700422, + "grad_norm": 0.981127142906189, + "learning_rate": 0.0006315789473684211, + "loss": 5.7693, + "step": 40 + }, + { + "epoch": 0.005274261603375527, + "grad_norm": 0.7162046432495117, + "learning_rate": 0.0007894736842105263, + "loss": 5.3788, + "step": 50 + }, + { + "epoch": 0.006329113924050633, + "grad_norm": 0.7569587230682373, + "learning_rate": 0.0009473684210526315, + "loss": 4.9362, + "step": 60 + }, + { + "epoch": 0.007383966244725738, + "grad_norm": 0.5858421921730042, + "learning_rate": 0.0011052631578947368, + "loss": 4.582, + "step": 70 + }, + { + "epoch": 0.008438818565400843, + "grad_norm": 1.1057262420654297, + "learning_rate": 0.0012631578947368421, + "loss": 4.3284, + "step": 80 + }, + { + "epoch": 0.00949367088607595, + 
"grad_norm": 0.6117943525314331, + "learning_rate": 0.0014210526315789472, + "loss": 4.1464, + "step": 90 + }, + { + "epoch": 0.010548523206751054, + "grad_norm": 0.7836896181106567, + "learning_rate": 0.0014999989494847376, + "loss": 4.0149, + "step": 100 + }, + { + "epoch": 0.011603375527426161, + "grad_norm": 0.44930461049079895, + "learning_rate": 0.0014999905453802946, + "loss": 3.8968, + "step": 110 + }, + { + "epoch": 0.012658227848101266, + "grad_norm": 0.8647735118865967, + "learning_rate": 0.0014999737372655805, + "loss": 3.8248, + "step": 120 + }, + { + "epoch": 0.013713080168776372, + "grad_norm": 0.42461466789245605, + "learning_rate": 0.0014999485253289388, + "loss": 3.7454, + "step": 130 + }, + { + "epoch": 0.014767932489451477, + "grad_norm": 1.2943376302719116, + "learning_rate": 0.0014999149098528814, + "loss": 3.6712, + "step": 140 + }, + { + "epoch": 0.015822784810126583, + "grad_norm": 0.5738990306854248, + "learning_rate": 0.0014998728912140862, + "loss": 3.6209, + "step": 150 + }, + { + "epoch": 0.016877637130801686, + "grad_norm": 0.5905137062072754, + "learning_rate": 0.0014998224698833922, + "loss": 3.5741, + "step": 160 + }, + { + "epoch": 0.017932489451476793, + "grad_norm": 0.5218836069107056, + "learning_rate": 0.0014997636464257956, + "loss": 3.5274, + "step": 170 + }, + { + "epoch": 0.0189873417721519, + "grad_norm": 0.5192434191703796, + "learning_rate": 0.0014996964215004416, + "loss": 3.495, + "step": 180 + }, + { + "epoch": 0.020042194092827006, + "grad_norm": 0.6479336023330688, + "learning_rate": 0.0014996207958606182, + "loss": 3.4491, + "step": 190 + }, + { + "epoch": 0.02109704641350211, + "grad_norm": 0.46953827142715454, + "learning_rate": 0.001499536770353748, + "loss": 3.4121, + "step": 200 + }, + { + "epoch": 0.022151898734177215, + "grad_norm": 0.6039459705352783, + "learning_rate": 0.0014994443459213774, + "loss": 3.3729, + "step": 210 + }, + { + "epoch": 0.023206751054852322, + "grad_norm": 0.5077710747718811, + 
"learning_rate": 0.001499343523599168, + "loss": 3.3415, + "step": 220 + }, + { + "epoch": 0.024261603375527425, + "grad_norm": 0.8886287212371826, + "learning_rate": 0.0014992343045168823, + "loss": 3.2998, + "step": 230 + }, + { + "epoch": 0.02531645569620253, + "grad_norm": 0.618446409702301, + "learning_rate": 0.0014991166898983739, + "loss": 3.2741, + "step": 240 + }, + { + "epoch": 0.026371308016877638, + "grad_norm": 0.4531407952308655, + "learning_rate": 0.001498990681061572, + "loss": 3.2372, + "step": 250 + }, + { + "epoch": 0.027426160337552744, + "grad_norm": 0.552634060382843, + "learning_rate": 0.001498856279418467, + "loss": 3.202, + "step": 260 + }, + { + "epoch": 0.028481012658227847, + "grad_norm": 0.6154399514198303, + "learning_rate": 0.0014987134864750948, + "loss": 3.1885, + "step": 270 + }, + { + "epoch": 0.029535864978902954, + "grad_norm": 0.5206190943717957, + "learning_rate": 0.0014985623038315206, + "loss": 3.1491, + "step": 280 + }, + { + "epoch": 0.03059071729957806, + "grad_norm": 0.5805359482765198, + "learning_rate": 0.0014984027331818193, + "loss": 3.1318, + "step": 290 + }, + { + "epoch": 0.03164556962025317, + "grad_norm": 0.5793381333351135, + "learning_rate": 0.0014982347763140584, + "loss": 3.1098, + "step": 300 + }, + { + "epoch": 0.03270042194092827, + "grad_norm": 0.5205258727073669, + "learning_rate": 0.0014980584351102762, + "loss": 3.0834, + "step": 310 + }, + { + "epoch": 0.03375527426160337, + "grad_norm": 0.5318222641944885, + "learning_rate": 0.001497873711546462, + "loss": 3.0638, + "step": 320 + }, + { + "epoch": 0.03481012658227848, + "grad_norm": 0.5161961317062378, + "learning_rate": 0.0014976806076925334, + "loss": 3.0557, + "step": 330 + }, + { + "epoch": 0.035864978902953586, + "grad_norm": 0.5513562560081482, + "learning_rate": 0.0014974791257123137, + "loss": 3.0351, + "step": 340 + }, + { + "epoch": 0.03691983122362869, + "grad_norm": 0.48094144463539124, + "learning_rate": 0.001497269267863507, + "loss": 
2.9966, + "step": 350 + }, + { + "epoch": 0.0379746835443038, + "grad_norm": 0.6854605674743652, + "learning_rate": 0.0014970510364976724, + "loss": 2.983, + "step": 360 + }, + { + "epoch": 0.039029535864978905, + "grad_norm": 0.679139256477356, + "learning_rate": 0.0014968244340601996, + "loss": 2.9803, + "step": 370 + }, + { + "epoch": 0.04008438818565401, + "grad_norm": 0.5697433948516846, + "learning_rate": 0.001496589463090279, + "loss": 2.9682, + "step": 380 + }, + { + "epoch": 0.04113924050632911, + "grad_norm": 0.5194149017333984, + "learning_rate": 0.001496346126220875, + "loss": 2.9502, + "step": 390 + }, + { + "epoch": 0.04219409282700422, + "grad_norm": 0.5124607682228088, + "learning_rate": 0.0014960944261786966, + "loss": 2.9228, + "step": 400 + }, + { + "epoch": 0.043248945147679324, + "grad_norm": 0.6669304370880127, + "learning_rate": 0.0014958343657841655, + "loss": 2.9205, + "step": 410 + }, + { + "epoch": 0.04430379746835443, + "grad_norm": 0.47750750184059143, + "learning_rate": 0.001495565947951385, + "loss": 2.9061, + "step": 420 + }, + { + "epoch": 0.04535864978902954, + "grad_norm": 0.6611514091491699, + "learning_rate": 0.0014952891756881085, + "loss": 2.8808, + "step": 430 + }, + { + "epoch": 0.046413502109704644, + "grad_norm": 0.4813998341560364, + "learning_rate": 0.0014950040520957037, + "loss": 2.8697, + "step": 440 + }, + { + "epoch": 0.04746835443037975, + "grad_norm": 0.5051124691963196, + "learning_rate": 0.0014947105803691204, + "loss": 2.868, + "step": 450 + }, + { + "epoch": 0.04852320675105485, + "grad_norm": 0.5885820984840393, + "learning_rate": 0.0014944087637968522, + "loss": 2.8498, + "step": 460 + }, + { + "epoch": 0.049578059071729956, + "grad_norm": 0.4992585778236389, + "learning_rate": 0.0014940986057609012, + "loss": 2.829, + "step": 470 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 0.6270170211791992, + "learning_rate": 0.0014937801097367396, + "loss": 2.8279, + "step": 480 + }, + { + "epoch": 
0.05168776371308017, + "grad_norm": 0.5319812297821045, + "learning_rate": 0.001493453279293271, + "loss": 2.808, + "step": 490 + }, + { + "epoch": 0.052742616033755275, + "grad_norm": 0.5532560348510742, + "learning_rate": 0.0014931181180927902, + "loss": 2.8005, + "step": 500 + }, + { + "epoch": 0.05379746835443038, + "grad_norm": 0.6820502877235413, + "learning_rate": 0.001492774629890942, + "loss": 2.7956, + "step": 510 + }, + { + "epoch": 0.05485232067510549, + "grad_norm": 0.5360422730445862, + "learning_rate": 0.001492422818536679, + "loss": 2.7762, + "step": 520 + }, + { + "epoch": 0.05590717299578059, + "grad_norm": 0.5114275813102722, + "learning_rate": 0.00149206268797222, + "loss": 2.7665, + "step": 530 + }, + { + "epoch": 0.056962025316455694, + "grad_norm": 0.4956927001476288, + "learning_rate": 0.0014916942422330032, + "loss": 2.7494, + "step": 540 + }, + { + "epoch": 0.0580168776371308, + "grad_norm": 0.5611988306045532, + "learning_rate": 0.001491317485447643, + "loss": 2.7508, + "step": 550 + }, + { + "epoch": 0.05907172995780591, + "grad_norm": 0.5637648701667786, + "learning_rate": 0.0014909324218378838, + "loss": 2.7152, + "step": 560 + }, + { + "epoch": 0.060126582278481014, + "grad_norm": 0.7406920194625854, + "learning_rate": 0.0014905390557185508, + "loss": 2.7214, + "step": 570 + }, + { + "epoch": 0.06118143459915612, + "grad_norm": 0.6244109869003296, + "learning_rate": 0.0014901373914975036, + "loss": 2.7243, + "step": 580 + }, + { + "epoch": 0.06223628691983123, + "grad_norm": 0.6076600551605225, + "learning_rate": 0.0014897274336755856, + "loss": 2.7032, + "step": 590 + }, + { + "epoch": 0.06329113924050633, + "grad_norm": 0.5039713382720947, + "learning_rate": 0.001489309186846575, + "loss": 2.6839, + "step": 600 + }, + { + "epoch": 0.06434599156118144, + "grad_norm": 0.553632378578186, + "learning_rate": 0.0014888826556971313, + "loss": 2.682, + "step": 610 + }, + { + "epoch": 0.06540084388185655, + "grad_norm": 0.5846127271652222, + 
"learning_rate": 0.0014884478450067444, + "loss": 2.6752, + "step": 620 + }, + { + "epoch": 0.06645569620253164, + "grad_norm": 0.5179242491722107, + "learning_rate": 0.0014880047596476807, + "loss": 2.6613, + "step": 630 + }, + { + "epoch": 0.06751054852320675, + "grad_norm": 0.5400410890579224, + "learning_rate": 0.0014875534045849274, + "loss": 2.6703, + "step": 640 + }, + { + "epoch": 0.06856540084388185, + "grad_norm": 0.5789357423782349, + "learning_rate": 0.0014870937848761388, + "loss": 2.6626, + "step": 650 + }, + { + "epoch": 0.06962025316455696, + "grad_norm": 0.6280109882354736, + "learning_rate": 0.001486625905671578, + "loss": 2.6498, + "step": 660 + }, + { + "epoch": 0.07067510548523206, + "grad_norm": 0.6417626142501831, + "learning_rate": 0.00148614977221406, + "loss": 2.6377, + "step": 670 + }, + { + "epoch": 0.07172995780590717, + "grad_norm": 0.5628936290740967, + "learning_rate": 0.0014856653898388927, + "loss": 2.6309, + "step": 680 + }, + { + "epoch": 0.07278481012658228, + "grad_norm": 0.5995395183563232, + "learning_rate": 0.001485172763973817, + "loss": 2.637, + "step": 690 + }, + { + "epoch": 0.07383966244725738, + "grad_norm": 0.6329290270805359, + "learning_rate": 0.0014846719001389466, + "loss": 2.6191, + "step": 700 + }, + { + "epoch": 0.07489451476793249, + "grad_norm": 0.5578229427337646, + "learning_rate": 0.001484162803946705, + "loss": 2.6087, + "step": 710 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 0.5341002345085144, + "learning_rate": 0.0014836454811017635, + "loss": 2.5888, + "step": 720 + }, + { + "epoch": 0.0770042194092827, + "grad_norm": 0.5629720091819763, + "learning_rate": 0.0014831199374009778, + "loss": 2.5821, + "step": 730 + }, + { + "epoch": 0.07805907172995781, + "grad_norm": 0.601706862449646, + "learning_rate": 0.0014825861787333208, + "loss": 2.5934, + "step": 740 + }, + { + "epoch": 0.07911392405063292, + "grad_norm": 0.6745474934577942, + "learning_rate": 0.0014820442110798197, + "loss": 2.5735, 
+ "step": 750 + }, + { + "epoch": 0.08016877637130802, + "grad_norm": 0.5653058290481567, + "learning_rate": 0.0014814940405134865, + "loss": 2.5734, + "step": 760 + }, + { + "epoch": 0.08122362869198312, + "grad_norm": 0.6385292410850525, + "learning_rate": 0.001480935673199251, + "loss": 2.5611, + "step": 770 + }, + { + "epoch": 0.08227848101265822, + "grad_norm": 0.6853346824645996, + "learning_rate": 0.0014803691153938915, + "loss": 2.5573, + "step": 780 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 0.6247203350067139, + "learning_rate": 0.0014797943734459653, + "loss": 2.5682, + "step": 790 + }, + { + "epoch": 0.08438818565400844, + "grad_norm": 0.526970624923706, + "learning_rate": 0.001479211453795736, + "loss": 2.5364, + "step": 800 + }, + { + "epoch": 0.08544303797468354, + "grad_norm": 0.586391806602478, + "learning_rate": 0.0014786203629751033, + "loss": 2.5273, + "step": 810 + }, + { + "epoch": 0.08649789029535865, + "grad_norm": 0.6468285322189331, + "learning_rate": 0.0014780211076075279, + "loss": 2.5338, + "step": 820 + }, + { + "epoch": 0.08755274261603375, + "grad_norm": 0.5445563197135925, + "learning_rate": 0.0014774136944079594, + "loss": 2.5355, + "step": 830 + }, + { + "epoch": 0.08860759493670886, + "grad_norm": 0.7401041388511658, + "learning_rate": 0.0014767981301827592, + "loss": 2.5157, + "step": 840 + }, + { + "epoch": 0.08966244725738397, + "grad_norm": 0.4945981800556183, + "learning_rate": 0.0014761744218296249, + "loss": 2.512, + "step": 850 + }, + { + "epoch": 0.09071729957805907, + "grad_norm": 0.5440536141395569, + "learning_rate": 0.001475542576337513, + "loss": 2.5128, + "step": 860 + }, + { + "epoch": 0.09177215189873418, + "grad_norm": 0.5822538733482361, + "learning_rate": 0.001474902600786561, + "loss": 2.5147, + "step": 870 + }, + { + "epoch": 0.09282700421940929, + "grad_norm": 0.5085551142692566, + "learning_rate": 0.0014742545023480075, + "loss": 2.4994, + "step": 880 + }, + { + "epoch": 0.0938818565400844, + 
"grad_norm": 0.5303928852081299, + "learning_rate": 0.0014735982882841117, + "loss": 2.4945, + "step": 890 + }, + { + "epoch": 0.0949367088607595, + "grad_norm": 0.5537199378013611, + "learning_rate": 0.0014729339659480727, + "loss": 2.4834, + "step": 900 + }, + { + "epoch": 0.09599156118143459, + "grad_norm": 0.5758653283119202, + "learning_rate": 0.0014722615427839468, + "loss": 2.489, + "step": 910 + }, + { + "epoch": 0.0970464135021097, + "grad_norm": 0.4917565584182739, + "learning_rate": 0.0014715810263265633, + "loss": 2.4782, + "step": 920 + }, + { + "epoch": 0.0981012658227848, + "grad_norm": 0.5117908716201782, + "learning_rate": 0.0014708924242014423, + "loss": 2.4629, + "step": 930 + }, + { + "epoch": 0.09915611814345991, + "grad_norm": 0.6005182862281799, + "learning_rate": 0.0014701957441247064, + "loss": 2.4669, + "step": 940 + }, + { + "epoch": 0.10021097046413502, + "grad_norm": 0.5064338445663452, + "learning_rate": 0.0014694909939029959, + "loss": 2.4548, + "step": 950 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 0.5839555263519287, + "learning_rate": 0.0014687781814333814, + "loss": 2.4596, + "step": 960 + }, + { + "epoch": 0.10232067510548523, + "grad_norm": 0.5321500301361084, + "learning_rate": 0.0014680573147032746, + "loss": 2.458, + "step": 970 + }, + { + "epoch": 0.10337552742616034, + "grad_norm": 0.5418179631233215, + "learning_rate": 0.0014673284017903392, + "loss": 2.4354, + "step": 980 + }, + { + "epoch": 0.10443037974683544, + "grad_norm": 0.5591111779212952, + "learning_rate": 0.0014665914508624, + "loss": 2.4378, + "step": 990 + }, + { + "epoch": 0.10548523206751055, + "grad_norm": 0.5277249813079834, + "learning_rate": 0.0014658464701773526, + "loss": 2.4439, + "step": 1000 + }, + { + "epoch": 0.10654008438818566, + "grad_norm": 0.5475377440452576, + "learning_rate": 0.0014650934680830688, + "loss": 2.438, + "step": 1010 + }, + { + "epoch": 0.10759493670886076, + "grad_norm": 0.6166481375694275, + "learning_rate": 
0.0014643324530173051, + "loss": 2.4298, + "step": 1020 + }, + { + "epoch": 0.10864978902953587, + "grad_norm": 0.6506895422935486, + "learning_rate": 0.0014635634335076067, + "loss": 2.4231, + "step": 1030 + }, + { + "epoch": 0.10970464135021098, + "grad_norm": 0.5713043212890625, + "learning_rate": 0.001462786418171213, + "loss": 2.418, + "step": 1040 + }, + { + "epoch": 0.11075949367088607, + "grad_norm": 0.5594637989997864, + "learning_rate": 0.0014620014157149597, + "loss": 2.4242, + "step": 1050 + }, + { + "epoch": 0.11181434599156118, + "grad_norm": 0.5279520750045776, + "learning_rate": 0.001461208434935183, + "loss": 2.414, + "step": 1060 + }, + { + "epoch": 0.11286919831223628, + "grad_norm": 0.5250833630561829, + "learning_rate": 0.0014604074847176197, + "loss": 2.3976, + "step": 1070 + }, + { + "epoch": 0.11392405063291139, + "grad_norm": 0.5620298981666565, + "learning_rate": 0.0014595985740373082, + "loss": 2.3995, + "step": 1080 + }, + { + "epoch": 0.1149789029535865, + "grad_norm": 0.5735461711883545, + "learning_rate": 0.0014587817119584873, + "loss": 2.4037, + "step": 1090 + }, + { + "epoch": 0.1160337552742616, + "grad_norm": 0.5932706594467163, + "learning_rate": 0.001457956907634496, + "loss": 2.3925, + "step": 1100 + }, + { + "epoch": 0.11708860759493671, + "grad_norm": 0.5558930039405823, + "learning_rate": 0.0014571241703076692, + "loss": 2.3938, + "step": 1110 + }, + { + "epoch": 0.11814345991561181, + "grad_norm": 0.5125530362129211, + "learning_rate": 0.0014562835093092348, + "loss": 2.3883, + "step": 1120 + }, + { + "epoch": 0.11919831223628692, + "grad_norm": 0.8182698488235474, + "learning_rate": 0.0014554349340592104, + "loss": 2.3758, + "step": 1130 + }, + { + "epoch": 0.12025316455696203, + "grad_norm": 0.6140531897544861, + "learning_rate": 0.001454578454066296, + "loss": 2.394, + "step": 1140 + }, + { + "epoch": 0.12130801687763713, + "grad_norm": 0.8151295781135559, + "learning_rate": 0.0014537140789277678, + "loss": 2.3731, + 
"step": 1150 + }, + { + "epoch": 0.12236286919831224, + "grad_norm": 0.6274152398109436, + "learning_rate": 0.0014528418183293716, + "loss": 2.3808, + "step": 1160 + }, + { + "epoch": 0.12341772151898735, + "grad_norm": 0.5366663932800293, + "learning_rate": 0.001451961682045213, + "loss": 2.3652, + "step": 1170 + }, + { + "epoch": 0.12447257383966245, + "grad_norm": 0.5608649849891663, + "learning_rate": 0.001451073679937649, + "loss": 2.3495, + "step": 1180 + }, + { + "epoch": 0.12552742616033755, + "grad_norm": 0.5778776407241821, + "learning_rate": 0.0014501778219571766, + "loss": 2.3575, + "step": 1190 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 0.7551344633102417, + "learning_rate": 0.0014492741181423225, + "loss": 2.3693, + "step": 1200 + }, + { + "epoch": 0.12763713080168776, + "grad_norm": 0.6345680952072144, + "learning_rate": 0.0014483625786195285, + "loss": 2.3655, + "step": 1210 + }, + { + "epoch": 0.12869198312236288, + "grad_norm": 0.5620285868644714, + "learning_rate": 0.0014474432136030405, + "loss": 2.3465, + "step": 1220 + }, + { + "epoch": 0.12974683544303797, + "grad_norm": 0.5185433626174927, + "learning_rate": 0.0014465160333947923, + "loss": 2.3359, + "step": 1230 + }, + { + "epoch": 0.1308016877637131, + "grad_norm": 0.5388045907020569, + "learning_rate": 0.0014455810483842908, + "loss": 2.3502, + "step": 1240 + }, + { + "epoch": 0.13185654008438819, + "grad_norm": 0.554252564907074, + "learning_rate": 0.0014446382690484997, + "loss": 2.3543, + "step": 1250 + }, + { + "epoch": 0.13291139240506328, + "grad_norm": 0.5698495507240295, + "learning_rate": 0.0014436877059517215, + "loss": 2.3364, + "step": 1260 + }, + { + "epoch": 0.1339662447257384, + "grad_norm": 0.5226405262947083, + "learning_rate": 0.0014427293697454803, + "loss": 2.3264, + "step": 1270 + }, + { + "epoch": 0.1350210970464135, + "grad_norm": 0.5640619397163391, + "learning_rate": 0.001441763271168401, + "loss": 2.3452, + "step": 1280 + }, + { + "epoch": 
0.1360759493670886, + "grad_norm": 0.595586895942688, + "learning_rate": 0.00144078942104609, + "loss": 2.3333, + "step": 1290 + }, + { + "epoch": 0.1371308016877637, + "grad_norm": 0.5787559151649475, + "learning_rate": 0.001439807830291013, + "loss": 2.3157, + "step": 1300 + }, + { + "epoch": 0.13818565400843882, + "grad_norm": 0.5272350907325745, + "learning_rate": 0.0014388185099023744, + "loss": 2.321, + "step": 1310 + }, + { + "epoch": 0.13924050632911392, + "grad_norm": 0.596028208732605, + "learning_rate": 0.0014378214709659916, + "loss": 2.3236, + "step": 1320 + }, + { + "epoch": 0.14029535864978904, + "grad_norm": 0.6053712964057922, + "learning_rate": 0.0014368167246541733, + "loss": 2.3148, + "step": 1330 + }, + { + "epoch": 0.14135021097046413, + "grad_norm": 0.5603710412979126, + "learning_rate": 0.0014358042822255918, + "loss": 2.3191, + "step": 1340 + }, + { + "epoch": 0.14240506329113925, + "grad_norm": 0.5483109354972839, + "learning_rate": 0.0014347841550251597, + "loss": 2.3214, + "step": 1350 + }, + { + "epoch": 0.14345991561181434, + "grad_norm": 0.5500408411026001, + "learning_rate": 0.0014337563544838997, + "loss": 2.308, + "step": 1360 + }, + { + "epoch": 0.14451476793248946, + "grad_norm": 0.6626913547515869, + "learning_rate": 0.001432720892118819, + "loss": 2.307, + "step": 1370 + }, + { + "epoch": 0.14556962025316456, + "grad_norm": 0.6085088849067688, + "learning_rate": 0.0014316777795327794, + "loss": 2.2959, + "step": 1380 + }, + { + "epoch": 0.14662447257383968, + "grad_norm": 0.5606074333190918, + "learning_rate": 0.001430627028414366, + "loss": 2.3086, + "step": 1390 + }, + { + "epoch": 0.14767932489451477, + "grad_norm": 0.6846709847450256, + "learning_rate": 0.0014295686505377586, + "loss": 2.2936, + "step": 1400 + }, + { + "epoch": 0.14873417721518986, + "grad_norm": 0.566358208656311, + "learning_rate": 0.0014285026577625982, + "loss": 2.2933, + "step": 1410 + }, + { + "epoch": 0.14978902953586498, + "grad_norm": 
0.5893951654434204, + "learning_rate": 0.0014274290620338542, + "loss": 2.2998, + "step": 1420 + }, + { + "epoch": 0.15084388185654007, + "grad_norm": 0.5212005376815796, + "learning_rate": 0.0014263478753816906, + "loss": 2.2922, + "step": 1430 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 0.5123047232627869, + "learning_rate": 0.0014252591099213326, + "loss": 2.2873, + "step": 1440 + }, + { + "epoch": 0.1529535864978903, + "grad_norm": 0.5280911922454834, + "learning_rate": 0.001424162777852928, + "loss": 2.2872, + "step": 1450 + }, + { + "epoch": 0.1540084388185654, + "grad_norm": 0.5076168775558472, + "learning_rate": 0.0014230588914614134, + "loss": 2.2826, + "step": 1460 + }, + { + "epoch": 0.1550632911392405, + "grad_norm": 0.6429795026779175, + "learning_rate": 0.0014219474631163745, + "loss": 2.2803, + "step": 1470 + }, + { + "epoch": 0.15611814345991562, + "grad_norm": 0.4922437071800232, + "learning_rate": 0.001420828505271909, + "loss": 2.2849, + "step": 1480 + }, + { + "epoch": 0.1571729957805907, + "grad_norm": 0.5438232421875, + "learning_rate": 0.0014197020304664856, + "loss": 2.2819, + "step": 1490 + }, + { + "epoch": 0.15822784810126583, + "grad_norm": 0.5499326586723328, + "learning_rate": 0.0014185680513228048, + "loss": 2.2795, + "step": 1500 + }, + { + "epoch": 0.15928270042194093, + "grad_norm": 0.6121415495872498, + "learning_rate": 0.0014174265805476564, + "loss": 2.2778, + "step": 1510 + }, + { + "epoch": 0.16033755274261605, + "grad_norm": 0.5868210792541504, + "learning_rate": 0.0014162776309317778, + "loss": 2.279, + "step": 1520 + }, + { + "epoch": 0.16139240506329114, + "grad_norm": 0.5054233074188232, + "learning_rate": 0.0014151212153497108, + "loss": 2.2605, + "step": 1530 + }, + { + "epoch": 0.16244725738396623, + "grad_norm": 0.5441999435424805, + "learning_rate": 0.0014139573467596561, + "loss": 2.2535, + "step": 1540 + }, + { + "epoch": 0.16350210970464135, + "grad_norm": 0.49908918142318726, + "learning_rate": 
0.00141278603820333, + "loss": 2.2539, + "step": 1550 + }, + { + "epoch": 0.16455696202531644, + "grad_norm": 0.5019859075546265, + "learning_rate": 0.0014116073028058165, + "loss": 2.2574, + "step": 1560 + }, + { + "epoch": 0.16561181434599156, + "grad_norm": 0.5101312398910522, + "learning_rate": 0.0014104211537754217, + "loss": 2.2474, + "step": 1570 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 0.5778200030326843, + "learning_rate": 0.001409227604403524, + "loss": 2.2593, + "step": 1580 + }, + { + "epoch": 0.16772151898734178, + "grad_norm": 0.6015564799308777, + "learning_rate": 0.0014080266680644277, + "loss": 2.2549, + "step": 1590 + }, + { + "epoch": 0.16877637130801687, + "grad_norm": 0.4796990752220154, + "learning_rate": 0.0014068183582152103, + "loss": 2.2573, + "step": 1600 + }, + { + "epoch": 0.169831223628692, + "grad_norm": 0.49003174901008606, + "learning_rate": 0.001405602688395574, + "loss": 2.2583, + "step": 1610 + }, + { + "epoch": 0.17088607594936708, + "grad_norm": 0.5326870083808899, + "learning_rate": 0.0014043796722276924, + "loss": 2.2287, + "step": 1620 + }, + { + "epoch": 0.1719409282700422, + "grad_norm": 0.5834649801254272, + "learning_rate": 0.0014031493234160591, + "loss": 2.243, + "step": 1630 + }, + { + "epoch": 0.1729957805907173, + "grad_norm": 0.5437233448028564, + "learning_rate": 0.0014019116557473332, + "loss": 2.2408, + "step": 1640 + }, + { + "epoch": 0.17405063291139242, + "grad_norm": 0.6422701478004456, + "learning_rate": 0.0014006666830901854, + "loss": 2.2327, + "step": 1650 + }, + { + "epoch": 0.1751054852320675, + "grad_norm": 0.5536960959434509, + "learning_rate": 0.001399414419395142, + "loss": 2.2335, + "step": 1660 + }, + { + "epoch": 0.17616033755274263, + "grad_norm": 0.5583681464195251, + "learning_rate": 0.0013981548786944293, + "loss": 2.2415, + "step": 1670 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 0.6664065718650818, + "learning_rate": 0.0013968880751018158, + "loss": 2.2319, + 
"step": 1680 + }, + { + "epoch": 0.17827004219409281, + "grad_norm": 0.6318219304084778, + "learning_rate": 0.0013956140228124545, + "loss": 2.2207, + "step": 1690 + }, + { + "epoch": 0.17932489451476794, + "grad_norm": 0.5605854988098145, + "learning_rate": 0.0013943327361027231, + "loss": 2.2367, + "step": 1700 + }, + { + "epoch": 0.18037974683544303, + "grad_norm": 0.556694746017456, + "learning_rate": 0.0013930442293300649, + "loss": 2.2192, + "step": 1710 + }, + { + "epoch": 0.18143459915611815, + "grad_norm": 0.5227272510528564, + "learning_rate": 0.0013917485169328279, + "loss": 2.2136, + "step": 1720 + }, + { + "epoch": 0.18248945147679324, + "grad_norm": 0.527908205986023, + "learning_rate": 0.0013904456134301016, + "loss": 2.2186, + "step": 1730 + }, + { + "epoch": 0.18354430379746836, + "grad_norm": 0.4805464744567871, + "learning_rate": 0.0013891355334215562, + "loss": 2.2204, + "step": 1740 + }, + { + "epoch": 0.18459915611814345, + "grad_norm": 0.5425950288772583, + "learning_rate": 0.0013878182915872776, + "loss": 2.2442, + "step": 1750 + }, + { + "epoch": 0.18565400843881857, + "grad_norm": 0.5321546196937561, + "learning_rate": 0.001386493902687604, + "loss": 2.2206, + "step": 1760 + }, + { + "epoch": 0.18670886075949367, + "grad_norm": 0.47689199447631836, + "learning_rate": 0.00138516238156296, + "loss": 2.2197, + "step": 1770 + }, + { + "epoch": 0.1877637130801688, + "grad_norm": 0.5251813530921936, + "learning_rate": 0.0013838237431336895, + "loss": 2.2212, + "step": 1780 + }, + { + "epoch": 0.18881856540084388, + "grad_norm": 0.5576980113983154, + "learning_rate": 0.0013824780023998899, + "loss": 2.2194, + "step": 1790 + }, + { + "epoch": 0.189873417721519, + "grad_norm": 0.4903496205806732, + "learning_rate": 0.0013811251744412431, + "loss": 2.2164, + "step": 1800 + }, + { + "epoch": 0.1909282700421941, + "grad_norm": 0.5496652722358704, + "learning_rate": 0.0013797652744168473, + "loss": 2.205, + "step": 1810 + }, + { + "epoch": 
0.19198312236286919, + "grad_norm": 0.5182146430015564, + "learning_rate": 0.0013783983175650457, + "loss": 2.2043, + "step": 1820 + }, + { + "epoch": 0.1930379746835443, + "grad_norm": 0.6357032060623169, + "learning_rate": 0.0013770243192032581, + "loss": 2.2012, + "step": 1830 + }, + { + "epoch": 0.1940928270042194, + "grad_norm": 0.5484554171562195, + "learning_rate": 0.0013756432947278064, + "loss": 2.2009, + "step": 1840 + }, + { + "epoch": 0.19514767932489452, + "grad_norm": 0.5493423938751221, + "learning_rate": 0.0013742552596137444, + "loss": 2.2063, + "step": 1850 + }, + { + "epoch": 0.1962025316455696, + "grad_norm": 0.5268741250038147, + "learning_rate": 0.0013728602294146833, + "loss": 2.2079, + "step": 1860 + }, + { + "epoch": 0.19725738396624473, + "grad_norm": 0.543606162071228, + "learning_rate": 0.0013714582197626175, + "loss": 2.2077, + "step": 1870 + }, + { + "epoch": 0.19831223628691982, + "grad_norm": 0.5476695895195007, + "learning_rate": 0.0013700492463677501, + "loss": 2.1947, + "step": 1880 + }, + { + "epoch": 0.19936708860759494, + "grad_norm": 0.5037834644317627, + "learning_rate": 0.0013686333250183154, + "loss": 2.2023, + "step": 1890 + }, + { + "epoch": 0.20042194092827004, + "grad_norm": 0.5949304699897766, + "learning_rate": 0.001367210471580404, + "loss": 2.199, + "step": 1900 + }, + { + "epoch": 0.20147679324894516, + "grad_norm": 0.4746219217777252, + "learning_rate": 0.0013657807019977835, + "loss": 2.1953, + "step": 1910 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 0.4934662878513336, + "learning_rate": 0.0013643440322917198, + "loss": 2.1937, + "step": 1920 + }, + { + "epoch": 0.20358649789029537, + "grad_norm": 0.48982641100883484, + "learning_rate": 0.0013629004785607989, + "loss": 2.1966, + "step": 1930 + }, + { + "epoch": 0.20464135021097046, + "grad_norm": 0.5195836424827576, + "learning_rate": 0.0013614500569807445, + "loss": 2.1809, + "step": 1940 + }, + { + "epoch": 0.20569620253164558, + "grad_norm": 
0.4753236770629883, + "learning_rate": 0.0013599927838042394, + "loss": 2.1852, + "step": 1950 + }, + { + "epoch": 0.20675105485232068, + "grad_norm": 0.48061811923980713, + "learning_rate": 0.0013585286753607408, + "loss": 2.1934, + "step": 1960 + }, + { + "epoch": 0.20780590717299577, + "grad_norm": 0.5231103301048279, + "learning_rate": 0.0013570577480562986, + "loss": 2.1977, + "step": 1970 + }, + { + "epoch": 0.2088607594936709, + "grad_norm": 0.5212116837501526, + "learning_rate": 0.0013555800183733717, + "loss": 2.1805, + "step": 1980 + }, + { + "epoch": 0.20991561181434598, + "grad_norm": 0.4923148453235626, + "learning_rate": 0.0013540955028706425, + "loss": 2.1774, + "step": 1990 + }, + { + "epoch": 0.2109704641350211, + "grad_norm": 0.4827023446559906, + "learning_rate": 0.0013526042181828324, + "loss": 2.1766, + "step": 2000 + }, + { + "epoch": 0.2120253164556962, + "grad_norm": 0.5195667743682861, + "learning_rate": 0.0013511061810205143, + "loss": 2.1819, + "step": 2010 + }, + { + "epoch": 0.21308016877637131, + "grad_norm": 0.5382214784622192, + "learning_rate": 0.001349601408169926, + "loss": 2.1939, + "step": 2020 + }, + { + "epoch": 0.2141350210970464, + "grad_norm": 0.5547071099281311, + "learning_rate": 0.0013480899164927823, + "loss": 2.1832, + "step": 2030 + }, + { + "epoch": 0.21518987341772153, + "grad_norm": 0.4954250156879425, + "learning_rate": 0.0013465717229260853, + "loss": 2.1801, + "step": 2040 + }, + { + "epoch": 0.21624472573839662, + "grad_norm": 0.4699029326438904, + "learning_rate": 0.001345046844481935, + "loss": 2.1802, + "step": 2050 + }, + { + "epoch": 0.21729957805907174, + "grad_norm": 0.5275043845176697, + "learning_rate": 0.0013435152982473396, + "loss": 2.1695, + "step": 2060 + }, + { + "epoch": 0.21835443037974683, + "grad_norm": 0.5277373790740967, + "learning_rate": 0.0013419771013840217, + "loss": 2.1659, + "step": 2070 + }, + { + "epoch": 0.21940928270042195, + "grad_norm": 0.5433393120765686, + "learning_rate": 
0.001340432271128229, + "loss": 2.1809, + "step": 2080 + }, + { + "epoch": 0.22046413502109705, + "grad_norm": 0.5073947310447693, + "learning_rate": 0.0013388808247905381, + "loss": 2.1636, + "step": 2090 + }, + { + "epoch": 0.22151898734177214, + "grad_norm": 0.6556822657585144, + "learning_rate": 0.0013373227797556634, + "loss": 2.1755, + "step": 2100 + }, + { + "epoch": 0.22257383966244726, + "grad_norm": 0.617758572101593, + "learning_rate": 0.00133575815348226, + "loss": 2.1648, + "step": 2110 + }, + { + "epoch": 0.22362869198312235, + "grad_norm": 0.5471624732017517, + "learning_rate": 0.0013341869635027292, + "loss": 2.1632, + "step": 2120 + }, + { + "epoch": 0.22468354430379747, + "grad_norm": 0.5126450657844543, + "learning_rate": 0.001332609227423022, + "loss": 2.165, + "step": 2130 + }, + { + "epoch": 0.22573839662447256, + "grad_norm": 0.49747955799102783, + "learning_rate": 0.0013310249629224417, + "loss": 2.1607, + "step": 2140 + }, + { + "epoch": 0.22679324894514769, + "grad_norm": 0.506222665309906, + "learning_rate": 0.0013294341877534454, + "loss": 2.1672, + "step": 2150 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 0.5212807059288025, + "learning_rate": 0.0013278369197414458, + "loss": 2.1799, + "step": 2160 + }, + { + "epoch": 0.2289029535864979, + "grad_norm": 0.5451085567474365, + "learning_rate": 0.0013262331767846104, + "loss": 2.16, + "step": 2170 + }, + { + "epoch": 0.229957805907173, + "grad_norm": 0.4879247844219208, + "learning_rate": 0.0013246229768536628, + "loss": 2.1441, + "step": 2180 + }, + { + "epoch": 0.2310126582278481, + "grad_norm": 0.4954136610031128, + "learning_rate": 0.001323006337991679, + "loss": 2.1609, + "step": 2190 + }, + { + "epoch": 0.2320675105485232, + "grad_norm": 0.5348203182220459, + "learning_rate": 0.0013213832783138873, + "loss": 2.1627, + "step": 2200 + }, + { + "epoch": 0.23312236286919832, + "grad_norm": 0.4880081117153168, + "learning_rate": 0.0013197538160074633, + "loss": 2.1492, + "step": 
2210 + }, + { + "epoch": 0.23417721518987342, + "grad_norm": 0.4840116798877716, + "learning_rate": 0.0013181179693313283, + "loss": 2.1611, + "step": 2220 + }, + { + "epoch": 0.23523206751054854, + "grad_norm": 0.4619462490081787, + "learning_rate": 0.0013164757566159428, + "loss": 2.1508, + "step": 2230 + }, + { + "epoch": 0.23628691983122363, + "grad_norm": 0.4876561164855957, + "learning_rate": 0.001314827196263102, + "loss": 2.1463, + "step": 2240 + }, + { + "epoch": 0.23734177215189872, + "grad_norm": 0.5447617173194885, + "learning_rate": 0.0013131723067457302, + "loss": 2.1447, + "step": 2250 + }, + { + "epoch": 0.23839662447257384, + "grad_norm": 0.5259478092193604, + "learning_rate": 0.0013115111066076721, + "loss": 2.1379, + "step": 2260 + }, + { + "epoch": 0.23945147679324894, + "grad_norm": 0.5198143720626831, + "learning_rate": 0.0013098436144634862, + "loss": 2.1725, + "step": 2270 + }, + { + "epoch": 0.24050632911392406, + "grad_norm": 0.5254530906677246, + "learning_rate": 0.0013081698489982364, + "loss": 2.1549, + "step": 2280 + }, + { + "epoch": 0.24156118143459915, + "grad_norm": 0.5557888746261597, + "learning_rate": 0.001306489828967282, + "loss": 2.1432, + "step": 2290 + }, + { + "epoch": 0.24261603375527427, + "grad_norm": 0.5075929164886475, + "learning_rate": 0.0013048035731960679, + "loss": 2.1406, + "step": 2300 + }, + { + "epoch": 0.24367088607594936, + "grad_norm": 0.5386555790901184, + "learning_rate": 0.0013031111005799133, + "loss": 2.1474, + "step": 2310 + }, + { + "epoch": 0.24472573839662448, + "grad_norm": 0.5489538908004761, + "learning_rate": 0.0013014124300838004, + "loss": 2.15, + "step": 2320 + }, + { + "epoch": 0.24578059071729957, + "grad_norm": 0.49059146642684937, + "learning_rate": 0.0012997075807421612, + "loss": 2.133, + "step": 2330 + }, + { + "epoch": 0.2468354430379747, + "grad_norm": 0.5180448889732361, + "learning_rate": 0.0012979965716586653, + "loss": 2.1337, + "step": 2340 + }, + { + "epoch": 
0.2478902953586498, + "grad_norm": 0.5299931764602661, + "learning_rate": 0.0012962794220060048, + "loss": 2.1298, + "step": 2350 + }, + { + "epoch": 0.2489451476793249, + "grad_norm": 0.4760563373565674, + "learning_rate": 0.0012945561510256801, + "loss": 2.1375, + "step": 2360 + }, + { + "epoch": 0.25, + "grad_norm": 0.47108808159828186, + "learning_rate": 0.001292826778027784, + "loss": 2.1411, + "step": 2370 + }, + { + "epoch": 0.2510548523206751, + "grad_norm": 0.5180763602256775, + "learning_rate": 0.0012910913223907856, + "loss": 2.1385, + "step": 2380 + }, + { + "epoch": 0.2521097046413502, + "grad_norm": 0.5572436451911926, + "learning_rate": 0.0012893498035613123, + "loss": 2.1351, + "step": 2390 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 0.5123071074485779, + "learning_rate": 0.001287602241053933, + "loss": 2.1343, + "step": 2400 + }, + { + "epoch": 0.2542194092827004, + "grad_norm": 0.5105187296867371, + "learning_rate": 0.0012858486544509392, + "loss": 2.1378, + "step": 2410 + }, + { + "epoch": 0.2552742616033755, + "grad_norm": 0.538803219795227, + "learning_rate": 0.0012840890634021249, + "loss": 2.1302, + "step": 2420 + }, + { + "epoch": 0.2563291139240506, + "grad_norm": 0.5208653807640076, + "learning_rate": 0.0012823234876245667, + "loss": 2.1295, + "step": 2430 + }, + { + "epoch": 0.25738396624472576, + "grad_norm": 0.5847416520118713, + "learning_rate": 0.0012805519469024035, + "loss": 2.1372, + "step": 2440 + }, + { + "epoch": 0.25843881856540085, + "grad_norm": 0.4927160143852234, + "learning_rate": 0.0012787744610866143, + "loss": 2.1224, + "step": 2450 + }, + { + "epoch": 0.25949367088607594, + "grad_norm": 0.5299948453903198, + "learning_rate": 0.0012769910500947954, + "loss": 2.1387, + "step": 2460 + }, + { + "epoch": 0.26054852320675104, + "grad_norm": 0.4779018759727478, + "learning_rate": 0.0012752017339109376, + "loss": 2.1289, + "step": 2470 + }, + { + "epoch": 0.2616033755274262, + "grad_norm": 0.4892433285713196, + 
"learning_rate": 0.0012734065325852029, + "loss": 2.1285, + "step": 2480 + }, + { + "epoch": 0.2626582278481013, + "grad_norm": 0.4813171625137329, + "learning_rate": 0.0012716054662336987, + "loss": 2.1159, + "step": 2490 + }, + { + "epoch": 0.26371308016877637, + "grad_norm": 0.5409374237060547, + "learning_rate": 0.001269798555038252, + "loss": 2.1246, + "step": 2500 + }, + { + "epoch": 0.26476793248945146, + "grad_norm": 0.5110533833503723, + "learning_rate": 0.0012679858192461864, + "loss": 2.1256, + "step": 2510 + }, + { + "epoch": 0.26582278481012656, + "grad_norm": 0.5203260779380798, + "learning_rate": 0.0012661672791700906, + "loss": 2.126, + "step": 2520 + }, + { + "epoch": 0.2668776371308017, + "grad_norm": 0.5206984281539917, + "learning_rate": 0.0012643429551875945, + "loss": 2.1225, + "step": 2530 + }, + { + "epoch": 0.2679324894514768, + "grad_norm": 0.6048010587692261, + "learning_rate": 0.0012625128677411388, + "loss": 2.1237, + "step": 2540 + }, + { + "epoch": 0.2689873417721519, + "grad_norm": 0.5268032550811768, + "learning_rate": 0.0012606770373377475, + "loss": 2.115, + "step": 2550 + }, + { + "epoch": 0.270042194092827, + "grad_norm": 0.5019213557243347, + "learning_rate": 0.0012588354845487959, + "loss": 2.1231, + "step": 2560 + }, + { + "epoch": 0.27109704641350213, + "grad_norm": 0.4948783218860626, + "learning_rate": 0.001256988230009783, + "loss": 2.1185, + "step": 2570 + }, + { + "epoch": 0.2721518987341772, + "grad_norm": 0.5080506801605225, + "learning_rate": 0.0012551352944200976, + "loss": 2.1204, + "step": 2580 + }, + { + "epoch": 0.2732067510548523, + "grad_norm": 0.5241512060165405, + "learning_rate": 0.0012532766985427874, + "loss": 2.1198, + "step": 2590 + }, + { + "epoch": 0.2742616033755274, + "grad_norm": 0.48647183179855347, + "learning_rate": 0.0012514124632043272, + "loss": 2.1217, + "step": 2600 + }, + { + "epoch": 0.27531645569620256, + "grad_norm": 0.5316416025161743, + "learning_rate": 0.0012495426092943842, + 
"loss": 2.1206, + "step": 2610 + }, + { + "epoch": 0.27637130801687765, + "grad_norm": 0.512563169002533, + "learning_rate": 0.0012476671577655845, + "loss": 2.1173, + "step": 2620 + }, + { + "epoch": 0.27742616033755274, + "grad_norm": 0.5111070871353149, + "learning_rate": 0.0012457861296332774, + "loss": 2.1053, + "step": 2630 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 0.50269615650177, + "learning_rate": 0.001243899545975303, + "loss": 2.1209, + "step": 2640 + }, + { + "epoch": 0.2795358649789029, + "grad_norm": 0.5323435664176941, + "learning_rate": 0.0012420074279317515, + "loss": 2.1084, + "step": 2650 + }, + { + "epoch": 0.2805907172995781, + "grad_norm": 0.5737500786781311, + "learning_rate": 0.0012401097967047298, + "loss": 2.1035, + "step": 2660 + }, + { + "epoch": 0.28164556962025317, + "grad_norm": 0.4845636487007141, + "learning_rate": 0.001238206673558122, + "loss": 2.1049, + "step": 2670 + }, + { + "epoch": 0.28270042194092826, + "grad_norm": 0.523668646812439, + "learning_rate": 0.0012362980798173526, + "loss": 2.1049, + "step": 2680 + }, + { + "epoch": 0.28375527426160335, + "grad_norm": 0.47476130723953247, + "learning_rate": 0.0012343840368691462, + "loss": 2.106, + "step": 2690 + }, + { + "epoch": 0.2848101265822785, + "grad_norm": 0.5000209212303162, + "learning_rate": 0.0012324645661612886, + "loss": 2.1058, + "step": 2700 + }, + { + "epoch": 0.2858649789029536, + "grad_norm": 0.5192132592201233, + "learning_rate": 0.0012305396892023867, + "loss": 2.1059, + "step": 2710 + }, + { + "epoch": 0.2869198312236287, + "grad_norm": 0.5271556377410889, + "learning_rate": 0.0012286094275616264, + "loss": 2.1074, + "step": 2720 + }, + { + "epoch": 0.2879746835443038, + "grad_norm": 0.49261364340782166, + "learning_rate": 0.0012266738028685318, + "loss": 2.0866, + "step": 2730 + }, + { + "epoch": 0.2890295358649789, + "grad_norm": 0.4498126804828644, + "learning_rate": 0.001224732836812723, + "loss": 2.1007, + "step": 2740 + }, + { + 
"epoch": 0.290084388185654, + "grad_norm": 0.4905533790588379, + "learning_rate": 0.0012227865511436724, + "loss": 2.1039, + "step": 2750 + }, + { + "epoch": 0.2911392405063291, + "grad_norm": 0.5066385865211487, + "learning_rate": 0.001220834967670461, + "loss": 2.1142, + "step": 2760 + }, + { + "epoch": 0.2921940928270042, + "grad_norm": 0.5031419396400452, + "learning_rate": 0.0012188781082615346, + "loss": 2.1046, + "step": 2770 + }, + { + "epoch": 0.29324894514767935, + "grad_norm": 0.49781933426856995, + "learning_rate": 0.0012169159948444588, + "loss": 2.1062, + "step": 2780 + }, + { + "epoch": 0.29430379746835444, + "grad_norm": 0.4930562376976013, + "learning_rate": 0.001214948649405672, + "loss": 2.0998, + "step": 2790 + }, + { + "epoch": 0.29535864978902954, + "grad_norm": 0.5032762289047241, + "learning_rate": 0.0012129760939902407, + "loss": 2.0968, + "step": 2800 + }, + { + "epoch": 0.29641350210970463, + "grad_norm": 0.4717344641685486, + "learning_rate": 0.0012109983507016114, + "loss": 2.1001, + "step": 2810 + }, + { + "epoch": 0.2974683544303797, + "grad_norm": 0.5556259751319885, + "learning_rate": 0.0012090154417013636, + "loss": 2.1045, + "step": 2820 + }, + { + "epoch": 0.29852320675105487, + "grad_norm": 0.4889775514602661, + "learning_rate": 0.0012070273892089605, + "loss": 2.0763, + "step": 2830 + }, + { + "epoch": 0.29957805907172996, + "grad_norm": 0.4580143988132477, + "learning_rate": 0.0012050342155015012, + "loss": 2.0803, + "step": 2840 + }, + { + "epoch": 0.30063291139240506, + "grad_norm": 0.5380984544754028, + "learning_rate": 0.0012030359429134707, + "loss": 2.0925, + "step": 2850 + }, + { + "epoch": 0.30168776371308015, + "grad_norm": 0.4745176434516907, + "learning_rate": 0.0012010325938364883, + "loss": 2.0945, + "step": 2860 + }, + { + "epoch": 0.3027426160337553, + "grad_norm": 0.5721791982650757, + "learning_rate": 0.0011990241907190592, + "loss": 2.0963, + "step": 2870 + }, + { + "epoch": 0.3037974683544304, + "grad_norm": 
0.4822753667831421, + "learning_rate": 0.001197010756066321, + "loss": 2.0765, + "step": 2880 + }, + { + "epoch": 0.3048523206751055, + "grad_norm": 0.5165274143218994, + "learning_rate": 0.0011949923124397917, + "loss": 2.0843, + "step": 2890 + }, + { + "epoch": 0.3059071729957806, + "grad_norm": 0.5151647329330444, + "learning_rate": 0.001192968882457118, + "loss": 2.0934, + "step": 2900 + }, + { + "epoch": 0.3069620253164557, + "grad_norm": 0.4752923250198364, + "learning_rate": 0.001190940488791821, + "loss": 2.093, + "step": 2910 + }, + { + "epoch": 0.3080168776371308, + "grad_norm": 0.5059858560562134, + "learning_rate": 0.0011889071541730419, + "loss": 2.0813, + "step": 2920 + }, + { + "epoch": 0.3090717299578059, + "grad_norm": 0.4885006844997406, + "learning_rate": 0.001186868901385288, + "loss": 2.0771, + "step": 2930 + }, + { + "epoch": 0.310126582278481, + "grad_norm": 0.507866621017456, + "learning_rate": 0.001184825753268177, + "loss": 2.0844, + "step": 2940 + }, + { + "epoch": 0.3111814345991561, + "grad_norm": 0.46038174629211426, + "learning_rate": 0.0011827777327161814, + "loss": 2.0925, + "step": 2950 + }, + { + "epoch": 0.31223628691983124, + "grad_norm": 0.48411017656326294, + "learning_rate": 0.0011807248626783714, + "loss": 2.0754, + "step": 2960 + }, + { + "epoch": 0.31329113924050633, + "grad_norm": 0.46657443046569824, + "learning_rate": 0.0011786671661581584, + "loss": 2.0678, + "step": 2970 + }, + { + "epoch": 0.3143459915611814, + "grad_norm": 0.543995201587677, + "learning_rate": 0.001176604666213036, + "loss": 2.079, + "step": 2980 + }, + { + "epoch": 0.3154008438818565, + "grad_norm": 0.46147310733795166, + "learning_rate": 0.0011745373859543236, + "loss": 2.0943, + "step": 2990 + }, + { + "epoch": 0.31645569620253167, + "grad_norm": 0.5072245001792908, + "learning_rate": 0.0011724653485469063, + "loss": 2.0684, + "step": 3000 + }, + { + "epoch": 0.31751054852320676, + "grad_norm": 0.5141367316246033, + "learning_rate": 
0.0011703885772089743, + "loss": 2.0833, + "step": 3010 + }, + { + "epoch": 0.31856540084388185, + "grad_norm": 0.48530396819114685, + "learning_rate": 0.0011683070952117646, + "loss": 2.0774, + "step": 3020 + }, + { + "epoch": 0.31962025316455694, + "grad_norm": 0.5095238089561462, + "learning_rate": 0.0011662209258792998, + "loss": 2.0682, + "step": 3030 + }, + { + "epoch": 0.3206751054852321, + "grad_norm": 0.45794999599456787, + "learning_rate": 0.0011641300925881257, + "loss": 2.0775, + "step": 3040 + }, + { + "epoch": 0.3217299578059072, + "grad_norm": 0.47670885920524597, + "learning_rate": 0.0011620346187670501, + "loss": 2.0704, + "step": 3050 + }, + { + "epoch": 0.3227848101265823, + "grad_norm": 0.49431905150413513, + "learning_rate": 0.0011599345278968806, + "loss": 2.0838, + "step": 3060 + }, + { + "epoch": 0.32383966244725737, + "grad_norm": 0.4868985712528229, + "learning_rate": 0.0011578298435101604, + "loss": 2.0752, + "step": 3070 + }, + { + "epoch": 0.32489451476793246, + "grad_norm": 0.5148533582687378, + "learning_rate": 0.0011557205891909062, + "loss": 2.0606, + "step": 3080 + }, + { + "epoch": 0.3259493670886076, + "grad_norm": 0.5089653730392456, + "learning_rate": 0.0011536067885743423, + "loss": 2.0757, + "step": 3090 + }, + { + "epoch": 0.3270042194092827, + "grad_norm": 0.46995869278907776, + "learning_rate": 0.001151488465346637, + "loss": 2.0624, + "step": 3100 + }, + { + "epoch": 0.3280590717299578, + "grad_norm": 0.5032846927642822, + "learning_rate": 0.0011493656432446362, + "loss": 2.0649, + "step": 3110 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 0.5547357201576233, + "learning_rate": 0.0011472383460555983, + "loss": 2.0711, + "step": 3120 + }, + { + "epoch": 0.33016877637130804, + "grad_norm": 0.48872530460357666, + "learning_rate": 0.001145106597616927, + "loss": 2.0795, + "step": 3130 + }, + { + "epoch": 0.33122362869198313, + "grad_norm": 0.5189897418022156, + "learning_rate": 0.001142970421815904, + "loss": 2.0544, 
+ "step": 3140 + }, + { + "epoch": 0.3322784810126582, + "grad_norm": 0.5024029016494751, + "learning_rate": 0.0011408298425894226, + "loss": 2.0678, + "step": 3150 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.5455828905105591, + "learning_rate": 0.0011386848839237186, + "loss": 2.0691, + "step": 3160 + }, + { + "epoch": 0.33438818565400846, + "grad_norm": 0.5488808155059814, + "learning_rate": 0.0011365355698541005, + "loss": 2.0687, + "step": 3170 + }, + { + "epoch": 0.33544303797468356, + "grad_norm": 0.5750316381454468, + "learning_rate": 0.0011343819244646824, + "loss": 2.0635, + "step": 3180 + }, + { + "epoch": 0.33649789029535865, + "grad_norm": 0.5667594075202942, + "learning_rate": 0.001132223971888112, + "loss": 2.0642, + "step": 3190 + }, + { + "epoch": 0.33755274261603374, + "grad_norm": 0.5181024074554443, + "learning_rate": 0.0011300617363053024, + "loss": 2.0559, + "step": 3200 + }, + { + "epoch": 0.33860759493670883, + "grad_norm": 0.49797365069389343, + "learning_rate": 0.0011278952419451586, + "loss": 2.0731, + "step": 3210 + }, + { + "epoch": 0.339662447257384, + "grad_norm": 0.5067893862724304, + "learning_rate": 0.0011257245130843077, + "loss": 2.0638, + "step": 3220 + }, + { + "epoch": 0.3407172995780591, + "grad_norm": 0.46275272965431213, + "learning_rate": 0.0011235495740468265, + "loss": 2.0591, + "step": 3230 + }, + { + "epoch": 0.34177215189873417, + "grad_norm": 0.5135585069656372, + "learning_rate": 0.0011213704492039694, + "loss": 2.0396, + "step": 3240 + }, + { + "epoch": 0.34282700421940926, + "grad_norm": 0.5045833587646484, + "learning_rate": 0.001119187162973894, + "loss": 2.0642, + "step": 3250 + }, + { + "epoch": 0.3438818565400844, + "grad_norm": 0.48112112283706665, + "learning_rate": 0.001116999739821388, + "loss": 2.0522, + "step": 3260 + }, + { + "epoch": 0.3449367088607595, + "grad_norm": 0.5064218640327454, + "learning_rate": 0.0011148082042575968, + "loss": 2.0647, + "step": 3270 + }, + { + "epoch": 
0.3459915611814346, + "grad_norm": 0.5344030857086182, + "learning_rate": 0.0011126125808397461, + "loss": 2.068, + "step": 3280 + }, + { + "epoch": 0.3470464135021097, + "grad_norm": 0.4658438265323639, + "learning_rate": 0.0011104128941708683, + "loss": 2.0499, + "step": 3290 + }, + { + "epoch": 0.34810126582278483, + "grad_norm": 0.4954417943954468, + "learning_rate": 0.001108209168899527, + "loss": 2.061, + "step": 3300 + }, + { + "epoch": 0.3491561181434599, + "grad_norm": 0.5611436367034912, + "learning_rate": 0.0011060014297195396, + "loss": 2.0624, + "step": 3310 + }, + { + "epoch": 0.350210970464135, + "grad_norm": 0.4981188476085663, + "learning_rate": 0.0011037897013697015, + "loss": 2.0656, + "step": 3320 + }, + { + "epoch": 0.3512658227848101, + "grad_norm": 0.4893597662448883, + "learning_rate": 0.0011015740086335092, + "loss": 2.0522, + "step": 3330 + }, + { + "epoch": 0.35232067510548526, + "grad_norm": 0.49495676159858704, + "learning_rate": 0.0010993543763388814, + "loss": 2.0566, + "step": 3340 + }, + { + "epoch": 0.35337552742616035, + "grad_norm": 0.565788209438324, + "learning_rate": 0.0010971308293578814, + "loss": 2.0472, + "step": 3350 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 0.5434722900390625, + "learning_rate": 0.0010949033926064397, + "loss": 2.0432, + "step": 3360 + }, + { + "epoch": 0.35548523206751054, + "grad_norm": 0.48084282875061035, + "learning_rate": 0.0010926720910440725, + "loss": 2.052, + "step": 3370 + }, + { + "epoch": 0.35654008438818563, + "grad_norm": 0.5268546342849731, + "learning_rate": 0.001090436949673603, + "loss": 2.053, + "step": 3380 + }, + { + "epoch": 0.3575949367088608, + "grad_norm": 0.5288143754005432, + "learning_rate": 0.0010881979935408815, + "loss": 2.0483, + "step": 3390 + }, + { + "epoch": 0.35864978902953587, + "grad_norm": 0.5594649314880371, + "learning_rate": 0.0010859552477345052, + "loss": 2.06, + "step": 3400 + }, + { + "epoch": 0.35970464135021096, + "grad_norm": 
0.4980999827384949, + "learning_rate": 0.001083708737385536, + "loss": 2.0506, + "step": 3410 + }, + { + "epoch": 0.36075949367088606, + "grad_norm": 0.5202379822731018, + "learning_rate": 0.0010814584876672187, + "loss": 2.0361, + "step": 3420 + }, + { + "epoch": 0.3618143459915612, + "grad_norm": 0.5064600706100464, + "learning_rate": 0.0010792045237947008, + "loss": 2.049, + "step": 3430 + }, + { + "epoch": 0.3628691983122363, + "grad_norm": 0.5036027431488037, + "learning_rate": 0.0010769468710247478, + "loss": 2.0401, + "step": 3440 + }, + { + "epoch": 0.3639240506329114, + "grad_norm": 0.5233660936355591, + "learning_rate": 0.0010746855546554612, + "loss": 2.0409, + "step": 3450 + }, + { + "epoch": 0.3649789029535865, + "grad_norm": 0.4982072114944458, + "learning_rate": 0.0010724206000259954, + "loss": 2.0317, + "step": 3460 + }, + { + "epoch": 0.36603375527426163, + "grad_norm": 0.47707876563072205, + "learning_rate": 0.0010701520325162727, + "loss": 2.0492, + "step": 3470 + }, + { + "epoch": 0.3670886075949367, + "grad_norm": 0.533072829246521, + "learning_rate": 0.0010678798775467001, + "loss": 2.0543, + "step": 3480 + }, + { + "epoch": 0.3681434599156118, + "grad_norm": 0.5186434388160706, + "learning_rate": 0.0010656041605778832, + "loss": 2.0516, + "step": 3490 + }, + { + "epoch": 0.3691983122362869, + "grad_norm": 0.49277275800704956, + "learning_rate": 0.001063324907110342, + "loss": 2.0304, + "step": 3500 + }, + { + "epoch": 0.370253164556962, + "grad_norm": 0.48430585861206055, + "learning_rate": 0.0010610421426842241, + "loss": 2.0502, + "step": 3510 + }, + { + "epoch": 0.37130801687763715, + "grad_norm": 0.46386954188346863, + "learning_rate": 0.00105875589287902, + "loss": 2.041, + "step": 3520 + }, + { + "epoch": 0.37236286919831224, + "grad_norm": 0.5062665939331055, + "learning_rate": 0.0010564661833132752, + "loss": 2.0525, + "step": 3530 + }, + { + "epoch": 0.37341772151898733, + "grad_norm": 0.5223333835601807, + "learning_rate": 
0.001054173039644303, + "loss": 2.0496, + "step": 3540 + }, + { + "epoch": 0.3744725738396624, + "grad_norm": 0.5330173969268799, + "learning_rate": 0.0010518764875678981, + "loss": 2.0381, + "step": 3550 + }, + { + "epoch": 0.3755274261603376, + "grad_norm": 0.4965090751647949, + "learning_rate": 0.001049576552818048, + "loss": 2.0352, + "step": 3560 + }, + { + "epoch": 0.37658227848101267, + "grad_norm": 0.4718337655067444, + "learning_rate": 0.0010472732611666448, + "loss": 2.0343, + "step": 3570 + }, + { + "epoch": 0.37763713080168776, + "grad_norm": 0.5628668069839478, + "learning_rate": 0.0010449666384231954, + "loss": 2.037, + "step": 3580 + }, + { + "epoch": 0.37869198312236285, + "grad_norm": 0.5800204277038574, + "learning_rate": 0.0010426567104345346, + "loss": 2.048, + "step": 3590 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 0.5274225473403931, + "learning_rate": 0.0010403435030845332, + "loss": 2.0279, + "step": 3600 + }, + { + "epoch": 0.3808016877637131, + "grad_norm": 0.5182410478591919, + "learning_rate": 0.0010380270422938093, + "loss": 2.0315, + "step": 3610 + }, + { + "epoch": 0.3818565400843882, + "grad_norm": 0.5382784605026245, + "learning_rate": 0.0010357073540194362, + "loss": 2.032, + "step": 3620 + }, + { + "epoch": 0.3829113924050633, + "grad_norm": 0.48896029591560364, + "learning_rate": 0.001033384464254655, + "loss": 2.0355, + "step": 3630 + }, + { + "epoch": 0.38396624472573837, + "grad_norm": 0.4995596408843994, + "learning_rate": 0.001031058399028579, + "loss": 2.0339, + "step": 3640 + }, + { + "epoch": 0.3850210970464135, + "grad_norm": 0.48797330260276794, + "learning_rate": 0.001028729184405905, + "loss": 2.0344, + "step": 3650 + }, + { + "epoch": 0.3860759493670886, + "grad_norm": 0.4732353389263153, + "learning_rate": 0.0010263968464866201, + "loss": 2.0293, + "step": 3660 + }, + { + "epoch": 0.3871308016877637, + "grad_norm": 0.5201267600059509, + "learning_rate": 0.0010240614114057098, + "loss": 2.0276, + "step": 
3670 + }, + { + "epoch": 0.3881856540084388, + "grad_norm": 0.5523491501808167, + "learning_rate": 0.001021722905332864, + "loss": 2.0286, + "step": 3680 + }, + { + "epoch": 0.38924050632911394, + "grad_norm": 0.4828028380870819, + "learning_rate": 0.0010193813544721855, + "loss": 2.0414, + "step": 3690 + }, + { + "epoch": 0.39029535864978904, + "grad_norm": 0.5166120529174805, + "learning_rate": 0.001017036785061895, + "loss": 2.0456, + "step": 3700 + }, + { + "epoch": 0.39135021097046413, + "grad_norm": 0.4874964952468872, + "learning_rate": 0.0010146892233740376, + "loss": 2.0274, + "step": 3710 + }, + { + "epoch": 0.3924050632911392, + "grad_norm": 0.5207704305648804, + "learning_rate": 0.0010123386957141883, + "loss": 2.0092, + "step": 3720 + }, + { + "epoch": 0.39345991561181437, + "grad_norm": 0.5510597229003906, + "learning_rate": 0.0010099852284211573, + "loss": 2.0274, + "step": 3730 + }, + { + "epoch": 0.39451476793248946, + "grad_norm": 0.4930988848209381, + "learning_rate": 0.0010076288478666944, + "loss": 2.0252, + "step": 3740 + }, + { + "epoch": 0.39556962025316456, + "grad_norm": 0.4932200312614441, + "learning_rate": 0.0010052695804551946, + "loss": 2.0287, + "step": 3750 + }, + { + "epoch": 0.39662447257383965, + "grad_norm": 0.4679766297340393, + "learning_rate": 0.0010029074526234014, + "loss": 2.0228, + "step": 3760 + }, + { + "epoch": 0.39767932489451474, + "grad_norm": 0.46094825863838196, + "learning_rate": 0.0010005424908401104, + "loss": 2.0362, + "step": 3770 + }, + { + "epoch": 0.3987341772151899, + "grad_norm": 0.47414180636405945, + "learning_rate": 0.0009981747216058728, + "loss": 2.0194, + "step": 3780 + }, + { + "epoch": 0.399789029535865, + "grad_norm": 0.5396379232406616, + "learning_rate": 0.0009958041714526998, + "loss": 2.0245, + "step": 3790 + }, + { + "epoch": 0.4008438818565401, + "grad_norm": 0.47973960638046265, + "learning_rate": 0.0009934308669437627, + "loss": 2.0292, + "step": 3800 + }, + { + "epoch": 
0.40189873417721517, + "grad_norm": 0.5576223134994507, + "learning_rate": 0.0009910548346730972, + "loss": 2.0215, + "step": 3810 + }, + { + "epoch": 0.4029535864978903, + "grad_norm": 0.5662996172904968, + "learning_rate": 0.0009886761012653062, + "loss": 2.0091, + "step": 3820 + }, + { + "epoch": 0.4040084388185654, + "grad_norm": 0.556354820728302, + "learning_rate": 0.000986294693375258, + "loss": 2.011, + "step": 3830 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 0.46897587180137634, + "learning_rate": 0.000983910637687791, + "loss": 2.0319, + "step": 3840 + }, + { + "epoch": 0.4061181434599156, + "grad_norm": 0.4735710322856903, + "learning_rate": 0.0009815239609174138, + "loss": 2.0148, + "step": 3850 + }, + { + "epoch": 0.40717299578059074, + "grad_norm": 0.44942522048950195, + "learning_rate": 0.0009791346898080043, + "loss": 2.0284, + "step": 3860 + }, + { + "epoch": 0.40822784810126583, + "grad_norm": 0.5182238817214966, + "learning_rate": 0.0009767428511325122, + "loss": 2.0122, + "step": 3870 + }, + { + "epoch": 0.4092827004219409, + "grad_norm": 0.5592268705368042, + "learning_rate": 0.0009743484716926576, + "loss": 2.0062, + "step": 3880 + }, + { + "epoch": 0.410337552742616, + "grad_norm": 0.5069847702980042, + "learning_rate": 0.0009719515783186319, + "loss": 2.0086, + "step": 3890 + }, + { + "epoch": 0.41139240506329117, + "grad_norm": 0.516141414642334, + "learning_rate": 0.0009695521978687951, + "loss": 2.0055, + "step": 3900 + }, + { + "epoch": 0.41244725738396626, + "grad_norm": 0.45703059434890747, + "learning_rate": 0.0009671503572293767, + "loss": 2.0101, + "step": 3910 + }, + { + "epoch": 0.41350210970464135, + "grad_norm": 0.5356701612472534, + "learning_rate": 0.0009647460833141742, + "loss": 2.0061, + "step": 3920 + }, + { + "epoch": 0.41455696202531644, + "grad_norm": 0.4885237514972687, + "learning_rate": 0.0009623394030642507, + "loss": 2.0118, + "step": 3930 + }, + { + "epoch": 0.41561181434599154, + "grad_norm": 
0.5552399158477783, + "learning_rate": 0.0009599303434476334, + "loss": 2.0066, + "step": 3940 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 0.5361853241920471, + "learning_rate": 0.0009575189314590118, + "loss": 2.013, + "step": 3950 + }, + { + "epoch": 0.4177215189873418, + "grad_norm": 0.4704023003578186, + "learning_rate": 0.0009551051941194346, + "loss": 2.0108, + "step": 3960 + }, + { + "epoch": 0.41877637130801687, + "grad_norm": 0.5325944423675537, + "learning_rate": 0.0009526891584760071, + "loss": 2.0001, + "step": 3970 + }, + { + "epoch": 0.41983122362869196, + "grad_norm": 0.47970035672187805, + "learning_rate": 0.0009502708516015889, + "loss": 2.0125, + "step": 3980 + }, + { + "epoch": 0.4208860759493671, + "grad_norm": 0.4554794430732727, + "learning_rate": 0.0009478503005944888, + "loss": 2.0027, + "step": 3990 + }, + { + "epoch": 0.4219409282700422, + "grad_norm": 0.5881648063659668, + "learning_rate": 0.0009454275325781632, + "loss": 2.0184, + "step": 4000 + }, + { + "epoch": 0.4229957805907173, + "grad_norm": 0.544071614742279, + "learning_rate": 0.0009430025747009104, + "loss": 2.0048, + "step": 4010 + }, + { + "epoch": 0.4240506329113924, + "grad_norm": 0.4962325990200043, + "learning_rate": 0.0009405754541355677, + "loss": 2.0083, + "step": 4020 + }, + { + "epoch": 0.42510548523206754, + "grad_norm": 0.48236700892448425, + "learning_rate": 0.0009381461980792061, + "loss": 1.9993, + "step": 4030 + }, + { + "epoch": 0.42616033755274263, + "grad_norm": 0.4837944209575653, + "learning_rate": 0.0009357148337528256, + "loss": 2.0082, + "step": 4040 + }, + { + "epoch": 0.4272151898734177, + "grad_norm": 0.46407032012939453, + "learning_rate": 0.0009332813884010511, + "loss": 2.0123, + "step": 4050 + }, + { + "epoch": 0.4282700421940928, + "grad_norm": 0.45752060413360596, + "learning_rate": 0.0009308458892918259, + "loss": 2.0142, + "step": 4060 + }, + { + "epoch": 0.4293248945147679, + "grad_norm": 0.4873388707637787, + "learning_rate": 
0.0009284083637161064, + "loss": 2.007, + "step": 4070 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 0.4802560806274414, + "learning_rate": 0.0009259688389875574, + "loss": 2.0117, + "step": 4080 + }, + { + "epoch": 0.43143459915611815, + "grad_norm": 0.49816930294036865, + "learning_rate": 0.0009235273424422442, + "loss": 2.0057, + "step": 4090 + }, + { + "epoch": 0.43248945147679324, + "grad_norm": 0.4876519739627838, + "learning_rate": 0.0009210839014383282, + "loss": 2.0028, + "step": 4100 + }, + { + "epoch": 0.43354430379746833, + "grad_norm": 0.5174896121025085, + "learning_rate": 0.0009186385433557584, + "loss": 2.0006, + "step": 4110 + }, + { + "epoch": 0.4345991561181435, + "grad_norm": 0.5071644186973572, + "learning_rate": 0.0009161912955959668, + "loss": 2.0021, + "step": 4120 + }, + { + "epoch": 0.4356540084388186, + "grad_norm": 0.4735012948513031, + "learning_rate": 0.000913742185581559, + "loss": 1.9966, + "step": 4130 + }, + { + "epoch": 0.43670886075949367, + "grad_norm": 0.49999234080314636, + "learning_rate": 0.0009112912407560086, + "loss": 1.9972, + "step": 4140 + }, + { + "epoch": 0.43776371308016876, + "grad_norm": 0.47958797216415405, + "learning_rate": 0.0009088384885833495, + "loss": 2.0063, + "step": 4150 + }, + { + "epoch": 0.4388185654008439, + "grad_norm": 0.5233768820762634, + "learning_rate": 0.000906383956547867, + "loss": 2.0007, + "step": 4160 + }, + { + "epoch": 0.439873417721519, + "grad_norm": 0.4979265034198761, + "learning_rate": 0.0009039276721537915, + "loss": 2.0006, + "step": 4170 + }, + { + "epoch": 0.4409282700421941, + "grad_norm": 0.47890499234199524, + "learning_rate": 0.0009014696629249886, + "loss": 1.9946, + "step": 4180 + }, + { + "epoch": 0.4419831223628692, + "grad_norm": 0.5050597190856934, + "learning_rate": 0.0008990099564046522, + "loss": 2.0014, + "step": 4190 + }, + { + "epoch": 0.4430379746835443, + "grad_norm": 0.4564422369003296, + "learning_rate": 0.0008965485801549946, + "loss": 1.9964, + 
"step": 4200 + }, + { + "epoch": 0.4440928270042194, + "grad_norm": 0.4810010492801666, + "learning_rate": 0.000894085561756939, + "loss": 1.9862, + "step": 4210 + }, + { + "epoch": 0.4451476793248945, + "grad_norm": 0.4638417065143585, + "learning_rate": 0.0008916209288098088, + "loss": 1.9953, + "step": 4220 + }, + { + "epoch": 0.4462025316455696, + "grad_norm": 0.5182076692581177, + "learning_rate": 0.0008891547089310198, + "loss": 1.9925, + "step": 4230 + }, + { + "epoch": 0.4472573839662447, + "grad_norm": 0.5522046685218811, + "learning_rate": 0.0008866869297557699, + "loss": 1.9949, + "step": 4240 + }, + { + "epoch": 0.44831223628691985, + "grad_norm": 0.5073745846748352, + "learning_rate": 0.0008842176189367299, + "loss": 1.9971, + "step": 4250 + }, + { + "epoch": 0.44936708860759494, + "grad_norm": 0.5252444744110107, + "learning_rate": 0.0008817468041437329, + "loss": 2.004, + "step": 4260 + }, + { + "epoch": 0.45042194092827004, + "grad_norm": 0.4783976376056671, + "learning_rate": 0.0008792745130634654, + "loss": 1.989, + "step": 4270 + }, + { + "epoch": 0.45147679324894513, + "grad_norm": 0.4764023721218109, + "learning_rate": 0.0008768007733991561, + "loss": 1.9855, + "step": 4280 + }, + { + "epoch": 0.4525316455696203, + "grad_norm": 0.5002824664115906, + "learning_rate": 0.0008743256128702658, + "loss": 2.0001, + "step": 4290 + }, + { + "epoch": 0.45358649789029537, + "grad_norm": 0.5174551010131836, + "learning_rate": 0.0008718490592121768, + "loss": 1.9977, + "step": 4300 + }, + { + "epoch": 0.45464135021097046, + "grad_norm": 0.5227663516998291, + "learning_rate": 0.0008693711401758822, + "loss": 1.9954, + "step": 4310 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 0.5044324994087219, + "learning_rate": 0.0008668918835276747, + "loss": 1.9855, + "step": 4320 + }, + { + "epoch": 0.45675105485232065, + "grad_norm": 0.5458585619926453, + "learning_rate": 0.0008644113170488355, + "loss": 1.9963, + "step": 4330 + }, + { + "epoch": 
0.4578059071729958, + "grad_norm": 0.49686044454574585, + "learning_rate": 0.0008619294685353235, + "loss": 1.986, + "step": 4340 + }, + { + "epoch": 0.4588607594936709, + "grad_norm": 0.4756613075733185, + "learning_rate": 0.0008594463657974627, + "loss": 1.9907, + "step": 4350 + }, + { + "epoch": 0.459915611814346, + "grad_norm": 0.4782853424549103, + "learning_rate": 0.0008569620366596322, + "loss": 2.0023, + "step": 4360 + }, + { + "epoch": 0.4609704641350211, + "grad_norm": 0.46658244729042053, + "learning_rate": 0.000854476508959953, + "loss": 1.9793, + "step": 4370 + }, + { + "epoch": 0.4620253164556962, + "grad_norm": 0.5088354349136353, + "learning_rate": 0.0008519898105499762, + "loss": 1.9839, + "step": 4380 + }, + { + "epoch": 0.4630801687763713, + "grad_norm": 0.5009850859642029, + "learning_rate": 0.0008495019692943721, + "loss": 1.9798, + "step": 4390 + }, + { + "epoch": 0.4641350210970464, + "grad_norm": 0.5240547060966492, + "learning_rate": 0.0008470130130706166, + "loss": 1.9866, + "step": 4400 + }, + { + "epoch": 0.4651898734177215, + "grad_norm": 0.4647315740585327, + "learning_rate": 0.0008445229697686795, + "loss": 1.9992, + "step": 4410 + }, + { + "epoch": 0.46624472573839665, + "grad_norm": 0.4898279309272766, + "learning_rate": 0.0008420318672907119, + "loss": 1.9828, + "step": 4420 + }, + { + "epoch": 0.46729957805907174, + "grad_norm": 0.49553897976875305, + "learning_rate": 0.0008395397335507334, + "loss": 1.9853, + "step": 4430 + }, + { + "epoch": 0.46835443037974683, + "grad_norm": 0.5636717677116394, + "learning_rate": 0.0008370465964743196, + "loss": 1.9851, + "step": 4440 + }, + { + "epoch": 0.4694092827004219, + "grad_norm": 0.5341678261756897, + "learning_rate": 0.0008345524839982886, + "loss": 1.9915, + "step": 4450 + }, + { + "epoch": 0.4704641350210971, + "grad_norm": 0.45431244373321533, + "learning_rate": 0.0008320574240703886, + "loss": 1.977, + "step": 4460 + }, + { + "epoch": 0.47151898734177217, + "grad_norm": 
0.4780481457710266, + "learning_rate": 0.0008295614446489842, + "loss": 1.9886, + "step": 4470 + }, + { + "epoch": 0.47257383966244726, + "grad_norm": 0.5069178938865662, + "learning_rate": 0.0008270645737027441, + "loss": 1.9929, + "step": 4480 + }, + { + "epoch": 0.47362869198312235, + "grad_norm": 0.46919533610343933, + "learning_rate": 0.0008245668392103259, + "loss": 1.978, + "step": 4490 + }, + { + "epoch": 0.47468354430379744, + "grad_norm": 0.45232242345809937, + "learning_rate": 0.0008220682691600645, + "loss": 1.9765, + "step": 4500 + }, + { + "epoch": 0.4757383966244726, + "grad_norm": 0.47807392477989197, + "learning_rate": 0.0008195688915496571, + "loss": 1.9819, + "step": 4510 + }, + { + "epoch": 0.4767932489451477, + "grad_norm": 0.4892328381538391, + "learning_rate": 0.0008170687343858506, + "loss": 1.967, + "step": 4520 + }, + { + "epoch": 0.4778481012658228, + "grad_norm": 0.5366203784942627, + "learning_rate": 0.0008145678256841265, + "loss": 1.9896, + "step": 4530 + }, + { + "epoch": 0.47890295358649787, + "grad_norm": 0.5124426484107971, + "learning_rate": 0.0008120661934683879, + "loss": 1.9847, + "step": 4540 + }, + { + "epoch": 0.479957805907173, + "grad_norm": 0.504324197769165, + "learning_rate": 0.0008095638657706456, + "loss": 1.9823, + "step": 4550 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 0.47886189818382263, + "learning_rate": 0.000807060870630703, + "loss": 1.9749, + "step": 4560 + }, + { + "epoch": 0.4820675105485232, + "grad_norm": 0.5149202942848206, + "learning_rate": 0.000804557236095843, + "loss": 1.952, + "step": 4570 + }, + { + "epoch": 0.4831223628691983, + "grad_norm": 0.4734962582588196, + "learning_rate": 0.0008020529902205129, + "loss": 1.9798, + "step": 4580 + }, + { + "epoch": 0.48417721518987344, + "grad_norm": 0.4645823836326599, + "learning_rate": 0.0007995481610660108, + "loss": 1.9616, + "step": 4590 + }, + { + "epoch": 0.48523206751054854, + "grad_norm": 0.6023940443992615, + "learning_rate": 
0.0007970427767001702, + "loss": 1.9779, + "step": 4600 + }, + { + "epoch": 0.48628691983122363, + "grad_norm": 0.4923885762691498, + "learning_rate": 0.0007945368651970464, + "loss": 1.9781, + "step": 4610 + }, + { + "epoch": 0.4873417721518987, + "grad_norm": 0.49328818917274475, + "learning_rate": 0.0007920304546366013, + "loss": 1.9597, + "step": 4620 + }, + { + "epoch": 0.4883966244725738, + "grad_norm": 0.49655336141586304, + "learning_rate": 0.000789523573104389, + "loss": 1.9784, + "step": 4630 + }, + { + "epoch": 0.48945147679324896, + "grad_norm": 0.510601818561554, + "learning_rate": 0.0007870162486912414, + "loss": 1.9781, + "step": 4640 + }, + { + "epoch": 0.49050632911392406, + "grad_norm": 0.5418944358825684, + "learning_rate": 0.0007845085094929527, + "loss": 1.9815, + "step": 4650 + }, + { + "epoch": 0.49156118143459915, + "grad_norm": 0.47556352615356445, + "learning_rate": 0.0007820003836099649, + "loss": 1.9711, + "step": 4660 + }, + { + "epoch": 0.49261603375527424, + "grad_norm": 0.47961628437042236, + "learning_rate": 0.0007794918991470537, + "loss": 1.9703, + "step": 4670 + }, + { + "epoch": 0.4936708860759494, + "grad_norm": 0.5001896023750305, + "learning_rate": 0.0007769830842130119, + "loss": 1.9635, + "step": 4680 + }, + { + "epoch": 0.4947257383966245, + "grad_norm": 0.48832863569259644, + "learning_rate": 0.0007744739669203361, + "loss": 1.9656, + "step": 4690 + }, + { + "epoch": 0.4957805907172996, + "grad_norm": 0.4763779640197754, + "learning_rate": 0.0007719645753849108, + "loss": 1.9759, + "step": 4700 + }, + { + "epoch": 0.49683544303797467, + "grad_norm": 0.47631484270095825, + "learning_rate": 0.0007694549377256932, + "loss": 1.9744, + "step": 4710 + }, + { + "epoch": 0.4978902953586498, + "grad_norm": 0.48810771107673645, + "learning_rate": 0.0007669450820643987, + "loss": 1.9692, + "step": 4720 + }, + { + "epoch": 0.4989451476793249, + "grad_norm": 0.5503652095794678, + "learning_rate": 0.0007644350365251855, + "loss": 
1.9687, + "step": 4730 + }, + { + "epoch": 0.5, + "grad_norm": 0.5370628833770752, + "learning_rate": 0.0007619248292343399, + "loss": 1.9663, + "step": 4740 + }, + { + "epoch": 0.5010548523206751, + "grad_norm": 0.4779494106769562, + "learning_rate": 0.0007594144883199599, + "loss": 1.985, + "step": 4750 + }, + { + "epoch": 0.5021097046413502, + "grad_norm": 0.4937244951725006, + "learning_rate": 0.0007569040419116413, + "loss": 1.968, + "step": 4760 + }, + { + "epoch": 0.5031645569620253, + "grad_norm": 0.519016444683075, + "learning_rate": 0.000754393518140162, + "loss": 1.9652, + "step": 4770 + }, + { + "epoch": 0.5042194092827004, + "grad_norm": 0.5004549622535706, + "learning_rate": 0.0007518829451371665, + "loss": 1.9757, + "step": 4780 + }, + { + "epoch": 0.5052742616033755, + "grad_norm": 0.481294721364975, + "learning_rate": 0.0007493723510348516, + "loss": 1.9628, + "step": 4790 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 0.5113863348960876, + "learning_rate": 0.0007468617639656496, + "loss": 1.9665, + "step": 4800 + }, + { + "epoch": 0.5073839662447257, + "grad_norm": 0.4526672959327698, + "learning_rate": 0.0007443512120619144, + "loss": 1.964, + "step": 4810 + }, + { + "epoch": 0.5084388185654009, + "grad_norm": 0.49948233366012573, + "learning_rate": 0.0007418407234556067, + "loss": 1.9838, + "step": 4820 + }, + { + "epoch": 0.509493670886076, + "grad_norm": 0.4929894804954529, + "learning_rate": 0.0007393303262779767, + "loss": 1.9664, + "step": 4830 + }, + { + "epoch": 0.510548523206751, + "grad_norm": 0.4841291308403015, + "learning_rate": 0.0007368200486592507, + "loss": 1.9604, + "step": 4840 + }, + { + "epoch": 0.5116033755274262, + "grad_norm": 0.554615318775177, + "learning_rate": 0.0007343099187283149, + "loss": 1.9764, + "step": 4850 + }, + { + "epoch": 0.5126582278481012, + "grad_norm": 0.5232459902763367, + "learning_rate": 0.0007317999646124011, + "loss": 1.9575, + "step": 4860 + }, + { + "epoch": 0.5137130801687764, + 
"grad_norm": 0.5188080668449402, + "learning_rate": 0.0007292902144367704, + "loss": 1.9522, + "step": 4870 + }, + { + "epoch": 0.5147679324894515, + "grad_norm": 0.49040526151657104, + "learning_rate": 0.0007267806963243995, + "loss": 1.9739, + "step": 4880 + }, + { + "epoch": 0.5158227848101266, + "grad_norm": 0.4828893840312958, + "learning_rate": 0.0007242714383956639, + "loss": 1.961, + "step": 4890 + }, + { + "epoch": 0.5168776371308017, + "grad_norm": 0.494418203830719, + "learning_rate": 0.000721762468768024, + "loss": 1.9679, + "step": 4900 + }, + { + "epoch": 0.5179324894514767, + "grad_norm": 0.50196373462677, + "learning_rate": 0.0007192538155557094, + "loss": 1.9647, + "step": 4910 + }, + { + "epoch": 0.5189873417721519, + "grad_norm": 0.5046610236167908, + "learning_rate": 0.0007167455068694046, + "loss": 1.9556, + "step": 4920 + }, + { + "epoch": 0.520042194092827, + "grad_norm": 0.4985102415084839, + "learning_rate": 0.000714237570815933, + "loss": 1.962, + "step": 4930 + }, + { + "epoch": 0.5210970464135021, + "grad_norm": 0.5029101967811584, + "learning_rate": 0.0007117300354979423, + "loss": 1.9584, + "step": 4940 + }, + { + "epoch": 0.5221518987341772, + "grad_norm": 0.47676149010658264, + "learning_rate": 0.000709222929013591, + "loss": 1.9719, + "step": 4950 + }, + { + "epoch": 0.5232067510548524, + "grad_norm": 0.49149250984191895, + "learning_rate": 0.0007067162794562309, + "loss": 1.9493, + "step": 4960 + }, + { + "epoch": 0.5242616033755274, + "grad_norm": 0.4784083664417267, + "learning_rate": 0.0007042101149140943, + "loss": 1.9625, + "step": 4970 + }, + { + "epoch": 0.5253164556962026, + "grad_norm": 0.5403769612312317, + "learning_rate": 0.0007017044634699787, + "loss": 1.9534, + "step": 4980 + }, + { + "epoch": 0.5263713080168776, + "grad_norm": 0.47839662432670593, + "learning_rate": 0.0006991993532009319, + "loss": 1.9515, + "step": 4990 + }, + { + "epoch": 0.5274261603375527, + "grad_norm": 0.5058487057685852, + "learning_rate": 
0.0006966948121779378, + "loss": 1.967, + "step": 5000 + }, + { + "epoch": 0.5284810126582279, + "grad_norm": 0.49358004331588745, + "learning_rate": 0.000694190868465601, + "loss": 1.9564, + "step": 5010 + }, + { + "epoch": 0.5295358649789029, + "grad_norm": 0.4729559123516083, + "learning_rate": 0.0006916875501218343, + "loss": 1.9484, + "step": 5020 + }, + { + "epoch": 0.5305907172995781, + "grad_norm": 0.4745205342769623, + "learning_rate": 0.0006891848851975416, + "loss": 1.9392, + "step": 5030 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 0.4787537157535553, + "learning_rate": 0.0006866829017363054, + "loss": 1.9612, + "step": 5040 + }, + { + "epoch": 0.5327004219409283, + "grad_norm": 0.47832751274108887, + "learning_rate": 0.0006841816277740722, + "loss": 1.9467, + "step": 5050 + }, + { + "epoch": 0.5337552742616034, + "grad_norm": 0.4823712408542633, + "learning_rate": 0.0006816810913388379, + "loss": 1.9629, + "step": 5060 + }, + { + "epoch": 0.5348101265822784, + "grad_norm": 0.48260727524757385, + "learning_rate": 0.0006791813204503342, + "loss": 1.9539, + "step": 5070 + }, + { + "epoch": 0.5358649789029536, + "grad_norm": 0.47602495551109314, + "learning_rate": 0.0006766823431197147, + "loss": 1.9662, + "step": 5080 + }, + { + "epoch": 0.5369198312236287, + "grad_norm": 0.47686123847961426, + "learning_rate": 0.0006741841873492406, + "loss": 1.9548, + "step": 5090 + }, + { + "epoch": 0.5379746835443038, + "grad_norm": 0.5278387665748596, + "learning_rate": 0.0006716868811319671, + "loss": 1.9383, + "step": 5100 + }, + { + "epoch": 0.5390295358649789, + "grad_norm": 0.5025494694709778, + "learning_rate": 0.0006691904524514297, + "loss": 1.9511, + "step": 5110 + }, + { + "epoch": 0.540084388185654, + "grad_norm": 0.502526581287384, + "learning_rate": 0.0006666949292813306, + "loss": 1.9496, + "step": 5120 + }, + { + "epoch": 0.5411392405063291, + "grad_norm": 0.5238314270973206, + "learning_rate": 0.0006642003395852258, + "loss": 1.9485, + 
"step": 5130 + }, + { + "epoch": 0.5421940928270043, + "grad_norm": 0.4985741972923279, + "learning_rate": 0.0006617067113162103, + "loss": 1.9628, + "step": 5140 + }, + { + "epoch": 0.5432489451476793, + "grad_norm": 0.5345995426177979, + "learning_rate": 0.0006592140724166073, + "loss": 1.9619, + "step": 5150 + }, + { + "epoch": 0.5443037974683544, + "grad_norm": 0.4875504970550537, + "learning_rate": 0.0006567224508176523, + "loss": 1.9515, + "step": 5160 + }, + { + "epoch": 0.5453586497890295, + "grad_norm": 0.47637832164764404, + "learning_rate": 0.0006542318744391821, + "loss": 1.9527, + "step": 5170 + }, + { + "epoch": 0.5464135021097046, + "grad_norm": 0.5713498592376709, + "learning_rate": 0.0006517423711893209, + "loss": 1.9637, + "step": 5180 + }, + { + "epoch": 0.5474683544303798, + "grad_norm": 0.5044013261795044, + "learning_rate": 0.0006492539689641685, + "loss": 1.9448, + "step": 5190 + }, + { + "epoch": 0.5485232067510548, + "grad_norm": 0.4394160509109497, + "learning_rate": 0.0006467666956474865, + "loss": 1.9544, + "step": 5200 + }, + { + "epoch": 0.54957805907173, + "grad_norm": 0.4983683228492737, + "learning_rate": 0.0006442805791103873, + "loss": 1.9348, + "step": 5210 + }, + { + "epoch": 0.5506329113924051, + "grad_norm": 0.5036283731460571, + "learning_rate": 0.0006417956472110205, + "loss": 1.9497, + "step": 5220 + }, + { + "epoch": 0.5516877637130801, + "grad_norm": 0.5341352820396423, + "learning_rate": 0.0006393119277942614, + "loss": 1.9484, + "step": 5230 + }, + { + "epoch": 0.5527426160337553, + "grad_norm": 0.4903087019920349, + "learning_rate": 0.0006368294486913987, + "loss": 1.9501, + "step": 5240 + }, + { + "epoch": 0.5537974683544303, + "grad_norm": 0.4678954482078552, + "learning_rate": 0.0006343482377198232, + "loss": 1.9447, + "step": 5250 + }, + { + "epoch": 0.5548523206751055, + "grad_norm": 0.49631252884864807, + "learning_rate": 0.0006318683226827151, + "loss": 1.9499, + "step": 5260 + }, + { + "epoch": 
0.5559071729957806, + "grad_norm": 0.4718645513057709, + "learning_rate": 0.0006293897313687331, + "loss": 1.9549, + "step": 5270 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 0.4794149696826935, + "learning_rate": 0.0006269124915517037, + "loss": 1.944, + "step": 5280 + }, + { + "epoch": 0.5580168776371308, + "grad_norm": 0.46382299065589905, + "learning_rate": 0.0006244366309903084, + "loss": 1.9433, + "step": 5290 + }, + { + "epoch": 0.5590717299578059, + "grad_norm": 0.4883486032485962, + "learning_rate": 0.0006219621774277737, + "loss": 1.947, + "step": 5300 + }, + { + "epoch": 0.560126582278481, + "grad_norm": 0.49683550000190735, + "learning_rate": 0.00061948915859156, + "loss": 1.9398, + "step": 5310 + }, + { + "epoch": 0.5611814345991561, + "grad_norm": 0.4676542282104492, + "learning_rate": 0.0006170176021930509, + "loss": 1.9366, + "step": 5320 + }, + { + "epoch": 0.5622362869198312, + "grad_norm": 0.49859514832496643, + "learning_rate": 0.0006145475359272424, + "loss": 1.948, + "step": 5330 + }, + { + "epoch": 0.5632911392405063, + "grad_norm": 0.5039767026901245, + "learning_rate": 0.0006120789874724336, + "loss": 1.9444, + "step": 5340 + }, + { + "epoch": 0.5643459915611815, + "grad_norm": 0.5538092255592346, + "learning_rate": 0.0006096119844899151, + "loss": 1.9324, + "step": 5350 + }, + { + "epoch": 0.5654008438818565, + "grad_norm": 0.511517345905304, + "learning_rate": 0.0006071465546236601, + "loss": 1.9433, + "step": 5360 + }, + { + "epoch": 0.5664556962025317, + "grad_norm": 0.51280677318573, + "learning_rate": 0.0006046827255000135, + "loss": 1.9467, + "step": 5370 + }, + { + "epoch": 0.5675105485232067, + "grad_norm": 0.4765148460865021, + "learning_rate": 0.0006022205247273845, + "loss": 1.936, + "step": 5380 + }, + { + "epoch": 0.5685654008438819, + "grad_norm": 0.4687923491001129, + "learning_rate": 0.0005997599798959343, + "loss": 1.9347, + "step": 5390 + }, + { + "epoch": 0.569620253164557, + "grad_norm": 0.47096237540245056, + 
"learning_rate": 0.0005973011185772694, + "loss": 1.9337, + "step": 5400 + }, + { + "epoch": 0.570675105485232, + "grad_norm": 0.4991110861301422, + "learning_rate": 0.0005948439683241318, + "loss": 1.939, + "step": 5410 + }, + { + "epoch": 0.5717299578059072, + "grad_norm": 0.47963979840278625, + "learning_rate": 0.0005923885566700896, + "loss": 1.9336, + "step": 5420 + }, + { + "epoch": 0.5727848101265823, + "grad_norm": 0.4666420817375183, + "learning_rate": 0.0005899349111292293, + "loss": 1.9314, + "step": 5430 + }, + { + "epoch": 0.5738396624472574, + "grad_norm": 0.4761965572834015, + "learning_rate": 0.0005874830591958474, + "loss": 1.927, + "step": 5440 + }, + { + "epoch": 0.5748945147679325, + "grad_norm": 0.4942338168621063, + "learning_rate": 0.000585033028344142, + "loss": 1.9384, + "step": 5450 + }, + { + "epoch": 0.5759493670886076, + "grad_norm": 0.4762774109840393, + "learning_rate": 0.0005825848460279048, + "loss": 1.936, + "step": 5460 + }, + { + "epoch": 0.5770042194092827, + "grad_norm": 0.47518807649612427, + "learning_rate": 0.0005801385396802146, + "loss": 1.937, + "step": 5470 + }, + { + "epoch": 0.5780590717299579, + "grad_norm": 0.48199406266212463, + "learning_rate": 0.0005776941367131282, + "loss": 1.9404, + "step": 5480 + }, + { + "epoch": 0.5791139240506329, + "grad_norm": 0.5036256313323975, + "learning_rate": 0.0005752516645173745, + "loss": 1.9368, + "step": 5490 + }, + { + "epoch": 0.580168776371308, + "grad_norm": 0.5001112818717957, + "learning_rate": 0.0005728111504620472, + "loss": 1.9354, + "step": 5500 + }, + { + "epoch": 0.5812236286919831, + "grad_norm": 0.5477650761604309, + "learning_rate": 0.0005703726218942976, + "loss": 1.9427, + "step": 5510 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 0.46100470423698425, + "learning_rate": 0.0005679361061390295, + "loss": 1.9303, + "step": 5520 + }, + { + "epoch": 0.5833333333333334, + "grad_norm": 0.48838087916374207, + "learning_rate": 0.0005655016304985908, + "loss": 
1.9282, + "step": 5530 + }, + { + "epoch": 0.5843881856540084, + "grad_norm": 0.46484336256980896, + "learning_rate": 0.0005630692222524709, + "loss": 1.9231, + "step": 5540 + }, + { + "epoch": 0.5854430379746836, + "grad_norm": 0.487308144569397, + "learning_rate": 0.0005606389086569911, + "loss": 1.9412, + "step": 5550 + }, + { + "epoch": 0.5864978902953587, + "grad_norm": 0.46205934882164, + "learning_rate": 0.0005582107169450023, + "loss": 1.9488, + "step": 5560 + }, + { + "epoch": 0.5875527426160337, + "grad_norm": 0.4625298082828522, + "learning_rate": 0.0005557846743255783, + "loss": 1.926, + "step": 5570 + }, + { + "epoch": 0.5886075949367089, + "grad_norm": 0.4611247479915619, + "learning_rate": 0.0005533608079837109, + "loss": 1.9252, + "step": 5580 + }, + { + "epoch": 0.5896624472573839, + "grad_norm": 0.48952651023864746, + "learning_rate": 0.0005509391450800061, + "loss": 1.9312, + "step": 5590 + }, + { + "epoch": 0.5907172995780591, + "grad_norm": 0.4738491177558899, + "learning_rate": 0.0005485197127503795, + "loss": 1.9287, + "step": 5600 + }, + { + "epoch": 0.5917721518987342, + "grad_norm": 0.4834357500076294, + "learning_rate": 0.0005461025381057516, + "loss": 1.9321, + "step": 5610 + }, + { + "epoch": 0.5928270042194093, + "grad_norm": 0.492986261844635, + "learning_rate": 0.0005436876482317444, + "loss": 1.941, + "step": 5620 + }, + { + "epoch": 0.5938818565400844, + "grad_norm": 0.497835248708725, + "learning_rate": 0.0005412750701883782, + "loss": 1.9328, + "step": 5630 + }, + { + "epoch": 0.5949367088607594, + "grad_norm": 0.5721185803413391, + "learning_rate": 0.0005388648310097682, + "loss": 1.9428, + "step": 5640 + }, + { + "epoch": 0.5959915611814346, + "grad_norm": 0.4794788956642151, + "learning_rate": 0.000536456957703821, + "loss": 1.9397, + "step": 5650 + }, + { + "epoch": 0.5970464135021097, + "grad_norm": 0.4782160818576813, + "learning_rate": 0.0005340514772519324, + "loss": 1.9319, + "step": 5660 + }, + { + "epoch": 
0.5981012658227848, + "grad_norm": 0.483834832906723, + "learning_rate": 0.0005316484166086863, + "loss": 1.9407, + "step": 5670 + }, + { + "epoch": 0.5991561181434599, + "grad_norm": 0.5006335973739624, + "learning_rate": 0.00052924780270155, + "loss": 1.9406, + "step": 5680 + }, + { + "epoch": 0.6002109704641351, + "grad_norm": 0.48369693756103516, + "learning_rate": 0.0005268496624305747, + "loss": 1.9203, + "step": 5690 + }, + { + "epoch": 0.6012658227848101, + "grad_norm": 0.49795201420783997, + "learning_rate": 0.0005244540226680931, + "loss": 1.9225, + "step": 5700 + }, + { + "epoch": 0.6023206751054853, + "grad_norm": 0.5132947564125061, + "learning_rate": 0.0005220609102584185, + "loss": 1.9275, + "step": 5710 + }, + { + "epoch": 0.6033755274261603, + "grad_norm": 0.5041916370391846, + "learning_rate": 0.0005196703520175437, + "loss": 1.9254, + "step": 5720 + }, + { + "epoch": 0.6044303797468354, + "grad_norm": 0.5099890232086182, + "learning_rate": 0.0005172823747328415, + "loss": 1.924, + "step": 5730 + }, + { + "epoch": 0.6054852320675106, + "grad_norm": 0.5221855044364929, + "learning_rate": 0.0005148970051627632, + "loss": 1.9272, + "step": 5740 + }, + { + "epoch": 0.6065400843881856, + "grad_norm": 0.4914659261703491, + "learning_rate": 0.0005125142700365394, + "loss": 1.8996, + "step": 5750 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 0.47027772665023804, + "learning_rate": 0.000510134196053881, + "loss": 1.9116, + "step": 5760 + }, + { + "epoch": 0.6086497890295358, + "grad_norm": 0.5114367008209229, + "learning_rate": 0.0005077568098846789, + "loss": 1.9156, + "step": 5770 + }, + { + "epoch": 0.609704641350211, + "grad_norm": 0.49772781133651733, + "learning_rate": 0.000505382138168706, + "loss": 1.9204, + "step": 5780 + }, + { + "epoch": 0.6107594936708861, + "grad_norm": 0.4637357294559479, + "learning_rate": 0.0005030102075153181, + "loss": 1.9164, + "step": 5790 + }, + { + "epoch": 0.6118143459915611, + "grad_norm": 
0.4647553265094757, + "learning_rate": 0.0005006410445031569, + "loss": 1.9234, + "step": 5800 + }, + { + "epoch": 0.6128691983122363, + "grad_norm": 0.44883203506469727, + "learning_rate": 0.0004982746756798507, + "loss": 1.9273, + "step": 5810 + }, + { + "epoch": 0.6139240506329114, + "grad_norm": 0.49049991369247437, + "learning_rate": 0.0004959111275617174, + "loss": 1.9387, + "step": 5820 + }, + { + "epoch": 0.6149789029535865, + "grad_norm": 0.5016384720802307, + "learning_rate": 0.0004935504266334677, + "loss": 1.9165, + "step": 5830 + }, + { + "epoch": 0.6160337552742616, + "grad_norm": 0.460738867521286, + "learning_rate": 0.0004911925993479085, + "loss": 1.9355, + "step": 5840 + }, + { + "epoch": 0.6170886075949367, + "grad_norm": 0.4522412419319153, + "learning_rate": 0.0004888376721256456, + "loss": 1.9372, + "step": 5850 + }, + { + "epoch": 0.6181434599156118, + "grad_norm": 0.47224026918411255, + "learning_rate": 0.00048648567135478805, + "loss": 1.9252, + "step": 5860 + }, + { + "epoch": 0.619198312236287, + "grad_norm": 0.5033979415893555, + "learning_rate": 0.0004841366233906538, + "loss": 1.9162, + "step": 5870 + }, + { + "epoch": 0.620253164556962, + "grad_norm": 0.48849156498908997, + "learning_rate": 0.0004817905545554717, + "loss": 1.9294, + "step": 5880 + }, + { + "epoch": 0.6213080168776371, + "grad_norm": 0.5000433921813965, + "learning_rate": 0.00047944749113808884, + "loss": 1.9156, + "step": 5890 + }, + { + "epoch": 0.6223628691983122, + "grad_norm": 0.48403510451316833, + "learning_rate": 0.00047710745939367474, + "loss": 1.92, + "step": 5900 + }, + { + "epoch": 0.6234177215189873, + "grad_norm": 0.5047129988670349, + "learning_rate": 0.0004747704855434278, + "loss": 1.9148, + "step": 5910 + }, + { + "epoch": 0.6244725738396625, + "grad_norm": 0.5028907060623169, + "learning_rate": 0.0004724365957742809, + "loss": 1.9129, + "step": 5920 + }, + { + "epoch": 0.6255274261603375, + "grad_norm": 0.4616571068763733, + "learning_rate": 
0.00047010581623860883, + "loss": 1.9169, + "step": 5930 + }, + { + "epoch": 0.6265822784810127, + "grad_norm": 0.48701074719429016, + "learning_rate": 0.0004677781730539342, + "loss": 1.9258, + "step": 5940 + }, + { + "epoch": 0.6276371308016878, + "grad_norm": 0.5195056200027466, + "learning_rate": 0.0004654536923026356, + "loss": 1.9104, + "step": 5950 + }, + { + "epoch": 0.6286919831223629, + "grad_norm": 0.471174031496048, + "learning_rate": 0.00046313240003165466, + "loss": 1.9146, + "step": 5960 + }, + { + "epoch": 0.629746835443038, + "grad_norm": 0.46550247073173523, + "learning_rate": 0.0004608143222522048, + "loss": 1.9169, + "step": 5970 + }, + { + "epoch": 0.630801687763713, + "grad_norm": 0.4861999750137329, + "learning_rate": 0.0004584994849394795, + "loss": 1.9026, + "step": 5980 + }, + { + "epoch": 0.6318565400843882, + "grad_norm": 0.46461454033851624, + "learning_rate": 0.0004561879140323607, + "loss": 1.9203, + "step": 5990 + }, + { + "epoch": 0.6329113924050633, + "grad_norm": 0.4921039938926697, + "learning_rate": 0.0004538796354331298, + "loss": 1.9272, + "step": 6000 + }, + { + "epoch": 0.6339662447257384, + "grad_norm": 0.5279014706611633, + "learning_rate": 0.0004515746750071754, + "loss": 1.9177, + "step": 6010 + }, + { + "epoch": 0.6350210970464135, + "grad_norm": 0.5009140968322754, + "learning_rate": 0.0004492730585827046, + "loss": 1.9195, + "step": 6020 + }, + { + "epoch": 0.6360759493670886, + "grad_norm": 0.4612388610839844, + "learning_rate": 0.0004469748119504529, + "loss": 1.9091, + "step": 6030 + }, + { + "epoch": 0.6371308016877637, + "grad_norm": 0.4665760397911072, + "learning_rate": 0.0004446799608633964, + "loss": 1.9241, + "step": 6040 + }, + { + "epoch": 0.6381856540084389, + "grad_norm": 0.46882158517837524, + "learning_rate": 0.00044238853103646154, + "loss": 1.902, + "step": 6050 + }, + { + "epoch": 0.6392405063291139, + "grad_norm": 0.49064722657203674, + "learning_rate": 0.00044010054814623925, + "loss": 1.9185, + 
"step": 6060 + }, + { + "epoch": 0.640295358649789, + "grad_norm": 0.5043464303016663, + "learning_rate": 0.0004378160378306944, + "loss": 1.9107, + "step": 6070 + }, + { + "epoch": 0.6413502109704642, + "grad_norm": 0.5139796137809753, + "learning_rate": 0.00043553502568888095, + "loss": 1.9108, + "step": 6080 + }, + { + "epoch": 0.6424050632911392, + "grad_norm": 0.5072844624519348, + "learning_rate": 0.0004332575372806534, + "loss": 1.9116, + "step": 6090 + }, + { + "epoch": 0.6434599156118144, + "grad_norm": 0.48266708850860596, + "learning_rate": 0.00043098359812638145, + "loss": 1.9042, + "step": 6100 + }, + { + "epoch": 0.6445147679324894, + "grad_norm": 0.47719070315361023, + "learning_rate": 0.00042871323370666383, + "loss": 1.9196, + "step": 6110 + }, + { + "epoch": 0.6455696202531646, + "grad_norm": 0.5088273286819458, + "learning_rate": 0.0004264464694620421, + "loss": 1.9116, + "step": 6120 + }, + { + "epoch": 0.6466244725738397, + "grad_norm": 0.5076519250869751, + "learning_rate": 0.000424183330792717, + "loss": 1.9098, + "step": 6130 + }, + { + "epoch": 0.6476793248945147, + "grad_norm": 0.5073622465133667, + "learning_rate": 0.0004219238430582621, + "loss": 1.9136, + "step": 6140 + }, + { + "epoch": 0.6487341772151899, + "grad_norm": 0.506477952003479, + "learning_rate": 0.0004196680315773408, + "loss": 1.9194, + "step": 6150 + }, + { + "epoch": 0.6497890295358649, + "grad_norm": 0.4859670102596283, + "learning_rate": 0.00041741592162742214, + "loss": 1.913, + "step": 6160 + }, + { + "epoch": 0.6508438818565401, + "grad_norm": 0.4837457537651062, + "learning_rate": 0.0004151675384444978, + "loss": 1.893, + "step": 6170 + }, + { + "epoch": 0.6518987341772152, + "grad_norm": 0.4888227880001068, + "learning_rate": 0.00041292290722279914, + "loss": 1.9135, + "step": 6180 + }, + { + "epoch": 0.6529535864978903, + "grad_norm": 0.4768291115760803, + "learning_rate": 0.00041068205311451517, + "loss": 1.9171, + "step": 6190 + }, + { + "epoch": 
0.6540084388185654, + "grad_norm": 0.4558783173561096, + "learning_rate": 0.00040844500122951026, + "loss": 1.9097, + "step": 6200 + }, + { + "epoch": 0.6550632911392406, + "grad_norm": 0.4835197329521179, + "learning_rate": 0.00040621177663504313, + "loss": 1.9045, + "step": 6210 + }, + { + "epoch": 0.6561181434599156, + "grad_norm": 0.4831680655479431, + "learning_rate": 0.00040398240435548583, + "loss": 1.91, + "step": 6220 + }, + { + "epoch": 0.6571729957805907, + "grad_norm": 0.47730427980422974, + "learning_rate": 0.00040175690937204324, + "loss": 1.9024, + "step": 6230 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 0.5078945159912109, + "learning_rate": 0.00039953531662247343, + "loss": 1.909, + "step": 6240 + }, + { + "epoch": 0.6592827004219409, + "grad_norm": 0.4893501400947571, + "learning_rate": 0.0003973176510008075, + "loss": 1.9144, + "step": 6250 + }, + { + "epoch": 0.6603375527426161, + "grad_norm": 0.4932333528995514, + "learning_rate": 0.00039510393735707233, + "loss": 1.9057, + "step": 6260 + }, + { + "epoch": 0.6613924050632911, + "grad_norm": 0.4865145981311798, + "learning_rate": 0.00039289420049700986, + "loss": 1.9143, + "step": 6270 + }, + { + "epoch": 0.6624472573839663, + "grad_norm": 0.5083089470863342, + "learning_rate": 0.0003906884651818006, + "loss": 1.9143, + "step": 6280 + }, + { + "epoch": 0.6635021097046413, + "grad_norm": 0.5348116755485535, + "learning_rate": 0.00038848675612778577, + "loss": 1.9087, + "step": 6290 + }, + { + "epoch": 0.6645569620253164, + "grad_norm": 0.5018810033798218, + "learning_rate": 0.00038628909800619046, + "loss": 1.9019, + "step": 6300 + }, + { + "epoch": 0.6656118143459916, + "grad_norm": 0.4681297540664673, + "learning_rate": 0.0003840955154428467, + "loss": 1.9033, + "step": 6310 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.4845386743545532, + "learning_rate": 0.00038190603301791864, + "loss": 1.8988, + "step": 6320 + }, + { + "epoch": 0.6677215189873418, + "grad_norm": 
0.5466925501823425, + "learning_rate": 0.0003797206752656258, + "loss": 1.9077, + "step": 6330 + }, + { + "epoch": 0.6687763713080169, + "grad_norm": 0.5056781768798828, + "learning_rate": 0.0003775394666739688, + "loss": 1.9027, + "step": 6340 + }, + { + "epoch": 0.669831223628692, + "grad_norm": 0.4954226016998291, + "learning_rate": 0.00037536243168445507, + "loss": 1.9035, + "step": 6350 + }, + { + "epoch": 0.6708860759493671, + "grad_norm": 0.4858235716819763, + "learning_rate": 0.0003731895946918246, + "loss": 1.9019, + "step": 6360 + }, + { + "epoch": 0.6719409282700421, + "grad_norm": 0.4740090072154999, + "learning_rate": 0.0003710209800437769, + "loss": 1.8885, + "step": 6370 + }, + { + "epoch": 0.6729957805907173, + "grad_norm": 0.4780462086200714, + "learning_rate": 0.00036885661204069767, + "loss": 1.8908, + "step": 6380 + }, + { + "epoch": 0.6740506329113924, + "grad_norm": 0.5019422173500061, + "learning_rate": 0.0003666965149353878, + "loss": 1.9064, + "step": 6390 + }, + { + "epoch": 0.6751054852320675, + "grad_norm": 0.47079998254776, + "learning_rate": 0.0003645407129327898, + "loss": 1.9038, + "step": 6400 + }, + { + "epoch": 0.6761603375527426, + "grad_norm": 0.5257681012153625, + "learning_rate": 0.00036238923018971783, + "loss": 1.9031, + "step": 6410 + }, + { + "epoch": 0.6772151898734177, + "grad_norm": 0.5053163170814514, + "learning_rate": 0.0003602420908145865, + "loss": 1.8942, + "step": 6420 + }, + { + "epoch": 0.6782700421940928, + "grad_norm": 0.4894912838935852, + "learning_rate": 0.00035809931886714093, + "loss": 1.903, + "step": 6430 + }, + { + "epoch": 0.679324894514768, + "grad_norm": 0.4608984589576721, + "learning_rate": 0.00035596093835818683, + "loss": 1.8846, + "step": 6440 + }, + { + "epoch": 0.680379746835443, + "grad_norm": 0.461535781621933, + "learning_rate": 0.00035382697324932245, + "loss": 1.8917, + "step": 6450 + }, + { + "epoch": 0.6814345991561181, + "grad_norm": 0.4918258786201477, + "learning_rate": 
0.00035169744745266866, + "loss": 1.9029, + "step": 6460 + }, + { + "epoch": 0.6824894514767933, + "grad_norm": 0.5054317116737366, + "learning_rate": 0.0003495723848306017, + "loss": 1.8986, + "step": 6470 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 0.5441283583641052, + "learning_rate": 0.0003474518091954859, + "loss": 1.9046, + "step": 6480 + }, + { + "epoch": 0.6845991561181435, + "grad_norm": 0.49584802985191345, + "learning_rate": 0.0003453357443094068, + "loss": 1.9001, + "step": 6490 + }, + { + "epoch": 0.6856540084388185, + "grad_norm": 0.4631282091140747, + "learning_rate": 0.00034322421388390456, + "loss": 1.9181, + "step": 6500 + }, + { + "epoch": 0.6867088607594937, + "grad_norm": 0.46850067377090454, + "learning_rate": 0.0003411172415797087, + "loss": 1.9134, + "step": 6510 + }, + { + "epoch": 0.6877637130801688, + "grad_norm": 0.45605820417404175, + "learning_rate": 0.0003390148510064727, + "loss": 1.8992, + "step": 6520 + }, + { + "epoch": 0.6888185654008439, + "grad_norm": 0.47595149278640747, + "learning_rate": 0.0003369170657225094, + "loss": 1.8939, + "step": 6530 + }, + { + "epoch": 0.689873417721519, + "grad_norm": 0.4769817292690277, + "learning_rate": 0.0003348239092345275, + "loss": 1.9, + "step": 6540 + }, + { + "epoch": 0.6909282700421941, + "grad_norm": 0.4761356711387634, + "learning_rate": 0.0003327354049973672, + "loss": 1.8971, + "step": 6550 + }, + { + "epoch": 0.6919831223628692, + "grad_norm": 0.49854907393455505, + "learning_rate": 0.00033065157641373847, + "loss": 1.8994, + "step": 6560 + }, + { + "epoch": 0.6930379746835443, + "grad_norm": 0.49468928575515747, + "learning_rate": 0.0003285724468339576, + "loss": 1.8938, + "step": 6570 + }, + { + "epoch": 0.6940928270042194, + "grad_norm": 0.46492886543273926, + "learning_rate": 0.00032649803955568755, + "loss": 1.8915, + "step": 6580 + }, + { + "epoch": 0.6951476793248945, + "grad_norm": 0.47800102829933167, + "learning_rate": 0.00032442837782367434, + "loss": 1.9004, 
+ "step": 6590 + }, + { + "epoch": 0.6962025316455697, + "grad_norm": 0.474301278591156, + "learning_rate": 0.0003223634848294883, + "loss": 1.9025, + "step": 6600 + }, + { + "epoch": 0.6972573839662447, + "grad_norm": 0.4789976477622986, + "learning_rate": 0.00032030338371126374, + "loss": 1.8919, + "step": 6610 + }, + { + "epoch": 0.6983122362869199, + "grad_norm": 0.4799453020095825, + "learning_rate": 0.0003182480975534395, + "loss": 1.8964, + "step": 6620 + }, + { + "epoch": 0.6993670886075949, + "grad_norm": 0.4964424669742584, + "learning_rate": 0.00031619764938650057, + "loss": 1.8892, + "step": 6630 + }, + { + "epoch": 0.70042194092827, + "grad_norm": 0.5083863139152527, + "learning_rate": 0.0003141520621867197, + "loss": 1.8983, + "step": 6640 + }, + { + "epoch": 0.7014767932489452, + "grad_norm": 0.49469172954559326, + "learning_rate": 0.00031211135887590074, + "loss": 1.8905, + "step": 6650 + }, + { + "epoch": 0.7025316455696202, + "grad_norm": 0.4679158329963684, + "learning_rate": 0.0003100755623211205, + "loss": 1.8935, + "step": 6660 + }, + { + "epoch": 0.7035864978902954, + "grad_norm": 0.4510248899459839, + "learning_rate": 0.0003080446953344735, + "loss": 1.8887, + "step": 6670 + }, + { + "epoch": 0.7046413502109705, + "grad_norm": 0.48076680302619934, + "learning_rate": 0.00030601878067281575, + "loss": 1.8941, + "step": 6680 + }, + { + "epoch": 0.7056962025316456, + "grad_norm": 0.481167733669281, + "learning_rate": 0.00030399784103751044, + "loss": 1.8895, + "step": 6690 + }, + { + "epoch": 0.7067510548523207, + "grad_norm": 0.4712788164615631, + "learning_rate": 0.000301981899074173, + "loss": 1.8869, + "step": 6700 + }, + { + "epoch": 0.7078059071729957, + "grad_norm": 0.47437313199043274, + "learning_rate": 0.0002999709773724171, + "loss": 1.8884, + "step": 6710 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 0.48809316754341125, + "learning_rate": 0.00029796509846560294, + "loss": 1.8809, + "step": 6720 + }, + { + "epoch": 
0.709915611814346, + "grad_norm": 0.48144295811653137, + "learning_rate": 0.0002959642848305828, + "loss": 1.8932, + "step": 6730 + }, + { + "epoch": 0.7109704641350211, + "grad_norm": 0.4858327805995941, + "learning_rate": 0.00029396855888745045, + "loss": 1.8955, + "step": 6740 + }, + { + "epoch": 0.7120253164556962, + "grad_norm": 0.48790591955184937, + "learning_rate": 0.0002919779429992895, + "loss": 1.8964, + "step": 6750 + }, + { + "epoch": 0.7130801687763713, + "grad_norm": 0.494166761636734, + "learning_rate": 0.0002899924594719231, + "loss": 1.8911, + "step": 6760 + }, + { + "epoch": 0.7141350210970464, + "grad_norm": 0.46700236201286316, + "learning_rate": 0.00028801213055366335, + "loss": 1.896, + "step": 6770 + }, + { + "epoch": 0.7151898734177216, + "grad_norm": 0.4802832305431366, + "learning_rate": 0.00028603697843506315, + "loss": 1.8919, + "step": 6780 + }, + { + "epoch": 0.7162447257383966, + "grad_norm": 0.5023938417434692, + "learning_rate": 0.0002840670252486662, + "loss": 1.9007, + "step": 6790 + }, + { + "epoch": 0.7172995780590717, + "grad_norm": 0.4910270571708679, + "learning_rate": 0.00028210229306876, + "loss": 1.883, + "step": 6800 + }, + { + "epoch": 0.7183544303797469, + "grad_norm": 0.4667412042617798, + "learning_rate": 0.0002801428039111279, + "loss": 1.887, + "step": 6810 + }, + { + "epoch": 0.7194092827004219, + "grad_norm": 0.5093761086463928, + "learning_rate": 0.00027818857973280274, + "loss": 1.8899, + "step": 6820 + }, + { + "epoch": 0.7204641350210971, + "grad_norm": 0.48690518736839294, + "learning_rate": 0.0002762396424318206, + "loss": 1.8855, + "step": 6830 + }, + { + "epoch": 0.7215189873417721, + "grad_norm": 0.4917588233947754, + "learning_rate": 0.00027429601384697526, + "loss": 1.8758, + "step": 6840 + }, + { + "epoch": 0.7225738396624473, + "grad_norm": 0.4619082808494568, + "learning_rate": 0.00027235771575757466, + "loss": 1.8843, + "step": 6850 + }, + { + "epoch": 0.7236286919831224, + "grad_norm": 
0.4849833548069, + "learning_rate": 0.0002704247698831951, + "loss": 1.885, + "step": 6860 + }, + { + "epoch": 0.7246835443037974, + "grad_norm": 0.47593986988067627, + "learning_rate": 0.0002684971978834389, + "loss": 1.8794, + "step": 6870 + }, + { + "epoch": 0.7257383966244726, + "grad_norm": 0.4589902460575104, + "learning_rate": 0.0002665750213576914, + "loss": 1.8888, + "step": 6880 + }, + { + "epoch": 0.7267932489451476, + "grad_norm": 0.46731802821159363, + "learning_rate": 0.0002646582618448794, + "loss": 1.8771, + "step": 6890 + }, + { + "epoch": 0.7278481012658228, + "grad_norm": 0.4565731883049011, + "learning_rate": 0.00026274694082322896, + "loss": 1.8818, + "step": 6900 + }, + { + "epoch": 0.7289029535864979, + "grad_norm": 0.48151901364326477, + "learning_rate": 0.0002608410797100255, + "loss": 1.9032, + "step": 6910 + }, + { + "epoch": 0.729957805907173, + "grad_norm": 0.4751861095428467, + "learning_rate": 0.0002589406998613733, + "loss": 1.8835, + "step": 6920 + }, + { + "epoch": 0.7310126582278481, + "grad_norm": 0.46046382188796997, + "learning_rate": 0.0002570458225719567, + "loss": 1.8927, + "step": 6930 + }, + { + "epoch": 0.7320675105485233, + "grad_norm": 0.4757733941078186, + "learning_rate": 0.00025515646907480074, + "loss": 1.8932, + "step": 6940 + }, + { + "epoch": 0.7331223628691983, + "grad_norm": 0.46460145711898804, + "learning_rate": 0.00025327266054103395, + "loss": 1.8922, + "step": 6950 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 0.4796941876411438, + "learning_rate": 0.0002513944180796509, + "loss": 1.8765, + "step": 6960 + }, + { + "epoch": 0.7352320675105485, + "grad_norm": 0.4735827147960663, + "learning_rate": 0.0002495217627372752, + "loss": 1.8749, + "step": 6970 + }, + { + "epoch": 0.7362869198312236, + "grad_norm": 0.4752208888530731, + "learning_rate": 0.0002476547154979248, + "loss": 1.8795, + "step": 6980 + }, + { + "epoch": 0.7373417721518988, + "grad_norm": 0.5021410584449768, + "learning_rate": 
0.00024579329728277534, + "loss": 1.8803, + "step": 6990 + }, + { + "epoch": 0.7383966244725738, + "grad_norm": 0.48078906536102295, + "learning_rate": 0.00024393752894992708, + "loss": 1.8903, + "step": 7000 + }, + { + "epoch": 0.739451476793249, + "grad_norm": 0.4738336205482483, + "learning_rate": 0.00024208743129417004, + "loss": 1.8615, + "step": 7010 + }, + { + "epoch": 0.740506329113924, + "grad_norm": 0.46822360157966614, + "learning_rate": 0.00024024302504675206, + "loss": 1.8779, + "step": 7020 + }, + { + "epoch": 0.7415611814345991, + "grad_norm": 0.48871052265167236, + "learning_rate": 0.0002384043308751454, + "loss": 1.8802, + "step": 7030 + }, + { + "epoch": 0.7426160337552743, + "grad_norm": 0.5116748213768005, + "learning_rate": 0.00023657136938281653, + "loss": 1.8865, + "step": 7040 + }, + { + "epoch": 0.7436708860759493, + "grad_norm": 0.4813142418861389, + "learning_rate": 0.00023474416110899377, + "loss": 1.8748, + "step": 7050 + }, + { + "epoch": 0.7447257383966245, + "grad_norm": 0.4846961200237274, + "learning_rate": 0.00023292272652843807, + "loss": 1.8823, + "step": 7060 + }, + { + "epoch": 0.7457805907172996, + "grad_norm": 0.47607532143592834, + "learning_rate": 0.00023110708605121317, + "loss": 1.8839, + "step": 7070 + }, + { + "epoch": 0.7468354430379747, + "grad_norm": 0.47552675008773804, + "learning_rate": 0.00022929726002245728, + "loss": 1.8851, + "step": 7080 + }, + { + "epoch": 0.7478902953586498, + "grad_norm": 0.48758363723754883, + "learning_rate": 0.00022749326872215472, + "loss": 1.8801, + "step": 7090 + }, + { + "epoch": 0.7489451476793249, + "grad_norm": 0.4735240340232849, + "learning_rate": 0.0002256951323649087, + "loss": 1.8781, + "step": 7100 + }, + { + "epoch": 0.75, + "grad_norm": 0.4920492172241211, + "learning_rate": 0.00022390287109971547, + "loss": 1.89, + "step": 7110 + }, + { + "epoch": 0.7510548523206751, + "grad_norm": 0.5136856436729431, + "learning_rate": 0.00022211650500973746, + "loss": 1.8884, + 
"step": 7120 + }, + { + "epoch": 0.7521097046413502, + "grad_norm": 0.48868700861930847, + "learning_rate": 0.0002203360541120789, + "loss": 1.8927, + "step": 7130 + }, + { + "epoch": 0.7531645569620253, + "grad_norm": 0.4715839624404907, + "learning_rate": 0.00021856153835756164, + "loss": 1.8704, + "step": 7140 + }, + { + "epoch": 0.7542194092827004, + "grad_norm": 0.4695172905921936, + "learning_rate": 0.00021679297763050104, + "loss": 1.8709, + "step": 7150 + }, + { + "epoch": 0.7552742616033755, + "grad_norm": 0.46847453713417053, + "learning_rate": 0.0002150303917484834, + "loss": 1.8824, + "step": 7160 + }, + { + "epoch": 0.7563291139240507, + "grad_norm": 0.4812149703502655, + "learning_rate": 0.0002132738004621446, + "loss": 1.8903, + "step": 7170 + }, + { + "epoch": 0.7573839662447257, + "grad_norm": 0.4652795195579529, + "learning_rate": 0.00021152322345494763, + "loss": 1.8687, + "step": 7180 + }, + { + "epoch": 0.7584388185654009, + "grad_norm": 0.4650001525878906, + "learning_rate": 0.00020977868034296253, + "loss": 1.8698, + "step": 7190 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 0.47473710775375366, + "learning_rate": 0.00020804019067464667, + "loss": 1.8765, + "step": 7200 + }, + { + "epoch": 0.760548523206751, + "grad_norm": 0.45719483494758606, + "learning_rate": 0.00020630777393062575, + "loss": 1.8779, + "step": 7210 + }, + { + "epoch": 0.7616033755274262, + "grad_norm": 0.4736708700656891, + "learning_rate": 0.00020458144952347523, + "loss": 1.8936, + "step": 7220 + }, + { + "epoch": 0.7626582278481012, + "grad_norm": 0.49048683047294617, + "learning_rate": 0.00020286123679750314, + "loss": 1.8862, + "step": 7230 + }, + { + "epoch": 0.7637130801687764, + "grad_norm": 0.49034619331359863, + "learning_rate": 0.00020114715502853292, + "loss": 1.8814, + "step": 7240 + }, + { + "epoch": 0.7647679324894515, + "grad_norm": 0.48266786336898804, + "learning_rate": 0.0001994392234236878, + "loss": 1.8826, + "step": 7250 + }, + { + "epoch": 
0.7658227848101266, + "grad_norm": 0.48702606558799744, + "learning_rate": 0.0001977374611211754, + "loss": 1.8772, + "step": 7260 + }, + { + "epoch": 0.7668776371308017, + "grad_norm": 0.4764260947704315, + "learning_rate": 0.00019604188719007313, + "loss": 1.8769, + "step": 7270 + }, + { + "epoch": 0.7679324894514767, + "grad_norm": 0.5010321140289307, + "learning_rate": 0.00019435252063011504, + "loss": 1.8784, + "step": 7280 + }, + { + "epoch": 0.7689873417721519, + "grad_norm": 0.48279356956481934, + "learning_rate": 0.0001926693803714779, + "loss": 1.8857, + "step": 7290 + }, + { + "epoch": 0.770042194092827, + "grad_norm": 0.4771184027194977, + "learning_rate": 0.00019099248527457068, + "loss": 1.8919, + "step": 7300 + }, + { + "epoch": 0.7710970464135021, + "grad_norm": 0.4536754786968231, + "learning_rate": 0.0001893218541298216, + "loss": 1.8723, + "step": 7310 + }, + { + "epoch": 0.7721518987341772, + "grad_norm": 0.4668424725532532, + "learning_rate": 0.00018765750565746827, + "loss": 1.8695, + "step": 7320 + }, + { + "epoch": 0.7732067510548524, + "grad_norm": 0.4849199056625366, + "learning_rate": 0.00018599945850734812, + "loss": 1.874, + "step": 7330 + }, + { + "epoch": 0.7742616033755274, + "grad_norm": 0.46233031153678894, + "learning_rate": 0.00018434773125868895, + "loss": 1.8715, + "step": 7340 + }, + { + "epoch": 0.7753164556962026, + "grad_norm": 0.46371015906333923, + "learning_rate": 0.00018270234241990108, + "loss": 1.8688, + "step": 7350 + }, + { + "epoch": 0.7763713080168776, + "grad_norm": 0.46134862303733826, + "learning_rate": 0.0001810633104283698, + "loss": 1.8765, + "step": 7360 + }, + { + "epoch": 0.7774261603375527, + "grad_norm": 0.47534269094467163, + "learning_rate": 0.0001794306536502492, + "loss": 1.8765, + "step": 7370 + }, + { + "epoch": 0.7784810126582279, + "grad_norm": 0.46353772282600403, + "learning_rate": 0.0001778043903802555, + "loss": 1.8685, + "step": 7380 + }, + { + "epoch": 0.7795358649789029, + "grad_norm": 
0.45776838064193726, + "learning_rate": 0.0001761845388414627, + "loss": 1.8754, + "step": 7390 + }, + { + "epoch": 0.7805907172995781, + "grad_norm": 0.48424655199050903, + "learning_rate": 0.00017457111718509831, + "loss": 1.8866, + "step": 7400 + }, + { + "epoch": 0.7816455696202531, + "grad_norm": 0.45922234654426575, + "learning_rate": 0.00017296414349033976, + "loss": 1.8645, + "step": 7410 + }, + { + "epoch": 0.7827004219409283, + "grad_norm": 0.4809322655200958, + "learning_rate": 0.00017136363576411172, + "loss": 1.8707, + "step": 7420 + }, + { + "epoch": 0.7837552742616034, + "grad_norm": 0.4656301736831665, + "learning_rate": 0.00016976961194088526, + "loss": 1.8583, + "step": 7430 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 0.4717573821544647, + "learning_rate": 0.00016818208988247533, + "loss": 1.8621, + "step": 7440 + }, + { + "epoch": 0.7858649789029536, + "grad_norm": 0.47907140851020813, + "learning_rate": 0.0001666010873778419, + "loss": 1.8642, + "step": 7450 + }, + { + "epoch": 0.7869198312236287, + "grad_norm": 0.4541815519332886, + "learning_rate": 0.00016502662214289, + "loss": 1.8775, + "step": 7460 + }, + { + "epoch": 0.7879746835443038, + "grad_norm": 0.47823548316955566, + "learning_rate": 0.00016345871182027124, + "loss": 1.8641, + "step": 7470 + }, + { + "epoch": 0.7890295358649789, + "grad_norm": 0.4536302983760834, + "learning_rate": 0.00016189737397918653, + "loss": 1.8719, + "step": 7480 + }, + { + "epoch": 0.790084388185654, + "grad_norm": 0.4907922148704529, + "learning_rate": 0.0001603426261151884, + "loss": 1.88, + "step": 7490 + }, + { + "epoch": 0.7911392405063291, + "grad_norm": 0.473623663187027, + "learning_rate": 0.00015879448564998648, + "loss": 1.8734, + "step": 7500 + }, + { + "epoch": 0.7921940928270043, + "grad_norm": 0.46029412746429443, + "learning_rate": 0.0001572529699312501, + "loss": 1.88, + "step": 7510 + }, + { + "epoch": 0.7932489451476793, + "grad_norm": 0.45252102613449097, + "learning_rate": 
0.0001557180962324158, + "loss": 1.8621, + "step": 7520 + }, + { + "epoch": 0.7943037974683544, + "grad_norm": 0.474971741437912, + "learning_rate": 0.00015418988175249282, + "loss": 1.8695, + "step": 7530 + }, + { + "epoch": 0.7953586497890295, + "grad_norm": 0.48034900426864624, + "learning_rate": 0.00015266834361587063, + "loss": 1.864, + "step": 7540 + }, + { + "epoch": 0.7964135021097046, + "grad_norm": 0.47006815671920776, + "learning_rate": 0.00015115349887212678, + "loss": 1.8585, + "step": 7550 + }, + { + "epoch": 0.7974683544303798, + "grad_norm": 0.47341981530189514, + "learning_rate": 0.00014964536449583657, + "loss": 1.8626, + "step": 7560 + }, + { + "epoch": 0.7985232067510548, + "grad_norm": 0.457152783870697, + "learning_rate": 0.00014814395738638195, + "loss": 1.8752, + "step": 7570 + }, + { + "epoch": 0.79957805907173, + "grad_norm": 0.452556312084198, + "learning_rate": 0.00014664929436776278, + "loss": 1.8692, + "step": 7580 + }, + { + "epoch": 0.8006329113924051, + "grad_norm": 0.451565682888031, + "learning_rate": 0.00014516139218840788, + "loss": 1.8652, + "step": 7590 + }, + { + "epoch": 0.8016877637130801, + "grad_norm": 0.45017147064208984, + "learning_rate": 0.00014368026752098782, + "loss": 1.867, + "step": 7600 + }, + { + "epoch": 0.8027426160337553, + "grad_norm": 0.45841312408447266, + "learning_rate": 0.00014220593696222768, + "loss": 1.8784, + "step": 7610 + }, + { + "epoch": 0.8037974683544303, + "grad_norm": 0.43397000432014465, + "learning_rate": 0.00014073841703272092, + "loss": 1.8533, + "step": 7620 + }, + { + "epoch": 0.8048523206751055, + "grad_norm": 0.4854462742805481, + "learning_rate": 0.00013927772417674558, + "loss": 1.8731, + "step": 7630 + }, + { + "epoch": 0.8059071729957806, + "grad_norm": 0.4832543730735779, + "learning_rate": 0.00013782387476207788, + "loss": 1.8659, + "step": 7640 + }, + { + "epoch": 0.8069620253164557, + "grad_norm": 0.46510520577430725, + "learning_rate": 0.00013637688507981064, + "loss": 
1.8791, + "step": 7650 + }, + { + "epoch": 0.8080168776371308, + "grad_norm": 0.45782485604286194, + "learning_rate": 0.0001349367713441697, + "loss": 1.8586, + "step": 7660 + }, + { + "epoch": 0.8090717299578059, + "grad_norm": 0.4561920166015625, + "learning_rate": 0.0001335035496923326, + "loss": 1.8676, + "step": 7670 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 0.500480055809021, + "learning_rate": 0.0001320772361842478, + "loss": 1.8796, + "step": 7680 + }, + { + "epoch": 0.8111814345991561, + "grad_norm": 0.45493337512016296, + "learning_rate": 0.00013065784680245442, + "loss": 1.8578, + "step": 7690 + }, + { + "epoch": 0.8122362869198312, + "grad_norm": 0.46516743302345276, + "learning_rate": 0.00012924539745190402, + "loss": 1.8645, + "step": 7700 + }, + { + "epoch": 0.8132911392405063, + "grad_norm": 0.46138864755630493, + "learning_rate": 0.0001278399039597809, + "loss": 1.8599, + "step": 7710 + }, + { + "epoch": 0.8143459915611815, + "grad_norm": 0.4848105311393738, + "learning_rate": 0.0001264413820753261, + "loss": 1.8653, + "step": 7720 + }, + { + "epoch": 0.8154008438818565, + "grad_norm": 0.4863336980342865, + "learning_rate": 0.00012504984746966003, + "loss": 1.8726, + "step": 7730 + }, + { + "epoch": 0.8164556962025317, + "grad_norm": 0.45759791135787964, + "learning_rate": 0.00012366531573560754, + "loss": 1.8731, + "step": 7740 + }, + { + "epoch": 0.8175105485232067, + "grad_norm": 0.4671829342842102, + "learning_rate": 0.00012228780238752264, + "loss": 1.8549, + "step": 7750 + }, + { + "epoch": 0.8185654008438819, + "grad_norm": 0.4864712953567505, + "learning_rate": 0.00012091732286111514, + "loss": 1.8621, + "step": 7760 + }, + { + "epoch": 0.819620253164557, + "grad_norm": 0.4534526765346527, + "learning_rate": 0.00011955389251327737, + "loss": 1.8686, + "step": 7770 + }, + { + "epoch": 0.820675105485232, + "grad_norm": 0.49574679136276245, + "learning_rate": 0.00011819752662191197, + "loss": 1.8602, + "step": 7780 + }, + { + 
"epoch": 0.8217299578059072, + "grad_norm": 0.4781345725059509, + "learning_rate": 0.00011684824038576115, + "loss": 1.8802, + "step": 7790 + }, + { + "epoch": 0.8227848101265823, + "grad_norm": 0.4684472680091858, + "learning_rate": 0.00011550604892423593, + "loss": 1.8658, + "step": 7800 + }, + { + "epoch": 0.8238396624472574, + "grad_norm": 0.43438807129859924, + "learning_rate": 0.0001141709672772471, + "loss": 1.8625, + "step": 7810 + }, + { + "epoch": 0.8248945147679325, + "grad_norm": 0.4549379050731659, + "learning_rate": 0.00011284301040503625, + "loss": 1.87, + "step": 7820 + }, + { + "epoch": 0.8259493670886076, + "grad_norm": 0.4725560247898102, + "learning_rate": 0.0001115221931880088, + "loss": 1.8628, + "step": 7830 + }, + { + "epoch": 0.8270042194092827, + "grad_norm": 0.450846403837204, + "learning_rate": 0.00011020853042656648, + "loss": 1.8478, + "step": 7840 + }, + { + "epoch": 0.8280590717299579, + "grad_norm": 0.44862425327301025, + "learning_rate": 0.000108902036840942, + "loss": 1.8596, + "step": 7850 + }, + { + "epoch": 0.8291139240506329, + "grad_norm": 0.46587443351745605, + "learning_rate": 0.00010760272707103389, + "loss": 1.8481, + "step": 7860 + }, + { + "epoch": 0.830168776371308, + "grad_norm": 0.4508591890335083, + "learning_rate": 0.00010631061567624259, + "loss": 1.8589, + "step": 7870 + }, + { + "epoch": 0.8312236286919831, + "grad_norm": 0.4707389175891876, + "learning_rate": 0.00010502571713530706, + "loss": 1.8785, + "step": 7880 + }, + { + "epoch": 0.8322784810126582, + "grad_norm": 0.4480611979961395, + "learning_rate": 0.00010374804584614308, + "loss": 1.8753, + "step": 7890 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.5090247392654419, + "learning_rate": 0.00010247761612568129, + "loss": 1.8573, + "step": 7900 + }, + { + "epoch": 0.8343881856540084, + "grad_norm": 0.47251445055007935, + "learning_rate": 0.0001012144422097069, + "loss": 1.8625, + "step": 7910 + }, + { + "epoch": 0.8354430379746836, + 
"grad_norm": 0.4499441683292389, + "learning_rate": 9.995853825270052e-05, + "loss": 1.8644, + "step": 7920 + }, + { + "epoch": 0.8364978902953587, + "grad_norm": 0.4492203891277313, + "learning_rate": 9.870991832767919e-05, + "loss": 1.8748, + "step": 7930 + }, + { + "epoch": 0.8375527426160337, + "grad_norm": 0.4552786946296692, + "learning_rate": 9.746859642603884e-05, + "loss": 1.8657, + "step": 7940 + }, + { + "epoch": 0.8386075949367089, + "grad_norm": 0.45052555203437805, + "learning_rate": 9.623458645739755e-05, + "loss": 1.8671, + "step": 7950 + }, + { + "epoch": 0.8396624472573839, + "grad_norm": 0.4534227252006531, + "learning_rate": 9.50079022494395e-05, + "loss": 1.8522, + "step": 7960 + }, + { + "epoch": 0.8407172995780591, + "grad_norm": 0.46490582823753357, + "learning_rate": 9.378855754776028e-05, + "loss": 1.8581, + "step": 7970 + }, + { + "epoch": 0.8417721518987342, + "grad_norm": 0.46886372566223145, + "learning_rate": 9.257656601571266e-05, + "loss": 1.8538, + "step": 7980 + }, + { + "epoch": 0.8428270042194093, + "grad_norm": 0.46401137113571167, + "learning_rate": 9.137194123425349e-05, + "loss": 1.8672, + "step": 7990 + }, + { + "epoch": 0.8438818565400844, + "grad_norm": 0.46582865715026855, + "learning_rate": 9.017469670179168e-05, + "loss": 1.8592, + "step": 8000 + }, + { + "epoch": 0.8449367088607594, + "grad_norm": 0.4611853063106537, + "learning_rate": 8.898484583403668e-05, + "loss": 1.8644, + "step": 8010 + }, + { + "epoch": 0.8459915611814346, + "grad_norm": 0.44908592104911804, + "learning_rate": 8.780240196384873e-05, + "loss": 1.8558, + "step": 8020 + }, + { + "epoch": 0.8470464135021097, + "grad_norm": 0.45596691966056824, + "learning_rate": 8.662737834108861e-05, + "loss": 1.8576, + "step": 8030 + }, + { + "epoch": 0.8481012658227848, + "grad_norm": 0.45172643661499023, + "learning_rate": 8.545978813246987e-05, + "loss": 1.8714, + "step": 8040 + }, + { + "epoch": 0.8491561181434599, + "grad_norm": 0.47122079133987427, + 
"learning_rate": 8.429964442141072e-05, + "loss": 1.8504, + "step": 8050 + }, + { + "epoch": 0.8502109704641351, + "grad_norm": 0.4482874572277069, + "learning_rate": 8.314696020788806e-05, + "loss": 1.8573, + "step": 8060 + }, + { + "epoch": 0.8512658227848101, + "grad_norm": 0.4615163803100586, + "learning_rate": 8.200174840829136e-05, + "loss": 1.8639, + "step": 8070 + }, + { + "epoch": 0.8523206751054853, + "grad_norm": 0.46466290950775146, + "learning_rate": 8.08640218552778e-05, + "loss": 1.8685, + "step": 8080 + }, + { + "epoch": 0.8533755274261603, + "grad_norm": 0.4637773633003235, + "learning_rate": 7.973379329762925e-05, + "loss": 1.8557, + "step": 8090 + }, + { + "epoch": 0.8544303797468354, + "grad_norm": 0.45464038848876953, + "learning_rate": 7.861107540010845e-05, + "loss": 1.8468, + "step": 8100 + }, + { + "epoch": 0.8554852320675106, + "grad_norm": 0.497221440076828, + "learning_rate": 7.749588074331762e-05, + "loss": 1.8668, + "step": 8110 + }, + { + "epoch": 0.8565400843881856, + "grad_norm": 0.4532962143421173, + "learning_rate": 7.63882218235575e-05, + "loss": 1.8579, + "step": 8120 + }, + { + "epoch": 0.8575949367088608, + "grad_norm": 0.4455423653125763, + "learning_rate": 7.528811105268699e-05, + "loss": 1.8577, + "step": 8130 + }, + { + "epoch": 0.8586497890295358, + "grad_norm": 0.4466083347797394, + "learning_rate": 7.41955607579845e-05, + "loss": 1.8542, + "step": 8140 + }, + { + "epoch": 0.859704641350211, + "grad_norm": 0.48050856590270996, + "learning_rate": 7.311058318200969e-05, + "loss": 1.849, + "step": 8150 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 0.45858535170555115, + "learning_rate": 7.203319048246599e-05, + "loss": 1.8623, + "step": 8160 + }, + { + "epoch": 0.8618143459915611, + "grad_norm": 0.4469054937362671, + "learning_rate": 7.096339473206471e-05, + "loss": 1.8558, + "step": 8170 + }, + { + "epoch": 0.8628691983122363, + "grad_norm": 0.4736482799053192, + "learning_rate": 6.990120791838953e-05, + "loss": 
1.864, + "step": 8180 + }, + { + "epoch": 0.8639240506329114, + "grad_norm": 0.46211355924606323, + "learning_rate": 6.884664194376233e-05, + "loss": 1.8579, + "step": 8190 + }, + { + "epoch": 0.8649789029535865, + "grad_norm": 0.4574001729488373, + "learning_rate": 6.779970862510989e-05, + "loss": 1.8671, + "step": 8200 + }, + { + "epoch": 0.8660337552742616, + "grad_norm": 0.4467184841632843, + "learning_rate": 6.676041969383107e-05, + "loss": 1.8587, + "step": 8210 + }, + { + "epoch": 0.8670886075949367, + "grad_norm": 0.4392499625682831, + "learning_rate": 6.572878679566605e-05, + "loss": 1.8673, + "step": 8220 + }, + { + "epoch": 0.8681434599156118, + "grad_norm": 0.44786831736564636, + "learning_rate": 6.470482149056509e-05, + "loss": 1.863, + "step": 8230 + }, + { + "epoch": 0.869198312236287, + "grad_norm": 0.4359064996242523, + "learning_rate": 6.368853525255942e-05, + "loss": 1.861, + "step": 8240 + }, + { + "epoch": 0.870253164556962, + "grad_norm": 0.4474690556526184, + "learning_rate": 6.267993946963249e-05, + "loss": 1.8745, + "step": 8250 + }, + { + "epoch": 0.8713080168776371, + "grad_norm": 0.4776245057582855, + "learning_rate": 6.167904544359265e-05, + "loss": 1.8657, + "step": 8260 + }, + { + "epoch": 0.8723628691983122, + "grad_norm": 0.4571479558944702, + "learning_rate": 6.068586438994617e-05, + "loss": 1.858, + "step": 8270 + }, + { + "epoch": 0.8734177215189873, + "grad_norm": 0.44361361861228943, + "learning_rate": 5.970040743777161e-05, + "loss": 1.8482, + "step": 8280 + }, + { + "epoch": 0.8744725738396625, + "grad_norm": 0.47817113995552063, + "learning_rate": 5.8722685629595454e-05, + "loss": 1.8477, + "step": 8290 + }, + { + "epoch": 0.8755274261603375, + "grad_norm": 0.45092493295669556, + "learning_rate": 5.7752709921267855e-05, + "loss": 1.8734, + "step": 8300 + }, + { + "epoch": 0.8765822784810127, + "grad_norm": 0.4524844288825989, + "learning_rate": 5.6790491181840294e-05, + "loss": 1.8493, + "step": 8310 + }, + { + "epoch": 
0.8776371308016878, + "grad_norm": 0.4503737688064575, + "learning_rate": 5.583604019344354e-05, + "loss": 1.8653, + "step": 8320 + }, + { + "epoch": 0.8786919831223629, + "grad_norm": 0.44201695919036865, + "learning_rate": 5.4889367651167007e-05, + "loss": 1.8639, + "step": 8330 + }, + { + "epoch": 0.879746835443038, + "grad_norm": 0.43659183382987976, + "learning_rate": 5.3950484162938714e-05, + "loss": 1.847, + "step": 8340 + }, + { + "epoch": 0.880801687763713, + "grad_norm": 0.44355642795562744, + "learning_rate": 5.3019400249406686e-05, + "loss": 1.8509, + "step": 8350 + }, + { + "epoch": 0.8818565400843882, + "grad_norm": 0.45004603266716003, + "learning_rate": 5.209612634382077e-05, + "loss": 1.8482, + "step": 8360 + }, + { + "epoch": 0.8829113924050633, + "grad_norm": 0.4667811691761017, + "learning_rate": 5.118067279191599e-05, + "loss": 1.8587, + "step": 8370 + }, + { + "epoch": 0.8839662447257384, + "grad_norm": 0.46385079622268677, + "learning_rate": 5.0273049851796205e-05, + "loss": 1.8649, + "step": 8380 + }, + { + "epoch": 0.8850210970464135, + "grad_norm": 0.4491734504699707, + "learning_rate": 4.9373267693819805e-05, + "loss": 1.8536, + "step": 8390 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 0.45244160294532776, + "learning_rate": 4.848133640048513e-05, + "loss": 1.8571, + "step": 8400 + }, + { + "epoch": 0.8871308016877637, + "grad_norm": 0.4751724600791931, + "learning_rate": 4.75972659663178e-05, + "loss": 1.8737, + "step": 8410 + }, + { + "epoch": 0.8881856540084389, + "grad_norm": 0.44800275564193726, + "learning_rate": 4.672106629775882e-05, + "loss": 1.8595, + "step": 8420 + }, + { + "epoch": 0.8892405063291139, + "grad_norm": 0.45462116599082947, + "learning_rate": 4.585274721305333e-05, + "loss": 1.8535, + "step": 8430 + }, + { + "epoch": 0.890295358649789, + "grad_norm": 0.44465845823287964, + "learning_rate": 4.4992318442140575e-05, + "loss": 1.8491, + "step": 8440 + }, + { + "epoch": 0.8913502109704642, + "grad_norm": 
0.46735644340515137, + "learning_rate": 4.413978962654508e-05, + "loss": 1.8618, + "step": 8450 + }, + { + "epoch": 0.8924050632911392, + "grad_norm": 0.45186448097229004, + "learning_rate": 4.3295170319268554e-05, + "loss": 1.8553, + "step": 8460 + }, + { + "epoch": 0.8934599156118144, + "grad_norm": 0.43844571709632874, + "learning_rate": 4.245846998468261e-05, + "loss": 1.858, + "step": 8470 + }, + { + "epoch": 0.8945147679324894, + "grad_norm": 0.44759395718574524, + "learning_rate": 4.16296979984232e-05, + "loss": 1.849, + "step": 8480 + }, + { + "epoch": 0.8955696202531646, + "grad_norm": 0.461922824382782, + "learning_rate": 4.080886364728506e-05, + "loss": 1.8586, + "step": 8490 + }, + { + "epoch": 0.8966244725738397, + "grad_norm": 0.4422442615032196, + "learning_rate": 3.999597612911793e-05, + "loss": 1.8459, + "step": 8500 + }, + { + "epoch": 0.8976793248945147, + "grad_norm": 0.44728294014930725, + "learning_rate": 3.9191044552723345e-05, + "loss": 1.8575, + "step": 8510 + }, + { + "epoch": 0.8987341772151899, + "grad_norm": 0.44582033157348633, + "learning_rate": 3.839407793775268e-05, + "loss": 1.8528, + "step": 8520 + }, + { + "epoch": 0.8997890295358649, + "grad_norm": 0.441847562789917, + "learning_rate": 3.760508521460584e-05, + "loss": 1.867, + "step": 8530 + }, + { + "epoch": 0.9008438818565401, + "grad_norm": 0.4534991681575775, + "learning_rate": 3.682407522433173e-05, + "loss": 1.8674, + "step": 8540 + }, + { + "epoch": 0.9018987341772152, + "grad_norm": 0.44629549980163574, + "learning_rate": 3.605105671852854e-05, + "loss": 1.8498, + "step": 8550 + }, + { + "epoch": 0.9029535864978903, + "grad_norm": 0.4361891746520996, + "learning_rate": 3.528603835924626e-05, + "loss": 1.8435, + "step": 8560 + }, + { + "epoch": 0.9040084388185654, + "grad_norm": 0.46068695187568665, + "learning_rate": 3.4529028718888935e-05, + "loss": 1.8654, + "step": 8570 + }, + { + "epoch": 0.9050632911392406, + "grad_norm": 0.44590893387794495, + "learning_rate": 
3.378003628011938e-05, + "loss": 1.856, + "step": 8580 + }, + { + "epoch": 0.9061181434599156, + "grad_norm": 0.45561665296554565, + "learning_rate": 3.303906943576346e-05, + "loss": 1.8623, + "step": 8590 + }, + { + "epoch": 0.9071729957805907, + "grad_norm": 0.43820303678512573, + "learning_rate": 3.230613648871661e-05, + "loss": 1.8542, + "step": 8600 + }, + { + "epoch": 0.9082278481012658, + "grad_norm": 0.4502815008163452, + "learning_rate": 3.158124565185022e-05, + "loss": 1.854, + "step": 8610 + }, + { + "epoch": 0.9092827004219409, + "grad_norm": 0.44974997639656067, + "learning_rate": 3.086440504792026e-05, + "loss": 1.8493, + "step": 8620 + }, + { + "epoch": 0.9103375527426161, + "grad_norm": 0.4566609561443329, + "learning_rate": 3.015562270947553e-05, + "loss": 1.8612, + "step": 8630 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 0.45364320278167725, + "learning_rate": 2.945490657876837e-05, + "loss": 1.841, + "step": 8640 + }, + { + "epoch": 0.9124472573839663, + "grad_norm": 0.46975603699684143, + "learning_rate": 2.8762264507665113e-05, + "loss": 1.8479, + "step": 8650 + }, + { + "epoch": 0.9135021097046413, + "grad_norm": 0.4479108452796936, + "learning_rate": 2.807770425755829e-05, + "loss": 1.8501, + "step": 8660 + }, + { + "epoch": 0.9145569620253164, + "grad_norm": 0.4464396834373474, + "learning_rate": 2.7401233499279866e-05, + "loss": 1.8505, + "step": 8670 + }, + { + "epoch": 0.9156118143459916, + "grad_norm": 0.4381346106529236, + "learning_rate": 2.6732859813014987e-05, + "loss": 1.8646, + "step": 8680 + }, + { + "epoch": 0.9166666666666666, + "grad_norm": 0.4520086944103241, + "learning_rate": 2.607259068821721e-05, + "loss": 1.8594, + "step": 8690 + }, + { + "epoch": 0.9177215189873418, + "grad_norm": 0.4384848475456238, + "learning_rate": 2.5420433523524493e-05, + "loss": 1.8547, + "step": 8700 + }, + { + "epoch": 0.9187763713080169, + "grad_norm": 0.4509577751159668, + "learning_rate": 2.4776395626676162e-05, + "loss": 1.8552, + 
"step": 8710 + }, + { + "epoch": 0.919831223628692, + "grad_norm": 0.45450013875961304, + "learning_rate": 2.414048421443141e-05, + "loss": 1.8454, + "step": 8720 + }, + { + "epoch": 0.9208860759493671, + "grad_norm": 0.4475404918193817, + "learning_rate": 2.3512706412488012e-05, + "loss": 1.8622, + "step": 8730 + }, + { + "epoch": 0.9219409282700421, + "grad_norm": 0.4558909833431244, + "learning_rate": 2.2893069255402993e-05, + "loss": 1.8461, + "step": 8740 + }, + { + "epoch": 0.9229957805907173, + "grad_norm": 0.442281574010849, + "learning_rate": 2.2281579686513176e-05, + "loss": 1.8487, + "step": 8750 + }, + { + "epoch": 0.9240506329113924, + "grad_norm": 0.4525986909866333, + "learning_rate": 2.1678244557857663e-05, + "loss": 1.8349, + "step": 8760 + }, + { + "epoch": 0.9251054852320675, + "grad_norm": 0.4333018660545349, + "learning_rate": 2.1083070630101232e-05, + "loss": 1.8503, + "step": 8770 + }, + { + "epoch": 0.9261603375527426, + "grad_norm": 0.43366196751594543, + "learning_rate": 2.0496064572458395e-05, + "loss": 1.8589, + "step": 8780 + }, + { + "epoch": 0.9272151898734177, + "grad_norm": 0.4418904781341553, + "learning_rate": 1.991723296261863e-05, + "loss": 1.8519, + "step": 8790 + }, + { + "epoch": 0.9282700421940928, + "grad_norm": 0.44185906648635864, + "learning_rate": 1.9346582286672686e-05, + "loss": 1.8483, + "step": 8800 + }, + { + "epoch": 0.929324894514768, + "grad_norm": 0.44344791769981384, + "learning_rate": 1.878411893904014e-05, + "loss": 1.866, + "step": 8810 + }, + { + "epoch": 0.930379746835443, + "grad_norm": 0.4383496046066284, + "learning_rate": 1.822984922239737e-05, + "loss": 1.8541, + "step": 8820 + }, + { + "epoch": 0.9314345991561181, + "grad_norm": 0.46080538630485535, + "learning_rate": 1.7683779347607286e-05, + "loss": 1.8657, + "step": 8830 + }, + { + "epoch": 0.9324894514767933, + "grad_norm": 0.4493260383605957, + "learning_rate": 1.714591543364938e-05, + "loss": 1.8524, + "step": 8840 + }, + { + "epoch": 
0.9335443037974683, + "grad_norm": 0.4495392143726349, + "learning_rate": 1.6616263507551437e-05, + "loss": 1.8475, + "step": 8850 + }, + { + "epoch": 0.9345991561181435, + "grad_norm": 0.4512085020542145, + "learning_rate": 1.609482950432195e-05, + "loss": 1.8528, + "step": 8860 + }, + { + "epoch": 0.9356540084388185, + "grad_norm": 0.44002366065979004, + "learning_rate": 1.5581619266883563e-05, + "loss": 1.8546, + "step": 8870 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 0.4513969123363495, + "learning_rate": 1.5076638546007548e-05, + "loss": 1.8577, + "step": 8880 + }, + { + "epoch": 0.9377637130801688, + "grad_norm": 0.4510626792907715, + "learning_rate": 1.457989300024945e-05, + "loss": 1.8577, + "step": 8890 + }, + { + "epoch": 0.9388185654008439, + "grad_norm": 0.4387527108192444, + "learning_rate": 1.4091388195885625e-05, + "loss": 1.8488, + "step": 8900 + }, + { + "epoch": 0.939873417721519, + "grad_norm": 0.45338332653045654, + "learning_rate": 1.3611129606851041e-05, + "loss": 1.8456, + "step": 8910 + }, + { + "epoch": 0.9409282700421941, + "grad_norm": 0.448855996131897, + "learning_rate": 1.313912261467759e-05, + "loss": 1.8518, + "step": 8920 + }, + { + "epoch": 0.9419831223628692, + "grad_norm": 0.4393201470375061, + "learning_rate": 1.267537250843412e-05, + "loss": 1.8524, + "step": 8930 + }, + { + "epoch": 0.9430379746835443, + "grad_norm": 0.43996691703796387, + "learning_rate": 1.2219884484667071e-05, + "loss": 1.8502, + "step": 8940 + }, + { + "epoch": 0.9440928270042194, + "grad_norm": 0.4380275011062622, + "learning_rate": 1.1772663647341947e-05, + "loss": 1.8548, + "step": 8950 + }, + { + "epoch": 0.9451476793248945, + "grad_norm": 0.4480167329311371, + "learning_rate": 1.1333715007786932e-05, + "loss": 1.857, + "step": 8960 + }, + { + "epoch": 0.9462025316455697, + "grad_norm": 0.43651461601257324, + "learning_rate": 1.0903043484635694e-05, + "loss": 1.8578, + "step": 8970 + }, + { + "epoch": 0.9472573839662447, + "grad_norm": 
0.43647250533103943, + "learning_rate": 1.0480653903772924e-05, + "loss": 1.8509, + "step": 8980 + }, + { + "epoch": 0.9483122362869199, + "grad_norm": 0.4358389675617218, + "learning_rate": 1.0066550998280132e-05, + "loss": 1.8588, + "step": 8990 + }, + { + "epoch": 0.9493670886075949, + "grad_norm": 0.433937668800354, + "learning_rate": 9.660739408382608e-06, + "loss": 1.8509, + "step": 9000 + }, + { + "epoch": 0.95042194092827, + "grad_norm": 0.4411761164665222, + "learning_rate": 9.26322368139737e-06, + "loss": 1.851, + "step": 9010 + }, + { + "epoch": 0.9514767932489452, + "grad_norm": 0.445496529340744, + "learning_rate": 8.874008271682222e-06, + "loss": 1.8484, + "step": 9020 + }, + { + "epoch": 0.9525316455696202, + "grad_norm": 0.451007604598999, + "learning_rate": 8.493097540585775e-06, + "loss": 1.8711, + "step": 9030 + }, + { + "epoch": 0.9535864978902954, + "grad_norm": 0.4482809007167816, + "learning_rate": 8.120495756399005e-06, + "loss": 1.8441, + "step": 9040 + }, + { + "epoch": 0.9546413502109705, + "grad_norm": 0.445203572511673, + "learning_rate": 7.756207094306605e-06, + "loss": 1.8562, + "step": 9050 + }, + { + "epoch": 0.9556962025316456, + "grad_norm": 0.43753236532211304, + "learning_rate": 7.400235636340957e-06, + "loss": 1.8607, + "step": 9060 + }, + { + "epoch": 0.9567510548523207, + "grad_norm": 0.44349023699760437, + "learning_rate": 7.0525853713362395e-06, + "loss": 1.8549, + "step": 9070 + }, + { + "epoch": 0.9578059071729957, + "grad_norm": 0.43987277150154114, + "learning_rate": 6.71326019488322e-06, + "loss": 1.8563, + "step": 9080 + }, + { + "epoch": 0.9588607594936709, + "grad_norm": 0.46386095881462097, + "learning_rate": 6.3822639092862846e-06, + "loss": 1.8626, + "step": 9090 + }, + { + "epoch": 0.959915611814346, + "grad_norm": 0.4398437440395355, + "learning_rate": 6.059600223520478e-06, + "loss": 1.8377, + "step": 9100 + }, + { + "epoch": 0.9609704641350211, + "grad_norm": 0.4368320405483246, + "learning_rate": 
5.745272753189784e-06, + "loss": 1.85, + "step": 9110 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 0.44437175989151, + "learning_rate": 5.439285020487156e-06, + "loss": 1.8576, + "step": 9120 + }, + { + "epoch": 0.9630801687763713, + "grad_norm": 0.44512876868247986, + "learning_rate": 5.141640454154467e-06, + "loss": 1.8442, + "step": 9130 + }, + { + "epoch": 0.9641350210970464, + "grad_norm": 0.44665366411209106, + "learning_rate": 4.852342389444458e-06, + "loss": 1.8666, + "step": 9140 + }, + { + "epoch": 0.9651898734177216, + "grad_norm": 0.4420073330402374, + "learning_rate": 4.571394068083185e-06, + "loss": 1.8544, + "step": 9150 + }, + { + "epoch": 0.9662447257383966, + "grad_norm": 0.44711777567863464, + "learning_rate": 4.298798638233709e-06, + "loss": 1.8584, + "step": 9160 + }, + { + "epoch": 0.9672995780590717, + "grad_norm": 0.4380023181438446, + "learning_rate": 4.034559154461049e-06, + "loss": 1.8552, + "step": 9170 + }, + { + "epoch": 0.9683544303797469, + "grad_norm": 0.4378598928451538, + "learning_rate": 3.7786785776976198e-06, + "loss": 1.854, + "step": 9180 + }, + { + "epoch": 0.9694092827004219, + "grad_norm": 0.4359339773654938, + "learning_rate": 3.5311597752100964e-06, + "loss": 1.8474, + "step": 9190 + }, + { + "epoch": 0.9704641350210971, + "grad_norm": 0.43689602613449097, + "learning_rate": 3.2920055205676867e-06, + "loss": 1.8484, + "step": 9200 + }, + { + "epoch": 0.9715189873417721, + "grad_norm": 0.4357077479362488, + "learning_rate": 3.06121849361049e-06, + "loss": 1.8545, + "step": 9210 + }, + { + "epoch": 0.9725738396624473, + "grad_norm": 0.4428924322128296, + "learning_rate": 2.838801280419856e-06, + "loss": 1.8503, + "step": 9220 + }, + { + "epoch": 0.9736286919831224, + "grad_norm": 0.4326014220714569, + "learning_rate": 2.624756373289322e-06, + "loss": 1.8391, + "step": 9230 + }, + { + "epoch": 0.9746835443037974, + "grad_norm": 0.4383988678455353, + "learning_rate": 2.419086170696472e-06, + "loss": 1.8351, + 
"step": 9240 + }, + { + "epoch": 0.9757383966244726, + "grad_norm": 0.4408615827560425, + "learning_rate": 2.2217929772764545e-06, + "loss": 1.8484, + "step": 9250 + }, + { + "epoch": 0.9767932489451476, + "grad_norm": 0.43869075179100037, + "learning_rate": 2.0328790037957568e-06, + "loss": 1.8448, + "step": 9260 + }, + { + "epoch": 0.9778481012658228, + "grad_norm": 0.4440855085849762, + "learning_rate": 1.8523463671278052e-06, + "loss": 1.859, + "step": 9270 + }, + { + "epoch": 0.9789029535864979, + "grad_norm": 0.4473136067390442, + "learning_rate": 1.6801970902288188e-06, + "loss": 1.8462, + "step": 9280 + }, + { + "epoch": 0.979957805907173, + "grad_norm": 0.4394768178462982, + "learning_rate": 1.5164331021155774e-06, + "loss": 1.8517, + "step": 9290 + }, + { + "epoch": 0.9810126582278481, + "grad_norm": 0.44447633624076843, + "learning_rate": 1.3610562378435221e-06, + "loss": 1.8506, + "step": 9300 + }, + { + "epoch": 0.9820675105485233, + "grad_norm": 0.44284042716026306, + "learning_rate": 1.2140682384862712e-06, + "loss": 1.8512, + "step": 9310 + }, + { + "epoch": 0.9831223628691983, + "grad_norm": 0.4471919536590576, + "learning_rate": 1.0754707511161365e-06, + "loss": 1.8457, + "step": 9320 + }, + { + "epoch": 0.9841772151898734, + "grad_norm": 0.43650737404823303, + "learning_rate": 9.452653287856383e-07, + "loss": 1.8561, + "step": 9330 + }, + { + "epoch": 0.9852320675105485, + "grad_norm": 0.43427830934524536, + "learning_rate": 8.234534305101015e-07, + "loss": 1.8535, + "step": 9340 + }, + { + "epoch": 0.9862869198312236, + "grad_norm": 0.4372943341732025, + "learning_rate": 7.100364212513367e-07, + "loss": 1.8679, + "step": 9350 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 0.4388529360294342, + "learning_rate": 6.050155719023176e-07, + "loss": 1.8621, + "step": 9360 + }, + { + "epoch": 0.9883966244725738, + "grad_norm": 0.4529777765274048, + "learning_rate": 5.08392059272944e-07, + "loss": 1.8565, + "step": 9370 + }, + { + "epoch": 
0.989451476793249, + "grad_norm": 0.43716755509376526, + "learning_rate": 4.2016696607680147e-07, + "loss": 1.8435, + "step": 9380 + }, + { + "epoch": 0.990506329113924, + "grad_norm": 0.44727635383605957, + "learning_rate": 3.4034128091917085e-07, + "loss": 1.8462, + "step": 9390 + }, + { + "epoch": 0.9915611814345991, + "grad_norm": 0.44620561599731445, + "learning_rate": 2.689158982859541e-07, + "loss": 1.859, + "step": 9400 + }, + { + "epoch": 0.9926160337552743, + "grad_norm": 0.43522074818611145, + "learning_rate": 2.05891618533266e-07, + "loss": 1.8348, + "step": 9410 + }, + { + "epoch": 0.9936708860759493, + "grad_norm": 0.4385356903076172, + "learning_rate": 1.5126914787894074e-07, + "loss": 1.854, + "step": 9420 + }, + { + "epoch": 0.9947257383966245, + "grad_norm": 0.44689205288887024, + "learning_rate": 1.0504909839462173e-07, + "loss": 1.8556, + "step": 9430 + }, + { + "epoch": 0.9957805907172996, + "grad_norm": 0.42641395330429077, + "learning_rate": 6.723198799826746e-08, + "loss": 1.8502, + "step": 9440 + }, + { + "epoch": 0.9968354430379747, + "grad_norm": 0.44421735405921936, + "learning_rate": 3.781824044932214e-08, + "loss": 1.8643, + "step": 9450 + }, + { + "epoch": 0.9978902953586498, + "grad_norm": 0.4314943850040436, + "learning_rate": 1.6808185342970238e-08, + "loss": 1.842, + "step": 9460 + }, + { + "epoch": 0.9989451476793249, + "grad_norm": 0.4381557106971741, + "learning_rate": 4.202058107305451e-09, + "loss": 1.8583, + "step": 9470 + }, + { + "epoch": 1.0, + "grad_norm": 1.30005943775177, + "learning_rate": 0.0, + "loss": 1.8435, + "step": 9480 + } + ], + "logging_steps": 10, + "max_steps": 9480, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.8391618477891584e+16, 
+ "train_batch_size": 1024, + "trial_name": null, + "trial_params": null +} diff --git a/saves-gpt2-cosine/checkpoint-9480/training_args.bin b/saves-gpt2-cosine/checkpoint-9480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..be117cbf9e7ddf50a39a5407473c3c2e03e69ef4 --- /dev/null +++ b/saves-gpt2-cosine/checkpoint-9480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:104a112b26d2173c12297d9e105ef74606b528695cb927ec3802668dfe6833ce +size 5176 diff --git a/saves-gpt2-cosine/config.json b/saves-gpt2-cosine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c41b5987863e5a36912ae874f26bce471afeca9f --- /dev/null +++ b/saves-gpt2-cosine/config.json @@ -0,0 +1,34 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "hidden_act": "gelu", + "initializer_range": 0.02, + "intermediate_size": 1024, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_embd": 256, + "n_head": 8, + "n_inner": null, + "n_layer": 2, + "n_positions": 1024, + "num_key_value_heads": 8, + "reorder_and_upcast_attn": false, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": false, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.1, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-gpt2-cosine/generation_config.json b/saves-gpt2-cosine/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..fb2eba6018c75d5bca061373b0ddaa2abf0a1f68 --- /dev/null +++ b/saves-gpt2-cosine/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 50256, + "eos_token_id": 50256, + "transformers_version": 
"4.42.4" +} diff --git a/saves-gpt2-cosine/model.safetensors b/saves-gpt2-cosine/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9066386d2ede1926402d1cde71e6c139d1bc76d1 --- /dev/null +++ b/saves-gpt2-cosine/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:832abcac55ddf8fdf6b3ebb9ce8344fc6e06e7d661f47fec80c8c5eef377a7a1 +size 9419432 diff --git a/saves-gpt2-cosine/result.log b/saves-gpt2-cosine/result.log new file mode 100644 index 0000000000000000000000000000000000000000..28c75137aa08743e848be9019a80c15350d873c5 --- /dev/null +++ b/saves-gpt2-cosine/result.log @@ -0,0 +1 @@ +{'train_runtime': 2684.585, 'train_samples_per_second': 3615.681, 'train_steps_per_second': 3.531, 'train_loss': 2.108148268607095, 'epoch': 1.0} \ No newline at end of file diff --git a/saves-gpt2-cosine/special_tokens_map.json b/saves-gpt2-cosine/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-gpt2-cosine/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-gpt2-cosine/tokenizer.json b/saves-gpt2-cosine/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-gpt2-cosine/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, 
+ "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + 
"å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 
380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 
527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, 
+ "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 
816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 
958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + 
"irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 
1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + 
"åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + 
"pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 
1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 
1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 
1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 
1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i 
l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", + "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ 
³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". .", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-gpt2-cosine/tokenizer_config.json b/saves-gpt2-cosine/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-gpt2-cosine/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + 
"model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-gpt2/checkpoint-9480/config.json b/saves-gpt2/checkpoint-9480/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c41b5987863e5a36912ae874f26bce471afeca9f --- /dev/null +++ b/saves-gpt2/checkpoint-9480/config.json @@ -0,0 +1,34 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "hidden_act": "gelu", + "initializer_range": 0.02, + "intermediate_size": 1024, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_embd": 256, + "n_head": 8, + "n_inner": null, + "n_layer": 2, + "n_positions": 1024, + "num_key_value_heads": 8, + "reorder_and_upcast_attn": false, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": false, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.1, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-gpt2/checkpoint-9480/generation_config.json b/saves-gpt2/checkpoint-9480/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..fb2eba6018c75d5bca061373b0ddaa2abf0a1f68 --- /dev/null +++ b/saves-gpt2/checkpoint-9480/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 50256, + "eos_token_id": 50256, + "transformers_version": "4.42.4" +} diff --git a/saves-gpt2/checkpoint-9480/model.safetensors b/saves-gpt2/checkpoint-9480/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b6214ce7d099d05bef8f1f25dd07b28bd6c81be6 --- /dev/null +++ b/saves-gpt2/checkpoint-9480/model.safetensors @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:f4ed12a44fd33ff3f052a87e7cb6d4d67dc97b33b100593f59f8c8816ee4515f +size 9419432 diff --git a/saves-gpt2/checkpoint-9480/optimizer.pt b/saves-gpt2/checkpoint-9480/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..94a570a3b16d161eb9cebb03c41690c5cd5415cd --- /dev/null +++ b/saves-gpt2/checkpoint-9480/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d59181f50040d1a73be4973fe14db6306b722d1edc93455f4794ae24774650c0 +size 18857162 diff --git a/saves-gpt2/checkpoint-9480/rng_state.pth b/saves-gpt2/checkpoint-9480/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0fed94df6e39f86d0f7d754b86da20d654a45801 --- /dev/null +++ b/saves-gpt2/checkpoint-9480/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f9442d8d7469c6d4ca45133aef3c8266faf068adc1a246d6dd77665a1a72f41 +size 14244 diff --git a/saves-gpt2/checkpoint-9480/scheduler.pt b/saves-gpt2/checkpoint-9480/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..63473f23a031ab0f869bb406d5cf89839262f03d --- /dev/null +++ b/saves-gpt2/checkpoint-9480/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbb2bea2f7536b844ad9bb1bf6c3877fce0b1eb4d96764e140560dbf207ce6aa +size 1064 diff --git a/saves-gpt2/checkpoint-9480/special_tokens_map.json b/saves-gpt2/checkpoint-9480/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-gpt2/checkpoint-9480/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": 
"<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-gpt2/checkpoint-9480/tokenizer.json b/saves-gpt2/checkpoint-9480/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-gpt2/checkpoint-9480/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + 
")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, 
+ "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 
336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + 
"åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + 
"åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 
774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 
916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + 
"ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 
1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + 
"ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 
1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + 
"åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + 
"åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 
1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 
1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-gpt2/checkpoint-9480/tokenizer_config.json b/saves-gpt2/checkpoint-9480/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-gpt2/checkpoint-9480/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + 
"errors": "replace", + "model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-gpt2/checkpoint-9480/trainer_state.json b/saves-gpt2/checkpoint-9480/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..954de1a4466bc1a46d1158e7d825334a8443e44f --- /dev/null +++ b/saves-gpt2/checkpoint-9480/trainer_state.json @@ -0,0 +1,6669 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 9480, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0010548523206751054, + "grad_norm": 1.2868106365203857, + "learning_rate": 0.00015822784810126583, + "loss": 7.4625, + "step": 10 + }, + { + "epoch": 0.002109704641350211, + "grad_norm": 1.081502914428711, + "learning_rate": 0.00031645569620253165, + "loss": 6.7982, + "step": 20 + }, + { + "epoch": 0.0031645569620253164, + "grad_norm": 0.7845546007156372, + "learning_rate": 0.00047468354430379745, + "loss": 6.1976, + "step": 30 + }, + { + "epoch": 0.004219409282700422, + "grad_norm": 1.0056753158569336, + "learning_rate": 0.0006329113924050633, + "loss": 5.7558, + "step": 40 + }, + { + "epoch": 0.005274261603375527, + "grad_norm": 0.6206520795822144, + "learning_rate": 0.0007911392405063291, + "loss": 5.3613, + "step": 50 + }, + { + "epoch": 0.006329113924050633, + "grad_norm": 0.6065470576286316, + "learning_rate": 0.0009493670886075949, + "loss": 4.9295, + "step": 60 + }, + { + "epoch": 0.007383966244725738, + "grad_norm": 1.051611065864563, + "learning_rate": 0.0011075949367088608, + "loss": 4.5799, + "step": 70 + }, + { + "epoch": 0.008438818565400843, + "grad_norm": 0.678774893283844, + "learning_rate": 0.0012658227848101266, + "loss": 4.3343, + "step": 80 + }, + { + "epoch": 0.00949367088607595, + "grad_norm": 0.6915785074234009, + "learning_rate": 
0.0014240506329113926, + "loss": 4.1424, + "step": 90 + }, + { + "epoch": 0.010548523206751054, + "grad_norm": 0.4927213788032532, + "learning_rate": 0.0015, + "loss": 4.0223, + "step": 100 + }, + { + "epoch": 0.011603375527426161, + "grad_norm": 0.4323209524154663, + "learning_rate": 0.0015, + "loss": 3.8911, + "step": 110 + }, + { + "epoch": 0.012658227848101266, + "grad_norm": 0.5385138988494873, + "learning_rate": 0.0015, + "loss": 3.8258, + "step": 120 + }, + { + "epoch": 0.013713080168776372, + "grad_norm": 0.4882274866104126, + "learning_rate": 0.0015, + "loss": 3.7476, + "step": 130 + }, + { + "epoch": 0.014767932489451477, + "grad_norm": 1.0732852220535278, + "learning_rate": 0.0015, + "loss": 3.6807, + "step": 140 + }, + { + "epoch": 0.015822784810126583, + "grad_norm": 0.6531216502189636, + "learning_rate": 0.0015, + "loss": 3.631, + "step": 150 + }, + { + "epoch": 0.016877637130801686, + "grad_norm": 0.6538905501365662, + "learning_rate": 0.0015, + "loss": 3.5829, + "step": 160 + }, + { + "epoch": 0.017932489451476793, + "grad_norm": 0.5252573490142822, + "learning_rate": 0.0015, + "loss": 3.5372, + "step": 170 + }, + { + "epoch": 0.0189873417721519, + "grad_norm": 0.5157744288444519, + "learning_rate": 0.0015, + "loss": 3.5138, + "step": 180 + }, + { + "epoch": 0.020042194092827006, + "grad_norm": 0.7058530449867249, + "learning_rate": 0.0015, + "loss": 3.4513, + "step": 190 + }, + { + "epoch": 0.02109704641350211, + "grad_norm": 0.4870591461658478, + "learning_rate": 0.0015, + "loss": 3.4176, + "step": 200 + }, + { + "epoch": 0.022151898734177215, + "grad_norm": 0.5080909132957458, + "learning_rate": 0.0015, + "loss": 3.3788, + "step": 210 + }, + { + "epoch": 0.023206751054852322, + "grad_norm": 0.6322933435440063, + "learning_rate": 0.0015, + "loss": 3.3503, + "step": 220 + }, + { + "epoch": 0.024261603375527425, + "grad_norm": 0.5261993408203125, + "learning_rate": 0.0015, + "loss": 3.3152, + "step": 230 + }, + { + "epoch": 0.02531645569620253, + 
"grad_norm": 0.5326963663101196, + "learning_rate": 0.0015, + "loss": 3.2764, + "step": 240 + }, + { + "epoch": 0.026371308016877638, + "grad_norm": 0.5141389966011047, + "learning_rate": 0.0015, + "loss": 3.2478, + "step": 250 + }, + { + "epoch": 0.027426160337552744, + "grad_norm": 0.5861570239067078, + "learning_rate": 0.0015, + "loss": 3.213, + "step": 260 + }, + { + "epoch": 0.028481012658227847, + "grad_norm": 0.5424656867980957, + "learning_rate": 0.0015, + "loss": 3.2038, + "step": 270 + }, + { + "epoch": 0.029535864978902954, + "grad_norm": 0.5439494252204895, + "learning_rate": 0.0015, + "loss": 3.1589, + "step": 280 + }, + { + "epoch": 0.03059071729957806, + "grad_norm": 0.48905304074287415, + "learning_rate": 0.0015, + "loss": 3.1448, + "step": 290 + }, + { + "epoch": 0.03164556962025317, + "grad_norm": 0.5016011595726013, + "learning_rate": 0.0015, + "loss": 3.1269, + "step": 300 + }, + { + "epoch": 0.03270042194092827, + "grad_norm": 0.5477200150489807, + "learning_rate": 0.0015, + "loss": 3.0937, + "step": 310 + }, + { + "epoch": 0.03375527426160337, + "grad_norm": 0.4754057824611664, + "learning_rate": 0.0015, + "loss": 3.072, + "step": 320 + }, + { + "epoch": 0.03481012658227848, + "grad_norm": 0.5856311917304993, + "learning_rate": 0.0015, + "loss": 3.0638, + "step": 330 + }, + { + "epoch": 0.035864978902953586, + "grad_norm": 0.5928363800048828, + "learning_rate": 0.0015, + "loss": 3.0494, + "step": 340 + }, + { + "epoch": 0.03691983122362869, + "grad_norm": 0.48620685935020447, + "learning_rate": 0.0015, + "loss": 3.0092, + "step": 350 + }, + { + "epoch": 0.0379746835443038, + "grad_norm": 0.6079722046852112, + "learning_rate": 0.0015, + "loss": 2.9939, + "step": 360 + }, + { + "epoch": 0.039029535864978905, + "grad_norm": 0.6120694279670715, + "learning_rate": 0.0015, + "loss": 2.9895, + "step": 370 + }, + { + "epoch": 0.04008438818565401, + "grad_norm": 0.6466580033302307, + "learning_rate": 0.0015, + "loss": 2.9753, + "step": 380 + }, + { + 
"epoch": 0.04113924050632911, + "grad_norm": 0.6039373874664307, + "learning_rate": 0.0015, + "loss": 2.961, + "step": 390 + }, + { + "epoch": 0.04219409282700422, + "grad_norm": 0.4796377420425415, + "learning_rate": 0.0015, + "loss": 2.9316, + "step": 400 + }, + { + "epoch": 0.043248945147679324, + "grad_norm": 0.6468100547790527, + "learning_rate": 0.0015, + "loss": 2.9278, + "step": 410 + }, + { + "epoch": 0.04430379746835443, + "grad_norm": 0.7683860063552856, + "learning_rate": 0.0015, + "loss": 2.9088, + "step": 420 + }, + { + "epoch": 0.04535864978902954, + "grad_norm": 0.5952186584472656, + "learning_rate": 0.0015, + "loss": 2.886, + "step": 430 + }, + { + "epoch": 0.046413502109704644, + "grad_norm": 0.5001847147941589, + "learning_rate": 0.0015, + "loss": 2.8733, + "step": 440 + }, + { + "epoch": 0.04746835443037975, + "grad_norm": 0.6091427803039551, + "learning_rate": 0.0015, + "loss": 2.8715, + "step": 450 + }, + { + "epoch": 0.04852320675105485, + "grad_norm": 0.5184801816940308, + "learning_rate": 0.0015, + "loss": 2.864, + "step": 460 + }, + { + "epoch": 0.049578059071729956, + "grad_norm": 0.5876625776290894, + "learning_rate": 0.0015, + "loss": 2.8365, + "step": 470 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 0.6384932994842529, + "learning_rate": 0.0015, + "loss": 2.835, + "step": 480 + }, + { + "epoch": 0.05168776371308017, + "grad_norm": 0.6414058804512024, + "learning_rate": 0.0015, + "loss": 2.8105, + "step": 490 + }, + { + "epoch": 0.052742616033755275, + "grad_norm": 0.5449652671813965, + "learning_rate": 0.0015, + "loss": 2.8079, + "step": 500 + }, + { + "epoch": 0.05379746835443038, + "grad_norm": 0.5134292840957642, + "learning_rate": 0.0015, + "loss": 2.7963, + "step": 510 + }, + { + "epoch": 0.05485232067510549, + "grad_norm": 0.5442935824394226, + "learning_rate": 0.0015, + "loss": 2.7782, + "step": 520 + }, + { + "epoch": 0.05590717299578059, + "grad_norm": 0.48734530806541443, + "learning_rate": 0.0015, + "loss": 
2.7682, + "step": 530 + }, + { + "epoch": 0.056962025316455694, + "grad_norm": 0.5748555064201355, + "learning_rate": 0.0015, + "loss": 2.7495, + "step": 540 + }, + { + "epoch": 0.0580168776371308, + "grad_norm": 0.6285437941551208, + "learning_rate": 0.0015, + "loss": 2.7557, + "step": 550 + }, + { + "epoch": 0.05907172995780591, + "grad_norm": 0.6870784759521484, + "learning_rate": 0.0015, + "loss": 2.7206, + "step": 560 + }, + { + "epoch": 0.060126582278481014, + "grad_norm": 0.6807124018669128, + "learning_rate": 0.0015, + "loss": 2.7254, + "step": 570 + }, + { + "epoch": 0.06118143459915612, + "grad_norm": 0.6352162957191467, + "learning_rate": 0.0015, + "loss": 2.7221, + "step": 580 + }, + { + "epoch": 0.06223628691983123, + "grad_norm": 0.8213168382644653, + "learning_rate": 0.0015, + "loss": 2.7018, + "step": 590 + }, + { + "epoch": 0.06329113924050633, + "grad_norm": 0.8000382781028748, + "learning_rate": 0.0015, + "loss": 2.6865, + "step": 600 + }, + { + "epoch": 0.06434599156118144, + "grad_norm": 0.5520359873771667, + "learning_rate": 0.0015, + "loss": 2.6886, + "step": 610 + }, + { + "epoch": 0.06540084388185655, + "grad_norm": 0.5529119968414307, + "learning_rate": 0.0015, + "loss": 2.6749, + "step": 620 + }, + { + "epoch": 0.06645569620253164, + "grad_norm": 0.5327541828155518, + "learning_rate": 0.0015, + "loss": 2.6622, + "step": 630 + }, + { + "epoch": 0.06751054852320675, + "grad_norm": 0.6201050281524658, + "learning_rate": 0.0015, + "loss": 2.6688, + "step": 640 + }, + { + "epoch": 0.06856540084388185, + "grad_norm": 0.612938404083252, + "learning_rate": 0.0015, + "loss": 2.6624, + "step": 650 + }, + { + "epoch": 0.06962025316455696, + "grad_norm": 0.5672504901885986, + "learning_rate": 0.0015, + "loss": 2.6479, + "step": 660 + }, + { + "epoch": 0.07067510548523206, + "grad_norm": 0.5667068958282471, + "learning_rate": 0.0015, + "loss": 2.6356, + "step": 670 + }, + { + "epoch": 0.07172995780590717, + "grad_norm": 0.6461451649665833, + 
"learning_rate": 0.0015, + "loss": 2.6288, + "step": 680 + }, + { + "epoch": 0.07278481012658228, + "grad_norm": 0.879687488079071, + "learning_rate": 0.0015, + "loss": 2.6352, + "step": 690 + }, + { + "epoch": 0.07383966244725738, + "grad_norm": 0.6234734058380127, + "learning_rate": 0.0015, + "loss": 2.6209, + "step": 700 + }, + { + "epoch": 0.07489451476793249, + "grad_norm": 0.5032368898391724, + "learning_rate": 0.0015, + "loss": 2.6052, + "step": 710 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 0.5342044830322266, + "learning_rate": 0.0015, + "loss": 2.5859, + "step": 720 + }, + { + "epoch": 0.0770042194092827, + "grad_norm": 0.6016132235527039, + "learning_rate": 0.0015, + "loss": 2.5785, + "step": 730 + }, + { + "epoch": 0.07805907172995781, + "grad_norm": 0.5635907649993896, + "learning_rate": 0.0015, + "loss": 2.5911, + "step": 740 + }, + { + "epoch": 0.07911392405063292, + "grad_norm": 0.6573704481124878, + "learning_rate": 0.0015, + "loss": 2.5732, + "step": 750 + }, + { + "epoch": 0.08016877637130802, + "grad_norm": 0.725891649723053, + "learning_rate": 0.0015, + "loss": 2.5707, + "step": 760 + }, + { + "epoch": 0.08122362869198312, + "grad_norm": 0.5711265206336975, + "learning_rate": 0.0015, + "loss": 2.5593, + "step": 770 + }, + { + "epoch": 0.08227848101265822, + "grad_norm": 0.785830020904541, + "learning_rate": 0.0015, + "loss": 2.5554, + "step": 780 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 0.6236185431480408, + "learning_rate": 0.0015, + "loss": 2.5627, + "step": 790 + }, + { + "epoch": 0.08438818565400844, + "grad_norm": 0.5514404773712158, + "learning_rate": 0.0015, + "loss": 2.5346, + "step": 800 + }, + { + "epoch": 0.08544303797468354, + "grad_norm": 0.5685513019561768, + "learning_rate": 0.0015, + "loss": 2.5257, + "step": 810 + }, + { + "epoch": 0.08649789029535865, + "grad_norm": 0.571617603302002, + "learning_rate": 0.0015, + "loss": 2.5345, + "step": 820 + }, + { + "epoch": 0.08755274261603375, + "grad_norm": 
0.6274116635322571, + "learning_rate": 0.0015, + "loss": 2.5334, + "step": 830 + }, + { + "epoch": 0.08860759493670886, + "grad_norm": 0.6448284387588501, + "learning_rate": 0.0015, + "loss": 2.5129, + "step": 840 + }, + { + "epoch": 0.08966244725738397, + "grad_norm": 0.550652265548706, + "learning_rate": 0.0015, + "loss": 2.5105, + "step": 850 + }, + { + "epoch": 0.09071729957805907, + "grad_norm": 0.4846270978450775, + "learning_rate": 0.0015, + "loss": 2.5089, + "step": 860 + }, + { + "epoch": 0.09177215189873418, + "grad_norm": 0.5789787769317627, + "learning_rate": 0.0015, + "loss": 2.5095, + "step": 870 + }, + { + "epoch": 0.09282700421940929, + "grad_norm": 0.5167219638824463, + "learning_rate": 0.0015, + "loss": 2.4978, + "step": 880 + }, + { + "epoch": 0.0938818565400844, + "grad_norm": 0.5709961652755737, + "learning_rate": 0.0015, + "loss": 2.4925, + "step": 890 + }, + { + "epoch": 0.0949367088607595, + "grad_norm": 0.6126795411109924, + "learning_rate": 0.0015, + "loss": 2.4843, + "step": 900 + }, + { + "epoch": 0.09599156118143459, + "grad_norm": 0.5470844507217407, + "learning_rate": 0.0015, + "loss": 2.4912, + "step": 910 + }, + { + "epoch": 0.0970464135021097, + "grad_norm": 0.487741082906723, + "learning_rate": 0.0015, + "loss": 2.4809, + "step": 920 + }, + { + "epoch": 0.0981012658227848, + "grad_norm": 0.5449468493461609, + "learning_rate": 0.0015, + "loss": 2.4667, + "step": 930 + }, + { + "epoch": 0.09915611814345991, + "grad_norm": 0.5165682435035706, + "learning_rate": 0.0015, + "loss": 2.4695, + "step": 940 + }, + { + "epoch": 0.10021097046413502, + "grad_norm": 0.523166298866272, + "learning_rate": 0.0015, + "loss": 2.4533, + "step": 950 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 0.5307929515838623, + "learning_rate": 0.0015, + "loss": 2.4599, + "step": 960 + }, + { + "epoch": 0.10232067510548523, + "grad_norm": 0.5590803623199463, + "learning_rate": 0.0015, + "loss": 2.4603, + "step": 970 + }, + { + "epoch": 
0.10337552742616034, + "grad_norm": 0.586479127407074, + "learning_rate": 0.0015, + "loss": 2.4378, + "step": 980 + }, + { + "epoch": 0.10443037974683544, + "grad_norm": 0.6927133202552795, + "learning_rate": 0.0015, + "loss": 2.4365, + "step": 990 + }, + { + "epoch": 0.10548523206751055, + "grad_norm": 0.7192505598068237, + "learning_rate": 0.0015, + "loss": 2.4458, + "step": 1000 + }, + { + "epoch": 0.10654008438818566, + "grad_norm": 0.5297435522079468, + "learning_rate": 0.0015, + "loss": 2.4368, + "step": 1010 + }, + { + "epoch": 0.10759493670886076, + "grad_norm": 0.5197161436080933, + "learning_rate": 0.0015, + "loss": 2.431, + "step": 1020 + }, + { + "epoch": 0.10864978902953587, + "grad_norm": 0.7736219763755798, + "learning_rate": 0.0015, + "loss": 2.4302, + "step": 1030 + }, + { + "epoch": 0.10970464135021098, + "grad_norm": 0.5818594694137573, + "learning_rate": 0.0015, + "loss": 2.4228, + "step": 1040 + }, + { + "epoch": 0.11075949367088607, + "grad_norm": 0.5666146874427795, + "learning_rate": 0.0015, + "loss": 2.4246, + "step": 1050 + }, + { + "epoch": 0.11181434599156118, + "grad_norm": 0.5254496335983276, + "learning_rate": 0.0015, + "loss": 2.4131, + "step": 1060 + }, + { + "epoch": 0.11286919831223628, + "grad_norm": 0.5331057906150818, + "learning_rate": 0.0015, + "loss": 2.4027, + "step": 1070 + }, + { + "epoch": 0.11392405063291139, + "grad_norm": 0.5084558129310608, + "learning_rate": 0.0015, + "loss": 2.4073, + "step": 1080 + }, + { + "epoch": 0.1149789029535865, + "grad_norm": 0.5524882078170776, + "learning_rate": 0.0015, + "loss": 2.4098, + "step": 1090 + }, + { + "epoch": 0.1160337552742616, + "grad_norm": 0.5621328353881836, + "learning_rate": 0.0015, + "loss": 2.3943, + "step": 1100 + }, + { + "epoch": 0.11708860759493671, + "grad_norm": 0.4843035042285919, + "learning_rate": 0.0015, + "loss": 2.3961, + "step": 1110 + }, + { + "epoch": 0.11814345991561181, + "grad_norm": 0.528351366519928, + "learning_rate": 0.0015, + "loss": 2.3914, + 
"step": 1120 + }, + { + "epoch": 0.11919831223628692, + "grad_norm": 0.599197506904602, + "learning_rate": 0.0015, + "loss": 2.3768, + "step": 1130 + }, + { + "epoch": 0.12025316455696203, + "grad_norm": 0.513840913772583, + "learning_rate": 0.0015, + "loss": 2.3961, + "step": 1140 + }, + { + "epoch": 0.12130801687763713, + "grad_norm": 0.6017542481422424, + "learning_rate": 0.0015, + "loss": 2.3786, + "step": 1150 + }, + { + "epoch": 0.12236286919831224, + "grad_norm": 0.509437620639801, + "learning_rate": 0.0015, + "loss": 2.3859, + "step": 1160 + }, + { + "epoch": 0.12341772151898735, + "grad_norm": 0.5826842784881592, + "learning_rate": 0.0015, + "loss": 2.3702, + "step": 1170 + }, + { + "epoch": 0.12447257383966245, + "grad_norm": 0.529461145401001, + "learning_rate": 0.0015, + "loss": 2.3571, + "step": 1180 + }, + { + "epoch": 0.12552742616033755, + "grad_norm": 0.5379661321640015, + "learning_rate": 0.0015, + "loss": 2.3604, + "step": 1190 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 0.5306044220924377, + "learning_rate": 0.0015, + "loss": 2.369, + "step": 1200 + }, + { + "epoch": 0.12763713080168776, + "grad_norm": 0.5906445980072021, + "learning_rate": 0.0015, + "loss": 2.3662, + "step": 1210 + }, + { + "epoch": 0.12869198312236288, + "grad_norm": 0.5382705926895142, + "learning_rate": 0.0015, + "loss": 2.3502, + "step": 1220 + }, + { + "epoch": 0.12974683544303797, + "grad_norm": 0.49285948276519775, + "learning_rate": 0.0015, + "loss": 2.3365, + "step": 1230 + }, + { + "epoch": 0.1308016877637131, + "grad_norm": 0.5219690799713135, + "learning_rate": 0.0015, + "loss": 2.354, + "step": 1240 + }, + { + "epoch": 0.13185654008438819, + "grad_norm": 0.5424527525901794, + "learning_rate": 0.0015, + "loss": 2.3568, + "step": 1250 + }, + { + "epoch": 0.13291139240506328, + "grad_norm": 0.5400310158729553, + "learning_rate": 0.0015, + "loss": 2.336, + "step": 1260 + }, + { + "epoch": 0.1339662447257384, + "grad_norm": 0.5023423433303833, + 
"learning_rate": 0.0015, + "loss": 2.3316, + "step": 1270 + }, + { + "epoch": 0.1350210970464135, + "grad_norm": 0.5966672301292419, + "learning_rate": 0.0015, + "loss": 2.3487, + "step": 1280 + }, + { + "epoch": 0.1360759493670886, + "grad_norm": 0.4899304509162903, + "learning_rate": 0.0015, + "loss": 2.3344, + "step": 1290 + }, + { + "epoch": 0.1371308016877637, + "grad_norm": 0.4942777752876282, + "learning_rate": 0.0015, + "loss": 2.3203, + "step": 1300 + }, + { + "epoch": 0.13818565400843882, + "grad_norm": 0.5217072367668152, + "learning_rate": 0.0015, + "loss": 2.325, + "step": 1310 + }, + { + "epoch": 0.13924050632911392, + "grad_norm": 0.5340680480003357, + "learning_rate": 0.0015, + "loss": 2.3268, + "step": 1320 + }, + { + "epoch": 0.14029535864978904, + "grad_norm": 0.5798514485359192, + "learning_rate": 0.0015, + "loss": 2.3177, + "step": 1330 + }, + { + "epoch": 0.14135021097046413, + "grad_norm": 0.5663682818412781, + "learning_rate": 0.0015, + "loss": 2.3212, + "step": 1340 + }, + { + "epoch": 0.14240506329113925, + "grad_norm": 0.5789469480514526, + "learning_rate": 0.0015, + "loss": 2.3274, + "step": 1350 + }, + { + "epoch": 0.14345991561181434, + "grad_norm": 0.5599437952041626, + "learning_rate": 0.0015, + "loss": 2.3133, + "step": 1360 + }, + { + "epoch": 0.14451476793248946, + "grad_norm": 0.5702401995658875, + "learning_rate": 0.0015, + "loss": 2.3123, + "step": 1370 + }, + { + "epoch": 0.14556962025316456, + "grad_norm": 0.5608999133110046, + "learning_rate": 0.0015, + "loss": 2.2989, + "step": 1380 + }, + { + "epoch": 0.14662447257383968, + "grad_norm": 0.5066065192222595, + "learning_rate": 0.0015, + "loss": 2.3107, + "step": 1390 + }, + { + "epoch": 0.14767932489451477, + "grad_norm": 0.6196108460426331, + "learning_rate": 0.0015, + "loss": 2.2991, + "step": 1400 + }, + { + "epoch": 0.14873417721518986, + "grad_norm": 0.5460635423660278, + "learning_rate": 0.0015, + "loss": 2.2974, + "step": 1410 + }, + { + "epoch": 0.14978902953586498, 
+ "grad_norm": 0.4733991026878357, + "learning_rate": 0.0015, + "loss": 2.3018, + "step": 1420 + }, + { + "epoch": 0.15084388185654007, + "grad_norm": 0.5400242805480957, + "learning_rate": 0.0015, + "loss": 2.2962, + "step": 1430 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 0.5293214917182922, + "learning_rate": 0.0015, + "loss": 2.2908, + "step": 1440 + }, + { + "epoch": 0.1529535864978903, + "grad_norm": 0.45472586154937744, + "learning_rate": 0.0015, + "loss": 2.2902, + "step": 1450 + }, + { + "epoch": 0.1540084388185654, + "grad_norm": 0.5257254838943481, + "learning_rate": 0.0015, + "loss": 2.2861, + "step": 1460 + }, + { + "epoch": 0.1550632911392405, + "grad_norm": 0.5383533835411072, + "learning_rate": 0.0015, + "loss": 2.2832, + "step": 1470 + }, + { + "epoch": 0.15611814345991562, + "grad_norm": 0.5611907243728638, + "learning_rate": 0.0015, + "loss": 2.2883, + "step": 1480 + }, + { + "epoch": 0.1571729957805907, + "grad_norm": 0.5457258224487305, + "learning_rate": 0.0015, + "loss": 2.2838, + "step": 1490 + }, + { + "epoch": 0.15822784810126583, + "grad_norm": 0.49772271513938904, + "learning_rate": 0.0015, + "loss": 2.2817, + "step": 1500 + }, + { + "epoch": 0.15928270042194093, + "grad_norm": 0.5408310294151306, + "learning_rate": 0.0015, + "loss": 2.2784, + "step": 1510 + }, + { + "epoch": 0.16033755274261605, + "grad_norm": 0.5458889603614807, + "learning_rate": 0.0015, + "loss": 2.2805, + "step": 1520 + }, + { + "epoch": 0.16139240506329114, + "grad_norm": 0.4789125323295593, + "learning_rate": 0.0015, + "loss": 2.2612, + "step": 1530 + }, + { + "epoch": 0.16244725738396623, + "grad_norm": 0.4473031759262085, + "learning_rate": 0.0015, + "loss": 2.252, + "step": 1540 + }, + { + "epoch": 0.16350210970464135, + "grad_norm": 0.5544670820236206, + "learning_rate": 0.0015, + "loss": 2.2557, + "step": 1550 + }, + { + "epoch": 0.16455696202531644, + "grad_norm": 0.5125646591186523, + "learning_rate": 0.0015, + "loss": 2.2613, + "step": 1560 + }, 
+ { + "epoch": 0.16561181434599156, + "grad_norm": 0.49753326177597046, + "learning_rate": 0.0015, + "loss": 2.2521, + "step": 1570 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 0.5130689740180969, + "learning_rate": 0.0015, + "loss": 2.2621, + "step": 1580 + }, + { + "epoch": 0.16772151898734178, + "grad_norm": 0.5498607158660889, + "learning_rate": 0.0015, + "loss": 2.2588, + "step": 1590 + }, + { + "epoch": 0.16877637130801687, + "grad_norm": 0.4919903874397278, + "learning_rate": 0.0015, + "loss": 2.2635, + "step": 1600 + }, + { + "epoch": 0.169831223628692, + "grad_norm": 0.4785551130771637, + "learning_rate": 0.0015, + "loss": 2.2627, + "step": 1610 + }, + { + "epoch": 0.17088607594936708, + "grad_norm": 0.487488716840744, + "learning_rate": 0.0015, + "loss": 2.2331, + "step": 1620 + }, + { + "epoch": 0.1719409282700422, + "grad_norm": 0.7447993755340576, + "learning_rate": 0.0015, + "loss": 2.2502, + "step": 1630 + }, + { + "epoch": 0.1729957805907173, + "grad_norm": 0.517065703868866, + "learning_rate": 0.0015, + "loss": 2.2445, + "step": 1640 + }, + { + "epoch": 0.17405063291139242, + "grad_norm": 0.48035067319869995, + "learning_rate": 0.0015, + "loss": 2.2383, + "step": 1650 + }, + { + "epoch": 0.1751054852320675, + "grad_norm": 0.4717473089694977, + "learning_rate": 0.0015, + "loss": 2.2332, + "step": 1660 + }, + { + "epoch": 0.17616033755274263, + "grad_norm": 0.5340759754180908, + "learning_rate": 0.0015, + "loss": 2.245, + "step": 1670 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 0.5645149350166321, + "learning_rate": 0.0015, + "loss": 2.2369, + "step": 1680 + }, + { + "epoch": 0.17827004219409281, + "grad_norm": 0.479165256023407, + "learning_rate": 0.0015, + "loss": 2.2194, + "step": 1690 + }, + { + "epoch": 0.17932489451476794, + "grad_norm": 0.5128156542778015, + "learning_rate": 0.0015, + "loss": 2.2411, + "step": 1700 + }, + { + "epoch": 0.18037974683544303, + "grad_norm": 0.5018277764320374, + "learning_rate": 0.0015, + 
"loss": 2.2197, + "step": 1710 + }, + { + "epoch": 0.18143459915611815, + "grad_norm": 0.48069244623184204, + "learning_rate": 0.0015, + "loss": 2.2192, + "step": 1720 + }, + { + "epoch": 0.18248945147679324, + "grad_norm": 0.5087079405784607, + "learning_rate": 0.0015, + "loss": 2.223, + "step": 1730 + }, + { + "epoch": 0.18354430379746836, + "grad_norm": 0.5914036631584167, + "learning_rate": 0.0015, + "loss": 2.2284, + "step": 1740 + }, + { + "epoch": 0.18459915611814345, + "grad_norm": 0.5060463547706604, + "learning_rate": 0.0015, + "loss": 2.2485, + "step": 1750 + }, + { + "epoch": 0.18565400843881857, + "grad_norm": 0.5524370670318604, + "learning_rate": 0.0015, + "loss": 2.2249, + "step": 1760 + }, + { + "epoch": 0.18670886075949367, + "grad_norm": 0.5056757926940918, + "learning_rate": 0.0015, + "loss": 2.2237, + "step": 1770 + }, + { + "epoch": 0.1877637130801688, + "grad_norm": 0.4828547239303589, + "learning_rate": 0.0015, + "loss": 2.2252, + "step": 1780 + }, + { + "epoch": 0.18881856540084388, + "grad_norm": 0.5432929396629333, + "learning_rate": 0.0015, + "loss": 2.2214, + "step": 1790 + }, + { + "epoch": 0.189873417721519, + "grad_norm": 0.51959228515625, + "learning_rate": 0.0015, + "loss": 2.2189, + "step": 1800 + }, + { + "epoch": 0.1909282700421941, + "grad_norm": 0.513494074344635, + "learning_rate": 0.0015, + "loss": 2.2098, + "step": 1810 + }, + { + "epoch": 0.19198312236286919, + "grad_norm": 0.4728807508945465, + "learning_rate": 0.0015, + "loss": 2.2085, + "step": 1820 + }, + { + "epoch": 0.1930379746835443, + "grad_norm": 0.4876849055290222, + "learning_rate": 0.0015, + "loss": 2.2015, + "step": 1830 + }, + { + "epoch": 0.1940928270042194, + "grad_norm": 0.487371027469635, + "learning_rate": 0.0015, + "loss": 2.2029, + "step": 1840 + }, + { + "epoch": 0.19514767932489452, + "grad_norm": 0.5107097029685974, + "learning_rate": 0.0015, + "loss": 2.2053, + "step": 1850 + }, + { + "epoch": 0.1962025316455696, + "grad_norm": 0.4802800416946411, 
+ "learning_rate": 0.0015, + "loss": 2.2095, + "step": 1860 + }, + { + "epoch": 0.19725738396624473, + "grad_norm": 0.5234653949737549, + "learning_rate": 0.0015, + "loss": 2.2067, + "step": 1870 + }, + { + "epoch": 0.19831223628691982, + "grad_norm": 0.5987045764923096, + "learning_rate": 0.0015, + "loss": 2.1995, + "step": 1880 + }, + { + "epoch": 0.19936708860759494, + "grad_norm": 0.5111141204833984, + "learning_rate": 0.0015, + "loss": 2.2054, + "step": 1890 + }, + { + "epoch": 0.20042194092827004, + "grad_norm": 0.516825258731842, + "learning_rate": 0.0015, + "loss": 2.1997, + "step": 1900 + }, + { + "epoch": 0.20147679324894516, + "grad_norm": 0.5078732371330261, + "learning_rate": 0.0015, + "loss": 2.201, + "step": 1910 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 0.4825908839702606, + "learning_rate": 0.0015, + "loss": 2.195, + "step": 1920 + }, + { + "epoch": 0.20358649789029537, + "grad_norm": 0.5081301331520081, + "learning_rate": 0.0015, + "loss": 2.1994, + "step": 1930 + }, + { + "epoch": 0.20464135021097046, + "grad_norm": 0.4621557593345642, + "learning_rate": 0.0015, + "loss": 2.1866, + "step": 1940 + }, + { + "epoch": 0.20569620253164558, + "grad_norm": 0.49421605467796326, + "learning_rate": 0.0015, + "loss": 2.1891, + "step": 1950 + }, + { + "epoch": 0.20675105485232068, + "grad_norm": 0.446591854095459, + "learning_rate": 0.0015, + "loss": 2.1931, + "step": 1960 + }, + { + "epoch": 0.20780590717299577, + "grad_norm": 0.47465038299560547, + "learning_rate": 0.0015, + "loss": 2.198, + "step": 1970 + }, + { + "epoch": 0.2088607594936709, + "grad_norm": 0.47602617740631104, + "learning_rate": 0.0015, + "loss": 2.1803, + "step": 1980 + }, + { + "epoch": 0.20991561181434598, + "grad_norm": 0.4852505028247833, + "learning_rate": 0.0015, + "loss": 2.1811, + "step": 1990 + }, + { + "epoch": 0.2109704641350211, + "grad_norm": 0.495806485414505, + "learning_rate": 0.0015, + "loss": 2.179, + "step": 2000 + }, + { + "epoch": 0.2120253164556962, + 
"grad_norm": 0.4961065948009491, + "learning_rate": 0.0015, + "loss": 2.1852, + "step": 2010 + }, + { + "epoch": 0.21308016877637131, + "grad_norm": 0.5539078116416931, + "learning_rate": 0.0015, + "loss": 2.1965, + "step": 2020 + }, + { + "epoch": 0.2141350210970464, + "grad_norm": 0.5105553269386292, + "learning_rate": 0.0015, + "loss": 2.1847, + "step": 2030 + }, + { + "epoch": 0.21518987341772153, + "grad_norm": 0.47178715467453003, + "learning_rate": 0.0015, + "loss": 2.1823, + "step": 2040 + }, + { + "epoch": 0.21624472573839662, + "grad_norm": 0.47604134678840637, + "learning_rate": 0.0015, + "loss": 2.1838, + "step": 2050 + }, + { + "epoch": 0.21729957805907174, + "grad_norm": 0.5011481046676636, + "learning_rate": 0.0015, + "loss": 2.1696, + "step": 2060 + }, + { + "epoch": 0.21835443037974683, + "grad_norm": 0.454166978597641, + "learning_rate": 0.0015, + "loss": 2.1681, + "step": 2070 + }, + { + "epoch": 0.21940928270042195, + "grad_norm": 0.5561933517456055, + "learning_rate": 0.0015, + "loss": 2.1811, + "step": 2080 + }, + { + "epoch": 0.22046413502109705, + "grad_norm": 0.47614896297454834, + "learning_rate": 0.0015, + "loss": 2.1668, + "step": 2090 + }, + { + "epoch": 0.22151898734177214, + "grad_norm": 0.6641237139701843, + "learning_rate": 0.0015, + "loss": 2.1758, + "step": 2100 + }, + { + "epoch": 0.22257383966244726, + "grad_norm": 0.5254981517791748, + "learning_rate": 0.0015, + "loss": 2.1685, + "step": 2110 + }, + { + "epoch": 0.22362869198312235, + "grad_norm": 0.4500657618045807, + "learning_rate": 0.0015, + "loss": 2.164, + "step": 2120 + }, + { + "epoch": 0.22468354430379747, + "grad_norm": 0.4955187737941742, + "learning_rate": 0.0015, + "loss": 2.1673, + "step": 2130 + }, + { + "epoch": 0.22573839662447256, + "grad_norm": 0.48642462491989136, + "learning_rate": 0.0015, + "loss": 2.1589, + "step": 2140 + }, + { + "epoch": 0.22679324894514769, + "grad_norm": 0.4678893983364105, + "learning_rate": 0.0015, + "loss": 2.1645, + "step": 2150 + 
}, + { + "epoch": 0.22784810126582278, + "grad_norm": 0.5280480980873108, + "learning_rate": 0.0015, + "loss": 2.1805, + "step": 2160 + }, + { + "epoch": 0.2289029535864979, + "grad_norm": 0.5271260142326355, + "learning_rate": 0.0015, + "loss": 2.1607, + "step": 2170 + }, + { + "epoch": 0.229957805907173, + "grad_norm": 0.45616209506988525, + "learning_rate": 0.0015, + "loss": 2.1469, + "step": 2180 + }, + { + "epoch": 0.2310126582278481, + "grad_norm": 0.5835659503936768, + "learning_rate": 0.0015, + "loss": 2.1636, + "step": 2190 + }, + { + "epoch": 0.2320675105485232, + "grad_norm": 0.5413613319396973, + "learning_rate": 0.0015, + "loss": 2.1651, + "step": 2200 + }, + { + "epoch": 0.23312236286919832, + "grad_norm": 0.44273021817207336, + "learning_rate": 0.0015, + "loss": 2.1505, + "step": 2210 + }, + { + "epoch": 0.23417721518987342, + "grad_norm": 0.536941409111023, + "learning_rate": 0.0015, + "loss": 2.1682, + "step": 2220 + }, + { + "epoch": 0.23523206751054854, + "grad_norm": 0.45308029651641846, + "learning_rate": 0.0015, + "loss": 2.1552, + "step": 2230 + }, + { + "epoch": 0.23628691983122363, + "grad_norm": 0.46434181928634644, + "learning_rate": 0.0015, + "loss": 2.1493, + "step": 2240 + }, + { + "epoch": 0.23734177215189872, + "grad_norm": 0.5993618369102478, + "learning_rate": 0.0015, + "loss": 2.1494, + "step": 2250 + }, + { + "epoch": 0.23839662447257384, + "grad_norm": 0.4938041865825653, + "learning_rate": 0.0015, + "loss": 2.1387, + "step": 2260 + }, + { + "epoch": 0.23945147679324894, + "grad_norm": 0.4831969141960144, + "learning_rate": 0.0015, + "loss": 2.1735, + "step": 2270 + }, + { + "epoch": 0.24050632911392406, + "grad_norm": 0.5053781867027283, + "learning_rate": 0.0015, + "loss": 2.1568, + "step": 2280 + }, + { + "epoch": 0.24156118143459915, + "grad_norm": 0.6139144897460938, + "learning_rate": 0.0015, + "loss": 2.1437, + "step": 2290 + }, + { + "epoch": 0.24261603375527427, + "grad_norm": 0.45216429233551025, + "learning_rate": 
0.0015, + "loss": 2.141, + "step": 2300 + }, + { + "epoch": 0.24367088607594936, + "grad_norm": 0.5123635530471802, + "learning_rate": 0.0015, + "loss": 2.1475, + "step": 2310 + }, + { + "epoch": 0.24472573839662448, + "grad_norm": 0.49576327204704285, + "learning_rate": 0.0015, + "loss": 2.1526, + "step": 2320 + }, + { + "epoch": 0.24578059071729957, + "grad_norm": 0.47483232617378235, + "learning_rate": 0.0015, + "loss": 2.1349, + "step": 2330 + }, + { + "epoch": 0.2468354430379747, + "grad_norm": 0.47471559047698975, + "learning_rate": 0.0015, + "loss": 2.1336, + "step": 2340 + }, + { + "epoch": 0.2478902953586498, + "grad_norm": 0.49153319001197815, + "learning_rate": 0.0015, + "loss": 2.1316, + "step": 2350 + }, + { + "epoch": 0.2489451476793249, + "grad_norm": 0.5166982412338257, + "learning_rate": 0.0015, + "loss": 2.1433, + "step": 2360 + }, + { + "epoch": 0.25, + "grad_norm": 0.5839778780937195, + "learning_rate": 0.0015, + "loss": 2.1414, + "step": 2370 + }, + { + "epoch": 0.2510548523206751, + "grad_norm": 0.5124769806861877, + "learning_rate": 0.0015, + "loss": 2.1426, + "step": 2380 + }, + { + "epoch": 0.2521097046413502, + "grad_norm": 0.5506088137626648, + "learning_rate": 0.0015, + "loss": 2.1383, + "step": 2390 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 0.4894935190677643, + "learning_rate": 0.0015, + "loss": 2.1348, + "step": 2400 + }, + { + "epoch": 0.2542194092827004, + "grad_norm": 0.5619579553604126, + "learning_rate": 0.0015, + "loss": 2.1337, + "step": 2410 + }, + { + "epoch": 0.2552742616033755, + "grad_norm": 0.5544256567955017, + "learning_rate": 0.0015, + "loss": 2.1305, + "step": 2420 + }, + { + "epoch": 0.2563291139240506, + "grad_norm": 0.47035881876945496, + "learning_rate": 0.0015, + "loss": 2.1299, + "step": 2430 + }, + { + "epoch": 0.25738396624472576, + "grad_norm": 0.5639828443527222, + "learning_rate": 0.0015, + "loss": 2.1393, + "step": 2440 + }, + { + "epoch": 0.25843881856540085, + "grad_norm": 
0.5048669576644897, + "learning_rate": 0.0015, + "loss": 2.1196, + "step": 2450 + }, + { + "epoch": 0.25949367088607594, + "grad_norm": 0.4745074510574341, + "learning_rate": 0.0015, + "loss": 2.1368, + "step": 2460 + }, + { + "epoch": 0.26054852320675104, + "grad_norm": 0.4397202730178833, + "learning_rate": 0.0015, + "loss": 2.1272, + "step": 2470 + }, + { + "epoch": 0.2616033755274262, + "grad_norm": 0.4900398850440979, + "learning_rate": 0.0015, + "loss": 2.1307, + "step": 2480 + }, + { + "epoch": 0.2626582278481013, + "grad_norm": 0.46921002864837646, + "learning_rate": 0.0015, + "loss": 2.122, + "step": 2490 + }, + { + "epoch": 0.26371308016877637, + "grad_norm": 0.4493680000305176, + "learning_rate": 0.0015, + "loss": 2.1253, + "step": 2500 + }, + { + "epoch": 0.26476793248945146, + "grad_norm": 0.4484681785106659, + "learning_rate": 0.0015, + "loss": 2.1222, + "step": 2510 + }, + { + "epoch": 0.26582278481012656, + "grad_norm": 0.45130497217178345, + "learning_rate": 0.0015, + "loss": 2.1284, + "step": 2520 + }, + { + "epoch": 0.2668776371308017, + "grad_norm": 0.4377133548259735, + "learning_rate": 0.0015, + "loss": 2.1191, + "step": 2530 + }, + { + "epoch": 0.2679324894514768, + "grad_norm": 0.49162405729293823, + "learning_rate": 0.0015, + "loss": 2.1242, + "step": 2540 + }, + { + "epoch": 0.2689873417721519, + "grad_norm": 0.46707892417907715, + "learning_rate": 0.0015, + "loss": 2.1122, + "step": 2550 + }, + { + "epoch": 0.270042194092827, + "grad_norm": 0.4644486606121063, + "learning_rate": 0.0015, + "loss": 2.1259, + "step": 2560 + }, + { + "epoch": 0.27109704641350213, + "grad_norm": 0.5053285956382751, + "learning_rate": 0.0015, + "loss": 2.1178, + "step": 2570 + }, + { + "epoch": 0.2721518987341772, + "grad_norm": 0.48815426230430603, + "learning_rate": 0.0015, + "loss": 2.1179, + "step": 2580 + }, + { + "epoch": 0.2732067510548523, + "grad_norm": 0.4503813683986664, + "learning_rate": 0.0015, + "loss": 2.1237, + "step": 2590 + }, + { + "epoch": 
0.2742616033755274, + "grad_norm": 0.4196797013282776, + "learning_rate": 0.0015, + "loss": 2.1218, + "step": 2600 + }, + { + "epoch": 0.27531645569620256, + "grad_norm": 0.5734556317329407, + "learning_rate": 0.0015, + "loss": 2.1233, + "step": 2610 + }, + { + "epoch": 0.27637130801687765, + "grad_norm": 0.487870991230011, + "learning_rate": 0.0015, + "loss": 2.1204, + "step": 2620 + }, + { + "epoch": 0.27742616033755274, + "grad_norm": 0.4825158417224884, + "learning_rate": 0.0015, + "loss": 2.1064, + "step": 2630 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 0.498384028673172, + "learning_rate": 0.0015, + "loss": 2.1209, + "step": 2640 + }, + { + "epoch": 0.2795358649789029, + "grad_norm": 0.5111238360404968, + "learning_rate": 0.0015, + "loss": 2.1117, + "step": 2650 + }, + { + "epoch": 0.2805907172995781, + "grad_norm": 0.5499308705329895, + "learning_rate": 0.0015, + "loss": 2.1076, + "step": 2660 + }, + { + "epoch": 0.28164556962025317, + "grad_norm": 0.5574318170547485, + "learning_rate": 0.0015, + "loss": 2.105, + "step": 2670 + }, + { + "epoch": 0.28270042194092826, + "grad_norm": 0.4500906765460968, + "learning_rate": 0.0015, + "loss": 2.1053, + "step": 2680 + }, + { + "epoch": 0.28375527426160335, + "grad_norm": 0.4742302894592285, + "learning_rate": 0.0015, + "loss": 2.1083, + "step": 2690 + }, + { + "epoch": 0.2848101265822785, + "grad_norm": 0.47450193762779236, + "learning_rate": 0.0015, + "loss": 2.1058, + "step": 2700 + }, + { + "epoch": 0.2858649789029536, + "grad_norm": 0.4674990475177765, + "learning_rate": 0.0015, + "loss": 2.1062, + "step": 2710 + }, + { + "epoch": 0.2869198312236287, + "grad_norm": 0.4899941682815552, + "learning_rate": 0.0015, + "loss": 2.1095, + "step": 2720 + }, + { + "epoch": 0.2879746835443038, + "grad_norm": 0.4762454628944397, + "learning_rate": 0.0015, + "loss": 2.0897, + "step": 2730 + }, + { + "epoch": 0.2890295358649789, + "grad_norm": 0.49402663111686707, + "learning_rate": 0.0015, + "loss": 2.1016, + 
"step": 2740 + }, + { + "epoch": 0.290084388185654, + "grad_norm": 0.43774598836898804, + "learning_rate": 0.0015, + "loss": 2.1069, + "step": 2750 + }, + { + "epoch": 0.2911392405063291, + "grad_norm": 0.4219803512096405, + "learning_rate": 0.0015, + "loss": 2.1115, + "step": 2760 + }, + { + "epoch": 0.2921940928270042, + "grad_norm": 0.42314285039901733, + "learning_rate": 0.0015, + "loss": 2.1059, + "step": 2770 + }, + { + "epoch": 0.29324894514767935, + "grad_norm": 0.5017038583755493, + "learning_rate": 0.0015, + "loss": 2.1045, + "step": 2780 + }, + { + "epoch": 0.29430379746835444, + "grad_norm": 0.5033962726593018, + "learning_rate": 0.0015, + "loss": 2.1004, + "step": 2790 + }, + { + "epoch": 0.29535864978902954, + "grad_norm": 0.411020427942276, + "learning_rate": 0.0015, + "loss": 2.0992, + "step": 2800 + }, + { + "epoch": 0.29641350210970463, + "grad_norm": 0.4837932288646698, + "learning_rate": 0.0015, + "loss": 2.1011, + "step": 2810 + }, + { + "epoch": 0.2974683544303797, + "grad_norm": 0.48591822385787964, + "learning_rate": 0.0015, + "loss": 2.1056, + "step": 2820 + }, + { + "epoch": 0.29852320675105487, + "grad_norm": 0.5090926885604858, + "learning_rate": 0.0015, + "loss": 2.0751, + "step": 2830 + }, + { + "epoch": 0.29957805907172996, + "grad_norm": 0.4709317088127136, + "learning_rate": 0.0015, + "loss": 2.078, + "step": 2840 + }, + { + "epoch": 0.30063291139240506, + "grad_norm": 0.4561120867729187, + "learning_rate": 0.0015, + "loss": 2.093, + "step": 2850 + }, + { + "epoch": 0.30168776371308015, + "grad_norm": 0.4594179689884186, + "learning_rate": 0.0015, + "loss": 2.0942, + "step": 2860 + }, + { + "epoch": 0.3027426160337553, + "grad_norm": 0.5431883931159973, + "learning_rate": 0.0015, + "loss": 2.094, + "step": 2870 + }, + { + "epoch": 0.3037974683544304, + "grad_norm": 0.4916505515575409, + "learning_rate": 0.0015, + "loss": 2.079, + "step": 2880 + }, + { + "epoch": 0.3048523206751055, + "grad_norm": 0.43344438076019287, + 
"learning_rate": 0.0015, + "loss": 2.0862, + "step": 2890 + }, + { + "epoch": 0.3059071729957806, + "grad_norm": 0.4944959580898285, + "learning_rate": 0.0015, + "loss": 2.093, + "step": 2900 + }, + { + "epoch": 0.3069620253164557, + "grad_norm": 0.4984077215194702, + "learning_rate": 0.0015, + "loss": 2.0932, + "step": 2910 + }, + { + "epoch": 0.3080168776371308, + "grad_norm": 0.4800587594509125, + "learning_rate": 0.0015, + "loss": 2.086, + "step": 2920 + }, + { + "epoch": 0.3090717299578059, + "grad_norm": 0.45485249161720276, + "learning_rate": 0.0015, + "loss": 2.0843, + "step": 2930 + }, + { + "epoch": 0.310126582278481, + "grad_norm": 0.45084336400032043, + "learning_rate": 0.0015, + "loss": 2.0896, + "step": 2940 + }, + { + "epoch": 0.3111814345991561, + "grad_norm": 0.42323753237724304, + "learning_rate": 0.0015, + "loss": 2.0951, + "step": 2950 + }, + { + "epoch": 0.31223628691983124, + "grad_norm": 0.5326976180076599, + "learning_rate": 0.0015, + "loss": 2.0801, + "step": 2960 + }, + { + "epoch": 0.31329113924050633, + "grad_norm": 0.46457424759864807, + "learning_rate": 0.0015, + "loss": 2.0727, + "step": 2970 + }, + { + "epoch": 0.3143459915611814, + "grad_norm": 0.47872284054756165, + "learning_rate": 0.0015, + "loss": 2.0803, + "step": 2980 + }, + { + "epoch": 0.3154008438818565, + "grad_norm": 0.4702615439891815, + "learning_rate": 0.0015, + "loss": 2.0967, + "step": 2990 + }, + { + "epoch": 0.31645569620253167, + "grad_norm": 0.45781195163726807, + "learning_rate": 0.0015, + "loss": 2.0685, + "step": 3000 + }, + { + "epoch": 0.31751054852320676, + "grad_norm": 0.44454801082611084, + "learning_rate": 0.0015, + "loss": 2.0806, + "step": 3010 + }, + { + "epoch": 0.31856540084388185, + "grad_norm": 0.479475200176239, + "learning_rate": 0.0015, + "loss": 2.0787, + "step": 3020 + }, + { + "epoch": 0.31962025316455694, + "grad_norm": 0.4887007474899292, + "learning_rate": 0.0015, + "loss": 2.0717, + "step": 3030 + }, + { + "epoch": 0.3206751054852321, + 
"grad_norm": 0.46655696630477905, + "learning_rate": 0.0015, + "loss": 2.0776, + "step": 3040 + }, + { + "epoch": 0.3217299578059072, + "grad_norm": 0.48713260889053345, + "learning_rate": 0.0015, + "loss": 2.0746, + "step": 3050 + }, + { + "epoch": 0.3227848101265823, + "grad_norm": 0.5423433780670166, + "learning_rate": 0.0015, + "loss": 2.0836, + "step": 3060 + }, + { + "epoch": 0.32383966244725737, + "grad_norm": 0.49016839265823364, + "learning_rate": 0.0015, + "loss": 2.0772, + "step": 3070 + }, + { + "epoch": 0.32489451476793246, + "grad_norm": 0.4809034466743469, + "learning_rate": 0.0015, + "loss": 2.0638, + "step": 3080 + }, + { + "epoch": 0.3259493670886076, + "grad_norm": 0.4437301754951477, + "learning_rate": 0.0015, + "loss": 2.0813, + "step": 3090 + }, + { + "epoch": 0.3270042194092827, + "grad_norm": 0.42194899916648865, + "learning_rate": 0.0015, + "loss": 2.0676, + "step": 3100 + }, + { + "epoch": 0.3280590717299578, + "grad_norm": 0.47773346304893494, + "learning_rate": 0.0015, + "loss": 2.0682, + "step": 3110 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 0.47165659070014954, + "learning_rate": 0.0015, + "loss": 2.0726, + "step": 3120 + }, + { + "epoch": 0.33016877637130804, + "grad_norm": 0.5155400037765503, + "learning_rate": 0.0015, + "loss": 2.0847, + "step": 3130 + }, + { + "epoch": 0.33122362869198313, + "grad_norm": 0.5024751424789429, + "learning_rate": 0.0015, + "loss": 2.0605, + "step": 3140 + }, + { + "epoch": 0.3322784810126582, + "grad_norm": 0.47432848811149597, + "learning_rate": 0.0015, + "loss": 2.0699, + "step": 3150 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.4940382242202759, + "learning_rate": 0.0015, + "loss": 2.0705, + "step": 3160 + }, + { + "epoch": 0.33438818565400846, + "grad_norm": 0.4658326804637909, + "learning_rate": 0.0015, + "loss": 2.0717, + "step": 3170 + }, + { + "epoch": 0.33544303797468356, + "grad_norm": 0.4399244487285614, + "learning_rate": 0.0015, + "loss": 2.067, + "step": 3180 + }, 
+ { + "epoch": 0.33649789029535865, + "grad_norm": 0.41705337166786194, + "learning_rate": 0.0015, + "loss": 2.0647, + "step": 3190 + }, + { + "epoch": 0.33755274261603374, + "grad_norm": 0.4723767936229706, + "learning_rate": 0.0015, + "loss": 2.0601, + "step": 3200 + }, + { + "epoch": 0.33860759493670883, + "grad_norm": 0.4406121075153351, + "learning_rate": 0.0015, + "loss": 2.0763, + "step": 3210 + }, + { + "epoch": 0.339662447257384, + "grad_norm": 0.4701217710971832, + "learning_rate": 0.0015, + "loss": 2.0646, + "step": 3220 + }, + { + "epoch": 0.3407172995780591, + "grad_norm": 0.4767507314682007, + "learning_rate": 0.0015, + "loss": 2.0624, + "step": 3230 + }, + { + "epoch": 0.34177215189873417, + "grad_norm": 0.45047277212142944, + "learning_rate": 0.0015, + "loss": 2.042, + "step": 3240 + }, + { + "epoch": 0.34282700421940926, + "grad_norm": 0.45256108045578003, + "learning_rate": 0.0015, + "loss": 2.0642, + "step": 3250 + }, + { + "epoch": 0.3438818565400844, + "grad_norm": 0.46273359656333923, + "learning_rate": 0.0015, + "loss": 2.0556, + "step": 3260 + }, + { + "epoch": 0.3449367088607595, + "grad_norm": 0.4902341067790985, + "learning_rate": 0.0015, + "loss": 2.0697, + "step": 3270 + }, + { + "epoch": 0.3459915611814346, + "grad_norm": 0.47359898686408997, + "learning_rate": 0.0015, + "loss": 2.0715, + "step": 3280 + }, + { + "epoch": 0.3470464135021097, + "grad_norm": 0.5300987958908081, + "learning_rate": 0.0015, + "loss": 2.0537, + "step": 3290 + }, + { + "epoch": 0.34810126582278483, + "grad_norm": 0.4852631688117981, + "learning_rate": 0.0015, + "loss": 2.0675, + "step": 3300 + }, + { + "epoch": 0.3491561181434599, + "grad_norm": 0.49096518754959106, + "learning_rate": 0.0015, + "loss": 2.0699, + "step": 3310 + }, + { + "epoch": 0.350210970464135, + "grad_norm": 0.4786725342273712, + "learning_rate": 0.0015, + "loss": 2.0695, + "step": 3320 + }, + { + "epoch": 0.3512658227848101, + "grad_norm": 0.4786069095134735, + "learning_rate": 0.0015, + 
"loss": 2.0587, + "step": 3330 + }, + { + "epoch": 0.35232067510548526, + "grad_norm": 0.529323399066925, + "learning_rate": 0.0015, + "loss": 2.058, + "step": 3340 + }, + { + "epoch": 0.35337552742616035, + "grad_norm": 0.45865610241889954, + "learning_rate": 0.0015, + "loss": 2.0528, + "step": 3350 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 0.45312967896461487, + "learning_rate": 0.0015, + "loss": 2.0501, + "step": 3360 + }, + { + "epoch": 0.35548523206751054, + "grad_norm": 0.48587530851364136, + "learning_rate": 0.0015, + "loss": 2.0594, + "step": 3370 + }, + { + "epoch": 0.35654008438818563, + "grad_norm": 0.4497227966785431, + "learning_rate": 0.0015, + "loss": 2.0552, + "step": 3380 + }, + { + "epoch": 0.3575949367088608, + "grad_norm": 0.4384129047393799, + "learning_rate": 0.0015, + "loss": 2.0482, + "step": 3390 + }, + { + "epoch": 0.35864978902953587, + "grad_norm": 0.47989708185195923, + "learning_rate": 0.0015, + "loss": 2.0617, + "step": 3400 + }, + { + "epoch": 0.35970464135021096, + "grad_norm": 0.5777666568756104, + "learning_rate": 0.0015, + "loss": 2.0587, + "step": 3410 + }, + { + "epoch": 0.36075949367088606, + "grad_norm": 0.5106038451194763, + "learning_rate": 0.0015, + "loss": 2.0394, + "step": 3420 + }, + { + "epoch": 0.3618143459915612, + "grad_norm": 0.4221823215484619, + "learning_rate": 0.0015, + "loss": 2.0559, + "step": 3430 + }, + { + "epoch": 0.3628691983122363, + "grad_norm": 0.4602856934070587, + "learning_rate": 0.0015, + "loss": 2.0448, + "step": 3440 + }, + { + "epoch": 0.3639240506329114, + "grad_norm": 0.45464810729026794, + "learning_rate": 0.0015, + "loss": 2.0508, + "step": 3450 + }, + { + "epoch": 0.3649789029535865, + "grad_norm": 0.4791298508644104, + "learning_rate": 0.0015, + "loss": 2.0377, + "step": 3460 + }, + { + "epoch": 0.36603375527426163, + "grad_norm": 0.4933064877986908, + "learning_rate": 0.0015, + "loss": 2.0569, + "step": 3470 + }, + { + "epoch": 0.3670886075949367, + "grad_norm": 
0.4469650983810425, + "learning_rate": 0.0015, + "loss": 2.0604, + "step": 3480 + }, + { + "epoch": 0.3681434599156118, + "grad_norm": 0.49007025361061096, + "learning_rate": 0.0015, + "loss": 2.057, + "step": 3490 + }, + { + "epoch": 0.3691983122362869, + "grad_norm": 0.49333617091178894, + "learning_rate": 0.0015, + "loss": 2.0383, + "step": 3500 + }, + { + "epoch": 0.370253164556962, + "grad_norm": 0.4251687228679657, + "learning_rate": 0.0015, + "loss": 2.0538, + "step": 3510 + }, + { + "epoch": 0.37130801687763715, + "grad_norm": 0.4521111249923706, + "learning_rate": 0.0015, + "loss": 2.0465, + "step": 3520 + }, + { + "epoch": 0.37236286919831224, + "grad_norm": 0.46906667947769165, + "learning_rate": 0.0015, + "loss": 2.0612, + "step": 3530 + }, + { + "epoch": 0.37341772151898733, + "grad_norm": 0.4442053437232971, + "learning_rate": 0.0015, + "loss": 2.0575, + "step": 3540 + }, + { + "epoch": 0.3744725738396624, + "grad_norm": 0.4792494475841522, + "learning_rate": 0.0015, + "loss": 2.0446, + "step": 3550 + }, + { + "epoch": 0.3755274261603376, + "grad_norm": 0.4594126343727112, + "learning_rate": 0.0015, + "loss": 2.0446, + "step": 3560 + }, + { + "epoch": 0.37658227848101267, + "grad_norm": 0.4464714229106903, + "learning_rate": 0.0015, + "loss": 2.0402, + "step": 3570 + }, + { + "epoch": 0.37763713080168776, + "grad_norm": 0.5364466309547424, + "learning_rate": 0.0015, + "loss": 2.041, + "step": 3580 + }, + { + "epoch": 0.37869198312236285, + "grad_norm": 0.4744153320789337, + "learning_rate": 0.0015, + "loss": 2.0515, + "step": 3590 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 0.4536152184009552, + "learning_rate": 0.0015, + "loss": 2.0367, + "step": 3600 + }, + { + "epoch": 0.3808016877637131, + "grad_norm": 0.514091968536377, + "learning_rate": 0.0015, + "loss": 2.0391, + "step": 3610 + }, + { + "epoch": 0.3818565400843882, + "grad_norm": 0.4596957564353943, + "learning_rate": 0.0015, + "loss": 2.0433, + "step": 3620 + }, + { + "epoch": 
0.3829113924050633, + "grad_norm": 0.43287795782089233, + "learning_rate": 0.0015, + "loss": 2.0429, + "step": 3630 + }, + { + "epoch": 0.38396624472573837, + "grad_norm": 0.4937414526939392, + "learning_rate": 0.0015, + "loss": 2.0449, + "step": 3640 + }, + { + "epoch": 0.3850210970464135, + "grad_norm": 0.4595278203487396, + "learning_rate": 0.0015, + "loss": 2.0459, + "step": 3650 + }, + { + "epoch": 0.3860759493670886, + "grad_norm": 0.4414646029472351, + "learning_rate": 0.0015, + "loss": 2.0383, + "step": 3660 + }, + { + "epoch": 0.3871308016877637, + "grad_norm": 0.4963432550430298, + "learning_rate": 0.0015, + "loss": 2.0354, + "step": 3670 + }, + { + "epoch": 0.3881856540084388, + "grad_norm": 0.4459683895111084, + "learning_rate": 0.0015, + "loss": 2.0356, + "step": 3680 + }, + { + "epoch": 0.38924050632911394, + "grad_norm": 0.5404839515686035, + "learning_rate": 0.0015, + "loss": 2.0467, + "step": 3690 + }, + { + "epoch": 0.39029535864978904, + "grad_norm": 0.4792265295982361, + "learning_rate": 0.0015, + "loss": 2.053, + "step": 3700 + }, + { + "epoch": 0.39135021097046413, + "grad_norm": 0.4621272683143616, + "learning_rate": 0.0015, + "loss": 2.0392, + "step": 3710 + }, + { + "epoch": 0.3924050632911392, + "grad_norm": 0.3897222578525543, + "learning_rate": 0.0015, + "loss": 2.0163, + "step": 3720 + }, + { + "epoch": 0.39345991561181437, + "grad_norm": 0.5289087891578674, + "learning_rate": 0.0015, + "loss": 2.0355, + "step": 3730 + }, + { + "epoch": 0.39451476793248946, + "grad_norm": 0.497032105922699, + "learning_rate": 0.0015, + "loss": 2.0388, + "step": 3740 + }, + { + "epoch": 0.39556962025316456, + "grad_norm": 0.464529424905777, + "learning_rate": 0.0015, + "loss": 2.0368, + "step": 3750 + }, + { + "epoch": 0.39662447257383965, + "grad_norm": 0.5141547918319702, + "learning_rate": 0.0015, + "loss": 2.0313, + "step": 3760 + }, + { + "epoch": 0.39767932489451474, + "grad_norm": 0.5320308208465576, + "learning_rate": 0.0015, + "loss": 2.0457, + 
"step": 3770 + }, + { + "epoch": 0.3987341772151899, + "grad_norm": 0.4653412699699402, + "learning_rate": 0.0015, + "loss": 2.0315, + "step": 3780 + }, + { + "epoch": 0.399789029535865, + "grad_norm": 0.4444263279438019, + "learning_rate": 0.0015, + "loss": 2.0343, + "step": 3790 + }, + { + "epoch": 0.4008438818565401, + "grad_norm": 0.444671094417572, + "learning_rate": 0.0015, + "loss": 2.0378, + "step": 3800 + }, + { + "epoch": 0.40189873417721517, + "grad_norm": 0.4838058054447174, + "learning_rate": 0.0015, + "loss": 2.0343, + "step": 3810 + }, + { + "epoch": 0.4029535864978903, + "grad_norm": 0.47420215606689453, + "learning_rate": 0.0015, + "loss": 2.0185, + "step": 3820 + }, + { + "epoch": 0.4040084388185654, + "grad_norm": 0.48603472113609314, + "learning_rate": 0.0015, + "loss": 2.0268, + "step": 3830 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 0.5206816792488098, + "learning_rate": 0.0015, + "loss": 2.0435, + "step": 3840 + }, + { + "epoch": 0.4061181434599156, + "grad_norm": 0.5574729442596436, + "learning_rate": 0.0015, + "loss": 2.0265, + "step": 3850 + }, + { + "epoch": 0.40717299578059074, + "grad_norm": 0.48063263297080994, + "learning_rate": 0.0015, + "loss": 2.0411, + "step": 3860 + }, + { + "epoch": 0.40822784810126583, + "grad_norm": 0.4758067727088928, + "learning_rate": 0.0015, + "loss": 2.0247, + "step": 3870 + }, + { + "epoch": 0.4092827004219409, + "grad_norm": 0.4571913778781891, + "learning_rate": 0.0015, + "loss": 2.0172, + "step": 3880 + }, + { + "epoch": 0.410337552742616, + "grad_norm": 0.4651147723197937, + "learning_rate": 0.0015, + "loss": 2.0175, + "step": 3890 + }, + { + "epoch": 0.41139240506329117, + "grad_norm": 0.4823998212814331, + "learning_rate": 0.0015, + "loss": 2.0225, + "step": 3900 + }, + { + "epoch": 0.41244725738396626, + "grad_norm": 0.4863017797470093, + "learning_rate": 0.0015, + "loss": 2.0234, + "step": 3910 + }, + { + "epoch": 0.41350210970464135, + "grad_norm": 0.5648422241210938, + 
"learning_rate": 0.0015, + "loss": 2.0218, + "step": 3920 + }, + { + "epoch": 0.41455696202531644, + "grad_norm": 0.4649427533149719, + "learning_rate": 0.0015, + "loss": 2.0249, + "step": 3930 + }, + { + "epoch": 0.41561181434599154, + "grad_norm": 0.55061274766922, + "learning_rate": 0.0015, + "loss": 2.0203, + "step": 3940 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 0.4301551282405853, + "learning_rate": 0.0015, + "loss": 2.0268, + "step": 3950 + }, + { + "epoch": 0.4177215189873418, + "grad_norm": 0.4754408299922943, + "learning_rate": 0.0015, + "loss": 2.0229, + "step": 3960 + }, + { + "epoch": 0.41877637130801687, + "grad_norm": 0.43963631987571716, + "learning_rate": 0.0015, + "loss": 2.0149, + "step": 3970 + }, + { + "epoch": 0.41983122362869196, + "grad_norm": 0.4324744641780853, + "learning_rate": 0.0015, + "loss": 2.0223, + "step": 3980 + }, + { + "epoch": 0.4208860759493671, + "grad_norm": 0.40844643115997314, + "learning_rate": 0.0015, + "loss": 2.0142, + "step": 3990 + }, + { + "epoch": 0.4219409282700422, + "grad_norm": 0.5064060688018799, + "learning_rate": 0.0015, + "loss": 2.0341, + "step": 4000 + }, + { + "epoch": 0.4229957805907173, + "grad_norm": 0.463920921087265, + "learning_rate": 0.0015, + "loss": 2.0165, + "step": 4010 + }, + { + "epoch": 0.4240506329113924, + "grad_norm": 0.41492554545402527, + "learning_rate": 0.0015, + "loss": 2.0221, + "step": 4020 + }, + { + "epoch": 0.42510548523206754, + "grad_norm": 0.490600049495697, + "learning_rate": 0.0015, + "loss": 2.012, + "step": 4030 + }, + { + "epoch": 0.42616033755274263, + "grad_norm": 0.44256430864334106, + "learning_rate": 0.0015, + "loss": 2.0225, + "step": 4040 + }, + { + "epoch": 0.4272151898734177, + "grad_norm": 0.5451088547706604, + "learning_rate": 0.0015, + "loss": 2.0237, + "step": 4050 + }, + { + "epoch": 0.4282700421940928, + "grad_norm": 0.4461345076560974, + "learning_rate": 0.0015, + "loss": 2.0311, + "step": 4060 + }, + { + "epoch": 0.4293248945147679, + 
"grad_norm": 0.4550500512123108, + "learning_rate": 0.0015, + "loss": 2.0216, + "step": 4070 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 0.5397707223892212, + "learning_rate": 0.0015, + "loss": 2.0264, + "step": 4080 + }, + { + "epoch": 0.43143459915611815, + "grad_norm": 0.4498850405216217, + "learning_rate": 0.0015, + "loss": 2.0201, + "step": 4090 + }, + { + "epoch": 0.43248945147679324, + "grad_norm": 0.45243409276008606, + "learning_rate": 0.0015, + "loss": 2.0145, + "step": 4100 + }, + { + "epoch": 0.43354430379746833, + "grad_norm": 0.46291717886924744, + "learning_rate": 0.0015, + "loss": 2.0125, + "step": 4110 + }, + { + "epoch": 0.4345991561181435, + "grad_norm": 0.4428139328956604, + "learning_rate": 0.0015, + "loss": 2.0166, + "step": 4120 + }, + { + "epoch": 0.4356540084388186, + "grad_norm": 0.5002286434173584, + "learning_rate": 0.0015, + "loss": 2.0118, + "step": 4130 + }, + { + "epoch": 0.43670886075949367, + "grad_norm": 0.4523918926715851, + "learning_rate": 0.0015, + "loss": 2.012, + "step": 4140 + }, + { + "epoch": 0.43776371308016876, + "grad_norm": 0.4352469742298126, + "learning_rate": 0.0015, + "loss": 2.0215, + "step": 4150 + }, + { + "epoch": 0.4388185654008439, + "grad_norm": 0.502491295337677, + "learning_rate": 0.0015, + "loss": 2.0121, + "step": 4160 + }, + { + "epoch": 0.439873417721519, + "grad_norm": 0.4340950846672058, + "learning_rate": 0.0015, + "loss": 2.0134, + "step": 4170 + }, + { + "epoch": 0.4409282700421941, + "grad_norm": 0.42705026268959045, + "learning_rate": 0.0015, + "loss": 2.0095, + "step": 4180 + }, + { + "epoch": 0.4419831223628692, + "grad_norm": 0.44456130266189575, + "learning_rate": 0.0015, + "loss": 2.0157, + "step": 4190 + }, + { + "epoch": 0.4430379746835443, + "grad_norm": 0.44883906841278076, + "learning_rate": 0.0015, + "loss": 2.012, + "step": 4200 + }, + { + "epoch": 0.4440928270042194, + "grad_norm": 0.43857628107070923, + "learning_rate": 0.0015, + "loss": 2.0005, + "step": 4210 + }, + { 
+ "epoch": 0.4451476793248945, + "grad_norm": 0.46458226442337036, + "learning_rate": 0.0015, + "loss": 2.0051, + "step": 4220 + }, + { + "epoch": 0.4462025316455696, + "grad_norm": 0.5564199686050415, + "learning_rate": 0.0015, + "loss": 2.0069, + "step": 4230 + }, + { + "epoch": 0.4472573839662447, + "grad_norm": 0.43587273359298706, + "learning_rate": 0.0015, + "loss": 2.0094, + "step": 4240 + }, + { + "epoch": 0.44831223628691985, + "grad_norm": 0.4776957929134369, + "learning_rate": 0.0015, + "loss": 2.0142, + "step": 4250 + }, + { + "epoch": 0.44936708860759494, + "grad_norm": 0.5066412091255188, + "learning_rate": 0.0015, + "loss": 2.0189, + "step": 4260 + }, + { + "epoch": 0.45042194092827004, + "grad_norm": 0.46694788336753845, + "learning_rate": 0.0015, + "loss": 2.0051, + "step": 4270 + }, + { + "epoch": 0.45147679324894513, + "grad_norm": 0.5150187015533447, + "learning_rate": 0.0015, + "loss": 2.0017, + "step": 4280 + }, + { + "epoch": 0.4525316455696203, + "grad_norm": 0.5508468747138977, + "learning_rate": 0.0015, + "loss": 2.0142, + "step": 4290 + }, + { + "epoch": 0.45358649789029537, + "grad_norm": 0.49552634358406067, + "learning_rate": 0.0015, + "loss": 2.013, + "step": 4300 + }, + { + "epoch": 0.45464135021097046, + "grad_norm": 0.4847050905227661, + "learning_rate": 0.0015, + "loss": 2.0094, + "step": 4310 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 0.4787251949310303, + "learning_rate": 0.0015, + "loss": 2.001, + "step": 4320 + }, + { + "epoch": 0.45675105485232065, + "grad_norm": 0.48696401715278625, + "learning_rate": 0.0015, + "loss": 2.0134, + "step": 4330 + }, + { + "epoch": 0.4578059071729958, + "grad_norm": 0.42618006467819214, + "learning_rate": 0.0015, + "loss": 2.0014, + "step": 4340 + }, + { + "epoch": 0.4588607594936709, + "grad_norm": 0.44857847690582275, + "learning_rate": 0.0015, + "loss": 2.0068, + "step": 4350 + }, + { + "epoch": 0.459915611814346, + "grad_norm": 0.47533655166625977, + "learning_rate": 0.0015, + 
"loss": 2.0225, + "step": 4360 + }, + { + "epoch": 0.4609704641350211, + "grad_norm": 0.45596417784690857, + "learning_rate": 0.0015, + "loss": 1.9968, + "step": 4370 + }, + { + "epoch": 0.4620253164556962, + "grad_norm": 0.42831170558929443, + "learning_rate": 0.0015, + "loss": 2.0036, + "step": 4380 + }, + { + "epoch": 0.4630801687763713, + "grad_norm": 0.4378739893436432, + "learning_rate": 0.0015, + "loss": 1.9962, + "step": 4390 + }, + { + "epoch": 0.4641350210970464, + "grad_norm": 0.5626639723777771, + "learning_rate": 0.0015, + "loss": 2.0053, + "step": 4400 + }, + { + "epoch": 0.4651898734177215, + "grad_norm": 0.46488156914711, + "learning_rate": 0.0015, + "loss": 2.015, + "step": 4410 + }, + { + "epoch": 0.46624472573839665, + "grad_norm": 0.45077210664749146, + "learning_rate": 0.0015, + "loss": 2.0003, + "step": 4420 + }, + { + "epoch": 0.46729957805907174, + "grad_norm": 0.4862898290157318, + "learning_rate": 0.0015, + "loss": 2.0038, + "step": 4430 + }, + { + "epoch": 0.46835443037974683, + "grad_norm": 0.4480973482131958, + "learning_rate": 0.0015, + "loss": 2.006, + "step": 4440 + }, + { + "epoch": 0.4694092827004219, + "grad_norm": 0.44025543332099915, + "learning_rate": 0.0015, + "loss": 2.0102, + "step": 4450 + }, + { + "epoch": 0.4704641350210971, + "grad_norm": 0.45903876423835754, + "learning_rate": 0.0015, + "loss": 1.9943, + "step": 4460 + }, + { + "epoch": 0.47151898734177217, + "grad_norm": 0.4817822277545929, + "learning_rate": 0.0015, + "loss": 2.0085, + "step": 4470 + }, + { + "epoch": 0.47257383966244726, + "grad_norm": 0.4522876441478729, + "learning_rate": 0.0015, + "loss": 2.0136, + "step": 4480 + }, + { + "epoch": 0.47362869198312235, + "grad_norm": 0.48608750104904175, + "learning_rate": 0.0015, + "loss": 1.9993, + "step": 4490 + }, + { + "epoch": 0.47468354430379744, + "grad_norm": 0.45206281542778015, + "learning_rate": 0.0015, + "loss": 1.9961, + "step": 4500 + }, + { + "epoch": 0.4757383966244726, + "grad_norm": 
0.4605315327644348, + "learning_rate": 0.0015, + "loss": 2.0039, + "step": 4510 + }, + { + "epoch": 0.4767932489451477, + "grad_norm": 0.45295825600624084, + "learning_rate": 0.0015, + "loss": 1.9876, + "step": 4520 + }, + { + "epoch": 0.4778481012658228, + "grad_norm": 0.48463356494903564, + "learning_rate": 0.0015, + "loss": 2.0063, + "step": 4530 + }, + { + "epoch": 0.47890295358649787, + "grad_norm": 0.4252591133117676, + "learning_rate": 0.0015, + "loss": 2.0067, + "step": 4540 + }, + { + "epoch": 0.479957805907173, + "grad_norm": 0.5178452134132385, + "learning_rate": 0.0015, + "loss": 2.003, + "step": 4550 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 0.4417160749435425, + "learning_rate": 0.0015, + "loss": 1.9941, + "step": 4560 + }, + { + "epoch": 0.4820675105485232, + "grad_norm": 0.46367692947387695, + "learning_rate": 0.0015, + "loss": 1.9731, + "step": 4570 + }, + { + "epoch": 0.4831223628691983, + "grad_norm": 0.4723823666572571, + "learning_rate": 0.0015, + "loss": 1.9998, + "step": 4580 + }, + { + "epoch": 0.48417721518987344, + "grad_norm": 0.46017590165138245, + "learning_rate": 0.0015, + "loss": 1.9865, + "step": 4590 + }, + { + "epoch": 0.48523206751054854, + "grad_norm": 0.5154760479927063, + "learning_rate": 0.0015, + "loss": 2.0025, + "step": 4600 + }, + { + "epoch": 0.48628691983122363, + "grad_norm": 0.4670974612236023, + "learning_rate": 0.0015, + "loss": 2.0056, + "step": 4610 + }, + { + "epoch": 0.4873417721518987, + "grad_norm": 0.4248136878013611, + "learning_rate": 0.0015, + "loss": 1.9816, + "step": 4620 + }, + { + "epoch": 0.4883966244725738, + "grad_norm": 0.4667222797870636, + "learning_rate": 0.0015, + "loss": 2.0021, + "step": 4630 + }, + { + "epoch": 0.48945147679324896, + "grad_norm": 0.4552115499973297, + "learning_rate": 0.0015, + "loss": 2.0007, + "step": 4640 + }, + { + "epoch": 0.49050632911392406, + "grad_norm": 0.46759188175201416, + "learning_rate": 0.0015, + "loss": 2.0032, + "step": 4650 + }, + { + "epoch": 
0.49156118143459915, + "grad_norm": 0.505689263343811, + "learning_rate": 0.0015, + "loss": 1.9887, + "step": 4660 + }, + { + "epoch": 0.49261603375527424, + "grad_norm": 0.49472060799598694, + "learning_rate": 0.0015, + "loss": 1.9916, + "step": 4670 + }, + { + "epoch": 0.4936708860759494, + "grad_norm": 0.4831889867782593, + "learning_rate": 0.0015, + "loss": 1.9867, + "step": 4680 + }, + { + "epoch": 0.4947257383966245, + "grad_norm": 0.4475608170032501, + "learning_rate": 0.0015, + "loss": 1.9846, + "step": 4690 + }, + { + "epoch": 0.4957805907172996, + "grad_norm": 0.45800697803497314, + "learning_rate": 0.0015, + "loss": 1.998, + "step": 4700 + }, + { + "epoch": 0.49683544303797467, + "grad_norm": 0.463772177696228, + "learning_rate": 0.0015, + "loss": 1.9962, + "step": 4710 + }, + { + "epoch": 0.4978902953586498, + "grad_norm": 0.4545440971851349, + "learning_rate": 0.0015, + "loss": 1.9925, + "step": 4720 + }, + { + "epoch": 0.4989451476793249, + "grad_norm": 0.4118850529193878, + "learning_rate": 0.0015, + "loss": 1.9913, + "step": 4730 + }, + { + "epoch": 0.5, + "grad_norm": 0.4960225522518158, + "learning_rate": 0.0015, + "loss": 1.9887, + "step": 4740 + }, + { + "epoch": 0.5010548523206751, + "grad_norm": 0.48252198100090027, + "learning_rate": 0.0015, + "loss": 2.0057, + "step": 4750 + }, + { + "epoch": 0.5021097046413502, + "grad_norm": 0.4500682055950165, + "learning_rate": 0.0015, + "loss": 1.9894, + "step": 4760 + }, + { + "epoch": 0.5031645569620253, + "grad_norm": 0.4634728729724884, + "learning_rate": 0.0015, + "loss": 1.9879, + "step": 4770 + }, + { + "epoch": 0.5042194092827004, + "grad_norm": 0.46008265018463135, + "learning_rate": 0.0015, + "loss": 1.996, + "step": 4780 + }, + { + "epoch": 0.5052742616033755, + "grad_norm": 0.489211767911911, + "learning_rate": 0.0015, + "loss": 1.9842, + "step": 4790 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 0.445319801568985, + "learning_rate": 0.0015, + "loss": 1.9871, + "step": 4800 + }, + { 
+ "epoch": 0.5073839662447257, + "grad_norm": 0.4555240869522095, + "learning_rate": 0.0015, + "loss": 1.9885, + "step": 4810 + }, + { + "epoch": 0.5084388185654009, + "grad_norm": 0.4640772044658661, + "learning_rate": 0.0015, + "loss": 2.0086, + "step": 4820 + }, + { + "epoch": 0.509493670886076, + "grad_norm": 0.4786563813686371, + "learning_rate": 0.0015, + "loss": 1.9898, + "step": 4830 + }, + { + "epoch": 0.510548523206751, + "grad_norm": 0.44662174582481384, + "learning_rate": 0.0015, + "loss": 1.9836, + "step": 4840 + }, + { + "epoch": 0.5116033755274262, + "grad_norm": 0.4463147521018982, + "learning_rate": 0.0015, + "loss": 1.998, + "step": 4850 + }, + { + "epoch": 0.5126582278481012, + "grad_norm": 0.4866524338722229, + "learning_rate": 0.0015, + "loss": 1.982, + "step": 4860 + }, + { + "epoch": 0.5137130801687764, + "grad_norm": 0.4643232226371765, + "learning_rate": 0.0015, + "loss": 1.9781, + "step": 4870 + }, + { + "epoch": 0.5147679324894515, + "grad_norm": 0.49099764227867126, + "learning_rate": 0.0015, + "loss": 1.9965, + "step": 4880 + }, + { + "epoch": 0.5158227848101266, + "grad_norm": 0.4760585427284241, + "learning_rate": 0.0015, + "loss": 1.9889, + "step": 4890 + }, + { + "epoch": 0.5168776371308017, + "grad_norm": 0.4203405976295471, + "learning_rate": 0.0015, + "loss": 1.994, + "step": 4900 + }, + { + "epoch": 0.5179324894514767, + "grad_norm": 0.4211159348487854, + "learning_rate": 0.0015, + "loss": 1.9888, + "step": 4910 + }, + { + "epoch": 0.5189873417721519, + "grad_norm": 0.46969079971313477, + "learning_rate": 0.0015, + "loss": 1.9813, + "step": 4920 + }, + { + "epoch": 0.520042194092827, + "grad_norm": 0.4826894998550415, + "learning_rate": 0.0015, + "loss": 1.9888, + "step": 4930 + }, + { + "epoch": 0.5210970464135021, + "grad_norm": 0.4152940511703491, + "learning_rate": 0.0015, + "loss": 1.9859, + "step": 4940 + }, + { + "epoch": 0.5221518987341772, + "grad_norm": 0.4372897744178772, + "learning_rate": 0.0015, + "loss": 1.998, + 
"step": 4950 + }, + { + "epoch": 0.5232067510548524, + "grad_norm": 0.47398361563682556, + "learning_rate": 0.0015, + "loss": 1.9737, + "step": 4960 + }, + { + "epoch": 0.5242616033755274, + "grad_norm": 0.5493957996368408, + "learning_rate": 0.0015, + "loss": 1.9861, + "step": 4970 + }, + { + "epoch": 0.5253164556962026, + "grad_norm": 0.4829331338405609, + "learning_rate": 0.0015, + "loss": 1.9797, + "step": 4980 + }, + { + "epoch": 0.5263713080168776, + "grad_norm": 0.43320485949516296, + "learning_rate": 0.0015, + "loss": 1.9816, + "step": 4990 + }, + { + "epoch": 0.5274261603375527, + "grad_norm": 0.4184596538543701, + "learning_rate": 0.0015, + "loss": 1.9937, + "step": 5000 + }, + { + "epoch": 0.5284810126582279, + "grad_norm": 0.44668543338775635, + "learning_rate": 0.0015, + "loss": 1.9817, + "step": 5010 + }, + { + "epoch": 0.5295358649789029, + "grad_norm": 0.5183205604553223, + "learning_rate": 0.0015, + "loss": 1.9779, + "step": 5020 + }, + { + "epoch": 0.5305907172995781, + "grad_norm": 0.4478100538253784, + "learning_rate": 0.0015, + "loss": 1.9669, + "step": 5030 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 0.4046509265899658, + "learning_rate": 0.0015, + "loss": 1.9882, + "step": 5040 + }, + { + "epoch": 0.5327004219409283, + "grad_norm": 0.4287264943122864, + "learning_rate": 0.0015, + "loss": 1.9743, + "step": 5050 + }, + { + "epoch": 0.5337552742616034, + "grad_norm": 0.46131548285484314, + "learning_rate": 0.0015, + "loss": 1.9925, + "step": 5060 + }, + { + "epoch": 0.5348101265822784, + "grad_norm": 0.4695143401622772, + "learning_rate": 0.0015, + "loss": 1.9816, + "step": 5070 + }, + { + "epoch": 0.5358649789029536, + "grad_norm": 0.433845579624176, + "learning_rate": 0.0015, + "loss": 1.9909, + "step": 5080 + }, + { + "epoch": 0.5369198312236287, + "grad_norm": 0.4523727297782898, + "learning_rate": 0.0015, + "loss": 1.9869, + "step": 5090 + }, + { + "epoch": 0.5379746835443038, + "grad_norm": 0.4685346782207489, + "learning_rate": 
0.0015, + "loss": 1.9675, + "step": 5100 + }, + { + "epoch": 0.5390295358649789, + "grad_norm": 0.4254748821258545, + "learning_rate": 0.0015, + "loss": 1.9816, + "step": 5110 + }, + { + "epoch": 0.540084388185654, + "grad_norm": 0.46817123889923096, + "learning_rate": 0.0015, + "loss": 1.9789, + "step": 5120 + }, + { + "epoch": 0.5411392405063291, + "grad_norm": 0.43347081542015076, + "learning_rate": 0.0015, + "loss": 1.9776, + "step": 5130 + }, + { + "epoch": 0.5421940928270043, + "grad_norm": 0.4349901080131531, + "learning_rate": 0.0015, + "loss": 1.991, + "step": 5140 + }, + { + "epoch": 0.5432489451476793, + "grad_norm": 0.43023401498794556, + "learning_rate": 0.0015, + "loss": 1.9843, + "step": 5150 + }, + { + "epoch": 0.5443037974683544, + "grad_norm": 0.434885174036026, + "learning_rate": 0.0015, + "loss": 1.9818, + "step": 5160 + }, + { + "epoch": 0.5453586497890295, + "grad_norm": 0.43523484468460083, + "learning_rate": 0.0015, + "loss": 1.9804, + "step": 5170 + }, + { + "epoch": 0.5464135021097046, + "grad_norm": 0.43468087911605835, + "learning_rate": 0.0015, + "loss": 1.9945, + "step": 5180 + }, + { + "epoch": 0.5474683544303798, + "grad_norm": 0.4559604525566101, + "learning_rate": 0.0015, + "loss": 1.9718, + "step": 5190 + }, + { + "epoch": 0.5485232067510548, + "grad_norm": 0.45978057384490967, + "learning_rate": 0.0015, + "loss": 1.9846, + "step": 5200 + }, + { + "epoch": 0.54957805907173, + "grad_norm": 0.3937625586986542, + "learning_rate": 0.0015, + "loss": 1.9651, + "step": 5210 + }, + { + "epoch": 0.5506329113924051, + "grad_norm": 0.4718064069747925, + "learning_rate": 0.0015, + "loss": 1.9792, + "step": 5220 + }, + { + "epoch": 0.5516877637130801, + "grad_norm": 0.4999498426914215, + "learning_rate": 0.0015, + "loss": 1.9754, + "step": 5230 + }, + { + "epoch": 0.5527426160337553, + "grad_norm": 0.47621315717697144, + "learning_rate": 0.0015, + "loss": 1.98, + "step": 5240 + }, + { + "epoch": 0.5537974683544303, + "grad_norm": 
0.44971024990081787, + "learning_rate": 0.0015, + "loss": 1.971, + "step": 5250 + }, + { + "epoch": 0.5548523206751055, + "grad_norm": 0.4371758699417114, + "learning_rate": 0.0015, + "loss": 1.9741, + "step": 5260 + }, + { + "epoch": 0.5559071729957806, + "grad_norm": 0.4647037982940674, + "learning_rate": 0.0015, + "loss": 1.9827, + "step": 5270 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 0.5894455909729004, + "learning_rate": 0.0015, + "loss": 1.9752, + "step": 5280 + }, + { + "epoch": 0.5580168776371308, + "grad_norm": 0.4337363839149475, + "learning_rate": 0.0015, + "loss": 1.976, + "step": 5290 + }, + { + "epoch": 0.5590717299578059, + "grad_norm": 0.4197293519973755, + "learning_rate": 0.0015, + "loss": 1.9784, + "step": 5300 + }, + { + "epoch": 0.560126582278481, + "grad_norm": 0.46126967668533325, + "learning_rate": 0.0015, + "loss": 1.9671, + "step": 5310 + }, + { + "epoch": 0.5611814345991561, + "grad_norm": 0.4594750702381134, + "learning_rate": 0.0015, + "loss": 1.9685, + "step": 5320 + }, + { + "epoch": 0.5622362869198312, + "grad_norm": 0.41427746415138245, + "learning_rate": 0.0015, + "loss": 1.9812, + "step": 5330 + }, + { + "epoch": 0.5632911392405063, + "grad_norm": 0.41871240735054016, + "learning_rate": 0.0015, + "loss": 1.9702, + "step": 5340 + }, + { + "epoch": 0.5643459915611815, + "grad_norm": 0.4757276177406311, + "learning_rate": 0.0015, + "loss": 1.9606, + "step": 5350 + }, + { + "epoch": 0.5654008438818565, + "grad_norm": 0.5399381518363953, + "learning_rate": 0.0015, + "loss": 1.9752, + "step": 5360 + }, + { + "epoch": 0.5664556962025317, + "grad_norm": 0.43040159344673157, + "learning_rate": 0.0015, + "loss": 1.9782, + "step": 5370 + }, + { + "epoch": 0.5675105485232067, + "grad_norm": 0.46205800771713257, + "learning_rate": 0.0015, + "loss": 1.9658, + "step": 5380 + }, + { + "epoch": 0.5685654008438819, + "grad_norm": 0.4588663578033447, + "learning_rate": 0.0015, + "loss": 1.9691, + "step": 5390 + }, + { + "epoch": 
0.569620253164557, + "grad_norm": 0.44170957803726196, + "learning_rate": 0.0015, + "loss": 1.9674, + "step": 5400 + }, + { + "epoch": 0.570675105485232, + "grad_norm": 0.42715734243392944, + "learning_rate": 0.0015, + "loss": 1.9688, + "step": 5410 + }, + { + "epoch": 0.5717299578059072, + "grad_norm": 0.4816579818725586, + "learning_rate": 0.0015, + "loss": 1.9638, + "step": 5420 + }, + { + "epoch": 0.5727848101265823, + "grad_norm": 0.46428218483924866, + "learning_rate": 0.0015, + "loss": 1.9637, + "step": 5430 + }, + { + "epoch": 0.5738396624472574, + "grad_norm": 0.43607380986213684, + "learning_rate": 0.0015, + "loss": 1.9593, + "step": 5440 + }, + { + "epoch": 0.5748945147679325, + "grad_norm": 0.4220513701438904, + "learning_rate": 0.0015, + "loss": 1.9736, + "step": 5450 + }, + { + "epoch": 0.5759493670886076, + "grad_norm": 0.39790844917297363, + "learning_rate": 0.0015, + "loss": 1.971, + "step": 5460 + }, + { + "epoch": 0.5770042194092827, + "grad_norm": 0.4477349817752838, + "learning_rate": 0.0015, + "loss": 1.9656, + "step": 5470 + }, + { + "epoch": 0.5780590717299579, + "grad_norm": 0.44955015182495117, + "learning_rate": 0.0015, + "loss": 1.974, + "step": 5480 + }, + { + "epoch": 0.5791139240506329, + "grad_norm": 0.44701093435287476, + "learning_rate": 0.0015, + "loss": 1.9717, + "step": 5490 + }, + { + "epoch": 0.580168776371308, + "grad_norm": 0.4384399950504303, + "learning_rate": 0.0015, + "loss": 1.9659, + "step": 5500 + }, + { + "epoch": 0.5812236286919831, + "grad_norm": 0.4545252323150635, + "learning_rate": 0.0015, + "loss": 1.9755, + "step": 5510 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 0.46129509806632996, + "learning_rate": 0.0015, + "loss": 1.9656, + "step": 5520 + }, + { + "epoch": 0.5833333333333334, + "grad_norm": 0.4120708703994751, + "learning_rate": 0.0015, + "loss": 1.9609, + "step": 5530 + }, + { + "epoch": 0.5843881856540084, + "grad_norm": 0.45053553581237793, + "learning_rate": 0.0015, + "loss": 1.9585, + 
"step": 5540 + }, + { + "epoch": 0.5854430379746836, + "grad_norm": 0.4825860559940338, + "learning_rate": 0.0015, + "loss": 1.9737, + "step": 5550 + }, + { + "epoch": 0.5864978902953587, + "grad_norm": 0.42986005544662476, + "learning_rate": 0.0015, + "loss": 1.9796, + "step": 5560 + }, + { + "epoch": 0.5875527426160337, + "grad_norm": 0.45834633708000183, + "learning_rate": 0.0015, + "loss": 1.9632, + "step": 5570 + }, + { + "epoch": 0.5886075949367089, + "grad_norm": 0.43091991543769836, + "learning_rate": 0.0015, + "loss": 1.9597, + "step": 5580 + }, + { + "epoch": 0.5896624472573839, + "grad_norm": 0.4816896319389343, + "learning_rate": 0.0015, + "loss": 1.9655, + "step": 5590 + }, + { + "epoch": 0.5907172995780591, + "grad_norm": 0.44736018776893616, + "learning_rate": 0.0015, + "loss": 1.9636, + "step": 5600 + }, + { + "epoch": 0.5917721518987342, + "grad_norm": 0.46847572922706604, + "learning_rate": 0.0015, + "loss": 1.9637, + "step": 5610 + }, + { + "epoch": 0.5928270042194093, + "grad_norm": 0.4232116639614105, + "learning_rate": 0.0015, + "loss": 1.9726, + "step": 5620 + }, + { + "epoch": 0.5938818565400844, + "grad_norm": 0.4705914556980133, + "learning_rate": 0.0015, + "loss": 1.9672, + "step": 5630 + }, + { + "epoch": 0.5949367088607594, + "grad_norm": 0.450400173664093, + "learning_rate": 0.0015, + "loss": 1.9768, + "step": 5640 + }, + { + "epoch": 0.5959915611814346, + "grad_norm": 0.4197351038455963, + "learning_rate": 0.0015, + "loss": 1.9741, + "step": 5650 + }, + { + "epoch": 0.5970464135021097, + "grad_norm": 0.4076090157032013, + "learning_rate": 0.0015, + "loss": 1.9676, + "step": 5660 + }, + { + "epoch": 0.5981012658227848, + "grad_norm": 0.4663560688495636, + "learning_rate": 0.0015, + "loss": 1.9779, + "step": 5670 + }, + { + "epoch": 0.5991561181434599, + "grad_norm": 0.4360770285129547, + "learning_rate": 0.0015, + "loss": 1.9764, + "step": 5680 + }, + { + "epoch": 0.6002109704641351, + "grad_norm": 0.48296862840652466, + 
"learning_rate": 0.0015, + "loss": 1.9544, + "step": 5690 + }, + { + "epoch": 0.6012658227848101, + "grad_norm": 0.5105125308036804, + "learning_rate": 0.0015, + "loss": 1.9578, + "step": 5700 + }, + { + "epoch": 0.6023206751054853, + "grad_norm": 0.465209037065506, + "learning_rate": 0.0015, + "loss": 1.9654, + "step": 5710 + }, + { + "epoch": 0.6033755274261603, + "grad_norm": 0.4125451147556305, + "learning_rate": 0.0015, + "loss": 1.9658, + "step": 5720 + }, + { + "epoch": 0.6044303797468354, + "grad_norm": 0.44935154914855957, + "learning_rate": 0.0015, + "loss": 1.9567, + "step": 5730 + }, + { + "epoch": 0.6054852320675106, + "grad_norm": 0.4448432922363281, + "learning_rate": 0.0015, + "loss": 1.9608, + "step": 5740 + }, + { + "epoch": 0.6065400843881856, + "grad_norm": 0.44799625873565674, + "learning_rate": 0.0015, + "loss": 1.9359, + "step": 5750 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 0.44629913568496704, + "learning_rate": 0.0015, + "loss": 1.9477, + "step": 5760 + }, + { + "epoch": 0.6086497890295358, + "grad_norm": 0.4163765609264374, + "learning_rate": 0.0015, + "loss": 1.9526, + "step": 5770 + }, + { + "epoch": 0.609704641350211, + "grad_norm": 0.4490468204021454, + "learning_rate": 0.0015, + "loss": 1.9567, + "step": 5780 + }, + { + "epoch": 0.6107594936708861, + "grad_norm": 0.45518097281455994, + "learning_rate": 0.0015, + "loss": 1.952, + "step": 5790 + }, + { + "epoch": 0.6118143459915611, + "grad_norm": 0.4716593325138092, + "learning_rate": 0.0015, + "loss": 1.9624, + "step": 5800 + }, + { + "epoch": 0.6128691983122363, + "grad_norm": 0.4235859215259552, + "learning_rate": 0.0015, + "loss": 1.9624, + "step": 5810 + }, + { + "epoch": 0.6139240506329114, + "grad_norm": 0.4250667989253998, + "learning_rate": 0.0015, + "loss": 1.973, + "step": 5820 + }, + { + "epoch": 0.6149789029535865, + "grad_norm": 0.4351049065589905, + "learning_rate": 0.0015, + "loss": 1.9551, + "step": 5830 + }, + { + "epoch": 0.6160337552742616, + 
"grad_norm": 0.4573591947555542, + "learning_rate": 0.0015, + "loss": 1.9703, + "step": 5840 + }, + { + "epoch": 0.6170886075949367, + "grad_norm": 0.5216925740242004, + "learning_rate": 0.0015, + "loss": 1.9746, + "step": 5850 + }, + { + "epoch": 0.6181434599156118, + "grad_norm": 0.48470115661621094, + "learning_rate": 0.0015, + "loss": 1.9637, + "step": 5860 + }, + { + "epoch": 0.619198312236287, + "grad_norm": 0.451644629240036, + "learning_rate": 0.0015, + "loss": 1.9556, + "step": 5870 + }, + { + "epoch": 0.620253164556962, + "grad_norm": 0.4359816014766693, + "learning_rate": 0.0015, + "loss": 1.9662, + "step": 5880 + }, + { + "epoch": 0.6213080168776371, + "grad_norm": 0.5675661563873291, + "learning_rate": 0.0015, + "loss": 1.9608, + "step": 5890 + }, + { + "epoch": 0.6223628691983122, + "grad_norm": 0.3987058103084564, + "learning_rate": 0.0015, + "loss": 1.9614, + "step": 5900 + }, + { + "epoch": 0.6234177215189873, + "grad_norm": 0.4844250977039337, + "learning_rate": 0.0015, + "loss": 1.9527, + "step": 5910 + }, + { + "epoch": 0.6244725738396625, + "grad_norm": 0.39574798941612244, + "learning_rate": 0.0015, + "loss": 1.9496, + "step": 5920 + }, + { + "epoch": 0.6255274261603375, + "grad_norm": 0.6383304595947266, + "learning_rate": 0.0015, + "loss": 1.9576, + "step": 5930 + }, + { + "epoch": 0.6265822784810127, + "grad_norm": 0.4334365725517273, + "learning_rate": 0.0015, + "loss": 1.9622, + "step": 5940 + }, + { + "epoch": 0.6276371308016878, + "grad_norm": 0.4857499301433563, + "learning_rate": 0.0015, + "loss": 1.9487, + "step": 5950 + }, + { + "epoch": 0.6286919831223629, + "grad_norm": 0.46174830198287964, + "learning_rate": 0.0015, + "loss": 1.9569, + "step": 5960 + }, + { + "epoch": 0.629746835443038, + "grad_norm": 0.44857001304626465, + "learning_rate": 0.0015, + "loss": 1.9579, + "step": 5970 + }, + { + "epoch": 0.630801687763713, + "grad_norm": 0.4137052297592163, + "learning_rate": 0.0015, + "loss": 1.9404, + "step": 5980 + }, + { + 
"epoch": 0.6318565400843882, + "grad_norm": 0.4595361649990082, + "learning_rate": 0.0015, + "loss": 1.9581, + "step": 5990 + }, + { + "epoch": 0.6329113924050633, + "grad_norm": 0.47880202531814575, + "learning_rate": 0.0015, + "loss": 1.9679, + "step": 6000 + }, + { + "epoch": 0.6339662447257384, + "grad_norm": 0.47764644026756287, + "learning_rate": 0.0015, + "loss": 1.9593, + "step": 6010 + }, + { + "epoch": 0.6350210970464135, + "grad_norm": 0.43710753321647644, + "learning_rate": 0.0015, + "loss": 1.9655, + "step": 6020 + }, + { + "epoch": 0.6360759493670886, + "grad_norm": 0.4158395230770111, + "learning_rate": 0.0015, + "loss": 1.9437, + "step": 6030 + }, + { + "epoch": 0.6371308016877637, + "grad_norm": 0.4571756422519684, + "learning_rate": 0.0015, + "loss": 1.9627, + "step": 6040 + }, + { + "epoch": 0.6381856540084389, + "grad_norm": 0.5196099281311035, + "learning_rate": 0.0015, + "loss": 1.9435, + "step": 6050 + }, + { + "epoch": 0.6392405063291139, + "grad_norm": 0.4487728476524353, + "learning_rate": 0.0015, + "loss": 1.9611, + "step": 6060 + }, + { + "epoch": 0.640295358649789, + "grad_norm": 0.5082179307937622, + "learning_rate": 0.0015, + "loss": 1.9533, + "step": 6070 + }, + { + "epoch": 0.6413502109704642, + "grad_norm": 0.4590562880039215, + "learning_rate": 0.0015, + "loss": 1.9481, + "step": 6080 + }, + { + "epoch": 0.6424050632911392, + "grad_norm": 0.42560580372810364, + "learning_rate": 0.0015, + "loss": 1.9488, + "step": 6090 + }, + { + "epoch": 0.6434599156118144, + "grad_norm": 0.44798460602760315, + "learning_rate": 0.0015, + "loss": 1.9419, + "step": 6100 + }, + { + "epoch": 0.6445147679324894, + "grad_norm": 0.4697492718696594, + "learning_rate": 0.0015, + "loss": 1.9605, + "step": 6110 + }, + { + "epoch": 0.6455696202531646, + "grad_norm": 0.4113924205303192, + "learning_rate": 0.0015, + "loss": 1.9505, + "step": 6120 + }, + { + "epoch": 0.6466244725738397, + "grad_norm": 0.4310709834098816, + "learning_rate": 0.0015, + "loss": 
1.9475, + "step": 6130 + }, + { + "epoch": 0.6476793248945147, + "grad_norm": 0.5099350214004517, + "learning_rate": 0.0015, + "loss": 1.9516, + "step": 6140 + }, + { + "epoch": 0.6487341772151899, + "grad_norm": 0.41777700185775757, + "learning_rate": 0.0015, + "loss": 1.9598, + "step": 6150 + }, + { + "epoch": 0.6497890295358649, + "grad_norm": 0.4158155024051666, + "learning_rate": 0.0015, + "loss": 1.9517, + "step": 6160 + }, + { + "epoch": 0.6508438818565401, + "grad_norm": 0.4126010835170746, + "learning_rate": 0.0014834368975312174, + "loss": 1.9332, + "step": 6170 + }, + { + "epoch": 0.6518987341772152, + "grad_norm": 0.4793664813041687, + "learning_rate": 0.0014629899726345957, + "loss": 1.9533, + "step": 6180 + }, + { + "epoch": 0.6529535864978903, + "grad_norm": 0.4547656178474426, + "learning_rate": 0.0014428248775471316, + "loss": 1.9602, + "step": 6190 + }, + { + "epoch": 0.6540084388185654, + "grad_norm": 0.46379294991493225, + "learning_rate": 0.00142293772767289, + "loss": 1.95, + "step": 6200 + }, + { + "epoch": 0.6550632911392406, + "grad_norm": 0.4061027765274048, + "learning_rate": 0.001403324691959192, + "loss": 1.9455, + "step": 6210 + }, + { + "epoch": 0.6561181434599156, + "grad_norm": 0.4377910792827606, + "learning_rate": 0.0013839819921586025, + "loss": 1.9519, + "step": 6220 + }, + { + "epoch": 0.6571729957805907, + "grad_norm": 0.44470494985580444, + "learning_rate": 0.0013649059021010894, + "loss": 1.9404, + "step": 6230 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 0.394380658864975, + "learning_rate": 0.0013460927469762154, + "loss": 1.9402, + "step": 6240 + }, + { + "epoch": 0.6592827004219409, + "grad_norm": 0.3865656554698944, + "learning_rate": 0.0013275389026252255, + "loss": 1.9495, + "step": 6250 + }, + { + "epoch": 0.6603375527426161, + "grad_norm": 0.41206902265548706, + "learning_rate": 0.0013092407948428887, + "loss": 1.9417, + "step": 6260 + }, + { + "epoch": 0.6613924050632911, + "grad_norm": 
0.40238043665885925, + "learning_rate": 0.001291194898688966, + "loss": 1.9467, + "step": 6270 + }, + { + "epoch": 0.6624472573839663, + "grad_norm": 0.4450398087501526, + "learning_rate": 0.001273397737809166, + "loss": 1.9452, + "step": 6280 + }, + { + "epoch": 0.6635021097046413, + "grad_norm": 0.4000122547149658, + "learning_rate": 0.001255845883765463, + "loss": 1.9376, + "step": 6290 + }, + { + "epoch": 0.6645569620253164, + "grad_norm": 0.5313910245895386, + "learning_rate": 0.001238535955375642, + "loss": 1.9362, + "step": 6300 + }, + { + "epoch": 0.6656118143459916, + "grad_norm": 0.43466663360595703, + "learning_rate": 0.0012214646180619506, + "loss": 1.9318, + "step": 6310 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.4663276672363281, + "learning_rate": 0.001204628583208727, + "loss": 1.925, + "step": 6320 + }, + { + "epoch": 0.6677215189873418, + "grad_norm": 0.42209333181381226, + "learning_rate": 0.0011880246075288827, + "loss": 1.9321, + "step": 6330 + }, + { + "epoch": 0.6687763713080169, + "grad_norm": 0.4251249134540558, + "learning_rate": 0.001171649492439115, + "loss": 1.9279, + "step": 6340 + }, + { + "epoch": 0.669831223628692, + "grad_norm": 0.4062013328075409, + "learning_rate": 0.0011555000834437364, + "loss": 1.928, + "step": 6350 + }, + { + "epoch": 0.6708860759493671, + "grad_norm": 0.4061965048313141, + "learning_rate": 0.0011395732695269908, + "loss": 1.9243, + "step": 6360 + }, + { + "epoch": 0.6719409282700421, + "grad_norm": 0.4409763514995575, + "learning_rate": 0.0011238659825537505, + "loss": 1.9109, + "step": 6370 + }, + { + "epoch": 0.6729957805907173, + "grad_norm": 0.41895362734794617, + "learning_rate": 0.0011083751966784717, + "loss": 1.9137, + "step": 6380 + }, + { + "epoch": 0.6740506329113924, + "grad_norm": 0.42582714557647705, + "learning_rate": 0.0010930979277622953, + "loss": 1.9278, + "step": 6390 + }, + { + "epoch": 0.6751054852320675, + "grad_norm": 0.4710099995136261, + "learning_rate": 
0.0010780312327981854, + "loss": 1.9272, + "step": 6400 + }, + { + "epoch": 0.6761603375527426, + "grad_norm": 0.4434272050857544, + "learning_rate": 0.0010631722093439888, + "loss": 1.925, + "step": 6410 + }, + { + "epoch": 0.6772151898734177, + "grad_norm": 0.42562806606292725, + "learning_rate": 0.00104851799496331, + "loss": 1.9119, + "step": 6420 + }, + { + "epoch": 0.6782700421940928, + "grad_norm": 0.4150448143482208, + "learning_rate": 0.0010340657666740914, + "loss": 1.9191, + "step": 6430 + }, + { + "epoch": 0.679324894514768, + "grad_norm": 0.4621260166168213, + "learning_rate": 0.0010198127404047975, + "loss": 1.9031, + "step": 6440 + }, + { + "epoch": 0.680379746835443, + "grad_norm": 0.3927471339702606, + "learning_rate": 0.0010057561704580897, + "loss": 1.9063, + "step": 6450 + }, + { + "epoch": 0.6814345991561181, + "grad_norm": 0.3816894292831421, + "learning_rate": 0.0009918933489818985, + "loss": 1.919, + "step": 6460 + }, + { + "epoch": 0.6824894514767933, + "grad_norm": 0.4114357531070709, + "learning_rate": 0.0009782216054477827, + "loss": 1.9112, + "step": 6470 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 0.4401596784591675, + "learning_rate": 0.0009647383061364801, + "loss": 1.9177, + "step": 6480 + }, + { + "epoch": 0.6845991561181435, + "grad_norm": 0.4112340211868286, + "learning_rate": 0.0009514408536305495, + "loss": 1.9135, + "step": 6490 + }, + { + "epoch": 0.6856540084388185, + "grad_norm": 0.42365604639053345, + "learning_rate": 0.0009383266863140042, + "loss": 1.9279, + "step": 6500 + }, + { + "epoch": 0.6867088607594937, + "grad_norm": 0.3973536491394043, + "learning_rate": 0.000925393277878844, + "loss": 1.9232, + "step": 6510 + }, + { + "epoch": 0.6877637130801688, + "grad_norm": 0.4078630805015564, + "learning_rate": 0.0009126381368383879, + "loss": 1.908, + "step": 6520 + }, + { + "epoch": 0.6888185654008439, + "grad_norm": 0.4189813435077667, + "learning_rate": 0.0009000588060473156, + "loss": 1.9038, + "step": 6530 
+ }, + { + "epoch": 0.689873417721519, + "grad_norm": 0.3901522159576416, + "learning_rate": 0.0008876528622283235, + "loss": 1.9061, + "step": 6540 + }, + { + "epoch": 0.6909282700421941, + "grad_norm": 0.39721715450286865, + "learning_rate": 0.0008754179155053053, + "loss": 1.9038, + "step": 6550 + }, + { + "epoch": 0.6919831223628692, + "grad_norm": 0.5176498293876648, + "learning_rate": 0.0008633516089429683, + "loss": 1.9048, + "step": 6560 + }, + { + "epoch": 0.6930379746835443, + "grad_norm": 0.38577041029930115, + "learning_rate": 0.0008514516180927928, + "loss": 1.9032, + "step": 6570 + }, + { + "epoch": 0.6940928270042194, + "grad_norm": 0.40588951110839844, + "learning_rate": 0.0008397156505452524, + "loss": 1.8961, + "step": 6580 + }, + { + "epoch": 0.6951476793248945, + "grad_norm": 0.42049214243888855, + "learning_rate": 0.0008281414454882051, + "loss": 1.9051, + "step": 6590 + }, + { + "epoch": 0.6962025316455697, + "grad_norm": 0.4043624699115753, + "learning_rate": 0.0008167267732713704, + "loss": 1.9076, + "step": 6600 + }, + { + "epoch": 0.6972573839662447, + "grad_norm": 0.3989948332309723, + "learning_rate": 0.0008054694349768117, + "loss": 1.8948, + "step": 6610 + }, + { + "epoch": 0.6983122362869199, + "grad_norm": 0.389932781457901, + "learning_rate": 0.0007943672619953359, + "loss": 1.8984, + "step": 6620 + }, + { + "epoch": 0.6993670886075949, + "grad_norm": 0.4338935613632202, + "learning_rate": 0.0007834181156087356, + "loss": 1.8912, + "step": 6630 + }, + { + "epoch": 0.70042194092827, + "grad_norm": 0.4143203794956207, + "learning_rate": 0.0007726198865777852, + "loss": 1.8992, + "step": 6640 + }, + { + "epoch": 0.7014767932489452, + "grad_norm": 0.4350084364414215, + "learning_rate": 0.0007619704947359191, + "loss": 1.8883, + "step": 6650 + }, + { + "epoch": 0.7025316455696202, + "grad_norm": 0.4138067662715912, + "learning_rate": 0.0007514678885885087, + "loss": 1.8949, + "step": 6660 + }, + { + "epoch": 0.7035864978902954, + 
"grad_norm": 0.36295846104621887, + "learning_rate": 0.0007411100449176633, + "loss": 1.8896, + "step": 6670 + }, + { + "epoch": 0.7046413502109705, + "grad_norm": 0.39966461062431335, + "learning_rate": 0.0007308949683924791, + "loss": 1.8926, + "step": 6680 + }, + { + "epoch": 0.7056962025316456, + "grad_norm": 0.396759569644928, + "learning_rate": 0.000720820691184658, + "loss": 1.8905, + "step": 6690 + }, + { + "epoch": 0.7067510548523207, + "grad_norm": 0.40356534719467163, + "learning_rate": 0.0007108852725894269, + "loss": 1.8842, + "step": 6700 + }, + { + "epoch": 0.7078059071729957, + "grad_norm": 0.43995505571365356, + "learning_rate": 0.000701086798651681, + "loss": 1.8876, + "step": 6710 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 0.39463213086128235, + "learning_rate": 0.0006914233817972798, + "loss": 1.8817, + "step": 6720 + }, + { + "epoch": 0.709915611814346, + "grad_norm": 0.37957167625427246, + "learning_rate": 0.0006818931604694261, + "loss": 1.8875, + "step": 6730 + }, + { + "epoch": 0.7109704641350211, + "grad_norm": 0.37148046493530273, + "learning_rate": 0.0006724942987700563, + "loss": 1.8937, + "step": 6740 + }, + { + "epoch": 0.7120253164556962, + "grad_norm": 0.3996550440788269, + "learning_rate": 0.0006632249861061732, + "loss": 1.8938, + "step": 6750 + }, + { + "epoch": 0.7130801687763713, + "grad_norm": 0.3949614465236664, + "learning_rate": 0.0006540834368410549, + "loss": 1.888, + "step": 6760 + }, + { + "epoch": 0.7141350210970464, + "grad_norm": 0.3813718855381012, + "learning_rate": 0.0006450678899502701, + "loss": 1.8907, + "step": 6770 + }, + { + "epoch": 0.7151898734177216, + "grad_norm": 0.37598955631256104, + "learning_rate": 0.0006361766086824345, + "loss": 1.8861, + "step": 6780 + }, + { + "epoch": 0.7162447257383966, + "grad_norm": 0.39398065209388733, + "learning_rate": 0.000627407880224645, + "loss": 1.8921, + "step": 6790 + }, + { + "epoch": 0.7172995780590717, + "grad_norm": 0.37036997079849243, + 
"learning_rate": 0.0006187600153725225, + "loss": 1.8747, + "step": 6800 + }, + { + "epoch": 0.7183544303797469, + "grad_norm": 0.4019232392311096, + "learning_rate": 0.0006102313482048055, + "loss": 1.881, + "step": 6810 + }, + { + "epoch": 0.7194092827004219, + "grad_norm": 0.41694897413253784, + "learning_rate": 0.0006018202357624274, + "loss": 1.8829, + "step": 6820 + }, + { + "epoch": 0.7204641350210971, + "grad_norm": 0.40881526470184326, + "learning_rate": 0.0005935250577320168, + "loss": 1.8787, + "step": 6830 + }, + { + "epoch": 0.7215189873417721, + "grad_norm": 0.4087632894515991, + "learning_rate": 0.0005853442161337618, + "loss": 1.8696, + "step": 6840 + }, + { + "epoch": 0.7225738396624473, + "grad_norm": 0.37924131751060486, + "learning_rate": 0.0005772761350135759, + "loss": 1.8772, + "step": 6850 + }, + { + "epoch": 0.7236286919831224, + "grad_norm": 0.38739898800849915, + "learning_rate": 0.0005693192601395058, + "loss": 1.8742, + "step": 6860 + }, + { + "epoch": 0.7246835443037974, + "grad_norm": 0.3744981288909912, + "learning_rate": 0.000561472058702326, + "loss": 1.8695, + "step": 6870 + }, + { + "epoch": 0.7257383966244726, + "grad_norm": 0.3714418113231659, + "learning_rate": 0.000553733019020258, + "loss": 1.8793, + "step": 6880 + }, + { + "epoch": 0.7267932489451476, + "grad_norm": 0.3763202726840973, + "learning_rate": 0.0005461006502477612, + "loss": 1.8629, + "step": 6890 + }, + { + "epoch": 0.7278481012658228, + "grad_norm": 0.38656339049339294, + "learning_rate": 0.0005385734820883369, + "loss": 1.8729, + "step": 6900 + }, + { + "epoch": 0.7289029535864979, + "grad_norm": 0.36681076884269714, + "learning_rate": 0.0005311500645112907, + "loss": 1.8944, + "step": 6910 + }, + { + "epoch": 0.729957805907173, + "grad_norm": 0.3906095623970032, + "learning_rate": 0.0005238289674723993, + "loss": 1.8737, + "step": 6920 + }, + { + "epoch": 0.7310126582278481, + "grad_norm": 0.3659605383872986, + "learning_rate": 0.0005166087806384274, + 
"loss": 1.8813, + "step": 6930 + }, + { + "epoch": 0.7320675105485233, + "grad_norm": 0.4291110336780548, + "learning_rate": 0.0005094881131154418, + "loss": 1.8803, + "step": 6940 + }, + { + "epoch": 0.7331223628691983, + "grad_norm": 0.39232853055000305, + "learning_rate": 0.0005024655931808696, + "loss": 1.8777, + "step": 6950 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 0.39790189266204834, + "learning_rate": 0.0004955398680192508, + "loss": 1.8676, + "step": 6960 + }, + { + "epoch": 0.7352320675105485, + "grad_norm": 0.40521377325057983, + "learning_rate": 0.000488709603461632, + "loss": 1.8615, + "step": 6970 + }, + { + "epoch": 0.7362869198312236, + "grad_norm": 0.41293227672576904, + "learning_rate": 0.000481973483728553, + "loss": 1.8663, + "step": 6980 + }, + { + "epoch": 0.7373417721518988, + "grad_norm": 0.43020930886268616, + "learning_rate": 0.0004753302111765748, + "loss": 1.8671, + "step": 6990 + }, + { + "epoch": 0.7383966244725738, + "grad_norm": 0.3803190588951111, + "learning_rate": 0.0004687785060483032, + "loss": 1.8765, + "step": 7000 + }, + { + "epoch": 0.739451476793249, + "grad_norm": 0.3787606954574585, + "learning_rate": 0.0004623171062258558, + "loss": 1.8469, + "step": 7010 + }, + { + "epoch": 0.740506329113924, + "grad_norm": 0.38570114970207214, + "learning_rate": 0.0004559447669877288, + "loss": 1.8631, + "step": 7020 + }, + { + "epoch": 0.7415611814345991, + "grad_norm": 0.3946021795272827, + "learning_rate": 0.00044966026076901413, + "loss": 1.864, + "step": 7030 + }, + { + "epoch": 0.7426160337552743, + "grad_norm": 0.4139018654823303, + "learning_rate": 0.00044346237692492177, + "loss": 1.8711, + "step": 7040 + }, + { + "epoch": 0.7436708860759493, + "grad_norm": 0.3821570873260498, + "learning_rate": 0.0004373499214975615, + "loss": 1.8613, + "step": 7050 + }, + { + "epoch": 0.7447257383966245, + "grad_norm": 0.3973364531993866, + "learning_rate": 0.0004313217169859396, + "loss": 1.8656, + "step": 7060 + }, + { + 
"epoch": 0.7457805907172996, + "grad_norm": 0.40247097611427307, + "learning_rate": 0.0004253766021191256, + "loss": 1.8697, + "step": 7070 + }, + { + "epoch": 0.7468354430379747, + "grad_norm": 0.36728376150131226, + "learning_rate": 0.00041951343163254497, + "loss": 1.8667, + "step": 7080 + }, + { + "epoch": 0.7478902953586498, + "grad_norm": 0.37559038400650024, + "learning_rate": 0.00041373107604735626, + "loss": 1.8658, + "step": 7090 + }, + { + "epoch": 0.7489451476793249, + "grad_norm": 0.37511202692985535, + "learning_rate": 0.0004080284214528687, + "loss": 1.8595, + "step": 7100 + }, + { + "epoch": 0.75, + "grad_norm": 0.41109174489974976, + "learning_rate": 0.0004024043692919589, + "loss": 1.871, + "step": 7110 + }, + { + "epoch": 0.7510548523206751, + "grad_norm": 0.39778023958206177, + "learning_rate": 0.0003968578361494449, + "loss": 1.8691, + "step": 7120 + }, + { + "epoch": 0.7521097046413502, + "grad_norm": 0.3845960795879364, + "learning_rate": 0.000391387753543378, + "loss": 1.8756, + "step": 7130 + }, + { + "epoch": 0.7531645569620253, + "grad_norm": 0.3910766839981079, + "learning_rate": 0.00038599306771921023, + "loss": 1.8543, + "step": 7140 + }, + { + "epoch": 0.7542194092827004, + "grad_norm": 0.39525917172431946, + "learning_rate": 0.0003806727394468004, + "loss": 1.8514, + "step": 7150 + }, + { + "epoch": 0.7552742616033755, + "grad_norm": 0.36833056807518005, + "learning_rate": 0.0003754257438202162, + "loss": 1.8615, + "step": 7160 + }, + { + "epoch": 0.7563291139240507, + "grad_norm": 0.3827219605445862, + "learning_rate": 0.0003702510700602974, + "loss": 1.8718, + "step": 7170 + }, + { + "epoch": 0.7573839662447257, + "grad_norm": 0.388357013463974, + "learning_rate": 0.0003651477213199393, + "loss": 1.8462, + "step": 7180 + }, + { + "epoch": 0.7584388185654009, + "grad_norm": 0.39934271574020386, + "learning_rate": 0.000360114714492061, + "loss": 1.8507, + "step": 7190 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 
0.41285282373428345, + "learning_rate": 0.0003551510800202195, + "loss": 1.8544, + "step": 7200 + }, + { + "epoch": 0.760548523206751, + "grad_norm": 0.3969578742980957, + "learning_rate": 0.0003502558617118353, + "loss": 1.8598, + "step": 7210 + }, + { + "epoch": 0.7616033755274262, + "grad_norm": 0.3763035237789154, + "learning_rate": 0.0003454281165539914, + "loss": 1.8752, + "step": 7220 + }, + { + "epoch": 0.7626582278481012, + "grad_norm": 0.4243927597999573, + "learning_rate": 0.00034066691453177176, + "loss": 1.8671, + "step": 7230 + }, + { + "epoch": 0.7637130801687764, + "grad_norm": 0.3984666168689728, + "learning_rate": 0.0003359713384491037, + "loss": 1.8618, + "step": 7240 + }, + { + "epoch": 0.7647679324894515, + "grad_norm": 0.39229220151901245, + "learning_rate": 0.00033134048375206944, + "loss": 1.8598, + "step": 7250 + }, + { + "epoch": 0.7658227848101266, + "grad_norm": 0.359428733587265, + "learning_rate": 0.0003267734583546536, + "loss": 1.8541, + "step": 7260 + }, + { + "epoch": 0.7668776371308017, + "grad_norm": 0.3993035852909088, + "learning_rate": 0.00032226938246689157, + "loss": 1.8583, + "step": 7270 + }, + { + "epoch": 0.7679324894514767, + "grad_norm": 0.39195823669433594, + "learning_rate": 0.0003178273884253874, + "loss": 1.855, + "step": 7280 + }, + { + "epoch": 0.7689873417721519, + "grad_norm": 0.37241217494010925, + "learning_rate": 0.0003134466205261674, + "loss": 1.8653, + "step": 7290 + }, + { + "epoch": 0.770042194092827, + "grad_norm": 0.3893034756183624, + "learning_rate": 0.0003091262348598378, + "loss": 1.8726, + "step": 7300 + }, + { + "epoch": 0.7710970464135021, + "grad_norm": 0.3817334473133087, + "learning_rate": 0.0003048653991490141, + "loss": 1.8498, + "step": 7310 + }, + { + "epoch": 0.7721518987341772, + "grad_norm": 0.3646955192089081, + "learning_rate": 0.00030066329258799187, + "loss": 1.8465, + "step": 7320 + }, + { + "epoch": 0.7732067510548524, + "grad_norm": 0.4202854335308075, + "learning_rate": 
0.0002965191056846266, + "loss": 1.8541, + "step": 7330 + }, + { + "epoch": 0.7742616033755274, + "grad_norm": 0.3694995641708374, + "learning_rate": 0.000292432040104394, + "loss": 1.8509, + "step": 7340 + }, + { + "epoch": 0.7753164556962026, + "grad_norm": 0.35707733035087585, + "learning_rate": 0.00028840130851659853, + "loss": 1.8467, + "step": 7350 + }, + { + "epoch": 0.7763713080168776, + "grad_norm": 0.38870304822921753, + "learning_rate": 0.0002844261344427028, + "loss": 1.8572, + "step": 7360 + }, + { + "epoch": 0.7774261603375527, + "grad_norm": 0.3943558633327484, + "learning_rate": 0.0002805057521067471, + "loss": 1.8527, + "step": 7370 + }, + { + "epoch": 0.7784810126582279, + "grad_norm": 0.3791944980621338, + "learning_rate": 0.00027663940628783017, + "loss": 1.8454, + "step": 7380 + }, + { + "epoch": 0.7795358649789029, + "grad_norm": 0.37139755487442017, + "learning_rate": 0.00027282635217462393, + "loss": 1.8519, + "step": 7390 + }, + { + "epoch": 0.7805907172995781, + "grad_norm": 0.3750312030315399, + "learning_rate": 0.0002690658552218937, + "loss": 1.8623, + "step": 7400 + }, + { + "epoch": 0.7816455696202531, + "grad_norm": 0.3591516315937042, + "learning_rate": 0.00026535719100899516, + "loss": 1.8381, + "step": 7410 + }, + { + "epoch": 0.7827004219409283, + "grad_norm": 0.3792029321193695, + "learning_rate": 0.00026169964510032245, + "loss": 1.8462, + "step": 7420 + }, + { + "epoch": 0.7837552742616034, + "grad_norm": 0.3520404100418091, + "learning_rate": 0.00025809251290767984, + "loss": 1.8338, + "step": 7430 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 0.3666081130504608, + "learning_rate": 0.00025453509955454957, + "loss": 1.8392, + "step": 7440 + }, + { + "epoch": 0.7858649789029536, + "grad_norm": 0.38259756565093994, + "learning_rate": 0.00025102671974223175, + "loss": 1.8421, + "step": 7450 + }, + { + "epoch": 0.7869198312236287, + "grad_norm": 0.3695935010910034, + "learning_rate": 0.00024756669761782815, + "loss": 
1.8536, + "step": 7460 + }, + { + "epoch": 0.7879746835443038, + "grad_norm": 0.36817803978919983, + "learning_rate": 0.0002441543666440464, + "loss": 1.8405, + "step": 7470 + }, + { + "epoch": 0.7890295358649789, + "grad_norm": 0.38346537947654724, + "learning_rate": 0.00024078906947079878, + "loss": 1.844, + "step": 7480 + }, + { + "epoch": 0.790084388185654, + "grad_norm": 0.37116846442222595, + "learning_rate": 0.00023747015780857005, + "loss": 1.8563, + "step": 7490 + }, + { + "epoch": 0.7911392405063291, + "grad_norm": 0.37044385075569153, + "learning_rate": 0.00023419699230353144, + "loss": 1.85, + "step": 7500 + }, + { + "epoch": 0.7921940928270043, + "grad_norm": 0.38595202565193176, + "learning_rate": 0.00023096894241437586, + "loss": 1.8571, + "step": 7510 + }, + { + "epoch": 0.7932489451476793, + "grad_norm": 0.36819988489151, + "learning_rate": 0.00022778538629085056, + "loss": 1.8393, + "step": 7520 + }, + { + "epoch": 0.7943037974683544, + "grad_norm": 0.36314669251441956, + "learning_rate": 0.00022464571065396427, + "loss": 1.8443, + "step": 7530 + }, + { + "epoch": 0.7953586497890295, + "grad_norm": 0.39035850763320923, + "learning_rate": 0.00022154931067784521, + "loss": 1.8413, + "step": 7540 + }, + { + "epoch": 0.7964135021097046, + "grad_norm": 0.3708403408527374, + "learning_rate": 0.00021849558987322782, + "loss": 1.8374, + "step": 7550 + }, + { + "epoch": 0.7974683544303798, + "grad_norm": 0.38795605301856995, + "learning_rate": 0.0002154839599725452, + "loss": 1.837, + "step": 7560 + }, + { + "epoch": 0.7985232067510548, + "grad_norm": 0.38620778918266296, + "learning_rate": 0.00021251384081660544, + "loss": 1.8519, + "step": 7570 + }, + { + "epoch": 0.79957805907173, + "grad_norm": 0.36514854431152344, + "learning_rate": 0.0002095846602428303, + "loss": 1.8473, + "step": 7580 + }, + { + "epoch": 0.8006329113924051, + "grad_norm": 0.3578909635543823, + "learning_rate": 0.00020669585397503358, + "loss": 1.8404, + "step": 7590 + }, + { + 
"epoch": 0.8016877637130801, + "grad_norm": 0.37365859746932983, + "learning_rate": 0.0002038468655147195, + "loss": 1.8403, + "step": 7600 + }, + { + "epoch": 0.8027426160337553, + "grad_norm": 0.38331690430641174, + "learning_rate": 0.00020103714603387894, + "loss": 1.8533, + "step": 7610 + }, + { + "epoch": 0.8037974683544303, + "grad_norm": 0.3489489257335663, + "learning_rate": 0.00019826615426926338, + "loss": 1.8245, + "step": 7620 + }, + { + "epoch": 0.8048523206751055, + "grad_norm": 0.3655901253223419, + "learning_rate": 0.00019553335641811625, + "loss": 1.8473, + "step": 7630 + }, + { + "epoch": 0.8059071729957806, + "grad_norm": 0.38881802558898926, + "learning_rate": 0.0001928382260353415, + "loss": 1.8409, + "step": 7640 + }, + { + "epoch": 0.8069620253164557, + "grad_norm": 0.3771028518676758, + "learning_rate": 0.00019018024393208902, + "loss": 1.8537, + "step": 7650 + }, + { + "epoch": 0.8080168776371308, + "grad_norm": 0.38920238614082336, + "learning_rate": 0.00018755889807573872, + "loss": 1.8341, + "step": 7660 + }, + { + "epoch": 0.8090717299578059, + "grad_norm": 0.35742661356925964, + "learning_rate": 0.00018497368349126262, + "loss": 1.8412, + "step": 7670 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 0.4079872667789459, + "learning_rate": 0.00018242410216394648, + "loss": 1.8581, + "step": 7680 + }, + { + "epoch": 0.8111814345991561, + "grad_norm": 0.37757664918899536, + "learning_rate": 0.0001799096629434529, + "loss": 1.8299, + "step": 7690 + }, + { + "epoch": 0.8122362869198312, + "grad_norm": 0.36545130610466003, + "learning_rate": 0.00017742988144920578, + "loss": 1.84, + "step": 7700 + }, + { + "epoch": 0.8132911392405063, + "grad_norm": 0.38178738951683044, + "learning_rate": 0.00017498427997707976, + "loss": 1.8369, + "step": 7710 + }, + { + "epoch": 0.8143459915611815, + "grad_norm": 0.36409515142440796, + "learning_rate": 0.00017257238740737548, + "loss": 1.8373, + "step": 7720 + }, + { + "epoch": 0.8154008438818565, + 
"grad_norm": 0.39177459478378296, + "learning_rate": 0.00017019373911406307, + "loss": 1.8498, + "step": 7730 + }, + { + "epoch": 0.8164556962025317, + "grad_norm": 0.3647187650203705, + "learning_rate": 0.000167847876875277, + "loss": 1.8464, + "step": 7740 + }, + { + "epoch": 0.8175105485232067, + "grad_norm": 0.39089101552963257, + "learning_rate": 0.00016553434878504428, + "loss": 1.8306, + "step": 7750 + }, + { + "epoch": 0.8185654008438819, + "grad_norm": 0.38606345653533936, + "learning_rate": 0.00016325270916622947, + "loss": 1.8328, + "step": 7760 + }, + { + "epoch": 0.819620253164557, + "grad_norm": 0.36788392066955566, + "learning_rate": 0.00016100251848467966, + "loss": 1.8427, + "step": 7770 + }, + { + "epoch": 0.820675105485232, + "grad_norm": 0.35645923018455505, + "learning_rate": 0.0001587833432645528, + "loss": 1.8318, + "step": 7780 + }, + { + "epoch": 0.8217299578059072, + "grad_norm": 0.3621348738670349, + "learning_rate": 0.00015659475600481292, + "loss": 1.8535, + "step": 7790 + }, + { + "epoch": 0.8227848101265823, + "grad_norm": 0.35121673345565796, + "learning_rate": 0.00015443633509687688, + "loss": 1.8381, + "step": 7800 + }, + { + "epoch": 0.8238396624472574, + "grad_norm": 0.3815164566040039, + "learning_rate": 0.00015230766474339536, + "loss": 1.836, + "step": 7810 + }, + { + "epoch": 0.8248945147679325, + "grad_norm": 0.37245428562164307, + "learning_rate": 0.00015020833487815416, + "loss": 1.8467, + "step": 7820 + }, + { + "epoch": 0.8259493670886076, + "grad_norm": 0.3836027979850769, + "learning_rate": 0.0001481379410870792, + "loss": 1.8344, + "step": 7830 + }, + { + "epoch": 0.8270042194092827, + "grad_norm": 0.3576463460922241, + "learning_rate": 0.00014609608453033013, + "loss": 1.8205, + "step": 7840 + }, + { + "epoch": 0.8280590717299579, + "grad_norm": 0.35346224904060364, + "learning_rate": 0.00014408237186546807, + "loss": 1.8307, + "step": 7850 + }, + { + "epoch": 0.8291139240506329, + "grad_norm": 0.378290593624115, + 
"learning_rate": 0.00014209641517168273, + "loss": 1.8216, + "step": 7860 + }, + { + "epoch": 0.830168776371308, + "grad_norm": 0.35224640369415283, + "learning_rate": 0.00014013783187506265, + "loss": 1.8318, + "step": 7870 + }, + { + "epoch": 0.8312236286919831, + "grad_norm": 0.37513265013694763, + "learning_rate": 0.00013820624467489697, + "loss": 1.85, + "step": 7880 + }, + { + "epoch": 0.8322784810126582, + "grad_norm": 0.35514310002326965, + "learning_rate": 0.00013630128147099213, + "loss": 1.8478, + "step": 7890 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.3997390568256378, + "learning_rate": 0.00013442257529199068, + "loss": 1.83, + "step": 7900 + }, + { + "epoch": 0.8343881856540084, + "grad_norm": 0.3484932780265808, + "learning_rate": 0.00013256976422467803, + "loss": 1.8357, + "step": 7910 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 0.35105887055397034, + "learning_rate": 0.00013074249134426366, + "loss": 1.8379, + "step": 7920 + }, + { + "epoch": 0.8364978902953587, + "grad_norm": 0.3573038876056671, + "learning_rate": 0.0001289404046456233, + "loss": 1.8476, + "step": 7930 + }, + { + "epoch": 0.8375527426160337, + "grad_norm": 0.36516422033309937, + "learning_rate": 0.0001271631569754887, + "loss": 1.8416, + "step": 7940 + }, + { + "epoch": 0.8386075949367089, + "grad_norm": 0.3589839041233063, + "learning_rate": 0.0001254104059655723, + "loss": 1.8402, + "step": 7950 + }, + { + "epoch": 0.8396624472573839, + "grad_norm": 0.3751431703567505, + "learning_rate": 0.00012368181396661337, + "loss": 1.8229, + "step": 7960 + }, + { + "epoch": 0.8407172995780591, + "grad_norm": 0.362739622592926, + "learning_rate": 0.00012197704798333364, + "loss": 1.8323, + "step": 7970 + }, + { + "epoch": 0.8417721518987342, + "grad_norm": 0.36905282735824585, + "learning_rate": 0.00012029577961028894, + "loss": 1.8251, + "step": 7980 + }, + { + "epoch": 0.8428270042194093, + "grad_norm": 0.3701053559780121, + "learning_rate": 
0.00011863768496860542, + "loss": 1.8409, + "step": 7990 + }, + { + "epoch": 0.8438818565400844, + "grad_norm": 0.3674405813217163, + "learning_rate": 0.00011700244464358777, + "loss": 1.8328, + "step": 8000 + }, + { + "epoch": 0.8449367088607594, + "grad_norm": 0.36590641736984253, + "learning_rate": 0.00011538974362318715, + "loss": 1.8391, + "step": 8010 + }, + { + "epoch": 0.8459915611814346, + "grad_norm": 0.3725239932537079, + "learning_rate": 0.00011379927123731737, + "loss": 1.8305, + "step": 8020 + }, + { + "epoch": 0.8470464135021097, + "grad_norm": 0.37020644545555115, + "learning_rate": 0.0001122307210980077, + "loss": 1.829, + "step": 8030 + }, + { + "epoch": 0.8481012658227848, + "grad_norm": 0.3549184799194336, + "learning_rate": 0.00011068379104038026, + "loss": 1.8424, + "step": 8040 + }, + { + "epoch": 0.8491561181434599, + "grad_norm": 0.3790867030620575, + "learning_rate": 0.00010915818306444116, + "loss": 1.8258, + "step": 8050 + }, + { + "epoch": 0.8502109704641351, + "grad_norm": 0.3635125160217285, + "learning_rate": 0.00010765360327767384, + "loss": 1.8301, + "step": 8060 + }, + { + "epoch": 0.8512658227848101, + "grad_norm": 0.3487062454223633, + "learning_rate": 0.00010616976183842376, + "loss": 1.8365, + "step": 8070 + }, + { + "epoch": 0.8523206751054853, + "grad_norm": 0.35935455560684204, + "learning_rate": 0.00010470637290006365, + "loss": 1.8393, + "step": 8080 + }, + { + "epoch": 0.8533755274261603, + "grad_norm": 0.34557873010635376, + "learning_rate": 0.00010326315455592764, + "loss": 1.8289, + "step": 8090 + }, + { + "epoch": 0.8544303797468354, + "grad_norm": 0.359907865524292, + "learning_rate": 0.0001018398287850053, + "loss": 1.8204, + "step": 8100 + }, + { + "epoch": 0.8554852320675106, + "grad_norm": 0.38513684272766113, + "learning_rate": 0.00010043612139838357, + "loss": 1.8426, + "step": 8110 + }, + { + "epoch": 0.8565400843881856, + "grad_norm": 0.3592938184738159, + "learning_rate": 9.905176198642719e-05, + "loss": 
1.8298, + "step": 8120 + }, + { + "epoch": 0.8575949367088608, + "grad_norm": 0.37656906247138977, + "learning_rate": 9.76864838666871e-05, + "loss": 1.8326, + "step": 8130 + }, + { + "epoch": 0.8586497890295358, + "grad_norm": 0.3696509301662445, + "learning_rate": 9.634002403252676e-05, + "loss": 1.8277, + "step": 8140 + }, + { + "epoch": 0.859704641350211, + "grad_norm": 0.36902931332588196, + "learning_rate": 9.501212310245681e-05, + "loss": 1.8259, + "step": 8150 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 0.35556498169898987, + "learning_rate": 9.370252527016777e-05, + "loss": 1.8357, + "step": 8160 + }, + { + "epoch": 0.8618143459915611, + "grad_norm": 0.3559355139732361, + "learning_rate": 9.241097825525163e-05, + "loss": 1.8289, + "step": 8170 + }, + { + "epoch": 0.8628691983122363, + "grad_norm": 0.37157970666885376, + "learning_rate": 9.113723325460276e-05, + "loss": 1.8367, + "step": 8180 + }, + { + "epoch": 0.8639240506329114, + "grad_norm": 0.355340838432312, + "learning_rate": 8.988104489448849e-05, + "loss": 1.8299, + "step": 8190 + }, + { + "epoch": 0.8649789029535865, + "grad_norm": 0.3648318946361542, + "learning_rate": 8.864217118328042e-05, + "loss": 1.8382, + "step": 8200 + }, + { + "epoch": 0.8660337552742616, + "grad_norm": 0.3711951971054077, + "learning_rate": 8.742037346483729e-05, + "loss": 1.8308, + "step": 8210 + }, + { + "epoch": 0.8670886075949367, + "grad_norm": 0.3465905785560608, + "learning_rate": 8.62154163725303e-05, + "loss": 1.84, + "step": 8220 + }, + { + "epoch": 0.8681434599156118, + "grad_norm": 0.3531224727630615, + "learning_rate": 8.502706778390219e-05, + "loss": 1.8361, + "step": 8230 + }, + { + "epoch": 0.869198312236287, + "grad_norm": 0.3585963547229767, + "learning_rate": 8.38550987759513e-05, + "loss": 1.8324, + "step": 8240 + }, + { + "epoch": 0.870253164556962, + "grad_norm": 0.3620217740535736, + "learning_rate": 8.269928358103191e-05, + "loss": 1.8472, + "step": 8250 + }, + { + "epoch": 
0.8713080168776371, + "grad_norm": 0.36264878511428833, + "learning_rate": 8.155939954336243e-05, + "loss": 1.8382, + "step": 8260 + }, + { + "epoch": 0.8723628691983122, + "grad_norm": 0.3570832908153534, + "learning_rate": 8.043522707613312e-05, + "loss": 1.8293, + "step": 8270 + }, + { + "epoch": 0.8734177215189873, + "grad_norm": 0.3508296608924866, + "learning_rate": 7.932654961920486e-05, + "loss": 1.8209, + "step": 8280 + }, + { + "epoch": 0.8744725738396625, + "grad_norm": 0.3548789620399475, + "learning_rate": 7.823315359739135e-05, + "loss": 1.818, + "step": 8290 + }, + { + "epoch": 0.8755274261603375, + "grad_norm": 0.350575715303421, + "learning_rate": 7.715482837931577e-05, + "loss": 1.8457, + "step": 8300 + }, + { + "epoch": 0.8765822784810127, + "grad_norm": 0.34618890285491943, + "learning_rate": 7.6091366236835e-05, + "loss": 1.8207, + "step": 8310 + }, + { + "epoch": 0.8776371308016878, + "grad_norm": 0.36400192975997925, + "learning_rate": 7.504256230502289e-05, + "loss": 1.8395, + "step": 8320 + }, + { + "epoch": 0.8786919831223629, + "grad_norm": 0.3459380269050598, + "learning_rate": 7.400821454270524e-05, + "loss": 1.8339, + "step": 8330 + }, + { + "epoch": 0.879746835443038, + "grad_norm": 0.34697189927101135, + "learning_rate": 7.29881236935386e-05, + "loss": 1.8194, + "step": 8340 + }, + { + "epoch": 0.880801687763713, + "grad_norm": 0.3628111779689789, + "learning_rate": 7.198209324762562e-05, + "loss": 1.8248, + "step": 8350 + }, + { + "epoch": 0.8818565400843882, + "grad_norm": 0.3612930178642273, + "learning_rate": 7.098992940365946e-05, + "loss": 1.8225, + "step": 8360 + }, + { + "epoch": 0.8829113924050633, + "grad_norm": 0.37480759620666504, + "learning_rate": 7.001144103159e-05, + "loss": 1.832, + "step": 8370 + }, + { + "epoch": 0.8839662447257384, + "grad_norm": 0.3713092505931854, + "learning_rate": 6.904643963580461e-05, + "loss": 1.8362, + "step": 8380 + }, + { + "epoch": 0.8850210970464135, + "grad_norm": 0.36273178458213806, 
+ "learning_rate": 6.809473931881644e-05, + "loss": 1.8276, + "step": 8390 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 0.35557547211647034, + "learning_rate": 6.71561567454532e-05, + "loss": 1.8295, + "step": 8400 + }, + { + "epoch": 0.8871308016877637, + "grad_norm": 0.35513368248939514, + "learning_rate": 6.623051110753948e-05, + "loss": 1.8483, + "step": 8410 + }, + { + "epoch": 0.8881856540084389, + "grad_norm": 0.35310035943984985, + "learning_rate": 6.531762408906607e-05, + "loss": 1.8331, + "step": 8420 + }, + { + "epoch": 0.8892405063291139, + "grad_norm": 0.36005258560180664, + "learning_rate": 6.441731983183912e-05, + "loss": 1.8305, + "step": 8430 + }, + { + "epoch": 0.890295358649789, + "grad_norm": 0.356106162071228, + "learning_rate": 6.352942490160292e-05, + "loss": 1.8234, + "step": 8440 + }, + { + "epoch": 0.8913502109704642, + "grad_norm": 0.37876901030540466, + "learning_rate": 6.265376825462966e-05, + "loss": 1.8346, + "step": 8450 + }, + { + "epoch": 0.8924050632911392, + "grad_norm": 0.3676668405532837, + "learning_rate": 6.179018120476945e-05, + "loss": 1.8302, + "step": 8460 + }, + { + "epoch": 0.8934599156118144, + "grad_norm": 0.3509625792503357, + "learning_rate": 6.0938497390954946e-05, + "loss": 1.8309, + "step": 8470 + }, + { + "epoch": 0.8945147679324894, + "grad_norm": 0.3570344150066376, + "learning_rate": 6.009855274515339e-05, + "loss": 1.8234, + "step": 8480 + }, + { + "epoch": 0.8955696202531646, + "grad_norm": 0.3523993194103241, + "learning_rate": 5.9270185460760735e-05, + "loss": 1.833, + "step": 8490 + }, + { + "epoch": 0.8966244725738397, + "grad_norm": 0.35637491941452026, + "learning_rate": 5.8453235961431225e-05, + "loss": 1.8195, + "step": 8500 + }, + { + "epoch": 0.8976793248945147, + "grad_norm": 0.34806060791015625, + "learning_rate": 5.764754687033678e-05, + "loss": 1.8327, + "step": 8510 + }, + { + "epoch": 0.8987341772151899, + "grad_norm": 0.3617207705974579, + "learning_rate": 5.6852962979849836e-05, 
+ "loss": 1.8257, + "step": 8520 + }, + { + "epoch": 0.8997890295358649, + "grad_norm": 0.3532322645187378, + "learning_rate": 5.6069331221644284e-05, + "loss": 1.8388, + "step": 8530 + }, + { + "epoch": 0.9008438818565401, + "grad_norm": 0.35625216364860535, + "learning_rate": 5.529650063720842e-05, + "loss": 1.8383, + "step": 8540 + }, + { + "epoch": 0.9018987341772152, + "grad_norm": 0.3520863652229309, + "learning_rate": 5.453432234876445e-05, + "loss": 1.8293, + "step": 8550 + }, + { + "epoch": 0.9029535864978903, + "grad_norm": 0.34727564454078674, + "learning_rate": 5.37826495305886e-05, + "loss": 1.8185, + "step": 8560 + }, + { + "epoch": 0.9040084388185654, + "grad_norm": 0.35971036553382874, + "learning_rate": 5.304133738072674e-05, + "loss": 1.8376, + "step": 8570 + }, + { + "epoch": 0.9050632911392406, + "grad_norm": 0.3352327346801758, + "learning_rate": 5.2310243093099814e-05, + "loss": 1.831, + "step": 8580 + }, + { + "epoch": 0.9061181434599156, + "grad_norm": 0.3646147549152374, + "learning_rate": 5.158922582999368e-05, + "loss": 1.8354, + "step": 8590 + }, + { + "epoch": 0.9071729957805907, + "grad_norm": 0.33844876289367676, + "learning_rate": 5.087814669492819e-05, + "loss": 1.8275, + "step": 8600 + }, + { + "epoch": 0.9082278481012658, + "grad_norm": 0.3586468994617462, + "learning_rate": 5.017686870590028e-05, + "loss": 1.8258, + "step": 8610 + }, + { + "epoch": 0.9092827004219409, + "grad_norm": 0.36400601267814636, + "learning_rate": 4.948525676899577e-05, + "loss": 1.8222, + "step": 8620 + }, + { + "epoch": 0.9103375527426161, + "grad_norm": 0.35185590386390686, + "learning_rate": 4.880317765236493e-05, + "loss": 1.834, + "step": 8630 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 0.36754703521728516, + "learning_rate": 4.8130499960556755e-05, + "loss": 1.8111, + "step": 8640 + }, + { + "epoch": 0.9124472573839663, + "grad_norm": 0.3563450872898102, + "learning_rate": 4.746709410920699e-05, + "loss": 1.8212, + "step": 8650 + }, + { 
+ "epoch": 0.9135021097046413, + "grad_norm": 0.35486289858818054, + "learning_rate": 4.681283230007507e-05, + "loss": 1.8219, + "step": 8660 + }, + { + "epoch": 0.9145569620253164, + "grad_norm": 0.35444745421409607, + "learning_rate": 4.616758849642509e-05, + "loss": 1.8267, + "step": 8670 + }, + { + "epoch": 0.9156118143459916, + "grad_norm": 0.35601454973220825, + "learning_rate": 4.553123839874615e-05, + "loss": 1.8359, + "step": 8680 + }, + { + "epoch": 0.9166666666666666, + "grad_norm": 0.35390567779541016, + "learning_rate": 4.490365942080736e-05, + "loss": 1.8325, + "step": 8690 + }, + { + "epoch": 0.9177215189873418, + "grad_norm": 0.3538806736469269, + "learning_rate": 4.428473066604285e-05, + "loss": 1.8294, + "step": 8700 + }, + { + "epoch": 0.9187763713080169, + "grad_norm": 0.3609935939311981, + "learning_rate": 4.367433290426233e-05, + "loss": 1.8301, + "step": 8710 + }, + { + "epoch": 0.919831223628692, + "grad_norm": 0.3562791049480438, + "learning_rate": 4.3072348548682595e-05, + "loss": 1.8191, + "step": 8720 + }, + { + "epoch": 0.9208860759493671, + "grad_norm": 0.35799530148506165, + "learning_rate": 4.247866163327575e-05, + "loss": 1.8328, + "step": 8730 + }, + { + "epoch": 0.9219409282700421, + "grad_norm": 0.35550785064697266, + "learning_rate": 4.1893157790429404e-05, + "loss": 1.8192, + "step": 8740 + }, + { + "epoch": 0.9229957805907173, + "grad_norm": 0.354793906211853, + "learning_rate": 4.1315724228915066e-05, + "loss": 1.8205, + "step": 8750 + }, + { + "epoch": 0.9240506329113924, + "grad_norm": 0.35129454731941223, + "learning_rate": 4.074624971216005e-05, + "loss": 1.8088, + "step": 8760 + }, + { + "epoch": 0.9251054852320675, + "grad_norm": 0.34905755519866943, + "learning_rate": 4.018462453681889e-05, + "loss": 1.8224, + "step": 8770 + }, + { + "epoch": 0.9261603375527426, + "grad_norm": 0.3482454717159271, + "learning_rate": 3.963074051164014e-05, + "loss": 1.8312, + "step": 8780 + }, + { + "epoch": 0.9272151898734177, + 
"grad_norm": 0.3517247438430786, + "learning_rate": 3.908449093662446e-05, + "loss": 1.8254, + "step": 8790 + }, + { + "epoch": 0.9282700421940928, + "grad_norm": 0.3577110171318054, + "learning_rate": 3.854577058246998e-05, + "loss": 1.8213, + "step": 8800 + }, + { + "epoch": 0.929324894514768, + "grad_norm": 0.3506202697753906, + "learning_rate": 3.801447567030094e-05, + "loss": 1.8372, + "step": 8810 + }, + { + "epoch": 0.930379746835443, + "grad_norm": 0.349944531917572, + "learning_rate": 3.7490503851675777e-05, + "loss": 1.8253, + "step": 8820 + }, + { + "epoch": 0.9314345991561181, + "grad_norm": 0.3520268201828003, + "learning_rate": 3.6973754188870806e-05, + "loss": 1.8422, + "step": 8830 + }, + { + "epoch": 0.9324894514767933, + "grad_norm": 0.3507753610610962, + "learning_rate": 3.6464127135435536e-05, + "loss": 1.8241, + "step": 8840 + }, + { + "epoch": 0.9335443037974683, + "grad_norm": 0.35455137491226196, + "learning_rate": 3.596152451701616e-05, + "loss": 1.8224, + "step": 8850 + }, + { + "epoch": 0.9345991561181435, + "grad_norm": 0.3520313501358032, + "learning_rate": 3.5465849512443226e-05, + "loss": 1.8255, + "step": 8860 + }, + { + "epoch": 0.9356540084388185, + "grad_norm": 0.346407413482666, + "learning_rate": 3.4977006635080086e-05, + "loss": 1.8293, + "step": 8870 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 0.3564207851886749, + "learning_rate": 3.449490171442838e-05, + "loss": 1.8308, + "step": 8880 + }, + { + "epoch": 0.9377637130801688, + "grad_norm": 0.3574276566505432, + "learning_rate": 3.401944187798702e-05, + "loss": 1.8308, + "step": 8890 + }, + { + "epoch": 0.9388185654008439, + "grad_norm": 0.3549087643623352, + "learning_rate": 3.355053553336137e-05, + "loss": 1.821, + "step": 8900 + }, + { + "epoch": 0.939873417721519, + "grad_norm": 0.37319085001945496, + "learning_rate": 3.308809235061882e-05, + "loss": 1.8203, + "step": 8910 + }, + { + "epoch": 0.9409282700421941, + "grad_norm": 0.34253865480422974, + 
"learning_rate": 3.263202324488772e-05, + "loss": 1.8236, + "step": 8920 + }, + { + "epoch": 0.9419831223628692, + "grad_norm": 0.3503594696521759, + "learning_rate": 3.218224035919609e-05, + "loss": 1.8276, + "step": 8930 + }, + { + "epoch": 0.9430379746835443, + "grad_norm": 0.35104867815971375, + "learning_rate": 3.173865704754688e-05, + "loss": 1.8252, + "step": 8940 + }, + { + "epoch": 0.9440928270042194, + "grad_norm": 0.34539034962654114, + "learning_rate": 3.130118785822657e-05, + "loss": 1.8284, + "step": 8950 + }, + { + "epoch": 0.9451476793248945, + "grad_norm": 0.3551845848560333, + "learning_rate": 3.08697485173437e-05, + "loss": 1.8309, + "step": 8960 + }, + { + "epoch": 0.9462025316455697, + "grad_norm": 0.3424545228481293, + "learning_rate": 3.0444255912594442e-05, + "loss": 1.8306, + "step": 8970 + }, + { + "epoch": 0.9472573839662447, + "grad_norm": 0.35456904768943787, + "learning_rate": 3.002462807725185e-05, + "loss": 1.8216, + "step": 8980 + }, + { + "epoch": 0.9483122362869199, + "grad_norm": 0.35064905881881714, + "learning_rate": 2.9610784174375868e-05, + "loss": 1.8312, + "step": 8990 + }, + { + "epoch": 0.9493670886075949, + "grad_norm": 0.34413984417915344, + "learning_rate": 2.920264448124087e-05, + "loss": 1.8245, + "step": 9000 + }, + { + "epoch": 0.95042194092827, + "grad_norm": 0.34440872073173523, + "learning_rate": 2.8800130373977936e-05, + "loss": 1.8232, + "step": 9010 + }, + { + "epoch": 0.9514767932489452, + "grad_norm": 0.366588830947876, + "learning_rate": 2.84031643124288e-05, + "loss": 1.8241, + "step": 9020 + }, + { + "epoch": 0.9525316455696202, + "grad_norm": 0.3599245250225067, + "learning_rate": 2.8011669825208517e-05, + "loss": 1.8427, + "step": 9030 + }, + { + "epoch": 0.9535864978902954, + "grad_norm": 0.34534797072410583, + "learning_rate": 2.762557149497405e-05, + "loss": 1.8167, + "step": 9040 + }, + { + "epoch": 0.9546413502109705, + "grad_norm": 0.34453675150871277, + "learning_rate": 2.724479494389592e-05, + 
"loss": 1.8323, + "step": 9050 + }, + { + "epoch": 0.9556962025316456, + "grad_norm": 0.35049787163734436, + "learning_rate": 2.6869266819330058e-05, + "loss": 1.8328, + "step": 9060 + }, + { + "epoch": 0.9567510548523207, + "grad_norm": 0.34922167658805847, + "learning_rate": 2.6498914779687228e-05, + "loss": 1.8298, + "step": 9070 + }, + { + "epoch": 0.9578059071729957, + "grad_norm": 0.34359559416770935, + "learning_rate": 2.6133667480497115e-05, + "loss": 1.8293, + "step": 9080 + }, + { + "epoch": 0.9588607594936709, + "grad_norm": 0.3608853816986084, + "learning_rate": 2.5773454560664597e-05, + "loss": 1.8354, + "step": 9090 + }, + { + "epoch": 0.959915611814346, + "grad_norm": 0.3448154628276825, + "learning_rate": 2.541820662891541e-05, + "loss": 1.8088, + "step": 9100 + }, + { + "epoch": 0.9609704641350211, + "grad_norm": 0.35206976532936096, + "learning_rate": 2.5067855250428616e-05, + "loss": 1.8256, + "step": 9110 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 0.3525342047214508, + "learning_rate": 2.472233293365335e-05, + "loss": 1.8293, + "step": 9120 + }, + { + "epoch": 0.9630801687763713, + "grad_norm": 0.3429815173149109, + "learning_rate": 2.4381573117307307e-05, + "loss": 1.8172, + "step": 9130 + }, + { + "epoch": 0.9641350210970464, + "grad_norm": 0.34919753670692444, + "learning_rate": 2.4045510157554362e-05, + "loss": 1.8359, + "step": 9140 + }, + { + "epoch": 0.9651898734177216, + "grad_norm": 0.3607841432094574, + "learning_rate": 2.3714079315358985e-05, + "loss": 1.8267, + "step": 9150 + }, + { + "epoch": 0.9662447257383966, + "grad_norm": 0.3457028269767761, + "learning_rate": 2.338721674401494e-05, + "loss": 1.8331, + "step": 9160 + }, + { + "epoch": 0.9672995780590717, + "grad_norm": 0.3522280752658844, + "learning_rate": 2.30648594768459e-05, + "loss": 1.8313, + "step": 9170 + }, + { + "epoch": 0.9683544303797469, + "grad_norm": 0.3416561186313629, + "learning_rate": 2.2746945415075523e-05, + "loss": 1.827, + "step": 9180 + }, + 
{ + "epoch": 0.9694092827004219, + "grad_norm": 0.35568854212760925, + "learning_rate": 2.2433413315864803e-05, + "loss": 1.8216, + "step": 9190 + }, + { + "epoch": 0.9704641350210971, + "grad_norm": 0.3549978733062744, + "learning_rate": 2.2124202780514277e-05, + "loss": 1.8219, + "step": 9200 + }, + { + "epoch": 0.9715189873417721, + "grad_norm": 0.3559131920337677, + "learning_rate": 2.1819254242828815e-05, + "loss": 1.8254, + "step": 9210 + }, + { + "epoch": 0.9725738396624473, + "grad_norm": 0.3483375906944275, + "learning_rate": 2.151850895764285e-05, + "loss": 1.8242, + "step": 9220 + }, + { + "epoch": 0.9736286919831224, + "grad_norm": 0.3538123667240143, + "learning_rate": 2.12219089895037e-05, + "loss": 1.8114, + "step": 9230 + }, + { + "epoch": 0.9746835443037974, + "grad_norm": 0.3543187975883484, + "learning_rate": 2.092939720151092e-05, + "loss": 1.8096, + "step": 9240 + }, + { + "epoch": 0.9757383966244726, + "grad_norm": 0.34672802686691284, + "learning_rate": 2.064091724430947e-05, + "loss": 1.8195, + "step": 9250 + }, + { + "epoch": 0.9767932489451476, + "grad_norm": 0.3511864244937897, + "learning_rate": 2.0356413545234603e-05, + "loss": 1.8189, + "step": 9260 + }, + { + "epoch": 0.9778481012658228, + "grad_norm": 0.33919981122016907, + "learning_rate": 2.0075831297606357e-05, + "loss": 1.8304, + "step": 9270 + }, + { + "epoch": 0.9789029535864979, + "grad_norm": 0.34800052642822266, + "learning_rate": 1.9799116450171627e-05, + "loss": 1.8168, + "step": 9280 + }, + { + "epoch": 0.979957805907173, + "grad_norm": 0.35135072469711304, + "learning_rate": 1.952621569669175e-05, + "loss": 1.8247, + "step": 9290 + }, + { + "epoch": 0.9810126582278481, + "grad_norm": 0.35404956340789795, + "learning_rate": 1.9257076465673605e-05, + "loss": 1.8213, + "step": 9300 + }, + { + "epoch": 0.9820675105485233, + "grad_norm": 0.3485180735588074, + "learning_rate": 1.899164691024229e-05, + "loss": 1.8222, + "step": 9310 + }, + { + "epoch": 0.9831223628691983, + 
"grad_norm": 0.35099634528160095, + "learning_rate": 1.872987589815331e-05, + "loss": 1.8157, + "step": 9320 + }, + { + "epoch": 0.9841772151898734, + "grad_norm": 0.3508138060569763, + "learning_rate": 1.8471713001942538e-05, + "loss": 1.8287, + "step": 9330 + }, + { + "epoch": 0.9852320675105485, + "grad_norm": 0.3524887263774872, + "learning_rate": 1.8217108489211845e-05, + "loss": 1.8299, + "step": 9340 + }, + { + "epoch": 0.9862869198312236, + "grad_norm": 0.34672585129737854, + "learning_rate": 1.7966013313048696e-05, + "loss": 1.8415, + "step": 9350 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 0.35972487926483154, + "learning_rate": 1.7718379102577752e-05, + "loss": 1.8344, + "step": 9360 + }, + { + "epoch": 0.9883966244725738, + "grad_norm": 0.3603551685810089, + "learning_rate": 1.7474158153642745e-05, + "loss": 1.8302, + "step": 9370 + }, + { + "epoch": 0.989451476793249, + "grad_norm": 0.35453343391418457, + "learning_rate": 1.7233303419616745e-05, + "loss": 1.8158, + "step": 9380 + }, + { + "epoch": 0.990506329113924, + "grad_norm": 0.3581453561782837, + "learning_rate": 1.699576850233916e-05, + "loss": 1.8205, + "step": 9390 + }, + { + "epoch": 0.9915611814345991, + "grad_norm": 0.34704574942588806, + "learning_rate": 1.6761507643177553e-05, + "loss": 1.8312, + "step": 9400 + }, + { + "epoch": 0.9926160337552743, + "grad_norm": 0.353828489780426, + "learning_rate": 1.6530475714212752e-05, + "loss": 1.8063, + "step": 9410 + }, + { + "epoch": 0.9936708860759493, + "grad_norm": 0.3566914498806, + "learning_rate": 1.6302628209545423e-05, + "loss": 1.826, + "step": 9420 + }, + { + "epoch": 0.9947257383966245, + "grad_norm": 0.3434826731681824, + "learning_rate": 1.6077921236722464e-05, + "loss": 1.8289, + "step": 9430 + }, + { + "epoch": 0.9957805907172996, + "grad_norm": 0.34118619561195374, + "learning_rate": 1.5856311508281594e-05, + "loss": 1.8211, + "step": 9440 + }, + { + "epoch": 0.9968354430379747, + "grad_norm": 0.35303235054016113, + 
"learning_rate": 1.5637756333412454e-05, + "loss": 1.8344, + "step": 9450 + }, + { + "epoch": 0.9978902953586498, + "grad_norm": 0.35012632608413696, + "learning_rate": 1.542221360973268e-05, + "loss": 1.8168, + "step": 9460 + }, + { + "epoch": 0.9989451476793249, + "grad_norm": 0.3457571864128113, + "learning_rate": 1.5209641815177312e-05, + "loss": 1.832, + "step": 9470 + }, + { + "epoch": 1.0, + "grad_norm": 1.0240792036056519, + "learning_rate": 1.5e-05, + "loss": 1.819, + "step": 9480 + } + ], + "logging_steps": 10, + "max_steps": 9480, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.8391618477891584e+16, + "train_batch_size": 1024, + "trial_name": null, + "trial_params": null +} diff --git a/saves-gpt2/checkpoint-9480/training_args.bin b/saves-gpt2/checkpoint-9480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..008ec1e6ff7e40f74546e9c477aa6ea91783219c --- /dev/null +++ b/saves-gpt2/checkpoint-9480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f9320d6629c2881494b4f3623d708703ac62872913867bd6e9f102178e33ff2 +size 5112 diff --git a/saves-gpt2/config.json b/saves-gpt2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c41b5987863e5a36912ae874f26bce471afeca9f --- /dev/null +++ b/saves-gpt2/config.json @@ -0,0 +1,34 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "hidden_act": "gelu", + "initializer_range": 0.02, + "intermediate_size": 1024, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_embd": 256, + "n_head": 8, + "n_inner": null, + 
"n_layer": 2, + "n_positions": 1024, + "num_key_value_heads": 8, + "reorder_and_upcast_attn": false, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": false, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.1, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-gpt2/generation_config.json b/saves-gpt2/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..fb2eba6018c75d5bca061373b0ddaa2abf0a1f68 --- /dev/null +++ b/saves-gpt2/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 50256, + "eos_token_id": 50256, + "transformers_version": "4.42.4" +} diff --git a/saves-gpt2/model.safetensors b/saves-gpt2/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b6214ce7d099d05bef8f1f25dd07b28bd6c81be6 --- /dev/null +++ b/saves-gpt2/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4ed12a44fd33ff3f052a87e7cb6d4d67dc97b33b100593f59f8c8816ee4515f +size 9419432 diff --git a/saves-gpt2/result.log b/saves-gpt2/result.log new file mode 100644 index 0000000000000000000000000000000000000000..da13feaafa64acc2ff5b0b2d76cd6afc2755d778 --- /dev/null +++ b/saves-gpt2/result.log @@ -0,0 +1 @@ +{'train_runtime': 2686.7098, 'train_samples_per_second': 3612.822, 'train_steps_per_second': 3.528, 'train_loss': 2.11005329763839, 'epoch': 1.0} \ No newline at end of file diff --git a/saves-gpt2/special_tokens_map.json b/saves-gpt2/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-gpt2/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-gpt2/tokenizer.json b/saves-gpt2/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-gpt2/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": 
{ + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 
172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + 
"è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + 
"and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, 
+ "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 
763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, 
+ "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 
1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, 
+ "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + 
"åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 
1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + 
"ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + 
"Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 
1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + 
"ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-gpt2/tokenizer_config.json b/saves-gpt2/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-gpt2/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "model_max_length": 4096, 
+ "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-gpt_neox-cosine/checkpoint-9480/config.json b/saves-gpt_neox-cosine/checkpoint-9480/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c216fcd1855cc1122a6b9a742b143fdcac99903d --- /dev/null +++ b/saves-gpt_neox-cosine/checkpoint-9480/config.json @@ -0,0 +1,30 @@ +{ + "architectures": [ + "GPTNeoXForCausalLM" + ], + "attention_bias": true, + "attention_dropout": 0.0, + "bos_token_id": 0, + "classifier_dropout": 0.1, + "eos_token_id": 2, + "hidden_act": "gelu", + "hidden_dropout": 0.0, + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 1024, + "layer_norm_eps": 1e-05, + "max_position_embeddings": 2048, + "model_type": "gpt_neox", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "rope_scaling": null, + "rotary_emb_base": 10000, + "rotary_pct": 0.25, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "use_parallel_residual": true, + "vocab_size": 2000 +} diff --git a/saves-gpt_neox-cosine/checkpoint-9480/generation_config.json b/saves-gpt_neox-cosine/checkpoint-9480/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f3d6e313c9ea91dde2131852f3f2423673d6a38e --- /dev/null +++ b/saves-gpt_neox-cosine/checkpoint-9480/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 0, + "eos_token_id": 2, + "transformers_version": "4.42.4" +} diff --git a/saves-gpt_neox-cosine/checkpoint-9480/model.safetensors b/saves-gpt_neox-cosine/checkpoint-9480/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d39cdd37c18d20ed772dd4e241c61eb243408e90 --- /dev/null +++ b/saves-gpt_neox-cosine/checkpoint-9480/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:910ad63d2c858a23713e80e323383d386e45356521438c8ea9b7d75a28c5e2f7 +size 8371104 diff --git a/saves-gpt_neox-cosine/checkpoint-9480/optimizer.pt b/saves-gpt_neox-cosine/checkpoint-9480/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..647b525adb901b704655dbefac74f7adb7d1122b --- /dev/null +++ b/saves-gpt_neox-cosine/checkpoint-9480/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecd404539a64c0b012e0cbd9706331e560625059c9e1fab5c14f31659b2d2508 +size 16759309 diff --git a/saves-gpt_neox-cosine/checkpoint-9480/rng_state.pth b/saves-gpt_neox-cosine/checkpoint-9480/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f8291cd6ce87668b786a72f3e93d072fbe54902 --- /dev/null +++ b/saves-gpt_neox-cosine/checkpoint-9480/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c917636c7a58af68a29056522a757e9f9b99005b776641aa157c536967817d +size 14244 diff --git a/saves-gpt_neox-cosine/checkpoint-9480/scheduler.pt b/saves-gpt_neox-cosine/checkpoint-9480/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..03c145297021546d40e130546440641e02059bcb --- /dev/null +++ b/saves-gpt_neox-cosine/checkpoint-9480/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35fd617624c087e1a286ed7cf3fa38baa4a8815e49f107c3186b4c7c58e1adbb +size 1064 diff --git a/saves-gpt_neox-cosine/checkpoint-9480/special_tokens_map.json b/saves-gpt_neox-cosine/checkpoint-9480/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-gpt_neox-cosine/checkpoint-9480/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-gpt_neox-cosine/checkpoint-9480/tokenizer.json b/saves-gpt_neox-cosine/checkpoint-9480/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-gpt_neox-cosine/checkpoint-9480/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + 
"<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 
174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + 
"ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + 
"Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, 
+ "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 
765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, 
+ "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + 
".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + 
"éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 
1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + 
"æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 
1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, 
+ "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + 
"Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 
1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-gpt_neox-cosine/checkpoint-9480/tokenizer_config.json b/saves-gpt_neox-cosine/checkpoint-9480/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-gpt_neox-cosine/checkpoint-9480/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, 
+ "eos_token": "<|im_end|>", + "errors": "replace", + "model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-gpt_neox-cosine/checkpoint-9480/trainer_state.json b/saves-gpt_neox-cosine/checkpoint-9480/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ebf0034951b2db48d6149b5427ead826317071ef --- /dev/null +++ b/saves-gpt_neox-cosine/checkpoint-9480/trainer_state.json @@ -0,0 +1,66393 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 9480, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00010548523206751055, + "grad_norm": 1.9368362426757812, + "learning_rate": 1.5789473684210526e-05, + "loss": 7.6185, + "step": 1 + }, + { + "epoch": 0.0002109704641350211, + "grad_norm": 1.9203377962112427, + "learning_rate": 3.157894736842105e-05, + "loss": 7.6183, + "step": 2 + }, + { + "epoch": 0.00031645569620253165, + "grad_norm": 1.9085681438446045, + "learning_rate": 4.736842105263158e-05, + "loss": 7.598, + "step": 3 + }, + { + "epoch": 0.0004219409282700422, + "grad_norm": 1.9005531072616577, + "learning_rate": 6.31578947368421e-05, + "loss": 7.5567, + "step": 4 + }, + { + "epoch": 0.0005274261603375527, + "grad_norm": 1.8713489770889282, + "learning_rate": 7.894736842105263e-05, + "loss": 7.496, + "step": 5 + }, + { + "epoch": 0.0006329113924050633, + "grad_norm": 1.6959880590438843, + "learning_rate": 9.473684210526316e-05, + "loss": 7.4303, + "step": 6 + }, + { + "epoch": 0.0007383966244725738, + "grad_norm": 1.5679640769958496, + "learning_rate": 0.00011052631578947368, + "loss": 7.3473, + "step": 7 + }, + { + "epoch": 0.0008438818565400844, + "grad_norm": 1.4077574014663696, + "learning_rate": 0.0001263157894736842, + "loss": 7.2639, + "step": 8 + }, + { + "epoch": 
0.0009493670886075949, + "grad_norm": 1.313571810722351, + "learning_rate": 0.00014210526315789474, + "loss": 7.1786, + "step": 9 + }, + { + "epoch": 0.0010548523206751054, + "grad_norm": 1.2082715034484863, + "learning_rate": 0.00015789473684210527, + "loss": 7.1033, + "step": 10 + }, + { + "epoch": 0.001160337552742616, + "grad_norm": 1.173341989517212, + "learning_rate": 0.0001736842105263158, + "loss": 7.0207, + "step": 11 + }, + { + "epoch": 0.0012658227848101266, + "grad_norm": 1.1311299800872803, + "learning_rate": 0.00018947368421052632, + "loss": 6.9529, + "step": 12 + }, + { + "epoch": 0.0013713080168776372, + "grad_norm": 1.1174957752227783, + "learning_rate": 0.00020526315789473685, + "loss": 6.8865, + "step": 13 + }, + { + "epoch": 0.0014767932489451476, + "grad_norm": 1.121137261390686, + "learning_rate": 0.00022105263157894735, + "loss": 6.8225, + "step": 14 + }, + { + "epoch": 0.0015822784810126582, + "grad_norm": 1.110561728477478, + "learning_rate": 0.00023684210526315788, + "loss": 6.7671, + "step": 15 + }, + { + "epoch": 0.0016877637130801688, + "grad_norm": 1.1065150499343872, + "learning_rate": 0.0002526315789473684, + "loss": 6.7163, + "step": 16 + }, + { + "epoch": 0.0017932489451476794, + "grad_norm": 1.1164034605026245, + "learning_rate": 0.00026842105263157897, + "loss": 6.6395, + "step": 17 + }, + { + "epoch": 0.0018987341772151898, + "grad_norm": 1.104793667793274, + "learning_rate": 0.00028421052631578947, + "loss": 6.5775, + "step": 18 + }, + { + "epoch": 0.0020042194092827004, + "grad_norm": 1.099815845489502, + "learning_rate": 0.00030000000000000003, + "loss": 6.5045, + "step": 19 + }, + { + "epoch": 0.002109704641350211, + "grad_norm": 1.069320559501648, + "learning_rate": 0.00031578947368421053, + "loss": 6.4512, + "step": 20 + }, + { + "epoch": 0.0022151898734177216, + "grad_norm": 1.0516937971115112, + "learning_rate": 0.00033157894736842103, + "loss": 6.3907, + "step": 21 + }, + { + "epoch": 0.002320675105485232, + 
"grad_norm": 1.0511863231658936, + "learning_rate": 0.0003473684210526316, + "loss": 6.3238, + "step": 22 + }, + { + "epoch": 0.002426160337552743, + "grad_norm": 1.0420417785644531, + "learning_rate": 0.0003631578947368421, + "loss": 6.2553, + "step": 23 + }, + { + "epoch": 0.002531645569620253, + "grad_norm": 1.0283982753753662, + "learning_rate": 0.00037894736842105265, + "loss": 6.1892, + "step": 24 + }, + { + "epoch": 0.0026371308016877636, + "grad_norm": 0.9922574758529663, + "learning_rate": 0.00039473684210526315, + "loss": 6.1389, + "step": 25 + }, + { + "epoch": 0.0027426160337552744, + "grad_norm": 0.9907295107841492, + "learning_rate": 0.0004105263157894737, + "loss": 6.0645, + "step": 26 + }, + { + "epoch": 0.002848101265822785, + "grad_norm": 0.9410306215286255, + "learning_rate": 0.0004263157894736842, + "loss": 6.0315, + "step": 27 + }, + { + "epoch": 0.002953586497890295, + "grad_norm": 0.9313958287239075, + "learning_rate": 0.0004421052631578947, + "loss": 5.9485, + "step": 28 + }, + { + "epoch": 0.003059071729957806, + "grad_norm": 0.9102941155433655, + "learning_rate": 0.00045789473684210527, + "loss": 5.8974, + "step": 29 + }, + { + "epoch": 0.0031645569620253164, + "grad_norm": 0.8673009276390076, + "learning_rate": 0.00047368421052631577, + "loss": 5.8581, + "step": 30 + }, + { + "epoch": 0.003270042194092827, + "grad_norm": 0.849884569644928, + "learning_rate": 0.0004894736842105264, + "loss": 5.7911, + "step": 31 + }, + { + "epoch": 0.0033755274261603376, + "grad_norm": 0.8369331359863281, + "learning_rate": 0.0005052631578947368, + "loss": 5.7228, + "step": 32 + }, + { + "epoch": 0.003481012658227848, + "grad_norm": 0.8185799717903137, + "learning_rate": 0.0005210526315789474, + "loss": 5.656, + "step": 33 + }, + { + "epoch": 0.003586497890295359, + "grad_norm": 0.7757487893104553, + "learning_rate": 0.0005368421052631579, + "loss": 5.6248, + "step": 34 + }, + { + "epoch": 0.003691983122362869, + "grad_norm": 0.7754783034324646, + 
"learning_rate": 0.0005526315789473684, + "loss": 5.5338, + "step": 35 + }, + { + "epoch": 0.0037974683544303796, + "grad_norm": 0.72757488489151, + "learning_rate": 0.0005684210526315789, + "loss": 5.506, + "step": 36 + }, + { + "epoch": 0.0039029535864978904, + "grad_norm": 0.7032455801963806, + "learning_rate": 0.0005842105263157895, + "loss": 5.4351, + "step": 37 + }, + { + "epoch": 0.004008438818565401, + "grad_norm": 0.6902341246604919, + "learning_rate": 0.0006000000000000001, + "loss": 5.4047, + "step": 38 + }, + { + "epoch": 0.004113924050632912, + "grad_norm": 0.6818068623542786, + "learning_rate": 0.0006157894736842105, + "loss": 5.3158, + "step": 39 + }, + { + "epoch": 0.004219409282700422, + "grad_norm": 0.6257638335227966, + "learning_rate": 0.0006315789473684211, + "loss": 5.2886, + "step": 40 + }, + { + "epoch": 0.004324894514767932, + "grad_norm": 0.5825420022010803, + "learning_rate": 0.0006473684210526316, + "loss": 5.244, + "step": 41 + }, + { + "epoch": 0.004430379746835443, + "grad_norm": 0.6056625843048096, + "learning_rate": 0.0006631578947368421, + "loss": 5.2016, + "step": 42 + }, + { + "epoch": 0.004535864978902953, + "grad_norm": 0.7627775073051453, + "learning_rate": 0.0006789473684210526, + "loss": 5.1209, + "step": 43 + }, + { + "epoch": 0.004641350210970464, + "grad_norm": 0.7250681519508362, + "learning_rate": 0.0006947368421052632, + "loss": 5.1268, + "step": 44 + }, + { + "epoch": 0.004746835443037975, + "grad_norm": 0.5275149345397949, + "learning_rate": 0.0007105263157894736, + "loss": 5.0318, + "step": 45 + }, + { + "epoch": 0.004852320675105486, + "grad_norm": 0.5834232568740845, + "learning_rate": 0.0007263157894736842, + "loss": 4.9784, + "step": 46 + }, + { + "epoch": 0.004957805907172996, + "grad_norm": 0.6878517270088196, + "learning_rate": 0.0007421052631578947, + "loss": 4.9591, + "step": 47 + }, + { + "epoch": 0.005063291139240506, + "grad_norm": 0.43467026948928833, + "learning_rate": 0.0007578947368421053, + "loss": 
4.9035, + "step": 48 + }, + { + "epoch": 0.005168776371308017, + "grad_norm": 0.6768479347229004, + "learning_rate": 0.0007736842105263159, + "loss": 4.8427, + "step": 49 + }, + { + "epoch": 0.005274261603375527, + "grad_norm": 0.6463989019393921, + "learning_rate": 0.0007894736842105263, + "loss": 4.8281, + "step": 50 + }, + { + "epoch": 0.005379746835443038, + "grad_norm": 0.3977185785770416, + "learning_rate": 0.0008052631578947369, + "loss": 4.7614, + "step": 51 + }, + { + "epoch": 0.005485232067510549, + "grad_norm": 0.7852441668510437, + "learning_rate": 0.0008210526315789474, + "loss": 4.7228, + "step": 52 + }, + { + "epoch": 0.005590717299578059, + "grad_norm": 0.47292619943618774, + "learning_rate": 0.0008368421052631579, + "loss": 4.6839, + "step": 53 + }, + { + "epoch": 0.00569620253164557, + "grad_norm": 0.45629453659057617, + "learning_rate": 0.0008526315789473684, + "loss": 4.6176, + "step": 54 + }, + { + "epoch": 0.0058016877637130804, + "grad_norm": 0.4455784559249878, + "learning_rate": 0.000868421052631579, + "loss": 4.5998, + "step": 55 + }, + { + "epoch": 0.00590717299578059, + "grad_norm": 0.3865415155887604, + "learning_rate": 0.0008842105263157894, + "loss": 4.543, + "step": 56 + }, + { + "epoch": 0.006012658227848101, + "grad_norm": 0.3908674716949463, + "learning_rate": 0.0009, + "loss": 4.5428, + "step": 57 + }, + { + "epoch": 0.006118143459915612, + "grad_norm": 0.40482595562934875, + "learning_rate": 0.0009157894736842105, + "loss": 4.5245, + "step": 58 + }, + { + "epoch": 0.006223628691983122, + "grad_norm": 0.35693830251693726, + "learning_rate": 0.0009315789473684211, + "loss": 4.469, + "step": 59 + }, + { + "epoch": 0.006329113924050633, + "grad_norm": 0.3280750811100006, + "learning_rate": 0.0009473684210526315, + "loss": 4.4237, + "step": 60 + }, + { + "epoch": 0.006434599156118144, + "grad_norm": 0.3060671389102936, + "learning_rate": 0.0009631578947368421, + "loss": 4.4165, + "step": 61 + }, + { + "epoch": 0.006540084388185654, + 
"grad_norm": 0.3245700001716614, + "learning_rate": 0.0009789473684210528, + "loss": 4.3653, + "step": 62 + }, + { + "epoch": 0.006645569620253164, + "grad_norm": 0.36110660433769226, + "learning_rate": 0.000994736842105263, + "loss": 4.3533, + "step": 63 + }, + { + "epoch": 0.006751054852320675, + "grad_norm": 0.4939956068992615, + "learning_rate": 0.0010105263157894737, + "loss": 4.3156, + "step": 64 + }, + { + "epoch": 0.006856540084388186, + "grad_norm": 0.6152052879333496, + "learning_rate": 0.0010263157894736842, + "loss": 4.316, + "step": 65 + }, + { + "epoch": 0.006962025316455696, + "grad_norm": 0.543520987033844, + "learning_rate": 0.0010421052631578948, + "loss": 4.2648, + "step": 66 + }, + { + "epoch": 0.007067510548523207, + "grad_norm": 0.6042988896369934, + "learning_rate": 0.0010578947368421053, + "loss": 4.2677, + "step": 67 + }, + { + "epoch": 0.007172995780590718, + "grad_norm": 0.7987458109855652, + "learning_rate": 0.0010736842105263159, + "loss": 4.2592, + "step": 68 + }, + { + "epoch": 0.007278481012658228, + "grad_norm": 0.5289169549942017, + "learning_rate": 0.0010894736842105264, + "loss": 4.1807, + "step": 69 + }, + { + "epoch": 0.007383966244725738, + "grad_norm": 0.5147881507873535, + "learning_rate": 0.0011052631578947368, + "loss": 4.1736, + "step": 70 + }, + { + "epoch": 0.007489451476793249, + "grad_norm": 0.44606542587280273, + "learning_rate": 0.0011210526315789473, + "loss": 4.195, + "step": 71 + }, + { + "epoch": 0.007594936708860759, + "grad_norm": 0.39063218235969543, + "learning_rate": 0.0011368421052631579, + "loss": 4.1535, + "step": 72 + }, + { + "epoch": 0.00770042194092827, + "grad_norm": 0.5850939750671387, + "learning_rate": 0.0011526315789473684, + "loss": 4.1324, + "step": 73 + }, + { + "epoch": 0.007805907172995781, + "grad_norm": 0.4907500743865967, + "learning_rate": 0.001168421052631579, + "loss": 4.1183, + "step": 74 + }, + { + "epoch": 0.007911392405063292, + "grad_norm": 0.4352447986602783, + "learning_rate": 
0.0011842105263157896, + "loss": 4.0908, + "step": 75 + }, + { + "epoch": 0.008016877637130802, + "grad_norm": 0.5259093046188354, + "learning_rate": 0.0012000000000000001, + "loss": 4.0869, + "step": 76 + }, + { + "epoch": 0.008122362869198312, + "grad_norm": 0.5635294914245605, + "learning_rate": 0.0012157894736842105, + "loss": 4.0735, + "step": 77 + }, + { + "epoch": 0.008227848101265823, + "grad_norm": 0.624469518661499, + "learning_rate": 0.001231578947368421, + "loss": 4.0608, + "step": 78 + }, + { + "epoch": 0.008333333333333333, + "grad_norm": 0.6659975647926331, + "learning_rate": 0.0012473684210526316, + "loss": 4.0377, + "step": 79 + }, + { + "epoch": 0.008438818565400843, + "grad_norm": 0.8619318008422852, + "learning_rate": 0.0012631578947368421, + "loss": 4.0413, + "step": 80 + }, + { + "epoch": 0.008544303797468355, + "grad_norm": 1.0287429094314575, + "learning_rate": 0.0012789473684210527, + "loss": 4.0164, + "step": 81 + }, + { + "epoch": 0.008649789029535865, + "grad_norm": 0.9093461036682129, + "learning_rate": 0.0012947368421052632, + "loss": 4.0172, + "step": 82 + }, + { + "epoch": 0.008755274261603375, + "grad_norm": 0.6640251278877258, + "learning_rate": 0.0013105263157894738, + "loss": 3.9771, + "step": 83 + }, + { + "epoch": 0.008860759493670886, + "grad_norm": 0.619200587272644, + "learning_rate": 0.0013263157894736841, + "loss": 3.9598, + "step": 84 + }, + { + "epoch": 0.008966244725738396, + "grad_norm": 0.5427945852279663, + "learning_rate": 0.0013421052631578947, + "loss": 3.9404, + "step": 85 + }, + { + "epoch": 0.009071729957805906, + "grad_norm": 0.6646769046783447, + "learning_rate": 0.0013578947368421052, + "loss": 3.9648, + "step": 86 + }, + { + "epoch": 0.009177215189873418, + "grad_norm": 0.7677651047706604, + "learning_rate": 0.0013736842105263158, + "loss": 3.9673, + "step": 87 + }, + { + "epoch": 0.009282700421940928, + "grad_norm": 0.7616223692893982, + "learning_rate": 0.0013894736842105264, + "loss": 3.8929, + "step": 
88 + }, + { + "epoch": 0.009388185654008438, + "grad_norm": 0.654658317565918, + "learning_rate": 0.001405263157894737, + "loss": 3.9011, + "step": 89 + }, + { + "epoch": 0.00949367088607595, + "grad_norm": 0.6831241250038147, + "learning_rate": 0.0014210526315789472, + "loss": 3.9024, + "step": 90 + }, + { + "epoch": 0.00959915611814346, + "grad_norm": 0.5162598490715027, + "learning_rate": 0.0014368421052631578, + "loss": 3.8836, + "step": 91 + }, + { + "epoch": 0.009704641350210971, + "grad_norm": 0.49562177062034607, + "learning_rate": 0.0014526315789473684, + "loss": 3.867, + "step": 92 + }, + { + "epoch": 0.009810126582278481, + "grad_norm": 0.4377480149269104, + "learning_rate": 0.0014684210526315791, + "loss": 3.8515, + "step": 93 + }, + { + "epoch": 0.009915611814345991, + "grad_norm": 0.5269975662231445, + "learning_rate": 0.0014842105263157895, + "loss": 3.8846, + "step": 94 + }, + { + "epoch": 0.010021097046413503, + "grad_norm": 0.5563451051712036, + "learning_rate": 0.0015, + "loss": 3.8422, + "step": 95 + }, + { + "epoch": 0.010126582278481013, + "grad_norm": 0.7432992458343506, + "learning_rate": 0.00149999995797938, + "loss": 3.8431, + "step": 96 + }, + { + "epoch": 0.010232067510548523, + "grad_norm": 0.8820921778678894, + "learning_rate": 0.001499999831917525, + "loss": 3.829, + "step": 97 + }, + { + "epoch": 0.010337552742616034, + "grad_norm": 0.7089058756828308, + "learning_rate": 0.001499999621814449, + "loss": 3.8073, + "step": 98 + }, + { + "epoch": 0.010443037974683544, + "grad_norm": 0.5810973048210144, + "learning_rate": 0.0014999993276701756, + "loss": 3.8326, + "step": 99 + }, + { + "epoch": 0.010548523206751054, + "grad_norm": 0.6441510319709778, + "learning_rate": 0.0014999989494847376, + "loss": 3.7945, + "step": 100 + }, + { + "epoch": 0.010654008438818566, + "grad_norm": 0.6529964208602905, + "learning_rate": 0.0014999984872581774, + "loss": 3.7981, + "step": 101 + }, + { + "epoch": 0.010759493670886076, + "grad_norm": 
0.706443190574646, + "learning_rate": 0.0014999979409905469, + "loss": 3.7532, + "step": 102 + }, + { + "epoch": 0.010864978902953586, + "grad_norm": 0.5683443546295166, + "learning_rate": 0.0014999973106819074, + "loss": 3.7424, + "step": 103 + }, + { + "epoch": 0.010970464135021098, + "grad_norm": 0.6603604555130005, + "learning_rate": 0.0014999965963323294, + "loss": 3.7327, + "step": 104 + }, + { + "epoch": 0.011075949367088608, + "grad_norm": 0.635434627532959, + "learning_rate": 0.0014999957979418927, + "loss": 3.7228, + "step": 105 + }, + { + "epoch": 0.011181434599156118, + "grad_norm": 0.5801493525505066, + "learning_rate": 0.0014999949155106874, + "loss": 3.7186, + "step": 106 + }, + { + "epoch": 0.01128691983122363, + "grad_norm": 0.5243746638298035, + "learning_rate": 0.0014999939490388115, + "loss": 3.7171, + "step": 107 + }, + { + "epoch": 0.01139240506329114, + "grad_norm": 0.5669111013412476, + "learning_rate": 0.0014999928985263743, + "loss": 3.7009, + "step": 108 + }, + { + "epoch": 0.01149789029535865, + "grad_norm": 0.49663156270980835, + "learning_rate": 0.001499991763973493, + "loss": 3.7059, + "step": 109 + }, + { + "epoch": 0.011603375527426161, + "grad_norm": 0.47695526480674744, + "learning_rate": 0.0014999905453802946, + "loss": 3.6673, + "step": 110 + }, + { + "epoch": 0.01170886075949367, + "grad_norm": 0.530243456363678, + "learning_rate": 0.0014999892427469156, + "loss": 3.6743, + "step": 111 + }, + { + "epoch": 0.01181434599156118, + "grad_norm": 0.41716378927230835, + "learning_rate": 0.0014999878560735024, + "loss": 3.6676, + "step": 112 + }, + { + "epoch": 0.011919831223628692, + "grad_norm": 0.46580803394317627, + "learning_rate": 0.0014999863853602101, + "loss": 3.6511, + "step": 113 + }, + { + "epoch": 0.012025316455696202, + "grad_norm": 0.415952205657959, + "learning_rate": 0.0014999848306072037, + "loss": 3.6565, + "step": 114 + }, + { + "epoch": 0.012130801687763712, + "grad_norm": 0.5027316808700562, + "learning_rate": 
0.0014999831918146571, + "loss": 3.6445, + "step": 115 + }, + { + "epoch": 0.012236286919831224, + "grad_norm": 0.7859313488006592, + "learning_rate": 0.001499981468982754, + "loss": 3.6408, + "step": 116 + }, + { + "epoch": 0.012341772151898734, + "grad_norm": 1.1927897930145264, + "learning_rate": 0.001499979662111688, + "loss": 3.6797, + "step": 117 + }, + { + "epoch": 0.012447257383966244, + "grad_norm": 0.8125998973846436, + "learning_rate": 0.0014999777712016607, + "loss": 3.6475, + "step": 118 + }, + { + "epoch": 0.012552742616033756, + "grad_norm": 0.8989803791046143, + "learning_rate": 0.0014999757962528846, + "loss": 3.6026, + "step": 119 + }, + { + "epoch": 0.012658227848101266, + "grad_norm": 0.6271620392799377, + "learning_rate": 0.0014999737372655805, + "loss": 3.6184, + "step": 120 + }, + { + "epoch": 0.012763713080168776, + "grad_norm": 0.5694255232810974, + "learning_rate": 0.0014999715942399798, + "loss": 3.6213, + "step": 121 + }, + { + "epoch": 0.012869198312236287, + "grad_norm": 0.6130005717277527, + "learning_rate": 0.001499969367176322, + "loss": 3.6103, + "step": 122 + }, + { + "epoch": 0.012974683544303797, + "grad_norm": 0.5075169801712036, + "learning_rate": 0.0014999670560748573, + "loss": 3.5665, + "step": 123 + }, + { + "epoch": 0.013080168776371307, + "grad_norm": 0.5048167705535889, + "learning_rate": 0.001499964660935844, + "loss": 3.5637, + "step": 124 + }, + { + "epoch": 0.013185654008438819, + "grad_norm": 0.5251450538635254, + "learning_rate": 0.0014999621817595509, + "loss": 3.5839, + "step": 125 + }, + { + "epoch": 0.013291139240506329, + "grad_norm": 0.5572177171707153, + "learning_rate": 0.0014999596185462556, + "loss": 3.5635, + "step": 126 + }, + { + "epoch": 0.01339662447257384, + "grad_norm": 0.38631471991539, + "learning_rate": 0.0014999569712962452, + "loss": 3.572, + "step": 127 + }, + { + "epoch": 0.01350210970464135, + "grad_norm": 0.5025709271430969, + "learning_rate": 0.0014999542400098169, + "loss": 3.5426, + 
"step": 128 + }, + { + "epoch": 0.01360759493670886, + "grad_norm": 0.4361988604068756, + "learning_rate": 0.0014999514246872762, + "loss": 3.5276, + "step": 129 + }, + { + "epoch": 0.013713080168776372, + "grad_norm": 0.4651295840740204, + "learning_rate": 0.0014999485253289388, + "loss": 3.5545, + "step": 130 + }, + { + "epoch": 0.013818565400843882, + "grad_norm": 0.4274992346763611, + "learning_rate": 0.0014999455419351297, + "loss": 3.4993, + "step": 131 + }, + { + "epoch": 0.013924050632911392, + "grad_norm": 0.4556259512901306, + "learning_rate": 0.001499942474506183, + "loss": 3.5493, + "step": 132 + }, + { + "epoch": 0.014029535864978904, + "grad_norm": 0.4968225955963135, + "learning_rate": 0.0014999393230424422, + "loss": 3.5319, + "step": 133 + }, + { + "epoch": 0.014135021097046414, + "grad_norm": 0.4672042429447174, + "learning_rate": 0.001499936087544261, + "loss": 3.4733, + "step": 134 + }, + { + "epoch": 0.014240506329113924, + "grad_norm": 0.49287617206573486, + "learning_rate": 0.001499932768012002, + "loss": 3.4878, + "step": 135 + }, + { + "epoch": 0.014345991561181435, + "grad_norm": 0.6202232241630554, + "learning_rate": 0.0014999293644460362, + "loss": 3.4858, + "step": 136 + }, + { + "epoch": 0.014451476793248945, + "grad_norm": 0.7870105504989624, + "learning_rate": 0.0014999258768467459, + "loss": 3.5048, + "step": 137 + }, + { + "epoch": 0.014556962025316455, + "grad_norm": 0.7071622014045715, + "learning_rate": 0.0014999223052145215, + "loss": 3.4519, + "step": 138 + }, + { + "epoch": 0.014662447257383967, + "grad_norm": 0.5311273336410522, + "learning_rate": 0.0014999186495497636, + "loss": 3.4435, + "step": 139 + }, + { + "epoch": 0.014767932489451477, + "grad_norm": 0.4408625364303589, + "learning_rate": 0.0014999149098528814, + "loss": 3.4401, + "step": 140 + }, + { + "epoch": 0.014873417721518987, + "grad_norm": 0.5375515818595886, + "learning_rate": 0.0014999110861242944, + "loss": 3.452, + "step": 141 + }, + { + "epoch": 
0.014978902953586498, + "grad_norm": 0.5978479981422424, + "learning_rate": 0.0014999071783644306, + "loss": 3.4447, + "step": 142 + }, + { + "epoch": 0.015084388185654008, + "grad_norm": 0.5157124400138855, + "learning_rate": 0.001499903186573728, + "loss": 3.4302, + "step": 143 + }, + { + "epoch": 0.015189873417721518, + "grad_norm": 0.5106412768363953, + "learning_rate": 0.001499899110752634, + "loss": 3.4325, + "step": 144 + }, + { + "epoch": 0.01529535864978903, + "grad_norm": 0.5273604989051819, + "learning_rate": 0.0014998949509016054, + "loss": 3.4467, + "step": 145 + }, + { + "epoch": 0.01540084388185654, + "grad_norm": 0.43443992733955383, + "learning_rate": 0.0014998907070211084, + "loss": 3.3968, + "step": 146 + }, + { + "epoch": 0.01550632911392405, + "grad_norm": 0.4058559238910675, + "learning_rate": 0.0014998863791116182, + "loss": 3.4308, + "step": 147 + }, + { + "epoch": 0.015611814345991562, + "grad_norm": 0.5080884695053101, + "learning_rate": 0.0014998819671736198, + "loss": 3.3736, + "step": 148 + }, + { + "epoch": 0.015717299578059073, + "grad_norm": 0.49516427516937256, + "learning_rate": 0.001499877471207608, + "loss": 3.4006, + "step": 149 + }, + { + "epoch": 0.015822784810126583, + "grad_norm": 0.6115649938583374, + "learning_rate": 0.0014998728912140862, + "loss": 3.3983, + "step": 150 + }, + { + "epoch": 0.015928270042194093, + "grad_norm": 0.5729655623435974, + "learning_rate": 0.0014998682271935677, + "loss": 3.4269, + "step": 151 + }, + { + "epoch": 0.016033755274261603, + "grad_norm": 0.6098612546920776, + "learning_rate": 0.0014998634791465752, + "loss": 3.3588, + "step": 152 + }, + { + "epoch": 0.016139240506329113, + "grad_norm": 0.5211475491523743, + "learning_rate": 0.001499858647073641, + "loss": 3.4106, + "step": 153 + }, + { + "epoch": 0.016244725738396623, + "grad_norm": 0.5128841996192932, + "learning_rate": 0.0014998537309753057, + "loss": 3.3508, + "step": 154 + }, + { + "epoch": 0.016350210970464137, + "grad_norm": 
0.4578143060207367, + "learning_rate": 0.001499848730852121, + "loss": 3.3672, + "step": 155 + }, + { + "epoch": 0.016455696202531647, + "grad_norm": 0.6261176466941833, + "learning_rate": 0.001499843646704647, + "loss": 3.3583, + "step": 156 + }, + { + "epoch": 0.016561181434599156, + "grad_norm": 0.5240982174873352, + "learning_rate": 0.0014998384785334532, + "loss": 3.3626, + "step": 157 + }, + { + "epoch": 0.016666666666666666, + "grad_norm": 0.4624999761581421, + "learning_rate": 0.0014998332263391192, + "loss": 3.3369, + "step": 158 + }, + { + "epoch": 0.016772151898734176, + "grad_norm": 0.4756961762905121, + "learning_rate": 0.0014998278901222327, + "loss": 3.349, + "step": 159 + }, + { + "epoch": 0.016877637130801686, + "grad_norm": 0.6912655234336853, + "learning_rate": 0.0014998224698833922, + "loss": 3.3517, + "step": 160 + }, + { + "epoch": 0.0169831223628692, + "grad_norm": 0.8258196115493774, + "learning_rate": 0.0014998169656232053, + "loss": 3.3194, + "step": 161 + }, + { + "epoch": 0.01708860759493671, + "grad_norm": 0.9224663972854614, + "learning_rate": 0.0014998113773422883, + "loss": 3.38, + "step": 162 + }, + { + "epoch": 0.01719409282700422, + "grad_norm": 0.9397643208503723, + "learning_rate": 0.0014998057050412674, + "loss": 3.352, + "step": 163 + }, + { + "epoch": 0.01729957805907173, + "grad_norm": 0.688988447189331, + "learning_rate": 0.0014997999487207786, + "loss": 3.3266, + "step": 164 + }, + { + "epoch": 0.01740506329113924, + "grad_norm": 0.6608019471168518, + "learning_rate": 0.0014997941083814666, + "loss": 3.3407, + "step": 165 + }, + { + "epoch": 0.01751054852320675, + "grad_norm": 0.6549971103668213, + "learning_rate": 0.001499788184023986, + "loss": 3.3312, + "step": 166 + }, + { + "epoch": 0.017616033755274263, + "grad_norm": 0.5772607922554016, + "learning_rate": 0.0014997821756490008, + "loss": 3.2949, + "step": 167 + }, + { + "epoch": 0.017721518987341773, + "grad_norm": 0.5963979363441467, + "learning_rate": 
0.0014997760832571839, + "loss": 3.2879, + "step": 168 + }, + { + "epoch": 0.017827004219409283, + "grad_norm": 0.55171138048172, + "learning_rate": 0.001499769906849218, + "loss": 3.2735, + "step": 169 + }, + { + "epoch": 0.017932489451476793, + "grad_norm": 0.4522080421447754, + "learning_rate": 0.0014997636464257956, + "loss": 3.2848, + "step": 170 + }, + { + "epoch": 0.018037974683544303, + "grad_norm": 0.5875396132469177, + "learning_rate": 0.0014997573019876179, + "loss": 3.2689, + "step": 171 + }, + { + "epoch": 0.018143459915611813, + "grad_norm": 0.5888291001319885, + "learning_rate": 0.0014997508735353957, + "loss": 3.3131, + "step": 172 + }, + { + "epoch": 0.018248945147679326, + "grad_norm": 0.5993974804878235, + "learning_rate": 0.0014997443610698497, + "loss": 3.2957, + "step": 173 + }, + { + "epoch": 0.018354430379746836, + "grad_norm": 0.4968287944793701, + "learning_rate": 0.0014997377645917095, + "loss": 3.2546, + "step": 174 + }, + { + "epoch": 0.018459915611814346, + "grad_norm": 0.5042872428894043, + "learning_rate": 0.001499731084101714, + "loss": 3.3065, + "step": 175 + }, + { + "epoch": 0.018565400843881856, + "grad_norm": 0.500234842300415, + "learning_rate": 0.0014997243196006125, + "loss": 3.2767, + "step": 176 + }, + { + "epoch": 0.018670886075949366, + "grad_norm": 0.49470505118370056, + "learning_rate": 0.001499717471089162, + "loss": 3.2796, + "step": 177 + }, + { + "epoch": 0.018776371308016876, + "grad_norm": 0.5040004849433899, + "learning_rate": 0.0014997105385681306, + "loss": 3.269, + "step": 178 + }, + { + "epoch": 0.01888185654008439, + "grad_norm": 0.46751224994659424, + "learning_rate": 0.001499703522038295, + "loss": 3.2803, + "step": 179 + }, + { + "epoch": 0.0189873417721519, + "grad_norm": 0.4917394816875458, + "learning_rate": 0.0014996964215004416, + "loss": 3.2542, + "step": 180 + }, + { + "epoch": 0.01909282700421941, + "grad_norm": 0.4489418864250183, + "learning_rate": 0.0014996892369553655, + "loss": 3.2622, + 
"step": 181 + }, + { + "epoch": 0.01919831223628692, + "grad_norm": 0.4459364116191864, + "learning_rate": 0.0014996819684038726, + "loss": 3.2446, + "step": 182 + }, + { + "epoch": 0.01930379746835443, + "grad_norm": 0.4650493860244751, + "learning_rate": 0.0014996746158467762, + "loss": 3.2194, + "step": 183 + }, + { + "epoch": 0.019409282700421943, + "grad_norm": 0.44979333877563477, + "learning_rate": 0.0014996671792849015, + "loss": 3.2525, + "step": 184 + }, + { + "epoch": 0.019514767932489453, + "grad_norm": 0.5069284439086914, + "learning_rate": 0.001499659658719081, + "loss": 3.2062, + "step": 185 + }, + { + "epoch": 0.019620253164556962, + "grad_norm": 0.49671170115470886, + "learning_rate": 0.0014996520541501574, + "loss": 3.1897, + "step": 186 + }, + { + "epoch": 0.019725738396624472, + "grad_norm": 0.493662029504776, + "learning_rate": 0.0014996443655789832, + "loss": 3.1787, + "step": 187 + }, + { + "epoch": 0.019831223628691982, + "grad_norm": 0.3872704803943634, + "learning_rate": 0.0014996365930064197, + "loss": 3.1759, + "step": 188 + }, + { + "epoch": 0.019936708860759492, + "grad_norm": 0.46454140543937683, + "learning_rate": 0.001499628736433338, + "loss": 3.1854, + "step": 189 + }, + { + "epoch": 0.020042194092827006, + "grad_norm": 0.46753841638565063, + "learning_rate": 0.0014996207958606182, + "loss": 3.2209, + "step": 190 + }, + { + "epoch": 0.020147679324894516, + "grad_norm": 0.5108124017715454, + "learning_rate": 0.0014996127712891504, + "loss": 3.1718, + "step": 191 + }, + { + "epoch": 0.020253164556962026, + "grad_norm": 0.5921775102615356, + "learning_rate": 0.0014996046627198337, + "loss": 3.2202, + "step": 192 + }, + { + "epoch": 0.020358649789029536, + "grad_norm": 0.6913837790489197, + "learning_rate": 0.0014995964701535768, + "loss": 3.1535, + "step": 193 + }, + { + "epoch": 0.020464135021097046, + "grad_norm": 0.9136589765548706, + "learning_rate": 0.0014995881935912973, + "loss": 3.2077, + "step": 194 + }, + { + "epoch": 
0.020569620253164556, + "grad_norm": 0.9961623549461365, + "learning_rate": 0.0014995798330339233, + "loss": 3.2002, + "step": 195 + }, + { + "epoch": 0.02067510548523207, + "grad_norm": 0.7285858392715454, + "learning_rate": 0.001499571388482391, + "loss": 3.1767, + "step": 196 + }, + { + "epoch": 0.02078059071729958, + "grad_norm": 0.6457739472389221, + "learning_rate": 0.001499562859937647, + "loss": 3.1671, + "step": 197 + }, + { + "epoch": 0.02088607594936709, + "grad_norm": 0.7176914811134338, + "learning_rate": 0.001499554247400647, + "loss": 3.1943, + "step": 198 + }, + { + "epoch": 0.0209915611814346, + "grad_norm": 0.5451287031173706, + "learning_rate": 0.0014995455508723557, + "loss": 3.1792, + "step": 199 + }, + { + "epoch": 0.02109704641350211, + "grad_norm": 0.5095003247261047, + "learning_rate": 0.001499536770353748, + "loss": 3.1287, + "step": 200 + }, + { + "epoch": 0.02120253164556962, + "grad_norm": 0.6194679737091064, + "learning_rate": 0.0014995279058458075, + "loss": 3.1752, + "step": 201 + }, + { + "epoch": 0.021308016877637132, + "grad_norm": 0.6032462120056152, + "learning_rate": 0.001499518957349528, + "loss": 3.1613, + "step": 202 + }, + { + "epoch": 0.021413502109704642, + "grad_norm": 0.5000930428504944, + "learning_rate": 0.0014995099248659115, + "loss": 3.1573, + "step": 203 + }, + { + "epoch": 0.021518987341772152, + "grad_norm": 0.516535222530365, + "learning_rate": 0.001499500808395971, + "loss": 3.1408, + "step": 204 + }, + { + "epoch": 0.021624472573839662, + "grad_norm": 0.5578704476356506, + "learning_rate": 0.0014994916079407272, + "loss": 3.1221, + "step": 205 + }, + { + "epoch": 0.021729957805907172, + "grad_norm": 0.5374552607536316, + "learning_rate": 0.0014994823235012114, + "loss": 3.1054, + "step": 206 + }, + { + "epoch": 0.021835443037974682, + "grad_norm": 0.4974895119667053, + "learning_rate": 0.0014994729550784642, + "loss": 3.1363, + "step": 207 + }, + { + "epoch": 0.021940928270042195, + "grad_norm": 
0.5993492007255554, + "learning_rate": 0.001499463502673535, + "loss": 3.1114, + "step": 208 + }, + { + "epoch": 0.022046413502109705, + "grad_norm": 0.5420301556587219, + "learning_rate": 0.0014994539662874832, + "loss": 3.1171, + "step": 209 + }, + { + "epoch": 0.022151898734177215, + "grad_norm": 0.5560747385025024, + "learning_rate": 0.0014994443459213774, + "loss": 3.1595, + "step": 210 + }, + { + "epoch": 0.022257383966244725, + "grad_norm": 0.5625800490379333, + "learning_rate": 0.0014994346415762956, + "loss": 3.1157, + "step": 211 + }, + { + "epoch": 0.022362869198312235, + "grad_norm": 0.6091681122779846, + "learning_rate": 0.0014994248532533253, + "loss": 3.1152, + "step": 212 + }, + { + "epoch": 0.022468354430379745, + "grad_norm": 0.4946531355381012, + "learning_rate": 0.001499414980953563, + "loss": 3.1356, + "step": 213 + }, + { + "epoch": 0.02257383966244726, + "grad_norm": 0.6350942850112915, + "learning_rate": 0.0014994050246781153, + "loss": 3.1176, + "step": 214 + }, + { + "epoch": 0.02267932489451477, + "grad_norm": 0.6282134056091309, + "learning_rate": 0.0014993949844280977, + "loss": 3.0787, + "step": 215 + }, + { + "epoch": 0.02278481012658228, + "grad_norm": 0.5356674790382385, + "learning_rate": 0.0014993848602046355, + "loss": 3.1027, + "step": 216 + }, + { + "epoch": 0.02289029535864979, + "grad_norm": 0.5169081091880798, + "learning_rate": 0.0014993746520088626, + "loss": 3.0975, + "step": 217 + }, + { + "epoch": 0.0229957805907173, + "grad_norm": 0.5350847244262695, + "learning_rate": 0.0014993643598419234, + "loss": 3.112, + "step": 218 + }, + { + "epoch": 0.023101265822784812, + "grad_norm": 0.5631005764007568, + "learning_rate": 0.0014993539837049707, + "loss": 3.1067, + "step": 219 + }, + { + "epoch": 0.023206751054852322, + "grad_norm": 0.5494160652160645, + "learning_rate": 0.001499343523599168, + "loss": 3.0677, + "step": 220 + }, + { + "epoch": 0.02331223628691983, + "grad_norm": 0.5427203178405762, + "learning_rate": 
0.0014993329795256864, + "loss": 3.0766, + "step": 221 + }, + { + "epoch": 0.02341772151898734, + "grad_norm": 0.6221828460693359, + "learning_rate": 0.0014993223514857081, + "loss": 3.0556, + "step": 222 + }, + { + "epoch": 0.02352320675105485, + "grad_norm": 0.6566352844238281, + "learning_rate": 0.001499311639480424, + "loss": 3.0647, + "step": 223 + }, + { + "epoch": 0.02362869198312236, + "grad_norm": 0.675548255443573, + "learning_rate": 0.0014993008435110345, + "loss": 3.0591, + "step": 224 + }, + { + "epoch": 0.023734177215189875, + "grad_norm": 0.5628158450126648, + "learning_rate": 0.0014992899635787487, + "loss": 3.0398, + "step": 225 + }, + { + "epoch": 0.023839662447257385, + "grad_norm": 0.5439023375511169, + "learning_rate": 0.0014992789996847863, + "loss": 3.0916, + "step": 226 + }, + { + "epoch": 0.023945147679324895, + "grad_norm": 0.4854242503643036, + "learning_rate": 0.0014992679518303761, + "loss": 3.0405, + "step": 227 + }, + { + "epoch": 0.024050632911392405, + "grad_norm": 0.4919106364250183, + "learning_rate": 0.001499256820016755, + "loss": 3.0419, + "step": 228 + }, + { + "epoch": 0.024156118143459915, + "grad_norm": 0.4817259609699249, + "learning_rate": 0.0014992456042451717, + "loss": 3.0297, + "step": 229 + }, + { + "epoch": 0.024261603375527425, + "grad_norm": 0.5855054259300232, + "learning_rate": 0.0014992343045168823, + "loss": 3.0554, + "step": 230 + }, + { + "epoch": 0.024367088607594938, + "grad_norm": 0.7613196969032288, + "learning_rate": 0.0014992229208331527, + "loss": 3.0448, + "step": 231 + }, + { + "epoch": 0.024472573839662448, + "grad_norm": 0.8803821802139282, + "learning_rate": 0.0014992114531952592, + "loss": 3.0657, + "step": 232 + }, + { + "epoch": 0.024578059071729958, + "grad_norm": 0.8196407556533813, + "learning_rate": 0.0014991999016044865, + "loss": 3.0135, + "step": 233 + }, + { + "epoch": 0.024683544303797468, + "grad_norm": 0.6888301968574524, + "learning_rate": 0.0014991882660621285, + "loss": 3.0611, + 
"step": 234 + }, + { + "epoch": 0.024789029535864978, + "grad_norm": 0.6065409779548645, + "learning_rate": 0.0014991765465694898, + "loss": 2.9913, + "step": 235 + }, + { + "epoch": 0.024894514767932488, + "grad_norm": 0.6487226486206055, + "learning_rate": 0.0014991647431278835, + "loss": 3.0266, + "step": 236 + }, + { + "epoch": 0.025, + "grad_norm": 0.6426769495010376, + "learning_rate": 0.001499152855738632, + "loss": 3.0049, + "step": 237 + }, + { + "epoch": 0.02510548523206751, + "grad_norm": 0.6510130167007446, + "learning_rate": 0.0014991408844030672, + "loss": 2.9991, + "step": 238 + }, + { + "epoch": 0.02521097046413502, + "grad_norm": 0.6036146283149719, + "learning_rate": 0.0014991288291225308, + "loss": 3.0235, + "step": 239 + }, + { + "epoch": 0.02531645569620253, + "grad_norm": 0.67680424451828, + "learning_rate": 0.0014991166898983739, + "loss": 3.0396, + "step": 240 + }, + { + "epoch": 0.02542194092827004, + "grad_norm": 0.6059534549713135, + "learning_rate": 0.001499104466731956, + "loss": 2.9829, + "step": 241 + }, + { + "epoch": 0.02552742616033755, + "grad_norm": 0.6804969310760498, + "learning_rate": 0.0014990921596246475, + "loss": 3.0166, + "step": 242 + }, + { + "epoch": 0.025632911392405065, + "grad_norm": 0.5541417002677917, + "learning_rate": 0.0014990797685778272, + "loss": 3.0213, + "step": 243 + }, + { + "epoch": 0.025738396624472575, + "grad_norm": 0.5253164768218994, + "learning_rate": 0.0014990672935928835, + "loss": 3.0064, + "step": 244 + }, + { + "epoch": 0.025843881856540084, + "grad_norm": 0.5536686182022095, + "learning_rate": 0.0014990547346712144, + "loss": 2.9909, + "step": 245 + }, + { + "epoch": 0.025949367088607594, + "grad_norm": 0.5037351250648499, + "learning_rate": 0.0014990420918142271, + "loss": 2.9894, + "step": 246 + }, + { + "epoch": 0.026054852320675104, + "grad_norm": 0.5075766444206238, + "learning_rate": 0.0014990293650233384, + "loss": 2.9546, + "step": 247 + }, + { + "epoch": 0.026160337552742614, + 
"grad_norm": 0.5581462383270264, + "learning_rate": 0.0014990165542999746, + "loss": 3.0096, + "step": 248 + }, + { + "epoch": 0.026265822784810128, + "grad_norm": 0.615016758441925, + "learning_rate": 0.0014990036596455706, + "loss": 2.9681, + "step": 249 + }, + { + "epoch": 0.026371308016877638, + "grad_norm": 0.5676863193511963, + "learning_rate": 0.001498990681061572, + "loss": 2.9501, + "step": 250 + }, + { + "epoch": 0.026476793248945148, + "grad_norm": 0.5635063648223877, + "learning_rate": 0.0014989776185494322, + "loss": 2.986, + "step": 251 + }, + { + "epoch": 0.026582278481012658, + "grad_norm": 0.4854801893234253, + "learning_rate": 0.001498964472110616, + "loss": 2.9574, + "step": 252 + }, + { + "epoch": 0.026687763713080168, + "grad_norm": 0.5047576427459717, + "learning_rate": 0.001498951241746596, + "loss": 2.9599, + "step": 253 + }, + { + "epoch": 0.02679324894514768, + "grad_norm": 0.5116012692451477, + "learning_rate": 0.0014989379274588546, + "loss": 2.9389, + "step": 254 + }, + { + "epoch": 0.02689873417721519, + "grad_norm": 0.48904216289520264, + "learning_rate": 0.0014989245292488839, + "loss": 2.94, + "step": 255 + }, + { + "epoch": 0.0270042194092827, + "grad_norm": 0.4978305995464325, + "learning_rate": 0.0014989110471181853, + "loss": 2.9125, + "step": 256 + }, + { + "epoch": 0.02710970464135021, + "grad_norm": 0.4638349115848541, + "learning_rate": 0.0014988974810682695, + "loss": 2.9507, + "step": 257 + }, + { + "epoch": 0.02721518987341772, + "grad_norm": 0.5305898785591125, + "learning_rate": 0.0014988838311006565, + "loss": 2.9659, + "step": 258 + }, + { + "epoch": 0.02732067510548523, + "grad_norm": 0.5045801997184753, + "learning_rate": 0.0014988700972168758, + "loss": 2.9632, + "step": 259 + }, + { + "epoch": 0.027426160337552744, + "grad_norm": 0.4949408173561096, + "learning_rate": 0.001498856279418467, + "loss": 2.9059, + "step": 260 + }, + { + "epoch": 0.027531645569620254, + "grad_norm": 0.5531179308891296, + 
"learning_rate": 0.0014988423777069775, + "loss": 2.9428, + "step": 261 + }, + { + "epoch": 0.027637130801687764, + "grad_norm": 0.6446875929832458, + "learning_rate": 0.0014988283920839658, + "loss": 2.9266, + "step": 262 + }, + { + "epoch": 0.027742616033755274, + "grad_norm": 0.7089143991470337, + "learning_rate": 0.0014988143225509983, + "loss": 2.9276, + "step": 263 + }, + { + "epoch": 0.027848101265822784, + "grad_norm": 0.7069312334060669, + "learning_rate": 0.0014988001691096525, + "loss": 2.944, + "step": 264 + }, + { + "epoch": 0.027953586497890294, + "grad_norm": 0.681413471698761, + "learning_rate": 0.0014987859317615137, + "loss": 2.929, + "step": 265 + }, + { + "epoch": 0.028059071729957807, + "grad_norm": 0.6448777318000793, + "learning_rate": 0.0014987716105081775, + "loss": 2.9237, + "step": 266 + }, + { + "epoch": 0.028164556962025317, + "grad_norm": 0.5562541484832764, + "learning_rate": 0.001498757205351249, + "loss": 2.9096, + "step": 267 + }, + { + "epoch": 0.028270042194092827, + "grad_norm": 0.6443012356758118, + "learning_rate": 0.0014987427162923416, + "loss": 2.8871, + "step": 268 + }, + { + "epoch": 0.028375527426160337, + "grad_norm": 0.7276369333267212, + "learning_rate": 0.001498728143333079, + "loss": 2.9269, + "step": 269 + }, + { + "epoch": 0.028481012658227847, + "grad_norm": 0.7183976173400879, + "learning_rate": 0.0014987134864750948, + "loss": 2.9242, + "step": 270 + }, + { + "epoch": 0.028586497890295357, + "grad_norm": 0.6751725673675537, + "learning_rate": 0.0014986987457200312, + "loss": 2.9244, + "step": 271 + }, + { + "epoch": 0.02869198312236287, + "grad_norm": 0.5437167882919312, + "learning_rate": 0.0014986839210695394, + "loss": 2.9014, + "step": 272 + }, + { + "epoch": 0.02879746835443038, + "grad_norm": 0.6835388541221619, + "learning_rate": 0.0014986690125252814, + "loss": 2.8924, + "step": 273 + }, + { + "epoch": 0.02890295358649789, + "grad_norm": 0.8452227115631104, + "learning_rate": 0.001498654020088927, + 
"loss": 2.9107, + "step": 274 + }, + { + "epoch": 0.0290084388185654, + "grad_norm": 0.7064874172210693, + "learning_rate": 0.0014986389437621566, + "loss": 2.9218, + "step": 275 + }, + { + "epoch": 0.02911392405063291, + "grad_norm": 0.6370548605918884, + "learning_rate": 0.0014986237835466596, + "loss": 2.8713, + "step": 276 + }, + { + "epoch": 0.02921940928270042, + "grad_norm": 0.8105698823928833, + "learning_rate": 0.0014986085394441343, + "loss": 2.9102, + "step": 277 + }, + { + "epoch": 0.029324894514767934, + "grad_norm": 0.6229732036590576, + "learning_rate": 0.0014985932114562896, + "loss": 2.836, + "step": 278 + }, + { + "epoch": 0.029430379746835444, + "grad_norm": 0.7308720350265503, + "learning_rate": 0.0014985777995848428, + "loss": 2.9017, + "step": 279 + }, + { + "epoch": 0.029535864978902954, + "grad_norm": 0.7451936602592468, + "learning_rate": 0.0014985623038315206, + "loss": 2.8699, + "step": 280 + }, + { + "epoch": 0.029641350210970464, + "grad_norm": 0.6558377742767334, + "learning_rate": 0.0014985467241980597, + "loss": 2.8534, + "step": 281 + }, + { + "epoch": 0.029746835443037974, + "grad_norm": 0.7517158389091492, + "learning_rate": 0.0014985310606862058, + "loss": 2.9169, + "step": 282 + }, + { + "epoch": 0.029852320675105484, + "grad_norm": 0.6237793564796448, + "learning_rate": 0.0014985153132977141, + "loss": 2.8489, + "step": 283 + }, + { + "epoch": 0.029957805907172997, + "grad_norm": 0.6977205872535706, + "learning_rate": 0.0014984994820343488, + "loss": 2.8593, + "step": 284 + }, + { + "epoch": 0.030063291139240507, + "grad_norm": 0.6135554313659668, + "learning_rate": 0.0014984835668978844, + "loss": 2.9018, + "step": 285 + }, + { + "epoch": 0.030168776371308017, + "grad_norm": 0.5886379480361938, + "learning_rate": 0.0014984675678901042, + "loss": 2.8786, + "step": 286 + }, + { + "epoch": 0.030274261603375527, + "grad_norm": 0.567982017993927, + "learning_rate": 0.0014984514850128006, + "loss": 2.8777, + "step": 287 + }, + { + 
"epoch": 0.030379746835443037, + "grad_norm": 0.5712073445320129, + "learning_rate": 0.0014984353182677759, + "loss": 2.8645, + "step": 288 + }, + { + "epoch": 0.03048523206751055, + "grad_norm": 0.5701545476913452, + "learning_rate": 0.001498419067656842, + "loss": 2.8621, + "step": 289 + }, + { + "epoch": 0.03059071729957806, + "grad_norm": 0.6395534873008728, + "learning_rate": 0.0014984027331818193, + "loss": 2.8517, + "step": 290 + }, + { + "epoch": 0.03069620253164557, + "grad_norm": 0.6830883622169495, + "learning_rate": 0.0014983863148445389, + "loss": 2.858, + "step": 291 + }, + { + "epoch": 0.03080168776371308, + "grad_norm": 0.7150924801826477, + "learning_rate": 0.0014983698126468398, + "loss": 2.879, + "step": 292 + }, + { + "epoch": 0.03090717299578059, + "grad_norm": 0.5498788356781006, + "learning_rate": 0.0014983532265905716, + "loss": 2.8572, + "step": 293 + }, + { + "epoch": 0.0310126582278481, + "grad_norm": 0.5126329660415649, + "learning_rate": 0.0014983365566775928, + "loss": 2.8108, + "step": 294 + }, + { + "epoch": 0.031118143459915613, + "grad_norm": 0.6240675449371338, + "learning_rate": 0.0014983198029097711, + "loss": 2.822, + "step": 295 + }, + { + "epoch": 0.031223628691983123, + "grad_norm": 0.5715270638465881, + "learning_rate": 0.0014983029652889843, + "loss": 2.8561, + "step": 296 + }, + { + "epoch": 0.03132911392405063, + "grad_norm": 0.491413950920105, + "learning_rate": 0.0014982860438171187, + "loss": 2.8669, + "step": 297 + }, + { + "epoch": 0.03143459915611815, + "grad_norm": 0.5250682830810547, + "learning_rate": 0.0014982690384960705, + "loss": 2.8461, + "step": 298 + }, + { + "epoch": 0.03154008438818565, + "grad_norm": 0.5915478467941284, + "learning_rate": 0.0014982519493277455, + "loss": 2.8031, + "step": 299 + }, + { + "epoch": 0.03164556962025317, + "grad_norm": 0.6295618414878845, + "learning_rate": 0.0014982347763140584, + "loss": 2.8252, + "step": 300 + }, + { + "epoch": 0.03175105485232067, + "grad_norm": 
0.5571625232696533, + "learning_rate": 0.0014982175194569337, + "loss": 2.7964, + "step": 301 + }, + { + "epoch": 0.03185654008438819, + "grad_norm": 0.497061550617218, + "learning_rate": 0.0014982001787583047, + "loss": 2.7954, + "step": 302 + }, + { + "epoch": 0.03196202531645569, + "grad_norm": 0.5186395049095154, + "learning_rate": 0.001498182754220115, + "loss": 2.8064, + "step": 303 + }, + { + "epoch": 0.032067510548523206, + "grad_norm": 0.5397214889526367, + "learning_rate": 0.001498165245844317, + "loss": 2.8086, + "step": 304 + }, + { + "epoch": 0.03217299578059072, + "grad_norm": 0.5453457236289978, + "learning_rate": 0.0014981476536328722, + "loss": 2.824, + "step": 305 + }, + { + "epoch": 0.032278481012658226, + "grad_norm": 0.5236411690711975, + "learning_rate": 0.0014981299775877525, + "loss": 2.8023, + "step": 306 + }, + { + "epoch": 0.03238396624472574, + "grad_norm": 0.5697463154792786, + "learning_rate": 0.0014981122177109383, + "loss": 2.82, + "step": 307 + }, + { + "epoch": 0.032489451476793246, + "grad_norm": 0.5449116826057434, + "learning_rate": 0.0014980943740044196, + "loss": 2.7966, + "step": 308 + }, + { + "epoch": 0.03259493670886076, + "grad_norm": 0.605577290058136, + "learning_rate": 0.0014980764464701958, + "loss": 2.8185, + "step": 309 + }, + { + "epoch": 0.03270042194092827, + "grad_norm": 0.5855903029441833, + "learning_rate": 0.0014980584351102762, + "loss": 2.8057, + "step": 310 + }, + { + "epoch": 0.03280590717299578, + "grad_norm": 0.5976727604866028, + "learning_rate": 0.0014980403399266786, + "loss": 2.7869, + "step": 311 + }, + { + "epoch": 0.03291139240506329, + "grad_norm": 0.5781182050704956, + "learning_rate": 0.0014980221609214308, + "loss": 2.7926, + "step": 312 + }, + { + "epoch": 0.0330168776371308, + "grad_norm": 0.6259441375732422, + "learning_rate": 0.0014980038980965701, + "loss": 2.7754, + "step": 313 + }, + { + "epoch": 0.03312236286919831, + "grad_norm": 0.6710238456726074, + "learning_rate": 
0.0014979855514541424, + "loss": 2.7807, + "step": 314 + }, + { + "epoch": 0.03322784810126582, + "grad_norm": 0.7254925966262817, + "learning_rate": 0.0014979671209962044, + "loss": 2.7919, + "step": 315 + }, + { + "epoch": 0.03333333333333333, + "grad_norm": 0.6476753354072571, + "learning_rate": 0.0014979486067248204, + "loss": 2.7608, + "step": 316 + }, + { + "epoch": 0.033438818565400846, + "grad_norm": 0.5913572907447815, + "learning_rate": 0.0014979300086420655, + "loss": 2.7875, + "step": 317 + }, + { + "epoch": 0.03354430379746835, + "grad_norm": 0.5931761264801025, + "learning_rate": 0.0014979113267500235, + "loss": 2.7813, + "step": 318 + }, + { + "epoch": 0.033649789029535866, + "grad_norm": 0.5447616577148438, + "learning_rate": 0.0014978925610507879, + "loss": 2.7714, + "step": 319 + }, + { + "epoch": 0.03375527426160337, + "grad_norm": 0.5250318646430969, + "learning_rate": 0.001497873711546462, + "loss": 2.7863, + "step": 320 + }, + { + "epoch": 0.033860759493670886, + "grad_norm": 0.5072793960571289, + "learning_rate": 0.001497854778239157, + "loss": 2.7801, + "step": 321 + }, + { + "epoch": 0.0339662447257384, + "grad_norm": 0.5617639422416687, + "learning_rate": 0.0014978357611309951, + "loss": 2.7766, + "step": 322 + }, + { + "epoch": 0.034071729957805906, + "grad_norm": 0.6595895290374756, + "learning_rate": 0.0014978166602241068, + "loss": 2.798, + "step": 323 + }, + { + "epoch": 0.03417721518987342, + "grad_norm": 0.7061384320259094, + "learning_rate": 0.0014977974755206334, + "loss": 2.812, + "step": 324 + }, + { + "epoch": 0.034282700421940926, + "grad_norm": 0.7278647422790527, + "learning_rate": 0.0014977782070227236, + "loss": 2.7544, + "step": 325 + }, + { + "epoch": 0.03438818565400844, + "grad_norm": 0.8122200965881348, + "learning_rate": 0.001497758854732537, + "loss": 2.7733, + "step": 326 + }, + { + "epoch": 0.03449367088607595, + "grad_norm": 0.9628205895423889, + "learning_rate": 0.001497739418652242, + "loss": 2.7991, + "step": 
327 + }, + { + "epoch": 0.03459915611814346, + "grad_norm": 0.7561812996864319, + "learning_rate": 0.0014977198987840168, + "loss": 2.755, + "step": 328 + }, + { + "epoch": 0.03470464135021097, + "grad_norm": 0.6387292146682739, + "learning_rate": 0.0014977002951300483, + "loss": 2.7357, + "step": 329 + }, + { + "epoch": 0.03481012658227848, + "grad_norm": 0.7672760486602783, + "learning_rate": 0.0014976806076925334, + "loss": 2.7389, + "step": 330 + }, + { + "epoch": 0.03491561181434599, + "grad_norm": 0.6006882786750793, + "learning_rate": 0.0014976608364736781, + "loss": 2.7232, + "step": 331 + }, + { + "epoch": 0.0350210970464135, + "grad_norm": 0.661781370639801, + "learning_rate": 0.001497640981475698, + "loss": 2.7485, + "step": 332 + }, + { + "epoch": 0.03512658227848101, + "grad_norm": 0.5255479216575623, + "learning_rate": 0.0014976210427008177, + "loss": 2.7709, + "step": 333 + }, + { + "epoch": 0.035232067510548526, + "grad_norm": 0.6334099769592285, + "learning_rate": 0.0014976010201512718, + "loss": 2.7496, + "step": 334 + }, + { + "epoch": 0.03533755274261603, + "grad_norm": 0.6551412343978882, + "learning_rate": 0.0014975809138293036, + "loss": 2.7342, + "step": 335 + }, + { + "epoch": 0.035443037974683546, + "grad_norm": 0.6767290234565735, + "learning_rate": 0.0014975607237371663, + "loss": 2.7472, + "step": 336 + }, + { + "epoch": 0.03554852320675105, + "grad_norm": 0.602688193321228, + "learning_rate": 0.0014975404498771222, + "loss": 2.7363, + "step": 337 + }, + { + "epoch": 0.035654008438818566, + "grad_norm": 0.5812928080558777, + "learning_rate": 0.0014975200922514428, + "loss": 2.7622, + "step": 338 + }, + { + "epoch": 0.03575949367088608, + "grad_norm": 0.5992185473442078, + "learning_rate": 0.00149749965086241, + "loss": 2.7626, + "step": 339 + }, + { + "epoch": 0.035864978902953586, + "grad_norm": 0.5592398047447205, + "learning_rate": 0.0014974791257123137, + "loss": 2.7334, + "step": 340 + }, + { + "epoch": 0.0359704641350211, + 
"grad_norm": 0.5664753317832947, + "learning_rate": 0.0014974585168034543, + "loss": 2.7101, + "step": 341 + }, + { + "epoch": 0.036075949367088606, + "grad_norm": 0.5384204387664795, + "learning_rate": 0.0014974378241381409, + "loss": 2.6814, + "step": 342 + }, + { + "epoch": 0.03618143459915612, + "grad_norm": 0.5547763109207153, + "learning_rate": 0.001497417047718692, + "loss": 2.7239, + "step": 343 + }, + { + "epoch": 0.036286919831223625, + "grad_norm": 0.49282106757164, + "learning_rate": 0.0014973961875474364, + "loss": 2.704, + "step": 344 + }, + { + "epoch": 0.03639240506329114, + "grad_norm": 0.5338295698165894, + "learning_rate": 0.0014973752436267106, + "loss": 2.7173, + "step": 345 + }, + { + "epoch": 0.03649789029535865, + "grad_norm": 0.5309223532676697, + "learning_rate": 0.0014973542159588623, + "loss": 2.6902, + "step": 346 + }, + { + "epoch": 0.03660337552742616, + "grad_norm": 0.5313604474067688, + "learning_rate": 0.0014973331045462475, + "loss": 2.7184, + "step": 347 + }, + { + "epoch": 0.03670886075949367, + "grad_norm": 0.5351517796516418, + "learning_rate": 0.0014973119093912317, + "loss": 2.7007, + "step": 348 + }, + { + "epoch": 0.03681434599156118, + "grad_norm": 0.5444519519805908, + "learning_rate": 0.00149729063049619, + "loss": 2.6678, + "step": 349 + }, + { + "epoch": 0.03691983122362869, + "grad_norm": 0.6231700778007507, + "learning_rate": 0.001497269267863507, + "loss": 2.6805, + "step": 350 + }, + { + "epoch": 0.037025316455696206, + "grad_norm": 0.6954690217971802, + "learning_rate": 0.0014972478214955762, + "loss": 2.6772, + "step": 351 + }, + { + "epoch": 0.03713080168776371, + "grad_norm": 0.7138806581497192, + "learning_rate": 0.0014972262913948008, + "loss": 2.6949, + "step": 352 + }, + { + "epoch": 0.037236286919831225, + "grad_norm": 0.982103705406189, + "learning_rate": 0.0014972046775635934, + "loss": 2.7315, + "step": 353 + }, + { + "epoch": 0.03734177215189873, + "grad_norm": 1.161709189414978, + "learning_rate": 
0.0014971829800043762, + "loss": 2.6712, + "step": 354 + }, + { + "epoch": 0.037447257383966245, + "grad_norm": 0.7368850708007812, + "learning_rate": 0.0014971611987195802, + "loss": 2.6807, + "step": 355 + }, + { + "epoch": 0.03755274261603375, + "grad_norm": 0.6095811724662781, + "learning_rate": 0.0014971393337116462, + "loss": 2.7048, + "step": 356 + }, + { + "epoch": 0.037658227848101265, + "grad_norm": 0.8275843262672424, + "learning_rate": 0.0014971173849830243, + "loss": 2.6606, + "step": 357 + }, + { + "epoch": 0.03776371308016878, + "grad_norm": 0.7823063135147095, + "learning_rate": 0.0014970953525361738, + "loss": 2.6824, + "step": 358 + }, + { + "epoch": 0.037869198312236285, + "grad_norm": 0.6211954355239868, + "learning_rate": 0.001497073236373564, + "loss": 2.6538, + "step": 359 + }, + { + "epoch": 0.0379746835443038, + "grad_norm": 0.6586117744445801, + "learning_rate": 0.0014970510364976724, + "loss": 2.6978, + "step": 360 + }, + { + "epoch": 0.038080168776371305, + "grad_norm": 0.6137648820877075, + "learning_rate": 0.0014970287529109873, + "loss": 2.6689, + "step": 361 + }, + { + "epoch": 0.03818565400843882, + "grad_norm": 0.546123206615448, + "learning_rate": 0.0014970063856160054, + "loss": 2.6986, + "step": 362 + }, + { + "epoch": 0.03829113924050633, + "grad_norm": 0.6417360305786133, + "learning_rate": 0.0014969839346152332, + "loss": 2.6459, + "step": 363 + }, + { + "epoch": 0.03839662447257384, + "grad_norm": 0.5386713147163391, + "learning_rate": 0.001496961399911186, + "loss": 2.6499, + "step": 364 + }, + { + "epoch": 0.03850210970464135, + "grad_norm": 0.6745059490203857, + "learning_rate": 0.0014969387815063897, + "loss": 2.6879, + "step": 365 + }, + { + "epoch": 0.03860759493670886, + "grad_norm": 0.6690549850463867, + "learning_rate": 0.0014969160794033778, + "loss": 2.6568, + "step": 366 + }, + { + "epoch": 0.03871308016877637, + "grad_norm": 0.6086397171020508, + "learning_rate": 0.0014968932936046953, + "loss": 2.6644, + 
"step": 367 + }, + { + "epoch": 0.038818565400843885, + "grad_norm": 0.7090429067611694, + "learning_rate": 0.0014968704241128947, + "loss": 2.6871, + "step": 368 + }, + { + "epoch": 0.03892405063291139, + "grad_norm": 0.7418815493583679, + "learning_rate": 0.0014968474709305384, + "loss": 2.6777, + "step": 369 + }, + { + "epoch": 0.039029535864978905, + "grad_norm": 0.660418689250946, + "learning_rate": 0.0014968244340601996, + "loss": 2.637, + "step": 370 + }, + { + "epoch": 0.03913502109704641, + "grad_norm": 0.7583065032958984, + "learning_rate": 0.0014968013135044586, + "loss": 2.6519, + "step": 371 + }, + { + "epoch": 0.039240506329113925, + "grad_norm": 0.710946261882782, + "learning_rate": 0.0014967781092659065, + "loss": 2.6291, + "step": 372 + }, + { + "epoch": 0.03934599156118143, + "grad_norm": 0.6024428009986877, + "learning_rate": 0.0014967548213471436, + "loss": 2.6701, + "step": 373 + }, + { + "epoch": 0.039451476793248945, + "grad_norm": 0.5378702878952026, + "learning_rate": 0.0014967314497507792, + "loss": 2.6912, + "step": 374 + }, + { + "epoch": 0.03955696202531646, + "grad_norm": 0.5971998572349548, + "learning_rate": 0.0014967079944794323, + "loss": 2.6337, + "step": 375 + }, + { + "epoch": 0.039662447257383965, + "grad_norm": 0.6163696050643921, + "learning_rate": 0.0014966844555357314, + "loss": 2.65, + "step": 376 + }, + { + "epoch": 0.03976793248945148, + "grad_norm": 0.5582315325737, + "learning_rate": 0.0014966608329223137, + "loss": 2.6482, + "step": 377 + }, + { + "epoch": 0.039873417721518985, + "grad_norm": 0.8301354050636292, + "learning_rate": 0.0014966371266418267, + "loss": 2.643, + "step": 378 + }, + { + "epoch": 0.0399789029535865, + "grad_norm": 0.6687024831771851, + "learning_rate": 0.0014966133366969264, + "loss": 2.65, + "step": 379 + }, + { + "epoch": 0.04008438818565401, + "grad_norm": 0.5525863766670227, + "learning_rate": 0.001496589463090279, + "loss": 2.6372, + "step": 380 + }, + { + "epoch": 0.04018987341772152, + 
"grad_norm": 0.6594188213348389, + "learning_rate": 0.0014965655058245592, + "loss": 2.6428, + "step": 381 + }, + { + "epoch": 0.04029535864978903, + "grad_norm": 0.6421478390693665, + "learning_rate": 0.001496541464902452, + "loss": 2.6536, + "step": 382 + }, + { + "epoch": 0.04040084388185654, + "grad_norm": 0.5362251400947571, + "learning_rate": 0.001496517340326651, + "loss": 2.6316, + "step": 383 + }, + { + "epoch": 0.04050632911392405, + "grad_norm": 0.5164105892181396, + "learning_rate": 0.0014964931320998593, + "loss": 2.621, + "step": 384 + }, + { + "epoch": 0.04061181434599156, + "grad_norm": 0.6131341457366943, + "learning_rate": 0.00149646884022479, + "loss": 2.6136, + "step": 385 + }, + { + "epoch": 0.04071729957805907, + "grad_norm": 0.558914065361023, + "learning_rate": 0.0014964444647041647, + "loss": 2.5967, + "step": 386 + }, + { + "epoch": 0.040822784810126585, + "grad_norm": 0.5958954095840454, + "learning_rate": 0.0014964200055407153, + "loss": 2.6432, + "step": 387 + }, + { + "epoch": 0.04092827004219409, + "grad_norm": 0.567278265953064, + "learning_rate": 0.0014963954627371823, + "loss": 2.6093, + "step": 388 + }, + { + "epoch": 0.041033755274261605, + "grad_norm": 0.5110992193222046, + "learning_rate": 0.0014963708362963157, + "loss": 2.6042, + "step": 389 + }, + { + "epoch": 0.04113924050632911, + "grad_norm": 0.5573233366012573, + "learning_rate": 0.001496346126220875, + "loss": 2.6054, + "step": 390 + }, + { + "epoch": 0.041244725738396625, + "grad_norm": 0.6951462626457214, + "learning_rate": 0.0014963213325136296, + "loss": 2.6156, + "step": 391 + }, + { + "epoch": 0.04135021097046414, + "grad_norm": 0.7410439848899841, + "learning_rate": 0.0014962964551773572, + "loss": 2.6035, + "step": 392 + }, + { + "epoch": 0.041455696202531644, + "grad_norm": 0.7171339988708496, + "learning_rate": 0.0014962714942148457, + "loss": 2.593, + "step": 393 + }, + { + "epoch": 0.04156118143459916, + "grad_norm": 0.6908668279647827, + "learning_rate": 
0.001496246449628892, + "loss": 2.5857, + "step": 394 + }, + { + "epoch": 0.041666666666666664, + "grad_norm": 0.6328468918800354, + "learning_rate": 0.0014962213214223025, + "loss": 2.5841, + "step": 395 + }, + { + "epoch": 0.04177215189873418, + "grad_norm": 0.7863591313362122, + "learning_rate": 0.001496196109597893, + "loss": 2.6019, + "step": 396 + }, + { + "epoch": 0.04187763713080169, + "grad_norm": 0.88315749168396, + "learning_rate": 0.0014961708141584885, + "loss": 2.59, + "step": 397 + }, + { + "epoch": 0.0419831223628692, + "grad_norm": 0.8415640592575073, + "learning_rate": 0.0014961454351069233, + "loss": 2.602, + "step": 398 + }, + { + "epoch": 0.04208860759493671, + "grad_norm": 0.6449047327041626, + "learning_rate": 0.0014961199724460418, + "loss": 2.6109, + "step": 399 + }, + { + "epoch": 0.04219409282700422, + "grad_norm": 0.6251477003097534, + "learning_rate": 0.0014960944261786966, + "loss": 2.6025, + "step": 400 + }, + { + "epoch": 0.04229957805907173, + "grad_norm": 0.9256726503372192, + "learning_rate": 0.001496068796307751, + "loss": 2.5978, + "step": 401 + }, + { + "epoch": 0.04240506329113924, + "grad_norm": 0.9320252537727356, + "learning_rate": 0.0014960430828360762, + "loss": 2.5763, + "step": 402 + }, + { + "epoch": 0.04251054852320675, + "grad_norm": 0.7947292327880859, + "learning_rate": 0.001496017285766554, + "loss": 2.617, + "step": 403 + }, + { + "epoch": 0.042616033755274264, + "grad_norm": 0.5648205876350403, + "learning_rate": 0.0014959914051020748, + "loss": 2.5999, + "step": 404 + }, + { + "epoch": 0.04272151898734177, + "grad_norm": 0.875810980796814, + "learning_rate": 0.001495965440845539, + "loss": 2.5704, + "step": 405 + }, + { + "epoch": 0.042827004219409284, + "grad_norm": 1.086775541305542, + "learning_rate": 0.0014959393929998557, + "loss": 2.6189, + "step": 406 + }, + { + "epoch": 0.04293248945147679, + "grad_norm": 0.625517725944519, + "learning_rate": 0.001495913261567944, + "loss": 2.6246, + "step": 407 + }, + 
{ + "epoch": 0.043037974683544304, + "grad_norm": 0.6630136966705322, + "learning_rate": 0.0014958870465527317, + "loss": 2.5819, + "step": 408 + }, + { + "epoch": 0.04314345991561182, + "grad_norm": 0.9430121779441833, + "learning_rate": 0.0014958607479571564, + "loss": 2.5831, + "step": 409 + }, + { + "epoch": 0.043248945147679324, + "grad_norm": 0.8050456047058105, + "learning_rate": 0.0014958343657841655, + "loss": 2.5766, + "step": 410 + }, + { + "epoch": 0.04335443037974684, + "grad_norm": 0.5382872223854065, + "learning_rate": 0.0014958079000367147, + "loss": 2.5568, + "step": 411 + }, + { + "epoch": 0.043459915611814344, + "grad_norm": 0.6546604037284851, + "learning_rate": 0.0014957813507177696, + "loss": 2.5663, + "step": 412 + }, + { + "epoch": 0.04356540084388186, + "grad_norm": 0.5657925605773926, + "learning_rate": 0.0014957547178303054, + "loss": 2.5654, + "step": 413 + }, + { + "epoch": 0.043670886075949364, + "grad_norm": 0.5014580488204956, + "learning_rate": 0.0014957280013773065, + "loss": 2.5363, + "step": 414 + }, + { + "epoch": 0.04377637130801688, + "grad_norm": 0.5618875026702881, + "learning_rate": 0.0014957012013617663, + "loss": 2.5842, + "step": 415 + }, + { + "epoch": 0.04388185654008439, + "grad_norm": 0.49778130650520325, + "learning_rate": 0.0014956743177866882, + "loss": 2.554, + "step": 416 + }, + { + "epoch": 0.0439873417721519, + "grad_norm": 0.5190582871437073, + "learning_rate": 0.0014956473506550845, + "loss": 2.5607, + "step": 417 + }, + { + "epoch": 0.04409282700421941, + "grad_norm": 0.5709410309791565, + "learning_rate": 0.0014956202999699773, + "loss": 2.583, + "step": 418 + }, + { + "epoch": 0.04419831223628692, + "grad_norm": 0.5158283114433289, + "learning_rate": 0.001495593165734397, + "loss": 2.5648, + "step": 419 + }, + { + "epoch": 0.04430379746835443, + "grad_norm": 0.5894915461540222, + "learning_rate": 0.001495565947951385, + "loss": 2.531, + "step": 420 + }, + { + "epoch": 0.044409282700421944, + "grad_norm": 
0.7039139270782471, + "learning_rate": 0.0014955386466239907, + "loss": 2.5532, + "step": 421 + }, + { + "epoch": 0.04451476793248945, + "grad_norm": 0.6828736066818237, + "learning_rate": 0.0014955112617552734, + "loss": 2.5427, + "step": 422 + }, + { + "epoch": 0.044620253164556964, + "grad_norm": 0.601932168006897, + "learning_rate": 0.001495483793348302, + "loss": 2.5387, + "step": 423 + }, + { + "epoch": 0.04472573839662447, + "grad_norm": 0.5565563440322876, + "learning_rate": 0.0014954562414061538, + "loss": 2.5399, + "step": 424 + }, + { + "epoch": 0.044831223628691984, + "grad_norm": 0.7488911151885986, + "learning_rate": 0.0014954286059319167, + "loss": 2.5074, + "step": 425 + }, + { + "epoch": 0.04493670886075949, + "grad_norm": 0.8251137137413025, + "learning_rate": 0.0014954008869286876, + "loss": 2.5588, + "step": 426 + }, + { + "epoch": 0.045042194092827004, + "grad_norm": 0.7143703699111938, + "learning_rate": 0.001495373084399572, + "loss": 2.542, + "step": 427 + }, + { + "epoch": 0.04514767932489452, + "grad_norm": 0.574868381023407, + "learning_rate": 0.0014953451983476854, + "loss": 2.5348, + "step": 428 + }, + { + "epoch": 0.045253164556962024, + "grad_norm": 0.5324398279190063, + "learning_rate": 0.0014953172287761529, + "loss": 2.5142, + "step": 429 + }, + { + "epoch": 0.04535864978902954, + "grad_norm": 0.5623888373374939, + "learning_rate": 0.0014952891756881085, + "loss": 2.5014, + "step": 430 + }, + { + "epoch": 0.045464135021097044, + "grad_norm": 0.5457816123962402, + "learning_rate": 0.0014952610390866954, + "loss": 2.4803, + "step": 431 + }, + { + "epoch": 0.04556962025316456, + "grad_norm": 0.504292368888855, + "learning_rate": 0.0014952328189750666, + "loss": 2.5163, + "step": 432 + }, + { + "epoch": 0.04567510548523207, + "grad_norm": 0.5685055255889893, + "learning_rate": 0.0014952045153563845, + "loss": 2.5005, + "step": 433 + }, + { + "epoch": 0.04578059071729958, + "grad_norm": 0.641355574131012, + "learning_rate": 
0.0014951761282338205, + "loss": 2.5276, + "step": 434 + }, + { + "epoch": 0.04588607594936709, + "grad_norm": 0.5575699210166931, + "learning_rate": 0.0014951476576105555, + "loss": 2.5104, + "step": 435 + }, + { + "epoch": 0.0459915611814346, + "grad_norm": 0.597538411617279, + "learning_rate": 0.00149511910348978, + "loss": 2.5201, + "step": 436 + }, + { + "epoch": 0.04609704641350211, + "grad_norm": 0.5683041214942932, + "learning_rate": 0.0014950904658746933, + "loss": 2.5063, + "step": 437 + }, + { + "epoch": 0.046202531645569624, + "grad_norm": 0.6064679026603699, + "learning_rate": 0.0014950617447685047, + "loss": 2.5377, + "step": 438 + }, + { + "epoch": 0.04630801687763713, + "grad_norm": 0.7777884006500244, + "learning_rate": 0.001495032940174432, + "loss": 2.4938, + "step": 439 + }, + { + "epoch": 0.046413502109704644, + "grad_norm": 0.8259924054145813, + "learning_rate": 0.0014950040520957037, + "loss": 2.5216, + "step": 440 + }, + { + "epoch": 0.04651898734177215, + "grad_norm": 0.8034234642982483, + "learning_rate": 0.0014949750805355563, + "loss": 2.5116, + "step": 441 + }, + { + "epoch": 0.04662447257383966, + "grad_norm": 0.6317571401596069, + "learning_rate": 0.0014949460254972363, + "loss": 2.5232, + "step": 442 + }, + { + "epoch": 0.04672995780590717, + "grad_norm": 0.5546492338180542, + "learning_rate": 0.0014949168869839997, + "loss": 2.4894, + "step": 443 + }, + { + "epoch": 0.04683544303797468, + "grad_norm": 0.5787503123283386, + "learning_rate": 0.0014948876649991112, + "loss": 2.5117, + "step": 444 + }, + { + "epoch": 0.0469409282700422, + "grad_norm": 0.5405594110488892, + "learning_rate": 0.0014948583595458455, + "loss": 2.502, + "step": 445 + }, + { + "epoch": 0.0470464135021097, + "grad_norm": 0.6324287056922913, + "learning_rate": 0.0014948289706274865, + "loss": 2.5254, + "step": 446 + }, + { + "epoch": 0.04715189873417722, + "grad_norm": 0.5844792723655701, + "learning_rate": 0.0014947994982473273, + "loss": 2.5075, + "step": 447 
+ }, + { + "epoch": 0.04725738396624472, + "grad_norm": 0.5426677465438843, + "learning_rate": 0.0014947699424086704, + "loss": 2.4864, + "step": 448 + }, + { + "epoch": 0.04736286919831224, + "grad_norm": 0.5472251772880554, + "learning_rate": 0.0014947403031148278, + "loss": 2.5315, + "step": 449 + }, + { + "epoch": 0.04746835443037975, + "grad_norm": 0.601150631904602, + "learning_rate": 0.0014947105803691204, + "loss": 2.4919, + "step": 450 + }, + { + "epoch": 0.047573839662447256, + "grad_norm": 0.6356118321418762, + "learning_rate": 0.0014946807741748791, + "loss": 2.513, + "step": 451 + }, + { + "epoch": 0.04767932489451477, + "grad_norm": 0.6059160828590393, + "learning_rate": 0.001494650884535444, + "loss": 2.4725, + "step": 452 + }, + { + "epoch": 0.047784810126582276, + "grad_norm": 0.657669723033905, + "learning_rate": 0.0014946209114541636, + "loss": 2.5323, + "step": 453 + }, + { + "epoch": 0.04789029535864979, + "grad_norm": 0.6738398671150208, + "learning_rate": 0.0014945908549343974, + "loss": 2.5024, + "step": 454 + }, + { + "epoch": 0.047995780590717296, + "grad_norm": 0.6473782658576965, + "learning_rate": 0.001494560714979513, + "loss": 2.4833, + "step": 455 + }, + { + "epoch": 0.04810126582278481, + "grad_norm": 0.5503339171409607, + "learning_rate": 0.0014945304915928875, + "loss": 2.4537, + "step": 456 + }, + { + "epoch": 0.04820675105485232, + "grad_norm": 0.6851359605789185, + "learning_rate": 0.0014945001847779082, + "loss": 2.4687, + "step": 457 + }, + { + "epoch": 0.04831223628691983, + "grad_norm": 0.6670833826065063, + "learning_rate": 0.0014944697945379708, + "loss": 2.4564, + "step": 458 + }, + { + "epoch": 0.04841772151898734, + "grad_norm": 0.6384606957435608, + "learning_rate": 0.0014944393208764805, + "loss": 2.4653, + "step": 459 + }, + { + "epoch": 0.04852320675105485, + "grad_norm": 0.6662296056747437, + "learning_rate": 0.0014944087637968522, + "loss": 2.5068, + "step": 460 + }, + { + "epoch": 0.04862869198312236, + 
"grad_norm": 0.6446998119354248, + "learning_rate": 0.00149437812330251, + "loss": 2.4658, + "step": 461 + }, + { + "epoch": 0.048734177215189876, + "grad_norm": 0.7069019675254822, + "learning_rate": 0.0014943473993968871, + "loss": 2.4841, + "step": 462 + }, + { + "epoch": 0.04883966244725738, + "grad_norm": 0.7658073306083679, + "learning_rate": 0.0014943165920834266, + "loss": 2.4678, + "step": 463 + }, + { + "epoch": 0.048945147679324896, + "grad_norm": 0.9435396790504456, + "learning_rate": 0.0014942857013655806, + "loss": 2.4749, + "step": 464 + }, + { + "epoch": 0.0490506329113924, + "grad_norm": 0.8941424489021301, + "learning_rate": 0.0014942547272468103, + "loss": 2.4718, + "step": 465 + }, + { + "epoch": 0.049156118143459916, + "grad_norm": 0.6396884918212891, + "learning_rate": 0.0014942236697305866, + "loss": 2.4379, + "step": 466 + }, + { + "epoch": 0.04926160337552743, + "grad_norm": 0.5660668611526489, + "learning_rate": 0.0014941925288203897, + "loss": 2.4707, + "step": 467 + }, + { + "epoch": 0.049367088607594936, + "grad_norm": 0.6140608191490173, + "learning_rate": 0.001494161304519709, + "loss": 2.4671, + "step": 468 + }, + { + "epoch": 0.04947257383966245, + "grad_norm": 0.6068004965782166, + "learning_rate": 0.0014941299968320434, + "loss": 2.4398, + "step": 469 + }, + { + "epoch": 0.049578059071729956, + "grad_norm": 0.6886346340179443, + "learning_rate": 0.0014940986057609012, + "loss": 2.4454, + "step": 470 + }, + { + "epoch": 0.04968354430379747, + "grad_norm": 0.7839289307594299, + "learning_rate": 0.0014940671313097998, + "loss": 2.453, + "step": 471 + }, + { + "epoch": 0.049789029535864976, + "grad_norm": 0.7677481770515442, + "learning_rate": 0.001494035573482266, + "loss": 2.4562, + "step": 472 + }, + { + "epoch": 0.04989451476793249, + "grad_norm": 0.6932200789451599, + "learning_rate": 0.0014940039322818362, + "loss": 2.4843, + "step": 473 + }, + { + "epoch": 0.05, + "grad_norm": 0.5747694373130798, + "learning_rate": 
0.0014939722077120558, + "loss": 2.4731, + "step": 474 + }, + { + "epoch": 0.05010548523206751, + "grad_norm": 0.5190569162368774, + "learning_rate": 0.0014939403997764795, + "loss": 2.4704, + "step": 475 + }, + { + "epoch": 0.05021097046413502, + "grad_norm": 0.5559191107749939, + "learning_rate": 0.001493908508478672, + "loss": 2.4483, + "step": 476 + }, + { + "epoch": 0.05031645569620253, + "grad_norm": 0.5790131688117981, + "learning_rate": 0.0014938765338222068, + "loss": 2.4145, + "step": 477 + }, + { + "epoch": 0.05042194092827004, + "grad_norm": 0.5895004272460938, + "learning_rate": 0.0014938444758106665, + "loss": 2.4883, + "step": 478 + }, + { + "epoch": 0.050527426160337556, + "grad_norm": 0.5595369935035706, + "learning_rate": 0.0014938123344476436, + "loss": 2.4382, + "step": 479 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 0.5518097281455994, + "learning_rate": 0.0014937801097367396, + "loss": 2.4449, + "step": 480 + }, + { + "epoch": 0.050738396624472576, + "grad_norm": 0.7194148898124695, + "learning_rate": 0.0014937478016815657, + "loss": 2.4071, + "step": 481 + }, + { + "epoch": 0.05084388185654008, + "grad_norm": 0.9538900852203369, + "learning_rate": 0.0014937154102857416, + "loss": 2.4629, + "step": 482 + }, + { + "epoch": 0.050949367088607596, + "grad_norm": 1.2442519664764404, + "learning_rate": 0.0014936829355528976, + "loss": 2.4866, + "step": 483 + }, + { + "epoch": 0.0510548523206751, + "grad_norm": 0.8146829605102539, + "learning_rate": 0.0014936503774866721, + "loss": 2.4366, + "step": 484 + }, + { + "epoch": 0.051160337552742616, + "grad_norm": 0.5750346183776855, + "learning_rate": 0.0014936177360907138, + "loss": 2.3953, + "step": 485 + }, + { + "epoch": 0.05126582278481013, + "grad_norm": 0.7566839456558228, + "learning_rate": 0.00149358501136868, + "loss": 2.4017, + "step": 486 + }, + { + "epoch": 0.051371308016877636, + "grad_norm": 1.0250811576843262, + "learning_rate": 0.0014935522033242379, + "loss": 2.4656, + 
"step": 487 + }, + { + "epoch": 0.05147679324894515, + "grad_norm": 1.0965088605880737, + "learning_rate": 0.0014935193119610638, + "loss": 2.4242, + "step": 488 + }, + { + "epoch": 0.051582278481012656, + "grad_norm": 0.6851458549499512, + "learning_rate": 0.0014934863372828432, + "loss": 2.4147, + "step": 489 + }, + { + "epoch": 0.05168776371308017, + "grad_norm": 0.601778507232666, + "learning_rate": 0.001493453279293271, + "loss": 2.4374, + "step": 490 + }, + { + "epoch": 0.05179324894514768, + "grad_norm": 0.7987101674079895, + "learning_rate": 0.001493420137996052, + "loss": 2.4407, + "step": 491 + }, + { + "epoch": 0.05189873417721519, + "grad_norm": 0.8827337026596069, + "learning_rate": 0.0014933869133948992, + "loss": 2.4393, + "step": 492 + }, + { + "epoch": 0.0520042194092827, + "grad_norm": 0.6110571026802063, + "learning_rate": 0.0014933536054935362, + "loss": 2.4184, + "step": 493 + }, + { + "epoch": 0.05210970464135021, + "grad_norm": 0.5868078470230103, + "learning_rate": 0.0014933202142956947, + "loss": 2.4195, + "step": 494 + }, + { + "epoch": 0.05221518987341772, + "grad_norm": 0.8407921195030212, + "learning_rate": 0.0014932867398051168, + "loss": 2.4184, + "step": 495 + }, + { + "epoch": 0.05232067510548523, + "grad_norm": 0.8748168349266052, + "learning_rate": 0.0014932531820255534, + "loss": 2.4029, + "step": 496 + }, + { + "epoch": 0.05242616033755274, + "grad_norm": 0.65437912940979, + "learning_rate": 0.0014932195409607645, + "loss": 2.431, + "step": 497 + }, + { + "epoch": 0.052531645569620256, + "grad_norm": 0.5540874004364014, + "learning_rate": 0.0014931858166145203, + "loss": 2.3905, + "step": 498 + }, + { + "epoch": 0.05263713080168776, + "grad_norm": 0.6876356601715088, + "learning_rate": 0.0014931520089905993, + "loss": 2.4243, + "step": 499 + }, + { + "epoch": 0.052742616033755275, + "grad_norm": 0.5901927947998047, + "learning_rate": 0.0014931181180927902, + "loss": 2.4089, + "step": 500 + }, + { + "epoch": 0.05284810126582278, 
+ "grad_norm": 0.5558372735977173, + "learning_rate": 0.0014930841439248904, + "loss": 2.4142, + "step": 501 + }, + { + "epoch": 0.052953586497890295, + "grad_norm": 0.7860623002052307, + "learning_rate": 0.0014930500864907066, + "loss": 2.4472, + "step": 502 + }, + { + "epoch": 0.05305907172995781, + "grad_norm": 0.7616980671882629, + "learning_rate": 0.001493015945794056, + "loss": 2.3765, + "step": 503 + }, + { + "epoch": 0.053164556962025315, + "grad_norm": 0.6086657643318176, + "learning_rate": 0.0014929817218387632, + "loss": 2.3932, + "step": 504 + }, + { + "epoch": 0.05327004219409283, + "grad_norm": 0.48971372842788696, + "learning_rate": 0.0014929474146286638, + "loss": 2.399, + "step": 505 + }, + { + "epoch": 0.053375527426160335, + "grad_norm": 0.5938992500305176, + "learning_rate": 0.001492913024167602, + "loss": 2.4389, + "step": 506 + }, + { + "epoch": 0.05348101265822785, + "grad_norm": 0.6259846091270447, + "learning_rate": 0.001492878550459431, + "loss": 2.4178, + "step": 507 + }, + { + "epoch": 0.05358649789029536, + "grad_norm": 0.628774106502533, + "learning_rate": 0.0014928439935080143, + "loss": 2.3945, + "step": 508 + }, + { + "epoch": 0.05369198312236287, + "grad_norm": 0.5695948600769043, + "learning_rate": 0.0014928093533172243, + "loss": 2.3539, + "step": 509 + }, + { + "epoch": 0.05379746835443038, + "grad_norm": 0.531403124332428, + "learning_rate": 0.001492774629890942, + "loss": 2.4156, + "step": 510 + }, + { + "epoch": 0.05390295358649789, + "grad_norm": 0.5992075204849243, + "learning_rate": 0.0014927398232330584, + "loss": 2.3849, + "step": 511 + }, + { + "epoch": 0.0540084388185654, + "grad_norm": 0.787064790725708, + "learning_rate": 0.0014927049333474743, + "loss": 2.405, + "step": 512 + }, + { + "epoch": 0.05411392405063291, + "grad_norm": 0.753335177898407, + "learning_rate": 0.001492669960238099, + "loss": 2.3956, + "step": 513 + }, + { + "epoch": 0.05421940928270042, + "grad_norm": 0.6422935724258423, + "learning_rate": 
0.001492634903908851, + "loss": 2.4063, + "step": 514 + }, + { + "epoch": 0.054324894514767935, + "grad_norm": 0.5440375804901123, + "learning_rate": 0.001492599764363659, + "loss": 2.3636, + "step": 515 + }, + { + "epoch": 0.05443037974683544, + "grad_norm": 0.6405780911445618, + "learning_rate": 0.0014925645416064605, + "loss": 2.3939, + "step": 516 + }, + { + "epoch": 0.054535864978902955, + "grad_norm": 0.7677408456802368, + "learning_rate": 0.0014925292356412025, + "loss": 2.3919, + "step": 517 + }, + { + "epoch": 0.05464135021097046, + "grad_norm": 0.6510573625564575, + "learning_rate": 0.001492493846471841, + "loss": 2.3416, + "step": 518 + }, + { + "epoch": 0.054746835443037975, + "grad_norm": 0.627384603023529, + "learning_rate": 0.0014924583741023417, + "loss": 2.3935, + "step": 519 + }, + { + "epoch": 0.05485232067510549, + "grad_norm": 0.601758599281311, + "learning_rate": 0.001492422818536679, + "loss": 2.3957, + "step": 520 + }, + { + "epoch": 0.054957805907172995, + "grad_norm": 0.7271482348442078, + "learning_rate": 0.0014923871797788378, + "loss": 2.3944, + "step": 521 + }, + { + "epoch": 0.05506329113924051, + "grad_norm": 0.7070406079292297, + "learning_rate": 0.001492351457832811, + "loss": 2.3546, + "step": 522 + }, + { + "epoch": 0.055168776371308015, + "grad_norm": 0.5978818535804749, + "learning_rate": 0.0014923156527026017, + "loss": 2.3914, + "step": 523 + }, + { + "epoch": 0.05527426160337553, + "grad_norm": 0.550290584564209, + "learning_rate": 0.001492279764392222, + "loss": 2.387, + "step": 524 + }, + { + "epoch": 0.055379746835443035, + "grad_norm": 0.6084048748016357, + "learning_rate": 0.0014922437929056934, + "loss": 2.3752, + "step": 525 + }, + { + "epoch": 0.05548523206751055, + "grad_norm": 0.581994891166687, + "learning_rate": 0.0014922077382470468, + "loss": 2.3584, + "step": 526 + }, + { + "epoch": 0.05559071729957806, + "grad_norm": 0.5891368389129639, + "learning_rate": 0.001492171600420322, + "loss": 2.3503, + "step": 527 
+ }, + { + "epoch": 0.05569620253164557, + "grad_norm": 0.5435659289360046, + "learning_rate": 0.0014921353794295684, + "loss": 2.3713, + "step": 528 + }, + { + "epoch": 0.05580168776371308, + "grad_norm": 0.5212981700897217, + "learning_rate": 0.001492099075278845, + "loss": 2.3717, + "step": 529 + }, + { + "epoch": 0.05590717299578059, + "grad_norm": 0.5365485548973083, + "learning_rate": 0.00149206268797222, + "loss": 2.3656, + "step": 530 + }, + { + "epoch": 0.0560126582278481, + "grad_norm": 0.5292839407920837, + "learning_rate": 0.0014920262175137703, + "loss": 2.3667, + "step": 531 + }, + { + "epoch": 0.056118143459915615, + "grad_norm": 0.5741070508956909, + "learning_rate": 0.001491989663907583, + "loss": 2.3341, + "step": 532 + }, + { + "epoch": 0.05622362869198312, + "grad_norm": 0.55158531665802, + "learning_rate": 0.001491953027157754, + "loss": 2.3578, + "step": 533 + }, + { + "epoch": 0.056329113924050635, + "grad_norm": 0.5468905568122864, + "learning_rate": 0.0014919163072683883, + "loss": 2.3492, + "step": 534 + }, + { + "epoch": 0.05643459915611814, + "grad_norm": 0.5942896604537964, + "learning_rate": 0.0014918795042436013, + "loss": 2.3594, + "step": 535 + }, + { + "epoch": 0.056540084388185655, + "grad_norm": 0.7640687227249146, + "learning_rate": 0.001491842618087516, + "loss": 2.3491, + "step": 536 + }, + { + "epoch": 0.05664556962025316, + "grad_norm": 1.076373815536499, + "learning_rate": 0.0014918056488042665, + "loss": 2.3278, + "step": 537 + }, + { + "epoch": 0.056751054852320675, + "grad_norm": 1.0766210556030273, + "learning_rate": 0.0014917685963979949, + "loss": 2.3654, + "step": 538 + }, + { + "epoch": 0.05685654008438819, + "grad_norm": 0.7467502355575562, + "learning_rate": 0.0014917314608728536, + "loss": 2.361, + "step": 539 + }, + { + "epoch": 0.056962025316455694, + "grad_norm": 0.734528124332428, + "learning_rate": 0.0014916942422330032, + "loss": 2.3598, + "step": 540 + }, + { + "epoch": 0.05706751054852321, + "grad_norm": 
0.8445684909820557, + "learning_rate": 0.0014916569404826146, + "loss": 2.3932, + "step": 541 + }, + { + "epoch": 0.057172995780590714, + "grad_norm": 0.638463020324707, + "learning_rate": 0.0014916195556258676, + "loss": 2.357, + "step": 542 + }, + { + "epoch": 0.05727848101265823, + "grad_norm": 0.818782389163971, + "learning_rate": 0.0014915820876669514, + "loss": 2.3408, + "step": 543 + }, + { + "epoch": 0.05738396624472574, + "grad_norm": 0.7818675637245178, + "learning_rate": 0.0014915445366100641, + "loss": 2.362, + "step": 544 + }, + { + "epoch": 0.05748945147679325, + "grad_norm": 0.6198149919509888, + "learning_rate": 0.0014915069024594144, + "loss": 2.3593, + "step": 545 + }, + { + "epoch": 0.05759493670886076, + "grad_norm": 0.6598590612411499, + "learning_rate": 0.0014914691852192183, + "loss": 2.3522, + "step": 546 + }, + { + "epoch": 0.05770042194092827, + "grad_norm": 0.6311138272285461, + "learning_rate": 0.001491431384893703, + "loss": 2.3553, + "step": 547 + }, + { + "epoch": 0.05780590717299578, + "grad_norm": 0.6832761764526367, + "learning_rate": 0.0014913935014871035, + "loss": 2.3661, + "step": 548 + }, + { + "epoch": 0.057911392405063294, + "grad_norm": 0.7866519093513489, + "learning_rate": 0.0014913555350036657, + "loss": 2.3593, + "step": 549 + }, + { + "epoch": 0.0580168776371308, + "grad_norm": 0.5946293473243713, + "learning_rate": 0.001491317485447643, + "loss": 2.3119, + "step": 550 + }, + { + "epoch": 0.058122362869198314, + "grad_norm": 0.5947610139846802, + "learning_rate": 0.0014912793528233, + "loss": 2.3069, + "step": 551 + }, + { + "epoch": 0.05822784810126582, + "grad_norm": 0.7171390652656555, + "learning_rate": 0.0014912411371349088, + "loss": 2.3365, + "step": 552 + }, + { + "epoch": 0.058333333333333334, + "grad_norm": 0.7012369632720947, + "learning_rate": 0.0014912028383867522, + "loss": 2.3408, + "step": 553 + }, + { + "epoch": 0.05843881856540084, + "grad_norm": 0.6924192309379578, + "learning_rate": 
0.0014911644565831217, + "loss": 2.2766, + "step": 554 + }, + { + "epoch": 0.058544303797468354, + "grad_norm": 0.6331238746643066, + "learning_rate": 0.001491125991728318, + "loss": 2.2779, + "step": 555 + }, + { + "epoch": 0.05864978902953587, + "grad_norm": 0.503001868724823, + "learning_rate": 0.001491087443826651, + "loss": 2.3279, + "step": 556 + }, + { + "epoch": 0.058755274261603374, + "grad_norm": 0.5977935791015625, + "learning_rate": 0.0014910488128824409, + "loss": 2.3507, + "step": 557 + }, + { + "epoch": 0.05886075949367089, + "grad_norm": 0.8341029286384583, + "learning_rate": 0.0014910100989000159, + "loss": 2.313, + "step": 558 + }, + { + "epoch": 0.058966244725738394, + "grad_norm": 1.0206959247589111, + "learning_rate": 0.0014909713018837144, + "loss": 2.2927, + "step": 559 + }, + { + "epoch": 0.05907172995780591, + "grad_norm": 0.9215767979621887, + "learning_rate": 0.0014909324218378838, + "loss": 2.3194, + "step": 560 + }, + { + "epoch": 0.05917721518987342, + "grad_norm": 0.616671085357666, + "learning_rate": 0.0014908934587668805, + "loss": 2.3071, + "step": 561 + }, + { + "epoch": 0.05928270042194093, + "grad_norm": 0.6301413178443909, + "learning_rate": 0.001490854412675071, + "loss": 2.3088, + "step": 562 + }, + { + "epoch": 0.05938818565400844, + "grad_norm": 0.7655355930328369, + "learning_rate": 0.0014908152835668301, + "loss": 2.3455, + "step": 563 + }, + { + "epoch": 0.05949367088607595, + "grad_norm": 0.6773424744606018, + "learning_rate": 0.0014907760714465428, + "loss": 2.3263, + "step": 564 + }, + { + "epoch": 0.05959915611814346, + "grad_norm": 0.63832688331604, + "learning_rate": 0.0014907367763186026, + "loss": 2.3206, + "step": 565 + }, + { + "epoch": 0.05970464135021097, + "grad_norm": 0.6194385290145874, + "learning_rate": 0.0014906973981874132, + "loss": 2.3331, + "step": 566 + }, + { + "epoch": 0.05981012658227848, + "grad_norm": 0.554900050163269, + "learning_rate": 0.0014906579370573868, + "loss": 2.3356, + "step": 567 
+ }, + { + "epoch": 0.059915611814345994, + "grad_norm": 0.631126880645752, + "learning_rate": 0.0014906183929329455, + "loss": 2.2835, + "step": 568 + }, + { + "epoch": 0.0600210970464135, + "grad_norm": 0.5967705845832825, + "learning_rate": 0.00149057876581852, + "loss": 2.2597, + "step": 569 + }, + { + "epoch": 0.060126582278481014, + "grad_norm": 0.6301481127738953, + "learning_rate": 0.0014905390557185508, + "loss": 2.3478, + "step": 570 + }, + { + "epoch": 0.06023206751054852, + "grad_norm": 0.6598333716392517, + "learning_rate": 0.0014904992626374879, + "loss": 2.3248, + "step": 571 + }, + { + "epoch": 0.060337552742616034, + "grad_norm": 0.6546053290367126, + "learning_rate": 0.0014904593865797903, + "loss": 2.3167, + "step": 572 + }, + { + "epoch": 0.06044303797468355, + "grad_norm": 0.5469541549682617, + "learning_rate": 0.0014904194275499258, + "loss": 2.3293, + "step": 573 + }, + { + "epoch": 0.060548523206751054, + "grad_norm": 0.5749219655990601, + "learning_rate": 0.0014903793855523726, + "loss": 2.2891, + "step": 574 + }, + { + "epoch": 0.06065400843881857, + "grad_norm": 0.5142810940742493, + "learning_rate": 0.0014903392605916175, + "loss": 2.3245, + "step": 575 + }, + { + "epoch": 0.060759493670886074, + "grad_norm": 0.7823461294174194, + "learning_rate": 0.0014902990526721564, + "loss": 2.3202, + "step": 576 + }, + { + "epoch": 0.06086497890295359, + "grad_norm": 0.7379515171051025, + "learning_rate": 0.0014902587617984951, + "loss": 2.3007, + "step": 577 + }, + { + "epoch": 0.0609704641350211, + "grad_norm": 0.7914389371871948, + "learning_rate": 0.0014902183879751483, + "loss": 2.3106, + "step": 578 + }, + { + "epoch": 0.06107594936708861, + "grad_norm": 0.7999078035354614, + "learning_rate": 0.0014901779312066399, + "loss": 2.3134, + "step": 579 + }, + { + "epoch": 0.06118143459915612, + "grad_norm": 0.5661306381225586, + "learning_rate": 0.0014901373914975036, + "loss": 2.307, + "step": 580 + }, + { + "epoch": 0.06128691983122363, + 
"grad_norm": 0.6946706771850586, + "learning_rate": 0.0014900967688522818, + "loss": 2.3078, + "step": 581 + }, + { + "epoch": 0.06139240506329114, + "grad_norm": 0.8000838160514832, + "learning_rate": 0.0014900560632755265, + "loss": 2.2834, + "step": 582 + }, + { + "epoch": 0.06149789029535865, + "grad_norm": 0.5731629133224487, + "learning_rate": 0.0014900152747717994, + "loss": 2.3181, + "step": 583 + }, + { + "epoch": 0.06160337552742616, + "grad_norm": 0.7691609263420105, + "learning_rate": 0.0014899744033456705, + "loss": 2.2905, + "step": 584 + }, + { + "epoch": 0.061708860759493674, + "grad_norm": 0.7264873385429382, + "learning_rate": 0.0014899334490017198, + "loss": 2.3222, + "step": 585 + }, + { + "epoch": 0.06181434599156118, + "grad_norm": 0.6921423673629761, + "learning_rate": 0.0014898924117445367, + "loss": 2.2869, + "step": 586 + }, + { + "epoch": 0.061919831223628694, + "grad_norm": 0.7821895480155945, + "learning_rate": 0.0014898512915787192, + "loss": 2.2694, + "step": 587 + }, + { + "epoch": 0.0620253164556962, + "grad_norm": 0.5459523797035217, + "learning_rate": 0.0014898100885088754, + "loss": 2.2563, + "step": 588 + }, + { + "epoch": 0.06213080168776371, + "grad_norm": 0.7200435400009155, + "learning_rate": 0.001489768802539622, + "loss": 2.2974, + "step": 589 + }, + { + "epoch": 0.06223628691983123, + "grad_norm": 0.6950234174728394, + "learning_rate": 0.0014897274336755856, + "loss": 2.2381, + "step": 590 + }, + { + "epoch": 0.06234177215189873, + "grad_norm": 0.6145004630088806, + "learning_rate": 0.0014896859819214018, + "loss": 2.2663, + "step": 591 + }, + { + "epoch": 0.06244725738396625, + "grad_norm": 0.694369912147522, + "learning_rate": 0.001489644447281715, + "loss": 2.2742, + "step": 592 + }, + { + "epoch": 0.06255274261603376, + "grad_norm": 0.5640719532966614, + "learning_rate": 0.00148960282976118, + "loss": 2.2893, + "step": 593 + }, + { + "epoch": 0.06265822784810127, + "grad_norm": 0.5702387690544128, + "learning_rate": 
0.0014895611293644596, + "loss": 2.2516, + "step": 594 + }, + { + "epoch": 0.06276371308016877, + "grad_norm": 0.616759717464447, + "learning_rate": 0.0014895193460962271, + "loss": 2.2795, + "step": 595 + }, + { + "epoch": 0.0628691983122363, + "grad_norm": 0.5723934173583984, + "learning_rate": 0.001489477479961164, + "loss": 2.228, + "step": 596 + }, + { + "epoch": 0.0629746835443038, + "grad_norm": 0.5637951493263245, + "learning_rate": 0.0014894355309639621, + "loss": 2.2697, + "step": 597 + }, + { + "epoch": 0.0630801687763713, + "grad_norm": 0.5160190463066101, + "learning_rate": 0.0014893934991093221, + "loss": 2.3084, + "step": 598 + }, + { + "epoch": 0.06318565400843881, + "grad_norm": 0.5620864033699036, + "learning_rate": 0.0014893513844019533, + "loss": 2.2587, + "step": 599 + }, + { + "epoch": 0.06329113924050633, + "grad_norm": 0.5687171220779419, + "learning_rate": 0.001489309186846575, + "loss": 2.2674, + "step": 600 + }, + { + "epoch": 0.06339662447257384, + "grad_norm": 0.6853978633880615, + "learning_rate": 0.001489266906447916, + "loss": 2.2829, + "step": 601 + }, + { + "epoch": 0.06350210970464135, + "grad_norm": 0.8625463247299194, + "learning_rate": 0.0014892245432107138, + "loss": 2.263, + "step": 602 + }, + { + "epoch": 0.06360759493670887, + "grad_norm": 0.9322639107704163, + "learning_rate": 0.0014891820971397152, + "loss": 2.2985, + "step": 603 + }, + { + "epoch": 0.06371308016877637, + "grad_norm": 0.9432663917541504, + "learning_rate": 0.001489139568239677, + "loss": 2.2323, + "step": 604 + }, + { + "epoch": 0.06381856540084388, + "grad_norm": 0.7297714948654175, + "learning_rate": 0.0014890969565153642, + "loss": 2.2737, + "step": 605 + }, + { + "epoch": 0.06392405063291139, + "grad_norm": 0.5094132423400879, + "learning_rate": 0.0014890542619715522, + "loss": 2.2421, + "step": 606 + }, + { + "epoch": 0.0640295358649789, + "grad_norm": 0.5689123272895813, + "learning_rate": 0.0014890114846130248, + "loss": 2.2838, + "step": 607 + }, 
+ { + "epoch": 0.06413502109704641, + "grad_norm": 0.574410080909729, + "learning_rate": 0.0014889686244445755, + "loss": 2.3154, + "step": 608 + }, + { + "epoch": 0.06424050632911392, + "grad_norm": 0.5872901082038879, + "learning_rate": 0.0014889256814710071, + "loss": 2.2577, + "step": 609 + }, + { + "epoch": 0.06434599156118144, + "grad_norm": 0.6018288731575012, + "learning_rate": 0.0014888826556971313, + "loss": 2.2594, + "step": 610 + }, + { + "epoch": 0.06445147679324895, + "grad_norm": 0.5361225008964539, + "learning_rate": 0.0014888395471277698, + "loss": 2.253, + "step": 611 + }, + { + "epoch": 0.06455696202531645, + "grad_norm": 0.5185396075248718, + "learning_rate": 0.0014887963557677526, + "loss": 2.2541, + "step": 612 + }, + { + "epoch": 0.06466244725738397, + "grad_norm": 0.4898001253604889, + "learning_rate": 0.00148875308162192, + "loss": 2.2358, + "step": 613 + }, + { + "epoch": 0.06476793248945148, + "grad_norm": 0.5322906374931335, + "learning_rate": 0.0014887097246951205, + "loss": 2.1996, + "step": 614 + }, + { + "epoch": 0.06487341772151899, + "grad_norm": 0.5688365697860718, + "learning_rate": 0.001488666284992213, + "loss": 2.2507, + "step": 615 + }, + { + "epoch": 0.06497890295358649, + "grad_norm": 0.7766420245170593, + "learning_rate": 0.001488622762518065, + "loss": 2.2543, + "step": 616 + }, + { + "epoch": 0.06508438818565401, + "grad_norm": 0.9629533886909485, + "learning_rate": 0.0014885791572775533, + "loss": 2.2476, + "step": 617 + }, + { + "epoch": 0.06518987341772152, + "grad_norm": 1.0248714685440063, + "learning_rate": 0.0014885354692755642, + "loss": 2.2769, + "step": 618 + }, + { + "epoch": 0.06529535864978903, + "grad_norm": 0.9247288703918457, + "learning_rate": 0.001488491698516993, + "loss": 2.2887, + "step": 619 + }, + { + "epoch": 0.06540084388185655, + "grad_norm": 0.7877980470657349, + "learning_rate": 0.0014884478450067444, + "loss": 2.2603, + "step": 620 + }, + { + "epoch": 0.06550632911392405, + "grad_norm": 
0.5994645953178406, + "learning_rate": 0.001488403908749733, + "loss": 2.2357, + "step": 621 + }, + { + "epoch": 0.06561181434599156, + "grad_norm": 0.6694386601448059, + "learning_rate": 0.0014883598897508811, + "loss": 2.2654, + "step": 622 + }, + { + "epoch": 0.06571729957805907, + "grad_norm": 0.9274905920028687, + "learning_rate": 0.0014883157880151222, + "loss": 2.2485, + "step": 623 + }, + { + "epoch": 0.06582278481012659, + "grad_norm": 0.8527341485023499, + "learning_rate": 0.0014882716035473974, + "loss": 2.209, + "step": 624 + }, + { + "epoch": 0.06592827004219409, + "grad_norm": 0.6517285108566284, + "learning_rate": 0.001488227336352658, + "loss": 2.2399, + "step": 625 + }, + { + "epoch": 0.0660337552742616, + "grad_norm": 0.5907406806945801, + "learning_rate": 0.0014881829864358644, + "loss": 2.2317, + "step": 626 + }, + { + "epoch": 0.06613924050632912, + "grad_norm": 0.6417720913887024, + "learning_rate": 0.0014881385538019867, + "loss": 2.2583, + "step": 627 + }, + { + "epoch": 0.06624472573839663, + "grad_norm": 0.5792589783668518, + "learning_rate": 0.0014880940384560028, + "loss": 2.2394, + "step": 628 + }, + { + "epoch": 0.06635021097046413, + "grad_norm": 0.5659679174423218, + "learning_rate": 0.0014880494404029016, + "loss": 2.2486, + "step": 629 + }, + { + "epoch": 0.06645569620253164, + "grad_norm": 0.6692177653312683, + "learning_rate": 0.0014880047596476807, + "loss": 2.2072, + "step": 630 + }, + { + "epoch": 0.06656118143459916, + "grad_norm": 0.7472617030143738, + "learning_rate": 0.0014879599961953461, + "loss": 2.214, + "step": 631 + }, + { + "epoch": 0.06666666666666667, + "grad_norm": 0.8594291806221008, + "learning_rate": 0.0014879151500509142, + "loss": 2.2589, + "step": 632 + }, + { + "epoch": 0.06677215189873417, + "grad_norm": 1.047629475593567, + "learning_rate": 0.0014878702212194103, + "loss": 2.2521, + "step": 633 + }, + { + "epoch": 0.06687763713080169, + "grad_norm": 0.8850459456443787, + "learning_rate": 
0.0014878252097058685, + "loss": 2.2229, + "step": 634 + }, + { + "epoch": 0.0669831223628692, + "grad_norm": 0.7026048302650452, + "learning_rate": 0.001487780115515333, + "loss": 2.2868, + "step": 635 + }, + { + "epoch": 0.0670886075949367, + "grad_norm": 0.6850810050964355, + "learning_rate": 0.0014877349386528565, + "loss": 2.2294, + "step": 636 + }, + { + "epoch": 0.06719409282700423, + "grad_norm": 0.9078318476676941, + "learning_rate": 0.0014876896791235015, + "loss": 2.2953, + "step": 637 + }, + { + "epoch": 0.06729957805907173, + "grad_norm": 0.787359893321991, + "learning_rate": 0.0014876443369323397, + "loss": 2.259, + "step": 638 + }, + { + "epoch": 0.06740506329113924, + "grad_norm": 0.6145498156547546, + "learning_rate": 0.0014875989120844517, + "loss": 2.2346, + "step": 639 + }, + { + "epoch": 0.06751054852320675, + "grad_norm": 0.9258652925491333, + "learning_rate": 0.0014875534045849274, + "loss": 2.2432, + "step": 640 + }, + { + "epoch": 0.06761603375527427, + "grad_norm": 1.066116452217102, + "learning_rate": 0.0014875078144388665, + "loss": 2.2407, + "step": 641 + }, + { + "epoch": 0.06772151898734177, + "grad_norm": 0.5942973494529724, + "learning_rate": 0.0014874621416513774, + "loss": 2.2389, + "step": 642 + }, + { + "epoch": 0.06782700421940928, + "grad_norm": 0.7695975303649902, + "learning_rate": 0.001487416386227578, + "loss": 2.2462, + "step": 643 + }, + { + "epoch": 0.0679324894514768, + "grad_norm": 0.8873471021652222, + "learning_rate": 0.0014873705481725952, + "loss": 2.2023, + "step": 644 + }, + { + "epoch": 0.0680379746835443, + "grad_norm": 0.6295695900917053, + "learning_rate": 0.0014873246274915658, + "loss": 2.2084, + "step": 645 + }, + { + "epoch": 0.06814345991561181, + "grad_norm": 0.6682591438293457, + "learning_rate": 0.0014872786241896354, + "loss": 2.1858, + "step": 646 + }, + { + "epoch": 0.06824894514767932, + "grad_norm": 0.7897014021873474, + "learning_rate": 0.0014872325382719587, + "loss": 2.2669, + "step": 647 + 
}, + { + "epoch": 0.06835443037974684, + "grad_norm": 0.6743729710578918, + "learning_rate": 0.0014871863697436998, + "loss": 2.2756, + "step": 648 + }, + { + "epoch": 0.06845991561181435, + "grad_norm": 0.7170785069465637, + "learning_rate": 0.0014871401186100322, + "loss": 2.232, + "step": 649 + }, + { + "epoch": 0.06856540084388185, + "grad_norm": 0.6358445882797241, + "learning_rate": 0.0014870937848761388, + "loss": 2.2493, + "step": 650 + }, + { + "epoch": 0.06867088607594937, + "grad_norm": 0.6696831583976746, + "learning_rate": 0.0014870473685472112, + "loss": 2.2121, + "step": 651 + }, + { + "epoch": 0.06877637130801688, + "grad_norm": 0.646957278251648, + "learning_rate": 0.0014870008696284507, + "loss": 2.2221, + "step": 652 + }, + { + "epoch": 0.06888185654008439, + "grad_norm": 0.6351383924484253, + "learning_rate": 0.0014869542881250678, + "loss": 2.221, + "step": 653 + }, + { + "epoch": 0.0689873417721519, + "grad_norm": 0.6284206509590149, + "learning_rate": 0.001486907624042282, + "loss": 2.2351, + "step": 654 + }, + { + "epoch": 0.06909282700421941, + "grad_norm": 0.5398014187812805, + "learning_rate": 0.0014868608773853226, + "loss": 2.2156, + "step": 655 + }, + { + "epoch": 0.06919831223628692, + "grad_norm": 0.6053810715675354, + "learning_rate": 0.0014868140481594273, + "loss": 2.2313, + "step": 656 + }, + { + "epoch": 0.06930379746835443, + "grad_norm": 0.8261733651161194, + "learning_rate": 0.001486767136369844, + "loss": 2.1985, + "step": 657 + }, + { + "epoch": 0.06940928270042195, + "grad_norm": 0.7598422169685364, + "learning_rate": 0.0014867201420218292, + "loss": 2.1818, + "step": 658 + }, + { + "epoch": 0.06951476793248945, + "grad_norm": 0.508154571056366, + "learning_rate": 0.0014866730651206487, + "loss": 2.2216, + "step": 659 + }, + { + "epoch": 0.06962025316455696, + "grad_norm": 0.8388199210166931, + "learning_rate": 0.001486625905671578, + "loss": 2.2094, + "step": 660 + }, + { + "epoch": 0.06972573839662448, + "grad_norm": 
0.91300368309021, + "learning_rate": 0.0014865786636799015, + "loss": 2.1986, + "step": 661 + }, + { + "epoch": 0.06983122362869199, + "grad_norm": 0.5833263993263245, + "learning_rate": 0.0014865313391509126, + "loss": 2.1972, + "step": 662 + }, + { + "epoch": 0.06993670886075949, + "grad_norm": 0.6899173259735107, + "learning_rate": 0.0014864839320899148, + "loss": 2.1484, + "step": 663 + }, + { + "epoch": 0.070042194092827, + "grad_norm": 1.0072245597839355, + "learning_rate": 0.0014864364425022198, + "loss": 2.2376, + "step": 664 + }, + { + "epoch": 0.07014767932489452, + "grad_norm": 0.707954466342926, + "learning_rate": 0.001486388870393149, + "loss": 2.2109, + "step": 665 + }, + { + "epoch": 0.07025316455696203, + "grad_norm": 0.632369875907898, + "learning_rate": 0.0014863412157680336, + "loss": 2.1818, + "step": 666 + }, + { + "epoch": 0.07035864978902953, + "grad_norm": 0.9408541917800903, + "learning_rate": 0.0014862934786322131, + "loss": 2.2234, + "step": 667 + }, + { + "epoch": 0.07046413502109705, + "grad_norm": 0.6967310905456543, + "learning_rate": 0.0014862456589910368, + "loss": 2.1792, + "step": 668 + }, + { + "epoch": 0.07056962025316456, + "grad_norm": 0.5454062223434448, + "learning_rate": 0.0014861977568498632, + "loss": 2.2226, + "step": 669 + }, + { + "epoch": 0.07067510548523206, + "grad_norm": 0.7345542907714844, + "learning_rate": 0.00148614977221406, + "loss": 2.2136, + "step": 670 + }, + { + "epoch": 0.07078059071729957, + "grad_norm": 0.8643225431442261, + "learning_rate": 0.001486101705089004, + "loss": 2.224, + "step": 671 + }, + { + "epoch": 0.07088607594936709, + "grad_norm": 0.5435208082199097, + "learning_rate": 0.0014860535554800814, + "loss": 2.1885, + "step": 672 + }, + { + "epoch": 0.0709915611814346, + "grad_norm": 0.6293071508407593, + "learning_rate": 0.0014860053233926875, + "loss": 2.2162, + "step": 673 + }, + { + "epoch": 0.0710970464135021, + "grad_norm": 0.7489345669746399, + "learning_rate": 0.0014859570088322273, 
+ "loss": 2.1475, + "step": 674 + }, + { + "epoch": 0.07120253164556962, + "grad_norm": 0.6438894867897034, + "learning_rate": 0.0014859086118041145, + "loss": 2.2044, + "step": 675 + }, + { + "epoch": 0.07130801687763713, + "grad_norm": 0.5065727829933167, + "learning_rate": 0.001485860132313772, + "loss": 2.186, + "step": 676 + }, + { + "epoch": 0.07141350210970464, + "grad_norm": 0.5683093070983887, + "learning_rate": 0.0014858115703666325, + "loss": 2.1632, + "step": 677 + }, + { + "epoch": 0.07151898734177216, + "grad_norm": 0.5503770709037781, + "learning_rate": 0.001485762925968137, + "loss": 2.1919, + "step": 678 + }, + { + "epoch": 0.07162447257383966, + "grad_norm": 0.5375466346740723, + "learning_rate": 0.0014857141991237372, + "loss": 2.1697, + "step": 679 + }, + { + "epoch": 0.07172995780590717, + "grad_norm": 0.6691716313362122, + "learning_rate": 0.0014856653898388927, + "loss": 2.2012, + "step": 680 + }, + { + "epoch": 0.07183544303797468, + "grad_norm": 0.5603217482566833, + "learning_rate": 0.0014856164981190728, + "loss": 2.2014, + "step": 681 + }, + { + "epoch": 0.0719409282700422, + "grad_norm": 0.6600796580314636, + "learning_rate": 0.0014855675239697564, + "loss": 2.2134, + "step": 682 + }, + { + "epoch": 0.0720464135021097, + "grad_norm": 0.9693423509597778, + "learning_rate": 0.0014855184673964311, + "loss": 2.1978, + "step": 683 + }, + { + "epoch": 0.07215189873417721, + "grad_norm": 0.8745356202125549, + "learning_rate": 0.0014854693284045936, + "loss": 2.2165, + "step": 684 + }, + { + "epoch": 0.07225738396624473, + "grad_norm": 0.5489219427108765, + "learning_rate": 0.0014854201069997505, + "loss": 2.1739, + "step": 685 + }, + { + "epoch": 0.07236286919831224, + "grad_norm": 0.8775746822357178, + "learning_rate": 0.0014853708031874176, + "loss": 2.234, + "step": 686 + }, + { + "epoch": 0.07246835443037974, + "grad_norm": 0.8427210450172424, + "learning_rate": 0.001485321416973119, + "loss": 2.1898, + "step": 687 + }, + { + "epoch": 
0.07257383966244725, + "grad_norm": 0.7639915943145752, + "learning_rate": 0.0014852719483623893, + "loss": 2.223, + "step": 688 + }, + { + "epoch": 0.07267932489451477, + "grad_norm": 0.640358567237854, + "learning_rate": 0.001485222397360771, + "loss": 2.142, + "step": 689 + }, + { + "epoch": 0.07278481012658228, + "grad_norm": 0.6910299062728882, + "learning_rate": 0.001485172763973817, + "loss": 2.217, + "step": 690 + }, + { + "epoch": 0.07289029535864978, + "grad_norm": 0.6009826064109802, + "learning_rate": 0.0014851230482070892, + "loss": 2.2047, + "step": 691 + }, + { + "epoch": 0.0729957805907173, + "grad_norm": 0.5491281151771545, + "learning_rate": 0.001485073250066158, + "loss": 2.1678, + "step": 692 + }, + { + "epoch": 0.07310126582278481, + "grad_norm": 0.6311616897583008, + "learning_rate": 0.0014850233695566034, + "loss": 2.1964, + "step": 693 + }, + { + "epoch": 0.07320675105485232, + "grad_norm": 0.583867073059082, + "learning_rate": 0.0014849734066840158, + "loss": 2.2264, + "step": 694 + }, + { + "epoch": 0.07331223628691984, + "grad_norm": 0.6281875371932983, + "learning_rate": 0.0014849233614539926, + "loss": 2.1976, + "step": 695 + }, + { + "epoch": 0.07341772151898734, + "grad_norm": 0.7629311084747314, + "learning_rate": 0.001484873233872142, + "loss": 2.1719, + "step": 696 + }, + { + "epoch": 0.07352320675105485, + "grad_norm": 0.5720497965812683, + "learning_rate": 0.0014848230239440812, + "loss": 2.1768, + "step": 697 + }, + { + "epoch": 0.07362869198312236, + "grad_norm": 0.5988049507141113, + "learning_rate": 0.0014847727316754367, + "loss": 2.1562, + "step": 698 + }, + { + "epoch": 0.07373417721518988, + "grad_norm": 0.6457233428955078, + "learning_rate": 0.0014847223570718436, + "loss": 2.1458, + "step": 699 + }, + { + "epoch": 0.07383966244725738, + "grad_norm": 0.5142356753349304, + "learning_rate": 0.0014846719001389466, + "loss": 2.1417, + "step": 700 + }, + { + "epoch": 0.07394514767932489, + "grad_norm": 0.6057519316673279, + 
"learning_rate": 0.0014846213608823997, + "loss": 2.1318, + "step": 701 + }, + { + "epoch": 0.07405063291139241, + "grad_norm": 0.6165589094161987, + "learning_rate": 0.0014845707393078664, + "loss": 2.1554, + "step": 702 + }, + { + "epoch": 0.07415611814345992, + "grad_norm": 0.6006090641021729, + "learning_rate": 0.0014845200354210186, + "loss": 2.1853, + "step": 703 + }, + { + "epoch": 0.07426160337552742, + "grad_norm": 0.5726714134216309, + "learning_rate": 0.0014844692492275385, + "loss": 2.177, + "step": 704 + }, + { + "epoch": 0.07436708860759493, + "grad_norm": 0.6789150238037109, + "learning_rate": 0.0014844183807331164, + "loss": 2.1598, + "step": 705 + }, + { + "epoch": 0.07447257383966245, + "grad_norm": 0.8284936547279358, + "learning_rate": 0.0014843674299434527, + "loss": 2.1822, + "step": 706 + }, + { + "epoch": 0.07457805907172996, + "grad_norm": 0.8208980560302734, + "learning_rate": 0.0014843163968642566, + "loss": 2.1561, + "step": 707 + }, + { + "epoch": 0.07468354430379746, + "grad_norm": 0.6120538711547852, + "learning_rate": 0.0014842652815012466, + "loss": 2.1812, + "step": 708 + }, + { + "epoch": 0.07478902953586498, + "grad_norm": 0.5558724999427795, + "learning_rate": 0.0014842140838601501, + "loss": 2.17, + "step": 709 + }, + { + "epoch": 0.07489451476793249, + "grad_norm": 0.6415695548057556, + "learning_rate": 0.001484162803946705, + "loss": 2.1654, + "step": 710 + }, + { + "epoch": 0.075, + "grad_norm": 0.5507535934448242, + "learning_rate": 0.0014841114417666564, + "loss": 2.1614, + "step": 711 + }, + { + "epoch": 0.0751054852320675, + "grad_norm": 0.6807726621627808, + "learning_rate": 0.0014840599973257604, + "loss": 2.1809, + "step": 712 + }, + { + "epoch": 0.07521097046413502, + "grad_norm": 0.5205746293067932, + "learning_rate": 0.001484008470629781, + "loss": 2.1627, + "step": 713 + }, + { + "epoch": 0.07531645569620253, + "grad_norm": 0.7059740424156189, + "learning_rate": 0.0014839568616844927, + "loss": 2.1467, + "step": 
714 + }, + { + "epoch": 0.07542194092827004, + "grad_norm": 0.5901152491569519, + "learning_rate": 0.0014839051704956781, + "loss": 2.1639, + "step": 715 + }, + { + "epoch": 0.07552742616033756, + "grad_norm": 0.7511160373687744, + "learning_rate": 0.0014838533970691296, + "loss": 2.1461, + "step": 716 + }, + { + "epoch": 0.07563291139240506, + "grad_norm": 0.6780573725700378, + "learning_rate": 0.0014838015414106486, + "loss": 2.1397, + "step": 717 + }, + { + "epoch": 0.07573839662447257, + "grad_norm": 0.6946103572845459, + "learning_rate": 0.0014837496035260457, + "loss": 2.1076, + "step": 718 + }, + { + "epoch": 0.07584388185654009, + "grad_norm": 0.5783368349075317, + "learning_rate": 0.0014836975834211412, + "loss": 2.1482, + "step": 719 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 0.5413892865180969, + "learning_rate": 0.0014836454811017635, + "loss": 2.136, + "step": 720 + }, + { + "epoch": 0.0760548523206751, + "grad_norm": 0.6651360988616943, + "learning_rate": 0.0014835932965737517, + "loss": 2.1681, + "step": 721 + }, + { + "epoch": 0.07616033755274261, + "grad_norm": 0.6534151434898376, + "learning_rate": 0.0014835410298429529, + "loss": 2.1365, + "step": 722 + }, + { + "epoch": 0.07626582278481013, + "grad_norm": 0.6770239472389221, + "learning_rate": 0.001483488680915224, + "loss": 2.1413, + "step": 723 + }, + { + "epoch": 0.07637130801687764, + "grad_norm": 0.5469748973846436, + "learning_rate": 0.0014834362497964308, + "loss": 2.1151, + "step": 724 + }, + { + "epoch": 0.07647679324894514, + "grad_norm": 0.5278385281562805, + "learning_rate": 0.0014833837364924484, + "loss": 2.1409, + "step": 725 + }, + { + "epoch": 0.07658227848101266, + "grad_norm": 0.5313040614128113, + "learning_rate": 0.0014833311410091617, + "loss": 2.1364, + "step": 726 + }, + { + "epoch": 0.07668776371308017, + "grad_norm": 0.6335974931716919, + "learning_rate": 0.0014832784633524638, + "loss": 2.1353, + "step": 727 + }, + { + "epoch": 0.07679324894514768, + 
"grad_norm": 0.7189996242523193, + "learning_rate": 0.0014832257035282577, + "loss": 2.1621, + "step": 728 + }, + { + "epoch": 0.07689873417721518, + "grad_norm": 0.759945273399353, + "learning_rate": 0.0014831728615424553, + "loss": 2.1565, + "step": 729 + }, + { + "epoch": 0.0770042194092827, + "grad_norm": 0.5962994694709778, + "learning_rate": 0.0014831199374009778, + "loss": 2.1237, + "step": 730 + }, + { + "epoch": 0.07710970464135021, + "grad_norm": 0.5249840021133423, + "learning_rate": 0.0014830669311097554, + "loss": 2.1361, + "step": 731 + }, + { + "epoch": 0.07721518987341772, + "grad_norm": 0.6553146839141846, + "learning_rate": 0.0014830138426747282, + "loss": 2.1413, + "step": 732 + }, + { + "epoch": 0.07732067510548524, + "grad_norm": 0.6115611791610718, + "learning_rate": 0.0014829606721018448, + "loss": 2.1606, + "step": 733 + }, + { + "epoch": 0.07742616033755274, + "grad_norm": 0.5286392569541931, + "learning_rate": 0.0014829074193970634, + "loss": 2.1794, + "step": 734 + }, + { + "epoch": 0.07753164556962025, + "grad_norm": 0.5658390522003174, + "learning_rate": 0.0014828540845663507, + "loss": 2.1471, + "step": 735 + }, + { + "epoch": 0.07763713080168777, + "grad_norm": 0.6526901721954346, + "learning_rate": 0.0014828006676156837, + "loss": 2.1234, + "step": 736 + }, + { + "epoch": 0.07774261603375528, + "grad_norm": 0.7104495167732239, + "learning_rate": 0.0014827471685510477, + "loss": 2.1671, + "step": 737 + }, + { + "epoch": 0.07784810126582278, + "grad_norm": 0.6573004126548767, + "learning_rate": 0.0014826935873784378, + "loss": 2.125, + "step": 738 + }, + { + "epoch": 0.07795358649789029, + "grad_norm": 0.48405739665031433, + "learning_rate": 0.0014826399241038577, + "loss": 2.1761, + "step": 739 + }, + { + "epoch": 0.07805907172995781, + "grad_norm": 0.7094058394432068, + "learning_rate": 0.0014825861787333208, + "loss": 2.1174, + "step": 740 + }, + { + "epoch": 0.07816455696202532, + "grad_norm": 0.6942089796066284, + "learning_rate": 
0.00148253235127285, + "loss": 2.1369, + "step": 741 + }, + { + "epoch": 0.07827004219409282, + "grad_norm": 0.7145075798034668, + "learning_rate": 0.001482478441728476, + "loss": 2.1626, + "step": 742 + }, + { + "epoch": 0.07837552742616034, + "grad_norm": 0.6512417793273926, + "learning_rate": 0.0014824244501062402, + "loss": 2.1269, + "step": 743 + }, + { + "epoch": 0.07848101265822785, + "grad_norm": 0.5327754616737366, + "learning_rate": 0.0014823703764121929, + "loss": 2.1456, + "step": 744 + }, + { + "epoch": 0.07858649789029536, + "grad_norm": 0.6271353960037231, + "learning_rate": 0.0014823162206523926, + "loss": 2.1378, + "step": 745 + }, + { + "epoch": 0.07869198312236286, + "grad_norm": 0.6336262226104736, + "learning_rate": 0.0014822619828329085, + "loss": 2.13, + "step": 746 + }, + { + "epoch": 0.07879746835443038, + "grad_norm": 0.5520193576812744, + "learning_rate": 0.0014822076629598176, + "loss": 2.1134, + "step": 747 + }, + { + "epoch": 0.07890295358649789, + "grad_norm": 0.7875330448150635, + "learning_rate": 0.001482153261039207, + "loss": 2.0911, + "step": 748 + }, + { + "epoch": 0.0790084388185654, + "grad_norm": 1.0966858863830566, + "learning_rate": 0.0014820987770771726, + "loss": 2.1361, + "step": 749 + }, + { + "epoch": 0.07911392405063292, + "grad_norm": 0.9567157030105591, + "learning_rate": 0.0014820442110798197, + "loss": 2.1272, + "step": 750 + }, + { + "epoch": 0.07921940928270042, + "grad_norm": 0.5886783003807068, + "learning_rate": 0.0014819895630532628, + "loss": 2.1252, + "step": 751 + }, + { + "epoch": 0.07932489451476793, + "grad_norm": 0.7804371118545532, + "learning_rate": 0.0014819348330036251, + "loss": 2.1574, + "step": 752 + }, + { + "epoch": 0.07943037974683544, + "grad_norm": 1.086670160293579, + "learning_rate": 0.0014818800209370397, + "loss": 2.1303, + "step": 753 + }, + { + "epoch": 0.07953586497890296, + "grad_norm": 0.8002756237983704, + "learning_rate": 0.0014818251268596486, + "loss": 2.1232, + "step": 754 + 
}, + { + "epoch": 0.07964135021097046, + "grad_norm": 0.6548103094100952, + "learning_rate": 0.0014817701507776025, + "loss": 2.139, + "step": 755 + }, + { + "epoch": 0.07974683544303797, + "grad_norm": 0.979441225528717, + "learning_rate": 0.0014817150926970625, + "loss": 2.1199, + "step": 756 + }, + { + "epoch": 0.07985232067510549, + "grad_norm": 1.2518502473831177, + "learning_rate": 0.0014816599526241974, + "loss": 2.1089, + "step": 757 + }, + { + "epoch": 0.079957805907173, + "grad_norm": 0.5926666259765625, + "learning_rate": 0.0014816047305651863, + "loss": 2.1596, + "step": 758 + }, + { + "epoch": 0.0800632911392405, + "grad_norm": 1.0844953060150146, + "learning_rate": 0.0014815494265262169, + "loss": 2.1028, + "step": 759 + }, + { + "epoch": 0.08016877637130802, + "grad_norm": 0.7819319367408752, + "learning_rate": 0.0014814940405134865, + "loss": 2.1037, + "step": 760 + }, + { + "epoch": 0.08027426160337553, + "grad_norm": 0.6302589178085327, + "learning_rate": 0.0014814385725332015, + "loss": 2.1486, + "step": 761 + }, + { + "epoch": 0.08037974683544304, + "grad_norm": 0.9515467286109924, + "learning_rate": 0.001481383022591577, + "loss": 2.1176, + "step": 762 + }, + { + "epoch": 0.08048523206751054, + "grad_norm": 0.6501333117485046, + "learning_rate": 0.0014813273906948378, + "loss": 2.0892, + "step": 763 + }, + { + "epoch": 0.08059071729957806, + "grad_norm": 0.5580490231513977, + "learning_rate": 0.0014812716768492177, + "loss": 2.1114, + "step": 764 + }, + { + "epoch": 0.08069620253164557, + "grad_norm": 0.6237852573394775, + "learning_rate": 0.0014812158810609598, + "loss": 2.116, + "step": 765 + }, + { + "epoch": 0.08080168776371308, + "grad_norm": 0.5214159488677979, + "learning_rate": 0.0014811600033363165, + "loss": 2.1161, + "step": 766 + }, + { + "epoch": 0.0809071729957806, + "grad_norm": 0.5592976212501526, + "learning_rate": 0.0014811040436815486, + "loss": 2.0917, + "step": 767 + }, + { + "epoch": 0.0810126582278481, + "grad_norm": 
0.5947949290275574, + "learning_rate": 0.001481048002102927, + "loss": 2.1083, + "step": 768 + }, + { + "epoch": 0.08111814345991561, + "grad_norm": 0.5258958339691162, + "learning_rate": 0.0014809918786067315, + "loss": 2.1295, + "step": 769 + }, + { + "epoch": 0.08122362869198312, + "grad_norm": 0.5832020044326782, + "learning_rate": 0.001480935673199251, + "loss": 2.09, + "step": 770 + }, + { + "epoch": 0.08132911392405064, + "grad_norm": 0.6247684955596924, + "learning_rate": 0.0014808793858867837, + "loss": 2.0826, + "step": 771 + }, + { + "epoch": 0.08143459915611814, + "grad_norm": 0.538936197757721, + "learning_rate": 0.0014808230166756366, + "loss": 2.1637, + "step": 772 + }, + { + "epoch": 0.08154008438818565, + "grad_norm": 0.6640123724937439, + "learning_rate": 0.0014807665655721261, + "loss": 2.1025, + "step": 773 + }, + { + "epoch": 0.08164556962025317, + "grad_norm": 0.588487446308136, + "learning_rate": 0.0014807100325825782, + "loss": 2.1076, + "step": 774 + }, + { + "epoch": 0.08175105485232068, + "grad_norm": 0.5090140104293823, + "learning_rate": 0.0014806534177133274, + "loss": 2.0662, + "step": 775 + }, + { + "epoch": 0.08185654008438818, + "grad_norm": 0.5783838629722595, + "learning_rate": 0.0014805967209707178, + "loss": 2.0595, + "step": 776 + }, + { + "epoch": 0.0819620253164557, + "grad_norm": 0.48650825023651123, + "learning_rate": 0.0014805399423611025, + "loss": 2.1048, + "step": 777 + }, + { + "epoch": 0.08206751054852321, + "grad_norm": 0.5998829007148743, + "learning_rate": 0.0014804830818908438, + "loss": 2.1254, + "step": 778 + }, + { + "epoch": 0.08217299578059072, + "grad_norm": 0.8010297417640686, + "learning_rate": 0.0014804261395663133, + "loss": 2.1415, + "step": 779 + }, + { + "epoch": 0.08227848101265822, + "grad_norm": 0.9018331170082092, + "learning_rate": 0.0014803691153938915, + "loss": 2.1299, + "step": 780 + }, + { + "epoch": 0.08238396624472574, + "grad_norm": 0.6159271597862244, + "learning_rate": 
0.0014803120093799687, + "loss": 2.0843, + "step": 781 + }, + { + "epoch": 0.08248945147679325, + "grad_norm": 0.6587032079696655, + "learning_rate": 0.0014802548215309434, + "loss": 2.1074, + "step": 782 + }, + { + "epoch": 0.08259493670886076, + "grad_norm": 0.9247770309448242, + "learning_rate": 0.001480197551853224, + "loss": 2.1061, + "step": 783 + }, + { + "epoch": 0.08270042194092828, + "grad_norm": 0.7181835770606995, + "learning_rate": 0.0014801402003532277, + "loss": 2.1106, + "step": 784 + }, + { + "epoch": 0.08280590717299578, + "grad_norm": 0.538642942905426, + "learning_rate": 0.0014800827670373815, + "loss": 2.1199, + "step": 785 + }, + { + "epoch": 0.08291139240506329, + "grad_norm": 0.6832083463668823, + "learning_rate": 0.0014800252519121203, + "loss": 2.0952, + "step": 786 + }, + { + "epoch": 0.0830168776371308, + "grad_norm": 0.5218282341957092, + "learning_rate": 0.0014799676549838898, + "loss": 2.1417, + "step": 787 + }, + { + "epoch": 0.08312236286919832, + "grad_norm": 0.5753419399261475, + "learning_rate": 0.0014799099762591434, + "loss": 2.1143, + "step": 788 + }, + { + "epoch": 0.08322784810126582, + "grad_norm": 0.7013536095619202, + "learning_rate": 0.0014798522157443443, + "loss": 2.1037, + "step": 789 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 0.6204512715339661, + "learning_rate": 0.0014797943734459653, + "loss": 2.1491, + "step": 790 + }, + { + "epoch": 0.08343881856540085, + "grad_norm": 0.5236341953277588, + "learning_rate": 0.0014797364493704876, + "loss": 2.1207, + "step": 791 + }, + { + "epoch": 0.08354430379746836, + "grad_norm": 0.5543285608291626, + "learning_rate": 0.001479678443524402, + "loss": 2.1032, + "step": 792 + }, + { + "epoch": 0.08364978902953586, + "grad_norm": 0.49682843685150146, + "learning_rate": 0.0014796203559142081, + "loss": 2.0721, + "step": 793 + }, + { + "epoch": 0.08375527426160338, + "grad_norm": 0.5114486217498779, + "learning_rate": 0.0014795621865464155, + "loss": 2.0921, + "step": 
794 + }, + { + "epoch": 0.08386075949367089, + "grad_norm": 0.5353013873100281, + "learning_rate": 0.0014795039354275417, + "loss": 2.0581, + "step": 795 + }, + { + "epoch": 0.0839662447257384, + "grad_norm": 0.49995937943458557, + "learning_rate": 0.0014794456025641143, + "loss": 2.0935, + "step": 796 + }, + { + "epoch": 0.0840717299578059, + "grad_norm": 0.685304582118988, + "learning_rate": 0.00147938718796267, + "loss": 2.1177, + "step": 797 + }, + { + "epoch": 0.08417721518987342, + "grad_norm": 0.845687210559845, + "learning_rate": 0.001479328691629754, + "loss": 2.0772, + "step": 798 + }, + { + "epoch": 0.08428270042194093, + "grad_norm": 0.8249184489250183, + "learning_rate": 0.0014792701135719214, + "loss": 2.0741, + "step": 799 + }, + { + "epoch": 0.08438818565400844, + "grad_norm": 0.5277102589607239, + "learning_rate": 0.001479211453795736, + "loss": 2.0398, + "step": 800 + }, + { + "epoch": 0.08449367088607596, + "grad_norm": 0.6818417906761169, + "learning_rate": 0.001479152712307771, + "loss": 2.0706, + "step": 801 + }, + { + "epoch": 0.08459915611814346, + "grad_norm": 0.7343071699142456, + "learning_rate": 0.0014790938891146089, + "loss": 2.0886, + "step": 802 + }, + { + "epoch": 0.08470464135021097, + "grad_norm": 0.6245061755180359, + "learning_rate": 0.001479034984222841, + "loss": 2.0615, + "step": 803 + }, + { + "epoch": 0.08481012658227848, + "grad_norm": 0.6633434891700745, + "learning_rate": 0.0014789759976390675, + "loss": 2.04, + "step": 804 + }, + { + "epoch": 0.084915611814346, + "grad_norm": 0.7766271233558655, + "learning_rate": 0.0014789169293698988, + "loss": 2.0914, + "step": 805 + }, + { + "epoch": 0.0850210970464135, + "grad_norm": 0.6865748763084412, + "learning_rate": 0.0014788577794219533, + "loss": 2.1146, + "step": 806 + }, + { + "epoch": 0.08512658227848101, + "grad_norm": 0.5118980407714844, + "learning_rate": 0.0014787985478018593, + "loss": 2.0702, + "step": 807 + }, + { + "epoch": 0.08523206751054853, + "grad_norm": 
0.6359384655952454, + "learning_rate": 0.0014787392345162538, + "loss": 2.1013, + "step": 808 + }, + { + "epoch": 0.08533755274261604, + "grad_norm": 0.6013325452804565, + "learning_rate": 0.0014786798395717833, + "loss": 2.0357, + "step": 809 + }, + { + "epoch": 0.08544303797468354, + "grad_norm": 0.5776836276054382, + "learning_rate": 0.0014786203629751033, + "loss": 2.0642, + "step": 810 + }, + { + "epoch": 0.08554852320675105, + "grad_norm": 0.647932231426239, + "learning_rate": 0.001478560804732878, + "loss": 2.0943, + "step": 811 + }, + { + "epoch": 0.08565400843881857, + "grad_norm": 0.7398493885993958, + "learning_rate": 0.001478501164851782, + "loss": 2.0618, + "step": 812 + }, + { + "epoch": 0.08575949367088608, + "grad_norm": 0.6168048977851868, + "learning_rate": 0.0014784414433384977, + "loss": 2.1172, + "step": 813 + }, + { + "epoch": 0.08586497890295358, + "grad_norm": 0.5723512768745422, + "learning_rate": 0.0014783816401997174, + "loss": 2.0957, + "step": 814 + }, + { + "epoch": 0.0859704641350211, + "grad_norm": 0.5643520355224609, + "learning_rate": 0.0014783217554421423, + "loss": 2.0655, + "step": 815 + }, + { + "epoch": 0.08607594936708861, + "grad_norm": 0.6806191205978394, + "learning_rate": 0.0014782617890724827, + "loss": 2.0727, + "step": 816 + }, + { + "epoch": 0.08618143459915611, + "grad_norm": 0.6039706468582153, + "learning_rate": 0.0014782017410974583, + "loss": 2.0907, + "step": 817 + }, + { + "epoch": 0.08628691983122364, + "grad_norm": 0.5559926629066467, + "learning_rate": 0.0014781416115237976, + "loss": 2.0818, + "step": 818 + }, + { + "epoch": 0.08639240506329114, + "grad_norm": 0.7588875889778137, + "learning_rate": 0.0014780814003582385, + "loss": 2.0621, + "step": 819 + }, + { + "epoch": 0.08649789029535865, + "grad_norm": 0.911372721195221, + "learning_rate": 0.0014780211076075279, + "loss": 2.0678, + "step": 820 + }, + { + "epoch": 0.08660337552742615, + "grad_norm": 0.6933899521827698, + "learning_rate": 
0.001477960733278422, + "loss": 2.0818, + "step": 821 + }, + { + "epoch": 0.08670886075949367, + "grad_norm": 0.5346640348434448, + "learning_rate": 0.001477900277377686, + "loss": 2.0867, + "step": 822 + }, + { + "epoch": 0.08681434599156118, + "grad_norm": 0.7706379294395447, + "learning_rate": 0.0014778397399120942, + "loss": 2.0834, + "step": 823 + }, + { + "epoch": 0.08691983122362869, + "grad_norm": 0.9137313961982727, + "learning_rate": 0.0014777791208884304, + "loss": 2.1319, + "step": 824 + }, + { + "epoch": 0.08702531645569621, + "grad_norm": 0.7282189130783081, + "learning_rate": 0.0014777184203134867, + "loss": 2.0693, + "step": 825 + }, + { + "epoch": 0.08713080168776371, + "grad_norm": 0.5201073884963989, + "learning_rate": 0.0014776576381940658, + "loss": 2.0931, + "step": 826 + }, + { + "epoch": 0.08723628691983122, + "grad_norm": 0.7590122818946838, + "learning_rate": 0.0014775967745369778, + "loss": 2.0822, + "step": 827 + }, + { + "epoch": 0.08734177215189873, + "grad_norm": 0.7985169291496277, + "learning_rate": 0.001477535829349043, + "loss": 2.071, + "step": 828 + }, + { + "epoch": 0.08744725738396625, + "grad_norm": 0.5806565880775452, + "learning_rate": 0.0014774748026370908, + "loss": 2.0706, + "step": 829 + }, + { + "epoch": 0.08755274261603375, + "grad_norm": 0.5729843378067017, + "learning_rate": 0.0014774136944079594, + "loss": 2.0727, + "step": 830 + }, + { + "epoch": 0.08765822784810126, + "grad_norm": 0.6843082308769226, + "learning_rate": 0.0014773525046684964, + "loss": 2.0411, + "step": 831 + }, + { + "epoch": 0.08776371308016878, + "grad_norm": 0.7082656621932983, + "learning_rate": 0.0014772912334255585, + "loss": 2.0423, + "step": 832 + }, + { + "epoch": 0.08786919831223629, + "grad_norm": 0.6524061560630798, + "learning_rate": 0.0014772298806860111, + "loss": 2.0526, + "step": 833 + }, + { + "epoch": 0.0879746835443038, + "grad_norm": 0.6275274753570557, + "learning_rate": 0.0014771684464567293, + "loss": 2.028, + "step": 834 
+ }, + { + "epoch": 0.08808016877637131, + "grad_norm": 0.5804421305656433, + "learning_rate": 0.0014771069307445972, + "loss": 2.077, + "step": 835 + }, + { + "epoch": 0.08818565400843882, + "grad_norm": 0.5277037024497986, + "learning_rate": 0.0014770453335565077, + "loss": 2.0541, + "step": 836 + }, + { + "epoch": 0.08829113924050633, + "grad_norm": 0.5846939086914062, + "learning_rate": 0.0014769836548993631, + "loss": 2.0724, + "step": 837 + }, + { + "epoch": 0.08839662447257383, + "grad_norm": 0.5014340877532959, + "learning_rate": 0.0014769218947800749, + "loss": 2.091, + "step": 838 + }, + { + "epoch": 0.08850210970464135, + "grad_norm": 0.5391857624053955, + "learning_rate": 0.0014768600532055638, + "loss": 2.0491, + "step": 839 + }, + { + "epoch": 0.08860759493670886, + "grad_norm": 0.649378776550293, + "learning_rate": 0.0014767981301827592, + "loss": 2.086, + "step": 840 + }, + { + "epoch": 0.08871308016877637, + "grad_norm": 0.7407304048538208, + "learning_rate": 0.0014767361257186, + "loss": 2.0723, + "step": 841 + }, + { + "epoch": 0.08881856540084389, + "grad_norm": 0.7203238010406494, + "learning_rate": 0.0014766740398200343, + "loss": 2.0214, + "step": 842 + }, + { + "epoch": 0.0889240506329114, + "grad_norm": 0.5246937870979309, + "learning_rate": 0.0014766118724940185, + "loss": 2.0654, + "step": 843 + }, + { + "epoch": 0.0890295358649789, + "grad_norm": 0.5897988080978394, + "learning_rate": 0.0014765496237475195, + "loss": 2.0468, + "step": 844 + }, + { + "epoch": 0.08913502109704641, + "grad_norm": 0.5388777852058411, + "learning_rate": 0.001476487293587512, + "loss": 2.0648, + "step": 845 + }, + { + "epoch": 0.08924050632911393, + "grad_norm": 0.5750778913497925, + "learning_rate": 0.0014764248820209808, + "loss": 2.0838, + "step": 846 + }, + { + "epoch": 0.08934599156118143, + "grad_norm": 0.7749783396720886, + "learning_rate": 0.0014763623890549193, + "loss": 2.0807, + "step": 847 + }, + { + "epoch": 0.08945147679324894, + "grad_norm": 
0.6614772081375122, + "learning_rate": 0.00147629981469633, + "loss": 2.0737, + "step": 848 + }, + { + "epoch": 0.08955696202531646, + "grad_norm": 0.5145711302757263, + "learning_rate": 0.001476237158952225, + "loss": 2.0326, + "step": 849 + }, + { + "epoch": 0.08966244725738397, + "grad_norm": 0.5340364575386047, + "learning_rate": 0.0014761744218296249, + "loss": 2.0154, + "step": 850 + }, + { + "epoch": 0.08976793248945147, + "grad_norm": 0.6012371778488159, + "learning_rate": 0.0014761116033355597, + "loss": 2.0576, + "step": 851 + }, + { + "epoch": 0.08987341772151898, + "grad_norm": 0.6070348620414734, + "learning_rate": 0.001476048703477069, + "loss": 2.0781, + "step": 852 + }, + { + "epoch": 0.0899789029535865, + "grad_norm": 0.5716895461082458, + "learning_rate": 0.0014759857222612003, + "loss": 2.0414, + "step": 853 + }, + { + "epoch": 0.09008438818565401, + "grad_norm": 0.5582172870635986, + "learning_rate": 0.0014759226596950115, + "loss": 2.1036, + "step": 854 + }, + { + "epoch": 0.09018987341772151, + "grad_norm": 0.693910539150238, + "learning_rate": 0.0014758595157855687, + "loss": 2.0542, + "step": 855 + }, + { + "epoch": 0.09029535864978903, + "grad_norm": 0.7997310161590576, + "learning_rate": 0.001475796290539948, + "loss": 2.0749, + "step": 856 + }, + { + "epoch": 0.09040084388185654, + "grad_norm": 0.6175161004066467, + "learning_rate": 0.0014757329839652335, + "loss": 2.049, + "step": 857 + }, + { + "epoch": 0.09050632911392405, + "grad_norm": 0.5473247766494751, + "learning_rate": 0.0014756695960685194, + "loss": 2.0645, + "step": 858 + }, + { + "epoch": 0.09061181434599157, + "grad_norm": 0.5428683161735535, + "learning_rate": 0.0014756061268569086, + "loss": 2.0531, + "step": 859 + }, + { + "epoch": 0.09071729957805907, + "grad_norm": 0.5553563833236694, + "learning_rate": 0.001475542576337513, + "loss": 2.0024, + "step": 860 + }, + { + "epoch": 0.09082278481012658, + "grad_norm": 0.5686576962471008, + "learning_rate": 
0.001475478944517454, + "loss": 2.0656, + "step": 861 + }, + { + "epoch": 0.09092827004219409, + "grad_norm": 0.5836552977561951, + "learning_rate": 0.0014754152314038617, + "loss": 2.0443, + "step": 862 + }, + { + "epoch": 0.09103375527426161, + "grad_norm": 0.6624174118041992, + "learning_rate": 0.0014753514370038753, + "loss": 2.0469, + "step": 863 + }, + { + "epoch": 0.09113924050632911, + "grad_norm": 0.6814166307449341, + "learning_rate": 0.0014752875613246435, + "loss": 2.0621, + "step": 864 + }, + { + "epoch": 0.09124472573839662, + "grad_norm": 0.6406340003013611, + "learning_rate": 0.001475223604373324, + "loss": 2.0719, + "step": 865 + }, + { + "epoch": 0.09135021097046414, + "grad_norm": 0.5355114340782166, + "learning_rate": 0.0014751595661570832, + "loss": 2.0135, + "step": 866 + }, + { + "epoch": 0.09145569620253165, + "grad_norm": 0.7155851125717163, + "learning_rate": 0.001475095446683097, + "loss": 2.0547, + "step": 867 + }, + { + "epoch": 0.09156118143459915, + "grad_norm": 0.6150608658790588, + "learning_rate": 0.0014750312459585505, + "loss": 2.0947, + "step": 868 + }, + { + "epoch": 0.09166666666666666, + "grad_norm": 0.5734235644340515, + "learning_rate": 0.0014749669639906374, + "loss": 2.0204, + "step": 869 + }, + { + "epoch": 0.09177215189873418, + "grad_norm": 0.6992524266242981, + "learning_rate": 0.001474902600786561, + "loss": 2.072, + "step": 870 + }, + { + "epoch": 0.09187763713080169, + "grad_norm": 0.7278302907943726, + "learning_rate": 0.0014748381563535337, + "loss": 2.0516, + "step": 871 + }, + { + "epoch": 0.0919831223628692, + "grad_norm": 0.5908600091934204, + "learning_rate": 0.0014747736306987764, + "loss": 2.0442, + "step": 872 + }, + { + "epoch": 0.09208860759493671, + "grad_norm": 0.5849860310554504, + "learning_rate": 0.0014747090238295198, + "loss": 2.0369, + "step": 873 + }, + { + "epoch": 0.09219409282700422, + "grad_norm": 0.7421837449073792, + "learning_rate": 0.0014746443357530033, + "loss": 2.0491, + "step": 874 
+ }, + { + "epoch": 0.09229957805907173, + "grad_norm": 0.6856223940849304, + "learning_rate": 0.0014745795664764757, + "loss": 2.0304, + "step": 875 + }, + { + "epoch": 0.09240506329113925, + "grad_norm": 0.6184816360473633, + "learning_rate": 0.0014745147160071944, + "loss": 2.0501, + "step": 876 + }, + { + "epoch": 0.09251054852320675, + "grad_norm": 0.5676249265670776, + "learning_rate": 0.0014744497843524266, + "loss": 2.0385, + "step": 877 + }, + { + "epoch": 0.09261603375527426, + "grad_norm": 0.636999785900116, + "learning_rate": 0.001474384771519448, + "loss": 2.0227, + "step": 878 + }, + { + "epoch": 0.09272151898734177, + "grad_norm": 0.6894224286079407, + "learning_rate": 0.0014743196775155434, + "loss": 2.077, + "step": 879 + }, + { + "epoch": 0.09282700421940929, + "grad_norm": 0.6502247452735901, + "learning_rate": 0.0014742545023480075, + "loss": 2.0492, + "step": 880 + }, + { + "epoch": 0.0929324894514768, + "grad_norm": 0.6119763255119324, + "learning_rate": 0.001474189246024143, + "loss": 2.0709, + "step": 881 + }, + { + "epoch": 0.0930379746835443, + "grad_norm": 0.5842965245246887, + "learning_rate": 0.0014741239085512624, + "loss": 2.0164, + "step": 882 + }, + { + "epoch": 0.09314345991561182, + "grad_norm": 0.6609763503074646, + "learning_rate": 0.0014740584899366868, + "loss": 1.9744, + "step": 883 + }, + { + "epoch": 0.09324894514767933, + "grad_norm": 0.5729326009750366, + "learning_rate": 0.0014739929901877473, + "loss": 2.0144, + "step": 884 + }, + { + "epoch": 0.09335443037974683, + "grad_norm": 0.6974164843559265, + "learning_rate": 0.001473927409311783, + "loss": 2.0849, + "step": 885 + }, + { + "epoch": 0.09345991561181434, + "grad_norm": 0.8502578735351562, + "learning_rate": 0.0014738617473161425, + "loss": 2.0068, + "step": 886 + }, + { + "epoch": 0.09356540084388186, + "grad_norm": 0.6608766317367554, + "learning_rate": 0.0014737960042081836, + "loss": 2.0229, + "step": 887 + }, + { + "epoch": 0.09367088607594937, + "grad_norm": 
0.5250244140625, + "learning_rate": 0.0014737301799952734, + "loss": 2.0519, + "step": 888 + }, + { + "epoch": 0.09377637130801687, + "grad_norm": 0.5848273634910583, + "learning_rate": 0.001473664274684788, + "loss": 2.03, + "step": 889 + }, + { + "epoch": 0.0938818565400844, + "grad_norm": 0.5367715954780579, + "learning_rate": 0.0014735982882841117, + "loss": 2.0457, + "step": 890 + }, + { + "epoch": 0.0939873417721519, + "grad_norm": 0.5529886484146118, + "learning_rate": 0.0014735322208006391, + "loss": 2.0878, + "step": 891 + }, + { + "epoch": 0.0940928270042194, + "grad_norm": 0.5328641533851624, + "learning_rate": 0.0014734660722417734, + "loss": 2.0303, + "step": 892 + }, + { + "epoch": 0.09419831223628691, + "grad_norm": 0.5158683657646179, + "learning_rate": 0.0014733998426149266, + "loss": 2.0123, + "step": 893 + }, + { + "epoch": 0.09430379746835443, + "grad_norm": 0.5676746368408203, + "learning_rate": 0.0014733335319275203, + "loss": 2.0578, + "step": 894 + }, + { + "epoch": 0.09440928270042194, + "grad_norm": 0.5744146108627319, + "learning_rate": 0.001473267140186985, + "loss": 2.0084, + "step": 895 + }, + { + "epoch": 0.09451476793248945, + "grad_norm": 0.5250632166862488, + "learning_rate": 0.00147320066740076, + "loss": 2.025, + "step": 896 + }, + { + "epoch": 0.09462025316455697, + "grad_norm": 0.6283937096595764, + "learning_rate": 0.001473134113576294, + "loss": 2.0188, + "step": 897 + }, + { + "epoch": 0.09472573839662447, + "grad_norm": 0.7158836722373962, + "learning_rate": 0.0014730674787210448, + "loss": 2.0269, + "step": 898 + }, + { + "epoch": 0.09483122362869198, + "grad_norm": 0.7558615803718567, + "learning_rate": 0.0014730007628424792, + "loss": 2.011, + "step": 899 + }, + { + "epoch": 0.0949367088607595, + "grad_norm": 0.783729076385498, + "learning_rate": 0.0014729339659480727, + "loss": 2.0192, + "step": 900 + }, + { + "epoch": 0.095042194092827, + "grad_norm": 0.6230261921882629, + "learning_rate": 0.0014728670880453105, + 
"loss": 2.0397, + "step": 901 + }, + { + "epoch": 0.09514767932489451, + "grad_norm": 0.5222036838531494, + "learning_rate": 0.0014728001291416863, + "loss": 2.0422, + "step": 902 + }, + { + "epoch": 0.09525316455696202, + "grad_norm": 0.6623588800430298, + "learning_rate": 0.001472733089244704, + "loss": 2.0293, + "step": 903 + }, + { + "epoch": 0.09535864978902954, + "grad_norm": 0.7538248300552368, + "learning_rate": 0.0014726659683618746, + "loss": 2.0329, + "step": 904 + }, + { + "epoch": 0.09546413502109705, + "grad_norm": 0.6428435444831848, + "learning_rate": 0.0014725987665007202, + "loss": 2.0108, + "step": 905 + }, + { + "epoch": 0.09556962025316455, + "grad_norm": 0.6666980981826782, + "learning_rate": 0.0014725314836687708, + "loss": 2.0447, + "step": 906 + }, + { + "epoch": 0.09567510548523207, + "grad_norm": 0.8515194058418274, + "learning_rate": 0.0014724641198735659, + "loss": 2.0615, + "step": 907 + }, + { + "epoch": 0.09578059071729958, + "grad_norm": 0.8899465203285217, + "learning_rate": 0.0014723966751226535, + "loss": 2.0175, + "step": 908 + }, + { + "epoch": 0.09588607594936709, + "grad_norm": 0.6554786562919617, + "learning_rate": 0.0014723291494235916, + "loss": 2.0406, + "step": 909 + }, + { + "epoch": 0.09599156118143459, + "grad_norm": 0.6170030236244202, + "learning_rate": 0.0014722615427839468, + "loss": 2.0865, + "step": 910 + }, + { + "epoch": 0.09609704641350211, + "grad_norm": 0.72925865650177, + "learning_rate": 0.0014721938552112943, + "loss": 2.0376, + "step": 911 + }, + { + "epoch": 0.09620253164556962, + "grad_norm": 0.5666748285293579, + "learning_rate": 0.0014721260867132193, + "loss": 2.0161, + "step": 912 + }, + { + "epoch": 0.09630801687763713, + "grad_norm": 0.7472238540649414, + "learning_rate": 0.0014720582372973155, + "loss": 2.0152, + "step": 913 + }, + { + "epoch": 0.09641350210970465, + "grad_norm": 0.7019383311271667, + "learning_rate": 0.0014719903069711857, + "loss": 2.057, + "step": 914 + }, + { + "epoch": 
0.09651898734177215, + "grad_norm": 0.5440042614936829, + "learning_rate": 0.0014719222957424417, + "loss": 2.0586, + "step": 915 + }, + { + "epoch": 0.09662447257383966, + "grad_norm": 0.8194283843040466, + "learning_rate": 0.0014718542036187049, + "loss": 2.0453, + "step": 916 + }, + { + "epoch": 0.09672995780590718, + "grad_norm": 0.9465827345848083, + "learning_rate": 0.0014717860306076049, + "loss": 2.0109, + "step": 917 + }, + { + "epoch": 0.09683544303797469, + "grad_norm": 0.7540725469589233, + "learning_rate": 0.0014717177767167812, + "loss": 2.0155, + "step": 918 + }, + { + "epoch": 0.09694092827004219, + "grad_norm": 0.5355502367019653, + "learning_rate": 0.0014716494419538815, + "loss": 2.0436, + "step": 919 + }, + { + "epoch": 0.0970464135021097, + "grad_norm": 0.8533740639686584, + "learning_rate": 0.0014715810263265633, + "loss": 1.9779, + "step": 920 + }, + { + "epoch": 0.09715189873417722, + "grad_norm": 0.896659791469574, + "learning_rate": 0.0014715125298424934, + "loss": 2.0537, + "step": 921 + }, + { + "epoch": 0.09725738396624473, + "grad_norm": 0.5114359259605408, + "learning_rate": 0.0014714439525093466, + "loss": 2.0113, + "step": 922 + }, + { + "epoch": 0.09736286919831223, + "grad_norm": 0.8504418730735779, + "learning_rate": 0.0014713752943348074, + "loss": 1.9988, + "step": 923 + }, + { + "epoch": 0.09746835443037975, + "grad_norm": 0.8674096465110779, + "learning_rate": 0.0014713065553265694, + "loss": 2.0238, + "step": 924 + }, + { + "epoch": 0.09757383966244726, + "grad_norm": 0.5605230331420898, + "learning_rate": 0.001471237735492335, + "loss": 1.9866, + "step": 925 + }, + { + "epoch": 0.09767932489451477, + "grad_norm": 0.7468410730361938, + "learning_rate": 0.0014711688348398161, + "loss": 2.0212, + "step": 926 + }, + { + "epoch": 0.09778481012658227, + "grad_norm": 0.7494902014732361, + "learning_rate": 0.001471099853376733, + "loss": 2.0192, + "step": 927 + }, + { + "epoch": 0.09789029535864979, + "grad_norm": 
0.594651997089386, + "learning_rate": 0.0014710307911108159, + "loss": 1.9739, + "step": 928 + }, + { + "epoch": 0.0979957805907173, + "grad_norm": 0.5157437324523926, + "learning_rate": 0.0014709616480498029, + "loss": 2.019, + "step": 929 + }, + { + "epoch": 0.0981012658227848, + "grad_norm": 0.5321944355964661, + "learning_rate": 0.0014708924242014423, + "loss": 2.0079, + "step": 930 + }, + { + "epoch": 0.09820675105485233, + "grad_norm": 0.6218492984771729, + "learning_rate": 0.001470823119573491, + "loss": 1.9889, + "step": 931 + }, + { + "epoch": 0.09831223628691983, + "grad_norm": 0.5844724774360657, + "learning_rate": 0.0014707537341737149, + "loss": 2.0491, + "step": 932 + }, + { + "epoch": 0.09841772151898734, + "grad_norm": 0.5378125905990601, + "learning_rate": 0.0014706842680098887, + "loss": 2.0237, + "step": 933 + }, + { + "epoch": 0.09852320675105486, + "grad_norm": 0.6763551831245422, + "learning_rate": 0.0014706147210897967, + "loss": 2.0393, + "step": 934 + }, + { + "epoch": 0.09862869198312237, + "grad_norm": 0.8498495221138, + "learning_rate": 0.0014705450934212317, + "loss": 2.0283, + "step": 935 + }, + { + "epoch": 0.09873417721518987, + "grad_norm": 0.7187536358833313, + "learning_rate": 0.0014704753850119962, + "loss": 1.9988, + "step": 936 + }, + { + "epoch": 0.09883966244725738, + "grad_norm": 0.49806347489356995, + "learning_rate": 0.001470405595869901, + "loss": 2.0248, + "step": 937 + }, + { + "epoch": 0.0989451476793249, + "grad_norm": 0.6707183122634888, + "learning_rate": 0.0014703357260027667, + "loss": 2.0075, + "step": 938 + }, + { + "epoch": 0.0990506329113924, + "grad_norm": 0.6339157223701477, + "learning_rate": 0.0014702657754184225, + "loss": 2.0155, + "step": 939 + }, + { + "epoch": 0.09915611814345991, + "grad_norm": 0.5237643718719482, + "learning_rate": 0.0014701957441247064, + "loss": 2.0035, + "step": 940 + }, + { + "epoch": 0.09926160337552743, + "grad_norm": 0.6133110523223877, + "learning_rate": 
0.001470125632129466, + "loss": 1.9793, + "step": 941 + }, + { + "epoch": 0.09936708860759494, + "grad_norm": 0.6389789581298828, + "learning_rate": 0.0014700554394405576, + "loss": 2.0292, + "step": 942 + }, + { + "epoch": 0.09947257383966245, + "grad_norm": 0.5679811239242554, + "learning_rate": 0.0014699851660658469, + "loss": 1.9792, + "step": 943 + }, + { + "epoch": 0.09957805907172995, + "grad_norm": 0.4867742955684662, + "learning_rate": 0.0014699148120132079, + "loss": 1.9691, + "step": 944 + }, + { + "epoch": 0.09968354430379747, + "grad_norm": 0.6102750897407532, + "learning_rate": 0.0014698443772905247, + "loss": 1.9925, + "step": 945 + }, + { + "epoch": 0.09978902953586498, + "grad_norm": 0.6676855683326721, + "learning_rate": 0.0014697738619056891, + "loss": 1.9991, + "step": 946 + }, + { + "epoch": 0.09989451476793249, + "grad_norm": 0.5078664422035217, + "learning_rate": 0.0014697032658666036, + "loss": 1.9615, + "step": 947 + }, + { + "epoch": 0.1, + "grad_norm": 0.666331946849823, + "learning_rate": 0.001469632589181178, + "loss": 2.0245, + "step": 948 + }, + { + "epoch": 0.10010548523206751, + "grad_norm": 0.8374691009521484, + "learning_rate": 0.0014695618318573327, + "loss": 1.9946, + "step": 949 + }, + { + "epoch": 0.10021097046413502, + "grad_norm": 0.6675145030021667, + "learning_rate": 0.0014694909939029959, + "loss": 2.0168, + "step": 950 + }, + { + "epoch": 0.10031645569620253, + "grad_norm": 0.5545938014984131, + "learning_rate": 0.0014694200753261057, + "loss": 2.0258, + "step": 951 + }, + { + "epoch": 0.10042194092827005, + "grad_norm": 0.8792823553085327, + "learning_rate": 0.0014693490761346086, + "loss": 2.0142, + "step": 952 + }, + { + "epoch": 0.10052742616033755, + "grad_norm": 0.7162710428237915, + "learning_rate": 0.0014692779963364606, + "loss": 2.0284, + "step": 953 + }, + { + "epoch": 0.10063291139240506, + "grad_norm": 0.5224078297615051, + "learning_rate": 0.0014692068359396264, + "loss": 2.0124, + "step": 954 + }, + { + 
"epoch": 0.10073839662447258, + "grad_norm": 0.681459367275238, + "learning_rate": 0.00146913559495208, + "loss": 2.0093, + "step": 955 + }, + { + "epoch": 0.10084388185654009, + "grad_norm": 0.6361257433891296, + "learning_rate": 0.001469064273381804, + "loss": 1.9894, + "step": 956 + }, + { + "epoch": 0.10094936708860759, + "grad_norm": 0.5258025527000427, + "learning_rate": 0.0014689928712367907, + "loss": 1.9795, + "step": 957 + }, + { + "epoch": 0.10105485232067511, + "grad_norm": 0.6367164850234985, + "learning_rate": 0.0014689213885250411, + "loss": 2.0201, + "step": 958 + }, + { + "epoch": 0.10116033755274262, + "grad_norm": 0.6506195068359375, + "learning_rate": 0.001468849825254565, + "loss": 1.9642, + "step": 959 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 0.5486823916435242, + "learning_rate": 0.0014687781814333814, + "loss": 1.99, + "step": 960 + }, + { + "epoch": 0.10137130801687763, + "grad_norm": 0.6406944394111633, + "learning_rate": 0.0014687064570695185, + "loss": 2.0076, + "step": 961 + }, + { + "epoch": 0.10147679324894515, + "grad_norm": 0.7079448103904724, + "learning_rate": 0.0014686346521710133, + "loss": 1.9785, + "step": 962 + }, + { + "epoch": 0.10158227848101266, + "grad_norm": 0.5505774021148682, + "learning_rate": 0.0014685627667459118, + "loss": 2.0333, + "step": 963 + }, + { + "epoch": 0.10168776371308016, + "grad_norm": 0.6445375084877014, + "learning_rate": 0.0014684908008022694, + "loss": 1.9879, + "step": 964 + }, + { + "epoch": 0.10179324894514769, + "grad_norm": 0.618324875831604, + "learning_rate": 0.00146841875434815, + "loss": 2.0455, + "step": 965 + }, + { + "epoch": 0.10189873417721519, + "grad_norm": 0.6527636647224426, + "learning_rate": 0.0014683466273916266, + "loss": 1.9696, + "step": 966 + }, + { + "epoch": 0.1020042194092827, + "grad_norm": 0.6637148261070251, + "learning_rate": 0.0014682744199407817, + "loss": 2.0008, + "step": 967 + }, + { + "epoch": 0.1021097046413502, + "grad_norm": 
0.6427879333496094, + "learning_rate": 0.0014682021320037064, + "loss": 2.0368, + "step": 968 + }, + { + "epoch": 0.10221518987341772, + "grad_norm": 0.8249238729476929, + "learning_rate": 0.0014681297635885011, + "loss": 1.9826, + "step": 969 + }, + { + "epoch": 0.10232067510548523, + "grad_norm": 0.9460309743881226, + "learning_rate": 0.0014680573147032746, + "loss": 2.0094, + "step": 970 + }, + { + "epoch": 0.10242616033755274, + "grad_norm": 0.7181861400604248, + "learning_rate": 0.0014679847853561457, + "loss": 1.9788, + "step": 971 + }, + { + "epoch": 0.10253164556962026, + "grad_norm": 0.5918893814086914, + "learning_rate": 0.0014679121755552412, + "loss": 1.9742, + "step": 972 + }, + { + "epoch": 0.10263713080168776, + "grad_norm": 0.7698010802268982, + "learning_rate": 0.0014678394853086976, + "loss": 1.9839, + "step": 973 + }, + { + "epoch": 0.10274261603375527, + "grad_norm": 0.6221970319747925, + "learning_rate": 0.0014677667146246604, + "loss": 1.9972, + "step": 974 + }, + { + "epoch": 0.10284810126582279, + "grad_norm": 0.5318232178688049, + "learning_rate": 0.0014676938635112835, + "loss": 1.9781, + "step": 975 + }, + { + "epoch": 0.1029535864978903, + "grad_norm": 0.6808528900146484, + "learning_rate": 0.0014676209319767306, + "loss": 1.9774, + "step": 976 + }, + { + "epoch": 0.1030590717299578, + "grad_norm": 0.5577378869056702, + "learning_rate": 0.0014675479200291738, + "loss": 1.9923, + "step": 977 + }, + { + "epoch": 0.10316455696202531, + "grad_norm": 0.5371438264846802, + "learning_rate": 0.0014674748276767944, + "loss": 1.9741, + "step": 978 + }, + { + "epoch": 0.10327004219409283, + "grad_norm": 0.492551326751709, + "learning_rate": 0.0014674016549277831, + "loss": 1.9694, + "step": 979 + }, + { + "epoch": 0.10337552742616034, + "grad_norm": 0.5689995884895325, + "learning_rate": 0.0014673284017903392, + "loss": 2.011, + "step": 980 + }, + { + "epoch": 0.10348101265822784, + "grad_norm": 0.4942963123321533, + "learning_rate": 
0.001467255068272671, + "loss": 1.9906, + "step": 981 + }, + { + "epoch": 0.10358649789029536, + "grad_norm": 0.542231023311615, + "learning_rate": 0.0014671816543829954, + "loss": 2.0029, + "step": 982 + }, + { + "epoch": 0.10369198312236287, + "grad_norm": 0.5173419713973999, + "learning_rate": 0.0014671081601295394, + "loss": 1.963, + "step": 983 + }, + { + "epoch": 0.10379746835443038, + "grad_norm": 0.5693127512931824, + "learning_rate": 0.0014670345855205384, + "loss": 1.9636, + "step": 984 + }, + { + "epoch": 0.10390295358649788, + "grad_norm": 0.7573028206825256, + "learning_rate": 0.0014669609305642366, + "loss": 1.9825, + "step": 985 + }, + { + "epoch": 0.1040084388185654, + "grad_norm": 0.6342893838882446, + "learning_rate": 0.0014668871952688873, + "loss": 1.9739, + "step": 986 + }, + { + "epoch": 0.10411392405063291, + "grad_norm": 0.559754490852356, + "learning_rate": 0.0014668133796427532, + "loss": 2.0095, + "step": 987 + }, + { + "epoch": 0.10421940928270042, + "grad_norm": 0.747427761554718, + "learning_rate": 0.0014667394836941055, + "loss": 1.9716, + "step": 988 + }, + { + "epoch": 0.10432489451476794, + "grad_norm": 0.6542848348617554, + "learning_rate": 0.0014666655074312247, + "loss": 1.9558, + "step": 989 + }, + { + "epoch": 0.10443037974683544, + "grad_norm": 0.555674135684967, + "learning_rate": 0.0014665914508624, + "loss": 1.9664, + "step": 990 + }, + { + "epoch": 0.10453586497890295, + "grad_norm": 0.6296031475067139, + "learning_rate": 0.0014665173139959305, + "loss": 2.0421, + "step": 991 + }, + { + "epoch": 0.10464135021097046, + "grad_norm": 0.6478427648544312, + "learning_rate": 0.0014664430968401225, + "loss": 1.9843, + "step": 992 + }, + { + "epoch": 0.10474683544303798, + "grad_norm": 0.6326435208320618, + "learning_rate": 0.0014663687994032931, + "loss": 1.9681, + "step": 993 + }, + { + "epoch": 0.10485232067510548, + "grad_norm": 0.5318074226379395, + "learning_rate": 0.0014662944216937677, + "loss": 2.0042, + "step": 994 + }, 
+ { + "epoch": 0.10495780590717299, + "grad_norm": 0.6270446181297302, + "learning_rate": 0.0014662199637198807, + "loss": 1.9596, + "step": 995 + }, + { + "epoch": 0.10506329113924051, + "grad_norm": 0.646917462348938, + "learning_rate": 0.0014661454254899754, + "loss": 1.9896, + "step": 996 + }, + { + "epoch": 0.10516877637130802, + "grad_norm": 0.728165864944458, + "learning_rate": 0.0014660708070124038, + "loss": 1.9618, + "step": 997 + }, + { + "epoch": 0.10527426160337552, + "grad_norm": 0.606626570224762, + "learning_rate": 0.0014659961082955277, + "loss": 2.0041, + "step": 998 + }, + { + "epoch": 0.10537974683544304, + "grad_norm": 0.6981056332588196, + "learning_rate": 0.0014659213293477177, + "loss": 2.009, + "step": 999 + }, + { + "epoch": 0.10548523206751055, + "grad_norm": 0.8972599506378174, + "learning_rate": 0.0014658464701773526, + "loss": 2.0129, + "step": 1000 + }, + { + "epoch": 0.10559071729957806, + "grad_norm": 0.5392003059387207, + "learning_rate": 0.0014657715307928212, + "loss": 1.9592, + "step": 1001 + }, + { + "epoch": 0.10569620253164556, + "grad_norm": 0.8192545771598816, + "learning_rate": 0.0014656965112025203, + "loss": 1.9715, + "step": 1002 + }, + { + "epoch": 0.10580168776371308, + "grad_norm": 0.9200693368911743, + "learning_rate": 0.0014656214114148567, + "loss": 2.0225, + "step": 1003 + }, + { + "epoch": 0.10590717299578059, + "grad_norm": 0.5064769387245178, + "learning_rate": 0.0014655462314382456, + "loss": 1.9428, + "step": 1004 + }, + { + "epoch": 0.1060126582278481, + "grad_norm": 0.8570011854171753, + "learning_rate": 0.0014654709712811113, + "loss": 1.9731, + "step": 1005 + }, + { + "epoch": 0.10611814345991562, + "grad_norm": 0.9289914965629578, + "learning_rate": 0.0014653956309518866, + "loss": 1.9788, + "step": 1006 + }, + { + "epoch": 0.10622362869198312, + "grad_norm": 0.5847848653793335, + "learning_rate": 0.0014653202104590146, + "loss": 2.0074, + "step": 1007 + }, + { + "epoch": 0.10632911392405063, + 
"grad_norm": 0.6212193965911865, + "learning_rate": 0.0014652447098109458, + "loss": 1.9733, + "step": 1008 + }, + { + "epoch": 0.10643459915611814, + "grad_norm": 0.6586349010467529, + "learning_rate": 0.001465169129016141, + "loss": 1.9874, + "step": 1009 + }, + { + "epoch": 0.10654008438818566, + "grad_norm": 0.6525812149047852, + "learning_rate": 0.0014650934680830688, + "loss": 1.9856, + "step": 1010 + }, + { + "epoch": 0.10664556962025316, + "grad_norm": 0.5302839875221252, + "learning_rate": 0.001465017727020208, + "loss": 1.9408, + "step": 1011 + }, + { + "epoch": 0.10675105485232067, + "grad_norm": 0.6191859841346741, + "learning_rate": 0.0014649419058360455, + "loss": 1.9392, + "step": 1012 + }, + { + "epoch": 0.10685654008438819, + "grad_norm": 0.588769257068634, + "learning_rate": 0.0014648660045390772, + "loss": 1.9266, + "step": 1013 + }, + { + "epoch": 0.1069620253164557, + "grad_norm": 0.5823872685432434, + "learning_rate": 0.0014647900231378086, + "loss": 1.9783, + "step": 1014 + }, + { + "epoch": 0.1070675105485232, + "grad_norm": 0.5843485593795776, + "learning_rate": 0.0014647139616407539, + "loss": 1.9528, + "step": 1015 + }, + { + "epoch": 0.10717299578059072, + "grad_norm": 0.5024211406707764, + "learning_rate": 0.0014646378200564355, + "loss": 1.9234, + "step": 1016 + }, + { + "epoch": 0.10727848101265823, + "grad_norm": 0.621662974357605, + "learning_rate": 0.001464561598393386, + "loss": 1.9908, + "step": 1017 + }, + { + "epoch": 0.10738396624472574, + "grad_norm": 0.7474318146705627, + "learning_rate": 0.0014644852966601463, + "loss": 2.0059, + "step": 1018 + }, + { + "epoch": 0.10748945147679324, + "grad_norm": 0.7061962485313416, + "learning_rate": 0.0014644089148652664, + "loss": 2.0391, + "step": 1019 + }, + { + "epoch": 0.10759493670886076, + "grad_norm": 0.6023826599121094, + "learning_rate": 0.0014643324530173051, + "loss": 1.9512, + "step": 1020 + }, + { + "epoch": 0.10770042194092827, + "grad_norm": 0.6278279423713684, + 
"learning_rate": 0.0014642559111248306, + "loss": 1.9674, + "step": 1021 + }, + { + "epoch": 0.10780590717299578, + "grad_norm": 0.7449028491973877, + "learning_rate": 0.0014641792891964195, + "loss": 1.9777, + "step": 1022 + }, + { + "epoch": 0.1079113924050633, + "grad_norm": 0.7385549545288086, + "learning_rate": 0.0014641025872406581, + "loss": 1.9508, + "step": 1023 + }, + { + "epoch": 0.1080168776371308, + "grad_norm": 0.6633833050727844, + "learning_rate": 0.0014640258052661405, + "loss": 1.9807, + "step": 1024 + }, + { + "epoch": 0.10812236286919831, + "grad_norm": 0.5351836681365967, + "learning_rate": 0.0014639489432814712, + "loss": 2.0064, + "step": 1025 + }, + { + "epoch": 0.10822784810126582, + "grad_norm": 0.6886828541755676, + "learning_rate": 0.001463872001295263, + "loss": 1.9418, + "step": 1026 + }, + { + "epoch": 0.10833333333333334, + "grad_norm": 0.5847939848899841, + "learning_rate": 0.0014637949793161371, + "loss": 1.9963, + "step": 1027 + }, + { + "epoch": 0.10843881856540084, + "grad_norm": 0.5763885378837585, + "learning_rate": 0.0014637178773527246, + "loss": 1.9465, + "step": 1028 + }, + { + "epoch": 0.10854430379746835, + "grad_norm": 0.5924845933914185, + "learning_rate": 0.001463640695413665, + "loss": 1.9859, + "step": 1029 + }, + { + "epoch": 0.10864978902953587, + "grad_norm": 0.5316183567047119, + "learning_rate": 0.0014635634335076067, + "loss": 1.9799, + "step": 1030 + }, + { + "epoch": 0.10875527426160338, + "grad_norm": 0.613385796546936, + "learning_rate": 0.0014634860916432077, + "loss": 1.9529, + "step": 1031 + }, + { + "epoch": 0.10886075949367088, + "grad_norm": 0.6486957669258118, + "learning_rate": 0.0014634086698291345, + "loss": 1.9307, + "step": 1032 + }, + { + "epoch": 0.10896624472573839, + "grad_norm": 0.5300064086914062, + "learning_rate": 0.0014633311680740625, + "loss": 1.9631, + "step": 1033 + }, + { + "epoch": 0.10907172995780591, + "grad_norm": 0.6007261276245117, + "learning_rate": 0.0014632535863866756, + 
"loss": 2.0051, + "step": 1034 + }, + { + "epoch": 0.10917721518987342, + "grad_norm": 0.547659695148468, + "learning_rate": 0.0014631759247756683, + "loss": 1.9882, + "step": 1035 + }, + { + "epoch": 0.10928270042194092, + "grad_norm": 0.549119234085083, + "learning_rate": 0.0014630981832497421, + "loss": 1.9417, + "step": 1036 + }, + { + "epoch": 0.10938818565400844, + "grad_norm": 0.48248225450515747, + "learning_rate": 0.0014630203618176088, + "loss": 1.9524, + "step": 1037 + }, + { + "epoch": 0.10949367088607595, + "grad_norm": 0.5422973036766052, + "learning_rate": 0.0014629424604879885, + "loss": 1.9889, + "step": 1038 + }, + { + "epoch": 0.10959915611814346, + "grad_norm": 0.5252543091773987, + "learning_rate": 0.0014628644792696105, + "loss": 1.9501, + "step": 1039 + }, + { + "epoch": 0.10970464135021098, + "grad_norm": 0.5650680065155029, + "learning_rate": 0.001462786418171213, + "loss": 1.9672, + "step": 1040 + }, + { + "epoch": 0.10981012658227848, + "grad_norm": 0.5559163689613342, + "learning_rate": 0.0014627082772015428, + "loss": 1.9735, + "step": 1041 + }, + { + "epoch": 0.10991561181434599, + "grad_norm": 0.6912906765937805, + "learning_rate": 0.0014626300563693566, + "loss": 1.9675, + "step": 1042 + }, + { + "epoch": 0.1100210970464135, + "grad_norm": 0.7011213302612305, + "learning_rate": 0.0014625517556834187, + "loss": 1.9575, + "step": 1043 + }, + { + "epoch": 0.11012658227848102, + "grad_norm": 0.707778811454773, + "learning_rate": 0.0014624733751525036, + "loss": 1.9496, + "step": 1044 + }, + { + "epoch": 0.11023206751054852, + "grad_norm": 0.6291083693504333, + "learning_rate": 0.001462394914785394, + "loss": 1.9789, + "step": 1045 + }, + { + "epoch": 0.11033755274261603, + "grad_norm": 0.5145670175552368, + "learning_rate": 0.0014623163745908821, + "loss": 1.9874, + "step": 1046 + }, + { + "epoch": 0.11044303797468355, + "grad_norm": 0.609140932559967, + "learning_rate": 0.0014622377545777687, + "loss": 1.9655, + "step": 1047 + }, + { + 
"epoch": 0.11054852320675106, + "grad_norm": 0.5685890316963196, + "learning_rate": 0.001462159054754863, + "loss": 1.9639, + "step": 1048 + }, + { + "epoch": 0.11065400843881856, + "grad_norm": 0.5743528604507446, + "learning_rate": 0.0014620802751309841, + "loss": 1.9567, + "step": 1049 + }, + { + "epoch": 0.11075949367088607, + "grad_norm": 0.566927433013916, + "learning_rate": 0.0014620014157149597, + "loss": 1.9826, + "step": 1050 + }, + { + "epoch": 0.11086497890295359, + "grad_norm": 0.6089000701904297, + "learning_rate": 0.0014619224765156263, + "loss": 1.9522, + "step": 1051 + }, + { + "epoch": 0.1109704641350211, + "grad_norm": 0.707232654094696, + "learning_rate": 0.0014618434575418293, + "loss": 1.9767, + "step": 1052 + }, + { + "epoch": 0.1110759493670886, + "grad_norm": 0.7164165377616882, + "learning_rate": 0.0014617643588024237, + "loss": 1.9847, + "step": 1053 + }, + { + "epoch": 0.11118143459915612, + "grad_norm": 0.5810354351997375, + "learning_rate": 0.001461685180306272, + "loss": 1.969, + "step": 1054 + }, + { + "epoch": 0.11128691983122363, + "grad_norm": 0.5116831660270691, + "learning_rate": 0.0014616059220622475, + "loss": 1.9463, + "step": 1055 + }, + { + "epoch": 0.11139240506329114, + "grad_norm": 0.5273725986480713, + "learning_rate": 0.0014615265840792308, + "loss": 1.9274, + "step": 1056 + }, + { + "epoch": 0.11149789029535866, + "grad_norm": 0.538004994392395, + "learning_rate": 0.0014614471663661123, + "loss": 1.9675, + "step": 1057 + }, + { + "epoch": 0.11160337552742616, + "grad_norm": 0.49568068981170654, + "learning_rate": 0.0014613676689317916, + "loss": 1.9598, + "step": 1058 + }, + { + "epoch": 0.11170886075949367, + "grad_norm": 0.519355833530426, + "learning_rate": 0.001461288091785176, + "loss": 1.9211, + "step": 1059 + }, + { + "epoch": 0.11181434599156118, + "grad_norm": 0.5563498139381409, + "learning_rate": 0.001461208434935183, + "loss": 1.9527, + "step": 1060 + }, + { + "epoch": 0.1119198312236287, + "grad_norm": 
0.6477454304695129, + "learning_rate": 0.0014611286983907384, + "loss": 1.9772, + "step": 1061 + }, + { + "epoch": 0.1120253164556962, + "grad_norm": 0.6618948578834534, + "learning_rate": 0.0014610488821607775, + "loss": 1.9091, + "step": 1062 + }, + { + "epoch": 0.11213080168776371, + "grad_norm": 0.5943455100059509, + "learning_rate": 0.0014609689862542434, + "loss": 1.9663, + "step": 1063 + }, + { + "epoch": 0.11223628691983123, + "grad_norm": 0.5907568335533142, + "learning_rate": 0.0014608890106800893, + "loss": 1.9198, + "step": 1064 + }, + { + "epoch": 0.11234177215189874, + "grad_norm": 0.8423769474029541, + "learning_rate": 0.0014608089554472767, + "loss": 1.9504, + "step": 1065 + }, + { + "epoch": 0.11244725738396624, + "grad_norm": 1.0076454877853394, + "learning_rate": 0.0014607288205647762, + "loss": 1.9324, + "step": 1066 + }, + { + "epoch": 0.11255274261603375, + "grad_norm": 0.7817239761352539, + "learning_rate": 0.0014606486060415673, + "loss": 1.9661, + "step": 1067 + }, + { + "epoch": 0.11265822784810127, + "grad_norm": 0.5399801135063171, + "learning_rate": 0.0014605683118866387, + "loss": 1.9165, + "step": 1068 + }, + { + "epoch": 0.11276371308016878, + "grad_norm": 0.6795113682746887, + "learning_rate": 0.0014604879381089873, + "loss": 1.9537, + "step": 1069 + }, + { + "epoch": 0.11286919831223628, + "grad_norm": 0.5460273027420044, + "learning_rate": 0.0014604074847176197, + "loss": 1.9619, + "step": 1070 + }, + { + "epoch": 0.1129746835443038, + "grad_norm": 0.5661694407463074, + "learning_rate": 0.0014603269517215512, + "loss": 1.9112, + "step": 1071 + }, + { + "epoch": 0.11308016877637131, + "grad_norm": 0.568588376045227, + "learning_rate": 0.0014602463391298055, + "loss": 1.9884, + "step": 1072 + }, + { + "epoch": 0.11318565400843882, + "grad_norm": 0.5135106444358826, + "learning_rate": 0.0014601656469514159, + "loss": 1.9205, + "step": 1073 + }, + { + "epoch": 0.11329113924050632, + "grad_norm": 0.5591195821762085, + "learning_rate": 
0.0014600848751954248, + "loss": 1.9106, + "step": 1074 + }, + { + "epoch": 0.11339662447257384, + "grad_norm": 0.5857884287834167, + "learning_rate": 0.001460004023870882, + "loss": 1.9542, + "step": 1075 + }, + { + "epoch": 0.11350210970464135, + "grad_norm": 0.5263600945472717, + "learning_rate": 0.0014599230929868482, + "loss": 1.9819, + "step": 1076 + }, + { + "epoch": 0.11360759493670886, + "grad_norm": 0.6091200709342957, + "learning_rate": 0.0014598420825523918, + "loss": 1.9632, + "step": 1077 + }, + { + "epoch": 0.11371308016877638, + "grad_norm": 0.6581708192825317, + "learning_rate": 0.0014597609925765906, + "loss": 1.9193, + "step": 1078 + }, + { + "epoch": 0.11381856540084388, + "grad_norm": 0.7269029021263123, + "learning_rate": 0.0014596798230685308, + "loss": 1.9617, + "step": 1079 + }, + { + "epoch": 0.11392405063291139, + "grad_norm": 0.5892932415008545, + "learning_rate": 0.0014595985740373082, + "loss": 1.9366, + "step": 1080 + }, + { + "epoch": 0.11402953586497891, + "grad_norm": 0.5831671953201294, + "learning_rate": 0.001459517245492027, + "loss": 1.9381, + "step": 1081 + }, + { + "epoch": 0.11413502109704642, + "grad_norm": 0.7869381308555603, + "learning_rate": 0.0014594358374418004, + "loss": 1.9671, + "step": 1082 + }, + { + "epoch": 0.11424050632911392, + "grad_norm": 0.7487177848815918, + "learning_rate": 0.0014593543498957506, + "loss": 1.9332, + "step": 1083 + }, + { + "epoch": 0.11434599156118143, + "grad_norm": 0.581021249294281, + "learning_rate": 0.0014592727828630088, + "loss": 1.9534, + "step": 1084 + }, + { + "epoch": 0.11445147679324895, + "grad_norm": 0.6287938356399536, + "learning_rate": 0.001459191136352715, + "loss": 1.9175, + "step": 1085 + }, + { + "epoch": 0.11455696202531646, + "grad_norm": 0.7101197838783264, + "learning_rate": 0.0014591094103740179, + "loss": 1.913, + "step": 1086 + }, + { + "epoch": 0.11466244725738396, + "grad_norm": 0.5685555934906006, + "learning_rate": 0.0014590276049360755, + "loss": 1.9975, 
+ "step": 1087 + }, + { + "epoch": 0.11476793248945148, + "grad_norm": 0.6218580603599548, + "learning_rate": 0.0014589457200480543, + "loss": 1.9852, + "step": 1088 + }, + { + "epoch": 0.11487341772151899, + "grad_norm": 0.9988272190093994, + "learning_rate": 0.0014588637557191302, + "loss": 1.9898, + "step": 1089 + }, + { + "epoch": 0.1149789029535865, + "grad_norm": 1.071096420288086, + "learning_rate": 0.0014587817119584873, + "loss": 1.9499, + "step": 1090 + }, + { + "epoch": 0.115084388185654, + "grad_norm": 0.5565263628959656, + "learning_rate": 0.0014586995887753197, + "loss": 1.911, + "step": 1091 + }, + { + "epoch": 0.11518987341772152, + "grad_norm": 1.0334595441818237, + "learning_rate": 0.001458617386178829, + "loss": 1.9328, + "step": 1092 + }, + { + "epoch": 0.11529535864978903, + "grad_norm": 1.0199013948440552, + "learning_rate": 0.001458535104178227, + "loss": 1.9332, + "step": 1093 + }, + { + "epoch": 0.11540084388185654, + "grad_norm": 0.6223122477531433, + "learning_rate": 0.001458452742782733, + "loss": 1.9623, + "step": 1094 + }, + { + "epoch": 0.11550632911392406, + "grad_norm": 0.9254889488220215, + "learning_rate": 0.0014583703020015768, + "loss": 1.9617, + "step": 1095 + }, + { + "epoch": 0.11561181434599156, + "grad_norm": 0.7697954773902893, + "learning_rate": 0.001458287781843996, + "loss": 1.973, + "step": 1096 + }, + { + "epoch": 0.11571729957805907, + "grad_norm": 0.6123648881912231, + "learning_rate": 0.0014582051823192374, + "loss": 1.9328, + "step": 1097 + }, + { + "epoch": 0.11582278481012659, + "grad_norm": 0.7514441013336182, + "learning_rate": 0.0014581225034365564, + "loss": 1.9693, + "step": 1098 + }, + { + "epoch": 0.1159282700421941, + "grad_norm": 0.6078644394874573, + "learning_rate": 0.0014580397452052182, + "loss": 1.9388, + "step": 1099 + }, + { + "epoch": 0.1160337552742616, + "grad_norm": 0.6683803796768188, + "learning_rate": 0.001457956907634496, + "loss": 1.9378, + "step": 1100 + }, + { + "epoch": 
0.11613924050632911, + "grad_norm": 0.6074516177177429, + "learning_rate": 0.001457873990733672, + "loss": 1.9327, + "step": 1101 + }, + { + "epoch": 0.11624472573839663, + "grad_norm": 0.6700440049171448, + "learning_rate": 0.0014577909945120376, + "loss": 1.9282, + "step": 1102 + }, + { + "epoch": 0.11635021097046414, + "grad_norm": 0.5450042486190796, + "learning_rate": 0.001457707918978893, + "loss": 1.954, + "step": 1103 + }, + { + "epoch": 0.11645569620253164, + "grad_norm": 0.539493203163147, + "learning_rate": 0.0014576247641435469, + "loss": 1.9537, + "step": 1104 + }, + { + "epoch": 0.11656118143459916, + "grad_norm": 0.576741099357605, + "learning_rate": 0.0014575415300153174, + "loss": 1.9668, + "step": 1105 + }, + { + "epoch": 0.11666666666666667, + "grad_norm": 0.4945216476917267, + "learning_rate": 0.0014574582166035314, + "loss": 1.9428, + "step": 1106 + }, + { + "epoch": 0.11677215189873418, + "grad_norm": 0.589461624622345, + "learning_rate": 0.0014573748239175247, + "loss": 1.9346, + "step": 1107 + }, + { + "epoch": 0.11687763713080168, + "grad_norm": 0.5854842662811279, + "learning_rate": 0.0014572913519666417, + "loss": 1.9375, + "step": 1108 + }, + { + "epoch": 0.1169831223628692, + "grad_norm": 0.5724058151245117, + "learning_rate": 0.0014572078007602355, + "loss": 1.958, + "step": 1109 + }, + { + "epoch": 0.11708860759493671, + "grad_norm": 0.721595823764801, + "learning_rate": 0.0014571241703076692, + "loss": 1.9258, + "step": 1110 + }, + { + "epoch": 0.11719409282700421, + "grad_norm": 0.6009465456008911, + "learning_rate": 0.0014570404606183132, + "loss": 1.9678, + "step": 1111 + }, + { + "epoch": 0.11729957805907174, + "grad_norm": 0.6243194937705994, + "learning_rate": 0.0014569566717015483, + "loss": 1.9092, + "step": 1112 + }, + { + "epoch": 0.11740506329113924, + "grad_norm": 0.8400901556015015, + "learning_rate": 0.0014568728035667627, + "loss": 1.9707, + "step": 1113 + }, + { + "epoch": 0.11751054852320675, + "grad_norm": 
0.5883736610412598, + "learning_rate": 0.001456788856223355, + "loss": 1.919, + "step": 1114 + }, + { + "epoch": 0.11761603375527427, + "grad_norm": 0.5710265636444092, + "learning_rate": 0.0014567048296807315, + "loss": 1.9648, + "step": 1115 + }, + { + "epoch": 0.11772151898734177, + "grad_norm": 0.5995280742645264, + "learning_rate": 0.0014566207239483078, + "loss": 1.9321, + "step": 1116 + }, + { + "epoch": 0.11782700421940928, + "grad_norm": 0.5837056636810303, + "learning_rate": 0.0014565365390355087, + "loss": 1.9268, + "step": 1117 + }, + { + "epoch": 0.11793248945147679, + "grad_norm": 0.5641255974769592, + "learning_rate": 0.001456452274951767, + "loss": 1.9575, + "step": 1118 + }, + { + "epoch": 0.11803797468354431, + "grad_norm": 0.6566804647445679, + "learning_rate": 0.0014563679317065254, + "loss": 1.9208, + "step": 1119 + }, + { + "epoch": 0.11814345991561181, + "grad_norm": 0.5737595558166504, + "learning_rate": 0.0014562835093092348, + "loss": 1.8957, + "step": 1120 + }, + { + "epoch": 0.11824894514767932, + "grad_norm": 0.5159260630607605, + "learning_rate": 0.0014561990077693553, + "loss": 1.8991, + "step": 1121 + }, + { + "epoch": 0.11835443037974684, + "grad_norm": 0.6947562098503113, + "learning_rate": 0.0014561144270963551, + "loss": 1.9451, + "step": 1122 + }, + { + "epoch": 0.11845991561181435, + "grad_norm": 0.716520369052887, + "learning_rate": 0.0014560297672997127, + "loss": 1.9337, + "step": 1123 + }, + { + "epoch": 0.11856540084388185, + "grad_norm": 0.54122394323349, + "learning_rate": 0.001455945028388914, + "loss": 1.9063, + "step": 1124 + }, + { + "epoch": 0.11867088607594936, + "grad_norm": 0.5828913450241089, + "learning_rate": 0.001455860210373455, + "loss": 1.9294, + "step": 1125 + }, + { + "epoch": 0.11877637130801688, + "grad_norm": 0.5869624018669128, + "learning_rate": 0.0014557753132628396, + "loss": 1.9047, + "step": 1126 + }, + { + "epoch": 0.11888185654008439, + "grad_norm": 0.6268728375434875, + "learning_rate": 
0.0014556903370665807, + "loss": 1.9294, + "step": 1127 + }, + { + "epoch": 0.1189873417721519, + "grad_norm": 0.7501130104064941, + "learning_rate": 0.0014556052817942013, + "loss": 1.9299, + "step": 1128 + }, + { + "epoch": 0.11909282700421941, + "grad_norm": 0.5395290851593018, + "learning_rate": 0.001455520147455231, + "loss": 1.9331, + "step": 1129 + }, + { + "epoch": 0.11919831223628692, + "grad_norm": 0.7832436561584473, + "learning_rate": 0.0014554349340592104, + "loss": 1.9238, + "step": 1130 + }, + { + "epoch": 0.11930379746835443, + "grad_norm": 0.8251081705093384, + "learning_rate": 0.001455349641615688, + "loss": 1.9848, + "step": 1131 + }, + { + "epoch": 0.11940928270042193, + "grad_norm": 0.6972483396530151, + "learning_rate": 0.001455264270134221, + "loss": 1.9032, + "step": 1132 + }, + { + "epoch": 0.11951476793248945, + "grad_norm": 0.6397451162338257, + "learning_rate": 0.0014551788196243754, + "loss": 1.9375, + "step": 1133 + }, + { + "epoch": 0.11962025316455696, + "grad_norm": 0.6163764595985413, + "learning_rate": 0.0014550932900957271, + "loss": 1.9476, + "step": 1134 + }, + { + "epoch": 0.11972573839662447, + "grad_norm": 0.5056138038635254, + "learning_rate": 0.0014550076815578595, + "loss": 1.9346, + "step": 1135 + }, + { + "epoch": 0.11983122362869199, + "grad_norm": 0.7232457995414734, + "learning_rate": 0.0014549219940203659, + "loss": 1.8972, + "step": 1136 + }, + { + "epoch": 0.1199367088607595, + "grad_norm": 0.6998229026794434, + "learning_rate": 0.0014548362274928476, + "loss": 1.957, + "step": 1137 + }, + { + "epoch": 0.120042194092827, + "grad_norm": 0.5385776162147522, + "learning_rate": 0.0014547503819849154, + "loss": 1.9371, + "step": 1138 + }, + { + "epoch": 0.12014767932489452, + "grad_norm": 0.6051105856895447, + "learning_rate": 0.001454664457506189, + "loss": 1.9279, + "step": 1139 + }, + { + "epoch": 0.12025316455696203, + "grad_norm": 0.6514634490013123, + "learning_rate": 0.001454578454066296, + "loss": 1.9501, + 
"step": 1140 + }, + { + "epoch": 0.12035864978902953, + "grad_norm": 0.5548772811889648, + "learning_rate": 0.001454492371674874, + "loss": 1.8706, + "step": 1141 + }, + { + "epoch": 0.12046413502109704, + "grad_norm": 0.5386126041412354, + "learning_rate": 0.0014544062103415687, + "loss": 1.9282, + "step": 1142 + }, + { + "epoch": 0.12056962025316456, + "grad_norm": 0.5547209978103638, + "learning_rate": 0.0014543199700760353, + "loss": 1.9746, + "step": 1143 + }, + { + "epoch": 0.12067510548523207, + "grad_norm": 0.5400877594947815, + "learning_rate": 0.0014542336508879372, + "loss": 1.8929, + "step": 1144 + }, + { + "epoch": 0.12078059071729957, + "grad_norm": 0.6110559701919556, + "learning_rate": 0.0014541472527869468, + "loss": 1.9254, + "step": 1145 + }, + { + "epoch": 0.1208860759493671, + "grad_norm": 0.6501067876815796, + "learning_rate": 0.0014540607757827456, + "loss": 1.9195, + "step": 1146 + }, + { + "epoch": 0.1209915611814346, + "grad_norm": 0.7306692600250244, + "learning_rate": 0.0014539742198850234, + "loss": 1.9067, + "step": 1147 + }, + { + "epoch": 0.12109704641350211, + "grad_norm": 0.5760822296142578, + "learning_rate": 0.0014538875851034798, + "loss": 1.9625, + "step": 1148 + }, + { + "epoch": 0.12120253164556961, + "grad_norm": 0.7171794772148132, + "learning_rate": 0.0014538008714478224, + "loss": 1.9329, + "step": 1149 + }, + { + "epoch": 0.12130801687763713, + "grad_norm": 0.8684197068214417, + "learning_rate": 0.0014537140789277678, + "loss": 1.9125, + "step": 1150 + }, + { + "epoch": 0.12141350210970464, + "grad_norm": 0.7578715682029724, + "learning_rate": 0.0014536272075530417, + "loss": 1.9258, + "step": 1151 + }, + { + "epoch": 0.12151898734177215, + "grad_norm": 0.5794346928596497, + "learning_rate": 0.0014535402573333783, + "loss": 1.9704, + "step": 1152 + }, + { + "epoch": 0.12162447257383967, + "grad_norm": 0.7814840078353882, + "learning_rate": 0.001453453228278521, + "loss": 1.9204, + "step": 1153 + }, + { + "epoch": 
0.12172995780590717, + "grad_norm": 0.7330074906349182, + "learning_rate": 0.0014533661203982215, + "loss": 1.9306, + "step": 1154 + }, + { + "epoch": 0.12183544303797468, + "grad_norm": 0.5922874808311462, + "learning_rate": 0.0014532789337022413, + "loss": 1.9234, + "step": 1155 + }, + { + "epoch": 0.1219409282700422, + "grad_norm": 0.7352747321128845, + "learning_rate": 0.0014531916682003494, + "loss": 1.9408, + "step": 1156 + }, + { + "epoch": 0.12204641350210971, + "grad_norm": 0.6917290091514587, + "learning_rate": 0.0014531043239023247, + "loss": 1.9156, + "step": 1157 + }, + { + "epoch": 0.12215189873417721, + "grad_norm": 0.5476695895195007, + "learning_rate": 0.0014530169008179546, + "loss": 1.9228, + "step": 1158 + }, + { + "epoch": 0.12225738396624472, + "grad_norm": 0.6410239338874817, + "learning_rate": 0.001452929398957035, + "loss": 1.9402, + "step": 1159 + }, + { + "epoch": 0.12236286919831224, + "grad_norm": 0.6245636343955994, + "learning_rate": 0.0014528418183293716, + "loss": 1.9104, + "step": 1160 + }, + { + "epoch": 0.12246835443037975, + "grad_norm": 0.5474761724472046, + "learning_rate": 0.0014527541589447774, + "loss": 1.9405, + "step": 1161 + }, + { + "epoch": 0.12257383966244725, + "grad_norm": 0.650377094745636, + "learning_rate": 0.0014526664208130756, + "loss": 1.9474, + "step": 1162 + }, + { + "epoch": 0.12267932489451477, + "grad_norm": 0.7064793705940247, + "learning_rate": 0.0014525786039440971, + "loss": 1.9075, + "step": 1163 + }, + { + "epoch": 0.12278481012658228, + "grad_norm": 0.500774621963501, + "learning_rate": 0.001452490708347683, + "loss": 1.9145, + "step": 1164 + }, + { + "epoch": 0.12289029535864979, + "grad_norm": 0.5664652585983276, + "learning_rate": 0.0014524027340336821, + "loss": 1.9102, + "step": 1165 + }, + { + "epoch": 0.1229957805907173, + "grad_norm": 0.5898625254631042, + "learning_rate": 0.0014523146810119525, + "loss": 1.8975, + "step": 1166 + }, + { + "epoch": 0.12310126582278481, + "grad_norm": 
0.5621245503425598, + "learning_rate": 0.0014522265492923608, + "loss": 1.9548, + "step": 1167 + }, + { + "epoch": 0.12320675105485232, + "grad_norm": 0.5372191071510315, + "learning_rate": 0.0014521383388847824, + "loss": 1.8803, + "step": 1168 + }, + { + "epoch": 0.12331223628691983, + "grad_norm": 0.5332903861999512, + "learning_rate": 0.0014520500497991022, + "loss": 1.9223, + "step": 1169 + }, + { + "epoch": 0.12341772151898735, + "grad_norm": 0.500157356262207, + "learning_rate": 0.001451961682045213, + "loss": 1.8837, + "step": 1170 + }, + { + "epoch": 0.12352320675105485, + "grad_norm": 0.5012036561965942, + "learning_rate": 0.001451873235633017, + "loss": 1.9378, + "step": 1171 + }, + { + "epoch": 0.12362869198312236, + "grad_norm": 0.5669428110122681, + "learning_rate": 0.0014517847105724251, + "loss": 1.9101, + "step": 1172 + }, + { + "epoch": 0.12373417721518987, + "grad_norm": 0.5085879564285278, + "learning_rate": 0.0014516961068733569, + "loss": 1.8585, + "step": 1173 + }, + { + "epoch": 0.12383966244725739, + "grad_norm": 0.5619962215423584, + "learning_rate": 0.0014516074245457412, + "loss": 1.9234, + "step": 1174 + }, + { + "epoch": 0.1239451476793249, + "grad_norm": 0.7760449051856995, + "learning_rate": 0.001451518663599515, + "loss": 1.8936, + "step": 1175 + }, + { + "epoch": 0.1240506329113924, + "grad_norm": 0.9056915044784546, + "learning_rate": 0.0014514298240446244, + "loss": 1.8527, + "step": 1176 + }, + { + "epoch": 0.12415611814345992, + "grad_norm": 0.6913965940475464, + "learning_rate": 0.0014513409058910243, + "loss": 1.8824, + "step": 1177 + }, + { + "epoch": 0.12426160337552743, + "grad_norm": 0.6152539253234863, + "learning_rate": 0.0014512519091486786, + "loss": 1.9266, + "step": 1178 + }, + { + "epoch": 0.12436708860759493, + "grad_norm": 0.878398597240448, + "learning_rate": 0.0014511628338275597, + "loss": 1.9391, + "step": 1179 + }, + { + "epoch": 0.12447257383966245, + "grad_norm": 0.7022596597671509, + "learning_rate": 
0.001451073679937649, + "loss": 1.9156, + "step": 1180 + }, + { + "epoch": 0.12457805907172996, + "grad_norm": 0.5672844648361206, + "learning_rate": 0.0014509844474889365, + "loss": 1.8867, + "step": 1181 + }, + { + "epoch": 0.12468354430379747, + "grad_norm": 0.7827438712120056, + "learning_rate": 0.0014508951364914213, + "loss": 1.9029, + "step": 1182 + }, + { + "epoch": 0.12478902953586497, + "grad_norm": 0.7863681316375732, + "learning_rate": 0.001450805746955111, + "loss": 1.916, + "step": 1183 + }, + { + "epoch": 0.1248945147679325, + "grad_norm": 0.5651347041130066, + "learning_rate": 0.001450716278890022, + "loss": 1.9151, + "step": 1184 + }, + { + "epoch": 0.125, + "grad_norm": 0.5981082916259766, + "learning_rate": 0.0014506267323061803, + "loss": 1.9161, + "step": 1185 + }, + { + "epoch": 0.12510548523206752, + "grad_norm": 0.6068738102912903, + "learning_rate": 0.0014505371072136195, + "loss": 1.9152, + "step": 1186 + }, + { + "epoch": 0.125210970464135, + "grad_norm": 0.5018198490142822, + "learning_rate": 0.0014504474036223826, + "loss": 1.928, + "step": 1187 + }, + { + "epoch": 0.12531645569620253, + "grad_norm": 0.5296882390975952, + "learning_rate": 0.0014503576215425212, + "loss": 1.9249, + "step": 1188 + }, + { + "epoch": 0.12542194092827005, + "grad_norm": 0.5443305969238281, + "learning_rate": 0.0014502677609840964, + "loss": 1.8828, + "step": 1189 + }, + { + "epoch": 0.12552742616033755, + "grad_norm": 0.5573316216468811, + "learning_rate": 0.0014501778219571766, + "loss": 1.8755, + "step": 1190 + }, + { + "epoch": 0.12563291139240507, + "grad_norm": 0.566230297088623, + "learning_rate": 0.0014500878044718408, + "loss": 1.9008, + "step": 1191 + }, + { + "epoch": 0.1257383966244726, + "grad_norm": 0.6610024571418762, + "learning_rate": 0.0014499977085381756, + "loss": 1.8921, + "step": 1192 + }, + { + "epoch": 0.12584388185654008, + "grad_norm": 0.7654202580451965, + "learning_rate": 0.0014499075341662764, + "loss": 1.943, + "step": 1193 + }, 
+ { + "epoch": 0.1259493670886076, + "grad_norm": 0.624343752861023, + "learning_rate": 0.0014498172813662482, + "loss": 1.967, + "step": 1194 + }, + { + "epoch": 0.1260548523206751, + "grad_norm": 0.6816976070404053, + "learning_rate": 0.0014497269501482037, + "loss": 1.9073, + "step": 1195 + }, + { + "epoch": 0.1261603375527426, + "grad_norm": 0.9940201044082642, + "learning_rate": 0.0014496365405222656, + "loss": 1.8974, + "step": 1196 + }, + { + "epoch": 0.12626582278481013, + "grad_norm": 0.8332315683364868, + "learning_rate": 0.0014495460524985644, + "loss": 1.9038, + "step": 1197 + }, + { + "epoch": 0.12637130801687763, + "grad_norm": 0.5590701103210449, + "learning_rate": 0.0014494554860872398, + "loss": 1.9325, + "step": 1198 + }, + { + "epoch": 0.12647679324894515, + "grad_norm": 0.9218399524688721, + "learning_rate": 0.00144936484129844, + "loss": 1.9462, + "step": 1199 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 0.7244175672531128, + "learning_rate": 0.0014492741181423225, + "loss": 1.9034, + "step": 1200 + }, + { + "epoch": 0.12668776371308016, + "grad_norm": 0.5379170775413513, + "learning_rate": 0.001449183316629053, + "loss": 1.9057, + "step": 1201 + }, + { + "epoch": 0.12679324894514768, + "grad_norm": 0.784265398979187, + "learning_rate": 0.0014490924367688066, + "loss": 1.9084, + "step": 1202 + }, + { + "epoch": 0.1268987341772152, + "grad_norm": 0.85931396484375, + "learning_rate": 0.0014490014785717667, + "loss": 1.8858, + "step": 1203 + }, + { + "epoch": 0.1270042194092827, + "grad_norm": 0.5454302430152893, + "learning_rate": 0.0014489104420481254, + "loss": 1.9002, + "step": 1204 + }, + { + "epoch": 0.1271097046413502, + "grad_norm": 0.6219759583473206, + "learning_rate": 0.001448819327208084, + "loss": 1.9422, + "step": 1205 + }, + { + "epoch": 0.12721518987341773, + "grad_norm": 0.6554241180419922, + "learning_rate": 0.0014487281340618526, + "loss": 1.9069, + "step": 1206 + }, + { + "epoch": 0.12732067510548523, + "grad_norm": 
0.5697451233863831, + "learning_rate": 0.0014486368626196494, + "loss": 1.897, + "step": 1207 + }, + { + "epoch": 0.12742616033755275, + "grad_norm": 0.5648565888404846, + "learning_rate": 0.001448545512891702, + "loss": 1.9053, + "step": 1208 + }, + { + "epoch": 0.12753164556962027, + "grad_norm": 0.6570296287536621, + "learning_rate": 0.0014484540848882469, + "loss": 1.9186, + "step": 1209 + }, + { + "epoch": 0.12763713080168776, + "grad_norm": 0.529322624206543, + "learning_rate": 0.0014483625786195285, + "loss": 1.9343, + "step": 1210 + }, + { + "epoch": 0.12774261603375528, + "grad_norm": 0.5628924369812012, + "learning_rate": 0.0014482709940958009, + "loss": 1.8809, + "step": 1211 + }, + { + "epoch": 0.12784810126582277, + "grad_norm": 0.514370322227478, + "learning_rate": 0.0014481793313273266, + "loss": 1.8702, + "step": 1212 + }, + { + "epoch": 0.1279535864978903, + "grad_norm": 0.6256044507026672, + "learning_rate": 0.0014480875903243766, + "loss": 1.9, + "step": 1213 + }, + { + "epoch": 0.1280590717299578, + "grad_norm": 0.6307132244110107, + "learning_rate": 0.0014479957710972313, + "loss": 1.8925, + "step": 1214 + }, + { + "epoch": 0.1281645569620253, + "grad_norm": 0.6584324836730957, + "learning_rate": 0.0014479038736561793, + "loss": 1.8805, + "step": 1215 + }, + { + "epoch": 0.12827004219409283, + "grad_norm": 0.5534483194351196, + "learning_rate": 0.001447811898011518, + "loss": 1.9133, + "step": 1216 + }, + { + "epoch": 0.12837552742616035, + "grad_norm": 0.5461531281471252, + "learning_rate": 0.0014477198441735543, + "loss": 1.9016, + "step": 1217 + }, + { + "epoch": 0.12848101265822784, + "grad_norm": 0.5972310304641724, + "learning_rate": 0.0014476277121526027, + "loss": 1.9461, + "step": 1218 + }, + { + "epoch": 0.12858649789029536, + "grad_norm": 0.5439587235450745, + "learning_rate": 0.0014475355019589872, + "loss": 1.8737, + "step": 1219 + }, + { + "epoch": 0.12869198312236288, + "grad_norm": 0.5253113508224487, + "learning_rate": 
0.0014474432136030405, + "loss": 1.8693, + "step": 1220 + }, + { + "epoch": 0.12879746835443037, + "grad_norm": 0.5318486094474792, + "learning_rate": 0.001447350847095104, + "loss": 1.9138, + "step": 1221 + }, + { + "epoch": 0.1289029535864979, + "grad_norm": 0.5369961857795715, + "learning_rate": 0.001447258402445528, + "loss": 1.909, + "step": 1222 + }, + { + "epoch": 0.1290084388185654, + "grad_norm": 0.5270316004753113, + "learning_rate": 0.0014471658796646708, + "loss": 1.8621, + "step": 1223 + }, + { + "epoch": 0.1291139240506329, + "grad_norm": 0.5411044955253601, + "learning_rate": 0.0014470732787629005, + "loss": 1.9291, + "step": 1224 + }, + { + "epoch": 0.12921940928270043, + "grad_norm": 0.5321746468544006, + "learning_rate": 0.0014469805997505932, + "loss": 1.8489, + "step": 1225 + }, + { + "epoch": 0.12932489451476795, + "grad_norm": 0.5850658416748047, + "learning_rate": 0.0014468878426381346, + "loss": 1.8705, + "step": 1226 + }, + { + "epoch": 0.12943037974683544, + "grad_norm": 0.5754272937774658, + "learning_rate": 0.001446795007435918, + "loss": 1.875, + "step": 1227 + }, + { + "epoch": 0.12953586497890296, + "grad_norm": 0.5933607220649719, + "learning_rate": 0.0014467020941543464, + "loss": 1.8753, + "step": 1228 + }, + { + "epoch": 0.12964135021097045, + "grad_norm": 0.6389915347099304, + "learning_rate": 0.0014466091028038314, + "loss": 1.9151, + "step": 1229 + }, + { + "epoch": 0.12974683544303797, + "grad_norm": 0.5567117929458618, + "learning_rate": 0.0014465160333947923, + "loss": 1.8488, + "step": 1230 + }, + { + "epoch": 0.1298523206751055, + "grad_norm": 0.5394185781478882, + "learning_rate": 0.0014464228859376587, + "loss": 1.8987, + "step": 1231 + }, + { + "epoch": 0.12995780590717299, + "grad_norm": 0.5630916357040405, + "learning_rate": 0.001446329660442868, + "loss": 1.9365, + "step": 1232 + }, + { + "epoch": 0.1300632911392405, + "grad_norm": 0.5445508360862732, + "learning_rate": 0.0014462363569208666, + "loss": 1.8718, + 
"step": 1233 + }, + { + "epoch": 0.13016877637130803, + "grad_norm": 0.5830762386322021, + "learning_rate": 0.00144614297538211, + "loss": 1.9222, + "step": 1234 + }, + { + "epoch": 0.13027426160337552, + "grad_norm": 0.6150953769683838, + "learning_rate": 0.0014460495158370615, + "loss": 1.9172, + "step": 1235 + }, + { + "epoch": 0.13037974683544304, + "grad_norm": 0.587834358215332, + "learning_rate": 0.0014459559782961937, + "loss": 1.8866, + "step": 1236 + }, + { + "epoch": 0.13048523206751056, + "grad_norm": 0.5183272957801819, + "learning_rate": 0.0014458623627699883, + "loss": 1.8888, + "step": 1237 + }, + { + "epoch": 0.13059071729957805, + "grad_norm": 0.5524412393569946, + "learning_rate": 0.0014457686692689355, + "loss": 1.8941, + "step": 1238 + }, + { + "epoch": 0.13069620253164557, + "grad_norm": 0.5610467195510864, + "learning_rate": 0.0014456748978035339, + "loss": 1.9158, + "step": 1239 + }, + { + "epoch": 0.1308016877637131, + "grad_norm": 0.5203685760498047, + "learning_rate": 0.0014455810483842908, + "loss": 1.875, + "step": 1240 + }, + { + "epoch": 0.13090717299578059, + "grad_norm": 0.6016744375228882, + "learning_rate": 0.0014454871210217229, + "loss": 1.8619, + "step": 1241 + }, + { + "epoch": 0.1310126582278481, + "grad_norm": 0.6330846548080444, + "learning_rate": 0.0014453931157263548, + "loss": 1.9044, + "step": 1242 + }, + { + "epoch": 0.1311181434599156, + "grad_norm": 0.5985261797904968, + "learning_rate": 0.001445299032508721, + "loss": 1.9043, + "step": 1243 + }, + { + "epoch": 0.13122362869198312, + "grad_norm": 0.5227945446968079, + "learning_rate": 0.0014452048713793633, + "loss": 1.8893, + "step": 1244 + }, + { + "epoch": 0.13132911392405064, + "grad_norm": 0.573605477809906, + "learning_rate": 0.0014451106323488331, + "loss": 1.9242, + "step": 1245 + }, + { + "epoch": 0.13143459915611813, + "grad_norm": 0.5513131022453308, + "learning_rate": 0.0014450163154276906, + "loss": 1.9288, + "step": 1246 + }, + { + "epoch": 
0.13154008438818565, + "grad_norm": 0.5620348453521729, + "learning_rate": 0.0014449219206265041, + "loss": 1.8946, + "step": 1247 + }, + { + "epoch": 0.13164556962025317, + "grad_norm": 0.6543868780136108, + "learning_rate": 0.0014448274479558513, + "loss": 1.9006, + "step": 1248 + }, + { + "epoch": 0.13175105485232066, + "grad_norm": 0.6634549498558044, + "learning_rate": 0.0014447328974263182, + "loss": 1.902, + "step": 1249 + }, + { + "epoch": 0.13185654008438819, + "grad_norm": 0.5839421153068542, + "learning_rate": 0.0014446382690484997, + "loss": 1.8951, + "step": 1250 + }, + { + "epoch": 0.1319620253164557, + "grad_norm": 0.6201381683349609, + "learning_rate": 0.0014445435628329993, + "loss": 1.9257, + "step": 1251 + }, + { + "epoch": 0.1320675105485232, + "grad_norm": 0.5589203238487244, + "learning_rate": 0.0014444487787904294, + "loss": 1.8809, + "step": 1252 + }, + { + "epoch": 0.13217299578059072, + "grad_norm": 0.598726212978363, + "learning_rate": 0.001444353916931411, + "loss": 1.8791, + "step": 1253 + }, + { + "epoch": 0.13227848101265824, + "grad_norm": 0.5754658579826355, + "learning_rate": 0.001444258977266574, + "loss": 1.86, + "step": 1254 + }, + { + "epoch": 0.13238396624472573, + "grad_norm": 0.5332456827163696, + "learning_rate": 0.0014441639598065565, + "loss": 1.8634, + "step": 1255 + }, + { + "epoch": 0.13248945147679325, + "grad_norm": 0.6224853992462158, + "learning_rate": 0.001444068864562006, + "loss": 1.8797, + "step": 1256 + }, + { + "epoch": 0.13259493670886077, + "grad_norm": 0.6123552322387695, + "learning_rate": 0.0014439736915435786, + "loss": 1.9017, + "step": 1257 + }, + { + "epoch": 0.13270042194092826, + "grad_norm": 0.5402783751487732, + "learning_rate": 0.001443878440761938, + "loss": 1.8811, + "step": 1258 + }, + { + "epoch": 0.13280590717299579, + "grad_norm": 0.6392689943313599, + "learning_rate": 0.0014437831122277585, + "loss": 1.8971, + "step": 1259 + }, + { + "epoch": 0.13291139240506328, + "grad_norm": 
0.6795900464057922, + "learning_rate": 0.0014436877059517215, + "loss": 1.8825, + "step": 1260 + }, + { + "epoch": 0.1330168776371308, + "grad_norm": 0.5078997015953064, + "learning_rate": 0.0014435922219445182, + "loss": 1.8998, + "step": 1261 + }, + { + "epoch": 0.13312236286919832, + "grad_norm": 0.5818304419517517, + "learning_rate": 0.0014434966602168478, + "loss": 1.8635, + "step": 1262 + }, + { + "epoch": 0.1332278481012658, + "grad_norm": 0.5954601764678955, + "learning_rate": 0.0014434010207794185, + "loss": 1.8716, + "step": 1263 + }, + { + "epoch": 0.13333333333333333, + "grad_norm": 0.5560868382453918, + "learning_rate": 0.0014433053036429474, + "loss": 1.8566, + "step": 1264 + }, + { + "epoch": 0.13343881856540085, + "grad_norm": 0.5392318964004517, + "learning_rate": 0.00144320950881816, + "loss": 1.8924, + "step": 1265 + }, + { + "epoch": 0.13354430379746834, + "grad_norm": 0.5349752902984619, + "learning_rate": 0.0014431136363157902, + "loss": 1.8843, + "step": 1266 + }, + { + "epoch": 0.13364978902953586, + "grad_norm": 0.5494065880775452, + "learning_rate": 0.0014430176861465812, + "loss": 1.9069, + "step": 1267 + }, + { + "epoch": 0.13375527426160339, + "grad_norm": 0.5855647325515747, + "learning_rate": 0.001442921658321285, + "loss": 1.8585, + "step": 1268 + }, + { + "epoch": 0.13386075949367088, + "grad_norm": 0.7028374671936035, + "learning_rate": 0.0014428255528506617, + "loss": 1.9152, + "step": 1269 + }, + { + "epoch": 0.1339662447257384, + "grad_norm": 0.6102259755134583, + "learning_rate": 0.0014427293697454803, + "loss": 1.8877, + "step": 1270 + }, + { + "epoch": 0.13407172995780592, + "grad_norm": 0.6178525686264038, + "learning_rate": 0.001442633109016519, + "loss": 1.9097, + "step": 1271 + }, + { + "epoch": 0.1341772151898734, + "grad_norm": 0.6779242753982544, + "learning_rate": 0.001442536770674564, + "loss": 1.9443, + "step": 1272 + }, + { + "epoch": 0.13428270042194093, + "grad_norm": 0.5987041592597961, + "learning_rate": 
0.0014424403547304103, + "loss": 1.8733, + "step": 1273 + }, + { + "epoch": 0.13438818565400845, + "grad_norm": 0.5913470983505249, + "learning_rate": 0.0014423438611948624, + "loss": 1.8928, + "step": 1274 + }, + { + "epoch": 0.13449367088607594, + "grad_norm": 0.5732803344726562, + "learning_rate": 0.0014422472900787323, + "loss": 1.9012, + "step": 1275 + }, + { + "epoch": 0.13459915611814346, + "grad_norm": 0.567559003829956, + "learning_rate": 0.0014421506413928415, + "loss": 1.8716, + "step": 1276 + }, + { + "epoch": 0.13470464135021096, + "grad_norm": 0.5654492974281311, + "learning_rate": 0.0014420539151480199, + "loss": 1.9168, + "step": 1277 + }, + { + "epoch": 0.13481012658227848, + "grad_norm": 0.5749799609184265, + "learning_rate": 0.0014419571113551063, + "loss": 1.8799, + "step": 1278 + }, + { + "epoch": 0.134915611814346, + "grad_norm": 0.6000438332557678, + "learning_rate": 0.0014418602300249482, + "loss": 1.9127, + "step": 1279 + }, + { + "epoch": 0.1350210970464135, + "grad_norm": 0.614173412322998, + "learning_rate": 0.001441763271168401, + "loss": 1.8495, + "step": 1280 + }, + { + "epoch": 0.135126582278481, + "grad_norm": 0.5910013914108276, + "learning_rate": 0.00144166623479633, + "loss": 1.887, + "step": 1281 + }, + { + "epoch": 0.13523206751054853, + "grad_norm": 0.5754728317260742, + "learning_rate": 0.0014415691209196085, + "loss": 1.8434, + "step": 1282 + }, + { + "epoch": 0.13533755274261602, + "grad_norm": 0.6381208896636963, + "learning_rate": 0.0014414719295491184, + "loss": 1.8938, + "step": 1283 + }, + { + "epoch": 0.13544303797468354, + "grad_norm": 0.5870094895362854, + "learning_rate": 0.0014413746606957505, + "loss": 1.8597, + "step": 1284 + }, + { + "epoch": 0.13554852320675106, + "grad_norm": 0.6303638219833374, + "learning_rate": 0.0014412773143704046, + "loss": 1.8579, + "step": 1285 + }, + { + "epoch": 0.13565400843881856, + "grad_norm": 0.6418425440788269, + "learning_rate": 0.0014411798905839884, + "loss": 1.8892, + 
"step": 1286 + }, + { + "epoch": 0.13575949367088608, + "grad_norm": 0.5491969585418701, + "learning_rate": 0.0014410823893474193, + "loss": 1.8815, + "step": 1287 + }, + { + "epoch": 0.1358649789029536, + "grad_norm": 0.7070967555046082, + "learning_rate": 0.001440984810671622, + "loss": 1.8927, + "step": 1288 + }, + { + "epoch": 0.1359704641350211, + "grad_norm": 0.8546894192695618, + "learning_rate": 0.0014408871545675314, + "loss": 1.9407, + "step": 1289 + }, + { + "epoch": 0.1360759493670886, + "grad_norm": 0.7525215744972229, + "learning_rate": 0.00144078942104609, + "loss": 1.9064, + "step": 1290 + }, + { + "epoch": 0.13618143459915613, + "grad_norm": 0.5813080072402954, + "learning_rate": 0.0014406916101182491, + "loss": 1.8997, + "step": 1291 + }, + { + "epoch": 0.13628691983122362, + "grad_norm": 0.5194587111473083, + "learning_rate": 0.0014405937217949695, + "loss": 1.8633, + "step": 1292 + }, + { + "epoch": 0.13639240506329114, + "grad_norm": 0.6486669778823853, + "learning_rate": 0.0014404957560872197, + "loss": 1.8772, + "step": 1293 + }, + { + "epoch": 0.13649789029535864, + "grad_norm": 0.6776139736175537, + "learning_rate": 0.0014403977130059773, + "loss": 1.9104, + "step": 1294 + }, + { + "epoch": 0.13660337552742616, + "grad_norm": 0.6066609621047974, + "learning_rate": 0.0014402995925622284, + "loss": 1.8179, + "step": 1295 + }, + { + "epoch": 0.13670886075949368, + "grad_norm": 0.5261843800544739, + "learning_rate": 0.0014402013947669681, + "loss": 1.8763, + "step": 1296 + }, + { + "epoch": 0.13681434599156117, + "grad_norm": 0.7677927613258362, + "learning_rate": 0.0014401031196312, + "loss": 1.883, + "step": 1297 + }, + { + "epoch": 0.1369198312236287, + "grad_norm": 0.960085928440094, + "learning_rate": 0.001440004767165936, + "loss": 1.8311, + "step": 1298 + }, + { + "epoch": 0.1370253164556962, + "grad_norm": 0.8924864530563354, + "learning_rate": 0.0014399063373821972, + "loss": 1.8677, + "step": 1299 + }, + { + "epoch": 
0.1371308016877637, + "grad_norm": 0.6683682799339294, + "learning_rate": 0.001439807830291013, + "loss": 1.8731, + "step": 1300 + }, + { + "epoch": 0.13723628691983122, + "grad_norm": 0.5542230606079102, + "learning_rate": 0.001439709245903422, + "loss": 1.8764, + "step": 1301 + }, + { + "epoch": 0.13734177215189874, + "grad_norm": 0.6065793633460999, + "learning_rate": 0.0014396105842304707, + "loss": 1.9044, + "step": 1302 + }, + { + "epoch": 0.13744725738396624, + "grad_norm": 0.5920975208282471, + "learning_rate": 0.0014395118452832146, + "loss": 1.8783, + "step": 1303 + }, + { + "epoch": 0.13755274261603376, + "grad_norm": 0.5812239050865173, + "learning_rate": 0.001439413029072718, + "loss": 1.8725, + "step": 1304 + }, + { + "epoch": 0.13765822784810128, + "grad_norm": 0.6501930952072144, + "learning_rate": 0.001439314135610054, + "loss": 1.901, + "step": 1305 + }, + { + "epoch": 0.13776371308016877, + "grad_norm": 0.5590621829032898, + "learning_rate": 0.0014392151649063039, + "loss": 1.8699, + "step": 1306 + }, + { + "epoch": 0.1378691983122363, + "grad_norm": 0.6158004403114319, + "learning_rate": 0.0014391161169725573, + "loss": 1.8552, + "step": 1307 + }, + { + "epoch": 0.1379746835443038, + "grad_norm": 0.6737933158874512, + "learning_rate": 0.001439016991819914, + "loss": 1.9073, + "step": 1308 + }, + { + "epoch": 0.1380801687763713, + "grad_norm": 0.5974993705749512, + "learning_rate": 0.001438917789459481, + "loss": 1.8607, + "step": 1309 + }, + { + "epoch": 0.13818565400843882, + "grad_norm": 0.6189507246017456, + "learning_rate": 0.0014388185099023744, + "loss": 1.8615, + "step": 1310 + }, + { + "epoch": 0.13829113924050632, + "grad_norm": 0.6171853542327881, + "learning_rate": 0.001438719153159719, + "loss": 1.8818, + "step": 1311 + }, + { + "epoch": 0.13839662447257384, + "grad_norm": 0.5388338565826416, + "learning_rate": 0.0014386197192426482, + "loss": 1.8731, + "step": 1312 + }, + { + "epoch": 0.13850210970464136, + "grad_norm": 
0.6644672155380249, + "learning_rate": 0.001438520208162304, + "loss": 1.9023, + "step": 1313 + }, + { + "epoch": 0.13860759493670885, + "grad_norm": 0.5981124639511108, + "learning_rate": 0.0014384206199298374, + "loss": 1.9051, + "step": 1314 + }, + { + "epoch": 0.13871308016877637, + "grad_norm": 0.588197648525238, + "learning_rate": 0.0014383209545564073, + "loss": 1.9078, + "step": 1315 + }, + { + "epoch": 0.1388185654008439, + "grad_norm": 0.6462689638137817, + "learning_rate": 0.001438221212053182, + "loss": 1.8408, + "step": 1316 + }, + { + "epoch": 0.13892405063291138, + "grad_norm": 0.6449511647224426, + "learning_rate": 0.0014381213924313386, + "loss": 1.872, + "step": 1317 + }, + { + "epoch": 0.1390295358649789, + "grad_norm": 0.6413859128952026, + "learning_rate": 0.0014380214957020613, + "loss": 1.8653, + "step": 1318 + }, + { + "epoch": 0.13913502109704642, + "grad_norm": 0.6893474459648132, + "learning_rate": 0.001437921521876545, + "loss": 1.8898, + "step": 1319 + }, + { + "epoch": 0.13924050632911392, + "grad_norm": 0.6546101570129395, + "learning_rate": 0.0014378214709659916, + "loss": 1.8727, + "step": 1320 + }, + { + "epoch": 0.13934599156118144, + "grad_norm": 0.7481565475463867, + "learning_rate": 0.0014377213429816128, + "loss": 1.8543, + "step": 1321 + }, + { + "epoch": 0.13945147679324896, + "grad_norm": 0.665841817855835, + "learning_rate": 0.0014376211379346282, + "loss": 1.8681, + "step": 1322 + }, + { + "epoch": 0.13955696202531645, + "grad_norm": 0.5659292340278625, + "learning_rate": 0.0014375208558362663, + "loss": 1.881, + "step": 1323 + }, + { + "epoch": 0.13966244725738397, + "grad_norm": 0.5716024041175842, + "learning_rate": 0.0014374204966977639, + "loss": 1.8801, + "step": 1324 + }, + { + "epoch": 0.13976793248945146, + "grad_norm": 0.5964714884757996, + "learning_rate": 0.0014373200605303674, + "loss": 1.8555, + "step": 1325 + }, + { + "epoch": 0.13987341772151898, + "grad_norm": 0.6695917844772339, + "learning_rate": 
0.001437219547345331, + "loss": 1.8853, + "step": 1326 + }, + { + "epoch": 0.1399789029535865, + "grad_norm": 0.6125011444091797, + "learning_rate": 0.0014371189571539174, + "loss": 1.8685, + "step": 1327 + }, + { + "epoch": 0.140084388185654, + "grad_norm": 0.5420240163803101, + "learning_rate": 0.0014370182899673982, + "loss": 1.8492, + "step": 1328 + }, + { + "epoch": 0.14018987341772152, + "grad_norm": 0.7141546607017517, + "learning_rate": 0.0014369175457970538, + "loss": 1.8657, + "step": 1329 + }, + { + "epoch": 0.14029535864978904, + "grad_norm": 0.8808703422546387, + "learning_rate": 0.0014368167246541733, + "loss": 1.8963, + "step": 1330 + }, + { + "epoch": 0.14040084388185653, + "grad_norm": 0.7079309225082397, + "learning_rate": 0.0014367158265500537, + "loss": 1.8894, + "step": 1331 + }, + { + "epoch": 0.14050632911392405, + "grad_norm": 0.5496623516082764, + "learning_rate": 0.0014366148514960016, + "loss": 1.8646, + "step": 1332 + }, + { + "epoch": 0.14061181434599157, + "grad_norm": 0.6560704708099365, + "learning_rate": 0.001436513799503332, + "loss": 1.8442, + "step": 1333 + }, + { + "epoch": 0.14071729957805906, + "grad_norm": 0.6498066782951355, + "learning_rate": 0.0014364126705833675, + "loss": 1.87, + "step": 1334 + }, + { + "epoch": 0.14082278481012658, + "grad_norm": 0.6375007033348083, + "learning_rate": 0.0014363114647474406, + "loss": 1.8395, + "step": 1335 + }, + { + "epoch": 0.1409282700421941, + "grad_norm": 0.5629147291183472, + "learning_rate": 0.0014362101820068918, + "loss": 1.8681, + "step": 1336 + }, + { + "epoch": 0.1410337552742616, + "grad_norm": 0.5439779162406921, + "learning_rate": 0.0014361088223730704, + "loss": 1.8862, + "step": 1337 + }, + { + "epoch": 0.14113924050632912, + "grad_norm": 0.5516910552978516, + "learning_rate": 0.0014360073858573341, + "loss": 1.8739, + "step": 1338 + }, + { + "epoch": 0.14124472573839664, + "grad_norm": 0.5369409918785095, + "learning_rate": 0.0014359058724710497, + "loss": 1.9125, + 
"step": 1339 + }, + { + "epoch": 0.14135021097046413, + "grad_norm": 0.5949428081512451, + "learning_rate": 0.0014358042822255918, + "loss": 1.8545, + "step": 1340 + }, + { + "epoch": 0.14145569620253165, + "grad_norm": 0.535508394241333, + "learning_rate": 0.0014357026151323444, + "loss": 1.8813, + "step": 1341 + }, + { + "epoch": 0.14156118143459914, + "grad_norm": 0.5309810638427734, + "learning_rate": 0.0014356008712027, + "loss": 1.9056, + "step": 1342 + }, + { + "epoch": 0.14166666666666666, + "grad_norm": 0.5794306397438049, + "learning_rate": 0.0014354990504480592, + "loss": 1.8526, + "step": 1343 + }, + { + "epoch": 0.14177215189873418, + "grad_norm": 0.4972829818725586, + "learning_rate": 0.0014353971528798313, + "loss": 1.8768, + "step": 1344 + }, + { + "epoch": 0.14187763713080168, + "grad_norm": 0.5639289021492004, + "learning_rate": 0.001435295178509435, + "loss": 1.8566, + "step": 1345 + }, + { + "epoch": 0.1419831223628692, + "grad_norm": 0.5476253032684326, + "learning_rate": 0.0014351931273482966, + "loss": 1.8525, + "step": 1346 + }, + { + "epoch": 0.14208860759493672, + "grad_norm": 0.532654345035553, + "learning_rate": 0.0014350909994078516, + "loss": 1.8724, + "step": 1347 + }, + { + "epoch": 0.1421940928270042, + "grad_norm": 0.623290479183197, + "learning_rate": 0.0014349887946995441, + "loss": 1.8635, + "step": 1348 + }, + { + "epoch": 0.14229957805907173, + "grad_norm": 0.6814566850662231, + "learning_rate": 0.0014348865132348262, + "loss": 1.8742, + "step": 1349 + }, + { + "epoch": 0.14240506329113925, + "grad_norm": 0.5827710032463074, + "learning_rate": 0.0014347841550251597, + "loss": 1.8742, + "step": 1350 + }, + { + "epoch": 0.14251054852320674, + "grad_norm": 0.6221494674682617, + "learning_rate": 0.0014346817200820137, + "loss": 1.8524, + "step": 1351 + }, + { + "epoch": 0.14261603375527426, + "grad_norm": 0.7677516341209412, + "learning_rate": 0.0014345792084168672, + "loss": 1.8517, + "step": 1352 + }, + { + "epoch": 
0.14272151898734178, + "grad_norm": 0.8303447961807251, + "learning_rate": 0.0014344766200412062, + "loss": 1.8894, + "step": 1353 + }, + { + "epoch": 0.14282700421940928, + "grad_norm": 0.6279175877571106, + "learning_rate": 0.0014343739549665274, + "loss": 1.8895, + "step": 1354 + }, + { + "epoch": 0.1429324894514768, + "grad_norm": 0.49451372027397156, + "learning_rate": 0.0014342712132043342, + "loss": 1.8495, + "step": 1355 + }, + { + "epoch": 0.14303797468354432, + "grad_norm": 0.5483686327934265, + "learning_rate": 0.001434168394766139, + "loss": 1.8461, + "step": 1356 + }, + { + "epoch": 0.1431434599156118, + "grad_norm": 0.5136895179748535, + "learning_rate": 0.001434065499663464, + "loss": 1.8768, + "step": 1357 + }, + { + "epoch": 0.14324894514767933, + "grad_norm": 0.5060833692550659, + "learning_rate": 0.0014339625279078388, + "loss": 1.8628, + "step": 1358 + }, + { + "epoch": 0.14335443037974682, + "grad_norm": 0.5874459743499756, + "learning_rate": 0.0014338594795108017, + "loss": 1.8443, + "step": 1359 + }, + { + "epoch": 0.14345991561181434, + "grad_norm": 0.6087844967842102, + "learning_rate": 0.0014337563544838997, + "loss": 1.8883, + "step": 1360 + }, + { + "epoch": 0.14356540084388186, + "grad_norm": 0.6341425180435181, + "learning_rate": 0.0014336531528386888, + "loss": 1.849, + "step": 1361 + }, + { + "epoch": 0.14367088607594936, + "grad_norm": 0.5329961776733398, + "learning_rate": 0.0014335498745867332, + "loss": 1.8628, + "step": 1362 + }, + { + "epoch": 0.14377637130801688, + "grad_norm": 0.5873750448226929, + "learning_rate": 0.0014334465197396054, + "loss": 1.8757, + "step": 1363 + }, + { + "epoch": 0.1438818565400844, + "grad_norm": 0.5611206293106079, + "learning_rate": 0.0014333430883088877, + "loss": 1.8575, + "step": 1364 + }, + { + "epoch": 0.1439873417721519, + "grad_norm": 0.5127421021461487, + "learning_rate": 0.001433239580306169, + "loss": 1.8403, + "step": 1365 + }, + { + "epoch": 0.1440928270042194, + "grad_norm": 
0.5496838688850403, + "learning_rate": 0.0014331359957430482, + "loss": 1.8606, + "step": 1366 + }, + { + "epoch": 0.14419831223628693, + "grad_norm": 0.5445463061332703, + "learning_rate": 0.001433032334631133, + "loss": 1.884, + "step": 1367 + }, + { + "epoch": 0.14430379746835442, + "grad_norm": 0.7003356218338013, + "learning_rate": 0.0014329285969820389, + "loss": 1.8572, + "step": 1368 + }, + { + "epoch": 0.14440928270042194, + "grad_norm": 0.766230583190918, + "learning_rate": 0.00143282478280739, + "loss": 1.8887, + "step": 1369 + }, + { + "epoch": 0.14451476793248946, + "grad_norm": 0.6170887351036072, + "learning_rate": 0.001432720892118819, + "loss": 1.8591, + "step": 1370 + }, + { + "epoch": 0.14462025316455696, + "grad_norm": 0.6822317838668823, + "learning_rate": 0.0014326169249279683, + "loss": 1.8568, + "step": 1371 + }, + { + "epoch": 0.14472573839662448, + "grad_norm": 0.8487035632133484, + "learning_rate": 0.001432512881246487, + "loss": 1.8457, + "step": 1372 + }, + { + "epoch": 0.144831223628692, + "grad_norm": 0.7393089532852173, + "learning_rate": 0.0014324087610860339, + "loss": 1.8669, + "step": 1373 + }, + { + "epoch": 0.1449367088607595, + "grad_norm": 0.527899980545044, + "learning_rate": 0.0014323045644582765, + "loss": 1.8519, + "step": 1374 + }, + { + "epoch": 0.145042194092827, + "grad_norm": 0.6965951919555664, + "learning_rate": 0.0014322002913748902, + "loss": 1.8671, + "step": 1375 + }, + { + "epoch": 0.1451476793248945, + "grad_norm": 0.8703835010528564, + "learning_rate": 0.0014320959418475596, + "loss": 1.8495, + "step": 1376 + }, + { + "epoch": 0.14525316455696202, + "grad_norm": 0.6573900580406189, + "learning_rate": 0.0014319915158879776, + "loss": 1.827, + "step": 1377 + }, + { + "epoch": 0.14535864978902954, + "grad_norm": 0.6171213984489441, + "learning_rate": 0.0014318870135078452, + "loss": 1.8248, + "step": 1378 + }, + { + "epoch": 0.14546413502109704, + "grad_norm": 0.8773496150970459, + "learning_rate": 
0.001431782434718873, + "loss": 1.9044, + "step": 1379 + }, + { + "epoch": 0.14556962025316456, + "grad_norm": 0.785266101360321, + "learning_rate": 0.0014316777795327794, + "loss": 1.8428, + "step": 1380 + }, + { + "epoch": 0.14567510548523208, + "grad_norm": 0.5768530964851379, + "learning_rate": 0.0014315730479612914, + "loss": 1.89, + "step": 1381 + }, + { + "epoch": 0.14578059071729957, + "grad_norm": 0.6219577193260193, + "learning_rate": 0.0014314682400161445, + "loss": 1.8546, + "step": 1382 + }, + { + "epoch": 0.1458860759493671, + "grad_norm": 0.7430046200752258, + "learning_rate": 0.0014313633557090834, + "loss": 1.8538, + "step": 1383 + }, + { + "epoch": 0.1459915611814346, + "grad_norm": 0.6507778167724609, + "learning_rate": 0.0014312583950518607, + "loss": 1.8425, + "step": 1384 + }, + { + "epoch": 0.1460970464135021, + "grad_norm": 0.5390203595161438, + "learning_rate": 0.0014311533580562378, + "loss": 1.8823, + "step": 1385 + }, + { + "epoch": 0.14620253164556962, + "grad_norm": 0.6357337236404419, + "learning_rate": 0.0014310482447339845, + "loss": 1.8632, + "step": 1386 + }, + { + "epoch": 0.14630801687763714, + "grad_norm": 0.7420737743377686, + "learning_rate": 0.0014309430550968794, + "loss": 1.8635, + "step": 1387 + }, + { + "epoch": 0.14641350210970464, + "grad_norm": 0.5203769207000732, + "learning_rate": 0.0014308377891567095, + "loss": 1.8482, + "step": 1388 + }, + { + "epoch": 0.14651898734177216, + "grad_norm": 0.7806848287582397, + "learning_rate": 0.0014307324469252703, + "loss": 1.8666, + "step": 1389 + }, + { + "epoch": 0.14662447257383968, + "grad_norm": 0.8801440596580505, + "learning_rate": 0.001430627028414366, + "loss": 1.8802, + "step": 1390 + }, + { + "epoch": 0.14672995780590717, + "grad_norm": 0.7125523686408997, + "learning_rate": 0.0014305215336358093, + "loss": 1.8905, + "step": 1391 + }, + { + "epoch": 0.1468354430379747, + "grad_norm": 0.6039991974830627, + "learning_rate": 0.0014304159626014213, + "loss": 1.878, + 
"step": 1392 + }, + { + "epoch": 0.14694092827004218, + "grad_norm": 0.6685895323753357, + "learning_rate": 0.0014303103153230322, + "loss": 1.8463, + "step": 1393 + }, + { + "epoch": 0.1470464135021097, + "grad_norm": 0.6639077663421631, + "learning_rate": 0.0014302045918124795, + "loss": 1.8392, + "step": 1394 + }, + { + "epoch": 0.14715189873417722, + "grad_norm": 0.6109727621078491, + "learning_rate": 0.0014300987920816107, + "loss": 1.8654, + "step": 1395 + }, + { + "epoch": 0.14725738396624471, + "grad_norm": 0.6105895042419434, + "learning_rate": 0.0014299929161422807, + "loss": 1.8591, + "step": 1396 + }, + { + "epoch": 0.14736286919831224, + "grad_norm": 0.6957910656929016, + "learning_rate": 0.001429886964006354, + "loss": 1.8192, + "step": 1397 + }, + { + "epoch": 0.14746835443037976, + "grad_norm": 0.648102343082428, + "learning_rate": 0.0014297809356857026, + "loss": 1.8094, + "step": 1398 + }, + { + "epoch": 0.14757383966244725, + "grad_norm": 0.5797403454780579, + "learning_rate": 0.0014296748311922074, + "loss": 1.8538, + "step": 1399 + }, + { + "epoch": 0.14767932489451477, + "grad_norm": 0.7204359173774719, + "learning_rate": 0.0014295686505377586, + "loss": 1.8154, + "step": 1400 + }, + { + "epoch": 0.1477848101265823, + "grad_norm": 0.5485019683837891, + "learning_rate": 0.001429462393734254, + "loss": 1.8612, + "step": 1401 + }, + { + "epoch": 0.14789029535864978, + "grad_norm": 0.6802738904953003, + "learning_rate": 0.0014293560607935999, + "loss": 1.8477, + "step": 1402 + }, + { + "epoch": 0.1479957805907173, + "grad_norm": 0.6453447341918945, + "learning_rate": 0.0014292496517277116, + "loss": 1.8829, + "step": 1403 + }, + { + "epoch": 0.14810126582278482, + "grad_norm": 0.69231116771698, + "learning_rate": 0.0014291431665485125, + "loss": 1.8578, + "step": 1404 + }, + { + "epoch": 0.14820675105485231, + "grad_norm": 0.662425696849823, + "learning_rate": 0.0014290366052679352, + "loss": 1.8284, + "step": 1405 + }, + { + "epoch": 
0.14831223628691984, + "grad_norm": 0.6362969279289246, + "learning_rate": 0.0014289299678979207, + "loss": 1.8345, + "step": 1406 + }, + { + "epoch": 0.14841772151898736, + "grad_norm": 0.8318188786506653, + "learning_rate": 0.0014288232544504174, + "loss": 1.7802, + "step": 1407 + }, + { + "epoch": 0.14852320675105485, + "grad_norm": 0.7745736837387085, + "learning_rate": 0.0014287164649373837, + "loss": 1.8709, + "step": 1408 + }, + { + "epoch": 0.14862869198312237, + "grad_norm": 0.6388303637504578, + "learning_rate": 0.0014286095993707856, + "loss": 1.8736, + "step": 1409 + }, + { + "epoch": 0.14873417721518986, + "grad_norm": 0.6226194500923157, + "learning_rate": 0.0014285026577625982, + "loss": 1.8639, + "step": 1410 + }, + { + "epoch": 0.14883966244725738, + "grad_norm": 0.8211151957511902, + "learning_rate": 0.0014283956401248048, + "loss": 1.8805, + "step": 1411 + }, + { + "epoch": 0.1489451476793249, + "grad_norm": 0.7271362543106079, + "learning_rate": 0.0014282885464693969, + "loss": 1.8444, + "step": 1412 + }, + { + "epoch": 0.1490506329113924, + "grad_norm": 0.6147940754890442, + "learning_rate": 0.001428181376808375, + "loss": 1.8505, + "step": 1413 + }, + { + "epoch": 0.14915611814345991, + "grad_norm": 0.7665190100669861, + "learning_rate": 0.0014280741311537483, + "loss": 1.862, + "step": 1414 + }, + { + "epoch": 0.14926160337552744, + "grad_norm": 0.8640597462654114, + "learning_rate": 0.001427966809517534, + "loss": 1.8257, + "step": 1415 + }, + { + "epoch": 0.14936708860759493, + "grad_norm": 0.5905357003211975, + "learning_rate": 0.001427859411911758, + "loss": 1.8888, + "step": 1416 + }, + { + "epoch": 0.14947257383966245, + "grad_norm": 0.699871838092804, + "learning_rate": 0.0014277519383484548, + "loss": 1.8252, + "step": 1417 + }, + { + "epoch": 0.14957805907172997, + "grad_norm": 0.7606005668640137, + "learning_rate": 0.0014276443888396675, + "loss": 1.8675, + "step": 1418 + }, + { + "epoch": 0.14968354430379746, + "grad_norm": 
0.5441035628318787, + "learning_rate": 0.0014275367633974473, + "loss": 1.8585, + "step": 1419 + }, + { + "epoch": 0.14978902953586498, + "grad_norm": 0.653247058391571, + "learning_rate": 0.0014274290620338542, + "loss": 1.884, + "step": 1420 + }, + { + "epoch": 0.1498945147679325, + "grad_norm": 0.6618223786354065, + "learning_rate": 0.0014273212847609566, + "loss": 1.8523, + "step": 1421 + }, + { + "epoch": 0.15, + "grad_norm": 0.578489363193512, + "learning_rate": 0.0014272134315908317, + "loss": 1.8386, + "step": 1422 + }, + { + "epoch": 0.15010548523206751, + "grad_norm": 0.6089845299720764, + "learning_rate": 0.0014271055025355652, + "loss": 1.8518, + "step": 1423 + }, + { + "epoch": 0.150210970464135, + "grad_norm": 0.5807135105133057, + "learning_rate": 0.0014269974976072505, + "loss": 1.8419, + "step": 1424 + }, + { + "epoch": 0.15031645569620253, + "grad_norm": 0.6860411167144775, + "learning_rate": 0.0014268894168179903, + "loss": 1.8329, + "step": 1425 + }, + { + "epoch": 0.15042194092827005, + "grad_norm": 0.7913932800292969, + "learning_rate": 0.0014267812601798957, + "loss": 1.8738, + "step": 1426 + }, + { + "epoch": 0.15052742616033754, + "grad_norm": 0.5719784498214722, + "learning_rate": 0.0014266730277050863, + "loss": 1.8906, + "step": 1427 + }, + { + "epoch": 0.15063291139240506, + "grad_norm": 0.7256911396980286, + "learning_rate": 0.00142656471940569, + "loss": 1.8373, + "step": 1428 + }, + { + "epoch": 0.15073839662447258, + "grad_norm": 0.7745197415351868, + "learning_rate": 0.001426456335293843, + "loss": 1.836, + "step": 1429 + }, + { + "epoch": 0.15084388185654007, + "grad_norm": 0.6802366971969604, + "learning_rate": 0.0014263478753816906, + "loss": 1.8348, + "step": 1430 + }, + { + "epoch": 0.1509493670886076, + "grad_norm": 0.552780270576477, + "learning_rate": 0.0014262393396813863, + "loss": 1.8478, + "step": 1431 + }, + { + "epoch": 0.15105485232067511, + "grad_norm": 0.8125191330909729, + "learning_rate": 0.001426130728205092, + 
"loss": 1.8588, + "step": 1432 + }, + { + "epoch": 0.1511603375527426, + "grad_norm": 0.6713847517967224, + "learning_rate": 0.001426022040964978, + "loss": 1.8062, + "step": 1433 + }, + { + "epoch": 0.15126582278481013, + "grad_norm": 0.5367255210876465, + "learning_rate": 0.0014259132779732234, + "loss": 1.8307, + "step": 1434 + }, + { + "epoch": 0.15137130801687765, + "grad_norm": 0.6496189832687378, + "learning_rate": 0.0014258044392420155, + "loss": 1.8773, + "step": 1435 + }, + { + "epoch": 0.15147679324894514, + "grad_norm": 0.5739408135414124, + "learning_rate": 0.0014256955247835504, + "loss": 1.8315, + "step": 1436 + }, + { + "epoch": 0.15158227848101266, + "grad_norm": 0.5475507378578186, + "learning_rate": 0.0014255865346100324, + "loss": 1.8654, + "step": 1437 + }, + { + "epoch": 0.15168776371308018, + "grad_norm": 0.5728459358215332, + "learning_rate": 0.0014254774687336744, + "loss": 1.8196, + "step": 1438 + }, + { + "epoch": 0.15179324894514767, + "grad_norm": 0.63370281457901, + "learning_rate": 0.0014253683271666978, + "loss": 1.8627, + "step": 1439 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 0.6058464646339417, + "learning_rate": 0.0014252591099213326, + "loss": 1.8489, + "step": 1440 + }, + { + "epoch": 0.1520042194092827, + "grad_norm": 0.5870775580406189, + "learning_rate": 0.0014251498170098167, + "loss": 1.8096, + "step": 1441 + }, + { + "epoch": 0.1521097046413502, + "grad_norm": 0.6893830895423889, + "learning_rate": 0.0014250404484443975, + "loss": 1.8584, + "step": 1442 + }, + { + "epoch": 0.15221518987341773, + "grad_norm": 0.5529017448425293, + "learning_rate": 0.0014249310042373298, + "loss": 1.8427, + "step": 1443 + }, + { + "epoch": 0.15232067510548522, + "grad_norm": 0.6361820101737976, + "learning_rate": 0.0014248214844008776, + "loss": 1.8519, + "step": 1444 + }, + { + "epoch": 0.15242616033755274, + "grad_norm": 0.6112284660339355, + "learning_rate": 0.001424711888947313, + "loss": 1.8327, + "step": 1445 + }, + { + 
"epoch": 0.15253164556962026, + "grad_norm": 0.5295151472091675, + "learning_rate": 0.001424602217888917, + "loss": 1.8379, + "step": 1446 + }, + { + "epoch": 0.15263713080168775, + "grad_norm": 0.5909629464149475, + "learning_rate": 0.0014244924712379786, + "loss": 1.8498, + "step": 1447 + }, + { + "epoch": 0.15274261603375527, + "grad_norm": 0.5086919069290161, + "learning_rate": 0.0014243826490067954, + "loss": 1.8432, + "step": 1448 + }, + { + "epoch": 0.1528481012658228, + "grad_norm": 0.6039360165596008, + "learning_rate": 0.0014242727512076736, + "loss": 1.864, + "step": 1449 + }, + { + "epoch": 0.1529535864978903, + "grad_norm": 0.6202875971794128, + "learning_rate": 0.001424162777852928, + "loss": 1.8647, + "step": 1450 + }, + { + "epoch": 0.1530590717299578, + "grad_norm": 0.5090221762657166, + "learning_rate": 0.0014240527289548814, + "loss": 1.848, + "step": 1451 + }, + { + "epoch": 0.15316455696202533, + "grad_norm": 0.6075028777122498, + "learning_rate": 0.0014239426045258652, + "loss": 1.8475, + "step": 1452 + }, + { + "epoch": 0.15327004219409282, + "grad_norm": 0.5554097890853882, + "learning_rate": 0.0014238324045782198, + "loss": 1.8225, + "step": 1453 + }, + { + "epoch": 0.15337552742616034, + "grad_norm": 0.522114634513855, + "learning_rate": 0.0014237221291242932, + "loss": 1.7993, + "step": 1454 + }, + { + "epoch": 0.15348101265822786, + "grad_norm": 0.5602745413780212, + "learning_rate": 0.0014236117781764425, + "loss": 1.8207, + "step": 1455 + }, + { + "epoch": 0.15358649789029535, + "grad_norm": 0.5402941107749939, + "learning_rate": 0.0014235013517470334, + "loss": 1.8705, + "step": 1456 + }, + { + "epoch": 0.15369198312236287, + "grad_norm": 0.6084504723548889, + "learning_rate": 0.0014233908498484393, + "loss": 1.8174, + "step": 1457 + }, + { + "epoch": 0.15379746835443037, + "grad_norm": 0.5986098647117615, + "learning_rate": 0.0014232802724930427, + "loss": 1.8274, + "step": 1458 + }, + { + "epoch": 0.1539029535864979, + "grad_norm": 
0.5114652514457703, + "learning_rate": 0.0014231696196932342, + "loss": 1.8423, + "step": 1459 + }, + { + "epoch": 0.1540084388185654, + "grad_norm": 0.5902886986732483, + "learning_rate": 0.0014230588914614134, + "loss": 1.8883, + "step": 1460 + }, + { + "epoch": 0.1541139240506329, + "grad_norm": 0.5369532704353333, + "learning_rate": 0.0014229480878099872, + "loss": 1.8512, + "step": 1461 + }, + { + "epoch": 0.15421940928270042, + "grad_norm": 0.5492854118347168, + "learning_rate": 0.0014228372087513725, + "loss": 1.7974, + "step": 1462 + }, + { + "epoch": 0.15432489451476794, + "grad_norm": 0.5336595177650452, + "learning_rate": 0.0014227262542979933, + "loss": 1.8097, + "step": 1463 + }, + { + "epoch": 0.15443037974683543, + "grad_norm": 0.641187310218811, + "learning_rate": 0.0014226152244622826, + "loss": 1.8395, + "step": 1464 + }, + { + "epoch": 0.15453586497890295, + "grad_norm": 0.606646716594696, + "learning_rate": 0.0014225041192566822, + "loss": 1.821, + "step": 1465 + }, + { + "epoch": 0.15464135021097047, + "grad_norm": 0.5542685985565186, + "learning_rate": 0.001422392938693642, + "loss": 1.8192, + "step": 1466 + }, + { + "epoch": 0.15474683544303797, + "grad_norm": 0.6228745579719543, + "learning_rate": 0.0014222816827856202, + "loss": 1.8776, + "step": 1467 + }, + { + "epoch": 0.1548523206751055, + "grad_norm": 0.5541315078735352, + "learning_rate": 0.0014221703515450834, + "loss": 1.857, + "step": 1468 + }, + { + "epoch": 0.154957805907173, + "grad_norm": 0.591027557849884, + "learning_rate": 0.001422058944984507, + "loss": 1.8635, + "step": 1469 + }, + { + "epoch": 0.1550632911392405, + "grad_norm": 0.8596515655517578, + "learning_rate": 0.0014219474631163745, + "loss": 1.8527, + "step": 1470 + }, + { + "epoch": 0.15516877637130802, + "grad_norm": 0.6001699566841125, + "learning_rate": 0.0014218359059531783, + "loss": 1.8563, + "step": 1471 + }, + { + "epoch": 0.15527426160337554, + "grad_norm": 0.6520236730575562, + "learning_rate": 
0.0014217242735074188, + "loss": 1.8623, + "step": 1472 + }, + { + "epoch": 0.15537974683544303, + "grad_norm": 0.908523440361023, + "learning_rate": 0.0014216125657916046, + "loss": 1.8306, + "step": 1473 + }, + { + "epoch": 0.15548523206751055, + "grad_norm": 0.7254512310028076, + "learning_rate": 0.0014215007828182536, + "loss": 1.8346, + "step": 1474 + }, + { + "epoch": 0.15559071729957805, + "grad_norm": 0.5696256160736084, + "learning_rate": 0.0014213889245998917, + "loss": 1.846, + "step": 1475 + }, + { + "epoch": 0.15569620253164557, + "grad_norm": 0.7882362008094788, + "learning_rate": 0.0014212769911490528, + "loss": 1.8428, + "step": 1476 + }, + { + "epoch": 0.1558016877637131, + "grad_norm": 0.6446660757064819, + "learning_rate": 0.0014211649824782797, + "loss": 1.8352, + "step": 1477 + }, + { + "epoch": 0.15590717299578058, + "grad_norm": 0.5735400319099426, + "learning_rate": 0.0014210528986001237, + "loss": 1.8311, + "step": 1478 + }, + { + "epoch": 0.1560126582278481, + "grad_norm": 0.7755749821662903, + "learning_rate": 0.001420940739527144, + "loss": 1.8697, + "step": 1479 + }, + { + "epoch": 0.15611814345991562, + "grad_norm": 0.574445903301239, + "learning_rate": 0.001420828505271909, + "loss": 1.8011, + "step": 1480 + }, + { + "epoch": 0.1562236286919831, + "grad_norm": 0.6321041584014893, + "learning_rate": 0.001420716195846995, + "loss": 1.8576, + "step": 1481 + }, + { + "epoch": 0.15632911392405063, + "grad_norm": 0.7346292734146118, + "learning_rate": 0.0014206038112649865, + "loss": 1.8362, + "step": 1482 + }, + { + "epoch": 0.15643459915611815, + "grad_norm": 0.5507804751396179, + "learning_rate": 0.0014204913515384772, + "loss": 1.8473, + "step": 1483 + }, + { + "epoch": 0.15654008438818565, + "grad_norm": 0.6744362711906433, + "learning_rate": 0.0014203788166800685, + "loss": 1.86, + "step": 1484 + }, + { + "epoch": 0.15664556962025317, + "grad_norm": 0.63077312707901, + "learning_rate": 0.0014202662067023708, + "loss": 1.8098, + 
"step": 1485 + }, + { + "epoch": 0.1567510548523207, + "grad_norm": 0.6044159531593323, + "learning_rate": 0.0014201535216180024, + "loss": 1.8725, + "step": 1486 + }, + { + "epoch": 0.15685654008438818, + "grad_norm": 0.6358382701873779, + "learning_rate": 0.0014200407614395898, + "loss": 1.7962, + "step": 1487 + }, + { + "epoch": 0.1569620253164557, + "grad_norm": 0.573345422744751, + "learning_rate": 0.0014199279261797692, + "loss": 1.8602, + "step": 1488 + }, + { + "epoch": 0.15706751054852322, + "grad_norm": 0.5711865425109863, + "learning_rate": 0.0014198150158511837, + "loss": 1.8187, + "step": 1489 + }, + { + "epoch": 0.1571729957805907, + "grad_norm": 0.6450026035308838, + "learning_rate": 0.0014197020304664856, + "loss": 1.8244, + "step": 1490 + }, + { + "epoch": 0.15727848101265823, + "grad_norm": 0.6698281764984131, + "learning_rate": 0.0014195889700383357, + "loss": 1.8113, + "step": 1491 + }, + { + "epoch": 0.15738396624472573, + "grad_norm": 0.5761056542396545, + "learning_rate": 0.0014194758345794029, + "loss": 1.8356, + "step": 1492 + }, + { + "epoch": 0.15748945147679325, + "grad_norm": 0.6742629408836365, + "learning_rate": 0.0014193626241023644, + "loss": 1.8336, + "step": 1493 + }, + { + "epoch": 0.15759493670886077, + "grad_norm": 0.8266981244087219, + "learning_rate": 0.001419249338619906, + "loss": 1.8252, + "step": 1494 + }, + { + "epoch": 0.15770042194092826, + "grad_norm": 0.6044406890869141, + "learning_rate": 0.0014191359781447223, + "loss": 1.8172, + "step": 1495 + }, + { + "epoch": 0.15780590717299578, + "grad_norm": 0.6362290382385254, + "learning_rate": 0.0014190225426895153, + "loss": 1.888, + "step": 1496 + }, + { + "epoch": 0.1579113924050633, + "grad_norm": 0.6417899131774902, + "learning_rate": 0.0014189090322669967, + "loss": 1.8498, + "step": 1497 + }, + { + "epoch": 0.1580168776371308, + "grad_norm": 0.5481340885162354, + "learning_rate": 0.0014187954468898854, + "loss": 1.82, + "step": 1498 + }, + { + "epoch": 
0.1581223628691983, + "grad_norm": 0.5037826299667358, + "learning_rate": 0.0014186817865709095, + "loss": 1.8586, + "step": 1499 + }, + { + "epoch": 0.15822784810126583, + "grad_norm": 0.6089605689048767, + "learning_rate": 0.0014185680513228048, + "loss": 1.8278, + "step": 1500 + }, + { + "epoch": 0.15833333333333333, + "grad_norm": 0.608803927898407, + "learning_rate": 0.0014184542411583162, + "loss": 1.8314, + "step": 1501 + }, + { + "epoch": 0.15843881856540085, + "grad_norm": 0.7378712296485901, + "learning_rate": 0.001418340356090197, + "loss": 1.8259, + "step": 1502 + }, + { + "epoch": 0.15854430379746837, + "grad_norm": 0.5543790459632874, + "learning_rate": 0.0014182263961312078, + "loss": 1.8307, + "step": 1503 + }, + { + "epoch": 0.15864978902953586, + "grad_norm": 0.6083654165267944, + "learning_rate": 0.001418112361294119, + "loss": 1.8717, + "step": 1504 + }, + { + "epoch": 0.15875527426160338, + "grad_norm": 0.8015477657318115, + "learning_rate": 0.0014179982515917088, + "loss": 1.8308, + "step": 1505 + }, + { + "epoch": 0.15886075949367087, + "grad_norm": 0.7409259080886841, + "learning_rate": 0.0014178840670367634, + "loss": 1.8242, + "step": 1506 + }, + { + "epoch": 0.1589662447257384, + "grad_norm": 0.6514421701431274, + "learning_rate": 0.001417769807642078, + "loss": 1.8713, + "step": 1507 + }, + { + "epoch": 0.1590717299578059, + "grad_norm": 0.5489009618759155, + "learning_rate": 0.0014176554734204557, + "loss": 1.8008, + "step": 1508 + }, + { + "epoch": 0.1591772151898734, + "grad_norm": 0.5727930068969727, + "learning_rate": 0.0014175410643847085, + "loss": 1.8251, + "step": 1509 + }, + { + "epoch": 0.15928270042194093, + "grad_norm": 0.5569401979446411, + "learning_rate": 0.0014174265805476564, + "loss": 1.837, + "step": 1510 + }, + { + "epoch": 0.15938818565400845, + "grad_norm": 0.5234407782554626, + "learning_rate": 0.001417312021922128, + "loss": 1.809, + "step": 1511 + }, + { + "epoch": 0.15949367088607594, + "grad_norm": 
0.5103266835212708, + "learning_rate": 0.0014171973885209596, + "loss": 1.8323, + "step": 1512 + }, + { + "epoch": 0.15959915611814346, + "grad_norm": 0.544102668762207, + "learning_rate": 0.0014170826803569971, + "loss": 1.8611, + "step": 1513 + }, + { + "epoch": 0.15970464135021098, + "grad_norm": 0.5092897415161133, + "learning_rate": 0.0014169678974430941, + "loss": 1.8274, + "step": 1514 + }, + { + "epoch": 0.15981012658227847, + "grad_norm": 0.5268481373786926, + "learning_rate": 0.0014168530397921121, + "loss": 1.8548, + "step": 1515 + }, + { + "epoch": 0.159915611814346, + "grad_norm": 0.5621142387390137, + "learning_rate": 0.0014167381074169218, + "loss": 1.8351, + "step": 1516 + }, + { + "epoch": 0.1600210970464135, + "grad_norm": 0.550338625907898, + "learning_rate": 0.0014166231003304019, + "loss": 1.8298, + "step": 1517 + }, + { + "epoch": 0.160126582278481, + "grad_norm": 0.5734298229217529, + "learning_rate": 0.0014165080185454396, + "loss": 1.8415, + "step": 1518 + }, + { + "epoch": 0.16023206751054853, + "grad_norm": 0.5179802179336548, + "learning_rate": 0.0014163928620749301, + "loss": 1.8352, + "step": 1519 + }, + { + "epoch": 0.16033755274261605, + "grad_norm": 0.6269114017486572, + "learning_rate": 0.0014162776309317778, + "loss": 1.8259, + "step": 1520 + }, + { + "epoch": 0.16044303797468354, + "grad_norm": 0.6383045315742493, + "learning_rate": 0.0014161623251288944, + "loss": 1.8453, + "step": 1521 + }, + { + "epoch": 0.16054852320675106, + "grad_norm": 0.5995894074440002, + "learning_rate": 0.001416046944679201, + "loss": 1.8194, + "step": 1522 + }, + { + "epoch": 0.16065400843881855, + "grad_norm": 0.5697196125984192, + "learning_rate": 0.0014159314895956258, + "loss": 1.8038, + "step": 1523 + }, + { + "epoch": 0.16075949367088607, + "grad_norm": 0.6247778534889221, + "learning_rate": 0.0014158159598911067, + "loss": 1.8226, + "step": 1524 + }, + { + "epoch": 0.1608649789029536, + "grad_norm": 0.5686785578727722, + "learning_rate": 
0.0014157003555785893, + "loss": 1.8394, + "step": 1525 + }, + { + "epoch": 0.16097046413502109, + "grad_norm": 0.5390641093254089, + "learning_rate": 0.0014155846766710277, + "loss": 1.7605, + "step": 1526 + }, + { + "epoch": 0.1610759493670886, + "grad_norm": 0.6159165501594543, + "learning_rate": 0.0014154689231813838, + "loss": 1.8522, + "step": 1527 + }, + { + "epoch": 0.16118143459915613, + "grad_norm": 0.5771231055259705, + "learning_rate": 0.001415353095122629, + "loss": 1.7911, + "step": 1528 + }, + { + "epoch": 0.16128691983122362, + "grad_norm": 0.5684407353401184, + "learning_rate": 0.0014152371925077423, + "loss": 1.8589, + "step": 1529 + }, + { + "epoch": 0.16139240506329114, + "grad_norm": 0.5170876383781433, + "learning_rate": 0.0014151212153497108, + "loss": 1.7951, + "step": 1530 + }, + { + "epoch": 0.16149789029535866, + "grad_norm": 0.5681238770484924, + "learning_rate": 0.0014150051636615305, + "loss": 1.7796, + "step": 1531 + }, + { + "epoch": 0.16160337552742615, + "grad_norm": 0.5822446942329407, + "learning_rate": 0.0014148890374562056, + "loss": 1.785, + "step": 1532 + }, + { + "epoch": 0.16170886075949367, + "grad_norm": 0.6795310974121094, + "learning_rate": 0.0014147728367467486, + "loss": 1.8285, + "step": 1533 + }, + { + "epoch": 0.1618143459915612, + "grad_norm": 0.5617952942848206, + "learning_rate": 0.0014146565615461805, + "loss": 1.8184, + "step": 1534 + }, + { + "epoch": 0.16191983122362869, + "grad_norm": 0.6134721636772156, + "learning_rate": 0.0014145402118675302, + "loss": 1.8462, + "step": 1535 + }, + { + "epoch": 0.1620253164556962, + "grad_norm": 0.6064852476119995, + "learning_rate": 0.0014144237877238355, + "loss": 1.7819, + "step": 1536 + }, + { + "epoch": 0.16213080168776373, + "grad_norm": 0.5989519357681274, + "learning_rate": 0.0014143072891281425, + "loss": 1.7975, + "step": 1537 + }, + { + "epoch": 0.16223628691983122, + "grad_norm": 0.7882618308067322, + "learning_rate": 0.001414190716093505, + "loss": 1.8156, + 
"step": 1538 + }, + { + "epoch": 0.16234177215189874, + "grad_norm": 0.680798351764679, + "learning_rate": 0.001414074068632986, + "loss": 1.8165, + "step": 1539 + }, + { + "epoch": 0.16244725738396623, + "grad_norm": 0.516761064529419, + "learning_rate": 0.0014139573467596561, + "loss": 1.8336, + "step": 1540 + }, + { + "epoch": 0.16255274261603375, + "grad_norm": 0.6657857894897461, + "learning_rate": 0.0014138405504865949, + "loss": 1.8191, + "step": 1541 + }, + { + "epoch": 0.16265822784810127, + "grad_norm": 0.6114803552627563, + "learning_rate": 0.0014137236798268896, + "loss": 1.773, + "step": 1542 + }, + { + "epoch": 0.16276371308016876, + "grad_norm": 0.5564978718757629, + "learning_rate": 0.0014136067347936363, + "loss": 1.8176, + "step": 1543 + }, + { + "epoch": 0.16286919831223629, + "grad_norm": 0.6135033369064331, + "learning_rate": 0.0014134897153999394, + "loss": 1.8302, + "step": 1544 + }, + { + "epoch": 0.1629746835443038, + "grad_norm": 0.5883001089096069, + "learning_rate": 0.0014133726216589114, + "loss": 1.7894, + "step": 1545 + }, + { + "epoch": 0.1630801687763713, + "grad_norm": 0.6626905798912048, + "learning_rate": 0.0014132554535836732, + "loss": 1.7863, + "step": 1546 + }, + { + "epoch": 0.16318565400843882, + "grad_norm": 0.5560116171836853, + "learning_rate": 0.0014131382111873543, + "loss": 1.8088, + "step": 1547 + }, + { + "epoch": 0.16329113924050634, + "grad_norm": 0.623650312423706, + "learning_rate": 0.0014130208944830923, + "loss": 1.8542, + "step": 1548 + }, + { + "epoch": 0.16339662447257383, + "grad_norm": 0.6441416144371033, + "learning_rate": 0.0014129035034840325, + "loss": 1.8495, + "step": 1549 + }, + { + "epoch": 0.16350210970464135, + "grad_norm": 0.5406753420829773, + "learning_rate": 0.00141278603820333, + "loss": 1.806, + "step": 1550 + }, + { + "epoch": 0.16360759493670887, + "grad_norm": 0.6126689910888672, + "learning_rate": 0.0014126684986541468, + "loss": 1.8189, + "step": 1551 + }, + { + "epoch": 
0.16371308016877636, + "grad_norm": 0.5611123442649841, + "learning_rate": 0.0014125508848496539, + "loss": 1.8254, + "step": 1552 + }, + { + "epoch": 0.16381856540084389, + "grad_norm": 0.6051113605499268, + "learning_rate": 0.0014124331968030307, + "loss": 1.8616, + "step": 1553 + }, + { + "epoch": 0.1639240506329114, + "grad_norm": 0.6866985559463501, + "learning_rate": 0.0014123154345274645, + "loss": 1.7898, + "step": 1554 + }, + { + "epoch": 0.1640295358649789, + "grad_norm": 0.6087451577186584, + "learning_rate": 0.0014121975980361512, + "loss": 1.8079, + "step": 1555 + }, + { + "epoch": 0.16413502109704642, + "grad_norm": 0.5478625297546387, + "learning_rate": 0.0014120796873422952, + "loss": 1.8123, + "step": 1556 + }, + { + "epoch": 0.1642405063291139, + "grad_norm": 0.6629983186721802, + "learning_rate": 0.0014119617024591089, + "loss": 1.8315, + "step": 1557 + }, + { + "epoch": 0.16434599156118143, + "grad_norm": 0.5826828479766846, + "learning_rate": 0.0014118436433998127, + "loss": 1.7901, + "step": 1558 + }, + { + "epoch": 0.16445147679324895, + "grad_norm": 0.5929402709007263, + "learning_rate": 0.0014117255101776362, + "loss": 1.7804, + "step": 1559 + }, + { + "epoch": 0.16455696202531644, + "grad_norm": 0.6410099864006042, + "learning_rate": 0.0014116073028058165, + "loss": 1.8087, + "step": 1560 + }, + { + "epoch": 0.16466244725738396, + "grad_norm": 0.5356755256652832, + "learning_rate": 0.0014114890212975997, + "loss": 1.76, + "step": 1561 + }, + { + "epoch": 0.16476793248945149, + "grad_norm": 0.6839003562927246, + "learning_rate": 0.0014113706656662393, + "loss": 1.8465, + "step": 1562 + }, + { + "epoch": 0.16487341772151898, + "grad_norm": 0.5881398916244507, + "learning_rate": 0.001411252235924998, + "loss": 1.8039, + "step": 1563 + }, + { + "epoch": 0.1649789029535865, + "grad_norm": 0.6108241677284241, + "learning_rate": 0.0014111337320871463, + "loss": 1.8027, + "step": 1564 + }, + { + "epoch": 0.16508438818565402, + "grad_norm": 
0.6254359483718872, + "learning_rate": 0.0014110151541659633, + "loss": 1.7763, + "step": 1565 + }, + { + "epoch": 0.1651898734177215, + "grad_norm": 0.5148067474365234, + "learning_rate": 0.0014108965021747363, + "loss": 1.8209, + "step": 1566 + }, + { + "epoch": 0.16529535864978903, + "grad_norm": 0.5595871806144714, + "learning_rate": 0.0014107777761267605, + "loss": 1.8178, + "step": 1567 + }, + { + "epoch": 0.16540084388185655, + "grad_norm": 0.509991466999054, + "learning_rate": 0.00141065897603534, + "loss": 1.8423, + "step": 1568 + }, + { + "epoch": 0.16550632911392404, + "grad_norm": 0.6060951948165894, + "learning_rate": 0.001410540101913787, + "loss": 1.8095, + "step": 1569 + }, + { + "epoch": 0.16561181434599156, + "grad_norm": 0.5585451126098633, + "learning_rate": 0.0014104211537754217, + "loss": 1.8162, + "step": 1570 + }, + { + "epoch": 0.16571729957805909, + "grad_norm": 0.5207708477973938, + "learning_rate": 0.001410302131633573, + "loss": 1.7882, + "step": 1571 + }, + { + "epoch": 0.16582278481012658, + "grad_norm": 0.6462551951408386, + "learning_rate": 0.0014101830355015778, + "loss": 1.8384, + "step": 1572 + }, + { + "epoch": 0.1659282700421941, + "grad_norm": 0.5696213841438293, + "learning_rate": 0.0014100638653927816, + "loss": 1.8303, + "step": 1573 + }, + { + "epoch": 0.1660337552742616, + "grad_norm": 0.5768846273422241, + "learning_rate": 0.0014099446213205378, + "loss": 1.8008, + "step": 1574 + }, + { + "epoch": 0.1661392405063291, + "grad_norm": 0.5875324010848999, + "learning_rate": 0.0014098253032982086, + "loss": 1.7948, + "step": 1575 + }, + { + "epoch": 0.16624472573839663, + "grad_norm": 0.5743832588195801, + "learning_rate": 0.0014097059113391639, + "loss": 1.8365, + "step": 1576 + }, + { + "epoch": 0.16635021097046412, + "grad_norm": 0.550788938999176, + "learning_rate": 0.0014095864454567821, + "loss": 1.8357, + "step": 1577 + }, + { + "epoch": 0.16645569620253164, + "grad_norm": 0.5809524059295654, + "learning_rate": 
0.0014094669056644502, + "loss": 1.7931, + "step": 1578 + }, + { + "epoch": 0.16656118143459916, + "grad_norm": 0.5861852765083313, + "learning_rate": 0.001409347291975563, + "loss": 1.8583, + "step": 1579 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 0.5920469760894775, + "learning_rate": 0.001409227604403524, + "loss": 1.8051, + "step": 1580 + }, + { + "epoch": 0.16677215189873418, + "grad_norm": 0.6311987638473511, + "learning_rate": 0.0014091078429617448, + "loss": 1.8347, + "step": 1581 + }, + { + "epoch": 0.1668776371308017, + "grad_norm": 0.5912540555000305, + "learning_rate": 0.0014089880076636452, + "loss": 1.8295, + "step": 1582 + }, + { + "epoch": 0.1669831223628692, + "grad_norm": 0.7150510549545288, + "learning_rate": 0.0014088680985226533, + "loss": 1.7935, + "step": 1583 + }, + { + "epoch": 0.1670886075949367, + "grad_norm": 0.5584931969642639, + "learning_rate": 0.0014087481155522056, + "loss": 1.7976, + "step": 1584 + }, + { + "epoch": 0.16719409282700423, + "grad_norm": 0.5910683870315552, + "learning_rate": 0.0014086280587657467, + "loss": 1.8593, + "step": 1585 + }, + { + "epoch": 0.16729957805907172, + "grad_norm": 0.6416037082672119, + "learning_rate": 0.0014085079281767295, + "loss": 1.8293, + "step": 1586 + }, + { + "epoch": 0.16740506329113924, + "grad_norm": 0.5986722111701965, + "learning_rate": 0.0014083877237986153, + "loss": 1.7839, + "step": 1587 + }, + { + "epoch": 0.16751054852320676, + "grad_norm": 0.5657693147659302, + "learning_rate": 0.0014082674456448738, + "loss": 1.8057, + "step": 1588 + }, + { + "epoch": 0.16761603375527426, + "grad_norm": 0.602375328540802, + "learning_rate": 0.0014081470937289827, + "loss": 1.8304, + "step": 1589 + }, + { + "epoch": 0.16772151898734178, + "grad_norm": 0.5486363172531128, + "learning_rate": 0.0014080266680644277, + "loss": 1.7954, + "step": 1590 + }, + { + "epoch": 0.16782700421940927, + "grad_norm": 0.5563115477561951, + "learning_rate": 0.0014079061686647033, + "loss": 1.8058, + 
"step": 1591 + }, + { + "epoch": 0.1679324894514768, + "grad_norm": 0.5280571579933167, + "learning_rate": 0.0014077855955433123, + "loss": 1.7831, + "step": 1592 + }, + { + "epoch": 0.1680379746835443, + "grad_norm": 0.5289813280105591, + "learning_rate": 0.001407664948713765, + "loss": 1.8269, + "step": 1593 + }, + { + "epoch": 0.1681434599156118, + "grad_norm": 0.5660833120346069, + "learning_rate": 0.001407544228189581, + "loss": 1.8035, + "step": 1594 + }, + { + "epoch": 0.16824894514767932, + "grad_norm": 0.539189875125885, + "learning_rate": 0.0014074234339842874, + "loss": 1.8507, + "step": 1595 + }, + { + "epoch": 0.16835443037974684, + "grad_norm": 0.5721974968910217, + "learning_rate": 0.00140730256611142, + "loss": 1.8308, + "step": 1596 + }, + { + "epoch": 0.16845991561181434, + "grad_norm": 0.6498994827270508, + "learning_rate": 0.001407181624584522, + "loss": 1.8084, + "step": 1597 + }, + { + "epoch": 0.16856540084388186, + "grad_norm": 0.8458657264709473, + "learning_rate": 0.0014070606094171464, + "loss": 1.8267, + "step": 1598 + }, + { + "epoch": 0.16867088607594938, + "grad_norm": 0.9153026938438416, + "learning_rate": 0.0014069395206228528, + "loss": 1.8192, + "step": 1599 + }, + { + "epoch": 0.16877637130801687, + "grad_norm": 0.6467886567115784, + "learning_rate": 0.0014068183582152103, + "loss": 1.8329, + "step": 1600 + }, + { + "epoch": 0.1688818565400844, + "grad_norm": 0.6594336032867432, + "learning_rate": 0.0014066971222077955, + "loss": 1.7854, + "step": 1601 + }, + { + "epoch": 0.1689873417721519, + "grad_norm": 0.7023598551750183, + "learning_rate": 0.0014065758126141938, + "loss": 1.8647, + "step": 1602 + }, + { + "epoch": 0.1690928270042194, + "grad_norm": 0.7321967482566833, + "learning_rate": 0.0014064544294479981, + "loss": 1.8359, + "step": 1603 + }, + { + "epoch": 0.16919831223628692, + "grad_norm": 0.5909162759780884, + "learning_rate": 0.0014063329727228102, + "loss": 1.8014, + "step": 1604 + }, + { + "epoch": 
0.16930379746835442, + "grad_norm": 0.6052146553993225, + "learning_rate": 0.0014062114424522397, + "loss": 1.7924, + "step": 1605 + }, + { + "epoch": 0.16940928270042194, + "grad_norm": 0.7654457092285156, + "learning_rate": 0.0014060898386499053, + "loss": 1.8448, + "step": 1606 + }, + { + "epoch": 0.16951476793248946, + "grad_norm": 0.6661126017570496, + "learning_rate": 0.0014059681613294327, + "loss": 1.8481, + "step": 1607 + }, + { + "epoch": 0.16962025316455695, + "grad_norm": 0.6050203442573547, + "learning_rate": 0.0014058464105044567, + "loss": 1.8008, + "step": 1608 + }, + { + "epoch": 0.16972573839662447, + "grad_norm": 0.6196019649505615, + "learning_rate": 0.0014057245861886201, + "loss": 1.8189, + "step": 1609 + }, + { + "epoch": 0.169831223628692, + "grad_norm": 0.5569921731948853, + "learning_rate": 0.001405602688395574, + "loss": 1.8219, + "step": 1610 + }, + { + "epoch": 0.16993670886075948, + "grad_norm": 0.5919274687767029, + "learning_rate": 0.0014054807171389773, + "loss": 1.7624, + "step": 1611 + }, + { + "epoch": 0.170042194092827, + "grad_norm": 0.6358324885368347, + "learning_rate": 0.001405358672432498, + "loss": 1.742, + "step": 1612 + }, + { + "epoch": 0.17014767932489452, + "grad_norm": 0.7739889025688171, + "learning_rate": 0.0014052365542898111, + "loss": 1.7892, + "step": 1613 + }, + { + "epoch": 0.17025316455696202, + "grad_norm": 0.8285514116287231, + "learning_rate": 0.0014051143627246015, + "loss": 1.8238, + "step": 1614 + }, + { + "epoch": 0.17035864978902954, + "grad_norm": 0.6318050622940063, + "learning_rate": 0.0014049920977505608, + "loss": 1.7882, + "step": 1615 + }, + { + "epoch": 0.17046413502109706, + "grad_norm": 0.6000921130180359, + "learning_rate": 0.0014048697593813891, + "loss": 1.8084, + "step": 1616 + }, + { + "epoch": 0.17056962025316455, + "grad_norm": 0.7156769037246704, + "learning_rate": 0.0014047473476307955, + "loss": 1.8059, + "step": 1617 + }, + { + "epoch": 0.17067510548523207, + "grad_norm": 
0.6543998122215271, + "learning_rate": 0.001404624862512497, + "loss": 1.7926, + "step": 1618 + }, + { + "epoch": 0.1707805907172996, + "grad_norm": 0.537227213382721, + "learning_rate": 0.001404502304040218, + "loss": 1.8032, + "step": 1619 + }, + { + "epoch": 0.17088607594936708, + "grad_norm": 0.62468421459198, + "learning_rate": 0.0014043796722276924, + "loss": 1.8218, + "step": 1620 + }, + { + "epoch": 0.1709915611814346, + "grad_norm": 0.6448827981948853, + "learning_rate": 0.0014042569670886615, + "loss": 1.7959, + "step": 1621 + }, + { + "epoch": 0.1710970464135021, + "grad_norm": 0.5734566450119019, + "learning_rate": 0.0014041341886368752, + "loss": 1.847, + "step": 1622 + }, + { + "epoch": 0.17120253164556962, + "grad_norm": 0.6323956251144409, + "learning_rate": 0.0014040113368860908, + "loss": 1.8117, + "step": 1623 + }, + { + "epoch": 0.17130801687763714, + "grad_norm": 0.740659773349762, + "learning_rate": 0.0014038884118500754, + "loss": 1.7877, + "step": 1624 + }, + { + "epoch": 0.17141350210970463, + "grad_norm": 0.679746150970459, + "learning_rate": 0.0014037654135426025, + "loss": 1.8465, + "step": 1625 + }, + { + "epoch": 0.17151898734177215, + "grad_norm": 0.6029236316680908, + "learning_rate": 0.0014036423419774551, + "loss": 1.7682, + "step": 1626 + }, + { + "epoch": 0.17162447257383967, + "grad_norm": 0.5398522019386292, + "learning_rate": 0.0014035191971684242, + "loss": 1.7786, + "step": 1627 + }, + { + "epoch": 0.17172995780590716, + "grad_norm": 0.7066488265991211, + "learning_rate": 0.0014033959791293082, + "loss": 1.7974, + "step": 1628 + }, + { + "epoch": 0.17183544303797468, + "grad_norm": 0.6859533786773682, + "learning_rate": 0.0014032726878739148, + "loss": 1.8197, + "step": 1629 + }, + { + "epoch": 0.1719409282700422, + "grad_norm": 0.6189306974411011, + "learning_rate": 0.0014031493234160591, + "loss": 1.7848, + "step": 1630 + }, + { + "epoch": 0.1720464135021097, + "grad_norm": 0.6533510684967041, + "learning_rate": 
0.001403025885769565, + "loss": 1.8182, + "step": 1631 + }, + { + "epoch": 0.17215189873417722, + "grad_norm": 0.5356724262237549, + "learning_rate": 0.001402902374948264, + "loss": 1.8045, + "step": 1632 + }, + { + "epoch": 0.17225738396624474, + "grad_norm": 0.7835818529129028, + "learning_rate": 0.0014027787909659962, + "loss": 1.808, + "step": 1633 + }, + { + "epoch": 0.17236286919831223, + "grad_norm": 0.6086776256561279, + "learning_rate": 0.0014026551338366098, + "loss": 1.8033, + "step": 1634 + }, + { + "epoch": 0.17246835443037975, + "grad_norm": 0.5822385549545288, + "learning_rate": 0.0014025314035739614, + "loss": 1.7842, + "step": 1635 + }, + { + "epoch": 0.17257383966244727, + "grad_norm": 0.624055027961731, + "learning_rate": 0.001402407600191915, + "loss": 1.8039, + "step": 1636 + }, + { + "epoch": 0.17267932489451476, + "grad_norm": 0.5241172909736633, + "learning_rate": 0.0014022837237043441, + "loss": 1.8045, + "step": 1637 + }, + { + "epoch": 0.17278481012658228, + "grad_norm": 0.747661292552948, + "learning_rate": 0.0014021597741251295, + "loss": 1.816, + "step": 1638 + }, + { + "epoch": 0.17289029535864978, + "grad_norm": 0.9853229522705078, + "learning_rate": 0.00140203575146816, + "loss": 1.8094, + "step": 1639 + }, + { + "epoch": 0.1729957805907173, + "grad_norm": 0.8489435911178589, + "learning_rate": 0.0014019116557473332, + "loss": 1.782, + "step": 1640 + }, + { + "epoch": 0.17310126582278482, + "grad_norm": 0.5595011115074158, + "learning_rate": 0.0014017874869765548, + "loss": 1.7708, + "step": 1641 + }, + { + "epoch": 0.1732067510548523, + "grad_norm": 0.6350626945495605, + "learning_rate": 0.0014016632451697383, + "loss": 1.7729, + "step": 1642 + }, + { + "epoch": 0.17331223628691983, + "grad_norm": 0.7759594321250916, + "learning_rate": 0.0014015389303408058, + "loss": 1.7875, + "step": 1643 + }, + { + "epoch": 0.17341772151898735, + "grad_norm": 0.6048181056976318, + "learning_rate": 0.001401414542503687, + "loss": 1.7886, + 
"step": 1644 + }, + { + "epoch": 0.17352320675105484, + "grad_norm": 0.6205911040306091, + "learning_rate": 0.001401290081672321, + "loss": 1.8047, + "step": 1645 + }, + { + "epoch": 0.17362869198312236, + "grad_norm": 0.7872011065483093, + "learning_rate": 0.0014011655478606531, + "loss": 1.8022, + "step": 1646 + }, + { + "epoch": 0.17373417721518988, + "grad_norm": 1.0108978748321533, + "learning_rate": 0.001401040941082639, + "loss": 1.8068, + "step": 1647 + }, + { + "epoch": 0.17383966244725738, + "grad_norm": 0.6799403429031372, + "learning_rate": 0.001400916261352241, + "loss": 1.7882, + "step": 1648 + }, + { + "epoch": 0.1739451476793249, + "grad_norm": 0.7802788615226746, + "learning_rate": 0.00140079150868343, + "loss": 1.8354, + "step": 1649 + }, + { + "epoch": 0.17405063291139242, + "grad_norm": 1.2438887357711792, + "learning_rate": 0.0014006666830901854, + "loss": 1.8283, + "step": 1650 + }, + { + "epoch": 0.1741561181434599, + "grad_norm": 0.5394556522369385, + "learning_rate": 0.0014005417845864945, + "loss": 1.8025, + "step": 1651 + }, + { + "epoch": 0.17426160337552743, + "grad_norm": 1.043115496635437, + "learning_rate": 0.0014004168131863525, + "loss": 1.7884, + "step": 1652 + }, + { + "epoch": 0.17436708860759495, + "grad_norm": 0.8469311594963074, + "learning_rate": 0.0014002917689037637, + "loss": 1.7705, + "step": 1653 + }, + { + "epoch": 0.17447257383966244, + "grad_norm": 0.5756351947784424, + "learning_rate": 0.0014001666517527392, + "loss": 1.8183, + "step": 1654 + }, + { + "epoch": 0.17457805907172996, + "grad_norm": 0.8417293429374695, + "learning_rate": 0.0014000414617472996, + "loss": 1.8109, + "step": 1655 + }, + { + "epoch": 0.17468354430379746, + "grad_norm": 0.5764300227165222, + "learning_rate": 0.0013999161989014725, + "loss": 1.8345, + "step": 1656 + }, + { + "epoch": 0.17478902953586498, + "grad_norm": 0.7161532044410706, + "learning_rate": 0.0013997908632292948, + "loss": 1.7834, + "step": 1657 + }, + { + "epoch": 
0.1748945147679325, + "grad_norm": 0.8226361274719238, + "learning_rate": 0.0013996654547448106, + "loss": 1.7965, + "step": 1658 + }, + { + "epoch": 0.175, + "grad_norm": 0.5380200743675232, + "learning_rate": 0.0013995399734620729, + "loss": 1.7869, + "step": 1659 + }, + { + "epoch": 0.1751054852320675, + "grad_norm": 0.5697001814842224, + "learning_rate": 0.001399414419395142, + "loss": 1.7873, + "step": 1660 + }, + { + "epoch": 0.17521097046413503, + "grad_norm": 0.5878713130950928, + "learning_rate": 0.0013992887925580874, + "loss": 1.8308, + "step": 1661 + }, + { + "epoch": 0.17531645569620252, + "grad_norm": 0.5282768607139587, + "learning_rate": 0.0013991630929649857, + "loss": 1.7899, + "step": 1662 + }, + { + "epoch": 0.17542194092827004, + "grad_norm": 0.5997141599655151, + "learning_rate": 0.0013990373206299225, + "loss": 1.7909, + "step": 1663 + }, + { + "epoch": 0.17552742616033756, + "grad_norm": 0.5057167410850525, + "learning_rate": 0.0013989114755669912, + "loss": 1.7947, + "step": 1664 + }, + { + "epoch": 0.17563291139240506, + "grad_norm": 0.7179067134857178, + "learning_rate": 0.001398785557790293, + "loss": 1.8057, + "step": 1665 + }, + { + "epoch": 0.17573839662447258, + "grad_norm": 0.5524627566337585, + "learning_rate": 0.0013986595673139382, + "loss": 1.8325, + "step": 1666 + }, + { + "epoch": 0.1758438818565401, + "grad_norm": 0.6813000440597534, + "learning_rate": 0.0013985335041520443, + "loss": 1.8086, + "step": 1667 + }, + { + "epoch": 0.1759493670886076, + "grad_norm": 0.7705308198928833, + "learning_rate": 0.0013984073683187374, + "loss": 1.8267, + "step": 1668 + }, + { + "epoch": 0.1760548523206751, + "grad_norm": 0.5884032249450684, + "learning_rate": 0.0013982811598281517, + "loss": 1.7835, + "step": 1669 + }, + { + "epoch": 0.17616033755274263, + "grad_norm": 0.6554465889930725, + "learning_rate": 0.0013981548786944293, + "loss": 1.7508, + "step": 1670 + }, + { + "epoch": 0.17626582278481012, + "grad_norm": 0.5755608677864075, + 
"learning_rate": 0.0013980285249317209, + "loss": 1.7517, + "step": 1671 + }, + { + "epoch": 0.17637130801687764, + "grad_norm": 0.5823730230331421, + "learning_rate": 0.0013979020985541847, + "loss": 1.7882, + "step": 1672 + }, + { + "epoch": 0.17647679324894514, + "grad_norm": 0.5076411366462708, + "learning_rate": 0.0013977755995759876, + "loss": 1.8032, + "step": 1673 + }, + { + "epoch": 0.17658227848101266, + "grad_norm": 0.6053689122200012, + "learning_rate": 0.0013976490280113048, + "loss": 1.8067, + "step": 1674 + }, + { + "epoch": 0.17668776371308018, + "grad_norm": 0.4807142913341522, + "learning_rate": 0.0013975223838743188, + "loss": 1.7747, + "step": 1675 + }, + { + "epoch": 0.17679324894514767, + "grad_norm": 0.6027363538742065, + "learning_rate": 0.0013973956671792206, + "loss": 1.8062, + "step": 1676 + }, + { + "epoch": 0.1768987341772152, + "grad_norm": 0.5467568635940552, + "learning_rate": 0.00139726887794021, + "loss": 1.7488, + "step": 1677 + }, + { + "epoch": 0.1770042194092827, + "grad_norm": 0.6148130893707275, + "learning_rate": 0.001397142016171494, + "loss": 1.8228, + "step": 1678 + }, + { + "epoch": 0.1771097046413502, + "grad_norm": 0.613949716091156, + "learning_rate": 0.0013970150818872881, + "loss": 1.7669, + "step": 1679 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 0.5977597832679749, + "learning_rate": 0.0013968880751018158, + "loss": 1.7915, + "step": 1680 + }, + { + "epoch": 0.17732067510548524, + "grad_norm": 0.6925604939460754, + "learning_rate": 0.0013967609958293091, + "loss": 1.7787, + "step": 1681 + }, + { + "epoch": 0.17742616033755274, + "grad_norm": 0.6686297655105591, + "learning_rate": 0.001396633844084008, + "loss": 1.7737, + "step": 1682 + }, + { + "epoch": 0.17753164556962026, + "grad_norm": 0.6333765983581543, + "learning_rate": 0.00139650661988016, + "loss": 1.8314, + "step": 1683 + }, + { + "epoch": 0.17763713080168778, + "grad_norm": 0.7208518981933594, + "learning_rate": 0.0013963793232320216, + 
"loss": 1.8254, + "step": 1684 + }, + { + "epoch": 0.17774261603375527, + "grad_norm": 0.7514510154724121, + "learning_rate": 0.0013962519541538569, + "loss": 1.7537, + "step": 1685 + }, + { + "epoch": 0.1778481012658228, + "grad_norm": 0.6445907354354858, + "learning_rate": 0.001396124512659938, + "loss": 1.7444, + "step": 1686 + }, + { + "epoch": 0.17795358649789028, + "grad_norm": 0.6094274520874023, + "learning_rate": 0.001395996998764546, + "loss": 1.8022, + "step": 1687 + }, + { + "epoch": 0.1780590717299578, + "grad_norm": 0.5557835698127747, + "learning_rate": 0.0013958694124819688, + "loss": 1.7345, + "step": 1688 + }, + { + "epoch": 0.17816455696202532, + "grad_norm": 0.6289921402931213, + "learning_rate": 0.0013957417538265032, + "loss": 1.8146, + "step": 1689 + }, + { + "epoch": 0.17827004219409281, + "grad_norm": 0.5010189414024353, + "learning_rate": 0.0013956140228124545, + "loss": 1.7747, + "step": 1690 + }, + { + "epoch": 0.17837552742616034, + "grad_norm": 0.6107930541038513, + "learning_rate": 0.001395486219454135, + "loss": 1.8379, + "step": 1691 + }, + { + "epoch": 0.17848101265822786, + "grad_norm": 0.6100829839706421, + "learning_rate": 0.0013953583437658658, + "loss": 1.8, + "step": 1692 + }, + { + "epoch": 0.17858649789029535, + "grad_norm": 0.5703754425048828, + "learning_rate": 0.0013952303957619763, + "loss": 1.7705, + "step": 1693 + }, + { + "epoch": 0.17869198312236287, + "grad_norm": 0.8051835298538208, + "learning_rate": 0.0013951023754568035, + "loss": 1.7871, + "step": 1694 + }, + { + "epoch": 0.1787974683544304, + "grad_norm": 0.8862444758415222, + "learning_rate": 0.001394974282864693, + "loss": 1.7774, + "step": 1695 + }, + { + "epoch": 0.17890295358649788, + "grad_norm": 0.6193156838417053, + "learning_rate": 0.0013948461179999977, + "loss": 1.7817, + "step": 1696 + }, + { + "epoch": 0.1790084388185654, + "grad_norm": 0.7612337470054626, + "learning_rate": 0.0013947178808770794, + "loss": 1.792, + "step": 1697 + }, + { + 
"epoch": 0.17911392405063292, + "grad_norm": 0.9321572780609131, + "learning_rate": 0.0013945895715103077, + "loss": 1.8117, + "step": 1698 + }, + { + "epoch": 0.17921940928270041, + "grad_norm": 0.7185301780700684, + "learning_rate": 0.0013944611899140604, + "loss": 1.8466, + "step": 1699 + }, + { + "epoch": 0.17932489451476794, + "grad_norm": 0.7152723073959351, + "learning_rate": 0.0013943327361027231, + "loss": 1.8217, + "step": 1700 + }, + { + "epoch": 0.17943037974683546, + "grad_norm": 0.8954642415046692, + "learning_rate": 0.0013942042100906899, + "loss": 1.7886, + "step": 1701 + }, + { + "epoch": 0.17953586497890295, + "grad_norm": 0.6695270538330078, + "learning_rate": 0.0013940756118923626, + "loss": 1.786, + "step": 1702 + }, + { + "epoch": 0.17964135021097047, + "grad_norm": 0.6731200814247131, + "learning_rate": 0.0013939469415221513, + "loss": 1.7528, + "step": 1703 + }, + { + "epoch": 0.17974683544303796, + "grad_norm": 0.9849134087562561, + "learning_rate": 0.0013938181989944741, + "loss": 1.7855, + "step": 1704 + }, + { + "epoch": 0.17985232067510548, + "grad_norm": 0.6738976240158081, + "learning_rate": 0.0013936893843237573, + "loss": 1.7896, + "step": 1705 + }, + { + "epoch": 0.179957805907173, + "grad_norm": 0.713855504989624, + "learning_rate": 0.0013935604975244356, + "loss": 1.8163, + "step": 1706 + }, + { + "epoch": 0.1800632911392405, + "grad_norm": 0.8884819149971008, + "learning_rate": 0.0013934315386109509, + "loss": 1.7617, + "step": 1707 + }, + { + "epoch": 0.18016877637130801, + "grad_norm": 0.5172052979469299, + "learning_rate": 0.0013933025075977539, + "loss": 1.7589, + "step": 1708 + }, + { + "epoch": 0.18027426160337554, + "grad_norm": 0.8214863538742065, + "learning_rate": 0.0013931734044993031, + "loss": 1.7748, + "step": 1709 + }, + { + "epoch": 0.18037974683544303, + "grad_norm": 0.6871063709259033, + "learning_rate": 0.0013930442293300649, + "loss": 1.8173, + "step": 1710 + }, + { + "epoch": 0.18048523206751055, + 
"grad_norm": 0.6157609224319458, + "learning_rate": 0.0013929149821045148, + "loss": 1.8031, + "step": 1711 + }, + { + "epoch": 0.18059071729957807, + "grad_norm": 0.7256579995155334, + "learning_rate": 0.0013927856628371347, + "loss": 1.8167, + "step": 1712 + }, + { + "epoch": 0.18069620253164556, + "grad_norm": 0.5969768762588501, + "learning_rate": 0.0013926562715424159, + "loss": 1.7697, + "step": 1713 + }, + { + "epoch": 0.18080168776371308, + "grad_norm": 0.5290868878364563, + "learning_rate": 0.0013925268082348576, + "loss": 1.7701, + "step": 1714 + }, + { + "epoch": 0.1809071729957806, + "grad_norm": 0.6327504515647888, + "learning_rate": 0.0013923972729289662, + "loss": 1.7564, + "step": 1715 + }, + { + "epoch": 0.1810126582278481, + "grad_norm": 0.5828287601470947, + "learning_rate": 0.0013922676656392572, + "loss": 1.7765, + "step": 1716 + }, + { + "epoch": 0.18111814345991561, + "grad_norm": 0.5571871399879456, + "learning_rate": 0.0013921379863802536, + "loss": 1.7451, + "step": 1717 + }, + { + "epoch": 0.18122362869198314, + "grad_norm": 0.5470030307769775, + "learning_rate": 0.0013920082351664867, + "loss": 1.7797, + "step": 1718 + }, + { + "epoch": 0.18132911392405063, + "grad_norm": 0.526543915271759, + "learning_rate": 0.0013918784120124956, + "loss": 1.7763, + "step": 1719 + }, + { + "epoch": 0.18143459915611815, + "grad_norm": 0.5570376515388489, + "learning_rate": 0.0013917485169328279, + "loss": 1.8064, + "step": 1720 + }, + { + "epoch": 0.18154008438818564, + "grad_norm": 0.657335638999939, + "learning_rate": 0.0013916185499420386, + "loss": 1.7746, + "step": 1721 + }, + { + "epoch": 0.18164556962025316, + "grad_norm": 0.5946418046951294, + "learning_rate": 0.0013914885110546916, + "loss": 1.7713, + "step": 1722 + }, + { + "epoch": 0.18175105485232068, + "grad_norm": 0.5829986333847046, + "learning_rate": 0.001391358400285358, + "loss": 1.7687, + "step": 1723 + }, + { + "epoch": 0.18185654008438817, + "grad_norm": 0.6519981026649475, + 
"learning_rate": 0.0013912282176486177, + "loss": 1.7666, + "step": 1724 + }, + { + "epoch": 0.1819620253164557, + "grad_norm": 0.7232199311256409, + "learning_rate": 0.0013910979631590581, + "loss": 1.7654, + "step": 1725 + }, + { + "epoch": 0.18206751054852321, + "grad_norm": 0.5208910703659058, + "learning_rate": 0.001390967636831275, + "loss": 1.8004, + "step": 1726 + }, + { + "epoch": 0.1821729957805907, + "grad_norm": 0.7124728560447693, + "learning_rate": 0.0013908372386798717, + "loss": 1.78, + "step": 1727 + }, + { + "epoch": 0.18227848101265823, + "grad_norm": 0.6304678320884705, + "learning_rate": 0.0013907067687194607, + "loss": 1.774, + "step": 1728 + }, + { + "epoch": 0.18238396624472575, + "grad_norm": 0.6204988360404968, + "learning_rate": 0.0013905762269646614, + "loss": 1.7954, + "step": 1729 + }, + { + "epoch": 0.18248945147679324, + "grad_norm": 0.6240030527114868, + "learning_rate": 0.0013904456134301016, + "loss": 1.814, + "step": 1730 + }, + { + "epoch": 0.18259493670886076, + "grad_norm": 0.5764158368110657, + "learning_rate": 0.001390314928130417, + "loss": 1.7665, + "step": 1731 + }, + { + "epoch": 0.18270042194092828, + "grad_norm": 0.6084049940109253, + "learning_rate": 0.0013901841710802522, + "loss": 1.7766, + "step": 1732 + }, + { + "epoch": 0.18280590717299577, + "grad_norm": 0.5706049203872681, + "learning_rate": 0.0013900533422942585, + "loss": 1.7674, + "step": 1733 + }, + { + "epoch": 0.1829113924050633, + "grad_norm": 0.5296407341957092, + "learning_rate": 0.0013899224417870963, + "loss": 1.7896, + "step": 1734 + }, + { + "epoch": 0.18301687763713081, + "grad_norm": 0.591642439365387, + "learning_rate": 0.0013897914695734336, + "loss": 1.773, + "step": 1735 + }, + { + "epoch": 0.1831223628691983, + "grad_norm": 0.6062543392181396, + "learning_rate": 0.0013896604256679462, + "loss": 1.8132, + "step": 1736 + }, + { + "epoch": 0.18322784810126583, + "grad_norm": 0.6381182074546814, + "learning_rate": 0.0013895293100853188, + 
"loss": 1.7713, + "step": 1737 + }, + { + "epoch": 0.18333333333333332, + "grad_norm": 0.6960541605949402, + "learning_rate": 0.001389398122840243, + "loss": 1.7864, + "step": 1738 + }, + { + "epoch": 0.18343881856540084, + "grad_norm": 0.4707389175891876, + "learning_rate": 0.0013892668639474194, + "loss": 1.8071, + "step": 1739 + }, + { + "epoch": 0.18354430379746836, + "grad_norm": 0.8502941131591797, + "learning_rate": 0.0013891355334215562, + "loss": 1.8028, + "step": 1740 + }, + { + "epoch": 0.18364978902953585, + "grad_norm": 0.6726117134094238, + "learning_rate": 0.001389004131277369, + "loss": 1.8077, + "step": 1741 + }, + { + "epoch": 0.18375527426160337, + "grad_norm": 0.5561811327934265, + "learning_rate": 0.0013888726575295826, + "loss": 1.7879, + "step": 1742 + }, + { + "epoch": 0.1838607594936709, + "grad_norm": 0.5997086763381958, + "learning_rate": 0.0013887411121929294, + "loss": 1.7978, + "step": 1743 + }, + { + "epoch": 0.1839662447257384, + "grad_norm": 0.6202653646469116, + "learning_rate": 0.0013886094952821496, + "loss": 1.7996, + "step": 1744 + }, + { + "epoch": 0.1840717299578059, + "grad_norm": 0.7781490087509155, + "learning_rate": 0.0013884778068119913, + "loss": 1.7924, + "step": 1745 + }, + { + "epoch": 0.18417721518987343, + "grad_norm": 0.5901492834091187, + "learning_rate": 0.0013883460467972108, + "loss": 1.8049, + "step": 1746 + }, + { + "epoch": 0.18428270042194092, + "grad_norm": 0.6508224606513977, + "learning_rate": 0.0013882142152525732, + "loss": 1.8302, + "step": 1747 + }, + { + "epoch": 0.18438818565400844, + "grad_norm": 0.6339287757873535, + "learning_rate": 0.0013880823121928498, + "loss": 1.7767, + "step": 1748 + }, + { + "epoch": 0.18449367088607596, + "grad_norm": 0.5953535437583923, + "learning_rate": 0.0013879503376328219, + "loss": 1.8207, + "step": 1749 + }, + { + "epoch": 0.18459915611814345, + "grad_norm": 0.7098846435546875, + "learning_rate": 0.0013878182915872776, + "loss": 1.8263, + "step": 1750 + }, + { + 
"epoch": 0.18470464135021097, + "grad_norm": 0.7171010971069336, + "learning_rate": 0.001387686174071013, + "loss": 1.7832, + "step": 1751 + }, + { + "epoch": 0.1848101265822785, + "grad_norm": 0.7745935916900635, + "learning_rate": 0.001387553985098833, + "loss": 1.7988, + "step": 1752 + }, + { + "epoch": 0.184915611814346, + "grad_norm": 0.7050067782402039, + "learning_rate": 0.0013874217246855499, + "loss": 1.796, + "step": 1753 + }, + { + "epoch": 0.1850210970464135, + "grad_norm": 0.540745735168457, + "learning_rate": 0.001387289392845984, + "loss": 1.7906, + "step": 1754 + }, + { + "epoch": 0.185126582278481, + "grad_norm": 0.6548086404800415, + "learning_rate": 0.0013871569895949635, + "loss": 1.7953, + "step": 1755 + }, + { + "epoch": 0.18523206751054852, + "grad_norm": 0.5859891176223755, + "learning_rate": 0.0013870245149473256, + "loss": 1.7624, + "step": 1756 + }, + { + "epoch": 0.18533755274261604, + "grad_norm": 0.5926470756530762, + "learning_rate": 0.0013868919689179143, + "loss": 1.7868, + "step": 1757 + }, + { + "epoch": 0.18544303797468353, + "grad_norm": 0.6053606867790222, + "learning_rate": 0.001386759351521582, + "loss": 1.8039, + "step": 1758 + }, + { + "epoch": 0.18554852320675105, + "grad_norm": 0.639509379863739, + "learning_rate": 0.0013866266627731892, + "loss": 1.7877, + "step": 1759 + }, + { + "epoch": 0.18565400843881857, + "grad_norm": 0.5954452157020569, + "learning_rate": 0.001386493902687604, + "loss": 1.7388, + "step": 1760 + }, + { + "epoch": 0.18575949367088607, + "grad_norm": 0.5272190570831299, + "learning_rate": 0.0013863610712797035, + "loss": 1.7884, + "step": 1761 + }, + { + "epoch": 0.1858649789029536, + "grad_norm": 0.5959291458129883, + "learning_rate": 0.0013862281685643716, + "loss": 1.8074, + "step": 1762 + }, + { + "epoch": 0.1859704641350211, + "grad_norm": 0.5910160541534424, + "learning_rate": 0.001386095194556501, + "loss": 1.7776, + "step": 1763 + }, + { + "epoch": 0.1860759493670886, + "grad_norm": 
0.5868355631828308, + "learning_rate": 0.001385962149270992, + "loss": 1.7694, + "step": 1764 + }, + { + "epoch": 0.18618143459915612, + "grad_norm": 0.5169520974159241, + "learning_rate": 0.001385829032722753, + "loss": 1.7854, + "step": 1765 + }, + { + "epoch": 0.18628691983122364, + "grad_norm": 0.7021543383598328, + "learning_rate": 0.0013856958449267002, + "loss": 1.7969, + "step": 1766 + }, + { + "epoch": 0.18639240506329113, + "grad_norm": 0.7602433562278748, + "learning_rate": 0.0013855625858977584, + "loss": 1.8045, + "step": 1767 + }, + { + "epoch": 0.18649789029535865, + "grad_norm": 0.5901639461517334, + "learning_rate": 0.0013854292556508593, + "loss": 1.7619, + "step": 1768 + }, + { + "epoch": 0.18660337552742617, + "grad_norm": 0.5687635540962219, + "learning_rate": 0.0013852958542009438, + "loss": 1.7948, + "step": 1769 + }, + { + "epoch": 0.18670886075949367, + "grad_norm": 0.5289777517318726, + "learning_rate": 0.00138516238156296, + "loss": 1.7956, + "step": 1770 + }, + { + "epoch": 0.1868143459915612, + "grad_norm": 0.5443355441093445, + "learning_rate": 0.001385028837751864, + "loss": 1.7924, + "step": 1771 + }, + { + "epoch": 0.18691983122362868, + "grad_norm": 0.5804268717765808, + "learning_rate": 0.0013848952227826202, + "loss": 1.7887, + "step": 1772 + }, + { + "epoch": 0.1870253164556962, + "grad_norm": 0.5772016644477844, + "learning_rate": 0.0013847615366702009, + "loss": 1.7712, + "step": 1773 + }, + { + "epoch": 0.18713080168776372, + "grad_norm": 0.5688541531562805, + "learning_rate": 0.001384627779429586, + "loss": 1.8261, + "step": 1774 + }, + { + "epoch": 0.1872362869198312, + "grad_norm": 0.5785987377166748, + "learning_rate": 0.0013844939510757642, + "loss": 1.8267, + "step": 1775 + }, + { + "epoch": 0.18734177215189873, + "grad_norm": 0.7333600521087646, + "learning_rate": 0.0013843600516237312, + "loss": 1.7937, + "step": 1776 + }, + { + "epoch": 0.18744725738396625, + "grad_norm": 0.7385758757591248, + "learning_rate": 
0.001384226081088491, + "loss": 1.7716, + "step": 1777 + }, + { + "epoch": 0.18755274261603375, + "grad_norm": 0.5297675728797913, + "learning_rate": 0.001384092039485056, + "loss": 1.7255, + "step": 1778 + }, + { + "epoch": 0.18765822784810127, + "grad_norm": 0.707962155342102, + "learning_rate": 0.0013839579268284461, + "loss": 1.7799, + "step": 1779 + }, + { + "epoch": 0.1877637130801688, + "grad_norm": 0.7870702743530273, + "learning_rate": 0.0013838237431336895, + "loss": 1.7832, + "step": 1780 + }, + { + "epoch": 0.18786919831223628, + "grad_norm": 0.5111426115036011, + "learning_rate": 0.0013836894884158217, + "loss": 1.7615, + "step": 1781 + }, + { + "epoch": 0.1879746835443038, + "grad_norm": 0.6857714056968689, + "learning_rate": 0.001383555162689887, + "loss": 1.8396, + "step": 1782 + }, + { + "epoch": 0.18808016877637132, + "grad_norm": 0.6859596967697144, + "learning_rate": 0.001383420765970937, + "loss": 1.7882, + "step": 1783 + }, + { + "epoch": 0.1881856540084388, + "grad_norm": 0.5325198173522949, + "learning_rate": 0.0013832862982740318, + "loss": 1.7624, + "step": 1784 + }, + { + "epoch": 0.18829113924050633, + "grad_norm": 0.8604696393013, + "learning_rate": 0.001383151759614239, + "loss": 1.7915, + "step": 1785 + }, + { + "epoch": 0.18839662447257383, + "grad_norm": 0.9610689282417297, + "learning_rate": 0.0013830171500066343, + "loss": 1.7618, + "step": 1786 + }, + { + "epoch": 0.18850210970464135, + "grad_norm": 0.6126101613044739, + "learning_rate": 0.0013828824694663013, + "loss": 1.7792, + "step": 1787 + }, + { + "epoch": 0.18860759493670887, + "grad_norm": 0.5730456113815308, + "learning_rate": 0.001382747718008332, + "loss": 1.8004, + "step": 1788 + }, + { + "epoch": 0.18871308016877636, + "grad_norm": 0.6982443332672119, + "learning_rate": 0.0013826128956478255, + "loss": 1.7789, + "step": 1789 + }, + { + "epoch": 0.18881856540084388, + "grad_norm": 0.5206103324890137, + "learning_rate": 0.0013824780023998899, + "loss": 1.7696, + 
"step": 1790 + }, + { + "epoch": 0.1889240506329114, + "grad_norm": 0.6944553256034851, + "learning_rate": 0.0013823430382796402, + "loss": 1.7547, + "step": 1791 + }, + { + "epoch": 0.1890295358649789, + "grad_norm": 0.6838858723640442, + "learning_rate": 0.0013822080033021997, + "loss": 1.7759, + "step": 1792 + }, + { + "epoch": 0.1891350210970464, + "grad_norm": 0.5321121215820312, + "learning_rate": 0.0013820728974827, + "loss": 1.7897, + "step": 1793 + }, + { + "epoch": 0.18924050632911393, + "grad_norm": 0.8141621947288513, + "learning_rate": 0.0013819377208362806, + "loss": 1.8483, + "step": 1794 + }, + { + "epoch": 0.18934599156118143, + "grad_norm": 0.7290375232696533, + "learning_rate": 0.0013818024733780881, + "loss": 1.7712, + "step": 1795 + }, + { + "epoch": 0.18945147679324895, + "grad_norm": 0.5447221994400024, + "learning_rate": 0.0013816671551232782, + "loss": 1.8102, + "step": 1796 + }, + { + "epoch": 0.18955696202531647, + "grad_norm": 0.5593889355659485, + "learning_rate": 0.0013815317660870138, + "loss": 1.7619, + "step": 1797 + }, + { + "epoch": 0.18966244725738396, + "grad_norm": 0.5710352659225464, + "learning_rate": 0.001381396306284466, + "loss": 1.7733, + "step": 1798 + }, + { + "epoch": 0.18976793248945148, + "grad_norm": 0.5737835764884949, + "learning_rate": 0.0013812607757308134, + "loss": 1.775, + "step": 1799 + }, + { + "epoch": 0.189873417721519, + "grad_norm": 0.5465986728668213, + "learning_rate": 0.0013811251744412431, + "loss": 1.7589, + "step": 1800 + }, + { + "epoch": 0.1899789029535865, + "grad_norm": 0.5604190230369568, + "learning_rate": 0.0013809895024309501, + "loss": 1.7209, + "step": 1801 + }, + { + "epoch": 0.190084388185654, + "grad_norm": 0.5953351259231567, + "learning_rate": 0.001380853759715137, + "loss": 1.7587, + "step": 1802 + }, + { + "epoch": 0.1901898734177215, + "grad_norm": 0.6120489835739136, + "learning_rate": 0.0013807179463090143, + "loss": 1.788, + "step": 1803 + }, + { + "epoch": 
0.19029535864978903, + "grad_norm": 0.5351553559303284, + "learning_rate": 0.0013805820622278008, + "loss": 1.7748, + "step": 1804 + }, + { + "epoch": 0.19040084388185655, + "grad_norm": 0.6324647665023804, + "learning_rate": 0.0013804461074867227, + "loss": 1.7669, + "step": 1805 + }, + { + "epoch": 0.19050632911392404, + "grad_norm": 0.7690573930740356, + "learning_rate": 0.0013803100821010146, + "loss": 1.7722, + "step": 1806 + }, + { + "epoch": 0.19061181434599156, + "grad_norm": 0.7233635187149048, + "learning_rate": 0.0013801739860859188, + "loss": 1.7753, + "step": 1807 + }, + { + "epoch": 0.19071729957805908, + "grad_norm": 0.5191764831542969, + "learning_rate": 0.0013800378194566856, + "loss": 1.7689, + "step": 1808 + }, + { + "epoch": 0.19082278481012657, + "grad_norm": 0.9026839733123779, + "learning_rate": 0.001379901582228573, + "loss": 1.8158, + "step": 1809 + }, + { + "epoch": 0.1909282700421941, + "grad_norm": 0.6849492192268372, + "learning_rate": 0.0013797652744168473, + "loss": 1.7586, + "step": 1810 + }, + { + "epoch": 0.1910337552742616, + "grad_norm": 0.7147797346115112, + "learning_rate": 0.0013796288960367822, + "loss": 1.7291, + "step": 1811 + }, + { + "epoch": 0.1911392405063291, + "grad_norm": 0.8673971891403198, + "learning_rate": 0.0013794924471036596, + "loss": 1.7825, + "step": 1812 + }, + { + "epoch": 0.19124472573839663, + "grad_norm": 0.6884671449661255, + "learning_rate": 0.0013793559276327695, + "loss": 1.7768, + "step": 1813 + }, + { + "epoch": 0.19135021097046415, + "grad_norm": 0.7091897130012512, + "learning_rate": 0.0013792193376394094, + "loss": 1.7431, + "step": 1814 + }, + { + "epoch": 0.19145569620253164, + "grad_norm": 0.8868871927261353, + "learning_rate": 0.001379082677138885, + "loss": 1.7665, + "step": 1815 + }, + { + "epoch": 0.19156118143459916, + "grad_norm": 0.734649658203125, + "learning_rate": 0.0013789459461465096, + "loss": 1.7752, + "step": 1816 + }, + { + "epoch": 0.19166666666666668, + "grad_norm": 
0.5562379956245422, + "learning_rate": 0.001378809144677605, + "loss": 1.7453, + "step": 1817 + }, + { + "epoch": 0.19177215189873417, + "grad_norm": 0.7264087796211243, + "learning_rate": 0.0013786722727474998, + "loss": 1.7632, + "step": 1818 + }, + { + "epoch": 0.1918776371308017, + "grad_norm": 0.5560337901115417, + "learning_rate": 0.0013785353303715317, + "loss": 1.7873, + "step": 1819 + }, + { + "epoch": 0.19198312236286919, + "grad_norm": 0.623801052570343, + "learning_rate": 0.0013783983175650457, + "loss": 1.8281, + "step": 1820 + }, + { + "epoch": 0.1920886075949367, + "grad_norm": 0.672507643699646, + "learning_rate": 0.001378261234343395, + "loss": 1.7599, + "step": 1821 + }, + { + "epoch": 0.19219409282700423, + "grad_norm": 0.6043681502342224, + "learning_rate": 0.0013781240807219399, + "loss": 1.7985, + "step": 1822 + }, + { + "epoch": 0.19229957805907172, + "grad_norm": 0.5036561489105225, + "learning_rate": 0.0013779868567160495, + "loss": 1.7912, + "step": 1823 + }, + { + "epoch": 0.19240506329113924, + "grad_norm": 0.5391710996627808, + "learning_rate": 0.0013778495623411008, + "loss": 1.7814, + "step": 1824 + }, + { + "epoch": 0.19251054852320676, + "grad_norm": 0.6079800724983215, + "learning_rate": 0.0013777121976124775, + "loss": 1.8165, + "step": 1825 + }, + { + "epoch": 0.19261603375527425, + "grad_norm": 0.719024658203125, + "learning_rate": 0.0013775747625455724, + "loss": 1.7624, + "step": 1826 + }, + { + "epoch": 0.19272151898734177, + "grad_norm": 0.7662105560302734, + "learning_rate": 0.0013774372571557856, + "loss": 1.7554, + "step": 1827 + }, + { + "epoch": 0.1928270042194093, + "grad_norm": 0.5492411851882935, + "learning_rate": 0.0013772996814585261, + "loss": 1.7164, + "step": 1828 + }, + { + "epoch": 0.19293248945147679, + "grad_norm": 0.6055081486701965, + "learning_rate": 0.0013771620354692087, + "loss": 1.7432, + "step": 1829 + }, + { + "epoch": 0.1930379746835443, + "grad_norm": 0.7086941599845886, + "learning_rate": 
0.0013770243192032581, + "loss": 1.745, + "step": 1830 + }, + { + "epoch": 0.19314345991561183, + "grad_norm": 0.5818771719932556, + "learning_rate": 0.0013768865326761058, + "loss": 1.7496, + "step": 1831 + }, + { + "epoch": 0.19324894514767932, + "grad_norm": 0.6160324215888977, + "learning_rate": 0.0013767486759031918, + "loss": 1.7704, + "step": 1832 + }, + { + "epoch": 0.19335443037974684, + "grad_norm": 0.6271533966064453, + "learning_rate": 0.0013766107488999632, + "loss": 1.8011, + "step": 1833 + }, + { + "epoch": 0.19345991561181436, + "grad_norm": 0.5272262692451477, + "learning_rate": 0.0013764727516818757, + "loss": 1.7935, + "step": 1834 + }, + { + "epoch": 0.19356540084388185, + "grad_norm": 0.6012758612632751, + "learning_rate": 0.0013763346842643927, + "loss": 1.7521, + "step": 1835 + }, + { + "epoch": 0.19367088607594937, + "grad_norm": 0.61079341173172, + "learning_rate": 0.0013761965466629847, + "loss": 1.7805, + "step": 1836 + }, + { + "epoch": 0.19377637130801686, + "grad_norm": 0.6109775304794312, + "learning_rate": 0.0013760583388931315, + "loss": 1.7381, + "step": 1837 + }, + { + "epoch": 0.19388185654008439, + "grad_norm": 0.4991675615310669, + "learning_rate": 0.0013759200609703196, + "loss": 1.7504, + "step": 1838 + }, + { + "epoch": 0.1939873417721519, + "grad_norm": 0.5478335022926331, + "learning_rate": 0.0013757817129100437, + "loss": 1.7627, + "step": 1839 + }, + { + "epoch": 0.1940928270042194, + "grad_norm": 0.5300714373588562, + "learning_rate": 0.0013756432947278064, + "loss": 1.7641, + "step": 1840 + }, + { + "epoch": 0.19419831223628692, + "grad_norm": 0.5412257313728333, + "learning_rate": 0.0013755048064391182, + "loss": 1.7859, + "step": 1841 + }, + { + "epoch": 0.19430379746835444, + "grad_norm": 0.5124518275260925, + "learning_rate": 0.0013753662480594973, + "loss": 1.7637, + "step": 1842 + }, + { + "epoch": 0.19440928270042193, + "grad_norm": 0.5567204356193542, + "learning_rate": 0.0013752276196044699, + "loss": 1.7387, 
+ "step": 1843 + }, + { + "epoch": 0.19451476793248945, + "grad_norm": 0.6106987595558167, + "learning_rate": 0.0013750889210895705, + "loss": 1.8037, + "step": 1844 + }, + { + "epoch": 0.19462025316455697, + "grad_norm": 0.6206992864608765, + "learning_rate": 0.0013749501525303401, + "loss": 1.7684, + "step": 1845 + }, + { + "epoch": 0.19472573839662446, + "grad_norm": 0.5002655982971191, + "learning_rate": 0.0013748113139423288, + "loss": 1.7923, + "step": 1846 + }, + { + "epoch": 0.19483122362869199, + "grad_norm": 0.613249659538269, + "learning_rate": 0.0013746724053410944, + "loss": 1.7822, + "step": 1847 + }, + { + "epoch": 0.1949367088607595, + "grad_norm": 0.5597891807556152, + "learning_rate": 0.001374533426742202, + "loss": 1.7409, + "step": 1848 + }, + { + "epoch": 0.195042194092827, + "grad_norm": 0.6464088559150696, + "learning_rate": 0.0013743943781612251, + "loss": 1.7234, + "step": 1849 + }, + { + "epoch": 0.19514767932489452, + "grad_norm": 0.7889098525047302, + "learning_rate": 0.0013742552596137444, + "loss": 1.7949, + "step": 1850 + }, + { + "epoch": 0.19525316455696204, + "grad_norm": 0.6724808216094971, + "learning_rate": 0.0013741160711153492, + "loss": 1.7429, + "step": 1851 + }, + { + "epoch": 0.19535864978902953, + "grad_norm": 0.6248373985290527, + "learning_rate": 0.0013739768126816358, + "loss": 1.7806, + "step": 1852 + }, + { + "epoch": 0.19546413502109705, + "grad_norm": 0.7913411259651184, + "learning_rate": 0.0013738374843282094, + "loss": 1.7663, + "step": 1853 + }, + { + "epoch": 0.19556962025316454, + "grad_norm": 0.6367949843406677, + "learning_rate": 0.0013736980860706819, + "loss": 1.7889, + "step": 1854 + }, + { + "epoch": 0.19567510548523206, + "grad_norm": 0.576712429523468, + "learning_rate": 0.001373558617924674, + "loss": 1.7779, + "step": 1855 + }, + { + "epoch": 0.19578059071729959, + "grad_norm": 0.8742920756340027, + "learning_rate": 0.0013734190799058136, + "loss": 1.7808, + "step": 1856 + }, + { + "epoch": 
0.19588607594936708, + "grad_norm": 0.6934630870819092, + "learning_rate": 0.0013732794720297367, + "loss": 1.7686, + "step": 1857 + }, + { + "epoch": 0.1959915611814346, + "grad_norm": 0.6269975900650024, + "learning_rate": 0.0013731397943120868, + "loss": 1.7709, + "step": 1858 + }, + { + "epoch": 0.19609704641350212, + "grad_norm": 0.8202118873596191, + "learning_rate": 0.001373000046768516, + "loss": 1.7678, + "step": 1859 + }, + { + "epoch": 0.1962025316455696, + "grad_norm": 0.6340488195419312, + "learning_rate": 0.0013728602294146833, + "loss": 1.798, + "step": 1860 + }, + { + "epoch": 0.19630801687763713, + "grad_norm": 0.619353175163269, + "learning_rate": 0.001372720342266256, + "loss": 1.772, + "step": 1861 + }, + { + "epoch": 0.19641350210970465, + "grad_norm": 0.6474510431289673, + "learning_rate": 0.001372580385338909, + "loss": 1.7628, + "step": 1862 + }, + { + "epoch": 0.19651898734177214, + "grad_norm": 0.574639618396759, + "learning_rate": 0.0013724403586483254, + "loss": 1.8326, + "step": 1863 + }, + { + "epoch": 0.19662447257383966, + "grad_norm": 0.6774442791938782, + "learning_rate": 0.001372300262210196, + "loss": 1.7532, + "step": 1864 + }, + { + "epoch": 0.19672995780590719, + "grad_norm": 0.6203790307044983, + "learning_rate": 0.001372160096040219, + "loss": 1.7509, + "step": 1865 + }, + { + "epoch": 0.19683544303797468, + "grad_norm": 0.5876252055168152, + "learning_rate": 0.001372019860154101, + "loss": 1.7394, + "step": 1866 + }, + { + "epoch": 0.1969409282700422, + "grad_norm": 0.6778364181518555, + "learning_rate": 0.001371879554567556, + "loss": 1.7903, + "step": 1867 + }, + { + "epoch": 0.19704641350210972, + "grad_norm": 0.545883297920227, + "learning_rate": 0.0013717391792963062, + "loss": 1.7443, + "step": 1868 + }, + { + "epoch": 0.1971518987341772, + "grad_norm": 0.5916625261306763, + "learning_rate": 0.0013715987343560804, + "loss": 1.7902, + "step": 1869 + }, + { + "epoch": 0.19725738396624473, + "grad_norm": 
0.5178245902061462, + "learning_rate": 0.0013714582197626175, + "loss": 1.7978, + "step": 1870 + }, + { + "epoch": 0.19736286919831222, + "grad_norm": 0.5837519764900208, + "learning_rate": 0.001371317635531662, + "loss": 1.7321, + "step": 1871 + }, + { + "epoch": 0.19746835443037974, + "grad_norm": 0.5340811610221863, + "learning_rate": 0.001371176981678967, + "loss": 1.748, + "step": 1872 + }, + { + "epoch": 0.19757383966244726, + "grad_norm": 0.6514809131622314, + "learning_rate": 0.001371036258220294, + "loss": 1.7344, + "step": 1873 + }, + { + "epoch": 0.19767932489451476, + "grad_norm": 0.6563226580619812, + "learning_rate": 0.0013708954651714116, + "loss": 1.7549, + "step": 1874 + }, + { + "epoch": 0.19778481012658228, + "grad_norm": 0.6366662383079529, + "learning_rate": 0.0013707546025480961, + "loss": 1.7723, + "step": 1875 + }, + { + "epoch": 0.1978902953586498, + "grad_norm": 0.5344021916389465, + "learning_rate": 0.001370613670366132, + "loss": 1.7942, + "step": 1876 + }, + { + "epoch": 0.1979957805907173, + "grad_norm": 0.6046069264411926, + "learning_rate": 0.0013704726686413116, + "loss": 1.8204, + "step": 1877 + }, + { + "epoch": 0.1981012658227848, + "grad_norm": 0.6673242449760437, + "learning_rate": 0.0013703315973894346, + "loss": 1.7297, + "step": 1878 + }, + { + "epoch": 0.19820675105485233, + "grad_norm": 0.5448457598686218, + "learning_rate": 0.001370190456626309, + "loss": 1.7196, + "step": 1879 + }, + { + "epoch": 0.19831223628691982, + "grad_norm": 0.6330868601799011, + "learning_rate": 0.0013700492463677501, + "loss": 1.7748, + "step": 1880 + }, + { + "epoch": 0.19841772151898734, + "grad_norm": 0.6515856981277466, + "learning_rate": 0.0013699079666295811, + "loss": 1.7696, + "step": 1881 + }, + { + "epoch": 0.19852320675105486, + "grad_norm": 0.5379091501235962, + "learning_rate": 0.0013697666174276337, + "loss": 1.7499, + "step": 1882 + }, + { + "epoch": 0.19862869198312236, + "grad_norm": 0.5618151426315308, + "learning_rate": 
0.001369625198777746, + "loss": 1.7624, + "step": 1883 + }, + { + "epoch": 0.19873417721518988, + "grad_norm": 0.6267819404602051, + "learning_rate": 0.0013694837106957654, + "loss": 1.7703, + "step": 1884 + }, + { + "epoch": 0.19883966244725737, + "grad_norm": 0.6945236921310425, + "learning_rate": 0.0013693421531975455, + "loss": 1.7559, + "step": 1885 + }, + { + "epoch": 0.1989451476793249, + "grad_norm": 0.735725462436676, + "learning_rate": 0.0013692005262989496, + "loss": 1.7743, + "step": 1886 + }, + { + "epoch": 0.1990506329113924, + "grad_norm": 0.6115262508392334, + "learning_rate": 0.0013690588300158467, + "loss": 1.7616, + "step": 1887 + }, + { + "epoch": 0.1991561181434599, + "grad_norm": 0.7149060964584351, + "learning_rate": 0.001368917064364115, + "loss": 1.7933, + "step": 1888 + }, + { + "epoch": 0.19926160337552742, + "grad_norm": 0.6979599595069885, + "learning_rate": 0.0013687752293596402, + "loss": 1.7539, + "step": 1889 + }, + { + "epoch": 0.19936708860759494, + "grad_norm": 0.6006383299827576, + "learning_rate": 0.0013686333250183154, + "loss": 1.7539, + "step": 1890 + }, + { + "epoch": 0.19947257383966244, + "grad_norm": 0.6221960783004761, + "learning_rate": 0.0013684913513560418, + "loss": 1.7708, + "step": 1891 + }, + { + "epoch": 0.19957805907172996, + "grad_norm": 0.7474849820137024, + "learning_rate": 0.0013683493083887282, + "loss": 1.7362, + "step": 1892 + }, + { + "epoch": 0.19968354430379748, + "grad_norm": 0.5101376175880432, + "learning_rate": 0.0013682071961322914, + "loss": 1.7095, + "step": 1893 + }, + { + "epoch": 0.19978902953586497, + "grad_norm": 0.6940047144889832, + "learning_rate": 0.0013680650146026554, + "loss": 1.766, + "step": 1894 + }, + { + "epoch": 0.1998945147679325, + "grad_norm": 0.667888343334198, + "learning_rate": 0.0013679227638157523, + "loss": 1.7938, + "step": 1895 + }, + { + "epoch": 0.2, + "grad_norm": 0.6176604628562927, + "learning_rate": 0.0013677804437875227, + "loss": 1.7605, + "step": 1896 + }, 
+ { + "epoch": 0.2001054852320675, + "grad_norm": 0.8401072025299072, + "learning_rate": 0.0013676380545339136, + "loss": 1.7793, + "step": 1897 + }, + { + "epoch": 0.20021097046413502, + "grad_norm": 0.8186706304550171, + "learning_rate": 0.0013674955960708808, + "loss": 1.7682, + "step": 1898 + }, + { + "epoch": 0.20031645569620254, + "grad_norm": 0.6990500092506409, + "learning_rate": 0.0013673530684143874, + "loss": 1.7557, + "step": 1899 + }, + { + "epoch": 0.20042194092827004, + "grad_norm": 0.845535159111023, + "learning_rate": 0.001367210471580404, + "loss": 1.7893, + "step": 1900 + }, + { + "epoch": 0.20052742616033756, + "grad_norm": 0.7675896883010864, + "learning_rate": 0.0013670678055849098, + "loss": 1.7614, + "step": 1901 + }, + { + "epoch": 0.20063291139240505, + "grad_norm": 0.5801231861114502, + "learning_rate": 0.0013669250704438911, + "loss": 1.72, + "step": 1902 + }, + { + "epoch": 0.20073839662447257, + "grad_norm": 0.9061878323554993, + "learning_rate": 0.0013667822661733418, + "loss": 1.7798, + "step": 1903 + }, + { + "epoch": 0.2008438818565401, + "grad_norm": 0.6637502908706665, + "learning_rate": 0.0013666393927892642, + "loss": 1.7518, + "step": 1904 + }, + { + "epoch": 0.20094936708860758, + "grad_norm": 0.6515799760818481, + "learning_rate": 0.0013664964503076677, + "loss": 1.713, + "step": 1905 + }, + { + "epoch": 0.2010548523206751, + "grad_norm": 0.6578193306922913, + "learning_rate": 0.0013663534387445696, + "loss": 1.7951, + "step": 1906 + }, + { + "epoch": 0.20116033755274262, + "grad_norm": 0.5660399794578552, + "learning_rate": 0.0013662103581159955, + "loss": 1.7604, + "step": 1907 + }, + { + "epoch": 0.20126582278481012, + "grad_norm": 0.8487011790275574, + "learning_rate": 0.0013660672084379781, + "loss": 1.7713, + "step": 1908 + }, + { + "epoch": 0.20137130801687764, + "grad_norm": 0.5995063185691833, + "learning_rate": 0.001365923989726558, + "loss": 1.8081, + "step": 1909 + }, + { + "epoch": 0.20147679324894516, + 
"grad_norm": 0.8203365802764893, + "learning_rate": 0.0013657807019977835, + "loss": 1.7799, + "step": 1910 + }, + { + "epoch": 0.20158227848101265, + "grad_norm": 0.9505782723426819, + "learning_rate": 0.0013656373452677107, + "loss": 1.7815, + "step": 1911 + }, + { + "epoch": 0.20168776371308017, + "grad_norm": 0.6387736797332764, + "learning_rate": 0.0013654939195524038, + "loss": 1.7555, + "step": 1912 + }, + { + "epoch": 0.2017932489451477, + "grad_norm": 0.7613151669502258, + "learning_rate": 0.0013653504248679338, + "loss": 1.7402, + "step": 1913 + }, + { + "epoch": 0.20189873417721518, + "grad_norm": 1.154398798942566, + "learning_rate": 0.0013652068612303803, + "loss": 1.7783, + "step": 1914 + }, + { + "epoch": 0.2020042194092827, + "grad_norm": 0.5269138216972351, + "learning_rate": 0.0013650632286558305, + "loss": 1.7366, + "step": 1915 + }, + { + "epoch": 0.20210970464135022, + "grad_norm": 0.788766622543335, + "learning_rate": 0.001364919527160379, + "loss": 1.7711, + "step": 1916 + }, + { + "epoch": 0.20221518987341772, + "grad_norm": 0.6325126886367798, + "learning_rate": 0.001364775756760128, + "loss": 1.7428, + "step": 1917 + }, + { + "epoch": 0.20232067510548524, + "grad_norm": 0.6889649033546448, + "learning_rate": 0.0013646319174711878, + "loss": 1.7698, + "step": 1918 + }, + { + "epoch": 0.20242616033755273, + "grad_norm": 0.674340009689331, + "learning_rate": 0.0013644880093096766, + "loss": 1.8096, + "step": 1919 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 0.6399692296981812, + "learning_rate": 0.0013643440322917198, + "loss": 1.7463, + "step": 1920 + }, + { + "epoch": 0.20263713080168777, + "grad_norm": 0.7241089344024658, + "learning_rate": 0.0013641999864334507, + "loss": 1.779, + "step": 1921 + }, + { + "epoch": 0.20274261603375526, + "grad_norm": 0.6486551761627197, + "learning_rate": 0.0013640558717510107, + "loss": 1.7288, + "step": 1922 + }, + { + "epoch": 0.20284810126582278, + "grad_norm": 0.5946498513221741, + 
"learning_rate": 0.0013639116882605481, + "loss": 1.7693, + "step": 1923 + }, + { + "epoch": 0.2029535864978903, + "grad_norm": 0.5822680592536926, + "learning_rate": 0.0013637674359782196, + "loss": 1.7408, + "step": 1924 + }, + { + "epoch": 0.2030590717299578, + "grad_norm": 0.6114171147346497, + "learning_rate": 0.0013636231149201895, + "loss": 1.8033, + "step": 1925 + }, + { + "epoch": 0.20316455696202532, + "grad_norm": 0.6100791096687317, + "learning_rate": 0.0013634787251026296, + "loss": 1.744, + "step": 1926 + }, + { + "epoch": 0.20327004219409284, + "grad_norm": 0.5550673604011536, + "learning_rate": 0.0013633342665417192, + "loss": 1.7429, + "step": 1927 + }, + { + "epoch": 0.20337552742616033, + "grad_norm": 0.654039204120636, + "learning_rate": 0.0013631897392536463, + "loss": 1.7952, + "step": 1928 + }, + { + "epoch": 0.20348101265822785, + "grad_norm": 0.7353786826133728, + "learning_rate": 0.001363045143254605, + "loss": 1.7418, + "step": 1929 + }, + { + "epoch": 0.20358649789029537, + "grad_norm": 0.8328297734260559, + "learning_rate": 0.0013629004785607989, + "loss": 1.7828, + "step": 1930 + }, + { + "epoch": 0.20369198312236286, + "grad_norm": 0.5096027255058289, + "learning_rate": 0.0013627557451884374, + "loss": 1.6982, + "step": 1931 + }, + { + "epoch": 0.20379746835443038, + "grad_norm": 0.6868295073509216, + "learning_rate": 0.0013626109431537398, + "loss": 1.7613, + "step": 1932 + }, + { + "epoch": 0.2039029535864979, + "grad_norm": 0.5999056100845337, + "learning_rate": 0.001362466072472931, + "loss": 1.7647, + "step": 1933 + }, + { + "epoch": 0.2040084388185654, + "grad_norm": 0.6788963079452515, + "learning_rate": 0.0013623211331622448, + "loss": 1.7703, + "step": 1934 + }, + { + "epoch": 0.20411392405063292, + "grad_norm": 0.5903393030166626, + "learning_rate": 0.0013621761252379221, + "loss": 1.7421, + "step": 1935 + }, + { + "epoch": 0.2042194092827004, + "grad_norm": 0.5344558954238892, + "learning_rate": 0.0013620310487162124, + 
"loss": 1.7522, + "step": 1936 + }, + { + "epoch": 0.20432489451476793, + "grad_norm": 0.5978344082832336, + "learning_rate": 0.0013618859036133714, + "loss": 1.7729, + "step": 1937 + }, + { + "epoch": 0.20443037974683545, + "grad_norm": 0.5295487642288208, + "learning_rate": 0.001361740689945664, + "loss": 1.7708, + "step": 1938 + }, + { + "epoch": 0.20453586497890294, + "grad_norm": 0.6141546368598938, + "learning_rate": 0.001361595407729362, + "loss": 1.7646, + "step": 1939 + }, + { + "epoch": 0.20464135021097046, + "grad_norm": 0.530029833316803, + "learning_rate": 0.0013614500569807445, + "loss": 1.7215, + "step": 1940 + }, + { + "epoch": 0.20474683544303798, + "grad_norm": 0.5455735325813293, + "learning_rate": 0.0013613046377160996, + "loss": 1.7611, + "step": 1941 + }, + { + "epoch": 0.20485232067510548, + "grad_norm": 0.579321026802063, + "learning_rate": 0.0013611591499517212, + "loss": 1.7739, + "step": 1942 + }, + { + "epoch": 0.204957805907173, + "grad_norm": 0.5606583952903748, + "learning_rate": 0.001361013593703913, + "loss": 1.747, + "step": 1943 + }, + { + "epoch": 0.20506329113924052, + "grad_norm": 0.5031850934028625, + "learning_rate": 0.0013608679689889847, + "loss": 1.7499, + "step": 1944 + }, + { + "epoch": 0.205168776371308, + "grad_norm": 0.625857412815094, + "learning_rate": 0.0013607222758232546, + "loss": 1.7835, + "step": 1945 + }, + { + "epoch": 0.20527426160337553, + "grad_norm": 0.6399746537208557, + "learning_rate": 0.0013605765142230479, + "loss": 1.7218, + "step": 1946 + }, + { + "epoch": 0.20537974683544305, + "grad_norm": 0.5062942504882812, + "learning_rate": 0.0013604306842046983, + "loss": 1.7795, + "step": 1947 + }, + { + "epoch": 0.20548523206751054, + "grad_norm": 0.6031811833381653, + "learning_rate": 0.0013602847857845466, + "loss": 1.7317, + "step": 1948 + }, + { + "epoch": 0.20559071729957806, + "grad_norm": 0.6113677620887756, + "learning_rate": 0.0013601388189789414, + "loss": 1.7326, + "step": 1949 + }, + { + 
"epoch": 0.20569620253164558, + "grad_norm": 0.512611985206604, + "learning_rate": 0.0013599927838042394, + "loss": 1.727, + "step": 1950 + }, + { + "epoch": 0.20580168776371308, + "grad_norm": 0.5445234179496765, + "learning_rate": 0.0013598466802768041, + "loss": 1.7879, + "step": 1951 + }, + { + "epoch": 0.2059071729957806, + "grad_norm": 0.6479789018630981, + "learning_rate": 0.0013597005084130072, + "loss": 1.7577, + "step": 1952 + }, + { + "epoch": 0.2060126582278481, + "grad_norm": 0.5464509129524231, + "learning_rate": 0.0013595542682292281, + "loss": 1.7908, + "step": 1953 + }, + { + "epoch": 0.2061181434599156, + "grad_norm": 0.5544924139976501, + "learning_rate": 0.0013594079597418541, + "loss": 1.7086, + "step": 1954 + }, + { + "epoch": 0.20622362869198313, + "grad_norm": 0.65696120262146, + "learning_rate": 0.0013592615829672791, + "loss": 1.7623, + "step": 1955 + }, + { + "epoch": 0.20632911392405062, + "grad_norm": 0.6680405139923096, + "learning_rate": 0.0013591151379219058, + "loss": 1.7139, + "step": 1956 + }, + { + "epoch": 0.20643459915611814, + "grad_norm": 0.532789409160614, + "learning_rate": 0.0013589686246221438, + "loss": 1.7468, + "step": 1957 + }, + { + "epoch": 0.20654008438818566, + "grad_norm": 0.5314083695411682, + "learning_rate": 0.001358822043084411, + "loss": 1.7526, + "step": 1958 + }, + { + "epoch": 0.20664556962025316, + "grad_norm": 0.5546109676361084, + "learning_rate": 0.0013586753933251322, + "loss": 1.7769, + "step": 1959 + }, + { + "epoch": 0.20675105485232068, + "grad_norm": 0.5825533270835876, + "learning_rate": 0.0013585286753607408, + "loss": 1.7283, + "step": 1960 + }, + { + "epoch": 0.2068565400843882, + "grad_norm": 0.5872355103492737, + "learning_rate": 0.0013583818892076765, + "loss": 1.786, + "step": 1961 + }, + { + "epoch": 0.2069620253164557, + "grad_norm": 0.5355517268180847, + "learning_rate": 0.0013582350348823882, + "loss": 1.7945, + "step": 1962 + }, + { + "epoch": 0.2070675105485232, + "grad_norm": 
0.5266429781913757, + "learning_rate": 0.0013580881124013312, + "loss": 1.7861, + "step": 1963 + }, + { + "epoch": 0.20717299578059073, + "grad_norm": 0.5678731799125671, + "learning_rate": 0.001357941121780969, + "loss": 1.7333, + "step": 1964 + }, + { + "epoch": 0.20727848101265822, + "grad_norm": 0.5444549918174744, + "learning_rate": 0.0013577940630377725, + "loss": 1.7565, + "step": 1965 + }, + { + "epoch": 0.20738396624472574, + "grad_norm": 0.5402362942695618, + "learning_rate": 0.0013576469361882208, + "loss": 1.7496, + "step": 1966 + }, + { + "epoch": 0.20748945147679324, + "grad_norm": 0.553633987903595, + "learning_rate": 0.0013574997412487996, + "loss": 1.7793, + "step": 1967 + }, + { + "epoch": 0.20759493670886076, + "grad_norm": 0.5375739336013794, + "learning_rate": 0.0013573524782360034, + "loss": 1.7585, + "step": 1968 + }, + { + "epoch": 0.20770042194092828, + "grad_norm": 0.5925225615501404, + "learning_rate": 0.0013572051471663332, + "loss": 1.7579, + "step": 1969 + }, + { + "epoch": 0.20780590717299577, + "grad_norm": 0.5640515685081482, + "learning_rate": 0.0013570577480562986, + "loss": 1.7532, + "step": 1970 + }, + { + "epoch": 0.2079113924050633, + "grad_norm": 0.6028405427932739, + "learning_rate": 0.0013569102809224162, + "loss": 1.7494, + "step": 1971 + }, + { + "epoch": 0.2080168776371308, + "grad_norm": 0.7289939522743225, + "learning_rate": 0.0013567627457812105, + "loss": 1.7122, + "step": 1972 + }, + { + "epoch": 0.2081223628691983, + "grad_norm": 0.643130362033844, + "learning_rate": 0.0013566151426492137, + "loss": 1.7601, + "step": 1973 + }, + { + "epoch": 0.20822784810126582, + "grad_norm": 0.578685462474823, + "learning_rate": 0.0013564674715429651, + "loss": 1.7409, + "step": 1974 + }, + { + "epoch": 0.20833333333333334, + "grad_norm": 0.7569277882575989, + "learning_rate": 0.0013563197324790123, + "loss": 1.7627, + "step": 1975 + }, + { + "epoch": 0.20843881856540084, + "grad_norm": 0.7590417861938477, + "learning_rate": 
0.0013561719254739104, + "loss": 1.7415, + "step": 1976 + }, + { + "epoch": 0.20854430379746836, + "grad_norm": 0.5404102802276611, + "learning_rate": 0.001356024050544221, + "loss": 1.7589, + "step": 1977 + }, + { + "epoch": 0.20864978902953588, + "grad_norm": 0.5796499252319336, + "learning_rate": 0.0013558761077065154, + "loss": 1.7413, + "step": 1978 + }, + { + "epoch": 0.20875527426160337, + "grad_norm": 0.6218602061271667, + "learning_rate": 0.0013557280969773704, + "loss": 1.7221, + "step": 1979 + }, + { + "epoch": 0.2088607594936709, + "grad_norm": 0.572904109954834, + "learning_rate": 0.0013555800183733717, + "loss": 1.756, + "step": 1980 + }, + { + "epoch": 0.2089662447257384, + "grad_norm": 0.5852183103561401, + "learning_rate": 0.0013554318719111124, + "loss": 1.724, + "step": 1981 + }, + { + "epoch": 0.2090717299578059, + "grad_norm": 0.6322979927062988, + "learning_rate": 0.0013552836576071925, + "loss": 1.6976, + "step": 1982 + }, + { + "epoch": 0.20917721518987342, + "grad_norm": 0.5113502740859985, + "learning_rate": 0.0013551353754782211, + "loss": 1.7518, + "step": 1983 + }, + { + "epoch": 0.20928270042194091, + "grad_norm": 0.5940808057785034, + "learning_rate": 0.0013549870255408132, + "loss": 1.791, + "step": 1984 + }, + { + "epoch": 0.20938818565400844, + "grad_norm": 0.5323735475540161, + "learning_rate": 0.0013548386078115924, + "loss": 1.7405, + "step": 1985 + }, + { + "epoch": 0.20949367088607596, + "grad_norm": 0.5431329011917114, + "learning_rate": 0.0013546901223071893, + "loss": 1.7111, + "step": 1986 + }, + { + "epoch": 0.20959915611814345, + "grad_norm": 0.5403445959091187, + "learning_rate": 0.001354541569044243, + "loss": 1.7149, + "step": 1987 + }, + { + "epoch": 0.20970464135021097, + "grad_norm": 0.5520282983779907, + "learning_rate": 0.0013543929480393994, + "loss": 1.7624, + "step": 1988 + }, + { + "epoch": 0.2098101265822785, + "grad_norm": 0.5321283936500549, + "learning_rate": 0.0013542442593093122, + "loss": 1.7753, + 
"step": 1989 + }, + { + "epoch": 0.20991561181434598, + "grad_norm": 0.5443602204322815, + "learning_rate": 0.0013540955028706425, + "loss": 1.7532, + "step": 1990 + }, + { + "epoch": 0.2100210970464135, + "grad_norm": 0.5399332046508789, + "learning_rate": 0.0013539466787400598, + "loss": 1.772, + "step": 1991 + }, + { + "epoch": 0.21012658227848102, + "grad_norm": 0.5769407153129578, + "learning_rate": 0.00135379778693424, + "loss": 1.7324, + "step": 1992 + }, + { + "epoch": 0.21023206751054851, + "grad_norm": 0.5258762836456299, + "learning_rate": 0.0013536488274698672, + "loss": 1.7641, + "step": 1993 + }, + { + "epoch": 0.21033755274261604, + "grad_norm": 0.5081748366355896, + "learning_rate": 0.0013534998003636332, + "loss": 1.7089, + "step": 1994 + }, + { + "epoch": 0.21044303797468356, + "grad_norm": 0.594390332698822, + "learning_rate": 0.0013533507056322374, + "loss": 1.7732, + "step": 1995 + }, + { + "epoch": 0.21054852320675105, + "grad_norm": 0.5897009968757629, + "learning_rate": 0.0013532015432923864, + "loss": 1.7238, + "step": 1996 + }, + { + "epoch": 0.21065400843881857, + "grad_norm": 0.6679245829582214, + "learning_rate": 0.0013530523133607948, + "loss": 1.7449, + "step": 1997 + }, + { + "epoch": 0.2107594936708861, + "grad_norm": 0.6193536520004272, + "learning_rate": 0.0013529030158541842, + "loss": 1.7276, + "step": 1998 + }, + { + "epoch": 0.21086497890295358, + "grad_norm": 0.508429229259491, + "learning_rate": 0.0013527536507892844, + "loss": 1.7198, + "step": 1999 + }, + { + "epoch": 0.2109704641350211, + "grad_norm": 0.6152554750442505, + "learning_rate": 0.0013526042181828324, + "loss": 1.7605, + "step": 2000 + }, + { + "epoch": 0.2110759493670886, + "grad_norm": 0.676760733127594, + "learning_rate": 0.001352454718051573, + "loss": 1.7898, + "step": 2001 + }, + { + "epoch": 0.21118143459915611, + "grad_norm": 0.6129175424575806, + "learning_rate": 0.0013523051504122584, + "loss": 1.7487, + "step": 2002 + }, + { + "epoch": 
0.21128691983122364, + "grad_norm": 0.5306701064109802, + "learning_rate": 0.0013521555152816481, + "loss": 1.6908, + "step": 2003 + }, + { + "epoch": 0.21139240506329113, + "grad_norm": 0.7304961085319519, + "learning_rate": 0.0013520058126765097, + "loss": 1.7465, + "step": 2004 + }, + { + "epoch": 0.21149789029535865, + "grad_norm": 0.7580407857894897, + "learning_rate": 0.0013518560426136182, + "loss": 1.7715, + "step": 2005 + }, + { + "epoch": 0.21160337552742617, + "grad_norm": 0.7002617120742798, + "learning_rate": 0.001351706205109756, + "loss": 1.7559, + "step": 2006 + }, + { + "epoch": 0.21170886075949366, + "grad_norm": 0.5726504921913147, + "learning_rate": 0.001351556300181713, + "loss": 1.7448, + "step": 2007 + }, + { + "epoch": 0.21181434599156118, + "grad_norm": 0.6280139684677124, + "learning_rate": 0.001351406327846287, + "loss": 1.7884, + "step": 2008 + }, + { + "epoch": 0.2119198312236287, + "grad_norm": 0.5667811632156372, + "learning_rate": 0.0013512562881202832, + "loss": 1.7218, + "step": 2009 + }, + { + "epoch": 0.2120253164556962, + "grad_norm": 0.71473228931427, + "learning_rate": 0.0013511061810205143, + "loss": 1.7289, + "step": 2010 + }, + { + "epoch": 0.21213080168776371, + "grad_norm": 0.662394106388092, + "learning_rate": 0.0013509560065638002, + "loss": 1.7537, + "step": 2011 + }, + { + "epoch": 0.21223628691983124, + "grad_norm": 0.6129971146583557, + "learning_rate": 0.001350805764766969, + "loss": 1.7468, + "step": 2012 + }, + { + "epoch": 0.21234177215189873, + "grad_norm": 0.5775983333587646, + "learning_rate": 0.0013506554556468558, + "loss": 1.7698, + "step": 2013 + }, + { + "epoch": 0.21244725738396625, + "grad_norm": 0.6310065388679504, + "learning_rate": 0.001350505079220304, + "loss": 1.7497, + "step": 2014 + }, + { + "epoch": 0.21255274261603377, + "grad_norm": 0.5896393060684204, + "learning_rate": 0.0013503546355041636, + "loss": 1.7647, + "step": 2015 + }, + { + "epoch": 0.21265822784810126, + "grad_norm": 
0.6104800701141357, + "learning_rate": 0.0013502041245152924, + "loss": 1.7902, + "step": 2016 + }, + { + "epoch": 0.21276371308016878, + "grad_norm": 0.7404797077178955, + "learning_rate": 0.0013500535462705565, + "loss": 1.7543, + "step": 2017 + }, + { + "epoch": 0.21286919831223627, + "grad_norm": 0.7521002292633057, + "learning_rate": 0.0013499029007868284, + "loss": 1.7472, + "step": 2018 + }, + { + "epoch": 0.2129746835443038, + "grad_norm": 0.5480999946594238, + "learning_rate": 0.0013497521880809888, + "loss": 1.7666, + "step": 2019 + }, + { + "epoch": 0.21308016877637131, + "grad_norm": 0.6884995698928833, + "learning_rate": 0.001349601408169926, + "loss": 1.7334, + "step": 2020 + }, + { + "epoch": 0.2131856540084388, + "grad_norm": 0.7936210036277771, + "learning_rate": 0.0013494505610705356, + "loss": 1.7349, + "step": 2021 + }, + { + "epoch": 0.21329113924050633, + "grad_norm": 0.547008752822876, + "learning_rate": 0.0013492996467997205, + "loss": 1.7277, + "step": 2022 + }, + { + "epoch": 0.21339662447257385, + "grad_norm": 0.6721811890602112, + "learning_rate": 0.0013491486653743918, + "loss": 1.801, + "step": 2023 + }, + { + "epoch": 0.21350210970464134, + "grad_norm": 0.6267712712287903, + "learning_rate": 0.0013489976168114676, + "loss": 1.7704, + "step": 2024 + }, + { + "epoch": 0.21360759493670886, + "grad_norm": 0.587982177734375, + "learning_rate": 0.0013488465011278733, + "loss": 1.713, + "step": 2025 + }, + { + "epoch": 0.21371308016877638, + "grad_norm": 0.6106756329536438, + "learning_rate": 0.0013486953183405425, + "loss": 1.7142, + "step": 2026 + }, + { + "epoch": 0.21381856540084387, + "grad_norm": 0.6716460585594177, + "learning_rate": 0.001348544068466416, + "loss": 1.7324, + "step": 2027 + }, + { + "epoch": 0.2139240506329114, + "grad_norm": 0.6656553745269775, + "learning_rate": 0.0013483927515224418, + "loss": 1.7643, + "step": 2028 + }, + { + "epoch": 0.21402953586497891, + "grad_norm": 0.5984935164451599, + "learning_rate": 
0.0013482413675255762, + "loss": 1.7654, + "step": 2029 + }, + { + "epoch": 0.2141350210970464, + "grad_norm": 0.6057150363922119, + "learning_rate": 0.0013480899164927823, + "loss": 1.7326, + "step": 2030 + }, + { + "epoch": 0.21424050632911393, + "grad_norm": 0.762966513633728, + "learning_rate": 0.0013479383984410305, + "loss": 1.7643, + "step": 2031 + }, + { + "epoch": 0.21434599156118145, + "grad_norm": 0.7692340612411499, + "learning_rate": 0.0013477868133873001, + "loss": 1.7442, + "step": 2032 + }, + { + "epoch": 0.21445147679324894, + "grad_norm": 0.5183887481689453, + "learning_rate": 0.0013476351613485762, + "loss": 1.7471, + "step": 2033 + }, + { + "epoch": 0.21455696202531646, + "grad_norm": 0.6118265986442566, + "learning_rate": 0.0013474834423418522, + "loss": 1.7361, + "step": 2034 + }, + { + "epoch": 0.21466244725738395, + "grad_norm": 0.501073956489563, + "learning_rate": 0.0013473316563841296, + "loss": 1.7386, + "step": 2035 + }, + { + "epoch": 0.21476793248945147, + "grad_norm": 0.6226796507835388, + "learning_rate": 0.0013471798034924158, + "loss": 1.7541, + "step": 2036 + }, + { + "epoch": 0.214873417721519, + "grad_norm": 0.622655987739563, + "learning_rate": 0.0013470278836837275, + "loss": 1.7259, + "step": 2037 + }, + { + "epoch": 0.2149789029535865, + "grad_norm": 0.5721461176872253, + "learning_rate": 0.001346875896975088, + "loss": 1.7444, + "step": 2038 + }, + { + "epoch": 0.215084388185654, + "grad_norm": 0.6422327160835266, + "learning_rate": 0.0013467238433835277, + "loss": 1.7534, + "step": 2039 + }, + { + "epoch": 0.21518987341772153, + "grad_norm": 0.5577677488327026, + "learning_rate": 0.0013465717229260853, + "loss": 1.7614, + "step": 2040 + }, + { + "epoch": 0.21529535864978902, + "grad_norm": 0.5834091305732727, + "learning_rate": 0.0013464195356198065, + "loss": 1.7378, + "step": 2041 + }, + { + "epoch": 0.21540084388185654, + "grad_norm": 0.5931575298309326, + "learning_rate": 0.0013462672814817445, + "loss": 1.7271, + 
"step": 2042 + }, + { + "epoch": 0.21550632911392406, + "grad_norm": 0.6651187539100647, + "learning_rate": 0.0013461149605289607, + "loss": 1.7558, + "step": 2043 + }, + { + "epoch": 0.21561181434599155, + "grad_norm": 0.5306116938591003, + "learning_rate": 0.001345962572778523, + "loss": 1.7766, + "step": 2044 + }, + { + "epoch": 0.21571729957805907, + "grad_norm": 0.608521044254303, + "learning_rate": 0.0013458101182475073, + "loss": 1.7249, + "step": 2045 + }, + { + "epoch": 0.2158227848101266, + "grad_norm": 0.5679751634597778, + "learning_rate": 0.0013456575969529967, + "loss": 1.7595, + "step": 2046 + }, + { + "epoch": 0.2159282700421941, + "grad_norm": 0.6724389791488647, + "learning_rate": 0.001345505008912082, + "loss": 1.7257, + "step": 2047 + }, + { + "epoch": 0.2160337552742616, + "grad_norm": 0.6972217559814453, + "learning_rate": 0.0013453523541418623, + "loss": 1.7459, + "step": 2048 + }, + { + "epoch": 0.21613924050632913, + "grad_norm": 0.6244422197341919, + "learning_rate": 0.001345199632659442, + "loss": 1.753, + "step": 2049 + }, + { + "epoch": 0.21624472573839662, + "grad_norm": 0.6604748368263245, + "learning_rate": 0.001345046844481935, + "loss": 1.7507, + "step": 2050 + }, + { + "epoch": 0.21635021097046414, + "grad_norm": 0.5862040519714355, + "learning_rate": 0.0013448939896264622, + "loss": 1.7613, + "step": 2051 + }, + { + "epoch": 0.21645569620253163, + "grad_norm": 0.6657237410545349, + "learning_rate": 0.001344741068110151, + "loss": 1.7676, + "step": 2052 + }, + { + "epoch": 0.21656118143459915, + "grad_norm": 0.709071159362793, + "learning_rate": 0.001344588079950138, + "loss": 1.7206, + "step": 2053 + }, + { + "epoch": 0.21666666666666667, + "grad_norm": 0.7964410781860352, + "learning_rate": 0.0013444350251635654, + "loss": 1.739, + "step": 2054 + }, + { + "epoch": 0.21677215189873417, + "grad_norm": 0.6725720763206482, + "learning_rate": 0.0013442819037675843, + "loss": 1.7503, + "step": 2055 + }, + { + "epoch": 
0.2168776371308017, + "grad_norm": 0.6572409868240356, + "learning_rate": 0.0013441287157793522, + "loss": 1.7538, + "step": 2056 + }, + { + "epoch": 0.2169831223628692, + "grad_norm": 0.7820761203765869, + "learning_rate": 0.0013439754612160353, + "loss": 1.744, + "step": 2057 + }, + { + "epoch": 0.2170886075949367, + "grad_norm": 0.5306022763252258, + "learning_rate": 0.001343822140094806, + "loss": 1.7108, + "step": 2058 + }, + { + "epoch": 0.21719409282700422, + "grad_norm": 0.7173293232917786, + "learning_rate": 0.0013436687524328449, + "loss": 1.7217, + "step": 2059 + }, + { + "epoch": 0.21729957805907174, + "grad_norm": 0.6452112793922424, + "learning_rate": 0.0013435152982473396, + "loss": 1.7154, + "step": 2060 + }, + { + "epoch": 0.21740506329113923, + "grad_norm": 0.6097286939620972, + "learning_rate": 0.0013433617775554854, + "loss": 1.7385, + "step": 2061 + }, + { + "epoch": 0.21751054852320675, + "grad_norm": 0.559660017490387, + "learning_rate": 0.0013432081903744857, + "loss": 1.7716, + "step": 2062 + }, + { + "epoch": 0.21761603375527427, + "grad_norm": 0.6148292422294617, + "learning_rate": 0.00134305453672155, + "loss": 1.723, + "step": 2063 + }, + { + "epoch": 0.21772151898734177, + "grad_norm": 0.6278219223022461, + "learning_rate": 0.0013429008166138965, + "loss": 1.7133, + "step": 2064 + }, + { + "epoch": 0.2178270042194093, + "grad_norm": 0.6612865328788757, + "learning_rate": 0.0013427470300687498, + "loss": 1.7159, + "step": 2065 + }, + { + "epoch": 0.21793248945147678, + "grad_norm": 0.6195502877235413, + "learning_rate": 0.0013425931771033426, + "loss": 1.7672, + "step": 2066 + }, + { + "epoch": 0.2180379746835443, + "grad_norm": 0.5497522950172424, + "learning_rate": 0.0013424392577349152, + "loss": 1.7204, + "step": 2067 + }, + { + "epoch": 0.21814345991561182, + "grad_norm": 0.624890923500061, + "learning_rate": 0.001342285271980715, + "loss": 1.7578, + "step": 2068 + }, + { + "epoch": 0.2182489451476793, + "grad_norm": 
0.6180700659751892, + "learning_rate": 0.0013421312198579963, + "loss": 1.7601, + "step": 2069 + }, + { + "epoch": 0.21835443037974683, + "grad_norm": 0.6824714541435242, + "learning_rate": 0.0013419771013840217, + "loss": 1.739, + "step": 2070 + }, + { + "epoch": 0.21845991561181435, + "grad_norm": 0.6361819505691528, + "learning_rate": 0.0013418229165760613, + "loss": 1.7676, + "step": 2071 + }, + { + "epoch": 0.21856540084388185, + "grad_norm": 1.0914024114608765, + "learning_rate": 0.001341668665451392, + "loss": 1.7485, + "step": 2072 + }, + { + "epoch": 0.21867088607594937, + "grad_norm": 0.705956757068634, + "learning_rate": 0.0013415143480272982, + "loss": 1.7606, + "step": 2073 + }, + { + "epoch": 0.2187763713080169, + "grad_norm": 0.6577872633934021, + "learning_rate": 0.0013413599643210723, + "loss": 1.7202, + "step": 2074 + }, + { + "epoch": 0.21888185654008438, + "grad_norm": 0.7620785236358643, + "learning_rate": 0.0013412055143500136, + "loss": 1.7214, + "step": 2075 + }, + { + "epoch": 0.2189873417721519, + "grad_norm": 0.5289851427078247, + "learning_rate": 0.001341050998131429, + "loss": 1.7058, + "step": 2076 + }, + { + "epoch": 0.21909282700421942, + "grad_norm": 0.6113953590393066, + "learning_rate": 0.0013408964156826327, + "loss": 1.7573, + "step": 2077 + }, + { + "epoch": 0.2191983122362869, + "grad_norm": 0.5710422396659851, + "learning_rate": 0.0013407417670209467, + "loss": 1.7305, + "step": 2078 + }, + { + "epoch": 0.21930379746835443, + "grad_norm": 0.5292906165122986, + "learning_rate": 0.0013405870521636999, + "loss": 1.7602, + "step": 2079 + }, + { + "epoch": 0.21940928270042195, + "grad_norm": 0.5616595149040222, + "learning_rate": 0.001340432271128229, + "loss": 1.7632, + "step": 2080 + }, + { + "epoch": 0.21951476793248945, + "grad_norm": 0.5564497113227844, + "learning_rate": 0.001340277423931878, + "loss": 1.7356, + "step": 2081 + }, + { + "epoch": 0.21962025316455697, + "grad_norm": 0.723117470741272, + "learning_rate": 
0.0013401225105919982, + "loss": 1.7212, + "step": 2082 + }, + { + "epoch": 0.21972573839662446, + "grad_norm": 0.6751576066017151, + "learning_rate": 0.0013399675311259484, + "loss": 1.7088, + "step": 2083 + }, + { + "epoch": 0.21983122362869198, + "grad_norm": 0.5571282505989075, + "learning_rate": 0.0013398124855510951, + "loss": 1.734, + "step": 2084 + }, + { + "epoch": 0.2199367088607595, + "grad_norm": 0.6007402539253235, + "learning_rate": 0.0013396573738848115, + "loss": 1.7425, + "step": 2085 + }, + { + "epoch": 0.220042194092827, + "grad_norm": 0.6813772320747375, + "learning_rate": 0.001339502196144479, + "loss": 1.7012, + "step": 2086 + }, + { + "epoch": 0.2201476793248945, + "grad_norm": 0.5666372776031494, + "learning_rate": 0.0013393469523474858, + "loss": 1.7838, + "step": 2087 + }, + { + "epoch": 0.22025316455696203, + "grad_norm": 0.5901045203208923, + "learning_rate": 0.001339191642511228, + "loss": 1.6974, + "step": 2088 + }, + { + "epoch": 0.22035864978902953, + "grad_norm": 0.7004777789115906, + "learning_rate": 0.0013390362666531085, + "loss": 1.7506, + "step": 2089 + }, + { + "epoch": 0.22046413502109705, + "grad_norm": 0.5660896301269531, + "learning_rate": 0.0013388808247905381, + "loss": 1.7573, + "step": 2090 + }, + { + "epoch": 0.22056962025316457, + "grad_norm": 0.56268709897995, + "learning_rate": 0.0013387253169409351, + "loss": 1.74, + "step": 2091 + }, + { + "epoch": 0.22067510548523206, + "grad_norm": 0.589118242263794, + "learning_rate": 0.0013385697431217247, + "loss": 1.7473, + "step": 2092 + }, + { + "epoch": 0.22078059071729958, + "grad_norm": 0.5403873920440674, + "learning_rate": 0.0013384141033503394, + "loss": 1.7321, + "step": 2093 + }, + { + "epoch": 0.2208860759493671, + "grad_norm": 0.6536214351654053, + "learning_rate": 0.0013382583976442198, + "loss": 1.7376, + "step": 2094 + }, + { + "epoch": 0.2209915611814346, + "grad_norm": 0.5511201620101929, + "learning_rate": 0.0013381026260208136, + "loss": 1.6925, + "step": 
2095 + }, + { + "epoch": 0.2210970464135021, + "grad_norm": 0.6087770462036133, + "learning_rate": 0.0013379467884975756, + "loss": 1.749, + "step": 2096 + }, + { + "epoch": 0.22120253164556963, + "grad_norm": 0.6091126203536987, + "learning_rate": 0.001337790885091968, + "loss": 1.746, + "step": 2097 + }, + { + "epoch": 0.22130801687763713, + "grad_norm": 0.5914732813835144, + "learning_rate": 0.0013376349158214609, + "loss": 1.7694, + "step": 2098 + }, + { + "epoch": 0.22141350210970465, + "grad_norm": 0.6624528765678406, + "learning_rate": 0.0013374788807035314, + "loss": 1.7832, + "step": 2099 + }, + { + "epoch": 0.22151898734177214, + "grad_norm": 0.5497255325317383, + "learning_rate": 0.0013373227797556634, + "loss": 1.7348, + "step": 2100 + }, + { + "epoch": 0.22162447257383966, + "grad_norm": 0.5699964761734009, + "learning_rate": 0.0013371666129953497, + "loss": 1.7213, + "step": 2101 + }, + { + "epoch": 0.22172995780590718, + "grad_norm": 0.7334792017936707, + "learning_rate": 0.0013370103804400887, + "loss": 1.7589, + "step": 2102 + }, + { + "epoch": 0.22183544303797467, + "grad_norm": 0.7970278859138489, + "learning_rate": 0.001336854082107388, + "loss": 1.7366, + "step": 2103 + }, + { + "epoch": 0.2219409282700422, + "grad_norm": 0.5158924460411072, + "learning_rate": 0.001336697718014761, + "loss": 1.7284, + "step": 2104 + }, + { + "epoch": 0.2220464135021097, + "grad_norm": 0.6897403001785278, + "learning_rate": 0.001336541288179729, + "loss": 1.7535, + "step": 2105 + }, + { + "epoch": 0.2221518987341772, + "grad_norm": 0.5827572345733643, + "learning_rate": 0.0013363847926198208, + "loss": 1.7077, + "step": 2106 + }, + { + "epoch": 0.22225738396624473, + "grad_norm": 0.6498978137969971, + "learning_rate": 0.0013362282313525728, + "loss": 1.7255, + "step": 2107 + }, + { + "epoch": 0.22236286919831225, + "grad_norm": 0.9308086037635803, + "learning_rate": 0.001336071604395528, + "loss": 1.7144, + "step": 2108 + }, + { + "epoch": 0.22246835443037974, + 
"grad_norm": 0.7342126369476318, + "learning_rate": 0.0013359149117662377, + "loss": 1.7295, + "step": 2109 + }, + { + "epoch": 0.22257383966244726, + "grad_norm": 0.6221300959587097, + "learning_rate": 0.00133575815348226, + "loss": 1.7262, + "step": 2110 + }, + { + "epoch": 0.22267932489451478, + "grad_norm": 1.005044937133789, + "learning_rate": 0.0013356013295611603, + "loss": 1.7002, + "step": 2111 + }, + { + "epoch": 0.22278481012658227, + "grad_norm": 0.616066038608551, + "learning_rate": 0.0013354444400205114, + "loss": 1.7197, + "step": 2112 + }, + { + "epoch": 0.2228902953586498, + "grad_norm": 0.7054697871208191, + "learning_rate": 0.0013352874848778938, + "loss": 1.7616, + "step": 2113 + }, + { + "epoch": 0.2229957805907173, + "grad_norm": 0.8680294752120972, + "learning_rate": 0.0013351304641508951, + "loss": 1.7333, + "step": 2114 + }, + { + "epoch": 0.2231012658227848, + "grad_norm": 0.6008896231651306, + "learning_rate": 0.0013349733778571101, + "loss": 1.7279, + "step": 2115 + }, + { + "epoch": 0.22320675105485233, + "grad_norm": 0.7241621017456055, + "learning_rate": 0.0013348162260141412, + "loss": 1.7245, + "step": 2116 + }, + { + "epoch": 0.22331223628691982, + "grad_norm": 0.8356872797012329, + "learning_rate": 0.001334659008639598, + "loss": 1.7412, + "step": 2117 + }, + { + "epoch": 0.22341772151898734, + "grad_norm": 0.5820906162261963, + "learning_rate": 0.0013345017257510975, + "loss": 1.7172, + "step": 2118 + }, + { + "epoch": 0.22352320675105486, + "grad_norm": 0.6455304622650146, + "learning_rate": 0.001334344377366264, + "loss": 1.746, + "step": 2119 + }, + { + "epoch": 0.22362869198312235, + "grad_norm": 0.5971921682357788, + "learning_rate": 0.0013341869635027292, + "loss": 1.7178, + "step": 2120 + }, + { + "epoch": 0.22373417721518987, + "grad_norm": 0.7086024284362793, + "learning_rate": 0.0013340294841781323, + "loss": 1.7381, + "step": 2121 + }, + { + "epoch": 0.2238396624472574, + "grad_norm": 0.8033049702644348, + 
"learning_rate": 0.0013338719394101193, + "loss": 1.7063, + "step": 2122 + }, + { + "epoch": 0.22394514767932489, + "grad_norm": 0.5557394623756409, + "learning_rate": 0.001333714329216344, + "loss": 1.7538, + "step": 2123 + }, + { + "epoch": 0.2240506329113924, + "grad_norm": 0.7954732775688171, + "learning_rate": 0.0013335566536144675, + "loss": 1.7307, + "step": 2124 + }, + { + "epoch": 0.22415611814345993, + "grad_norm": 0.7150917649269104, + "learning_rate": 0.0013333989126221581, + "loss": 1.7114, + "step": 2125 + }, + { + "epoch": 0.22426160337552742, + "grad_norm": 0.593510091304779, + "learning_rate": 0.0013332411062570914, + "loss": 1.7259, + "step": 2126 + }, + { + "epoch": 0.22436708860759494, + "grad_norm": 0.9400124549865723, + "learning_rate": 0.0013330832345369505, + "loss": 1.7711, + "step": 2127 + }, + { + "epoch": 0.22447257383966246, + "grad_norm": 0.7373257875442505, + "learning_rate": 0.0013329252974794256, + "loss": 1.7213, + "step": 2128 + }, + { + "epoch": 0.22457805907172995, + "grad_norm": 0.5879307985305786, + "learning_rate": 0.0013327672951022145, + "loss": 1.7587, + "step": 2129 + }, + { + "epoch": 0.22468354430379747, + "grad_norm": 0.723674476146698, + "learning_rate": 0.001332609227423022, + "loss": 1.7363, + "step": 2130 + }, + { + "epoch": 0.224789029535865, + "grad_norm": 0.6263309121131897, + "learning_rate": 0.0013324510944595605, + "loss": 1.6999, + "step": 2131 + }, + { + "epoch": 0.22489451476793249, + "grad_norm": 0.5727064609527588, + "learning_rate": 0.0013322928962295492, + "loss": 1.6751, + "step": 2132 + }, + { + "epoch": 0.225, + "grad_norm": 0.6407716870307922, + "learning_rate": 0.0013321346327507158, + "loss": 1.7043, + "step": 2133 + }, + { + "epoch": 0.2251054852320675, + "grad_norm": 0.6424069404602051, + "learning_rate": 0.0013319763040407938, + "loss": 1.7509, + "step": 2134 + }, + { + "epoch": 0.22521097046413502, + "grad_norm": 0.6041619181632996, + "learning_rate": 0.0013318179101175246, + "loss": 1.7259, 
+ "step": 2135 + }, + { + "epoch": 0.22531645569620254, + "grad_norm": 0.7191154360771179, + "learning_rate": 0.0013316594509986577, + "loss": 1.7616, + "step": 2136 + }, + { + "epoch": 0.22542194092827003, + "grad_norm": 0.6156099438667297, + "learning_rate": 0.0013315009267019487, + "loss": 1.7277, + "step": 2137 + }, + { + "epoch": 0.22552742616033755, + "grad_norm": 0.6527858972549438, + "learning_rate": 0.0013313423372451614, + "loss": 1.7243, + "step": 2138 + }, + { + "epoch": 0.22563291139240507, + "grad_norm": 0.6912203431129456, + "learning_rate": 0.0013311836826460665, + "loss": 1.752, + "step": 2139 + }, + { + "epoch": 0.22573839662447256, + "grad_norm": 0.5403936505317688, + "learning_rate": 0.0013310249629224417, + "loss": 1.7509, + "step": 2140 + }, + { + "epoch": 0.22584388185654009, + "grad_norm": 0.6552042365074158, + "learning_rate": 0.0013308661780920728, + "loss": 1.7238, + "step": 2141 + }, + { + "epoch": 0.2259493670886076, + "grad_norm": 0.556181788444519, + "learning_rate": 0.0013307073281727518, + "loss": 1.6905, + "step": 2142 + }, + { + "epoch": 0.2260548523206751, + "grad_norm": 0.558937132358551, + "learning_rate": 0.0013305484131822792, + "loss": 1.722, + "step": 2143 + }, + { + "epoch": 0.22616033755274262, + "grad_norm": 0.5650550127029419, + "learning_rate": 0.001330389433138462, + "loss": 1.7821, + "step": 2144 + }, + { + "epoch": 0.22626582278481014, + "grad_norm": 0.5484701991081238, + "learning_rate": 0.0013302303880591147, + "loss": 1.7442, + "step": 2145 + }, + { + "epoch": 0.22637130801687763, + "grad_norm": 0.7235498428344727, + "learning_rate": 0.0013300712779620593, + "loss": 1.7283, + "step": 2146 + }, + { + "epoch": 0.22647679324894515, + "grad_norm": 0.555226743221283, + "learning_rate": 0.0013299121028651246, + "loss": 1.7619, + "step": 2147 + }, + { + "epoch": 0.22658227848101264, + "grad_norm": 0.6549233198165894, + "learning_rate": 0.001329752862786147, + "loss": 1.7439, + "step": 2148 + }, + { + "epoch": 
0.22668776371308016, + "grad_norm": 0.7560024857521057, + "learning_rate": 0.0013295935577429703, + "loss": 1.7179, + "step": 2149 + }, + { + "epoch": 0.22679324894514769, + "grad_norm": 0.5178589224815369, + "learning_rate": 0.0013294341877534454, + "loss": 1.703, + "step": 2150 + }, + { + "epoch": 0.22689873417721518, + "grad_norm": 0.7874019742012024, + "learning_rate": 0.0013292747528354304, + "loss": 1.7873, + "step": 2151 + }, + { + "epoch": 0.2270042194092827, + "grad_norm": 0.7091823220252991, + "learning_rate": 0.0013291152530067907, + "loss": 1.7942, + "step": 2152 + }, + { + "epoch": 0.22710970464135022, + "grad_norm": 0.584437370300293, + "learning_rate": 0.0013289556882853993, + "loss": 1.7131, + "step": 2153 + }, + { + "epoch": 0.2272151898734177, + "grad_norm": 0.5778617858886719, + "learning_rate": 0.0013287960586891362, + "loss": 1.7485, + "step": 2154 + }, + { + "epoch": 0.22732067510548523, + "grad_norm": 0.5578702092170715, + "learning_rate": 0.0013286363642358884, + "loss": 1.7484, + "step": 2155 + }, + { + "epoch": 0.22742616033755275, + "grad_norm": 0.608002781867981, + "learning_rate": 0.0013284766049435504, + "loss": 1.7174, + "step": 2156 + }, + { + "epoch": 0.22753164556962024, + "grad_norm": 0.625825047492981, + "learning_rate": 0.0013283167808300247, + "loss": 1.7489, + "step": 2157 + }, + { + "epoch": 0.22763713080168776, + "grad_norm": 0.5937149524688721, + "learning_rate": 0.0013281568919132198, + "loss": 1.7066, + "step": 2158 + }, + { + "epoch": 0.22774261603375529, + "grad_norm": 0.5097715258598328, + "learning_rate": 0.0013279969382110524, + "loss": 1.7777, + "step": 2159 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 0.6513052582740784, + "learning_rate": 0.0013278369197414458, + "loss": 1.7343, + "step": 2160 + }, + { + "epoch": 0.2279535864978903, + "grad_norm": 0.7718490362167358, + "learning_rate": 0.0013276768365223306, + "loss": 1.7374, + "step": 2161 + }, + { + "epoch": 0.22805907172995782, + "grad_norm": 
0.6147799491882324, + "learning_rate": 0.0013275166885716458, + "loss": 1.7148, + "step": 2162 + }, + { + "epoch": 0.2281645569620253, + "grad_norm": 0.5381059050559998, + "learning_rate": 0.0013273564759073361, + "loss": 1.7313, + "step": 2163 + }, + { + "epoch": 0.22827004219409283, + "grad_norm": 0.7116784453392029, + "learning_rate": 0.0013271961985473544, + "loss": 1.715, + "step": 2164 + }, + { + "epoch": 0.22837552742616032, + "grad_norm": 0.5957092642784119, + "learning_rate": 0.0013270358565096606, + "loss": 1.6808, + "step": 2165 + }, + { + "epoch": 0.22848101265822784, + "grad_norm": 0.5596440434455872, + "learning_rate": 0.0013268754498122215, + "loss": 1.7582, + "step": 2166 + }, + { + "epoch": 0.22858649789029536, + "grad_norm": 0.5727484226226807, + "learning_rate": 0.0013267149784730117, + "loss": 1.7636, + "step": 2167 + }, + { + "epoch": 0.22869198312236286, + "grad_norm": 0.5694783329963684, + "learning_rate": 0.0013265544425100128, + "loss": 1.7593, + "step": 2168 + }, + { + "epoch": 0.22879746835443038, + "grad_norm": 0.5721499919891357, + "learning_rate": 0.0013263938419412137, + "loss": 1.719, + "step": 2169 + }, + { + "epoch": 0.2289029535864979, + "grad_norm": 0.5031986236572266, + "learning_rate": 0.0013262331767846104, + "loss": 1.719, + "step": 2170 + }, + { + "epoch": 0.2290084388185654, + "grad_norm": 0.5092172622680664, + "learning_rate": 0.0013260724470582064, + "loss": 1.7156, + "step": 2171 + }, + { + "epoch": 0.2291139240506329, + "grad_norm": 0.5344884991645813, + "learning_rate": 0.001325911652780012, + "loss": 1.6984, + "step": 2172 + }, + { + "epoch": 0.22921940928270043, + "grad_norm": 0.5626872181892395, + "learning_rate": 0.0013257507939680453, + "loss": 1.6942, + "step": 2173 + }, + { + "epoch": 0.22932489451476792, + "grad_norm": 0.511569619178772, + "learning_rate": 0.0013255898706403312, + "loss": 1.7022, + "step": 2174 + }, + { + "epoch": 0.22943037974683544, + "grad_norm": 0.5308660864830017, + "learning_rate": 
0.001325428882814902, + "loss": 1.7016, + "step": 2175 + }, + { + "epoch": 0.22953586497890296, + "grad_norm": 0.5853353142738342, + "learning_rate": 0.001325267830509797, + "loss": 1.7172, + "step": 2176 + }, + { + "epoch": 0.22964135021097046, + "grad_norm": 0.5182043313980103, + "learning_rate": 0.0013251067137430629, + "loss": 1.6978, + "step": 2177 + }, + { + "epoch": 0.22974683544303798, + "grad_norm": 0.5312547087669373, + "learning_rate": 0.001324945532532754, + "loss": 1.7175, + "step": 2178 + }, + { + "epoch": 0.2298523206751055, + "grad_norm": 0.579992949962616, + "learning_rate": 0.0013247842868969312, + "loss": 1.7244, + "step": 2179 + }, + { + "epoch": 0.229957805907173, + "grad_norm": 0.5556614398956299, + "learning_rate": 0.0013246229768536628, + "loss": 1.7293, + "step": 2180 + }, + { + "epoch": 0.2300632911392405, + "grad_norm": 0.5850134491920471, + "learning_rate": 0.0013244616024210246, + "loss": 1.7508, + "step": 2181 + }, + { + "epoch": 0.230168776371308, + "grad_norm": 0.5910875201225281, + "learning_rate": 0.0013243001636170993, + "loss": 1.7264, + "step": 2182 + }, + { + "epoch": 0.23027426160337552, + "grad_norm": 0.6292439699172974, + "learning_rate": 0.0013241386604599772, + "loss": 1.7421, + "step": 2183 + }, + { + "epoch": 0.23037974683544304, + "grad_norm": 0.630033016204834, + "learning_rate": 0.001323977092967755, + "loss": 1.7334, + "step": 2184 + }, + { + "epoch": 0.23048523206751054, + "grad_norm": 0.618274450302124, + "learning_rate": 0.0013238154611585375, + "loss": 1.7109, + "step": 2185 + }, + { + "epoch": 0.23059071729957806, + "grad_norm": 0.7799015641212463, + "learning_rate": 0.0013236537650504361, + "loss": 1.75, + "step": 2186 + }, + { + "epoch": 0.23069620253164558, + "grad_norm": 0.6344192028045654, + "learning_rate": 0.00132349200466157, + "loss": 1.7115, + "step": 2187 + }, + { + "epoch": 0.23080168776371307, + "grad_norm": 0.5717906355857849, + "learning_rate": 0.0013233301800100652, + "loss": 1.7285, + "step": 
2188 + }, + { + "epoch": 0.2309071729957806, + "grad_norm": 0.5539491176605225, + "learning_rate": 0.0013231682911140545, + "loss": 1.732, + "step": 2189 + }, + { + "epoch": 0.2310126582278481, + "grad_norm": 0.6524171829223633, + "learning_rate": 0.001323006337991679, + "loss": 1.7401, + "step": 2190 + }, + { + "epoch": 0.2311181434599156, + "grad_norm": 0.583052396774292, + "learning_rate": 0.0013228443206610861, + "loss": 1.7316, + "step": 2191 + }, + { + "epoch": 0.23122362869198312, + "grad_norm": 0.6273820996284485, + "learning_rate": 0.0013226822391404305, + "loss": 1.7278, + "step": 2192 + }, + { + "epoch": 0.23132911392405064, + "grad_norm": 0.645682156085968, + "learning_rate": 0.0013225200934478744, + "loss": 1.7752, + "step": 2193 + }, + { + "epoch": 0.23143459915611814, + "grad_norm": 0.5582815408706665, + "learning_rate": 0.0013223578836015868, + "loss": 1.7633, + "step": 2194 + }, + { + "epoch": 0.23154008438818566, + "grad_norm": 0.6738175749778748, + "learning_rate": 0.0013221956096197446, + "loss": 1.711, + "step": 2195 + }, + { + "epoch": 0.23164556962025318, + "grad_norm": 0.5827831625938416, + "learning_rate": 0.001322033271520531, + "loss": 1.6954, + "step": 2196 + }, + { + "epoch": 0.23175105485232067, + "grad_norm": 0.5941175818443298, + "learning_rate": 0.001321870869322137, + "loss": 1.7178, + "step": 2197 + }, + { + "epoch": 0.2318565400843882, + "grad_norm": 0.5516389608383179, + "learning_rate": 0.0013217084030427604, + "loss": 1.7007, + "step": 2198 + }, + { + "epoch": 0.23196202531645568, + "grad_norm": 0.53468918800354, + "learning_rate": 0.0013215458727006064, + "loss": 1.7031, + "step": 2199 + }, + { + "epoch": 0.2320675105485232, + "grad_norm": 0.5480815768241882, + "learning_rate": 0.0013213832783138873, + "loss": 1.7614, + "step": 2200 + }, + { + "epoch": 0.23217299578059072, + "grad_norm": 0.5701358318328857, + "learning_rate": 0.0013212206199008226, + "loss": 1.7149, + "step": 2201 + }, + { + "epoch": 0.23227848101265822, + 
"grad_norm": 0.5269657373428345, + "learning_rate": 0.0013210578974796393, + "loss": 1.7207, + "step": 2202 + }, + { + "epoch": 0.23238396624472574, + "grad_norm": 0.5768763422966003, + "learning_rate": 0.001320895111068571, + "loss": 1.7103, + "step": 2203 + }, + { + "epoch": 0.23248945147679326, + "grad_norm": 0.6701122522354126, + "learning_rate": 0.0013207322606858588, + "loss": 1.7042, + "step": 2204 + }, + { + "epoch": 0.23259493670886075, + "grad_norm": 0.6234219670295715, + "learning_rate": 0.001320569346349751, + "loss": 1.6607, + "step": 2205 + }, + { + "epoch": 0.23270042194092827, + "grad_norm": 0.6167173981666565, + "learning_rate": 0.0013204063680785025, + "loss": 1.772, + "step": 2206 + }, + { + "epoch": 0.2328059071729958, + "grad_norm": 0.6779091954231262, + "learning_rate": 0.0013202433258903761, + "loss": 1.7161, + "step": 2207 + }, + { + "epoch": 0.23291139240506328, + "grad_norm": 0.5567956566810608, + "learning_rate": 0.001320080219803642, + "loss": 1.7263, + "step": 2208 + }, + { + "epoch": 0.2330168776371308, + "grad_norm": 0.6204859614372253, + "learning_rate": 0.0013199170498365764, + "loss": 1.6972, + "step": 2209 + }, + { + "epoch": 0.23312236286919832, + "grad_norm": 0.5760573148727417, + "learning_rate": 0.0013197538160074633, + "loss": 1.7289, + "step": 2210 + }, + { + "epoch": 0.23322784810126582, + "grad_norm": 0.6871630549430847, + "learning_rate": 0.0013195905183345943, + "loss": 1.7263, + "step": 2211 + }, + { + "epoch": 0.23333333333333334, + "grad_norm": 0.7508535385131836, + "learning_rate": 0.0013194271568362673, + "loss": 1.7254, + "step": 2212 + }, + { + "epoch": 0.23343881856540086, + "grad_norm": 0.5610626935958862, + "learning_rate": 0.001319263731530788, + "loss": 1.7201, + "step": 2213 + }, + { + "epoch": 0.23354430379746835, + "grad_norm": 0.7460150718688965, + "learning_rate": 0.0013191002424364693, + "loss": 1.7308, + "step": 2214 + }, + { + "epoch": 0.23364978902953587, + "grad_norm": 0.6866254210472107, + 
"learning_rate": 0.0013189366895716302, + "loss": 1.7617, + "step": 2215 + }, + { + "epoch": 0.23375527426160336, + "grad_norm": 0.5705633759498596, + "learning_rate": 0.0013187730729545982, + "loss": 1.7189, + "step": 2216 + }, + { + "epoch": 0.23386075949367088, + "grad_norm": 0.6171281933784485, + "learning_rate": 0.0013186093926037072, + "loss": 1.7251, + "step": 2217 + }, + { + "epoch": 0.2339662447257384, + "grad_norm": 0.625684380531311, + "learning_rate": 0.0013184456485372986, + "loss": 1.7406, + "step": 2218 + }, + { + "epoch": 0.2340717299578059, + "grad_norm": 0.5502495765686035, + "learning_rate": 0.0013182818407737203, + "loss": 1.6944, + "step": 2219 + }, + { + "epoch": 0.23417721518987342, + "grad_norm": 0.6994450688362122, + "learning_rate": 0.0013181179693313283, + "loss": 1.7596, + "step": 2220 + }, + { + "epoch": 0.23428270042194094, + "grad_norm": 0.6656115055084229, + "learning_rate": 0.0013179540342284847, + "loss": 1.7141, + "step": 2221 + }, + { + "epoch": 0.23438818565400843, + "grad_norm": 0.5189930200576782, + "learning_rate": 0.0013177900354835598, + "loss": 1.6917, + "step": 2222 + }, + { + "epoch": 0.23449367088607595, + "grad_norm": 0.7263765335083008, + "learning_rate": 0.00131762597311493, + "loss": 1.7108, + "step": 2223 + }, + { + "epoch": 0.23459915611814347, + "grad_norm": 0.6704619526863098, + "learning_rate": 0.0013174618471409793, + "loss": 1.7511, + "step": 2224 + }, + { + "epoch": 0.23470464135021096, + "grad_norm": 0.5926353335380554, + "learning_rate": 0.0013172976575800991, + "loss": 1.7172, + "step": 2225 + }, + { + "epoch": 0.23481012658227848, + "grad_norm": 0.8064718842506409, + "learning_rate": 0.0013171334044506878, + "loss": 1.7417, + "step": 2226 + }, + { + "epoch": 0.234915611814346, + "grad_norm": 0.6844303607940674, + "learning_rate": 0.0013169690877711502, + "loss": 1.7118, + "step": 2227 + }, + { + "epoch": 0.2350210970464135, + "grad_norm": 0.6386522650718689, + "learning_rate": 0.0013168047075598993, + 
"loss": 1.7257, + "step": 2228 + }, + { + "epoch": 0.23512658227848102, + "grad_norm": 0.7067338228225708, + "learning_rate": 0.0013166402638353548, + "loss": 1.7422, + "step": 2229 + }, + { + "epoch": 0.23523206751054854, + "grad_norm": 0.605790376663208, + "learning_rate": 0.0013164757566159428, + "loss": 1.7075, + "step": 2230 + }, + { + "epoch": 0.23533755274261603, + "grad_norm": 0.7428744435310364, + "learning_rate": 0.0013163111859200978, + "loss": 1.7176, + "step": 2231 + }, + { + "epoch": 0.23544303797468355, + "grad_norm": 0.7512932419776917, + "learning_rate": 0.0013161465517662603, + "loss": 1.732, + "step": 2232 + }, + { + "epoch": 0.23554852320675104, + "grad_norm": 0.7994312047958374, + "learning_rate": 0.001315981854172879, + "loss": 1.7453, + "step": 2233 + }, + { + "epoch": 0.23565400843881856, + "grad_norm": 0.7386929392814636, + "learning_rate": 0.0013158170931584084, + "loss": 1.7149, + "step": 2234 + }, + { + "epoch": 0.23575949367088608, + "grad_norm": 0.5633136034011841, + "learning_rate": 0.0013156522687413114, + "loss": 1.7049, + "step": 2235 + }, + { + "epoch": 0.23586497890295358, + "grad_norm": 0.7115485668182373, + "learning_rate": 0.0013154873809400568, + "loss": 1.7199, + "step": 2236 + }, + { + "epoch": 0.2359704641350211, + "grad_norm": 0.5762776732444763, + "learning_rate": 0.0013153224297731215, + "loss": 1.6892, + "step": 2237 + }, + { + "epoch": 0.23607594936708862, + "grad_norm": 0.6177297830581665, + "learning_rate": 0.0013151574152589888, + "loss": 1.7582, + "step": 2238 + }, + { + "epoch": 0.2361814345991561, + "grad_norm": 0.6400185227394104, + "learning_rate": 0.00131499233741615, + "loss": 1.7084, + "step": 2239 + }, + { + "epoch": 0.23628691983122363, + "grad_norm": 0.6630924344062805, + "learning_rate": 0.001314827196263102, + "loss": 1.7211, + "step": 2240 + }, + { + "epoch": 0.23639240506329115, + "grad_norm": 0.5788124203681946, + "learning_rate": 0.0013146619918183507, + "loss": 1.7002, + "step": 2241 + }, + { + 
"epoch": 0.23649789029535864, + "grad_norm": 0.6539347767829895, + "learning_rate": 0.0013144967241004073, + "loss": 1.6899, + "step": 2242 + }, + { + "epoch": 0.23660337552742616, + "grad_norm": 0.6754949688911438, + "learning_rate": 0.001314331393127791, + "loss": 1.6931, + "step": 2243 + }, + { + "epoch": 0.23670886075949368, + "grad_norm": 0.5992099642753601, + "learning_rate": 0.0013141659989190282, + "loss": 1.7055, + "step": 2244 + }, + { + "epoch": 0.23681434599156118, + "grad_norm": 0.584389865398407, + "learning_rate": 0.001314000541492652, + "loss": 1.7072, + "step": 2245 + }, + { + "epoch": 0.2369198312236287, + "grad_norm": 0.604755699634552, + "learning_rate": 0.0013138350208672029, + "loss": 1.7699, + "step": 2246 + }, + { + "epoch": 0.2370253164556962, + "grad_norm": 0.6642125844955444, + "learning_rate": 0.001313669437061228, + "loss": 1.6873, + "step": 2247 + }, + { + "epoch": 0.2371308016877637, + "grad_norm": 0.6052283644676208, + "learning_rate": 0.0013135037900932822, + "loss": 1.7454, + "step": 2248 + }, + { + "epoch": 0.23723628691983123, + "grad_norm": 0.6059631705284119, + "learning_rate": 0.0013133380799819267, + "loss": 1.7415, + "step": 2249 + }, + { + "epoch": 0.23734177215189872, + "grad_norm": 0.7319592237472534, + "learning_rate": 0.0013131723067457302, + "loss": 1.7154, + "step": 2250 + }, + { + "epoch": 0.23744725738396624, + "grad_norm": 0.7335344552993774, + "learning_rate": 0.0013130064704032684, + "loss": 1.7039, + "step": 2251 + }, + { + "epoch": 0.23755274261603376, + "grad_norm": 0.6111718416213989, + "learning_rate": 0.0013128405709731245, + "loss": 1.7109, + "step": 2252 + }, + { + "epoch": 0.23765822784810126, + "grad_norm": 0.6344083547592163, + "learning_rate": 0.001312674608473888, + "loss": 1.6938, + "step": 2253 + }, + { + "epoch": 0.23776371308016878, + "grad_norm": 0.8164600729942322, + "learning_rate": 0.0013125085829241558, + "loss": 1.7078, + "step": 2254 + }, + { + "epoch": 0.2378691983122363, + "grad_norm": 
0.7614291906356812, + "learning_rate": 0.0013123424943425317, + "loss": 1.711, + "step": 2255 + }, + { + "epoch": 0.2379746835443038, + "grad_norm": 0.5800808072090149, + "learning_rate": 0.0013121763427476273, + "loss": 1.6956, + "step": 2256 + }, + { + "epoch": 0.2380801687763713, + "grad_norm": 0.6046839356422424, + "learning_rate": 0.0013120101281580605, + "loss": 1.678, + "step": 2257 + }, + { + "epoch": 0.23818565400843883, + "grad_norm": 0.6824318766593933, + "learning_rate": 0.0013118438505924563, + "loss": 1.7131, + "step": 2258 + }, + { + "epoch": 0.23829113924050632, + "grad_norm": 0.5597408413887024, + "learning_rate": 0.001311677510069447, + "loss": 1.7317, + "step": 2259 + }, + { + "epoch": 0.23839662447257384, + "grad_norm": 0.6088464856147766, + "learning_rate": 0.0013115111066076721, + "loss": 1.7288, + "step": 2260 + }, + { + "epoch": 0.23850210970464136, + "grad_norm": 0.5589801669120789, + "learning_rate": 0.0013113446402257774, + "loss": 1.7099, + "step": 2261 + }, + { + "epoch": 0.23860759493670886, + "grad_norm": 0.67257159948349, + "learning_rate": 0.001311178110942417, + "loss": 1.7149, + "step": 2262 + }, + { + "epoch": 0.23871308016877638, + "grad_norm": 0.6436785459518433, + "learning_rate": 0.0013110115187762506, + "loss": 1.731, + "step": 2263 + }, + { + "epoch": 0.23881856540084387, + "grad_norm": 0.5535885095596313, + "learning_rate": 0.0013108448637459465, + "loss": 1.7636, + "step": 2264 + }, + { + "epoch": 0.2389240506329114, + "grad_norm": 0.6112661957740784, + "learning_rate": 0.0013106781458701784, + "loss": 1.7711, + "step": 2265 + }, + { + "epoch": 0.2390295358649789, + "grad_norm": 0.6057182550430298, + "learning_rate": 0.0013105113651676287, + "loss": 1.7381, + "step": 2266 + }, + { + "epoch": 0.2391350210970464, + "grad_norm": 0.5679678320884705, + "learning_rate": 0.001310344521656985, + "loss": 1.7702, + "step": 2267 + }, + { + "epoch": 0.23924050632911392, + "grad_norm": 0.6273238062858582, + "learning_rate": 
0.001310177615356944, + "loss": 1.7359, + "step": 2268 + }, + { + "epoch": 0.23934599156118144, + "grad_norm": 0.5258743166923523, + "learning_rate": 0.0013100106462862076, + "loss": 1.7318, + "step": 2269 + }, + { + "epoch": 0.23945147679324894, + "grad_norm": 0.6527169942855835, + "learning_rate": 0.0013098436144634862, + "loss": 1.7371, + "step": 2270 + }, + { + "epoch": 0.23955696202531646, + "grad_norm": 0.7072855830192566, + "learning_rate": 0.0013096765199074958, + "loss": 1.7422, + "step": 2271 + }, + { + "epoch": 0.23966244725738398, + "grad_norm": 0.5013949871063232, + "learning_rate": 0.0013095093626369608, + "loss": 1.6975, + "step": 2272 + }, + { + "epoch": 0.23976793248945147, + "grad_norm": 0.7013154029846191, + "learning_rate": 0.0013093421426706117, + "loss": 1.6914, + "step": 2273 + }, + { + "epoch": 0.239873417721519, + "grad_norm": 0.6441731452941895, + "learning_rate": 0.0013091748600271862, + "loss": 1.7414, + "step": 2274 + }, + { + "epoch": 0.2399789029535865, + "grad_norm": 0.520341157913208, + "learning_rate": 0.0013090075147254294, + "loss": 1.7158, + "step": 2275 + }, + { + "epoch": 0.240084388185654, + "grad_norm": 0.8411921262741089, + "learning_rate": 0.0013088401067840932, + "loss": 1.7378, + "step": 2276 + }, + { + "epoch": 0.24018987341772152, + "grad_norm": 0.6895120739936829, + "learning_rate": 0.0013086726362219363, + "loss": 1.7197, + "step": 2277 + }, + { + "epoch": 0.24029535864978904, + "grad_norm": 0.5694024562835693, + "learning_rate": 0.0013085051030577246, + "loss": 1.7281, + "step": 2278 + }, + { + "epoch": 0.24040084388185654, + "grad_norm": 0.6594669222831726, + "learning_rate": 0.0013083375073102315, + "loss": 1.7346, + "step": 2279 + }, + { + "epoch": 0.24050632911392406, + "grad_norm": 0.5356258749961853, + "learning_rate": 0.0013081698489982364, + "loss": 1.7245, + "step": 2280 + }, + { + "epoch": 0.24061181434599155, + "grad_norm": 0.5801334381103516, + "learning_rate": 0.0013080021281405264, + "loss": 1.7186, + 
"step": 2281 + }, + { + "epoch": 0.24071729957805907, + "grad_norm": 0.6040521860122681, + "learning_rate": 0.0013078343447558954, + "loss": 1.6833, + "step": 2282 + }, + { + "epoch": 0.2408227848101266, + "grad_norm": 0.8221340775489807, + "learning_rate": 0.0013076664988631447, + "loss": 1.7152, + "step": 2283 + }, + { + "epoch": 0.24092827004219408, + "grad_norm": 0.780543863773346, + "learning_rate": 0.001307498590481082, + "loss": 1.675, + "step": 2284 + }, + { + "epoch": 0.2410337552742616, + "grad_norm": 0.5482277274131775, + "learning_rate": 0.001307330619628522, + "loss": 1.705, + "step": 2285 + }, + { + "epoch": 0.24113924050632912, + "grad_norm": 0.7772508263587952, + "learning_rate": 0.0013071625863242875, + "loss": 1.7476, + "step": 2286 + }, + { + "epoch": 0.24124472573839661, + "grad_norm": 0.6488258242607117, + "learning_rate": 0.0013069944905872064, + "loss": 1.7338, + "step": 2287 + }, + { + "epoch": 0.24135021097046414, + "grad_norm": 0.6228360533714294, + "learning_rate": 0.0013068263324361156, + "loss": 1.7256, + "step": 2288 + }, + { + "epoch": 0.24145569620253166, + "grad_norm": 0.8063803911209106, + "learning_rate": 0.0013066581118898574, + "loss": 1.6987, + "step": 2289 + }, + { + "epoch": 0.24156118143459915, + "grad_norm": 0.5997255444526672, + "learning_rate": 0.001306489828967282, + "loss": 1.7186, + "step": 2290 + }, + { + "epoch": 0.24166666666666667, + "grad_norm": 0.6242216229438782, + "learning_rate": 0.0013063214836872465, + "loss": 1.7223, + "step": 2291 + }, + { + "epoch": 0.2417721518987342, + "grad_norm": 0.679261326789856, + "learning_rate": 0.0013061530760686145, + "loss": 1.6693, + "step": 2292 + }, + { + "epoch": 0.24187763713080168, + "grad_norm": 0.5149263739585876, + "learning_rate": 0.0013059846061302574, + "loss": 1.7167, + "step": 2293 + }, + { + "epoch": 0.2419831223628692, + "grad_norm": 0.645095705986023, + "learning_rate": 0.0013058160738910526, + "loss": 1.7117, + "step": 2294 + }, + { + "epoch": 
0.24208860759493672, + "grad_norm": 0.5920207500457764, + "learning_rate": 0.0013056474793698852, + "loss": 1.7176, + "step": 2295 + }, + { + "epoch": 0.24219409282700421, + "grad_norm": 0.5344904661178589, + "learning_rate": 0.001305478822585647, + "loss": 1.7251, + "step": 2296 + }, + { + "epoch": 0.24229957805907174, + "grad_norm": 0.5740098357200623, + "learning_rate": 0.001305310103557237, + "loss": 1.7627, + "step": 2297 + }, + { + "epoch": 0.24240506329113923, + "grad_norm": 0.5958481431007385, + "learning_rate": 0.0013051413223035607, + "loss": 1.7096, + "step": 2298 + }, + { + "epoch": 0.24251054852320675, + "grad_norm": 0.5784908533096313, + "learning_rate": 0.0013049724788435312, + "loss": 1.7025, + "step": 2299 + }, + { + "epoch": 0.24261603375527427, + "grad_norm": 0.5362856388092041, + "learning_rate": 0.0013048035731960679, + "loss": 1.6752, + "step": 2300 + }, + { + "epoch": 0.24272151898734176, + "grad_norm": 0.6628829836845398, + "learning_rate": 0.0013046346053800979, + "loss": 1.7375, + "step": 2301 + }, + { + "epoch": 0.24282700421940928, + "grad_norm": 0.6449332237243652, + "learning_rate": 0.0013044655754145546, + "loss": 1.685, + "step": 2302 + }, + { + "epoch": 0.2429324894514768, + "grad_norm": 0.7465482354164124, + "learning_rate": 0.001304296483318379, + "loss": 1.7336, + "step": 2303 + }, + { + "epoch": 0.2430379746835443, + "grad_norm": 0.7726309299468994, + "learning_rate": 0.0013041273291105181, + "loss": 1.717, + "step": 2304 + }, + { + "epoch": 0.24314345991561181, + "grad_norm": 0.5984777808189392, + "learning_rate": 0.0013039581128099272, + "loss": 1.6957, + "step": 2305 + }, + { + "epoch": 0.24324894514767934, + "grad_norm": 0.8921775221824646, + "learning_rate": 0.0013037888344355673, + "loss": 1.7398, + "step": 2306 + }, + { + "epoch": 0.24335443037974683, + "grad_norm": 0.841029167175293, + "learning_rate": 0.001303619494006407, + "loss": 1.692, + "step": 2307 + }, + { + "epoch": 0.24345991561181435, + "grad_norm": 
0.5733690857887268, + "learning_rate": 0.0013034500915414218, + "loss": 1.6737, + "step": 2308 + }, + { + "epoch": 0.24356540084388187, + "grad_norm": 0.6575950980186462, + "learning_rate": 0.0013032806270595941, + "loss": 1.7266, + "step": 2309 + }, + { + "epoch": 0.24367088607594936, + "grad_norm": 0.7530551552772522, + "learning_rate": 0.0013031111005799133, + "loss": 1.7298, + "step": 2310 + }, + { + "epoch": 0.24377637130801688, + "grad_norm": 0.5367902517318726, + "learning_rate": 0.0013029415121213756, + "loss": 1.7091, + "step": 2311 + }, + { + "epoch": 0.2438818565400844, + "grad_norm": 0.6387843489646912, + "learning_rate": 0.0013027718617029842, + "loss": 1.7476, + "step": 2312 + }, + { + "epoch": 0.2439873417721519, + "grad_norm": 0.6823024153709412, + "learning_rate": 0.0013026021493437495, + "loss": 1.7656, + "step": 2313 + }, + { + "epoch": 0.24409282700421941, + "grad_norm": 0.5034077167510986, + "learning_rate": 0.0013024323750626882, + "loss": 1.721, + "step": 2314 + }, + { + "epoch": 0.2441983122362869, + "grad_norm": 0.5456157922744751, + "learning_rate": 0.0013022625388788248, + "loss": 1.7171, + "step": 2315 + }, + { + "epoch": 0.24430379746835443, + "grad_norm": 0.4980573058128357, + "learning_rate": 0.0013020926408111903, + "loss": 1.7531, + "step": 2316 + }, + { + "epoch": 0.24440928270042195, + "grad_norm": 0.5013088583946228, + "learning_rate": 0.001301922680878822, + "loss": 1.6738, + "step": 2317 + }, + { + "epoch": 0.24451476793248944, + "grad_norm": 0.5180625915527344, + "learning_rate": 0.001301752659100765, + "loss": 1.7112, + "step": 2318 + }, + { + "epoch": 0.24462025316455696, + "grad_norm": 0.5477584004402161, + "learning_rate": 0.001301582575496072, + "loss": 1.7308, + "step": 2319 + }, + { + "epoch": 0.24472573839662448, + "grad_norm": 0.5402711629867554, + "learning_rate": 0.0013014124300838004, + "loss": 1.6764, + "step": 2320 + }, + { + "epoch": 0.24483122362869197, + "grad_norm": 0.5363622903823853, + "learning_rate": 
0.0013012422228830165, + "loss": 1.6826, + "step": 2321 + }, + { + "epoch": 0.2449367088607595, + "grad_norm": 0.5272742509841919, + "learning_rate": 0.0013010719539127927, + "loss": 1.7372, + "step": 2322 + }, + { + "epoch": 0.24504219409282701, + "grad_norm": 0.5466852784156799, + "learning_rate": 0.001300901623192209, + "loss": 1.713, + "step": 2323 + }, + { + "epoch": 0.2451476793248945, + "grad_norm": 0.5376180410385132, + "learning_rate": 0.0013007312307403507, + "loss": 1.7093, + "step": 2324 + }, + { + "epoch": 0.24525316455696203, + "grad_norm": 0.5575082302093506, + "learning_rate": 0.0013005607765763122, + "loss": 1.6863, + "step": 2325 + }, + { + "epoch": 0.24535864978902955, + "grad_norm": 0.5808238387107849, + "learning_rate": 0.0013003902607191934, + "loss": 1.7215, + "step": 2326 + }, + { + "epoch": 0.24546413502109704, + "grad_norm": 0.6304285526275635, + "learning_rate": 0.0013002196831881014, + "loss": 1.6949, + "step": 2327 + }, + { + "epoch": 0.24556962025316456, + "grad_norm": 0.7093430757522583, + "learning_rate": 0.0013000490440021502, + "loss": 1.7017, + "step": 2328 + }, + { + "epoch": 0.24567510548523205, + "grad_norm": 0.6143107414245605, + "learning_rate": 0.0012998783431804608, + "loss": 1.7049, + "step": 2329 + }, + { + "epoch": 0.24578059071729957, + "grad_norm": 0.5203495025634766, + "learning_rate": 0.0012997075807421612, + "loss": 1.679, + "step": 2330 + }, + { + "epoch": 0.2458860759493671, + "grad_norm": 0.5971010327339172, + "learning_rate": 0.0012995367567063861, + "loss": 1.6692, + "step": 2331 + }, + { + "epoch": 0.2459915611814346, + "grad_norm": 0.561768114566803, + "learning_rate": 0.001299365871092277, + "loss": 1.7372, + "step": 2332 + }, + { + "epoch": 0.2460970464135021, + "grad_norm": 0.5698122382164001, + "learning_rate": 0.0012991949239189826, + "loss": 1.7181, + "step": 2333 + }, + { + "epoch": 0.24620253164556963, + "grad_norm": 0.6031834483146667, + "learning_rate": 0.0012990239152056587, + "loss": 1.7302, + 
"step": 2334 + }, + { + "epoch": 0.24630801687763712, + "grad_norm": 0.5961331129074097, + "learning_rate": 0.0012988528449714672, + "loss": 1.7335, + "step": 2335 + }, + { + "epoch": 0.24641350210970464, + "grad_norm": 0.6304205656051636, + "learning_rate": 0.001298681713235578, + "loss": 1.7181, + "step": 2336 + }, + { + "epoch": 0.24651898734177216, + "grad_norm": 0.6469119191169739, + "learning_rate": 0.0012985105200171664, + "loss": 1.647, + "step": 2337 + }, + { + "epoch": 0.24662447257383965, + "grad_norm": 0.6045712232589722, + "learning_rate": 0.001298339265335416, + "loss": 1.687, + "step": 2338 + }, + { + "epoch": 0.24672995780590717, + "grad_norm": 0.5920090079307556, + "learning_rate": 0.0012981679492095166, + "loss": 1.7117, + "step": 2339 + }, + { + "epoch": 0.2468354430379747, + "grad_norm": 0.6217019557952881, + "learning_rate": 0.0012979965716586653, + "loss": 1.6987, + "step": 2340 + }, + { + "epoch": 0.2469409282700422, + "grad_norm": 0.55147784948349, + "learning_rate": 0.0012978251327020655, + "loss": 1.7221, + "step": 2341 + }, + { + "epoch": 0.2470464135021097, + "grad_norm": 0.6472154259681702, + "learning_rate": 0.0012976536323589278, + "loss": 1.7005, + "step": 2342 + }, + { + "epoch": 0.24715189873417723, + "grad_norm": 0.5740470290184021, + "learning_rate": 0.0012974820706484697, + "loss": 1.6854, + "step": 2343 + }, + { + "epoch": 0.24725738396624472, + "grad_norm": 0.5455877184867859, + "learning_rate": 0.001297310447589916, + "loss": 1.7116, + "step": 2344 + }, + { + "epoch": 0.24736286919831224, + "grad_norm": 0.6015012860298157, + "learning_rate": 0.0012971387632024968, + "loss": 1.6971, + "step": 2345 + }, + { + "epoch": 0.24746835443037973, + "grad_norm": 0.5780652165412903, + "learning_rate": 0.0012969670175054515, + "loss": 1.7097, + "step": 2346 + }, + { + "epoch": 0.24757383966244725, + "grad_norm": 0.6733734011650085, + "learning_rate": 0.0012967952105180243, + "loss": 1.6742, + "step": 2347 + }, + { + "epoch": 
0.24767932489451477, + "grad_norm": 0.6043379306793213, + "learning_rate": 0.001296623342259467, + "loss": 1.6807, + "step": 2348 + }, + { + "epoch": 0.24778481012658227, + "grad_norm": 0.6802244186401367, + "learning_rate": 0.0012964514127490388, + "loss": 1.685, + "step": 2349 + }, + { + "epoch": 0.2478902953586498, + "grad_norm": 0.7827993035316467, + "learning_rate": 0.0012962794220060048, + "loss": 1.7294, + "step": 2350 + }, + { + "epoch": 0.2479957805907173, + "grad_norm": 0.679828405380249, + "learning_rate": 0.0012961073700496378, + "loss": 1.7028, + "step": 2351 + }, + { + "epoch": 0.2481012658227848, + "grad_norm": 0.6276321411132812, + "learning_rate": 0.0012959352568992163, + "loss": 1.7316, + "step": 2352 + }, + { + "epoch": 0.24820675105485232, + "grad_norm": 0.7018020749092102, + "learning_rate": 0.0012957630825740274, + "loss": 1.6968, + "step": 2353 + }, + { + "epoch": 0.24831223628691984, + "grad_norm": 0.5448172688484192, + "learning_rate": 0.0012955908470933637, + "loss": 1.7162, + "step": 2354 + }, + { + "epoch": 0.24841772151898733, + "grad_norm": 0.6234979033470154, + "learning_rate": 0.0012954185504765248, + "loss": 1.735, + "step": 2355 + }, + { + "epoch": 0.24852320675105485, + "grad_norm": 0.7254247665405273, + "learning_rate": 0.0012952461927428177, + "loss": 1.7013, + "step": 2356 + }, + { + "epoch": 0.24862869198312237, + "grad_norm": 0.6705814599990845, + "learning_rate": 0.001295073773911556, + "loss": 1.7083, + "step": 2357 + }, + { + "epoch": 0.24873417721518987, + "grad_norm": 0.6563640832901001, + "learning_rate": 0.0012949012940020599, + "loss": 1.7291, + "step": 2358 + }, + { + "epoch": 0.2488396624472574, + "grad_norm": 0.6223191022872925, + "learning_rate": 0.0012947287530336565, + "loss": 1.7023, + "step": 2359 + }, + { + "epoch": 0.2489451476793249, + "grad_norm": 0.6353045105934143, + "learning_rate": 0.0012945561510256801, + "loss": 1.6918, + "step": 2360 + }, + { + "epoch": 0.2490506329113924, + "grad_norm": 
0.6080037355422974, + "learning_rate": 0.0012943834879974717, + "loss": 1.686, + "step": 2361 + }, + { + "epoch": 0.24915611814345992, + "grad_norm": 0.627830982208252, + "learning_rate": 0.001294210763968379, + "loss": 1.7197, + "step": 2362 + }, + { + "epoch": 0.2492616033755274, + "grad_norm": 0.529312014579773, + "learning_rate": 0.0012940379789577565, + "loss": 1.7217, + "step": 2363 + }, + { + "epoch": 0.24936708860759493, + "grad_norm": 0.6784870028495789, + "learning_rate": 0.0012938651329849654, + "loss": 1.6975, + "step": 2364 + }, + { + "epoch": 0.24947257383966245, + "grad_norm": 0.6353651881217957, + "learning_rate": 0.0012936922260693743, + "loss": 1.6801, + "step": 2365 + }, + { + "epoch": 0.24957805907172995, + "grad_norm": 0.7848528623580933, + "learning_rate": 0.0012935192582303582, + "loss": 1.7141, + "step": 2366 + }, + { + "epoch": 0.24968354430379747, + "grad_norm": 0.6391125917434692, + "learning_rate": 0.001293346229487299, + "loss": 1.7297, + "step": 2367 + }, + { + "epoch": 0.249789029535865, + "grad_norm": 0.7028639316558838, + "learning_rate": 0.0012931731398595854, + "loss": 1.7201, + "step": 2368 + }, + { + "epoch": 0.24989451476793248, + "grad_norm": 0.6279028654098511, + "learning_rate": 0.001292999989366613, + "loss": 1.7185, + "step": 2369 + }, + { + "epoch": 0.25, + "grad_norm": 0.6772350072860718, + "learning_rate": 0.001292826778027784, + "loss": 1.7228, + "step": 2370 + }, + { + "epoch": 0.2501054852320675, + "grad_norm": 0.5523633360862732, + "learning_rate": 0.001292653505862508, + "loss": 1.7014, + "step": 2371 + }, + { + "epoch": 0.25021097046413504, + "grad_norm": 0.7594683766365051, + "learning_rate": 0.0012924801728902006, + "loss": 1.7095, + "step": 2372 + }, + { + "epoch": 0.25031645569620253, + "grad_norm": 0.9010887742042542, + "learning_rate": 0.0012923067791302848, + "loss": 1.7027, + "step": 2373 + }, + { + "epoch": 0.25042194092827, + "grad_norm": 0.575728178024292, + "learning_rate": 0.0012921333246021904, + 
"loss": 1.7081, + "step": 2374 + }, + { + "epoch": 0.2505274261603376, + "grad_norm": 0.9259113073348999, + "learning_rate": 0.0012919598093253533, + "loss": 1.6995, + "step": 2375 + }, + { + "epoch": 0.25063291139240507, + "grad_norm": 0.9067907929420471, + "learning_rate": 0.0012917862333192173, + "loss": 1.7515, + "step": 2376 + }, + { + "epoch": 0.25073839662447256, + "grad_norm": 0.5997393131256104, + "learning_rate": 0.0012916125966032322, + "loss": 1.6693, + "step": 2377 + }, + { + "epoch": 0.2508438818565401, + "grad_norm": 0.7770492434501648, + "learning_rate": 0.001291438899196855, + "loss": 1.7007, + "step": 2378 + }, + { + "epoch": 0.2509493670886076, + "grad_norm": 0.6214288473129272, + "learning_rate": 0.0012912651411195494, + "loss": 1.736, + "step": 2379 + }, + { + "epoch": 0.2510548523206751, + "grad_norm": 0.6663303971290588, + "learning_rate": 0.0012910913223907856, + "loss": 1.6966, + "step": 2380 + }, + { + "epoch": 0.25116033755274264, + "grad_norm": 0.8316518664360046, + "learning_rate": 0.0012909174430300412, + "loss": 1.7239, + "step": 2381 + }, + { + "epoch": 0.25126582278481013, + "grad_norm": 0.7201334834098816, + "learning_rate": 0.0012907435030567996, + "loss": 1.6951, + "step": 2382 + }, + { + "epoch": 0.2513713080168776, + "grad_norm": 0.5624682307243347, + "learning_rate": 0.0012905695024905525, + "loss": 1.7104, + "step": 2383 + }, + { + "epoch": 0.2514767932489452, + "grad_norm": 0.7351395487785339, + "learning_rate": 0.0012903954413507968, + "loss": 1.6806, + "step": 2384 + }, + { + "epoch": 0.25158227848101267, + "grad_norm": 0.6117594242095947, + "learning_rate": 0.0012902213196570376, + "loss": 1.7124, + "step": 2385 + }, + { + "epoch": 0.25168776371308016, + "grad_norm": 0.5913302302360535, + "learning_rate": 0.0012900471374287855, + "loss": 1.7126, + "step": 2386 + }, + { + "epoch": 0.25179324894514765, + "grad_norm": 0.703648030757904, + "learning_rate": 0.0012898728946855588, + "loss": 1.7789, + "step": 2387 + }, + { + 
"epoch": 0.2518987341772152, + "grad_norm": 0.6010470986366272, + "learning_rate": 0.001289698591446882, + "loss": 1.6677, + "step": 2388 + }, + { + "epoch": 0.2520042194092827, + "grad_norm": 0.6109982132911682, + "learning_rate": 0.0012895242277322872, + "loss": 1.6799, + "step": 2389 + }, + { + "epoch": 0.2521097046413502, + "grad_norm": 0.6874708533287048, + "learning_rate": 0.0012893498035613123, + "loss": 1.7069, + "step": 2390 + }, + { + "epoch": 0.25221518987341773, + "grad_norm": 0.6665875315666199, + "learning_rate": 0.0012891753189535023, + "loss": 1.6832, + "step": 2391 + }, + { + "epoch": 0.2523206751054852, + "grad_norm": 0.8817113041877747, + "learning_rate": 0.0012890007739284092, + "loss": 1.6825, + "step": 2392 + }, + { + "epoch": 0.2524261603375527, + "grad_norm": 0.6294986009597778, + "learning_rate": 0.001288826168505592, + "loss": 1.6786, + "step": 2393 + }, + { + "epoch": 0.25253164556962027, + "grad_norm": 0.6687322854995728, + "learning_rate": 0.0012886515027046156, + "loss": 1.7024, + "step": 2394 + }, + { + "epoch": 0.25263713080168776, + "grad_norm": 0.781847357749939, + "learning_rate": 0.0012884767765450524, + "loss": 1.6619, + "step": 2395 + }, + { + "epoch": 0.25274261603375525, + "grad_norm": 0.6045687794685364, + "learning_rate": 0.0012883019900464814, + "loss": 1.7302, + "step": 2396 + }, + { + "epoch": 0.2528481012658228, + "grad_norm": 0.6811259984970093, + "learning_rate": 0.001288127143228488, + "loss": 1.7592, + "step": 2397 + }, + { + "epoch": 0.2529535864978903, + "grad_norm": 0.8784417510032654, + "learning_rate": 0.0012879522361106646, + "loss": 1.7464, + "step": 2398 + }, + { + "epoch": 0.2530590717299578, + "grad_norm": 0.5776910185813904, + "learning_rate": 0.0012877772687126111, + "loss": 1.6923, + "step": 2399 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 0.6638511419296265, + "learning_rate": 0.001287602241053933, + "loss": 1.7249, + "step": 2400 + }, + { + "epoch": 0.2532700421940928, + "grad_norm": 
0.7208113074302673, + "learning_rate": 0.001287427153154243, + "loss": 1.7088, + "step": 2401 + }, + { + "epoch": 0.2533755274261603, + "grad_norm": 0.5600279569625854, + "learning_rate": 0.0012872520050331608, + "loss": 1.7184, + "step": 2402 + }, + { + "epoch": 0.25348101265822787, + "grad_norm": 0.699658215045929, + "learning_rate": 0.0012870767967103122, + "loss": 1.7312, + "step": 2403 + }, + { + "epoch": 0.25358649789029536, + "grad_norm": 0.5913460850715637, + "learning_rate": 0.0012869015282053304, + "loss": 1.6736, + "step": 2404 + }, + { + "epoch": 0.25369198312236285, + "grad_norm": 0.5873759388923645, + "learning_rate": 0.0012867261995378554, + "loss": 1.7332, + "step": 2405 + }, + { + "epoch": 0.2537974683544304, + "grad_norm": 0.6441132426261902, + "learning_rate": 0.001286550810727533, + "loss": 1.6948, + "step": 2406 + }, + { + "epoch": 0.2539029535864979, + "grad_norm": 0.6201358437538147, + "learning_rate": 0.0012863753617940172, + "loss": 1.7341, + "step": 2407 + }, + { + "epoch": 0.2540084388185654, + "grad_norm": 0.6015024781227112, + "learning_rate": 0.001286199852756967, + "loss": 1.6832, + "step": 2408 + }, + { + "epoch": 0.25411392405063293, + "grad_norm": 0.5596890449523926, + "learning_rate": 0.0012860242836360502, + "loss": 1.6794, + "step": 2409 + }, + { + "epoch": 0.2542194092827004, + "grad_norm": 0.5277765393257141, + "learning_rate": 0.0012858486544509392, + "loss": 1.7195, + "step": 2410 + }, + { + "epoch": 0.2543248945147679, + "grad_norm": 0.6877332925796509, + "learning_rate": 0.0012856729652213144, + "loss": 1.7408, + "step": 2411 + }, + { + "epoch": 0.25443037974683547, + "grad_norm": 0.5128920078277588, + "learning_rate": 0.001285497215966863, + "loss": 1.6916, + "step": 2412 + }, + { + "epoch": 0.25453586497890296, + "grad_norm": 0.5837162733078003, + "learning_rate": 0.0012853214067072782, + "loss": 1.7106, + "step": 2413 + }, + { + "epoch": 0.25464135021097045, + "grad_norm": 0.582166314125061, + "learning_rate": 
0.0012851455374622604, + "loss": 1.6746, + "step": 2414 + }, + { + "epoch": 0.254746835443038, + "grad_norm": 0.6236438155174255, + "learning_rate": 0.0012849696082515166, + "loss": 1.6894, + "step": 2415 + }, + { + "epoch": 0.2548523206751055, + "grad_norm": 0.5125405192375183, + "learning_rate": 0.0012847936190947605, + "loss": 1.7061, + "step": 2416 + }, + { + "epoch": 0.254957805907173, + "grad_norm": 0.645386815071106, + "learning_rate": 0.001284617570011713, + "loss": 1.6724, + "step": 2417 + }, + { + "epoch": 0.25506329113924053, + "grad_norm": 0.5917177796363831, + "learning_rate": 0.0012844414610221006, + "loss": 1.7071, + "step": 2418 + }, + { + "epoch": 0.255168776371308, + "grad_norm": 0.5818729996681213, + "learning_rate": 0.0012842652921456576, + "loss": 1.6808, + "step": 2419 + }, + { + "epoch": 0.2552742616033755, + "grad_norm": 0.7256907224655151, + "learning_rate": 0.0012840890634021249, + "loss": 1.7252, + "step": 2420 + }, + { + "epoch": 0.255379746835443, + "grad_norm": 0.590599775314331, + "learning_rate": 0.001283912774811249, + "loss": 1.6771, + "step": 2421 + }, + { + "epoch": 0.25548523206751056, + "grad_norm": 0.6391598582267761, + "learning_rate": 0.0012837364263927843, + "loss": 1.7786, + "step": 2422 + }, + { + "epoch": 0.25559071729957805, + "grad_norm": 0.627606213092804, + "learning_rate": 0.001283560018166492, + "loss": 1.7424, + "step": 2423 + }, + { + "epoch": 0.25569620253164554, + "grad_norm": 0.6190105676651001, + "learning_rate": 0.0012833835501521386, + "loss": 1.684, + "step": 2424 + }, + { + "epoch": 0.2558016877637131, + "grad_norm": 0.6380122303962708, + "learning_rate": 0.0012832070223694992, + "loss": 1.6695, + "step": 2425 + }, + { + "epoch": 0.2559071729957806, + "grad_norm": 0.6330202221870422, + "learning_rate": 0.0012830304348383538, + "loss": 1.6983, + "step": 2426 + }, + { + "epoch": 0.2560126582278481, + "grad_norm": 0.5927221775054932, + "learning_rate": 0.0012828537875784905, + "loss": 1.7237, + "step": 2427 
+ }, + { + "epoch": 0.2561181434599156, + "grad_norm": 0.5757395625114441, + "learning_rate": 0.001282677080609703, + "loss": 1.672, + "step": 2428 + }, + { + "epoch": 0.2562236286919831, + "grad_norm": 0.5760767459869385, + "learning_rate": 0.0012825003139517925, + "loss": 1.6734, + "step": 2429 + }, + { + "epoch": 0.2563291139240506, + "grad_norm": 0.7656447887420654, + "learning_rate": 0.0012823234876245667, + "loss": 1.6718, + "step": 2430 + }, + { + "epoch": 0.25643459915611816, + "grad_norm": 0.6728898286819458, + "learning_rate": 0.0012821466016478395, + "loss": 1.7236, + "step": 2431 + }, + { + "epoch": 0.25654008438818565, + "grad_norm": 0.5939133167266846, + "learning_rate": 0.0012819696560414323, + "loss": 1.7316, + "step": 2432 + }, + { + "epoch": 0.25664556962025314, + "grad_norm": 0.5652764439582825, + "learning_rate": 0.0012817926508251723, + "loss": 1.7539, + "step": 2433 + }, + { + "epoch": 0.2567510548523207, + "grad_norm": 0.6071808934211731, + "learning_rate": 0.0012816155860188938, + "loss": 1.6636, + "step": 2434 + }, + { + "epoch": 0.2568565400843882, + "grad_norm": 0.6490192413330078, + "learning_rate": 0.0012814384616424384, + "loss": 1.6753, + "step": 2435 + }, + { + "epoch": 0.2569620253164557, + "grad_norm": 0.7507681250572205, + "learning_rate": 0.0012812612777156533, + "loss": 1.6766, + "step": 2436 + }, + { + "epoch": 0.2570675105485232, + "grad_norm": 0.5368454456329346, + "learning_rate": 0.001281084034258393, + "loss": 1.7145, + "step": 2437 + }, + { + "epoch": 0.2571729957805907, + "grad_norm": 0.7809728980064392, + "learning_rate": 0.0012809067312905182, + "loss": 1.7095, + "step": 2438 + }, + { + "epoch": 0.2572784810126582, + "grad_norm": 0.6902058124542236, + "learning_rate": 0.0012807293688318969, + "loss": 1.6999, + "step": 2439 + }, + { + "epoch": 0.25738396624472576, + "grad_norm": 0.628619372844696, + "learning_rate": 0.0012805519469024035, + "loss": 1.7274, + "step": 2440 + }, + { + "epoch": 0.25748945147679325, + 
"grad_norm": 0.914553701877594, + "learning_rate": 0.0012803744655219187, + "loss": 1.7071, + "step": 2441 + }, + { + "epoch": 0.25759493670886074, + "grad_norm": 0.6970487236976624, + "learning_rate": 0.0012801969247103306, + "loss": 1.6935, + "step": 2442 + }, + { + "epoch": 0.2577004219409283, + "grad_norm": 0.5999206900596619, + "learning_rate": 0.001280019324487533, + "loss": 1.7055, + "step": 2443 + }, + { + "epoch": 0.2578059071729958, + "grad_norm": 0.8167735934257507, + "learning_rate": 0.0012798416648734272, + "loss": 1.6952, + "step": 2444 + }, + { + "epoch": 0.2579113924050633, + "grad_norm": 0.6915310025215149, + "learning_rate": 0.001279663945887921, + "loss": 1.7377, + "step": 2445 + }, + { + "epoch": 0.2580168776371308, + "grad_norm": 0.5668425559997559, + "learning_rate": 0.0012794861675509285, + "loss": 1.6716, + "step": 2446 + }, + { + "epoch": 0.2581223628691983, + "grad_norm": 0.7698961496353149, + "learning_rate": 0.0012793083298823708, + "loss": 1.6922, + "step": 2447 + }, + { + "epoch": 0.2582278481012658, + "grad_norm": 0.661979079246521, + "learning_rate": 0.0012791304329021751, + "loss": 1.6977, + "step": 2448 + }, + { + "epoch": 0.25833333333333336, + "grad_norm": 0.5526832342147827, + "learning_rate": 0.001278952476630276, + "loss": 1.6755, + "step": 2449 + }, + { + "epoch": 0.25843881856540085, + "grad_norm": 0.7107158303260803, + "learning_rate": 0.0012787744610866143, + "loss": 1.6738, + "step": 2450 + }, + { + "epoch": 0.25854430379746834, + "grad_norm": 0.6273146271705627, + "learning_rate": 0.0012785963862911376, + "loss": 1.6962, + "step": 2451 + }, + { + "epoch": 0.2586497890295359, + "grad_norm": 0.5372296571731567, + "learning_rate": 0.0012784182522637998, + "loss": 1.7311, + "step": 2452 + }, + { + "epoch": 0.2587552742616034, + "grad_norm": 0.6380911469459534, + "learning_rate": 0.001278240059024562, + "loss": 1.686, + "step": 2453 + }, + { + "epoch": 0.2588607594936709, + "grad_norm": 0.6327139735221863, + "learning_rate": 
0.0012780618065933915, + "loss": 1.747, + "step": 2454 + }, + { + "epoch": 0.25896624472573837, + "grad_norm": 0.5916246771812439, + "learning_rate": 0.0012778834949902626, + "loss": 1.68, + "step": 2455 + }, + { + "epoch": 0.2590717299578059, + "grad_norm": 0.6030903458595276, + "learning_rate": 0.0012777051242351557, + "loss": 1.7063, + "step": 2456 + }, + { + "epoch": 0.2591772151898734, + "grad_norm": 0.6094292998313904, + "learning_rate": 0.0012775266943480582, + "loss": 1.6967, + "step": 2457 + }, + { + "epoch": 0.2592827004219409, + "grad_norm": 0.5038880109786987, + "learning_rate": 0.0012773482053489642, + "loss": 1.717, + "step": 2458 + }, + { + "epoch": 0.25938818565400845, + "grad_norm": 0.6531891822814941, + "learning_rate": 0.0012771696572578743, + "loss": 1.7229, + "step": 2459 + }, + { + "epoch": 0.25949367088607594, + "grad_norm": 0.568501353263855, + "learning_rate": 0.0012769910500947954, + "loss": 1.6914, + "step": 2460 + }, + { + "epoch": 0.25959915611814344, + "grad_norm": 0.6506641507148743, + "learning_rate": 0.0012768123838797414, + "loss": 1.6534, + "step": 2461 + }, + { + "epoch": 0.259704641350211, + "grad_norm": 0.6781302094459534, + "learning_rate": 0.0012766336586327333, + "loss": 1.7007, + "step": 2462 + }, + { + "epoch": 0.2598101265822785, + "grad_norm": 0.5641306638717651, + "learning_rate": 0.0012764548743737973, + "loss": 1.7014, + "step": 2463 + }, + { + "epoch": 0.25991561181434597, + "grad_norm": 0.5929677486419678, + "learning_rate": 0.001276276031122968, + "loss": 1.6692, + "step": 2464 + }, + { + "epoch": 0.2600210970464135, + "grad_norm": 0.5876758694648743, + "learning_rate": 0.0012760971289002847, + "loss": 1.7278, + "step": 2465 + }, + { + "epoch": 0.260126582278481, + "grad_norm": 0.5691653490066528, + "learning_rate": 0.0012759181677257946, + "loss": 1.6877, + "step": 2466 + }, + { + "epoch": 0.2602320675105485, + "grad_norm": 0.622257649898529, + "learning_rate": 0.0012757391476195517, + "loss": 1.6932, + "step": 
2467 + }, + { + "epoch": 0.26033755274261605, + "grad_norm": 0.5134981870651245, + "learning_rate": 0.0012755600686016155, + "loss": 1.7083, + "step": 2468 + }, + { + "epoch": 0.26044303797468354, + "grad_norm": 0.636460542678833, + "learning_rate": 0.0012753809306920532, + "loss": 1.7179, + "step": 2469 + }, + { + "epoch": 0.26054852320675104, + "grad_norm": 0.5891287922859192, + "learning_rate": 0.0012752017339109376, + "loss": 1.7109, + "step": 2470 + }, + { + "epoch": 0.2606540084388186, + "grad_norm": 0.585329532623291, + "learning_rate": 0.0012750224782783492, + "loss": 1.7357, + "step": 2471 + }, + { + "epoch": 0.2607594936708861, + "grad_norm": 0.6466334462165833, + "learning_rate": 0.0012748431638143739, + "loss": 1.6892, + "step": 2472 + }, + { + "epoch": 0.26086497890295357, + "grad_norm": 0.5909183621406555, + "learning_rate": 0.0012746637905391048, + "loss": 1.6947, + "step": 2473 + }, + { + "epoch": 0.2609704641350211, + "grad_norm": 0.562623918056488, + "learning_rate": 0.001274484358472642, + "loss": 1.6983, + "step": 2474 + }, + { + "epoch": 0.2610759493670886, + "grad_norm": 0.6815049052238464, + "learning_rate": 0.0012743048676350911, + "loss": 1.7086, + "step": 2475 + }, + { + "epoch": 0.2611814345991561, + "grad_norm": 0.5464376211166382, + "learning_rate": 0.001274125318046566, + "loss": 1.6788, + "step": 2476 + }, + { + "epoch": 0.26128691983122365, + "grad_norm": 0.7714467644691467, + "learning_rate": 0.0012739457097271849, + "loss": 1.7125, + "step": 2477 + }, + { + "epoch": 0.26139240506329114, + "grad_norm": 0.8299044370651245, + "learning_rate": 0.0012737660426970748, + "loss": 1.6964, + "step": 2478 + }, + { + "epoch": 0.26149789029535864, + "grad_norm": 0.5951476097106934, + "learning_rate": 0.0012735863169763678, + "loss": 1.7055, + "step": 2479 + }, + { + "epoch": 0.2616033755274262, + "grad_norm": 0.7907959222793579, + "learning_rate": 0.0012734065325852029, + "loss": 1.7062, + "step": 2480 + }, + { + "epoch": 0.2617088607594937, + 
"grad_norm": 0.8452408909797668, + "learning_rate": 0.0012732266895437265, + "loss": 1.6873, + "step": 2481 + }, + { + "epoch": 0.26181434599156117, + "grad_norm": 0.5108951330184937, + "learning_rate": 0.00127304678787209, + "loss": 1.6704, + "step": 2482 + }, + { + "epoch": 0.2619198312236287, + "grad_norm": 0.8297290205955505, + "learning_rate": 0.001272866827590453, + "loss": 1.7008, + "step": 2483 + }, + { + "epoch": 0.2620253164556962, + "grad_norm": 0.6865988373756409, + "learning_rate": 0.001272686808718981, + "loss": 1.661, + "step": 2484 + }, + { + "epoch": 0.2621308016877637, + "grad_norm": 0.5439263582229614, + "learning_rate": 0.0012725067312778454, + "loss": 1.6988, + "step": 2485 + }, + { + "epoch": 0.2622362869198312, + "grad_norm": 0.6565894484519958, + "learning_rate": 0.0012723265952872252, + "loss": 1.6893, + "step": 2486 + }, + { + "epoch": 0.26234177215189874, + "grad_norm": 0.6105145215988159, + "learning_rate": 0.0012721464007673055, + "loss": 1.6965, + "step": 2487 + }, + { + "epoch": 0.26244725738396624, + "grad_norm": 0.5908785462379456, + "learning_rate": 0.0012719661477382778, + "loss": 1.7262, + "step": 2488 + }, + { + "epoch": 0.26255274261603373, + "grad_norm": 0.768246591091156, + "learning_rate": 0.0012717858362203407, + "loss": 1.662, + "step": 2489 + }, + { + "epoch": 0.2626582278481013, + "grad_norm": 0.547117292881012, + "learning_rate": 0.0012716054662336987, + "loss": 1.7141, + "step": 2490 + }, + { + "epoch": 0.26276371308016877, + "grad_norm": 0.6694080233573914, + "learning_rate": 0.001271425037798563, + "loss": 1.6792, + "step": 2491 + }, + { + "epoch": 0.26286919831223626, + "grad_norm": 0.5651350021362305, + "learning_rate": 0.0012712445509351518, + "loss": 1.694, + "step": 2492 + }, + { + "epoch": 0.2629746835443038, + "grad_norm": 0.6071954965591431, + "learning_rate": 0.00127106400566369, + "loss": 1.7366, + "step": 2493 + }, + { + "epoch": 0.2630801687763713, + "grad_norm": 0.6451035737991333, + "learning_rate": 
0.0012708834020044076, + "loss": 1.6752, + "step": 2494 + }, + { + "epoch": 0.2631856540084388, + "grad_norm": 0.5263962745666504, + "learning_rate": 0.0012707027399775429, + "loss": 1.6862, + "step": 2495 + }, + { + "epoch": 0.26329113924050634, + "grad_norm": 0.7411438822746277, + "learning_rate": 0.0012705220196033396, + "loss": 1.6847, + "step": 2496 + }, + { + "epoch": 0.26339662447257384, + "grad_norm": 0.6719682812690735, + "learning_rate": 0.0012703412409020484, + "loss": 1.7328, + "step": 2497 + }, + { + "epoch": 0.26350210970464133, + "grad_norm": 0.6424457430839539, + "learning_rate": 0.0012701604038939268, + "loss": 1.6827, + "step": 2498 + }, + { + "epoch": 0.2636075949367089, + "grad_norm": 0.6699061989784241, + "learning_rate": 0.0012699795085992379, + "loss": 1.6995, + "step": 2499 + }, + { + "epoch": 0.26371308016877637, + "grad_norm": 0.5989509224891663, + "learning_rate": 0.001269798555038252, + "loss": 1.6725, + "step": 2500 + }, + { + "epoch": 0.26381856540084386, + "grad_norm": 0.5565215349197388, + "learning_rate": 0.0012696175432312465, + "loss": 1.6849, + "step": 2501 + }, + { + "epoch": 0.2639240506329114, + "grad_norm": 0.5648347735404968, + "learning_rate": 0.0012694364731985041, + "loss": 1.7197, + "step": 2502 + }, + { + "epoch": 0.2640295358649789, + "grad_norm": 0.6392796039581299, + "learning_rate": 0.0012692553449603148, + "loss": 1.6939, + "step": 2503 + }, + { + "epoch": 0.2641350210970464, + "grad_norm": 0.51346755027771, + "learning_rate": 0.0012690741585369748, + "loss": 1.6638, + "step": 2504 + }, + { + "epoch": 0.26424050632911394, + "grad_norm": 0.5971677899360657, + "learning_rate": 0.0012688929139487869, + "loss": 1.721, + "step": 2505 + }, + { + "epoch": 0.26434599156118144, + "grad_norm": 0.5425843596458435, + "learning_rate": 0.0012687116112160607, + "loss": 1.6725, + "step": 2506 + }, + { + "epoch": 0.26445147679324893, + "grad_norm": 0.5622197985649109, + "learning_rate": 0.0012685302503591118, + "loss": 1.7045, + 
"step": 2507 + }, + { + "epoch": 0.2645569620253165, + "grad_norm": 0.5386444330215454, + "learning_rate": 0.0012683488313982628, + "loss": 1.676, + "step": 2508 + }, + { + "epoch": 0.26466244725738397, + "grad_norm": 0.590096116065979, + "learning_rate": 0.0012681673543538427, + "loss": 1.6706, + "step": 2509 + }, + { + "epoch": 0.26476793248945146, + "grad_norm": 0.5340452194213867, + "learning_rate": 0.0012679858192461864, + "loss": 1.7178, + "step": 2510 + }, + { + "epoch": 0.264873417721519, + "grad_norm": 0.5690313577651978, + "learning_rate": 0.0012678042260956363, + "loss": 1.7246, + "step": 2511 + }, + { + "epoch": 0.2649789029535865, + "grad_norm": 0.5737891793251038, + "learning_rate": 0.0012676225749225407, + "loss": 1.7117, + "step": 2512 + }, + { + "epoch": 0.265084388185654, + "grad_norm": 0.5570858716964722, + "learning_rate": 0.0012674408657472542, + "loss": 1.6683, + "step": 2513 + }, + { + "epoch": 0.26518987341772154, + "grad_norm": 0.5033515095710754, + "learning_rate": 0.0012672590985901386, + "loss": 1.6842, + "step": 2514 + }, + { + "epoch": 0.26529535864978904, + "grad_norm": 0.5373754501342773, + "learning_rate": 0.001267077273471562, + "loss": 1.6779, + "step": 2515 + }, + { + "epoch": 0.26540084388185653, + "grad_norm": 0.5224241614341736, + "learning_rate": 0.0012668953904118984, + "loss": 1.6951, + "step": 2516 + }, + { + "epoch": 0.2655063291139241, + "grad_norm": 0.6071369051933289, + "learning_rate": 0.001266713449431529, + "loss": 1.6985, + "step": 2517 + }, + { + "epoch": 0.26561181434599157, + "grad_norm": 0.5893866419792175, + "learning_rate": 0.0012665314505508406, + "loss": 1.7087, + "step": 2518 + }, + { + "epoch": 0.26571729957805906, + "grad_norm": 0.5112670063972473, + "learning_rate": 0.0012663493937902278, + "loss": 1.7032, + "step": 2519 + }, + { + "epoch": 0.26582278481012656, + "grad_norm": 0.7329529523849487, + "learning_rate": 0.0012661672791700906, + "loss": 1.7033, + "step": 2520 + }, + { + "epoch": 
0.2659282700421941, + "grad_norm": 0.7515628337860107, + "learning_rate": 0.001265985106710836, + "loss": 1.6972, + "step": 2521 + }, + { + "epoch": 0.2660337552742616, + "grad_norm": 0.6171714663505554, + "learning_rate": 0.0012658028764328771, + "loss": 1.6879, + "step": 2522 + }, + { + "epoch": 0.2661392405063291, + "grad_norm": 0.5739088654518127, + "learning_rate": 0.0012656205883566339, + "loss": 1.7094, + "step": 2523 + }, + { + "epoch": 0.26624472573839664, + "grad_norm": 0.5855562090873718, + "learning_rate": 0.0012654382425025328, + "loss": 1.6475, + "step": 2524 + }, + { + "epoch": 0.26635021097046413, + "grad_norm": 0.6361724138259888, + "learning_rate": 0.0012652558388910062, + "loss": 1.6626, + "step": 2525 + }, + { + "epoch": 0.2664556962025316, + "grad_norm": 0.6156182289123535, + "learning_rate": 0.0012650733775424938, + "loss": 1.6667, + "step": 2526 + }, + { + "epoch": 0.26656118143459917, + "grad_norm": 0.6345229148864746, + "learning_rate": 0.001264890858477441, + "loss": 1.6956, + "step": 2527 + }, + { + "epoch": 0.26666666666666666, + "grad_norm": 0.6074820160865784, + "learning_rate": 0.0012647082817162998, + "loss": 1.6866, + "step": 2528 + }, + { + "epoch": 0.26677215189873416, + "grad_norm": 0.5590560436248779, + "learning_rate": 0.0012645256472795295, + "loss": 1.7783, + "step": 2529 + }, + { + "epoch": 0.2668776371308017, + "grad_norm": 0.6990056037902832, + "learning_rate": 0.0012643429551875945, + "loss": 1.6818, + "step": 2530 + }, + { + "epoch": 0.2669831223628692, + "grad_norm": 0.5664899349212646, + "learning_rate": 0.0012641602054609662, + "loss": 1.6888, + "step": 2531 + }, + { + "epoch": 0.2670886075949367, + "grad_norm": 0.80857914686203, + "learning_rate": 0.0012639773981201238, + "loss": 1.7014, + "step": 2532 + }, + { + "epoch": 0.26719409282700424, + "grad_norm": 0.7952802777290344, + "learning_rate": 0.0012637945331855506, + "loss": 1.6336, + "step": 2533 + }, + { + "epoch": 0.26729957805907173, + "grad_norm": 
0.609452486038208, + "learning_rate": 0.0012636116106777382, + "loss": 1.6778, + "step": 2534 + }, + { + "epoch": 0.2674050632911392, + "grad_norm": 0.7567176222801208, + "learning_rate": 0.0012634286306171835, + "loss": 1.7445, + "step": 2535 + }, + { + "epoch": 0.26751054852320677, + "grad_norm": 0.6835464835166931, + "learning_rate": 0.0012632455930243907, + "loss": 1.691, + "step": 2536 + }, + { + "epoch": 0.26761603375527426, + "grad_norm": 0.6629877686500549, + "learning_rate": 0.0012630624979198697, + "loss": 1.6911, + "step": 2537 + }, + { + "epoch": 0.26772151898734176, + "grad_norm": 0.6416367292404175, + "learning_rate": 0.0012628793453241377, + "loss": 1.7341, + "step": 2538 + }, + { + "epoch": 0.2678270042194093, + "grad_norm": 0.6362160444259644, + "learning_rate": 0.0012626961352577174, + "loss": 1.6965, + "step": 2539 + }, + { + "epoch": 0.2679324894514768, + "grad_norm": 0.7144106030464172, + "learning_rate": 0.0012625128677411388, + "loss": 1.6858, + "step": 2540 + }, + { + "epoch": 0.2680379746835443, + "grad_norm": 0.6249597072601318, + "learning_rate": 0.0012623295427949377, + "loss": 1.6831, + "step": 2541 + }, + { + "epoch": 0.26814345991561184, + "grad_norm": 0.6934325098991394, + "learning_rate": 0.0012621461604396566, + "loss": 1.7127, + "step": 2542 + }, + { + "epoch": 0.26824894514767933, + "grad_norm": 0.6014187335968018, + "learning_rate": 0.0012619627206958445, + "loss": 1.6769, + "step": 2543 + }, + { + "epoch": 0.2683544303797468, + "grad_norm": 0.7802491784095764, + "learning_rate": 0.0012617792235840564, + "loss": 1.6614, + "step": 2544 + }, + { + "epoch": 0.26845991561181437, + "grad_norm": 0.690214216709137, + "learning_rate": 0.0012615956691248544, + "loss": 1.7001, + "step": 2545 + }, + { + "epoch": 0.26856540084388186, + "grad_norm": 0.5628810524940491, + "learning_rate": 0.001261412057338807, + "loss": 1.6854, + "step": 2546 + }, + { + "epoch": 0.26867088607594936, + "grad_norm": 0.6147732138633728, + "learning_rate": 
0.0012612283882464882, + "loss": 1.677, + "step": 2547 + }, + { + "epoch": 0.2687763713080169, + "grad_norm": 0.6269325613975525, + "learning_rate": 0.0012610446618684793, + "loss": 1.6973, + "step": 2548 + }, + { + "epoch": 0.2688818565400844, + "grad_norm": 0.6146154999732971, + "learning_rate": 0.0012608608782253676, + "loss": 1.6908, + "step": 2549 + }, + { + "epoch": 0.2689873417721519, + "grad_norm": 0.6477011442184448, + "learning_rate": 0.0012606770373377475, + "loss": 1.6905, + "step": 2550 + }, + { + "epoch": 0.26909282700421944, + "grad_norm": 0.5687825083732605, + "learning_rate": 0.0012604931392262186, + "loss": 1.7097, + "step": 2551 + }, + { + "epoch": 0.26919831223628693, + "grad_norm": 0.7852251529693604, + "learning_rate": 0.001260309183911388, + "loss": 1.7205, + "step": 2552 + }, + { + "epoch": 0.2693037974683544, + "grad_norm": 0.7204083800315857, + "learning_rate": 0.0012601251714138683, + "loss": 1.7336, + "step": 2553 + }, + { + "epoch": 0.2694092827004219, + "grad_norm": 0.8140206336975098, + "learning_rate": 0.0012599411017542798, + "loss": 1.6925, + "step": 2554 + }, + { + "epoch": 0.26951476793248946, + "grad_norm": 0.62337726354599, + "learning_rate": 0.0012597569749532482, + "loss": 1.693, + "step": 2555 + }, + { + "epoch": 0.26962025316455696, + "grad_norm": 0.8065971732139587, + "learning_rate": 0.0012595727910314056, + "loss": 1.7095, + "step": 2556 + }, + { + "epoch": 0.26972573839662445, + "grad_norm": 0.6344736218452454, + "learning_rate": 0.0012593885500093906, + "loss": 1.6836, + "step": 2557 + }, + { + "epoch": 0.269831223628692, + "grad_norm": 0.6898514032363892, + "learning_rate": 0.0012592042519078486, + "loss": 1.7043, + "step": 2558 + }, + { + "epoch": 0.2699367088607595, + "grad_norm": 0.7425501942634583, + "learning_rate": 0.0012590198967474312, + "loss": 1.7043, + "step": 2559 + }, + { + "epoch": 0.270042194092827, + "grad_norm": 0.5536888837814331, + "learning_rate": 0.0012588354845487959, + "loss": 1.6442, + "step": 
2560 + }, + { + "epoch": 0.27014767932489453, + "grad_norm": 0.6948987245559692, + "learning_rate": 0.0012586510153326075, + "loss": 1.6813, + "step": 2561 + }, + { + "epoch": 0.270253164556962, + "grad_norm": 0.6307619214057922, + "learning_rate": 0.0012584664891195365, + "loss": 1.7105, + "step": 2562 + }, + { + "epoch": 0.2703586497890295, + "grad_norm": 0.5878195762634277, + "learning_rate": 0.0012582819059302598, + "loss": 1.7345, + "step": 2563 + }, + { + "epoch": 0.27046413502109706, + "grad_norm": 0.5436904430389404, + "learning_rate": 0.001258097265785461, + "loss": 1.684, + "step": 2564 + }, + { + "epoch": 0.27056962025316456, + "grad_norm": 0.6475735306739807, + "learning_rate": 0.0012579125687058302, + "loss": 1.6598, + "step": 2565 + }, + { + "epoch": 0.27067510548523205, + "grad_norm": 0.5867934226989746, + "learning_rate": 0.0012577278147120632, + "loss": 1.6679, + "step": 2566 + }, + { + "epoch": 0.2707805907172996, + "grad_norm": 0.6429018378257751, + "learning_rate": 0.0012575430038248628, + "loss": 1.7199, + "step": 2567 + }, + { + "epoch": 0.2708860759493671, + "grad_norm": 0.6311723589897156, + "learning_rate": 0.001257358136064938, + "loss": 1.6943, + "step": 2568 + }, + { + "epoch": 0.2709915611814346, + "grad_norm": 0.5671003460884094, + "learning_rate": 0.001257173211453004, + "loss": 1.6365, + "step": 2569 + }, + { + "epoch": 0.27109704641350213, + "grad_norm": 0.6381908059120178, + "learning_rate": 0.001256988230009783, + "loss": 1.7208, + "step": 2570 + }, + { + "epoch": 0.2712025316455696, + "grad_norm": 0.5454816222190857, + "learning_rate": 0.0012568031917560027, + "loss": 1.6544, + "step": 2571 + }, + { + "epoch": 0.2713080168776371, + "grad_norm": 0.7180166840553284, + "learning_rate": 0.0012566180967123976, + "loss": 1.6917, + "step": 2572 + }, + { + "epoch": 0.27141350210970466, + "grad_norm": 0.5464669466018677, + "learning_rate": 0.0012564329448997082, + "loss": 1.6914, + "step": 2573 + }, + { + "epoch": 0.27151898734177216, + 
"grad_norm": 0.6009981036186218, + "learning_rate": 0.0012562477363386821, + "loss": 1.6836, + "step": 2574 + }, + { + "epoch": 0.27162447257383965, + "grad_norm": 0.6100663542747498, + "learning_rate": 0.0012560624710500731, + "loss": 1.689, + "step": 2575 + }, + { + "epoch": 0.2717299578059072, + "grad_norm": 0.5436024069786072, + "learning_rate": 0.0012558771490546407, + "loss": 1.6568, + "step": 2576 + }, + { + "epoch": 0.2718354430379747, + "grad_norm": 0.5978066325187683, + "learning_rate": 0.0012556917703731509, + "loss": 1.7067, + "step": 2577 + }, + { + "epoch": 0.2719409282700422, + "grad_norm": 0.5382156372070312, + "learning_rate": 0.0012555063350263768, + "loss": 1.7412, + "step": 2578 + }, + { + "epoch": 0.27204641350210973, + "grad_norm": 0.5682050585746765, + "learning_rate": 0.0012553208430350973, + "loss": 1.6953, + "step": 2579 + }, + { + "epoch": 0.2721518987341772, + "grad_norm": 0.5240630507469177, + "learning_rate": 0.0012551352944200976, + "loss": 1.6877, + "step": 2580 + }, + { + "epoch": 0.2722573839662447, + "grad_norm": 0.5486544370651245, + "learning_rate": 0.0012549496892021693, + "loss": 1.6745, + "step": 2581 + }, + { + "epoch": 0.27236286919831226, + "grad_norm": 0.6045501828193665, + "learning_rate": 0.0012547640274021103, + "loss": 1.7006, + "step": 2582 + }, + { + "epoch": 0.27246835443037976, + "grad_norm": 0.5818844437599182, + "learning_rate": 0.001254578309040725, + "loss": 1.6937, + "step": 2583 + }, + { + "epoch": 0.27257383966244725, + "grad_norm": 0.5760286450386047, + "learning_rate": 0.001254392534138824, + "loss": 1.6943, + "step": 2584 + }, + { + "epoch": 0.27267932489451474, + "grad_norm": 0.6523585319519043, + "learning_rate": 0.0012542067027172248, + "loss": 1.7163, + "step": 2585 + }, + { + "epoch": 0.2727848101265823, + "grad_norm": 0.6774278879165649, + "learning_rate": 0.0012540208147967503, + "loss": 1.6853, + "step": 2586 + }, + { + "epoch": 0.2728902953586498, + "grad_norm": 0.7064288258552551, + 
"learning_rate": 0.00125383487039823, + "loss": 1.7072, + "step": 2587 + }, + { + "epoch": 0.2729957805907173, + "grad_norm": 0.5452187061309814, + "learning_rate": 0.0012536488695425003, + "loss": 1.6971, + "step": 2588 + }, + { + "epoch": 0.2731012658227848, + "grad_norm": 0.5777950882911682, + "learning_rate": 0.0012534628122504031, + "loss": 1.6888, + "step": 2589 + }, + { + "epoch": 0.2732067510548523, + "grad_norm": 0.553745448589325, + "learning_rate": 0.0012532766985427874, + "loss": 1.6955, + "step": 2590 + }, + { + "epoch": 0.2733122362869198, + "grad_norm": 0.5644375085830688, + "learning_rate": 0.0012530905284405083, + "loss": 1.6899, + "step": 2591 + }, + { + "epoch": 0.27341772151898736, + "grad_norm": 0.5709169507026672, + "learning_rate": 0.0012529043019644266, + "loss": 1.7093, + "step": 2592 + }, + { + "epoch": 0.27352320675105485, + "grad_norm": 0.562324047088623, + "learning_rate": 0.0012527180191354104, + "loss": 1.6926, + "step": 2593 + }, + { + "epoch": 0.27362869198312234, + "grad_norm": 0.6052801609039307, + "learning_rate": 0.0012525316799743332, + "loss": 1.6967, + "step": 2594 + }, + { + "epoch": 0.2737341772151899, + "grad_norm": 0.6023877263069153, + "learning_rate": 0.0012523452845020755, + "loss": 1.7248, + "step": 2595 + }, + { + "epoch": 0.2738396624472574, + "grad_norm": 0.567111074924469, + "learning_rate": 0.0012521588327395236, + "loss": 1.6998, + "step": 2596 + }, + { + "epoch": 0.2739451476793249, + "grad_norm": 0.6063639521598816, + "learning_rate": 0.0012519723247075706, + "loss": 1.6763, + "step": 2597 + }, + { + "epoch": 0.2740506329113924, + "grad_norm": 0.5742048621177673, + "learning_rate": 0.0012517857604271156, + "loss": 1.6581, + "step": 2598 + }, + { + "epoch": 0.2741561181434599, + "grad_norm": 0.643480658531189, + "learning_rate": 0.001251599139919064, + "loss": 1.6764, + "step": 2599 + }, + { + "epoch": 0.2742616033755274, + "grad_norm": 0.5527203679084778, + "learning_rate": 0.0012514124632043272, + "loss": 
1.7175, + "step": 2600 + }, + { + "epoch": 0.27436708860759496, + "grad_norm": 0.5569000244140625, + "learning_rate": 0.001251225730303824, + "loss": 1.6745, + "step": 2601 + }, + { + "epoch": 0.27447257383966245, + "grad_norm": 0.544506847858429, + "learning_rate": 0.0012510389412384785, + "loss": 1.6876, + "step": 2602 + }, + { + "epoch": 0.27457805907172994, + "grad_norm": 0.5856977105140686, + "learning_rate": 0.001250852096029221, + "loss": 1.6258, + "step": 2603 + }, + { + "epoch": 0.2746835443037975, + "grad_norm": 0.6928072571754456, + "learning_rate": 0.0012506651946969888, + "loss": 1.7054, + "step": 2604 + }, + { + "epoch": 0.274789029535865, + "grad_norm": 0.6647782325744629, + "learning_rate": 0.0012504782372627248, + "loss": 1.7265, + "step": 2605 + }, + { + "epoch": 0.2748945147679325, + "grad_norm": 0.5459656119346619, + "learning_rate": 0.0012502912237473789, + "loss": 1.7204, + "step": 2606 + }, + { + "epoch": 0.275, + "grad_norm": 0.7399492859840393, + "learning_rate": 0.0012501041541719067, + "loss": 1.7054, + "step": 2607 + }, + { + "epoch": 0.2751054852320675, + "grad_norm": 0.6629577279090881, + "learning_rate": 0.0012499170285572702, + "loss": 1.6887, + "step": 2608 + }, + { + "epoch": 0.275210970464135, + "grad_norm": 0.630962073802948, + "learning_rate": 0.0012497298469244377, + "loss": 1.7162, + "step": 2609 + }, + { + "epoch": 0.27531645569620256, + "grad_norm": 0.6828775405883789, + "learning_rate": 0.0012495426092943842, + "loss": 1.7018, + "step": 2610 + }, + { + "epoch": 0.27542194092827005, + "grad_norm": 0.5524483919143677, + "learning_rate": 0.0012493553156880904, + "loss": 1.7087, + "step": 2611 + }, + { + "epoch": 0.27552742616033754, + "grad_norm": 0.7574877738952637, + "learning_rate": 0.0012491679661265434, + "loss": 1.7096, + "step": 2612 + }, + { + "epoch": 0.2756329113924051, + "grad_norm": 0.6407290101051331, + "learning_rate": 0.0012489805606307367, + "loss": 1.7247, + "step": 2613 + }, + { + "epoch": 0.2757383966244726, 
+ "grad_norm": 0.692075788974762, + "learning_rate": 0.00124879309922167, + "loss": 1.6878, + "step": 2614 + }, + { + "epoch": 0.2758438818565401, + "grad_norm": 0.6014193296432495, + "learning_rate": 0.0012486055819203494, + "loss": 1.6953, + "step": 2615 + }, + { + "epoch": 0.2759493670886076, + "grad_norm": 0.6246918439865112, + "learning_rate": 0.001248418008747787, + "loss": 1.6784, + "step": 2616 + }, + { + "epoch": 0.2760548523206751, + "grad_norm": 0.6020075678825378, + "learning_rate": 0.0012482303797250014, + "loss": 1.6803, + "step": 2617 + }, + { + "epoch": 0.2761603375527426, + "grad_norm": 0.6157186031341553, + "learning_rate": 0.0012480426948730174, + "loss": 1.6621, + "step": 2618 + }, + { + "epoch": 0.2762658227848101, + "grad_norm": 0.5981223583221436, + "learning_rate": 0.001247854954212866, + "loss": 1.6934, + "step": 2619 + }, + { + "epoch": 0.27637130801687765, + "grad_norm": 0.5560953617095947, + "learning_rate": 0.0012476671577655845, + "loss": 1.6753, + "step": 2620 + }, + { + "epoch": 0.27647679324894514, + "grad_norm": 0.6896724700927734, + "learning_rate": 0.001247479305552216, + "loss": 1.6635, + "step": 2621 + }, + { + "epoch": 0.27658227848101263, + "grad_norm": 0.5547972321510315, + "learning_rate": 0.001247291397593811, + "loss": 1.6496, + "step": 2622 + }, + { + "epoch": 0.2766877637130802, + "grad_norm": 0.6868300437927246, + "learning_rate": 0.001247103433911425, + "loss": 1.6703, + "step": 2623 + }, + { + "epoch": 0.2767932489451477, + "grad_norm": 0.6352007389068604, + "learning_rate": 0.0012469154145261208, + "loss": 1.7156, + "step": 2624 + }, + { + "epoch": 0.27689873417721517, + "grad_norm": 0.694233775138855, + "learning_rate": 0.0012467273394589664, + "loss": 1.669, + "step": 2625 + }, + { + "epoch": 0.2770042194092827, + "grad_norm": 0.6209673285484314, + "learning_rate": 0.0012465392087310366, + "loss": 1.6746, + "step": 2626 + }, + { + "epoch": 0.2771097046413502, + "grad_norm": 0.641914963722229, + "learning_rate": 
0.0012463510223634125, + "loss": 1.6668, + "step": 2627 + }, + { + "epoch": 0.2772151898734177, + "grad_norm": 0.59421306848526, + "learning_rate": 0.0012461627803771812, + "loss": 1.6996, + "step": 2628 + }, + { + "epoch": 0.27732067510548525, + "grad_norm": 0.6244337558746338, + "learning_rate": 0.0012459744827934367, + "loss": 1.6683, + "step": 2629 + }, + { + "epoch": 0.27742616033755274, + "grad_norm": 0.6171361804008484, + "learning_rate": 0.0012457861296332774, + "loss": 1.7212, + "step": 2630 + }, + { + "epoch": 0.27753164556962023, + "grad_norm": 0.6960135698318481, + "learning_rate": 0.0012455977209178109, + "loss": 1.7024, + "step": 2631 + }, + { + "epoch": 0.2776371308016878, + "grad_norm": 0.7112461924552917, + "learning_rate": 0.0012454092566681482, + "loss": 1.7013, + "step": 2632 + }, + { + "epoch": 0.2777426160337553, + "grad_norm": 0.7156234383583069, + "learning_rate": 0.001245220736905408, + "loss": 1.6569, + "step": 2633 + }, + { + "epoch": 0.27784810126582277, + "grad_norm": 0.8146398663520813, + "learning_rate": 0.0012450321616507148, + "loss": 1.7004, + "step": 2634 + }, + { + "epoch": 0.2779535864978903, + "grad_norm": 0.9371531009674072, + "learning_rate": 0.0012448435309251995, + "loss": 1.7426, + "step": 2635 + }, + { + "epoch": 0.2780590717299578, + "grad_norm": 0.5685480833053589, + "learning_rate": 0.001244654844749999, + "loss": 1.6829, + "step": 2636 + }, + { + "epoch": 0.2781645569620253, + "grad_norm": 0.9463776350021362, + "learning_rate": 0.0012444661031462566, + "loss": 1.7211, + "step": 2637 + }, + { + "epoch": 0.27827004219409285, + "grad_norm": 0.5890041589736938, + "learning_rate": 0.0012442773061351216, + "loss": 1.6681, + "step": 2638 + }, + { + "epoch": 0.27837552742616034, + "grad_norm": 0.731614351272583, + "learning_rate": 0.0012440884537377498, + "loss": 1.6885, + "step": 2639 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 0.6338502764701843, + "learning_rate": 0.001243899545975303, + "loss": 1.672, + 
"step": 2640 + }, + { + "epoch": 0.2785864978902954, + "grad_norm": 0.6155173182487488, + "learning_rate": 0.0012437105828689494, + "loss": 1.7065, + "step": 2641 + }, + { + "epoch": 0.2786919831223629, + "grad_norm": 0.6363487839698792, + "learning_rate": 0.0012435215644398632, + "loss": 1.657, + "step": 2642 + }, + { + "epoch": 0.27879746835443037, + "grad_norm": 0.5811752080917358, + "learning_rate": 0.0012433324907092243, + "loss": 1.6731, + "step": 2643 + }, + { + "epoch": 0.2789029535864979, + "grad_norm": 0.6810529828071594, + "learning_rate": 0.0012431433616982204, + "loss": 1.6507, + "step": 2644 + }, + { + "epoch": 0.2790084388185654, + "grad_norm": 0.7061052918434143, + "learning_rate": 0.0012429541774280435, + "loss": 1.7259, + "step": 2645 + }, + { + "epoch": 0.2791139240506329, + "grad_norm": 0.7216823101043701, + "learning_rate": 0.0012427649379198932, + "loss": 1.6885, + "step": 2646 + }, + { + "epoch": 0.27921940928270045, + "grad_norm": 0.8095259070396423, + "learning_rate": 0.0012425756431949742, + "loss": 1.6868, + "step": 2647 + }, + { + "epoch": 0.27932489451476794, + "grad_norm": 0.7158058881759644, + "learning_rate": 0.001242386293274498, + "loss": 1.703, + "step": 2648 + }, + { + "epoch": 0.27943037974683543, + "grad_norm": 0.5889517664909363, + "learning_rate": 0.0012421968881796827, + "loss": 1.6859, + "step": 2649 + }, + { + "epoch": 0.2795358649789029, + "grad_norm": 0.6008945107460022, + "learning_rate": 0.0012420074279317515, + "loss": 1.6866, + "step": 2650 + }, + { + "epoch": 0.2796413502109705, + "grad_norm": 0.6893994808197021, + "learning_rate": 0.001241817912551935, + "loss": 1.6606, + "step": 2651 + }, + { + "epoch": 0.27974683544303797, + "grad_norm": 0.5562378168106079, + "learning_rate": 0.0012416283420614686, + "loss": 1.6937, + "step": 2652 + }, + { + "epoch": 0.27985232067510546, + "grad_norm": 0.5898458957672119, + "learning_rate": 0.0012414387164815953, + "loss": 1.6962, + "step": 2653 + }, + { + "epoch": 
0.279957805907173, + "grad_norm": 0.5935140252113342, + "learning_rate": 0.001241249035833563, + "loss": 1.7078, + "step": 2654 + }, + { + "epoch": 0.2800632911392405, + "grad_norm": 0.6966859102249146, + "learning_rate": 0.0012410593001386267, + "loss": 1.6803, + "step": 2655 + }, + { + "epoch": 0.280168776371308, + "grad_norm": 0.5535063743591309, + "learning_rate": 0.0012408695094180474, + "loss": 1.6802, + "step": 2656 + }, + { + "epoch": 0.28027426160337554, + "grad_norm": 0.7659163475036621, + "learning_rate": 0.0012406796636930918, + "loss": 1.6627, + "step": 2657 + }, + { + "epoch": 0.28037974683544303, + "grad_norm": 0.6712785959243774, + "learning_rate": 0.001240489762985033, + "loss": 1.6285, + "step": 2658 + }, + { + "epoch": 0.2804852320675105, + "grad_norm": 0.5330636501312256, + "learning_rate": 0.0012402998073151505, + "loss": 1.6937, + "step": 2659 + }, + { + "epoch": 0.2805907172995781, + "grad_norm": 0.5921744704246521, + "learning_rate": 0.0012401097967047298, + "loss": 1.6802, + "step": 2660 + }, + { + "epoch": 0.28069620253164557, + "grad_norm": 0.5357014536857605, + "learning_rate": 0.0012399197311750623, + "loss": 1.7003, + "step": 2661 + }, + { + "epoch": 0.28080168776371306, + "grad_norm": 0.549491822719574, + "learning_rate": 0.001239729610747446, + "loss": 1.6902, + "step": 2662 + }, + { + "epoch": 0.2809071729957806, + "grad_norm": 0.5747614502906799, + "learning_rate": 0.001239539435443185, + "loss": 1.6688, + "step": 2663 + }, + { + "epoch": 0.2810126582278481, + "grad_norm": 0.5285831093788147, + "learning_rate": 0.001239349205283589, + "loss": 1.6681, + "step": 2664 + }, + { + "epoch": 0.2811181434599156, + "grad_norm": 0.5653184652328491, + "learning_rate": 0.0012391589202899746, + "loss": 1.6765, + "step": 2665 + }, + { + "epoch": 0.28122362869198314, + "grad_norm": 0.5377151966094971, + "learning_rate": 0.001238968580483664, + "loss": 1.6761, + "step": 2666 + }, + { + "epoch": 0.28132911392405063, + "grad_norm": 
0.528624951839447, + "learning_rate": 0.0012387781858859857, + "loss": 1.674, + "step": 2667 + }, + { + "epoch": 0.2814345991561181, + "grad_norm": 0.6506520509719849, + "learning_rate": 0.0012385877365182743, + "loss": 1.675, + "step": 2668 + }, + { + "epoch": 0.2815400843881857, + "grad_norm": 0.6475247740745544, + "learning_rate": 0.0012383972324018708, + "loss": 1.6783, + "step": 2669 + }, + { + "epoch": 0.28164556962025317, + "grad_norm": 0.5971980094909668, + "learning_rate": 0.001238206673558122, + "loss": 1.6778, + "step": 2670 + }, + { + "epoch": 0.28175105485232066, + "grad_norm": 0.5795575976371765, + "learning_rate": 0.001238016060008381, + "loss": 1.7045, + "step": 2671 + }, + { + "epoch": 0.2818565400843882, + "grad_norm": 0.7014961242675781, + "learning_rate": 0.0012378253917740072, + "loss": 1.693, + "step": 2672 + }, + { + "epoch": 0.2819620253164557, + "grad_norm": 0.6326607465744019, + "learning_rate": 0.0012376346688763656, + "loss": 1.7248, + "step": 2673 + }, + { + "epoch": 0.2820675105485232, + "grad_norm": 0.6034587025642395, + "learning_rate": 0.0012374438913368277, + "loss": 1.6599, + "step": 2674 + }, + { + "epoch": 0.28217299578059074, + "grad_norm": 0.8968172669410706, + "learning_rate": 0.0012372530591767711, + "loss": 1.6814, + "step": 2675 + }, + { + "epoch": 0.28227848101265823, + "grad_norm": 0.8130230903625488, + "learning_rate": 0.0012370621724175797, + "loss": 1.6961, + "step": 2676 + }, + { + "epoch": 0.2823839662447257, + "grad_norm": 0.5473079681396484, + "learning_rate": 0.0012368712310806432, + "loss": 1.6523, + "step": 2677 + }, + { + "epoch": 0.2824894514767933, + "grad_norm": 0.6694064140319824, + "learning_rate": 0.0012366802351873574, + "loss": 1.6469, + "step": 2678 + }, + { + "epoch": 0.28259493670886077, + "grad_norm": 0.5947160124778748, + "learning_rate": 0.0012364891847591246, + "loss": 1.6816, + "step": 2679 + }, + { + "epoch": 0.28270042194092826, + "grad_norm": 0.5875632762908936, + "learning_rate": 
0.0012362980798173526, + "loss": 1.6671, + "step": 2680 + }, + { + "epoch": 0.2828059071729958, + "grad_norm": 0.580256998538971, + "learning_rate": 0.0012361069203834561, + "loss": 1.6502, + "step": 2681 + }, + { + "epoch": 0.2829113924050633, + "grad_norm": 0.5766868591308594, + "learning_rate": 0.0012359157064788548, + "loss": 1.6396, + "step": 2682 + }, + { + "epoch": 0.2830168776371308, + "grad_norm": 0.7465797662734985, + "learning_rate": 0.0012357244381249759, + "loss": 1.7034, + "step": 2683 + }, + { + "epoch": 0.2831223628691983, + "grad_norm": 0.8193070292472839, + "learning_rate": 0.0012355331153432517, + "loss": 1.694, + "step": 2684 + }, + { + "epoch": 0.28322784810126583, + "grad_norm": 0.541053056716919, + "learning_rate": 0.0012353417381551206, + "loss": 1.6568, + "step": 2685 + }, + { + "epoch": 0.2833333333333333, + "grad_norm": 0.8485811352729797, + "learning_rate": 0.001235150306582028, + "loss": 1.6765, + "step": 2686 + }, + { + "epoch": 0.2834388185654008, + "grad_norm": 0.6827322840690613, + "learning_rate": 0.001234958820645424, + "loss": 1.6859, + "step": 2687 + }, + { + "epoch": 0.28354430379746837, + "grad_norm": 0.666796863079071, + "learning_rate": 0.0012347672803667662, + "loss": 1.6753, + "step": 2688 + }, + { + "epoch": 0.28364978902953586, + "grad_norm": 0.7107564806938171, + "learning_rate": 0.0012345756857675171, + "loss": 1.6868, + "step": 2689 + }, + { + "epoch": 0.28375527426160335, + "grad_norm": 0.5842573642730713, + "learning_rate": 0.0012343840368691462, + "loss": 1.7218, + "step": 2690 + }, + { + "epoch": 0.2838607594936709, + "grad_norm": 0.6761600971221924, + "learning_rate": 0.0012341923336931287, + "loss": 1.6956, + "step": 2691 + }, + { + "epoch": 0.2839662447257384, + "grad_norm": 0.5997446775436401, + "learning_rate": 0.0012340005762609457, + "loss": 1.6709, + "step": 2692 + }, + { + "epoch": 0.2840717299578059, + "grad_norm": 0.5958675146102905, + "learning_rate": 0.0012338087645940847, + "loss": 1.7034, + "step": 
2693 + }, + { + "epoch": 0.28417721518987343, + "grad_norm": 0.5672042965888977, + "learning_rate": 0.001233616898714039, + "loss": 1.643, + "step": 2694 + }, + { + "epoch": 0.2842827004219409, + "grad_norm": 0.6887109279632568, + "learning_rate": 0.0012334249786423086, + "loss": 1.6973, + "step": 2695 + }, + { + "epoch": 0.2843881856540084, + "grad_norm": 0.7083683609962463, + "learning_rate": 0.0012332330044003987, + "loss": 1.722, + "step": 2696 + }, + { + "epoch": 0.28449367088607597, + "grad_norm": 0.5679346919059753, + "learning_rate": 0.0012330409760098208, + "loss": 1.6537, + "step": 2697 + }, + { + "epoch": 0.28459915611814346, + "grad_norm": 0.6824039220809937, + "learning_rate": 0.0012328488934920932, + "loss": 1.6775, + "step": 2698 + }, + { + "epoch": 0.28470464135021095, + "grad_norm": 0.5341326594352722, + "learning_rate": 0.001232656756868739, + "loss": 1.6646, + "step": 2699 + }, + { + "epoch": 0.2848101265822785, + "grad_norm": 0.6128337979316711, + "learning_rate": 0.0012324645661612886, + "loss": 1.6407, + "step": 2700 + }, + { + "epoch": 0.284915611814346, + "grad_norm": 0.6229696273803711, + "learning_rate": 0.001232272321391278, + "loss": 1.7092, + "step": 2701 + }, + { + "epoch": 0.2850210970464135, + "grad_norm": 0.6551245450973511, + "learning_rate": 0.0012320800225802488, + "loss": 1.6883, + "step": 2702 + }, + { + "epoch": 0.28512658227848103, + "grad_norm": 0.7303221225738525, + "learning_rate": 0.001231887669749749, + "loss": 1.6989, + "step": 2703 + }, + { + "epoch": 0.2852320675105485, + "grad_norm": 0.5958035588264465, + "learning_rate": 0.0012316952629213332, + "loss": 1.6623, + "step": 2704 + }, + { + "epoch": 0.285337552742616, + "grad_norm": 0.6378952860832214, + "learning_rate": 0.001231502802116561, + "loss": 1.6916, + "step": 2705 + }, + { + "epoch": 0.28544303797468357, + "grad_norm": 0.6452405452728271, + "learning_rate": 0.0012313102873569993, + "loss": 1.6828, + "step": 2706 + }, + { + "epoch": 0.28554852320675106, + 
"grad_norm": 0.615056037902832, + "learning_rate": 0.0012311177186642194, + "loss": 1.6785, + "step": 2707 + }, + { + "epoch": 0.28565400843881855, + "grad_norm": 0.6795971989631653, + "learning_rate": 0.0012309250960598, + "loss": 1.6582, + "step": 2708 + }, + { + "epoch": 0.2857594936708861, + "grad_norm": 0.6632322669029236, + "learning_rate": 0.0012307324195653256, + "loss": 1.6665, + "step": 2709 + }, + { + "epoch": 0.2858649789029536, + "grad_norm": 0.6145006418228149, + "learning_rate": 0.0012305396892023867, + "loss": 1.6834, + "step": 2710 + }, + { + "epoch": 0.2859704641350211, + "grad_norm": 0.7870566844940186, + "learning_rate": 0.0012303469049925791, + "loss": 1.6844, + "step": 2711 + }, + { + "epoch": 0.28607594936708863, + "grad_norm": 0.5621927380561829, + "learning_rate": 0.001230154066957506, + "loss": 1.7144, + "step": 2712 + }, + { + "epoch": 0.2861814345991561, + "grad_norm": 0.7531269192695618, + "learning_rate": 0.001229961175118775, + "loss": 1.6673, + "step": 2713 + }, + { + "epoch": 0.2862869198312236, + "grad_norm": 0.7207185626029968, + "learning_rate": 0.0012297682294980013, + "loss": 1.7091, + "step": 2714 + }, + { + "epoch": 0.28639240506329117, + "grad_norm": 0.6020079255104065, + "learning_rate": 0.0012295752301168048, + "loss": 1.6853, + "step": 2715 + }, + { + "epoch": 0.28649789029535866, + "grad_norm": 0.6886205077171326, + "learning_rate": 0.0012293821769968126, + "loss": 1.6794, + "step": 2716 + }, + { + "epoch": 0.28660337552742615, + "grad_norm": 0.6414914131164551, + "learning_rate": 0.001229189070159657, + "loss": 1.7127, + "step": 2717 + }, + { + "epoch": 0.28670886075949364, + "grad_norm": 0.6073722839355469, + "learning_rate": 0.0012289959096269767, + "loss": 1.6466, + "step": 2718 + }, + { + "epoch": 0.2868143459915612, + "grad_norm": 0.5999168753623962, + "learning_rate": 0.0012288026954204165, + "loss": 1.6462, + "step": 2719 + }, + { + "epoch": 0.2869198312236287, + "grad_norm": 0.6032271981239319, + 
"learning_rate": 0.0012286094275616264, + "loss": 1.6608, + "step": 2720 + }, + { + "epoch": 0.2870253164556962, + "grad_norm": 0.6154013276100159, + "learning_rate": 0.0012284161060722634, + "loss": 1.6608, + "step": 2721 + }, + { + "epoch": 0.2871308016877637, + "grad_norm": 0.5898162126541138, + "learning_rate": 0.00122822273097399, + "loss": 1.6523, + "step": 2722 + }, + { + "epoch": 0.2872362869198312, + "grad_norm": 0.6258148550987244, + "learning_rate": 0.0012280293022884753, + "loss": 1.6823, + "step": 2723 + }, + { + "epoch": 0.2873417721518987, + "grad_norm": 0.5832931399345398, + "learning_rate": 0.0012278358200373935, + "loss": 1.6292, + "step": 2724 + }, + { + "epoch": 0.28744725738396626, + "grad_norm": 0.5550974011421204, + "learning_rate": 0.001227642284242425, + "loss": 1.6624, + "step": 2725 + }, + { + "epoch": 0.28755274261603375, + "grad_norm": 0.5958783030509949, + "learning_rate": 0.0012274486949252572, + "loss": 1.6847, + "step": 2726 + }, + { + "epoch": 0.28765822784810124, + "grad_norm": 0.682315468788147, + "learning_rate": 0.0012272550521075824, + "loss": 1.6691, + "step": 2727 + }, + { + "epoch": 0.2877637130801688, + "grad_norm": 0.611675500869751, + "learning_rate": 0.0012270613558110993, + "loss": 1.6408, + "step": 2728 + }, + { + "epoch": 0.2878691983122363, + "grad_norm": 0.6126739382743835, + "learning_rate": 0.001226867606057512, + "loss": 1.6873, + "step": 2729 + }, + { + "epoch": 0.2879746835443038, + "grad_norm": 0.5880418419837952, + "learning_rate": 0.0012266738028685318, + "loss": 1.6716, + "step": 2730 + }, + { + "epoch": 0.2880801687763713, + "grad_norm": 0.576758623123169, + "learning_rate": 0.001226479946265875, + "loss": 1.6755, + "step": 2731 + }, + { + "epoch": 0.2881856540084388, + "grad_norm": 0.6275326013565063, + "learning_rate": 0.0012262860362712645, + "loss": 1.6825, + "step": 2732 + }, + { + "epoch": 0.2882911392405063, + "grad_norm": 0.52357017993927, + "learning_rate": 0.0012260920729064285, + "loss": 
1.6773, + "step": 2733 + }, + { + "epoch": 0.28839662447257386, + "grad_norm": 0.5472039580345154, + "learning_rate": 0.0012258980561931016, + "loss": 1.6569, + "step": 2734 + }, + { + "epoch": 0.28850210970464135, + "grad_norm": 0.569543182849884, + "learning_rate": 0.0012257039861530246, + "loss": 1.6393, + "step": 2735 + }, + { + "epoch": 0.28860759493670884, + "grad_norm": 0.6465622186660767, + "learning_rate": 0.0012255098628079439, + "loss": 1.6473, + "step": 2736 + }, + { + "epoch": 0.2887130801687764, + "grad_norm": 0.6805784106254578, + "learning_rate": 0.0012253156861796119, + "loss": 1.6543, + "step": 2737 + }, + { + "epoch": 0.2888185654008439, + "grad_norm": 0.5665119290351868, + "learning_rate": 0.0012251214562897872, + "loss": 1.6846, + "step": 2738 + }, + { + "epoch": 0.2889240506329114, + "grad_norm": 0.6805335879325867, + "learning_rate": 0.0012249271731602342, + "loss": 1.7072, + "step": 2739 + }, + { + "epoch": 0.2890295358649789, + "grad_norm": 0.8240353465080261, + "learning_rate": 0.001224732836812723, + "loss": 1.7158, + "step": 2740 + }, + { + "epoch": 0.2891350210970464, + "grad_norm": 0.5985207557678223, + "learning_rate": 0.0012245384472690302, + "loss": 1.6648, + "step": 2741 + }, + { + "epoch": 0.2892405063291139, + "grad_norm": 0.5867387056350708, + "learning_rate": 0.0012243440045509384, + "loss": 1.6869, + "step": 2742 + }, + { + "epoch": 0.28934599156118146, + "grad_norm": 0.6460103988647461, + "learning_rate": 0.0012241495086802356, + "loss": 1.6488, + "step": 2743 + }, + { + "epoch": 0.28945147679324895, + "grad_norm": 0.49960753321647644, + "learning_rate": 0.0012239549596787158, + "loss": 1.6935, + "step": 2744 + }, + { + "epoch": 0.28955696202531644, + "grad_norm": 0.6306697726249695, + "learning_rate": 0.0012237603575681797, + "loss": 1.6839, + "step": 2745 + }, + { + "epoch": 0.289662447257384, + "grad_norm": 0.5812922120094299, + "learning_rate": 0.0012235657023704327, + "loss": 1.6839, + "step": 2746 + }, + { + "epoch": 
0.2897679324894515, + "grad_norm": 0.5189002156257629, + "learning_rate": 0.001223370994107288, + "loss": 1.6561, + "step": 2747 + }, + { + "epoch": 0.289873417721519, + "grad_norm": 0.5860068798065186, + "learning_rate": 0.0012231762328005623, + "loss": 1.6843, + "step": 2748 + }, + { + "epoch": 0.28997890295358647, + "grad_norm": 0.5205662846565247, + "learning_rate": 0.0012229814184720805, + "loss": 1.7017, + "step": 2749 + }, + { + "epoch": 0.290084388185654, + "grad_norm": 0.5461918115615845, + "learning_rate": 0.0012227865511436724, + "loss": 1.6738, + "step": 2750 + }, + { + "epoch": 0.2901898734177215, + "grad_norm": 0.6179672479629517, + "learning_rate": 0.0012225916308371736, + "loss": 1.6649, + "step": 2751 + }, + { + "epoch": 0.290295358649789, + "grad_norm": 0.5985779762268066, + "learning_rate": 0.001222396657574426, + "loss": 1.6915, + "step": 2752 + }, + { + "epoch": 0.29040084388185655, + "grad_norm": 0.56458979845047, + "learning_rate": 0.0012222016313772773, + "loss": 1.6619, + "step": 2753 + }, + { + "epoch": 0.29050632911392404, + "grad_norm": 0.6113935112953186, + "learning_rate": 0.0012220065522675811, + "loss": 1.6908, + "step": 2754 + }, + { + "epoch": 0.29061181434599154, + "grad_norm": 0.5407605767250061, + "learning_rate": 0.0012218114202671973, + "loss": 1.7121, + "step": 2755 + }, + { + "epoch": 0.2907172995780591, + "grad_norm": 0.7014201879501343, + "learning_rate": 0.001221616235397991, + "loss": 1.6901, + "step": 2756 + }, + { + "epoch": 0.2908227848101266, + "grad_norm": 0.5370446443557739, + "learning_rate": 0.001221420997681834, + "loss": 1.669, + "step": 2757 + }, + { + "epoch": 0.29092827004219407, + "grad_norm": 0.6033335328102112, + "learning_rate": 0.0012212257071406037, + "loss": 1.6772, + "step": 2758 + }, + { + "epoch": 0.2910337552742616, + "grad_norm": 0.5160382390022278, + "learning_rate": 0.0012210303637961828, + "loss": 1.7355, + "step": 2759 + }, + { + "epoch": 0.2911392405063291, + "grad_norm": 0.6546493768692017, 
+ "learning_rate": 0.001220834967670461, + "loss": 1.683, + "step": 2760 + }, + { + "epoch": 0.2912447257383966, + "grad_norm": 0.589641273021698, + "learning_rate": 0.0012206395187853334, + "loss": 1.6965, + "step": 2761 + }, + { + "epoch": 0.29135021097046415, + "grad_norm": 0.5645790100097656, + "learning_rate": 0.0012204440171627005, + "loss": 1.7027, + "step": 2762 + }, + { + "epoch": 0.29145569620253164, + "grad_norm": 0.539150595664978, + "learning_rate": 0.00122024846282447, + "loss": 1.7128, + "step": 2763 + }, + { + "epoch": 0.29156118143459914, + "grad_norm": 0.6211428642272949, + "learning_rate": 0.0012200528557925543, + "loss": 1.644, + "step": 2764 + }, + { + "epoch": 0.2916666666666667, + "grad_norm": 0.6137315630912781, + "learning_rate": 0.0012198571960888721, + "loss": 1.6951, + "step": 2765 + }, + { + "epoch": 0.2917721518987342, + "grad_norm": 0.5171540975570679, + "learning_rate": 0.0012196614837353481, + "loss": 1.6588, + "step": 2766 + }, + { + "epoch": 0.29187763713080167, + "grad_norm": 0.5611939430236816, + "learning_rate": 0.001219465718753913, + "loss": 1.6717, + "step": 2767 + }, + { + "epoch": 0.2919831223628692, + "grad_norm": 0.5092734694480896, + "learning_rate": 0.0012192699011665034, + "loss": 1.6605, + "step": 2768 + }, + { + "epoch": 0.2920886075949367, + "grad_norm": 0.5892229080200195, + "learning_rate": 0.0012190740309950612, + "loss": 1.6861, + "step": 2769 + }, + { + "epoch": 0.2921940928270042, + "grad_norm": 0.5384669899940491, + "learning_rate": 0.0012188781082615346, + "loss": 1.7057, + "step": 2770 + }, + { + "epoch": 0.29229957805907175, + "grad_norm": 0.5711917281150818, + "learning_rate": 0.0012186821329878783, + "loss": 1.6727, + "step": 2771 + }, + { + "epoch": 0.29240506329113924, + "grad_norm": 0.5403137803077698, + "learning_rate": 0.0012184861051960517, + "loss": 1.688, + "step": 2772 + }, + { + "epoch": 0.29251054852320674, + "grad_norm": 0.5670219659805298, + "learning_rate": 0.001218290024908021, + "loss": 
1.6812, + "step": 2773 + }, + { + "epoch": 0.2926160337552743, + "grad_norm": 0.566531777381897, + "learning_rate": 0.0012180938921457576, + "loss": 1.6751, + "step": 2774 + }, + { + "epoch": 0.2927215189873418, + "grad_norm": 0.5729284882545471, + "learning_rate": 0.00121789770693124, + "loss": 1.6742, + "step": 2775 + }, + { + "epoch": 0.29282700421940927, + "grad_norm": 0.5397321581840515, + "learning_rate": 0.001217701469286451, + "loss": 1.6849, + "step": 2776 + }, + { + "epoch": 0.2929324894514768, + "grad_norm": 0.5676897764205933, + "learning_rate": 0.00121750517923338, + "loss": 1.7005, + "step": 2777 + }, + { + "epoch": 0.2930379746835443, + "grad_norm": 0.5622857809066772, + "learning_rate": 0.0012173088367940228, + "loss": 1.6717, + "step": 2778 + }, + { + "epoch": 0.2931434599156118, + "grad_norm": 0.5567747950553894, + "learning_rate": 0.0012171124419903799, + "loss": 1.676, + "step": 2779 + }, + { + "epoch": 0.29324894514767935, + "grad_norm": 0.5544413924217224, + "learning_rate": 0.0012169159948444588, + "loss": 1.6574, + "step": 2780 + }, + { + "epoch": 0.29335443037974684, + "grad_norm": 0.5583353638648987, + "learning_rate": 0.001216719495378272, + "loss": 1.6768, + "step": 2781 + }, + { + "epoch": 0.29345991561181434, + "grad_norm": 0.5440578460693359, + "learning_rate": 0.0012165229436138388, + "loss": 1.6814, + "step": 2782 + }, + { + "epoch": 0.29356540084388183, + "grad_norm": 0.5137050747871399, + "learning_rate": 0.0012163263395731834, + "loss": 1.6645, + "step": 2783 + }, + { + "epoch": 0.2936708860759494, + "grad_norm": 0.512529730796814, + "learning_rate": 0.0012161296832783363, + "loss": 1.6858, + "step": 2784 + }, + { + "epoch": 0.29377637130801687, + "grad_norm": 0.6087945699691772, + "learning_rate": 0.0012159329747513338, + "loss": 1.6763, + "step": 2785 + }, + { + "epoch": 0.29388185654008436, + "grad_norm": 0.5913386344909668, + "learning_rate": 0.001215736214014218, + "loss": 1.6409, + "step": 2786 + }, + { + "epoch": 
0.2939873417721519, + "grad_norm": 0.5551825165748596, + "learning_rate": 0.001215539401089037, + "loss": 1.6616, + "step": 2787 + }, + { + "epoch": 0.2940928270042194, + "grad_norm": 0.6327715516090393, + "learning_rate": 0.0012153425359978452, + "loss": 1.7014, + "step": 2788 + }, + { + "epoch": 0.2941983122362869, + "grad_norm": 0.5910356044769287, + "learning_rate": 0.0012151456187627016, + "loss": 1.7007, + "step": 2789 + }, + { + "epoch": 0.29430379746835444, + "grad_norm": 0.554602324962616, + "learning_rate": 0.001214948649405672, + "loss": 1.6713, + "step": 2790 + }, + { + "epoch": 0.29440928270042194, + "grad_norm": 0.6043323874473572, + "learning_rate": 0.0012147516279488275, + "loss": 1.6876, + "step": 2791 + }, + { + "epoch": 0.29451476793248943, + "grad_norm": 0.6118046045303345, + "learning_rate": 0.0012145545544142461, + "loss": 1.6344, + "step": 2792 + }, + { + "epoch": 0.294620253164557, + "grad_norm": 0.5081064105033875, + "learning_rate": 0.00121435742882401, + "loss": 1.677, + "step": 2793 + }, + { + "epoch": 0.29472573839662447, + "grad_norm": 0.683129608631134, + "learning_rate": 0.001214160251200209, + "loss": 1.6851, + "step": 2794 + }, + { + "epoch": 0.29483122362869196, + "grad_norm": 0.6186483502388, + "learning_rate": 0.0012139630215649369, + "loss": 1.6498, + "step": 2795 + }, + { + "epoch": 0.2949367088607595, + "grad_norm": 0.6049129366874695, + "learning_rate": 0.0012137657399402947, + "loss": 1.701, + "step": 2796 + }, + { + "epoch": 0.295042194092827, + "grad_norm": 0.5677469372749329, + "learning_rate": 0.0012135684063483891, + "loss": 1.6815, + "step": 2797 + }, + { + "epoch": 0.2951476793248945, + "grad_norm": 0.6231409907341003, + "learning_rate": 0.0012133710208113318, + "loss": 1.6931, + "step": 2798 + }, + { + "epoch": 0.29525316455696204, + "grad_norm": 0.7165804505348206, + "learning_rate": 0.0012131735833512411, + "loss": 1.6831, + "step": 2799 + }, + { + "epoch": 0.29535864978902954, + "grad_norm": 0.5595724582672119, + 
"learning_rate": 0.0012129760939902407, + "loss": 1.6387, + "step": 2800 + }, + { + "epoch": 0.29546413502109703, + "grad_norm": 0.7605285048484802, + "learning_rate": 0.0012127785527504603, + "loss": 1.6876, + "step": 2801 + }, + { + "epoch": 0.2955696202531646, + "grad_norm": 0.6822359561920166, + "learning_rate": 0.0012125809596540357, + "loss": 1.6547, + "step": 2802 + }, + { + "epoch": 0.29567510548523207, + "grad_norm": 0.5489587187767029, + "learning_rate": 0.0012123833147231079, + "loss": 1.6833, + "step": 2803 + }, + { + "epoch": 0.29578059071729956, + "grad_norm": 0.7326112985610962, + "learning_rate": 0.0012121856179798237, + "loss": 1.6822, + "step": 2804 + }, + { + "epoch": 0.2958860759493671, + "grad_norm": 0.7622552514076233, + "learning_rate": 0.0012119878694463366, + "loss": 1.6529, + "step": 2805 + }, + { + "epoch": 0.2959915611814346, + "grad_norm": 0.5948600769042969, + "learning_rate": 0.001211790069144805, + "loss": 1.6746, + "step": 2806 + }, + { + "epoch": 0.2960970464135021, + "grad_norm": 0.6517002582550049, + "learning_rate": 0.0012115922170973935, + "loss": 1.6796, + "step": 2807 + }, + { + "epoch": 0.29620253164556964, + "grad_norm": 0.6930995583534241, + "learning_rate": 0.0012113943133262722, + "loss": 1.6653, + "step": 2808 + }, + { + "epoch": 0.29630801687763714, + "grad_norm": 0.643606424331665, + "learning_rate": 0.0012111963578536177, + "loss": 1.693, + "step": 2809 + }, + { + "epoch": 0.29641350210970463, + "grad_norm": 0.5848391056060791, + "learning_rate": 0.0012109983507016114, + "loss": 1.6807, + "step": 2810 + }, + { + "epoch": 0.2965189873417722, + "grad_norm": 0.720893383026123, + "learning_rate": 0.0012108002918924411, + "loss": 1.6715, + "step": 2811 + }, + { + "epoch": 0.29662447257383967, + "grad_norm": 0.6075405478477478, + "learning_rate": 0.0012106021814483007, + "loss": 1.6606, + "step": 2812 + }, + { + "epoch": 0.29672995780590716, + "grad_norm": 0.5607403516769409, + "learning_rate": 0.0012104040193913884, + 
"loss": 1.6648, + "step": 2813 + }, + { + "epoch": 0.2968354430379747, + "grad_norm": 0.702920138835907, + "learning_rate": 0.0012102058057439104, + "loss": 1.7121, + "step": 2814 + }, + { + "epoch": 0.2969409282700422, + "grad_norm": 0.6377735137939453, + "learning_rate": 0.001210007540528077, + "loss": 1.6736, + "step": 2815 + }, + { + "epoch": 0.2970464135021097, + "grad_norm": 0.5723035335540771, + "learning_rate": 0.0012098092237661049, + "loss": 1.6372, + "step": 2816 + }, + { + "epoch": 0.2971518987341772, + "grad_norm": 0.642195999622345, + "learning_rate": 0.0012096108554802165, + "loss": 1.6746, + "step": 2817 + }, + { + "epoch": 0.29725738396624474, + "grad_norm": 0.6011932492256165, + "learning_rate": 0.0012094124356926397, + "loss": 1.7078, + "step": 2818 + }, + { + "epoch": 0.29736286919831223, + "grad_norm": 0.6373327374458313, + "learning_rate": 0.001209213964425609, + "loss": 1.6822, + "step": 2819 + }, + { + "epoch": 0.2974683544303797, + "grad_norm": 0.739249050617218, + "learning_rate": 0.0012090154417013636, + "loss": 1.7065, + "step": 2820 + }, + { + "epoch": 0.29757383966244727, + "grad_norm": 0.5431594848632812, + "learning_rate": 0.0012088168675421487, + "loss": 1.6445, + "step": 2821 + }, + { + "epoch": 0.29767932489451476, + "grad_norm": 0.7886919379234314, + "learning_rate": 0.0012086182419702165, + "loss": 1.65, + "step": 2822 + }, + { + "epoch": 0.29778481012658226, + "grad_norm": 0.6242712140083313, + "learning_rate": 0.0012084195650078232, + "loss": 1.6714, + "step": 2823 + }, + { + "epoch": 0.2978902953586498, + "grad_norm": 0.622776985168457, + "learning_rate": 0.001208220836677232, + "loss": 1.6424, + "step": 2824 + }, + { + "epoch": 0.2979957805907173, + "grad_norm": 0.8422796726226807, + "learning_rate": 0.0012080220570007108, + "loss": 1.6516, + "step": 2825 + }, + { + "epoch": 0.2981012658227848, + "grad_norm": 0.5470225214958191, + "learning_rate": 0.001207823226000534, + "loss": 1.6555, + "step": 2826 + }, + { + "epoch": 
0.29820675105485234, + "grad_norm": 0.9719045162200928, + "learning_rate": 0.0012076243436989823, + "loss": 1.6263, + "step": 2827 + }, + { + "epoch": 0.29831223628691983, + "grad_norm": 1.1208152770996094, + "learning_rate": 0.0012074254101183408, + "loss": 1.6586, + "step": 2828 + }, + { + "epoch": 0.2984177215189873, + "grad_norm": 0.5857208371162415, + "learning_rate": 0.001207226425280901, + "loss": 1.6532, + "step": 2829 + }, + { + "epoch": 0.29852320675105487, + "grad_norm": 1.279778242111206, + "learning_rate": 0.0012070273892089605, + "loss": 1.6687, + "step": 2830 + }, + { + "epoch": 0.29862869198312236, + "grad_norm": 0.5570863485336304, + "learning_rate": 0.001206828301924822, + "loss": 1.6485, + "step": 2831 + }, + { + "epoch": 0.29873417721518986, + "grad_norm": 1.025732398033142, + "learning_rate": 0.0012066291634507944, + "loss": 1.6849, + "step": 2832 + }, + { + "epoch": 0.2988396624472574, + "grad_norm": 0.7379934787750244, + "learning_rate": 0.001206429973809192, + "loss": 1.6525, + "step": 2833 + }, + { + "epoch": 0.2989451476793249, + "grad_norm": 0.7496838569641113, + "learning_rate": 0.001206230733022335, + "loss": 1.7227, + "step": 2834 + }, + { + "epoch": 0.2990506329113924, + "grad_norm": 1.0382286310195923, + "learning_rate": 0.0012060314411125497, + "loss": 1.6439, + "step": 2835 + }, + { + "epoch": 0.29915611814345994, + "grad_norm": 0.5878760814666748, + "learning_rate": 0.0012058320981021672, + "loss": 1.6381, + "step": 2836 + }, + { + "epoch": 0.29926160337552743, + "grad_norm": 1.0053980350494385, + "learning_rate": 0.001205632704013525, + "loss": 1.6593, + "step": 2837 + }, + { + "epoch": 0.2993670886075949, + "grad_norm": 0.8209816813468933, + "learning_rate": 0.0012054332588689667, + "loss": 1.634, + "step": 2838 + }, + { + "epoch": 0.29947257383966247, + "grad_norm": 0.6834116578102112, + "learning_rate": 0.0012052337626908406, + "loss": 1.6538, + "step": 2839 + }, + { + "epoch": 0.29957805907172996, + "grad_norm": 
0.906596839427948, + "learning_rate": 0.0012050342155015012, + "loss": 1.685, + "step": 2840 + }, + { + "epoch": 0.29968354430379746, + "grad_norm": 0.5323472619056702, + "learning_rate": 0.0012048346173233091, + "loss": 1.6432, + "step": 2841 + }, + { + "epoch": 0.299789029535865, + "grad_norm": 0.9386053085327148, + "learning_rate": 0.0012046349681786304, + "loss": 1.6964, + "step": 2842 + }, + { + "epoch": 0.2998945147679325, + "grad_norm": 0.7786003947257996, + "learning_rate": 0.001204435268089836, + "loss": 1.6494, + "step": 2843 + }, + { + "epoch": 0.3, + "grad_norm": 0.6051578521728516, + "learning_rate": 0.001204235517079304, + "loss": 1.683, + "step": 2844 + }, + { + "epoch": 0.30010548523206754, + "grad_norm": 0.8086494207382202, + "learning_rate": 0.0012040357151694172, + "loss": 1.6705, + "step": 2845 + }, + { + "epoch": 0.30021097046413503, + "grad_norm": 0.530680239200592, + "learning_rate": 0.0012038358623825646, + "loss": 1.6712, + "step": 2846 + }, + { + "epoch": 0.3003164556962025, + "grad_norm": 0.6984338760375977, + "learning_rate": 0.0012036359587411405, + "loss": 1.6741, + "step": 2847 + }, + { + "epoch": 0.30042194092827, + "grad_norm": 0.699232816696167, + "learning_rate": 0.0012034360042675453, + "loss": 1.6316, + "step": 2848 + }, + { + "epoch": 0.30052742616033756, + "grad_norm": 0.5419764518737793, + "learning_rate": 0.0012032359989841849, + "loss": 1.6528, + "step": 2849 + }, + { + "epoch": 0.30063291139240506, + "grad_norm": 0.5508131980895996, + "learning_rate": 0.0012030359429134707, + "loss": 1.6616, + "step": 2850 + }, + { + "epoch": 0.30073839662447255, + "grad_norm": 0.5431334972381592, + "learning_rate": 0.00120283583607782, + "loss": 1.6295, + "step": 2851 + }, + { + "epoch": 0.3008438818565401, + "grad_norm": 0.5222752690315247, + "learning_rate": 0.0012026356784996554, + "loss": 1.6767, + "step": 2852 + }, + { + "epoch": 0.3009493670886076, + "grad_norm": 0.5292475819587708, + "learning_rate": 0.0012024354702014066, + 
"loss": 1.6687, + "step": 2853 + }, + { + "epoch": 0.3010548523206751, + "grad_norm": 0.5247073769569397, + "learning_rate": 0.0012022352112055071, + "loss": 1.6543, + "step": 2854 + }, + { + "epoch": 0.30116033755274263, + "grad_norm": 0.5911626219749451, + "learning_rate": 0.001202034901534397, + "loss": 1.7134, + "step": 2855 + }, + { + "epoch": 0.3012658227848101, + "grad_norm": 0.4953978359699249, + "learning_rate": 0.0012018345412105223, + "loss": 1.6849, + "step": 2856 + }, + { + "epoch": 0.3013713080168776, + "grad_norm": 0.5756440758705139, + "learning_rate": 0.0012016341302563342, + "loss": 1.6619, + "step": 2857 + }, + { + "epoch": 0.30147679324894516, + "grad_norm": 0.5511043667793274, + "learning_rate": 0.0012014336686942898, + "loss": 1.6581, + "step": 2858 + }, + { + "epoch": 0.30158227848101266, + "grad_norm": 0.6170274019241333, + "learning_rate": 0.0012012331565468518, + "loss": 1.6429, + "step": 2859 + }, + { + "epoch": 0.30168776371308015, + "grad_norm": 0.5462653636932373, + "learning_rate": 0.0012010325938364883, + "loss": 1.6683, + "step": 2860 + }, + { + "epoch": 0.3017932489451477, + "grad_norm": 0.5609389543533325, + "learning_rate": 0.0012008319805856737, + "loss": 1.6686, + "step": 2861 + }, + { + "epoch": 0.3018987341772152, + "grad_norm": 0.6090097427368164, + "learning_rate": 0.0012006313168168878, + "loss": 1.6694, + "step": 2862 + }, + { + "epoch": 0.3020042194092827, + "grad_norm": 0.47506260871887207, + "learning_rate": 0.0012004306025526158, + "loss": 1.6529, + "step": 2863 + }, + { + "epoch": 0.30210970464135023, + "grad_norm": 0.5417996048927307, + "learning_rate": 0.0012002298378153485, + "loss": 1.6388, + "step": 2864 + }, + { + "epoch": 0.3022151898734177, + "grad_norm": 0.5691490173339844, + "learning_rate": 0.001200029022627583, + "loss": 1.6709, + "step": 2865 + }, + { + "epoch": 0.3023206751054852, + "grad_norm": 0.5408968329429626, + "learning_rate": 0.0011998281570118213, + "loss": 1.7184, + "step": 2866 + }, + { + 
"epoch": 0.30242616033755276, + "grad_norm": 0.5983133912086487, + "learning_rate": 0.0011996272409905717, + "loss": 1.6635, + "step": 2867 + }, + { + "epoch": 0.30253164556962026, + "grad_norm": 0.5614466071128845, + "learning_rate": 0.0011994262745863478, + "loss": 1.6626, + "step": 2868 + }, + { + "epoch": 0.30263713080168775, + "grad_norm": 0.6240462064743042, + "learning_rate": 0.0011992252578216683, + "loss": 1.6089, + "step": 2869 + }, + { + "epoch": 0.3027426160337553, + "grad_norm": 0.6281453371047974, + "learning_rate": 0.0011990241907190592, + "loss": 1.7085, + "step": 2870 + }, + { + "epoch": 0.3028481012658228, + "grad_norm": 0.5262105464935303, + "learning_rate": 0.0011988230733010502, + "loss": 1.6562, + "step": 2871 + }, + { + "epoch": 0.3029535864978903, + "grad_norm": 0.5574153661727905, + "learning_rate": 0.0011986219055901781, + "loss": 1.6622, + "step": 2872 + }, + { + "epoch": 0.30305907172995783, + "grad_norm": 0.5660473108291626, + "learning_rate": 0.0011984206876089842, + "loss": 1.6711, + "step": 2873 + }, + { + "epoch": 0.3031645569620253, + "grad_norm": 0.6189888119697571, + "learning_rate": 0.001198219419380016, + "loss": 1.613, + "step": 2874 + }, + { + "epoch": 0.3032700421940928, + "grad_norm": 0.5671956539154053, + "learning_rate": 0.0011980181009258273, + "loss": 1.6477, + "step": 2875 + }, + { + "epoch": 0.30337552742616036, + "grad_norm": 0.6249507069587708, + "learning_rate": 0.0011978167322689761, + "loss": 1.6435, + "step": 2876 + }, + { + "epoch": 0.30348101265822786, + "grad_norm": 0.631936252117157, + "learning_rate": 0.001197615313432027, + "loss": 1.6503, + "step": 2877 + }, + { + "epoch": 0.30358649789029535, + "grad_norm": 0.6989109516143799, + "learning_rate": 0.00119741384443755, + "loss": 1.6698, + "step": 2878 + }, + { + "epoch": 0.3036919831223629, + "grad_norm": 0.5896928906440735, + "learning_rate": 0.001197212325308121, + "loss": 1.6522, + "step": 2879 + }, + { + "epoch": 0.3037974683544304, + "grad_norm": 
0.6224174499511719, + "learning_rate": 0.001197010756066321, + "loss": 1.6677, + "step": 2880 + }, + { + "epoch": 0.3039029535864979, + "grad_norm": 0.573586642742157, + "learning_rate": 0.0011968091367347367, + "loss": 1.6538, + "step": 2881 + }, + { + "epoch": 0.3040084388185654, + "grad_norm": 0.647431492805481, + "learning_rate": 0.0011966074673359602, + "loss": 1.6603, + "step": 2882 + }, + { + "epoch": 0.3041139240506329, + "grad_norm": 0.6589313745498657, + "learning_rate": 0.0011964057478925903, + "loss": 1.6263, + "step": 2883 + }, + { + "epoch": 0.3042194092827004, + "grad_norm": 0.5975614190101624, + "learning_rate": 0.0011962039784272306, + "loss": 1.6542, + "step": 2884 + }, + { + "epoch": 0.3043248945147679, + "grad_norm": 0.6549745202064514, + "learning_rate": 0.0011960021589624897, + "loss": 1.6686, + "step": 2885 + }, + { + "epoch": 0.30443037974683546, + "grad_norm": 0.6195908784866333, + "learning_rate": 0.001195800289520983, + "loss": 1.6772, + "step": 2886 + }, + { + "epoch": 0.30453586497890295, + "grad_norm": 0.594193696975708, + "learning_rate": 0.0011955983701253312, + "loss": 1.6855, + "step": 2887 + }, + { + "epoch": 0.30464135021097044, + "grad_norm": 0.6592205166816711, + "learning_rate": 0.0011953964007981601, + "loss": 1.6235, + "step": 2888 + }, + { + "epoch": 0.304746835443038, + "grad_norm": 0.6560419201850891, + "learning_rate": 0.001195194381562101, + "loss": 1.7024, + "step": 2889 + }, + { + "epoch": 0.3048523206751055, + "grad_norm": 0.5750100612640381, + "learning_rate": 0.0011949923124397917, + "loss": 1.6428, + "step": 2890 + }, + { + "epoch": 0.304957805907173, + "grad_norm": 0.5578452944755554, + "learning_rate": 0.0011947901934538747, + "loss": 1.675, + "step": 2891 + }, + { + "epoch": 0.3050632911392405, + "grad_norm": 0.6478498578071594, + "learning_rate": 0.0011945880246269987, + "loss": 1.6679, + "step": 2892 + }, + { + "epoch": 0.305168776371308, + "grad_norm": 0.6699252128601074, + "learning_rate": 
0.0011943858059818178, + "loss": 1.649, + "step": 2893 + }, + { + "epoch": 0.3052742616033755, + "grad_norm": 0.6070749163627625, + "learning_rate": 0.0011941835375409912, + "loss": 1.6665, + "step": 2894 + }, + { + "epoch": 0.30537974683544306, + "grad_norm": 0.7429534196853638, + "learning_rate": 0.0011939812193271844, + "loss": 1.66, + "step": 2895 + }, + { + "epoch": 0.30548523206751055, + "grad_norm": 0.5806812047958374, + "learning_rate": 0.001193778851363068, + "loss": 1.6664, + "step": 2896 + }, + { + "epoch": 0.30559071729957804, + "grad_norm": 0.6899418234825134, + "learning_rate": 0.0011935764336713187, + "loss": 1.6894, + "step": 2897 + }, + { + "epoch": 0.3056962025316456, + "grad_norm": 0.7403931617736816, + "learning_rate": 0.0011933739662746178, + "loss": 1.6853, + "step": 2898 + }, + { + "epoch": 0.3058016877637131, + "grad_norm": 0.6438469290733337, + "learning_rate": 0.0011931714491956531, + "loss": 1.6695, + "step": 2899 + }, + { + "epoch": 0.3059071729957806, + "grad_norm": 0.8255363702774048, + "learning_rate": 0.001192968882457118, + "loss": 1.6516, + "step": 2900 + }, + { + "epoch": 0.3060126582278481, + "grad_norm": 0.785639762878418, + "learning_rate": 0.0011927662660817105, + "loss": 1.6686, + "step": 2901 + }, + { + "epoch": 0.3061181434599156, + "grad_norm": 0.6760424971580505, + "learning_rate": 0.0011925636000921355, + "loss": 1.7009, + "step": 2902 + }, + { + "epoch": 0.3062236286919831, + "grad_norm": 0.6784874796867371, + "learning_rate": 0.0011923608845111017, + "loss": 1.6699, + "step": 2903 + }, + { + "epoch": 0.30632911392405066, + "grad_norm": 0.7891411185264587, + "learning_rate": 0.0011921581193613253, + "loss": 1.6754, + "step": 2904 + }, + { + "epoch": 0.30643459915611815, + "grad_norm": 0.6485141515731812, + "learning_rate": 0.0011919553046655267, + "loss": 1.683, + "step": 2905 + }, + { + "epoch": 0.30654008438818564, + "grad_norm": 0.5860922932624817, + "learning_rate": 0.0011917524404464325, + "loss": 1.6929, + "step": 
2906 + }, + { + "epoch": 0.3066455696202532, + "grad_norm": 0.7677130103111267, + "learning_rate": 0.0011915495267267745, + "loss": 1.632, + "step": 2907 + }, + { + "epoch": 0.3067510548523207, + "grad_norm": 0.6885020136833191, + "learning_rate": 0.0011913465635292903, + "loss": 1.6661, + "step": 2908 + }, + { + "epoch": 0.3068565400843882, + "grad_norm": 0.6182817816734314, + "learning_rate": 0.001191143550876723, + "loss": 1.6512, + "step": 2909 + }, + { + "epoch": 0.3069620253164557, + "grad_norm": 0.6871697902679443, + "learning_rate": 0.001190940488791821, + "loss": 1.6457, + "step": 2910 + }, + { + "epoch": 0.3070675105485232, + "grad_norm": 0.5894158482551575, + "learning_rate": 0.0011907373772973384, + "loss": 1.6256, + "step": 2911 + }, + { + "epoch": 0.3071729957805907, + "grad_norm": 0.8167020082473755, + "learning_rate": 0.001190534216416035, + "loss": 1.6651, + "step": 2912 + }, + { + "epoch": 0.30727848101265826, + "grad_norm": 0.5736615061759949, + "learning_rate": 0.0011903310061706762, + "loss": 1.6379, + "step": 2913 + }, + { + "epoch": 0.30738396624472575, + "grad_norm": 0.7871326208114624, + "learning_rate": 0.0011901277465840323, + "loss": 1.6864, + "step": 2914 + }, + { + "epoch": 0.30748945147679324, + "grad_norm": 0.8797299265861511, + "learning_rate": 0.0011899244376788797, + "loss": 1.6961, + "step": 2915 + }, + { + "epoch": 0.30759493670886073, + "grad_norm": 0.5208446383476257, + "learning_rate": 0.001189721079478, + "loss": 1.6712, + "step": 2916 + }, + { + "epoch": 0.3077004219409283, + "grad_norm": 0.7745703458786011, + "learning_rate": 0.001189517672004181, + "loss": 1.6258, + "step": 2917 + }, + { + "epoch": 0.3078059071729958, + "grad_norm": 0.6665640473365784, + "learning_rate": 0.0011893142152802152, + "loss": 1.6429, + "step": 2918 + }, + { + "epoch": 0.30791139240506327, + "grad_norm": 0.6507602334022522, + "learning_rate": 0.0011891107093289007, + "loss": 1.6978, + "step": 2919 + }, + { + "epoch": 0.3080168776371308, + 
"grad_norm": 0.7168036699295044, + "learning_rate": 0.0011889071541730419, + "loss": 1.6516, + "step": 2920 + }, + { + "epoch": 0.3081223628691983, + "grad_norm": 0.6887493133544922, + "learning_rate": 0.0011887035498354475, + "loss": 1.6309, + "step": 2921 + }, + { + "epoch": 0.3082278481012658, + "grad_norm": 0.5244121551513672, + "learning_rate": 0.0011884998963389334, + "loss": 1.6626, + "step": 2922 + }, + { + "epoch": 0.30833333333333335, + "grad_norm": 0.5704571008682251, + "learning_rate": 0.0011882961937063187, + "loss": 1.6711, + "step": 2923 + }, + { + "epoch": 0.30843881856540084, + "grad_norm": 0.6246335506439209, + "learning_rate": 0.0011880924419604305, + "loss": 1.6431, + "step": 2924 + }, + { + "epoch": 0.30854430379746833, + "grad_norm": 0.6127635836601257, + "learning_rate": 0.0011878886411240991, + "loss": 1.6583, + "step": 2925 + }, + { + "epoch": 0.3086497890295359, + "grad_norm": 0.5697451829910278, + "learning_rate": 0.0011876847912201624, + "loss": 1.6587, + "step": 2926 + }, + { + "epoch": 0.3087552742616034, + "grad_norm": 0.5506711602210999, + "learning_rate": 0.0011874808922714623, + "loss": 1.6743, + "step": 2927 + }, + { + "epoch": 0.30886075949367087, + "grad_norm": 0.6231689453125, + "learning_rate": 0.0011872769443008466, + "loss": 1.6975, + "step": 2928 + }, + { + "epoch": 0.3089662447257384, + "grad_norm": 0.6228083968162537, + "learning_rate": 0.001187072947331169, + "loss": 1.6251, + "step": 2929 + }, + { + "epoch": 0.3090717299578059, + "grad_norm": 0.5499572157859802, + "learning_rate": 0.001186868901385288, + "loss": 1.6476, + "step": 2930 + }, + { + "epoch": 0.3091772151898734, + "grad_norm": 0.5865378975868225, + "learning_rate": 0.0011866648064860683, + "loss": 1.6888, + "step": 2931 + }, + { + "epoch": 0.30928270042194095, + "grad_norm": 0.62659752368927, + "learning_rate": 0.0011864606626563795, + "loss": 1.7095, + "step": 2932 + }, + { + "epoch": 0.30938818565400844, + "grad_norm": 0.6300550103187561, + 
"learning_rate": 0.0011862564699190972, + "loss": 1.6465, + "step": 2933 + }, + { + "epoch": 0.30949367088607593, + "grad_norm": 0.6016635894775391, + "learning_rate": 0.0011860522282971019, + "loss": 1.6837, + "step": 2934 + }, + { + "epoch": 0.3095991561181435, + "grad_norm": 0.6375923752784729, + "learning_rate": 0.0011858479378132802, + "loss": 1.6671, + "step": 2935 + }, + { + "epoch": 0.309704641350211, + "grad_norm": 0.7172718048095703, + "learning_rate": 0.0011856435984905237, + "loss": 1.7024, + "step": 2936 + }, + { + "epoch": 0.30981012658227847, + "grad_norm": 0.6246292591094971, + "learning_rate": 0.00118543921035173, + "loss": 1.6237, + "step": 2937 + }, + { + "epoch": 0.309915611814346, + "grad_norm": 0.5959566831588745, + "learning_rate": 0.001185234773419801, + "loss": 1.6492, + "step": 2938 + }, + { + "epoch": 0.3100210970464135, + "grad_norm": 0.6537473797798157, + "learning_rate": 0.0011850302877176456, + "loss": 1.6049, + "step": 2939 + }, + { + "epoch": 0.310126582278481, + "grad_norm": 0.6144710183143616, + "learning_rate": 0.001184825753268177, + "loss": 1.6437, + "step": 2940 + }, + { + "epoch": 0.31023206751054855, + "grad_norm": 0.5588403940200806, + "learning_rate": 0.0011846211700943148, + "loss": 1.6788, + "step": 2941 + }, + { + "epoch": 0.31033755274261604, + "grad_norm": 0.6462646126747131, + "learning_rate": 0.001184416538218983, + "loss": 1.6538, + "step": 2942 + }, + { + "epoch": 0.31044303797468353, + "grad_norm": 0.5585069060325623, + "learning_rate": 0.0011842118576651122, + "loss": 1.685, + "step": 2943 + }, + { + "epoch": 0.3105485232067511, + "grad_norm": 0.59987872838974, + "learning_rate": 0.0011840071284556373, + "loss": 1.7047, + "step": 2944 + }, + { + "epoch": 0.3106540084388186, + "grad_norm": 0.6724440455436707, + "learning_rate": 0.0011838023506134997, + "loss": 1.6823, + "step": 2945 + }, + { + "epoch": 0.31075949367088607, + "grad_norm": 0.5992052555084229, + "learning_rate": 0.0011835975241616455, + "loss": 
1.6415, + "step": 2946 + }, + { + "epoch": 0.31086497890295356, + "grad_norm": 0.5929242372512817, + "learning_rate": 0.0011833926491230265, + "loss": 1.6742, + "step": 2947 + }, + { + "epoch": 0.3109704641350211, + "grad_norm": 0.6151250004768372, + "learning_rate": 0.0011831877255206002, + "loss": 1.6858, + "step": 2948 + }, + { + "epoch": 0.3110759493670886, + "grad_norm": 0.5525078177452087, + "learning_rate": 0.0011829827533773292, + "loss": 1.6779, + "step": 2949 + }, + { + "epoch": 0.3111814345991561, + "grad_norm": 0.7411080002784729, + "learning_rate": 0.0011827777327161814, + "loss": 1.6391, + "step": 2950 + }, + { + "epoch": 0.31128691983122364, + "grad_norm": 0.528934895992279, + "learning_rate": 0.001182572663560131, + "loss": 1.6325, + "step": 2951 + }, + { + "epoch": 0.31139240506329113, + "grad_norm": 0.8342395424842834, + "learning_rate": 0.0011823675459321564, + "loss": 1.6602, + "step": 2952 + }, + { + "epoch": 0.3114978902953586, + "grad_norm": 0.9396069049835205, + "learning_rate": 0.0011821623798552424, + "loss": 1.676, + "step": 2953 + }, + { + "epoch": 0.3116033755274262, + "grad_norm": 0.5606972575187683, + "learning_rate": 0.001181957165352379, + "loss": 1.6284, + "step": 2954 + }, + { + "epoch": 0.31170886075949367, + "grad_norm": 0.8612900376319885, + "learning_rate": 0.0011817519024465608, + "loss": 1.6395, + "step": 2955 + }, + { + "epoch": 0.31181434599156116, + "grad_norm": 0.6746002435684204, + "learning_rate": 0.0011815465911607893, + "loss": 1.6671, + "step": 2956 + }, + { + "epoch": 0.3119198312236287, + "grad_norm": 0.8667129278182983, + "learning_rate": 0.0011813412315180704, + "loss": 1.6775, + "step": 2957 + }, + { + "epoch": 0.3120253164556962, + "grad_norm": 0.7669298648834229, + "learning_rate": 0.0011811358235414154, + "loss": 1.6307, + "step": 2958 + }, + { + "epoch": 0.3121308016877637, + "grad_norm": 0.6504887938499451, + "learning_rate": 0.0011809303672538417, + "loss": 1.6587, + "step": 2959 + }, + { + "epoch": 
0.31223628691983124, + "grad_norm": 0.6936582326889038, + "learning_rate": 0.0011807248626783714, + "loss": 1.6595, + "step": 2960 + }, + { + "epoch": 0.31234177215189873, + "grad_norm": 0.6256689429283142, + "learning_rate": 0.0011805193098380327, + "loss": 1.6283, + "step": 2961 + }, + { + "epoch": 0.3124472573839662, + "grad_norm": 0.7270101308822632, + "learning_rate": 0.0011803137087558584, + "loss": 1.652, + "step": 2962 + }, + { + "epoch": 0.3125527426160338, + "grad_norm": 0.6325646638870239, + "learning_rate": 0.0011801080594548874, + "loss": 1.6591, + "step": 2963 + }, + { + "epoch": 0.31265822784810127, + "grad_norm": 0.6134359836578369, + "learning_rate": 0.0011799023619581638, + "loss": 1.6166, + "step": 2964 + }, + { + "epoch": 0.31276371308016876, + "grad_norm": 0.7792713046073914, + "learning_rate": 0.0011796966162887364, + "loss": 1.6384, + "step": 2965 + }, + { + "epoch": 0.3128691983122363, + "grad_norm": 0.678541362285614, + "learning_rate": 0.0011794908224696608, + "loss": 1.6528, + "step": 2966 + }, + { + "epoch": 0.3129746835443038, + "grad_norm": 0.678030788898468, + "learning_rate": 0.0011792849805239967, + "loss": 1.6794, + "step": 2967 + }, + { + "epoch": 0.3130801687763713, + "grad_norm": 0.8605043888092041, + "learning_rate": 0.0011790790904748103, + "loss": 1.6872, + "step": 2968 + }, + { + "epoch": 0.31318565400843884, + "grad_norm": 0.6061658263206482, + "learning_rate": 0.0011788731523451718, + "loss": 1.6358, + "step": 2969 + }, + { + "epoch": 0.31329113924050633, + "grad_norm": 0.7448184490203857, + "learning_rate": 0.0011786671661581584, + "loss": 1.6503, + "step": 2970 + }, + { + "epoch": 0.3133966244725738, + "grad_norm": 0.7953397631645203, + "learning_rate": 0.0011784611319368512, + "loss": 1.6139, + "step": 2971 + }, + { + "epoch": 0.3135021097046414, + "grad_norm": 0.5414730906486511, + "learning_rate": 0.0011782550497043379, + "loss": 1.6298, + "step": 2972 + }, + { + "epoch": 0.31360759493670887, + "grad_norm": 
0.7804540395736694, + "learning_rate": 0.0011780489194837106, + "loss": 1.6565, + "step": 2973 + }, + { + "epoch": 0.31371308016877636, + "grad_norm": 0.638394296169281, + "learning_rate": 0.0011778427412980675, + "loss": 1.6678, + "step": 2974 + }, + { + "epoch": 0.3138185654008439, + "grad_norm": 0.6554713845252991, + "learning_rate": 0.0011776365151705119, + "loss": 1.6254, + "step": 2975 + }, + { + "epoch": 0.3139240506329114, + "grad_norm": 0.5818237662315369, + "learning_rate": 0.0011774302411241525, + "loss": 1.6512, + "step": 2976 + }, + { + "epoch": 0.3140295358649789, + "grad_norm": 0.5909695029258728, + "learning_rate": 0.0011772239191821029, + "loss": 1.6673, + "step": 2977 + }, + { + "epoch": 0.31413502109704644, + "grad_norm": 0.6390104293823242, + "learning_rate": 0.0011770175493674827, + "loss": 1.6532, + "step": 2978 + }, + { + "epoch": 0.31424050632911393, + "grad_norm": 0.6097636222839355, + "learning_rate": 0.0011768111317034173, + "loss": 1.6665, + "step": 2979 + }, + { + "epoch": 0.3143459915611814, + "grad_norm": 0.6529159545898438, + "learning_rate": 0.001176604666213036, + "loss": 1.6815, + "step": 2980 + }, + { + "epoch": 0.3144514767932489, + "grad_norm": 0.5344160199165344, + "learning_rate": 0.0011763981529194748, + "loss": 1.6913, + "step": 2981 + }, + { + "epoch": 0.31455696202531647, + "grad_norm": 0.739173173904419, + "learning_rate": 0.001176191591845874, + "loss": 1.6323, + "step": 2982 + }, + { + "epoch": 0.31466244725738396, + "grad_norm": 0.8687513470649719, + "learning_rate": 0.0011759849830153806, + "loss": 1.6577, + "step": 2983 + }, + { + "epoch": 0.31476793248945145, + "grad_norm": 0.7127306461334229, + "learning_rate": 0.0011757783264511456, + "loss": 1.6581, + "step": 2984 + }, + { + "epoch": 0.314873417721519, + "grad_norm": 0.6181420087814331, + "learning_rate": 0.001175571622176326, + "loss": 1.6467, + "step": 2985 + }, + { + "epoch": 0.3149789029535865, + "grad_norm": 0.8255112171173096, + "learning_rate": 
0.0011753648702140837, + "loss": 1.6864, + "step": 2986 + }, + { + "epoch": 0.315084388185654, + "grad_norm": 0.5532165765762329, + "learning_rate": 0.001175158070587587, + "loss": 1.6891, + "step": 2987 + }, + { + "epoch": 0.31518987341772153, + "grad_norm": 0.7488635778427124, + "learning_rate": 0.0011749512233200081, + "loss": 1.6815, + "step": 2988 + }, + { + "epoch": 0.315295358649789, + "grad_norm": 0.6110788583755493, + "learning_rate": 0.001174744328434526, + "loss": 1.6983, + "step": 2989 + }, + { + "epoch": 0.3154008438818565, + "grad_norm": 0.5918418169021606, + "learning_rate": 0.0011745373859543236, + "loss": 1.6781, + "step": 2990 + }, + { + "epoch": 0.31550632911392407, + "grad_norm": 0.613122284412384, + "learning_rate": 0.0011743303959025906, + "loss": 1.6276, + "step": 2991 + }, + { + "epoch": 0.31561181434599156, + "grad_norm": 0.5931441783905029, + "learning_rate": 0.0011741233583025205, + "loss": 1.6283, + "step": 2992 + }, + { + "epoch": 0.31571729957805905, + "grad_norm": 0.6148685216903687, + "learning_rate": 0.0011739162731773133, + "loss": 1.603, + "step": 2993 + }, + { + "epoch": 0.3158227848101266, + "grad_norm": 0.6137456893920898, + "learning_rate": 0.0011737091405501741, + "loss": 1.6811, + "step": 2994 + }, + { + "epoch": 0.3159282700421941, + "grad_norm": 0.5316863656044006, + "learning_rate": 0.0011735019604443126, + "loss": 1.6894, + "step": 2995 + }, + { + "epoch": 0.3160337552742616, + "grad_norm": 0.5976124405860901, + "learning_rate": 0.0011732947328829447, + "loss": 1.6347, + "step": 2996 + }, + { + "epoch": 0.31613924050632913, + "grad_norm": 0.5192298293113708, + "learning_rate": 0.0011730874578892913, + "loss": 1.6395, + "step": 2997 + }, + { + "epoch": 0.3162447257383966, + "grad_norm": 0.6057814955711365, + "learning_rate": 0.0011728801354865786, + "loss": 1.6425, + "step": 2998 + }, + { + "epoch": 0.3163502109704641, + "grad_norm": 0.5736595392227173, + "learning_rate": 0.0011726727656980378, + "loss": 1.6664, + "step": 
2999 + }, + { + "epoch": 0.31645569620253167, + "grad_norm": 0.5513321161270142, + "learning_rate": 0.0011724653485469063, + "loss": 1.6513, + "step": 3000 + }, + { + "epoch": 0.31656118143459916, + "grad_norm": 0.5348178148269653, + "learning_rate": 0.0011722578840564256, + "loss": 1.6453, + "step": 3001 + }, + { + "epoch": 0.31666666666666665, + "grad_norm": 0.5495903491973877, + "learning_rate": 0.0011720503722498436, + "loss": 1.6293, + "step": 3002 + }, + { + "epoch": 0.3167721518987342, + "grad_norm": 0.6012746691703796, + "learning_rate": 0.0011718428131504127, + "loss": 1.6254, + "step": 3003 + }, + { + "epoch": 0.3168776371308017, + "grad_norm": 0.5354148149490356, + "learning_rate": 0.0011716352067813914, + "loss": 1.6946, + "step": 3004 + }, + { + "epoch": 0.3169831223628692, + "grad_norm": 0.6000759601593018, + "learning_rate": 0.0011714275531660423, + "loss": 1.6662, + "step": 3005 + }, + { + "epoch": 0.31708860759493673, + "grad_norm": 0.5314939618110657, + "learning_rate": 0.0011712198523276347, + "loss": 1.6695, + "step": 3006 + }, + { + "epoch": 0.3171940928270042, + "grad_norm": 0.5132604837417603, + "learning_rate": 0.0011710121042894425, + "loss": 1.6592, + "step": 3007 + }, + { + "epoch": 0.3172995780590717, + "grad_norm": 0.527137279510498, + "learning_rate": 0.0011708043090747442, + "loss": 1.6446, + "step": 3008 + }, + { + "epoch": 0.31740506329113927, + "grad_norm": 0.535784125328064, + "learning_rate": 0.001170596466706825, + "loss": 1.6471, + "step": 3009 + }, + { + "epoch": 0.31751054852320676, + "grad_norm": 0.5565757751464844, + "learning_rate": 0.0011703885772089743, + "loss": 1.6539, + "step": 3010 + }, + { + "epoch": 0.31761603375527425, + "grad_norm": 0.5502114295959473, + "learning_rate": 0.0011701806406044875, + "loss": 1.6409, + "step": 3011 + }, + { + "epoch": 0.31772151898734174, + "grad_norm": 0.6889218091964722, + "learning_rate": 0.0011699726569166643, + "loss": 1.6505, + "step": 3012 + }, + { + "epoch": 0.3178270042194093, 
+ "grad_norm": 0.540901780128479, + "learning_rate": 0.0011697646261688108, + "loss": 1.6232, + "step": 3013 + }, + { + "epoch": 0.3179324894514768, + "grad_norm": 0.6252418160438538, + "learning_rate": 0.0011695565483842382, + "loss": 1.6561, + "step": 3014 + }, + { + "epoch": 0.3180379746835443, + "grad_norm": 0.5898944735527039, + "learning_rate": 0.001169348423586262, + "loss": 1.6802, + "step": 3015 + }, + { + "epoch": 0.3181434599156118, + "grad_norm": 0.5760618448257446, + "learning_rate": 0.0011691402517982038, + "loss": 1.655, + "step": 3016 + }, + { + "epoch": 0.3182489451476793, + "grad_norm": 0.5765841007232666, + "learning_rate": 0.0011689320330433904, + "loss": 1.6831, + "step": 3017 + }, + { + "epoch": 0.3183544303797468, + "grad_norm": 0.6072147488594055, + "learning_rate": 0.0011687237673451538, + "loss": 1.6764, + "step": 3018 + }, + { + "epoch": 0.31845991561181436, + "grad_norm": 0.6743874549865723, + "learning_rate": 0.0011685154547268312, + "loss": 1.6458, + "step": 3019 + }, + { + "epoch": 0.31856540084388185, + "grad_norm": 0.7091125845909119, + "learning_rate": 0.0011683070952117646, + "loss": 1.6418, + "step": 3020 + }, + { + "epoch": 0.31867088607594934, + "grad_norm": 0.6691536903381348, + "learning_rate": 0.0011680986888233024, + "loss": 1.6215, + "step": 3021 + }, + { + "epoch": 0.3187763713080169, + "grad_norm": 0.5588595271110535, + "learning_rate": 0.0011678902355847973, + "loss": 1.645, + "step": 3022 + }, + { + "epoch": 0.3188818565400844, + "grad_norm": 0.5793388485908508, + "learning_rate": 0.0011676817355196075, + "loss": 1.6108, + "step": 3023 + }, + { + "epoch": 0.3189873417721519, + "grad_norm": 0.6755077242851257, + "learning_rate": 0.0011674731886510967, + "loss": 1.627, + "step": 3024 + }, + { + "epoch": 0.3190928270042194, + "grad_norm": 0.648127019405365, + "learning_rate": 0.0011672645950026332, + "loss": 1.6048, + "step": 3025 + }, + { + "epoch": 0.3191983122362869, + "grad_norm": 0.7114402055740356, + 
"learning_rate": 0.001167055954597591, + "loss": 1.6643, + "step": 3026 + }, + { + "epoch": 0.3193037974683544, + "grad_norm": 0.6269471645355225, + "learning_rate": 0.0011668472674593497, + "loss": 1.6506, + "step": 3027 + }, + { + "epoch": 0.31940928270042196, + "grad_norm": 0.5732945203781128, + "learning_rate": 0.0011666385336112934, + "loss": 1.6718, + "step": 3028 + }, + { + "epoch": 0.31951476793248945, + "grad_norm": 0.5789778828620911, + "learning_rate": 0.0011664297530768117, + "loss": 1.6282, + "step": 3029 + }, + { + "epoch": 0.31962025316455694, + "grad_norm": 0.6061925292015076, + "learning_rate": 0.0011662209258792998, + "loss": 1.6882, + "step": 3030 + }, + { + "epoch": 0.3197257383966245, + "grad_norm": 0.6713361144065857, + "learning_rate": 0.0011660120520421578, + "loss": 1.658, + "step": 3031 + }, + { + "epoch": 0.319831223628692, + "grad_norm": 0.5865737795829773, + "learning_rate": 0.0011658031315887908, + "loss": 1.6471, + "step": 3032 + }, + { + "epoch": 0.3199367088607595, + "grad_norm": 0.5926680564880371, + "learning_rate": 0.0011655941645426096, + "loss": 1.6244, + "step": 3033 + }, + { + "epoch": 0.320042194092827, + "grad_norm": 0.563005268573761, + "learning_rate": 0.00116538515092703, + "loss": 1.6871, + "step": 3034 + }, + { + "epoch": 0.3201476793248945, + "grad_norm": 0.5632096529006958, + "learning_rate": 0.0011651760907654728, + "loss": 1.64, + "step": 3035 + }, + { + "epoch": 0.320253164556962, + "grad_norm": 0.5447443723678589, + "learning_rate": 0.0011649669840813645, + "loss": 1.7053, + "step": 3036 + }, + { + "epoch": 0.32035864978902956, + "grad_norm": 0.5474767684936523, + "learning_rate": 0.0011647578308981363, + "loss": 1.6772, + "step": 3037 + }, + { + "epoch": 0.32046413502109705, + "grad_norm": 0.5920907258987427, + "learning_rate": 0.001164548631239225, + "loss": 1.6092, + "step": 3038 + }, + { + "epoch": 0.32056962025316454, + "grad_norm": 0.602484405040741, + "learning_rate": 0.0011643393851280724, + "loss": 
1.6324, + "step": 3039 + }, + { + "epoch": 0.3206751054852321, + "grad_norm": 0.5654219388961792, + "learning_rate": 0.0011641300925881257, + "loss": 1.6265, + "step": 3040 + }, + { + "epoch": 0.3207805907172996, + "grad_norm": 0.5814023613929749, + "learning_rate": 0.001163920753642837, + "loss": 1.6422, + "step": 3041 + }, + { + "epoch": 0.3208860759493671, + "grad_norm": 0.5883383750915527, + "learning_rate": 0.001163711368315664, + "loss": 1.6485, + "step": 3042 + }, + { + "epoch": 0.3209915611814346, + "grad_norm": 0.6300072073936462, + "learning_rate": 0.001163501936630069, + "loss": 1.6702, + "step": 3043 + }, + { + "epoch": 0.3210970464135021, + "grad_norm": 0.5257024168968201, + "learning_rate": 0.0011632924586095204, + "loss": 1.6573, + "step": 3044 + }, + { + "epoch": 0.3212025316455696, + "grad_norm": 0.5577696561813354, + "learning_rate": 0.0011630829342774906, + "loss": 1.6468, + "step": 3045 + }, + { + "epoch": 0.3213080168776371, + "grad_norm": 0.546821117401123, + "learning_rate": 0.0011628733636574586, + "loss": 1.6975, + "step": 3046 + }, + { + "epoch": 0.32141350210970465, + "grad_norm": 0.5611344575881958, + "learning_rate": 0.0011626637467729072, + "loss": 1.605, + "step": 3047 + }, + { + "epoch": 0.32151898734177214, + "grad_norm": 0.5945067405700684, + "learning_rate": 0.0011624540836473252, + "loss": 1.6418, + "step": 3048 + }, + { + "epoch": 0.32162447257383964, + "grad_norm": 0.6269094944000244, + "learning_rate": 0.0011622443743042065, + "loss": 1.6222, + "step": 3049 + }, + { + "epoch": 0.3217299578059072, + "grad_norm": 0.5494526028633118, + "learning_rate": 0.0011620346187670501, + "loss": 1.6406, + "step": 3050 + }, + { + "epoch": 0.3218354430379747, + "grad_norm": 0.6588205099105835, + "learning_rate": 0.0011618248170593597, + "loss": 1.643, + "step": 3051 + }, + { + "epoch": 0.32194092827004217, + "grad_norm": 0.5874964594841003, + "learning_rate": 0.0011616149692046454, + "loss": 1.6306, + "step": 3052 + }, + { + "epoch": 
0.3220464135021097, + "grad_norm": 0.5758084058761597, + "learning_rate": 0.0011614050752264216, + "loss": 1.6704, + "step": 3053 + }, + { + "epoch": 0.3221518987341772, + "grad_norm": 0.5934622287750244, + "learning_rate": 0.0011611951351482071, + "loss": 1.6517, + "step": 3054 + }, + { + "epoch": 0.3222573839662447, + "grad_norm": 0.5572699904441833, + "learning_rate": 0.0011609851489935274, + "loss": 1.6825, + "step": 3055 + }, + { + "epoch": 0.32236286919831225, + "grad_norm": 0.5487471222877502, + "learning_rate": 0.0011607751167859125, + "loss": 1.669, + "step": 3056 + }, + { + "epoch": 0.32246835443037974, + "grad_norm": 0.5812325477600098, + "learning_rate": 0.0011605650385488977, + "loss": 1.6549, + "step": 3057 + }, + { + "epoch": 0.32257383966244724, + "grad_norm": 0.557290256023407, + "learning_rate": 0.0011603549143060225, + "loss": 1.6541, + "step": 3058 + }, + { + "epoch": 0.3226793248945148, + "grad_norm": 0.5611379742622375, + "learning_rate": 0.0011601447440808335, + "loss": 1.6647, + "step": 3059 + }, + { + "epoch": 0.3227848101265823, + "grad_norm": 0.5654525756835938, + "learning_rate": 0.0011599345278968806, + "loss": 1.6669, + "step": 3060 + }, + { + "epoch": 0.32289029535864977, + "grad_norm": 0.5401483178138733, + "learning_rate": 0.0011597242657777195, + "loss": 1.6351, + "step": 3061 + }, + { + "epoch": 0.3229957805907173, + "grad_norm": 0.5996794104576111, + "learning_rate": 0.0011595139577469115, + "loss": 1.6284, + "step": 3062 + }, + { + "epoch": 0.3231012658227848, + "grad_norm": 0.6482956409454346, + "learning_rate": 0.0011593036038280225, + "loss": 1.6555, + "step": 3063 + }, + { + "epoch": 0.3232067510548523, + "grad_norm": 0.5258536338806152, + "learning_rate": 0.0011590932040446236, + "loss": 1.6524, + "step": 3064 + }, + { + "epoch": 0.32331223628691985, + "grad_norm": 0.5579742193222046, + "learning_rate": 0.0011588827584202914, + "loss": 1.6316, + "step": 3065 + }, + { + "epoch": 0.32341772151898734, + "grad_norm": 
0.5701795816421509, + "learning_rate": 0.0011586722669786073, + "loss": 1.6495, + "step": 3066 + }, + { + "epoch": 0.32352320675105484, + "grad_norm": 0.5798983573913574, + "learning_rate": 0.0011584617297431578, + "loss": 1.673, + "step": 3067 + }, + { + "epoch": 0.3236286919831224, + "grad_norm": 0.5677968859672546, + "learning_rate": 0.0011582511467375346, + "loss": 1.6427, + "step": 3068 + }, + { + "epoch": 0.3237341772151899, + "grad_norm": 0.5324159860610962, + "learning_rate": 0.001158040517985335, + "loss": 1.6573, + "step": 3069 + }, + { + "epoch": 0.32383966244725737, + "grad_norm": 0.5397741198539734, + "learning_rate": 0.0011578298435101604, + "loss": 1.6704, + "step": 3070 + }, + { + "epoch": 0.3239451476793249, + "grad_norm": 0.5679566264152527, + "learning_rate": 0.0011576191233356181, + "loss": 1.6832, + "step": 3071 + }, + { + "epoch": 0.3240506329113924, + "grad_norm": 0.5742844939231873, + "learning_rate": 0.0011574083574853208, + "loss": 1.5999, + "step": 3072 + }, + { + "epoch": 0.3241561181434599, + "grad_norm": 0.5350568294525146, + "learning_rate": 0.0011571975459828852, + "loss": 1.6409, + "step": 3073 + }, + { + "epoch": 0.32426160337552745, + "grad_norm": 0.5908579230308533, + "learning_rate": 0.0011569866888519343, + "loss": 1.6614, + "step": 3074 + }, + { + "epoch": 0.32436708860759494, + "grad_norm": 0.5843973159790039, + "learning_rate": 0.0011567757861160955, + "loss": 1.6178, + "step": 3075 + }, + { + "epoch": 0.32447257383966244, + "grad_norm": 0.5877038836479187, + "learning_rate": 0.0011565648377990017, + "loss": 1.6115, + "step": 3076 + }, + { + "epoch": 0.32457805907173, + "grad_norm": 0.5555029511451721, + "learning_rate": 0.0011563538439242902, + "loss": 1.6166, + "step": 3077 + }, + { + "epoch": 0.3246835443037975, + "grad_norm": 0.5911861658096313, + "learning_rate": 0.0011561428045156043, + "loss": 1.6331, + "step": 3078 + }, + { + "epoch": 0.32478902953586497, + "grad_norm": 0.6454437375068665, + "learning_rate": 
0.001155931719596592, + "loss": 1.6437, + "step": 3079 + }, + { + "epoch": 0.32489451476793246, + "grad_norm": 0.6699902415275574, + "learning_rate": 0.0011557205891909062, + "loss": 1.6926, + "step": 3080 + }, + { + "epoch": 0.325, + "grad_norm": 0.585996150970459, + "learning_rate": 0.0011555094133222053, + "loss": 1.5952, + "step": 3081 + }, + { + "epoch": 0.3251054852320675, + "grad_norm": 0.5630878210067749, + "learning_rate": 0.0011552981920141528, + "loss": 1.6528, + "step": 3082 + }, + { + "epoch": 0.325210970464135, + "grad_norm": 0.5964344143867493, + "learning_rate": 0.0011550869252904166, + "loss": 1.6484, + "step": 3083 + }, + { + "epoch": 0.32531645569620254, + "grad_norm": 0.5582366585731506, + "learning_rate": 0.0011548756131746706, + "loss": 1.6507, + "step": 3084 + }, + { + "epoch": 0.32542194092827004, + "grad_norm": 0.6020816564559937, + "learning_rate": 0.0011546642556905934, + "loss": 1.6649, + "step": 3085 + }, + { + "epoch": 0.32552742616033753, + "grad_norm": 0.750615119934082, + "learning_rate": 0.0011544528528618682, + "loss": 1.6504, + "step": 3086 + }, + { + "epoch": 0.3256329113924051, + "grad_norm": 0.6311531066894531, + "learning_rate": 0.0011542414047121842, + "loss": 1.6928, + "step": 3087 + }, + { + "epoch": 0.32573839662447257, + "grad_norm": 0.588301420211792, + "learning_rate": 0.0011540299112652351, + "loss": 1.6298, + "step": 3088 + }, + { + "epoch": 0.32584388185654006, + "grad_norm": 0.6082940101623535, + "learning_rate": 0.00115381837254472, + "loss": 1.6466, + "step": 3089 + }, + { + "epoch": 0.3259493670886076, + "grad_norm": 0.6450660824775696, + "learning_rate": 0.0011536067885743423, + "loss": 1.6727, + "step": 3090 + }, + { + "epoch": 0.3260548523206751, + "grad_norm": 0.5562896728515625, + "learning_rate": 0.0011533951593778115, + "loss": 1.6693, + "step": 3091 + }, + { + "epoch": 0.3261603375527426, + "grad_norm": 0.5649775862693787, + "learning_rate": 0.0011531834849788417, + "loss": 1.6335, + "step": 3092 + }, + 
{ + "epoch": 0.32626582278481014, + "grad_norm": 0.5686457753181458, + "learning_rate": 0.0011529717654011518, + "loss": 1.6156, + "step": 3093 + }, + { + "epoch": 0.32637130801687764, + "grad_norm": 0.5206253528594971, + "learning_rate": 0.001152760000668466, + "loss": 1.6358, + "step": 3094 + }, + { + "epoch": 0.32647679324894513, + "grad_norm": 0.6008743643760681, + "learning_rate": 0.001152548190804514, + "loss": 1.6303, + "step": 3095 + }, + { + "epoch": 0.3265822784810127, + "grad_norm": 0.6452239155769348, + "learning_rate": 0.0011523363358330301, + "loss": 1.6406, + "step": 3096 + }, + { + "epoch": 0.32668776371308017, + "grad_norm": 0.5894255638122559, + "learning_rate": 0.0011521244357777533, + "loss": 1.6273, + "step": 3097 + }, + { + "epoch": 0.32679324894514766, + "grad_norm": 0.6880407333374023, + "learning_rate": 0.0011519124906624284, + "loss": 1.6702, + "step": 3098 + }, + { + "epoch": 0.3268987341772152, + "grad_norm": 1.0401500463485718, + "learning_rate": 0.0011517005005108048, + "loss": 1.6306, + "step": 3099 + }, + { + "epoch": 0.3270042194092827, + "grad_norm": 0.6883208155632019, + "learning_rate": 0.001151488465346637, + "loss": 1.6642, + "step": 3100 + }, + { + "epoch": 0.3271097046413502, + "grad_norm": 0.652014970779419, + "learning_rate": 0.0011512763851936848, + "loss": 1.6508, + "step": 3101 + }, + { + "epoch": 0.32721518987341774, + "grad_norm": 0.7263635396957397, + "learning_rate": 0.0011510642600757123, + "loss": 1.6704, + "step": 3102 + }, + { + "epoch": 0.32732067510548524, + "grad_norm": 0.5759977698326111, + "learning_rate": 0.00115085209001649, + "loss": 1.6644, + "step": 3103 + }, + { + "epoch": 0.32742616033755273, + "grad_norm": 0.6622141599655151, + "learning_rate": 0.0011506398750397919, + "loss": 1.6265, + "step": 3104 + }, + { + "epoch": 0.3275316455696203, + "grad_norm": 0.7145103812217712, + "learning_rate": 0.0011504276151693984, + "loss": 1.6355, + "step": 3105 + }, + { + "epoch": 0.32763713080168777, + 
"grad_norm": 0.5901355743408203, + "learning_rate": 0.0011502153104290937, + "loss": 1.6679, + "step": 3106 + }, + { + "epoch": 0.32774261603375526, + "grad_norm": 0.7042275071144104, + "learning_rate": 0.0011500029608426676, + "loss": 1.6268, + "step": 3107 + }, + { + "epoch": 0.3278481012658228, + "grad_norm": 0.7201913595199585, + "learning_rate": 0.0011497905664339153, + "loss": 1.6263, + "step": 3108 + }, + { + "epoch": 0.3279535864978903, + "grad_norm": 0.6144424676895142, + "learning_rate": 0.0011495781272266366, + "loss": 1.6593, + "step": 3109 + }, + { + "epoch": 0.3280590717299578, + "grad_norm": 0.7135056257247925, + "learning_rate": 0.0011493656432446362, + "loss": 1.651, + "step": 3110 + }, + { + "epoch": 0.3281645569620253, + "grad_norm": 0.6139999032020569, + "learning_rate": 0.0011491531145117243, + "loss": 1.6636, + "step": 3111 + }, + { + "epoch": 0.32827004219409284, + "grad_norm": 0.5936509966850281, + "learning_rate": 0.0011489405410517151, + "loss": 1.6397, + "step": 3112 + }, + { + "epoch": 0.32837552742616033, + "grad_norm": 0.7126063704490662, + "learning_rate": 0.0011487279228884293, + "loss": 1.6402, + "step": 3113 + }, + { + "epoch": 0.3284810126582278, + "grad_norm": 0.705272912979126, + "learning_rate": 0.0011485152600456913, + "loss": 1.6682, + "step": 3114 + }, + { + "epoch": 0.32858649789029537, + "grad_norm": 0.5824154019355774, + "learning_rate": 0.0011483025525473314, + "loss": 1.6359, + "step": 3115 + }, + { + "epoch": 0.32869198312236286, + "grad_norm": 0.6816695928573608, + "learning_rate": 0.001148089800417184, + "loss": 1.6138, + "step": 3116 + }, + { + "epoch": 0.32879746835443036, + "grad_norm": 0.6636863946914673, + "learning_rate": 0.00114787700367909, + "loss": 1.6497, + "step": 3117 + }, + { + "epoch": 0.3289029535864979, + "grad_norm": 0.6653619408607483, + "learning_rate": 0.0011476641623568934, + "loss": 1.6162, + "step": 3118 + }, + { + "epoch": 0.3290084388185654, + "grad_norm": 0.8609537482261658, + 
"learning_rate": 0.0011474512764744445, + "loss": 1.6529, + "step": 3119 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 0.7032024264335632, + "learning_rate": 0.0011472383460555983, + "loss": 1.6937, + "step": 3120 + }, + { + "epoch": 0.32921940928270044, + "grad_norm": 0.6083239912986755, + "learning_rate": 0.0011470253711242146, + "loss": 1.6376, + "step": 3121 + }, + { + "epoch": 0.32932489451476793, + "grad_norm": 0.6960940957069397, + "learning_rate": 0.001146812351704158, + "loss": 1.6368, + "step": 3122 + }, + { + "epoch": 0.3294303797468354, + "grad_norm": 0.5480913519859314, + "learning_rate": 0.001146599287819299, + "loss": 1.6992, + "step": 3123 + }, + { + "epoch": 0.32953586497890297, + "grad_norm": 0.7668262720108032, + "learning_rate": 0.0011463861794935122, + "loss": 1.6194, + "step": 3124 + }, + { + "epoch": 0.32964135021097046, + "grad_norm": 0.601893961429596, + "learning_rate": 0.0011461730267506775, + "loss": 1.6748, + "step": 3125 + }, + { + "epoch": 0.32974683544303796, + "grad_norm": 0.617725670337677, + "learning_rate": 0.0011459598296146795, + "loss": 1.666, + "step": 3126 + }, + { + "epoch": 0.3298523206751055, + "grad_norm": 0.5904416441917419, + "learning_rate": 0.001145746588109408, + "loss": 1.6616, + "step": 3127 + }, + { + "epoch": 0.329957805907173, + "grad_norm": 0.6761355996131897, + "learning_rate": 0.0011455333022587582, + "loss": 1.6487, + "step": 3128 + }, + { + "epoch": 0.3300632911392405, + "grad_norm": 0.6609636545181274, + "learning_rate": 0.0011453199720866296, + "loss": 1.6721, + "step": 3129 + }, + { + "epoch": 0.33016877637130804, + "grad_norm": 0.6284902095794678, + "learning_rate": 0.001145106597616927, + "loss": 1.6706, + "step": 3130 + }, + { + "epoch": 0.33027426160337553, + "grad_norm": 0.7033625245094299, + "learning_rate": 0.0011448931788735595, + "loss": 1.638, + "step": 3131 + }, + { + "epoch": 0.330379746835443, + "grad_norm": 0.5690692663192749, + "learning_rate": 0.0011446797158804426, + "loss": 
1.6643, + "step": 3132 + }, + { + "epoch": 0.33048523206751057, + "grad_norm": 0.7245692014694214, + "learning_rate": 0.0011444662086614952, + "loss": 1.5917, + "step": 3133 + }, + { + "epoch": 0.33059071729957806, + "grad_norm": 0.7219608426094055, + "learning_rate": 0.0011442526572406422, + "loss": 1.6816, + "step": 3134 + }, + { + "epoch": 0.33069620253164556, + "grad_norm": 0.5896676182746887, + "learning_rate": 0.001144039061641813, + "loss": 1.6009, + "step": 3135 + }, + { + "epoch": 0.3308016877637131, + "grad_norm": 0.6445325613021851, + "learning_rate": 0.0011438254218889422, + "loss": 1.6286, + "step": 3136 + }, + { + "epoch": 0.3309071729957806, + "grad_norm": 0.5840768218040466, + "learning_rate": 0.0011436117380059692, + "loss": 1.6014, + "step": 3137 + }, + { + "epoch": 0.3310126582278481, + "grad_norm": 0.7109744548797607, + "learning_rate": 0.0011433980100168382, + "loss": 1.681, + "step": 3138 + }, + { + "epoch": 0.33111814345991564, + "grad_norm": 0.7461845874786377, + "learning_rate": 0.0011431842379454982, + "loss": 1.6631, + "step": 3139 + }, + { + "epoch": 0.33122362869198313, + "grad_norm": 0.8058801293373108, + "learning_rate": 0.001142970421815904, + "loss": 1.633, + "step": 3140 + }, + { + "epoch": 0.3313291139240506, + "grad_norm": 0.6132332682609558, + "learning_rate": 0.0011427565616520144, + "loss": 1.6723, + "step": 3141 + }, + { + "epoch": 0.33143459915611817, + "grad_norm": 0.6361710429191589, + "learning_rate": 0.0011425426574777936, + "loss": 1.621, + "step": 3142 + }, + { + "epoch": 0.33154008438818566, + "grad_norm": 0.5942292809486389, + "learning_rate": 0.0011423287093172106, + "loss": 1.6573, + "step": 3143 + }, + { + "epoch": 0.33164556962025316, + "grad_norm": 0.5845707058906555, + "learning_rate": 0.0011421147171942398, + "loss": 1.6608, + "step": 3144 + }, + { + "epoch": 0.33175105485232065, + "grad_norm": 0.5365136861801147, + "learning_rate": 0.0011419006811328593, + "loss": 1.6666, + "step": 3145 + }, + { + "epoch": 
0.3318565400843882, + "grad_norm": 0.6138281226158142, + "learning_rate": 0.0011416866011570534, + "loss": 1.648, + "step": 3146 + }, + { + "epoch": 0.3319620253164557, + "grad_norm": 0.5538115501403809, + "learning_rate": 0.0011414724772908105, + "loss": 1.6206, + "step": 3147 + }, + { + "epoch": 0.3320675105485232, + "grad_norm": 0.6152692437171936, + "learning_rate": 0.0011412583095581248, + "loss": 1.6388, + "step": 3148 + }, + { + "epoch": 0.33217299578059073, + "grad_norm": 0.6464048027992249, + "learning_rate": 0.0011410440979829942, + "loss": 1.622, + "step": 3149 + }, + { + "epoch": 0.3322784810126582, + "grad_norm": 0.5243057608604431, + "learning_rate": 0.0011408298425894226, + "loss": 1.6364, + "step": 3150 + }, + { + "epoch": 0.3323839662447257, + "grad_norm": 0.8345889449119568, + "learning_rate": 0.0011406155434014185, + "loss": 1.645, + "step": 3151 + }, + { + "epoch": 0.33248945147679326, + "grad_norm": 0.7248412370681763, + "learning_rate": 0.0011404012004429948, + "loss": 1.6408, + "step": 3152 + }, + { + "epoch": 0.33259493670886076, + "grad_norm": 0.5488977432250977, + "learning_rate": 0.00114018681373817, + "loss": 1.6574, + "step": 3153 + }, + { + "epoch": 0.33270042194092825, + "grad_norm": 0.5552822947502136, + "learning_rate": 0.001139972383310967, + "loss": 1.6533, + "step": 3154 + }, + { + "epoch": 0.3328059071729958, + "grad_norm": 0.6169644594192505, + "learning_rate": 0.0011397579091854137, + "loss": 1.6472, + "step": 3155 + }, + { + "epoch": 0.3329113924050633, + "grad_norm": 0.525227963924408, + "learning_rate": 0.0011395433913855434, + "loss": 1.6434, + "step": 3156 + }, + { + "epoch": 0.3330168776371308, + "grad_norm": 0.6150789856910706, + "learning_rate": 0.0011393288299353934, + "loss": 1.6406, + "step": 3157 + }, + { + "epoch": 0.33312236286919833, + "grad_norm": 0.6242336630821228, + "learning_rate": 0.001139114224859007, + "loss": 1.623, + "step": 3158 + }, + { + "epoch": 0.3332278481012658, + "grad_norm": 
0.5828819274902344, + "learning_rate": 0.0011388995761804311, + "loss": 1.6561, + "step": 3159 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.5471931099891663, + "learning_rate": 0.0011386848839237186, + "loss": 1.648, + "step": 3160 + }, + { + "epoch": 0.33343881856540086, + "grad_norm": 0.5937705039978027, + "learning_rate": 0.0011384701481129266, + "loss": 1.6526, + "step": 3161 + }, + { + "epoch": 0.33354430379746836, + "grad_norm": 0.640213668346405, + "learning_rate": 0.0011382553687721174, + "loss": 1.652, + "step": 3162 + }, + { + "epoch": 0.33364978902953585, + "grad_norm": 0.574081301689148, + "learning_rate": 0.0011380405459253582, + "loss": 1.6425, + "step": 3163 + }, + { + "epoch": 0.3337552742616034, + "grad_norm": 0.6351216435432434, + "learning_rate": 0.0011378256795967208, + "loss": 1.6646, + "step": 3164 + }, + { + "epoch": 0.3338607594936709, + "grad_norm": 0.6212761998176575, + "learning_rate": 0.0011376107698102822, + "loss": 1.6388, + "step": 3165 + }, + { + "epoch": 0.3339662447257384, + "grad_norm": 0.740437924861908, + "learning_rate": 0.001137395816590124, + "loss": 1.6943, + "step": 3166 + }, + { + "epoch": 0.33407172995780593, + "grad_norm": 0.6645119190216064, + "learning_rate": 0.001137180819960333, + "loss": 1.6175, + "step": 3167 + }, + { + "epoch": 0.3341772151898734, + "grad_norm": 0.5889713764190674, + "learning_rate": 0.0011369657799450005, + "loss": 1.6614, + "step": 3168 + }, + { + "epoch": 0.3342827004219409, + "grad_norm": 0.6471067070960999, + "learning_rate": 0.0011367506965682225, + "loss": 1.6639, + "step": 3169 + }, + { + "epoch": 0.33438818565400846, + "grad_norm": 0.6209869384765625, + "learning_rate": 0.0011365355698541005, + "loss": 1.6071, + "step": 3170 + }, + { + "epoch": 0.33449367088607596, + "grad_norm": 0.7412921786308289, + "learning_rate": 0.0011363203998267406, + "loss": 1.6315, + "step": 3171 + }, + { + "epoch": 0.33459915611814345, + "grad_norm": 0.6666702628135681, + "learning_rate": 
0.0011361051865102533, + "loss": 1.6292, + "step": 3172 + }, + { + "epoch": 0.334704641350211, + "grad_norm": 0.5883874893188477, + "learning_rate": 0.0011358899299287546, + "loss": 1.6563, + "step": 3173 + }, + { + "epoch": 0.3348101265822785, + "grad_norm": 0.5496659278869629, + "learning_rate": 0.0011356746301063652, + "loss": 1.6576, + "step": 3174 + }, + { + "epoch": 0.334915611814346, + "grad_norm": 0.6422592401504517, + "learning_rate": 0.0011354592870672104, + "loss": 1.6439, + "step": 3175 + }, + { + "epoch": 0.33502109704641353, + "grad_norm": 0.5612423419952393, + "learning_rate": 0.0011352439008354201, + "loss": 1.6398, + "step": 3176 + }, + { + "epoch": 0.335126582278481, + "grad_norm": 0.5424929857254028, + "learning_rate": 0.0011350284714351298, + "loss": 1.637, + "step": 3177 + }, + { + "epoch": 0.3352320675105485, + "grad_norm": 0.6958585381507874, + "learning_rate": 0.0011348129988904797, + "loss": 1.6356, + "step": 3178 + }, + { + "epoch": 0.335337552742616, + "grad_norm": 0.5438727736473083, + "learning_rate": 0.0011345974832256138, + "loss": 1.6117, + "step": 3179 + }, + { + "epoch": 0.33544303797468356, + "grad_norm": 0.685747504234314, + "learning_rate": 0.0011343819244646824, + "loss": 1.6844, + "step": 3180 + }, + { + "epoch": 0.33554852320675105, + "grad_norm": 0.7825990319252014, + "learning_rate": 0.0011341663226318395, + "loss": 1.6097, + "step": 3181 + }, + { + "epoch": 0.33565400843881854, + "grad_norm": 0.7449751496315002, + "learning_rate": 0.0011339506777512446, + "loss": 1.6309, + "step": 3182 + }, + { + "epoch": 0.3357594936708861, + "grad_norm": 0.536396324634552, + "learning_rate": 0.0011337349898470617, + "loss": 1.6455, + "step": 3183 + }, + { + "epoch": 0.3358649789029536, + "grad_norm": 0.7061575651168823, + "learning_rate": 0.0011335192589434597, + "loss": 1.6499, + "step": 3184 + }, + { + "epoch": 0.3359704641350211, + "grad_norm": 0.5501567721366882, + "learning_rate": 0.0011333034850646124, + "loss": 1.6523, + "step": 
3185 + }, + { + "epoch": 0.3360759493670886, + "grad_norm": 0.5566317439079285, + "learning_rate": 0.0011330876682346981, + "loss": 1.6345, + "step": 3186 + }, + { + "epoch": 0.3361814345991561, + "grad_norm": 0.5285628437995911, + "learning_rate": 0.0011328718084779004, + "loss": 1.6631, + "step": 3187 + }, + { + "epoch": 0.3362869198312236, + "grad_norm": 0.600501298904419, + "learning_rate": 0.0011326559058184075, + "loss": 1.6486, + "step": 3188 + }, + { + "epoch": 0.33639240506329116, + "grad_norm": 0.5249839425086975, + "learning_rate": 0.001132439960280412, + "loss": 1.6223, + "step": 3189 + }, + { + "epoch": 0.33649789029535865, + "grad_norm": 0.5994705557823181, + "learning_rate": 0.001132223971888112, + "loss": 1.6687, + "step": 3190 + }, + { + "epoch": 0.33660337552742614, + "grad_norm": 0.5968992114067078, + "learning_rate": 0.0011320079406657102, + "loss": 1.6354, + "step": 3191 + }, + { + "epoch": 0.3367088607594937, + "grad_norm": 0.6174200773239136, + "learning_rate": 0.0011317918666374138, + "loss": 1.6133, + "step": 3192 + }, + { + "epoch": 0.3368143459915612, + "grad_norm": 0.5380529165267944, + "learning_rate": 0.0011315757498274349, + "loss": 1.6808, + "step": 3193 + }, + { + "epoch": 0.3369198312236287, + "grad_norm": 0.6446694135665894, + "learning_rate": 0.0011313595902599904, + "loss": 1.6619, + "step": 3194 + }, + { + "epoch": 0.3370253164556962, + "grad_norm": 0.5908147096633911, + "learning_rate": 0.0011311433879593023, + "loss": 1.6131, + "step": 3195 + }, + { + "epoch": 0.3371308016877637, + "grad_norm": 0.5534048676490784, + "learning_rate": 0.001130927142949597, + "loss": 1.6713, + "step": 3196 + }, + { + "epoch": 0.3372362869198312, + "grad_norm": 0.6000741720199585, + "learning_rate": 0.001130710855255106, + "loss": 1.5824, + "step": 3197 + }, + { + "epoch": 0.33734177215189876, + "grad_norm": 0.5176724195480347, + "learning_rate": 0.001130494524900065, + "loss": 1.6371, + "step": 3198 + }, + { + "epoch": 0.33744725738396625, + 
"grad_norm": 0.7362030744552612, + "learning_rate": 0.0011302781519087154, + "loss": 1.6029, + "step": 3199 + }, + { + "epoch": 0.33755274261603374, + "grad_norm": 0.7788394093513489, + "learning_rate": 0.0011300617363053024, + "loss": 1.6448, + "step": 3200 + }, + { + "epoch": 0.3376582278481013, + "grad_norm": 0.5430024862289429, + "learning_rate": 0.0011298452781140769, + "loss": 1.6341, + "step": 3201 + }, + { + "epoch": 0.3377637130801688, + "grad_norm": 0.8621813654899597, + "learning_rate": 0.0011296287773592938, + "loss": 1.6735, + "step": 3202 + }, + { + "epoch": 0.3378691983122363, + "grad_norm": 0.9051045179367065, + "learning_rate": 0.0011294122340652132, + "loss": 1.6673, + "step": 3203 + }, + { + "epoch": 0.3379746835443038, + "grad_norm": 0.5769403576850891, + "learning_rate": 0.0011291956482561, + "loss": 1.6542, + "step": 3204 + }, + { + "epoch": 0.3380801687763713, + "grad_norm": 0.916631817817688, + "learning_rate": 0.0011289790199562233, + "loss": 1.6627, + "step": 3205 + }, + { + "epoch": 0.3381856540084388, + "grad_norm": 0.6997500061988831, + "learning_rate": 0.001128762349189858, + "loss": 1.6589, + "step": 3206 + }, + { + "epoch": 0.33829113924050636, + "grad_norm": 0.7352639436721802, + "learning_rate": 0.0011285456359812825, + "loss": 1.6555, + "step": 3207 + }, + { + "epoch": 0.33839662447257385, + "grad_norm": 0.9249271750450134, + "learning_rate": 0.0011283288803547809, + "loss": 1.624, + "step": 3208 + }, + { + "epoch": 0.33850210970464134, + "grad_norm": 0.5900299549102783, + "learning_rate": 0.0011281120823346418, + "loss": 1.6345, + "step": 3209 + }, + { + "epoch": 0.33860759493670883, + "grad_norm": 1.1381328105926514, + "learning_rate": 0.0011278952419451586, + "loss": 1.6599, + "step": 3210 + }, + { + "epoch": 0.3387130801687764, + "grad_norm": 0.731651246547699, + "learning_rate": 0.0011276783592106291, + "loss": 1.6291, + "step": 3211 + }, + { + "epoch": 0.3388185654008439, + "grad_norm": 0.5894852876663208, + "learning_rate": 
0.001127461434155356, + "loss": 1.6313, + "step": 3212 + }, + { + "epoch": 0.33892405063291137, + "grad_norm": 0.6343427896499634, + "learning_rate": 0.001127244466803647, + "loss": 1.6183, + "step": 3213 + }, + { + "epoch": 0.3390295358649789, + "grad_norm": 0.5424615740776062, + "learning_rate": 0.0011270274571798147, + "loss": 1.6706, + "step": 3214 + }, + { + "epoch": 0.3391350210970464, + "grad_norm": 0.6136397123336792, + "learning_rate": 0.0011268104053081755, + "loss": 1.6544, + "step": 3215 + }, + { + "epoch": 0.3392405063291139, + "grad_norm": 0.5987748503684998, + "learning_rate": 0.0011265933112130516, + "loss": 1.6483, + "step": 3216 + }, + { + "epoch": 0.33934599156118145, + "grad_norm": 0.5498157739639282, + "learning_rate": 0.0011263761749187693, + "loss": 1.6395, + "step": 3217 + }, + { + "epoch": 0.33945147679324894, + "grad_norm": 0.6086106300354004, + "learning_rate": 0.0011261589964496597, + "loss": 1.6456, + "step": 3218 + }, + { + "epoch": 0.33955696202531643, + "grad_norm": 0.5589696168899536, + "learning_rate": 0.001125941775830059, + "loss": 1.6166, + "step": 3219 + }, + { + "epoch": 0.339662447257384, + "grad_norm": 0.6250709295272827, + "learning_rate": 0.0011257245130843077, + "loss": 1.6422, + "step": 3220 + }, + { + "epoch": 0.3397679324894515, + "grad_norm": 0.5714243650436401, + "learning_rate": 0.0011255072082367512, + "loss": 1.6385, + "step": 3221 + }, + { + "epoch": 0.33987341772151897, + "grad_norm": 0.5934084057807922, + "learning_rate": 0.0011252898613117394, + "loss": 1.6532, + "step": 3222 + }, + { + "epoch": 0.3399789029535865, + "grad_norm": 0.677064836025238, + "learning_rate": 0.0011250724723336273, + "loss": 1.6468, + "step": 3223 + }, + { + "epoch": 0.340084388185654, + "grad_norm": 0.576246976852417, + "learning_rate": 0.0011248550413267746, + "loss": 1.6134, + "step": 3224 + }, + { + "epoch": 0.3401898734177215, + "grad_norm": 0.7517459988594055, + "learning_rate": 0.001124637568315545, + "loss": 1.6527, + "step": 
3225 + }, + { + "epoch": 0.34029535864978905, + "grad_norm": 0.7777457237243652, + "learning_rate": 0.001124420053324308, + "loss": 1.6196, + "step": 3226 + }, + { + "epoch": 0.34040084388185654, + "grad_norm": 0.7234323620796204, + "learning_rate": 0.001124202496377437, + "loss": 1.6128, + "step": 3227 + }, + { + "epoch": 0.34050632911392403, + "grad_norm": 0.6566915512084961, + "learning_rate": 0.0011239848974993103, + "loss": 1.615, + "step": 3228 + }, + { + "epoch": 0.3406118143459916, + "grad_norm": 0.8073840737342834, + "learning_rate": 0.0011237672567143107, + "loss": 1.6562, + "step": 3229 + }, + { + "epoch": 0.3407172995780591, + "grad_norm": 0.5846312046051025, + "learning_rate": 0.0011235495740468265, + "loss": 1.6557, + "step": 3230 + }, + { + "epoch": 0.34082278481012657, + "grad_norm": 0.9091677665710449, + "learning_rate": 0.00112333184952125, + "loss": 1.619, + "step": 3231 + }, + { + "epoch": 0.3409282700421941, + "grad_norm": 0.5564118027687073, + "learning_rate": 0.001123114083161978, + "loss": 1.6201, + "step": 3232 + }, + { + "epoch": 0.3410337552742616, + "grad_norm": 0.7984133958816528, + "learning_rate": 0.0011228962749934123, + "loss": 1.6158, + "step": 3233 + }, + { + "epoch": 0.3411392405063291, + "grad_norm": 0.6695194244384766, + "learning_rate": 0.0011226784250399598, + "loss": 1.6126, + "step": 3234 + }, + { + "epoch": 0.34124472573839665, + "grad_norm": 0.7103314399719238, + "learning_rate": 0.0011224605333260312, + "loss": 1.6024, + "step": 3235 + }, + { + "epoch": 0.34135021097046414, + "grad_norm": 0.7657415270805359, + "learning_rate": 0.0011222425998760428, + "loss": 1.6443, + "step": 3236 + }, + { + "epoch": 0.34145569620253163, + "grad_norm": 0.6281055212020874, + "learning_rate": 0.0011220246247144149, + "loss": 1.6071, + "step": 3237 + }, + { + "epoch": 0.3415611814345992, + "grad_norm": 0.6709548234939575, + "learning_rate": 0.0011218066078655725, + "loss": 1.6129, + "step": 3238 + }, + { + "epoch": 0.3416666666666667, + 
"grad_norm": 0.5836053490638733, + "learning_rate": 0.001121588549353946, + "loss": 1.5968, + "step": 3239 + }, + { + "epoch": 0.34177215189873417, + "grad_norm": 0.7265259027481079, + "learning_rate": 0.0011213704492039694, + "loss": 1.6413, + "step": 3240 + }, + { + "epoch": 0.3418776371308017, + "grad_norm": 0.5405563712120056, + "learning_rate": 0.0011211523074400823, + "loss": 1.6251, + "step": 3241 + }, + { + "epoch": 0.3419831223628692, + "grad_norm": 0.7058964967727661, + "learning_rate": 0.0011209341240867282, + "loss": 1.6586, + "step": 3242 + }, + { + "epoch": 0.3420886075949367, + "grad_norm": 0.5696418881416321, + "learning_rate": 0.001120715899168356, + "loss": 1.6115, + "step": 3243 + }, + { + "epoch": 0.3421940928270042, + "grad_norm": 0.6521735787391663, + "learning_rate": 0.0011204976327094187, + "loss": 1.654, + "step": 3244 + }, + { + "epoch": 0.34229957805907174, + "grad_norm": 0.565108060836792, + "learning_rate": 0.0011202793247343742, + "loss": 1.6145, + "step": 3245 + }, + { + "epoch": 0.34240506329113923, + "grad_norm": 0.5970233678817749, + "learning_rate": 0.001120060975267685, + "loss": 1.6344, + "step": 3246 + }, + { + "epoch": 0.3425105485232067, + "grad_norm": 0.5325796604156494, + "learning_rate": 0.0011198425843338183, + "loss": 1.6538, + "step": 3247 + }, + { + "epoch": 0.3426160337552743, + "grad_norm": 0.7701898217201233, + "learning_rate": 0.0011196241519572457, + "loss": 1.6622, + "step": 3248 + }, + { + "epoch": 0.34272151898734177, + "grad_norm": 0.5880612730979919, + "learning_rate": 0.001119405678162444, + "loss": 1.6458, + "step": 3249 + }, + { + "epoch": 0.34282700421940926, + "grad_norm": 0.6394253373146057, + "learning_rate": 0.001119187162973894, + "loss": 1.6549, + "step": 3250 + }, + { + "epoch": 0.3429324894514768, + "grad_norm": 0.6242642402648926, + "learning_rate": 0.0011189686064160811, + "loss": 1.6167, + "step": 3251 + }, + { + "epoch": 0.3430379746835443, + "grad_norm": 0.6213588714599609, + "learning_rate": 
0.001118750008513496, + "loss": 1.6671, + "step": 3252 + }, + { + "epoch": 0.3431434599156118, + "grad_norm": 0.6385392546653748, + "learning_rate": 0.0011185313692906342, + "loss": 1.6625, + "step": 3253 + }, + { + "epoch": 0.34324894514767934, + "grad_norm": 0.5964173674583435, + "learning_rate": 0.0011183126887719945, + "loss": 1.6258, + "step": 3254 + }, + { + "epoch": 0.34335443037974683, + "grad_norm": 0.6417024731636047, + "learning_rate": 0.0011180939669820813, + "loss": 1.6175, + "step": 3255 + }, + { + "epoch": 0.3434599156118143, + "grad_norm": 0.5978966951370239, + "learning_rate": 0.001117875203945404, + "loss": 1.5944, + "step": 3256 + }, + { + "epoch": 0.3435654008438819, + "grad_norm": 0.625260055065155, + "learning_rate": 0.0011176563996864754, + "loss": 1.6712, + "step": 3257 + }, + { + "epoch": 0.34367088607594937, + "grad_norm": 0.6457785964012146, + "learning_rate": 0.0011174375542298142, + "loss": 1.6165, + "step": 3258 + }, + { + "epoch": 0.34377637130801686, + "grad_norm": 0.5726715922355652, + "learning_rate": 0.0011172186675999425, + "loss": 1.6238, + "step": 3259 + }, + { + "epoch": 0.3438818565400844, + "grad_norm": 0.5987027287483215, + "learning_rate": 0.001116999739821388, + "loss": 1.617, + "step": 3260 + }, + { + "epoch": 0.3439873417721519, + "grad_norm": 0.5552608966827393, + "learning_rate": 0.0011167807709186828, + "loss": 1.6625, + "step": 3261 + }, + { + "epoch": 0.3440928270042194, + "grad_norm": 0.6068829894065857, + "learning_rate": 0.0011165617609163632, + "loss": 1.6493, + "step": 3262 + }, + { + "epoch": 0.34419831223628694, + "grad_norm": 0.6102298498153687, + "learning_rate": 0.0011163427098389706, + "loss": 1.6577, + "step": 3263 + }, + { + "epoch": 0.34430379746835443, + "grad_norm": 0.5710529685020447, + "learning_rate": 0.0011161236177110504, + "loss": 1.5808, + "step": 3264 + }, + { + "epoch": 0.3444092827004219, + "grad_norm": 0.5799105167388916, + "learning_rate": 0.0011159044845571533, + "loss": 1.6183, + 
"step": 3265 + }, + { + "epoch": 0.3445147679324895, + "grad_norm": 0.5956416726112366, + "learning_rate": 0.0011156853104018342, + "loss": 1.6571, + "step": 3266 + }, + { + "epoch": 0.34462025316455697, + "grad_norm": 0.6177721619606018, + "learning_rate": 0.0011154660952696525, + "loss": 1.6662, + "step": 3267 + }, + { + "epoch": 0.34472573839662446, + "grad_norm": 0.6192376613616943, + "learning_rate": 0.0011152468391851724, + "loss": 1.6563, + "step": 3268 + }, + { + "epoch": 0.344831223628692, + "grad_norm": 0.6652493476867676, + "learning_rate": 0.0011150275421729628, + "loss": 1.6612, + "step": 3269 + }, + { + "epoch": 0.3449367088607595, + "grad_norm": 0.6049871444702148, + "learning_rate": 0.0011148082042575968, + "loss": 1.6515, + "step": 3270 + }, + { + "epoch": 0.345042194092827, + "grad_norm": 0.6123583316802979, + "learning_rate": 0.0011145888254636526, + "loss": 1.6175, + "step": 3271 + }, + { + "epoch": 0.34514767932489454, + "grad_norm": 0.6709561944007874, + "learning_rate": 0.0011143694058157122, + "loss": 1.6893, + "step": 3272 + }, + { + "epoch": 0.34525316455696203, + "grad_norm": 0.5920717120170593, + "learning_rate": 0.0011141499453383632, + "loss": 1.655, + "step": 3273 + }, + { + "epoch": 0.3453586497890295, + "grad_norm": 0.6421986818313599, + "learning_rate": 0.001113930444056197, + "loss": 1.6436, + "step": 3274 + }, + { + "epoch": 0.3454641350210971, + "grad_norm": 0.6278884410858154, + "learning_rate": 0.00111371090199381, + "loss": 1.656, + "step": 3275 + }, + { + "epoch": 0.34556962025316457, + "grad_norm": 0.6366848945617676, + "learning_rate": 0.0011134913191758024, + "loss": 1.6658, + "step": 3276 + }, + { + "epoch": 0.34567510548523206, + "grad_norm": 0.5389577746391296, + "learning_rate": 0.00111327169562678, + "loss": 1.6512, + "step": 3277 + }, + { + "epoch": 0.34578059071729955, + "grad_norm": 0.5768035054206848, + "learning_rate": 0.0011130520313713528, + "loss": 1.5966, + "step": 3278 + }, + { + "epoch": 
0.3458860759493671, + "grad_norm": 0.5947706699371338, + "learning_rate": 0.0011128323264341352, + "loss": 1.6542, + "step": 3279 + }, + { + "epoch": 0.3459915611814346, + "grad_norm": 0.7899022102355957, + "learning_rate": 0.0011126125808397461, + "loss": 1.65, + "step": 3280 + }, + { + "epoch": 0.3460970464135021, + "grad_norm": 0.7066892981529236, + "learning_rate": 0.0011123927946128092, + "loss": 1.63, + "step": 3281 + }, + { + "epoch": 0.34620253164556963, + "grad_norm": 0.7348818778991699, + "learning_rate": 0.0011121729677779526, + "loss": 1.634, + "step": 3282 + }, + { + "epoch": 0.3463080168776371, + "grad_norm": 1.140781283378601, + "learning_rate": 0.001111953100359809, + "loss": 1.6065, + "step": 3283 + }, + { + "epoch": 0.3464135021097046, + "grad_norm": 0.6666001677513123, + "learning_rate": 0.0011117331923830157, + "loss": 1.6531, + "step": 3284 + }, + { + "epoch": 0.34651898734177217, + "grad_norm": 0.7332402467727661, + "learning_rate": 0.0011115132438722143, + "loss": 1.6428, + "step": 3285 + }, + { + "epoch": 0.34662447257383966, + "grad_norm": 0.8763302564620972, + "learning_rate": 0.0011112932548520513, + "loss": 1.6595, + "step": 3286 + }, + { + "epoch": 0.34672995780590715, + "grad_norm": 0.6482123732566833, + "learning_rate": 0.0011110732253471777, + "loss": 1.6967, + "step": 3287 + }, + { + "epoch": 0.3468354430379747, + "grad_norm": 0.8848562240600586, + "learning_rate": 0.0011108531553822485, + "loss": 1.6073, + "step": 3288 + }, + { + "epoch": 0.3469409282700422, + "grad_norm": 0.7133051753044128, + "learning_rate": 0.001110633044981924, + "loss": 1.617, + "step": 3289 + }, + { + "epoch": 0.3470464135021097, + "grad_norm": 0.8446333408355713, + "learning_rate": 0.0011104128941708683, + "loss": 1.5659, + "step": 3290 + }, + { + "epoch": 0.34715189873417723, + "grad_norm": 0.9989070892333984, + "learning_rate": 0.001110192702973751, + "loss": 1.6242, + "step": 3291 + }, + { + "epoch": 0.3472573839662447, + "grad_norm": 0.6012383103370667, 
+ "learning_rate": 0.001109972471415245, + "loss": 1.6314, + "step": 3292 + }, + { + "epoch": 0.3473628691983122, + "grad_norm": 0.8607325553894043, + "learning_rate": 0.0011097521995200288, + "loss": 1.6354, + "step": 3293 + }, + { + "epoch": 0.34746835443037977, + "grad_norm": 0.8192002773284912, + "learning_rate": 0.0011095318873127844, + "loss": 1.6347, + "step": 3294 + }, + { + "epoch": 0.34757383966244726, + "grad_norm": 0.5736108422279358, + "learning_rate": 0.0011093115348181995, + "loss": 1.6262, + "step": 3295 + }, + { + "epoch": 0.34767932489451475, + "grad_norm": 0.6402544379234314, + "learning_rate": 0.0011090911420609654, + "loss": 1.6829, + "step": 3296 + }, + { + "epoch": 0.3477848101265823, + "grad_norm": 0.5916029810905457, + "learning_rate": 0.0011088707090657784, + "loss": 1.6374, + "step": 3297 + }, + { + "epoch": 0.3478902953586498, + "grad_norm": 0.5520672798156738, + "learning_rate": 0.0011086502358573387, + "loss": 1.6254, + "step": 3298 + }, + { + "epoch": 0.3479957805907173, + "grad_norm": 0.6282167434692383, + "learning_rate": 0.0011084297224603517, + "loss": 1.658, + "step": 3299 + }, + { + "epoch": 0.34810126582278483, + "grad_norm": 0.5248445272445679, + "learning_rate": 0.001108209168899527, + "loss": 1.6522, + "step": 3300 + }, + { + "epoch": 0.3482067510548523, + "grad_norm": 0.6114848852157593, + "learning_rate": 0.0011079885751995788, + "loss": 1.642, + "step": 3301 + }, + { + "epoch": 0.3483122362869198, + "grad_norm": 0.557396411895752, + "learning_rate": 0.0011077679413852258, + "loss": 1.6187, + "step": 3302 + }, + { + "epoch": 0.34841772151898737, + "grad_norm": 0.5868496298789978, + "learning_rate": 0.0011075472674811908, + "loss": 1.6715, + "step": 3303 + }, + { + "epoch": 0.34852320675105486, + "grad_norm": 0.6713554263114929, + "learning_rate": 0.0011073265535122016, + "loss": 1.6361, + "step": 3304 + }, + { + "epoch": 0.34862869198312235, + "grad_norm": 0.807915449142456, + "learning_rate": 0.0011071057995029902, + 
"loss": 1.6557, + "step": 3305 + }, + { + "epoch": 0.3487341772151899, + "grad_norm": 0.5795521140098572, + "learning_rate": 0.0011068850054782933, + "loss": 1.6108, + "step": 3306 + }, + { + "epoch": 0.3488396624472574, + "grad_norm": 0.7306057214736938, + "learning_rate": 0.0011066641714628522, + "loss": 1.6342, + "step": 3307 + }, + { + "epoch": 0.3489451476793249, + "grad_norm": 0.6053302884101868, + "learning_rate": 0.001106443297481412, + "loss": 1.6488, + "step": 3308 + }, + { + "epoch": 0.3490506329113924, + "grad_norm": 0.5999066829681396, + "learning_rate": 0.001106222383558723, + "loss": 1.636, + "step": 3309 + }, + { + "epoch": 0.3491561181434599, + "grad_norm": 0.5493079423904419, + "learning_rate": 0.0011060014297195396, + "loss": 1.6137, + "step": 3310 + }, + { + "epoch": 0.3492616033755274, + "grad_norm": 0.6405524015426636, + "learning_rate": 0.0011057804359886209, + "loss": 1.6269, + "step": 3311 + }, + { + "epoch": 0.3493670886075949, + "grad_norm": 0.5205414295196533, + "learning_rate": 0.0011055594023907302, + "loss": 1.6673, + "step": 3312 + }, + { + "epoch": 0.34947257383966246, + "grad_norm": 0.6442984938621521, + "learning_rate": 0.0011053383289506354, + "loss": 1.586, + "step": 3313 + }, + { + "epoch": 0.34957805907172995, + "grad_norm": 0.5477169752120972, + "learning_rate": 0.001105117215693109, + "loss": 1.6758, + "step": 3314 + }, + { + "epoch": 0.34968354430379744, + "grad_norm": 0.6746101975440979, + "learning_rate": 0.001104896062642928, + "loss": 1.6277, + "step": 3315 + }, + { + "epoch": 0.349789029535865, + "grad_norm": 0.5939791202545166, + "learning_rate": 0.001104674869824873, + "loss": 1.6115, + "step": 3316 + }, + { + "epoch": 0.3498945147679325, + "grad_norm": 0.6331865787506104, + "learning_rate": 0.0011044536372637307, + "loss": 1.6745, + "step": 3317 + }, + { + "epoch": 0.35, + "grad_norm": 0.633012056350708, + "learning_rate": 0.001104232364984291, + "loss": 1.6756, + "step": 3318 + }, + { + "epoch": 0.3501054852320675, 
+ "grad_norm": 0.547040581703186, + "learning_rate": 0.001104011053011348, + "loss": 1.6368, + "step": 3319 + }, + { + "epoch": 0.350210970464135, + "grad_norm": 0.6717926263809204, + "learning_rate": 0.0011037897013697015, + "loss": 1.6394, + "step": 3320 + }, + { + "epoch": 0.3503164556962025, + "grad_norm": 0.5776377320289612, + "learning_rate": 0.0011035683100841548, + "loss": 1.585, + "step": 3321 + }, + { + "epoch": 0.35042194092827006, + "grad_norm": 0.5977219939231873, + "learning_rate": 0.0011033468791795161, + "loss": 1.6279, + "step": 3322 + }, + { + "epoch": 0.35052742616033755, + "grad_norm": 0.5589094758033752, + "learning_rate": 0.0011031254086805973, + "loss": 1.6741, + "step": 3323 + }, + { + "epoch": 0.35063291139240504, + "grad_norm": 0.6255728602409363, + "learning_rate": 0.0011029038986122156, + "loss": 1.6212, + "step": 3324 + }, + { + "epoch": 0.3507383966244726, + "grad_norm": 0.7142055034637451, + "learning_rate": 0.0011026823489991924, + "loss": 1.6757, + "step": 3325 + }, + { + "epoch": 0.3508438818565401, + "grad_norm": 0.5397716760635376, + "learning_rate": 0.0011024607598663539, + "loss": 1.6092, + "step": 3326 + }, + { + "epoch": 0.3509493670886076, + "grad_norm": 0.7472676634788513, + "learning_rate": 0.001102239131238529, + "loss": 1.6515, + "step": 3327 + }, + { + "epoch": 0.3510548523206751, + "grad_norm": 0.5985038876533508, + "learning_rate": 0.0011020174631405533, + "loss": 1.6587, + "step": 3328 + }, + { + "epoch": 0.3511603375527426, + "grad_norm": 0.6326662302017212, + "learning_rate": 0.0011017957555972656, + "loss": 1.6435, + "step": 3329 + }, + { + "epoch": 0.3512658227848101, + "grad_norm": 0.6449219584465027, + "learning_rate": 0.0011015740086335092, + "loss": 1.609, + "step": 3330 + }, + { + "epoch": 0.35137130801687766, + "grad_norm": 0.7083027958869934, + "learning_rate": 0.001101352222274132, + "loss": 1.6341, + "step": 3331 + }, + { + "epoch": 0.35147679324894515, + "grad_norm": 0.5932875871658325, + 
"learning_rate": 0.0011011303965439863, + "loss": 1.6263, + "step": 3332 + }, + { + "epoch": 0.35158227848101264, + "grad_norm": 0.8540968894958496, + "learning_rate": 0.0011009085314679287, + "loss": 1.6975, + "step": 3333 + }, + { + "epoch": 0.3516877637130802, + "grad_norm": 0.823266863822937, + "learning_rate": 0.0011006866270708204, + "loss": 1.6224, + "step": 3334 + }, + { + "epoch": 0.3517932489451477, + "grad_norm": 0.5748893022537231, + "learning_rate": 0.0011004646833775269, + "loss": 1.6116, + "step": 3335 + }, + { + "epoch": 0.3518987341772152, + "grad_norm": 0.8091727495193481, + "learning_rate": 0.0011002427004129184, + "loss": 1.6422, + "step": 3336 + }, + { + "epoch": 0.3520042194092827, + "grad_norm": 0.6399957537651062, + "learning_rate": 0.0011000206782018683, + "loss": 1.634, + "step": 3337 + }, + { + "epoch": 0.3521097046413502, + "grad_norm": 0.6428866982460022, + "learning_rate": 0.001099798616769256, + "loss": 1.6339, + "step": 3338 + }, + { + "epoch": 0.3522151898734177, + "grad_norm": 0.5907018780708313, + "learning_rate": 0.0010995765161399646, + "loss": 1.6546, + "step": 3339 + }, + { + "epoch": 0.35232067510548526, + "grad_norm": 0.6782863736152649, + "learning_rate": 0.0010993543763388814, + "loss": 1.6185, + "step": 3340 + }, + { + "epoch": 0.35242616033755275, + "grad_norm": 0.5749251246452332, + "learning_rate": 0.0010991321973908982, + "loss": 1.5938, + "step": 3341 + }, + { + "epoch": 0.35253164556962024, + "grad_norm": 0.6641843914985657, + "learning_rate": 0.0010989099793209112, + "loss": 1.6513, + "step": 3342 + }, + { + "epoch": 0.35263713080168774, + "grad_norm": 0.5944504141807556, + "learning_rate": 0.0010986877221538214, + "loss": 1.6192, + "step": 3343 + }, + { + "epoch": 0.3527426160337553, + "grad_norm": 0.7037460207939148, + "learning_rate": 0.0010984654259145335, + "loss": 1.6559, + "step": 3344 + }, + { + "epoch": 0.3528481012658228, + "grad_norm": 0.5951823592185974, + "learning_rate": 0.0010982430906279572, + 
"loss": 1.6572, + "step": 3345 + }, + { + "epoch": 0.35295358649789027, + "grad_norm": 0.6613565683364868, + "learning_rate": 0.001098020716319006, + "loss": 1.6237, + "step": 3346 + }, + { + "epoch": 0.3530590717299578, + "grad_norm": 0.567345142364502, + "learning_rate": 0.0010977983030125982, + "loss": 1.6463, + "step": 3347 + }, + { + "epoch": 0.3531645569620253, + "grad_norm": 0.6802321672439575, + "learning_rate": 0.001097575850733656, + "loss": 1.6204, + "step": 3348 + }, + { + "epoch": 0.3532700421940928, + "grad_norm": 0.6141411662101746, + "learning_rate": 0.001097353359507107, + "loss": 1.5934, + "step": 3349 + }, + { + "epoch": 0.35337552742616035, + "grad_norm": 0.6410816311836243, + "learning_rate": 0.0010971308293578814, + "loss": 1.6247, + "step": 3350 + }, + { + "epoch": 0.35348101265822784, + "grad_norm": 0.7367594838142395, + "learning_rate": 0.0010969082603109158, + "loss": 1.6268, + "step": 3351 + }, + { + "epoch": 0.35358649789029534, + "grad_norm": 0.6055245399475098, + "learning_rate": 0.00109668565239115, + "loss": 1.6611, + "step": 3352 + }, + { + "epoch": 0.3536919831223629, + "grad_norm": 0.8092782497406006, + "learning_rate": 0.001096463005623528, + "loss": 1.6267, + "step": 3353 + }, + { + "epoch": 0.3537974683544304, + "grad_norm": 0.6094508767127991, + "learning_rate": 0.0010962403200329984, + "loss": 1.5911, + "step": 3354 + }, + { + "epoch": 0.35390295358649787, + "grad_norm": 0.6614775061607361, + "learning_rate": 0.0010960175956445145, + "loss": 1.6522, + "step": 3355 + }, + { + "epoch": 0.3540084388185654, + "grad_norm": 0.7110146284103394, + "learning_rate": 0.0010957948324830337, + "loss": 1.6111, + "step": 3356 + }, + { + "epoch": 0.3541139240506329, + "grad_norm": 0.7591574788093567, + "learning_rate": 0.0010955720305735176, + "loss": 1.6368, + "step": 3357 + }, + { + "epoch": 0.3542194092827004, + "grad_norm": 0.5195856094360352, + "learning_rate": 0.0010953491899409321, + "loss": 1.6028, + "step": 3358 + }, + { + "epoch": 
0.35432489451476795, + "grad_norm": 0.7237416505813599, + "learning_rate": 0.001095126310610248, + "loss": 1.6265, + "step": 3359 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 0.5306234955787659, + "learning_rate": 0.0010949033926064397, + "loss": 1.6502, + "step": 3360 + }, + { + "epoch": 0.35453586497890294, + "grad_norm": 0.7784983515739441, + "learning_rate": 0.0010946804359544867, + "loss": 1.6437, + "step": 3361 + }, + { + "epoch": 0.3546413502109705, + "grad_norm": 0.652715265750885, + "learning_rate": 0.001094457440679372, + "loss": 1.6543, + "step": 3362 + }, + { + "epoch": 0.354746835443038, + "grad_norm": 0.5837976336479187, + "learning_rate": 0.0010942344068060833, + "loss": 1.6377, + "step": 3363 + }, + { + "epoch": 0.35485232067510547, + "grad_norm": 0.5573113560676575, + "learning_rate": 0.001094011334359613, + "loss": 1.6344, + "step": 3364 + }, + { + "epoch": 0.354957805907173, + "grad_norm": 0.6050319671630859, + "learning_rate": 0.0010937882233649572, + "loss": 1.5831, + "step": 3365 + }, + { + "epoch": 0.3550632911392405, + "grad_norm": 0.5905345678329468, + "learning_rate": 0.0010935650738471167, + "loss": 1.631, + "step": 3366 + }, + { + "epoch": 0.355168776371308, + "grad_norm": 0.6836594939231873, + "learning_rate": 0.0010933418858310965, + "loss": 1.6441, + "step": 3367 + }, + { + "epoch": 0.35527426160337555, + "grad_norm": 0.806210458278656, + "learning_rate": 0.0010931186593419059, + "loss": 1.6418, + "step": 3368 + }, + { + "epoch": 0.35537974683544304, + "grad_norm": 0.5843720436096191, + "learning_rate": 0.0010928953944045585, + "loss": 1.6664, + "step": 3369 + }, + { + "epoch": 0.35548523206751054, + "grad_norm": 0.7149490714073181, + "learning_rate": 0.0010926720910440725, + "loss": 1.6379, + "step": 3370 + }, + { + "epoch": 0.3555907172995781, + "grad_norm": 0.8491711616516113, + "learning_rate": 0.00109244874928547, + "loss": 1.637, + "step": 3371 + }, + { + "epoch": 0.3556962025316456, + "grad_norm": 0.604278028011322, 
+ "learning_rate": 0.0010922253691537773, + "loss": 1.6094, + "step": 3372 + }, + { + "epoch": 0.35580168776371307, + "grad_norm": 0.5993859767913818, + "learning_rate": 0.0010920019506740256, + "loss": 1.6138, + "step": 3373 + }, + { + "epoch": 0.35590717299578056, + "grad_norm": 0.6227274537086487, + "learning_rate": 0.00109177849387125, + "loss": 1.6398, + "step": 3374 + }, + { + "epoch": 0.3560126582278481, + "grad_norm": 0.5463117957115173, + "learning_rate": 0.00109155499877049, + "loss": 1.6542, + "step": 3375 + }, + { + "epoch": 0.3561181434599156, + "grad_norm": 0.6524138450622559, + "learning_rate": 0.001091331465396789, + "loss": 1.6486, + "step": 3376 + }, + { + "epoch": 0.3562236286919831, + "grad_norm": 0.5428087711334229, + "learning_rate": 0.0010911078937751954, + "loss": 1.6209, + "step": 3377 + }, + { + "epoch": 0.35632911392405064, + "grad_norm": 0.6989608407020569, + "learning_rate": 0.0010908842839307614, + "loss": 1.6198, + "step": 3378 + }, + { + "epoch": 0.35643459915611814, + "grad_norm": 0.6511144638061523, + "learning_rate": 0.0010906606358885437, + "loss": 1.638, + "step": 3379 + }, + { + "epoch": 0.35654008438818563, + "grad_norm": 0.570578396320343, + "learning_rate": 0.001090436949673603, + "loss": 1.6322, + "step": 3380 + }, + { + "epoch": 0.3566455696202532, + "grad_norm": 0.737057089805603, + "learning_rate": 0.0010902132253110043, + "loss": 1.6137, + "step": 3381 + }, + { + "epoch": 0.35675105485232067, + "grad_norm": 0.6306774616241455, + "learning_rate": 0.0010899894628258174, + "loss": 1.5726, + "step": 3382 + }, + { + "epoch": 0.35685654008438816, + "grad_norm": 0.5975207090377808, + "learning_rate": 0.001089765662243116, + "loss": 1.6572, + "step": 3383 + }, + { + "epoch": 0.3569620253164557, + "grad_norm": 0.6475858688354492, + "learning_rate": 0.0010895418235879776, + "loss": 1.6423, + "step": 3384 + }, + { + "epoch": 0.3570675105485232, + "grad_norm": 0.653226912021637, + "learning_rate": 0.0010893179468854848, + "loss": 
1.5711, + "step": 3385 + }, + { + "epoch": 0.3571729957805907, + "grad_norm": 0.5705726742744446, + "learning_rate": 0.0010890940321607245, + "loss": 1.5968, + "step": 3386 + }, + { + "epoch": 0.35727848101265824, + "grad_norm": 0.6776017546653748, + "learning_rate": 0.0010888700794387867, + "loss": 1.6542, + "step": 3387 + }, + { + "epoch": 0.35738396624472574, + "grad_norm": 0.7851178050041199, + "learning_rate": 0.0010886460887447667, + "loss": 1.6542, + "step": 3388 + }, + { + "epoch": 0.35748945147679323, + "grad_norm": 0.5395062565803528, + "learning_rate": 0.0010884220601037637, + "loss": 1.6608, + "step": 3389 + }, + { + "epoch": 0.3575949367088608, + "grad_norm": 0.6995608806610107, + "learning_rate": 0.0010881979935408815, + "loss": 1.6078, + "step": 3390 + }, + { + "epoch": 0.35770042194092827, + "grad_norm": 0.6261734962463379, + "learning_rate": 0.0010879738890812278, + "loss": 1.6321, + "step": 3391 + }, + { + "epoch": 0.35780590717299576, + "grad_norm": 0.6979600191116333, + "learning_rate": 0.0010877497467499146, + "loss": 1.6033, + "step": 3392 + }, + { + "epoch": 0.3579113924050633, + "grad_norm": 0.5751360654830933, + "learning_rate": 0.001087525566572058, + "loss": 1.6177, + "step": 3393 + }, + { + "epoch": 0.3580168776371308, + "grad_norm": 0.6806519627571106, + "learning_rate": 0.0010873013485727782, + "loss": 1.6055, + "step": 3394 + }, + { + "epoch": 0.3581223628691983, + "grad_norm": 0.6595763564109802, + "learning_rate": 0.001087077092777201, + "loss": 1.6485, + "step": 3395 + }, + { + "epoch": 0.35822784810126584, + "grad_norm": 0.5996520519256592, + "learning_rate": 0.0010868527992104545, + "loss": 1.6405, + "step": 3396 + }, + { + "epoch": 0.35833333333333334, + "grad_norm": 0.6444076895713806, + "learning_rate": 0.001086628467897672, + "loss": 1.6211, + "step": 3397 + }, + { + "epoch": 0.35843881856540083, + "grad_norm": 0.5695689916610718, + "learning_rate": 0.0010864040988639912, + "loss": 1.6323, + "step": 3398 + }, + { + "epoch": 
0.3585443037974684, + "grad_norm": 0.6586163640022278, + "learning_rate": 0.0010861796921345537, + "loss": 1.6785, + "step": 3399 + }, + { + "epoch": 0.35864978902953587, + "grad_norm": 0.5616662502288818, + "learning_rate": 0.0010859552477345052, + "loss": 1.6801, + "step": 3400 + }, + { + "epoch": 0.35875527426160336, + "grad_norm": 0.6883683204650879, + "learning_rate": 0.0010857307656889962, + "loss": 1.6566, + "step": 3401 + }, + { + "epoch": 0.3588607594936709, + "grad_norm": 0.5706250667572021, + "learning_rate": 0.0010855062460231807, + "loss": 1.6242, + "step": 3402 + }, + { + "epoch": 0.3589662447257384, + "grad_norm": 0.6661357283592224, + "learning_rate": 0.0010852816887622174, + "loss": 1.6656, + "step": 3403 + }, + { + "epoch": 0.3590717299578059, + "grad_norm": 0.7438821196556091, + "learning_rate": 0.0010850570939312687, + "loss": 1.6329, + "step": 3404 + }, + { + "epoch": 0.35917721518987344, + "grad_norm": 0.5682038068771362, + "learning_rate": 0.0010848324615555024, + "loss": 1.623, + "step": 3405 + }, + { + "epoch": 0.35928270042194094, + "grad_norm": 0.5927019119262695, + "learning_rate": 0.0010846077916600888, + "loss": 1.6436, + "step": 3406 + }, + { + "epoch": 0.35938818565400843, + "grad_norm": 0.5755771994590759, + "learning_rate": 0.0010843830842702036, + "loss": 1.61, + "step": 3407 + }, + { + "epoch": 0.3594936708860759, + "grad_norm": 0.6039578914642334, + "learning_rate": 0.0010841583394110266, + "loss": 1.6326, + "step": 3408 + }, + { + "epoch": 0.35959915611814347, + "grad_norm": 0.5099049210548401, + "learning_rate": 0.0010839335571077415, + "loss": 1.6413, + "step": 3409 + }, + { + "epoch": 0.35970464135021096, + "grad_norm": 0.5926864743232727, + "learning_rate": 0.001083708737385536, + "loss": 1.6021, + "step": 3410 + }, + { + "epoch": 0.35981012658227846, + "grad_norm": 0.5067998170852661, + "learning_rate": 0.0010834838802696023, + "loss": 1.6203, + "step": 3411 + }, + { + "epoch": 0.359915611814346, + "grad_norm": 
0.5723239183425903, + "learning_rate": 0.0010832589857851373, + "loss": 1.6124, + "step": 3412 + }, + { + "epoch": 0.3600210970464135, + "grad_norm": 0.589963436126709, + "learning_rate": 0.001083034053957341, + "loss": 1.6214, + "step": 3413 + }, + { + "epoch": 0.360126582278481, + "grad_norm": 0.5320117473602295, + "learning_rate": 0.0010828090848114182, + "loss": 1.5928, + "step": 3414 + }, + { + "epoch": 0.36023206751054854, + "grad_norm": 0.6326349973678589, + "learning_rate": 0.001082584078372578, + "loss": 1.5944, + "step": 3415 + }, + { + "epoch": 0.36033755274261603, + "grad_norm": 0.5542338490486145, + "learning_rate": 0.0010823590346660335, + "loss": 1.6261, + "step": 3416 + }, + { + "epoch": 0.3604430379746835, + "grad_norm": 0.5946895480155945, + "learning_rate": 0.0010821339537170015, + "loss": 1.6214, + "step": 3417 + }, + { + "epoch": 0.36054852320675107, + "grad_norm": 0.701485812664032, + "learning_rate": 0.0010819088355507043, + "loss": 1.5971, + "step": 3418 + }, + { + "epoch": 0.36065400843881856, + "grad_norm": 0.5543766617774963, + "learning_rate": 0.0010816836801923666, + "loss": 1.5927, + "step": 3419 + }, + { + "epoch": 0.36075949367088606, + "grad_norm": 0.5914662480354309, + "learning_rate": 0.0010814584876672187, + "loss": 1.614, + "step": 3420 + }, + { + "epoch": 0.3608649789029536, + "grad_norm": 0.5488128662109375, + "learning_rate": 0.0010812332580004947, + "loss": 1.6404, + "step": 3421 + }, + { + "epoch": 0.3609704641350211, + "grad_norm": 0.6044926643371582, + "learning_rate": 0.0010810079912174323, + "loss": 1.6703, + "step": 3422 + }, + { + "epoch": 0.3610759493670886, + "grad_norm": 0.6106864809989929, + "learning_rate": 0.001080782687343274, + "loss": 1.6204, + "step": 3423 + }, + { + "epoch": 0.36118143459915614, + "grad_norm": 0.6043225526809692, + "learning_rate": 0.0010805573464032659, + "loss": 1.6517, + "step": 3424 + }, + { + "epoch": 0.36128691983122363, + "grad_norm": 0.6946245431900024, + "learning_rate": 
0.0010803319684226593, + "loss": 1.6487, + "step": 3425 + }, + { + "epoch": 0.3613924050632911, + "grad_norm": 0.6893211603164673, + "learning_rate": 0.001080106553426708, + "loss": 1.6008, + "step": 3426 + }, + { + "epoch": 0.36149789029535867, + "grad_norm": 0.6032970547676086, + "learning_rate": 0.0010798811014406716, + "loss": 1.6369, + "step": 3427 + }, + { + "epoch": 0.36160337552742616, + "grad_norm": 0.5968961119651794, + "learning_rate": 0.0010796556124898127, + "loss": 1.621, + "step": 3428 + }, + { + "epoch": 0.36170886075949366, + "grad_norm": 0.5865374207496643, + "learning_rate": 0.0010794300865993988, + "loss": 1.6063, + "step": 3429 + }, + { + "epoch": 0.3618143459915612, + "grad_norm": 0.6032734513282776, + "learning_rate": 0.0010792045237947008, + "loss": 1.6015, + "step": 3430 + }, + { + "epoch": 0.3619198312236287, + "grad_norm": 0.6522693037986755, + "learning_rate": 0.0010789789241009945, + "loss": 1.6273, + "step": 3431 + }, + { + "epoch": 0.3620253164556962, + "grad_norm": 0.664846658706665, + "learning_rate": 0.0010787532875435593, + "loss": 1.6087, + "step": 3432 + }, + { + "epoch": 0.36213080168776374, + "grad_norm": 0.5756176710128784, + "learning_rate": 0.0010785276141476786, + "loss": 1.6311, + "step": 3433 + }, + { + "epoch": 0.36223628691983123, + "grad_norm": 0.6546560525894165, + "learning_rate": 0.001078301903938641, + "loss": 1.6031, + "step": 3434 + }, + { + "epoch": 0.3623417721518987, + "grad_norm": 0.5844568014144897, + "learning_rate": 0.0010780761569417377, + "loss": 1.6422, + "step": 3435 + }, + { + "epoch": 0.36244725738396627, + "grad_norm": 0.7347837686538696, + "learning_rate": 0.0010778503731822652, + "loss": 1.6068, + "step": 3436 + }, + { + "epoch": 0.36255274261603376, + "grad_norm": 0.5917181372642517, + "learning_rate": 0.0010776245526855235, + "loss": 1.673, + "step": 3437 + }, + { + "epoch": 0.36265822784810126, + "grad_norm": 0.6182844042778015, + "learning_rate": 0.0010773986954768172, + "loss": 1.6504, + 
"step": 3438 + }, + { + "epoch": 0.3627637130801688, + "grad_norm": 0.5465476512908936, + "learning_rate": 0.0010771728015814544, + "loss": 1.6012, + "step": 3439 + }, + { + "epoch": 0.3628691983122363, + "grad_norm": 0.6125494837760925, + "learning_rate": 0.0010769468710247478, + "loss": 1.6125, + "step": 3440 + }, + { + "epoch": 0.3629746835443038, + "grad_norm": 0.5425121784210205, + "learning_rate": 0.0010767209038320138, + "loss": 1.5909, + "step": 3441 + }, + { + "epoch": 0.3630801687763713, + "grad_norm": 0.6604210138320923, + "learning_rate": 0.0010764949000285735, + "loss": 1.6372, + "step": 3442 + }, + { + "epoch": 0.36318565400843883, + "grad_norm": 0.7277034521102905, + "learning_rate": 0.0010762688596397515, + "loss": 1.6428, + "step": 3443 + }, + { + "epoch": 0.3632911392405063, + "grad_norm": 0.609521746635437, + "learning_rate": 0.001076042782690877, + "loss": 1.5874, + "step": 3444 + }, + { + "epoch": 0.3633966244725738, + "grad_norm": 0.5810872316360474, + "learning_rate": 0.001075816669207283, + "loss": 1.6484, + "step": 3445 + }, + { + "epoch": 0.36350210970464136, + "grad_norm": 0.6261112093925476, + "learning_rate": 0.0010755905192143063, + "loss": 1.6584, + "step": 3446 + }, + { + "epoch": 0.36360759493670886, + "grad_norm": 0.6068217754364014, + "learning_rate": 0.0010753643327372886, + "loss": 1.6017, + "step": 3447 + }, + { + "epoch": 0.36371308016877635, + "grad_norm": 0.574536144733429, + "learning_rate": 0.0010751381098015747, + "loss": 1.623, + "step": 3448 + }, + { + "epoch": 0.3638185654008439, + "grad_norm": 0.5784227252006531, + "learning_rate": 0.0010749118504325146, + "loss": 1.6178, + "step": 3449 + }, + { + "epoch": 0.3639240506329114, + "grad_norm": 0.5258245468139648, + "learning_rate": 0.0010746855546554612, + "loss": 1.5857, + "step": 3450 + }, + { + "epoch": 0.3640295358649789, + "grad_norm": 0.5762209296226501, + "learning_rate": 0.0010744592224957727, + "loss": 1.6182, + "step": 3451 + }, + { + "epoch": 
0.36413502109704643, + "grad_norm": 0.5391799211502075, + "learning_rate": 0.00107423285397881, + "loss": 1.5804, + "step": 3452 + }, + { + "epoch": 0.3642405063291139, + "grad_norm": 0.5894366502761841, + "learning_rate": 0.0010740064491299398, + "loss": 1.6238, + "step": 3453 + }, + { + "epoch": 0.3643459915611814, + "grad_norm": 0.5656771063804626, + "learning_rate": 0.0010737800079745308, + "loss": 1.5995, + "step": 3454 + }, + { + "epoch": 0.36445147679324896, + "grad_norm": 0.5640451312065125, + "learning_rate": 0.0010735535305379576, + "loss": 1.6608, + "step": 3455 + }, + { + "epoch": 0.36455696202531646, + "grad_norm": 0.5461921691894531, + "learning_rate": 0.001073327016845598, + "loss": 1.6039, + "step": 3456 + }, + { + "epoch": 0.36466244725738395, + "grad_norm": 0.6188967227935791, + "learning_rate": 0.001073100466922834, + "loss": 1.6182, + "step": 3457 + }, + { + "epoch": 0.3647679324894515, + "grad_norm": 0.6029764413833618, + "learning_rate": 0.0010728738807950515, + "loss": 1.6374, + "step": 3458 + }, + { + "epoch": 0.364873417721519, + "grad_norm": 0.6126210689544678, + "learning_rate": 0.0010726472584876403, + "loss": 1.5824, + "step": 3459 + }, + { + "epoch": 0.3649789029535865, + "grad_norm": 0.6420735716819763, + "learning_rate": 0.0010724206000259954, + "loss": 1.6315, + "step": 3460 + }, + { + "epoch": 0.36508438818565403, + "grad_norm": 0.5994442105293274, + "learning_rate": 0.0010721939054355145, + "loss": 1.6439, + "step": 3461 + }, + { + "epoch": 0.3651898734177215, + "grad_norm": 0.6127062439918518, + "learning_rate": 0.0010719671747415995, + "loss": 1.619, + "step": 3462 + }, + { + "epoch": 0.365295358649789, + "grad_norm": 0.6233068704605103, + "learning_rate": 0.0010717404079696575, + "loss": 1.6269, + "step": 3463 + }, + { + "epoch": 0.36540084388185656, + "grad_norm": 0.6765674352645874, + "learning_rate": 0.0010715136051450982, + "loss": 1.6527, + "step": 3464 + }, + { + "epoch": 0.36550632911392406, + "grad_norm": 
0.6823341250419617, + "learning_rate": 0.0010712867662933364, + "loss": 1.6296, + "step": 3465 + }, + { + "epoch": 0.36561181434599155, + "grad_norm": 0.5891537070274353, + "learning_rate": 0.0010710598914397901, + "loss": 1.5924, + "step": 3466 + }, + { + "epoch": 0.3657172995780591, + "grad_norm": 0.5770248770713806, + "learning_rate": 0.0010708329806098822, + "loss": 1.6567, + "step": 3467 + }, + { + "epoch": 0.3658227848101266, + "grad_norm": 0.5731015205383301, + "learning_rate": 0.001070606033829039, + "loss": 1.5883, + "step": 3468 + }, + { + "epoch": 0.3659282700421941, + "grad_norm": 0.7379708290100098, + "learning_rate": 0.001070379051122691, + "loss": 1.6795, + "step": 3469 + }, + { + "epoch": 0.36603375527426163, + "grad_norm": 0.7150989174842834, + "learning_rate": 0.0010701520325162727, + "loss": 1.6377, + "step": 3470 + }, + { + "epoch": 0.3661392405063291, + "grad_norm": 0.617095410823822, + "learning_rate": 0.001069924978035223, + "loss": 1.6182, + "step": 3471 + }, + { + "epoch": 0.3662447257383966, + "grad_norm": 0.6948694586753845, + "learning_rate": 0.0010696978877049838, + "loss": 1.6645, + "step": 3472 + }, + { + "epoch": 0.3663502109704641, + "grad_norm": 0.5862619280815125, + "learning_rate": 0.0010694707615510023, + "loss": 1.68, + "step": 3473 + }, + { + "epoch": 0.36645569620253166, + "grad_norm": 0.7647845149040222, + "learning_rate": 0.0010692435995987293, + "loss": 1.6532, + "step": 3474 + }, + { + "epoch": 0.36656118143459915, + "grad_norm": 0.7387787699699402, + "learning_rate": 0.0010690164018736187, + "loss": 1.6692, + "step": 3475 + }, + { + "epoch": 0.36666666666666664, + "grad_norm": 0.6296290159225464, + "learning_rate": 0.0010687891684011295, + "loss": 1.6231, + "step": 3476 + }, + { + "epoch": 0.3667721518987342, + "grad_norm": 0.669108510017395, + "learning_rate": 0.0010685618992067243, + "loss": 1.6253, + "step": 3477 + }, + { + "epoch": 0.3668776371308017, + "grad_norm": 0.7052822709083557, + "learning_rate": 
0.00106833459431587, + "loss": 1.5924, + "step": 3478 + }, + { + "epoch": 0.3669831223628692, + "grad_norm": 0.6587653756141663, + "learning_rate": 0.001068107253754037, + "loss": 1.6217, + "step": 3479 + }, + { + "epoch": 0.3670886075949367, + "grad_norm": 0.7514618039131165, + "learning_rate": 0.0010678798775467001, + "loss": 1.6144, + "step": 3480 + }, + { + "epoch": 0.3671940928270042, + "grad_norm": 0.5928881764411926, + "learning_rate": 0.0010676524657193378, + "loss": 1.6405, + "step": 3481 + }, + { + "epoch": 0.3672995780590717, + "grad_norm": 0.5963649749755859, + "learning_rate": 0.0010674250182974325, + "loss": 1.6448, + "step": 3482 + }, + { + "epoch": 0.36740506329113926, + "grad_norm": 0.6476935744285583, + "learning_rate": 0.0010671975353064712, + "loss": 1.6021, + "step": 3483 + }, + { + "epoch": 0.36751054852320675, + "grad_norm": 0.6683353185653687, + "learning_rate": 0.0010669700167719443, + "loss": 1.6613, + "step": 3484 + }, + { + "epoch": 0.36761603375527424, + "grad_norm": 0.6272289752960205, + "learning_rate": 0.0010667424627193469, + "loss": 1.6332, + "step": 3485 + }, + { + "epoch": 0.3677215189873418, + "grad_norm": 0.6428462266921997, + "learning_rate": 0.0010665148731741768, + "loss": 1.6185, + "step": 3486 + }, + { + "epoch": 0.3678270042194093, + "grad_norm": 0.6060913801193237, + "learning_rate": 0.0010662872481619367, + "loss": 1.6116, + "step": 3487 + }, + { + "epoch": 0.3679324894514768, + "grad_norm": 0.7500932812690735, + "learning_rate": 0.0010660595877081335, + "loss": 1.6234, + "step": 3488 + }, + { + "epoch": 0.3680379746835443, + "grad_norm": 0.5974366068840027, + "learning_rate": 0.0010658318918382774, + "loss": 1.6215, + "step": 3489 + }, + { + "epoch": 0.3681434599156118, + "grad_norm": 0.589988112449646, + "learning_rate": 0.0010656041605778832, + "loss": 1.6406, + "step": 3490 + }, + { + "epoch": 0.3682489451476793, + "grad_norm": 0.643516480922699, + "learning_rate": 0.0010653763939524688, + "loss": 1.6274, + "step": 
3491 + }, + { + "epoch": 0.36835443037974686, + "grad_norm": 0.552812933921814, + "learning_rate": 0.0010651485919875568, + "loss": 1.6149, + "step": 3492 + }, + { + "epoch": 0.36845991561181435, + "grad_norm": 0.6202375292778015, + "learning_rate": 0.0010649207547086738, + "loss": 1.598, + "step": 3493 + }, + { + "epoch": 0.36856540084388184, + "grad_norm": 0.6015993356704712, + "learning_rate": 0.0010646928821413499, + "loss": 1.6407, + "step": 3494 + }, + { + "epoch": 0.3686708860759494, + "grad_norm": 0.6222936511039734, + "learning_rate": 0.0010644649743111192, + "loss": 1.6197, + "step": 3495 + }, + { + "epoch": 0.3687763713080169, + "grad_norm": 0.5834906697273254, + "learning_rate": 0.0010642370312435201, + "loss": 1.5995, + "step": 3496 + }, + { + "epoch": 0.3688818565400844, + "grad_norm": 0.5696661472320557, + "learning_rate": 0.0010640090529640948, + "loss": 1.6334, + "step": 3497 + }, + { + "epoch": 0.3689873417721519, + "grad_norm": 0.5530474781990051, + "learning_rate": 0.0010637810394983893, + "loss": 1.5818, + "step": 3498 + }, + { + "epoch": 0.3690928270042194, + "grad_norm": 0.5547725558280945, + "learning_rate": 0.0010635529908719537, + "loss": 1.6079, + "step": 3499 + }, + { + "epoch": 0.3691983122362869, + "grad_norm": 0.5962654948234558, + "learning_rate": 0.001063324907110342, + "loss": 1.5772, + "step": 3500 + }, + { + "epoch": 0.36930379746835446, + "grad_norm": 0.5754215717315674, + "learning_rate": 0.001063096788239112, + "loss": 1.5951, + "step": 3501 + }, + { + "epoch": 0.36940928270042195, + "grad_norm": 0.6467517614364624, + "learning_rate": 0.0010628686342838253, + "loss": 1.6317, + "step": 3502 + }, + { + "epoch": 0.36951476793248944, + "grad_norm": 0.6100196838378906, + "learning_rate": 0.0010626404452700486, + "loss": 1.6571, + "step": 3503 + }, + { + "epoch": 0.369620253164557, + "grad_norm": 0.6446608304977417, + "learning_rate": 0.0010624122212233506, + "loss": 1.6511, + "step": 3504 + }, + { + "epoch": 0.3697257383966245, + 
"grad_norm": 0.623295783996582, + "learning_rate": 0.0010621839621693056, + "loss": 1.6016, + "step": 3505 + }, + { + "epoch": 0.369831223628692, + "grad_norm": 0.5364923477172852, + "learning_rate": 0.0010619556681334909, + "loss": 1.6333, + "step": 3506 + }, + { + "epoch": 0.36993670886075947, + "grad_norm": 0.5900058150291443, + "learning_rate": 0.001061727339141488, + "loss": 1.6499, + "step": 3507 + }, + { + "epoch": 0.370042194092827, + "grad_norm": 0.512570321559906, + "learning_rate": 0.0010614989752188823, + "loss": 1.6005, + "step": 3508 + }, + { + "epoch": 0.3701476793248945, + "grad_norm": 0.5636168122291565, + "learning_rate": 0.0010612705763912635, + "loss": 1.6243, + "step": 3509 + }, + { + "epoch": 0.370253164556962, + "grad_norm": 0.5488389134407043, + "learning_rate": 0.0010610421426842241, + "loss": 1.6338, + "step": 3510 + }, + { + "epoch": 0.37035864978902955, + "grad_norm": 0.5384630560874939, + "learning_rate": 0.0010608136741233618, + "loss": 1.6324, + "step": 3511 + }, + { + "epoch": 0.37046413502109704, + "grad_norm": 0.5466839671134949, + "learning_rate": 0.0010605851707342774, + "loss": 1.6156, + "step": 3512 + }, + { + "epoch": 0.37056962025316453, + "grad_norm": 0.564873218536377, + "learning_rate": 0.0010603566325425758, + "loss": 1.6104, + "step": 3513 + }, + { + "epoch": 0.3706751054852321, + "grad_norm": 0.5365666747093201, + "learning_rate": 0.001060128059573866, + "loss": 1.625, + "step": 3514 + }, + { + "epoch": 0.3707805907172996, + "grad_norm": 0.5499584078788757, + "learning_rate": 0.0010598994518537608, + "loss": 1.6224, + "step": 3515 + }, + { + "epoch": 0.37088607594936707, + "grad_norm": 0.5438109636306763, + "learning_rate": 0.0010596708094078766, + "loss": 1.6483, + "step": 3516 + }, + { + "epoch": 0.3709915611814346, + "grad_norm": 0.5851969122886658, + "learning_rate": 0.0010594421322618341, + "loss": 1.6336, + "step": 3517 + }, + { + "epoch": 0.3710970464135021, + "grad_norm": 0.5761554837226868, + "learning_rate": 
0.0010592134204412578, + "loss": 1.6179, + "step": 3518 + }, + { + "epoch": 0.3712025316455696, + "grad_norm": 0.5521709322929382, + "learning_rate": 0.0010589846739717755, + "loss": 1.6344, + "step": 3519 + }, + { + "epoch": 0.37130801687763715, + "grad_norm": 0.575405478477478, + "learning_rate": 0.00105875589287902, + "loss": 1.6406, + "step": 3520 + }, + { + "epoch": 0.37141350210970464, + "grad_norm": 0.5423132181167603, + "learning_rate": 0.001058527077188627, + "loss": 1.6431, + "step": 3521 + }, + { + "epoch": 0.37151898734177213, + "grad_norm": 0.5903770327568054, + "learning_rate": 0.001058298226926237, + "loss": 1.6286, + "step": 3522 + }, + { + "epoch": 0.3716244725738397, + "grad_norm": 0.5470593571662903, + "learning_rate": 0.0010580693421174928, + "loss": 1.604, + "step": 3523 + }, + { + "epoch": 0.3717299578059072, + "grad_norm": 0.6784812211990356, + "learning_rate": 0.0010578404227880429, + "loss": 1.5781, + "step": 3524 + }, + { + "epoch": 0.37183544303797467, + "grad_norm": 0.7179107069969177, + "learning_rate": 0.0010576114689635383, + "loss": 1.6649, + "step": 3525 + }, + { + "epoch": 0.3719409282700422, + "grad_norm": 0.621738076210022, + "learning_rate": 0.0010573824806696351, + "loss": 1.6402, + "step": 3526 + }, + { + "epoch": 0.3720464135021097, + "grad_norm": 0.6364505887031555, + "learning_rate": 0.001057153457931992, + "loss": 1.6188, + "step": 3527 + }, + { + "epoch": 0.3721518987341772, + "grad_norm": 0.667003870010376, + "learning_rate": 0.0010569244007762723, + "loss": 1.6607, + "step": 3528 + }, + { + "epoch": 0.37225738396624475, + "grad_norm": 0.5818111896514893, + "learning_rate": 0.0010566953092281432, + "loss": 1.5955, + "step": 3529 + }, + { + "epoch": 0.37236286919831224, + "grad_norm": 0.7096104025840759, + "learning_rate": 0.0010564661833132752, + "loss": 1.6954, + "step": 3530 + }, + { + "epoch": 0.37246835443037973, + "grad_norm": 0.6678256988525391, + "learning_rate": 0.0010562370230573432, + "loss": 1.6495, + "step": 
3531 + }, + { + "epoch": 0.3725738396624473, + "grad_norm": 0.6159685254096985, + "learning_rate": 0.0010560078284860257, + "loss": 1.6737, + "step": 3532 + }, + { + "epoch": 0.3726793248945148, + "grad_norm": 0.6096175312995911, + "learning_rate": 0.0010557785996250053, + "loss": 1.6124, + "step": 3533 + }, + { + "epoch": 0.37278481012658227, + "grad_norm": 0.6428889632225037, + "learning_rate": 0.0010555493364999679, + "loss": 1.6488, + "step": 3534 + }, + { + "epoch": 0.3728902953586498, + "grad_norm": 0.7730045914649963, + "learning_rate": 0.001055320039136604, + "loss": 1.6564, + "step": 3535 + }, + { + "epoch": 0.3729957805907173, + "grad_norm": 0.5829127430915833, + "learning_rate": 0.001055090707560607, + "loss": 1.606, + "step": 3536 + }, + { + "epoch": 0.3731012658227848, + "grad_norm": 0.7308542132377625, + "learning_rate": 0.0010548613417976748, + "loss": 1.5925, + "step": 3537 + }, + { + "epoch": 0.37320675105485235, + "grad_norm": 0.8326286673545837, + "learning_rate": 0.0010546319418735094, + "loss": 1.6512, + "step": 3538 + }, + { + "epoch": 0.37331223628691984, + "grad_norm": 0.5576046705245972, + "learning_rate": 0.0010544025078138156, + "loss": 1.616, + "step": 3539 + }, + { + "epoch": 0.37341772151898733, + "grad_norm": 0.9562029242515564, + "learning_rate": 0.001054173039644303, + "loss": 1.6123, + "step": 3540 + }, + { + "epoch": 0.3735232067510548, + "grad_norm": 0.9035420417785645, + "learning_rate": 0.0010539435373906846, + "loss": 1.6272, + "step": 3541 + }, + { + "epoch": 0.3736286919831224, + "grad_norm": 0.5968384146690369, + "learning_rate": 0.0010537140010786774, + "loss": 1.652, + "step": 3542 + }, + { + "epoch": 0.37373417721518987, + "grad_norm": 0.7301012873649597, + "learning_rate": 0.0010534844307340016, + "loss": 1.6174, + "step": 3543 + }, + { + "epoch": 0.37383966244725736, + "grad_norm": 0.5739559531211853, + "learning_rate": 0.0010532548263823822, + "loss": 1.6037, + "step": 3544 + }, + { + "epoch": 0.3739451476793249, + 
"grad_norm": 0.6878318786621094, + "learning_rate": 0.0010530251880495473, + "loss": 1.6405, + "step": 3545 + }, + { + "epoch": 0.3740506329113924, + "grad_norm": 0.5819330215454102, + "learning_rate": 0.0010527955157612291, + "loss": 1.6085, + "step": 3546 + }, + { + "epoch": 0.3741561181434599, + "grad_norm": 0.5877583026885986, + "learning_rate": 0.0010525658095431635, + "loss": 1.5851, + "step": 3547 + }, + { + "epoch": 0.37426160337552744, + "grad_norm": 0.6625291109085083, + "learning_rate": 0.00105233606942109, + "loss": 1.6066, + "step": 3548 + }, + { + "epoch": 0.37436708860759493, + "grad_norm": 0.5791611075401306, + "learning_rate": 0.0010521062954207527, + "loss": 1.6176, + "step": 3549 + }, + { + "epoch": 0.3744725738396624, + "grad_norm": 0.7199050784111023, + "learning_rate": 0.0010518764875678981, + "loss": 1.6355, + "step": 3550 + }, + { + "epoch": 0.37457805907173, + "grad_norm": 0.5990197658538818, + "learning_rate": 0.001051646645888278, + "loss": 1.6089, + "step": 3551 + }, + { + "epoch": 0.37468354430379747, + "grad_norm": 0.6341613531112671, + "learning_rate": 0.0010514167704076473, + "loss": 1.614, + "step": 3552 + }, + { + "epoch": 0.37478902953586496, + "grad_norm": 0.6949262619018555, + "learning_rate": 0.0010511868611517644, + "loss": 1.6342, + "step": 3553 + }, + { + "epoch": 0.3748945147679325, + "grad_norm": 0.516036868095398, + "learning_rate": 0.0010509569181463916, + "loss": 1.621, + "step": 3554 + }, + { + "epoch": 0.375, + "grad_norm": 0.638271152973175, + "learning_rate": 0.0010507269414172956, + "loss": 1.6256, + "step": 3555 + }, + { + "epoch": 0.3751054852320675, + "grad_norm": 0.6510864496231079, + "learning_rate": 0.0010504969309902462, + "loss": 1.6168, + "step": 3556 + }, + { + "epoch": 0.37521097046413504, + "grad_norm": 0.5464292764663696, + "learning_rate": 0.0010502668868910174, + "loss": 1.6219, + "step": 3557 + }, + { + "epoch": 0.37531645569620253, + "grad_norm": 0.6253536939620972, + "learning_rate": 
0.0010500368091453864, + "loss": 1.6005, + "step": 3558 + }, + { + "epoch": 0.37542194092827, + "grad_norm": 0.6004466414451599, + "learning_rate": 0.001049806697779135, + "loss": 1.6251, + "step": 3559 + }, + { + "epoch": 0.3755274261603376, + "grad_norm": 0.5914967656135559, + "learning_rate": 0.001049576552818048, + "loss": 1.6129, + "step": 3560 + }, + { + "epoch": 0.37563291139240507, + "grad_norm": 0.6258463263511658, + "learning_rate": 0.0010493463742879147, + "loss": 1.6229, + "step": 3561 + }, + { + "epoch": 0.37573839662447256, + "grad_norm": 0.6277844905853271, + "learning_rate": 0.0010491161622145275, + "loss": 1.6078, + "step": 3562 + }, + { + "epoch": 0.3758438818565401, + "grad_norm": 0.5981503129005432, + "learning_rate": 0.0010488859166236824, + "loss": 1.6068, + "step": 3563 + }, + { + "epoch": 0.3759493670886076, + "grad_norm": 0.5960632562637329, + "learning_rate": 0.0010486556375411803, + "loss": 1.6145, + "step": 3564 + }, + { + "epoch": 0.3760548523206751, + "grad_norm": 0.5690255165100098, + "learning_rate": 0.0010484253249928247, + "loss": 1.6163, + "step": 3565 + }, + { + "epoch": 0.37616033755274264, + "grad_norm": 0.6960936188697815, + "learning_rate": 0.0010481949790044234, + "loss": 1.5899, + "step": 3566 + }, + { + "epoch": 0.37626582278481013, + "grad_norm": 0.6346127390861511, + "learning_rate": 0.0010479645996017875, + "loss": 1.619, + "step": 3567 + }, + { + "epoch": 0.3763713080168776, + "grad_norm": 0.68667072057724, + "learning_rate": 0.0010477341868107327, + "loss": 1.6464, + "step": 3568 + }, + { + "epoch": 0.3764767932489452, + "grad_norm": 0.8311170935630798, + "learning_rate": 0.0010475037406570775, + "loss": 1.6048, + "step": 3569 + }, + { + "epoch": 0.37658227848101267, + "grad_norm": 0.8323444724082947, + "learning_rate": 0.0010472732611666448, + "loss": 1.6355, + "step": 3570 + }, + { + "epoch": 0.37668776371308016, + "grad_norm": 0.5608946681022644, + "learning_rate": 0.0010470427483652608, + "loss": 1.5823, + "step": 
3571 + }, + { + "epoch": 0.37679324894514765, + "grad_norm": 0.9645335078239441, + "learning_rate": 0.0010468122022787554, + "loss": 1.6168, + "step": 3572 + }, + { + "epoch": 0.3768987341772152, + "grad_norm": 0.8279523849487305, + "learning_rate": 0.001046581622932963, + "loss": 1.6273, + "step": 3573 + }, + { + "epoch": 0.3770042194092827, + "grad_norm": 0.5914105176925659, + "learning_rate": 0.001046351010353721, + "loss": 1.6396, + "step": 3574 + }, + { + "epoch": 0.3771097046413502, + "grad_norm": 0.6612523198127747, + "learning_rate": 0.0010461203645668702, + "loss": 1.6216, + "step": 3575 + }, + { + "epoch": 0.37721518987341773, + "grad_norm": 0.6825933456420898, + "learning_rate": 0.001045889685598256, + "loss": 1.6217, + "step": 3576 + }, + { + "epoch": 0.3773206751054852, + "grad_norm": 0.5977962613105774, + "learning_rate": 0.0010456589734737273, + "loss": 1.61, + "step": 3577 + }, + { + "epoch": 0.3774261603375527, + "grad_norm": 0.7867622375488281, + "learning_rate": 0.0010454282282191362, + "loss": 1.6121, + "step": 3578 + }, + { + "epoch": 0.37753164556962027, + "grad_norm": 0.8275106549263, + "learning_rate": 0.001045197449860339, + "loss": 1.6123, + "step": 3579 + }, + { + "epoch": 0.37763713080168776, + "grad_norm": 0.6461694836616516, + "learning_rate": 0.0010449666384231954, + "loss": 1.6295, + "step": 3580 + }, + { + "epoch": 0.37774261603375525, + "grad_norm": 0.6133614182472229, + "learning_rate": 0.0010447357939335693, + "loss": 1.6275, + "step": 3581 + }, + { + "epoch": 0.3778481012658228, + "grad_norm": 0.6100008487701416, + "learning_rate": 0.001044504916417328, + "loss": 1.6091, + "step": 3582 + }, + { + "epoch": 0.3779535864978903, + "grad_norm": 0.5362046957015991, + "learning_rate": 0.001044274005900342, + "loss": 1.6181, + "step": 3583 + }, + { + "epoch": 0.3780590717299578, + "grad_norm": 0.6055043935775757, + "learning_rate": 0.0010440430624084863, + "loss": 1.6061, + "step": 3584 + }, + { + "epoch": 0.37816455696202533, + 
"grad_norm": 0.5498244166374207, + "learning_rate": 0.0010438120859676393, + "loss": 1.6576, + "step": 3585 + }, + { + "epoch": 0.3782700421940928, + "grad_norm": 0.5659605860710144, + "learning_rate": 0.0010435810766036828, + "loss": 1.6539, + "step": 3586 + }, + { + "epoch": 0.3783755274261603, + "grad_norm": 0.5845277905464172, + "learning_rate": 0.001043350034342503, + "loss": 1.6073, + "step": 3587 + }, + { + "epoch": 0.37848101265822787, + "grad_norm": 0.6649845242500305, + "learning_rate": 0.001043118959209989, + "loss": 1.6173, + "step": 3588 + }, + { + "epoch": 0.37858649789029536, + "grad_norm": 0.5838857889175415, + "learning_rate": 0.001042887851232034, + "loss": 1.6555, + "step": 3589 + }, + { + "epoch": 0.37869198312236285, + "grad_norm": 0.5881826877593994, + "learning_rate": 0.0010426567104345346, + "loss": 1.6308, + "step": 3590 + }, + { + "epoch": 0.3787974683544304, + "grad_norm": 0.5292380452156067, + "learning_rate": 0.0010424255368433916, + "loss": 1.5963, + "step": 3591 + }, + { + "epoch": 0.3789029535864979, + "grad_norm": 0.6260533332824707, + "learning_rate": 0.0010421943304845093, + "loss": 1.6097, + "step": 3592 + }, + { + "epoch": 0.3790084388185654, + "grad_norm": 0.5749509334564209, + "learning_rate": 0.0010419630913837948, + "loss": 1.6601, + "step": 3593 + }, + { + "epoch": 0.37911392405063293, + "grad_norm": 0.5603563189506531, + "learning_rate": 0.0010417318195671604, + "loss": 1.6148, + "step": 3594 + }, + { + "epoch": 0.3792194092827004, + "grad_norm": 0.5779839158058167, + "learning_rate": 0.0010415005150605208, + "loss": 1.6258, + "step": 3595 + }, + { + "epoch": 0.3793248945147679, + "grad_norm": 0.5897194147109985, + "learning_rate": 0.001041269177889795, + "loss": 1.5652, + "step": 3596 + }, + { + "epoch": 0.37943037974683547, + "grad_norm": 0.5538802146911621, + "learning_rate": 0.0010410378080809052, + "loss": 1.6107, + "step": 3597 + }, + { + "epoch": 0.37953586497890296, + "grad_norm": 0.5194069147109985, + 
"learning_rate": 0.001040806405659778, + "loss": 1.6173, + "step": 3598 + }, + { + "epoch": 0.37964135021097045, + "grad_norm": 0.6609838604927063, + "learning_rate": 0.0010405749706523428, + "loss": 1.6009, + "step": 3599 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 0.6262969970703125, + "learning_rate": 0.0010403435030845332, + "loss": 1.6037, + "step": 3600 + }, + { + "epoch": 0.3798523206751055, + "grad_norm": 0.5472788214683533, + "learning_rate": 0.0010401120029822864, + "loss": 1.6184, + "step": 3601 + }, + { + "epoch": 0.379957805907173, + "grad_norm": 0.6446293592453003, + "learning_rate": 0.001039880470371543, + "loss": 1.5991, + "step": 3602 + }, + { + "epoch": 0.38006329113924053, + "grad_norm": 0.5893603563308716, + "learning_rate": 0.0010396489052782473, + "loss": 1.6156, + "step": 3603 + }, + { + "epoch": 0.380168776371308, + "grad_norm": 0.8588913083076477, + "learning_rate": 0.0010394173077283477, + "loss": 1.5927, + "step": 3604 + }, + { + "epoch": 0.3802742616033755, + "grad_norm": 0.5575080513954163, + "learning_rate": 0.0010391856777477954, + "loss": 1.635, + "step": 3605 + }, + { + "epoch": 0.380379746835443, + "grad_norm": 0.8045327663421631, + "learning_rate": 0.001038954015362546, + "loss": 1.6087, + "step": 3606 + }, + { + "epoch": 0.38048523206751056, + "grad_norm": 0.7151440978050232, + "learning_rate": 0.001038722320598558, + "loss": 1.6071, + "step": 3607 + }, + { + "epoch": 0.38059071729957805, + "grad_norm": 0.6009940505027771, + "learning_rate": 0.001038490593481795, + "loss": 1.6389, + "step": 3608 + }, + { + "epoch": 0.38069620253164554, + "grad_norm": 0.5979459881782532, + "learning_rate": 0.0010382588340382218, + "loss": 1.6344, + "step": 3609 + }, + { + "epoch": 0.3808016877637131, + "grad_norm": 0.63556969165802, + "learning_rate": 0.0010380270422938093, + "loss": 1.5779, + "step": 3610 + }, + { + "epoch": 0.3809071729957806, + "grad_norm": 0.6428659558296204, + "learning_rate": 0.00103779521827453, + "loss": 1.6438, 
+ "step": 3611 + }, + { + "epoch": 0.3810126582278481, + "grad_norm": 0.5561164617538452, + "learning_rate": 0.0010375633620063618, + "loss": 1.6026, + "step": 3612 + }, + { + "epoch": 0.3811181434599156, + "grad_norm": 0.5781473517417908, + "learning_rate": 0.0010373314735152848, + "loss": 1.6177, + "step": 3613 + }, + { + "epoch": 0.3812236286919831, + "grad_norm": 0.5902009606361389, + "learning_rate": 0.0010370995528272836, + "loss": 1.5922, + "step": 3614 + }, + { + "epoch": 0.3813291139240506, + "grad_norm": 0.5358254909515381, + "learning_rate": 0.0010368675999683455, + "loss": 1.5718, + "step": 3615 + }, + { + "epoch": 0.38143459915611816, + "grad_norm": 0.600440502166748, + "learning_rate": 0.0010366356149644628, + "loss": 1.6233, + "step": 3616 + }, + { + "epoch": 0.38154008438818565, + "grad_norm": 0.6004948616027832, + "learning_rate": 0.0010364035978416297, + "loss": 1.6702, + "step": 3617 + }, + { + "epoch": 0.38164556962025314, + "grad_norm": 0.574108898639679, + "learning_rate": 0.001036171548625846, + "loss": 1.6517, + "step": 3618 + }, + { + "epoch": 0.3817510548523207, + "grad_norm": 0.6851674914360046, + "learning_rate": 0.0010359394673431126, + "loss": 1.6147, + "step": 3619 + }, + { + "epoch": 0.3818565400843882, + "grad_norm": 0.6093524098396301, + "learning_rate": 0.0010357073540194362, + "loss": 1.584, + "step": 3620 + }, + { + "epoch": 0.3819620253164557, + "grad_norm": 0.6961807012557983, + "learning_rate": 0.0010354752086808264, + "loss": 1.6283, + "step": 3621 + }, + { + "epoch": 0.3820675105485232, + "grad_norm": 0.711357057094574, + "learning_rate": 0.001035243031353296, + "loss": 1.5824, + "step": 3622 + }, + { + "epoch": 0.3821729957805907, + "grad_norm": 0.585471510887146, + "learning_rate": 0.0010350108220628614, + "loss": 1.6366, + "step": 3623 + }, + { + "epoch": 0.3822784810126582, + "grad_norm": 0.6161505579948425, + "learning_rate": 0.001034778580835543, + "loss": 1.6065, + "step": 3624 + }, + { + "epoch": 
0.38238396624472576, + "grad_norm": 0.6539911031723022, + "learning_rate": 0.0010345463076973645, + "loss": 1.6244, + "step": 3625 + }, + { + "epoch": 0.38248945147679325, + "grad_norm": 0.6329069137573242, + "learning_rate": 0.0010343140026743535, + "loss": 1.6121, + "step": 3626 + }, + { + "epoch": 0.38259493670886074, + "grad_norm": 0.6686332821846008, + "learning_rate": 0.0010340816657925407, + "loss": 1.6263, + "step": 3627 + }, + { + "epoch": 0.3827004219409283, + "grad_norm": 0.6458685994148254, + "learning_rate": 0.0010338492970779606, + "loss": 1.627, + "step": 3628 + }, + { + "epoch": 0.3828059071729958, + "grad_norm": 0.6624465584754944, + "learning_rate": 0.0010336168965566516, + "loss": 1.5983, + "step": 3629 + }, + { + "epoch": 0.3829113924050633, + "grad_norm": 0.6516238451004028, + "learning_rate": 0.001033384464254655, + "loss": 1.6177, + "step": 3630 + }, + { + "epoch": 0.3830168776371308, + "grad_norm": 0.7344706058502197, + "learning_rate": 0.001033152000198016, + "loss": 1.6183, + "step": 3631 + }, + { + "epoch": 0.3831223628691983, + "grad_norm": 0.6604624390602112, + "learning_rate": 0.0010329195044127834, + "loss": 1.6054, + "step": 3632 + }, + { + "epoch": 0.3832278481012658, + "grad_norm": 0.5684142708778381, + "learning_rate": 0.0010326869769250097, + "loss": 1.6315, + "step": 3633 + }, + { + "epoch": 0.38333333333333336, + "grad_norm": 0.5713350176811218, + "learning_rate": 0.0010324544177607508, + "loss": 1.573, + "step": 3634 + }, + { + "epoch": 0.38343881856540085, + "grad_norm": 0.5993182063102722, + "learning_rate": 0.0010322218269460657, + "loss": 1.6182, + "step": 3635 + }, + { + "epoch": 0.38354430379746834, + "grad_norm": 0.5699136257171631, + "learning_rate": 0.001031989204507018, + "loss": 1.6004, + "step": 3636 + }, + { + "epoch": 0.3836497890295359, + "grad_norm": 0.6335554122924805, + "learning_rate": 0.0010317565504696733, + "loss": 1.6885, + "step": 3637 + }, + { + "epoch": 0.3837552742616034, + "grad_norm": 
0.543319582939148, + "learning_rate": 0.0010315238648601025, + "loss": 1.6097, + "step": 3638 + }, + { + "epoch": 0.3838607594936709, + "grad_norm": 0.7087780237197876, + "learning_rate": 0.0010312911477043784, + "loss": 1.6166, + "step": 3639 + }, + { + "epoch": 0.38396624472573837, + "grad_norm": 0.6563761830329895, + "learning_rate": 0.001031058399028579, + "loss": 1.5629, + "step": 3640 + }, + { + "epoch": 0.3840717299578059, + "grad_norm": 0.6550977230072021, + "learning_rate": 0.0010308256188587843, + "loss": 1.6061, + "step": 3641 + }, + { + "epoch": 0.3841772151898734, + "grad_norm": 0.6751545071601868, + "learning_rate": 0.0010305928072210787, + "loss": 1.6663, + "step": 3642 + }, + { + "epoch": 0.3842827004219409, + "grad_norm": 0.6275085210800171, + "learning_rate": 0.00103035996414155, + "loss": 1.6157, + "step": 3643 + }, + { + "epoch": 0.38438818565400845, + "grad_norm": 0.6216983795166016, + "learning_rate": 0.0010301270896462893, + "loss": 1.608, + "step": 3644 + }, + { + "epoch": 0.38449367088607594, + "grad_norm": 0.613724410533905, + "learning_rate": 0.0010298941837613913, + "loss": 1.6201, + "step": 3645 + }, + { + "epoch": 0.38459915611814344, + "grad_norm": 0.6639721393585205, + "learning_rate": 0.0010296612465129542, + "loss": 1.5896, + "step": 3646 + }, + { + "epoch": 0.384704641350211, + "grad_norm": 0.6733205914497375, + "learning_rate": 0.0010294282779270802, + "loss": 1.6258, + "step": 3647 + }, + { + "epoch": 0.3848101265822785, + "grad_norm": 0.6257014870643616, + "learning_rate": 0.001029195278029874, + "loss": 1.6514, + "step": 3648 + }, + { + "epoch": 0.38491561181434597, + "grad_norm": 0.5865177512168884, + "learning_rate": 0.0010289622468474448, + "loss": 1.6336, + "step": 3649 + }, + { + "epoch": 0.3850210970464135, + "grad_norm": 0.677636444568634, + "learning_rate": 0.001028729184405905, + "loss": 1.6182, + "step": 3650 + }, + { + "epoch": 0.385126582278481, + "grad_norm": 0.5992524027824402, + "learning_rate": 
0.00102849609073137, + "loss": 1.635, + "step": 3651 + }, + { + "epoch": 0.3852320675105485, + "grad_norm": 0.6595554947853088, + "learning_rate": 0.0010282629658499593, + "loss": 1.6051, + "step": 3652 + }, + { + "epoch": 0.38533755274261605, + "grad_norm": 0.7827090620994568, + "learning_rate": 0.001028029809787796, + "loss": 1.605, + "step": 3653 + }, + { + "epoch": 0.38544303797468354, + "grad_norm": 0.6535933613777161, + "learning_rate": 0.001027796622571006, + "loss": 1.6468, + "step": 3654 + }, + { + "epoch": 0.38554852320675104, + "grad_norm": 0.5728498101234436, + "learning_rate": 0.001027563404225719, + "loss": 1.6155, + "step": 3655 + }, + { + "epoch": 0.3856540084388186, + "grad_norm": 0.6855534315109253, + "learning_rate": 0.0010273301547780687, + "loss": 1.6096, + "step": 3656 + }, + { + "epoch": 0.3857594936708861, + "grad_norm": 0.5655279159545898, + "learning_rate": 0.0010270968742541917, + "loss": 1.5887, + "step": 3657 + }, + { + "epoch": 0.38586497890295357, + "grad_norm": 0.7902032732963562, + "learning_rate": 0.0010268635626802282, + "loss": 1.6118, + "step": 3658 + }, + { + "epoch": 0.3859704641350211, + "grad_norm": 0.6707121729850769, + "learning_rate": 0.001026630220082322, + "loss": 1.6337, + "step": 3659 + }, + { + "epoch": 0.3860759493670886, + "grad_norm": 0.7093947529792786, + "learning_rate": 0.0010263968464866201, + "loss": 1.5981, + "step": 3660 + }, + { + "epoch": 0.3861814345991561, + "grad_norm": 0.7233483195304871, + "learning_rate": 0.0010261634419192732, + "loss": 1.6266, + "step": 3661 + }, + { + "epoch": 0.38628691983122365, + "grad_norm": 0.6667496562004089, + "learning_rate": 0.001025930006406436, + "loss": 1.6056, + "step": 3662 + }, + { + "epoch": 0.38639240506329114, + "grad_norm": 0.7377757430076599, + "learning_rate": 0.0010256965399742652, + "loss": 1.605, + "step": 3663 + }, + { + "epoch": 0.38649789029535864, + "grad_norm": 0.5994714498519897, + "learning_rate": 0.0010254630426489225, + "loss": 1.5924, + "step": 
3664 + }, + { + "epoch": 0.3866033755274262, + "grad_norm": 0.7865206599235535, + "learning_rate": 0.0010252295144565725, + "loss": 1.5976, + "step": 3665 + }, + { + "epoch": 0.3867088607594937, + "grad_norm": 0.7330995202064514, + "learning_rate": 0.0010249959554233827, + "loss": 1.6149, + "step": 3666 + }, + { + "epoch": 0.38681434599156117, + "grad_norm": 0.6597687602043152, + "learning_rate": 0.001024762365575525, + "loss": 1.6281, + "step": 3667 + }, + { + "epoch": 0.3869198312236287, + "grad_norm": 0.6817055940628052, + "learning_rate": 0.001024528744939174, + "loss": 1.6359, + "step": 3668 + }, + { + "epoch": 0.3870253164556962, + "grad_norm": 0.7620037794113159, + "learning_rate": 0.0010242950935405084, + "loss": 1.5973, + "step": 3669 + }, + { + "epoch": 0.3871308016877637, + "grad_norm": 0.603541374206543, + "learning_rate": 0.0010240614114057098, + "loss": 1.5976, + "step": 3670 + }, + { + "epoch": 0.3872362869198312, + "grad_norm": 0.8086754083633423, + "learning_rate": 0.0010238276985609631, + "loss": 1.6223, + "step": 3671 + }, + { + "epoch": 0.38734177215189874, + "grad_norm": 0.6453096270561218, + "learning_rate": 0.0010235939550324576, + "loss": 1.6044, + "step": 3672 + }, + { + "epoch": 0.38744725738396624, + "grad_norm": 0.7983880639076233, + "learning_rate": 0.0010233601808463852, + "loss": 1.6198, + "step": 3673 + }, + { + "epoch": 0.38755274261603373, + "grad_norm": 0.5881083607673645, + "learning_rate": 0.0010231263760289416, + "loss": 1.6104, + "step": 3674 + }, + { + "epoch": 0.3876582278481013, + "grad_norm": 0.759041428565979, + "learning_rate": 0.0010228925406063254, + "loss": 1.5892, + "step": 3675 + }, + { + "epoch": 0.38776371308016877, + "grad_norm": 0.618564248085022, + "learning_rate": 0.0010226586746047393, + "loss": 1.6033, + "step": 3676 + }, + { + "epoch": 0.38786919831223626, + "grad_norm": 0.7270857691764832, + "learning_rate": 0.0010224247780503892, + "loss": 1.6282, + "step": 3677 + }, + { + "epoch": 0.3879746835443038, + 
"grad_norm": 0.6315248012542725, + "learning_rate": 0.0010221908509694842, + "loss": 1.6354, + "step": 3678 + }, + { + "epoch": 0.3880801687763713, + "grad_norm": 0.6538350582122803, + "learning_rate": 0.0010219568933882372, + "loss": 1.6493, + "step": 3679 + }, + { + "epoch": 0.3881856540084388, + "grad_norm": 0.7816930413246155, + "learning_rate": 0.001021722905332864, + "loss": 1.5772, + "step": 3680 + }, + { + "epoch": 0.38829113924050634, + "grad_norm": 0.7655790448188782, + "learning_rate": 0.0010214888868295842, + "loss": 1.6252, + "step": 3681 + }, + { + "epoch": 0.38839662447257384, + "grad_norm": 0.6954217553138733, + "learning_rate": 0.0010212548379046214, + "loss": 1.6215, + "step": 3682 + }, + { + "epoch": 0.38850210970464133, + "grad_norm": 0.6461815237998962, + "learning_rate": 0.001021020758584201, + "loss": 1.6285, + "step": 3683 + }, + { + "epoch": 0.3886075949367089, + "grad_norm": 0.7465604543685913, + "learning_rate": 0.0010207866488945532, + "loss": 1.6423, + "step": 3684 + }, + { + "epoch": 0.38871308016877637, + "grad_norm": 0.6126785278320312, + "learning_rate": 0.0010205525088619112, + "loss": 1.657, + "step": 3685 + }, + { + "epoch": 0.38881856540084386, + "grad_norm": 0.6462568640708923, + "learning_rate": 0.0010203183385125115, + "loss": 1.6078, + "step": 3686 + }, + { + "epoch": 0.3889240506329114, + "grad_norm": 0.5608468651771545, + "learning_rate": 0.001020084137872594, + "loss": 1.599, + "step": 3687 + }, + { + "epoch": 0.3890295358649789, + "grad_norm": 0.7963826060295105, + "learning_rate": 0.0010198499069684023, + "loss": 1.5958, + "step": 3688 + }, + { + "epoch": 0.3891350210970464, + "grad_norm": 0.6664071083068848, + "learning_rate": 0.0010196156458261827, + "loss": 1.6381, + "step": 3689 + }, + { + "epoch": 0.38924050632911394, + "grad_norm": 0.701394259929657, + "learning_rate": 0.0010193813544721855, + "loss": 1.6298, + "step": 3690 + }, + { + "epoch": 0.38934599156118144, + "grad_norm": 0.8595621585845947, + 
"learning_rate": 0.0010191470329326646, + "loss": 1.6851, + "step": 3691 + }, + { + "epoch": 0.38945147679324893, + "grad_norm": 0.7702974677085876, + "learning_rate": 0.0010189126812338765, + "loss": 1.6054, + "step": 3692 + }, + { + "epoch": 0.3895569620253165, + "grad_norm": 0.6450194120407104, + "learning_rate": 0.0010186782994020811, + "loss": 1.5849, + "step": 3693 + }, + { + "epoch": 0.38966244725738397, + "grad_norm": 0.8145889043807983, + "learning_rate": 0.0010184438874635427, + "loss": 1.6048, + "step": 3694 + }, + { + "epoch": 0.38976793248945146, + "grad_norm": 0.606048047542572, + "learning_rate": 0.0010182094454445282, + "loss": 1.6293, + "step": 3695 + }, + { + "epoch": 0.389873417721519, + "grad_norm": 0.8326023817062378, + "learning_rate": 0.001017974973371308, + "loss": 1.6087, + "step": 3696 + }, + { + "epoch": 0.3899789029535865, + "grad_norm": 0.6411614418029785, + "learning_rate": 0.0010177404712701558, + "loss": 1.6271, + "step": 3697 + }, + { + "epoch": 0.390084388185654, + "grad_norm": 0.6597312092781067, + "learning_rate": 0.0010175059391673486, + "loss": 1.6457, + "step": 3698 + }, + { + "epoch": 0.39018987341772154, + "grad_norm": 0.638305127620697, + "learning_rate": 0.0010172713770891673, + "loss": 1.5843, + "step": 3699 + }, + { + "epoch": 0.39029535864978904, + "grad_norm": 0.6545609831809998, + "learning_rate": 0.001017036785061895, + "loss": 1.6796, + "step": 3700 + }, + { + "epoch": 0.39040084388185653, + "grad_norm": 0.739969789981842, + "learning_rate": 0.0010168021631118199, + "loss": 1.6231, + "step": 3701 + }, + { + "epoch": 0.3905063291139241, + "grad_norm": 0.5655426979064941, + "learning_rate": 0.0010165675112652314, + "loss": 1.604, + "step": 3702 + }, + { + "epoch": 0.39061181434599157, + "grad_norm": 0.6491004824638367, + "learning_rate": 0.0010163328295484245, + "loss": 1.5795, + "step": 3703 + }, + { + "epoch": 0.39071729957805906, + "grad_norm": 0.6438212394714355, + "learning_rate": 0.001016098117987696, + "loss": 
1.6227, + "step": 3704 + }, + { + "epoch": 0.39082278481012656, + "grad_norm": 0.8220547437667847, + "learning_rate": 0.0010158633766093462, + "loss": 1.6047, + "step": 3705 + }, + { + "epoch": 0.3909282700421941, + "grad_norm": 0.5985939502716064, + "learning_rate": 0.0010156286054396795, + "loss": 1.6381, + "step": 3706 + }, + { + "epoch": 0.3910337552742616, + "grad_norm": 0.6460914015769958, + "learning_rate": 0.001015393804505003, + "loss": 1.6711, + "step": 3707 + }, + { + "epoch": 0.3911392405063291, + "grad_norm": 0.6151636838912964, + "learning_rate": 0.0010151589738316275, + "loss": 1.6109, + "step": 3708 + }, + { + "epoch": 0.39124472573839664, + "grad_norm": 0.5464842915534973, + "learning_rate": 0.0010149241134458666, + "loss": 1.6184, + "step": 3709 + }, + { + "epoch": 0.39135021097046413, + "grad_norm": 0.5757115483283997, + "learning_rate": 0.0010146892233740376, + "loss": 1.549, + "step": 3710 + }, + { + "epoch": 0.3914556962025316, + "grad_norm": 0.5463108420372009, + "learning_rate": 0.0010144543036424616, + "loss": 1.5833, + "step": 3711 + }, + { + "epoch": 0.39156118143459917, + "grad_norm": 0.6172629594802856, + "learning_rate": 0.001014219354277462, + "loss": 1.575, + "step": 3712 + }, + { + "epoch": 0.39166666666666666, + "grad_norm": 0.5454010367393494, + "learning_rate": 0.0010139843753053663, + "loss": 1.5756, + "step": 3713 + }, + { + "epoch": 0.39177215189873416, + "grad_norm": 0.6574873924255371, + "learning_rate": 0.001013749366752505, + "loss": 1.6111, + "step": 3714 + }, + { + "epoch": 0.3918776371308017, + "grad_norm": 0.5894292593002319, + "learning_rate": 0.0010135143286452118, + "loss": 1.5865, + "step": 3715 + }, + { + "epoch": 0.3919831223628692, + "grad_norm": 0.6455109715461731, + "learning_rate": 0.0010132792610098244, + "loss": 1.5799, + "step": 3716 + }, + { + "epoch": 0.3920886075949367, + "grad_norm": 0.6597269177436829, + "learning_rate": 0.0010130441638726828, + "loss": 1.6281, + "step": 3717 + }, + { + "epoch": 
0.39219409282700424, + "grad_norm": 0.6304631233215332, + "learning_rate": 0.001012809037260131, + "loss": 1.5569, + "step": 3718 + }, + { + "epoch": 0.39229957805907173, + "grad_norm": 0.7307841181755066, + "learning_rate": 0.001012573881198516, + "loss": 1.6287, + "step": 3719 + }, + { + "epoch": 0.3924050632911392, + "grad_norm": 0.5880141258239746, + "learning_rate": 0.0010123386957141883, + "loss": 1.5941, + "step": 3720 + }, + { + "epoch": 0.39251054852320677, + "grad_norm": 0.6617398858070374, + "learning_rate": 0.0010121034808335018, + "loss": 1.5916, + "step": 3721 + }, + { + "epoch": 0.39261603375527426, + "grad_norm": 0.6380772590637207, + "learning_rate": 0.0010118682365828132, + "loss": 1.6214, + "step": 3722 + }, + { + "epoch": 0.39272151898734176, + "grad_norm": 0.5962855219841003, + "learning_rate": 0.0010116329629884827, + "loss": 1.5916, + "step": 3723 + }, + { + "epoch": 0.3928270042194093, + "grad_norm": 0.5686403512954712, + "learning_rate": 0.0010113976600768743, + "loss": 1.6256, + "step": 3724 + }, + { + "epoch": 0.3929324894514768, + "grad_norm": 0.7047475576400757, + "learning_rate": 0.0010111623278743547, + "loss": 1.5902, + "step": 3725 + }, + { + "epoch": 0.3930379746835443, + "grad_norm": 0.5352593064308167, + "learning_rate": 0.001010926966407294, + "loss": 1.6143, + "step": 3726 + }, + { + "epoch": 0.39314345991561184, + "grad_norm": 0.6259886622428894, + "learning_rate": 0.0010106915757020654, + "loss": 1.6152, + "step": 3727 + }, + { + "epoch": 0.39324894514767933, + "grad_norm": 0.5907722115516663, + "learning_rate": 0.0010104561557850457, + "loss": 1.6266, + "step": 3728 + }, + { + "epoch": 0.3933544303797468, + "grad_norm": 0.7044294476509094, + "learning_rate": 0.0010102207066826155, + "loss": 1.6303, + "step": 3729 + }, + { + "epoch": 0.39345991561181437, + "grad_norm": 0.7739956974983215, + "learning_rate": 0.0010099852284211573, + "loss": 1.5956, + "step": 3730 + }, + { + "epoch": 0.39356540084388186, + "grad_norm": 
0.5445778369903564, + "learning_rate": 0.0010097497210270578, + "loss": 1.6084, + "step": 3731 + }, + { + "epoch": 0.39367088607594936, + "grad_norm": 0.6086768507957458, + "learning_rate": 0.0010095141845267066, + "loss": 1.6522, + "step": 3732 + }, + { + "epoch": 0.3937763713080169, + "grad_norm": 0.6514937281608582, + "learning_rate": 0.0010092786189464975, + "loss": 1.6061, + "step": 3733 + }, + { + "epoch": 0.3938818565400844, + "grad_norm": 0.6353490352630615, + "learning_rate": 0.0010090430243128259, + "loss": 1.6195, + "step": 3734 + }, + { + "epoch": 0.3939873417721519, + "grad_norm": 0.6917631030082703, + "learning_rate": 0.0010088074006520918, + "loss": 1.6243, + "step": 3735 + }, + { + "epoch": 0.39409282700421944, + "grad_norm": 0.6938663721084595, + "learning_rate": 0.0010085717479906978, + "loss": 1.6294, + "step": 3736 + }, + { + "epoch": 0.39419831223628693, + "grad_norm": 0.6719883680343628, + "learning_rate": 0.0010083360663550502, + "loss": 1.5774, + "step": 3737 + }, + { + "epoch": 0.3943037974683544, + "grad_norm": 0.6646670699119568, + "learning_rate": 0.0010081003557715583, + "loss": 1.5872, + "step": 3738 + }, + { + "epoch": 0.3944092827004219, + "grad_norm": 0.752776026725769, + "learning_rate": 0.0010078646162666345, + "loss": 1.5713, + "step": 3739 + }, + { + "epoch": 0.39451476793248946, + "grad_norm": 0.548109233379364, + "learning_rate": 0.0010076288478666944, + "loss": 1.6439, + "step": 3740 + }, + { + "epoch": 0.39462025316455696, + "grad_norm": 0.7290925979614258, + "learning_rate": 0.0010073930505981573, + "loss": 1.6272, + "step": 3741 + }, + { + "epoch": 0.39472573839662445, + "grad_norm": 0.6776230335235596, + "learning_rate": 0.0010071572244874456, + "loss": 1.6295, + "step": 3742 + }, + { + "epoch": 0.394831223628692, + "grad_norm": 0.623752772808075, + "learning_rate": 0.0010069213695609845, + "loss": 1.6029, + "step": 3743 + }, + { + "epoch": 0.3949367088607595, + "grad_norm": 0.6681675314903259, + "learning_rate": 
0.0010066854858452028, + "loss": 1.661, + "step": 3744 + }, + { + "epoch": 0.395042194092827, + "grad_norm": 0.6390471458435059, + "learning_rate": 0.0010064495733665324, + "loss": 1.5757, + "step": 3745 + }, + { + "epoch": 0.39514767932489453, + "grad_norm": 0.7784256339073181, + "learning_rate": 0.0010062136321514084, + "loss": 1.627, + "step": 3746 + }, + { + "epoch": 0.395253164556962, + "grad_norm": 0.5539395213127136, + "learning_rate": 0.0010059776622262698, + "loss": 1.6513, + "step": 3747 + }, + { + "epoch": 0.3953586497890295, + "grad_norm": 0.6850250959396362, + "learning_rate": 0.0010057416636175575, + "loss": 1.6078, + "step": 3748 + }, + { + "epoch": 0.39546413502109706, + "grad_norm": 0.5856810808181763, + "learning_rate": 0.0010055056363517162, + "loss": 1.5803, + "step": 3749 + }, + { + "epoch": 0.39556962025316456, + "grad_norm": 0.7413769960403442, + "learning_rate": 0.0010052695804551946, + "loss": 1.6046, + "step": 3750 + }, + { + "epoch": 0.39567510548523205, + "grad_norm": 0.5884098410606384, + "learning_rate": 0.0010050334959544438, + "loss": 1.6081, + "step": 3751 + }, + { + "epoch": 0.3957805907172996, + "grad_norm": 0.6473937034606934, + "learning_rate": 0.0010047973828759178, + "loss": 1.6286, + "step": 3752 + }, + { + "epoch": 0.3958860759493671, + "grad_norm": 0.6888754963874817, + "learning_rate": 0.0010045612412460747, + "loss": 1.629, + "step": 3753 + }, + { + "epoch": 0.3959915611814346, + "grad_norm": 0.6498154401779175, + "learning_rate": 0.0010043250710913747, + "loss": 1.6062, + "step": 3754 + }, + { + "epoch": 0.39609704641350213, + "grad_norm": 0.680086612701416, + "learning_rate": 0.0010040888724382828, + "loss": 1.6028, + "step": 3755 + }, + { + "epoch": 0.3962025316455696, + "grad_norm": 0.7497800588607788, + "learning_rate": 0.0010038526453132655, + "loss": 1.6188, + "step": 3756 + }, + { + "epoch": 0.3963080168776371, + "grad_norm": 0.5581958889961243, + "learning_rate": 0.0010036163897427937, + "loss": 1.6126, + "step": 
3757 + }, + { + "epoch": 0.39641350210970466, + "grad_norm": 0.7411847114562988, + "learning_rate": 0.0010033801057533404, + "loss": 1.5978, + "step": 3758 + }, + { + "epoch": 0.39651898734177216, + "grad_norm": 0.6286800503730774, + "learning_rate": 0.001003143793371383, + "loss": 1.5674, + "step": 3759 + }, + { + "epoch": 0.39662447257383965, + "grad_norm": 0.7393867373466492, + "learning_rate": 0.0010029074526234014, + "loss": 1.5995, + "step": 3760 + }, + { + "epoch": 0.3967299578059072, + "grad_norm": 0.774144172668457, + "learning_rate": 0.0010026710835358786, + "loss": 1.6344, + "step": 3761 + }, + { + "epoch": 0.3968354430379747, + "grad_norm": 0.5876893401145935, + "learning_rate": 0.0010024346861353007, + "loss": 1.6043, + "step": 3762 + }, + { + "epoch": 0.3969409282700422, + "grad_norm": 0.6747151017189026, + "learning_rate": 0.0010021982604481575, + "loss": 1.6053, + "step": 3763 + }, + { + "epoch": 0.39704641350210973, + "grad_norm": 0.683803915977478, + "learning_rate": 0.001001961806500942, + "loss": 1.6017, + "step": 3764 + }, + { + "epoch": 0.3971518987341772, + "grad_norm": 0.5685717463493347, + "learning_rate": 0.0010017253243201495, + "loss": 1.6075, + "step": 3765 + }, + { + "epoch": 0.3972573839662447, + "grad_norm": 0.6939398646354675, + "learning_rate": 0.0010014888139322792, + "loss": 1.6052, + "step": 3766 + }, + { + "epoch": 0.39736286919831226, + "grad_norm": 0.5950301885604858, + "learning_rate": 0.001001252275363833, + "loss": 1.6554, + "step": 3767 + }, + { + "epoch": 0.39746835443037976, + "grad_norm": 0.6006388664245605, + "learning_rate": 0.0010010157086413167, + "loss": 1.6765, + "step": 3768 + }, + { + "epoch": 0.39757383966244725, + "grad_norm": 0.6189869046211243, + "learning_rate": 0.0010007791137912386, + "loss": 1.6172, + "step": 3769 + }, + { + "epoch": 0.39767932489451474, + "grad_norm": 0.6214725971221924, + "learning_rate": 0.0010005424908401104, + "loss": 1.6154, + "step": 3770 + }, + { + "epoch": 0.3977848101265823, + 
"grad_norm": 0.5806964635848999, + "learning_rate": 0.0010003058398144464, + "loss": 1.5803, + "step": 3771 + }, + { + "epoch": 0.3978902953586498, + "grad_norm": 0.5863529443740845, + "learning_rate": 0.0010000691607407652, + "loss": 1.618, + "step": 3772 + }, + { + "epoch": 0.3979957805907173, + "grad_norm": 0.5616611242294312, + "learning_rate": 0.0009998324536455877, + "loss": 1.626, + "step": 3773 + }, + { + "epoch": 0.3981012658227848, + "grad_norm": 0.7287088632583618, + "learning_rate": 0.0009995957185554378, + "loss": 1.6357, + "step": 3774 + }, + { + "epoch": 0.3982067510548523, + "grad_norm": 0.6089521050453186, + "learning_rate": 0.000999358955496843, + "loss": 1.594, + "step": 3775 + }, + { + "epoch": 0.3983122362869198, + "grad_norm": 0.6209673285484314, + "learning_rate": 0.000999122164496334, + "loss": 1.6025, + "step": 3776 + }, + { + "epoch": 0.39841772151898736, + "grad_norm": 0.5963621735572815, + "learning_rate": 0.0009988853455804442, + "loss": 1.5825, + "step": 3777 + }, + { + "epoch": 0.39852320675105485, + "grad_norm": 0.6014125943183899, + "learning_rate": 0.0009986484987757102, + "loss": 1.6079, + "step": 3778 + }, + { + "epoch": 0.39862869198312234, + "grad_norm": 0.5942056775093079, + "learning_rate": 0.0009984116241086723, + "loss": 1.6212, + "step": 3779 + }, + { + "epoch": 0.3987341772151899, + "grad_norm": 0.5618189573287964, + "learning_rate": 0.0009981747216058728, + "loss": 1.584, + "step": 3780 + }, + { + "epoch": 0.3988396624472574, + "grad_norm": 0.6043898463249207, + "learning_rate": 0.0009979377912938587, + "loss": 1.5838, + "step": 3781 + }, + { + "epoch": 0.3989451476793249, + "grad_norm": 0.5333225131034851, + "learning_rate": 0.0009977008331991785, + "loss": 1.5897, + "step": 3782 + }, + { + "epoch": 0.3990506329113924, + "grad_norm": 0.5712342262268066, + "learning_rate": 0.000997463847348385, + "loss": 1.6366, + "step": 3783 + }, + { + "epoch": 0.3991561181434599, + "grad_norm": 0.5528382062911987, + "learning_rate": 
0.000997226833768033, + "loss": 1.587, + "step": 3784 + }, + { + "epoch": 0.3992616033755274, + "grad_norm": 0.6087852120399475, + "learning_rate": 0.0009969897924846818, + "loss": 1.6446, + "step": 3785 + }, + { + "epoch": 0.39936708860759496, + "grad_norm": 0.5876606702804565, + "learning_rate": 0.0009967527235248928, + "loss": 1.6266, + "step": 3786 + }, + { + "epoch": 0.39947257383966245, + "grad_norm": 0.6174178719520569, + "learning_rate": 0.0009965156269152308, + "loss": 1.575, + "step": 3787 + }, + { + "epoch": 0.39957805907172994, + "grad_norm": 0.5762348175048828, + "learning_rate": 0.0009962785026822632, + "loss": 1.6335, + "step": 3788 + }, + { + "epoch": 0.3996835443037975, + "grad_norm": 0.6308127045631409, + "learning_rate": 0.0009960413508525617, + "loss": 1.6077, + "step": 3789 + }, + { + "epoch": 0.399789029535865, + "grad_norm": 0.6923227906227112, + "learning_rate": 0.0009958041714526998, + "loss": 1.6047, + "step": 3790 + }, + { + "epoch": 0.3998945147679325, + "grad_norm": 0.6356773972511292, + "learning_rate": 0.0009955669645092546, + "loss": 1.6059, + "step": 3791 + }, + { + "epoch": 0.4, + "grad_norm": 0.7397488951683044, + "learning_rate": 0.0009953297300488069, + "loss": 1.5809, + "step": 3792 + }, + { + "epoch": 0.4001054852320675, + "grad_norm": 0.999161958694458, + "learning_rate": 0.0009950924680979393, + "loss": 1.6484, + "step": 3793 + }, + { + "epoch": 0.400210970464135, + "grad_norm": 0.5550193786621094, + "learning_rate": 0.0009948551786832386, + "loss": 1.6234, + "step": 3794 + }, + { + "epoch": 0.40031645569620256, + "grad_norm": 0.8758957982063293, + "learning_rate": 0.0009946178618312942, + "loss": 1.6338, + "step": 3795 + }, + { + "epoch": 0.40042194092827005, + "grad_norm": 0.7007652521133423, + "learning_rate": 0.0009943805175686986, + "loss": 1.6393, + "step": 3796 + }, + { + "epoch": 0.40052742616033754, + "grad_norm": 0.7674638628959656, + "learning_rate": 0.0009941431459220475, + "loss": 1.6053, + "step": 3797 + }, + { 
+ "epoch": 0.4006329113924051, + "grad_norm": 0.9707825779914856, + "learning_rate": 0.0009939057469179394, + "loss": 1.6474, + "step": 3798 + }, + { + "epoch": 0.4007383966244726, + "grad_norm": 0.7095131278038025, + "learning_rate": 0.0009936683205829762, + "loss": 1.5726, + "step": 3799 + }, + { + "epoch": 0.4008438818565401, + "grad_norm": 0.7210427522659302, + "learning_rate": 0.0009934308669437627, + "loss": 1.6032, + "step": 3800 + }, + { + "epoch": 0.4009493670886076, + "grad_norm": 0.8830215930938721, + "learning_rate": 0.0009931933860269063, + "loss": 1.571, + "step": 3801 + }, + { + "epoch": 0.4010548523206751, + "grad_norm": 0.775280773639679, + "learning_rate": 0.0009929558778590188, + "loss": 1.594, + "step": 3802 + }, + { + "epoch": 0.4011603375527426, + "grad_norm": 0.7883564233779907, + "learning_rate": 0.0009927183424667135, + "loss": 1.6466, + "step": 3803 + }, + { + "epoch": 0.4012658227848101, + "grad_norm": 0.7306463122367859, + "learning_rate": 0.0009924807798766077, + "loss": 1.6332, + "step": 3804 + }, + { + "epoch": 0.40137130801687765, + "grad_norm": 0.5908941626548767, + "learning_rate": 0.0009922431901153213, + "loss": 1.5705, + "step": 3805 + }, + { + "epoch": 0.40147679324894514, + "grad_norm": 0.7431436777114868, + "learning_rate": 0.0009920055732094775, + "loss": 1.5585, + "step": 3806 + }, + { + "epoch": 0.40158227848101263, + "grad_norm": 0.6050968766212463, + "learning_rate": 0.0009917679291857027, + "loss": 1.5923, + "step": 3807 + }, + { + "epoch": 0.4016877637130802, + "grad_norm": 0.7557267546653748, + "learning_rate": 0.0009915302580706256, + "loss": 1.6173, + "step": 3808 + }, + { + "epoch": 0.4017932489451477, + "grad_norm": 0.7041510939598083, + "learning_rate": 0.0009912925598908788, + "loss": 1.633, + "step": 3809 + }, + { + "epoch": 0.40189873417721517, + "grad_norm": 0.5919394493103027, + "learning_rate": 0.0009910548346730972, + "loss": 1.6422, + "step": 3810 + }, + { + "epoch": 0.4020042194092827, + "grad_norm": 
0.5968484878540039, + "learning_rate": 0.00099081708244392, + "loss": 1.6097, + "step": 3811 + }, + { + "epoch": 0.4021097046413502, + "grad_norm": 0.6471118927001953, + "learning_rate": 0.0009905793032299875, + "loss": 1.5839, + "step": 3812 + }, + { + "epoch": 0.4022151898734177, + "grad_norm": 0.5274564027786255, + "learning_rate": 0.0009903414970579443, + "loss": 1.5818, + "step": 3813 + }, + { + "epoch": 0.40232067510548525, + "grad_norm": 0.7172728180885315, + "learning_rate": 0.000990103663954438, + "loss": 1.6019, + "step": 3814 + }, + { + "epoch": 0.40242616033755274, + "grad_norm": 0.525136411190033, + "learning_rate": 0.000989865803946119, + "loss": 1.6198, + "step": 3815 + }, + { + "epoch": 0.40253164556962023, + "grad_norm": 0.7647259831428528, + "learning_rate": 0.0009896279170596406, + "loss": 1.5894, + "step": 3816 + }, + { + "epoch": 0.4026371308016878, + "grad_norm": 0.5578097701072693, + "learning_rate": 0.0009893900033216593, + "loss": 1.5689, + "step": 3817 + }, + { + "epoch": 0.4027426160337553, + "grad_norm": 0.7563067674636841, + "learning_rate": 0.0009891520627588342, + "loss": 1.6054, + "step": 3818 + }, + { + "epoch": 0.40284810126582277, + "grad_norm": 0.6076073050498962, + "learning_rate": 0.000988914095397828, + "loss": 1.618, + "step": 3819 + }, + { + "epoch": 0.4029535864978903, + "grad_norm": 0.6297212839126587, + "learning_rate": 0.0009886761012653062, + "loss": 1.5728, + "step": 3820 + }, + { + "epoch": 0.4030590717299578, + "grad_norm": 0.6944629549980164, + "learning_rate": 0.000988438080387937, + "loss": 1.6059, + "step": 3821 + }, + { + "epoch": 0.4031645569620253, + "grad_norm": 0.5680896043777466, + "learning_rate": 0.000988200032792392, + "loss": 1.5511, + "step": 3822 + }, + { + "epoch": 0.40327004219409285, + "grad_norm": 0.6166403889656067, + "learning_rate": 0.0009879619585053455, + "loss": 1.6083, + "step": 3823 + }, + { + "epoch": 0.40337552742616034, + "grad_norm": 0.6098466515541077, + "learning_rate": 
0.0009877238575534749, + "loss": 1.6022, + "step": 3824 + }, + { + "epoch": 0.40348101265822783, + "grad_norm": 0.5838565230369568, + "learning_rate": 0.0009874857299634605, + "loss": 1.6074, + "step": 3825 + }, + { + "epoch": 0.4035864978902954, + "grad_norm": 0.6142293810844421, + "learning_rate": 0.0009872475757619862, + "loss": 1.6336, + "step": 3826 + }, + { + "epoch": 0.4036919831223629, + "grad_norm": 0.5288886427879333, + "learning_rate": 0.000987009394975738, + "loss": 1.6149, + "step": 3827 + }, + { + "epoch": 0.40379746835443037, + "grad_norm": 0.585972249507904, + "learning_rate": 0.0009867711876314052, + "loss": 1.5763, + "step": 3828 + }, + { + "epoch": 0.4039029535864979, + "grad_norm": 0.5854710936546326, + "learning_rate": 0.00098653295375568, + "loss": 1.5999, + "step": 3829 + }, + { + "epoch": 0.4040084388185654, + "grad_norm": 0.6775607466697693, + "learning_rate": 0.000986294693375258, + "loss": 1.5771, + "step": 3830 + }, + { + "epoch": 0.4041139240506329, + "grad_norm": 0.5490472316741943, + "learning_rate": 0.0009860564065168375, + "loss": 1.6123, + "step": 3831 + }, + { + "epoch": 0.40421940928270045, + "grad_norm": 0.7493919134140015, + "learning_rate": 0.0009858180932071192, + "loss": 1.6318, + "step": 3832 + }, + { + "epoch": 0.40432489451476794, + "grad_norm": 0.7309607267379761, + "learning_rate": 0.000985579753472808, + "loss": 1.5989, + "step": 3833 + }, + { + "epoch": 0.40443037974683543, + "grad_norm": 0.6912968158721924, + "learning_rate": 0.0009853413873406104, + "loss": 1.5976, + "step": 3834 + }, + { + "epoch": 0.4045358649789029, + "grad_norm": 0.6762731075286865, + "learning_rate": 0.000985102994837237, + "loss": 1.5979, + "step": 3835 + }, + { + "epoch": 0.4046413502109705, + "grad_norm": 0.634311854839325, + "learning_rate": 0.0009848645759894005, + "loss": 1.6215, + "step": 3836 + }, + { + "epoch": 0.40474683544303797, + "grad_norm": 0.6014207005500793, + "learning_rate": 0.0009846261308238177, + "loss": 1.5819, + "step": 
3837 + }, + { + "epoch": 0.40485232067510546, + "grad_norm": 0.6559983491897583, + "learning_rate": 0.0009843876593672064, + "loss": 1.6146, + "step": 3838 + }, + { + "epoch": 0.404957805907173, + "grad_norm": 0.5682319402694702, + "learning_rate": 0.0009841491616462892, + "loss": 1.6675, + "step": 3839 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 0.5930134654045105, + "learning_rate": 0.000983910637687791, + "loss": 1.6486, + "step": 3840 + }, + { + "epoch": 0.405168776371308, + "grad_norm": 0.5602677464485168, + "learning_rate": 0.0009836720875184394, + "loss": 1.637, + "step": 3841 + }, + { + "epoch": 0.40527426160337554, + "grad_norm": 0.575120210647583, + "learning_rate": 0.0009834335111649655, + "loss": 1.594, + "step": 3842 + }, + { + "epoch": 0.40537974683544303, + "grad_norm": 0.5231301188468933, + "learning_rate": 0.0009831949086541024, + "loss": 1.601, + "step": 3843 + }, + { + "epoch": 0.4054852320675105, + "grad_norm": 0.6962776184082031, + "learning_rate": 0.0009829562800125868, + "loss": 1.602, + "step": 3844 + }, + { + "epoch": 0.4055907172995781, + "grad_norm": 0.6456721425056458, + "learning_rate": 0.0009827176252671587, + "loss": 1.627, + "step": 3845 + }, + { + "epoch": 0.40569620253164557, + "grad_norm": 0.5969762802124023, + "learning_rate": 0.0009824789444445603, + "loss": 1.5921, + "step": 3846 + }, + { + "epoch": 0.40580168776371306, + "grad_norm": 0.6138609647750854, + "learning_rate": 0.0009822402375715366, + "loss": 1.5754, + "step": 3847 + }, + { + "epoch": 0.4059071729957806, + "grad_norm": 0.632674515247345, + "learning_rate": 0.0009820015046748366, + "loss": 1.5978, + "step": 3848 + }, + { + "epoch": 0.4060126582278481, + "grad_norm": 0.583690345287323, + "learning_rate": 0.0009817627457812106, + "loss": 1.5851, + "step": 3849 + }, + { + "epoch": 0.4061181434599156, + "grad_norm": 0.7668527960777283, + "learning_rate": 0.0009815239609174138, + "loss": 1.592, + "step": 3850 + }, + { + "epoch": 0.40622362869198314, + 
"grad_norm": 0.6038209199905396, + "learning_rate": 0.0009812851501102024, + "loss": 1.6557, + "step": 3851 + }, + { + "epoch": 0.40632911392405063, + "grad_norm": 0.5522535443305969, + "learning_rate": 0.0009810463133863368, + "loss": 1.6145, + "step": 3852 + }, + { + "epoch": 0.4064345991561181, + "grad_norm": 0.6319645047187805, + "learning_rate": 0.0009808074507725794, + "loss": 1.6246, + "step": 3853 + }, + { + "epoch": 0.4065400843881857, + "grad_norm": 0.5290046334266663, + "learning_rate": 0.0009805685622956966, + "loss": 1.6146, + "step": 3854 + }, + { + "epoch": 0.40664556962025317, + "grad_norm": 0.6117744445800781, + "learning_rate": 0.0009803296479824564, + "loss": 1.6059, + "step": 3855 + }, + { + "epoch": 0.40675105485232066, + "grad_norm": 0.5792310237884521, + "learning_rate": 0.0009800907078596308, + "loss": 1.6214, + "step": 3856 + }, + { + "epoch": 0.4068565400843882, + "grad_norm": 0.755988359451294, + "learning_rate": 0.000979851741953994, + "loss": 1.6049, + "step": 3857 + }, + { + "epoch": 0.4069620253164557, + "grad_norm": 0.7917974591255188, + "learning_rate": 0.0009796127502923232, + "loss": 1.6084, + "step": 3858 + }, + { + "epoch": 0.4070675105485232, + "grad_norm": 0.5912395715713501, + "learning_rate": 0.000979373732901399, + "loss": 1.5879, + "step": 3859 + }, + { + "epoch": 0.40717299578059074, + "grad_norm": 0.8657242655754089, + "learning_rate": 0.0009791346898080043, + "loss": 1.5896, + "step": 3860 + }, + { + "epoch": 0.40727848101265823, + "grad_norm": 0.7381917238235474, + "learning_rate": 0.000978895621038925, + "loss": 1.5917, + "step": 3861 + }, + { + "epoch": 0.4073839662447257, + "grad_norm": 0.6748866438865662, + "learning_rate": 0.0009786565266209496, + "loss": 1.6102, + "step": 3862 + }, + { + "epoch": 0.4074894514767933, + "grad_norm": 0.6830049753189087, + "learning_rate": 0.0009784174065808706, + "loss": 1.6222, + "step": 3863 + }, + { + "epoch": 0.40759493670886077, + "grad_norm": 0.6744226217269897, + 
"learning_rate": 0.0009781782609454821, + "loss": 1.5699, + "step": 3864 + }, + { + "epoch": 0.40770042194092826, + "grad_norm": 0.9389607310295105, + "learning_rate": 0.000977939089741582, + "loss": 1.5852, + "step": 3865 + }, + { + "epoch": 0.4078059071729958, + "grad_norm": 0.9045077562332153, + "learning_rate": 0.0009776998929959695, + "loss": 1.6365, + "step": 3866 + }, + { + "epoch": 0.4079113924050633, + "grad_norm": 0.5843061208724976, + "learning_rate": 0.0009774606707354493, + "loss": 1.5757, + "step": 3867 + }, + { + "epoch": 0.4080168776371308, + "grad_norm": 0.7126650810241699, + "learning_rate": 0.0009772214229868265, + "loss": 1.5827, + "step": 3868 + }, + { + "epoch": 0.4081223628691983, + "grad_norm": 0.6262292861938477, + "learning_rate": 0.0009769821497769102, + "loss": 1.6327, + "step": 3869 + }, + { + "epoch": 0.40822784810126583, + "grad_norm": 0.5720542073249817, + "learning_rate": 0.0009767428511325122, + "loss": 1.6131, + "step": 3870 + }, + { + "epoch": 0.4083333333333333, + "grad_norm": 0.7077670097351074, + "learning_rate": 0.000976503527080447, + "loss": 1.5898, + "step": 3871 + }, + { + "epoch": 0.4084388185654008, + "grad_norm": 0.5620328187942505, + "learning_rate": 0.0009762641776475322, + "loss": 1.6439, + "step": 3872 + }, + { + "epoch": 0.40854430379746837, + "grad_norm": 0.6342280507087708, + "learning_rate": 0.0009760248028605882, + "loss": 1.5937, + "step": 3873 + }, + { + "epoch": 0.40864978902953586, + "grad_norm": 0.5764793753623962, + "learning_rate": 0.0009757854027464377, + "loss": 1.6084, + "step": 3874 + }, + { + "epoch": 0.40875527426160335, + "grad_norm": 0.6518116593360901, + "learning_rate": 0.000975545977331907, + "loss": 1.6102, + "step": 3875 + }, + { + "epoch": 0.4088607594936709, + "grad_norm": 0.7766746878623962, + "learning_rate": 0.0009753065266438249, + "loss": 1.5692, + "step": 3876 + }, + { + "epoch": 0.4089662447257384, + "grad_norm": 0.5516355037689209, + "learning_rate": 0.0009750670507090233, + 
"loss": 1.5605, + "step": 3877 + }, + { + "epoch": 0.4090717299578059, + "grad_norm": 0.6620457172393799, + "learning_rate": 0.000974827549554336, + "loss": 1.5671, + "step": 3878 + }, + { + "epoch": 0.40917721518987343, + "grad_norm": 0.5484946370124817, + "learning_rate": 0.0009745880232066007, + "loss": 1.6031, + "step": 3879 + }, + { + "epoch": 0.4092827004219409, + "grad_norm": 0.6323533058166504, + "learning_rate": 0.0009743484716926576, + "loss": 1.5749, + "step": 3880 + }, + { + "epoch": 0.4093881856540084, + "grad_norm": 0.5858737230300903, + "learning_rate": 0.0009741088950393497, + "loss": 1.618, + "step": 3881 + }, + { + "epoch": 0.40949367088607597, + "grad_norm": 0.5227290391921997, + "learning_rate": 0.0009738692932735225, + "loss": 1.5887, + "step": 3882 + }, + { + "epoch": 0.40959915611814346, + "grad_norm": 0.6533905863761902, + "learning_rate": 0.0009736296664220247, + "loss": 1.5867, + "step": 3883 + }, + { + "epoch": 0.40970464135021095, + "grad_norm": 0.7070198655128479, + "learning_rate": 0.0009733900145117075, + "loss": 1.569, + "step": 3884 + }, + { + "epoch": 0.4098101265822785, + "grad_norm": 0.5890876650810242, + "learning_rate": 0.0009731503375694253, + "loss": 1.5792, + "step": 3885 + }, + { + "epoch": 0.409915611814346, + "grad_norm": 0.6202355027198792, + "learning_rate": 0.0009729106356220352, + "loss": 1.6149, + "step": 3886 + }, + { + "epoch": 0.4100210970464135, + "grad_norm": 0.605856716632843, + "learning_rate": 0.0009726709086963967, + "loss": 1.5975, + "step": 3887 + }, + { + "epoch": 0.41012658227848103, + "grad_norm": 0.625876247882843, + "learning_rate": 0.0009724311568193726, + "loss": 1.6145, + "step": 3888 + }, + { + "epoch": 0.4102320675105485, + "grad_norm": 0.7524820566177368, + "learning_rate": 0.0009721913800178281, + "loss": 1.5718, + "step": 3889 + }, + { + "epoch": 0.410337552742616, + "grad_norm": 0.6172010898590088, + "learning_rate": 0.0009719515783186319, + "loss": 1.5599, + "step": 3890 + }, + { + "epoch": 
0.41044303797468357, + "grad_norm": 0.5760720372200012, + "learning_rate": 0.0009717117517486543, + "loss": 1.5944, + "step": 3891 + }, + { + "epoch": 0.41054852320675106, + "grad_norm": 0.6975721716880798, + "learning_rate": 0.0009714719003347693, + "loss": 1.6337, + "step": 3892 + }, + { + "epoch": 0.41065400843881855, + "grad_norm": 0.5718479752540588, + "learning_rate": 0.0009712320241038537, + "loss": 1.62, + "step": 3893 + }, + { + "epoch": 0.4107594936708861, + "grad_norm": 0.7680113315582275, + "learning_rate": 0.0009709921230827865, + "loss": 1.5956, + "step": 3894 + }, + { + "epoch": 0.4108649789029536, + "grad_norm": 0.5474581122398376, + "learning_rate": 0.00097075219729845, + "loss": 1.5947, + "step": 3895 + }, + { + "epoch": 0.4109704641350211, + "grad_norm": 0.616326093673706, + "learning_rate": 0.0009705122467777292, + "loss": 1.5637, + "step": 3896 + }, + { + "epoch": 0.41107594936708863, + "grad_norm": 0.5511362552642822, + "learning_rate": 0.0009702722715475113, + "loss": 1.5867, + "step": 3897 + }, + { + "epoch": 0.4111814345991561, + "grad_norm": 0.6039000153541565, + "learning_rate": 0.000970032271634687, + "loss": 1.5895, + "step": 3898 + }, + { + "epoch": 0.4112869198312236, + "grad_norm": 0.6131844520568848, + "learning_rate": 0.0009697922470661497, + "loss": 1.5602, + "step": 3899 + }, + { + "epoch": 0.41139240506329117, + "grad_norm": 0.6638787984848022, + "learning_rate": 0.0009695521978687951, + "loss": 1.6119, + "step": 3900 + }, + { + "epoch": 0.41149789029535866, + "grad_norm": 0.7504019737243652, + "learning_rate": 0.0009693121240695216, + "loss": 1.5771, + "step": 3901 + }, + { + "epoch": 0.41160337552742615, + "grad_norm": 0.5499006509780884, + "learning_rate": 0.0009690720256952314, + "loss": 1.6401, + "step": 3902 + }, + { + "epoch": 0.41170886075949364, + "grad_norm": 0.68426913022995, + "learning_rate": 0.0009688319027728282, + "loss": 1.589, + "step": 3903 + }, + { + "epoch": 0.4118143459915612, + "grad_norm": 
0.5676501393318176, + "learning_rate": 0.0009685917553292192, + "loss": 1.5785, + "step": 3904 + }, + { + "epoch": 0.4119198312236287, + "grad_norm": 0.8094425201416016, + "learning_rate": 0.0009683515833913137, + "loss": 1.5923, + "step": 3905 + }, + { + "epoch": 0.4120253164556962, + "grad_norm": 0.6847144961357117, + "learning_rate": 0.0009681113869860247, + "loss": 1.6074, + "step": 3906 + }, + { + "epoch": 0.4121308016877637, + "grad_norm": 0.6908842921257019, + "learning_rate": 0.0009678711661402672, + "loss": 1.571, + "step": 3907 + }, + { + "epoch": 0.4122362869198312, + "grad_norm": 0.7142349481582642, + "learning_rate": 0.0009676309208809592, + "loss": 1.6049, + "step": 3908 + }, + { + "epoch": 0.4123417721518987, + "grad_norm": 0.5497071146965027, + "learning_rate": 0.0009673906512350213, + "loss": 1.583, + "step": 3909 + }, + { + "epoch": 0.41244725738396626, + "grad_norm": 0.783607006072998, + "learning_rate": 0.0009671503572293767, + "loss": 1.6298, + "step": 3910 + }, + { + "epoch": 0.41255274261603375, + "grad_norm": 0.5711628198623657, + "learning_rate": 0.000966910038890952, + "loss": 1.6012, + "step": 3911 + }, + { + "epoch": 0.41265822784810124, + "grad_norm": 0.6839337348937988, + "learning_rate": 0.0009666696962466757, + "loss": 1.644, + "step": 3912 + }, + { + "epoch": 0.4127637130801688, + "grad_norm": 0.6160405278205872, + "learning_rate": 0.0009664293293234795, + "loss": 1.6033, + "step": 3913 + }, + { + "epoch": 0.4128691983122363, + "grad_norm": 0.5765300989151001, + "learning_rate": 0.0009661889381482977, + "loss": 1.5285, + "step": 3914 + }, + { + "epoch": 0.4129746835443038, + "grad_norm": 0.6586012840270996, + "learning_rate": 0.0009659485227480676, + "loss": 1.5806, + "step": 3915 + }, + { + "epoch": 0.4130801687763713, + "grad_norm": 0.5755287408828735, + "learning_rate": 0.0009657080831497284, + "loss": 1.5961, + "step": 3916 + }, + { + "epoch": 0.4131856540084388, + "grad_norm": 0.6010465025901794, + "learning_rate": 
0.0009654676193802232, + "loss": 1.5679, + "step": 3917 + }, + { + "epoch": 0.4132911392405063, + "grad_norm": 0.5736188888549805, + "learning_rate": 0.0009652271314664966, + "loss": 1.6125, + "step": 3918 + }, + { + "epoch": 0.41339662447257386, + "grad_norm": 0.5984007120132446, + "learning_rate": 0.0009649866194354967, + "loss": 1.579, + "step": 3919 + }, + { + "epoch": 0.41350210970464135, + "grad_norm": 0.6039881706237793, + "learning_rate": 0.0009647460833141742, + "loss": 1.6147, + "step": 3920 + }, + { + "epoch": 0.41360759493670884, + "grad_norm": 0.6142542958259583, + "learning_rate": 0.0009645055231294823, + "loss": 1.561, + "step": 3921 + }, + { + "epoch": 0.4137130801687764, + "grad_norm": 0.6223117113113403, + "learning_rate": 0.0009642649389083768, + "loss": 1.6425, + "step": 3922 + }, + { + "epoch": 0.4138185654008439, + "grad_norm": 0.5852706432342529, + "learning_rate": 0.0009640243306778162, + "loss": 1.6129, + "step": 3923 + }, + { + "epoch": 0.4139240506329114, + "grad_norm": 0.6264892220497131, + "learning_rate": 0.0009637836984647627, + "loss": 1.579, + "step": 3924 + }, + { + "epoch": 0.4140295358649789, + "grad_norm": 0.7086585760116577, + "learning_rate": 0.0009635430422961794, + "loss": 1.5533, + "step": 3925 + }, + { + "epoch": 0.4141350210970464, + "grad_norm": 0.5857563018798828, + "learning_rate": 0.0009633023621990334, + "loss": 1.5974, + "step": 3926 + }, + { + "epoch": 0.4142405063291139, + "grad_norm": 0.6404128074645996, + "learning_rate": 0.000963061658200294, + "loss": 1.5994, + "step": 3927 + }, + { + "epoch": 0.41434599156118146, + "grad_norm": 0.6216115951538086, + "learning_rate": 0.0009628209303269335, + "loss": 1.5967, + "step": 3928 + }, + { + "epoch": 0.41445147679324895, + "grad_norm": 0.6084376573562622, + "learning_rate": 0.0009625801786059267, + "loss": 1.6178, + "step": 3929 + }, + { + "epoch": 0.41455696202531644, + "grad_norm": 0.5969811677932739, + "learning_rate": 0.0009623394030642507, + "loss": 1.6061, + 
"step": 3930 + }, + { + "epoch": 0.414662447257384, + "grad_norm": 0.5629078149795532, + "learning_rate": 0.0009620986037288858, + "loss": 1.5718, + "step": 3931 + }, + { + "epoch": 0.4147679324894515, + "grad_norm": 0.6385952234268188, + "learning_rate": 0.0009618577806268147, + "loss": 1.5782, + "step": 3932 + }, + { + "epoch": 0.414873417721519, + "grad_norm": 0.5837723016738892, + "learning_rate": 0.0009616169337850229, + "loss": 1.5873, + "step": 3933 + }, + { + "epoch": 0.41497890295358647, + "grad_norm": 0.6220123171806335, + "learning_rate": 0.0009613760632304985, + "loss": 1.6032, + "step": 3934 + }, + { + "epoch": 0.415084388185654, + "grad_norm": 0.6490159034729004, + "learning_rate": 0.0009611351689902321, + "loss": 1.6141, + "step": 3935 + }, + { + "epoch": 0.4151898734177215, + "grad_norm": 0.5878368616104126, + "learning_rate": 0.000960894251091217, + "loss": 1.5904, + "step": 3936 + }, + { + "epoch": 0.415295358649789, + "grad_norm": 0.632158100605011, + "learning_rate": 0.0009606533095604499, + "loss": 1.6259, + "step": 3937 + }, + { + "epoch": 0.41540084388185655, + "grad_norm": 0.70749431848526, + "learning_rate": 0.0009604123444249288, + "loss": 1.5571, + "step": 3938 + }, + { + "epoch": 0.41550632911392404, + "grad_norm": 0.5586749911308289, + "learning_rate": 0.0009601713557116554, + "loss": 1.5951, + "step": 3939 + }, + { + "epoch": 0.41561181434599154, + "grad_norm": 0.7537254095077515, + "learning_rate": 0.0009599303434476334, + "loss": 1.6035, + "step": 3940 + }, + { + "epoch": 0.4157172995780591, + "grad_norm": 0.6215395927429199, + "learning_rate": 0.0009596893076598698, + "loss": 1.5987, + "step": 3941 + }, + { + "epoch": 0.4158227848101266, + "grad_norm": 0.6948989629745483, + "learning_rate": 0.0009594482483753736, + "loss": 1.631, + "step": 3942 + }, + { + "epoch": 0.41592827004219407, + "grad_norm": 0.681706428527832, + "learning_rate": 0.0009592071656211568, + "loss": 1.5904, + "step": 3943 + }, + { + "epoch": 0.4160337552742616, + 
"grad_norm": 0.6230199337005615, + "learning_rate": 0.0009589660594242338, + "loss": 1.6088, + "step": 3944 + }, + { + "epoch": 0.4161392405063291, + "grad_norm": 0.5857817530632019, + "learning_rate": 0.0009587249298116219, + "loss": 1.5727, + "step": 3945 + }, + { + "epoch": 0.4162447257383966, + "grad_norm": 0.6740971803665161, + "learning_rate": 0.0009584837768103408, + "loss": 1.5667, + "step": 3946 + }, + { + "epoch": 0.41635021097046415, + "grad_norm": 0.5929163098335266, + "learning_rate": 0.0009582426004474129, + "loss": 1.6239, + "step": 3947 + }, + { + "epoch": 0.41645569620253164, + "grad_norm": 0.5727715492248535, + "learning_rate": 0.0009580014007498634, + "loss": 1.6119, + "step": 3948 + }, + { + "epoch": 0.41656118143459914, + "grad_norm": 0.5947136878967285, + "learning_rate": 0.0009577601777447194, + "loss": 1.5797, + "step": 3949 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 0.5872216820716858, + "learning_rate": 0.0009575189314590118, + "loss": 1.612, + "step": 3950 + }, + { + "epoch": 0.4167721518987342, + "grad_norm": 0.6095768809318542, + "learning_rate": 0.0009572776619197731, + "loss": 1.6191, + "step": 3951 + }, + { + "epoch": 0.41687763713080167, + "grad_norm": 0.6625959873199463, + "learning_rate": 0.0009570363691540387, + "loss": 1.5981, + "step": 3952 + }, + { + "epoch": 0.4169831223628692, + "grad_norm": 0.5751397013664246, + "learning_rate": 0.0009567950531888469, + "loss": 1.638, + "step": 3953 + }, + { + "epoch": 0.4170886075949367, + "grad_norm": 0.7165716290473938, + "learning_rate": 0.0009565537140512381, + "loss": 1.6098, + "step": 3954 + }, + { + "epoch": 0.4171940928270042, + "grad_norm": 0.6441510319709778, + "learning_rate": 0.0009563123517682559, + "loss": 1.5925, + "step": 3955 + }, + { + "epoch": 0.41729957805907175, + "grad_norm": 0.627315104007721, + "learning_rate": 0.0009560709663669456, + "loss": 1.5771, + "step": 3956 + }, + { + "epoch": 0.41740506329113924, + "grad_norm": 0.6658162474632263, + 
"learning_rate": 0.0009558295578743559, + "loss": 1.6021, + "step": 3957 + }, + { + "epoch": 0.41751054852320674, + "grad_norm": 0.6061848998069763, + "learning_rate": 0.0009555881263175381, + "loss": 1.6243, + "step": 3958 + }, + { + "epoch": 0.4176160337552743, + "grad_norm": 0.6720684766769409, + "learning_rate": 0.0009553466717235456, + "loss": 1.5553, + "step": 3959 + }, + { + "epoch": 0.4177215189873418, + "grad_norm": 0.5742719173431396, + "learning_rate": 0.0009551051941194346, + "loss": 1.6082, + "step": 3960 + }, + { + "epoch": 0.41782700421940927, + "grad_norm": 0.6595430970191956, + "learning_rate": 0.0009548636935322639, + "loss": 1.5665, + "step": 3961 + }, + { + "epoch": 0.4179324894514768, + "grad_norm": 0.5466634035110474, + "learning_rate": 0.0009546221699890945, + "loss": 1.5639, + "step": 3962 + }, + { + "epoch": 0.4180379746835443, + "grad_norm": 0.6238153576850891, + "learning_rate": 0.0009543806235169909, + "loss": 1.5755, + "step": 3963 + }, + { + "epoch": 0.4181434599156118, + "grad_norm": 0.6469119191169739, + "learning_rate": 0.0009541390541430192, + "loss": 1.6195, + "step": 3964 + }, + { + "epoch": 0.41824894514767935, + "grad_norm": 0.6049637794494629, + "learning_rate": 0.0009538974618942486, + "loss": 1.6131, + "step": 3965 + }, + { + "epoch": 0.41835443037974684, + "grad_norm": 0.6014156937599182, + "learning_rate": 0.0009536558467977505, + "loss": 1.5804, + "step": 3966 + }, + { + "epoch": 0.41845991561181434, + "grad_norm": 0.6591711640357971, + "learning_rate": 0.0009534142088805994, + "loss": 1.5854, + "step": 3967 + }, + { + "epoch": 0.41856540084388183, + "grad_norm": 0.7898242473602295, + "learning_rate": 0.0009531725481698719, + "loss": 1.6054, + "step": 3968 + }, + { + "epoch": 0.4186708860759494, + "grad_norm": 0.5400672554969788, + "learning_rate": 0.0009529308646926473, + "loss": 1.5988, + "step": 3969 + }, + { + "epoch": 0.41877637130801687, + "grad_norm": 0.7035240530967712, + "learning_rate": 0.0009526891584760071, + 
"loss": 1.5673, + "step": 3970 + }, + { + "epoch": 0.41888185654008436, + "grad_norm": 0.581085741519928, + "learning_rate": 0.0009524474295470362, + "loss": 1.6137, + "step": 3971 + }, + { + "epoch": 0.4189873417721519, + "grad_norm": 0.6702154874801636, + "learning_rate": 0.0009522056779328214, + "loss": 1.5958, + "step": 3972 + }, + { + "epoch": 0.4190928270042194, + "grad_norm": 0.625404417514801, + "learning_rate": 0.0009519639036604522, + "loss": 1.6178, + "step": 3973 + }, + { + "epoch": 0.4191983122362869, + "grad_norm": 0.5991174578666687, + "learning_rate": 0.0009517221067570204, + "loss": 1.588, + "step": 3974 + }, + { + "epoch": 0.41930379746835444, + "grad_norm": 0.5588675141334534, + "learning_rate": 0.0009514802872496205, + "loss": 1.6001, + "step": 3975 + }, + { + "epoch": 0.41940928270042194, + "grad_norm": 0.6984415054321289, + "learning_rate": 0.0009512384451653499, + "loss": 1.5868, + "step": 3976 + }, + { + "epoch": 0.41951476793248943, + "grad_norm": 0.6106035113334656, + "learning_rate": 0.000950996580531308, + "loss": 1.6002, + "step": 3977 + }, + { + "epoch": 0.419620253164557, + "grad_norm": 0.6182209849357605, + "learning_rate": 0.000950754693374597, + "loss": 1.5468, + "step": 3978 + }, + { + "epoch": 0.41972573839662447, + "grad_norm": 0.5679449439048767, + "learning_rate": 0.0009505127837223215, + "loss": 1.6082, + "step": 3979 + }, + { + "epoch": 0.41983122362869196, + "grad_norm": 0.6601321697235107, + "learning_rate": 0.0009502708516015889, + "loss": 1.6338, + "step": 3980 + }, + { + "epoch": 0.4199367088607595, + "grad_norm": 0.6632180213928223, + "learning_rate": 0.0009500288970395085, + "loss": 1.577, + "step": 3981 + }, + { + "epoch": 0.420042194092827, + "grad_norm": 0.633501410484314, + "learning_rate": 0.000949786920063193, + "loss": 1.6048, + "step": 3982 + }, + { + "epoch": 0.4201476793248945, + "grad_norm": 0.7423333525657654, + "learning_rate": 0.0009495449206997568, + "loss": 1.5913, + "step": 3983 + }, + { + "epoch": 
0.42025316455696204, + "grad_norm": 0.600994348526001, + "learning_rate": 0.0009493028989763171, + "loss": 1.6049, + "step": 3984 + }, + { + "epoch": 0.42035864978902954, + "grad_norm": 0.6541864275932312, + "learning_rate": 0.0009490608549199939, + "loss": 1.5798, + "step": 3985 + }, + { + "epoch": 0.42046413502109703, + "grad_norm": 0.7964689135551453, + "learning_rate": 0.0009488187885579092, + "loss": 1.5879, + "step": 3986 + }, + { + "epoch": 0.4205696202531646, + "grad_norm": 0.6713433265686035, + "learning_rate": 0.000948576699917188, + "loss": 1.5766, + "step": 3987 + }, + { + "epoch": 0.42067510548523207, + "grad_norm": 0.565976619720459, + "learning_rate": 0.0009483345890249571, + "loss": 1.5925, + "step": 3988 + }, + { + "epoch": 0.42078059071729956, + "grad_norm": 0.6503856182098389, + "learning_rate": 0.0009480924559083468, + "loss": 1.5967, + "step": 3989 + }, + { + "epoch": 0.4208860759493671, + "grad_norm": 0.6271948218345642, + "learning_rate": 0.0009478503005944888, + "loss": 1.6031, + "step": 3990 + }, + { + "epoch": 0.4209915611814346, + "grad_norm": 0.626835823059082, + "learning_rate": 0.0009476081231105183, + "loss": 1.6041, + "step": 3991 + }, + { + "epoch": 0.4210970464135021, + "grad_norm": 0.625577449798584, + "learning_rate": 0.0009473659234835722, + "loss": 1.5991, + "step": 3992 + }, + { + "epoch": 0.42120253164556964, + "grad_norm": 0.708701491355896, + "learning_rate": 0.00094712370174079, + "loss": 1.5936, + "step": 3993 + }, + { + "epoch": 0.42130801687763714, + "grad_norm": 0.58786940574646, + "learning_rate": 0.0009468814579093141, + "loss": 1.598, + "step": 3994 + }, + { + "epoch": 0.42141350210970463, + "grad_norm": 0.6400099992752075, + "learning_rate": 0.0009466391920162894, + "loss": 1.5868, + "step": 3995 + }, + { + "epoch": 0.4215189873417722, + "grad_norm": 0.642543375492096, + "learning_rate": 0.0009463969040888624, + "loss": 1.6165, + "step": 3996 + }, + { + "epoch": 0.42162447257383967, + "grad_norm": 
0.5663923621177673, + "learning_rate": 0.0009461545941541832, + "loss": 1.6173, + "step": 3997 + }, + { + "epoch": 0.42172995780590716, + "grad_norm": 0.6896780729293823, + "learning_rate": 0.0009459122622394033, + "loss": 1.5959, + "step": 3998 + }, + { + "epoch": 0.4218354430379747, + "grad_norm": 0.558889627456665, + "learning_rate": 0.0009456699083716777, + "loss": 1.6112, + "step": 3999 + }, + { + "epoch": 0.4219409282700422, + "grad_norm": 0.7647099494934082, + "learning_rate": 0.0009454275325781632, + "loss": 1.6048, + "step": 4000 + }, + { + "epoch": 0.4220464135021097, + "grad_norm": 0.8649213314056396, + "learning_rate": 0.0009451851348860191, + "loss": 1.5742, + "step": 4001 + }, + { + "epoch": 0.4221518987341772, + "grad_norm": 0.6012972593307495, + "learning_rate": 0.0009449427153224076, + "loss": 1.5893, + "step": 4002 + }, + { + "epoch": 0.42225738396624474, + "grad_norm": 0.7848843336105347, + "learning_rate": 0.0009447002739144924, + "loss": 1.5659, + "step": 4003 + }, + { + "epoch": 0.42236286919831223, + "grad_norm": 0.7921288013458252, + "learning_rate": 0.0009444578106894408, + "loss": 1.6171, + "step": 4004 + }, + { + "epoch": 0.4224683544303797, + "grad_norm": 0.5722038745880127, + "learning_rate": 0.000944215325674422, + "loss": 1.5888, + "step": 4005 + }, + { + "epoch": 0.42257383966244727, + "grad_norm": 0.7022862434387207, + "learning_rate": 0.0009439728188966074, + "loss": 1.614, + "step": 4006 + }, + { + "epoch": 0.42267932489451476, + "grad_norm": 0.5545481443405151, + "learning_rate": 0.0009437302903831712, + "loss": 1.5949, + "step": 4007 + }, + { + "epoch": 0.42278481012658226, + "grad_norm": 0.6483936905860901, + "learning_rate": 0.0009434877401612898, + "loss": 1.5724, + "step": 4008 + }, + { + "epoch": 0.4228902953586498, + "grad_norm": 0.566540539264679, + "learning_rate": 0.0009432451682581424, + "loss": 1.5792, + "step": 4009 + }, + { + "epoch": 0.4229957805907173, + "grad_norm": 0.6126568913459778, + "learning_rate": 
0.0009430025747009104, + "loss": 1.6157, + "step": 4010 + }, + { + "epoch": 0.4231012658227848, + "grad_norm": 0.6315452456474304, + "learning_rate": 0.0009427599595167776, + "loss": 1.5801, + "step": 4011 + }, + { + "epoch": 0.42320675105485234, + "grad_norm": 0.5691980123519897, + "learning_rate": 0.0009425173227329297, + "loss": 1.5561, + "step": 4012 + }, + { + "epoch": 0.42331223628691983, + "grad_norm": 0.719071626663208, + "learning_rate": 0.0009422746643765563, + "loss": 1.5821, + "step": 4013 + }, + { + "epoch": 0.4234177215189873, + "grad_norm": 0.6392236351966858, + "learning_rate": 0.0009420319844748476, + "loss": 1.5976, + "step": 4014 + }, + { + "epoch": 0.42352320675105487, + "grad_norm": 0.5741580724716187, + "learning_rate": 0.0009417892830549978, + "loss": 1.6174, + "step": 4015 + }, + { + "epoch": 0.42362869198312236, + "grad_norm": 0.6772246360778809, + "learning_rate": 0.0009415465601442023, + "loss": 1.6113, + "step": 4016 + }, + { + "epoch": 0.42373417721518986, + "grad_norm": 0.5992785692214966, + "learning_rate": 0.0009413038157696595, + "loss": 1.5869, + "step": 4017 + }, + { + "epoch": 0.4238396624472574, + "grad_norm": 0.610297679901123, + "learning_rate": 0.0009410610499585705, + "loss": 1.6038, + "step": 4018 + }, + { + "epoch": 0.4239451476793249, + "grad_norm": 0.6390622854232788, + "learning_rate": 0.000940818262738138, + "loss": 1.5702, + "step": 4019 + }, + { + "epoch": 0.4240506329113924, + "grad_norm": 0.5501418113708496, + "learning_rate": 0.0009405754541355677, + "loss": 1.6276, + "step": 4020 + }, + { + "epoch": 0.42415611814345994, + "grad_norm": 0.6824932098388672, + "learning_rate": 0.0009403326241780674, + "loss": 1.5652, + "step": 4021 + }, + { + "epoch": 0.42426160337552743, + "grad_norm": 0.5482655167579651, + "learning_rate": 0.0009400897728928475, + "loss": 1.6147, + "step": 4022 + }, + { + "epoch": 0.4243670886075949, + "grad_norm": 0.6044957041740417, + "learning_rate": 0.0009398469003071207, + "loss": 1.5711, + 
"step": 4023 + }, + { + "epoch": 0.42447257383966247, + "grad_norm": 0.5711612105369568, + "learning_rate": 0.0009396040064481021, + "loss": 1.5834, + "step": 4024 + }, + { + "epoch": 0.42457805907172996, + "grad_norm": 0.5625395774841309, + "learning_rate": 0.000939361091343009, + "loss": 1.6103, + "step": 4025 + }, + { + "epoch": 0.42468354430379746, + "grad_norm": 0.6038628220558167, + "learning_rate": 0.0009391181550190615, + "loss": 1.6063, + "step": 4026 + }, + { + "epoch": 0.424789029535865, + "grad_norm": 0.620922863483429, + "learning_rate": 0.0009388751975034815, + "loss": 1.5649, + "step": 4027 + }, + { + "epoch": 0.4248945147679325, + "grad_norm": 0.5753240585327148, + "learning_rate": 0.0009386322188234941, + "loss": 1.5858, + "step": 4028 + }, + { + "epoch": 0.425, + "grad_norm": 0.5570173859596252, + "learning_rate": 0.0009383892190063256, + "loss": 1.5663, + "step": 4029 + }, + { + "epoch": 0.42510548523206754, + "grad_norm": 0.5652287602424622, + "learning_rate": 0.0009381461980792061, + "loss": 1.5847, + "step": 4030 + }, + { + "epoch": 0.42521097046413503, + "grad_norm": 0.5880299806594849, + "learning_rate": 0.0009379031560693665, + "loss": 1.5823, + "step": 4031 + }, + { + "epoch": 0.4253164556962025, + "grad_norm": 0.6295303702354431, + "learning_rate": 0.0009376600930040417, + "loss": 1.6213, + "step": 4032 + }, + { + "epoch": 0.42542194092827, + "grad_norm": 0.5623308420181274, + "learning_rate": 0.0009374170089104676, + "loss": 1.6093, + "step": 4033 + }, + { + "epoch": 0.42552742616033756, + "grad_norm": 0.6062510013580322, + "learning_rate": 0.000937173903815883, + "loss": 1.5941, + "step": 4034 + }, + { + "epoch": 0.42563291139240506, + "grad_norm": 0.6085534691810608, + "learning_rate": 0.0009369307777475293, + "loss": 1.6008, + "step": 4035 + }, + { + "epoch": 0.42573839662447255, + "grad_norm": 0.5639039278030396, + "learning_rate": 0.0009366876307326496, + "loss": 1.5791, + "step": 4036 + }, + { + "epoch": 0.4258438818565401, + 
"grad_norm": 0.6636078357696533, + "learning_rate": 0.0009364444627984902, + "loss": 1.6212, + "step": 4037 + }, + { + "epoch": 0.4259493670886076, + "grad_norm": 0.6571029424667358, + "learning_rate": 0.000936201273972299, + "loss": 1.6054, + "step": 4038 + }, + { + "epoch": 0.4260548523206751, + "grad_norm": 0.5491618514060974, + "learning_rate": 0.0009359580642813265, + "loss": 1.5711, + "step": 4039 + }, + { + "epoch": 0.42616033755274263, + "grad_norm": 0.5910784602165222, + "learning_rate": 0.0009357148337528256, + "loss": 1.5788, + "step": 4040 + }, + { + "epoch": 0.4262658227848101, + "grad_norm": 0.5544033646583557, + "learning_rate": 0.0009354715824140515, + "loss": 1.592, + "step": 4041 + }, + { + "epoch": 0.4263713080168776, + "grad_norm": 0.6090651154518127, + "learning_rate": 0.0009352283102922619, + "loss": 1.6102, + "step": 4042 + }, + { + "epoch": 0.42647679324894516, + "grad_norm": 0.6470099687576294, + "learning_rate": 0.0009349850174147165, + "loss": 1.5745, + "step": 4043 + }, + { + "epoch": 0.42658227848101266, + "grad_norm": 0.5699438452720642, + "learning_rate": 0.0009347417038086772, + "loss": 1.595, + "step": 4044 + }, + { + "epoch": 0.42668776371308015, + "grad_norm": 0.7046888470649719, + "learning_rate": 0.000934498369501409, + "loss": 1.6391, + "step": 4045 + }, + { + "epoch": 0.4267932489451477, + "grad_norm": 0.6480763554573059, + "learning_rate": 0.0009342550145201786, + "loss": 1.58, + "step": 4046 + }, + { + "epoch": 0.4268987341772152, + "grad_norm": 0.554871141910553, + "learning_rate": 0.0009340116388922551, + "loss": 1.5904, + "step": 4047 + }, + { + "epoch": 0.4270042194092827, + "grad_norm": 0.7970278859138489, + "learning_rate": 0.0009337682426449097, + "loss": 1.6111, + "step": 4048 + }, + { + "epoch": 0.42710970464135023, + "grad_norm": 0.7505037188529968, + "learning_rate": 0.0009335248258054162, + "loss": 1.5806, + "step": 4049 + }, + { + "epoch": 0.4272151898734177, + "grad_norm": 0.5453915596008301, + "learning_rate": 
0.0009332813884010511, + "loss": 1.6054, + "step": 4050 + }, + { + "epoch": 0.4273206751054852, + "grad_norm": 0.9073684811592102, + "learning_rate": 0.0009330379304590924, + "loss": 1.5792, + "step": 4051 + }, + { + "epoch": 0.42742616033755276, + "grad_norm": 0.6468883752822876, + "learning_rate": 0.000932794452006821, + "loss": 1.6207, + "step": 4052 + }, + { + "epoch": 0.42753164556962026, + "grad_norm": 0.6705367565155029, + "learning_rate": 0.0009325509530715196, + "loss": 1.6032, + "step": 4053 + }, + { + "epoch": 0.42763713080168775, + "grad_norm": 0.7394076585769653, + "learning_rate": 0.0009323074336804738, + "loss": 1.593, + "step": 4054 + }, + { + "epoch": 0.4277426160337553, + "grad_norm": 0.5654972195625305, + "learning_rate": 0.0009320638938609708, + "loss": 1.6102, + "step": 4055 + }, + { + "epoch": 0.4278481012658228, + "grad_norm": 0.6939554810523987, + "learning_rate": 0.0009318203336403008, + "loss": 1.5786, + "step": 4056 + }, + { + "epoch": 0.4279535864978903, + "grad_norm": 0.5967707633972168, + "learning_rate": 0.0009315767530457556, + "loss": 1.5794, + "step": 4057 + }, + { + "epoch": 0.42805907172995783, + "grad_norm": 0.603644847869873, + "learning_rate": 0.0009313331521046299, + "loss": 1.6108, + "step": 4058 + }, + { + "epoch": 0.4281645569620253, + "grad_norm": 0.6520218849182129, + "learning_rate": 0.0009310895308442202, + "loss": 1.6304, + "step": 4059 + }, + { + "epoch": 0.4282700421940928, + "grad_norm": 0.5862717032432556, + "learning_rate": 0.0009308458892918259, + "loss": 1.5826, + "step": 4060 + }, + { + "epoch": 0.42837552742616036, + "grad_norm": 0.5896168947219849, + "learning_rate": 0.0009306022274747478, + "loss": 1.6028, + "step": 4061 + }, + { + "epoch": 0.42848101265822786, + "grad_norm": 0.6970139145851135, + "learning_rate": 0.0009303585454202892, + "loss": 1.5915, + "step": 4062 + }, + { + "epoch": 0.42858649789029535, + "grad_norm": 0.588800847530365, + "learning_rate": 0.0009301148431557565, + "loss": 1.5494, + 
"step": 4063 + }, + { + "epoch": 0.4286919831223629, + "grad_norm": 1.1261259317398071, + "learning_rate": 0.0009298711207084575, + "loss": 1.5791, + "step": 4064 + }, + { + "epoch": 0.4287974683544304, + "grad_norm": 0.683560311794281, + "learning_rate": 0.0009296273781057026, + "loss": 1.6357, + "step": 4065 + }, + { + "epoch": 0.4289029535864979, + "grad_norm": 0.7918499112129211, + "learning_rate": 0.0009293836153748039, + "loss": 1.5823, + "step": 4066 + }, + { + "epoch": 0.4290084388185654, + "grad_norm": 0.7232117652893066, + "learning_rate": 0.0009291398325430771, + "loss": 1.5844, + "step": 4067 + }, + { + "epoch": 0.4291139240506329, + "grad_norm": 0.6511474251747131, + "learning_rate": 0.0009288960296378386, + "loss": 1.577, + "step": 4068 + }, + { + "epoch": 0.4292194092827004, + "grad_norm": 0.9134923219680786, + "learning_rate": 0.0009286522066864078, + "loss": 1.6482, + "step": 4069 + }, + { + "epoch": 0.4293248945147679, + "grad_norm": 0.6128536462783813, + "learning_rate": 0.0009284083637161064, + "loss": 1.5997, + "step": 4070 + }, + { + "epoch": 0.42943037974683546, + "grad_norm": 0.9948230385780334, + "learning_rate": 0.0009281645007542584, + "loss": 1.63, + "step": 4071 + }, + { + "epoch": 0.42953586497890295, + "grad_norm": 1.1269807815551758, + "learning_rate": 0.0009279206178281895, + "loss": 1.5772, + "step": 4072 + }, + { + "epoch": 0.42964135021097044, + "grad_norm": 0.7649590969085693, + "learning_rate": 0.0009276767149652284, + "loss": 1.5862, + "step": 4073 + }, + { + "epoch": 0.429746835443038, + "grad_norm": 1.2950700521469116, + "learning_rate": 0.0009274327921927054, + "loss": 1.5919, + "step": 4074 + }, + { + "epoch": 0.4298523206751055, + "grad_norm": 0.7025986909866333, + "learning_rate": 0.0009271888495379529, + "loss": 1.6386, + "step": 4075 + }, + { + "epoch": 0.429957805907173, + "grad_norm": 1.2922546863555908, + "learning_rate": 0.0009269448870283067, + "loss": 1.6111, + "step": 4076 + }, + { + "epoch": 0.4300632911392405, 
+ "grad_norm": 0.8742403388023376, + "learning_rate": 0.0009267009046911032, + "loss": 1.6144, + "step": 4077 + }, + { + "epoch": 0.430168776371308, + "grad_norm": 1.4139881134033203, + "learning_rate": 0.0009264569025536825, + "loss": 1.609, + "step": 4078 + }, + { + "epoch": 0.4302742616033755, + "grad_norm": 0.9943150877952576, + "learning_rate": 0.0009262128806433858, + "loss": 1.612, + "step": 4079 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 1.191596508026123, + "learning_rate": 0.0009259688389875574, + "loss": 1.5773, + "step": 4080 + }, + { + "epoch": 0.43048523206751055, + "grad_norm": 0.9868432879447937, + "learning_rate": 0.000925724777613543, + "loss": 1.6093, + "step": 4081 + }, + { + "epoch": 0.43059071729957804, + "grad_norm": 1.073267936706543, + "learning_rate": 0.0009254806965486909, + "loss": 1.6337, + "step": 4082 + }, + { + "epoch": 0.4306962025316456, + "grad_norm": 0.9391686320304871, + "learning_rate": 0.0009252365958203518, + "loss": 1.6391, + "step": 4083 + }, + { + "epoch": 0.4308016877637131, + "grad_norm": 1.0134258270263672, + "learning_rate": 0.0009249924754558785, + "loss": 1.5666, + "step": 4084 + }, + { + "epoch": 0.4309071729957806, + "grad_norm": 0.8407578468322754, + "learning_rate": 0.0009247483354826255, + "loss": 1.5827, + "step": 4085 + }, + { + "epoch": 0.4310126582278481, + "grad_norm": 0.8855129480361938, + "learning_rate": 0.0009245041759279502, + "loss": 1.6135, + "step": 4086 + }, + { + "epoch": 0.4311181434599156, + "grad_norm": 0.8986127376556396, + "learning_rate": 0.0009242599968192119, + "loss": 1.6127, + "step": 4087 + }, + { + "epoch": 0.4312236286919831, + "grad_norm": 0.8183197379112244, + "learning_rate": 0.000924015798183772, + "loss": 1.5773, + "step": 4088 + }, + { + "epoch": 0.43132911392405066, + "grad_norm": 0.793281614780426, + "learning_rate": 0.0009237715800489942, + "loss": 1.5606, + "step": 4089 + }, + { + "epoch": 0.43143459915611815, + "grad_norm": 0.8551552295684814, + 
"learning_rate": 0.0009235273424422442, + "loss": 1.5707, + "step": 4090 + }, + { + "epoch": 0.43154008438818564, + "grad_norm": 0.8116403818130493, + "learning_rate": 0.0009232830853908904, + "loss": 1.6297, + "step": 4091 + }, + { + "epoch": 0.4316455696202532, + "grad_norm": 0.8905182480812073, + "learning_rate": 0.0009230388089223028, + "loss": 1.5855, + "step": 4092 + }, + { + "epoch": 0.4317510548523207, + "grad_norm": 0.6792471408843994, + "learning_rate": 0.0009227945130638537, + "loss": 1.5962, + "step": 4093 + }, + { + "epoch": 0.4318565400843882, + "grad_norm": 0.8683027029037476, + "learning_rate": 0.0009225501978429177, + "loss": 1.6088, + "step": 4094 + }, + { + "epoch": 0.4319620253164557, + "grad_norm": 0.6127502918243408, + "learning_rate": 0.0009223058632868719, + "loss": 1.5903, + "step": 4095 + }, + { + "epoch": 0.4320675105485232, + "grad_norm": 0.7234742641448975, + "learning_rate": 0.0009220615094230946, + "loss": 1.6006, + "step": 4096 + }, + { + "epoch": 0.4321729957805907, + "grad_norm": 0.6232853531837463, + "learning_rate": 0.0009218171362789674, + "loss": 1.579, + "step": 4097 + }, + { + "epoch": 0.43227848101265826, + "grad_norm": 0.6687788367271423, + "learning_rate": 0.0009215727438818733, + "loss": 1.6052, + "step": 4098 + }, + { + "epoch": 0.43238396624472575, + "grad_norm": 0.6634796857833862, + "learning_rate": 0.0009213283322591977, + "loss": 1.5479, + "step": 4099 + }, + { + "epoch": 0.43248945147679324, + "grad_norm": 0.7043444514274597, + "learning_rate": 0.0009210839014383282, + "loss": 1.5784, + "step": 4100 + }, + { + "epoch": 0.43259493670886073, + "grad_norm": 0.6505648493766785, + "learning_rate": 0.0009208394514466544, + "loss": 1.5805, + "step": 4101 + }, + { + "epoch": 0.4327004219409283, + "grad_norm": 0.5611894726753235, + "learning_rate": 0.0009205949823115681, + "loss": 1.5628, + "step": 4102 + }, + { + "epoch": 0.4328059071729958, + "grad_norm": 0.5875452756881714, + "learning_rate": 0.0009203504940604634, + 
"loss": 1.5274, + "step": 4103 + }, + { + "epoch": 0.43291139240506327, + "grad_norm": 0.6007571816444397, + "learning_rate": 0.0009201059867207366, + "loss": 1.581, + "step": 4104 + }, + { + "epoch": 0.4330168776371308, + "grad_norm": 0.637540876865387, + "learning_rate": 0.0009198614603197854, + "loss": 1.5742, + "step": 4105 + }, + { + "epoch": 0.4331223628691983, + "grad_norm": 0.6171098947525024, + "learning_rate": 0.0009196169148850108, + "loss": 1.6037, + "step": 4106 + }, + { + "epoch": 0.4332278481012658, + "grad_norm": 0.5984295606613159, + "learning_rate": 0.000919372350443815, + "loss": 1.6064, + "step": 4107 + }, + { + "epoch": 0.43333333333333335, + "grad_norm": 0.6450161933898926, + "learning_rate": 0.000919127767023603, + "loss": 1.5809, + "step": 4108 + }, + { + "epoch": 0.43343881856540084, + "grad_norm": 0.6046229600906372, + "learning_rate": 0.000918883164651781, + "loss": 1.6141, + "step": 4109 + }, + { + "epoch": 0.43354430379746833, + "grad_norm": 0.7130504846572876, + "learning_rate": 0.0009186385433557584, + "loss": 1.6193, + "step": 4110 + }, + { + "epoch": 0.4336497890295359, + "grad_norm": 0.6037923693656921, + "learning_rate": 0.0009183939031629462, + "loss": 1.5685, + "step": 4111 + }, + { + "epoch": 0.4337552742616034, + "grad_norm": 0.8472753763198853, + "learning_rate": 0.0009181492441007577, + "loss": 1.5597, + "step": 4112 + }, + { + "epoch": 0.43386075949367087, + "grad_norm": 0.5780802965164185, + "learning_rate": 0.0009179045661966075, + "loss": 1.5922, + "step": 4113 + }, + { + "epoch": 0.4339662447257384, + "grad_norm": 0.7124613523483276, + "learning_rate": 0.0009176598694779134, + "loss": 1.5828, + "step": 4114 + }, + { + "epoch": 0.4340717299578059, + "grad_norm": 0.6053289175033569, + "learning_rate": 0.0009174151539720953, + "loss": 1.5943, + "step": 4115 + }, + { + "epoch": 0.4341772151898734, + "grad_norm": 0.7832037210464478, + "learning_rate": 0.0009171704197065741, + "loss": 1.5835, + "step": 4116 + }, + { + 
"epoch": 0.43428270042194095, + "grad_norm": 0.7033383846282959, + "learning_rate": 0.0009169256667087738, + "loss": 1.5862, + "step": 4117 + }, + { + "epoch": 0.43438818565400844, + "grad_norm": 0.6587814688682556, + "learning_rate": 0.0009166808950061202, + "loss": 1.5727, + "step": 4118 + }, + { + "epoch": 0.43449367088607593, + "grad_norm": 0.8853986859321594, + "learning_rate": 0.0009164361046260412, + "loss": 1.6157, + "step": 4119 + }, + { + "epoch": 0.4345991561181435, + "grad_norm": 0.6400985717773438, + "learning_rate": 0.0009161912955959668, + "loss": 1.6083, + "step": 4120 + }, + { + "epoch": 0.434704641350211, + "grad_norm": 0.7780759334564209, + "learning_rate": 0.0009159464679433289, + "loss": 1.5953, + "step": 4121 + }, + { + "epoch": 0.43481012658227847, + "grad_norm": 0.6221949458122253, + "learning_rate": 0.0009157016216955618, + "loss": 1.5723, + "step": 4122 + }, + { + "epoch": 0.434915611814346, + "grad_norm": 0.7921501994132996, + "learning_rate": 0.0009154567568801019, + "loss": 1.5557, + "step": 4123 + }, + { + "epoch": 0.4350210970464135, + "grad_norm": 0.614788293838501, + "learning_rate": 0.0009152118735243871, + "loss": 1.5902, + "step": 4124 + }, + { + "epoch": 0.435126582278481, + "grad_norm": 0.7824706435203552, + "learning_rate": 0.0009149669716558582, + "loss": 1.5748, + "step": 4125 + }, + { + "epoch": 0.43523206751054855, + "grad_norm": 0.7127097249031067, + "learning_rate": 0.0009147220513019577, + "loss": 1.5835, + "step": 4126 + }, + { + "epoch": 0.43533755274261604, + "grad_norm": 0.5524976253509521, + "learning_rate": 0.0009144771124901295, + "loss": 1.5723, + "step": 4127 + }, + { + "epoch": 0.43544303797468353, + "grad_norm": 0.6054399609565735, + "learning_rate": 0.000914232155247821, + "loss": 1.5827, + "step": 4128 + }, + { + "epoch": 0.4355485232067511, + "grad_norm": 0.6010648608207703, + "learning_rate": 0.0009139871796024807, + "loss": 1.5983, + "step": 4129 + }, + { + "epoch": 0.4356540084388186, + "grad_norm": 
0.5622166395187378, + "learning_rate": 0.000913742185581559, + "loss": 1.5949, + "step": 4130 + }, + { + "epoch": 0.43575949367088607, + "grad_norm": 0.5440449118614197, + "learning_rate": 0.0009134971732125088, + "loss": 1.6007, + "step": 4131 + }, + { + "epoch": 0.43586497890295356, + "grad_norm": 0.541351854801178, + "learning_rate": 0.0009132521425227852, + "loss": 1.5721, + "step": 4132 + }, + { + "epoch": 0.4359704641350211, + "grad_norm": 0.6248423457145691, + "learning_rate": 0.0009130070935398451, + "loss": 1.6097, + "step": 4133 + }, + { + "epoch": 0.4360759493670886, + "grad_norm": 0.5391668081283569, + "learning_rate": 0.0009127620262911473, + "loss": 1.5988, + "step": 4134 + }, + { + "epoch": 0.4361814345991561, + "grad_norm": 0.5672041177749634, + "learning_rate": 0.0009125169408041526, + "loss": 1.5538, + "step": 4135 + }, + { + "epoch": 0.43628691983122364, + "grad_norm": 0.5605277419090271, + "learning_rate": 0.0009122718371063247, + "loss": 1.5674, + "step": 4136 + }, + { + "epoch": 0.43639240506329113, + "grad_norm": 0.5580936670303345, + "learning_rate": 0.0009120267152251281, + "loss": 1.5674, + "step": 4137 + }, + { + "epoch": 0.4364978902953586, + "grad_norm": 0.6100931763648987, + "learning_rate": 0.0009117815751880301, + "loss": 1.59, + "step": 4138 + }, + { + "epoch": 0.4366033755274262, + "grad_norm": 0.5161224007606506, + "learning_rate": 0.0009115364170225, + "loss": 1.5911, + "step": 4139 + }, + { + "epoch": 0.43670886075949367, + "grad_norm": 0.64018315076828, + "learning_rate": 0.0009112912407560086, + "loss": 1.5749, + "step": 4140 + }, + { + "epoch": 0.43681434599156116, + "grad_norm": 0.625670850276947, + "learning_rate": 0.0009110460464160295, + "loss": 1.5694, + "step": 4141 + }, + { + "epoch": 0.4369198312236287, + "grad_norm": 0.5999443531036377, + "learning_rate": 0.000910800834030038, + "loss": 1.5939, + "step": 4142 + }, + { + "epoch": 0.4370253164556962, + "grad_norm": 0.5914725065231323, + "learning_rate": 
0.0009105556036255113, + "loss": 1.5985, + "step": 4143 + }, + { + "epoch": 0.4371308016877637, + "grad_norm": 0.6395543813705444, + "learning_rate": 0.0009103103552299283, + "loss": 1.6052, + "step": 4144 + }, + { + "epoch": 0.43723628691983124, + "grad_norm": 0.611953854560852, + "learning_rate": 0.0009100650888707709, + "loss": 1.5723, + "step": 4145 + }, + { + "epoch": 0.43734177215189873, + "grad_norm": 0.7277881503105164, + "learning_rate": 0.000909819804575522, + "loss": 1.5788, + "step": 4146 + }, + { + "epoch": 0.4374472573839662, + "grad_norm": 0.6323719620704651, + "learning_rate": 0.0009095745023716671, + "loss": 1.5964, + "step": 4147 + }, + { + "epoch": 0.4375527426160338, + "grad_norm": 0.8269078731536865, + "learning_rate": 0.0009093291822866933, + "loss": 1.6361, + "step": 4148 + }, + { + "epoch": 0.43765822784810127, + "grad_norm": 0.6136718988418579, + "learning_rate": 0.0009090838443480903, + "loss": 1.5851, + "step": 4149 + }, + { + "epoch": 0.43776371308016876, + "grad_norm": 0.6451979875564575, + "learning_rate": 0.0009088384885833495, + "loss": 1.592, + "step": 4150 + }, + { + "epoch": 0.4378691983122363, + "grad_norm": 0.585662841796875, + "learning_rate": 0.0009085931150199638, + "loss": 1.5865, + "step": 4151 + }, + { + "epoch": 0.4379746835443038, + "grad_norm": 0.7001010179519653, + "learning_rate": 0.0009083477236854287, + "loss": 1.6116, + "step": 4152 + }, + { + "epoch": 0.4380801687763713, + "grad_norm": 0.6091241240501404, + "learning_rate": 0.0009081023146072414, + "loss": 1.599, + "step": 4153 + }, + { + "epoch": 0.43818565400843884, + "grad_norm": 0.8017532825469971, + "learning_rate": 0.0009078568878129018, + "loss": 1.5477, + "step": 4154 + }, + { + "epoch": 0.43829113924050633, + "grad_norm": 0.6571472883224487, + "learning_rate": 0.0009076114433299107, + "loss": 1.594, + "step": 4155 + }, + { + "epoch": 0.4383966244725738, + "grad_norm": 0.6361532211303711, + "learning_rate": 0.0009073659811857712, + "loss": 1.5923, + 
"step": 4156 + }, + { + "epoch": 0.4385021097046414, + "grad_norm": 0.7136302590370178, + "learning_rate": 0.0009071205014079888, + "loss": 1.6056, + "step": 4157 + }, + { + "epoch": 0.43860759493670887, + "grad_norm": 0.5241080522537231, + "learning_rate": 0.0009068750040240709, + "loss": 1.5579, + "step": 4158 + }, + { + "epoch": 0.43871308016877636, + "grad_norm": 0.7022104859352112, + "learning_rate": 0.0009066294890615266, + "loss": 1.5778, + "step": 4159 + }, + { + "epoch": 0.4388185654008439, + "grad_norm": 0.6038747429847717, + "learning_rate": 0.000906383956547867, + "loss": 1.5679, + "step": 4160 + }, + { + "epoch": 0.4389240506329114, + "grad_norm": 0.7302247881889343, + "learning_rate": 0.0009061384065106051, + "loss": 1.5767, + "step": 4161 + }, + { + "epoch": 0.4390295358649789, + "grad_norm": 0.6397809982299805, + "learning_rate": 0.0009058928389772564, + "loss": 1.6057, + "step": 4162 + }, + { + "epoch": 0.43913502109704644, + "grad_norm": 0.5394572615623474, + "learning_rate": 0.0009056472539753377, + "loss": 1.5898, + "step": 4163 + }, + { + "epoch": 0.43924050632911393, + "grad_norm": 0.7142806053161621, + "learning_rate": 0.0009054016515323679, + "loss": 1.5502, + "step": 4164 + }, + { + "epoch": 0.4393459915611814, + "grad_norm": 0.5853133797645569, + "learning_rate": 0.0009051560316758684, + "loss": 1.5815, + "step": 4165 + }, + { + "epoch": 0.4394514767932489, + "grad_norm": 0.5964877009391785, + "learning_rate": 0.0009049103944333616, + "loss": 1.5818, + "step": 4166 + }, + { + "epoch": 0.43955696202531647, + "grad_norm": 0.6116570234298706, + "learning_rate": 0.0009046647398323728, + "loss": 1.571, + "step": 4167 + }, + { + "epoch": 0.43966244725738396, + "grad_norm": 0.6299305558204651, + "learning_rate": 0.0009044190679004286, + "loss": 1.5928, + "step": 4168 + }, + { + "epoch": 0.43976793248945145, + "grad_norm": 0.5766546726226807, + "learning_rate": 0.0009041733786650578, + "loss": 1.5892, + "step": 4169 + }, + { + "epoch": 
0.439873417721519, + "grad_norm": 0.7012413144111633, + "learning_rate": 0.0009039276721537915, + "loss": 1.6143, + "step": 4170 + }, + { + "epoch": 0.4399789029535865, + "grad_norm": 0.5839663147926331, + "learning_rate": 0.0009036819483941614, + "loss": 1.5848, + "step": 4171 + }, + { + "epoch": 0.440084388185654, + "grad_norm": 0.5727782845497131, + "learning_rate": 0.0009034362074137032, + "loss": 1.579, + "step": 4172 + }, + { + "epoch": 0.44018987341772153, + "grad_norm": 0.5775489807128906, + "learning_rate": 0.0009031904492399526, + "loss": 1.6131, + "step": 4173 + }, + { + "epoch": 0.440295358649789, + "grad_norm": 0.5393126606941223, + "learning_rate": 0.0009029446739004483, + "loss": 1.5952, + "step": 4174 + }, + { + "epoch": 0.4404008438818565, + "grad_norm": 0.576653003692627, + "learning_rate": 0.0009026988814227308, + "loss": 1.5595, + "step": 4175 + }, + { + "epoch": 0.44050632911392407, + "grad_norm": 0.5337774157524109, + "learning_rate": 0.0009024530718343418, + "loss": 1.559, + "step": 4176 + }, + { + "epoch": 0.44061181434599156, + "grad_norm": 0.560697615146637, + "learning_rate": 0.0009022072451628263, + "loss": 1.5974, + "step": 4177 + }, + { + "epoch": 0.44071729957805905, + "grad_norm": 0.5846347808837891, + "learning_rate": 0.0009019614014357298, + "loss": 1.5583, + "step": 4178 + }, + { + "epoch": 0.4408227848101266, + "grad_norm": 0.5272262096405029, + "learning_rate": 0.0009017155406806006, + "loss": 1.6007, + "step": 4179 + }, + { + "epoch": 0.4409282700421941, + "grad_norm": 0.5586947798728943, + "learning_rate": 0.0009014696629249886, + "loss": 1.5689, + "step": 4180 + }, + { + "epoch": 0.4410337552742616, + "grad_norm": 0.5558472275733948, + "learning_rate": 0.0009012237681964454, + "loss": 1.5971, + "step": 4181 + }, + { + "epoch": 0.44113924050632913, + "grad_norm": 0.5208272337913513, + "learning_rate": 0.0009009778565225251, + "loss": 1.5896, + "step": 4182 + }, + { + "epoch": 0.4412447257383966, + "grad_norm": 
0.6288925409317017, + "learning_rate": 0.000900731927930783, + "loss": 1.5623, + "step": 4183 + }, + { + "epoch": 0.4413502109704641, + "grad_norm": 0.6049575209617615, + "learning_rate": 0.0009004859824487769, + "loss": 1.582, + "step": 4184 + }, + { + "epoch": 0.44145569620253167, + "grad_norm": 0.5699400305747986, + "learning_rate": 0.0009002400201040659, + "loss": 1.5769, + "step": 4185 + }, + { + "epoch": 0.44156118143459916, + "grad_norm": 0.5674746036529541, + "learning_rate": 0.0008999940409242115, + "loss": 1.5732, + "step": 4186 + }, + { + "epoch": 0.44166666666666665, + "grad_norm": 0.5540896654129028, + "learning_rate": 0.0008997480449367771, + "loss": 1.6034, + "step": 4187 + }, + { + "epoch": 0.4417721518987342, + "grad_norm": 0.5537983179092407, + "learning_rate": 0.0008995020321693274, + "loss": 1.5992, + "step": 4188 + }, + { + "epoch": 0.4418776371308017, + "grad_norm": 0.5304204821586609, + "learning_rate": 0.0008992560026494294, + "loss": 1.5843, + "step": 4189 + }, + { + "epoch": 0.4419831223628692, + "grad_norm": 0.5436570048332214, + "learning_rate": 0.0008990099564046522, + "loss": 1.5977, + "step": 4190 + }, + { + "epoch": 0.44208860759493673, + "grad_norm": 0.5109390020370483, + "learning_rate": 0.0008987638934625662, + "loss": 1.5728, + "step": 4191 + }, + { + "epoch": 0.4421940928270042, + "grad_norm": 0.5588911175727844, + "learning_rate": 0.0008985178138507441, + "loss": 1.6007, + "step": 4192 + }, + { + "epoch": 0.4422995780590717, + "grad_norm": 0.5203474760055542, + "learning_rate": 0.0008982717175967606, + "loss": 1.5648, + "step": 4193 + }, + { + "epoch": 0.44240506329113927, + "grad_norm": 0.577958881855011, + "learning_rate": 0.0008980256047281919, + "loss": 1.6279, + "step": 4194 + }, + { + "epoch": 0.44251054852320676, + "grad_norm": 0.5395603775978088, + "learning_rate": 0.0008977794752726159, + "loss": 1.5771, + "step": 4195 + }, + { + "epoch": 0.44261603375527425, + "grad_norm": 0.5601981282234192, + "learning_rate": 
0.0008975333292576125, + "loss": 1.5546, + "step": 4196 + }, + { + "epoch": 0.44272151898734174, + "grad_norm": 0.5958512425422668, + "learning_rate": 0.0008972871667107643, + "loss": 1.5895, + "step": 4197 + }, + { + "epoch": 0.4428270042194093, + "grad_norm": 0.6058614253997803, + "learning_rate": 0.0008970409876596545, + "loss": 1.5938, + "step": 4198 + }, + { + "epoch": 0.4429324894514768, + "grad_norm": 0.5429709553718567, + "learning_rate": 0.0008967947921318689, + "loss": 1.5553, + "step": 4199 + }, + { + "epoch": 0.4430379746835443, + "grad_norm": 0.6588892936706543, + "learning_rate": 0.0008965485801549946, + "loss": 1.6278, + "step": 4200 + }, + { + "epoch": 0.4431434599156118, + "grad_norm": 0.5498107075691223, + "learning_rate": 0.0008963023517566213, + "loss": 1.582, + "step": 4201 + }, + { + "epoch": 0.4432489451476793, + "grad_norm": 0.5886976718902588, + "learning_rate": 0.0008960561069643402, + "loss": 1.5863, + "step": 4202 + }, + { + "epoch": 0.4433544303797468, + "grad_norm": 0.6367396712303162, + "learning_rate": 0.0008958098458057436, + "loss": 1.5599, + "step": 4203 + }, + { + "epoch": 0.44345991561181436, + "grad_norm": 0.5472689270973206, + "learning_rate": 0.000895563568308427, + "loss": 1.5889, + "step": 4204 + }, + { + "epoch": 0.44356540084388185, + "grad_norm": 0.6590432524681091, + "learning_rate": 0.0008953172744999865, + "loss": 1.5549, + "step": 4205 + }, + { + "epoch": 0.44367088607594934, + "grad_norm": 0.6009443402290344, + "learning_rate": 0.000895070964408021, + "loss": 1.6043, + "step": 4206 + }, + { + "epoch": 0.4437763713080169, + "grad_norm": 0.6718936562538147, + "learning_rate": 0.0008948246380601303, + "loss": 1.5582, + "step": 4207 + }, + { + "epoch": 0.4438818565400844, + "grad_norm": 0.6650192141532898, + "learning_rate": 0.000894578295483917, + "loss": 1.5748, + "step": 4208 + }, + { + "epoch": 0.4439873417721519, + "grad_norm": 0.556402862071991, + "learning_rate": 0.0008943319367069844, + "loss": 1.5782, + "step": 
4209 + }, + { + "epoch": 0.4440928270042194, + "grad_norm": 0.7555446624755859, + "learning_rate": 0.000894085561756939, + "loss": 1.5898, + "step": 4210 + }, + { + "epoch": 0.4441983122362869, + "grad_norm": 0.5353924036026001, + "learning_rate": 0.0008938391706613878, + "loss": 1.6025, + "step": 4211 + }, + { + "epoch": 0.4443037974683544, + "grad_norm": 0.8154770731925964, + "learning_rate": 0.0008935927634479403, + "loss": 1.5704, + "step": 4212 + }, + { + "epoch": 0.44440928270042196, + "grad_norm": 0.6212957501411438, + "learning_rate": 0.0008933463401442073, + "loss": 1.6006, + "step": 4213 + }, + { + "epoch": 0.44451476793248945, + "grad_norm": 0.7340143322944641, + "learning_rate": 0.0008930999007778025, + "loss": 1.5706, + "step": 4214 + }, + { + "epoch": 0.44462025316455694, + "grad_norm": 0.8552604913711548, + "learning_rate": 0.0008928534453763402, + "loss": 1.5748, + "step": 4215 + }, + { + "epoch": 0.4447257383966245, + "grad_norm": 0.5907376408576965, + "learning_rate": 0.0008926069739674369, + "loss": 1.5875, + "step": 4216 + }, + { + "epoch": 0.444831223628692, + "grad_norm": 0.8011342883110046, + "learning_rate": 0.000892360486578711, + "loss": 1.5877, + "step": 4217 + }, + { + "epoch": 0.4449367088607595, + "grad_norm": 0.6029345989227295, + "learning_rate": 0.0008921139832377829, + "loss": 1.5569, + "step": 4218 + }, + { + "epoch": 0.445042194092827, + "grad_norm": 0.6623184680938721, + "learning_rate": 0.0008918674639722742, + "loss": 1.6037, + "step": 4219 + }, + { + "epoch": 0.4451476793248945, + "grad_norm": 0.640830934047699, + "learning_rate": 0.0008916209288098088, + "loss": 1.6024, + "step": 4220 + }, + { + "epoch": 0.445253164556962, + "grad_norm": 0.6175007820129395, + "learning_rate": 0.0008913743777780122, + "loss": 1.5829, + "step": 4221 + }, + { + "epoch": 0.44535864978902956, + "grad_norm": 0.6872929334640503, + "learning_rate": 0.0008911278109045114, + "loss": 1.596, + "step": 4222 + }, + { + "epoch": 0.44546413502109705, + 
"grad_norm": 0.6376515030860901, + "learning_rate": 0.0008908812282169359, + "loss": 1.6058, + "step": 4223 + }, + { + "epoch": 0.44556962025316454, + "grad_norm": 0.7865333557128906, + "learning_rate": 0.0008906346297429161, + "loss": 1.5654, + "step": 4224 + }, + { + "epoch": 0.4456751054852321, + "grad_norm": 0.6638888716697693, + "learning_rate": 0.000890388015510085, + "loss": 1.58, + "step": 4225 + }, + { + "epoch": 0.4457805907172996, + "grad_norm": 0.7263659238815308, + "learning_rate": 0.0008901413855460764, + "loss": 1.5777, + "step": 4226 + }, + { + "epoch": 0.4458860759493671, + "grad_norm": 0.7034520506858826, + "learning_rate": 0.0008898947398785271, + "loss": 1.5766, + "step": 4227 + }, + { + "epoch": 0.4459915611814346, + "grad_norm": 0.6204469203948975, + "learning_rate": 0.0008896480785350743, + "loss": 1.5933, + "step": 4228 + }, + { + "epoch": 0.4460970464135021, + "grad_norm": 0.8005537390708923, + "learning_rate": 0.0008894014015433582, + "loss": 1.5759, + "step": 4229 + }, + { + "epoch": 0.4462025316455696, + "grad_norm": 0.7275680303573608, + "learning_rate": 0.0008891547089310198, + "loss": 1.5774, + "step": 4230 + }, + { + "epoch": 0.4463080168776371, + "grad_norm": 0.5510661005973816, + "learning_rate": 0.0008889080007257024, + "loss": 1.6313, + "step": 4231 + }, + { + "epoch": 0.44641350210970465, + "grad_norm": 0.6357248425483704, + "learning_rate": 0.0008886612769550508, + "loss": 1.5847, + "step": 4232 + }, + { + "epoch": 0.44651898734177214, + "grad_norm": 0.564935564994812, + "learning_rate": 0.0008884145376467119, + "loss": 1.5776, + "step": 4233 + }, + { + "epoch": 0.44662447257383964, + "grad_norm": 0.6847401261329651, + "learning_rate": 0.0008881677828283337, + "loss": 1.6137, + "step": 4234 + }, + { + "epoch": 0.4467299578059072, + "grad_norm": 0.5894411206245422, + "learning_rate": 0.0008879210125275664, + "loss": 1.5777, + "step": 4235 + }, + { + "epoch": 0.4468354430379747, + "grad_norm": 0.6606022119522095, + 
"learning_rate": 0.000887674226772062, + "loss": 1.5551, + "step": 4236 + }, + { + "epoch": 0.44694092827004217, + "grad_norm": 0.6091481447219849, + "learning_rate": 0.000887427425589474, + "loss": 1.591, + "step": 4237 + }, + { + "epoch": 0.4470464135021097, + "grad_norm": 0.5412756204605103, + "learning_rate": 0.0008871806090074577, + "loss": 1.5381, + "step": 4238 + }, + { + "epoch": 0.4471518987341772, + "grad_norm": 0.5993338823318481, + "learning_rate": 0.0008869337770536699, + "loss": 1.5638, + "step": 4239 + }, + { + "epoch": 0.4472573839662447, + "grad_norm": 0.5521502494812012, + "learning_rate": 0.0008866869297557699, + "loss": 1.5616, + "step": 4240 + }, + { + "epoch": 0.44736286919831225, + "grad_norm": 0.5564886331558228, + "learning_rate": 0.0008864400671414177, + "loss": 1.5605, + "step": 4241 + }, + { + "epoch": 0.44746835443037974, + "grad_norm": 0.5529130101203918, + "learning_rate": 0.0008861931892382756, + "loss": 1.586, + "step": 4242 + }, + { + "epoch": 0.44757383966244724, + "grad_norm": 0.6170299649238586, + "learning_rate": 0.0008859462960740076, + "loss": 1.5905, + "step": 4243 + }, + { + "epoch": 0.4476793248945148, + "grad_norm": 0.6203410625457764, + "learning_rate": 0.000885699387676279, + "loss": 1.5942, + "step": 4244 + }, + { + "epoch": 0.4477848101265823, + "grad_norm": 0.5936846733093262, + "learning_rate": 0.0008854524640727575, + "loss": 1.6177, + "step": 4245 + }, + { + "epoch": 0.44789029535864977, + "grad_norm": 0.7406560778617859, + "learning_rate": 0.0008852055252911121, + "loss": 1.5523, + "step": 4246 + }, + { + "epoch": 0.4479957805907173, + "grad_norm": 0.6202894449234009, + "learning_rate": 0.0008849585713590134, + "loss": 1.5318, + "step": 4247 + }, + { + "epoch": 0.4481012658227848, + "grad_norm": 0.7431984543800354, + "learning_rate": 0.0008847116023041336, + "loss": 1.598, + "step": 4248 + }, + { + "epoch": 0.4482067510548523, + "grad_norm": 0.7054474353790283, + "learning_rate": 0.0008844646181541472, + "loss": 
1.6043, + "step": 4249 + }, + { + "epoch": 0.44831223628691985, + "grad_norm": 0.648837685585022, + "learning_rate": 0.0008842176189367299, + "loss": 1.5953, + "step": 4250 + }, + { + "epoch": 0.44841772151898734, + "grad_norm": 0.6452342867851257, + "learning_rate": 0.000883970604679559, + "loss": 1.5778, + "step": 4251 + }, + { + "epoch": 0.44852320675105484, + "grad_norm": 0.6656894087791443, + "learning_rate": 0.0008837235754103136, + "loss": 1.6068, + "step": 4252 + }, + { + "epoch": 0.4486286919831224, + "grad_norm": 0.589617133140564, + "learning_rate": 0.000883476531156675, + "loss": 1.5792, + "step": 4253 + }, + { + "epoch": 0.4487341772151899, + "grad_norm": 0.5877411365509033, + "learning_rate": 0.0008832294719463256, + "loss": 1.579, + "step": 4254 + }, + { + "epoch": 0.44883966244725737, + "grad_norm": 0.6278152465820312, + "learning_rate": 0.0008829823978069494, + "loss": 1.6203, + "step": 4255 + }, + { + "epoch": 0.4489451476793249, + "grad_norm": 0.5558190941810608, + "learning_rate": 0.0008827353087662326, + "loss": 1.6169, + "step": 4256 + }, + { + "epoch": 0.4490506329113924, + "grad_norm": 0.6493220925331116, + "learning_rate": 0.0008824882048518622, + "loss": 1.5509, + "step": 4257 + }, + { + "epoch": 0.4491561181434599, + "grad_norm": 0.5860915184020996, + "learning_rate": 0.0008822410860915281, + "loss": 1.5831, + "step": 4258 + }, + { + "epoch": 0.44926160337552745, + "grad_norm": 0.6314749121665955, + "learning_rate": 0.0008819939525129207, + "loss": 1.6085, + "step": 4259 + }, + { + "epoch": 0.44936708860759494, + "grad_norm": 0.8197622895240784, + "learning_rate": 0.0008817468041437329, + "loss": 1.5682, + "step": 4260 + }, + { + "epoch": 0.44947257383966244, + "grad_norm": 0.6343901753425598, + "learning_rate": 0.0008814996410116587, + "loss": 1.6037, + "step": 4261 + }, + { + "epoch": 0.44957805907173, + "grad_norm": 0.6957730054855347, + "learning_rate": 0.0008812524631443938, + "loss": 1.5981, + "step": 4262 + }, + { + "epoch": 
0.4496835443037975, + "grad_norm": 0.5860140919685364, + "learning_rate": 0.0008810052705696363, + "loss": 1.5469, + "step": 4263 + }, + { + "epoch": 0.44978902953586497, + "grad_norm": 0.6527582406997681, + "learning_rate": 0.0008807580633150848, + "loss": 1.6105, + "step": 4264 + }, + { + "epoch": 0.44989451476793246, + "grad_norm": 0.6386501789093018, + "learning_rate": 0.0008805108414084401, + "loss": 1.574, + "step": 4265 + }, + { + "epoch": 0.45, + "grad_norm": 0.6115043759346008, + "learning_rate": 0.0008802636048774052, + "loss": 1.5832, + "step": 4266 + }, + { + "epoch": 0.4501054852320675, + "grad_norm": 0.6305690407752991, + "learning_rate": 0.0008800163537496837, + "loss": 1.5387, + "step": 4267 + }, + { + "epoch": 0.450210970464135, + "grad_norm": 0.6135138869285583, + "learning_rate": 0.0008797690880529813, + "loss": 1.5664, + "step": 4268 + }, + { + "epoch": 0.45031645569620254, + "grad_norm": 0.6378039717674255, + "learning_rate": 0.0008795218078150056, + "loss": 1.5614, + "step": 4269 + }, + { + "epoch": 0.45042194092827004, + "grad_norm": 0.6172854900360107, + "learning_rate": 0.0008792745130634654, + "loss": 1.6056, + "step": 4270 + }, + { + "epoch": 0.45052742616033753, + "grad_norm": 0.6130523085594177, + "learning_rate": 0.0008790272038260715, + "loss": 1.5502, + "step": 4271 + }, + { + "epoch": 0.4506329113924051, + "grad_norm": 0.5178954601287842, + "learning_rate": 0.000878779880130536, + "loss": 1.5806, + "step": 4272 + }, + { + "epoch": 0.45073839662447257, + "grad_norm": 0.6286683678627014, + "learning_rate": 0.0008785325420045727, + "loss": 1.5763, + "step": 4273 + }, + { + "epoch": 0.45084388185654006, + "grad_norm": 0.5841578841209412, + "learning_rate": 0.0008782851894758971, + "loss": 1.573, + "step": 4274 + }, + { + "epoch": 0.4509493670886076, + "grad_norm": 0.6160075068473816, + "learning_rate": 0.0008780378225722264, + "loss": 1.5595, + "step": 4275 + }, + { + "epoch": 0.4510548523206751, + "grad_norm": 0.6251958012580872, + 
"learning_rate": 0.0008777904413212794, + "loss": 1.5553, + "step": 4276 + }, + { + "epoch": 0.4511603375527426, + "grad_norm": 0.6704482436180115, + "learning_rate": 0.0008775430457507759, + "loss": 1.603, + "step": 4277 + }, + { + "epoch": 0.45126582278481014, + "grad_norm": 0.66844642162323, + "learning_rate": 0.0008772956358884383, + "loss": 1.5567, + "step": 4278 + }, + { + "epoch": 0.45137130801687764, + "grad_norm": 0.9302899837493896, + "learning_rate": 0.0008770482117619901, + "loss": 1.5683, + "step": 4279 + }, + { + "epoch": 0.45147679324894513, + "grad_norm": 0.6782798171043396, + "learning_rate": 0.0008768007733991561, + "loss": 1.6161, + "step": 4280 + }, + { + "epoch": 0.4515822784810127, + "grad_norm": 0.6162168383598328, + "learning_rate": 0.0008765533208276632, + "loss": 1.5739, + "step": 4281 + }, + { + "epoch": 0.45168776371308017, + "grad_norm": 0.6017752289772034, + "learning_rate": 0.0008763058540752396, + "loss": 1.623, + "step": 4282 + }, + { + "epoch": 0.45179324894514766, + "grad_norm": 0.6996591687202454, + "learning_rate": 0.0008760583731696151, + "loss": 1.5967, + "step": 4283 + }, + { + "epoch": 0.4518987341772152, + "grad_norm": 0.7665427923202515, + "learning_rate": 0.0008758108781385216, + "loss": 1.5926, + "step": 4284 + }, + { + "epoch": 0.4520042194092827, + "grad_norm": 0.656080424785614, + "learning_rate": 0.0008755633690096918, + "loss": 1.5671, + "step": 4285 + }, + { + "epoch": 0.4521097046413502, + "grad_norm": 0.7716072201728821, + "learning_rate": 0.0008753158458108604, + "loss": 1.5571, + "step": 4286 + }, + { + "epoch": 0.45221518987341774, + "grad_norm": 0.6328102350234985, + "learning_rate": 0.0008750683085697632, + "loss": 1.5572, + "step": 4287 + }, + { + "epoch": 0.45232067510548524, + "grad_norm": 0.7752549648284912, + "learning_rate": 0.0008748207573141388, + "loss": 1.6102, + "step": 4288 + }, + { + "epoch": 0.45242616033755273, + "grad_norm": 0.8293344378471375, + "learning_rate": 0.000874573192071726, + 
"loss": 1.6144, + "step": 4289 + }, + { + "epoch": 0.4525316455696203, + "grad_norm": 0.6791508793830872, + "learning_rate": 0.0008743256128702658, + "loss": 1.5671, + "step": 4290 + }, + { + "epoch": 0.45263713080168777, + "grad_norm": 0.7007976770401001, + "learning_rate": 0.0008740780197375007, + "loss": 1.5643, + "step": 4291 + }, + { + "epoch": 0.45274261603375526, + "grad_norm": 0.6219468116760254, + "learning_rate": 0.000873830412701175, + "loss": 1.5852, + "step": 4292 + }, + { + "epoch": 0.4528481012658228, + "grad_norm": 0.7938535213470459, + "learning_rate": 0.0008735827917890339, + "loss": 1.6137, + "step": 4293 + }, + { + "epoch": 0.4529535864978903, + "grad_norm": 0.5270271301269531, + "learning_rate": 0.000873335157028825, + "loss": 1.5871, + "step": 4294 + }, + { + "epoch": 0.4530590717299578, + "grad_norm": 0.7542501091957092, + "learning_rate": 0.0008730875084482964, + "loss": 1.5798, + "step": 4295 + }, + { + "epoch": 0.4531645569620253, + "grad_norm": 0.5696561932563782, + "learning_rate": 0.0008728398460751989, + "loss": 1.593, + "step": 4296 + }, + { + "epoch": 0.45327004219409284, + "grad_norm": 0.9864806532859802, + "learning_rate": 0.0008725921699372839, + "loss": 1.565, + "step": 4297 + }, + { + "epoch": 0.45337552742616033, + "grad_norm": 0.6356723308563232, + "learning_rate": 0.0008723444800623053, + "loss": 1.5785, + "step": 4298 + }, + { + "epoch": 0.4534810126582278, + "grad_norm": 0.8893103003501892, + "learning_rate": 0.0008720967764780173, + "loss": 1.577, + "step": 4299 + }, + { + "epoch": 0.45358649789029537, + "grad_norm": 0.869924008846283, + "learning_rate": 0.0008718490592121768, + "loss": 1.5755, + "step": 4300 + }, + { + "epoch": 0.45369198312236286, + "grad_norm": 0.6671549081802368, + "learning_rate": 0.0008716013282925418, + "loss": 1.6235, + "step": 4301 + }, + { + "epoch": 0.45379746835443036, + "grad_norm": 0.9189384579658508, + "learning_rate": 0.0008713535837468714, + "loss": 1.5747, + "step": 4302 + }, + { + 
"epoch": 0.4539029535864979, + "grad_norm": 0.5749473571777344, + "learning_rate": 0.0008711058256029269, + "loss": 1.5869, + "step": 4303 + }, + { + "epoch": 0.4540084388185654, + "grad_norm": 0.7204334735870361, + "learning_rate": 0.0008708580538884707, + "loss": 1.5795, + "step": 4304 + }, + { + "epoch": 0.4541139240506329, + "grad_norm": 0.6175983548164368, + "learning_rate": 0.0008706102686312668, + "loss": 1.5362, + "step": 4305 + }, + { + "epoch": 0.45421940928270044, + "grad_norm": 0.653944194316864, + "learning_rate": 0.0008703624698590811, + "loss": 1.6025, + "step": 4306 + }, + { + "epoch": 0.45432489451476793, + "grad_norm": 0.5908018350601196, + "learning_rate": 0.0008701146575996804, + "loss": 1.6083, + "step": 4307 + }, + { + "epoch": 0.4544303797468354, + "grad_norm": 0.6005956530570984, + "learning_rate": 0.0008698668318808334, + "loss": 1.5776, + "step": 4308 + }, + { + "epoch": 0.45453586497890297, + "grad_norm": 0.5847792029380798, + "learning_rate": 0.0008696189927303101, + "loss": 1.579, + "step": 4309 + }, + { + "epoch": 0.45464135021097046, + "grad_norm": 0.5519574284553528, + "learning_rate": 0.0008693711401758822, + "loss": 1.5422, + "step": 4310 + }, + { + "epoch": 0.45474683544303796, + "grad_norm": 0.6521214842796326, + "learning_rate": 0.0008691232742453229, + "loss": 1.5875, + "step": 4311 + }, + { + "epoch": 0.4548523206751055, + "grad_norm": 0.6276583671569824, + "learning_rate": 0.0008688753949664067, + "loss": 1.5652, + "step": 4312 + }, + { + "epoch": 0.454957805907173, + "grad_norm": 0.5452226400375366, + "learning_rate": 0.0008686275023669096, + "loss": 1.5815, + "step": 4313 + }, + { + "epoch": 0.4550632911392405, + "grad_norm": 0.638420045375824, + "learning_rate": 0.0008683795964746094, + "loss": 1.5477, + "step": 4314 + }, + { + "epoch": 0.45516877637130804, + "grad_norm": 0.6257568597793579, + "learning_rate": 0.0008681316773172852, + "loss": 1.5694, + "step": 4315 + }, + { + "epoch": 0.45527426160337553, + "grad_norm": 
0.5781779289245605, + "learning_rate": 0.0008678837449227174, + "loss": 1.5532, + "step": 4316 + }, + { + "epoch": 0.455379746835443, + "grad_norm": 0.6154587268829346, + "learning_rate": 0.0008676357993186882, + "loss": 1.5899, + "step": 4317 + }, + { + "epoch": 0.45548523206751057, + "grad_norm": 0.5711413025856018, + "learning_rate": 0.000867387840532981, + "loss": 1.5626, + "step": 4318 + }, + { + "epoch": 0.45559071729957806, + "grad_norm": 0.5577327013015747, + "learning_rate": 0.0008671398685933811, + "loss": 1.5508, + "step": 4319 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 0.6604328751564026, + "learning_rate": 0.0008668918835276747, + "loss": 1.6225, + "step": 4320 + }, + { + "epoch": 0.4558016877637131, + "grad_norm": 0.561154842376709, + "learning_rate": 0.0008666438853636499, + "loss": 1.5523, + "step": 4321 + }, + { + "epoch": 0.4559071729957806, + "grad_norm": 0.6220701932907104, + "learning_rate": 0.0008663958741290961, + "loss": 1.5589, + "step": 4322 + }, + { + "epoch": 0.4560126582278481, + "grad_norm": 0.5757575631141663, + "learning_rate": 0.0008661478498518042, + "loss": 1.6349, + "step": 4323 + }, + { + "epoch": 0.45611814345991564, + "grad_norm": 0.570609450340271, + "learning_rate": 0.0008658998125595666, + "loss": 1.5589, + "step": 4324 + }, + { + "epoch": 0.45622362869198313, + "grad_norm": 0.5773056149482727, + "learning_rate": 0.0008656517622801771, + "loss": 1.5998, + "step": 4325 + }, + { + "epoch": 0.4563291139240506, + "grad_norm": 0.5200475454330444, + "learning_rate": 0.0008654036990414308, + "loss": 1.557, + "step": 4326 + }, + { + "epoch": 0.45643459915611817, + "grad_norm": 0.6318970918655396, + "learning_rate": 0.0008651556228711247, + "loss": 1.6186, + "step": 4327 + }, + { + "epoch": 0.45654008438818566, + "grad_norm": 0.5935789942741394, + "learning_rate": 0.0008649075337970567, + "loss": 1.5956, + "step": 4328 + }, + { + "epoch": 0.45664556962025316, + "grad_norm": 0.6583527326583862, + "learning_rate": 
0.0008646594318470268, + "loss": 1.5975, + "step": 4329 + }, + { + "epoch": 0.45675105485232065, + "grad_norm": 0.5734765529632568, + "learning_rate": 0.0008644113170488355, + "loss": 1.564, + "step": 4330 + }, + { + "epoch": 0.4568565400843882, + "grad_norm": 0.5726307034492493, + "learning_rate": 0.0008641631894302858, + "loss": 1.5681, + "step": 4331 + }, + { + "epoch": 0.4569620253164557, + "grad_norm": 0.5971695184707642, + "learning_rate": 0.0008639150490191814, + "loss": 1.5954, + "step": 4332 + }, + { + "epoch": 0.4570675105485232, + "grad_norm": 0.5586398839950562, + "learning_rate": 0.0008636668958433279, + "loss": 1.5413, + "step": 4333 + }, + { + "epoch": 0.45717299578059073, + "grad_norm": 0.5781025290489197, + "learning_rate": 0.0008634187299305318, + "loss": 1.5917, + "step": 4334 + }, + { + "epoch": 0.4572784810126582, + "grad_norm": 0.5766333341598511, + "learning_rate": 0.0008631705513086013, + "loss": 1.5685, + "step": 4335 + }, + { + "epoch": 0.4573839662447257, + "grad_norm": 0.5167919993400574, + "learning_rate": 0.0008629223600053465, + "loss": 1.6019, + "step": 4336 + }, + { + "epoch": 0.45748945147679326, + "grad_norm": 0.5756369829177856, + "learning_rate": 0.000862674156048578, + "loss": 1.5555, + "step": 4337 + }, + { + "epoch": 0.45759493670886076, + "grad_norm": 0.5150023102760315, + "learning_rate": 0.0008624259394661085, + "loss": 1.5683, + "step": 4338 + }, + { + "epoch": 0.45770042194092825, + "grad_norm": 0.5072351098060608, + "learning_rate": 0.000862177710285752, + "loss": 1.6046, + "step": 4339 + }, + { + "epoch": 0.4578059071729958, + "grad_norm": 0.6093130111694336, + "learning_rate": 0.0008619294685353235, + "loss": 1.5598, + "step": 4340 + }, + { + "epoch": 0.4579113924050633, + "grad_norm": 0.5542324781417847, + "learning_rate": 0.00086168121424264, + "loss": 1.5615, + "step": 4341 + }, + { + "epoch": 0.4580168776371308, + "grad_norm": 0.6079354286193848, + "learning_rate": 0.0008614329474355196, + "loss": 1.5584, + 
"step": 4342 + }, + { + "epoch": 0.45812236286919833, + "grad_norm": 0.6509737372398376, + "learning_rate": 0.0008611846681417818, + "loss": 1.5262, + "step": 4343 + }, + { + "epoch": 0.4582278481012658, + "grad_norm": 0.5615888237953186, + "learning_rate": 0.0008609363763892474, + "loss": 1.6006, + "step": 4344 + }, + { + "epoch": 0.4583333333333333, + "grad_norm": 0.546716034412384, + "learning_rate": 0.0008606880722057386, + "loss": 1.5815, + "step": 4345 + }, + { + "epoch": 0.45843881856540086, + "grad_norm": 0.5718411207199097, + "learning_rate": 0.0008604397556190797, + "loss": 1.6164, + "step": 4346 + }, + { + "epoch": 0.45854430379746836, + "grad_norm": 0.6074535250663757, + "learning_rate": 0.0008601914266570956, + "loss": 1.5514, + "step": 4347 + }, + { + "epoch": 0.45864978902953585, + "grad_norm": 0.5587552785873413, + "learning_rate": 0.0008599430853476126, + "loss": 1.568, + "step": 4348 + }, + { + "epoch": 0.4587552742616034, + "grad_norm": 0.6771101951599121, + "learning_rate": 0.0008596947317184585, + "loss": 1.5963, + "step": 4349 + }, + { + "epoch": 0.4588607594936709, + "grad_norm": 0.6353894472122192, + "learning_rate": 0.0008594463657974627, + "loss": 1.5978, + "step": 4350 + }, + { + "epoch": 0.4589662447257384, + "grad_norm": 0.5594850182533264, + "learning_rate": 0.000859197987612456, + "loss": 1.6089, + "step": 4351 + }, + { + "epoch": 0.45907172995780593, + "grad_norm": 0.5763881206512451, + "learning_rate": 0.0008589495971912703, + "loss": 1.6039, + "step": 4352 + }, + { + "epoch": 0.4591772151898734, + "grad_norm": 0.6099939942359924, + "learning_rate": 0.000858701194561739, + "loss": 1.5808, + "step": 4353 + }, + { + "epoch": 0.4592827004219409, + "grad_norm": 0.6232426762580872, + "learning_rate": 0.0008584527797516966, + "loss": 1.6097, + "step": 4354 + }, + { + "epoch": 0.45938818565400846, + "grad_norm": 0.6160762310028076, + "learning_rate": 0.0008582043527889797, + "loss": 1.6076, + "step": 4355 + }, + { + "epoch": 
0.45949367088607596, + "grad_norm": 0.5822445750236511, + "learning_rate": 0.0008579559137014254, + "loss": 1.5551, + "step": 4356 + }, + { + "epoch": 0.45959915611814345, + "grad_norm": 0.7144352793693542, + "learning_rate": 0.0008577074625168725, + "loss": 1.5934, + "step": 4357 + }, + { + "epoch": 0.459704641350211, + "grad_norm": 0.5723716616630554, + "learning_rate": 0.0008574589992631617, + "loss": 1.6059, + "step": 4358 + }, + { + "epoch": 0.4598101265822785, + "grad_norm": 0.716579258441925, + "learning_rate": 0.0008572105239681338, + "loss": 1.586, + "step": 4359 + }, + { + "epoch": 0.459915611814346, + "grad_norm": 0.5617988705635071, + "learning_rate": 0.0008569620366596322, + "loss": 1.5614, + "step": 4360 + }, + { + "epoch": 0.46002109704641353, + "grad_norm": 0.6596713662147522, + "learning_rate": 0.0008567135373655012, + "loss": 1.5801, + "step": 4361 + }, + { + "epoch": 0.460126582278481, + "grad_norm": 0.5746521353721619, + "learning_rate": 0.0008564650261135862, + "loss": 1.5433, + "step": 4362 + }, + { + "epoch": 0.4602320675105485, + "grad_norm": 0.6255010962486267, + "learning_rate": 0.0008562165029317339, + "loss": 1.5606, + "step": 4363 + }, + { + "epoch": 0.460337552742616, + "grad_norm": 0.580033004283905, + "learning_rate": 0.0008559679678477929, + "loss": 1.5787, + "step": 4364 + }, + { + "epoch": 0.46044303797468356, + "grad_norm": 0.5667212009429932, + "learning_rate": 0.0008557194208896129, + "loss": 1.5931, + "step": 4365 + }, + { + "epoch": 0.46054852320675105, + "grad_norm": 0.6673040986061096, + "learning_rate": 0.0008554708620850445, + "loss": 1.5378, + "step": 4366 + }, + { + "epoch": 0.46065400843881854, + "grad_norm": 0.5572834610939026, + "learning_rate": 0.0008552222914619401, + "loss": 1.5766, + "step": 4367 + }, + { + "epoch": 0.4607594936708861, + "grad_norm": 0.6111162900924683, + "learning_rate": 0.0008549737090481532, + "loss": 1.5675, + "step": 4368 + }, + { + "epoch": 0.4608649789029536, + "grad_norm": 
0.5937003493309021, + "learning_rate": 0.0008547251148715386, + "loss": 1.5729, + "step": 4369 + }, + { + "epoch": 0.4609704641350211, + "grad_norm": 0.5883057117462158, + "learning_rate": 0.000854476508959953, + "loss": 1.5705, + "step": 4370 + }, + { + "epoch": 0.4610759493670886, + "grad_norm": 0.5603340864181519, + "learning_rate": 0.0008542278913412535, + "loss": 1.5216, + "step": 4371 + }, + { + "epoch": 0.4611814345991561, + "grad_norm": 0.5697519779205322, + "learning_rate": 0.0008539792620432989, + "loss": 1.5793, + "step": 4372 + }, + { + "epoch": 0.4612869198312236, + "grad_norm": 0.5446745753288269, + "learning_rate": 0.0008537306210939497, + "loss": 1.5472, + "step": 4373 + }, + { + "epoch": 0.46139240506329116, + "grad_norm": 0.5811035633087158, + "learning_rate": 0.0008534819685210668, + "loss": 1.5815, + "step": 4374 + }, + { + "epoch": 0.46149789029535865, + "grad_norm": 0.619231641292572, + "learning_rate": 0.0008532333043525136, + "loss": 1.585, + "step": 4375 + }, + { + "epoch": 0.46160337552742614, + "grad_norm": 0.5465437173843384, + "learning_rate": 0.0008529846286161539, + "loss": 1.6028, + "step": 4376 + }, + { + "epoch": 0.4617088607594937, + "grad_norm": 0.5447313189506531, + "learning_rate": 0.000852735941339853, + "loss": 1.5425, + "step": 4377 + }, + { + "epoch": 0.4618143459915612, + "grad_norm": 0.5520161986351013, + "learning_rate": 0.0008524872425514775, + "loss": 1.5863, + "step": 4378 + }, + { + "epoch": 0.4619198312236287, + "grad_norm": 0.6087861061096191, + "learning_rate": 0.0008522385322788955, + "loss": 1.5989, + "step": 4379 + }, + { + "epoch": 0.4620253164556962, + "grad_norm": 0.5745512843132019, + "learning_rate": 0.0008519898105499762, + "loss": 1.5842, + "step": 4380 + }, + { + "epoch": 0.4621308016877637, + "grad_norm": 0.5877556800842285, + "learning_rate": 0.00085174107739259, + "loss": 1.5987, + "step": 4381 + }, + { + "epoch": 0.4622362869198312, + "grad_norm": 0.6018103361129761, + "learning_rate": 
0.000851492332834609, + "loss": 1.5713, + "step": 4382 + }, + { + "epoch": 0.46234177215189876, + "grad_norm": 0.5745853781700134, + "learning_rate": 0.0008512435769039055, + "loss": 1.5649, + "step": 4383 + }, + { + "epoch": 0.46244725738396625, + "grad_norm": 0.6948585510253906, + "learning_rate": 0.0008509948096283547, + "loss": 1.5393, + "step": 4384 + }, + { + "epoch": 0.46255274261603374, + "grad_norm": 0.7823477387428284, + "learning_rate": 0.0008507460310358319, + "loss": 1.587, + "step": 4385 + }, + { + "epoch": 0.4626582278481013, + "grad_norm": 0.580324113368988, + "learning_rate": 0.0008504972411542138, + "loss": 1.5866, + "step": 4386 + }, + { + "epoch": 0.4627637130801688, + "grad_norm": 0.7098479866981506, + "learning_rate": 0.0008502484400113787, + "loss": 1.5501, + "step": 4387 + }, + { + "epoch": 0.4628691983122363, + "grad_norm": 0.5831118226051331, + "learning_rate": 0.0008499996276352061, + "loss": 1.5681, + "step": 4388 + }, + { + "epoch": 0.4629746835443038, + "grad_norm": 0.7726491093635559, + "learning_rate": 0.0008497508040535766, + "loss": 1.5643, + "step": 4389 + }, + { + "epoch": 0.4630801687763713, + "grad_norm": 0.6337490081787109, + "learning_rate": 0.0008495019692943721, + "loss": 1.5686, + "step": 4390 + }, + { + "epoch": 0.4631856540084388, + "grad_norm": 0.691466212272644, + "learning_rate": 0.0008492531233854757, + "loss": 1.5721, + "step": 4391 + }, + { + "epoch": 0.46329113924050636, + "grad_norm": 0.7474343776702881, + "learning_rate": 0.0008490042663547719, + "loss": 1.5509, + "step": 4392 + }, + { + "epoch": 0.46339662447257385, + "grad_norm": 0.6098048090934753, + "learning_rate": 0.0008487553982301465, + "loss": 1.5611, + "step": 4393 + }, + { + "epoch": 0.46350210970464134, + "grad_norm": 0.6185755729675293, + "learning_rate": 0.0008485065190394863, + "loss": 1.5877, + "step": 4394 + }, + { + "epoch": 0.46360759493670883, + "grad_norm": 0.6170631647109985, + "learning_rate": 0.0008482576288106794, + "loss": 1.5884, + 
"step": 4395 + }, + { + "epoch": 0.4637130801687764, + "grad_norm": 0.6403258442878723, + "learning_rate": 0.000848008727571615, + "loss": 1.5686, + "step": 4396 + }, + { + "epoch": 0.4638185654008439, + "grad_norm": 0.6358500123023987, + "learning_rate": 0.0008477598153501842, + "loss": 1.5735, + "step": 4397 + }, + { + "epoch": 0.46392405063291137, + "grad_norm": 0.77675461769104, + "learning_rate": 0.0008475108921742787, + "loss": 1.6129, + "step": 4398 + }, + { + "epoch": 0.4640295358649789, + "grad_norm": 0.6601723432540894, + "learning_rate": 0.0008472619580717914, + "loss": 1.5386, + "step": 4399 + }, + { + "epoch": 0.4641350210970464, + "grad_norm": 0.7068989872932434, + "learning_rate": 0.0008470130130706166, + "loss": 1.6091, + "step": 4400 + }, + { + "epoch": 0.4642405063291139, + "grad_norm": 0.606769323348999, + "learning_rate": 0.00084676405719865, + "loss": 1.5486, + "step": 4401 + }, + { + "epoch": 0.46434599156118145, + "grad_norm": 0.7039675712585449, + "learning_rate": 0.0008465150904837883, + "loss": 1.5973, + "step": 4402 + }, + { + "epoch": 0.46445147679324894, + "grad_norm": 0.8513618111610413, + "learning_rate": 0.0008462661129539296, + "loss": 1.6319, + "step": 4403 + }, + { + "epoch": 0.46455696202531643, + "grad_norm": 0.6547417640686035, + "learning_rate": 0.0008460171246369725, + "loss": 1.6058, + "step": 4404 + }, + { + "epoch": 0.464662447257384, + "grad_norm": 0.6914715766906738, + "learning_rate": 0.000845768125560818, + "loss": 1.5829, + "step": 4405 + }, + { + "epoch": 0.4647679324894515, + "grad_norm": 0.6476884484291077, + "learning_rate": 0.0008455191157533677, + "loss": 1.5903, + "step": 4406 + }, + { + "epoch": 0.46487341772151897, + "grad_norm": 0.752851128578186, + "learning_rate": 0.000845270095242524, + "loss": 1.575, + "step": 4407 + }, + { + "epoch": 0.4649789029535865, + "grad_norm": 0.7800922393798828, + "learning_rate": 0.0008450210640561912, + "loss": 1.5967, + "step": 4408 + }, + { + "epoch": 0.465084388185654, + 
"grad_norm": 0.6328924298286438, + "learning_rate": 0.000844772022222274, + "loss": 1.5827, + "step": 4409 + }, + { + "epoch": 0.4651898734177215, + "grad_norm": 0.6390321254730225, + "learning_rate": 0.0008445229697686795, + "loss": 1.5803, + "step": 4410 + }, + { + "epoch": 0.46529535864978905, + "grad_norm": 0.5955313444137573, + "learning_rate": 0.0008442739067233148, + "loss": 1.5668, + "step": 4411 + }, + { + "epoch": 0.46540084388185654, + "grad_norm": 0.7433661222457886, + "learning_rate": 0.0008440248331140888, + "loss": 1.6158, + "step": 4412 + }, + { + "epoch": 0.46550632911392403, + "grad_norm": 0.5839003324508667, + "learning_rate": 0.0008437757489689113, + "loss": 1.5277, + "step": 4413 + }, + { + "epoch": 0.4656118143459916, + "grad_norm": 0.6888043284416199, + "learning_rate": 0.0008435266543156935, + "loss": 1.56, + "step": 4414 + }, + { + "epoch": 0.4657172995780591, + "grad_norm": 0.5905136466026306, + "learning_rate": 0.0008432775491823477, + "loss": 1.5762, + "step": 4415 + }, + { + "epoch": 0.46582278481012657, + "grad_norm": 0.6496404409408569, + "learning_rate": 0.0008430284335967876, + "loss": 1.6031, + "step": 4416 + }, + { + "epoch": 0.4659282700421941, + "grad_norm": 0.6074392795562744, + "learning_rate": 0.0008427793075869275, + "loss": 1.5456, + "step": 4417 + }, + { + "epoch": 0.4660337552742616, + "grad_norm": 0.6179284453392029, + "learning_rate": 0.0008425301711806833, + "loss": 1.5841, + "step": 4418 + }, + { + "epoch": 0.4661392405063291, + "grad_norm": 0.6417638063430786, + "learning_rate": 0.0008422810244059721, + "loss": 1.5814, + "step": 4419 + }, + { + "epoch": 0.46624472573839665, + "grad_norm": 0.6035361886024475, + "learning_rate": 0.0008420318672907119, + "loss": 1.5565, + "step": 4420 + }, + { + "epoch": 0.46635021097046414, + "grad_norm": 0.5411706566810608, + "learning_rate": 0.0008417826998628222, + "loss": 1.5688, + "step": 4421 + }, + { + "epoch": 0.46645569620253163, + "grad_norm": 0.685570478439331, + 
"learning_rate": 0.0008415335221502231, + "loss": 1.5807, + "step": 4422 + }, + { + "epoch": 0.4665611814345992, + "grad_norm": 0.6627756953239441, + "learning_rate": 0.0008412843341808365, + "loss": 1.5934, + "step": 4423 + }, + { + "epoch": 0.4666666666666667, + "grad_norm": 0.6027933955192566, + "learning_rate": 0.0008410351359825851, + "loss": 1.6068, + "step": 4424 + }, + { + "epoch": 0.46677215189873417, + "grad_norm": 0.6072971224784851, + "learning_rate": 0.0008407859275833928, + "loss": 1.5613, + "step": 4425 + }, + { + "epoch": 0.4668776371308017, + "grad_norm": 0.6358363628387451, + "learning_rate": 0.0008405367090111845, + "loss": 1.5685, + "step": 4426 + }, + { + "epoch": 0.4669831223628692, + "grad_norm": 0.6314423680305481, + "learning_rate": 0.0008402874802938866, + "loss": 1.5707, + "step": 4427 + }, + { + "epoch": 0.4670886075949367, + "grad_norm": 0.6123191118240356, + "learning_rate": 0.0008400382414594263, + "loss": 1.5801, + "step": 4428 + }, + { + "epoch": 0.4671940928270042, + "grad_norm": 0.5764989852905273, + "learning_rate": 0.000839788992535732, + "loss": 1.6003, + "step": 4429 + }, + { + "epoch": 0.46729957805907174, + "grad_norm": 0.6845150589942932, + "learning_rate": 0.0008395397335507334, + "loss": 1.5545, + "step": 4430 + }, + { + "epoch": 0.46740506329113923, + "grad_norm": 0.5529522895812988, + "learning_rate": 0.0008392904645323612, + "loss": 1.5634, + "step": 4431 + }, + { + "epoch": 0.4675105485232067, + "grad_norm": 0.6455991268157959, + "learning_rate": 0.0008390411855085473, + "loss": 1.607, + "step": 4432 + }, + { + "epoch": 0.4676160337552743, + "grad_norm": 0.6293706893920898, + "learning_rate": 0.0008387918965072244, + "loss": 1.5678, + "step": 4433 + }, + { + "epoch": 0.46772151898734177, + "grad_norm": 0.7454564571380615, + "learning_rate": 0.0008385425975563269, + "loss": 1.6047, + "step": 4434 + }, + { + "epoch": 0.46782700421940926, + "grad_norm": 0.5876478552818298, + "learning_rate": 0.0008382932886837897, + 
"loss": 1.5441, + "step": 4435 + }, + { + "epoch": 0.4679324894514768, + "grad_norm": 0.6457774043083191, + "learning_rate": 0.0008380439699175493, + "loss": 1.5405, + "step": 4436 + }, + { + "epoch": 0.4680379746835443, + "grad_norm": 0.5734648108482361, + "learning_rate": 0.000837794641285543, + "loss": 1.5726, + "step": 4437 + }, + { + "epoch": 0.4681434599156118, + "grad_norm": 0.716122031211853, + "learning_rate": 0.0008375453028157093, + "loss": 1.5894, + "step": 4438 + }, + { + "epoch": 0.46824894514767934, + "grad_norm": 0.5886810421943665, + "learning_rate": 0.000837295954535988, + "loss": 1.548, + "step": 4439 + }, + { + "epoch": 0.46835443037974683, + "grad_norm": 0.6066471934318542, + "learning_rate": 0.0008370465964743196, + "loss": 1.5957, + "step": 4440 + }, + { + "epoch": 0.4684599156118143, + "grad_norm": 0.5299807190895081, + "learning_rate": 0.0008367972286586461, + "loss": 1.5647, + "step": 4441 + }, + { + "epoch": 0.4685654008438819, + "grad_norm": 0.716541051864624, + "learning_rate": 0.0008365478511169103, + "loss": 1.6085, + "step": 4442 + }, + { + "epoch": 0.46867088607594937, + "grad_norm": 0.6900766491889954, + "learning_rate": 0.000836298463877056, + "loss": 1.5819, + "step": 4443 + }, + { + "epoch": 0.46877637130801686, + "grad_norm": 0.5729686617851257, + "learning_rate": 0.0008360490669670288, + "loss": 1.5577, + "step": 4444 + }, + { + "epoch": 0.4688818565400844, + "grad_norm": 0.7402169108390808, + "learning_rate": 0.0008357996604147744, + "loss": 1.6081, + "step": 4445 + }, + { + "epoch": 0.4689873417721519, + "grad_norm": 0.5422880053520203, + "learning_rate": 0.0008355502442482403, + "loss": 1.5381, + "step": 4446 + }, + { + "epoch": 0.4690928270042194, + "grad_norm": 0.6835195422172546, + "learning_rate": 0.0008353008184953748, + "loss": 1.5927, + "step": 4447 + }, + { + "epoch": 0.46919831223628694, + "grad_norm": 0.5604457855224609, + "learning_rate": 0.0008350513831841271, + "loss": 1.613, + "step": 4448 + }, + { + "epoch": 
0.46930379746835443, + "grad_norm": 0.6359450221061707, + "learning_rate": 0.0008348019383424479, + "loss": 1.5806, + "step": 4449 + }, + { + "epoch": 0.4694092827004219, + "grad_norm": 0.579194188117981, + "learning_rate": 0.0008345524839982886, + "loss": 1.5533, + "step": 4450 + }, + { + "epoch": 0.4695147679324895, + "grad_norm": 0.6727117300033569, + "learning_rate": 0.000834303020179602, + "loss": 1.5716, + "step": 4451 + }, + { + "epoch": 0.46962025316455697, + "grad_norm": 0.7059304118156433, + "learning_rate": 0.0008340535469143414, + "loss": 1.5436, + "step": 4452 + }, + { + "epoch": 0.46972573839662446, + "grad_norm": 0.5831950902938843, + "learning_rate": 0.0008338040642304618, + "loss": 1.587, + "step": 4453 + }, + { + "epoch": 0.469831223628692, + "grad_norm": 0.6432431936264038, + "learning_rate": 0.0008335545721559188, + "loss": 1.5357, + "step": 4454 + }, + { + "epoch": 0.4699367088607595, + "grad_norm": 0.590596079826355, + "learning_rate": 0.0008333050707186696, + "loss": 1.5579, + "step": 4455 + }, + { + "epoch": 0.470042194092827, + "grad_norm": 0.6303948163986206, + "learning_rate": 0.0008330555599466716, + "loss": 1.5627, + "step": 4456 + }, + { + "epoch": 0.47014767932489454, + "grad_norm": 0.6568983793258667, + "learning_rate": 0.000832806039867884, + "loss": 1.5556, + "step": 4457 + }, + { + "epoch": 0.47025316455696203, + "grad_norm": 0.6363131403923035, + "learning_rate": 0.000832556510510267, + "loss": 1.5536, + "step": 4458 + }, + { + "epoch": 0.4703586497890295, + "grad_norm": 0.596798300743103, + "learning_rate": 0.0008323069719017812, + "loss": 1.5811, + "step": 4459 + }, + { + "epoch": 0.4704641350210971, + "grad_norm": 0.5656107664108276, + "learning_rate": 0.0008320574240703886, + "loss": 1.6099, + "step": 4460 + }, + { + "epoch": 0.47056962025316457, + "grad_norm": 0.5802732706069946, + "learning_rate": 0.0008318078670440525, + "loss": 1.573, + "step": 4461 + }, + { + "epoch": 0.47067510548523206, + "grad_norm": 
0.5963864922523499, + "learning_rate": 0.0008315583008507372, + "loss": 1.5792, + "step": 4462 + }, + { + "epoch": 0.47078059071729955, + "grad_norm": 0.6217668056488037, + "learning_rate": 0.0008313087255184074, + "loss": 1.5669, + "step": 4463 + }, + { + "epoch": 0.4708860759493671, + "grad_norm": 0.5925135016441345, + "learning_rate": 0.0008310591410750295, + "loss": 1.5744, + "step": 4464 + }, + { + "epoch": 0.4709915611814346, + "grad_norm": 0.6503552794456482, + "learning_rate": 0.0008308095475485706, + "loss": 1.5601, + "step": 4465 + }, + { + "epoch": 0.4710970464135021, + "grad_norm": 0.5979328155517578, + "learning_rate": 0.0008305599449669989, + "loss": 1.5868, + "step": 4466 + }, + { + "epoch": 0.47120253164556963, + "grad_norm": 0.5835292935371399, + "learning_rate": 0.0008303103333582839, + "loss": 1.5482, + "step": 4467 + }, + { + "epoch": 0.4713080168776371, + "grad_norm": 0.6346067190170288, + "learning_rate": 0.0008300607127503952, + "loss": 1.5662, + "step": 4468 + }, + { + "epoch": 0.4714135021097046, + "grad_norm": 0.6092365980148315, + "learning_rate": 0.0008298110831713047, + "loss": 1.6109, + "step": 4469 + }, + { + "epoch": 0.47151898734177217, + "grad_norm": 0.6270390748977661, + "learning_rate": 0.0008295614446489842, + "loss": 1.6087, + "step": 4470 + }, + { + "epoch": 0.47162447257383966, + "grad_norm": 0.61066734790802, + "learning_rate": 0.0008293117972114074, + "loss": 1.5601, + "step": 4471 + }, + { + "epoch": 0.47172995780590715, + "grad_norm": 0.616072416305542, + "learning_rate": 0.0008290621408865481, + "loss": 1.5616, + "step": 4472 + }, + { + "epoch": 0.4718354430379747, + "grad_norm": 0.5767464637756348, + "learning_rate": 0.0008288124757023816, + "loss": 1.5978, + "step": 4473 + }, + { + "epoch": 0.4719409282700422, + "grad_norm": 0.5675467848777771, + "learning_rate": 0.0008285628016868841, + "loss": 1.6139, + "step": 4474 + }, + { + "epoch": 0.4720464135021097, + "grad_norm": 0.5880298614501953, + "learning_rate": 
0.0008283131188680332, + "loss": 1.606, + "step": 4475 + }, + { + "epoch": 0.47215189873417723, + "grad_norm": 0.5891975164413452, + "learning_rate": 0.0008280634272738066, + "loss": 1.5483, + "step": 4476 + }, + { + "epoch": 0.4722573839662447, + "grad_norm": 0.583401083946228, + "learning_rate": 0.0008278137269321837, + "loss": 1.583, + "step": 4477 + }, + { + "epoch": 0.4723628691983122, + "grad_norm": 0.5636411309242249, + "learning_rate": 0.0008275640178711447, + "loss": 1.6015, + "step": 4478 + }, + { + "epoch": 0.47246835443037977, + "grad_norm": 0.7732736468315125, + "learning_rate": 0.0008273143001186709, + "loss": 1.5773, + "step": 4479 + }, + { + "epoch": 0.47257383966244726, + "grad_norm": 0.6863667964935303, + "learning_rate": 0.0008270645737027441, + "loss": 1.5742, + "step": 4480 + }, + { + "epoch": 0.47267932489451475, + "grad_norm": 0.6190831661224365, + "learning_rate": 0.0008268148386513475, + "loss": 1.5472, + "step": 4481 + }, + { + "epoch": 0.4727848101265823, + "grad_norm": 0.6863664984703064, + "learning_rate": 0.0008265650949924652, + "loss": 1.5629, + "step": 4482 + }, + { + "epoch": 0.4728902953586498, + "grad_norm": 0.5550565719604492, + "learning_rate": 0.0008263153427540825, + "loss": 1.5527, + "step": 4483 + }, + { + "epoch": 0.4729957805907173, + "grad_norm": 0.6163073182106018, + "learning_rate": 0.0008260655819641849, + "loss": 1.5484, + "step": 4484 + }, + { + "epoch": 0.47310126582278483, + "grad_norm": 0.5674335360527039, + "learning_rate": 0.0008258158126507594, + "loss": 1.5825, + "step": 4485 + }, + { + "epoch": 0.4732067510548523, + "grad_norm": 0.5989305377006531, + "learning_rate": 0.0008255660348417944, + "loss": 1.5717, + "step": 4486 + }, + { + "epoch": 0.4733122362869198, + "grad_norm": 0.5668847560882568, + "learning_rate": 0.0008253162485652779, + "loss": 1.5642, + "step": 4487 + }, + { + "epoch": 0.47341772151898737, + "grad_norm": 0.5496958494186401, + "learning_rate": 0.0008250664538492006, + "loss": 1.59, + 
"step": 4488 + }, + { + "epoch": 0.47352320675105486, + "grad_norm": 0.6954787373542786, + "learning_rate": 0.0008248166507215526, + "loss": 1.5733, + "step": 4489 + }, + { + "epoch": 0.47362869198312235, + "grad_norm": 0.7722894549369812, + "learning_rate": 0.0008245668392103259, + "loss": 1.6142, + "step": 4490 + }, + { + "epoch": 0.4737341772151899, + "grad_norm": 0.5934421420097351, + "learning_rate": 0.000824317019343513, + "loss": 1.5627, + "step": 4491 + }, + { + "epoch": 0.4738396624472574, + "grad_norm": 0.8469609022140503, + "learning_rate": 0.0008240671911491077, + "loss": 1.5857, + "step": 4492 + }, + { + "epoch": 0.4739451476793249, + "grad_norm": 0.5587372183799744, + "learning_rate": 0.000823817354655104, + "loss": 1.5541, + "step": 4493 + }, + { + "epoch": 0.4740506329113924, + "grad_norm": 0.8157481551170349, + "learning_rate": 0.0008235675098894979, + "loss": 1.5508, + "step": 4494 + }, + { + "epoch": 0.4741561181434599, + "grad_norm": 0.7161518931388855, + "learning_rate": 0.0008233176568802851, + "loss": 1.5929, + "step": 4495 + }, + { + "epoch": 0.4742616033755274, + "grad_norm": 0.5806686878204346, + "learning_rate": 0.0008230677956554637, + "loss": 1.561, + "step": 4496 + }, + { + "epoch": 0.4743670886075949, + "grad_norm": 0.6456138491630554, + "learning_rate": 0.0008228179262430313, + "loss": 1.5476, + "step": 4497 + }, + { + "epoch": 0.47447257383966246, + "grad_norm": 0.5695160627365112, + "learning_rate": 0.0008225680486709871, + "loss": 1.5738, + "step": 4498 + }, + { + "epoch": 0.47457805907172995, + "grad_norm": 0.5822930932044983, + "learning_rate": 0.0008223181629673312, + "loss": 1.58, + "step": 4499 + }, + { + "epoch": 0.47468354430379744, + "grad_norm": 0.578395664691925, + "learning_rate": 0.0008220682691600645, + "loss": 1.5738, + "step": 4500 + }, + { + "epoch": 0.474789029535865, + "grad_norm": 0.5219881534576416, + "learning_rate": 0.0008218183672771889, + "loss": 1.5898, + "step": 4501 + }, + { + "epoch": 
0.4748945147679325, + "grad_norm": 0.5852826237678528, + "learning_rate": 0.0008215684573467071, + "loss": 1.5967, + "step": 4502 + }, + { + "epoch": 0.475, + "grad_norm": 0.5454450249671936, + "learning_rate": 0.0008213185393966229, + "loss": 1.5681, + "step": 4503 + }, + { + "epoch": 0.4751054852320675, + "grad_norm": 0.5678443908691406, + "learning_rate": 0.0008210686134549406, + "loss": 1.5821, + "step": 4504 + }, + { + "epoch": 0.475210970464135, + "grad_norm": 0.558936595916748, + "learning_rate": 0.0008208186795496657, + "loss": 1.5597, + "step": 4505 + }, + { + "epoch": 0.4753164556962025, + "grad_norm": 0.5785675644874573, + "learning_rate": 0.0008205687377088048, + "loss": 1.5627, + "step": 4506 + }, + { + "epoch": 0.47542194092827006, + "grad_norm": 0.5391056537628174, + "learning_rate": 0.000820318787960365, + "loss": 1.5936, + "step": 4507 + }, + { + "epoch": 0.47552742616033755, + "grad_norm": 0.5768523812294006, + "learning_rate": 0.0008200688303323542, + "loss": 1.5556, + "step": 4508 + }, + { + "epoch": 0.47563291139240504, + "grad_norm": 0.5184341073036194, + "learning_rate": 0.0008198188648527818, + "loss": 1.5792, + "step": 4509 + }, + { + "epoch": 0.4757383966244726, + "grad_norm": 0.5930606126785278, + "learning_rate": 0.0008195688915496571, + "loss": 1.5427, + "step": 4510 + }, + { + "epoch": 0.4758438818565401, + "grad_norm": 0.5789134502410889, + "learning_rate": 0.0008193189104509915, + "loss": 1.5538, + "step": 4511 + }, + { + "epoch": 0.4759493670886076, + "grad_norm": 0.6462187767028809, + "learning_rate": 0.0008190689215847963, + "loss": 1.5536, + "step": 4512 + }, + { + "epoch": 0.4760548523206751, + "grad_norm": 0.5625181198120117, + "learning_rate": 0.0008188189249790838, + "loss": 1.5783, + "step": 4513 + }, + { + "epoch": 0.4761603375527426, + "grad_norm": 0.7557629346847534, + "learning_rate": 0.0008185689206618677, + "loss": 1.5805, + "step": 4514 + }, + { + "epoch": 0.4762658227848101, + "grad_norm": 0.6069433093070984, + 
"learning_rate": 0.0008183189086611623, + "loss": 1.5956, + "step": 4515 + }, + { + "epoch": 0.47637130801687766, + "grad_norm": 0.8323469161987305, + "learning_rate": 0.0008180688890049823, + "loss": 1.5502, + "step": 4516 + }, + { + "epoch": 0.47647679324894515, + "grad_norm": 0.6328763365745544, + "learning_rate": 0.000817818861721344, + "loss": 1.5401, + "step": 4517 + }, + { + "epoch": 0.47658227848101264, + "grad_norm": 0.6574382781982422, + "learning_rate": 0.0008175688268382639, + "loss": 1.5253, + "step": 4518 + }, + { + "epoch": 0.4766877637130802, + "grad_norm": 0.5930418968200684, + "learning_rate": 0.00081731878438376, + "loss": 1.5262, + "step": 4519 + }, + { + "epoch": 0.4767932489451477, + "grad_norm": 0.694416344165802, + "learning_rate": 0.0008170687343858506, + "loss": 1.5455, + "step": 4520 + }, + { + "epoch": 0.4768987341772152, + "grad_norm": 0.5983505249023438, + "learning_rate": 0.000816818676872555, + "loss": 1.5896, + "step": 4521 + }, + { + "epoch": 0.4770042194092827, + "grad_norm": 0.5858949422836304, + "learning_rate": 0.0008165686118718935, + "loss": 1.5964, + "step": 4522 + }, + { + "epoch": 0.4771097046413502, + "grad_norm": 0.5641458034515381, + "learning_rate": 0.000816318539411887, + "loss": 1.5742, + "step": 4523 + }, + { + "epoch": 0.4772151898734177, + "grad_norm": 0.5813875198364258, + "learning_rate": 0.0008160684595205577, + "loss": 1.5763, + "step": 4524 + }, + { + "epoch": 0.47732067510548526, + "grad_norm": 0.5866410136222839, + "learning_rate": 0.000815818372225928, + "loss": 1.5453, + "step": 4525 + }, + { + "epoch": 0.47742616033755275, + "grad_norm": 0.6208032965660095, + "learning_rate": 0.0008155682775560215, + "loss": 1.5956, + "step": 4526 + }, + { + "epoch": 0.47753164556962024, + "grad_norm": 0.6180749535560608, + "learning_rate": 0.0008153181755388624, + "loss": 1.5454, + "step": 4527 + }, + { + "epoch": 0.47763713080168774, + "grad_norm": 0.77285236120224, + "learning_rate": 0.0008150680662024761, + "loss": 
1.5749, + "step": 4528 + }, + { + "epoch": 0.4777426160337553, + "grad_norm": 0.742080807685852, + "learning_rate": 0.0008148179495748885, + "loss": 1.5546, + "step": 4529 + }, + { + "epoch": 0.4778481012658228, + "grad_norm": 0.6792816519737244, + "learning_rate": 0.0008145678256841265, + "loss": 1.588, + "step": 4530 + }, + { + "epoch": 0.47795358649789027, + "grad_norm": 0.5984871983528137, + "learning_rate": 0.0008143176945582175, + "loss": 1.5401, + "step": 4531 + }, + { + "epoch": 0.4780590717299578, + "grad_norm": 0.5747952461242676, + "learning_rate": 0.0008140675562251904, + "loss": 1.5642, + "step": 4532 + }, + { + "epoch": 0.4781645569620253, + "grad_norm": 0.5939887762069702, + "learning_rate": 0.0008138174107130739, + "loss": 1.6051, + "step": 4533 + }, + { + "epoch": 0.4782700421940928, + "grad_norm": 0.6153751611709595, + "learning_rate": 0.0008135672580498984, + "loss": 1.5968, + "step": 4534 + }, + { + "epoch": 0.47837552742616035, + "grad_norm": 0.6507474780082703, + "learning_rate": 0.0008133170982636946, + "loss": 1.5479, + "step": 4535 + }, + { + "epoch": 0.47848101265822784, + "grad_norm": 0.5844017267227173, + "learning_rate": 0.0008130669313824944, + "loss": 1.5615, + "step": 4536 + }, + { + "epoch": 0.47858649789029534, + "grad_norm": 0.5904884934425354, + "learning_rate": 0.0008128167574343299, + "loss": 1.5723, + "step": 4537 + }, + { + "epoch": 0.4786919831223629, + "grad_norm": 0.6842179894447327, + "learning_rate": 0.0008125665764472345, + "loss": 1.5795, + "step": 4538 + }, + { + "epoch": 0.4787974683544304, + "grad_norm": 0.6325793862342834, + "learning_rate": 0.0008123163884492422, + "loss": 1.5745, + "step": 4539 + }, + { + "epoch": 0.47890295358649787, + "grad_norm": 0.6420925855636597, + "learning_rate": 0.0008120661934683879, + "loss": 1.6001, + "step": 4540 + }, + { + "epoch": 0.4790084388185654, + "grad_norm": 0.5892593860626221, + "learning_rate": 0.0008118159915327072, + "loss": 1.5443, + "step": 4541 + }, + { + "epoch": 
0.4791139240506329, + "grad_norm": 0.6138510704040527, + "learning_rate": 0.0008115657826702364, + "loss": 1.5498, + "step": 4542 + }, + { + "epoch": 0.4792194092827004, + "grad_norm": 0.6007449626922607, + "learning_rate": 0.0008113155669090124, + "loss": 1.5905, + "step": 4543 + }, + { + "epoch": 0.47932489451476795, + "grad_norm": 0.581271767616272, + "learning_rate": 0.0008110653442770736, + "loss": 1.5836, + "step": 4544 + }, + { + "epoch": 0.47943037974683544, + "grad_norm": 0.6902627944946289, + "learning_rate": 0.0008108151148024584, + "loss": 1.56, + "step": 4545 + }, + { + "epoch": 0.47953586497890294, + "grad_norm": 0.621671199798584, + "learning_rate": 0.0008105648785132065, + "loss": 1.5939, + "step": 4546 + }, + { + "epoch": 0.4796413502109705, + "grad_norm": 0.5703502893447876, + "learning_rate": 0.0008103146354373577, + "loss": 1.5829, + "step": 4547 + }, + { + "epoch": 0.479746835443038, + "grad_norm": 0.5981324911117554, + "learning_rate": 0.0008100643856029534, + "loss": 1.6027, + "step": 4548 + }, + { + "epoch": 0.47985232067510547, + "grad_norm": 0.5765863656997681, + "learning_rate": 0.0008098141290380353, + "loss": 1.5885, + "step": 4549 + }, + { + "epoch": 0.479957805907173, + "grad_norm": 0.6415268182754517, + "learning_rate": 0.0008095638657706456, + "loss": 1.544, + "step": 4550 + }, + { + "epoch": 0.4800632911392405, + "grad_norm": 0.6711254119873047, + "learning_rate": 0.0008093135958288278, + "loss": 1.5544, + "step": 4551 + }, + { + "epoch": 0.480168776371308, + "grad_norm": 0.6497153639793396, + "learning_rate": 0.0008090633192406256, + "loss": 1.5423, + "step": 4552 + }, + { + "epoch": 0.48027426160337555, + "grad_norm": 0.7193641066551208, + "learning_rate": 0.0008088130360340843, + "loss": 1.5386, + "step": 4553 + }, + { + "epoch": 0.48037974683544304, + "grad_norm": 0.6049671173095703, + "learning_rate": 0.0008085627462372489, + "loss": 1.5783, + "step": 4554 + }, + { + "epoch": 0.48048523206751054, + "grad_norm": 
0.6297358274459839, + "learning_rate": 0.0008083124498781658, + "loss": 1.5326, + "step": 4555 + }, + { + "epoch": 0.4805907172995781, + "grad_norm": 0.5877453088760376, + "learning_rate": 0.0008080621469848817, + "loss": 1.5877, + "step": 4556 + }, + { + "epoch": 0.4806962025316456, + "grad_norm": 0.6246798038482666, + "learning_rate": 0.0008078118375854449, + "loss": 1.5562, + "step": 4557 + }, + { + "epoch": 0.48080168776371307, + "grad_norm": 0.641414999961853, + "learning_rate": 0.000807561521707903, + "loss": 1.5667, + "step": 4558 + }, + { + "epoch": 0.48090717299578056, + "grad_norm": 0.6183552742004395, + "learning_rate": 0.000807311199380306, + "loss": 1.5848, + "step": 4559 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 0.6806971430778503, + "learning_rate": 0.000807060870630703, + "loss": 1.5715, + "step": 4560 + }, + { + "epoch": 0.4811181434599156, + "grad_norm": 0.8424493074417114, + "learning_rate": 0.0008068105354871449, + "loss": 1.5258, + "step": 4561 + }, + { + "epoch": 0.4812236286919831, + "grad_norm": 0.5735191702842712, + "learning_rate": 0.0008065601939776833, + "loss": 1.5272, + "step": 4562 + }, + { + "epoch": 0.48132911392405064, + "grad_norm": 0.824665367603302, + "learning_rate": 0.0008063098461303698, + "loss": 1.5504, + "step": 4563 + }, + { + "epoch": 0.48143459915611814, + "grad_norm": 0.574005663394928, + "learning_rate": 0.0008060594919732572, + "loss": 1.5615, + "step": 4564 + }, + { + "epoch": 0.48154008438818563, + "grad_norm": 0.7401556372642517, + "learning_rate": 0.0008058091315343988, + "loss": 1.5348, + "step": 4565 + }, + { + "epoch": 0.4816455696202532, + "grad_norm": 0.6539406776428223, + "learning_rate": 0.0008055587648418492, + "loss": 1.5813, + "step": 4566 + }, + { + "epoch": 0.48175105485232067, + "grad_norm": 0.5786198973655701, + "learning_rate": 0.000805308391923663, + "loss": 1.5407, + "step": 4567 + }, + { + "epoch": 0.48185654008438816, + "grad_norm": 0.6426239609718323, + "learning_rate": 
0.0008050580128078957, + "loss": 1.5549, + "step": 4568 + }, + { + "epoch": 0.4819620253164557, + "grad_norm": 0.6317071914672852, + "learning_rate": 0.0008048076275226032, + "loss": 1.5277, + "step": 4569 + }, + { + "epoch": 0.4820675105485232, + "grad_norm": 0.8296467661857605, + "learning_rate": 0.000804557236095843, + "loss": 1.5487, + "step": 4570 + }, + { + "epoch": 0.4821729957805907, + "grad_norm": 0.6195515990257263, + "learning_rate": 0.0008043068385556725, + "loss": 1.5493, + "step": 4571 + }, + { + "epoch": 0.48227848101265824, + "grad_norm": 0.6305753588676453, + "learning_rate": 0.0008040564349301498, + "loss": 1.5277, + "step": 4572 + }, + { + "epoch": 0.48238396624472574, + "grad_norm": 0.6513224244117737, + "learning_rate": 0.0008038060252473339, + "loss": 1.537, + "step": 4573 + }, + { + "epoch": 0.48248945147679323, + "grad_norm": 0.6516707539558411, + "learning_rate": 0.0008035556095352847, + "loss": 1.6202, + "step": 4574 + }, + { + "epoch": 0.4825949367088608, + "grad_norm": 0.6748477220535278, + "learning_rate": 0.0008033051878220624, + "loss": 1.5435, + "step": 4575 + }, + { + "epoch": 0.48270042194092827, + "grad_norm": 0.609793484210968, + "learning_rate": 0.0008030547601357281, + "loss": 1.5958, + "step": 4576 + }, + { + "epoch": 0.48280590717299576, + "grad_norm": 0.6380211710929871, + "learning_rate": 0.0008028043265043434, + "loss": 1.5474, + "step": 4577 + }, + { + "epoch": 0.4829113924050633, + "grad_norm": 0.6087417602539062, + "learning_rate": 0.0008025538869559703, + "loss": 1.5699, + "step": 4578 + }, + { + "epoch": 0.4830168776371308, + "grad_norm": 0.5921029448509216, + "learning_rate": 0.0008023034415186725, + "loss": 1.598, + "step": 4579 + }, + { + "epoch": 0.4831223628691983, + "grad_norm": 0.6103104948997498, + "learning_rate": 0.0008020529902205129, + "loss": 1.5875, + "step": 4580 + }, + { + "epoch": 0.48322784810126584, + "grad_norm": 0.7036359906196594, + "learning_rate": 0.0008018025330895566, + "loss": 1.5204, + 
"step": 4581 + }, + { + "epoch": 0.48333333333333334, + "grad_norm": 0.6004746556282043, + "learning_rate": 0.0008015520701538677, + "loss": 1.5445, + "step": 4582 + }, + { + "epoch": 0.48343881856540083, + "grad_norm": 0.6310973763465881, + "learning_rate": 0.0008013016014415126, + "loss": 1.5559, + "step": 4583 + }, + { + "epoch": 0.4835443037974684, + "grad_norm": 0.7099143266677856, + "learning_rate": 0.0008010511269805571, + "loss": 1.5944, + "step": 4584 + }, + { + "epoch": 0.48364978902953587, + "grad_norm": 0.6111563444137573, + "learning_rate": 0.0008008006467990684, + "loss": 1.5823, + "step": 4585 + }, + { + "epoch": 0.48375527426160336, + "grad_norm": 0.6963940262794495, + "learning_rate": 0.0008005501609251136, + "loss": 1.5624, + "step": 4586 + }, + { + "epoch": 0.4838607594936709, + "grad_norm": 0.5746895670890808, + "learning_rate": 0.0008002996693867615, + "loss": 1.5668, + "step": 4587 + }, + { + "epoch": 0.4839662447257384, + "grad_norm": 0.7495745420455933, + "learning_rate": 0.0008000491722120806, + "loss": 1.5162, + "step": 4588 + }, + { + "epoch": 0.4840717299578059, + "grad_norm": 0.6117402911186218, + "learning_rate": 0.0007997986694291404, + "loss": 1.5555, + "step": 4589 + }, + { + "epoch": 0.48417721518987344, + "grad_norm": 0.7631443738937378, + "learning_rate": 0.0007995481610660108, + "loss": 1.5559, + "step": 4590 + }, + { + "epoch": 0.48428270042194094, + "grad_norm": 0.5573623180389404, + "learning_rate": 0.0007992976471507628, + "loss": 1.5616, + "step": 4591 + }, + { + "epoch": 0.48438818565400843, + "grad_norm": 0.7981250286102295, + "learning_rate": 0.0007990471277114676, + "loss": 1.5579, + "step": 4592 + }, + { + "epoch": 0.4844936708860759, + "grad_norm": 0.6250159740447998, + "learning_rate": 0.0007987966027761972, + "loss": 1.5523, + "step": 4593 + }, + { + "epoch": 0.48459915611814347, + "grad_norm": 0.6536917686462402, + "learning_rate": 0.0007985460723730242, + "loss": 1.5255, + "step": 4594 + }, + { + "epoch": 
0.48470464135021096, + "grad_norm": 0.7408666610717773, + "learning_rate": 0.0007982955365300214, + "loss": 1.5913, + "step": 4595 + }, + { + "epoch": 0.48481012658227846, + "grad_norm": 0.5725013613700867, + "learning_rate": 0.0007980449952752633, + "loss": 1.5936, + "step": 4596 + }, + { + "epoch": 0.484915611814346, + "grad_norm": 0.7131905555725098, + "learning_rate": 0.0007977944486368237, + "loss": 1.5536, + "step": 4597 + }, + { + "epoch": 0.4850210970464135, + "grad_norm": 0.6253800392150879, + "learning_rate": 0.0007975438966427778, + "loss": 1.5793, + "step": 4598 + }, + { + "epoch": 0.485126582278481, + "grad_norm": 0.6572579145431519, + "learning_rate": 0.0007972933393212012, + "loss": 1.6149, + "step": 4599 + }, + { + "epoch": 0.48523206751054854, + "grad_norm": 0.6340463161468506, + "learning_rate": 0.0007970427767001702, + "loss": 1.5944, + "step": 4600 + }, + { + "epoch": 0.48533755274261603, + "grad_norm": 0.6001745462417603, + "learning_rate": 0.0007967922088077615, + "loss": 1.6004, + "step": 4601 + }, + { + "epoch": 0.4854430379746835, + "grad_norm": 0.6309539675712585, + "learning_rate": 0.0007965416356720524, + "loss": 1.5753, + "step": 4602 + }, + { + "epoch": 0.48554852320675107, + "grad_norm": 0.5920194387435913, + "learning_rate": 0.000796291057321121, + "loss": 1.5182, + "step": 4603 + }, + { + "epoch": 0.48565400843881856, + "grad_norm": 0.5879032015800476, + "learning_rate": 0.0007960404737830457, + "loss": 1.5542, + "step": 4604 + }, + { + "epoch": 0.48575949367088606, + "grad_norm": 0.5984836220741272, + "learning_rate": 0.0007957898850859058, + "loss": 1.5954, + "step": 4605 + }, + { + "epoch": 0.4858649789029536, + "grad_norm": 0.581567108631134, + "learning_rate": 0.000795539291257781, + "loss": 1.5808, + "step": 4606 + }, + { + "epoch": 0.4859704641350211, + "grad_norm": 0.7050942182540894, + "learning_rate": 0.0007952886923267516, + "loss": 1.5702, + "step": 4607 + }, + { + "epoch": 0.4860759493670886, + "grad_norm": 
0.6971874833106995, + "learning_rate": 0.0007950380883208981, + "loss": 1.5634, + "step": 4608 + }, + { + "epoch": 0.48618143459915614, + "grad_norm": 0.7544222474098206, + "learning_rate": 0.0007947874792683025, + "loss": 1.5771, + "step": 4609 + }, + { + "epoch": 0.48628691983122363, + "grad_norm": 0.8453202843666077, + "learning_rate": 0.0007945368651970464, + "loss": 1.5995, + "step": 4610 + }, + { + "epoch": 0.4863924050632911, + "grad_norm": 0.6893463730812073, + "learning_rate": 0.0007942862461352125, + "loss": 1.533, + "step": 4611 + }, + { + "epoch": 0.48649789029535867, + "grad_norm": 0.8742305636405945, + "learning_rate": 0.0007940356221108837, + "loss": 1.569, + "step": 4612 + }, + { + "epoch": 0.48660337552742616, + "grad_norm": 0.6068758368492126, + "learning_rate": 0.0007937849931521441, + "loss": 1.5392, + "step": 4613 + }, + { + "epoch": 0.48670886075949366, + "grad_norm": 1.0491981506347656, + "learning_rate": 0.0007935343592870778, + "loss": 1.5534, + "step": 4614 + }, + { + "epoch": 0.4868143459915612, + "grad_norm": 0.6276761293411255, + "learning_rate": 0.0007932837205437692, + "loss": 1.5919, + "step": 4615 + }, + { + "epoch": 0.4869198312236287, + "grad_norm": 0.7752852439880371, + "learning_rate": 0.000793033076950304, + "loss": 1.5798, + "step": 4616 + }, + { + "epoch": 0.4870253164556962, + "grad_norm": 0.5584530830383301, + "learning_rate": 0.0007927824285347678, + "loss": 1.5882, + "step": 4617 + }, + { + "epoch": 0.48713080168776374, + "grad_norm": 0.8057190179824829, + "learning_rate": 0.0007925317753252473, + "loss": 1.5202, + "step": 4618 + }, + { + "epoch": 0.48723628691983123, + "grad_norm": 0.567363977432251, + "learning_rate": 0.0007922811173498293, + "loss": 1.5011, + "step": 4619 + }, + { + "epoch": 0.4873417721518987, + "grad_norm": 0.7408154010772705, + "learning_rate": 0.0007920304546366013, + "loss": 1.5631, + "step": 4620 + }, + { + "epoch": 0.48744725738396627, + "grad_norm": 0.6198625564575195, + "learning_rate": 
0.0007917797872136511, + "loss": 1.5886, + "step": 4621 + }, + { + "epoch": 0.48755274261603376, + "grad_norm": 0.6342923641204834, + "learning_rate": 0.0007915291151090676, + "loss": 1.5374, + "step": 4622 + }, + { + "epoch": 0.48765822784810126, + "grad_norm": 0.6465457081794739, + "learning_rate": 0.0007912784383509396, + "loss": 1.5501, + "step": 4623 + }, + { + "epoch": 0.4877637130801688, + "grad_norm": 0.6367639899253845, + "learning_rate": 0.0007910277569673568, + "loss": 1.5567, + "step": 4624 + }, + { + "epoch": 0.4878691983122363, + "grad_norm": 0.6064855456352234, + "learning_rate": 0.000790777070986409, + "loss": 1.5624, + "step": 4625 + }, + { + "epoch": 0.4879746835443038, + "grad_norm": 0.6607616543769836, + "learning_rate": 0.0007905263804361873, + "loss": 1.5948, + "step": 4626 + }, + { + "epoch": 0.4880801687763713, + "grad_norm": 0.5615147948265076, + "learning_rate": 0.0007902756853447824, + "loss": 1.5965, + "step": 4627 + }, + { + "epoch": 0.48818565400843883, + "grad_norm": 0.5822696089744568, + "learning_rate": 0.0007900249857402863, + "loss": 1.5553, + "step": 4628 + }, + { + "epoch": 0.4882911392405063, + "grad_norm": 0.6260411739349365, + "learning_rate": 0.000789774281650791, + "loss": 1.5466, + "step": 4629 + }, + { + "epoch": 0.4883966244725738, + "grad_norm": 0.6111975908279419, + "learning_rate": 0.000789523573104389, + "loss": 1.596, + "step": 4630 + }, + { + "epoch": 0.48850210970464136, + "grad_norm": 0.6310405135154724, + "learning_rate": 0.0007892728601291737, + "loss": 1.5767, + "step": 4631 + }, + { + "epoch": 0.48860759493670886, + "grad_norm": 0.5741384625434875, + "learning_rate": 0.0007890221427532384, + "loss": 1.553, + "step": 4632 + }, + { + "epoch": 0.48871308016877635, + "grad_norm": 0.6248970627784729, + "learning_rate": 0.0007887714210046775, + "loss": 1.5684, + "step": 4633 + }, + { + "epoch": 0.4888185654008439, + "grad_norm": 0.6220908761024475, + "learning_rate": 0.0007885206949115855, + "loss": 1.5601, + 
"step": 4634 + }, + { + "epoch": 0.4889240506329114, + "grad_norm": 0.6445859670639038, + "learning_rate": 0.0007882699645020577, + "loss": 1.5537, + "step": 4635 + }, + { + "epoch": 0.4890295358649789, + "grad_norm": 0.6000876426696777, + "learning_rate": 0.0007880192298041893, + "loss": 1.5737, + "step": 4636 + }, + { + "epoch": 0.48913502109704643, + "grad_norm": 0.6191104650497437, + "learning_rate": 0.0007877684908460768, + "loss": 1.5637, + "step": 4637 + }, + { + "epoch": 0.4892405063291139, + "grad_norm": 0.569187581539154, + "learning_rate": 0.0007875177476558165, + "loss": 1.5738, + "step": 4638 + }, + { + "epoch": 0.4893459915611814, + "grad_norm": 0.6086604595184326, + "learning_rate": 0.0007872670002615056, + "loss": 1.5667, + "step": 4639 + }, + { + "epoch": 0.48945147679324896, + "grad_norm": 0.6202175617218018, + "learning_rate": 0.0007870162486912414, + "loss": 1.5901, + "step": 4640 + }, + { + "epoch": 0.48955696202531646, + "grad_norm": 0.5808926224708557, + "learning_rate": 0.0007867654929731221, + "loss": 1.583, + "step": 4641 + }, + { + "epoch": 0.48966244725738395, + "grad_norm": 0.6150477528572083, + "learning_rate": 0.0007865147331352457, + "loss": 1.5446, + "step": 4642 + }, + { + "epoch": 0.4897679324894515, + "grad_norm": 0.6334555745124817, + "learning_rate": 0.0007862639692057115, + "loss": 1.5755, + "step": 4643 + }, + { + "epoch": 0.489873417721519, + "grad_norm": 0.6269553303718567, + "learning_rate": 0.0007860132012126187, + "loss": 1.5829, + "step": 4644 + }, + { + "epoch": 0.4899789029535865, + "grad_norm": 0.6181042194366455, + "learning_rate": 0.0007857624291840672, + "loss": 1.6041, + "step": 4645 + }, + { + "epoch": 0.49008438818565403, + "grad_norm": 0.5812039971351624, + "learning_rate": 0.0007855116531481572, + "loss": 1.5647, + "step": 4646 + }, + { + "epoch": 0.4901898734177215, + "grad_norm": 0.580014169216156, + "learning_rate": 0.0007852608731329893, + "loss": 1.583, + "step": 4647 + }, + { + "epoch": 
0.490295358649789, + "grad_norm": 0.5597314238548279, + "learning_rate": 0.0007850100891666648, + "loss": 1.5573, + "step": 4648 + }, + { + "epoch": 0.49040084388185656, + "grad_norm": 0.5908945798873901, + "learning_rate": 0.0007847593012772852, + "loss": 1.5452, + "step": 4649 + }, + { + "epoch": 0.49050632911392406, + "grad_norm": 0.545530378818512, + "learning_rate": 0.0007845085094929527, + "loss": 1.5742, + "step": 4650 + }, + { + "epoch": 0.49061181434599155, + "grad_norm": 0.6326039433479309, + "learning_rate": 0.0007842577138417695, + "loss": 1.5706, + "step": 4651 + }, + { + "epoch": 0.4907172995780591, + "grad_norm": 0.6024174690246582, + "learning_rate": 0.0007840069143518386, + "loss": 1.5575, + "step": 4652 + }, + { + "epoch": 0.4908227848101266, + "grad_norm": 0.6042734980583191, + "learning_rate": 0.0007837561110512635, + "loss": 1.5416, + "step": 4653 + }, + { + "epoch": 0.4909282700421941, + "grad_norm": 0.6035683751106262, + "learning_rate": 0.0007835053039681476, + "loss": 1.5847, + "step": 4654 + }, + { + "epoch": 0.49103375527426163, + "grad_norm": 0.585294246673584, + "learning_rate": 0.0007832544931305956, + "loss": 1.5467, + "step": 4655 + }, + { + "epoch": 0.4911392405063291, + "grad_norm": 0.5597073435783386, + "learning_rate": 0.0007830036785667116, + "loss": 1.5453, + "step": 4656 + }, + { + "epoch": 0.4912447257383966, + "grad_norm": 0.5712742209434509, + "learning_rate": 0.000782752860304601, + "loss": 1.5472, + "step": 4657 + }, + { + "epoch": 0.4913502109704641, + "grad_norm": 0.5804100632667542, + "learning_rate": 0.0007825020383723692, + "loss": 1.5594, + "step": 4658 + }, + { + "epoch": 0.49145569620253166, + "grad_norm": 0.5882404446601868, + "learning_rate": 0.0007822512127981218, + "loss": 1.5494, + "step": 4659 + }, + { + "epoch": 0.49156118143459915, + "grad_norm": 0.5628025531768799, + "learning_rate": 0.0007820003836099649, + "loss": 1.5682, + "step": 4660 + }, + { + "epoch": 0.49166666666666664, + "grad_norm": 
0.5942679047584534, + "learning_rate": 0.0007817495508360057, + "loss": 1.5729, + "step": 4661 + }, + { + "epoch": 0.4917721518987342, + "grad_norm": 0.5850052833557129, + "learning_rate": 0.0007814987145043511, + "loss": 1.5326, + "step": 4662 + }, + { + "epoch": 0.4918776371308017, + "grad_norm": 0.574260413646698, + "learning_rate": 0.0007812478746431085, + "loss": 1.5491, + "step": 4663 + }, + { + "epoch": 0.4919831223628692, + "grad_norm": 0.5829454064369202, + "learning_rate": 0.0007809970312803855, + "loss": 1.5548, + "step": 4664 + }, + { + "epoch": 0.4920886075949367, + "grad_norm": 0.6749242544174194, + "learning_rate": 0.0007807461844442906, + "loss": 1.561, + "step": 4665 + }, + { + "epoch": 0.4921940928270042, + "grad_norm": 0.6365500092506409, + "learning_rate": 0.0007804953341629326, + "loss": 1.5546, + "step": 4666 + }, + { + "epoch": 0.4922995780590717, + "grad_norm": 0.6073534488677979, + "learning_rate": 0.0007802444804644202, + "loss": 1.5678, + "step": 4667 + }, + { + "epoch": 0.49240506329113926, + "grad_norm": 0.7078602313995361, + "learning_rate": 0.0007799936233768632, + "loss": 1.5614, + "step": 4668 + }, + { + "epoch": 0.49251054852320675, + "grad_norm": 0.5405545830726624, + "learning_rate": 0.0007797427629283708, + "loss": 1.5718, + "step": 4669 + }, + { + "epoch": 0.49261603375527424, + "grad_norm": 0.5661066174507141, + "learning_rate": 0.0007794918991470537, + "loss": 1.5447, + "step": 4670 + }, + { + "epoch": 0.4927215189873418, + "grad_norm": 0.6139727830886841, + "learning_rate": 0.0007792410320610222, + "loss": 1.5642, + "step": 4671 + }, + { + "epoch": 0.4928270042194093, + "grad_norm": 0.5821015238761902, + "learning_rate": 0.0007789901616983872, + "loss": 1.5272, + "step": 4672 + }, + { + "epoch": 0.4929324894514768, + "grad_norm": 0.5815353393554688, + "learning_rate": 0.0007787392880872601, + "loss": 1.5692, + "step": 4673 + }, + { + "epoch": 0.4930379746835443, + "grad_norm": 0.5996816158294678, + "learning_rate": 
0.0007784884112557524, + "loss": 1.5964, + "step": 4674 + }, + { + "epoch": 0.4931434599156118, + "grad_norm": 0.5670466423034668, + "learning_rate": 0.0007782375312319761, + "loss": 1.5405, + "step": 4675 + }, + { + "epoch": 0.4932489451476793, + "grad_norm": 0.5735776424407959, + "learning_rate": 0.0007779866480440437, + "loss": 1.5347, + "step": 4676 + }, + { + "epoch": 0.49335443037974686, + "grad_norm": 0.6203917860984802, + "learning_rate": 0.0007777357617200679, + "loss": 1.5571, + "step": 4677 + }, + { + "epoch": 0.49345991561181435, + "grad_norm": 0.7225866913795471, + "learning_rate": 0.0007774848722881616, + "loss": 1.5494, + "step": 4678 + }, + { + "epoch": 0.49356540084388184, + "grad_norm": 0.6065405011177063, + "learning_rate": 0.0007772339797764385, + "loss": 1.5767, + "step": 4679 + }, + { + "epoch": 0.4936708860759494, + "grad_norm": 0.6363499760627747, + "learning_rate": 0.0007769830842130119, + "loss": 1.5302, + "step": 4680 + }, + { + "epoch": 0.4937763713080169, + "grad_norm": 0.5398289561271667, + "learning_rate": 0.0007767321856259963, + "loss": 1.5606, + "step": 4681 + }, + { + "epoch": 0.4938818565400844, + "grad_norm": 0.6051217317581177, + "learning_rate": 0.0007764812840435058, + "loss": 1.5386, + "step": 4682 + }, + { + "epoch": 0.4939873417721519, + "grad_norm": 0.6411893367767334, + "learning_rate": 0.0007762303794936556, + "loss": 1.5567, + "step": 4683 + }, + { + "epoch": 0.4940928270042194, + "grad_norm": 0.5854157209396362, + "learning_rate": 0.0007759794720045606, + "loss": 1.5167, + "step": 4684 + }, + { + "epoch": 0.4941983122362869, + "grad_norm": 0.624753475189209, + "learning_rate": 0.0007757285616043363, + "loss": 1.5167, + "step": 4685 + }, + { + "epoch": 0.49430379746835446, + "grad_norm": 0.6113045811653137, + "learning_rate": 0.0007754776483210981, + "loss": 1.5664, + "step": 4686 + }, + { + "epoch": 0.49440928270042195, + "grad_norm": 0.5555043816566467, + "learning_rate": 0.0007752267321829624, + "loss": 1.5897, + 
"step": 4687 + }, + { + "epoch": 0.49451476793248944, + "grad_norm": 0.6033937931060791, + "learning_rate": 0.0007749758132180459, + "loss": 1.5473, + "step": 4688 + }, + { + "epoch": 0.494620253164557, + "grad_norm": 0.5687035918235779, + "learning_rate": 0.0007747248914544646, + "loss": 1.5421, + "step": 4689 + }, + { + "epoch": 0.4947257383966245, + "grad_norm": 0.6324567198753357, + "learning_rate": 0.0007744739669203361, + "loss": 1.599, + "step": 4690 + }, + { + "epoch": 0.494831223628692, + "grad_norm": 0.5756410360336304, + "learning_rate": 0.0007742230396437775, + "loss": 1.5907, + "step": 4691 + }, + { + "epoch": 0.49493670886075947, + "grad_norm": 0.6168112754821777, + "learning_rate": 0.0007739721096529066, + "loss": 1.6036, + "step": 4692 + }, + { + "epoch": 0.495042194092827, + "grad_norm": 0.641732931137085, + "learning_rate": 0.0007737211769758412, + "loss": 1.5606, + "step": 4693 + }, + { + "epoch": 0.4951476793248945, + "grad_norm": 0.56248539686203, + "learning_rate": 0.0007734702416406997, + "loss": 1.5746, + "step": 4694 + }, + { + "epoch": 0.495253164556962, + "grad_norm": 0.6723158955574036, + "learning_rate": 0.0007732193036756006, + "loss": 1.5497, + "step": 4695 + }, + { + "epoch": 0.49535864978902955, + "grad_norm": 0.6792930960655212, + "learning_rate": 0.0007729683631086627, + "loss": 1.5798, + "step": 4696 + }, + { + "epoch": 0.49546413502109704, + "grad_norm": 0.6170209050178528, + "learning_rate": 0.0007727174199680051, + "loss": 1.5519, + "step": 4697 + }, + { + "epoch": 0.49556962025316453, + "grad_norm": 0.7106612324714661, + "learning_rate": 0.0007724664742817475, + "loss": 1.5309, + "step": 4698 + }, + { + "epoch": 0.4956751054852321, + "grad_norm": 0.6593798995018005, + "learning_rate": 0.0007722155260780093, + "loss": 1.5691, + "step": 4699 + }, + { + "epoch": 0.4957805907172996, + "grad_norm": 0.6572960019111633, + "learning_rate": 0.0007719645753849108, + "loss": 1.5457, + "step": 4700 + }, + { + "epoch": 
0.49588607594936707, + "grad_norm": 0.6671776175498962, + "learning_rate": 0.0007717136222305718, + "loss": 1.5665, + "step": 4701 + }, + { + "epoch": 0.4959915611814346, + "grad_norm": 0.7370084524154663, + "learning_rate": 0.0007714626666431134, + "loss": 1.5818, + "step": 4702 + }, + { + "epoch": 0.4960970464135021, + "grad_norm": 0.7252852320671082, + "learning_rate": 0.000771211708650656, + "loss": 1.5327, + "step": 4703 + }, + { + "epoch": 0.4962025316455696, + "grad_norm": 0.6009440422058105, + "learning_rate": 0.000770960748281321, + "loss": 1.5694, + "step": 4704 + }, + { + "epoch": 0.49630801687763715, + "grad_norm": 0.6308863162994385, + "learning_rate": 0.0007707097855632297, + "loss": 1.544, + "step": 4705 + }, + { + "epoch": 0.49641350210970464, + "grad_norm": 0.6758759617805481, + "learning_rate": 0.0007704588205245034, + "loss": 1.5456, + "step": 4706 + }, + { + "epoch": 0.49651898734177213, + "grad_norm": 0.6390590071678162, + "learning_rate": 0.0007702078531932645, + "loss": 1.6169, + "step": 4707 + }, + { + "epoch": 0.4966244725738397, + "grad_norm": 0.5914226770401001, + "learning_rate": 0.0007699568835976348, + "loss": 1.5819, + "step": 4708 + }, + { + "epoch": 0.4967299578059072, + "grad_norm": 0.6530932784080505, + "learning_rate": 0.0007697059117657368, + "loss": 1.5457, + "step": 4709 + }, + { + "epoch": 0.49683544303797467, + "grad_norm": 0.5624111890792847, + "learning_rate": 0.0007694549377256932, + "loss": 1.5694, + "step": 4710 + }, + { + "epoch": 0.4969409282700422, + "grad_norm": 0.651448130607605, + "learning_rate": 0.0007692039615056264, + "loss": 1.5769, + "step": 4711 + }, + { + "epoch": 0.4970464135021097, + "grad_norm": 0.6764376759529114, + "learning_rate": 0.0007689529831336604, + "loss": 1.5524, + "step": 4712 + }, + { + "epoch": 0.4971518987341772, + "grad_norm": 0.5818295478820801, + "learning_rate": 0.0007687020026379181, + "loss": 1.5382, + "step": 4713 + }, + { + "epoch": 0.49725738396624475, + "grad_norm": 
0.6537736058235168, + "learning_rate": 0.0007684510200465231, + "loss": 1.573, + "step": 4714 + }, + { + "epoch": 0.49736286919831224, + "grad_norm": 0.6120522022247314, + "learning_rate": 0.0007682000353875992, + "loss": 1.5537, + "step": 4715 + }, + { + "epoch": 0.49746835443037973, + "grad_norm": 0.573085606098175, + "learning_rate": 0.0007679490486892705, + "loss": 1.5434, + "step": 4716 + }, + { + "epoch": 0.4975738396624473, + "grad_norm": 0.7000346183776855, + "learning_rate": 0.0007676980599796616, + "loss": 1.5537, + "step": 4717 + }, + { + "epoch": 0.4976793248945148, + "grad_norm": 0.5856572985649109, + "learning_rate": 0.0007674470692868967, + "loss": 1.599, + "step": 4718 + }, + { + "epoch": 0.49778481012658227, + "grad_norm": 0.7478517889976501, + "learning_rate": 0.0007671960766391008, + "loss": 1.5683, + "step": 4719 + }, + { + "epoch": 0.4978902953586498, + "grad_norm": 0.6399938464164734, + "learning_rate": 0.0007669450820643987, + "loss": 1.5307, + "step": 4720 + }, + { + "epoch": 0.4979957805907173, + "grad_norm": 0.6596859097480774, + "learning_rate": 0.0007666940855909155, + "loss": 1.5567, + "step": 4721 + }, + { + "epoch": 0.4981012658227848, + "grad_norm": 0.6584997177124023, + "learning_rate": 0.000766443087246777, + "loss": 1.5683, + "step": 4722 + }, + { + "epoch": 0.49820675105485235, + "grad_norm": 0.7731082439422607, + "learning_rate": 0.0007661920870601085, + "loss": 1.569, + "step": 4723 + }, + { + "epoch": 0.49831223628691984, + "grad_norm": 0.7252548933029175, + "learning_rate": 0.000765941085059036, + "loss": 1.5725, + "step": 4724 + }, + { + "epoch": 0.49841772151898733, + "grad_norm": 0.682181715965271, + "learning_rate": 0.0007656900812716853, + "loss": 1.5563, + "step": 4725 + }, + { + "epoch": 0.4985232067510548, + "grad_norm": 0.7464695572853088, + "learning_rate": 0.0007654390757261827, + "loss": 1.5552, + "step": 4726 + }, + { + "epoch": 0.4986286919831224, + "grad_norm": 0.7472259402275085, + "learning_rate": 
0.0007651880684506548, + "loss": 1.5283, + "step": 4727 + }, + { + "epoch": 0.49873417721518987, + "grad_norm": 0.6048937439918518, + "learning_rate": 0.0007649370594732282, + "loss": 1.5811, + "step": 4728 + }, + { + "epoch": 0.49883966244725736, + "grad_norm": 0.591225802898407, + "learning_rate": 0.0007646860488220293, + "loss": 1.5382, + "step": 4729 + }, + { + "epoch": 0.4989451476793249, + "grad_norm": 0.5370999574661255, + "learning_rate": 0.0007644350365251855, + "loss": 1.5427, + "step": 4730 + }, + { + "epoch": 0.4990506329113924, + "grad_norm": 0.6098072528839111, + "learning_rate": 0.0007641840226108241, + "loss": 1.5734, + "step": 4731 + }, + { + "epoch": 0.4991561181434599, + "grad_norm": 0.6091346740722656, + "learning_rate": 0.000763933007107072, + "loss": 1.5403, + "step": 4732 + }, + { + "epoch": 0.49926160337552744, + "grad_norm": 0.5383923053741455, + "learning_rate": 0.0007636819900420572, + "loss": 1.5775, + "step": 4733 + }, + { + "epoch": 0.49936708860759493, + "grad_norm": 0.643825113773346, + "learning_rate": 0.0007634309714439069, + "loss": 1.5658, + "step": 4734 + }, + { + "epoch": 0.4994725738396624, + "grad_norm": 0.5676314234733582, + "learning_rate": 0.0007631799513407495, + "loss": 1.5813, + "step": 4735 + }, + { + "epoch": 0.49957805907173, + "grad_norm": 0.5740551948547363, + "learning_rate": 0.0007629289297607127, + "loss": 1.5619, + "step": 4736 + }, + { + "epoch": 0.49968354430379747, + "grad_norm": 0.6515213847160339, + "learning_rate": 0.0007626779067319251, + "loss": 1.5344, + "step": 4737 + }, + { + "epoch": 0.49978902953586496, + "grad_norm": 0.5891249179840088, + "learning_rate": 0.0007624268822825145, + "loss": 1.5102, + "step": 4738 + }, + { + "epoch": 0.4998945147679325, + "grad_norm": 0.6449539065361023, + "learning_rate": 0.00076217585644061, + "loss": 1.5491, + "step": 4739 + }, + { + "epoch": 0.5, + "grad_norm": 0.6532394289970398, + "learning_rate": 0.0007619248292343399, + "loss": 1.5619, + "step": 4740 + }, + { 
+ "epoch": 0.5001054852320675, + "grad_norm": 0.7767799496650696, + "learning_rate": 0.0007616738006918334, + "loss": 1.5939, + "step": 4741 + }, + { + "epoch": 0.500210970464135, + "grad_norm": 0.6775404810905457, + "learning_rate": 0.0007614227708412191, + "loss": 1.5479, + "step": 4742 + }, + { + "epoch": 0.5003164556962025, + "grad_norm": 0.6595564484596252, + "learning_rate": 0.0007611717397106265, + "loss": 1.5593, + "step": 4743 + }, + { + "epoch": 0.5004219409282701, + "grad_norm": 0.658949613571167, + "learning_rate": 0.0007609207073281848, + "loss": 1.5993, + "step": 4744 + }, + { + "epoch": 0.5005274261603375, + "grad_norm": 0.6306632161140442, + "learning_rate": 0.0007606696737220233, + "loss": 1.5486, + "step": 4745 + }, + { + "epoch": 0.5006329113924051, + "grad_norm": 0.5985409021377563, + "learning_rate": 0.000760418638920272, + "loss": 1.5832, + "step": 4746 + }, + { + "epoch": 0.5007383966244726, + "grad_norm": 0.6125796437263489, + "learning_rate": 0.0007601676029510597, + "loss": 1.5815, + "step": 4747 + }, + { + "epoch": 0.50084388185654, + "grad_norm": 0.6137559413909912, + "learning_rate": 0.000759916565842517, + "loss": 1.5895, + "step": 4748 + }, + { + "epoch": 0.5009493670886076, + "grad_norm": 0.6673507690429688, + "learning_rate": 0.0007596655276227739, + "loss": 1.5861, + "step": 4749 + }, + { + "epoch": 0.5010548523206751, + "grad_norm": 0.7437393665313721, + "learning_rate": 0.0007594144883199599, + "loss": 1.5635, + "step": 4750 + }, + { + "epoch": 0.5011603375527426, + "grad_norm": 0.6684021353721619, + "learning_rate": 0.0007591634479622056, + "loss": 1.5862, + "step": 4751 + }, + { + "epoch": 0.5012658227848101, + "grad_norm": 0.639117419719696, + "learning_rate": 0.0007589124065776414, + "loss": 1.5562, + "step": 4752 + }, + { + "epoch": 0.5013713080168777, + "grad_norm": 0.6525042653083801, + "learning_rate": 0.0007586613641943976, + "loss": 1.5783, + "step": 4753 + }, + { + "epoch": 0.5014767932489451, + "grad_norm": 
0.6210924983024597, + "learning_rate": 0.0007584103208406048, + "loss": 1.5584, + "step": 4754 + }, + { + "epoch": 0.5015822784810127, + "grad_norm": 0.6638045310974121, + "learning_rate": 0.0007581592765443933, + "loss": 1.5453, + "step": 4755 + }, + { + "epoch": 0.5016877637130802, + "grad_norm": 0.5965690016746521, + "learning_rate": 0.0007579082313338943, + "loss": 1.52, + "step": 4756 + }, + { + "epoch": 0.5017932489451477, + "grad_norm": 0.5813931226730347, + "learning_rate": 0.0007576571852372386, + "loss": 1.5719, + "step": 4757 + }, + { + "epoch": 0.5018987341772152, + "grad_norm": 0.6824973821640015, + "learning_rate": 0.0007574061382825572, + "loss": 1.5428, + "step": 4758 + }, + { + "epoch": 0.5020042194092827, + "grad_norm": 0.5391411185264587, + "learning_rate": 0.0007571550904979812, + "loss": 1.5458, + "step": 4759 + }, + { + "epoch": 0.5021097046413502, + "grad_norm": 0.6740393042564392, + "learning_rate": 0.0007569040419116413, + "loss": 1.5623, + "step": 4760 + }, + { + "epoch": 0.5022151898734177, + "grad_norm": 0.5855896472930908, + "learning_rate": 0.0007566529925516692, + "loss": 1.5643, + "step": 4761 + }, + { + "epoch": 0.5023206751054853, + "grad_norm": 0.681473970413208, + "learning_rate": 0.0007564019424461962, + "loss": 1.5849, + "step": 4762 + }, + { + "epoch": 0.5024261603375527, + "grad_norm": 0.5686348676681519, + "learning_rate": 0.0007561508916233535, + "loss": 1.5465, + "step": 4763 + }, + { + "epoch": 0.5025316455696203, + "grad_norm": 0.5829660892486572, + "learning_rate": 0.0007558998401112727, + "loss": 1.5603, + "step": 4764 + }, + { + "epoch": 0.5026371308016878, + "grad_norm": 0.6121845841407776, + "learning_rate": 0.0007556487879380856, + "loss": 1.5407, + "step": 4765 + }, + { + "epoch": 0.5027426160337553, + "grad_norm": 0.6247478723526001, + "learning_rate": 0.0007553977351319235, + "loss": 1.5236, + "step": 4766 + }, + { + "epoch": 0.5028481012658228, + "grad_norm": 0.6021741032600403, + "learning_rate": 
0.0007551466817209183, + "loss": 1.5716, + "step": 4767 + }, + { + "epoch": 0.5029535864978903, + "grad_norm": 0.7822777628898621, + "learning_rate": 0.0007548956277332016, + "loss": 1.5412, + "step": 4768 + }, + { + "epoch": 0.5030590717299578, + "grad_norm": 0.6046723127365112, + "learning_rate": 0.0007546445731969056, + "loss": 1.5963, + "step": 4769 + }, + { + "epoch": 0.5031645569620253, + "grad_norm": 0.6465094089508057, + "learning_rate": 0.000754393518140162, + "loss": 1.5648, + "step": 4770 + }, + { + "epoch": 0.5032700421940929, + "grad_norm": 0.6657158136367798, + "learning_rate": 0.0007541424625911026, + "loss": 1.543, + "step": 4771 + }, + { + "epoch": 0.5033755274261603, + "grad_norm": 0.5677445530891418, + "learning_rate": 0.0007538914065778598, + "loss": 1.6264, + "step": 4772 + }, + { + "epoch": 0.5034810126582279, + "grad_norm": 0.5552957653999329, + "learning_rate": 0.0007536403501285653, + "loss": 1.5636, + "step": 4773 + }, + { + "epoch": 0.5035864978902953, + "grad_norm": 0.6761462688446045, + "learning_rate": 0.0007533892932713517, + "loss": 1.5398, + "step": 4774 + }, + { + "epoch": 0.5036919831223629, + "grad_norm": 0.6058440208435059, + "learning_rate": 0.0007531382360343507, + "loss": 1.5913, + "step": 4775 + }, + { + "epoch": 0.5037974683544304, + "grad_norm": 0.6996517777442932, + "learning_rate": 0.0007528871784456948, + "loss": 1.5761, + "step": 4776 + }, + { + "epoch": 0.5039029535864978, + "grad_norm": 0.6511644721031189, + "learning_rate": 0.0007526361205335159, + "loss": 1.5432, + "step": 4777 + }, + { + "epoch": 0.5040084388185654, + "grad_norm": 0.613312840461731, + "learning_rate": 0.0007523850623259469, + "loss": 1.5925, + "step": 4778 + }, + { + "epoch": 0.5041139240506329, + "grad_norm": 0.7003174424171448, + "learning_rate": 0.0007521340038511196, + "loss": 1.5479, + "step": 4779 + }, + { + "epoch": 0.5042194092827004, + "grad_norm": 0.6523002982139587, + "learning_rate": 0.0007518829451371665, + "loss": 1.544, + "step": 
4780 + }, + { + "epoch": 0.5043248945147679, + "grad_norm": 0.6429961919784546, + "learning_rate": 0.0007516318862122199, + "loss": 1.5285, + "step": 4781 + }, + { + "epoch": 0.5044303797468355, + "grad_norm": 0.6935949921607971, + "learning_rate": 0.0007513808271044125, + "loss": 1.5862, + "step": 4782 + }, + { + "epoch": 0.5045358649789029, + "grad_norm": 0.5542629957199097, + "learning_rate": 0.0007511297678418766, + "loss": 1.5676, + "step": 4783 + }, + { + "epoch": 0.5046413502109705, + "grad_norm": 0.5985938310623169, + "learning_rate": 0.0007508787084527445, + "loss": 1.5423, + "step": 4784 + }, + { + "epoch": 0.504746835443038, + "grad_norm": 0.6609524488449097, + "learning_rate": 0.0007506276489651489, + "loss": 1.5667, + "step": 4785 + }, + { + "epoch": 0.5048523206751054, + "grad_norm": 0.5551084280014038, + "learning_rate": 0.0007503765894072217, + "loss": 1.5421, + "step": 4786 + }, + { + "epoch": 0.504957805907173, + "grad_norm": 0.6941160559654236, + "learning_rate": 0.000750125529807096, + "loss": 1.538, + "step": 4787 + }, + { + "epoch": 0.5050632911392405, + "grad_norm": 0.6196778416633606, + "learning_rate": 0.0007498744701929041, + "loss": 1.542, + "step": 4788 + }, + { + "epoch": 0.505168776371308, + "grad_norm": 0.6853079199790955, + "learning_rate": 0.0007496234105927785, + "loss": 1.5102, + "step": 4789 + }, + { + "epoch": 0.5052742616033755, + "grad_norm": 0.6960750818252563, + "learning_rate": 0.0007493723510348516, + "loss": 1.5925, + "step": 4790 + }, + { + "epoch": 0.5053797468354431, + "grad_norm": 0.6323269605636597, + "learning_rate": 0.0007491212915472557, + "loss": 1.587, + "step": 4791 + }, + { + "epoch": 0.5054852320675105, + "grad_norm": 0.8837481141090393, + "learning_rate": 0.0007488702321581234, + "loss": 1.5205, + "step": 4792 + }, + { + "epoch": 0.505590717299578, + "grad_norm": 0.6380484700202942, + "learning_rate": 0.0007486191728955873, + "loss": 1.5771, + "step": 4793 + }, + { + "epoch": 0.5056962025316456, + 
"grad_norm": 0.6929970383644104, + "learning_rate": 0.00074836811378778, + "loss": 1.6176, + "step": 4794 + }, + { + "epoch": 0.505801687763713, + "grad_norm": 0.7546747326850891, + "learning_rate": 0.0007481170548628335, + "loss": 1.5519, + "step": 4795 + }, + { + "epoch": 0.5059071729957806, + "grad_norm": 0.6387698650360107, + "learning_rate": 0.0007478659961488805, + "loss": 1.5428, + "step": 4796 + }, + { + "epoch": 0.5060126582278481, + "grad_norm": 0.7152124643325806, + "learning_rate": 0.0007476149376740533, + "loss": 1.5502, + "step": 4797 + }, + { + "epoch": 0.5061181434599156, + "grad_norm": 0.6230798363685608, + "learning_rate": 0.0007473638794664841, + "loss": 1.5091, + "step": 4798 + }, + { + "epoch": 0.5062236286919831, + "grad_norm": 0.8059837222099304, + "learning_rate": 0.0007471128215543056, + "loss": 1.5523, + "step": 4799 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 0.6262694597244263, + "learning_rate": 0.0007468617639656496, + "loss": 1.5326, + "step": 4800 + }, + { + "epoch": 0.5064345991561181, + "grad_norm": 0.7508646845817566, + "learning_rate": 0.0007466107067286483, + "loss": 1.5385, + "step": 4801 + }, + { + "epoch": 0.5065400843881857, + "grad_norm": 0.5980948209762573, + "learning_rate": 0.0007463596498714346, + "loss": 1.5596, + "step": 4802 + }, + { + "epoch": 0.5066455696202532, + "grad_norm": 0.6347416043281555, + "learning_rate": 0.0007461085934221402, + "loss": 1.5446, + "step": 4803 + }, + { + "epoch": 0.5067510548523206, + "grad_norm": 0.7432201504707336, + "learning_rate": 0.0007458575374088974, + "loss": 1.536, + "step": 4804 + }, + { + "epoch": 0.5068565400843882, + "grad_norm": 0.6142000555992126, + "learning_rate": 0.0007456064818598382, + "loss": 1.5446, + "step": 4805 + }, + { + "epoch": 0.5069620253164557, + "grad_norm": 0.772668182849884, + "learning_rate": 0.0007453554268030946, + "loss": 1.5595, + "step": 4806 + }, + { + "epoch": 0.5070675105485232, + "grad_norm": 0.5850962400436401, + "learning_rate": 
0.0007451043722667985, + "loss": 1.5857, + "step": 4807 + }, + { + "epoch": 0.5071729957805907, + "grad_norm": 0.7524467706680298, + "learning_rate": 0.000744853318279082, + "loss": 1.5593, + "step": 4808 + }, + { + "epoch": 0.5072784810126583, + "grad_norm": 0.631938636302948, + "learning_rate": 0.0007446022648680768, + "loss": 1.5647, + "step": 4809 + }, + { + "epoch": 0.5073839662447257, + "grad_norm": 0.6362059116363525, + "learning_rate": 0.0007443512120619144, + "loss": 1.5512, + "step": 4810 + }, + { + "epoch": 0.5074894514767933, + "grad_norm": 0.6835891604423523, + "learning_rate": 0.0007441001598887273, + "loss": 1.5588, + "step": 4811 + }, + { + "epoch": 0.5075949367088608, + "grad_norm": 0.5670980215072632, + "learning_rate": 0.0007438491083766465, + "loss": 1.5553, + "step": 4812 + }, + { + "epoch": 0.5077004219409282, + "grad_norm": 0.6187942028045654, + "learning_rate": 0.000743598057553804, + "loss": 1.6036, + "step": 4813 + }, + { + "epoch": 0.5078059071729958, + "grad_norm": 0.6124381422996521, + "learning_rate": 0.0007433470074483309, + "loss": 1.5885, + "step": 4814 + }, + { + "epoch": 0.5079113924050633, + "grad_norm": 0.5748618245124817, + "learning_rate": 0.0007430959580883589, + "loss": 1.5688, + "step": 4815 + }, + { + "epoch": 0.5080168776371308, + "grad_norm": 0.631779134273529, + "learning_rate": 0.0007428449095020192, + "loss": 1.5389, + "step": 4816 + }, + { + "epoch": 0.5081223628691983, + "grad_norm": 0.6257965564727783, + "learning_rate": 0.000742593861717443, + "loss": 1.5599, + "step": 4817 + }, + { + "epoch": 0.5082278481012659, + "grad_norm": 0.6423956155776978, + "learning_rate": 0.0007423428147627613, + "loss": 1.6153, + "step": 4818 + }, + { + "epoch": 0.5083333333333333, + "grad_norm": 0.7300454378128052, + "learning_rate": 0.0007420917686661055, + "loss": 1.5926, + "step": 4819 + }, + { + "epoch": 0.5084388185654009, + "grad_norm": 0.6331930160522461, + "learning_rate": 0.0007418407234556067, + "loss": 1.5435, + "step": 
4820 + }, + { + "epoch": 0.5085443037974684, + "grad_norm": 0.6901171207427979, + "learning_rate": 0.0007415896791593955, + "loss": 1.5813, + "step": 4821 + }, + { + "epoch": 0.5086497890295358, + "grad_norm": 0.6608433723449707, + "learning_rate": 0.0007413386358056025, + "loss": 1.5473, + "step": 4822 + }, + { + "epoch": 0.5087552742616034, + "grad_norm": 0.6512033343315125, + "learning_rate": 0.0007410875934223588, + "loss": 1.5702, + "step": 4823 + }, + { + "epoch": 0.5088607594936709, + "grad_norm": 0.8108649849891663, + "learning_rate": 0.0007408365520377945, + "loss": 1.5758, + "step": 4824 + }, + { + "epoch": 0.5089662447257384, + "grad_norm": 0.6593107581138611, + "learning_rate": 0.0007405855116800403, + "loss": 1.5608, + "step": 4825 + }, + { + "epoch": 0.5090717299578059, + "grad_norm": 0.7654700875282288, + "learning_rate": 0.0007403344723772265, + "loss": 1.5194, + "step": 4826 + }, + { + "epoch": 0.5091772151898735, + "grad_norm": 0.8046720027923584, + "learning_rate": 0.0007400834341574829, + "loss": 1.537, + "step": 4827 + }, + { + "epoch": 0.5092827004219409, + "grad_norm": 0.7406296730041504, + "learning_rate": 0.0007398323970489402, + "loss": 1.5661, + "step": 4828 + }, + { + "epoch": 0.5093881856540085, + "grad_norm": 0.7102281451225281, + "learning_rate": 0.0007395813610797283, + "loss": 1.5451, + "step": 4829 + }, + { + "epoch": 0.509493670886076, + "grad_norm": 0.7215830087661743, + "learning_rate": 0.0007393303262779767, + "loss": 1.5555, + "step": 4830 + }, + { + "epoch": 0.5095991561181434, + "grad_norm": 0.7242838740348816, + "learning_rate": 0.0007390792926718153, + "loss": 1.5593, + "step": 4831 + }, + { + "epoch": 0.509704641350211, + "grad_norm": 0.6557716131210327, + "learning_rate": 0.0007388282602893737, + "loss": 1.574, + "step": 4832 + }, + { + "epoch": 0.5098101265822785, + "grad_norm": 0.6202520132064819, + "learning_rate": 0.000738577229158781, + "loss": 1.52, + "step": 4833 + }, + { + "epoch": 0.509915611814346, + 
"grad_norm": 0.6598552465438843, + "learning_rate": 0.000738326199308167, + "loss": 1.5383, + "step": 4834 + }, + { + "epoch": 0.5100210970464135, + "grad_norm": 0.6621387004852295, + "learning_rate": 0.0007380751707656603, + "loss": 1.5569, + "step": 4835 + }, + { + "epoch": 0.5101265822784811, + "grad_norm": 0.6309072971343994, + "learning_rate": 0.0007378241435593901, + "loss": 1.5171, + "step": 4836 + }, + { + "epoch": 0.5102320675105485, + "grad_norm": 0.6116818785667419, + "learning_rate": 0.0007375731177174855, + "loss": 1.5284, + "step": 4837 + }, + { + "epoch": 0.510337552742616, + "grad_norm": 0.6441879868507385, + "learning_rate": 0.0007373220932680751, + "loss": 1.5711, + "step": 4838 + }, + { + "epoch": 0.5104430379746835, + "grad_norm": 0.6521386504173279, + "learning_rate": 0.0007370710702392873, + "loss": 1.5443, + "step": 4839 + }, + { + "epoch": 0.510548523206751, + "grad_norm": 0.6133905649185181, + "learning_rate": 0.0007368200486592507, + "loss": 1.5972, + "step": 4840 + }, + { + "epoch": 0.5106540084388186, + "grad_norm": 0.6774051785469055, + "learning_rate": 0.0007365690285560932, + "loss": 1.5564, + "step": 4841 + }, + { + "epoch": 0.510759493670886, + "grad_norm": 0.6530702114105225, + "learning_rate": 0.0007363180099579431, + "loss": 1.5682, + "step": 4842 + }, + { + "epoch": 0.5108649789029536, + "grad_norm": 0.67586350440979, + "learning_rate": 0.0007360669928929282, + "loss": 1.5668, + "step": 4843 + }, + { + "epoch": 0.5109704641350211, + "grad_norm": 0.5757574439048767, + "learning_rate": 0.000735815977389176, + "loss": 1.5595, + "step": 4844 + }, + { + "epoch": 0.5110759493670886, + "grad_norm": 0.6988800168037415, + "learning_rate": 0.0007355649634748143, + "loss": 1.5593, + "step": 4845 + }, + { + "epoch": 0.5111814345991561, + "grad_norm": 0.6088206768035889, + "learning_rate": 0.0007353139511779707, + "loss": 1.5825, + "step": 4846 + }, + { + "epoch": 0.5112869198312237, + "grad_norm": 0.6209681630134583, + "learning_rate": 
0.000735062940526772, + "loss": 1.5535, + "step": 4847 + }, + { + "epoch": 0.5113924050632911, + "grad_norm": 0.6518624424934387, + "learning_rate": 0.0007348119315493453, + "loss": 1.5744, + "step": 4848 + }, + { + "epoch": 0.5114978902953586, + "grad_norm": 0.66935795545578, + "learning_rate": 0.0007345609242738173, + "loss": 1.5607, + "step": 4849 + }, + { + "epoch": 0.5116033755274262, + "grad_norm": 0.6003526449203491, + "learning_rate": 0.0007343099187283149, + "loss": 1.5834, + "step": 4850 + }, + { + "epoch": 0.5117088607594936, + "grad_norm": 0.7175798416137695, + "learning_rate": 0.0007340589149409644, + "loss": 1.5982, + "step": 4851 + }, + { + "epoch": 0.5118143459915612, + "grad_norm": 0.56234210729599, + "learning_rate": 0.0007338079129398917, + "loss": 1.5921, + "step": 4852 + }, + { + "epoch": 0.5119198312236287, + "grad_norm": 0.713137149810791, + "learning_rate": 0.0007335569127532231, + "loss": 1.529, + "step": 4853 + }, + { + "epoch": 0.5120253164556962, + "grad_norm": 0.6360917687416077, + "learning_rate": 0.0007333059144090845, + "loss": 1.5939, + "step": 4854 + }, + { + "epoch": 0.5121308016877637, + "grad_norm": 0.6593864560127258, + "learning_rate": 0.0007330549179356014, + "loss": 1.5401, + "step": 4855 + }, + { + "epoch": 0.5122362869198313, + "grad_norm": 0.6819075345993042, + "learning_rate": 0.0007328039233608993, + "loss": 1.5442, + "step": 4856 + }, + { + "epoch": 0.5123417721518987, + "grad_norm": 0.60283362865448, + "learning_rate": 0.0007325529307131034, + "loss": 1.5425, + "step": 4857 + }, + { + "epoch": 0.5124472573839662, + "grad_norm": 0.6813228130340576, + "learning_rate": 0.0007323019400203386, + "loss": 1.5246, + "step": 4858 + }, + { + "epoch": 0.5125527426160338, + "grad_norm": 0.6060045957565308, + "learning_rate": 0.0007320509513107296, + "loss": 1.5117, + "step": 4859 + }, + { + "epoch": 0.5126582278481012, + "grad_norm": 0.6135107278823853, + "learning_rate": 0.0007317999646124011, + "loss": 1.547, + "step": 4860 + 
}, + { + "epoch": 0.5127637130801688, + "grad_norm": 0.6675465106964111, + "learning_rate": 0.0007315489799534772, + "loss": 1.5402, + "step": 4861 + }, + { + "epoch": 0.5128691983122363, + "grad_norm": 0.5976054072380066, + "learning_rate": 0.000731297997362082, + "loss": 1.5348, + "step": 4862 + }, + { + "epoch": 0.5129746835443038, + "grad_norm": 0.7067854404449463, + "learning_rate": 0.0007310470168663397, + "loss": 1.5642, + "step": 4863 + }, + { + "epoch": 0.5130801687763713, + "grad_norm": 0.6285279393196106, + "learning_rate": 0.0007307960384943736, + "loss": 1.5736, + "step": 4864 + }, + { + "epoch": 0.5131856540084389, + "grad_norm": 0.6748374104499817, + "learning_rate": 0.000730545062274307, + "loss": 1.5237, + "step": 4865 + }, + { + "epoch": 0.5132911392405063, + "grad_norm": 0.6186881065368652, + "learning_rate": 0.0007302940882342634, + "loss": 1.5464, + "step": 4866 + }, + { + "epoch": 0.5133966244725738, + "grad_norm": 0.6795584559440613, + "learning_rate": 0.0007300431164023653, + "loss": 1.5114, + "step": 4867 + }, + { + "epoch": 0.5135021097046414, + "grad_norm": 0.554123044013977, + "learning_rate": 0.0007297921468067357, + "loss": 1.5479, + "step": 4868 + }, + { + "epoch": 0.5136075949367088, + "grad_norm": 0.6777911186218262, + "learning_rate": 0.0007295411794754967, + "loss": 1.5457, + "step": 4869 + }, + { + "epoch": 0.5137130801687764, + "grad_norm": 0.608818769454956, + "learning_rate": 0.0007292902144367704, + "loss": 1.5486, + "step": 4870 + }, + { + "epoch": 0.5138185654008439, + "grad_norm": 0.6240236759185791, + "learning_rate": 0.0007290392517186791, + "loss": 1.576, + "step": 4871 + }, + { + "epoch": 0.5139240506329114, + "grad_norm": 0.612262487411499, + "learning_rate": 0.000728788291349344, + "loss": 1.5412, + "step": 4872 + }, + { + "epoch": 0.5140295358649789, + "grad_norm": 0.8245949745178223, + "learning_rate": 0.0007285373333568868, + "loss": 1.6047, + "step": 4873 + }, + { + "epoch": 0.5141350210970465, + "grad_norm": 
0.9572369456291199, + "learning_rate": 0.0007282863777694283, + "loss": 1.5662, + "step": 4874 + }, + { + "epoch": 0.5142405063291139, + "grad_norm": 0.6283900141716003, + "learning_rate": 0.0007280354246150894, + "loss": 1.5223, + "step": 4875 + }, + { + "epoch": 0.5143459915611814, + "grad_norm": 0.8337814807891846, + "learning_rate": 0.0007277844739219908, + "loss": 1.5745, + "step": 4876 + }, + { + "epoch": 0.514451476793249, + "grad_norm": 0.6176243424415588, + "learning_rate": 0.0007275335257182526, + "loss": 1.5613, + "step": 4877 + }, + { + "epoch": 0.5145569620253164, + "grad_norm": 0.8174070715904236, + "learning_rate": 0.000727282580031995, + "loss": 1.591, + "step": 4878 + }, + { + "epoch": 0.514662447257384, + "grad_norm": 0.7793298363685608, + "learning_rate": 0.0007270316368913374, + "loss": 1.5877, + "step": 4879 + }, + { + "epoch": 0.5147679324894515, + "grad_norm": 0.5895313024520874, + "learning_rate": 0.0007267806963243995, + "loss": 1.5296, + "step": 4880 + }, + { + "epoch": 0.514873417721519, + "grad_norm": 0.8111134767532349, + "learning_rate": 0.0007265297583593003, + "loss": 1.5733, + "step": 4881 + }, + { + "epoch": 0.5149789029535865, + "grad_norm": 0.567281186580658, + "learning_rate": 0.0007262788230241588, + "loss": 1.5533, + "step": 4882 + }, + { + "epoch": 0.515084388185654, + "grad_norm": 0.7373409867286682, + "learning_rate": 0.0007260278903470935, + "loss": 1.5718, + "step": 4883 + }, + { + "epoch": 0.5151898734177215, + "grad_norm": 0.5973716378211975, + "learning_rate": 0.0007257769603562227, + "loss": 1.5624, + "step": 4884 + }, + { + "epoch": 0.515295358649789, + "grad_norm": 0.7496381998062134, + "learning_rate": 0.0007255260330796639, + "loss": 1.531, + "step": 4885 + }, + { + "epoch": 0.5154008438818566, + "grad_norm": 0.6137037873268127, + "learning_rate": 0.0007252751085455355, + "loss": 1.576, + "step": 4886 + }, + { + "epoch": 0.515506329113924, + "grad_norm": 0.669029951095581, + "learning_rate": 0.0007250241867819544, 
+ "loss": 1.5482, + "step": 4887 + }, + { + "epoch": 0.5156118143459916, + "grad_norm": 0.637636661529541, + "learning_rate": 0.0007247732678170375, + "loss": 1.5904, + "step": 4888 + }, + { + "epoch": 0.5157172995780591, + "grad_norm": 0.6506415009498596, + "learning_rate": 0.0007245223516789019, + "loss": 1.5299, + "step": 4889 + }, + { + "epoch": 0.5158227848101266, + "grad_norm": 0.5869790315628052, + "learning_rate": 0.0007242714383956639, + "loss": 1.5297, + "step": 4890 + }, + { + "epoch": 0.5159282700421941, + "grad_norm": 0.7154970765113831, + "learning_rate": 0.0007240205279954395, + "loss": 1.52, + "step": 4891 + }, + { + "epoch": 0.5160337552742617, + "grad_norm": 0.6481339931488037, + "learning_rate": 0.0007237696205063444, + "loss": 1.5867, + "step": 4892 + }, + { + "epoch": 0.5161392405063291, + "grad_norm": 0.7020748257637024, + "learning_rate": 0.0007235187159564942, + "loss": 1.5584, + "step": 4893 + }, + { + "epoch": 0.5162447257383966, + "grad_norm": 0.776710033416748, + "learning_rate": 0.0007232678143740038, + "loss": 1.5727, + "step": 4894 + }, + { + "epoch": 0.5163502109704642, + "grad_norm": 0.5705628395080566, + "learning_rate": 0.0007230169157869882, + "loss": 1.5352, + "step": 4895 + }, + { + "epoch": 0.5164556962025316, + "grad_norm": 0.7594408392906189, + "learning_rate": 0.0007227660202235616, + "loss": 1.5967, + "step": 4896 + }, + { + "epoch": 0.5165611814345992, + "grad_norm": 0.6397126317024231, + "learning_rate": 0.0007225151277118384, + "loss": 1.5554, + "step": 4897 + }, + { + "epoch": 0.5166666666666667, + "grad_norm": 0.6529342532157898, + "learning_rate": 0.0007222642382799322, + "loss": 1.5809, + "step": 4898 + }, + { + "epoch": 0.5167721518987342, + "grad_norm": 0.5641254782676697, + "learning_rate": 0.0007220133519559563, + "loss": 1.5515, + "step": 4899 + }, + { + "epoch": 0.5168776371308017, + "grad_norm": 0.6432188153266907, + "learning_rate": 0.000721762468768024, + "loss": 1.5726, + "step": 4900 + }, + { + "epoch": 
0.5169831223628693, + "grad_norm": 0.6474243998527527, + "learning_rate": 0.0007215115887442478, + "loss": 1.5774, + "step": 4901 + }, + { + "epoch": 0.5170886075949367, + "grad_norm": 0.6334196925163269, + "learning_rate": 0.0007212607119127402, + "loss": 1.5245, + "step": 4902 + }, + { + "epoch": 0.5171940928270042, + "grad_norm": 0.744361400604248, + "learning_rate": 0.000721009838301613, + "loss": 1.5431, + "step": 4903 + }, + { + "epoch": 0.5172995780590718, + "grad_norm": 0.5706969499588013, + "learning_rate": 0.000720758967938978, + "loss": 1.564, + "step": 4904 + }, + { + "epoch": 0.5174050632911392, + "grad_norm": 0.5773879289627075, + "learning_rate": 0.0007205081008529463, + "loss": 1.5749, + "step": 4905 + }, + { + "epoch": 0.5175105485232068, + "grad_norm": 0.6922861933708191, + "learning_rate": 0.0007202572370716292, + "loss": 1.5503, + "step": 4906 + }, + { + "epoch": 0.5176160337552742, + "grad_norm": 0.5911425948143005, + "learning_rate": 0.000720006376623137, + "loss": 1.4915, + "step": 4907 + }, + { + "epoch": 0.5177215189873418, + "grad_norm": 0.791577160358429, + "learning_rate": 0.0007197555195355799, + "loss": 1.5876, + "step": 4908 + }, + { + "epoch": 0.5178270042194093, + "grad_norm": 0.7142723798751831, + "learning_rate": 0.0007195046658370675, + "loss": 1.547, + "step": 4909 + }, + { + "epoch": 0.5179324894514767, + "grad_norm": 0.6603586673736572, + "learning_rate": 0.0007192538155557094, + "loss": 1.5937, + "step": 4910 + }, + { + "epoch": 0.5180379746835443, + "grad_norm": 0.871300995349884, + "learning_rate": 0.0007190029687196148, + "loss": 1.5549, + "step": 4911 + }, + { + "epoch": 0.5181434599156118, + "grad_norm": 0.5672197341918945, + "learning_rate": 0.0007187521253568919, + "loss": 1.5254, + "step": 4912 + }, + { + "epoch": 0.5182489451476793, + "grad_norm": 0.7588074207305908, + "learning_rate": 0.0007185012854956491, + "loss": 1.5261, + "step": 4913 + }, + { + "epoch": 0.5183544303797468, + "grad_norm": 0.7040348052978516, + 
"learning_rate": 0.0007182504491639942, + "loss": 1.5186, + "step": 4914 + }, + { + "epoch": 0.5184599156118144, + "grad_norm": 0.6017284393310547, + "learning_rate": 0.000717999616390035, + "loss": 1.5578, + "step": 4915 + }, + { + "epoch": 0.5185654008438818, + "grad_norm": 0.6483529210090637, + "learning_rate": 0.0007177487872018784, + "loss": 1.5671, + "step": 4916 + }, + { + "epoch": 0.5186708860759494, + "grad_norm": 0.6077374219894409, + "learning_rate": 0.000717497961627631, + "loss": 1.5651, + "step": 4917 + }, + { + "epoch": 0.5187763713080169, + "grad_norm": 0.5781785845756531, + "learning_rate": 0.0007172471396953991, + "loss": 1.5289, + "step": 4918 + }, + { + "epoch": 0.5188818565400843, + "grad_norm": 0.6159402132034302, + "learning_rate": 0.0007169963214332885, + "loss": 1.5393, + "step": 4919 + }, + { + "epoch": 0.5189873417721519, + "grad_norm": 0.693645179271698, + "learning_rate": 0.0007167455068694046, + "loss": 1.5765, + "step": 4920 + }, + { + "epoch": 0.5190928270042194, + "grad_norm": 0.5824750065803528, + "learning_rate": 0.0007164946960318525, + "loss": 1.5716, + "step": 4921 + }, + { + "epoch": 0.5191983122362869, + "grad_norm": 0.6120212078094482, + "learning_rate": 0.0007162438889487365, + "loss": 1.591, + "step": 4922 + }, + { + "epoch": 0.5193037974683544, + "grad_norm": 0.5752853751182556, + "learning_rate": 0.0007159930856481614, + "loss": 1.5652, + "step": 4923 + }, + { + "epoch": 0.519409282700422, + "grad_norm": 0.6284745931625366, + "learning_rate": 0.0007157422861582306, + "loss": 1.5074, + "step": 4924 + }, + { + "epoch": 0.5195147679324894, + "grad_norm": 0.5872123837471008, + "learning_rate": 0.0007154914905070475, + "loss": 1.5462, + "step": 4925 + }, + { + "epoch": 0.519620253164557, + "grad_norm": 0.6032551527023315, + "learning_rate": 0.0007152406987227149, + "loss": 1.5631, + "step": 4926 + }, + { + "epoch": 0.5197257383966245, + "grad_norm": 0.548460066318512, + "learning_rate": 0.0007149899108333354, + "loss": 
1.5349, + "step": 4927 + }, + { + "epoch": 0.5198312236286919, + "grad_norm": 0.603483259677887, + "learning_rate": 0.0007147391268670109, + "loss": 1.5656, + "step": 4928 + }, + { + "epoch": 0.5199367088607595, + "grad_norm": 0.6236487030982971, + "learning_rate": 0.000714488346851843, + "loss": 1.5597, + "step": 4929 + }, + { + "epoch": 0.520042194092827, + "grad_norm": 0.5961970090866089, + "learning_rate": 0.000714237570815933, + "loss": 1.5452, + "step": 4930 + }, + { + "epoch": 0.5201476793248945, + "grad_norm": 0.6148812770843506, + "learning_rate": 0.0007139867987873812, + "loss": 1.554, + "step": 4931 + }, + { + "epoch": 0.520253164556962, + "grad_norm": 0.5592229962348938, + "learning_rate": 0.0007137360307942885, + "loss": 1.5708, + "step": 4932 + }, + { + "epoch": 0.5203586497890296, + "grad_norm": 0.618757426738739, + "learning_rate": 0.0007134852668647543, + "loss": 1.5843, + "step": 4933 + }, + { + "epoch": 0.520464135021097, + "grad_norm": 0.6104587912559509, + "learning_rate": 0.0007132345070268781, + "loss": 1.5492, + "step": 4934 + }, + { + "epoch": 0.5205696202531646, + "grad_norm": 0.5918736457824707, + "learning_rate": 0.0007129837513087587, + "loss": 1.4977, + "step": 4935 + }, + { + "epoch": 0.5206751054852321, + "grad_norm": 0.6317407488822937, + "learning_rate": 0.0007127329997384946, + "loss": 1.5521, + "step": 4936 + }, + { + "epoch": 0.5207805907172995, + "grad_norm": 0.616216242313385, + "learning_rate": 0.0007124822523441837, + "loss": 1.5488, + "step": 4937 + }, + { + "epoch": 0.5208860759493671, + "grad_norm": 0.6768845915794373, + "learning_rate": 0.0007122315091539234, + "loss": 1.5515, + "step": 4938 + }, + { + "epoch": 0.5209915611814346, + "grad_norm": 0.5812139511108398, + "learning_rate": 0.000711980770195811, + "loss": 1.5725, + "step": 4939 + }, + { + "epoch": 0.5210970464135021, + "grad_norm": 0.6186695694923401, + "learning_rate": 0.0007117300354979423, + "loss": 1.5383, + "step": 4940 + }, + { + "epoch": 
0.5212025316455696, + "grad_norm": 0.5899620652198792, + "learning_rate": 0.0007114793050884145, + "loss": 1.5945, + "step": 4941 + }, + { + "epoch": 0.5213080168776372, + "grad_norm": 0.5971283316612244, + "learning_rate": 0.0007112285789953226, + "loss": 1.566, + "step": 4942 + }, + { + "epoch": 0.5214135021097046, + "grad_norm": 0.6291025876998901, + "learning_rate": 0.0007109778572467616, + "loss": 1.54, + "step": 4943 + }, + { + "epoch": 0.5215189873417722, + "grad_norm": 0.671026349067688, + "learning_rate": 0.0007107271398708266, + "loss": 1.5647, + "step": 4944 + }, + { + "epoch": 0.5216244725738397, + "grad_norm": 0.5879822969436646, + "learning_rate": 0.0007104764268956111, + "loss": 1.5464, + "step": 4945 + }, + { + "epoch": 0.5217299578059071, + "grad_norm": 0.6031662225723267, + "learning_rate": 0.0007102257183492092, + "loss": 1.5527, + "step": 4946 + }, + { + "epoch": 0.5218354430379747, + "grad_norm": 0.6159895658493042, + "learning_rate": 0.0007099750142597138, + "loss": 1.5644, + "step": 4947 + }, + { + "epoch": 0.5219409282700422, + "grad_norm": 0.6785364151000977, + "learning_rate": 0.0007097243146552175, + "loss": 1.5591, + "step": 4948 + }, + { + "epoch": 0.5220464135021097, + "grad_norm": 0.6748483180999756, + "learning_rate": 0.0007094736195638128, + "loss": 1.5595, + "step": 4949 + }, + { + "epoch": 0.5221518987341772, + "grad_norm": 0.8197882771492004, + "learning_rate": 0.000709222929013591, + "loss": 1.5772, + "step": 4950 + }, + { + "epoch": 0.5222573839662448, + "grad_norm": 0.6267301440238953, + "learning_rate": 0.0007089722430326434, + "loss": 1.5658, + "step": 4951 + }, + { + "epoch": 0.5223628691983122, + "grad_norm": 0.6539154052734375, + "learning_rate": 0.0007087215616490606, + "loss": 1.5434, + "step": 4952 + }, + { + "epoch": 0.5224683544303798, + "grad_norm": 0.5921552181243896, + "learning_rate": 0.0007084708848909326, + "loss": 1.4937, + "step": 4953 + }, + { + "epoch": 0.5225738396624473, + "grad_norm": 0.7230895757675171, 
+ "learning_rate": 0.000708220212786349, + "loss": 1.5484, + "step": 4954 + }, + { + "epoch": 0.5226793248945147, + "grad_norm": 0.6433364748954773, + "learning_rate": 0.000707969545363399, + "loss": 1.5625, + "step": 4955 + }, + { + "epoch": 0.5227848101265823, + "grad_norm": 0.8005784749984741, + "learning_rate": 0.000707718882650171, + "loss": 1.5427, + "step": 4956 + }, + { + "epoch": 0.5228902953586498, + "grad_norm": 0.9216604232788086, + "learning_rate": 0.0007074682246747526, + "loss": 1.5749, + "step": 4957 + }, + { + "epoch": 0.5229957805907173, + "grad_norm": 0.564896821975708, + "learning_rate": 0.0007072175714652321, + "loss": 1.5444, + "step": 4958 + }, + { + "epoch": 0.5231012658227848, + "grad_norm": 0.8120326995849609, + "learning_rate": 0.0007069669230496961, + "loss": 1.5109, + "step": 4959 + }, + { + "epoch": 0.5232067510548524, + "grad_norm": 0.5936939120292664, + "learning_rate": 0.0007067162794562309, + "loss": 1.5395, + "step": 4960 + }, + { + "epoch": 0.5233122362869198, + "grad_norm": 0.6408475637435913, + "learning_rate": 0.0007064656407129224, + "loss": 1.5338, + "step": 4961 + }, + { + "epoch": 0.5234177215189874, + "grad_norm": 0.607948899269104, + "learning_rate": 0.000706215006847856, + "loss": 1.5437, + "step": 4962 + }, + { + "epoch": 0.5235232067510549, + "grad_norm": 0.5640482306480408, + "learning_rate": 0.0007059643778891164, + "loss": 1.5646, + "step": 4963 + }, + { + "epoch": 0.5236286919831223, + "grad_norm": 0.6761637926101685, + "learning_rate": 0.0007057137538647878, + "loss": 1.5498, + "step": 4964 + }, + { + "epoch": 0.5237341772151899, + "grad_norm": 0.5818403363227844, + "learning_rate": 0.0007054631348029539, + "loss": 1.6185, + "step": 4965 + }, + { + "epoch": 0.5238396624472574, + "grad_norm": 0.717028021812439, + "learning_rate": 0.0007052125207316975, + "loss": 1.5384, + "step": 4966 + }, + { + "epoch": 0.5239451476793249, + "grad_norm": 0.5773304104804993, + "learning_rate": 0.0007049619116791019, + "loss": 
1.5169, + "step": 4967 + }, + { + "epoch": 0.5240506329113924, + "grad_norm": 0.6611260175704956, + "learning_rate": 0.0007047113076732485, + "loss": 1.4955, + "step": 4968 + }, + { + "epoch": 0.52415611814346, + "grad_norm": 0.6080674529075623, + "learning_rate": 0.0007044607087422191, + "loss": 1.5546, + "step": 4969 + }, + { + "epoch": 0.5242616033755274, + "grad_norm": 0.7462517023086548, + "learning_rate": 0.0007042101149140943, + "loss": 1.5868, + "step": 4970 + }, + { + "epoch": 0.524367088607595, + "grad_norm": 0.7485067248344421, + "learning_rate": 0.0007039595262169544, + "loss": 1.6125, + "step": 4971 + }, + { + "epoch": 0.5244725738396624, + "grad_norm": 0.6601601839065552, + "learning_rate": 0.0007037089426788792, + "loss": 1.4955, + "step": 4972 + }, + { + "epoch": 0.5245780590717299, + "grad_norm": 0.7149142622947693, + "learning_rate": 0.0007034583643279479, + "loss": 1.5416, + "step": 4973 + }, + { + "epoch": 0.5246835443037975, + "grad_norm": 0.586341917514801, + "learning_rate": 0.0007032077911922384, + "loss": 1.5356, + "step": 4974 + }, + { + "epoch": 0.5247890295358649, + "grad_norm": 0.6701570153236389, + "learning_rate": 0.0007029572232998298, + "loss": 1.5898, + "step": 4975 + }, + { + "epoch": 0.5248945147679325, + "grad_norm": 0.6418752670288086, + "learning_rate": 0.0007027066606787988, + "loss": 1.5442, + "step": 4976 + }, + { + "epoch": 0.525, + "grad_norm": 0.6832173466682434, + "learning_rate": 0.0007024561033572223, + "loss": 1.5543, + "step": 4977 + }, + { + "epoch": 0.5251054852320675, + "grad_norm": 0.5895013213157654, + "learning_rate": 0.0007022055513631764, + "loss": 1.5513, + "step": 4978 + }, + { + "epoch": 0.525210970464135, + "grad_norm": 0.6732603907585144, + "learning_rate": 0.000701955004724737, + "loss": 1.5041, + "step": 4979 + }, + { + "epoch": 0.5253164556962026, + "grad_norm": 0.6733709573745728, + "learning_rate": 0.0007017044634699787, + "loss": 1.5215, + "step": 4980 + }, + { + "epoch": 0.52542194092827, + 
"grad_norm": 0.6504632234573364, + "learning_rate": 0.0007014539276269762, + "loss": 1.5692, + "step": 4981 + }, + { + "epoch": 0.5255274261603375, + "grad_norm": 0.6637694239616394, + "learning_rate": 0.0007012033972238031, + "loss": 1.5358, + "step": 4982 + }, + { + "epoch": 0.5256329113924051, + "grad_norm": 0.8776251673698425, + "learning_rate": 0.0007009528722885323, + "loss": 1.5392, + "step": 4983 + }, + { + "epoch": 0.5257383966244725, + "grad_norm": 0.6391558647155762, + "learning_rate": 0.0007007023528492372, + "loss": 1.5333, + "step": 4984 + }, + { + "epoch": 0.5258438818565401, + "grad_norm": 0.7685898542404175, + "learning_rate": 0.0007004518389339893, + "loss": 1.5424, + "step": 4985 + }, + { + "epoch": 0.5259493670886076, + "grad_norm": 0.8220505714416504, + "learning_rate": 0.0007002013305708598, + "loss": 1.5314, + "step": 4986 + }, + { + "epoch": 0.5260548523206751, + "grad_norm": 0.6323449611663818, + "learning_rate": 0.0006999508277879196, + "loss": 1.5108, + "step": 4987 + }, + { + "epoch": 0.5261603375527426, + "grad_norm": 0.833024799823761, + "learning_rate": 0.0006997003306132386, + "loss": 1.5596, + "step": 4988 + }, + { + "epoch": 0.5262658227848102, + "grad_norm": 0.5773718357086182, + "learning_rate": 0.0006994498390748865, + "loss": 1.5851, + "step": 4989 + }, + { + "epoch": 0.5263713080168776, + "grad_norm": 0.7754939198493958, + "learning_rate": 0.0006991993532009319, + "loss": 1.5484, + "step": 4990 + }, + { + "epoch": 0.5264767932489451, + "grad_norm": 0.6200796961784363, + "learning_rate": 0.0006989488730194432, + "loss": 1.5454, + "step": 4991 + }, + { + "epoch": 0.5265822784810127, + "grad_norm": 0.6372487545013428, + "learning_rate": 0.0006986983985584874, + "loss": 1.5448, + "step": 4992 + }, + { + "epoch": 0.5266877637130801, + "grad_norm": 0.6388144493103027, + "learning_rate": 0.0006984479298461323, + "loss": 1.5523, + "step": 4993 + }, + { + "epoch": 0.5267932489451477, + "grad_norm": 0.5361804962158203, + 
"learning_rate": 0.0006981974669104436, + "loss": 1.5891, + "step": 4994 + }, + { + "epoch": 0.5268987341772152, + "grad_norm": 0.6108608841896057, + "learning_rate": 0.0006979470097794871, + "loss": 1.568, + "step": 4995 + }, + { + "epoch": 0.5270042194092827, + "grad_norm": 0.5963684320449829, + "learning_rate": 0.0006976965584813277, + "loss": 1.5475, + "step": 4996 + }, + { + "epoch": 0.5271097046413502, + "grad_norm": 0.5337874293327332, + "learning_rate": 0.0006974461130440298, + "loss": 1.5334, + "step": 4997 + }, + { + "epoch": 0.5272151898734178, + "grad_norm": 0.6063400506973267, + "learning_rate": 0.0006971956734956569, + "loss": 1.5734, + "step": 4998 + }, + { + "epoch": 0.5273206751054852, + "grad_norm": 0.5760331153869629, + "learning_rate": 0.0006969452398642721, + "loss": 1.5847, + "step": 4999 + }, + { + "epoch": 0.5274261603375527, + "grad_norm": 0.5653030872344971, + "learning_rate": 0.0006966948121779378, + "loss": 1.5407, + "step": 5000 + }, + { + "epoch": 0.5275316455696203, + "grad_norm": 0.6128718256950378, + "learning_rate": 0.0006964443904647152, + "loss": 1.5817, + "step": 5001 + }, + { + "epoch": 0.5276371308016877, + "grad_norm": 0.5909519791603088, + "learning_rate": 0.0006961939747526661, + "loss": 1.582, + "step": 5002 + }, + { + "epoch": 0.5277426160337553, + "grad_norm": 0.5827531814575195, + "learning_rate": 0.0006959435650698504, + "loss": 1.5511, + "step": 5003 + }, + { + "epoch": 0.5278481012658228, + "grad_norm": 0.5829088687896729, + "learning_rate": 0.0006956931614443278, + "loss": 1.5393, + "step": 5004 + }, + { + "epoch": 0.5279535864978903, + "grad_norm": 0.6210057735443115, + "learning_rate": 0.0006954427639041572, + "loss": 1.5368, + "step": 5005 + }, + { + "epoch": 0.5280590717299578, + "grad_norm": 0.6426866054534912, + "learning_rate": 0.000695192372477397, + "loss": 1.5039, + "step": 5006 + }, + { + "epoch": 0.5281645569620254, + "grad_norm": 0.5552183985710144, + "learning_rate": 0.0006949419871921047, + "loss": 
1.5358, + "step": 5007 + }, + { + "epoch": 0.5282700421940928, + "grad_norm": 0.656497597694397, + "learning_rate": 0.0006946916080763373, + "loss": 1.5646, + "step": 5008 + }, + { + "epoch": 0.5283755274261603, + "grad_norm": 0.6156951189041138, + "learning_rate": 0.0006944412351581506, + "loss": 1.5459, + "step": 5009 + }, + { + "epoch": 0.5284810126582279, + "grad_norm": 0.6243900060653687, + "learning_rate": 0.000694190868465601, + "loss": 1.5506, + "step": 5010 + }, + { + "epoch": 0.5285864978902953, + "grad_norm": 0.6314789056777954, + "learning_rate": 0.0006939405080267428, + "loss": 1.5239, + "step": 5011 + }, + { + "epoch": 0.5286919831223629, + "grad_norm": 0.6525082588195801, + "learning_rate": 0.0006936901538696303, + "loss": 1.5743, + "step": 5012 + }, + { + "epoch": 0.5287974683544304, + "grad_norm": 0.5660065412521362, + "learning_rate": 0.0006934398060223168, + "loss": 1.5458, + "step": 5013 + }, + { + "epoch": 0.5289029535864979, + "grad_norm": 0.7159137725830078, + "learning_rate": 0.0006931894645128551, + "loss": 1.5363, + "step": 5014 + }, + { + "epoch": 0.5290084388185654, + "grad_norm": 0.607568085193634, + "learning_rate": 0.0006929391293692972, + "loss": 1.4956, + "step": 5015 + }, + { + "epoch": 0.529113924050633, + "grad_norm": 0.6190477013587952, + "learning_rate": 0.0006926888006196944, + "loss": 1.5758, + "step": 5016 + }, + { + "epoch": 0.5292194092827004, + "grad_norm": 0.6730437874794006, + "learning_rate": 0.0006924384782920971, + "loss": 1.5694, + "step": 5017 + }, + { + "epoch": 0.5293248945147679, + "grad_norm": 0.6447691321372986, + "learning_rate": 0.0006921881624145554, + "loss": 1.5423, + "step": 5018 + }, + { + "epoch": 0.5294303797468355, + "grad_norm": 0.704119861125946, + "learning_rate": 0.0006919378530151182, + "loss": 1.5318, + "step": 5019 + }, + { + "epoch": 0.5295358649789029, + "grad_norm": 0.6235859394073486, + "learning_rate": 0.0006916875501218343, + "loss": 1.5601, + "step": 5020 + }, + { + "epoch": 
0.5296413502109705, + "grad_norm": 0.6744921207427979, + "learning_rate": 0.0006914372537627512, + "loss": 1.5348, + "step": 5021 + }, + { + "epoch": 0.529746835443038, + "grad_norm": 0.6863308548927307, + "learning_rate": 0.0006911869639659159, + "loss": 1.5375, + "step": 5022 + }, + { + "epoch": 0.5298523206751055, + "grad_norm": 0.7328982353210449, + "learning_rate": 0.0006909366807593744, + "loss": 1.5373, + "step": 5023 + }, + { + "epoch": 0.529957805907173, + "grad_norm": 0.6428442001342773, + "learning_rate": 0.0006906864041711725, + "loss": 1.5335, + "step": 5024 + }, + { + "epoch": 0.5300632911392406, + "grad_norm": 0.689058244228363, + "learning_rate": 0.0006904361342293546, + "loss": 1.5188, + "step": 5025 + }, + { + "epoch": 0.530168776371308, + "grad_norm": 0.6486260890960693, + "learning_rate": 0.000690185870961965, + "loss": 1.5271, + "step": 5026 + }, + { + "epoch": 0.5302742616033755, + "grad_norm": 0.7649176120758057, + "learning_rate": 0.0006899356143970467, + "loss": 1.5644, + "step": 5027 + }, + { + "epoch": 0.5303797468354431, + "grad_norm": 0.6723948121070862, + "learning_rate": 0.0006896853645626424, + "loss": 1.5267, + "step": 5028 + }, + { + "epoch": 0.5304852320675105, + "grad_norm": 0.696881115436554, + "learning_rate": 0.0006894351214867937, + "loss": 1.5128, + "step": 5029 + }, + { + "epoch": 0.5305907172995781, + "grad_norm": 0.7503393888473511, + "learning_rate": 0.0006891848851975416, + "loss": 1.5186, + "step": 5030 + }, + { + "epoch": 0.5306962025316456, + "grad_norm": 0.5883582234382629, + "learning_rate": 0.0006889346557229265, + "loss": 1.5501, + "step": 5031 + }, + { + "epoch": 0.5308016877637131, + "grad_norm": 0.6047359704971313, + "learning_rate": 0.0006886844330909877, + "loss": 1.5713, + "step": 5032 + }, + { + "epoch": 0.5309071729957806, + "grad_norm": 0.6243405938148499, + "learning_rate": 0.0006884342173297639, + "loss": 1.5614, + "step": 5033 + }, + { + "epoch": 0.5310126582278482, + "grad_norm": 0.6932018995285034, 
+ "learning_rate": 0.000688184008467293, + "loss": 1.5539, + "step": 5034 + }, + { + "epoch": 0.5311181434599156, + "grad_norm": 0.6008765697479248, + "learning_rate": 0.0006879338065316122, + "loss": 1.5623, + "step": 5035 + }, + { + "epoch": 0.5312236286919831, + "grad_norm": 0.5854286551475525, + "learning_rate": 0.0006876836115507579, + "loss": 1.5579, + "step": 5036 + }, + { + "epoch": 0.5313291139240506, + "grad_norm": 0.7277839779853821, + "learning_rate": 0.0006874334235527657, + "loss": 1.5277, + "step": 5037 + }, + { + "epoch": 0.5314345991561181, + "grad_norm": 0.7247647643089294, + "learning_rate": 0.0006871832425656702, + "loss": 1.5962, + "step": 5038 + }, + { + "epoch": 0.5315400843881857, + "grad_norm": 0.6318128108978271, + "learning_rate": 0.0006869330686175058, + "loss": 1.5563, + "step": 5039 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 0.5914750695228577, + "learning_rate": 0.0006866829017363054, + "loss": 1.5247, + "step": 5040 + }, + { + "epoch": 0.5317510548523207, + "grad_norm": 0.6236031651496887, + "learning_rate": 0.0006864327419501017, + "loss": 1.5588, + "step": 5041 + }, + { + "epoch": 0.5318565400843882, + "grad_norm": 0.5834966897964478, + "learning_rate": 0.0006861825892869262, + "loss": 1.5525, + "step": 5042 + }, + { + "epoch": 0.5319620253164556, + "grad_norm": 0.6570964455604553, + "learning_rate": 0.0006859324437748099, + "loss": 1.5145, + "step": 5043 + }, + { + "epoch": 0.5320675105485232, + "grad_norm": 0.654842734336853, + "learning_rate": 0.0006856823054417825, + "loss": 1.5546, + "step": 5044 + }, + { + "epoch": 0.5321729957805907, + "grad_norm": 0.6194042563438416, + "learning_rate": 0.0006854321743158737, + "loss": 1.5259, + "step": 5045 + }, + { + "epoch": 0.5322784810126582, + "grad_norm": 0.718423068523407, + "learning_rate": 0.0006851820504251117, + "loss": 1.5915, + "step": 5046 + }, + { + "epoch": 0.5323839662447257, + "grad_norm": 0.573664128780365, + "learning_rate": 0.0006849319337975242, + "loss": 
1.5344, + "step": 5047 + }, + { + "epoch": 0.5324894514767933, + "grad_norm": 0.6066869497299194, + "learning_rate": 0.0006846818244611376, + "loss": 1.4996, + "step": 5048 + }, + { + "epoch": 0.5325949367088607, + "grad_norm": 0.6531670689582825, + "learning_rate": 0.0006844317224439788, + "loss": 1.5638, + "step": 5049 + }, + { + "epoch": 0.5327004219409283, + "grad_norm": 0.615336000919342, + "learning_rate": 0.0006841816277740722, + "loss": 1.5367, + "step": 5050 + }, + { + "epoch": 0.5328059071729958, + "grad_norm": 0.6522095799446106, + "learning_rate": 0.0006839315404794424, + "loss": 1.5649, + "step": 5051 + }, + { + "epoch": 0.5329113924050632, + "grad_norm": 0.6208915114402771, + "learning_rate": 0.0006836814605881131, + "loss": 1.5486, + "step": 5052 + }, + { + "epoch": 0.5330168776371308, + "grad_norm": 0.6514835953712463, + "learning_rate": 0.0006834313881281066, + "loss": 1.5652, + "step": 5053 + }, + { + "epoch": 0.5331223628691983, + "grad_norm": 0.6160233020782471, + "learning_rate": 0.0006831813231274451, + "loss": 1.5612, + "step": 5054 + }, + { + "epoch": 0.5332278481012658, + "grad_norm": 0.6033234000205994, + "learning_rate": 0.0006829312656141496, + "loss": 1.5504, + "step": 5055 + }, + { + "epoch": 0.5333333333333333, + "grad_norm": 0.5748640298843384, + "learning_rate": 0.0006826812156162401, + "loss": 1.5508, + "step": 5056 + }, + { + "epoch": 0.5334388185654009, + "grad_norm": 0.6252884268760681, + "learning_rate": 0.0006824311731617363, + "loss": 1.5742, + "step": 5057 + }, + { + "epoch": 0.5335443037974683, + "grad_norm": 0.6038716435432434, + "learning_rate": 0.0006821811382786561, + "loss": 1.5607, + "step": 5058 + }, + { + "epoch": 0.5336497890295359, + "grad_norm": 0.6667943000793457, + "learning_rate": 0.0006819311109950177, + "loss": 1.5331, + "step": 5059 + }, + { + "epoch": 0.5337552742616034, + "grad_norm": 0.6174015402793884, + "learning_rate": 0.0006816810913388379, + "loss": 1.5385, + "step": 5060 + }, + { + "epoch": 
0.5338607594936708, + "grad_norm": 0.6909382939338684, + "learning_rate": 0.0006814310793381322, + "loss": 1.576, + "step": 5061 + }, + { + "epoch": 0.5339662447257384, + "grad_norm": 0.6562239527702332, + "learning_rate": 0.0006811810750209161, + "loss": 1.5203, + "step": 5062 + }, + { + "epoch": 0.5340717299578059, + "grad_norm": 0.7554178237915039, + "learning_rate": 0.0006809310784152039, + "loss": 1.5305, + "step": 5063 + }, + { + "epoch": 0.5341772151898734, + "grad_norm": 0.6757071614265442, + "learning_rate": 0.0006806810895490087, + "loss": 1.5938, + "step": 5064 + }, + { + "epoch": 0.5342827004219409, + "grad_norm": 0.6065846681594849, + "learning_rate": 0.000680431108450343, + "loss": 1.515, + "step": 5065 + }, + { + "epoch": 0.5343881856540085, + "grad_norm": 0.6345084309577942, + "learning_rate": 0.0006801811351472185, + "loss": 1.5415, + "step": 5066 + }, + { + "epoch": 0.5344936708860759, + "grad_norm": 0.6354236602783203, + "learning_rate": 0.000679931169667646, + "loss": 1.5296, + "step": 5067 + }, + { + "epoch": 0.5345991561181435, + "grad_norm": 0.6636695265769958, + "learning_rate": 0.0006796812120396351, + "loss": 1.5712, + "step": 5068 + }, + { + "epoch": 0.534704641350211, + "grad_norm": 0.5796474814414978, + "learning_rate": 0.0006794312622911953, + "loss": 1.5198, + "step": 5069 + }, + { + "epoch": 0.5348101265822784, + "grad_norm": 0.7703236937522888, + "learning_rate": 0.0006791813204503342, + "loss": 1.5029, + "step": 5070 + }, + { + "epoch": 0.534915611814346, + "grad_norm": 0.7085558176040649, + "learning_rate": 0.0006789313865450594, + "loss": 1.5494, + "step": 5071 + }, + { + "epoch": 0.5350210970464135, + "grad_norm": 0.6509859561920166, + "learning_rate": 0.0006786814606033773, + "loss": 1.5606, + "step": 5072 + }, + { + "epoch": 0.535126582278481, + "grad_norm": 0.6279146671295166, + "learning_rate": 0.0006784315426532929, + "loss": 1.5256, + "step": 5073 + }, + { + "epoch": 0.5352320675105485, + "grad_norm": 0.7252156734466553, + 
"learning_rate": 0.0006781816327228112, + "loss": 1.582, + "step": 5074 + }, + { + "epoch": 0.5353375527426161, + "grad_norm": 0.5718883275985718, + "learning_rate": 0.0006779317308399357, + "loss": 1.5706, + "step": 5075 + }, + { + "epoch": 0.5354430379746835, + "grad_norm": 0.7569712400436401, + "learning_rate": 0.000677681837032669, + "loss": 1.5614, + "step": 5076 + }, + { + "epoch": 0.5355485232067511, + "grad_norm": 0.6323140263557434, + "learning_rate": 0.0006774319513290132, + "loss": 1.5524, + "step": 5077 + }, + { + "epoch": 0.5356540084388186, + "grad_norm": 0.8571042418479919, + "learning_rate": 0.0006771820737569689, + "loss": 1.5686, + "step": 5078 + }, + { + "epoch": 0.535759493670886, + "grad_norm": 0.6866335272789001, + "learning_rate": 0.0006769322043445363, + "loss": 1.5288, + "step": 5079 + }, + { + "epoch": 0.5358649789029536, + "grad_norm": 0.6581236720085144, + "learning_rate": 0.0006766823431197147, + "loss": 1.5575, + "step": 5080 + }, + { + "epoch": 0.5359704641350211, + "grad_norm": 0.615921139717102, + "learning_rate": 0.0006764324901105022, + "loss": 1.53, + "step": 5081 + }, + { + "epoch": 0.5360759493670886, + "grad_norm": 0.7045366764068604, + "learning_rate": 0.000676182645344896, + "loss": 1.5604, + "step": 5082 + }, + { + "epoch": 0.5361814345991561, + "grad_norm": 0.5635098218917847, + "learning_rate": 0.0006759328088508925, + "loss": 1.5245, + "step": 5083 + }, + { + "epoch": 0.5362869198312237, + "grad_norm": 0.6543015837669373, + "learning_rate": 0.0006756829806564872, + "loss": 1.5652, + "step": 5084 + }, + { + "epoch": 0.5363924050632911, + "grad_norm": 0.6101940870285034, + "learning_rate": 0.0006754331607896742, + "loss": 1.5672, + "step": 5085 + }, + { + "epoch": 0.5364978902953587, + "grad_norm": 0.6587273478507996, + "learning_rate": 0.0006751833492784476, + "loss": 1.5033, + "step": 5086 + }, + { + "epoch": 0.5366033755274262, + "grad_norm": 0.576350212097168, + "learning_rate": 0.0006749335461507995, + "loss": 1.505, 
+ "step": 5087 + }, + { + "epoch": 0.5367088607594936, + "grad_norm": 0.6621896028518677, + "learning_rate": 0.000674683751434722, + "loss": 1.5823, + "step": 5088 + }, + { + "epoch": 0.5368143459915612, + "grad_norm": 0.6220313906669617, + "learning_rate": 0.0006744339651582059, + "loss": 1.5524, + "step": 5089 + }, + { + "epoch": 0.5369198312236287, + "grad_norm": 0.6149738430976868, + "learning_rate": 0.0006741841873492406, + "loss": 1.5877, + "step": 5090 + }, + { + "epoch": 0.5370253164556962, + "grad_norm": 0.6695860028266907, + "learning_rate": 0.0006739344180358153, + "loss": 1.5739, + "step": 5091 + }, + { + "epoch": 0.5371308016877637, + "grad_norm": 0.636716365814209, + "learning_rate": 0.0006736846572459178, + "loss": 1.4766, + "step": 5092 + }, + { + "epoch": 0.5372362869198313, + "grad_norm": 0.586111307144165, + "learning_rate": 0.0006734349050075348, + "loss": 1.526, + "step": 5093 + }, + { + "epoch": 0.5373417721518987, + "grad_norm": 0.7472401261329651, + "learning_rate": 0.0006731851613486526, + "loss": 1.5196, + "step": 5094 + }, + { + "epoch": 0.5374472573839663, + "grad_norm": 0.748647928237915, + "learning_rate": 0.0006729354262972561, + "loss": 1.5312, + "step": 5095 + }, + { + "epoch": 0.5375527426160338, + "grad_norm": 0.6907358169555664, + "learning_rate": 0.0006726856998813291, + "loss": 1.5319, + "step": 5096 + }, + { + "epoch": 0.5376582278481012, + "grad_norm": 0.8508015871047974, + "learning_rate": 0.0006724359821288552, + "loss": 1.5512, + "step": 5097 + }, + { + "epoch": 0.5377637130801688, + "grad_norm": 0.6751585602760315, + "learning_rate": 0.0006721862730678164, + "loss": 1.5489, + "step": 5098 + }, + { + "epoch": 0.5378691983122363, + "grad_norm": 0.7733919620513916, + "learning_rate": 0.0006719365727261935, + "loss": 1.555, + "step": 5099 + }, + { + "epoch": 0.5379746835443038, + "grad_norm": 0.680768609046936, + "learning_rate": 0.0006716868811319671, + "loss": 1.498, + "step": 5100 + }, + { + "epoch": 0.5380801687763713, + 
"grad_norm": 0.6702001690864563, + "learning_rate": 0.000671437198313116, + "loss": 1.5534, + "step": 5101 + }, + { + "epoch": 0.5381856540084389, + "grad_norm": 0.6387300491333008, + "learning_rate": 0.0006711875242976187, + "loss": 1.5637, + "step": 5102 + }, + { + "epoch": 0.5382911392405063, + "grad_norm": 0.8554492592811584, + "learning_rate": 0.0006709378591134523, + "loss": 1.5417, + "step": 5103 + }, + { + "epoch": 0.5383966244725739, + "grad_norm": 0.732205867767334, + "learning_rate": 0.0006706882027885929, + "loss": 1.5167, + "step": 5104 + }, + { + "epoch": 0.5385021097046413, + "grad_norm": 0.6895302534103394, + "learning_rate": 0.0006704385553510156, + "loss": 1.5109, + "step": 5105 + }, + { + "epoch": 0.5386075949367088, + "grad_norm": 0.6401652693748474, + "learning_rate": 0.0006701889168286953, + "loss": 1.5463, + "step": 5106 + }, + { + "epoch": 0.5387130801687764, + "grad_norm": 0.6972709894180298, + "learning_rate": 0.0006699392872496048, + "loss": 1.5814, + "step": 5107 + }, + { + "epoch": 0.5388185654008438, + "grad_norm": 0.6034563183784485, + "learning_rate": 0.0006696896666417163, + "loss": 1.5505, + "step": 5108 + }, + { + "epoch": 0.5389240506329114, + "grad_norm": 0.7365453839302063, + "learning_rate": 0.0006694400550330013, + "loss": 1.5663, + "step": 5109 + }, + { + "epoch": 0.5390295358649789, + "grad_norm": 0.7134302258491516, + "learning_rate": 0.0006691904524514297, + "loss": 1.5044, + "step": 5110 + }, + { + "epoch": 0.5391350210970464, + "grad_norm": 0.7322347164154053, + "learning_rate": 0.0006689408589249709, + "loss": 1.5585, + "step": 5111 + }, + { + "epoch": 0.5392405063291139, + "grad_norm": 0.7600921988487244, + "learning_rate": 0.000668691274481593, + "loss": 1.5385, + "step": 5112 + }, + { + "epoch": 0.5393459915611815, + "grad_norm": 0.6917355060577393, + "learning_rate": 0.0006684416991492629, + "loss": 1.5775, + "step": 5113 + }, + { + "epoch": 0.5394514767932489, + "grad_norm": 0.8310413956642151, + "learning_rate": 
0.0006681921329559475, + "loss": 1.5356, + "step": 5114 + }, + { + "epoch": 0.5395569620253164, + "grad_norm": 0.5494151711463928, + "learning_rate": 0.0006679425759296114, + "loss": 1.5195, + "step": 5115 + }, + { + "epoch": 0.539662447257384, + "grad_norm": 0.8526685833930969, + "learning_rate": 0.000667693028098219, + "loss": 1.5428, + "step": 5116 + }, + { + "epoch": 0.5397679324894514, + "grad_norm": 0.7283315658569336, + "learning_rate": 0.0006674434894897332, + "loss": 1.5426, + "step": 5117 + }, + { + "epoch": 0.539873417721519, + "grad_norm": 0.8085581660270691, + "learning_rate": 0.000667193960132116, + "loss": 1.5609, + "step": 5118 + }, + { + "epoch": 0.5399789029535865, + "grad_norm": 0.7927452325820923, + "learning_rate": 0.0006669444400533286, + "loss": 1.5371, + "step": 5119 + }, + { + "epoch": 0.540084388185654, + "grad_norm": 0.6909306645393372, + "learning_rate": 0.0006666949292813306, + "loss": 1.5189, + "step": 5120 + }, + { + "epoch": 0.5401898734177215, + "grad_norm": 0.7444798350334167, + "learning_rate": 0.0006664454278440813, + "loss": 1.5397, + "step": 5121 + }, + { + "epoch": 0.5402953586497891, + "grad_norm": 0.5563899278640747, + "learning_rate": 0.0006661959357695382, + "loss": 1.5239, + "step": 5122 + }, + { + "epoch": 0.5404008438818565, + "grad_norm": 0.698501467704773, + "learning_rate": 0.0006659464530856587, + "loss": 1.5228, + "step": 5123 + }, + { + "epoch": 0.540506329113924, + "grad_norm": 0.6232441067695618, + "learning_rate": 0.0006656969798203982, + "loss": 1.5456, + "step": 5124 + }, + { + "epoch": 0.5406118143459916, + "grad_norm": 0.8346911668777466, + "learning_rate": 0.0006654475160017115, + "loss": 1.5497, + "step": 5125 + }, + { + "epoch": 0.540717299578059, + "grad_norm": 0.604189395904541, + "learning_rate": 0.0006651980616575522, + "loss": 1.5261, + "step": 5126 + }, + { + "epoch": 0.5408227848101266, + "grad_norm": 0.8206016421318054, + "learning_rate": 0.0006649486168158731, + "loss": 1.5415, + "step": 5127 + 
}, + { + "epoch": 0.5409282700421941, + "grad_norm": 0.5757191777229309, + "learning_rate": 0.0006646991815046254, + "loss": 1.5405, + "step": 5128 + }, + { + "epoch": 0.5410337552742616, + "grad_norm": 0.7303915023803711, + "learning_rate": 0.0006644497557517599, + "loss": 1.5537, + "step": 5129 + }, + { + "epoch": 0.5411392405063291, + "grad_norm": 0.6231369376182556, + "learning_rate": 0.0006642003395852258, + "loss": 1.5431, + "step": 5130 + }, + { + "epoch": 0.5412447257383967, + "grad_norm": 0.6257041692733765, + "learning_rate": 0.0006639509330329713, + "loss": 1.5749, + "step": 5131 + }, + { + "epoch": 0.5413502109704641, + "grad_norm": 0.6420578956604004, + "learning_rate": 0.0006637015361229438, + "loss": 1.5858, + "step": 5132 + }, + { + "epoch": 0.5414556962025316, + "grad_norm": 0.6416388154029846, + "learning_rate": 0.0006634521488830898, + "loss": 1.5357, + "step": 5133 + }, + { + "epoch": 0.5415611814345992, + "grad_norm": 0.7267821431159973, + "learning_rate": 0.0006632027713413541, + "loss": 1.5555, + "step": 5134 + }, + { + "epoch": 0.5416666666666666, + "grad_norm": 0.6278142929077148, + "learning_rate": 0.0006629534035256805, + "loss": 1.5377, + "step": 5135 + }, + { + "epoch": 0.5417721518987342, + "grad_norm": 0.6449595093727112, + "learning_rate": 0.0006627040454640123, + "loss": 1.5424, + "step": 5136 + }, + { + "epoch": 0.5418776371308017, + "grad_norm": 0.6557134389877319, + "learning_rate": 0.0006624546971842909, + "loss": 1.5587, + "step": 5137 + }, + { + "epoch": 0.5419831223628692, + "grad_norm": 0.6809266805648804, + "learning_rate": 0.0006622053587144572, + "loss": 1.5557, + "step": 5138 + }, + { + "epoch": 0.5420886075949367, + "grad_norm": 0.5951696038246155, + "learning_rate": 0.0006619560300824507, + "loss": 1.5523, + "step": 5139 + }, + { + "epoch": 0.5421940928270043, + "grad_norm": 0.6265801191329956, + "learning_rate": 0.0006617067113162103, + "loss": 1.5446, + "step": 5140 + }, + { + "epoch": 0.5422995780590717, + 
"grad_norm": 0.631786584854126, + "learning_rate": 0.0006614574024436732, + "loss": 1.5391, + "step": 5141 + }, + { + "epoch": 0.5424050632911392, + "grad_norm": 0.7451404929161072, + "learning_rate": 0.0006612081034927756, + "loss": 1.5741, + "step": 5142 + }, + { + "epoch": 0.5425105485232068, + "grad_norm": 0.6032252311706543, + "learning_rate": 0.0006609588144914528, + "loss": 1.5676, + "step": 5143 + }, + { + "epoch": 0.5426160337552742, + "grad_norm": 0.7280941605567932, + "learning_rate": 0.0006607095354676389, + "loss": 1.5322, + "step": 5144 + }, + { + "epoch": 0.5427215189873418, + "grad_norm": 0.5450572371482849, + "learning_rate": 0.0006604602664492667, + "loss": 1.5632, + "step": 5145 + }, + { + "epoch": 0.5428270042194093, + "grad_norm": 0.6144159436225891, + "learning_rate": 0.0006602110074642682, + "loss": 1.5398, + "step": 5146 + }, + { + "epoch": 0.5429324894514768, + "grad_norm": 0.5736171007156372, + "learning_rate": 0.000659961758540574, + "loss": 1.5789, + "step": 5147 + }, + { + "epoch": 0.5430379746835443, + "grad_norm": 0.5546463131904602, + "learning_rate": 0.0006597125197061133, + "loss": 1.5211, + "step": 5148 + }, + { + "epoch": 0.5431434599156119, + "grad_norm": 0.5707398653030396, + "learning_rate": 0.0006594632909888154, + "loss": 1.5496, + "step": 5149 + }, + { + "epoch": 0.5432489451476793, + "grad_norm": 0.5907250046730042, + "learning_rate": 0.0006592140724166073, + "loss": 1.5714, + "step": 5150 + }, + { + "epoch": 0.5433544303797468, + "grad_norm": 0.6272673010826111, + "learning_rate": 0.000658964864017415, + "loss": 1.5198, + "step": 5151 + }, + { + "epoch": 0.5434599156118144, + "grad_norm": 0.5975584983825684, + "learning_rate": 0.0006587156658191635, + "loss": 1.5622, + "step": 5152 + }, + { + "epoch": 0.5435654008438818, + "grad_norm": 0.6529332995414734, + "learning_rate": 0.0006584664778497771, + "loss": 1.5394, + "step": 5153 + }, + { + "epoch": 0.5436708860759494, + "grad_norm": 0.6462889909744263, + "learning_rate": 
0.0006582173001371781, + "loss": 1.5543, + "step": 5154 + }, + { + "epoch": 0.5437763713080169, + "grad_norm": 0.6373485326766968, + "learning_rate": 0.0006579681327092883, + "loss": 1.5695, + "step": 5155 + }, + { + "epoch": 0.5438818565400844, + "grad_norm": 0.6604536771774292, + "learning_rate": 0.0006577189755940282, + "loss": 1.5063, + "step": 5156 + }, + { + "epoch": 0.5439873417721519, + "grad_norm": 0.5947659611701965, + "learning_rate": 0.0006574698288193166, + "loss": 1.5491, + "step": 5157 + }, + { + "epoch": 0.5440928270042195, + "grad_norm": 0.6853976249694824, + "learning_rate": 0.0006572206924130725, + "loss": 1.5279, + "step": 5158 + }, + { + "epoch": 0.5441983122362869, + "grad_norm": 0.6565244793891907, + "learning_rate": 0.0006569715664032124, + "loss": 1.5631, + "step": 5159 + }, + { + "epoch": 0.5443037974683544, + "grad_norm": 0.6164253354072571, + "learning_rate": 0.0006567224508176523, + "loss": 1.544, + "step": 5160 + }, + { + "epoch": 0.544409282700422, + "grad_norm": 0.6177341341972351, + "learning_rate": 0.0006564733456843067, + "loss": 1.5838, + "step": 5161 + }, + { + "epoch": 0.5445147679324894, + "grad_norm": 0.5932947993278503, + "learning_rate": 0.000656224251031089, + "loss": 1.5752, + "step": 5162 + }, + { + "epoch": 0.544620253164557, + "grad_norm": 0.5733368992805481, + "learning_rate": 0.0006559751668859115, + "loss": 1.5119, + "step": 5163 + }, + { + "epoch": 0.5447257383966245, + "grad_norm": 0.602484405040741, + "learning_rate": 0.0006557260932766855, + "loss": 1.5428, + "step": 5164 + }, + { + "epoch": 0.544831223628692, + "grad_norm": 0.5824986696243286, + "learning_rate": 0.0006554770302313205, + "loss": 1.5588, + "step": 5165 + }, + { + "epoch": 0.5449367088607595, + "grad_norm": 0.7514373064041138, + "learning_rate": 0.0006552279777777258, + "loss": 1.5017, + "step": 5166 + }, + { + "epoch": 0.5450421940928271, + "grad_norm": 0.6370875835418701, + "learning_rate": 0.000654978935943809, + "loss": 1.5585, + "step": 5167 
+ }, + { + "epoch": 0.5451476793248945, + "grad_norm": 0.6368341445922852, + "learning_rate": 0.0006547299047574761, + "loss": 1.5417, + "step": 5168 + }, + { + "epoch": 0.545253164556962, + "grad_norm": 0.6020305156707764, + "learning_rate": 0.0006544808842466324, + "loss": 1.5674, + "step": 5169 + }, + { + "epoch": 0.5453586497890295, + "grad_norm": 0.5932244658470154, + "learning_rate": 0.0006542318744391821, + "loss": 1.4975, + "step": 5170 + }, + { + "epoch": 0.545464135021097, + "grad_norm": 0.5534425377845764, + "learning_rate": 0.0006539828753630276, + "loss": 1.5198, + "step": 5171 + }, + { + "epoch": 0.5455696202531646, + "grad_norm": 0.6887194514274597, + "learning_rate": 0.0006537338870460708, + "loss": 1.5574, + "step": 5172 + }, + { + "epoch": 0.545675105485232, + "grad_norm": 0.6823055744171143, + "learning_rate": 0.000653484909516212, + "loss": 1.5537, + "step": 5173 + }, + { + "epoch": 0.5457805907172996, + "grad_norm": 0.6582706570625305, + "learning_rate": 0.00065323594280135, + "loss": 1.5687, + "step": 5174 + }, + { + "epoch": 0.5458860759493671, + "grad_norm": 0.6254463791847229, + "learning_rate": 0.0006529869869293834, + "loss": 1.5495, + "step": 5175 + }, + { + "epoch": 0.5459915611814345, + "grad_norm": 0.7109028100967407, + "learning_rate": 0.0006527380419282088, + "loss": 1.5442, + "step": 5176 + }, + { + "epoch": 0.5460970464135021, + "grad_norm": 0.7003293037414551, + "learning_rate": 0.0006524891078257215, + "loss": 1.5762, + "step": 5177 + }, + { + "epoch": 0.5462025316455696, + "grad_norm": 0.6954199075698853, + "learning_rate": 0.000652240184649816, + "loss": 1.5943, + "step": 5178 + }, + { + "epoch": 0.5463080168776371, + "grad_norm": 0.6510816812515259, + "learning_rate": 0.0006519912724283851, + "loss": 1.5399, + "step": 5179 + }, + { + "epoch": 0.5464135021097046, + "grad_norm": 0.7785432934761047, + "learning_rate": 0.0006517423711893209, + "loss": 1.556, + "step": 5180 + }, + { + "epoch": 0.5465189873417722, + "grad_norm": 
0.6750881671905518, + "learning_rate": 0.000651493480960514, + "loss": 1.5493, + "step": 5181 + }, + { + "epoch": 0.5466244725738396, + "grad_norm": 0.6980860233306885, + "learning_rate": 0.0006512446017698537, + "loss": 1.5469, + "step": 5182 + }, + { + "epoch": 0.5467299578059072, + "grad_norm": 0.793106734752655, + "learning_rate": 0.0006509957336452279, + "loss": 1.5231, + "step": 5183 + }, + { + "epoch": 0.5468354430379747, + "grad_norm": 0.6032059788703918, + "learning_rate": 0.0006507468766145242, + "loss": 1.5609, + "step": 5184 + }, + { + "epoch": 0.5469409282700421, + "grad_norm": 0.7715347409248352, + "learning_rate": 0.000650498030705628, + "loss": 1.5236, + "step": 5185 + }, + { + "epoch": 0.5470464135021097, + "grad_norm": 0.6044836044311523, + "learning_rate": 0.0006502491959464235, + "loss": 1.581, + "step": 5186 + }, + { + "epoch": 0.5471518987341772, + "grad_norm": 0.7734372615814209, + "learning_rate": 0.000650000372364794, + "loss": 1.5284, + "step": 5187 + }, + { + "epoch": 0.5472573839662447, + "grad_norm": 0.6822202205657959, + "learning_rate": 0.0006497515599886214, + "loss": 1.5052, + "step": 5188 + }, + { + "epoch": 0.5473628691983122, + "grad_norm": 0.9415533542633057, + "learning_rate": 0.0006495027588457864, + "loss": 1.5506, + "step": 5189 + }, + { + "epoch": 0.5474683544303798, + "grad_norm": 0.6392835378646851, + "learning_rate": 0.0006492539689641685, + "loss": 1.512, + "step": 5190 + }, + { + "epoch": 0.5475738396624472, + "grad_norm": 0.7334082126617432, + "learning_rate": 0.0006490051903716454, + "loss": 1.5544, + "step": 5191 + }, + { + "epoch": 0.5476793248945148, + "grad_norm": 0.6250379085540771, + "learning_rate": 0.0006487564230960944, + "loss": 1.5549, + "step": 5192 + }, + { + "epoch": 0.5477848101265823, + "grad_norm": 0.7087116241455078, + "learning_rate": 0.0006485076671653913, + "loss": 1.5367, + "step": 5193 + }, + { + "epoch": 0.5478902953586497, + "grad_norm": 0.5535631775856018, + "learning_rate": 
0.00064825892260741, + "loss": 1.5583, + "step": 5194 + }, + { + "epoch": 0.5479957805907173, + "grad_norm": 0.6567651629447937, + "learning_rate": 0.0006480101894500239, + "loss": 1.5445, + "step": 5195 + }, + { + "epoch": 0.5481012658227848, + "grad_norm": 0.6035028100013733, + "learning_rate": 0.0006477614677211046, + "loss": 1.5721, + "step": 5196 + }, + { + "epoch": 0.5482067510548523, + "grad_norm": 0.6320479512214661, + "learning_rate": 0.0006475127574485226, + "loss": 1.5543, + "step": 5197 + }, + { + "epoch": 0.5483122362869198, + "grad_norm": 0.6385921239852905, + "learning_rate": 0.0006472640586601472, + "loss": 1.526, + "step": 5198 + }, + { + "epoch": 0.5484177215189874, + "grad_norm": 0.6353036761283875, + "learning_rate": 0.0006470153713838463, + "loss": 1.5391, + "step": 5199 + }, + { + "epoch": 0.5485232067510548, + "grad_norm": 0.6218320727348328, + "learning_rate": 0.0006467666956474865, + "loss": 1.5389, + "step": 5200 + }, + { + "epoch": 0.5486286919831224, + "grad_norm": 0.7175450921058655, + "learning_rate": 0.0006465180314789332, + "loss": 1.4878, + "step": 5201 + }, + { + "epoch": 0.5487341772151899, + "grad_norm": 0.616515040397644, + "learning_rate": 0.0006462693789060505, + "loss": 1.5465, + "step": 5202 + }, + { + "epoch": 0.5488396624472573, + "grad_norm": 0.6898708939552307, + "learning_rate": 0.0006460207379567011, + "loss": 1.5312, + "step": 5203 + }, + { + "epoch": 0.5489451476793249, + "grad_norm": 0.6797228455543518, + "learning_rate": 0.0006457721086587468, + "loss": 1.5319, + "step": 5204 + }, + { + "epoch": 0.5490506329113924, + "grad_norm": 0.6547464728355408, + "learning_rate": 0.0006455234910400472, + "loss": 1.5474, + "step": 5205 + }, + { + "epoch": 0.5491561181434599, + "grad_norm": 0.6503647565841675, + "learning_rate": 0.0006452748851284615, + "loss": 1.5212, + "step": 5206 + }, + { + "epoch": 0.5492616033755274, + "grad_norm": 0.6584779024124146, + "learning_rate": 0.0006450262909518471, + "loss": 1.5127, + "step": 
5207 + }, + { + "epoch": 0.549367088607595, + "grad_norm": 0.7055808305740356, + "learning_rate": 0.0006447777085380603, + "loss": 1.5596, + "step": 5208 + }, + { + "epoch": 0.5494725738396624, + "grad_norm": 0.5733956694602966, + "learning_rate": 0.0006445291379149556, + "loss": 1.5448, + "step": 5209 + }, + { + "epoch": 0.54957805907173, + "grad_norm": 0.6819512844085693, + "learning_rate": 0.0006442805791103873, + "loss": 1.5291, + "step": 5210 + }, + { + "epoch": 0.5496835443037975, + "grad_norm": 0.6217068433761597, + "learning_rate": 0.0006440320321522071, + "loss": 1.5629, + "step": 5211 + }, + { + "epoch": 0.549789029535865, + "grad_norm": 0.6899569034576416, + "learning_rate": 0.0006437834970682661, + "loss": 1.4989, + "step": 5212 + }, + { + "epoch": 0.5498945147679325, + "grad_norm": 0.6860758662223816, + "learning_rate": 0.000643534973886414, + "loss": 1.5752, + "step": 5213 + }, + { + "epoch": 0.55, + "grad_norm": 0.6319008469581604, + "learning_rate": 0.0006432864626344989, + "loss": 1.5536, + "step": 5214 + }, + { + "epoch": 0.5501054852320675, + "grad_norm": 0.6100592613220215, + "learning_rate": 0.0006430379633403679, + "loss": 1.5505, + "step": 5215 + }, + { + "epoch": 0.550210970464135, + "grad_norm": 0.7569424510002136, + "learning_rate": 0.0006427894760318664, + "loss": 1.5663, + "step": 5216 + }, + { + "epoch": 0.5503164556962026, + "grad_norm": 0.544675350189209, + "learning_rate": 0.0006425410007368385, + "loss": 1.5293, + "step": 5217 + }, + { + "epoch": 0.55042194092827, + "grad_norm": 0.7381376624107361, + "learning_rate": 0.0006422925374831275, + "loss": 1.5418, + "step": 5218 + }, + { + "epoch": 0.5505274261603376, + "grad_norm": 0.6460233926773071, + "learning_rate": 0.0006420440862985748, + "loss": 1.5663, + "step": 5219 + }, + { + "epoch": 0.5506329113924051, + "grad_norm": 0.7191435098648071, + "learning_rate": 0.0006417956472110205, + "loss": 1.5074, + "step": 5220 + }, + { + "epoch": 0.5507383966244725, + "grad_norm": 
0.6297436356544495, + "learning_rate": 0.0006415472202483034, + "loss": 1.532, + "step": 5221 + }, + { + "epoch": 0.5508438818565401, + "grad_norm": 0.5924860835075378, + "learning_rate": 0.0006412988054382611, + "loss": 1.5101, + "step": 5222 + }, + { + "epoch": 0.5509493670886076, + "grad_norm": 0.660748302936554, + "learning_rate": 0.0006410504028087297, + "loss": 1.5101, + "step": 5223 + }, + { + "epoch": 0.5510548523206751, + "grad_norm": 0.7160422205924988, + "learning_rate": 0.000640802012387544, + "loss": 1.5589, + "step": 5224 + }, + { + "epoch": 0.5511603375527426, + "grad_norm": 0.6451603770256042, + "learning_rate": 0.0006405536342025374, + "loss": 1.5463, + "step": 5225 + }, + { + "epoch": 0.5512658227848102, + "grad_norm": 0.5672247409820557, + "learning_rate": 0.0006403052682815415, + "loss": 1.5343, + "step": 5226 + }, + { + "epoch": 0.5513713080168776, + "grad_norm": 0.6898153424263, + "learning_rate": 0.0006400569146523875, + "loss": 1.5038, + "step": 5227 + }, + { + "epoch": 0.5514767932489452, + "grad_norm": 0.6569981575012207, + "learning_rate": 0.0006398085733429045, + "loss": 1.5885, + "step": 5228 + }, + { + "epoch": 0.5515822784810127, + "grad_norm": 0.6418317556381226, + "learning_rate": 0.0006395602443809203, + "loss": 1.5435, + "step": 5229 + }, + { + "epoch": 0.5516877637130801, + "grad_norm": 0.7315885424613953, + "learning_rate": 0.0006393119277942614, + "loss": 1.5523, + "step": 5230 + }, + { + "epoch": 0.5517932489451477, + "grad_norm": 0.5435857772827148, + "learning_rate": 0.0006390636236107528, + "loss": 1.5633, + "step": 5231 + }, + { + "epoch": 0.5518987341772152, + "grad_norm": 0.7744802832603455, + "learning_rate": 0.0006388153318582185, + "loss": 1.5272, + "step": 5232 + }, + { + "epoch": 0.5520042194092827, + "grad_norm": 0.5640437006950378, + "learning_rate": 0.0006385670525644806, + "loss": 1.4991, + "step": 5233 + }, + { + "epoch": 0.5521097046413502, + "grad_norm": 0.7884275913238525, + "learning_rate": 
0.0006383187857573601, + "loss": 1.5426, + "step": 5234 + }, + { + "epoch": 0.5522151898734177, + "grad_norm": 0.578475296497345, + "learning_rate": 0.0006380705314646765, + "loss": 1.5434, + "step": 5235 + }, + { + "epoch": 0.5523206751054852, + "grad_norm": 0.6844251751899719, + "learning_rate": 0.0006378222897142482, + "loss": 1.5517, + "step": 5236 + }, + { + "epoch": 0.5524261603375528, + "grad_norm": 0.5530648231506348, + "learning_rate": 0.0006375740605338916, + "loss": 1.5365, + "step": 5237 + }, + { + "epoch": 0.5525316455696202, + "grad_norm": 0.8369764089584351, + "learning_rate": 0.0006373258439514221, + "loss": 1.5229, + "step": 5238 + }, + { + "epoch": 0.5526371308016877, + "grad_norm": 0.5848723649978638, + "learning_rate": 0.0006370776399946536, + "loss": 1.5522, + "step": 5239 + }, + { + "epoch": 0.5527426160337553, + "grad_norm": 0.6661785244941711, + "learning_rate": 0.0006368294486913987, + "loss": 1.5695, + "step": 5240 + }, + { + "epoch": 0.5528481012658227, + "grad_norm": 0.6056700348854065, + "learning_rate": 0.0006365812700694683, + "loss": 1.5499, + "step": 5241 + }, + { + "epoch": 0.5529535864978903, + "grad_norm": 0.7702030539512634, + "learning_rate": 0.0006363331041566723, + "loss": 1.5254, + "step": 5242 + }, + { + "epoch": 0.5530590717299578, + "grad_norm": 0.545967698097229, + "learning_rate": 0.0006360849509808184, + "loss": 1.5147, + "step": 5243 + }, + { + "epoch": 0.5531645569620253, + "grad_norm": 0.8149393200874329, + "learning_rate": 0.0006358368105697142, + "loss": 1.5208, + "step": 5244 + }, + { + "epoch": 0.5532700421940928, + "grad_norm": 0.589394211769104, + "learning_rate": 0.0006355886829511645, + "loss": 1.5379, + "step": 5245 + }, + { + "epoch": 0.5533755274261604, + "grad_norm": 0.6207462549209595, + "learning_rate": 0.0006353405681529734, + "loss": 1.5477, + "step": 5246 + }, + { + "epoch": 0.5534810126582278, + "grad_norm": 0.6391516923904419, + "learning_rate": 0.0006350924662029433, + "loss": 1.545, + "step": 
5247 + }, + { + "epoch": 0.5535864978902953, + "grad_norm": 0.5999003052711487, + "learning_rate": 0.0006348443771288755, + "loss": 1.5477, + "step": 5248 + }, + { + "epoch": 0.5536919831223629, + "grad_norm": 0.7264077663421631, + "learning_rate": 0.0006345963009585694, + "loss": 1.5214, + "step": 5249 + }, + { + "epoch": 0.5537974683544303, + "grad_norm": 0.6848984360694885, + "learning_rate": 0.0006343482377198232, + "loss": 1.5638, + "step": 5250 + }, + { + "epoch": 0.5539029535864979, + "grad_norm": 0.6536538600921631, + "learning_rate": 0.0006341001874404335, + "loss": 1.5652, + "step": 5251 + }, + { + "epoch": 0.5540084388185654, + "grad_norm": 0.621658205986023, + "learning_rate": 0.0006338521501481957, + "loss": 1.5279, + "step": 5252 + }, + { + "epoch": 0.5541139240506329, + "grad_norm": 0.629499077796936, + "learning_rate": 0.0006336041258709039, + "loss": 1.5355, + "step": 5253 + }, + { + "epoch": 0.5542194092827004, + "grad_norm": 0.6146077513694763, + "learning_rate": 0.0006333561146363502, + "loss": 1.5547, + "step": 5254 + }, + { + "epoch": 0.554324894514768, + "grad_norm": 0.6651160717010498, + "learning_rate": 0.0006331081164723253, + "loss": 1.5445, + "step": 5255 + }, + { + "epoch": 0.5544303797468354, + "grad_norm": 0.7196467518806458, + "learning_rate": 0.000632860131406619, + "loss": 1.5105, + "step": 5256 + }, + { + "epoch": 0.554535864978903, + "grad_norm": 0.573121964931488, + "learning_rate": 0.0006326121594670191, + "loss": 1.5521, + "step": 5257 + }, + { + "epoch": 0.5546413502109705, + "grad_norm": 0.6035096049308777, + "learning_rate": 0.000632364200681312, + "loss": 1.5453, + "step": 5258 + }, + { + "epoch": 0.5547468354430379, + "grad_norm": 0.6521493196487427, + "learning_rate": 0.0006321162550772829, + "loss": 1.5622, + "step": 5259 + }, + { + "epoch": 0.5548523206751055, + "grad_norm": 0.6378546953201294, + "learning_rate": 0.0006318683226827151, + "loss": 1.5211, + "step": 5260 + }, + { + "epoch": 0.554957805907173, + 
"grad_norm": 0.6392876505851746, + "learning_rate": 0.0006316204035253906, + "loss": 1.5413, + "step": 5261 + }, + { + "epoch": 0.5550632911392405, + "grad_norm": 0.6312471032142639, + "learning_rate": 0.0006313724976330904, + "loss": 1.5422, + "step": 5262 + }, + { + "epoch": 0.555168776371308, + "grad_norm": 0.6030455827713013, + "learning_rate": 0.0006311246050335934, + "loss": 1.5472, + "step": 5263 + }, + { + "epoch": 0.5552742616033756, + "grad_norm": 0.6075432896614075, + "learning_rate": 0.0006308767257546772, + "loss": 1.5187, + "step": 5264 + }, + { + "epoch": 0.555379746835443, + "grad_norm": 0.6935825347900391, + "learning_rate": 0.0006306288598241179, + "loss": 1.5439, + "step": 5265 + }, + { + "epoch": 0.5554852320675105, + "grad_norm": 0.7143839001655579, + "learning_rate": 0.00063038100726969, + "loss": 1.5363, + "step": 5266 + }, + { + "epoch": 0.5555907172995781, + "grad_norm": 0.7306966185569763, + "learning_rate": 0.0006301331681191668, + "loss": 1.5622, + "step": 5267 + }, + { + "epoch": 0.5556962025316455, + "grad_norm": 0.7949637174606323, + "learning_rate": 0.0006298853424003199, + "loss": 1.5595, + "step": 5268 + }, + { + "epoch": 0.5558016877637131, + "grad_norm": 0.6167739033699036, + "learning_rate": 0.0006296375301409187, + "loss": 1.5538, + "step": 5269 + }, + { + "epoch": 0.5559071729957806, + "grad_norm": 0.794842541217804, + "learning_rate": 0.0006293897313687331, + "loss": 1.5573, + "step": 5270 + }, + { + "epoch": 0.5560126582278481, + "grad_norm": 0.6986887454986572, + "learning_rate": 0.0006291419461115293, + "loss": 1.541, + "step": 5271 + }, + { + "epoch": 0.5561181434599156, + "grad_norm": 0.6766376495361328, + "learning_rate": 0.0006288941743970732, + "loss": 1.564, + "step": 5272 + }, + { + "epoch": 0.5562236286919832, + "grad_norm": 0.6193362474441528, + "learning_rate": 0.0006286464162531287, + "loss": 1.5069, + "step": 5273 + }, + { + "epoch": 0.5563291139240506, + "grad_norm": 0.8729059100151062, + "learning_rate": 
0.0006283986717074585, + "loss": 1.5127, + "step": 5274 + }, + { + "epoch": 0.5564345991561181, + "grad_norm": 0.6612906455993652, + "learning_rate": 0.0006281509407878232, + "loss": 1.5249, + "step": 5275 + }, + { + "epoch": 0.5565400843881857, + "grad_norm": 0.78849858045578, + "learning_rate": 0.0006279032235219829, + "loss": 1.5441, + "step": 5276 + }, + { + "epoch": 0.5566455696202531, + "grad_norm": 0.701620876789093, + "learning_rate": 0.0006276555199376951, + "loss": 1.5637, + "step": 5277 + }, + { + "epoch": 0.5567510548523207, + "grad_norm": 0.6448178887367249, + "learning_rate": 0.000627407830062716, + "loss": 1.539, + "step": 5278 + }, + { + "epoch": 0.5568565400843882, + "grad_norm": 0.8222794532775879, + "learning_rate": 0.0006271601539248012, + "loss": 1.5524, + "step": 5279 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 0.59568852186203, + "learning_rate": 0.0006269124915517037, + "loss": 1.5287, + "step": 5280 + }, + { + "epoch": 0.5570675105485232, + "grad_norm": 0.5703412890434265, + "learning_rate": 0.0006266648429711753, + "loss": 1.5471, + "step": 5281 + }, + { + "epoch": 0.5571729957805908, + "grad_norm": 0.6437538266181946, + "learning_rate": 0.0006264172082109661, + "loss": 1.5309, + "step": 5282 + }, + { + "epoch": 0.5572784810126582, + "grad_norm": 0.6137694120407104, + "learning_rate": 0.0006261695872988252, + "loss": 1.5077, + "step": 5283 + }, + { + "epoch": 0.5573839662447257, + "grad_norm": 0.6168452501296997, + "learning_rate": 0.0006259219802624994, + "loss": 1.5442, + "step": 5284 + }, + { + "epoch": 0.5574894514767933, + "grad_norm": 0.7098227739334106, + "learning_rate": 0.0006256743871297344, + "loss": 1.5353, + "step": 5285 + }, + { + "epoch": 0.5575949367088607, + "grad_norm": 0.5793830752372742, + "learning_rate": 0.0006254268079282743, + "loss": 1.5322, + "step": 5286 + }, + { + "epoch": 0.5577004219409283, + "grad_norm": 0.5650883913040161, + "learning_rate": 0.0006251792426858612, + "loss": 1.5155, + "step": 5287 
+ }, + { + "epoch": 0.5578059071729958, + "grad_norm": 0.6356245279312134, + "learning_rate": 0.0006249316914302368, + "loss": 1.5393, + "step": 5288 + }, + { + "epoch": 0.5579113924050633, + "grad_norm": 0.5986429452896118, + "learning_rate": 0.0006246841541891399, + "loss": 1.5619, + "step": 5289 + }, + { + "epoch": 0.5580168776371308, + "grad_norm": 0.6130163669586182, + "learning_rate": 0.0006244366309903084, + "loss": 1.5744, + "step": 5290 + }, + { + "epoch": 0.5581223628691984, + "grad_norm": 0.5576313138008118, + "learning_rate": 0.0006241891218614786, + "loss": 1.5481, + "step": 5291 + }, + { + "epoch": 0.5582278481012658, + "grad_norm": 0.6254655122756958, + "learning_rate": 0.0006239416268303849, + "loss": 1.5115, + "step": 5292 + }, + { + "epoch": 0.5583333333333333, + "grad_norm": 0.6432116031646729, + "learning_rate": 0.0006236941459247606, + "loss": 1.534, + "step": 5293 + }, + { + "epoch": 0.5584388185654009, + "grad_norm": 0.5750532746315002, + "learning_rate": 0.0006234466791723371, + "loss": 1.5538, + "step": 5294 + }, + { + "epoch": 0.5585443037974683, + "grad_norm": 0.5731672048568726, + "learning_rate": 0.0006231992266008438, + "loss": 1.5299, + "step": 5295 + }, + { + "epoch": 0.5586497890295359, + "grad_norm": 0.6065917611122131, + "learning_rate": 0.00062295178823801, + "loss": 1.5642, + "step": 5296 + }, + { + "epoch": 0.5587552742616034, + "grad_norm": 0.5931791067123413, + "learning_rate": 0.0006227043641115616, + "loss": 1.5264, + "step": 5297 + }, + { + "epoch": 0.5588607594936709, + "grad_norm": 0.6344130635261536, + "learning_rate": 0.0006224569542492241, + "loss": 1.5144, + "step": 5298 + }, + { + "epoch": 0.5589662447257384, + "grad_norm": 0.594961404800415, + "learning_rate": 0.0006222095586787208, + "loss": 1.571, + "step": 5299 + }, + { + "epoch": 0.5590717299578059, + "grad_norm": 0.7493018507957458, + "learning_rate": 0.0006219621774277737, + "loss": 1.5604, + "step": 5300 + }, + { + "epoch": 0.5591772151898734, + "grad_norm": 
0.5778586864471436, + "learning_rate": 0.000621714810524103, + "loss": 1.5252, + "step": 5301 + }, + { + "epoch": 0.559282700421941, + "grad_norm": 0.716266930103302, + "learning_rate": 0.0006214674579954276, + "loss": 1.5284, + "step": 5302 + }, + { + "epoch": 0.5593881856540084, + "grad_norm": 0.6776962876319885, + "learning_rate": 0.0006212201198694643, + "loss": 1.5458, + "step": 5303 + }, + { + "epoch": 0.5594936708860759, + "grad_norm": 0.5841091871261597, + "learning_rate": 0.0006209727961739286, + "loss": 1.531, + "step": 5304 + }, + { + "epoch": 0.5595991561181435, + "grad_norm": 0.7555704116821289, + "learning_rate": 0.0006207254869365346, + "loss": 1.5605, + "step": 5305 + }, + { + "epoch": 0.5597046413502109, + "grad_norm": 0.6579040884971619, + "learning_rate": 0.0006204781921849945, + "loss": 1.5559, + "step": 5306 + }, + { + "epoch": 0.5598101265822785, + "grad_norm": 0.6672948598861694, + "learning_rate": 0.0006202309119470188, + "loss": 1.5375, + "step": 5307 + }, + { + "epoch": 0.559915611814346, + "grad_norm": 0.559377133846283, + "learning_rate": 0.0006199836462503166, + "loss": 1.4888, + "step": 5308 + }, + { + "epoch": 0.5600210970464135, + "grad_norm": 0.7640194296836853, + "learning_rate": 0.0006197363951225951, + "loss": 1.5358, + "step": 5309 + }, + { + "epoch": 0.560126582278481, + "grad_norm": 0.704275906085968, + "learning_rate": 0.00061948915859156, + "loss": 1.5267, + "step": 5310 + }, + { + "epoch": 0.5602320675105485, + "grad_norm": 0.5601935982704163, + "learning_rate": 0.0006192419366849155, + "loss": 1.5393, + "step": 5311 + }, + { + "epoch": 0.560337552742616, + "grad_norm": 0.6680573225021362, + "learning_rate": 0.0006189947294303641, + "loss": 1.5221, + "step": 5312 + }, + { + "epoch": 0.5604430379746835, + "grad_norm": 0.6023024916648865, + "learning_rate": 0.000618747536855606, + "loss": 1.5183, + "step": 5313 + }, + { + "epoch": 0.5605485232067511, + "grad_norm": 0.5906475782394409, + "learning_rate": 0.0006185003589883413, 
+ "loss": 1.5069, + "step": 5314 + }, + { + "epoch": 0.5606540084388185, + "grad_norm": 0.6533382534980774, + "learning_rate": 0.0006182531958562672, + "loss": 1.5235, + "step": 5315 + }, + { + "epoch": 0.5607594936708861, + "grad_norm": 0.615663468837738, + "learning_rate": 0.0006180060474870793, + "loss": 1.5589, + "step": 5316 + }, + { + "epoch": 0.5608649789029536, + "grad_norm": 0.7077829837799072, + "learning_rate": 0.0006177589139084721, + "loss": 1.5003, + "step": 5317 + }, + { + "epoch": 0.560970464135021, + "grad_norm": 0.6145153641700745, + "learning_rate": 0.000617511795148138, + "loss": 1.5593, + "step": 5318 + }, + { + "epoch": 0.5610759493670886, + "grad_norm": 0.6223384737968445, + "learning_rate": 0.0006172646912337678, + "loss": 1.543, + "step": 5319 + }, + { + "epoch": 0.5611814345991561, + "grad_norm": 0.6791753768920898, + "learning_rate": 0.0006170176021930509, + "loss": 1.5305, + "step": 5320 + }, + { + "epoch": 0.5612869198312236, + "grad_norm": 0.6314692497253418, + "learning_rate": 0.0006167705280536745, + "loss": 1.5719, + "step": 5321 + }, + { + "epoch": 0.5613924050632911, + "grad_norm": 0.6767950654029846, + "learning_rate": 0.000616523468843325, + "loss": 1.5605, + "step": 5322 + }, + { + "epoch": 0.5614978902953587, + "grad_norm": 0.6456867456436157, + "learning_rate": 0.0006162764245896863, + "loss": 1.5535, + "step": 5323 + }, + { + "epoch": 0.5616033755274261, + "grad_norm": 0.6205055117607117, + "learning_rate": 0.0006160293953204412, + "loss": 1.5403, + "step": 5324 + }, + { + "epoch": 0.5617088607594937, + "grad_norm": 0.7478358745574951, + "learning_rate": 0.0006157823810632704, + "loss": 1.5238, + "step": 5325 + }, + { + "epoch": 0.5618143459915612, + "grad_norm": 0.685676634311676, + "learning_rate": 0.000615535381845853, + "loss": 1.5468, + "step": 5326 + }, + { + "epoch": 0.5619198312236287, + "grad_norm": 0.6257274150848389, + "learning_rate": 0.0006152883976958665, + "loss": 1.5083, + "step": 5327 + }, + { + "epoch": 
0.5620253164556962, + "grad_norm": 0.5990146398544312, + "learning_rate": 0.0006150414286409869, + "loss": 1.5051, + "step": 5328 + }, + { + "epoch": 0.5621308016877637, + "grad_norm": 0.8479170799255371, + "learning_rate": 0.0006147944747088881, + "loss": 1.5427, + "step": 5329 + }, + { + "epoch": 0.5622362869198312, + "grad_norm": 0.6265409588813782, + "learning_rate": 0.0006145475359272424, + "loss": 1.5397, + "step": 5330 + }, + { + "epoch": 0.5623417721518987, + "grad_norm": 0.7842718958854675, + "learning_rate": 0.0006143006123237208, + "loss": 1.5335, + "step": 5331 + }, + { + "epoch": 0.5624472573839663, + "grad_norm": 0.616702139377594, + "learning_rate": 0.0006140537039259925, + "loss": 1.5489, + "step": 5332 + }, + { + "epoch": 0.5625527426160337, + "grad_norm": 0.7291362881660461, + "learning_rate": 0.0006138068107617244, + "loss": 1.5086, + "step": 5333 + }, + { + "epoch": 0.5626582278481013, + "grad_norm": 0.6000558137893677, + "learning_rate": 0.0006135599328585824, + "loss": 1.5484, + "step": 5334 + }, + { + "epoch": 0.5627637130801688, + "grad_norm": 0.6492193341255188, + "learning_rate": 0.0006133130702442302, + "loss": 1.544, + "step": 5335 + }, + { + "epoch": 0.5628691983122363, + "grad_norm": 0.6448333263397217, + "learning_rate": 0.0006130662229463301, + "loss": 1.5469, + "step": 5336 + }, + { + "epoch": 0.5629746835443038, + "grad_norm": 0.6665742993354797, + "learning_rate": 0.0006128193909925425, + "loss": 1.518, + "step": 5337 + }, + { + "epoch": 0.5630801687763713, + "grad_norm": 0.6928816437721252, + "learning_rate": 0.0006125725744105263, + "loss": 1.5295, + "step": 5338 + }, + { + "epoch": 0.5631856540084388, + "grad_norm": 0.6036021709442139, + "learning_rate": 0.000612325773227938, + "loss": 1.535, + "step": 5339 + }, + { + "epoch": 0.5632911392405063, + "grad_norm": 0.6506156325340271, + "learning_rate": 0.0006120789874724336, + "loss": 1.5448, + "step": 5340 + }, + { + "epoch": 0.5633966244725739, + "grad_norm": 0.623649001121521, 
+ "learning_rate": 0.0006118322171716665, + "loss": 1.58, + "step": 5341 + }, + { + "epoch": 0.5635021097046413, + "grad_norm": 0.6435539722442627, + "learning_rate": 0.0006115854623532884, + "loss": 1.5255, + "step": 5342 + }, + { + "epoch": 0.5636075949367089, + "grad_norm": 0.5531496405601501, + "learning_rate": 0.0006113387230449493, + "loss": 1.5484, + "step": 5343 + }, + { + "epoch": 0.5637130801687764, + "grad_norm": 0.658439576625824, + "learning_rate": 0.0006110919992742978, + "loss": 1.5164, + "step": 5344 + }, + { + "epoch": 0.5638185654008439, + "grad_norm": 0.6358847618103027, + "learning_rate": 0.0006108452910689804, + "loss": 1.5415, + "step": 5345 + }, + { + "epoch": 0.5639240506329114, + "grad_norm": 0.6039479970932007, + "learning_rate": 0.0006105985984566421, + "loss": 1.5185, + "step": 5346 + }, + { + "epoch": 0.564029535864979, + "grad_norm": 0.7352444529533386, + "learning_rate": 0.0006103519214649256, + "loss": 1.4776, + "step": 5347 + }, + { + "epoch": 0.5641350210970464, + "grad_norm": 0.5668253898620605, + "learning_rate": 0.000610105260121473, + "loss": 1.5086, + "step": 5348 + }, + { + "epoch": 0.5642405063291139, + "grad_norm": 0.6543635129928589, + "learning_rate": 0.0006098586144539235, + "loss": 1.5312, + "step": 5349 + }, + { + "epoch": 0.5643459915611815, + "grad_norm": 0.6188797950744629, + "learning_rate": 0.0006096119844899151, + "loss": 1.5147, + "step": 5350 + }, + { + "epoch": 0.5644514767932489, + "grad_norm": 0.6188855171203613, + "learning_rate": 0.000609365370257084, + "loss": 1.5177, + "step": 5351 + }, + { + "epoch": 0.5645569620253165, + "grad_norm": 0.6535983085632324, + "learning_rate": 0.0006091187717830643, + "loss": 1.5237, + "step": 5352 + }, + { + "epoch": 0.564662447257384, + "grad_norm": 0.6657421588897705, + "learning_rate": 0.0006088721890954887, + "loss": 1.5444, + "step": 5353 + }, + { + "epoch": 0.5647679324894515, + "grad_norm": 0.6670475602149963, + "learning_rate": 0.0006086256222219881, + "loss": 
1.5366, + "step": 5354 + }, + { + "epoch": 0.564873417721519, + "grad_norm": 0.6328800916671753, + "learning_rate": 0.0006083790711901915, + "loss": 1.5395, + "step": 5355 + }, + { + "epoch": 0.5649789029535865, + "grad_norm": 0.6678476929664612, + "learning_rate": 0.0006081325360277257, + "loss": 1.5044, + "step": 5356 + }, + { + "epoch": 0.565084388185654, + "grad_norm": 0.6951276063919067, + "learning_rate": 0.0006078860167622171, + "loss": 1.5265, + "step": 5357 + }, + { + "epoch": 0.5651898734177215, + "grad_norm": 0.7044052481651306, + "learning_rate": 0.000607639513421289, + "loss": 1.542, + "step": 5358 + }, + { + "epoch": 0.5652953586497891, + "grad_norm": 0.6964508891105652, + "learning_rate": 0.0006073930260325632, + "loss": 1.5488, + "step": 5359 + }, + { + "epoch": 0.5654008438818565, + "grad_norm": 0.774875819683075, + "learning_rate": 0.0006071465546236601, + "loss": 1.5716, + "step": 5360 + }, + { + "epoch": 0.5655063291139241, + "grad_norm": 0.5620792508125305, + "learning_rate": 0.0006069000992221977, + "loss": 1.5485, + "step": 5361 + }, + { + "epoch": 0.5656118143459916, + "grad_norm": 0.6676979660987854, + "learning_rate": 0.0006066536598557927, + "loss": 1.5469, + "step": 5362 + }, + { + "epoch": 0.565717299578059, + "grad_norm": 0.651387095451355, + "learning_rate": 0.0006064072365520601, + "loss": 1.5986, + "step": 5363 + }, + { + "epoch": 0.5658227848101266, + "grad_norm": 0.6536668539047241, + "learning_rate": 0.0006061608293386126, + "loss": 1.5309, + "step": 5364 + }, + { + "epoch": 0.5659282700421941, + "grad_norm": 0.6820849776268005, + "learning_rate": 0.0006059144382430612, + "loss": 1.5218, + "step": 5365 + }, + { + "epoch": 0.5660337552742616, + "grad_norm": 0.6713017821311951, + "learning_rate": 0.0006056680632930154, + "loss": 1.5458, + "step": 5366 + }, + { + "epoch": 0.5661392405063291, + "grad_norm": 0.6142509579658508, + "learning_rate": 0.0006054217045160831, + "loss": 1.5301, + "step": 5367 + }, + { + "epoch": 
0.5662447257383966, + "grad_norm": 0.6360639929771423, + "learning_rate": 0.0006051753619398697, + "loss": 1.5335, + "step": 5368 + }, + { + "epoch": 0.5663502109704641, + "grad_norm": 0.5527632236480713, + "learning_rate": 0.0006049290355919792, + "loss": 1.4965, + "step": 5369 + }, + { + "epoch": 0.5664556962025317, + "grad_norm": 0.6416233777999878, + "learning_rate": 0.0006046827255000135, + "loss": 1.5443, + "step": 5370 + }, + { + "epoch": 0.5665611814345991, + "grad_norm": 0.5692195892333984, + "learning_rate": 0.0006044364316915733, + "loss": 1.5418, + "step": 5371 + }, + { + "epoch": 0.5666666666666667, + "grad_norm": 0.6357423067092896, + "learning_rate": 0.0006041901541942565, + "loss": 1.53, + "step": 5372 + }, + { + "epoch": 0.5667721518987342, + "grad_norm": 0.5425747036933899, + "learning_rate": 0.0006039438930356601, + "loss": 1.5052, + "step": 5373 + }, + { + "epoch": 0.5668776371308016, + "grad_norm": 0.6713323593139648, + "learning_rate": 0.0006036976482433787, + "loss": 1.5379, + "step": 5374 + }, + { + "epoch": 0.5669831223628692, + "grad_norm": 0.5888932943344116, + "learning_rate": 0.0006034514198450053, + "loss": 1.5235, + "step": 5375 + }, + { + "epoch": 0.5670886075949367, + "grad_norm": 0.650634765625, + "learning_rate": 0.0006032052078681312, + "loss": 1.4957, + "step": 5376 + }, + { + "epoch": 0.5671940928270042, + "grad_norm": 0.5943626761436462, + "learning_rate": 0.0006029590123403456, + "loss": 1.579, + "step": 5377 + }, + { + "epoch": 0.5672995780590717, + "grad_norm": 0.6564143896102905, + "learning_rate": 0.0006027128332892358, + "loss": 1.4848, + "step": 5378 + }, + { + "epoch": 0.5674050632911393, + "grad_norm": 0.5686602592468262, + "learning_rate": 0.0006024666707423875, + "loss": 1.5429, + "step": 5379 + }, + { + "epoch": 0.5675105485232067, + "grad_norm": 0.5503216981887817, + "learning_rate": 0.0006022205247273845, + "loss": 1.5225, + "step": 5380 + }, + { + "epoch": 0.5676160337552743, + "grad_norm": 0.6098427176475525, + 
"learning_rate": 0.0006019743952718085, + "loss": 1.5047, + "step": 5381 + }, + { + "epoch": 0.5677215189873418, + "grad_norm": 0.6448250412940979, + "learning_rate": 0.0006017282824032394, + "loss": 1.4854, + "step": 5382 + }, + { + "epoch": 0.5678270042194092, + "grad_norm": 0.5809668898582458, + "learning_rate": 0.0006014821861492559, + "loss": 1.5319, + "step": 5383 + }, + { + "epoch": 0.5679324894514768, + "grad_norm": 0.6603602170944214, + "learning_rate": 0.0006012361065374339, + "loss": 1.5397, + "step": 5384 + }, + { + "epoch": 0.5680379746835443, + "grad_norm": 0.6679709553718567, + "learning_rate": 0.0006009900435953478, + "loss": 1.5257, + "step": 5385 + }, + { + "epoch": 0.5681434599156118, + "grad_norm": 0.6081859469413757, + "learning_rate": 0.0006007439973505707, + "loss": 1.5504, + "step": 5386 + }, + { + "epoch": 0.5682489451476793, + "grad_norm": 0.6924006938934326, + "learning_rate": 0.0006004979678306729, + "loss": 1.5394, + "step": 5387 + }, + { + "epoch": 0.5683544303797469, + "grad_norm": 0.6151008009910583, + "learning_rate": 0.0006002519550632232, + "loss": 1.5261, + "step": 5388 + }, + { + "epoch": 0.5684599156118143, + "grad_norm": 0.61293625831604, + "learning_rate": 0.0006000059590757886, + "loss": 1.5408, + "step": 5389 + }, + { + "epoch": 0.5685654008438819, + "grad_norm": 0.6323763728141785, + "learning_rate": 0.0005997599798959343, + "loss": 1.547, + "step": 5390 + }, + { + "epoch": 0.5686708860759494, + "grad_norm": 0.5640910863876343, + "learning_rate": 0.0005995140175512233, + "loss": 1.5175, + "step": 5391 + }, + { + "epoch": 0.5687763713080168, + "grad_norm": 0.6477962732315063, + "learning_rate": 0.000599268072069217, + "loss": 1.5261, + "step": 5392 + }, + { + "epoch": 0.5688818565400844, + "grad_norm": 0.6275855302810669, + "learning_rate": 0.0005990221434774751, + "loss": 1.5549, + "step": 5393 + }, + { + "epoch": 0.5689873417721519, + "grad_norm": 0.5993220806121826, + "learning_rate": 0.0005987762318035546, + "loss": 
1.5355, + "step": 5394 + }, + { + "epoch": 0.5690928270042194, + "grad_norm": 0.6314303278923035, + "learning_rate": 0.0005985303370750115, + "loss": 1.5188, + "step": 5395 + }, + { + "epoch": 0.5691983122362869, + "grad_norm": 0.6571354866027832, + "learning_rate": 0.0005982844593193995, + "loss": 1.5289, + "step": 5396 + }, + { + "epoch": 0.5693037974683545, + "grad_norm": 0.6429842114448547, + "learning_rate": 0.0005980385985642703, + "loss": 1.5574, + "step": 5397 + }, + { + "epoch": 0.5694092827004219, + "grad_norm": 0.6437500715255737, + "learning_rate": 0.000597792754837174, + "loss": 1.5335, + "step": 5398 + }, + { + "epoch": 0.5695147679324895, + "grad_norm": 0.7287536263465881, + "learning_rate": 0.0005975469281656581, + "loss": 1.5093, + "step": 5399 + }, + { + "epoch": 0.569620253164557, + "grad_norm": 0.6198468804359436, + "learning_rate": 0.0005973011185772694, + "loss": 1.5058, + "step": 5400 + }, + { + "epoch": 0.5697257383966244, + "grad_norm": 0.6361778378486633, + "learning_rate": 0.0005970553260995517, + "loss": 1.5387, + "step": 5401 + }, + { + "epoch": 0.569831223628692, + "grad_norm": 0.6623300313949585, + "learning_rate": 0.0005968095507600476, + "loss": 1.533, + "step": 5402 + }, + { + "epoch": 0.5699367088607595, + "grad_norm": 0.6279090642929077, + "learning_rate": 0.000596563792586297, + "loss": 1.5558, + "step": 5403 + }, + { + "epoch": 0.570042194092827, + "grad_norm": 0.6113454103469849, + "learning_rate": 0.0005963180516058386, + "loss": 1.5759, + "step": 5404 + }, + { + "epoch": 0.5701476793248945, + "grad_norm": 0.6433221697807312, + "learning_rate": 0.0005960723278462086, + "loss": 1.5209, + "step": 5405 + }, + { + "epoch": 0.5702531645569621, + "grad_norm": 0.6538277268409729, + "learning_rate": 0.0005958266213349422, + "loss": 1.4974, + "step": 5406 + }, + { + "epoch": 0.5703586497890295, + "grad_norm": 0.6336203813552856, + "learning_rate": 0.0005955809320995714, + "loss": 1.5207, + "step": 5407 + }, + { + "epoch": 
0.570464135021097, + "grad_norm": 0.7956374883651733, + "learning_rate": 0.0005953352601676272, + "loss": 1.5394, + "step": 5408 + }, + { + "epoch": 0.5705696202531646, + "grad_norm": 0.612508237361908, + "learning_rate": 0.0005950896055666384, + "loss": 1.5461, + "step": 5409 + }, + { + "epoch": 0.570675105485232, + "grad_norm": 0.7446142435073853, + "learning_rate": 0.0005948439683241318, + "loss": 1.5098, + "step": 5410 + }, + { + "epoch": 0.5707805907172996, + "grad_norm": 0.6443213820457458, + "learning_rate": 0.0005945983484676321, + "loss": 1.5287, + "step": 5411 + }, + { + "epoch": 0.5708860759493671, + "grad_norm": 0.5886332988739014, + "learning_rate": 0.0005943527460246625, + "loss": 1.5311, + "step": 5412 + }, + { + "epoch": 0.5709915611814346, + "grad_norm": 0.6746214628219604, + "learning_rate": 0.0005941071610227437, + "loss": 1.5434, + "step": 5413 + }, + { + "epoch": 0.5710970464135021, + "grad_norm": 0.6042154431343079, + "learning_rate": 0.000593861593489395, + "loss": 1.4789, + "step": 5414 + }, + { + "epoch": 0.5712025316455697, + "grad_norm": 0.663906991481781, + "learning_rate": 0.000593616043452133, + "loss": 1.5598, + "step": 5415 + }, + { + "epoch": 0.5713080168776371, + "grad_norm": 0.5602854490280151, + "learning_rate": 0.0005933705109384735, + "loss": 1.5185, + "step": 5416 + }, + { + "epoch": 0.5714135021097047, + "grad_norm": 0.6618138551712036, + "learning_rate": 0.000593124995975929, + "loss": 1.5437, + "step": 5417 + }, + { + "epoch": 0.5715189873417722, + "grad_norm": 0.6140236854553223, + "learning_rate": 0.000592879498592011, + "loss": 1.533, + "step": 5418 + }, + { + "epoch": 0.5716244725738396, + "grad_norm": 0.6186416149139404, + "learning_rate": 0.0005926340188142289, + "loss": 1.5035, + "step": 5419 + }, + { + "epoch": 0.5717299578059072, + "grad_norm": 0.6562359929084778, + "learning_rate": 0.0005923885566700896, + "loss": 1.5195, + "step": 5420 + }, + { + "epoch": 0.5718354430379747, + "grad_norm": 0.6578521132469177, + 
"learning_rate": 0.0005921431121870984, + "loss": 1.5547, + "step": 5421 + }, + { + "epoch": 0.5719409282700422, + "grad_norm": 0.6185148358345032, + "learning_rate": 0.0005918976853927586, + "loss": 1.5383, + "step": 5422 + }, + { + "epoch": 0.5720464135021097, + "grad_norm": 0.6179506778717041, + "learning_rate": 0.0005916522763145715, + "loss": 1.4912, + "step": 5423 + }, + { + "epoch": 0.5721518987341773, + "grad_norm": 0.7185723185539246, + "learning_rate": 0.0005914068849800365, + "loss": 1.5066, + "step": 5424 + }, + { + "epoch": 0.5722573839662447, + "grad_norm": 0.6261352896690369, + "learning_rate": 0.0005911615114166508, + "loss": 1.5279, + "step": 5425 + }, + { + "epoch": 0.5723628691983123, + "grad_norm": 0.8394593000411987, + "learning_rate": 0.0005909161556519096, + "loss": 1.5439, + "step": 5426 + }, + { + "epoch": 0.5724683544303798, + "grad_norm": 0.7291346788406372, + "learning_rate": 0.0005906708177133066, + "loss": 1.5468, + "step": 5427 + }, + { + "epoch": 0.5725738396624472, + "grad_norm": 0.6366363167762756, + "learning_rate": 0.0005904254976283331, + "loss": 1.5128, + "step": 5428 + }, + { + "epoch": 0.5726793248945148, + "grad_norm": 0.7141589522361755, + "learning_rate": 0.0005901801954244782, + "loss": 1.5237, + "step": 5429 + }, + { + "epoch": 0.5727848101265823, + "grad_norm": 0.6571366786956787, + "learning_rate": 0.0005899349111292293, + "loss": 1.4977, + "step": 5430 + }, + { + "epoch": 0.5728902953586498, + "grad_norm": 0.6777145862579346, + "learning_rate": 0.0005896896447700718, + "loss": 1.5522, + "step": 5431 + }, + { + "epoch": 0.5729957805907173, + "grad_norm": 0.6733178496360779, + "learning_rate": 0.0005894443963744891, + "loss": 1.5132, + "step": 5432 + }, + { + "epoch": 0.5731012658227848, + "grad_norm": 0.7218380570411682, + "learning_rate": 0.0005891991659699622, + "loss": 1.4964, + "step": 5433 + }, + { + "epoch": 0.5732067510548523, + "grad_norm": 0.7145645618438721, + "learning_rate": 0.0005889539535839704, + "loss": 
1.5738, + "step": 5434 + }, + { + "epoch": 0.5733122362869199, + "grad_norm": 0.6862101554870605, + "learning_rate": 0.0005887087592439914, + "loss": 1.4846, + "step": 5435 + }, + { + "epoch": 0.5734177215189873, + "grad_norm": 0.8179143071174622, + "learning_rate": 0.0005884635829775002, + "loss": 1.5161, + "step": 5436 + }, + { + "epoch": 0.5735232067510548, + "grad_norm": 0.8279079794883728, + "learning_rate": 0.00058821842481197, + "loss": 1.5023, + "step": 5437 + }, + { + "epoch": 0.5736286919831224, + "grad_norm": 0.6726477146148682, + "learning_rate": 0.0005879732847748721, + "loss": 1.5481, + "step": 5438 + }, + { + "epoch": 0.5737341772151898, + "grad_norm": 0.804387092590332, + "learning_rate": 0.0005877281628936756, + "loss": 1.5358, + "step": 5439 + }, + { + "epoch": 0.5738396624472574, + "grad_norm": 0.6040371060371399, + "learning_rate": 0.0005874830591958474, + "loss": 1.5223, + "step": 5440 + }, + { + "epoch": 0.5739451476793249, + "grad_norm": 0.6221067309379578, + "learning_rate": 0.000587237973708853, + "loss": 1.5321, + "step": 5441 + }, + { + "epoch": 0.5740506329113924, + "grad_norm": 0.6692221760749817, + "learning_rate": 0.0005869929064601551, + "loss": 1.5394, + "step": 5442 + }, + { + "epoch": 0.5741561181434599, + "grad_norm": 0.6738124489784241, + "learning_rate": 0.0005867478574772147, + "loss": 1.5367, + "step": 5443 + }, + { + "epoch": 0.5742616033755275, + "grad_norm": 0.7170299291610718, + "learning_rate": 0.0005865028267874911, + "loss": 1.5381, + "step": 5444 + }, + { + "epoch": 0.5743670886075949, + "grad_norm": 0.5945865511894226, + "learning_rate": 0.0005862578144184412, + "loss": 1.4979, + "step": 5445 + }, + { + "epoch": 0.5744725738396624, + "grad_norm": 0.655207097530365, + "learning_rate": 0.0005860128203975196, + "loss": 1.5462, + "step": 5446 + }, + { + "epoch": 0.57457805907173, + "grad_norm": 0.6457309126853943, + "learning_rate": 0.0005857678447521791, + "loss": 1.551, + "step": 5447 + }, + { + "epoch": 
0.5746835443037974, + "grad_norm": 0.6294080018997192, + "learning_rate": 0.0005855228875098706, + "loss": 1.528, + "step": 5448 + }, + { + "epoch": 0.574789029535865, + "grad_norm": 0.6393494606018066, + "learning_rate": 0.0005852779486980427, + "loss": 1.5213, + "step": 5449 + }, + { + "epoch": 0.5748945147679325, + "grad_norm": 0.6646655201911926, + "learning_rate": 0.000585033028344142, + "loss": 1.513, + "step": 5450 + }, + { + "epoch": 0.575, + "grad_norm": 0.6045975685119629, + "learning_rate": 0.0005847881264756131, + "loss": 1.5252, + "step": 5451 + }, + { + "epoch": 0.5751054852320675, + "grad_norm": 0.6147010326385498, + "learning_rate": 0.0005845432431198981, + "loss": 1.4946, + "step": 5452 + }, + { + "epoch": 0.575210970464135, + "grad_norm": 0.65303635597229, + "learning_rate": 0.0005842983783044381, + "loss": 1.555, + "step": 5453 + }, + { + "epoch": 0.5753164556962025, + "grad_norm": 0.6291064023971558, + "learning_rate": 0.0005840535320566711, + "loss": 1.5285, + "step": 5454 + }, + { + "epoch": 0.57542194092827, + "grad_norm": 0.6545795202255249, + "learning_rate": 0.0005838087044040334, + "loss": 1.5755, + "step": 5455 + }, + { + "epoch": 0.5755274261603376, + "grad_norm": 0.6042690277099609, + "learning_rate": 0.0005835638953739589, + "loss": 1.5013, + "step": 5456 + }, + { + "epoch": 0.575632911392405, + "grad_norm": 0.6085339784622192, + "learning_rate": 0.00058331910499388, + "loss": 1.5386, + "step": 5457 + }, + { + "epoch": 0.5757383966244726, + "grad_norm": 0.6110787987709045, + "learning_rate": 0.0005830743332912264, + "loss": 1.5187, + "step": 5458 + }, + { + "epoch": 0.5758438818565401, + "grad_norm": 0.6113867163658142, + "learning_rate": 0.0005828295802934263, + "loss": 1.5145, + "step": 5459 + }, + { + "epoch": 0.5759493670886076, + "grad_norm": 0.5756798982620239, + "learning_rate": 0.0005825848460279048, + "loss": 1.5384, + "step": 5460 + }, + { + "epoch": 0.5760548523206751, + "grad_norm": 0.6102608442306519, + "learning_rate": 
0.0005823401305220865, + "loss": 1.4987, + "step": 5461 + }, + { + "epoch": 0.5761603375527427, + "grad_norm": 0.6556538343429565, + "learning_rate": 0.0005820954338033925, + "loss": 1.4867, + "step": 5462 + }, + { + "epoch": 0.5762658227848101, + "grad_norm": 0.6144274473190308, + "learning_rate": 0.0005818507558992426, + "loss": 1.5272, + "step": 5463 + }, + { + "epoch": 0.5763713080168776, + "grad_norm": 0.6233268976211548, + "learning_rate": 0.0005816060968370538, + "loss": 1.5116, + "step": 5464 + }, + { + "epoch": 0.5764767932489452, + "grad_norm": 0.6460200548171997, + "learning_rate": 0.0005813614566442416, + "loss": 1.5713, + "step": 5465 + }, + { + "epoch": 0.5765822784810126, + "grad_norm": 0.5722482800483704, + "learning_rate": 0.0005811168353482191, + "loss": 1.5554, + "step": 5466 + }, + { + "epoch": 0.5766877637130802, + "grad_norm": 0.6622674465179443, + "learning_rate": 0.0005808722329763974, + "loss": 1.5422, + "step": 5467 + }, + { + "epoch": 0.5767932489451477, + "grad_norm": 0.5975790619850159, + "learning_rate": 0.0005806276495561852, + "loss": 1.5416, + "step": 5468 + }, + { + "epoch": 0.5768987341772152, + "grad_norm": 0.5948583483695984, + "learning_rate": 0.0005803830851149892, + "loss": 1.5374, + "step": 5469 + }, + { + "epoch": 0.5770042194092827, + "grad_norm": 0.6103337407112122, + "learning_rate": 0.0005801385396802146, + "loss": 1.5274, + "step": 5470 + }, + { + "epoch": 0.5771097046413503, + "grad_norm": 0.5885037183761597, + "learning_rate": 0.0005798940132792636, + "loss": 1.5434, + "step": 5471 + }, + { + "epoch": 0.5772151898734177, + "grad_norm": 0.5842780470848083, + "learning_rate": 0.0005796495059395367, + "loss": 1.5263, + "step": 5472 + }, + { + "epoch": 0.5773206751054852, + "grad_norm": 0.6377031207084656, + "learning_rate": 0.0005794050176884321, + "loss": 1.4968, + "step": 5473 + }, + { + "epoch": 0.5774261603375528, + "grad_norm": 0.5715152621269226, + "learning_rate": 0.0005791605485533459, + "loss": 1.5007, + 
"step": 5474 + }, + { + "epoch": 0.5775316455696202, + "grad_norm": 0.686110258102417, + "learning_rate": 0.0005789160985616721, + "loss": 1.5245, + "step": 5475 + }, + { + "epoch": 0.5776371308016878, + "grad_norm": 0.5633751749992371, + "learning_rate": 0.0005786716677408025, + "loss": 1.5079, + "step": 5476 + }, + { + "epoch": 0.5777426160337553, + "grad_norm": 0.7018226981163025, + "learning_rate": 0.0005784272561181269, + "loss": 1.5666, + "step": 5477 + }, + { + "epoch": 0.5778481012658228, + "grad_norm": 0.5932949185371399, + "learning_rate": 0.0005781828637210325, + "loss": 1.5425, + "step": 5478 + }, + { + "epoch": 0.5779535864978903, + "grad_norm": 0.808366596698761, + "learning_rate": 0.0005779384905769053, + "loss": 1.5403, + "step": 5479 + }, + { + "epoch": 0.5780590717299579, + "grad_norm": 0.5985637307167053, + "learning_rate": 0.0005776941367131282, + "loss": 1.5647, + "step": 5480 + }, + { + "epoch": 0.5781645569620253, + "grad_norm": 0.7791644930839539, + "learning_rate": 0.0005774498021570824, + "loss": 1.4965, + "step": 5481 + }, + { + "epoch": 0.5782700421940928, + "grad_norm": 0.712004542350769, + "learning_rate": 0.0005772054869361465, + "loss": 1.5434, + "step": 5482 + }, + { + "epoch": 0.5783755274261604, + "grad_norm": 0.684821367263794, + "learning_rate": 0.0005769611910776975, + "loss": 1.5416, + "step": 5483 + }, + { + "epoch": 0.5784810126582278, + "grad_norm": 0.8257584571838379, + "learning_rate": 0.0005767169146091098, + "loss": 1.5365, + "step": 5484 + }, + { + "epoch": 0.5785864978902954, + "grad_norm": 0.5828466415405273, + "learning_rate": 0.0005764726575577559, + "loss": 1.5291, + "step": 5485 + }, + { + "epoch": 0.5786919831223629, + "grad_norm": 0.748038649559021, + "learning_rate": 0.0005762284199510059, + "loss": 1.5106, + "step": 5486 + }, + { + "epoch": 0.5787974683544304, + "grad_norm": 0.6611718535423279, + "learning_rate": 0.000575984201816228, + "loss": 1.5367, + "step": 5487 + }, + { + "epoch": 0.5789029535864979, + 
"grad_norm": 0.5831300020217896, + "learning_rate": 0.0005757400031807881, + "loss": 1.5134, + "step": 5488 + }, + { + "epoch": 0.5790084388185655, + "grad_norm": 0.7444936037063599, + "learning_rate": 0.0005754958240720498, + "loss": 1.5826, + "step": 5489 + }, + { + "epoch": 0.5791139240506329, + "grad_norm": 0.65775465965271, + "learning_rate": 0.0005752516645173745, + "loss": 1.5234, + "step": 5490 + }, + { + "epoch": 0.5792194092827004, + "grad_norm": 0.7384379506111145, + "learning_rate": 0.0005750075245441218, + "loss": 1.5285, + "step": 5491 + }, + { + "epoch": 0.579324894514768, + "grad_norm": 0.7752281427383423, + "learning_rate": 0.0005747634041796484, + "loss": 1.5258, + "step": 5492 + }, + { + "epoch": 0.5794303797468354, + "grad_norm": 0.6687219142913818, + "learning_rate": 0.0005745193034513092, + "loss": 1.5366, + "step": 5493 + }, + { + "epoch": 0.579535864978903, + "grad_norm": 0.7764040231704712, + "learning_rate": 0.0005742752223864573, + "loss": 1.5141, + "step": 5494 + }, + { + "epoch": 0.5796413502109705, + "grad_norm": 0.6370667815208435, + "learning_rate": 0.0005740311610124427, + "loss": 1.5502, + "step": 5495 + }, + { + "epoch": 0.579746835443038, + "grad_norm": 0.7123350501060486, + "learning_rate": 0.0005737871193566141, + "loss": 1.5017, + "step": 5496 + }, + { + "epoch": 0.5798523206751055, + "grad_norm": 0.619270920753479, + "learning_rate": 0.0005735430974463175, + "loss": 1.5281, + "step": 5497 + }, + { + "epoch": 0.5799578059071729, + "grad_norm": 0.6172968149185181, + "learning_rate": 0.0005732990953088968, + "loss": 1.52, + "step": 5498 + }, + { + "epoch": 0.5800632911392405, + "grad_norm": 0.6429376602172852, + "learning_rate": 0.0005730551129716936, + "loss": 1.5599, + "step": 5499 + }, + { + "epoch": 0.580168776371308, + "grad_norm": 0.6008946299552917, + "learning_rate": 0.0005728111504620472, + "loss": 1.5027, + "step": 5500 + }, + { + "epoch": 0.5802742616033755, + "grad_norm": 0.8024063110351562, + "learning_rate": 
0.000572567207807295, + "loss": 1.5055, + "step": 5501 + }, + { + "epoch": 0.580379746835443, + "grad_norm": 0.6573284268379211, + "learning_rate": 0.000572323285034772, + "loss": 1.5508, + "step": 5502 + }, + { + "epoch": 0.5804852320675106, + "grad_norm": 0.7380409836769104, + "learning_rate": 0.0005720793821718108, + "loss": 1.5496, + "step": 5503 + }, + { + "epoch": 0.580590717299578, + "grad_norm": 0.6462616920471191, + "learning_rate": 0.0005718354992457417, + "loss": 1.5119, + "step": 5504 + }, + { + "epoch": 0.5806962025316456, + "grad_norm": 0.74615079164505, + "learning_rate": 0.0005715916362838936, + "loss": 1.5121, + "step": 5505 + }, + { + "epoch": 0.5808016877637131, + "grad_norm": 0.7232001423835754, + "learning_rate": 0.0005713477933135923, + "loss": 1.5648, + "step": 5506 + }, + { + "epoch": 0.5809071729957805, + "grad_norm": 0.7337769865989685, + "learning_rate": 0.0005711039703621616, + "loss": 1.5529, + "step": 5507 + }, + { + "epoch": 0.5810126582278481, + "grad_norm": 0.7793678045272827, + "learning_rate": 0.0005708601674569232, + "loss": 1.508, + "step": 5508 + }, + { + "epoch": 0.5811181434599156, + "grad_norm": 0.8363128900527954, + "learning_rate": 0.0005706163846251961, + "loss": 1.5389, + "step": 5509 + }, + { + "epoch": 0.5812236286919831, + "grad_norm": 0.5881869196891785, + "learning_rate": 0.0005703726218942976, + "loss": 1.5172, + "step": 5510 + }, + { + "epoch": 0.5813291139240506, + "grad_norm": 0.7543566823005676, + "learning_rate": 0.0005701288792915427, + "loss": 1.496, + "step": 5511 + }, + { + "epoch": 0.5814345991561182, + "grad_norm": 0.6233589053153992, + "learning_rate": 0.0005698851568442434, + "loss": 1.5248, + "step": 5512 + }, + { + "epoch": 0.5815400843881856, + "grad_norm": 0.8035087585449219, + "learning_rate": 0.0005696414545797108, + "loss": 1.5371, + "step": 5513 + }, + { + "epoch": 0.5816455696202532, + "grad_norm": 0.6968535780906677, + "learning_rate": 0.0005693977725252525, + "loss": 1.5472, + "step": 5514 + 
}, + { + "epoch": 0.5817510548523207, + "grad_norm": 0.7424667477607727, + "learning_rate": 0.0005691541107081743, + "loss": 1.5233, + "step": 5515 + }, + { + "epoch": 0.5818565400843881, + "grad_norm": 0.6339522004127502, + "learning_rate": 0.0005689104691557798, + "loss": 1.4893, + "step": 5516 + }, + { + "epoch": 0.5819620253164557, + "grad_norm": 0.6479394435882568, + "learning_rate": 0.0005686668478953702, + "loss": 1.511, + "step": 5517 + }, + { + "epoch": 0.5820675105485232, + "grad_norm": 0.6917908787727356, + "learning_rate": 0.0005684232469542446, + "loss": 1.5476, + "step": 5518 + }, + { + "epoch": 0.5821729957805907, + "grad_norm": 0.6362911462783813, + "learning_rate": 0.0005681796663596996, + "loss": 1.5176, + "step": 5519 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 0.7331382632255554, + "learning_rate": 0.0005679361061390295, + "loss": 1.5626, + "step": 5520 + }, + { + "epoch": 0.5823839662447258, + "grad_norm": 0.8349905014038086, + "learning_rate": 0.0005676925663195263, + "loss": 1.5077, + "step": 5521 + }, + { + "epoch": 0.5824894514767932, + "grad_norm": 0.6114510893821716, + "learning_rate": 0.0005674490469284805, + "loss": 1.5056, + "step": 5522 + }, + { + "epoch": 0.5825949367088608, + "grad_norm": 0.7986570596694946, + "learning_rate": 0.0005672055479931791, + "loss": 1.5154, + "step": 5523 + }, + { + "epoch": 0.5827004219409283, + "grad_norm": 0.644041121006012, + "learning_rate": 0.0005669620695409076, + "loss": 1.5364, + "step": 5524 + }, + { + "epoch": 0.5828059071729957, + "grad_norm": 0.9149540066719055, + "learning_rate": 0.000566718611598949, + "loss": 1.5117, + "step": 5525 + }, + { + "epoch": 0.5829113924050633, + "grad_norm": 0.6649644374847412, + "learning_rate": 0.0005664751741945839, + "loss": 1.5215, + "step": 5526 + }, + { + "epoch": 0.5830168776371308, + "grad_norm": 0.9222648739814758, + "learning_rate": 0.0005662317573550906, + "loss": 1.516, + "step": 5527 + }, + { + "epoch": 0.5831223628691983, + "grad_norm": 
0.7160688042640686, + "learning_rate": 0.0005659883611077453, + "loss": 1.5436, + "step": 5528 + }, + { + "epoch": 0.5832278481012658, + "grad_norm": 0.9904860258102417, + "learning_rate": 0.0005657449854798216, + "loss": 1.5135, + "step": 5529 + }, + { + "epoch": 0.5833333333333334, + "grad_norm": 0.7459455728530884, + "learning_rate": 0.0005655016304985908, + "loss": 1.5568, + "step": 5530 + }, + { + "epoch": 0.5834388185654008, + "grad_norm": 0.8950833082199097, + "learning_rate": 0.0005652582961913227, + "loss": 1.534, + "step": 5531 + }, + { + "epoch": 0.5835443037974684, + "grad_norm": 0.7478551268577576, + "learning_rate": 0.0005650149825852836, + "loss": 1.4957, + "step": 5532 + }, + { + "epoch": 0.5836497890295359, + "grad_norm": 0.7869430780410767, + "learning_rate": 0.0005647716897077382, + "loss": 1.5021, + "step": 5533 + }, + { + "epoch": 0.5837552742616033, + "grad_norm": 0.6580522656440735, + "learning_rate": 0.0005645284175859486, + "loss": 1.4958, + "step": 5534 + }, + { + "epoch": 0.5838607594936709, + "grad_norm": 0.793883740901947, + "learning_rate": 0.0005642851662471745, + "loss": 1.5146, + "step": 5535 + }, + { + "epoch": 0.5839662447257384, + "grad_norm": 0.6357601881027222, + "learning_rate": 0.0005640419357186738, + "loss": 1.5122, + "step": 5536 + }, + { + "epoch": 0.5840717299578059, + "grad_norm": 0.9298679232597351, + "learning_rate": 0.0005637987260277013, + "loss": 1.5092, + "step": 5537 + }, + { + "epoch": 0.5841772151898734, + "grad_norm": 0.7555457353591919, + "learning_rate": 0.0005635555372015099, + "loss": 1.5812, + "step": 5538 + }, + { + "epoch": 0.584282700421941, + "grad_norm": 0.7766602635383606, + "learning_rate": 0.0005633123692673503, + "loss": 1.5094, + "step": 5539 + }, + { + "epoch": 0.5843881856540084, + "grad_norm": 0.7608329653739929, + "learning_rate": 0.0005630692222524709, + "loss": 1.55, + "step": 5540 + }, + { + "epoch": 0.584493670886076, + "grad_norm": 0.6640138626098633, + "learning_rate": 
0.0005628260961841171, + "loss": 1.5331, + "step": 5541 + }, + { + "epoch": 0.5845991561181435, + "grad_norm": 0.743325412273407, + "learning_rate": 0.0005625829910895325, + "loss": 1.5314, + "step": 5542 + }, + { + "epoch": 0.5847046413502109, + "grad_norm": 0.6136193871498108, + "learning_rate": 0.0005623399069959585, + "loss": 1.5326, + "step": 5543 + }, + { + "epoch": 0.5848101265822785, + "grad_norm": 0.6643384695053101, + "learning_rate": 0.0005620968439306335, + "loss": 1.5136, + "step": 5544 + }, + { + "epoch": 0.584915611814346, + "grad_norm": 0.5651798248291016, + "learning_rate": 0.0005618538019207943, + "loss": 1.5465, + "step": 5545 + }, + { + "epoch": 0.5850210970464135, + "grad_norm": 0.7036249041557312, + "learning_rate": 0.0005616107809936746, + "loss": 1.5172, + "step": 5546 + }, + { + "epoch": 0.585126582278481, + "grad_norm": 0.6279877424240112, + "learning_rate": 0.0005613677811765062, + "loss": 1.5121, + "step": 5547 + }, + { + "epoch": 0.5852320675105486, + "grad_norm": 0.6074103713035583, + "learning_rate": 0.0005611248024965186, + "loss": 1.553, + "step": 5548 + }, + { + "epoch": 0.585337552742616, + "grad_norm": 0.5834078192710876, + "learning_rate": 0.0005608818449809387, + "loss": 1.536, + "step": 5549 + }, + { + "epoch": 0.5854430379746836, + "grad_norm": 0.7027078866958618, + "learning_rate": 0.0005606389086569911, + "loss": 1.5675, + "step": 5550 + }, + { + "epoch": 0.5855485232067511, + "grad_norm": 0.5915105938911438, + "learning_rate": 0.0005603959935518981, + "loss": 1.5576, + "step": 5551 + }, + { + "epoch": 0.5856540084388185, + "grad_norm": 0.5988883376121521, + "learning_rate": 0.0005601530996928795, + "loss": 1.5832, + "step": 5552 + }, + { + "epoch": 0.5857594936708861, + "grad_norm": 0.5925279855728149, + "learning_rate": 0.0005599102271071527, + "loss": 1.5204, + "step": 5553 + }, + { + "epoch": 0.5858649789029536, + "grad_norm": 0.6395162343978882, + "learning_rate": 0.0005596673758219327, + "loss": 1.5389, + "step": 5554 
+ }, + { + "epoch": 0.5859704641350211, + "grad_norm": 0.6596834659576416, + "learning_rate": 0.0005594245458644325, + "loss": 1.5385, + "step": 5555 + }, + { + "epoch": 0.5860759493670886, + "grad_norm": 0.6271157264709473, + "learning_rate": 0.0005591817372618621, + "loss": 1.5172, + "step": 5556 + }, + { + "epoch": 0.5861814345991562, + "grad_norm": 0.6062186360359192, + "learning_rate": 0.0005589389500414296, + "loss": 1.5226, + "step": 5557 + }, + { + "epoch": 0.5862869198312236, + "grad_norm": 0.6585221290588379, + "learning_rate": 0.0005586961842303405, + "loss": 1.5546, + "step": 5558 + }, + { + "epoch": 0.5863924050632912, + "grad_norm": 0.6535933613777161, + "learning_rate": 0.0005584534398557977, + "loss": 1.5165, + "step": 5559 + }, + { + "epoch": 0.5864978902953587, + "grad_norm": 0.6395177841186523, + "learning_rate": 0.0005582107169450023, + "loss": 1.5328, + "step": 5560 + }, + { + "epoch": 0.5866033755274261, + "grad_norm": 0.6475041508674622, + "learning_rate": 0.0005579680155251524, + "loss": 1.5257, + "step": 5561 + }, + { + "epoch": 0.5867088607594937, + "grad_norm": 0.5797942876815796, + "learning_rate": 0.0005577253356234439, + "loss": 1.538, + "step": 5562 + }, + { + "epoch": 0.5868143459915611, + "grad_norm": 0.5939382314682007, + "learning_rate": 0.0005574826772670703, + "loss": 1.5073, + "step": 5563 + }, + { + "epoch": 0.5869198312236287, + "grad_norm": 0.6088054776191711, + "learning_rate": 0.0005572400404832226, + "loss": 1.5339, + "step": 5564 + }, + { + "epoch": 0.5870253164556962, + "grad_norm": 0.6767681837081909, + "learning_rate": 0.0005569974252990896, + "loss": 1.5288, + "step": 5565 + }, + { + "epoch": 0.5871308016877637, + "grad_norm": 0.5812385678291321, + "learning_rate": 0.0005567548317418576, + "loss": 1.5357, + "step": 5566 + }, + { + "epoch": 0.5872362869198312, + "grad_norm": 0.678019642829895, + "learning_rate": 0.0005565122598387103, + "loss": 1.5565, + "step": 5567 + }, + { + "epoch": 0.5873417721518988, + 
"grad_norm": 0.5723280906677246, + "learning_rate": 0.0005562697096168289, + "loss": 1.4967, + "step": 5568 + }, + { + "epoch": 0.5874472573839662, + "grad_norm": 0.6330854296684265, + "learning_rate": 0.0005560271811033928, + "loss": 1.498, + "step": 5569 + }, + { + "epoch": 0.5875527426160337, + "grad_norm": 0.6158585548400879, + "learning_rate": 0.0005557846743255783, + "loss": 1.5081, + "step": 5570 + }, + { + "epoch": 0.5876582278481013, + "grad_norm": 0.6037547588348389, + "learning_rate": 0.0005555421893105593, + "loss": 1.5266, + "step": 5571 + }, + { + "epoch": 0.5877637130801687, + "grad_norm": 0.7377457022666931, + "learning_rate": 0.0005552997260855077, + "loss": 1.531, + "step": 5572 + }, + { + "epoch": 0.5878691983122363, + "grad_norm": 0.6193912029266357, + "learning_rate": 0.0005550572846775927, + "loss": 1.5188, + "step": 5573 + }, + { + "epoch": 0.5879746835443038, + "grad_norm": 0.5879222750663757, + "learning_rate": 0.0005548148651139809, + "loss": 1.5295, + "step": 5574 + }, + { + "epoch": 0.5880801687763713, + "grad_norm": 0.6470695734024048, + "learning_rate": 0.0005545724674218368, + "loss": 1.5393, + "step": 5575 + }, + { + "epoch": 0.5881856540084388, + "grad_norm": 0.5746118426322937, + "learning_rate": 0.0005543300916283223, + "loss": 1.5094, + "step": 5576 + }, + { + "epoch": 0.5882911392405064, + "grad_norm": 0.5816444754600525, + "learning_rate": 0.0005540877377605968, + "loss": 1.5292, + "step": 5577 + }, + { + "epoch": 0.5883966244725738, + "grad_norm": 0.5609889030456543, + "learning_rate": 0.0005538454058458171, + "loss": 1.5098, + "step": 5578 + }, + { + "epoch": 0.5885021097046413, + "grad_norm": 0.6149580478668213, + "learning_rate": 0.0005536030959111377, + "loss": 1.5362, + "step": 5579 + }, + { + "epoch": 0.5886075949367089, + "grad_norm": 0.5817951560020447, + "learning_rate": 0.0005533608079837109, + "loss": 1.4827, + "step": 5580 + }, + { + "epoch": 0.5887130801687763, + "grad_norm": 0.5821130871772766, + "learning_rate": 
0.0005531185420906859, + "loss": 1.518, + "step": 5581 + }, + { + "epoch": 0.5888185654008439, + "grad_norm": 0.6113356351852417, + "learning_rate": 0.0005528762982592101, + "loss": 1.5321, + "step": 5582 + }, + { + "epoch": 0.5889240506329114, + "grad_norm": 0.619861364364624, + "learning_rate": 0.000552634076516428, + "loss": 1.4988, + "step": 5583 + }, + { + "epoch": 0.5890295358649789, + "grad_norm": 0.6327460408210754, + "learning_rate": 0.0005523918768894819, + "loss": 1.5214, + "step": 5584 + }, + { + "epoch": 0.5891350210970464, + "grad_norm": 0.6238740086555481, + "learning_rate": 0.0005521496994055112, + "loss": 1.5501, + "step": 5585 + }, + { + "epoch": 0.589240506329114, + "grad_norm": 0.6172578930854797, + "learning_rate": 0.0005519075440916534, + "loss": 1.5221, + "step": 5586 + }, + { + "epoch": 0.5893459915611814, + "grad_norm": 0.5914363265037537, + "learning_rate": 0.000551665410975043, + "loss": 1.5207, + "step": 5587 + }, + { + "epoch": 0.5894514767932489, + "grad_norm": 0.6398879289627075, + "learning_rate": 0.0005514233000828121, + "loss": 1.5627, + "step": 5588 + }, + { + "epoch": 0.5895569620253165, + "grad_norm": 0.6185176372528076, + "learning_rate": 0.0005511812114420908, + "loss": 1.5146, + "step": 5589 + }, + { + "epoch": 0.5896624472573839, + "grad_norm": 0.6252875924110413, + "learning_rate": 0.0005509391450800061, + "loss": 1.5138, + "step": 5590 + }, + { + "epoch": 0.5897679324894515, + "grad_norm": 0.6178934574127197, + "learning_rate": 0.0005506971010236829, + "loss": 1.5008, + "step": 5591 + }, + { + "epoch": 0.589873417721519, + "grad_norm": 0.6530083417892456, + "learning_rate": 0.0005504550793002433, + "loss": 1.5481, + "step": 5592 + }, + { + "epoch": 0.5899789029535865, + "grad_norm": 0.6247140765190125, + "learning_rate": 0.000550213079936807, + "loss": 1.5154, + "step": 5593 + }, + { + "epoch": 0.590084388185654, + "grad_norm": 0.7335731387138367, + "learning_rate": 0.0005499711029604915, + "loss": 1.5159, + "step": 5594 + 
}, + { + "epoch": 0.5901898734177216, + "grad_norm": 0.6125921010971069, + "learning_rate": 0.0005497291483984113, + "loss": 1.5199, + "step": 5595 + }, + { + "epoch": 0.590295358649789, + "grad_norm": 0.7776744365692139, + "learning_rate": 0.0005494872162776786, + "loss": 1.501, + "step": 5596 + }, + { + "epoch": 0.5904008438818565, + "grad_norm": 0.6966707110404968, + "learning_rate": 0.0005492453066254032, + "loss": 1.516, + "step": 5597 + }, + { + "epoch": 0.5905063291139241, + "grad_norm": 0.6369206309318542, + "learning_rate": 0.000549003419468692, + "loss": 1.5286, + "step": 5598 + }, + { + "epoch": 0.5906118143459915, + "grad_norm": 0.60280442237854, + "learning_rate": 0.0005487615548346502, + "loss": 1.5346, + "step": 5599 + }, + { + "epoch": 0.5907172995780591, + "grad_norm": 0.6235539317131042, + "learning_rate": 0.0005485197127503795, + "loss": 1.5439, + "step": 5600 + }, + { + "epoch": 0.5908227848101266, + "grad_norm": 0.6456446647644043, + "learning_rate": 0.0005482778932429798, + "loss": 1.5395, + "step": 5601 + }, + { + "epoch": 0.5909282700421941, + "grad_norm": 0.6519951820373535, + "learning_rate": 0.000548036096339548, + "loss": 1.5482, + "step": 5602 + }, + { + "epoch": 0.5910337552742616, + "grad_norm": 0.6538910865783691, + "learning_rate": 0.0005477943220671786, + "loss": 1.5396, + "step": 5603 + }, + { + "epoch": 0.5911392405063292, + "grad_norm": 0.6783102750778198, + "learning_rate": 0.0005475525704529638, + "loss": 1.5123, + "step": 5604 + }, + { + "epoch": 0.5912447257383966, + "grad_norm": 0.6964632272720337, + "learning_rate": 0.0005473108415239929, + "loss": 1.5015, + "step": 5605 + }, + { + "epoch": 0.5913502109704641, + "grad_norm": 0.6691979765892029, + "learning_rate": 0.0005470691353073531, + "loss": 1.5232, + "step": 5606 + }, + { + "epoch": 0.5914556962025317, + "grad_norm": 0.6212390065193176, + "learning_rate": 0.0005468274518301284, + "loss": 1.5208, + "step": 5607 + }, + { + "epoch": 0.5915611814345991, + "grad_norm": 
0.7174804210662842, + "learning_rate": 0.0005465857911194006, + "loss": 1.526, + "step": 5608 + }, + { + "epoch": 0.5916666666666667, + "grad_norm": 0.6452056765556335, + "learning_rate": 0.0005463441532022495, + "loss": 1.5096, + "step": 5609 + }, + { + "epoch": 0.5917721518987342, + "grad_norm": 0.6412744522094727, + "learning_rate": 0.0005461025381057516, + "loss": 1.5296, + "step": 5610 + }, + { + "epoch": 0.5918776371308017, + "grad_norm": 0.6891760230064392, + "learning_rate": 0.000545860945856981, + "loss": 1.522, + "step": 5611 + }, + { + "epoch": 0.5919831223628692, + "grad_norm": 0.8192556500434875, + "learning_rate": 0.0005456193764830093, + "loss": 1.5417, + "step": 5612 + }, + { + "epoch": 0.5920886075949368, + "grad_norm": 0.6841350197792053, + "learning_rate": 0.0005453778300109056, + "loss": 1.5216, + "step": 5613 + }, + { + "epoch": 0.5921940928270042, + "grad_norm": 0.802484393119812, + "learning_rate": 0.0005451363064677365, + "loss": 1.533, + "step": 5614 + }, + { + "epoch": 0.5922995780590717, + "grad_norm": 0.6579524874687195, + "learning_rate": 0.0005448948058805657, + "loss": 1.5484, + "step": 5615 + }, + { + "epoch": 0.5924050632911393, + "grad_norm": 0.9257228970527649, + "learning_rate": 0.0005446533282764543, + "loss": 1.5337, + "step": 5616 + }, + { + "epoch": 0.5925105485232067, + "grad_norm": 0.7513065934181213, + "learning_rate": 0.0005444118736824617, + "loss": 1.5135, + "step": 5617 + }, + { + "epoch": 0.5926160337552743, + "grad_norm": 0.6693794131278992, + "learning_rate": 0.000544170442125644, + "loss": 1.5338, + "step": 5618 + }, + { + "epoch": 0.5927215189873418, + "grad_norm": 0.6723673343658447, + "learning_rate": 0.0005439290336330545, + "loss": 1.5286, + "step": 5619 + }, + { + "epoch": 0.5928270042194093, + "grad_norm": 0.8545677065849304, + "learning_rate": 0.0005436876482317444, + "loss": 1.5454, + "step": 5620 + }, + { + "epoch": 0.5929324894514768, + "grad_norm": 0.6214762926101685, + "learning_rate": 
0.000543446285948762, + "loss": 1.5375, + "step": 5621 + }, + { + "epoch": 0.5930379746835444, + "grad_norm": 0.8560632467269897, + "learning_rate": 0.0005432049468111534, + "loss": 1.5489, + "step": 5622 + }, + { + "epoch": 0.5931434599156118, + "grad_norm": 0.6005576252937317, + "learning_rate": 0.0005429636308459614, + "loss": 1.4803, + "step": 5623 + }, + { + "epoch": 0.5932489451476793, + "grad_norm": 0.7009727954864502, + "learning_rate": 0.0005427223380802272, + "loss": 1.5425, + "step": 5624 + }, + { + "epoch": 0.5933544303797469, + "grad_norm": 0.7628197073936462, + "learning_rate": 0.0005424810685409881, + "loss": 1.5508, + "step": 5625 + }, + { + "epoch": 0.5934599156118143, + "grad_norm": 0.7554028034210205, + "learning_rate": 0.0005422398222552806, + "loss": 1.5279, + "step": 5626 + }, + { + "epoch": 0.5935654008438819, + "grad_norm": 0.636143147945404, + "learning_rate": 0.0005419985992501367, + "loss": 1.5482, + "step": 5627 + }, + { + "epoch": 0.5936708860759494, + "grad_norm": 0.7510636448860168, + "learning_rate": 0.0005417573995525871, + "loss": 1.4914, + "step": 5628 + }, + { + "epoch": 0.5937763713080169, + "grad_norm": 0.5914394855499268, + "learning_rate": 0.0005415162231896593, + "loss": 1.5256, + "step": 5629 + }, + { + "epoch": 0.5938818565400844, + "grad_norm": 0.5974616408348083, + "learning_rate": 0.0005412750701883782, + "loss": 1.5066, + "step": 5630 + }, + { + "epoch": 0.5939873417721518, + "grad_norm": 0.5736525654792786, + "learning_rate": 0.0005410339405757665, + "loss": 1.5385, + "step": 5631 + }, + { + "epoch": 0.5940928270042194, + "grad_norm": 0.6140990257263184, + "learning_rate": 0.0005407928343788435, + "loss": 1.5354, + "step": 5632 + }, + { + "epoch": 0.5941983122362869, + "grad_norm": 0.6351944804191589, + "learning_rate": 0.0005405517516246267, + "loss": 1.5144, + "step": 5633 + }, + { + "epoch": 0.5943037974683544, + "grad_norm": 0.6003175377845764, + "learning_rate": 0.0005403106923401302, + "loss": 1.5376, + "step": 
5634 + }, + { + "epoch": 0.5944092827004219, + "grad_norm": 0.6383232474327087, + "learning_rate": 0.0005400696565523666, + "loss": 1.5207, + "step": 5635 + }, + { + "epoch": 0.5945147679324895, + "grad_norm": 0.657107949256897, + "learning_rate": 0.0005398286442883448, + "loss": 1.5279, + "step": 5636 + }, + { + "epoch": 0.5946202531645569, + "grad_norm": 0.6548314094543457, + "learning_rate": 0.0005395876555750712, + "loss": 1.5245, + "step": 5637 + }, + { + "epoch": 0.5947257383966245, + "grad_norm": 0.6175330877304077, + "learning_rate": 0.0005393466904395503, + "loss": 1.5758, + "step": 5638 + }, + { + "epoch": 0.594831223628692, + "grad_norm": 0.7193893790245056, + "learning_rate": 0.000539105748908783, + "loss": 1.5352, + "step": 5639 + }, + { + "epoch": 0.5949367088607594, + "grad_norm": 0.6560258865356445, + "learning_rate": 0.0005388648310097682, + "loss": 1.5502, + "step": 5640 + }, + { + "epoch": 0.595042194092827, + "grad_norm": 0.8298618197441101, + "learning_rate": 0.0005386239367695018, + "loss": 1.5483, + "step": 5641 + }, + { + "epoch": 0.5951476793248945, + "grad_norm": 0.7194907665252686, + "learning_rate": 0.0005383830662149771, + "loss": 1.5301, + "step": 5642 + }, + { + "epoch": 0.595253164556962, + "grad_norm": 0.744318425655365, + "learning_rate": 0.0005381422193731853, + "loss": 1.5305, + "step": 5643 + }, + { + "epoch": 0.5953586497890295, + "grad_norm": 0.749455988407135, + "learning_rate": 0.0005379013962711143, + "loss": 1.5346, + "step": 5644 + }, + { + "epoch": 0.5954641350210971, + "grad_norm": 0.6090835928916931, + "learning_rate": 0.0005376605969357494, + "loss": 1.5422, + "step": 5645 + }, + { + "epoch": 0.5955696202531645, + "grad_norm": 0.7327213287353516, + "learning_rate": 0.0005374198213940734, + "loss": 1.5135, + "step": 5646 + }, + { + "epoch": 0.5956751054852321, + "grad_norm": 0.6699420213699341, + "learning_rate": 0.0005371790696730665, + "loss": 1.5309, + "step": 5647 + }, + { + "epoch": 0.5957805907172996, + 
"grad_norm": 0.7009025812149048, + "learning_rate": 0.000536938341799706, + "loss": 1.5295, + "step": 5648 + }, + { + "epoch": 0.595886075949367, + "grad_norm": 0.6451443433761597, + "learning_rate": 0.0005366976378009668, + "loss": 1.5306, + "step": 5649 + }, + { + "epoch": 0.5959915611814346, + "grad_norm": 0.7092702984809875, + "learning_rate": 0.000536456957703821, + "loss": 1.5183, + "step": 5650 + }, + { + "epoch": 0.5960970464135021, + "grad_norm": 0.6167377233505249, + "learning_rate": 0.0005362163015352374, + "loss": 1.5257, + "step": 5651 + }, + { + "epoch": 0.5962025316455696, + "grad_norm": 0.7695239782333374, + "learning_rate": 0.0005359756693221836, + "loss": 1.5098, + "step": 5652 + }, + { + "epoch": 0.5963080168776371, + "grad_norm": 0.6144744157791138, + "learning_rate": 0.0005357350610916233, + "loss": 1.4811, + "step": 5653 + }, + { + "epoch": 0.5964135021097047, + "grad_norm": 0.6087828874588013, + "learning_rate": 0.0005354944768705179, + "loss": 1.5129, + "step": 5654 + }, + { + "epoch": 0.5965189873417721, + "grad_norm": 0.6628642082214355, + "learning_rate": 0.0005352539166858258, + "loss": 1.536, + "step": 5655 + }, + { + "epoch": 0.5966244725738397, + "grad_norm": 0.589148998260498, + "learning_rate": 0.0005350133805645034, + "loss": 1.5434, + "step": 5656 + }, + { + "epoch": 0.5967299578059072, + "grad_norm": 0.6596501469612122, + "learning_rate": 0.0005347728685335036, + "loss": 1.5471, + "step": 5657 + }, + { + "epoch": 0.5968354430379746, + "grad_norm": 0.6165368556976318, + "learning_rate": 0.0005345323806197771, + "loss": 1.5637, + "step": 5658 + }, + { + "epoch": 0.5969409282700422, + "grad_norm": 0.6009559631347656, + "learning_rate": 0.0005342919168502717, + "loss": 1.5119, + "step": 5659 + }, + { + "epoch": 0.5970464135021097, + "grad_norm": 0.6306089758872986, + "learning_rate": 0.0005340514772519324, + "loss": 1.5141, + "step": 5660 + }, + { + "epoch": 0.5971518987341772, + "grad_norm": 0.5763207077980042, + "learning_rate": 
0.0005338110618517022, + "loss": 1.5571, + "step": 5661 + }, + { + "epoch": 0.5972573839662447, + "grad_norm": 0.6086251735687256, + "learning_rate": 0.0005335706706765205, + "loss": 1.4908, + "step": 5662 + }, + { + "epoch": 0.5973628691983123, + "grad_norm": 0.6067284941673279, + "learning_rate": 0.0005333303037533244, + "loss": 1.5516, + "step": 5663 + }, + { + "epoch": 0.5974683544303797, + "grad_norm": 0.6302417516708374, + "learning_rate": 0.0005330899611090482, + "loss": 1.5327, + "step": 5664 + }, + { + "epoch": 0.5975738396624473, + "grad_norm": 0.6071452498435974, + "learning_rate": 0.0005328496427706235, + "loss": 1.5244, + "step": 5665 + }, + { + "epoch": 0.5976793248945148, + "grad_norm": 0.683278501033783, + "learning_rate": 0.000532609348764979, + "loss": 1.4888, + "step": 5666 + }, + { + "epoch": 0.5977848101265822, + "grad_norm": 0.6325699687004089, + "learning_rate": 0.0005323690791190412, + "loss": 1.5523, + "step": 5667 + }, + { + "epoch": 0.5978902953586498, + "grad_norm": 0.6767447590827942, + "learning_rate": 0.0005321288338597327, + "loss": 1.5589, + "step": 5668 + }, + { + "epoch": 0.5979957805907173, + "grad_norm": 0.5997103452682495, + "learning_rate": 0.0005318886130139753, + "loss": 1.5285, + "step": 5669 + }, + { + "epoch": 0.5981012658227848, + "grad_norm": 0.6558462381362915, + "learning_rate": 0.0005316484166086863, + "loss": 1.4938, + "step": 5670 + }, + { + "epoch": 0.5982067510548523, + "grad_norm": 0.6568024754524231, + "learning_rate": 0.0005314082446707811, + "loss": 1.536, + "step": 5671 + }, + { + "epoch": 0.5983122362869199, + "grad_norm": 0.6153862476348877, + "learning_rate": 0.000531168097227172, + "loss": 1.5191, + "step": 5672 + }, + { + "epoch": 0.5984177215189873, + "grad_norm": 0.6268157362937927, + "learning_rate": 0.0005309279743047687, + "loss": 1.5605, + "step": 5673 + }, + { + "epoch": 0.5985232067510549, + "grad_norm": 0.6334685683250427, + "learning_rate": 0.0005306878759304785, + "loss": 1.5463, + "step": 
5674 + }, + { + "epoch": 0.5986286919831224, + "grad_norm": 0.668357789516449, + "learning_rate": 0.0005304478021312053, + "loss": 1.5095, + "step": 5675 + }, + { + "epoch": 0.5987341772151898, + "grad_norm": 0.6933290958404541, + "learning_rate": 0.0005302077529338507, + "loss": 1.5442, + "step": 5676 + }, + { + "epoch": 0.5988396624472574, + "grad_norm": 0.6197472214698792, + "learning_rate": 0.0005299677283653128, + "loss": 1.496, + "step": 5677 + }, + { + "epoch": 0.5989451476793249, + "grad_norm": 0.7273728847503662, + "learning_rate": 0.0005297277284524888, + "loss": 1.5579, + "step": 5678 + }, + { + "epoch": 0.5990506329113924, + "grad_norm": 0.6282045245170593, + "learning_rate": 0.0005294877532222709, + "loss": 1.4922, + "step": 5679 + }, + { + "epoch": 0.5991561181434599, + "grad_norm": 0.58182293176651, + "learning_rate": 0.00052924780270155, + "loss": 1.5403, + "step": 5680 + }, + { + "epoch": 0.5992616033755275, + "grad_norm": 0.6221342086791992, + "learning_rate": 0.0005290078769172135, + "loss": 1.5315, + "step": 5681 + }, + { + "epoch": 0.5993670886075949, + "grad_norm": 0.6675575971603394, + "learning_rate": 0.0005287679758961465, + "loss": 1.5158, + "step": 5682 + }, + { + "epoch": 0.5994725738396625, + "grad_norm": 0.6291682720184326, + "learning_rate": 0.0005285280996652308, + "loss": 1.5099, + "step": 5683 + }, + { + "epoch": 0.59957805907173, + "grad_norm": 0.5919210314750671, + "learning_rate": 0.0005282882482513459, + "loss": 1.4927, + "step": 5684 + }, + { + "epoch": 0.5996835443037974, + "grad_norm": 0.6022294759750366, + "learning_rate": 0.0005280484216813686, + "loss": 1.5075, + "step": 5685 + }, + { + "epoch": 0.599789029535865, + "grad_norm": 0.5729749798774719, + "learning_rate": 0.0005278086199821718, + "loss": 1.5272, + "step": 5686 + }, + { + "epoch": 0.5998945147679325, + "grad_norm": 0.7186183929443359, + "learning_rate": 0.0005275688431806274, + "loss": 1.5096, + "step": 5687 + }, + { + "epoch": 0.6, + "grad_norm": 
0.6384018063545227, + "learning_rate": 0.0005273290913036033, + "loss": 1.5092, + "step": 5688 + }, + { + "epoch": 0.6001054852320675, + "grad_norm": 0.7087815999984741, + "learning_rate": 0.0005270893643779649, + "loss": 1.496, + "step": 5689 + }, + { + "epoch": 0.6002109704641351, + "grad_norm": 0.6540051698684692, + "learning_rate": 0.0005268496624305747, + "loss": 1.5082, + "step": 5690 + }, + { + "epoch": 0.6003164556962025, + "grad_norm": 0.6116436719894409, + "learning_rate": 0.0005266099854882927, + "loss": 1.5434, + "step": 5691 + }, + { + "epoch": 0.6004219409282701, + "grad_norm": 0.6784401535987854, + "learning_rate": 0.0005263703335779755, + "loss": 1.4787, + "step": 5692 + }, + { + "epoch": 0.6005274261603376, + "grad_norm": 0.7543290853500366, + "learning_rate": 0.0005261307067264778, + "loss": 1.5336, + "step": 5693 + }, + { + "epoch": 0.600632911392405, + "grad_norm": 0.6564244627952576, + "learning_rate": 0.0005258911049606503, + "loss": 1.5007, + "step": 5694 + }, + { + "epoch": 0.6007383966244726, + "grad_norm": 0.7038724422454834, + "learning_rate": 0.0005256515283073422, + "loss": 1.4835, + "step": 5695 + }, + { + "epoch": 0.60084388185654, + "grad_norm": 0.6221490502357483, + "learning_rate": 0.0005254119767933992, + "loss": 1.5317, + "step": 5696 + }, + { + "epoch": 0.6009493670886076, + "grad_norm": 0.7327918410301208, + "learning_rate": 0.0005251724504456641, + "loss": 1.5338, + "step": 5697 + }, + { + "epoch": 0.6010548523206751, + "grad_norm": 0.6037619113922119, + "learning_rate": 0.000524932949290977, + "loss": 1.52, + "step": 5698 + }, + { + "epoch": 0.6011603375527426, + "grad_norm": 0.6341540813446045, + "learning_rate": 0.0005246934733561751, + "loss": 1.5008, + "step": 5699 + }, + { + "epoch": 0.6012658227848101, + "grad_norm": 0.7148570418357849, + "learning_rate": 0.0005244540226680931, + "loss": 1.5411, + "step": 5700 + }, + { + "epoch": 0.6013713080168777, + "grad_norm": 0.5768029093742371, + "learning_rate": 
0.0005242145972535625, + "loss": 1.5005, + "step": 5701 + }, + { + "epoch": 0.6014767932489451, + "grad_norm": 0.6370649933815002, + "learning_rate": 0.0005239751971394122, + "loss": 1.5279, + "step": 5702 + }, + { + "epoch": 0.6015822784810126, + "grad_norm": 0.6828508973121643, + "learning_rate": 0.0005237358223524678, + "loss": 1.5461, + "step": 5703 + }, + { + "epoch": 0.6016877637130802, + "grad_norm": 0.6273444294929504, + "learning_rate": 0.000523496472919553, + "loss": 1.5069, + "step": 5704 + }, + { + "epoch": 0.6017932489451476, + "grad_norm": 0.6040377020835876, + "learning_rate": 0.000523257148867488, + "loss": 1.5303, + "step": 5705 + }, + { + "epoch": 0.6018987341772152, + "grad_norm": 0.6423371434211731, + "learning_rate": 0.00052301785022309, + "loss": 1.498, + "step": 5706 + }, + { + "epoch": 0.6020042194092827, + "grad_norm": 0.5940905809402466, + "learning_rate": 0.0005227785770131737, + "loss": 1.493, + "step": 5707 + }, + { + "epoch": 0.6021097046413502, + "grad_norm": 0.6797044277191162, + "learning_rate": 0.0005225393292645509, + "loss": 1.5112, + "step": 5708 + }, + { + "epoch": 0.6022151898734177, + "grad_norm": 0.6749451160430908, + "learning_rate": 0.0005223001070040305, + "loss": 1.5175, + "step": 5709 + }, + { + "epoch": 0.6023206751054853, + "grad_norm": 0.6217060089111328, + "learning_rate": 0.0005220609102584185, + "loss": 1.5629, + "step": 5710 + }, + { + "epoch": 0.6024261603375527, + "grad_norm": 0.6818180084228516, + "learning_rate": 0.0005218217390545181, + "loss": 1.545, + "step": 5711 + }, + { + "epoch": 0.6025316455696202, + "grad_norm": 0.5673157572746277, + "learning_rate": 0.0005215825934191293, + "loss": 1.5158, + "step": 5712 + }, + { + "epoch": 0.6026371308016878, + "grad_norm": 0.6473332643508911, + "learning_rate": 0.0005213434733790503, + "loss": 1.52, + "step": 5713 + }, + { + "epoch": 0.6027426160337552, + "grad_norm": 0.611903190612793, + "learning_rate": 0.0005211043789610752, + "loss": 1.5068, + "step": 5714 + 
}, + { + "epoch": 0.6028481012658228, + "grad_norm": 0.6574624180793762, + "learning_rate": 0.0005208653101919959, + "loss": 1.5099, + "step": 5715 + }, + { + "epoch": 0.6029535864978903, + "grad_norm": 0.6405986547470093, + "learning_rate": 0.0005206262670986012, + "loss": 1.5516, + "step": 5716 + }, + { + "epoch": 0.6030590717299578, + "grad_norm": 0.6524390578269958, + "learning_rate": 0.0005203872497076768, + "loss": 1.5561, + "step": 5717 + }, + { + "epoch": 0.6031645569620253, + "grad_norm": 0.6340362429618835, + "learning_rate": 0.0005201482580460063, + "loss": 1.5227, + "step": 5718 + }, + { + "epoch": 0.6032700421940929, + "grad_norm": 0.6752137541770935, + "learning_rate": 0.0005199092921403696, + "loss": 1.5158, + "step": 5719 + }, + { + "epoch": 0.6033755274261603, + "grad_norm": 0.6097899079322815, + "learning_rate": 0.0005196703520175437, + "loss": 1.5189, + "step": 5720 + }, + { + "epoch": 0.6034810126582278, + "grad_norm": 0.62306809425354, + "learning_rate": 0.0005194314377043037, + "loss": 1.5188, + "step": 5721 + }, + { + "epoch": 0.6035864978902954, + "grad_norm": 0.5487856268882751, + "learning_rate": 0.0005191925492274205, + "loss": 1.5233, + "step": 5722 + }, + { + "epoch": 0.6036919831223628, + "grad_norm": 0.6785763502120972, + "learning_rate": 0.0005189536866136634, + "loss": 1.5102, + "step": 5723 + }, + { + "epoch": 0.6037974683544304, + "grad_norm": 0.5811816453933716, + "learning_rate": 0.0005187148498897977, + "loss": 1.54, + "step": 5724 + }, + { + "epoch": 0.6039029535864979, + "grad_norm": 0.7226608395576477, + "learning_rate": 0.0005184760390825865, + "loss": 1.5085, + "step": 5725 + }, + { + "epoch": 0.6040084388185654, + "grad_norm": 0.6385953426361084, + "learning_rate": 0.0005182372542187895, + "loss": 1.5309, + "step": 5726 + }, + { + "epoch": 0.6041139240506329, + "grad_norm": 0.7971310019493103, + "learning_rate": 0.0005179984953251639, + "loss": 1.5069, + "step": 5727 + }, + { + "epoch": 0.6042194092827005, + "grad_norm": 
0.7388022541999817, + "learning_rate": 0.0005177597624284637, + "loss": 1.5163, + "step": 5728 + }, + { + "epoch": 0.6043248945147679, + "grad_norm": 0.651263952255249, + "learning_rate": 0.00051752105555544, + "loss": 1.526, + "step": 5729 + }, + { + "epoch": 0.6044303797468354, + "grad_norm": 0.635439395904541, + "learning_rate": 0.0005172823747328415, + "loss": 1.5298, + "step": 5730 + }, + { + "epoch": 0.604535864978903, + "grad_norm": 0.7354445457458496, + "learning_rate": 0.0005170437199874132, + "loss": 1.522, + "step": 5731 + }, + { + "epoch": 0.6046413502109704, + "grad_norm": 0.640394389629364, + "learning_rate": 0.0005168050913458977, + "loss": 1.5139, + "step": 5732 + }, + { + "epoch": 0.604746835443038, + "grad_norm": 0.7178713083267212, + "learning_rate": 0.0005165664888350347, + "loss": 1.5268, + "step": 5733 + }, + { + "epoch": 0.6048523206751055, + "grad_norm": 0.7051268219947815, + "learning_rate": 0.0005163279124815605, + "loss": 1.4941, + "step": 5734 + }, + { + "epoch": 0.604957805907173, + "grad_norm": 0.7519822120666504, + "learning_rate": 0.000516089362312209, + "loss": 1.5116, + "step": 5735 + }, + { + "epoch": 0.6050632911392405, + "grad_norm": 0.6748772263526917, + "learning_rate": 0.0005158508383537109, + "loss": 1.5432, + "step": 5736 + }, + { + "epoch": 0.6051687763713081, + "grad_norm": 0.7248014807701111, + "learning_rate": 0.0005156123406327938, + "loss": 1.521, + "step": 5737 + }, + { + "epoch": 0.6052742616033755, + "grad_norm": 0.643943190574646, + "learning_rate": 0.0005153738691761826, + "loss": 1.5179, + "step": 5738 + }, + { + "epoch": 0.605379746835443, + "grad_norm": 0.6018977761268616, + "learning_rate": 0.0005151354240105994, + "loss": 1.5681, + "step": 5739 + }, + { + "epoch": 0.6054852320675106, + "grad_norm": 0.6555964946746826, + "learning_rate": 0.0005148970051627632, + "loss": 1.4979, + "step": 5740 + }, + { + "epoch": 0.605590717299578, + "grad_norm": 0.5720573663711548, + "learning_rate": 0.0005146586126593898, + 
"loss": 1.4929, + "step": 5741 + }, + { + "epoch": 0.6056962025316456, + "grad_norm": 0.6995789408683777, + "learning_rate": 0.0005144202465271922, + "loss": 1.5262, + "step": 5742 + }, + { + "epoch": 0.6058016877637131, + "grad_norm": 0.5876768231391907, + "learning_rate": 0.000514181906792881, + "loss": 1.4766, + "step": 5743 + }, + { + "epoch": 0.6059071729957806, + "grad_norm": 0.6268230080604553, + "learning_rate": 0.0005139435934831628, + "loss": 1.4831, + "step": 5744 + }, + { + "epoch": 0.6060126582278481, + "grad_norm": 0.6342013478279114, + "learning_rate": 0.0005137053066247421, + "loss": 1.4732, + "step": 5745 + }, + { + "epoch": 0.6061181434599157, + "grad_norm": 0.5875123143196106, + "learning_rate": 0.00051346704624432, + "loss": 1.5105, + "step": 5746 + }, + { + "epoch": 0.6062236286919831, + "grad_norm": 0.7133899927139282, + "learning_rate": 0.000513228812368595, + "loss": 1.4889, + "step": 5747 + }, + { + "epoch": 0.6063291139240506, + "grad_norm": 0.6599529981613159, + "learning_rate": 0.0005129906050242622, + "loss": 1.5056, + "step": 5748 + }, + { + "epoch": 0.6064345991561182, + "grad_norm": 0.6482488512992859, + "learning_rate": 0.0005127524242380139, + "loss": 1.5152, + "step": 5749 + }, + { + "epoch": 0.6065400843881856, + "grad_norm": 0.7073788046836853, + "learning_rate": 0.0005125142700365394, + "loss": 1.4573, + "step": 5750 + }, + { + "epoch": 0.6066455696202532, + "grad_norm": 0.6270211935043335, + "learning_rate": 0.0005122761424465254, + "loss": 1.4737, + "step": 5751 + }, + { + "epoch": 0.6067510548523207, + "grad_norm": 0.5715824961662292, + "learning_rate": 0.0005120380414946546, + "loss": 1.4816, + "step": 5752 + }, + { + "epoch": 0.6068565400843882, + "grad_norm": 0.7118156552314758, + "learning_rate": 0.0005117999672076081, + "loss": 1.5282, + "step": 5753 + }, + { + "epoch": 0.6069620253164557, + "grad_norm": 0.6491865515708923, + "learning_rate": 0.0005115619196120632, + "loss": 1.4945, + "step": 5754 + }, + { + "epoch": 
0.6070675105485233, + "grad_norm": 0.6198714971542358, + "learning_rate": 0.0005113238987346939, + "loss": 1.5267, + "step": 5755 + }, + { + "epoch": 0.6071729957805907, + "grad_norm": 0.6626446843147278, + "learning_rate": 0.000511085904602172, + "loss": 1.4935, + "step": 5756 + }, + { + "epoch": 0.6072784810126582, + "grad_norm": 0.8261712789535522, + "learning_rate": 0.0005108479372411658, + "loss": 1.4687, + "step": 5757 + }, + { + "epoch": 0.6073839662447258, + "grad_norm": 0.6279938817024231, + "learning_rate": 0.0005106099966783409, + "loss": 1.5355, + "step": 5758 + }, + { + "epoch": 0.6074894514767932, + "grad_norm": 0.7402003407478333, + "learning_rate": 0.0005103720829403594, + "loss": 1.5178, + "step": 5759 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 0.6011863350868225, + "learning_rate": 0.000510134196053881, + "loss": 1.5091, + "step": 5760 + }, + { + "epoch": 0.6077004219409282, + "grad_norm": 0.7013617157936096, + "learning_rate": 0.000509896336045562, + "loss": 1.4732, + "step": 5761 + }, + { + "epoch": 0.6078059071729958, + "grad_norm": 0.6156701445579529, + "learning_rate": 0.0005096585029420556, + "loss": 1.522, + "step": 5762 + }, + { + "epoch": 0.6079113924050633, + "grad_norm": 0.7128559350967407, + "learning_rate": 0.0005094206967700127, + "loss": 1.5089, + "step": 5763 + }, + { + "epoch": 0.6080168776371307, + "grad_norm": 0.6470460295677185, + "learning_rate": 0.0005091829175560801, + "loss": 1.4991, + "step": 5764 + }, + { + "epoch": 0.6081223628691983, + "grad_norm": 0.6748074293136597, + "learning_rate": 0.0005089451653269026, + "loss": 1.4986, + "step": 5765 + }, + { + "epoch": 0.6082278481012658, + "grad_norm": 0.6779104471206665, + "learning_rate": 0.0005087074401091212, + "loss": 1.5342, + "step": 5766 + }, + { + "epoch": 0.6083333333333333, + "grad_norm": 0.6554071307182312, + "learning_rate": 0.0005084697419293746, + "loss": 1.5145, + "step": 5767 + }, + { + "epoch": 0.6084388185654008, + "grad_norm": 0.725023090839386, 
+ "learning_rate": 0.0005082320708142975, + "loss": 1.4891, + "step": 5768 + }, + { + "epoch": 0.6085443037974684, + "grad_norm": 0.6225118041038513, + "learning_rate": 0.0005079944267905226, + "loss": 1.5007, + "step": 5769 + }, + { + "epoch": 0.6086497890295358, + "grad_norm": 0.8017696738243103, + "learning_rate": 0.0005077568098846789, + "loss": 1.5283, + "step": 5770 + }, + { + "epoch": 0.6087552742616034, + "grad_norm": 0.7419580221176147, + "learning_rate": 0.0005075192201233924, + "loss": 1.4955, + "step": 5771 + }, + { + "epoch": 0.6088607594936709, + "grad_norm": 0.5999542474746704, + "learning_rate": 0.0005072816575332864, + "loss": 1.5177, + "step": 5772 + }, + { + "epoch": 0.6089662447257383, + "grad_norm": 0.6411669850349426, + "learning_rate": 0.0005070441221409811, + "loss": 1.5214, + "step": 5773 + }, + { + "epoch": 0.6090717299578059, + "grad_norm": 0.6188130974769592, + "learning_rate": 0.0005068066139730936, + "loss": 1.5058, + "step": 5774 + }, + { + "epoch": 0.6091772151898734, + "grad_norm": 0.5890025496482849, + "learning_rate": 0.0005065691330562375, + "loss": 1.4917, + "step": 5775 + }, + { + "epoch": 0.6092827004219409, + "grad_norm": 0.6181408762931824, + "learning_rate": 0.0005063316794170239, + "loss": 1.5389, + "step": 5776 + }, + { + "epoch": 0.6093881856540084, + "grad_norm": 0.5796529650688171, + "learning_rate": 0.0005060942530820607, + "loss": 1.5084, + "step": 5777 + }, + { + "epoch": 0.609493670886076, + "grad_norm": 0.614410936832428, + "learning_rate": 0.0005058568540779526, + "loss": 1.535, + "step": 5778 + }, + { + "epoch": 0.6095991561181434, + "grad_norm": 0.7101842164993286, + "learning_rate": 0.0005056194824313015, + "loss": 1.5125, + "step": 5779 + }, + { + "epoch": 0.609704641350211, + "grad_norm": 0.592926561832428, + "learning_rate": 0.000505382138168706, + "loss": 1.5263, + "step": 5780 + }, + { + "epoch": 0.6098101265822785, + "grad_norm": 0.6617709994316101, + "learning_rate": 0.0005051448213167614, + "loss": 
1.5078, + "step": 5781 + }, + { + "epoch": 0.609915611814346, + "grad_norm": 0.6232732534408569, + "learning_rate": 0.0005049075319020608, + "loss": 1.5142, + "step": 5782 + }, + { + "epoch": 0.6100210970464135, + "grad_norm": 0.7789562344551086, + "learning_rate": 0.0005046702699511933, + "loss": 1.5459, + "step": 5783 + }, + { + "epoch": 0.610126582278481, + "grad_norm": 0.6222392916679382, + "learning_rate": 0.0005044330354907454, + "loss": 1.547, + "step": 5784 + }, + { + "epoch": 0.6102320675105485, + "grad_norm": 0.6528591513633728, + "learning_rate": 0.0005041958285473005, + "loss": 1.4965, + "step": 5785 + }, + { + "epoch": 0.610337552742616, + "grad_norm": 0.6267693042755127, + "learning_rate": 0.0005039586491474386, + "loss": 1.473, + "step": 5786 + }, + { + "epoch": 0.6104430379746836, + "grad_norm": 0.7543339729309082, + "learning_rate": 0.000503721497317737, + "loss": 1.4863, + "step": 5787 + }, + { + "epoch": 0.610548523206751, + "grad_norm": 0.6003047823905945, + "learning_rate": 0.0005034843730847696, + "loss": 1.4977, + "step": 5788 + }, + { + "epoch": 0.6106540084388186, + "grad_norm": 0.6624587774276733, + "learning_rate": 0.0005032472764751074, + "loss": 1.5176, + "step": 5789 + }, + { + "epoch": 0.6107594936708861, + "grad_norm": 0.5979493856430054, + "learning_rate": 0.0005030102075153181, + "loss": 1.4988, + "step": 5790 + }, + { + "epoch": 0.6108649789029535, + "grad_norm": 0.6811911463737488, + "learning_rate": 0.000502773166231967, + "loss": 1.5421, + "step": 5791 + }, + { + "epoch": 0.6109704641350211, + "grad_norm": 0.6065212488174438, + "learning_rate": 0.0005025361526516151, + "loss": 1.5151, + "step": 5792 + }, + { + "epoch": 0.6110759493670886, + "grad_norm": 0.7912629842758179, + "learning_rate": 0.0005022991668008216, + "loss": 1.5361, + "step": 5793 + }, + { + "epoch": 0.6111814345991561, + "grad_norm": 0.5786060690879822, + "learning_rate": 0.0005020622087061415, + "loss": 1.5204, + "step": 5794 + }, + { + "epoch": 
0.6112869198312236, + "grad_norm": 0.6559020280838013, + "learning_rate": 0.0005018252783941273, + "loss": 1.5039, + "step": 5795 + }, + { + "epoch": 0.6113924050632912, + "grad_norm": 0.6395238637924194, + "learning_rate": 0.0005015883758913281, + "loss": 1.5055, + "step": 5796 + }, + { + "epoch": 0.6114978902953586, + "grad_norm": 0.7973873615264893, + "learning_rate": 0.0005013515012242901, + "loss": 1.5126, + "step": 5797 + }, + { + "epoch": 0.6116033755274262, + "grad_norm": 0.6172999143600464, + "learning_rate": 0.0005011146544195559, + "loss": 1.4805, + "step": 5798 + }, + { + "epoch": 0.6117088607594937, + "grad_norm": 0.647060751914978, + "learning_rate": 0.000500877835503666, + "loss": 1.5087, + "step": 5799 + }, + { + "epoch": 0.6118143459915611, + "grad_norm": 0.6478968262672424, + "learning_rate": 0.0005006410445031569, + "loss": 1.5192, + "step": 5800 + }, + { + "epoch": 0.6119198312236287, + "grad_norm": 0.7500591278076172, + "learning_rate": 0.0005004042814445622, + "loss": 1.5264, + "step": 5801 + }, + { + "epoch": 0.6120253164556962, + "grad_norm": 0.6259123682975769, + "learning_rate": 0.0005001675463544125, + "loss": 1.5164, + "step": 5802 + }, + { + "epoch": 0.6121308016877637, + "grad_norm": 0.6876075863838196, + "learning_rate": 0.0004999308392592349, + "loss": 1.4972, + "step": 5803 + }, + { + "epoch": 0.6122362869198312, + "grad_norm": 0.6129536628723145, + "learning_rate": 0.0004996941601855536, + "loss": 1.5194, + "step": 5804 + }, + { + "epoch": 0.6123417721518988, + "grad_norm": 0.6355345249176025, + "learning_rate": 0.0004994575091598898, + "loss": 1.4969, + "step": 5805 + }, + { + "epoch": 0.6124472573839662, + "grad_norm": 0.6294296979904175, + "learning_rate": 0.0004992208862087616, + "loss": 1.538, + "step": 5806 + }, + { + "epoch": 0.6125527426160338, + "grad_norm": 0.6282213926315308, + "learning_rate": 0.0004989842913586832, + "loss": 1.5338, + "step": 5807 + }, + { + "epoch": 0.6126582278481013, + "grad_norm": 
0.6352508068084717, + "learning_rate": 0.000498747724636167, + "loss": 1.5304, + "step": 5808 + }, + { + "epoch": 0.6127637130801687, + "grad_norm": 0.572310209274292, + "learning_rate": 0.000498511186067721, + "loss": 1.5395, + "step": 5809 + }, + { + "epoch": 0.6128691983122363, + "grad_norm": 0.6230514645576477, + "learning_rate": 0.0004982746756798507, + "loss": 1.5175, + "step": 5810 + }, + { + "epoch": 0.6129746835443038, + "grad_norm": 0.7100404500961304, + "learning_rate": 0.0004980381934990583, + "loss": 1.5524, + "step": 5811 + }, + { + "epoch": 0.6130801687763713, + "grad_norm": 0.5907939076423645, + "learning_rate": 0.0004978017395518425, + "loss": 1.5171, + "step": 5812 + }, + { + "epoch": 0.6131856540084388, + "grad_norm": 0.7163377404212952, + "learning_rate": 0.0004975653138646994, + "loss": 1.508, + "step": 5813 + }, + { + "epoch": 0.6132911392405064, + "grad_norm": 0.5736547112464905, + "learning_rate": 0.0004973289164641217, + "loss": 1.508, + "step": 5814 + }, + { + "epoch": 0.6133966244725738, + "grad_norm": 0.651974618434906, + "learning_rate": 0.0004970925473765988, + "loss": 1.5566, + "step": 5815 + }, + { + "epoch": 0.6135021097046414, + "grad_norm": 0.5842257142066956, + "learning_rate": 0.0004968562066286168, + "loss": 1.5489, + "step": 5816 + }, + { + "epoch": 0.6136075949367089, + "grad_norm": 0.6610651612281799, + "learning_rate": 0.0004966198942466595, + "loss": 1.5299, + "step": 5817 + }, + { + "epoch": 0.6137130801687763, + "grad_norm": 0.6141730546951294, + "learning_rate": 0.0004963836102572065, + "loss": 1.5497, + "step": 5818 + }, + { + "epoch": 0.6138185654008439, + "grad_norm": 0.6648803353309631, + "learning_rate": 0.0004961473546867346, + "loss": 1.5237, + "step": 5819 + }, + { + "epoch": 0.6139240506329114, + "grad_norm": 0.647946298122406, + "learning_rate": 0.0004959111275617174, + "loss": 1.5352, + "step": 5820 + }, + { + "epoch": 0.6140295358649789, + "grad_norm": 0.6353076696395874, + "learning_rate": 
0.0004956749289086254, + "loss": 1.5461, + "step": 5821 + }, + { + "epoch": 0.6141350210970464, + "grad_norm": 0.6076091527938843, + "learning_rate": 0.0004954387587539257, + "loss": 1.5025, + "step": 5822 + }, + { + "epoch": 0.614240506329114, + "grad_norm": 0.7130316495895386, + "learning_rate": 0.0004952026171240826, + "loss": 1.5075, + "step": 5823 + }, + { + "epoch": 0.6143459915611814, + "grad_norm": 0.5509127378463745, + "learning_rate": 0.0004949665040455566, + "loss": 1.5192, + "step": 5824 + }, + { + "epoch": 0.614451476793249, + "grad_norm": 0.7865066528320312, + "learning_rate": 0.0004947304195448052, + "loss": 1.5154, + "step": 5825 + }, + { + "epoch": 0.6145569620253165, + "grad_norm": 0.5897625088691711, + "learning_rate": 0.0004944943636482836, + "loss": 1.5042, + "step": 5826 + }, + { + "epoch": 0.614662447257384, + "grad_norm": 0.6125484108924866, + "learning_rate": 0.0004942583363824428, + "loss": 1.5136, + "step": 5827 + }, + { + "epoch": 0.6147679324894515, + "grad_norm": 0.6346979141235352, + "learning_rate": 0.0004940223377737304, + "loss": 1.4879, + "step": 5828 + }, + { + "epoch": 0.6148734177215189, + "grad_norm": 0.6093542575836182, + "learning_rate": 0.0004937863678485915, + "loss": 1.5255, + "step": 5829 + }, + { + "epoch": 0.6149789029535865, + "grad_norm": 0.6614405512809753, + "learning_rate": 0.0004935504266334677, + "loss": 1.4919, + "step": 5830 + }, + { + "epoch": 0.615084388185654, + "grad_norm": 0.6076056361198425, + "learning_rate": 0.0004933145141547975, + "loss": 1.5192, + "step": 5831 + }, + { + "epoch": 0.6151898734177215, + "grad_norm": 0.6075744032859802, + "learning_rate": 0.0004930786304390158, + "loss": 1.5418, + "step": 5832 + }, + { + "epoch": 0.615295358649789, + "grad_norm": 0.6249316930770874, + "learning_rate": 0.0004928427755125544, + "loss": 1.4892, + "step": 5833 + }, + { + "epoch": 0.6154008438818566, + "grad_norm": 0.5795685052871704, + "learning_rate": 0.0004926069494018427, + "loss": 1.5345, + "step": 
5834 + }, + { + "epoch": 0.615506329113924, + "grad_norm": 0.603442907333374, + "learning_rate": 0.0004923711521333056, + "loss": 1.5118, + "step": 5835 + }, + { + "epoch": 0.6156118143459915, + "grad_norm": 0.647088885307312, + "learning_rate": 0.0004921353837333657, + "loss": 1.5545, + "step": 5836 + }, + { + "epoch": 0.6157172995780591, + "grad_norm": 0.5844387412071228, + "learning_rate": 0.0004918996442284419, + "loss": 1.4995, + "step": 5837 + }, + { + "epoch": 0.6158227848101265, + "grad_norm": 0.6332720518112183, + "learning_rate": 0.0004916639336449499, + "loss": 1.5405, + "step": 5838 + }, + { + "epoch": 0.6159282700421941, + "grad_norm": 0.5633305907249451, + "learning_rate": 0.0004914282520093023, + "loss": 1.5221, + "step": 5839 + }, + { + "epoch": 0.6160337552742616, + "grad_norm": 0.6100668907165527, + "learning_rate": 0.0004911925993479085, + "loss": 1.5501, + "step": 5840 + }, + { + "epoch": 0.6161392405063291, + "grad_norm": 0.5925114750862122, + "learning_rate": 0.0004909569756871745, + "loss": 1.5374, + "step": 5841 + }, + { + "epoch": 0.6162447257383966, + "grad_norm": 0.6151412129402161, + "learning_rate": 0.0004907213810535026, + "loss": 1.5439, + "step": 5842 + }, + { + "epoch": 0.6163502109704642, + "grad_norm": 0.5512300729751587, + "learning_rate": 0.0004904858154732932, + "loss": 1.4794, + "step": 5843 + }, + { + "epoch": 0.6164556962025316, + "grad_norm": 0.6229647397994995, + "learning_rate": 0.0004902502789729424, + "loss": 1.5243, + "step": 5844 + }, + { + "epoch": 0.6165611814345991, + "grad_norm": 0.6592930555343628, + "learning_rate": 0.0004900147715788429, + "loss": 1.5329, + "step": 5845 + }, + { + "epoch": 0.6166666666666667, + "grad_norm": 0.6233271956443787, + "learning_rate": 0.0004897792933173847, + "loss": 1.5089, + "step": 5846 + }, + { + "epoch": 0.6167721518987341, + "grad_norm": 0.6326472163200378, + "learning_rate": 0.0004895438442149542, + "loss": 1.5251, + "step": 5847 + }, + { + "epoch": 0.6168776371308017, + 
"grad_norm": 0.6833012104034424, + "learning_rate": 0.0004893084242979348, + "loss": 1.5563, + "step": 5848 + }, + { + "epoch": 0.6169831223628692, + "grad_norm": 0.6433854699134827, + "learning_rate": 0.0004890730335927063, + "loss": 1.5808, + "step": 5849 + }, + { + "epoch": 0.6170886075949367, + "grad_norm": 0.6505973935127258, + "learning_rate": 0.0004888376721256456, + "loss": 1.5153, + "step": 5850 + }, + { + "epoch": 0.6171940928270042, + "grad_norm": 0.6108572483062744, + "learning_rate": 0.0004886023399231255, + "loss": 1.5259, + "step": 5851 + }, + { + "epoch": 0.6172995780590718, + "grad_norm": 0.6087256669998169, + "learning_rate": 0.0004883670370115173, + "loss": 1.5056, + "step": 5852 + }, + { + "epoch": 0.6174050632911392, + "grad_norm": 0.6665197610855103, + "learning_rate": 0.00048813176341718693, + "loss": 1.5108, + "step": 5853 + }, + { + "epoch": 0.6175105485232067, + "grad_norm": 0.6720457673072815, + "learning_rate": 0.0004878965191664983, + "loss": 1.4809, + "step": 5854 + }, + { + "epoch": 0.6176160337552743, + "grad_norm": 0.6274710297584534, + "learning_rate": 0.0004876613042858118, + "loss": 1.5606, + "step": 5855 + }, + { + "epoch": 0.6177215189873417, + "grad_norm": 0.7509225606918335, + "learning_rate": 0.0004874261188014842, + "loss": 1.5298, + "step": 5856 + }, + { + "epoch": 0.6178270042194093, + "grad_norm": 0.6435737609863281, + "learning_rate": 0.00048719096273986925, + "loss": 1.492, + "step": 5857 + }, + { + "epoch": 0.6179324894514768, + "grad_norm": 0.7490710020065308, + "learning_rate": 0.0004869558361273175, + "loss": 1.5309, + "step": 5858 + }, + { + "epoch": 0.6180379746835443, + "grad_norm": 0.7588711977005005, + "learning_rate": 0.00048672073899017564, + "loss": 1.5265, + "step": 5859 + }, + { + "epoch": 0.6181434599156118, + "grad_norm": 0.7280983924865723, + "learning_rate": 0.00048648567135478805, + "loss": 1.5217, + "step": 5860 + }, + { + "epoch": 0.6182489451476794, + "grad_norm": 0.6906962990760803, + 
"learning_rate": 0.0004862506332474951, + "loss": 1.5408, + "step": 5861 + }, + { + "epoch": 0.6183544303797468, + "grad_norm": 0.7319624423980713, + "learning_rate": 0.0004860156246946338, + "loss": 1.5797, + "step": 5862 + }, + { + "epoch": 0.6184599156118143, + "grad_norm": 0.6242752075195312, + "learning_rate": 0.0004857806457225381, + "loss": 1.4819, + "step": 5863 + }, + { + "epoch": 0.6185654008438819, + "grad_norm": 0.687139630317688, + "learning_rate": 0.00048554569635753857, + "loss": 1.48, + "step": 5864 + }, + { + "epoch": 0.6186708860759493, + "grad_norm": 0.726227343082428, + "learning_rate": 0.00048531077662596246, + "loss": 1.5119, + "step": 5865 + }, + { + "epoch": 0.6187763713080169, + "grad_norm": 0.5741254091262817, + "learning_rate": 0.00048507588655413367, + "loss": 1.5257, + "step": 5866 + }, + { + "epoch": 0.6188818565400844, + "grad_norm": 0.6416603922843933, + "learning_rate": 0.00048484102616837277, + "loss": 1.4621, + "step": 5867 + }, + { + "epoch": 0.6189873417721519, + "grad_norm": 0.5941846966743469, + "learning_rate": 0.000484606195494997, + "loss": 1.494, + "step": 5868 + }, + { + "epoch": 0.6190928270042194, + "grad_norm": 0.5706182718276978, + "learning_rate": 0.0004843713945603205, + "loss": 1.521, + "step": 5869 + }, + { + "epoch": 0.619198312236287, + "grad_norm": 0.5986681580543518, + "learning_rate": 0.0004841366233906538, + "loss": 1.5068, + "step": 5870 + }, + { + "epoch": 0.6193037974683544, + "grad_norm": 0.6048775315284729, + "learning_rate": 0.0004839018820123042, + "loss": 1.5482, + "step": 5871 + }, + { + "epoch": 0.619409282700422, + "grad_norm": 0.6216210126876831, + "learning_rate": 0.0004836671704515756, + "loss": 1.5056, + "step": 5872 + }, + { + "epoch": 0.6195147679324895, + "grad_norm": 0.557745635509491, + "learning_rate": 0.00048343248873476853, + "loss": 1.4977, + "step": 5873 + }, + { + "epoch": 0.6196202531645569, + "grad_norm": 0.5877941846847534, + "learning_rate": 0.00048319783688818043, + "loss": 
1.5053, + "step": 5874 + }, + { + "epoch": 0.6197257383966245, + "grad_norm": 0.7029613852500916, + "learning_rate": 0.00048296321493810507, + "loss": 1.5787, + "step": 5875 + }, + { + "epoch": 0.619831223628692, + "grad_norm": 0.5722869038581848, + "learning_rate": 0.0004827286229108331, + "loss": 1.4853, + "step": 5876 + }, + { + "epoch": 0.6199367088607595, + "grad_norm": 0.6931553483009338, + "learning_rate": 0.00048249406083265123, + "loss": 1.524, + "step": 5877 + }, + { + "epoch": 0.620042194092827, + "grad_norm": 0.6410495042800903, + "learning_rate": 0.0004822595287298442, + "loss": 1.5264, + "step": 5878 + }, + { + "epoch": 0.6201476793248946, + "grad_norm": 0.7521397471427917, + "learning_rate": 0.00048202502662869195, + "loss": 1.5003, + "step": 5879 + }, + { + "epoch": 0.620253164556962, + "grad_norm": 0.6389250755310059, + "learning_rate": 0.0004817905545554717, + "loss": 1.5116, + "step": 5880 + }, + { + "epoch": 0.6203586497890295, + "grad_norm": 0.7074341177940369, + "learning_rate": 0.00048155611253645727, + "loss": 1.5073, + "step": 5881 + }, + { + "epoch": 0.6204641350210971, + "grad_norm": 0.6749929785728455, + "learning_rate": 0.0004813217005979191, + "loss": 1.5125, + "step": 5882 + }, + { + "epoch": 0.6205696202531645, + "grad_norm": 0.9076398015022278, + "learning_rate": 0.000481087318766124, + "loss": 1.4873, + "step": 5883 + }, + { + "epoch": 0.6206751054852321, + "grad_norm": 0.6142567992210388, + "learning_rate": 0.0004808529670673358, + "loss": 1.4771, + "step": 5884 + }, + { + "epoch": 0.6207805907172996, + "grad_norm": 0.6139792203903198, + "learning_rate": 0.00048061864552781456, + "loss": 1.4917, + "step": 5885 + }, + { + "epoch": 0.6208860759493671, + "grad_norm": 0.7972553372383118, + "learning_rate": 0.0004803843541738173, + "loss": 1.5437, + "step": 5886 + }, + { + "epoch": 0.6209915611814346, + "grad_norm": 0.6432527303695679, + "learning_rate": 0.0004801500930315978, + "loss": 1.5299, + "step": 5887 + }, + { + "epoch": 
0.6210970464135022, + "grad_norm": 0.878956139087677, + "learning_rate": 0.000479915862127406, + "loss": 1.49, + "step": 5888 + }, + { + "epoch": 0.6212025316455696, + "grad_norm": 0.6669794321060181, + "learning_rate": 0.0004796816614874885, + "loss": 1.5234, + "step": 5889 + }, + { + "epoch": 0.6213080168776371, + "grad_norm": 0.9060148596763611, + "learning_rate": 0.00047944749113808884, + "loss": 1.5502, + "step": 5890 + }, + { + "epoch": 0.6214135021097047, + "grad_norm": 0.6601831316947937, + "learning_rate": 0.0004792133511054469, + "loss": 1.4794, + "step": 5891 + }, + { + "epoch": 0.6215189873417721, + "grad_norm": 0.8099795579910278, + "learning_rate": 0.0004789792414157992, + "loss": 1.5298, + "step": 5892 + }, + { + "epoch": 0.6216244725738397, + "grad_norm": 0.6548190712928772, + "learning_rate": 0.000478745162095379, + "loss": 1.4712, + "step": 5893 + }, + { + "epoch": 0.6217299578059071, + "grad_norm": 0.6609686613082886, + "learning_rate": 0.0004785111131704157, + "loss": 1.4875, + "step": 5894 + }, + { + "epoch": 0.6218354430379747, + "grad_norm": 0.6561522483825684, + "learning_rate": 0.0004782770946671362, + "loss": 1.5214, + "step": 5895 + }, + { + "epoch": 0.6219409282700422, + "grad_norm": 0.6758773922920227, + "learning_rate": 0.0004780431066117629, + "loss": 1.5139, + "step": 5896 + }, + { + "epoch": 0.6220464135021097, + "grad_norm": 0.7285489439964294, + "learning_rate": 0.0004778091490305159, + "loss": 1.5568, + "step": 5897 + }, + { + "epoch": 0.6221518987341772, + "grad_norm": 0.669206976890564, + "learning_rate": 0.0004775752219496109, + "loss": 1.5375, + "step": 5898 + }, + { + "epoch": 0.6222573839662447, + "grad_norm": 0.6400879621505737, + "learning_rate": 0.00047734132539526086, + "loss": 1.4994, + "step": 5899 + }, + { + "epoch": 0.6223628691983122, + "grad_norm": 0.660430371761322, + "learning_rate": 0.00047710745939367474, + "loss": 1.529, + "step": 5900 + }, + { + "epoch": 0.6224683544303797, + "grad_norm": 0.6682916879653931, 
+ "learning_rate": 0.00047687362397105863, + "loss": 1.509, + "step": 5901 + }, + { + "epoch": 0.6225738396624473, + "grad_norm": 0.6729592680931091, + "learning_rate": 0.0004766398191536149, + "loss": 1.5616, + "step": 5902 + }, + { + "epoch": 0.6226793248945147, + "grad_norm": 0.6668772101402283, + "learning_rate": 0.00047640604496754235, + "loss": 1.4781, + "step": 5903 + }, + { + "epoch": 0.6227848101265823, + "grad_norm": 0.6643920540809631, + "learning_rate": 0.000476172301439037, + "loss": 1.5165, + "step": 5904 + }, + { + "epoch": 0.6228902953586498, + "grad_norm": 0.6943418383598328, + "learning_rate": 0.00047593858859429035, + "loss": 1.4858, + "step": 5905 + }, + { + "epoch": 0.6229957805907173, + "grad_norm": 0.6592877507209778, + "learning_rate": 0.00047570490645949175, + "loss": 1.5137, + "step": 5906 + }, + { + "epoch": 0.6231012658227848, + "grad_norm": 0.6631583571434021, + "learning_rate": 0.000475471255060826, + "loss": 1.4793, + "step": 5907 + }, + { + "epoch": 0.6232067510548523, + "grad_norm": 0.6551583409309387, + "learning_rate": 0.0004752376344244752, + "loss": 1.519, + "step": 5908 + }, + { + "epoch": 0.6233122362869198, + "grad_norm": 0.6262952089309692, + "learning_rate": 0.00047500404457661747, + "loss": 1.5093, + "step": 5909 + }, + { + "epoch": 0.6234177215189873, + "grad_norm": 0.8208453059196472, + "learning_rate": 0.0004747704855434278, + "loss": 1.513, + "step": 5910 + }, + { + "epoch": 0.6235232067510549, + "grad_norm": 0.6562867760658264, + "learning_rate": 0.0004745369573510775, + "loss": 1.4666, + "step": 5911 + }, + { + "epoch": 0.6236286919831223, + "grad_norm": 0.7237608432769775, + "learning_rate": 0.0004743034600257348, + "loss": 1.5346, + "step": 5912 + }, + { + "epoch": 0.6237341772151899, + "grad_norm": 0.6394747495651245, + "learning_rate": 0.0004740699935935643, + "loss": 1.4927, + "step": 5913 + }, + { + "epoch": 0.6238396624472574, + "grad_norm": 0.713468611240387, + "learning_rate": 0.0004738365580807268, + 
"loss": 1.491, + "step": 5914 + }, + { + "epoch": 0.6239451476793249, + "grad_norm": 0.6605074405670166, + "learning_rate": 0.0004736031535133799, + "loss": 1.4921, + "step": 5915 + }, + { + "epoch": 0.6240506329113924, + "grad_norm": 0.6788218021392822, + "learning_rate": 0.0004733697799176781, + "loss": 1.4989, + "step": 5916 + }, + { + "epoch": 0.62415611814346, + "grad_norm": 0.6463896632194519, + "learning_rate": 0.0004731364373197718, + "loss": 1.5339, + "step": 5917 + }, + { + "epoch": 0.6242616033755274, + "grad_norm": 0.5947257280349731, + "learning_rate": 0.00047290312574580835, + "loss": 1.5117, + "step": 5918 + }, + { + "epoch": 0.6243670886075949, + "grad_norm": 0.6671315431594849, + "learning_rate": 0.00047266984522193134, + "loss": 1.5059, + "step": 5919 + }, + { + "epoch": 0.6244725738396625, + "grad_norm": 0.6280168890953064, + "learning_rate": 0.0004724365957742809, + "loss": 1.5026, + "step": 5920 + }, + { + "epoch": 0.6245780590717299, + "grad_norm": 0.7597678303718567, + "learning_rate": 0.0004722033774289941, + "loss": 1.5233, + "step": 5921 + }, + { + "epoch": 0.6246835443037975, + "grad_norm": 0.6434047818183899, + "learning_rate": 0.0004719701902122041, + "loss": 1.4885, + "step": 5922 + }, + { + "epoch": 0.624789029535865, + "grad_norm": 0.6179329752922058, + "learning_rate": 0.00047173703415004066, + "loss": 1.522, + "step": 5923 + }, + { + "epoch": 0.6248945147679325, + "grad_norm": 0.8090437650680542, + "learning_rate": 0.0004715039092686302, + "loss": 1.5362, + "step": 5924 + }, + { + "epoch": 0.625, + "grad_norm": 0.6339301466941833, + "learning_rate": 0.0004712708155940951, + "loss": 1.5238, + "step": 5925 + }, + { + "epoch": 0.6251054852320675, + "grad_norm": 0.7468827962875366, + "learning_rate": 0.0004710377531525552, + "loss": 1.4539, + "step": 5926 + }, + { + "epoch": 0.625210970464135, + "grad_norm": 0.7012411952018738, + "learning_rate": 0.000470804721970126, + "loss": 1.5113, + "step": 5927 + }, + { + "epoch": 
0.6253164556962025, + "grad_norm": 0.8867717981338501, + "learning_rate": 0.00047057172207292004, + "loss": 1.5012, + "step": 5928 + }, + { + "epoch": 0.6254219409282701, + "grad_norm": 0.7870324850082397, + "learning_rate": 0.00047033875348704576, + "loss": 1.5037, + "step": 5929 + }, + { + "epoch": 0.6255274261603375, + "grad_norm": 0.8244644999504089, + "learning_rate": 0.00047010581623860883, + "loss": 1.5169, + "step": 5930 + }, + { + "epoch": 0.6256329113924051, + "grad_norm": 0.6512954235076904, + "learning_rate": 0.0004698729103537109, + "loss": 1.5127, + "step": 5931 + }, + { + "epoch": 0.6257383966244726, + "grad_norm": 0.7579745054244995, + "learning_rate": 0.0004696400358584501, + "loss": 1.5276, + "step": 5932 + }, + { + "epoch": 0.62584388185654, + "grad_norm": 0.600807249546051, + "learning_rate": 0.00046940719277892143, + "loss": 1.5374, + "step": 5933 + }, + { + "epoch": 0.6259493670886076, + "grad_norm": 0.7157487869262695, + "learning_rate": 0.0004691743811412159, + "loss": 1.531, + "step": 5934 + }, + { + "epoch": 0.6260548523206751, + "grad_norm": 0.6342028379440308, + "learning_rate": 0.00046894160097142113, + "loss": 1.512, + "step": 5935 + }, + { + "epoch": 0.6261603375527426, + "grad_norm": 0.7168199419975281, + "learning_rate": 0.00046870885229562153, + "loss": 1.5133, + "step": 5936 + }, + { + "epoch": 0.6262658227848101, + "grad_norm": 0.5888542532920837, + "learning_rate": 0.0004684761351398976, + "loss": 1.5173, + "step": 5937 + }, + { + "epoch": 0.6263713080168777, + "grad_norm": 0.7180189490318298, + "learning_rate": 0.0004682434495303267, + "loss": 1.4738, + "step": 5938 + }, + { + "epoch": 0.6264767932489451, + "grad_norm": 0.6268370747566223, + "learning_rate": 0.00046801079549298224, + "loss": 1.4577, + "step": 5939 + }, + { + "epoch": 0.6265822784810127, + "grad_norm": 0.7384047508239746, + "learning_rate": 0.0004677781730539342, + "loss": 1.5548, + "step": 5940 + }, + { + "epoch": 0.6266877637130802, + "grad_norm": 
0.6361849308013916, + "learning_rate": 0.00046754558223924926, + "loss": 1.5378, + "step": 5941 + }, + { + "epoch": 0.6267932489451477, + "grad_norm": 0.7804290056228638, + "learning_rate": 0.00046731302307499023, + "loss": 1.5204, + "step": 5942 + }, + { + "epoch": 0.6268987341772152, + "grad_norm": 0.6943706274032593, + "learning_rate": 0.0004670804955872166, + "loss": 1.4676, + "step": 5943 + }, + { + "epoch": 0.6270042194092827, + "grad_norm": 0.8211323618888855, + "learning_rate": 0.00046684799980198415, + "loss": 1.5157, + "step": 5944 + }, + { + "epoch": 0.6271097046413502, + "grad_norm": 0.606097936630249, + "learning_rate": 0.0004666155357453451, + "loss": 1.4786, + "step": 5945 + }, + { + "epoch": 0.6272151898734177, + "grad_norm": 0.6775771975517273, + "learning_rate": 0.00046638310344334835, + "loss": 1.5215, + "step": 5946 + }, + { + "epoch": 0.6273206751054853, + "grad_norm": 0.6741312146186829, + "learning_rate": 0.0004661507029220393, + "loss": 1.497, + "step": 5947 + }, + { + "epoch": 0.6274261603375527, + "grad_norm": 0.6663179993629456, + "learning_rate": 0.0004659183342074594, + "loss": 1.4925, + "step": 5948 + }, + { + "epoch": 0.6275316455696203, + "grad_norm": 0.6264604926109314, + "learning_rate": 0.0004656859973256466, + "loss": 1.5111, + "step": 5949 + }, + { + "epoch": 0.6276371308016878, + "grad_norm": 0.6373544931411743, + "learning_rate": 0.0004654536923026356, + "loss": 1.4988, + "step": 5950 + }, + { + "epoch": 0.6277426160337553, + "grad_norm": 0.7176641821861267, + "learning_rate": 0.00046522141916445725, + "loss": 1.5535, + "step": 5951 + }, + { + "epoch": 0.6278481012658228, + "grad_norm": 0.6531791687011719, + "learning_rate": 0.0004649891779371389, + "loss": 1.5251, + "step": 5952 + }, + { + "epoch": 0.6279535864978903, + "grad_norm": 0.6701160669326782, + "learning_rate": 0.0004647569686467043, + "loss": 1.5029, + "step": 5953 + }, + { + "epoch": 0.6280590717299578, + "grad_norm": 0.7340153455734253, + "learning_rate": 
0.00046452479131917383, + "loss": 1.4875, + "step": 5954 + }, + { + "epoch": 0.6281645569620253, + "grad_norm": 0.6346822381019592, + "learning_rate": 0.0004642926459805636, + "loss": 1.5191, + "step": 5955 + }, + { + "epoch": 0.6282700421940929, + "grad_norm": 0.8339464068412781, + "learning_rate": 0.0004640605326568874, + "loss": 1.5241, + "step": 5956 + }, + { + "epoch": 0.6283755274261603, + "grad_norm": 0.5992371439933777, + "learning_rate": 0.00046382845137415437, + "loss": 1.4833, + "step": 5957 + }, + { + "epoch": 0.6284810126582279, + "grad_norm": 0.8185915946960449, + "learning_rate": 0.0004635964021583703, + "loss": 1.4991, + "step": 5958 + }, + { + "epoch": 0.6285864978902953, + "grad_norm": 0.6242163181304932, + "learning_rate": 0.00046336438503553754, + "loss": 1.4963, + "step": 5959 + }, + { + "epoch": 0.6286919831223629, + "grad_norm": 0.7540661096572876, + "learning_rate": 0.00046313240003165466, + "loss": 1.5232, + "step": 5960 + }, + { + "epoch": 0.6287974683544304, + "grad_norm": 0.6622714996337891, + "learning_rate": 0.00046290044717271685, + "loss": 1.5192, + "step": 5961 + }, + { + "epoch": 0.6289029535864978, + "grad_norm": 0.6197299957275391, + "learning_rate": 0.00046266852648471553, + "loss": 1.5013, + "step": 5962 + }, + { + "epoch": 0.6290084388185654, + "grad_norm": 0.6897821426391602, + "learning_rate": 0.0004624366379936383, + "loss": 1.5187, + "step": 5963 + }, + { + "epoch": 0.6291139240506329, + "grad_norm": 0.6179909110069275, + "learning_rate": 0.00046220478172546997, + "loss": 1.5213, + "step": 5964 + }, + { + "epoch": 0.6292194092827004, + "grad_norm": 0.5970615744590759, + "learning_rate": 0.00046197295770619105, + "loss": 1.5478, + "step": 5965 + }, + { + "epoch": 0.6293248945147679, + "grad_norm": 0.6779214143753052, + "learning_rate": 0.00046174116596177833, + "loss": 1.505, + "step": 5966 + }, + { + "epoch": 0.6294303797468355, + "grad_norm": 0.6918543577194214, + "learning_rate": 0.00046150940651820536, + "loss": 1.5139, 
+ "step": 5967 + }, + { + "epoch": 0.6295358649789029, + "grad_norm": 0.6132500171661377, + "learning_rate": 0.0004612776794014419, + "loss": 1.4652, + "step": 5968 + }, + { + "epoch": 0.6296413502109705, + "grad_norm": 0.6150674819946289, + "learning_rate": 0.00046104598463745424, + "loss": 1.4872, + "step": 5969 + }, + { + "epoch": 0.629746835443038, + "grad_norm": 0.6501268744468689, + "learning_rate": 0.0004608143222522048, + "loss": 1.5374, + "step": 5970 + }, + { + "epoch": 0.6298523206751054, + "grad_norm": 0.5409150719642639, + "learning_rate": 0.00046058269227165256, + "loss": 1.5314, + "step": 5971 + }, + { + "epoch": 0.629957805907173, + "grad_norm": 0.6667943596839905, + "learning_rate": 0.0004603510947217526, + "loss": 1.4712, + "step": 5972 + }, + { + "epoch": 0.6300632911392405, + "grad_norm": 0.6531066298484802, + "learning_rate": 0.000460119529628457, + "loss": 1.5154, + "step": 5973 + }, + { + "epoch": 0.630168776371308, + "grad_norm": 0.6617052555084229, + "learning_rate": 0.00045988799701771364, + "loss": 1.5247, + "step": 5974 + }, + { + "epoch": 0.6302742616033755, + "grad_norm": 0.724493682384491, + "learning_rate": 0.0004596564969154668, + "loss": 1.4972, + "step": 5975 + }, + { + "epoch": 0.6303797468354431, + "grad_norm": 0.6100730299949646, + "learning_rate": 0.00045942502934765735, + "loss": 1.4967, + "step": 5976 + }, + { + "epoch": 0.6304852320675105, + "grad_norm": 0.6371790170669556, + "learning_rate": 0.0004591935943402222, + "loss": 1.4655, + "step": 5977 + }, + { + "epoch": 0.630590717299578, + "grad_norm": 0.6062325835227966, + "learning_rate": 0.00045896219191909486, + "loss": 1.5027, + "step": 5978 + }, + { + "epoch": 0.6306962025316456, + "grad_norm": 0.6196571588516235, + "learning_rate": 0.0004587308221102053, + "loss": 1.46, + "step": 5979 + }, + { + "epoch": 0.630801687763713, + "grad_norm": 0.6310222148895264, + "learning_rate": 0.0004584994849394795, + "loss": 1.4864, + "step": 5980 + }, + { + "epoch": 
0.6309071729957806, + "grad_norm": 0.5991256833076477, + "learning_rate": 0.0004582681804328396, + "loss": 1.5151, + "step": 5981 + }, + { + "epoch": 0.6310126582278481, + "grad_norm": 0.6274584531784058, + "learning_rate": 0.0004580369086162051, + "loss": 1.5174, + "step": 5982 + }, + { + "epoch": 0.6311181434599156, + "grad_norm": 0.5852131247520447, + "learning_rate": 0.0004578056695154909, + "loss": 1.4843, + "step": 5983 + }, + { + "epoch": 0.6312236286919831, + "grad_norm": 0.5988900661468506, + "learning_rate": 0.0004575744631566083, + "loss": 1.5109, + "step": 5984 + }, + { + "epoch": 0.6313291139240507, + "grad_norm": 0.6793749928474426, + "learning_rate": 0.0004573432895654654, + "loss": 1.5067, + "step": 5985 + }, + { + "epoch": 0.6314345991561181, + "grad_norm": 0.634704053401947, + "learning_rate": 0.00045711214876796623, + "loss": 1.5257, + "step": 5986 + }, + { + "epoch": 0.6315400843881857, + "grad_norm": 0.5985695719718933, + "learning_rate": 0.0004568810407900112, + "loss": 1.5003, + "step": 5987 + }, + { + "epoch": 0.6316455696202532, + "grad_norm": 0.716494619846344, + "learning_rate": 0.00045664996565749716, + "loss": 1.522, + "step": 5988 + }, + { + "epoch": 0.6317510548523206, + "grad_norm": 0.6737105250358582, + "learning_rate": 0.00045641892339631703, + "loss": 1.5577, + "step": 5989 + }, + { + "epoch": 0.6318565400843882, + "grad_norm": 0.6272590756416321, + "learning_rate": 0.0004561879140323607, + "loss": 1.5168, + "step": 5990 + }, + { + "epoch": 0.6319620253164557, + "grad_norm": 0.6266348361968994, + "learning_rate": 0.0004559569375915137, + "loss": 1.5211, + "step": 5991 + }, + { + "epoch": 0.6320675105485232, + "grad_norm": 0.7142159938812256, + "learning_rate": 0.00045572599409965804, + "loss": 1.4931, + "step": 5992 + }, + { + "epoch": 0.6321729957805907, + "grad_norm": 0.6905307769775391, + "learning_rate": 0.00045549508358267224, + "loss": 1.586, + "step": 5993 + }, + { + "epoch": 0.6322784810126583, + "grad_norm": 
0.6467257738113403, + "learning_rate": 0.0004552642060664307, + "loss": 1.5259, + "step": 5994 + }, + { + "epoch": 0.6323839662447257, + "grad_norm": 0.703870952129364, + "learning_rate": 0.00045503336157680466, + "loss": 1.489, + "step": 5995 + }, + { + "epoch": 0.6324894514767933, + "grad_norm": 0.6643348932266235, + "learning_rate": 0.00045480255013966123, + "loss": 1.5202, + "step": 5996 + }, + { + "epoch": 0.6325949367088608, + "grad_norm": 0.6757035851478577, + "learning_rate": 0.00045457177178086407, + "loss": 1.5221, + "step": 5997 + }, + { + "epoch": 0.6327004219409282, + "grad_norm": 0.6756793856620789, + "learning_rate": 0.0004543410265262727, + "loss": 1.507, + "step": 5998 + }, + { + "epoch": 0.6328059071729958, + "grad_norm": 0.6527981162071228, + "learning_rate": 0.000454110314401744, + "loss": 1.5172, + "step": 5999 + }, + { + "epoch": 0.6329113924050633, + "grad_norm": 0.662560760974884, + "learning_rate": 0.0004538796354331298, + "loss": 1.5271, + "step": 6000 + }, + { + "epoch": 0.6330168776371308, + "grad_norm": 0.6644051671028137, + "learning_rate": 0.0004536489896462792, + "loss": 1.4972, + "step": 6001 + }, + { + "epoch": 0.6331223628691983, + "grad_norm": 0.6494223475456238, + "learning_rate": 0.0004534183770670371, + "loss": 1.4806, + "step": 6002 + }, + { + "epoch": 0.6332278481012659, + "grad_norm": 0.6927706003189087, + "learning_rate": 0.0004531877977212446, + "loss": 1.5311, + "step": 6003 + }, + { + "epoch": 0.6333333333333333, + "grad_norm": 0.7354052066802979, + "learning_rate": 0.00045295725163473945, + "loss": 1.5528, + "step": 6004 + }, + { + "epoch": 0.6334388185654009, + "grad_norm": 0.7858226299285889, + "learning_rate": 0.0004527267388333555, + "loss": 1.5085, + "step": 6005 + }, + { + "epoch": 0.6335443037974684, + "grad_norm": 0.6806443929672241, + "learning_rate": 0.0004524962593429227, + "loss": 1.5058, + "step": 6006 + }, + { + "epoch": 0.6336497890295358, + "grad_norm": 0.6729578971862793, + "learning_rate": 
0.00045226581318926737, + "loss": 1.5039, + "step": 6007 + }, + { + "epoch": 0.6337552742616034, + "grad_norm": 0.6128213405609131, + "learning_rate": 0.0004520354003982125, + "loss": 1.5067, + "step": 6008 + }, + { + "epoch": 0.6338607594936709, + "grad_norm": 0.6742966771125793, + "learning_rate": 0.00045180502099557686, + "loss": 1.5447, + "step": 6009 + }, + { + "epoch": 0.6339662447257384, + "grad_norm": 0.6235178709030151, + "learning_rate": 0.0004515746750071754, + "loss": 1.4813, + "step": 6010 + }, + { + "epoch": 0.6340717299578059, + "grad_norm": 0.6326314210891724, + "learning_rate": 0.00045134436245881986, + "loss": 1.487, + "step": 6011 + }, + { + "epoch": 0.6341772151898735, + "grad_norm": 0.6386275887489319, + "learning_rate": 0.0004511140833763177, + "loss": 1.5303, + "step": 6012 + }, + { + "epoch": 0.6342827004219409, + "grad_norm": 0.8570296168327332, + "learning_rate": 0.00045088383778547284, + "loss": 1.5266, + "step": 6013 + }, + { + "epoch": 0.6343881856540085, + "grad_norm": 0.6221066117286682, + "learning_rate": 0.0004506536257120856, + "loss": 1.5289, + "step": 6014 + }, + { + "epoch": 0.634493670886076, + "grad_norm": 0.7110923528671265, + "learning_rate": 0.0004504234471819518, + "loss": 1.5244, + "step": 6015 + }, + { + "epoch": 0.6345991561181434, + "grad_norm": 0.6987123489379883, + "learning_rate": 0.0004501933022208649, + "loss": 1.5062, + "step": 6016 + }, + { + "epoch": 0.634704641350211, + "grad_norm": 0.7186236381530762, + "learning_rate": 0.00044996319085461353, + "loss": 1.5358, + "step": 6017 + }, + { + "epoch": 0.6348101265822785, + "grad_norm": 0.7958090305328369, + "learning_rate": 0.00044973311310898275, + "loss": 1.4845, + "step": 6018 + }, + { + "epoch": 0.634915611814346, + "grad_norm": 0.7208167910575867, + "learning_rate": 0.00044950306900975377, + "loss": 1.4797, + "step": 6019 + }, + { + "epoch": 0.6350210970464135, + "grad_norm": 0.7161325812339783, + "learning_rate": 0.0004492730585827046, + "loss": 1.5283, + 
"step": 6020 + }, + { + "epoch": 0.6351265822784811, + "grad_norm": 0.6078905463218689, + "learning_rate": 0.0004490430818536085, + "loss": 1.4883, + "step": 6021 + }, + { + "epoch": 0.6352320675105485, + "grad_norm": 0.6813989877700806, + "learning_rate": 0.0004488131388482359, + "loss": 1.4702, + "step": 6022 + }, + { + "epoch": 0.635337552742616, + "grad_norm": 0.6141283512115479, + "learning_rate": 0.000448583229592353, + "loss": 1.5125, + "step": 6023 + }, + { + "epoch": 0.6354430379746835, + "grad_norm": 0.6478928923606873, + "learning_rate": 0.0004483533541117218, + "loss": 1.4755, + "step": 6024 + }, + { + "epoch": 0.635548523206751, + "grad_norm": 0.6441009640693665, + "learning_rate": 0.0004481235124321018, + "loss": 1.5016, + "step": 6025 + }, + { + "epoch": 0.6356540084388186, + "grad_norm": 0.6909101009368896, + "learning_rate": 0.0004478937045792474, + "loss": 1.4953, + "step": 6026 + }, + { + "epoch": 0.635759493670886, + "grad_norm": 0.6185054779052734, + "learning_rate": 0.00044766393057891, + "loss": 1.505, + "step": 6027 + }, + { + "epoch": 0.6358649789029536, + "grad_norm": 0.5918982028961182, + "learning_rate": 0.00044743419045683674, + "loss": 1.512, + "step": 6028 + }, + { + "epoch": 0.6359704641350211, + "grad_norm": 0.6057409644126892, + "learning_rate": 0.00044720448423877113, + "loss": 1.4863, + "step": 6029 + }, + { + "epoch": 0.6360759493670886, + "grad_norm": 0.6272285580635071, + "learning_rate": 0.0004469748119504529, + "loss": 1.4981, + "step": 6030 + }, + { + "epoch": 0.6361814345991561, + "grad_norm": 0.6487106680870056, + "learning_rate": 0.000446745173617618, + "loss": 1.5377, + "step": 6031 + }, + { + "epoch": 0.6362869198312237, + "grad_norm": 0.5916958451271057, + "learning_rate": 0.00044651556926599863, + "loss": 1.5127, + "step": 6032 + }, + { + "epoch": 0.6363924050632911, + "grad_norm": 0.6164158582687378, + "learning_rate": 0.0004462859989213227, + "loss": 1.5057, + "step": 6033 + }, + { + "epoch": 0.6364978902953586, + 
"grad_norm": 0.6135196089744568, + "learning_rate": 0.0004460564626093154, + "loss": 1.53, + "step": 6034 + }, + { + "epoch": 0.6366033755274262, + "grad_norm": 0.6304799914360046, + "learning_rate": 0.00044582696035569695, + "loss": 1.5198, + "step": 6035 + }, + { + "epoch": 0.6367088607594936, + "grad_norm": 0.6724665760993958, + "learning_rate": 0.00044559749218618444, + "loss": 1.4979, + "step": 6036 + }, + { + "epoch": 0.6368143459915612, + "grad_norm": 0.5959965586662292, + "learning_rate": 0.0004453680581264908, + "loss": 1.5137, + "step": 6037 + }, + { + "epoch": 0.6369198312236287, + "grad_norm": 0.6436355113983154, + "learning_rate": 0.00044513865820232525, + "loss": 1.5091, + "step": 6038 + }, + { + "epoch": 0.6370253164556962, + "grad_norm": 0.6536716222763062, + "learning_rate": 0.0004449092924393933, + "loss": 1.493, + "step": 6039 + }, + { + "epoch": 0.6371308016877637, + "grad_norm": 0.7031990885734558, + "learning_rate": 0.0004446799608633964, + "loss": 1.5299, + "step": 6040 + }, + { + "epoch": 0.6372362869198313, + "grad_norm": 0.6829683184623718, + "learning_rate": 0.00044445066350003203, + "loss": 1.4853, + "step": 6041 + }, + { + "epoch": 0.6373417721518987, + "grad_norm": 0.7064085602760315, + "learning_rate": 0.00044422140037499473, + "loss": 1.5094, + "step": 6042 + }, + { + "epoch": 0.6374472573839662, + "grad_norm": 0.7416712641716003, + "learning_rate": 0.0004439921715139743, + "loss": 1.4958, + "step": 6043 + }, + { + "epoch": 0.6375527426160338, + "grad_norm": 0.6201027631759644, + "learning_rate": 0.00044376297694265687, + "loss": 1.4828, + "step": 6044 + }, + { + "epoch": 0.6376582278481012, + "grad_norm": 0.657595157623291, + "learning_rate": 0.000443533816686725, + "loss": 1.4867, + "step": 6045 + }, + { + "epoch": 0.6377637130801688, + "grad_norm": 0.620121419429779, + "learning_rate": 0.0004433046907718571, + "loss": 1.4853, + "step": 6046 + }, + { + "epoch": 0.6378691983122363, + "grad_norm": 0.6188048720359802, + 
"learning_rate": 0.0004430755992237278, + "loss": 1.5078, + "step": 6047 + }, + { + "epoch": 0.6379746835443038, + "grad_norm": 0.7239684462547302, + "learning_rate": 0.00044284654206800826, + "loss": 1.5189, + "step": 6048 + }, + { + "epoch": 0.6380801687763713, + "grad_norm": 0.6496513485908508, + "learning_rate": 0.00044261751933036525, + "loss": 1.4887, + "step": 6049 + }, + { + "epoch": 0.6381856540084389, + "grad_norm": 0.6963680386543274, + "learning_rate": 0.00044238853103646154, + "loss": 1.5031, + "step": 6050 + }, + { + "epoch": 0.6382911392405063, + "grad_norm": 0.5716072916984558, + "learning_rate": 0.0004421595772119573, + "loss": 1.5084, + "step": 6051 + }, + { + "epoch": 0.6383966244725738, + "grad_norm": 0.6287668943405151, + "learning_rate": 0.0004419306578825073, + "loss": 1.5427, + "step": 6052 + }, + { + "epoch": 0.6385021097046414, + "grad_norm": 0.7715273499488831, + "learning_rate": 0.0004417017730737633, + "loss": 1.4821, + "step": 6053 + }, + { + "epoch": 0.6386075949367088, + "grad_norm": 0.756332278251648, + "learning_rate": 0.00044147292281137293, + "loss": 1.5359, + "step": 6054 + }, + { + "epoch": 0.6387130801687764, + "grad_norm": 0.7097960114479065, + "learning_rate": 0.00044124410712098014, + "loss": 1.4738, + "step": 6055 + }, + { + "epoch": 0.6388185654008439, + "grad_norm": 0.7075965404510498, + "learning_rate": 0.0004410153260282246, + "loss": 1.4916, + "step": 6056 + }, + { + "epoch": 0.6389240506329114, + "grad_norm": 0.7342345118522644, + "learning_rate": 0.00044078657955874245, + "loss": 1.5214, + "step": 6057 + }, + { + "epoch": 0.6390295358649789, + "grad_norm": 0.6869998574256897, + "learning_rate": 0.0004405578677381661, + "loss": 1.5201, + "step": 6058 + }, + { + "epoch": 0.6391350210970465, + "grad_norm": 0.6966910362243652, + "learning_rate": 0.0004403291905921233, + "loss": 1.5236, + "step": 6059 + }, + { + "epoch": 0.6392405063291139, + "grad_norm": 0.6268489956855774, + "learning_rate": 0.00044010054814623925, + 
"loss": 1.5303, + "step": 6060 + }, + { + "epoch": 0.6393459915611814, + "grad_norm": 0.8214757442474365, + "learning_rate": 0.00043987194042613393, + "loss": 1.5112, + "step": 6061 + }, + { + "epoch": 0.639451476793249, + "grad_norm": 0.5713306665420532, + "learning_rate": 0.0004396433674574242, + "loss": 1.491, + "step": 6062 + }, + { + "epoch": 0.6395569620253164, + "grad_norm": 0.7061427235603333, + "learning_rate": 0.00043941482926572277, + "loss": 1.4913, + "step": 6063 + }, + { + "epoch": 0.639662447257384, + "grad_norm": 0.5943806171417236, + "learning_rate": 0.0004391863258766384, + "loss": 1.5613, + "step": 6064 + }, + { + "epoch": 0.6397679324894515, + "grad_norm": 0.7006478905677795, + "learning_rate": 0.00043895785731577606, + "loss": 1.4886, + "step": 6065 + }, + { + "epoch": 0.639873417721519, + "grad_norm": 0.6123795509338379, + "learning_rate": 0.0004387294236087368, + "loss": 1.483, + "step": 6066 + }, + { + "epoch": 0.6399789029535865, + "grad_norm": 0.6804739236831665, + "learning_rate": 0.00043850102478111764, + "loss": 1.4928, + "step": 6067 + }, + { + "epoch": 0.640084388185654, + "grad_norm": 0.6227420568466187, + "learning_rate": 0.00043827266085851203, + "loss": 1.4947, + "step": 6068 + }, + { + "epoch": 0.6401898734177215, + "grad_norm": 0.6695016622543335, + "learning_rate": 0.00043804433186650916, + "loss": 1.5556, + "step": 6069 + }, + { + "epoch": 0.640295358649789, + "grad_norm": 0.6826655268669128, + "learning_rate": 0.0004378160378306944, + "loss": 1.4955, + "step": 6070 + }, + { + "epoch": 0.6404008438818566, + "grad_norm": 0.6392858624458313, + "learning_rate": 0.0004375877787766495, + "loss": 1.5052, + "step": 6071 + }, + { + "epoch": 0.640506329113924, + "grad_norm": 0.5620483756065369, + "learning_rate": 0.0004373595547299517, + "loss": 1.5028, + "step": 6072 + }, + { + "epoch": 0.6406118143459916, + "grad_norm": 0.6092487573623657, + "learning_rate": 0.00043713136571617474, + "loss": 1.5429, + "step": 6073 + }, + { + "epoch": 
0.6407172995780591, + "grad_norm": 0.6247598528862, + "learning_rate": 0.00043690321176088843, + "loss": 1.5359, + "step": 6074 + }, + { + "epoch": 0.6408227848101266, + "grad_norm": 0.6008337736129761, + "learning_rate": 0.00043667509288965845, + "loss": 1.4784, + "step": 6075 + }, + { + "epoch": 0.6409282700421941, + "grad_norm": 0.6983892321586609, + "learning_rate": 0.0004364470091280463, + "loss": 1.4814, + "step": 6076 + }, + { + "epoch": 0.6410337552742617, + "grad_norm": 0.5967481732368469, + "learning_rate": 0.0004362189605016107, + "loss": 1.494, + "step": 6077 + }, + { + "epoch": 0.6411392405063291, + "grad_norm": 0.6958829760551453, + "learning_rate": 0.00043599094703590524, + "loss": 1.5017, + "step": 6078 + }, + { + "epoch": 0.6412447257383966, + "grad_norm": 0.6482260823249817, + "learning_rate": 0.00043576296875647984, + "loss": 1.5295, + "step": 6079 + }, + { + "epoch": 0.6413502109704642, + "grad_norm": 0.6425276398658752, + "learning_rate": 0.00043553502568888095, + "loss": 1.4756, + "step": 6080 + }, + { + "epoch": 0.6414556962025316, + "grad_norm": 0.6643887758255005, + "learning_rate": 0.00043530711785865026, + "loss": 1.4919, + "step": 6081 + }, + { + "epoch": 0.6415611814345992, + "grad_norm": 0.6536039710044861, + "learning_rate": 0.00043507924529132637, + "loss": 1.4711, + "step": 6082 + }, + { + "epoch": 0.6416666666666667, + "grad_norm": 0.7165623903274536, + "learning_rate": 0.0004348514080124432, + "loss": 1.5493, + "step": 6083 + }, + { + "epoch": 0.6417721518987342, + "grad_norm": 0.6130931377410889, + "learning_rate": 0.0004346236060475314, + "loss": 1.4851, + "step": 6084 + }, + { + "epoch": 0.6418776371308017, + "grad_norm": 0.5928041338920593, + "learning_rate": 0.00043439583942211674, + "loss": 1.487, + "step": 6085 + }, + { + "epoch": 0.6419831223628693, + "grad_norm": 0.6979196667671204, + "learning_rate": 0.00043416810816172244, + "loss": 1.5274, + "step": 6086 + }, + { + "epoch": 0.6420886075949367, + "grad_norm": 
0.7750183939933777, + "learning_rate": 0.0004339404122918664, + "loss": 1.524, + "step": 6087 + }, + { + "epoch": 0.6421940928270042, + "grad_norm": 0.6785627007484436, + "learning_rate": 0.0004337127518380632, + "loss": 1.4823, + "step": 6088 + }, + { + "epoch": 0.6422995780590718, + "grad_norm": 0.730109691619873, + "learning_rate": 0.0004334851268258234, + "loss": 1.4953, + "step": 6089 + }, + { + "epoch": 0.6424050632911392, + "grad_norm": 0.767586886882782, + "learning_rate": 0.0004332575372806534, + "loss": 1.5116, + "step": 6090 + }, + { + "epoch": 0.6425105485232068, + "grad_norm": 0.6475664377212524, + "learning_rate": 0.00043302998322805564, + "loss": 1.5347, + "step": 6091 + }, + { + "epoch": 0.6426160337552742, + "grad_norm": 0.8072822093963623, + "learning_rate": 0.0004328024646935289, + "loss": 1.4913, + "step": 6092 + }, + { + "epoch": 0.6427215189873418, + "grad_norm": 0.6339500546455383, + "learning_rate": 0.00043257498170256735, + "loss": 1.5101, + "step": 6093 + }, + { + "epoch": 0.6428270042194093, + "grad_norm": 0.6419986486434937, + "learning_rate": 0.0004323475342806622, + "loss": 1.4949, + "step": 6094 + }, + { + "epoch": 0.6429324894514767, + "grad_norm": 0.6087484359741211, + "learning_rate": 0.00043212012245329986, + "loss": 1.4597, + "step": 6095 + }, + { + "epoch": 0.6430379746835443, + "grad_norm": 0.5898579955101013, + "learning_rate": 0.0004318927462459629, + "loss": 1.4932, + "step": 6096 + }, + { + "epoch": 0.6431434599156118, + "grad_norm": 0.6320613026618958, + "learning_rate": 0.0004316654056841299, + "loss": 1.4839, + "step": 6097 + }, + { + "epoch": 0.6432489451476793, + "grad_norm": 0.634239673614502, + "learning_rate": 0.0004314381007932756, + "loss": 1.5289, + "step": 6098 + }, + { + "epoch": 0.6433544303797468, + "grad_norm": 0.6616015434265137, + "learning_rate": 0.00043121083159887056, + "loss": 1.4752, + "step": 6099 + }, + { + "epoch": 0.6434599156118144, + "grad_norm": 0.5894590616226196, + "learning_rate": 
0.00043098359812638145, + "loss": 1.5018, + "step": 6100 + }, + { + "epoch": 0.6435654008438818, + "grad_norm": 0.6400855183601379, + "learning_rate": 0.000430756400401271, + "loss": 1.5359, + "step": 6101 + }, + { + "epoch": 0.6436708860759494, + "grad_norm": 0.7013359665870667, + "learning_rate": 0.00043052923844899733, + "loss": 1.476, + "step": 6102 + }, + { + "epoch": 0.6437763713080169, + "grad_norm": 0.5811768770217896, + "learning_rate": 0.000430302112295016, + "loss": 1.4841, + "step": 6103 + }, + { + "epoch": 0.6438818565400843, + "grad_norm": 0.6338202953338623, + "learning_rate": 0.00043007502196477703, + "loss": 1.515, + "step": 6104 + }, + { + "epoch": 0.6439873417721519, + "grad_norm": 0.6110925078392029, + "learning_rate": 0.00042984796748372716, + "loss": 1.5303, + "step": 6105 + }, + { + "epoch": 0.6440928270042194, + "grad_norm": 0.654352605342865, + "learning_rate": 0.000429620948877309, + "loss": 1.5043, + "step": 6106 + }, + { + "epoch": 0.6441983122362869, + "grad_norm": 0.6419139504432678, + "learning_rate": 0.000429393966170961, + "loss": 1.5409, + "step": 6107 + }, + { + "epoch": 0.6443037974683544, + "grad_norm": 0.6167211532592773, + "learning_rate": 0.00042916701939011787, + "loss": 1.4955, + "step": 6108 + }, + { + "epoch": 0.644409282700422, + "grad_norm": 0.7181944847106934, + "learning_rate": 0.00042894010856020997, + "loss": 1.5248, + "step": 6109 + }, + { + "epoch": 0.6445147679324894, + "grad_norm": 0.6569611430168152, + "learning_rate": 0.00042871323370666383, + "loss": 1.5013, + "step": 6110 + }, + { + "epoch": 0.644620253164557, + "grad_norm": 0.7718122005462646, + "learning_rate": 0.00042848639485490165, + "loss": 1.5083, + "step": 6111 + }, + { + "epoch": 0.6447257383966245, + "grad_norm": 0.5956722497940063, + "learning_rate": 0.0004282595920303425, + "loss": 1.4779, + "step": 6112 + }, + { + "epoch": 0.6448312236286919, + "grad_norm": 0.6371042728424072, + "learning_rate": 0.00042803282525840036, + "loss": 1.5258, + 
"step": 6113 + }, + { + "epoch": 0.6449367088607595, + "grad_norm": 0.6416904330253601, + "learning_rate": 0.0004278060945644856, + "loss": 1.4898, + "step": 6114 + }, + { + "epoch": 0.645042194092827, + "grad_norm": 0.5893789529800415, + "learning_rate": 0.0004275793999740046, + "loss": 1.5113, + "step": 6115 + }, + { + "epoch": 0.6451476793248945, + "grad_norm": 0.7949625849723816, + "learning_rate": 0.00042735274151235953, + "loss": 1.5331, + "step": 6116 + }, + { + "epoch": 0.645253164556962, + "grad_norm": 0.5861703753471375, + "learning_rate": 0.00042712611920494865, + "loss": 1.4892, + "step": 6117 + }, + { + "epoch": 0.6453586497890296, + "grad_norm": 0.6687540411949158, + "learning_rate": 0.0004268995330771661, + "loss": 1.5306, + "step": 6118 + }, + { + "epoch": 0.645464135021097, + "grad_norm": 0.6034708023071289, + "learning_rate": 0.0004266729831544017, + "loss": 1.4908, + "step": 6119 + }, + { + "epoch": 0.6455696202531646, + "grad_norm": 0.5962756872177124, + "learning_rate": 0.0004264464694620421, + "loss": 1.4662, + "step": 6120 + }, + { + "epoch": 0.6456751054852321, + "grad_norm": 0.6846450567245483, + "learning_rate": 0.00042621999202546897, + "loss": 1.4941, + "step": 6121 + }, + { + "epoch": 0.6457805907172995, + "grad_norm": 0.6622780561447144, + "learning_rate": 0.0004259935508700603, + "loss": 1.5237, + "step": 6122 + }, + { + "epoch": 0.6458860759493671, + "grad_norm": 0.6049894094467163, + "learning_rate": 0.0004257671460211898, + "loss": 1.499, + "step": 6123 + }, + { + "epoch": 0.6459915611814346, + "grad_norm": 0.5964819192886353, + "learning_rate": 0.00042554077750422736, + "loss": 1.5042, + "step": 6124 + }, + { + "epoch": 0.6460970464135021, + "grad_norm": 0.6304987072944641, + "learning_rate": 0.00042531444534453885, + "loss": 1.497, + "step": 6125 + }, + { + "epoch": 0.6462025316455696, + "grad_norm": 0.57814621925354, + "learning_rate": 0.0004250881495674855, + "loss": 1.4952, + "step": 6126 + }, + { + "epoch": 
0.6463080168776372, + "grad_norm": 0.6066054701805115, + "learning_rate": 0.00042486189019842535, + "loss": 1.4868, + "step": 6127 + }, + { + "epoch": 0.6464135021097046, + "grad_norm": 0.6656683683395386, + "learning_rate": 0.00042463566726271137, + "loss": 1.4841, + "step": 6128 + }, + { + "epoch": 0.6465189873417722, + "grad_norm": 0.5772959589958191, + "learning_rate": 0.0004244094807856936, + "loss": 1.5, + "step": 6129 + }, + { + "epoch": 0.6466244725738397, + "grad_norm": 0.6332406401634216, + "learning_rate": 0.000424183330792717, + "loss": 1.5378, + "step": 6130 + }, + { + "epoch": 0.6467299578059071, + "grad_norm": 0.7195770740509033, + "learning_rate": 0.0004239572173091229, + "loss": 1.5373, + "step": 6131 + }, + { + "epoch": 0.6468354430379747, + "grad_norm": 0.6694002747535706, + "learning_rate": 0.0004237311403602484, + "loss": 1.5225, + "step": 6132 + }, + { + "epoch": 0.6469409282700422, + "grad_norm": 0.676016092300415, + "learning_rate": 0.0004235050999714265, + "loss": 1.4936, + "step": 6133 + }, + { + "epoch": 0.6470464135021097, + "grad_norm": 0.6294495463371277, + "learning_rate": 0.00042327909616798616, + "loss": 1.5127, + "step": 6134 + }, + { + "epoch": 0.6471518987341772, + "grad_norm": 0.6478715538978577, + "learning_rate": 0.0004230531289752523, + "loss": 1.4768, + "step": 6135 + }, + { + "epoch": 0.6472573839662448, + "grad_norm": 0.6724597215652466, + "learning_rate": 0.00042282719841854567, + "loss": 1.5311, + "step": 6136 + }, + { + "epoch": 0.6473628691983122, + "grad_norm": 0.6291456818580627, + "learning_rate": 0.0004226013045231826, + "loss": 1.5059, + "step": 6137 + }, + { + "epoch": 0.6474683544303798, + "grad_norm": 0.692042350769043, + "learning_rate": 0.00042237544731447616, + "loss": 1.5102, + "step": 6138 + }, + { + "epoch": 0.6475738396624473, + "grad_norm": 0.6423015594482422, + "learning_rate": 0.00042214962681773457, + "loss": 1.5131, + "step": 6139 + }, + { + "epoch": 0.6476793248945147, + "grad_norm": 
0.6210944652557373, + "learning_rate": 0.0004219238430582621, + "loss": 1.4332, + "step": 6140 + }, + { + "epoch": 0.6477848101265823, + "grad_norm": 0.7221693992614746, + "learning_rate": 0.00042169809606135893, + "loss": 1.5107, + "step": 6141 + }, + { + "epoch": 0.6478902953586498, + "grad_norm": 0.6293470859527588, + "learning_rate": 0.0004214723858523212, + "loss": 1.4809, + "step": 6142 + }, + { + "epoch": 0.6479957805907173, + "grad_norm": 0.7988070249557495, + "learning_rate": 0.00042124671245644086, + "loss": 1.479, + "step": 6143 + }, + { + "epoch": 0.6481012658227848, + "grad_norm": 0.612893283367157, + "learning_rate": 0.0004210210758990056, + "loss": 1.5158, + "step": 6144 + }, + { + "epoch": 0.6482067510548524, + "grad_norm": 0.7013056874275208, + "learning_rate": 0.00042079547620529927, + "loss": 1.5343, + "step": 6145 + }, + { + "epoch": 0.6483122362869198, + "grad_norm": 0.6105437874794006, + "learning_rate": 0.0004205699134006011, + "loss": 1.515, + "step": 6146 + }, + { + "epoch": 0.6484177215189874, + "grad_norm": 0.6308321356773376, + "learning_rate": 0.0004203443875101871, + "loss": 1.5196, + "step": 6147 + }, + { + "epoch": 0.6485232067510549, + "grad_norm": 0.5765184164047241, + "learning_rate": 0.0004201188985593283, + "loss": 1.4894, + "step": 6148 + }, + { + "epoch": 0.6486286919831223, + "grad_norm": 0.5567965507507324, + "learning_rate": 0.00041989344657329187, + "loss": 1.5103, + "step": 6149 + }, + { + "epoch": 0.6487341772151899, + "grad_norm": 0.6783146262168884, + "learning_rate": 0.0004196680315773408, + "loss": 1.5424, + "step": 6150 + }, + { + "epoch": 0.6488396624472574, + "grad_norm": 0.6182146072387695, + "learning_rate": 0.0004194426535967339, + "loss": 1.5099, + "step": 6151 + }, + { + "epoch": 0.6489451476793249, + "grad_norm": 0.5574497580528259, + "learning_rate": 0.00041921731265672613, + "loss": 1.5287, + "step": 6152 + }, + { + "epoch": 0.6490506329113924, + "grad_norm": 0.6648773550987244, + "learning_rate": 
0.0004189920087825678, + "loss": 1.4879, + "step": 6153 + }, + { + "epoch": 0.64915611814346, + "grad_norm": 0.5849407911300659, + "learning_rate": 0.00041876674199950545, + "loss": 1.5, + "step": 6154 + }, + { + "epoch": 0.6492616033755274, + "grad_norm": 0.6495324969291687, + "learning_rate": 0.0004185415123327813, + "loss": 1.5047, + "step": 6155 + }, + { + "epoch": 0.649367088607595, + "grad_norm": 0.5857902765274048, + "learning_rate": 0.00041831631980763324, + "loss": 1.4818, + "step": 6156 + }, + { + "epoch": 0.6494725738396624, + "grad_norm": 0.6486345529556274, + "learning_rate": 0.00041809116444929586, + "loss": 1.5078, + "step": 6157 + }, + { + "epoch": 0.6495780590717299, + "grad_norm": 0.6032643914222717, + "learning_rate": 0.00041786604628299846, + "loss": 1.5386, + "step": 6158 + }, + { + "epoch": 0.6496835443037975, + "grad_norm": 0.6385478973388672, + "learning_rate": 0.00041764096533396667, + "loss": 1.4742, + "step": 6159 + }, + { + "epoch": 0.6497890295358649, + "grad_norm": 0.6176390051841736, + "learning_rate": 0.00041741592162742214, + "loss": 1.5298, + "step": 6160 + }, + { + "epoch": 0.6498945147679325, + "grad_norm": 0.6494815945625305, + "learning_rate": 0.0004171909151885819, + "loss": 1.5, + "step": 6161 + }, + { + "epoch": 0.65, + "grad_norm": 0.6044570207595825, + "learning_rate": 0.0004169659460426592, + "loss": 1.4927, + "step": 6162 + }, + { + "epoch": 0.6501054852320675, + "grad_norm": 0.7970790863037109, + "learning_rate": 0.00041674101421486294, + "loss": 1.436, + "step": 6163 + }, + { + "epoch": 0.650210970464135, + "grad_norm": 0.622045636177063, + "learning_rate": 0.00041651611973039776, + "loss": 1.4641, + "step": 6164 + }, + { + "epoch": 0.6503164556962026, + "grad_norm": 0.7551796436309814, + "learning_rate": 0.0004162912626144642, + "loss": 1.4872, + "step": 6165 + }, + { + "epoch": 0.65042194092827, + "grad_norm": 0.6701598167419434, + "learning_rate": 0.0004160664428922586, + "loss": 1.4947, + "step": 6166 + }, + { + 
"epoch": 0.6505274261603375, + "grad_norm": 0.6068963408470154, + "learning_rate": 0.00041584166058897324, + "loss": 1.4762, + "step": 6167 + }, + { + "epoch": 0.6506329113924051, + "grad_norm": 0.6582932472229004, + "learning_rate": 0.00041561691572979624, + "loss": 1.4682, + "step": 6168 + }, + { + "epoch": 0.6507383966244725, + "grad_norm": 0.6649191975593567, + "learning_rate": 0.00041539220833991124, + "loss": 1.5267, + "step": 6169 + }, + { + "epoch": 0.6508438818565401, + "grad_norm": 0.5782134532928467, + "learning_rate": 0.0004151675384444978, + "loss": 1.4797, + "step": 6170 + }, + { + "epoch": 0.6509493670886076, + "grad_norm": 0.6435695290565491, + "learning_rate": 0.0004149429060687312, + "loss": 1.4999, + "step": 6171 + }, + { + "epoch": 0.6510548523206751, + "grad_norm": 0.6387908458709717, + "learning_rate": 0.00041471831123778284, + "loss": 1.481, + "step": 6172 + }, + { + "epoch": 0.6511603375527426, + "grad_norm": 0.6533412337303162, + "learning_rate": 0.0004144937539768195, + "loss": 1.5131, + "step": 6173 + }, + { + "epoch": 0.6512658227848102, + "grad_norm": 0.6688793301582336, + "learning_rate": 0.00041426923431100396, + "loss": 1.5348, + "step": 6174 + }, + { + "epoch": 0.6513713080168776, + "grad_norm": 0.5945640802383423, + "learning_rate": 0.0004140447522654946, + "loss": 1.4773, + "step": 6175 + }, + { + "epoch": 0.6514767932489451, + "grad_norm": 0.7134362459182739, + "learning_rate": 0.0004138203078654463, + "loss": 1.5113, + "step": 6176 + }, + { + "epoch": 0.6515822784810127, + "grad_norm": 0.5879694223403931, + "learning_rate": 0.0004135959011360088, + "loss": 1.5079, + "step": 6177 + }, + { + "epoch": 0.6516877637130801, + "grad_norm": 0.6361109614372253, + "learning_rate": 0.000413371532102328, + "loss": 1.5075, + "step": 6178 + }, + { + "epoch": 0.6517932489451477, + "grad_norm": 0.6730113625526428, + "learning_rate": 0.0004131472007895457, + "loss": 1.5266, + "step": 6179 + }, + { + "epoch": 0.6518987341772152, + "grad_norm": 
0.6245576739311218, + "learning_rate": 0.00041292290722279914, + "loss": 1.5044, + "step": 6180 + }, + { + "epoch": 0.6520042194092827, + "grad_norm": 0.8661606907844543, + "learning_rate": 0.00041269865142722176, + "loss": 1.5302, + "step": 6181 + }, + { + "epoch": 0.6521097046413502, + "grad_norm": 0.7596228122711182, + "learning_rate": 0.0004124744334279424, + "loss": 1.5236, + "step": 6182 + }, + { + "epoch": 0.6522151898734178, + "grad_norm": 1.0312148332595825, + "learning_rate": 0.0004122502532500858, + "loss": 1.501, + "step": 6183 + }, + { + "epoch": 0.6523206751054852, + "grad_norm": 0.7346852421760559, + "learning_rate": 0.0004120261109187724, + "loss": 1.5132, + "step": 6184 + }, + { + "epoch": 0.6524261603375527, + "grad_norm": 0.8640813231468201, + "learning_rate": 0.0004118020064591184, + "loss": 1.5042, + "step": 6185 + }, + { + "epoch": 0.6525316455696203, + "grad_norm": 0.6332606673240662, + "learning_rate": 0.00041157793989623625, + "loss": 1.5217, + "step": 6186 + }, + { + "epoch": 0.6526371308016877, + "grad_norm": 0.7473130822181702, + "learning_rate": 0.0004113539112552334, + "loss": 1.4933, + "step": 6187 + }, + { + "epoch": 0.6527426160337553, + "grad_norm": 0.7674705386161804, + "learning_rate": 0.0004111299205612135, + "loss": 1.4979, + "step": 6188 + }, + { + "epoch": 0.6528481012658228, + "grad_norm": 0.8318583965301514, + "learning_rate": 0.00041090596783927583, + "loss": 1.4843, + "step": 6189 + }, + { + "epoch": 0.6529535864978903, + "grad_norm": 0.8612572550773621, + "learning_rate": 0.00041068205311451517, + "loss": 1.5341, + "step": 6190 + }, + { + "epoch": 0.6530590717299578, + "grad_norm": 0.6653329133987427, + "learning_rate": 0.00041045817641202257, + "loss": 1.5324, + "step": 6191 + }, + { + "epoch": 0.6531645569620254, + "grad_norm": 0.786596417427063, + "learning_rate": 0.00041023433775688435, + "loss": 1.4731, + "step": 6192 + }, + { + "epoch": 0.6532700421940928, + "grad_norm": 0.7019310593605042, + "learning_rate": 
0.00041001053717418283, + "loss": 1.5108, + "step": 6193 + }, + { + "epoch": 0.6533755274261603, + "grad_norm": 0.9746342301368713, + "learning_rate": 0.000409786774688996, + "loss": 1.4739, + "step": 6194 + }, + { + "epoch": 0.6534810126582279, + "grad_norm": 0.6289726495742798, + "learning_rate": 0.00040956305032639723, + "loss": 1.5058, + "step": 6195 + }, + { + "epoch": 0.6535864978902953, + "grad_norm": 0.7402397990226746, + "learning_rate": 0.0004093393641114565, + "loss": 1.4841, + "step": 6196 + }, + { + "epoch": 0.6536919831223629, + "grad_norm": 0.8559106588363647, + "learning_rate": 0.00040911571606923867, + "loss": 1.4917, + "step": 6197 + }, + { + "epoch": 0.6537974683544304, + "grad_norm": 0.6772670149803162, + "learning_rate": 0.00040889210622480467, + "loss": 1.5177, + "step": 6198 + }, + { + "epoch": 0.6539029535864979, + "grad_norm": 0.7629697322845459, + "learning_rate": 0.0004086685346032111, + "loss": 1.5378, + "step": 6199 + }, + { + "epoch": 0.6540084388185654, + "grad_norm": 0.7529082298278809, + "learning_rate": 0.00040844500122951026, + "loss": 1.507, + "step": 6200 + }, + { + "epoch": 0.654113924050633, + "grad_norm": 0.723372757434845, + "learning_rate": 0.0004082215061287502, + "loss": 1.4817, + "step": 6201 + }, + { + "epoch": 0.6542194092827004, + "grad_norm": 0.6476132869720459, + "learning_rate": 0.00040799804932597464, + "loss": 1.4927, + "step": 6202 + }, + { + "epoch": 0.6543248945147679, + "grad_norm": 0.8154486417770386, + "learning_rate": 0.00040777463084622304, + "loss": 1.4925, + "step": 6203 + }, + { + "epoch": 0.6544303797468355, + "grad_norm": 0.6240777373313904, + "learning_rate": 0.00040755125071453055, + "loss": 1.5056, + "step": 6204 + }, + { + "epoch": 0.6545358649789029, + "grad_norm": 0.8812093734741211, + "learning_rate": 0.00040732790895592764, + "loss": 1.5046, + "step": 6205 + }, + { + "epoch": 0.6546413502109705, + "grad_norm": 0.6356196999549866, + "learning_rate": 0.00040710460559544167, + "loss": 1.5321, + 
"step": 6206 + }, + { + "epoch": 0.654746835443038, + "grad_norm": 0.7966371774673462, + "learning_rate": 0.0004068813406580944, + "loss": 1.5242, + "step": 6207 + }, + { + "epoch": 0.6548523206751055, + "grad_norm": 0.7291023135185242, + "learning_rate": 0.0004066581141689038, + "loss": 1.5021, + "step": 6208 + }, + { + "epoch": 0.654957805907173, + "grad_norm": 0.7741405367851257, + "learning_rate": 0.00040643492615288367, + "loss": 1.5024, + "step": 6209 + }, + { + "epoch": 0.6550632911392406, + "grad_norm": 0.7302103042602539, + "learning_rate": 0.00040621177663504313, + "loss": 1.4897, + "step": 6210 + }, + { + "epoch": 0.655168776371308, + "grad_norm": 0.6469787359237671, + "learning_rate": 0.0004059886656403874, + "loss": 1.5102, + "step": 6211 + }, + { + "epoch": 0.6552742616033755, + "grad_norm": 0.772252082824707, + "learning_rate": 0.00040576559319391704, + "loss": 1.498, + "step": 6212 + }, + { + "epoch": 0.6553797468354431, + "grad_norm": 0.6211626529693604, + "learning_rate": 0.0004055425593206285, + "loss": 1.5172, + "step": 6213 + }, + { + "epoch": 0.6554852320675105, + "grad_norm": 0.6925276517868042, + "learning_rate": 0.0004053195640455137, + "loss": 1.4788, + "step": 6214 + }, + { + "epoch": 0.6555907172995781, + "grad_norm": 0.611145555973053, + "learning_rate": 0.0004050966073935602, + "loss": 1.5166, + "step": 6215 + }, + { + "epoch": 0.6556962025316456, + "grad_norm": 0.6210170984268188, + "learning_rate": 0.00040487368938975214, + "loss": 1.5237, + "step": 6216 + }, + { + "epoch": 0.6558016877637131, + "grad_norm": 0.5908073782920837, + "learning_rate": 0.00040465081005906805, + "loss": 1.5089, + "step": 6217 + }, + { + "epoch": 0.6559071729957806, + "grad_norm": 0.6536435484886169, + "learning_rate": 0.00040442796942648273, + "loss": 1.5221, + "step": 6218 + }, + { + "epoch": 0.6560126582278482, + "grad_norm": 0.6688380837440491, + "learning_rate": 0.00040420516751696664, + "loss": 1.4941, + "step": 6219 + }, + { + "epoch": 
0.6561181434599156, + "grad_norm": 0.5909548401832581, + "learning_rate": 0.00040398240435548583, + "loss": 1.4737, + "step": 6220 + }, + { + "epoch": 0.6562236286919831, + "grad_norm": 0.687706708908081, + "learning_rate": 0.000403759679967002, + "loss": 1.5078, + "step": 6221 + }, + { + "epoch": 0.6563291139240506, + "grad_norm": 0.6309139728546143, + "learning_rate": 0.00040353699437647257, + "loss": 1.4665, + "step": 6222 + }, + { + "epoch": 0.6564345991561181, + "grad_norm": 0.5963916778564453, + "learning_rate": 0.0004033143476088504, + "loss": 1.4938, + "step": 6223 + }, + { + "epoch": 0.6565400843881857, + "grad_norm": 0.592250645160675, + "learning_rate": 0.00040309173968908413, + "loss": 1.4897, + "step": 6224 + }, + { + "epoch": 0.6566455696202531, + "grad_norm": 0.6004682779312134, + "learning_rate": 0.0004028691706421185, + "loss": 1.4894, + "step": 6225 + }, + { + "epoch": 0.6567510548523207, + "grad_norm": 0.5838412642478943, + "learning_rate": 0.00040264664049289336, + "loss": 1.5124, + "step": 6226 + }, + { + "epoch": 0.6568565400843882, + "grad_norm": 0.6042191386222839, + "learning_rate": 0.00040242414926634415, + "loss": 1.4959, + "step": 6227 + }, + { + "epoch": 0.6569620253164556, + "grad_norm": 0.6845608949661255, + "learning_rate": 0.0004022016969874023, + "loss": 1.4956, + "step": 6228 + }, + { + "epoch": 0.6570675105485232, + "grad_norm": 0.6049282550811768, + "learning_rate": 0.00040197928368099445, + "loss": 1.494, + "step": 6229 + }, + { + "epoch": 0.6571729957805907, + "grad_norm": 0.6535252332687378, + "learning_rate": 0.00040175690937204324, + "loss": 1.5102, + "step": 6230 + }, + { + "epoch": 0.6572784810126582, + "grad_norm": 0.6544978022575378, + "learning_rate": 0.0004015345740854668, + "loss": 1.4953, + "step": 6231 + }, + { + "epoch": 0.6573839662447257, + "grad_norm": 0.6048658490180969, + "learning_rate": 0.00040131227784617876, + "loss": 1.5091, + "step": 6232 + }, + { + "epoch": 0.6574894514767933, + "grad_norm": 
0.6095743179321289, + "learning_rate": 0.000401090020679089, + "loss": 1.5322, + "step": 6233 + }, + { + "epoch": 0.6575949367088607, + "grad_norm": 0.6234181523323059, + "learning_rate": 0.00040086780260910213, + "loss": 1.5165, + "step": 6234 + }, + { + "epoch": 0.6577004219409283, + "grad_norm": 0.6100748777389526, + "learning_rate": 0.000400645623661119, + "loss": 1.5126, + "step": 6235 + }, + { + "epoch": 0.6578059071729958, + "grad_norm": 0.7727545499801636, + "learning_rate": 0.0004004234838600357, + "loss": 1.4939, + "step": 6236 + }, + { + "epoch": 0.6579113924050632, + "grad_norm": 0.6408064961433411, + "learning_rate": 0.00040020138323074427, + "loss": 1.4809, + "step": 6237 + }, + { + "epoch": 0.6580168776371308, + "grad_norm": 0.7268309593200684, + "learning_rate": 0.00039997932179813205, + "loss": 1.5016, + "step": 6238 + }, + { + "epoch": 0.6581223628691983, + "grad_norm": 0.6340768933296204, + "learning_rate": 0.00039975729958708223, + "loss": 1.4781, + "step": 6239 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 0.7805841565132141, + "learning_rate": 0.00039953531662247343, + "loss": 1.4908, + "step": 6240 + }, + { + "epoch": 0.6583333333333333, + "grad_norm": 0.6277621388435364, + "learning_rate": 0.00039931337292917966, + "loss": 1.4855, + "step": 6241 + }, + { + "epoch": 0.6584388185654009, + "grad_norm": 0.7166774868965149, + "learning_rate": 0.0003990914685320714, + "loss": 1.4986, + "step": 6242 + }, + { + "epoch": 0.6585443037974683, + "grad_norm": 0.757121205329895, + "learning_rate": 0.00039886960345601394, + "loss": 1.5407, + "step": 6243 + }, + { + "epoch": 0.6586497890295359, + "grad_norm": 0.6343244910240173, + "learning_rate": 0.00039864777772586826, + "loss": 1.4968, + "step": 6244 + }, + { + "epoch": 0.6587552742616034, + "grad_norm": 0.787804365158081, + "learning_rate": 0.00039842599136649117, + "loss": 1.4978, + "step": 6245 + }, + { + "epoch": 0.6588607594936708, + "grad_norm": 0.6118240356445312, + "learning_rate": 
0.00039820424440273474, + "loss": 1.4678, + "step": 6246 + }, + { + "epoch": 0.6589662447257384, + "grad_norm": 0.8424069285392761, + "learning_rate": 0.000397982536859447, + "loss": 1.5179, + "step": 6247 + }, + { + "epoch": 0.6590717299578059, + "grad_norm": 0.6410022974014282, + "learning_rate": 0.00039776086876147133, + "loss": 1.4848, + "step": 6248 + }, + { + "epoch": 0.6591772151898734, + "grad_norm": 0.7189099788665771, + "learning_rate": 0.0003975392401336468, + "loss": 1.5428, + "step": 6249 + }, + { + "epoch": 0.6592827004219409, + "grad_norm": 0.8297396302223206, + "learning_rate": 0.0003973176510008075, + "loss": 1.5399, + "step": 6250 + }, + { + "epoch": 0.6593881856540085, + "grad_norm": 0.7112993597984314, + "learning_rate": 0.00039709610138778445, + "loss": 1.4867, + "step": 6251 + }, + { + "epoch": 0.6594936708860759, + "grad_norm": 0.8788849115371704, + "learning_rate": 0.0003968745913194029, + "loss": 1.5134, + "step": 6252 + }, + { + "epoch": 0.6595991561181435, + "grad_norm": 0.8145052790641785, + "learning_rate": 0.0003966531208204842, + "loss": 1.4771, + "step": 6253 + }, + { + "epoch": 0.659704641350211, + "grad_norm": 0.8065302968025208, + "learning_rate": 0.0003964316899158454, + "loss": 1.5029, + "step": 6254 + }, + { + "epoch": 0.6598101265822784, + "grad_norm": 0.7985104322433472, + "learning_rate": 0.00039621029863029874, + "loss": 1.5194, + "step": 6255 + }, + { + "epoch": 0.659915611814346, + "grad_norm": 0.7223377823829651, + "learning_rate": 0.00039598894698865216, + "loss": 1.4966, + "step": 6256 + }, + { + "epoch": 0.6600210970464135, + "grad_norm": 0.7545782923698425, + "learning_rate": 0.00039576763501570944, + "loss": 1.4581, + "step": 6257 + }, + { + "epoch": 0.660126582278481, + "grad_norm": 0.6775133609771729, + "learning_rate": 0.0003955463627362694, + "loss": 1.5078, + "step": 6258 + }, + { + "epoch": 0.6602320675105485, + "grad_norm": 0.7624792456626892, + "learning_rate": 0.00039532513017512694, + "loss": 1.5125, + 
"step": 6259 + }, + { + "epoch": 0.6603375527426161, + "grad_norm": 0.6552090048789978, + "learning_rate": 0.00039510393735707233, + "loss": 1.5138, + "step": 6260 + }, + { + "epoch": 0.6604430379746835, + "grad_norm": 0.7168278694152832, + "learning_rate": 0.00039488278430689123, + "loss": 1.512, + "step": 6261 + }, + { + "epoch": 0.6605485232067511, + "grad_norm": 0.6383700966835022, + "learning_rate": 0.0003946616710493649, + "loss": 1.518, + "step": 6262 + }, + { + "epoch": 0.6606540084388186, + "grad_norm": 0.7111101746559143, + "learning_rate": 0.0003944405976092702, + "loss": 1.4695, + "step": 6263 + }, + { + "epoch": 0.660759493670886, + "grad_norm": 0.6483263373374939, + "learning_rate": 0.0003942195640113795, + "loss": 1.5502, + "step": 6264 + }, + { + "epoch": 0.6608649789029536, + "grad_norm": 0.6813762784004211, + "learning_rate": 0.00039399857028046066, + "loss": 1.502, + "step": 6265 + }, + { + "epoch": 0.6609704641350211, + "grad_norm": 0.7038830518722534, + "learning_rate": 0.0003937776164412773, + "loss": 1.5099, + "step": 6266 + }, + { + "epoch": 0.6610759493670886, + "grad_norm": 0.6813628077507019, + "learning_rate": 0.00039355670251858805, + "loss": 1.538, + "step": 6267 + }, + { + "epoch": 0.6611814345991561, + "grad_norm": 0.7313941121101379, + "learning_rate": 0.00039333582853714793, + "loss": 1.4856, + "step": 6268 + }, + { + "epoch": 0.6612869198312237, + "grad_norm": 0.6287786960601807, + "learning_rate": 0.00039311499452170665, + "loss": 1.5295, + "step": 6269 + }, + { + "epoch": 0.6613924050632911, + "grad_norm": 0.6888428330421448, + "learning_rate": 0.00039289420049700986, + "loss": 1.474, + "step": 6270 + }, + { + "epoch": 0.6614978902953587, + "grad_norm": 0.6905158758163452, + "learning_rate": 0.0003926734464877986, + "loss": 1.4879, + "step": 6271 + }, + { + "epoch": 0.6616033755274262, + "grad_norm": 0.6145912408828735, + "learning_rate": 0.0003924527325188095, + "loss": 1.5056, + "step": 6272 + }, + { + "epoch": 
0.6617088607594936, + "grad_norm": 0.6891868114471436, + "learning_rate": 0.00039223205861477455, + "loss": 1.5465, + "step": 6273 + }, + { + "epoch": 0.6618143459915612, + "grad_norm": 0.662395179271698, + "learning_rate": 0.00039201142480042145, + "loss": 1.4947, + "step": 6274 + }, + { + "epoch": 0.6619198312236287, + "grad_norm": 0.5985618829727173, + "learning_rate": 0.0003917908311004732, + "loss": 1.5067, + "step": 6275 + }, + { + "epoch": 0.6620253164556962, + "grad_norm": 0.7157207131385803, + "learning_rate": 0.0003915702775396483, + "loss": 1.5002, + "step": 6276 + }, + { + "epoch": 0.6621308016877637, + "grad_norm": 0.6688688397407532, + "learning_rate": 0.0003913497641426614, + "loss": 1.4829, + "step": 6277 + }, + { + "epoch": 0.6622362869198313, + "grad_norm": 0.5941034555435181, + "learning_rate": 0.00039112929093422185, + "loss": 1.526, + "step": 6278 + }, + { + "epoch": 0.6623417721518987, + "grad_norm": 0.8184394240379333, + "learning_rate": 0.0003909088579390347, + "loss": 1.5048, + "step": 6279 + }, + { + "epoch": 0.6624472573839663, + "grad_norm": 0.5989071726799011, + "learning_rate": 0.0003906884651818006, + "loss": 1.5017, + "step": 6280 + }, + { + "epoch": 0.6625527426160338, + "grad_norm": 0.7597311735153198, + "learning_rate": 0.0003904681126872157, + "loss": 1.4536, + "step": 6281 + }, + { + "epoch": 0.6626582278481012, + "grad_norm": 0.5854051113128662, + "learning_rate": 0.00039024780047997157, + "loss": 1.4824, + "step": 6282 + }, + { + "epoch": 0.6627637130801688, + "grad_norm": 0.6824061870574951, + "learning_rate": 0.00039002752858475527, + "loss": 1.4798, + "step": 6283 + }, + { + "epoch": 0.6628691983122363, + "grad_norm": 0.6650001406669617, + "learning_rate": 0.00038980729702624896, + "loss": 1.5428, + "step": 6284 + }, + { + "epoch": 0.6629746835443038, + "grad_norm": 0.7067443132400513, + "learning_rate": 0.00038958710582913153, + "loss": 1.5145, + "step": 6285 + }, + { + "epoch": 0.6630801687763713, + "grad_norm": 
0.7039052248001099, + "learning_rate": 0.0003893669550180761, + "loss": 1.4989, + "step": 6286 + }, + { + "epoch": 0.6631856540084389, + "grad_norm": 0.644406259059906, + "learning_rate": 0.00038914684461775154, + "loss": 1.5211, + "step": 6287 + }, + { + "epoch": 0.6632911392405063, + "grad_norm": 0.6595544815063477, + "learning_rate": 0.0003889267746528225, + "loss": 1.5122, + "step": 6288 + }, + { + "epoch": 0.6633966244725739, + "grad_norm": 0.6298556327819824, + "learning_rate": 0.00038870674514794877, + "loss": 1.5139, + "step": 6289 + }, + { + "epoch": 0.6635021097046413, + "grad_norm": 0.7176554799079895, + "learning_rate": 0.00038848675612778577, + "loss": 1.4824, + "step": 6290 + }, + { + "epoch": 0.6636075949367088, + "grad_norm": 0.6068400740623474, + "learning_rate": 0.0003882668076169846, + "loss": 1.463, + "step": 6291 + }, + { + "epoch": 0.6637130801687764, + "grad_norm": 0.7590970993041992, + "learning_rate": 0.0003880468996401912, + "loss": 1.5257, + "step": 6292 + }, + { + "epoch": 0.6638185654008438, + "grad_norm": 0.6501085758209229, + "learning_rate": 0.0003878270322220474, + "loss": 1.4961, + "step": 6293 + }, + { + "epoch": 0.6639240506329114, + "grad_norm": 0.5654624104499817, + "learning_rate": 0.00038760720538719086, + "loss": 1.5126, + "step": 6294 + }, + { + "epoch": 0.6640295358649789, + "grad_norm": 0.7287316918373108, + "learning_rate": 0.0003873874191602539, + "loss": 1.4958, + "step": 6295 + }, + { + "epoch": 0.6641350210970464, + "grad_norm": 0.6320059895515442, + "learning_rate": 0.00038716767356586487, + "loss": 1.504, + "step": 6296 + }, + { + "epoch": 0.6642405063291139, + "grad_norm": 0.6982421278953552, + "learning_rate": 0.00038694796862864724, + "loss": 1.5023, + "step": 6297 + }, + { + "epoch": 0.6643459915611815, + "grad_norm": 0.6108893752098083, + "learning_rate": 0.00038672830437322007, + "loss": 1.5224, + "step": 6298 + }, + { + "epoch": 0.6644514767932489, + "grad_norm": 0.7179501056671143, + "learning_rate": 
0.0003865086808241979, + "loss": 1.4754, + "step": 6299 + }, + { + "epoch": 0.6645569620253164, + "grad_norm": 0.5964544415473938, + "learning_rate": 0.00038628909800619046, + "loss": 1.4731, + "step": 6300 + }, + { + "epoch": 0.664662447257384, + "grad_norm": 0.607114315032959, + "learning_rate": 0.00038606955594380326, + "loss": 1.4974, + "step": 6301 + }, + { + "epoch": 0.6647679324894514, + "grad_norm": 0.8169963955879211, + "learning_rate": 0.0003858500546616368, + "loss": 1.4865, + "step": 6302 + }, + { + "epoch": 0.664873417721519, + "grad_norm": 0.6295417547225952, + "learning_rate": 0.0003856305941842878, + "loss": 1.4668, + "step": 6303 + }, + { + "epoch": 0.6649789029535865, + "grad_norm": 0.6955541968345642, + "learning_rate": 0.0003854111745363476, + "loss": 1.4871, + "step": 6304 + }, + { + "epoch": 0.665084388185654, + "grad_norm": 0.7025007009506226, + "learning_rate": 0.00038519179574240324, + "loss": 1.5012, + "step": 6305 + }, + { + "epoch": 0.6651898734177215, + "grad_norm": 0.5994135737419128, + "learning_rate": 0.0003849724578270374, + "loss": 1.4806, + "step": 6306 + }, + { + "epoch": 0.6652953586497891, + "grad_norm": 0.5979182124137878, + "learning_rate": 0.0003847531608148277, + "loss": 1.5365, + "step": 6307 + }, + { + "epoch": 0.6654008438818565, + "grad_norm": 0.6844369769096375, + "learning_rate": 0.0003845339047303477, + "loss": 1.497, + "step": 6308 + }, + { + "epoch": 0.665506329113924, + "grad_norm": 0.6639573574066162, + "learning_rate": 0.0003843146895981661, + "loss": 1.4733, + "step": 6309 + }, + { + "epoch": 0.6656118143459916, + "grad_norm": 0.5995876789093018, + "learning_rate": 0.0003840955154428467, + "loss": 1.5178, + "step": 6310 + }, + { + "epoch": 0.665717299578059, + "grad_norm": 0.6938340663909912, + "learning_rate": 0.0003838763822889495, + "loss": 1.5152, + "step": 6311 + }, + { + "epoch": 0.6658227848101266, + "grad_norm": 0.6245591044425964, + "learning_rate": 0.0003836572901610295, + "loss": 1.4574, + "step": 
6312 + }, + { + "epoch": 0.6659282700421941, + "grad_norm": 0.6135497093200684, + "learning_rate": 0.0003834382390836368, + "loss": 1.4971, + "step": 6313 + }, + { + "epoch": 0.6660337552742616, + "grad_norm": 0.6514356732368469, + "learning_rate": 0.00038321922908131736, + "loss": 1.4747, + "step": 6314 + }, + { + "epoch": 0.6661392405063291, + "grad_norm": 0.6804214715957642, + "learning_rate": 0.0003830002601786121, + "loss": 1.4732, + "step": 6315 + }, + { + "epoch": 0.6662447257383967, + "grad_norm": 0.6272261738777161, + "learning_rate": 0.0003827813324000578, + "loss": 1.4864, + "step": 6316 + }, + { + "epoch": 0.6663502109704641, + "grad_norm": 0.7398664355278015, + "learning_rate": 0.0003825624457701863, + "loss": 1.523, + "step": 6317 + }, + { + "epoch": 0.6664556962025316, + "grad_norm": 0.6321890354156494, + "learning_rate": 0.00038234360031352485, + "loss": 1.4974, + "step": 6318 + }, + { + "epoch": 0.6665611814345992, + "grad_norm": 0.6421783566474915, + "learning_rate": 0.00038212479605459617, + "loss": 1.5087, + "step": 6319 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.7235296368598938, + "learning_rate": 0.00038190603301791864, + "loss": 1.4728, + "step": 6320 + }, + { + "epoch": 0.6667721518987342, + "grad_norm": 0.6003498435020447, + "learning_rate": 0.0003816873112280056, + "loss": 1.5054, + "step": 6321 + }, + { + "epoch": 0.6668776371308017, + "grad_norm": 0.738300085067749, + "learning_rate": 0.00038146863070936607, + "loss": 1.4986, + "step": 6322 + }, + { + "epoch": 0.6669831223628692, + "grad_norm": 0.6660741567611694, + "learning_rate": 0.0003812499914865039, + "loss": 1.4801, + "step": 6323 + }, + { + "epoch": 0.6670886075949367, + "grad_norm": 0.6756889224052429, + "learning_rate": 0.00038103139358391914, + "loss": 1.5428, + "step": 6324 + }, + { + "epoch": 0.6671940928270043, + "grad_norm": 0.7386507391929626, + "learning_rate": 0.0003808128370261065, + "loss": 1.5046, + "step": 6325 + }, + { + "epoch": 0.6672995780590717, 
+ "grad_norm": 0.7630414962768555, + "learning_rate": 0.00038059432183755633, + "loss": 1.4861, + "step": 6326 + }, + { + "epoch": 0.6674050632911392, + "grad_norm": 0.7221396565437317, + "learning_rate": 0.0003803758480427544, + "loss": 1.5115, + "step": 6327 + }, + { + "epoch": 0.6675105485232068, + "grad_norm": 0.6578530669212341, + "learning_rate": 0.0003801574156661817, + "loss": 1.4444, + "step": 6328 + }, + { + "epoch": 0.6676160337552742, + "grad_norm": 0.7717717885971069, + "learning_rate": 0.000379939024732315, + "loss": 1.4879, + "step": 6329 + }, + { + "epoch": 0.6677215189873418, + "grad_norm": 0.615175187587738, + "learning_rate": 0.0003797206752656258, + "loss": 1.5313, + "step": 6330 + }, + { + "epoch": 0.6678270042194093, + "grad_norm": 0.8276415467262268, + "learning_rate": 0.0003795023672905814, + "loss": 1.4586, + "step": 6331 + }, + { + "epoch": 0.6679324894514768, + "grad_norm": 0.662613034248352, + "learning_rate": 0.00037928410083164416, + "loss": 1.5264, + "step": 6332 + }, + { + "epoch": 0.6680379746835443, + "grad_norm": 0.6464099884033203, + "learning_rate": 0.0003790658759132719, + "loss": 1.5094, + "step": 6333 + }, + { + "epoch": 0.6681434599156119, + "grad_norm": 0.6851511597633362, + "learning_rate": 0.0003788476925599181, + "loss": 1.4878, + "step": 6334 + }, + { + "epoch": 0.6682489451476793, + "grad_norm": 0.636650800704956, + "learning_rate": 0.00037862955079603086, + "loss": 1.4734, + "step": 6335 + }, + { + "epoch": 0.6683544303797468, + "grad_norm": 0.6102101802825928, + "learning_rate": 0.00037841145064605416, + "loss": 1.5167, + "step": 6336 + }, + { + "epoch": 0.6684599156118144, + "grad_norm": 0.6534231305122375, + "learning_rate": 0.00037819339213442744, + "loss": 1.4909, + "step": 6337 + }, + { + "epoch": 0.6685654008438818, + "grad_norm": 0.5813918709754944, + "learning_rate": 0.0003779753752855853, + "loss": 1.4905, + "step": 6338 + }, + { + "epoch": 0.6686708860759494, + "grad_norm": 0.6337214112281799, + 
"learning_rate": 0.0003777574001239573, + "loss": 1.517, + "step": 6339 + }, + { + "epoch": 0.6687763713080169, + "grad_norm": 0.5762072801589966, + "learning_rate": 0.0003775394666739688, + "loss": 1.4744, + "step": 6340 + }, + { + "epoch": 0.6688818565400844, + "grad_norm": 0.7080326676368713, + "learning_rate": 0.0003773215749600404, + "loss": 1.4986, + "step": 6341 + }, + { + "epoch": 0.6689873417721519, + "grad_norm": 0.625446081161499, + "learning_rate": 0.0003771037250065878, + "loss": 1.5407, + "step": 6342 + }, + { + "epoch": 0.6690928270042195, + "grad_norm": 0.6028234958648682, + "learning_rate": 0.0003768859168380223, + "loss": 1.5081, + "step": 6343 + }, + { + "epoch": 0.6691983122362869, + "grad_norm": 0.6508998870849609, + "learning_rate": 0.0003766681504787503, + "loss": 1.4832, + "step": 6344 + }, + { + "epoch": 0.6693037974683544, + "grad_norm": 0.6122072339057922, + "learning_rate": 0.0003764504259531734, + "loss": 1.5107, + "step": 6345 + }, + { + "epoch": 0.669409282700422, + "grad_norm": 0.6333527565002441, + "learning_rate": 0.0003762327432856892, + "loss": 1.4993, + "step": 6346 + }, + { + "epoch": 0.6695147679324894, + "grad_norm": 0.6492224931716919, + "learning_rate": 0.00037601510250068984, + "loss": 1.4832, + "step": 6347 + }, + { + "epoch": 0.669620253164557, + "grad_norm": 0.6048191785812378, + "learning_rate": 0.0003757975036225632, + "loss": 1.4832, + "step": 6348 + }, + { + "epoch": 0.6697257383966245, + "grad_norm": 0.6463228464126587, + "learning_rate": 0.00037557994667569217, + "loss": 1.4811, + "step": 6349 + }, + { + "epoch": 0.669831223628692, + "grad_norm": 0.6713540554046631, + "learning_rate": 0.00037536243168445507, + "loss": 1.4973, + "step": 6350 + }, + { + "epoch": 0.6699367088607595, + "grad_norm": 0.6563760042190552, + "learning_rate": 0.0003751449586732257, + "loss": 1.5007, + "step": 6351 + }, + { + "epoch": 0.6700421940928271, + "grad_norm": 0.6746114492416382, + "learning_rate": 0.0003749275276663729, + "loss": 
1.4886, + "step": 6352 + }, + { + "epoch": 0.6701476793248945, + "grad_norm": 0.6686612963676453, + "learning_rate": 0.0003747101386882609, + "loss": 1.4724, + "step": 6353 + }, + { + "epoch": 0.670253164556962, + "grad_norm": 0.6775320172309875, + "learning_rate": 0.0003744927917632489, + "loss": 1.5372, + "step": 6354 + }, + { + "epoch": 0.6703586497890295, + "grad_norm": 0.5958873629570007, + "learning_rate": 0.00037427548691569237, + "loss": 1.4534, + "step": 6355 + }, + { + "epoch": 0.670464135021097, + "grad_norm": 0.5839575529098511, + "learning_rate": 0.000374058224169941, + "loss": 1.4899, + "step": 6356 + }, + { + "epoch": 0.6705696202531646, + "grad_norm": 0.6603737473487854, + "learning_rate": 0.00037384100355034033, + "loss": 1.4969, + "step": 6357 + }, + { + "epoch": 0.670675105485232, + "grad_norm": 0.6517522931098938, + "learning_rate": 0.0003736238250812308, + "loss": 1.5309, + "step": 6358 + }, + { + "epoch": 0.6707805907172996, + "grad_norm": 0.5989501476287842, + "learning_rate": 0.0003734066887869485, + "loss": 1.4835, + "step": 6359 + }, + { + "epoch": 0.6708860759493671, + "grad_norm": 0.6052588820457458, + "learning_rate": 0.0003731895946918246, + "loss": 1.5096, + "step": 6360 + }, + { + "epoch": 0.6709915611814345, + "grad_norm": 0.624847412109375, + "learning_rate": 0.0003729725428201856, + "loss": 1.4765, + "step": 6361 + }, + { + "epoch": 0.6710970464135021, + "grad_norm": 0.6128537654876709, + "learning_rate": 0.00037275553319635285, + "loss": 1.4975, + "step": 6362 + }, + { + "epoch": 0.6712025316455696, + "grad_norm": 0.6213300824165344, + "learning_rate": 0.000372538565844644, + "loss": 1.5045, + "step": 6363 + }, + { + "epoch": 0.6713080168776371, + "grad_norm": 0.6477934122085571, + "learning_rate": 0.00037232164078937106, + "loss": 1.4767, + "step": 6364 + }, + { + "epoch": 0.6714135021097046, + "grad_norm": 0.633155882358551, + "learning_rate": 0.00037210475805484156, + "loss": 1.4616, + "step": 6365 + }, + { + "epoch": 
0.6715189873417722, + "grad_norm": 0.5993570685386658, + "learning_rate": 0.00037188791766535825, + "loss": 1.4551, + "step": 6366 + }, + { + "epoch": 0.6716244725738396, + "grad_norm": 0.6424781680107117, + "learning_rate": 0.0003716711196452192, + "loss": 1.4258, + "step": 6367 + }, + { + "epoch": 0.6717299578059072, + "grad_norm": 0.6017488837242126, + "learning_rate": 0.0003714543640187177, + "loss": 1.4826, + "step": 6368 + }, + { + "epoch": 0.6718354430379747, + "grad_norm": 0.643278181552887, + "learning_rate": 0.0003712376508101424, + "loss": 1.5027, + "step": 6369 + }, + { + "epoch": 0.6719409282700421, + "grad_norm": 0.6618341207504272, + "learning_rate": 0.0003710209800437769, + "loss": 1.5019, + "step": 6370 + }, + { + "epoch": 0.6720464135021097, + "grad_norm": 0.6812025308609009, + "learning_rate": 0.00037080435174390014, + "loss": 1.4501, + "step": 6371 + }, + { + "epoch": 0.6721518987341772, + "grad_norm": 0.6343410611152649, + "learning_rate": 0.00037058776593478675, + "loss": 1.5106, + "step": 6372 + }, + { + "epoch": 0.6722573839662447, + "grad_norm": 0.690024197101593, + "learning_rate": 0.00037037122264070625, + "loss": 1.505, + "step": 6373 + }, + { + "epoch": 0.6723628691983122, + "grad_norm": 0.6260172724723816, + "learning_rate": 0.0003701547218859232, + "loss": 1.4699, + "step": 6374 + }, + { + "epoch": 0.6724683544303798, + "grad_norm": 0.6789329648017883, + "learning_rate": 0.0003699382636946977, + "loss": 1.4597, + "step": 6375 + }, + { + "epoch": 0.6725738396624472, + "grad_norm": 0.6438372731208801, + "learning_rate": 0.0003697218480912848, + "loss": 1.4677, + "step": 6376 + }, + { + "epoch": 0.6726793248945148, + "grad_norm": 0.6140906810760498, + "learning_rate": 0.0003695054750999352, + "loss": 1.5131, + "step": 6377 + }, + { + "epoch": 0.6727848101265823, + "grad_norm": 0.6521157026290894, + "learning_rate": 0.0003692891447448943, + "loss": 1.4913, + "step": 6378 + }, + { + "epoch": 0.6728902953586497, + "grad_norm": 
0.5961560010910034, + "learning_rate": 0.0003690728570504032, + "loss": 1.4912, + "step": 6379 + }, + { + "epoch": 0.6729957805907173, + "grad_norm": 0.6226319074630737, + "learning_rate": 0.00036885661204069767, + "loss": 1.4804, + "step": 6380 + }, + { + "epoch": 0.6731012658227848, + "grad_norm": 0.7700692415237427, + "learning_rate": 0.00036864040974000955, + "loss": 1.4973, + "step": 6381 + }, + { + "epoch": 0.6732067510548523, + "grad_norm": 0.8085213303565979, + "learning_rate": 0.0003684242501725652, + "loss": 1.4792, + "step": 6382 + }, + { + "epoch": 0.6733122362869198, + "grad_norm": 0.775257408618927, + "learning_rate": 0.00036820813336258624, + "loss": 1.5556, + "step": 6383 + }, + { + "epoch": 0.6734177215189874, + "grad_norm": 0.638457179069519, + "learning_rate": 0.0003679920593342898, + "loss": 1.4883, + "step": 6384 + }, + { + "epoch": 0.6735232067510548, + "grad_norm": 0.9047484993934631, + "learning_rate": 0.0003677760281118879, + "loss": 1.5273, + "step": 6385 + }, + { + "epoch": 0.6736286919831224, + "grad_norm": 0.5924265384674072, + "learning_rate": 0.0003675600397195881, + "loss": 1.5333, + "step": 6386 + }, + { + "epoch": 0.6737341772151899, + "grad_norm": 0.9279251098632812, + "learning_rate": 0.0003673440941815928, + "loss": 1.4974, + "step": 6387 + }, + { + "epoch": 0.6738396624472573, + "grad_norm": 0.6654379367828369, + "learning_rate": 0.00036712819152209954, + "loss": 1.5029, + "step": 6388 + }, + { + "epoch": 0.6739451476793249, + "grad_norm": 0.9280942678451538, + "learning_rate": 0.00036691233176530197, + "loss": 1.5092, + "step": 6389 + }, + { + "epoch": 0.6740506329113924, + "grad_norm": 0.5968661904335022, + "learning_rate": 0.0003666965149353878, + "loss": 1.4394, + "step": 6390 + }, + { + "epoch": 0.6741561181434599, + "grad_norm": 0.6720886826515198, + "learning_rate": 0.00036648074105654043, + "loss": 1.5012, + "step": 6391 + }, + { + "epoch": 0.6742616033755274, + "grad_norm": 0.7580605149269104, + "learning_rate": 
0.0003662650101529385, + "loss": 1.5343, + "step": 6392 + }, + { + "epoch": 0.674367088607595, + "grad_norm": 0.6197187304496765, + "learning_rate": 0.00036604932224875564, + "loss": 1.4881, + "step": 6393 + }, + { + "epoch": 0.6744725738396624, + "grad_norm": 0.658550500869751, + "learning_rate": 0.0003658336773681607, + "loss": 1.5023, + "step": 6394 + }, + { + "epoch": 0.67457805907173, + "grad_norm": 0.738288938999176, + "learning_rate": 0.0003656180755353179, + "loss": 1.4802, + "step": 6395 + }, + { + "epoch": 0.6746835443037975, + "grad_norm": 0.7358816862106323, + "learning_rate": 0.0003654025167743864, + "loss": 1.4843, + "step": 6396 + }, + { + "epoch": 0.674789029535865, + "grad_norm": 0.6538668274879456, + "learning_rate": 0.0003651870011095204, + "loss": 1.5358, + "step": 6397 + }, + { + "epoch": 0.6748945147679325, + "grad_norm": 0.7080750465393066, + "learning_rate": 0.0003649715285648701, + "loss": 1.4711, + "step": 6398 + }, + { + "epoch": 0.675, + "grad_norm": 0.6939224004745483, + "learning_rate": 0.00036475609916457996, + "loss": 1.4928, + "step": 6399 + }, + { + "epoch": 0.6751054852320675, + "grad_norm": 0.5889508724212646, + "learning_rate": 0.0003645407129327898, + "loss": 1.5224, + "step": 6400 + }, + { + "epoch": 0.675210970464135, + "grad_norm": 0.7116915583610535, + "learning_rate": 0.0003643253698936349, + "loss": 1.4678, + "step": 6401 + }, + { + "epoch": 0.6753164556962026, + "grad_norm": 0.5898457169532776, + "learning_rate": 0.00036411007007124547, + "loss": 1.4942, + "step": 6402 + }, + { + "epoch": 0.67542194092827, + "grad_norm": 0.6050000190734863, + "learning_rate": 0.0003638948134897469, + "loss": 1.526, + "step": 6403 + }, + { + "epoch": 0.6755274261603376, + "grad_norm": 0.6686073541641235, + "learning_rate": 0.0003636796001732597, + "loss": 1.5311, + "step": 6404 + }, + { + "epoch": 0.6756329113924051, + "grad_norm": 0.6222769021987915, + "learning_rate": 0.00036346443014589983, + "loss": 1.5042, + "step": 6405 + }, + { + 
"epoch": 0.6757383966244725, + "grad_norm": 0.6659001111984253, + "learning_rate": 0.00036324930343177754, + "loss": 1.5043, + "step": 6406 + }, + { + "epoch": 0.6758438818565401, + "grad_norm": 0.5826837420463562, + "learning_rate": 0.0003630342200549997, + "loss": 1.484, + "step": 6407 + }, + { + "epoch": 0.6759493670886076, + "grad_norm": 0.6430691480636597, + "learning_rate": 0.000362819180039667, + "loss": 1.4772, + "step": 6408 + }, + { + "epoch": 0.6760548523206751, + "grad_norm": 0.6176376938819885, + "learning_rate": 0.000362604183409876, + "loss": 1.478, + "step": 6409 + }, + { + "epoch": 0.6761603375527426, + "grad_norm": 0.642096757888794, + "learning_rate": 0.00036238923018971783, + "loss": 1.4684, + "step": 6410 + }, + { + "epoch": 0.6762658227848102, + "grad_norm": 0.6784294843673706, + "learning_rate": 0.00036217432040327926, + "loss": 1.5144, + "step": 6411 + }, + { + "epoch": 0.6763713080168776, + "grad_norm": 0.5624234676361084, + "learning_rate": 0.000361959454074642, + "loss": 1.468, + "step": 6412 + }, + { + "epoch": 0.6764767932489452, + "grad_norm": 0.65642911195755, + "learning_rate": 0.00036174463122788273, + "loss": 1.4739, + "step": 6413 + }, + { + "epoch": 0.6765822784810127, + "grad_norm": 0.6311021447181702, + "learning_rate": 0.00036152985188707344, + "loss": 1.4645, + "step": 6414 + }, + { + "epoch": 0.6766877637130801, + "grad_norm": 0.6316744089126587, + "learning_rate": 0.0003613151160762815, + "loss": 1.5128, + "step": 6415 + }, + { + "epoch": 0.6767932489451477, + "grad_norm": 0.6473492383956909, + "learning_rate": 0.00036110042381956895, + "loss": 1.4647, + "step": 6416 + }, + { + "epoch": 0.6768987341772152, + "grad_norm": 0.6227500438690186, + "learning_rate": 0.00036088577514099325, + "loss": 1.5052, + "step": 6417 + }, + { + "epoch": 0.6770042194092827, + "grad_norm": 0.70001220703125, + "learning_rate": 0.0003606711700646067, + "loss": 1.5129, + "step": 6418 + }, + { + "epoch": 0.6771097046413502, + "grad_norm": 
0.5876182317733765, + "learning_rate": 0.00036045660861445684, + "loss": 1.4627, + "step": 6419 + }, + { + "epoch": 0.6772151898734177, + "grad_norm": 0.6292338371276855, + "learning_rate": 0.0003602420908145865, + "loss": 1.5066, + "step": 6420 + }, + { + "epoch": 0.6773206751054852, + "grad_norm": 0.6011817455291748, + "learning_rate": 0.00036002761668903335, + "loss": 1.5, + "step": 6421 + }, + { + "epoch": 0.6774261603375528, + "grad_norm": 0.616290807723999, + "learning_rate": 0.0003598131862618304, + "loss": 1.4788, + "step": 6422 + }, + { + "epoch": 0.6775316455696202, + "grad_norm": 0.6348633170127869, + "learning_rate": 0.0003595987995570052, + "loss": 1.4748, + "step": 6423 + }, + { + "epoch": 0.6776371308016877, + "grad_norm": 0.6198768615722656, + "learning_rate": 0.0003593844565985815, + "loss": 1.5162, + "step": 6424 + }, + { + "epoch": 0.6777426160337553, + "grad_norm": 0.6825061440467834, + "learning_rate": 0.00035917015741057727, + "loss": 1.5228, + "step": 6425 + }, + { + "epoch": 0.6778481012658227, + "grad_norm": 0.6426221132278442, + "learning_rate": 0.0003589559020170058, + "loss": 1.4933, + "step": 6426 + }, + { + "epoch": 0.6779535864978903, + "grad_norm": 0.6323835849761963, + "learning_rate": 0.00035874169044187537, + "loss": 1.5138, + "step": 6427 + }, + { + "epoch": 0.6780590717299578, + "grad_norm": 0.6731966137886047, + "learning_rate": 0.00035852752270918955, + "loss": 1.4939, + "step": 6428 + }, + { + "epoch": 0.6781645569620253, + "grad_norm": 0.7172521948814392, + "learning_rate": 0.0003583133988429468, + "loss": 1.4502, + "step": 6429 + }, + { + "epoch": 0.6782700421940928, + "grad_norm": 0.6723714470863342, + "learning_rate": 0.00035809931886714093, + "loss": 1.5136, + "step": 6430 + }, + { + "epoch": 0.6783755274261604, + "grad_norm": 0.7697321176528931, + "learning_rate": 0.00035788528280576053, + "loss": 1.4704, + "step": 6431 + }, + { + "epoch": 0.6784810126582278, + "grad_norm": 0.6603161096572876, + "learning_rate": 
0.0003576712906827892, + "loss": 1.4848, + "step": 6432 + }, + { + "epoch": 0.6785864978902953, + "grad_norm": 0.6642880439758301, + "learning_rate": 0.00035745734252220633, + "loss": 1.4639, + "step": 6433 + }, + { + "epoch": 0.6786919831223629, + "grad_norm": 0.7394683957099915, + "learning_rate": 0.00035724343834798566, + "loss": 1.4772, + "step": 6434 + }, + { + "epoch": 0.6787974683544303, + "grad_norm": 0.7386969923973083, + "learning_rate": 0.00035702957818409606, + "loss": 1.4568, + "step": 6435 + }, + { + "epoch": 0.6789029535864979, + "grad_norm": 0.6705002188682556, + "learning_rate": 0.0003568157620545019, + "loss": 1.4911, + "step": 6436 + }, + { + "epoch": 0.6790084388185654, + "grad_norm": 0.6711088418960571, + "learning_rate": 0.00035660198998316213, + "loss": 1.4694, + "step": 6437 + }, + { + "epoch": 0.6791139240506329, + "grad_norm": 0.6435925960540771, + "learning_rate": 0.00035638826199403103, + "loss": 1.4987, + "step": 6438 + }, + { + "epoch": 0.6792194092827004, + "grad_norm": 0.7101866006851196, + "learning_rate": 0.0003561745781110579, + "loss": 1.479, + "step": 6439 + }, + { + "epoch": 0.679324894514768, + "grad_norm": 0.768009603023529, + "learning_rate": 0.00035596093835818683, + "loss": 1.4923, + "step": 6440 + }, + { + "epoch": 0.6794303797468354, + "grad_norm": 0.7159779071807861, + "learning_rate": 0.0003557473427593578, + "loss": 1.5143, + "step": 6441 + }, + { + "epoch": 0.679535864978903, + "grad_norm": 0.6389381289482117, + "learning_rate": 0.0003555337913385048, + "loss": 1.4476, + "step": 6442 + }, + { + "epoch": 0.6796413502109705, + "grad_norm": 0.7460859417915344, + "learning_rate": 0.0003553202841195576, + "loss": 1.4962, + "step": 6443 + }, + { + "epoch": 0.6797468354430379, + "grad_norm": 0.6268200874328613, + "learning_rate": 0.00035510682112644055, + "loss": 1.5055, + "step": 6444 + }, + { + "epoch": 0.6798523206751055, + "grad_norm": 0.7022590041160583, + "learning_rate": 0.00035489340238307326, + "loss": 1.4963, + 
"step": 6445 + }, + { + "epoch": 0.679957805907173, + "grad_norm": 0.6863744258880615, + "learning_rate": 0.00035468002791337047, + "loss": 1.4841, + "step": 6446 + }, + { + "epoch": 0.6800632911392405, + "grad_norm": 0.6771380305290222, + "learning_rate": 0.0003544666977412418, + "loss": 1.4846, + "step": 6447 + }, + { + "epoch": 0.680168776371308, + "grad_norm": 0.6388871073722839, + "learning_rate": 0.000354253411890592, + "loss": 1.4644, + "step": 6448 + }, + { + "epoch": 0.6802742616033756, + "grad_norm": 0.6515480279922485, + "learning_rate": 0.00035404017038532045, + "loss": 1.4571, + "step": 6449 + }, + { + "epoch": 0.680379746835443, + "grad_norm": 0.6069905757904053, + "learning_rate": 0.00035382697324932245, + "loss": 1.4969, + "step": 6450 + }, + { + "epoch": 0.6804852320675105, + "grad_norm": 0.6528286933898926, + "learning_rate": 0.0003536138205064877, + "loss": 1.4763, + "step": 6451 + }, + { + "epoch": 0.6805907172995781, + "grad_norm": 0.6715302467346191, + "learning_rate": 0.0003534007121807009, + "loss": 1.5188, + "step": 6452 + }, + { + "epoch": 0.6806962025316455, + "grad_norm": 0.648938775062561, + "learning_rate": 0.00035318764829584185, + "loss": 1.5209, + "step": 6453 + }, + { + "epoch": 0.6808016877637131, + "grad_norm": 0.613101601600647, + "learning_rate": 0.0003529746288757856, + "loss": 1.4682, + "step": 6454 + }, + { + "epoch": 0.6809071729957806, + "grad_norm": 0.6063647866249084, + "learning_rate": 0.0003527616539444019, + "loss": 1.4793, + "step": 6455 + }, + { + "epoch": 0.6810126582278481, + "grad_norm": 0.5873428583145142, + "learning_rate": 0.0003525487235255556, + "loss": 1.4741, + "step": 6456 + }, + { + "epoch": 0.6811181434599156, + "grad_norm": 0.7237032651901245, + "learning_rate": 0.0003523358376431068, + "loss": 1.5141, + "step": 6457 + }, + { + "epoch": 0.6812236286919832, + "grad_norm": 0.618183970451355, + "learning_rate": 0.00035212299632090996, + "loss": 1.4929, + "step": 6458 + }, + { + "epoch": 
0.6813291139240506, + "grad_norm": 0.6026239395141602, + "learning_rate": 0.00035191019958281575, + "loss": 1.5077, + "step": 6459 + }, + { + "epoch": 0.6814345991561181, + "grad_norm": 0.7241253852844238, + "learning_rate": 0.00035169744745266866, + "loss": 1.5105, + "step": 6460 + }, + { + "epoch": 0.6815400843881857, + "grad_norm": 0.5948453545570374, + "learning_rate": 0.0003514847399543087, + "loss": 1.4868, + "step": 6461 + }, + { + "epoch": 0.6816455696202531, + "grad_norm": 0.5904780030250549, + "learning_rate": 0.00035127207711157084, + "loss": 1.4948, + "step": 6462 + }, + { + "epoch": 0.6817510548523207, + "grad_norm": 0.7382322549819946, + "learning_rate": 0.00035105945894828495, + "loss": 1.4839, + "step": 6463 + }, + { + "epoch": 0.6818565400843882, + "grad_norm": 0.6193774342536926, + "learning_rate": 0.000350846885488276, + "loss": 1.4994, + "step": 6464 + }, + { + "epoch": 0.6819620253164557, + "grad_norm": 0.8553953170776367, + "learning_rate": 0.00035063435675536386, + "loss": 1.4909, + "step": 6465 + }, + { + "epoch": 0.6820675105485232, + "grad_norm": 0.6168661117553711, + "learning_rate": 0.00035042187277336325, + "loss": 1.4664, + "step": 6466 + }, + { + "epoch": 0.6821729957805908, + "grad_norm": 0.6484708189964294, + "learning_rate": 0.00035020943356608444, + "loss": 1.4872, + "step": 6467 + }, + { + "epoch": 0.6822784810126582, + "grad_norm": 0.6485190987586975, + "learning_rate": 0.0003499970391573322, + "loss": 1.5085, + "step": 6468 + }, + { + "epoch": 0.6823839662447257, + "grad_norm": 0.6600796580314636, + "learning_rate": 0.00034978468957090635, + "loss": 1.4782, + "step": 6469 + }, + { + "epoch": 0.6824894514767933, + "grad_norm": 0.64892578125, + "learning_rate": 0.0003495723848306017, + "loss": 1.5191, + "step": 6470 + }, + { + "epoch": 0.6825949367088607, + "grad_norm": 0.7232009768486023, + "learning_rate": 0.000349360124960208, + "loss": 1.4748, + "step": 6471 + }, + { + "epoch": 0.6827004219409283, + "grad_norm": 
0.6551834344863892, + "learning_rate": 0.00034914790998351005, + "loss": 1.5406, + "step": 6472 + }, + { + "epoch": 0.6828059071729958, + "grad_norm": 0.8308815956115723, + "learning_rate": 0.0003489357399242876, + "loss": 1.4983, + "step": 6473 + }, + { + "epoch": 0.6829113924050633, + "grad_norm": 0.6385651230812073, + "learning_rate": 0.0003487236148063154, + "loss": 1.4737, + "step": 6474 + }, + { + "epoch": 0.6830168776371308, + "grad_norm": 0.6130197644233704, + "learning_rate": 0.0003485115346533629, + "loss": 1.4963, + "step": 6475 + }, + { + "epoch": 0.6831223628691984, + "grad_norm": 0.6488029360771179, + "learning_rate": 0.00034829949948919517, + "loss": 1.5194, + "step": 6476 + }, + { + "epoch": 0.6832278481012658, + "grad_norm": 0.6163731813430786, + "learning_rate": 0.00034808750933757154, + "loss": 1.4847, + "step": 6477 + }, + { + "epoch": 0.6833333333333333, + "grad_norm": 0.5840620994567871, + "learning_rate": 0.0003478755642222466, + "loss": 1.5088, + "step": 6478 + }, + { + "epoch": 0.6834388185654009, + "grad_norm": 0.5873239636421204, + "learning_rate": 0.0003476636641669699, + "loss": 1.4781, + "step": 6479 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 0.7053148746490479, + "learning_rate": 0.0003474518091954859, + "loss": 1.5124, + "step": 6480 + }, + { + "epoch": 0.6836497890295359, + "grad_norm": 0.5952763557434082, + "learning_rate": 0.00034723999933153387, + "loss": 1.5001, + "step": 6481 + }, + { + "epoch": 0.6837552742616034, + "grad_norm": 0.7040704488754272, + "learning_rate": 0.00034702823459884836, + "loss": 1.4526, + "step": 6482 + }, + { + "epoch": 0.6838607594936709, + "grad_norm": 0.5775914192199707, + "learning_rate": 0.0003468165150211585, + "loss": 1.4462, + "step": 6483 + }, + { + "epoch": 0.6839662447257384, + "grad_norm": 0.6043601632118225, + "learning_rate": 0.0003466048406221883, + "loss": 1.5138, + "step": 6484 + }, + { + "epoch": 0.6840717299578059, + "grad_norm": 0.5984331965446472, + "learning_rate": 
0.0003463932114256576, + "loss": 1.4989, + "step": 6485 + }, + { + "epoch": 0.6841772151898734, + "grad_norm": 0.6982593536376953, + "learning_rate": 0.00034618162745528, + "loss": 1.4996, + "step": 6486 + }, + { + "epoch": 0.684282700421941, + "grad_norm": 0.5917171239852905, + "learning_rate": 0.00034597008873476473, + "loss": 1.4981, + "step": 6487 + }, + { + "epoch": 0.6843881856540084, + "grad_norm": 0.7789023518562317, + "learning_rate": 0.0003457585952878156, + "loss": 1.5135, + "step": 6488 + }, + { + "epoch": 0.6844936708860759, + "grad_norm": 0.625221312046051, + "learning_rate": 0.0003455471471381318, + "loss": 1.4989, + "step": 6489 + }, + { + "epoch": 0.6845991561181435, + "grad_norm": 0.5704639554023743, + "learning_rate": 0.0003453357443094068, + "loss": 1.4973, + "step": 6490 + }, + { + "epoch": 0.6847046413502109, + "grad_norm": 0.6415606737136841, + "learning_rate": 0.0003451243868253294, + "loss": 1.51, + "step": 6491 + }, + { + "epoch": 0.6848101265822785, + "grad_norm": 0.6092985272407532, + "learning_rate": 0.0003449130747095835, + "loss": 1.5259, + "step": 6492 + }, + { + "epoch": 0.684915611814346, + "grad_norm": 0.6380095481872559, + "learning_rate": 0.0003447018079858472, + "loss": 1.4983, + "step": 6493 + }, + { + "epoch": 0.6850210970464135, + "grad_norm": 0.6150725483894348, + "learning_rate": 0.0003444905866777946, + "loss": 1.486, + "step": 6494 + }, + { + "epoch": 0.685126582278481, + "grad_norm": 0.6009423732757568, + "learning_rate": 0.0003442794108090938, + "loss": 1.5207, + "step": 6495 + }, + { + "epoch": 0.6852320675105485, + "grad_norm": 0.5915784239768982, + "learning_rate": 0.0003440682804034081, + "loss": 1.4976, + "step": 6496 + }, + { + "epoch": 0.685337552742616, + "grad_norm": 0.6944584250450134, + "learning_rate": 0.00034385719548439585, + "loss": 1.5259, + "step": 6497 + }, + { + "epoch": 0.6854430379746835, + "grad_norm": 0.6014507412910461, + "learning_rate": 0.00034364615607570994, + "loss": 1.4871, + "step": 6498 
+ }, + { + "epoch": 0.6855485232067511, + "grad_norm": 0.7580494284629822, + "learning_rate": 0.0003434351622009985, + "loss": 1.5336, + "step": 6499 + }, + { + "epoch": 0.6856540084388185, + "grad_norm": 0.6040412187576294, + "learning_rate": 0.00034322421388390456, + "loss": 1.4809, + "step": 6500 + }, + { + "epoch": 0.6857594936708861, + "grad_norm": 0.7480456829071045, + "learning_rate": 0.00034301331114806573, + "loss": 1.5021, + "step": 6501 + }, + { + "epoch": 0.6858649789029536, + "grad_norm": 0.6440590023994446, + "learning_rate": 0.0003428024540171148, + "loss": 1.4899, + "step": 6502 + }, + { + "epoch": 0.685970464135021, + "grad_norm": 0.6219503283500671, + "learning_rate": 0.0003425916425146791, + "loss": 1.5333, + "step": 6503 + }, + { + "epoch": 0.6860759493670886, + "grad_norm": 0.6806439161300659, + "learning_rate": 0.0003423808766643817, + "loss": 1.5178, + "step": 6504 + }, + { + "epoch": 0.6861814345991561, + "grad_norm": 0.6138986349105835, + "learning_rate": 0.00034217015648983957, + "loss": 1.5133, + "step": 6505 + }, + { + "epoch": 0.6862869198312236, + "grad_norm": 0.6259475946426392, + "learning_rate": 0.0003419594820146652, + "loss": 1.5165, + "step": 6506 + }, + { + "epoch": 0.6863924050632911, + "grad_norm": 0.6381478905677795, + "learning_rate": 0.0003417488532624653, + "loss": 1.5038, + "step": 6507 + }, + { + "epoch": 0.6864978902953587, + "grad_norm": 0.6791752576828003, + "learning_rate": 0.00034153827025684225, + "loss": 1.4774, + "step": 6508 + }, + { + "epoch": 0.6866033755274261, + "grad_norm": 0.6485288143157959, + "learning_rate": 0.0003413277330213928, + "loss": 1.5303, + "step": 6509 + }, + { + "epoch": 0.6867088607594937, + "grad_norm": 0.7119107842445374, + "learning_rate": 0.0003411172415797087, + "loss": 1.4915, + "step": 6510 + }, + { + "epoch": 0.6868143459915612, + "grad_norm": 0.6594569087028503, + "learning_rate": 0.00034090679595537646, + "loss": 1.4968, + "step": 6511 + }, + { + "epoch": 0.6869198312236287, + 
"grad_norm": 0.6352516412734985, + "learning_rate": 0.0003406963961719778, + "loss": 1.5025, + "step": 6512 + }, + { + "epoch": 0.6870253164556962, + "grad_norm": 0.6629581451416016, + "learning_rate": 0.00034048604225308854, + "loss": 1.5019, + "step": 6513 + }, + { + "epoch": 0.6871308016877637, + "grad_norm": 0.6315295696258545, + "learning_rate": 0.00034027573422228054, + "loss": 1.5086, + "step": 6514 + }, + { + "epoch": 0.6872362869198312, + "grad_norm": 0.647594690322876, + "learning_rate": 0.00034006547210311964, + "loss": 1.4816, + "step": 6515 + }, + { + "epoch": 0.6873417721518987, + "grad_norm": 0.6268976926803589, + "learning_rate": 0.0003398552559191667, + "loss": 1.4941, + "step": 6516 + }, + { + "epoch": 0.6874472573839663, + "grad_norm": 0.60996013879776, + "learning_rate": 0.00033964508569397743, + "loss": 1.5318, + "step": 6517 + }, + { + "epoch": 0.6875527426160337, + "grad_norm": 0.6618868708610535, + "learning_rate": 0.0003394349614511026, + "loss": 1.4784, + "step": 6518 + }, + { + "epoch": 0.6876582278481013, + "grad_norm": 0.5980538725852966, + "learning_rate": 0.0003392248832140876, + "loss": 1.4484, + "step": 6519 + }, + { + "epoch": 0.6877637130801688, + "grad_norm": 0.5993999242782593, + "learning_rate": 0.0003390148510064727, + "loss": 1.4847, + "step": 6520 + }, + { + "epoch": 0.6878691983122363, + "grad_norm": 0.6266851425170898, + "learning_rate": 0.00033880486485179305, + "loss": 1.4928, + "step": 6521 + }, + { + "epoch": 0.6879746835443038, + "grad_norm": 0.6206283569335938, + "learning_rate": 0.0003385949247735786, + "loss": 1.4636, + "step": 6522 + }, + { + "epoch": 0.6880801687763713, + "grad_norm": 0.5794071555137634, + "learning_rate": 0.00033838503079535435, + "loss": 1.4574, + "step": 6523 + }, + { + "epoch": 0.6881856540084388, + "grad_norm": 0.6055468916893005, + "learning_rate": 0.00033817518294064003, + "loss": 1.4851, + "step": 6524 + }, + { + "epoch": 0.6882911392405063, + "grad_norm": 0.6478156447410583, + 
"learning_rate": 0.00033796538123294996, + "loss": 1.5001, + "step": 6525 + }, + { + "epoch": 0.6883966244725739, + "grad_norm": 0.6109395027160645, + "learning_rate": 0.0003377556256957936, + "loss": 1.494, + "step": 6526 + }, + { + "epoch": 0.6885021097046413, + "grad_norm": 0.6229037046432495, + "learning_rate": 0.0003375459163526749, + "loss": 1.4946, + "step": 6527 + }, + { + "epoch": 0.6886075949367089, + "grad_norm": 0.6461597084999084, + "learning_rate": 0.000337336253227093, + "loss": 1.4883, + "step": 6528 + }, + { + "epoch": 0.6887130801687764, + "grad_norm": 0.6558437943458557, + "learning_rate": 0.00033712663634254163, + "loss": 1.474, + "step": 6529 + }, + { + "epoch": 0.6888185654008439, + "grad_norm": 0.6405185461044312, + "learning_rate": 0.0003369170657225094, + "loss": 1.4764, + "step": 6530 + }, + { + "epoch": 0.6889240506329114, + "grad_norm": 0.6247314810752869, + "learning_rate": 0.0003367075413904799, + "loss": 1.4399, + "step": 6531 + }, + { + "epoch": 0.689029535864979, + "grad_norm": 0.5997452139854431, + "learning_rate": 0.00033649806336993085, + "loss": 1.4723, + "step": 6532 + }, + { + "epoch": 0.6891350210970464, + "grad_norm": 0.6562123894691467, + "learning_rate": 0.0003362886316843361, + "loss": 1.475, + "step": 6533 + }, + { + "epoch": 0.6892405063291139, + "grad_norm": 0.671779990196228, + "learning_rate": 0.000336079246357163, + "loss": 1.4905, + "step": 6534 + }, + { + "epoch": 0.6893459915611815, + "grad_norm": 0.590046763420105, + "learning_rate": 0.00033586990741187446, + "loss": 1.5237, + "step": 6535 + }, + { + "epoch": 0.6894514767932489, + "grad_norm": 0.689628541469574, + "learning_rate": 0.0003356606148719277, + "loss": 1.5074, + "step": 6536 + }, + { + "epoch": 0.6895569620253165, + "grad_norm": 0.6788098812103271, + "learning_rate": 0.00033545136876077524, + "loss": 1.5196, + "step": 6537 + }, + { + "epoch": 0.689662447257384, + "grad_norm": 0.6095483899116516, + "learning_rate": 0.00033524216910186394, + "loss": 
1.5039, + "step": 6538 + }, + { + "epoch": 0.6897679324894515, + "grad_norm": 0.6499853134155273, + "learning_rate": 0.00033503301591863586, + "loss": 1.5274, + "step": 6539 + }, + { + "epoch": 0.689873417721519, + "grad_norm": 0.6317347288131714, + "learning_rate": 0.0003348239092345275, + "loss": 1.4989, + "step": 6540 + }, + { + "epoch": 0.6899789029535865, + "grad_norm": 0.6567296385765076, + "learning_rate": 0.00033461484907297036, + "loss": 1.4823, + "step": 6541 + }, + { + "epoch": 0.690084388185654, + "grad_norm": 0.7236684560775757, + "learning_rate": 0.00033440583545739046, + "loss": 1.4887, + "step": 6542 + }, + { + "epoch": 0.6901898734177215, + "grad_norm": 0.6007088422775269, + "learning_rate": 0.00033419686841120925, + "loss": 1.4633, + "step": 6543 + }, + { + "epoch": 0.6902953586497891, + "grad_norm": 0.6767505407333374, + "learning_rate": 0.00033398794795784227, + "loss": 1.5155, + "step": 6544 + }, + { + "epoch": 0.6904008438818565, + "grad_norm": 0.6561751961708069, + "learning_rate": 0.0003337790741207003, + "loss": 1.4892, + "step": 6545 + }, + { + "epoch": 0.6905063291139241, + "grad_norm": 0.6608028411865234, + "learning_rate": 0.0003335702469231884, + "loss": 1.5035, + "step": 6546 + }, + { + "epoch": 0.6906118143459916, + "grad_norm": 0.6087774634361267, + "learning_rate": 0.00033336146638870685, + "loss": 1.5176, + "step": 6547 + }, + { + "epoch": 0.690717299578059, + "grad_norm": 0.602632462978363, + "learning_rate": 0.0003331527325406506, + "loss": 1.4943, + "step": 6548 + }, + { + "epoch": 0.6908227848101266, + "grad_norm": 0.6286904811859131, + "learning_rate": 0.0003329440454024092, + "loss": 1.4731, + "step": 6549 + }, + { + "epoch": 0.6909282700421941, + "grad_norm": 0.6072278618812561, + "learning_rate": 0.0003327354049973672, + "loss": 1.4523, + "step": 6550 + }, + { + "epoch": 0.6910337552742616, + "grad_norm": 0.6032941341400146, + "learning_rate": 0.00033252681134890373, + "loss": 1.5217, + "step": 6551 + }, + { + "epoch": 
0.6911392405063291, + "grad_norm": 0.6119758486747742, + "learning_rate": 0.00033231826448039246, + "loss": 1.4602, + "step": 6552 + }, + { + "epoch": 0.6912447257383966, + "grad_norm": 0.5852057933807373, + "learning_rate": 0.0003321097644152027, + "loss": 1.4919, + "step": 6553 + }, + { + "epoch": 0.6913502109704641, + "grad_norm": 0.5957278609275818, + "learning_rate": 0.00033190131117669753, + "loss": 1.5092, + "step": 6554 + }, + { + "epoch": 0.6914556962025317, + "grad_norm": 0.6378327012062073, + "learning_rate": 0.0003316929047882354, + "loss": 1.4783, + "step": 6555 + }, + { + "epoch": 0.6915611814345991, + "grad_norm": 0.5865264534950256, + "learning_rate": 0.0003314845452731691, + "loss": 1.5051, + "step": 6556 + }, + { + "epoch": 0.6916666666666667, + "grad_norm": 0.5963271260261536, + "learning_rate": 0.00033127623265484643, + "loss": 1.4898, + "step": 6557 + }, + { + "epoch": 0.6917721518987342, + "grad_norm": 0.7008376121520996, + "learning_rate": 0.00033106796695660983, + "loss": 1.4907, + "step": 6558 + }, + { + "epoch": 0.6918776371308016, + "grad_norm": 0.6346843242645264, + "learning_rate": 0.0003308597482017965, + "loss": 1.4675, + "step": 6559 + }, + { + "epoch": 0.6919831223628692, + "grad_norm": 0.8123623132705688, + "learning_rate": 0.00033065157641373847, + "loss": 1.4907, + "step": 6560 + }, + { + "epoch": 0.6920886075949367, + "grad_norm": 0.6162258982658386, + "learning_rate": 0.00033044345161576224, + "loss": 1.5119, + "step": 6561 + }, + { + "epoch": 0.6921940928270042, + "grad_norm": 0.7288169860839844, + "learning_rate": 0.00033023537383118916, + "loss": 1.4754, + "step": 6562 + }, + { + "epoch": 0.6922995780590717, + "grad_norm": 0.8107650279998779, + "learning_rate": 0.0003300273430833358, + "loss": 1.4439, + "step": 6563 + }, + { + "epoch": 0.6924050632911393, + "grad_norm": 0.6262010931968689, + "learning_rate": 0.00032981935939551294, + "loss": 1.4746, + "step": 6564 + }, + { + "epoch": 0.6925105485232067, + "grad_norm": 
0.7829354405403137, + "learning_rate": 0.000329611422791026, + "loss": 1.5053, + "step": 6565 + }, + { + "epoch": 0.6926160337552743, + "grad_norm": 0.6252524852752686, + "learning_rate": 0.00032940353329317533, + "loss": 1.4864, + "step": 6566 + }, + { + "epoch": 0.6927215189873418, + "grad_norm": 0.7614901661872864, + "learning_rate": 0.0003291956909252561, + "loss": 1.5148, + "step": 6567 + }, + { + "epoch": 0.6928270042194092, + "grad_norm": 0.7289246916770935, + "learning_rate": 0.00032898789571055796, + "loss": 1.4898, + "step": 6568 + }, + { + "epoch": 0.6929324894514768, + "grad_norm": 0.5951951742172241, + "learning_rate": 0.0003287801476723656, + "loss": 1.5037, + "step": 6569 + }, + { + "epoch": 0.6930379746835443, + "grad_norm": 0.7458404302597046, + "learning_rate": 0.0003285724468339576, + "loss": 1.4789, + "step": 6570 + }, + { + "epoch": 0.6931434599156118, + "grad_norm": 0.6851738095283508, + "learning_rate": 0.00032836479321860884, + "loss": 1.4994, + "step": 6571 + }, + { + "epoch": 0.6932489451476793, + "grad_norm": 0.6357306241989136, + "learning_rate": 0.00032815718684958727, + "loss": 1.4941, + "step": 6572 + }, + { + "epoch": 0.6933544303797469, + "grad_norm": 0.6617768406867981, + "learning_rate": 0.00032794962775015656, + "loss": 1.4654, + "step": 6573 + }, + { + "epoch": 0.6934599156118143, + "grad_norm": 0.6065582036972046, + "learning_rate": 0.0003277421159435745, + "loss": 1.4964, + "step": 6574 + }, + { + "epoch": 0.6935654008438819, + "grad_norm": 0.675030529499054, + "learning_rate": 0.000327534651453094, + "loss": 1.4943, + "step": 6575 + }, + { + "epoch": 0.6936708860759494, + "grad_norm": 0.641454815864563, + "learning_rate": 0.00032732723430196236, + "loss": 1.4408, + "step": 6576 + }, + { + "epoch": 0.6937763713080168, + "grad_norm": 0.5944002866744995, + "learning_rate": 0.0003271198645134218, + "loss": 1.4725, + "step": 6577 + }, + { + "epoch": 0.6938818565400844, + "grad_norm": 0.8000326752662659, + "learning_rate": 
0.0003269125421107091, + "loss": 1.4793, + "step": 6578 + }, + { + "epoch": 0.6939873417721519, + "grad_norm": 0.6292548775672913, + "learning_rate": 0.00032670526711705536, + "loss": 1.4727, + "step": 6579 + }, + { + "epoch": 0.6940928270042194, + "grad_norm": 0.6784390211105347, + "learning_rate": 0.00032649803955568755, + "loss": 1.5184, + "step": 6580 + }, + { + "epoch": 0.6941983122362869, + "grad_norm": 0.6206501126289368, + "learning_rate": 0.0003262908594498262, + "loss": 1.4562, + "step": 6581 + }, + { + "epoch": 0.6943037974683545, + "grad_norm": 0.6529445052146912, + "learning_rate": 0.0003260837268226868, + "loss": 1.4815, + "step": 6582 + }, + { + "epoch": 0.6944092827004219, + "grad_norm": 0.6441576480865479, + "learning_rate": 0.0003258766416974796, + "loss": 1.4888, + "step": 6583 + }, + { + "epoch": 0.6945147679324895, + "grad_norm": 0.7414789795875549, + "learning_rate": 0.0003256696040974097, + "loss": 1.477, + "step": 6584 + }, + { + "epoch": 0.694620253164557, + "grad_norm": 0.5949278473854065, + "learning_rate": 0.00032546261404567644, + "loss": 1.4994, + "step": 6585 + }, + { + "epoch": 0.6947257383966244, + "grad_norm": 0.6497619152069092, + "learning_rate": 0.0003252556715654743, + "loss": 1.4691, + "step": 6586 + }, + { + "epoch": 0.694831223628692, + "grad_norm": 0.6573922634124756, + "learning_rate": 0.00032504877667999206, + "loss": 1.4963, + "step": 6587 + }, + { + "epoch": 0.6949367088607595, + "grad_norm": 0.576471745967865, + "learning_rate": 0.00032484192941241316, + "loss": 1.4922, + "step": 6588 + }, + { + "epoch": 0.695042194092827, + "grad_norm": 0.6270429491996765, + "learning_rate": 0.0003246351297859164, + "loss": 1.4844, + "step": 6589 + }, + { + "epoch": 0.6951476793248945, + "grad_norm": 0.8052817583084106, + "learning_rate": 0.00032442837782367434, + "loss": 1.5466, + "step": 6590 + }, + { + "epoch": 0.6952531645569621, + "grad_norm": 0.6286761164665222, + "learning_rate": 0.00032422167354885463, + "loss": 1.488, + 
"step": 6591 + }, + { + "epoch": 0.6953586497890295, + "grad_norm": 0.6125232577323914, + "learning_rate": 0.0003240150169846196, + "loss": 1.5056, + "step": 6592 + }, + { + "epoch": 0.695464135021097, + "grad_norm": 0.5842706561088562, + "learning_rate": 0.00032380840815412603, + "loss": 1.5096, + "step": 6593 + }, + { + "epoch": 0.6955696202531646, + "grad_norm": 0.6084933280944824, + "learning_rate": 0.00032360184708052554, + "loss": 1.4657, + "step": 6594 + }, + { + "epoch": 0.695675105485232, + "grad_norm": 0.6521800756454468, + "learning_rate": 0.00032339533378696424, + "loss": 1.467, + "step": 6595 + }, + { + "epoch": 0.6957805907172996, + "grad_norm": 0.6420132517814636, + "learning_rate": 0.00032318886829658277, + "loss": 1.5136, + "step": 6596 + }, + { + "epoch": 0.6958860759493671, + "grad_norm": 0.6036320924758911, + "learning_rate": 0.0003229824506325172, + "loss": 1.5003, + "step": 6597 + }, + { + "epoch": 0.6959915611814346, + "grad_norm": 0.7036306858062744, + "learning_rate": 0.0003227760808178973, + "loss": 1.4973, + "step": 6598 + }, + { + "epoch": 0.6960970464135021, + "grad_norm": 0.6385533213615417, + "learning_rate": 0.00032256975887584783, + "loss": 1.4921, + "step": 6599 + }, + { + "epoch": 0.6962025316455697, + "grad_norm": 0.6585085391998291, + "learning_rate": 0.0003223634848294883, + "loss": 1.5098, + "step": 6600 + }, + { + "epoch": 0.6963080168776371, + "grad_norm": 0.701053261756897, + "learning_rate": 0.0003221572587019327, + "loss": 1.4603, + "step": 6601 + }, + { + "epoch": 0.6964135021097047, + "grad_norm": 0.6437816023826599, + "learning_rate": 0.0003219510805162896, + "loss": 1.4767, + "step": 6602 + }, + { + "epoch": 0.6965189873417722, + "grad_norm": 0.7898445725440979, + "learning_rate": 0.0003217449502956624, + "loss": 1.4991, + "step": 6603 + }, + { + "epoch": 0.6966244725738396, + "grad_norm": 0.7742087841033936, + "learning_rate": 0.0003215388680631491, + "loss": 1.5007, + "step": 6604 + }, + { + "epoch": 
0.6967299578059072, + "grad_norm": 0.6409603357315063, + "learning_rate": 0.00032133283384184173, + "loss": 1.4931, + "step": 6605 + }, + { + "epoch": 0.6968354430379747, + "grad_norm": 0.7304035425186157, + "learning_rate": 0.00032112684765482814, + "loss": 1.4539, + "step": 6606 + }, + { + "epoch": 0.6969409282700422, + "grad_norm": 0.6388316750526428, + "learning_rate": 0.00032092090952518996, + "loss": 1.4833, + "step": 6607 + }, + { + "epoch": 0.6970464135021097, + "grad_norm": 0.5847755670547485, + "learning_rate": 0.00032071501947600334, + "loss": 1.4268, + "step": 6608 + }, + { + "epoch": 0.6971518987341773, + "grad_norm": 0.7566617727279663, + "learning_rate": 0.00032050917753033935, + "loss": 1.4732, + "step": 6609 + }, + { + "epoch": 0.6972573839662447, + "grad_norm": 0.727208137512207, + "learning_rate": 0.00032030338371126374, + "loss": 1.5366, + "step": 6610 + }, + { + "epoch": 0.6973628691983123, + "grad_norm": 0.621410608291626, + "learning_rate": 0.0003200976380418366, + "loss": 1.4448, + "step": 6611 + }, + { + "epoch": 0.6974683544303798, + "grad_norm": 0.7529759407043457, + "learning_rate": 0.00031989194054511276, + "loss": 1.4923, + "step": 6612 + }, + { + "epoch": 0.6975738396624472, + "grad_norm": 0.6465789079666138, + "learning_rate": 0.0003196862912441418, + "loss": 1.5368, + "step": 6613 + }, + { + "epoch": 0.6976793248945148, + "grad_norm": 0.6416911482810974, + "learning_rate": 0.0003194806901619673, + "loss": 1.4888, + "step": 6614 + }, + { + "epoch": 0.6977848101265823, + "grad_norm": 0.72977614402771, + "learning_rate": 0.00031927513732162856, + "loss": 1.4671, + "step": 6615 + }, + { + "epoch": 0.6978902953586498, + "grad_norm": 0.6535199880599976, + "learning_rate": 0.00031906963274615837, + "loss": 1.512, + "step": 6616 + }, + { + "epoch": 0.6979957805907173, + "grad_norm": 0.7611294388771057, + "learning_rate": 0.00031886417645858475, + "loss": 1.5101, + "step": 6617 + }, + { + "epoch": 0.6981012658227848, + "grad_norm": 
0.6102940440177917, + "learning_rate": 0.00031865876848192993, + "loss": 1.4668, + "step": 6618 + }, + { + "epoch": 0.6982067510548523, + "grad_norm": 0.6679646372795105, + "learning_rate": 0.000318453408839211, + "loss": 1.4803, + "step": 6619 + }, + { + "epoch": 0.6983122362869199, + "grad_norm": 0.6743208765983582, + "learning_rate": 0.0003182480975534395, + "loss": 1.4728, + "step": 6620 + }, + { + "epoch": 0.6984177215189873, + "grad_norm": 0.6127949357032776, + "learning_rate": 0.0003180428346476215, + "loss": 1.483, + "step": 6621 + }, + { + "epoch": 0.6985232067510548, + "grad_norm": 0.7164561152458191, + "learning_rate": 0.0003178376201447576, + "loss": 1.5112, + "step": 6622 + }, + { + "epoch": 0.6986286919831224, + "grad_norm": 0.6399343013763428, + "learning_rate": 0.00031763245406784364, + "loss": 1.4754, + "step": 6623 + }, + { + "epoch": 0.6987341772151898, + "grad_norm": 0.7101399302482605, + "learning_rate": 0.0003174273364398691, + "loss": 1.4852, + "step": 6624 + }, + { + "epoch": 0.6988396624472574, + "grad_norm": 0.5948734283447266, + "learning_rate": 0.00031722226728381854, + "loss": 1.4997, + "step": 6625 + }, + { + "epoch": 0.6989451476793249, + "grad_norm": 0.69045090675354, + "learning_rate": 0.00031701724662267097, + "loss": 1.4625, + "step": 6626 + }, + { + "epoch": 0.6990506329113924, + "grad_norm": 0.6517089009284973, + "learning_rate": 0.00031681227447939996, + "loss": 1.4761, + "step": 6627 + }, + { + "epoch": 0.6991561181434599, + "grad_norm": 0.6122094988822937, + "learning_rate": 0.00031660735087697363, + "loss": 1.496, + "step": 6628 + }, + { + "epoch": 0.6992616033755275, + "grad_norm": 0.6285280585289001, + "learning_rate": 0.0003164024758383548, + "loss": 1.4671, + "step": 6629 + }, + { + "epoch": 0.6993670886075949, + "grad_norm": 0.6322600841522217, + "learning_rate": 0.00031619764938650057, + "loss": 1.4851, + "step": 6630 + }, + { + "epoch": 0.6994725738396624, + "grad_norm": 0.7142098546028137, + "learning_rate": 
0.00031599287154436263, + "loss": 1.4843, + "step": 6631 + }, + { + "epoch": 0.69957805907173, + "grad_norm": 0.663355827331543, + "learning_rate": 0.0003157881423348879, + "loss": 1.4734, + "step": 6632 + }, + { + "epoch": 0.6996835443037974, + "grad_norm": 0.609727144241333, + "learning_rate": 0.00031558346178101694, + "loss": 1.4791, + "step": 6633 + }, + { + "epoch": 0.699789029535865, + "grad_norm": 0.7055918574333191, + "learning_rate": 0.00031537882990568535, + "loss": 1.504, + "step": 6634 + }, + { + "epoch": 0.6998945147679325, + "grad_norm": 0.621724009513855, + "learning_rate": 0.000315174246731823, + "loss": 1.4857, + "step": 6635 + }, + { + "epoch": 0.7, + "grad_norm": 0.6695172190666199, + "learning_rate": 0.00031496971228235464, + "loss": 1.4593, + "step": 6636 + }, + { + "epoch": 0.7001054852320675, + "grad_norm": 0.631528913974762, + "learning_rate": 0.00031476522658019916, + "loss": 1.517, + "step": 6637 + }, + { + "epoch": 0.700210970464135, + "grad_norm": 0.661890983581543, + "learning_rate": 0.0003145607896482704, + "loss": 1.5022, + "step": 6638 + }, + { + "epoch": 0.7003164556962025, + "grad_norm": 0.596538782119751, + "learning_rate": 0.00031435640150947645, + "loss": 1.4784, + "step": 6639 + }, + { + "epoch": 0.70042194092827, + "grad_norm": 0.6877181529998779, + "learning_rate": 0.0003141520621867197, + "loss": 1.4695, + "step": 6640 + }, + { + "epoch": 0.7005274261603376, + "grad_norm": 0.6352446675300598, + "learning_rate": 0.00031394777170289806, + "loss": 1.4942, + "step": 6641 + }, + { + "epoch": 0.700632911392405, + "grad_norm": 0.6856161952018738, + "learning_rate": 0.00031374353008090285, + "loss": 1.4718, + "step": 6642 + }, + { + "epoch": 0.7007383966244726, + "grad_norm": 0.7697347402572632, + "learning_rate": 0.0003135393373436206, + "loss": 1.4999, + "step": 6643 + }, + { + "epoch": 0.7008438818565401, + "grad_norm": 0.6073369383811951, + "learning_rate": 0.0003133351935139319, + "loss": 1.4431, + "step": 6644 + }, + { + 
"epoch": 0.7009493670886076, + "grad_norm": 0.6997405290603638, + "learning_rate": 0.00031313109861471223, + "loss": 1.4508, + "step": 6645 + }, + { + "epoch": 0.7010548523206751, + "grad_norm": 0.6203168034553528, + "learning_rate": 0.0003129270526688313, + "loss": 1.4667, + "step": 6646 + }, + { + "epoch": 0.7011603375527427, + "grad_norm": 0.5980361104011536, + "learning_rate": 0.0003127230556991536, + "loss": 1.4986, + "step": 6647 + }, + { + "epoch": 0.7012658227848101, + "grad_norm": 0.6476755738258362, + "learning_rate": 0.000312519107728538, + "loss": 1.5193, + "step": 6648 + }, + { + "epoch": 0.7013713080168776, + "grad_norm": 0.6225103139877319, + "learning_rate": 0.0003123152087798376, + "loss": 1.4407, + "step": 6649 + }, + { + "epoch": 0.7014767932489452, + "grad_norm": 0.6006197929382324, + "learning_rate": 0.00031211135887590074, + "loss": 1.5142, + "step": 6650 + }, + { + "epoch": 0.7015822784810126, + "grad_norm": 0.6015433073043823, + "learning_rate": 0.0003119075580395697, + "loss": 1.4679, + "step": 6651 + }, + { + "epoch": 0.7016877637130802, + "grad_norm": 0.5974664092063904, + "learning_rate": 0.0003117038062936813, + "loss": 1.5025, + "step": 6652 + }, + { + "epoch": 0.7017932489451477, + "grad_norm": 0.5845752358436584, + "learning_rate": 0.0003115001036610669, + "loss": 1.4687, + "step": 6653 + }, + { + "epoch": 0.7018987341772152, + "grad_norm": 0.6576346158981323, + "learning_rate": 0.0003112964501645525, + "loss": 1.496, + "step": 6654 + }, + { + "epoch": 0.7020042194092827, + "grad_norm": 0.618492603302002, + "learning_rate": 0.0003110928458269584, + "loss": 1.4894, + "step": 6655 + }, + { + "epoch": 0.7021097046413503, + "grad_norm": 0.6354486346244812, + "learning_rate": 0.00031088929067109945, + "loss": 1.4695, + "step": 6656 + }, + { + "epoch": 0.7022151898734177, + "grad_norm": 0.640235185623169, + "learning_rate": 0.0003106857847197849, + "loss": 1.4831, + "step": 6657 + }, + { + "epoch": 0.7023206751054852, + "grad_norm": 
0.6446513533592224, + "learning_rate": 0.0003104823279958191, + "loss": 1.5153, + "step": 6658 + }, + { + "epoch": 0.7024261603375528, + "grad_norm": 0.6027135252952576, + "learning_rate": 0.00031027892052200003, + "loss": 1.4799, + "step": 6659 + }, + { + "epoch": 0.7025316455696202, + "grad_norm": 0.6367692947387695, + "learning_rate": 0.0003100755623211205, + "loss": 1.4528, + "step": 6660 + }, + { + "epoch": 0.7026371308016878, + "grad_norm": 0.6545665860176086, + "learning_rate": 0.000309872253415968, + "loss": 1.4596, + "step": 6661 + }, + { + "epoch": 0.7027426160337553, + "grad_norm": 0.6137750148773193, + "learning_rate": 0.00030966899382932404, + "loss": 1.4888, + "step": 6662 + }, + { + "epoch": 0.7028481012658228, + "grad_norm": 0.7024329304695129, + "learning_rate": 0.0003094657835839651, + "loss": 1.4857, + "step": 6663 + }, + { + "epoch": 0.7029535864978903, + "grad_norm": 0.6364718079566956, + "learning_rate": 0.00030926262270266177, + "loss": 1.457, + "step": 6664 + }, + { + "epoch": 0.7030590717299579, + "grad_norm": 0.6050267815589905, + "learning_rate": 0.00030905951120817934, + "loss": 1.4842, + "step": 6665 + }, + { + "epoch": 0.7031645569620253, + "grad_norm": 0.6939184069633484, + "learning_rate": 0.00030885644912327713, + "loss": 1.4844, + "step": 6666 + }, + { + "epoch": 0.7032700421940928, + "grad_norm": 0.6266374588012695, + "learning_rate": 0.0003086534364707097, + "loss": 1.4801, + "step": 6667 + }, + { + "epoch": 0.7033755274261604, + "grad_norm": 0.6359589099884033, + "learning_rate": 0.00030845047327322556, + "loss": 1.4774, + "step": 6668 + }, + { + "epoch": 0.7034810126582278, + "grad_norm": 0.6128087639808655, + "learning_rate": 0.0003082475595535677, + "loss": 1.513, + "step": 6669 + }, + { + "epoch": 0.7035864978902954, + "grad_norm": 0.6413208246231079, + "learning_rate": 0.0003080446953344735, + "loss": 1.5049, + "step": 6670 + }, + { + "epoch": 0.7036919831223629, + "grad_norm": 0.696805477142334, + "learning_rate": 
0.000307841880638675, + "loss": 1.5246, + "step": 6671 + }, + { + "epoch": 0.7037974683544304, + "grad_norm": 0.6099305748939514, + "learning_rate": 0.0003076391154888985, + "loss": 1.4798, + "step": 6672 + }, + { + "epoch": 0.7039029535864979, + "grad_norm": 0.7245467901229858, + "learning_rate": 0.000307436399907865, + "loss": 1.5046, + "step": 6673 + }, + { + "epoch": 0.7040084388185655, + "grad_norm": 0.6428633332252502, + "learning_rate": 0.00030723373391828966, + "loss": 1.5254, + "step": 6674 + }, + { + "epoch": 0.7041139240506329, + "grad_norm": 0.5806112289428711, + "learning_rate": 0.00030703111754288204, + "loss": 1.4622, + "step": 6675 + }, + { + "epoch": 0.7042194092827004, + "grad_norm": 0.6573676466941833, + "learning_rate": 0.0003068285508043467, + "loss": 1.493, + "step": 6676 + }, + { + "epoch": 0.704324894514768, + "grad_norm": 0.7361288070678711, + "learning_rate": 0.00030662603372538224, + "loss": 1.4946, + "step": 6677 + }, + { + "epoch": 0.7044303797468354, + "grad_norm": 0.5992968678474426, + "learning_rate": 0.0003064235663286815, + "loss": 1.5008, + "step": 6678 + }, + { + "epoch": 0.704535864978903, + "grad_norm": 0.6807885766029358, + "learning_rate": 0.00030622114863693205, + "loss": 1.4414, + "step": 6679 + }, + { + "epoch": 0.7046413502109705, + "grad_norm": 0.5852721929550171, + "learning_rate": 0.00030601878067281575, + "loss": 1.4608, + "step": 6680 + }, + { + "epoch": 0.704746835443038, + "grad_norm": 0.5996155142784119, + "learning_rate": 0.00030581646245900895, + "loss": 1.484, + "step": 6681 + }, + { + "epoch": 0.7048523206751055, + "grad_norm": 0.6971549987792969, + "learning_rate": 0.0003056141940181825, + "loss": 1.493, + "step": 6682 + }, + { + "epoch": 0.7049578059071729, + "grad_norm": 0.6669843196868896, + "learning_rate": 0.0003054119753730012, + "loss": 1.4617, + "step": 6683 + }, + { + "epoch": 0.7050632911392405, + "grad_norm": 0.6352524757385254, + "learning_rate": 0.00030520980654612527, + "loss": 1.449, + "step": 
6684 + }, + { + "epoch": 0.705168776371308, + "grad_norm": 0.6605110764503479, + "learning_rate": 0.0003050076875602084, + "loss": 1.4931, + "step": 6685 + }, + { + "epoch": 0.7052742616033755, + "grad_norm": 0.6208652853965759, + "learning_rate": 0.0003048056184378991, + "loss": 1.4857, + "step": 6686 + }, + { + "epoch": 0.705379746835443, + "grad_norm": 0.6888546943664551, + "learning_rate": 0.0003046035992018402, + "loss": 1.4928, + "step": 6687 + }, + { + "epoch": 0.7054852320675106, + "grad_norm": 0.5952357053756714, + "learning_rate": 0.00030440162987466896, + "loss": 1.4586, + "step": 6688 + }, + { + "epoch": 0.705590717299578, + "grad_norm": 0.6502699255943298, + "learning_rate": 0.00030419971047901704, + "loss": 1.5183, + "step": 6689 + }, + { + "epoch": 0.7056962025316456, + "grad_norm": 0.7457621097564697, + "learning_rate": 0.00030399784103751044, + "loss": 1.4763, + "step": 6690 + }, + { + "epoch": 0.7058016877637131, + "grad_norm": 0.6019672155380249, + "learning_rate": 0.0003037960215727699, + "loss": 1.4832, + "step": 6691 + }, + { + "epoch": 0.7059071729957805, + "grad_norm": 0.6386765241622925, + "learning_rate": 0.0003035942521074097, + "loss": 1.4861, + "step": 6692 + }, + { + "epoch": 0.7060126582278481, + "grad_norm": 0.6399839520454407, + "learning_rate": 0.0003033925326640398, + "loss": 1.494, + "step": 6693 + }, + { + "epoch": 0.7061181434599156, + "grad_norm": 0.6631069183349609, + "learning_rate": 0.00030319086326526364, + "loss": 1.4868, + "step": 6694 + }, + { + "epoch": 0.7062236286919831, + "grad_norm": 0.6361362338066101, + "learning_rate": 0.00030298924393367923, + "loss": 1.4638, + "step": 6695 + }, + { + "epoch": 0.7063291139240506, + "grad_norm": 0.6313552856445312, + "learning_rate": 0.0003027876746918791, + "loss": 1.4914, + "step": 6696 + }, + { + "epoch": 0.7064345991561182, + "grad_norm": 0.7098756432533264, + "learning_rate": 0.00030258615556244995, + "loss": 1.4647, + "step": 6697 + }, + { + "epoch": 0.7065400843881856, + 
"grad_norm": 0.5900928974151611, + "learning_rate": 0.0003023846865679731, + "loss": 1.5101, + "step": 6698 + }, + { + "epoch": 0.7066455696202532, + "grad_norm": 0.7127736806869507, + "learning_rate": 0.00030218326773102407, + "loss": 1.4476, + "step": 6699 + }, + { + "epoch": 0.7067510548523207, + "grad_norm": 0.645200788974762, + "learning_rate": 0.000301981899074173, + "loss": 1.4765, + "step": 6700 + }, + { + "epoch": 0.7068565400843881, + "grad_norm": 0.5992993116378784, + "learning_rate": 0.00030178058061998387, + "loss": 1.5116, + "step": 6701 + }, + { + "epoch": 0.7069620253164557, + "grad_norm": 0.6801372766494751, + "learning_rate": 0.00030157931239101595, + "loss": 1.5026, + "step": 6702 + }, + { + "epoch": 0.7070675105485232, + "grad_norm": 0.6128050088882446, + "learning_rate": 0.00030137809440982207, + "loss": 1.4742, + "step": 6703 + }, + { + "epoch": 0.7071729957805907, + "grad_norm": 0.593609094619751, + "learning_rate": 0.0003011769266989498, + "loss": 1.4685, + "step": 6704 + }, + { + "epoch": 0.7072784810126582, + "grad_norm": 0.6631277799606323, + "learning_rate": 0.0003009758092809409, + "loss": 1.493, + "step": 6705 + }, + { + "epoch": 0.7073839662447258, + "grad_norm": 0.6222121715545654, + "learning_rate": 0.00030077474217833167, + "loss": 1.4649, + "step": 6706 + }, + { + "epoch": 0.7074894514767932, + "grad_norm": 0.6552010774612427, + "learning_rate": 0.0003005737254136525, + "loss": 1.4945, + "step": 6707 + }, + { + "epoch": 0.7075949367088608, + "grad_norm": 0.7184372544288635, + "learning_rate": 0.0003003727590094285, + "loss": 1.4618, + "step": 6708 + }, + { + "epoch": 0.7077004219409283, + "grad_norm": 0.6510214805603027, + "learning_rate": 0.00030017184298817873, + "loss": 1.4919, + "step": 6709 + }, + { + "epoch": 0.7078059071729957, + "grad_norm": 0.6473414897918701, + "learning_rate": 0.0002999709773724171, + "loss": 1.4635, + "step": 6710 + }, + { + "epoch": 0.7079113924050633, + "grad_norm": 0.5953055620193481, + 
"learning_rate": 0.00029977016218465154, + "loss": 1.4561, + "step": 6711 + }, + { + "epoch": 0.7080168776371308, + "grad_norm": 0.7007343769073486, + "learning_rate": 0.0002995693974473844, + "loss": 1.4828, + "step": 6712 + }, + { + "epoch": 0.7081223628691983, + "grad_norm": 0.6411535143852234, + "learning_rate": 0.00029936868318311235, + "loss": 1.4752, + "step": 6713 + }, + { + "epoch": 0.7082278481012658, + "grad_norm": 0.6134354472160339, + "learning_rate": 0.00029916801941432637, + "loss": 1.4812, + "step": 6714 + }, + { + "epoch": 0.7083333333333334, + "grad_norm": 0.8667522072792053, + "learning_rate": 0.00029896740616351187, + "loss": 1.4298, + "step": 6715 + }, + { + "epoch": 0.7084388185654008, + "grad_norm": 0.6500056385993958, + "learning_rate": 0.00029876684345314853, + "loss": 1.4544, + "step": 6716 + }, + { + "epoch": 0.7085443037974684, + "grad_norm": 0.9441438913345337, + "learning_rate": 0.00029856633130571046, + "loss": 1.5171, + "step": 6717 + }, + { + "epoch": 0.7086497890295359, + "grad_norm": 0.8290315270423889, + "learning_rate": 0.00029836586974366574, + "loss": 1.4551, + "step": 6718 + }, + { + "epoch": 0.7087552742616033, + "grad_norm": 0.804794192314148, + "learning_rate": 0.00029816545878947763, + "loss": 1.4827, + "step": 6719 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 0.8443757891654968, + "learning_rate": 0.00029796509846560294, + "loss": 1.5065, + "step": 6720 + }, + { + "epoch": 0.7089662447257384, + "grad_norm": 0.6275032162666321, + "learning_rate": 0.00029776478879449305, + "loss": 1.4751, + "step": 6721 + }, + { + "epoch": 0.7090717299578059, + "grad_norm": 0.7605516314506531, + "learning_rate": 0.0002975645297985935, + "loss": 1.508, + "step": 6722 + }, + { + "epoch": 0.7091772151898734, + "grad_norm": 0.5846639275550842, + "learning_rate": 0.0002973643215003445, + "loss": 1.4838, + "step": 6723 + }, + { + "epoch": 0.709282700421941, + "grad_norm": 0.6764972805976868, + "learning_rate": 0.0002971641639221804, + 
"loss": 1.4756, + "step": 6724 + }, + { + "epoch": 0.7093881856540084, + "grad_norm": 0.7082634568214417, + "learning_rate": 0.00029696405708652966, + "loss": 1.4743, + "step": 6725 + }, + { + "epoch": 0.709493670886076, + "grad_norm": 0.607567548751831, + "learning_rate": 0.00029676400101581545, + "loss": 1.4843, + "step": 6726 + }, + { + "epoch": 0.7095991561181435, + "grad_norm": 0.708551287651062, + "learning_rate": 0.0002965639957324546, + "loss": 1.4795, + "step": 6727 + }, + { + "epoch": 0.7097046413502109, + "grad_norm": 0.6592543721199036, + "learning_rate": 0.00029636404125885936, + "loss": 1.5091, + "step": 6728 + }, + { + "epoch": 0.7098101265822785, + "grad_norm": 0.6390239596366882, + "learning_rate": 0.00029616413761743537, + "loss": 1.4787, + "step": 6729 + }, + { + "epoch": 0.709915611814346, + "grad_norm": 0.6610618829727173, + "learning_rate": 0.0002959642848305828, + "loss": 1.4631, + "step": 6730 + }, + { + "epoch": 0.7100210970464135, + "grad_norm": 0.713784396648407, + "learning_rate": 0.0002957644829206961, + "loss": 1.4968, + "step": 6731 + }, + { + "epoch": 0.710126582278481, + "grad_norm": 0.6819741129875183, + "learning_rate": 0.0002955647319101641, + "loss": 1.4801, + "step": 6732 + }, + { + "epoch": 0.7102320675105486, + "grad_norm": 0.6286699771881104, + "learning_rate": 0.00029536503182137, + "loss": 1.4913, + "step": 6733 + }, + { + "epoch": 0.710337552742616, + "grad_norm": 0.7873333692550659, + "learning_rate": 0.00029516538267669096, + "loss": 1.4621, + "step": 6734 + }, + { + "epoch": 0.7104430379746836, + "grad_norm": 0.6512568593025208, + "learning_rate": 0.00029496578449849867, + "loss": 1.4798, + "step": 6735 + }, + { + "epoch": 0.7105485232067511, + "grad_norm": 0.7757531404495239, + "learning_rate": 0.00029476623730915943, + "loss": 1.5009, + "step": 6736 + }, + { + "epoch": 0.7106540084388185, + "grad_norm": 0.7617517113685608, + "learning_rate": 0.00029456674113103335, + "loss": 1.4629, + "step": 6737 + }, + { + "epoch": 
0.7107594936708861, + "grad_norm": 0.6013985872268677, + "learning_rate": 0.00029436729598647483, + "loss": 1.497, + "step": 6738 + }, + { + "epoch": 0.7108649789029536, + "grad_norm": 0.7970690727233887, + "learning_rate": 0.00029416790189783286, + "loss": 1.5229, + "step": 6739 + }, + { + "epoch": 0.7109704641350211, + "grad_norm": 0.6851124167442322, + "learning_rate": 0.00029396855888745045, + "loss": 1.4702, + "step": 6740 + }, + { + "epoch": 0.7110759493670886, + "grad_norm": 0.8148096203804016, + "learning_rate": 0.00029376926697766495, + "loss": 1.4819, + "step": 6741 + }, + { + "epoch": 0.7111814345991562, + "grad_norm": 0.6614763140678406, + "learning_rate": 0.00029357002619080814, + "loss": 1.4836, + "step": 6742 + }, + { + "epoch": 0.7112869198312236, + "grad_norm": 0.6661633253097534, + "learning_rate": 0.0002933708365492058, + "loss": 1.4664, + "step": 6743 + }, + { + "epoch": 0.7113924050632912, + "grad_norm": 0.7517089247703552, + "learning_rate": 0.00029317169807517785, + "loss": 1.4582, + "step": 6744 + }, + { + "epoch": 0.7114978902953587, + "grad_norm": 0.612013578414917, + "learning_rate": 0.00029297261079103945, + "loss": 1.516, + "step": 6745 + }, + { + "epoch": 0.7116033755274261, + "grad_norm": 0.6834280490875244, + "learning_rate": 0.000292773574719099, + "loss": 1.5039, + "step": 6746 + }, + { + "epoch": 0.7117088607594937, + "grad_norm": 0.6683686971664429, + "learning_rate": 0.0002925745898816594, + "loss": 1.4871, + "step": 6747 + }, + { + "epoch": 0.7118143459915611, + "grad_norm": 0.6270830035209656, + "learning_rate": 0.0002923756563010179, + "loss": 1.4804, + "step": 6748 + }, + { + "epoch": 0.7119198312236287, + "grad_norm": 0.7015135884284973, + "learning_rate": 0.000292176773999466, + "loss": 1.5207, + "step": 6749 + }, + { + "epoch": 0.7120253164556962, + "grad_norm": 0.7252771854400635, + "learning_rate": 0.0002919779429992895, + "loss": 1.4776, + "step": 6750 + }, + { + "epoch": 0.7121308016877637, + "grad_norm": 
0.6108191609382629, + "learning_rate": 0.0002917791633227685, + "loss": 1.4565, + "step": 6751 + }, + { + "epoch": 0.7122362869198312, + "grad_norm": 0.870297908782959, + "learning_rate": 0.000291580434992177, + "loss": 1.4765, + "step": 6752 + }, + { + "epoch": 0.7123417721518988, + "grad_norm": 0.6997548937797546, + "learning_rate": 0.00029138175802978343, + "loss": 1.4706, + "step": 6753 + }, + { + "epoch": 0.7124472573839662, + "grad_norm": 0.6347237229347229, + "learning_rate": 0.00029118313245785104, + "loss": 1.532, + "step": 6754 + }, + { + "epoch": 0.7125527426160337, + "grad_norm": 0.9087778329849243, + "learning_rate": 0.00029098455829863653, + "loss": 1.485, + "step": 6755 + }, + { + "epoch": 0.7126582278481013, + "grad_norm": 0.6803264617919922, + "learning_rate": 0.0002907860355743911, + "loss": 1.4569, + "step": 6756 + }, + { + "epoch": 0.7127637130801687, + "grad_norm": 0.8506993651390076, + "learning_rate": 0.00029058756430736025, + "loss": 1.4951, + "step": 6757 + }, + { + "epoch": 0.7128691983122363, + "grad_norm": 0.827897310256958, + "learning_rate": 0.0002903891445197836, + "loss": 1.501, + "step": 6758 + }, + { + "epoch": 0.7129746835443038, + "grad_norm": 0.7119211554527283, + "learning_rate": 0.0002901907762338952, + "loss": 1.4877, + "step": 6759 + }, + { + "epoch": 0.7130801687763713, + "grad_norm": 0.8596027493476868, + "learning_rate": 0.0002899924594719231, + "loss": 1.4779, + "step": 6760 + }, + { + "epoch": 0.7131856540084388, + "grad_norm": 0.6551046967506409, + "learning_rate": 0.0002897941942560894, + "loss": 1.4999, + "step": 6761 + }, + { + "epoch": 0.7132911392405064, + "grad_norm": 0.8308840990066528, + "learning_rate": 0.0002895959806086114, + "loss": 1.4704, + "step": 6762 + }, + { + "epoch": 0.7133966244725738, + "grad_norm": 0.7625182867050171, + "learning_rate": 0.0002893978185516995, + "loss": 1.4967, + "step": 6763 + }, + { + "epoch": 0.7135021097046413, + "grad_norm": 0.6729909777641296, + "learning_rate": 
0.00028919970810755883, + "loss": 1.4896, + "step": 6764 + }, + { + "epoch": 0.7136075949367089, + "grad_norm": 0.7617590427398682, + "learning_rate": 0.0002890016492983886, + "loss": 1.5097, + "step": 6765 + }, + { + "epoch": 0.7137130801687763, + "grad_norm": 0.6877893805503845, + "learning_rate": 0.0002888036421463823, + "loss": 1.4461, + "step": 6766 + }, + { + "epoch": 0.7138185654008439, + "grad_norm": 0.6855480074882507, + "learning_rate": 0.0002886056866737277, + "loss": 1.4754, + "step": 6767 + }, + { + "epoch": 0.7139240506329114, + "grad_norm": 0.6992387771606445, + "learning_rate": 0.0002884077829026066, + "loss": 1.4885, + "step": 6768 + }, + { + "epoch": 0.7140295358649789, + "grad_norm": 0.6105559468269348, + "learning_rate": 0.0002882099308551951, + "loss": 1.5056, + "step": 6769 + }, + { + "epoch": 0.7141350210970464, + "grad_norm": 0.7032904624938965, + "learning_rate": 0.00028801213055366335, + "loss": 1.4888, + "step": 6770 + }, + { + "epoch": 0.714240506329114, + "grad_norm": 0.7023160457611084, + "learning_rate": 0.00028781438202017613, + "loss": 1.535, + "step": 6771 + }, + { + "epoch": 0.7143459915611814, + "grad_norm": 0.687073290348053, + "learning_rate": 0.0002876166852768923, + "loss": 1.486, + "step": 6772 + }, + { + "epoch": 0.7144514767932489, + "grad_norm": 0.6702784299850464, + "learning_rate": 0.0002874190403459644, + "loss": 1.5059, + "step": 6773 + }, + { + "epoch": 0.7145569620253165, + "grad_norm": 0.5817635655403137, + "learning_rate": 0.0002872214472495397, + "loss": 1.442, + "step": 6774 + }, + { + "epoch": 0.7146624472573839, + "grad_norm": 0.6549996137619019, + "learning_rate": 0.00028702390600975937, + "loss": 1.4646, + "step": 6775 + }, + { + "epoch": 0.7147679324894515, + "grad_norm": 0.6191824674606323, + "learning_rate": 0.0002868264166487591, + "loss": 1.449, + "step": 6776 + }, + { + "epoch": 0.714873417721519, + "grad_norm": 0.6203476190567017, + "learning_rate": 0.0002866289791886684, + "loss": 1.4775, + "step": 
6777 + }, + { + "epoch": 0.7149789029535865, + "grad_norm": 0.5541258454322815, + "learning_rate": 0.00028643159365161113, + "loss": 1.4926, + "step": 6778 + }, + { + "epoch": 0.715084388185654, + "grad_norm": 0.6250616312026978, + "learning_rate": 0.00028623426005970517, + "loss": 1.478, + "step": 6779 + }, + { + "epoch": 0.7151898734177216, + "grad_norm": 0.5676767230033875, + "learning_rate": 0.00028603697843506315, + "loss": 1.4899, + "step": 6780 + }, + { + "epoch": 0.715295358649789, + "grad_norm": 0.5577430129051208, + "learning_rate": 0.00028583974879979113, + "loss": 1.519, + "step": 6781 + }, + { + "epoch": 0.7154008438818565, + "grad_norm": 0.6238291263580322, + "learning_rate": 0.00028564257117598993, + "loss": 1.4788, + "step": 6782 + }, + { + "epoch": 0.7155063291139241, + "grad_norm": 0.6358502507209778, + "learning_rate": 0.00028544544558575395, + "loss": 1.4645, + "step": 6783 + }, + { + "epoch": 0.7156118143459915, + "grad_norm": 0.6575526595115662, + "learning_rate": 0.0002852483720511724, + "loss": 1.5205, + "step": 6784 + }, + { + "epoch": 0.7157172995780591, + "grad_norm": 0.6154798269271851, + "learning_rate": 0.0002850513505943281, + "loss": 1.5095, + "step": 6785 + }, + { + "epoch": 0.7158227848101266, + "grad_norm": 0.657428503036499, + "learning_rate": 0.0002848543812372986, + "loss": 1.4467, + "step": 6786 + }, + { + "epoch": 0.7159282700421941, + "grad_norm": 0.7002379298210144, + "learning_rate": 0.00028465746400215463, + "loss": 1.4627, + "step": 6787 + }, + { + "epoch": 0.7160337552742616, + "grad_norm": 0.7183627486228943, + "learning_rate": 0.00028446059891096265, + "loss": 1.4899, + "step": 6788 + }, + { + "epoch": 0.7161392405063292, + "grad_norm": 0.6694715023040771, + "learning_rate": 0.00028426378598578187, + "loss": 1.4665, + "step": 6789 + }, + { + "epoch": 0.7162447257383966, + "grad_norm": 0.6167154312133789, + "learning_rate": 0.0002840670252486662, + "loss": 1.5301, + "step": 6790 + }, + { + "epoch": 0.7163502109704641, 
+ "grad_norm": 0.6493135690689087, + "learning_rate": 0.00028387031672166385, + "loss": 1.4785, + "step": 6791 + }, + { + "epoch": 0.7164556962025317, + "grad_norm": 0.6849386096000671, + "learning_rate": 0.0002836736604268167, + "loss": 1.4851, + "step": 6792 + }, + { + "epoch": 0.7165611814345991, + "grad_norm": 0.6137574911117554, + "learning_rate": 0.0002834770563861613, + "loss": 1.488, + "step": 6793 + }, + { + "epoch": 0.7166666666666667, + "grad_norm": 0.6694591045379639, + "learning_rate": 0.000283280504621728, + "loss": 1.4744, + "step": 6794 + }, + { + "epoch": 0.7167721518987342, + "grad_norm": 0.617026686668396, + "learning_rate": 0.0002830840051555414, + "loss": 1.4764, + "step": 6795 + }, + { + "epoch": 0.7168776371308017, + "grad_norm": 0.5778217315673828, + "learning_rate": 0.00028288755800962, + "loss": 1.432, + "step": 6796 + }, + { + "epoch": 0.7169831223628692, + "grad_norm": 0.6883179545402527, + "learning_rate": 0.00028269116320597733, + "loss": 1.4733, + "step": 6797 + }, + { + "epoch": 0.7170886075949368, + "grad_norm": 0.6286837458610535, + "learning_rate": 0.0002824948207666199, + "loss": 1.486, + "step": 6798 + }, + { + "epoch": 0.7171940928270042, + "grad_norm": 0.6355106234550476, + "learning_rate": 0.0002822985307135491, + "loss": 1.4992, + "step": 6799 + }, + { + "epoch": 0.7172995780590717, + "grad_norm": 0.6579865217208862, + "learning_rate": 0.00028210229306876, + "loss": 1.4454, + "step": 6800 + }, + { + "epoch": 0.7174050632911393, + "grad_norm": 0.632014274597168, + "learning_rate": 0.0002819061078542422, + "loss": 1.5074, + "step": 6801 + }, + { + "epoch": 0.7175105485232067, + "grad_norm": 0.6526886224746704, + "learning_rate": 0.0002817099750919791, + "loss": 1.4671, + "step": 6802 + }, + { + "epoch": 0.7176160337552743, + "grad_norm": 0.6124199032783508, + "learning_rate": 0.0002815138948039485, + "loss": 1.4934, + "step": 6803 + }, + { + "epoch": 0.7177215189873418, + "grad_norm": 0.6536011099815369, + "learning_rate": 
0.000281317867012122, + "loss": 1.4406, + "step": 6804 + }, + { + "epoch": 0.7178270042194093, + "grad_norm": 0.7143521308898926, + "learning_rate": 0.0002811218917384652, + "loss": 1.5084, + "step": 6805 + }, + { + "epoch": 0.7179324894514768, + "grad_norm": 0.6232073903083801, + "learning_rate": 0.00028092596900493885, + "loss": 1.4882, + "step": 6806 + }, + { + "epoch": 0.7180379746835444, + "grad_norm": 0.7441821694374084, + "learning_rate": 0.00028073009883349665, + "loss": 1.4619, + "step": 6807 + }, + { + "epoch": 0.7181434599156118, + "grad_norm": 0.6705540418624878, + "learning_rate": 0.00028053428124608684, + "loss": 1.5065, + "step": 6808 + }, + { + "epoch": 0.7182489451476793, + "grad_norm": 0.5967981219291687, + "learning_rate": 0.0002803385162646518, + "loss": 1.4881, + "step": 6809 + }, + { + "epoch": 0.7183544303797469, + "grad_norm": 0.7754794955253601, + "learning_rate": 0.0002801428039111279, + "loss": 1.4099, + "step": 6810 + }, + { + "epoch": 0.7184599156118143, + "grad_norm": 0.6407755613327026, + "learning_rate": 0.0002799471442074459, + "loss": 1.5004, + "step": 6811 + }, + { + "epoch": 0.7185654008438819, + "grad_norm": 0.6518954634666443, + "learning_rate": 0.00027975153717553014, + "loss": 1.4979, + "step": 6812 + }, + { + "epoch": 0.7186708860759494, + "grad_norm": 0.7610940337181091, + "learning_rate": 0.00027955598283729936, + "loss": 1.4878, + "step": 6813 + }, + { + "epoch": 0.7187763713080169, + "grad_norm": 0.6187735199928284, + "learning_rate": 0.00027936048121466673, + "loss": 1.4572, + "step": 6814 + }, + { + "epoch": 0.7188818565400844, + "grad_norm": 0.6515400409698486, + "learning_rate": 0.00027916503232953895, + "loss": 1.4927, + "step": 6815 + }, + { + "epoch": 0.7189873417721518, + "grad_norm": 0.6174860596656799, + "learning_rate": 0.0002789696362038172, + "loss": 1.476, + "step": 6816 + }, + { + "epoch": 0.7190928270042194, + "grad_norm": 0.6455647349357605, + "learning_rate": 0.0002787742928593965, + "loss": 1.4764, + 
"step": 6817 + }, + { + "epoch": 0.7191983122362869, + "grad_norm": 0.5940806865692139, + "learning_rate": 0.00027857900231816594, + "loss": 1.461, + "step": 6818 + }, + { + "epoch": 0.7193037974683544, + "grad_norm": 0.6624639630317688, + "learning_rate": 0.0002783837646020089, + "loss": 1.4743, + "step": 6819 + }, + { + "epoch": 0.7194092827004219, + "grad_norm": 0.6376464366912842, + "learning_rate": 0.00027818857973280274, + "loss": 1.4796, + "step": 6820 + }, + { + "epoch": 0.7195147679324895, + "grad_norm": 0.5961040258407593, + "learning_rate": 0.0002779934477324189, + "loss": 1.4861, + "step": 6821 + }, + { + "epoch": 0.7196202531645569, + "grad_norm": 0.6341186165809631, + "learning_rate": 0.0002777983686227226, + "loss": 1.5277, + "step": 6822 + }, + { + "epoch": 0.7197257383966245, + "grad_norm": 0.6294660568237305, + "learning_rate": 0.00027760334242557397, + "loss": 1.4305, + "step": 6823 + }, + { + "epoch": 0.719831223628692, + "grad_norm": 0.5691649913787842, + "learning_rate": 0.00027740836916282643, + "loss": 1.481, + "step": 6824 + }, + { + "epoch": 0.7199367088607594, + "grad_norm": 0.594916582107544, + "learning_rate": 0.00027721344885632765, + "loss": 1.4791, + "step": 6825 + }, + { + "epoch": 0.720042194092827, + "grad_norm": 0.6993409991264343, + "learning_rate": 0.0002770185815279195, + "loss": 1.4525, + "step": 6826 + }, + { + "epoch": 0.7201476793248945, + "grad_norm": 0.6090739369392395, + "learning_rate": 0.0002768237671994377, + "loss": 1.5108, + "step": 6827 + }, + { + "epoch": 0.720253164556962, + "grad_norm": 0.6955700516700745, + "learning_rate": 0.0002766290058927123, + "loss": 1.4747, + "step": 6828 + }, + { + "epoch": 0.7203586497890295, + "grad_norm": 0.6510682106018066, + "learning_rate": 0.0002764342976295673, + "loss": 1.4603, + "step": 6829 + }, + { + "epoch": 0.7204641350210971, + "grad_norm": 0.7263311147689819, + "learning_rate": 0.0002762396424318206, + "loss": 1.4711, + "step": 6830 + }, + { + "epoch": 
0.7205696202531645, + "grad_norm": 0.6347284317016602, + "learning_rate": 0.000276045040321284, + "loss": 1.4871, + "step": 6831 + }, + { + "epoch": 0.7206751054852321, + "grad_norm": 0.6984785795211792, + "learning_rate": 0.0002758504913197644, + "loss": 1.4521, + "step": 6832 + }, + { + "epoch": 0.7207805907172996, + "grad_norm": 0.7035748362541199, + "learning_rate": 0.0002756559954490615, + "loss": 1.4715, + "step": 6833 + }, + { + "epoch": 0.720886075949367, + "grad_norm": 0.6172999143600464, + "learning_rate": 0.0002754615527309696, + "loss": 1.4613, + "step": 6834 + }, + { + "epoch": 0.7209915611814346, + "grad_norm": 0.6507542729377747, + "learning_rate": 0.000275267163187277, + "loss": 1.4812, + "step": 6835 + }, + { + "epoch": 0.7210970464135021, + "grad_norm": 0.6638917326927185, + "learning_rate": 0.00027507282683976594, + "loss": 1.4629, + "step": 6836 + }, + { + "epoch": 0.7212025316455696, + "grad_norm": 0.6298009753227234, + "learning_rate": 0.0002748785437102129, + "loss": 1.4687, + "step": 6837 + }, + { + "epoch": 0.7213080168776371, + "grad_norm": 0.608578622341156, + "learning_rate": 0.00027468431382038816, + "loss": 1.4886, + "step": 6838 + }, + { + "epoch": 0.7214135021097047, + "grad_norm": 0.6174270510673523, + "learning_rate": 0.00027449013719205623, + "loss": 1.4539, + "step": 6839 + }, + { + "epoch": 0.7215189873417721, + "grad_norm": 0.6574415564537048, + "learning_rate": 0.00027429601384697526, + "loss": 1.4551, + "step": 6840 + }, + { + "epoch": 0.7216244725738397, + "grad_norm": 0.5681121945381165, + "learning_rate": 0.00027410194380689826, + "loss": 1.4642, + "step": 6841 + }, + { + "epoch": 0.7217299578059072, + "grad_norm": 0.7481334805488586, + "learning_rate": 0.00027390792709357155, + "loss": 1.4827, + "step": 6842 + }, + { + "epoch": 0.7218354430379746, + "grad_norm": 0.6577998399734497, + "learning_rate": 0.00027371396372873557, + "loss": 1.4658, + "step": 6843 + }, + { + "epoch": 0.7219409282700422, + "grad_norm": 
0.696155846118927, + "learning_rate": 0.00027352005373412487, + "loss": 1.5014, + "step": 6844 + }, + { + "epoch": 0.7220464135021097, + "grad_norm": 0.691301703453064, + "learning_rate": 0.00027332619713146816, + "loss": 1.4848, + "step": 6845 + }, + { + "epoch": 0.7221518987341772, + "grad_norm": 0.7134788036346436, + "learning_rate": 0.000273132393942488, + "loss": 1.4947, + "step": 6846 + }, + { + "epoch": 0.7222573839662447, + "grad_norm": 0.6418678164482117, + "learning_rate": 0.000272938644188901, + "loss": 1.4886, + "step": 6847 + }, + { + "epoch": 0.7223628691983123, + "grad_norm": 0.6262224316596985, + "learning_rate": 0.00027274494789241766, + "loss": 1.4513, + "step": 6848 + }, + { + "epoch": 0.7224683544303797, + "grad_norm": 0.6698778867721558, + "learning_rate": 0.00027255130507474276, + "loss": 1.4909, + "step": 6849 + }, + { + "epoch": 0.7225738396624473, + "grad_norm": 0.592710554599762, + "learning_rate": 0.00027235771575757466, + "loss": 1.4536, + "step": 6850 + }, + { + "epoch": 0.7226793248945148, + "grad_norm": 0.7510206699371338, + "learning_rate": 0.00027216417996260654, + "loss": 1.4993, + "step": 6851 + }, + { + "epoch": 0.7227848101265822, + "grad_norm": 0.6584784984588623, + "learning_rate": 0.00027197069771152464, + "loss": 1.4928, + "step": 6852 + }, + { + "epoch": 0.7228902953586498, + "grad_norm": 0.7745577692985535, + "learning_rate": 0.0002717772690260098, + "loss": 1.4659, + "step": 6853 + }, + { + "epoch": 0.7229957805907173, + "grad_norm": 0.7367361783981323, + "learning_rate": 0.0002715838939277366, + "loss": 1.4856, + "step": 6854 + }, + { + "epoch": 0.7231012658227848, + "grad_norm": 0.6531661748886108, + "learning_rate": 0.0002713905724383737, + "loss": 1.4364, + "step": 6855 + }, + { + "epoch": 0.7232067510548523, + "grad_norm": 0.8006741404533386, + "learning_rate": 0.00027119730457958376, + "loss": 1.4641, + "step": 6856 + }, + { + "epoch": 0.7233122362869199, + "grad_norm": 0.7146099209785461, + "learning_rate": 
0.0002710040903730233, + "loss": 1.4374, + "step": 6857 + }, + { + "epoch": 0.7234177215189873, + "grad_norm": 0.6580263376235962, + "learning_rate": 0.00027081092984034303, + "loss": 1.5094, + "step": 6858 + }, + { + "epoch": 0.7235232067510549, + "grad_norm": 0.778014600276947, + "learning_rate": 0.00027061782300318726, + "loss": 1.5093, + "step": 6859 + }, + { + "epoch": 0.7236286919831224, + "grad_norm": 0.7080479264259338, + "learning_rate": 0.0002704247698831951, + "loss": 1.4621, + "step": 6860 + }, + { + "epoch": 0.7237341772151898, + "grad_norm": 0.6819420456886292, + "learning_rate": 0.00027023177050199885, + "loss": 1.4414, + "step": 6861 + }, + { + "epoch": 0.7238396624472574, + "grad_norm": 0.7061856985092163, + "learning_rate": 0.00027003882488122507, + "loss": 1.5107, + "step": 6862 + }, + { + "epoch": 0.7239451476793249, + "grad_norm": 0.6030439734458923, + "learning_rate": 0.0002698459330424942, + "loss": 1.489, + "step": 6863 + }, + { + "epoch": 0.7240506329113924, + "grad_norm": 0.7218970060348511, + "learning_rate": 0.0002696530950074208, + "loss": 1.4925, + "step": 6864 + }, + { + "epoch": 0.7241561181434599, + "grad_norm": 0.724956750869751, + "learning_rate": 0.00026946031079761346, + "loss": 1.469, + "step": 6865 + }, + { + "epoch": 0.7242616033755275, + "grad_norm": 0.6169704794883728, + "learning_rate": 0.00026926758043467435, + "loss": 1.4704, + "step": 6866 + }, + { + "epoch": 0.7243670886075949, + "grad_norm": 0.728151261806488, + "learning_rate": 0.00026907490394020004, + "loss": 1.4614, + "step": 6867 + }, + { + "epoch": 0.7244725738396625, + "grad_norm": 0.6332054138183594, + "learning_rate": 0.00026888228133578086, + "loss": 1.4676, + "step": 6868 + }, + { + "epoch": 0.72457805907173, + "grad_norm": 0.7505289316177368, + "learning_rate": 0.0002686897126430009, + "loss": 1.4514, + "step": 6869 + }, + { + "epoch": 0.7246835443037974, + "grad_norm": 0.6315748691558838, + "learning_rate": 0.0002684971978834389, + "loss": 1.4545, + 
"step": 6870 + }, + { + "epoch": 0.724789029535865, + "grad_norm": 0.6342616081237793, + "learning_rate": 0.00026830473707866684, + "loss": 1.4785, + "step": 6871 + }, + { + "epoch": 0.7248945147679325, + "grad_norm": 0.7028712034225464, + "learning_rate": 0.00026811233025025096, + "loss": 1.4886, + "step": 6872 + }, + { + "epoch": 0.725, + "grad_norm": 0.6465057134628296, + "learning_rate": 0.00026791997741975134, + "loss": 1.4765, + "step": 6873 + }, + { + "epoch": 0.7251054852320675, + "grad_norm": 0.6432597041130066, + "learning_rate": 0.00026772767860872216, + "loss": 1.4857, + "step": 6874 + }, + { + "epoch": 0.7252109704641351, + "grad_norm": 0.8705135583877563, + "learning_rate": 0.00026753543383871143, + "loss": 1.4727, + "step": 6875 + }, + { + "epoch": 0.7253164556962025, + "grad_norm": 0.5921520590782166, + "learning_rate": 0.0002673432431312611, + "loss": 1.469, + "step": 6876 + }, + { + "epoch": 0.7254219409282701, + "grad_norm": 0.8054582476615906, + "learning_rate": 0.0002671511065079071, + "loss": 1.4676, + "step": 6877 + }, + { + "epoch": 0.7255274261603376, + "grad_norm": 0.8171985745429993, + "learning_rate": 0.00026695902399017935, + "loss": 1.472, + "step": 6878 + }, + { + "epoch": 0.725632911392405, + "grad_norm": 0.7532335519790649, + "learning_rate": 0.00026676699559960145, + "loss": 1.5117, + "step": 6879 + }, + { + "epoch": 0.7257383966244726, + "grad_norm": 0.865423321723938, + "learning_rate": 0.0002665750213576914, + "loss": 1.4772, + "step": 6880 + }, + { + "epoch": 0.72584388185654, + "grad_norm": 0.6057584881782532, + "learning_rate": 0.0002663831012859609, + "loss": 1.4704, + "step": 6881 + }, + { + "epoch": 0.7259493670886076, + "grad_norm": 0.7773483395576477, + "learning_rate": 0.0002661912354059154, + "loss": 1.471, + "step": 6882 + }, + { + "epoch": 0.7260548523206751, + "grad_norm": 0.8411487340927124, + "learning_rate": 0.0002659994237390545, + "loss": 1.4772, + "step": 6883 + }, + { + "epoch": 0.7261603375527426, + 
"grad_norm": 0.6501556634902954, + "learning_rate": 0.0002658076663068715, + "loss": 1.4666, + "step": 6884 + }, + { + "epoch": 0.7262658227848101, + "grad_norm": 0.8074085116386414, + "learning_rate": 0.00026561596313085396, + "loss": 1.4966, + "step": 6885 + }, + { + "epoch": 0.7263713080168777, + "grad_norm": 0.6870420575141907, + "learning_rate": 0.00026542431423248313, + "loss": 1.4423, + "step": 6886 + }, + { + "epoch": 0.7264767932489451, + "grad_norm": 0.8247681856155396, + "learning_rate": 0.00026523271963323414, + "loss": 1.4749, + "step": 6887 + }, + { + "epoch": 0.7265822784810126, + "grad_norm": 0.7302783727645874, + "learning_rate": 0.0002650411793545763, + "loss": 1.4845, + "step": 6888 + }, + { + "epoch": 0.7266877637130802, + "grad_norm": 0.7367923855781555, + "learning_rate": 0.00026484969341797224, + "loss": 1.46, + "step": 6889 + }, + { + "epoch": 0.7267932489451476, + "grad_norm": 0.7101086378097534, + "learning_rate": 0.0002646582618448794, + "loss": 1.4464, + "step": 6890 + }, + { + "epoch": 0.7268987341772152, + "grad_norm": 0.6788510084152222, + "learning_rate": 0.00026446688465674845, + "loss": 1.4881, + "step": 6891 + }, + { + "epoch": 0.7270042194092827, + "grad_norm": 0.7547821402549744, + "learning_rate": 0.0002642755618750242, + "loss": 1.5004, + "step": 6892 + }, + { + "epoch": 0.7271097046413502, + "grad_norm": 0.7422406077384949, + "learning_rate": 0.0002640842935211453, + "loss": 1.4862, + "step": 6893 + }, + { + "epoch": 0.7272151898734177, + "grad_norm": 0.7057321071624756, + "learning_rate": 0.0002638930796165443, + "loss": 1.4512, + "step": 6894 + }, + { + "epoch": 0.7273206751054853, + "grad_norm": 0.7019047141075134, + "learning_rate": 0.00026370192018264766, + "loss": 1.4421, + "step": 6895 + }, + { + "epoch": 0.7274261603375527, + "grad_norm": 0.8008298873901367, + "learning_rate": 0.00026351081524087573, + "loss": 1.4701, + "step": 6896 + }, + { + "epoch": 0.7275316455696202, + "grad_norm": 0.7379269599914551, + 
"learning_rate": 0.0002633197648126429, + "loss": 1.4816, + "step": 6897 + }, + { + "epoch": 0.7276371308016878, + "grad_norm": 0.7438326478004456, + "learning_rate": 0.0002631287689193571, + "loss": 1.4794, + "step": 6898 + }, + { + "epoch": 0.7277426160337552, + "grad_norm": 0.7301898002624512, + "learning_rate": 0.0002629378275824204, + "loss": 1.4633, + "step": 6899 + }, + { + "epoch": 0.7278481012658228, + "grad_norm": 0.7659211754798889, + "learning_rate": 0.00026274694082322896, + "loss": 1.4657, + "step": 6900 + }, + { + "epoch": 0.7279535864978903, + "grad_norm": 0.7212939262390137, + "learning_rate": 0.00026255610866317253, + "loss": 1.5001, + "step": 6901 + }, + { + "epoch": 0.7280590717299578, + "grad_norm": 0.6961798667907715, + "learning_rate": 0.0002623653311236347, + "loss": 1.4848, + "step": 6902 + }, + { + "epoch": 0.7281645569620253, + "grad_norm": 0.7861114144325256, + "learning_rate": 0.0002621746082259931, + "loss": 1.5068, + "step": 6903 + }, + { + "epoch": 0.7282700421940929, + "grad_norm": 0.6457453966140747, + "learning_rate": 0.0002619839399916192, + "loss": 1.4722, + "step": 6904 + }, + { + "epoch": 0.7283755274261603, + "grad_norm": 0.7459200024604797, + "learning_rate": 0.0002617933264418782, + "loss": 1.475, + "step": 6905 + }, + { + "epoch": 0.7284810126582278, + "grad_norm": 0.7256425619125366, + "learning_rate": 0.00026160276759812953, + "loss": 1.5055, + "step": 6906 + }, + { + "epoch": 0.7285864978902954, + "grad_norm": 0.5939785838127136, + "learning_rate": 0.00026141226348172595, + "loss": 1.4992, + "step": 6907 + }, + { + "epoch": 0.7286919831223628, + "grad_norm": 0.7724087238311768, + "learning_rate": 0.00026122181411401444, + "loss": 1.4935, + "step": 6908 + }, + { + "epoch": 0.7287974683544304, + "grad_norm": 0.6558492183685303, + "learning_rate": 0.00026103141951633617, + "loss": 1.5445, + "step": 6909 + }, + { + "epoch": 0.7289029535864979, + "grad_norm": 0.6333829164505005, + "learning_rate": 0.0002608410797100255, + 
"loss": 1.4837, + "step": 6910 + }, + { + "epoch": 0.7290084388185654, + "grad_norm": 0.634476900100708, + "learning_rate": 0.000260650794716411, + "loss": 1.4906, + "step": 6911 + }, + { + "epoch": 0.7291139240506329, + "grad_norm": 0.5974572896957397, + "learning_rate": 0.00026046056455681515, + "loss": 1.449, + "step": 6912 + }, + { + "epoch": 0.7292194092827005, + "grad_norm": 0.6428657174110413, + "learning_rate": 0.00026027038925255407, + "loss": 1.4854, + "step": 6913 + }, + { + "epoch": 0.7293248945147679, + "grad_norm": 0.7589754462242126, + "learning_rate": 0.00026008026882493783, + "loss": 1.459, + "step": 6914 + }, + { + "epoch": 0.7294303797468354, + "grad_norm": 0.6432167887687683, + "learning_rate": 0.00025989020329527057, + "loss": 1.4827, + "step": 6915 + }, + { + "epoch": 0.729535864978903, + "grad_norm": 0.7297220826148987, + "learning_rate": 0.0002597001926848498, + "loss": 1.4711, + "step": 6916 + }, + { + "epoch": 0.7296413502109704, + "grad_norm": 0.6319867372512817, + "learning_rate": 0.00025951023701496713, + "loss": 1.4892, + "step": 6917 + }, + { + "epoch": 0.729746835443038, + "grad_norm": 0.5953535437583923, + "learning_rate": 0.0002593203363069084, + "loss": 1.4847, + "step": 6918 + }, + { + "epoch": 0.7298523206751055, + "grad_norm": 0.6564424633979797, + "learning_rate": 0.00025913049058195277, + "loss": 1.4733, + "step": 6919 + }, + { + "epoch": 0.729957805907173, + "grad_norm": 0.6604548096656799, + "learning_rate": 0.0002589406998613733, + "loss": 1.4747, + "step": 6920 + }, + { + "epoch": 0.7300632911392405, + "grad_norm": 0.6942149996757507, + "learning_rate": 0.0002587509641664372, + "loss": 1.4881, + "step": 6921 + }, + { + "epoch": 0.7301687763713081, + "grad_norm": 0.6628143191337585, + "learning_rate": 0.0002585612835184051, + "loss": 1.4613, + "step": 6922 + }, + { + "epoch": 0.7302742616033755, + "grad_norm": 0.6381962895393372, + "learning_rate": 0.00025837165793853164, + "loss": 1.4816, + "step": 6923 + }, + { + 
"epoch": 0.730379746835443, + "grad_norm": 0.6139167547225952, + "learning_rate": 0.0002581820874480654, + "loss": 1.4876, + "step": 6924 + }, + { + "epoch": 0.7304852320675106, + "grad_norm": 0.7107917070388794, + "learning_rate": 0.0002579925720682487, + "loss": 1.4742, + "step": 6925 + }, + { + "epoch": 0.730590717299578, + "grad_norm": 0.6649812459945679, + "learning_rate": 0.0002578031118203174, + "loss": 1.5078, + "step": 6926 + }, + { + "epoch": 0.7306962025316456, + "grad_norm": 0.6523869037628174, + "learning_rate": 0.00025761370672550203, + "loss": 1.4678, + "step": 6927 + }, + { + "epoch": 0.7308016877637131, + "grad_norm": 0.6475544571876526, + "learning_rate": 0.0002574243568050261, + "loss": 1.4653, + "step": 6928 + }, + { + "epoch": 0.7309071729957806, + "grad_norm": 0.6160454154014587, + "learning_rate": 0.0002572350620801072, + "loss": 1.4768, + "step": 6929 + }, + { + "epoch": 0.7310126582278481, + "grad_norm": 0.6923481822013855, + "learning_rate": 0.0002570458225719567, + "loss": 1.5109, + "step": 6930 + }, + { + "epoch": 0.7311181434599157, + "grad_norm": 0.6239365339279175, + "learning_rate": 0.0002568566383017799, + "loss": 1.4967, + "step": 6931 + }, + { + "epoch": 0.7312236286919831, + "grad_norm": 0.6500906944274902, + "learning_rate": 0.0002566675092907757, + "loss": 1.4591, + "step": 6932 + }, + { + "epoch": 0.7313291139240506, + "grad_norm": 0.7193769216537476, + "learning_rate": 0.0002564784355601372, + "loss": 1.5138, + "step": 6933 + }, + { + "epoch": 0.7314345991561182, + "grad_norm": 0.6578816175460815, + "learning_rate": 0.0002562894171310508, + "loss": 1.4714, + "step": 6934 + }, + { + "epoch": 0.7315400843881856, + "grad_norm": 0.6427793502807617, + "learning_rate": 0.00025610045402469695, + "loss": 1.5262, + "step": 6935 + }, + { + "epoch": 0.7316455696202532, + "grad_norm": 0.669570803642273, + "learning_rate": 0.0002559115462622503, + "loss": 1.4351, + "step": 6936 + }, + { + "epoch": 0.7317510548523207, + "grad_norm": 
0.6184645891189575, + "learning_rate": 0.00025572269386487853, + "loss": 1.4703, + "step": 6937 + }, + { + "epoch": 0.7318565400843882, + "grad_norm": 0.6264984011650085, + "learning_rate": 0.0002555338968537436, + "loss": 1.4911, + "step": 6938 + }, + { + "epoch": 0.7319620253164557, + "grad_norm": 0.6304031014442444, + "learning_rate": 0.0002553451552500012, + "loss": 1.4946, + "step": 6939 + }, + { + "epoch": 0.7320675105485233, + "grad_norm": 0.6503132581710815, + "learning_rate": 0.00025515646907480074, + "loss": 1.4984, + "step": 6940 + }, + { + "epoch": 0.7321729957805907, + "grad_norm": 0.5920782089233398, + "learning_rate": 0.0002549678383492854, + "loss": 1.5058, + "step": 6941 + }, + { + "epoch": 0.7322784810126582, + "grad_norm": 0.605423092842102, + "learning_rate": 0.00025477926309459224, + "loss": 1.4448, + "step": 6942 + }, + { + "epoch": 0.7323839662447258, + "grad_norm": 0.6506451368331909, + "learning_rate": 0.00025459074333185176, + "loss": 1.4664, + "step": 6943 + }, + { + "epoch": 0.7324894514767932, + "grad_norm": 0.6454433798789978, + "learning_rate": 0.0002544022790821891, + "loss": 1.4707, + "step": 6944 + }, + { + "epoch": 0.7325949367088608, + "grad_norm": 0.6044853329658508, + "learning_rate": 0.0002542138703667224, + "loss": 1.489, + "step": 6945 + }, + { + "epoch": 0.7327004219409282, + "grad_norm": 0.7092216610908508, + "learning_rate": 0.00025402551720656366, + "loss": 1.4606, + "step": 6946 + }, + { + "epoch": 0.7328059071729958, + "grad_norm": 0.7200053930282593, + "learning_rate": 0.0002538372196228189, + "loss": 1.4823, + "step": 6947 + }, + { + "epoch": 0.7329113924050633, + "grad_norm": 0.6023540496826172, + "learning_rate": 0.00025364897763658777, + "loss": 1.5135, + "step": 6948 + }, + { + "epoch": 0.7330168776371307, + "grad_norm": 0.6171811819076538, + "learning_rate": 0.0002534607912689637, + "loss": 1.5183, + "step": 6949 + }, + { + "epoch": 0.7331223628691983, + "grad_norm": 0.6342172026634216, + "learning_rate": 
0.00025327266054103395, + "loss": 1.4764, + "step": 6950 + }, + { + "epoch": 0.7332278481012658, + "grad_norm": 0.6044051647186279, + "learning_rate": 0.0002530845854738796, + "loss": 1.4816, + "step": 6951 + }, + { + "epoch": 0.7333333333333333, + "grad_norm": 0.6659830212593079, + "learning_rate": 0.0002528965660885749, + "loss": 1.4766, + "step": 6952 + }, + { + "epoch": 0.7334388185654008, + "grad_norm": 0.5788965225219727, + "learning_rate": 0.00025270860240618904, + "loss": 1.4717, + "step": 6953 + }, + { + "epoch": 0.7335443037974684, + "grad_norm": 0.6902808547019958, + "learning_rate": 0.000252520694447784, + "loss": 1.4757, + "step": 6954 + }, + { + "epoch": 0.7336497890295358, + "grad_norm": 0.7093992233276367, + "learning_rate": 0.0002523328422344158, + "loss": 1.4694, + "step": 6955 + }, + { + "epoch": 0.7337552742616034, + "grad_norm": 0.6136805415153503, + "learning_rate": 0.0002521450457871343, + "loss": 1.4539, + "step": 6956 + }, + { + "epoch": 0.7338607594936709, + "grad_norm": 0.6375381350517273, + "learning_rate": 0.0002519573051269828, + "loss": 1.5194, + "step": 6957 + }, + { + "epoch": 0.7339662447257383, + "grad_norm": 0.6073732972145081, + "learning_rate": 0.0002517696202749988, + "loss": 1.4426, + "step": 6958 + }, + { + "epoch": 0.7340717299578059, + "grad_norm": 0.5989151000976562, + "learning_rate": 0.00025158199125221325, + "loss": 1.4469, + "step": 6959 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 0.642679750919342, + "learning_rate": 0.0002513944180796509, + "loss": 1.4922, + "step": 6960 + }, + { + "epoch": 0.7342827004219409, + "grad_norm": 0.6779907941818237, + "learning_rate": 0.0002512069007783301, + "loss": 1.5176, + "step": 6961 + }, + { + "epoch": 0.7343881856540084, + "grad_norm": 0.6128835082054138, + "learning_rate": 0.00025101943936926347, + "loss": 1.4546, + "step": 6962 + }, + { + "epoch": 0.734493670886076, + "grad_norm": 0.6991726160049438, + "learning_rate": 0.0002508320338734568, + "loss": 1.4224, + 
"step": 6963 + }, + { + "epoch": 0.7345991561181434, + "grad_norm": 0.6926712393760681, + "learning_rate": 0.00025064468431190977, + "loss": 1.4862, + "step": 6964 + }, + { + "epoch": 0.734704641350211, + "grad_norm": 0.6085038781166077, + "learning_rate": 0.0002504573907056159, + "loss": 1.4558, + "step": 6965 + }, + { + "epoch": 0.7348101265822785, + "grad_norm": 0.6658189296722412, + "learning_rate": 0.00025027015307556234, + "loss": 1.479, + "step": 6966 + }, + { + "epoch": 0.734915611814346, + "grad_norm": 0.6043661236763, + "learning_rate": 0.00025008297144273, + "loss": 1.4449, + "step": 6967 + }, + { + "epoch": 0.7350210970464135, + "grad_norm": 0.5768483877182007, + "learning_rate": 0.0002498958458280936, + "loss": 1.4715, + "step": 6968 + }, + { + "epoch": 0.735126582278481, + "grad_norm": 0.5990530848503113, + "learning_rate": 0.0002497087762526211, + "loss": 1.461, + "step": 6969 + }, + { + "epoch": 0.7352320675105485, + "grad_norm": 0.6660005450248718, + "learning_rate": 0.0002495217627372752, + "loss": 1.4789, + "step": 6970 + }, + { + "epoch": 0.735337552742616, + "grad_norm": 0.6100176572799683, + "learning_rate": 0.0002493348053030113, + "loss": 1.4532, + "step": 6971 + }, + { + "epoch": 0.7354430379746836, + "grad_norm": 0.6860246062278748, + "learning_rate": 0.0002491479039707791, + "loss": 1.4589, + "step": 6972 + }, + { + "epoch": 0.735548523206751, + "grad_norm": 0.6063279509544373, + "learning_rate": 0.00024896105876152165, + "loss": 1.4496, + "step": 6973 + }, + { + "epoch": 0.7356540084388186, + "grad_norm": 0.6968435049057007, + "learning_rate": 0.0002487742696961761, + "loss": 1.4605, + "step": 6974 + }, + { + "epoch": 0.7357594936708861, + "grad_norm": 0.702816367149353, + "learning_rate": 0.0002485875367956729, + "loss": 1.5084, + "step": 6975 + }, + { + "epoch": 0.7358649789029535, + "grad_norm": 0.6345936059951782, + "learning_rate": 0.00024840086008093645, + "loss": 1.4465, + "step": 6976 + }, + { + "epoch": 0.7359704641350211, + 
"grad_norm": 0.5748581886291504, + "learning_rate": 0.0002482142395728848, + "loss": 1.4969, + "step": 6977 + }, + { + "epoch": 0.7360759493670886, + "grad_norm": 0.6390098333358765, + "learning_rate": 0.0002480276752924295, + "loss": 1.4788, + "step": 6978 + }, + { + "epoch": 0.7361814345991561, + "grad_norm": 0.6490166187286377, + "learning_rate": 0.0002478411672604766, + "loss": 1.479, + "step": 6979 + }, + { + "epoch": 0.7362869198312236, + "grad_norm": 0.6042848229408264, + "learning_rate": 0.0002476547154979248, + "loss": 1.4525, + "step": 6980 + }, + { + "epoch": 0.7363924050632912, + "grad_norm": 0.6683865785598755, + "learning_rate": 0.00024746832002566703, + "loss": 1.4886, + "step": 6981 + }, + { + "epoch": 0.7364978902953586, + "grad_norm": 0.6025704145431519, + "learning_rate": 0.0002472819808645899, + "loss": 1.4524, + "step": 6982 + }, + { + "epoch": 0.7366033755274262, + "grad_norm": 0.6283865571022034, + "learning_rate": 0.0002470956980355735, + "loss": 1.4445, + "step": 6983 + }, + { + "epoch": 0.7367088607594937, + "grad_norm": 0.676750659942627, + "learning_rate": 0.00024690947155949194, + "loss": 1.4924, + "step": 6984 + }, + { + "epoch": 0.7368143459915611, + "grad_norm": 0.7462862730026245, + "learning_rate": 0.0002467233014572127, + "loss": 1.4941, + "step": 6985 + }, + { + "epoch": 0.7369198312236287, + "grad_norm": 0.6456575989723206, + "learning_rate": 0.00024653718774959713, + "loss": 1.4398, + "step": 6986 + }, + { + "epoch": 0.7370253164556962, + "grad_norm": 0.7000299096107483, + "learning_rate": 0.00024635113045749985, + "loss": 1.4386, + "step": 6987 + }, + { + "epoch": 0.7371308016877637, + "grad_norm": 0.7154055237770081, + "learning_rate": 0.00024616512960177014, + "loss": 1.5043, + "step": 6988 + }, + { + "epoch": 0.7372362869198312, + "grad_norm": 0.6803783178329468, + "learning_rate": 0.00024597918520324994, + "loss": 1.5148, + "step": 6989 + }, + { + "epoch": 0.7373417721518988, + "grad_norm": 0.6092402935028076, + 
"learning_rate": 0.00024579329728277534, + "loss": 1.4434, + "step": 6990 + }, + { + "epoch": 0.7374472573839662, + "grad_norm": 0.6003770232200623, + "learning_rate": 0.00024560746586117603, + "loss": 1.4993, + "step": 6991 + }, + { + "epoch": 0.7375527426160338, + "grad_norm": 0.6260119080543518, + "learning_rate": 0.00024542169095927526, + "loss": 1.5166, + "step": 6992 + }, + { + "epoch": 0.7376582278481013, + "grad_norm": 0.6211926937103271, + "learning_rate": 0.00024523597259789004, + "loss": 1.4555, + "step": 6993 + }, + { + "epoch": 0.7377637130801687, + "grad_norm": 0.6462066173553467, + "learning_rate": 0.0002450503107978311, + "loss": 1.4775, + "step": 6994 + }, + { + "epoch": 0.7378691983122363, + "grad_norm": 0.6748046278953552, + "learning_rate": 0.00024486470557990247, + "loss": 1.4843, + "step": 6995 + }, + { + "epoch": 0.7379746835443038, + "grad_norm": 0.6241911053657532, + "learning_rate": 0.0002446791569649027, + "loss": 1.4879, + "step": 6996 + }, + { + "epoch": 0.7380801687763713, + "grad_norm": 0.6678838133811951, + "learning_rate": 0.0002444936649736232, + "loss": 1.4595, + "step": 6997 + }, + { + "epoch": 0.7381856540084388, + "grad_norm": 0.6370390057563782, + "learning_rate": 0.00024430822962684905, + "loss": 1.4563, + "step": 6998 + }, + { + "epoch": 0.7382911392405064, + "grad_norm": 0.6036229729652405, + "learning_rate": 0.00024412285094535952, + "loss": 1.5198, + "step": 6999 + }, + { + "epoch": 0.7383966244725738, + "grad_norm": 0.6304142475128174, + "learning_rate": 0.00024393752894992708, + "loss": 1.4759, + "step": 7000 + }, + { + "epoch": 0.7385021097046414, + "grad_norm": 0.5720742344856262, + "learning_rate": 0.00024375226366131787, + "loss": 1.4471, + "step": 7001 + }, + { + "epoch": 0.7386075949367089, + "grad_norm": 0.6261929273605347, + "learning_rate": 0.00024356705510029196, + "loss": 1.4793, + "step": 7002 + }, + { + "epoch": 0.7387130801687763, + "grad_norm": 0.632093608379364, + "learning_rate": 0.00024338190328760282, 
+ "loss": 1.4743, + "step": 7003 + }, + { + "epoch": 0.7388185654008439, + "grad_norm": 0.6491792798042297, + "learning_rate": 0.00024319680824399736, + "loss": 1.4421, + "step": 7004 + }, + { + "epoch": 0.7389240506329114, + "grad_norm": 0.6691301465034485, + "learning_rate": 0.00024301176999021702, + "loss": 1.4315, + "step": 7005 + }, + { + "epoch": 0.7390295358649789, + "grad_norm": 0.6869711875915527, + "learning_rate": 0.00024282678854699592, + "loss": 1.4436, + "step": 7006 + }, + { + "epoch": 0.7391350210970464, + "grad_norm": 0.6385198831558228, + "learning_rate": 0.00024264186393506206, + "loss": 1.4608, + "step": 7007 + }, + { + "epoch": 0.739240506329114, + "grad_norm": 0.642134428024292, + "learning_rate": 0.00024245699617513733, + "loss": 1.4354, + "step": 7008 + }, + { + "epoch": 0.7393459915611814, + "grad_norm": 0.5794150233268738, + "learning_rate": 0.00024227218528793696, + "loss": 1.4673, + "step": 7009 + }, + { + "epoch": 0.739451476793249, + "grad_norm": 0.6022536158561707, + "learning_rate": 0.00024208743129417004, + "loss": 1.4591, + "step": 7010 + }, + { + "epoch": 0.7395569620253165, + "grad_norm": 0.7207533121109009, + "learning_rate": 0.00024190273421453913, + "loss": 1.4862, + "step": 7011 + }, + { + "epoch": 0.739662447257384, + "grad_norm": 0.5887115001678467, + "learning_rate": 0.00024171809406974047, + "loss": 1.4549, + "step": 7012 + }, + { + "epoch": 0.7397679324894515, + "grad_norm": 0.6048431396484375, + "learning_rate": 0.0002415335108804636, + "loss": 1.476, + "step": 7013 + }, + { + "epoch": 0.7398734177215189, + "grad_norm": 0.7233210802078247, + "learning_rate": 0.0002413489846673925, + "loss": 1.4534, + "step": 7014 + }, + { + "epoch": 0.7399789029535865, + "grad_norm": 0.6507024168968201, + "learning_rate": 0.0002411645154512041, + "loss": 1.4857, + "step": 7015 + }, + { + "epoch": 0.740084388185654, + "grad_norm": 0.7737658023834229, + "learning_rate": 0.00024098010325256897, + "loss": 1.4914, + "step": 7016 + }, + { + 
"epoch": 0.7401898734177215, + "grad_norm": 0.6166560649871826, + "learning_rate": 0.00024079574809215149, + "loss": 1.4602, + "step": 7017 + }, + { + "epoch": 0.740295358649789, + "grad_norm": 0.692267894744873, + "learning_rate": 0.00024061144999060956, + "loss": 1.4977, + "step": 7018 + }, + { + "epoch": 0.7404008438818566, + "grad_norm": 0.6569559574127197, + "learning_rate": 0.00024042720896859471, + "loss": 1.4315, + "step": 7019 + }, + { + "epoch": 0.740506329113924, + "grad_norm": 0.7101534605026245, + "learning_rate": 0.00024024302504675206, + "loss": 1.471, + "step": 7020 + }, + { + "epoch": 0.7406118143459915, + "grad_norm": 0.6244527101516724, + "learning_rate": 0.00024005889824572004, + "loss": 1.4793, + "step": 7021 + }, + { + "epoch": 0.7407172995780591, + "grad_norm": 0.6901535391807556, + "learning_rate": 0.00023987482858613154, + "loss": 1.4792, + "step": 7022 + }, + { + "epoch": 0.7408227848101265, + "grad_norm": 0.6292383670806885, + "learning_rate": 0.0002396908160886123, + "loss": 1.4291, + "step": 7023 + }, + { + "epoch": 0.7409282700421941, + "grad_norm": 0.6544637680053711, + "learning_rate": 0.0002395068607737816, + "loss": 1.4476, + "step": 7024 + }, + { + "epoch": 0.7410337552742616, + "grad_norm": 0.6370733976364136, + "learning_rate": 0.0002393229626622528, + "loss": 1.4532, + "step": 7025 + }, + { + "epoch": 0.7411392405063291, + "grad_norm": 0.6603360772132874, + "learning_rate": 0.00023913912177463248, + "loss": 1.4715, + "step": 7026 + }, + { + "epoch": 0.7412447257383966, + "grad_norm": 0.6684688925743103, + "learning_rate": 0.0002389553381315209, + "loss": 1.5161, + "step": 7027 + }, + { + "epoch": 0.7413502109704642, + "grad_norm": 0.6343170404434204, + "learning_rate": 0.00023877161175351206, + "loss": 1.4959, + "step": 7028 + }, + { + "epoch": 0.7414556962025316, + "grad_norm": 0.6193848848342896, + "learning_rate": 0.00023858794266119323, + "loss": 1.4968, + "step": 7029 + }, + { + "epoch": 0.7415611814345991, + "grad_norm": 
0.6291797757148743, + "learning_rate": 0.0002384043308751454, + "loss": 1.4557, + "step": 7030 + }, + { + "epoch": 0.7416666666666667, + "grad_norm": 0.6212112307548523, + "learning_rate": 0.0002382207764159436, + "loss": 1.4676, + "step": 7031 + }, + { + "epoch": 0.7417721518987341, + "grad_norm": 0.6499168872833252, + "learning_rate": 0.00023803727930415568, + "loss": 1.5028, + "step": 7032 + }, + { + "epoch": 0.7418776371308017, + "grad_norm": 0.6565657258033752, + "learning_rate": 0.00023785383956034353, + "loss": 1.5038, + "step": 7033 + }, + { + "epoch": 0.7419831223628692, + "grad_norm": 0.6351287961006165, + "learning_rate": 0.00023767045720506243, + "loss": 1.4694, + "step": 7034 + }, + { + "epoch": 0.7420886075949367, + "grad_norm": 0.7208534479141235, + "learning_rate": 0.00023748713225886137, + "loss": 1.4905, + "step": 7035 + }, + { + "epoch": 0.7421940928270042, + "grad_norm": 0.7289441227912903, + "learning_rate": 0.0002373038647422827, + "loss": 1.462, + "step": 7036 + }, + { + "epoch": 0.7422995780590718, + "grad_norm": 0.7224597334861755, + "learning_rate": 0.00023712065467586252, + "loss": 1.47, + "step": 7037 + }, + { + "epoch": 0.7424050632911392, + "grad_norm": 0.6063973307609558, + "learning_rate": 0.00023693750208013045, + "loss": 1.477, + "step": 7038 + }, + { + "epoch": 0.7425105485232067, + "grad_norm": 0.6327435970306396, + "learning_rate": 0.00023675440697560943, + "loss": 1.5074, + "step": 7039 + }, + { + "epoch": 0.7426160337552743, + "grad_norm": 0.8225303292274475, + "learning_rate": 0.00023657136938281653, + "loss": 1.4414, + "step": 7040 + }, + { + "epoch": 0.7427215189873417, + "grad_norm": 0.692870557308197, + "learning_rate": 0.00023638838932226196, + "loss": 1.4325, + "step": 7041 + }, + { + "epoch": 0.7428270042194093, + "grad_norm": 0.7260375618934631, + "learning_rate": 0.00023620546681444942, + "loss": 1.5278, + "step": 7042 + }, + { + "epoch": 0.7429324894514768, + "grad_norm": 0.6054127812385559, + "learning_rate": 
0.00023602260187987635, + "loss": 1.4961, + "step": 7043 + }, + { + "epoch": 0.7430379746835443, + "grad_norm": 0.66286301612854, + "learning_rate": 0.0002358397945390336, + "loss": 1.4449, + "step": 7044 + }, + { + "epoch": 0.7431434599156118, + "grad_norm": 0.6484677195549011, + "learning_rate": 0.0002356570448124058, + "loss": 1.4753, + "step": 7045 + }, + { + "epoch": 0.7432489451476794, + "grad_norm": 0.7217618227005005, + "learning_rate": 0.00023547435272047083, + "loss": 1.476, + "step": 7046 + }, + { + "epoch": 0.7433544303797468, + "grad_norm": 0.6193372011184692, + "learning_rate": 0.00023529171828370033, + "loss": 1.4435, + "step": 7047 + }, + { + "epoch": 0.7434599156118143, + "grad_norm": 0.6668204069137573, + "learning_rate": 0.0002351091415225591, + "loss": 1.4643, + "step": 7048 + }, + { + "epoch": 0.7435654008438819, + "grad_norm": 0.693299412727356, + "learning_rate": 0.0002349266224575063, + "loss": 1.4659, + "step": 7049 + }, + { + "epoch": 0.7436708860759493, + "grad_norm": 0.6220583319664001, + "learning_rate": 0.00023474416110899377, + "loss": 1.4309, + "step": 7050 + }, + { + "epoch": 0.7437763713080169, + "grad_norm": 0.7483892440795898, + "learning_rate": 0.00023456175749746736, + "loss": 1.4739, + "step": 7051 + }, + { + "epoch": 0.7438818565400844, + "grad_norm": 0.6099924445152283, + "learning_rate": 0.0002343794116433662, + "loss": 1.4421, + "step": 7052 + }, + { + "epoch": 0.7439873417721519, + "grad_norm": 0.684874415397644, + "learning_rate": 0.00023419712356712307, + "loss": 1.4317, + "step": 7053 + }, + { + "epoch": 0.7440928270042194, + "grad_norm": 0.6303837299346924, + "learning_rate": 0.00023401489328916432, + "loss": 1.4665, + "step": 7054 + }, + { + "epoch": 0.744198312236287, + "grad_norm": 0.6183747053146362, + "learning_rate": 0.00023383272082990963, + "loss": 1.4977, + "step": 7055 + }, + { + "epoch": 0.7443037974683544, + "grad_norm": 0.6510943174362183, + "learning_rate": 0.00023365060620977223, + "loss": 1.5067, + 
"step": 7056 + }, + { + "epoch": 0.744409282700422, + "grad_norm": 0.6347905397415161, + "learning_rate": 0.00023346854944915937, + "loss": 1.452, + "step": 7057 + }, + { + "epoch": 0.7445147679324895, + "grad_norm": 0.6309388279914856, + "learning_rate": 0.00023328655056847124, + "loss": 1.5048, + "step": 7058 + }, + { + "epoch": 0.7446202531645569, + "grad_norm": 0.6307592391967773, + "learning_rate": 0.0002331046095881017, + "loss": 1.4719, + "step": 7059 + }, + { + "epoch": 0.7447257383966245, + "grad_norm": 0.6679182052612305, + "learning_rate": 0.00023292272652843807, + "loss": 1.4787, + "step": 7060 + }, + { + "epoch": 0.744831223628692, + "grad_norm": 0.6132335066795349, + "learning_rate": 0.00023274090140986138, + "loss": 1.5277, + "step": 7061 + }, + { + "epoch": 0.7449367088607595, + "grad_norm": 0.6375160217285156, + "learning_rate": 0.00023255913425274588, + "loss": 1.4552, + "step": 7062 + }, + { + "epoch": 0.745042194092827, + "grad_norm": 0.6008918881416321, + "learning_rate": 0.00023237742507745964, + "loss": 1.4492, + "step": 7063 + }, + { + "epoch": 0.7451476793248946, + "grad_norm": 0.6369547247886658, + "learning_rate": 0.00023219577390436397, + "loss": 1.479, + "step": 7064 + }, + { + "epoch": 0.745253164556962, + "grad_norm": 0.6245068311691284, + "learning_rate": 0.00023201418075381364, + "loss": 1.4664, + "step": 7065 + }, + { + "epoch": 0.7453586497890295, + "grad_norm": 0.6099976301193237, + "learning_rate": 0.00023183264564615756, + "loss": 1.475, + "step": 7066 + }, + { + "epoch": 0.7454641350210971, + "grad_norm": 0.6543333530426025, + "learning_rate": 0.00023165116860173726, + "loss": 1.4769, + "step": 7067 + }, + { + "epoch": 0.7455696202531645, + "grad_norm": 0.5901547074317932, + "learning_rate": 0.00023146974964088825, + "loss": 1.4992, + "step": 7068 + }, + { + "epoch": 0.7456751054852321, + "grad_norm": 0.6433202028274536, + "learning_rate": 0.00023128838878393946, + "loss": 1.4942, + "step": 7069 + }, + { + "epoch": 
0.7457805907172996, + "grad_norm": 0.6152516007423401, + "learning_rate": 0.00023110708605121317, + "loss": 1.4739, + "step": 7070 + }, + { + "epoch": 0.7458860759493671, + "grad_norm": 0.6447781920433044, + "learning_rate": 0.00023092584146302539, + "loss": 1.4992, + "step": 7071 + }, + { + "epoch": 0.7459915611814346, + "grad_norm": 0.6110120415687561, + "learning_rate": 0.0002307446550396854, + "loss": 1.4828, + "step": 7072 + }, + { + "epoch": 0.7460970464135022, + "grad_norm": 0.6479074954986572, + "learning_rate": 0.0002305635268014961, + "loss": 1.4697, + "step": 7073 + }, + { + "epoch": 0.7462025316455696, + "grad_norm": 0.5535089373588562, + "learning_rate": 0.0002303824567687534, + "loss": 1.4606, + "step": 7074 + }, + { + "epoch": 0.7463080168776371, + "grad_norm": 0.6231966018676758, + "learning_rate": 0.00023020144496174781, + "loss": 1.4487, + "step": 7075 + }, + { + "epoch": 0.7464135021097047, + "grad_norm": 0.6444088220596313, + "learning_rate": 0.0002300204914007622, + "loss": 1.4556, + "step": 7076 + }, + { + "epoch": 0.7465189873417721, + "grad_norm": 0.5997350215911865, + "learning_rate": 0.00022983959610607338, + "loss": 1.4444, + "step": 7077 + }, + { + "epoch": 0.7466244725738397, + "grad_norm": 0.5990954637527466, + "learning_rate": 0.00022965875909795164, + "loss": 1.4934, + "step": 7078 + }, + { + "epoch": 0.7467299578059071, + "grad_norm": 0.6288802027702332, + "learning_rate": 0.00022947798039666051, + "loss": 1.4972, + "step": 7079 + }, + { + "epoch": 0.7468354430379747, + "grad_norm": 0.6834893822669983, + "learning_rate": 0.00022929726002245728, + "loss": 1.4905, + "step": 7080 + }, + { + "epoch": 0.7469409282700422, + "grad_norm": 0.6599762439727783, + "learning_rate": 0.00022911659799559254, + "loss": 1.4502, + "step": 7081 + }, + { + "epoch": 0.7470464135021097, + "grad_norm": 0.6711635589599609, + "learning_rate": 0.00022893599433631014, + "loss": 1.4966, + "step": 7082 + }, + { + "epoch": 0.7471518987341772, + "grad_norm": 
0.6070608496665955, + "learning_rate": 0.00022875544906484797, + "loss": 1.4908, + "step": 7083 + }, + { + "epoch": 0.7472573839662447, + "grad_norm": 0.6386677026748657, + "learning_rate": 0.00022857496220143696, + "loss": 1.4526, + "step": 7084 + }, + { + "epoch": 0.7473628691983122, + "grad_norm": 0.6122227311134338, + "learning_rate": 0.00022839453376630149, + "loss": 1.4707, + "step": 7085 + }, + { + "epoch": 0.7474683544303797, + "grad_norm": 0.5997546315193176, + "learning_rate": 0.00022821416377965948, + "loss": 1.4603, + "step": 7086 + }, + { + "epoch": 0.7475738396624473, + "grad_norm": 0.5929263234138489, + "learning_rate": 0.00022803385226172226, + "loss": 1.5346, + "step": 7087 + }, + { + "epoch": 0.7476793248945147, + "grad_norm": 0.6056012511253357, + "learning_rate": 0.0002278535992326947, + "loss": 1.4734, + "step": 7088 + }, + { + "epoch": 0.7477848101265823, + "grad_norm": 0.6238064169883728, + "learning_rate": 0.00022767340471277492, + "loss": 1.4966, + "step": 7089 + }, + { + "epoch": 0.7478902953586498, + "grad_norm": 0.6145519018173218, + "learning_rate": 0.00022749326872215472, + "loss": 1.4094, + "step": 7090 + }, + { + "epoch": 0.7479957805907173, + "grad_norm": 0.5938076972961426, + "learning_rate": 0.00022731319128101906, + "loss": 1.4452, + "step": 7091 + }, + { + "epoch": 0.7481012658227848, + "grad_norm": 0.6190865635871887, + "learning_rate": 0.0002271331724095468, + "loss": 1.449, + "step": 7092 + }, + { + "epoch": 0.7482067510548523, + "grad_norm": 0.6179383397102356, + "learning_rate": 0.0002269532121279099, + "loss": 1.4749, + "step": 7093 + }, + { + "epoch": 0.7483122362869198, + "grad_norm": 0.6542215943336487, + "learning_rate": 0.00022677331045627366, + "loss": 1.4932, + "step": 7094 + }, + { + "epoch": 0.7484177215189873, + "grad_norm": 0.6092994809150696, + "learning_rate": 0.00022659346741479708, + "loss": 1.4939, + "step": 7095 + }, + { + "epoch": 0.7485232067510549, + "grad_norm": 0.6561562418937683, + "learning_rate": 
0.00022641368302363235, + "loss": 1.4927, + "step": 7096 + }, + { + "epoch": 0.7486286919831223, + "grad_norm": 0.6211662292480469, + "learning_rate": 0.00022623395730292538, + "loss": 1.4644, + "step": 7097 + }, + { + "epoch": 0.7487341772151899, + "grad_norm": 0.5707306265830994, + "learning_rate": 0.0002260542902728151, + "loss": 1.498, + "step": 7098 + }, + { + "epoch": 0.7488396624472574, + "grad_norm": 0.6051817536354065, + "learning_rate": 0.00022587468195343436, + "loss": 1.4576, + "step": 7099 + }, + { + "epoch": 0.7489451476793249, + "grad_norm": 0.6345413327217102, + "learning_rate": 0.0002256951323649087, + "loss": 1.44, + "step": 7100 + }, + { + "epoch": 0.7490506329113924, + "grad_norm": 0.6224578022956848, + "learning_rate": 0.00022551564152735814, + "loss": 1.5015, + "step": 7101 + }, + { + "epoch": 0.74915611814346, + "grad_norm": 0.6320847272872925, + "learning_rate": 0.00022533620946089524, + "loss": 1.5101, + "step": 7102 + }, + { + "epoch": 0.7492616033755274, + "grad_norm": 0.6440356373786926, + "learning_rate": 0.00022515683618562626, + "loss": 1.4767, + "step": 7103 + }, + { + "epoch": 0.7493670886075949, + "grad_norm": 0.618293821811676, + "learning_rate": 0.00022497752172165095, + "loss": 1.4358, + "step": 7104 + }, + { + "epoch": 0.7494725738396625, + "grad_norm": 0.6165223717689514, + "learning_rate": 0.0002247982660890623, + "loss": 1.4944, + "step": 7105 + }, + { + "epoch": 0.7495780590717299, + "grad_norm": 0.6013035774230957, + "learning_rate": 0.00022461906930794687, + "loss": 1.499, + "step": 7106 + }, + { + "epoch": 0.7496835443037975, + "grad_norm": 0.644845724105835, + "learning_rate": 0.00022443993139838447, + "loss": 1.4848, + "step": 7107 + }, + { + "epoch": 0.749789029535865, + "grad_norm": 0.5961825251579285, + "learning_rate": 0.00022426085238044823, + "loss": 1.4615, + "step": 7108 + }, + { + "epoch": 0.7498945147679325, + "grad_norm": 0.7035152912139893, + "learning_rate": 0.00022408183227420528, + "loss": 1.456, + 
"step": 7109 + }, + { + "epoch": 0.75, + "grad_norm": 0.6730010509490967, + "learning_rate": 0.00022390287109971547, + "loss": 1.4894, + "step": 7110 + }, + { + "epoch": 0.7501054852320675, + "grad_norm": 0.5926129817962646, + "learning_rate": 0.00022372396887703234, + "loss": 1.5176, + "step": 7111 + }, + { + "epoch": 0.750210970464135, + "grad_norm": 0.6592768430709839, + "learning_rate": 0.00022354512562620268, + "loss": 1.4746, + "step": 7112 + }, + { + "epoch": 0.7503164556962025, + "grad_norm": 0.6632883548736572, + "learning_rate": 0.0002233663413672669, + "loss": 1.5354, + "step": 7113 + }, + { + "epoch": 0.7504219409282701, + "grad_norm": 0.6428496837615967, + "learning_rate": 0.00022318761612025856, + "loss": 1.4538, + "step": 7114 + }, + { + "epoch": 0.7505274261603375, + "grad_norm": 0.6821309924125671, + "learning_rate": 0.00022300894990520478, + "loss": 1.4654, + "step": 7115 + }, + { + "epoch": 0.7506329113924051, + "grad_norm": 0.6528717279434204, + "learning_rate": 0.000222830342742126, + "loss": 1.4442, + "step": 7116 + }, + { + "epoch": 0.7507383966244726, + "grad_norm": 0.6070993542671204, + "learning_rate": 0.00022265179465103574, + "loss": 1.477, + "step": 7117 + }, + { + "epoch": 0.75084388185654, + "grad_norm": 0.6544170379638672, + "learning_rate": 0.00022247330565194171, + "loss": 1.4457, + "step": 7118 + }, + { + "epoch": 0.7509493670886076, + "grad_norm": 0.7106103897094727, + "learning_rate": 0.0002222948757648443, + "loss": 1.4307, + "step": 7119 + }, + { + "epoch": 0.7510548523206751, + "grad_norm": 0.6458961367607117, + "learning_rate": 0.00022211650500973746, + "loss": 1.5104, + "step": 7120 + }, + { + "epoch": 0.7511603375527426, + "grad_norm": 0.7562741041183472, + "learning_rate": 0.0002219381934066084, + "loss": 1.5267, + "step": 7121 + }, + { + "epoch": 0.7512658227848101, + "grad_norm": 0.6746728420257568, + "learning_rate": 0.00022175994097543806, + "loss": 1.4785, + "step": 7122 + }, + { + "epoch": 0.7513713080168777, + 
"grad_norm": 0.608712375164032, + "learning_rate": 0.0002215817477362003, + "loss": 1.4724, + "step": 7123 + }, + { + "epoch": 0.7514767932489451, + "grad_norm": 0.7733335494995117, + "learning_rate": 0.00022140361370886265, + "loss": 1.4875, + "step": 7124 + }, + { + "epoch": 0.7515822784810127, + "grad_norm": 0.6022513508796692, + "learning_rate": 0.00022122553891338586, + "loss": 1.4922, + "step": 7125 + }, + { + "epoch": 0.7516877637130802, + "grad_norm": 0.7194312810897827, + "learning_rate": 0.00022104752336972396, + "loss": 1.496, + "step": 7126 + }, + { + "epoch": 0.7517932489451477, + "grad_norm": 0.6160292625427246, + "learning_rate": 0.00022086956709782495, + "loss": 1.4846, + "step": 7127 + }, + { + "epoch": 0.7518987341772152, + "grad_norm": 0.5748273730278015, + "learning_rate": 0.0002206916701176293, + "loss": 1.4804, + "step": 7128 + }, + { + "epoch": 0.7520042194092827, + "grad_norm": 0.6722036004066467, + "learning_rate": 0.00022051383244907143, + "loss": 1.4234, + "step": 7129 + }, + { + "epoch": 0.7521097046413502, + "grad_norm": 0.6168041825294495, + "learning_rate": 0.0002203360541120789, + "loss": 1.4773, + "step": 7130 + }, + { + "epoch": 0.7522151898734177, + "grad_norm": 0.6204701662063599, + "learning_rate": 0.00022015833512657268, + "loss": 1.4693, + "step": 7131 + }, + { + "epoch": 0.7523206751054853, + "grad_norm": 0.6722081899642944, + "learning_rate": 0.000219980675512467, + "loss": 1.4541, + "step": 7132 + }, + { + "epoch": 0.7524261603375527, + "grad_norm": 0.7003352046012878, + "learning_rate": 0.00021980307528966962, + "loss": 1.4506, + "step": 7133 + }, + { + "epoch": 0.7525316455696203, + "grad_norm": 0.620567798614502, + "learning_rate": 0.00021962553447808108, + "loss": 1.4436, + "step": 7134 + }, + { + "epoch": 0.7526371308016878, + "grad_norm": 0.717556893825531, + "learning_rate": 0.00021944805309759643, + "loss": 1.4295, + "step": 7135 + }, + { + "epoch": 0.7527426160337553, + "grad_norm": 0.6749588251113892, + 
"learning_rate": 0.000219270631168103, + "loss": 1.4847, + "step": 7136 + }, + { + "epoch": 0.7528481012658228, + "grad_norm": 0.7433471083641052, + "learning_rate": 0.0002190932687094818, + "loss": 1.5021, + "step": 7137 + }, + { + "epoch": 0.7529535864978903, + "grad_norm": 0.6077702045440674, + "learning_rate": 0.00021891596574160715, + "loss": 1.4726, + "step": 7138 + }, + { + "epoch": 0.7530590717299578, + "grad_norm": 0.6301217079162598, + "learning_rate": 0.0002187387222843467, + "loss": 1.477, + "step": 7139 + }, + { + "epoch": 0.7531645569620253, + "grad_norm": 0.6414618492126465, + "learning_rate": 0.00021856153835756164, + "loss": 1.4764, + "step": 7140 + }, + { + "epoch": 0.7532700421940929, + "grad_norm": 0.7982004284858704, + "learning_rate": 0.00021838441398110617, + "loss": 1.4738, + "step": 7141 + }, + { + "epoch": 0.7533755274261603, + "grad_norm": 0.6798188090324402, + "learning_rate": 0.000218207349174828, + "loss": 1.4666, + "step": 7142 + }, + { + "epoch": 0.7534810126582279, + "grad_norm": 0.6038782596588135, + "learning_rate": 0.0002180303439585678, + "loss": 1.4522, + "step": 7143 + }, + { + "epoch": 0.7535864978902953, + "grad_norm": 0.749453604221344, + "learning_rate": 0.0002178533983521605, + "loss": 1.484, + "step": 7144 + }, + { + "epoch": 0.7536919831223629, + "grad_norm": 0.6644917726516724, + "learning_rate": 0.0002176765123754334, + "loss": 1.4479, + "step": 7145 + }, + { + "epoch": 0.7537974683544304, + "grad_norm": 0.5846432447433472, + "learning_rate": 0.00021749968604820754, + "loss": 1.4809, + "step": 7146 + }, + { + "epoch": 0.7539029535864978, + "grad_norm": 0.647148847579956, + "learning_rate": 0.00021732291939029712, + "loss": 1.4833, + "step": 7147 + }, + { + "epoch": 0.7540084388185654, + "grad_norm": 0.7793483734130859, + "learning_rate": 0.00021714621242150973, + "loss": 1.4404, + "step": 7148 + }, + { + "epoch": 0.7541139240506329, + "grad_norm": 0.6329836249351501, + "learning_rate": 0.0002169695651616463, + "loss": 
1.4262, + "step": 7149 + }, + { + "epoch": 0.7542194092827004, + "grad_norm": 0.6901568174362183, + "learning_rate": 0.00021679297763050104, + "loss": 1.4749, + "step": 7150 + }, + { + "epoch": 0.7543248945147679, + "grad_norm": 0.7493463158607483, + "learning_rate": 0.00021661644984786142, + "loss": 1.4527, + "step": 7151 + }, + { + "epoch": 0.7544303797468355, + "grad_norm": 0.6343306303024292, + "learning_rate": 0.00021643998183350802, + "loss": 1.4702, + "step": 7152 + }, + { + "epoch": 0.7545358649789029, + "grad_norm": 0.7182573676109314, + "learning_rate": 0.00021626357360721556, + "loss": 1.4587, + "step": 7153 + }, + { + "epoch": 0.7546413502109705, + "grad_norm": 0.6880958080291748, + "learning_rate": 0.0002160872251887511, + "loss": 1.4442, + "step": 7154 + }, + { + "epoch": 0.754746835443038, + "grad_norm": 0.68332439661026, + "learning_rate": 0.00021591093659787528, + "loss": 1.5237, + "step": 7155 + }, + { + "epoch": 0.7548523206751054, + "grad_norm": 0.7568877935409546, + "learning_rate": 0.00021573470785434237, + "loss": 1.5011, + "step": 7156 + }, + { + "epoch": 0.754957805907173, + "grad_norm": 0.6318824291229248, + "learning_rate": 0.00021555853897789942, + "loss": 1.4287, + "step": 7157 + }, + { + "epoch": 0.7550632911392405, + "grad_norm": 0.7009167671203613, + "learning_rate": 0.0002153824299882872, + "loss": 1.4698, + "step": 7158 + }, + { + "epoch": 0.755168776371308, + "grad_norm": 0.694476306438446, + "learning_rate": 0.00021520638090523955, + "loss": 1.4658, + "step": 7159 + }, + { + "epoch": 0.7552742616033755, + "grad_norm": 0.6156480312347412, + "learning_rate": 0.0002150303917484834, + "loss": 1.5155, + "step": 7160 + }, + { + "epoch": 0.7553797468354431, + "grad_norm": 0.7433644533157349, + "learning_rate": 0.00021485446253773966, + "loss": 1.4866, + "step": 7161 + }, + { + "epoch": 0.7554852320675105, + "grad_norm": 0.6419818997383118, + "learning_rate": 0.00021467859329272188, + "loss": 1.476, + "step": 7162 + }, + { + "epoch": 
0.755590717299578, + "grad_norm": 0.6389323472976685, + "learning_rate": 0.00021450278403313707, + "loss": 1.4592, + "step": 7163 + }, + { + "epoch": 0.7556962025316456, + "grad_norm": 0.8304619193077087, + "learning_rate": 0.0002143270347786856, + "loss": 1.4779, + "step": 7164 + }, + { + "epoch": 0.755801687763713, + "grad_norm": 0.6017631888389587, + "learning_rate": 0.0002141513455490609, + "loss": 1.4886, + "step": 7165 + }, + { + "epoch": 0.7559071729957806, + "grad_norm": 0.7294104099273682, + "learning_rate": 0.00021397571636394991, + "loss": 1.4386, + "step": 7166 + }, + { + "epoch": 0.7560126582278481, + "grad_norm": 0.8806599378585815, + "learning_rate": 0.00021380014724303286, + "loss": 1.4749, + "step": 7167 + }, + { + "epoch": 0.7561181434599156, + "grad_norm": 0.6322087645530701, + "learning_rate": 0.00021362463820598297, + "loss": 1.4783, + "step": 7168 + }, + { + "epoch": 0.7562236286919831, + "grad_norm": 0.8674919605255127, + "learning_rate": 0.00021344918927246678, + "loss": 1.5037, + "step": 7169 + }, + { + "epoch": 0.7563291139240507, + "grad_norm": 0.8155951499938965, + "learning_rate": 0.0002132738004621446, + "loss": 1.5061, + "step": 7170 + }, + { + "epoch": 0.7564345991561181, + "grad_norm": 0.6199837327003479, + "learning_rate": 0.0002130984717946695, + "loss": 1.465, + "step": 7171 + }, + { + "epoch": 0.7565400843881857, + "grad_norm": 0.8600576519966125, + "learning_rate": 0.00021292320328968783, + "loss": 1.4756, + "step": 7172 + }, + { + "epoch": 0.7566455696202532, + "grad_norm": 0.7206355333328247, + "learning_rate": 0.0002127479949668393, + "loss": 1.4641, + "step": 7173 + }, + { + "epoch": 0.7567510548523206, + "grad_norm": 0.6386166214942932, + "learning_rate": 0.000212572846845757, + "loss": 1.4426, + "step": 7174 + }, + { + "epoch": 0.7568565400843882, + "grad_norm": 0.7976645231246948, + "learning_rate": 0.000212397758946067, + "loss": 1.4849, + "step": 7175 + }, + { + "epoch": 0.7569620253164557, + "grad_norm": 
0.6402375102043152, + "learning_rate": 0.0002122227312873889, + "loss": 1.4526, + "step": 7176 + }, + { + "epoch": 0.7570675105485232, + "grad_norm": 0.5932350158691406, + "learning_rate": 0.00021204776388933534, + "loss": 1.473, + "step": 7177 + }, + { + "epoch": 0.7571729957805907, + "grad_norm": 0.8274619579315186, + "learning_rate": 0.00021187285677151205, + "loss": 1.4209, + "step": 7178 + }, + { + "epoch": 0.7572784810126583, + "grad_norm": 0.723387598991394, + "learning_rate": 0.00021169800995351874, + "loss": 1.4368, + "step": 7179 + }, + { + "epoch": 0.7573839662447257, + "grad_norm": 0.6311207413673401, + "learning_rate": 0.00021152322345494763, + "loss": 1.4638, + "step": 7180 + }, + { + "epoch": 0.7574894514767933, + "grad_norm": 0.7578490972518921, + "learning_rate": 0.00021134849729538438, + "loss": 1.4468, + "step": 7181 + }, + { + "epoch": 0.7575949367088608, + "grad_norm": 0.7725979685783386, + "learning_rate": 0.00021117383149440801, + "loss": 1.4659, + "step": 7182 + }, + { + "epoch": 0.7577004219409282, + "grad_norm": 0.6048261523246765, + "learning_rate": 0.00021099922607159064, + "loss": 1.4318, + "step": 7183 + }, + { + "epoch": 0.7578059071729958, + "grad_norm": 0.9315341114997864, + "learning_rate": 0.00021082468104649773, + "loss": 1.4786, + "step": 7184 + }, + { + "epoch": 0.7579113924050633, + "grad_norm": 0.7133838534355164, + "learning_rate": 0.00021065019643868785, + "loss": 1.506, + "step": 7185 + }, + { + "epoch": 0.7580168776371308, + "grad_norm": 0.6853176355361938, + "learning_rate": 0.00021047577226771292, + "loss": 1.4263, + "step": 7186 + }, + { + "epoch": 0.7581223628691983, + "grad_norm": 0.7188118100166321, + "learning_rate": 0.00021030140855311772, + "loss": 1.4885, + "step": 7187 + }, + { + "epoch": 0.7582278481012659, + "grad_norm": 0.6418485641479492, + "learning_rate": 0.00021012710531444112, + "loss": 1.4397, + "step": 7188 + }, + { + "epoch": 0.7583333333333333, + "grad_norm": 0.6160697937011719, + "learning_rate": 
0.00020995286257121453, + "loss": 1.4568, + "step": 7189 + }, + { + "epoch": 0.7584388185654009, + "grad_norm": 0.835334062576294, + "learning_rate": 0.00020977868034296253, + "loss": 1.4459, + "step": 7190 + }, + { + "epoch": 0.7585443037974684, + "grad_norm": 0.6950300335884094, + "learning_rate": 0.0002096045586492031, + "loss": 1.4623, + "step": 7191 + }, + { + "epoch": 0.7586497890295358, + "grad_norm": 0.6224502921104431, + "learning_rate": 0.00020943049750944768, + "loss": 1.4604, + "step": 7192 + }, + { + "epoch": 0.7587552742616034, + "grad_norm": 0.7474285960197449, + "learning_rate": 0.00020925649694320046, + "loss": 1.4752, + "step": 7193 + }, + { + "epoch": 0.7588607594936709, + "grad_norm": 0.7384663224220276, + "learning_rate": 0.0002090825569699591, + "loss": 1.4456, + "step": 7194 + }, + { + "epoch": 0.7589662447257384, + "grad_norm": 0.6016077995300293, + "learning_rate": 0.0002089086776092146, + "loss": 1.4992, + "step": 7195 + }, + { + "epoch": 0.7590717299578059, + "grad_norm": 0.9461644887924194, + "learning_rate": 0.0002087348588804505, + "loss": 1.5067, + "step": 7196 + }, + { + "epoch": 0.7591772151898735, + "grad_norm": 0.631378710269928, + "learning_rate": 0.0002085611008031449, + "loss": 1.4423, + "step": 7197 + }, + { + "epoch": 0.7592827004219409, + "grad_norm": 0.7006809711456299, + "learning_rate": 0.00020838740339676763, + "loss": 1.4736, + "step": 7198 + }, + { + "epoch": 0.7593881856540085, + "grad_norm": 0.7013024687767029, + "learning_rate": 0.00020821376668078264, + "loss": 1.4612, + "step": 7199 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 0.6092227101325989, + "learning_rate": 0.00020804019067464667, + "loss": 1.4508, + "step": 7200 + }, + { + "epoch": 0.7595991561181434, + "grad_norm": 0.6948703527450562, + "learning_rate": 0.00020786667539780977, + "loss": 1.4442, + "step": 7201 + }, + { + "epoch": 0.759704641350211, + "grad_norm": 0.6422743797302246, + "learning_rate": 0.00020769322086971524, + "loss": 1.4549, + 
"step": 7202 + }, + { + "epoch": 0.7598101265822785, + "grad_norm": 0.6043994426727295, + "learning_rate": 0.00020751982710979944, + "loss": 1.4764, + "step": 7203 + }, + { + "epoch": 0.759915611814346, + "grad_norm": 0.6528903245925903, + "learning_rate": 0.0002073464941374921, + "loss": 1.4712, + "step": 7204 + }, + { + "epoch": 0.7600210970464135, + "grad_norm": 0.6120791435241699, + "learning_rate": 0.000207173221972216, + "loss": 1.5001, + "step": 7205 + }, + { + "epoch": 0.7601265822784811, + "grad_norm": 0.6739292144775391, + "learning_rate": 0.00020700001063338696, + "loss": 1.4831, + "step": 7206 + }, + { + "epoch": 0.7602320675105485, + "grad_norm": 0.6997014284133911, + "learning_rate": 0.00020682686014041458, + "loss": 1.4592, + "step": 7207 + }, + { + "epoch": 0.760337552742616, + "grad_norm": 0.6330496072769165, + "learning_rate": 0.00020665377051270095, + "loss": 1.483, + "step": 7208 + }, + { + "epoch": 0.7604430379746835, + "grad_norm": 0.7054979205131531, + "learning_rate": 0.00020648074176964182, + "loss": 1.452, + "step": 7209 + }, + { + "epoch": 0.760548523206751, + "grad_norm": 0.6567054390907288, + "learning_rate": 0.00020630777393062575, + "loss": 1.4706, + "step": 7210 + }, + { + "epoch": 0.7606540084388186, + "grad_norm": 0.6493304371833801, + "learning_rate": 0.00020613486701503473, + "loss": 1.4953, + "step": 7211 + }, + { + "epoch": 0.760759493670886, + "grad_norm": 0.6700956225395203, + "learning_rate": 0.00020596202104224376, + "loss": 1.4656, + "step": 7212 + }, + { + "epoch": 0.7608649789029536, + "grad_norm": 0.7152858972549438, + "learning_rate": 0.0002057892360316212, + "loss": 1.4883, + "step": 7213 + }, + { + "epoch": 0.7609704641350211, + "grad_norm": 0.6197860240936279, + "learning_rate": 0.00020561651200252836, + "loss": 1.4965, + "step": 7214 + }, + { + "epoch": 0.7610759493670886, + "grad_norm": 0.6027498841285706, + "learning_rate": 0.00020544384897431997, + "loss": 1.4853, + "step": 7215 + }, + { + "epoch": 
0.7611814345991561, + "grad_norm": 0.58158278465271, + "learning_rate": 0.00020527124696634343, + "loss": 1.4699, + "step": 7216 + }, + { + "epoch": 0.7612869198312237, + "grad_norm": 0.6440662145614624, + "learning_rate": 0.00020509870599794022, + "loss": 1.4985, + "step": 7217 + }, + { + "epoch": 0.7613924050632911, + "grad_norm": 0.6044250726699829, + "learning_rate": 0.0002049262260884441, + "loss": 1.4991, + "step": 7218 + }, + { + "epoch": 0.7614978902953586, + "grad_norm": 0.614352285861969, + "learning_rate": 0.00020475380725718228, + "loss": 1.4625, + "step": 7219 + }, + { + "epoch": 0.7616033755274262, + "grad_norm": 0.6108061075210571, + "learning_rate": 0.00020458144952347523, + "loss": 1.4633, + "step": 7220 + }, + { + "epoch": 0.7617088607594936, + "grad_norm": 0.7008078694343567, + "learning_rate": 0.0002044091529066365, + "loss": 1.4836, + "step": 7221 + }, + { + "epoch": 0.7618143459915612, + "grad_norm": 0.5791987180709839, + "learning_rate": 0.00020423691742597273, + "loss": 1.4593, + "step": 7222 + }, + { + "epoch": 0.7619198312236287, + "grad_norm": 0.5894331336021423, + "learning_rate": 0.0002040647431007837, + "loss": 1.4581, + "step": 7223 + }, + { + "epoch": 0.7620253164556962, + "grad_norm": 0.6850675344467163, + "learning_rate": 0.00020389262995036263, + "loss": 1.4654, + "step": 7224 + }, + { + "epoch": 0.7621308016877637, + "grad_norm": 0.675846517086029, + "learning_rate": 0.00020372057799399534, + "loss": 1.498, + "step": 7225 + }, + { + "epoch": 0.7622362869198313, + "grad_norm": 0.6135226488113403, + "learning_rate": 0.00020354858725096122, + "loss": 1.4773, + "step": 7226 + }, + { + "epoch": 0.7623417721518987, + "grad_norm": 0.7217857837677002, + "learning_rate": 0.00020337665774053284, + "loss": 1.4539, + "step": 7227 + }, + { + "epoch": 0.7624472573839662, + "grad_norm": 0.653117835521698, + "learning_rate": 0.0002032047894819758, + "loss": 1.474, + "step": 7228 + }, + { + "epoch": 0.7625527426160338, + "grad_norm": 
0.6239475607872009, + "learning_rate": 0.00020303298249454857, + "loss": 1.5224, + "step": 7229 + }, + { + "epoch": 0.7626582278481012, + "grad_norm": 0.6090491414070129, + "learning_rate": 0.00020286123679750314, + "loss": 1.4861, + "step": 7230 + }, + { + "epoch": 0.7627637130801688, + "grad_norm": 0.6356939077377319, + "learning_rate": 0.00020268955241008437, + "loss": 1.4977, + "step": 7231 + }, + { + "epoch": 0.7628691983122363, + "grad_norm": 0.5950292944908142, + "learning_rate": 0.00020251792935153037, + "loss": 1.4725, + "step": 7232 + }, + { + "epoch": 0.7629746835443038, + "grad_norm": 0.6472575068473816, + "learning_rate": 0.0002023463676410724, + "loss": 1.465, + "step": 7233 + }, + { + "epoch": 0.7630801687763713, + "grad_norm": 0.6883344650268555, + "learning_rate": 0.0002021748672979348, + "loss": 1.4772, + "step": 7234 + }, + { + "epoch": 0.7631856540084389, + "grad_norm": 0.637988269329071, + "learning_rate": 0.00020200342834133497, + "loss": 1.5018, + "step": 7235 + }, + { + "epoch": 0.7632911392405063, + "grad_norm": 0.6072467565536499, + "learning_rate": 0.00020183205079048338, + "loss": 1.4457, + "step": 7236 + }, + { + "epoch": 0.7633966244725738, + "grad_norm": 0.7126320004463196, + "learning_rate": 0.0002016607346645841, + "loss": 1.5011, + "step": 7237 + }, + { + "epoch": 0.7635021097046414, + "grad_norm": 0.6080449223518372, + "learning_rate": 0.00020148947998283381, + "loss": 1.4966, + "step": 7238 + }, + { + "epoch": 0.7636075949367088, + "grad_norm": 0.6192559003829956, + "learning_rate": 0.00020131828676442237, + "loss": 1.4533, + "step": 7239 + }, + { + "epoch": 0.7637130801687764, + "grad_norm": 0.6630073189735413, + "learning_rate": 0.00020114715502853292, + "loss": 1.4619, + "step": 7240 + }, + { + "epoch": 0.7638185654008439, + "grad_norm": 0.5966270565986633, + "learning_rate": 0.00020097608479434153, + "loss": 1.4373, + "step": 7241 + }, + { + "epoch": 0.7639240506329114, + "grad_norm": 0.6280491948127747, + "learning_rate": 
0.00020080507608101757, + "loss": 1.5154, + "step": 7242 + }, + { + "epoch": 0.7640295358649789, + "grad_norm": 0.6567384004592896, + "learning_rate": 0.0002006341289077233, + "loss": 1.4863, + "step": 7243 + }, + { + "epoch": 0.7641350210970465, + "grad_norm": 0.6241922974586487, + "learning_rate": 0.00020046324329361432, + "loss": 1.4696, + "step": 7244 + }, + { + "epoch": 0.7642405063291139, + "grad_norm": 0.6233017444610596, + "learning_rate": 0.00020029241925783908, + "loss": 1.4463, + "step": 7245 + }, + { + "epoch": 0.7643459915611814, + "grad_norm": 0.5986643433570862, + "learning_rate": 0.00020012165681953923, + "loss": 1.4434, + "step": 7246 + }, + { + "epoch": 0.764451476793249, + "grad_norm": 0.5876789689064026, + "learning_rate": 0.00019995095599784985, + "loss": 1.4757, + "step": 7247 + }, + { + "epoch": 0.7645569620253164, + "grad_norm": 0.5892227292060852, + "learning_rate": 0.00019978031681189864, + "loss": 1.4435, + "step": 7248 + }, + { + "epoch": 0.764662447257384, + "grad_norm": 0.7407413125038147, + "learning_rate": 0.00019960973928080666, + "loss": 1.5134, + "step": 7249 + }, + { + "epoch": 0.7647679324894515, + "grad_norm": 0.6265819668769836, + "learning_rate": 0.0001994392234236878, + "loss": 1.4667, + "step": 7250 + }, + { + "epoch": 0.764873417721519, + "grad_norm": 0.6315609812736511, + "learning_rate": 0.00019926876925964928, + "loss": 1.4725, + "step": 7251 + }, + { + "epoch": 0.7649789029535865, + "grad_norm": 0.6352424621582031, + "learning_rate": 0.00019909837680779141, + "loss": 1.4427, + "step": 7252 + }, + { + "epoch": 0.765084388185654, + "grad_norm": 0.6157045960426331, + "learning_rate": 0.00019892804608720747, + "loss": 1.4757, + "step": 7253 + }, + { + "epoch": 0.7651898734177215, + "grad_norm": 0.9267067313194275, + "learning_rate": 0.00019875777711698384, + "loss": 1.4863, + "step": 7254 + }, + { + "epoch": 0.765295358649789, + "grad_norm": 0.7038561701774597, + "learning_rate": 0.00019858756991619978, + "loss": 1.4763, + 
"step": 7255 + }, + { + "epoch": 0.7654008438818566, + "grad_norm": 0.6268599629402161, + "learning_rate": 0.00019841742450392837, + "loss": 1.4765, + "step": 7256 + }, + { + "epoch": 0.765506329113924, + "grad_norm": 0.908359944820404, + "learning_rate": 0.0001982473408992349, + "loss": 1.4767, + "step": 7257 + }, + { + "epoch": 0.7656118143459916, + "grad_norm": 0.7011672258377075, + "learning_rate": 0.00019807731912117828, + "loss": 1.4582, + "step": 7258 + }, + { + "epoch": 0.7657172995780591, + "grad_norm": 0.6287122368812561, + "learning_rate": 0.0001979073591888101, + "loss": 1.4955, + "step": 7259 + }, + { + "epoch": 0.7658227848101266, + "grad_norm": 0.6314584612846375, + "learning_rate": 0.0001977374611211754, + "loss": 1.4121, + "step": 7260 + }, + { + "epoch": 0.7659282700421941, + "grad_norm": 0.6570382118225098, + "learning_rate": 0.00019756762493731192, + "loss": 1.4369, + "step": 7261 + }, + { + "epoch": 0.7660337552742617, + "grad_norm": 0.6483246684074402, + "learning_rate": 0.00019739785065625077, + "loss": 1.4492, + "step": 7262 + }, + { + "epoch": 0.7661392405063291, + "grad_norm": 0.6434995532035828, + "learning_rate": 0.00019722813829701593, + "loss": 1.4645, + "step": 7263 + }, + { + "epoch": 0.7662447257383966, + "grad_norm": 0.5655763745307922, + "learning_rate": 0.0001970584878786244, + "loss": 1.4977, + "step": 7264 + }, + { + "epoch": 0.7663502109704642, + "grad_norm": 0.6467612385749817, + "learning_rate": 0.0001968888994200868, + "loss": 1.4507, + "step": 7265 + }, + { + "epoch": 0.7664556962025316, + "grad_norm": 0.6160171627998352, + "learning_rate": 0.00019671937294040595, + "loss": 1.4363, + "step": 7266 + }, + { + "epoch": 0.7665611814345992, + "grad_norm": 0.6358802318572998, + "learning_rate": 0.00019654990845857832, + "loss": 1.4548, + "step": 7267 + }, + { + "epoch": 0.7666666666666667, + "grad_norm": 0.6266970634460449, + "learning_rate": 0.00019638050599359326, + "loss": 1.4883, + "step": 7268 + }, + { + "epoch": 
0.7667721518987342, + "grad_norm": 0.588821291923523, + "learning_rate": 0.000196211165564433, + "loss": 1.4798, + "step": 7269 + }, + { + "epoch": 0.7668776371308017, + "grad_norm": 0.6626696586608887, + "learning_rate": 0.00019604188719007313, + "loss": 1.5111, + "step": 7270 + }, + { + "epoch": 0.7669831223628693, + "grad_norm": 0.6406923532485962, + "learning_rate": 0.00019587267088948214, + "loss": 1.4721, + "step": 7271 + }, + { + "epoch": 0.7670886075949367, + "grad_norm": 0.6232843995094299, + "learning_rate": 0.00019570351668162143, + "loss": 1.4716, + "step": 7272 + }, + { + "epoch": 0.7671940928270042, + "grad_norm": 0.7155358791351318, + "learning_rate": 0.00019553442458544542, + "loss": 1.4792, + "step": 7273 + }, + { + "epoch": 0.7672995780590718, + "grad_norm": 0.614332377910614, + "learning_rate": 0.00019536539461990224, + "loss": 1.4306, + "step": 7274 + }, + { + "epoch": 0.7674050632911392, + "grad_norm": 0.6289480924606323, + "learning_rate": 0.0001951964268039322, + "loss": 1.4656, + "step": 7275 + }, + { + "epoch": 0.7675105485232068, + "grad_norm": 0.6721113920211792, + "learning_rate": 0.00019502752115646901, + "loss": 1.512, + "step": 7276 + }, + { + "epoch": 0.7676160337552742, + "grad_norm": 0.7293909192085266, + "learning_rate": 0.00019485867769643945, + "loss": 1.4486, + "step": 7277 + }, + { + "epoch": 0.7677215189873418, + "grad_norm": 0.6549577116966248, + "learning_rate": 0.0001946898964427633, + "loss": 1.4959, + "step": 7278 + }, + { + "epoch": 0.7678270042194093, + "grad_norm": 0.7113741040229797, + "learning_rate": 0.00019452117741435314, + "loss": 1.4692, + "step": 7279 + }, + { + "epoch": 0.7679324894514767, + "grad_norm": 0.7080508470535278, + "learning_rate": 0.00019435252063011504, + "loss": 1.4508, + "step": 7280 + }, + { + "epoch": 0.7680379746835443, + "grad_norm": 0.6918862462043762, + "learning_rate": 0.00019418392610894768, + "loss": 1.515, + "step": 7281 + }, + { + "epoch": 0.7681434599156118, + "grad_norm": 
0.6840686798095703, + "learning_rate": 0.0001940153938697427, + "loss": 1.5179, + "step": 7282 + }, + { + "epoch": 0.7682489451476793, + "grad_norm": 0.5780191421508789, + "learning_rate": 0.0001938469239313855, + "loss": 1.467, + "step": 7283 + }, + { + "epoch": 0.7683544303797468, + "grad_norm": 0.7133517265319824, + "learning_rate": 0.00019367851631275362, + "loss": 1.4615, + "step": 7284 + }, + { + "epoch": 0.7684599156118144, + "grad_norm": 0.5849462151527405, + "learning_rate": 0.00019351017103271805, + "loss": 1.4909, + "step": 7285 + }, + { + "epoch": 0.7685654008438818, + "grad_norm": 0.6210529804229736, + "learning_rate": 0.00019334188811014278, + "loss": 1.4644, + "step": 7286 + }, + { + "epoch": 0.7686708860759494, + "grad_norm": 0.6917125582695007, + "learning_rate": 0.00019317366756388477, + "loss": 1.4547, + "step": 7287 + }, + { + "epoch": 0.7687763713080169, + "grad_norm": 0.6478434205055237, + "learning_rate": 0.0001930055094127938, + "loss": 1.4587, + "step": 7288 + }, + { + "epoch": 0.7688818565400843, + "grad_norm": 0.676111102104187, + "learning_rate": 0.00019283741367571294, + "loss": 1.4893, + "step": 7289 + }, + { + "epoch": 0.7689873417721519, + "grad_norm": 0.6209662556648254, + "learning_rate": 0.0001926693803714779, + "loss": 1.4568, + "step": 7290 + }, + { + "epoch": 0.7690928270042194, + "grad_norm": 0.6100132465362549, + "learning_rate": 0.00019250140951891813, + "loss": 1.5404, + "step": 7291 + }, + { + "epoch": 0.7691983122362869, + "grad_norm": 0.6319943070411682, + "learning_rate": 0.00019233350113685536, + "loss": 1.4821, + "step": 7292 + }, + { + "epoch": 0.7693037974683544, + "grad_norm": 0.6145641803741455, + "learning_rate": 0.00019216565524410455, + "loss": 1.4473, + "step": 7293 + }, + { + "epoch": 0.769409282700422, + "grad_norm": 0.6262165904045105, + "learning_rate": 0.0001919978718594738, + "loss": 1.4527, + "step": 7294 + }, + { + "epoch": 0.7695147679324894, + "grad_norm": 0.6472716331481934, + "learning_rate": 
0.0001918301510017638, + "loss": 1.5038, + "step": 7295 + }, + { + "epoch": 0.769620253164557, + "grad_norm": 0.6998957991600037, + "learning_rate": 0.0001916624926897687, + "loss": 1.4821, + "step": 7296 + }, + { + "epoch": 0.7697257383966245, + "grad_norm": 0.6434881091117859, + "learning_rate": 0.0001914948969422755, + "loss": 1.4739, + "step": 7297 + }, + { + "epoch": 0.7698312236286919, + "grad_norm": 0.6216102242469788, + "learning_rate": 0.00019132736377806394, + "loss": 1.4537, + "step": 7298 + }, + { + "epoch": 0.7699367088607595, + "grad_norm": 0.6275684237480164, + "learning_rate": 0.00019115989321590694, + "loss": 1.4757, + "step": 7299 + }, + { + "epoch": 0.770042194092827, + "grad_norm": 0.6722654700279236, + "learning_rate": 0.00019099248527457068, + "loss": 1.5156, + "step": 7300 + }, + { + "epoch": 0.7701476793248945, + "grad_norm": 0.6183315515518188, + "learning_rate": 0.00019082513997281398, + "loss": 1.4721, + "step": 7301 + }, + { + "epoch": 0.770253164556962, + "grad_norm": 0.6093221306800842, + "learning_rate": 0.0001906578573293886, + "loss": 1.4318, + "step": 7302 + }, + { + "epoch": 0.7703586497890296, + "grad_norm": 0.6786752939224243, + "learning_rate": 0.00019049063736303946, + "loss": 1.4737, + "step": 7303 + }, + { + "epoch": 0.770464135021097, + "grad_norm": 0.5696977376937866, + "learning_rate": 0.00019032348009250433, + "loss": 1.4709, + "step": 7304 + }, + { + "epoch": 0.7705696202531646, + "grad_norm": 0.6424058079719543, + "learning_rate": 0.0001901563855365141, + "loss": 1.4417, + "step": 7305 + }, + { + "epoch": 0.7706751054852321, + "grad_norm": 0.6213880777359009, + "learning_rate": 0.00018998935371379252, + "loss": 1.4652, + "step": 7306 + }, + { + "epoch": 0.7707805907172995, + "grad_norm": 0.6545773148536682, + "learning_rate": 0.00018982238464305623, + "loss": 1.4837, + "step": 7307 + }, + { + "epoch": 0.7708860759493671, + "grad_norm": 0.5816702246665955, + "learning_rate": 0.0001896554783430149, + "loss": 1.4677, + 
"step": 7308 + }, + { + "epoch": 0.7709915611814346, + "grad_norm": 0.619512140750885, + "learning_rate": 0.00018948863483237154, + "loss": 1.4676, + "step": 7309 + }, + { + "epoch": 0.7710970464135021, + "grad_norm": 0.6686210036277771, + "learning_rate": 0.0001893218541298216, + "loss": 1.4474, + "step": 7310 + }, + { + "epoch": 0.7712025316455696, + "grad_norm": 0.6059867143630981, + "learning_rate": 0.00018915513625405374, + "loss": 1.4693, + "step": 7311 + }, + { + "epoch": 0.7713080168776372, + "grad_norm": 0.5987179279327393, + "learning_rate": 0.00018898848122374942, + "loss": 1.4755, + "step": 7312 + }, + { + "epoch": 0.7714135021097046, + "grad_norm": 0.689303994178772, + "learning_rate": 0.00018882188905758326, + "loss": 1.471, + "step": 7313 + }, + { + "epoch": 0.7715189873417722, + "grad_norm": 0.6371836066246033, + "learning_rate": 0.00018865535977422273, + "loss": 1.4521, + "step": 7314 + }, + { + "epoch": 0.7716244725738397, + "grad_norm": 0.5814570784568787, + "learning_rate": 0.00018848889339232833, + "loss": 1.443, + "step": 7315 + }, + { + "epoch": 0.7717299578059071, + "grad_norm": 0.6661842465400696, + "learning_rate": 0.00018832248993055304, + "loss": 1.4856, + "step": 7316 + }, + { + "epoch": 0.7718354430379747, + "grad_norm": 0.7029836177825928, + "learning_rate": 0.00018815614940754377, + "loss": 1.4318, + "step": 7317 + }, + { + "epoch": 0.7719409282700422, + "grad_norm": 0.5943669080734253, + "learning_rate": 0.00018798987184193963, + "loss": 1.4464, + "step": 7318 + }, + { + "epoch": 0.7720464135021097, + "grad_norm": 0.6091165542602539, + "learning_rate": 0.00018782365725237272, + "loss": 1.468, + "step": 7319 + }, + { + "epoch": 0.7721518987341772, + "grad_norm": 0.6186548471450806, + "learning_rate": 0.00018765750565746827, + "loss": 1.4622, + "step": 7320 + }, + { + "epoch": 0.7722573839662448, + "grad_norm": 0.614872395992279, + "learning_rate": 0.00018749141707584443, + "loss": 1.5094, + "step": 7321 + }, + { + "epoch": 
0.7723628691983122, + "grad_norm": 0.7058675289154053, + "learning_rate": 0.0001873253915261123, + "loss": 1.4441, + "step": 7322 + }, + { + "epoch": 0.7724683544303798, + "grad_norm": 0.6109932661056519, + "learning_rate": 0.00018715942902687566, + "loss": 1.4449, + "step": 7323 + }, + { + "epoch": 0.7725738396624473, + "grad_norm": 0.608079731464386, + "learning_rate": 0.00018699352959673172, + "loss": 1.4614, + "step": 7324 + }, + { + "epoch": 0.7726793248945147, + "grad_norm": 0.6713806986808777, + "learning_rate": 0.00018682769325426986, + "loss": 1.4883, + "step": 7325 + }, + { + "epoch": 0.7727848101265823, + "grad_norm": 0.6280422806739807, + "learning_rate": 0.00018666192001807344, + "loss": 1.4656, + "step": 7326 + }, + { + "epoch": 0.7728902953586498, + "grad_norm": 0.6063868403434753, + "learning_rate": 0.00018649620990671798, + "loss": 1.4294, + "step": 7327 + }, + { + "epoch": 0.7729957805907173, + "grad_norm": 0.6617334485054016, + "learning_rate": 0.00018633056293877203, + "loss": 1.4618, + "step": 7328 + }, + { + "epoch": 0.7731012658227848, + "grad_norm": 0.6669489145278931, + "learning_rate": 0.00018616497913279728, + "loss": 1.4324, + "step": 7329 + }, + { + "epoch": 0.7732067510548524, + "grad_norm": 0.7260106205940247, + "learning_rate": 0.00018599945850734812, + "loss": 1.5231, + "step": 7330 + }, + { + "epoch": 0.7733122362869198, + "grad_norm": 0.7378202080726624, + "learning_rate": 0.00018583400108097194, + "loss": 1.4419, + "step": 7331 + }, + { + "epoch": 0.7734177215189874, + "grad_norm": 0.6203836798667908, + "learning_rate": 0.00018566860687220922, + "loss": 1.4675, + "step": 7332 + }, + { + "epoch": 0.7735232067510549, + "grad_norm": 0.6271997690200806, + "learning_rate": 0.00018550327589959308, + "loss": 1.475, + "step": 7333 + }, + { + "epoch": 0.7736286919831223, + "grad_norm": 0.7201666831970215, + "learning_rate": 0.00018533800818164943, + "loss": 1.4472, + "step": 7334 + }, + { + "epoch": 0.7737341772151899, + "grad_norm": 
0.6595064997673035, + "learning_rate": 0.00018517280373689789, + "loss": 1.5009, + "step": 7335 + }, + { + "epoch": 0.7738396624472574, + "grad_norm": 0.626868724822998, + "learning_rate": 0.0001850076625838502, + "loss": 1.4418, + "step": 7336 + }, + { + "epoch": 0.7739451476793249, + "grad_norm": 0.6589841246604919, + "learning_rate": 0.0001848425847410112, + "loss": 1.4324, + "step": 7337 + }, + { + "epoch": 0.7740506329113924, + "grad_norm": 0.6974648833274841, + "learning_rate": 0.00018467757022687864, + "loss": 1.4645, + "step": 7338 + }, + { + "epoch": 0.77415611814346, + "grad_norm": 0.6126692295074463, + "learning_rate": 0.0001845126190599434, + "loss": 1.48, + "step": 7339 + }, + { + "epoch": 0.7742616033755274, + "grad_norm": 0.6848926544189453, + "learning_rate": 0.00018434773125868895, + "loss": 1.4603, + "step": 7340 + }, + { + "epoch": 0.774367088607595, + "grad_norm": 0.6613487005233765, + "learning_rate": 0.00018418290684159175, + "loss": 1.4557, + "step": 7341 + }, + { + "epoch": 0.7744725738396624, + "grad_norm": 0.6226844787597656, + "learning_rate": 0.00018401814582712103, + "loss": 1.4648, + "step": 7342 + }, + { + "epoch": 0.7745780590717299, + "grad_norm": 0.5927276611328125, + "learning_rate": 0.0001838534482337396, + "loss": 1.4291, + "step": 7343 + }, + { + "epoch": 0.7746835443037975, + "grad_norm": 0.6088572144508362, + "learning_rate": 0.0001836888140799023, + "loss": 1.4943, + "step": 7344 + }, + { + "epoch": 0.7747890295358649, + "grad_norm": 0.5861511826515198, + "learning_rate": 0.0001835242433840573, + "loss": 1.4582, + "step": 7345 + }, + { + "epoch": 0.7748945147679325, + "grad_norm": 0.5825832486152649, + "learning_rate": 0.00018335973616464554, + "loss": 1.4151, + "step": 7346 + }, + { + "epoch": 0.775, + "grad_norm": 0.6052067875862122, + "learning_rate": 0.00018319529244010082, + "loss": 1.4885, + "step": 7347 + }, + { + "epoch": 0.7751054852320675, + "grad_norm": 0.5988431572914124, + "learning_rate": 
0.00018303091222884998, + "loss": 1.4939, + "step": 7348 + }, + { + "epoch": 0.775210970464135, + "grad_norm": 0.5789024829864502, + "learning_rate": 0.00018286659554931254, + "loss": 1.4613, + "step": 7349 + }, + { + "epoch": 0.7753164556962026, + "grad_norm": 0.5784099102020264, + "learning_rate": 0.00018270234241990108, + "loss": 1.4387, + "step": 7350 + }, + { + "epoch": 0.77542194092827, + "grad_norm": 0.6053193211555481, + "learning_rate": 0.00018253815285902074, + "loss": 1.442, + "step": 7351 + }, + { + "epoch": 0.7755274261603375, + "grad_norm": 0.6314154267311096, + "learning_rate": 0.0001823740268850702, + "loss": 1.4863, + "step": 7352 + }, + { + "epoch": 0.7756329113924051, + "grad_norm": 0.6139577031135559, + "learning_rate": 0.0001822099645164404, + "loss": 1.4706, + "step": 7353 + }, + { + "epoch": 0.7757383966244725, + "grad_norm": 0.6248915791511536, + "learning_rate": 0.00018204596577151534, + "loss": 1.4353, + "step": 7354 + }, + { + "epoch": 0.7758438818565401, + "grad_norm": 0.6399029493331909, + "learning_rate": 0.00018188203066867178, + "loss": 1.4638, + "step": 7355 + }, + { + "epoch": 0.7759493670886076, + "grad_norm": 0.670861005783081, + "learning_rate": 0.00018171815922627974, + "loss": 1.4999, + "step": 7356 + }, + { + "epoch": 0.7760548523206751, + "grad_norm": 0.6753625869750977, + "learning_rate": 0.00018155435146270158, + "loss": 1.4667, + "step": 7357 + }, + { + "epoch": 0.7761603375527426, + "grad_norm": 0.5961960554122925, + "learning_rate": 0.00018139060739629287, + "loss": 1.4886, + "step": 7358 + }, + { + "epoch": 0.7762658227848102, + "grad_norm": 0.6762723922729492, + "learning_rate": 0.00018122692704540194, + "loss": 1.4746, + "step": 7359 + }, + { + "epoch": 0.7763713080168776, + "grad_norm": 0.6167807579040527, + "learning_rate": 0.0001810633104283698, + "loss": 1.4586, + "step": 7360 + }, + { + "epoch": 0.7764767932489451, + "grad_norm": 0.5899614095687866, + "learning_rate": 0.00018089975756353083, + "loss": 1.4946, + 
"step": 7361 + }, + { + "epoch": 0.7765822784810127, + "grad_norm": 0.6918672323226929, + "learning_rate": 0.0001807362684692119, + "loss": 1.4917, + "step": 7362 + }, + { + "epoch": 0.7766877637130801, + "grad_norm": 0.6071878671646118, + "learning_rate": 0.00018057284316373267, + "loss": 1.4359, + "step": 7363 + }, + { + "epoch": 0.7767932489451477, + "grad_norm": 0.6297783851623535, + "learning_rate": 0.00018040948166540586, + "loss": 1.4341, + "step": 7364 + }, + { + "epoch": 0.7768987341772152, + "grad_norm": 0.617218554019928, + "learning_rate": 0.0001802461839925368, + "loss": 1.4508, + "step": 7365 + }, + { + "epoch": 0.7770042194092827, + "grad_norm": 0.6638060808181763, + "learning_rate": 0.00018008295016342383, + "loss": 1.4619, + "step": 7366 + }, + { + "epoch": 0.7771097046413502, + "grad_norm": 0.6496181488037109, + "learning_rate": 0.00017991978019635819, + "loss": 1.4716, + "step": 7367 + }, + { + "epoch": 0.7772151898734178, + "grad_norm": 0.6383330821990967, + "learning_rate": 0.00017975667410962366, + "loss": 1.4374, + "step": 7368 + }, + { + "epoch": 0.7773206751054852, + "grad_norm": 0.6343960165977478, + "learning_rate": 0.00017959363192149752, + "loss": 1.495, + "step": 7369 + }, + { + "epoch": 0.7774261603375527, + "grad_norm": 0.5998852252960205, + "learning_rate": 0.0001794306536502492, + "loss": 1.5104, + "step": 7370 + }, + { + "epoch": 0.7775316455696203, + "grad_norm": 0.6171272993087769, + "learning_rate": 0.0001792677393141412, + "loss": 1.4675, + "step": 7371 + }, + { + "epoch": 0.7776371308016877, + "grad_norm": 0.6254709959030151, + "learning_rate": 0.00017910488893142903, + "loss": 1.4884, + "step": 7372 + }, + { + "epoch": 0.7777426160337553, + "grad_norm": 0.6046574711799622, + "learning_rate": 0.00017894210252036069, + "loss": 1.4229, + "step": 7373 + }, + { + "epoch": 0.7778481012658228, + "grad_norm": 0.5715307593345642, + "learning_rate": 0.0001787793800991774, + "loss": 1.4618, + "step": 7374 + }, + { + "epoch": 
0.7779535864978903, + "grad_norm": 0.6728099584579468, + "learning_rate": 0.00017861672168611293, + "loss": 1.4989, + "step": 7375 + }, + { + "epoch": 0.7780590717299578, + "grad_norm": 0.6189089417457581, + "learning_rate": 0.0001784541272993939, + "loss": 1.4304, + "step": 7376 + }, + { + "epoch": 0.7781645569620254, + "grad_norm": 0.6425363421440125, + "learning_rate": 0.00017829159695723973, + "loss": 1.4462, + "step": 7377 + }, + { + "epoch": 0.7782700421940928, + "grad_norm": 0.6184316277503967, + "learning_rate": 0.00017812913067786313, + "loss": 1.4234, + "step": 7378 + }, + { + "epoch": 0.7783755274261603, + "grad_norm": 0.5659582018852234, + "learning_rate": 0.00017796672847946905, + "loss": 1.4566, + "step": 7379 + }, + { + "epoch": 0.7784810126582279, + "grad_norm": 0.6283172965049744, + "learning_rate": 0.0001778043903802555, + "loss": 1.4517, + "step": 7380 + }, + { + "epoch": 0.7785864978902953, + "grad_norm": 0.6084624528884888, + "learning_rate": 0.00017764211639841312, + "loss": 1.4643, + "step": 7381 + }, + { + "epoch": 0.7786919831223629, + "grad_norm": 0.6418417692184448, + "learning_rate": 0.0001774799065521257, + "loss": 1.5502, + "step": 7382 + }, + { + "epoch": 0.7787974683544304, + "grad_norm": 0.6199612021446228, + "learning_rate": 0.0001773177608595696, + "loss": 1.4643, + "step": 7383 + }, + { + "epoch": 0.7789029535864979, + "grad_norm": 0.64478600025177, + "learning_rate": 0.00017715567933891405, + "loss": 1.4537, + "step": 7384 + }, + { + "epoch": 0.7790084388185654, + "grad_norm": 0.5878441333770752, + "learning_rate": 0.0001769936620083211, + "loss": 1.4684, + "step": 7385 + }, + { + "epoch": 0.779113924050633, + "grad_norm": 0.6779734492301941, + "learning_rate": 0.0001768317088859453, + "loss": 1.4162, + "step": 7386 + }, + { + "epoch": 0.7792194092827004, + "grad_norm": 0.5683178901672363, + "learning_rate": 0.0001766698199899349, + "loss": 1.4805, + "step": 7387 + }, + { + "epoch": 0.7793248945147679, + "grad_norm": 
0.5962233543395996, + "learning_rate": 0.00017650799533842996, + "loss": 1.447, + "step": 7388 + }, + { + "epoch": 0.7794303797468355, + "grad_norm": 0.6410964131355286, + "learning_rate": 0.0001763462349495639, + "loss": 1.4559, + "step": 7389 + }, + { + "epoch": 0.7795358649789029, + "grad_norm": 0.6156800389289856, + "learning_rate": 0.0001761845388414627, + "loss": 1.4509, + "step": 7390 + }, + { + "epoch": 0.7796413502109705, + "grad_norm": 0.6367402672767639, + "learning_rate": 0.00017602290703224525, + "loss": 1.4621, + "step": 7391 + }, + { + "epoch": 0.779746835443038, + "grad_norm": 0.6666606664657593, + "learning_rate": 0.00017586133954002308, + "loss": 1.49, + "step": 7392 + }, + { + "epoch": 0.7798523206751055, + "grad_norm": 0.6153599619865417, + "learning_rate": 0.00017569983638290084, + "loss": 1.4661, + "step": 7393 + }, + { + "epoch": 0.779957805907173, + "grad_norm": 0.6212663650512695, + "learning_rate": 0.0001755383975789754, + "loss": 1.4647, + "step": 7394 + }, + { + "epoch": 0.7800632911392406, + "grad_norm": 0.6296415328979492, + "learning_rate": 0.00017537702314633722, + "loss": 1.4559, + "step": 7395 + }, + { + "epoch": 0.780168776371308, + "grad_norm": 0.6152603626251221, + "learning_rate": 0.00017521571310306889, + "loss": 1.5167, + "step": 7396 + }, + { + "epoch": 0.7802742616033755, + "grad_norm": 0.6374273300170898, + "learning_rate": 0.0001750544674672461, + "loss": 1.4738, + "step": 7397 + }, + { + "epoch": 0.7803797468354431, + "grad_norm": 0.679121196269989, + "learning_rate": 0.00017489328625693715, + "loss": 1.4542, + "step": 7398 + }, + { + "epoch": 0.7804852320675105, + "grad_norm": 0.6789466142654419, + "learning_rate": 0.00017473216949020326, + "loss": 1.4725, + "step": 7399 + }, + { + "epoch": 0.7805907172995781, + "grad_norm": 0.6110801696777344, + "learning_rate": 0.00017457111718509831, + "loss": 1.4772, + "step": 7400 + }, + { + "epoch": 0.7806962025316456, + "grad_norm": 0.7234927415847778, + "learning_rate": 
0.00017441012935966898, + "loss": 1.4278, + "step": 7401 + }, + { + "epoch": 0.7808016877637131, + "grad_norm": 0.6921749114990234, + "learning_rate": 0.00017424920603195483, + "loss": 1.4732, + "step": 7402 + }, + { + "epoch": 0.7809071729957806, + "grad_norm": 0.5818843245506287, + "learning_rate": 0.0001740883472199879, + "loss": 1.4397, + "step": 7403 + }, + { + "epoch": 0.7810126582278482, + "grad_norm": 0.6536332368850708, + "learning_rate": 0.00017392755294179363, + "loss": 1.4706, + "step": 7404 + }, + { + "epoch": 0.7811181434599156, + "grad_norm": 0.6593970656394958, + "learning_rate": 0.0001737668232153896, + "loss": 1.4509, + "step": 7405 + }, + { + "epoch": 0.7812236286919831, + "grad_norm": 0.6249573826789856, + "learning_rate": 0.00017360615805878636, + "loss": 1.4533, + "step": 7406 + }, + { + "epoch": 0.7813291139240506, + "grad_norm": 0.6035283803939819, + "learning_rate": 0.00017344555748998727, + "loss": 1.4532, + "step": 7407 + }, + { + "epoch": 0.7814345991561181, + "grad_norm": 0.6554306745529175, + "learning_rate": 0.0001732850215269885, + "loss": 1.4635, + "step": 7408 + }, + { + "epoch": 0.7815400843881857, + "grad_norm": 0.6401475667953491, + "learning_rate": 0.0001731245501877787, + "loss": 1.4825, + "step": 7409 + }, + { + "epoch": 0.7816455696202531, + "grad_norm": 0.5929785966873169, + "learning_rate": 0.00017296414349033976, + "loss": 1.4402, + "step": 7410 + }, + { + "epoch": 0.7817510548523207, + "grad_norm": 0.6600140333175659, + "learning_rate": 0.0001728038014526458, + "loss": 1.4727, + "step": 7411 + }, + { + "epoch": 0.7818565400843882, + "grad_norm": 0.6411309242248535, + "learning_rate": 0.00017264352409266385, + "loss": 1.4747, + "step": 7412 + }, + { + "epoch": 0.7819620253164556, + "grad_norm": 0.6511048078536987, + "learning_rate": 0.0001724833114283542, + "loss": 1.467, + "step": 7413 + }, + { + "epoch": 0.7820675105485232, + "grad_norm": 0.644035279750824, + "learning_rate": 0.0001723231634776693, + "loss": 1.4684, + 
"step": 7414 + }, + { + "epoch": 0.7821729957805907, + "grad_norm": 0.6819643378257751, + "learning_rate": 0.0001721630802585545, + "loss": 1.4274, + "step": 7415 + }, + { + "epoch": 0.7822784810126582, + "grad_norm": 0.6705318093299866, + "learning_rate": 0.00017200306178894785, + "loss": 1.4176, + "step": 7416 + }, + { + "epoch": 0.7823839662447257, + "grad_norm": 0.6065923571586609, + "learning_rate": 0.00017184310808678028, + "loss": 1.4383, + "step": 7417 + }, + { + "epoch": 0.7824894514767933, + "grad_norm": 0.6916922330856323, + "learning_rate": 0.00017168321916997547, + "loss": 1.4964, + "step": 7418 + }, + { + "epoch": 0.7825949367088607, + "grad_norm": 0.6805013418197632, + "learning_rate": 0.00017152339505644963, + "loss": 1.4854, + "step": 7419 + }, + { + "epoch": 0.7827004219409283, + "grad_norm": 0.6162799000740051, + "learning_rate": 0.00017136363576411172, + "loss": 1.4469, + "step": 7420 + }, + { + "epoch": 0.7828059071729958, + "grad_norm": 0.6047887802124023, + "learning_rate": 0.00017120394131086398, + "loss": 1.4573, + "step": 7421 + }, + { + "epoch": 0.7829113924050632, + "grad_norm": 0.6401472091674805, + "learning_rate": 0.00017104431171460077, + "loss": 1.4568, + "step": 7422 + }, + { + "epoch": 0.7830168776371308, + "grad_norm": 0.6344454884529114, + "learning_rate": 0.0001708847469932093, + "loss": 1.4236, + "step": 7423 + }, + { + "epoch": 0.7831223628691983, + "grad_norm": 0.6343236565589905, + "learning_rate": 0.00017072524716456975, + "loss": 1.4755, + "step": 7424 + }, + { + "epoch": 0.7832278481012658, + "grad_norm": 0.6191328763961792, + "learning_rate": 0.00017056581224655473, + "loss": 1.4535, + "step": 7425 + }, + { + "epoch": 0.7833333333333333, + "grad_norm": 0.6448207497596741, + "learning_rate": 0.0001704064422570298, + "loss": 1.4063, + "step": 7426 + }, + { + "epoch": 0.7834388185654009, + "grad_norm": 0.6419215202331543, + "learning_rate": 0.0001702471372138531, + "loss": 1.4406, + "step": 7427 + }, + { + "epoch": 
0.7835443037974683, + "grad_norm": 0.6354235410690308, + "learning_rate": 0.00017008789713487558, + "loss": 1.4324, + "step": 7428 + }, + { + "epoch": 0.7836497890295359, + "grad_norm": 0.6163613200187683, + "learning_rate": 0.0001699287220379407, + "loss": 1.4579, + "step": 7429 + }, + { + "epoch": 0.7837552742616034, + "grad_norm": 0.5750465989112854, + "learning_rate": 0.00016976961194088526, + "loss": 1.4616, + "step": 7430 + }, + { + "epoch": 0.7838607594936708, + "grad_norm": 0.6115192770957947, + "learning_rate": 0.000169610566861538, + "loss": 1.4413, + "step": 7431 + }, + { + "epoch": 0.7839662447257384, + "grad_norm": 0.6175270080566406, + "learning_rate": 0.0001694515868177209, + "loss": 1.4463, + "step": 7432 + }, + { + "epoch": 0.7840717299578059, + "grad_norm": 0.6209538578987122, + "learning_rate": 0.0001692926718272483, + "loss": 1.4403, + "step": 7433 + }, + { + "epoch": 0.7841772151898734, + "grad_norm": 0.6366962194442749, + "learning_rate": 0.00016913382190792754, + "loss": 1.4727, + "step": 7434 + }, + { + "epoch": 0.7842827004219409, + "grad_norm": 0.6696845889091492, + "learning_rate": 0.0001689750370775584, + "loss": 1.4388, + "step": 7435 + }, + { + "epoch": 0.7843881856540085, + "grad_norm": 0.6313201785087585, + "learning_rate": 0.00016881631735393368, + "loss": 1.416, + "step": 7436 + }, + { + "epoch": 0.7844936708860759, + "grad_norm": 0.6556068658828735, + "learning_rate": 0.00016865766275483865, + "loss": 1.4782, + "step": 7437 + }, + { + "epoch": 0.7845991561181435, + "grad_norm": 0.5913515090942383, + "learning_rate": 0.00016849907329805118, + "loss": 1.4455, + "step": 7438 + }, + { + "epoch": 0.784704641350211, + "grad_norm": 0.6255030632019043, + "learning_rate": 0.00016834054900134228, + "loss": 1.4889, + "step": 7439 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 0.6015876531600952, + "learning_rate": 0.00016818208988247533, + "loss": 1.4523, + "step": 7440 + }, + { + "epoch": 0.784915611814346, + "grad_norm": 
0.6307930946350098, + "learning_rate": 0.00016802369595920647, + "loss": 1.4672, + "step": 7441 + }, + { + "epoch": 0.7850210970464135, + "grad_norm": 0.5704931020736694, + "learning_rate": 0.00016786536724928432, + "loss": 1.4442, + "step": 7442 + }, + { + "epoch": 0.785126582278481, + "grad_norm": 0.6258883476257324, + "learning_rate": 0.00016770710377045074, + "loss": 1.4238, + "step": 7443 + }, + { + "epoch": 0.7852320675105485, + "grad_norm": 0.6019439697265625, + "learning_rate": 0.00016754890554043965, + "loss": 1.4399, + "step": 7444 + }, + { + "epoch": 0.7853375527426161, + "grad_norm": 0.6306976675987244, + "learning_rate": 0.00016739077257697804, + "loss": 1.4539, + "step": 7445 + }, + { + "epoch": 0.7854430379746835, + "grad_norm": 0.6385828256607056, + "learning_rate": 0.0001672327048977856, + "loss": 1.4439, + "step": 7446 + }, + { + "epoch": 0.7855485232067511, + "grad_norm": 0.6377881169319153, + "learning_rate": 0.00016707470252057423, + "loss": 1.4718, + "step": 7447 + }, + { + "epoch": 0.7856540084388186, + "grad_norm": 0.6174176335334778, + "learning_rate": 0.00016691676546304936, + "loss": 1.444, + "step": 7448 + }, + { + "epoch": 0.785759493670886, + "grad_norm": 0.6009869575500488, + "learning_rate": 0.00016675889374290852, + "loss": 1.4703, + "step": 7449 + }, + { + "epoch": 0.7858649789029536, + "grad_norm": 0.615922749042511, + "learning_rate": 0.0001666010873778419, + "loss": 1.4637, + "step": 7450 + }, + { + "epoch": 0.7859704641350211, + "grad_norm": 0.6199615001678467, + "learning_rate": 0.0001664433463855325, + "loss": 1.4747, + "step": 7451 + }, + { + "epoch": 0.7860759493670886, + "grad_norm": 0.6195712089538574, + "learning_rate": 0.00016628567078365612, + "loss": 1.466, + "step": 7452 + }, + { + "epoch": 0.7861814345991561, + "grad_norm": 0.6301698684692383, + "learning_rate": 0.00016612806058988088, + "loss": 1.4431, + "step": 7453 + }, + { + "epoch": 0.7862869198312237, + "grad_norm": 0.629210889339447, + "learning_rate": 
0.0001659705158218679, + "loss": 1.467, + "step": 7454 + }, + { + "epoch": 0.7863924050632911, + "grad_norm": 0.6798489093780518, + "learning_rate": 0.00016581303649727076, + "loss": 1.4665, + "step": 7455 + }, + { + "epoch": 0.7864978902953587, + "grad_norm": 0.6222941279411316, + "learning_rate": 0.000165655622633736, + "loss": 1.4678, + "step": 7456 + }, + { + "epoch": 0.7866033755274262, + "grad_norm": 0.6447638869285583, + "learning_rate": 0.00016549827424890257, + "loss": 1.462, + "step": 7457 + }, + { + "epoch": 0.7867088607594936, + "grad_norm": 0.5796579122543335, + "learning_rate": 0.00016534099136040207, + "loss": 1.4635, + "step": 7458 + }, + { + "epoch": 0.7868143459915612, + "grad_norm": 0.6545830965042114, + "learning_rate": 0.0001651837739858589, + "loss": 1.4682, + "step": 7459 + }, + { + "epoch": 0.7869198312236287, + "grad_norm": 0.5998984575271606, + "learning_rate": 0.00016502662214289, + "loss": 1.4522, + "step": 7460 + }, + { + "epoch": 0.7870253164556962, + "grad_norm": 0.6887134909629822, + "learning_rate": 0.000164869535849105, + "loss": 1.4695, + "step": 7461 + }, + { + "epoch": 0.7871308016877637, + "grad_norm": 0.61943519115448, + "learning_rate": 0.00016471251512210626, + "loss": 1.4525, + "step": 7462 + }, + { + "epoch": 0.7872362869198313, + "grad_norm": 0.601871132850647, + "learning_rate": 0.00016455555997948868, + "loss": 1.4597, + "step": 7463 + }, + { + "epoch": 0.7873417721518987, + "grad_norm": 0.611737847328186, + "learning_rate": 0.0001643986704388397, + "loss": 1.4296, + "step": 7464 + }, + { + "epoch": 0.7874472573839663, + "grad_norm": 0.6006296277046204, + "learning_rate": 0.00016424184651773997, + "loss": 1.461, + "step": 7465 + }, + { + "epoch": 0.7875527426160338, + "grad_norm": 0.5841127634048462, + "learning_rate": 0.0001640850882337622, + "loss": 1.475, + "step": 7466 + }, + { + "epoch": 0.7876582278481012, + "grad_norm": 0.6890912055969238, + "learning_rate": 0.00016392839560447196, + "loss": 1.4345, + "step": 
7467 + }, + { + "epoch": 0.7877637130801688, + "grad_norm": 0.6297390460968018, + "learning_rate": 0.00016377176864742734, + "loss": 1.4643, + "step": 7468 + }, + { + "epoch": 0.7878691983122363, + "grad_norm": 0.7141300439834595, + "learning_rate": 0.00016361520738017934, + "loss": 1.4346, + "step": 7469 + }, + { + "epoch": 0.7879746835443038, + "grad_norm": 0.6543805003166199, + "learning_rate": 0.00016345871182027124, + "loss": 1.4519, + "step": 7470 + }, + { + "epoch": 0.7880801687763713, + "grad_norm": 0.5711420774459839, + "learning_rate": 0.00016330228198523927, + "loss": 1.4575, + "step": 7471 + }, + { + "epoch": 0.7881856540084389, + "grad_norm": 0.6440286040306091, + "learning_rate": 0.00016314591789261216, + "loss": 1.4316, + "step": 7472 + }, + { + "epoch": 0.7882911392405063, + "grad_norm": 0.7339466214179993, + "learning_rate": 0.00016298961955991105, + "loss": 1.497, + "step": 7473 + }, + { + "epoch": 0.7883966244725739, + "grad_norm": 0.6472685933113098, + "learning_rate": 0.00016283338700465034, + "loss": 1.4465, + "step": 7474 + }, + { + "epoch": 0.7885021097046413, + "grad_norm": 0.5938697457313538, + "learning_rate": 0.00016267722024433654, + "loss": 1.4523, + "step": 7475 + }, + { + "epoch": 0.7886075949367088, + "grad_norm": 0.7518431544303894, + "learning_rate": 0.0001625211192964688, + "loss": 1.4765, + "step": 7476 + }, + { + "epoch": 0.7887130801687764, + "grad_norm": 0.6323265433311462, + "learning_rate": 0.00016236508417853917, + "loss": 1.4981, + "step": 7477 + }, + { + "epoch": 0.7888185654008438, + "grad_norm": 0.5911670923233032, + "learning_rate": 0.00016220911490803206, + "loss": 1.4661, + "step": 7478 + }, + { + "epoch": 0.7889240506329114, + "grad_norm": 0.628486156463623, + "learning_rate": 0.00016205321150242454, + "loss": 1.4226, + "step": 7479 + }, + { + "epoch": 0.7890295358649789, + "grad_norm": 0.7164748907089233, + "learning_rate": 0.00016189737397918653, + "loss": 1.4717, + "step": 7480 + }, + { + "epoch": 
0.7891350210970464, + "grad_norm": 0.6687692403793335, + "learning_rate": 0.00016174160235578, + "loss": 1.4777, + "step": 7481 + }, + { + "epoch": 0.7892405063291139, + "grad_norm": 0.6232552528381348, + "learning_rate": 0.00016158589664966053, + "loss": 1.4989, + "step": 7482 + }, + { + "epoch": 0.7893459915611815, + "grad_norm": 0.5994733572006226, + "learning_rate": 0.00016143025687827538, + "loss": 1.4612, + "step": 7483 + }, + { + "epoch": 0.7894514767932489, + "grad_norm": 0.6353920102119446, + "learning_rate": 0.0001612746830590649, + "loss": 1.4881, + "step": 7484 + }, + { + "epoch": 0.7895569620253164, + "grad_norm": 0.6558827757835388, + "learning_rate": 0.00016111917520946175, + "loss": 1.4782, + "step": 7485 + }, + { + "epoch": 0.789662447257384, + "grad_norm": 0.5841767191886902, + "learning_rate": 0.00016096373334689154, + "loss": 1.4371, + "step": 7486 + }, + { + "epoch": 0.7897679324894514, + "grad_norm": 0.643314003944397, + "learning_rate": 0.00016080835748877214, + "loss": 1.4604, + "step": 7487 + }, + { + "epoch": 0.789873417721519, + "grad_norm": 0.6738057732582092, + "learning_rate": 0.00016065304765251423, + "loss": 1.4697, + "step": 7488 + }, + { + "epoch": 0.7899789029535865, + "grad_norm": 0.64020836353302, + "learning_rate": 0.00016049780385552113, + "loss": 1.4657, + "step": 7489 + }, + { + "epoch": 0.790084388185654, + "grad_norm": 0.641453206539154, + "learning_rate": 0.0001603426261151884, + "loss": 1.4662, + "step": 7490 + }, + { + "epoch": 0.7901898734177215, + "grad_norm": 0.6690492630004883, + "learning_rate": 0.000160187514448905, + "loss": 1.4436, + "step": 7491 + }, + { + "epoch": 0.7902953586497891, + "grad_norm": 0.6406052708625793, + "learning_rate": 0.0001600324688740516, + "loss": 1.478, + "step": 7492 + }, + { + "epoch": 0.7904008438818565, + "grad_norm": 0.5621783137321472, + "learning_rate": 0.00015987748940800186, + "loss": 1.476, + "step": 7493 + }, + { + "epoch": 0.790506329113924, + "grad_norm": 0.6007083654403687, 
+ "learning_rate": 0.0001597225760681221, + "loss": 1.4599, + "step": 7494 + }, + { + "epoch": 0.7906118143459916, + "grad_norm": 0.6298058032989502, + "learning_rate": 0.00015956772887177115, + "loss": 1.4984, + "step": 7495 + }, + { + "epoch": 0.790717299578059, + "grad_norm": 0.6313723921775818, + "learning_rate": 0.00015941294783630022, + "loss": 1.4217, + "step": 7496 + }, + { + "epoch": 0.7908227848101266, + "grad_norm": 0.5829667448997498, + "learning_rate": 0.00015925823297905346, + "loss": 1.4411, + "step": 7497 + }, + { + "epoch": 0.7909282700421941, + "grad_norm": 0.633734405040741, + "learning_rate": 0.00015910358431736745, + "loss": 1.4604, + "step": 7498 + }, + { + "epoch": 0.7910337552742616, + "grad_norm": 0.6251109838485718, + "learning_rate": 0.00015894900186857105, + "loss": 1.436, + "step": 7499 + }, + { + "epoch": 0.7911392405063291, + "grad_norm": 0.6358771920204163, + "learning_rate": 0.00015879448564998648, + "loss": 1.5035, + "step": 7500 + }, + { + "epoch": 0.7912447257383967, + "grad_norm": 0.7055473923683167, + "learning_rate": 0.00015864003567892776, + "loss": 1.5098, + "step": 7501 + }, + { + "epoch": 0.7913502109704641, + "grad_norm": 0.6131179928779602, + "learning_rate": 0.00015848565197270175, + "loss": 1.4692, + "step": 7502 + }, + { + "epoch": 0.7914556962025316, + "grad_norm": 0.7211872935295105, + "learning_rate": 0.00015833133454860814, + "loss": 1.4739, + "step": 7503 + }, + { + "epoch": 0.7915611814345992, + "grad_norm": 0.6894788146018982, + "learning_rate": 0.00015817708342393878, + "loss": 1.5146, + "step": 7504 + }, + { + "epoch": 0.7916666666666666, + "grad_norm": 0.5881443619728088, + "learning_rate": 0.0001580228986159783, + "loss": 1.4444, + "step": 7505 + }, + { + "epoch": 0.7917721518987342, + "grad_norm": 0.6966633796691895, + "learning_rate": 0.00015786878014200387, + "loss": 1.476, + "step": 7506 + }, + { + "epoch": 0.7918776371308017, + "grad_norm": 0.6916399598121643, + "learning_rate": 0.0001577147280192851, 
+ "loss": 1.4944, + "step": 7507 + }, + { + "epoch": 0.7919831223628692, + "grad_norm": 0.7018747925758362, + "learning_rate": 0.0001575607422650846, + "loss": 1.4459, + "step": 7508 + }, + { + "epoch": 0.7920886075949367, + "grad_norm": 0.6259503960609436, + "learning_rate": 0.00015740682289665714, + "loss": 1.4183, + "step": 7509 + }, + { + "epoch": 0.7921940928270043, + "grad_norm": 0.7389950156211853, + "learning_rate": 0.0001572529699312501, + "loss": 1.4561, + "step": 7510 + }, + { + "epoch": 0.7922995780590717, + "grad_norm": 0.7303707003593445, + "learning_rate": 0.0001570991833861035, + "loss": 1.4472, + "step": 7511 + }, + { + "epoch": 0.7924050632911392, + "grad_norm": 0.5858631730079651, + "learning_rate": 0.00015694546327844986, + "loss": 1.4482, + "step": 7512 + }, + { + "epoch": 0.7925105485232068, + "grad_norm": 0.6342059373855591, + "learning_rate": 0.00015679180962551435, + "loss": 1.4397, + "step": 7513 + }, + { + "epoch": 0.7926160337552742, + "grad_norm": 0.7182206511497498, + "learning_rate": 0.00015663822244451446, + "loss": 1.4669, + "step": 7514 + }, + { + "epoch": 0.7927215189873418, + "grad_norm": 0.6233593821525574, + "learning_rate": 0.00015648470175266057, + "loss": 1.4238, + "step": 7515 + }, + { + "epoch": 0.7928270042194093, + "grad_norm": 0.6074845194816589, + "learning_rate": 0.00015633124756715523, + "loss": 1.5051, + "step": 7516 + }, + { + "epoch": 0.7929324894514768, + "grad_norm": 0.6852695345878601, + "learning_rate": 0.00015617785990519403, + "loss": 1.4348, + "step": 7517 + }, + { + "epoch": 0.7930379746835443, + "grad_norm": 0.6904196739196777, + "learning_rate": 0.00015602453878396479, + "loss": 1.4707, + "step": 7518 + }, + { + "epoch": 0.7931434599156119, + "grad_norm": 0.6142614483833313, + "learning_rate": 0.0001558712842206477, + "loss": 1.4646, + "step": 7519 + }, + { + "epoch": 0.7932489451476793, + "grad_norm": 0.7561241984367371, + "learning_rate": 0.0001557180962324158, + "loss": 1.4444, + "step": 7520 + }, + { 
+ "epoch": 0.7933544303797468, + "grad_norm": 0.6212359070777893, + "learning_rate": 0.00015556497483643466, + "loss": 1.4338, + "step": 7521 + }, + { + "epoch": 0.7934599156118144, + "grad_norm": 0.6244015693664551, + "learning_rate": 0.00015541192004986222, + "loss": 1.4586, + "step": 7522 + }, + { + "epoch": 0.7935654008438818, + "grad_norm": 0.6457334756851196, + "learning_rate": 0.00015525893188984898, + "loss": 1.4541, + "step": 7523 + }, + { + "epoch": 0.7936708860759494, + "grad_norm": 0.759016215801239, + "learning_rate": 0.00015510601037353804, + "loss": 1.4681, + "step": 7524 + }, + { + "epoch": 0.7937763713080169, + "grad_norm": 0.6306403875350952, + "learning_rate": 0.00015495315551806486, + "loss": 1.481, + "step": 7525 + }, + { + "epoch": 0.7938818565400844, + "grad_norm": 0.7085781693458557, + "learning_rate": 0.000154800367340558, + "loss": 1.4502, + "step": 7526 + }, + { + "epoch": 0.7939873417721519, + "grad_norm": 0.7066642045974731, + "learning_rate": 0.00015464764585813783, + "loss": 1.4553, + "step": 7527 + }, + { + "epoch": 0.7940928270042195, + "grad_norm": 0.6268943548202515, + "learning_rate": 0.0001544949910879177, + "loss": 1.4508, + "step": 7528 + }, + { + "epoch": 0.7941983122362869, + "grad_norm": 0.7018306255340576, + "learning_rate": 0.00015434240304700332, + "loss": 1.4696, + "step": 7529 + }, + { + "epoch": 0.7943037974683544, + "grad_norm": 0.6104345917701721, + "learning_rate": 0.00015418988175249282, + "loss": 1.4599, + "step": 7530 + }, + { + "epoch": 0.794409282700422, + "grad_norm": 0.6404241323471069, + "learning_rate": 0.00015403742722147707, + "loss": 1.4786, + "step": 7531 + }, + { + "epoch": 0.7945147679324894, + "grad_norm": 0.6078194975852966, + "learning_rate": 0.00015388503947103937, + "loss": 1.4477, + "step": 7532 + }, + { + "epoch": 0.794620253164557, + "grad_norm": 0.7000434994697571, + "learning_rate": 0.00015373271851825527, + "loss": 1.4727, + "step": 7533 + }, + { + "epoch": 0.7947257383966245, + 
"grad_norm": 0.588708221912384, + "learning_rate": 0.00015358046438019356, + "loss": 1.474, + "step": 7534 + }, + { + "epoch": 0.794831223628692, + "grad_norm": 0.6606690287590027, + "learning_rate": 0.00015342827707391475, + "loss": 1.4173, + "step": 7535 + }, + { + "epoch": 0.7949367088607595, + "grad_norm": 0.6300554275512695, + "learning_rate": 0.0001532761566164723, + "loss": 1.4327, + "step": 7536 + }, + { + "epoch": 0.7950421940928271, + "grad_norm": 0.631349503993988, + "learning_rate": 0.0001531241030249121, + "loss": 1.4611, + "step": 7537 + }, + { + "epoch": 0.7951476793248945, + "grad_norm": 0.6098535060882568, + "learning_rate": 0.00015297211631627234, + "loss": 1.4524, + "step": 7538 + }, + { + "epoch": 0.795253164556962, + "grad_norm": 0.676202118396759, + "learning_rate": 0.0001528201965075841, + "loss": 1.4397, + "step": 7539 + }, + { + "epoch": 0.7953586497890295, + "grad_norm": 0.5892504453659058, + "learning_rate": 0.00015266834361587063, + "loss": 1.4614, + "step": 7540 + }, + { + "epoch": 0.795464135021097, + "grad_norm": 0.6103730201721191, + "learning_rate": 0.00015251655765814777, + "loss": 1.4171, + "step": 7541 + }, + { + "epoch": 0.7955696202531646, + "grad_norm": 0.5840252637863159, + "learning_rate": 0.000152364838651424, + "loss": 1.4495, + "step": 7542 + }, + { + "epoch": 0.795675105485232, + "grad_norm": 0.6809155941009521, + "learning_rate": 0.00015221318661269985, + "loss": 1.4529, + "step": 7543 + }, + { + "epoch": 0.7957805907172996, + "grad_norm": 0.681132435798645, + "learning_rate": 0.00015206160155896924, + "loss": 1.4577, + "step": 7544 + }, + { + "epoch": 0.7958860759493671, + "grad_norm": 0.6064942479133606, + "learning_rate": 0.00015191008350721772, + "loss": 1.4649, + "step": 7545 + }, + { + "epoch": 0.7959915611814345, + "grad_norm": 0.6548331379890442, + "learning_rate": 0.00015175863247442374, + "loss": 1.4739, + "step": 7546 + }, + { + "epoch": 0.7960970464135021, + "grad_norm": 0.6144078373908997, + 
"learning_rate": 0.00015160724847755806, + "loss": 1.4284, + "step": 7547 + }, + { + "epoch": 0.7962025316455696, + "grad_norm": 0.5848416090011597, + "learning_rate": 0.00015145593153358412, + "loss": 1.4419, + "step": 7548 + }, + { + "epoch": 0.7963080168776371, + "grad_norm": 0.5838331580162048, + "learning_rate": 0.0001513046816594575, + "loss": 1.4556, + "step": 7549 + }, + { + "epoch": 0.7964135021097046, + "grad_norm": 0.6191608309745789, + "learning_rate": 0.00015115349887212678, + "loss": 1.4682, + "step": 7550 + }, + { + "epoch": 0.7965189873417722, + "grad_norm": 0.7128922343254089, + "learning_rate": 0.00015100238318853262, + "loss": 1.4457, + "step": 7551 + }, + { + "epoch": 0.7966244725738396, + "grad_norm": 0.5983912348747253, + "learning_rate": 0.00015085133462560833, + "loss": 1.4832, + "step": 7552 + }, + { + "epoch": 0.7967299578059072, + "grad_norm": 0.5906868577003479, + "learning_rate": 0.00015070035320027933, + "loss": 1.4809, + "step": 7553 + }, + { + "epoch": 0.7968354430379747, + "grad_norm": 0.6193068027496338, + "learning_rate": 0.00015054943892946446, + "loss": 1.429, + "step": 7554 + }, + { + "epoch": 0.7969409282700421, + "grad_norm": 0.5903324484825134, + "learning_rate": 0.000150398591830074, + "loss": 1.3911, + "step": 7555 + }, + { + "epoch": 0.7970464135021097, + "grad_norm": 0.6122763752937317, + "learning_rate": 0.00015024781191901122, + "loss": 1.5009, + "step": 7556 + }, + { + "epoch": 0.7971518987341772, + "grad_norm": 0.5840786695480347, + "learning_rate": 0.00015009709921317172, + "loss": 1.4488, + "step": 7557 + }, + { + "epoch": 0.7972573839662447, + "grad_norm": 0.6778419017791748, + "learning_rate": 0.00014994645372944367, + "loss": 1.4328, + "step": 7558 + }, + { + "epoch": 0.7973628691983122, + "grad_norm": 0.5949789881706238, + "learning_rate": 0.0001497958754847076, + "loss": 1.4447, + "step": 7559 + }, + { + "epoch": 0.7974683544303798, + "grad_norm": 0.5891158580780029, + "learning_rate": 0.00014964536449583657, 
+ "loss": 1.4649, + "step": 7560 + }, + { + "epoch": 0.7975738396624472, + "grad_norm": 0.6847309470176697, + "learning_rate": 0.0001494949207796961, + "loss": 1.4485, + "step": 7561 + }, + { + "epoch": 0.7976793248945148, + "grad_norm": 0.6277568340301514, + "learning_rate": 0.00014934454435314417, + "loss": 1.4845, + "step": 7562 + }, + { + "epoch": 0.7977848101265823, + "grad_norm": 0.7180612683296204, + "learning_rate": 0.00014919423523303095, + "loss": 1.457, + "step": 7563 + }, + { + "epoch": 0.7978902953586497, + "grad_norm": 0.6729501485824585, + "learning_rate": 0.00014904399343619972, + "loss": 1.4607, + "step": 7564 + }, + { + "epoch": 0.7979957805907173, + "grad_norm": 0.705545961856842, + "learning_rate": 0.00014889381897948575, + "loss": 1.4839, + "step": 7565 + }, + { + "epoch": 0.7981012658227848, + "grad_norm": 0.7320927977561951, + "learning_rate": 0.00014874371187971672, + "loss": 1.4277, + "step": 7566 + }, + { + "epoch": 0.7982067510548523, + "grad_norm": 0.6345402002334595, + "learning_rate": 0.00014859367215371293, + "loss": 1.4938, + "step": 7567 + }, + { + "epoch": 0.7983122362869198, + "grad_norm": 0.7235166430473328, + "learning_rate": 0.00014844369981828698, + "loss": 1.468, + "step": 7568 + }, + { + "epoch": 0.7984177215189874, + "grad_norm": 0.7808515429496765, + "learning_rate": 0.00014829379489024415, + "loss": 1.4801, + "step": 7569 + }, + { + "epoch": 0.7985232067510548, + "grad_norm": 0.6370118260383606, + "learning_rate": 0.00014814395738638195, + "loss": 1.4514, + "step": 7570 + }, + { + "epoch": 0.7986286919831224, + "grad_norm": 0.6160861253738403, + "learning_rate": 0.0001479941873234905, + "loss": 1.422, + "step": 7571 + }, + { + "epoch": 0.7987341772151899, + "grad_norm": 0.7375660538673401, + "learning_rate": 0.00014784448471835224, + "loss": 1.4749, + "step": 7572 + }, + { + "epoch": 0.7988396624472573, + "grad_norm": 0.6133042573928833, + "learning_rate": 0.0001476948495877418, + "loss": 1.4614, + "step": 7573 + }, + { + 
"epoch": 0.7989451476793249, + "grad_norm": 0.5957701206207275, + "learning_rate": 0.00014754528194842707, + "loss": 1.4726, + "step": 7574 + }, + { + "epoch": 0.7990506329113924, + "grad_norm": 0.6527002453804016, + "learning_rate": 0.00014739578181716765, + "loss": 1.4458, + "step": 7575 + }, + { + "epoch": 0.7991561181434599, + "grad_norm": 0.5923647284507751, + "learning_rate": 0.00014724634921071573, + "loss": 1.461, + "step": 7576 + }, + { + "epoch": 0.7992616033755274, + "grad_norm": 0.600149393081665, + "learning_rate": 0.0001470969841458159, + "loss": 1.4678, + "step": 7577 + }, + { + "epoch": 0.799367088607595, + "grad_norm": 0.6308768391609192, + "learning_rate": 0.00014694768663920537, + "loss": 1.472, + "step": 7578 + }, + { + "epoch": 0.7994725738396624, + "grad_norm": 0.6229814887046814, + "learning_rate": 0.0001467984567076137, + "loss": 1.4787, + "step": 7579 + }, + { + "epoch": 0.79957805907173, + "grad_norm": 0.5935222506523132, + "learning_rate": 0.00014664929436776278, + "loss": 1.4528, + "step": 7580 + }, + { + "epoch": 0.7996835443037975, + "grad_norm": 0.6262754797935486, + "learning_rate": 0.00014650019963636696, + "loss": 1.4337, + "step": 7581 + }, + { + "epoch": 0.799789029535865, + "grad_norm": 0.6498810052871704, + "learning_rate": 0.0001463511725301331, + "loss": 1.4754, + "step": 7582 + }, + { + "epoch": 0.7998945147679325, + "grad_norm": 0.5617763996124268, + "learning_rate": 0.00014620221306576027, + "loss": 1.4752, + "step": 7583 + }, + { + "epoch": 0.8, + "grad_norm": 0.622961699962616, + "learning_rate": 0.00014605332125994038, + "loss": 1.4439, + "step": 7584 + }, + { + "epoch": 0.8001054852320675, + "grad_norm": 0.6107605695724487, + "learning_rate": 0.0001459044971293575, + "loss": 1.4628, + "step": 7585 + }, + { + "epoch": 0.800210970464135, + "grad_norm": 0.6289035677909851, + "learning_rate": 0.000145755740690688, + "loss": 1.4411, + "step": 7586 + }, + { + "epoch": 0.8003164556962026, + "grad_norm": 0.6216980814933777, + 
"learning_rate": 0.00014560705196060074, + "loss": 1.439, + "step": 7587 + }, + { + "epoch": 0.80042194092827, + "grad_norm": 0.5903581976890564, + "learning_rate": 0.00014545843095575709, + "loss": 1.4557, + "step": 7588 + }, + { + "epoch": 0.8005274261603376, + "grad_norm": 0.6068002581596375, + "learning_rate": 0.00014530987769281075, + "loss": 1.4651, + "step": 7589 + }, + { + "epoch": 0.8006329113924051, + "grad_norm": 0.6029548645019531, + "learning_rate": 0.00014516139218840788, + "loss": 1.4256, + "step": 7590 + }, + { + "epoch": 0.8007383966244725, + "grad_norm": 0.6368579268455505, + "learning_rate": 0.00014501297445918703, + "loss": 1.4602, + "step": 7591 + }, + { + "epoch": 0.8008438818565401, + "grad_norm": 0.6611518859863281, + "learning_rate": 0.00014486462452177896, + "loss": 1.4737, + "step": 7592 + }, + { + "epoch": 0.8009493670886076, + "grad_norm": 0.6308072209358215, + "learning_rate": 0.0001447163423928073, + "loss": 1.486, + "step": 7593 + }, + { + "epoch": 0.8010548523206751, + "grad_norm": 0.6342136859893799, + "learning_rate": 0.00014456812808888775, + "loss": 1.4431, + "step": 7594 + }, + { + "epoch": 0.8011603375527426, + "grad_norm": 0.6270778179168701, + "learning_rate": 0.00014441998162662847, + "loss": 1.5012, + "step": 7595 + }, + { + "epoch": 0.8012658227848102, + "grad_norm": 0.6057094931602478, + "learning_rate": 0.00014427190302262989, + "loss": 1.4325, + "step": 7596 + }, + { + "epoch": 0.8013713080168776, + "grad_norm": 0.6065152287483215, + "learning_rate": 0.00014412389229348494, + "loss": 1.4094, + "step": 7597 + }, + { + "epoch": 0.8014767932489452, + "grad_norm": 0.6387645602226257, + "learning_rate": 0.00014397594945577912, + "loss": 1.4443, + "step": 7598 + }, + { + "epoch": 0.8015822784810127, + "grad_norm": 0.6378925442695618, + "learning_rate": 0.00014382807452609003, + "loss": 1.455, + "step": 7599 + }, + { + "epoch": 0.8016877637130801, + "grad_norm": 0.6108806133270264, + "learning_rate": 0.00014368026752098782, + 
"loss": 1.4412, + "step": 7600 + }, + { + "epoch": 0.8017932489451477, + "grad_norm": 0.672905445098877, + "learning_rate": 0.00014353252845703506, + "loss": 1.4867, + "step": 7601 + }, + { + "epoch": 0.8018987341772152, + "grad_norm": 0.6759300231933594, + "learning_rate": 0.00014338485735078632, + "loss": 1.4533, + "step": 7602 + }, + { + "epoch": 0.8020042194092827, + "grad_norm": 0.6078498959541321, + "learning_rate": 0.0001432372542187895, + "loss": 1.5006, + "step": 7603 + }, + { + "epoch": 0.8021097046413502, + "grad_norm": 0.645070493221283, + "learning_rate": 0.00014308971907758383, + "loss": 1.4779, + "step": 7604 + }, + { + "epoch": 0.8022151898734177, + "grad_norm": 0.6417722702026367, + "learning_rate": 0.00014294225194370154, + "loss": 1.4438, + "step": 7605 + }, + { + "epoch": 0.8023206751054852, + "grad_norm": 0.6379573941230774, + "learning_rate": 0.00014279485283366696, + "loss": 1.47, + "step": 7606 + }, + { + "epoch": 0.8024261603375528, + "grad_norm": 0.5968127846717834, + "learning_rate": 0.00014264752176399687, + "loss": 1.4474, + "step": 7607 + }, + { + "epoch": 0.8025316455696202, + "grad_norm": 0.5896539092063904, + "learning_rate": 0.0001425002587512005, + "loss": 1.432, + "step": 7608 + }, + { + "epoch": 0.8026371308016877, + "grad_norm": 0.6102089285850525, + "learning_rate": 0.00014235306381177952, + "loss": 1.4778, + "step": 7609 + }, + { + "epoch": 0.8027426160337553, + "grad_norm": 0.6150583624839783, + "learning_rate": 0.00014220593696222768, + "loss": 1.4903, + "step": 7610 + }, + { + "epoch": 0.8028481012658227, + "grad_norm": 0.6400398015975952, + "learning_rate": 0.00014205887821903105, + "loss": 1.427, + "step": 7611 + }, + { + "epoch": 0.8029535864978903, + "grad_norm": 0.6066632270812988, + "learning_rate": 0.00014191188759866887, + "loss": 1.4637, + "step": 7612 + }, + { + "epoch": 0.8030590717299578, + "grad_norm": 0.597209632396698, + "learning_rate": 0.00014176496511761192, + "loss": 1.4352, + "step": 7613 + }, + { + 
"epoch": 0.8031645569620253, + "grad_norm": 0.6335425972938538, + "learning_rate": 0.0001416181107923235, + "loss": 1.4216, + "step": 7614 + }, + { + "epoch": 0.8032700421940928, + "grad_norm": 0.6637945175170898, + "learning_rate": 0.0001414713246392594, + "loss": 1.4531, + "step": 7615 + }, + { + "epoch": 0.8033755274261604, + "grad_norm": 0.602800190448761, + "learning_rate": 0.0001413246066748678, + "loss": 1.4523, + "step": 7616 + }, + { + "epoch": 0.8034810126582278, + "grad_norm": 0.6577479243278503, + "learning_rate": 0.00014117795691558915, + "loss": 1.4236, + "step": 7617 + }, + { + "epoch": 0.8035864978902953, + "grad_norm": 0.685041606426239, + "learning_rate": 0.00014103137537785633, + "loss": 1.4325, + "step": 7618 + }, + { + "epoch": 0.8036919831223629, + "grad_norm": 0.6139729022979736, + "learning_rate": 0.00014088486207809449, + "loss": 1.4566, + "step": 7619 + }, + { + "epoch": 0.8037974683544303, + "grad_norm": 0.6078811287879944, + "learning_rate": 0.00014073841703272092, + "loss": 1.425, + "step": 7620 + }, + { + "epoch": 0.8039029535864979, + "grad_norm": 0.6451017260551453, + "learning_rate": 0.00014059204025814603, + "loss": 1.5196, + "step": 7621 + }, + { + "epoch": 0.8040084388185654, + "grad_norm": 0.6229252815246582, + "learning_rate": 0.0001404457317707718, + "loss": 1.4241, + "step": 7622 + }, + { + "epoch": 0.8041139240506329, + "grad_norm": 0.6273231506347656, + "learning_rate": 0.00014029949158699285, + "loss": 1.4435, + "step": 7623 + }, + { + "epoch": 0.8042194092827004, + "grad_norm": 0.6120379567146301, + "learning_rate": 0.00014015331972319606, + "loss": 1.4688, + "step": 7624 + }, + { + "epoch": 0.804324894514768, + "grad_norm": 0.5948386788368225, + "learning_rate": 0.00014000721619576077, + "loss": 1.4479, + "step": 7625 + }, + { + "epoch": 0.8044303797468354, + "grad_norm": 0.5628746747970581, + "learning_rate": 0.0001398611810210586, + "loss": 1.448, + "step": 7626 + }, + { + "epoch": 0.804535864978903, + "grad_norm": 
0.6144855618476868, + "learning_rate": 0.0001397152142154536, + "loss": 1.4667, + "step": 7627 + }, + { + "epoch": 0.8046413502109705, + "grad_norm": 0.6146947145462036, + "learning_rate": 0.00013956931579530194, + "loss": 1.4722, + "step": 7628 + }, + { + "epoch": 0.8047468354430379, + "grad_norm": 0.6454689502716064, + "learning_rate": 0.0001394234857769521, + "loss": 1.4731, + "step": 7629 + }, + { + "epoch": 0.8048523206751055, + "grad_norm": 0.6162256002426147, + "learning_rate": 0.00013927772417674558, + "loss": 1.4934, + "step": 7630 + }, + { + "epoch": 0.804957805907173, + "grad_norm": 0.5698509216308594, + "learning_rate": 0.00013913203101101532, + "loss": 1.4979, + "step": 7631 + }, + { + "epoch": 0.8050632911392405, + "grad_norm": 0.6339166164398193, + "learning_rate": 0.0001389864062960871, + "loss": 1.4373, + "step": 7632 + }, + { + "epoch": 0.805168776371308, + "grad_norm": 0.6798878312110901, + "learning_rate": 0.00013884085004827883, + "loss": 1.4602, + "step": 7633 + }, + { + "epoch": 0.8052742616033756, + "grad_norm": 0.5564504861831665, + "learning_rate": 0.0001386953622839008, + "loss": 1.4756, + "step": 7634 + }, + { + "epoch": 0.805379746835443, + "grad_norm": 0.6504507660865784, + "learning_rate": 0.0001385499430192557, + "loss": 1.4883, + "step": 7635 + }, + { + "epoch": 0.8054852320675105, + "grad_norm": 0.6361581683158875, + "learning_rate": 0.00013840459227063842, + "loss": 1.4321, + "step": 7636 + }, + { + "epoch": 0.8055907172995781, + "grad_norm": 0.5765936374664307, + "learning_rate": 0.00013825931005433605, + "loss": 1.4353, + "step": 7637 + }, + { + "epoch": 0.8056962025316455, + "grad_norm": 0.603077232837677, + "learning_rate": 0.00013811409638662858, + "loss": 1.4536, + "step": 7638 + }, + { + "epoch": 0.8058016877637131, + "grad_norm": 0.6282583475112915, + "learning_rate": 0.0001379689512837878, + "loss": 1.47, + "step": 7639 + }, + { + "epoch": 0.8059071729957806, + "grad_norm": 0.6357269287109375, + "learning_rate": 
0.00013782387476207788, + "loss": 1.434, + "step": 7640 + }, + { + "epoch": 0.8060126582278481, + "grad_norm": 0.5665367841720581, + "learning_rate": 0.0001376788668377554, + "loss": 1.4758, + "step": 7641 + }, + { + "epoch": 0.8061181434599156, + "grad_norm": 0.6038592457771301, + "learning_rate": 0.0001375339275270692, + "loss": 1.4991, + "step": 7642 + }, + { + "epoch": 0.8062236286919832, + "grad_norm": 0.6069632172584534, + "learning_rate": 0.00013738905684626044, + "loss": 1.4308, + "step": 7643 + }, + { + "epoch": 0.8063291139240506, + "grad_norm": 0.6145093441009521, + "learning_rate": 0.00013724425481156263, + "loss": 1.4845, + "step": 7644 + }, + { + "epoch": 0.8064345991561181, + "grad_norm": 0.6282770037651062, + "learning_rate": 0.00013709952143920148, + "loss": 1.4745, + "step": 7645 + }, + { + "epoch": 0.8065400843881857, + "grad_norm": 0.5865074992179871, + "learning_rate": 0.000136954856745395, + "loss": 1.5007, + "step": 7646 + }, + { + "epoch": 0.8066455696202531, + "grad_norm": 0.6136338710784912, + "learning_rate": 0.000136810260746354, + "loss": 1.4622, + "step": 7647 + }, + { + "epoch": 0.8067510548523207, + "grad_norm": 0.7694755792617798, + "learning_rate": 0.00013666573345828083, + "loss": 1.4342, + "step": 7648 + }, + { + "epoch": 0.8068565400843882, + "grad_norm": 0.65009605884552, + "learning_rate": 0.00013652127489737067, + "loss": 1.46, + "step": 7649 + }, + { + "epoch": 0.8069620253164557, + "grad_norm": 0.5662466287612915, + "learning_rate": 0.00013637688507981064, + "loss": 1.4946, + "step": 7650 + }, + { + "epoch": 0.8070675105485232, + "grad_norm": 0.6907654404640198, + "learning_rate": 0.0001362325640217805, + "loss": 1.4587, + "step": 7651 + }, + { + "epoch": 0.8071729957805908, + "grad_norm": 0.6129448413848877, + "learning_rate": 0.00013608831173945207, + "loss": 1.3846, + "step": 7652 + }, + { + "epoch": 0.8072784810126582, + "grad_norm": 0.6464186310768127, + "learning_rate": 0.0001359441282489895, + "loss": 1.461, + 
"step": 7653 + }, + { + "epoch": 0.8073839662447257, + "grad_norm": 0.659373939037323, + "learning_rate": 0.0001358000135665494, + "loss": 1.4837, + "step": 7654 + }, + { + "epoch": 0.8074894514767933, + "grad_norm": 0.6015458106994629, + "learning_rate": 0.00013565596770828025, + "loss": 1.4295, + "step": 7655 + }, + { + "epoch": 0.8075949367088607, + "grad_norm": 0.7354792356491089, + "learning_rate": 0.00013551199069032348, + "loss": 1.434, + "step": 7656 + }, + { + "epoch": 0.8077004219409283, + "grad_norm": 0.5890152454376221, + "learning_rate": 0.0001353680825288123, + "loss": 1.4842, + "step": 7657 + }, + { + "epoch": 0.8078059071729958, + "grad_norm": 0.6991687417030334, + "learning_rate": 0.0001352242432398723, + "loss": 1.4705, + "step": 7658 + }, + { + "epoch": 0.8079113924050633, + "grad_norm": 0.6192061305046082, + "learning_rate": 0.00013508047283962137, + "loss": 1.4565, + "step": 7659 + }, + { + "epoch": 0.8080168776371308, + "grad_norm": 0.6458331942558289, + "learning_rate": 0.0001349367713441697, + "loss": 1.4343, + "step": 7660 + }, + { + "epoch": 0.8081223628691984, + "grad_norm": 0.6173801422119141, + "learning_rate": 0.0001347931387696198, + "loss": 1.4298, + "step": 7661 + }, + { + "epoch": 0.8082278481012658, + "grad_norm": 0.6389113664627075, + "learning_rate": 0.0001346495751320664, + "loss": 1.4592, + "step": 7662 + }, + { + "epoch": 0.8083333333333333, + "grad_norm": 0.6300020217895508, + "learning_rate": 0.00013450608044759634, + "loss": 1.4763, + "step": 7663 + }, + { + "epoch": 0.8084388185654009, + "grad_norm": 0.6520695090293884, + "learning_rate": 0.00013436265473228926, + "loss": 1.4497, + "step": 7664 + }, + { + "epoch": 0.8085443037974683, + "grad_norm": 0.6029162406921387, + "learning_rate": 0.0001342192980022166, + "loss": 1.4654, + "step": 7665 + }, + { + "epoch": 0.8086497890295359, + "grad_norm": 0.6326584219932556, + "learning_rate": 0.00013407601027344213, + "loss": 1.4087, + "step": 7666 + }, + { + "epoch": 
0.8087552742616034, + "grad_norm": 0.5990694165229797, + "learning_rate": 0.00013393279156202197, + "loss": 1.4501, + "step": 7667 + }, + { + "epoch": 0.8088607594936709, + "grad_norm": 0.5940345525741577, + "learning_rate": 0.00013378964188400457, + "loss": 1.4852, + "step": 7668 + }, + { + "epoch": 0.8089662447257384, + "grad_norm": 0.6241896152496338, + "learning_rate": 0.00013364656125543044, + "loss": 1.45, + "step": 7669 + }, + { + "epoch": 0.8090717299578059, + "grad_norm": 0.5872434973716736, + "learning_rate": 0.0001335035496923326, + "loss": 1.4615, + "step": 7670 + }, + { + "epoch": 0.8091772151898734, + "grad_norm": 0.6414763331413269, + "learning_rate": 0.00013336060721073608, + "loss": 1.4692, + "step": 7671 + }, + { + "epoch": 0.809282700421941, + "grad_norm": 0.6239520311355591, + "learning_rate": 0.00013321773382665822, + "loss": 1.4993, + "step": 7672 + }, + { + "epoch": 0.8093881856540084, + "grad_norm": 0.6039249897003174, + "learning_rate": 0.00013307492955610896, + "loss": 1.4985, + "step": 7673 + }, + { + "epoch": 0.8094936708860759, + "grad_norm": 0.5849673748016357, + "learning_rate": 0.0001329321944150902, + "loss": 1.4779, + "step": 7674 + }, + { + "epoch": 0.8095991561181435, + "grad_norm": 0.5764127969741821, + "learning_rate": 0.000132789528419596, + "loss": 1.4194, + "step": 7675 + }, + { + "epoch": 0.8097046413502109, + "grad_norm": 0.5770612955093384, + "learning_rate": 0.0001326469315856128, + "loss": 1.4577, + "step": 7676 + }, + { + "epoch": 0.8098101265822785, + "grad_norm": 0.6159189939498901, + "learning_rate": 0.00013250440392911927, + "loss": 1.4431, + "step": 7677 + }, + { + "epoch": 0.809915611814346, + "grad_norm": 0.5665100812911987, + "learning_rate": 0.00013236194546608645, + "loss": 1.4992, + "step": 7678 + }, + { + "epoch": 0.8100210970464135, + "grad_norm": 0.6361575722694397, + "learning_rate": 0.00013221955621247749, + "loss": 1.4382, + "step": 7679 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 
0.6290982365608215, + "learning_rate": 0.0001320772361842478, + "loss": 1.4984, + "step": 7680 + }, + { + "epoch": 0.8102320675105485, + "grad_norm": 0.6085060238838196, + "learning_rate": 0.00013193498539734478, + "loss": 1.4483, + "step": 7681 + }, + { + "epoch": 0.810337552742616, + "grad_norm": 0.5989246964454651, + "learning_rate": 0.00013179280386770885, + "loss": 1.4484, + "step": 7682 + }, + { + "epoch": 0.8104430379746835, + "grad_norm": 0.6021916270256042, + "learning_rate": 0.00013165069161127183, + "loss": 1.4295, + "step": 7683 + }, + { + "epoch": 0.8105485232067511, + "grad_norm": 0.6527813673019409, + "learning_rate": 0.00013150864864395825, + "loss": 1.4429, + "step": 7684 + }, + { + "epoch": 0.8106540084388185, + "grad_norm": 0.6033313274383545, + "learning_rate": 0.00013136667498168464, + "loss": 1.4747, + "step": 7685 + }, + { + "epoch": 0.8107594936708861, + "grad_norm": 0.5865955948829651, + "learning_rate": 0.00013122477064035992, + "loss": 1.4641, + "step": 7686 + }, + { + "epoch": 0.8108649789029536, + "grad_norm": 0.6312289834022522, + "learning_rate": 0.00013108293563588504, + "loss": 1.4565, + "step": 7687 + }, + { + "epoch": 0.810970464135021, + "grad_norm": 0.6198150515556335, + "learning_rate": 0.00013094116998415358, + "loss": 1.4258, + "step": 7688 + }, + { + "epoch": 0.8110759493670886, + "grad_norm": 0.6193734407424927, + "learning_rate": 0.00013079947370105057, + "loss": 1.4437, + "step": 7689 + }, + { + "epoch": 0.8111814345991561, + "grad_norm": 0.606696605682373, + "learning_rate": 0.00013065784680245442, + "loss": 1.4066, + "step": 7690 + }, + { + "epoch": 0.8112869198312236, + "grad_norm": 0.5973882079124451, + "learning_rate": 0.00013051628930423485, + "loss": 1.4638, + "step": 7691 + }, + { + "epoch": 0.8113924050632911, + "grad_norm": 0.6185863018035889, + "learning_rate": 0.00013037480122225412, + "loss": 1.4745, + "step": 7692 + }, + { + "epoch": 0.8114978902953587, + "grad_norm": 0.584095299243927, + "learning_rate": 
0.00013023338257236655, + "loss": 1.4464, + "step": 7693 + }, + { + "epoch": 0.8116033755274261, + "grad_norm": 0.6487796306610107, + "learning_rate": 0.00013009203337041898, + "loss": 1.4406, + "step": 7694 + }, + { + "epoch": 0.8117088607594937, + "grad_norm": 0.6149018406867981, + "learning_rate": 0.0001299507536322502, + "loss": 1.4398, + "step": 7695 + }, + { + "epoch": 0.8118143459915612, + "grad_norm": 0.5853291749954224, + "learning_rate": 0.00012980954337369133, + "loss": 1.485, + "step": 7696 + }, + { + "epoch": 0.8119198312236287, + "grad_norm": 0.6846905946731567, + "learning_rate": 0.00012966840261056562, + "loss": 1.4366, + "step": 7697 + }, + { + "epoch": 0.8120253164556962, + "grad_norm": 0.680472195148468, + "learning_rate": 0.0001295273313586885, + "loss": 1.4478, + "step": 7698 + }, + { + "epoch": 0.8121308016877637, + "grad_norm": 0.6856051087379456, + "learning_rate": 0.00012938632963386808, + "loss": 1.446, + "step": 7699 + }, + { + "epoch": 0.8122362869198312, + "grad_norm": 0.5647977590560913, + "learning_rate": 0.00012924539745190402, + "loss": 1.4682, + "step": 7700 + }, + { + "epoch": 0.8123417721518987, + "grad_norm": 0.6874904036521912, + "learning_rate": 0.0001291045348285885, + "loss": 1.4337, + "step": 7701 + }, + { + "epoch": 0.8124472573839663, + "grad_norm": 0.7465366125106812, + "learning_rate": 0.00012896374177970602, + "loss": 1.4649, + "step": 7702 + }, + { + "epoch": 0.8125527426160337, + "grad_norm": 0.6406781673431396, + "learning_rate": 0.00012882301832103297, + "loss": 1.4184, + "step": 7703 + }, + { + "epoch": 0.8126582278481013, + "grad_norm": 0.6866405606269836, + "learning_rate": 0.0001286823644683382, + "loss": 1.4467, + "step": 7704 + }, + { + "epoch": 0.8127637130801688, + "grad_norm": 0.6601691246032715, + "learning_rate": 0.0001285417802373827, + "loss": 1.4462, + "step": 7705 + }, + { + "epoch": 0.8128691983122363, + "grad_norm": 0.6092931032180786, + "learning_rate": 0.00012840126564391961, + "loss": 1.432, + 
"step": 7706 + }, + { + "epoch": 0.8129746835443038, + "grad_norm": 0.6523966789245605, + "learning_rate": 0.00012826082070369402, + "loss": 1.476, + "step": 7707 + }, + { + "epoch": 0.8130801687763713, + "grad_norm": 0.7369384169578552, + "learning_rate": 0.00012812044543244395, + "loss": 1.4526, + "step": 7708 + }, + { + "epoch": 0.8131856540084388, + "grad_norm": 0.7093276381492615, + "learning_rate": 0.00012798013984589894, + "loss": 1.4882, + "step": 7709 + }, + { + "epoch": 0.8132911392405063, + "grad_norm": 0.6822093725204468, + "learning_rate": 0.0001278399039597809, + "loss": 1.4414, + "step": 7710 + }, + { + "epoch": 0.8133966244725739, + "grad_norm": 0.7106187343597412, + "learning_rate": 0.00012769973778980405, + "loss": 1.4707, + "step": 7711 + }, + { + "epoch": 0.8135021097046413, + "grad_norm": 0.7010236978530884, + "learning_rate": 0.00012755964135167464, + "loss": 1.4519, + "step": 7712 + }, + { + "epoch": 0.8136075949367089, + "grad_norm": 0.6030076146125793, + "learning_rate": 0.00012741961466109113, + "loss": 1.4355, + "step": 7713 + }, + { + "epoch": 0.8137130801687764, + "grad_norm": 0.6059897541999817, + "learning_rate": 0.00012727965773374434, + "loss": 1.4769, + "step": 7714 + }, + { + "epoch": 0.8138185654008439, + "grad_norm": 0.6870704293251038, + "learning_rate": 0.00012713977058531685, + "loss": 1.4526, + "step": 7715 + }, + { + "epoch": 0.8139240506329114, + "grad_norm": 0.6014997959136963, + "learning_rate": 0.0001269999532314841, + "loss": 1.467, + "step": 7716 + }, + { + "epoch": 0.814029535864979, + "grad_norm": 0.6066620945930481, + "learning_rate": 0.00012686020568791311, + "loss": 1.506, + "step": 7717 + }, + { + "epoch": 0.8141350210970464, + "grad_norm": 0.7169098854064941, + "learning_rate": 0.00012672052797026344, + "loss": 1.405, + "step": 7718 + }, + { + "epoch": 0.8142405063291139, + "grad_norm": 0.6062464714050293, + "learning_rate": 0.00012658092009418652, + "loss": 1.4275, + "step": 7719 + }, + { + "epoch": 
0.8143459915611815, + "grad_norm": 0.6435296535491943, + "learning_rate": 0.0001264413820753261, + "loss": 1.4601, + "step": 7720 + }, + { + "epoch": 0.8144514767932489, + "grad_norm": 0.6084923148155212, + "learning_rate": 0.0001263019139293182, + "loss": 1.4745, + "step": 7721 + }, + { + "epoch": 0.8145569620253165, + "grad_norm": 0.6100829243659973, + "learning_rate": 0.0001261625156717909, + "loss": 1.4398, + "step": 7722 + }, + { + "epoch": 0.814662447257384, + "grad_norm": 0.6898267865180969, + "learning_rate": 0.0001260231873183644, + "loss": 1.4798, + "step": 7723 + }, + { + "epoch": 0.8147679324894515, + "grad_norm": 0.635445237159729, + "learning_rate": 0.00012588392888465103, + "loss": 1.4457, + "step": 7724 + }, + { + "epoch": 0.814873417721519, + "grad_norm": 0.6249455213546753, + "learning_rate": 0.0001257447403862557, + "loss": 1.5048, + "step": 7725 + }, + { + "epoch": 0.8149789029535865, + "grad_norm": 0.5970811247825623, + "learning_rate": 0.00012560562183877507, + "loss": 1.462, + "step": 7726 + }, + { + "epoch": 0.815084388185654, + "grad_norm": 0.632494330406189, + "learning_rate": 0.00012546657325779805, + "loss": 1.4459, + "step": 7727 + }, + { + "epoch": 0.8151898734177215, + "grad_norm": 0.5775269269943237, + "learning_rate": 0.00012532759465890567, + "loss": 1.4383, + "step": 7728 + }, + { + "epoch": 0.8152953586497891, + "grad_norm": 0.6249797940254211, + "learning_rate": 0.00012518868605767118, + "loss": 1.45, + "step": 7729 + }, + { + "epoch": 0.8154008438818565, + "grad_norm": 0.6285806894302368, + "learning_rate": 0.00012504984746966003, + "loss": 1.4741, + "step": 7730 + }, + { + "epoch": 0.8155063291139241, + "grad_norm": 0.5768740773200989, + "learning_rate": 0.0001249110789104298, + "loss": 1.4385, + "step": 7731 + }, + { + "epoch": 0.8156118143459916, + "grad_norm": 0.6890150904655457, + "learning_rate": 0.00012477238039553006, + "loss": 1.4788, + "step": 7732 + }, + { + "epoch": 0.815717299578059, + "grad_norm": 
0.611193835735321, + "learning_rate": 0.00012463375194050267, + "loss": 1.4555, + "step": 7733 + }, + { + "epoch": 0.8158227848101266, + "grad_norm": 0.6102455854415894, + "learning_rate": 0.00012449519356088192, + "loss": 1.4328, + "step": 7734 + }, + { + "epoch": 0.8159282700421941, + "grad_norm": 0.5745158791542053, + "learning_rate": 0.0001243567052721937, + "loss": 1.4499, + "step": 7735 + }, + { + "epoch": 0.8160337552742616, + "grad_norm": 0.6163120865821838, + "learning_rate": 0.00012421828708995649, + "loss": 1.4601, + "step": 7736 + }, + { + "epoch": 0.8161392405063291, + "grad_norm": 0.5874541401863098, + "learning_rate": 0.00012407993902968057, + "loss": 1.4229, + "step": 7737 + }, + { + "epoch": 0.8162447257383966, + "grad_norm": 0.6259423494338989, + "learning_rate": 0.00012394166110686857, + "loss": 1.4784, + "step": 7738 + }, + { + "epoch": 0.8163502109704641, + "grad_norm": 0.6324284672737122, + "learning_rate": 0.0001238034533370153, + "loss": 1.5045, + "step": 7739 + }, + { + "epoch": 0.8164556962025317, + "grad_norm": 0.5956159234046936, + "learning_rate": 0.00012366531573560754, + "loss": 1.4749, + "step": 7740 + }, + { + "epoch": 0.8165611814345991, + "grad_norm": 0.6552072167396545, + "learning_rate": 0.00012352724831812424, + "loss": 1.4208, + "step": 7741 + }, + { + "epoch": 0.8166666666666667, + "grad_norm": 0.5850211977958679, + "learning_rate": 0.0001233892511000368, + "loss": 1.4219, + "step": 7742 + }, + { + "epoch": 0.8167721518987342, + "grad_norm": 0.6135067939758301, + "learning_rate": 0.00012325132409680829, + "loss": 1.4815, + "step": 7743 + }, + { + "epoch": 0.8168776371308016, + "grad_norm": 0.6887007355690002, + "learning_rate": 0.00012311346732389418, + "loss": 1.4682, + "step": 7744 + }, + { + "epoch": 0.8169831223628692, + "grad_norm": 0.5996866226196289, + "learning_rate": 0.000122975680796742, + "loss": 1.413, + "step": 7745 + }, + { + "epoch": 0.8170886075949367, + "grad_norm": 0.6311582922935486, + "learning_rate": 
0.00012283796453079146, + "loss": 1.4478, + "step": 7746 + }, + { + "epoch": 0.8171940928270042, + "grad_norm": 0.5886904001235962, + "learning_rate": 0.00012270031854147426, + "loss": 1.4336, + "step": 7747 + }, + { + "epoch": 0.8172995780590717, + "grad_norm": 0.7134320139884949, + "learning_rate": 0.0001225627428442143, + "loss": 1.4375, + "step": 7748 + }, + { + "epoch": 0.8174050632911393, + "grad_norm": 0.6439112424850464, + "learning_rate": 0.0001224252374544278, + "loss": 1.4933, + "step": 7749 + }, + { + "epoch": 0.8175105485232067, + "grad_norm": 0.5903220176696777, + "learning_rate": 0.00012228780238752264, + "loss": 1.4505, + "step": 7750 + }, + { + "epoch": 0.8176160337552743, + "grad_norm": 0.7398788928985596, + "learning_rate": 0.00012215043765889932, + "loss": 1.4579, + "step": 7751 + }, + { + "epoch": 0.8177215189873418, + "grad_norm": 0.6724784970283508, + "learning_rate": 0.00012201314328395032, + "loss": 1.3983, + "step": 7752 + }, + { + "epoch": 0.8178270042194092, + "grad_norm": 0.6580231189727783, + "learning_rate": 0.00012187591927806, + "loss": 1.4353, + "step": 7753 + }, + { + "epoch": 0.8179324894514768, + "grad_norm": 0.625626266002655, + "learning_rate": 0.0001217387656566051, + "loss": 1.4419, + "step": 7754 + }, + { + "epoch": 0.8180379746835443, + "grad_norm": 0.7745427489280701, + "learning_rate": 0.0001216016824349542, + "loss": 1.455, + "step": 7755 + }, + { + "epoch": 0.8181434599156118, + "grad_norm": 0.6503792405128479, + "learning_rate": 0.00012146466962846833, + "loss": 1.4479, + "step": 7756 + }, + { + "epoch": 0.8182489451476793, + "grad_norm": 0.6434838175773621, + "learning_rate": 0.00012132772725250038, + "loss": 1.4482, + "step": 7757 + }, + { + "epoch": 0.8183544303797469, + "grad_norm": 0.6692759990692139, + "learning_rate": 0.0001211908553223954, + "loss": 1.419, + "step": 7758 + }, + { + "epoch": 0.8184599156118143, + "grad_norm": 0.5919449329376221, + "learning_rate": 0.00012105405385349047, + "loss": 1.4771, + 
"step": 7759 + }, + { + "epoch": 0.8185654008438819, + "grad_norm": 0.7223723530769348, + "learning_rate": 0.00012091732286111514, + "loss": 1.4795, + "step": 7760 + }, + { + "epoch": 0.8186708860759494, + "grad_norm": 0.6475914716720581, + "learning_rate": 0.00012078066236059068, + "loss": 1.4284, + "step": 7761 + }, + { + "epoch": 0.8187763713080168, + "grad_norm": 0.6206196546554565, + "learning_rate": 0.00012064407236723066, + "loss": 1.4268, + "step": 7762 + }, + { + "epoch": 0.8188818565400844, + "grad_norm": 0.698037326335907, + "learning_rate": 0.00012050755289634049, + "loss": 1.4923, + "step": 7763 + }, + { + "epoch": 0.8189873417721519, + "grad_norm": 0.6653298139572144, + "learning_rate": 0.00012037110396321796, + "loss": 1.4835, + "step": 7764 + }, + { + "epoch": 0.8190928270042194, + "grad_norm": 0.665110170841217, + "learning_rate": 0.0001202347255831529, + "loss": 1.4447, + "step": 7765 + }, + { + "epoch": 0.8191983122362869, + "grad_norm": 0.6075855493545532, + "learning_rate": 0.0001200984177714271, + "loss": 1.4851, + "step": 7766 + }, + { + "epoch": 0.8193037974683545, + "grad_norm": 0.7229721546173096, + "learning_rate": 0.00011996218054331434, + "loss": 1.457, + "step": 7767 + }, + { + "epoch": 0.8194092827004219, + "grad_norm": 0.705838143825531, + "learning_rate": 0.00011982601391408115, + "loss": 1.4571, + "step": 7768 + }, + { + "epoch": 0.8195147679324895, + "grad_norm": 0.5939667224884033, + "learning_rate": 0.00011968991789898533, + "loss": 1.4961, + "step": 7769 + }, + { + "epoch": 0.819620253164557, + "grad_norm": 0.627549946308136, + "learning_rate": 0.00011955389251327737, + "loss": 1.4055, + "step": 7770 + }, + { + "epoch": 0.8197257383966244, + "grad_norm": 0.7317162156105042, + "learning_rate": 0.00011941793777219937, + "loss": 1.441, + "step": 7771 + }, + { + "epoch": 0.819831223628692, + "grad_norm": 0.6549882292747498, + "learning_rate": 0.00011928205369098574, + "loss": 1.4332, + "step": 7772 + }, + { + "epoch": 
0.8199367088607595, + "grad_norm": 0.609716534614563, + "learning_rate": 0.00011914624028486315, + "loss": 1.4297, + "step": 7773 + }, + { + "epoch": 0.820042194092827, + "grad_norm": 0.6358229517936707, + "learning_rate": 0.00011901049756905, + "loss": 1.3988, + "step": 7774 + }, + { + "epoch": 0.8201476793248945, + "grad_norm": 0.6959248185157776, + "learning_rate": 0.00011887482555875695, + "loss": 1.4706, + "step": 7775 + }, + { + "epoch": 0.8202531645569621, + "grad_norm": 0.6222283840179443, + "learning_rate": 0.00011873922426918668, + "loss": 1.4771, + "step": 7776 + }, + { + "epoch": 0.8203586497890295, + "grad_norm": 0.6078609228134155, + "learning_rate": 0.0001186036937155342, + "loss": 1.479, + "step": 7777 + }, + { + "epoch": 0.820464135021097, + "grad_norm": 0.6441397070884705, + "learning_rate": 0.00011846823391298628, + "loss": 1.4553, + "step": 7778 + }, + { + "epoch": 0.8205696202531646, + "grad_norm": 0.6652495861053467, + "learning_rate": 0.00011833284487672185, + "loss": 1.4252, + "step": 7779 + }, + { + "epoch": 0.820675105485232, + "grad_norm": 0.7454142570495605, + "learning_rate": 0.00011819752662191197, + "loss": 1.4806, + "step": 7780 + }, + { + "epoch": 0.8207805907172996, + "grad_norm": 0.6127336025238037, + "learning_rate": 0.00011806227916371964, + "loss": 1.4896, + "step": 7781 + }, + { + "epoch": 0.8208860759493671, + "grad_norm": 0.7028640508651733, + "learning_rate": 0.0001179271025173001, + "loss": 1.4593, + "step": 7782 + }, + { + "epoch": 0.8209915611814346, + "grad_norm": 0.6814848780632019, + "learning_rate": 0.00011779199669780046, + "loss": 1.4587, + "step": 7783 + }, + { + "epoch": 0.8210970464135021, + "grad_norm": 0.643552839756012, + "learning_rate": 0.00011765696172036006, + "loss": 1.4812, + "step": 7784 + }, + { + "epoch": 0.8212025316455697, + "grad_norm": 0.7360714673995972, + "learning_rate": 0.00011752199760011017, + "loss": 1.4529, + "step": 7785 + }, + { + "epoch": 0.8213080168776371, + "grad_norm": 
0.5801376700401306, + "learning_rate": 0.00011738710435217431, + "loss": 1.4887, + "step": 7786 + }, + { + "epoch": 0.8214135021097047, + "grad_norm": 0.5902984738349915, + "learning_rate": 0.00011725228199166805, + "loss": 1.4954, + "step": 7787 + }, + { + "epoch": 0.8215189873417722, + "grad_norm": 0.6308233737945557, + "learning_rate": 0.00011711753053369861, + "loss": 1.4547, + "step": 7788 + }, + { + "epoch": 0.8216244725738396, + "grad_norm": 0.6476866602897644, + "learning_rate": 0.00011698284999336578, + "loss": 1.436, + "step": 7789 + }, + { + "epoch": 0.8217299578059072, + "grad_norm": 0.6055675745010376, + "learning_rate": 0.00011684824038576115, + "loss": 1.4491, + "step": 7790 + }, + { + "epoch": 0.8218354430379747, + "grad_norm": 0.6282302141189575, + "learning_rate": 0.00011671370172596829, + "loss": 1.4583, + "step": 7791 + }, + { + "epoch": 0.8219409282700422, + "grad_norm": 0.6247206330299377, + "learning_rate": 0.00011657923402906309, + "loss": 1.5073, + "step": 7792 + }, + { + "epoch": 0.8220464135021097, + "grad_norm": 0.6574622392654419, + "learning_rate": 0.000116444837310113, + "loss": 1.4649, + "step": 7793 + }, + { + "epoch": 0.8221518987341773, + "grad_norm": 0.682826042175293, + "learning_rate": 0.00011631051158417828, + "loss": 1.4248, + "step": 7794 + }, + { + "epoch": 0.8222573839662447, + "grad_norm": 0.5903988480567932, + "learning_rate": 0.00011617625686631056, + "loss": 1.4231, + "step": 7795 + }, + { + "epoch": 0.8223628691983123, + "grad_norm": 0.6643590331077576, + "learning_rate": 0.00011604207317155383, + "loss": 1.4604, + "step": 7796 + }, + { + "epoch": 0.8224683544303798, + "grad_norm": 0.6622309684753418, + "learning_rate": 0.00011590796051494395, + "loss": 1.4527, + "step": 7797 + }, + { + "epoch": 0.8225738396624472, + "grad_norm": 0.5903869271278381, + "learning_rate": 0.00011577391891150901, + "loss": 1.4388, + "step": 7798 + }, + { + "epoch": 0.8226793248945148, + "grad_norm": 0.6051634550094604, + "learning_rate": 
0.00011563994837626898, + "loss": 1.4812, + "step": 7799 + }, + { + "epoch": 0.8227848101265823, + "grad_norm": 0.5830221176147461, + "learning_rate": 0.00011550604892423593, + "loss": 1.4167, + "step": 7800 + }, + { + "epoch": 0.8228902953586498, + "grad_norm": 0.6005657315254211, + "learning_rate": 0.00011537222057041396, + "loss": 1.4675, + "step": 7801 + }, + { + "epoch": 0.8229957805907173, + "grad_norm": 0.6424716114997864, + "learning_rate": 0.00011523846332979907, + "loss": 1.4411, + "step": 7802 + }, + { + "epoch": 0.8231012658227848, + "grad_norm": 0.6084069609642029, + "learning_rate": 0.00011510477721737974, + "loss": 1.4446, + "step": 7803 + }, + { + "epoch": 0.8232067510548523, + "grad_norm": 0.5586773753166199, + "learning_rate": 0.00011497116224813604, + "loss": 1.4612, + "step": 7804 + }, + { + "epoch": 0.8233122362869199, + "grad_norm": 0.5983366966247559, + "learning_rate": 0.0001148376184370401, + "loss": 1.4264, + "step": 7805 + }, + { + "epoch": 0.8234177215189873, + "grad_norm": 0.6638029217720032, + "learning_rate": 0.00011470414579905617, + "loss": 1.4947, + "step": 7806 + }, + { + "epoch": 0.8235232067510548, + "grad_norm": 0.626783549785614, + "learning_rate": 0.00011457074434914067, + "loss": 1.4623, + "step": 7807 + }, + { + "epoch": 0.8236286919831224, + "grad_norm": 0.5791128873825073, + "learning_rate": 0.00011443741410224173, + "loss": 1.4153, + "step": 7808 + }, + { + "epoch": 0.8237341772151898, + "grad_norm": 0.594534158706665, + "learning_rate": 0.00011430415507329975, + "loss": 1.465, + "step": 7809 + }, + { + "epoch": 0.8238396624472574, + "grad_norm": 0.5809349417686462, + "learning_rate": 0.0001141709672772471, + "loss": 1.4399, + "step": 7810 + }, + { + "epoch": 0.8239451476793249, + "grad_norm": 0.6402153968811035, + "learning_rate": 0.00011403785072900793, + "loss": 1.4338, + "step": 7811 + }, + { + "epoch": 0.8240506329113924, + "grad_norm": 0.631700873374939, + "learning_rate": 0.00011390480544349891, + "loss": 1.454, + 
"step": 7812 + }, + { + "epoch": 0.8241561181434599, + "grad_norm": 0.6067496538162231, + "learning_rate": 0.00011377183143562833, + "loss": 1.4411, + "step": 7813 + }, + { + "epoch": 0.8242616033755275, + "grad_norm": 0.6258790493011475, + "learning_rate": 0.00011363892872029655, + "loss": 1.5027, + "step": 7814 + }, + { + "epoch": 0.8243670886075949, + "grad_norm": 0.5799403190612793, + "learning_rate": 0.00011350609731239597, + "loss": 1.4515, + "step": 7815 + }, + { + "epoch": 0.8244725738396624, + "grad_norm": 0.5644058585166931, + "learning_rate": 0.00011337333722681104, + "loss": 1.4196, + "step": 7816 + }, + { + "epoch": 0.82457805907173, + "grad_norm": 0.6330245733261108, + "learning_rate": 0.00011324064847841817, + "loss": 1.4345, + "step": 7817 + }, + { + "epoch": 0.8246835443037974, + "grad_norm": 0.6403197646141052, + "learning_rate": 0.00011310803108208581, + "loss": 1.484, + "step": 7818 + }, + { + "epoch": 0.824789029535865, + "grad_norm": 0.5839745998382568, + "learning_rate": 0.00011297548505267424, + "loss": 1.4682, + "step": 7819 + }, + { + "epoch": 0.8248945147679325, + "grad_norm": 0.5646371245384216, + "learning_rate": 0.00011284301040503625, + "loss": 1.4886, + "step": 7820 + }, + { + "epoch": 0.825, + "grad_norm": 0.6500810384750366, + "learning_rate": 0.00011271060715401604, + "loss": 1.4726, + "step": 7821 + }, + { + "epoch": 0.8251054852320675, + "grad_norm": 0.6809582710266113, + "learning_rate": 0.00011257827531445017, + "loss": 1.4329, + "step": 7822 + }, + { + "epoch": 0.825210970464135, + "grad_norm": 0.6046003103256226, + "learning_rate": 0.00011244601490116693, + "loss": 1.4584, + "step": 7823 + }, + { + "epoch": 0.8253164556962025, + "grad_norm": 0.6270211338996887, + "learning_rate": 0.00011231382592898698, + "loss": 1.4242, + "step": 7824 + }, + { + "epoch": 0.82542194092827, + "grad_norm": 0.623714804649353, + "learning_rate": 0.00011218170841272254, + "loss": 1.4395, + "step": 7825 + }, + { + "epoch": 0.8255274261603376, + 
"grad_norm": 0.6585409641265869, + "learning_rate": 0.00011204966236717811, + "loss": 1.453, + "step": 7826 + }, + { + "epoch": 0.825632911392405, + "grad_norm": 0.6198793649673462, + "learning_rate": 0.0001119176878071502, + "loss": 1.4819, + "step": 7827 + }, + { + "epoch": 0.8257383966244726, + "grad_norm": 0.6186689138412476, + "learning_rate": 0.00011178578474742687, + "loss": 1.4591, + "step": 7828 + }, + { + "epoch": 0.8258438818565401, + "grad_norm": 0.6359209418296814, + "learning_rate": 0.00011165395320278898, + "loss": 1.4632, + "step": 7829 + }, + { + "epoch": 0.8259493670886076, + "grad_norm": 0.6937469244003296, + "learning_rate": 0.0001115221931880088, + "loss": 1.4109, + "step": 7830 + }, + { + "epoch": 0.8260548523206751, + "grad_norm": 0.6200176477432251, + "learning_rate": 0.00011139050471785051, + "loss": 1.4538, + "step": 7831 + }, + { + "epoch": 0.8261603375527427, + "grad_norm": 0.6304497122764587, + "learning_rate": 0.00011125888780707064, + "loss": 1.4358, + "step": 7832 + }, + { + "epoch": 0.8262658227848101, + "grad_norm": 0.5927324891090393, + "learning_rate": 0.00011112734247041739, + "loss": 1.4403, + "step": 7833 + }, + { + "epoch": 0.8263713080168776, + "grad_norm": 0.6248655915260315, + "learning_rate": 0.00011099586872263107, + "loss": 1.4406, + "step": 7834 + }, + { + "epoch": 0.8264767932489452, + "grad_norm": 0.6694856882095337, + "learning_rate": 0.00011086446657844412, + "loss": 1.4266, + "step": 7835 + }, + { + "epoch": 0.8265822784810126, + "grad_norm": 0.5705370903015137, + "learning_rate": 0.0001107331360525807, + "loss": 1.3952, + "step": 7836 + }, + { + "epoch": 0.8266877637130802, + "grad_norm": 0.6894233822822571, + "learning_rate": 0.00011060187715975686, + "loss": 1.4661, + "step": 7837 + }, + { + "epoch": 0.8267932489451477, + "grad_norm": 0.6227656006813049, + "learning_rate": 0.00011047068991468118, + "loss": 1.4835, + "step": 7838 + }, + { + "epoch": 0.8268987341772152, + "grad_norm": 0.5700816512107849, + 
"learning_rate": 0.00011033957433205364, + "loss": 1.3883, + "step": 7839 + }, + { + "epoch": 0.8270042194092827, + "grad_norm": 0.6113068461418152, + "learning_rate": 0.00011020853042656648, + "loss": 1.441, + "step": 7840 + }, + { + "epoch": 0.8271097046413503, + "grad_norm": 0.6778412461280823, + "learning_rate": 0.00011007755821290371, + "loss": 1.4666, + "step": 7841 + }, + { + "epoch": 0.8272151898734177, + "grad_norm": 0.6416819095611572, + "learning_rate": 0.00010994665770574162, + "loss": 1.4507, + "step": 7842 + }, + { + "epoch": 0.8273206751054852, + "grad_norm": 0.6836413145065308, + "learning_rate": 0.000109815828919748, + "loss": 1.4587, + "step": 7843 + }, + { + "epoch": 0.8274261603375528, + "grad_norm": 0.6202955842018127, + "learning_rate": 0.00010968507186958302, + "loss": 1.4204, + "step": 7844 + }, + { + "epoch": 0.8275316455696202, + "grad_norm": 0.60715252161026, + "learning_rate": 0.00010955438656989849, + "loss": 1.4322, + "step": 7845 + }, + { + "epoch": 0.8276371308016878, + "grad_norm": 0.6594533324241638, + "learning_rate": 0.00010942377303533865, + "loss": 1.4471, + "step": 7846 + }, + { + "epoch": 0.8277426160337553, + "grad_norm": 0.6284698247909546, + "learning_rate": 0.00010929323128053927, + "loss": 1.4645, + "step": 7847 + }, + { + "epoch": 0.8278481012658228, + "grad_norm": 0.724083423614502, + "learning_rate": 0.00010916276132012818, + "loss": 1.466, + "step": 7848 + }, + { + "epoch": 0.8279535864978903, + "grad_norm": 0.5993248820304871, + "learning_rate": 0.00010903236316872514, + "loss": 1.4212, + "step": 7849 + }, + { + "epoch": 0.8280590717299579, + "grad_norm": 0.5903316140174866, + "learning_rate": 0.000108902036840942, + "loss": 1.4462, + "step": 7850 + }, + { + "epoch": 0.8281645569620253, + "grad_norm": 0.6142461895942688, + "learning_rate": 0.00010877178235138239, + "loss": 1.4654, + "step": 7851 + }, + { + "epoch": 0.8282700421940928, + "grad_norm": 0.5899472832679749, + "learning_rate": 0.00010864159971464205, + 
"loss": 1.4166, + "step": 7852 + }, + { + "epoch": 0.8283755274261604, + "grad_norm": 0.7141551375389099, + "learning_rate": 0.00010851148894530858, + "loss": 1.4469, + "step": 7853 + }, + { + "epoch": 0.8284810126582278, + "grad_norm": 0.6281754970550537, + "learning_rate": 0.00010838145005796138, + "loss": 1.4601, + "step": 7854 + }, + { + "epoch": 0.8285864978902954, + "grad_norm": 0.5958052277565002, + "learning_rate": 0.00010825148306717222, + "loss": 1.4431, + "step": 7855 + }, + { + "epoch": 0.8286919831223629, + "grad_norm": 0.6012641191482544, + "learning_rate": 0.00010812158798750438, + "loss": 1.4254, + "step": 7856 + }, + { + "epoch": 0.8287974683544304, + "grad_norm": 0.6587985754013062, + "learning_rate": 0.00010799176483351337, + "loss": 1.4328, + "step": 7857 + }, + { + "epoch": 0.8289029535864979, + "grad_norm": 0.6193435192108154, + "learning_rate": 0.00010786201361974646, + "loss": 1.4206, + "step": 7858 + }, + { + "epoch": 0.8290084388185655, + "grad_norm": 0.5782809257507324, + "learning_rate": 0.00010773233436074287, + "loss": 1.4149, + "step": 7859 + }, + { + "epoch": 0.8291139240506329, + "grad_norm": 0.6895678043365479, + "learning_rate": 0.00010760272707103389, + "loss": 1.4151, + "step": 7860 + }, + { + "epoch": 0.8292194092827004, + "grad_norm": 0.6293073296546936, + "learning_rate": 0.00010747319176514264, + "loss": 1.4592, + "step": 7861 + }, + { + "epoch": 0.829324894514768, + "grad_norm": 0.6017540097236633, + "learning_rate": 0.00010734372845758411, + "loss": 1.4695, + "step": 7862 + }, + { + "epoch": 0.8294303797468354, + "grad_norm": 0.5803259611129761, + "learning_rate": 0.00010721433716286527, + "loss": 1.4392, + "step": 7863 + }, + { + "epoch": 0.829535864978903, + "grad_norm": 0.610072135925293, + "learning_rate": 0.00010708501789548527, + "loss": 1.4557, + "step": 7864 + }, + { + "epoch": 0.8296413502109705, + "grad_norm": 0.7090689539909363, + "learning_rate": 0.00010695577066993495, + "loss": 1.4118, + "step": 7865 + }, + { 
+ "epoch": 0.829746835443038, + "grad_norm": 0.6067066788673401, + "learning_rate": 0.00010682659550069704, + "loss": 1.4878, + "step": 7866 + }, + { + "epoch": 0.8298523206751055, + "grad_norm": 0.6499246954917908, + "learning_rate": 0.00010669749240224621, + "loss": 1.4141, + "step": 7867 + }, + { + "epoch": 0.8299578059071729, + "grad_norm": 0.6068961024284363, + "learning_rate": 0.00010656846138904916, + "loss": 1.4122, + "step": 7868 + }, + { + "epoch": 0.8300632911392405, + "grad_norm": 0.6286136507987976, + "learning_rate": 0.00010643950247556447, + "loss": 1.4774, + "step": 7869 + }, + { + "epoch": 0.830168776371308, + "grad_norm": 0.6277487277984619, + "learning_rate": 0.00010631061567624259, + "loss": 1.4518, + "step": 7870 + }, + { + "epoch": 0.8302742616033755, + "grad_norm": 0.5979680418968201, + "learning_rate": 0.00010618180100552596, + "loss": 1.5069, + "step": 7871 + }, + { + "epoch": 0.830379746835443, + "grad_norm": 0.6101660132408142, + "learning_rate": 0.00010605305847784871, + "loss": 1.4655, + "step": 7872 + }, + { + "epoch": 0.8304852320675106, + "grad_norm": 0.5928319096565247, + "learning_rate": 0.00010592438810763747, + "loss": 1.4331, + "step": 7873 + }, + { + "epoch": 0.830590717299578, + "grad_norm": 0.6367947459220886, + "learning_rate": 0.00010579578990931019, + "loss": 1.4693, + "step": 7874 + }, + { + "epoch": 0.8306962025316456, + "grad_norm": 0.599438488483429, + "learning_rate": 0.00010566726389727693, + "loss": 1.4511, + "step": 7875 + }, + { + "epoch": 0.8308016877637131, + "grad_norm": 0.6190222501754761, + "learning_rate": 0.00010553881008593969, + "loss": 1.4742, + "step": 7876 + }, + { + "epoch": 0.8309071729957805, + "grad_norm": 0.61036616563797, + "learning_rate": 0.00010541042848969235, + "loss": 1.4744, + "step": 7877 + }, + { + "epoch": 0.8310126582278481, + "grad_norm": 0.662693440914154, + "learning_rate": 0.00010528211912292066, + "loss": 1.4681, + "step": 7878 + }, + { + "epoch": 0.8311181434599156, + 
"grad_norm": 0.62635737657547, + "learning_rate": 0.00010515388200000245, + "loss": 1.4649, + "step": 7879 + }, + { + "epoch": 0.8312236286919831, + "grad_norm": 0.6695979833602905, + "learning_rate": 0.00010502571713530706, + "loss": 1.4545, + "step": 7880 + }, + { + "epoch": 0.8313291139240506, + "grad_norm": 0.5960720181465149, + "learning_rate": 0.00010489762454319634, + "loss": 1.4502, + "step": 7881 + }, + { + "epoch": 0.8314345991561182, + "grad_norm": 0.653224766254425, + "learning_rate": 0.00010476960423802356, + "loss": 1.4431, + "step": 7882 + }, + { + "epoch": 0.8315400843881856, + "grad_norm": 0.6261045336723328, + "learning_rate": 0.00010464165623413408, + "loss": 1.434, + "step": 7883 + }, + { + "epoch": 0.8316455696202532, + "grad_norm": 0.6903164982795715, + "learning_rate": 0.00010451378054586508, + "loss": 1.5033, + "step": 7884 + }, + { + "epoch": 0.8317510548523207, + "grad_norm": 0.6531060338020325, + "learning_rate": 0.00010438597718754561, + "loss": 1.4942, + "step": 7885 + }, + { + "epoch": 0.8318565400843881, + "grad_norm": 0.5651252269744873, + "learning_rate": 0.00010425824617349671, + "loss": 1.488, + "step": 7886 + }, + { + "epoch": 0.8319620253164557, + "grad_norm": 0.6089178323745728, + "learning_rate": 0.00010413058751803129, + "loss": 1.4802, + "step": 7887 + }, + { + "epoch": 0.8320675105485232, + "grad_norm": 0.6276799440383911, + "learning_rate": 0.0001040030012354542, + "loss": 1.4328, + "step": 7888 + }, + { + "epoch": 0.8321729957805907, + "grad_norm": 0.5753511190414429, + "learning_rate": 0.00010387548734006195, + "loss": 1.4481, + "step": 7889 + }, + { + "epoch": 0.8322784810126582, + "grad_norm": 0.602350652217865, + "learning_rate": 0.00010374804584614308, + "loss": 1.4608, + "step": 7890 + }, + { + "epoch": 0.8323839662447258, + "grad_norm": 0.6291635632514954, + "learning_rate": 0.00010362067676797837, + "loss": 1.4219, + "step": 7891 + }, + { + "epoch": 0.8324894514767932, + "grad_norm": 0.6274732947349548, + 
"learning_rate": 0.00010349338011983998, + "loss": 1.4137, + "step": 7892 + }, + { + "epoch": 0.8325949367088608, + "grad_norm": 0.6144249439239502, + "learning_rate": 0.00010336615591599204, + "loss": 1.4481, + "step": 7893 + }, + { + "epoch": 0.8327004219409283, + "grad_norm": 0.6297507286071777, + "learning_rate": 0.00010323900417069079, + "loss": 1.4234, + "step": 7894 + }, + { + "epoch": 0.8328059071729957, + "grad_norm": 0.5876002907752991, + "learning_rate": 0.00010311192489818421, + "loss": 1.4953, + "step": 7895 + }, + { + "epoch": 0.8329113924050633, + "grad_norm": 0.6134459376335144, + "learning_rate": 0.0001029849181127121, + "loss": 1.4411, + "step": 7896 + }, + { + "epoch": 0.8330168776371308, + "grad_norm": 0.619674563407898, + "learning_rate": 0.00010285798382850614, + "loss": 1.4367, + "step": 7897 + }, + { + "epoch": 0.8331223628691983, + "grad_norm": 0.6945898532867432, + "learning_rate": 0.00010273112205979012, + "loss": 1.4542, + "step": 7898 + }, + { + "epoch": 0.8332278481012658, + "grad_norm": 0.6845241189002991, + "learning_rate": 0.00010260433282077944, + "loss": 1.4408, + "step": 7899 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.6135386228561401, + "learning_rate": 0.00010247761612568129, + "loss": 1.4669, + "step": 7900 + }, + { + "epoch": 0.8334388185654008, + "grad_norm": 0.5694719552993774, + "learning_rate": 0.00010235097198869525, + "loss": 1.4444, + "step": 7901 + }, + { + "epoch": 0.8335443037974684, + "grad_norm": 0.7411350011825562, + "learning_rate": 0.0001022244004240123, + "loss": 1.4693, + "step": 7902 + }, + { + "epoch": 0.8336497890295359, + "grad_norm": 0.7679092288017273, + "learning_rate": 0.00010209790144581533, + "loss": 1.4102, + "step": 7903 + }, + { + "epoch": 0.8337552742616033, + "grad_norm": 0.5769078731536865, + "learning_rate": 0.00010197147506827925, + "loss": 1.4194, + "step": 7904 + }, + { + "epoch": 0.8338607594936709, + "grad_norm": 0.6544285416603088, + "learning_rate": 
0.00010184512130557074, + "loss": 1.476, + "step": 7905 + }, + { + "epoch": 0.8339662447257384, + "grad_norm": 0.741223156452179, + "learning_rate": 0.0001017188401718484, + "loss": 1.4719, + "step": 7906 + }, + { + "epoch": 0.8340717299578059, + "grad_norm": 0.6969137787818909, + "learning_rate": 0.00010159263168126265, + "loss": 1.46, + "step": 7907 + }, + { + "epoch": 0.8341772151898734, + "grad_norm": 0.6018732786178589, + "learning_rate": 0.00010146649584795575, + "loss": 1.4692, + "step": 7908 + }, + { + "epoch": 0.834282700421941, + "grad_norm": 0.714189887046814, + "learning_rate": 0.00010134043268606191, + "loss": 1.4598, + "step": 7909 + }, + { + "epoch": 0.8343881856540084, + "grad_norm": 0.828307569026947, + "learning_rate": 0.0001012144422097069, + "loss": 1.4644, + "step": 7910 + }, + { + "epoch": 0.834493670886076, + "grad_norm": 0.7029048204421997, + "learning_rate": 0.00010108852443300895, + "loss": 1.4544, + "step": 7911 + }, + { + "epoch": 0.8345991561181435, + "grad_norm": 0.6001209020614624, + "learning_rate": 0.00010096267937007758, + "loss": 1.4512, + "step": 7912 + }, + { + "epoch": 0.8347046413502109, + "grad_norm": 0.7355759143829346, + "learning_rate": 0.00010083690703501445, + "loss": 1.4631, + "step": 7913 + }, + { + "epoch": 0.8348101265822785, + "grad_norm": 0.6633307933807373, + "learning_rate": 0.00010071120744191284, + "loss": 1.4165, + "step": 7914 + }, + { + "epoch": 0.834915611814346, + "grad_norm": 0.581228494644165, + "learning_rate": 0.0001005855806048581, + "loss": 1.4767, + "step": 7915 + }, + { + "epoch": 0.8350210970464135, + "grad_norm": 0.6294077038764954, + "learning_rate": 0.00010046002653792726, + "loss": 1.4511, + "step": 7916 + }, + { + "epoch": 0.835126582278481, + "grad_norm": 0.5989618897438049, + "learning_rate": 0.00010033454525518945, + "loss": 1.4501, + "step": 7917 + }, + { + "epoch": 0.8352320675105486, + "grad_norm": 0.6195353269577026, + "learning_rate": 0.0001002091367707053, + "loss": 1.4579, + "step": 
7918 + }, + { + "epoch": 0.835337552742616, + "grad_norm": 0.6265462636947632, + "learning_rate": 0.00010008380109852752, + "loss": 1.444, + "step": 7919 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 0.6419804692268372, + "learning_rate": 9.995853825270052e-05, + "loss": 1.4616, + "step": 7920 + }, + { + "epoch": 0.8355485232067511, + "grad_norm": 0.5829523205757141, + "learning_rate": 9.983334824726081e-05, + "loss": 1.465, + "step": 7921 + }, + { + "epoch": 0.8356540084388185, + "grad_norm": 0.8561922311782837, + "learning_rate": 9.970823109623644e-05, + "loss": 1.4632, + "step": 7922 + }, + { + "epoch": 0.8357594936708861, + "grad_norm": 0.6441662907600403, + "learning_rate": 9.958318681364745e-05, + "loss": 1.4864, + "step": 7923 + }, + { + "epoch": 0.8358649789029536, + "grad_norm": 0.6068428754806519, + "learning_rate": 9.94582154135056e-05, + "loss": 1.4604, + "step": 7924 + }, + { + "epoch": 0.8359704641350211, + "grad_norm": 0.7167807221412659, + "learning_rate": 9.933331690981473e-05, + "loss": 1.4468, + "step": 7925 + }, + { + "epoch": 0.8360759493670886, + "grad_norm": 0.6626747846603394, + "learning_rate": 9.920849131657011e-05, + "loss": 1.483, + "step": 7926 + }, + { + "epoch": 0.8361814345991562, + "grad_norm": 0.5690462589263916, + "learning_rate": 9.908373864775915e-05, + "loss": 1.4221, + "step": 7927 + }, + { + "epoch": 0.8362869198312236, + "grad_norm": 0.5936063528060913, + "learning_rate": 9.895905891736118e-05, + "loss": 1.4611, + "step": 7928 + }, + { + "epoch": 0.8363924050632912, + "grad_norm": 0.5901030898094177, + "learning_rate": 9.883445213934675e-05, + "loss": 1.4678, + "step": 7929 + }, + { + "epoch": 0.8364978902953587, + "grad_norm": 0.6632336974143982, + "learning_rate": 9.870991832767919e-05, + "loss": 1.4361, + "step": 7930 + }, + { + "epoch": 0.8366033755274261, + "grad_norm": 0.6320486664772034, + "learning_rate": 9.858545749631287e-05, + "loss": 1.454, + "step": 7931 + }, + { + "epoch": 0.8367088607594937, + 
"grad_norm": 0.5996153950691223, + "learning_rate": 9.846106965919427e-05, + "loss": 1.4419, + "step": 7932 + }, + { + "epoch": 0.8368143459915611, + "grad_norm": 0.7218416333198547, + "learning_rate": 9.833675483026175e-05, + "loss": 1.4735, + "step": 7933 + }, + { + "epoch": 0.8369198312236287, + "grad_norm": 0.7303300499916077, + "learning_rate": 9.821251302344525e-05, + "loss": 1.4575, + "step": 7934 + }, + { + "epoch": 0.8370253164556962, + "grad_norm": 0.6423093676567078, + "learning_rate": 9.80883442526668e-05, + "loss": 1.4357, + "step": 7935 + }, + { + "epoch": 0.8371308016877637, + "grad_norm": 0.6022685766220093, + "learning_rate": 9.79642485318401e-05, + "loss": 1.4481, + "step": 7936 + }, + { + "epoch": 0.8372362869198312, + "grad_norm": 0.6587350964546204, + "learning_rate": 9.78402258748708e-05, + "loss": 1.433, + "step": 7937 + }, + { + "epoch": 0.8373417721518988, + "grad_norm": 0.6432785987854004, + "learning_rate": 9.771627629565599e-05, + "loss": 1.4552, + "step": 7938 + }, + { + "epoch": 0.8374472573839662, + "grad_norm": 0.6155443787574768, + "learning_rate": 9.759239980808494e-05, + "loss": 1.4572, + "step": 7939 + }, + { + "epoch": 0.8375527426160337, + "grad_norm": 0.5851820707321167, + "learning_rate": 9.746859642603884e-05, + "loss": 1.4648, + "step": 7940 + }, + { + "epoch": 0.8376582278481013, + "grad_norm": 0.5799926519393921, + "learning_rate": 9.734486616339027e-05, + "loss": 1.4549, + "step": 7941 + }, + { + "epoch": 0.8377637130801687, + "grad_norm": 0.6542601585388184, + "learning_rate": 9.722120903400392e-05, + "loss": 1.4736, + "step": 7942 + }, + { + "epoch": 0.8378691983122363, + "grad_norm": 0.5822323560714722, + "learning_rate": 9.709762505173617e-05, + "loss": 1.4541, + "step": 7943 + }, + { + "epoch": 0.8379746835443038, + "grad_norm": 0.6153427362442017, + "learning_rate": 9.697411423043521e-05, + "loss": 1.4716, + "step": 7944 + }, + { + "epoch": 0.8380801687763713, + "grad_norm": 0.5982612371444702, + "learning_rate": 
9.685067658394095e-05, + "loss": 1.4367, + "step": 7945 + }, + { + "epoch": 0.8381856540084388, + "grad_norm": 0.6114547848701477, + "learning_rate": 9.672731212608535e-05, + "loss": 1.4588, + "step": 7946 + }, + { + "epoch": 0.8382911392405064, + "grad_norm": 0.5772746801376343, + "learning_rate": 9.660402087069192e-05, + "loss": 1.455, + "step": 7947 + }, + { + "epoch": 0.8383966244725738, + "grad_norm": 0.6304001808166504, + "learning_rate": 9.648080283157604e-05, + "loss": 1.428, + "step": 7948 + }, + { + "epoch": 0.8385021097046413, + "grad_norm": 0.6209251880645752, + "learning_rate": 9.635765802254482e-05, + "loss": 1.4556, + "step": 7949 + }, + { + "epoch": 0.8386075949367089, + "grad_norm": 0.5832303166389465, + "learning_rate": 9.623458645739755e-05, + "loss": 1.4661, + "step": 7950 + }, + { + "epoch": 0.8387130801687763, + "grad_norm": 0.5848055481910706, + "learning_rate": 9.611158814992479e-05, + "loss": 1.4641, + "step": 7951 + }, + { + "epoch": 0.8388185654008439, + "grad_norm": 0.6161857843399048, + "learning_rate": 9.598866311390919e-05, + "loss": 1.4106, + "step": 7952 + }, + { + "epoch": 0.8389240506329114, + "grad_norm": 0.5883482098579407, + "learning_rate": 9.586581136312506e-05, + "loss": 1.4512, + "step": 7953 + }, + { + "epoch": 0.8390295358649789, + "grad_norm": 0.5761998891830444, + "learning_rate": 9.574303291133862e-05, + "loss": 1.4065, + "step": 7954 + }, + { + "epoch": 0.8391350210970464, + "grad_norm": 0.614587664604187, + "learning_rate": 9.562032777230772e-05, + "loss": 1.4242, + "step": 7955 + }, + { + "epoch": 0.839240506329114, + "grad_norm": 0.587245762348175, + "learning_rate": 9.549769595978211e-05, + "loss": 1.4364, + "step": 7956 + }, + { + "epoch": 0.8393459915611814, + "grad_norm": 0.6091371774673462, + "learning_rate": 9.537513748750337e-05, + "loss": 1.4452, + "step": 7957 + }, + { + "epoch": 0.8394514767932489, + "grad_norm": 0.6191341876983643, + "learning_rate": 9.525265236920452e-05, + "loss": 1.4728, + "step": 
7958 + }, + { + "epoch": 0.8395569620253165, + "grad_norm": 0.6212775111198425, + "learning_rate": 9.5130240618611e-05, + "loss": 1.4664, + "step": 7959 + }, + { + "epoch": 0.8396624472573839, + "grad_norm": 0.5882304310798645, + "learning_rate": 9.50079022494395e-05, + "loss": 1.4465, + "step": 7960 + }, + { + "epoch": 0.8397679324894515, + "grad_norm": 0.5944860577583313, + "learning_rate": 9.488563727539864e-05, + "loss": 1.4203, + "step": 7961 + }, + { + "epoch": 0.839873417721519, + "grad_norm": 0.5811296105384827, + "learning_rate": 9.47634457101888e-05, + "loss": 1.4471, + "step": 7962 + }, + { + "epoch": 0.8399789029535865, + "grad_norm": 0.5912690162658691, + "learning_rate": 9.464132756750218e-05, + "loss": 1.4405, + "step": 7963 + }, + { + "epoch": 0.840084388185654, + "grad_norm": 0.5776982307434082, + "learning_rate": 9.451928286102277e-05, + "loss": 1.458, + "step": 7964 + }, + { + "epoch": 0.8401898734177216, + "grad_norm": 0.6686093807220459, + "learning_rate": 9.439731160442619e-05, + "loss": 1.4562, + "step": 7965 + }, + { + "epoch": 0.840295358649789, + "grad_norm": 0.6104499697685242, + "learning_rate": 9.427541381138002e-05, + "loss": 1.4527, + "step": 7966 + }, + { + "epoch": 0.8404008438818565, + "grad_norm": 0.5896642804145813, + "learning_rate": 9.415358949554326e-05, + "loss": 1.45, + "step": 7967 + }, + { + "epoch": 0.8405063291139241, + "grad_norm": 0.6079349517822266, + "learning_rate": 9.40318386705673e-05, + "loss": 1.459, + "step": 7968 + }, + { + "epoch": 0.8406118143459915, + "grad_norm": 0.6077632904052734, + "learning_rate": 9.391016135009484e-05, + "loss": 1.4167, + "step": 7969 + }, + { + "epoch": 0.8407172995780591, + "grad_norm": 0.6254934072494507, + "learning_rate": 9.378855754776028e-05, + "loss": 1.4389, + "step": 7970 + }, + { + "epoch": 0.8408227848101266, + "grad_norm": 0.6724560856819153, + "learning_rate": 9.366702727719006e-05, + "loss": 1.459, + "step": 7971 + }, + { + "epoch": 0.8409282700421941, + "grad_norm": 
0.6455076932907104, + "learning_rate": 9.354557055200214e-05, + "loss": 1.4686, + "step": 7972 + }, + { + "epoch": 0.8410337552742616, + "grad_norm": 0.5850170850753784, + "learning_rate": 9.342418738580652e-05, + "loss": 1.4275, + "step": 7973 + }, + { + "epoch": 0.8411392405063292, + "grad_norm": 0.6445632576942444, + "learning_rate": 9.330287779220459e-05, + "loss": 1.4147, + "step": 7974 + }, + { + "epoch": 0.8412447257383966, + "grad_norm": 0.6777456402778625, + "learning_rate": 9.31816417847898e-05, + "loss": 1.4579, + "step": 7975 + }, + { + "epoch": 0.8413502109704641, + "grad_norm": 0.6018970608711243, + "learning_rate": 9.306047937714713e-05, + "loss": 1.4558, + "step": 7976 + }, + { + "epoch": 0.8414556962025317, + "grad_norm": 0.5864899754524231, + "learning_rate": 9.29393905828537e-05, + "loss": 1.4247, + "step": 7977 + }, + { + "epoch": 0.8415611814345991, + "grad_norm": 0.5601939558982849, + "learning_rate": 9.281837541547791e-05, + "loss": 1.4333, + "step": 7978 + }, + { + "epoch": 0.8416666666666667, + "grad_norm": 0.6519457697868347, + "learning_rate": 9.269743388858019e-05, + "loss": 1.4415, + "step": 7979 + }, + { + "epoch": 0.8417721518987342, + "grad_norm": 0.5768648982048035, + "learning_rate": 9.257656601571266e-05, + "loss": 1.4135, + "step": 7980 + }, + { + "epoch": 0.8418776371308017, + "grad_norm": 0.599323570728302, + "learning_rate": 9.245577181041901e-05, + "loss": 1.44, + "step": 7981 + }, + { + "epoch": 0.8419831223628692, + "grad_norm": 0.6209219694137573, + "learning_rate": 9.233505128623499e-05, + "loss": 1.4281, + "step": 7982 + }, + { + "epoch": 0.8420886075949368, + "grad_norm": 0.6174030303955078, + "learning_rate": 9.221440445668794e-05, + "loss": 1.476, + "step": 7983 + }, + { + "epoch": 0.8421940928270042, + "grad_norm": 0.6194326877593994, + "learning_rate": 9.209383133529664e-05, + "loss": 1.4177, + "step": 7984 + }, + { + "epoch": 0.8422995780590717, + "grad_norm": 0.5790109038352966, + "learning_rate": 
9.197333193557237e-05, + "loss": 1.4823, + "step": 7985 + }, + { + "epoch": 0.8424050632911393, + "grad_norm": 0.5875517725944519, + "learning_rate": 9.185290627101747e-05, + "loss": 1.4614, + "step": 7986 + }, + { + "epoch": 0.8425105485232067, + "grad_norm": 0.631220281124115, + "learning_rate": 9.173255435512617e-05, + "loss": 1.473, + "step": 7987 + }, + { + "epoch": 0.8426160337552743, + "grad_norm": 0.5834121704101562, + "learning_rate": 9.161227620138468e-05, + "loss": 1.4914, + "step": 7988 + }, + { + "epoch": 0.8427215189873418, + "grad_norm": 0.6190552711486816, + "learning_rate": 9.149207182327054e-05, + "loss": 1.4331, + "step": 7989 + }, + { + "epoch": 0.8428270042194093, + "grad_norm": 0.5874776244163513, + "learning_rate": 9.137194123425349e-05, + "loss": 1.4523, + "step": 7990 + }, + { + "epoch": 0.8429324894514768, + "grad_norm": 0.5809932351112366, + "learning_rate": 9.125188444779458e-05, + "loss": 1.4458, + "step": 7991 + }, + { + "epoch": 0.8430379746835444, + "grad_norm": 0.592722475528717, + "learning_rate": 9.113190147734682e-05, + "loss": 1.4279, + "step": 7992 + }, + { + "epoch": 0.8431434599156118, + "grad_norm": 0.5710530877113342, + "learning_rate": 9.101199233635477e-05, + "loss": 1.4637, + "step": 7993 + }, + { + "epoch": 0.8432489451476793, + "grad_norm": 0.6747775673866272, + "learning_rate": 9.089215703825519e-05, + "loss": 1.4359, + "step": 7994 + }, + { + "epoch": 0.8433544303797469, + "grad_norm": 0.5916785001754761, + "learning_rate": 9.077239559647591e-05, + "loss": 1.4407, + "step": 7995 + }, + { + "epoch": 0.8434599156118143, + "grad_norm": 0.5894052982330322, + "learning_rate": 9.065270802443704e-05, + "loss": 1.4739, + "step": 7996 + }, + { + "epoch": 0.8435654008438819, + "grad_norm": 0.5909568667411804, + "learning_rate": 9.053309433554993e-05, + "loss": 1.4518, + "step": 7997 + }, + { + "epoch": 0.8436708860759494, + "grad_norm": 0.5838738679885864, + "learning_rate": 9.041355454321803e-05, + "loss": 1.4439, + "step": 
7998 + }, + { + "epoch": 0.8437763713080169, + "grad_norm": 0.5884348154067993, + "learning_rate": 9.029408866083638e-05, + "loss": 1.452, + "step": 7999 + }, + { + "epoch": 0.8438818565400844, + "grad_norm": 0.6085082292556763, + "learning_rate": 9.017469670179168e-05, + "loss": 1.4432, + "step": 8000 + }, + { + "epoch": 0.8439873417721518, + "grad_norm": 0.6303429007530212, + "learning_rate": 9.00553786794624e-05, + "loss": 1.4632, + "step": 8001 + }, + { + "epoch": 0.8440928270042194, + "grad_norm": 0.5902647972106934, + "learning_rate": 8.99361346072185e-05, + "loss": 1.4894, + "step": 8002 + }, + { + "epoch": 0.8441983122362869, + "grad_norm": 0.5906795859336853, + "learning_rate": 8.98169644984223e-05, + "loss": 1.4714, + "step": 8003 + }, + { + "epoch": 0.8443037974683544, + "grad_norm": 0.6184670925140381, + "learning_rate": 8.96978683664272e-05, + "loss": 1.4214, + "step": 8004 + }, + { + "epoch": 0.8444092827004219, + "grad_norm": 0.6550499796867371, + "learning_rate": 8.957884622457854e-05, + "loss": 1.4407, + "step": 8005 + }, + { + "epoch": 0.8445147679324895, + "grad_norm": 0.6235560178756714, + "learning_rate": 8.945989808621321e-05, + "loss": 1.4547, + "step": 8006 + }, + { + "epoch": 0.8446202531645569, + "grad_norm": 0.6205562949180603, + "learning_rate": 8.934102396466016e-05, + "loss": 1.506, + "step": 8007 + }, + { + "epoch": 0.8447257383966245, + "grad_norm": 0.7120638489723206, + "learning_rate": 8.92222238732397e-05, + "loss": 1.4412, + "step": 8008 + }, + { + "epoch": 0.844831223628692, + "grad_norm": 0.5854780673980713, + "learning_rate": 8.910349782526394e-05, + "loss": 1.4159, + "step": 8009 + }, + { + "epoch": 0.8449367088607594, + "grad_norm": 0.6021023988723755, + "learning_rate": 8.898484583403668e-05, + "loss": 1.4372, + "step": 8010 + }, + { + "epoch": 0.845042194092827, + "grad_norm": 0.5829958915710449, + "learning_rate": 8.886626791285369e-05, + "loss": 1.4535, + "step": 8011 + }, + { + "epoch": 0.8451476793248945, + 
"grad_norm": 0.6186391115188599, + "learning_rate": 8.874776407500206e-05, + "loss": 1.4309, + "step": 8012 + }, + { + "epoch": 0.845253164556962, + "grad_norm": 0.6246089935302734, + "learning_rate": 8.86293343337608e-05, + "loss": 1.4471, + "step": 8013 + }, + { + "epoch": 0.8453586497890295, + "grad_norm": 0.5958248972892761, + "learning_rate": 8.851097870240051e-05, + "loss": 1.441, + "step": 8014 + }, + { + "epoch": 0.8454641350210971, + "grad_norm": 0.5632935166358948, + "learning_rate": 8.839269719418361e-05, + "loss": 1.4403, + "step": 8015 + }, + { + "epoch": 0.8455696202531645, + "grad_norm": 0.5660814046859741, + "learning_rate": 8.827448982236397e-05, + "loss": 1.4446, + "step": 8016 + }, + { + "epoch": 0.8456751054852321, + "grad_norm": 0.609430730342865, + "learning_rate": 8.815635660018742e-05, + "loss": 1.3922, + "step": 8017 + }, + { + "epoch": 0.8457805907172996, + "grad_norm": 0.5873339176177979, + "learning_rate": 8.803829754089138e-05, + "loss": 1.4834, + "step": 8018 + }, + { + "epoch": 0.845886075949367, + "grad_norm": 0.6647022366523743, + "learning_rate": 8.792031265770475e-05, + "loss": 1.4764, + "step": 8019 + }, + { + "epoch": 0.8459915611814346, + "grad_norm": 0.596877932548523, + "learning_rate": 8.780240196384873e-05, + "loss": 1.4545, + "step": 8020 + }, + { + "epoch": 0.8460970464135021, + "grad_norm": 0.6186493039131165, + "learning_rate": 8.768456547253556e-05, + "loss": 1.4517, + "step": 8021 + }, + { + "epoch": 0.8462025316455696, + "grad_norm": 0.6011285781860352, + "learning_rate": 8.756680319696945e-05, + "loss": 1.432, + "step": 8022 + }, + { + "epoch": 0.8463080168776371, + "grad_norm": 0.5796070098876953, + "learning_rate": 8.744911515034623e-05, + "loss": 1.4196, + "step": 8023 + }, + { + "epoch": 0.8464135021097047, + "grad_norm": 0.5781856179237366, + "learning_rate": 8.733150134585338e-05, + "loss": 1.4934, + "step": 8024 + }, + { + "epoch": 0.8465189873417721, + "grad_norm": 0.6318885684013367, + "learning_rate": 
8.721396179667019e-05, + "loss": 1.4704, + "step": 8025 + }, + { + "epoch": 0.8466244725738397, + "grad_norm": 0.6065354347229004, + "learning_rate": 8.709649651596752e-05, + "loss": 1.4377, + "step": 8026 + }, + { + "epoch": 0.8467299578059072, + "grad_norm": 0.623866081237793, + "learning_rate": 8.697910551690802e-05, + "loss": 1.43, + "step": 8027 + }, + { + "epoch": 0.8468354430379746, + "grad_norm": 0.6334112286567688, + "learning_rate": 8.686178881264568e-05, + "loss": 1.4292, + "step": 8028 + }, + { + "epoch": 0.8469409282700422, + "grad_norm": 0.6145772337913513, + "learning_rate": 8.67445464163267e-05, + "loss": 1.4725, + "step": 8029 + }, + { + "epoch": 0.8470464135021097, + "grad_norm": 0.6209849119186401, + "learning_rate": 8.662737834108861e-05, + "loss": 1.4358, + "step": 8030 + }, + { + "epoch": 0.8471518987341772, + "grad_norm": 0.5954123735427856, + "learning_rate": 8.651028460006072e-05, + "loss": 1.4679, + "step": 8031 + }, + { + "epoch": 0.8472573839662447, + "grad_norm": 0.6638241410255432, + "learning_rate": 8.639326520636387e-05, + "loss": 1.4103, + "step": 8032 + }, + { + "epoch": 0.8473628691983123, + "grad_norm": 0.6085583567619324, + "learning_rate": 8.627632017311065e-05, + "loss": 1.4742, + "step": 8033 + }, + { + "epoch": 0.8474683544303797, + "grad_norm": 0.5880997180938721, + "learning_rate": 8.615944951340543e-05, + "loss": 1.4233, + "step": 8034 + }, + { + "epoch": 0.8475738396624473, + "grad_norm": 0.6374111771583557, + "learning_rate": 8.604265324034405e-05, + "loss": 1.4545, + "step": 8035 + }, + { + "epoch": 0.8476793248945148, + "grad_norm": 0.6124467253684998, + "learning_rate": 8.592593136701404e-05, + "loss": 1.4681, + "step": 8036 + }, + { + "epoch": 0.8477848101265822, + "grad_norm": 0.601678729057312, + "learning_rate": 8.580928390649496e-05, + "loss": 1.4489, + "step": 8037 + }, + { + "epoch": 0.8478902953586498, + "grad_norm": 0.6390390992164612, + "learning_rate": 8.569271087185756e-05, + "loss": 1.4927, + "step": 
8038 + }, + { + "epoch": 0.8479957805907173, + "grad_norm": 0.6678401827812195, + "learning_rate": 8.557621227616444e-05, + "loss": 1.4921, + "step": 8039 + }, + { + "epoch": 0.8481012658227848, + "grad_norm": 0.5748395919799805, + "learning_rate": 8.545978813246987e-05, + "loss": 1.4608, + "step": 8040 + }, + { + "epoch": 0.8482067510548523, + "grad_norm": 0.6310973167419434, + "learning_rate": 8.53434384538197e-05, + "loss": 1.4403, + "step": 8041 + }, + { + "epoch": 0.8483122362869199, + "grad_norm": 0.607078492641449, + "learning_rate": 8.522716325325155e-05, + "loss": 1.3995, + "step": 8042 + }, + { + "epoch": 0.8484177215189873, + "grad_norm": 0.5840705633163452, + "learning_rate": 8.51109625437946e-05, + "loss": 1.4804, + "step": 8043 + }, + { + "epoch": 0.8485232067510549, + "grad_norm": 0.6025848984718323, + "learning_rate": 8.499483633846977e-05, + "loss": 1.4346, + "step": 8044 + }, + { + "epoch": 0.8486286919831224, + "grad_norm": 0.5857424139976501, + "learning_rate": 8.48787846502893e-05, + "loss": 1.4293, + "step": 8045 + }, + { + "epoch": 0.8487341772151898, + "grad_norm": 0.5918225049972534, + "learning_rate": 8.476280749225782e-05, + "loss": 1.4292, + "step": 8046 + }, + { + "epoch": 0.8488396624472574, + "grad_norm": 0.6377977728843689, + "learning_rate": 8.464690487737098e-05, + "loss": 1.4182, + "step": 8047 + }, + { + "epoch": 0.8489451476793249, + "grad_norm": 0.6307489275932312, + "learning_rate": 8.453107681861616e-05, + "loss": 1.4078, + "step": 8048 + }, + { + "epoch": 0.8490506329113924, + "grad_norm": 0.6093991994857788, + "learning_rate": 8.441532332897248e-05, + "loss": 1.4977, + "step": 8049 + }, + { + "epoch": 0.8491561181434599, + "grad_norm": 0.6252887845039368, + "learning_rate": 8.429964442141072e-05, + "loss": 1.4549, + "step": 8050 + }, + { + "epoch": 0.8492616033755275, + "grad_norm": 0.6193724274635315, + "learning_rate": 8.418404010889336e-05, + "loss": 1.4601, + "step": 8051 + }, + { + "epoch": 0.8493670886075949, + 
"grad_norm": 0.6538556814193726, + "learning_rate": 8.406851040437426e-05, + "loss": 1.4262, + "step": 8052 + }, + { + "epoch": 0.8494725738396625, + "grad_norm": 0.6389971971511841, + "learning_rate": 8.395305532079928e-05, + "loss": 1.4693, + "step": 8053 + }, + { + "epoch": 0.84957805907173, + "grad_norm": 0.610542893409729, + "learning_rate": 8.383767487110552e-05, + "loss": 1.44, + "step": 8054 + }, + { + "epoch": 0.8496835443037974, + "grad_norm": 0.6304973363876343, + "learning_rate": 8.372236906822217e-05, + "loss": 1.4639, + "step": 8055 + }, + { + "epoch": 0.849789029535865, + "grad_norm": 0.6633606553077698, + "learning_rate": 8.360713792506971e-05, + "loss": 1.445, + "step": 8056 + }, + { + "epoch": 0.8498945147679325, + "grad_norm": 0.5928105115890503, + "learning_rate": 8.349198145456049e-05, + "loss": 1.4633, + "step": 8057 + }, + { + "epoch": 0.85, + "grad_norm": 0.6269903182983398, + "learning_rate": 8.337689966959819e-05, + "loss": 1.4781, + "step": 8058 + }, + { + "epoch": 0.8501054852320675, + "grad_norm": 0.6127380728721619, + "learning_rate": 8.326189258307832e-05, + "loss": 1.4011, + "step": 8059 + }, + { + "epoch": 0.8502109704641351, + "grad_norm": 0.6684249043464661, + "learning_rate": 8.314696020788806e-05, + "loss": 1.4438, + "step": 8060 + }, + { + "epoch": 0.8503164556962025, + "grad_norm": 0.6367148160934448, + "learning_rate": 8.303210255690622e-05, + "loss": 1.4518, + "step": 8061 + }, + { + "epoch": 0.8504219409282701, + "grad_norm": 0.6121448874473572, + "learning_rate": 8.29173196430029e-05, + "loss": 1.4576, + "step": 8062 + }, + { + "epoch": 0.8505274261603376, + "grad_norm": 0.6208028793334961, + "learning_rate": 8.280261147904039e-05, + "loss": 1.4102, + "step": 8063 + }, + { + "epoch": 0.850632911392405, + "grad_norm": 0.6720616817474365, + "learning_rate": 8.268797807787226e-05, + "loss": 1.462, + "step": 8064 + }, + { + "epoch": 0.8507383966244726, + "grad_norm": 0.644771158695221, + "learning_rate": 8.257341945234365e-05, 
+ "loss": 1.4943, + "step": 8065 + }, + { + "epoch": 0.85084388185654, + "grad_norm": 0.6042861342430115, + "learning_rate": 8.245893561529153e-05, + "loss": 1.441, + "step": 8066 + }, + { + "epoch": 0.8509493670886076, + "grad_norm": 0.5927634239196777, + "learning_rate": 8.23445265795443e-05, + "loss": 1.438, + "step": 8067 + }, + { + "epoch": 0.8510548523206751, + "grad_norm": 0.6058434844017029, + "learning_rate": 8.223019235792214e-05, + "loss": 1.4654, + "step": 8068 + }, + { + "epoch": 0.8511603375527426, + "grad_norm": 0.6182414889335632, + "learning_rate": 8.211593296323672e-05, + "loss": 1.4574, + "step": 8069 + }, + { + "epoch": 0.8512658227848101, + "grad_norm": 0.648047149181366, + "learning_rate": 8.200174840829136e-05, + "loss": 1.4467, + "step": 8070 + }, + { + "epoch": 0.8513713080168777, + "grad_norm": 0.6273515224456787, + "learning_rate": 8.188763870588092e-05, + "loss": 1.4158, + "step": 8071 + }, + { + "epoch": 0.8514767932489451, + "grad_norm": 0.5965768098831177, + "learning_rate": 8.177360386879217e-05, + "loss": 1.4494, + "step": 8072 + }, + { + "epoch": 0.8515822784810126, + "grad_norm": 0.6012042760848999, + "learning_rate": 8.165964390980316e-05, + "loss": 1.4568, + "step": 8073 + }, + { + "epoch": 0.8516877637130802, + "grad_norm": 0.6216970086097717, + "learning_rate": 8.15457588416838e-05, + "loss": 1.4715, + "step": 8074 + }, + { + "epoch": 0.8517932489451476, + "grad_norm": 0.6525275111198425, + "learning_rate": 8.143194867719534e-05, + "loss": 1.4577, + "step": 8075 + }, + { + "epoch": 0.8518987341772152, + "grad_norm": 0.6517505645751953, + "learning_rate": 8.131821342909071e-05, + "loss": 1.5083, + "step": 8076 + }, + { + "epoch": 0.8520042194092827, + "grad_norm": 0.6000187397003174, + "learning_rate": 8.120455311011473e-05, + "loss": 1.4572, + "step": 8077 + }, + { + "epoch": 0.8521097046413502, + "grad_norm": 0.614588737487793, + "learning_rate": 8.109096773300348e-05, + "loss": 1.4214, + "step": 8078 + }, + { + "epoch": 
0.8522151898734177, + "grad_norm": 0.6228635311126709, + "learning_rate": 8.097745731048475e-05, + "loss": 1.4496, + "step": 8079 + }, + { + "epoch": 0.8523206751054853, + "grad_norm": 0.598395824432373, + "learning_rate": 8.08640218552778e-05, + "loss": 1.4797, + "step": 8080 + }, + { + "epoch": 0.8524261603375527, + "grad_norm": 0.581062376499176, + "learning_rate": 8.075066138009396e-05, + "loss": 1.3959, + "step": 8081 + }, + { + "epoch": 0.8525316455696202, + "grad_norm": 0.6619287133216858, + "learning_rate": 8.063737589763573e-05, + "loss": 1.3916, + "step": 8082 + }, + { + "epoch": 0.8526371308016878, + "grad_norm": 0.5995312333106995, + "learning_rate": 8.05241654205973e-05, + "loss": 1.4215, + "step": 8083 + }, + { + "epoch": 0.8527426160337552, + "grad_norm": 0.6094111800193787, + "learning_rate": 8.041102996166442e-05, + "loss": 1.435, + "step": 8084 + }, + { + "epoch": 0.8528481012658228, + "grad_norm": 0.6084576845169067, + "learning_rate": 8.029796953351445e-05, + "loss": 1.4603, + "step": 8085 + }, + { + "epoch": 0.8529535864978903, + "grad_norm": 0.6155444383621216, + "learning_rate": 8.018498414881645e-05, + "loss": 1.4548, + "step": 8086 + }, + { + "epoch": 0.8530590717299578, + "grad_norm": 0.5954487323760986, + "learning_rate": 8.007207382023102e-05, + "loss": 1.4371, + "step": 8087 + }, + { + "epoch": 0.8531645569620253, + "grad_norm": 0.6654340028762817, + "learning_rate": 7.995923856041013e-05, + "loss": 1.467, + "step": 8088 + }, + { + "epoch": 0.8532700421940929, + "grad_norm": 0.5901021957397461, + "learning_rate": 7.984647838199773e-05, + "loss": 1.4887, + "step": 8089 + }, + { + "epoch": 0.8533755274261603, + "grad_norm": 0.5609098672866821, + "learning_rate": 7.973379329762925e-05, + "loss": 1.4784, + "step": 8090 + }, + { + "epoch": 0.8534810126582278, + "grad_norm": 0.6081834435462952, + "learning_rate": 7.96211833199314e-05, + "loss": 1.444, + "step": 8091 + }, + { + "epoch": 0.8535864978902954, + "grad_norm": 0.5829365253448486, + 
"learning_rate": 7.950864846152284e-05, + "loss": 1.4488, + "step": 8092 + }, + { + "epoch": 0.8536919831223628, + "grad_norm": 0.6414105296134949, + "learning_rate": 7.939618873501356e-05, + "loss": 1.4388, + "step": 8093 + }, + { + "epoch": 0.8537974683544304, + "grad_norm": 0.597663402557373, + "learning_rate": 7.928380415300523e-05, + "loss": 1.437, + "step": 8094 + }, + { + "epoch": 0.8539029535864979, + "grad_norm": 0.5922931432723999, + "learning_rate": 7.917149472809113e-05, + "loss": 1.3925, + "step": 8095 + }, + { + "epoch": 0.8540084388185654, + "grad_norm": 0.5836181044578552, + "learning_rate": 7.905926047285616e-05, + "loss": 1.4233, + "step": 8096 + }, + { + "epoch": 0.8541139240506329, + "grad_norm": 0.6030147671699524, + "learning_rate": 7.894710139987645e-05, + "loss": 1.4588, + "step": 8097 + }, + { + "epoch": 0.8542194092827005, + "grad_norm": 0.6446613669395447, + "learning_rate": 7.883501752172038e-05, + "loss": 1.42, + "step": 8098 + }, + { + "epoch": 0.8543248945147679, + "grad_norm": 0.6710536479949951, + "learning_rate": 7.872300885094736e-05, + "loss": 1.4381, + "step": 8099 + }, + { + "epoch": 0.8544303797468354, + "grad_norm": 0.6744824051856995, + "learning_rate": 7.861107540010845e-05, + "loss": 1.4511, + "step": 8100 + }, + { + "epoch": 0.854535864978903, + "grad_norm": 0.6514532566070557, + "learning_rate": 7.849921718174638e-05, + "loss": 1.4637, + "step": 8101 + }, + { + "epoch": 0.8546413502109704, + "grad_norm": 0.6405929327011108, + "learning_rate": 7.838743420839544e-05, + "loss": 1.4957, + "step": 8102 + }, + { + "epoch": 0.854746835443038, + "grad_norm": 0.5799195766448975, + "learning_rate": 7.827572649258147e-05, + "loss": 1.4479, + "step": 8103 + }, + { + "epoch": 0.8548523206751055, + "grad_norm": 0.6547724008560181, + "learning_rate": 7.816409404682185e-05, + "loss": 1.4728, + "step": 8104 + }, + { + "epoch": 0.854957805907173, + "grad_norm": 0.5897467136383057, + "learning_rate": 7.805253688362557e-05, + "loss": 
1.4538, + "step": 8105 + }, + { + "epoch": 0.8550632911392405, + "grad_norm": 0.6146205067634583, + "learning_rate": 7.794105501549306e-05, + "loss": 1.4408, + "step": 8106 + }, + { + "epoch": 0.8551687763713081, + "grad_norm": 0.6333208084106445, + "learning_rate": 7.782964845491666e-05, + "loss": 1.4284, + "step": 8107 + }, + { + "epoch": 0.8552742616033755, + "grad_norm": 0.6322663426399231, + "learning_rate": 7.771831721437989e-05, + "loss": 1.4199, + "step": 8108 + }, + { + "epoch": 0.855379746835443, + "grad_norm": 0.6292869448661804, + "learning_rate": 7.760706130635792e-05, + "loss": 1.4625, + "step": 8109 + }, + { + "epoch": 0.8554852320675106, + "grad_norm": 0.6537604928016663, + "learning_rate": 7.749588074331762e-05, + "loss": 1.4945, + "step": 8110 + }, + { + "epoch": 0.855590717299578, + "grad_norm": 0.6168704032897949, + "learning_rate": 7.738477553771727e-05, + "loss": 1.4544, + "step": 8111 + }, + { + "epoch": 0.8556962025316456, + "grad_norm": 0.619467556476593, + "learning_rate": 7.727374570200685e-05, + "loss": 1.4762, + "step": 8112 + }, + { + "epoch": 0.8558016877637131, + "grad_norm": 0.690534770488739, + "learning_rate": 7.716279124862771e-05, + "loss": 1.408, + "step": 8113 + }, + { + "epoch": 0.8559071729957806, + "grad_norm": 0.6059818267822266, + "learning_rate": 7.705191219001267e-05, + "loss": 1.4841, + "step": 8114 + }, + { + "epoch": 0.8560126582278481, + "grad_norm": 0.613914430141449, + "learning_rate": 7.694110853858671e-05, + "loss": 1.4506, + "step": 8115 + }, + { + "epoch": 0.8561181434599157, + "grad_norm": 0.6018344759941101, + "learning_rate": 7.683038030676573e-05, + "loss": 1.4484, + "step": 8116 + }, + { + "epoch": 0.8562236286919831, + "grad_norm": 0.6684064865112305, + "learning_rate": 7.67197275069573e-05, + "loss": 1.4658, + "step": 8117 + }, + { + "epoch": 0.8563291139240506, + "grad_norm": 0.5859187841415405, + "learning_rate": 7.660915015156067e-05, + "loss": 1.4163, + "step": 8118 + }, + { + "epoch": 
0.8564345991561182, + "grad_norm": 0.6317290663719177, + "learning_rate": 7.649864825296669e-05, + "loss": 1.4304, + "step": 8119 + }, + { + "epoch": 0.8565400843881856, + "grad_norm": 0.6055102944374084, + "learning_rate": 7.63882218235575e-05, + "loss": 1.4216, + "step": 8120 + }, + { + "epoch": 0.8566455696202532, + "grad_norm": 0.6585144996643066, + "learning_rate": 7.627787087570692e-05, + "loss": 1.4369, + "step": 8121 + }, + { + "epoch": 0.8567510548523207, + "grad_norm": 0.6559712290763855, + "learning_rate": 7.616759542178045e-05, + "loss": 1.4332, + "step": 8122 + }, + { + "epoch": 0.8568565400843882, + "grad_norm": 0.5787721276283264, + "learning_rate": 7.605739547413487e-05, + "loss": 1.4297, + "step": 8123 + }, + { + "epoch": 0.8569620253164557, + "grad_norm": 0.6757198572158813, + "learning_rate": 7.594727104511873e-05, + "loss": 1.4373, + "step": 8124 + }, + { + "epoch": 0.8570675105485233, + "grad_norm": 0.6011409163475037, + "learning_rate": 7.583722214707206e-05, + "loss": 1.4512, + "step": 8125 + }, + { + "epoch": 0.8571729957805907, + "grad_norm": 0.6036893725395203, + "learning_rate": 7.572724879232634e-05, + "loss": 1.4498, + "step": 8126 + }, + { + "epoch": 0.8572784810126582, + "grad_norm": 0.6018787026405334, + "learning_rate": 7.561735099320463e-05, + "loss": 1.4509, + "step": 8127 + }, + { + "epoch": 0.8573839662447258, + "grad_norm": 0.6263256072998047, + "learning_rate": 7.55075287620215e-05, + "loss": 1.4522, + "step": 8128 + }, + { + "epoch": 0.8574894514767932, + "grad_norm": 0.6320101618766785, + "learning_rate": 7.539778211108309e-05, + "loss": 1.4817, + "step": 8129 + }, + { + "epoch": 0.8575949367088608, + "grad_norm": 0.5826576948165894, + "learning_rate": 7.528811105268699e-05, + "loss": 1.4186, + "step": 8130 + }, + { + "epoch": 0.8577004219409282, + "grad_norm": 0.6270806789398193, + "learning_rate": 7.517851559912254e-05, + "loss": 1.4147, + "step": 8131 + }, + { + "epoch": 0.8578059071729958, + "grad_norm": 
0.6298795938491821, + "learning_rate": 7.506899576267023e-05, + "loss": 1.4459, + "step": 8132 + }, + { + "epoch": 0.8579113924050633, + "grad_norm": 0.6277000308036804, + "learning_rate": 7.495955155560261e-05, + "loss": 1.4518, + "step": 8133 + }, + { + "epoch": 0.8580168776371307, + "grad_norm": 0.6044176816940308, + "learning_rate": 7.485018299018326e-05, + "loss": 1.4276, + "step": 8134 + }, + { + "epoch": 0.8581223628691983, + "grad_norm": 0.5885352492332458, + "learning_rate": 7.474089007866756e-05, + "loss": 1.4536, + "step": 8135 + }, + { + "epoch": 0.8582278481012658, + "grad_norm": 0.6019391417503357, + "learning_rate": 7.463167283330227e-05, + "loss": 1.4584, + "step": 8136 + }, + { + "epoch": 0.8583333333333333, + "grad_norm": 0.6161719560623169, + "learning_rate": 7.452253126632564e-05, + "loss": 1.4365, + "step": 8137 + }, + { + "epoch": 0.8584388185654008, + "grad_norm": 0.5884931683540344, + "learning_rate": 7.441346538996769e-05, + "loss": 1.4564, + "step": 8138 + }, + { + "epoch": 0.8585443037974684, + "grad_norm": 0.6261066198348999, + "learning_rate": 7.430447521644973e-05, + "loss": 1.4588, + "step": 8139 + }, + { + "epoch": 0.8586497890295358, + "grad_norm": 0.6636409163475037, + "learning_rate": 7.41955607579845e-05, + "loss": 1.4406, + "step": 8140 + }, + { + "epoch": 0.8587552742616034, + "grad_norm": 0.5569869875907898, + "learning_rate": 7.408672202677666e-05, + "loss": 1.4051, + "step": 8141 + }, + { + "epoch": 0.8588607594936709, + "grad_norm": 0.5819077491760254, + "learning_rate": 7.397795903502202e-05, + "loss": 1.4408, + "step": 8142 + }, + { + "epoch": 0.8589662447257383, + "grad_norm": 0.5714100003242493, + "learning_rate": 7.386927179490801e-05, + "loss": 1.4568, + "step": 8143 + }, + { + "epoch": 0.8590717299578059, + "grad_norm": 0.5777878165245056, + "learning_rate": 7.376066031861364e-05, + "loss": 1.4183, + "step": 8144 + }, + { + "epoch": 0.8591772151898734, + "grad_norm": 0.5857587456703186, + "learning_rate": 
7.365212461830933e-05, + "loss": 1.4235, + "step": 8145 + }, + { + "epoch": 0.8592827004219409, + "grad_norm": 0.5691379904747009, + "learning_rate": 7.354366470615695e-05, + "loss": 1.474, + "step": 8146 + }, + { + "epoch": 0.8593881856540084, + "grad_norm": 0.5906193852424622, + "learning_rate": 7.343528059431009e-05, + "loss": 1.4405, + "step": 8147 + }, + { + "epoch": 0.859493670886076, + "grad_norm": 0.7643877863883972, + "learning_rate": 7.332697229491373e-05, + "loss": 1.4381, + "step": 8148 + }, + { + "epoch": 0.8595991561181434, + "grad_norm": 0.5874009728431702, + "learning_rate": 7.321873982010422e-05, + "loss": 1.4137, + "step": 8149 + }, + { + "epoch": 0.859704641350211, + "grad_norm": 0.5903686285018921, + "learning_rate": 7.311058318200969e-05, + "loss": 1.4786, + "step": 8150 + }, + { + "epoch": 0.8598101265822785, + "grad_norm": 0.5760003328323364, + "learning_rate": 7.300250239274964e-05, + "loss": 1.4472, + "step": 8151 + }, + { + "epoch": 0.859915611814346, + "grad_norm": 0.7386350035667419, + "learning_rate": 7.289449746443494e-05, + "loss": 1.4254, + "step": 8152 + }, + { + "epoch": 0.8600210970464135, + "grad_norm": 0.6411071419715881, + "learning_rate": 7.278656840916825e-05, + "loss": 1.4894, + "step": 8153 + }, + { + "epoch": 0.860126582278481, + "grad_norm": 0.5901421308517456, + "learning_rate": 7.26787152390434e-05, + "loss": 1.4598, + "step": 8154 + }, + { + "epoch": 0.8602320675105485, + "grad_norm": 0.5983632802963257, + "learning_rate": 7.257093796614597e-05, + "loss": 1.4707, + "step": 8155 + }, + { + "epoch": 0.860337552742616, + "grad_norm": 0.6937258839607239, + "learning_rate": 7.246323660255289e-05, + "loss": 1.4738, + "step": 8156 + }, + { + "epoch": 0.8604430379746836, + "grad_norm": 0.6017975807189941, + "learning_rate": 7.235561116033265e-05, + "loss": 1.4299, + "step": 8157 + }, + { + "epoch": 0.860548523206751, + "grad_norm": 0.5757695436477661, + "learning_rate": 7.224806165154504e-05, + "loss": 1.4463, + "step": 8158 + 
}, + { + "epoch": 0.8606540084388186, + "grad_norm": 0.565200686454773, + "learning_rate": 7.214058808824192e-05, + "loss": 1.4155, + "step": 8159 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 0.6090195178985596, + "learning_rate": 7.203319048246599e-05, + "loss": 1.4503, + "step": 8160 + }, + { + "epoch": 0.8608649789029535, + "grad_norm": 0.5856454968452454, + "learning_rate": 7.192586884625169e-05, + "loss": 1.417, + "step": 8161 + }, + { + "epoch": 0.8609704641350211, + "grad_norm": 0.6166321039199829, + "learning_rate": 7.1818623191625e-05, + "loss": 1.4577, + "step": 8162 + }, + { + "epoch": 0.8610759493670886, + "grad_norm": 0.5804411172866821, + "learning_rate": 7.17114535306033e-05, + "loss": 1.4599, + "step": 8163 + }, + { + "epoch": 0.8611814345991561, + "grad_norm": 0.5726751685142517, + "learning_rate": 7.16043598751954e-05, + "loss": 1.4161, + "step": 8164 + }, + { + "epoch": 0.8612869198312236, + "grad_norm": 0.5868279933929443, + "learning_rate": 7.149734223740187e-05, + "loss": 1.4375, + "step": 8165 + }, + { + "epoch": 0.8613924050632912, + "grad_norm": 0.5920861959457397, + "learning_rate": 7.139040062921428e-05, + "loss": 1.4826, + "step": 8166 + }, + { + "epoch": 0.8614978902953586, + "grad_norm": 0.5681700706481934, + "learning_rate": 7.128353506261631e-05, + "loss": 1.4512, + "step": 8167 + }, + { + "epoch": 0.8616033755274262, + "grad_norm": 0.5923990607261658, + "learning_rate": 7.117674554958253e-05, + "loss": 1.4538, + "step": 8168 + }, + { + "epoch": 0.8617088607594937, + "grad_norm": 0.5905157923698425, + "learning_rate": 7.107003210207947e-05, + "loss": 1.4242, + "step": 8169 + }, + { + "epoch": 0.8618143459915611, + "grad_norm": 0.6126732230186462, + "learning_rate": 7.096339473206471e-05, + "loss": 1.4161, + "step": 8170 + }, + { + "epoch": 0.8619198312236287, + "grad_norm": 0.557263195514679, + "learning_rate": 7.085683345148753e-05, + "loss": 1.4485, + "step": 8171 + }, + { + "epoch": 0.8620253164556962, + "grad_norm": 
0.5912117958068848, + "learning_rate": 7.075034827228862e-05, + "loss": 1.4403, + "step": 8172 + }, + { + "epoch": 0.8621308016877637, + "grad_norm": 0.5888671278953552, + "learning_rate": 7.064393920640031e-05, + "loss": 1.4851, + "step": 8173 + }, + { + "epoch": 0.8622362869198312, + "grad_norm": 0.5608071684837341, + "learning_rate": 7.053760626574618e-05, + "loss": 1.4489, + "step": 8174 + }, + { + "epoch": 0.8623417721518988, + "grad_norm": 0.6275113821029663, + "learning_rate": 7.043134946224123e-05, + "loss": 1.4072, + "step": 8175 + }, + { + "epoch": 0.8624472573839662, + "grad_norm": 0.5821383595466614, + "learning_rate": 7.032516880779233e-05, + "loss": 1.4524, + "step": 8176 + }, + { + "epoch": 0.8625527426160338, + "grad_norm": 0.5914755463600159, + "learning_rate": 7.021906431429747e-05, + "loss": 1.4702, + "step": 8177 + }, + { + "epoch": 0.8626582278481013, + "grad_norm": 0.5890967845916748, + "learning_rate": 7.011303599364608e-05, + "loss": 1.4275, + "step": 8178 + }, + { + "epoch": 0.8627637130801687, + "grad_norm": 0.6044332981109619, + "learning_rate": 7.000708385771928e-05, + "loss": 1.4326, + "step": 8179 + }, + { + "epoch": 0.8628691983122363, + "grad_norm": 0.6205101013183594, + "learning_rate": 6.990120791838953e-05, + "loss": 1.4838, + "step": 8180 + }, + { + "epoch": 0.8629746835443038, + "grad_norm": 0.6092308759689331, + "learning_rate": 6.979540818752064e-05, + "loss": 1.439, + "step": 8181 + }, + { + "epoch": 0.8630801687763713, + "grad_norm": 0.606573760509491, + "learning_rate": 6.968968467696806e-05, + "loss": 1.4365, + "step": 8182 + }, + { + "epoch": 0.8631856540084388, + "grad_norm": 0.6284802556037903, + "learning_rate": 6.958403739857866e-05, + "loss": 1.4282, + "step": 8183 + }, + { + "epoch": 0.8632911392405064, + "grad_norm": 0.576997697353363, + "learning_rate": 6.947846636419061e-05, + "loss": 1.4578, + "step": 8184 + }, + { + "epoch": 0.8633966244725738, + "grad_norm": 0.5687606930732727, + "learning_rate": 
6.937297158563389e-05, + "loss": 1.4532, + "step": 8185 + }, + { + "epoch": 0.8635021097046414, + "grad_norm": 0.6425700783729553, + "learning_rate": 6.926755307472968e-05, + "loss": 1.4531, + "step": 8186 + }, + { + "epoch": 0.8636075949367089, + "grad_norm": 0.6578190326690674, + "learning_rate": 6.916221084329055e-05, + "loss": 1.434, + "step": 8187 + }, + { + "epoch": 0.8637130801687763, + "grad_norm": 0.6002835035324097, + "learning_rate": 6.905694490312064e-05, + "loss": 1.4285, + "step": 8188 + }, + { + "epoch": 0.8638185654008439, + "grad_norm": 0.6027795672416687, + "learning_rate": 6.89517552660156e-05, + "loss": 1.4505, + "step": 8189 + }, + { + "epoch": 0.8639240506329114, + "grad_norm": 0.6268856525421143, + "learning_rate": 6.884664194376233e-05, + "loss": 1.4366, + "step": 8190 + }, + { + "epoch": 0.8640295358649789, + "grad_norm": 0.6836393475532532, + "learning_rate": 6.874160494813942e-05, + "loss": 1.4508, + "step": 8191 + }, + { + "epoch": 0.8641350210970464, + "grad_norm": 0.6101682782173157, + "learning_rate": 6.86366442909166e-05, + "loss": 1.4379, + "step": 8192 + }, + { + "epoch": 0.864240506329114, + "grad_norm": 0.5788498520851135, + "learning_rate": 6.853175998385547e-05, + "loss": 1.4357, + "step": 8193 + }, + { + "epoch": 0.8643459915611814, + "grad_norm": 0.7002974152565002, + "learning_rate": 6.842695203870872e-05, + "loss": 1.4763, + "step": 8194 + }, + { + "epoch": 0.864451476793249, + "grad_norm": 0.6513140201568604, + "learning_rate": 6.832222046722069e-05, + "loss": 1.4293, + "step": 8195 + }, + { + "epoch": 0.8645569620253165, + "grad_norm": 0.6022371053695679, + "learning_rate": 6.821756528112693e-05, + "loss": 1.4923, + "step": 8196 + }, + { + "epoch": 0.864662447257384, + "grad_norm": 0.5771981477737427, + "learning_rate": 6.811298649215472e-05, + "loss": 1.4594, + "step": 8197 + }, + { + "epoch": 0.8647679324894515, + "grad_norm": 0.6173442006111145, + "learning_rate": 6.80084841120226e-05, + "loss": 1.4553, + "step": 8198 
+ }, + { + "epoch": 0.8648734177215189, + "grad_norm": 0.6200125813484192, + "learning_rate": 6.790405815244044e-05, + "loss": 1.484, + "step": 8199 + }, + { + "epoch": 0.8649789029535865, + "grad_norm": 0.6262616515159607, + "learning_rate": 6.779970862510989e-05, + "loss": 1.4331, + "step": 8200 + }, + { + "epoch": 0.865084388185654, + "grad_norm": 0.5802428722381592, + "learning_rate": 6.769543554172361e-05, + "loss": 1.4548, + "step": 8201 + }, + { + "epoch": 0.8651898734177215, + "grad_norm": 0.6193487048149109, + "learning_rate": 6.759123891396615e-05, + "loss": 1.4884, + "step": 8202 + }, + { + "epoch": 0.865295358649789, + "grad_norm": 0.5820459127426147, + "learning_rate": 6.748711875351318e-05, + "loss": 1.4321, + "step": 8203 + }, + { + "epoch": 0.8654008438818566, + "grad_norm": 0.6178025007247925, + "learning_rate": 6.738307507203187e-05, + "loss": 1.4229, + "step": 8204 + }, + { + "epoch": 0.865506329113924, + "grad_norm": 0.577579140663147, + "learning_rate": 6.72791078811808e-05, + "loss": 1.4429, + "step": 8205 + }, + { + "epoch": 0.8656118143459915, + "grad_norm": 0.6006256341934204, + "learning_rate": 6.717521719261016e-05, + "loss": 1.4476, + "step": 8206 + }, + { + "epoch": 0.8657172995780591, + "grad_norm": 0.6302429437637329, + "learning_rate": 6.707140301796122e-05, + "loss": 1.4622, + "step": 8207 + }, + { + "epoch": 0.8658227848101265, + "grad_norm": 0.601800262928009, + "learning_rate": 6.696766536886692e-05, + "loss": 1.4752, + "step": 8208 + }, + { + "epoch": 0.8659282700421941, + "grad_norm": 0.5726154446601868, + "learning_rate": 6.686400425695171e-05, + "loss": 1.4173, + "step": 8209 + }, + { + "epoch": 0.8660337552742616, + "grad_norm": 0.597221851348877, + "learning_rate": 6.676041969383107e-05, + "loss": 1.4223, + "step": 8210 + }, + { + "epoch": 0.8661392405063291, + "grad_norm": 0.5890613198280334, + "learning_rate": 6.665691169111244e-05, + "loss": 1.4565, + "step": 8211 + }, + { + "epoch": 0.8662447257383966, + "grad_norm": 
0.6106825470924377, + "learning_rate": 6.655348026039437e-05, + "loss": 1.4292, + "step": 8212 + }, + { + "epoch": 0.8663502109704642, + "grad_norm": 0.59922856092453, + "learning_rate": 6.645012541326678e-05, + "loss": 1.4488, + "step": 8213 + }, + { + "epoch": 0.8664556962025316, + "grad_norm": 0.5611065626144409, + "learning_rate": 6.634684716131114e-05, + "loss": 1.4494, + "step": 8214 + }, + { + "epoch": 0.8665611814345991, + "grad_norm": 0.6754498481750488, + "learning_rate": 6.62436455161003e-05, + "loss": 1.4734, + "step": 8215 + }, + { + "epoch": 0.8666666666666667, + "grad_norm": 0.5949006676673889, + "learning_rate": 6.614052048919847e-05, + "loss": 1.4363, + "step": 8216 + }, + { + "epoch": 0.8667721518987341, + "grad_norm": 0.7238005995750427, + "learning_rate": 6.603747209216135e-05, + "loss": 1.4695, + "step": 8217 + }, + { + "epoch": 0.8668776371308017, + "grad_norm": 0.5750429630279541, + "learning_rate": 6.593450033653586e-05, + "loss": 1.4337, + "step": 8218 + }, + { + "epoch": 0.8669831223628692, + "grad_norm": 0.6613823771476746, + "learning_rate": 6.583160523386086e-05, + "loss": 1.4782, + "step": 8219 + }, + { + "epoch": 0.8670886075949367, + "grad_norm": 0.63067227602005, + "learning_rate": 6.572878679566605e-05, + "loss": 1.473, + "step": 8220 + }, + { + "epoch": 0.8671940928270042, + "grad_norm": 0.6178733110427856, + "learning_rate": 6.562604503347277e-05, + "loss": 1.4148, + "step": 8221 + }, + { + "epoch": 0.8672995780590718, + "grad_norm": 0.6643552184104919, + "learning_rate": 6.552337995879368e-05, + "loss": 1.4319, + "step": 8222 + }, + { + "epoch": 0.8674050632911392, + "grad_norm": 0.5990493893623352, + "learning_rate": 6.542079158313305e-05, + "loss": 1.465, + "step": 8223 + }, + { + "epoch": 0.8675105485232067, + "grad_norm": 0.6192862391471863, + "learning_rate": 6.531827991798628e-05, + "loss": 1.4513, + "step": 8224 + }, + { + "epoch": 0.8676160337552743, + "grad_norm": 0.6108850240707397, + "learning_rate": 
6.521584497484043e-05, + "loss": 1.4833, + "step": 8225 + }, + { + "epoch": 0.8677215189873417, + "grad_norm": 0.6763048768043518, + "learning_rate": 6.511348676517373e-05, + "loss": 1.4505, + "step": 8226 + }, + { + "epoch": 0.8678270042194093, + "grad_norm": 0.5979939699172974, + "learning_rate": 6.501120530045593e-05, + "loss": 1.4246, + "step": 8227 + }, + { + "epoch": 0.8679324894514768, + "grad_norm": 0.6184331178665161, + "learning_rate": 6.490900059214836e-05, + "loss": 1.465, + "step": 8228 + }, + { + "epoch": 0.8680379746835443, + "grad_norm": 0.6191738247871399, + "learning_rate": 6.480687265170342e-05, + "loss": 1.4404, + "step": 8229 + }, + { + "epoch": 0.8681434599156118, + "grad_norm": 0.7444438338279724, + "learning_rate": 6.470482149056509e-05, + "loss": 1.4699, + "step": 8230 + }, + { + "epoch": 0.8682489451476794, + "grad_norm": 0.6074113845825195, + "learning_rate": 6.460284712016868e-05, + "loss": 1.4471, + "step": 8231 + }, + { + "epoch": 0.8683544303797468, + "grad_norm": 0.5950417518615723, + "learning_rate": 6.450094955194096e-05, + "loss": 1.4212, + "step": 8232 + }, + { + "epoch": 0.8684599156118143, + "grad_norm": 0.5706289410591125, + "learning_rate": 6.439912879730009e-05, + "loss": 1.4663, + "step": 8233 + }, + { + "epoch": 0.8685654008438819, + "grad_norm": 0.6027025580406189, + "learning_rate": 6.429738486765548e-05, + "loss": 1.4544, + "step": 8234 + }, + { + "epoch": 0.8686708860759493, + "grad_norm": 0.5769691467285156, + "learning_rate": 6.419571777440814e-05, + "loss": 1.4332, + "step": 8235 + }, + { + "epoch": 0.8687763713080169, + "grad_norm": 0.5904448628425598, + "learning_rate": 6.409412752895041e-05, + "loss": 1.4479, + "step": 8236 + }, + { + "epoch": 0.8688818565400844, + "grad_norm": 0.5746470093727112, + "learning_rate": 6.399261414266571e-05, + "loss": 1.467, + "step": 8237 + }, + { + "epoch": 0.8689873417721519, + "grad_norm": 0.6200452446937561, + "learning_rate": 6.389117762692952e-05, + "loss": 1.4659, + "step": 
8238 + }, + { + "epoch": 0.8690928270042194, + "grad_norm": 0.6222609877586365, + "learning_rate": 6.37898179931081e-05, + "loss": 1.3983, + "step": 8239 + }, + { + "epoch": 0.869198312236287, + "grad_norm": 0.6029781699180603, + "learning_rate": 6.368853525255942e-05, + "loss": 1.4535, + "step": 8240 + }, + { + "epoch": 0.8693037974683544, + "grad_norm": 0.5794437527656555, + "learning_rate": 6.358732941663248e-05, + "loss": 1.4643, + "step": 8241 + }, + { + "epoch": 0.869409282700422, + "grad_norm": 0.6206902861595154, + "learning_rate": 6.348620049666815e-05, + "loss": 1.4347, + "step": 8242 + }, + { + "epoch": 0.8695147679324895, + "grad_norm": 0.5915148854255676, + "learning_rate": 6.338514850399826e-05, + "loss": 1.4694, + "step": 8243 + }, + { + "epoch": 0.8696202531645569, + "grad_norm": 0.6212947964668274, + "learning_rate": 6.328417344994627e-05, + "loss": 1.4514, + "step": 8244 + }, + { + "epoch": 0.8697257383966245, + "grad_norm": 0.5567852258682251, + "learning_rate": 6.318327534582688e-05, + "loss": 1.4254, + "step": 8245 + }, + { + "epoch": 0.869831223628692, + "grad_norm": 0.5864655375480652, + "learning_rate": 6.308245420294636e-05, + "loss": 1.4646, + "step": 8246 + }, + { + "epoch": 0.8699367088607595, + "grad_norm": 0.6464340090751648, + "learning_rate": 6.298171003260194e-05, + "loss": 1.4716, + "step": 8247 + }, + { + "epoch": 0.870042194092827, + "grad_norm": 0.6175544261932373, + "learning_rate": 6.288104284608284e-05, + "loss": 1.489, + "step": 8248 + }, + { + "epoch": 0.8701476793248946, + "grad_norm": 0.6238746643066406, + "learning_rate": 6.278045265466911e-05, + "loss": 1.4529, + "step": 8249 + }, + { + "epoch": 0.870253164556962, + "grad_norm": 0.6115866899490356, + "learning_rate": 6.267993946963249e-05, + "loss": 1.4767, + "step": 8250 + }, + { + "epoch": 0.8703586497890295, + "grad_norm": 0.5566269755363464, + "learning_rate": 6.257950330223597e-05, + "loss": 1.476, + "step": 8251 + }, + { + "epoch": 0.8704641350210971, + 
"grad_norm": 0.5562306046485901, + "learning_rate": 6.247914416373387e-05, + "loss": 1.4243, + "step": 8252 + }, + { + "epoch": 0.8705696202531645, + "grad_norm": 0.6122620701789856, + "learning_rate": 6.237886206537197e-05, + "loss": 1.4399, + "step": 8253 + }, + { + "epoch": 0.8706751054852321, + "grad_norm": 0.5671271681785583, + "learning_rate": 6.227865701838733e-05, + "loss": 1.4401, + "step": 8254 + }, + { + "epoch": 0.8707805907172996, + "grad_norm": 0.5993615984916687, + "learning_rate": 6.217852903400841e-05, + "loss": 1.5049, + "step": 8255 + }, + { + "epoch": 0.8708860759493671, + "grad_norm": 0.6046157479286194, + "learning_rate": 6.207847812345524e-05, + "loss": 1.4566, + "step": 8256 + }, + { + "epoch": 0.8709915611814346, + "grad_norm": 0.6177718043327332, + "learning_rate": 6.197850429793866e-05, + "loss": 1.4557, + "step": 8257 + }, + { + "epoch": 0.8710970464135022, + "grad_norm": 0.597959041595459, + "learning_rate": 6.187860756866157e-05, + "loss": 1.4262, + "step": 8258 + }, + { + "epoch": 0.8712025316455696, + "grad_norm": 0.5798313021659851, + "learning_rate": 6.177878794681782e-05, + "loss": 1.4614, + "step": 8259 + }, + { + "epoch": 0.8713080168776371, + "grad_norm": 0.6213994026184082, + "learning_rate": 6.167904544359265e-05, + "loss": 1.4247, + "step": 8260 + }, + { + "epoch": 0.8714135021097047, + "grad_norm": 0.5933222770690918, + "learning_rate": 6.157938007016279e-05, + "loss": 1.4424, + "step": 8261 + }, + { + "epoch": 0.8715189873417721, + "grad_norm": 0.6611871123313904, + "learning_rate": 6.147979183769602e-05, + "loss": 1.4317, + "step": 8262 + }, + { + "epoch": 0.8716244725738397, + "grad_norm": 0.5756940245628357, + "learning_rate": 6.138028075735196e-05, + "loss": 1.4642, + "step": 8263 + }, + { + "epoch": 0.8717299578059071, + "grad_norm": 0.5624225735664368, + "learning_rate": 6.128084684028118e-05, + "loss": 1.4255, + "step": 8264 + }, + { + "epoch": 0.8718354430379747, + "grad_norm": 0.6149908900260925, + 
"learning_rate": 6.118149009762574e-05, + "loss": 1.4192, + "step": 8265 + }, + { + "epoch": 0.8719409282700422, + "grad_norm": 0.5805769562721252, + "learning_rate": 6.108221054051902e-05, + "loss": 1.4494, + "step": 8266 + }, + { + "epoch": 0.8720464135021097, + "grad_norm": 0.6043552160263062, + "learning_rate": 6.0983008180086005e-05, + "loss": 1.4472, + "step": 8267 + }, + { + "epoch": 0.8721518987341772, + "grad_norm": 0.6298173069953918, + "learning_rate": 6.088388302744266e-05, + "loss": 1.4271, + "step": 8268 + }, + { + "epoch": 0.8722573839662447, + "grad_norm": 0.5661120414733887, + "learning_rate": 6.078483509369642e-05, + "loss": 1.4726, + "step": 8269 + }, + { + "epoch": 0.8723628691983122, + "grad_norm": 0.6099891662597656, + "learning_rate": 6.068586438994617e-05, + "loss": 1.4749, + "step": 8270 + }, + { + "epoch": 0.8724683544303797, + "grad_norm": 0.5748714804649353, + "learning_rate": 6.058697092728202e-05, + "loss": 1.4889, + "step": 8271 + }, + { + "epoch": 0.8725738396624473, + "grad_norm": 0.5802196264266968, + "learning_rate": 6.048815471678554e-05, + "loss": 1.4176, + "step": 8272 + }, + { + "epoch": 0.8726793248945147, + "grad_norm": 0.5676904916763306, + "learning_rate": 6.038941576952952e-05, + "loss": 1.4386, + "step": 8273 + }, + { + "epoch": 0.8727848101265823, + "grad_norm": 0.5958864688873291, + "learning_rate": 6.029075409657822e-05, + "loss": 1.4319, + "step": 8274 + }, + { + "epoch": 0.8728902953586498, + "grad_norm": 0.585426390171051, + "learning_rate": 6.0192169708987026e-05, + "loss": 1.4143, + "step": 8275 + }, + { + "epoch": 0.8729957805907173, + "grad_norm": 0.6161760687828064, + "learning_rate": 6.009366261780286e-05, + "loss": 1.4477, + "step": 8276 + }, + { + "epoch": 0.8731012658227848, + "grad_norm": 0.6013522148132324, + "learning_rate": 5.999523283406405e-05, + "loss": 1.4204, + "step": 8277 + }, + { + "epoch": 0.8732067510548523, + "grad_norm": 0.5666058659553528, + "learning_rate": 5.9896880368800115e-05, + 
"loss": 1.4099, + "step": 8278 + }, + { + "epoch": 0.8733122362869198, + "grad_norm": 0.5932400822639465, + "learning_rate": 5.9798605233031904e-05, + "loss": 1.4467, + "step": 8279 + }, + { + "epoch": 0.8734177215189873, + "grad_norm": 0.5862503051757812, + "learning_rate": 5.970040743777161e-05, + "loss": 1.4054, + "step": 8280 + }, + { + "epoch": 0.8735232067510549, + "grad_norm": 0.5714808702468872, + "learning_rate": 5.960228699402284e-05, + "loss": 1.4345, + "step": 8281 + }, + { + "epoch": 0.8736286919831223, + "grad_norm": 0.6254993081092834, + "learning_rate": 5.9504243912780474e-05, + "loss": 1.4533, + "step": 8282 + }, + { + "epoch": 0.8737341772151899, + "grad_norm": 0.6043073534965515, + "learning_rate": 5.940627820503064e-05, + "loss": 1.4567, + "step": 8283 + }, + { + "epoch": 0.8738396624472574, + "grad_norm": 0.5909671187400818, + "learning_rate": 5.930838988175097e-05, + "loss": 1.41, + "step": 8284 + }, + { + "epoch": 0.8739451476793249, + "grad_norm": 0.6373518109321594, + "learning_rate": 5.921057895391027e-05, + "loss": 1.41, + "step": 8285 + }, + { + "epoch": 0.8740506329113924, + "grad_norm": 0.6044569611549377, + "learning_rate": 5.91128454324687e-05, + "loss": 1.4165, + "step": 8286 + }, + { + "epoch": 0.87415611814346, + "grad_norm": 0.5690683722496033, + "learning_rate": 5.901518932837799e-05, + "loss": 1.4549, + "step": 8287 + }, + { + "epoch": 0.8742616033755274, + "grad_norm": 0.6264167428016663, + "learning_rate": 5.891761065258089e-05, + "loss": 1.4591, + "step": 8288 + }, + { + "epoch": 0.8743670886075949, + "grad_norm": 0.5733492374420166, + "learning_rate": 5.8820109416011485e-05, + "loss": 1.4441, + "step": 8289 + }, + { + "epoch": 0.8744725738396625, + "grad_norm": 0.5815274715423584, + "learning_rate": 5.8722685629595454e-05, + "loss": 1.3997, + "step": 8290 + }, + { + "epoch": 0.8745780590717299, + "grad_norm": 0.6261992454528809, + "learning_rate": 5.862533930424949e-05, + "loss": 1.456, + "step": 8291 + }, + { + "epoch": 
0.8746835443037975, + "grad_norm": 0.5859726667404175, + "learning_rate": 5.852807045088177e-05, + "loss": 1.4751, + "step": 8292 + }, + { + "epoch": 0.874789029535865, + "grad_norm": 0.6654813289642334, + "learning_rate": 5.843087908039166e-05, + "loss": 1.4247, + "step": 8293 + }, + { + "epoch": 0.8748945147679325, + "grad_norm": 0.6805822253227234, + "learning_rate": 5.833376520367012e-05, + "loss": 1.4579, + "step": 8294 + }, + { + "epoch": 0.875, + "grad_norm": 0.5471951961517334, + "learning_rate": 5.823672883159911e-05, + "loss": 1.4785, + "step": 8295 + }, + { + "epoch": 0.8751054852320675, + "grad_norm": 0.5757602453231812, + "learning_rate": 5.813976997505202e-05, + "loss": 1.4378, + "step": 8296 + }, + { + "epoch": 0.875210970464135, + "grad_norm": 0.5707383751869202, + "learning_rate": 5.804288864489366e-05, + "loss": 1.4672, + "step": 8297 + }, + { + "epoch": 0.8753164556962025, + "grad_norm": 0.6033094525337219, + "learning_rate": 5.794608485198008e-05, + "loss": 1.4529, + "step": 8298 + }, + { + "epoch": 0.8754219409282701, + "grad_norm": 0.5887058973312378, + "learning_rate": 5.784935860715862e-05, + "loss": 1.4823, + "step": 8299 + }, + { + "epoch": 0.8755274261603375, + "grad_norm": 0.5646834373474121, + "learning_rate": 5.7752709921267855e-05, + "loss": 1.4458, + "step": 8300 + }, + { + "epoch": 0.8756329113924051, + "grad_norm": 0.5498074889183044, + "learning_rate": 5.7656138805137785e-05, + "loss": 1.4509, + "step": 8301 + }, + { + "epoch": 0.8757383966244726, + "grad_norm": 0.6226603984832764, + "learning_rate": 5.7559645269589764e-05, + "loss": 1.4343, + "step": 8302 + }, + { + "epoch": 0.87584388185654, + "grad_norm": 0.5608768463134766, + "learning_rate": 5.746322932543621e-05, + "loss": 1.4081, + "step": 8303 + }, + { + "epoch": 0.8759493670886076, + "grad_norm": 0.6533785462379456, + "learning_rate": 5.736689098348125e-05, + "loss": 1.4407, + "step": 8304 + }, + { + "epoch": 0.8760548523206751, + "grad_norm": 0.6474064588546753, + 
"learning_rate": 5.727063025451973e-05, + "loss": 1.4191, + "step": 8305 + }, + { + "epoch": 0.8761603375527426, + "grad_norm": 0.5799693465232849, + "learning_rate": 5.717444714933845e-05, + "loss": 1.4557, + "step": 8306 + }, + { + "epoch": 0.8762658227848101, + "grad_norm": 0.5999577641487122, + "learning_rate": 5.707834167871512e-05, + "loss": 1.425, + "step": 8307 + }, + { + "epoch": 0.8763713080168777, + "grad_norm": 0.6146747469902039, + "learning_rate": 5.698231385341887e-05, + "loss": 1.4589, + "step": 8308 + }, + { + "epoch": 0.8764767932489451, + "grad_norm": 0.5636517405509949, + "learning_rate": 5.6886363684210016e-05, + "loss": 1.4196, + "step": 8309 + }, + { + "epoch": 0.8765822784810127, + "grad_norm": 0.5831782817840576, + "learning_rate": 5.6790491181840294e-05, + "loss": 1.4542, + "step": 8310 + }, + { + "epoch": 0.8766877637130802, + "grad_norm": 0.5732378363609314, + "learning_rate": 5.6694696357052685e-05, + "loss": 1.4858, + "step": 8311 + }, + { + "epoch": 0.8767932489451477, + "grad_norm": 0.6142959594726562, + "learning_rate": 5.6598979220581434e-05, + "loss": 1.4563, + "step": 8312 + }, + { + "epoch": 0.8768987341772152, + "grad_norm": 0.6124743819236755, + "learning_rate": 5.650333978315223e-05, + "loss": 1.4714, + "step": 8313 + }, + { + "epoch": 0.8770042194092827, + "grad_norm": 0.6132786273956299, + "learning_rate": 5.640777805548181e-05, + "loss": 1.4393, + "step": 8314 + }, + { + "epoch": 0.8771097046413502, + "grad_norm": 0.7038336992263794, + "learning_rate": 5.631229404827845e-05, + "loss": 1.4594, + "step": 8315 + }, + { + "epoch": 0.8772151898734177, + "grad_norm": 0.6065717935562134, + "learning_rate": 5.6216887772241596e-05, + "loss": 1.4544, + "step": 8316 + }, + { + "epoch": 0.8773206751054853, + "grad_norm": 0.6077999472618103, + "learning_rate": 5.612155923806203e-05, + "loss": 1.4401, + "step": 8317 + }, + { + "epoch": 0.8774261603375527, + "grad_norm": 0.5665875673294067, + "learning_rate": 5.60263084564217e-05, + 
"loss": 1.4539, + "step": 8318 + }, + { + "epoch": 0.8775316455696203, + "grad_norm": 0.5855908989906311, + "learning_rate": 5.5931135437993994e-05, + "loss": 1.4531, + "step": 8319 + }, + { + "epoch": 0.8776371308016878, + "grad_norm": 0.6884644627571106, + "learning_rate": 5.583604019344354e-05, + "loss": 1.4373, + "step": 8320 + }, + { + "epoch": 0.8777426160337553, + "grad_norm": 0.6253120303153992, + "learning_rate": 5.574102273342616e-05, + "loss": 1.4233, + "step": 8321 + }, + { + "epoch": 0.8778481012658228, + "grad_norm": 0.5783118605613708, + "learning_rate": 5.5646083068589065e-05, + "loss": 1.4677, + "step": 8322 + }, + { + "epoch": 0.8779535864978903, + "grad_norm": 0.5822113752365112, + "learning_rate": 5.5551221209570596e-05, + "loss": 1.4648, + "step": 8323 + }, + { + "epoch": 0.8780590717299578, + "grad_norm": 0.5762304663658142, + "learning_rate": 5.5456437167000746e-05, + "loss": 1.4733, + "step": 8324 + }, + { + "epoch": 0.8781645569620253, + "grad_norm": 0.5635523796081543, + "learning_rate": 5.536173095150043e-05, + "loss": 1.4498, + "step": 8325 + }, + { + "epoch": 0.8782700421940929, + "grad_norm": 0.6588979363441467, + "learning_rate": 5.526710257368192e-05, + "loss": 1.4567, + "step": 8326 + }, + { + "epoch": 0.8783755274261603, + "grad_norm": 0.5822212100028992, + "learning_rate": 5.517255204414889e-05, + "loss": 1.4599, + "step": 8327 + }, + { + "epoch": 0.8784810126582279, + "grad_norm": 0.6074418425559998, + "learning_rate": 5.507807937349604e-05, + "loss": 1.4344, + "step": 8328 + }, + { + "epoch": 0.8785864978902953, + "grad_norm": 0.5696887969970703, + "learning_rate": 5.498368457230965e-05, + "loss": 1.4135, + "step": 8329 + }, + { + "epoch": 0.8786919831223629, + "grad_norm": 0.5865123271942139, + "learning_rate": 5.4889367651167007e-05, + "loss": 1.4541, + "step": 8330 + }, + { + "epoch": 0.8787974683544304, + "grad_norm": 0.5978150963783264, + "learning_rate": 5.479512862063674e-05, + "loss": 1.476, + "step": 8331 + }, + { + 
"epoch": 0.8789029535864978, + "grad_norm": 0.5657370686531067, + "learning_rate": 5.470096749127906e-05, + "loss": 1.4717, + "step": 8332 + }, + { + "epoch": 0.8790084388185654, + "grad_norm": 0.5741989016532898, + "learning_rate": 5.460688427364505e-05, + "loss": 1.4333, + "step": 8333 + }, + { + "epoch": 0.8791139240506329, + "grad_norm": 0.561259388923645, + "learning_rate": 5.451287897827725e-05, + "loss": 1.4389, + "step": 8334 + }, + { + "epoch": 0.8792194092827004, + "grad_norm": 0.5729077458381653, + "learning_rate": 5.441895161570934e-05, + "loss": 1.4624, + "step": 8335 + }, + { + "epoch": 0.8793248945147679, + "grad_norm": 0.5782994031906128, + "learning_rate": 5.43251021964663e-05, + "loss": 1.4265, + "step": 8336 + }, + { + "epoch": 0.8794303797468355, + "grad_norm": 0.655602753162384, + "learning_rate": 5.423133073106457e-05, + "loss": 1.4412, + "step": 8337 + }, + { + "epoch": 0.8795358649789029, + "grad_norm": 0.6144776940345764, + "learning_rate": 5.413763723001164e-05, + "loss": 1.4383, + "step": 8338 + }, + { + "epoch": 0.8796413502109705, + "grad_norm": 0.5911141633987427, + "learning_rate": 5.4044021703806375e-05, + "loss": 1.3704, + "step": 8339 + }, + { + "epoch": 0.879746835443038, + "grad_norm": 0.5668524503707886, + "learning_rate": 5.3950484162938714e-05, + "loss": 1.4173, + "step": 8340 + }, + { + "epoch": 0.8798523206751054, + "grad_norm": 0.5998548865318298, + "learning_rate": 5.385702461789019e-05, + "loss": 1.444, + "step": 8341 + }, + { + "epoch": 0.879957805907173, + "grad_norm": 0.5692631006240845, + "learning_rate": 5.376364307913334e-05, + "loss": 1.4261, + "step": 8342 + }, + { + "epoch": 0.8800632911392405, + "grad_norm": 0.5497626066207886, + "learning_rate": 5.3670339557132045e-05, + "loss": 1.4058, + "step": 8343 + }, + { + "epoch": 0.880168776371308, + "grad_norm": 0.5747050642967224, + "learning_rate": 5.3577114062341446e-05, + "loss": 1.4225, + "step": 8344 + }, + { + "epoch": 0.8802742616033755, + "grad_norm": 
0.5649378895759583, + "learning_rate": 5.348396660520785e-05, + "loss": 1.4603, + "step": 8345 + }, + { + "epoch": 0.8803797468354431, + "grad_norm": 0.5915111899375916, + "learning_rate": 5.339089719616891e-05, + "loss": 1.4319, + "step": 8346 + }, + { + "epoch": 0.8804852320675105, + "grad_norm": 0.6067190170288086, + "learning_rate": 5.329790584565361e-05, + "loss": 1.481, + "step": 8347 + }, + { + "epoch": 0.880590717299578, + "grad_norm": 0.576951265335083, + "learning_rate": 5.320499256408204e-05, + "loss": 1.4426, + "step": 8348 + }, + { + "epoch": 0.8806962025316456, + "grad_norm": 0.6053596138954163, + "learning_rate": 5.311215736186536e-05, + "loss": 1.4291, + "step": 8349 + }, + { + "epoch": 0.880801687763713, + "grad_norm": 0.5469794869422913, + "learning_rate": 5.3019400249406686e-05, + "loss": 1.4669, + "step": 8350 + }, + { + "epoch": 0.8809071729957806, + "grad_norm": 0.5572378635406494, + "learning_rate": 5.29267212370996e-05, + "loss": 1.4625, + "step": 8351 + }, + { + "epoch": 0.8810126582278481, + "grad_norm": 0.5705866813659668, + "learning_rate": 5.283412033532939e-05, + "loss": 1.4153, + "step": 8352 + }, + { + "epoch": 0.8811181434599156, + "grad_norm": 0.5538221001625061, + "learning_rate": 5.274159755447233e-05, + "loss": 1.3911, + "step": 8353 + }, + { + "epoch": 0.8812236286919831, + "grad_norm": 0.5663058161735535, + "learning_rate": 5.264915290489614e-05, + "loss": 1.4286, + "step": 8354 + }, + { + "epoch": 0.8813291139240507, + "grad_norm": 0.5963493585586548, + "learning_rate": 5.25567863969596e-05, + "loss": 1.416, + "step": 8355 + }, + { + "epoch": 0.8814345991561181, + "grad_norm": 0.5831964015960693, + "learning_rate": 5.246449804101294e-05, + "loss": 1.4442, + "step": 8356 + }, + { + "epoch": 0.8815400843881857, + "grad_norm": 0.5555339455604553, + "learning_rate": 5.237228784739739e-05, + "loss": 1.4701, + "step": 8357 + }, + { + "epoch": 0.8816455696202532, + "grad_norm": 0.5800992846488953, + "learning_rate": 
5.228015582644585e-05, + "loss": 1.4444, + "step": 8358 + }, + { + "epoch": 0.8817510548523206, + "grad_norm": 0.5691199898719788, + "learning_rate": 5.21881019884819e-05, + "loss": 1.4751, + "step": 8359 + }, + { + "epoch": 0.8818565400843882, + "grad_norm": 0.6563158631324768, + "learning_rate": 5.209612634382077e-05, + "loss": 1.4238, + "step": 8360 + }, + { + "epoch": 0.8819620253164557, + "grad_norm": 0.60115647315979, + "learning_rate": 5.2004228902768815e-05, + "loss": 1.4538, + "step": 8361 + }, + { + "epoch": 0.8820675105485232, + "grad_norm": 0.6017267107963562, + "learning_rate": 5.191240967562347e-05, + "loss": 1.489, + "step": 8362 + }, + { + "epoch": 0.8821729957805907, + "grad_norm": 0.5737987160682678, + "learning_rate": 5.182066867267357e-05, + "loss": 1.4465, + "step": 8363 + }, + { + "epoch": 0.8822784810126583, + "grad_norm": 0.580034077167511, + "learning_rate": 5.172900590419915e-05, + "loss": 1.4587, + "step": 8364 + }, + { + "epoch": 0.8823839662447257, + "grad_norm": 0.5721950531005859, + "learning_rate": 5.1637421380471586e-05, + "loss": 1.4672, + "step": 8365 + }, + { + "epoch": 0.8824894514767933, + "grad_norm": 0.6513379812240601, + "learning_rate": 5.154591511175316e-05, + "loss": 1.4225, + "step": 8366 + }, + { + "epoch": 0.8825949367088608, + "grad_norm": 0.5847105383872986, + "learning_rate": 5.1454487108297924e-05, + "loss": 1.4391, + "step": 8367 + }, + { + "epoch": 0.8827004219409282, + "grad_norm": 0.6131078600883484, + "learning_rate": 5.136313738035059e-05, + "loss": 1.4505, + "step": 8368 + }, + { + "epoch": 0.8828059071729958, + "grad_norm": 0.5948898792266846, + "learning_rate": 5.127186593814748e-05, + "loss": 1.4288, + "step": 8369 + }, + { + "epoch": 0.8829113924050633, + "grad_norm": 0.5804411172866821, + "learning_rate": 5.118067279191599e-05, + "loss": 1.3969, + "step": 8370 + }, + { + "epoch": 0.8830168776371308, + "grad_norm": 0.5766991972923279, + "learning_rate": 5.1089557951874696e-05, + "loss": 1.4326, + "step": 
8371 + }, + { + "epoch": 0.8831223628691983, + "grad_norm": 0.5835434198379517, + "learning_rate": 5.0998521428233526e-05, + "loss": 1.4655, + "step": 8372 + }, + { + "epoch": 0.8832278481012659, + "grad_norm": 0.5701676607131958, + "learning_rate": 5.0907563231193556e-05, + "loss": 1.4267, + "step": 8373 + }, + { + "epoch": 0.8833333333333333, + "grad_norm": 0.5635223388671875, + "learning_rate": 5.081668337094713e-05, + "loss": 1.4367, + "step": 8374 + }, + { + "epoch": 0.8834388185654009, + "grad_norm": 0.6017253398895264, + "learning_rate": 5.072588185767763e-05, + "loss": 1.4726, + "step": 8375 + }, + { + "epoch": 0.8835443037974684, + "grad_norm": 0.5767970681190491, + "learning_rate": 5.063515870156013e-05, + "loss": 1.4828, + "step": 8376 + }, + { + "epoch": 0.8836497890295358, + "grad_norm": 0.6185413002967834, + "learning_rate": 5.054451391276035e-05, + "loss": 1.4538, + "step": 8377 + }, + { + "epoch": 0.8837552742616034, + "grad_norm": 0.5829180479049683, + "learning_rate": 5.045394750143567e-05, + "loss": 1.4327, + "step": 8378 + }, + { + "epoch": 0.8838607594936709, + "grad_norm": 0.5633491277694702, + "learning_rate": 5.0363459477734464e-05, + "loss": 1.427, + "step": 8379 + }, + { + "epoch": 0.8839662447257384, + "grad_norm": 0.5683838129043579, + "learning_rate": 5.0273049851796205e-05, + "loss": 1.4726, + "step": 8380 + }, + { + "epoch": 0.8840717299578059, + "grad_norm": 0.5947743058204651, + "learning_rate": 5.0182718633751954e-05, + "loss": 1.4321, + "step": 8381 + }, + { + "epoch": 0.8841772151898735, + "grad_norm": 0.5918828845024109, + "learning_rate": 5.009246583372362e-05, + "loss": 1.4651, + "step": 8382 + }, + { + "epoch": 0.8842827004219409, + "grad_norm": 0.6822320222854614, + "learning_rate": 5.000229146182453e-05, + "loss": 1.4375, + "step": 8383 + }, + { + "epoch": 0.8843881856540085, + "grad_norm": 0.6002916097640991, + "learning_rate": 4.9912195528159174e-05, + "loss": 1.4371, + "step": 8384 + }, + { + "epoch": 0.884493670886076, 
+ "grad_norm": 0.5755400657653809, + "learning_rate": 4.982217804282332e-05, + "loss": 1.4738, + "step": 8385 + }, + { + "epoch": 0.8845991561181434, + "grad_norm": 0.6157798767089844, + "learning_rate": 4.973223901590382e-05, + "loss": 1.4485, + "step": 8386 + }, + { + "epoch": 0.884704641350211, + "grad_norm": 0.5926619172096252, + "learning_rate": 4.9642378457478847e-05, + "loss": 1.4427, + "step": 8387 + }, + { + "epoch": 0.8848101265822785, + "grad_norm": 0.5878189206123352, + "learning_rate": 4.955259637761761e-05, + "loss": 1.4144, + "step": 8388 + }, + { + "epoch": 0.884915611814346, + "grad_norm": 0.5635631084442139, + "learning_rate": 4.946289278638064e-05, + "loss": 1.4253, + "step": 8389 + }, + { + "epoch": 0.8850210970464135, + "grad_norm": 0.6040284633636475, + "learning_rate": 4.9373267693819805e-05, + "loss": 1.4573, + "step": 8390 + }, + { + "epoch": 0.8851265822784811, + "grad_norm": 0.6090943813323975, + "learning_rate": 4.928372110997792e-05, + "loss": 1.4623, + "step": 8391 + }, + { + "epoch": 0.8852320675105485, + "grad_norm": 0.6032635569572449, + "learning_rate": 4.9194253044889117e-05, + "loss": 1.4217, + "step": 8392 + }, + { + "epoch": 0.885337552742616, + "grad_norm": 0.6341644525527954, + "learning_rate": 4.910486350857887e-05, + "loss": 1.4343, + "step": 8393 + }, + { + "epoch": 0.8854430379746835, + "grad_norm": 0.6422449350357056, + "learning_rate": 4.90155525110636e-05, + "loss": 1.4316, + "step": 8394 + }, + { + "epoch": 0.885548523206751, + "grad_norm": 0.6000868082046509, + "learning_rate": 4.89263200623512e-05, + "loss": 1.4351, + "step": 8395 + }, + { + "epoch": 0.8856540084388186, + "grad_norm": 0.6155270338058472, + "learning_rate": 4.883716617244044e-05, + "loss": 1.4518, + "step": 8396 + }, + { + "epoch": 0.885759493670886, + "grad_norm": 0.6200721263885498, + "learning_rate": 4.874809085132148e-05, + "loss": 1.4434, + "step": 8397 + }, + { + "epoch": 0.8858649789029536, + "grad_norm": 0.6287903189659119, + "learning_rate": 
4.865909410897576e-05, + "loss": 1.4271, + "step": 8398 + }, + { + "epoch": 0.8859704641350211, + "grad_norm": 0.6616916060447693, + "learning_rate": 4.8570175955375715e-05, + "loss": 1.4586, + "step": 8399 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 0.5878200531005859, + "learning_rate": 4.848133640048513e-05, + "loss": 1.4403, + "step": 8400 + }, + { + "epoch": 0.8861814345991561, + "grad_norm": 0.575049102306366, + "learning_rate": 4.839257545425879e-05, + "loss": 1.4529, + "step": 8401 + }, + { + "epoch": 0.8862869198312237, + "grad_norm": 0.5751542448997498, + "learning_rate": 4.830389312664299e-05, + "loss": 1.4503, + "step": 8402 + }, + { + "epoch": 0.8863924050632911, + "grad_norm": 0.5756995677947998, + "learning_rate": 4.821528942757494e-05, + "loss": 1.4659, + "step": 8403 + }, + { + "epoch": 0.8864978902953586, + "grad_norm": 0.6035192012786865, + "learning_rate": 4.8126764366983126e-05, + "loss": 1.4679, + "step": 8404 + }, + { + "epoch": 0.8866033755274262, + "grad_norm": 0.6625435948371887, + "learning_rate": 4.803831795478719e-05, + "loss": 1.4532, + "step": 8405 + }, + { + "epoch": 0.8867088607594936, + "grad_norm": 0.6541687846183777, + "learning_rate": 4.794995020089804e-05, + "loss": 1.4754, + "step": 8406 + }, + { + "epoch": 0.8868143459915612, + "grad_norm": 0.6405147314071655, + "learning_rate": 4.7861661115217754e-05, + "loss": 1.4739, + "step": 8407 + }, + { + "epoch": 0.8869198312236287, + "grad_norm": 0.6042768359184265, + "learning_rate": 4.7773450707639414e-05, + "loss": 1.4501, + "step": 8408 + }, + { + "epoch": 0.8870253164556962, + "grad_norm": 0.7010250687599182, + "learning_rate": 4.768531898804754e-05, + "loss": 1.4902, + "step": 8409 + }, + { + "epoch": 0.8871308016877637, + "grad_norm": 0.6342092752456665, + "learning_rate": 4.75972659663178e-05, + "loss": 1.4324, + "step": 8410 + }, + { + "epoch": 0.8872362869198313, + "grad_norm": 0.5986419320106506, + "learning_rate": 4.75092916523169e-05, + "loss": 1.4669, + 
"step": 8411 + }, + { + "epoch": 0.8873417721518987, + "grad_norm": 0.590823233127594, + "learning_rate": 4.742139605590279e-05, + "loss": 1.4276, + "step": 8412 + }, + { + "epoch": 0.8874472573839662, + "grad_norm": 0.567789614200592, + "learning_rate": 4.733357918692466e-05, + "loss": 1.473, + "step": 8413 + }, + { + "epoch": 0.8875527426160338, + "grad_norm": 0.5968740582466125, + "learning_rate": 4.7245841055222726e-05, + "loss": 1.4548, + "step": 8414 + }, + { + "epoch": 0.8876582278481012, + "grad_norm": 0.568968653678894, + "learning_rate": 4.715818167062863e-05, + "loss": 1.4565, + "step": 8415 + }, + { + "epoch": 0.8877637130801688, + "grad_norm": 0.5840939879417419, + "learning_rate": 4.7070601042964925e-05, + "loss": 1.4086, + "step": 8416 + }, + { + "epoch": 0.8878691983122363, + "grad_norm": 0.5582497119903564, + "learning_rate": 4.698309918204552e-05, + "loss": 1.4245, + "step": 8417 + }, + { + "epoch": 0.8879746835443038, + "grad_norm": 0.565785825252533, + "learning_rate": 4.6895676097675225e-05, + "loss": 1.4429, + "step": 8418 + }, + { + "epoch": 0.8880801687763713, + "grad_norm": 0.6345649361610413, + "learning_rate": 4.680833179965063e-05, + "loss": 1.4349, + "step": 8419 + }, + { + "epoch": 0.8881856540084389, + "grad_norm": 0.5684214234352112, + "learning_rate": 4.672106629775882e-05, + "loss": 1.4548, + "step": 8420 + }, + { + "epoch": 0.8882911392405063, + "grad_norm": 0.5743030309677124, + "learning_rate": 4.663387960177848e-05, + "loss": 1.4481, + "step": 8421 + }, + { + "epoch": 0.8883966244725738, + "grad_norm": 0.6048021912574768, + "learning_rate": 4.654677172147912e-05, + "loss": 1.4455, + "step": 8422 + }, + { + "epoch": 0.8885021097046414, + "grad_norm": 0.6148910522460938, + "learning_rate": 4.645974266662176e-05, + "loss": 1.4671, + "step": 8423 + }, + { + "epoch": 0.8886075949367088, + "grad_norm": 0.5965267419815063, + "learning_rate": 4.637279244695844e-05, + "loss": 1.4126, + "step": 8424 + }, + { + "epoch": 
0.8887130801687764, + "grad_norm": 0.5595756769180298, + "learning_rate": 4.628592107223229e-05, + "loss": 1.4273, + "step": 8425 + }, + { + "epoch": 0.8888185654008439, + "grad_norm": 0.5945635437965393, + "learning_rate": 4.6199128552177756e-05, + "loss": 1.4145, + "step": 8426 + }, + { + "epoch": 0.8889240506329114, + "grad_norm": 0.5832169651985168, + "learning_rate": 4.611241489652016e-05, + "loss": 1.4727, + "step": 8427 + }, + { + "epoch": 0.8890295358649789, + "grad_norm": 0.6127349734306335, + "learning_rate": 4.6025780114976545e-05, + "loss": 1.4394, + "step": 8428 + }, + { + "epoch": 0.8891350210970465, + "grad_norm": 0.5972052812576294, + "learning_rate": 4.5939224217254574e-05, + "loss": 1.4501, + "step": 8429 + }, + { + "epoch": 0.8892405063291139, + "grad_norm": 0.5655155181884766, + "learning_rate": 4.585274721305333e-05, + "loss": 1.433, + "step": 8430 + }, + { + "epoch": 0.8893459915611814, + "grad_norm": 0.6263806223869324, + "learning_rate": 4.576634911206296e-05, + "loss": 1.4264, + "step": 8431 + }, + { + "epoch": 0.889451476793249, + "grad_norm": 0.6009604334831238, + "learning_rate": 4.5680029923964724e-05, + "loss": 1.4165, + "step": 8432 + }, + { + "epoch": 0.8895569620253164, + "grad_norm": 0.5926315188407898, + "learning_rate": 4.559378965843122e-05, + "loss": 1.4035, + "step": 8433 + }, + { + "epoch": 0.889662447257384, + "grad_norm": 0.5960385203361511, + "learning_rate": 4.5507628325126144e-05, + "loss": 1.4134, + "step": 8434 + }, + { + "epoch": 0.8897679324894515, + "grad_norm": 0.5693077445030212, + "learning_rate": 4.542154593370401e-05, + "loss": 1.4644, + "step": 8435 + }, + { + "epoch": 0.889873417721519, + "grad_norm": 0.6120367646217346, + "learning_rate": 4.533554249381119e-05, + "loss": 1.4673, + "step": 8436 + }, + { + "epoch": 0.8899789029535865, + "grad_norm": 0.5989223122596741, + "learning_rate": 4.524961801508456e-05, + "loss": 1.425, + "step": 8437 + }, + { + "epoch": 0.890084388185654, + "grad_norm": 
0.5578175783157349, + "learning_rate": 4.5163772507152425e-05, + "loss": 1.4555, + "step": 8438 + }, + { + "epoch": 0.8901898734177215, + "grad_norm": 0.5868714451789856, + "learning_rate": 4.507800597963424e-05, + "loss": 1.4485, + "step": 8439 + }, + { + "epoch": 0.890295358649789, + "grad_norm": 0.5783162713050842, + "learning_rate": 4.4992318442140575e-05, + "loss": 1.4632, + "step": 8440 + }, + { + "epoch": 0.8904008438818566, + "grad_norm": 0.6081377267837524, + "learning_rate": 4.490670990427309e-05, + "loss": 1.4406, + "step": 8441 + }, + { + "epoch": 0.890506329113924, + "grad_norm": 0.5820649266242981, + "learning_rate": 4.4821180375624684e-05, + "loss": 1.433, + "step": 8442 + }, + { + "epoch": 0.8906118143459916, + "grad_norm": 0.5811196565628052, + "learning_rate": 4.473572986577928e-05, + "loss": 1.4479, + "step": 8443 + }, + { + "epoch": 0.8907172995780591, + "grad_norm": 0.5701732039451599, + "learning_rate": 4.4650358384312056e-05, + "loss": 1.4312, + "step": 8444 + }, + { + "epoch": 0.8908227848101266, + "grad_norm": 0.5905373096466064, + "learning_rate": 4.4565065940789515e-05, + "loss": 1.4171, + "step": 8445 + }, + { + "epoch": 0.8909282700421941, + "grad_norm": 0.5893309712409973, + "learning_rate": 4.447985254476894e-05, + "loss": 1.4507, + "step": 8446 + }, + { + "epoch": 0.8910337552742617, + "grad_norm": 0.6494528651237488, + "learning_rate": 4.439471820579885e-05, + "loss": 1.4764, + "step": 8447 + }, + { + "epoch": 0.8911392405063291, + "grad_norm": 0.6070489287376404, + "learning_rate": 4.430966293341912e-05, + "loss": 1.4635, + "step": 8448 + }, + { + "epoch": 0.8912447257383966, + "grad_norm": 0.6346426606178284, + "learning_rate": 4.422468673716054e-05, + "loss": 1.4673, + "step": 8449 + }, + { + "epoch": 0.8913502109704642, + "grad_norm": 0.606345534324646, + "learning_rate": 4.413978962654508e-05, + "loss": 1.4372, + "step": 8450 + }, + { + "epoch": 0.8914556962025316, + "grad_norm": 0.6018720269203186, + "learning_rate": 
4.405497161108596e-05, + "loss": 1.4319, + "step": 8451 + }, + { + "epoch": 0.8915611814345992, + "grad_norm": 0.5813198685646057, + "learning_rate": 4.397023270028749e-05, + "loss": 1.4517, + "step": 8452 + }, + { + "epoch": 0.8916666666666667, + "grad_norm": 0.5728744864463806, + "learning_rate": 4.388557290364484e-05, + "loss": 1.4354, + "step": 8453 + }, + { + "epoch": 0.8917721518987342, + "grad_norm": 0.5668070912361145, + "learning_rate": 4.3800992230644904e-05, + "loss": 1.4603, + "step": 8454 + }, + { + "epoch": 0.8918776371308017, + "grad_norm": 0.6512918472290039, + "learning_rate": 4.3716490690765194e-05, + "loss": 1.4061, + "step": 8455 + }, + { + "epoch": 0.8919831223628693, + "grad_norm": 0.5761946439743042, + "learning_rate": 4.3632068293474545e-05, + "loss": 1.4584, + "step": 8456 + }, + { + "epoch": 0.8920886075949367, + "grad_norm": 0.6381387114524841, + "learning_rate": 4.35477250482329e-05, + "loss": 1.4256, + "step": 8457 + }, + { + "epoch": 0.8921940928270042, + "grad_norm": 0.5778988003730774, + "learning_rate": 4.346346096449136e-05, + "loss": 1.4422, + "step": 8458 + }, + { + "epoch": 0.8922995780590718, + "grad_norm": 0.566650390625, + "learning_rate": 4.337927605169212e-05, + "loss": 1.4493, + "step": 8459 + }, + { + "epoch": 0.8924050632911392, + "grad_norm": 0.5686798095703125, + "learning_rate": 4.3295170319268554e-05, + "loss": 1.4647, + "step": 8460 + }, + { + "epoch": 0.8925105485232068, + "grad_norm": 0.6231056451797485, + "learning_rate": 4.321114377664495e-05, + "loss": 1.4498, + "step": 8461 + }, + { + "epoch": 0.8926160337552742, + "grad_norm": 0.5554863810539246, + "learning_rate": 4.3127196433237205e-05, + "loss": 1.4391, + "step": 8462 + }, + { + "epoch": 0.8927215189873418, + "grad_norm": 0.6026464104652405, + "learning_rate": 4.304332829845187e-05, + "loss": 1.439, + "step": 8463 + }, + { + "epoch": 0.8928270042194093, + "grad_norm": 0.5877730250358582, + "learning_rate": 4.2959539381686843e-05, + "loss": 1.4353, + 
"step": 8464 + }, + { + "epoch": 0.8929324894514767, + "grad_norm": 0.5753616690635681, + "learning_rate": 4.287582969233103e-05, + "loss": 1.4476, + "step": 8465 + }, + { + "epoch": 0.8930379746835443, + "grad_norm": 0.5770398378372192, + "learning_rate": 4.279219923976452e-05, + "loss": 1.428, + "step": 8466 + }, + { + "epoch": 0.8931434599156118, + "grad_norm": 0.5585523843765259, + "learning_rate": 4.2708648033358554e-05, + "loss": 1.4379, + "step": 8467 + }, + { + "epoch": 0.8932489451476793, + "grad_norm": 0.5703950524330139, + "learning_rate": 4.26251760824754e-05, + "loss": 1.4229, + "step": 8468 + }, + { + "epoch": 0.8933544303797468, + "grad_norm": 0.5818519592285156, + "learning_rate": 4.2541783396468584e-05, + "loss": 1.4544, + "step": 8469 + }, + { + "epoch": 0.8934599156118144, + "grad_norm": 0.5742015242576599, + "learning_rate": 4.245846998468261e-05, + "loss": 1.4571, + "step": 8470 + }, + { + "epoch": 0.8935654008438818, + "grad_norm": 0.6776243448257446, + "learning_rate": 4.2375235856453197e-05, + "loss": 1.4363, + "step": 8471 + }, + { + "epoch": 0.8936708860759494, + "grad_norm": 0.5775099992752075, + "learning_rate": 4.229208102110721e-05, + "loss": 1.432, + "step": 8472 + }, + { + "epoch": 0.8937763713080169, + "grad_norm": 0.5623939037322998, + "learning_rate": 4.220900548796244e-05, + "loss": 1.4649, + "step": 8473 + }, + { + "epoch": 0.8938818565400843, + "grad_norm": 0.5879709720611572, + "learning_rate": 4.212600926632804e-05, + "loss": 1.4261, + "step": 8474 + }, + { + "epoch": 0.8939873417721519, + "grad_norm": 0.6030797958374023, + "learning_rate": 4.204309236550405e-05, + "loss": 1.4556, + "step": 8475 + }, + { + "epoch": 0.8940928270042194, + "grad_norm": 0.5683237910270691, + "learning_rate": 4.1960254794781714e-05, + "loss": 1.3935, + "step": 8476 + }, + { + "epoch": 0.8941983122362869, + "grad_norm": 0.6121626496315002, + "learning_rate": 4.1877496563443446e-05, + "loss": 1.4469, + "step": 8477 + }, + { + "epoch": 
0.8943037974683544, + "grad_norm": 0.5569887757301331, + "learning_rate": 4.179481768076274e-05, + "loss": 1.4429, + "step": 8478 + }, + { + "epoch": 0.894409282700422, + "grad_norm": 0.648878276348114, + "learning_rate": 4.1712218156004014e-05, + "loss": 1.4235, + "step": 8479 + }, + { + "epoch": 0.8945147679324894, + "grad_norm": 0.5679932832717896, + "learning_rate": 4.16296979984232e-05, + "loss": 1.4427, + "step": 8480 + }, + { + "epoch": 0.894620253164557, + "grad_norm": 0.5893374085426331, + "learning_rate": 4.154725721726699e-05, + "loss": 1.4632, + "step": 8481 + }, + { + "epoch": 0.8947257383966245, + "grad_norm": 0.5812003016471863, + "learning_rate": 4.1464895821773235e-05, + "loss": 1.4665, + "step": 8482 + }, + { + "epoch": 0.8948312236286919, + "grad_norm": 0.6031120419502258, + "learning_rate": 4.138261382117098e-05, + "loss": 1.4417, + "step": 8483 + }, + { + "epoch": 0.8949367088607595, + "grad_norm": 0.6084063053131104, + "learning_rate": 4.130041122468042e-05, + "loss": 1.4346, + "step": 8484 + }, + { + "epoch": 0.895042194092827, + "grad_norm": 0.5775222778320312, + "learning_rate": 4.1218288041512534e-05, + "loss": 1.4368, + "step": 8485 + }, + { + "epoch": 0.8951476793248945, + "grad_norm": 0.5664339661598206, + "learning_rate": 4.113624428086987e-05, + "loss": 1.4469, + "step": 8486 + }, + { + "epoch": 0.895253164556962, + "grad_norm": 0.587427020072937, + "learning_rate": 4.105427995194566e-05, + "loss": 1.4745, + "step": 8487 + }, + { + "epoch": 0.8953586497890296, + "grad_norm": 0.5587526559829712, + "learning_rate": 4.0972395063924554e-05, + "loss": 1.4199, + "step": 8488 + }, + { + "epoch": 0.895464135021097, + "grad_norm": 0.5882428288459778, + "learning_rate": 4.089058962598213e-05, + "loss": 1.4359, + "step": 8489 + }, + { + "epoch": 0.8955696202531646, + "grad_norm": 0.5743575692176819, + "learning_rate": 4.080886364728506e-05, + "loss": 1.4155, + "step": 8490 + }, + { + "epoch": 0.8956751054852321, + "grad_norm": 
0.5702531933784485, + "learning_rate": 4.072721713699118e-05, + "loss": 1.4219, + "step": 8491 + }, + { + "epoch": 0.8957805907172995, + "grad_norm": 0.6693195104598999, + "learning_rate": 4.064565010424942e-05, + "loss": 1.4097, + "step": 8492 + }, + { + "epoch": 0.8958860759493671, + "grad_norm": 0.6109238266944885, + "learning_rate": 4.056416255819964e-05, + "loss": 1.4002, + "step": 8493 + }, + { + "epoch": 0.8959915611814346, + "grad_norm": 0.6257588267326355, + "learning_rate": 4.048275450797312e-05, + "loss": 1.4132, + "step": 8494 + }, + { + "epoch": 0.8960970464135021, + "grad_norm": 0.5877582430839539, + "learning_rate": 4.0401425962691804e-05, + "loss": 1.4988, + "step": 8495 + }, + { + "epoch": 0.8962025316455696, + "grad_norm": 0.6077717542648315, + "learning_rate": 4.032017693146908e-05, + "loss": 1.4447, + "step": 8496 + }, + { + "epoch": 0.8963080168776372, + "grad_norm": 0.588555634021759, + "learning_rate": 4.023900742340941e-05, + "loss": 1.4314, + "step": 8497 + }, + { + "epoch": 0.8964135021097046, + "grad_norm": 0.549619197845459, + "learning_rate": 4.015791744760811e-05, + "loss": 1.4216, + "step": 8498 + }, + { + "epoch": 0.8965189873417722, + "grad_norm": 0.6039576530456543, + "learning_rate": 4.0076907013151726e-05, + "loss": 1.4394, + "step": 8499 + }, + { + "epoch": 0.8966244725738397, + "grad_norm": 0.5728418231010437, + "learning_rate": 3.999597612911793e-05, + "loss": 1.4437, + "step": 8500 + }, + { + "epoch": 0.8967299578059071, + "grad_norm": 0.5675164461135864, + "learning_rate": 3.991512480457546e-05, + "loss": 1.452, + "step": 8501 + }, + { + "epoch": 0.8968354430379747, + "grad_norm": 0.6143014430999756, + "learning_rate": 3.9834353048583984e-05, + "loss": 1.4614, + "step": 8502 + }, + { + "epoch": 0.8969409282700422, + "grad_norm": 0.6219041347503662, + "learning_rate": 3.9753660870194524e-05, + "loss": 1.4253, + "step": 8503 + }, + { + "epoch": 0.8970464135021097, + "grad_norm": 0.5675039291381836, + "learning_rate": 
3.967304827844892e-05, + "loss": 1.4593, + "step": 8504 + }, + { + "epoch": 0.8971518987341772, + "grad_norm": 0.6263207197189331, + "learning_rate": 3.95925152823802e-05, + "loss": 1.4211, + "step": 8505 + }, + { + "epoch": 0.8972573839662448, + "grad_norm": 0.5937134623527527, + "learning_rate": 3.9512061891012643e-05, + "loss": 1.4163, + "step": 8506 + }, + { + "epoch": 0.8973628691983122, + "grad_norm": 0.5610312223434448, + "learning_rate": 3.943168811336137e-05, + "loss": 1.4466, + "step": 8507 + }, + { + "epoch": 0.8974683544303798, + "grad_norm": 0.5889670252799988, + "learning_rate": 3.93513939584326e-05, + "loss": 1.472, + "step": 8508 + }, + { + "epoch": 0.8975738396624473, + "grad_norm": 0.5542000532150269, + "learning_rate": 3.927117943522379e-05, + "loss": 1.4591, + "step": 8509 + }, + { + "epoch": 0.8976793248945147, + "grad_norm": 0.5694115161895752, + "learning_rate": 3.9191044552723345e-05, + "loss": 1.4267, + "step": 8510 + }, + { + "epoch": 0.8977848101265823, + "grad_norm": 0.591485857963562, + "learning_rate": 3.911098931991075e-05, + "loss": 1.4242, + "step": 8511 + }, + { + "epoch": 0.8978902953586498, + "grad_norm": 0.575225293636322, + "learning_rate": 3.9031013745756655e-05, + "loss": 1.4387, + "step": 8512 + }, + { + "epoch": 0.8979957805907173, + "grad_norm": 0.5803104639053345, + "learning_rate": 3.895111783922256e-05, + "loss": 1.4217, + "step": 8513 + }, + { + "epoch": 0.8981012658227848, + "grad_norm": 0.550459086894989, + "learning_rate": 3.887130160926139e-05, + "loss": 1.4038, + "step": 8514 + }, + { + "epoch": 0.8982067510548524, + "grad_norm": 0.5929996371269226, + "learning_rate": 3.879156506481699e-05, + "loss": 1.4435, + "step": 8515 + }, + { + "epoch": 0.8983122362869198, + "grad_norm": 0.5867745876312256, + "learning_rate": 3.8711908214824035e-05, + "loss": 1.4708, + "step": 8516 + }, + { + "epoch": 0.8984177215189874, + "grad_norm": 0.6219155192375183, + "learning_rate": 3.863233106820857e-05, + "loss": 1.4558, + "step": 
8517 + }, + { + "epoch": 0.8985232067510549, + "grad_norm": 0.5622031688690186, + "learning_rate": 3.855283363388762e-05, + "loss": 1.4444, + "step": 8518 + }, + { + "epoch": 0.8986286919831223, + "grad_norm": 0.5682567954063416, + "learning_rate": 3.8473415920769304e-05, + "loss": 1.4111, + "step": 8519 + }, + { + "epoch": 0.8987341772151899, + "grad_norm": 0.5847326517105103, + "learning_rate": 3.839407793775268e-05, + "loss": 1.4754, + "step": 8520 + }, + { + "epoch": 0.8988396624472574, + "grad_norm": 0.5672892332077026, + "learning_rate": 3.8314819693727966e-05, + "loss": 1.4515, + "step": 8521 + }, + { + "epoch": 0.8989451476793249, + "grad_norm": 0.5682610273361206, + "learning_rate": 3.823564119757647e-05, + "loss": 1.451, + "step": 8522 + }, + { + "epoch": 0.8990506329113924, + "grad_norm": 0.6217142343521118, + "learning_rate": 3.81565424581706e-05, + "loss": 1.4567, + "step": 8523 + }, + { + "epoch": 0.89915611814346, + "grad_norm": 0.59307861328125, + "learning_rate": 3.8077523484373764e-05, + "loss": 1.4306, + "step": 8524 + }, + { + "epoch": 0.8992616033755274, + "grad_norm": 0.5928720235824585, + "learning_rate": 3.79985842850403e-05, + "loss": 1.4702, + "step": 8525 + }, + { + "epoch": 0.899367088607595, + "grad_norm": 0.6239176988601685, + "learning_rate": 3.791972486901596e-05, + "loss": 1.4855, + "step": 8526 + }, + { + "epoch": 0.8994725738396624, + "grad_norm": 0.5406942367553711, + "learning_rate": 3.784094524513709e-05, + "loss": 1.443, + "step": 8527 + }, + { + "epoch": 0.8995780590717299, + "grad_norm": 0.6053119897842407, + "learning_rate": 3.7762245422231476e-05, + "loss": 1.4521, + "step": 8528 + }, + { + "epoch": 0.8996835443037975, + "grad_norm": 0.5721937417984009, + "learning_rate": 3.768362540911788e-05, + "loss": 1.4279, + "step": 8529 + }, + { + "epoch": 0.8997890295358649, + "grad_norm": 0.6010051965713501, + "learning_rate": 3.760508521460584e-05, + "loss": 1.4672, + "step": 8530 + }, + { + "epoch": 0.8998945147679325, + 
"grad_norm": 0.6227510571479797, + "learning_rate": 3.7526624847496335e-05, + "loss": 1.4443, + "step": 8531 + }, + { + "epoch": 0.9, + "grad_norm": 0.6245517134666443, + "learning_rate": 3.744824431658131e-05, + "loss": 1.439, + "step": 8532 + }, + { + "epoch": 0.9001054852320675, + "grad_norm": 0.58124178647995, + "learning_rate": 3.736994363064358e-05, + "loss": 1.4201, + "step": 8533 + }, + { + "epoch": 0.900210970464135, + "grad_norm": 0.5879043340682983, + "learning_rate": 3.7291722798457215e-05, + "loss": 1.4745, + "step": 8534 + }, + { + "epoch": 0.9003164556962026, + "grad_norm": 0.5778028964996338, + "learning_rate": 3.72135818287872e-05, + "loss": 1.4348, + "step": 8535 + }, + { + "epoch": 0.90042194092827, + "grad_norm": 0.5681250691413879, + "learning_rate": 3.713552073038953e-05, + "loss": 1.4676, + "step": 8536 + }, + { + "epoch": 0.9005274261603375, + "grad_norm": 0.5808393955230713, + "learning_rate": 3.705753951201146e-05, + "loss": 1.4954, + "step": 8537 + }, + { + "epoch": 0.9006329113924051, + "grad_norm": 0.5913647413253784, + "learning_rate": 3.697963818239117e-05, + "loss": 1.4424, + "step": 8538 + }, + { + "epoch": 0.9007383966244725, + "grad_norm": 0.5974302887916565, + "learning_rate": 3.690181675025775e-05, + "loss": 1.4262, + "step": 8539 + }, + { + "epoch": 0.9008438818565401, + "grad_norm": 0.6241319179534912, + "learning_rate": 3.682407522433173e-05, + "loss": 1.4673, + "step": 8540 + }, + { + "epoch": 0.9009493670886076, + "grad_norm": 0.5596635937690735, + "learning_rate": 3.674641361332423e-05, + "loss": 1.4485, + "step": 8541 + }, + { + "epoch": 0.9010548523206751, + "grad_norm": 0.6062620878219604, + "learning_rate": 3.66688319259377e-05, + "loss": 1.4484, + "step": 8542 + }, + { + "epoch": 0.9011603375527426, + "grad_norm": 0.577261745929718, + "learning_rate": 3.6591330170865524e-05, + "loss": 1.425, + "step": 8543 + }, + { + "epoch": 0.9012658227848102, + "grad_norm": 0.5950941443443298, + "learning_rate": 
3.6513908356792244e-05, + "loss": 1.4582, + "step": 8544 + }, + { + "epoch": 0.9013713080168776, + "grad_norm": 0.5959452986717224, + "learning_rate": 3.643656649239327e-05, + "loss": 1.4073, + "step": 8545 + }, + { + "epoch": 0.9014767932489451, + "grad_norm": 0.5800740718841553, + "learning_rate": 3.635930458633516e-05, + "loss": 1.4463, + "step": 8546 + }, + { + "epoch": 0.9015822784810127, + "grad_norm": 0.5633642077445984, + "learning_rate": 3.628212264727548e-05, + "loss": 1.433, + "step": 8547 + }, + { + "epoch": 0.9016877637130801, + "grad_norm": 0.5563937425613403, + "learning_rate": 3.6205020683862836e-05, + "loss": 1.4667, + "step": 8548 + }, + { + "epoch": 0.9017932489451477, + "grad_norm": 0.5950255393981934, + "learning_rate": 3.612799870473696e-05, + "loss": 1.4108, + "step": 8549 + }, + { + "epoch": 0.9018987341772152, + "grad_norm": 0.6054626703262329, + "learning_rate": 3.605105671852854e-05, + "loss": 1.4624, + "step": 8550 + }, + { + "epoch": 0.9020042194092827, + "grad_norm": 0.5748695731163025, + "learning_rate": 3.597419473385935e-05, + "loss": 1.4194, + "step": 8551 + }, + { + "epoch": 0.9021097046413502, + "grad_norm": 0.5732536911964417, + "learning_rate": 3.5897412759342e-05, + "loss": 1.4222, + "step": 8552 + }, + { + "epoch": 0.9022151898734178, + "grad_norm": 0.6135848760604858, + "learning_rate": 3.582071080358043e-05, + "loss": 1.442, + "step": 8553 + }, + { + "epoch": 0.9023206751054852, + "grad_norm": 0.575595498085022, + "learning_rate": 3.5744088875169446e-05, + "loss": 1.4625, + "step": 8554 + }, + { + "epoch": 0.9024261603375527, + "grad_norm": 0.5790221095085144, + "learning_rate": 3.566754698269492e-05, + "loss": 1.4479, + "step": 8555 + }, + { + "epoch": 0.9025316455696203, + "grad_norm": 0.5932525396347046, + "learning_rate": 3.5591085134733666e-05, + "loss": 1.4536, + "step": 8556 + }, + { + "epoch": 0.9026371308016877, + "grad_norm": 0.5638000965118408, + "learning_rate": 3.5514703339853656e-05, + "loss": 1.3823, + 
"step": 8557 + }, + { + "epoch": 0.9027426160337553, + "grad_norm": 0.5874882340431213, + "learning_rate": 3.543840160661396e-05, + "loss": 1.4157, + "step": 8558 + }, + { + "epoch": 0.9028481012658228, + "grad_norm": 0.5850016474723816, + "learning_rate": 3.5362179943564496e-05, + "loss": 1.4434, + "step": 8559 + }, + { + "epoch": 0.9029535864978903, + "grad_norm": 0.5790449380874634, + "learning_rate": 3.528603835924626e-05, + "loss": 1.4225, + "step": 8560 + }, + { + "epoch": 0.9030590717299578, + "grad_norm": 0.5829458832740784, + "learning_rate": 3.520997686219127e-05, + "loss": 1.4463, + "step": 8561 + }, + { + "epoch": 0.9031645569620254, + "grad_norm": 0.5877697467803955, + "learning_rate": 3.513399546092269e-05, + "loss": 1.4766, + "step": 8562 + }, + { + "epoch": 0.9032700421940928, + "grad_norm": 0.5648425817489624, + "learning_rate": 3.5058094163954556e-05, + "loss": 1.4383, + "step": 8563 + }, + { + "epoch": 0.9033755274261603, + "grad_norm": 0.6737684607505798, + "learning_rate": 3.498227297979198e-05, + "loss": 1.4221, + "step": 8564 + }, + { + "epoch": 0.9034810126582279, + "grad_norm": 0.6250383853912354, + "learning_rate": 3.4906531916931075e-05, + "loss": 1.4468, + "step": 8565 + }, + { + "epoch": 0.9035864978902953, + "grad_norm": 0.5729038715362549, + "learning_rate": 3.483087098385906e-05, + "loss": 1.4808, + "step": 8566 + }, + { + "epoch": 0.9036919831223629, + "grad_norm": 0.6009966135025024, + "learning_rate": 3.475529018905416e-05, + "loss": 1.452, + "step": 8567 + }, + { + "epoch": 0.9037974683544304, + "grad_norm": 0.5594527125358582, + "learning_rate": 3.467978954098549e-05, + "loss": 1.4234, + "step": 8568 + }, + { + "epoch": 0.9039029535864979, + "grad_norm": 0.5693352222442627, + "learning_rate": 3.46043690481134e-05, + "loss": 1.4497, + "step": 8569 + }, + { + "epoch": 0.9040084388185654, + "grad_norm": 0.6618000268936157, + "learning_rate": 3.4529028718888935e-05, + "loss": 1.467, + "step": 8570 + }, + { + "epoch": 
0.904113924050633, + "grad_norm": 0.6230075359344482, + "learning_rate": 3.4453768561754525e-05, + "loss": 1.4433, + "step": 8571 + }, + { + "epoch": 0.9042194092827004, + "grad_norm": 0.5503385066986084, + "learning_rate": 3.437858858514334e-05, + "loss": 1.4131, + "step": 8572 + }, + { + "epoch": 0.9043248945147679, + "grad_norm": 0.5542915463447571, + "learning_rate": 3.43034887974798e-05, + "loss": 1.4447, + "step": 8573 + }, + { + "epoch": 0.9044303797468355, + "grad_norm": 0.5785207748413086, + "learning_rate": 3.422846920717893e-05, + "loss": 1.4571, + "step": 8574 + }, + { + "epoch": 0.9045358649789029, + "grad_norm": 0.6178798079490662, + "learning_rate": 3.4153529822647414e-05, + "loss": 1.4559, + "step": 8575 + }, + { + "epoch": 0.9046413502109705, + "grad_norm": 0.6574684381484985, + "learning_rate": 3.4078670652282374e-05, + "loss": 1.4233, + "step": 8576 + }, + { + "epoch": 0.904746835443038, + "grad_norm": 0.6013036370277405, + "learning_rate": 3.400389170447218e-05, + "loss": 1.4659, + "step": 8577 + }, + { + "epoch": 0.9048523206751055, + "grad_norm": 0.5858415961265564, + "learning_rate": 3.392919298759623e-05, + "loss": 1.4229, + "step": 8578 + }, + { + "epoch": 0.904957805907173, + "grad_norm": 0.5927820801734924, + "learning_rate": 3.38545745100248e-05, + "loss": 1.4709, + "step": 8579 + }, + { + "epoch": 0.9050632911392406, + "grad_norm": 0.6097804307937622, + "learning_rate": 3.378003628011938e-05, + "loss": 1.4487, + "step": 8580 + }, + { + "epoch": 0.905168776371308, + "grad_norm": 0.6010668277740479, + "learning_rate": 3.3705578306232224e-05, + "loss": 1.4328, + "step": 8581 + }, + { + "epoch": 0.9052742616033755, + "grad_norm": 0.6031314730644226, + "learning_rate": 3.363120059670688e-05, + "loss": 1.4614, + "step": 8582 + }, + { + "epoch": 0.9053797468354431, + "grad_norm": 0.637527346611023, + "learning_rate": 3.355690315987761e-05, + "loss": 1.445, + "step": 8583 + }, + { + "epoch": 0.9054852320675105, + "grad_norm": 
0.5858590602874756, + "learning_rate": 3.3482686004069755e-05, + "loss": 1.4313, + "step": 8584 + }, + { + "epoch": 0.9055907172995781, + "grad_norm": 0.5896291732788086, + "learning_rate": 3.340854913759983e-05, + "loss": 1.4433, + "step": 8585 + }, + { + "epoch": 0.9056962025316456, + "grad_norm": 0.5825329422950745, + "learning_rate": 3.3334492568775355e-05, + "loss": 1.4279, + "step": 8586 + }, + { + "epoch": 0.9058016877637131, + "grad_norm": 0.608727753162384, + "learning_rate": 3.3260516305894526e-05, + "loss": 1.4533, + "step": 8587 + }, + { + "epoch": 0.9059071729957806, + "grad_norm": 0.6565045118331909, + "learning_rate": 3.318662035724679e-05, + "loss": 1.4218, + "step": 8588 + }, + { + "epoch": 0.9060126582278482, + "grad_norm": 0.6283084750175476, + "learning_rate": 3.31128047311127e-05, + "loss": 1.4653, + "step": 8589 + }, + { + "epoch": 0.9061181434599156, + "grad_norm": 0.6263375878334045, + "learning_rate": 3.303906943576346e-05, + "loss": 1.4867, + "step": 8590 + }, + { + "epoch": 0.9062236286919831, + "grad_norm": 0.5693736672401428, + "learning_rate": 3.296541447946164e-05, + "loss": 1.4371, + "step": 8591 + }, + { + "epoch": 0.9063291139240506, + "grad_norm": 0.5664024353027344, + "learning_rate": 3.2891839870460546e-05, + "loss": 1.4428, + "step": 8592 + }, + { + "epoch": 0.9064345991561181, + "grad_norm": 0.5754429697990417, + "learning_rate": 3.281834561700467e-05, + "loss": 1.4539, + "step": 8593 + }, + { + "epoch": 0.9065400843881857, + "grad_norm": 0.5965851545333862, + "learning_rate": 3.274493172732926e-05, + "loss": 1.4083, + "step": 8594 + }, + { + "epoch": 0.9066455696202531, + "grad_norm": 0.5960004925727844, + "learning_rate": 3.26715982096609e-05, + "loss": 1.4986, + "step": 8595 + }, + { + "epoch": 0.9067510548523207, + "grad_norm": 0.7153618335723877, + "learning_rate": 3.259834507221684e-05, + "loss": 1.3795, + "step": 8596 + }, + { + "epoch": 0.9068565400843882, + "grad_norm": 0.607225239276886, + "learning_rate": 
3.2525172323205535e-05, + "loss": 1.4069, + "step": 8597 + }, + { + "epoch": 0.9069620253164556, + "grad_norm": 0.532216489315033, + "learning_rate": 3.2452079970826335e-05, + "loss": 1.4615, + "step": 8598 + }, + { + "epoch": 0.9070675105485232, + "grad_norm": 0.5659663081169128, + "learning_rate": 3.237906802326951e-05, + "loss": 1.4273, + "step": 8599 + }, + { + "epoch": 0.9071729957805907, + "grad_norm": 0.601791262626648, + "learning_rate": 3.230613648871661e-05, + "loss": 1.4669, + "step": 8600 + }, + { + "epoch": 0.9072784810126582, + "grad_norm": 0.5940601825714111, + "learning_rate": 3.223328537533976e-05, + "loss": 1.4688, + "step": 8601 + }, + { + "epoch": 0.9073839662447257, + "grad_norm": 0.5622960329055786, + "learning_rate": 3.216051469130243e-05, + "loss": 1.4426, + "step": 8602 + }, + { + "epoch": 0.9074894514767933, + "grad_norm": 0.6254304647445679, + "learning_rate": 3.208782444475894e-05, + "loss": 1.4211, + "step": 8603 + }, + { + "epoch": 0.9075949367088607, + "grad_norm": 0.5900026559829712, + "learning_rate": 3.201521464385443e-05, + "loss": 1.4606, + "step": 8604 + }, + { + "epoch": 0.9077004219409283, + "grad_norm": 0.5649974942207336, + "learning_rate": 3.194268529672539e-05, + "loss": 1.4291, + "step": 8605 + }, + { + "epoch": 0.9078059071729958, + "grad_norm": 0.6173660159111023, + "learning_rate": 3.187023641149908e-05, + "loss": 1.4349, + "step": 8606 + }, + { + "epoch": 0.9079113924050632, + "grad_norm": 0.6725533604621887, + "learning_rate": 3.1797867996293663e-05, + "loss": 1.4288, + "step": 8607 + }, + { + "epoch": 0.9080168776371308, + "grad_norm": 0.6489090323448181, + "learning_rate": 3.172558005921841e-05, + "loss": 1.4287, + "step": 8608 + }, + { + "epoch": 0.9081223628691983, + "grad_norm": 0.6048546433448792, + "learning_rate": 3.165337260837351e-05, + "loss": 1.4523, + "step": 8609 + }, + { + "epoch": 0.9082278481012658, + "grad_norm": 0.576407253742218, + "learning_rate": 3.158124565185022e-05, + "loss": 1.4356, + 
"step": 8610 + }, + { + "epoch": 0.9083333333333333, + "grad_norm": 0.5799739956855774, + "learning_rate": 3.1509199197730765e-05, + "loss": 1.4507, + "step": 8611 + }, + { + "epoch": 0.9084388185654009, + "grad_norm": 0.5706152319908142, + "learning_rate": 3.143723325408826e-05, + "loss": 1.4442, + "step": 8612 + }, + { + "epoch": 0.9085443037974683, + "grad_norm": 0.5872123837471008, + "learning_rate": 3.136534782898667e-05, + "loss": 1.4224, + "step": 8613 + }, + { + "epoch": 0.9086497890295359, + "grad_norm": 0.6442923545837402, + "learning_rate": 3.129354293048148e-05, + "loss": 1.469, + "step": 8614 + }, + { + "epoch": 0.9087552742616034, + "grad_norm": 0.6551254987716675, + "learning_rate": 3.122181856661857e-05, + "loss": 1.3955, + "step": 8615 + }, + { + "epoch": 0.9088607594936708, + "grad_norm": 0.6627232432365417, + "learning_rate": 3.1150174745435026e-05, + "loss": 1.4677, + "step": 8616 + }, + { + "epoch": 0.9089662447257384, + "grad_norm": 0.5678005218505859, + "learning_rate": 3.107861147495891e-05, + "loss": 1.4134, + "step": 8617 + }, + { + "epoch": 0.9090717299578059, + "grad_norm": 0.5705991983413696, + "learning_rate": 3.100712876320924e-05, + "loss": 1.4674, + "step": 8618 + }, + { + "epoch": 0.9091772151898734, + "grad_norm": 0.5815088152885437, + "learning_rate": 3.093572661819602e-05, + "loss": 1.4012, + "step": 8619 + }, + { + "epoch": 0.9092827004219409, + "grad_norm": 0.5725952982902527, + "learning_rate": 3.086440504792026e-05, + "loss": 1.4593, + "step": 8620 + }, + { + "epoch": 0.9093881856540085, + "grad_norm": 0.5708469152450562, + "learning_rate": 3.079316406037375e-05, + "loss": 1.4479, + "step": 8621 + }, + { + "epoch": 0.9094936708860759, + "grad_norm": 0.6400878429412842, + "learning_rate": 3.072200366353958e-05, + "loss": 1.4057, + "step": 8622 + }, + { + "epoch": 0.9095991561181435, + "grad_norm": 0.6668400764465332, + "learning_rate": 3.0650923865391395e-05, + "loss": 1.4501, + "step": 8623 + }, + { + "epoch": 
0.909704641350211, + "grad_norm": 0.5470035672187805, + "learning_rate": 3.057992467389431e-05, + "loss": 1.4269, + "step": 8624 + }, + { + "epoch": 0.9098101265822784, + "grad_norm": 0.5566033720970154, + "learning_rate": 3.0509006097004048e-05, + "loss": 1.4558, + "step": 8625 + }, + { + "epoch": 0.909915611814346, + "grad_norm": 0.5763221383094788, + "learning_rate": 3.043816814266734e-05, + "loss": 1.4582, + "step": 8626 + }, + { + "epoch": 0.9100210970464135, + "grad_norm": 0.5591686964035034, + "learning_rate": 3.0367410818821913e-05, + "loss": 1.4584, + "step": 8627 + }, + { + "epoch": 0.910126582278481, + "grad_norm": 0.5960566997528076, + "learning_rate": 3.029673413339651e-05, + "loss": 1.3997, + "step": 8628 + }, + { + "epoch": 0.9102320675105485, + "grad_norm": 0.615857720375061, + "learning_rate": 3.022613809431088e-05, + "loss": 1.4689, + "step": 8629 + }, + { + "epoch": 0.9103375527426161, + "grad_norm": 0.5882724523544312, + "learning_rate": 3.015562270947553e-05, + "loss": 1.488, + "step": 8630 + }, + { + "epoch": 0.9104430379746835, + "grad_norm": 0.5790050625801086, + "learning_rate": 3.0085187986792136e-05, + "loss": 1.4552, + "step": 8631 + }, + { + "epoch": 0.9105485232067511, + "grad_norm": 0.5785961747169495, + "learning_rate": 3.00148339341533e-05, + "loss": 1.4171, + "step": 8632 + }, + { + "epoch": 0.9106540084388186, + "grad_norm": 0.5604156255722046, + "learning_rate": 2.994456055944231e-05, + "loss": 1.4117, + "step": 8633 + }, + { + "epoch": 0.910759493670886, + "grad_norm": 0.5958667993545532, + "learning_rate": 2.9874367870534018e-05, + "loss": 1.4524, + "step": 8634 + }, + { + "epoch": 0.9108649789029536, + "grad_norm": 0.5783675312995911, + "learning_rate": 2.9804255875293645e-05, + "loss": 1.4403, + "step": 8635 + }, + { + "epoch": 0.9109704641350211, + "grad_norm": 0.5981873273849487, + "learning_rate": 2.9734224581577568e-05, + "loss": 1.4021, + "step": 8636 + }, + { + "epoch": 0.9110759493670886, + "grad_norm": 
0.5909006595611572, + "learning_rate": 2.966427399723326e-05, + "loss": 1.4598, + "step": 8637 + }, + { + "epoch": 0.9111814345991561, + "grad_norm": 0.5606077909469604, + "learning_rate": 2.959440413009895e-05, + "loss": 1.4319, + "step": 8638 + }, + { + "epoch": 0.9112869198312237, + "grad_norm": 0.5668084025382996, + "learning_rate": 2.952461498800388e-05, + "loss": 1.4218, + "step": 8639 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 0.6543571949005127, + "learning_rate": 2.945490657876837e-05, + "loss": 1.4063, + "step": 8640 + }, + { + "epoch": 0.9114978902953587, + "grad_norm": 0.6015463471412659, + "learning_rate": 2.938527891020351e-05, + "loss": 1.4231, + "step": 8641 + }, + { + "epoch": 0.9116033755274262, + "grad_norm": 0.5704867839813232, + "learning_rate": 2.931573199011148e-05, + "loss": 1.4352, + "step": 8642 + }, + { + "epoch": 0.9117088607594936, + "grad_norm": 0.584976315498352, + "learning_rate": 2.92462658262852e-05, + "loss": 1.4274, + "step": 8643 + }, + { + "epoch": 0.9118143459915612, + "grad_norm": 0.5783811807632446, + "learning_rate": 2.9176880426508957e-05, + "loss": 1.4146, + "step": 8644 + }, + { + "epoch": 0.9119198312236287, + "grad_norm": 0.5729809403419495, + "learning_rate": 2.9107575798557605e-05, + "loss": 1.374, + "step": 8645 + }, + { + "epoch": 0.9120253164556962, + "grad_norm": 0.564470648765564, + "learning_rate": 2.9038351950197107e-05, + "loss": 1.4461, + "step": 8646 + }, + { + "epoch": 0.9121308016877637, + "grad_norm": 0.5639119744300842, + "learning_rate": 2.8969208889184335e-05, + "loss": 1.4165, + "step": 8647 + }, + { + "epoch": 0.9122362869198313, + "grad_norm": 0.6115474700927734, + "learning_rate": 2.890014662326701e-05, + "loss": 1.4908, + "step": 8648 + }, + { + "epoch": 0.9123417721518987, + "grad_norm": 0.617357075214386, + "learning_rate": 2.8831165160184024e-05, + "loss": 1.4389, + "step": 8649 + }, + { + "epoch": 0.9124472573839663, + "grad_norm": 0.6064773797988892, + "learning_rate": 
2.8762264507665113e-05, + "loss": 1.4554, + "step": 8650 + }, + { + "epoch": 0.9125527426160338, + "grad_norm": 0.5657145380973816, + "learning_rate": 2.869344467343077e-05, + "loss": 1.4543, + "step": 8651 + }, + { + "epoch": 0.9126582278481012, + "grad_norm": 0.634956955909729, + "learning_rate": 2.862470566519265e-05, + "loss": 1.4597, + "step": 8652 + }, + { + "epoch": 0.9127637130801688, + "grad_norm": 0.5930854678153992, + "learning_rate": 2.855604749065352e-05, + "loss": 1.4393, + "step": 8653 + }, + { + "epoch": 0.9128691983122363, + "grad_norm": 0.6143166422843933, + "learning_rate": 2.8487470157506633e-05, + "loss": 1.4622, + "step": 8654 + }, + { + "epoch": 0.9129746835443038, + "grad_norm": 0.5628637075424194, + "learning_rate": 2.84189736734366e-05, + "loss": 1.4204, + "step": 8655 + }, + { + "epoch": 0.9130801687763713, + "grad_norm": 0.5753527283668518, + "learning_rate": 2.8350558046118607e-05, + "loss": 1.4135, + "step": 8656 + }, + { + "epoch": 0.9131856540084389, + "grad_norm": 0.6177921891212463, + "learning_rate": 2.828222328321911e-05, + "loss": 1.4012, + "step": 8657 + }, + { + "epoch": 0.9132911392405063, + "grad_norm": 0.5984044075012207, + "learning_rate": 2.8213969392395233e-05, + "loss": 1.4209, + "step": 8658 + }, + { + "epoch": 0.9133966244725739, + "grad_norm": 0.5904444456100464, + "learning_rate": 2.8145796381295276e-05, + "loss": 1.4183, + "step": 8659 + }, + { + "epoch": 0.9135021097046413, + "grad_norm": 0.6169835925102234, + "learning_rate": 2.807770425755829e-05, + "loss": 1.4562, + "step": 8660 + }, + { + "epoch": 0.9136075949367088, + "grad_norm": 0.5786110162734985, + "learning_rate": 2.800969302881434e-05, + "loss": 1.4055, + "step": 8661 + }, + { + "epoch": 0.9137130801687764, + "grad_norm": 0.6012013554573059, + "learning_rate": 2.7941762702684503e-05, + "loss": 1.4294, + "step": 8662 + }, + { + "epoch": 0.9138185654008438, + "grad_norm": 0.5990943908691406, + "learning_rate": 2.7873913286780683e-05, + "loss": 1.4619, + 
"step": 8663 + }, + { + "epoch": 0.9139240506329114, + "grad_norm": 0.5873018503189087, + "learning_rate": 2.7806144788705718e-05, + "loss": 1.4606, + "step": 8664 + }, + { + "epoch": 0.9140295358649789, + "grad_norm": 0.5874953866004944, + "learning_rate": 2.7738457216053447e-05, + "loss": 1.4528, + "step": 8665 + }, + { + "epoch": 0.9141350210970464, + "grad_norm": 0.5510562062263489, + "learning_rate": 2.7670850576408556e-05, + "loss": 1.4233, + "step": 8666 + }, + { + "epoch": 0.9142405063291139, + "grad_norm": 0.5917528867721558, + "learning_rate": 2.7603324877346653e-05, + "loss": 1.4281, + "step": 8667 + }, + { + "epoch": 0.9143459915611815, + "grad_norm": 0.5897765755653381, + "learning_rate": 2.7535880126434433e-05, + "loss": 1.4184, + "step": 8668 + }, + { + "epoch": 0.9144514767932489, + "grad_norm": 0.5766642689704895, + "learning_rate": 2.7468516331229432e-05, + "loss": 1.4558, + "step": 8669 + }, + { + "epoch": 0.9145569620253164, + "grad_norm": 0.5906760096549988, + "learning_rate": 2.7401233499279866e-05, + "loss": 1.4613, + "step": 8670 + }, + { + "epoch": 0.914662447257384, + "grad_norm": 0.6154567003250122, + "learning_rate": 2.7334031638125367e-05, + "loss": 1.4824, + "step": 8671 + }, + { + "epoch": 0.9147679324894514, + "grad_norm": 0.6160286664962769, + "learning_rate": 2.726691075529625e-05, + "loss": 1.4817, + "step": 8672 + }, + { + "epoch": 0.914873417721519, + "grad_norm": 0.570216178894043, + "learning_rate": 2.7199870858313574e-05, + "loss": 1.4272, + "step": 8673 + }, + { + "epoch": 0.9149789029535865, + "grad_norm": 0.5835993885993958, + "learning_rate": 2.7132911954689672e-05, + "loss": 1.4298, + "step": 8674 + }, + { + "epoch": 0.915084388185654, + "grad_norm": 0.5823359489440918, + "learning_rate": 2.706603405192745e-05, + "loss": 1.4536, + "step": 8675 + }, + { + "epoch": 0.9151898734177215, + "grad_norm": 0.5523690581321716, + "learning_rate": 2.6999237157521005e-05, + "loss": 1.4453, + "step": 8676 + }, + { + "epoch": 
0.9152953586497891, + "grad_norm": 0.5867750644683838, + "learning_rate": 2.6932521278955262e-05, + "loss": 1.4312, + "step": 8677 + }, + { + "epoch": 0.9154008438818565, + "grad_norm": 0.5600118637084961, + "learning_rate": 2.686588642370591e-05, + "loss": 1.4611, + "step": 8678 + }, + { + "epoch": 0.915506329113924, + "grad_norm": 0.5990194082260132, + "learning_rate": 2.6799332599239974e-05, + "loss": 1.4719, + "step": 8679 + }, + { + "epoch": 0.9156118143459916, + "grad_norm": 0.5555274486541748, + "learning_rate": 2.6732859813014987e-05, + "loss": 1.4104, + "step": 8680 + }, + { + "epoch": 0.915717299578059, + "grad_norm": 0.5901975035667419, + "learning_rate": 2.666646807247966e-05, + "loss": 1.4796, + "step": 8681 + }, + { + "epoch": 0.9158227848101266, + "grad_norm": 0.6551069617271423, + "learning_rate": 2.660015738507346e-05, + "loss": 1.451, + "step": 8682 + }, + { + "epoch": 0.9159282700421941, + "grad_norm": 0.6033887267112732, + "learning_rate": 2.653392775822677e-05, + "loss": 1.4269, + "step": 8683 + }, + { + "epoch": 0.9160337552742616, + "grad_norm": 0.5646427869796753, + "learning_rate": 2.6467779199361e-05, + "loss": 1.4402, + "step": 8684 + }, + { + "epoch": 0.9161392405063291, + "grad_norm": 0.5603290796279907, + "learning_rate": 2.6401711715888454e-05, + "loss": 1.4344, + "step": 8685 + }, + { + "epoch": 0.9162447257383967, + "grad_norm": 0.5866884589195251, + "learning_rate": 2.6335725315212304e-05, + "loss": 1.4452, + "step": 8686 + }, + { + "epoch": 0.9163502109704641, + "grad_norm": 0.5873220562934875, + "learning_rate": 2.626982000472655e-05, + "loss": 1.4385, + "step": 8687 + }, + { + "epoch": 0.9164556962025316, + "grad_norm": 0.588737964630127, + "learning_rate": 2.6203995791816372e-05, + "loss": 1.4523, + "step": 8688 + }, + { + "epoch": 0.9165611814345992, + "grad_norm": 0.5775124430656433, + "learning_rate": 2.6138252683857693e-05, + "loss": 1.4733, + "step": 8689 + }, + { + "epoch": 0.9166666666666666, + "grad_norm": 
0.5795891284942627, + "learning_rate": 2.607259068821721e-05, + "loss": 1.4123, + "step": 8690 + }, + { + "epoch": 0.9167721518987342, + "grad_norm": 0.5847550630569458, + "learning_rate": 2.6007009812252875e-05, + "loss": 1.4792, + "step": 8691 + }, + { + "epoch": 0.9168776371308017, + "grad_norm": 0.5767195820808411, + "learning_rate": 2.594151006331322e-05, + "loss": 1.4408, + "step": 8692 + }, + { + "epoch": 0.9169831223628692, + "grad_norm": 0.5775855183601379, + "learning_rate": 2.5876091448737788e-05, + "loss": 1.4394, + "step": 8693 + }, + { + "epoch": 0.9170886075949367, + "grad_norm": 0.5567613244056702, + "learning_rate": 2.5810753975857136e-05, + "loss": 1.4543, + "step": 8694 + }, + { + "epoch": 0.9171940928270043, + "grad_norm": 0.5601891279220581, + "learning_rate": 2.5745497651992662e-05, + "loss": 1.4163, + "step": 8695 + }, + { + "epoch": 0.9172995780590717, + "grad_norm": 0.5942485332489014, + "learning_rate": 2.568032248445651e-05, + "loss": 1.4477, + "step": 8696 + }, + { + "epoch": 0.9174050632911392, + "grad_norm": 0.5897202491760254, + "learning_rate": 2.561522848055217e-05, + "loss": 1.4253, + "step": 8697 + }, + { + "epoch": 0.9175105485232068, + "grad_norm": 0.5581928491592407, + "learning_rate": 2.5550215647573482e-05, + "loss": 1.4315, + "step": 8698 + }, + { + "epoch": 0.9176160337552742, + "grad_norm": 0.5655611157417297, + "learning_rate": 2.5485283992805615e-05, + "loss": 1.465, + "step": 8699 + }, + { + "epoch": 0.9177215189873418, + "grad_norm": 0.5503929257392883, + "learning_rate": 2.5420433523524493e-05, + "loss": 1.4326, + "step": 8700 + }, + { + "epoch": 0.9178270042194093, + "grad_norm": 0.5832810401916504, + "learning_rate": 2.5355664246996813e-05, + "loss": 1.4323, + "step": 8701 + }, + { + "epoch": 0.9179324894514768, + "grad_norm": 0.5863105058670044, + "learning_rate": 2.5290976170480346e-05, + "loss": 1.4109, + "step": 8702 + }, + { + "epoch": 0.9180379746835443, + "grad_norm": 0.5389050841331482, + "learning_rate": 
2.522636930122371e-05, + "loss": 1.4515, + "step": 8703 + }, + { + "epoch": 0.9181434599156119, + "grad_norm": 0.599435567855835, + "learning_rate": 2.516184364646637e-05, + "loss": 1.4328, + "step": 8704 + }, + { + "epoch": 0.9182489451476793, + "grad_norm": 0.6388409733772278, + "learning_rate": 2.5097399213438955e-05, + "loss": 1.4603, + "step": 8705 + }, + { + "epoch": 0.9183544303797468, + "grad_norm": 0.5829973816871643, + "learning_rate": 2.50330360093626e-05, + "loss": 1.4301, + "step": 8706 + }, + { + "epoch": 0.9184599156118144, + "grad_norm": 0.5872178077697754, + "learning_rate": 2.4968754041449633e-05, + "loss": 1.454, + "step": 8707 + }, + { + "epoch": 0.9185654008438818, + "grad_norm": 0.5416826605796814, + "learning_rate": 2.490455331690303e-05, + "loss": 1.4313, + "step": 8708 + }, + { + "epoch": 0.9186708860759494, + "grad_norm": 0.5669833421707153, + "learning_rate": 2.4840433842916872e-05, + "loss": 1.4457, + "step": 8709 + }, + { + "epoch": 0.9187763713080169, + "grad_norm": 0.5634832978248596, + "learning_rate": 2.4776395626676162e-05, + "loss": 1.4715, + "step": 8710 + }, + { + "epoch": 0.9188818565400844, + "grad_norm": 0.5864601135253906, + "learning_rate": 2.471243867535658e-05, + "loss": 1.4555, + "step": 8711 + }, + { + "epoch": 0.9189873417721519, + "grad_norm": 0.5867257118225098, + "learning_rate": 2.4648562996124806e-05, + "loss": 1.4262, + "step": 8712 + }, + { + "epoch": 0.9190928270042195, + "grad_norm": 0.5496393442153931, + "learning_rate": 2.4584768596138452e-05, + "loss": 1.4506, + "step": 8713 + }, + { + "epoch": 0.9191983122362869, + "grad_norm": 0.6395035982131958, + "learning_rate": 2.4521055482546046e-05, + "loss": 1.4355, + "step": 8714 + }, + { + "epoch": 0.9193037974683544, + "grad_norm": 0.5977312922477722, + "learning_rate": 2.4457423662486962e-05, + "loss": 1.4498, + "step": 8715 + }, + { + "epoch": 0.919409282700422, + "grad_norm": 0.5763668417930603, + "learning_rate": 2.4393873143091495e-05, + "loss": 1.4153, + 
"step": 8716 + }, + { + "epoch": 0.9195147679324894, + "grad_norm": 0.5649773478507996, + "learning_rate": 2.43304039314807e-05, + "loss": 1.3886, + "step": 8717 + }, + { + "epoch": 0.919620253164557, + "grad_norm": 0.6062254309654236, + "learning_rate": 2.4267016034766637e-05, + "loss": 1.4619, + "step": 8718 + }, + { + "epoch": 0.9197257383966245, + "grad_norm": 0.5720923542976379, + "learning_rate": 2.4203709460052292e-05, + "loss": 1.4472, + "step": 8719 + }, + { + "epoch": 0.919831223628692, + "grad_norm": 0.6060433983802795, + "learning_rate": 2.414048421443141e-05, + "loss": 1.4221, + "step": 8720 + }, + { + "epoch": 0.9199367088607595, + "grad_norm": 0.5681042671203613, + "learning_rate": 2.407734030498873e-05, + "loss": 1.4495, + "step": 8721 + }, + { + "epoch": 0.9200421940928271, + "grad_norm": 0.5993919968605042, + "learning_rate": 2.4014277738799774e-05, + "loss": 1.4492, + "step": 8722 + }, + { + "epoch": 0.9201476793248945, + "grad_norm": 0.5645418167114258, + "learning_rate": 2.395129652293121e-05, + "loss": 1.4577, + "step": 8723 + }, + { + "epoch": 0.920253164556962, + "grad_norm": 0.5975272059440613, + "learning_rate": 2.3888396664440232e-05, + "loss": 1.4928, + "step": 8724 + }, + { + "epoch": 0.9203586497890295, + "grad_norm": 0.5936329960823059, + "learning_rate": 2.38255781703752e-05, + "loss": 1.431, + "step": 8725 + }, + { + "epoch": 0.920464135021097, + "grad_norm": 0.5827581286430359, + "learning_rate": 2.3762841047775068e-05, + "loss": 1.4007, + "step": 8726 + }, + { + "epoch": 0.9205696202531646, + "grad_norm": 0.5696383118629456, + "learning_rate": 2.3700185303670046e-05, + "loss": 1.4409, + "step": 8727 + }, + { + "epoch": 0.920675105485232, + "grad_norm": 0.5902407169342041, + "learning_rate": 2.363761094508085e-05, + "loss": 1.4044, + "step": 8728 + }, + { + "epoch": 0.9207805907172996, + "grad_norm": 0.5780519247055054, + "learning_rate": 2.357511797901929e-05, + "loss": 1.4923, + "step": 8729 + }, + { + "epoch": 
0.9208860759493671, + "grad_norm": 0.5766505002975464, + "learning_rate": 2.3512706412488012e-05, + "loss": 1.4282, + "step": 8730 + }, + { + "epoch": 0.9209915611814345, + "grad_norm": 0.5711724162101746, + "learning_rate": 2.345037625248067e-05, + "loss": 1.4226, + "step": 8731 + }, + { + "epoch": 0.9210970464135021, + "grad_norm": 0.6154958605766296, + "learning_rate": 2.3388127505981515e-05, + "loss": 1.4417, + "step": 8732 + }, + { + "epoch": 0.9212025316455696, + "grad_norm": 0.5816785097122192, + "learning_rate": 2.3325960179965967e-05, + "loss": 1.4592, + "step": 8733 + }, + { + "epoch": 0.9213080168776371, + "grad_norm": 0.5735812187194824, + "learning_rate": 2.3263874281400034e-05, + "loss": 1.4222, + "step": 8734 + }, + { + "epoch": 0.9214135021097046, + "grad_norm": 0.5731986165046692, + "learning_rate": 2.3201869817240817e-05, + "loss": 1.4266, + "step": 8735 + }, + { + "epoch": 0.9215189873417722, + "grad_norm": 0.6300302147865295, + "learning_rate": 2.313994679443626e-05, + "loss": 1.4234, + "step": 8736 + }, + { + "epoch": 0.9216244725738396, + "grad_norm": 0.591485321521759, + "learning_rate": 2.307810521992515e-05, + "loss": 1.4278, + "step": 8737 + }, + { + "epoch": 0.9217299578059072, + "grad_norm": 0.5682873129844666, + "learning_rate": 2.301634510063702e-05, + "loss": 1.4521, + "step": 8738 + }, + { + "epoch": 0.9218354430379747, + "grad_norm": 0.5560333132743835, + "learning_rate": 2.2954666443492505e-05, + "loss": 1.4459, + "step": 8739 + }, + { + "epoch": 0.9219409282700421, + "grad_norm": 0.61741042137146, + "learning_rate": 2.2893069255402993e-05, + "loss": 1.4385, + "step": 8740 + }, + { + "epoch": 0.9220464135021097, + "grad_norm": 0.5697554349899292, + "learning_rate": 2.2831553543270793e-05, + "loss": 1.4247, + "step": 8741 + }, + { + "epoch": 0.9221518987341772, + "grad_norm": 0.5806183815002441, + "learning_rate": 2.277011931398898e-05, + "loss": 1.417, + "step": 8742 + }, + { + "epoch": 0.9222573839662447, + "grad_norm": 
0.5572609305381775, + "learning_rate": 2.2708766574441626e-05, + "loss": 1.3801, + "step": 8743 + }, + { + "epoch": 0.9223628691983122, + "grad_norm": 0.6285248398780823, + "learning_rate": 2.2647495331503565e-05, + "loss": 1.4467, + "step": 8744 + }, + { + "epoch": 0.9224683544303798, + "grad_norm": 0.5658657550811768, + "learning_rate": 2.2586305592040558e-05, + "loss": 1.4703, + "step": 8745 + }, + { + "epoch": 0.9225738396624472, + "grad_norm": 0.5914174318313599, + "learning_rate": 2.2525197362909282e-05, + "loss": 1.4406, + "step": 8746 + }, + { + "epoch": 0.9226793248945148, + "grad_norm": 0.5893293023109436, + "learning_rate": 2.24641706509571e-05, + "loss": 1.432, + "step": 8747 + }, + { + "epoch": 0.9227848101265823, + "grad_norm": 0.5471675395965576, + "learning_rate": 2.2403225463022288e-05, + "loss": 1.4274, + "step": 8748 + }, + { + "epoch": 0.9228902953586497, + "grad_norm": 0.5737383961677551, + "learning_rate": 2.2342361805934297e-05, + "loss": 1.4458, + "step": 8749 + }, + { + "epoch": 0.9229957805907173, + "grad_norm": 0.5783432722091675, + "learning_rate": 2.2281579686513176e-05, + "loss": 1.4604, + "step": 8750 + }, + { + "epoch": 0.9231012658227848, + "grad_norm": 0.539251446723938, + "learning_rate": 2.2220879111569725e-05, + "loss": 1.4057, + "step": 8751 + }, + { + "epoch": 0.9232067510548523, + "grad_norm": 0.5822808146476746, + "learning_rate": 2.2160260087905753e-05, + "loss": 1.4487, + "step": 8752 + }, + { + "epoch": 0.9233122362869198, + "grad_norm": 0.6027513146400452, + "learning_rate": 2.2099722622314078e-05, + "loss": 1.4111, + "step": 8753 + }, + { + "epoch": 0.9234177215189874, + "grad_norm": 0.5860010981559753, + "learning_rate": 2.203926672157802e-05, + "loss": 1.4314, + "step": 8754 + }, + { + "epoch": 0.9235232067510548, + "grad_norm": 0.632760763168335, + "learning_rate": 2.1978892392472085e-05, + "loss": 1.4432, + "step": 8755 + }, + { + "epoch": 0.9236286919831224, + "grad_norm": 0.5635277032852173, + "learning_rate": 
2.1918599641761517e-05, + "loss": 1.4341, + "step": 8756 + }, + { + "epoch": 0.9237341772151899, + "grad_norm": 0.5788161754608154, + "learning_rate": 2.185838847620242e-05, + "loss": 1.4334, + "step": 8757 + }, + { + "epoch": 0.9238396624472573, + "grad_norm": 0.5417002439498901, + "learning_rate": 2.1798258902541723e-05, + "loss": 1.3582, + "step": 8758 + }, + { + "epoch": 0.9239451476793249, + "grad_norm": 0.5904961824417114, + "learning_rate": 2.173821092751721e-05, + "loss": 1.4444, + "step": 8759 + }, + { + "epoch": 0.9240506329113924, + "grad_norm": 0.6286249160766602, + "learning_rate": 2.1678244557857663e-05, + "loss": 1.415, + "step": 8760 + }, + { + "epoch": 0.9241561181434599, + "grad_norm": 0.6170040369033813, + "learning_rate": 2.161835980028254e-05, + "loss": 1.463, + "step": 8761 + }, + { + "epoch": 0.9242616033755274, + "grad_norm": 0.5647348165512085, + "learning_rate": 2.1558556661502222e-05, + "loss": 1.4013, + "step": 8762 + }, + { + "epoch": 0.924367088607595, + "grad_norm": 0.5721019506454468, + "learning_rate": 2.1498835148218017e-05, + "loss": 1.4897, + "step": 8763 + }, + { + "epoch": 0.9244725738396624, + "grad_norm": 0.5583656430244446, + "learning_rate": 2.1439195267121902e-05, + "loss": 1.4483, + "step": 8764 + }, + { + "epoch": 0.92457805907173, + "grad_norm": 0.5761834383010864, + "learning_rate": 2.137963702489687e-05, + "loss": 1.4138, + "step": 8765 + }, + { + "epoch": 0.9246835443037975, + "grad_norm": 0.5997974276542664, + "learning_rate": 2.132016042821683e-05, + "loss": 1.4318, + "step": 8766 + }, + { + "epoch": 0.924789029535865, + "grad_norm": 0.6354941129684448, + "learning_rate": 2.1260765483746282e-05, + "loss": 1.4414, + "step": 8767 + }, + { + "epoch": 0.9248945147679325, + "grad_norm": 0.6038075089454651, + "learning_rate": 2.120145219814082e-05, + "loss": 1.4226, + "step": 8768 + }, + { + "epoch": 0.925, + "grad_norm": 0.5891817212104797, + "learning_rate": 2.1142220578046712e-05, + "loss": 1.4314, + "step": 8769 + }, 
+ { + "epoch": 0.9251054852320675, + "grad_norm": 0.561984121799469, + "learning_rate": 2.1083070630101232e-05, + "loss": 1.4141, + "step": 8770 + }, + { + "epoch": 0.925210970464135, + "grad_norm": 0.5902023911476135, + "learning_rate": 2.102400236093241e-05, + "loss": 1.4677, + "step": 8771 + }, + { + "epoch": 0.9253164556962026, + "grad_norm": 0.5677645802497864, + "learning_rate": 2.096501577715912e-05, + "loss": 1.4057, + "step": 8772 + }, + { + "epoch": 0.92542194092827, + "grad_norm": 0.6023551821708679, + "learning_rate": 2.0906110885391072e-05, + "loss": 1.4574, + "step": 8773 + }, + { + "epoch": 0.9255274261603376, + "grad_norm": 0.5855674743652344, + "learning_rate": 2.0847287692228905e-05, + "loss": 1.4165, + "step": 8774 + }, + { + "epoch": 0.9256329113924051, + "grad_norm": 0.6321659684181213, + "learning_rate": 2.0788546204264013e-05, + "loss": 1.4486, + "step": 8775 + }, + { + "epoch": 0.9257383966244725, + "grad_norm": 0.56999671459198, + "learning_rate": 2.0729886428078716e-05, + "loss": 1.4335, + "step": 8776 + }, + { + "epoch": 0.9258438818565401, + "grad_norm": 0.5810831785202026, + "learning_rate": 2.0671308370246167e-05, + "loss": 1.4336, + "step": 8777 + }, + { + "epoch": 0.9259493670886076, + "grad_norm": 0.5467450618743896, + "learning_rate": 2.0612812037330202e-05, + "loss": 1.43, + "step": 8778 + }, + { + "epoch": 0.9260548523206751, + "grad_norm": 0.5807086229324341, + "learning_rate": 2.0554397435885746e-05, + "loss": 1.4544, + "step": 8779 + }, + { + "epoch": 0.9261603375527426, + "grad_norm": 0.5576408505439758, + "learning_rate": 2.0496064572458395e-05, + "loss": 1.4498, + "step": 8780 + }, + { + "epoch": 0.9262658227848102, + "grad_norm": 0.5974573493003845, + "learning_rate": 2.043781345358467e-05, + "loss": 1.4518, + "step": 8781 + }, + { + "epoch": 0.9263713080168776, + "grad_norm": 0.5703144073486328, + "learning_rate": 2.0379644085791767e-05, + "loss": 1.4716, + "step": 8782 + }, + { + "epoch": 0.9264767932489452, + 
"grad_norm": 0.592363178730011, + "learning_rate": 2.032155647559805e-05, + "loss": 1.4047, + "step": 8783 + }, + { + "epoch": 0.9265822784810127, + "grad_norm": 0.6005138158798218, + "learning_rate": 2.0263550629512406e-05, + "loss": 1.4232, + "step": 8784 + }, + { + "epoch": 0.9266877637130801, + "grad_norm": 0.6323562860488892, + "learning_rate": 2.0205626554034713e-05, + "loss": 1.4426, + "step": 8785 + }, + { + "epoch": 0.9267932489451477, + "grad_norm": 0.5690987706184387, + "learning_rate": 2.0147784255655692e-05, + "loss": 1.4167, + "step": 8786 + }, + { + "epoch": 0.9268987341772152, + "grad_norm": 0.5587114095687866, + "learning_rate": 2.009002374085675e-05, + "loss": 1.4101, + "step": 8787 + }, + { + "epoch": 0.9270042194092827, + "grad_norm": 0.5712944269180298, + "learning_rate": 2.003234501611037e-05, + "loss": 1.4345, + "step": 8788 + }, + { + "epoch": 0.9271097046413502, + "grad_norm": 0.5614693760871887, + "learning_rate": 1.9974748087879636e-05, + "loss": 1.4423, + "step": 8789 + }, + { + "epoch": 0.9272151898734177, + "grad_norm": 0.5807209014892578, + "learning_rate": 1.991723296261863e-05, + "loss": 1.4513, + "step": 8790 + }, + { + "epoch": 0.9273206751054852, + "grad_norm": 0.5691655278205872, + "learning_rate": 1.985979964677212e-05, + "loss": 1.4492, + "step": 8791 + }, + { + "epoch": 0.9274261603375528, + "grad_norm": 0.5605006217956543, + "learning_rate": 1.9802448146775953e-05, + "loss": 1.4205, + "step": 8792 + }, + { + "epoch": 0.9275316455696202, + "grad_norm": 0.5647096037864685, + "learning_rate": 1.9745178469056575e-05, + "loss": 1.4426, + "step": 8793 + }, + { + "epoch": 0.9276371308016877, + "grad_norm": 0.5521472692489624, + "learning_rate": 1.9687990620031266e-05, + "loss": 1.4188, + "step": 8794 + }, + { + "epoch": 0.9277426160337553, + "grad_norm": 0.6047657132148743, + "learning_rate": 1.963088460610832e-05, + "loss": 1.4246, + "step": 8795 + }, + { + "epoch": 0.9278481012658227, + "grad_norm": 0.5806040167808533, + 
"learning_rate": 1.9573860433686696e-05, + "loss": 1.4531, + "step": 8796 + }, + { + "epoch": 0.9279535864978903, + "grad_norm": 0.5893714427947998, + "learning_rate": 1.9516918109156206e-05, + "loss": 1.4408, + "step": 8797 + }, + { + "epoch": 0.9280590717299578, + "grad_norm": 0.5766898393630981, + "learning_rate": 1.9460057638897578e-05, + "loss": 1.4395, + "step": 8798 + }, + { + "epoch": 0.9281645569620253, + "grad_norm": 0.5978161692619324, + "learning_rate": 1.9403279029282376e-05, + "loss": 1.42, + "step": 8799 + }, + { + "epoch": 0.9282700421940928, + "grad_norm": 0.585555911064148, + "learning_rate": 1.9346582286672686e-05, + "loss": 1.4357, + "step": 8800 + }, + { + "epoch": 0.9283755274261604, + "grad_norm": 0.5802249312400818, + "learning_rate": 1.9289967417421922e-05, + "loss": 1.4606, + "step": 8801 + }, + { + "epoch": 0.9284810126582278, + "grad_norm": 0.6020320057868958, + "learning_rate": 1.9233434427873924e-05, + "loss": 1.4322, + "step": 8802 + }, + { + "epoch": 0.9285864978902953, + "grad_norm": 0.555915355682373, + "learning_rate": 1.9176983324363545e-05, + "loss": 1.4344, + "step": 8803 + }, + { + "epoch": 0.9286919831223629, + "grad_norm": 0.5809893012046814, + "learning_rate": 1.912061411321639e-05, + "loss": 1.4419, + "step": 8804 + }, + { + "epoch": 0.9287974683544303, + "grad_norm": 0.5567583441734314, + "learning_rate": 1.9064326800748906e-05, + "loss": 1.4358, + "step": 8805 + }, + { + "epoch": 0.9289029535864979, + "grad_norm": 0.5518735647201538, + "learning_rate": 1.9008121393268462e-05, + "loss": 1.4419, + "step": 8806 + }, + { + "epoch": 0.9290084388185654, + "grad_norm": 0.5494452714920044, + "learning_rate": 1.8951997897072943e-05, + "loss": 1.4475, + "step": 8807 + }, + { + "epoch": 0.9291139240506329, + "grad_norm": 0.6095446944236755, + "learning_rate": 1.8895956318451398e-05, + "loss": 1.467, + "step": 8808 + }, + { + "epoch": 0.9292194092827004, + "grad_norm": 0.6061810851097107, + "learning_rate": 1.8839996663683635e-05, + 
"loss": 1.4774, + "step": 8809 + }, + { + "epoch": 0.929324894514768, + "grad_norm": 0.606911838054657, + "learning_rate": 1.878411893904014e-05, + "loss": 1.4835, + "step": 8810 + }, + { + "epoch": 0.9294303797468354, + "grad_norm": 0.5773082971572876, + "learning_rate": 1.872832315078224e-05, + "loss": 1.4477, + "step": 8811 + }, + { + "epoch": 0.929535864978903, + "grad_norm": 0.580393373966217, + "learning_rate": 1.8672609305162263e-05, + "loss": 1.4279, + "step": 8812 + }, + { + "epoch": 0.9296413502109705, + "grad_norm": 0.5694815516471863, + "learning_rate": 1.8616977408423053e-05, + "loss": 1.438, + "step": 8813 + }, + { + "epoch": 0.9297468354430379, + "grad_norm": 0.5969712138175964, + "learning_rate": 1.856142746679862e-05, + "loss": 1.4631, + "step": 8814 + }, + { + "epoch": 0.9298523206751055, + "grad_norm": 0.5567110180854797, + "learning_rate": 1.8505959486513485e-05, + "loss": 1.4324, + "step": 8815 + }, + { + "epoch": 0.929957805907173, + "grad_norm": 0.5941346287727356, + "learning_rate": 1.8450573473783094e-05, + "loss": 1.4497, + "step": 8816 + }, + { + "epoch": 0.9300632911392405, + "grad_norm": 0.5590066313743591, + "learning_rate": 1.8395269434813733e-05, + "loss": 1.4551, + "step": 8817 + }, + { + "epoch": 0.930168776371308, + "grad_norm": 0.5388520956039429, + "learning_rate": 1.8340047375802693e-05, + "loss": 1.4383, + "step": 8818 + }, + { + "epoch": 0.9302742616033756, + "grad_norm": 0.5632081627845764, + "learning_rate": 1.8284907302937608e-05, + "loss": 1.4197, + "step": 8819 + }, + { + "epoch": 0.930379746835443, + "grad_norm": 0.5752242207527161, + "learning_rate": 1.822984922239737e-05, + "loss": 1.4217, + "step": 8820 + }, + { + "epoch": 0.9304852320675105, + "grad_norm": 0.5307113528251648, + "learning_rate": 1.8174873140351544e-05, + "loss": 1.4609, + "step": 8821 + }, + { + "epoch": 0.9305907172995781, + "grad_norm": 0.5853778719902039, + "learning_rate": 1.8119979062960286e-05, + "loss": 1.5059, + "step": 8822 + }, + { + 
"epoch": 0.9306962025316455, + "grad_norm": 0.557511031627655, + "learning_rate": 1.806516699637492e-05, + "loss": 1.4254, + "step": 8823 + }, + { + "epoch": 0.9308016877637131, + "grad_norm": 0.5805737376213074, + "learning_rate": 1.8010436946737292e-05, + "loss": 1.4768, + "step": 8824 + }, + { + "epoch": 0.9309071729957806, + "grad_norm": 0.553473174571991, + "learning_rate": 1.7955788920180238e-05, + "loss": 1.4517, + "step": 8825 + }, + { + "epoch": 0.9310126582278481, + "grad_norm": 0.5989781022071838, + "learning_rate": 1.7901222922827282e-05, + "loss": 1.4499, + "step": 8826 + }, + { + "epoch": 0.9311181434599156, + "grad_norm": 0.5588606595993042, + "learning_rate": 1.7846738960792945e-05, + "loss": 1.4435, + "step": 8827 + }, + { + "epoch": 0.9312236286919832, + "grad_norm": 0.5647150278091431, + "learning_rate": 1.7792337040182434e-05, + "loss": 1.4661, + "step": 8828 + }, + { + "epoch": 0.9313291139240506, + "grad_norm": 0.6379879117012024, + "learning_rate": 1.773801716709153e-05, + "loss": 1.4322, + "step": 8829 + }, + { + "epoch": 0.9314345991561181, + "grad_norm": 0.5711951851844788, + "learning_rate": 1.7683779347607286e-05, + "loss": 1.4038, + "step": 8830 + }, + { + "epoch": 0.9315400843881857, + "grad_norm": 0.551772952079773, + "learning_rate": 1.7629623587807175e-05, + "loss": 1.398, + "step": 8831 + }, + { + "epoch": 0.9316455696202531, + "grad_norm": 0.5408370494842529, + "learning_rate": 1.7575549893759756e-05, + "loss": 1.4721, + "step": 8832 + }, + { + "epoch": 0.9317510548523207, + "grad_norm": 0.5515504479408264, + "learning_rate": 1.7521558271524103e-05, + "loss": 1.421, + "step": 8833 + }, + { + "epoch": 0.9318565400843882, + "grad_norm": 0.5592682361602783, + "learning_rate": 1.7467648727150202e-05, + "loss": 1.428, + "step": 8834 + }, + { + "epoch": 0.9319620253164557, + "grad_norm": 0.541654646396637, + "learning_rate": 1.741382126667915e-05, + "loss": 1.4427, + "step": 8835 + }, + { + "epoch": 0.9320675105485232, + "grad_norm": 
0.581619918346405, + "learning_rate": 1.7360075896142357e-05, + "loss": 1.4515, + "step": 8836 + }, + { + "epoch": 0.9321729957805908, + "grad_norm": 0.6193785071372986, + "learning_rate": 1.7306412621562352e-05, + "loss": 1.4588, + "step": 8837 + }, + { + "epoch": 0.9322784810126582, + "grad_norm": 0.5459283590316772, + "learning_rate": 1.72528314489524e-05, + "loss": 1.4521, + "step": 8838 + }, + { + "epoch": 0.9323839662447257, + "grad_norm": 0.5702266693115234, + "learning_rate": 1.719933238431645e-05, + "loss": 1.406, + "step": 8839 + }, + { + "epoch": 0.9324894514767933, + "grad_norm": 0.575951337814331, + "learning_rate": 1.714591543364938e-05, + "loss": 1.445, + "step": 8840 + }, + { + "epoch": 0.9325949367088607, + "grad_norm": 0.5367328524589539, + "learning_rate": 1.7092580602936807e-05, + "loss": 1.4147, + "step": 8841 + }, + { + "epoch": 0.9327004219409283, + "grad_norm": 0.5844159126281738, + "learning_rate": 1.703932789815521e-05, + "loss": 1.4822, + "step": 8842 + }, + { + "epoch": 0.9328059071729958, + "grad_norm": 0.5535144805908203, + "learning_rate": 1.6986157325271727e-05, + "loss": 1.4372, + "step": 8843 + }, + { + "epoch": 0.9329113924050633, + "grad_norm": 0.5373470187187195, + "learning_rate": 1.6933068890244595e-05, + "loss": 1.4346, + "step": 8844 + }, + { + "epoch": 0.9330168776371308, + "grad_norm": 0.5465664267539978, + "learning_rate": 1.688006259902239e-05, + "loss": 1.4238, + "step": 8845 + }, + { + "epoch": 0.9331223628691984, + "grad_norm": 0.5669174790382385, + "learning_rate": 1.6827138457544854e-05, + "loss": 1.4375, + "step": 8846 + }, + { + "epoch": 0.9332278481012658, + "grad_norm": 0.5537304878234863, + "learning_rate": 1.677429647174242e-05, + "loss": 1.396, + "step": 8847 + }, + { + "epoch": 0.9333333333333333, + "grad_norm": 0.6003081202507019, + "learning_rate": 1.6721536647536255e-05, + "loss": 1.4526, + "step": 8848 + }, + { + "epoch": 0.9334388185654009, + "grad_norm": 0.6030091643333435, + "learning_rate": 
1.666885899083831e-05, + "loss": 1.4676, + "step": 8849 + }, + { + "epoch": 0.9335443037974683, + "grad_norm": 0.6348187923431396, + "learning_rate": 1.6616263507551437e-05, + "loss": 1.4097, + "step": 8850 + }, + { + "epoch": 0.9336497890295359, + "grad_norm": 0.5951762795448303, + "learning_rate": 1.656375020356926e-05, + "loss": 1.4487, + "step": 8851 + }, + { + "epoch": 0.9337552742616034, + "grad_norm": 0.5438513159751892, + "learning_rate": 1.6511319084776073e-05, + "loss": 1.4426, + "step": 8852 + }, + { + "epoch": 0.9338607594936709, + "grad_norm": 0.5810691118240356, + "learning_rate": 1.645897015704709e-05, + "loss": 1.4273, + "step": 8853 + }, + { + "epoch": 0.9339662447257384, + "grad_norm": 0.548957109451294, + "learning_rate": 1.6406703426248366e-05, + "loss": 1.4318, + "step": 8854 + }, + { + "epoch": 0.9340717299578059, + "grad_norm": 0.6175123453140259, + "learning_rate": 1.6354518898236472e-05, + "loss": 1.4651, + "step": 8855 + }, + { + "epoch": 0.9341772151898734, + "grad_norm": 0.5326970815658569, + "learning_rate": 1.630241657885906e-05, + "loss": 1.4364, + "step": 8856 + }, + { + "epoch": 0.934282700421941, + "grad_norm": 0.5594727396965027, + "learning_rate": 1.6250396473954377e-05, + "loss": 1.4039, + "step": 8857 + }, + { + "epoch": 0.9343881856540084, + "grad_norm": 0.5516977310180664, + "learning_rate": 1.6198458589351595e-05, + "loss": 1.4559, + "step": 8858 + }, + { + "epoch": 0.9344936708860759, + "grad_norm": 0.5715162754058838, + "learning_rate": 1.614660293087056e-05, + "loss": 1.4556, + "step": 8859 + }, + { + "epoch": 0.9345991561181435, + "grad_norm": 0.5540726780891418, + "learning_rate": 1.609482950432195e-05, + "loss": 1.4095, + "step": 8860 + }, + { + "epoch": 0.9347046413502109, + "grad_norm": 0.5639488101005554, + "learning_rate": 1.6043138315507382e-05, + "loss": 1.4141, + "step": 8861 + }, + { + "epoch": 0.9348101265822785, + "grad_norm": 0.532781720161438, + "learning_rate": 1.5991529370218887e-05, + "loss": 1.472, + 
"step": 8862 + }, + { + "epoch": 0.934915611814346, + "grad_norm": 0.5839990377426147, + "learning_rate": 1.5940002674239756e-05, + "loss": 1.4506, + "step": 8863 + }, + { + "epoch": 0.9350210970464135, + "grad_norm": 0.5672546029090881, + "learning_rate": 1.588855823334362e-05, + "loss": 1.4209, + "step": 8864 + }, + { + "epoch": 0.935126582278481, + "grad_norm": 0.5475982427597046, + "learning_rate": 1.5837196053295117e-05, + "loss": 1.4538, + "step": 8865 + }, + { + "epoch": 0.9352320675105485, + "grad_norm": 0.584936261177063, + "learning_rate": 1.5785916139849725e-05, + "loss": 1.4485, + "step": 8866 + }, + { + "epoch": 0.935337552742616, + "grad_norm": 0.5778898000717163, + "learning_rate": 1.573471849875352e-05, + "loss": 1.4689, + "step": 8867 + }, + { + "epoch": 0.9354430379746835, + "grad_norm": 0.5639301538467407, + "learning_rate": 1.568360313574349e-05, + "loss": 1.4277, + "step": 8868 + }, + { + "epoch": 0.9355485232067511, + "grad_norm": 0.5771474838256836, + "learning_rate": 1.5632570056547308e-05, + "loss": 1.441, + "step": 8869 + }, + { + "epoch": 0.9356540084388185, + "grad_norm": 0.5611417293548584, + "learning_rate": 1.5581619266883563e-05, + "loss": 1.4035, + "step": 8870 + }, + { + "epoch": 0.9357594936708861, + "grad_norm": 0.573586642742157, + "learning_rate": 1.5530750772461522e-05, + "loss": 1.4718, + "step": 8871 + }, + { + "epoch": 0.9358649789029536, + "grad_norm": 0.5513860583305359, + "learning_rate": 1.5479964578981293e-05, + "loss": 1.3916, + "step": 8872 + }, + { + "epoch": 0.935970464135021, + "grad_norm": 0.5914504528045654, + "learning_rate": 1.5429260692133656e-05, + "loss": 1.4679, + "step": 8873 + }, + { + "epoch": 0.9360759493670886, + "grad_norm": 0.5492421984672546, + "learning_rate": 1.5378639117600234e-05, + "loss": 1.4221, + "step": 8874 + }, + { + "epoch": 0.9361814345991561, + "grad_norm": 0.6130530834197998, + "learning_rate": 1.532809986105349e-05, + "loss": 1.4229, + "step": 8875 + }, + { + "epoch": 
0.9362869198312236, + "grad_norm": 0.5781847834587097, + "learning_rate": 1.527764292815656e-05, + "loss": 1.4517, + "step": 8876 + }, + { + "epoch": 0.9363924050632911, + "grad_norm": 0.5233162641525269, + "learning_rate": 1.522726832456342e-05, + "loss": 1.4324, + "step": 8877 + }, + { + "epoch": 0.9364978902953587, + "grad_norm": 0.5673003196716309, + "learning_rate": 1.517697605591864e-05, + "loss": 1.4661, + "step": 8878 + }, + { + "epoch": 0.9366033755274261, + "grad_norm": 0.5696048736572266, + "learning_rate": 1.512676612785796e-05, + "loss": 1.463, + "step": 8879 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 0.58985435962677, + "learning_rate": 1.5076638546007548e-05, + "loss": 1.4432, + "step": 8880 + }, + { + "epoch": 0.9368143459915612, + "grad_norm": 0.6245648264884949, + "learning_rate": 1.502659331598441e-05, + "loss": 1.4412, + "step": 8881 + }, + { + "epoch": 0.9369198312236287, + "grad_norm": 0.5691860318183899, + "learning_rate": 1.4976630443396395e-05, + "loss": 1.4764, + "step": 8882 + }, + { + "epoch": 0.9370253164556962, + "grad_norm": 0.5851648449897766, + "learning_rate": 1.4926749933842187e-05, + "loss": 1.4178, + "step": 8883 + }, + { + "epoch": 0.9371308016877637, + "grad_norm": 0.5733131766319275, + "learning_rate": 1.4876951792910987e-05, + "loss": 1.4506, + "step": 8884 + }, + { + "epoch": 0.9372362869198312, + "grad_norm": 0.5848699808120728, + "learning_rate": 1.4827236026182994e-05, + "loss": 1.4254, + "step": 8885 + }, + { + "epoch": 0.9373417721518987, + "grad_norm": 0.5638344883918762, + "learning_rate": 1.4777602639229004e-05, + "loss": 1.4528, + "step": 8886 + }, + { + "epoch": 0.9374472573839663, + "grad_norm": 0.5667209625244141, + "learning_rate": 1.4728051637610902e-05, + "loss": 1.4336, + "step": 8887 + }, + { + "epoch": 0.9375527426160337, + "grad_norm": 0.5719776153564453, + "learning_rate": 1.4678583026880993e-05, + "loss": 1.4515, + "step": 8888 + }, + { + "epoch": 0.9376582278481013, + "grad_norm": 
0.5823426842689514, + "learning_rate": 1.4629196812582513e-05, + "loss": 1.4457, + "step": 8889 + }, + { + "epoch": 0.9377637130801688, + "grad_norm": 0.5640562176704407, + "learning_rate": 1.457989300024945e-05, + "loss": 1.4144, + "step": 8890 + }, + { + "epoch": 0.9378691983122363, + "grad_norm": 0.6091753244400024, + "learning_rate": 1.4530671595406469e-05, + "loss": 1.4416, + "step": 8891 + }, + { + "epoch": 0.9379746835443038, + "grad_norm": 0.5547666549682617, + "learning_rate": 1.4481532603569076e-05, + "loss": 1.4484, + "step": 8892 + }, + { + "epoch": 0.9380801687763713, + "grad_norm": 0.5528984665870667, + "learning_rate": 1.4432476030243696e-05, + "loss": 1.4114, + "step": 8893 + }, + { + "epoch": 0.9381856540084388, + "grad_norm": 0.5656625628471375, + "learning_rate": 1.4383501880927103e-05, + "loss": 1.4147, + "step": 8894 + }, + { + "epoch": 0.9382911392405063, + "grad_norm": 0.5560200810432434, + "learning_rate": 1.433461016110732e-05, + "loss": 1.4505, + "step": 8895 + }, + { + "epoch": 0.9383966244725739, + "grad_norm": 0.55207759141922, + "learning_rate": 1.42858008762628e-05, + "loss": 1.4266, + "step": 8896 + }, + { + "epoch": 0.9385021097046413, + "grad_norm": 0.5725208520889282, + "learning_rate": 1.4237074031862918e-05, + "loss": 1.4485, + "step": 8897 + }, + { + "epoch": 0.9386075949367089, + "grad_norm": 0.5507459044456482, + "learning_rate": 1.4188429633367721e-05, + "loss": 1.4016, + "step": 8898 + }, + { + "epoch": 0.9387130801687764, + "grad_norm": 0.5606070160865784, + "learning_rate": 1.4139867686228102e-05, + "loss": 1.4578, + "step": 8899 + }, + { + "epoch": 0.9388185654008439, + "grad_norm": 0.5688459277153015, + "learning_rate": 1.4091388195885625e-05, + "loss": 1.4317, + "step": 8900 + }, + { + "epoch": 0.9389240506329114, + "grad_norm": 0.5612446069717407, + "learning_rate": 1.404299116777269e-05, + "loss": 1.444, + "step": 8901 + }, + { + "epoch": 0.939029535864979, + "grad_norm": 0.5420080423355103, + "learning_rate": 
1.3994676607312379e-05, + "loss": 1.4133, + "step": 8902 + }, + { + "epoch": 0.9391350210970464, + "grad_norm": 0.5560903549194336, + "learning_rate": 1.3946444519918611e-05, + "loss": 1.4466, + "step": 8903 + }, + { + "epoch": 0.9392405063291139, + "grad_norm": 0.5616441369056702, + "learning_rate": 1.3898294910995979e-05, + "loss": 1.4387, + "step": 8904 + }, + { + "epoch": 0.9393459915611815, + "grad_norm": 0.614759624004364, + "learning_rate": 1.385022778594e-05, + "loss": 1.444, + "step": 8905 + }, + { + "epoch": 0.9394514767932489, + "grad_norm": 0.5368724465370178, + "learning_rate": 1.3802243150136784e-05, + "loss": 1.4685, + "step": 8906 + }, + { + "epoch": 0.9395569620253165, + "grad_norm": 0.569229006767273, + "learning_rate": 1.3754341008963194e-05, + "loss": 1.4293, + "step": 8907 + }, + { + "epoch": 0.939662447257384, + "grad_norm": 0.5557070970535278, + "learning_rate": 1.370652136778694e-05, + "loss": 1.4269, + "step": 8908 + }, + { + "epoch": 0.9397679324894515, + "grad_norm": 0.5954893827438354, + "learning_rate": 1.3658784231966481e-05, + "loss": 1.4214, + "step": 8909 + }, + { + "epoch": 0.939873417721519, + "grad_norm": 0.6060025691986084, + "learning_rate": 1.3611129606851041e-05, + "loss": 1.4061, + "step": 8910 + }, + { + "epoch": 0.9399789029535865, + "grad_norm": 0.590415894985199, + "learning_rate": 1.3563557497780432e-05, + "loss": 1.4217, + "step": 8911 + }, + { + "epoch": 0.940084388185654, + "grad_norm": 0.5425179600715637, + "learning_rate": 1.3516067910085306e-05, + "loss": 1.4625, + "step": 8912 + }, + { + "epoch": 0.9401898734177215, + "grad_norm": 0.5464140176773071, + "learning_rate": 1.3468660849087322e-05, + "loss": 1.4511, + "step": 8913 + }, + { + "epoch": 0.9402953586497891, + "grad_norm": 0.5438153147697449, + "learning_rate": 1.3421336320098565e-05, + "loss": 1.4439, + "step": 8914 + }, + { + "epoch": 0.9404008438818565, + "grad_norm": 0.571660041809082, + "learning_rate": 1.3374094328422043e-05, + "loss": 1.4228, + 
"step": 8915 + }, + { + "epoch": 0.9405063291139241, + "grad_norm": 0.5708393454551697, + "learning_rate": 1.3326934879351272e-05, + "loss": 1.4814, + "step": 8916 + }, + { + "epoch": 0.9406118143459916, + "grad_norm": 0.5589742064476013, + "learning_rate": 1.327985797817094e-05, + "loss": 1.4579, + "step": 8917 + }, + { + "epoch": 0.940717299578059, + "grad_norm": 0.6607460379600525, + "learning_rate": 1.3232863630156077e-05, + "loss": 1.4067, + "step": 8918 + }, + { + "epoch": 0.9408227848101266, + "grad_norm": 0.6065307855606079, + "learning_rate": 1.3185951840572723e-05, + "loss": 1.4358, + "step": 8919 + }, + { + "epoch": 0.9409282700421941, + "grad_norm": 0.5516390800476074, + "learning_rate": 1.313912261467759e-05, + "loss": 1.4112, + "step": 8920 + }, + { + "epoch": 0.9410337552742616, + "grad_norm": 0.5980112552642822, + "learning_rate": 1.3092375957717978e-05, + "loss": 1.4535, + "step": 8921 + }, + { + "epoch": 0.9411392405063291, + "grad_norm": 0.5546872615814209, + "learning_rate": 1.3045711874932281e-05, + "loss": 1.4348, + "step": 8922 + }, + { + "epoch": 0.9412447257383966, + "grad_norm": 0.5496872663497925, + "learning_rate": 1.2999130371549318e-05, + "loss": 1.435, + "step": 8923 + }, + { + "epoch": 0.9413502109704641, + "grad_norm": 0.5631046891212463, + "learning_rate": 1.2952631452788826e-05, + "loss": 1.4339, + "step": 8924 + }, + { + "epoch": 0.9414556962025317, + "grad_norm": 0.5894784331321716, + "learning_rate": 1.2906215123861226e-05, + "loss": 1.4651, + "step": 8925 + }, + { + "epoch": 0.9415611814345991, + "grad_norm": 0.5738328695297241, + "learning_rate": 1.2859881389967687e-05, + "loss": 1.4093, + "step": 8926 + }, + { + "epoch": 0.9416666666666667, + "grad_norm": 0.5488142371177673, + "learning_rate": 1.2813630256300224e-05, + "loss": 1.4536, + "step": 8927 + }, + { + "epoch": 0.9417721518987342, + "grad_norm": 0.566695511341095, + "learning_rate": 1.2767461728041357e-05, + "loss": 1.4562, + "step": 8928 + }, + { + "epoch": 
0.9418776371308016, + "grad_norm": 0.5543351769447327, + "learning_rate": 1.2721375810364616e-05, + "loss": 1.407, + "step": 8929 + }, + { + "epoch": 0.9419831223628692, + "grad_norm": 0.5656411051750183, + "learning_rate": 1.267537250843412e-05, + "loss": 1.4174, + "step": 8930 + }, + { + "epoch": 0.9420886075949367, + "grad_norm": 0.5741050243377686, + "learning_rate": 1.2629451827404659e-05, + "loss": 1.4177, + "step": 8931 + }, + { + "epoch": 0.9421940928270042, + "grad_norm": 0.5727308392524719, + "learning_rate": 1.258361377242212e-05, + "loss": 1.4252, + "step": 8932 + }, + { + "epoch": 0.9422995780590717, + "grad_norm": 0.5365914106369019, + "learning_rate": 1.2537858348622728e-05, + "loss": 1.4413, + "step": 8933 + }, + { + "epoch": 0.9424050632911393, + "grad_norm": 0.6033174991607666, + "learning_rate": 1.2492185561133545e-05, + "loss": 1.4571, + "step": 8934 + }, + { + "epoch": 0.9425105485232067, + "grad_norm": 0.5514104962348938, + "learning_rate": 1.2446595415072565e-05, + "loss": 1.4535, + "step": 8935 + }, + { + "epoch": 0.9426160337552743, + "grad_norm": 0.5658470392227173, + "learning_rate": 1.2401087915548365e-05, + "loss": 1.4313, + "step": 8936 + }, + { + "epoch": 0.9427215189873418, + "grad_norm": 0.5963415503501892, + "learning_rate": 1.2355663067660283e-05, + "loss": 1.4469, + "step": 8937 + }, + { + "epoch": 0.9428270042194092, + "grad_norm": 0.5363219976425171, + "learning_rate": 1.2310320876498333e-05, + "loss": 1.4131, + "step": 8938 + }, + { + "epoch": 0.9429324894514768, + "grad_norm": 0.5661920309066772, + "learning_rate": 1.2265061347143447e-05, + "loss": 1.4629, + "step": 8939 + }, + { + "epoch": 0.9430379746835443, + "grad_norm": 0.5572298169136047, + "learning_rate": 1.2219884484667071e-05, + "loss": 1.44, + "step": 8940 + }, + { + "epoch": 0.9431434599156118, + "grad_norm": 0.5511583685874939, + "learning_rate": 1.2174790294131405e-05, + "loss": 1.4092, + "step": 8941 + }, + { + "epoch": 0.9432489451476793, + "grad_norm": 
0.5691820979118347, + "learning_rate": 1.2129778780589823e-05, + "loss": 1.4259, + "step": 8942 + }, + { + "epoch": 0.9433544303797469, + "grad_norm": 0.5598124265670776, + "learning_rate": 1.2084849949085791e-05, + "loss": 1.4384, + "step": 8943 + }, + { + "epoch": 0.9434599156118143, + "grad_norm": 0.5887753367424011, + "learning_rate": 1.2040003804653864e-05, + "loss": 1.4692, + "step": 8944 + }, + { + "epoch": 0.9435654008438819, + "grad_norm": 0.5515027046203613, + "learning_rate": 1.199524035231936e-05, + "loss": 1.4232, + "step": 8945 + }, + { + "epoch": 0.9436708860759494, + "grad_norm": 0.5738243460655212, + "learning_rate": 1.195055959709826e-05, + "loss": 1.4442, + "step": 8946 + }, + { + "epoch": 0.9437763713080168, + "grad_norm": 0.5518315434455872, + "learning_rate": 1.1905961543997147e-05, + "loss": 1.4332, + "step": 8947 + }, + { + "epoch": 0.9438818565400844, + "grad_norm": 0.5578014254570007, + "learning_rate": 1.186144619801352e-05, + "loss": 1.4325, + "step": 8948 + }, + { + "epoch": 0.9439873417721519, + "grad_norm": 0.5891954898834229, + "learning_rate": 1.1817013564135475e-05, + "loss": 1.4726, + "step": 8949 + }, + { + "epoch": 0.9440928270042194, + "grad_norm": 0.5484834313392639, + "learning_rate": 1.1772663647341947e-05, + "loss": 1.4427, + "step": 8950 + }, + { + "epoch": 0.9441983122362869, + "grad_norm": 0.5278035402297974, + "learning_rate": 1.1728396452602708e-05, + "loss": 1.4229, + "step": 8951 + }, + { + "epoch": 0.9443037974683545, + "grad_norm": 0.5698649287223816, + "learning_rate": 1.1684211984877957e-05, + "loss": 1.4406, + "step": 8952 + }, + { + "epoch": 0.9444092827004219, + "grad_norm": 0.558456301689148, + "learning_rate": 1.1640110249118818e-05, + "loss": 1.4592, + "step": 8953 + }, + { + "epoch": 0.9445147679324895, + "grad_norm": 0.5476016998291016, + "learning_rate": 1.1596091250267171e-05, + "loss": 1.4333, + "step": 8954 + }, + { + "epoch": 0.944620253164557, + "grad_norm": 0.6032710075378418, + "learning_rate": 
1.1552154993255488e-05, + "loss": 1.4754, + "step": 8955 + }, + { + "epoch": 0.9447257383966244, + "grad_norm": 0.5731917023658752, + "learning_rate": 1.1508301483007078e-05, + "loss": 1.4269, + "step": 8956 + }, + { + "epoch": 0.944831223628692, + "grad_norm": 0.5579376220703125, + "learning_rate": 1.1464530724435928e-05, + "loss": 1.4567, + "step": 8957 + }, + { + "epoch": 0.9449367088607595, + "grad_norm": 0.5510133504867554, + "learning_rate": 1.14208427224467e-05, + "loss": 1.4064, + "step": 8958 + }, + { + "epoch": 0.945042194092827, + "grad_norm": 0.5481350421905518, + "learning_rate": 1.137723748193506e-05, + "loss": 1.4165, + "step": 8959 + }, + { + "epoch": 0.9451476793248945, + "grad_norm": 0.5442681908607483, + "learning_rate": 1.1333715007786932e-05, + "loss": 1.4563, + "step": 8960 + }, + { + "epoch": 0.9452531645569621, + "grad_norm": 0.5722687840461731, + "learning_rate": 1.12902753048795e-05, + "loss": 1.421, + "step": 8961 + }, + { + "epoch": 0.9453586497890295, + "grad_norm": 0.53851717710495, + "learning_rate": 1.1246918378080202e-05, + "loss": 1.435, + "step": 8962 + }, + { + "epoch": 0.945464135021097, + "grad_norm": 0.5782626271247864, + "learning_rate": 1.12036442322474e-05, + "loss": 1.4494, + "step": 8963 + }, + { + "epoch": 0.9455696202531646, + "grad_norm": 0.6180676221847534, + "learning_rate": 1.1160452872230303e-05, + "loss": 1.4227, + "step": 8964 + }, + { + "epoch": 0.945675105485232, + "grad_norm": 0.6217833757400513, + "learning_rate": 1.111734430286862e-05, + "loss": 1.4567, + "step": 8965 + }, + { + "epoch": 0.9457805907172996, + "grad_norm": 0.5605331659317017, + "learning_rate": 1.1074318528992905e-05, + "loss": 1.4798, + "step": 8966 + }, + { + "epoch": 0.9458860759493671, + "grad_norm": 0.5664421916007996, + "learning_rate": 1.1031375555424466e-05, + "loss": 1.3997, + "step": 8967 + }, + { + "epoch": 0.9459915611814346, + "grad_norm": 0.6018392443656921, + "learning_rate": 1.0988515386975206e-05, + "loss": 1.4524, + "step": 
8968 + }, + { + "epoch": 0.9460970464135021, + "grad_norm": 0.5536914467811584, + "learning_rate": 1.0945738028447783e-05, + "loss": 1.4485, + "step": 8969 + }, + { + "epoch": 0.9462025316455697, + "grad_norm": 0.5682963132858276, + "learning_rate": 1.0903043484635694e-05, + "loss": 1.4353, + "step": 8970 + }, + { + "epoch": 0.9463080168776371, + "grad_norm": 0.5882362127304077, + "learning_rate": 1.0860431760323032e-05, + "loss": 1.4508, + "step": 8971 + }, + { + "epoch": 0.9464135021097047, + "grad_norm": 0.5356586575508118, + "learning_rate": 1.0817902860284723e-05, + "loss": 1.416, + "step": 8972 + }, + { + "epoch": 0.9465189873417722, + "grad_norm": 0.5804042220115662, + "learning_rate": 1.0775456789286291e-05, + "loss": 1.3953, + "step": 8973 + }, + { + "epoch": 0.9466244725738396, + "grad_norm": 0.5708609819412231, + "learning_rate": 1.0733093552084016e-05, + "loss": 1.4317, + "step": 8974 + }, + { + "epoch": 0.9467299578059072, + "grad_norm": 0.5751515626907349, + "learning_rate": 1.0690813153425016e-05, + "loss": 1.4631, + "step": 8975 + }, + { + "epoch": 0.9468354430379747, + "grad_norm": 0.5830596685409546, + "learning_rate": 1.0648615598046834e-05, + "loss": 1.4517, + "step": 8976 + }, + { + "epoch": 0.9469409282700422, + "grad_norm": 0.5442797541618347, + "learning_rate": 1.0606500890678023e-05, + "loss": 1.4414, + "step": 8977 + }, + { + "epoch": 0.9470464135021097, + "grad_norm": 0.547875165939331, + "learning_rate": 1.0564469036037722e-05, + "loss": 1.4583, + "step": 8978 + }, + { + "epoch": 0.9471518987341773, + "grad_norm": 0.5689162015914917, + "learning_rate": 1.0522520038835831e-05, + "loss": 1.4065, + "step": 8979 + }, + { + "epoch": 0.9472573839662447, + "grad_norm": 0.5911909937858582, + "learning_rate": 1.0480653903772924e-05, + "loss": 1.456, + "step": 8980 + }, + { + "epoch": 0.9473628691983123, + "grad_norm": 0.56947261095047, + "learning_rate": 1.0438870635540332e-05, + "loss": 1.4494, + "step": 8981 + }, + { + "epoch": 
0.9474683544303798, + "grad_norm": 0.5727458596229553, + "learning_rate": 1.0397170238820142e-05, + "loss": 1.4303, + "step": 8982 + }, + { + "epoch": 0.9475738396624472, + "grad_norm": 0.5638369917869568, + "learning_rate": 1.0355552718284949e-05, + "loss": 1.4594, + "step": 8983 + }, + { + "epoch": 0.9476793248945148, + "grad_norm": 0.5709636807441711, + "learning_rate": 1.0314018078598275e-05, + "loss": 1.459, + "step": 8984 + }, + { + "epoch": 0.9477848101265823, + "grad_norm": 0.5583937764167786, + "learning_rate": 1.0272566324414313e-05, + "loss": 1.4563, + "step": 8985 + }, + { + "epoch": 0.9478902953586498, + "grad_norm": 0.5386300086975098, + "learning_rate": 1.0231197460377845e-05, + "loss": 1.4431, + "step": 8986 + }, + { + "epoch": 0.9479957805907173, + "grad_norm": 0.5738025903701782, + "learning_rate": 1.0189911491124582e-05, + "loss": 1.4291, + "step": 8987 + }, + { + "epoch": 0.9481012658227848, + "grad_norm": 0.5821279883384705, + "learning_rate": 1.0148708421280822e-05, + "loss": 1.43, + "step": 8988 + }, + { + "epoch": 0.9482067510548523, + "grad_norm": 0.5601074695587158, + "learning_rate": 1.0107588255463373e-05, + "loss": 1.4489, + "step": 8989 + }, + { + "epoch": 0.9483122362869199, + "grad_norm": 0.546881377696991, + "learning_rate": 1.0066550998280132e-05, + "loss": 1.424, + "step": 8990 + }, + { + "epoch": 0.9484177215189873, + "grad_norm": 0.592361330986023, + "learning_rate": 1.0025596654329504e-05, + "loss": 1.4048, + "step": 8991 + }, + { + "epoch": 0.9485232067510548, + "grad_norm": 0.6356260180473328, + "learning_rate": 9.984725228200654e-06, + "loss": 1.4201, + "step": 8992 + }, + { + "epoch": 0.9486286919831224, + "grad_norm": 0.5551126003265381, + "learning_rate": 9.943936724473412e-06, + "loss": 1.4257, + "step": 8993 + }, + { + "epoch": 0.9487341772151898, + "grad_norm": 0.553807258605957, + "learning_rate": 9.903231147718294e-06, + "loss": 1.4434, + "step": 8994 + }, + { + "epoch": 0.9488396624472574, + "grad_norm": 
0.5453750491142273, + "learning_rate": 9.862608502496568e-06, + "loss": 1.4899, + "step": 8995 + }, + { + "epoch": 0.9489451476793249, + "grad_norm": 0.5477018356323242, + "learning_rate": 9.822068793360172e-06, + "loss": 1.4229, + "step": 8996 + }, + { + "epoch": 0.9490506329113924, + "grad_norm": 0.5483537316322327, + "learning_rate": 9.781612024851893e-06, + "loss": 1.4366, + "step": 8997 + }, + { + "epoch": 0.9491561181434599, + "grad_norm": 0.5326668620109558, + "learning_rate": 9.74123820150502e-06, + "loss": 1.4597, + "step": 8998 + }, + { + "epoch": 0.9492616033755275, + "grad_norm": 0.6080837249755859, + "learning_rate": 9.700947327843685e-06, + "loss": 1.4377, + "step": 8999 + }, + { + "epoch": 0.9493670886075949, + "grad_norm": 0.5486772656440735, + "learning_rate": 9.660739408382608e-06, + "loss": 1.4188, + "step": 9000 + }, + { + "epoch": 0.9494725738396624, + "grad_norm": 0.5505321621894836, + "learning_rate": 9.620614447627435e-06, + "loss": 1.444, + "step": 9001 + }, + { + "epoch": 0.94957805907173, + "grad_norm": 0.5606387853622437, + "learning_rate": 9.580572450074237e-06, + "loss": 1.4424, + "step": 9002 + }, + { + "epoch": 0.9496835443037974, + "grad_norm": 0.5822574496269226, + "learning_rate": 9.540613420209927e-06, + "loss": 1.4567, + "step": 9003 + }, + { + "epoch": 0.949789029535865, + "grad_norm": 0.5650441646575928, + "learning_rate": 9.500737362512168e-06, + "loss": 1.4113, + "step": 9004 + }, + { + "epoch": 0.9498945147679325, + "grad_norm": 0.555400550365448, + "learning_rate": 9.460944281449307e-06, + "loss": 1.3967, + "step": 9005 + }, + { + "epoch": 0.95, + "grad_norm": 0.5805560946464539, + "learning_rate": 9.421234181480275e-06, + "loss": 1.4363, + "step": 9006 + }, + { + "epoch": 0.9501054852320675, + "grad_norm": 0.5716471672058105, + "learning_rate": 9.381607067054764e-06, + "loss": 1.4609, + "step": 9007 + }, + { + "epoch": 0.950210970464135, + "grad_norm": 0.5851796865463257, + "learning_rate": 9.342062942613222e-06, + 
"loss": 1.4592, + "step": 9008 + }, + { + "epoch": 0.9503164556962025, + "grad_norm": 0.5499102473258972, + "learning_rate": 9.302601812586852e-06, + "loss": 1.4196, + "step": 9009 + }, + { + "epoch": 0.95042194092827, + "grad_norm": 0.5840516686439514, + "learning_rate": 9.26322368139737e-06, + "loss": 1.4311, + "step": 9010 + }, + { + "epoch": 0.9505274261603376, + "grad_norm": 0.5834660530090332, + "learning_rate": 9.223928553457328e-06, + "loss": 1.4063, + "step": 9011 + }, + { + "epoch": 0.950632911392405, + "grad_norm": 0.5961766839027405, + "learning_rate": 9.184716433169955e-06, + "loss": 1.4494, + "step": 9012 + }, + { + "epoch": 0.9507383966244726, + "grad_norm": 0.566660463809967, + "learning_rate": 9.145587324929066e-06, + "loss": 1.451, + "step": 9013 + }, + { + "epoch": 0.9508438818565401, + "grad_norm": 0.6125710010528564, + "learning_rate": 9.106541233119409e-06, + "loss": 1.4373, + "step": 9014 + }, + { + "epoch": 0.9509493670886076, + "grad_norm": 0.6266485452651978, + "learning_rate": 9.06757816211623e-06, + "loss": 1.432, + "step": 9015 + }, + { + "epoch": 0.9510548523206751, + "grad_norm": 0.5632026791572571, + "learning_rate": 9.028698116285538e-06, + "loss": 1.3874, + "step": 9016 + }, + { + "epoch": 0.9511603375527427, + "grad_norm": 0.5500940084457397, + "learning_rate": 8.989901099984016e-06, + "loss": 1.4149, + "step": 9017 + }, + { + "epoch": 0.9512658227848101, + "grad_norm": 0.5711085796356201, + "learning_rate": 8.9511871175591e-06, + "loss": 1.4475, + "step": 9018 + }, + { + "epoch": 0.9513713080168776, + "grad_norm": 0.5353922843933105, + "learning_rate": 8.912556173348907e-06, + "loss": 1.4389, + "step": 9019 + }, + { + "epoch": 0.9514767932489452, + "grad_norm": 0.5609308481216431, + "learning_rate": 8.874008271682222e-06, + "loss": 1.4686, + "step": 9020 + }, + { + "epoch": 0.9515822784810126, + "grad_norm": 0.567702054977417, + "learning_rate": 8.835543416878422e-06, + "loss": 1.4471, + "step": 9021 + }, + { + "epoch": 
0.9516877637130802, + "grad_norm": 0.584586501121521, + "learning_rate": 8.797161613247728e-06, + "loss": 1.4859, + "step": 9022 + }, + { + "epoch": 0.9517932489451477, + "grad_norm": 0.54807448387146, + "learning_rate": 8.758862865091117e-06, + "loss": 1.4676, + "step": 9023 + }, + { + "epoch": 0.9518987341772152, + "grad_norm": 0.5670713782310486, + "learning_rate": 8.72064717670007e-06, + "loss": 1.4182, + "step": 9024 + }, + { + "epoch": 0.9520042194092827, + "grad_norm": 0.5688088536262512, + "learning_rate": 8.68251455235683e-06, + "loss": 1.4584, + "step": 9025 + }, + { + "epoch": 0.9521097046413503, + "grad_norm": 0.5787815451622009, + "learning_rate": 8.644464996334395e-06, + "loss": 1.4977, + "step": 9026 + }, + { + "epoch": 0.9522151898734177, + "grad_norm": 0.5797404646873474, + "learning_rate": 8.606498512896438e-06, + "loss": 1.4287, + "step": 9027 + }, + { + "epoch": 0.9523206751054852, + "grad_norm": 0.5552310943603516, + "learning_rate": 8.568615106297223e-06, + "loss": 1.4423, + "step": 9028 + }, + { + "epoch": 0.9524261603375528, + "grad_norm": 0.5614289045333862, + "learning_rate": 8.53081478078177e-06, + "loss": 1.4747, + "step": 9029 + }, + { + "epoch": 0.9525316455696202, + "grad_norm": 0.5386683344841003, + "learning_rate": 8.493097540585775e-06, + "loss": 1.4283, + "step": 9030 + }, + { + "epoch": 0.9526371308016878, + "grad_norm": 0.5562446713447571, + "learning_rate": 8.455463389935774e-06, + "loss": 1.3951, + "step": 9031 + }, + { + "epoch": 0.9527426160337553, + "grad_norm": 0.5683950185775757, + "learning_rate": 8.417912333048727e-06, + "loss": 1.4622, + "step": 9032 + }, + { + "epoch": 0.9528481012658228, + "grad_norm": 0.5838422775268555, + "learning_rate": 8.380444374132517e-06, + "loss": 1.4515, + "step": 9033 + }, + { + "epoch": 0.9529535864978903, + "grad_norm": 0.5558241009712219, + "learning_rate": 8.343059517385454e-06, + "loss": 1.4664, + "step": 9034 + }, + { + "epoch": 0.9530590717299579, + "grad_norm": 0.5576637983322144, 
+ "learning_rate": 8.305757766996935e-06, + "loss": 1.4113, + "step": 9035 + }, + { + "epoch": 0.9531645569620253, + "grad_norm": 0.5657212138175964, + "learning_rate": 8.268539127146619e-06, + "loss": 1.4014, + "step": 9036 + }, + { + "epoch": 0.9532700421940928, + "grad_norm": 0.539736270904541, + "learning_rate": 8.231403602005083e-06, + "loss": 1.3913, + "step": 9037 + }, + { + "epoch": 0.9533755274261604, + "grad_norm": 0.5624712109565735, + "learning_rate": 8.194351195733585e-06, + "loss": 1.4333, + "step": 9038 + }, + { + "epoch": 0.9534810126582278, + "grad_norm": 0.5975872874259949, + "learning_rate": 8.157381912484053e-06, + "loss": 1.4261, + "step": 9039 + }, + { + "epoch": 0.9535864978902954, + "grad_norm": 0.5710906386375427, + "learning_rate": 8.120495756399005e-06, + "loss": 1.4487, + "step": 9040 + }, + { + "epoch": 0.9536919831223629, + "grad_norm": 0.5558106899261475, + "learning_rate": 8.08369273161172e-06, + "loss": 1.4512, + "step": 9041 + }, + { + "epoch": 0.9537974683544304, + "grad_norm": 0.5590314865112305, + "learning_rate": 8.046972842246147e-06, + "loss": 1.452, + "step": 9042 + }, + { + "epoch": 0.9539029535864979, + "grad_norm": 0.543974757194519, + "learning_rate": 8.01033609241708e-06, + "loss": 1.4127, + "step": 9043 + }, + { + "epoch": 0.9540084388185655, + "grad_norm": 0.550612211227417, + "learning_rate": 7.973782486229737e-06, + "loss": 1.4357, + "step": 9044 + }, + { + "epoch": 0.9541139240506329, + "grad_norm": 0.5488320589065552, + "learning_rate": 7.937312027780169e-06, + "loss": 1.4514, + "step": 9045 + }, + { + "epoch": 0.9542194092827004, + "grad_norm": 0.5663657784461975, + "learning_rate": 7.900924721154945e-06, + "loss": 1.4279, + "step": 9046 + }, + { + "epoch": 0.954324894514768, + "grad_norm": 0.5575916767120361, + "learning_rate": 7.864620570431635e-06, + "loss": 1.4907, + "step": 9047 + }, + { + "epoch": 0.9544303797468354, + "grad_norm": 0.5334890484809875, + "learning_rate": 7.828399579678153e-06, + "loss": 
1.4211, + "step": 9048 + }, + { + "epoch": 0.954535864978903, + "grad_norm": 0.5493040680885315, + "learning_rate": 7.792261752953333e-06, + "loss": 1.4302, + "step": 9049 + }, + { + "epoch": 0.9546413502109705, + "grad_norm": 0.5698176026344299, + "learning_rate": 7.756207094306605e-06, + "loss": 1.4341, + "step": 9050 + }, + { + "epoch": 0.954746835443038, + "grad_norm": 0.5503990650177002, + "learning_rate": 7.720235607777987e-06, + "loss": 1.4587, + "step": 9051 + }, + { + "epoch": 0.9548523206751055, + "grad_norm": 0.5320939421653748, + "learning_rate": 7.684347297398254e-06, + "loss": 1.4397, + "step": 9052 + }, + { + "epoch": 0.9549578059071729, + "grad_norm": 0.552691638469696, + "learning_rate": 7.648542167189021e-06, + "loss": 1.4321, + "step": 9053 + }, + { + "epoch": 0.9550632911392405, + "grad_norm": 0.5560362339019775, + "learning_rate": 7.612820221162331e-06, + "loss": 1.4606, + "step": 9054 + }, + { + "epoch": 0.955168776371308, + "grad_norm": 0.5245590806007385, + "learning_rate": 7.577181463320981e-06, + "loss": 1.4233, + "step": 9055 + }, + { + "epoch": 0.9552742616033755, + "grad_norm": 0.5619078278541565, + "learning_rate": 7.541625897658444e-06, + "loss": 1.4633, + "step": 9056 + }, + { + "epoch": 0.955379746835443, + "grad_norm": 0.5529354810714722, + "learning_rate": 7.506153528159032e-06, + "loss": 1.4334, + "step": 9057 + }, + { + "epoch": 0.9554852320675106, + "grad_norm": 0.5506321787834167, + "learning_rate": 7.470764358797566e-06, + "loss": 1.4549, + "step": 9058 + }, + { + "epoch": 0.955590717299578, + "grad_norm": 0.577087938785553, + "learning_rate": 7.435458393539457e-06, + "loss": 1.4239, + "step": 9059 + }, + { + "epoch": 0.9556962025316456, + "grad_norm": 0.5582069158554077, + "learning_rate": 7.400235636340957e-06, + "loss": 1.4331, + "step": 9060 + }, + { + "epoch": 0.9558016877637131, + "grad_norm": 0.5503984689712524, + "learning_rate": 7.3650960911490764e-06, + "loss": 1.4639, + "step": 9061 + }, + { + "epoch": 
0.9559071729957805, + "grad_norm": 0.5538519620895386, + "learning_rate": 7.330039761901247e-06, + "loss": 1.4548, + "step": 9062 + }, + { + "epoch": 0.9560126582278481, + "grad_norm": 0.5471664071083069, + "learning_rate": 7.295066652525828e-06, + "loss": 1.4472, + "step": 9063 + }, + { + "epoch": 0.9561181434599156, + "grad_norm": 0.5593015551567078, + "learning_rate": 7.260176766941601e-06, + "loss": 1.4332, + "step": 9064 + }, + { + "epoch": 0.9562236286919831, + "grad_norm": 0.5672776699066162, + "learning_rate": 7.225370109058188e-06, + "loss": 1.4685, + "step": 9065 + }, + { + "epoch": 0.9563291139240506, + "grad_norm": 0.563910722732544, + "learning_rate": 7.190646682775886e-06, + "loss": 1.4259, + "step": 9066 + }, + { + "epoch": 0.9564345991561182, + "grad_norm": 0.5654290914535522, + "learning_rate": 7.1560064919855835e-06, + "loss": 1.4641, + "step": 9067 + }, + { + "epoch": 0.9565400843881856, + "grad_norm": 0.5530804395675659, + "learning_rate": 7.121449540568842e-06, + "loss": 1.4315, + "step": 9068 + }, + { + "epoch": 0.9566455696202532, + "grad_norm": 0.5822427868843079, + "learning_rate": 7.086975832398146e-06, + "loss": 1.4362, + "step": 9069 + }, + { + "epoch": 0.9567510548523207, + "grad_norm": 0.561560332775116, + "learning_rate": 7.0525853713362395e-06, + "loss": 1.4113, + "step": 9070 + }, + { + "epoch": 0.9568565400843881, + "grad_norm": 0.5514039397239685, + "learning_rate": 7.018278161236791e-06, + "loss": 1.4745, + "step": 9071 + }, + { + "epoch": 0.9569620253164557, + "grad_norm": 0.5710620880126953, + "learning_rate": 6.984054205944141e-06, + "loss": 1.4165, + "step": 9072 + }, + { + "epoch": 0.9570675105485232, + "grad_norm": 0.5412943959236145, + "learning_rate": 6.949913509293221e-06, + "loss": 1.4591, + "step": 9073 + }, + { + "epoch": 0.9571729957805907, + "grad_norm": 0.5732518434524536, + "learning_rate": 6.915856075109722e-06, + "loss": 1.4585, + "step": 9074 + }, + { + "epoch": 0.9572784810126582, + "grad_norm": 
0.5392240881919861, + "learning_rate": 6.881881907209841e-06, + "loss": 1.4277, + "step": 9075 + }, + { + "epoch": 0.9573839662447258, + "grad_norm": 0.5534999370574951, + "learning_rate": 6.847991009400617e-06, + "loss": 1.4066, + "step": 9076 + }, + { + "epoch": 0.9574894514767932, + "grad_norm": 0.5757794380187988, + "learning_rate": 6.814183385479677e-06, + "loss": 1.4645, + "step": 9077 + }, + { + "epoch": 0.9575949367088608, + "grad_norm": 0.5383061766624451, + "learning_rate": 6.780459039235409e-06, + "loss": 1.4633, + "step": 9078 + }, + { + "epoch": 0.9577004219409283, + "grad_norm": 0.582493007183075, + "learning_rate": 6.746817974446706e-06, + "loss": 1.3961, + "step": 9079 + }, + { + "epoch": 0.9578059071729957, + "grad_norm": 0.5611474514007568, + "learning_rate": 6.71326019488322e-06, + "loss": 1.4628, + "step": 9080 + }, + { + "epoch": 0.9579113924050633, + "grad_norm": 0.5893315076828003, + "learning_rate": 6.679785704305358e-06, + "loss": 1.4597, + "step": 9081 + }, + { + "epoch": 0.9580168776371308, + "grad_norm": 0.5712660551071167, + "learning_rate": 6.6463945064639544e-06, + "loss": 1.4525, + "step": 9082 + }, + { + "epoch": 0.9581223628691983, + "grad_norm": 0.550230860710144, + "learning_rate": 6.6130866051007654e-06, + "loss": 1.4445, + "step": 9083 + }, + { + "epoch": 0.9582278481012658, + "grad_norm": 0.5469131469726562, + "learning_rate": 6.57986200394814e-06, + "loss": 1.4343, + "step": 9084 + }, + { + "epoch": 0.9583333333333334, + "grad_norm": 0.5634616613388062, + "learning_rate": 6.546720706728931e-06, + "loss": 1.4139, + "step": 9085 + }, + { + "epoch": 0.9584388185654008, + "grad_norm": 0.5796707272529602, + "learning_rate": 6.513662717156838e-06, + "loss": 1.4374, + "step": 9086 + }, + { + "epoch": 0.9585443037974684, + "grad_norm": 0.6042613387107849, + "learning_rate": 6.480688038936311e-06, + "loss": 1.4291, + "step": 9087 + }, + { + "epoch": 0.9586497890295359, + "grad_norm": 0.5732163190841675, + "learning_rate": 
6.447796675762146e-06, + "loss": 1.4553, + "step": 9088 + }, + { + "epoch": 0.9587552742616033, + "grad_norm": 0.5711933970451355, + "learning_rate": 6.414988631320062e-06, + "loss": 1.4441, + "step": 9089 + }, + { + "epoch": 0.9588607594936709, + "grad_norm": 0.5407671928405762, + "learning_rate": 6.3822639092862846e-06, + "loss": 1.4708, + "step": 9090 + }, + { + "epoch": 0.9589662447257384, + "grad_norm": 0.5405071973800659, + "learning_rate": 6.349622513327963e-06, + "loss": 1.4066, + "step": 9091 + }, + { + "epoch": 0.9590717299578059, + "grad_norm": 0.5589068531990051, + "learning_rate": 6.317064447102505e-06, + "loss": 1.4226, + "step": 9092 + }, + { + "epoch": 0.9591772151898734, + "grad_norm": 0.5596954822540283, + "learning_rate": 6.28458971425841e-06, + "loss": 1.4364, + "step": 9093 + }, + { + "epoch": 0.959282700421941, + "grad_norm": 0.5923652052879333, + "learning_rate": 6.252198318434432e-06, + "loss": 1.4032, + "step": 9094 + }, + { + "epoch": 0.9593881856540084, + "grad_norm": 0.5769174098968506, + "learning_rate": 6.219890263260336e-06, + "loss": 1.4125, + "step": 9095 + }, + { + "epoch": 0.959493670886076, + "grad_norm": 0.5891849398612976, + "learning_rate": 6.187665552356392e-06, + "loss": 1.4551, + "step": 9096 + }, + { + "epoch": 0.9595991561181435, + "grad_norm": 0.6016006469726562, + "learning_rate": 6.155524189333461e-06, + "loss": 1.4666, + "step": 9097 + }, + { + "epoch": 0.9597046413502109, + "grad_norm": 0.557330310344696, + "learning_rate": 6.123466177793247e-06, + "loss": 1.3982, + "step": 9098 + }, + { + "epoch": 0.9598101265822785, + "grad_norm": 0.5679131746292114, + "learning_rate": 6.091491521327958e-06, + "loss": 1.3808, + "step": 9099 + }, + { + "epoch": 0.959915611814346, + "grad_norm": 0.5601481199264526, + "learning_rate": 6.059600223520478e-06, + "loss": 1.4297, + "step": 9100 + }, + { + "epoch": 0.9600210970464135, + "grad_norm": 0.5783832669258118, + "learning_rate": 6.027792287944367e-06, + "loss": 1.4568, + "step": 
9101 + }, + { + "epoch": 0.960126582278481, + "grad_norm": 0.5776066780090332, + "learning_rate": 5.996067718163939e-06, + "loss": 1.4185, + "step": 9102 + }, + { + "epoch": 0.9602320675105486, + "grad_norm": 0.5445459485054016, + "learning_rate": 5.964426517734101e-06, + "loss": 1.4211, + "step": 9103 + }, + { + "epoch": 0.960337552742616, + "grad_norm": 0.5470234155654907, + "learning_rate": 5.932868690200266e-06, + "loss": 1.4421, + "step": 9104 + }, + { + "epoch": 0.9604430379746836, + "grad_norm": 0.569744884967804, + "learning_rate": 5.901394239098856e-06, + "loss": 1.4353, + "step": 9105 + }, + { + "epoch": 0.9605485232067511, + "grad_norm": 0.5380504727363586, + "learning_rate": 5.870003167956634e-06, + "loss": 1.4487, + "step": 9106 + }, + { + "epoch": 0.9606540084388185, + "grad_norm": 0.5508010983467102, + "learning_rate": 5.838695480291034e-06, + "loss": 1.4289, + "step": 9107 + }, + { + "epoch": 0.9607594936708861, + "grad_norm": 0.5315438508987427, + "learning_rate": 5.807471179610418e-06, + "loss": 1.4165, + "step": 9108 + }, + { + "epoch": 0.9608649789029536, + "grad_norm": 0.5651118159294128, + "learning_rate": 5.776330269413488e-06, + "loss": 1.4645, + "step": 9109 + }, + { + "epoch": 0.9609704641350211, + "grad_norm": 0.5613304972648621, + "learning_rate": 5.745272753189784e-06, + "loss": 1.4191, + "step": 9110 + }, + { + "epoch": 0.9610759493670886, + "grad_norm": 0.5531914830207825, + "learning_rate": 5.714298634419524e-06, + "loss": 1.4361, + "step": 9111 + }, + { + "epoch": 0.9611814345991562, + "grad_norm": 0.5866995453834534, + "learning_rate": 5.6834079165733464e-06, + "loss": 1.4469, + "step": 9112 + }, + { + "epoch": 0.9612869198312236, + "grad_norm": 0.5574207305908203, + "learning_rate": 5.652600603112818e-06, + "loss": 1.4251, + "step": 9113 + }, + { + "epoch": 0.9613924050632912, + "grad_norm": 0.5696583390235901, + "learning_rate": 5.6218766974900915e-06, + "loss": 1.4376, + "step": 9114 + }, + { + "epoch": 0.9614978902953587, + 
"grad_norm": 0.5579078793525696, + "learning_rate": 5.591236203147915e-06, + "loss": 1.4442, + "step": 9115 + }, + { + "epoch": 0.9616033755274261, + "grad_norm": 0.5584235191345215, + "learning_rate": 5.560679123519624e-06, + "loss": 1.4846, + "step": 9116 + }, + { + "epoch": 0.9617088607594937, + "grad_norm": 0.5583207607269287, + "learning_rate": 5.530205462029314e-06, + "loss": 1.4454, + "step": 9117 + }, + { + "epoch": 0.9618143459915611, + "grad_norm": 0.5527426600456238, + "learning_rate": 5.499815222091836e-06, + "loss": 1.4841, + "step": 9118 + }, + { + "epoch": 0.9619198312236287, + "grad_norm": 0.5548452138900757, + "learning_rate": 5.469508407112467e-06, + "loss": 1.4574, + "step": 9119 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 0.5879027247428894, + "learning_rate": 5.439285020487156e-06, + "loss": 1.3845, + "step": 9120 + }, + { + "epoch": 0.9621308016877637, + "grad_norm": 0.6026490926742554, + "learning_rate": 5.409145065602694e-06, + "loss": 1.4464, + "step": 9121 + }, + { + "epoch": 0.9622362869198312, + "grad_norm": 0.5784085988998413, + "learning_rate": 5.379088545836464e-06, + "loss": 1.4132, + "step": 9122 + }, + { + "epoch": 0.9623417721518988, + "grad_norm": 0.5801171064376831, + "learning_rate": 5.349115464556354e-06, + "loss": 1.4418, + "step": 9123 + }, + { + "epoch": 0.9624472573839662, + "grad_norm": 0.5387450456619263, + "learning_rate": 5.319225825120927e-06, + "loss": 1.4157, + "step": 9124 + }, + { + "epoch": 0.9625527426160337, + "grad_norm": 0.5471797585487366, + "learning_rate": 5.289419630879672e-06, + "loss": 1.415, + "step": 9125 + }, + { + "epoch": 0.9626582278481013, + "grad_norm": 0.5529056787490845, + "learning_rate": 5.2596968851724155e-06, + "loss": 1.4096, + "step": 9126 + }, + { + "epoch": 0.9627637130801687, + "grad_norm": 0.5588645935058594, + "learning_rate": 5.230057591329662e-06, + "loss": 1.4445, + "step": 9127 + }, + { + "epoch": 0.9628691983122363, + "grad_norm": 0.5448578596115112, + 
"learning_rate": 5.200501752672754e-06, + "loss": 1.4069, + "step": 9128 + }, + { + "epoch": 0.9629746835443038, + "grad_norm": 0.567242443561554, + "learning_rate": 5.171029372513458e-06, + "loss": 1.3994, + "step": 9129 + }, + { + "epoch": 0.9630801687763713, + "grad_norm": 0.5735203623771667, + "learning_rate": 5.141640454154467e-06, + "loss": 1.5062, + "step": 9130 + }, + { + "epoch": 0.9631856540084388, + "grad_norm": 0.5531099438667297, + "learning_rate": 5.112335000888813e-06, + "loss": 1.4531, + "step": 9131 + }, + { + "epoch": 0.9632911392405064, + "grad_norm": 0.5541353821754456, + "learning_rate": 5.083113016000368e-06, + "loss": 1.478, + "step": 9132 + }, + { + "epoch": 0.9633966244725738, + "grad_norm": 0.5902933478355408, + "learning_rate": 5.053974502763681e-06, + "loss": 1.492, + "step": 9133 + }, + { + "epoch": 0.9635021097046413, + "grad_norm": 0.5730743408203125, + "learning_rate": 5.024919464443723e-06, + "loss": 1.4112, + "step": 9134 + }, + { + "epoch": 0.9636075949367089, + "grad_norm": 0.5825031995773315, + "learning_rate": 4.995947904296305e-06, + "loss": 1.425, + "step": 9135 + }, + { + "epoch": 0.9637130801687763, + "grad_norm": 0.5636284351348877, + "learning_rate": 4.967059825567832e-06, + "loss": 1.4457, + "step": 9136 + }, + { + "epoch": 0.9638185654008439, + "grad_norm": 0.5528061985969543, + "learning_rate": 4.938255231495464e-06, + "loss": 1.4456, + "step": 9137 + }, + { + "epoch": 0.9639240506329114, + "grad_norm": 0.5753851532936096, + "learning_rate": 4.909534125306702e-06, + "loss": 1.4537, + "step": 9138 + }, + { + "epoch": 0.9640295358649789, + "grad_norm": 0.5515367388725281, + "learning_rate": 4.880896510220056e-06, + "loss": 1.4493, + "step": 9139 + }, + { + "epoch": 0.9641350210970464, + "grad_norm": 0.5822206735610962, + "learning_rate": 4.852342389444458e-06, + "loss": 1.4468, + "step": 9140 + }, + { + "epoch": 0.964240506329114, + "grad_norm": 0.6771122813224792, + "learning_rate": 4.823871766179516e-06, + "loss": 
1.4181, + "step": 9141 + }, + { + "epoch": 0.9643459915611814, + "grad_norm": 0.5620704293251038, + "learning_rate": 4.7954846436155104e-06, + "loss": 1.429, + "step": 9142 + }, + { + "epoch": 0.9644514767932489, + "grad_norm": 0.5795155167579651, + "learning_rate": 4.767181024933398e-06, + "loss": 1.4446, + "step": 9143 + }, + { + "epoch": 0.9645569620253165, + "grad_norm": 0.547041118144989, + "learning_rate": 4.738960913304724e-06, + "loss": 1.4435, + "step": 9144 + }, + { + "epoch": 0.9646624472573839, + "grad_norm": 0.5276931524276733, + "learning_rate": 4.710824311891709e-06, + "loss": 1.4295, + "step": 9145 + }, + { + "epoch": 0.9647679324894515, + "grad_norm": 0.5550037026405334, + "learning_rate": 4.682771223847166e-06, + "loss": 1.4522, + "step": 9146 + }, + { + "epoch": 0.964873417721519, + "grad_norm": 0.5796090960502625, + "learning_rate": 4.654801652314577e-06, + "loss": 1.4576, + "step": 9147 + }, + { + "epoch": 0.9649789029535865, + "grad_norm": 0.5726295709609985, + "learning_rate": 4.626915600428105e-06, + "loss": 1.4234, + "step": 9148 + }, + { + "epoch": 0.965084388185654, + "grad_norm": 0.5448333024978638, + "learning_rate": 4.5991130713124995e-06, + "loss": 1.4484, + "step": 9149 + }, + { + "epoch": 0.9651898734177216, + "grad_norm": 0.5702046155929565, + "learning_rate": 4.571394068083185e-06, + "loss": 1.4251, + "step": 9150 + }, + { + "epoch": 0.965295358649789, + "grad_norm": 0.5762125849723816, + "learning_rate": 4.543758593846175e-06, + "loss": 1.4056, + "step": 9151 + }, + { + "epoch": 0.9654008438818565, + "grad_norm": 0.5692216157913208, + "learning_rate": 4.516206651698246e-06, + "loss": 1.4631, + "step": 9152 + }, + { + "epoch": 0.9655063291139241, + "grad_norm": 0.5859667658805847, + "learning_rate": 4.488738244726593e-06, + "loss": 1.4558, + "step": 9153 + }, + { + "epoch": 0.9656118143459915, + "grad_norm": 0.5493971705436707, + "learning_rate": 4.4613533760093365e-06, + "loss": 1.4525, + "step": 9154 + }, + { + "epoch": 
0.9657172995780591, + "grad_norm": 0.5608270764350891, + "learning_rate": 4.434052048615022e-06, + "loss": 1.4558, + "step": 9155 + }, + { + "epoch": 0.9658227848101266, + "grad_norm": 0.5644533634185791, + "learning_rate": 4.4068342656028715e-06, + "loss": 1.4308, + "step": 9156 + }, + { + "epoch": 0.9659282700421941, + "grad_norm": 0.6233173608779907, + "learning_rate": 4.37970003002286e-06, + "loss": 1.4303, + "step": 9157 + }, + { + "epoch": 0.9660337552742616, + "grad_norm": 0.5654410719871521, + "learning_rate": 4.352649344915471e-06, + "loss": 1.4655, + "step": 9158 + }, + { + "epoch": 0.9661392405063292, + "grad_norm": 0.5424168705940247, + "learning_rate": 4.325682213311782e-06, + "loss": 1.4202, + "step": 9159 + }, + { + "epoch": 0.9662447257383966, + "grad_norm": 0.5731205940246582, + "learning_rate": 4.298798638233709e-06, + "loss": 1.4579, + "step": 9160 + }, + { + "epoch": 0.9663502109704641, + "grad_norm": 0.5525704026222229, + "learning_rate": 4.271998622693674e-06, + "loss": 1.4011, + "step": 9161 + }, + { + "epoch": 0.9664556962025317, + "grad_norm": 0.5519427061080933, + "learning_rate": 4.245282169694692e-06, + "loss": 1.428, + "step": 9162 + }, + { + "epoch": 0.9665611814345991, + "grad_norm": 0.5628721117973328, + "learning_rate": 4.218649282230536e-06, + "loss": 1.4065, + "step": 9163 + }, + { + "epoch": 0.9666666666666667, + "grad_norm": 0.5880379676818848, + "learning_rate": 4.192099963285484e-06, + "loss": 1.4744, + "step": 9164 + }, + { + "epoch": 0.9667721518987342, + "grad_norm": 0.600988507270813, + "learning_rate": 4.165634215834574e-06, + "loss": 1.4801, + "step": 9165 + }, + { + "epoch": 0.9668776371308017, + "grad_norm": 0.6244564652442932, + "learning_rate": 4.139252042843517e-06, + "loss": 1.4598, + "step": 9166 + }, + { + "epoch": 0.9669831223628692, + "grad_norm": 0.5411165356636047, + "learning_rate": 4.112953447268364e-06, + "loss": 1.4483, + "step": 9167 + }, + { + "epoch": 0.9670886075949368, + "grad_norm": 
0.5753564238548279, + "learning_rate": 4.086738432056092e-06, + "loss": 1.4374, + "step": 9168 + }, + { + "epoch": 0.9671940928270042, + "grad_norm": 0.5682023763656616, + "learning_rate": 4.060607000144351e-06, + "loss": 1.4656, + "step": 9169 + }, + { + "epoch": 0.9672995780590717, + "grad_norm": 0.5911415219306946, + "learning_rate": 4.034559154461049e-06, + "loss": 1.4011, + "step": 9170 + }, + { + "epoch": 0.9674050632911393, + "grad_norm": 0.5682186484336853, + "learning_rate": 4.008594897925183e-06, + "loss": 1.3918, + "step": 9171 + }, + { + "epoch": 0.9675105485232067, + "grad_norm": 0.5741108059883118, + "learning_rate": 3.982714233446094e-06, + "loss": 1.4813, + "step": 9172 + }, + { + "epoch": 0.9676160337552743, + "grad_norm": 0.5351475477218628, + "learning_rate": 3.956917163923879e-06, + "loss": 1.4567, + "step": 9173 + }, + { + "epoch": 0.9677215189873418, + "grad_norm": 0.5635825395584106, + "learning_rate": 3.931203692249141e-06, + "loss": 1.4375, + "step": 9174 + }, + { + "epoch": 0.9678270042194093, + "grad_norm": 0.5808190107345581, + "learning_rate": 3.905573821303327e-06, + "loss": 1.4293, + "step": 9175 + }, + { + "epoch": 0.9679324894514768, + "grad_norm": 0.578771710395813, + "learning_rate": 3.880027553958304e-06, + "loss": 1.4172, + "step": 9176 + }, + { + "epoch": 0.9680379746835444, + "grad_norm": 0.5545065402984619, + "learning_rate": 3.8545648930767005e-06, + "loss": 1.4371, + "step": 9177 + }, + { + "epoch": 0.9681434599156118, + "grad_norm": 0.5860899090766907, + "learning_rate": 3.8291858415117344e-06, + "loss": 1.4555, + "step": 9178 + }, + { + "epoch": 0.9682489451476793, + "grad_norm": 0.5586036443710327, + "learning_rate": 3.803890402107213e-06, + "loss": 1.441, + "step": 9179 + }, + { + "epoch": 0.9683544303797469, + "grad_norm": 0.5457783341407776, + "learning_rate": 3.7786785776976198e-06, + "loss": 1.4064, + "step": 9180 + }, + { + "epoch": 0.9684599156118143, + "grad_norm": 0.6103423237800598, + "learning_rate": 
3.7535503711080276e-06, + "loss": 1.4489, + "step": 9181 + }, + { + "epoch": 0.9685654008438819, + "grad_norm": 0.5588188171386719, + "learning_rate": 3.7285057851543515e-06, + "loss": 1.4301, + "step": 9182 + }, + { + "epoch": 0.9686708860759494, + "grad_norm": 0.5592346787452698, + "learning_rate": 3.703544822642846e-06, + "loss": 1.3613, + "step": 9183 + }, + { + "epoch": 0.9687763713080169, + "grad_norm": 0.5938290953636169, + "learning_rate": 3.6786674863704406e-06, + "loss": 1.4501, + "step": 9184 + }, + { + "epoch": 0.9688818565400844, + "grad_norm": 0.5946541428565979, + "learning_rate": 3.6538737791249053e-06, + "loss": 1.4489, + "step": 9185 + }, + { + "epoch": 0.9689873417721518, + "grad_norm": 0.5524193644523621, + "learning_rate": 3.629163703684352e-06, + "loss": 1.4291, + "step": 9186 + }, + { + "epoch": 0.9690928270042194, + "grad_norm": 0.5497533082962036, + "learning_rate": 3.604537262817814e-06, + "loss": 1.4465, + "step": 9187 + }, + { + "epoch": 0.9691983122362869, + "grad_norm": 0.5776081085205078, + "learning_rate": 3.579994459284752e-06, + "loss": 1.4479, + "step": 9188 + }, + { + "epoch": 0.9693037974683544, + "grad_norm": 0.5612188577651978, + "learning_rate": 3.555535295835216e-06, + "loss": 1.4558, + "step": 9189 + }, + { + "epoch": 0.9694092827004219, + "grad_norm": 0.5474337339401245, + "learning_rate": 3.5311597752100964e-06, + "loss": 1.4138, + "step": 9190 + }, + { + "epoch": 0.9695147679324895, + "grad_norm": 0.5664859414100647, + "learning_rate": 3.506867900140792e-06, + "loss": 1.4787, + "step": 9191 + }, + { + "epoch": 0.9696202531645569, + "grad_norm": 0.5542622208595276, + "learning_rate": 3.4826596733492087e-06, + "loss": 1.4314, + "step": 9192 + }, + { + "epoch": 0.9697257383966245, + "grad_norm": 0.5803922414779663, + "learning_rate": 3.4585350975481766e-06, + "loss": 1.4121, + "step": 9193 + }, + { + "epoch": 0.969831223628692, + "grad_norm": 0.5668203234672546, + "learning_rate": 3.4344941754408663e-06, + "loss": 1.4496, + 
"step": 9194 + }, + { + "epoch": 0.9699367088607594, + "grad_norm": 0.5784651637077332, + "learning_rate": 3.4105369097211238e-06, + "loss": 1.4233, + "step": 9195 + }, + { + "epoch": 0.970042194092827, + "grad_norm": 0.5616422295570374, + "learning_rate": 3.386663303073634e-06, + "loss": 1.4446, + "step": 9196 + }, + { + "epoch": 0.9701476793248945, + "grad_norm": 0.566922128200531, + "learning_rate": 3.362873358173424e-06, + "loss": 1.4453, + "step": 9197 + }, + { + "epoch": 0.970253164556962, + "grad_norm": 0.559252142906189, + "learning_rate": 3.339167077686278e-06, + "loss": 1.4054, + "step": 9198 + }, + { + "epoch": 0.9703586497890295, + "grad_norm": 0.5816913843154907, + "learning_rate": 3.3155444642687384e-06, + "loss": 1.4696, + "step": 9199 + }, + { + "epoch": 0.9704641350210971, + "grad_norm": 0.5716800689697266, + "learning_rate": 3.2920055205676867e-06, + "loss": 1.407, + "step": 9200 + }, + { + "epoch": 0.9705696202531645, + "grad_norm": 0.5815785527229309, + "learning_rate": 3.2685502492208475e-06, + "loss": 1.4515, + "step": 9201 + }, + { + "epoch": 0.9706751054852321, + "grad_norm": 0.5666601657867432, + "learning_rate": 3.245178652856534e-06, + "loss": 1.4533, + "step": 9202 + }, + { + "epoch": 0.9707805907172996, + "grad_norm": 0.5460169911384583, + "learning_rate": 3.221890734093569e-06, + "loss": 1.4121, + "step": 9203 + }, + { + "epoch": 0.970886075949367, + "grad_norm": 0.5576410889625549, + "learning_rate": 3.198686495541531e-06, + "loss": 1.4511, + "step": 9204 + }, + { + "epoch": 0.9709915611814346, + "grad_norm": 0.5517772436141968, + "learning_rate": 3.1755659398005066e-06, + "loss": 1.458, + "step": 9205 + }, + { + "epoch": 0.9710970464135021, + "grad_norm": 0.5704918503761292, + "learning_rate": 3.152529069461424e-06, + "loss": 1.4091, + "step": 9206 + }, + { + "epoch": 0.9712025316455696, + "grad_norm": 0.6014400124549866, + "learning_rate": 3.129575887105468e-06, + "loss": 1.4537, + "step": 9207 + }, + { + "epoch": 
0.9713080168776371, + "grad_norm": 0.5836089849472046, + "learning_rate": 3.1067063953048313e-06, + "loss": 1.4351, + "step": 9208 + }, + { + "epoch": 0.9714135021097047, + "grad_norm": 0.5491876006126404, + "learning_rate": 3.0839205966220474e-06, + "loss": 1.4213, + "step": 9209 + }, + { + "epoch": 0.9715189873417721, + "grad_norm": 0.5644343495368958, + "learning_rate": 3.06121849361049e-06, + "loss": 1.4311, + "step": 9210 + }, + { + "epoch": 0.9716244725738397, + "grad_norm": 0.5691460371017456, + "learning_rate": 3.0386000888139588e-06, + "loss": 1.4483, + "step": 9211 + }, + { + "epoch": 0.9717299578059072, + "grad_norm": 0.5528712272644043, + "learning_rate": 3.0160653847669252e-06, + "loss": 1.4618, + "step": 9212 + }, + { + "epoch": 0.9718354430379746, + "grad_norm": 0.5660435557365417, + "learning_rate": 2.9936143839946193e-06, + "loss": 1.4758, + "step": 9213 + }, + { + "epoch": 0.9719409282700422, + "grad_norm": 0.5552158951759338, + "learning_rate": 2.9712470890126962e-06, + "loss": 1.4257, + "step": 9214 + }, + { + "epoch": 0.9720464135021097, + "grad_norm": 0.5470249056816101, + "learning_rate": 2.9489635023275676e-06, + "loss": 1.4305, + "step": 9215 + }, + { + "epoch": 0.9721518987341772, + "grad_norm": 0.5475825667381287, + "learning_rate": 2.9267636264361517e-06, + "loss": 1.4104, + "step": 9216 + }, + { + "epoch": 0.9722573839662447, + "grad_norm": 0.5626589059829712, + "learning_rate": 2.90464746382621e-06, + "loss": 1.438, + "step": 9217 + }, + { + "epoch": 0.9723628691983123, + "grad_norm": 0.5685763955116272, + "learning_rate": 2.8826150169758425e-06, + "loss": 1.3988, + "step": 9218 + }, + { + "epoch": 0.9724683544303797, + "grad_norm": 0.5916923880577087, + "learning_rate": 2.8606662883539082e-06, + "loss": 1.4414, + "step": 9219 + }, + { + "epoch": 0.9725738396624473, + "grad_norm": 0.5608121156692505, + "learning_rate": 2.838801280419856e-06, + "loss": 1.436, + "step": 9220 + }, + { + "epoch": 0.9726793248945148, + "grad_norm": 
0.5562083125114441, + "learning_rate": 2.817019995623893e-06, + "loss": 1.4531, + "step": 9221 + }, + { + "epoch": 0.9727848101265822, + "grad_norm": 0.5562190413475037, + "learning_rate": 2.7953224364065667e-06, + "loss": 1.4078, + "step": 9222 + }, + { + "epoch": 0.9728902953586498, + "grad_norm": 0.5356373190879822, + "learning_rate": 2.7737086051992653e-06, + "loss": 1.4382, + "step": 9223 + }, + { + "epoch": 0.9729957805907173, + "grad_norm": 0.6036079525947571, + "learning_rate": 2.752178504423969e-06, + "loss": 1.4187, + "step": 9224 + }, + { + "epoch": 0.9731012658227848, + "grad_norm": 0.5715419054031372, + "learning_rate": 2.7307321364930804e-06, + "loss": 1.4229, + "step": 9225 + }, + { + "epoch": 0.9732067510548523, + "grad_norm": 0.5575011372566223, + "learning_rate": 2.7093695038099277e-06, + "loss": 1.4484, + "step": 9226 + }, + { + "epoch": 0.9733122362869199, + "grad_norm": 0.5457329750061035, + "learning_rate": 2.6880906087682622e-06, + "loss": 1.3972, + "step": 9227 + }, + { + "epoch": 0.9734177215189873, + "grad_norm": 0.5501576662063599, + "learning_rate": 2.66689545375251e-06, + "loss": 1.4428, + "step": 9228 + }, + { + "epoch": 0.9735232067510549, + "grad_norm": 0.5709729790687561, + "learning_rate": 2.6457840411376888e-06, + "loss": 1.4041, + "step": 9229 + }, + { + "epoch": 0.9736286919831224, + "grad_norm": 0.5390815734863281, + "learning_rate": 2.624756373289322e-06, + "loss": 1.4503, + "step": 9230 + }, + { + "epoch": 0.9737341772151898, + "grad_norm": 0.5564408302307129, + "learning_rate": 2.603812452563775e-06, + "loss": 1.4498, + "step": 9231 + }, + { + "epoch": 0.9738396624472574, + "grad_norm": 0.5449379086494446, + "learning_rate": 2.5829522813079207e-06, + "loss": 1.3882, + "step": 9232 + }, + { + "epoch": 0.9739451476793249, + "grad_norm": 0.5419228672981262, + "learning_rate": 2.5621758618591394e-06, + "loss": 1.4164, + "step": 9233 + }, + { + "epoch": 0.9740506329113924, + "grad_norm": 0.5501412749290466, + "learning_rate": 
2.541483196545735e-06, + "loss": 1.439, + "step": 9234 + }, + { + "epoch": 0.9741561181434599, + "grad_norm": 0.5825851559638977, + "learning_rate": 2.52087428768627e-06, + "loss": 1.4276, + "step": 9235 + }, + { + "epoch": 0.9742616033755275, + "grad_norm": 0.5753985643386841, + "learning_rate": 2.5003491375900633e-06, + "loss": 1.4237, + "step": 9236 + }, + { + "epoch": 0.9743670886075949, + "grad_norm": 0.5521145462989807, + "learning_rate": 2.4799077485571087e-06, + "loss": 1.4248, + "step": 9237 + }, + { + "epoch": 0.9744725738396625, + "grad_norm": 0.5614364147186279, + "learning_rate": 2.4595501228779906e-06, + "loss": 1.4315, + "step": 9238 + }, + { + "epoch": 0.97457805907173, + "grad_norm": 0.5513648390769958, + "learning_rate": 2.4392762628338838e-06, + "loss": 1.3917, + "step": 9239 + }, + { + "epoch": 0.9746835443037974, + "grad_norm": 0.5621462464332581, + "learning_rate": 2.419086170696472e-06, + "loss": 1.4287, + "step": 9240 + }, + { + "epoch": 0.974789029535865, + "grad_norm": 0.5427923202514648, + "learning_rate": 2.3989798487282776e-06, + "loss": 1.4096, + "step": 9241 + }, + { + "epoch": 0.9748945147679325, + "grad_norm": 0.5525116324424744, + "learning_rate": 2.3789572991822495e-06, + "loss": 1.4649, + "step": 9242 + }, + { + "epoch": 0.975, + "grad_norm": 0.5631653666496277, + "learning_rate": 2.3590185243020092e-06, + "loss": 1.4101, + "step": 9243 + }, + { + "epoch": 0.9751054852320675, + "grad_norm": 0.5742990374565125, + "learning_rate": 2.3391635263218526e-06, + "loss": 1.4592, + "step": 9244 + }, + { + "epoch": 0.9752109704641351, + "grad_norm": 0.5575211644172668, + "learning_rate": 2.3193923074665834e-06, + "loss": 1.3955, + "step": 9245 + }, + { + "epoch": 0.9753164556962025, + "grad_norm": 0.5517566204071045, + "learning_rate": 2.299704869951763e-06, + "loss": 1.44, + "step": 9246 + }, + { + "epoch": 0.9754219409282701, + "grad_norm": 0.5542289614677429, + "learning_rate": 2.2801012159832933e-06, + "loss": 1.4583, + "step": 9247 + 
}, + { + "epoch": 0.9755274261603376, + "grad_norm": 0.5476785898208618, + "learning_rate": 2.2605813477579172e-06, + "loss": 1.4299, + "step": 9248 + }, + { + "epoch": 0.975632911392405, + "grad_norm": 0.5409460067749023, + "learning_rate": 2.2411452674630517e-06, + "loss": 1.4539, + "step": 9249 + }, + { + "epoch": 0.9757383966244726, + "grad_norm": 0.5326105356216431, + "learning_rate": 2.2217929772764545e-06, + "loss": 1.4302, + "step": 9250 + }, + { + "epoch": 0.97584388185654, + "grad_norm": 0.5568149089813232, + "learning_rate": 2.2025244793667242e-06, + "loss": 1.4096, + "step": 9251 + }, + { + "epoch": 0.9759493670886076, + "grad_norm": 0.560136079788208, + "learning_rate": 2.1833397758929674e-06, + "loss": 1.437, + "step": 9252 + }, + { + "epoch": 0.9760548523206751, + "grad_norm": 0.5950407385826111, + "learning_rate": 2.1642388690049643e-06, + "loss": 1.4435, + "step": 9253 + }, + { + "epoch": 0.9761603375527426, + "grad_norm": 0.5410525798797607, + "learning_rate": 2.1452217608430857e-06, + "loss": 1.4319, + "step": 9254 + }, + { + "epoch": 0.9762658227848101, + "grad_norm": 0.5483227372169495, + "learning_rate": 2.126288453538211e-06, + "loss": 1.4189, + "step": 9255 + }, + { + "epoch": 0.9763713080168777, + "grad_norm": 0.5454413294792175, + "learning_rate": 2.107438949211976e-06, + "loss": 1.4192, + "step": 9256 + }, + { + "epoch": 0.9764767932489451, + "grad_norm": 0.5685233473777771, + "learning_rate": 2.0886732499764416e-06, + "loss": 1.4635, + "step": 9257 + }, + { + "epoch": 0.9765822784810126, + "grad_norm": 0.5894468426704407, + "learning_rate": 2.069991357934592e-06, + "loss": 1.4226, + "step": 9258 + }, + { + "epoch": 0.9766877637130802, + "grad_norm": 0.5661258697509766, + "learning_rate": 2.0513932751796695e-06, + "loss": 1.4208, + "step": 9259 + }, + { + "epoch": 0.9767932489451476, + "grad_norm": 0.5686420202255249, + "learning_rate": 2.0328790037957568e-06, + "loss": 1.4199, + "step": 9260 + }, + { + "epoch": 0.9768987341772152, + 
"grad_norm": 0.5612198114395142, + "learning_rate": 2.0144485458574446e-06, + "loss": 1.4389, + "step": 9261 + }, + { + "epoch": 0.9770042194092827, + "grad_norm": 0.5782343149185181, + "learning_rate": 1.9961019034299976e-06, + "loss": 1.4403, + "step": 9262 + }, + { + "epoch": 0.9771097046413502, + "grad_norm": 0.551181435585022, + "learning_rate": 1.977839078569188e-06, + "loss": 1.4517, + "step": 9263 + }, + { + "epoch": 0.9772151898734177, + "grad_norm": 0.5989514589309692, + "learning_rate": 1.959660073321545e-06, + "loss": 1.4672, + "step": 9264 + }, + { + "epoch": 0.9773206751054853, + "grad_norm": 0.5939518213272095, + "learning_rate": 1.94156488972394e-06, + "loss": 1.4311, + "step": 9265 + }, + { + "epoch": 0.9774261603375527, + "grad_norm": 0.579993724822998, + "learning_rate": 1.9235535298042506e-06, + "loss": 1.4526, + "step": 9266 + }, + { + "epoch": 0.9775316455696202, + "grad_norm": 0.553063154220581, + "learning_rate": 1.905625995580612e-06, + "loss": 1.4291, + "step": 9267 + }, + { + "epoch": 0.9776371308016878, + "grad_norm": 0.5575437545776367, + "learning_rate": 1.8877822890618346e-06, + "loss": 1.4256, + "step": 9268 + }, + { + "epoch": 0.9777426160337552, + "grad_norm": 0.5759626626968384, + "learning_rate": 1.8700224122475683e-06, + "loss": 1.4402, + "step": 9269 + }, + { + "epoch": 0.9778481012658228, + "grad_norm": 0.5411273837089539, + "learning_rate": 1.8523463671278052e-06, + "loss": 1.4397, + "step": 9270 + }, + { + "epoch": 0.9779535864978903, + "grad_norm": 0.5439803004264832, + "learning_rate": 1.8347541556832104e-06, + "loss": 1.3979, + "step": 9271 + }, + { + "epoch": 0.9780590717299578, + "grad_norm": 0.5580723881721497, + "learning_rate": 1.8172457798850407e-06, + "loss": 1.4345, + "step": 9272 + }, + { + "epoch": 0.9781645569620253, + "grad_norm": 0.5463303923606873, + "learning_rate": 1.7998212416953096e-06, + "loss": 1.4402, + "step": 9273 + }, + { + "epoch": 0.9782700421940929, + "grad_norm": 0.5939915776252747, + 
"learning_rate": 1.782480543066456e-06, + "loss": 1.4571, + "step": 9274 + }, + { + "epoch": 0.9783755274261603, + "grad_norm": 0.5259702801704407, + "learning_rate": 1.7652236859416748e-06, + "loss": 1.4537, + "step": 9275 + }, + { + "epoch": 0.9784810126582278, + "grad_norm": 0.5914366841316223, + "learning_rate": 1.7480506722545864e-06, + "loss": 1.4053, + "step": 9276 + }, + { + "epoch": 0.9785864978902954, + "grad_norm": 0.5458679795265198, + "learning_rate": 1.7309615039294847e-06, + "loss": 1.4171, + "step": 9277 + }, + { + "epoch": 0.9786919831223628, + "grad_norm": 0.5533155202865601, + "learning_rate": 1.7139561828813377e-06, + "loss": 1.4592, + "step": 9278 + }, + { + "epoch": 0.9787974683544304, + "grad_norm": 0.5774642825126648, + "learning_rate": 1.6970347110157879e-06, + "loss": 1.4355, + "step": 9279 + }, + { + "epoch": 0.9789029535864979, + "grad_norm": 0.5630315542221069, + "learning_rate": 1.6801970902288188e-06, + "loss": 1.3889, + "step": 9280 + }, + { + "epoch": 0.9790084388185654, + "grad_norm": 0.5652036070823669, + "learning_rate": 1.6634433224072543e-06, + "loss": 1.425, + "step": 9281 + }, + { + "epoch": 0.9791139240506329, + "grad_norm": 0.5560063123703003, + "learning_rate": 1.6467734094283427e-06, + "loss": 1.4266, + "step": 9282 + }, + { + "epoch": 0.9792194092827005, + "grad_norm": 0.5387572646141052, + "learning_rate": 1.630187353160173e-06, + "loss": 1.4265, + "step": 9283 + }, + { + "epoch": 0.9793248945147679, + "grad_norm": 0.5377207398414612, + "learning_rate": 1.6136851554611753e-06, + "loss": 1.4448, + "step": 9284 + }, + { + "epoch": 0.9794303797468354, + "grad_norm": 0.5697512626647949, + "learning_rate": 1.5972668181805373e-06, + "loss": 1.4626, + "step": 9285 + }, + { + "epoch": 0.979535864978903, + "grad_norm": 0.5539559125900269, + "learning_rate": 1.580932343158037e-06, + "loss": 1.4237, + "step": 9286 + }, + { + "epoch": 0.9796413502109704, + "grad_norm": 0.5521272420883179, + "learning_rate": 1.5646817322240436e-06, 
+ "loss": 1.4391, + "step": 9287 + }, + { + "epoch": 0.979746835443038, + "grad_norm": 0.534197986125946, + "learning_rate": 1.5485149871995175e-06, + "loss": 1.4381, + "step": 9288 + }, + { + "epoch": 0.9798523206751055, + "grad_norm": 0.5980522632598877, + "learning_rate": 1.532432109895926e-06, + "loss": 1.4243, + "step": 9289 + }, + { + "epoch": 0.979957805907173, + "grad_norm": 0.5463370084762573, + "learning_rate": 1.5164331021155774e-06, + "loss": 1.4498, + "step": 9290 + }, + { + "epoch": 0.9800632911392405, + "grad_norm": 0.5614591836929321, + "learning_rate": 1.5005179656511213e-06, + "loss": 1.4079, + "step": 9291 + }, + { + "epoch": 0.9801687763713081, + "grad_norm": 0.5314682722091675, + "learning_rate": 1.4846867022860477e-06, + "loss": 1.4096, + "step": 9292 + }, + { + "epoch": 0.9802742616033755, + "grad_norm": 0.5658854842185974, + "learning_rate": 1.4689393137941876e-06, + "loss": 1.4473, + "step": 9293 + }, + { + "epoch": 0.980379746835443, + "grad_norm": 0.5978246927261353, + "learning_rate": 1.4532758019402958e-06, + "loss": 1.4566, + "step": 9294 + }, + { + "epoch": 0.9804852320675106, + "grad_norm": 0.5417529940605164, + "learning_rate": 1.4376961684793854e-06, + "loss": 1.4479, + "step": 9295 + }, + { + "epoch": 0.980590717299578, + "grad_norm": 0.5715115666389465, + "learning_rate": 1.4222004151572265e-06, + "loss": 1.4152, + "step": 9296 + }, + { + "epoch": 0.9806962025316456, + "grad_norm": 0.5334660410881042, + "learning_rate": 1.4067885437103467e-06, + "loss": 1.4254, + "step": 9297 + }, + { + "epoch": 0.9808016877637131, + "grad_norm": 0.5384794473648071, + "learning_rate": 1.3914605558656146e-06, + "loss": 1.4584, + "step": 9298 + }, + { + "epoch": 0.9809071729957806, + "grad_norm": 0.5619049072265625, + "learning_rate": 1.376216453340573e-06, + "loss": 1.432, + "step": 9299 + }, + { + "epoch": 0.9810126582278481, + "grad_norm": 0.559927761554718, + "learning_rate": 1.3610562378435221e-06, + "loss": 1.4494, + "step": 9300 + }, + { + 
"epoch": 0.9811181434599157, + "grad_norm": 0.5629281997680664, + "learning_rate": 1.345979911073103e-06, + "loss": 1.4131, + "step": 9301 + }, + { + "epoch": 0.9812236286919831, + "grad_norm": 0.5559492707252502, + "learning_rate": 1.3309874747187978e-06, + "loss": 1.4344, + "step": 9302 + }, + { + "epoch": 0.9813291139240506, + "grad_norm": 0.5982182025909424, + "learning_rate": 1.3160789304605958e-06, + "loss": 1.4533, + "step": 9303 + }, + { + "epoch": 0.9814345991561182, + "grad_norm": 0.567229688167572, + "learning_rate": 1.3012542799689108e-06, + "loss": 1.4147, + "step": 9304 + }, + { + "epoch": 0.9815400843881856, + "grad_norm": 0.5585860013961792, + "learning_rate": 1.286513524905164e-06, + "loss": 1.4462, + "step": 9305 + }, + { + "epoch": 0.9816455696202532, + "grad_norm": 0.5537999272346497, + "learning_rate": 1.2718566669208675e-06, + "loss": 1.4529, + "step": 9306 + }, + { + "epoch": 0.9817510548523207, + "grad_norm": 0.5748533010482788, + "learning_rate": 1.2572837076586241e-06, + "loss": 1.4731, + "step": 9307 + }, + { + "epoch": 0.9818565400843882, + "grad_norm": 0.5583285689353943, + "learning_rate": 1.2427946487512941e-06, + "loss": 1.4477, + "step": 9308 + }, + { + "epoch": 0.9819620253164557, + "grad_norm": 0.5480782389640808, + "learning_rate": 1.2283894918224125e-06, + "loss": 1.3921, + "step": 9309 + }, + { + "epoch": 0.9820675105485233, + "grad_norm": 0.567840039730072, + "learning_rate": 1.2140682384862712e-06, + "loss": 1.4399, + "step": 9310 + }, + { + "epoch": 0.9821729957805907, + "grad_norm": 0.55620938539505, + "learning_rate": 1.199830890347503e-06, + "loss": 1.4252, + "step": 9311 + }, + { + "epoch": 0.9822784810126582, + "grad_norm": 0.5412833094596863, + "learning_rate": 1.185677449001582e-06, + "loss": 1.4313, + "step": 9312 + }, + { + "epoch": 0.9823839662447258, + "grad_norm": 0.5556815266609192, + "learning_rate": 1.1716079160344061e-06, + "loss": 1.4242, + "step": 9313 + }, + { + "epoch": 0.9824894514767932, + "grad_norm": 
0.5687146782875061, + "learning_rate": 1.1576222930225478e-06, + "loss": 1.4777, + "step": 9314 + }, + { + "epoch": 0.9825949367088608, + "grad_norm": 0.5476521253585815, + "learning_rate": 1.143720581533253e-06, + "loss": 1.4083, + "step": 9315 + }, + { + "epoch": 0.9827004219409282, + "grad_norm": 0.557808518409729, + "learning_rate": 1.1299027831241094e-06, + "loss": 1.4633, + "step": 9316 + }, + { + "epoch": 0.9828059071729958, + "grad_norm": 0.5582237243652344, + "learning_rate": 1.1161688993435449e-06, + "loss": 1.4169, + "step": 9317 + }, + { + "epoch": 0.9829113924050633, + "grad_norm": 0.5307769179344177, + "learning_rate": 1.1025189317305784e-06, + "loss": 1.4136, + "step": 9318 + }, + { + "epoch": 0.9830168776371307, + "grad_norm": 0.5918707251548767, + "learning_rate": 1.0889528818147366e-06, + "loss": 1.4335, + "step": 9319 + }, + { + "epoch": 0.9831223628691983, + "grad_norm": 0.554032027721405, + "learning_rate": 1.0754707511161365e-06, + "loss": 1.42, + "step": 9320 + }, + { + "epoch": 0.9832278481012658, + "grad_norm": 0.5415098667144775, + "learning_rate": 1.0620725411454868e-06, + "loss": 1.4511, + "step": 9321 + }, + { + "epoch": 0.9833333333333333, + "grad_norm": 0.5734626054763794, + "learning_rate": 1.0487582534040863e-06, + "loss": 1.4569, + "step": 9322 + }, + { + "epoch": 0.9834388185654008, + "grad_norm": 0.5896512866020203, + "learning_rate": 1.0355278893839915e-06, + "loss": 1.5053, + "step": 9323 + }, + { + "epoch": 0.9835443037974684, + "grad_norm": 0.5568007826805115, + "learning_rate": 1.0223814505676832e-06, + "loss": 1.4474, + "step": 9324 + }, + { + "epoch": 0.9836497890295358, + "grad_norm": 0.545231819152832, + "learning_rate": 1.009318938428233e-06, + "loss": 1.4352, + "step": 9325 + }, + { + "epoch": 0.9837552742616034, + "grad_norm": 0.5537331700325012, + "learning_rate": 9.963403544294702e-07, + "loss": 1.4562, + "step": 9326 + }, + { + "epoch": 0.9838607594936709, + "grad_norm": 0.5742846131324768, + "learning_rate": 
9.834457000255647e-07, + "loss": 1.4488, + "step": 9327 + }, + { + "epoch": 0.9839662447257383, + "grad_norm": 0.5548868775367737, + "learning_rate": 9.706349766615275e-07, + "loss": 1.4014, + "step": 9328 + }, + { + "epoch": 0.9840717299578059, + "grad_norm": 0.6150918006896973, + "learning_rate": 9.579081857728766e-07, + "loss": 1.4235, + "step": 9329 + }, + { + "epoch": 0.9841772151898734, + "grad_norm": 0.5834855437278748, + "learning_rate": 9.452653287856383e-07, + "loss": 1.4109, + "step": 9330 + }, + { + "epoch": 0.9842827004219409, + "grad_norm": 0.549523651599884, + "learning_rate": 9.327064071165126e-07, + "loss": 1.4522, + "step": 9331 + }, + { + "epoch": 0.9843881856540084, + "grad_norm": 0.5379555225372314, + "learning_rate": 9.202314221728735e-07, + "loss": 1.4272, + "step": 9332 + }, + { + "epoch": 0.984493670886076, + "grad_norm": 0.5468870997428894, + "learning_rate": 9.078403753525199e-07, + "loss": 1.4619, + "step": 9333 + }, + { + "epoch": 0.9845991561181434, + "grad_norm": 0.5762922167778015, + "learning_rate": 8.955332680440076e-07, + "loss": 1.4582, + "step": 9334 + }, + { + "epoch": 0.984704641350211, + "grad_norm": 0.553932785987854, + "learning_rate": 8.833101016263168e-07, + "loss": 1.404, + "step": 9335 + }, + { + "epoch": 0.9848101265822785, + "grad_norm": 0.5558900237083435, + "learning_rate": 8.711708774691851e-07, + "loss": 1.4344, + "step": 9336 + }, + { + "epoch": 0.984915611814346, + "grad_norm": 0.5474409461021423, + "learning_rate": 8.591155969327746e-07, + "loss": 1.4376, + "step": 9337 + }, + { + "epoch": 0.9850210970464135, + "grad_norm": 0.5290424227714539, + "learning_rate": 8.47144261368088e-07, + "loss": 1.4271, + "step": 9338 + }, + { + "epoch": 0.985126582278481, + "grad_norm": 0.5453745722770691, + "learning_rate": 8.352568721165521e-07, + "loss": 1.4338, + "step": 9339 + }, + { + "epoch": 0.9852320675105485, + "grad_norm": 0.5514295101165771, + "learning_rate": 8.234534305101015e-07, + "loss": 1.4615, + "step": 9340 + 
}, + { + "epoch": 0.985337552742616, + "grad_norm": 0.5628117322921753, + "learning_rate": 8.117339378714283e-07, + "loss": 1.4563, + "step": 9341 + }, + { + "epoch": 0.9854430379746836, + "grad_norm": 0.5622082948684692, + "learning_rate": 8.00098395513732e-07, + "loss": 1.4551, + "step": 9342 + }, + { + "epoch": 0.985548523206751, + "grad_norm": 0.5473676919937134, + "learning_rate": 7.885468047408862e-07, + "loss": 1.4802, + "step": 9343 + }, + { + "epoch": 0.9856540084388186, + "grad_norm": 0.5688801407814026, + "learning_rate": 7.770791668472721e-07, + "loss": 1.429, + "step": 9344 + }, + { + "epoch": 0.9857594936708861, + "grad_norm": 0.5729360580444336, + "learning_rate": 7.656954831178619e-07, + "loss": 1.4438, + "step": 9345 + }, + { + "epoch": 0.9858649789029535, + "grad_norm": 0.5547834038734436, + "learning_rate": 7.543957548283021e-07, + "loss": 1.4515, + "step": 9346 + }, + { + "epoch": 0.9859704641350211, + "grad_norm": 0.5618652701377869, + "learning_rate": 7.431799832448294e-07, + "loss": 1.449, + "step": 9347 + }, + { + "epoch": 0.9860759493670886, + "grad_norm": 0.5526062250137329, + "learning_rate": 7.320481696241887e-07, + "loss": 1.4764, + "step": 9348 + }, + { + "epoch": 0.9861814345991561, + "grad_norm": 0.5876815319061279, + "learning_rate": 7.210003152136324e-07, + "loss": 1.4428, + "step": 9349 + }, + { + "epoch": 0.9862869198312236, + "grad_norm": 0.5414301156997681, + "learning_rate": 7.100364212513367e-07, + "loss": 1.4258, + "step": 9350 + }, + { + "epoch": 0.9863924050632912, + "grad_norm": 0.5498256087303162, + "learning_rate": 6.991564889656521e-07, + "loss": 1.4793, + "step": 9351 + }, + { + "epoch": 0.9864978902953586, + "grad_norm": 0.5858155488967896, + "learning_rate": 6.883605195759369e-07, + "loss": 1.4395, + "step": 9352 + }, + { + "epoch": 0.9866033755274262, + "grad_norm": 0.576528787612915, + "learning_rate": 6.776485142918065e-07, + "loss": 1.4614, + "step": 9353 + }, + { + "epoch": 0.9867088607594937, + "grad_norm": 
0.5532556772232056, + "learning_rate": 6.67020474313551e-07, + "loss": 1.4253, + "step": 9354 + }, + { + "epoch": 0.9868143459915611, + "grad_norm": 0.5601005554199219, + "learning_rate": 6.564764008322177e-07, + "loss": 1.4498, + "step": 9355 + }, + { + "epoch": 0.9869198312236287, + "grad_norm": 0.5519230365753174, + "learning_rate": 6.460162950292781e-07, + "loss": 1.4347, + "step": 9356 + }, + { + "epoch": 0.9870253164556962, + "grad_norm": 0.5669980049133301, + "learning_rate": 6.356401580767945e-07, + "loss": 1.4246, + "step": 9357 + }, + { + "epoch": 0.9871308016877637, + "grad_norm": 0.5446463227272034, + "learning_rate": 6.253479911375037e-07, + "loss": 1.4206, + "step": 9358 + }, + { + "epoch": 0.9872362869198312, + "grad_norm": 0.5647793412208557, + "learning_rate": 6.151397953647331e-07, + "loss": 1.4663, + "step": 9359 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 0.5617344975471497, + "learning_rate": 6.050155719023176e-07, + "loss": 1.4415, + "step": 9360 + }, + { + "epoch": 0.9874472573839662, + "grad_norm": 0.5626500844955444, + "learning_rate": 5.949753218846832e-07, + "loss": 1.4388, + "step": 9361 + }, + { + "epoch": 0.9875527426160338, + "grad_norm": 0.5644152760505676, + "learning_rate": 5.850190464369298e-07, + "loss": 1.4491, + "step": 9362 + }, + { + "epoch": 0.9876582278481013, + "grad_norm": 0.5843236446380615, + "learning_rate": 5.751467466747484e-07, + "loss": 1.4336, + "step": 9363 + }, + { + "epoch": 0.9877637130801687, + "grad_norm": 0.5449989438056946, + "learning_rate": 5.653584237043374e-07, + "loss": 1.4407, + "step": 9364 + }, + { + "epoch": 0.9878691983122363, + "grad_norm": 0.5701293349266052, + "learning_rate": 5.556540786224862e-07, + "loss": 1.4423, + "step": 9365 + }, + { + "epoch": 0.9879746835443038, + "grad_norm": 0.5863252282142639, + "learning_rate": 5.460337125167414e-07, + "loss": 1.4563, + "step": 9366 + }, + { + "epoch": 0.9880801687763713, + "grad_norm": 0.5371816754341125, + "learning_rate": 
5.364973264649908e-07, + "loss": 1.4041, + "step": 9367 + }, + { + "epoch": 0.9881856540084388, + "grad_norm": 0.5744244456291199, + "learning_rate": 5.270449215358797e-07, + "loss": 1.4518, + "step": 9368 + }, + { + "epoch": 0.9882911392405064, + "grad_norm": 0.5548229217529297, + "learning_rate": 5.176764987885607e-07, + "loss": 1.4213, + "step": 9369 + }, + { + "epoch": 0.9883966244725738, + "grad_norm": 0.5630658268928528, + "learning_rate": 5.08392059272944e-07, + "loss": 1.4808, + "step": 9370 + }, + { + "epoch": 0.9885021097046414, + "grad_norm": 0.547455906867981, + "learning_rate": 4.991916040291977e-07, + "loss": 1.4202, + "step": 9371 + }, + { + "epoch": 0.9886075949367089, + "grad_norm": 0.5385945439338684, + "learning_rate": 4.900751340884135e-07, + "loss": 1.4293, + "step": 9372 + }, + { + "epoch": 0.9887130801687763, + "grad_norm": 0.5322810411453247, + "learning_rate": 4.810426504721077e-07, + "loss": 1.4439, + "step": 9373 + }, + { + "epoch": 0.9888185654008439, + "grad_norm": 0.5714303851127625, + "learning_rate": 4.720941541923873e-07, + "loss": 1.3844, + "step": 9374 + }, + { + "epoch": 0.9889240506329114, + "grad_norm": 0.5458230972290039, + "learning_rate": 4.632296462520336e-07, + "loss": 1.4583, + "step": 9375 + }, + { + "epoch": 0.9890295358649789, + "grad_norm": 0.5338820815086365, + "learning_rate": 4.544491276443352e-07, + "loss": 1.3933, + "step": 9376 + }, + { + "epoch": 0.9891350210970464, + "grad_norm": 0.5699179172515869, + "learning_rate": 4.457525993531719e-07, + "loss": 1.4034, + "step": 9377 + }, + { + "epoch": 0.989240506329114, + "grad_norm": 0.5579656362533569, + "learning_rate": 4.371400623530142e-07, + "loss": 1.4719, + "step": 9378 + }, + { + "epoch": 0.9893459915611814, + "grad_norm": 0.5444795489311218, + "learning_rate": 4.2861151760900665e-07, + "loss": 1.4451, + "step": 9379 + }, + { + "epoch": 0.989451476793249, + "grad_norm": 0.5390073657035828, + "learning_rate": 4.2016696607680147e-07, + "loss": 1.432, + "step": 
9380 + }, + { + "epoch": 0.9895569620253165, + "grad_norm": 0.5387760400772095, + "learning_rate": 4.118064087025586e-07, + "loss": 1.393, + "step": 9381 + }, + { + "epoch": 0.989662447257384, + "grad_norm": 0.5648242831230164, + "learning_rate": 4.035298464232784e-07, + "loss": 1.4445, + "step": 9382 + }, + { + "epoch": 0.9897679324894515, + "grad_norm": 0.5384402871131897, + "learning_rate": 3.953372801662192e-07, + "loss": 1.4481, + "step": 9383 + }, + { + "epoch": 0.9898734177215189, + "grad_norm": 0.5555413961410522, + "learning_rate": 3.8722871084956313e-07, + "loss": 1.4239, + "step": 9384 + }, + { + "epoch": 0.9899789029535865, + "grad_norm": 0.5685953497886658, + "learning_rate": 3.7920413938175027e-07, + "loss": 1.4222, + "step": 9385 + }, + { + "epoch": 0.990084388185654, + "grad_norm": 0.5625655055046082, + "learning_rate": 3.7126356666214447e-07, + "loss": 1.4285, + "step": 9386 + }, + { + "epoch": 0.9901898734177215, + "grad_norm": 0.5275998115539551, + "learning_rate": 3.6340699358036743e-07, + "loss": 1.4541, + "step": 9387 + }, + { + "epoch": 0.990295358649789, + "grad_norm": 0.5684517621994019, + "learning_rate": 3.5563442101696486e-07, + "loss": 1.4125, + "step": 9388 + }, + { + "epoch": 0.9904008438818566, + "grad_norm": 0.6154665946960449, + "learning_rate": 3.479458498426569e-07, + "loss": 1.4458, + "step": 9389 + }, + { + "epoch": 0.990506329113924, + "grad_norm": 0.5787166953086853, + "learning_rate": 3.4034128091917085e-07, + "loss": 1.4429, + "step": 9390 + }, + { + "epoch": 0.9906118143459915, + "grad_norm": 0.5460839867591858, + "learning_rate": 3.328207150986584e-07, + "loss": 1.4373, + "step": 9391 + }, + { + "epoch": 0.9907172995780591, + "grad_norm": 0.5765326619148254, + "learning_rate": 3.2538415322369563e-07, + "loss": 1.4434, + "step": 9392 + }, + { + "epoch": 0.9908227848101265, + "grad_norm": 0.5507823824882507, + "learning_rate": 3.180315961276159e-07, + "loss": 1.4385, + "step": 9393 + }, + { + "epoch": 0.9909282700421941, + 
"grad_norm": 0.562372088432312, + "learning_rate": 3.107630446344267e-07, + "loss": 1.4217, + "step": 9394 + }, + { + "epoch": 0.9910337552742616, + "grad_norm": 0.5684598088264465, + "learning_rate": 3.035784995584767e-07, + "loss": 1.4895, + "step": 9395 + }, + { + "epoch": 0.9911392405063291, + "grad_norm": 0.5465571880340576, + "learning_rate": 2.964779617049551e-07, + "loss": 1.4242, + "step": 9396 + }, + { + "epoch": 0.9912447257383966, + "grad_norm": 0.5707938075065613, + "learning_rate": 2.8946143186930896e-07, + "loss": 1.4435, + "step": 9397 + }, + { + "epoch": 0.9913502109704642, + "grad_norm": 0.5751621127128601, + "learning_rate": 2.825289108379925e-07, + "loss": 1.4439, + "step": 9398 + }, + { + "epoch": 0.9914556962025316, + "grad_norm": 0.5446529388427734, + "learning_rate": 2.756803993877177e-07, + "loss": 1.4648, + "step": 9399 + }, + { + "epoch": 0.9915611814345991, + "grad_norm": 0.5442861318588257, + "learning_rate": 2.689158982859541e-07, + "loss": 1.4491, + "step": 9400 + }, + { + "epoch": 0.9916666666666667, + "grad_norm": 0.5599488615989685, + "learning_rate": 2.622354082905953e-07, + "loss": 1.3956, + "step": 9401 + }, + { + "epoch": 0.9917721518987341, + "grad_norm": 0.5454550981521606, + "learning_rate": 2.556389301502926e-07, + "loss": 1.4414, + "step": 9402 + }, + { + "epoch": 0.9918776371308017, + "grad_norm": 0.5856583714485168, + "learning_rate": 2.491264646042879e-07, + "loss": 1.3915, + "step": 9403 + }, + { + "epoch": 0.9919831223628692, + "grad_norm": 0.5762092471122742, + "learning_rate": 2.426980123821643e-07, + "loss": 1.4218, + "step": 9404 + }, + { + "epoch": 0.9920886075949367, + "grad_norm": 0.5551614761352539, + "learning_rate": 2.3635357420442872e-07, + "loss": 1.4154, + "step": 9405 + }, + { + "epoch": 0.9921940928270042, + "grad_norm": 0.5621278285980225, + "learning_rate": 2.3009315078192926e-07, + "loss": 1.4181, + "step": 9406 + }, + { + "epoch": 0.9922995780590718, + "grad_norm": 0.5573563575744629, + 
"learning_rate": 2.2391674281610486e-07, + "loss": 1.4175, + "step": 9407 + }, + { + "epoch": 0.9924050632911392, + "grad_norm": 0.5560628771781921, + "learning_rate": 2.1782435099923503e-07, + "loss": 1.4449, + "step": 9408 + }, + { + "epoch": 0.9925105485232067, + "grad_norm": 0.5838240385055542, + "learning_rate": 2.1181597601385716e-07, + "loss": 1.443, + "step": 9409 + }, + { + "epoch": 0.9926160337552743, + "grad_norm": 0.575861930847168, + "learning_rate": 2.05891618533266e-07, + "loss": 1.4238, + "step": 9410 + }, + { + "epoch": 0.9927215189873417, + "grad_norm": 0.5364864468574524, + "learning_rate": 2.0005127922134713e-07, + "loss": 1.4848, + "step": 9411 + }, + { + "epoch": 0.9928270042194093, + "grad_norm": 0.5687334537506104, + "learning_rate": 1.942949587324938e-07, + "loss": 1.3929, + "step": 9412 + }, + { + "epoch": 0.9929324894514768, + "grad_norm": 0.5633195638656616, + "learning_rate": 1.8862265771177333e-07, + "loss": 1.4063, + "step": 9413 + }, + { + "epoch": 0.9930379746835443, + "grad_norm": 0.5692738890647888, + "learning_rate": 1.8303437679476065e-07, + "loss": 1.4759, + "step": 9414 + }, + { + "epoch": 0.9931434599156118, + "grad_norm": 0.5911630392074585, + "learning_rate": 1.775301166077048e-07, + "loss": 1.4601, + "step": 9415 + }, + { + "epoch": 0.9932489451476794, + "grad_norm": 0.556846022605896, + "learning_rate": 1.7210987776736243e-07, + "loss": 1.4312, + "step": 9416 + }, + { + "epoch": 0.9933544303797468, + "grad_norm": 0.568057119846344, + "learning_rate": 1.6677366088099777e-07, + "loss": 1.4657, + "step": 9417 + }, + { + "epoch": 0.9934599156118143, + "grad_norm": 0.5469739437103271, + "learning_rate": 1.6152146654671573e-07, + "loss": 1.4245, + "step": 9418 + }, + { + "epoch": 0.9935654008438819, + "grad_norm": 0.5530745983123779, + "learning_rate": 1.5635329535304554e-07, + "loss": 1.4663, + "step": 9419 + }, + { + "epoch": 0.9936708860759493, + "grad_norm": 0.5583274364471436, + "learning_rate": 1.5126914787894074e-07, + 
"loss": 1.3979, + "step": 9420 + }, + { + "epoch": 0.9937763713080169, + "grad_norm": 0.5493873357772827, + "learning_rate": 1.4626902469427882e-07, + "loss": 1.4794, + "step": 9421 + }, + { + "epoch": 0.9938818565400844, + "grad_norm": 0.5587198138237, + "learning_rate": 1.4135292635927832e-07, + "loss": 1.4541, + "step": 9422 + }, + { + "epoch": 0.9939873417721519, + "grad_norm": 0.5461710095405579, + "learning_rate": 1.365208534248319e-07, + "loss": 1.4292, + "step": 9423 + }, + { + "epoch": 0.9940928270042194, + "grad_norm": 0.5567509531974792, + "learning_rate": 1.3177280643233979e-07, + "loss": 1.4135, + "step": 9424 + }, + { + "epoch": 0.994198312236287, + "grad_norm": 0.5776997804641724, + "learning_rate": 1.271087859138764e-07, + "loss": 1.4252, + "step": 9425 + }, + { + "epoch": 0.9943037974683544, + "grad_norm": 0.5580016374588013, + "learning_rate": 1.2252879239210702e-07, + "loss": 1.4694, + "step": 9426 + }, + { + "epoch": 0.994409282700422, + "grad_norm": 0.5647424459457397, + "learning_rate": 1.1803282638020441e-07, + "loss": 1.4545, + "step": 9427 + }, + { + "epoch": 0.9945147679324895, + "grad_norm": 0.5554576516151428, + "learning_rate": 1.1362088838193229e-07, + "loss": 1.4207, + "step": 9428 + }, + { + "epoch": 0.9946202531645569, + "grad_norm": 0.5404323935508728, + "learning_rate": 1.0929297889172852e-07, + "loss": 1.4588, + "step": 9429 + }, + { + "epoch": 0.9947257383966245, + "grad_norm": 0.5543349385261536, + "learning_rate": 1.0504909839462173e-07, + "loss": 1.3928, + "step": 9430 + }, + { + "epoch": 0.994831223628692, + "grad_norm": 0.554809033870697, + "learning_rate": 1.008892473659817e-07, + "loss": 1.4234, + "step": 9431 + }, + { + "epoch": 0.9949367088607595, + "grad_norm": 0.5717980265617371, + "learning_rate": 9.68134262721021e-08, + "loss": 1.4532, + "step": 9432 + }, + { + "epoch": 0.995042194092827, + "grad_norm": 0.5438666343688965, + "learning_rate": 9.282163556953437e-08, + "loss": 1.4356, + "step": 9433 + }, + { + "epoch": 
0.9951476793248946, + "grad_norm": 0.5455113649368286, + "learning_rate": 8.891387570575393e-08, + "loss": 1.4284, + "step": 9434 + }, + { + "epoch": 0.995253164556962, + "grad_norm": 0.5619592070579529, + "learning_rate": 8.509014711857721e-08, + "loss": 1.4315, + "step": 9435 + }, + { + "epoch": 0.9953586497890295, + "grad_norm": 0.5784994959831238, + "learning_rate": 8.135045023641152e-08, + "loss": 1.4191, + "step": 9436 + }, + { + "epoch": 0.9954641350210971, + "grad_norm": 0.5609286427497864, + "learning_rate": 7.769478547842157e-08, + "loss": 1.4251, + "step": 9437 + }, + { + "epoch": 0.9955696202531645, + "grad_norm": 0.5652316808700562, + "learning_rate": 7.412315325411312e-08, + "loss": 1.4265, + "step": 9438 + }, + { + "epoch": 0.9956751054852321, + "grad_norm": 0.5878161787986755, + "learning_rate": 7.063555396383259e-08, + "loss": 1.4787, + "step": 9439 + }, + { + "epoch": 0.9957805907172996, + "grad_norm": 0.5499141216278076, + "learning_rate": 6.723198799826746e-08, + "loss": 1.4295, + "step": 9440 + }, + { + "epoch": 0.9958860759493671, + "grad_norm": 0.5410447716712952, + "learning_rate": 6.391245573894588e-08, + "loss": 1.4413, + "step": 9441 + }, + { + "epoch": 0.9959915611814346, + "grad_norm": 0.5473859906196594, + "learning_rate": 6.067695755765379e-08, + "loss": 1.4565, + "step": 9442 + }, + { + "epoch": 0.9960970464135022, + "grad_norm": 0.5541492700576782, + "learning_rate": 5.7525493817101035e-08, + "loss": 1.4151, + "step": 9443 + }, + { + "epoch": 0.9962025316455696, + "grad_norm": 0.6002219915390015, + "learning_rate": 5.4458064870338553e-08, + "loss": 1.4393, + "step": 9444 + }, + { + "epoch": 0.9963080168776371, + "grad_norm": 0.5485711693763733, + "learning_rate": 5.147467106117465e-08, + "loss": 1.4423, + "step": 9445 + }, + { + "epoch": 0.9964135021097047, + "grad_norm": 0.5535681843757629, + "learning_rate": 4.85753127237587e-08, + "loss": 1.4365, + "step": 9446 + }, + { + "epoch": 0.9965189873417721, + "grad_norm": 
0.5326972007751465, + "learning_rate": 4.575999018316401e-08, + "loss": 1.4442, + "step": 9447 + }, + { + "epoch": 0.9966244725738397, + "grad_norm": 0.5665270686149597, + "learning_rate": 4.302870375472168e-08, + "loss": 1.4459, + "step": 9448 + }, + { + "epoch": 0.9967299578059071, + "grad_norm": 0.5877161622047424, + "learning_rate": 4.038145374460345e-08, + "loss": 1.466, + "step": 9449 + }, + { + "epoch": 0.9968354430379747, + "grad_norm": 0.567847728729248, + "learning_rate": 3.781824044932214e-08, + "loss": 1.4677, + "step": 9450 + }, + { + "epoch": 0.9969409282700422, + "grad_norm": 0.53989577293396, + "learning_rate": 3.533906415614796e-08, + "loss": 1.4404, + "step": 9451 + }, + { + "epoch": 0.9970464135021097, + "grad_norm": 0.5806751251220703, + "learning_rate": 3.294392514285871e-08, + "loss": 1.4789, + "step": 9452 + }, + { + "epoch": 0.9971518987341772, + "grad_norm": 0.5507540702819824, + "learning_rate": 3.0632823677906316e-08, + "loss": 1.4195, + "step": 9453 + }, + { + "epoch": 0.9972573839662447, + "grad_norm": 0.5769245624542236, + "learning_rate": 2.8405760020250304e-08, + "loss": 1.4233, + "step": 9454 + }, + { + "epoch": 0.9973628691983122, + "grad_norm": 0.5874559283256531, + "learning_rate": 2.6262734419441047e-08, + "loss": 1.4234, + "step": 9455 + }, + { + "epoch": 0.9974683544303797, + "grad_norm": 0.5317293405532837, + "learning_rate": 2.420374711561979e-08, + "loss": 1.4214, + "step": 9456 + }, + { + "epoch": 0.9975738396624473, + "grad_norm": 0.5725364685058594, + "learning_rate": 2.2228798339435363e-08, + "loss": 1.4135, + "step": 9457 + }, + { + "epoch": 0.9976793248945147, + "grad_norm": 0.5690646171569824, + "learning_rate": 2.0337888312210727e-08, + "loss": 1.4162, + "step": 9458 + }, + { + "epoch": 0.9977848101265823, + "grad_norm": 0.5534290671348572, + "learning_rate": 1.8531017245942972e-08, + "loss": 1.3977, + "step": 9459 + }, + { + "epoch": 0.9978902953586498, + "grad_norm": 0.5776321291923523, + "learning_rate": 
1.6808185342970238e-08, + "loss": 1.4381, + "step": 9460 + }, + { + "epoch": 0.9979957805907173, + "grad_norm": 0.5505704283714294, + "learning_rate": 1.516939279638807e-08, + "loss": 1.4323, + "step": 9461 + }, + { + "epoch": 0.9981012658227848, + "grad_norm": 0.528099536895752, + "learning_rate": 1.3614639789882866e-08, + "loss": 1.4368, + "step": 9462 + }, + { + "epoch": 0.9982067510548523, + "grad_norm": 0.6084156632423401, + "learning_rate": 1.214392649756535e-08, + "loss": 1.4516, + "step": 9463 + }, + { + "epoch": 0.9983122362869198, + "grad_norm": 0.5841971635818481, + "learning_rate": 1.075725308438691e-08, + "loss": 1.4513, + "step": 9464 + }, + { + "epoch": 0.9984177215189873, + "grad_norm": 0.5667870044708252, + "learning_rate": 9.454619705556722e-09, + "loss": 1.4785, + "step": 9465 + }, + { + "epoch": 0.9985232067510549, + "grad_norm": 0.541836678981781, + "learning_rate": 8.236026507124628e-09, + "loss": 1.4238, + "step": 9466 + }, + { + "epoch": 0.9986286919831223, + "grad_norm": 0.5256163477897644, + "learning_rate": 7.101473625648058e-09, + "loss": 1.4277, + "step": 9467 + }, + { + "epoch": 0.9987341772151899, + "grad_norm": 0.5627545714378357, + "learning_rate": 6.050961188358573e-09, + "loss": 1.4361, + "step": 9468 + }, + { + "epoch": 0.9988396624472574, + "grad_norm": 0.5602642893791199, + "learning_rate": 5.084489312745521e-09, + "loss": 1.4303, + "step": 9469 + }, + { + "epoch": 0.9989451476793249, + "grad_norm": 0.5658698678016663, + "learning_rate": 4.202058107305451e-09, + "loss": 1.4532, + "step": 9470 + }, + { + "epoch": 0.9990506329113924, + "grad_norm": 0.5448355078697205, + "learning_rate": 3.403667670792698e-09, + "loss": 1.4451, + "step": 9471 + }, + { + "epoch": 0.99915611814346, + "grad_norm": 0.584951639175415, + "learning_rate": 2.689318092718995e-09, + "loss": 1.407, + "step": 9472 + }, + { + "epoch": 0.9992616033755274, + "grad_norm": 0.5305967926979065, + "learning_rate": 2.059009453103666e-09, + "loss": 1.4467, + "step": 
9473 + }, + { + "epoch": 0.9993670886075949, + "grad_norm": 0.5872498750686646, + "learning_rate": 1.5127418226401623e-09, + "loss": 1.4492, + "step": 9474 + }, + { + "epoch": 0.9994725738396625, + "grad_norm": 0.5518171191215515, + "learning_rate": 1.0505152625295278e-09, + "loss": 1.4316, + "step": 9475 + }, + { + "epoch": 0.9995780590717299, + "grad_norm": 0.5727801322937012, + "learning_rate": 6.723298245636666e-10, + "loss": 1.4283, + "step": 9476 + }, + { + "epoch": 0.9996835443037975, + "grad_norm": 0.5918259024620056, + "learning_rate": 3.781855510420762e-10, + "loss": 1.4237, + "step": 9477 + }, + { + "epoch": 0.999789029535865, + "grad_norm": 0.5418986678123474, + "learning_rate": 1.6808247493838026e-10, + "loss": 1.455, + "step": 9478 + }, + { + "epoch": 0.9998945147679325, + "grad_norm": 0.5667402744293213, + "learning_rate": 4.202061990032924e-11, + "loss": 1.4341, + "step": 9479 + }, + { + "epoch": 1.0, + "grad_norm": 1.6665583848953247, + "learning_rate": 0.0, + "loss": 1.382, + "step": 9480 + } + ], + "logging_steps": 1, + "max_steps": 9480, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.8391618477891584e+16, + "train_batch_size": 1024, + "trial_name": null, + "trial_params": null +} diff --git a/saves-gpt_neox-cosine/checkpoint-9480/training_args.bin b/saves-gpt_neox-cosine/checkpoint-9480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..21e0f5cde4d3c0b73431f1facf1cb95798b6332d --- /dev/null +++ b/saves-gpt_neox-cosine/checkpoint-9480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f930d4ad6de84e3f701ff5ec7a876e12608567696c6f286227b1b7e7e2ef805 +size 5176 diff --git 
a/saves-gpt_neox-cosine/config.json b/saves-gpt_neox-cosine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c216fcd1855cc1122a6b9a742b143fdcac99903d --- /dev/null +++ b/saves-gpt_neox-cosine/config.json @@ -0,0 +1,30 @@ +{ + "architectures": [ + "GPTNeoXForCausalLM" + ], + "attention_bias": true, + "attention_dropout": 0.0, + "bos_token_id": 0, + "classifier_dropout": 0.1, + "eos_token_id": 2, + "hidden_act": "gelu", + "hidden_dropout": 0.0, + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 1024, + "layer_norm_eps": 1e-05, + "max_position_embeddings": 2048, + "model_type": "gpt_neox", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "rope_scaling": null, + "rotary_emb_base": 10000, + "rotary_pct": 0.25, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "use_parallel_residual": true, + "vocab_size": 2000 +} diff --git a/saves-gpt_neox-cosine/generation_config.json b/saves-gpt_neox-cosine/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f3d6e313c9ea91dde2131852f3f2423673d6a38e --- /dev/null +++ b/saves-gpt_neox-cosine/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 0, + "eos_token_id": 2, + "transformers_version": "4.42.4" +} diff --git a/saves-gpt_neox-cosine/model.safetensors b/saves-gpt_neox-cosine/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d39cdd37c18d20ed772dd4e241c61eb243408e90 --- /dev/null +++ b/saves-gpt_neox-cosine/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:910ad63d2c858a23713e80e323383d386e45356521438c8ea9b7d75a28c5e2f7 +size 8371104 diff --git a/saves-gpt_neox-cosine/result.log b/saves-gpt_neox-cosine/result.log new file mode 100644 index 0000000000000000000000000000000000000000..78c6a9e70768e9c235fdf1baf936d7b5a7aab2d9 --- 
/dev/null +++ b/saves-gpt_neox-cosine/result.log @@ -0,0 +1 @@ +{'train_runtime': 1881.5485, 'train_samples_per_second': 5158.838, 'train_steps_per_second': 5.038, 'train_loss': 1.698128197389313, 'epoch': 1.0} \ No newline at end of file diff --git a/saves-gpt_neox-cosine/special_tokens_map.json b/saves-gpt_neox-cosine/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-gpt_neox-cosine/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-gpt_neox-cosine/tokenizer.json b/saves-gpt_neox-cosine/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-gpt_neox-cosine/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| 
?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + 
"´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + 
"ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 
422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, 
+ "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + 
"æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 
857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, 
+ "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + 
"å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + 
"éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 
1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 
1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + 
"Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 
1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 
1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", 
+ "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", 
+ "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", + "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-gpt_neox-cosine/tokenizer_config.json b/saves-gpt_neox-cosine/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-gpt_neox-cosine/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": 
"replace", + "model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-gpt_neox/checkpoint-9480/config.json b/saves-gpt_neox/checkpoint-9480/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c216fcd1855cc1122a6b9a742b143fdcac99903d --- /dev/null +++ b/saves-gpt_neox/checkpoint-9480/config.json @@ -0,0 +1,30 @@ +{ + "architectures": [ + "GPTNeoXForCausalLM" + ], + "attention_bias": true, + "attention_dropout": 0.0, + "bos_token_id": 0, + "classifier_dropout": 0.1, + "eos_token_id": 2, + "hidden_act": "gelu", + "hidden_dropout": 0.0, + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 1024, + "layer_norm_eps": 1e-05, + "max_position_embeddings": 2048, + "model_type": "gpt_neox", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "rope_scaling": null, + "rotary_emb_base": 10000, + "rotary_pct": 0.25, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "use_parallel_residual": true, + "vocab_size": 2000 +} diff --git a/saves-gpt_neox/checkpoint-9480/generation_config.json b/saves-gpt_neox/checkpoint-9480/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f3d6e313c9ea91dde2131852f3f2423673d6a38e --- /dev/null +++ b/saves-gpt_neox/checkpoint-9480/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 0, + "eos_token_id": 2, + "transformers_version": "4.42.4" +} diff --git a/saves-gpt_neox/checkpoint-9480/model.safetensors b/saves-gpt_neox/checkpoint-9480/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2764f50b5fa4ba42d2ff86ab491d665540a43713 --- /dev/null +++ b/saves-gpt_neox/checkpoint-9480/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:385a3e62735fb0d90ffd20252916108bda92f6d5e0b237a4bde370a7bd362a35 +size 8371104 diff --git a/saves-gpt_neox/checkpoint-9480/optimizer.pt b/saves-gpt_neox/checkpoint-9480/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ede0703cffe2d54a2bf2d117d4e0bfb172ff06e0 --- /dev/null +++ b/saves-gpt_neox/checkpoint-9480/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e51a2e564f2c4d6855a5936bb215d42f94eb924a7488cde5d8954ae6df04045d +size 16759181 diff --git a/saves-gpt_neox/checkpoint-9480/rng_state.pth b/saves-gpt_neox/checkpoint-9480/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f8291cd6ce87668b786a72f3e93d072fbe54902 --- /dev/null +++ b/saves-gpt_neox/checkpoint-9480/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c917636c7a58af68a29056522a757e9f9b99005b776641aa157c536967817d +size 14244 diff --git a/saves-gpt_neox/checkpoint-9480/scheduler.pt b/saves-gpt_neox/checkpoint-9480/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..63473f23a031ab0f869bb406d5cf89839262f03d --- /dev/null +++ b/saves-gpt_neox/checkpoint-9480/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbb2bea2f7536b844ad9bb1bf6c3877fce0b1eb4d96764e140560dbf207ce6aa +size 1064 diff --git a/saves-gpt_neox/checkpoint-9480/special_tokens_map.json b/saves-gpt_neox/checkpoint-9480/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-gpt_neox/checkpoint-9480/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": 
"<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-gpt_neox/checkpoint-9480/tokenizer.json b/saves-gpt_neox/checkpoint-9480/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-gpt_neox/checkpoint-9480/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + 
"(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, 
+ "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 
335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + 
"ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + 
"计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 
773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 
915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 
1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + 
"?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + 
"ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 
1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + 
"åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + 
"åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 
1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 
1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-gpt_neox/checkpoint-9480/tokenizer_config.json b/saves-gpt_neox/checkpoint-9480/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-gpt_neox/checkpoint-9480/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": 
"<|im_end|>", + "errors": "replace", + "model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-gpt_neox/checkpoint-9480/trainer_state.json b/saves-gpt_neox/checkpoint-9480/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..badcbe422f6f75d65b057f3bad143d050674b0a0 --- /dev/null +++ b/saves-gpt_neox/checkpoint-9480/trainer_state.json @@ -0,0 +1,6669 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 9480, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0010548523206751054, + "grad_norm": 1.2485766410827637, + "learning_rate": 0.00015822784810126583, + "loss": 7.4574, + "step": 10 + }, + { + "epoch": 0.002109704641350211, + "grad_norm": 1.076921820640564, + "learning_rate": 0.00031645569620253165, + "loss": 6.7423, + "step": 20 + }, + { + "epoch": 0.0031645569620253164, + "grad_norm": 0.8594661951065063, + "learning_rate": 0.00047468354430379745, + "loss": 6.1004, + "step": 30 + }, + { + "epoch": 0.004219409282700422, + "grad_norm": 0.7302715182304382, + "learning_rate": 0.0006329113924050633, + "loss": 5.502, + "step": 40 + }, + { + "epoch": 0.005274261603375527, + "grad_norm": 0.48561784625053406, + "learning_rate": 0.0007911392405063291, + "loss": 5.0017, + "step": 50 + }, + { + "epoch": 0.006329113924050633, + "grad_norm": 0.35066017508506775, + "learning_rate": 0.0009493670886075949, + "loss": 4.5792, + "step": 60 + }, + { + "epoch": 0.007383966244725738, + "grad_norm": 0.6283214688301086, + "learning_rate": 0.0011075949367088608, + "loss": 4.2927, + "step": 70 + }, + { + "epoch": 0.008438818565400843, + "grad_norm": 0.5184714198112488, + "learning_rate": 0.0012658227848101266, + "loss": 4.1193, + "step": 80 + }, + { + "epoch": 0.00949367088607595, + "grad_norm": 
0.5667967796325684, + "learning_rate": 0.0014240506329113926, + "loss": 3.9593, + "step": 90 + }, + { + "epoch": 0.010548523206751054, + "grad_norm": 0.5922889709472656, + "learning_rate": 0.0015, + "loss": 3.8811, + "step": 100 + }, + { + "epoch": 0.011603375527426161, + "grad_norm": 0.3839136064052582, + "learning_rate": 0.0015, + "loss": 3.7423, + "step": 110 + }, + { + "epoch": 0.012658227848101266, + "grad_norm": 0.6076145768165588, + "learning_rate": 0.0015, + "loss": 3.6727, + "step": 120 + }, + { + "epoch": 0.013713080168776372, + "grad_norm": 0.4232381284236908, + "learning_rate": 0.0015, + "loss": 3.5945, + "step": 130 + }, + { + "epoch": 0.014767932489451477, + "grad_norm": 0.4945344626903534, + "learning_rate": 0.0015, + "loss": 3.5079, + "step": 140 + }, + { + "epoch": 0.015822784810126583, + "grad_norm": 0.8854876160621643, + "learning_rate": 0.0015, + "loss": 3.4433, + "step": 150 + }, + { + "epoch": 0.016877637130801686, + "grad_norm": 0.4872647821903229, + "learning_rate": 0.0015, + "loss": 3.4101, + "step": 160 + }, + { + "epoch": 0.017932489451476793, + "grad_norm": 0.4611298739910126, + "learning_rate": 0.0015, + "loss": 3.3305, + "step": 170 + }, + { + "epoch": 0.0189873417721519, + "grad_norm": 0.5822963714599609, + "learning_rate": 0.0015, + "loss": 3.3003, + "step": 180 + }, + { + "epoch": 0.020042194092827006, + "grad_norm": 0.4285585582256317, + "learning_rate": 0.0015, + "loss": 3.237, + "step": 190 + }, + { + "epoch": 0.02109704641350211, + "grad_norm": 0.5830572843551636, + "learning_rate": 0.0015, + "loss": 3.193, + "step": 200 + }, + { + "epoch": 0.022151898734177215, + "grad_norm": 0.7902519106864929, + "learning_rate": 0.0015, + "loss": 3.1489, + "step": 210 + }, + { + "epoch": 0.023206751054852322, + "grad_norm": 0.5479622483253479, + "learning_rate": 0.0015, + "loss": 3.1197, + "step": 220 + }, + { + "epoch": 0.024261603375527425, + "grad_norm": 0.5395523905754089, + "learning_rate": 0.0015, + "loss": 3.0605, + "step": 230 + }, + 
{ + "epoch": 0.02531645569620253, + "grad_norm": 0.6672455072402954, + "learning_rate": 0.0015, + "loss": 3.0266, + "step": 240 + }, + { + "epoch": 0.026371308016877638, + "grad_norm": 0.5087226033210754, + "learning_rate": 0.0015, + "loss": 2.9871, + "step": 250 + }, + { + "epoch": 0.027426160337552744, + "grad_norm": 0.5225133299827576, + "learning_rate": 0.0015, + "loss": 2.9433, + "step": 260 + }, + { + "epoch": 0.028481012658227847, + "grad_norm": 0.7876822352409363, + "learning_rate": 0.0015, + "loss": 2.911, + "step": 270 + }, + { + "epoch": 0.029535864978902954, + "grad_norm": 0.4625847637653351, + "learning_rate": 0.0015, + "loss": 2.8693, + "step": 280 + }, + { + "epoch": 0.03059071729957806, + "grad_norm": 0.8417824506759644, + "learning_rate": 0.0015, + "loss": 2.8435, + "step": 290 + }, + { + "epoch": 0.03164556962025317, + "grad_norm": 0.8281850814819336, + "learning_rate": 0.0015, + "loss": 2.8252, + "step": 300 + }, + { + "epoch": 0.03270042194092827, + "grad_norm": 0.6728420257568359, + "learning_rate": 0.0015, + "loss": 2.7857, + "step": 310 + }, + { + "epoch": 0.03375527426160337, + "grad_norm": 0.564785897731781, + "learning_rate": 0.0015, + "loss": 2.7513, + "step": 320 + }, + { + "epoch": 0.03481012658227848, + "grad_norm": 0.786664605140686, + "learning_rate": 0.0015, + "loss": 2.733, + "step": 330 + }, + { + "epoch": 0.035864978902953586, + "grad_norm": 0.5595623850822449, + "learning_rate": 0.0015, + "loss": 2.715, + "step": 340 + }, + { + "epoch": 0.03691983122362869, + "grad_norm": 0.5897361040115356, + "learning_rate": 0.0015, + "loss": 2.6674, + "step": 350 + }, + { + "epoch": 0.0379746835443038, + "grad_norm": 0.5897522568702698, + "learning_rate": 0.0015, + "loss": 2.6465, + "step": 360 + }, + { + "epoch": 0.039029535864978905, + "grad_norm": 0.714674174785614, + "learning_rate": 0.0015, + "loss": 2.6299, + "step": 370 + }, + { + "epoch": 0.04008438818565401, + "grad_norm": 0.5882616639137268, + "learning_rate": 0.0015, + "loss": 
2.6172, + "step": 380 + }, + { + "epoch": 0.04113924050632911, + "grad_norm": 0.7988317608833313, + "learning_rate": 0.0015, + "loss": 2.5937, + "step": 390 + }, + { + "epoch": 0.04219409282700422, + "grad_norm": 0.6206404566764832, + "learning_rate": 0.0015, + "loss": 2.5675, + "step": 400 + }, + { + "epoch": 0.043248945147679324, + "grad_norm": 0.5841651558876038, + "learning_rate": 0.0015, + "loss": 2.5525, + "step": 410 + }, + { + "epoch": 0.04430379746835443, + "grad_norm": 0.7199103236198425, + "learning_rate": 0.0015, + "loss": 2.5267, + "step": 420 + }, + { + "epoch": 0.04535864978902954, + "grad_norm": 0.5870277881622314, + "learning_rate": 0.0015, + "loss": 2.5024, + "step": 430 + }, + { + "epoch": 0.046413502109704644, + "grad_norm": 0.5399110913276672, + "learning_rate": 0.0015, + "loss": 2.4799, + "step": 440 + }, + { + "epoch": 0.04746835443037975, + "grad_norm": 0.5798667669296265, + "learning_rate": 0.0015, + "loss": 2.4766, + "step": 450 + }, + { + "epoch": 0.04852320675105485, + "grad_norm": 0.8166000843048096, + "learning_rate": 0.0015, + "loss": 2.4545, + "step": 460 + }, + { + "epoch": 0.049578059071729956, + "grad_norm": 0.607738196849823, + "learning_rate": 0.0015, + "loss": 2.4326, + "step": 470 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 0.8470941185951233, + "learning_rate": 0.0015, + "loss": 2.4253, + "step": 480 + }, + { + "epoch": 0.05168776371308017, + "grad_norm": 0.6075780391693115, + "learning_rate": 0.0015, + "loss": 2.3971, + "step": 490 + }, + { + "epoch": 0.052742616033755275, + "grad_norm": 0.57669597864151, + "learning_rate": 0.0015, + "loss": 2.387, + "step": 500 + }, + { + "epoch": 0.05379746835443038, + "grad_norm": 0.5554884076118469, + "learning_rate": 0.0015, + "loss": 2.3714, + "step": 510 + }, + { + "epoch": 0.05485232067510549, + "grad_norm": 0.564025342464447, + "learning_rate": 0.0015, + "loss": 2.3534, + "step": 520 + }, + { + "epoch": 0.05590717299578059, + "grad_norm": 0.6070376634597778, + 
"learning_rate": 0.0015, + "loss": 2.34, + "step": 530 + }, + { + "epoch": 0.056962025316455694, + "grad_norm": 0.7295579314231873, + "learning_rate": 0.0015, + "loss": 2.3187, + "step": 540 + }, + { + "epoch": 0.0580168776371308, + "grad_norm": 0.6031685471534729, + "learning_rate": 0.0015, + "loss": 2.3174, + "step": 550 + }, + { + "epoch": 0.05907172995780591, + "grad_norm": 0.7265970706939697, + "learning_rate": 0.0015, + "loss": 2.2783, + "step": 560 + }, + { + "epoch": 0.060126582278481014, + "grad_norm": 0.706733226776123, + "learning_rate": 0.0015, + "loss": 2.2865, + "step": 570 + }, + { + "epoch": 0.06118143459915612, + "grad_norm": 0.6092813014984131, + "learning_rate": 0.0015, + "loss": 2.2813, + "step": 580 + }, + { + "epoch": 0.06223628691983123, + "grad_norm": 0.6743170022964478, + "learning_rate": 0.0015, + "loss": 2.2536, + "step": 590 + }, + { + "epoch": 0.06329113924050633, + "grad_norm": 0.5131509304046631, + "learning_rate": 0.0015, + "loss": 2.2361, + "step": 600 + }, + { + "epoch": 0.06434599156118144, + "grad_norm": 0.6285905838012695, + "learning_rate": 0.0015, + "loss": 2.24, + "step": 610 + }, + { + "epoch": 0.06540084388185655, + "grad_norm": 0.7810304164886475, + "learning_rate": 0.0015, + "loss": 2.2301, + "step": 620 + }, + { + "epoch": 0.06645569620253164, + "grad_norm": 0.7753847241401672, + "learning_rate": 0.0015, + "loss": 2.2146, + "step": 630 + }, + { + "epoch": 0.06751054852320675, + "grad_norm": 0.86361163854599, + "learning_rate": 0.0015, + "loss": 2.2189, + "step": 640 + }, + { + "epoch": 0.06856540084388185, + "grad_norm": 0.537851870059967, + "learning_rate": 0.0015, + "loss": 2.2035, + "step": 650 + }, + { + "epoch": 0.06962025316455696, + "grad_norm": 0.6117690801620483, + "learning_rate": 0.0015, + "loss": 2.1871, + "step": 660 + }, + { + "epoch": 0.07067510548523206, + "grad_norm": 0.6053338646888733, + "learning_rate": 0.0015, + "loss": 2.1717, + "step": 670 + }, + { + "epoch": 0.07172995780590717, + "grad_norm": 
0.5600166320800781, + "learning_rate": 0.0015, + "loss": 2.1639, + "step": 680 + }, + { + "epoch": 0.07278481012658228, + "grad_norm": 0.5834609270095825, + "learning_rate": 0.0015, + "loss": 2.1792, + "step": 690 + }, + { + "epoch": 0.07383966244725738, + "grad_norm": 0.5720496773719788, + "learning_rate": 0.0015, + "loss": 2.1544, + "step": 700 + }, + { + "epoch": 0.07489451476793249, + "grad_norm": 0.5028406977653503, + "learning_rate": 0.0015, + "loss": 2.1438, + "step": 710 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 0.5963038206100464, + "learning_rate": 0.0015, + "loss": 2.1255, + "step": 720 + }, + { + "epoch": 0.0770042194092827, + "grad_norm": 0.669278621673584, + "learning_rate": 0.0015, + "loss": 2.1244, + "step": 730 + }, + { + "epoch": 0.07805907172995781, + "grad_norm": 0.6336427330970764, + "learning_rate": 0.0015, + "loss": 2.13, + "step": 740 + }, + { + "epoch": 0.07911392405063292, + "grad_norm": 0.8953106999397278, + "learning_rate": 0.0015, + "loss": 2.1119, + "step": 750 + }, + { + "epoch": 0.08016877637130802, + "grad_norm": 0.8605039715766907, + "learning_rate": 0.0015, + "loss": 2.1066, + "step": 760 + }, + { + "epoch": 0.08122362869198312, + "grad_norm": 0.5540823340415955, + "learning_rate": 0.0015, + "loss": 2.0993, + "step": 770 + }, + { + "epoch": 0.08227848101265822, + "grad_norm": 0.5638597011566162, + "learning_rate": 0.0015, + "loss": 2.0931, + "step": 780 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 0.5192545056343079, + "learning_rate": 0.0015, + "loss": 2.0973, + "step": 790 + }, + { + "epoch": 0.08438818565400844, + "grad_norm": 0.5290205478668213, + "learning_rate": 0.0015, + "loss": 2.0721, + "step": 800 + }, + { + "epoch": 0.08544303797468354, + "grad_norm": 0.5702331066131592, + "learning_rate": 0.0015, + "loss": 2.0648, + "step": 810 + }, + { + "epoch": 0.08649789029535865, + "grad_norm": 0.6120274066925049, + "learning_rate": 0.0015, + "loss": 2.0703, + "step": 820 + }, + { + "epoch": 
0.08755274261603375, + "grad_norm": 0.651861310005188, + "learning_rate": 0.0015, + "loss": 2.0723, + "step": 830 + }, + { + "epoch": 0.08860759493670886, + "grad_norm": 0.7092135548591614, + "learning_rate": 0.0015, + "loss": 2.0494, + "step": 840 + }, + { + "epoch": 0.08966244725738397, + "grad_norm": 0.5535245537757874, + "learning_rate": 0.0015, + "loss": 2.0462, + "step": 850 + }, + { + "epoch": 0.09071729957805907, + "grad_norm": 0.5234808325767517, + "learning_rate": 0.0015, + "loss": 2.0461, + "step": 860 + }, + { + "epoch": 0.09177215189873418, + "grad_norm": 0.6178521513938904, + "learning_rate": 0.0015, + "loss": 2.0404, + "step": 870 + }, + { + "epoch": 0.09282700421940929, + "grad_norm": 0.5312263369560242, + "learning_rate": 0.0015, + "loss": 2.0341, + "step": 880 + }, + { + "epoch": 0.0938818565400844, + "grad_norm": 0.7490671873092651, + "learning_rate": 0.0015, + "loss": 2.0263, + "step": 890 + }, + { + "epoch": 0.0949367088607595, + "grad_norm": 0.5407402515411377, + "learning_rate": 0.0015, + "loss": 2.0252, + "step": 900 + }, + { + "epoch": 0.09599156118143459, + "grad_norm": 0.5616239905357361, + "learning_rate": 0.0015, + "loss": 2.0303, + "step": 910 + }, + { + "epoch": 0.0970464135021097, + "grad_norm": 0.644985020160675, + "learning_rate": 0.0015, + "loss": 2.0164, + "step": 920 + }, + { + "epoch": 0.0981012658227848, + "grad_norm": 0.6015644073486328, + "learning_rate": 0.0015, + "loss": 2.0035, + "step": 930 + }, + { + "epoch": 0.09915611814345991, + "grad_norm": 0.5466493368148804, + "learning_rate": 0.0015, + "loss": 2.0123, + "step": 940 + }, + { + "epoch": 0.10021097046413502, + "grad_norm": 0.5613917708396912, + "learning_rate": 0.0015, + "loss": 1.9918, + "step": 950 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 0.577168881893158, + "learning_rate": 0.0015, + "loss": 1.9996, + "step": 960 + }, + { + "epoch": 0.10232067510548523, + "grad_norm": 1.0082017183303833, + "learning_rate": 0.0015, + "loss": 2.0, + "step": 970 + }, 
+ { + "epoch": 0.10337552742616034, + "grad_norm": 0.8105382919311523, + "learning_rate": 0.0015, + "loss": 1.984, + "step": 980 + }, + { + "epoch": 0.10443037974683544, + "grad_norm": 0.6733279824256897, + "learning_rate": 0.0015, + "loss": 1.9845, + "step": 990 + }, + { + "epoch": 0.10548523206751055, + "grad_norm": 0.5090422630310059, + "learning_rate": 0.0015, + "loss": 1.9939, + "step": 1000 + }, + { + "epoch": 0.10654008438818566, + "grad_norm": 0.546383798122406, + "learning_rate": 0.0015, + "loss": 1.9769, + "step": 1010 + }, + { + "epoch": 0.10759493670886076, + "grad_norm": 0.869979739189148, + "learning_rate": 0.0015, + "loss": 1.9696, + "step": 1020 + }, + { + "epoch": 0.10864978902953587, + "grad_norm": 0.5353502631187439, + "learning_rate": 0.0015, + "loss": 1.9697, + "step": 1030 + }, + { + "epoch": 0.10970464135021098, + "grad_norm": 0.7926225662231445, + "learning_rate": 0.0015, + "loss": 1.9668, + "step": 1040 + }, + { + "epoch": 0.11075949367088607, + "grad_norm": 0.5777803659439087, + "learning_rate": 0.0015, + "loss": 1.9671, + "step": 1050 + }, + { + "epoch": 0.11181434599156118, + "grad_norm": 0.5637400150299072, + "learning_rate": 0.0015, + "loss": 1.9586, + "step": 1060 + }, + { + "epoch": 0.11286919831223628, + "grad_norm": 0.5731320381164551, + "learning_rate": 0.0015, + "loss": 1.9424, + "step": 1070 + }, + { + "epoch": 0.11392405063291139, + "grad_norm": 0.6504504680633545, + "learning_rate": 0.0015, + "loss": 1.9491, + "step": 1080 + }, + { + "epoch": 0.1149789029535865, + "grad_norm": 0.5387397408485413, + "learning_rate": 0.0015, + "loss": 1.9547, + "step": 1090 + }, + { + "epoch": 0.1160337552742616, + "grad_norm": 0.6129940748214722, + "learning_rate": 0.0015, + "loss": 1.941, + "step": 1100 + }, + { + "epoch": 0.11708860759493671, + "grad_norm": 0.6021996140480042, + "learning_rate": 0.0015, + "loss": 1.9452, + "step": 1110 + }, + { + "epoch": 0.11814345991561181, + "grad_norm": 0.5393975377082825, + "learning_rate": 0.0015, + 
"loss": 1.9369, + "step": 1120 + }, + { + "epoch": 0.11919831223628692, + "grad_norm": 0.7050818204879761, + "learning_rate": 0.0015, + "loss": 1.9271, + "step": 1130 + }, + { + "epoch": 0.12025316455696203, + "grad_norm": 0.6135690808296204, + "learning_rate": 0.0015, + "loss": 1.9386, + "step": 1140 + }, + { + "epoch": 0.12130801687763713, + "grad_norm": 0.6577451825141907, + "learning_rate": 0.0015, + "loss": 1.9244, + "step": 1150 + }, + { + "epoch": 0.12236286919831224, + "grad_norm": 0.5335215330123901, + "learning_rate": 0.0015, + "loss": 1.9318, + "step": 1160 + }, + { + "epoch": 0.12341772151898735, + "grad_norm": 0.624643862247467, + "learning_rate": 0.0015, + "loss": 1.9215, + "step": 1170 + }, + { + "epoch": 0.12447257383966245, + "grad_norm": 0.6070647835731506, + "learning_rate": 0.0015, + "loss": 1.9064, + "step": 1180 + }, + { + "epoch": 0.12552742616033755, + "grad_norm": 0.732358455657959, + "learning_rate": 0.0015, + "loss": 1.9091, + "step": 1190 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 0.6872867941856384, + "learning_rate": 0.0015, + "loss": 1.9243, + "step": 1200 + }, + { + "epoch": 0.12763713080168776, + "grad_norm": 0.5056659579277039, + "learning_rate": 0.0015, + "loss": 1.9137, + "step": 1210 + }, + { + "epoch": 0.12869198312236288, + "grad_norm": 0.59626704454422, + "learning_rate": 0.0015, + "loss": 1.9006, + "step": 1220 + }, + { + "epoch": 0.12974683544303797, + "grad_norm": 0.5268152356147766, + "learning_rate": 0.0015, + "loss": 1.8902, + "step": 1230 + }, + { + "epoch": 0.1308016877637131, + "grad_norm": 0.6363093852996826, + "learning_rate": 0.0015, + "loss": 1.9064, + "step": 1240 + }, + { + "epoch": 0.13185654008438819, + "grad_norm": 0.5384114980697632, + "learning_rate": 0.0015, + "loss": 1.9105, + "step": 1250 + }, + { + "epoch": 0.13291139240506328, + "grad_norm": 0.631103515625, + "learning_rate": 0.0015, + "loss": 1.8914, + "step": 1260 + }, + { + "epoch": 0.1339662447257384, + "grad_norm": 
0.5494920015335083, + "learning_rate": 0.0015, + "loss": 1.8909, + "step": 1270 + }, + { + "epoch": 0.1350210970464135, + "grad_norm": 0.5683566927909851, + "learning_rate": 0.0015, + "loss": 1.9016, + "step": 1280 + }, + { + "epoch": 0.1360759493670886, + "grad_norm": 0.5997524857521057, + "learning_rate": 0.0015, + "loss": 1.8923, + "step": 1290 + }, + { + "epoch": 0.1371308016877637, + "grad_norm": 0.5443325638771057, + "learning_rate": 0.0015, + "loss": 1.8766, + "step": 1300 + }, + { + "epoch": 0.13818565400843882, + "grad_norm": 0.5556219220161438, + "learning_rate": 0.0015, + "loss": 1.8829, + "step": 1310 + }, + { + "epoch": 0.13924050632911392, + "grad_norm": 0.5621205568313599, + "learning_rate": 0.0015, + "loss": 1.8882, + "step": 1320 + }, + { + "epoch": 0.14029535864978904, + "grad_norm": 1.08877694606781, + "learning_rate": 0.0015, + "loss": 1.8795, + "step": 1330 + }, + { + "epoch": 0.14135021097046413, + "grad_norm": 0.6446395516395569, + "learning_rate": 0.0015, + "loss": 1.8822, + "step": 1340 + }, + { + "epoch": 0.14240506329113925, + "grad_norm": 0.5307400822639465, + "learning_rate": 0.0015, + "loss": 1.8808, + "step": 1350 + }, + { + "epoch": 0.14345991561181434, + "grad_norm": 0.5413005352020264, + "learning_rate": 0.0015, + "loss": 1.8739, + "step": 1360 + }, + { + "epoch": 0.14451476793248946, + "grad_norm": 0.5862961411476135, + "learning_rate": 0.0015, + "loss": 1.8698, + "step": 1370 + }, + { + "epoch": 0.14556962025316456, + "grad_norm": 0.628121018409729, + "learning_rate": 0.0015, + "loss": 1.8593, + "step": 1380 + }, + { + "epoch": 0.14662447257383968, + "grad_norm": 0.6433019638061523, + "learning_rate": 0.0015, + "loss": 1.8707, + "step": 1390 + }, + { + "epoch": 0.14767932489451477, + "grad_norm": 0.5447952747344971, + "learning_rate": 0.0015, + "loss": 1.8558, + "step": 1400 + }, + { + "epoch": 0.14873417721518986, + "grad_norm": 0.6169909238815308, + "learning_rate": 0.0015, + "loss": 1.8599, + "step": 1410 + }, + { + "epoch": 
0.14978902953586498, + "grad_norm": 0.5279917120933533, + "learning_rate": 0.0015, + "loss": 1.8657, + "step": 1420 + }, + { + "epoch": 0.15084388185654007, + "grad_norm": 0.8410396575927734, + "learning_rate": 0.0015, + "loss": 1.8575, + "step": 1430 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 0.5265130400657654, + "learning_rate": 0.0015, + "loss": 1.8561, + "step": 1440 + }, + { + "epoch": 0.1529535864978903, + "grad_norm": 0.7601611018180847, + "learning_rate": 0.0015, + "loss": 1.8568, + "step": 1450 + }, + { + "epoch": 0.1540084388185654, + "grad_norm": 0.6370795369148254, + "learning_rate": 0.0015, + "loss": 1.8493, + "step": 1460 + }, + { + "epoch": 0.1550632911392405, + "grad_norm": 0.7460944056510925, + "learning_rate": 0.0015, + "loss": 1.8469, + "step": 1470 + }, + { + "epoch": 0.15611814345991562, + "grad_norm": 0.6164674162864685, + "learning_rate": 0.0015, + "loss": 1.8508, + "step": 1480 + }, + { + "epoch": 0.1571729957805907, + "grad_norm": 0.5279008746147156, + "learning_rate": 0.0015, + "loss": 1.8511, + "step": 1490 + }, + { + "epoch": 0.15822784810126583, + "grad_norm": 0.5769187211990356, + "learning_rate": 0.0015, + "loss": 1.8454, + "step": 1500 + }, + { + "epoch": 0.15928270042194093, + "grad_norm": 0.5380831956863403, + "learning_rate": 0.0015, + "loss": 1.8422, + "step": 1510 + }, + { + "epoch": 0.16033755274261605, + "grad_norm": 0.49665436148643494, + "learning_rate": 0.0015, + "loss": 1.8466, + "step": 1520 + }, + { + "epoch": 0.16139240506329114, + "grad_norm": 0.49440786242485046, + "learning_rate": 0.0015, + "loss": 1.8291, + "step": 1530 + }, + { + "epoch": 0.16244725738396623, + "grad_norm": 0.5244949460029602, + "learning_rate": 0.0015, + "loss": 1.8187, + "step": 1540 + }, + { + "epoch": 0.16350210970464135, + "grad_norm": 0.6073598265647888, + "learning_rate": 0.0015, + "loss": 1.8235, + "step": 1550 + }, + { + "epoch": 0.16455696202531644, + "grad_norm": 0.6110168099403381, + "learning_rate": 0.0015, + "loss": 
1.8253, + "step": 1560 + }, + { + "epoch": 0.16561181434599156, + "grad_norm": 0.5527018904685974, + "learning_rate": 0.0015, + "loss": 1.8194, + "step": 1570 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 0.7350947260856628, + "learning_rate": 0.0015, + "loss": 1.8337, + "step": 1580 + }, + { + "epoch": 0.16772151898734178, + "grad_norm": 0.5066964626312256, + "learning_rate": 0.0015, + "loss": 1.8263, + "step": 1590 + }, + { + "epoch": 0.16877637130801687, + "grad_norm": 0.5679064393043518, + "learning_rate": 0.0015, + "loss": 1.83, + "step": 1600 + }, + { + "epoch": 0.169831223628692, + "grad_norm": 0.5815659165382385, + "learning_rate": 0.0015, + "loss": 1.8326, + "step": 1610 + }, + { + "epoch": 0.17088607594936708, + "grad_norm": 0.6580156087875366, + "learning_rate": 0.0015, + "loss": 1.8026, + "step": 1620 + }, + { + "epoch": 0.1719409282700422, + "grad_norm": 0.584998607635498, + "learning_rate": 0.0015, + "loss": 1.8143, + "step": 1630 + }, + { + "epoch": 0.1729957805907173, + "grad_norm": 0.6371729969978333, + "learning_rate": 0.0015, + "loss": 1.8136, + "step": 1640 + }, + { + "epoch": 0.17405063291139242, + "grad_norm": 0.6551669239997864, + "learning_rate": 0.0015, + "loss": 1.8056, + "step": 1650 + }, + { + "epoch": 0.1751054852320675, + "grad_norm": 0.5452194809913635, + "learning_rate": 0.0015, + "loss": 1.8052, + "step": 1660 + }, + { + "epoch": 0.17616033755274263, + "grad_norm": 0.580973207950592, + "learning_rate": 0.0015, + "loss": 1.8129, + "step": 1670 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 0.7288298010826111, + "learning_rate": 0.0015, + "loss": 1.8037, + "step": 1680 + }, + { + "epoch": 0.17827004219409281, + "grad_norm": 0.5003995895385742, + "learning_rate": 0.0015, + "loss": 1.7945, + "step": 1690 + }, + { + "epoch": 0.17932489451476794, + "grad_norm": 0.6342082023620605, + "learning_rate": 0.0015, + "loss": 1.8137, + "step": 1700 + }, + { + "epoch": 0.18037974683544303, + "grad_norm": 0.6020325422286987, + 
"learning_rate": 0.0015, + "loss": 1.7937, + "step": 1710 + }, + { + "epoch": 0.18143459915611815, + "grad_norm": 0.5053991675376892, + "learning_rate": 0.0015, + "loss": 1.792, + "step": 1720 + }, + { + "epoch": 0.18248945147679324, + "grad_norm": 0.6367082595825195, + "learning_rate": 0.0015, + "loss": 1.7922, + "step": 1730 + }, + { + "epoch": 0.18354430379746836, + "grad_norm": 0.7677233219146729, + "learning_rate": 0.0015, + "loss": 1.7996, + "step": 1740 + }, + { + "epoch": 0.18459915611814345, + "grad_norm": 0.5585650205612183, + "learning_rate": 0.0015, + "loss": 1.8174, + "step": 1750 + }, + { + "epoch": 0.18565400843881857, + "grad_norm": 0.5667433738708496, + "learning_rate": 0.0015, + "loss": 1.7963, + "step": 1760 + }, + { + "epoch": 0.18670886075949367, + "grad_norm": 0.5477187037467957, + "learning_rate": 0.0015, + "loss": 1.7944, + "step": 1770 + }, + { + "epoch": 0.1877637130801688, + "grad_norm": 0.5397399067878723, + "learning_rate": 0.0015, + "loss": 1.7973, + "step": 1780 + }, + { + "epoch": 0.18881856540084388, + "grad_norm": 0.5911719799041748, + "learning_rate": 0.0015, + "loss": 1.7985, + "step": 1790 + }, + { + "epoch": 0.189873417721519, + "grad_norm": 0.5183660984039307, + "learning_rate": 0.0015, + "loss": 1.7953, + "step": 1800 + }, + { + "epoch": 0.1909282700421941, + "grad_norm": 0.5636905431747437, + "learning_rate": 0.0015, + "loss": 1.781, + "step": 1810 + }, + { + "epoch": 0.19198312236286919, + "grad_norm": 0.48247769474983215, + "learning_rate": 0.0015, + "loss": 1.7824, + "step": 1820 + }, + { + "epoch": 0.1930379746835443, + "grad_norm": 0.5301914811134338, + "learning_rate": 0.0015, + "loss": 1.7798, + "step": 1830 + }, + { + "epoch": 0.1940928270042194, + "grad_norm": 0.5747403502464294, + "learning_rate": 0.0015, + "loss": 1.7785, + "step": 1840 + }, + { + "epoch": 0.19514767932489452, + "grad_norm": 0.7378188371658325, + "learning_rate": 0.0015, + "loss": 1.7845, + "step": 1850 + }, + { + "epoch": 0.1962025316455696, + 
"grad_norm": 0.6788085699081421, + "learning_rate": 0.0015, + "loss": 1.7878, + "step": 1860 + }, + { + "epoch": 0.19725738396624473, + "grad_norm": 0.5520949959754944, + "learning_rate": 0.0015, + "loss": 1.7903, + "step": 1870 + }, + { + "epoch": 0.19831223628691982, + "grad_norm": 0.5203332304954529, + "learning_rate": 0.0015, + "loss": 1.7724, + "step": 1880 + }, + { + "epoch": 0.19936708860759494, + "grad_norm": 0.493672639131546, + "learning_rate": 0.0015, + "loss": 1.7759, + "step": 1890 + }, + { + "epoch": 0.20042194092827004, + "grad_norm": 0.7212117910385132, + "learning_rate": 0.0015, + "loss": 1.776, + "step": 1900 + }, + { + "epoch": 0.20147679324894516, + "grad_norm": 0.6064359545707703, + "learning_rate": 0.0015, + "loss": 1.779, + "step": 1910 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 0.5475752949714661, + "learning_rate": 0.0015, + "loss": 1.7753, + "step": 1920 + }, + { + "epoch": 0.20358649789029537, + "grad_norm": 0.7544367909431458, + "learning_rate": 0.0015, + "loss": 1.7782, + "step": 1930 + }, + { + "epoch": 0.20464135021097046, + "grad_norm": 0.500032901763916, + "learning_rate": 0.0015, + "loss": 1.7629, + "step": 1940 + }, + { + "epoch": 0.20569620253164558, + "grad_norm": 0.5170471668243408, + "learning_rate": 0.0015, + "loss": 1.7681, + "step": 1950 + }, + { + "epoch": 0.20675105485232068, + "grad_norm": 0.4910539984703064, + "learning_rate": 0.0015, + "loss": 1.7693, + "step": 1960 + }, + { + "epoch": 0.20780590717299577, + "grad_norm": 0.49903246760368347, + "learning_rate": 0.0015, + "loss": 1.7788, + "step": 1970 + }, + { + "epoch": 0.2088607594936709, + "grad_norm": 0.5248587131500244, + "learning_rate": 0.0015, + "loss": 1.7596, + "step": 1980 + }, + { + "epoch": 0.20991561181434598, + "grad_norm": 0.5389870405197144, + "learning_rate": 0.0015, + "loss": 1.7581, + "step": 1990 + }, + { + "epoch": 0.2109704641350211, + "grad_norm": 0.4865356385707855, + "learning_rate": 0.0015, + "loss": 1.7594, + "step": 2000 + }, + 
{ + "epoch": 0.2120253164556962, + "grad_norm": 0.7490705251693726, + "learning_rate": 0.0015, + "loss": 1.7667, + "step": 2010 + }, + { + "epoch": 0.21308016877637131, + "grad_norm": 0.5300907492637634, + "learning_rate": 0.0015, + "loss": 1.7771, + "step": 2020 + }, + { + "epoch": 0.2141350210970464, + "grad_norm": 0.5607280731201172, + "learning_rate": 0.0015, + "loss": 1.7614, + "step": 2030 + }, + { + "epoch": 0.21518987341772153, + "grad_norm": 0.5055286288261414, + "learning_rate": 0.0015, + "loss": 1.7614, + "step": 2040 + }, + { + "epoch": 0.21624472573839662, + "grad_norm": 0.49640461802482605, + "learning_rate": 0.0015, + "loss": 1.7599, + "step": 2050 + }, + { + "epoch": 0.21729957805907174, + "grad_norm": 0.5747355818748474, + "learning_rate": 0.0015, + "loss": 1.7533, + "step": 2060 + }, + { + "epoch": 0.21835443037974683, + "grad_norm": 0.5841211676597595, + "learning_rate": 0.0015, + "loss": 1.7532, + "step": 2070 + }, + { + "epoch": 0.21940928270042195, + "grad_norm": 0.556130588054657, + "learning_rate": 0.0015, + "loss": 1.7594, + "step": 2080 + }, + { + "epoch": 0.22046413502109705, + "grad_norm": 0.5464954972267151, + "learning_rate": 0.0015, + "loss": 1.7485, + "step": 2090 + }, + { + "epoch": 0.22151898734177214, + "grad_norm": 0.5646750926971436, + "learning_rate": 0.0015, + "loss": 1.7589, + "step": 2100 + }, + { + "epoch": 0.22257383966244726, + "grad_norm": 0.5072766542434692, + "learning_rate": 0.0015, + "loss": 1.7465, + "step": 2110 + }, + { + "epoch": 0.22362869198312235, + "grad_norm": 0.6742327213287354, + "learning_rate": 0.0015, + "loss": 1.7484, + "step": 2120 + }, + { + "epoch": 0.22468354430379747, + "grad_norm": 0.5244772434234619, + "learning_rate": 0.0015, + "loss": 1.7498, + "step": 2130 + }, + { + "epoch": 0.22573839662447256, + "grad_norm": 0.5155846476554871, + "learning_rate": 0.0015, + "loss": 1.743, + "step": 2140 + }, + { + "epoch": 0.22679324894514769, + "grad_norm": 0.5239354968070984, + "learning_rate": 0.0015, + 
"loss": 1.7481, + "step": 2150 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 0.785340428352356, + "learning_rate": 0.0015, + "loss": 1.7607, + "step": 2160 + }, + { + "epoch": 0.2289029535864979, + "grad_norm": 0.5329594612121582, + "learning_rate": 0.0015, + "loss": 1.7494, + "step": 2170 + }, + { + "epoch": 0.229957805907173, + "grad_norm": 0.5298193097114563, + "learning_rate": 0.0015, + "loss": 1.7292, + "step": 2180 + }, + { + "epoch": 0.2310126582278481, + "grad_norm": 0.6361938714981079, + "learning_rate": 0.0015, + "loss": 1.7474, + "step": 2190 + }, + { + "epoch": 0.2320675105485232, + "grad_norm": 0.5725830793380737, + "learning_rate": 0.0015, + "loss": 1.7449, + "step": 2200 + }, + { + "epoch": 0.23312236286919832, + "grad_norm": 0.4911138117313385, + "learning_rate": 0.0015, + "loss": 1.7291, + "step": 2210 + }, + { + "epoch": 0.23417721518987342, + "grad_norm": 0.511965811252594, + "learning_rate": 0.0015, + "loss": 1.7476, + "step": 2220 + }, + { + "epoch": 0.23523206751054854, + "grad_norm": 0.7234761118888855, + "learning_rate": 0.0015, + "loss": 1.7405, + "step": 2230 + }, + { + "epoch": 0.23628691983122363, + "grad_norm": 0.7289201021194458, + "learning_rate": 0.0015, + "loss": 1.7395, + "step": 2240 + }, + { + "epoch": 0.23734177215189872, + "grad_norm": 0.8702979683876038, + "learning_rate": 0.0015, + "loss": 1.734, + "step": 2250 + }, + { + "epoch": 0.23839662447257384, + "grad_norm": 0.5554032921791077, + "learning_rate": 0.0015, + "loss": 1.7276, + "step": 2260 + }, + { + "epoch": 0.23945147679324894, + "grad_norm": 0.5437158346176147, + "learning_rate": 0.0015, + "loss": 1.7579, + "step": 2270 + }, + { + "epoch": 0.24050632911392406, + "grad_norm": 0.5666341185569763, + "learning_rate": 0.0015, + "loss": 1.7411, + "step": 2280 + }, + { + "epoch": 0.24156118143459915, + "grad_norm": 0.5575513243675232, + "learning_rate": 0.0015, + "loss": 1.7317, + "step": 2290 + }, + { + "epoch": 0.24261603375527427, + "grad_norm": 
0.4892849624156952, + "learning_rate": 0.0015, + "loss": 1.732, + "step": 2300 + }, + { + "epoch": 0.24367088607594936, + "grad_norm": 0.5326911807060242, + "learning_rate": 0.0015, + "loss": 1.7324, + "step": 2310 + }, + { + "epoch": 0.24472573839662448, + "grad_norm": 0.492562472820282, + "learning_rate": 0.0015, + "loss": 1.7379, + "step": 2320 + }, + { + "epoch": 0.24578059071729957, + "grad_norm": 0.5067744255065918, + "learning_rate": 0.0015, + "loss": 1.7198, + "step": 2330 + }, + { + "epoch": 0.2468354430379747, + "grad_norm": 0.5157293677330017, + "learning_rate": 0.0015, + "loss": 1.7201, + "step": 2340 + }, + { + "epoch": 0.2478902953586498, + "grad_norm": 0.8337791562080383, + "learning_rate": 0.0015, + "loss": 1.7163, + "step": 2350 + }, + { + "epoch": 0.2489451476793249, + "grad_norm": 0.6756242513656616, + "learning_rate": 0.0015, + "loss": 1.7335, + "step": 2360 + }, + { + "epoch": 0.25, + "grad_norm": 0.49481797218322754, + "learning_rate": 0.0015, + "loss": 1.7266, + "step": 2370 + }, + { + "epoch": 0.2510548523206751, + "grad_norm": 0.5397338271141052, + "learning_rate": 0.0015, + "loss": 1.7254, + "step": 2380 + }, + { + "epoch": 0.2521097046413502, + "grad_norm": 0.5521544218063354, + "learning_rate": 0.0015, + "loss": 1.7237, + "step": 2390 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 0.4965150058269501, + "learning_rate": 0.0015, + "loss": 1.7259, + "step": 2400 + }, + { + "epoch": 0.2542194092827004, + "grad_norm": 0.49155551195144653, + "learning_rate": 0.0015, + "loss": 1.7284, + "step": 2410 + }, + { + "epoch": 0.2552742616033755, + "grad_norm": 0.5060924291610718, + "learning_rate": 0.0015, + "loss": 1.718, + "step": 2420 + }, + { + "epoch": 0.2563291139240506, + "grad_norm": 0.5489485859870911, + "learning_rate": 0.0015, + "loss": 1.7191, + "step": 2430 + }, + { + "epoch": 0.25738396624472576, + "grad_norm": 0.6810722351074219, + "learning_rate": 0.0015, + "loss": 1.728, + "step": 2440 + }, + { + "epoch": 0.25843881856540085, 
+ "grad_norm": 0.48549798130989075, + "learning_rate": 0.0015, + "loss": 1.7109, + "step": 2450 + }, + { + "epoch": 0.25949367088607594, + "grad_norm": 0.5303010940551758, + "learning_rate": 0.0015, + "loss": 1.7278, + "step": 2460 + }, + { + "epoch": 0.26054852320675104, + "grad_norm": 0.5764036178588867, + "learning_rate": 0.0015, + "loss": 1.7182, + "step": 2470 + }, + { + "epoch": 0.2616033755274262, + "grad_norm": 0.5024107694625854, + "learning_rate": 0.0015, + "loss": 1.7202, + "step": 2480 + }, + { + "epoch": 0.2626582278481013, + "grad_norm": 0.5058770179748535, + "learning_rate": 0.0015, + "loss": 1.7105, + "step": 2490 + }, + { + "epoch": 0.26371308016877637, + "grad_norm": 0.5011932253837585, + "learning_rate": 0.0015, + "loss": 1.713, + "step": 2500 + }, + { + "epoch": 0.26476793248945146, + "grad_norm": 0.5703150033950806, + "learning_rate": 0.0015, + "loss": 1.7159, + "step": 2510 + }, + { + "epoch": 0.26582278481012656, + "grad_norm": 0.6708720326423645, + "learning_rate": 0.0015, + "loss": 1.7177, + "step": 2520 + }, + { + "epoch": 0.2668776371308017, + "grad_norm": 0.6345089673995972, + "learning_rate": 0.0015, + "loss": 1.7132, + "step": 2530 + }, + { + "epoch": 0.2679324894514768, + "grad_norm": 0.5322442650794983, + "learning_rate": 0.0015, + "loss": 1.7149, + "step": 2540 + }, + { + "epoch": 0.2689873417721519, + "grad_norm": 0.4987994134426117, + "learning_rate": 0.0015, + "loss": 1.7082, + "step": 2550 + }, + { + "epoch": 0.270042194092827, + "grad_norm": 0.522023618221283, + "learning_rate": 0.0015, + "loss": 1.7186, + "step": 2560 + }, + { + "epoch": 0.27109704641350213, + "grad_norm": 0.5543396472930908, + "learning_rate": 0.0015, + "loss": 1.7104, + "step": 2570 + }, + { + "epoch": 0.2721518987341772, + "grad_norm": 0.4878558814525604, + "learning_rate": 0.0015, + "loss": 1.71, + "step": 2580 + }, + { + "epoch": 0.2732067510548523, + "grad_norm": 0.6076152324676514, + "learning_rate": 0.0015, + "loss": 1.7165, + "step": 2590 + }, + { + 
"epoch": 0.2742616033755274, + "grad_norm": 0.543730616569519, + "learning_rate": 0.0015, + "loss": 1.7188, + "step": 2600 + }, + { + "epoch": 0.27531645569620256, + "grad_norm": 0.5020783543586731, + "learning_rate": 0.0015, + "loss": 1.7156, + "step": 2610 + }, + { + "epoch": 0.27637130801687765, + "grad_norm": 0.5225809812545776, + "learning_rate": 0.0015, + "loss": 1.7125, + "step": 2620 + }, + { + "epoch": 0.27742616033755274, + "grad_norm": 0.5299288034439087, + "learning_rate": 0.0015, + "loss": 1.7007, + "step": 2630 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 0.5860670804977417, + "learning_rate": 0.0015, + "loss": 1.7142, + "step": 2640 + }, + { + "epoch": 0.2795358649789029, + "grad_norm": 0.6921345591545105, + "learning_rate": 0.0015, + "loss": 1.7055, + "step": 2650 + }, + { + "epoch": 0.2805907172995781, + "grad_norm": 0.5481234788894653, + "learning_rate": 0.0015, + "loss": 1.701, + "step": 2660 + }, + { + "epoch": 0.28164556962025317, + "grad_norm": 0.6057475805282593, + "learning_rate": 0.0015, + "loss": 1.7006, + "step": 2670 + }, + { + "epoch": 0.28270042194092826, + "grad_norm": 0.4999600648880005, + "learning_rate": 0.0015, + "loss": 1.7015, + "step": 2680 + }, + { + "epoch": 0.28375527426160335, + "grad_norm": 0.4732488989830017, + "learning_rate": 0.0015, + "loss": 1.7018, + "step": 2690 + }, + { + "epoch": 0.2848101265822785, + "grad_norm": 0.5896801948547363, + "learning_rate": 0.0015, + "loss": 1.6991, + "step": 2700 + }, + { + "epoch": 0.2858649789029536, + "grad_norm": 0.5418528914451599, + "learning_rate": 0.0015, + "loss": 1.7039, + "step": 2710 + }, + { + "epoch": 0.2869198312236287, + "grad_norm": 0.5658354759216309, + "learning_rate": 0.0015, + "loss": 1.7038, + "step": 2720 + }, + { + "epoch": 0.2879746835443038, + "grad_norm": 0.5932666659355164, + "learning_rate": 0.0015, + "loss": 1.6849, + "step": 2730 + }, + { + "epoch": 0.2890295358649789, + "grad_norm": 0.7028477787971497, + "learning_rate": 0.0015, + "loss": 
1.6998, + "step": 2740 + }, + { + "epoch": 0.290084388185654, + "grad_norm": 0.5941743850708008, + "learning_rate": 0.0015, + "loss": 1.7049, + "step": 2750 + }, + { + "epoch": 0.2911392405063291, + "grad_norm": 0.6618591547012329, + "learning_rate": 0.0015, + "loss": 1.7127, + "step": 2760 + }, + { + "epoch": 0.2921940928270042, + "grad_norm": 0.5442028641700745, + "learning_rate": 0.0015, + "loss": 1.7057, + "step": 2770 + }, + { + "epoch": 0.29324894514767935, + "grad_norm": 0.4998684823513031, + "learning_rate": 0.0015, + "loss": 1.6994, + "step": 2780 + }, + { + "epoch": 0.29430379746835444, + "grad_norm": 0.5061076879501343, + "learning_rate": 0.0015, + "loss": 1.7003, + "step": 2790 + }, + { + "epoch": 0.29535864978902954, + "grad_norm": 0.5140838027000427, + "learning_rate": 0.0015, + "loss": 1.6979, + "step": 2800 + }, + { + "epoch": 0.29641350210970463, + "grad_norm": 0.5082165002822876, + "learning_rate": 0.0015, + "loss": 1.7007, + "step": 2810 + }, + { + "epoch": 0.2974683544303797, + "grad_norm": 0.7357332706451416, + "learning_rate": 0.0015, + "loss": 1.7049, + "step": 2820 + }, + { + "epoch": 0.29852320675105487, + "grad_norm": 0.6374452710151672, + "learning_rate": 0.0015, + "loss": 1.6758, + "step": 2830 + }, + { + "epoch": 0.29957805907172996, + "grad_norm": 0.49157869815826416, + "learning_rate": 0.0015, + "loss": 1.682, + "step": 2840 + }, + { + "epoch": 0.30063291139240506, + "grad_norm": 0.6754854917526245, + "learning_rate": 0.0015, + "loss": 1.691, + "step": 2850 + }, + { + "epoch": 0.30168776371308015, + "grad_norm": 0.7517315149307251, + "learning_rate": 0.0015, + "loss": 1.6945, + "step": 2860 + }, + { + "epoch": 0.3027426160337553, + "grad_norm": 0.6263560056686401, + "learning_rate": 0.0015, + "loss": 1.6954, + "step": 2870 + }, + { + "epoch": 0.3037974683544304, + "grad_norm": 0.5294128060340881, + "learning_rate": 0.0015, + "loss": 1.6782, + "step": 2880 + }, + { + "epoch": 0.3048523206751055, + "grad_norm": 0.5521923303604126, + 
"learning_rate": 0.0015, + "loss": 1.685, + "step": 2890 + }, + { + "epoch": 0.3059071729957806, + "grad_norm": 0.5302305817604065, + "learning_rate": 0.0015, + "loss": 1.6894, + "step": 2900 + }, + { + "epoch": 0.3069620253164557, + "grad_norm": 0.5109966993331909, + "learning_rate": 0.0015, + "loss": 1.691, + "step": 2910 + }, + { + "epoch": 0.3080168776371308, + "grad_norm": 0.5606354475021362, + "learning_rate": 0.0015, + "loss": 1.6832, + "step": 2920 + }, + { + "epoch": 0.3090717299578059, + "grad_norm": 0.5499821901321411, + "learning_rate": 0.0015, + "loss": 1.6811, + "step": 2930 + }, + { + "epoch": 0.310126582278481, + "grad_norm": 0.4843187928199768, + "learning_rate": 0.0015, + "loss": 1.6836, + "step": 2940 + }, + { + "epoch": 0.3111814345991561, + "grad_norm": 0.5288180708885193, + "learning_rate": 0.0015, + "loss": 1.696, + "step": 2950 + }, + { + "epoch": 0.31223628691983124, + "grad_norm": 0.5568428039550781, + "learning_rate": 0.0015, + "loss": 1.6743, + "step": 2960 + }, + { + "epoch": 0.31329113924050633, + "grad_norm": 0.5189091563224792, + "learning_rate": 0.0015, + "loss": 1.6704, + "step": 2970 + }, + { + "epoch": 0.3143459915611814, + "grad_norm": 0.5689852237701416, + "learning_rate": 0.0015, + "loss": 1.6757, + "step": 2980 + }, + { + "epoch": 0.3154008438818565, + "grad_norm": 0.5281962752342224, + "learning_rate": 0.0015, + "loss": 1.6953, + "step": 2990 + }, + { + "epoch": 0.31645569620253167, + "grad_norm": 0.5465761423110962, + "learning_rate": 0.0015, + "loss": 1.6702, + "step": 3000 + }, + { + "epoch": 0.31751054852320676, + "grad_norm": 0.5517351627349854, + "learning_rate": 0.0015, + "loss": 1.6808, + "step": 3010 + }, + { + "epoch": 0.31856540084388185, + "grad_norm": 0.5710799694061279, + "learning_rate": 0.0015, + "loss": 1.6791, + "step": 3020 + }, + { + "epoch": 0.31962025316455694, + "grad_norm": 0.5462856292724609, + "learning_rate": 0.0015, + "loss": 1.6686, + "step": 3030 + }, + { + "epoch": 0.3206751054852321, + 
"grad_norm": 0.48896288871765137, + "learning_rate": 0.0015, + "loss": 1.6757, + "step": 3040 + }, + { + "epoch": 0.3217299578059072, + "grad_norm": 0.5136091709136963, + "learning_rate": 0.0015, + "loss": 1.6749, + "step": 3050 + }, + { + "epoch": 0.3227848101265823, + "grad_norm": 0.7239934802055359, + "learning_rate": 0.0015, + "loss": 1.686, + "step": 3060 + }, + { + "epoch": 0.32383966244725737, + "grad_norm": 0.5063762068748474, + "learning_rate": 0.0015, + "loss": 1.6787, + "step": 3070 + }, + { + "epoch": 0.32489451476793246, + "grad_norm": 0.6835044622421265, + "learning_rate": 0.0015, + "loss": 1.6652, + "step": 3080 + }, + { + "epoch": 0.3259493670886076, + "grad_norm": 0.5106092095375061, + "learning_rate": 0.0015, + "loss": 1.68, + "step": 3090 + }, + { + "epoch": 0.3270042194092827, + "grad_norm": 0.5374235510826111, + "learning_rate": 0.0015, + "loss": 1.6696, + "step": 3100 + }, + { + "epoch": 0.3280590717299578, + "grad_norm": 0.525035560131073, + "learning_rate": 0.0015, + "loss": 1.672, + "step": 3110 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 0.602698564529419, + "learning_rate": 0.0015, + "loss": 1.6747, + "step": 3120 + }, + { + "epoch": 0.33016877637130804, + "grad_norm": 0.6721233129501343, + "learning_rate": 0.0015, + "loss": 1.6884, + "step": 3130 + }, + { + "epoch": 0.33122362869198313, + "grad_norm": 0.7102489471435547, + "learning_rate": 0.0015, + "loss": 1.6634, + "step": 3140 + }, + { + "epoch": 0.3322784810126582, + "grad_norm": 0.7099002599716187, + "learning_rate": 0.0015, + "loss": 1.6765, + "step": 3150 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.6599347591400146, + "learning_rate": 0.0015, + "loss": 1.6739, + "step": 3160 + }, + { + "epoch": 0.33438818565400846, + "grad_norm": 0.5314111709594727, + "learning_rate": 0.0015, + "loss": 1.6758, + "step": 3170 + }, + { + "epoch": 0.33544303797468356, + "grad_norm": 0.5334101915359497, + "learning_rate": 0.0015, + "loss": 1.6691, + "step": 3180 + }, + { + 
"epoch": 0.33649789029535865, + "grad_norm": 0.5805719494819641, + "learning_rate": 0.0015, + "loss": 1.6683, + "step": 3190 + }, + { + "epoch": 0.33755274261603374, + "grad_norm": 0.5329128503799438, + "learning_rate": 0.0015, + "loss": 1.665, + "step": 3200 + }, + { + "epoch": 0.33860759493670883, + "grad_norm": 0.5818187594413757, + "learning_rate": 0.0015, + "loss": 1.6758, + "step": 3210 + }, + { + "epoch": 0.339662447257384, + "grad_norm": 0.5500432252883911, + "learning_rate": 0.0015, + "loss": 1.67, + "step": 3220 + }, + { + "epoch": 0.3407172995780591, + "grad_norm": 0.5137699842453003, + "learning_rate": 0.0015, + "loss": 1.6612, + "step": 3230 + }, + { + "epoch": 0.34177215189873417, + "grad_norm": 0.6000794172286987, + "learning_rate": 0.0015, + "loss": 1.6463, + "step": 3240 + }, + { + "epoch": 0.34282700421940926, + "grad_norm": 0.528067946434021, + "learning_rate": 0.0015, + "loss": 1.6692, + "step": 3250 + }, + { + "epoch": 0.3438818565400844, + "grad_norm": 0.5943162441253662, + "learning_rate": 0.0015, + "loss": 1.6603, + "step": 3260 + }, + { + "epoch": 0.3449367088607595, + "grad_norm": 0.6426229476928711, + "learning_rate": 0.0015, + "loss": 1.6753, + "step": 3270 + }, + { + "epoch": 0.3459915611814346, + "grad_norm": 0.5630403161048889, + "learning_rate": 0.0015, + "loss": 1.6755, + "step": 3280 + }, + { + "epoch": 0.3470464135021097, + "grad_norm": 0.5272098183631897, + "learning_rate": 0.0015, + "loss": 1.6579, + "step": 3290 + }, + { + "epoch": 0.34810126582278483, + "grad_norm": 0.5537458062171936, + "learning_rate": 0.0015, + "loss": 1.668, + "step": 3300 + }, + { + "epoch": 0.3491561181434599, + "grad_norm": 0.4610064923763275, + "learning_rate": 0.0015, + "loss": 1.668, + "step": 3310 + }, + { + "epoch": 0.350210970464135, + "grad_norm": 0.4902813732624054, + "learning_rate": 0.0015, + "loss": 1.6727, + "step": 3320 + }, + { + "epoch": 0.3512658227848101, + "grad_norm": 0.5457850098609924, + "learning_rate": 0.0015, + "loss": 1.6639, + 
"step": 3330 + }, + { + "epoch": 0.35232067510548526, + "grad_norm": 0.5465056300163269, + "learning_rate": 0.0015, + "loss": 1.6655, + "step": 3340 + }, + { + "epoch": 0.35337552742616035, + "grad_norm": 0.619257390499115, + "learning_rate": 0.0015, + "loss": 1.6541, + "step": 3350 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 0.5009593963623047, + "learning_rate": 0.0015, + "loss": 1.6578, + "step": 3360 + }, + { + "epoch": 0.35548523206751054, + "grad_norm": 0.5059512257575989, + "learning_rate": 0.0015, + "loss": 1.6642, + "step": 3370 + }, + { + "epoch": 0.35654008438818563, + "grad_norm": 0.5618055462837219, + "learning_rate": 0.0015, + "loss": 1.6619, + "step": 3380 + }, + { + "epoch": 0.3575949367088608, + "grad_norm": 0.48470979928970337, + "learning_rate": 0.0015, + "loss": 1.6549, + "step": 3390 + }, + { + "epoch": 0.35864978902953587, + "grad_norm": 0.47895410656929016, + "learning_rate": 0.0015, + "loss": 1.663, + "step": 3400 + }, + { + "epoch": 0.35970464135021096, + "grad_norm": 0.5176829695701599, + "learning_rate": 0.0015, + "loss": 1.6607, + "step": 3410 + }, + { + "epoch": 0.36075949367088606, + "grad_norm": 0.5178171992301941, + "learning_rate": 0.0015, + "loss": 1.6412, + "step": 3420 + }, + { + "epoch": 0.3618143459915612, + "grad_norm": 0.5151075124740601, + "learning_rate": 0.0015, + "loss": 1.6592, + "step": 3430 + }, + { + "epoch": 0.3628691983122363, + "grad_norm": 0.6114545464515686, + "learning_rate": 0.0015, + "loss": 1.6556, + "step": 3440 + }, + { + "epoch": 0.3639240506329114, + "grad_norm": 0.47311899065971375, + "learning_rate": 0.0015, + "loss": 1.653, + "step": 3450 + }, + { + "epoch": 0.3649789029535865, + "grad_norm": 0.5413276553153992, + "learning_rate": 0.0015, + "loss": 1.6437, + "step": 3460 + }, + { + "epoch": 0.36603375527426163, + "grad_norm": 0.519906222820282, + "learning_rate": 0.0015, + "loss": 1.6641, + "step": 3470 + }, + { + "epoch": 0.3670886075949367, + "grad_norm": 0.5208349823951721, + 
"learning_rate": 0.0015, + "loss": 1.6639, + "step": 3480 + }, + { + "epoch": 0.3681434599156118, + "grad_norm": 0.6529089212417603, + "learning_rate": 0.0015, + "loss": 1.6634, + "step": 3490 + }, + { + "epoch": 0.3691983122362869, + "grad_norm": 0.5094889998435974, + "learning_rate": 0.0015, + "loss": 1.6426, + "step": 3500 + }, + { + "epoch": 0.370253164556962, + "grad_norm": 0.6394805312156677, + "learning_rate": 0.0015, + "loss": 1.6645, + "step": 3510 + }, + { + "epoch": 0.37130801687763715, + "grad_norm": 0.5326097011566162, + "learning_rate": 0.0015, + "loss": 1.6572, + "step": 3520 + }, + { + "epoch": 0.37236286919831224, + "grad_norm": 0.596369206905365, + "learning_rate": 0.0015, + "loss": 1.6655, + "step": 3530 + }, + { + "epoch": 0.37341772151898733, + "grad_norm": 0.5044280290603638, + "learning_rate": 0.0015, + "loss": 1.6586, + "step": 3540 + }, + { + "epoch": 0.3744725738396624, + "grad_norm": 0.5055898427963257, + "learning_rate": 0.0015, + "loss": 1.6516, + "step": 3550 + }, + { + "epoch": 0.3755274261603376, + "grad_norm": 0.5398617386817932, + "learning_rate": 0.0015, + "loss": 1.649, + "step": 3560 + }, + { + "epoch": 0.37658227848101267, + "grad_norm": 0.5790930390357971, + "learning_rate": 0.0015, + "loss": 1.6466, + "step": 3570 + }, + { + "epoch": 0.37763713080168776, + "grad_norm": 0.703459620475769, + "learning_rate": 0.0015, + "loss": 1.6486, + "step": 3580 + }, + { + "epoch": 0.37869198312236285, + "grad_norm": 0.664594829082489, + "learning_rate": 0.0015, + "loss": 1.6613, + "step": 3590 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 0.6581702828407288, + "learning_rate": 0.0015, + "loss": 1.6438, + "step": 3600 + }, + { + "epoch": 0.3808016877637131, + "grad_norm": 0.5328201651573181, + "learning_rate": 0.0015, + "loss": 1.6457, + "step": 3610 + }, + { + "epoch": 0.3818565400843882, + "grad_norm": 0.5390595197677612, + "learning_rate": 0.0015, + "loss": 1.6513, + "step": 3620 + }, + { + "epoch": 0.3829113924050633, + 
"grad_norm": 0.5801642537117004, + "learning_rate": 0.0015, + "loss": 1.6488, + "step": 3630 + }, + { + "epoch": 0.38396624472573837, + "grad_norm": 0.617379367351532, + "learning_rate": 0.0015, + "loss": 1.6504, + "step": 3640 + }, + { + "epoch": 0.3850210970464135, + "grad_norm": 0.60773766040802, + "learning_rate": 0.0015, + "loss": 1.6554, + "step": 3650 + }, + { + "epoch": 0.3860759493670886, + "grad_norm": 0.4956355690956116, + "learning_rate": 0.0015, + "loss": 1.649, + "step": 3660 + }, + { + "epoch": 0.3871308016877637, + "grad_norm": 0.4812815487384796, + "learning_rate": 0.0015, + "loss": 1.6414, + "step": 3670 + }, + { + "epoch": 0.3881856540084388, + "grad_norm": 0.8216853141784668, + "learning_rate": 0.0015, + "loss": 1.6443, + "step": 3680 + }, + { + "epoch": 0.38924050632911394, + "grad_norm": 0.5858913660049438, + "learning_rate": 0.0015, + "loss": 1.6594, + "step": 3690 + }, + { + "epoch": 0.39029535864978904, + "grad_norm": 0.6117913722991943, + "learning_rate": 0.0015, + "loss": 1.6602, + "step": 3700 + }, + { + "epoch": 0.39135021097046413, + "grad_norm": 0.501028299331665, + "learning_rate": 0.0015, + "loss": 1.6435, + "step": 3710 + }, + { + "epoch": 0.3924050632911392, + "grad_norm": 0.7885802388191223, + "learning_rate": 0.0015, + "loss": 1.6288, + "step": 3720 + }, + { + "epoch": 0.39345991561181437, + "grad_norm": 0.48646634817123413, + "learning_rate": 0.0015, + "loss": 1.6452, + "step": 3730 + }, + { + "epoch": 0.39451476793248946, + "grad_norm": 0.695210874080658, + "learning_rate": 0.0015, + "loss": 1.6473, + "step": 3740 + }, + { + "epoch": 0.39556962025316456, + "grad_norm": 0.5015830397605896, + "learning_rate": 0.0015, + "loss": 1.6485, + "step": 3750 + }, + { + "epoch": 0.39662447257383965, + "grad_norm": 0.5499297976493835, + "learning_rate": 0.0015, + "loss": 1.6374, + "step": 3760 + }, + { + "epoch": 0.39767932489451474, + "grad_norm": 0.5558696985244751, + "learning_rate": 0.0015, + "loss": 1.6545, + "step": 3770 + }, + { + 
"epoch": 0.3987341772151899, + "grad_norm": 0.5106779336929321, + "learning_rate": 0.0015, + "loss": 1.6397, + "step": 3780 + }, + { + "epoch": 0.399789029535865, + "grad_norm": 0.5692006945610046, + "learning_rate": 0.0015, + "loss": 1.644, + "step": 3790 + }, + { + "epoch": 0.4008438818565401, + "grad_norm": 0.5946459174156189, + "learning_rate": 0.0015, + "loss": 1.6486, + "step": 3800 + }, + { + "epoch": 0.40189873417721517, + "grad_norm": 0.6998482346534729, + "learning_rate": 0.0015, + "loss": 1.6432, + "step": 3810 + }, + { + "epoch": 0.4029535864978903, + "grad_norm": 0.5392434597015381, + "learning_rate": 0.0015, + "loss": 1.6283, + "step": 3820 + }, + { + "epoch": 0.4040084388185654, + "grad_norm": 0.6407002806663513, + "learning_rate": 0.0015, + "loss": 1.635, + "step": 3830 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 0.49721819162368774, + "learning_rate": 0.0015, + "loss": 1.6555, + "step": 3840 + }, + { + "epoch": 0.4061181434599156, + "grad_norm": 0.6670035123825073, + "learning_rate": 0.0015, + "loss": 1.6344, + "step": 3850 + }, + { + "epoch": 0.40717299578059074, + "grad_norm": 0.5335709452629089, + "learning_rate": 0.0015, + "loss": 1.6477, + "step": 3860 + }, + { + "epoch": 0.40822784810126583, + "grad_norm": 0.9207587838172913, + "learning_rate": 0.0015, + "loss": 1.6348, + "step": 3870 + }, + { + "epoch": 0.4092827004219409, + "grad_norm": 0.521172821521759, + "learning_rate": 0.0015, + "loss": 1.6272, + "step": 3880 + }, + { + "epoch": 0.410337552742616, + "grad_norm": 0.5497180223464966, + "learning_rate": 0.0015, + "loss": 1.6289, + "step": 3890 + }, + { + "epoch": 0.41139240506329117, + "grad_norm": 0.6907855868339539, + "learning_rate": 0.0015, + "loss": 1.6308, + "step": 3900 + }, + { + "epoch": 0.41244725738396626, + "grad_norm": 0.5962575078010559, + "learning_rate": 0.0015, + "loss": 1.6329, + "step": 3910 + }, + { + "epoch": 0.41350210970464135, + "grad_norm": 0.4992539882659912, + "learning_rate": 0.0015, + "loss": 
1.6323, + "step": 3920 + }, + { + "epoch": 0.41455696202531644, + "grad_norm": 0.5308951139450073, + "learning_rate": 0.0015, + "loss": 1.6334, + "step": 3930 + }, + { + "epoch": 0.41561181434599154, + "grad_norm": 0.5163594484329224, + "learning_rate": 0.0015, + "loss": 1.6294, + "step": 3940 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 0.5059428811073303, + "learning_rate": 0.0015, + "loss": 1.6337, + "step": 3950 + }, + { + "epoch": 0.4177215189873418, + "grad_norm": 0.5803700685501099, + "learning_rate": 0.0015, + "loss": 1.636, + "step": 3960 + }, + { + "epoch": 0.41877637130801687, + "grad_norm": 0.48982182145118713, + "learning_rate": 0.0015, + "loss": 1.6238, + "step": 3970 + }, + { + "epoch": 0.41983122362869196, + "grad_norm": 0.5099266767501831, + "learning_rate": 0.0015, + "loss": 1.6377, + "step": 3980 + }, + { + "epoch": 0.4208860759493671, + "grad_norm": 0.5116462707519531, + "learning_rate": 0.0015, + "loss": 1.626, + "step": 3990 + }, + { + "epoch": 0.4219409282700422, + "grad_norm": 0.5027227997779846, + "learning_rate": 0.0015, + "loss": 1.6412, + "step": 4000 + }, + { + "epoch": 0.4229957805907173, + "grad_norm": 0.4886154532432556, + "learning_rate": 0.0015, + "loss": 1.6289, + "step": 4010 + }, + { + "epoch": 0.4240506329113924, + "grad_norm": 0.5388848185539246, + "learning_rate": 0.0015, + "loss": 1.63, + "step": 4020 + }, + { + "epoch": 0.42510548523206754, + "grad_norm": 0.4805651009082794, + "learning_rate": 0.0015, + "loss": 1.624, + "step": 4030 + }, + { + "epoch": 0.42616033755274263, + "grad_norm": 0.5276669263839722, + "learning_rate": 0.0015, + "loss": 1.6333, + "step": 4040 + }, + { + "epoch": 0.4272151898734177, + "grad_norm": 0.5610337853431702, + "learning_rate": 0.0015, + "loss": 1.6349, + "step": 4050 + }, + { + "epoch": 0.4282700421940928, + "grad_norm": 0.655647873878479, + "learning_rate": 0.0015, + "loss": 1.6397, + "step": 4060 + }, + { + "epoch": 0.4293248945147679, + "grad_norm": 0.5363686680793762, + 
"learning_rate": 0.0015, + "loss": 1.632, + "step": 4070 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 0.516447901725769, + "learning_rate": 0.0015, + "loss": 1.6376, + "step": 4080 + }, + { + "epoch": 0.43143459915611815, + "grad_norm": 0.6272925138473511, + "learning_rate": 0.0015, + "loss": 1.631, + "step": 4090 + }, + { + "epoch": 0.43248945147679324, + "grad_norm": 0.5184757709503174, + "learning_rate": 0.0015, + "loss": 1.6296, + "step": 4100 + }, + { + "epoch": 0.43354430379746833, + "grad_norm": 0.5482593774795532, + "learning_rate": 0.0015, + "loss": 1.6237, + "step": 4110 + }, + { + "epoch": 0.4345991561181435, + "grad_norm": 0.6392119526863098, + "learning_rate": 0.0015, + "loss": 1.6268, + "step": 4120 + }, + { + "epoch": 0.4356540084388186, + "grad_norm": 0.592857301235199, + "learning_rate": 0.0015, + "loss": 1.623, + "step": 4130 + }, + { + "epoch": 0.43670886075949367, + "grad_norm": 0.6189137697219849, + "learning_rate": 0.0015, + "loss": 1.6271, + "step": 4140 + }, + { + "epoch": 0.43776371308016876, + "grad_norm": 0.5048899054527283, + "learning_rate": 0.0015, + "loss": 1.6331, + "step": 4150 + }, + { + "epoch": 0.4388185654008439, + "grad_norm": 0.548077404499054, + "learning_rate": 0.0015, + "loss": 1.6248, + "step": 4160 + }, + { + "epoch": 0.439873417721519, + "grad_norm": 0.5305324196815491, + "learning_rate": 0.0015, + "loss": 1.6288, + "step": 4170 + }, + { + "epoch": 0.4409282700421941, + "grad_norm": 0.5470110774040222, + "learning_rate": 0.0015, + "loss": 1.6222, + "step": 4180 + }, + { + "epoch": 0.4419831223628692, + "grad_norm": 0.5240048170089722, + "learning_rate": 0.0015, + "loss": 1.6257, + "step": 4190 + }, + { + "epoch": 0.4430379746835443, + "grad_norm": 0.46489861607551575, + "learning_rate": 0.0015, + "loss": 1.6275, + "step": 4200 + }, + { + "epoch": 0.4440928270042194, + "grad_norm": 0.601734459400177, + "learning_rate": 0.0015, + "loss": 1.6213, + "step": 4210 + }, + { + "epoch": 0.4451476793248945, + 
"grad_norm": 0.5368877649307251, + "learning_rate": 0.0015, + "loss": 1.6247, + "step": 4220 + }, + { + "epoch": 0.4462025316455696, + "grad_norm": 0.5832919478416443, + "learning_rate": 0.0015, + "loss": 1.6243, + "step": 4230 + }, + { + "epoch": 0.4472573839662447, + "grad_norm": 0.5244927406311035, + "learning_rate": 0.0015, + "loss": 1.6211, + "step": 4240 + }, + { + "epoch": 0.44831223628691985, + "grad_norm": 0.6408116221427917, + "learning_rate": 0.0015, + "loss": 1.6253, + "step": 4250 + }, + { + "epoch": 0.44936708860759494, + "grad_norm": 0.5419908761978149, + "learning_rate": 0.0015, + "loss": 1.6315, + "step": 4260 + }, + { + "epoch": 0.45042194092827004, + "grad_norm": 0.5621228218078613, + "learning_rate": 0.0015, + "loss": 1.6194, + "step": 4270 + }, + { + "epoch": 0.45147679324894513, + "grad_norm": 0.8005079030990601, + "learning_rate": 0.0015, + "loss": 1.6162, + "step": 4280 + }, + { + "epoch": 0.4525316455696203, + "grad_norm": 0.5439707636833191, + "learning_rate": 0.0015, + "loss": 1.6269, + "step": 4290 + }, + { + "epoch": 0.45358649789029537, + "grad_norm": 0.9677388072013855, + "learning_rate": 0.0015, + "loss": 1.6284, + "step": 4300 + }, + { + "epoch": 0.45464135021097046, + "grad_norm": 0.6481673717498779, + "learning_rate": 0.0015, + "loss": 1.6267, + "step": 4310 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 0.6467076539993286, + "learning_rate": 0.0015, + "loss": 1.6191, + "step": 4320 + }, + { + "epoch": 0.45675105485232065, + "grad_norm": 0.548640251159668, + "learning_rate": 0.0015, + "loss": 1.6271, + "step": 4330 + }, + { + "epoch": 0.4578059071729958, + "grad_norm": 0.5817282199859619, + "learning_rate": 0.0015, + "loss": 1.6157, + "step": 4340 + }, + { + "epoch": 0.4588607594936709, + "grad_norm": 0.5533577799797058, + "learning_rate": 0.0015, + "loss": 1.6209, + "step": 4350 + }, + { + "epoch": 0.459915611814346, + "grad_norm": 0.6503041982650757, + "learning_rate": 0.0015, + "loss": 1.6333, + "step": 4360 + }, + { + 
"epoch": 0.4609704641350211, + "grad_norm": 0.6527080535888672, + "learning_rate": 0.0015, + "loss": 1.6094, + "step": 4370 + }, + { + "epoch": 0.4620253164556962, + "grad_norm": 0.48602229356765747, + "learning_rate": 0.0015, + "loss": 1.6154, + "step": 4380 + }, + { + "epoch": 0.4630801687763713, + "grad_norm": 0.4748643934726715, + "learning_rate": 0.0015, + "loss": 1.6125, + "step": 4390 + }, + { + "epoch": 0.4641350210970464, + "grad_norm": 0.47677865624427795, + "learning_rate": 0.0015, + "loss": 1.6164, + "step": 4400 + }, + { + "epoch": 0.4651898734177215, + "grad_norm": 0.7703567743301392, + "learning_rate": 0.0015, + "loss": 1.6325, + "step": 4410 + }, + { + "epoch": 0.46624472573839665, + "grad_norm": 0.49584656953811646, + "learning_rate": 0.0015, + "loss": 1.6176, + "step": 4420 + }, + { + "epoch": 0.46729957805907174, + "grad_norm": 0.6466955542564392, + "learning_rate": 0.0015, + "loss": 1.6207, + "step": 4430 + }, + { + "epoch": 0.46835443037974683, + "grad_norm": 0.6483890414237976, + "learning_rate": 0.0015, + "loss": 1.616, + "step": 4440 + }, + { + "epoch": 0.4694092827004219, + "grad_norm": 0.5466180443763733, + "learning_rate": 0.0015, + "loss": 1.6265, + "step": 4450 + }, + { + "epoch": 0.4704641350210971, + "grad_norm": 0.46194952726364136, + "learning_rate": 0.0015, + "loss": 1.6127, + "step": 4460 + }, + { + "epoch": 0.47151898734177217, + "grad_norm": 0.5937331914901733, + "learning_rate": 0.0015, + "loss": 1.62, + "step": 4470 + }, + { + "epoch": 0.47257383966244726, + "grad_norm": 0.6078140735626221, + "learning_rate": 0.0015, + "loss": 1.6266, + "step": 4480 + }, + { + "epoch": 0.47362869198312235, + "grad_norm": 0.6964238882064819, + "learning_rate": 0.0015, + "loss": 1.615, + "step": 4490 + }, + { + "epoch": 0.47468354430379744, + "grad_norm": 0.4765681028366089, + "learning_rate": 0.0015, + "loss": 1.6143, + "step": 4500 + }, + { + "epoch": 0.4757383966244726, + "grad_norm": 0.587977409362793, + "learning_rate": 0.0015, + "loss": 
1.6196, + "step": 4510 + }, + { + "epoch": 0.4767932489451477, + "grad_norm": 0.5218415856361389, + "learning_rate": 0.0015, + "loss": 1.6009, + "step": 4520 + }, + { + "epoch": 0.4778481012658228, + "grad_norm": 0.6561583280563354, + "learning_rate": 0.0015, + "loss": 1.6191, + "step": 4530 + }, + { + "epoch": 0.47890295358649787, + "grad_norm": 0.4821780323982239, + "learning_rate": 0.0015, + "loss": 1.621, + "step": 4540 + }, + { + "epoch": 0.479957805907173, + "grad_norm": 0.48112690448760986, + "learning_rate": 0.0015, + "loss": 1.6199, + "step": 4550 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 0.5676143169403076, + "learning_rate": 0.0015, + "loss": 1.6087, + "step": 4560 + }, + { + "epoch": 0.4820675105485232, + "grad_norm": 0.5205967426300049, + "learning_rate": 0.0015, + "loss": 1.5904, + "step": 4570 + }, + { + "epoch": 0.4831223628691983, + "grad_norm": 0.5493068695068359, + "learning_rate": 0.0015, + "loss": 1.6138, + "step": 4580 + }, + { + "epoch": 0.48417721518987344, + "grad_norm": 0.5330948829650879, + "learning_rate": 0.0015, + "loss": 1.602, + "step": 4590 + }, + { + "epoch": 0.48523206751054854, + "grad_norm": 0.5802034735679626, + "learning_rate": 0.0015, + "loss": 1.6161, + "step": 4600 + }, + { + "epoch": 0.48628691983122363, + "grad_norm": 0.5258814096450806, + "learning_rate": 0.0015, + "loss": 1.6191, + "step": 4610 + }, + { + "epoch": 0.4873417721518987, + "grad_norm": 0.5792579054832458, + "learning_rate": 0.0015, + "loss": 1.5989, + "step": 4620 + }, + { + "epoch": 0.4883966244725738, + "grad_norm": 0.5287085175514221, + "learning_rate": 0.0015, + "loss": 1.6182, + "step": 4630 + }, + { + "epoch": 0.48945147679324896, + "grad_norm": 0.5056024789810181, + "learning_rate": 0.0015, + "loss": 1.6172, + "step": 4640 + }, + { + "epoch": 0.49050632911392406, + "grad_norm": 0.48456060886383057, + "learning_rate": 0.0015, + "loss": 1.622, + "step": 4650 + }, + { + "epoch": 0.49156118143459915, + "grad_norm": 0.5709035396575928, + 
"learning_rate": 0.0015, + "loss": 1.6087, + "step": 4660 + }, + { + "epoch": 0.49261603375527424, + "grad_norm": 0.5253095626831055, + "learning_rate": 0.0015, + "loss": 1.6051, + "step": 4670 + }, + { + "epoch": 0.4936708860759494, + "grad_norm": 0.5250982046127319, + "learning_rate": 0.0015, + "loss": 1.6023, + "step": 4680 + }, + { + "epoch": 0.4947257383966245, + "grad_norm": 0.6567180156707764, + "learning_rate": 0.0015, + "loss": 1.5982, + "step": 4690 + }, + { + "epoch": 0.4957805907172996, + "grad_norm": 0.8123635649681091, + "learning_rate": 0.0015, + "loss": 1.6124, + "step": 4700 + }, + { + "epoch": 0.49683544303797467, + "grad_norm": 0.49482253193855286, + "learning_rate": 0.0015, + "loss": 1.6157, + "step": 4710 + }, + { + "epoch": 0.4978902953586498, + "grad_norm": 0.890190601348877, + "learning_rate": 0.0015, + "loss": 1.6084, + "step": 4720 + }, + { + "epoch": 0.4989451476793249, + "grad_norm": 0.5279151201248169, + "learning_rate": 0.0015, + "loss": 1.6054, + "step": 4730 + }, + { + "epoch": 0.5, + "grad_norm": 0.4660314917564392, + "learning_rate": 0.0015, + "loss": 1.6062, + "step": 4740 + }, + { + "epoch": 0.5010548523206751, + "grad_norm": 0.5381559133529663, + "learning_rate": 0.0015, + "loss": 1.6235, + "step": 4750 + }, + { + "epoch": 0.5021097046413502, + "grad_norm": 0.5002992153167725, + "learning_rate": 0.0015, + "loss": 1.6056, + "step": 4760 + }, + { + "epoch": 0.5031645569620253, + "grad_norm": 0.5755791068077087, + "learning_rate": 0.0015, + "loss": 1.6062, + "step": 4770 + }, + { + "epoch": 0.5042194092827004, + "grad_norm": 0.6434073448181152, + "learning_rate": 0.0015, + "loss": 1.6168, + "step": 4780 + }, + { + "epoch": 0.5052742616033755, + "grad_norm": 0.5239230990409851, + "learning_rate": 0.0015, + "loss": 1.6, + "step": 4790 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 0.6032198071479797, + "learning_rate": 0.0015, + "loss": 1.6017, + "step": 4800 + }, + { + "epoch": 0.5073839662447257, + "grad_norm": 
0.5447028279304504, + "learning_rate": 0.0015, + "loss": 1.6058, + "step": 4810 + }, + { + "epoch": 0.5084388185654009, + "grad_norm": 0.5023834705352783, + "learning_rate": 0.0015, + "loss": 1.6249, + "step": 4820 + }, + { + "epoch": 0.509493670886076, + "grad_norm": 0.4979970455169678, + "learning_rate": 0.0015, + "loss": 1.6046, + "step": 4830 + }, + { + "epoch": 0.510548523206751, + "grad_norm": 0.5603806972503662, + "learning_rate": 0.0015, + "loss": 1.6044, + "step": 4840 + }, + { + "epoch": 0.5116033755274262, + "grad_norm": 0.5204450488090515, + "learning_rate": 0.0015, + "loss": 1.6155, + "step": 4850 + }, + { + "epoch": 0.5126582278481012, + "grad_norm": 0.506743311882019, + "learning_rate": 0.0015, + "loss": 1.5997, + "step": 4860 + }, + { + "epoch": 0.5137130801687764, + "grad_norm": 0.6346352100372314, + "learning_rate": 0.0015, + "loss": 1.5922, + "step": 4870 + }, + { + "epoch": 0.5147679324894515, + "grad_norm": 0.45884600281715393, + "learning_rate": 0.0015, + "loss": 1.6142, + "step": 4880 + }, + { + "epoch": 0.5158227848101266, + "grad_norm": 0.6494935750961304, + "learning_rate": 0.0015, + "loss": 1.6053, + "step": 4890 + }, + { + "epoch": 0.5168776371308017, + "grad_norm": 0.6602926850318909, + "learning_rate": 0.0015, + "loss": 1.6167, + "step": 4900 + }, + { + "epoch": 0.5179324894514767, + "grad_norm": 0.7757523059844971, + "learning_rate": 0.0015, + "loss": 1.6086, + "step": 4910 + }, + { + "epoch": 0.5189873417721519, + "grad_norm": 0.5537070631980896, + "learning_rate": 0.0015, + "loss": 1.5998, + "step": 4920 + }, + { + "epoch": 0.520042194092827, + "grad_norm": 0.47970086336135864, + "learning_rate": 0.0015, + "loss": 1.6062, + "step": 4930 + }, + { + "epoch": 0.5210970464135021, + "grad_norm": 0.5168854594230652, + "learning_rate": 0.0015, + "loss": 1.6029, + "step": 4940 + }, + { + "epoch": 0.5221518987341772, + "grad_norm": 0.5096169710159302, + "learning_rate": 0.0015, + "loss": 1.6122, + "step": 4950 + }, + { + "epoch": 
0.5232067510548524, + "grad_norm": 0.5339879393577576, + "learning_rate": 0.0015, + "loss": 1.5927, + "step": 4960 + }, + { + "epoch": 0.5242616033755274, + "grad_norm": 0.6085875630378723, + "learning_rate": 0.0015, + "loss": 1.6012, + "step": 4970 + }, + { + "epoch": 0.5253164556962026, + "grad_norm": 0.45492976903915405, + "learning_rate": 0.0015, + "loss": 1.5932, + "step": 4980 + }, + { + "epoch": 0.5263713080168776, + "grad_norm": 0.571357786655426, + "learning_rate": 0.0015, + "loss": 1.5973, + "step": 4990 + }, + { + "epoch": 0.5274261603375527, + "grad_norm": 0.5182305574417114, + "learning_rate": 0.0015, + "loss": 1.61, + "step": 5000 + }, + { + "epoch": 0.5284810126582279, + "grad_norm": 0.48681485652923584, + "learning_rate": 0.0015, + "loss": 1.6012, + "step": 5010 + }, + { + "epoch": 0.5295358649789029, + "grad_norm": 0.5068904161453247, + "learning_rate": 0.0015, + "loss": 1.5978, + "step": 5020 + }, + { + "epoch": 0.5305907172995781, + "grad_norm": 0.5633773803710938, + "learning_rate": 0.0015, + "loss": 1.5839, + "step": 5030 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 0.5287569165229797, + "learning_rate": 0.0015, + "loss": 1.6149, + "step": 5040 + }, + { + "epoch": 0.5327004219409283, + "grad_norm": 0.5018180012702942, + "learning_rate": 0.0015, + "loss": 1.5969, + "step": 5050 + }, + { + "epoch": 0.5337552742616034, + "grad_norm": 0.5136269927024841, + "learning_rate": 0.0015, + "loss": 1.6097, + "step": 5060 + }, + { + "epoch": 0.5348101265822784, + "grad_norm": 0.7598251104354858, + "learning_rate": 0.0015, + "loss": 1.5994, + "step": 5070 + }, + { + "epoch": 0.5358649789029536, + "grad_norm": 0.5178298354148865, + "learning_rate": 0.0015, + "loss": 1.6077, + "step": 5080 + }, + { + "epoch": 0.5369198312236287, + "grad_norm": 0.5299210548400879, + "learning_rate": 0.0015, + "loss": 1.6007, + "step": 5090 + }, + { + "epoch": 0.5379746835443038, + "grad_norm": 0.6395118832588196, + "learning_rate": 0.0015, + "loss": 1.5832, + "step": 
5100 + }, + { + "epoch": 0.5390295358649789, + "grad_norm": 0.5721453428268433, + "learning_rate": 0.0015, + "loss": 1.599, + "step": 5110 + }, + { + "epoch": 0.540084388185654, + "grad_norm": 0.5829804539680481, + "learning_rate": 0.0015, + "loss": 1.5969, + "step": 5120 + }, + { + "epoch": 0.5411392405063291, + "grad_norm": 0.6950172185897827, + "learning_rate": 0.0015, + "loss": 1.5963, + "step": 5130 + }, + { + "epoch": 0.5421940928270043, + "grad_norm": 0.45339399576187134, + "learning_rate": 0.0015, + "loss": 1.6084, + "step": 5140 + }, + { + "epoch": 0.5432489451476793, + "grad_norm": 0.4815180003643036, + "learning_rate": 0.0015, + "loss": 1.6043, + "step": 5150 + }, + { + "epoch": 0.5443037974683544, + "grad_norm": 0.5665709376335144, + "learning_rate": 0.0015, + "loss": 1.5967, + "step": 5160 + }, + { + "epoch": 0.5453586497890295, + "grad_norm": 0.675624668598175, + "learning_rate": 0.0015, + "loss": 1.5996, + "step": 5170 + }, + { + "epoch": 0.5464135021097046, + "grad_norm": 0.6026766896247864, + "learning_rate": 0.0015, + "loss": 1.6144, + "step": 5180 + }, + { + "epoch": 0.5474683544303798, + "grad_norm": 0.5501036047935486, + "learning_rate": 0.0015, + "loss": 1.5906, + "step": 5190 + }, + { + "epoch": 0.5485232067510548, + "grad_norm": 0.550432026386261, + "learning_rate": 0.0015, + "loss": 1.605, + "step": 5200 + }, + { + "epoch": 0.54957805907173, + "grad_norm": 0.5547890067100525, + "learning_rate": 0.0015, + "loss": 1.585, + "step": 5210 + }, + { + "epoch": 0.5506329113924051, + "grad_norm": 0.723808765411377, + "learning_rate": 0.0015, + "loss": 1.599, + "step": 5220 + }, + { + "epoch": 0.5516877637130801, + "grad_norm": 0.49636849761009216, + "learning_rate": 0.0015, + "loss": 1.5946, + "step": 5230 + }, + { + "epoch": 0.5527426160337553, + "grad_norm": 0.5607818961143494, + "learning_rate": 0.0015, + "loss": 1.6031, + "step": 5240 + }, + { + "epoch": 0.5537974683544303, + "grad_norm": 0.6797217130661011, + "learning_rate": 0.0015, + "loss": 
1.5943, + "step": 5250 + }, + { + "epoch": 0.5548523206751055, + "grad_norm": 0.4568845331668854, + "learning_rate": 0.0015, + "loss": 1.6004, + "step": 5260 + }, + { + "epoch": 0.5559071729957806, + "grad_norm": 0.5281131863594055, + "learning_rate": 0.0015, + "loss": 1.6026, + "step": 5270 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 0.537294328212738, + "learning_rate": 0.0015, + "loss": 1.597, + "step": 5280 + }, + { + "epoch": 0.5580168776371308, + "grad_norm": 0.6481627821922302, + "learning_rate": 0.0015, + "loss": 1.5959, + "step": 5290 + }, + { + "epoch": 0.5590717299578059, + "grad_norm": 0.511182427406311, + "learning_rate": 0.0015, + "loss": 1.6009, + "step": 5300 + }, + { + "epoch": 0.560126582278481, + "grad_norm": 0.5247224569320679, + "learning_rate": 0.0015, + "loss": 1.5932, + "step": 5310 + }, + { + "epoch": 0.5611814345991561, + "grad_norm": 0.743636965751648, + "learning_rate": 0.0015, + "loss": 1.5878, + "step": 5320 + }, + { + "epoch": 0.5622362869198312, + "grad_norm": 0.4788835346698761, + "learning_rate": 0.0015, + "loss": 1.5988, + "step": 5330 + }, + { + "epoch": 0.5632911392405063, + "grad_norm": 0.5223715305328369, + "learning_rate": 0.0015, + "loss": 1.5907, + "step": 5340 + }, + { + "epoch": 0.5643459915611815, + "grad_norm": 0.46506214141845703, + "learning_rate": 0.0015, + "loss": 1.5834, + "step": 5350 + }, + { + "epoch": 0.5654008438818565, + "grad_norm": 0.5525707006454468, + "learning_rate": 0.0015, + "loss": 1.5939, + "step": 5360 + }, + { + "epoch": 0.5664556962025317, + "grad_norm": 0.5212825536727905, + "learning_rate": 0.0015, + "loss": 1.5958, + "step": 5370 + }, + { + "epoch": 0.5675105485232067, + "grad_norm": 0.5370323061943054, + "learning_rate": 0.0015, + "loss": 1.5859, + "step": 5380 + }, + { + "epoch": 0.5685654008438819, + "grad_norm": 0.5655264854431152, + "learning_rate": 0.0015, + "loss": 1.5872, + "step": 5390 + }, + { + "epoch": 0.569620253164557, + "grad_norm": 0.5662801861763, + "learning_rate": 
0.0015, + "loss": 1.59, + "step": 5400 + }, + { + "epoch": 0.570675105485232, + "grad_norm": 0.5144532322883606, + "learning_rate": 0.0015, + "loss": 1.593, + "step": 5410 + }, + { + "epoch": 0.5717299578059072, + "grad_norm": 0.5201811790466309, + "learning_rate": 0.0015, + "loss": 1.5845, + "step": 5420 + }, + { + "epoch": 0.5727848101265823, + "grad_norm": 0.48978814482688904, + "learning_rate": 0.0015, + "loss": 1.5823, + "step": 5430 + }, + { + "epoch": 0.5738396624472574, + "grad_norm": 0.8404340147972107, + "learning_rate": 0.0015, + "loss": 1.584, + "step": 5440 + }, + { + "epoch": 0.5748945147679325, + "grad_norm": 0.5247283577919006, + "learning_rate": 0.0015, + "loss": 1.5933, + "step": 5450 + }, + { + "epoch": 0.5759493670886076, + "grad_norm": 0.5406564474105835, + "learning_rate": 0.0015, + "loss": 1.59, + "step": 5460 + }, + { + "epoch": 0.5770042194092827, + "grad_norm": 0.5276991128921509, + "learning_rate": 0.0015, + "loss": 1.5896, + "step": 5470 + }, + { + "epoch": 0.5780590717299579, + "grad_norm": 0.6486048102378845, + "learning_rate": 0.0015, + "loss": 1.5925, + "step": 5480 + }, + { + "epoch": 0.5791139240506329, + "grad_norm": 0.5306462049484253, + "learning_rate": 0.0015, + "loss": 1.5938, + "step": 5490 + }, + { + "epoch": 0.580168776371308, + "grad_norm": 0.6800550818443298, + "learning_rate": 0.0015, + "loss": 1.5871, + "step": 5500 + }, + { + "epoch": 0.5812236286919831, + "grad_norm": 0.49346840381622314, + "learning_rate": 0.0015, + "loss": 1.5914, + "step": 5510 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 0.5364195108413696, + "learning_rate": 0.0015, + "loss": 1.5859, + "step": 5520 + }, + { + "epoch": 0.5833333333333334, + "grad_norm": 0.5647665858268738, + "learning_rate": 0.0015, + "loss": 1.58, + "step": 5530 + }, + { + "epoch": 0.5843881856540084, + "grad_norm": 0.4905088245868683, + "learning_rate": 0.0015, + "loss": 1.5786, + "step": 5540 + }, + { + "epoch": 0.5854430379746836, + "grad_norm": 0.6447497606277466, + 
"learning_rate": 0.0015, + "loss": 1.5932, + "step": 5550 + }, + { + "epoch": 0.5864978902953587, + "grad_norm": 0.4846080243587494, + "learning_rate": 0.0015, + "loss": 1.6003, + "step": 5560 + }, + { + "epoch": 0.5875527426160337, + "grad_norm": 0.5495014190673828, + "learning_rate": 0.0015, + "loss": 1.5839, + "step": 5570 + }, + { + "epoch": 0.5886075949367089, + "grad_norm": 0.5885636210441589, + "learning_rate": 0.0015, + "loss": 1.5853, + "step": 5580 + }, + { + "epoch": 0.5896624472573839, + "grad_norm": 0.5110231637954712, + "learning_rate": 0.0015, + "loss": 1.5883, + "step": 5590 + }, + { + "epoch": 0.5907172995780591, + "grad_norm": 0.5421663522720337, + "learning_rate": 0.0015, + "loss": 1.5843, + "step": 5600 + }, + { + "epoch": 0.5917721518987342, + "grad_norm": 0.5510345101356506, + "learning_rate": 0.0015, + "loss": 1.5879, + "step": 5610 + }, + { + "epoch": 0.5928270042194093, + "grad_norm": 0.521382749080658, + "learning_rate": 0.0015, + "loss": 1.5992, + "step": 5620 + }, + { + "epoch": 0.5938818565400844, + "grad_norm": 0.8534302115440369, + "learning_rate": 0.0015, + "loss": 1.5905, + "step": 5630 + }, + { + "epoch": 0.5949367088607594, + "grad_norm": 0.5378811359405518, + "learning_rate": 0.0015, + "loss": 1.6018, + "step": 5640 + }, + { + "epoch": 0.5959915611814346, + "grad_norm": 0.5972623825073242, + "learning_rate": 0.0015, + "loss": 1.594, + "step": 5650 + }, + { + "epoch": 0.5970464135021097, + "grad_norm": 0.5757758021354675, + "learning_rate": 0.0015, + "loss": 1.5878, + "step": 5660 + }, + { + "epoch": 0.5981012658227848, + "grad_norm": 0.5366994142532349, + "learning_rate": 0.0015, + "loss": 1.595, + "step": 5670 + }, + { + "epoch": 0.5991561181434599, + "grad_norm": 0.4970906972885132, + "learning_rate": 0.0015, + "loss": 1.5949, + "step": 5680 + }, + { + "epoch": 0.6002109704641351, + "grad_norm": 0.6522812843322754, + "learning_rate": 0.0015, + "loss": 1.5759, + "step": 5690 + }, + { + "epoch": 0.6012658227848101, + "grad_norm": 
0.6594387888908386, + "learning_rate": 0.0015, + "loss": 1.5822, + "step": 5700 + }, + { + "epoch": 0.6023206751054853, + "grad_norm": 0.45507556200027466, + "learning_rate": 0.0015, + "loss": 1.5877, + "step": 5710 + }, + { + "epoch": 0.6033755274261603, + "grad_norm": 0.4963703751564026, + "learning_rate": 0.0015, + "loss": 1.5893, + "step": 5720 + }, + { + "epoch": 0.6044303797468354, + "grad_norm": 0.5383898615837097, + "learning_rate": 0.0015, + "loss": 1.5863, + "step": 5730 + }, + { + "epoch": 0.6054852320675106, + "grad_norm": 0.5389703512191772, + "learning_rate": 0.0015, + "loss": 1.5821, + "step": 5740 + }, + { + "epoch": 0.6065400843881856, + "grad_norm": 0.7132524251937866, + "learning_rate": 0.0015, + "loss": 1.5596, + "step": 5750 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 0.5984724760055542, + "learning_rate": 0.0015, + "loss": 1.5684, + "step": 5760 + }, + { + "epoch": 0.6086497890295358, + "grad_norm": 0.718887209892273, + "learning_rate": 0.0015, + "loss": 1.5727, + "step": 5770 + }, + { + "epoch": 0.609704641350211, + "grad_norm": 0.53238844871521, + "learning_rate": 0.0015, + "loss": 1.5812, + "step": 5780 + }, + { + "epoch": 0.6107594936708861, + "grad_norm": 0.5598893761634827, + "learning_rate": 0.0015, + "loss": 1.5762, + "step": 5790 + }, + { + "epoch": 0.6118143459915611, + "grad_norm": 0.5576689839363098, + "learning_rate": 0.0015, + "loss": 1.5828, + "step": 5800 + }, + { + "epoch": 0.6128691983122363, + "grad_norm": 0.5098146200180054, + "learning_rate": 0.0015, + "loss": 1.586, + "step": 5810 + }, + { + "epoch": 0.6139240506329114, + "grad_norm": 0.5039575099945068, + "learning_rate": 0.0015, + "loss": 1.5966, + "step": 5820 + }, + { + "epoch": 0.6149789029535865, + "grad_norm": 0.5155593156814575, + "learning_rate": 0.0015, + "loss": 1.5759, + "step": 5830 + }, + { + "epoch": 0.6160337552742616, + "grad_norm": 0.6449838876724243, + "learning_rate": 0.0015, + "loss": 1.595, + "step": 5840 + }, + { + "epoch": 
0.6170886075949367, + "grad_norm": 0.50468510389328, + "learning_rate": 0.0015, + "loss": 1.5976, + "step": 5850 + }, + { + "epoch": 0.6181434599156118, + "grad_norm": 0.6136046648025513, + "learning_rate": 0.0015, + "loss": 1.585, + "step": 5860 + }, + { + "epoch": 0.619198312236287, + "grad_norm": 0.5581533908843994, + "learning_rate": 0.0015, + "loss": 1.5782, + "step": 5870 + }, + { + "epoch": 0.620253164556962, + "grad_norm": 0.6023792624473572, + "learning_rate": 0.0015, + "loss": 1.5901, + "step": 5880 + }, + { + "epoch": 0.6213080168776371, + "grad_norm": 0.5337638258934021, + "learning_rate": 0.0015, + "loss": 1.5796, + "step": 5890 + }, + { + "epoch": 0.6223628691983122, + "grad_norm": 0.574265718460083, + "learning_rate": 0.0015, + "loss": 1.5821, + "step": 5900 + }, + { + "epoch": 0.6234177215189873, + "grad_norm": 0.6542115211486816, + "learning_rate": 0.0015, + "loss": 1.5781, + "step": 5910 + }, + { + "epoch": 0.6244725738396625, + "grad_norm": 0.45129644870758057, + "learning_rate": 0.0015, + "loss": 1.5727, + "step": 5920 + }, + { + "epoch": 0.6255274261603375, + "grad_norm": 0.9813739657402039, + "learning_rate": 0.0015, + "loss": 1.5788, + "step": 5930 + }, + { + "epoch": 0.6265822784810127, + "grad_norm": 0.5766255259513855, + "learning_rate": 0.0015, + "loss": 1.5851, + "step": 5940 + }, + { + "epoch": 0.6276371308016878, + "grad_norm": 0.6553332805633545, + "learning_rate": 0.0015, + "loss": 1.5757, + "step": 5950 + }, + { + "epoch": 0.6286919831223629, + "grad_norm": 0.6401558518409729, + "learning_rate": 0.0015, + "loss": 1.5802, + "step": 5960 + }, + { + "epoch": 0.629746835443038, + "grad_norm": 0.5283893942832947, + "learning_rate": 0.0015, + "loss": 1.5808, + "step": 5970 + }, + { + "epoch": 0.630801687763713, + "grad_norm": 0.4728381931781769, + "learning_rate": 0.0015, + "loss": 1.5645, + "step": 5980 + }, + { + "epoch": 0.6318565400843882, + "grad_norm": 0.5703704357147217, + "learning_rate": 0.0015, + "loss": 1.5865, + "step": 5990 + 
}, + { + "epoch": 0.6329113924050633, + "grad_norm": 0.539211094379425, + "learning_rate": 0.0015, + "loss": 1.5901, + "step": 6000 + }, + { + "epoch": 0.6339662447257384, + "grad_norm": 0.6088196039199829, + "learning_rate": 0.0015, + "loss": 1.5791, + "step": 6010 + }, + { + "epoch": 0.6350210970464135, + "grad_norm": 0.5175780653953552, + "learning_rate": 0.0015, + "loss": 1.5822, + "step": 6020 + }, + { + "epoch": 0.6360759493670886, + "grad_norm": 0.7184878587722778, + "learning_rate": 0.0015, + "loss": 1.5695, + "step": 6030 + }, + { + "epoch": 0.6371308016877637, + "grad_norm": 0.5564938187599182, + "learning_rate": 0.0015, + "loss": 1.5873, + "step": 6040 + }, + { + "epoch": 0.6381856540084389, + "grad_norm": 0.4908703863620758, + "learning_rate": 0.0015, + "loss": 1.5678, + "step": 6050 + }, + { + "epoch": 0.6392405063291139, + "grad_norm": 0.47964581847190857, + "learning_rate": 0.0015, + "loss": 1.5859, + "step": 6060 + }, + { + "epoch": 0.640295358649789, + "grad_norm": 0.5400276780128479, + "learning_rate": 0.0015, + "loss": 1.5767, + "step": 6070 + }, + { + "epoch": 0.6413502109704642, + "grad_norm": 0.5635712146759033, + "learning_rate": 0.0015, + "loss": 1.5761, + "step": 6080 + }, + { + "epoch": 0.6424050632911392, + "grad_norm": 0.4909774661064148, + "learning_rate": 0.0015, + "loss": 1.5733, + "step": 6090 + }, + { + "epoch": 0.6434599156118144, + "grad_norm": 0.5360913276672363, + "learning_rate": 0.0015, + "loss": 1.5701, + "step": 6100 + }, + { + "epoch": 0.6445147679324894, + "grad_norm": 0.6556453108787537, + "learning_rate": 0.0015, + "loss": 1.5838, + "step": 6110 + }, + { + "epoch": 0.6455696202531646, + "grad_norm": 0.505547285079956, + "learning_rate": 0.0015, + "loss": 1.5727, + "step": 6120 + }, + { + "epoch": 0.6466244725738397, + "grad_norm": 0.5425119996070862, + "learning_rate": 0.0015, + "loss": 1.5729, + "step": 6130 + }, + { + "epoch": 0.6476793248945147, + "grad_norm": 0.5822072625160217, + "learning_rate": 0.0015, + "loss": 
1.5768, + "step": 6140 + }, + { + "epoch": 0.6487341772151899, + "grad_norm": 0.4843142032623291, + "learning_rate": 0.0015, + "loss": 1.5839, + "step": 6150 + }, + { + "epoch": 0.6497890295358649, + "grad_norm": 0.5754218697547913, + "learning_rate": 0.0015, + "loss": 1.576, + "step": 6160 + }, + { + "epoch": 0.6508438818565401, + "grad_norm": 0.642614483833313, + "learning_rate": 0.0014834368975312174, + "loss": 1.5582, + "step": 6170 + }, + { + "epoch": 0.6518987341772152, + "grad_norm": 0.6197355389595032, + "learning_rate": 0.0014629899726345957, + "loss": 1.5788, + "step": 6180 + }, + { + "epoch": 0.6529535864978903, + "grad_norm": 0.5959349870681763, + "learning_rate": 0.0014428248775471316, + "loss": 1.5815, + "step": 6190 + }, + { + "epoch": 0.6540084388185654, + "grad_norm": 0.5649117231369019, + "learning_rate": 0.00142293772767289, + "loss": 1.5708, + "step": 6200 + }, + { + "epoch": 0.6550632911392406, + "grad_norm": 0.4976513981819153, + "learning_rate": 0.001403324691959192, + "loss": 1.57, + "step": 6210 + }, + { + "epoch": 0.6561181434599156, + "grad_norm": 0.47332245111465454, + "learning_rate": 0.0013839819921586025, + "loss": 1.5722, + "step": 6220 + }, + { + "epoch": 0.6571729957805907, + "grad_norm": 0.4696807265281677, + "learning_rate": 0.0013649059021010894, + "loss": 1.5632, + "step": 6230 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 0.581645131111145, + "learning_rate": 0.0013460927469762154, + "loss": 1.5641, + "step": 6240 + }, + { + "epoch": 0.6592827004219409, + "grad_norm": 0.5234055519104004, + "learning_rate": 0.0013275389026252255, + "loss": 1.5691, + "step": 6250 + }, + { + "epoch": 0.6603375527426161, + "grad_norm": 0.4703330099582672, + "learning_rate": 0.0013092407948428887, + "loss": 1.5607, + "step": 6260 + }, + { + "epoch": 0.6613924050632911, + "grad_norm": 0.48712536692619324, + "learning_rate": 0.001291194898688966, + "loss": 1.5682, + "step": 6270 + }, + { + "epoch": 0.6624472573839663, + "grad_norm": 
0.46039366722106934, + "learning_rate": 0.001273397737809166, + "loss": 1.5672, + "step": 6280 + }, + { + "epoch": 0.6635021097046413, + "grad_norm": 0.5150198340415955, + "learning_rate": 0.001255845883765463, + "loss": 1.5613, + "step": 6290 + }, + { + "epoch": 0.6645569620253164, + "grad_norm": 0.5750431418418884, + "learning_rate": 0.001238535955375642, + "loss": 1.5572, + "step": 6300 + }, + { + "epoch": 0.6656118143459916, + "grad_norm": 0.5500268936157227, + "learning_rate": 0.0012214646180619506, + "loss": 1.5519, + "step": 6310 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.5177586078643799, + "learning_rate": 0.001204628583208727, + "loss": 1.5512, + "step": 6320 + }, + { + "epoch": 0.6677215189873418, + "grad_norm": 0.5168333649635315, + "learning_rate": 0.0011880246075288827, + "loss": 1.5521, + "step": 6330 + }, + { + "epoch": 0.6687763713080169, + "grad_norm": 0.48658275604248047, + "learning_rate": 0.001171649492439115, + "loss": 1.5509, + "step": 6340 + }, + { + "epoch": 0.669831223628692, + "grad_norm": 0.6578323245048523, + "learning_rate": 0.0011555000834437364, + "loss": 1.5551, + "step": 6350 + }, + { + "epoch": 0.6708860759493671, + "grad_norm": 0.6936450004577637, + "learning_rate": 0.0011395732695269908, + "loss": 1.5515, + "step": 6360 + }, + { + "epoch": 0.6719409282700421, + "grad_norm": 0.6437989473342896, + "learning_rate": 0.0011238659825537505, + "loss": 1.5342, + "step": 6370 + }, + { + "epoch": 0.6729957805907173, + "grad_norm": 0.5173434615135193, + "learning_rate": 0.0011083751966784717, + "loss": 1.5373, + "step": 6380 + }, + { + "epoch": 0.6740506329113924, + "grad_norm": 0.635177493095398, + "learning_rate": 0.0010930979277622953, + "loss": 1.5552, + "step": 6390 + }, + { + "epoch": 0.6751054852320675, + "grad_norm": 0.5062108635902405, + "learning_rate": 0.0010780312327981854, + "loss": 1.548, + "step": 6400 + }, + { + "epoch": 0.6761603375527426, + "grad_norm": 0.5761057734489441, + "learning_rate": 
0.0010631722093439888, + "loss": 1.5469, + "step": 6410 + }, + { + "epoch": 0.6772151898734177, + "grad_norm": 0.587715208530426, + "learning_rate": 0.00104851799496331, + "loss": 1.5359, + "step": 6420 + }, + { + "epoch": 0.6782700421940928, + "grad_norm": 0.5120996236801147, + "learning_rate": 0.0010340657666740914, + "loss": 1.5437, + "step": 6430 + }, + { + "epoch": 0.679324894514768, + "grad_norm": 0.4589667320251465, + "learning_rate": 0.0010198127404047975, + "loss": 1.5223, + "step": 6440 + }, + { + "epoch": 0.680379746835443, + "grad_norm": 0.5560087561607361, + "learning_rate": 0.0010057561704580897, + "loss": 1.5304, + "step": 6450 + }, + { + "epoch": 0.6814345991561181, + "grad_norm": 0.5423837304115295, + "learning_rate": 0.0009918933489818985, + "loss": 1.5444, + "step": 6460 + }, + { + "epoch": 0.6824894514767933, + "grad_norm": 0.48756611347198486, + "learning_rate": 0.0009782216054477827, + "loss": 1.5369, + "step": 6470 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 0.544849693775177, + "learning_rate": 0.0009647383061364801, + "loss": 1.5431, + "step": 6480 + }, + { + "epoch": 0.6845991561181435, + "grad_norm": 0.5315898656845093, + "learning_rate": 0.0009514408536305495, + "loss": 1.5387, + "step": 6490 + }, + { + "epoch": 0.6856540084388185, + "grad_norm": 0.5022508502006531, + "learning_rate": 0.0009383266863140042, + "loss": 1.5507, + "step": 6500 + }, + { + "epoch": 0.6867088607594937, + "grad_norm": 0.5092499256134033, + "learning_rate": 0.000925393277878844, + "loss": 1.5498, + "step": 6510 + }, + { + "epoch": 0.6877637130801688, + "grad_norm": 0.5613736510276794, + "learning_rate": 0.0009126381368383879, + "loss": 1.5332, + "step": 6520 + }, + { + "epoch": 0.6888185654008439, + "grad_norm": 0.6328889727592468, + "learning_rate": 0.0009000588060473156, + "loss": 1.5241, + "step": 6530 + }, + { + "epoch": 0.689873417721519, + "grad_norm": 0.5084934830665588, + "learning_rate": 0.0008876528622283235, + "loss": 1.5339, + "step": 6540 
+ }, + { + "epoch": 0.6909282700421941, + "grad_norm": 0.4776683747768402, + "learning_rate": 0.0008754179155053053, + "loss": 1.527, + "step": 6550 + }, + { + "epoch": 0.6919831223628692, + "grad_norm": 0.5033855438232422, + "learning_rate": 0.0008633516089429683, + "loss": 1.5275, + "step": 6560 + }, + { + "epoch": 0.6930379746835443, + "grad_norm": 0.5248212814331055, + "learning_rate": 0.0008514516180927928, + "loss": 1.5245, + "step": 6570 + }, + { + "epoch": 0.6940928270042194, + "grad_norm": 0.46681085228919983, + "learning_rate": 0.0008397156505452524, + "loss": 1.5208, + "step": 6580 + }, + { + "epoch": 0.6951476793248945, + "grad_norm": 0.5219325423240662, + "learning_rate": 0.0008281414454882051, + "loss": 1.528, + "step": 6590 + }, + { + "epoch": 0.6962025316455697, + "grad_norm": 0.4909190535545349, + "learning_rate": 0.0008167267732713704, + "loss": 1.5311, + "step": 6600 + }, + { + "epoch": 0.6972573839662447, + "grad_norm": 0.5211382508277893, + "learning_rate": 0.0008054694349768117, + "loss": 1.5183, + "step": 6610 + }, + { + "epoch": 0.6983122362869199, + "grad_norm": 0.5288820862770081, + "learning_rate": 0.0007943672619953359, + "loss": 1.5237, + "step": 6620 + }, + { + "epoch": 0.6993670886075949, + "grad_norm": 0.4638526141643524, + "learning_rate": 0.0007834181156087356, + "loss": 1.5163, + "step": 6630 + }, + { + "epoch": 0.70042194092827, + "grad_norm": 0.6308881640434265, + "learning_rate": 0.0007726198865777852, + "loss": 1.5215, + "step": 6640 + }, + { + "epoch": 0.7014767932489452, + "grad_norm": 0.47490477561950684, + "learning_rate": 0.0007619704947359191, + "loss": 1.5141, + "step": 6650 + }, + { + "epoch": 0.7025316455696202, + "grad_norm": 0.47853022813796997, + "learning_rate": 0.0007514678885885087, + "loss": 1.5142, + "step": 6660 + }, + { + "epoch": 0.7035864978902954, + "grad_norm": 0.4762361943721771, + "learning_rate": 0.0007411100449176633, + "loss": 1.515, + "step": 6670 + }, + { + "epoch": 0.7046413502109705, + 
"grad_norm": 0.47374746203422546, + "learning_rate": 0.0007308949683924791, + "loss": 1.5196, + "step": 6680 + }, + { + "epoch": 0.7056962025316456, + "grad_norm": 0.5044169425964355, + "learning_rate": 0.000720820691184658, + "loss": 1.5116, + "step": 6690 + }, + { + "epoch": 0.7067510548523207, + "grad_norm": 0.5222132802009583, + "learning_rate": 0.0007108852725894269, + "loss": 1.5106, + "step": 6700 + }, + { + "epoch": 0.7078059071729957, + "grad_norm": 0.4714582562446594, + "learning_rate": 0.000701086798651681, + "loss": 1.5119, + "step": 6710 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 0.5031645894050598, + "learning_rate": 0.0006914233817972798, + "loss": 1.5044, + "step": 6720 + }, + { + "epoch": 0.709915611814346, + "grad_norm": 0.4461458921432495, + "learning_rate": 0.0006818931604694261, + "loss": 1.5107, + "step": 6730 + }, + { + "epoch": 0.7109704641350211, + "grad_norm": 0.4249267280101776, + "learning_rate": 0.0006724942987700563, + "loss": 1.5149, + "step": 6740 + }, + { + "epoch": 0.7120253164556962, + "grad_norm": 0.469090074300766, + "learning_rate": 0.0006632249861061732, + "loss": 1.5142, + "step": 6750 + }, + { + "epoch": 0.7130801687763713, + "grad_norm": 0.5288012623786926, + "learning_rate": 0.0006540834368410549, + "loss": 1.5114, + "step": 6760 + }, + { + "epoch": 0.7141350210970464, + "grad_norm": 0.45834779739379883, + "learning_rate": 0.0006450678899502701, + "loss": 1.5127, + "step": 6770 + }, + { + "epoch": 0.7151898734177216, + "grad_norm": 0.5441902875900269, + "learning_rate": 0.0006361766086824345, + "loss": 1.5078, + "step": 6780 + }, + { + "epoch": 0.7162447257383966, + "grad_norm": 0.45994263887405396, + "learning_rate": 0.000627407880224645, + "loss": 1.5116, + "step": 6790 + }, + { + "epoch": 0.7172995780590717, + "grad_norm": 0.5407215356826782, + "learning_rate": 0.0006187600153725225, + "loss": 1.497, + "step": 6800 + }, + { + "epoch": 0.7183544303797469, + "grad_norm": 0.5714619159698486, + "learning_rate": 
0.0006102313482048055, + "loss": 1.5035, + "step": 6810 + }, + { + "epoch": 0.7194092827004219, + "grad_norm": 0.48840925097465515, + "learning_rate": 0.0006018202357624274, + "loss": 1.5054, + "step": 6820 + }, + { + "epoch": 0.7204641350210971, + "grad_norm": 0.4573688209056854, + "learning_rate": 0.0005935250577320168, + "loss": 1.5002, + "step": 6830 + }, + { + "epoch": 0.7215189873417721, + "grad_norm": 0.5283113121986389, + "learning_rate": 0.0005853442161337618, + "loss": 1.4929, + "step": 6840 + }, + { + "epoch": 0.7225738396624473, + "grad_norm": 0.45427173376083374, + "learning_rate": 0.0005772761350135759, + "loss": 1.5023, + "step": 6850 + }, + { + "epoch": 0.7236286919831224, + "grad_norm": 0.5191304087638855, + "learning_rate": 0.0005693192601395058, + "loss": 1.4972, + "step": 6860 + }, + { + "epoch": 0.7246835443037974, + "grad_norm": 0.5384828448295593, + "learning_rate": 0.000561472058702326, + "loss": 1.4943, + "step": 6870 + }, + { + "epoch": 0.7257383966244726, + "grad_norm": 0.5704219341278076, + "learning_rate": 0.000553733019020258, + "loss": 1.5016, + "step": 6880 + }, + { + "epoch": 0.7267932489451476, + "grad_norm": 0.49554136395454407, + "learning_rate": 0.0005461006502477612, + "loss": 1.4893, + "step": 6890 + }, + { + "epoch": 0.7278481012658228, + "grad_norm": 0.5482887029647827, + "learning_rate": 0.0005385734820883369, + "loss": 1.4937, + "step": 6900 + }, + { + "epoch": 0.7289029535864979, + "grad_norm": 0.48430365324020386, + "learning_rate": 0.0005311500645112907, + "loss": 1.5174, + "step": 6910 + }, + { + "epoch": 0.729957805907173, + "grad_norm": 0.49063172936439514, + "learning_rate": 0.0005238289674723993, + "loss": 1.4956, + "step": 6920 + }, + { + "epoch": 0.7310126582278481, + "grad_norm": 0.4798126220703125, + "learning_rate": 0.0005166087806384274, + "loss": 1.4996, + "step": 6930 + }, + { + "epoch": 0.7320675105485233, + "grad_norm": 0.543217122554779, + "learning_rate": 0.0005094881131154418, + "loss": 1.5033, + 
"step": 6940 + }, + { + "epoch": 0.7331223628691983, + "grad_norm": 0.5383982062339783, + "learning_rate": 0.0005024655931808696, + "loss": 1.4991, + "step": 6950 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 0.4460148513317108, + "learning_rate": 0.0004955398680192508, + "loss": 1.4906, + "step": 6960 + }, + { + "epoch": 0.7352320675105485, + "grad_norm": 0.47512975335121155, + "learning_rate": 0.000488709603461632, + "loss": 1.4846, + "step": 6970 + }, + { + "epoch": 0.7362869198312236, + "grad_norm": 0.4532780647277832, + "learning_rate": 0.000481973483728553, + "loss": 1.4874, + "step": 6980 + }, + { + "epoch": 0.7373417721518988, + "grad_norm": 0.4771507680416107, + "learning_rate": 0.0004753302111765748, + "loss": 1.4869, + "step": 6990 + }, + { + "epoch": 0.7383966244725738, + "grad_norm": 0.4858551621437073, + "learning_rate": 0.0004687785060483032, + "loss": 1.4971, + "step": 7000 + }, + { + "epoch": 0.739451476793249, + "grad_norm": 0.520816445350647, + "learning_rate": 0.0004623171062258558, + "loss": 1.471, + "step": 7010 + }, + { + "epoch": 0.740506329113924, + "grad_norm": 0.4765135943889618, + "learning_rate": 0.0004559447669877288, + "loss": 1.4886, + "step": 7020 + }, + { + "epoch": 0.7415611814345991, + "grad_norm": 0.4891054928302765, + "learning_rate": 0.00044966026076901413, + "loss": 1.4902, + "step": 7030 + }, + { + "epoch": 0.7426160337552743, + "grad_norm": 0.4770817458629608, + "learning_rate": 0.00044346237692492177, + "loss": 1.4946, + "step": 7040 + }, + { + "epoch": 0.7436708860759493, + "grad_norm": 0.49663975834846497, + "learning_rate": 0.0004373499214975615, + "loss": 1.4828, + "step": 7050 + }, + { + "epoch": 0.7447257383966245, + "grad_norm": 0.539469301700592, + "learning_rate": 0.0004313217169859396, + "loss": 1.489, + "step": 7060 + }, + { + "epoch": 0.7457805907172996, + "grad_norm": 0.41680505871772766, + "learning_rate": 0.0004253766021191256, + "loss": 1.4949, + "step": 7070 + }, + { + "epoch": 0.7468354430379747, 
+ "grad_norm": 0.5445020198822021, + "learning_rate": 0.00041951343163254497, + "loss": 1.4884, + "step": 7080 + }, + { + "epoch": 0.7478902953586498, + "grad_norm": 0.4971625506877899, + "learning_rate": 0.00041373107604735626, + "loss": 1.4892, + "step": 7090 + }, + { + "epoch": 0.7489451476793249, + "grad_norm": 0.5048965811729431, + "learning_rate": 0.0004080284214528687, + "loss": 1.4846, + "step": 7100 + }, + { + "epoch": 0.75, + "grad_norm": 0.4950484037399292, + "learning_rate": 0.0004024043692919589, + "loss": 1.4946, + "step": 7110 + }, + { + "epoch": 0.7510548523206751, + "grad_norm": 0.4743654727935791, + "learning_rate": 0.0003968578361494449, + "loss": 1.4904, + "step": 7120 + }, + { + "epoch": 0.7521097046413502, + "grad_norm": 0.4477802813053131, + "learning_rate": 0.000391387753543378, + "loss": 1.4968, + "step": 7130 + }, + { + "epoch": 0.7531645569620253, + "grad_norm": 0.4663471579551697, + "learning_rate": 0.00038599306771921023, + "loss": 1.4786, + "step": 7140 + }, + { + "epoch": 0.7542194092827004, + "grad_norm": 0.5972737073898315, + "learning_rate": 0.0003806727394468004, + "loss": 1.4757, + "step": 7150 + }, + { + "epoch": 0.7552742616033755, + "grad_norm": 0.4672381281852722, + "learning_rate": 0.0003754257438202162, + "loss": 1.4856, + "step": 7160 + }, + { + "epoch": 0.7563291139240507, + "grad_norm": 0.477232426404953, + "learning_rate": 0.0003702510700602974, + "loss": 1.4912, + "step": 7170 + }, + { + "epoch": 0.7573839662447257, + "grad_norm": 0.5499731302261353, + "learning_rate": 0.0003651477213199393, + "loss": 1.4708, + "step": 7180 + }, + { + "epoch": 0.7584388185654009, + "grad_norm": 0.48404136300086975, + "learning_rate": 0.000360114714492061, + "loss": 1.4711, + "step": 7190 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 0.4702514111995697, + "learning_rate": 0.0003551510800202195, + "loss": 1.4793, + "step": 7200 + }, + { + "epoch": 0.760548523206751, + "grad_norm": 0.48800235986709595, + "learning_rate": 
0.0003502558617118353, + "loss": 1.4818, + "step": 7210 + }, + { + "epoch": 0.7616033755274262, + "grad_norm": 0.4701348543167114, + "learning_rate": 0.0003454281165539914, + "loss": 1.4928, + "step": 7220 + }, + { + "epoch": 0.7626582278481012, + "grad_norm": 0.4648496210575104, + "learning_rate": 0.00034066691453177176, + "loss": 1.4897, + "step": 7230 + }, + { + "epoch": 0.7637130801687764, + "grad_norm": 0.48197734355926514, + "learning_rate": 0.0003359713384491037, + "loss": 1.4881, + "step": 7240 + }, + { + "epoch": 0.7647679324894515, + "grad_norm": 0.5139079093933105, + "learning_rate": 0.00033134048375206944, + "loss": 1.4803, + "step": 7250 + }, + { + "epoch": 0.7658227848101266, + "grad_norm": 0.4451829195022583, + "learning_rate": 0.0003267734583546536, + "loss": 1.4797, + "step": 7260 + }, + { + "epoch": 0.7668776371308017, + "grad_norm": 0.49279919266700745, + "learning_rate": 0.00032226938246689157, + "loss": 1.477, + "step": 7270 + }, + { + "epoch": 0.7679324894514767, + "grad_norm": 0.5037854313850403, + "learning_rate": 0.0003178273884253874, + "loss": 1.4779, + "step": 7280 + }, + { + "epoch": 0.7689873417721519, + "grad_norm": 0.46679431200027466, + "learning_rate": 0.0003134466205261674, + "loss": 1.4883, + "step": 7290 + }, + { + "epoch": 0.770042194092827, + "grad_norm": 0.48517581820487976, + "learning_rate": 0.0003091262348598378, + "loss": 1.4937, + "step": 7300 + }, + { + "epoch": 0.7710970464135021, + "grad_norm": 0.45381537079811096, + "learning_rate": 0.0003048653991490141, + "loss": 1.4723, + "step": 7310 + }, + { + "epoch": 0.7721518987341772, + "grad_norm": 0.4712529480457306, + "learning_rate": 0.00030066329258799187, + "loss": 1.4706, + "step": 7320 + }, + { + "epoch": 0.7732067510548524, + "grad_norm": 0.4980413615703583, + "learning_rate": 0.0002965191056846266, + "loss": 1.4762, + "step": 7330 + }, + { + "epoch": 0.7742616033755274, + "grad_norm": 0.538616955280304, + "learning_rate": 0.000292432040104394, + "loss": 1.4707, + 
"step": 7340 + }, + { + "epoch": 0.7753164556962026, + "grad_norm": 0.455822616815567, + "learning_rate": 0.00028840130851659853, + "loss": 1.4671, + "step": 7350 + }, + { + "epoch": 0.7763713080168776, + "grad_norm": 0.4577052891254425, + "learning_rate": 0.0002844261344427028, + "loss": 1.4756, + "step": 7360 + }, + { + "epoch": 0.7774261603375527, + "grad_norm": 0.48684069514274597, + "learning_rate": 0.0002805057521067471, + "loss": 1.475, + "step": 7370 + }, + { + "epoch": 0.7784810126582279, + "grad_norm": 0.4906095564365387, + "learning_rate": 0.00027663940628783017, + "loss": 1.4653, + "step": 7380 + }, + { + "epoch": 0.7795358649789029, + "grad_norm": 0.48775243759155273, + "learning_rate": 0.00027282635217462393, + "loss": 1.4735, + "step": 7390 + }, + { + "epoch": 0.7805907172995781, + "grad_norm": 0.5769883394241333, + "learning_rate": 0.0002690658552218937, + "loss": 1.4826, + "step": 7400 + }, + { + "epoch": 0.7816455696202531, + "grad_norm": 0.4740167260169983, + "learning_rate": 0.00026535719100899516, + "loss": 1.4629, + "step": 7410 + }, + { + "epoch": 0.7827004219409283, + "grad_norm": 0.4824984073638916, + "learning_rate": 0.00026169964510032245, + "loss": 1.4674, + "step": 7420 + }, + { + "epoch": 0.7837552742616034, + "grad_norm": 0.44966965913772583, + "learning_rate": 0.00025809251290767984, + "loss": 1.4549, + "step": 7430 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 0.4481082856655121, + "learning_rate": 0.00025453509955454957, + "loss": 1.4581, + "step": 7440 + }, + { + "epoch": 0.7858649789029536, + "grad_norm": 0.5118510723114014, + "learning_rate": 0.00025102671974223175, + "loss": 1.4614, + "step": 7450 + }, + { + "epoch": 0.7869198312236287, + "grad_norm": 0.44849956035614014, + "learning_rate": 0.00024756669761782815, + "loss": 1.473, + "step": 7460 + }, + { + "epoch": 0.7879746835443038, + "grad_norm": 0.48909157514572144, + "learning_rate": 0.0002441543666440464, + "loss": 1.4601, + "step": 7470 + }, + { + "epoch": 
0.7890295358649789, + "grad_norm": 0.5529025197029114, + "learning_rate": 0.00024078906947079878, + "loss": 1.4675, + "step": 7480 + }, + { + "epoch": 0.790084388185654, + "grad_norm": 0.4708446264266968, + "learning_rate": 0.00023747015780857005, + "loss": 1.4771, + "step": 7490 + }, + { + "epoch": 0.7911392405063291, + "grad_norm": 0.46474510431289673, + "learning_rate": 0.00023419699230353144, + "loss": 1.4721, + "step": 7500 + }, + { + "epoch": 0.7921940928270043, + "grad_norm": 0.48120978474617004, + "learning_rate": 0.00023096894241437586, + "loss": 1.4785, + "step": 7510 + }, + { + "epoch": 0.7932489451476793, + "grad_norm": 0.5862098336219788, + "learning_rate": 0.00022778538629085056, + "loss": 1.4602, + "step": 7520 + }, + { + "epoch": 0.7943037974683544, + "grad_norm": 0.4589645266532898, + "learning_rate": 0.00022464571065396427, + "loss": 1.4634, + "step": 7530 + }, + { + "epoch": 0.7953586497890295, + "grad_norm": 0.44728800654411316, + "learning_rate": 0.00022154931067784521, + "loss": 1.4588, + "step": 7540 + }, + { + "epoch": 0.7964135021097046, + "grad_norm": 0.4502966105937958, + "learning_rate": 0.00021849558987322782, + "loss": 1.455, + "step": 7550 + }, + { + "epoch": 0.7974683544303798, + "grad_norm": 0.46762070059776306, + "learning_rate": 0.0002154839599725452, + "loss": 1.459, + "step": 7560 + }, + { + "epoch": 0.7985232067510548, + "grad_norm": 0.4748530089855194, + "learning_rate": 0.00021251384081660544, + "loss": 1.472, + "step": 7570 + }, + { + "epoch": 0.79957805907173, + "grad_norm": 0.46290433406829834, + "learning_rate": 0.0002095846602428303, + "loss": 1.4662, + "step": 7580 + }, + { + "epoch": 0.8006329113924051, + "grad_norm": 0.4307163953781128, + "learning_rate": 0.00020669585397503358, + "loss": 1.4565, + "step": 7590 + }, + { + "epoch": 0.8016877637130801, + "grad_norm": 0.4688287377357483, + "learning_rate": 0.0002038468655147195, + "loss": 1.4612, + "step": 7600 + }, + { + "epoch": 0.8027426160337553, + "grad_norm": 
0.47913143038749695, + "learning_rate": 0.00020103714603387894, + "loss": 1.4759, + "step": 7610 + }, + { + "epoch": 0.8037974683544303, + "grad_norm": 0.4098011255264282, + "learning_rate": 0.00019826615426926338, + "loss": 1.4452, + "step": 7620 + }, + { + "epoch": 0.8048523206751055, + "grad_norm": 0.4639303982257843, + "learning_rate": 0.00019553335641811625, + "loss": 1.4686, + "step": 7630 + }, + { + "epoch": 0.8059071729957806, + "grad_norm": 0.494645893573761, + "learning_rate": 0.0001928382260353415, + "loss": 1.4605, + "step": 7640 + }, + { + "epoch": 0.8069620253164557, + "grad_norm": 0.42934685945510864, + "learning_rate": 0.00019018024393208902, + "loss": 1.4754, + "step": 7650 + }, + { + "epoch": 0.8080168776371308, + "grad_norm": 0.4477783143520355, + "learning_rate": 0.00018755889807573872, + "loss": 1.4558, + "step": 7660 + }, + { + "epoch": 0.8090717299578059, + "grad_norm": 0.434712290763855, + "learning_rate": 0.00018497368349126262, + "loss": 1.4593, + "step": 7670 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 0.46321457624435425, + "learning_rate": 0.00018242410216394648, + "loss": 1.4754, + "step": 7680 + }, + { + "epoch": 0.8111814345991561, + "grad_norm": 0.4509671628475189, + "learning_rate": 0.0001799096629434529, + "loss": 1.4495, + "step": 7690 + }, + { + "epoch": 0.8122362869198312, + "grad_norm": 0.45699021220207214, + "learning_rate": 0.00017742988144920578, + "loss": 1.4586, + "step": 7700 + }, + { + "epoch": 0.8132911392405063, + "grad_norm": 0.4849603474140167, + "learning_rate": 0.00017498427997707976, + "loss": 1.4557, + "step": 7710 + }, + { + "epoch": 0.8143459915611815, + "grad_norm": 0.5178509950637817, + "learning_rate": 0.00017257238740737548, + "loss": 1.4615, + "step": 7720 + }, + { + "epoch": 0.8154008438818565, + "grad_norm": 0.47007158398628235, + "learning_rate": 0.00017019373911406307, + "loss": 1.4663, + "step": 7730 + }, + { + "epoch": 0.8164556962025317, + "grad_norm": 0.4440992772579193, + 
"learning_rate": 0.000167847876875277, + "loss": 1.4647, + "step": 7740 + }, + { + "epoch": 0.8175105485232067, + "grad_norm": 0.47048673033714294, + "learning_rate": 0.00016553434878504428, + "loss": 1.4507, + "step": 7750 + }, + { + "epoch": 0.8185654008438819, + "grad_norm": 0.5075029730796814, + "learning_rate": 0.00016325270916622947, + "loss": 1.4522, + "step": 7760 + }, + { + "epoch": 0.819620253164557, + "grad_norm": 0.45689183473587036, + "learning_rate": 0.00016100251848467966, + "loss": 1.4612, + "step": 7770 + }, + { + "epoch": 0.820675105485232, + "grad_norm": 0.5222384333610535, + "learning_rate": 0.0001587833432645528, + "loss": 1.4524, + "step": 7780 + }, + { + "epoch": 0.8217299578059072, + "grad_norm": 0.5083669424057007, + "learning_rate": 0.00015659475600481292, + "loss": 1.4728, + "step": 7790 + }, + { + "epoch": 0.8227848101265823, + "grad_norm": 0.4359082281589508, + "learning_rate": 0.00015443633509687688, + "loss": 1.4577, + "step": 7800 + }, + { + "epoch": 0.8238396624472574, + "grad_norm": 0.522087037563324, + "learning_rate": 0.00015230766474339536, + "loss": 1.4572, + "step": 7810 + }, + { + "epoch": 0.8248945147679325, + "grad_norm": 0.46215352416038513, + "learning_rate": 0.00015020833487815416, + "loss": 1.4636, + "step": 7820 + }, + { + "epoch": 0.8259493670886076, + "grad_norm": 0.47835126519203186, + "learning_rate": 0.0001481379410870792, + "loss": 1.4544, + "step": 7830 + }, + { + "epoch": 0.8270042194092827, + "grad_norm": 0.4356635510921478, + "learning_rate": 0.00014609608453033013, + "loss": 1.4419, + "step": 7840 + }, + { + "epoch": 0.8280590717299579, + "grad_norm": 0.4490792453289032, + "learning_rate": 0.00014408237186546807, + "loss": 1.45, + "step": 7850 + }, + { + "epoch": 0.8291139240506329, + "grad_norm": 0.48488369584083557, + "learning_rate": 0.00014209641517168273, + "loss": 1.4388, + "step": 7860 + }, + { + "epoch": 0.830168776371308, + "grad_norm": 0.4594859182834625, + "learning_rate": 0.00014013783187506265, 
+ "loss": 1.4507, + "step": 7870 + }, + { + "epoch": 0.8312236286919831, + "grad_norm": 0.46240001916885376, + "learning_rate": 0.00013820624467489697, + "loss": 1.4726, + "step": 7880 + }, + { + "epoch": 0.8322784810126582, + "grad_norm": 0.4561460614204407, + "learning_rate": 0.00013630128147099213, + "loss": 1.4677, + "step": 7890 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.4466109573841095, + "learning_rate": 0.00013442257529199068, + "loss": 1.4496, + "step": 7900 + }, + { + "epoch": 0.8343881856540084, + "grad_norm": 0.49733322858810425, + "learning_rate": 0.00013256976422467803, + "loss": 1.4578, + "step": 7910 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 0.4381830096244812, + "learning_rate": 0.00013074249134426366, + "loss": 1.4563, + "step": 7920 + }, + { + "epoch": 0.8364978902953587, + "grad_norm": 0.45894813537597656, + "learning_rate": 0.0001289404046456233, + "loss": 1.4652, + "step": 7930 + }, + { + "epoch": 0.8375527426160337, + "grad_norm": 0.44469326734542847, + "learning_rate": 0.0001271631569754887, + "loss": 1.4548, + "step": 7940 + }, + { + "epoch": 0.8386075949367089, + "grad_norm": 0.44882410764694214, + "learning_rate": 0.0001254104059655723, + "loss": 1.4604, + "step": 7950 + }, + { + "epoch": 0.8396624472573839, + "grad_norm": 0.4298862814903259, + "learning_rate": 0.00012368181396661337, + "loss": 1.4472, + "step": 7960 + }, + { + "epoch": 0.8407172995780591, + "grad_norm": 0.4746420383453369, + "learning_rate": 0.00012197704798333364, + "loss": 1.4455, + "step": 7970 + }, + { + "epoch": 0.8417721518987342, + "grad_norm": 0.45031026005744934, + "learning_rate": 0.00012029577961028894, + "loss": 1.4448, + "step": 7980 + }, + { + "epoch": 0.8428270042194093, + "grad_norm": 0.43901896476745605, + "learning_rate": 0.00011863768496860542, + "loss": 1.4596, + "step": 7990 + }, + { + "epoch": 0.8438818565400844, + "grad_norm": 0.4755641520023346, + "learning_rate": 0.00011700244464358777, + "loss": 1.4504, + "step": 8000 
+ }, + { + "epoch": 0.8449367088607594, + "grad_norm": 0.4270656406879425, + "learning_rate": 0.00011538974362318715, + "loss": 1.4585, + "step": 8010 + }, + { + "epoch": 0.8459915611814346, + "grad_norm": 0.44142812490463257, + "learning_rate": 0.00011379927123731737, + "loss": 1.4499, + "step": 8020 + }, + { + "epoch": 0.8470464135021097, + "grad_norm": 0.45672503113746643, + "learning_rate": 0.0001122307210980077, + "loss": 1.4499, + "step": 8030 + }, + { + "epoch": 0.8481012658227848, + "grad_norm": 0.43242624402046204, + "learning_rate": 0.00011068379104038026, + "loss": 1.4634, + "step": 8040 + }, + { + "epoch": 0.8491561181434599, + "grad_norm": 0.4708192050457001, + "learning_rate": 0.00010915818306444116, + "loss": 1.4441, + "step": 8050 + }, + { + "epoch": 0.8502109704641351, + "grad_norm": 0.4587632417678833, + "learning_rate": 0.00010765360327767384, + "loss": 1.451, + "step": 8060 + }, + { + "epoch": 0.8512658227848101, + "grad_norm": 0.44974225759506226, + "learning_rate": 0.00010616976183842376, + "loss": 1.4574, + "step": 8070 + }, + { + "epoch": 0.8523206751054853, + "grad_norm": 0.4284994304180145, + "learning_rate": 0.00010470637290006365, + "loss": 1.4589, + "step": 8080 + }, + { + "epoch": 0.8533755274261603, + "grad_norm": 0.44848886132240295, + "learning_rate": 0.00010326315455592764, + "loss": 1.4458, + "step": 8090 + }, + { + "epoch": 0.8544303797468354, + "grad_norm": 0.4525514245033264, + "learning_rate": 0.0001018398287850053, + "loss": 1.4406, + "step": 8100 + }, + { + "epoch": 0.8554852320675106, + "grad_norm": 0.492265522480011, + "learning_rate": 0.00010043612139838357, + "loss": 1.4621, + "step": 8110 + }, + { + "epoch": 0.8565400843881856, + "grad_norm": 0.483041375875473, + "learning_rate": 9.905176198642719e-05, + "loss": 1.449, + "step": 8120 + }, + { + "epoch": 0.8575949367088608, + "grad_norm": 0.46375101804733276, + "learning_rate": 9.76864838666871e-05, + "loss": 1.4508, + "step": 8130 + }, + { + "epoch": 0.8586497890295358, 
+ "grad_norm": 0.4372757375240326, + "learning_rate": 9.634002403252676e-05, + "loss": 1.4467, + "step": 8140 + }, + { + "epoch": 0.859704641350211, + "grad_norm": 0.44915491342544556, + "learning_rate": 9.501212310245681e-05, + "loss": 1.4422, + "step": 8150 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 0.4438994228839874, + "learning_rate": 9.370252527016777e-05, + "loss": 1.4549, + "step": 8160 + }, + { + "epoch": 0.8618143459915611, + "grad_norm": 0.48496487736701965, + "learning_rate": 9.241097825525163e-05, + "loss": 1.447, + "step": 8170 + }, + { + "epoch": 0.8628691983122363, + "grad_norm": 0.4393230080604553, + "learning_rate": 9.113723325460276e-05, + "loss": 1.4558, + "step": 8180 + }, + { + "epoch": 0.8639240506329114, + "grad_norm": 0.4320276975631714, + "learning_rate": 8.988104489448849e-05, + "loss": 1.4471, + "step": 8190 + }, + { + "epoch": 0.8649789029535865, + "grad_norm": 0.4461377263069153, + "learning_rate": 8.864217118328042e-05, + "loss": 1.4582, + "step": 8200 + }, + { + "epoch": 0.8660337552742616, + "grad_norm": 0.4428459107875824, + "learning_rate": 8.742037346483729e-05, + "loss": 1.4523, + "step": 8210 + }, + { + "epoch": 0.8670886075949367, + "grad_norm": 0.5014809966087341, + "learning_rate": 8.62154163725303e-05, + "loss": 1.4572, + "step": 8220 + }, + { + "epoch": 0.8681434599156118, + "grad_norm": 0.49593299627304077, + "learning_rate": 8.502706778390219e-05, + "loss": 1.4537, + "step": 8230 + }, + { + "epoch": 0.869198312236287, + "grad_norm": 0.4178498387336731, + "learning_rate": 8.38550987759513e-05, + "loss": 1.4505, + "step": 8240 + }, + { + "epoch": 0.870253164556962, + "grad_norm": 0.46183863282203674, + "learning_rate": 8.269928358103191e-05, + "loss": 1.4634, + "step": 8250 + }, + { + "epoch": 0.8713080168776371, + "grad_norm": 0.4456265866756439, + "learning_rate": 8.155939954336243e-05, + "loss": 1.4561, + "step": 8260 + }, + { + "epoch": 0.8723628691983122, + "grad_norm": 0.4421091675758362, + 
"learning_rate": 8.043522707613312e-05, + "loss": 1.4501, + "step": 8270 + }, + { + "epoch": 0.8734177215189873, + "grad_norm": 0.42582985758781433, + "learning_rate": 7.932654961920486e-05, + "loss": 1.4356, + "step": 8280 + }, + { + "epoch": 0.8744725738396625, + "grad_norm": 0.4297718107700348, + "learning_rate": 7.823315359739135e-05, + "loss": 1.438, + "step": 8290 + }, + { + "epoch": 0.8755274261603375, + "grad_norm": 0.41911646723747253, + "learning_rate": 7.715482837931577e-05, + "loss": 1.4629, + "step": 8300 + }, + { + "epoch": 0.8765822784810127, + "grad_norm": 0.42138588428497314, + "learning_rate": 7.6091366236835e-05, + "loss": 1.4435, + "step": 8310 + }, + { + "epoch": 0.8776371308016878, + "grad_norm": 0.4509657025337219, + "learning_rate": 7.504256230502289e-05, + "loss": 1.4568, + "step": 8320 + }, + { + "epoch": 0.8786919831223629, + "grad_norm": 0.41877952218055725, + "learning_rate": 7.400821454270524e-05, + "loss": 1.4529, + "step": 8330 + }, + { + "epoch": 0.879746835443038, + "grad_norm": 0.45299476385116577, + "learning_rate": 7.29881236935386e-05, + "loss": 1.4406, + "step": 8340 + }, + { + "epoch": 0.880801687763713, + "grad_norm": 0.45176205039024353, + "learning_rate": 7.198209324762562e-05, + "loss": 1.4419, + "step": 8350 + }, + { + "epoch": 0.8818565400843882, + "grad_norm": 0.477600634098053, + "learning_rate": 7.098992940365946e-05, + "loss": 1.4391, + "step": 8360 + }, + { + "epoch": 0.8829113924050633, + "grad_norm": 0.44321975111961365, + "learning_rate": 7.001144103159e-05, + "loss": 1.4487, + "step": 8370 + }, + { + "epoch": 0.8839662447257384, + "grad_norm": 0.43016722798347473, + "learning_rate": 6.904643963580461e-05, + "loss": 1.4559, + "step": 8380 + }, + { + "epoch": 0.8850210970464135, + "grad_norm": 0.44235581159591675, + "learning_rate": 6.809473931881644e-05, + "loss": 1.448, + "step": 8390 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 0.4233081042766571, + "learning_rate": 6.71561567454532e-05, + "loss": 
1.4462, + "step": 8400 + }, + { + "epoch": 0.8871308016877637, + "grad_norm": 0.4557687044143677, + "learning_rate": 6.623051110753948e-05, + "loss": 1.4648, + "step": 8410 + }, + { + "epoch": 0.8881856540084389, + "grad_norm": 0.4181516468524933, + "learning_rate": 6.531762408906607e-05, + "loss": 1.4506, + "step": 8420 + }, + { + "epoch": 0.8892405063291139, + "grad_norm": 0.461910218000412, + "learning_rate": 6.441731983183912e-05, + "loss": 1.4487, + "step": 8430 + }, + { + "epoch": 0.890295358649789, + "grad_norm": 0.4493960440158844, + "learning_rate": 6.352942490160292e-05, + "loss": 1.4422, + "step": 8440 + }, + { + "epoch": 0.8913502109704642, + "grad_norm": 0.44184303283691406, + "learning_rate": 6.265376825462966e-05, + "loss": 1.4529, + "step": 8450 + }, + { + "epoch": 0.8924050632911392, + "grad_norm": 0.4727321267127991, + "learning_rate": 6.179018120476945e-05, + "loss": 1.449, + "step": 8460 + }, + { + "epoch": 0.8934599156118144, + "grad_norm": 0.4504562020301819, + "learning_rate": 6.0938497390954946e-05, + "loss": 1.4471, + "step": 8470 + }, + { + "epoch": 0.8945147679324894, + "grad_norm": 0.4324711859226227, + "learning_rate": 6.009855274515339e-05, + "loss": 1.4396, + "step": 8480 + }, + { + "epoch": 0.8955696202531646, + "grad_norm": 0.45179447531700134, + "learning_rate": 5.9270185460760735e-05, + "loss": 1.4496, + "step": 8490 + }, + { + "epoch": 0.8966244725738397, + "grad_norm": 0.43770304322242737, + "learning_rate": 5.8453235961431225e-05, + "loss": 1.4352, + "step": 8500 + }, + { + "epoch": 0.8976793248945147, + "grad_norm": 0.44600874185562134, + "learning_rate": 5.764754687033678e-05, + "loss": 1.4481, + "step": 8510 + }, + { + "epoch": 0.8987341772151899, + "grad_norm": 0.4510987401008606, + "learning_rate": 5.6852962979849836e-05, + "loss": 1.4453, + "step": 8520 + }, + { + "epoch": 0.8997890295358649, + "grad_norm": 0.43401893973350525, + "learning_rate": 5.6069331221644284e-05, + "loss": 1.4578, + "step": 8530 + }, + { + "epoch": 
0.9008438818565401, + "grad_norm": 0.45904532074928284, + "learning_rate": 5.529650063720842e-05, + "loss": 1.457, + "step": 8540 + }, + { + "epoch": 0.9018987341772152, + "grad_norm": 0.4595103859901428, + "learning_rate": 5.453432234876445e-05, + "loss": 1.4455, + "step": 8550 + }, + { + "epoch": 0.9029535864978903, + "grad_norm": 0.4322601556777954, + "learning_rate": 5.37826495305886e-05, + "loss": 1.4385, + "step": 8560 + }, + { + "epoch": 0.9040084388185654, + "grad_norm": 0.4951367676258087, + "learning_rate": 5.304133738072674e-05, + "loss": 1.4552, + "step": 8570 + }, + { + "epoch": 0.9050632911392406, + "grad_norm": 0.4912833571434021, + "learning_rate": 5.2310243093099814e-05, + "loss": 1.4487, + "step": 8580 + }, + { + "epoch": 0.9061181434599156, + "grad_norm": 0.4649134874343872, + "learning_rate": 5.158922582999368e-05, + "loss": 1.4513, + "step": 8590 + }, + { + "epoch": 0.9071729957805907, + "grad_norm": 0.4606928825378418, + "learning_rate": 5.087814669492819e-05, + "loss": 1.443, + "step": 8600 + }, + { + "epoch": 0.9082278481012658, + "grad_norm": 0.42214617133140564, + "learning_rate": 5.017686870590028e-05, + "loss": 1.4469, + "step": 8610 + }, + { + "epoch": 0.9092827004219409, + "grad_norm": 0.4145753085613251, + "learning_rate": 4.948525676899577e-05, + "loss": 1.4423, + "step": 8620 + }, + { + "epoch": 0.9103375527426161, + "grad_norm": 0.42406660318374634, + "learning_rate": 4.880317765236493e-05, + "loss": 1.4515, + "step": 8630 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 0.47677159309387207, + "learning_rate": 4.8130499960556755e-05, + "loss": 1.4332, + "step": 8640 + }, + { + "epoch": 0.9124472573839663, + "grad_norm": 0.43156909942626953, + "learning_rate": 4.746709410920699e-05, + "loss": 1.4394, + "step": 8650 + }, + { + "epoch": 0.9135021097046413, + "grad_norm": 0.5339933633804321, + "learning_rate": 4.681283230007507e-05, + "loss": 1.44, + "step": 8660 + }, + { + "epoch": 0.9145569620253164, + "grad_norm": 
0.42672276496887207, + "learning_rate": 4.616758849642509e-05, + "loss": 1.4444, + "step": 8670 + }, + { + "epoch": 0.9156118143459916, + "grad_norm": 0.4053221046924591, + "learning_rate": 4.553123839874615e-05, + "loss": 1.4543, + "step": 8680 + }, + { + "epoch": 0.9166666666666666, + "grad_norm": 0.45296961069107056, + "learning_rate": 4.490365942080736e-05, + "loss": 1.4488, + "step": 8690 + }, + { + "epoch": 0.9177215189873418, + "grad_norm": 0.42269933223724365, + "learning_rate": 4.428473066604285e-05, + "loss": 1.4468, + "step": 8700 + }, + { + "epoch": 0.9187763713080169, + "grad_norm": 0.43589362502098083, + "learning_rate": 4.367433290426233e-05, + "loss": 1.4462, + "step": 8710 + }, + { + "epoch": 0.919831223628692, + "grad_norm": 0.4131251275539398, + "learning_rate": 4.3072348548682595e-05, + "loss": 1.4385, + "step": 8720 + }, + { + "epoch": 0.9208860759493671, + "grad_norm": 0.49350735545158386, + "learning_rate": 4.247866163327575e-05, + "loss": 1.4504, + "step": 8730 + }, + { + "epoch": 0.9219409282700421, + "grad_norm": 0.433644562959671, + "learning_rate": 4.1893157790429404e-05, + "loss": 1.4403, + "step": 8740 + }, + { + "epoch": 0.9229957805907173, + "grad_norm": 0.43731844425201416, + "learning_rate": 4.1315724228915066e-05, + "loss": 1.4398, + "step": 8750 + }, + { + "epoch": 0.9240506329113924, + "grad_norm": 0.4305371642112732, + "learning_rate": 4.074624971216005e-05, + "loss": 1.4271, + "step": 8760 + }, + { + "epoch": 0.9251054852320675, + "grad_norm": 0.4132416248321533, + "learning_rate": 4.018462453681889e-05, + "loss": 1.4396, + "step": 8770 + }, + { + "epoch": 0.9261603375527426, + "grad_norm": 0.42768317461013794, + "learning_rate": 3.963074051164014e-05, + "loss": 1.4448, + "step": 8780 + }, + { + "epoch": 0.9272151898734177, + "grad_norm": 0.41653135418891907, + "learning_rate": 3.908449093662446e-05, + "loss": 1.4396, + "step": 8790 + }, + { + "epoch": 0.9282700421940928, + "grad_norm": 0.42105209827423096, + "learning_rate": 
3.854577058246998e-05, + "loss": 1.4377, + "step": 8800 + }, + { + "epoch": 0.929324894514768, + "grad_norm": 0.46469560265541077, + "learning_rate": 3.801447567030094e-05, + "loss": 1.456, + "step": 8810 + }, + { + "epoch": 0.930379746835443, + "grad_norm": 0.4113624691963196, + "learning_rate": 3.7490503851675777e-05, + "loss": 1.4432, + "step": 8820 + }, + { + "epoch": 0.9314345991561181, + "grad_norm": 0.44958192110061646, + "learning_rate": 3.6973754188870806e-05, + "loss": 1.4565, + "step": 8830 + }, + { + "epoch": 0.9324894514767933, + "grad_norm": 0.4294537603855133, + "learning_rate": 3.6464127135435536e-05, + "loss": 1.4449, + "step": 8840 + }, + { + "epoch": 0.9335443037974683, + "grad_norm": 0.48246440291404724, + "learning_rate": 3.596152451701616e-05, + "loss": 1.4428, + "step": 8850 + }, + { + "epoch": 0.9345991561181435, + "grad_norm": 0.40501779317855835, + "learning_rate": 3.5465849512443226e-05, + "loss": 1.4426, + "step": 8860 + }, + { + "epoch": 0.9356540084388185, + "grad_norm": 0.43628913164138794, + "learning_rate": 3.4977006635080086e-05, + "loss": 1.445, + "step": 8870 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 0.43571701645851135, + "learning_rate": 3.449490171442838e-05, + "loss": 1.4495, + "step": 8880 + }, + { + "epoch": 0.9377637130801688, + "grad_norm": 0.41575223207473755, + "learning_rate": 3.401944187798702e-05, + "loss": 1.4478, + "step": 8890 + }, + { + "epoch": 0.9388185654008439, + "grad_norm": 0.43247947096824646, + "learning_rate": 3.355053553336137e-05, + "loss": 1.4389, + "step": 8900 + }, + { + "epoch": 0.939873417721519, + "grad_norm": 0.490737646818161, + "learning_rate": 3.308809235061882e-05, + "loss": 1.4363, + "step": 8910 + }, + { + "epoch": 0.9409282700421941, + "grad_norm": 0.4198133945465088, + "learning_rate": 3.263202324488772e-05, + "loss": 1.4421, + "step": 8920 + }, + { + "epoch": 0.9419831223628692, + "grad_norm": 0.4254949390888214, + "learning_rate": 3.218224035919609e-05, + "loss": 1.4426, + 
"step": 8930 + }, + { + "epoch": 0.9430379746835443, + "grad_norm": 0.43694689869880676, + "learning_rate": 3.173865704754688e-05, + "loss": 1.4403, + "step": 8940 + }, + { + "epoch": 0.9440928270042194, + "grad_norm": 0.4228687286376953, + "learning_rate": 3.130118785822657e-05, + "loss": 1.4452, + "step": 8950 + }, + { + "epoch": 0.9451476793248945, + "grad_norm": 0.4251164197921753, + "learning_rate": 3.08697485173437e-05, + "loss": 1.4473, + "step": 8960 + }, + { + "epoch": 0.9462025316455697, + "grad_norm": 0.43062159419059753, + "learning_rate": 3.0444255912594442e-05, + "loss": 1.4449, + "step": 8970 + }, + { + "epoch": 0.9472573839662447, + "grad_norm": 0.41896024346351624, + "learning_rate": 3.002462807725185e-05, + "loss": 1.4401, + "step": 8980 + }, + { + "epoch": 0.9483122362869199, + "grad_norm": 0.43290072679519653, + "learning_rate": 2.9610784174375868e-05, + "loss": 1.4484, + "step": 8990 + }, + { + "epoch": 0.9493670886075949, + "grad_norm": 0.4218009114265442, + "learning_rate": 2.920264448124087e-05, + "loss": 1.4414, + "step": 9000 + }, + { + "epoch": 0.95042194092827, + "grad_norm": 0.43858784437179565, + "learning_rate": 2.8800130373977936e-05, + "loss": 1.4418, + "step": 9010 + }, + { + "epoch": 0.9514767932489452, + "grad_norm": 0.4248104691505432, + "learning_rate": 2.84031643124288e-05, + "loss": 1.4393, + "step": 9020 + }, + { + "epoch": 0.9525316455696202, + "grad_norm": 0.4133535325527191, + "learning_rate": 2.8011669825208517e-05, + "loss": 1.4596, + "step": 9030 + }, + { + "epoch": 0.9535864978902954, + "grad_norm": 0.4027361273765564, + "learning_rate": 2.762557149497405e-05, + "loss": 1.4335, + "step": 9040 + }, + { + "epoch": 0.9546413502109705, + "grad_norm": 0.41632702946662903, + "learning_rate": 2.724479494389592e-05, + "loss": 1.4461, + "step": 9050 + }, + { + "epoch": 0.9556962025316456, + "grad_norm": 0.4205421805381775, + "learning_rate": 2.6869266819330058e-05, + "loss": 1.4458, + "step": 9060 + }, + { + "epoch": 
0.9567510548523207, + "grad_norm": 0.4251040816307068, + "learning_rate": 2.6498914779687228e-05, + "loss": 1.448, + "step": 9070 + }, + { + "epoch": 0.9578059071729957, + "grad_norm": 0.41622573137283325, + "learning_rate": 2.6133667480497115e-05, + "loss": 1.4445, + "step": 9080 + }, + { + "epoch": 0.9588607594936709, + "grad_norm": 0.44685351848602295, + "learning_rate": 2.5773454560664597e-05, + "loss": 1.45, + "step": 9090 + }, + { + "epoch": 0.959915611814346, + "grad_norm": 0.4030863344669342, + "learning_rate": 2.541820662891541e-05, + "loss": 1.4278, + "step": 9100 + }, + { + "epoch": 0.9609704641350211, + "grad_norm": 0.430654376745224, + "learning_rate": 2.5067855250428616e-05, + "loss": 1.4406, + "step": 9110 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 0.4610930383205414, + "learning_rate": 2.472233293365335e-05, + "loss": 1.4475, + "step": 9120 + }, + { + "epoch": 0.9630801687763713, + "grad_norm": 0.46867457032203674, + "learning_rate": 2.4381573117307307e-05, + "loss": 1.4359, + "step": 9130 + }, + { + "epoch": 0.9641350210970464, + "grad_norm": 0.4635215401649475, + "learning_rate": 2.4045510157554362e-05, + "loss": 1.4539, + "step": 9140 + }, + { + "epoch": 0.9651898734177216, + "grad_norm": 0.4317043125629425, + "learning_rate": 2.3714079315358985e-05, + "loss": 1.4417, + "step": 9150 + }, + { + "epoch": 0.9662447257383966, + "grad_norm": 0.45931223034858704, + "learning_rate": 2.338721674401494e-05, + "loss": 1.4486, + "step": 9160 + }, + { + "epoch": 0.9672995780590717, + "grad_norm": 0.45791056752204895, + "learning_rate": 2.30648594768459e-05, + "loss": 1.4421, + "step": 9170 + }, + { + "epoch": 0.9683544303797469, + "grad_norm": 0.4479103088378906, + "learning_rate": 2.2746945415075523e-05, + "loss": 1.4415, + "step": 9180 + }, + { + "epoch": 0.9694092827004219, + "grad_norm": 0.43347054719924927, + "learning_rate": 2.2433413315864803e-05, + "loss": 1.4386, + "step": 9190 + }, + { + "epoch": 0.9704641350210971, + "grad_norm": 
0.42060601711273193, + "learning_rate": 2.2124202780514277e-05, + "loss": 1.4409, + "step": 9200 + }, + { + "epoch": 0.9715189873417721, + "grad_norm": 0.4431242346763611, + "learning_rate": 2.1819254242828815e-05, + "loss": 1.4409, + "step": 9210 + }, + { + "epoch": 0.9725738396624473, + "grad_norm": 0.42724913358688354, + "learning_rate": 2.151850895764285e-05, + "loss": 1.4418, + "step": 9220 + }, + { + "epoch": 0.9736286919831224, + "grad_norm": 0.42308560013771057, + "learning_rate": 2.12219089895037e-05, + "loss": 1.4291, + "step": 9230 + }, + { + "epoch": 0.9746835443037974, + "grad_norm": 0.40325576066970825, + "learning_rate": 2.092939720151092e-05, + "loss": 1.4272, + "step": 9240 + }, + { + "epoch": 0.9757383966244726, + "grad_norm": 0.4251537024974823, + "learning_rate": 2.064091724430947e-05, + "loss": 1.4366, + "step": 9250 + }, + { + "epoch": 0.9767932489451476, + "grad_norm": 0.4398569166660309, + "learning_rate": 2.0356413545234603e-05, + "loss": 1.4333, + "step": 9260 + }, + { + "epoch": 0.9778481012658228, + "grad_norm": 0.4224061071872711, + "learning_rate": 2.0075831297606357e-05, + "loss": 1.4477, + "step": 9270 + }, + { + "epoch": 0.9789029535864979, + "grad_norm": 0.41980135440826416, + "learning_rate": 1.9799116450171627e-05, + "loss": 1.4318, + "step": 9280 + }, + { + "epoch": 0.979957805907173, + "grad_norm": 0.40270403027534485, + "learning_rate": 1.952621569669175e-05, + "loss": 1.4412, + "step": 9290 + }, + { + "epoch": 0.9810126582278481, + "grad_norm": 0.4245798587799072, + "learning_rate": 1.9257076465673605e-05, + "loss": 1.4401, + "step": 9300 + }, + { + "epoch": 0.9820675105485233, + "grad_norm": 0.42163968086242676, + "learning_rate": 1.899164691024229e-05, + "loss": 1.4407, + "step": 9310 + }, + { + "epoch": 0.9831223628691983, + "grad_norm": 0.4195674657821655, + "learning_rate": 1.872987589815331e-05, + "loss": 1.4362, + "step": 9320 + }, + { + "epoch": 0.9841772151898734, + "grad_norm": 0.41309353709220886, + 
"learning_rate": 1.8471713001942538e-05, + "loss": 1.4499, + "step": 9330 + }, + { + "epoch": 0.9852320675105485, + "grad_norm": 0.4175505042076111, + "learning_rate": 1.8217108489211845e-05, + "loss": 1.4449, + "step": 9340 + }, + { + "epoch": 0.9862869198312236, + "grad_norm": 0.4427430033683777, + "learning_rate": 1.7966013313048696e-05, + "loss": 1.4566, + "step": 9350 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 0.42718228697776794, + "learning_rate": 1.7718379102577752e-05, + "loss": 1.4484, + "step": 9360 + }, + { + "epoch": 0.9883966244725738, + "grad_norm": 0.45362961292266846, + "learning_rate": 1.7474158153642745e-05, + "loss": 1.4461, + "step": 9370 + }, + { + "epoch": 0.989451476793249, + "grad_norm": 0.4560001790523529, + "learning_rate": 1.7233303419616745e-05, + "loss": 1.4338, + "step": 9380 + }, + { + "epoch": 0.990506329113924, + "grad_norm": 0.4449373483657837, + "learning_rate": 1.699576850233916e-05, + "loss": 1.4362, + "step": 9390 + }, + { + "epoch": 0.9915611814345991, + "grad_norm": 0.40183788537979126, + "learning_rate": 1.6761507643177553e-05, + "loss": 1.4489, + "step": 9400 + }, + { + "epoch": 0.9926160337552743, + "grad_norm": 0.42784374952316284, + "learning_rate": 1.6530475714212752e-05, + "loss": 1.423, + "step": 9410 + }, + { + "epoch": 0.9936708860759493, + "grad_norm": 0.43885427713394165, + "learning_rate": 1.6302628209545423e-05, + "loss": 1.4444, + "step": 9420 + }, + { + "epoch": 0.9947257383966245, + "grad_norm": 0.42327016592025757, + "learning_rate": 1.6077921236722464e-05, + "loss": 1.445, + "step": 9430 + }, + { + "epoch": 0.9957805907172996, + "grad_norm": 0.4224078059196472, + "learning_rate": 1.5856311508281594e-05, + "loss": 1.4397, + "step": 9440 + }, + { + "epoch": 0.9968354430379747, + "grad_norm": 0.4369066059589386, + "learning_rate": 1.5637756333412454e-05, + "loss": 1.4504, + "step": 9450 + }, + { + "epoch": 0.9978902953586498, + "grad_norm": 0.44434547424316406, + "learning_rate": 
1.542221360973268e-05, + "loss": 1.4328, + "step": 9460 + }, + { + "epoch": 0.9989451476793249, + "grad_norm": 0.42214450240135193, + "learning_rate": 1.5209641815177312e-05, + "loss": 1.445, + "step": 9470 + }, + { + "epoch": 1.0, + "grad_norm": 1.2342544794082642, + "learning_rate": 1.5e-05, + "loss": 1.435, + "step": 9480 + } + ], + "logging_steps": 10, + "max_steps": 9480, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.8391618477891584e+16, + "train_batch_size": 1024, + "trial_name": null, + "trial_params": null +} diff --git a/saves-gpt_neox/checkpoint-9480/training_args.bin b/saves-gpt_neox/checkpoint-9480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..2280ad99e83613e987750d3ca0a957f1287f60b4 --- /dev/null +++ b/saves-gpt_neox/checkpoint-9480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66738aabcbc4d47016cec83a734947b6c7f8b6bfbaf62c8643f42115c5a7bac0 +size 5112 diff --git a/saves-gpt_neox/config.json b/saves-gpt_neox/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c216fcd1855cc1122a6b9a742b143fdcac99903d --- /dev/null +++ b/saves-gpt_neox/config.json @@ -0,0 +1,30 @@ +{ + "architectures": [ + "GPTNeoXForCausalLM" + ], + "attention_bias": true, + "attention_dropout": 0.0, + "bos_token_id": 0, + "classifier_dropout": 0.1, + "eos_token_id": 2, + "hidden_act": "gelu", + "hidden_dropout": 0.0, + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 1024, + "layer_norm_eps": 1e-05, + "max_position_embeddings": 2048, + "model_type": "gpt_neox", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "rope_scaling": null, + 
"rotary_emb_base": 10000, + "rotary_pct": 0.25, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "use_parallel_residual": true, + "vocab_size": 2000 +} diff --git a/saves-gpt_neox/generation_config.json b/saves-gpt_neox/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f3d6e313c9ea91dde2131852f3f2423673d6a38e --- /dev/null +++ b/saves-gpt_neox/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 0, + "eos_token_id": 2, + "transformers_version": "4.42.4" +} diff --git a/saves-gpt_neox/model.safetensors b/saves-gpt_neox/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2764f50b5fa4ba42d2ff86ab491d665540a43713 --- /dev/null +++ b/saves-gpt_neox/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:385a3e62735fb0d90ffd20252916108bda92f6d5e0b237a4bde370a7bd362a35 +size 8371104 diff --git a/saves-gpt_neox/result.log b/saves-gpt_neox/result.log new file mode 100644 index 0000000000000000000000000000000000000000..94053834c44f8f02244a127ee14ada57ce2c2fbc --- /dev/null +++ b/saves-gpt_neox/result.log @@ -0,0 +1 @@ +{'train_runtime': 1866.7603, 'train_samples_per_second': 5199.706, 'train_steps_per_second': 5.078, 'train_loss': 1.720536396674466, 'epoch': 1.0} \ No newline at end of file diff --git a/saves-gpt_neox/special_tokens_map.json b/saves-gpt_neox/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-gpt_neox/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": 
"<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-gpt_neox/tokenizer.json b/saves-gpt_neox/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-gpt_neox/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 
14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + 
"Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + 
"ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 
486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 
632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + 
"次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, 
+ "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 
1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + 
"æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 
1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + 
"Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, 
+ "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + 
"å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 
1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 
1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-gpt_neox/tokenizer_config.json b/saves-gpt_neox/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-gpt_neox/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + 
"model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-gptj-cosine/checkpoint-9480/config.json b/saves-gptj-cosine/checkpoint-9480/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f91579c3f4d0a727ebdfcdd5a519a318b31cd88d --- /dev/null +++ b/saves-gptj-cosine/checkpoint-9480/config.json @@ -0,0 +1,28 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPTJForCausalLM" + ], + "attn_pdrop": 0.0, + "bos_token_id": 50256, + "embd_pdrop": 0.0, + "eos_token_id": 50256, + "hidden_act": "gelu", + "initializer_range": 0.02, + "intermediate_size": 1024, + "layer_norm_epsilon": 1e-05, + "model_type": "gptj", + "n_embd": 256, + "n_head": 4, + "n_inner": null, + "n_layer": 2, + "n_positions": 2048, + "num_key_value_heads": 4, + "resid_pdrop": 0.0, + "rotary_dim": 64, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-gptj-cosine/checkpoint-9480/generation_config.json b/saves-gptj-cosine/checkpoint-9480/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..fb2eba6018c75d5bca061373b0ddaa2abf0a1f68 --- /dev/null +++ b/saves-gptj-cosine/checkpoint-9480/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 50256, + "eos_token_id": 50256, + "transformers_version": "4.42.4" +} diff --git a/saves-gptj-cosine/checkpoint-9480/model.safetensors b/saves-gptj-cosine/checkpoint-9480/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..080b88ec77db8c51c6c4bc847ced96e5a517b74c --- /dev/null +++ b/saves-gptj-cosine/checkpoint-9480/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44f67e0a76e729d1041fbcc1698900a956363b1d4502c4352362c8e8f0612dc5 +size 8366216 diff --git 
a/saves-gptj-cosine/checkpoint-9480/optimizer.pt b/saves-gptj-cosine/checkpoint-9480/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..90987705d912ca9c5ccda406f8040bd88722a40d --- /dev/null +++ b/saves-gptj-cosine/checkpoint-9480/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:791e67918ff267ab21a7691b10745abea66a83bbccaa33a38852bfec978dd6c5 +size 16748310 diff --git a/saves-gptj-cosine/checkpoint-9480/rng_state.pth b/saves-gptj-cosine/checkpoint-9480/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f8291cd6ce87668b786a72f3e93d072fbe54902 --- /dev/null +++ b/saves-gptj-cosine/checkpoint-9480/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c917636c7a58af68a29056522a757e9f9b99005b776641aa157c536967817d +size 14244 diff --git a/saves-gptj-cosine/checkpoint-9480/scheduler.pt b/saves-gptj-cosine/checkpoint-9480/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..03c145297021546d40e130546440641e02059bcb --- /dev/null +++ b/saves-gptj-cosine/checkpoint-9480/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35fd617624c087e1a286ed7cf3fa38baa4a8815e49f107c3186b4c7c58e1adbb +size 1064 diff --git a/saves-gptj-cosine/checkpoint-9480/special_tokens_map.json b/saves-gptj-cosine/checkpoint-9480/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-gptj-cosine/checkpoint-9480/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false + } +} diff --git a/saves-gptj-cosine/checkpoint-9480/tokenizer.json b/saves-gptj-cosine/checkpoint-9480/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-gptj-cosine/checkpoint-9480/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, 
+ "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + 
"Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + 
"ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 
486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 
632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + 
"次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, 
+ "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 
1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + 
"æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 
1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + 
"Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, 
+ "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + 
"å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 
1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 
1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-gptj-cosine/checkpoint-9480/tokenizer_config.json b/saves-gptj-cosine/checkpoint-9480/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-gptj-cosine/checkpoint-9480/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + 
"eos_token": "<|im_end|>", + "errors": "replace", + "model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-gptj-cosine/checkpoint-9480/trainer_state.json b/saves-gptj-cosine/checkpoint-9480/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6b99f22df49226c12597bb2436c74b9e2bdb1c54 --- /dev/null +++ b/saves-gptj-cosine/checkpoint-9480/trainer_state.json @@ -0,0 +1,6669 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 9480, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0010548523206751054, + "grad_norm": 1.2278097867965698, + "learning_rate": 0.00015789473684210527, + "loss": 7.4535, + "step": 10 + }, + { + "epoch": 0.002109704641350211, + "grad_norm": 1.0722776651382446, + "learning_rate": 0.00031578947368421053, + "loss": 6.7413, + "step": 20 + }, + { + "epoch": 0.0031645569620253164, + "grad_norm": 0.8684439063072205, + "learning_rate": 0.00047368421052631577, + "loss": 6.1051, + "step": 30 + }, + { + "epoch": 0.004219409282700422, + "grad_norm": 0.7196707129478455, + "learning_rate": 0.0006315789473684211, + "loss": 5.5125, + "step": 40 + }, + { + "epoch": 0.005274261603375527, + "grad_norm": 0.37807539105415344, + "learning_rate": 0.0007894736842105263, + "loss": 5.0033, + "step": 50 + }, + { + "epoch": 0.006329113924050633, + "grad_norm": 0.5269351005554199, + "learning_rate": 0.0009473684210526315, + "loss": 4.574, + "step": 60 + }, + { + "epoch": 0.007383966244725738, + "grad_norm": 0.47045862674713135, + "learning_rate": 0.0011052631578947368, + "loss": 4.2772, + "step": 70 + }, + { + "epoch": 0.008438818565400843, + "grad_norm": 0.5410513877868652, + "learning_rate": 0.0012631578947368421, + "loss": 4.0898, + "step": 80 + }, + { + "epoch": 0.00949367088607595, + 
"grad_norm": 0.4382465183734894, + "learning_rate": 0.0014210526315789472, + "loss": 3.9197, + "step": 90 + }, + { + "epoch": 0.010548523206751054, + "grad_norm": 0.6709838509559631, + "learning_rate": 0.0014999989494847376, + "loss": 3.8136, + "step": 100 + }, + { + "epoch": 0.011603375527426161, + "grad_norm": 0.5138986110687256, + "learning_rate": 0.0014999905453802946, + "loss": 3.6835, + "step": 110 + }, + { + "epoch": 0.012658227848101266, + "grad_norm": 0.6986360549926758, + "learning_rate": 0.0014999737372655805, + "loss": 3.6047, + "step": 120 + }, + { + "epoch": 0.013713080168776372, + "grad_norm": 0.41930243372917175, + "learning_rate": 0.0014999485253289388, + "loss": 3.5255, + "step": 130 + }, + { + "epoch": 0.014767932489451477, + "grad_norm": 0.5452022552490234, + "learning_rate": 0.0014999149098528814, + "loss": 3.443, + "step": 140 + }, + { + "epoch": 0.015822784810126583, + "grad_norm": 0.676002025604248, + "learning_rate": 0.0014998728912140862, + "loss": 3.3813, + "step": 150 + }, + { + "epoch": 0.016877637130801686, + "grad_norm": 0.5220478177070618, + "learning_rate": 0.0014998224698833922, + "loss": 3.3282, + "step": 160 + }, + { + "epoch": 0.017932489451476793, + "grad_norm": 0.5928002595901489, + "learning_rate": 0.0014997636464257956, + "loss": 3.2681, + "step": 170 + }, + { + "epoch": 0.0189873417721519, + "grad_norm": 0.5212839841842651, + "learning_rate": 0.0014996964215004416, + "loss": 3.2303, + "step": 180 + }, + { + "epoch": 0.020042194092827006, + "grad_norm": 0.49748554825782776, + "learning_rate": 0.0014996207958606182, + "loss": 3.1735, + "step": 190 + }, + { + "epoch": 0.02109704641350211, + "grad_norm": 0.5923460125923157, + "learning_rate": 0.001499536770353748, + "loss": 3.1206, + "step": 200 + }, + { + "epoch": 0.022151898734177215, + "grad_norm": 0.6057188510894775, + "learning_rate": 0.0014994443459213774, + "loss": 3.087, + "step": 210 + }, + { + "epoch": 0.023206751054852322, + "grad_norm": 0.4954093098640442, + 
"learning_rate": 0.001499343523599168, + "loss": 3.0468, + "step": 220 + }, + { + "epoch": 0.024261603375527425, + "grad_norm": 0.49925902485847473, + "learning_rate": 0.0014992343045168823, + "loss": 2.9996, + "step": 230 + }, + { + "epoch": 0.02531645569620253, + "grad_norm": 0.571590781211853, + "learning_rate": 0.0014991166898983739, + "loss": 2.9467, + "step": 240 + }, + { + "epoch": 0.026371308016877638, + "grad_norm": 0.5933189392089844, + "learning_rate": 0.001498990681061572, + "loss": 2.9149, + "step": 250 + }, + { + "epoch": 0.027426160337552744, + "grad_norm": 0.5211824774742126, + "learning_rate": 0.001498856279418467, + "loss": 2.8725, + "step": 260 + }, + { + "epoch": 0.028481012658227847, + "grad_norm": 0.8120631575584412, + "learning_rate": 0.0014987134864750948, + "loss": 2.8408, + "step": 270 + }, + { + "epoch": 0.029535864978902954, + "grad_norm": 0.6150375604629517, + "learning_rate": 0.0014985623038315206, + "loss": 2.7998, + "step": 280 + }, + { + "epoch": 0.03059071729957806, + "grad_norm": 0.7435336112976074, + "learning_rate": 0.0014984027331818193, + "loss": 2.7674, + "step": 290 + }, + { + "epoch": 0.03164556962025317, + "grad_norm": 0.7141335010528564, + "learning_rate": 0.0014982347763140584, + "loss": 2.7399, + "step": 300 + }, + { + "epoch": 0.03270042194092827, + "grad_norm": 0.7329383492469788, + "learning_rate": 0.0014980584351102762, + "loss": 2.7036, + "step": 310 + }, + { + "epoch": 0.03375527426160337, + "grad_norm": 0.4977788031101227, + "learning_rate": 0.001497873711546462, + "loss": 2.6648, + "step": 320 + }, + { + "epoch": 0.03481012658227848, + "grad_norm": 0.7068954110145569, + "learning_rate": 0.0014976806076925334, + "loss": 2.6565, + "step": 330 + }, + { + "epoch": 0.035864978902953586, + "grad_norm": 0.5333254337310791, + "learning_rate": 0.0014974791257123137, + "loss": 2.6198, + "step": 340 + }, + { + "epoch": 0.03691983122362869, + "grad_norm": 0.9223081469535828, + "learning_rate": 0.001497269267863507, + 
"loss": 2.5715, + "step": 350 + }, + { + "epoch": 0.0379746835443038, + "grad_norm": 0.5915946960449219, + "learning_rate": 0.0014970510364976724, + "loss": 2.5569, + "step": 360 + }, + { + "epoch": 0.039029535864978905, + "grad_norm": 0.8074727654457092, + "learning_rate": 0.0014968244340601996, + "loss": 2.5397, + "step": 370 + }, + { + "epoch": 0.04008438818565401, + "grad_norm": 0.5810747742652893, + "learning_rate": 0.001496589463090279, + "loss": 2.5242, + "step": 380 + }, + { + "epoch": 0.04113924050632911, + "grad_norm": 0.6720121502876282, + "learning_rate": 0.001496346126220875, + "loss": 2.4959, + "step": 390 + }, + { + "epoch": 0.04219409282700422, + "grad_norm": 0.5454748272895813, + "learning_rate": 0.0014960944261786966, + "loss": 2.4626, + "step": 400 + }, + { + "epoch": 0.043248945147679324, + "grad_norm": 0.5418341159820557, + "learning_rate": 0.0014958343657841655, + "loss": 2.4486, + "step": 410 + }, + { + "epoch": 0.04430379746835443, + "grad_norm": 0.8023163676261902, + "learning_rate": 0.001495565947951385, + "loss": 2.4271, + "step": 420 + }, + { + "epoch": 0.04535864978902954, + "grad_norm": 0.6564382314682007, + "learning_rate": 0.0014952891756881085, + "loss": 2.4011, + "step": 430 + }, + { + "epoch": 0.046413502109704644, + "grad_norm": 0.5771198868751526, + "learning_rate": 0.0014950040520957037, + "loss": 2.3758, + "step": 440 + }, + { + "epoch": 0.04746835443037975, + "grad_norm": 0.569807767868042, + "learning_rate": 0.0014947105803691204, + "loss": 2.3724, + "step": 450 + }, + { + "epoch": 0.04852320675105485, + "grad_norm": 0.611444354057312, + "learning_rate": 0.0014944087637968522, + "loss": 2.347, + "step": 460 + }, + { + "epoch": 0.049578059071729956, + "grad_norm": 0.55059814453125, + "learning_rate": 0.0014940986057609012, + "loss": 2.321, + "step": 470 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 0.6207442879676819, + "learning_rate": 0.0014937801097367396, + "loss": 2.3214, + "step": 480 + }, + { + "epoch": 
0.05168776371308017, + "grad_norm": 0.6357215046882629, + "learning_rate": 0.001493453279293271, + "loss": 2.2954, + "step": 490 + }, + { + "epoch": 0.052742616033755275, + "grad_norm": 0.602497398853302, + "learning_rate": 0.0014931181180927902, + "loss": 2.2854, + "step": 500 + }, + { + "epoch": 0.05379746835443038, + "grad_norm": 0.6029930710792542, + "learning_rate": 0.001492774629890942, + "loss": 2.2765, + "step": 510 + }, + { + "epoch": 0.05485232067510549, + "grad_norm": 0.5690841674804688, + "learning_rate": 0.001492422818536679, + "loss": 2.2626, + "step": 520 + }, + { + "epoch": 0.05590717299578059, + "grad_norm": 0.601751446723938, + "learning_rate": 0.00149206268797222, + "loss": 2.2431, + "step": 530 + }, + { + "epoch": 0.056962025316455694, + "grad_norm": 0.5995395183563232, + "learning_rate": 0.0014916942422330032, + "loss": 2.2219, + "step": 540 + }, + { + "epoch": 0.0580168776371308, + "grad_norm": 0.5338934659957886, + "learning_rate": 0.001491317485447643, + "loss": 2.2245, + "step": 550 + }, + { + "epoch": 0.05907172995780591, + "grad_norm": 0.5547764897346497, + "learning_rate": 0.0014909324218378838, + "loss": 2.1841, + "step": 560 + }, + { + "epoch": 0.060126582278481014, + "grad_norm": 0.8922212719917297, + "learning_rate": 0.0014905390557185508, + "loss": 2.1942, + "step": 570 + }, + { + "epoch": 0.06118143459915612, + "grad_norm": 0.5770824551582336, + "learning_rate": 0.0014901373914975036, + "loss": 2.1941, + "step": 580 + }, + { + "epoch": 0.06223628691983123, + "grad_norm": 0.6388729214668274, + "learning_rate": 0.0014897274336755856, + "loss": 2.1721, + "step": 590 + }, + { + "epoch": 0.06329113924050633, + "grad_norm": 0.6425994634628296, + "learning_rate": 0.001489309186846575, + "loss": 2.1578, + "step": 600 + }, + { + "epoch": 0.06434599156118144, + "grad_norm": 0.649827778339386, + "learning_rate": 0.0014888826556971313, + "loss": 2.1576, + "step": 610 + }, + { + "epoch": 0.06540084388185655, + "grad_norm": 0.547429084777832, + 
"learning_rate": 0.0014884478450067444, + "loss": 2.1423, + "step": 620 + }, + { + "epoch": 0.06645569620253164, + "grad_norm": 0.5221420526504517, + "learning_rate": 0.0014880047596476807, + "loss": 2.125, + "step": 630 + }, + { + "epoch": 0.06751054852320675, + "grad_norm": 0.8965398073196411, + "learning_rate": 0.0014875534045849274, + "loss": 2.1436, + "step": 640 + }, + { + "epoch": 0.06856540084388185, + "grad_norm": 0.5338075160980225, + "learning_rate": 0.0014870937848761388, + "loss": 2.1251, + "step": 650 + }, + { + "epoch": 0.06962025316455696, + "grad_norm": 0.6436975598335266, + "learning_rate": 0.001486625905671578, + "loss": 2.1108, + "step": 660 + }, + { + "epoch": 0.07067510548523206, + "grad_norm": 0.5351758599281311, + "learning_rate": 0.00148614977221406, + "loss": 2.1009, + "step": 670 + }, + { + "epoch": 0.07172995780590717, + "grad_norm": 0.5582025051116943, + "learning_rate": 0.0014856653898388927, + "loss": 2.094, + "step": 680 + }, + { + "epoch": 0.07278481012658228, + "grad_norm": 0.6484729051589966, + "learning_rate": 0.001485172763973817, + "loss": 2.1054, + "step": 690 + }, + { + "epoch": 0.07383966244725738, + "grad_norm": 0.6020621657371521, + "learning_rate": 0.0014846719001389466, + "loss": 2.0855, + "step": 700 + }, + { + "epoch": 0.07489451476793249, + "grad_norm": 0.514555811882019, + "learning_rate": 0.001484162803946705, + "loss": 2.0753, + "step": 710 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 0.7553670406341553, + "learning_rate": 0.0014836454811017635, + "loss": 2.0589, + "step": 720 + }, + { + "epoch": 0.0770042194092827, + "grad_norm": 0.5340862274169922, + "learning_rate": 0.0014831199374009778, + "loss": 2.0523, + "step": 730 + }, + { + "epoch": 0.07805907172995781, + "grad_norm": 0.6416875123977661, + "learning_rate": 0.0014825861787333208, + "loss": 2.0616, + "step": 740 + }, + { + "epoch": 0.07911392405063292, + "grad_norm": 0.853373110294342, + "learning_rate": 0.0014820442110798197, + "loss": 2.047, + 
"step": 750 + }, + { + "epoch": 0.08016877637130802, + "grad_norm": 1.5705329179763794, + "learning_rate": 0.0014814940405134865, + "loss": 2.0495, + "step": 760 + }, + { + "epoch": 0.08122362869198312, + "grad_norm": 0.96327143907547, + "learning_rate": 0.001480935673199251, + "loss": 2.0459, + "step": 770 + }, + { + "epoch": 0.08227848101265822, + "grad_norm": 0.5600569844245911, + "learning_rate": 0.0014803691153938915, + "loss": 2.0329, + "step": 780 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 0.4883485734462738, + "learning_rate": 0.0014797943734459653, + "loss": 2.0376, + "step": 790 + }, + { + "epoch": 0.08438818565400844, + "grad_norm": 0.5479632019996643, + "learning_rate": 0.001479211453795736, + "loss": 2.0107, + "step": 800 + }, + { + "epoch": 0.08544303797468354, + "grad_norm": 0.544116199016571, + "learning_rate": 0.0014786203629751033, + "loss": 2.0012, + "step": 810 + }, + { + "epoch": 0.08649789029535865, + "grad_norm": 0.7197643518447876, + "learning_rate": 0.0014780211076075279, + "loss": 2.0107, + "step": 820 + }, + { + "epoch": 0.08755274261603375, + "grad_norm": 0.5321123003959656, + "learning_rate": 0.0014774136944079594, + "loss": 2.0169, + "step": 830 + }, + { + "epoch": 0.08860759493670886, + "grad_norm": 0.5018560290336609, + "learning_rate": 0.0014767981301827592, + "loss": 1.9941, + "step": 840 + }, + { + "epoch": 0.08966244725738397, + "grad_norm": 0.4979773163795471, + "learning_rate": 0.0014761744218296249, + "loss": 1.9926, + "step": 850 + }, + { + "epoch": 0.09071729957805907, + "grad_norm": 0.5132992267608643, + "learning_rate": 0.001475542576337513, + "loss": 1.995, + "step": 860 + }, + { + "epoch": 0.09177215189873418, + "grad_norm": 0.6127412915229797, + "learning_rate": 0.001474902600786561, + "loss": 1.993, + "step": 870 + }, + { + "epoch": 0.09282700421940929, + "grad_norm": 0.5391224026679993, + "learning_rate": 0.0014742545023480075, + "loss": 1.9846, + "step": 880 + }, + { + "epoch": 0.0938818565400844, + 
"grad_norm": 0.6764683723449707, + "learning_rate": 0.0014735982882841117, + "loss": 1.9769, + "step": 890 + }, + { + "epoch": 0.0949367088607595, + "grad_norm": 0.7597113847732544, + "learning_rate": 0.0014729339659480727, + "loss": 1.977, + "step": 900 + }, + { + "epoch": 0.09599156118143459, + "grad_norm": 0.7991539239883423, + "learning_rate": 0.0014722615427839468, + "loss": 1.9873, + "step": 910 + }, + { + "epoch": 0.0970464135021097, + "grad_norm": 0.6347671747207642, + "learning_rate": 0.0014715810263265633, + "loss": 1.9711, + "step": 920 + }, + { + "epoch": 0.0981012658227848, + "grad_norm": 0.5361153483390808, + "learning_rate": 0.0014708924242014423, + "loss": 1.9572, + "step": 930 + }, + { + "epoch": 0.09915611814345991, + "grad_norm": 0.5275008082389832, + "learning_rate": 0.0014701957441247064, + "loss": 1.9635, + "step": 940 + }, + { + "epoch": 0.10021097046413502, + "grad_norm": 0.54686439037323, + "learning_rate": 0.0014694909939029959, + "loss": 1.9445, + "step": 950 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 0.5958303809165955, + "learning_rate": 0.0014687781814333814, + "loss": 1.9545, + "step": 960 + }, + { + "epoch": 0.10232067510548523, + "grad_norm": 0.9188456535339355, + "learning_rate": 0.0014680573147032746, + "loss": 1.9552, + "step": 970 + }, + { + "epoch": 0.10337552742616034, + "grad_norm": 0.5903360843658447, + "learning_rate": 0.0014673284017903392, + "loss": 1.9365, + "step": 980 + }, + { + "epoch": 0.10443037974683544, + "grad_norm": 0.5280314683914185, + "learning_rate": 0.0014665914508624, + "loss": 1.9328, + "step": 990 + }, + { + "epoch": 0.10548523206751055, + "grad_norm": 0.7860329747200012, + "learning_rate": 0.0014658464701773526, + "loss": 1.9452, + "step": 1000 + }, + { + "epoch": 0.10654008438818566, + "grad_norm": 0.5484187006950378, + "learning_rate": 0.0014650934680830688, + "loss": 1.9358, + "step": 1010 + }, + { + "epoch": 0.10759493670886076, + "grad_norm": 1.4045076370239258, + "learning_rate": 
0.0014643324530173051, + "loss": 1.9287, + "step": 1020 + }, + { + "epoch": 0.10864978902953587, + "grad_norm": 0.5204342603683472, + "learning_rate": 0.0014635634335076067, + "loss": 1.9346, + "step": 1030 + }, + { + "epoch": 0.10970464135021098, + "grad_norm": 0.61603182554245, + "learning_rate": 0.001462786418171213, + "loss": 1.9208, + "step": 1040 + }, + { + "epoch": 0.11075949367088607, + "grad_norm": 0.6103143095970154, + "learning_rate": 0.0014620014157149597, + "loss": 1.9272, + "step": 1050 + }, + { + "epoch": 0.11181434599156118, + "grad_norm": 0.5225959420204163, + "learning_rate": 0.001461208434935183, + "loss": 1.9159, + "step": 1060 + }, + { + "epoch": 0.11286919831223628, + "grad_norm": 0.5206629037857056, + "learning_rate": 0.0014604074847176197, + "loss": 1.9072, + "step": 1070 + }, + { + "epoch": 0.11392405063291139, + "grad_norm": 0.5538263320922852, + "learning_rate": 0.0014595985740373082, + "loss": 1.9082, + "step": 1080 + }, + { + "epoch": 0.1149789029535865, + "grad_norm": 0.8069490790367126, + "learning_rate": 0.0014587817119584873, + "loss": 1.9172, + "step": 1090 + }, + { + "epoch": 0.1160337552742616, + "grad_norm": 0.497503399848938, + "learning_rate": 0.001457956907634496, + "loss": 1.9067, + "step": 1100 + }, + { + "epoch": 0.11708860759493671, + "grad_norm": 0.512475311756134, + "learning_rate": 0.0014571241703076692, + "loss": 1.9074, + "step": 1110 + }, + { + "epoch": 0.11814345991561181, + "grad_norm": 0.5735927820205688, + "learning_rate": 0.0014562835093092348, + "loss": 1.9, + "step": 1120 + }, + { + "epoch": 0.11919831223628692, + "grad_norm": 0.5935125350952148, + "learning_rate": 0.0014554349340592104, + "loss": 1.887, + "step": 1130 + }, + { + "epoch": 0.12025316455696203, + "grad_norm": 0.6163064241409302, + "learning_rate": 0.001454578454066296, + "loss": 1.9038, + "step": 1140 + }, + { + "epoch": 0.12130801687763713, + "grad_norm": 0.5961697101593018, + "learning_rate": 0.0014537140789277678, + "loss": 1.8889, + "step": 
1150 + }, + { + "epoch": 0.12236286919831224, + "grad_norm": 0.5046367645263672, + "learning_rate": 0.0014528418183293716, + "loss": 1.896, + "step": 1160 + }, + { + "epoch": 0.12341772151898735, + "grad_norm": 0.5353291630744934, + "learning_rate": 0.001451961682045213, + "loss": 1.8856, + "step": 1170 + }, + { + "epoch": 0.12447257383966245, + "grad_norm": 0.6587094664573669, + "learning_rate": 0.001451073679937649, + "loss": 1.8729, + "step": 1180 + }, + { + "epoch": 0.12552742616033755, + "grad_norm": 0.5267122387886047, + "learning_rate": 0.0014501778219571766, + "loss": 1.8767, + "step": 1190 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 0.8488121032714844, + "learning_rate": 0.0014492741181423225, + "loss": 1.8871, + "step": 1200 + }, + { + "epoch": 0.12763713080168776, + "grad_norm": 0.736929714679718, + "learning_rate": 0.0014483625786195285, + "loss": 1.8811, + "step": 1210 + }, + { + "epoch": 0.12869198312236288, + "grad_norm": 0.6392133831977844, + "learning_rate": 0.0014474432136030405, + "loss": 1.87, + "step": 1220 + }, + { + "epoch": 0.12974683544303797, + "grad_norm": 0.6987409591674805, + "learning_rate": 0.0014465160333947923, + "loss": 1.8598, + "step": 1230 + }, + { + "epoch": 0.1308016877637131, + "grad_norm": 0.624569296836853, + "learning_rate": 0.0014455810483842908, + "loss": 1.8754, + "step": 1240 + }, + { + "epoch": 0.13185654008438819, + "grad_norm": 0.6998398900032043, + "learning_rate": 0.0014446382690484997, + "loss": 1.8785, + "step": 1250 + }, + { + "epoch": 0.13291139240506328, + "grad_norm": 0.552516758441925, + "learning_rate": 0.0014436877059517215, + "loss": 1.8597, + "step": 1260 + }, + { + "epoch": 0.1339662447257384, + "grad_norm": 0.6996772289276123, + "learning_rate": 0.0014427293697454803, + "loss": 1.8574, + "step": 1270 + }, + { + "epoch": 0.1350210970464135, + "grad_norm": 0.571471631526947, + "learning_rate": 0.001441763271168401, + "loss": 1.8708, + "step": 1280 + }, + { + "epoch": 0.1360759493670886, + 
"grad_norm": 0.8697657585144043, + "learning_rate": 0.00144078942104609, + "loss": 1.8581, + "step": 1290 + }, + { + "epoch": 0.1371308016877637, + "grad_norm": 0.5444104075431824, + "learning_rate": 0.001439807830291013, + "loss": 1.8486, + "step": 1300 + }, + { + "epoch": 0.13818565400843882, + "grad_norm": 0.5180477499961853, + "learning_rate": 0.0014388185099023744, + "loss": 1.8525, + "step": 1310 + }, + { + "epoch": 0.13924050632911392, + "grad_norm": 0.4946292042732239, + "learning_rate": 0.0014378214709659916, + "loss": 1.8551, + "step": 1320 + }, + { + "epoch": 0.14029535864978904, + "grad_norm": 0.8654590845108032, + "learning_rate": 0.0014368167246541733, + "loss": 1.8477, + "step": 1330 + }, + { + "epoch": 0.14135021097046413, + "grad_norm": 0.5088804364204407, + "learning_rate": 0.0014358042822255918, + "loss": 1.8476, + "step": 1340 + }, + { + "epoch": 0.14240506329113925, + "grad_norm": 0.680858850479126, + "learning_rate": 0.0014347841550251597, + "loss": 1.8524, + "step": 1350 + }, + { + "epoch": 0.14345991561181434, + "grad_norm": 0.47212639451026917, + "learning_rate": 0.0014337563544838997, + "loss": 1.8417, + "step": 1360 + }, + { + "epoch": 0.14451476793248946, + "grad_norm": 0.6372097730636597, + "learning_rate": 0.001432720892118819, + "loss": 1.8415, + "step": 1370 + }, + { + "epoch": 0.14556962025316456, + "grad_norm": 0.9408152103424072, + "learning_rate": 0.0014316777795327794, + "loss": 1.8309, + "step": 1380 + }, + { + "epoch": 0.14662447257383968, + "grad_norm": 0.4758073091506958, + "learning_rate": 0.001430627028414366, + "loss": 1.8442, + "step": 1390 + }, + { + "epoch": 0.14767932489451477, + "grad_norm": 0.7502924203872681, + "learning_rate": 0.0014295686505377586, + "loss": 1.8283, + "step": 1400 + }, + { + "epoch": 0.14873417721518986, + "grad_norm": 0.6692312359809875, + "learning_rate": 0.0014285026577625982, + "loss": 1.8308, + "step": 1410 + }, + { + "epoch": 0.14978902953586498, + "grad_norm": 0.6520975828170776, + 
"learning_rate": 0.0014274290620338542, + "loss": 1.8405, + "step": 1420 + }, + { + "epoch": 0.15084388185654007, + "grad_norm": 0.9330534934997559, + "learning_rate": 0.0014263478753816906, + "loss": 1.8284, + "step": 1430 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 0.5251515507698059, + "learning_rate": 0.0014252591099213326, + "loss": 1.8291, + "step": 1440 + }, + { + "epoch": 0.1529535864978903, + "grad_norm": 0.6605876684188843, + "learning_rate": 0.001424162777852928, + "loss": 1.8278, + "step": 1450 + }, + { + "epoch": 0.1540084388185654, + "grad_norm": 0.5477521419525146, + "learning_rate": 0.0014230588914614134, + "loss": 1.8209, + "step": 1460 + }, + { + "epoch": 0.1550632911392405, + "grad_norm": 1.0202778577804565, + "learning_rate": 0.0014219474631163745, + "loss": 1.8211, + "step": 1470 + }, + { + "epoch": 0.15611814345991562, + "grad_norm": 0.9424344897270203, + "learning_rate": 0.001420828505271909, + "loss": 1.8274, + "step": 1480 + }, + { + "epoch": 0.1571729957805907, + "grad_norm": 0.5526300668716431, + "learning_rate": 0.0014197020304664856, + "loss": 1.8242, + "step": 1490 + }, + { + "epoch": 0.15822784810126583, + "grad_norm": 0.5609686970710754, + "learning_rate": 0.0014185680513228048, + "loss": 1.8226, + "step": 1500 + }, + { + "epoch": 0.15928270042194093, + "grad_norm": 0.5468385219573975, + "learning_rate": 0.0014174265805476564, + "loss": 1.821, + "step": 1510 + }, + { + "epoch": 0.16033755274261605, + "grad_norm": 0.5879926681518555, + "learning_rate": 0.0014162776309317778, + "loss": 1.8204, + "step": 1520 + }, + { + "epoch": 0.16139240506329114, + "grad_norm": 0.5888334512710571, + "learning_rate": 0.0014151212153497108, + "loss": 1.8048, + "step": 1530 + }, + { + "epoch": 0.16244725738396623, + "grad_norm": 0.5080498456954956, + "learning_rate": 0.0014139573467596561, + "loss": 1.7913, + "step": 1540 + }, + { + "epoch": 0.16350210970464135, + "grad_norm": 0.6305698156356812, + "learning_rate": 0.00141278603820333, + 
"loss": 1.7956, + "step": 1550 + }, + { + "epoch": 0.16455696202531644, + "grad_norm": 0.6266664266586304, + "learning_rate": 0.0014116073028058165, + "loss": 1.7972, + "step": 1560 + }, + { + "epoch": 0.16561181434599156, + "grad_norm": 0.5771638751029968, + "learning_rate": 0.0014104211537754217, + "loss": 1.7942, + "step": 1570 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 0.5643879771232605, + "learning_rate": 0.001409227604403524, + "loss": 1.8011, + "step": 1580 + }, + { + "epoch": 0.16772151898734178, + "grad_norm": 0.7012278437614441, + "learning_rate": 0.0014080266680644277, + "loss": 1.7992, + "step": 1590 + }, + { + "epoch": 0.16877637130801687, + "grad_norm": 1.0102479457855225, + "learning_rate": 0.0014068183582152103, + "loss": 1.8045, + "step": 1600 + }, + { + "epoch": 0.169831223628692, + "grad_norm": 0.5632858872413635, + "learning_rate": 0.001405602688395574, + "loss": 1.8067, + "step": 1610 + }, + { + "epoch": 0.17088607594936708, + "grad_norm": 0.6346735954284668, + "learning_rate": 0.0014043796722276924, + "loss": 1.777, + "step": 1620 + }, + { + "epoch": 0.1719409282700422, + "grad_norm": 0.5212036967277527, + "learning_rate": 0.0014031493234160591, + "loss": 1.7893, + "step": 1630 + }, + { + "epoch": 0.1729957805907173, + "grad_norm": 0.7424996495246887, + "learning_rate": 0.0014019116557473332, + "loss": 1.7917, + "step": 1640 + }, + { + "epoch": 0.17405063291139242, + "grad_norm": 0.6245171427726746, + "learning_rate": 0.0014006666830901854, + "loss": 1.7821, + "step": 1650 + }, + { + "epoch": 0.1751054852320675, + "grad_norm": 0.5200752019882202, + "learning_rate": 0.001399414419395142, + "loss": 1.7829, + "step": 1660 + }, + { + "epoch": 0.17616033755274263, + "grad_norm": 0.7273435592651367, + "learning_rate": 0.0013981548786944293, + "loss": 1.7933, + "step": 1670 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 0.7536471486091614, + "learning_rate": 0.0013968880751018158, + "loss": 1.7776, + "step": 1680 + }, + { + 
"epoch": 0.17827004219409281, + "grad_norm": 0.565134584903717, + "learning_rate": 0.0013956140228124545, + "loss": 1.7708, + "step": 1690 + }, + { + "epoch": 0.17932489451476794, + "grad_norm": 0.5974119305610657, + "learning_rate": 0.0013943327361027231, + "loss": 1.7899, + "step": 1700 + }, + { + "epoch": 0.18037974683544303, + "grad_norm": 0.7430379986763, + "learning_rate": 0.0013930442293300649, + "loss": 1.7688, + "step": 1710 + }, + { + "epoch": 0.18143459915611815, + "grad_norm": 0.49069711565971375, + "learning_rate": 0.0013917485169328279, + "loss": 1.7685, + "step": 1720 + }, + { + "epoch": 0.18248945147679324, + "grad_norm": 0.7117071151733398, + "learning_rate": 0.0013904456134301016, + "loss": 1.7734, + "step": 1730 + }, + { + "epoch": 0.18354430379746836, + "grad_norm": 0.9844165444374084, + "learning_rate": 0.0013891355334215562, + "loss": 1.7785, + "step": 1740 + }, + { + "epoch": 0.18459915611814345, + "grad_norm": 0.6243603229522705, + "learning_rate": 0.0013878182915872776, + "loss": 1.7988, + "step": 1750 + }, + { + "epoch": 0.18565400843881857, + "grad_norm": 0.695322573184967, + "learning_rate": 0.001386493902687604, + "loss": 1.7741, + "step": 1760 + }, + { + "epoch": 0.18670886075949367, + "grad_norm": 0.4707360863685608, + "learning_rate": 0.00138516238156296, + "loss": 1.7718, + "step": 1770 + }, + { + "epoch": 0.1877637130801688, + "grad_norm": 0.507228434085846, + "learning_rate": 0.0013838237431336895, + "loss": 1.7737, + "step": 1780 + }, + { + "epoch": 0.18881856540084388, + "grad_norm": 0.48154696822166443, + "learning_rate": 0.0013824780023998899, + "loss": 1.7761, + "step": 1790 + }, + { + "epoch": 0.189873417721519, + "grad_norm": 0.6127373576164246, + "learning_rate": 0.0013811251744412431, + "loss": 1.7709, + "step": 1800 + }, + { + "epoch": 0.1909282700421941, + "grad_norm": 0.5426109433174133, + "learning_rate": 0.0013797652744168473, + "loss": 1.7605, + "step": 1810 + }, + { + "epoch": 0.19198312236286919, + "grad_norm": 
0.5138939619064331, + "learning_rate": 0.0013783983175650457, + "loss": 1.7588, + "step": 1820 + }, + { + "epoch": 0.1930379746835443, + "grad_norm": 0.553103506565094, + "learning_rate": 0.0013770243192032581, + "loss": 1.7577, + "step": 1830 + }, + { + "epoch": 0.1940928270042194, + "grad_norm": 0.5266918540000916, + "learning_rate": 0.0013756432947278064, + "loss": 1.7559, + "step": 1840 + }, + { + "epoch": 0.19514767932489452, + "grad_norm": 0.9226388335227966, + "learning_rate": 0.0013742552596137444, + "loss": 1.7593, + "step": 1850 + }, + { + "epoch": 0.1962025316455696, + "grad_norm": 0.5951690673828125, + "learning_rate": 0.0013728602294146833, + "loss": 1.7656, + "step": 1860 + }, + { + "epoch": 0.19725738396624473, + "grad_norm": 0.6096036434173584, + "learning_rate": 0.0013714582197626175, + "loss": 1.7658, + "step": 1870 + }, + { + "epoch": 0.19831223628691982, + "grad_norm": 0.5772407054901123, + "learning_rate": 0.0013700492463677501, + "loss": 1.7517, + "step": 1880 + }, + { + "epoch": 0.19936708860759494, + "grad_norm": 0.5843099355697632, + "learning_rate": 0.0013686333250183154, + "loss": 1.7584, + "step": 1890 + }, + { + "epoch": 0.20042194092827004, + "grad_norm": 0.967965841293335, + "learning_rate": 0.001367210471580404, + "loss": 1.7547, + "step": 1900 + }, + { + "epoch": 0.20147679324894516, + "grad_norm": 0.6380553841590881, + "learning_rate": 0.0013657807019977835, + "loss": 1.7574, + "step": 1910 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 0.7145974636077881, + "learning_rate": 0.0013643440322917198, + "loss": 1.7548, + "step": 1920 + }, + { + "epoch": 0.20358649789029537, + "grad_norm": 0.6628404855728149, + "learning_rate": 0.0013629004785607989, + "loss": 1.7561, + "step": 1930 + }, + { + "epoch": 0.20464135021097046, + "grad_norm": 0.5272290706634521, + "learning_rate": 0.0013614500569807445, + "loss": 1.7455, + "step": 1940 + }, + { + "epoch": 0.20569620253164558, + "grad_norm": 0.6261658668518066, + "learning_rate": 
0.0013599927838042394, + "loss": 1.7435, + "step": 1950 + }, + { + "epoch": 0.20675105485232068, + "grad_norm": 0.5160012245178223, + "learning_rate": 0.0013585286753607408, + "loss": 1.7445, + "step": 1960 + }, + { + "epoch": 0.20780590717299577, + "grad_norm": 0.5278101563453674, + "learning_rate": 0.0013570577480562986, + "loss": 1.7564, + "step": 1970 + }, + { + "epoch": 0.2088607594936709, + "grad_norm": 0.6808750629425049, + "learning_rate": 0.0013555800183733717, + "loss": 1.737, + "step": 1980 + }, + { + "epoch": 0.20991561181434598, + "grad_norm": 0.5647542476654053, + "learning_rate": 0.0013540955028706425, + "loss": 1.7375, + "step": 1990 + }, + { + "epoch": 0.2109704641350211, + "grad_norm": 0.6382290720939636, + "learning_rate": 0.0013526042181828324, + "loss": 1.7381, + "step": 2000 + }, + { + "epoch": 0.2120253164556962, + "grad_norm": 0.704724907875061, + "learning_rate": 0.0013511061810205143, + "loss": 1.7421, + "step": 2010 + }, + { + "epoch": 0.21308016877637131, + "grad_norm": 0.6695508360862732, + "learning_rate": 0.001349601408169926, + "loss": 1.756, + "step": 2020 + }, + { + "epoch": 0.2141350210970464, + "grad_norm": 0.7101924419403076, + "learning_rate": 0.0013480899164927823, + "loss": 1.7418, + "step": 2030 + }, + { + "epoch": 0.21518987341772153, + "grad_norm": 0.500063955783844, + "learning_rate": 0.0013465717229260853, + "loss": 1.7402, + "step": 2040 + }, + { + "epoch": 0.21624472573839662, + "grad_norm": 0.5229079723358154, + "learning_rate": 0.001345046844481935, + "loss": 1.7385, + "step": 2050 + }, + { + "epoch": 0.21729957805907174, + "grad_norm": 0.5279678106307983, + "learning_rate": 0.0013435152982473396, + "loss": 1.7271, + "step": 2060 + }, + { + "epoch": 0.21835443037974683, + "grad_norm": 0.5653748512268066, + "learning_rate": 0.0013419771013840217, + "loss": 1.7347, + "step": 2070 + }, + { + "epoch": 0.21940928270042195, + "grad_norm": 0.6687909960746765, + "learning_rate": 0.001340432271128229, + "loss": 1.7378, + 
"step": 2080 + }, + { + "epoch": 0.22046413502109705, + "grad_norm": 0.6813969016075134, + "learning_rate": 0.0013388808247905381, + "loss": 1.7284, + "step": 2090 + }, + { + "epoch": 0.22151898734177214, + "grad_norm": 0.5120254755020142, + "learning_rate": 0.0013373227797556634, + "loss": 1.737, + "step": 2100 + }, + { + "epoch": 0.22257383966244726, + "grad_norm": 0.9315611124038696, + "learning_rate": 0.00133575815348226, + "loss": 1.7271, + "step": 2110 + }, + { + "epoch": 0.22362869198312235, + "grad_norm": 0.5132087469100952, + "learning_rate": 0.0013341869635027292, + "loss": 1.7274, + "step": 2120 + }, + { + "epoch": 0.22468354430379747, + "grad_norm": 0.4970592260360718, + "learning_rate": 0.001332609227423022, + "loss": 1.7273, + "step": 2130 + }, + { + "epoch": 0.22573839662447256, + "grad_norm": 0.5128822326660156, + "learning_rate": 0.0013310249629224417, + "loss": 1.7224, + "step": 2140 + }, + { + "epoch": 0.22679324894514769, + "grad_norm": 0.5117209553718567, + "learning_rate": 0.0013294341877534454, + "loss": 1.7294, + "step": 2150 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 0.7867664098739624, + "learning_rate": 0.0013278369197414458, + "loss": 1.7419, + "step": 2160 + }, + { + "epoch": 0.2289029535864979, + "grad_norm": 0.5022678375244141, + "learning_rate": 0.0013262331767846104, + "loss": 1.7279, + "step": 2170 + }, + { + "epoch": 0.229957805907173, + "grad_norm": 0.5226845145225525, + "learning_rate": 0.0013246229768536628, + "loss": 1.7098, + "step": 2180 + }, + { + "epoch": 0.2310126582278481, + "grad_norm": 0.48600006103515625, + "learning_rate": 0.001323006337991679, + "loss": 1.7243, + "step": 2190 + }, + { + "epoch": 0.2320675105485232, + "grad_norm": 0.6702044606208801, + "learning_rate": 0.0013213832783138873, + "loss": 1.7231, + "step": 2200 + }, + { + "epoch": 0.23312236286919832, + "grad_norm": 0.7053707838058472, + "learning_rate": 0.0013197538160074633, + "loss": 1.7144, + "step": 2210 + }, + { + "epoch": 
0.23417721518987342, + "grad_norm": 0.7200219631195068, + "learning_rate": 0.0013181179693313283, + "loss": 1.7287, + "step": 2220 + }, + { + "epoch": 0.23523206751054854, + "grad_norm": 1.0686776638031006, + "learning_rate": 0.0013164757566159428, + "loss": 1.7205, + "step": 2230 + }, + { + "epoch": 0.23628691983122363, + "grad_norm": 0.7585874199867249, + "learning_rate": 0.001314827196263102, + "loss": 1.7166, + "step": 2240 + }, + { + "epoch": 0.23734177215189872, + "grad_norm": 0.8125566840171814, + "learning_rate": 0.0013131723067457302, + "loss": 1.7128, + "step": 2250 + }, + { + "epoch": 0.23839662447257384, + "grad_norm": 0.9645271301269531, + "learning_rate": 0.0013115111066076721, + "loss": 1.706, + "step": 2260 + }, + { + "epoch": 0.23945147679324894, + "grad_norm": 0.604837954044342, + "learning_rate": 0.0013098436144634862, + "loss": 1.7397, + "step": 2270 + }, + { + "epoch": 0.24050632911392406, + "grad_norm": 0.635099470615387, + "learning_rate": 0.0013081698489982364, + "loss": 1.7214, + "step": 2280 + }, + { + "epoch": 0.24156118143459915, + "grad_norm": 0.6959373950958252, + "learning_rate": 0.001306489828967282, + "loss": 1.7138, + "step": 2290 + }, + { + "epoch": 0.24261603375527427, + "grad_norm": 0.6749511361122131, + "learning_rate": 0.0013048035731960679, + "loss": 1.7128, + "step": 2300 + }, + { + "epoch": 0.24367088607594936, + "grad_norm": 0.6464404463768005, + "learning_rate": 0.0013031111005799133, + "loss": 1.7113, + "step": 2310 + }, + { + "epoch": 0.24472573839662448, + "grad_norm": 0.5773245692253113, + "learning_rate": 0.0013014124300838004, + "loss": 1.7219, + "step": 2320 + }, + { + "epoch": 0.24578059071729957, + "grad_norm": 0.572909414768219, + "learning_rate": 0.0012997075807421612, + "loss": 1.7038, + "step": 2330 + }, + { + "epoch": 0.2468354430379747, + "grad_norm": 0.7363695502281189, + "learning_rate": 0.0012979965716586653, + "loss": 1.7026, + "step": 2340 + }, + { + "epoch": 0.2478902953586498, + "grad_norm": 
1.466504454612732, + "learning_rate": 0.0012962794220060048, + "loss": 1.7084, + "step": 2350 + }, + { + "epoch": 0.2489451476793249, + "grad_norm": 1.0482419729232788, + "learning_rate": 0.0012945561510256801, + "loss": 1.7205, + "step": 2360 + }, + { + "epoch": 0.25, + "grad_norm": 0.5506669282913208, + "learning_rate": 0.001292826778027784, + "loss": 1.7053, + "step": 2370 + }, + { + "epoch": 0.2510548523206751, + "grad_norm": 0.6531742215156555, + "learning_rate": 0.0012910913223907856, + "loss": 1.7016, + "step": 2380 + }, + { + "epoch": 0.2521097046413502, + "grad_norm": 0.5463520884513855, + "learning_rate": 0.0012893498035613123, + "loss": 1.703, + "step": 2390 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 0.6516016125679016, + "learning_rate": 0.001287602241053933, + "loss": 1.7062, + "step": 2400 + }, + { + "epoch": 0.2542194092827004, + "grad_norm": 0.5607056021690369, + "learning_rate": 0.0012858486544509392, + "loss": 1.7073, + "step": 2410 + }, + { + "epoch": 0.2552742616033755, + "grad_norm": 0.6049599051475525, + "learning_rate": 0.0012840890634021249, + "loss": 1.6978, + "step": 2420 + }, + { + "epoch": 0.2563291139240506, + "grad_norm": 1.0902249813079834, + "learning_rate": 0.0012823234876245667, + "loss": 1.6994, + "step": 2430 + }, + { + "epoch": 0.25738396624472576, + "grad_norm": 0.920967161655426, + "learning_rate": 0.0012805519469024035, + "loss": 1.7066, + "step": 2440 + }, + { + "epoch": 0.25843881856540085, + "grad_norm": 0.7389056086540222, + "learning_rate": 0.0012787744610866143, + "loss": 1.6932, + "step": 2450 + }, + { + "epoch": 0.25949367088607594, + "grad_norm": 0.5547269582748413, + "learning_rate": 0.0012769910500947954, + "loss": 1.7095, + "step": 2460 + }, + { + "epoch": 0.26054852320675104, + "grad_norm": 0.6491239666938782, + "learning_rate": 0.0012752017339109376, + "loss": 1.6995, + "step": 2470 + }, + { + "epoch": 0.2616033755274262, + "grad_norm": 0.6624890565872192, + "learning_rate": 0.0012734065325852029, + 
"loss": 1.7048, + "step": 2480 + }, + { + "epoch": 0.2626582278481013, + "grad_norm": 0.49583667516708374, + "learning_rate": 0.0012716054662336987, + "loss": 1.6898, + "step": 2490 + }, + { + "epoch": 0.26371308016877637, + "grad_norm": 0.5256396532058716, + "learning_rate": 0.001269798555038252, + "loss": 1.6946, + "step": 2500 + }, + { + "epoch": 0.26476793248945146, + "grad_norm": 0.6015365123748779, + "learning_rate": 0.0012679858192461864, + "loss": 1.6943, + "step": 2510 + }, + { + "epoch": 0.26582278481012656, + "grad_norm": 0.8601450324058533, + "learning_rate": 0.0012661672791700906, + "loss": 1.6997, + "step": 2520 + }, + { + "epoch": 0.2668776371308017, + "grad_norm": 0.6989620327949524, + "learning_rate": 0.0012643429551875945, + "loss": 1.692, + "step": 2530 + }, + { + "epoch": 0.2679324894514768, + "grad_norm": 0.6465297341346741, + "learning_rate": 0.0012625128677411388, + "loss": 1.6963, + "step": 2540 + }, + { + "epoch": 0.2689873417721519, + "grad_norm": 0.5421907305717468, + "learning_rate": 0.0012606770373377475, + "loss": 1.6917, + "step": 2550 + }, + { + "epoch": 0.270042194092827, + "grad_norm": 0.6237439513206482, + "learning_rate": 0.0012588354845487959, + "loss": 1.6983, + "step": 2560 + }, + { + "epoch": 0.27109704641350213, + "grad_norm": 0.7129495739936829, + "learning_rate": 0.001256988230009783, + "loss": 1.692, + "step": 2570 + }, + { + "epoch": 0.2721518987341772, + "grad_norm": 0.5316609144210815, + "learning_rate": 0.0012551352944200976, + "loss": 1.6893, + "step": 2580 + }, + { + "epoch": 0.2732067510548523, + "grad_norm": 0.722777247428894, + "learning_rate": 0.0012532766985427874, + "loss": 1.6941, + "step": 2590 + }, + { + "epoch": 0.2742616033755274, + "grad_norm": 0.5814105868339539, + "learning_rate": 0.0012514124632043272, + "loss": 1.6959, + "step": 2600 + }, + { + "epoch": 0.27531645569620256, + "grad_norm": 0.6119912266731262, + "learning_rate": 0.0012495426092943842, + "loss": 1.6937, + "step": 2610 + }, + { + 
"epoch": 0.27637130801687765, + "grad_norm": 0.5168399214744568, + "learning_rate": 0.0012476671577655845, + "loss": 1.6899, + "step": 2620 + }, + { + "epoch": 0.27742616033755274, + "grad_norm": 0.8378857374191284, + "learning_rate": 0.0012457861296332774, + "loss": 1.6838, + "step": 2630 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 0.702446460723877, + "learning_rate": 0.001243899545975303, + "loss": 1.6955, + "step": 2640 + }, + { + "epoch": 0.2795358649789029, + "grad_norm": 0.589516818523407, + "learning_rate": 0.0012420074279317515, + "loss": 1.6876, + "step": 2650 + }, + { + "epoch": 0.2805907172995781, + "grad_norm": 0.6143748760223389, + "learning_rate": 0.0012401097967047298, + "loss": 1.6821, + "step": 2660 + }, + { + "epoch": 0.28164556962025317, + "grad_norm": 0.6360085010528564, + "learning_rate": 0.001238206673558122, + "loss": 1.6815, + "step": 2670 + }, + { + "epoch": 0.28270042194092826, + "grad_norm": 0.6078710556030273, + "learning_rate": 0.0012362980798173526, + "loss": 1.6835, + "step": 2680 + }, + { + "epoch": 0.28375527426160335, + "grad_norm": 0.7311747670173645, + "learning_rate": 0.0012343840368691462, + "loss": 1.6843, + "step": 2690 + }, + { + "epoch": 0.2848101265822785, + "grad_norm": 0.850714385509491, + "learning_rate": 0.0012324645661612886, + "loss": 1.6787, + "step": 2700 + }, + { + "epoch": 0.2858649789029536, + "grad_norm": 0.693458080291748, + "learning_rate": 0.0012305396892023867, + "loss": 1.6848, + "step": 2710 + }, + { + "epoch": 0.2869198312236287, + "grad_norm": 0.5178744792938232, + "learning_rate": 0.0012286094275616264, + "loss": 1.6834, + "step": 2720 + }, + { + "epoch": 0.2879746835443038, + "grad_norm": 0.5004352927207947, + "learning_rate": 0.0012266738028685318, + "loss": 1.666, + "step": 2730 + }, + { + "epoch": 0.2890295358649789, + "grad_norm": 0.8583536148071289, + "learning_rate": 0.001224732836812723, + "loss": 1.6764, + "step": 2740 + }, + { + "epoch": 0.290084388185654, + "grad_norm": 
0.47937116026878357, + "learning_rate": 0.0012227865511436724, + "loss": 1.6804, + "step": 2750 + }, + { + "epoch": 0.2911392405063291, + "grad_norm": 0.5805412530899048, + "learning_rate": 0.001220834967670461, + "loss": 1.6894, + "step": 2760 + }, + { + "epoch": 0.2921940928270042, + "grad_norm": 0.5018792152404785, + "learning_rate": 0.0012188781082615346, + "loss": 1.6863, + "step": 2770 + }, + { + "epoch": 0.29324894514767935, + "grad_norm": 0.5338218808174133, + "learning_rate": 0.0012169159948444588, + "loss": 1.6828, + "step": 2780 + }, + { + "epoch": 0.29430379746835444, + "grad_norm": 0.5100282430648804, + "learning_rate": 0.001214948649405672, + "loss": 1.6784, + "step": 2790 + }, + { + "epoch": 0.29535864978902954, + "grad_norm": 0.49944400787353516, + "learning_rate": 0.0012129760939902407, + "loss": 1.6769, + "step": 2800 + }, + { + "epoch": 0.29641350210970463, + "grad_norm": 0.5971114635467529, + "learning_rate": 0.0012109983507016114, + "loss": 1.6794, + "step": 2810 + }, + { + "epoch": 0.2974683544303797, + "grad_norm": 0.5830234885215759, + "learning_rate": 0.0012090154417013636, + "loss": 1.6837, + "step": 2820 + }, + { + "epoch": 0.29852320675105487, + "grad_norm": 1.0087296962738037, + "learning_rate": 0.0012070273892089605, + "loss": 1.6595, + "step": 2830 + }, + { + "epoch": 0.29957805907172996, + "grad_norm": 0.6587289571762085, + "learning_rate": 0.0012050342155015012, + "loss": 1.6628, + "step": 2840 + }, + { + "epoch": 0.30063291139240506, + "grad_norm": 0.826564371585846, + "learning_rate": 0.0012030359429134707, + "loss": 1.6688, + "step": 2850 + }, + { + "epoch": 0.30168776371308015, + "grad_norm": 0.6125487089157104, + "learning_rate": 0.0012010325938364883, + "loss": 1.6707, + "step": 2860 + }, + { + "epoch": 0.3027426160337553, + "grad_norm": 0.7530525326728821, + "learning_rate": 0.0011990241907190592, + "loss": 1.6713, + "step": 2870 + }, + { + "epoch": 0.3037974683544304, + "grad_norm": 0.56882643699646, + "learning_rate": 
0.001197010756066321, + "loss": 1.6571, + "step": 2880 + }, + { + "epoch": 0.3048523206751055, + "grad_norm": 0.6395471096038818, + "learning_rate": 0.0011949923124397917, + "loss": 1.6636, + "step": 2890 + }, + { + "epoch": 0.3059071729957806, + "grad_norm": 0.6341049075126648, + "learning_rate": 0.001192968882457118, + "loss": 1.6685, + "step": 2900 + }, + { + "epoch": 0.3069620253164557, + "grad_norm": 0.6597684621810913, + "learning_rate": 0.001190940488791821, + "loss": 1.6691, + "step": 2910 + }, + { + "epoch": 0.3080168776371308, + "grad_norm": 0.805629312992096, + "learning_rate": 0.0011889071541730419, + "loss": 1.6652, + "step": 2920 + }, + { + "epoch": 0.3090717299578059, + "grad_norm": 0.6129904389381409, + "learning_rate": 0.001186868901385288, + "loss": 1.6625, + "step": 2930 + }, + { + "epoch": 0.310126582278481, + "grad_norm": 0.49381929636001587, + "learning_rate": 0.001184825753268177, + "loss": 1.6682, + "step": 2940 + }, + { + "epoch": 0.3111814345991561, + "grad_norm": 0.8280429244041443, + "learning_rate": 0.0011827777327161814, + "loss": 1.6777, + "step": 2950 + }, + { + "epoch": 0.31223628691983124, + "grad_norm": 0.7691074013710022, + "learning_rate": 0.0011807248626783714, + "loss": 1.6561, + "step": 2960 + }, + { + "epoch": 0.31329113924050633, + "grad_norm": 0.9054762125015259, + "learning_rate": 0.0011786671661581584, + "loss": 1.6548, + "step": 2970 + }, + { + "epoch": 0.3143459915611814, + "grad_norm": 0.7576959729194641, + "learning_rate": 0.001176604666213036, + "loss": 1.6573, + "step": 2980 + }, + { + "epoch": 0.3154008438818565, + "grad_norm": 0.6869341731071472, + "learning_rate": 0.0011745373859543236, + "loss": 1.6762, + "step": 2990 + }, + { + "epoch": 0.31645569620253167, + "grad_norm": 0.5557254552841187, + "learning_rate": 0.0011724653485469063, + "loss": 1.6523, + "step": 3000 + }, + { + "epoch": 0.31751054852320676, + "grad_norm": 0.595557451248169, + "learning_rate": 0.0011703885772089743, + "loss": 1.6598, + "step": 
3010 + }, + { + "epoch": 0.31856540084388185, + "grad_norm": 1.248582363128662, + "learning_rate": 0.0011683070952117646, + "loss": 1.6601, + "step": 3020 + }, + { + "epoch": 0.31962025316455694, + "grad_norm": 0.7984007596969604, + "learning_rate": 0.0011662209258792998, + "loss": 1.6487, + "step": 3030 + }, + { + "epoch": 0.3206751054852321, + "grad_norm": 0.5102805495262146, + "learning_rate": 0.0011641300925881257, + "loss": 1.6575, + "step": 3040 + }, + { + "epoch": 0.3217299578059072, + "grad_norm": 0.5064625144004822, + "learning_rate": 0.0011620346187670501, + "loss": 1.6531, + "step": 3050 + }, + { + "epoch": 0.3227848101265823, + "grad_norm": 0.5275861024856567, + "learning_rate": 0.0011599345278968806, + "loss": 1.6656, + "step": 3060 + }, + { + "epoch": 0.32383966244725737, + "grad_norm": 0.4811701774597168, + "learning_rate": 0.0011578298435101604, + "loss": 1.6552, + "step": 3070 + }, + { + "epoch": 0.32489451476793246, + "grad_norm": 0.9903383255004883, + "learning_rate": 0.0011557205891909062, + "loss": 1.6456, + "step": 3080 + }, + { + "epoch": 0.3259493670886076, + "grad_norm": 0.5609761476516724, + "learning_rate": 0.0011536067885743423, + "loss": 1.6604, + "step": 3090 + }, + { + "epoch": 0.3270042194092827, + "grad_norm": 0.5215162038803101, + "learning_rate": 0.001151488465346637, + "loss": 1.6481, + "step": 3100 + }, + { + "epoch": 0.3280590717299578, + "grad_norm": 0.77486252784729, + "learning_rate": 0.0011493656432446362, + "loss": 1.6524, + "step": 3110 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 0.5342754125595093, + "learning_rate": 0.0011472383460555983, + "loss": 1.6531, + "step": 3120 + }, + { + "epoch": 0.33016877637130804, + "grad_norm": 0.8088502287864685, + "learning_rate": 0.001145106597616927, + "loss": 1.6652, + "step": 3130 + }, + { + "epoch": 0.33122362869198313, + "grad_norm": 0.9950772523880005, + "learning_rate": 0.001142970421815904, + "loss": 1.642, + "step": 3140 + }, + { + "epoch": 0.3322784810126582, + 
"grad_norm": 0.5284668207168579, + "learning_rate": 0.0011408298425894226, + "loss": 1.6511, + "step": 3150 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.636503279209137, + "learning_rate": 0.0011386848839237186, + "loss": 1.6502, + "step": 3160 + }, + { + "epoch": 0.33438818565400846, + "grad_norm": 0.6239200830459595, + "learning_rate": 0.0011365355698541005, + "loss": 1.6537, + "step": 3170 + }, + { + "epoch": 0.33544303797468356, + "grad_norm": 0.5236424207687378, + "learning_rate": 0.0011343819244646824, + "loss": 1.6467, + "step": 3180 + }, + { + "epoch": 0.33649789029535865, + "grad_norm": 0.6877872347831726, + "learning_rate": 0.001132223971888112, + "loss": 1.6499, + "step": 3190 + }, + { + "epoch": 0.33755274261603374, + "grad_norm": 0.8002364635467529, + "learning_rate": 0.0011300617363053024, + "loss": 1.6406, + "step": 3200 + }, + { + "epoch": 0.33860759493670883, + "grad_norm": 0.7474848628044128, + "learning_rate": 0.0011278952419451586, + "loss": 1.6568, + "step": 3210 + }, + { + "epoch": 0.339662447257384, + "grad_norm": 0.9320099353790283, + "learning_rate": 0.0011257245130843077, + "loss": 1.6467, + "step": 3220 + }, + { + "epoch": 0.3407172995780591, + "grad_norm": 0.5745654702186584, + "learning_rate": 0.0011235495740468265, + "loss": 1.6422, + "step": 3230 + }, + { + "epoch": 0.34177215189873417, + "grad_norm": 0.5315459966659546, + "learning_rate": 0.0011213704492039694, + "loss": 1.6231, + "step": 3240 + }, + { + "epoch": 0.34282700421940926, + "grad_norm": 0.5665087699890137, + "learning_rate": 0.001119187162973894, + "loss": 1.6467, + "step": 3250 + }, + { + "epoch": 0.3438818565400844, + "grad_norm": 0.5049409866333008, + "learning_rate": 0.001116999739821388, + "loss": 1.6357, + "step": 3260 + }, + { + "epoch": 0.3449367088607595, + "grad_norm": 0.5673466324806213, + "learning_rate": 0.0011148082042575968, + "loss": 1.6517, + "step": 3270 + }, + { + "epoch": 0.3459915611814346, + "grad_norm": 0.99655681848526, + 
"learning_rate": 0.0011126125808397461, + "loss": 1.6521, + "step": 3280 + }, + { + "epoch": 0.3470464135021097, + "grad_norm": 1.0942261219024658, + "learning_rate": 0.0011104128941708683, + "loss": 1.641, + "step": 3290 + }, + { + "epoch": 0.34810126582278483, + "grad_norm": 0.5385993123054504, + "learning_rate": 0.001108209168899527, + "loss": 1.6457, + "step": 3300 + }, + { + "epoch": 0.3491561181434599, + "grad_norm": 0.48614969849586487, + "learning_rate": 0.0011060014297195396, + "loss": 1.6461, + "step": 3310 + }, + { + "epoch": 0.350210970464135, + "grad_norm": 0.5507870316505432, + "learning_rate": 0.0011037897013697015, + "loss": 1.6509, + "step": 3320 + }, + { + "epoch": 0.3512658227848101, + "grad_norm": 0.5481265783309937, + "learning_rate": 0.0011015740086335092, + "loss": 1.6382, + "step": 3330 + }, + { + "epoch": 0.35232067510548526, + "grad_norm": 0.6650676727294922, + "learning_rate": 0.0010993543763388814, + "loss": 1.642, + "step": 3340 + }, + { + "epoch": 0.35337552742616035, + "grad_norm": 0.645088255405426, + "learning_rate": 0.0010971308293578814, + "loss": 1.6322, + "step": 3350 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 0.563561737537384, + "learning_rate": 0.0010949033926064397, + "loss": 1.6331, + "step": 3360 + }, + { + "epoch": 0.35548523206751054, + "grad_norm": 0.6594712138175964, + "learning_rate": 0.0010926720910440725, + "loss": 1.6413, + "step": 3370 + }, + { + "epoch": 0.35654008438818563, + "grad_norm": 0.6993468999862671, + "learning_rate": 0.001090436949673603, + "loss": 1.6371, + "step": 3380 + }, + { + "epoch": 0.3575949367088608, + "grad_norm": 0.5427238345146179, + "learning_rate": 0.0010881979935408815, + "loss": 1.6314, + "step": 3390 + }, + { + "epoch": 0.35864978902953587, + "grad_norm": 1.1471266746520996, + "learning_rate": 0.0010859552477345052, + "loss": 1.6444, + "step": 3400 + }, + { + "epoch": 0.35970464135021096, + "grad_norm": 0.8993985652923584, + "learning_rate": 0.001083708737385536, + 
"loss": 1.6388, + "step": 3410 + }, + { + "epoch": 0.36075949367088606, + "grad_norm": 0.5465598702430725, + "learning_rate": 0.0010814584876672187, + "loss": 1.6177, + "step": 3420 + }, + { + "epoch": 0.3618143459915612, + "grad_norm": 0.6668012142181396, + "learning_rate": 0.0010792045237947008, + "loss": 1.6348, + "step": 3430 + }, + { + "epoch": 0.3628691983122363, + "grad_norm": 0.6991229057312012, + "learning_rate": 0.0010769468710247478, + "loss": 1.6343, + "step": 3440 + }, + { + "epoch": 0.3639240506329114, + "grad_norm": 0.5535702109336853, + "learning_rate": 0.0010746855546554612, + "loss": 1.6288, + "step": 3450 + }, + { + "epoch": 0.3649789029535865, + "grad_norm": 0.7196880578994751, + "learning_rate": 0.0010724206000259954, + "loss": 1.6244, + "step": 3460 + }, + { + "epoch": 0.36603375527426163, + "grad_norm": 0.6040158867835999, + "learning_rate": 0.0010701520325162727, + "loss": 1.6429, + "step": 3470 + }, + { + "epoch": 0.3670886075949367, + "grad_norm": 0.5791844725608826, + "learning_rate": 0.0010678798775467001, + "loss": 1.6441, + "step": 3480 + }, + { + "epoch": 0.3681434599156118, + "grad_norm": 0.6173314452171326, + "learning_rate": 0.0010656041605778832, + "loss": 1.6387, + "step": 3490 + }, + { + "epoch": 0.3691983122362869, + "grad_norm": 0.7677024602890015, + "learning_rate": 0.001063324907110342, + "loss": 1.6175, + "step": 3500 + }, + { + "epoch": 0.370253164556962, + "grad_norm": 0.6228676438331604, + "learning_rate": 0.0010610421426842241, + "loss": 1.6368, + "step": 3510 + }, + { + "epoch": 0.37130801687763715, + "grad_norm": 0.6007316708564758, + "learning_rate": 0.00105875589287902, + "loss": 1.6353, + "step": 3520 + }, + { + "epoch": 0.37236286919831224, + "grad_norm": 0.7982361316680908, + "learning_rate": 0.0010564661833132752, + "loss": 1.643, + "step": 3530 + }, + { + "epoch": 0.37341772151898733, + "grad_norm": 0.9526289701461792, + "learning_rate": 0.001054173039644303, + "loss": 1.6384, + "step": 3540 + }, + { + "epoch": 
0.3744725738396624, + "grad_norm": 0.699305534362793, + "learning_rate": 0.0010518764875678981, + "loss": 1.6298, + "step": 3550 + }, + { + "epoch": 0.3755274261603376, + "grad_norm": 0.5673437118530273, + "learning_rate": 0.001049576552818048, + "loss": 1.6249, + "step": 3560 + }, + { + "epoch": 0.37658227848101267, + "grad_norm": 0.8004374504089355, + "learning_rate": 0.0010472732611666448, + "loss": 1.623, + "step": 3570 + }, + { + "epoch": 0.37763713080168776, + "grad_norm": 0.657015860080719, + "learning_rate": 0.0010449666384231954, + "loss": 1.6241, + "step": 3580 + }, + { + "epoch": 0.37869198312236285, + "grad_norm": 0.8358457684516907, + "learning_rate": 0.0010426567104345346, + "loss": 1.6359, + "step": 3590 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 0.7296909689903259, + "learning_rate": 0.0010403435030845332, + "loss": 1.6196, + "step": 3600 + }, + { + "epoch": 0.3808016877637131, + "grad_norm": 0.5378011465072632, + "learning_rate": 0.0010380270422938093, + "loss": 1.6223, + "step": 3610 + }, + { + "epoch": 0.3818565400843882, + "grad_norm": 0.6494420766830444, + "learning_rate": 0.0010357073540194362, + "loss": 1.6229, + "step": 3620 + }, + { + "epoch": 0.3829113924050633, + "grad_norm": 0.9161489009857178, + "learning_rate": 0.001033384464254655, + "loss": 1.6231, + "step": 3630 + }, + { + "epoch": 0.38396624472573837, + "grad_norm": 0.6290285587310791, + "learning_rate": 0.001031058399028579, + "loss": 1.623, + "step": 3640 + }, + { + "epoch": 0.3850210970464135, + "grad_norm": 0.6033028364181519, + "learning_rate": 0.001028729184405905, + "loss": 1.6306, + "step": 3650 + }, + { + "epoch": 0.3860759493670886, + "grad_norm": 0.5903000831604004, + "learning_rate": 0.0010263968464866201, + "loss": 1.6253, + "step": 3660 + }, + { + "epoch": 0.3871308016877637, + "grad_norm": 0.5634178519248962, + "learning_rate": 0.0010240614114057098, + "loss": 1.6195, + "step": 3670 + }, + { + "epoch": 0.3881856540084388, + "grad_norm": 1.0467827320098877, 
+ "learning_rate": 0.001021722905332864, + "loss": 1.622, + "step": 3680 + }, + { + "epoch": 0.38924050632911394, + "grad_norm": 0.8016198873519897, + "learning_rate": 0.0010193813544721855, + "loss": 1.6302, + "step": 3690 + }, + { + "epoch": 0.39029535864978904, + "grad_norm": 0.7411696314811707, + "learning_rate": 0.001017036785061895, + "loss": 1.6374, + "step": 3700 + }, + { + "epoch": 0.39135021097046413, + "grad_norm": 0.5882891416549683, + "learning_rate": 0.0010146892233740376, + "loss": 1.6236, + "step": 3710 + }, + { + "epoch": 0.3924050632911392, + "grad_norm": 0.6171638369560242, + "learning_rate": 0.0010123386957141883, + "loss": 1.6016, + "step": 3720 + }, + { + "epoch": 0.39345991561181437, + "grad_norm": 0.670336902141571, + "learning_rate": 0.0010099852284211573, + "loss": 1.6219, + "step": 3730 + }, + { + "epoch": 0.39451476793248946, + "grad_norm": 0.6489212512969971, + "learning_rate": 0.0010076288478666944, + "loss": 1.6212, + "step": 3740 + }, + { + "epoch": 0.39556962025316456, + "grad_norm": 0.730172336101532, + "learning_rate": 0.0010052695804551946, + "loss": 1.6256, + "step": 3750 + }, + { + "epoch": 0.39662447257383965, + "grad_norm": 0.9267665147781372, + "learning_rate": 0.0010029074526234014, + "loss": 1.6144, + "step": 3760 + }, + { + "epoch": 0.39767932489451474, + "grad_norm": 0.5295860171318054, + "learning_rate": 0.0010005424908401104, + "loss": 1.6295, + "step": 3770 + }, + { + "epoch": 0.3987341772151899, + "grad_norm": 0.5772867798805237, + "learning_rate": 0.0009981747216058728, + "loss": 1.6126, + "step": 3780 + }, + { + "epoch": 0.399789029535865, + "grad_norm": 0.5177218914031982, + "learning_rate": 0.0009958041714526998, + "loss": 1.6179, + "step": 3790 + }, + { + "epoch": 0.4008438818565401, + "grad_norm": 0.5203511118888855, + "learning_rate": 0.0009934308669437627, + "loss": 1.6231, + "step": 3800 + }, + { + "epoch": 0.40189873417721517, + "grad_norm": 0.5907359719276428, + "learning_rate": 0.0009910548346730972, + 
"loss": 1.615, + "step": 3810 + }, + { + "epoch": 0.4029535864978903, + "grad_norm": 0.5076828598976135, + "learning_rate": 0.0009886761012653062, + "loss": 1.6023, + "step": 3820 + }, + { + "epoch": 0.4040084388185654, + "grad_norm": 0.5063542127609253, + "learning_rate": 0.000986294693375258, + "loss": 1.6085, + "step": 3830 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 0.556496798992157, + "learning_rate": 0.000983910637687791, + "loss": 1.626, + "step": 3840 + }, + { + "epoch": 0.4061181434599156, + "grad_norm": 0.6612011790275574, + "learning_rate": 0.0009815239609174138, + "loss": 1.6054, + "step": 3850 + }, + { + "epoch": 0.40717299578059074, + "grad_norm": 0.7697446346282959, + "learning_rate": 0.0009791346898080043, + "loss": 1.6215, + "step": 3860 + }, + { + "epoch": 0.40822784810126583, + "grad_norm": 0.5939280986785889, + "learning_rate": 0.0009767428511325122, + "loss": 1.6112, + "step": 3870 + }, + { + "epoch": 0.4092827004219409, + "grad_norm": 0.7321987152099609, + "learning_rate": 0.0009743484716926576, + "loss": 1.6004, + "step": 3880 + }, + { + "epoch": 0.410337552742616, + "grad_norm": 0.811866283416748, + "learning_rate": 0.0009719515783186319, + "loss": 1.6029, + "step": 3890 + }, + { + "epoch": 0.41139240506329117, + "grad_norm": 0.8771321177482605, + "learning_rate": 0.0009695521978687951, + "loss": 1.6049, + "step": 3900 + }, + { + "epoch": 0.41244725738396626, + "grad_norm": 0.9379278421401978, + "learning_rate": 0.0009671503572293767, + "loss": 1.6088, + "step": 3910 + }, + { + "epoch": 0.41350210970464135, + "grad_norm": 1.1965348720550537, + "learning_rate": 0.0009647460833141742, + "loss": 1.6084, + "step": 3920 + }, + { + "epoch": 0.41455696202531644, + "grad_norm": 0.8523448705673218, + "learning_rate": 0.0009623394030642507, + "loss": 1.6029, + "step": 3930 + }, + { + "epoch": 0.41561181434599154, + "grad_norm": 0.6702988743782043, + "learning_rate": 0.0009599303434476334, + "loss": 1.6015, + "step": 3940 + }, + { + 
"epoch": 0.4166666666666667, + "grad_norm": 0.5801752805709839, + "learning_rate": 0.0009575189314590118, + "loss": 1.6076, + "step": 3950 + }, + { + "epoch": 0.4177215189873418, + "grad_norm": 0.6770606637001038, + "learning_rate": 0.0009551051941194346, + "loss": 1.6079, + "step": 3960 + }, + { + "epoch": 0.41877637130801687, + "grad_norm": 0.7278493046760559, + "learning_rate": 0.0009526891584760071, + "loss": 1.5949, + "step": 3970 + }, + { + "epoch": 0.41983122362869196, + "grad_norm": 0.5087621808052063, + "learning_rate": 0.0009502708516015889, + "loss": 1.6093, + "step": 3980 + }, + { + "epoch": 0.4208860759493671, + "grad_norm": 0.5776836276054382, + "learning_rate": 0.0009478503005944888, + "loss": 1.6006, + "step": 3990 + }, + { + "epoch": 0.4219409282700422, + "grad_norm": 0.6365747451782227, + "learning_rate": 0.0009454275325781632, + "loss": 1.6158, + "step": 4000 + }, + { + "epoch": 0.4229957805907173, + "grad_norm": 0.8209394216537476, + "learning_rate": 0.0009430025747009104, + "loss": 1.6032, + "step": 4010 + }, + { + "epoch": 0.4240506329113924, + "grad_norm": 0.5628833770751953, + "learning_rate": 0.0009405754541355677, + "loss": 1.6043, + "step": 4020 + }, + { + "epoch": 0.42510548523206754, + "grad_norm": 0.6329572796821594, + "learning_rate": 0.0009381461980792061, + "loss": 1.5961, + "step": 4030 + }, + { + "epoch": 0.42616033755274263, + "grad_norm": 0.5296734571456909, + "learning_rate": 0.0009357148337528256, + "loss": 1.6037, + "step": 4040 + }, + { + "epoch": 0.4272151898734177, + "grad_norm": 0.7551426887512207, + "learning_rate": 0.0009332813884010511, + "loss": 1.6057, + "step": 4050 + }, + { + "epoch": 0.4282700421940928, + "grad_norm": 0.8426782488822937, + "learning_rate": 0.0009308458892918259, + "loss": 1.6111, + "step": 4060 + }, + { + "epoch": 0.4293248945147679, + "grad_norm": 0.9062341451644897, + "learning_rate": 0.0009284083637161064, + "loss": 1.6077, + "step": 4070 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 
1.1163408756256104, + "learning_rate": 0.0009259688389875574, + "loss": 1.6192, + "step": 4080 + }, + { + "epoch": 0.43143459915611815, + "grad_norm": 1.1263148784637451, + "learning_rate": 0.0009235273424422442, + "loss": 1.6068, + "step": 4090 + }, + { + "epoch": 0.43248945147679324, + "grad_norm": 0.7062352895736694, + "learning_rate": 0.0009210839014383282, + "loss": 1.6017, + "step": 4100 + }, + { + "epoch": 0.43354430379746833, + "grad_norm": 0.7208341956138611, + "learning_rate": 0.0009186385433557584, + "loss": 1.5959, + "step": 4110 + }, + { + "epoch": 0.4345991561181435, + "grad_norm": 0.9055721759796143, + "learning_rate": 0.0009161912955959668, + "loss": 1.5986, + "step": 4120 + }, + { + "epoch": 0.4356540084388186, + "grad_norm": 0.7277659773826599, + "learning_rate": 0.000913742185581559, + "loss": 1.5926, + "step": 4130 + }, + { + "epoch": 0.43670886075949367, + "grad_norm": 1.1553928852081299, + "learning_rate": 0.0009112912407560086, + "loss": 1.5955, + "step": 4140 + }, + { + "epoch": 0.43776371308016876, + "grad_norm": 0.6272118091583252, + "learning_rate": 0.0009088384885833495, + "loss": 1.6036, + "step": 4150 + }, + { + "epoch": 0.4388185654008439, + "grad_norm": 0.5771468877792358, + "learning_rate": 0.000906383956547867, + "loss": 1.594, + "step": 4160 + }, + { + "epoch": 0.439873417721519, + "grad_norm": 0.5323604345321655, + "learning_rate": 0.0009039276721537915, + "loss": 1.5981, + "step": 4170 + }, + { + "epoch": 0.4409282700421941, + "grad_norm": 0.5176653861999512, + "learning_rate": 0.0009014696629249886, + "loss": 1.5914, + "step": 4180 + }, + { + "epoch": 0.4419831223628692, + "grad_norm": 0.8057810664176941, + "learning_rate": 0.0008990099564046522, + "loss": 1.5997, + "step": 4190 + }, + { + "epoch": 0.4430379746835443, + "grad_norm": 0.940513014793396, + "learning_rate": 0.0008965485801549946, + "loss": 1.5989, + "step": 4200 + }, + { + "epoch": 0.4440928270042194, + "grad_norm": 0.5933017730712891, + "learning_rate": 
0.000894085561756939, + "loss": 1.5904, + "step": 4210 + }, + { + "epoch": 0.4451476793248945, + "grad_norm": 0.6983321309089661, + "learning_rate": 0.0008916209288098088, + "loss": 1.5961, + "step": 4220 + }, + { + "epoch": 0.4462025316455696, + "grad_norm": 0.8738566637039185, + "learning_rate": 0.0008891547089310198, + "loss": 1.5918, + "step": 4230 + }, + { + "epoch": 0.4472573839662447, + "grad_norm": 0.6277160048484802, + "learning_rate": 0.0008866869297557699, + "loss": 1.5916, + "step": 4240 + }, + { + "epoch": 0.44831223628691985, + "grad_norm": 0.5517798066139221, + "learning_rate": 0.0008842176189367299, + "loss": 1.5966, + "step": 4250 + }, + { + "epoch": 0.44936708860759494, + "grad_norm": 1.312120795249939, + "learning_rate": 0.0008817468041437329, + "loss": 1.6023, + "step": 4260 + }, + { + "epoch": 0.45042194092827004, + "grad_norm": 0.8608296513557434, + "learning_rate": 0.0008792745130634654, + "loss": 1.5896, + "step": 4270 + }, + { + "epoch": 0.45147679324894513, + "grad_norm": 0.527365505695343, + "learning_rate": 0.0008768007733991561, + "loss": 1.5847, + "step": 4280 + }, + { + "epoch": 0.4525316455696203, + "grad_norm": 0.48201900720596313, + "learning_rate": 0.0008743256128702658, + "loss": 1.5961, + "step": 4290 + }, + { + "epoch": 0.45358649789029537, + "grad_norm": 0.6115725636482239, + "learning_rate": 0.0008718490592121768, + "loss": 1.5945, + "step": 4300 + }, + { + "epoch": 0.45464135021097046, + "grad_norm": 0.6590610146522522, + "learning_rate": 0.0008693711401758822, + "loss": 1.5911, + "step": 4310 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 0.5170368552207947, + "learning_rate": 0.0008668918835276747, + "loss": 1.5868, + "step": 4320 + }, + { + "epoch": 0.45675105485232065, + "grad_norm": 1.0741411447525024, + "learning_rate": 0.0008644113170488355, + "loss": 1.596, + "step": 4330 + }, + { + "epoch": 0.4578059071729958, + "grad_norm": 0.5104585886001587, + "learning_rate": 0.0008619294685353235, + "loss": 1.5844, + 
"step": 4340 + }, + { + "epoch": 0.4588607594936709, + "grad_norm": 0.6905128955841064, + "learning_rate": 0.0008594463657974627, + "loss": 1.5884, + "step": 4350 + }, + { + "epoch": 0.459915611814346, + "grad_norm": 0.6427602767944336, + "learning_rate": 0.0008569620366596322, + "loss": 1.6039, + "step": 4360 + }, + { + "epoch": 0.4609704641350211, + "grad_norm": 0.515118420124054, + "learning_rate": 0.000854476508959953, + "loss": 1.5784, + "step": 4370 + }, + { + "epoch": 0.4620253164556962, + "grad_norm": 0.6664035320281982, + "learning_rate": 0.0008519898105499762, + "loss": 1.5851, + "step": 4380 + }, + { + "epoch": 0.4630801687763713, + "grad_norm": 0.555374801158905, + "learning_rate": 0.0008495019692943721, + "loss": 1.5801, + "step": 4390 + }, + { + "epoch": 0.4641350210970464, + "grad_norm": 0.6291237473487854, + "learning_rate": 0.0008470130130706166, + "loss": 1.587, + "step": 4400 + }, + { + "epoch": 0.4651898734177215, + "grad_norm": 0.5843510627746582, + "learning_rate": 0.0008445229697686795, + "loss": 1.6019, + "step": 4410 + }, + { + "epoch": 0.46624472573839665, + "grad_norm": 0.6794947385787964, + "learning_rate": 0.0008420318672907119, + "loss": 1.5859, + "step": 4420 + }, + { + "epoch": 0.46729957805907174, + "grad_norm": 0.6664280295372009, + "learning_rate": 0.0008395397335507334, + "loss": 1.5872, + "step": 4430 + }, + { + "epoch": 0.46835443037974683, + "grad_norm": 0.7631552219390869, + "learning_rate": 0.0008370465964743196, + "loss": 1.5857, + "step": 4440 + }, + { + "epoch": 0.4694092827004219, + "grad_norm": 0.6629914045333862, + "learning_rate": 0.0008345524839982886, + "loss": 1.5931, + "step": 4450 + }, + { + "epoch": 0.4704641350210971, + "grad_norm": 0.5235519409179688, + "learning_rate": 0.0008320574240703886, + "loss": 1.5774, + "step": 4460 + }, + { + "epoch": 0.47151898734177217, + "grad_norm": 0.6849856972694397, + "learning_rate": 0.0008295614446489842, + "loss": 1.5882, + "step": 4470 + }, + { + "epoch": 
0.47257383966244726, + "grad_norm": 0.6194204688072205, + "learning_rate": 0.0008270645737027441, + "loss": 1.5937, + "step": 4480 + }, + { + "epoch": 0.47362869198312235, + "grad_norm": 0.7102261185646057, + "learning_rate": 0.0008245668392103259, + "loss": 1.5822, + "step": 4490 + }, + { + "epoch": 0.47468354430379744, + "grad_norm": 0.9604889750480652, + "learning_rate": 0.0008220682691600645, + "loss": 1.5836, + "step": 4500 + }, + { + "epoch": 0.4757383966244726, + "grad_norm": 0.8601362705230713, + "learning_rate": 0.0008195688915496571, + "loss": 1.5902, + "step": 4510 + }, + { + "epoch": 0.4767932489451477, + "grad_norm": 0.5696207284927368, + "learning_rate": 0.0008170687343858506, + "loss": 1.5691, + "step": 4520 + }, + { + "epoch": 0.4778481012658228, + "grad_norm": 0.6995933651924133, + "learning_rate": 0.0008145678256841265, + "loss": 1.5872, + "step": 4530 + }, + { + "epoch": 0.47890295358649787, + "grad_norm": 0.757399320602417, + "learning_rate": 0.0008120661934683879, + "loss": 1.5868, + "step": 4540 + }, + { + "epoch": 0.479957805907173, + "grad_norm": 0.4980528652667999, + "learning_rate": 0.0008095638657706456, + "loss": 1.5848, + "step": 4550 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 0.5150482058525085, + "learning_rate": 0.000807060870630703, + "loss": 1.5753, + "step": 4560 + }, + { + "epoch": 0.4820675105485232, + "grad_norm": 0.7893345952033997, + "learning_rate": 0.000804557236095843, + "loss": 1.5578, + "step": 4570 + }, + { + "epoch": 0.4831223628691983, + "grad_norm": 0.6156172156333923, + "learning_rate": 0.0008020529902205129, + "loss": 1.5825, + "step": 4580 + }, + { + "epoch": 0.48417721518987344, + "grad_norm": 0.7261443734169006, + "learning_rate": 0.0007995481610660108, + "loss": 1.5676, + "step": 4590 + }, + { + "epoch": 0.48523206751054854, + "grad_norm": 0.5738362073898315, + "learning_rate": 0.0007970427767001702, + "loss": 1.5836, + "step": 4600 + }, + { + "epoch": 0.48628691983122363, + "grad_norm": 
0.7454018592834473, + "learning_rate": 0.0007945368651970464, + "loss": 1.5857, + "step": 4610 + }, + { + "epoch": 0.4873417721518987, + "grad_norm": 0.9112736582756042, + "learning_rate": 0.0007920304546366013, + "loss": 1.5662, + "step": 4620 + }, + { + "epoch": 0.4883966244725738, + "grad_norm": 0.7049988508224487, + "learning_rate": 0.000789523573104389, + "loss": 1.5829, + "step": 4630 + }, + { + "epoch": 0.48945147679324896, + "grad_norm": 0.5927552580833435, + "learning_rate": 0.0007870162486912414, + "loss": 1.5816, + "step": 4640 + }, + { + "epoch": 0.49050632911392406, + "grad_norm": 0.5143202543258667, + "learning_rate": 0.0007845085094929527, + "loss": 1.5848, + "step": 4650 + }, + { + "epoch": 0.49156118143459915, + "grad_norm": 0.5179629921913147, + "learning_rate": 0.0007820003836099649, + "loss": 1.5725, + "step": 4660 + }, + { + "epoch": 0.49261603375527424, + "grad_norm": 0.5422449707984924, + "learning_rate": 0.0007794918991470537, + "loss": 1.574, + "step": 4670 + }, + { + "epoch": 0.4936708860759494, + "grad_norm": 0.606109619140625, + "learning_rate": 0.0007769830842130119, + "loss": 1.5671, + "step": 4680 + }, + { + "epoch": 0.4947257383966245, + "grad_norm": 0.7221512198448181, + "learning_rate": 0.0007744739669203361, + "loss": 1.5651, + "step": 4690 + }, + { + "epoch": 0.4957805907172996, + "grad_norm": 0.7250859141349792, + "learning_rate": 0.0007719645753849108, + "loss": 1.5778, + "step": 4700 + }, + { + "epoch": 0.49683544303797467, + "grad_norm": 0.6284032464027405, + "learning_rate": 0.0007694549377256932, + "loss": 1.5793, + "step": 4710 + }, + { + "epoch": 0.4978902953586498, + "grad_norm": 0.677979588508606, + "learning_rate": 0.0007669450820643987, + "loss": 1.5713, + "step": 4720 + }, + { + "epoch": 0.4989451476793249, + "grad_norm": 1.2178112268447876, + "learning_rate": 0.0007644350365251855, + "loss": 1.5716, + "step": 4730 + }, + { + "epoch": 0.5, + "grad_norm": 0.8423519730567932, + "learning_rate": 0.0007619248292343399, + 
"loss": 1.5717, + "step": 4740 + }, + { + "epoch": 0.5010548523206751, + "grad_norm": 0.6503134965896606, + "learning_rate": 0.0007594144883199599, + "loss": 1.5902, + "step": 4750 + }, + { + "epoch": 0.5021097046413502, + "grad_norm": 0.5862320065498352, + "learning_rate": 0.0007569040419116413, + "loss": 1.5707, + "step": 4760 + }, + { + "epoch": 0.5031645569620253, + "grad_norm": 0.8562874794006348, + "learning_rate": 0.000754393518140162, + "loss": 1.5695, + "step": 4770 + }, + { + "epoch": 0.5042194092827004, + "grad_norm": 0.577817976474762, + "learning_rate": 0.0007518829451371665, + "loss": 1.5797, + "step": 4780 + }, + { + "epoch": 0.5052742616033755, + "grad_norm": 0.8240464329719543, + "learning_rate": 0.0007493723510348516, + "loss": 1.5668, + "step": 4790 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 0.6382803916931152, + "learning_rate": 0.0007468617639656496, + "loss": 1.5663, + "step": 4800 + }, + { + "epoch": 0.5073839662447257, + "grad_norm": 1.2234867811203003, + "learning_rate": 0.0007443512120619144, + "loss": 1.5654, + "step": 4810 + }, + { + "epoch": 0.5084388185654009, + "grad_norm": 0.9998992085456848, + "learning_rate": 0.0007418407234556067, + "loss": 1.5868, + "step": 4820 + }, + { + "epoch": 0.509493670886076, + "grad_norm": 0.8155842423439026, + "learning_rate": 0.0007393303262779767, + "loss": 1.5699, + "step": 4830 + }, + { + "epoch": 0.510548523206751, + "grad_norm": 0.9842706918716431, + "learning_rate": 0.0007368200486592507, + "loss": 1.5639, + "step": 4840 + }, + { + "epoch": 0.5116033755274262, + "grad_norm": 0.6295363903045654, + "learning_rate": 0.0007343099187283149, + "loss": 1.5803, + "step": 4850 + }, + { + "epoch": 0.5126582278481012, + "grad_norm": 0.5746870636940002, + "learning_rate": 0.0007317999646124011, + "loss": 1.5622, + "step": 4860 + }, + { + "epoch": 0.5137130801687764, + "grad_norm": 0.799921452999115, + "learning_rate": 0.0007292902144367704, + "loss": 1.5561, + "step": 4870 + }, + { + "epoch": 
0.5147679324894515, + "grad_norm": 0.723427414894104, + "learning_rate": 0.0007267806963243995, + "loss": 1.5783, + "step": 4880 + }, + { + "epoch": 0.5158227848101266, + "grad_norm": 0.7890868782997131, + "learning_rate": 0.0007242714383956639, + "loss": 1.569, + "step": 4890 + }, + { + "epoch": 0.5168776371308017, + "grad_norm": 0.6295633912086487, + "learning_rate": 0.000721762468768024, + "loss": 1.5747, + "step": 4900 + }, + { + "epoch": 0.5179324894514767, + "grad_norm": 0.6297635436058044, + "learning_rate": 0.0007192538155557094, + "loss": 1.5669, + "step": 4910 + }, + { + "epoch": 0.5189873417721519, + "grad_norm": 0.6860962510108948, + "learning_rate": 0.0007167455068694046, + "loss": 1.5579, + "step": 4920 + }, + { + "epoch": 0.520042194092827, + "grad_norm": 0.7743304371833801, + "learning_rate": 0.000714237570815933, + "loss": 1.5688, + "step": 4930 + }, + { + "epoch": 0.5210970464135021, + "grad_norm": 0.6168741583824158, + "learning_rate": 0.0007117300354979423, + "loss": 1.5631, + "step": 4940 + }, + { + "epoch": 0.5221518987341772, + "grad_norm": 0.7270210385322571, + "learning_rate": 0.000709222929013591, + "loss": 1.5715, + "step": 4950 + }, + { + "epoch": 0.5232067510548524, + "grad_norm": 0.7465235590934753, + "learning_rate": 0.0007067162794562309, + "loss": 1.555, + "step": 4960 + }, + { + "epoch": 0.5242616033755274, + "grad_norm": 0.8232589364051819, + "learning_rate": 0.0007042101149140943, + "loss": 1.5649, + "step": 4970 + }, + { + "epoch": 0.5253164556962026, + "grad_norm": 0.6453871130943298, + "learning_rate": 0.0007017044634699787, + "loss": 1.5566, + "step": 4980 + }, + { + "epoch": 0.5263713080168776, + "grad_norm": 0.6048101186752319, + "learning_rate": 0.0006991993532009319, + "loss": 1.5618, + "step": 4990 + }, + { + "epoch": 0.5274261603375527, + "grad_norm": 0.7034915685653687, + "learning_rate": 0.0006966948121779378, + "loss": 1.5706, + "step": 5000 + }, + { + "epoch": 0.5284810126582279, + "grad_norm": 0.6632400155067444, + 
"learning_rate": 0.000694190868465601, + "loss": 1.5626, + "step": 5010 + }, + { + "epoch": 0.5295358649789029, + "grad_norm": 0.5953408479690552, + "learning_rate": 0.0006916875501218343, + "loss": 1.5581, + "step": 5020 + }, + { + "epoch": 0.5305907172995781, + "grad_norm": 0.6490030884742737, + "learning_rate": 0.0006891848851975416, + "loss": 1.5415, + "step": 5030 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 0.5260306000709534, + "learning_rate": 0.0006866829017363054, + "loss": 1.5706, + "step": 5040 + }, + { + "epoch": 0.5327004219409283, + "grad_norm": 0.6372014284133911, + "learning_rate": 0.0006841816277740722, + "loss": 1.5584, + "step": 5050 + }, + { + "epoch": 0.5337552742616034, + "grad_norm": 0.5504810214042664, + "learning_rate": 0.0006816810913388379, + "loss": 1.5684, + "step": 5060 + }, + { + "epoch": 0.5348101265822784, + "grad_norm": 0.635343611240387, + "learning_rate": 0.0006791813204503342, + "loss": 1.5579, + "step": 5070 + }, + { + "epoch": 0.5358649789029536, + "grad_norm": 0.552595853805542, + "learning_rate": 0.0006766823431197147, + "loss": 1.5692, + "step": 5080 + }, + { + "epoch": 0.5369198312236287, + "grad_norm": 0.72637939453125, + "learning_rate": 0.0006741841873492406, + "loss": 1.5616, + "step": 5090 + }, + { + "epoch": 0.5379746835443038, + "grad_norm": 0.9619336724281311, + "learning_rate": 0.0006716868811319671, + "loss": 1.5455, + "step": 5100 + }, + { + "epoch": 0.5390295358649789, + "grad_norm": 0.8375715017318726, + "learning_rate": 0.0006691904524514297, + "loss": 1.5613, + "step": 5110 + }, + { + "epoch": 0.540084388185654, + "grad_norm": 0.6994984745979309, + "learning_rate": 0.0006666949292813306, + "loss": 1.5549, + "step": 5120 + }, + { + "epoch": 0.5411392405063291, + "grad_norm": 0.5462349057197571, + "learning_rate": 0.0006642003395852258, + "loss": 1.5517, + "step": 5130 + }, + { + "epoch": 0.5421940928270043, + "grad_norm": 0.6135057806968689, + "learning_rate": 0.0006617067113162103, + "loss": 
1.5663, + "step": 5140 + }, + { + "epoch": 0.5432489451476793, + "grad_norm": 0.5969448685646057, + "learning_rate": 0.0006592140724166073, + "loss": 1.5646, + "step": 5150 + }, + { + "epoch": 0.5443037974683544, + "grad_norm": 0.6049835085868835, + "learning_rate": 0.0006567224508176523, + "loss": 1.5585, + "step": 5160 + }, + { + "epoch": 0.5453586497890295, + "grad_norm": 0.6460230350494385, + "learning_rate": 0.0006542318744391821, + "loss": 1.5554, + "step": 5170 + }, + { + "epoch": 0.5464135021097046, + "grad_norm": 0.8698463439941406, + "learning_rate": 0.0006517423711893209, + "loss": 1.569, + "step": 5180 + }, + { + "epoch": 0.5474683544303798, + "grad_norm": 0.7690253853797913, + "learning_rate": 0.0006492539689641685, + "loss": 1.5506, + "step": 5190 + }, + { + "epoch": 0.5485232067510548, + "grad_norm": 0.8094373345375061, + "learning_rate": 0.0006467666956474865, + "loss": 1.5627, + "step": 5200 + }, + { + "epoch": 0.54957805907173, + "grad_norm": 0.5970216989517212, + "learning_rate": 0.0006442805791103873, + "loss": 1.5452, + "step": 5210 + }, + { + "epoch": 0.5506329113924051, + "grad_norm": 0.9301138520240784, + "learning_rate": 0.0006417956472110205, + "loss": 1.5578, + "step": 5220 + }, + { + "epoch": 0.5516877637130801, + "grad_norm": 1.0505397319793701, + "learning_rate": 0.0006393119277942614, + "loss": 1.5492, + "step": 5230 + }, + { + "epoch": 0.5527426160337553, + "grad_norm": 0.6449258923530579, + "learning_rate": 0.0006368294486913987, + "loss": 1.5557, + "step": 5240 + }, + { + "epoch": 0.5537974683544303, + "grad_norm": 0.6096972823143005, + "learning_rate": 0.0006343482377198232, + "loss": 1.5487, + "step": 5250 + }, + { + "epoch": 0.5548523206751055, + "grad_norm": 0.7595222592353821, + "learning_rate": 0.0006318683226827151, + "loss": 1.5577, + "step": 5260 + }, + { + "epoch": 0.5559071729957806, + "grad_norm": 0.9682703614234924, + "learning_rate": 0.0006293897313687331, + "loss": 1.5631, + "step": 5270 + }, + { + "epoch": 
0.5569620253164557, + "grad_norm": 0.5442140698432922, + "learning_rate": 0.0006269124915517037, + "loss": 1.5534, + "step": 5280 + }, + { + "epoch": 0.5580168776371308, + "grad_norm": 0.6464776992797852, + "learning_rate": 0.0006244366309903084, + "loss": 1.5528, + "step": 5290 + }, + { + "epoch": 0.5590717299578059, + "grad_norm": 0.7928241491317749, + "learning_rate": 0.0006219621774277737, + "loss": 1.5575, + "step": 5300 + }, + { + "epoch": 0.560126582278481, + "grad_norm": 0.691679835319519, + "learning_rate": 0.00061948915859156, + "loss": 1.5471, + "step": 5310 + }, + { + "epoch": 0.5611814345991561, + "grad_norm": 0.5657025575637817, + "learning_rate": 0.0006170176021930509, + "loss": 1.5448, + "step": 5320 + }, + { + "epoch": 0.5622362869198312, + "grad_norm": 0.5586305856704712, + "learning_rate": 0.0006145475359272424, + "loss": 1.5557, + "step": 5330 + }, + { + "epoch": 0.5632911392405063, + "grad_norm": 0.6415152549743652, + "learning_rate": 0.0006120789874724336, + "loss": 1.5487, + "step": 5340 + }, + { + "epoch": 0.5643459915611815, + "grad_norm": 0.6272568106651306, + "learning_rate": 0.0006096119844899151, + "loss": 1.5398, + "step": 5350 + }, + { + "epoch": 0.5654008438818565, + "grad_norm": 0.5844125151634216, + "learning_rate": 0.0006071465546236601, + "loss": 1.5513, + "step": 5360 + }, + { + "epoch": 0.5664556962025317, + "grad_norm": 0.659501850605011, + "learning_rate": 0.0006046827255000135, + "loss": 1.5523, + "step": 5370 + }, + { + "epoch": 0.5675105485232067, + "grad_norm": 0.5435826182365417, + "learning_rate": 0.0006022205247273845, + "loss": 1.5409, + "step": 5380 + }, + { + "epoch": 0.5685654008438819, + "grad_norm": 0.7304155230522156, + "learning_rate": 0.0005997599798959343, + "loss": 1.5425, + "step": 5390 + }, + { + "epoch": 0.569620253164557, + "grad_norm": 0.5765751600265503, + "learning_rate": 0.0005973011185772694, + "loss": 1.5445, + "step": 5400 + }, + { + "epoch": 0.570675105485232, + "grad_norm": 0.6959366798400879, + 
"learning_rate": 0.0005948439683241318, + "loss": 1.5471, + "step": 5410 + }, + { + "epoch": 0.5717299578059072, + "grad_norm": 0.6025418043136597, + "learning_rate": 0.0005923885566700896, + "loss": 1.5395, + "step": 5420 + }, + { + "epoch": 0.5727848101265823, + "grad_norm": 0.8841429948806763, + "learning_rate": 0.0005899349111292293, + "loss": 1.5405, + "step": 5430 + }, + { + "epoch": 0.5738396624472574, + "grad_norm": 0.6896085739135742, + "learning_rate": 0.0005874830591958474, + "loss": 1.5384, + "step": 5440 + }, + { + "epoch": 0.5748945147679325, + "grad_norm": 0.6408348083496094, + "learning_rate": 0.000585033028344142, + "loss": 1.5432, + "step": 5450 + }, + { + "epoch": 0.5759493670886076, + "grad_norm": 0.7171529531478882, + "learning_rate": 0.0005825848460279048, + "loss": 1.5466, + "step": 5460 + }, + { + "epoch": 0.5770042194092827, + "grad_norm": 0.8158733248710632, + "learning_rate": 0.0005801385396802146, + "loss": 1.5421, + "step": 5470 + }, + { + "epoch": 0.5780590717299579, + "grad_norm": 1.0748262405395508, + "learning_rate": 0.0005776941367131282, + "loss": 1.5488, + "step": 5480 + }, + { + "epoch": 0.5791139240506329, + "grad_norm": 0.8084495067596436, + "learning_rate": 0.0005752516645173745, + "loss": 1.5465, + "step": 5490 + }, + { + "epoch": 0.580168776371308, + "grad_norm": 0.5522925853729248, + "learning_rate": 0.0005728111504620472, + "loss": 1.5407, + "step": 5500 + }, + { + "epoch": 0.5812236286919831, + "grad_norm": 0.5863711833953857, + "learning_rate": 0.0005703726218942976, + "loss": 1.5476, + "step": 5510 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 0.5634138584136963, + "learning_rate": 0.0005679361061390295, + "loss": 1.539, + "step": 5520 + }, + { + "epoch": 0.5833333333333334, + "grad_norm": 0.7387145757675171, + "learning_rate": 0.0005655016304985908, + "loss": 1.5344, + "step": 5530 + }, + { + "epoch": 0.5843881856540084, + "grad_norm": 0.995307981967926, + "learning_rate": 0.0005630692222524709, + "loss": 
1.534, + "step": 5540 + }, + { + "epoch": 0.5854430379746836, + "grad_norm": 0.9609442353248596, + "learning_rate": 0.0005606389086569911, + "loss": 1.5485, + "step": 5550 + }, + { + "epoch": 0.5864978902953587, + "grad_norm": 0.5431232452392578, + "learning_rate": 0.0005582107169450023, + "loss": 1.5532, + "step": 5560 + }, + { + "epoch": 0.5875527426160337, + "grad_norm": 0.5243140459060669, + "learning_rate": 0.0005557846743255783, + "loss": 1.5359, + "step": 5570 + }, + { + "epoch": 0.5886075949367089, + "grad_norm": 0.698683500289917, + "learning_rate": 0.0005533608079837109, + "loss": 1.5362, + "step": 5580 + }, + { + "epoch": 0.5896624472573839, + "grad_norm": 0.7093376517295837, + "learning_rate": 0.0005509391450800061, + "loss": 1.541, + "step": 5590 + }, + { + "epoch": 0.5907172995780591, + "grad_norm": 0.719809353351593, + "learning_rate": 0.0005485197127503795, + "loss": 1.5367, + "step": 5600 + }, + { + "epoch": 0.5917721518987342, + "grad_norm": 0.664923906326294, + "learning_rate": 0.0005461025381057516, + "loss": 1.5397, + "step": 5610 + }, + { + "epoch": 0.5928270042194093, + "grad_norm": 0.7531226277351379, + "learning_rate": 0.0005436876482317444, + "loss": 1.5481, + "step": 5620 + }, + { + "epoch": 0.5938818565400844, + "grad_norm": 0.5259689688682556, + "learning_rate": 0.0005412750701883782, + "loss": 1.5417, + "step": 5630 + }, + { + "epoch": 0.5949367088607594, + "grad_norm": 0.7380029559135437, + "learning_rate": 0.0005388648310097682, + "loss": 1.551, + "step": 5640 + }, + { + "epoch": 0.5959915611814346, + "grad_norm": 0.8128563761711121, + "learning_rate": 0.000536456957703821, + "loss": 1.5455, + "step": 5650 + }, + { + "epoch": 0.5970464135021097, + "grad_norm": 0.585620105266571, + "learning_rate": 0.0005340514772519324, + "loss": 1.5407, + "step": 5660 + }, + { + "epoch": 0.5981012658227848, + "grad_norm": 0.5881842970848083, + "learning_rate": 0.0005316484166086863, + "loss": 1.5451, + "step": 5670 + }, + { + "epoch": 
0.5991561181434599, + "grad_norm": 0.5658267736434937, + "learning_rate": 0.00052924780270155, + "loss": 1.546, + "step": 5680 + }, + { + "epoch": 0.6002109704641351, + "grad_norm": 0.6962028741836548, + "learning_rate": 0.0005268496624305747, + "loss": 1.5266, + "step": 5690 + }, + { + "epoch": 0.6012658227848101, + "grad_norm": 0.7076374888420105, + "learning_rate": 0.0005244540226680931, + "loss": 1.5329, + "step": 5700 + }, + { + "epoch": 0.6023206751054853, + "grad_norm": 0.7366517782211304, + "learning_rate": 0.0005220609102584185, + "loss": 1.5344, + "step": 5710 + }, + { + "epoch": 0.6033755274261603, + "grad_norm": 0.5640454888343811, + "learning_rate": 0.0005196703520175437, + "loss": 1.5403, + "step": 5720 + }, + { + "epoch": 0.6044303797468354, + "grad_norm": 0.8901501893997192, + "learning_rate": 0.0005172823747328415, + "loss": 1.5368, + "step": 5730 + }, + { + "epoch": 0.6054852320675106, + "grad_norm": 0.722210168838501, + "learning_rate": 0.0005148970051627632, + "loss": 1.5361, + "step": 5740 + }, + { + "epoch": 0.6065400843881856, + "grad_norm": 0.717575192451477, + "learning_rate": 0.0005125142700365394, + "loss": 1.5086, + "step": 5750 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 0.6678367853164673, + "learning_rate": 0.000510134196053881, + "loss": 1.5211, + "step": 5760 + }, + { + "epoch": 0.6086497890295358, + "grad_norm": 0.7207977771759033, + "learning_rate": 0.0005077568098846789, + "loss": 1.5225, + "step": 5770 + }, + { + "epoch": 0.609704641350211, + "grad_norm": 0.5583497285842896, + "learning_rate": 0.000505382138168706, + "loss": 1.531, + "step": 5780 + }, + { + "epoch": 0.6107594936708861, + "grad_norm": 0.7306301593780518, + "learning_rate": 0.0005030102075153181, + "loss": 1.5263, + "step": 5790 + }, + { + "epoch": 0.6118143459915611, + "grad_norm": 0.7418814301490784, + "learning_rate": 0.0005006410445031569, + "loss": 1.5306, + "step": 5800 + }, + { + "epoch": 0.6128691983122363, + "grad_norm": 0.730140209197998, + 
"learning_rate": 0.0004982746756798507, + "loss": 1.5356, + "step": 5810 + }, + { + "epoch": 0.6139240506329114, + "grad_norm": 0.5322291254997253, + "learning_rate": 0.0004959111275617174, + "loss": 1.5464, + "step": 5820 + }, + { + "epoch": 0.6149789029535865, + "grad_norm": 0.7516300082206726, + "learning_rate": 0.0004935504266334677, + "loss": 1.527, + "step": 5830 + }, + { + "epoch": 0.6160337552742616, + "grad_norm": 0.7283377051353455, + "learning_rate": 0.0004911925993479085, + "loss": 1.5411, + "step": 5840 + }, + { + "epoch": 0.6170886075949367, + "grad_norm": 0.6152079105377197, + "learning_rate": 0.0004888376721256456, + "loss": 1.544, + "step": 5850 + }, + { + "epoch": 0.6181434599156118, + "grad_norm": 0.7793056964874268, + "learning_rate": 0.00048648567135478805, + "loss": 1.5325, + "step": 5860 + }, + { + "epoch": 0.619198312236287, + "grad_norm": 0.8490005135536194, + "learning_rate": 0.0004841366233906538, + "loss": 1.5252, + "step": 5870 + }, + { + "epoch": 0.620253164556962, + "grad_norm": 0.5301468372344971, + "learning_rate": 0.0004817905545554717, + "loss": 1.5354, + "step": 5880 + }, + { + "epoch": 0.6213080168776371, + "grad_norm": 0.9015042781829834, + "learning_rate": 0.00047944749113808884, + "loss": 1.528, + "step": 5890 + }, + { + "epoch": 0.6223628691983122, + "grad_norm": 0.6948291659355164, + "learning_rate": 0.00047710745939367474, + "loss": 1.5277, + "step": 5900 + }, + { + "epoch": 0.6234177215189873, + "grad_norm": 0.8811990022659302, + "learning_rate": 0.0004747704855434278, + "loss": 1.5261, + "step": 5910 + }, + { + "epoch": 0.6244725738396625, + "grad_norm": 0.6645548343658447, + "learning_rate": 0.0004724365957742809, + "loss": 1.5199, + "step": 5920 + }, + { + "epoch": 0.6255274261603375, + "grad_norm": 1.0660958290100098, + "learning_rate": 0.00047010581623860883, + "loss": 1.5275, + "step": 5930 + }, + { + "epoch": 0.6265822784810127, + "grad_norm": 0.5938519239425659, + "learning_rate": 0.0004677781730539342, + "loss": 
1.5304, + "step": 5940 + }, + { + "epoch": 0.6276371308016878, + "grad_norm": 0.7018988728523254, + "learning_rate": 0.0004654536923026356, + "loss": 1.5209, + "step": 5950 + }, + { + "epoch": 0.6286919831223629, + "grad_norm": 0.6923278570175171, + "learning_rate": 0.00046313240003165466, + "loss": 1.5276, + "step": 5960 + }, + { + "epoch": 0.629746835443038, + "grad_norm": 0.6942279934883118, + "learning_rate": 0.0004608143222522048, + "loss": 1.5269, + "step": 5970 + }, + { + "epoch": 0.630801687763713, + "grad_norm": 0.6499513983726501, + "learning_rate": 0.0004584994849394795, + "loss": 1.5116, + "step": 5980 + }, + { + "epoch": 0.6318565400843882, + "grad_norm": 1.0198639631271362, + "learning_rate": 0.0004561879140323607, + "loss": 1.533, + "step": 5990 + }, + { + "epoch": 0.6329113924050633, + "grad_norm": 0.7474728226661682, + "learning_rate": 0.0004538796354331298, + "loss": 1.5392, + "step": 6000 + }, + { + "epoch": 0.6339662447257384, + "grad_norm": 0.6230899691581726, + "learning_rate": 0.0004515746750071754, + "loss": 1.5262, + "step": 6010 + }, + { + "epoch": 0.6350210970464135, + "grad_norm": 0.626305341720581, + "learning_rate": 0.0004492730585827046, + "loss": 1.5286, + "step": 6020 + }, + { + "epoch": 0.6360759493670886, + "grad_norm": 0.6829103827476501, + "learning_rate": 0.0004469748119504529, + "loss": 1.512, + "step": 6030 + }, + { + "epoch": 0.6371308016877637, + "grad_norm": 0.8076156973838806, + "learning_rate": 0.0004446799608633964, + "loss": 1.5316, + "step": 6040 + }, + { + "epoch": 0.6381856540084389, + "grad_norm": 0.5991992354393005, + "learning_rate": 0.00044238853103646154, + "loss": 1.5116, + "step": 6050 + }, + { + "epoch": 0.6392405063291139, + "grad_norm": 0.6281356811523438, + "learning_rate": 0.00044010054814623925, + "loss": 1.5298, + "step": 6060 + }, + { + "epoch": 0.640295358649789, + "grad_norm": 0.7195472121238708, + "learning_rate": 0.0004378160378306944, + "loss": 1.5227, + "step": 6070 + }, + { + "epoch": 
0.6413502109704642, + "grad_norm": 0.5887148380279541, + "learning_rate": 0.00043553502568888095, + "loss": 1.5229, + "step": 6080 + }, + { + "epoch": 0.6424050632911392, + "grad_norm": 0.7634897828102112, + "learning_rate": 0.0004332575372806534, + "loss": 1.5174, + "step": 6090 + }, + { + "epoch": 0.6434599156118144, + "grad_norm": 0.5819145441055298, + "learning_rate": 0.00043098359812638145, + "loss": 1.5137, + "step": 6100 + }, + { + "epoch": 0.6445147679324894, + "grad_norm": 0.8440636396408081, + "learning_rate": 0.00042871323370666383, + "loss": 1.5253, + "step": 6110 + }, + { + "epoch": 0.6455696202531646, + "grad_norm": 0.5726558566093445, + "learning_rate": 0.0004264464694620421, + "loss": 1.5202, + "step": 6120 + }, + { + "epoch": 0.6466244725738397, + "grad_norm": 0.5537199974060059, + "learning_rate": 0.000424183330792717, + "loss": 1.5187, + "step": 6130 + }, + { + "epoch": 0.6476793248945147, + "grad_norm": 0.8540937900543213, + "learning_rate": 0.0004219238430582621, + "loss": 1.5183, + "step": 6140 + }, + { + "epoch": 0.6487341772151899, + "grad_norm": 0.7657756805419922, + "learning_rate": 0.0004196680315773408, + "loss": 1.5289, + "step": 6150 + }, + { + "epoch": 0.6497890295358649, + "grad_norm": 0.5736072659492493, + "learning_rate": 0.00041741592162742214, + "loss": 1.5227, + "step": 6160 + }, + { + "epoch": 0.6508438818565401, + "grad_norm": 0.6046714782714844, + "learning_rate": 0.0004151675384444978, + "loss": 1.4994, + "step": 6170 + }, + { + "epoch": 0.6518987341772152, + "grad_norm": 1.1634455919265747, + "learning_rate": 0.00041292290722279914, + "loss": 1.5217, + "step": 6180 + }, + { + "epoch": 0.6529535864978903, + "grad_norm": 0.9989205002784729, + "learning_rate": 0.00041068205311451517, + "loss": 1.5254, + "step": 6190 + }, + { + "epoch": 0.6540084388185654, + "grad_norm": 0.7797510623931885, + "learning_rate": 0.00040844500122951026, + "loss": 1.5193, + "step": 6200 + }, + { + "epoch": 0.6550632911392406, + "grad_norm": 
0.6658583879470825, + "learning_rate": 0.00040621177663504313, + "loss": 1.5154, + "step": 6210 + }, + { + "epoch": 0.6561181434599156, + "grad_norm": 0.5408698320388794, + "learning_rate": 0.00040398240435548583, + "loss": 1.5215, + "step": 6220 + }, + { + "epoch": 0.6571729957805907, + "grad_norm": 0.5456634759902954, + "learning_rate": 0.00040175690937204324, + "loss": 1.5119, + "step": 6230 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 0.6791335940361023, + "learning_rate": 0.00039953531662247343, + "loss": 1.5167, + "step": 6240 + }, + { + "epoch": 0.6592827004219409, + "grad_norm": 0.6939202547073364, + "learning_rate": 0.0003973176510008075, + "loss": 1.5218, + "step": 6250 + }, + { + "epoch": 0.6603375527426161, + "grad_norm": 0.6589305400848389, + "learning_rate": 0.00039510393735707233, + "loss": 1.5129, + "step": 6260 + }, + { + "epoch": 0.6613924050632911, + "grad_norm": 0.528374969959259, + "learning_rate": 0.00039289420049700986, + "loss": 1.5217, + "step": 6270 + }, + { + "epoch": 0.6624472573839663, + "grad_norm": 0.614210844039917, + "learning_rate": 0.0003906884651818006, + "loss": 1.5233, + "step": 6280 + }, + { + "epoch": 0.6635021097046413, + "grad_norm": 0.5489770174026489, + "learning_rate": 0.00038848675612778577, + "loss": 1.5173, + "step": 6290 + }, + { + "epoch": 0.6645569620253164, + "grad_norm": 0.7366181015968323, + "learning_rate": 0.00038628909800619046, + "loss": 1.513, + "step": 6300 + }, + { + "epoch": 0.6656118143459916, + "grad_norm": 0.6422160267829895, + "learning_rate": 0.0003840955154428467, + "loss": 1.5114, + "step": 6310 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.682155430316925, + "learning_rate": 0.00038190603301791864, + "loss": 1.5077, + "step": 6320 + }, + { + "epoch": 0.6677215189873418, + "grad_norm": 0.666942298412323, + "learning_rate": 0.0003797206752656258, + "loss": 1.5176, + "step": 6330 + }, + { + "epoch": 0.6687763713080169, + "grad_norm": 0.6019499897956848, + "learning_rate": 
0.0003775394666739688, + "loss": 1.5098, + "step": 6340 + }, + { + "epoch": 0.669831223628692, + "grad_norm": 0.6468465328216553, + "learning_rate": 0.00037536243168445507, + "loss": 1.5168, + "step": 6350 + }, + { + "epoch": 0.6708860759493671, + "grad_norm": 0.5997724533081055, + "learning_rate": 0.0003731895946918246, + "loss": 1.5136, + "step": 6360 + }, + { + "epoch": 0.6719409282700421, + "grad_norm": 0.5916891694068909, + "learning_rate": 0.0003710209800437769, + "loss": 1.4957, + "step": 6370 + }, + { + "epoch": 0.6729957805907173, + "grad_norm": 0.6119093298912048, + "learning_rate": 0.00036885661204069767, + "loss": 1.5013, + "step": 6380 + }, + { + "epoch": 0.6740506329113924, + "grad_norm": 0.8705652356147766, + "learning_rate": 0.0003666965149353878, + "loss": 1.5169, + "step": 6390 + }, + { + "epoch": 0.6751054852320675, + "grad_norm": 0.6239902377128601, + "learning_rate": 0.0003645407129327898, + "loss": 1.5155, + "step": 6400 + }, + { + "epoch": 0.6761603375527426, + "grad_norm": 0.5648002028465271, + "learning_rate": 0.00036238923018971783, + "loss": 1.5118, + "step": 6410 + }, + { + "epoch": 0.6772151898734177, + "grad_norm": 0.6062861680984497, + "learning_rate": 0.0003602420908145865, + "loss": 1.5026, + "step": 6420 + }, + { + "epoch": 0.6782700421940928, + "grad_norm": 0.655336856842041, + "learning_rate": 0.00035809931886714093, + "loss": 1.5118, + "step": 6430 + }, + { + "epoch": 0.679324894514768, + "grad_norm": 0.7121381759643555, + "learning_rate": 0.00035596093835818683, + "loss": 1.495, + "step": 6440 + }, + { + "epoch": 0.680379746835443, + "grad_norm": 0.6306138634681702, + "learning_rate": 0.00035382697324932245, + "loss": 1.5012, + "step": 6450 + }, + { + "epoch": 0.6814345991561181, + "grad_norm": 0.959322988986969, + "learning_rate": 0.00035169744745266866, + "loss": 1.514, + "step": 6460 + }, + { + "epoch": 0.6824894514767933, + "grad_norm": 0.9421017169952393, + "learning_rate": 0.0003495723848306017, + "loss": 1.5084, + 
"step": 6470 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 0.6493289470672607, + "learning_rate": 0.0003474518091954859, + "loss": 1.5146, + "step": 6480 + }, + { + "epoch": 0.6845991561181435, + "grad_norm": 0.5578026175498962, + "learning_rate": 0.0003453357443094068, + "loss": 1.5105, + "step": 6490 + }, + { + "epoch": 0.6856540084388185, + "grad_norm": 0.540990948677063, + "learning_rate": 0.00034322421388390456, + "loss": 1.5251, + "step": 6500 + }, + { + "epoch": 0.6867088607594937, + "grad_norm": 0.571877658367157, + "learning_rate": 0.0003411172415797087, + "loss": 1.522, + "step": 6510 + }, + { + "epoch": 0.6877637130801688, + "grad_norm": 0.5469315648078918, + "learning_rate": 0.0003390148510064727, + "loss": 1.5094, + "step": 6520 + }, + { + "epoch": 0.6888185654008439, + "grad_norm": 0.7351030707359314, + "learning_rate": 0.0003369170657225094, + "loss": 1.5002, + "step": 6530 + }, + { + "epoch": 0.689873417721519, + "grad_norm": 0.6015704274177551, + "learning_rate": 0.0003348239092345275, + "loss": 1.5109, + "step": 6540 + }, + { + "epoch": 0.6909282700421941, + "grad_norm": 0.6040710806846619, + "learning_rate": 0.0003327354049973672, + "loss": 1.5047, + "step": 6550 + }, + { + "epoch": 0.6919831223628692, + "grad_norm": 1.0752464532852173, + "learning_rate": 0.00033065157641373847, + "loss": 1.5055, + "step": 6560 + }, + { + "epoch": 0.6930379746835443, + "grad_norm": 0.877627968788147, + "learning_rate": 0.0003285724468339576, + "loss": 1.5032, + "step": 6570 + }, + { + "epoch": 0.6940928270042194, + "grad_norm": 0.7728718519210815, + "learning_rate": 0.00032649803955568755, + "loss": 1.4996, + "step": 6580 + }, + { + "epoch": 0.6951476793248945, + "grad_norm": 1.2035620212554932, + "learning_rate": 0.00032442837782367434, + "loss": 1.5066, + "step": 6590 + }, + { + "epoch": 0.6962025316455697, + "grad_norm": 0.6481416821479797, + "learning_rate": 0.0003223634848294883, + "loss": 1.5103, + "step": 6600 + }, + { + "epoch": 
0.6972573839662447, + "grad_norm": 0.8738474249839783, + "learning_rate": 0.00032030338371126374, + "loss": 1.4972, + "step": 6610 + }, + { + "epoch": 0.6983122362869199, + "grad_norm": 0.5568274259567261, + "learning_rate": 0.0003182480975534395, + "loss": 1.504, + "step": 6620 + }, + { + "epoch": 0.6993670886075949, + "grad_norm": 0.7597935795783997, + "learning_rate": 0.00031619764938650057, + "loss": 1.4979, + "step": 6630 + }, + { + "epoch": 0.70042194092827, + "grad_norm": 0.6429214477539062, + "learning_rate": 0.0003141520621867197, + "loss": 1.5027, + "step": 6640 + }, + { + "epoch": 0.7014767932489452, + "grad_norm": 0.5849870443344116, + "learning_rate": 0.00031211135887590074, + "loss": 1.4985, + "step": 6650 + }, + { + "epoch": 0.7025316455696202, + "grad_norm": 0.755167543888092, + "learning_rate": 0.0003100755623211205, + "loss": 1.4973, + "step": 6660 + }, + { + "epoch": 0.7035864978902954, + "grad_norm": 0.5719517469406128, + "learning_rate": 0.0003080446953344735, + "loss": 1.5024, + "step": 6670 + }, + { + "epoch": 0.7046413502109705, + "grad_norm": 0.5742954015731812, + "learning_rate": 0.00030601878067281575, + "loss": 1.5024, + "step": 6680 + }, + { + "epoch": 0.7056962025316456, + "grad_norm": 0.794468104839325, + "learning_rate": 0.00030399784103751044, + "loss": 1.4967, + "step": 6690 + }, + { + "epoch": 0.7067510548523207, + "grad_norm": 0.7615025639533997, + "learning_rate": 0.000301981899074173, + "loss": 1.4983, + "step": 6700 + }, + { + "epoch": 0.7078059071729957, + "grad_norm": 0.6404043436050415, + "learning_rate": 0.0002999709773724171, + "loss": 1.4985, + "step": 6710 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 0.8927266597747803, + "learning_rate": 0.00029796509846560294, + "loss": 1.4916, + "step": 6720 + }, + { + "epoch": 0.709915611814346, + "grad_norm": 0.9995803833007812, + "learning_rate": 0.0002959642848305828, + "loss": 1.4986, + "step": 6730 + }, + { + "epoch": 0.7109704641350211, + "grad_norm": 
0.7388569712638855, + "learning_rate": 0.00029396855888745045, + "loss": 1.5051, + "step": 6740 + }, + { + "epoch": 0.7120253164556962, + "grad_norm": 0.7992568016052246, + "learning_rate": 0.0002919779429992895, + "loss": 1.5049, + "step": 6750 + }, + { + "epoch": 0.7130801687763713, + "grad_norm": 0.7185298204421997, + "learning_rate": 0.0002899924594719231, + "loss": 1.5007, + "step": 6760 + }, + { + "epoch": 0.7141350210970464, + "grad_norm": 0.5857946872711182, + "learning_rate": 0.00028801213055366335, + "loss": 1.504, + "step": 6770 + }, + { + "epoch": 0.7151898734177216, + "grad_norm": 0.6569031476974487, + "learning_rate": 0.00028603697843506315, + "loss": 1.4987, + "step": 6780 + }, + { + "epoch": 0.7162447257383966, + "grad_norm": 0.6807733774185181, + "learning_rate": 0.0002840670252486662, + "loss": 1.5048, + "step": 6790 + }, + { + "epoch": 0.7172995780590717, + "grad_norm": 0.681591272354126, + "learning_rate": 0.00028210229306876, + "loss": 1.4909, + "step": 6800 + }, + { + "epoch": 0.7183544303797469, + "grad_norm": 1.0114452838897705, + "learning_rate": 0.0002801428039111279, + "loss": 1.4952, + "step": 6810 + }, + { + "epoch": 0.7194092827004219, + "grad_norm": 0.6833702325820923, + "learning_rate": 0.00027818857973280274, + "loss": 1.4972, + "step": 6820 + }, + { + "epoch": 0.7204641350210971, + "grad_norm": 0.831628680229187, + "learning_rate": 0.0002762396424318206, + "loss": 1.4937, + "step": 6830 + }, + { + "epoch": 0.7215189873417721, + "grad_norm": 0.6646669507026672, + "learning_rate": 0.00027429601384697526, + "loss": 1.4863, + "step": 6840 + }, + { + "epoch": 0.7225738396624473, + "grad_norm": 0.5678536295890808, + "learning_rate": 0.00027235771575757466, + "loss": 1.4967, + "step": 6850 + }, + { + "epoch": 0.7236286919831224, + "grad_norm": 1.0276697874069214, + "learning_rate": 0.0002704247698831951, + "loss": 1.4929, + "step": 6860 + }, + { + "epoch": 0.7246835443037974, + "grad_norm": 0.6257740259170532, + "learning_rate": 
0.0002684971978834389, + "loss": 1.4898, + "step": 6870 + }, + { + "epoch": 0.7257383966244726, + "grad_norm": 0.7391855120658875, + "learning_rate": 0.0002665750213576914, + "loss": 1.4971, + "step": 6880 + }, + { + "epoch": 0.7267932489451476, + "grad_norm": 0.8270142078399658, + "learning_rate": 0.0002646582618448794, + "loss": 1.4875, + "step": 6890 + }, + { + "epoch": 0.7278481012658228, + "grad_norm": 1.0389912128448486, + "learning_rate": 0.00026274694082322896, + "loss": 1.4906, + "step": 6900 + }, + { + "epoch": 0.7289029535864979, + "grad_norm": 0.7089370489120483, + "learning_rate": 0.0002608410797100255, + "loss": 1.5135, + "step": 6910 + }, + { + "epoch": 0.729957805907173, + "grad_norm": 0.621171236038208, + "learning_rate": 0.0002589406998613733, + "loss": 1.4921, + "step": 6920 + }, + { + "epoch": 0.7310126582278481, + "grad_norm": 0.7269111275672913, + "learning_rate": 0.0002570458225719567, + "loss": 1.4999, + "step": 6930 + }, + { + "epoch": 0.7320675105485233, + "grad_norm": 0.7223342657089233, + "learning_rate": 0.00025515646907480074, + "loss": 1.5005, + "step": 6940 + }, + { + "epoch": 0.7331223628691983, + "grad_norm": 0.7558168172836304, + "learning_rate": 0.00025327266054103395, + "loss": 1.4979, + "step": 6950 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 0.5971283912658691, + "learning_rate": 0.0002513944180796509, + "loss": 1.4879, + "step": 6960 + }, + { + "epoch": 0.7352320675105485, + "grad_norm": 0.556942880153656, + "learning_rate": 0.0002495217627372752, + "loss": 1.4854, + "step": 6970 + }, + { + "epoch": 0.7362869198312236, + "grad_norm": 0.5493460297584534, + "learning_rate": 0.0002476547154979248, + "loss": 1.4885, + "step": 6980 + }, + { + "epoch": 0.7373417721518988, + "grad_norm": 0.7682004570960999, + "learning_rate": 0.00024579329728277534, + "loss": 1.4895, + "step": 6990 + }, + { + "epoch": 0.7383966244725738, + "grad_norm": 0.5973787307739258, + "learning_rate": 0.00024393752894992708, + "loss": 1.5016, + 
"step": 7000 + }, + { + "epoch": 0.739451476793249, + "grad_norm": 0.691828727722168, + "learning_rate": 0.00024208743129417004, + "loss": 1.4708, + "step": 7010 + }, + { + "epoch": 0.740506329113924, + "grad_norm": 0.8582556247711182, + "learning_rate": 0.00024024302504675206, + "loss": 1.4872, + "step": 7020 + }, + { + "epoch": 0.7415611814345991, + "grad_norm": 0.7478125095367432, + "learning_rate": 0.0002384043308751454, + "loss": 1.4881, + "step": 7030 + }, + { + "epoch": 0.7426160337552743, + "grad_norm": 0.7091748714447021, + "learning_rate": 0.00023657136938281653, + "loss": 1.4972, + "step": 7040 + }, + { + "epoch": 0.7436708860759493, + "grad_norm": 0.6594682931900024, + "learning_rate": 0.00023474416110899377, + "loss": 1.4825, + "step": 7050 + }, + { + "epoch": 0.7447257383966245, + "grad_norm": 0.5818279981613159, + "learning_rate": 0.00023292272652843807, + "loss": 1.4893, + "step": 7060 + }, + { + "epoch": 0.7457805907172996, + "grad_norm": 0.5838139057159424, + "learning_rate": 0.00023110708605121317, + "loss": 1.4967, + "step": 7070 + }, + { + "epoch": 0.7468354430379747, + "grad_norm": 0.6163455247879028, + "learning_rate": 0.00022929726002245728, + "loss": 1.4905, + "step": 7080 + }, + { + "epoch": 0.7478902953586498, + "grad_norm": 0.5999016761779785, + "learning_rate": 0.00022749326872215472, + "loss": 1.4914, + "step": 7090 + }, + { + "epoch": 0.7489451476793249, + "grad_norm": 0.5929955840110779, + "learning_rate": 0.0002256951323649087, + "loss": 1.4852, + "step": 7100 + }, + { + "epoch": 0.75, + "grad_norm": 0.8933738470077515, + "learning_rate": 0.00022390287109971547, + "loss": 1.4967, + "step": 7110 + }, + { + "epoch": 0.7510548523206751, + "grad_norm": 0.6281285881996155, + "learning_rate": 0.00022211650500973746, + "loss": 1.494, + "step": 7120 + }, + { + "epoch": 0.7521097046413502, + "grad_norm": 0.5670304894447327, + "learning_rate": 0.0002203360541120789, + "loss": 1.4991, + "step": 7130 + }, + { + "epoch": 0.7531645569620253, + 
"grad_norm": 0.6504455804824829, + "learning_rate": 0.00021856153835756164, + "loss": 1.4813, + "step": 7140 + }, + { + "epoch": 0.7542194092827004, + "grad_norm": 0.7502389550209045, + "learning_rate": 0.00021679297763050104, + "loss": 1.4769, + "step": 7150 + }, + { + "epoch": 0.7552742616033755, + "grad_norm": 0.8267398476600647, + "learning_rate": 0.0002150303917484834, + "loss": 1.4906, + "step": 7160 + }, + { + "epoch": 0.7563291139240507, + "grad_norm": 0.9848253726959229, + "learning_rate": 0.0002132738004621446, + "loss": 1.4983, + "step": 7170 + }, + { + "epoch": 0.7573839662447257, + "grad_norm": 0.8194891214370728, + "learning_rate": 0.00021152322345494763, + "loss": 1.4759, + "step": 7180 + }, + { + "epoch": 0.7584388185654009, + "grad_norm": 0.7674716114997864, + "learning_rate": 0.00020977868034296253, + "loss": 1.4779, + "step": 7190 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 0.7068671584129333, + "learning_rate": 0.00020804019067464667, + "loss": 1.4838, + "step": 7200 + }, + { + "epoch": 0.760548523206751, + "grad_norm": 0.6020982265472412, + "learning_rate": 0.00020630777393062575, + "loss": 1.4865, + "step": 7210 + }, + { + "epoch": 0.7616033755274262, + "grad_norm": 0.5899586081504822, + "learning_rate": 0.00020458144952347523, + "loss": 1.4999, + "step": 7220 + }, + { + "epoch": 0.7626582278481012, + "grad_norm": 0.6057927012443542, + "learning_rate": 0.00020286123679750314, + "loss": 1.4955, + "step": 7230 + }, + { + "epoch": 0.7637130801687764, + "grad_norm": 0.5882257223129272, + "learning_rate": 0.00020114715502853292, + "loss": 1.4922, + "step": 7240 + }, + { + "epoch": 0.7647679324894515, + "grad_norm": 0.8482392430305481, + "learning_rate": 0.0001994392234236878, + "loss": 1.4854, + "step": 7250 + }, + { + "epoch": 0.7658227848101266, + "grad_norm": 0.6891030073165894, + "learning_rate": 0.0001977374611211754, + "loss": 1.4858, + "step": 7260 + }, + { + "epoch": 0.7668776371308017, + "grad_norm": 0.6707295775413513, + 
"learning_rate": 0.00019604188719007313, + "loss": 1.4842, + "step": 7270 + }, + { + "epoch": 0.7679324894514767, + "grad_norm": 0.6552391648292542, + "learning_rate": 0.00019435252063011504, + "loss": 1.4856, + "step": 7280 + }, + { + "epoch": 0.7689873417721519, + "grad_norm": 0.6179306507110596, + "learning_rate": 0.0001926693803714779, + "loss": 1.4955, + "step": 7290 + }, + { + "epoch": 0.770042194092827, + "grad_norm": 0.7294806838035583, + "learning_rate": 0.00019099248527457068, + "loss": 1.5031, + "step": 7300 + }, + { + "epoch": 0.7710970464135021, + "grad_norm": 0.5443121194839478, + "learning_rate": 0.0001893218541298216, + "loss": 1.4794, + "step": 7310 + }, + { + "epoch": 0.7721518987341772, + "grad_norm": 0.7363503575325012, + "learning_rate": 0.00018765750565746827, + "loss": 1.4778, + "step": 7320 + }, + { + "epoch": 0.7732067510548524, + "grad_norm": 0.7273024320602417, + "learning_rate": 0.00018599945850734812, + "loss": 1.4842, + "step": 7330 + }, + { + "epoch": 0.7742616033755274, + "grad_norm": 0.6860677599906921, + "learning_rate": 0.00018434773125868895, + "loss": 1.4797, + "step": 7340 + }, + { + "epoch": 0.7753164556962026, + "grad_norm": 0.7369831800460815, + "learning_rate": 0.00018270234241990108, + "loss": 1.4745, + "step": 7350 + }, + { + "epoch": 0.7763713080168776, + "grad_norm": 0.7779724597930908, + "learning_rate": 0.0001810633104283698, + "loss": 1.4854, + "step": 7360 + }, + { + "epoch": 0.7774261603375527, + "grad_norm": 0.6062511205673218, + "learning_rate": 0.0001794306536502492, + "loss": 1.4862, + "step": 7370 + }, + { + "epoch": 0.7784810126582279, + "grad_norm": 0.620524525642395, + "learning_rate": 0.0001778043903802555, + "loss": 1.4762, + "step": 7380 + }, + { + "epoch": 0.7795358649789029, + "grad_norm": 0.7244411110877991, + "learning_rate": 0.0001761845388414627, + "loss": 1.4828, + "step": 7390 + }, + { + "epoch": 0.7805907172995781, + "grad_norm": 0.5704057216644287, + "learning_rate": 0.00017457111718509831, + 
"loss": 1.4917, + "step": 7400 + }, + { + "epoch": 0.7816455696202531, + "grad_norm": 0.5905210971832275, + "learning_rate": 0.00017296414349033976, + "loss": 1.471, + "step": 7410 + }, + { + "epoch": 0.7827004219409283, + "grad_norm": 0.6144134998321533, + "learning_rate": 0.00017136363576411172, + "loss": 1.4757, + "step": 7420 + }, + { + "epoch": 0.7837552742616034, + "grad_norm": 0.7873600721359253, + "learning_rate": 0.00016976961194088526, + "loss": 1.4649, + "step": 7430 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 0.6452359557151794, + "learning_rate": 0.00016818208988247533, + "loss": 1.4689, + "step": 7440 + }, + { + "epoch": 0.7858649789029536, + "grad_norm": 0.6352021098136902, + "learning_rate": 0.0001666010873778419, + "loss": 1.4728, + "step": 7450 + }, + { + "epoch": 0.7869198312236287, + "grad_norm": 0.5584001541137695, + "learning_rate": 0.00016502662214289, + "loss": 1.4818, + "step": 7460 + }, + { + "epoch": 0.7879746835443038, + "grad_norm": 0.7875795364379883, + "learning_rate": 0.00016345871182027124, + "loss": 1.4713, + "step": 7470 + }, + { + "epoch": 0.7890295358649789, + "grad_norm": 0.767079770565033, + "learning_rate": 0.00016189737397918653, + "loss": 1.4777, + "step": 7480 + }, + { + "epoch": 0.790084388185654, + "grad_norm": 0.5551626086235046, + "learning_rate": 0.0001603426261151884, + "loss": 1.4871, + "step": 7490 + }, + { + "epoch": 0.7911392405063291, + "grad_norm": 0.5894846320152283, + "learning_rate": 0.00015879448564998648, + "loss": 1.4797, + "step": 7500 + }, + { + "epoch": 0.7921940928270043, + "grad_norm": 0.8564838767051697, + "learning_rate": 0.0001572529699312501, + "loss": 1.4879, + "step": 7510 + }, + { + "epoch": 0.7932489451476793, + "grad_norm": 0.7947567701339722, + "learning_rate": 0.0001557180962324158, + "loss": 1.47, + "step": 7520 + }, + { + "epoch": 0.7943037974683544, + "grad_norm": 0.652262270450592, + "learning_rate": 0.00015418988175249282, + "loss": 1.4729, + "step": 7530 + }, + { + 
"epoch": 0.7953586497890295, + "grad_norm": 0.627450704574585, + "learning_rate": 0.00015266834361587063, + "loss": 1.471, + "step": 7540 + }, + { + "epoch": 0.7964135021097046, + "grad_norm": 0.6242784261703491, + "learning_rate": 0.00015115349887212678, + "loss": 1.4659, + "step": 7550 + }, + { + "epoch": 0.7974683544303798, + "grad_norm": 0.5873207449913025, + "learning_rate": 0.00014964536449583657, + "loss": 1.4707, + "step": 7560 + }, + { + "epoch": 0.7985232067510548, + "grad_norm": 0.6726136803627014, + "learning_rate": 0.00014814395738638195, + "loss": 1.4816, + "step": 7570 + }, + { + "epoch": 0.79957805907173, + "grad_norm": 0.9509627223014832, + "learning_rate": 0.00014664929436776278, + "loss": 1.4782, + "step": 7580 + }, + { + "epoch": 0.8006329113924051, + "grad_norm": 0.7091712951660156, + "learning_rate": 0.00014516139218840788, + "loss": 1.4713, + "step": 7590 + }, + { + "epoch": 0.8016877637130801, + "grad_norm": 0.635170578956604, + "learning_rate": 0.00014368026752098782, + "loss": 1.471, + "step": 7600 + }, + { + "epoch": 0.8027426160337553, + "grad_norm": 0.5847497582435608, + "learning_rate": 0.00014220593696222768, + "loss": 1.4868, + "step": 7610 + }, + { + "epoch": 0.8037974683544303, + "grad_norm": 0.5726175904273987, + "learning_rate": 0.00014073841703272092, + "loss": 1.4582, + "step": 7620 + }, + { + "epoch": 0.8048523206751055, + "grad_norm": 0.6278169751167297, + "learning_rate": 0.00013927772417674558, + "loss": 1.4816, + "step": 7630 + }, + { + "epoch": 0.8059071729957806, + "grad_norm": 0.7151062488555908, + "learning_rate": 0.00013782387476207788, + "loss": 1.4733, + "step": 7640 + }, + { + "epoch": 0.8069620253164557, + "grad_norm": 0.626961886882782, + "learning_rate": 0.00013637688507981064, + "loss": 1.4898, + "step": 7650 + }, + { + "epoch": 0.8080168776371308, + "grad_norm": 0.5914263725280762, + "learning_rate": 0.0001349367713441697, + "loss": 1.4671, + "step": 7660 + }, + { + "epoch": 0.8090717299578059, + "grad_norm": 
0.6298964619636536, + "learning_rate": 0.0001335035496923326, + "loss": 1.4743, + "step": 7670 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 0.6886650919914246, + "learning_rate": 0.0001320772361842478, + "loss": 1.4864, + "step": 7680 + }, + { + "epoch": 0.8111814345991561, + "grad_norm": 0.6819822788238525, + "learning_rate": 0.00013065784680245442, + "loss": 1.4635, + "step": 7690 + }, + { + "epoch": 0.8122362869198312, + "grad_norm": 0.7044243216514587, + "learning_rate": 0.00012924539745190402, + "loss": 1.4729, + "step": 7700 + }, + { + "epoch": 0.8132911392405063, + "grad_norm": 0.6611077189445496, + "learning_rate": 0.0001278399039597809, + "loss": 1.4689, + "step": 7710 + }, + { + "epoch": 0.8143459915611815, + "grad_norm": 0.6910321712493896, + "learning_rate": 0.0001264413820753261, + "loss": 1.4733, + "step": 7720 + }, + { + "epoch": 0.8154008438818565, + "grad_norm": 0.6490603089332581, + "learning_rate": 0.00012504984746966003, + "loss": 1.4777, + "step": 7730 + }, + { + "epoch": 0.8164556962025317, + "grad_norm": 0.6089397072792053, + "learning_rate": 0.00012366531573560754, + "loss": 1.4765, + "step": 7740 + }, + { + "epoch": 0.8175105485232067, + "grad_norm": 0.7661972045898438, + "learning_rate": 0.00012228780238752264, + "loss": 1.4631, + "step": 7750 + }, + { + "epoch": 0.8185654008438819, + "grad_norm": 0.6821120381355286, + "learning_rate": 0.00012091732286111514, + "loss": 1.4656, + "step": 7760 + }, + { + "epoch": 0.819620253164557, + "grad_norm": 0.5958478450775146, + "learning_rate": 0.00011955389251327737, + "loss": 1.4735, + "step": 7770 + }, + { + "epoch": 0.820675105485232, + "grad_norm": 0.7508857250213623, + "learning_rate": 0.00011819752662191197, + "loss": 1.4661, + "step": 7780 + }, + { + "epoch": 0.8217299578059072, + "grad_norm": 0.6203247904777527, + "learning_rate": 0.00011684824038576115, + "loss": 1.4848, + "step": 7790 + }, + { + "epoch": 0.8227848101265823, + "grad_norm": 0.6274954080581665, + "learning_rate": 
0.00011550604892423593, + "loss": 1.4676, + "step": 7800 + }, + { + "epoch": 0.8238396624472574, + "grad_norm": 0.654800534248352, + "learning_rate": 0.0001141709672772471, + "loss": 1.4712, + "step": 7810 + }, + { + "epoch": 0.8248945147679325, + "grad_norm": 0.6142522096633911, + "learning_rate": 0.00011284301040503625, + "loss": 1.4787, + "step": 7820 + }, + { + "epoch": 0.8259493670886076, + "grad_norm": 0.6720873713493347, + "learning_rate": 0.0001115221931880088, + "loss": 1.4687, + "step": 7830 + }, + { + "epoch": 0.8270042194092827, + "grad_norm": 0.5677502155303955, + "learning_rate": 0.00011020853042656648, + "loss": 1.4565, + "step": 7840 + }, + { + "epoch": 0.8280590717299579, + "grad_norm": 0.5971149802207947, + "learning_rate": 0.000108902036840942, + "loss": 1.464, + "step": 7850 + }, + { + "epoch": 0.8291139240506329, + "grad_norm": 0.5706846714019775, + "learning_rate": 0.00010760272707103389, + "loss": 1.451, + "step": 7860 + }, + { + "epoch": 0.830168776371308, + "grad_norm": 0.5851002931594849, + "learning_rate": 0.00010631061567624259, + "loss": 1.4646, + "step": 7870 + }, + { + "epoch": 0.8312236286919831, + "grad_norm": 0.8277323842048645, + "learning_rate": 0.00010502571713530706, + "loss": 1.4846, + "step": 7880 + }, + { + "epoch": 0.8322784810126582, + "grad_norm": 0.5705551505088806, + "learning_rate": 0.00010374804584614308, + "loss": 1.4829, + "step": 7890 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.7543693780899048, + "learning_rate": 0.00010247761612568129, + "loss": 1.4614, + "step": 7900 + }, + { + "epoch": 0.8343881856540084, + "grad_norm": 0.9401488900184631, + "learning_rate": 0.0001012144422097069, + "loss": 1.4735, + "step": 7910 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 0.6426950693130493, + "learning_rate": 9.995853825270052e-05, + "loss": 1.4706, + "step": 7920 + }, + { + "epoch": 0.8364978902953587, + "grad_norm": 0.6009029150009155, + "learning_rate": 9.870991832767919e-05, + "loss": 1.4777, + 
"step": 7930 + }, + { + "epoch": 0.8375527426160337, + "grad_norm": 0.6342074871063232, + "learning_rate": 9.746859642603884e-05, + "loss": 1.4725, + "step": 7940 + }, + { + "epoch": 0.8386075949367089, + "grad_norm": 0.5594701170921326, + "learning_rate": 9.623458645739755e-05, + "loss": 1.4748, + "step": 7950 + }, + { + "epoch": 0.8396624472573839, + "grad_norm": 0.5254621505737305, + "learning_rate": 9.50079022494395e-05, + "loss": 1.4601, + "step": 7960 + }, + { + "epoch": 0.8407172995780591, + "grad_norm": 0.6053014993667603, + "learning_rate": 9.378855754776028e-05, + "loss": 1.4606, + "step": 7970 + }, + { + "epoch": 0.8417721518987342, + "grad_norm": 0.6164686679840088, + "learning_rate": 9.257656601571266e-05, + "loss": 1.4595, + "step": 7980 + }, + { + "epoch": 0.8428270042194093, + "grad_norm": 0.6706066131591797, + "learning_rate": 9.137194123425349e-05, + "loss": 1.4725, + "step": 7990 + }, + { + "epoch": 0.8438818565400844, + "grad_norm": 0.5835105776786804, + "learning_rate": 9.017469670179168e-05, + "loss": 1.466, + "step": 8000 + }, + { + "epoch": 0.8449367088607594, + "grad_norm": 0.5491660237312317, + "learning_rate": 8.898484583403668e-05, + "loss": 1.4725, + "step": 8010 + }, + { + "epoch": 0.8459915611814346, + "grad_norm": 0.5983719229698181, + "learning_rate": 8.780240196384873e-05, + "loss": 1.4657, + "step": 8020 + }, + { + "epoch": 0.8470464135021097, + "grad_norm": 0.8344730734825134, + "learning_rate": 8.662737834108861e-05, + "loss": 1.4644, + "step": 8030 + }, + { + "epoch": 0.8481012658227848, + "grad_norm": 0.6508998870849609, + "learning_rate": 8.545978813246987e-05, + "loss": 1.4767, + "step": 8040 + }, + { + "epoch": 0.8491561181434599, + "grad_norm": 0.6542248725891113, + "learning_rate": 8.429964442141072e-05, + "loss": 1.4579, + "step": 8050 + }, + { + "epoch": 0.8502109704641351, + "grad_norm": 0.6182092428207397, + "learning_rate": 8.314696020788806e-05, + "loss": 1.4656, + "step": 8060 + }, + { + "epoch": 
0.8512658227848101, + "grad_norm": 0.5681187510490417, + "learning_rate": 8.200174840829136e-05, + "loss": 1.471, + "step": 8070 + }, + { + "epoch": 0.8523206751054853, + "grad_norm": 0.7938519716262817, + "learning_rate": 8.08640218552778e-05, + "loss": 1.4734, + "step": 8080 + }, + { + "epoch": 0.8533755274261603, + "grad_norm": 0.5567902326583862, + "learning_rate": 7.973379329762925e-05, + "loss": 1.4627, + "step": 8090 + }, + { + "epoch": 0.8544303797468354, + "grad_norm": 0.625292181968689, + "learning_rate": 7.861107540010845e-05, + "loss": 1.4525, + "step": 8100 + }, + { + "epoch": 0.8554852320675106, + "grad_norm": 0.6169321537017822, + "learning_rate": 7.749588074331762e-05, + "loss": 1.4739, + "step": 8110 + }, + { + "epoch": 0.8565400843881856, + "grad_norm": 0.5991222858428955, + "learning_rate": 7.63882218235575e-05, + "loss": 1.4613, + "step": 8120 + }, + { + "epoch": 0.8575949367088608, + "grad_norm": 0.5360255241394043, + "learning_rate": 7.528811105268699e-05, + "loss": 1.4649, + "step": 8130 + }, + { + "epoch": 0.8586497890295358, + "grad_norm": 0.5714207291603088, + "learning_rate": 7.41955607579845e-05, + "loss": 1.4615, + "step": 8140 + }, + { + "epoch": 0.859704641350211, + "grad_norm": 0.5800121426582336, + "learning_rate": 7.311058318200969e-05, + "loss": 1.4577, + "step": 8150 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 0.5521470308303833, + "learning_rate": 7.203319048246599e-05, + "loss": 1.4696, + "step": 8160 + }, + { + "epoch": 0.8618143459915611, + "grad_norm": 0.620658278465271, + "learning_rate": 7.096339473206471e-05, + "loss": 1.458, + "step": 8170 + }, + { + "epoch": 0.8628691983122363, + "grad_norm": 0.649807870388031, + "learning_rate": 6.990120791838953e-05, + "loss": 1.4696, + "step": 8180 + }, + { + "epoch": 0.8639240506329114, + "grad_norm": 0.6516839861869812, + "learning_rate": 6.884664194376233e-05, + "loss": 1.4607, + "step": 8190 + }, + { + "epoch": 0.8649789029535865, + "grad_norm": 0.5932875871658325, + 
"learning_rate": 6.779970862510989e-05, + "loss": 1.4705, + "step": 8200 + }, + { + "epoch": 0.8660337552742616, + "grad_norm": 0.5524738430976868, + "learning_rate": 6.676041969383107e-05, + "loss": 1.464, + "step": 8210 + }, + { + "epoch": 0.8670886075949367, + "grad_norm": 0.6496051549911499, + "learning_rate": 6.572878679566605e-05, + "loss": 1.4721, + "step": 8220 + }, + { + "epoch": 0.8681434599156118, + "grad_norm": 0.7905246019363403, + "learning_rate": 6.470482149056509e-05, + "loss": 1.4697, + "step": 8230 + }, + { + "epoch": 0.869198312236287, + "grad_norm": 0.5781804323196411, + "learning_rate": 6.368853525255942e-05, + "loss": 1.4638, + "step": 8240 + }, + { + "epoch": 0.870253164556962, + "grad_norm": 0.5914996862411499, + "learning_rate": 6.267993946963249e-05, + "loss": 1.4795, + "step": 8250 + }, + { + "epoch": 0.8713080168776371, + "grad_norm": 0.6025715470314026, + "learning_rate": 6.167904544359265e-05, + "loss": 1.4707, + "step": 8260 + }, + { + "epoch": 0.8723628691983122, + "grad_norm": 0.7128942012786865, + "learning_rate": 6.068586438994617e-05, + "loss": 1.4643, + "step": 8270 + }, + { + "epoch": 0.8734177215189873, + "grad_norm": 0.6007360816001892, + "learning_rate": 5.970040743777161e-05, + "loss": 1.4514, + "step": 8280 + }, + { + "epoch": 0.8744725738396625, + "grad_norm": 0.5647073984146118, + "learning_rate": 5.8722685629595454e-05, + "loss": 1.4513, + "step": 8290 + }, + { + "epoch": 0.8755274261603375, + "grad_norm": 0.5333129167556763, + "learning_rate": 5.7752709921267855e-05, + "loss": 1.4753, + "step": 8300 + }, + { + "epoch": 0.8765822784810127, + "grad_norm": 0.6161176562309265, + "learning_rate": 5.6790491181840294e-05, + "loss": 1.4543, + "step": 8310 + }, + { + "epoch": 0.8776371308016878, + "grad_norm": 0.630627453327179, + "learning_rate": 5.583604019344354e-05, + "loss": 1.4698, + "step": 8320 + }, + { + "epoch": 0.8786919831223629, + "grad_norm": 0.5502766370773315, + "learning_rate": 5.4889367651167007e-05, + "loss": 
1.468, + "step": 8330 + }, + { + "epoch": 0.879746835443038, + "grad_norm": 0.6914411187171936, + "learning_rate": 5.3950484162938714e-05, + "loss": 1.4532, + "step": 8340 + }, + { + "epoch": 0.880801687763713, + "grad_norm": 0.6065025925636292, + "learning_rate": 5.3019400249406686e-05, + "loss": 1.4564, + "step": 8350 + }, + { + "epoch": 0.8818565400843882, + "grad_norm": 0.6388503909111023, + "learning_rate": 5.209612634382077e-05, + "loss": 1.4543, + "step": 8360 + }, + { + "epoch": 0.8829113924050633, + "grad_norm": 0.5875827670097351, + "learning_rate": 5.118067279191599e-05, + "loss": 1.4609, + "step": 8370 + }, + { + "epoch": 0.8839662447257384, + "grad_norm": 0.6648836135864258, + "learning_rate": 5.0273049851796205e-05, + "loss": 1.4678, + "step": 8380 + }, + { + "epoch": 0.8850210970464135, + "grad_norm": 0.5877416729927063, + "learning_rate": 4.9373267693819805e-05, + "loss": 1.46, + "step": 8390 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 0.6295384764671326, + "learning_rate": 4.848133640048513e-05, + "loss": 1.4588, + "step": 8400 + }, + { + "epoch": 0.8871308016877637, + "grad_norm": 0.748939037322998, + "learning_rate": 4.75972659663178e-05, + "loss": 1.48, + "step": 8410 + }, + { + "epoch": 0.8881856540084389, + "grad_norm": 0.5693717002868652, + "learning_rate": 4.672106629775882e-05, + "loss": 1.4633, + "step": 8420 + }, + { + "epoch": 0.8892405063291139, + "grad_norm": 0.5761023759841919, + "learning_rate": 4.585274721305333e-05, + "loss": 1.4618, + "step": 8430 + }, + { + "epoch": 0.890295358649789, + "grad_norm": 0.5543421506881714, + "learning_rate": 4.4992318442140575e-05, + "loss": 1.4556, + "step": 8440 + }, + { + "epoch": 0.8913502109704642, + "grad_norm": 0.5633760094642639, + "learning_rate": 4.413978962654508e-05, + "loss": 1.465, + "step": 8450 + }, + { + "epoch": 0.8924050632911392, + "grad_norm": 0.6168580651283264, + "learning_rate": 4.3295170319268554e-05, + "loss": 1.4608, + "step": 8460 + }, + { + "epoch": 
0.8934599156118144, + "grad_norm": 0.5306214094161987, + "learning_rate": 4.245846998468261e-05, + "loss": 1.461, + "step": 8470 + }, + { + "epoch": 0.8945147679324894, + "grad_norm": 0.5883339643478394, + "learning_rate": 4.16296979984232e-05, + "loss": 1.4554, + "step": 8480 + }, + { + "epoch": 0.8955696202531646, + "grad_norm": 0.5548875331878662, + "learning_rate": 4.080886364728506e-05, + "loss": 1.4619, + "step": 8490 + }, + { + "epoch": 0.8966244725738397, + "grad_norm": 0.5476236343383789, + "learning_rate": 3.999597612911793e-05, + "loss": 1.4498, + "step": 8500 + }, + { + "epoch": 0.8976793248945147, + "grad_norm": 0.5550110936164856, + "learning_rate": 3.9191044552723345e-05, + "loss": 1.4603, + "step": 8510 + }, + { + "epoch": 0.8987341772151899, + "grad_norm": 0.5657105445861816, + "learning_rate": 3.839407793775268e-05, + "loss": 1.4575, + "step": 8520 + }, + { + "epoch": 0.8997890295358649, + "grad_norm": 0.5904182195663452, + "learning_rate": 3.760508521460584e-05, + "loss": 1.472, + "step": 8530 + }, + { + "epoch": 0.9008438818565401, + "grad_norm": 0.7188125848770142, + "learning_rate": 3.682407522433173e-05, + "loss": 1.4701, + "step": 8540 + }, + { + "epoch": 0.9018987341772152, + "grad_norm": 0.5683848857879639, + "learning_rate": 3.605105671852854e-05, + "loss": 1.4591, + "step": 8550 + }, + { + "epoch": 0.9029535864978903, + "grad_norm": 0.5996320843696594, + "learning_rate": 3.528603835924626e-05, + "loss": 1.4493, + "step": 8560 + }, + { + "epoch": 0.9040084388185654, + "grad_norm": 0.640957236289978, + "learning_rate": 3.4529028718888935e-05, + "loss": 1.4699, + "step": 8570 + }, + { + "epoch": 0.9050632911392406, + "grad_norm": 0.5540004968643188, + "learning_rate": 3.378003628011938e-05, + "loss": 1.4623, + "step": 8580 + }, + { + "epoch": 0.9061181434599156, + "grad_norm": 0.6028122305870056, + "learning_rate": 3.303906943576346e-05, + "loss": 1.4657, + "step": 8590 + }, + { + "epoch": 0.9071729957805907, + "grad_norm": 
0.6463452577590942, + "learning_rate": 3.230613648871661e-05, + "loss": 1.4576, + "step": 8600 + }, + { + "epoch": 0.9082278481012658, + "grad_norm": 0.5300910472869873, + "learning_rate": 3.158124565185022e-05, + "loss": 1.459, + "step": 8610 + }, + { + "epoch": 0.9092827004219409, + "grad_norm": 0.6761503219604492, + "learning_rate": 3.086440504792026e-05, + "loss": 1.4554, + "step": 8620 + }, + { + "epoch": 0.9103375527426161, + "grad_norm": 0.6475768685340881, + "learning_rate": 3.015562270947553e-05, + "loss": 1.4641, + "step": 8630 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 0.6882293820381165, + "learning_rate": 2.945490657876837e-05, + "loss": 1.4491, + "step": 8640 + }, + { + "epoch": 0.9124472573839663, + "grad_norm": 0.5752027630805969, + "learning_rate": 2.8762264507665113e-05, + "loss": 1.4531, + "step": 8650 + }, + { + "epoch": 0.9135021097046413, + "grad_norm": 0.576434850692749, + "learning_rate": 2.807770425755829e-05, + "loss": 1.4545, + "step": 8660 + }, + { + "epoch": 0.9145569620253164, + "grad_norm": 0.6252720952033997, + "learning_rate": 2.7401233499279866e-05, + "loss": 1.457, + "step": 8670 + }, + { + "epoch": 0.9156118143459916, + "grad_norm": 0.5452733635902405, + "learning_rate": 2.6732859813014987e-05, + "loss": 1.4683, + "step": 8680 + }, + { + "epoch": 0.9166666666666666, + "grad_norm": 0.5618522763252258, + "learning_rate": 2.607259068821721e-05, + "loss": 1.4647, + "step": 8690 + }, + { + "epoch": 0.9177215189873418, + "grad_norm": 0.53113853931427, + "learning_rate": 2.5420433523524493e-05, + "loss": 1.461, + "step": 8700 + }, + { + "epoch": 0.9187763713080169, + "grad_norm": 0.5819805264472961, + "learning_rate": 2.4776395626676162e-05, + "loss": 1.458, + "step": 8710 + }, + { + "epoch": 0.919831223628692, + "grad_norm": 0.6076368093490601, + "learning_rate": 2.414048421443141e-05, + "loss": 1.453, + "step": 8720 + }, + { + "epoch": 0.9208860759493671, + "grad_norm": 0.529052197933197, + "learning_rate": 
2.3512706412488012e-05, + "loss": 1.4648, + "step": 8730 + }, + { + "epoch": 0.9219409282700421, + "grad_norm": 0.547086775302887, + "learning_rate": 2.2893069255402993e-05, + "loss": 1.4546, + "step": 8740 + }, + { + "epoch": 0.9229957805907173, + "grad_norm": 0.563378095626831, + "learning_rate": 2.2281579686513176e-05, + "loss": 1.4523, + "step": 8750 + }, + { + "epoch": 0.9240506329113924, + "grad_norm": 0.5878322720527649, + "learning_rate": 2.1678244557857663e-05, + "loss": 1.4391, + "step": 8760 + }, + { + "epoch": 0.9251054852320675, + "grad_norm": 0.603276252746582, + "learning_rate": 2.1083070630101232e-05, + "loss": 1.4507, + "step": 8770 + }, + { + "epoch": 0.9261603375527426, + "grad_norm": 0.5168069005012512, + "learning_rate": 2.0496064572458395e-05, + "loss": 1.461, + "step": 8780 + }, + { + "epoch": 0.9272151898734177, + "grad_norm": 0.5414597392082214, + "learning_rate": 1.991723296261863e-05, + "loss": 1.4517, + "step": 8790 + }, + { + "epoch": 0.9282700421940928, + "grad_norm": 0.5947030782699585, + "learning_rate": 1.9346582286672686e-05, + "loss": 1.4529, + "step": 8800 + }, + { + "epoch": 0.929324894514768, + "grad_norm": 0.5995769500732422, + "learning_rate": 1.878411893904014e-05, + "loss": 1.4692, + "step": 8810 + }, + { + "epoch": 0.930379746835443, + "grad_norm": 0.5068775415420532, + "learning_rate": 1.822984922239737e-05, + "loss": 1.4558, + "step": 8820 + }, + { + "epoch": 0.9314345991561181, + "grad_norm": 0.6213989853858948, + "learning_rate": 1.7683779347607286e-05, + "loss": 1.4709, + "step": 8830 + }, + { + "epoch": 0.9324894514767933, + "grad_norm": 0.5615682005882263, + "learning_rate": 1.714591543364938e-05, + "loss": 1.4567, + "step": 8840 + }, + { + "epoch": 0.9335443037974683, + "grad_norm": 0.5593920946121216, + "learning_rate": 1.6616263507551437e-05, + "loss": 1.4558, + "step": 8850 + }, + { + "epoch": 0.9345991561181435, + "grad_norm": 0.5650639533996582, + "learning_rate": 1.609482950432195e-05, + "loss": 1.4536, + 
"step": 8860 + }, + { + "epoch": 0.9356540084388185, + "grad_norm": 0.5335788726806641, + "learning_rate": 1.5581619266883563e-05, + "loss": 1.4578, + "step": 8870 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 0.6423068642616272, + "learning_rate": 1.5076638546007548e-05, + "loss": 1.4633, + "step": 8880 + }, + { + "epoch": 0.9377637130801688, + "grad_norm": 0.5491659045219421, + "learning_rate": 1.457989300024945e-05, + "loss": 1.4612, + "step": 8890 + }, + { + "epoch": 0.9388185654008439, + "grad_norm": 0.5565329194068909, + "learning_rate": 1.4091388195885625e-05, + "loss": 1.4506, + "step": 8900 + }, + { + "epoch": 0.939873417721519, + "grad_norm": 0.6494315266609192, + "learning_rate": 1.3611129606851041e-05, + "loss": 1.4509, + "step": 8910 + }, + { + "epoch": 0.9409282700421941, + "grad_norm": 0.5353463292121887, + "learning_rate": 1.313912261467759e-05, + "loss": 1.4568, + "step": 8920 + }, + { + "epoch": 0.9419831223628692, + "grad_norm": 0.5631817579269409, + "learning_rate": 1.267537250843412e-05, + "loss": 1.4549, + "step": 8930 + }, + { + "epoch": 0.9430379746835443, + "grad_norm": 0.587587296962738, + "learning_rate": 1.2219884484667071e-05, + "loss": 1.4568, + "step": 8940 + }, + { + "epoch": 0.9440928270042194, + "grad_norm": 0.5251736044883728, + "learning_rate": 1.1772663647341947e-05, + "loss": 1.4563, + "step": 8950 + }, + { + "epoch": 0.9451476793248945, + "grad_norm": 0.5785340070724487, + "learning_rate": 1.1333715007786932e-05, + "loss": 1.4617, + "step": 8960 + }, + { + "epoch": 0.9462025316455697, + "grad_norm": 0.5118443369865417, + "learning_rate": 1.0903043484635694e-05, + "loss": 1.459, + "step": 8970 + }, + { + "epoch": 0.9472573839662447, + "grad_norm": 0.6425765752792358, + "learning_rate": 1.0480653903772924e-05, + "loss": 1.4539, + "step": 8980 + }, + { + "epoch": 0.9483122362869199, + "grad_norm": 0.5221004486083984, + "learning_rate": 1.0066550998280132e-05, + "loss": 1.4606, + "step": 8990 + }, + { + "epoch": 
0.9493670886075949, + "grad_norm": 0.561033308506012, + "learning_rate": 9.660739408382608e-06, + "loss": 1.4558, + "step": 9000 + }, + { + "epoch": 0.95042194092827, + "grad_norm": 0.5691772103309631, + "learning_rate": 9.26322368139737e-06, + "loss": 1.4513, + "step": 9010 + }, + { + "epoch": 0.9514767932489452, + "grad_norm": 0.5327644348144531, + "learning_rate": 8.874008271682222e-06, + "loss": 1.4517, + "step": 9020 + }, + { + "epoch": 0.9525316455696202, + "grad_norm": 0.5187295079231262, + "learning_rate": 8.493097540585775e-06, + "loss": 1.4727, + "step": 9030 + }, + { + "epoch": 0.9535864978902954, + "grad_norm": 0.5272732973098755, + "learning_rate": 8.120495756399005e-06, + "loss": 1.4465, + "step": 9040 + }, + { + "epoch": 0.9546413502109705, + "grad_norm": 0.5403730273246765, + "learning_rate": 7.756207094306605e-06, + "loss": 1.4593, + "step": 9050 + }, + { + "epoch": 0.9556962025316456, + "grad_norm": 0.5256035923957825, + "learning_rate": 7.400235636340957e-06, + "loss": 1.4592, + "step": 9060 + }, + { + "epoch": 0.9567510548523207, + "grad_norm": 0.5263029336929321, + "learning_rate": 7.0525853713362395e-06, + "loss": 1.4614, + "step": 9070 + }, + { + "epoch": 0.9578059071729957, + "grad_norm": 0.5421921610832214, + "learning_rate": 6.71326019488322e-06, + "loss": 1.4605, + "step": 9080 + }, + { + "epoch": 0.9588607594936709, + "grad_norm": 0.522507905960083, + "learning_rate": 6.3822639092862846e-06, + "loss": 1.464, + "step": 9090 + }, + { + "epoch": 0.959915611814346, + "grad_norm": 0.5461652278900146, + "learning_rate": 6.059600223520478e-06, + "loss": 1.4402, + "step": 9100 + }, + { + "epoch": 0.9609704641350211, + "grad_norm": 0.5702023506164551, + "learning_rate": 5.745272753189784e-06, + "loss": 1.4528, + "step": 9110 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 0.5699732303619385, + "learning_rate": 5.439285020487156e-06, + "loss": 1.4626, + "step": 9120 + }, + { + "epoch": 0.9630801687763713, + "grad_norm": 0.542851984500885, + 
"learning_rate": 5.141640454154467e-06, + "loss": 1.4479, + "step": 9130 + }, + { + "epoch": 0.9641350210970464, + "grad_norm": 0.5577608942985535, + "learning_rate": 4.852342389444458e-06, + "loss": 1.4681, + "step": 9140 + }, + { + "epoch": 0.9651898734177216, + "grad_norm": 0.5622571110725403, + "learning_rate": 4.571394068083185e-06, + "loss": 1.4548, + "step": 9150 + }, + { + "epoch": 0.9662447257383966, + "grad_norm": 0.5297626852989197, + "learning_rate": 4.298798638233709e-06, + "loss": 1.4635, + "step": 9160 + }, + { + "epoch": 0.9672995780590717, + "grad_norm": 0.5667217969894409, + "learning_rate": 4.034559154461049e-06, + "loss": 1.4597, + "step": 9170 + }, + { + "epoch": 0.9683544303797469, + "grad_norm": 0.5147329568862915, + "learning_rate": 3.7786785776976198e-06, + "loss": 1.4536, + "step": 9180 + }, + { + "epoch": 0.9694092827004219, + "grad_norm": 0.6363189816474915, + "learning_rate": 3.5311597752100964e-06, + "loss": 1.452, + "step": 9190 + }, + { + "epoch": 0.9704641350210971, + "grad_norm": 0.5682249069213867, + "learning_rate": 3.2920055205676867e-06, + "loss": 1.4541, + "step": 9200 + }, + { + "epoch": 0.9715189873417721, + "grad_norm": 0.5686089396476746, + "learning_rate": 3.06121849361049e-06, + "loss": 1.4574, + "step": 9210 + }, + { + "epoch": 0.9725738396624473, + "grad_norm": 0.5313770771026611, + "learning_rate": 2.838801280419856e-06, + "loss": 1.4547, + "step": 9220 + }, + { + "epoch": 0.9736286919831224, + "grad_norm": 0.5421391725540161, + "learning_rate": 2.624756373289322e-06, + "loss": 1.4448, + "step": 9230 + }, + { + "epoch": 0.9746835443037974, + "grad_norm": 0.5371723771095276, + "learning_rate": 2.419086170696472e-06, + "loss": 1.4393, + "step": 9240 + }, + { + "epoch": 0.9757383966244726, + "grad_norm": 0.5149198770523071, + "learning_rate": 2.2217929772764545e-06, + "loss": 1.453, + "step": 9250 + }, + { + "epoch": 0.9767932489451476, + "grad_norm": 0.5437723994255066, + "learning_rate": 2.0328790037957568e-06, + 
"loss": 1.4476, + "step": 9260 + }, + { + "epoch": 0.9778481012658228, + "grad_norm": 0.5555050373077393, + "learning_rate": 1.8523463671278052e-06, + "loss": 1.4595, + "step": 9270 + }, + { + "epoch": 0.9789029535864979, + "grad_norm": 0.5433332324028015, + "learning_rate": 1.6801970902288188e-06, + "loss": 1.4471, + "step": 9280 + }, + { + "epoch": 0.979957805907173, + "grad_norm": 0.5250236988067627, + "learning_rate": 1.5164331021155774e-06, + "loss": 1.4552, + "step": 9290 + }, + { + "epoch": 0.9810126582278481, + "grad_norm": 0.6419638395309448, + "learning_rate": 1.3610562378435221e-06, + "loss": 1.4561, + "step": 9300 + }, + { + "epoch": 0.9820675105485233, + "grad_norm": 0.5724244713783264, + "learning_rate": 1.2140682384862712e-06, + "loss": 1.4545, + "step": 9310 + }, + { + "epoch": 0.9831223628691983, + "grad_norm": 0.5455237627029419, + "learning_rate": 1.0754707511161365e-06, + "loss": 1.4479, + "step": 9320 + }, + { + "epoch": 0.9841772151898734, + "grad_norm": 0.5163520574569702, + "learning_rate": 9.452653287856383e-07, + "loss": 1.4612, + "step": 9330 + }, + { + "epoch": 0.9852320675105485, + "grad_norm": 0.5218765735626221, + "learning_rate": 8.234534305101015e-07, + "loss": 1.4582, + "step": 9340 + }, + { + "epoch": 0.9862869198312236, + "grad_norm": 0.5098288059234619, + "learning_rate": 7.100364212513367e-07, + "loss": 1.4714, + "step": 9350 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 0.5313687324523926, + "learning_rate": 6.050155719023176e-07, + "loss": 1.4645, + "step": 9360 + }, + { + "epoch": 0.9883966244725738, + "grad_norm": 0.553096354007721, + "learning_rate": 5.08392059272944e-07, + "loss": 1.4581, + "step": 9370 + }, + { + "epoch": 0.989451476793249, + "grad_norm": 0.5735213160514832, + "learning_rate": 4.2016696607680147e-07, + "loss": 1.4472, + "step": 9380 + }, + { + "epoch": 0.990506329113924, + "grad_norm": 0.5305885076522827, + "learning_rate": 3.4034128091917085e-07, + "loss": 1.4491, + "step": 9390 + }, + { + 
"epoch": 0.9915611814345991, + "grad_norm": 0.5038774013519287, + "learning_rate": 2.689158982859541e-07, + "loss": 1.463, + "step": 9400 + }, + { + "epoch": 0.9926160337552743, + "grad_norm": 0.5241613388061523, + "learning_rate": 2.05891618533266e-07, + "loss": 1.4385, + "step": 9410 + }, + { + "epoch": 0.9936708860759493, + "grad_norm": 0.5079508423805237, + "learning_rate": 1.5126914787894074e-07, + "loss": 1.4589, + "step": 9420 + }, + { + "epoch": 0.9947257383966245, + "grad_norm": 0.5533608794212341, + "learning_rate": 1.0504909839462173e-07, + "loss": 1.4576, + "step": 9430 + }, + { + "epoch": 0.9957805907172996, + "grad_norm": 0.5440090894699097, + "learning_rate": 6.723198799826746e-08, + "loss": 1.4539, + "step": 9440 + }, + { + "epoch": 0.9968354430379747, + "grad_norm": 0.5556203126907349, + "learning_rate": 3.781824044932214e-08, + "loss": 1.463, + "step": 9450 + }, + { + "epoch": 0.9978902953586498, + "grad_norm": 0.5242736339569092, + "learning_rate": 1.6808185342970238e-08, + "loss": 1.4478, + "step": 9460 + }, + { + "epoch": 0.9989451476793249, + "grad_norm": 0.5323259830474854, + "learning_rate": 4.202058107305451e-09, + "loss": 1.4591, + "step": 9470 + }, + { + "epoch": 1.0, + "grad_norm": 1.7408448457717896, + "learning_rate": 0.0, + "loss": 1.4497, + "step": 9480 + } + ], + "logging_steps": 10, + "max_steps": 9480, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.837914036668352e+16, + "train_batch_size": 1024, + "trial_name": null, + "trial_params": null +} diff --git a/saves-gptj-cosine/checkpoint-9480/training_args.bin b/saves-gptj-cosine/checkpoint-9480/training_args.bin new file mode 100644 index 
0000000000000000000000000000000000000000..62ea34567b649c6aa8742f348a4d509b48720d16 --- /dev/null +++ b/saves-gptj-cosine/checkpoint-9480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:676493b4ada2e0167b183b52df9b03152dc3382a502eb7a94eb01f023a6ed3a6 +size 5176 diff --git a/saves-gptj-cosine/config.json b/saves-gptj-cosine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f91579c3f4d0a727ebdfcdd5a519a318b31cd88d --- /dev/null +++ b/saves-gptj-cosine/config.json @@ -0,0 +1,28 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPTJForCausalLM" + ], + "attn_pdrop": 0.0, + "bos_token_id": 50256, + "embd_pdrop": 0.0, + "eos_token_id": 50256, + "hidden_act": "gelu", + "initializer_range": 0.02, + "intermediate_size": 1024, + "layer_norm_epsilon": 1e-05, + "model_type": "gptj", + "n_embd": 256, + "n_head": 4, + "n_inner": null, + "n_layer": 2, + "n_positions": 2048, + "num_key_value_heads": 4, + "resid_pdrop": 0.0, + "rotary_dim": 64, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-gptj-cosine/generation_config.json b/saves-gptj-cosine/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..fb2eba6018c75d5bca061373b0ddaa2abf0a1f68 --- /dev/null +++ b/saves-gptj-cosine/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 50256, + "eos_token_id": 50256, + "transformers_version": "4.42.4" +} diff --git a/saves-gptj-cosine/model.safetensors b/saves-gptj-cosine/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..080b88ec77db8c51c6c4bc847ced96e5a517b74c --- /dev/null +++ b/saves-gptj-cosine/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44f67e0a76e729d1041fbcc1698900a956363b1d4502c4352362c8e8f0612dc5 +size 8366216 diff --git 
a/saves-gptj-cosine/result.log b/saves-gptj-cosine/result.log new file mode 100644 index 0000000000000000000000000000000000000000..c3f310c594b8b9b8fbfb702eface9063f406cfa9 --- /dev/null +++ b/saves-gptj-cosine/result.log @@ -0,0 +1 @@ +{'train_runtime': 3097.3706, 'train_samples_per_second': 3133.821, 'train_steps_per_second': 3.061, 'train_loss': 1.6975962274688206, 'epoch': 1.0} \ No newline at end of file diff --git a/saves-gptj-cosine/special_tokens_map.json b/saves-gptj-cosine/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-gptj-cosine/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-gptj-cosine/tokenizer.json b/saves-gptj-cosine/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-gptj-cosine/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + 
"pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, 
+ "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, 
+ "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, 
+ "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + 
"èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + 
"Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + 
"èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + 
"form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + 
"缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 
1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, 
+ "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 
1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 
1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + 
".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 
1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + 
"å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ 
(", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", + "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-gptj-cosine/tokenizer_config.json b/saves-gptj-cosine/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-gptj-cosine/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + 
"model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-gptj/checkpoint-9480/config.json b/saves-gptj/checkpoint-9480/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f91579c3f4d0a727ebdfcdd5a519a318b31cd88d --- /dev/null +++ b/saves-gptj/checkpoint-9480/config.json @@ -0,0 +1,28 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPTJForCausalLM" + ], + "attn_pdrop": 0.0, + "bos_token_id": 50256, + "embd_pdrop": 0.0, + "eos_token_id": 50256, + "hidden_act": "gelu", + "initializer_range": 0.02, + "intermediate_size": 1024, + "layer_norm_epsilon": 1e-05, + "model_type": "gptj", + "n_embd": 256, + "n_head": 4, + "n_inner": null, + "n_layer": 2, + "n_positions": 2048, + "num_key_value_heads": 4, + "resid_pdrop": 0.0, + "rotary_dim": 64, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-gptj/checkpoint-9480/generation_config.json b/saves-gptj/checkpoint-9480/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..fb2eba6018c75d5bca061373b0ddaa2abf0a1f68 --- /dev/null +++ b/saves-gptj/checkpoint-9480/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 50256, + "eos_token_id": 50256, + "transformers_version": "4.42.4" +} diff --git a/saves-gptj/checkpoint-9480/model.safetensors b/saves-gptj/checkpoint-9480/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6f11246919536c047b60f76c32640258930f59c8 --- /dev/null +++ b/saves-gptj/checkpoint-9480/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ff804f70751681743f7d085e3c31b02d74e01a49724b04666d3852a348d8247 +size 8366216 diff --git a/saves-gptj/checkpoint-9480/optimizer.pt b/saves-gptj/checkpoint-9480/optimizer.pt new 
file mode 100644 index 0000000000000000000000000000000000000000..d0c112cabd7c14deb3fa081d51159d9e4f530a68 --- /dev/null +++ b/saves-gptj/checkpoint-9480/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7327d0a61601c5aacf3b3da15cc4da8b6070331a97c332a28b6d4ae491b1e347 +size 16748182 diff --git a/saves-gptj/checkpoint-9480/rng_state.pth b/saves-gptj/checkpoint-9480/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f8291cd6ce87668b786a72f3e93d072fbe54902 --- /dev/null +++ b/saves-gptj/checkpoint-9480/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c917636c7a58af68a29056522a757e9f9b99005b776641aa157c536967817d +size 14244 diff --git a/saves-gptj/checkpoint-9480/scheduler.pt b/saves-gptj/checkpoint-9480/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..63473f23a031ab0f869bb406d5cf89839262f03d --- /dev/null +++ b/saves-gptj/checkpoint-9480/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbb2bea2f7536b844ad9bb1bf6c3877fce0b1eb4d96764e140560dbf207ce6aa +size 1064 diff --git a/saves-gptj/checkpoint-9480/special_tokens_map.json b/saves-gptj/checkpoint-9480/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-gptj/checkpoint-9480/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-gptj/checkpoint-9480/tokenizer.json b/saves-gptj/checkpoint-9480/tokenizer.json new file mode 100644 index 
0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-gptj/checkpoint-9480/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 
32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + 
"Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + 
"è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + 
"ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + 
"Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 
791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 
932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, 
+ "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 
1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 
1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 
1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, 
+ "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + 
"éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 
1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + 
"æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-gptj/checkpoint-9480/tokenizer_config.json b/saves-gptj/checkpoint-9480/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-gptj/checkpoint-9480/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + 
"errors": "replace", + "model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-gptj/checkpoint-9480/trainer_state.json b/saves-gptj/checkpoint-9480/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d5d2b0926c00050013663a5773fdbfdf1fc5d167 --- /dev/null +++ b/saves-gptj/checkpoint-9480/trainer_state.json @@ -0,0 +1,6669 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 9480, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0010548523206751054, + "grad_norm": 1.222225546836853, + "learning_rate": 0.00015822784810126583, + "loss": 7.4619, + "step": 10 + }, + { + "epoch": 0.002109704641350211, + "grad_norm": 1.078452467918396, + "learning_rate": 0.00031645569620253165, + "loss": 6.7525, + "step": 20 + }, + { + "epoch": 0.0031645569620253164, + "grad_norm": 0.8753913044929504, + "learning_rate": 0.00047468354430379745, + "loss": 6.1156, + "step": 30 + }, + { + "epoch": 0.004219409282700422, + "grad_norm": 0.6476579308509827, + "learning_rate": 0.0006329113924050633, + "loss": 5.5192, + "step": 40 + }, + { + "epoch": 0.005274261603375527, + "grad_norm": 0.41553089022636414, + "learning_rate": 0.0007911392405063291, + "loss": 5.002, + "step": 50 + }, + { + "epoch": 0.006329113924050633, + "grad_norm": 0.5763905644416809, + "learning_rate": 0.0009493670886075949, + "loss": 4.5686, + "step": 60 + }, + { + "epoch": 0.007383966244725738, + "grad_norm": 0.525600790977478, + "learning_rate": 0.0011075949367088608, + "loss": 4.2742, + "step": 70 + }, + { + "epoch": 0.008438818565400843, + "grad_norm": 0.5770911574363708, + "learning_rate": 0.0012658227848101266, + "loss": 4.0828, + "step": 80 + }, + { + "epoch": 0.00949367088607595, + "grad_norm": 0.6319863796234131, + "learning_rate": 
0.0014240506329113926, + "loss": 3.9172, + "step": 90 + }, + { + "epoch": 0.010548523206751054, + "grad_norm": 0.46108534932136536, + "learning_rate": 0.0015, + "loss": 3.8094, + "step": 100 + }, + { + "epoch": 0.011603375527426161, + "grad_norm": 0.5290006399154663, + "learning_rate": 0.0015, + "loss": 3.6767, + "step": 110 + }, + { + "epoch": 0.012658227848101266, + "grad_norm": 0.7682923674583435, + "learning_rate": 0.0015, + "loss": 3.6004, + "step": 120 + }, + { + "epoch": 0.013713080168776372, + "grad_norm": 0.39833033084869385, + "learning_rate": 0.0015, + "loss": 3.5234, + "step": 130 + }, + { + "epoch": 0.014767932489451477, + "grad_norm": 0.7431315183639526, + "learning_rate": 0.0015, + "loss": 3.4386, + "step": 140 + }, + { + "epoch": 0.015822784810126583, + "grad_norm": 0.5607738494873047, + "learning_rate": 0.0015, + "loss": 3.3826, + "step": 150 + }, + { + "epoch": 0.016877637130801686, + "grad_norm": 0.8131740689277649, + "learning_rate": 0.0015, + "loss": 3.3305, + "step": 160 + }, + { + "epoch": 0.017932489451476793, + "grad_norm": 0.6065526008605957, + "learning_rate": 0.0015, + "loss": 3.2766, + "step": 170 + }, + { + "epoch": 0.0189873417721519, + "grad_norm": 0.4178932309150696, + "learning_rate": 0.0015, + "loss": 3.2339, + "step": 180 + }, + { + "epoch": 0.020042194092827006, + "grad_norm": 0.4412979483604431, + "learning_rate": 0.0015, + "loss": 3.1703, + "step": 190 + }, + { + "epoch": 0.02109704641350211, + "grad_norm": 0.8100572824478149, + "learning_rate": 0.0015, + "loss": 3.1417, + "step": 200 + }, + { + "epoch": 0.022151898734177215, + "grad_norm": 0.4573080241680145, + "learning_rate": 0.0015, + "loss": 3.0965, + "step": 210 + }, + { + "epoch": 0.023206751054852322, + "grad_norm": 0.4430824816226959, + "learning_rate": 0.0015, + "loss": 3.0528, + "step": 220 + }, + { + "epoch": 0.024261603375527425, + "grad_norm": 0.5088762640953064, + "learning_rate": 0.0015, + "loss": 3.0013, + "step": 230 + }, + { + "epoch": 0.02531645569620253, + 
"grad_norm": 0.638719916343689, + "learning_rate": 0.0015, + "loss": 2.9563, + "step": 240 + }, + { + "epoch": 0.026371308016877638, + "grad_norm": 0.6850151419639587, + "learning_rate": 0.0015, + "loss": 2.9299, + "step": 250 + }, + { + "epoch": 0.027426160337552744, + "grad_norm": 0.5137956142425537, + "learning_rate": 0.0015, + "loss": 2.8792, + "step": 260 + }, + { + "epoch": 0.028481012658227847, + "grad_norm": 0.7760046720504761, + "learning_rate": 0.0015, + "loss": 2.8417, + "step": 270 + }, + { + "epoch": 0.029535864978902954, + "grad_norm": 0.5475852489471436, + "learning_rate": 0.0015, + "loss": 2.8031, + "step": 280 + }, + { + "epoch": 0.03059071729957806, + "grad_norm": 0.6375889182090759, + "learning_rate": 0.0015, + "loss": 2.7701, + "step": 290 + }, + { + "epoch": 0.03164556962025317, + "grad_norm": 0.5618500113487244, + "learning_rate": 0.0015, + "loss": 2.7413, + "step": 300 + }, + { + "epoch": 0.03270042194092827, + "grad_norm": 0.5540605783462524, + "learning_rate": 0.0015, + "loss": 2.7045, + "step": 310 + }, + { + "epoch": 0.03375527426160337, + "grad_norm": 0.7945109605789185, + "learning_rate": 0.0015, + "loss": 2.678, + "step": 320 + }, + { + "epoch": 0.03481012658227848, + "grad_norm": 0.6178033351898193, + "learning_rate": 0.0015, + "loss": 2.6611, + "step": 330 + }, + { + "epoch": 0.035864978902953586, + "grad_norm": 0.5869758129119873, + "learning_rate": 0.0015, + "loss": 2.626, + "step": 340 + }, + { + "epoch": 0.03691983122362869, + "grad_norm": 0.5268247127532959, + "learning_rate": 0.0015, + "loss": 2.577, + "step": 350 + }, + { + "epoch": 0.0379746835443038, + "grad_norm": 0.5654016733169556, + "learning_rate": 0.0015, + "loss": 2.5561, + "step": 360 + }, + { + "epoch": 0.039029535864978905, + "grad_norm": 0.5816453695297241, + "learning_rate": 0.0015, + "loss": 2.5365, + "step": 370 + }, + { + "epoch": 0.04008438818565401, + "grad_norm": 0.9008394479751587, + "learning_rate": 0.0015, + "loss": 2.5278, + "step": 380 + }, + { + 
"epoch": 0.04113924050632911, + "grad_norm": 0.6613947749137878, + "learning_rate": 0.0015, + "loss": 2.4967, + "step": 390 + }, + { + "epoch": 0.04219409282700422, + "grad_norm": 0.6217777132987976, + "learning_rate": 0.0015, + "loss": 2.4652, + "step": 400 + }, + { + "epoch": 0.043248945147679324, + "grad_norm": 0.724476158618927, + "learning_rate": 0.0015, + "loss": 2.4519, + "step": 410 + }, + { + "epoch": 0.04430379746835443, + "grad_norm": 0.7924133539199829, + "learning_rate": 0.0015, + "loss": 2.431, + "step": 420 + }, + { + "epoch": 0.04535864978902954, + "grad_norm": 0.5326886773109436, + "learning_rate": 0.0015, + "loss": 2.4088, + "step": 430 + }, + { + "epoch": 0.046413502109704644, + "grad_norm": 0.7292129397392273, + "learning_rate": 0.0015, + "loss": 2.3872, + "step": 440 + }, + { + "epoch": 0.04746835443037975, + "grad_norm": 0.5934910774230957, + "learning_rate": 0.0015, + "loss": 2.3826, + "step": 450 + }, + { + "epoch": 0.04852320675105485, + "grad_norm": 0.5619164109230042, + "learning_rate": 0.0015, + "loss": 2.3622, + "step": 460 + }, + { + "epoch": 0.049578059071729956, + "grad_norm": 0.5665011405944824, + "learning_rate": 0.0015, + "loss": 2.3346, + "step": 470 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 0.5501720309257507, + "learning_rate": 0.0015, + "loss": 2.3326, + "step": 480 + }, + { + "epoch": 0.05168776371308017, + "grad_norm": 0.5323930382728577, + "learning_rate": 0.0015, + "loss": 2.3104, + "step": 490 + }, + { + "epoch": 0.052742616033755275, + "grad_norm": 0.5599923133850098, + "learning_rate": 0.0015, + "loss": 2.3019, + "step": 500 + }, + { + "epoch": 0.05379746835443038, + "grad_norm": 0.6306147575378418, + "learning_rate": 0.0015, + "loss": 2.2891, + "step": 510 + }, + { + "epoch": 0.05485232067510549, + "grad_norm": 0.5090570449829102, + "learning_rate": 0.0015, + "loss": 2.2739, + "step": 520 + }, + { + "epoch": 0.05590717299578059, + "grad_norm": 0.4715142548084259, + "learning_rate": 0.0015, + "loss": 
2.2578, + "step": 530 + }, + { + "epoch": 0.056962025316455694, + "grad_norm": 0.5620536804199219, + "learning_rate": 0.0015, + "loss": 2.2408, + "step": 540 + }, + { + "epoch": 0.0580168776371308, + "grad_norm": 0.6516860127449036, + "learning_rate": 0.0015, + "loss": 2.2441, + "step": 550 + }, + { + "epoch": 0.05907172995780591, + "grad_norm": 0.525547981262207, + "learning_rate": 0.0015, + "loss": 2.2082, + "step": 560 + }, + { + "epoch": 0.060126582278481014, + "grad_norm": 0.5827058553695679, + "learning_rate": 0.0015, + "loss": 2.212, + "step": 570 + }, + { + "epoch": 0.06118143459915612, + "grad_norm": 0.5643989443778992, + "learning_rate": 0.0015, + "loss": 2.2153, + "step": 580 + }, + { + "epoch": 0.06223628691983123, + "grad_norm": 0.5231021046638489, + "learning_rate": 0.0015, + "loss": 2.1931, + "step": 590 + }, + { + "epoch": 0.06329113924050633, + "grad_norm": 0.7255063056945801, + "learning_rate": 0.0015, + "loss": 2.1769, + "step": 600 + }, + { + "epoch": 0.06434599156118144, + "grad_norm": 0.5210108160972595, + "learning_rate": 0.0015, + "loss": 2.1769, + "step": 610 + }, + { + "epoch": 0.06540084388185655, + "grad_norm": 0.5545986294746399, + "learning_rate": 0.0015, + "loss": 2.1607, + "step": 620 + }, + { + "epoch": 0.06645569620253164, + "grad_norm": 0.5367145538330078, + "learning_rate": 0.0015, + "loss": 2.148, + "step": 630 + }, + { + "epoch": 0.06751054852320675, + "grad_norm": 0.8410078287124634, + "learning_rate": 0.0015, + "loss": 2.1626, + "step": 640 + }, + { + "epoch": 0.06856540084388185, + "grad_norm": 0.4836041331291199, + "learning_rate": 0.0015, + "loss": 2.1497, + "step": 650 + }, + { + "epoch": 0.06962025316455696, + "grad_norm": 0.6116440296173096, + "learning_rate": 0.0015, + "loss": 2.1405, + "step": 660 + }, + { + "epoch": 0.07067510548523206, + "grad_norm": 0.6062175035476685, + "learning_rate": 0.0015, + "loss": 2.1219, + "step": 670 + }, + { + "epoch": 0.07172995780590717, + "grad_norm": 0.4857017993927002, + 
"learning_rate": 0.0015, + "loss": 2.1154, + "step": 680 + }, + { + "epoch": 0.07278481012658228, + "grad_norm": 0.5630396008491516, + "learning_rate": 0.0015, + "loss": 2.1264, + "step": 690 + }, + { + "epoch": 0.07383966244725738, + "grad_norm": 0.5638059973716736, + "learning_rate": 0.0015, + "loss": 2.1056, + "step": 700 + }, + { + "epoch": 0.07489451476793249, + "grad_norm": 0.49564117193222046, + "learning_rate": 0.0015, + "loss": 2.0996, + "step": 710 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 0.4810255169868469, + "learning_rate": 0.0015, + "loss": 2.082, + "step": 720 + }, + { + "epoch": 0.0770042194092827, + "grad_norm": 0.5104503035545349, + "learning_rate": 0.0015, + "loss": 2.0766, + "step": 730 + }, + { + "epoch": 0.07805907172995781, + "grad_norm": 0.5939556956291199, + "learning_rate": 0.0015, + "loss": 2.0885, + "step": 740 + }, + { + "epoch": 0.07911392405063292, + "grad_norm": 0.6907451152801514, + "learning_rate": 0.0015, + "loss": 2.0679, + "step": 750 + }, + { + "epoch": 0.08016877637130802, + "grad_norm": 0.8348866105079651, + "learning_rate": 0.0015, + "loss": 2.068, + "step": 760 + }, + { + "epoch": 0.08122362869198312, + "grad_norm": 0.5212741494178772, + "learning_rate": 0.0015, + "loss": 2.0602, + "step": 770 + }, + { + "epoch": 0.08227848101265822, + "grad_norm": 0.5968546271324158, + "learning_rate": 0.0015, + "loss": 2.0551, + "step": 780 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 0.559980034828186, + "learning_rate": 0.0015, + "loss": 2.0586, + "step": 790 + }, + { + "epoch": 0.08438818565400844, + "grad_norm": 0.4857085049152374, + "learning_rate": 0.0015, + "loss": 2.0353, + "step": 800 + }, + { + "epoch": 0.08544303797468354, + "grad_norm": 0.5006406903266907, + "learning_rate": 0.0015, + "loss": 2.0215, + "step": 810 + }, + { + "epoch": 0.08649789029535865, + "grad_norm": 0.7630496621131897, + "learning_rate": 0.0015, + "loss": 2.0361, + "step": 820 + }, + { + "epoch": 0.08755274261603375, + "grad_norm": 
0.5243736505508423, + "learning_rate": 0.0015, + "loss": 2.0407, + "step": 830 + }, + { + "epoch": 0.08860759493670886, + "grad_norm": 0.4956797659397125, + "learning_rate": 0.0015, + "loss": 2.0134, + "step": 840 + }, + { + "epoch": 0.08966244725738397, + "grad_norm": 0.43798890709877014, + "learning_rate": 0.0015, + "loss": 2.0157, + "step": 850 + }, + { + "epoch": 0.09071729957805907, + "grad_norm": 0.47858327627182007, + "learning_rate": 0.0015, + "loss": 2.0173, + "step": 860 + }, + { + "epoch": 0.09177215189873418, + "grad_norm": 0.5558115839958191, + "learning_rate": 0.0015, + "loss": 2.0117, + "step": 870 + }, + { + "epoch": 0.09282700421940929, + "grad_norm": 0.6888266205787659, + "learning_rate": 0.0015, + "loss": 2.006, + "step": 880 + }, + { + "epoch": 0.0938818565400844, + "grad_norm": 0.5976872444152832, + "learning_rate": 0.0015, + "loss": 1.9997, + "step": 890 + }, + { + "epoch": 0.0949367088607595, + "grad_norm": 0.47807228565216064, + "learning_rate": 0.0015, + "loss": 1.9948, + "step": 900 + }, + { + "epoch": 0.09599156118143459, + "grad_norm": 0.549640953540802, + "learning_rate": 0.0015, + "loss": 2.0041, + "step": 910 + }, + { + "epoch": 0.0970464135021097, + "grad_norm": 0.5190247893333435, + "learning_rate": 0.0015, + "loss": 1.9933, + "step": 920 + }, + { + "epoch": 0.0981012658227848, + "grad_norm": 0.5374358296394348, + "learning_rate": 0.0015, + "loss": 1.9809, + "step": 930 + }, + { + "epoch": 0.09915611814345991, + "grad_norm": 0.5424185395240784, + "learning_rate": 0.0015, + "loss": 1.9851, + "step": 940 + }, + { + "epoch": 0.10021097046413502, + "grad_norm": 0.5747806429862976, + "learning_rate": 0.0015, + "loss": 1.9648, + "step": 950 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 0.45154106616973877, + "learning_rate": 0.0015, + "loss": 1.9761, + "step": 960 + }, + { + "epoch": 0.10232067510548523, + "grad_norm": 0.8179157376289368, + "learning_rate": 0.0015, + "loss": 1.9759, + "step": 970 + }, + { + "epoch": 
0.10337552742616034, + "grad_norm": 0.5067345499992371, + "learning_rate": 0.0015, + "loss": 1.9564, + "step": 980 + }, + { + "epoch": 0.10443037974683544, + "grad_norm": 0.5212324857711792, + "learning_rate": 0.0015, + "loss": 1.9543, + "step": 990 + }, + { + "epoch": 0.10548523206751055, + "grad_norm": 0.657710075378418, + "learning_rate": 0.0015, + "loss": 1.9674, + "step": 1000 + }, + { + "epoch": 0.10654008438818566, + "grad_norm": 0.6337433457374573, + "learning_rate": 0.0015, + "loss": 1.957, + "step": 1010 + }, + { + "epoch": 0.10759493670886076, + "grad_norm": 0.5864647030830383, + "learning_rate": 0.0015, + "loss": 1.9505, + "step": 1020 + }, + { + "epoch": 0.10864978902953587, + "grad_norm": 0.5108038187026978, + "learning_rate": 0.0015, + "loss": 1.9498, + "step": 1030 + }, + { + "epoch": 0.10970464135021098, + "grad_norm": 0.4738903343677521, + "learning_rate": 0.0015, + "loss": 1.9449, + "step": 1040 + }, + { + "epoch": 0.11075949367088607, + "grad_norm": 0.5570417642593384, + "learning_rate": 0.0015, + "loss": 1.9461, + "step": 1050 + }, + { + "epoch": 0.11181434599156118, + "grad_norm": 0.605408251285553, + "learning_rate": 0.0015, + "loss": 1.9365, + "step": 1060 + }, + { + "epoch": 0.11286919831223628, + "grad_norm": 0.5146418213844299, + "learning_rate": 0.0015, + "loss": 1.9258, + "step": 1070 + }, + { + "epoch": 0.11392405063291139, + "grad_norm": 0.5443252921104431, + "learning_rate": 0.0015, + "loss": 1.9284, + "step": 1080 + }, + { + "epoch": 0.1149789029535865, + "grad_norm": 0.8356622457504272, + "learning_rate": 0.0015, + "loss": 1.9377, + "step": 1090 + }, + { + "epoch": 0.1160337552742616, + "grad_norm": 0.5060002207756042, + "learning_rate": 0.0015, + "loss": 1.9261, + "step": 1100 + }, + { + "epoch": 0.11708860759493671, + "grad_norm": 0.45779773592948914, + "learning_rate": 0.0015, + "loss": 1.9253, + "step": 1110 + }, + { + "epoch": 0.11814345991561181, + "grad_norm": 0.4614758789539337, + "learning_rate": 0.0015, + "loss": 1.9205, 
+ "step": 1120 + }, + { + "epoch": 0.11919831223628692, + "grad_norm": 0.566154420375824, + "learning_rate": 0.0015, + "loss": 1.9095, + "step": 1130 + }, + { + "epoch": 0.12025316455696203, + "grad_norm": 0.45777004957199097, + "learning_rate": 0.0015, + "loss": 1.9218, + "step": 1140 + }, + { + "epoch": 0.12130801687763713, + "grad_norm": 0.5856761336326599, + "learning_rate": 0.0015, + "loss": 1.9084, + "step": 1150 + }, + { + "epoch": 0.12236286919831224, + "grad_norm": 0.547659695148468, + "learning_rate": 0.0015, + "loss": 1.9155, + "step": 1160 + }, + { + "epoch": 0.12341772151898735, + "grad_norm": 0.4796094000339508, + "learning_rate": 0.0015, + "loss": 1.9071, + "step": 1170 + }, + { + "epoch": 0.12447257383966245, + "grad_norm": 0.5200700163841248, + "learning_rate": 0.0015, + "loss": 1.8932, + "step": 1180 + }, + { + "epoch": 0.12552742616033755, + "grad_norm": 0.4618190824985504, + "learning_rate": 0.0015, + "loss": 1.8963, + "step": 1190 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 0.6290208697319031, + "learning_rate": 0.0015, + "loss": 1.9068, + "step": 1200 + }, + { + "epoch": 0.12763713080168776, + "grad_norm": 0.6318977475166321, + "learning_rate": 0.0015, + "loss": 1.9001, + "step": 1210 + }, + { + "epoch": 0.12869198312236288, + "grad_norm": 0.5180197954177856, + "learning_rate": 0.0015, + "loss": 1.8863, + "step": 1220 + }, + { + "epoch": 0.12974683544303797, + "grad_norm": 0.5761007070541382, + "learning_rate": 0.0015, + "loss": 1.8794, + "step": 1230 + }, + { + "epoch": 0.1308016877637131, + "grad_norm": 0.6240835189819336, + "learning_rate": 0.0015, + "loss": 1.8966, + "step": 1240 + }, + { + "epoch": 0.13185654008438819, + "grad_norm": 0.4585850238800049, + "learning_rate": 0.0015, + "loss": 1.8963, + "step": 1250 + }, + { + "epoch": 0.13291139240506328, + "grad_norm": 0.6809152960777283, + "learning_rate": 0.0015, + "loss": 1.8831, + "step": 1260 + }, + { + "epoch": 0.1339662447257384, + "grad_norm": 0.48098719120025635, + 
"learning_rate": 0.0015, + "loss": 1.8813, + "step": 1270 + }, + { + "epoch": 0.1350210970464135, + "grad_norm": 0.6248587369918823, + "learning_rate": 0.0015, + "loss": 1.8953, + "step": 1280 + }, + { + "epoch": 0.1360759493670886, + "grad_norm": 0.660209059715271, + "learning_rate": 0.0015, + "loss": 1.8816, + "step": 1290 + }, + { + "epoch": 0.1371308016877637, + "grad_norm": 0.4646812081336975, + "learning_rate": 0.0015, + "loss": 1.8683, + "step": 1300 + }, + { + "epoch": 0.13818565400843882, + "grad_norm": 0.44351664185523987, + "learning_rate": 0.0015, + "loss": 1.8702, + "step": 1310 + }, + { + "epoch": 0.13924050632911392, + "grad_norm": 0.49303629994392395, + "learning_rate": 0.0015, + "loss": 1.8753, + "step": 1320 + }, + { + "epoch": 0.14029535864978904, + "grad_norm": 0.718999981880188, + "learning_rate": 0.0015, + "loss": 1.8648, + "step": 1330 + }, + { + "epoch": 0.14135021097046413, + "grad_norm": 0.46604716777801514, + "learning_rate": 0.0015, + "loss": 1.8673, + "step": 1340 + }, + { + "epoch": 0.14240506329113925, + "grad_norm": 0.5090628266334534, + "learning_rate": 0.0015, + "loss": 1.8715, + "step": 1350 + }, + { + "epoch": 0.14345991561181434, + "grad_norm": 0.4868958294391632, + "learning_rate": 0.0015, + "loss": 1.8636, + "step": 1360 + }, + { + "epoch": 0.14451476793248946, + "grad_norm": 0.5031871199607849, + "learning_rate": 0.0015, + "loss": 1.8602, + "step": 1370 + }, + { + "epoch": 0.14556962025316456, + "grad_norm": 0.4896354377269745, + "learning_rate": 0.0015, + "loss": 1.8541, + "step": 1380 + }, + { + "epoch": 0.14662447257383968, + "grad_norm": 0.4694559872150421, + "learning_rate": 0.0015, + "loss": 1.8679, + "step": 1390 + }, + { + "epoch": 0.14767932489451477, + "grad_norm": 0.49668270349502563, + "learning_rate": 0.0015, + "loss": 1.8455, + "step": 1400 + }, + { + "epoch": 0.14873417721518986, + "grad_norm": 0.47248223423957825, + "learning_rate": 0.0015, + "loss": 1.8469, + "step": 1410 + }, + { + "epoch": 
0.14978902953586498, + "grad_norm": 0.6016485691070557, + "learning_rate": 0.0015, + "loss": 1.8547, + "step": 1420 + }, + { + "epoch": 0.15084388185654007, + "grad_norm": 0.5686023235321045, + "learning_rate": 0.0015, + "loss": 1.8495, + "step": 1430 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 0.5577753186225891, + "learning_rate": 0.0015, + "loss": 1.8448, + "step": 1440 + }, + { + "epoch": 0.1529535864978903, + "grad_norm": 0.5137126445770264, + "learning_rate": 0.0015, + "loss": 1.8495, + "step": 1450 + }, + { + "epoch": 0.1540084388185654, + "grad_norm": 0.4884479343891144, + "learning_rate": 0.0015, + "loss": 1.8389, + "step": 1460 + }, + { + "epoch": 0.1550632911392405, + "grad_norm": 0.7402251362800598, + "learning_rate": 0.0015, + "loss": 1.839, + "step": 1470 + }, + { + "epoch": 0.15611814345991562, + "grad_norm": 0.485662043094635, + "learning_rate": 0.0015, + "loss": 1.8431, + "step": 1480 + }, + { + "epoch": 0.1571729957805907, + "grad_norm": 0.4874778389930725, + "learning_rate": 0.0015, + "loss": 1.8405, + "step": 1490 + }, + { + "epoch": 0.15822784810126583, + "grad_norm": 0.6295904517173767, + "learning_rate": 0.0015, + "loss": 1.8418, + "step": 1500 + }, + { + "epoch": 0.15928270042194093, + "grad_norm": 0.7308560609817505, + "learning_rate": 0.0015, + "loss": 1.8422, + "step": 1510 + }, + { + "epoch": 0.16033755274261605, + "grad_norm": 0.5444386005401611, + "learning_rate": 0.0015, + "loss": 1.8444, + "step": 1520 + }, + { + "epoch": 0.16139240506329114, + "grad_norm": 0.4547692537307739, + "learning_rate": 0.0015, + "loss": 1.825, + "step": 1530 + }, + { + "epoch": 0.16244725738396623, + "grad_norm": 0.4396387040615082, + "learning_rate": 0.0015, + "loss": 1.8143, + "step": 1540 + }, + { + "epoch": 0.16350210970464135, + "grad_norm": 0.6077943444252014, + "learning_rate": 0.0015, + "loss": 1.8165, + "step": 1550 + }, + { + "epoch": 0.16455696202531644, + "grad_norm": 0.5430124402046204, + "learning_rate": 0.0015, + "loss": 1.8185, + 
"step": 1560 + }, + { + "epoch": 0.16561181434599156, + "grad_norm": 0.4452005922794342, + "learning_rate": 0.0015, + "loss": 1.813, + "step": 1570 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 0.5291849374771118, + "learning_rate": 0.0015, + "loss": 1.8236, + "step": 1580 + }, + { + "epoch": 0.16772151898734178, + "grad_norm": 0.7987478375434875, + "learning_rate": 0.0015, + "loss": 1.8194, + "step": 1590 + }, + { + "epoch": 0.16877637130801687, + "grad_norm": 0.5864682793617249, + "learning_rate": 0.0015, + "loss": 1.8245, + "step": 1600 + }, + { + "epoch": 0.169831223628692, + "grad_norm": 0.49037304520606995, + "learning_rate": 0.0015, + "loss": 1.824, + "step": 1610 + }, + { + "epoch": 0.17088607594936708, + "grad_norm": 0.5214933753013611, + "learning_rate": 0.0015, + "loss": 1.7965, + "step": 1620 + }, + { + "epoch": 0.1719409282700422, + "grad_norm": 0.7572597861289978, + "learning_rate": 0.0015, + "loss": 1.81, + "step": 1630 + }, + { + "epoch": 0.1729957805907173, + "grad_norm": 0.532415509223938, + "learning_rate": 0.0015, + "loss": 1.8105, + "step": 1640 + }, + { + "epoch": 0.17405063291139242, + "grad_norm": 0.704556941986084, + "learning_rate": 0.0015, + "loss": 1.8027, + "step": 1650 + }, + { + "epoch": 0.1751054852320675, + "grad_norm": 0.45814046263694763, + "learning_rate": 0.0015, + "loss": 1.8019, + "step": 1660 + }, + { + "epoch": 0.17616033755274263, + "grad_norm": 0.6001232862472534, + "learning_rate": 0.0015, + "loss": 1.8087, + "step": 1670 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 0.8192107081413269, + "learning_rate": 0.0015, + "loss": 1.8013, + "step": 1680 + }, + { + "epoch": 0.17827004219409281, + "grad_norm": 0.6131729483604431, + "learning_rate": 0.0015, + "loss": 1.793, + "step": 1690 + }, + { + "epoch": 0.17932489451476794, + "grad_norm": 0.4663192927837372, + "learning_rate": 0.0015, + "loss": 1.8061, + "step": 1700 + }, + { + "epoch": 0.18037974683544303, + "grad_norm": 0.48156777024269104, + 
"learning_rate": 0.0015, + "loss": 1.7898, + "step": 1710 + }, + { + "epoch": 0.18143459915611815, + "grad_norm": 0.527096152305603, + "learning_rate": 0.0015, + "loss": 1.7891, + "step": 1720 + }, + { + "epoch": 0.18248945147679324, + "grad_norm": 0.64168381690979, + "learning_rate": 0.0015, + "loss": 1.7917, + "step": 1730 + }, + { + "epoch": 0.18354430379746836, + "grad_norm": 0.6163222193717957, + "learning_rate": 0.0015, + "loss": 1.7941, + "step": 1740 + }, + { + "epoch": 0.18459915611814345, + "grad_norm": 0.5239340662956238, + "learning_rate": 0.0015, + "loss": 1.8158, + "step": 1750 + }, + { + "epoch": 0.18565400843881857, + "grad_norm": 0.5176557898521423, + "learning_rate": 0.0015, + "loss": 1.7913, + "step": 1760 + }, + { + "epoch": 0.18670886075949367, + "grad_norm": 0.4712711572647095, + "learning_rate": 0.0015, + "loss": 1.7952, + "step": 1770 + }, + { + "epoch": 0.1877637130801688, + "grad_norm": 0.5567694902420044, + "learning_rate": 0.0015, + "loss": 1.795, + "step": 1780 + }, + { + "epoch": 0.18881856540084388, + "grad_norm": 0.45907673239707947, + "learning_rate": 0.0015, + "loss": 1.7965, + "step": 1790 + }, + { + "epoch": 0.189873417721519, + "grad_norm": 0.4090765714645386, + "learning_rate": 0.0015, + "loss": 1.792, + "step": 1800 + }, + { + "epoch": 0.1909282700421941, + "grad_norm": 0.5466338992118835, + "learning_rate": 0.0015, + "loss": 1.7789, + "step": 1810 + }, + { + "epoch": 0.19198312236286919, + "grad_norm": 0.47220662236213684, + "learning_rate": 0.0015, + "loss": 1.7797, + "step": 1820 + }, + { + "epoch": 0.1930379746835443, + "grad_norm": 0.5684412121772766, + "learning_rate": 0.0015, + "loss": 1.7796, + "step": 1830 + }, + { + "epoch": 0.1940928270042194, + "grad_norm": 0.5197381973266602, + "learning_rate": 0.0015, + "loss": 1.7768, + "step": 1840 + }, + { + "epoch": 0.19514767932489452, + "grad_norm": 0.6911113262176514, + "learning_rate": 0.0015, + "loss": 1.7829, + "step": 1850 + }, + { + "epoch": 0.1962025316455696, + 
"grad_norm": 0.7781076431274414, + "learning_rate": 0.0015, + "loss": 1.7853, + "step": 1860 + }, + { + "epoch": 0.19725738396624473, + "grad_norm": 0.5504148602485657, + "learning_rate": 0.0015, + "loss": 1.7859, + "step": 1870 + }, + { + "epoch": 0.19831223628691982, + "grad_norm": 0.5567972660064697, + "learning_rate": 0.0015, + "loss": 1.7685, + "step": 1880 + }, + { + "epoch": 0.19936708860759494, + "grad_norm": 0.5026859045028687, + "learning_rate": 0.0015, + "loss": 1.7772, + "step": 1890 + }, + { + "epoch": 0.20042194092827004, + "grad_norm": 0.6695922017097473, + "learning_rate": 0.0015, + "loss": 1.7744, + "step": 1900 + }, + { + "epoch": 0.20147679324894516, + "grad_norm": 0.47478118538856506, + "learning_rate": 0.0015, + "loss": 1.7764, + "step": 1910 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 0.5399294495582581, + "learning_rate": 0.0015, + "loss": 1.7734, + "step": 1920 + }, + { + "epoch": 0.20358649789029537, + "grad_norm": 0.5199739336967468, + "learning_rate": 0.0015, + "loss": 1.7771, + "step": 1930 + }, + { + "epoch": 0.20464135021097046, + "grad_norm": 0.5336176156997681, + "learning_rate": 0.0015, + "loss": 1.766, + "step": 1940 + }, + { + "epoch": 0.20569620253164558, + "grad_norm": 0.4370203912258148, + "learning_rate": 0.0015, + "loss": 1.7653, + "step": 1950 + }, + { + "epoch": 0.20675105485232068, + "grad_norm": 0.5613576173782349, + "learning_rate": 0.0015, + "loss": 1.7676, + "step": 1960 + }, + { + "epoch": 0.20780590717299577, + "grad_norm": 0.4811178147792816, + "learning_rate": 0.0015, + "loss": 1.7779, + "step": 1970 + }, + { + "epoch": 0.2088607594936709, + "grad_norm": 0.6334314942359924, + "learning_rate": 0.0015, + "loss": 1.7592, + "step": 1980 + }, + { + "epoch": 0.20991561181434598, + "grad_norm": 0.5677840709686279, + "learning_rate": 0.0015, + "loss": 1.7546, + "step": 1990 + }, + { + "epoch": 0.2109704641350211, + "grad_norm": 0.5683848857879639, + "learning_rate": 0.0015, + "loss": 1.7596, + "step": 2000 + }, 
+ { + "epoch": 0.2120253164556962, + "grad_norm": 0.732013463973999, + "learning_rate": 0.0015, + "loss": 1.765, + "step": 2010 + }, + { + "epoch": 0.21308016877637131, + "grad_norm": 0.5320744514465332, + "learning_rate": 0.0015, + "loss": 1.7731, + "step": 2020 + }, + { + "epoch": 0.2141350210970464, + "grad_norm": 0.49970418214797974, + "learning_rate": 0.0015, + "loss": 1.7602, + "step": 2030 + }, + { + "epoch": 0.21518987341772153, + "grad_norm": 0.5536065697669983, + "learning_rate": 0.0015, + "loss": 1.7592, + "step": 2040 + }, + { + "epoch": 0.21624472573839662, + "grad_norm": 0.5105161070823669, + "learning_rate": 0.0015, + "loss": 1.7591, + "step": 2050 + }, + { + "epoch": 0.21729957805907174, + "grad_norm": 0.4826969802379608, + "learning_rate": 0.0015, + "loss": 1.7541, + "step": 2060 + }, + { + "epoch": 0.21835443037974683, + "grad_norm": 0.5110848546028137, + "learning_rate": 0.0015, + "loss": 1.7511, + "step": 2070 + }, + { + "epoch": 0.21940928270042195, + "grad_norm": 0.6191832423210144, + "learning_rate": 0.0015, + "loss": 1.7573, + "step": 2080 + }, + { + "epoch": 0.22046413502109705, + "grad_norm": 0.46266549825668335, + "learning_rate": 0.0015, + "loss": 1.7491, + "step": 2090 + }, + { + "epoch": 0.22151898734177214, + "grad_norm": 0.5335398316383362, + "learning_rate": 0.0015, + "loss": 1.7547, + "step": 2100 + }, + { + "epoch": 0.22257383966244726, + "grad_norm": 0.5051414966583252, + "learning_rate": 0.0015, + "loss": 1.7459, + "step": 2110 + }, + { + "epoch": 0.22362869198312235, + "grad_norm": 0.49106815457344055, + "learning_rate": 0.0015, + "loss": 1.7455, + "step": 2120 + }, + { + "epoch": 0.22468354430379747, + "grad_norm": 0.4691365361213684, + "learning_rate": 0.0015, + "loss": 1.7473, + "step": 2130 + }, + { + "epoch": 0.22573839662447256, + "grad_norm": 0.570389986038208, + "learning_rate": 0.0015, + "loss": 1.7436, + "step": 2140 + }, + { + "epoch": 0.22679324894514769, + "grad_norm": 0.5152807831764221, + "learning_rate": 0.0015, 
+ "loss": 1.749, + "step": 2150 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 0.6476304531097412, + "learning_rate": 0.0015, + "loss": 1.7626, + "step": 2160 + }, + { + "epoch": 0.2289029535864979, + "grad_norm": 0.4600910246372223, + "learning_rate": 0.0015, + "loss": 1.7479, + "step": 2170 + }, + { + "epoch": 0.229957805907173, + "grad_norm": 0.48738375306129456, + "learning_rate": 0.0015, + "loss": 1.7288, + "step": 2180 + }, + { + "epoch": 0.2310126582278481, + "grad_norm": 0.5096136331558228, + "learning_rate": 0.0015, + "loss": 1.7467, + "step": 2190 + }, + { + "epoch": 0.2320675105485232, + "grad_norm": 0.5732322931289673, + "learning_rate": 0.0015, + "loss": 1.7456, + "step": 2200 + }, + { + "epoch": 0.23312236286919832, + "grad_norm": 0.45911917090415955, + "learning_rate": 0.0015, + "loss": 1.7345, + "step": 2210 + }, + { + "epoch": 0.23417721518987342, + "grad_norm": 0.5818554162979126, + "learning_rate": 0.0015, + "loss": 1.7462, + "step": 2220 + }, + { + "epoch": 0.23523206751054854, + "grad_norm": 0.4352804124355316, + "learning_rate": 0.0015, + "loss": 1.7405, + "step": 2230 + }, + { + "epoch": 0.23628691983122363, + "grad_norm": 0.5699293613433838, + "learning_rate": 0.0015, + "loss": 1.7334, + "step": 2240 + }, + { + "epoch": 0.23734177215189872, + "grad_norm": 0.935554563999176, + "learning_rate": 0.0015, + "loss": 1.7369, + "step": 2250 + }, + { + "epoch": 0.23839662447257384, + "grad_norm": 0.6398186683654785, + "learning_rate": 0.0015, + "loss": 1.7307, + "step": 2260 + }, + { + "epoch": 0.23945147679324894, + "grad_norm": 0.5753996968269348, + "learning_rate": 0.0015, + "loss": 1.7593, + "step": 2270 + }, + { + "epoch": 0.24050632911392406, + "grad_norm": 0.42552733421325684, + "learning_rate": 0.0015, + "loss": 1.7424, + "step": 2280 + }, + { + "epoch": 0.24156118143459915, + "grad_norm": 0.6425786018371582, + "learning_rate": 0.0015, + "loss": 1.7317, + "step": 2290 + }, + { + "epoch": 0.24261603375527427, + "grad_norm": 
0.47474586963653564, + "learning_rate": 0.0015, + "loss": 1.7336, + "step": 2300 + }, + { + "epoch": 0.24367088607594936, + "grad_norm": 0.45456463098526, + "learning_rate": 0.0015, + "loss": 1.7334, + "step": 2310 + }, + { + "epoch": 0.24472573839662448, + "grad_norm": 0.5725328326225281, + "learning_rate": 0.0015, + "loss": 1.7374, + "step": 2320 + }, + { + "epoch": 0.24578059071729957, + "grad_norm": 0.646536111831665, + "learning_rate": 0.0015, + "loss": 1.7238, + "step": 2330 + }, + { + "epoch": 0.2468354430379747, + "grad_norm": 0.503918468952179, + "learning_rate": 0.0015, + "loss": 1.7212, + "step": 2340 + }, + { + "epoch": 0.2478902953586498, + "grad_norm": 1.0969829559326172, + "learning_rate": 0.0015, + "loss": 1.7215, + "step": 2350 + }, + { + "epoch": 0.2489451476793249, + "grad_norm": 0.5288516879081726, + "learning_rate": 0.0015, + "loss": 1.731, + "step": 2360 + }, + { + "epoch": 0.25, + "grad_norm": 0.5329839587211609, + "learning_rate": 0.0015, + "loss": 1.7272, + "step": 2370 + }, + { + "epoch": 0.2510548523206751, + "grad_norm": 0.4656757414340973, + "learning_rate": 0.0015, + "loss": 1.7261, + "step": 2380 + }, + { + "epoch": 0.2521097046413502, + "grad_norm": 0.5369203090667725, + "learning_rate": 0.0015, + "loss": 1.7234, + "step": 2390 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 0.5271219611167908, + "learning_rate": 0.0015, + "loss": 1.7254, + "step": 2400 + }, + { + "epoch": 0.2542194092827004, + "grad_norm": 0.47681111097335815, + "learning_rate": 0.0015, + "loss": 1.7267, + "step": 2410 + }, + { + "epoch": 0.2552742616033755, + "grad_norm": 0.4791703224182129, + "learning_rate": 0.0015, + "loss": 1.7198, + "step": 2420 + }, + { + "epoch": 0.2563291139240506, + "grad_norm": 0.5583822131156921, + "learning_rate": 0.0015, + "loss": 1.7203, + "step": 2430 + }, + { + "epoch": 0.25738396624472576, + "grad_norm": 0.5442696213722229, + "learning_rate": 0.0015, + "loss": 1.729, + "step": 2440 + }, + { + "epoch": 0.25843881856540085, + 
"grad_norm": 0.6266167163848877, + "learning_rate": 0.0015, + "loss": 1.7118, + "step": 2450 + }, + { + "epoch": 0.25949367088607594, + "grad_norm": 0.6559293866157532, + "learning_rate": 0.0015, + "loss": 1.7281, + "step": 2460 + }, + { + "epoch": 0.26054852320675104, + "grad_norm": 0.5037382245063782, + "learning_rate": 0.0015, + "loss": 1.7184, + "step": 2470 + }, + { + "epoch": 0.2616033755274262, + "grad_norm": 0.5620531439781189, + "learning_rate": 0.0015, + "loss": 1.7237, + "step": 2480 + }, + { + "epoch": 0.2626582278481013, + "grad_norm": 0.5025471448898315, + "learning_rate": 0.0015, + "loss": 1.711, + "step": 2490 + }, + { + "epoch": 0.26371308016877637, + "grad_norm": 0.5060129165649414, + "learning_rate": 0.0015, + "loss": 1.7155, + "step": 2500 + }, + { + "epoch": 0.26476793248945146, + "grad_norm": 0.4642243981361389, + "learning_rate": 0.0015, + "loss": 1.7151, + "step": 2510 + }, + { + "epoch": 0.26582278481012656, + "grad_norm": 0.633920431137085, + "learning_rate": 0.0015, + "loss": 1.7181, + "step": 2520 + }, + { + "epoch": 0.2668776371308017, + "grad_norm": 0.5765074491500854, + "learning_rate": 0.0015, + "loss": 1.7144, + "step": 2530 + }, + { + "epoch": 0.2679324894514768, + "grad_norm": 0.49057888984680176, + "learning_rate": 0.0015, + "loss": 1.7147, + "step": 2540 + }, + { + "epoch": 0.2689873417721519, + "grad_norm": 0.49242648482322693, + "learning_rate": 0.0015, + "loss": 1.7087, + "step": 2550 + }, + { + "epoch": 0.270042194092827, + "grad_norm": 0.49905985593795776, + "learning_rate": 0.0015, + "loss": 1.7182, + "step": 2560 + }, + { + "epoch": 0.27109704641350213, + "grad_norm": 0.5166392922401428, + "learning_rate": 0.0015, + "loss": 1.7116, + "step": 2570 + }, + { + "epoch": 0.2721518987341772, + "grad_norm": 0.48449474573135376, + "learning_rate": 0.0015, + "loss": 1.7097, + "step": 2580 + }, + { + "epoch": 0.2732067510548523, + "grad_norm": 0.6912030577659607, + "learning_rate": 0.0015, + "loss": 1.7167, + "step": 2590 + }, + { 
+ "epoch": 0.2742616033755274, + "grad_norm": 0.6896213293075562, + "learning_rate": 0.0015, + "loss": 1.7173, + "step": 2600 + }, + { + "epoch": 0.27531645569620256, + "grad_norm": 0.6085569858551025, + "learning_rate": 0.0015, + "loss": 1.7164, + "step": 2610 + }, + { + "epoch": 0.27637130801687765, + "grad_norm": 0.5478935837745667, + "learning_rate": 0.0015, + "loss": 1.7141, + "step": 2620 + }, + { + "epoch": 0.27742616033755274, + "grad_norm": 0.5240187644958496, + "learning_rate": 0.0015, + "loss": 1.7013, + "step": 2630 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 0.6411234140396118, + "learning_rate": 0.0015, + "loss": 1.715, + "step": 2640 + }, + { + "epoch": 0.2795358649789029, + "grad_norm": 0.4808960556983948, + "learning_rate": 0.0015, + "loss": 1.7073, + "step": 2650 + }, + { + "epoch": 0.2805907172995781, + "grad_norm": 0.426119327545166, + "learning_rate": 0.0015, + "loss": 1.6979, + "step": 2660 + }, + { + "epoch": 0.28164556962025317, + "grad_norm": 0.5314222574234009, + "learning_rate": 0.0015, + "loss": 1.7003, + "step": 2670 + }, + { + "epoch": 0.28270042194092826, + "grad_norm": 0.5103647708892822, + "learning_rate": 0.0015, + "loss": 1.7071, + "step": 2680 + }, + { + "epoch": 0.28375527426160335, + "grad_norm": 0.5228768587112427, + "learning_rate": 0.0015, + "loss": 1.708, + "step": 2690 + }, + { + "epoch": 0.2848101265822785, + "grad_norm": 0.522731602191925, + "learning_rate": 0.0015, + "loss": 1.7, + "step": 2700 + }, + { + "epoch": 0.2858649789029536, + "grad_norm": 0.500950276851654, + "learning_rate": 0.0015, + "loss": 1.7051, + "step": 2710 + }, + { + "epoch": 0.2869198312236287, + "grad_norm": 0.46140557527542114, + "learning_rate": 0.0015, + "loss": 1.7049, + "step": 2720 + }, + { + "epoch": 0.2879746835443038, + "grad_norm": 0.48429882526397705, + "learning_rate": 0.0015, + "loss": 1.6868, + "step": 2730 + }, + { + "epoch": 0.2890295358649789, + "grad_norm": 0.8525009155273438, + "learning_rate": 0.0015, + "loss": 
1.7002, + "step": 2740 + }, + { + "epoch": 0.290084388185654, + "grad_norm": 0.6817622780799866, + "learning_rate": 0.0015, + "loss": 1.7025, + "step": 2750 + }, + { + "epoch": 0.2911392405063291, + "grad_norm": 0.7357791662216187, + "learning_rate": 0.0015, + "loss": 1.7126, + "step": 2760 + }, + { + "epoch": 0.2921940928270042, + "grad_norm": 0.5024354457855225, + "learning_rate": 0.0015, + "loss": 1.7089, + "step": 2770 + }, + { + "epoch": 0.29324894514767935, + "grad_norm": 0.4828796088695526, + "learning_rate": 0.0015, + "loss": 1.703, + "step": 2780 + }, + { + "epoch": 0.29430379746835444, + "grad_norm": 0.408743292093277, + "learning_rate": 0.0015, + "loss": 1.6981, + "step": 2790 + }, + { + "epoch": 0.29535864978902954, + "grad_norm": 0.6103696823120117, + "learning_rate": 0.0015, + "loss": 1.6997, + "step": 2800 + }, + { + "epoch": 0.29641350210970463, + "grad_norm": 0.789609968662262, + "learning_rate": 0.0015, + "loss": 1.7004, + "step": 2810 + }, + { + "epoch": 0.2974683544303797, + "grad_norm": 0.6100125908851624, + "learning_rate": 0.0015, + "loss": 1.7048, + "step": 2820 + }, + { + "epoch": 0.29852320675105487, + "grad_norm": 0.507382869720459, + "learning_rate": 0.0015, + "loss": 1.6755, + "step": 2830 + }, + { + "epoch": 0.29957805907172996, + "grad_norm": 0.487572580575943, + "learning_rate": 0.0015, + "loss": 1.6838, + "step": 2840 + }, + { + "epoch": 0.30063291139240506, + "grad_norm": 0.5198614597320557, + "learning_rate": 0.0015, + "loss": 1.691, + "step": 2850 + }, + { + "epoch": 0.30168776371308015, + "grad_norm": 0.596764862537384, + "learning_rate": 0.0015, + "loss": 1.6952, + "step": 2860 + }, + { + "epoch": 0.3027426160337553, + "grad_norm": 0.6617895364761353, + "learning_rate": 0.0015, + "loss": 1.6945, + "step": 2870 + }, + { + "epoch": 0.3037974683544304, + "grad_norm": 0.7584461569786072, + "learning_rate": 0.0015, + "loss": 1.6833, + "step": 2880 + }, + { + "epoch": 0.3048523206751055, + "grad_norm": 0.4396650791168213, + 
"learning_rate": 0.0015, + "loss": 1.686, + "step": 2890 + }, + { + "epoch": 0.3059071729957806, + "grad_norm": 0.5725198984146118, + "learning_rate": 0.0015, + "loss": 1.6896, + "step": 2900 + }, + { + "epoch": 0.3069620253164557, + "grad_norm": 0.7115629315376282, + "learning_rate": 0.0015, + "loss": 1.6923, + "step": 2910 + }, + { + "epoch": 0.3080168776371308, + "grad_norm": 0.5480477809906006, + "learning_rate": 0.0015, + "loss": 1.6852, + "step": 2920 + }, + { + "epoch": 0.3090717299578059, + "grad_norm": 0.5029016137123108, + "learning_rate": 0.0015, + "loss": 1.6858, + "step": 2930 + }, + { + "epoch": 0.310126582278481, + "grad_norm": 0.6621780395507812, + "learning_rate": 0.0015, + "loss": 1.6891, + "step": 2940 + }, + { + "epoch": 0.3111814345991561, + "grad_norm": 0.592009425163269, + "learning_rate": 0.0015, + "loss": 1.6988, + "step": 2950 + }, + { + "epoch": 0.31223628691983124, + "grad_norm": 0.42894601821899414, + "learning_rate": 0.0015, + "loss": 1.6776, + "step": 2960 + }, + { + "epoch": 0.31329113924050633, + "grad_norm": 0.43958646059036255, + "learning_rate": 0.0015, + "loss": 1.6758, + "step": 2970 + }, + { + "epoch": 0.3143459915611814, + "grad_norm": 0.582295835018158, + "learning_rate": 0.0015, + "loss": 1.68, + "step": 2980 + }, + { + "epoch": 0.3154008438818565, + "grad_norm": 0.868670642375946, + "learning_rate": 0.0015, + "loss": 1.6991, + "step": 2990 + }, + { + "epoch": 0.31645569620253167, + "grad_norm": 0.5141469836235046, + "learning_rate": 0.0015, + "loss": 1.6742, + "step": 3000 + }, + { + "epoch": 0.31751054852320676, + "grad_norm": 0.9177303314208984, + "learning_rate": 0.0015, + "loss": 1.6869, + "step": 3010 + }, + { + "epoch": 0.31856540084388185, + "grad_norm": 0.45765310525894165, + "learning_rate": 0.0015, + "loss": 1.684, + "step": 3020 + }, + { + "epoch": 0.31962025316455694, + "grad_norm": 0.6141239404678345, + "learning_rate": 0.0015, + "loss": 1.6747, + "step": 3030 + }, + { + "epoch": 0.3206751054852321, + 
"grad_norm": 0.48899802565574646, + "learning_rate": 0.0015, + "loss": 1.6808, + "step": 3040 + }, + { + "epoch": 0.3217299578059072, + "grad_norm": 0.5064447522163391, + "learning_rate": 0.0015, + "loss": 1.6759, + "step": 3050 + }, + { + "epoch": 0.3227848101265823, + "grad_norm": 0.5975239872932434, + "learning_rate": 0.0015, + "loss": 1.6889, + "step": 3060 + }, + { + "epoch": 0.32383966244725737, + "grad_norm": 0.5352042317390442, + "learning_rate": 0.0015, + "loss": 1.6787, + "step": 3070 + }, + { + "epoch": 0.32489451476793246, + "grad_norm": 0.6929758191108704, + "learning_rate": 0.0015, + "loss": 1.6692, + "step": 3080 + }, + { + "epoch": 0.3259493670886076, + "grad_norm": 0.6103318333625793, + "learning_rate": 0.0015, + "loss": 1.6839, + "step": 3090 + }, + { + "epoch": 0.3270042194092827, + "grad_norm": 0.6223170161247253, + "learning_rate": 0.0015, + "loss": 1.6738, + "step": 3100 + }, + { + "epoch": 0.3280590717299578, + "grad_norm": 0.77720707654953, + "learning_rate": 0.0015, + "loss": 1.6769, + "step": 3110 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 0.7838046550750732, + "learning_rate": 0.0015, + "loss": 1.6757, + "step": 3120 + }, + { + "epoch": 0.33016877637130804, + "grad_norm": 0.6083099842071533, + "learning_rate": 0.0015, + "loss": 1.6894, + "step": 3130 + }, + { + "epoch": 0.33122362869198313, + "grad_norm": 0.6493582129478455, + "learning_rate": 0.0015, + "loss": 1.6687, + "step": 3140 + }, + { + "epoch": 0.3322784810126582, + "grad_norm": 0.6273305416107178, + "learning_rate": 0.0015, + "loss": 1.6777, + "step": 3150 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.5375348925590515, + "learning_rate": 0.0015, + "loss": 1.6751, + "step": 3160 + }, + { + "epoch": 0.33438818565400846, + "grad_norm": 0.49990466237068176, + "learning_rate": 0.0015, + "loss": 1.6796, + "step": 3170 + }, + { + "epoch": 0.33544303797468356, + "grad_norm": 0.4574170410633087, + "learning_rate": 0.0015, + "loss": 1.6716, + "step": 3180 + }, + { + 
"epoch": 0.33649789029535865, + "grad_norm": 0.6537464261054993, + "learning_rate": 0.0015, + "loss": 1.6741, + "step": 3190 + }, + { + "epoch": 0.33755274261603374, + "grad_norm": 0.7017143368721008, + "learning_rate": 0.0015, + "loss": 1.6679, + "step": 3200 + }, + { + "epoch": 0.33860759493670883, + "grad_norm": 0.5553740859031677, + "learning_rate": 0.0015, + "loss": 1.6817, + "step": 3210 + }, + { + "epoch": 0.339662447257384, + "grad_norm": 0.5856168866157532, + "learning_rate": 0.0015, + "loss": 1.6736, + "step": 3220 + }, + { + "epoch": 0.3407172995780591, + "grad_norm": 0.46680551767349243, + "learning_rate": 0.0015, + "loss": 1.665, + "step": 3230 + }, + { + "epoch": 0.34177215189873417, + "grad_norm": 0.5319445729255676, + "learning_rate": 0.0015, + "loss": 1.6446, + "step": 3240 + }, + { + "epoch": 0.34282700421940926, + "grad_norm": 0.550144374370575, + "learning_rate": 0.0015, + "loss": 1.6721, + "step": 3250 + }, + { + "epoch": 0.3438818565400844, + "grad_norm": 0.5681722164154053, + "learning_rate": 0.0015, + "loss": 1.6638, + "step": 3260 + }, + { + "epoch": 0.3449367088607595, + "grad_norm": 0.4633236527442932, + "learning_rate": 0.0015, + "loss": 1.6795, + "step": 3270 + }, + { + "epoch": 0.3459915611814346, + "grad_norm": 0.7579367160797119, + "learning_rate": 0.0015, + "loss": 1.6799, + "step": 3280 + }, + { + "epoch": 0.3470464135021097, + "grad_norm": 0.558925986289978, + "learning_rate": 0.0015, + "loss": 1.6643, + "step": 3290 + }, + { + "epoch": 0.34810126582278483, + "grad_norm": 0.5117262601852417, + "learning_rate": 0.0015, + "loss": 1.6714, + "step": 3300 + }, + { + "epoch": 0.3491561181434599, + "grad_norm": 0.4554060399532318, + "learning_rate": 0.0015, + "loss": 1.6698, + "step": 3310 + }, + { + "epoch": 0.350210970464135, + "grad_norm": 0.49014028906822205, + "learning_rate": 0.0015, + "loss": 1.6762, + "step": 3320 + }, + { + "epoch": 0.3512658227848101, + "grad_norm": 0.46652093529701233, + "learning_rate": 0.0015, + "loss": 
1.6656, + "step": 3330 + }, + { + "epoch": 0.35232067510548526, + "grad_norm": 0.8586543798446655, + "learning_rate": 0.0015, + "loss": 1.6703, + "step": 3340 + }, + { + "epoch": 0.35337552742616035, + "grad_norm": 0.7492462396621704, + "learning_rate": 0.0015, + "loss": 1.661, + "step": 3350 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 0.5858999490737915, + "learning_rate": 0.0015, + "loss": 1.6622, + "step": 3360 + }, + { + "epoch": 0.35548523206751054, + "grad_norm": 0.5616260766983032, + "learning_rate": 0.0015, + "loss": 1.6694, + "step": 3370 + }, + { + "epoch": 0.35654008438818563, + "grad_norm": 1.0986077785491943, + "learning_rate": 0.0015, + "loss": 1.6655, + "step": 3380 + }, + { + "epoch": 0.3575949367088608, + "grad_norm": 0.5568318367004395, + "learning_rate": 0.0015, + "loss": 1.6607, + "step": 3390 + }, + { + "epoch": 0.35864978902953587, + "grad_norm": 0.6300899386405945, + "learning_rate": 0.0015, + "loss": 1.6686, + "step": 3400 + }, + { + "epoch": 0.35970464135021096, + "grad_norm": 0.5752268433570862, + "learning_rate": 0.0015, + "loss": 1.6667, + "step": 3410 + }, + { + "epoch": 0.36075949367088606, + "grad_norm": 0.6281692981719971, + "learning_rate": 0.0015, + "loss": 1.6461, + "step": 3420 + }, + { + "epoch": 0.3618143459915612, + "grad_norm": 0.4507755637168884, + "learning_rate": 0.0015, + "loss": 1.663, + "step": 3430 + }, + { + "epoch": 0.3628691983122363, + "grad_norm": 0.5289875864982605, + "learning_rate": 0.0015, + "loss": 1.6583, + "step": 3440 + }, + { + "epoch": 0.3639240506329114, + "grad_norm": 0.5292497873306274, + "learning_rate": 0.0015, + "loss": 1.6592, + "step": 3450 + }, + { + "epoch": 0.3649789029535865, + "grad_norm": 0.5379101037979126, + "learning_rate": 0.0015, + "loss": 1.6506, + "step": 3460 + }, + { + "epoch": 0.36603375527426163, + "grad_norm": 0.5110471844673157, + "learning_rate": 0.0015, + "loss": 1.6697, + "step": 3470 + }, + { + "epoch": 0.3670886075949367, + "grad_norm": 0.5615798234939575, + 
"learning_rate": 0.0015, + "loss": 1.6718, + "step": 3480 + }, + { + "epoch": 0.3681434599156118, + "grad_norm": 0.4357393682003021, + "learning_rate": 0.0015, + "loss": 1.6631, + "step": 3490 + }, + { + "epoch": 0.3691983122362869, + "grad_norm": 0.5332390069961548, + "learning_rate": 0.0015, + "loss": 1.6455, + "step": 3500 + }, + { + "epoch": 0.370253164556962, + "grad_norm": 0.6678214073181152, + "learning_rate": 0.0015, + "loss": 1.665, + "step": 3510 + }, + { + "epoch": 0.37130801687763715, + "grad_norm": 0.5163810849189758, + "learning_rate": 0.0015, + "loss": 1.6643, + "step": 3520 + }, + { + "epoch": 0.37236286919831224, + "grad_norm": 0.654403030872345, + "learning_rate": 0.0015, + "loss": 1.6695, + "step": 3530 + }, + { + "epoch": 0.37341772151898733, + "grad_norm": 0.8108707070350647, + "learning_rate": 0.0015, + "loss": 1.6661, + "step": 3540 + }, + { + "epoch": 0.3744725738396624, + "grad_norm": 0.4883694350719452, + "learning_rate": 0.0015, + "loss": 1.6537, + "step": 3550 + }, + { + "epoch": 0.3755274261603376, + "grad_norm": 0.43957310914993286, + "learning_rate": 0.0015, + "loss": 1.6532, + "step": 3560 + }, + { + "epoch": 0.37658227848101267, + "grad_norm": 0.793530285358429, + "learning_rate": 0.0015, + "loss": 1.6534, + "step": 3570 + }, + { + "epoch": 0.37763713080168776, + "grad_norm": 0.932205855846405, + "learning_rate": 0.0015, + "loss": 1.656, + "step": 3580 + }, + { + "epoch": 0.37869198312236285, + "grad_norm": 0.5770485997200012, + "learning_rate": 0.0015, + "loss": 1.6643, + "step": 3590 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 0.7234318256378174, + "learning_rate": 0.0015, + "loss": 1.6493, + "step": 3600 + }, + { + "epoch": 0.3808016877637131, + "grad_norm": 0.7382251620292664, + "learning_rate": 0.0015, + "loss": 1.6511, + "step": 3610 + }, + { + "epoch": 0.3818565400843882, + "grad_norm": 0.5714709162712097, + "learning_rate": 0.0015, + "loss": 1.6533, + "step": 3620 + }, + { + "epoch": 0.3829113924050633, + 
"grad_norm": 0.6483688950538635, + "learning_rate": 0.0015, + "loss": 1.6508, + "step": 3630 + }, + { + "epoch": 0.38396624472573837, + "grad_norm": 0.6614258289337158, + "learning_rate": 0.0015, + "loss": 1.6528, + "step": 3640 + }, + { + "epoch": 0.3850210970464135, + "grad_norm": 0.6100746393203735, + "learning_rate": 0.0015, + "loss": 1.6592, + "step": 3650 + }, + { + "epoch": 0.3860759493670886, + "grad_norm": 0.5614967346191406, + "learning_rate": 0.0015, + "loss": 1.6536, + "step": 3660 + }, + { + "epoch": 0.3871308016877637, + "grad_norm": 0.5582228899002075, + "learning_rate": 0.0015, + "loss": 1.6484, + "step": 3670 + }, + { + "epoch": 0.3881856540084388, + "grad_norm": 0.7282346487045288, + "learning_rate": 0.0015, + "loss": 1.65, + "step": 3680 + }, + { + "epoch": 0.38924050632911394, + "grad_norm": 0.48201948404312134, + "learning_rate": 0.0015, + "loss": 1.6614, + "step": 3690 + }, + { + "epoch": 0.39029535864978904, + "grad_norm": 0.6210127472877502, + "learning_rate": 0.0015, + "loss": 1.6621, + "step": 3700 + }, + { + "epoch": 0.39135021097046413, + "grad_norm": 0.7041175365447998, + "learning_rate": 0.0015, + "loss": 1.652, + "step": 3710 + }, + { + "epoch": 0.3924050632911392, + "grad_norm": 0.5997670292854309, + "learning_rate": 0.0015, + "loss": 1.6361, + "step": 3720 + }, + { + "epoch": 0.39345991561181437, + "grad_norm": 0.6002827286720276, + "learning_rate": 0.0015, + "loss": 1.6487, + "step": 3730 + }, + { + "epoch": 0.39451476793248946, + "grad_norm": 0.46460452675819397, + "learning_rate": 0.0015, + "loss": 1.6513, + "step": 3740 + }, + { + "epoch": 0.39556962025316456, + "grad_norm": 0.6907001733779907, + "learning_rate": 0.0015, + "loss": 1.6525, + "step": 3750 + }, + { + "epoch": 0.39662447257383965, + "grad_norm": 0.5740986466407776, + "learning_rate": 0.0015, + "loss": 1.6462, + "step": 3760 + }, + { + "epoch": 0.39767932489451474, + "grad_norm": 0.43042105436325073, + "learning_rate": 0.0015, + "loss": 1.6615, + "step": 3770 + }, + 
{ + "epoch": 0.3987341772151899, + "grad_norm": 0.5377410054206848, + "learning_rate": 0.0015, + "loss": 1.6461, + "step": 3780 + }, + { + "epoch": 0.399789029535865, + "grad_norm": 0.6504456996917725, + "learning_rate": 0.0015, + "loss": 1.6501, + "step": 3790 + }, + { + "epoch": 0.4008438818565401, + "grad_norm": 0.5716488361358643, + "learning_rate": 0.0015, + "loss": 1.6547, + "step": 3800 + }, + { + "epoch": 0.40189873417721517, + "grad_norm": 0.5466607213020325, + "learning_rate": 0.0015, + "loss": 1.6461, + "step": 3810 + }, + { + "epoch": 0.4029535864978903, + "grad_norm": 0.5013929009437561, + "learning_rate": 0.0015, + "loss": 1.6344, + "step": 3820 + }, + { + "epoch": 0.4040084388185654, + "grad_norm": 0.47789978981018066, + "learning_rate": 0.0015, + "loss": 1.6389, + "step": 3830 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 0.48156487941741943, + "learning_rate": 0.0015, + "loss": 1.6558, + "step": 3840 + }, + { + "epoch": 0.4061181434599156, + "grad_norm": 0.513731062412262, + "learning_rate": 0.0015, + "loss": 1.638, + "step": 3850 + }, + { + "epoch": 0.40717299578059074, + "grad_norm": 0.47874006628990173, + "learning_rate": 0.0015, + "loss": 1.6524, + "step": 3860 + }, + { + "epoch": 0.40822784810126583, + "grad_norm": 0.7129272222518921, + "learning_rate": 0.0015, + "loss": 1.6416, + "step": 3870 + }, + { + "epoch": 0.4092827004219409, + "grad_norm": 0.5598849058151245, + "learning_rate": 0.0015, + "loss": 1.6308, + "step": 3880 + }, + { + "epoch": 0.410337552742616, + "grad_norm": 0.7132563591003418, + "learning_rate": 0.0015, + "loss": 1.6353, + "step": 3890 + }, + { + "epoch": 0.41139240506329117, + "grad_norm": 0.5598984360694885, + "learning_rate": 0.0015, + "loss": 1.6386, + "step": 3900 + }, + { + "epoch": 0.41244725738396626, + "grad_norm": 0.5166809558868408, + "learning_rate": 0.0015, + "loss": 1.6366, + "step": 3910 + }, + { + "epoch": 0.41350210970464135, + "grad_norm": 0.601611316204071, + "learning_rate": 0.0015, + "loss": 
1.6343, + "step": 3920 + }, + { + "epoch": 0.41455696202531644, + "grad_norm": 0.5891398191452026, + "learning_rate": 0.0015, + "loss": 1.6376, + "step": 3930 + }, + { + "epoch": 0.41561181434599154, + "grad_norm": 0.5140433311462402, + "learning_rate": 0.0015, + "loss": 1.6361, + "step": 3940 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 0.5785304307937622, + "learning_rate": 0.0015, + "loss": 1.6409, + "step": 3950 + }, + { + "epoch": 0.4177215189873418, + "grad_norm": 0.4608519673347473, + "learning_rate": 0.0015, + "loss": 1.6419, + "step": 3960 + }, + { + "epoch": 0.41877637130801687, + "grad_norm": 0.4348074197769165, + "learning_rate": 0.0015, + "loss": 1.6304, + "step": 3970 + }, + { + "epoch": 0.41983122362869196, + "grad_norm": 0.4958269000053406, + "learning_rate": 0.0015, + "loss": 1.6421, + "step": 3980 + }, + { + "epoch": 0.4208860759493671, + "grad_norm": 0.6315652132034302, + "learning_rate": 0.0015, + "loss": 1.6315, + "step": 3990 + }, + { + "epoch": 0.4219409282700422, + "grad_norm": 0.5757411122322083, + "learning_rate": 0.0015, + "loss": 1.6462, + "step": 4000 + }, + { + "epoch": 0.4229957805907173, + "grad_norm": 0.7426285743713379, + "learning_rate": 0.0015, + "loss": 1.6369, + "step": 4010 + }, + { + "epoch": 0.4240506329113924, + "grad_norm": 0.5850421786308289, + "learning_rate": 0.0015, + "loss": 1.6371, + "step": 4020 + }, + { + "epoch": 0.42510548523206754, + "grad_norm": 0.5360798239707947, + "learning_rate": 0.0015, + "loss": 1.6327, + "step": 4030 + }, + { + "epoch": 0.42616033755274263, + "grad_norm": 0.5775125026702881, + "learning_rate": 0.0015, + "loss": 1.6381, + "step": 4040 + }, + { + "epoch": 0.4272151898734177, + "grad_norm": 0.5402942895889282, + "learning_rate": 0.0015, + "loss": 1.6399, + "step": 4050 + }, + { + "epoch": 0.4282700421940928, + "grad_norm": 0.6813234686851501, + "learning_rate": 0.0015, + "loss": 1.6481, + "step": 4060 + }, + { + "epoch": 0.4293248945147679, + "grad_norm": 0.8612727522850037, + 
"learning_rate": 0.0015, + "loss": 1.6408, + "step": 4070 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 0.6534830927848816, + "learning_rate": 0.0015, + "loss": 1.6458, + "step": 4080 + }, + { + "epoch": 0.43143459915611815, + "grad_norm": 0.9965766072273254, + "learning_rate": 0.0015, + "loss": 1.6428, + "step": 4090 + }, + { + "epoch": 0.43248945147679324, + "grad_norm": 0.43894922733306885, + "learning_rate": 0.0015, + "loss": 1.6367, + "step": 4100 + }, + { + "epoch": 0.43354430379746833, + "grad_norm": 0.5145808458328247, + "learning_rate": 0.0015, + "loss": 1.6326, + "step": 4110 + }, + { + "epoch": 0.4345991561181435, + "grad_norm": 0.883119523525238, + "learning_rate": 0.0015, + "loss": 1.6352, + "step": 4120 + }, + { + "epoch": 0.4356540084388186, + "grad_norm": 0.7677164673805237, + "learning_rate": 0.0015, + "loss": 1.6321, + "step": 4130 + }, + { + "epoch": 0.43670886075949367, + "grad_norm": 0.5919917821884155, + "learning_rate": 0.0015, + "loss": 1.6354, + "step": 4140 + }, + { + "epoch": 0.43776371308016876, + "grad_norm": 0.6078453063964844, + "learning_rate": 0.0015, + "loss": 1.6396, + "step": 4150 + }, + { + "epoch": 0.4388185654008439, + "grad_norm": 0.4383150339126587, + "learning_rate": 0.0015, + "loss": 1.6294, + "step": 4160 + }, + { + "epoch": 0.439873417721519, + "grad_norm": 0.47161468863487244, + "learning_rate": 0.0015, + "loss": 1.632, + "step": 4170 + }, + { + "epoch": 0.4409282700421941, + "grad_norm": 0.6369029879570007, + "learning_rate": 0.0015, + "loss": 1.6271, + "step": 4180 + }, + { + "epoch": 0.4419831223628692, + "grad_norm": 0.43082642555236816, + "learning_rate": 0.0015, + "loss": 1.6329, + "step": 4190 + }, + { + "epoch": 0.4430379746835443, + "grad_norm": 0.4800276756286621, + "learning_rate": 0.0015, + "loss": 1.6332, + "step": 4200 + }, + { + "epoch": 0.4440928270042194, + "grad_norm": 0.5221289992332458, + "learning_rate": 0.0015, + "loss": 1.6217, + "step": 4210 + }, + { + "epoch": 0.4451476793248945, + 
"grad_norm": 0.44972649216651917, + "learning_rate": 0.0015, + "loss": 1.632, + "step": 4220 + }, + { + "epoch": 0.4462025316455696, + "grad_norm": 0.5700702667236328, + "learning_rate": 0.0015, + "loss": 1.6285, + "step": 4230 + }, + { + "epoch": 0.4472573839662447, + "grad_norm": 0.41922304034233093, + "learning_rate": 0.0015, + "loss": 1.6275, + "step": 4240 + }, + { + "epoch": 0.44831223628691985, + "grad_norm": 0.4673209488391876, + "learning_rate": 0.0015, + "loss": 1.6316, + "step": 4250 + }, + { + "epoch": 0.44936708860759494, + "grad_norm": 0.8012862205505371, + "learning_rate": 0.0015, + "loss": 1.6359, + "step": 4260 + }, + { + "epoch": 0.45042194092827004, + "grad_norm": 0.564134418964386, + "learning_rate": 0.0015, + "loss": 1.6252, + "step": 4270 + }, + { + "epoch": 0.45147679324894513, + "grad_norm": 0.7686864137649536, + "learning_rate": 0.0015, + "loss": 1.6207, + "step": 4280 + }, + { + "epoch": 0.4525316455696203, + "grad_norm": 0.5233380794525146, + "learning_rate": 0.0015, + "loss": 1.6334, + "step": 4290 + }, + { + "epoch": 0.45358649789029537, + "grad_norm": 0.7524824142456055, + "learning_rate": 0.0015, + "loss": 1.6298, + "step": 4300 + }, + { + "epoch": 0.45464135021097046, + "grad_norm": 0.6613220572471619, + "learning_rate": 0.0015, + "loss": 1.633, + "step": 4310 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 0.43975117802619934, + "learning_rate": 0.0015, + "loss": 1.6251, + "step": 4320 + }, + { + "epoch": 0.45675105485232065, + "grad_norm": 0.6859729886054993, + "learning_rate": 0.0015, + "loss": 1.6333, + "step": 4330 + }, + { + "epoch": 0.4578059071729958, + "grad_norm": 0.527412474155426, + "learning_rate": 0.0015, + "loss": 1.6243, + "step": 4340 + }, + { + "epoch": 0.4588607594936709, + "grad_norm": 0.5228123664855957, + "learning_rate": 0.0015, + "loss": 1.6248, + "step": 4350 + }, + { + "epoch": 0.459915611814346, + "grad_norm": 0.6889928579330444, + "learning_rate": 0.0015, + "loss": 1.6422, + "step": 4360 + }, + { + 
"epoch": 0.4609704641350211, + "grad_norm": 0.5031303763389587, + "learning_rate": 0.0015, + "loss": 1.6164, + "step": 4370 + }, + { + "epoch": 0.4620253164556962, + "grad_norm": 0.4771326184272766, + "learning_rate": 0.0015, + "loss": 1.623, + "step": 4380 + }, + { + "epoch": 0.4630801687763713, + "grad_norm": 0.6762826442718506, + "learning_rate": 0.0015, + "loss": 1.6184, + "step": 4390 + }, + { + "epoch": 0.4641350210970464, + "grad_norm": 0.4187808334827423, + "learning_rate": 0.0015, + "loss": 1.6274, + "step": 4400 + }, + { + "epoch": 0.4651898734177215, + "grad_norm": 0.5549326539039612, + "learning_rate": 0.0015, + "loss": 1.6426, + "step": 4410 + }, + { + "epoch": 0.46624472573839665, + "grad_norm": 0.44259676337242126, + "learning_rate": 0.0015, + "loss": 1.6236, + "step": 4420 + }, + { + "epoch": 0.46729957805907174, + "grad_norm": 0.5867189764976501, + "learning_rate": 0.0015, + "loss": 1.6281, + "step": 4430 + }, + { + "epoch": 0.46835443037974683, + "grad_norm": 0.728274941444397, + "learning_rate": 0.0015, + "loss": 1.6236, + "step": 4440 + }, + { + "epoch": 0.4694092827004219, + "grad_norm": 0.5667452216148376, + "learning_rate": 0.0015, + "loss": 1.6342, + "step": 4450 + }, + { + "epoch": 0.4704641350210971, + "grad_norm": 1.0014342069625854, + "learning_rate": 0.0015, + "loss": 1.6211, + "step": 4460 + }, + { + "epoch": 0.47151898734177217, + "grad_norm": 0.8299092054367065, + "learning_rate": 0.0015, + "loss": 1.6294, + "step": 4470 + }, + { + "epoch": 0.47257383966244726, + "grad_norm": 0.6380898356437683, + "learning_rate": 0.0015, + "loss": 1.6346, + "step": 4480 + }, + { + "epoch": 0.47362869198312235, + "grad_norm": 0.5860293507575989, + "learning_rate": 0.0015, + "loss": 1.6202, + "step": 4490 + }, + { + "epoch": 0.47468354430379744, + "grad_norm": 0.45869171619415283, + "learning_rate": 0.0015, + "loss": 1.6191, + "step": 4500 + }, + { + "epoch": 0.4757383966244726, + "grad_norm": 0.5300114750862122, + "learning_rate": 0.0015, + "loss": 
1.6234, + "step": 4510 + }, + { + "epoch": 0.4767932489451477, + "grad_norm": 0.48694780468940735, + "learning_rate": 0.0015, + "loss": 1.6064, + "step": 4520 + }, + { + "epoch": 0.4778481012658228, + "grad_norm": 0.6806397438049316, + "learning_rate": 0.0015, + "loss": 1.6246, + "step": 4530 + }, + { + "epoch": 0.47890295358649787, + "grad_norm": 0.7818400263786316, + "learning_rate": 0.0015, + "loss": 1.6271, + "step": 4540 + }, + { + "epoch": 0.479957805907173, + "grad_norm": 0.5127776861190796, + "learning_rate": 0.0015, + "loss": 1.6244, + "step": 4550 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 0.8508099317550659, + "learning_rate": 0.0015, + "loss": 1.6163, + "step": 4560 + }, + { + "epoch": 0.4820675105485232, + "grad_norm": 0.42512232065200806, + "learning_rate": 0.0015, + "loss": 1.5981, + "step": 4570 + }, + { + "epoch": 0.4831223628691983, + "grad_norm": 0.46401745080947876, + "learning_rate": 0.0015, + "loss": 1.6187, + "step": 4580 + }, + { + "epoch": 0.48417721518987344, + "grad_norm": 0.5017504096031189, + "learning_rate": 0.0015, + "loss": 1.6052, + "step": 4590 + }, + { + "epoch": 0.48523206751054854, + "grad_norm": 0.6696678996086121, + "learning_rate": 0.0015, + "loss": 1.6241, + "step": 4600 + }, + { + "epoch": 0.48628691983122363, + "grad_norm": 0.6821325421333313, + "learning_rate": 0.0015, + "loss": 1.6287, + "step": 4610 + }, + { + "epoch": 0.4873417721518987, + "grad_norm": 0.5087493658065796, + "learning_rate": 0.0015, + "loss": 1.6061, + "step": 4620 + }, + { + "epoch": 0.4883966244725738, + "grad_norm": 0.5839017629623413, + "learning_rate": 0.0015, + "loss": 1.6273, + "step": 4630 + }, + { + "epoch": 0.48945147679324896, + "grad_norm": 0.482339471578598, + "learning_rate": 0.0015, + "loss": 1.6226, + "step": 4640 + }, + { + "epoch": 0.49050632911392406, + "grad_norm": 0.7293662428855896, + "learning_rate": 0.0015, + "loss": 1.628, + "step": 4650 + }, + { + "epoch": 0.49156118143459915, + "grad_norm": 0.6664180159568787, + 
"learning_rate": 0.0015, + "loss": 1.6145, + "step": 4660 + }, + { + "epoch": 0.49261603375527424, + "grad_norm": 0.47784915566444397, + "learning_rate": 0.0015, + "loss": 1.6152, + "step": 4670 + }, + { + "epoch": 0.4936708860759494, + "grad_norm": 0.5330538749694824, + "learning_rate": 0.0015, + "loss": 1.6113, + "step": 4680 + }, + { + "epoch": 0.4947257383966245, + "grad_norm": 0.5254371166229248, + "learning_rate": 0.0015, + "loss": 1.6081, + "step": 4690 + }, + { + "epoch": 0.4957805907172996, + "grad_norm": 0.6671851873397827, + "learning_rate": 0.0015, + "loss": 1.6205, + "step": 4700 + }, + { + "epoch": 0.49683544303797467, + "grad_norm": 0.5424262881278992, + "learning_rate": 0.0015, + "loss": 1.6209, + "step": 4710 + }, + { + "epoch": 0.4978902953586498, + "grad_norm": 0.7977746725082397, + "learning_rate": 0.0015, + "loss": 1.6144, + "step": 4720 + }, + { + "epoch": 0.4989451476793249, + "grad_norm": 0.7952998280525208, + "learning_rate": 0.0015, + "loss": 1.6133, + "step": 4730 + }, + { + "epoch": 0.5, + "grad_norm": 1.034942626953125, + "learning_rate": 0.0015, + "loss": 1.6122, + "step": 4740 + }, + { + "epoch": 0.5010548523206751, + "grad_norm": 0.5086579918861389, + "learning_rate": 0.0015, + "loss": 1.6311, + "step": 4750 + }, + { + "epoch": 0.5021097046413502, + "grad_norm": 0.6869698762893677, + "learning_rate": 0.0015, + "loss": 1.6143, + "step": 4760 + }, + { + "epoch": 0.5031645569620253, + "grad_norm": 0.5302034020423889, + "learning_rate": 0.0015, + "loss": 1.6114, + "step": 4770 + }, + { + "epoch": 0.5042194092827004, + "grad_norm": 0.7200300097465515, + "learning_rate": 0.0015, + "loss": 1.6214, + "step": 4780 + }, + { + "epoch": 0.5052742616033755, + "grad_norm": 0.4996623694896698, + "learning_rate": 0.0015, + "loss": 1.6078, + "step": 4790 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 0.4812504053115845, + "learning_rate": 0.0015, + "loss": 1.6099, + "step": 4800 + }, + { + "epoch": 0.5073839662447257, + "grad_norm": 
0.4332095980644226, + "learning_rate": 0.0015, + "loss": 1.6109, + "step": 4810 + }, + { + "epoch": 0.5084388185654009, + "grad_norm": 0.6265988945960999, + "learning_rate": 0.0015, + "loss": 1.6323, + "step": 4820 + }, + { + "epoch": 0.509493670886076, + "grad_norm": 0.8052026033401489, + "learning_rate": 0.0015, + "loss": 1.6171, + "step": 4830 + }, + { + "epoch": 0.510548523206751, + "grad_norm": 0.5490649342536926, + "learning_rate": 0.0015, + "loss": 1.6106, + "step": 4840 + }, + { + "epoch": 0.5116033755274262, + "grad_norm": 0.5358046889305115, + "learning_rate": 0.0015, + "loss": 1.6246, + "step": 4850 + }, + { + "epoch": 0.5126582278481012, + "grad_norm": 0.48801183700561523, + "learning_rate": 0.0015, + "loss": 1.6067, + "step": 4860 + }, + { + "epoch": 0.5137130801687764, + "grad_norm": 0.5564013719558716, + "learning_rate": 0.0015, + "loss": 1.6033, + "step": 4870 + }, + { + "epoch": 0.5147679324894515, + "grad_norm": 0.5191406011581421, + "learning_rate": 0.0015, + "loss": 1.623, + "step": 4880 + }, + { + "epoch": 0.5158227848101266, + "grad_norm": 0.7706090211868286, + "learning_rate": 0.0015, + "loss": 1.6147, + "step": 4890 + }, + { + "epoch": 0.5168776371308017, + "grad_norm": 0.5492609739303589, + "learning_rate": 0.0015, + "loss": 1.6201, + "step": 4900 + }, + { + "epoch": 0.5179324894514767, + "grad_norm": 0.5550104379653931, + "learning_rate": 0.0015, + "loss": 1.6163, + "step": 4910 + }, + { + "epoch": 0.5189873417721519, + "grad_norm": 0.5112224221229553, + "learning_rate": 0.0015, + "loss": 1.6052, + "step": 4920 + }, + { + "epoch": 0.520042194092827, + "grad_norm": 0.6472383141517639, + "learning_rate": 0.0015, + "loss": 1.6139, + "step": 4930 + }, + { + "epoch": 0.5210970464135021, + "grad_norm": 0.6084277033805847, + "learning_rate": 0.0015, + "loss": 1.61, + "step": 4940 + }, + { + "epoch": 0.5221518987341772, + "grad_norm": 0.7113805413246155, + "learning_rate": 0.0015, + "loss": 1.6205, + "step": 4950 + }, + { + "epoch": 
0.5232067510548524, + "grad_norm": 0.746320366859436, + "learning_rate": 0.0015, + "loss": 1.6013, + "step": 4960 + }, + { + "epoch": 0.5242616033755274, + "grad_norm": 0.6392861008644104, + "learning_rate": 0.0015, + "loss": 1.6131, + "step": 4970 + }, + { + "epoch": 0.5253164556962026, + "grad_norm": 0.5408276319503784, + "learning_rate": 0.0015, + "loss": 1.6038, + "step": 4980 + }, + { + "epoch": 0.5263713080168776, + "grad_norm": 0.6266064047813416, + "learning_rate": 0.0015, + "loss": 1.6085, + "step": 4990 + }, + { + "epoch": 0.5274261603375527, + "grad_norm": 0.579879641532898, + "learning_rate": 0.0015, + "loss": 1.6184, + "step": 5000 + }, + { + "epoch": 0.5284810126582279, + "grad_norm": 0.677553117275238, + "learning_rate": 0.0015, + "loss": 1.6108, + "step": 5010 + }, + { + "epoch": 0.5295358649789029, + "grad_norm": 0.4388851225376129, + "learning_rate": 0.0015, + "loss": 1.6034, + "step": 5020 + }, + { + "epoch": 0.5305907172995781, + "grad_norm": 0.5612084865570068, + "learning_rate": 0.0015, + "loss": 1.589, + "step": 5030 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 0.48129740357398987, + "learning_rate": 0.0015, + "loss": 1.6155, + "step": 5040 + }, + { + "epoch": 0.5327004219409283, + "grad_norm": 0.620211660861969, + "learning_rate": 0.0015, + "loss": 1.6041, + "step": 5050 + }, + { + "epoch": 0.5337552742616034, + "grad_norm": 0.5643119812011719, + "learning_rate": 0.0015, + "loss": 1.6167, + "step": 5060 + }, + { + "epoch": 0.5348101265822784, + "grad_norm": 0.6635465621948242, + "learning_rate": 0.0015, + "loss": 1.6043, + "step": 5070 + }, + { + "epoch": 0.5358649789029536, + "grad_norm": 0.6689878106117249, + "learning_rate": 0.0015, + "loss": 1.6184, + "step": 5080 + }, + { + "epoch": 0.5369198312236287, + "grad_norm": 0.4475485682487488, + "learning_rate": 0.0015, + "loss": 1.6085, + "step": 5090 + }, + { + "epoch": 0.5379746835443038, + "grad_norm": 0.5719823837280273, + "learning_rate": 0.0015, + "loss": 1.5911, + "step": 
5100 + }, + { + "epoch": 0.5390295358649789, + "grad_norm": 0.5780596733093262, + "learning_rate": 0.0015, + "loss": 1.606, + "step": 5110 + }, + { + "epoch": 0.540084388185654, + "grad_norm": 0.5817821025848389, + "learning_rate": 0.0015, + "loss": 1.6052, + "step": 5120 + }, + { + "epoch": 0.5411392405063291, + "grad_norm": 0.7150209546089172, + "learning_rate": 0.0015, + "loss": 1.6037, + "step": 5130 + }, + { + "epoch": 0.5421940928270043, + "grad_norm": 0.4330749809741974, + "learning_rate": 0.0015, + "loss": 1.6157, + "step": 5140 + }, + { + "epoch": 0.5432489451476793, + "grad_norm": 0.5769237279891968, + "learning_rate": 0.0015, + "loss": 1.6134, + "step": 5150 + }, + { + "epoch": 0.5443037974683544, + "grad_norm": 0.5143467783927917, + "learning_rate": 0.0015, + "loss": 1.6065, + "step": 5160 + }, + { + "epoch": 0.5453586497890295, + "grad_norm": 0.4564622938632965, + "learning_rate": 0.0015, + "loss": 1.6079, + "step": 5170 + }, + { + "epoch": 0.5464135021097046, + "grad_norm": 0.48991790413856506, + "learning_rate": 0.0015, + "loss": 1.6189, + "step": 5180 + }, + { + "epoch": 0.5474683544303798, + "grad_norm": 0.861297070980072, + "learning_rate": 0.0015, + "loss": 1.5964, + "step": 5190 + }, + { + "epoch": 0.5485232067510548, + "grad_norm": 0.5165767669677734, + "learning_rate": 0.0015, + "loss": 1.6115, + "step": 5200 + }, + { + "epoch": 0.54957805907173, + "grad_norm": 0.8736697435379028, + "learning_rate": 0.0015, + "loss": 1.5968, + "step": 5210 + }, + { + "epoch": 0.5506329113924051, + "grad_norm": 0.5755019783973694, + "learning_rate": 0.0015, + "loss": 1.608, + "step": 5220 + }, + { + "epoch": 0.5516877637130801, + "grad_norm": 0.6591705083847046, + "learning_rate": 0.0015, + "loss": 1.6006, + "step": 5230 + }, + { + "epoch": 0.5527426160337553, + "grad_norm": 0.43363070487976074, + "learning_rate": 0.0015, + "loss": 1.6047, + "step": 5240 + }, + { + "epoch": 0.5537974683544303, + "grad_norm": 0.7220332026481628, + "learning_rate": 0.0015, + 
"loss": 1.5995, + "step": 5250 + }, + { + "epoch": 0.5548523206751055, + "grad_norm": 0.488716721534729, + "learning_rate": 0.0015, + "loss": 1.6063, + "step": 5260 + }, + { + "epoch": 0.5559071729957806, + "grad_norm": 0.863763153553009, + "learning_rate": 0.0015, + "loss": 1.6128, + "step": 5270 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 0.48147138953208923, + "learning_rate": 0.0015, + "loss": 1.6018, + "step": 5280 + }, + { + "epoch": 0.5580168776371308, + "grad_norm": 0.5747051239013672, + "learning_rate": 0.0015, + "loss": 1.6049, + "step": 5290 + }, + { + "epoch": 0.5590717299578059, + "grad_norm": 0.6245841383934021, + "learning_rate": 0.0015, + "loss": 1.6079, + "step": 5300 + }, + { + "epoch": 0.560126582278481, + "grad_norm": 0.5443736910820007, + "learning_rate": 0.0015, + "loss": 1.5975, + "step": 5310 + }, + { + "epoch": 0.5611814345991561, + "grad_norm": 0.777647852897644, + "learning_rate": 0.0015, + "loss": 1.5973, + "step": 5320 + }, + { + "epoch": 0.5622362869198312, + "grad_norm": 1.218781590461731, + "learning_rate": 0.0015, + "loss": 1.6135, + "step": 5330 + }, + { + "epoch": 0.5632911392405063, + "grad_norm": 0.941681444644928, + "learning_rate": 0.0015, + "loss": 1.6054, + "step": 5340 + }, + { + "epoch": 0.5643459915611815, + "grad_norm": 0.6715230941772461, + "learning_rate": 0.0015, + "loss": 1.5896, + "step": 5350 + }, + { + "epoch": 0.5654008438818565, + "grad_norm": 0.5077252388000488, + "learning_rate": 0.0015, + "loss": 1.5997, + "step": 5360 + }, + { + "epoch": 0.5664556962025317, + "grad_norm": 0.6214714050292969, + "learning_rate": 0.0015, + "loss": 1.6016, + "step": 5370 + }, + { + "epoch": 0.5675105485232067, + "grad_norm": 0.43598514795303345, + "learning_rate": 0.0015, + "loss": 1.5954, + "step": 5380 + }, + { + "epoch": 0.5685654008438819, + "grad_norm": 0.6077280640602112, + "learning_rate": 0.0015, + "loss": 1.595, + "step": 5390 + }, + { + "epoch": 0.569620253164557, + "grad_norm": 0.582649290561676, + 
"learning_rate": 0.0015, + "loss": 1.5969, + "step": 5400 + }, + { + "epoch": 0.570675105485232, + "grad_norm": 0.7912663221359253, + "learning_rate": 0.0015, + "loss": 1.5988, + "step": 5410 + }, + { + "epoch": 0.5717299578059072, + "grad_norm": 0.4473852217197418, + "learning_rate": 0.0015, + "loss": 1.5931, + "step": 5420 + }, + { + "epoch": 0.5727848101265823, + "grad_norm": 0.47345608472824097, + "learning_rate": 0.0015, + "loss": 1.5903, + "step": 5430 + }, + { + "epoch": 0.5738396624472574, + "grad_norm": 0.7846567034721375, + "learning_rate": 0.0015, + "loss": 1.5917, + "step": 5440 + }, + { + "epoch": 0.5748945147679325, + "grad_norm": 0.7544945478439331, + "learning_rate": 0.0015, + "loss": 1.6016, + "step": 5450 + }, + { + "epoch": 0.5759493670886076, + "grad_norm": 0.5894906520843506, + "learning_rate": 0.0015, + "loss": 1.5975, + "step": 5460 + }, + { + "epoch": 0.5770042194092827, + "grad_norm": 0.6474308967590332, + "learning_rate": 0.0015, + "loss": 1.5962, + "step": 5470 + }, + { + "epoch": 0.5780590717299579, + "grad_norm": 1.0168825387954712, + "learning_rate": 0.0015, + "loss": 1.6003, + "step": 5480 + }, + { + "epoch": 0.5791139240506329, + "grad_norm": 0.6084374785423279, + "learning_rate": 0.0015, + "loss": 1.603, + "step": 5490 + }, + { + "epoch": 0.580168776371308, + "grad_norm": 0.7339563369750977, + "learning_rate": 0.0015, + "loss": 1.5958, + "step": 5500 + }, + { + "epoch": 0.5812236286919831, + "grad_norm": 0.8963285088539124, + "learning_rate": 0.0015, + "loss": 1.5998, + "step": 5510 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 0.4471118450164795, + "learning_rate": 0.0015, + "loss": 1.5925, + "step": 5520 + }, + { + "epoch": 0.5833333333333334, + "grad_norm": 0.5358632802963257, + "learning_rate": 0.0015, + "loss": 1.5863, + "step": 5530 + }, + { + "epoch": 0.5843881856540084, + "grad_norm": 0.46802031993865967, + "learning_rate": 0.0015, + "loss": 1.5877, + "step": 5540 + }, + { + "epoch": 0.5854430379746836, + 
"grad_norm": 0.5518823862075806, + "learning_rate": 0.0015, + "loss": 1.6021, + "step": 5550 + }, + { + "epoch": 0.5864978902953587, + "grad_norm": 0.4729432761669159, + "learning_rate": 0.0015, + "loss": 1.6071, + "step": 5560 + }, + { + "epoch": 0.5875527426160337, + "grad_norm": 0.8033674359321594, + "learning_rate": 0.0015, + "loss": 1.5924, + "step": 5570 + }, + { + "epoch": 0.5886075949367089, + "grad_norm": 0.7526063323020935, + "learning_rate": 0.0015, + "loss": 1.5927, + "step": 5580 + }, + { + "epoch": 0.5896624472573839, + "grad_norm": 0.5350978374481201, + "learning_rate": 0.0015, + "loss": 1.5967, + "step": 5590 + }, + { + "epoch": 0.5907172995780591, + "grad_norm": 0.46993488073349, + "learning_rate": 0.0015, + "loss": 1.5944, + "step": 5600 + }, + { + "epoch": 0.5917721518987342, + "grad_norm": 0.650333046913147, + "learning_rate": 0.0015, + "loss": 1.5975, + "step": 5610 + }, + { + "epoch": 0.5928270042194093, + "grad_norm": 0.4917782247066498, + "learning_rate": 0.0015, + "loss": 1.6035, + "step": 5620 + }, + { + "epoch": 0.5938818565400844, + "grad_norm": 0.6928593516349792, + "learning_rate": 0.0015, + "loss": 1.5967, + "step": 5630 + }, + { + "epoch": 0.5949367088607594, + "grad_norm": 0.7578586935997009, + "learning_rate": 0.0015, + "loss": 1.6095, + "step": 5640 + }, + { + "epoch": 0.5959915611814346, + "grad_norm": 0.5395996570587158, + "learning_rate": 0.0015, + "loss": 1.6038, + "step": 5650 + }, + { + "epoch": 0.5970464135021097, + "grad_norm": 0.653925895690918, + "learning_rate": 0.0015, + "loss": 1.5969, + "step": 5660 + }, + { + "epoch": 0.5981012658227848, + "grad_norm": 0.5010302066802979, + "learning_rate": 0.0015, + "loss": 1.6061, + "step": 5670 + }, + { + "epoch": 0.5991561181434599, + "grad_norm": 0.5441184043884277, + "learning_rate": 0.0015, + "loss": 1.6044, + "step": 5680 + }, + { + "epoch": 0.6002109704641351, + "grad_norm": 0.5527088046073914, + "learning_rate": 0.0015, + "loss": 1.5824, + "step": 5690 + }, + { + "epoch": 
0.6012658227848101, + "grad_norm": 0.6557479500770569, + "learning_rate": 0.0015, + "loss": 1.5903, + "step": 5700 + }, + { + "epoch": 0.6023206751054853, + "grad_norm": 0.531202495098114, + "learning_rate": 0.0015, + "loss": 1.5942, + "step": 5710 + }, + { + "epoch": 0.6033755274261603, + "grad_norm": 0.6222879886627197, + "learning_rate": 0.0015, + "loss": 1.5984, + "step": 5720 + }, + { + "epoch": 0.6044303797468354, + "grad_norm": 0.6802123188972473, + "learning_rate": 0.0015, + "loss": 1.5949, + "step": 5730 + }, + { + "epoch": 0.6054852320675106, + "grad_norm": 0.5912046432495117, + "learning_rate": 0.0015, + "loss": 1.5938, + "step": 5740 + }, + { + "epoch": 0.6065400843881856, + "grad_norm": 1.2869442701339722, + "learning_rate": 0.0015, + "loss": 1.5698, + "step": 5750 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 0.6825901865959167, + "learning_rate": 0.0015, + "loss": 1.5779, + "step": 5760 + }, + { + "epoch": 0.6086497890295358, + "grad_norm": 0.5409672260284424, + "learning_rate": 0.0015, + "loss": 1.5807, + "step": 5770 + }, + { + "epoch": 0.609704641350211, + "grad_norm": 0.5308418869972229, + "learning_rate": 0.0015, + "loss": 1.5905, + "step": 5780 + }, + { + "epoch": 0.6107594936708861, + "grad_norm": 0.4643774926662445, + "learning_rate": 0.0015, + "loss": 1.5854, + "step": 5790 + }, + { + "epoch": 0.6118143459915611, + "grad_norm": 0.4992660880088806, + "learning_rate": 0.0015, + "loss": 1.5894, + "step": 5800 + }, + { + "epoch": 0.6128691983122363, + "grad_norm": 0.5887247920036316, + "learning_rate": 0.0015, + "loss": 1.5976, + "step": 5810 + }, + { + "epoch": 0.6139240506329114, + "grad_norm": 0.4516873061656952, + "learning_rate": 0.0015, + "loss": 1.6049, + "step": 5820 + }, + { + "epoch": 0.6149789029535865, + "grad_norm": 0.6803337335586548, + "learning_rate": 0.0015, + "loss": 1.5867, + "step": 5830 + }, + { + "epoch": 0.6160337552742616, + "grad_norm": 0.8297432661056519, + "learning_rate": 0.0015, + "loss": 1.602, + "step": 
5840 + }, + { + "epoch": 0.6170886075949367, + "grad_norm": 0.6586122512817383, + "learning_rate": 0.0015, + "loss": 1.6071, + "step": 5850 + }, + { + "epoch": 0.6181434599156118, + "grad_norm": 0.7014937400817871, + "learning_rate": 0.0015, + "loss": 1.596, + "step": 5860 + }, + { + "epoch": 0.619198312236287, + "grad_norm": 0.620307445526123, + "learning_rate": 0.0015, + "loss": 1.5845, + "step": 5870 + }, + { + "epoch": 0.620253164556962, + "grad_norm": 0.5104919075965881, + "learning_rate": 0.0015, + "loss": 1.5979, + "step": 5880 + }, + { + "epoch": 0.6213080168776371, + "grad_norm": 0.4153170883655548, + "learning_rate": 0.0015, + "loss": 1.5877, + "step": 5890 + }, + { + "epoch": 0.6223628691983122, + "grad_norm": 0.7058597207069397, + "learning_rate": 0.0015, + "loss": 1.5908, + "step": 5900 + }, + { + "epoch": 0.6234177215189873, + "grad_norm": 0.4738933742046356, + "learning_rate": 0.0015, + "loss": 1.5876, + "step": 5910 + }, + { + "epoch": 0.6244725738396625, + "grad_norm": 0.6007941961288452, + "learning_rate": 0.0015, + "loss": 1.5778, + "step": 5920 + }, + { + "epoch": 0.6255274261603375, + "grad_norm": 0.848249614238739, + "learning_rate": 0.0015, + "loss": 1.5879, + "step": 5930 + }, + { + "epoch": 0.6265822784810127, + "grad_norm": 0.6753978729248047, + "learning_rate": 0.0015, + "loss": 1.5923, + "step": 5940 + }, + { + "epoch": 0.6276371308016878, + "grad_norm": 0.7624160647392273, + "learning_rate": 0.0015, + "loss": 1.5839, + "step": 5950 + }, + { + "epoch": 0.6286919831223629, + "grad_norm": 0.5964211225509644, + "learning_rate": 0.0015, + "loss": 1.5907, + "step": 5960 + }, + { + "epoch": 0.629746835443038, + "grad_norm": 0.4659964144229889, + "learning_rate": 0.0015, + "loss": 1.5899, + "step": 5970 + }, + { + "epoch": 0.630801687763713, + "grad_norm": 0.43876907229423523, + "learning_rate": 0.0015, + "loss": 1.5729, + "step": 5980 + }, + { + "epoch": 0.6318565400843882, + "grad_norm": 0.8533287048339844, + "learning_rate": 0.0015, + 
"loss": 1.5949, + "step": 5990 + }, + { + "epoch": 0.6329113924050633, + "grad_norm": 0.6350633502006531, + "learning_rate": 0.0015, + "loss": 1.6018, + "step": 6000 + }, + { + "epoch": 0.6339662447257384, + "grad_norm": 0.5238096714019775, + "learning_rate": 0.0015, + "loss": 1.5858, + "step": 6010 + }, + { + "epoch": 0.6350210970464135, + "grad_norm": 0.9278310537338257, + "learning_rate": 0.0015, + "loss": 1.5943, + "step": 6020 + }, + { + "epoch": 0.6360759493670886, + "grad_norm": 0.6696098446846008, + "learning_rate": 0.0015, + "loss": 1.5773, + "step": 6030 + }, + { + "epoch": 0.6371308016877637, + "grad_norm": 0.6653708219528198, + "learning_rate": 0.0015, + "loss": 1.5946, + "step": 6040 + }, + { + "epoch": 0.6381856540084389, + "grad_norm": 0.5016084313392639, + "learning_rate": 0.0015, + "loss": 1.578, + "step": 6050 + }, + { + "epoch": 0.6392405063291139, + "grad_norm": 0.5422341823577881, + "learning_rate": 0.0015, + "loss": 1.5952, + "step": 6060 + }, + { + "epoch": 0.640295358649789, + "grad_norm": 0.5984102487564087, + "learning_rate": 0.0015, + "loss": 1.5862, + "step": 6070 + }, + { + "epoch": 0.6413502109704642, + "grad_norm": 0.7125038504600525, + "learning_rate": 0.0015, + "loss": 1.5865, + "step": 6080 + }, + { + "epoch": 0.6424050632911392, + "grad_norm": 0.6717996597290039, + "learning_rate": 0.0015, + "loss": 1.5849, + "step": 6090 + }, + { + "epoch": 0.6434599156118144, + "grad_norm": 0.6750341653823853, + "learning_rate": 0.0015, + "loss": 1.5806, + "step": 6100 + }, + { + "epoch": 0.6445147679324894, + "grad_norm": 0.611428439617157, + "learning_rate": 0.0015, + "loss": 1.5926, + "step": 6110 + }, + { + "epoch": 0.6455696202531646, + "grad_norm": 0.5834221839904785, + "learning_rate": 0.0015, + "loss": 1.5852, + "step": 6120 + }, + { + "epoch": 0.6466244725738397, + "grad_norm": 0.4161120355129242, + "learning_rate": 0.0015, + "loss": 1.5831, + "step": 6130 + }, + { + "epoch": 0.6476793248945147, + "grad_norm": 0.6962540149688721, + 
"learning_rate": 0.0015, + "loss": 1.5825, + "step": 6140 + }, + { + "epoch": 0.6487341772151899, + "grad_norm": 0.5041799545288086, + "learning_rate": 0.0015, + "loss": 1.5927, + "step": 6150 + }, + { + "epoch": 0.6497890295358649, + "grad_norm": 0.6145204305648804, + "learning_rate": 0.0015, + "loss": 1.5874, + "step": 6160 + }, + { + "epoch": 0.6508438818565401, + "grad_norm": 0.6151684522628784, + "learning_rate": 0.0014834368975312174, + "loss": 1.5684, + "step": 6170 + }, + { + "epoch": 0.6518987341772152, + "grad_norm": 0.8990934491157532, + "learning_rate": 0.0014629899726345957, + "loss": 1.5928, + "step": 6180 + }, + { + "epoch": 0.6529535864978903, + "grad_norm": 0.527798056602478, + "learning_rate": 0.0014428248775471316, + "loss": 1.5979, + "step": 6190 + }, + { + "epoch": 0.6540084388185654, + "grad_norm": 0.4924493432044983, + "learning_rate": 0.00142293772767289, + "loss": 1.5783, + "step": 6200 + }, + { + "epoch": 0.6550632911392406, + "grad_norm": 0.5190075635910034, + "learning_rate": 0.001403324691959192, + "loss": 1.5802, + "step": 6210 + }, + { + "epoch": 0.6561181434599156, + "grad_norm": 0.5425005555152893, + "learning_rate": 0.0013839819921586025, + "loss": 1.583, + "step": 6220 + }, + { + "epoch": 0.6571729957805907, + "grad_norm": 0.732195258140564, + "learning_rate": 0.0013649059021010894, + "loss": 1.5732, + "step": 6230 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 0.5653340816497803, + "learning_rate": 0.0013460927469762154, + "loss": 1.5734, + "step": 6240 + }, + { + "epoch": 0.6592827004219409, + "grad_norm": 0.4457107484340668, + "learning_rate": 0.0013275389026252255, + "loss": 1.5802, + "step": 6250 + }, + { + "epoch": 0.6603375527426161, + "grad_norm": 0.5443724989891052, + "learning_rate": 0.0013092407948428887, + "loss": 1.5731, + "step": 6260 + }, + { + "epoch": 0.6613924050632911, + "grad_norm": 0.6917367577552795, + "learning_rate": 0.001291194898688966, + "loss": 1.5792, + "step": 6270 + }, + { + "epoch": 
0.6624472573839663, + "grad_norm": 0.49355635046958923, + "learning_rate": 0.001273397737809166, + "loss": 1.5775, + "step": 6280 + }, + { + "epoch": 0.6635021097046413, + "grad_norm": 0.5608471035957336, + "learning_rate": 0.001255845883765463, + "loss": 1.5717, + "step": 6290 + }, + { + "epoch": 0.6645569620253164, + "grad_norm": 0.44105231761932373, + "learning_rate": 0.001238535955375642, + "loss": 1.5659, + "step": 6300 + }, + { + "epoch": 0.6656118143459916, + "grad_norm": 0.5762683749198914, + "learning_rate": 0.0012214646180619506, + "loss": 1.5632, + "step": 6310 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.5470274090766907, + "learning_rate": 0.001204628583208727, + "loss": 1.5598, + "step": 6320 + }, + { + "epoch": 0.6677215189873418, + "grad_norm": 0.5119523406028748, + "learning_rate": 0.0011880246075288827, + "loss": 1.5663, + "step": 6330 + }, + { + "epoch": 0.6687763713080169, + "grad_norm": 0.6202800869941711, + "learning_rate": 0.001171649492439115, + "loss": 1.559, + "step": 6340 + }, + { + "epoch": 0.669831223628692, + "grad_norm": 0.841611921787262, + "learning_rate": 0.0011555000834437364, + "loss": 1.5651, + "step": 6350 + }, + { + "epoch": 0.6708860759493671, + "grad_norm": 0.45137086510658264, + "learning_rate": 0.0011395732695269908, + "loss": 1.5599, + "step": 6360 + }, + { + "epoch": 0.6719409282700421, + "grad_norm": 0.478753000497818, + "learning_rate": 0.0011238659825537505, + "loss": 1.5435, + "step": 6370 + }, + { + "epoch": 0.6729957805907173, + "grad_norm": 0.5273224711418152, + "learning_rate": 0.0011083751966784717, + "loss": 1.5476, + "step": 6380 + }, + { + "epoch": 0.6740506329113924, + "grad_norm": 0.9997715353965759, + "learning_rate": 0.0010930979277622953, + "loss": 1.5642, + "step": 6390 + }, + { + "epoch": 0.6751054852320675, + "grad_norm": 0.5257177352905273, + "learning_rate": 0.0010780312327981854, + "loss": 1.5608, + "step": 6400 + }, + { + "epoch": 0.6761603375527426, + "grad_norm": 0.5972280502319336, 
+ "learning_rate": 0.0010631722093439888, + "loss": 1.5561, + "step": 6410 + }, + { + "epoch": 0.6772151898734177, + "grad_norm": 0.7389847040176392, + "learning_rate": 0.00104851799496331, + "loss": 1.5465, + "step": 6420 + }, + { + "epoch": 0.6782700421940928, + "grad_norm": 0.5938292145729065, + "learning_rate": 0.0010340657666740914, + "loss": 1.553, + "step": 6430 + }, + { + "epoch": 0.679324894514768, + "grad_norm": 0.41769060492515564, + "learning_rate": 0.0010198127404047975, + "loss": 1.5362, + "step": 6440 + }, + { + "epoch": 0.680379746835443, + "grad_norm": 0.44951990246772766, + "learning_rate": 0.0010057561704580897, + "loss": 1.5411, + "step": 6450 + }, + { + "epoch": 0.6814345991561181, + "grad_norm": 0.6148278713226318, + "learning_rate": 0.0009918933489818985, + "loss": 1.5524, + "step": 6460 + }, + { + "epoch": 0.6824894514767933, + "grad_norm": 0.468996524810791, + "learning_rate": 0.0009782216054477827, + "loss": 1.5461, + "step": 6470 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 0.4214553236961365, + "learning_rate": 0.0009647383061364801, + "loss": 1.5529, + "step": 6480 + }, + { + "epoch": 0.6845991561181435, + "grad_norm": 0.5168136954307556, + "learning_rate": 0.0009514408536305495, + "loss": 1.5482, + "step": 6490 + }, + { + "epoch": 0.6856540084388185, + "grad_norm": 0.5339716076850891, + "learning_rate": 0.0009383266863140042, + "loss": 1.561, + "step": 6500 + }, + { + "epoch": 0.6867088607594937, + "grad_norm": 0.5784514546394348, + "learning_rate": 0.000925393277878844, + "loss": 1.5596, + "step": 6510 + }, + { + "epoch": 0.6877637130801688, + "grad_norm": 0.4845491051673889, + "learning_rate": 0.0009126381368383879, + "loss": 1.5424, + "step": 6520 + }, + { + "epoch": 0.6888185654008439, + "grad_norm": 0.4790401756763458, + "learning_rate": 0.0009000588060473156, + "loss": 1.5326, + "step": 6530 + }, + { + "epoch": 0.689873417721519, + "grad_norm": 0.44591090083122253, + "learning_rate": 0.0008876528622283235, + "loss": 
1.5429, + "step": 6540 + }, + { + "epoch": 0.6909282700421941, + "grad_norm": 0.41002029180526733, + "learning_rate": 0.0008754179155053053, + "loss": 1.5375, + "step": 6550 + }, + { + "epoch": 0.6919831223628692, + "grad_norm": 0.6511792540550232, + "learning_rate": 0.0008633516089429683, + "loss": 1.5397, + "step": 6560 + }, + { + "epoch": 0.6930379746835443, + "grad_norm": 0.51832115650177, + "learning_rate": 0.0008514516180927928, + "loss": 1.5354, + "step": 6570 + }, + { + "epoch": 0.6940928270042194, + "grad_norm": 0.4134800136089325, + "learning_rate": 0.0008397156505452524, + "loss": 1.5319, + "step": 6580 + }, + { + "epoch": 0.6951476793248945, + "grad_norm": 0.4657205045223236, + "learning_rate": 0.0008281414454882051, + "loss": 1.5367, + "step": 6590 + }, + { + "epoch": 0.6962025316455697, + "grad_norm": 0.42394086718559265, + "learning_rate": 0.0008167267732713704, + "loss": 1.5395, + "step": 6600 + }, + { + "epoch": 0.6972573839662447, + "grad_norm": 0.5078455209732056, + "learning_rate": 0.0008054694349768117, + "loss": 1.5249, + "step": 6610 + }, + { + "epoch": 0.6983122362869199, + "grad_norm": 0.4855765104293823, + "learning_rate": 0.0007943672619953359, + "loss": 1.5334, + "step": 6620 + }, + { + "epoch": 0.6993670886075949, + "grad_norm": 0.4633086621761322, + "learning_rate": 0.0007834181156087356, + "loss": 1.5255, + "step": 6630 + }, + { + "epoch": 0.70042194092827, + "grad_norm": 0.5075934529304504, + "learning_rate": 0.0007726198865777852, + "loss": 1.5319, + "step": 6640 + }, + { + "epoch": 0.7014767932489452, + "grad_norm": 0.4568498134613037, + "learning_rate": 0.0007619704947359191, + "loss": 1.523, + "step": 6650 + }, + { + "epoch": 0.7025316455696202, + "grad_norm": 0.5546969771385193, + "learning_rate": 0.0007514678885885087, + "loss": 1.524, + "step": 6660 + }, + { + "epoch": 0.7035864978902954, + "grad_norm": 0.7917981147766113, + "learning_rate": 0.0007411100449176633, + "loss": 1.5266, + "step": 6670 + }, + { + "epoch": 
0.7046413502109705, + "grad_norm": 0.44755789637565613, + "learning_rate": 0.0007308949683924791, + "loss": 1.5288, + "step": 6680 + }, + { + "epoch": 0.7056962025316456, + "grad_norm": 0.4976639747619629, + "learning_rate": 0.000720820691184658, + "loss": 1.5231, + "step": 6690 + }, + { + "epoch": 0.7067510548523207, + "grad_norm": 0.43881821632385254, + "learning_rate": 0.0007108852725894269, + "loss": 1.5183, + "step": 6700 + }, + { + "epoch": 0.7078059071729957, + "grad_norm": 0.46466559171676636, + "learning_rate": 0.000701086798651681, + "loss": 1.5243, + "step": 6710 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 0.6676735281944275, + "learning_rate": 0.0006914233817972798, + "loss": 1.514, + "step": 6720 + }, + { + "epoch": 0.709915611814346, + "grad_norm": 0.5401358008384705, + "learning_rate": 0.0006818931604694261, + "loss": 1.5207, + "step": 6730 + }, + { + "epoch": 0.7109704641350211, + "grad_norm": 0.44543299078941345, + "learning_rate": 0.0006724942987700563, + "loss": 1.5255, + "step": 6740 + }, + { + "epoch": 0.7120253164556962, + "grad_norm": 0.40253111720085144, + "learning_rate": 0.0006632249861061732, + "loss": 1.5249, + "step": 6750 + }, + { + "epoch": 0.7130801687763713, + "grad_norm": 0.5226932764053345, + "learning_rate": 0.0006540834368410549, + "loss": 1.5213, + "step": 6760 + }, + { + "epoch": 0.7141350210970464, + "grad_norm": 0.4352646768093109, + "learning_rate": 0.0006450678899502701, + "loss": 1.5232, + "step": 6770 + }, + { + "epoch": 0.7151898734177216, + "grad_norm": 0.6914459466934204, + "learning_rate": 0.0006361766086824345, + "loss": 1.519, + "step": 6780 + }, + { + "epoch": 0.7162447257383966, + "grad_norm": 0.9273353815078735, + "learning_rate": 0.000627407880224645, + "loss": 1.5246, + "step": 6790 + }, + { + "epoch": 0.7172995780590717, + "grad_norm": 0.7584615349769592, + "learning_rate": 0.0006187600153725225, + "loss": 1.5098, + "step": 6800 + }, + { + "epoch": 0.7183544303797469, + "grad_norm": 
0.5383031368255615, + "learning_rate": 0.0006102313482048055, + "loss": 1.514, + "step": 6810 + }, + { + "epoch": 0.7194092827004219, + "grad_norm": 0.42636895179748535, + "learning_rate": 0.0006018202357624274, + "loss": 1.5158, + "step": 6820 + }, + { + "epoch": 0.7204641350210971, + "grad_norm": 0.5778006315231323, + "learning_rate": 0.0005935250577320168, + "loss": 1.5113, + "step": 6830 + }, + { + "epoch": 0.7215189873417721, + "grad_norm": 0.5066882371902466, + "learning_rate": 0.0005853442161337618, + "loss": 1.5011, + "step": 6840 + }, + { + "epoch": 0.7225738396624473, + "grad_norm": 0.5009013414382935, + "learning_rate": 0.0005772761350135759, + "loss": 1.5129, + "step": 6850 + }, + { + "epoch": 0.7236286919831224, + "grad_norm": 0.5174026489257812, + "learning_rate": 0.0005693192601395058, + "loss": 1.5067, + "step": 6860 + }, + { + "epoch": 0.7246835443037974, + "grad_norm": 0.4822061359882355, + "learning_rate": 0.000561472058702326, + "loss": 1.5015, + "step": 6870 + }, + { + "epoch": 0.7257383966244726, + "grad_norm": 0.4462820589542389, + "learning_rate": 0.000553733019020258, + "loss": 1.5112, + "step": 6880 + }, + { + "epoch": 0.7267932489451476, + "grad_norm": 0.4044378697872162, + "learning_rate": 0.0005461006502477612, + "loss": 1.5, + "step": 6890 + }, + { + "epoch": 0.7278481012658228, + "grad_norm": 0.5433807373046875, + "learning_rate": 0.0005385734820883369, + "loss": 1.5033, + "step": 6900 + }, + { + "epoch": 0.7289029535864979, + "grad_norm": 0.7410753965377808, + "learning_rate": 0.0005311500645112907, + "loss": 1.527, + "step": 6910 + }, + { + "epoch": 0.729957805907173, + "grad_norm": 0.5111313462257385, + "learning_rate": 0.0005238289674723993, + "loss": 1.5077, + "step": 6920 + }, + { + "epoch": 0.7310126582278481, + "grad_norm": 0.5060286521911621, + "learning_rate": 0.0005166087806384274, + "loss": 1.5119, + "step": 6930 + }, + { + "epoch": 0.7320675105485233, + "grad_norm": 0.4992697834968567, + "learning_rate": 
0.0005094881131154418, + "loss": 1.5145, + "step": 6940 + }, + { + "epoch": 0.7331223628691983, + "grad_norm": 0.4542028605937958, + "learning_rate": 0.0005024655931808696, + "loss": 1.5131, + "step": 6950 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 0.4581011235713959, + "learning_rate": 0.0004955398680192508, + "loss": 1.5008, + "step": 6960 + }, + { + "epoch": 0.7352320675105485, + "grad_norm": 0.5143532752990723, + "learning_rate": 0.000488709603461632, + "loss": 1.4939, + "step": 6970 + }, + { + "epoch": 0.7362869198312236, + "grad_norm": 0.47939208149909973, + "learning_rate": 0.000481973483728553, + "loss": 1.4985, + "step": 6980 + }, + { + "epoch": 0.7373417721518988, + "grad_norm": 0.40984195470809937, + "learning_rate": 0.0004753302111765748, + "loss": 1.4993, + "step": 6990 + }, + { + "epoch": 0.7383966244725738, + "grad_norm": 0.47273120284080505, + "learning_rate": 0.0004687785060483032, + "loss": 1.509, + "step": 7000 + }, + { + "epoch": 0.739451476793249, + "grad_norm": 0.5313041806221008, + "learning_rate": 0.0004623171062258558, + "loss": 1.479, + "step": 7010 + }, + { + "epoch": 0.740506329113924, + "grad_norm": 0.49590882658958435, + "learning_rate": 0.0004559447669877288, + "loss": 1.4965, + "step": 7020 + }, + { + "epoch": 0.7415611814345991, + "grad_norm": 0.5065464377403259, + "learning_rate": 0.00044966026076901413, + "loss": 1.4981, + "step": 7030 + }, + { + "epoch": 0.7426160337552743, + "grad_norm": 0.4207705557346344, + "learning_rate": 0.00044346237692492177, + "loss": 1.5056, + "step": 7040 + }, + { + "epoch": 0.7436708860759493, + "grad_norm": 0.4077906012535095, + "learning_rate": 0.0004373499214975615, + "loss": 1.4925, + "step": 7050 + }, + { + "epoch": 0.7447257383966245, + "grad_norm": 0.44039446115493774, + "learning_rate": 0.0004313217169859396, + "loss": 1.4962, + "step": 7060 + }, + { + "epoch": 0.7457805907172996, + "grad_norm": 0.4154196083545685, + "learning_rate": 0.0004253766021191256, + "loss": 1.5043, + 
"step": 7070 + }, + { + "epoch": 0.7468354430379747, + "grad_norm": 0.4877999424934387, + "learning_rate": 0.00041951343163254497, + "loss": 1.4993, + "step": 7080 + }, + { + "epoch": 0.7478902953586498, + "grad_norm": 0.4808781147003174, + "learning_rate": 0.00041373107604735626, + "loss": 1.4991, + "step": 7090 + }, + { + "epoch": 0.7489451476793249, + "grad_norm": 0.5578346252441406, + "learning_rate": 0.0004080284214528687, + "loss": 1.4929, + "step": 7100 + }, + { + "epoch": 0.75, + "grad_norm": 0.6338397264480591, + "learning_rate": 0.0004024043692919589, + "loss": 1.5049, + "step": 7110 + }, + { + "epoch": 0.7510548523206751, + "grad_norm": 0.4208441972732544, + "learning_rate": 0.0003968578361494449, + "loss": 1.5021, + "step": 7120 + }, + { + "epoch": 0.7521097046413502, + "grad_norm": 0.5005459189414978, + "learning_rate": 0.000391387753543378, + "loss": 1.5067, + "step": 7130 + }, + { + "epoch": 0.7531645569620253, + "grad_norm": 0.4151977300643921, + "learning_rate": 0.00038599306771921023, + "loss": 1.4884, + "step": 7140 + }, + { + "epoch": 0.7542194092827004, + "grad_norm": 0.5265041589736938, + "learning_rate": 0.0003806727394468004, + "loss": 1.4855, + "step": 7150 + }, + { + "epoch": 0.7552742616033755, + "grad_norm": 0.6657745242118835, + "learning_rate": 0.0003754257438202162, + "loss": 1.4956, + "step": 7160 + }, + { + "epoch": 0.7563291139240507, + "grad_norm": 0.6323660612106323, + "learning_rate": 0.0003702510700602974, + "loss": 1.503, + "step": 7170 + }, + { + "epoch": 0.7573839662447257, + "grad_norm": 0.5100046992301941, + "learning_rate": 0.0003651477213199393, + "loss": 1.4811, + "step": 7180 + }, + { + "epoch": 0.7584388185654009, + "grad_norm": 0.4394318461418152, + "learning_rate": 0.000360114714492061, + "loss": 1.4798, + "step": 7190 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 0.5836018323898315, + "learning_rate": 0.0003551510800202195, + "loss": 1.4887, + "step": 7200 + }, + { + "epoch": 0.760548523206751, + 
"grad_norm": 0.4392067790031433, + "learning_rate": 0.0003502558617118353, + "loss": 1.4938, + "step": 7210 + }, + { + "epoch": 0.7616033755274262, + "grad_norm": 0.415519654750824, + "learning_rate": 0.0003454281165539914, + "loss": 1.5032, + "step": 7220 + }, + { + "epoch": 0.7626582278481012, + "grad_norm": 0.40656188130378723, + "learning_rate": 0.00034066691453177176, + "loss": 1.4989, + "step": 7230 + }, + { + "epoch": 0.7637130801687764, + "grad_norm": 0.547730565071106, + "learning_rate": 0.0003359713384491037, + "loss": 1.4964, + "step": 7240 + }, + { + "epoch": 0.7647679324894515, + "grad_norm": 0.534142792224884, + "learning_rate": 0.00033134048375206944, + "loss": 1.4893, + "step": 7250 + }, + { + "epoch": 0.7658227848101266, + "grad_norm": 0.40249350666999817, + "learning_rate": 0.0003267734583546536, + "loss": 1.4881, + "step": 7260 + }, + { + "epoch": 0.7668776371308017, + "grad_norm": 0.5053009986877441, + "learning_rate": 0.00032226938246689157, + "loss": 1.4893, + "step": 7270 + }, + { + "epoch": 0.7679324894514767, + "grad_norm": 0.435779333114624, + "learning_rate": 0.0003178273884253874, + "loss": 1.4894, + "step": 7280 + }, + { + "epoch": 0.7689873417721519, + "grad_norm": 0.43496453762054443, + "learning_rate": 0.0003134466205261674, + "loss": 1.4981, + "step": 7290 + }, + { + "epoch": 0.770042194092827, + "grad_norm": 0.514687716960907, + "learning_rate": 0.0003091262348598378, + "loss": 1.5026, + "step": 7300 + }, + { + "epoch": 0.7710970464135021, + "grad_norm": 0.40123680233955383, + "learning_rate": 0.0003048653991490141, + "loss": 1.4827, + "step": 7310 + }, + { + "epoch": 0.7721518987341772, + "grad_norm": 0.4594343900680542, + "learning_rate": 0.00030066329258799187, + "loss": 1.4814, + "step": 7320 + }, + { + "epoch": 0.7732067510548524, + "grad_norm": 0.5377413034439087, + "learning_rate": 0.0002965191056846266, + "loss": 1.4868, + "step": 7330 + }, + { + "epoch": 0.7742616033755274, + "grad_norm": 0.5593989491462708, + 
"learning_rate": 0.000292432040104394, + "loss": 1.4817, + "step": 7340 + }, + { + "epoch": 0.7753164556962026, + "grad_norm": 0.5676313638687134, + "learning_rate": 0.00028840130851659853, + "loss": 1.4757, + "step": 7350 + }, + { + "epoch": 0.7763713080168776, + "grad_norm": 0.5032836198806763, + "learning_rate": 0.0002844261344427028, + "loss": 1.4867, + "step": 7360 + }, + { + "epoch": 0.7774261603375527, + "grad_norm": 0.5518320798873901, + "learning_rate": 0.0002805057521067471, + "loss": 1.4865, + "step": 7370 + }, + { + "epoch": 0.7784810126582279, + "grad_norm": 0.5424656867980957, + "learning_rate": 0.00027663940628783017, + "loss": 1.4768, + "step": 7380 + }, + { + "epoch": 0.7795358649789029, + "grad_norm": 0.6343960762023926, + "learning_rate": 0.00027282635217462393, + "loss": 1.484, + "step": 7390 + }, + { + "epoch": 0.7805907172995781, + "grad_norm": 0.4994524419307709, + "learning_rate": 0.0002690658552218937, + "loss": 1.4923, + "step": 7400 + }, + { + "epoch": 0.7816455696202531, + "grad_norm": 0.42348432540893555, + "learning_rate": 0.00026535719100899516, + "loss": 1.4713, + "step": 7410 + }, + { + "epoch": 0.7827004219409283, + "grad_norm": 0.4212459325790405, + "learning_rate": 0.00026169964510032245, + "loss": 1.4788, + "step": 7420 + }, + { + "epoch": 0.7837552742616034, + "grad_norm": 0.5739911794662476, + "learning_rate": 0.00025809251290767984, + "loss": 1.4662, + "step": 7430 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 0.41022229194641113, + "learning_rate": 0.00025453509955454957, + "loss": 1.4688, + "step": 7440 + }, + { + "epoch": 0.7858649789029536, + "grad_norm": 0.5347615480422974, + "learning_rate": 0.00025102671974223175, + "loss": 1.4719, + "step": 7450 + }, + { + "epoch": 0.7869198312236287, + "grad_norm": 0.43664875626564026, + "learning_rate": 0.00024756669761782815, + "loss": 1.4829, + "step": 7460 + }, + { + "epoch": 0.7879746835443038, + "grad_norm": 0.4519919157028198, + "learning_rate": 0.0002441543666440464, 
+ "loss": 1.4707, + "step": 7470 + }, + { + "epoch": 0.7890295358649789, + "grad_norm": 0.5600587129592896, + "learning_rate": 0.00024078906947079878, + "loss": 1.4756, + "step": 7480 + }, + { + "epoch": 0.790084388185654, + "grad_norm": 0.5105632543563843, + "learning_rate": 0.00023747015780857005, + "loss": 1.4862, + "step": 7490 + }, + { + "epoch": 0.7911392405063291, + "grad_norm": 0.6876630187034607, + "learning_rate": 0.00023419699230353144, + "loss": 1.4805, + "step": 7500 + }, + { + "epoch": 0.7921940928270043, + "grad_norm": 0.6306701898574829, + "learning_rate": 0.00023096894241437586, + "loss": 1.4873, + "step": 7510 + }, + { + "epoch": 0.7932489451476793, + "grad_norm": 0.5762724280357361, + "learning_rate": 0.00022778538629085056, + "loss": 1.4707, + "step": 7520 + }, + { + "epoch": 0.7943037974683544, + "grad_norm": 0.522975504398346, + "learning_rate": 0.00022464571065396427, + "loss": 1.475, + "step": 7530 + }, + { + "epoch": 0.7953586497890295, + "grad_norm": 0.6013692617416382, + "learning_rate": 0.00022154931067784521, + "loss": 1.4717, + "step": 7540 + }, + { + "epoch": 0.7964135021097046, + "grad_norm": 0.762579619884491, + "learning_rate": 0.00021849558987322782, + "loss": 1.4657, + "step": 7550 + }, + { + "epoch": 0.7974683544303798, + "grad_norm": 0.6460270285606384, + "learning_rate": 0.0002154839599725452, + "loss": 1.4686, + "step": 7560 + }, + { + "epoch": 0.7985232067510548, + "grad_norm": 0.41718071699142456, + "learning_rate": 0.00021251384081660544, + "loss": 1.4819, + "step": 7570 + }, + { + "epoch": 0.79957805907173, + "grad_norm": 0.5827421545982361, + "learning_rate": 0.0002095846602428303, + "loss": 1.4777, + "step": 7580 + }, + { + "epoch": 0.8006329113924051, + "grad_norm": 0.4144783318042755, + "learning_rate": 0.00020669585397503358, + "loss": 1.4693, + "step": 7590 + }, + { + "epoch": 0.8016877637130801, + "grad_norm": 0.5936702489852905, + "learning_rate": 0.0002038468655147195, + "loss": 1.472, + "step": 7600 + }, + { + 
"epoch": 0.8027426160337553, + "grad_norm": 0.6536031365394592, + "learning_rate": 0.00020103714603387894, + "loss": 1.4859, + "step": 7610 + }, + { + "epoch": 0.8037974683544303, + "grad_norm": 0.410735160112381, + "learning_rate": 0.00019826615426926338, + "loss": 1.4573, + "step": 7620 + }, + { + "epoch": 0.8048523206751055, + "grad_norm": 0.44961825013160706, + "learning_rate": 0.00019553335641811625, + "loss": 1.4811, + "step": 7630 + }, + { + "epoch": 0.8059071729957806, + "grad_norm": 0.4156222343444824, + "learning_rate": 0.0001928382260353415, + "loss": 1.472, + "step": 7640 + }, + { + "epoch": 0.8069620253164557, + "grad_norm": 0.41524773836135864, + "learning_rate": 0.00019018024393208902, + "loss": 1.488, + "step": 7650 + }, + { + "epoch": 0.8080168776371308, + "grad_norm": 0.4502960443496704, + "learning_rate": 0.00018755889807573872, + "loss": 1.4656, + "step": 7660 + }, + { + "epoch": 0.8090717299578059, + "grad_norm": 0.4081941843032837, + "learning_rate": 0.00018497368349126262, + "loss": 1.4724, + "step": 7670 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 0.43221691250801086, + "learning_rate": 0.00018242410216394648, + "loss": 1.4857, + "step": 7680 + }, + { + "epoch": 0.8111814345991561, + "grad_norm": 0.4987049400806427, + "learning_rate": 0.0001799096629434529, + "loss": 1.4596, + "step": 7690 + }, + { + "epoch": 0.8122362869198312, + "grad_norm": 0.4031578302383423, + "learning_rate": 0.00017742988144920578, + "loss": 1.4718, + "step": 7700 + }, + { + "epoch": 0.8132911392405063, + "grad_norm": 0.46239161491394043, + "learning_rate": 0.00017498427997707976, + "loss": 1.4681, + "step": 7710 + }, + { + "epoch": 0.8143459915611815, + "grad_norm": 0.4970310926437378, + "learning_rate": 0.00017257238740737548, + "loss": 1.4732, + "step": 7720 + }, + { + "epoch": 0.8154008438818565, + "grad_norm": 0.47037479281425476, + "learning_rate": 0.00017019373911406307, + "loss": 1.4778, + "step": 7730 + }, + { + "epoch": 0.8164556962025317, + 
"grad_norm": 0.5311369895935059, + "learning_rate": 0.000167847876875277, + "loss": 1.4753, + "step": 7740 + }, + { + "epoch": 0.8175105485232067, + "grad_norm": 0.4282110631465912, + "learning_rate": 0.00016553434878504428, + "loss": 1.4623, + "step": 7750 + }, + { + "epoch": 0.8185654008438819, + "grad_norm": 0.516330897808075, + "learning_rate": 0.00016325270916622947, + "loss": 1.4629, + "step": 7760 + }, + { + "epoch": 0.819620253164557, + "grad_norm": 0.5064811110496521, + "learning_rate": 0.00016100251848467966, + "loss": 1.4731, + "step": 7770 + }, + { + "epoch": 0.820675105485232, + "grad_norm": 0.552573561668396, + "learning_rate": 0.0001587833432645528, + "loss": 1.4639, + "step": 7780 + }, + { + "epoch": 0.8217299578059072, + "grad_norm": 0.4432623088359833, + "learning_rate": 0.00015659475600481292, + "loss": 1.4816, + "step": 7790 + }, + { + "epoch": 0.8227848101265823, + "grad_norm": 0.539801299571991, + "learning_rate": 0.00015443633509687688, + "loss": 1.4655, + "step": 7800 + }, + { + "epoch": 0.8238396624472574, + "grad_norm": 0.4653639495372772, + "learning_rate": 0.00015230766474339536, + "loss": 1.4672, + "step": 7810 + }, + { + "epoch": 0.8248945147679325, + "grad_norm": 0.40810105204582214, + "learning_rate": 0.00015020833487815416, + "loss": 1.477, + "step": 7820 + }, + { + "epoch": 0.8259493670886076, + "grad_norm": 0.4198808968067169, + "learning_rate": 0.0001481379410870792, + "loss": 1.4655, + "step": 7830 + }, + { + "epoch": 0.8270042194092827, + "grad_norm": 0.5031906366348267, + "learning_rate": 0.00014609608453033013, + "loss": 1.4527, + "step": 7840 + }, + { + "epoch": 0.8280590717299579, + "grad_norm": 0.3922320008277893, + "learning_rate": 0.00014408237186546807, + "loss": 1.4605, + "step": 7850 + }, + { + "epoch": 0.8291139240506329, + "grad_norm": 0.5315123796463013, + "learning_rate": 0.00014209641517168273, + "loss": 1.4497, + "step": 7860 + }, + { + "epoch": 0.830168776371308, + "grad_norm": 0.43408292531967163, + 
"learning_rate": 0.00014013783187506265, + "loss": 1.4618, + "step": 7870 + }, + { + "epoch": 0.8312236286919831, + "grad_norm": 0.4878089427947998, + "learning_rate": 0.00013820624467489697, + "loss": 1.4827, + "step": 7880 + }, + { + "epoch": 0.8322784810126582, + "grad_norm": 0.5060374140739441, + "learning_rate": 0.00013630128147099213, + "loss": 1.4792, + "step": 7890 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.4923860430717468, + "learning_rate": 0.00013442257529199068, + "loss": 1.4592, + "step": 7900 + }, + { + "epoch": 0.8343881856540084, + "grad_norm": 0.5069882869720459, + "learning_rate": 0.00013256976422467803, + "loss": 1.4684, + "step": 7910 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 0.5731358528137207, + "learning_rate": 0.00013074249134426366, + "loss": 1.4673, + "step": 7920 + }, + { + "epoch": 0.8364978902953587, + "grad_norm": 0.40071263909339905, + "learning_rate": 0.0001289404046456233, + "loss": 1.4761, + "step": 7930 + }, + { + "epoch": 0.8375527426160337, + "grad_norm": 0.4133740961551666, + "learning_rate": 0.0001271631569754887, + "loss": 1.4674, + "step": 7940 + }, + { + "epoch": 0.8386075949367089, + "grad_norm": 0.40208736062049866, + "learning_rate": 0.0001254104059655723, + "loss": 1.4712, + "step": 7950 + }, + { + "epoch": 0.8396624472573839, + "grad_norm": 0.3982742130756378, + "learning_rate": 0.00012368181396661337, + "loss": 1.455, + "step": 7960 + }, + { + "epoch": 0.8407172995780591, + "grad_norm": 0.4975784718990326, + "learning_rate": 0.00012197704798333364, + "loss": 1.4566, + "step": 7970 + }, + { + "epoch": 0.8417721518987342, + "grad_norm": 0.406220018863678, + "learning_rate": 0.00012029577961028894, + "loss": 1.4556, + "step": 7980 + }, + { + "epoch": 0.8428270042194093, + "grad_norm": 0.43308210372924805, + "learning_rate": 0.00011863768496860542, + "loss": 1.4692, + "step": 7990 + }, + { + "epoch": 0.8438818565400844, + "grad_norm": 0.3904995024204254, + "learning_rate": 
0.00011700244464358777, + "loss": 1.4614, + "step": 8000 + }, + { + "epoch": 0.8449367088607594, + "grad_norm": 0.49914997816085815, + "learning_rate": 0.00011538974362318715, + "loss": 1.4687, + "step": 8010 + }, + { + "epoch": 0.8459915611814346, + "grad_norm": 0.4279344081878662, + "learning_rate": 0.00011379927123731737, + "loss": 1.4624, + "step": 8020 + }, + { + "epoch": 0.8470464135021097, + "grad_norm": 0.4091090261936188, + "learning_rate": 0.0001122307210980077, + "loss": 1.4596, + "step": 8030 + }, + { + "epoch": 0.8481012658227848, + "grad_norm": 0.4029027223587036, + "learning_rate": 0.00011068379104038026, + "loss": 1.4751, + "step": 8040 + }, + { + "epoch": 0.8491561181434599, + "grad_norm": 0.5238761901855469, + "learning_rate": 0.00010915818306444116, + "loss": 1.4561, + "step": 8050 + }, + { + "epoch": 0.8502109704641351, + "grad_norm": 0.4498094618320465, + "learning_rate": 0.00010765360327767384, + "loss": 1.4613, + "step": 8060 + }, + { + "epoch": 0.8512658227848101, + "grad_norm": 0.42569437623023987, + "learning_rate": 0.00010616976183842376, + "loss": 1.4669, + "step": 8070 + }, + { + "epoch": 0.8523206751054853, + "grad_norm": 0.45577147603034973, + "learning_rate": 0.00010470637290006365, + "loss": 1.4691, + "step": 8080 + }, + { + "epoch": 0.8533755274261603, + "grad_norm": 0.4576866030693054, + "learning_rate": 0.00010326315455592764, + "loss": 1.458, + "step": 8090 + }, + { + "epoch": 0.8544303797468354, + "grad_norm": 0.45839405059814453, + "learning_rate": 0.0001018398287850053, + "loss": 1.4508, + "step": 8100 + }, + { + "epoch": 0.8554852320675106, + "grad_norm": 0.4084922671318054, + "learning_rate": 0.00010043612139838357, + "loss": 1.471, + "step": 8110 + }, + { + "epoch": 0.8565400843881856, + "grad_norm": 0.45637550950050354, + "learning_rate": 9.905176198642719e-05, + "loss": 1.4593, + "step": 8120 + }, + { + "epoch": 0.8575949367088608, + "grad_norm": 0.4106643497943878, + "learning_rate": 9.76864838666871e-05, + "loss": 
1.4597, + "step": 8130 + }, + { + "epoch": 0.8586497890295358, + "grad_norm": 0.5586416721343994, + "learning_rate": 9.634002403252676e-05, + "loss": 1.461, + "step": 8140 + }, + { + "epoch": 0.859704641350211, + "grad_norm": 0.41943398118019104, + "learning_rate": 9.501212310245681e-05, + "loss": 1.4543, + "step": 8150 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 0.5332395434379578, + "learning_rate": 9.370252527016777e-05, + "loss": 1.4656, + "step": 8160 + }, + { + "epoch": 0.8618143459915611, + "grad_norm": 0.45243382453918457, + "learning_rate": 9.241097825525163e-05, + "loss": 1.4575, + "step": 8170 + }, + { + "epoch": 0.8628691983122363, + "grad_norm": 0.46501415967941284, + "learning_rate": 9.113723325460276e-05, + "loss": 1.4654, + "step": 8180 + }, + { + "epoch": 0.8639240506329114, + "grad_norm": 0.4448326826095581, + "learning_rate": 8.988104489448849e-05, + "loss": 1.4576, + "step": 8190 + }, + { + "epoch": 0.8649789029535865, + "grad_norm": 0.41820263862609863, + "learning_rate": 8.864217118328042e-05, + "loss": 1.4685, + "step": 8200 + }, + { + "epoch": 0.8660337552742616, + "grad_norm": 0.4040219783782959, + "learning_rate": 8.742037346483729e-05, + "loss": 1.461, + "step": 8210 + }, + { + "epoch": 0.8670886075949367, + "grad_norm": 0.4453597366809845, + "learning_rate": 8.62154163725303e-05, + "loss": 1.4697, + "step": 8220 + }, + { + "epoch": 0.8681434599156118, + "grad_norm": 0.435975581407547, + "learning_rate": 8.502706778390219e-05, + "loss": 1.4645, + "step": 8230 + }, + { + "epoch": 0.869198312236287, + "grad_norm": 0.4368712306022644, + "learning_rate": 8.38550987759513e-05, + "loss": 1.4592, + "step": 8240 + }, + { + "epoch": 0.870253164556962, + "grad_norm": 0.43043869733810425, + "learning_rate": 8.269928358103191e-05, + "loss": 1.4761, + "step": 8250 + }, + { + "epoch": 0.8713080168776371, + "grad_norm": 0.4675109088420868, + "learning_rate": 8.155939954336243e-05, + "loss": 1.4667, + "step": 8260 + }, + { + "epoch": 
0.8723628691983122, + "grad_norm": 0.41896888613700867, + "learning_rate": 8.043522707613312e-05, + "loss": 1.4618, + "step": 8270 + }, + { + "epoch": 0.8734177215189873, + "grad_norm": 0.41147059202194214, + "learning_rate": 7.932654961920486e-05, + "loss": 1.4491, + "step": 8280 + }, + { + "epoch": 0.8744725738396625, + "grad_norm": 0.43678274750709534, + "learning_rate": 7.823315359739135e-05, + "loss": 1.4481, + "step": 8290 + }, + { + "epoch": 0.8755274261603375, + "grad_norm": 0.3967992961406708, + "learning_rate": 7.715482837931577e-05, + "loss": 1.4732, + "step": 8300 + }, + { + "epoch": 0.8765822784810127, + "grad_norm": 0.4164084792137146, + "learning_rate": 7.6091366236835e-05, + "loss": 1.4512, + "step": 8310 + }, + { + "epoch": 0.8776371308016878, + "grad_norm": 0.4792173504829407, + "learning_rate": 7.504256230502289e-05, + "loss": 1.4687, + "step": 8320 + }, + { + "epoch": 0.8786919831223629, + "grad_norm": 0.41159412264823914, + "learning_rate": 7.400821454270524e-05, + "loss": 1.4638, + "step": 8330 + }, + { + "epoch": 0.879746835443038, + "grad_norm": 0.42427778244018555, + "learning_rate": 7.29881236935386e-05, + "loss": 1.4515, + "step": 8340 + }, + { + "epoch": 0.880801687763713, + "grad_norm": 0.4141082763671875, + "learning_rate": 7.198209324762562e-05, + "loss": 1.4527, + "step": 8350 + }, + { + "epoch": 0.8818565400843882, + "grad_norm": 0.4402506351470947, + "learning_rate": 7.098992940365946e-05, + "loss": 1.4513, + "step": 8360 + }, + { + "epoch": 0.8829113924050633, + "grad_norm": 0.4119701385498047, + "learning_rate": 7.001144103159e-05, + "loss": 1.4603, + "step": 8370 + }, + { + "epoch": 0.8839662447257384, + "grad_norm": 0.39887380599975586, + "learning_rate": 6.904643963580461e-05, + "loss": 1.4651, + "step": 8380 + }, + { + "epoch": 0.8850210970464135, + "grad_norm": 0.6100857257843018, + "learning_rate": 6.809473931881644e-05, + "loss": 1.4581, + "step": 8390 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 
0.4508749842643738, + "learning_rate": 6.71561567454532e-05, + "loss": 1.4569, + "step": 8400 + }, + { + "epoch": 0.8871308016877637, + "grad_norm": 0.4711706042289734, + "learning_rate": 6.623051110753948e-05, + "loss": 1.4746, + "step": 8410 + }, + { + "epoch": 0.8881856540084389, + "grad_norm": 0.41305121779441833, + "learning_rate": 6.531762408906607e-05, + "loss": 1.4613, + "step": 8420 + }, + { + "epoch": 0.8892405063291139, + "grad_norm": 0.4427293837070465, + "learning_rate": 6.441731983183912e-05, + "loss": 1.4565, + "step": 8430 + }, + { + "epoch": 0.890295358649789, + "grad_norm": 0.44373971223831177, + "learning_rate": 6.352942490160292e-05, + "loss": 1.453, + "step": 8440 + }, + { + "epoch": 0.8913502109704642, + "grad_norm": 0.39872896671295166, + "learning_rate": 6.265376825462966e-05, + "loss": 1.4625, + "step": 8450 + }, + { + "epoch": 0.8924050632911392, + "grad_norm": 0.4938305616378784, + "learning_rate": 6.179018120476945e-05, + "loss": 1.4604, + "step": 8460 + }, + { + "epoch": 0.8934599156118144, + "grad_norm": 0.4404746890068054, + "learning_rate": 6.0938497390954946e-05, + "loss": 1.4572, + "step": 8470 + }, + { + "epoch": 0.8945147679324894, + "grad_norm": 0.3998059034347534, + "learning_rate": 6.009855274515339e-05, + "loss": 1.4501, + "step": 8480 + }, + { + "epoch": 0.8955696202531646, + "grad_norm": 0.38832971453666687, + "learning_rate": 5.9270185460760735e-05, + "loss": 1.4584, + "step": 8490 + }, + { + "epoch": 0.8966244725738397, + "grad_norm": 0.4013470411300659, + "learning_rate": 5.8453235961431225e-05, + "loss": 1.4455, + "step": 8500 + }, + { + "epoch": 0.8976793248945147, + "grad_norm": 0.4124337434768677, + "learning_rate": 5.764754687033678e-05, + "loss": 1.459, + "step": 8510 + }, + { + "epoch": 0.8987341772151899, + "grad_norm": 0.4225826859474182, + "learning_rate": 5.6852962979849836e-05, + "loss": 1.4554, + "step": 8520 + }, + { + "epoch": 0.8997890295358649, + "grad_norm": 0.40246811509132385, + "learning_rate": 
5.6069331221644284e-05, + "loss": 1.4688, + "step": 8530 + }, + { + "epoch": 0.9008438818565401, + "grad_norm": 0.6622455716133118, + "learning_rate": 5.529650063720842e-05, + "loss": 1.4677, + "step": 8540 + }, + { + "epoch": 0.9018987341772152, + "grad_norm": 0.4136310815811157, + "learning_rate": 5.453432234876445e-05, + "loss": 1.4575, + "step": 8550 + }, + { + "epoch": 0.9029535864978903, + "grad_norm": 0.42627909779548645, + "learning_rate": 5.37826495305886e-05, + "loss": 1.4465, + "step": 8560 + }, + { + "epoch": 0.9040084388185654, + "grad_norm": 0.444655179977417, + "learning_rate": 5.304133738072674e-05, + "loss": 1.4679, + "step": 8570 + }, + { + "epoch": 0.9050632911392406, + "grad_norm": 0.4993586838245392, + "learning_rate": 5.2310243093099814e-05, + "loss": 1.4592, + "step": 8580 + }, + { + "epoch": 0.9061181434599156, + "grad_norm": 0.46483758091926575, + "learning_rate": 5.158922582999368e-05, + "loss": 1.4607, + "step": 8590 + }, + { + "epoch": 0.9071729957805907, + "grad_norm": 0.42821627855300903, + "learning_rate": 5.087814669492819e-05, + "loss": 1.4545, + "step": 8600 + }, + { + "epoch": 0.9082278481012658, + "grad_norm": 0.423797607421875, + "learning_rate": 5.017686870590028e-05, + "loss": 1.4547, + "step": 8610 + }, + { + "epoch": 0.9092827004219409, + "grad_norm": 0.397562712430954, + "learning_rate": 4.948525676899577e-05, + "loss": 1.4513, + "step": 8620 + }, + { + "epoch": 0.9103375527426161, + "grad_norm": 0.4379497170448303, + "learning_rate": 4.880317765236493e-05, + "loss": 1.4587, + "step": 8630 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 0.47238776087760925, + "learning_rate": 4.8130499960556755e-05, + "loss": 1.4442, + "step": 8640 + }, + { + "epoch": 0.9124472573839663, + "grad_norm": 0.409699946641922, + "learning_rate": 4.746709410920699e-05, + "loss": 1.4506, + "step": 8650 + }, + { + "epoch": 0.9135021097046413, + "grad_norm": 0.5099485516548157, + "learning_rate": 4.681283230007507e-05, + "loss": 1.4496, + 
"step": 8660 + }, + { + "epoch": 0.9145569620253164, + "grad_norm": 0.41278332471847534, + "learning_rate": 4.616758849642509e-05, + "loss": 1.4562, + "step": 8670 + }, + { + "epoch": 0.9156118143459916, + "grad_norm": 0.3833622634410858, + "learning_rate": 4.553123839874615e-05, + "loss": 1.4666, + "step": 8680 + }, + { + "epoch": 0.9166666666666666, + "grad_norm": 0.44272372126579285, + "learning_rate": 4.490365942080736e-05, + "loss": 1.4606, + "step": 8690 + }, + { + "epoch": 0.9177215189873418, + "grad_norm": 0.3929062783718109, + "learning_rate": 4.428473066604285e-05, + "loss": 1.459, + "step": 8700 + }, + { + "epoch": 0.9187763713080169, + "grad_norm": 0.42337173223495483, + "learning_rate": 4.367433290426233e-05, + "loss": 1.4579, + "step": 8710 + }, + { + "epoch": 0.919831223628692, + "grad_norm": 0.44215089082717896, + "learning_rate": 4.3072348548682595e-05, + "loss": 1.4516, + "step": 8720 + }, + { + "epoch": 0.9208860759493671, + "grad_norm": 0.41219446063041687, + "learning_rate": 4.247866163327575e-05, + "loss": 1.4622, + "step": 8730 + }, + { + "epoch": 0.9219409282700421, + "grad_norm": 0.4308396577835083, + "learning_rate": 4.1893157790429404e-05, + "loss": 1.4503, + "step": 8740 + }, + { + "epoch": 0.9229957805907173, + "grad_norm": 0.4208793044090271, + "learning_rate": 4.1315724228915066e-05, + "loss": 1.4487, + "step": 8750 + }, + { + "epoch": 0.9240506329113924, + "grad_norm": 0.4248664081096649, + "learning_rate": 4.074624971216005e-05, + "loss": 1.4385, + "step": 8760 + }, + { + "epoch": 0.9251054852320675, + "grad_norm": 0.4497723877429962, + "learning_rate": 4.018462453681889e-05, + "loss": 1.4501, + "step": 8770 + }, + { + "epoch": 0.9261603375527426, + "grad_norm": 0.42813512682914734, + "learning_rate": 3.963074051164014e-05, + "loss": 1.4558, + "step": 8780 + }, + { + "epoch": 0.9272151898734177, + "grad_norm": 0.4101034998893738, + "learning_rate": 3.908449093662446e-05, + "loss": 1.4513, + "step": 8790 + }, + { + "epoch": 
0.9282700421940928, + "grad_norm": 0.4209040105342865, + "learning_rate": 3.854577058246998e-05, + "loss": 1.4501, + "step": 8800 + }, + { + "epoch": 0.929324894514768, + "grad_norm": 0.4299197196960449, + "learning_rate": 3.801447567030094e-05, + "loss": 1.4663, + "step": 8810 + }, + { + "epoch": 0.930379746835443, + "grad_norm": 0.40840667486190796, + "learning_rate": 3.7490503851675777e-05, + "loss": 1.4539, + "step": 8820 + }, + { + "epoch": 0.9314345991561181, + "grad_norm": 0.4192747175693512, + "learning_rate": 3.6973754188870806e-05, + "loss": 1.4695, + "step": 8830 + }, + { + "epoch": 0.9324894514767933, + "grad_norm": 0.43148040771484375, + "learning_rate": 3.6464127135435536e-05, + "loss": 1.4556, + "step": 8840 + }, + { + "epoch": 0.9335443037974683, + "grad_norm": 0.4947909116744995, + "learning_rate": 3.596152451701616e-05, + "loss": 1.452, + "step": 8850 + }, + { + "epoch": 0.9345991561181435, + "grad_norm": 0.41824567317962646, + "learning_rate": 3.5465849512443226e-05, + "loss": 1.4528, + "step": 8860 + }, + { + "epoch": 0.9356540084388185, + "grad_norm": 0.40341225266456604, + "learning_rate": 3.4977006635080086e-05, + "loss": 1.4536, + "step": 8870 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 0.4139859080314636, + "learning_rate": 3.449490171442838e-05, + "loss": 1.4598, + "step": 8880 + }, + { + "epoch": 0.9377637130801688, + "grad_norm": 0.38406530022621155, + "learning_rate": 3.401944187798702e-05, + "loss": 1.4555, + "step": 8890 + }, + { + "epoch": 0.9388185654008439, + "grad_norm": 0.4115971326828003, + "learning_rate": 3.355053553336137e-05, + "loss": 1.4492, + "step": 8900 + }, + { + "epoch": 0.939873417721519, + "grad_norm": 0.4668926000595093, + "learning_rate": 3.308809235061882e-05, + "loss": 1.4471, + "step": 8910 + }, + { + "epoch": 0.9409282700421941, + "grad_norm": 0.4154922664165497, + "learning_rate": 3.263202324488772e-05, + "loss": 1.4538, + "step": 8920 + }, + { + "epoch": 0.9419831223628692, + "grad_norm": 
0.39377495646476746, + "learning_rate": 3.218224035919609e-05, + "loss": 1.4514, + "step": 8930 + }, + { + "epoch": 0.9430379746835443, + "grad_norm": 0.40643295645713806, + "learning_rate": 3.173865704754688e-05, + "loss": 1.4535, + "step": 8940 + }, + { + "epoch": 0.9440928270042194, + "grad_norm": 0.4133358299732208, + "learning_rate": 3.130118785822657e-05, + "loss": 1.4566, + "step": 8950 + }, + { + "epoch": 0.9451476793248945, + "grad_norm": 0.3953145742416382, + "learning_rate": 3.08697485173437e-05, + "loss": 1.457, + "step": 8960 + }, + { + "epoch": 0.9462025316455697, + "grad_norm": 0.4108392894268036, + "learning_rate": 3.0444255912594442e-05, + "loss": 1.4554, + "step": 8970 + }, + { + "epoch": 0.9472573839662447, + "grad_norm": 0.39774221181869507, + "learning_rate": 3.002462807725185e-05, + "loss": 1.4502, + "step": 8980 + }, + { + "epoch": 0.9483122362869199, + "grad_norm": 0.39042046666145325, + "learning_rate": 2.9610784174375868e-05, + "loss": 1.4598, + "step": 8990 + }, + { + "epoch": 0.9493670886075949, + "grad_norm": 0.4192306399345398, + "learning_rate": 2.920264448124087e-05, + "loss": 1.4521, + "step": 9000 + }, + { + "epoch": 0.95042194092827, + "grad_norm": 0.4432130455970764, + "learning_rate": 2.8800130373977936e-05, + "loss": 1.4504, + "step": 9010 + }, + { + "epoch": 0.9514767932489452, + "grad_norm": 0.3934832811355591, + "learning_rate": 2.84031643124288e-05, + "loss": 1.4491, + "step": 9020 + }, + { + "epoch": 0.9525316455696202, + "grad_norm": 0.39443790912628174, + "learning_rate": 2.8011669825208517e-05, + "loss": 1.4693, + "step": 9030 + }, + { + "epoch": 0.9535864978902954, + "grad_norm": 0.42719605565071106, + "learning_rate": 2.762557149497405e-05, + "loss": 1.4443, + "step": 9040 + }, + { + "epoch": 0.9546413502109705, + "grad_norm": 0.38149702548980713, + "learning_rate": 2.724479494389592e-05, + "loss": 1.4579, + "step": 9050 + }, + { + "epoch": 0.9556962025316456, + "grad_norm": 0.4130416214466095, + "learning_rate": 
2.6869266819330058e-05, + "loss": 1.4563, + "step": 9060 + }, + { + "epoch": 0.9567510548523207, + "grad_norm": 0.3876403570175171, + "learning_rate": 2.6498914779687228e-05, + "loss": 1.4574, + "step": 9070 + }, + { + "epoch": 0.9578059071729957, + "grad_norm": 0.389961838722229, + "learning_rate": 2.6133667480497115e-05, + "loss": 1.4573, + "step": 9080 + }, + { + "epoch": 0.9588607594936709, + "grad_norm": 0.4101412892341614, + "learning_rate": 2.5773454560664597e-05, + "loss": 1.4606, + "step": 9090 + }, + { + "epoch": 0.959915611814346, + "grad_norm": 0.4133794605731964, + "learning_rate": 2.541820662891541e-05, + "loss": 1.4369, + "step": 9100 + }, + { + "epoch": 0.9609704641350211, + "grad_norm": 0.4387854337692261, + "learning_rate": 2.5067855250428616e-05, + "loss": 1.4504, + "step": 9110 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 0.41470590233802795, + "learning_rate": 2.472233293365335e-05, + "loss": 1.4581, + "step": 9120 + }, + { + "epoch": 0.9630801687763713, + "grad_norm": 0.4048042893409729, + "learning_rate": 2.4381573117307307e-05, + "loss": 1.4456, + "step": 9130 + }, + { + "epoch": 0.9641350210970464, + "grad_norm": 0.4369741380214691, + "learning_rate": 2.4045510157554362e-05, + "loss": 1.4668, + "step": 9140 + }, + { + "epoch": 0.9651898734177216, + "grad_norm": 0.4158765971660614, + "learning_rate": 2.3714079315358985e-05, + "loss": 1.4517, + "step": 9150 + }, + { + "epoch": 0.9662447257383966, + "grad_norm": 0.40791085362434387, + "learning_rate": 2.338721674401494e-05, + "loss": 1.4613, + "step": 9160 + }, + { + "epoch": 0.9672995780590717, + "grad_norm": 0.39976707100868225, + "learning_rate": 2.30648594768459e-05, + "loss": 1.4562, + "step": 9170 + }, + { + "epoch": 0.9683544303797469, + "grad_norm": 0.40331876277923584, + "learning_rate": 2.2746945415075523e-05, + "loss": 1.4511, + "step": 9180 + }, + { + "epoch": 0.9694092827004219, + "grad_norm": 0.39779213070869446, + "learning_rate": 2.2433413315864803e-05, + "loss": 
1.448, + "step": 9190 + }, + { + "epoch": 0.9704641350210971, + "grad_norm": 0.4350074231624603, + "learning_rate": 2.2124202780514277e-05, + "loss": 1.4514, + "step": 9200 + }, + { + "epoch": 0.9715189873417721, + "grad_norm": 0.418064683675766, + "learning_rate": 2.1819254242828815e-05, + "loss": 1.4525, + "step": 9210 + }, + { + "epoch": 0.9725738396624473, + "grad_norm": 0.3996509611606598, + "learning_rate": 2.151850895764285e-05, + "loss": 1.4508, + "step": 9220 + }, + { + "epoch": 0.9736286919831224, + "grad_norm": 0.41180336475372314, + "learning_rate": 2.12219089895037e-05, + "loss": 1.4419, + "step": 9230 + }, + { + "epoch": 0.9746835443037974, + "grad_norm": 0.4046807289123535, + "learning_rate": 2.092939720151092e-05, + "loss": 1.438, + "step": 9240 + }, + { + "epoch": 0.9757383966244726, + "grad_norm": 0.46560943126678467, + "learning_rate": 2.064091724430947e-05, + "loss": 1.4484, + "step": 9250 + }, + { + "epoch": 0.9767932489451476, + "grad_norm": 0.41788387298583984, + "learning_rate": 2.0356413545234603e-05, + "loss": 1.4458, + "step": 9260 + }, + { + "epoch": 0.9778481012658228, + "grad_norm": 0.42240655422210693, + "learning_rate": 2.0075831297606357e-05, + "loss": 1.4578, + "step": 9270 + }, + { + "epoch": 0.9789029535864979, + "grad_norm": 0.402235209941864, + "learning_rate": 1.9799116450171627e-05, + "loss": 1.4431, + "step": 9280 + }, + { + "epoch": 0.979957805907173, + "grad_norm": 0.39841288328170776, + "learning_rate": 1.952621569669175e-05, + "loss": 1.4523, + "step": 9290 + }, + { + "epoch": 0.9810126582278481, + "grad_norm": 0.39124491810798645, + "learning_rate": 1.9257076465673605e-05, + "loss": 1.4494, + "step": 9300 + }, + { + "epoch": 0.9820675105485233, + "grad_norm": 0.43226784467697144, + "learning_rate": 1.899164691024229e-05, + "loss": 1.4525, + "step": 9310 + }, + { + "epoch": 0.9831223628691983, + "grad_norm": 0.4032827317714691, + "learning_rate": 1.872987589815331e-05, + "loss": 1.4465, + "step": 9320 + }, + { + "epoch": 
0.9841772151898734, + "grad_norm": 0.4149746596813202, + "learning_rate": 1.8471713001942538e-05, + "loss": 1.4598, + "step": 9330 + }, + { + "epoch": 0.9852320675105485, + "grad_norm": 0.38260945677757263, + "learning_rate": 1.8217108489211845e-05, + "loss": 1.4541, + "step": 9340 + }, + { + "epoch": 0.9862869198312236, + "grad_norm": 0.39362403750419617, + "learning_rate": 1.7966013313048696e-05, + "loss": 1.4686, + "step": 9350 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 0.4112773835659027, + "learning_rate": 1.7718379102577752e-05, + "loss": 1.4607, + "step": 9360 + }, + { + "epoch": 0.9883966244725738, + "grad_norm": 0.4066416025161743, + "learning_rate": 1.7474158153642745e-05, + "loss": 1.457, + "step": 9370 + }, + { + "epoch": 0.989451476793249, + "grad_norm": 0.40966859459877014, + "learning_rate": 1.7233303419616745e-05, + "loss": 1.4451, + "step": 9380 + }, + { + "epoch": 0.990506329113924, + "grad_norm": 0.4169604480266571, + "learning_rate": 1.699576850233916e-05, + "loss": 1.446, + "step": 9390 + }, + { + "epoch": 0.9915611814345991, + "grad_norm": 0.4007004499435425, + "learning_rate": 1.6761507643177553e-05, + "loss": 1.4595, + "step": 9400 + }, + { + "epoch": 0.9926160337552743, + "grad_norm": 0.3877018392086029, + "learning_rate": 1.6530475714212752e-05, + "loss": 1.4344, + "step": 9410 + }, + { + "epoch": 0.9936708860759493, + "grad_norm": 0.44535520672798157, + "learning_rate": 1.6302628209545423e-05, + "loss": 1.4545, + "step": 9420 + }, + { + "epoch": 0.9947257383966245, + "grad_norm": 0.3912922441959381, + "learning_rate": 1.6077921236722464e-05, + "loss": 1.4547, + "step": 9430 + }, + { + "epoch": 0.9957805907172996, + "grad_norm": 0.4358745515346527, + "learning_rate": 1.5856311508281594e-05, + "loss": 1.4498, + "step": 9440 + }, + { + "epoch": 0.9968354430379747, + "grad_norm": 0.41719552874565125, + "learning_rate": 1.5637756333412454e-05, + "loss": 1.4595, + "step": 9450 + }, + { + "epoch": 0.9978902953586498, + "grad_norm": 
0.4397567808628082, + "learning_rate": 1.542221360973268e-05, + "loss": 1.4419, + "step": 9460 + }, + { + "epoch": 0.9989451476793249, + "grad_norm": 0.4786853492259979, + "learning_rate": 1.5209641815177312e-05, + "loss": 1.4552, + "step": 9470 + }, + { + "epoch": 1.0, + "grad_norm": 1.1239162683486938, + "learning_rate": 1.5e-05, + "loss": 1.4475, + "step": 9480 + } + ], + "logging_steps": 10, + "max_steps": 9480, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.837914036668352e+16, + "train_batch_size": 1024, + "trial_name": null, + "trial_params": null +} diff --git a/saves-gptj/checkpoint-9480/training_args.bin b/saves-gptj/checkpoint-9480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f139165191b69b6acd446eba57e7c585d2d5f2ac --- /dev/null +++ b/saves-gptj/checkpoint-9480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc389ea40cad6c4dcd2f02ce54fd9a15827f8f2e250bc84e2a2efce0c6cef596 +size 5112 diff --git a/saves-gptj/config.json b/saves-gptj/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f91579c3f4d0a727ebdfcdd5a519a318b31cd88d --- /dev/null +++ b/saves-gptj/config.json @@ -0,0 +1,28 @@ +{ + "activation_function": "gelu_new", + "architectures": [ + "GPTJForCausalLM" + ], + "attn_pdrop": 0.0, + "bos_token_id": 50256, + "embd_pdrop": 0.0, + "eos_token_id": 50256, + "hidden_act": "gelu", + "initializer_range": 0.02, + "intermediate_size": 1024, + "layer_norm_epsilon": 1e-05, + "model_type": "gptj", + "n_embd": 256, + "n_head": 4, + "n_inner": null, + "n_layer": 2, + "n_positions": 2048, + "num_key_value_heads": 4, + "resid_pdrop": 0.0, + "rotary_dim": 64, + "tie_word_embeddings": 
true, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-gptj/generation_config.json b/saves-gptj/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..fb2eba6018c75d5bca061373b0ddaa2abf0a1f68 --- /dev/null +++ b/saves-gptj/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 50256, + "eos_token_id": 50256, + "transformers_version": "4.42.4" +} diff --git a/saves-gptj/model.safetensors b/saves-gptj/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6f11246919536c047b60f76c32640258930f59c8 --- /dev/null +++ b/saves-gptj/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ff804f70751681743f7d085e3c31b02d74e01a49724b04666d3852a348d8247 +size 8366216 diff --git a/saves-gptj/result.log b/saves-gptj/result.log new file mode 100644 index 0000000000000000000000000000000000000000..b1d6602bde1bc0842e00e73c837fa8212b644f68 --- /dev/null +++ b/saves-gptj/result.log @@ -0,0 +1 @@ +{'train_runtime': 3097.311, 'train_samples_per_second': 3133.881, 'train_steps_per_second': 3.061, 'train_loss': 1.7198248221401424, 'epoch': 1.0} \ No newline at end of file diff --git a/saves-gptj/special_tokens_map.json b/saves-gptj/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-gptj/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-gptj/tokenizer.json 
b/saves-gptj/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-gptj/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, 
+ ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + 
"äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + 
"æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + 
"åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + 
"ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 
788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, 
+ "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + 
"ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 
1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 
1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 
1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + 
"Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 
1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + 
"èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 
1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-gptj/tokenizer_config.json b/saves-gptj/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-gptj/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "model_max_length": 4096, 
+ "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-llama-bf16/checkpoint-9480/config.json b/saves-llama-bf16/checkpoint-9480/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4dd56814721ed8dfc23e87878a8340d8b4f18fc0 --- /dev/null +++ b/saves-llama-bf16/checkpoint-9480/config.json @@ -0,0 +1,28 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 768, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.42.0", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-llama-bf16/checkpoint-9480/generation_config.json b/saves-llama-bf16/checkpoint-9480/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..59c0f3c6815a220b6b4e852c51be873503df2ce0 --- /dev/null +++ b/saves-llama-bf16/checkpoint-9480/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.42.0" +} diff --git a/saves-llama-bf16/checkpoint-9480/model.safetensors b/saves-llama-bf16/checkpoint-9480/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a39d758e9c96da4a8db150c80986b1ad0946cf88 --- /dev/null +++ b/saves-llama-bf16/checkpoint-9480/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1136b4c8d9f04036876e36dbe09274e731871c9b1081c7981af3932adceacea9 +size 8346712 diff --git 
a/saves-llama-bf16/checkpoint-9480/optimizer.pt b/saves-llama-bf16/checkpoint-9480/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..9e42b12b66c56006d0623bf1d7cb5ca683a2f6a4 --- /dev/null +++ b/saves-llama-bf16/checkpoint-9480/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65b30dcfb1affe96f623681c2de4091d36df84220e580a1f37c656ddd146af29 +size 16706402 diff --git a/saves-llama-bf16/checkpoint-9480/rng_state.pth b/saves-llama-bf16/checkpoint-9480/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f8291cd6ce87668b786a72f3e93d072fbe54902 --- /dev/null +++ b/saves-llama-bf16/checkpoint-9480/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c917636c7a58af68a29056522a757e9f9b99005b776641aa157c536967817d +size 14244 diff --git a/saves-llama-bf16/checkpoint-9480/scheduler.pt b/saves-llama-bf16/checkpoint-9480/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..63473f23a031ab0f869bb406d5cf89839262f03d --- /dev/null +++ b/saves-llama-bf16/checkpoint-9480/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbb2bea2f7536b844ad9bb1bf6c3877fce0b1eb4d96764e140560dbf207ce6aa +size 1064 diff --git a/saves-llama-bf16/checkpoint-9480/special_tokens_map.json b/saves-llama-bf16/checkpoint-9480/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-llama-bf16/checkpoint-9480/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false + } +} diff --git a/saves-llama-bf16/checkpoint-9480/tokenizer.json b/saves-llama-bf16/checkpoint-9480/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-llama-bf16/checkpoint-9480/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + 
".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + 
"Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + 
"大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 
487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + 
"Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, 
+ "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 
919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + 
"çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 
1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + 
"äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, 
+ ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 
1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 
1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + 
"Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + 
"Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-llama-bf16/checkpoint-9480/tokenizer_config.json b/saves-llama-bf16/checkpoint-9480/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-llama-bf16/checkpoint-9480/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": 
"<|im_end|>", + "errors": "replace", + "model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-llama-bf16/checkpoint-9480/trainer_state.json b/saves-llama-bf16/checkpoint-9480/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2b9dbb84a07c9a606a96fe3d9b70d5cd7f47719f --- /dev/null +++ b/saves-llama-bf16/checkpoint-9480/trainer_state.json @@ -0,0 +1,6669 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 9480, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0010548523206751054, + "grad_norm": 1.2676714658737183, + "learning_rate": 0.00015822784810126583, + "loss": 7.5053, + "step": 10 + }, + { + "epoch": 0.002109704641350211, + "grad_norm": 1.2003611326217651, + "learning_rate": 0.00031645569620253165, + "loss": 6.918, + "step": 20 + }, + { + "epoch": 0.0031645569620253164, + "grad_norm": 0.8659409284591675, + "learning_rate": 0.00047468354430379745, + "loss": 6.2612, + "step": 30 + }, + { + "epoch": 0.004219409282700422, + "grad_norm": 1.5269256830215454, + "learning_rate": 0.0006329113924050633, + "loss": 5.7649, + "step": 40 + }, + { + "epoch": 0.005274261603375527, + "grad_norm": 0.6712124943733215, + "learning_rate": 0.0007911392405063291, + "loss": 5.2793, + "step": 50 + }, + { + "epoch": 0.006329113924050633, + "grad_norm": 1.05386221408844, + "learning_rate": 0.0009493670886075949, + "loss": 4.7686, + "step": 60 + }, + { + "epoch": 0.007383966244725738, + "grad_norm": 1.1053828001022339, + "learning_rate": 0.0011075949367088608, + "loss": 4.37, + "step": 70 + }, + { + "epoch": 0.008438818565400843, + "grad_norm": 2.077012300491333, + "learning_rate": 0.0012658227848101266, + "loss": 4.1252, + "step": 80 + }, + { + "epoch": 0.00949367088607595, + "grad_norm": 
0.867311418056488, + "learning_rate": 0.0014240506329113926, + "loss": 3.9293, + "step": 90 + }, + { + "epoch": 0.010548523206751054, + "grad_norm": 0.7466582655906677, + "learning_rate": 0.0015, + "loss": 3.7734, + "step": 100 + }, + { + "epoch": 0.011603375527426161, + "grad_norm": 0.5645921230316162, + "learning_rate": 0.0015, + "loss": 3.6186, + "step": 110 + }, + { + "epoch": 0.012658227848101266, + "grad_norm": 0.9498663544654846, + "learning_rate": 0.0015, + "loss": 3.4933, + "step": 120 + }, + { + "epoch": 0.013713080168776372, + "grad_norm": 0.9199284911155701, + "learning_rate": 0.0015, + "loss": 3.3998, + "step": 130 + }, + { + "epoch": 0.014767932489451477, + "grad_norm": 1.0472668409347534, + "learning_rate": 0.0015, + "loss": 3.2958, + "step": 140 + }, + { + "epoch": 0.015822784810126583, + "grad_norm": 0.6909369826316833, + "learning_rate": 0.0015, + "loss": 3.2169, + "step": 150 + }, + { + "epoch": 0.016877637130801686, + "grad_norm": 0.7731783986091614, + "learning_rate": 0.0015, + "loss": 3.151, + "step": 160 + }, + { + "epoch": 0.017932489451476793, + "grad_norm": 0.9465954303741455, + "learning_rate": 0.0015, + "loss": 3.0791, + "step": 170 + }, + { + "epoch": 0.0189873417721519, + "grad_norm": 0.8251365423202515, + "learning_rate": 0.0015, + "loss": 3.0412, + "step": 180 + }, + { + "epoch": 0.020042194092827006, + "grad_norm": 0.7760269045829773, + "learning_rate": 0.0015, + "loss": 2.9729, + "step": 190 + }, + { + "epoch": 0.02109704641350211, + "grad_norm": 0.7894698977470398, + "learning_rate": 0.0015, + "loss": 2.9234, + "step": 200 + }, + { + "epoch": 0.022151898734177215, + "grad_norm": 0.8450524210929871, + "learning_rate": 0.0015, + "loss": 2.8875, + "step": 210 + }, + { + "epoch": 0.023206751054852322, + "grad_norm": 0.8511534333229065, + "learning_rate": 0.0015, + "loss": 2.8503, + "step": 220 + }, + { + "epoch": 0.024261603375527425, + "grad_norm": 0.8807975053787231, + "learning_rate": 0.0015, + "loss": 2.8045, + "step": 230 + }, + 
{ + "epoch": 0.02531645569620253, + "grad_norm": 0.8002775311470032, + "learning_rate": 0.0015, + "loss": 2.7679, + "step": 240 + }, + { + "epoch": 0.026371308016877638, + "grad_norm": 0.7201477885246277, + "learning_rate": 0.0015, + "loss": 2.7345, + "step": 250 + }, + { + "epoch": 0.027426160337552744, + "grad_norm": 0.8155679106712341, + "learning_rate": 0.0015, + "loss": 2.6989, + "step": 260 + }, + { + "epoch": 0.028481012658227847, + "grad_norm": 0.8021968603134155, + "learning_rate": 0.0015, + "loss": 2.6701, + "step": 270 + }, + { + "epoch": 0.029535864978902954, + "grad_norm": 0.8564861416816711, + "learning_rate": 0.0015, + "loss": 2.6345, + "step": 280 + }, + { + "epoch": 0.03059071729957806, + "grad_norm": 0.7679778933525085, + "learning_rate": 0.0015, + "loss": 2.6112, + "step": 290 + }, + { + "epoch": 0.03164556962025317, + "grad_norm": 0.9016502499580383, + "learning_rate": 0.0015, + "loss": 2.5843, + "step": 300 + }, + { + "epoch": 0.03270042194092827, + "grad_norm": 0.8038421273231506, + "learning_rate": 0.0015, + "loss": 2.5602, + "step": 310 + }, + { + "epoch": 0.03375527426160337, + "grad_norm": 0.9400681853294373, + "learning_rate": 0.0015, + "loss": 2.5301, + "step": 320 + }, + { + "epoch": 0.03481012658227848, + "grad_norm": 0.768626868724823, + "learning_rate": 0.0015, + "loss": 2.5243, + "step": 330 + }, + { + "epoch": 0.035864978902953586, + "grad_norm": 0.8398343920707703, + "learning_rate": 0.0015, + "loss": 2.4998, + "step": 340 + }, + { + "epoch": 0.03691983122362869, + "grad_norm": 1.5680025815963745, + "learning_rate": 0.0015, + "loss": 2.4575, + "step": 350 + }, + { + "epoch": 0.0379746835443038, + "grad_norm": 0.9627473950386047, + "learning_rate": 0.0015, + "loss": 2.4406, + "step": 360 + }, + { + "epoch": 0.039029535864978905, + "grad_norm": 0.7779473662376404, + "learning_rate": 0.0015, + "loss": 2.4276, + "step": 370 + }, + { + "epoch": 0.04008438818565401, + "grad_norm": 0.7552341818809509, + "learning_rate": 0.0015, + "loss": 
2.417, + "step": 380 + }, + { + "epoch": 0.04113924050632911, + "grad_norm": 0.8301730751991272, + "learning_rate": 0.0015, + "loss": 2.3935, + "step": 390 + }, + { + "epoch": 0.04219409282700422, + "grad_norm": 0.8521502614021301, + "learning_rate": 0.0015, + "loss": 2.3673, + "step": 400 + }, + { + "epoch": 0.043248945147679324, + "grad_norm": 1.3798494338989258, + "learning_rate": 0.0015, + "loss": 2.3676, + "step": 410 + }, + { + "epoch": 0.04430379746835443, + "grad_norm": 0.9804680347442627, + "learning_rate": 0.0015, + "loss": 2.3442, + "step": 420 + }, + { + "epoch": 0.04535864978902954, + "grad_norm": 0.8778681755065918, + "learning_rate": 0.0015, + "loss": 2.3148, + "step": 430 + }, + { + "epoch": 0.046413502109704644, + "grad_norm": 0.7500641942024231, + "learning_rate": 0.0015, + "loss": 2.3027, + "step": 440 + }, + { + "epoch": 0.04746835443037975, + "grad_norm": 0.790119469165802, + "learning_rate": 0.0015, + "loss": 2.3061, + "step": 450 + }, + { + "epoch": 0.04852320675105485, + "grad_norm": 1.0381672382354736, + "learning_rate": 0.0015, + "loss": 2.2829, + "step": 460 + }, + { + "epoch": 0.049578059071729956, + "grad_norm": 0.8469134569168091, + "learning_rate": 0.0015, + "loss": 2.2644, + "step": 470 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 0.8368005752563477, + "learning_rate": 0.0015, + "loss": 2.2597, + "step": 480 + }, + { + "epoch": 0.05168776371308017, + "grad_norm": 0.7936886548995972, + "learning_rate": 0.0015, + "loss": 2.2322, + "step": 490 + }, + { + "epoch": 0.052742616033755275, + "grad_norm": 0.7298189997673035, + "learning_rate": 0.0015, + "loss": 2.2324, + "step": 500 + }, + { + "epoch": 0.05379746835443038, + "grad_norm": 0.7378261089324951, + "learning_rate": 0.0015, + "loss": 2.2145, + "step": 510 + }, + { + "epoch": 0.05485232067510549, + "grad_norm": 0.9706739783287048, + "learning_rate": 0.0015, + "loss": 2.2, + "step": 520 + }, + { + "epoch": 0.05590717299578059, + "grad_norm": 0.6752395629882812, + 
"learning_rate": 0.0015, + "loss": 2.191, + "step": 530 + }, + { + "epoch": 0.056962025316455694, + "grad_norm": 0.7605948448181152, + "learning_rate": 0.0015, + "loss": 2.1713, + "step": 540 + }, + { + "epoch": 0.0580168776371308, + "grad_norm": 0.7210273146629333, + "learning_rate": 0.0015, + "loss": 2.1739, + "step": 550 + }, + { + "epoch": 0.05907172995780591, + "grad_norm": 0.7085642218589783, + "learning_rate": 0.0015, + "loss": 2.1389, + "step": 560 + }, + { + "epoch": 0.060126582278481014, + "grad_norm": 0.7519193291664124, + "learning_rate": 0.0015, + "loss": 2.1395, + "step": 570 + }, + { + "epoch": 0.06118143459915612, + "grad_norm": 0.8002046942710876, + "learning_rate": 0.0015, + "loss": 2.1431, + "step": 580 + }, + { + "epoch": 0.06223628691983123, + "grad_norm": 0.8533918261528015, + "learning_rate": 0.0015, + "loss": 2.1157, + "step": 590 + }, + { + "epoch": 0.06329113924050633, + "grad_norm": 0.8157392144203186, + "learning_rate": 0.0015, + "loss": 2.0995, + "step": 600 + }, + { + "epoch": 0.06434599156118144, + "grad_norm": 0.8924641013145447, + "learning_rate": 0.0015, + "loss": 2.1053, + "step": 610 + }, + { + "epoch": 0.06540084388185655, + "grad_norm": 1.154096245765686, + "learning_rate": 0.0015, + "loss": 2.0933, + "step": 620 + }, + { + "epoch": 0.06645569620253164, + "grad_norm": 0.8395906090736389, + "learning_rate": 0.0015, + "loss": 2.0779, + "step": 630 + }, + { + "epoch": 0.06751054852320675, + "grad_norm": 0.6407838463783264, + "learning_rate": 0.0015, + "loss": 2.0818, + "step": 640 + }, + { + "epoch": 0.06856540084388185, + "grad_norm": 0.9125556945800781, + "learning_rate": 0.0015, + "loss": 2.0734, + "step": 650 + }, + { + "epoch": 0.06962025316455696, + "grad_norm": 0.982740581035614, + "learning_rate": 0.0015, + "loss": 2.0657, + "step": 660 + }, + { + "epoch": 0.07067510548523206, + "grad_norm": 0.7293259501457214, + "learning_rate": 0.0015, + "loss": 2.0456, + "step": 670 + }, + { + "epoch": 0.07172995780590717, + 
"grad_norm": 0.7736948728561401, + "learning_rate": 0.0015, + "loss": 2.04, + "step": 680 + }, + { + "epoch": 0.07278481012658228, + "grad_norm": 0.7887272834777832, + "learning_rate": 0.0015, + "loss": 2.0486, + "step": 690 + }, + { + "epoch": 0.07383966244725738, + "grad_norm": 0.99362713098526, + "learning_rate": 0.0015, + "loss": 2.032, + "step": 700 + }, + { + "epoch": 0.07489451476793249, + "grad_norm": 0.9181499481201172, + "learning_rate": 0.0015, + "loss": 2.0175, + "step": 710 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 0.8582957983016968, + "learning_rate": 0.0015, + "loss": 2.002, + "step": 720 + }, + { + "epoch": 0.0770042194092827, + "grad_norm": 0.9869788885116577, + "learning_rate": 0.0015, + "loss": 1.9936, + "step": 730 + }, + { + "epoch": 0.07805907172995781, + "grad_norm": 0.6779254674911499, + "learning_rate": 0.0015, + "loss": 2.0055, + "step": 740 + }, + { + "epoch": 0.07911392405063292, + "grad_norm": 0.7355097532272339, + "learning_rate": 0.0015, + "loss": 1.9857, + "step": 750 + }, + { + "epoch": 0.08016877637130802, + "grad_norm": 0.9653441905975342, + "learning_rate": 0.0015, + "loss": 1.98, + "step": 760 + }, + { + "epoch": 0.08122362869198312, + "grad_norm": 0.8166528344154358, + "learning_rate": 0.0015, + "loss": 1.9743, + "step": 770 + }, + { + "epoch": 0.08227848101265822, + "grad_norm": 1.3542753458023071, + "learning_rate": 0.0015, + "loss": 1.9691, + "step": 780 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 0.8158953785896301, + "learning_rate": 0.0015, + "loss": 1.9763, + "step": 790 + }, + { + "epoch": 0.08438818565400844, + "grad_norm": 0.9691234827041626, + "learning_rate": 0.0015, + "loss": 1.9532, + "step": 800 + }, + { + "epoch": 0.08544303797468354, + "grad_norm": 1.1619948148727417, + "learning_rate": 0.0015, + "loss": 1.9407, + "step": 810 + }, + { + "epoch": 0.08649789029535865, + "grad_norm": 0.9012211561203003, + "learning_rate": 0.0015, + "loss": 1.9515, + "step": 820 + }, + { + "epoch": 
0.08755274261603375, + "grad_norm": 0.8661260008811951, + "learning_rate": 0.0015, + "loss": 1.9442, + "step": 830 + }, + { + "epoch": 0.08860759493670886, + "grad_norm": 0.8007470965385437, + "learning_rate": 0.0015, + "loss": 1.9253, + "step": 840 + }, + { + "epoch": 0.08966244725738397, + "grad_norm": 0.635250985622406, + "learning_rate": 0.0015, + "loss": 1.9238, + "step": 850 + }, + { + "epoch": 0.09071729957805907, + "grad_norm": 0.8432357907295227, + "learning_rate": 0.0015, + "loss": 1.9251, + "step": 860 + }, + { + "epoch": 0.09177215189873418, + "grad_norm": 0.7449591755867004, + "learning_rate": 0.0015, + "loss": 1.9227, + "step": 870 + }, + { + "epoch": 0.09282700421940929, + "grad_norm": 0.8097949028015137, + "learning_rate": 0.0015, + "loss": 1.9159, + "step": 880 + }, + { + "epoch": 0.0938818565400844, + "grad_norm": 0.9864391088485718, + "learning_rate": 0.0015, + "loss": 1.9075, + "step": 890 + }, + { + "epoch": 0.0949367088607595, + "grad_norm": 1.0313844680786133, + "learning_rate": 0.0015, + "loss": 1.9069, + "step": 900 + }, + { + "epoch": 0.09599156118143459, + "grad_norm": 0.7370740175247192, + "learning_rate": 0.0015, + "loss": 1.9077, + "step": 910 + }, + { + "epoch": 0.0970464135021097, + "grad_norm": 0.6517109274864197, + "learning_rate": 0.0015, + "loss": 1.8955, + "step": 920 + }, + { + "epoch": 0.0981012658227848, + "grad_norm": 0.7895755171775818, + "learning_rate": 0.0015, + "loss": 1.8807, + "step": 930 + }, + { + "epoch": 0.09915611814345991, + "grad_norm": 0.9607167840003967, + "learning_rate": 0.0015, + "loss": 1.8903, + "step": 940 + }, + { + "epoch": 0.10021097046413502, + "grad_norm": 1.2988543510437012, + "learning_rate": 0.0015, + "loss": 1.8738, + "step": 950 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 0.7540029883384705, + "learning_rate": 0.0015, + "loss": 1.8798, + "step": 960 + }, + { + "epoch": 0.10232067510548523, + "grad_norm": 1.196566104888916, + "learning_rate": 0.0015, + "loss": 1.8807, + "step": 970 
+ }, + { + "epoch": 0.10337552742616034, + "grad_norm": 0.7288890480995178, + "learning_rate": 0.0015, + "loss": 1.8585, + "step": 980 + }, + { + "epoch": 0.10443037974683544, + "grad_norm": 1.2709994316101074, + "learning_rate": 0.0015, + "loss": 1.8573, + "step": 990 + }, + { + "epoch": 0.10548523206751055, + "grad_norm": 0.9284675717353821, + "learning_rate": 0.0015, + "loss": 1.8686, + "step": 1000 + }, + { + "epoch": 0.10654008438818566, + "grad_norm": 0.7057151794433594, + "learning_rate": 0.0015, + "loss": 1.8546, + "step": 1010 + }, + { + "epoch": 0.10759493670886076, + "grad_norm": 1.2227894067764282, + "learning_rate": 0.0015, + "loss": 1.8493, + "step": 1020 + }, + { + "epoch": 0.10864978902953587, + "grad_norm": 0.97975093126297, + "learning_rate": 0.0015, + "loss": 1.8515, + "step": 1030 + }, + { + "epoch": 0.10970464135021098, + "grad_norm": 0.6549012660980225, + "learning_rate": 0.0015, + "loss": 1.8398, + "step": 1040 + }, + { + "epoch": 0.11075949367088607, + "grad_norm": 1.200682282447815, + "learning_rate": 0.0015, + "loss": 1.8433, + "step": 1050 + }, + { + "epoch": 0.11181434599156118, + "grad_norm": 0.6727108955383301, + "learning_rate": 0.0015, + "loss": 1.8286, + "step": 1060 + }, + { + "epoch": 0.11286919831223628, + "grad_norm": 0.6396026611328125, + "learning_rate": 0.0015, + "loss": 1.8194, + "step": 1070 + }, + { + "epoch": 0.11392405063291139, + "grad_norm": 0.7154361605644226, + "learning_rate": 0.0015, + "loss": 1.8204, + "step": 1080 + }, + { + "epoch": 0.1149789029535865, + "grad_norm": 0.686638593673706, + "learning_rate": 0.0015, + "loss": 1.8271, + "step": 1090 + }, + { + "epoch": 0.1160337552742616, + "grad_norm": 0.704549252986908, + "learning_rate": 0.0015, + "loss": 1.8168, + "step": 1100 + }, + { + "epoch": 0.11708860759493671, + "grad_norm": 0.6743365526199341, + "learning_rate": 0.0015, + "loss": 1.8201, + "step": 1110 + }, + { + "epoch": 0.11814345991561181, + "grad_norm": 0.6587491631507874, + "learning_rate": 0.0015, + 
"loss": 1.8143, + "step": 1120 + }, + { + "epoch": 0.11919831223628692, + "grad_norm": 0.7203865647315979, + "learning_rate": 0.0015, + "loss": 1.8006, + "step": 1130 + }, + { + "epoch": 0.12025316455696203, + "grad_norm": 0.6614108085632324, + "learning_rate": 0.0015, + "loss": 1.8132, + "step": 1140 + }, + { + "epoch": 0.12130801687763713, + "grad_norm": 1.0738704204559326, + "learning_rate": 0.0015, + "loss": 1.7983, + "step": 1150 + }, + { + "epoch": 0.12236286919831224, + "grad_norm": 0.8673825860023499, + "learning_rate": 0.0015, + "loss": 1.8037, + "step": 1160 + }, + { + "epoch": 0.12341772151898735, + "grad_norm": 0.9415754079818726, + "learning_rate": 0.0015, + "loss": 1.7945, + "step": 1170 + }, + { + "epoch": 0.12447257383966245, + "grad_norm": 0.7221841216087341, + "learning_rate": 0.0015, + "loss": 1.787, + "step": 1180 + }, + { + "epoch": 0.12552742616033755, + "grad_norm": 0.7380742430686951, + "learning_rate": 0.0015, + "loss": 1.7768, + "step": 1190 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 0.7279195785522461, + "learning_rate": 0.0015, + "loss": 1.7908, + "step": 1200 + }, + { + "epoch": 0.12763713080168776, + "grad_norm": 0.6544088125228882, + "learning_rate": 0.0015, + "loss": 1.7863, + "step": 1210 + }, + { + "epoch": 0.12869198312236288, + "grad_norm": 0.6239087581634521, + "learning_rate": 0.0015, + "loss": 1.7715, + "step": 1220 + }, + { + "epoch": 0.12974683544303797, + "grad_norm": 0.6921641826629639, + "learning_rate": 0.0015, + "loss": 1.7638, + "step": 1230 + }, + { + "epoch": 0.1308016877637131, + "grad_norm": 0.7242016792297363, + "learning_rate": 0.0015, + "loss": 1.7818, + "step": 1240 + }, + { + "epoch": 0.13185654008438819, + "grad_norm": 0.5922678709030151, + "learning_rate": 0.0015, + "loss": 1.7804, + "step": 1250 + }, + { + "epoch": 0.13291139240506328, + "grad_norm": 0.8560516834259033, + "learning_rate": 0.0015, + "loss": 1.7635, + "step": 1260 + }, + { + "epoch": 0.1339662447257384, + "grad_norm": 
0.8240560293197632, + "learning_rate": 0.0015, + "loss": 1.7616, + "step": 1270 + }, + { + "epoch": 0.1350210970464135, + "grad_norm": 0.6459408402442932, + "learning_rate": 0.0015, + "loss": 1.7754, + "step": 1280 + }, + { + "epoch": 0.1360759493670886, + "grad_norm": 0.6974472999572754, + "learning_rate": 0.0015, + "loss": 1.7623, + "step": 1290 + }, + { + "epoch": 0.1371308016877637, + "grad_norm": 0.6455386281013489, + "learning_rate": 0.0015, + "loss": 1.7493, + "step": 1300 + }, + { + "epoch": 0.13818565400843882, + "grad_norm": 0.7955482006072998, + "learning_rate": 0.0015, + "loss": 1.7559, + "step": 1310 + }, + { + "epoch": 0.13924050632911392, + "grad_norm": 0.9156669974327087, + "learning_rate": 0.0015, + "loss": 1.7579, + "step": 1320 + }, + { + "epoch": 0.14029535864978904, + "grad_norm": 1.3482805490493774, + "learning_rate": 0.0015, + "loss": 1.7481, + "step": 1330 + }, + { + "epoch": 0.14135021097046413, + "grad_norm": 0.907547116279602, + "learning_rate": 0.0015, + "loss": 1.7529, + "step": 1340 + }, + { + "epoch": 0.14240506329113925, + "grad_norm": 0.7467724084854126, + "learning_rate": 0.0015, + "loss": 1.7533, + "step": 1350 + }, + { + "epoch": 0.14345991561181434, + "grad_norm": 0.7775065302848816, + "learning_rate": 0.0015, + "loss": 1.7427, + "step": 1360 + }, + { + "epoch": 0.14451476793248946, + "grad_norm": 0.7710546851158142, + "learning_rate": 0.0015, + "loss": 1.7416, + "step": 1370 + }, + { + "epoch": 0.14556962025316456, + "grad_norm": 0.7154306769371033, + "learning_rate": 0.0015, + "loss": 1.7298, + "step": 1380 + }, + { + "epoch": 0.14662447257383968, + "grad_norm": 0.8193673491477966, + "learning_rate": 0.0015, + "loss": 1.744, + "step": 1390 + }, + { + "epoch": 0.14767932489451477, + "grad_norm": 1.3405286073684692, + "learning_rate": 0.0015, + "loss": 1.727, + "step": 1400 + }, + { + "epoch": 0.14873417721518986, + "grad_norm": 0.7622034549713135, + "learning_rate": 0.0015, + "loss": 1.727, + "step": 1410 + }, + { + "epoch": 
0.14978902953586498, + "grad_norm": 0.7322996854782104, + "learning_rate": 0.0015, + "loss": 1.7317, + "step": 1420 + }, + { + "epoch": 0.15084388185654007, + "grad_norm": 0.9026175141334534, + "learning_rate": 0.0015, + "loss": 1.7257, + "step": 1430 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 0.6308895349502563, + "learning_rate": 0.0015, + "loss": 1.721, + "step": 1440 + }, + { + "epoch": 0.1529535864978903, + "grad_norm": 0.6418623924255371, + "learning_rate": 0.0015, + "loss": 1.7226, + "step": 1450 + }, + { + "epoch": 0.1540084388185654, + "grad_norm": 0.6251792907714844, + "learning_rate": 0.0015, + "loss": 1.7199, + "step": 1460 + }, + { + "epoch": 0.1550632911392405, + "grad_norm": 0.9634207487106323, + "learning_rate": 0.0015, + "loss": 1.7171, + "step": 1470 + }, + { + "epoch": 0.15611814345991562, + "grad_norm": 0.6411728858947754, + "learning_rate": 0.0015, + "loss": 1.719, + "step": 1480 + }, + { + "epoch": 0.1571729957805907, + "grad_norm": 0.6816997528076172, + "learning_rate": 0.0015, + "loss": 1.7172, + "step": 1490 + }, + { + "epoch": 0.15822784810126583, + "grad_norm": 0.6228671073913574, + "learning_rate": 0.0015, + "loss": 1.7149, + "step": 1500 + }, + { + "epoch": 0.15928270042194093, + "grad_norm": 0.9275813102722168, + "learning_rate": 0.0015, + "loss": 1.7142, + "step": 1510 + }, + { + "epoch": 0.16033755274261605, + "grad_norm": 0.7076777815818787, + "learning_rate": 0.0015, + "loss": 1.7146, + "step": 1520 + }, + { + "epoch": 0.16139240506329114, + "grad_norm": 0.7567518949508667, + "learning_rate": 0.0015, + "loss": 1.6944, + "step": 1530 + }, + { + "epoch": 0.16244725738396623, + "grad_norm": 0.609905481338501, + "learning_rate": 0.0015, + "loss": 1.684, + "step": 1540 + }, + { + "epoch": 0.16350210970464135, + "grad_norm": 0.9018255472183228, + "learning_rate": 0.0015, + "loss": 1.6892, + "step": 1550 + }, + { + "epoch": 0.16455696202531644, + "grad_norm": 0.9008323550224304, + "learning_rate": 0.0015, + "loss": 1.6962, + 
"step": 1560 + }, + { + "epoch": 0.16561181434599156, + "grad_norm": 0.6872172951698303, + "learning_rate": 0.0015, + "loss": 1.6872, + "step": 1570 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 0.6834494471549988, + "learning_rate": 0.0015, + "loss": 1.696, + "step": 1580 + }, + { + "epoch": 0.16772151898734178, + "grad_norm": 0.773495614528656, + "learning_rate": 0.0015, + "loss": 1.689, + "step": 1590 + }, + { + "epoch": 0.16877637130801687, + "grad_norm": 0.7644145488739014, + "learning_rate": 0.0015, + "loss": 1.6952, + "step": 1600 + }, + { + "epoch": 0.169831223628692, + "grad_norm": 0.6221880912780762, + "learning_rate": 0.0015, + "loss": 1.694, + "step": 1610 + }, + { + "epoch": 0.17088607594936708, + "grad_norm": 0.5826748609542847, + "learning_rate": 0.0015, + "loss": 1.6629, + "step": 1620 + }, + { + "epoch": 0.1719409282700422, + "grad_norm": 0.6441935300827026, + "learning_rate": 0.0015, + "loss": 1.6776, + "step": 1630 + }, + { + "epoch": 0.1729957805907173, + "grad_norm": 0.6708511114120483, + "learning_rate": 0.0015, + "loss": 1.6807, + "step": 1640 + }, + { + "epoch": 0.17405063291139242, + "grad_norm": 1.0699470043182373, + "learning_rate": 0.0015, + "loss": 1.6744, + "step": 1650 + }, + { + "epoch": 0.1751054852320675, + "grad_norm": 0.7459723949432373, + "learning_rate": 0.0015, + "loss": 1.6731, + "step": 1660 + }, + { + "epoch": 0.17616033755274263, + "grad_norm": 0.8440617918968201, + "learning_rate": 0.0015, + "loss": 1.6845, + "step": 1670 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 1.0241174697875977, + "learning_rate": 0.0015, + "loss": 1.6663, + "step": 1680 + }, + { + "epoch": 0.17827004219409281, + "grad_norm": 0.8902535438537598, + "learning_rate": 0.0015, + "loss": 1.6633, + "step": 1690 + }, + { + "epoch": 0.17932489451476794, + "grad_norm": 0.9664157032966614, + "learning_rate": 0.0015, + "loss": 1.6791, + "step": 1700 + }, + { + "epoch": 0.18037974683544303, + "grad_norm": 0.6553189754486084, + 
"learning_rate": 0.0015, + "loss": 1.6587, + "step": 1710 + }, + { + "epoch": 0.18143459915611815, + "grad_norm": 0.5719506144523621, + "learning_rate": 0.0015, + "loss": 1.6547, + "step": 1720 + }, + { + "epoch": 0.18248945147679324, + "grad_norm": 0.6185916662216187, + "learning_rate": 0.0015, + "loss": 1.6599, + "step": 1730 + }, + { + "epoch": 0.18354430379746836, + "grad_norm": 0.7124719619750977, + "learning_rate": 0.0015, + "loss": 1.6647, + "step": 1740 + }, + { + "epoch": 0.18459915611814345, + "grad_norm": 0.5817111730575562, + "learning_rate": 0.0015, + "loss": 1.679, + "step": 1750 + }, + { + "epoch": 0.18565400843881857, + "grad_norm": 0.6421247720718384, + "learning_rate": 0.0015, + "loss": 1.6606, + "step": 1760 + }, + { + "epoch": 0.18670886075949367, + "grad_norm": 0.6397530436515808, + "learning_rate": 0.0015, + "loss": 1.6635, + "step": 1770 + }, + { + "epoch": 0.1877637130801688, + "grad_norm": 0.6979488134384155, + "learning_rate": 0.0015, + "loss": 1.6623, + "step": 1780 + }, + { + "epoch": 0.18881856540084388, + "grad_norm": 0.737483024597168, + "learning_rate": 0.0015, + "loss": 1.6625, + "step": 1790 + }, + { + "epoch": 0.189873417721519, + "grad_norm": 0.6587856411933899, + "learning_rate": 0.0015, + "loss": 1.6586, + "step": 1800 + }, + { + "epoch": 0.1909282700421941, + "grad_norm": 0.6534157991409302, + "learning_rate": 0.0015, + "loss": 1.6461, + "step": 1810 + }, + { + "epoch": 0.19198312236286919, + "grad_norm": 0.6248773336410522, + "learning_rate": 0.0015, + "loss": 1.6483, + "step": 1820 + }, + { + "epoch": 0.1930379746835443, + "grad_norm": 0.6421462297439575, + "learning_rate": 0.0015, + "loss": 1.6438, + "step": 1830 + }, + { + "epoch": 0.1940928270042194, + "grad_norm": 0.66046541929245, + "learning_rate": 0.0015, + "loss": 1.6418, + "step": 1840 + }, + { + "epoch": 0.19514767932489452, + "grad_norm": 0.8321159482002258, + "learning_rate": 0.0015, + "loss": 1.6457, + "step": 1850 + }, + { + "epoch": 0.1962025316455696, + 
"grad_norm": 0.8411294221878052, + "learning_rate": 0.0015, + "loss": 1.6512, + "step": 1860 + }, + { + "epoch": 0.19725738396624473, + "grad_norm": 0.7363362908363342, + "learning_rate": 0.0015, + "loss": 1.651, + "step": 1870 + }, + { + "epoch": 0.19831223628691982, + "grad_norm": 0.6523796319961548, + "learning_rate": 0.0015, + "loss": 1.6371, + "step": 1880 + }, + { + "epoch": 0.19936708860759494, + "grad_norm": 0.5669546723365784, + "learning_rate": 0.0015, + "loss": 1.6424, + "step": 1890 + }, + { + "epoch": 0.20042194092827004, + "grad_norm": 1.208672046661377, + "learning_rate": 0.0015, + "loss": 1.6411, + "step": 1900 + }, + { + "epoch": 0.20147679324894516, + "grad_norm": 0.7647756338119507, + "learning_rate": 0.0015, + "loss": 1.6449, + "step": 1910 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 0.6463038325309753, + "learning_rate": 0.0015, + "loss": 1.6372, + "step": 1920 + }, + { + "epoch": 0.20358649789029537, + "grad_norm": 0.6697849035263062, + "learning_rate": 0.0015, + "loss": 1.641, + "step": 1930 + }, + { + "epoch": 0.20464135021097046, + "grad_norm": 1.0806149244308472, + "learning_rate": 0.0015, + "loss": 1.6417, + "step": 1940 + }, + { + "epoch": 0.20569620253164558, + "grad_norm": 0.6005822420120239, + "learning_rate": 0.0015, + "loss": 1.6232, + "step": 1950 + }, + { + "epoch": 0.20675105485232068, + "grad_norm": 0.6279442310333252, + "learning_rate": 0.0015, + "loss": 1.6247, + "step": 1960 + }, + { + "epoch": 0.20780590717299577, + "grad_norm": 0.6158758997917175, + "learning_rate": 0.0015, + "loss": 1.6398, + "step": 1970 + }, + { + "epoch": 0.2088607594936709, + "grad_norm": 0.7227078676223755, + "learning_rate": 0.0015, + "loss": 1.6202, + "step": 1980 + }, + { + "epoch": 0.20991561181434598, + "grad_norm": 0.6707258820533752, + "learning_rate": 0.0015, + "loss": 1.6207, + "step": 1990 + }, + { + "epoch": 0.2109704641350211, + "grad_norm": 0.8268239498138428, + "learning_rate": 0.0015, + "loss": 1.62, + "step": 2000 + }, + { 
+ "epoch": 0.2120253164556962, + "grad_norm": 0.7592812776565552, + "learning_rate": 0.0015, + "loss": 1.6293, + "step": 2010 + }, + { + "epoch": 0.21308016877637131, + "grad_norm": 0.6391326785087585, + "learning_rate": 0.0015, + "loss": 1.6389, + "step": 2020 + }, + { + "epoch": 0.2141350210970464, + "grad_norm": 0.7282036542892456, + "learning_rate": 0.0015, + "loss": 1.6245, + "step": 2030 + }, + { + "epoch": 0.21518987341772153, + "grad_norm": 0.6061456203460693, + "learning_rate": 0.0015, + "loss": 1.6237, + "step": 2040 + }, + { + "epoch": 0.21624472573839662, + "grad_norm": 0.6838663816452026, + "learning_rate": 0.0015, + "loss": 1.6235, + "step": 2050 + }, + { + "epoch": 0.21729957805907174, + "grad_norm": 0.8125630617141724, + "learning_rate": 0.0015, + "loss": 1.6102, + "step": 2060 + }, + { + "epoch": 0.21835443037974683, + "grad_norm": 0.7961479425430298, + "learning_rate": 0.0015, + "loss": 1.6171, + "step": 2070 + }, + { + "epoch": 0.21940928270042195, + "grad_norm": 0.7218239903450012, + "learning_rate": 0.0015, + "loss": 1.62, + "step": 2080 + }, + { + "epoch": 0.22046413502109705, + "grad_norm": 0.6032005548477173, + "learning_rate": 0.0015, + "loss": 1.6091, + "step": 2090 + }, + { + "epoch": 0.22151898734177214, + "grad_norm": 0.6218113899230957, + "learning_rate": 0.0015, + "loss": 1.6176, + "step": 2100 + }, + { + "epoch": 0.22257383966244726, + "grad_norm": 1.0019512176513672, + "learning_rate": 0.0015, + "loss": 1.6125, + "step": 2110 + }, + { + "epoch": 0.22362869198312235, + "grad_norm": 0.6041945815086365, + "learning_rate": 0.0015, + "loss": 1.608, + "step": 2120 + }, + { + "epoch": 0.22468354430379747, + "grad_norm": 0.663590669631958, + "learning_rate": 0.0015, + "loss": 1.6105, + "step": 2130 + }, + { + "epoch": 0.22573839662447256, + "grad_norm": 0.6677202582359314, + "learning_rate": 0.0015, + "loss": 1.6022, + "step": 2140 + }, + { + "epoch": 0.22679324894514769, + "grad_norm": 0.5931677222251892, + "learning_rate": 0.0015, + 
"loss": 1.6089, + "step": 2150 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 0.74836665391922, + "learning_rate": 0.0015, + "loss": 1.6223, + "step": 2160 + }, + { + "epoch": 0.2289029535864979, + "grad_norm": 0.5746312737464905, + "learning_rate": 0.0015, + "loss": 1.6093, + "step": 2170 + }, + { + "epoch": 0.229957805907173, + "grad_norm": 0.6232902407646179, + "learning_rate": 0.0015, + "loss": 1.592, + "step": 2180 + }, + { + "epoch": 0.2310126582278481, + "grad_norm": 0.6280152797698975, + "learning_rate": 0.0015, + "loss": 1.6094, + "step": 2190 + }, + { + "epoch": 0.2320675105485232, + "grad_norm": 0.6640642881393433, + "learning_rate": 0.0015, + "loss": 1.6091, + "step": 2200 + }, + { + "epoch": 0.23312236286919832, + "grad_norm": 0.5953279137611389, + "learning_rate": 0.0015, + "loss": 1.5918, + "step": 2210 + }, + { + "epoch": 0.23417721518987342, + "grad_norm": 0.5875793099403381, + "learning_rate": 0.0015, + "loss": 1.6045, + "step": 2220 + }, + { + "epoch": 0.23523206751054854, + "grad_norm": 0.9849272966384888, + "learning_rate": 0.0015, + "loss": 1.6012, + "step": 2230 + }, + { + "epoch": 0.23628691983122363, + "grad_norm": 0.7007441520690918, + "learning_rate": 0.0015, + "loss": 1.5971, + "step": 2240 + }, + { + "epoch": 0.23734177215189872, + "grad_norm": 0.6199012398719788, + "learning_rate": 0.0015, + "loss": 1.5936, + "step": 2250 + }, + { + "epoch": 0.23839662447257384, + "grad_norm": 0.5989089608192444, + "learning_rate": 0.0015, + "loss": 1.5847, + "step": 2260 + }, + { + "epoch": 0.23945147679324894, + "grad_norm": 0.633146345615387, + "learning_rate": 0.0015, + "loss": 1.6186, + "step": 2270 + }, + { + "epoch": 0.24050632911392406, + "grad_norm": 1.0054070949554443, + "learning_rate": 0.0015, + "loss": 1.6048, + "step": 2280 + }, + { + "epoch": 0.24156118143459915, + "grad_norm": 1.1697497367858887, + "learning_rate": 0.0015, + "loss": 1.5917, + "step": 2290 + }, + { + "epoch": 0.24261603375527427, + "grad_norm": 
0.8472827672958374, + "learning_rate": 0.0015, + "loss": 1.5896, + "step": 2300 + }, + { + "epoch": 0.24367088607594936, + "grad_norm": 0.5943598747253418, + "learning_rate": 0.0015, + "loss": 1.59, + "step": 2310 + }, + { + "epoch": 0.24472573839662448, + "grad_norm": 0.5772278308868408, + "learning_rate": 0.0015, + "loss": 1.5992, + "step": 2320 + }, + { + "epoch": 0.24578059071729957, + "grad_norm": 0.7935953736305237, + "learning_rate": 0.0015, + "loss": 1.5797, + "step": 2330 + }, + { + "epoch": 0.2468354430379747, + "grad_norm": 0.7110589146614075, + "learning_rate": 0.0015, + "loss": 1.5774, + "step": 2340 + }, + { + "epoch": 0.2478902953586498, + "grad_norm": 0.7481532692909241, + "learning_rate": 0.0015, + "loss": 1.5866, + "step": 2350 + }, + { + "epoch": 0.2489451476793249, + "grad_norm": 0.6471440196037292, + "learning_rate": 0.0015, + "loss": 1.5894, + "step": 2360 + }, + { + "epoch": 0.25, + "grad_norm": 0.6757197976112366, + "learning_rate": 0.0015, + "loss": 1.5859, + "step": 2370 + }, + { + "epoch": 0.2510548523206751, + "grad_norm": 0.7993605136871338, + "learning_rate": 0.0015, + "loss": 1.5827, + "step": 2380 + }, + { + "epoch": 0.2521097046413502, + "grad_norm": 0.8421042561531067, + "learning_rate": 0.0015, + "loss": 1.583, + "step": 2390 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 0.7545788288116455, + "learning_rate": 0.0015, + "loss": 1.5842, + "step": 2400 + }, + { + "epoch": 0.2542194092827004, + "grad_norm": 0.6350547671318054, + "learning_rate": 0.0015, + "loss": 1.5863, + "step": 2410 + }, + { + "epoch": 0.2552742616033755, + "grad_norm": 0.6043763756752014, + "learning_rate": 0.0015, + "loss": 1.5789, + "step": 2420 + }, + { + "epoch": 0.2563291139240506, + "grad_norm": 0.7645569443702698, + "learning_rate": 0.0015, + "loss": 1.5765, + "step": 2430 + }, + { + "epoch": 0.25738396624472576, + "grad_norm": 1.0355976819992065, + "learning_rate": 0.0015, + "loss": 1.5867, + "step": 2440 + }, + { + "epoch": 0.25843881856540085, 
+ "grad_norm": 0.6011759042739868, + "learning_rate": 0.0015, + "loss": 1.572, + "step": 2450 + }, + { + "epoch": 0.25949367088607594, + "grad_norm": 0.5543169379234314, + "learning_rate": 0.0015, + "loss": 1.5837, + "step": 2460 + }, + { + "epoch": 0.26054852320675104, + "grad_norm": 0.8837592601776123, + "learning_rate": 0.0015, + "loss": 1.5794, + "step": 2470 + }, + { + "epoch": 0.2616033755274262, + "grad_norm": 0.8439568877220154, + "learning_rate": 0.0015, + "loss": 1.5809, + "step": 2480 + }, + { + "epoch": 0.2626582278481013, + "grad_norm": 0.5690454840660095, + "learning_rate": 0.0015, + "loss": 1.5666, + "step": 2490 + }, + { + "epoch": 0.26371308016877637, + "grad_norm": 0.6714497804641724, + "learning_rate": 0.0015, + "loss": 1.5695, + "step": 2500 + }, + { + "epoch": 0.26476793248945146, + "grad_norm": 0.5516358613967896, + "learning_rate": 0.0015, + "loss": 1.5713, + "step": 2510 + }, + { + "epoch": 0.26582278481012656, + "grad_norm": 0.9065622687339783, + "learning_rate": 0.0015, + "loss": 1.5756, + "step": 2520 + }, + { + "epoch": 0.2668776371308017, + "grad_norm": 0.6845459938049316, + "learning_rate": 0.0015, + "loss": 1.5709, + "step": 2530 + }, + { + "epoch": 0.2679324894514768, + "grad_norm": 1.046621322631836, + "learning_rate": 0.0015, + "loss": 1.5743, + "step": 2540 + }, + { + "epoch": 0.2689873417721519, + "grad_norm": 0.7775939702987671, + "learning_rate": 0.0015, + "loss": 1.5674, + "step": 2550 + }, + { + "epoch": 0.270042194092827, + "grad_norm": 0.5784750580787659, + "learning_rate": 0.0015, + "loss": 1.575, + "step": 2560 + }, + { + "epoch": 0.27109704641350213, + "grad_norm": 0.7423901557922363, + "learning_rate": 0.0015, + "loss": 1.5701, + "step": 2570 + }, + { + "epoch": 0.2721518987341772, + "grad_norm": 0.6351116895675659, + "learning_rate": 0.0015, + "loss": 1.5666, + "step": 2580 + }, + { + "epoch": 0.2732067510548523, + "grad_norm": 0.5681401491165161, + "learning_rate": 0.0015, + "loss": 1.576, + "step": 2590 + }, + { + 
"epoch": 0.2742616033755274, + "grad_norm": 0.5756617188453674, + "learning_rate": 0.0015, + "loss": 1.5729, + "step": 2600 + }, + { + "epoch": 0.27531645569620256, + "grad_norm": 0.5904302000999451, + "learning_rate": 0.0015, + "loss": 1.5696, + "step": 2610 + }, + { + "epoch": 0.27637130801687765, + "grad_norm": 0.5718812346458435, + "learning_rate": 0.0015, + "loss": 1.5707, + "step": 2620 + }, + { + "epoch": 0.27742616033755274, + "grad_norm": 0.671302855014801, + "learning_rate": 0.0015, + "loss": 1.559, + "step": 2630 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 0.6097699999809265, + "learning_rate": 0.0015, + "loss": 1.5717, + "step": 2640 + }, + { + "epoch": 0.2795358649789029, + "grad_norm": 0.7234987020492554, + "learning_rate": 0.0015, + "loss": 1.5647, + "step": 2650 + }, + { + "epoch": 0.2805907172995781, + "grad_norm": 0.5396336913108826, + "learning_rate": 0.0015, + "loss": 1.5564, + "step": 2660 + }, + { + "epoch": 0.28164556962025317, + "grad_norm": 0.625312089920044, + "learning_rate": 0.0015, + "loss": 1.558, + "step": 2670 + }, + { + "epoch": 0.28270042194092826, + "grad_norm": 0.5907183885574341, + "learning_rate": 0.0015, + "loss": 1.5596, + "step": 2680 + }, + { + "epoch": 0.28375527426160335, + "grad_norm": 0.6804481744766235, + "learning_rate": 0.0015, + "loss": 1.5609, + "step": 2690 + }, + { + "epoch": 0.2848101265822785, + "grad_norm": 0.5979005694389343, + "learning_rate": 0.0015, + "loss": 1.5539, + "step": 2700 + }, + { + "epoch": 0.2858649789029536, + "grad_norm": 0.6254168748855591, + "learning_rate": 0.0015, + "loss": 1.5609, + "step": 2710 + }, + { + "epoch": 0.2869198312236287, + "grad_norm": 0.6254093647003174, + "learning_rate": 0.0015, + "loss": 1.5612, + "step": 2720 + }, + { + "epoch": 0.2879746835443038, + "grad_norm": 0.6794012784957886, + "learning_rate": 0.0015, + "loss": 1.5413, + "step": 2730 + }, + { + "epoch": 0.2890295358649789, + "grad_norm": 0.8687151074409485, + "learning_rate": 0.0015, + "loss": 
1.5545, + "step": 2740 + }, + { + "epoch": 0.290084388185654, + "grad_norm": 0.8657436966896057, + "learning_rate": 0.0015, + "loss": 1.5591, + "step": 2750 + }, + { + "epoch": 0.2911392405063291, + "grad_norm": 0.9693871140480042, + "learning_rate": 0.0015, + "loss": 1.5637, + "step": 2760 + }, + { + "epoch": 0.2921940928270042, + "grad_norm": 0.568263053894043, + "learning_rate": 0.0015, + "loss": 1.5618, + "step": 2770 + }, + { + "epoch": 0.29324894514767935, + "grad_norm": 0.6327154040336609, + "learning_rate": 0.0015, + "loss": 1.5574, + "step": 2780 + }, + { + "epoch": 0.29430379746835444, + "grad_norm": 0.5310362577438354, + "learning_rate": 0.0015, + "loss": 1.5556, + "step": 2790 + }, + { + "epoch": 0.29535864978902954, + "grad_norm": 0.649588406085968, + "learning_rate": 0.0015, + "loss": 1.5503, + "step": 2800 + }, + { + "epoch": 0.29641350210970463, + "grad_norm": 0.7648938298225403, + "learning_rate": 0.0015, + "loss": 1.5546, + "step": 2810 + }, + { + "epoch": 0.2974683544303797, + "grad_norm": 0.6821872591972351, + "learning_rate": 0.0015, + "loss": 1.562, + "step": 2820 + }, + { + "epoch": 0.29852320675105487, + "grad_norm": 1.2423720359802246, + "learning_rate": 0.0015, + "loss": 1.532, + "step": 2830 + }, + { + "epoch": 0.29957805907172996, + "grad_norm": 0.9050655961036682, + "learning_rate": 0.0015, + "loss": 1.5427, + "step": 2840 + }, + { + "epoch": 0.30063291139240506, + "grad_norm": 0.6564595103263855, + "learning_rate": 0.0015, + "loss": 1.5393, + "step": 2850 + }, + { + "epoch": 0.30168776371308015, + "grad_norm": 0.6022120118141174, + "learning_rate": 0.0015, + "loss": 1.5474, + "step": 2860 + }, + { + "epoch": 0.3027426160337553, + "grad_norm": 0.7589964866638184, + "learning_rate": 0.0015, + "loss": 1.5463, + "step": 2870 + }, + { + "epoch": 0.3037974683544304, + "grad_norm": 0.7630635499954224, + "learning_rate": 0.0015, + "loss": 1.5363, + "step": 2880 + }, + { + "epoch": 0.3048523206751055, + "grad_norm": 0.590700626373291, + 
"learning_rate": 0.0015, + "loss": 1.5396, + "step": 2890 + }, + { + "epoch": 0.3059071729957806, + "grad_norm": 0.6236220598220825, + "learning_rate": 0.0015, + "loss": 1.5484, + "step": 2900 + }, + { + "epoch": 0.3069620253164557, + "grad_norm": 0.6357071399688721, + "learning_rate": 0.0015, + "loss": 1.5469, + "step": 2910 + }, + { + "epoch": 0.3080168776371308, + "grad_norm": 0.5876275897026062, + "learning_rate": 0.0015, + "loss": 1.541, + "step": 2920 + }, + { + "epoch": 0.3090717299578059, + "grad_norm": 0.5511279106140137, + "learning_rate": 0.0015, + "loss": 1.5342, + "step": 2930 + }, + { + "epoch": 0.310126582278481, + "grad_norm": 0.8370975852012634, + "learning_rate": 0.0015, + "loss": 1.5435, + "step": 2940 + }, + { + "epoch": 0.3111814345991561, + "grad_norm": 0.5435685515403748, + "learning_rate": 0.0015, + "loss": 1.5553, + "step": 2950 + }, + { + "epoch": 0.31223628691983124, + "grad_norm": 0.8278359770774841, + "learning_rate": 0.0015, + "loss": 1.5333, + "step": 2960 + }, + { + "epoch": 0.31329113924050633, + "grad_norm": 0.8868322968482971, + "learning_rate": 0.0015, + "loss": 1.5247, + "step": 2970 + }, + { + "epoch": 0.3143459915611814, + "grad_norm": 0.6485639810562134, + "learning_rate": 0.0015, + "loss": 1.5306, + "step": 2980 + }, + { + "epoch": 0.3154008438818565, + "grad_norm": 0.5989826917648315, + "learning_rate": 0.0015, + "loss": 1.5462, + "step": 2990 + }, + { + "epoch": 0.31645569620253167, + "grad_norm": 0.7159509658813477, + "learning_rate": 0.0015, + "loss": 1.529, + "step": 3000 + }, + { + "epoch": 0.31751054852320676, + "grad_norm": 0.5925466418266296, + "learning_rate": 0.0015, + "loss": 1.5397, + "step": 3010 + }, + { + "epoch": 0.31856540084388185, + "grad_norm": 0.6697542667388916, + "learning_rate": 0.0015, + "loss": 1.5396, + "step": 3020 + }, + { + "epoch": 0.31962025316455694, + "grad_norm": 0.6349740624427795, + "learning_rate": 0.0015, + "loss": 1.5281, + "step": 3030 + }, + { + "epoch": 0.3206751054852321, + 
"grad_norm": 0.7143148183822632, + "learning_rate": 0.0015, + "loss": 1.5375, + "step": 3040 + }, + { + "epoch": 0.3217299578059072, + "grad_norm": 0.6477770805358887, + "learning_rate": 0.0015, + "loss": 1.5315, + "step": 3050 + }, + { + "epoch": 0.3227848101265823, + "grad_norm": 0.5939271450042725, + "learning_rate": 0.0015, + "loss": 1.5412, + "step": 3060 + }, + { + "epoch": 0.32383966244725737, + "grad_norm": 0.6116764545440674, + "learning_rate": 0.0015, + "loss": 1.5317, + "step": 3070 + }, + { + "epoch": 0.32489451476793246, + "grad_norm": 0.806381344795227, + "learning_rate": 0.0015, + "loss": 1.5217, + "step": 3080 + }, + { + "epoch": 0.3259493670886076, + "grad_norm": 0.5201924443244934, + "learning_rate": 0.0015, + "loss": 1.5391, + "step": 3090 + }, + { + "epoch": 0.3270042194092827, + "grad_norm": 0.5994991660118103, + "learning_rate": 0.0015, + "loss": 1.5218, + "step": 3100 + }, + { + "epoch": 0.3280590717299578, + "grad_norm": 0.7356926798820496, + "learning_rate": 0.0015, + "loss": 1.5286, + "step": 3110 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 0.7890620231628418, + "learning_rate": 0.0015, + "loss": 1.5336, + "step": 3120 + }, + { + "epoch": 0.33016877637130804, + "grad_norm": 0.7932880520820618, + "learning_rate": 0.0015, + "loss": 1.5446, + "step": 3130 + }, + { + "epoch": 0.33122362869198313, + "grad_norm": 0.5750153064727783, + "learning_rate": 0.0015, + "loss": 1.5207, + "step": 3140 + }, + { + "epoch": 0.3322784810126582, + "grad_norm": 0.5573716759681702, + "learning_rate": 0.0015, + "loss": 1.5316, + "step": 3150 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.6410278081893921, + "learning_rate": 0.0015, + "loss": 1.5311, + "step": 3160 + }, + { + "epoch": 0.33438818565400846, + "grad_norm": 0.7242074012756348, + "learning_rate": 0.0015, + "loss": 1.5332, + "step": 3170 + }, + { + "epoch": 0.33544303797468356, + "grad_norm": 0.5749971866607666, + "learning_rate": 0.0015, + "loss": 1.5265, + "step": 3180 + }, + { + 
"epoch": 0.33649789029535865, + "grad_norm": 0.6374623775482178, + "learning_rate": 0.0015, + "loss": 1.5273, + "step": 3190 + }, + { + "epoch": 0.33755274261603374, + "grad_norm": 0.6034247875213623, + "learning_rate": 0.0015, + "loss": 1.5209, + "step": 3200 + }, + { + "epoch": 0.33860759493670883, + "grad_norm": 0.6909670829772949, + "learning_rate": 0.0015, + "loss": 1.5359, + "step": 3210 + }, + { + "epoch": 0.339662447257384, + "grad_norm": 0.6684272289276123, + "learning_rate": 0.0015, + "loss": 1.5278, + "step": 3220 + }, + { + "epoch": 0.3407172995780591, + "grad_norm": 0.6351376175880432, + "learning_rate": 0.0015, + "loss": 1.5197, + "step": 3230 + }, + { + "epoch": 0.34177215189873417, + "grad_norm": 0.5349257588386536, + "learning_rate": 0.0015, + "loss": 1.5015, + "step": 3240 + }, + { + "epoch": 0.34282700421940926, + "grad_norm": 0.5453034043312073, + "learning_rate": 0.0015, + "loss": 1.5251, + "step": 3250 + }, + { + "epoch": 0.3438818565400844, + "grad_norm": 0.5507636666297913, + "learning_rate": 0.0015, + "loss": 1.513, + "step": 3260 + }, + { + "epoch": 0.3449367088607595, + "grad_norm": 0.6290556192398071, + "learning_rate": 0.0015, + "loss": 1.534, + "step": 3270 + }, + { + "epoch": 0.3459915611814346, + "grad_norm": 0.6522665619850159, + "learning_rate": 0.0015, + "loss": 1.5316, + "step": 3280 + }, + { + "epoch": 0.3470464135021097, + "grad_norm": 1.059340476989746, + "learning_rate": 0.0015, + "loss": 1.5167, + "step": 3290 + }, + { + "epoch": 0.34810126582278483, + "grad_norm": 0.6655550599098206, + "learning_rate": 0.0015, + "loss": 1.5245, + "step": 3300 + }, + { + "epoch": 0.3491561181434599, + "grad_norm": 0.5322595834732056, + "learning_rate": 0.0015, + "loss": 1.5218, + "step": 3310 + }, + { + "epoch": 0.350210970464135, + "grad_norm": 0.6133769750595093, + "learning_rate": 0.0015, + "loss": 1.5255, + "step": 3320 + }, + { + "epoch": 0.3512658227848101, + "grad_norm": 0.5492916107177734, + "learning_rate": 0.0015, + "loss": 1.5158, 
+ "step": 3330 + }, + { + "epoch": 0.35232067510548526, + "grad_norm": 0.6243652105331421, + "learning_rate": 0.0015, + "loss": 1.521, + "step": 3340 + }, + { + "epoch": 0.35337552742616035, + "grad_norm": 0.5818319320678711, + "learning_rate": 0.0015, + "loss": 1.5124, + "step": 3350 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 0.7079645991325378, + "learning_rate": 0.0015, + "loss": 1.5143, + "step": 3360 + }, + { + "epoch": 0.35548523206751054, + "grad_norm": 0.5255757570266724, + "learning_rate": 0.0015, + "loss": 1.5201, + "step": 3370 + }, + { + "epoch": 0.35654008438818563, + "grad_norm": 0.5484145283699036, + "learning_rate": 0.0015, + "loss": 1.5142, + "step": 3380 + }, + { + "epoch": 0.3575949367088608, + "grad_norm": 0.6683304309844971, + "learning_rate": 0.0015, + "loss": 1.5106, + "step": 3390 + }, + { + "epoch": 0.35864978902953587, + "grad_norm": 0.7573396563529968, + "learning_rate": 0.0015, + "loss": 1.52, + "step": 3400 + }, + { + "epoch": 0.35970464135021096, + "grad_norm": 0.5544288158416748, + "learning_rate": 0.0015, + "loss": 1.5153, + "step": 3410 + }, + { + "epoch": 0.36075949367088606, + "grad_norm": 0.9378796219825745, + "learning_rate": 0.0015, + "loss": 1.4979, + "step": 3420 + }, + { + "epoch": 0.3618143459915612, + "grad_norm": 0.6284019351005554, + "learning_rate": 0.0015, + "loss": 1.5141, + "step": 3430 + }, + { + "epoch": 0.3628691983122363, + "grad_norm": 0.5269809365272522, + "learning_rate": 0.0015, + "loss": 1.5099, + "step": 3440 + }, + { + "epoch": 0.3639240506329114, + "grad_norm": 0.645771861076355, + "learning_rate": 0.0015, + "loss": 1.5085, + "step": 3450 + }, + { + "epoch": 0.3649789029535865, + "grad_norm": 0.6537370681762695, + "learning_rate": 0.0015, + "loss": 1.5015, + "step": 3460 + }, + { + "epoch": 0.36603375527426163, + "grad_norm": 0.8419973850250244, + "learning_rate": 0.0015, + "loss": 1.5197, + "step": 3470 + }, + { + "epoch": 0.3670886075949367, + "grad_norm": 0.6124632954597473, + 
"learning_rate": 0.0015, + "loss": 1.5231, + "step": 3480 + }, + { + "epoch": 0.3681434599156118, + "grad_norm": 0.7231237888336182, + "learning_rate": 0.0015, + "loss": 1.5162, + "step": 3490 + }, + { + "epoch": 0.3691983122362869, + "grad_norm": 0.6742547750473022, + "learning_rate": 0.0015, + "loss": 1.4955, + "step": 3500 + }, + { + "epoch": 0.370253164556962, + "grad_norm": 0.6121177077293396, + "learning_rate": 0.0015, + "loss": 1.5186, + "step": 3510 + }, + { + "epoch": 0.37130801687763715, + "grad_norm": 0.5688107013702393, + "learning_rate": 0.0015, + "loss": 1.5136, + "step": 3520 + }, + { + "epoch": 0.37236286919831224, + "grad_norm": 0.7281293272972107, + "learning_rate": 0.0015, + "loss": 1.5223, + "step": 3530 + }, + { + "epoch": 0.37341772151898733, + "grad_norm": 0.8605830073356628, + "learning_rate": 0.0015, + "loss": 1.5187, + "step": 3540 + }, + { + "epoch": 0.3744725738396624, + "grad_norm": 0.5592063665390015, + "learning_rate": 0.0015, + "loss": 1.5033, + "step": 3550 + }, + { + "epoch": 0.3755274261603376, + "grad_norm": 0.596141517162323, + "learning_rate": 0.0015, + "loss": 1.505, + "step": 3560 + }, + { + "epoch": 0.37658227848101267, + "grad_norm": 0.8582192659378052, + "learning_rate": 0.0015, + "loss": 1.5002, + "step": 3570 + }, + { + "epoch": 0.37763713080168776, + "grad_norm": 0.6140157580375671, + "learning_rate": 0.0015, + "loss": 1.504, + "step": 3580 + }, + { + "epoch": 0.37869198312236285, + "grad_norm": 0.6526047587394714, + "learning_rate": 0.0015, + "loss": 1.5155, + "step": 3590 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 0.7063454389572144, + "learning_rate": 0.0015, + "loss": 1.4983, + "step": 3600 + }, + { + "epoch": 0.3808016877637131, + "grad_norm": 0.8851373791694641, + "learning_rate": 0.0015, + "loss": 1.5008, + "step": 3610 + }, + { + "epoch": 0.3818565400843882, + "grad_norm": 0.5217905640602112, + "learning_rate": 0.0015, + "loss": 1.5014, + "step": 3620 + }, + { + "epoch": 0.3829113924050633, + 
"grad_norm": 0.6794372797012329, + "learning_rate": 0.0015, + "loss": 1.5011, + "step": 3630 + }, + { + "epoch": 0.38396624472573837, + "grad_norm": 1.064008355140686, + "learning_rate": 0.0015, + "loss": 1.5008, + "step": 3640 + }, + { + "epoch": 0.3850210970464135, + "grad_norm": 0.6575676202774048, + "learning_rate": 0.0015, + "loss": 1.5146, + "step": 3650 + }, + { + "epoch": 0.3860759493670886, + "grad_norm": 0.8898921012878418, + "learning_rate": 0.0015, + "loss": 1.5028, + "step": 3660 + }, + { + "epoch": 0.3871308016877637, + "grad_norm": 0.6219639778137207, + "learning_rate": 0.0015, + "loss": 1.5021, + "step": 3670 + }, + { + "epoch": 0.3881856540084388, + "grad_norm": 0.7778682112693787, + "learning_rate": 0.0015, + "loss": 1.5028, + "step": 3680 + }, + { + "epoch": 0.38924050632911394, + "grad_norm": 0.7537215352058411, + "learning_rate": 0.0015, + "loss": 1.5085, + "step": 3690 + }, + { + "epoch": 0.39029535864978904, + "grad_norm": 0.6599957346916199, + "learning_rate": 0.0015, + "loss": 1.5134, + "step": 3700 + }, + { + "epoch": 0.39135021097046413, + "grad_norm": 0.5223343968391418, + "learning_rate": 0.0015, + "loss": 1.4994, + "step": 3710 + }, + { + "epoch": 0.3924050632911392, + "grad_norm": 0.5719749927520752, + "learning_rate": 0.0015, + "loss": 1.4789, + "step": 3720 + }, + { + "epoch": 0.39345991561181437, + "grad_norm": 0.6128490567207336, + "learning_rate": 0.0015, + "loss": 1.4964, + "step": 3730 + }, + { + "epoch": 0.39451476793248946, + "grad_norm": 0.5783220529556274, + "learning_rate": 0.0015, + "loss": 1.4992, + "step": 3740 + }, + { + "epoch": 0.39556962025316456, + "grad_norm": 0.7674107551574707, + "learning_rate": 0.0015, + "loss": 1.5038, + "step": 3750 + }, + { + "epoch": 0.39662447257383965, + "grad_norm": 0.9251591563224792, + "learning_rate": 0.0015, + "loss": 1.4946, + "step": 3760 + }, + { + "epoch": 0.39767932489451474, + "grad_norm": 0.5669074058532715, + "learning_rate": 0.0015, + "loss": 1.5076, + "step": 3770 + }, + { 
+ "epoch": 0.3987341772151899, + "grad_norm": 0.504377007484436, + "learning_rate": 0.0015, + "loss": 1.4931, + "step": 3780 + }, + { + "epoch": 0.399789029535865, + "grad_norm": 0.5681625008583069, + "learning_rate": 0.0015, + "loss": 1.4978, + "step": 3790 + }, + { + "epoch": 0.4008438818565401, + "grad_norm": 0.6606709361076355, + "learning_rate": 0.0015, + "loss": 1.5052, + "step": 3800 + }, + { + "epoch": 0.40189873417721517, + "grad_norm": 0.559603214263916, + "learning_rate": 0.0015, + "loss": 1.4927, + "step": 3810 + }, + { + "epoch": 0.4029535864978903, + "grad_norm": 0.5750709176063538, + "learning_rate": 0.0015, + "loss": 1.4838, + "step": 3820 + }, + { + "epoch": 0.4040084388185654, + "grad_norm": 0.5639090538024902, + "learning_rate": 0.0015, + "loss": 1.487, + "step": 3830 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 0.5645895004272461, + "learning_rate": 0.0015, + "loss": 1.5072, + "step": 3840 + }, + { + "epoch": 0.4061181434599156, + "grad_norm": 0.5773900747299194, + "learning_rate": 0.0015, + "loss": 1.4869, + "step": 3850 + }, + { + "epoch": 0.40717299578059074, + "grad_norm": 0.8146610260009766, + "learning_rate": 0.0015, + "loss": 1.5024, + "step": 3860 + }, + { + "epoch": 0.40822784810126583, + "grad_norm": 0.5846198201179504, + "learning_rate": 0.0015, + "loss": 1.491, + "step": 3870 + }, + { + "epoch": 0.4092827004219409, + "grad_norm": 0.5263673663139343, + "learning_rate": 0.0015, + "loss": 1.4824, + "step": 3880 + }, + { + "epoch": 0.410337552742616, + "grad_norm": 0.8206654191017151, + "learning_rate": 0.0015, + "loss": 1.481, + "step": 3890 + }, + { + "epoch": 0.41139240506329117, + "grad_norm": 0.7633253931999207, + "learning_rate": 0.0015, + "loss": 1.4861, + "step": 3900 + }, + { + "epoch": 0.41244725738396626, + "grad_norm": 0.6088473200798035, + "learning_rate": 0.0015, + "loss": 1.4879, + "step": 3910 + }, + { + "epoch": 0.41350210970464135, + "grad_norm": 0.5344824194908142, + "learning_rate": 0.0015, + "loss": 1.4833, 
+ "step": 3920 + }, + { + "epoch": 0.41455696202531644, + "grad_norm": 0.55226069688797, + "learning_rate": 0.0015, + "loss": 1.4882, + "step": 3930 + }, + { + "epoch": 0.41561181434599154, + "grad_norm": 0.5827703475952148, + "learning_rate": 0.0015, + "loss": 1.4817, + "step": 3940 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 0.7387348413467407, + "learning_rate": 0.0015, + "loss": 1.4907, + "step": 3950 + }, + { + "epoch": 0.4177215189873418, + "grad_norm": 0.9242419600486755, + "learning_rate": 0.0015, + "loss": 1.49, + "step": 3960 + }, + { + "epoch": 0.41877637130801687, + "grad_norm": 0.8952328562736511, + "learning_rate": 0.0015, + "loss": 1.4816, + "step": 3970 + }, + { + "epoch": 0.41983122362869196, + "grad_norm": 0.8109372854232788, + "learning_rate": 0.0015, + "loss": 1.4909, + "step": 3980 + }, + { + "epoch": 0.4208860759493671, + "grad_norm": 0.8116260170936584, + "learning_rate": 0.0015, + "loss": 1.4831, + "step": 3990 + }, + { + "epoch": 0.4219409282700422, + "grad_norm": 0.7635520100593567, + "learning_rate": 0.0015, + "loss": 1.4992, + "step": 4000 + }, + { + "epoch": 0.4229957805907173, + "grad_norm": 0.5781578421592712, + "learning_rate": 0.0015, + "loss": 1.4837, + "step": 4010 + }, + { + "epoch": 0.4240506329113924, + "grad_norm": 0.5823401808738708, + "learning_rate": 0.0015, + "loss": 1.486, + "step": 4020 + }, + { + "epoch": 0.42510548523206754, + "grad_norm": 0.7159461379051208, + "learning_rate": 0.0015, + "loss": 1.4765, + "step": 4030 + }, + { + "epoch": 0.42616033755274263, + "grad_norm": 0.546384871006012, + "learning_rate": 0.0015, + "loss": 1.4885, + "step": 4040 + }, + { + "epoch": 0.4272151898734177, + "grad_norm": 0.5313042998313904, + "learning_rate": 0.0015, + "loss": 1.4882, + "step": 4050 + }, + { + "epoch": 0.4282700421940928, + "grad_norm": 0.7160599827766418, + "learning_rate": 0.0015, + "loss": 1.494, + "step": 4060 + }, + { + "epoch": 0.4293248945147679, + "grad_norm": 0.6895796656608582, + "learning_rate": 
0.0015, + "loss": 1.4852, + "step": 4070 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 0.6481873393058777, + "learning_rate": 0.0015, + "loss": 1.4949, + "step": 4080 + }, + { + "epoch": 0.43143459915611815, + "grad_norm": 0.657903790473938, + "learning_rate": 0.0015, + "loss": 1.4835, + "step": 4090 + }, + { + "epoch": 0.43248945147679324, + "grad_norm": 0.5603094100952148, + "learning_rate": 0.0015, + "loss": 1.4845, + "step": 4100 + }, + { + "epoch": 0.43354430379746833, + "grad_norm": 0.575001060962677, + "learning_rate": 0.0015, + "loss": 1.4762, + "step": 4110 + }, + { + "epoch": 0.4345991561181435, + "grad_norm": 0.7686195373535156, + "learning_rate": 0.0015, + "loss": 1.4823, + "step": 4120 + }, + { + "epoch": 0.4356540084388186, + "grad_norm": 0.6176062822341919, + "learning_rate": 0.0015, + "loss": 1.4769, + "step": 4130 + }, + { + "epoch": 0.43670886075949367, + "grad_norm": 0.6297482252120972, + "learning_rate": 0.0015, + "loss": 1.4776, + "step": 4140 + }, + { + "epoch": 0.43776371308016876, + "grad_norm": 0.531175434589386, + "learning_rate": 0.0015, + "loss": 1.4822, + "step": 4150 + }, + { + "epoch": 0.4388185654008439, + "grad_norm": 0.5193732380867004, + "learning_rate": 0.0015, + "loss": 1.4768, + "step": 4160 + }, + { + "epoch": 0.439873417721519, + "grad_norm": 0.7521961331367493, + "learning_rate": 0.0015, + "loss": 1.4763, + "step": 4170 + }, + { + "epoch": 0.4409282700421941, + "grad_norm": 0.5999307632446289, + "learning_rate": 0.0015, + "loss": 1.4728, + "step": 4180 + }, + { + "epoch": 0.4419831223628692, + "grad_norm": 0.7637180685997009, + "learning_rate": 0.0015, + "loss": 1.4779, + "step": 4190 + }, + { + "epoch": 0.4430379746835443, + "grad_norm": 0.8921180367469788, + "learning_rate": 0.0015, + "loss": 1.4832, + "step": 4200 + }, + { + "epoch": 0.4440928270042194, + "grad_norm": 0.586078941822052, + "learning_rate": 0.0015, + "loss": 1.4715, + "step": 4210 + }, + { + "epoch": 0.4451476793248945, + "grad_norm": 
0.5220807790756226, + "learning_rate": 0.0015, + "loss": 1.4801, + "step": 4220 + }, + { + "epoch": 0.4462025316455696, + "grad_norm": 0.7704445123672485, + "learning_rate": 0.0015, + "loss": 1.476, + "step": 4230 + }, + { + "epoch": 0.4472573839662447, + "grad_norm": 0.6387608647346497, + "learning_rate": 0.0015, + "loss": 1.4732, + "step": 4240 + }, + { + "epoch": 0.44831223628691985, + "grad_norm": 0.5596318244934082, + "learning_rate": 0.0015, + "loss": 1.48, + "step": 4250 + }, + { + "epoch": 0.44936708860759494, + "grad_norm": 0.6356242895126343, + "learning_rate": 0.0015, + "loss": 1.4829, + "step": 4260 + }, + { + "epoch": 0.45042194092827004, + "grad_norm": 0.5630121231079102, + "learning_rate": 0.0015, + "loss": 1.4739, + "step": 4270 + }, + { + "epoch": 0.45147679324894513, + "grad_norm": 1.0371557474136353, + "learning_rate": 0.0015, + "loss": 1.4706, + "step": 4280 + }, + { + "epoch": 0.4525316455696203, + "grad_norm": 1.0211822986602783, + "learning_rate": 0.0015, + "loss": 1.4806, + "step": 4290 + }, + { + "epoch": 0.45358649789029537, + "grad_norm": 0.6440302133560181, + "learning_rate": 0.0015, + "loss": 1.4777, + "step": 4300 + }, + { + "epoch": 0.45464135021097046, + "grad_norm": 0.5308347940444946, + "learning_rate": 0.0015, + "loss": 1.4746, + "step": 4310 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 0.6810348629951477, + "learning_rate": 0.0015, + "loss": 1.4699, + "step": 4320 + }, + { + "epoch": 0.45675105485232065, + "grad_norm": 0.5810175538063049, + "learning_rate": 0.0015, + "loss": 1.477, + "step": 4330 + }, + { + "epoch": 0.4578059071729958, + "grad_norm": 0.8294820189476013, + "learning_rate": 0.0015, + "loss": 1.4705, + "step": 4340 + }, + { + "epoch": 0.4588607594936709, + "grad_norm": 0.9704918265342712, + "learning_rate": 0.0015, + "loss": 1.474, + "step": 4350 + }, + { + "epoch": 0.459915611814346, + "grad_norm": 0.6052191853523254, + "learning_rate": 0.0015, + "loss": 1.4916, + "step": 4360 + }, + { + "epoch": 
0.4609704641350211, + "grad_norm": 0.5454020500183105, + "learning_rate": 0.0015, + "loss": 1.4659, + "step": 4370 + }, + { + "epoch": 0.4620253164556962, + "grad_norm": 0.5632649064064026, + "learning_rate": 0.0015, + "loss": 1.4715, + "step": 4380 + }, + { + "epoch": 0.4630801687763713, + "grad_norm": 0.5132173299789429, + "learning_rate": 0.0015, + "loss": 1.4668, + "step": 4390 + }, + { + "epoch": 0.4641350210970464, + "grad_norm": 0.5825719237327576, + "learning_rate": 0.0015, + "loss": 1.474, + "step": 4400 + }, + { + "epoch": 0.4651898734177215, + "grad_norm": 0.7798466682434082, + "learning_rate": 0.0015, + "loss": 1.4886, + "step": 4410 + }, + { + "epoch": 0.46624472573839665, + "grad_norm": 0.6333516836166382, + "learning_rate": 0.0015, + "loss": 1.4703, + "step": 4420 + }, + { + "epoch": 0.46729957805907174, + "grad_norm": 0.6412692666053772, + "learning_rate": 0.0015, + "loss": 1.4733, + "step": 4430 + }, + { + "epoch": 0.46835443037974683, + "grad_norm": 0.6523199081420898, + "learning_rate": 0.0015, + "loss": 1.4684, + "step": 4440 + }, + { + "epoch": 0.4694092827004219, + "grad_norm": 0.5513646602630615, + "learning_rate": 0.0015, + "loss": 1.4784, + "step": 4450 + }, + { + "epoch": 0.4704641350210971, + "grad_norm": 0.5289167761802673, + "learning_rate": 0.0015, + "loss": 1.462, + "step": 4460 + }, + { + "epoch": 0.47151898734177217, + "grad_norm": 0.49874287843704224, + "learning_rate": 0.0015, + "loss": 1.4709, + "step": 4470 + }, + { + "epoch": 0.47257383966244726, + "grad_norm": 0.62343829870224, + "learning_rate": 0.0015, + "loss": 1.479, + "step": 4480 + }, + { + "epoch": 0.47362869198312235, + "grad_norm": 0.7276079058647156, + "learning_rate": 0.0015, + "loss": 1.4666, + "step": 4490 + }, + { + "epoch": 0.47468354430379744, + "grad_norm": 0.8593319058418274, + "learning_rate": 0.0015, + "loss": 1.4644, + "step": 4500 + }, + { + "epoch": 0.4757383966244726, + "grad_norm": 0.555818498134613, + "learning_rate": 0.0015, + "loss": 1.4707, + 
"step": 4510 + }, + { + "epoch": 0.4767932489451477, + "grad_norm": 0.5740022659301758, + "learning_rate": 0.0015, + "loss": 1.4533, + "step": 4520 + }, + { + "epoch": 0.4778481012658228, + "grad_norm": 0.518110990524292, + "learning_rate": 0.0015, + "loss": 1.4746, + "step": 4530 + }, + { + "epoch": 0.47890295358649787, + "grad_norm": 0.5647834539413452, + "learning_rate": 0.0015, + "loss": 1.4749, + "step": 4540 + }, + { + "epoch": 0.479957805907173, + "grad_norm": 0.5966101288795471, + "learning_rate": 0.0015, + "loss": 1.4702, + "step": 4550 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 0.5421002507209778, + "learning_rate": 0.0015, + "loss": 1.4623, + "step": 4560 + }, + { + "epoch": 0.4820675105485232, + "grad_norm": 0.5057939887046814, + "learning_rate": 0.0015, + "loss": 1.4416, + "step": 4570 + }, + { + "epoch": 0.4831223628691983, + "grad_norm": 0.647251546382904, + "learning_rate": 0.0015, + "loss": 1.465, + "step": 4580 + }, + { + "epoch": 0.48417721518987344, + "grad_norm": 0.580030083656311, + "learning_rate": 0.0015, + "loss": 1.45, + "step": 4590 + }, + { + "epoch": 0.48523206751054854, + "grad_norm": 0.4939667880535126, + "learning_rate": 0.0015, + "loss": 1.4731, + "step": 4600 + }, + { + "epoch": 0.48628691983122363, + "grad_norm": 0.7751357555389404, + "learning_rate": 0.0015, + "loss": 1.4759, + "step": 4610 + }, + { + "epoch": 0.4873417721518987, + "grad_norm": 0.6056842803955078, + "learning_rate": 0.0015, + "loss": 1.4524, + "step": 4620 + }, + { + "epoch": 0.4883966244725738, + "grad_norm": 0.5608704090118408, + "learning_rate": 0.0015, + "loss": 1.4694, + "step": 4630 + }, + { + "epoch": 0.48945147679324896, + "grad_norm": 0.5101577639579773, + "learning_rate": 0.0015, + "loss": 1.4703, + "step": 4640 + }, + { + "epoch": 0.49050632911392406, + "grad_norm": 0.5333223342895508, + "learning_rate": 0.0015, + "loss": 1.4721, + "step": 4650 + }, + { + "epoch": 0.49156118143459915, + "grad_norm": 0.5126179456710815, + "learning_rate": 
0.0015, + "loss": 1.4595, + "step": 4660 + }, + { + "epoch": 0.49261603375527424, + "grad_norm": 0.5470878481864929, + "learning_rate": 0.0015, + "loss": 1.46, + "step": 4670 + }, + { + "epoch": 0.4936708860759494, + "grad_norm": 0.48143497109413147, + "learning_rate": 0.0015, + "loss": 1.4567, + "step": 4680 + }, + { + "epoch": 0.4947257383966245, + "grad_norm": 0.6167868971824646, + "learning_rate": 0.0015, + "loss": 1.4511, + "step": 4690 + }, + { + "epoch": 0.4957805907172996, + "grad_norm": 0.5401530265808105, + "learning_rate": 0.0015, + "loss": 1.4633, + "step": 4700 + }, + { + "epoch": 0.49683544303797467, + "grad_norm": 0.5182884931564331, + "learning_rate": 0.0015, + "loss": 1.4637, + "step": 4710 + }, + { + "epoch": 0.4978902953586498, + "grad_norm": 0.794460654258728, + "learning_rate": 0.0015, + "loss": 1.4606, + "step": 4720 + }, + { + "epoch": 0.4989451476793249, + "grad_norm": 0.5684305429458618, + "learning_rate": 0.0015, + "loss": 1.4569, + "step": 4730 + }, + { + "epoch": 0.5, + "grad_norm": 0.8572563529014587, + "learning_rate": 0.0015, + "loss": 1.456, + "step": 4740 + }, + { + "epoch": 0.5010548523206751, + "grad_norm": 0.5929309725761414, + "learning_rate": 0.0015, + "loss": 1.4772, + "step": 4750 + }, + { + "epoch": 0.5021097046413502, + "grad_norm": 0.5886070728302002, + "learning_rate": 0.0015, + "loss": 1.4595, + "step": 4760 + }, + { + "epoch": 0.5031645569620253, + "grad_norm": 0.6663429141044617, + "learning_rate": 0.0015, + "loss": 1.4583, + "step": 4770 + }, + { + "epoch": 0.5042194092827004, + "grad_norm": 0.6770031452178955, + "learning_rate": 0.0015, + "loss": 1.4684, + "step": 4780 + }, + { + "epoch": 0.5052742616033755, + "grad_norm": 0.7095074653625488, + "learning_rate": 0.0015, + "loss": 1.453, + "step": 4790 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 0.5950344800949097, + "learning_rate": 0.0015, + "loss": 1.4563, + "step": 4800 + }, + { + "epoch": 0.5073839662447257, + "grad_norm": 0.4960660934448242, + 
"learning_rate": 0.0015, + "loss": 1.458, + "step": 4810 + }, + { + "epoch": 0.5084388185654009, + "grad_norm": 0.6155035495758057, + "learning_rate": 0.0015, + "loss": 1.4776, + "step": 4820 + }, + { + "epoch": 0.509493670886076, + "grad_norm": 0.6673266887664795, + "learning_rate": 0.0015, + "loss": 1.4596, + "step": 4830 + }, + { + "epoch": 0.510548523206751, + "grad_norm": 0.5229274034500122, + "learning_rate": 0.0015, + "loss": 1.4527, + "step": 4840 + }, + { + "epoch": 0.5116033755274262, + "grad_norm": 0.5223971605300903, + "learning_rate": 0.0015, + "loss": 1.465, + "step": 4850 + }, + { + "epoch": 0.5126582278481012, + "grad_norm": 0.5979974269866943, + "learning_rate": 0.0015, + "loss": 1.4532, + "step": 4860 + }, + { + "epoch": 0.5137130801687764, + "grad_norm": 0.8200761079788208, + "learning_rate": 0.0015, + "loss": 1.446, + "step": 4870 + }, + { + "epoch": 0.5147679324894515, + "grad_norm": 0.5326952338218689, + "learning_rate": 0.0015, + "loss": 1.4682, + "step": 4880 + }, + { + "epoch": 0.5158227848101266, + "grad_norm": 0.49913546442985535, + "learning_rate": 0.0015, + "loss": 1.4582, + "step": 4890 + }, + { + "epoch": 0.5168776371308017, + "grad_norm": 0.5209238529205322, + "learning_rate": 0.0015, + "loss": 1.467, + "step": 4900 + }, + { + "epoch": 0.5179324894514767, + "grad_norm": 0.6158308386802673, + "learning_rate": 0.0015, + "loss": 1.4612, + "step": 4910 + }, + { + "epoch": 0.5189873417721519, + "grad_norm": 0.6039217114448547, + "learning_rate": 0.0015, + "loss": 1.4447, + "step": 4920 + }, + { + "epoch": 0.520042194092827, + "grad_norm": 0.798934280872345, + "learning_rate": 0.0015, + "loss": 1.4591, + "step": 4930 + }, + { + "epoch": 0.5210970464135021, + "grad_norm": 0.7106212973594666, + "learning_rate": 0.0015, + "loss": 1.4563, + "step": 4940 + }, + { + "epoch": 0.5221518987341772, + "grad_norm": 1.0009483098983765, + "learning_rate": 0.0015, + "loss": 1.4645, + "step": 4950 + }, + { + "epoch": 0.5232067510548524, + "grad_norm": 
0.5082882046699524, + "learning_rate": 0.0015, + "loss": 1.4468, + "step": 4960 + }, + { + "epoch": 0.5242616033755274, + "grad_norm": 0.6346124410629272, + "learning_rate": 0.0015, + "loss": 1.4545, + "step": 4970 + }, + { + "epoch": 0.5253164556962026, + "grad_norm": 0.5069888234138489, + "learning_rate": 0.0015, + "loss": 1.4478, + "step": 4980 + }, + { + "epoch": 0.5263713080168776, + "grad_norm": 0.5240869522094727, + "learning_rate": 0.0015, + "loss": 1.4498, + "step": 4990 + }, + { + "epoch": 0.5274261603375527, + "grad_norm": 0.6287071704864502, + "learning_rate": 0.0015, + "loss": 1.4621, + "step": 5000 + }, + { + "epoch": 0.5284810126582279, + "grad_norm": 0.5667035579681396, + "learning_rate": 0.0015, + "loss": 1.4547, + "step": 5010 + }, + { + "epoch": 0.5295358649789029, + "grad_norm": 0.6018599271774292, + "learning_rate": 0.0015, + "loss": 1.4489, + "step": 5020 + }, + { + "epoch": 0.5305907172995781, + "grad_norm": 0.5398070216178894, + "learning_rate": 0.0015, + "loss": 1.4338, + "step": 5030 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 0.48505374789237976, + "learning_rate": 0.0015, + "loss": 1.4617, + "step": 5040 + }, + { + "epoch": 0.5327004219409283, + "grad_norm": 0.6518738269805908, + "learning_rate": 0.0015, + "loss": 1.4499, + "step": 5050 + }, + { + "epoch": 0.5337552742616034, + "grad_norm": 0.517331063747406, + "learning_rate": 0.0015, + "loss": 1.4584, + "step": 5060 + }, + { + "epoch": 0.5348101265822784, + "grad_norm": 0.6288743615150452, + "learning_rate": 0.0015, + "loss": 1.4501, + "step": 5070 + }, + { + "epoch": 0.5358649789029536, + "grad_norm": 0.5427009463310242, + "learning_rate": 0.0015, + "loss": 1.461, + "step": 5080 + }, + { + "epoch": 0.5369198312236287, + "grad_norm": 0.4915584921836853, + "learning_rate": 0.0015, + "loss": 1.456, + "step": 5090 + }, + { + "epoch": 0.5379746835443038, + "grad_norm": 0.5427595973014832, + "learning_rate": 0.0015, + "loss": 1.4363, + "step": 5100 + }, + { + "epoch": 
0.5390295358649789, + "grad_norm": 0.6097490787506104, + "learning_rate": 0.0015, + "loss": 1.4542, + "step": 5110 + }, + { + "epoch": 0.540084388185654, + "grad_norm": 0.8088488578796387, + "learning_rate": 0.0015, + "loss": 1.4494, + "step": 5120 + }, + { + "epoch": 0.5411392405063291, + "grad_norm": 0.5150653719902039, + "learning_rate": 0.0015, + "loss": 1.4483, + "step": 5130 + }, + { + "epoch": 0.5421940928270043, + "grad_norm": 0.7246208190917969, + "learning_rate": 0.0015, + "loss": 1.4626, + "step": 5140 + }, + { + "epoch": 0.5432489451476793, + "grad_norm": 0.5539005994796753, + "learning_rate": 0.0015, + "loss": 1.458, + "step": 5150 + }, + { + "epoch": 0.5443037974683544, + "grad_norm": 0.5675966739654541, + "learning_rate": 0.0015, + "loss": 1.4515, + "step": 5160 + }, + { + "epoch": 0.5453586497890295, + "grad_norm": 0.5135599374771118, + "learning_rate": 0.0015, + "loss": 1.4507, + "step": 5170 + }, + { + "epoch": 0.5464135021097046, + "grad_norm": 0.5891128778457642, + "learning_rate": 0.0015, + "loss": 1.4594, + "step": 5180 + }, + { + "epoch": 0.5474683544303798, + "grad_norm": 0.6899130940437317, + "learning_rate": 0.0015, + "loss": 1.4451, + "step": 5190 + }, + { + "epoch": 0.5485232067510548, + "grad_norm": 0.6410430669784546, + "learning_rate": 0.0015, + "loss": 1.4568, + "step": 5200 + }, + { + "epoch": 0.54957805907173, + "grad_norm": 0.5227227210998535, + "learning_rate": 0.0015, + "loss": 1.4404, + "step": 5210 + }, + { + "epoch": 0.5506329113924051, + "grad_norm": 0.5342944264411926, + "learning_rate": 0.0015, + "loss": 1.4523, + "step": 5220 + }, + { + "epoch": 0.5516877637130801, + "grad_norm": 0.5157833099365234, + "learning_rate": 0.0015, + "loss": 1.4428, + "step": 5230 + }, + { + "epoch": 0.5527426160337553, + "grad_norm": 0.5259016752243042, + "learning_rate": 0.0015, + "loss": 1.449, + "step": 5240 + }, + { + "epoch": 0.5537974683544303, + "grad_norm": 0.7924625873565674, + "learning_rate": 0.0015, + "loss": 1.4433, + "step": 5250 
+ }, + { + "epoch": 0.5548523206751055, + "grad_norm": 0.5321140289306641, + "learning_rate": 0.0015, + "loss": 1.4486, + "step": 5260 + }, + { + "epoch": 0.5559071729957806, + "grad_norm": 0.6185385584831238, + "learning_rate": 0.0015, + "loss": 1.455, + "step": 5270 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 1.0393201112747192, + "learning_rate": 0.0015, + "loss": 1.4445, + "step": 5280 + }, + { + "epoch": 0.5580168776371308, + "grad_norm": 0.650523841381073, + "learning_rate": 0.0015, + "loss": 1.4475, + "step": 5290 + }, + { + "epoch": 0.5590717299578059, + "grad_norm": 0.836540162563324, + "learning_rate": 0.0015, + "loss": 1.453, + "step": 5300 + }, + { + "epoch": 0.560126582278481, + "grad_norm": 1.0315202474594116, + "learning_rate": 0.0015, + "loss": 1.4418, + "step": 5310 + }, + { + "epoch": 0.5611814345991561, + "grad_norm": 0.5890907049179077, + "learning_rate": 0.0015, + "loss": 1.4407, + "step": 5320 + }, + { + "epoch": 0.5622362869198312, + "grad_norm": 0.5192587971687317, + "learning_rate": 0.0015, + "loss": 1.4485, + "step": 5330 + }, + { + "epoch": 0.5632911392405063, + "grad_norm": 0.48271578550338745, + "learning_rate": 0.0015, + "loss": 1.4432, + "step": 5340 + }, + { + "epoch": 0.5643459915611815, + "grad_norm": 0.6865177750587463, + "learning_rate": 0.0015, + "loss": 1.4342, + "step": 5350 + }, + { + "epoch": 0.5654008438818565, + "grad_norm": 0.6784921288490295, + "learning_rate": 0.0015, + "loss": 1.445, + "step": 5360 + }, + { + "epoch": 0.5664556962025317, + "grad_norm": 0.5662094354629517, + "learning_rate": 0.0015, + "loss": 1.445, + "step": 5370 + }, + { + "epoch": 0.5675105485232067, + "grad_norm": 0.6252244710922241, + "learning_rate": 0.0015, + "loss": 1.4377, + "step": 5380 + }, + { + "epoch": 0.5685654008438819, + "grad_norm": 0.7630616426467896, + "learning_rate": 0.0015, + "loss": 1.4406, + "step": 5390 + }, + { + "epoch": 0.569620253164557, + "grad_norm": 0.5907131433486938, + "learning_rate": 0.0015, + "loss": 
1.4422, + "step": 5400 + }, + { + "epoch": 0.570675105485232, + "grad_norm": 0.555400013923645, + "learning_rate": 0.0015, + "loss": 1.4412, + "step": 5410 + }, + { + "epoch": 0.5717299578059072, + "grad_norm": 0.6746355891227722, + "learning_rate": 0.0015, + "loss": 1.4403, + "step": 5420 + }, + { + "epoch": 0.5727848101265823, + "grad_norm": 0.7087790369987488, + "learning_rate": 0.0015, + "loss": 1.4336, + "step": 5430 + }, + { + "epoch": 0.5738396624472574, + "grad_norm": 0.4771483540534973, + "learning_rate": 0.0015, + "loss": 1.4303, + "step": 5440 + }, + { + "epoch": 0.5748945147679325, + "grad_norm": 0.548286497592926, + "learning_rate": 0.0015, + "loss": 1.4394, + "step": 5450 + }, + { + "epoch": 0.5759493670886076, + "grad_norm": 0.5101822018623352, + "learning_rate": 0.0015, + "loss": 1.4403, + "step": 5460 + }, + { + "epoch": 0.5770042194092827, + "grad_norm": 0.5782089233398438, + "learning_rate": 0.0015, + "loss": 1.4394, + "step": 5470 + }, + { + "epoch": 0.5780590717299579, + "grad_norm": 0.8748046159744263, + "learning_rate": 0.0015, + "loss": 1.4433, + "step": 5480 + }, + { + "epoch": 0.5791139240506329, + "grad_norm": 0.4744136333465576, + "learning_rate": 0.0015, + "loss": 1.4494, + "step": 5490 + }, + { + "epoch": 0.580168776371308, + "grad_norm": 0.7095358967781067, + "learning_rate": 0.0015, + "loss": 1.4391, + "step": 5500 + }, + { + "epoch": 0.5812236286919831, + "grad_norm": 1.0599972009658813, + "learning_rate": 0.0015, + "loss": 1.4435, + "step": 5510 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 0.4942331910133362, + "learning_rate": 0.0015, + "loss": 1.4365, + "step": 5520 + }, + { + "epoch": 0.5833333333333334, + "grad_norm": 0.6657361388206482, + "learning_rate": 0.0015, + "loss": 1.4316, + "step": 5530 + }, + { + "epoch": 0.5843881856540084, + "grad_norm": 0.5583669543266296, + "learning_rate": 0.0015, + "loss": 1.4285, + "step": 5540 + }, + { + "epoch": 0.5854430379746836, + "grad_norm": 0.6270962357521057, + 
"learning_rate": 0.0015, + "loss": 1.4464, + "step": 5550 + }, + { + "epoch": 0.5864978902953587, + "grad_norm": 0.49382930994033813, + "learning_rate": 0.0015, + "loss": 1.4494, + "step": 5560 + }, + { + "epoch": 0.5875527426160337, + "grad_norm": 0.526191234588623, + "learning_rate": 0.0015, + "loss": 1.4378, + "step": 5570 + }, + { + "epoch": 0.5886075949367089, + "grad_norm": 0.507631778717041, + "learning_rate": 0.0015, + "loss": 1.4344, + "step": 5580 + }, + { + "epoch": 0.5896624472573839, + "grad_norm": 0.5356090664863586, + "learning_rate": 0.0015, + "loss": 1.4386, + "step": 5590 + }, + { + "epoch": 0.5907172995780591, + "grad_norm": 0.570796012878418, + "learning_rate": 0.0015, + "loss": 1.4336, + "step": 5600 + }, + { + "epoch": 0.5917721518987342, + "grad_norm": 0.7144193053245544, + "learning_rate": 0.0015, + "loss": 1.4387, + "step": 5610 + }, + { + "epoch": 0.5928270042194093, + "grad_norm": 0.5034265518188477, + "learning_rate": 0.0015, + "loss": 1.4474, + "step": 5620 + }, + { + "epoch": 0.5938818565400844, + "grad_norm": 0.663703441619873, + "learning_rate": 0.0015, + "loss": 1.4407, + "step": 5630 + }, + { + "epoch": 0.5949367088607594, + "grad_norm": 0.6947717070579529, + "learning_rate": 0.0015, + "loss": 1.4509, + "step": 5640 + }, + { + "epoch": 0.5959915611814346, + "grad_norm": 0.6027748584747314, + "learning_rate": 0.0015, + "loss": 1.4449, + "step": 5650 + }, + { + "epoch": 0.5970464135021097, + "grad_norm": 0.533505916595459, + "learning_rate": 0.0015, + "loss": 1.4399, + "step": 5660 + }, + { + "epoch": 0.5981012658227848, + "grad_norm": 0.6265408396720886, + "learning_rate": 0.0015, + "loss": 1.4487, + "step": 5670 + }, + { + "epoch": 0.5991561181434599, + "grad_norm": 0.5134779810905457, + "learning_rate": 0.0015, + "loss": 1.4483, + "step": 5680 + }, + { + "epoch": 0.6002109704641351, + "grad_norm": 0.9670719504356384, + "learning_rate": 0.0015, + "loss": 1.4278, + "step": 5690 + }, + { + "epoch": 0.6012658227848101, + "grad_norm": 
0.5491591095924377, + "learning_rate": 0.0015, + "loss": 1.4326, + "step": 5700 + }, + { + "epoch": 0.6023206751054853, + "grad_norm": 0.6348828673362732, + "learning_rate": 0.0015, + "loss": 1.4372, + "step": 5710 + }, + { + "epoch": 0.6033755274261603, + "grad_norm": 0.6312845945358276, + "learning_rate": 0.0015, + "loss": 1.4406, + "step": 5720 + }, + { + "epoch": 0.6044303797468354, + "grad_norm": 0.8930693864822388, + "learning_rate": 0.0015, + "loss": 1.4366, + "step": 5730 + }, + { + "epoch": 0.6054852320675106, + "grad_norm": 0.5609354376792908, + "learning_rate": 0.0015, + "loss": 1.434, + "step": 5740 + }, + { + "epoch": 0.6065400843881856, + "grad_norm": 0.6328973770141602, + "learning_rate": 0.0015, + "loss": 1.4125, + "step": 5750 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 0.6302005648612976, + "learning_rate": 0.0015, + "loss": 1.42, + "step": 5760 + }, + { + "epoch": 0.6086497890295358, + "grad_norm": 0.5807915925979614, + "learning_rate": 0.0015, + "loss": 1.423, + "step": 5770 + }, + { + "epoch": 0.609704641350211, + "grad_norm": 0.6533567309379578, + "learning_rate": 0.0015, + "loss": 1.434, + "step": 5780 + }, + { + "epoch": 0.6107594936708861, + "grad_norm": 0.6333770751953125, + "learning_rate": 0.0015, + "loss": 1.4273, + "step": 5790 + }, + { + "epoch": 0.6118143459915611, + "grad_norm": 0.5491467118263245, + "learning_rate": 0.0015, + "loss": 1.4333, + "step": 5800 + }, + { + "epoch": 0.6128691983122363, + "grad_norm": 0.5394753217697144, + "learning_rate": 0.0015, + "loss": 1.4384, + "step": 5810 + }, + { + "epoch": 0.6139240506329114, + "grad_norm": 0.690026044845581, + "learning_rate": 0.0015, + "loss": 1.4497, + "step": 5820 + }, + { + "epoch": 0.6149789029535865, + "grad_norm": 0.6209574937820435, + "learning_rate": 0.0015, + "loss": 1.4323, + "step": 5830 + }, + { + "epoch": 0.6160337552742616, + "grad_norm": 0.6453071236610413, + "learning_rate": 0.0015, + "loss": 1.4437, + "step": 5840 + }, + { + "epoch": 
0.6170886075949367, + "grad_norm": 0.5997976660728455, + "learning_rate": 0.0015, + "loss": 1.4476, + "step": 5850 + }, + { + "epoch": 0.6181434599156118, + "grad_norm": 0.5365158915519714, + "learning_rate": 0.0015, + "loss": 1.4325, + "step": 5860 + }, + { + "epoch": 0.619198312236287, + "grad_norm": 0.49893149733543396, + "learning_rate": 0.0015, + "loss": 1.4268, + "step": 5870 + }, + { + "epoch": 0.620253164556962, + "grad_norm": 0.5324397087097168, + "learning_rate": 0.0015, + "loss": 1.44, + "step": 5880 + }, + { + "epoch": 0.6213080168776371, + "grad_norm": 0.4756423830986023, + "learning_rate": 0.0015, + "loss": 1.429, + "step": 5890 + }, + { + "epoch": 0.6223628691983122, + "grad_norm": 0.4641866981983185, + "learning_rate": 0.0015, + "loss": 1.4314, + "step": 5900 + }, + { + "epoch": 0.6234177215189873, + "grad_norm": 0.5774140954017639, + "learning_rate": 0.0015, + "loss": 1.4254, + "step": 5910 + }, + { + "epoch": 0.6244725738396625, + "grad_norm": 0.5534369945526123, + "learning_rate": 0.0015, + "loss": 1.4215, + "step": 5920 + }, + { + "epoch": 0.6255274261603375, + "grad_norm": 0.874764621257782, + "learning_rate": 0.0015, + "loss": 1.4306, + "step": 5930 + }, + { + "epoch": 0.6265822784810127, + "grad_norm": 0.7962957620620728, + "learning_rate": 0.0015, + "loss": 1.4364, + "step": 5940 + }, + { + "epoch": 0.6276371308016878, + "grad_norm": 0.5455976128578186, + "learning_rate": 0.0015, + "loss": 1.4254, + "step": 5950 + }, + { + "epoch": 0.6286919831223629, + "grad_norm": 0.5671746134757996, + "learning_rate": 0.0015, + "loss": 1.4317, + "step": 5960 + }, + { + "epoch": 0.629746835443038, + "grad_norm": 0.5362151861190796, + "learning_rate": 0.0015, + "loss": 1.4336, + "step": 5970 + }, + { + "epoch": 0.630801687763713, + "grad_norm": 0.5267981290817261, + "learning_rate": 0.0015, + "loss": 1.4147, + "step": 5980 + }, + { + "epoch": 0.6318565400843882, + "grad_norm": 0.5344679355621338, + "learning_rate": 0.0015, + "loss": 1.4348, + "step": 5990 + 
}, + { + "epoch": 0.6329113924050633, + "grad_norm": 0.7529051303863525, + "learning_rate": 0.0015, + "loss": 1.4415, + "step": 6000 + }, + { + "epoch": 0.6339662447257384, + "grad_norm": 0.5814988017082214, + "learning_rate": 0.0015, + "loss": 1.429, + "step": 6010 + }, + { + "epoch": 0.6350210970464135, + "grad_norm": 0.6331633925437927, + "learning_rate": 0.0015, + "loss": 1.4312, + "step": 6020 + }, + { + "epoch": 0.6360759493670886, + "grad_norm": 0.5392013788223267, + "learning_rate": 0.0015, + "loss": 1.4144, + "step": 6030 + }, + { + "epoch": 0.6371308016877637, + "grad_norm": 0.5011127591133118, + "learning_rate": 0.0015, + "loss": 1.4325, + "step": 6040 + }, + { + "epoch": 0.6381856540084389, + "grad_norm": 0.511524498462677, + "learning_rate": 0.0015, + "loss": 1.4165, + "step": 6050 + }, + { + "epoch": 0.6392405063291139, + "grad_norm": 0.5531562566757202, + "learning_rate": 0.0015, + "loss": 1.4353, + "step": 6060 + }, + { + "epoch": 0.640295358649789, + "grad_norm": 0.6578499674797058, + "learning_rate": 0.0015, + "loss": 1.424, + "step": 6070 + }, + { + "epoch": 0.6413502109704642, + "grad_norm": 0.5781983733177185, + "learning_rate": 0.0015, + "loss": 1.4241, + "step": 6080 + }, + { + "epoch": 0.6424050632911392, + "grad_norm": 0.6368937492370605, + "learning_rate": 0.0015, + "loss": 1.4255, + "step": 6090 + }, + { + "epoch": 0.6434599156118144, + "grad_norm": 0.7107160687446594, + "learning_rate": 0.0015, + "loss": 1.422, + "step": 6100 + }, + { + "epoch": 0.6445147679324894, + "grad_norm": 0.5551614165306091, + "learning_rate": 0.0015, + "loss": 1.4327, + "step": 6110 + }, + { + "epoch": 0.6455696202531646, + "grad_norm": 0.5038522481918335, + "learning_rate": 0.0015, + "loss": 1.4274, + "step": 6120 + }, + { + "epoch": 0.6466244725738397, + "grad_norm": 0.5798343420028687, + "learning_rate": 0.0015, + "loss": 1.4241, + "step": 6130 + }, + { + "epoch": 0.6476793248945147, + "grad_norm": 1.0596702098846436, + "learning_rate": 0.0015, + "loss": 
1.4247, + "step": 6140 + }, + { + "epoch": 0.6487341772151899, + "grad_norm": 0.4872379004955292, + "learning_rate": 0.0015, + "loss": 1.4356, + "step": 6150 + }, + { + "epoch": 0.6497890295358649, + "grad_norm": 0.5309109687805176, + "learning_rate": 0.0015, + "loss": 1.4295, + "step": 6160 + }, + { + "epoch": 0.6508438818565401, + "grad_norm": 0.5368584990501404, + "learning_rate": 0.0014834368975312174, + "loss": 1.4067, + "step": 6170 + }, + { + "epoch": 0.6518987341772152, + "grad_norm": 0.6451349258422852, + "learning_rate": 0.0014629899726345957, + "loss": 1.4299, + "step": 6180 + }, + { + "epoch": 0.6529535864978903, + "grad_norm": 0.6021255254745483, + "learning_rate": 0.0014428248775471316, + "loss": 1.4318, + "step": 6190 + }, + { + "epoch": 0.6540084388185654, + "grad_norm": 0.4939712584018707, + "learning_rate": 0.00142293772767289, + "loss": 1.4232, + "step": 6200 + }, + { + "epoch": 0.6550632911392406, + "grad_norm": 0.6264420747756958, + "learning_rate": 0.001403324691959192, + "loss": 1.4183, + "step": 6210 + }, + { + "epoch": 0.6561181434599156, + "grad_norm": 0.6540963649749756, + "learning_rate": 0.0013839819921586025, + "loss": 1.4234, + "step": 6220 + }, + { + "epoch": 0.6571729957805907, + "grad_norm": 0.8758813142776489, + "learning_rate": 0.0013649059021010894, + "loss": 1.4117, + "step": 6230 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 0.6108523607254028, + "learning_rate": 0.0013460927469762154, + "loss": 1.4144, + "step": 6240 + }, + { + "epoch": 0.6592827004219409, + "grad_norm": 0.4905129671096802, + "learning_rate": 0.0013275389026252255, + "loss": 1.4218, + "step": 6250 + }, + { + "epoch": 0.6603375527426161, + "grad_norm": 0.5057850480079651, + "learning_rate": 0.0013092407948428887, + "loss": 1.4119, + "step": 6260 + }, + { + "epoch": 0.6613924050632911, + "grad_norm": 0.7858890295028687, + "learning_rate": 0.001291194898688966, + "loss": 1.4195, + "step": 6270 + }, + { + "epoch": 0.6624472573839663, + "grad_norm": 
0.47579309344291687, + "learning_rate": 0.001273397737809166, + "loss": 1.4133, + "step": 6280 + }, + { + "epoch": 0.6635021097046413, + "grad_norm": 0.5790653824806213, + "learning_rate": 0.001255845883765463, + "loss": 1.41, + "step": 6290 + }, + { + "epoch": 0.6645569620253164, + "grad_norm": 0.7623328566551208, + "learning_rate": 0.001238535955375642, + "loss": 1.4019, + "step": 6300 + }, + { + "epoch": 0.6656118143459916, + "grad_norm": 0.6686232686042786, + "learning_rate": 0.0012214646180619506, + "loss": 1.403, + "step": 6310 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.47785404324531555, + "learning_rate": 0.001204628583208727, + "loss": 1.3964, + "step": 6320 + }, + { + "epoch": 0.6677215189873418, + "grad_norm": 0.586111307144165, + "learning_rate": 0.0011880246075288827, + "loss": 1.405, + "step": 6330 + }, + { + "epoch": 0.6687763713080169, + "grad_norm": 0.6106604933738708, + "learning_rate": 0.001171649492439115, + "loss": 1.3987, + "step": 6340 + }, + { + "epoch": 0.669831223628692, + "grad_norm": 0.5575138926506042, + "learning_rate": 0.0011555000834437364, + "loss": 1.4019, + "step": 6350 + }, + { + "epoch": 0.6708860759493671, + "grad_norm": 0.7197902798652649, + "learning_rate": 0.0011395732695269908, + "loss": 1.3989, + "step": 6360 + }, + { + "epoch": 0.6719409282700421, + "grad_norm": 0.5655246376991272, + "learning_rate": 0.0011238659825537505, + "loss": 1.3825, + "step": 6370 + }, + { + "epoch": 0.6729957805907173, + "grad_norm": 0.48017051815986633, + "learning_rate": 0.0011083751966784717, + "loss": 1.3811, + "step": 6380 + }, + { + "epoch": 0.6740506329113924, + "grad_norm": 1.0802205801010132, + "learning_rate": 0.0010930979277622953, + "loss": 1.3967, + "step": 6390 + }, + { + "epoch": 0.6751054852320675, + "grad_norm": 0.7795031666755676, + "learning_rate": 0.0010780312327981854, + "loss": 1.3973, + "step": 6400 + }, + { + "epoch": 0.6761603375527426, + "grad_norm": 0.4866909384727478, + "learning_rate": 
0.0010631722093439888, + "loss": 1.3906, + "step": 6410 + }, + { + "epoch": 0.6772151898734177, + "grad_norm": 0.45107483863830566, + "learning_rate": 0.00104851799496331, + "loss": 1.3805, + "step": 6420 + }, + { + "epoch": 0.6782700421940928, + "grad_norm": 0.5446386933326721, + "learning_rate": 0.0010340657666740914, + "loss": 1.3882, + "step": 6430 + }, + { + "epoch": 0.679324894514768, + "grad_norm": 0.581493079662323, + "learning_rate": 0.0010198127404047975, + "loss": 1.3725, + "step": 6440 + }, + { + "epoch": 0.680379746835443, + "grad_norm": 0.5727628469467163, + "learning_rate": 0.0010057561704580897, + "loss": 1.3771, + "step": 6450 + }, + { + "epoch": 0.6814345991561181, + "grad_norm": 0.4938890039920807, + "learning_rate": 0.0009918933489818985, + "loss": 1.3889, + "step": 6460 + }, + { + "epoch": 0.6824894514767933, + "grad_norm": 0.48046085238456726, + "learning_rate": 0.0009782216054477827, + "loss": 1.3817, + "step": 6470 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 0.6804182529449463, + "learning_rate": 0.0009647383061364801, + "loss": 1.3855, + "step": 6480 + }, + { + "epoch": 0.6845991561181435, + "grad_norm": 0.49545037746429443, + "learning_rate": 0.0009514408536305495, + "loss": 1.3811, + "step": 6490 + }, + { + "epoch": 0.6856540084388185, + "grad_norm": 0.5484305024147034, + "learning_rate": 0.0009383266863140042, + "loss": 1.3944, + "step": 6500 + }, + { + "epoch": 0.6867088607594937, + "grad_norm": 0.4370139539241791, + "learning_rate": 0.000925393277878844, + "loss": 1.3911, + "step": 6510 + }, + { + "epoch": 0.6877637130801688, + "grad_norm": 0.47975751757621765, + "learning_rate": 0.0009126381368383879, + "loss": 1.3753, + "step": 6520 + }, + { + "epoch": 0.6888185654008439, + "grad_norm": 0.5764487385749817, + "learning_rate": 0.0009000588060473156, + "loss": 1.3695, + "step": 6530 + }, + { + "epoch": 0.689873417721519, + "grad_norm": 0.6651996374130249, + "learning_rate": 0.0008876528622283235, + "loss": 1.3799, + "step": 
6540 + }, + { + "epoch": 0.6909282700421941, + "grad_norm": 0.5270824432373047, + "learning_rate": 0.0008754179155053053, + "loss": 1.3726, + "step": 6550 + }, + { + "epoch": 0.6919831223628692, + "grad_norm": 0.5168655514717102, + "learning_rate": 0.0008633516089429683, + "loss": 1.3712, + "step": 6560 + }, + { + "epoch": 0.6930379746835443, + "grad_norm": 0.4749312996864319, + "learning_rate": 0.0008514516180927928, + "loss": 1.3692, + "step": 6570 + }, + { + "epoch": 0.6940928270042194, + "grad_norm": 0.4497000575065613, + "learning_rate": 0.0008397156505452524, + "loss": 1.3666, + "step": 6580 + }, + { + "epoch": 0.6951476793248945, + "grad_norm": 0.5032919049263, + "learning_rate": 0.0008281414454882051, + "loss": 1.37, + "step": 6590 + }, + { + "epoch": 0.6962025316455697, + "grad_norm": 0.47693976759910583, + "learning_rate": 0.0008167267732713704, + "loss": 1.3725, + "step": 6600 + }, + { + "epoch": 0.6972573839662447, + "grad_norm": 0.5165442228317261, + "learning_rate": 0.0008054694349768117, + "loss": 1.358, + "step": 6610 + }, + { + "epoch": 0.6983122362869199, + "grad_norm": 0.48088765144348145, + "learning_rate": 0.0007943672619953359, + "loss": 1.3692, + "step": 6620 + }, + { + "epoch": 0.6993670886075949, + "grad_norm": 0.673410177230835, + "learning_rate": 0.0007834181156087356, + "loss": 1.3623, + "step": 6630 + }, + { + "epoch": 0.70042194092827, + "grad_norm": 0.5547133088111877, + "learning_rate": 0.0007726198865777852, + "loss": 1.3657, + "step": 6640 + }, + { + "epoch": 0.7014767932489452, + "grad_norm": 0.6152449250221252, + "learning_rate": 0.0007619704947359191, + "loss": 1.3585, + "step": 6650 + }, + { + "epoch": 0.7025316455696202, + "grad_norm": 0.5233445167541504, + "learning_rate": 0.0007514678885885087, + "loss": 1.3565, + "step": 6660 + }, + { + "epoch": 0.7035864978902954, + "grad_norm": 0.48368462920188904, + "learning_rate": 0.0007411100449176633, + "loss": 1.3594, + "step": 6670 + }, + { + "epoch": 0.7046413502109705, + 
"grad_norm": 0.47559550404548645, + "learning_rate": 0.0007308949683924791, + "loss": 1.3587, + "step": 6680 + }, + { + "epoch": 0.7056962025316456, + "grad_norm": 0.4937286674976349, + "learning_rate": 0.000720820691184658, + "loss": 1.3532, + "step": 6690 + }, + { + "epoch": 0.7067510548523207, + "grad_norm": 0.4688864052295685, + "learning_rate": 0.0007108852725894269, + "loss": 1.3564, + "step": 6700 + }, + { + "epoch": 0.7078059071729957, + "grad_norm": 0.47821226716041565, + "learning_rate": 0.000701086798651681, + "loss": 1.356, + "step": 6710 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 0.5739691853523254, + "learning_rate": 0.0006914233817972798, + "loss": 1.3449, + "step": 6720 + }, + { + "epoch": 0.709915611814346, + "grad_norm": 0.5106558203697205, + "learning_rate": 0.0006818931604694261, + "loss": 1.3534, + "step": 6730 + }, + { + "epoch": 0.7109704641350211, + "grad_norm": 0.4550883173942566, + "learning_rate": 0.0006724942987700563, + "loss": 1.3545, + "step": 6740 + }, + { + "epoch": 0.7120253164556962, + "grad_norm": 0.4880290925502777, + "learning_rate": 0.0006632249861061732, + "loss": 1.3583, + "step": 6750 + }, + { + "epoch": 0.7130801687763713, + "grad_norm": 0.7219993472099304, + "learning_rate": 0.0006540834368410549, + "loss": 1.3525, + "step": 6760 + }, + { + "epoch": 0.7141350210970464, + "grad_norm": 0.5759091377258301, + "learning_rate": 0.0006450678899502701, + "loss": 1.3549, + "step": 6770 + }, + { + "epoch": 0.7151898734177216, + "grad_norm": 0.49101975560188293, + "learning_rate": 0.0006361766086824345, + "loss": 1.3511, + "step": 6780 + }, + { + "epoch": 0.7162447257383966, + "grad_norm": 0.4891746938228607, + "learning_rate": 0.000627407880224645, + "loss": 1.3571, + "step": 6790 + }, + { + "epoch": 0.7172995780590717, + "grad_norm": 0.4540632665157318, + "learning_rate": 0.0006187600153725225, + "loss": 1.341, + "step": 6800 + }, + { + "epoch": 0.7183544303797469, + "grad_norm": 0.7699702978134155, + "learning_rate": 
0.0006102313482048055, + "loss": 1.3452, + "step": 6810 + }, + { + "epoch": 0.7194092827004219, + "grad_norm": 0.5174618363380432, + "learning_rate": 0.0006018202357624274, + "loss": 1.3458, + "step": 6820 + }, + { + "epoch": 0.7204641350210971, + "grad_norm": 0.5508108139038086, + "learning_rate": 0.0005935250577320168, + "loss": 1.3446, + "step": 6830 + }, + { + "epoch": 0.7215189873417721, + "grad_norm": 0.5237697958946228, + "learning_rate": 0.0005853442161337618, + "loss": 1.3323, + "step": 6840 + }, + { + "epoch": 0.7225738396624473, + "grad_norm": 0.47324222326278687, + "learning_rate": 0.0005772761350135759, + "loss": 1.3419, + "step": 6850 + }, + { + "epoch": 0.7236286919831224, + "grad_norm": 0.5309130549430847, + "learning_rate": 0.0005693192601395058, + "loss": 1.3365, + "step": 6860 + }, + { + "epoch": 0.7246835443037974, + "grad_norm": 0.46695488691329956, + "learning_rate": 0.000561472058702326, + "loss": 1.3342, + "step": 6870 + }, + { + "epoch": 0.7257383966244726, + "grad_norm": 0.5354440808296204, + "learning_rate": 0.000553733019020258, + "loss": 1.3419, + "step": 6880 + }, + { + "epoch": 0.7267932489451476, + "grad_norm": 0.6369292736053467, + "learning_rate": 0.0005461006502477612, + "loss": 1.3311, + "step": 6890 + }, + { + "epoch": 0.7278481012658228, + "grad_norm": 0.5531011819839478, + "learning_rate": 0.0005385734820883369, + "loss": 1.3328, + "step": 6900 + }, + { + "epoch": 0.7289029535864979, + "grad_norm": 0.47640466690063477, + "learning_rate": 0.0005311500645112907, + "loss": 1.3563, + "step": 6910 + }, + { + "epoch": 0.729957805907173, + "grad_norm": 0.5280459523200989, + "learning_rate": 0.0005238289674723993, + "loss": 1.3352, + "step": 6920 + }, + { + "epoch": 0.7310126582278481, + "grad_norm": 0.4927901327610016, + "learning_rate": 0.0005166087806384274, + "loss": 1.3405, + "step": 6930 + }, + { + "epoch": 0.7320675105485233, + "grad_norm": 0.5483647584915161, + "learning_rate": 0.0005094881131154418, + "loss": 1.3431, + 
"step": 6940 + }, + { + "epoch": 0.7331223628691983, + "grad_norm": 0.48676419258117676, + "learning_rate": 0.0005024655931808696, + "loss": 1.3401, + "step": 6950 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 0.48754075169563293, + "learning_rate": 0.0004955398680192508, + "loss": 1.3289, + "step": 6960 + }, + { + "epoch": 0.7352320675105485, + "grad_norm": 0.4818941354751587, + "learning_rate": 0.000488709603461632, + "loss": 1.3269, + "step": 6970 + }, + { + "epoch": 0.7362869198312236, + "grad_norm": 0.47020477056503296, + "learning_rate": 0.000481973483728553, + "loss": 1.3279, + "step": 6980 + }, + { + "epoch": 0.7373417721518988, + "grad_norm": 0.46600183844566345, + "learning_rate": 0.0004753302111765748, + "loss": 1.329, + "step": 6990 + }, + { + "epoch": 0.7383966244725738, + "grad_norm": 0.5204647779464722, + "learning_rate": 0.0004687785060483032, + "loss": 1.3405, + "step": 7000 + }, + { + "epoch": 0.739451476793249, + "grad_norm": 0.46493932604789734, + "learning_rate": 0.0004623171062258558, + "loss": 1.3104, + "step": 7010 + }, + { + "epoch": 0.740506329113924, + "grad_norm": 0.6995670795440674, + "learning_rate": 0.0004559447669877288, + "loss": 1.3259, + "step": 7020 + }, + { + "epoch": 0.7415611814345991, + "grad_norm": 0.49520400166511536, + "learning_rate": 0.00044966026076901413, + "loss": 1.3305, + "step": 7030 + }, + { + "epoch": 0.7426160337552743, + "grad_norm": 0.6466608643531799, + "learning_rate": 0.00044346237692492177, + "loss": 1.3352, + "step": 7040 + }, + { + "epoch": 0.7436708860759493, + "grad_norm": 0.49757030606269836, + "learning_rate": 0.0004373499214975615, + "loss": 1.3217, + "step": 7050 + }, + { + "epoch": 0.7447257383966245, + "grad_norm": 0.5600497126579285, + "learning_rate": 0.0004313217169859396, + "loss": 1.3265, + "step": 7060 + }, + { + "epoch": 0.7457805907172996, + "grad_norm": 0.49683064222335815, + "learning_rate": 0.0004253766021191256, + "loss": 1.3337, + "step": 7070 + }, + { + "epoch": 
0.7468354430379747, + "grad_norm": 0.47298088669776917, + "learning_rate": 0.00041951343163254497, + "loss": 1.3285, + "step": 7080 + }, + { + "epoch": 0.7478902953586498, + "grad_norm": 0.5046508312225342, + "learning_rate": 0.00041373107604735626, + "loss": 1.3266, + "step": 7090 + }, + { + "epoch": 0.7489451476793249, + "grad_norm": 0.4719400405883789, + "learning_rate": 0.0004080284214528687, + "loss": 1.3225, + "step": 7100 + }, + { + "epoch": 0.75, + "grad_norm": 0.46346330642700195, + "learning_rate": 0.0004024043692919589, + "loss": 1.3327, + "step": 7110 + }, + { + "epoch": 0.7510548523206751, + "grad_norm": 0.49503159523010254, + "learning_rate": 0.0003968578361494449, + "loss": 1.3271, + "step": 7120 + }, + { + "epoch": 0.7521097046413502, + "grad_norm": 0.46437186002731323, + "learning_rate": 0.000391387753543378, + "loss": 1.3315, + "step": 7130 + }, + { + "epoch": 0.7531645569620253, + "grad_norm": 0.4448468089103699, + "learning_rate": 0.00038599306771921023, + "loss": 1.3181, + "step": 7140 + }, + { + "epoch": 0.7542194092827004, + "grad_norm": 0.559030294418335, + "learning_rate": 0.0003806727394468004, + "loss": 1.3118, + "step": 7150 + }, + { + "epoch": 0.7552742616033755, + "grad_norm": 0.4602966010570526, + "learning_rate": 0.0003754257438202162, + "loss": 1.3232, + "step": 7160 + }, + { + "epoch": 0.7563291139240507, + "grad_norm": 0.6919352412223816, + "learning_rate": 0.0003702510700602974, + "loss": 1.3305, + "step": 7170 + }, + { + "epoch": 0.7573839662447257, + "grad_norm": 0.567984402179718, + "learning_rate": 0.0003651477213199393, + "loss": 1.3081, + "step": 7180 + }, + { + "epoch": 0.7584388185654009, + "grad_norm": 0.48121747374534607, + "learning_rate": 0.000360114714492061, + "loss": 1.3125, + "step": 7190 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 0.4667026698589325, + "learning_rate": 0.0003551510800202195, + "loss": 1.318, + "step": 7200 + }, + { + "epoch": 0.760548523206751, + "grad_norm": 0.5451624393463135, + 
"learning_rate": 0.0003502558617118353, + "loss": 1.3211, + "step": 7210 + }, + { + "epoch": 0.7616033755274262, + "grad_norm": 0.5071596503257751, + "learning_rate": 0.0003454281165539914, + "loss": 1.3349, + "step": 7220 + }, + { + "epoch": 0.7626582278481012, + "grad_norm": 0.4939100742340088, + "learning_rate": 0.00034066691453177176, + "loss": 1.3271, + "step": 7230 + }, + { + "epoch": 0.7637130801687764, + "grad_norm": 0.4617915451526642, + "learning_rate": 0.0003359713384491037, + "loss": 1.3246, + "step": 7240 + }, + { + "epoch": 0.7647679324894515, + "grad_norm": 0.5091696977615356, + "learning_rate": 0.00033134048375206944, + "loss": 1.3191, + "step": 7250 + }, + { + "epoch": 0.7658227848101266, + "grad_norm": 0.46702897548675537, + "learning_rate": 0.0003267734583546536, + "loss": 1.3155, + "step": 7260 + }, + { + "epoch": 0.7668776371308017, + "grad_norm": 0.4664374887943268, + "learning_rate": 0.00032226938246689157, + "loss": 1.3158, + "step": 7270 + }, + { + "epoch": 0.7679324894514767, + "grad_norm": 0.5964689254760742, + "learning_rate": 0.0003178273884253874, + "loss": 1.3154, + "step": 7280 + }, + { + "epoch": 0.7689873417721519, + "grad_norm": 0.47512152791023254, + "learning_rate": 0.0003134466205261674, + "loss": 1.3259, + "step": 7290 + }, + { + "epoch": 0.770042194092827, + "grad_norm": 0.5440223217010498, + "learning_rate": 0.0003091262348598378, + "loss": 1.3299, + "step": 7300 + }, + { + "epoch": 0.7710970464135021, + "grad_norm": 0.444031298160553, + "learning_rate": 0.0003048653991490141, + "loss": 1.3115, + "step": 7310 + }, + { + "epoch": 0.7721518987341772, + "grad_norm": 0.6015365123748779, + "learning_rate": 0.00030066329258799187, + "loss": 1.3055, + "step": 7320 + }, + { + "epoch": 0.7732067510548524, + "grad_norm": 0.5202664136886597, + "learning_rate": 0.0002965191056846266, + "loss": 1.3119, + "step": 7330 + }, + { + "epoch": 0.7742616033755274, + "grad_norm": 0.5013032555580139, + "learning_rate": 0.000292432040104394, + 
"loss": 1.3093, + "step": 7340 + }, + { + "epoch": 0.7753164556962026, + "grad_norm": 0.4607769548892975, + "learning_rate": 0.00028840130851659853, + "loss": 1.3057, + "step": 7350 + }, + { + "epoch": 0.7763713080168776, + "grad_norm": 0.4528924822807312, + "learning_rate": 0.0002844261344427028, + "loss": 1.3138, + "step": 7360 + }, + { + "epoch": 0.7774261603375527, + "grad_norm": 0.4496114253997803, + "learning_rate": 0.0002805057521067471, + "loss": 1.3141, + "step": 7370 + }, + { + "epoch": 0.7784810126582279, + "grad_norm": 0.4944457411766052, + "learning_rate": 0.00027663940628783017, + "loss": 1.3022, + "step": 7380 + }, + { + "epoch": 0.7795358649789029, + "grad_norm": 0.49140650033950806, + "learning_rate": 0.00027282635217462393, + "loss": 1.3152, + "step": 7390 + }, + { + "epoch": 0.7805907172995781, + "grad_norm": 0.504790186882019, + "learning_rate": 0.0002690658552218937, + "loss": 1.3193, + "step": 7400 + }, + { + "epoch": 0.7816455696202531, + "grad_norm": 0.49402180314064026, + "learning_rate": 0.00026535719100899516, + "loss": 1.2989, + "step": 7410 + }, + { + "epoch": 0.7827004219409283, + "grad_norm": 0.47412753105163574, + "learning_rate": 0.00026169964510032245, + "loss": 1.3049, + "step": 7420 + }, + { + "epoch": 0.7837552742616034, + "grad_norm": 0.4607010781764984, + "learning_rate": 0.00025809251290767984, + "loss": 1.2922, + "step": 7430 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 0.5059224963188171, + "learning_rate": 0.00025453509955454957, + "loss": 1.2976, + "step": 7440 + }, + { + "epoch": 0.7858649789029536, + "grad_norm": 0.4447955787181854, + "learning_rate": 0.00025102671974223175, + "loss": 1.2973, + "step": 7450 + }, + { + "epoch": 0.7869198312236287, + "grad_norm": 0.4556456506252289, + "learning_rate": 0.00024756669761782815, + "loss": 1.3068, + "step": 7460 + }, + { + "epoch": 0.7879746835443038, + "grad_norm": 0.4962462782859802, + "learning_rate": 0.0002441543666440464, + "loss": 1.3014, + "step": 7470 + }, + 
{ + "epoch": 0.7890295358649789, + "grad_norm": 0.4789837598800659, + "learning_rate": 0.00024078906947079878, + "loss": 1.3069, + "step": 7480 + }, + { + "epoch": 0.790084388185654, + "grad_norm": 0.5070563554763794, + "learning_rate": 0.00023747015780857005, + "loss": 1.3128, + "step": 7490 + }, + { + "epoch": 0.7911392405063291, + "grad_norm": 0.47648945450782776, + "learning_rate": 0.00023419699230353144, + "loss": 1.3086, + "step": 7500 + }, + { + "epoch": 0.7921940928270043, + "grad_norm": 0.4938623011112213, + "learning_rate": 0.00023096894241437586, + "loss": 1.3153, + "step": 7510 + }, + { + "epoch": 0.7932489451476793, + "grad_norm": 0.4547622501850128, + "learning_rate": 0.00022778538629085056, + "loss": 1.2979, + "step": 7520 + }, + { + "epoch": 0.7943037974683544, + "grad_norm": 0.4571170508861542, + "learning_rate": 0.00022464571065396427, + "loss": 1.2994, + "step": 7530 + }, + { + "epoch": 0.7953586497890295, + "grad_norm": 0.4782553017139435, + "learning_rate": 0.00022154931067784521, + "loss": 1.297, + "step": 7540 + }, + { + "epoch": 0.7964135021097046, + "grad_norm": 0.5219008922576904, + "learning_rate": 0.00021849558987322782, + "loss": 1.2932, + "step": 7550 + }, + { + "epoch": 0.7974683544303798, + "grad_norm": 0.5985008478164673, + "learning_rate": 0.0002154839599725452, + "loss": 1.2978, + "step": 7560 + }, + { + "epoch": 0.7985232067510548, + "grad_norm": 0.46499961614608765, + "learning_rate": 0.00021251384081660544, + "loss": 1.3087, + "step": 7570 + }, + { + "epoch": 0.79957805907173, + "grad_norm": 0.46360117197036743, + "learning_rate": 0.0002095846602428303, + "loss": 1.3021, + "step": 7580 + }, + { + "epoch": 0.8006329113924051, + "grad_norm": 0.47122639417648315, + "learning_rate": 0.00020669585397503358, + "loss": 1.294, + "step": 7590 + }, + { + "epoch": 0.8016877637130801, + "grad_norm": 0.47307297587394714, + "learning_rate": 0.0002038468655147195, + "loss": 1.2988, + "step": 7600 + }, + { + "epoch": 0.8027426160337553, + 
"grad_norm": 0.46345120668411255, + "learning_rate": 0.00020103714603387894, + "loss": 1.312, + "step": 7610 + }, + { + "epoch": 0.8037974683544303, + "grad_norm": 0.519330620765686, + "learning_rate": 0.00019826615426926338, + "loss": 1.2852, + "step": 7620 + }, + { + "epoch": 0.8048523206751055, + "grad_norm": 0.4953019320964813, + "learning_rate": 0.00019553335641811625, + "loss": 1.3066, + "step": 7630 + }, + { + "epoch": 0.8059071729957806, + "grad_norm": 0.5160877108573914, + "learning_rate": 0.0001928382260353415, + "loss": 1.2982, + "step": 7640 + }, + { + "epoch": 0.8069620253164557, + "grad_norm": 0.45700088143348694, + "learning_rate": 0.00019018024393208902, + "loss": 1.316, + "step": 7650 + }, + { + "epoch": 0.8080168776371308, + "grad_norm": 0.45830273628234863, + "learning_rate": 0.00018755889807573872, + "loss": 1.2913, + "step": 7660 + }, + { + "epoch": 0.8090717299578059, + "grad_norm": 0.4837879240512848, + "learning_rate": 0.00018497368349126262, + "loss": 1.2986, + "step": 7670 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 0.5491199493408203, + "learning_rate": 0.00018242410216394648, + "loss": 1.3123, + "step": 7680 + }, + { + "epoch": 0.8111814345991561, + "grad_norm": 0.49971577525138855, + "learning_rate": 0.0001799096629434529, + "loss": 1.2873, + "step": 7690 + }, + { + "epoch": 0.8122362869198312, + "grad_norm": 0.4328581988811493, + "learning_rate": 0.00017742988144920578, + "loss": 1.2948, + "step": 7700 + }, + { + "epoch": 0.8132911392405063, + "grad_norm": 0.5801659226417542, + "learning_rate": 0.00017498427997707976, + "loss": 1.2932, + "step": 7710 + }, + { + "epoch": 0.8143459915611815, + "grad_norm": 0.45597508549690247, + "learning_rate": 0.00017257238740737548, + "loss": 1.2981, + "step": 7720 + }, + { + "epoch": 0.8154008438818565, + "grad_norm": 0.48222076892852783, + "learning_rate": 0.00017019373911406307, + "loss": 1.3037, + "step": 7730 + }, + { + "epoch": 0.8164556962025317, + "grad_norm": 0.4486793875694275, + 
"learning_rate": 0.000167847876875277, + "loss": 1.3034, + "step": 7740 + }, + { + "epoch": 0.8175105485232067, + "grad_norm": 0.4598535895347595, + "learning_rate": 0.00016553434878504428, + "loss": 1.2882, + "step": 7750 + }, + { + "epoch": 0.8185654008438819, + "grad_norm": 0.5578005313873291, + "learning_rate": 0.00016325270916622947, + "loss": 1.2894, + "step": 7760 + }, + { + "epoch": 0.819620253164557, + "grad_norm": 0.46461695432662964, + "learning_rate": 0.00016100251848467966, + "loss": 1.2982, + "step": 7770 + }, + { + "epoch": 0.820675105485232, + "grad_norm": 0.5096900463104248, + "learning_rate": 0.0001587833432645528, + "loss": 1.2889, + "step": 7780 + }, + { + "epoch": 0.8217299578059072, + "grad_norm": 0.5235912203788757, + "learning_rate": 0.00015659475600481292, + "loss": 1.3056, + "step": 7790 + }, + { + "epoch": 0.8227848101265823, + "grad_norm": 0.43095365166664124, + "learning_rate": 0.00015443633509687688, + "loss": 1.2962, + "step": 7800 + }, + { + "epoch": 0.8238396624472574, + "grad_norm": 0.44497594237327576, + "learning_rate": 0.00015230766474339536, + "loss": 1.2946, + "step": 7810 + }, + { + "epoch": 0.8248945147679325, + "grad_norm": 0.45911484956741333, + "learning_rate": 0.00015020833487815416, + "loss": 1.3025, + "step": 7820 + }, + { + "epoch": 0.8259493670886076, + "grad_norm": 0.5016101002693176, + "learning_rate": 0.0001481379410870792, + "loss": 1.2922, + "step": 7830 + }, + { + "epoch": 0.8270042194092827, + "grad_norm": 0.5127238035202026, + "learning_rate": 0.00014609608453033013, + "loss": 1.2774, + "step": 7840 + }, + { + "epoch": 0.8280590717299579, + "grad_norm": 0.4559255540370941, + "learning_rate": 0.00014408237186546807, + "loss": 1.2849, + "step": 7850 + }, + { + "epoch": 0.8291139240506329, + "grad_norm": 0.43668150901794434, + "learning_rate": 0.00014209641517168273, + "loss": 1.275, + "step": 7860 + }, + { + "epoch": 0.830168776371308, + "grad_norm": 0.4989013373851776, + "learning_rate": 
0.00014013783187506265, + "loss": 1.2889, + "step": 7870 + }, + { + "epoch": 0.8312236286919831, + "grad_norm": 0.4905323088169098, + "learning_rate": 0.00013820624467489697, + "loss": 1.3083, + "step": 7880 + }, + { + "epoch": 0.8322784810126582, + "grad_norm": 0.48599889874458313, + "learning_rate": 0.00013630128147099213, + "loss": 1.3029, + "step": 7890 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.4611985981464386, + "learning_rate": 0.00013442257529199068, + "loss": 1.2835, + "step": 7900 + }, + { + "epoch": 0.8343881856540084, + "grad_norm": 0.5239390134811401, + "learning_rate": 0.00013256976422467803, + "loss": 1.293, + "step": 7910 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 0.4843706488609314, + "learning_rate": 0.00013074249134426366, + "loss": 1.2911, + "step": 7920 + }, + { + "epoch": 0.8364978902953587, + "grad_norm": 0.47987717390060425, + "learning_rate": 0.0001289404046456233, + "loss": 1.303, + "step": 7930 + }, + { + "epoch": 0.8375527426160337, + "grad_norm": 0.49060261249542236, + "learning_rate": 0.0001271631569754887, + "loss": 1.2946, + "step": 7940 + }, + { + "epoch": 0.8386075949367089, + "grad_norm": 0.4857374429702759, + "learning_rate": 0.0001254104059655723, + "loss": 1.2992, + "step": 7950 + }, + { + "epoch": 0.8396624472573839, + "grad_norm": 0.47642552852630615, + "learning_rate": 0.00012368181396661337, + "loss": 1.2831, + "step": 7960 + }, + { + "epoch": 0.8407172995780591, + "grad_norm": 0.45929238200187683, + "learning_rate": 0.00012197704798333364, + "loss": 1.2834, + "step": 7970 + }, + { + "epoch": 0.8417721518987342, + "grad_norm": 0.46110883355140686, + "learning_rate": 0.00012029577961028894, + "loss": 1.2807, + "step": 7980 + }, + { + "epoch": 0.8428270042194093, + "grad_norm": 0.4774809181690216, + "learning_rate": 0.00011863768496860542, + "loss": 1.2937, + "step": 7990 + }, + { + "epoch": 0.8438818565400844, + "grad_norm": 0.48800569772720337, + "learning_rate": 0.00011700244464358777, + "loss": 
1.288, + "step": 8000 + }, + { + "epoch": 0.8449367088607594, + "grad_norm": 0.49740204215049744, + "learning_rate": 0.00011538974362318715, + "loss": 1.2956, + "step": 8010 + }, + { + "epoch": 0.8459915611814346, + "grad_norm": 0.5389509797096252, + "learning_rate": 0.00011379927123731737, + "loss": 1.2886, + "step": 8020 + }, + { + "epoch": 0.8470464135021097, + "grad_norm": 0.4840477406978607, + "learning_rate": 0.0001122307210980077, + "loss": 1.2866, + "step": 8030 + }, + { + "epoch": 0.8481012658227848, + "grad_norm": 0.4536563456058502, + "learning_rate": 0.00011068379104038026, + "loss": 1.2973, + "step": 8040 + }, + { + "epoch": 0.8491561181434599, + "grad_norm": 0.46081849932670593, + "learning_rate": 0.00010915818306444116, + "loss": 1.2795, + "step": 8050 + }, + { + "epoch": 0.8502109704641351, + "grad_norm": 0.48537546396255493, + "learning_rate": 0.00010765360327767384, + "loss": 1.2873, + "step": 8060 + }, + { + "epoch": 0.8512658227848101, + "grad_norm": 0.4364425241947174, + "learning_rate": 0.00010616976183842376, + "loss": 1.2926, + "step": 8070 + }, + { + "epoch": 0.8523206751054853, + "grad_norm": 0.46480149030685425, + "learning_rate": 0.00010470637290006365, + "loss": 1.2941, + "step": 8080 + }, + { + "epoch": 0.8533755274261603, + "grad_norm": 0.48859983682632446, + "learning_rate": 0.00010326315455592764, + "loss": 1.2817, + "step": 8090 + }, + { + "epoch": 0.8544303797468354, + "grad_norm": 0.5287128686904907, + "learning_rate": 0.0001018398287850053, + "loss": 1.2784, + "step": 8100 + }, + { + "epoch": 0.8554852320675106, + "grad_norm": 0.49913743138313293, + "learning_rate": 0.00010043612139838357, + "loss": 1.2952, + "step": 8110 + }, + { + "epoch": 0.8565400843881856, + "grad_norm": 0.46165934205055237, + "learning_rate": 9.905176198642719e-05, + "loss": 1.2845, + "step": 8120 + }, + { + "epoch": 0.8575949367088608, + "grad_norm": 0.5162762403488159, + "learning_rate": 9.76864838666871e-05, + "loss": 1.2859, + "step": 8130 + }, + { + 
"epoch": 0.8586497890295358, + "grad_norm": 0.4593356251716614, + "learning_rate": 9.634002403252676e-05, + "loss": 1.2839, + "step": 8140 + }, + { + "epoch": 0.859704641350211, + "grad_norm": 0.46035006642341614, + "learning_rate": 9.501212310245681e-05, + "loss": 1.2784, + "step": 8150 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 0.4654366075992584, + "learning_rate": 9.370252527016777e-05, + "loss": 1.2919, + "step": 8160 + }, + { + "epoch": 0.8618143459915611, + "grad_norm": 0.5215297341346741, + "learning_rate": 9.241097825525163e-05, + "loss": 1.2812, + "step": 8170 + }, + { + "epoch": 0.8628691983122363, + "grad_norm": 0.4827187955379486, + "learning_rate": 9.113723325460276e-05, + "loss": 1.2893, + "step": 8180 + }, + { + "epoch": 0.8639240506329114, + "grad_norm": 0.530578076839447, + "learning_rate": 8.988104489448849e-05, + "loss": 1.284, + "step": 8190 + }, + { + "epoch": 0.8649789029535865, + "grad_norm": 0.4608142673969269, + "learning_rate": 8.864217118328042e-05, + "loss": 1.2933, + "step": 8200 + }, + { + "epoch": 0.8660337552742616, + "grad_norm": 0.4659345746040344, + "learning_rate": 8.742037346483729e-05, + "loss": 1.2873, + "step": 8210 + }, + { + "epoch": 0.8670886075949367, + "grad_norm": 0.4808141589164734, + "learning_rate": 8.62154163725303e-05, + "loss": 1.2956, + "step": 8220 + }, + { + "epoch": 0.8681434599156118, + "grad_norm": 0.44529998302459717, + "learning_rate": 8.502706778390219e-05, + "loss": 1.2891, + "step": 8230 + }, + { + "epoch": 0.869198312236287, + "grad_norm": 0.4749753773212433, + "learning_rate": 8.38550987759513e-05, + "loss": 1.2862, + "step": 8240 + }, + { + "epoch": 0.870253164556962, + "grad_norm": 0.48008447885513306, + "learning_rate": 8.269928358103191e-05, + "loss": 1.301, + "step": 8250 + }, + { + "epoch": 0.8713080168776371, + "grad_norm": 0.4491159915924072, + "learning_rate": 8.155939954336243e-05, + "loss": 1.2904, + "step": 8260 + }, + { + "epoch": 0.8723628691983122, + "grad_norm": 
0.46144554018974304, + "learning_rate": 8.043522707613312e-05, + "loss": 1.2872, + "step": 8270 + }, + { + "epoch": 0.8734177215189873, + "grad_norm": 0.437112957239151, + "learning_rate": 7.932654961920486e-05, + "loss": 1.2716, + "step": 8280 + }, + { + "epoch": 0.8744725738396625, + "grad_norm": 0.4609586000442505, + "learning_rate": 7.823315359739135e-05, + "loss": 1.2721, + "step": 8290 + }, + { + "epoch": 0.8755274261603375, + "grad_norm": 0.4745173156261444, + "learning_rate": 7.715482837931577e-05, + "loss": 1.299, + "step": 8300 + }, + { + "epoch": 0.8765822784810127, + "grad_norm": 0.4722728133201599, + "learning_rate": 7.6091366236835e-05, + "loss": 1.2753, + "step": 8310 + }, + { + "epoch": 0.8776371308016878, + "grad_norm": 0.4655050039291382, + "learning_rate": 7.504256230502289e-05, + "loss": 1.2958, + "step": 8320 + }, + { + "epoch": 0.8786919831223629, + "grad_norm": 0.44645416736602783, + "learning_rate": 7.400821454270524e-05, + "loss": 1.2878, + "step": 8330 + }, + { + "epoch": 0.879746835443038, + "grad_norm": 0.4844835698604584, + "learning_rate": 7.29881236935386e-05, + "loss": 1.2743, + "step": 8340 + }, + { + "epoch": 0.880801687763713, + "grad_norm": 0.4614316523075104, + "learning_rate": 7.198209324762562e-05, + "loss": 1.2794, + "step": 8350 + }, + { + "epoch": 0.8818565400843882, + "grad_norm": 0.46245789527893066, + "learning_rate": 7.098992940365946e-05, + "loss": 1.2767, + "step": 8360 + }, + { + "epoch": 0.8829113924050633, + "grad_norm": 0.44889703392982483, + "learning_rate": 7.001144103159e-05, + "loss": 1.2823, + "step": 8370 + }, + { + "epoch": 0.8839662447257384, + "grad_norm": 0.447860985994339, + "learning_rate": 6.904643963580461e-05, + "loss": 1.2912, + "step": 8380 + }, + { + "epoch": 0.8850210970464135, + "grad_norm": 0.46396374702453613, + "learning_rate": 6.809473931881644e-05, + "loss": 1.2803, + "step": 8390 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 0.46097517013549805, + "learning_rate": 
6.71561567454532e-05, + "loss": 1.2824, + "step": 8400 + }, + { + "epoch": 0.8871308016877637, + "grad_norm": 0.48171210289001465, + "learning_rate": 6.623051110753948e-05, + "loss": 1.2986, + "step": 8410 + }, + { + "epoch": 0.8881856540084389, + "grad_norm": 0.4460631012916565, + "learning_rate": 6.531762408906607e-05, + "loss": 1.2877, + "step": 8420 + }, + { + "epoch": 0.8892405063291139, + "grad_norm": 0.4530577063560486, + "learning_rate": 6.441731983183912e-05, + "loss": 1.2835, + "step": 8430 + }, + { + "epoch": 0.890295358649789, + "grad_norm": 0.4470306932926178, + "learning_rate": 6.352942490160292e-05, + "loss": 1.2771, + "step": 8440 + }, + { + "epoch": 0.8913502109704642, + "grad_norm": 0.4788602590560913, + "learning_rate": 6.265376825462966e-05, + "loss": 1.288, + "step": 8450 + }, + { + "epoch": 0.8924050632911392, + "grad_norm": 0.5076389908790588, + "learning_rate": 6.179018120476945e-05, + "loss": 1.2806, + "step": 8460 + }, + { + "epoch": 0.8934599156118144, + "grad_norm": 0.4506382346153259, + "learning_rate": 6.0938497390954946e-05, + "loss": 1.2842, + "step": 8470 + }, + { + "epoch": 0.8945147679324894, + "grad_norm": 0.44538381695747375, + "learning_rate": 6.009855274515339e-05, + "loss": 1.2775, + "step": 8480 + }, + { + "epoch": 0.8955696202531646, + "grad_norm": 0.4832964539527893, + "learning_rate": 5.9270185460760735e-05, + "loss": 1.2837, + "step": 8490 + }, + { + "epoch": 0.8966244725738397, + "grad_norm": 0.44291096925735474, + "learning_rate": 5.8453235961431225e-05, + "loss": 1.2741, + "step": 8500 + }, + { + "epoch": 0.8976793248945147, + "grad_norm": 0.4487961232662201, + "learning_rate": 5.764754687033678e-05, + "loss": 1.2849, + "step": 8510 + }, + { + "epoch": 0.8987341772151899, + "grad_norm": 0.4693380296230316, + "learning_rate": 5.6852962979849836e-05, + "loss": 1.2805, + "step": 8520 + }, + { + "epoch": 0.8997890295358649, + "grad_norm": 0.44351643323898315, + "learning_rate": 5.6069331221644284e-05, + "loss": 1.2936, + 
"step": 8530 + }, + { + "epoch": 0.9008438818565401, + "grad_norm": 0.46539580821990967, + "learning_rate": 5.529650063720842e-05, + "loss": 1.2894, + "step": 8540 + }, + { + "epoch": 0.9018987341772152, + "grad_norm": 0.45908939838409424, + "learning_rate": 5.453432234876445e-05, + "loss": 1.2803, + "step": 8550 + }, + { + "epoch": 0.9029535864978903, + "grad_norm": 0.4722623825073242, + "learning_rate": 5.37826495305886e-05, + "loss": 1.2739, + "step": 8560 + }, + { + "epoch": 0.9040084388185654, + "grad_norm": 0.5558964610099792, + "learning_rate": 5.304133738072674e-05, + "loss": 1.2924, + "step": 8570 + }, + { + "epoch": 0.9050632911392406, + "grad_norm": 0.46721094846725464, + "learning_rate": 5.2310243093099814e-05, + "loss": 1.2825, + "step": 8580 + }, + { + "epoch": 0.9061181434599156, + "grad_norm": 0.49262481927871704, + "learning_rate": 5.158922582999368e-05, + "loss": 1.2874, + "step": 8590 + }, + { + "epoch": 0.9071729957805907, + "grad_norm": 0.4975415766239166, + "learning_rate": 5.087814669492819e-05, + "loss": 1.2817, + "step": 8600 + }, + { + "epoch": 0.9082278481012658, + "grad_norm": 0.48556289076805115, + "learning_rate": 5.017686870590028e-05, + "loss": 1.282, + "step": 8610 + }, + { + "epoch": 0.9092827004219409, + "grad_norm": 0.47611770033836365, + "learning_rate": 4.948525676899577e-05, + "loss": 1.2763, + "step": 8620 + }, + { + "epoch": 0.9103375527426161, + "grad_norm": 0.46547186374664307, + "learning_rate": 4.880317765236493e-05, + "loss": 1.2878, + "step": 8630 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 0.5726670026779175, + "learning_rate": 4.8130499960556755e-05, + "loss": 1.2687, + "step": 8640 + }, + { + "epoch": 0.9124472573839663, + "grad_norm": 0.4780838191509247, + "learning_rate": 4.746709410920699e-05, + "loss": 1.2756, + "step": 8650 + }, + { + "epoch": 0.9135021097046413, + "grad_norm": 0.49581974744796753, + "learning_rate": 4.681283230007507e-05, + "loss": 1.2751, + "step": 8660 + }, + { + "epoch": 
0.9145569620253164, + "grad_norm": 0.46643558144569397, + "learning_rate": 4.616758849642509e-05, + "loss": 1.2822, + "step": 8670 + }, + { + "epoch": 0.9156118143459916, + "grad_norm": 0.4325465261936188, + "learning_rate": 4.553123839874615e-05, + "loss": 1.2913, + "step": 8680 + }, + { + "epoch": 0.9166666666666666, + "grad_norm": 0.4567927420139313, + "learning_rate": 4.490365942080736e-05, + "loss": 1.283, + "step": 8690 + }, + { + "epoch": 0.9177215189873418, + "grad_norm": 0.45788127183914185, + "learning_rate": 4.428473066604285e-05, + "loss": 1.2829, + "step": 8700 + }, + { + "epoch": 0.9187763713080169, + "grad_norm": 0.47260361909866333, + "learning_rate": 4.367433290426233e-05, + "loss": 1.281, + "step": 8710 + }, + { + "epoch": 0.919831223628692, + "grad_norm": 0.4562292993068695, + "learning_rate": 4.3072348548682595e-05, + "loss": 1.276, + "step": 8720 + }, + { + "epoch": 0.9208860759493671, + "grad_norm": 0.4540281593799591, + "learning_rate": 4.247866163327575e-05, + "loss": 1.2881, + "step": 8730 + }, + { + "epoch": 0.9219409282700421, + "grad_norm": 0.4628209173679352, + "learning_rate": 4.1893157790429404e-05, + "loss": 1.2777, + "step": 8740 + }, + { + "epoch": 0.9229957805907173, + "grad_norm": 0.47517630457878113, + "learning_rate": 4.1315724228915066e-05, + "loss": 1.2755, + "step": 8750 + }, + { + "epoch": 0.9240506329113924, + "grad_norm": 0.49479952454566956, + "learning_rate": 4.074624971216005e-05, + "loss": 1.2633, + "step": 8760 + }, + { + "epoch": 0.9251054852320675, + "grad_norm": 0.4500695466995239, + "learning_rate": 4.018462453681889e-05, + "loss": 1.2746, + "step": 8770 + }, + { + "epoch": 0.9261603375527426, + "grad_norm": 0.4760417640209198, + "learning_rate": 3.963074051164014e-05, + "loss": 1.2807, + "step": 8780 + }, + { + "epoch": 0.9272151898734177, + "grad_norm": 0.4558374583721161, + "learning_rate": 3.908449093662446e-05, + "loss": 1.2753, + "step": 8790 + }, + { + "epoch": 0.9282700421940928, + "grad_norm": 
0.46657711267471313, + "learning_rate": 3.854577058246998e-05, + "loss": 1.2721, + "step": 8800 + }, + { + "epoch": 0.929324894514768, + "grad_norm": 0.441482275724411, + "learning_rate": 3.801447567030094e-05, + "loss": 1.2922, + "step": 8810 + }, + { + "epoch": 0.930379746835443, + "grad_norm": 0.43996483087539673, + "learning_rate": 3.7490503851675777e-05, + "loss": 1.277, + "step": 8820 + }, + { + "epoch": 0.9314345991561181, + "grad_norm": 0.4519858658313751, + "learning_rate": 3.6973754188870806e-05, + "loss": 1.2935, + "step": 8830 + }, + { + "epoch": 0.9324894514767933, + "grad_norm": 0.4479627311229706, + "learning_rate": 3.6464127135435536e-05, + "loss": 1.2796, + "step": 8840 + }, + { + "epoch": 0.9335443037974683, + "grad_norm": 0.4910711944103241, + "learning_rate": 3.596152451701616e-05, + "loss": 1.2794, + "step": 8850 + }, + { + "epoch": 0.9345991561181435, + "grad_norm": 0.49181532859802246, + "learning_rate": 3.5465849512443226e-05, + "loss": 1.2789, + "step": 8860 + }, + { + "epoch": 0.9356540084388185, + "grad_norm": 0.4645511209964752, + "learning_rate": 3.4977006635080086e-05, + "loss": 1.2816, + "step": 8870 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 0.47990480065345764, + "learning_rate": 3.449490171442838e-05, + "loss": 1.2847, + "step": 8880 + }, + { + "epoch": 0.9377637130801688, + "grad_norm": 0.4430792033672333, + "learning_rate": 3.401944187798702e-05, + "loss": 1.2832, + "step": 8890 + }, + { + "epoch": 0.9388185654008439, + "grad_norm": 0.4384567439556122, + "learning_rate": 3.355053553336137e-05, + "loss": 1.2747, + "step": 8900 + }, + { + "epoch": 0.939873417721519, + "grad_norm": 0.4880802631378174, + "learning_rate": 3.308809235061882e-05, + "loss": 1.2736, + "step": 8910 + }, + { + "epoch": 0.9409282700421941, + "grad_norm": 0.4456760883331299, + "learning_rate": 3.263202324488772e-05, + "loss": 1.2797, + "step": 8920 + }, + { + "epoch": 0.9419831223628692, + "grad_norm": 0.46178436279296875, + "learning_rate": 
3.218224035919609e-05, + "loss": 1.2782, + "step": 8930 + }, + { + "epoch": 0.9430379746835443, + "grad_norm": 0.4523911476135254, + "learning_rate": 3.173865704754688e-05, + "loss": 1.2785, + "step": 8940 + }, + { + "epoch": 0.9440928270042194, + "grad_norm": 0.45336154103279114, + "learning_rate": 3.130118785822657e-05, + "loss": 1.2787, + "step": 8950 + }, + { + "epoch": 0.9451476793248945, + "grad_norm": 0.44965076446533203, + "learning_rate": 3.08697485173437e-05, + "loss": 1.2815, + "step": 8960 + }, + { + "epoch": 0.9462025316455697, + "grad_norm": 0.5384255051612854, + "learning_rate": 3.0444255912594442e-05, + "loss": 1.2785, + "step": 8970 + }, + { + "epoch": 0.9472573839662447, + "grad_norm": 0.5449677109718323, + "learning_rate": 3.002462807725185e-05, + "loss": 1.2775, + "step": 8980 + }, + { + "epoch": 0.9483122362869199, + "grad_norm": 0.43419110774993896, + "learning_rate": 2.9610784174375868e-05, + "loss": 1.283, + "step": 8990 + }, + { + "epoch": 0.9493670886075949, + "grad_norm": 0.46079882979393005, + "learning_rate": 2.920264448124087e-05, + "loss": 1.2756, + "step": 9000 + }, + { + "epoch": 0.95042194092827, + "grad_norm": 0.47705045342445374, + "learning_rate": 2.8800130373977936e-05, + "loss": 1.2717, + "step": 9010 + }, + { + "epoch": 0.9514767932489452, + "grad_norm": 0.4502153992652893, + "learning_rate": 2.84031643124288e-05, + "loss": 1.2728, + "step": 9020 + }, + { + "epoch": 0.9525316455696202, + "grad_norm": 0.4820896089076996, + "learning_rate": 2.8011669825208517e-05, + "loss": 1.2956, + "step": 9030 + }, + { + "epoch": 0.9535864978902954, + "grad_norm": 0.45591697096824646, + "learning_rate": 2.762557149497405e-05, + "loss": 1.27, + "step": 9040 + }, + { + "epoch": 0.9546413502109705, + "grad_norm": 0.4733026921749115, + "learning_rate": 2.724479494389592e-05, + "loss": 1.2803, + "step": 9050 + }, + { + "epoch": 0.9556962025316456, + "grad_norm": 0.447063148021698, + "learning_rate": 2.6869266819330058e-05, + "loss": 1.2813, + 
"step": 9060 + }, + { + "epoch": 0.9567510548523207, + "grad_norm": 0.4395914673805237, + "learning_rate": 2.6498914779687228e-05, + "loss": 1.2832, + "step": 9070 + }, + { + "epoch": 0.9578059071729957, + "grad_norm": 0.4538501799106598, + "learning_rate": 2.6133667480497115e-05, + "loss": 1.2847, + "step": 9080 + }, + { + "epoch": 0.9588607594936709, + "grad_norm": 0.48643651604652405, + "learning_rate": 2.5773454560664597e-05, + "loss": 1.2828, + "step": 9090 + }, + { + "epoch": 0.959915611814346, + "grad_norm": 0.449874609708786, + "learning_rate": 2.541820662891541e-05, + "loss": 1.2638, + "step": 9100 + }, + { + "epoch": 0.9609704641350211, + "grad_norm": 0.4420047104358673, + "learning_rate": 2.5067855250428616e-05, + "loss": 1.2728, + "step": 9110 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 0.4749002456665039, + "learning_rate": 2.472233293365335e-05, + "loss": 1.2842, + "step": 9120 + }, + { + "epoch": 0.9630801687763713, + "grad_norm": 0.4869522452354431, + "learning_rate": 2.4381573117307307e-05, + "loss": 1.2694, + "step": 9130 + }, + { + "epoch": 0.9641350210970464, + "grad_norm": 0.533047080039978, + "learning_rate": 2.4045510157554362e-05, + "loss": 1.2891, + "step": 9140 + }, + { + "epoch": 0.9651898734177216, + "grad_norm": 0.4892658293247223, + "learning_rate": 2.3714079315358985e-05, + "loss": 1.2767, + "step": 9150 + }, + { + "epoch": 0.9662447257383966, + "grad_norm": 0.4426679313182831, + "learning_rate": 2.338721674401494e-05, + "loss": 1.2826, + "step": 9160 + }, + { + "epoch": 0.9672995780590717, + "grad_norm": 0.47194015979766846, + "learning_rate": 2.30648594768459e-05, + "loss": 1.281, + "step": 9170 + }, + { + "epoch": 0.9683544303797469, + "grad_norm": 0.4602823555469513, + "learning_rate": 2.2746945415075523e-05, + "loss": 1.2779, + "step": 9180 + }, + { + "epoch": 0.9694092827004219, + "grad_norm": 0.4629691541194916, + "learning_rate": 2.2433413315864803e-05, + "loss": 1.2732, + "step": 9190 + }, + { + "epoch": 
0.9704641350210971, + "grad_norm": 0.5147120952606201, + "learning_rate": 2.2124202780514277e-05, + "loss": 1.2792, + "step": 9200 + }, + { + "epoch": 0.9715189873417721, + "grad_norm": 0.47264429926872253, + "learning_rate": 2.1819254242828815e-05, + "loss": 1.2776, + "step": 9210 + }, + { + "epoch": 0.9725738396624473, + "grad_norm": 0.4766085743904114, + "learning_rate": 2.151850895764285e-05, + "loss": 1.2765, + "step": 9220 + }, + { + "epoch": 0.9736286919831224, + "grad_norm": 0.4370192587375641, + "learning_rate": 2.12219089895037e-05, + "loss": 1.2677, + "step": 9230 + }, + { + "epoch": 0.9746835443037974, + "grad_norm": 0.46413567662239075, + "learning_rate": 2.092939720151092e-05, + "loss": 1.261, + "step": 9240 + }, + { + "epoch": 0.9757383966244726, + "grad_norm": 0.47559598088264465, + "learning_rate": 2.064091724430947e-05, + "loss": 1.272, + "step": 9250 + }, + { + "epoch": 0.9767932489451476, + "grad_norm": 0.45722126960754395, + "learning_rate": 2.0356413545234603e-05, + "loss": 1.27, + "step": 9260 + }, + { + "epoch": 0.9778481012658228, + "grad_norm": 0.45105379819869995, + "learning_rate": 2.0075831297606357e-05, + "loss": 1.281, + "step": 9270 + }, + { + "epoch": 0.9789029535864979, + "grad_norm": 0.4484458863735199, + "learning_rate": 1.9799116450171627e-05, + "loss": 1.269, + "step": 9280 + }, + { + "epoch": 0.979957805907173, + "grad_norm": 0.4607316851615906, + "learning_rate": 1.952621569669175e-05, + "loss": 1.2766, + "step": 9290 + }, + { + "epoch": 0.9810126582278481, + "grad_norm": 0.45601749420166016, + "learning_rate": 1.9257076465673605e-05, + "loss": 1.2732, + "step": 9300 + }, + { + "epoch": 0.9820675105485233, + "grad_norm": 0.48505786061286926, + "learning_rate": 1.899164691024229e-05, + "loss": 1.2754, + "step": 9310 + }, + { + "epoch": 0.9831223628691983, + "grad_norm": 0.4511771500110626, + "learning_rate": 1.872987589815331e-05, + "loss": 1.2714, + "step": 9320 + }, + { + "epoch": 0.9841772151898734, + "grad_norm": 
0.4381241500377655, + "learning_rate": 1.8471713001942538e-05, + "loss": 1.2812, + "step": 9330 + }, + { + "epoch": 0.9852320675105485, + "grad_norm": 0.44202518463134766, + "learning_rate": 1.8217108489211845e-05, + "loss": 1.2816, + "step": 9340 + }, + { + "epoch": 0.9862869198312236, + "grad_norm": 0.4516373872756958, + "learning_rate": 1.7966013313048696e-05, + "loss": 1.2879, + "step": 9350 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 0.4416944086551666, + "learning_rate": 1.7718379102577752e-05, + "loss": 1.2829, + "step": 9360 + }, + { + "epoch": 0.9883966244725738, + "grad_norm": 0.4617598056793213, + "learning_rate": 1.7474158153642745e-05, + "loss": 1.2816, + "step": 9370 + }, + { + "epoch": 0.989451476793249, + "grad_norm": 0.4664436876773834, + "learning_rate": 1.7233303419616745e-05, + "loss": 1.2666, + "step": 9380 + }, + { + "epoch": 0.990506329113924, + "grad_norm": 0.4793858528137207, + "learning_rate": 1.699576850233916e-05, + "loss": 1.2724, + "step": 9390 + }, + { + "epoch": 0.9915611814345991, + "grad_norm": 0.45729121565818787, + "learning_rate": 1.6761507643177553e-05, + "loss": 1.2843, + "step": 9400 + }, + { + "epoch": 0.9926160337552743, + "grad_norm": 0.49434587359428406, + "learning_rate": 1.6530475714212752e-05, + "loss": 1.2596, + "step": 9410 + }, + { + "epoch": 0.9936708860759493, + "grad_norm": 0.4711279571056366, + "learning_rate": 1.6302628209545423e-05, + "loss": 1.2801, + "step": 9420 + }, + { + "epoch": 0.9947257383966245, + "grad_norm": 0.44683927297592163, + "learning_rate": 1.6077921236722464e-05, + "loss": 1.2789, + "step": 9430 + }, + { + "epoch": 0.9957805907172996, + "grad_norm": 0.43298545479774475, + "learning_rate": 1.5856311508281594e-05, + "loss": 1.2745, + "step": 9440 + }, + { + "epoch": 0.9968354430379747, + "grad_norm": 0.4444383680820465, + "learning_rate": 1.5637756333412454e-05, + "loss": 1.2839, + "step": 9450 + }, + { + "epoch": 0.9978902953586498, + "grad_norm": 0.4536062479019165, + 
"learning_rate": 1.542221360973268e-05, + "loss": 1.2692, + "step": 9460 + }, + { + "epoch": 0.9989451476793249, + "grad_norm": 0.43719130754470825, + "learning_rate": 1.5209641815177312e-05, + "loss": 1.2813, + "step": 9470 + }, + { + "epoch": 1.0, + "grad_norm": 1.3620312213897705, + "learning_rate": 1.5e-05, + "loss": 1.2736, + "step": 9480 + } + ], + "logging_steps": 10, + "max_steps": 9480, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.832308198648013e+16, + "train_batch_size": 1024, + "trial_name": null, + "trial_params": null +} diff --git a/saves-llama-bf16/checkpoint-9480/training_args.bin b/saves-llama-bf16/checkpoint-9480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e5243f49684dc2a85e2f65b2c4776574982f9269 --- /dev/null +++ b/saves-llama-bf16/checkpoint-9480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07866fa0cb2393a285e3dd28e316873fd2a9cc643287d2c61b534700624ad430 +size 5112 diff --git a/saves-llama-bf16/config.json b/saves-llama-bf16/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4dd56814721ed8dfc23e87878a8340d8b4f18fc0 --- /dev/null +++ b/saves-llama-bf16/config.json @@ -0,0 +1,28 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 768, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-06, + "rope_scaling": null, 
+ "rope_theta": 10000.0, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.42.0", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-llama-bf16/generation_config.json b/saves-llama-bf16/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..59c0f3c6815a220b6b4e852c51be873503df2ce0 --- /dev/null +++ b/saves-llama-bf16/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.42.0" +} diff --git a/saves-llama-bf16/model.safetensors b/saves-llama-bf16/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a39d758e9c96da4a8db150c80986b1ad0946cf88 --- /dev/null +++ b/saves-llama-bf16/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1136b4c8d9f04036876e36dbe09274e731871c9b1081c7981af3932adceacea9 +size 8346712 diff --git a/saves-llama-bf16/result.log b/saves-llama-bf16/result.log new file mode 100644 index 0000000000000000000000000000000000000000..fea1e26dd128a85eef9b470b5406884f8583cc1f --- /dev/null +++ b/saves-llama-bf16/result.log @@ -0,0 +1 @@ +{'train_runtime': 5451.9089, 'train_samples_per_second': 1780.405, 'train_steps_per_second': 1.739, 'train_loss': 1.5713628742765273, 'epoch': 1.0} \ No newline at end of file diff --git a/saves-llama-bf16/special_tokens_map.json b/saves-llama-bf16/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-llama-bf16/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-llama-bf16/tokenizer.json b/saves-llama-bf16/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-llama-bf16/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 
16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 
189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, 
+ "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + 
"æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, 
+ "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 
778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + 
"éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 
1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + 
"ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 
1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + 
",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, 
+ "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + 
"Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 
1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 
1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-llama-bf16/tokenizer_config.json b/saves-llama-bf16/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-llama-bf16/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + 
"model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-llama-cosine/checkpoint-9480/config.json b/saves-llama-cosine/checkpoint-9480/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d241c1935481613c5259df93a97d2cacb314defb --- /dev/null +++ b/saves-llama-cosine/checkpoint-9480/config.json @@ -0,0 +1,28 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 768, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-llama-cosine/checkpoint-9480/generation_config.json b/saves-llama-cosine/checkpoint-9480/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b2fc224da8a3685f78c733a0ef85e67242c17b5a --- /dev/null +++ b/saves-llama-cosine/checkpoint-9480/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.42.4" +} diff --git a/saves-llama-cosine/checkpoint-9480/model.safetensors b/saves-llama-cosine/checkpoint-9480/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7d4dbb9e44bb90fe03de5cf81edd5f8613b69244 --- /dev/null +++ b/saves-llama-cosine/checkpoint-9480/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de33d4bf5107f5e47dc96c97470aad1b526688636f37d68afb095cf11e6613ea +size 8346712 
diff --git a/saves-llama-cosine/checkpoint-9480/optimizer.pt b/saves-llama-cosine/checkpoint-9480/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5544d4ca9b35cbab39f155eb0ece66fe695a1399 --- /dev/null +++ b/saves-llama-cosine/checkpoint-9480/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c59c1f80dfc27faf621f65a7e5cf5fbb159c172b0dfec4825f2a6f11dc33c49 +size 16706530 diff --git a/saves-llama-cosine/checkpoint-9480/rng_state.pth b/saves-llama-cosine/checkpoint-9480/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f8291cd6ce87668b786a72f3e93d072fbe54902 --- /dev/null +++ b/saves-llama-cosine/checkpoint-9480/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c917636c7a58af68a29056522a757e9f9b99005b776641aa157c536967817d +size 14244 diff --git a/saves-llama-cosine/checkpoint-9480/scheduler.pt b/saves-llama-cosine/checkpoint-9480/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..47ca193b702fc31e51e3ee0689a4054b394880b6 --- /dev/null +++ b/saves-llama-cosine/checkpoint-9480/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97f195640e66bde784a0961679ecd73c2a561c5a12962a7316325d731f304936 +size 1064 diff --git a/saves-llama-cosine/checkpoint-9480/special_tokens_map.json b/saves-llama-cosine/checkpoint-9480/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-llama-cosine/checkpoint-9480/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-llama-cosine/checkpoint-9480/tokenizer.json b/saves-llama-cosine/checkpoint-9480/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-llama-cosine/checkpoint-9480/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 
12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + 
"Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 
337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + 
"æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + 
"ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 
775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 
917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + 
"åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 
1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + 
"sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + 
"Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, 
+ "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + 
"è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 
1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 
1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-llama-cosine/checkpoint-9480/tokenizer_config.json b/saves-llama-cosine/checkpoint-9480/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-llama-cosine/checkpoint-9480/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + 
"eos_token": "<|im_end|>", + "errors": "replace", + "model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-llama-cosine/checkpoint-9480/trainer_state.json b/saves-llama-cosine/checkpoint-9480/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8f3c0d431fe3b4dcbc69be018b44f3443f8efe3d --- /dev/null +++ b/saves-llama-cosine/checkpoint-9480/trainer_state.json @@ -0,0 +1,66393 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 9480, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00010548523206751055, + "grad_norm": 1.4125691652297974, + "learning_rate": 1.5789473684210526e-05, + "loss": 7.6406, + "step": 1 + }, + { + "epoch": 0.0002109704641350211, + "grad_norm": 1.3846136331558228, + "learning_rate": 3.157894736842105e-05, + "loss": 7.6443, + "step": 2 + }, + { + "epoch": 0.00031645569620253165, + "grad_norm": 1.3876163959503174, + "learning_rate": 4.736842105263158e-05, + "loss": 7.6273, + "step": 3 + }, + { + "epoch": 0.0004219409282700422, + "grad_norm": 1.4033862352371216, + "learning_rate": 6.31578947368421e-05, + "loss": 7.6011, + "step": 4 + }, + { + "epoch": 0.0005274261603375527, + "grad_norm": 1.400907278060913, + "learning_rate": 7.894736842105263e-05, + "loss": 7.5644, + "step": 5 + }, + { + "epoch": 0.0006329113924050633, + "grad_norm": 1.348865270614624, + "learning_rate": 9.473684210526316e-05, + "loss": 7.5148, + "step": 6 + }, + { + "epoch": 0.0007383966244725738, + "grad_norm": 1.322057843208313, + "learning_rate": 0.00011052631578947368, + "loss": 7.4582, + "step": 7 + }, + { + "epoch": 0.0008438818565400844, + "grad_norm": 1.2913519144058228, + "learning_rate": 0.0001263157894736842, + "loss": 7.3956, + "step": 8 + }, + { + "epoch": 0.0009493670886075949, + 
"grad_norm": 1.2970657348632812, + "learning_rate": 0.00014210526315789474, + "loss": 7.3287, + "step": 9 + }, + { + "epoch": 0.0010548523206751054, + "grad_norm": 1.2728885412216187, + "learning_rate": 0.00015789473684210527, + "loss": 7.259, + "step": 10 + }, + { + "epoch": 0.001160337552742616, + "grad_norm": 1.2809137105941772, + "learning_rate": 0.0001736842105263158, + "loss": 7.1859, + "step": 11 + }, + { + "epoch": 0.0012658227848101266, + "grad_norm": 1.2566874027252197, + "learning_rate": 0.00018947368421052632, + "loss": 7.1254, + "step": 12 + }, + { + "epoch": 0.0013713080168776372, + "grad_norm": 1.2693380117416382, + "learning_rate": 0.00020526315789473685, + "loss": 7.0643, + "step": 13 + }, + { + "epoch": 0.0014767932489451476, + "grad_norm": 1.2805697917938232, + "learning_rate": 0.00022105263157894735, + "loss": 7.0029, + "step": 14 + }, + { + "epoch": 0.0015822784810126582, + "grad_norm": 1.2841190099716187, + "learning_rate": 0.00023684210526315788, + "loss": 6.9468, + "step": 15 + }, + { + "epoch": 0.0016877637130801688, + "grad_norm": 1.2669384479522705, + "learning_rate": 0.0002526315789473684, + "loss": 6.8897, + "step": 16 + }, + { + "epoch": 0.0017932489451476794, + "grad_norm": 1.2652876377105713, + "learning_rate": 0.00026842105263157897, + "loss": 6.8101, + "step": 17 + }, + { + "epoch": 0.0018987341772151898, + "grad_norm": 1.2257435321807861, + "learning_rate": 0.00028421052631578947, + "loss": 6.7361, + "step": 18 + }, + { + "epoch": 0.0020042194092827004, + "grad_norm": 1.2064332962036133, + "learning_rate": 0.00030000000000000003, + "loss": 6.6542, + "step": 19 + }, + { + "epoch": 0.002109704641350211, + "grad_norm": 1.155080795288086, + "learning_rate": 0.00031578947368421053, + "loss": 6.5894, + "step": 20 + }, + { + "epoch": 0.0022151898734177216, + "grad_norm": 1.1264567375183105, + "learning_rate": 0.00033157894736842103, + "loss": 6.5252, + "step": 21 + }, + { + "epoch": 0.002320675105485232, + "grad_norm": 
1.1046894788742065, + "learning_rate": 0.0003473684210526316, + "loss": 6.4556, + "step": 22 + }, + { + "epoch": 0.002426160337552743, + "grad_norm": 1.0741931200027466, + "learning_rate": 0.0003631578947368421, + "loss": 6.3822, + "step": 23 + }, + { + "epoch": 0.002531645569620253, + "grad_norm": 1.0437588691711426, + "learning_rate": 0.00037894736842105265, + "loss": 6.3147, + "step": 24 + }, + { + "epoch": 0.0026371308016877636, + "grad_norm": 1.0016728639602661, + "learning_rate": 0.00039473684210526315, + "loss": 6.2725, + "step": 25 + }, + { + "epoch": 0.0027426160337552744, + "grad_norm": 0.989832878112793, + "learning_rate": 0.0004105263157894737, + "loss": 6.2024, + "step": 26 + }, + { + "epoch": 0.002848101265822785, + "grad_norm": 0.9331700205802917, + "learning_rate": 0.0004263157894736842, + "loss": 6.1703, + "step": 27 + }, + { + "epoch": 0.002953586497890295, + "grad_norm": 0.9203375577926636, + "learning_rate": 0.0004421052631578947, + "loss": 6.1053, + "step": 28 + }, + { + "epoch": 0.003059071729957806, + "grad_norm": 0.8893955945968628, + "learning_rate": 0.00045789473684210527, + "loss": 6.0638, + "step": 29 + }, + { + "epoch": 0.0031645569620253164, + "grad_norm": 0.8321983218193054, + "learning_rate": 0.00047368421052631577, + "loss": 6.0331, + "step": 30 + }, + { + "epoch": 0.003270042194092827, + "grad_norm": 0.8178334832191467, + "learning_rate": 0.0004894736842105264, + "loss": 5.9838, + "step": 31 + }, + { + "epoch": 0.0033755274261603376, + "grad_norm": 0.8192487359046936, + "learning_rate": 0.0005052631578947368, + "loss": 5.9232, + "step": 32 + }, + { + "epoch": 0.003481012658227848, + "grad_norm": 0.8011775016784668, + "learning_rate": 0.0005210526315789474, + "loss": 5.8684, + "step": 33 + }, + { + "epoch": 0.003586497890295359, + "grad_norm": 0.7735371589660645, + "learning_rate": 0.0005368421052631579, + "loss": 5.8456, + "step": 34 + }, + { + "epoch": 0.003691983122362869, + "grad_norm": 0.8007768392562866, + "learning_rate": 
0.0005526315789473684, + "loss": 5.7633, + "step": 35 + }, + { + "epoch": 0.0037974683544303796, + "grad_norm": 0.7537615299224854, + "learning_rate": 0.0005684210526315789, + "loss": 5.7429, + "step": 36 + }, + { + "epoch": 0.0039029535864978904, + "grad_norm": 0.7775960564613342, + "learning_rate": 0.0005842105263157895, + "loss": 5.6782, + "step": 37 + }, + { + "epoch": 0.004008438818565401, + "grad_norm": 0.816963255405426, + "learning_rate": 0.0006000000000000001, + "loss": 5.6507, + "step": 38 + }, + { + "epoch": 0.004113924050632912, + "grad_norm": 2.151431083679199, + "learning_rate": 0.0006157894736842105, + "loss": 5.5858, + "step": 39 + }, + { + "epoch": 0.004219409282700422, + "grad_norm": 1.2425979375839233, + "learning_rate": 0.0006315789473684211, + "loss": 5.5591, + "step": 40 + }, + { + "epoch": 0.004324894514767932, + "grad_norm": 0.9822751879692078, + "learning_rate": 0.0006473684210526316, + "loss": 5.5153, + "step": 41 + }, + { + "epoch": 0.004430379746835443, + "grad_norm": 1.3079657554626465, + "learning_rate": 0.0006631578947368421, + "loss": 5.4743, + "step": 42 + }, + { + "epoch": 0.004535864978902953, + "grad_norm": 0.8507727980613708, + "learning_rate": 0.0006789473684210526, + "loss": 5.3869, + "step": 43 + }, + { + "epoch": 0.004641350210970464, + "grad_norm": 0.9433321952819824, + "learning_rate": 0.0006947368421052632, + "loss": 5.3905, + "step": 44 + }, + { + "epoch": 0.004746835443037975, + "grad_norm": 0.8024179339408875, + "learning_rate": 0.0007105263157894736, + "loss": 5.2986, + "step": 45 + }, + { + "epoch": 0.004852320675105486, + "grad_norm": 0.8996524810791016, + "learning_rate": 0.0007263157894736842, + "loss": 5.2459, + "step": 46 + }, + { + "epoch": 0.004957805907172996, + "grad_norm": 0.9035475850105286, + "learning_rate": 0.0007421052631578947, + "loss": 5.2073, + "step": 47 + }, + { + "epoch": 0.005063291139240506, + "grad_norm": 1.0809053182601929, + "learning_rate": 0.0007578947368421053, + "loss": 5.1686, + 
"step": 48 + }, + { + "epoch": 0.005168776371308017, + "grad_norm": 1.065477967262268, + "learning_rate": 0.0007736842105263159, + "loss": 5.0886, + "step": 49 + }, + { + "epoch": 0.005274261603375527, + "grad_norm": 1.0389506816864014, + "learning_rate": 0.0007894736842105263, + "loss": 5.0626, + "step": 50 + }, + { + "epoch": 0.005379746835443038, + "grad_norm": 1.1089506149291992, + "learning_rate": 0.0008052631578947369, + "loss": 5.001, + "step": 51 + }, + { + "epoch": 0.005485232067510549, + "grad_norm": 1.265552043914795, + "learning_rate": 0.0008210526315789474, + "loss": 4.9461, + "step": 52 + }, + { + "epoch": 0.005590717299578059, + "grad_norm": 1.0652519464492798, + "learning_rate": 0.0008368421052631579, + "loss": 4.906, + "step": 53 + }, + { + "epoch": 0.00569620253164557, + "grad_norm": 0.9696651697158813, + "learning_rate": 0.0008526315789473684, + "loss": 4.8239, + "step": 54 + }, + { + "epoch": 0.0058016877637130804, + "grad_norm": 0.8202279806137085, + "learning_rate": 0.000868421052631579, + "loss": 4.7889, + "step": 55 + }, + { + "epoch": 0.00590717299578059, + "grad_norm": 0.5885863900184631, + "learning_rate": 0.0008842105263157894, + "loss": 4.7189, + "step": 56 + }, + { + "epoch": 0.006012658227848101, + "grad_norm": 0.797754168510437, + "learning_rate": 0.0009, + "loss": 4.7109, + "step": 57 + }, + { + "epoch": 0.006118143459915612, + "grad_norm": 0.9304715394973755, + "learning_rate": 0.0009157894736842105, + "loss": 4.6771, + "step": 58 + }, + { + "epoch": 0.006223628691983122, + "grad_norm": 0.7799877524375916, + "learning_rate": 0.0009315789473684211, + "loss": 4.6116, + "step": 59 + }, + { + "epoch": 0.006329113924050633, + "grad_norm": 0.523409366607666, + "learning_rate": 0.0009473684210526315, + "loss": 4.5551, + "step": 60 + }, + { + "epoch": 0.006434599156118144, + "grad_norm": 0.49776318669319153, + "learning_rate": 0.0009631578947368421, + "loss": 4.5385, + "step": 61 + }, + { + "epoch": 0.006540084388185654, + "grad_norm": 
0.5358529090881348, + "learning_rate": 0.0009789473684210528, + "loss": 4.4744, + "step": 62 + }, + { + "epoch": 0.006645569620253164, + "grad_norm": 0.5563139915466309, + "learning_rate": 0.000994736842105263, + "loss": 4.4497, + "step": 63 + }, + { + "epoch": 0.006751054852320675, + "grad_norm": 0.591421902179718, + "learning_rate": 0.0010105263157894737, + "loss": 4.399, + "step": 64 + }, + { + "epoch": 0.006856540084388186, + "grad_norm": 0.7304073572158813, + "learning_rate": 0.0010263157894736842, + "loss": 4.3877, + "step": 65 + }, + { + "epoch": 0.006962025316455696, + "grad_norm": 0.9059742093086243, + "learning_rate": 0.0010421052631578948, + "loss": 4.3282, + "step": 66 + }, + { + "epoch": 0.007067510548523207, + "grad_norm": 1.403586983680725, + "learning_rate": 0.0010578947368421053, + "loss": 4.3407, + "step": 67 + }, + { + "epoch": 0.007172995780590718, + "grad_norm": 1.406400203704834, + "learning_rate": 0.0010736842105263159, + "loss": 4.3051, + "step": 68 + }, + { + "epoch": 0.007278481012658228, + "grad_norm": 2.4728293418884277, + "learning_rate": 0.0010894736842105264, + "loss": 4.2607, + "step": 69 + }, + { + "epoch": 0.007383966244725738, + "grad_norm": 0.8229213356971741, + "learning_rate": 0.0011052631578947368, + "loss": 4.2168, + "step": 70 + }, + { + "epoch": 0.007489451476793249, + "grad_norm": 1.6141891479492188, + "learning_rate": 0.0011210526315789473, + "loss": 4.2689, + "step": 71 + }, + { + "epoch": 0.007594936708860759, + "grad_norm": 1.0150361061096191, + "learning_rate": 0.0011368421052631579, + "loss": 4.1997, + "step": 72 + }, + { + "epoch": 0.00770042194092827, + "grad_norm": 1.5387511253356934, + "learning_rate": 0.0011526315789473684, + "loss": 4.1724, + "step": 73 + }, + { + "epoch": 0.007805907172995781, + "grad_norm": 1.8746471405029297, + "learning_rate": 0.001168421052631579, + "loss": 4.1713, + "step": 74 + }, + { + "epoch": 0.007911392405063292, + "grad_norm": 0.8924740552902222, + "learning_rate": 
0.0011842105263157896, + "loss": 4.12, + "step": 75 + }, + { + "epoch": 0.008016877637130802, + "grad_norm": 1.476842999458313, + "learning_rate": 0.0012000000000000001, + "loss": 4.1254, + "step": 76 + }, + { + "epoch": 0.008122362869198312, + "grad_norm": 0.6783918738365173, + "learning_rate": 0.0012157894736842105, + "loss": 4.0959, + "step": 77 + }, + { + "epoch": 0.008227848101265823, + "grad_norm": 1.4356942176818848, + "learning_rate": 0.001231578947368421, + "loss": 4.0923, + "step": 78 + }, + { + "epoch": 0.008333333333333333, + "grad_norm": 1.2355000972747803, + "learning_rate": 0.0012473684210526316, + "loss": 4.0598, + "step": 79 + }, + { + "epoch": 0.008438818565400843, + "grad_norm": 0.904261589050293, + "learning_rate": 0.0012631578947368421, + "loss": 4.0421, + "step": 80 + }, + { + "epoch": 0.008544303797468355, + "grad_norm": 1.123159408569336, + "learning_rate": 0.0012789473684210527, + "loss": 4.0082, + "step": 81 + }, + { + "epoch": 0.008649789029535865, + "grad_norm": 1.4762696027755737, + "learning_rate": 0.0012947368421052632, + "loss": 4.0076, + "step": 82 + }, + { + "epoch": 0.008755274261603375, + "grad_norm": 0.7992144823074341, + "learning_rate": 0.0013105263157894738, + "loss": 3.9653, + "step": 83 + }, + { + "epoch": 0.008860759493670886, + "grad_norm": 1.1277954578399658, + "learning_rate": 0.0013263157894736841, + "loss": 3.9557, + "step": 84 + }, + { + "epoch": 0.008966244725738396, + "grad_norm": 1.2336082458496094, + "learning_rate": 0.0013421052631578947, + "loss": 3.9395, + "step": 85 + }, + { + "epoch": 0.009071729957805906, + "grad_norm": 0.7977361679077148, + "learning_rate": 0.0013578947368421052, + "loss": 3.938, + "step": 86 + }, + { + "epoch": 0.009177215189873418, + "grad_norm": 0.9615039825439453, + "learning_rate": 0.0013736842105263158, + "loss": 3.9302, + "step": 87 + }, + { + "epoch": 0.009282700421940928, + "grad_norm": 1.1215773820877075, + "learning_rate": 0.0013894736842105264, + "loss": 3.8496, + "step": 88 + 
}, + { + "epoch": 0.009388185654008438, + "grad_norm": 1.219015121459961, + "learning_rate": 0.001405263157894737, + "loss": 3.8656, + "step": 89 + }, + { + "epoch": 0.00949367088607595, + "grad_norm": 0.8463934063911438, + "learning_rate": 0.0014210526315789472, + "loss": 3.8495, + "step": 90 + }, + { + "epoch": 0.00959915611814346, + "grad_norm": 1.0766414403915405, + "learning_rate": 0.0014368421052631578, + "loss": 3.8334, + "step": 91 + }, + { + "epoch": 0.009704641350210971, + "grad_norm": 1.0067421197891235, + "learning_rate": 0.0014526315789473684, + "loss": 3.823, + "step": 92 + }, + { + "epoch": 0.009810126582278481, + "grad_norm": 1.4081043004989624, + "learning_rate": 0.0014684210526315791, + "loss": 3.8016, + "step": 93 + }, + { + "epoch": 0.009915611814345991, + "grad_norm": 0.9934578537940979, + "learning_rate": 0.0014842105263157895, + "loss": 3.8336, + "step": 94 + }, + { + "epoch": 0.010021097046413503, + "grad_norm": 0.885004460811615, + "learning_rate": 0.0015, + "loss": 3.7828, + "step": 95 + }, + { + "epoch": 0.010126582278481013, + "grad_norm": 0.9675113558769226, + "learning_rate": 0.00149999995797938, + "loss": 3.773, + "step": 96 + }, + { + "epoch": 0.010232067510548523, + "grad_norm": 1.1336740255355835, + "learning_rate": 0.001499999831917525, + "loss": 3.746, + "step": 97 + }, + { + "epoch": 0.010337552742616034, + "grad_norm": 2.00848650932312, + "learning_rate": 0.001499999621814449, + "loss": 3.746, + "step": 98 + }, + { + "epoch": 0.010443037974683544, + "grad_norm": 0.9693989157676697, + "learning_rate": 0.0014999993276701756, + "loss": 3.7651, + "step": 99 + }, + { + "epoch": 0.010548523206751054, + "grad_norm": 0.9588125944137573, + "learning_rate": 0.0014999989494847376, + "loss": 3.7035, + "step": 100 + }, + { + "epoch": 0.010654008438818566, + "grad_norm": 1.018338680267334, + "learning_rate": 0.0014999984872581774, + "loss": 3.6997, + "step": 101 + }, + { + "epoch": 0.010759493670886076, + "grad_norm": 1.5535240173339844, + 
"learning_rate": 0.0014999979409905469, + "loss": 3.6755, + "step": 102 + }, + { + "epoch": 0.010864978902953586, + "grad_norm": 0.9406970739364624, + "learning_rate": 0.0014999973106819074, + "loss": 3.6556, + "step": 103 + }, + { + "epoch": 0.010970464135021098, + "grad_norm": 1.0212641954421997, + "learning_rate": 0.0014999965963323294, + "loss": 3.6289, + "step": 104 + }, + { + "epoch": 0.011075949367088608, + "grad_norm": 0.9744918346405029, + "learning_rate": 0.0014999957979418927, + "loss": 3.6244, + "step": 105 + }, + { + "epoch": 0.011181434599156118, + "grad_norm": 0.6927087306976318, + "learning_rate": 0.0014999949155106874, + "loss": 3.6052, + "step": 106 + }, + { + "epoch": 0.01128691983122363, + "grad_norm": 0.8927171230316162, + "learning_rate": 0.0014999939490388115, + "loss": 3.6154, + "step": 107 + }, + { + "epoch": 0.01139240506329114, + "grad_norm": 0.7788654565811157, + "learning_rate": 0.0014999928985263743, + "loss": 3.585, + "step": 108 + }, + { + "epoch": 0.01149789029535865, + "grad_norm": 0.6510046124458313, + "learning_rate": 0.001499991763973493, + "loss": 3.5825, + "step": 109 + }, + { + "epoch": 0.011603375527426161, + "grad_norm": 0.7838415503501892, + "learning_rate": 0.0014999905453802946, + "loss": 3.5444, + "step": 110 + }, + { + "epoch": 0.01170886075949367, + "grad_norm": 1.0097483396530151, + "learning_rate": 0.0014999892427469156, + "loss": 3.5459, + "step": 111 + }, + { + "epoch": 0.01181434599156118, + "grad_norm": 1.2372549772262573, + "learning_rate": 0.0014999878560735024, + "loss": 3.55, + "step": 112 + }, + { + "epoch": 0.011919831223628692, + "grad_norm": 0.8204699158668518, + "learning_rate": 0.0014999863853602101, + "loss": 3.5227, + "step": 113 + }, + { + "epoch": 0.012025316455696202, + "grad_norm": 0.7253666520118713, + "learning_rate": 0.0014999848306072037, + "loss": 3.5332, + "step": 114 + }, + { + "epoch": 0.012130801687763712, + "grad_norm": 0.752126157283783, + "learning_rate": 0.0014999831918146571, + 
"loss": 3.513, + "step": 115 + }, + { + "epoch": 0.012236286919831224, + "grad_norm": 0.7716972827911377, + "learning_rate": 0.001499981468982754, + "loss": 3.4903, + "step": 116 + }, + { + "epoch": 0.012341772151898734, + "grad_norm": 0.7619340419769287, + "learning_rate": 0.001499979662111688, + "loss": 3.5059, + "step": 117 + }, + { + "epoch": 0.012447257383966244, + "grad_norm": 0.5772149562835693, + "learning_rate": 0.0014999777712016607, + "loss": 3.4738, + "step": 118 + }, + { + "epoch": 0.012552742616033756, + "grad_norm": 0.8338004350662231, + "learning_rate": 0.0014999757962528846, + "loss": 3.4289, + "step": 119 + }, + { + "epoch": 0.012658227848101266, + "grad_norm": 0.7984045743942261, + "learning_rate": 0.0014999737372655805, + "loss": 3.4519, + "step": 120 + }, + { + "epoch": 0.012763713080168776, + "grad_norm": 1.0912904739379883, + "learning_rate": 0.0014999715942399798, + "loss": 3.4515, + "step": 121 + }, + { + "epoch": 0.012869198312236287, + "grad_norm": 1.2305970191955566, + "learning_rate": 0.001499969367176322, + "loss": 3.45, + "step": 122 + }, + { + "epoch": 0.012974683544303797, + "grad_norm": 0.8301472067832947, + "learning_rate": 0.0014999670560748573, + "loss": 3.3986, + "step": 123 + }, + { + "epoch": 0.013080168776371307, + "grad_norm": 0.8816103935241699, + "learning_rate": 0.001499964660935844, + "loss": 3.3982, + "step": 124 + }, + { + "epoch": 0.013185654008438819, + "grad_norm": 1.0284500122070312, + "learning_rate": 0.0014999621817595509, + "loss": 3.4138, + "step": 125 + }, + { + "epoch": 0.013291139240506329, + "grad_norm": 1.1884199380874634, + "learning_rate": 0.0014999596185462556, + "loss": 3.3891, + "step": 126 + }, + { + "epoch": 0.01339662447257384, + "grad_norm": 0.9609977006912231, + "learning_rate": 0.0014999569712962452, + "loss": 3.4036, + "step": 127 + }, + { + "epoch": 0.01350210970464135, + "grad_norm": 0.7716151475906372, + "learning_rate": 0.0014999542400098169, + "loss": 3.3639, + "step": 128 + }, + { + 
"epoch": 0.01360759493670886, + "grad_norm": 0.7190305590629578, + "learning_rate": 0.0014999514246872762, + "loss": 3.3455, + "step": 129 + }, + { + "epoch": 0.013713080168776372, + "grad_norm": 0.8661912679672241, + "learning_rate": 0.0014999485253289388, + "loss": 3.3746, + "step": 130 + }, + { + "epoch": 0.013818565400843882, + "grad_norm": 0.6636521816253662, + "learning_rate": 0.0014999455419351297, + "loss": 3.3115, + "step": 131 + }, + { + "epoch": 0.013924050632911392, + "grad_norm": 0.8013447523117065, + "learning_rate": 0.001499942474506183, + "loss": 3.3729, + "step": 132 + }, + { + "epoch": 0.014029535864978904, + "grad_norm": 0.6359685659408569, + "learning_rate": 0.0014999393230424422, + "loss": 3.3453, + "step": 133 + }, + { + "epoch": 0.014135021097046414, + "grad_norm": 0.8324923515319824, + "learning_rate": 0.001499936087544261, + "loss": 3.289, + "step": 134 + }, + { + "epoch": 0.014240506329113924, + "grad_norm": 0.8725466132164001, + "learning_rate": 0.001499932768012002, + "loss": 3.3006, + "step": 135 + }, + { + "epoch": 0.014345991561181435, + "grad_norm": 0.9704053401947021, + "learning_rate": 0.0014999293644460362, + "loss": 3.3004, + "step": 136 + }, + { + "epoch": 0.014451476793248945, + "grad_norm": 0.8218097686767578, + "learning_rate": 0.0014999258768467459, + "loss": 3.2979, + "step": 137 + }, + { + "epoch": 0.014556962025316455, + "grad_norm": 0.856134831905365, + "learning_rate": 0.0014999223052145215, + "loss": 3.2545, + "step": 138 + }, + { + "epoch": 0.014662447257383967, + "grad_norm": 0.9056288599967957, + "learning_rate": 0.0014999186495497636, + "loss": 3.247, + "step": 139 + }, + { + "epoch": 0.014767932489451477, + "grad_norm": 0.6627249717712402, + "learning_rate": 0.0014999149098528814, + "loss": 3.2402, + "step": 140 + }, + { + "epoch": 0.014873417721518987, + "grad_norm": 0.6721928119659424, + "learning_rate": 0.0014999110861242944, + "loss": 3.2456, + "step": 141 + }, + { + "epoch": 0.014978902953586498, + 
"grad_norm": 0.5820363759994507, + "learning_rate": 0.0014999071783644306, + "loss": 3.2307, + "step": 142 + }, + { + "epoch": 0.015084388185654008, + "grad_norm": 0.6729377508163452, + "learning_rate": 0.001499903186573728, + "loss": 3.2232, + "step": 143 + }, + { + "epoch": 0.015189873417721518, + "grad_norm": 0.9279201030731201, + "learning_rate": 0.001499899110752634, + "loss": 3.2305, + "step": 144 + }, + { + "epoch": 0.01529535864978903, + "grad_norm": 1.1168944835662842, + "learning_rate": 0.0014998949509016054, + "loss": 3.2487, + "step": 145 + }, + { + "epoch": 0.01540084388185654, + "grad_norm": 0.6976972818374634, + "learning_rate": 0.0014998907070211084, + "loss": 3.192, + "step": 146 + }, + { + "epoch": 0.01550632911392405, + "grad_norm": 0.824496865272522, + "learning_rate": 0.0014998863791116182, + "loss": 3.2223, + "step": 147 + }, + { + "epoch": 0.015611814345991562, + "grad_norm": 0.8909064531326294, + "learning_rate": 0.0014998819671736198, + "loss": 3.1655, + "step": 148 + }, + { + "epoch": 0.015717299578059073, + "grad_norm": 1.1542097330093384, + "learning_rate": 0.001499877471207608, + "loss": 3.1964, + "step": 149 + }, + { + "epoch": 0.015822784810126583, + "grad_norm": 0.944225549697876, + "learning_rate": 0.0014998728912140862, + "loss": 3.1968, + "step": 150 + }, + { + "epoch": 0.015928270042194093, + "grad_norm": 0.711151659488678, + "learning_rate": 0.0014998682271935677, + "loss": 3.2089, + "step": 151 + }, + { + "epoch": 0.016033755274261603, + "grad_norm": 0.895266056060791, + "learning_rate": 0.0014998634791465752, + "loss": 3.1446, + "step": 152 + }, + { + "epoch": 0.016139240506329113, + "grad_norm": 0.8669366836547852, + "learning_rate": 0.001499858647073641, + "loss": 3.1988, + "step": 153 + }, + { + "epoch": 0.016244725738396623, + "grad_norm": 0.9939653873443604, + "learning_rate": 0.0014998537309753057, + "loss": 3.1401, + "step": 154 + }, + { + "epoch": 0.016350210970464137, + "grad_norm": 0.8096132278442383, + 
"learning_rate": 0.001499848730852121, + "loss": 3.1475, + "step": 155 + }, + { + "epoch": 0.016455696202531647, + "grad_norm": 0.966376543045044, + "learning_rate": 0.001499843646704647, + "loss": 3.1385, + "step": 156 + }, + { + "epoch": 0.016561181434599156, + "grad_norm": 1.1007814407348633, + "learning_rate": 0.0014998384785334532, + "loss": 3.1556, + "step": 157 + }, + { + "epoch": 0.016666666666666666, + "grad_norm": 1.0333107709884644, + "learning_rate": 0.0014998332263391192, + "loss": 3.1235, + "step": 158 + }, + { + "epoch": 0.016772151898734176, + "grad_norm": 0.8851603269577026, + "learning_rate": 0.0014998278901222327, + "loss": 3.1327, + "step": 159 + }, + { + "epoch": 0.016877637130801686, + "grad_norm": 0.6391797661781311, + "learning_rate": 0.0014998224698833922, + "loss": 3.1215, + "step": 160 + }, + { + "epoch": 0.0169831223628692, + "grad_norm": 0.6912878751754761, + "learning_rate": 0.0014998169656232053, + "loss": 3.0736, + "step": 161 + }, + { + "epoch": 0.01708860759493671, + "grad_norm": 0.7403839826583862, + "learning_rate": 0.0014998113773422883, + "loss": 3.1356, + "step": 162 + }, + { + "epoch": 0.01719409282700422, + "grad_norm": 0.7562828063964844, + "learning_rate": 0.0014998057050412674, + "loss": 3.1034, + "step": 163 + }, + { + "epoch": 0.01729957805907173, + "grad_norm": 0.6717499494552612, + "learning_rate": 0.0014997999487207786, + "loss": 3.0845, + "step": 164 + }, + { + "epoch": 0.01740506329113924, + "grad_norm": 0.8262808918952942, + "learning_rate": 0.0014997941083814666, + "loss": 3.1115, + "step": 165 + }, + { + "epoch": 0.01751054852320675, + "grad_norm": 0.6903622150421143, + "learning_rate": 0.001499788184023986, + "loss": 3.0801, + "step": 166 + }, + { + "epoch": 0.017616033755274263, + "grad_norm": 0.6742339730262756, + "learning_rate": 0.0014997821756490008, + "loss": 3.0407, + "step": 167 + }, + { + "epoch": 0.017721518987341773, + "grad_norm": 0.7868281006813049, + "learning_rate": 0.0014997760832571839, + 
"loss": 3.0439, + "step": 168 + }, + { + "epoch": 0.017827004219409283, + "grad_norm": 0.81510990858078, + "learning_rate": 0.001499769906849218, + "loss": 3.0325, + "step": 169 + }, + { + "epoch": 0.017932489451476793, + "grad_norm": 0.8188894987106323, + "learning_rate": 0.0014997636464257956, + "loss": 3.0368, + "step": 170 + }, + { + "epoch": 0.018037974683544303, + "grad_norm": 1.0497719049453735, + "learning_rate": 0.0014997573019876179, + "loss": 3.0269, + "step": 171 + }, + { + "epoch": 0.018143459915611813, + "grad_norm": 0.860253095626831, + "learning_rate": 0.0014997508735353957, + "loss": 3.0805, + "step": 172 + }, + { + "epoch": 0.018248945147679326, + "grad_norm": 0.7613124847412109, + "learning_rate": 0.0014997443610698497, + "loss": 3.0498, + "step": 173 + }, + { + "epoch": 0.018354430379746836, + "grad_norm": 0.6303547620773315, + "learning_rate": 0.0014997377645917095, + "loss": 3.001, + "step": 174 + }, + { + "epoch": 0.018459915611814346, + "grad_norm": 0.8954768776893616, + "learning_rate": 0.001499731084101714, + "loss": 3.0594, + "step": 175 + }, + { + "epoch": 0.018565400843881856, + "grad_norm": 0.9532195329666138, + "learning_rate": 0.0014997243196006125, + "loss": 3.0371, + "step": 176 + }, + { + "epoch": 0.018670886075949366, + "grad_norm": 1.0947997570037842, + "learning_rate": 0.001499717471089162, + "loss": 3.0449, + "step": 177 + }, + { + "epoch": 0.018776371308016876, + "grad_norm": 0.9511747360229492, + "learning_rate": 0.0014997105385681306, + "loss": 3.0318, + "step": 178 + }, + { + "epoch": 0.01888185654008439, + "grad_norm": 0.9476508498191833, + "learning_rate": 0.001499703522038295, + "loss": 3.0486, + "step": 179 + }, + { + "epoch": 0.0189873417721519, + "grad_norm": 1.0026170015335083, + "learning_rate": 0.0014996964215004416, + "loss": 3.0095, + "step": 180 + }, + { + "epoch": 0.01909282700421941, + "grad_norm": 0.8586771488189697, + "learning_rate": 0.0014996892369553655, + "loss": 3.021, + "step": 181 + }, + { + "epoch": 
0.01919831223628692, + "grad_norm": 0.6531928181648254, + "learning_rate": 0.0014996819684038726, + "loss": 2.9981, + "step": 182 + }, + { + "epoch": 0.01930379746835443, + "grad_norm": 0.8344888687133789, + "learning_rate": 0.0014996746158467762, + "loss": 2.9821, + "step": 183 + }, + { + "epoch": 0.019409282700421943, + "grad_norm": 0.7904844880104065, + "learning_rate": 0.0014996671792849015, + "loss": 3.0129, + "step": 184 + }, + { + "epoch": 0.019514767932489453, + "grad_norm": 1.0559462308883667, + "learning_rate": 0.001499659658719081, + "loss": 2.9681, + "step": 185 + }, + { + "epoch": 0.019620253164556962, + "grad_norm": 1.0281323194503784, + "learning_rate": 0.0014996520541501574, + "loss": 2.9561, + "step": 186 + }, + { + "epoch": 0.019725738396624472, + "grad_norm": 1.0009651184082031, + "learning_rate": 0.0014996443655789832, + "loss": 2.9467, + "step": 187 + }, + { + "epoch": 0.019831223628691982, + "grad_norm": 0.9298136830329895, + "learning_rate": 0.0014996365930064197, + "loss": 2.9478, + "step": 188 + }, + { + "epoch": 0.019936708860759492, + "grad_norm": 0.8605768084526062, + "learning_rate": 0.001499628736433338, + "loss": 2.9606, + "step": 189 + }, + { + "epoch": 0.020042194092827006, + "grad_norm": 0.8928024172782898, + "learning_rate": 0.0014996207958606182, + "loss": 2.9884, + "step": 190 + }, + { + "epoch": 0.020147679324894516, + "grad_norm": 0.6860042214393616, + "learning_rate": 0.0014996127712891504, + "loss": 2.9232, + "step": 191 + }, + { + "epoch": 0.020253164556962026, + "grad_norm": 0.8216872811317444, + "learning_rate": 0.0014996046627198337, + "loss": 2.9808, + "step": 192 + }, + { + "epoch": 0.020358649789029536, + "grad_norm": 0.7484018802642822, + "learning_rate": 0.0014995964701535768, + "loss": 2.9075, + "step": 193 + }, + { + "epoch": 0.020464135021097046, + "grad_norm": 0.7545064091682434, + "learning_rate": 0.0014995881935912973, + "loss": 2.9443, + "step": 194 + }, + { + "epoch": 0.020569620253164556, + "grad_norm": 
0.9029513001441956, + "learning_rate": 0.0014995798330339233, + "loss": 2.9336, + "step": 195 + }, + { + "epoch": 0.02067510548523207, + "grad_norm": 0.7799097299575806, + "learning_rate": 0.001499571388482391, + "loss": 2.9066, + "step": 196 + }, + { + "epoch": 0.02078059071729958, + "grad_norm": 0.6838781833648682, + "learning_rate": 0.001499562859937647, + "loss": 2.9086, + "step": 197 + }, + { + "epoch": 0.02088607594936709, + "grad_norm": 0.6533251404762268, + "learning_rate": 0.001499554247400647, + "loss": 2.9381, + "step": 198 + }, + { + "epoch": 0.0209915611814346, + "grad_norm": 0.6706313490867615, + "learning_rate": 0.0014995455508723557, + "loss": 2.9343, + "step": 199 + }, + { + "epoch": 0.02109704641350211, + "grad_norm": 0.75434809923172, + "learning_rate": 0.001499536770353748, + "loss": 2.879, + "step": 200 + }, + { + "epoch": 0.02120253164556962, + "grad_norm": 0.7447013258934021, + "learning_rate": 0.0014995279058458075, + "loss": 2.9212, + "step": 201 + }, + { + "epoch": 0.021308016877637132, + "grad_norm": 0.9261327385902405, + "learning_rate": 0.001499518957349528, + "loss": 2.9055, + "step": 202 + }, + { + "epoch": 0.021413502109704642, + "grad_norm": 1.1261088848114014, + "learning_rate": 0.0014995099248659115, + "loss": 2.9202, + "step": 203 + }, + { + "epoch": 0.021518987341772152, + "grad_norm": 1.2600892782211304, + "learning_rate": 0.001499500808395971, + "loss": 2.9083, + "step": 204 + }, + { + "epoch": 0.021624472573839662, + "grad_norm": 0.8950139284133911, + "learning_rate": 0.0014994916079407272, + "loss": 2.877, + "step": 205 + }, + { + "epoch": 0.021729957805907172, + "grad_norm": 1.0753698348999023, + "learning_rate": 0.0014994823235012114, + "loss": 2.8615, + "step": 206 + }, + { + "epoch": 0.021835443037974682, + "grad_norm": 0.8130130767822266, + "learning_rate": 0.0014994729550784642, + "loss": 2.8912, + "step": 207 + }, + { + "epoch": 0.021940928270042195, + "grad_norm": 0.792235791683197, + "learning_rate": 
0.001499463502673535, + "loss": 2.8613, + "step": 208 + }, + { + "epoch": 0.022046413502109705, + "grad_norm": 0.7663805484771729, + "learning_rate": 0.0014994539662874832, + "loss": 2.8658, + "step": 209 + }, + { + "epoch": 0.022151898734177215, + "grad_norm": 0.8318954706192017, + "learning_rate": 0.0014994443459213774, + "loss": 2.9134, + "step": 210 + }, + { + "epoch": 0.022257383966244725, + "grad_norm": 0.8196743726730347, + "learning_rate": 0.0014994346415762956, + "loss": 2.869, + "step": 211 + }, + { + "epoch": 0.022362869198312235, + "grad_norm": 0.7377031445503235, + "learning_rate": 0.0014994248532533253, + "loss": 2.8581, + "step": 212 + }, + { + "epoch": 0.022468354430379745, + "grad_norm": 0.8021937012672424, + "learning_rate": 0.001499414980953563, + "loss": 2.8873, + "step": 213 + }, + { + "epoch": 0.02257383966244726, + "grad_norm": 0.8724969029426575, + "learning_rate": 0.0014994050246781153, + "loss": 2.8596, + "step": 214 + }, + { + "epoch": 0.02267932489451477, + "grad_norm": 0.7893036007881165, + "learning_rate": 0.0014993949844280977, + "loss": 2.8203, + "step": 215 + }, + { + "epoch": 0.02278481012658228, + "grad_norm": 0.7690721154212952, + "learning_rate": 0.0014993848602046355, + "loss": 2.8549, + "step": 216 + }, + { + "epoch": 0.02289029535864979, + "grad_norm": 0.8050940632820129, + "learning_rate": 0.0014993746520088626, + "loss": 2.8408, + "step": 217 + }, + { + "epoch": 0.0229957805907173, + "grad_norm": 0.7850884795188904, + "learning_rate": 0.0014993643598419234, + "loss": 2.8629, + "step": 218 + }, + { + "epoch": 0.023101265822784812, + "grad_norm": 1.1050050258636475, + "learning_rate": 0.0014993539837049707, + "loss": 2.8722, + "step": 219 + }, + { + "epoch": 0.023206751054852322, + "grad_norm": 0.9157338738441467, + "learning_rate": 0.001499343523599168, + "loss": 2.8256, + "step": 220 + }, + { + "epoch": 0.02331223628691983, + "grad_norm": 1.02295982837677, + "learning_rate": 0.0014993329795256864, + "loss": 2.8311, + 
"step": 221 + }, + { + "epoch": 0.02341772151898734, + "grad_norm": 1.0586615800857544, + "learning_rate": 0.0014993223514857081, + "loss": 2.8102, + "step": 222 + }, + { + "epoch": 0.02352320675105485, + "grad_norm": 0.7005104422569275, + "learning_rate": 0.001499311639480424, + "loss": 2.8028, + "step": 223 + }, + { + "epoch": 0.02362869198312236, + "grad_norm": 0.9401168823242188, + "learning_rate": 0.0014993008435110345, + "loss": 2.8108, + "step": 224 + }, + { + "epoch": 0.023734177215189875, + "grad_norm": 0.7477805614471436, + "learning_rate": 0.0014992899635787487, + "loss": 2.791, + "step": 225 + }, + { + "epoch": 0.023839662447257385, + "grad_norm": 0.9180036187171936, + "learning_rate": 0.0014992789996847863, + "loss": 2.8511, + "step": 226 + }, + { + "epoch": 0.023945147679324895, + "grad_norm": 0.8967137336730957, + "learning_rate": 0.0014992679518303761, + "loss": 2.7938, + "step": 227 + }, + { + "epoch": 0.024050632911392405, + "grad_norm": 0.728473424911499, + "learning_rate": 0.001499256820016755, + "loss": 2.7824, + "step": 228 + }, + { + "epoch": 0.024156118143459915, + "grad_norm": 0.7292273044586182, + "learning_rate": 0.0014992456042451717, + "loss": 2.7772, + "step": 229 + }, + { + "epoch": 0.024261603375527425, + "grad_norm": 0.8464710116386414, + "learning_rate": 0.0014992343045168823, + "loss": 2.8048, + "step": 230 + }, + { + "epoch": 0.024367088607594938, + "grad_norm": 0.8340122699737549, + "learning_rate": 0.0014992229208331527, + "loss": 2.7865, + "step": 231 + }, + { + "epoch": 0.024472573839662448, + "grad_norm": 0.8699526190757751, + "learning_rate": 0.0014992114531952592, + "loss": 2.7983, + "step": 232 + }, + { + "epoch": 0.024578059071729958, + "grad_norm": 0.9551359415054321, + "learning_rate": 0.0014991999016044865, + "loss": 2.7462, + "step": 233 + }, + { + "epoch": 0.024683544303797468, + "grad_norm": 1.1301026344299316, + "learning_rate": 0.0014991882660621285, + "loss": 2.8094, + "step": 234 + }, + { + "epoch": 
0.024789029535864978, + "grad_norm": 1.03757643699646, + "learning_rate": 0.0014991765465694898, + "loss": 2.7448, + "step": 235 + }, + { + "epoch": 0.024894514767932488, + "grad_norm": 0.9353962540626526, + "learning_rate": 0.0014991647431278835, + "loss": 2.7677, + "step": 236 + }, + { + "epoch": 0.025, + "grad_norm": 0.7047120332717896, + "learning_rate": 0.001499152855738632, + "loss": 2.7399, + "step": 237 + }, + { + "epoch": 0.02510548523206751, + "grad_norm": 0.7802032232284546, + "learning_rate": 0.0014991408844030672, + "loss": 2.7462, + "step": 238 + }, + { + "epoch": 0.02521097046413502, + "grad_norm": 0.8195375800132751, + "learning_rate": 0.0014991288291225308, + "loss": 2.7599, + "step": 239 + }, + { + "epoch": 0.02531645569620253, + "grad_norm": 0.8865566253662109, + "learning_rate": 0.0014991166898983739, + "loss": 2.7781, + "step": 240 + }, + { + "epoch": 0.02542194092827004, + "grad_norm": 0.8398318290710449, + "learning_rate": 0.001499104466731956, + "loss": 2.7099, + "step": 241 + }, + { + "epoch": 0.02552742616033755, + "grad_norm": 0.928798496723175, + "learning_rate": 0.0014990921596246475, + "loss": 2.7534, + "step": 242 + }, + { + "epoch": 0.025632911392405065, + "grad_norm": 0.877642035484314, + "learning_rate": 0.0014990797685778272, + "loss": 2.7742, + "step": 243 + }, + { + "epoch": 0.025738396624472575, + "grad_norm": 0.7983734607696533, + "learning_rate": 0.0014990672935928835, + "loss": 2.7637, + "step": 244 + }, + { + "epoch": 0.025843881856540084, + "grad_norm": 0.9849335551261902, + "learning_rate": 0.0014990547346712144, + "loss": 2.7493, + "step": 245 + }, + { + "epoch": 0.025949367088607594, + "grad_norm": 1.0323108434677124, + "learning_rate": 0.0014990420918142271, + "loss": 2.7429, + "step": 246 + }, + { + "epoch": 0.026054852320675104, + "grad_norm": 0.8962952494621277, + "learning_rate": 0.0014990293650233384, + "loss": 2.6965, + "step": 247 + }, + { + "epoch": 0.026160337552742614, + "grad_norm": 1.031099796295166, + 
"learning_rate": 0.0014990165542999746, + "loss": 2.7581, + "step": 248 + }, + { + "epoch": 0.026265822784810128, + "grad_norm": 1.1190574169158936, + "learning_rate": 0.0014990036596455706, + "loss": 2.7169, + "step": 249 + }, + { + "epoch": 0.026371308016877638, + "grad_norm": 0.7374270558357239, + "learning_rate": 0.001498990681061572, + "loss": 2.6911, + "step": 250 + }, + { + "epoch": 0.026476793248945148, + "grad_norm": 0.9564046859741211, + "learning_rate": 0.0014989776185494322, + "loss": 2.7418, + "step": 251 + }, + { + "epoch": 0.026582278481012658, + "grad_norm": 0.8994322419166565, + "learning_rate": 0.001498964472110616, + "loss": 2.7083, + "step": 252 + }, + { + "epoch": 0.026687763713080168, + "grad_norm": 0.7286154627799988, + "learning_rate": 0.001498951241746596, + "loss": 2.7133, + "step": 253 + }, + { + "epoch": 0.02679324894514768, + "grad_norm": 0.7479667067527771, + "learning_rate": 0.0014989379274588546, + "loss": 2.6865, + "step": 254 + }, + { + "epoch": 0.02689873417721519, + "grad_norm": 0.7310888767242432, + "learning_rate": 0.0014989245292488839, + "loss": 2.6907, + "step": 255 + }, + { + "epoch": 0.0270042194092827, + "grad_norm": 0.9391236901283264, + "learning_rate": 0.0014989110471181853, + "loss": 2.6548, + "step": 256 + }, + { + "epoch": 0.02710970464135021, + "grad_norm": 0.9826369285583496, + "learning_rate": 0.0014988974810682695, + "loss": 2.7099, + "step": 257 + }, + { + "epoch": 0.02721518987341772, + "grad_norm": 1.0842355489730835, + "learning_rate": 0.0014988838311006565, + "loss": 2.7195, + "step": 258 + }, + { + "epoch": 0.02732067510548523, + "grad_norm": 1.100656509399414, + "learning_rate": 0.0014988700972168758, + "loss": 2.7256, + "step": 259 + }, + { + "epoch": 0.027426160337552744, + "grad_norm": 0.7387327551841736, + "learning_rate": 0.001498856279418467, + "loss": 2.6568, + "step": 260 + }, + { + "epoch": 0.027531645569620254, + "grad_norm": 0.8931964635848999, + "learning_rate": 0.0014988423777069775, + 
"loss": 2.7001, + "step": 261 + }, + { + "epoch": 0.027637130801687764, + "grad_norm": 0.9801490902900696, + "learning_rate": 0.0014988283920839658, + "loss": 2.674, + "step": 262 + }, + { + "epoch": 0.027742616033755274, + "grad_norm": 0.9840818643569946, + "learning_rate": 0.0014988143225509983, + "loss": 2.6783, + "step": 263 + }, + { + "epoch": 0.027848101265822784, + "grad_norm": 0.7759222984313965, + "learning_rate": 0.0014988001691096525, + "loss": 2.6749, + "step": 264 + }, + { + "epoch": 0.027953586497890294, + "grad_norm": 0.7968153357505798, + "learning_rate": 0.0014987859317615137, + "loss": 2.6753, + "step": 265 + }, + { + "epoch": 0.028059071729957807, + "grad_norm": 0.97371906042099, + "learning_rate": 0.0014987716105081775, + "loss": 2.6773, + "step": 266 + }, + { + "epoch": 0.028164556962025317, + "grad_norm": 1.0047575235366821, + "learning_rate": 0.001498757205351249, + "loss": 2.6647, + "step": 267 + }, + { + "epoch": 0.028270042194092827, + "grad_norm": 1.3084237575531006, + "learning_rate": 0.0014987427162923416, + "loss": 2.6447, + "step": 268 + }, + { + "epoch": 0.028375527426160337, + "grad_norm": 0.8623433113098145, + "learning_rate": 0.001498728143333079, + "loss": 2.6717, + "step": 269 + }, + { + "epoch": 0.028481012658227847, + "grad_norm": 0.7620953321456909, + "learning_rate": 0.0014987134864750948, + "loss": 2.6608, + "step": 270 + }, + { + "epoch": 0.028586497890295357, + "grad_norm": 0.8837935328483582, + "learning_rate": 0.0014986987457200312, + "loss": 2.6655, + "step": 271 + }, + { + "epoch": 0.02869198312236287, + "grad_norm": 1.2152572870254517, + "learning_rate": 0.0014986839210695394, + "loss": 2.658, + "step": 272 + }, + { + "epoch": 0.02879746835443038, + "grad_norm": 0.9333233833312988, + "learning_rate": 0.0014986690125252814, + "loss": 2.6256, + "step": 273 + }, + { + "epoch": 0.02890295358649789, + "grad_norm": 0.6842397451400757, + "learning_rate": 0.001498654020088927, + "loss": 2.6383, + "step": 274 + }, + { + 
"epoch": 0.0290084388185654, + "grad_norm": 0.8164568543434143, + "learning_rate": 0.0014986389437621566, + "loss": 2.667, + "step": 275 + }, + { + "epoch": 0.02911392405063291, + "grad_norm": 0.887560248374939, + "learning_rate": 0.0014986237835466596, + "loss": 2.6221, + "step": 276 + }, + { + "epoch": 0.02921940928270042, + "grad_norm": 0.8998042345046997, + "learning_rate": 0.0014986085394441343, + "loss": 2.6459, + "step": 277 + }, + { + "epoch": 0.029324894514767934, + "grad_norm": 0.8301979899406433, + "learning_rate": 0.0014985932114562896, + "loss": 2.571, + "step": 278 + }, + { + "epoch": 0.029430379746835444, + "grad_norm": 0.8182389140129089, + "learning_rate": 0.0014985777995848428, + "loss": 2.6336, + "step": 279 + }, + { + "epoch": 0.029535864978902954, + "grad_norm": 0.7618486881256104, + "learning_rate": 0.0014985623038315206, + "loss": 2.6113, + "step": 280 + }, + { + "epoch": 0.029641350210970464, + "grad_norm": 0.7772089242935181, + "learning_rate": 0.0014985467241980597, + "loss": 2.5897, + "step": 281 + }, + { + "epoch": 0.029746835443037974, + "grad_norm": 0.8481540679931641, + "learning_rate": 0.0014985310606862058, + "loss": 2.654, + "step": 282 + }, + { + "epoch": 0.029852320675105484, + "grad_norm": 0.7922455668449402, + "learning_rate": 0.0014985153132977141, + "loss": 2.5766, + "step": 283 + }, + { + "epoch": 0.029957805907172997, + "grad_norm": 0.8059092164039612, + "learning_rate": 0.0014984994820343488, + "loss": 2.5944, + "step": 284 + }, + { + "epoch": 0.030063291139240507, + "grad_norm": 0.7504307627677917, + "learning_rate": 0.0014984835668978844, + "loss": 2.6428, + "step": 285 + }, + { + "epoch": 0.030168776371308017, + "grad_norm": 0.8072834610939026, + "learning_rate": 0.0014984675678901042, + "loss": 2.6151, + "step": 286 + }, + { + "epoch": 0.030274261603375527, + "grad_norm": 0.9621486663818359, + "learning_rate": 0.0014984514850128006, + "loss": 2.6123, + "step": 287 + }, + { + "epoch": 0.030379746835443037, + 
"grad_norm": 1.0051345825195312, + "learning_rate": 0.0014984353182677759, + "loss": 2.6111, + "step": 288 + }, + { + "epoch": 0.03048523206751055, + "grad_norm": 0.9531722068786621, + "learning_rate": 0.001498419067656842, + "loss": 2.5974, + "step": 289 + }, + { + "epoch": 0.03059071729957806, + "grad_norm": 1.0332670211791992, + "learning_rate": 0.0014984027331818193, + "loss": 2.6003, + "step": 290 + }, + { + "epoch": 0.03069620253164557, + "grad_norm": 1.0103956460952759, + "learning_rate": 0.0014983863148445389, + "loss": 2.5992, + "step": 291 + }, + { + "epoch": 0.03080168776371308, + "grad_norm": 0.9183757305145264, + "learning_rate": 0.0014983698126468398, + "loss": 2.6181, + "step": 292 + }, + { + "epoch": 0.03090717299578059, + "grad_norm": 0.6754888296127319, + "learning_rate": 0.0014983532265905716, + "loss": 2.5932, + "step": 293 + }, + { + "epoch": 0.0310126582278481, + "grad_norm": 0.7391828894615173, + "learning_rate": 0.0014983365566775928, + "loss": 2.5541, + "step": 294 + }, + { + "epoch": 0.031118143459915613, + "grad_norm": 0.6932097673416138, + "learning_rate": 0.0014983198029097711, + "loss": 2.5613, + "step": 295 + }, + { + "epoch": 0.031223628691983123, + "grad_norm": 0.7754963636398315, + "learning_rate": 0.0014983029652889843, + "loss": 2.6035, + "step": 296 + }, + { + "epoch": 0.03132911392405063, + "grad_norm": 0.892566978931427, + "learning_rate": 0.0014982860438171187, + "loss": 2.6198, + "step": 297 + }, + { + "epoch": 0.03143459915611815, + "grad_norm": 0.919947624206543, + "learning_rate": 0.0014982690384960705, + "loss": 2.5863, + "step": 298 + }, + { + "epoch": 0.03154008438818565, + "grad_norm": 1.0368226766586304, + "learning_rate": 0.0014982519493277455, + "loss": 2.5477, + "step": 299 + }, + { + "epoch": 0.03164556962025317, + "grad_norm": 1.25827157497406, + "learning_rate": 0.0014982347763140584, + "loss": 2.5777, + "step": 300 + }, + { + "epoch": 0.03175105485232067, + "grad_norm": 0.9795299172401428, + "learning_rate": 
0.0014982175194569337, + "loss": 2.5559, + "step": 301 + }, + { + "epoch": 0.03185654008438819, + "grad_norm": 1.068429708480835, + "learning_rate": 0.0014982001787583047, + "loss": 2.5569, + "step": 302 + }, + { + "epoch": 0.03196202531645569, + "grad_norm": 0.8613247275352478, + "learning_rate": 0.001498182754220115, + "loss": 2.5617, + "step": 303 + }, + { + "epoch": 0.032067510548523206, + "grad_norm": 0.8511029481887817, + "learning_rate": 0.001498165245844317, + "loss": 2.5643, + "step": 304 + }, + { + "epoch": 0.03217299578059072, + "grad_norm": 0.8806131482124329, + "learning_rate": 0.0014981476536328722, + "loss": 2.5802, + "step": 305 + }, + { + "epoch": 0.032278481012658226, + "grad_norm": 0.8648945093154907, + "learning_rate": 0.0014981299775877525, + "loss": 2.5588, + "step": 306 + }, + { + "epoch": 0.03238396624472574, + "grad_norm": 0.8159801959991455, + "learning_rate": 0.0014981122177109383, + "loss": 2.5651, + "step": 307 + }, + { + "epoch": 0.032489451476793246, + "grad_norm": 0.77834552526474, + "learning_rate": 0.0014980943740044196, + "loss": 2.5421, + "step": 308 + }, + { + "epoch": 0.03259493670886076, + "grad_norm": 0.8303746581077576, + "learning_rate": 0.0014980764464701958, + "loss": 2.5707, + "step": 309 + }, + { + "epoch": 0.03270042194092827, + "grad_norm": 0.8329693675041199, + "learning_rate": 0.0014980584351102762, + "loss": 2.5613, + "step": 310 + }, + { + "epoch": 0.03280590717299578, + "grad_norm": 0.9889587759971619, + "learning_rate": 0.0014980403399266786, + "loss": 2.5379, + "step": 311 + }, + { + "epoch": 0.03291139240506329, + "grad_norm": 1.0490999221801758, + "learning_rate": 0.0014980221609214308, + "loss": 2.5463, + "step": 312 + }, + { + "epoch": 0.0330168776371308, + "grad_norm": 1.283737301826477, + "learning_rate": 0.0014980038980965701, + "loss": 2.5293, + "step": 313 + }, + { + "epoch": 0.03312236286919831, + "grad_norm": 0.912256121635437, + "learning_rate": 0.0014979855514541424, + "loss": 2.5212, + "step": 314 
+ }, + { + "epoch": 0.03322784810126582, + "grad_norm": 0.997200608253479, + "learning_rate": 0.0014979671209962044, + "loss": 2.5473, + "step": 315 + }, + { + "epoch": 0.03333333333333333, + "grad_norm": 1.1375951766967773, + "learning_rate": 0.0014979486067248204, + "loss": 2.5177, + "step": 316 + }, + { + "epoch": 0.033438818565400846, + "grad_norm": 0.9144400954246521, + "learning_rate": 0.0014979300086420655, + "loss": 2.5459, + "step": 317 + }, + { + "epoch": 0.03354430379746835, + "grad_norm": 0.9101810455322266, + "learning_rate": 0.0014979113267500235, + "loss": 2.5328, + "step": 318 + }, + { + "epoch": 0.033649789029535866, + "grad_norm": 0.8112136721611023, + "learning_rate": 0.0014978925610507879, + "loss": 2.5192, + "step": 319 + }, + { + "epoch": 0.03375527426160337, + "grad_norm": 0.8703595399856567, + "learning_rate": 0.001497873711546462, + "loss": 2.54, + "step": 320 + }, + { + "epoch": 0.033860759493670886, + "grad_norm": 0.870887041091919, + "learning_rate": 0.001497854778239157, + "loss": 2.5357, + "step": 321 + }, + { + "epoch": 0.0339662447257384, + "grad_norm": 0.8235728740692139, + "learning_rate": 0.0014978357611309951, + "loss": 2.54, + "step": 322 + }, + { + "epoch": 0.034071729957805906, + "grad_norm": 1.0844752788543701, + "learning_rate": 0.0014978166602241068, + "loss": 2.5562, + "step": 323 + }, + { + "epoch": 0.03417721518987342, + "grad_norm": 1.0264545679092407, + "learning_rate": 0.0014977974755206334, + "loss": 2.5691, + "step": 324 + }, + { + "epoch": 0.034282700421940926, + "grad_norm": 0.9507666230201721, + "learning_rate": 0.0014977782070227236, + "loss": 2.511, + "step": 325 + }, + { + "epoch": 0.03438818565400844, + "grad_norm": 1.091043472290039, + "learning_rate": 0.001497758854732537, + "loss": 2.5297, + "step": 326 + }, + { + "epoch": 0.03449367088607595, + "grad_norm": 1.5450935363769531, + "learning_rate": 0.001497739418652242, + "loss": 2.5726, + "step": 327 + }, + { + "epoch": 0.03459915611814346, + "grad_norm": 
0.7949501276016235, + "learning_rate": 0.0014977198987840168, + "loss": 2.5057, + "step": 328 + }, + { + "epoch": 0.03470464135021097, + "grad_norm": 1.5304049253463745, + "learning_rate": 0.0014977002951300483, + "loss": 2.5107, + "step": 329 + }, + { + "epoch": 0.03481012658227848, + "grad_norm": 1.1956652402877808, + "learning_rate": 0.0014976806076925334, + "loss": 2.496, + "step": 330 + }, + { + "epoch": 0.03491561181434599, + "grad_norm": 0.9874817728996277, + "learning_rate": 0.0014976608364736781, + "loss": 2.4964, + "step": 331 + }, + { + "epoch": 0.0350210970464135, + "grad_norm": 0.8058302998542786, + "learning_rate": 0.001497640981475698, + "loss": 2.4981, + "step": 332 + }, + { + "epoch": 0.03512658227848101, + "grad_norm": 1.077811598777771, + "learning_rate": 0.0014976210427008177, + "loss": 2.5404, + "step": 333 + }, + { + "epoch": 0.035232067510548526, + "grad_norm": 1.2074663639068604, + "learning_rate": 0.0014976010201512718, + "loss": 2.5043, + "step": 334 + }, + { + "epoch": 0.03533755274261603, + "grad_norm": 0.934378981590271, + "learning_rate": 0.0014975809138293036, + "loss": 2.4897, + "step": 335 + }, + { + "epoch": 0.035443037974683546, + "grad_norm": 0.882798433303833, + "learning_rate": 0.0014975607237371663, + "loss": 2.4936, + "step": 336 + }, + { + "epoch": 0.03554852320675105, + "grad_norm": 1.079505205154419, + "learning_rate": 0.0014975404498771222, + "loss": 2.4907, + "step": 337 + }, + { + "epoch": 0.035654008438818566, + "grad_norm": 0.9299851059913635, + "learning_rate": 0.0014975200922514428, + "loss": 2.5038, + "step": 338 + }, + { + "epoch": 0.03575949367088608, + "grad_norm": 0.9592328071594238, + "learning_rate": 0.00149749965086241, + "loss": 2.52, + "step": 339 + }, + { + "epoch": 0.035864978902953586, + "grad_norm": 0.7728857398033142, + "learning_rate": 0.0014974791257123137, + "loss": 2.4903, + "step": 340 + }, + { + "epoch": 0.0359704641350211, + "grad_norm": 0.8789849281311035, + "learning_rate": 
0.0014974585168034543, + "loss": 2.4642, + "step": 341 + }, + { + "epoch": 0.036075949367088606, + "grad_norm": 0.8572189211845398, + "learning_rate": 0.0014974378241381409, + "loss": 2.4356, + "step": 342 + }, + { + "epoch": 0.03618143459915612, + "grad_norm": 0.871981680393219, + "learning_rate": 0.001497417047718692, + "loss": 2.483, + "step": 343 + }, + { + "epoch": 0.036286919831223625, + "grad_norm": 0.8811277151107788, + "learning_rate": 0.0014973961875474364, + "loss": 2.4662, + "step": 344 + }, + { + "epoch": 0.03639240506329114, + "grad_norm": 0.8861455917358398, + "learning_rate": 0.0014973752436267106, + "loss": 2.4704, + "step": 345 + }, + { + "epoch": 0.03649789029535865, + "grad_norm": 0.9588395357131958, + "learning_rate": 0.0014973542159588623, + "loss": 2.4488, + "step": 346 + }, + { + "epoch": 0.03660337552742616, + "grad_norm": 0.9352737069129944, + "learning_rate": 0.0014973331045462475, + "loss": 2.4848, + "step": 347 + }, + { + "epoch": 0.03670886075949367, + "grad_norm": 1.0065271854400635, + "learning_rate": 0.0014973119093912317, + "loss": 2.4713, + "step": 348 + }, + { + "epoch": 0.03681434599156118, + "grad_norm": 0.9064860343933105, + "learning_rate": 0.00149729063049619, + "loss": 2.4342, + "step": 349 + }, + { + "epoch": 0.03691983122362869, + "grad_norm": 0.9364880919456482, + "learning_rate": 0.001497269267863507, + "loss": 2.4385, + "step": 350 + }, + { + "epoch": 0.037025316455696206, + "grad_norm": 0.9036106467247009, + "learning_rate": 0.0014972478214955762, + "loss": 2.4373, + "step": 351 + }, + { + "epoch": 0.03713080168776371, + "grad_norm": 0.8238677382469177, + "learning_rate": 0.0014972262913948008, + "loss": 2.4553, + "step": 352 + }, + { + "epoch": 0.037236286919831225, + "grad_norm": 0.8576406240463257, + "learning_rate": 0.0014972046775635934, + "loss": 2.4728, + "step": 353 + }, + { + "epoch": 0.03734177215189873, + "grad_norm": 0.9308829307556152, + "learning_rate": 0.0014971829800043762, + "loss": 2.4126, + "step": 
354 + }, + { + "epoch": 0.037447257383966245, + "grad_norm": 1.1234643459320068, + "learning_rate": 0.0014971611987195802, + "loss": 2.4462, + "step": 355 + }, + { + "epoch": 0.03755274261603375, + "grad_norm": 1.062861680984497, + "learning_rate": 0.0014971393337116462, + "loss": 2.4733, + "step": 356 + }, + { + "epoch": 0.037658227848101265, + "grad_norm": 0.896031379699707, + "learning_rate": 0.0014971173849830243, + "loss": 2.4257, + "step": 357 + }, + { + "epoch": 0.03776371308016878, + "grad_norm": 0.8607374429702759, + "learning_rate": 0.0014970953525361738, + "loss": 2.4422, + "step": 358 + }, + { + "epoch": 0.037869198312236285, + "grad_norm": 0.7985581755638123, + "learning_rate": 0.001497073236373564, + "loss": 2.4177, + "step": 359 + }, + { + "epoch": 0.0379746835443038, + "grad_norm": 0.8593570590019226, + "learning_rate": 0.0014970510364976724, + "loss": 2.4623, + "step": 360 + }, + { + "epoch": 0.038080168776371305, + "grad_norm": 1.0175992250442505, + "learning_rate": 0.0014970287529109873, + "loss": 2.4388, + "step": 361 + }, + { + "epoch": 0.03818565400843882, + "grad_norm": 1.2127894163131714, + "learning_rate": 0.0014970063856160054, + "loss": 2.4701, + "step": 362 + }, + { + "epoch": 0.03829113924050633, + "grad_norm": 0.8473833203315735, + "learning_rate": 0.0014969839346152332, + "loss": 2.4149, + "step": 363 + }, + { + "epoch": 0.03839662447257384, + "grad_norm": 0.8215389251708984, + "learning_rate": 0.001496961399911186, + "loss": 2.4196, + "step": 364 + }, + { + "epoch": 0.03850210970464135, + "grad_norm": 1.0926530361175537, + "learning_rate": 0.0014969387815063897, + "loss": 2.4605, + "step": 365 + }, + { + "epoch": 0.03860759493670886, + "grad_norm": 1.1607736349105835, + "learning_rate": 0.0014969160794033778, + "loss": 2.4192, + "step": 366 + }, + { + "epoch": 0.03871308016877637, + "grad_norm": 0.7103621363639832, + "learning_rate": 0.0014968932936046953, + "loss": 2.415, + "step": 367 + }, + { + "epoch": 0.038818565400843885, + 
"grad_norm": 0.965883731842041, + "learning_rate": 0.0014968704241128947, + "loss": 2.4469, + "step": 368 + }, + { + "epoch": 0.03892405063291139, + "grad_norm": 0.9909449219703674, + "learning_rate": 0.0014968474709305384, + "loss": 2.435, + "step": 369 + }, + { + "epoch": 0.039029535864978905, + "grad_norm": 1.0886893272399902, + "learning_rate": 0.0014968244340601996, + "loss": 2.4053, + "step": 370 + }, + { + "epoch": 0.03913502109704641, + "grad_norm": 1.0527657270431519, + "learning_rate": 0.0014968013135044586, + "loss": 2.421, + "step": 371 + }, + { + "epoch": 0.039240506329113925, + "grad_norm": 0.8397204875946045, + "learning_rate": 0.0014967781092659065, + "loss": 2.3997, + "step": 372 + }, + { + "epoch": 0.03934599156118143, + "grad_norm": 0.7885728478431702, + "learning_rate": 0.0014967548213471436, + "loss": 2.4365, + "step": 373 + }, + { + "epoch": 0.039451476793248945, + "grad_norm": 0.7230392694473267, + "learning_rate": 0.0014967314497507792, + "loss": 2.4537, + "step": 374 + }, + { + "epoch": 0.03955696202531646, + "grad_norm": 0.7651317119598389, + "learning_rate": 0.0014967079944794323, + "loss": 2.4008, + "step": 375 + }, + { + "epoch": 0.039662447257383965, + "grad_norm": 0.8687002658843994, + "learning_rate": 0.0014966844555357314, + "loss": 2.4163, + "step": 376 + }, + { + "epoch": 0.03976793248945148, + "grad_norm": 0.8616766929626465, + "learning_rate": 0.0014966608329223137, + "loss": 2.4247, + "step": 377 + }, + { + "epoch": 0.039873417721518985, + "grad_norm": 0.7912053465843201, + "learning_rate": 0.0014966371266418267, + "loss": 2.4015, + "step": 378 + }, + { + "epoch": 0.0399789029535865, + "grad_norm": 0.7859050631523132, + "learning_rate": 0.0014966133366969264, + "loss": 2.4207, + "step": 379 + }, + { + "epoch": 0.04008438818565401, + "grad_norm": 0.8568212985992432, + "learning_rate": 0.001496589463090279, + "loss": 2.4086, + "step": 380 + }, + { + "epoch": 0.04018987341772152, + "grad_norm": 0.8737967014312744, + 
"learning_rate": 0.0014965655058245592, + "loss": 2.4093, + "step": 381 + }, + { + "epoch": 0.04029535864978903, + "grad_norm": 1.0347440242767334, + "learning_rate": 0.001496541464902452, + "loss": 2.419, + "step": 382 + }, + { + "epoch": 0.04040084388185654, + "grad_norm": 0.9830706119537354, + "learning_rate": 0.001496517340326651, + "loss": 2.415, + "step": 383 + }, + { + "epoch": 0.04050632911392405, + "grad_norm": 0.828308641910553, + "learning_rate": 0.0014964931320998593, + "loss": 2.3951, + "step": 384 + }, + { + "epoch": 0.04061181434599156, + "grad_norm": 0.7924207448959351, + "learning_rate": 0.00149646884022479, + "loss": 2.3873, + "step": 385 + }, + { + "epoch": 0.04071729957805907, + "grad_norm": 0.798167884349823, + "learning_rate": 0.0014964444647041647, + "loss": 2.3771, + "step": 386 + }, + { + "epoch": 0.040822784810126585, + "grad_norm": 0.877292275428772, + "learning_rate": 0.0014964200055407153, + "loss": 2.4189, + "step": 387 + }, + { + "epoch": 0.04092827004219409, + "grad_norm": 0.8147332072257996, + "learning_rate": 0.0014963954627371823, + "loss": 2.3944, + "step": 388 + }, + { + "epoch": 0.041033755274261605, + "grad_norm": 0.7936195135116577, + "learning_rate": 0.0014963708362963157, + "loss": 2.3867, + "step": 389 + }, + { + "epoch": 0.04113924050632911, + "grad_norm": 0.9974108338356018, + "learning_rate": 0.001496346126220875, + "loss": 2.3903, + "step": 390 + }, + { + "epoch": 0.041244725738396625, + "grad_norm": 1.76646888256073, + "learning_rate": 0.0014963213325136296, + "loss": 2.4065, + "step": 391 + }, + { + "epoch": 0.04135021097046414, + "grad_norm": 0.8132949471473694, + "learning_rate": 0.0014962964551773572, + "loss": 2.3873, + "step": 392 + }, + { + "epoch": 0.041455696202531644, + "grad_norm": 1.8590089082717896, + "learning_rate": 0.0014962714942148457, + "loss": 2.3915, + "step": 393 + }, + { + "epoch": 0.04156118143459916, + "grad_norm": 0.9324873685836792, + "learning_rate": 0.001496246449628892, + "loss": 2.3779, 
+ "step": 394 + }, + { + "epoch": 0.041666666666666664, + "grad_norm": 1.2885868549346924, + "learning_rate": 0.0014962213214223025, + "loss": 2.371, + "step": 395 + }, + { + "epoch": 0.04177215189873418, + "grad_norm": 1.1826075315475464, + "learning_rate": 0.001496196109597893, + "loss": 2.3884, + "step": 396 + }, + { + "epoch": 0.04187763713080169, + "grad_norm": 0.8629667162895203, + "learning_rate": 0.0014961708141584885, + "loss": 2.3631, + "step": 397 + }, + { + "epoch": 0.0419831223628692, + "grad_norm": 0.8228399753570557, + "learning_rate": 0.0014961454351069233, + "loss": 2.3712, + "step": 398 + }, + { + "epoch": 0.04208860759493671, + "grad_norm": 0.7863699793815613, + "learning_rate": 0.0014961199724460418, + "loss": 2.3729, + "step": 399 + }, + { + "epoch": 0.04219409282700422, + "grad_norm": 0.8410415053367615, + "learning_rate": 0.0014960944261786966, + "loss": 2.3744, + "step": 400 + }, + { + "epoch": 0.04229957805907173, + "grad_norm": 0.9725362658500671, + "learning_rate": 0.001496068796307751, + "loss": 2.3479, + "step": 401 + }, + { + "epoch": 0.04240506329113924, + "grad_norm": 1.1419708728790283, + "learning_rate": 0.0014960430828360762, + "loss": 2.3432, + "step": 402 + }, + { + "epoch": 0.04251054852320675, + "grad_norm": 0.9433478713035583, + "learning_rate": 0.001496017285766554, + "loss": 2.3857, + "step": 403 + }, + { + "epoch": 0.042616033755274264, + "grad_norm": 0.7768199443817139, + "learning_rate": 0.0014959914051020748, + "loss": 2.3759, + "step": 404 + }, + { + "epoch": 0.04272151898734177, + "grad_norm": 1.2092794179916382, + "learning_rate": 0.001495965440845539, + "loss": 2.3458, + "step": 405 + }, + { + "epoch": 0.042827004219409284, + "grad_norm": 1.3977795839309692, + "learning_rate": 0.0014959393929998557, + "loss": 2.3928, + "step": 406 + }, + { + "epoch": 0.04293248945147679, + "grad_norm": 0.680060863494873, + "learning_rate": 0.001495913261567944, + "loss": 2.4053, + "step": 407 + }, + { + "epoch": 
0.043037974683544304, + "grad_norm": 1.253244161605835, + "learning_rate": 0.0014958870465527317, + "loss": 2.3707, + "step": 408 + }, + { + "epoch": 0.04314345991561182, + "grad_norm": 1.053170084953308, + "learning_rate": 0.0014958607479571564, + "loss": 2.3604, + "step": 409 + }, + { + "epoch": 0.043248945147679324, + "grad_norm": 0.991131067276001, + "learning_rate": 0.0014958343657841655, + "loss": 2.3456, + "step": 410 + }, + { + "epoch": 0.04335443037974684, + "grad_norm": 1.030563235282898, + "learning_rate": 0.0014958079000367147, + "loss": 2.3399, + "step": 411 + }, + { + "epoch": 0.043459915611814344, + "grad_norm": 1.0305529832839966, + "learning_rate": 0.0014957813507177696, + "loss": 2.3583, + "step": 412 + }, + { + "epoch": 0.04356540084388186, + "grad_norm": 1.1089740991592407, + "learning_rate": 0.0014957547178303054, + "loss": 2.3548, + "step": 413 + }, + { + "epoch": 0.043670886075949364, + "grad_norm": 0.7843964099884033, + "learning_rate": 0.0014957280013773065, + "loss": 2.3227, + "step": 414 + }, + { + "epoch": 0.04377637130801688, + "grad_norm": 0.8057492971420288, + "learning_rate": 0.0014957012013617663, + "loss": 2.3622, + "step": 415 + }, + { + "epoch": 0.04388185654008439, + "grad_norm": 1.1146016120910645, + "learning_rate": 0.0014956743177866882, + "loss": 2.3431, + "step": 416 + }, + { + "epoch": 0.0439873417721519, + "grad_norm": 0.899165689945221, + "learning_rate": 0.0014956473506550845, + "loss": 2.3482, + "step": 417 + }, + { + "epoch": 0.04409282700421941, + "grad_norm": 0.6977551579475403, + "learning_rate": 0.0014956202999699773, + "loss": 2.3589, + "step": 418 + }, + { + "epoch": 0.04419831223628692, + "grad_norm": 1.225568413734436, + "learning_rate": 0.001495593165734397, + "loss": 2.3489, + "step": 419 + }, + { + "epoch": 0.04430379746835443, + "grad_norm": 0.9382018446922302, + "learning_rate": 0.001495565947951385, + "loss": 2.3117, + "step": 420 + }, + { + "epoch": 0.044409282700421944, + "grad_norm": 
0.9728153347969055, + "learning_rate": 0.0014955386466239907, + "loss": 2.3462, + "step": 421 + }, + { + "epoch": 0.04451476793248945, + "grad_norm": 0.966541588306427, + "learning_rate": 0.0014955112617552734, + "loss": 2.3342, + "step": 422 + }, + { + "epoch": 0.044620253164556964, + "grad_norm": 0.8491876125335693, + "learning_rate": 0.001495483793348302, + "loss": 2.3232, + "step": 423 + }, + { + "epoch": 0.04472573839662447, + "grad_norm": 0.8945299386978149, + "learning_rate": 0.0014954562414061538, + "loss": 2.3328, + "step": 424 + }, + { + "epoch": 0.044831223628691984, + "grad_norm": 0.8778835535049438, + "learning_rate": 0.0014954286059319167, + "loss": 2.2869, + "step": 425 + }, + { + "epoch": 0.04493670886075949, + "grad_norm": 0.8796997666358948, + "learning_rate": 0.0014954008869286876, + "loss": 2.3341, + "step": 426 + }, + { + "epoch": 0.045042194092827004, + "grad_norm": 0.8119380474090576, + "learning_rate": 0.001495373084399572, + "loss": 2.3206, + "step": 427 + }, + { + "epoch": 0.04514767932489452, + "grad_norm": 0.7166072726249695, + "learning_rate": 0.0014953451983476854, + "loss": 2.321, + "step": 428 + }, + { + "epoch": 0.045253164556962024, + "grad_norm": 0.7281684875488281, + "learning_rate": 0.0014953172287761529, + "loss": 2.3016, + "step": 429 + }, + { + "epoch": 0.04535864978902954, + "grad_norm": 0.8143855929374695, + "learning_rate": 0.0014952891756881085, + "loss": 2.2795, + "step": 430 + }, + { + "epoch": 0.045464135021097044, + "grad_norm": 0.856016993522644, + "learning_rate": 0.0014952610390866954, + "loss": 2.2706, + "step": 431 + }, + { + "epoch": 0.04556962025316456, + "grad_norm": 0.8886516690254211, + "learning_rate": 0.0014952328189750666, + "loss": 2.2968, + "step": 432 + }, + { + "epoch": 0.04567510548523207, + "grad_norm": 0.830393373966217, + "learning_rate": 0.0014952045153563845, + "loss": 2.2995, + "step": 433 + }, + { + "epoch": 0.04578059071729958, + "grad_norm": 0.8687398433685303, + "learning_rate": 
0.0014951761282338205, + "loss": 2.3214, + "step": 434 + }, + { + "epoch": 0.04588607594936709, + "grad_norm": 0.7907660007476807, + "learning_rate": 0.0014951476576105555, + "loss": 2.3053, + "step": 435 + }, + { + "epoch": 0.0459915611814346, + "grad_norm": 0.6988112330436707, + "learning_rate": 0.00149511910348978, + "loss": 2.3083, + "step": 436 + }, + { + "epoch": 0.04609704641350211, + "grad_norm": 0.894393265247345, + "learning_rate": 0.0014950904658746933, + "loss": 2.3041, + "step": 437 + }, + { + "epoch": 0.046202531645569624, + "grad_norm": 1.0113506317138672, + "learning_rate": 0.0014950617447685047, + "loss": 2.3333, + "step": 438 + }, + { + "epoch": 0.04630801687763713, + "grad_norm": 0.9196192622184753, + "learning_rate": 0.001495032940174432, + "loss": 2.2837, + "step": 439 + }, + { + "epoch": 0.046413502109704644, + "grad_norm": 0.8145056962966919, + "learning_rate": 0.0014950040520957037, + "loss": 2.3153, + "step": 440 + }, + { + "epoch": 0.04651898734177215, + "grad_norm": 0.7384308576583862, + "learning_rate": 0.0014949750805355563, + "loss": 2.3103, + "step": 441 + }, + { + "epoch": 0.04662447257383966, + "grad_norm": 0.6935234665870667, + "learning_rate": 0.0014949460254972363, + "loss": 2.3128, + "step": 442 + }, + { + "epoch": 0.04672995780590717, + "grad_norm": 0.7984455823898315, + "learning_rate": 0.0014949168869839997, + "loss": 2.281, + "step": 443 + }, + { + "epoch": 0.04683544303797468, + "grad_norm": 1.0903993844985962, + "learning_rate": 0.0014948876649991112, + "loss": 2.3075, + "step": 444 + }, + { + "epoch": 0.0469409282700422, + "grad_norm": 1.3251672983169556, + "learning_rate": 0.0014948583595458455, + "loss": 2.3001, + "step": 445 + }, + { + "epoch": 0.0470464135021097, + "grad_norm": 0.865172266960144, + "learning_rate": 0.0014948289706274865, + "loss": 2.3307, + "step": 446 + }, + { + "epoch": 0.04715189873417722, + "grad_norm": 0.7689465284347534, + "learning_rate": 0.0014947994982473273, + "loss": 2.3128, + "step": 447 + 
}, + { + "epoch": 0.04725738396624472, + "grad_norm": 0.8707035183906555, + "learning_rate": 0.0014947699424086704, + "loss": 2.2934, + "step": 448 + }, + { + "epoch": 0.04736286919831224, + "grad_norm": 0.7911492586135864, + "learning_rate": 0.0014947403031148278, + "loss": 2.3378, + "step": 449 + }, + { + "epoch": 0.04746835443037975, + "grad_norm": 0.8448671698570251, + "learning_rate": 0.0014947105803691204, + "loss": 2.2901, + "step": 450 + }, + { + "epoch": 0.047573839662447256, + "grad_norm": 0.7717065811157227, + "learning_rate": 0.0014946807741748791, + "loss": 2.2992, + "step": 451 + }, + { + "epoch": 0.04767932489451477, + "grad_norm": 0.8649860620498657, + "learning_rate": 0.001494650884535444, + "loss": 2.2699, + "step": 452 + }, + { + "epoch": 0.047784810126582276, + "grad_norm": 0.9744870662689209, + "learning_rate": 0.0014946209114541636, + "loss": 2.3378, + "step": 453 + }, + { + "epoch": 0.04789029535864979, + "grad_norm": 1.1223254203796387, + "learning_rate": 0.0014945908549343974, + "loss": 2.3043, + "step": 454 + }, + { + "epoch": 0.047995780590717296, + "grad_norm": 1.312940001487732, + "learning_rate": 0.001494560714979513, + "loss": 2.2895, + "step": 455 + }, + { + "epoch": 0.04810126582278481, + "grad_norm": 0.8678674697875977, + "learning_rate": 0.0014945304915928875, + "loss": 2.2624, + "step": 456 + }, + { + "epoch": 0.04820675105485232, + "grad_norm": 0.8895772695541382, + "learning_rate": 0.0014945001847779082, + "loss": 2.2652, + "step": 457 + }, + { + "epoch": 0.04831223628691983, + "grad_norm": 1.167988896369934, + "learning_rate": 0.0014944697945379708, + "loss": 2.2633, + "step": 458 + }, + { + "epoch": 0.04841772151898734, + "grad_norm": 1.1970216035842896, + "learning_rate": 0.0014944393208764805, + "loss": 2.271, + "step": 459 + }, + { + "epoch": 0.04852320675105485, + "grad_norm": 0.6726823449134827, + "learning_rate": 0.0014944087637968522, + "loss": 2.3048, + "step": 460 + }, + { + "epoch": 0.04862869198312236, + 
"grad_norm": 1.1446876525878906, + "learning_rate": 0.00149437812330251, + "loss": 2.2768, + "step": 461 + }, + { + "epoch": 0.048734177215189876, + "grad_norm": 1.4397507905960083, + "learning_rate": 0.0014943473993968871, + "loss": 2.3007, + "step": 462 + }, + { + "epoch": 0.04883966244725738, + "grad_norm": 0.8814982175827026, + "learning_rate": 0.0014943165920834266, + "loss": 2.267, + "step": 463 + }, + { + "epoch": 0.048945147679324896, + "grad_norm": 1.4293931722640991, + "learning_rate": 0.0014942857013655806, + "loss": 2.2735, + "step": 464 + }, + { + "epoch": 0.0490506329113924, + "grad_norm": 1.076352834701538, + "learning_rate": 0.0014942547272468103, + "loss": 2.2717, + "step": 465 + }, + { + "epoch": 0.049156118143459916, + "grad_norm": 0.9571002721786499, + "learning_rate": 0.0014942236697305866, + "loss": 2.2435, + "step": 466 + }, + { + "epoch": 0.04926160337552743, + "grad_norm": 0.9082633852958679, + "learning_rate": 0.0014941925288203897, + "loss": 2.2799, + "step": 467 + }, + { + "epoch": 0.049367088607594936, + "grad_norm": 0.9408537745475769, + "learning_rate": 0.001494161304519709, + "loss": 2.2627, + "step": 468 + }, + { + "epoch": 0.04947257383966245, + "grad_norm": 1.1063525676727295, + "learning_rate": 0.0014941299968320434, + "loss": 2.2452, + "step": 469 + }, + { + "epoch": 0.049578059071729956, + "grad_norm": 0.8353798985481262, + "learning_rate": 0.0014940986057609012, + "loss": 2.2493, + "step": 470 + }, + { + "epoch": 0.04968354430379747, + "grad_norm": 0.7874616980552673, + "learning_rate": 0.0014940671313097998, + "loss": 2.2507, + "step": 471 + }, + { + "epoch": 0.049789029535864976, + "grad_norm": 1.1063933372497559, + "learning_rate": 0.001494035573482266, + "loss": 2.2491, + "step": 472 + }, + { + "epoch": 0.04989451476793249, + "grad_norm": 1.1268893480300903, + "learning_rate": 0.0014940039322818362, + "loss": 2.2779, + "step": 473 + }, + { + "epoch": 0.05, + "grad_norm": 0.8950392007827759, + "learning_rate": 
0.0014939722077120558, + "loss": 2.28, + "step": 474 + }, + { + "epoch": 0.05010548523206751, + "grad_norm": 0.7497607469558716, + "learning_rate": 0.0014939403997764795, + "loss": 2.2653, + "step": 475 + }, + { + "epoch": 0.05021097046413502, + "grad_norm": 0.8274568915367126, + "learning_rate": 0.001493908508478672, + "loss": 2.2457, + "step": 476 + }, + { + "epoch": 0.05031645569620253, + "grad_norm": 0.8084931373596191, + "learning_rate": 0.0014938765338222068, + "loss": 2.2197, + "step": 477 + }, + { + "epoch": 0.05042194092827004, + "grad_norm": 0.7706467509269714, + "learning_rate": 0.0014938444758106665, + "loss": 2.2961, + "step": 478 + }, + { + "epoch": 0.050527426160337556, + "grad_norm": 0.9420619010925293, + "learning_rate": 0.0014938123344476436, + "loss": 2.2329, + "step": 479 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 1.5951966047286987, + "learning_rate": 0.0014937801097367396, + "loss": 2.2623, + "step": 480 + }, + { + "epoch": 0.050738396624472576, + "grad_norm": 0.8614630699157715, + "learning_rate": 0.0014937478016815657, + "loss": 2.2219, + "step": 481 + }, + { + "epoch": 0.05084388185654008, + "grad_norm": 0.870225727558136, + "learning_rate": 0.0014937154102857416, + "loss": 2.2502, + "step": 482 + }, + { + "epoch": 0.050949367088607596, + "grad_norm": 1.0625172853469849, + "learning_rate": 0.0014936829355528976, + "loss": 2.2805, + "step": 483 + }, + { + "epoch": 0.0510548523206751, + "grad_norm": 0.9393806457519531, + "learning_rate": 0.0014936503774866721, + "loss": 2.2467, + "step": 484 + }, + { + "epoch": 0.051160337552742616, + "grad_norm": 0.9601203203201294, + "learning_rate": 0.0014936177360907138, + "loss": 2.1972, + "step": 485 + }, + { + "epoch": 0.05126582278481013, + "grad_norm": 0.7535005807876587, + "learning_rate": 0.00149358501136868, + "loss": 2.195, + "step": 486 + }, + { + "epoch": 0.051371308016877636, + "grad_norm": 0.7087482810020447, + "learning_rate": 0.0014935522033242379, + "loss": 2.2481, + "step": 
487 + }, + { + "epoch": 0.05147679324894515, + "grad_norm": 1.0566579103469849, + "learning_rate": 0.0014935193119610638, + "loss": 2.2217, + "step": 488 + }, + { + "epoch": 0.051582278481012656, + "grad_norm": 1.0319668054580688, + "learning_rate": 0.0014934863372828432, + "loss": 2.2149, + "step": 489 + }, + { + "epoch": 0.05168776371308017, + "grad_norm": 0.8528290390968323, + "learning_rate": 0.001493453279293271, + "loss": 2.2451, + "step": 490 + }, + { + "epoch": 0.05179324894514768, + "grad_norm": 0.7560349106788635, + "learning_rate": 0.001493420137996052, + "loss": 2.2414, + "step": 491 + }, + { + "epoch": 0.05189873417721519, + "grad_norm": 1.1141968965530396, + "learning_rate": 0.0014933869133948992, + "loss": 2.2378, + "step": 492 + }, + { + "epoch": 0.0520042194092827, + "grad_norm": 1.287231206893921, + "learning_rate": 0.0014933536054935362, + "loss": 2.2428, + "step": 493 + }, + { + "epoch": 0.05210970464135021, + "grad_norm": 0.7439708113670349, + "learning_rate": 0.0014933202142956947, + "loss": 2.2351, + "step": 494 + }, + { + "epoch": 0.05221518987341772, + "grad_norm": 1.0523273944854736, + "learning_rate": 0.0014932867398051168, + "loss": 2.2262, + "step": 495 + }, + { + "epoch": 0.05232067510548523, + "grad_norm": 1.1790101528167725, + "learning_rate": 0.0014932531820255534, + "loss": 2.2128, + "step": 496 + }, + { + "epoch": 0.05242616033755274, + "grad_norm": 0.8038113713264465, + "learning_rate": 0.0014932195409607645, + "loss": 2.2372, + "step": 497 + }, + { + "epoch": 0.052531645569620256, + "grad_norm": 0.9719715714454651, + "learning_rate": 0.0014931858166145203, + "loss": 2.2016, + "step": 498 + }, + { + "epoch": 0.05263713080168776, + "grad_norm": 0.9810130596160889, + "learning_rate": 0.0014931520089905993, + "loss": 2.2229, + "step": 499 + }, + { + "epoch": 0.052742616033755275, + "grad_norm": 0.9817430973052979, + "learning_rate": 0.0014931181180927902, + "loss": 2.2201, + "step": 500 + }, + { + "epoch": 0.05284810126582278, + 
"grad_norm": 0.9961178302764893, + "learning_rate": 0.0014930841439248904, + "loss": 2.2275, + "step": 501 + }, + { + "epoch": 0.052953586497890295, + "grad_norm": 0.9383935928344727, + "learning_rate": 0.0014930500864907066, + "loss": 2.2633, + "step": 502 + }, + { + "epoch": 0.05305907172995781, + "grad_norm": 0.8046041131019592, + "learning_rate": 0.001493015945794056, + "loss": 2.1812, + "step": 503 + }, + { + "epoch": 0.053164556962025315, + "grad_norm": 0.8859094381332397, + "learning_rate": 0.0014929817218387632, + "loss": 2.201, + "step": 504 + }, + { + "epoch": 0.05327004219409283, + "grad_norm": 0.8112618327140808, + "learning_rate": 0.0014929474146286638, + "loss": 2.2084, + "step": 505 + }, + { + "epoch": 0.053375527426160335, + "grad_norm": 0.7949586510658264, + "learning_rate": 0.001492913024167602, + "loss": 2.2503, + "step": 506 + }, + { + "epoch": 0.05348101265822785, + "grad_norm": 0.85134357213974, + "learning_rate": 0.001492878550459431, + "loss": 2.2369, + "step": 507 + }, + { + "epoch": 0.05358649789029536, + "grad_norm": 0.8256014585494995, + "learning_rate": 0.0014928439935080143, + "loss": 2.2075, + "step": 508 + }, + { + "epoch": 0.05369198312236287, + "grad_norm": 0.7281264662742615, + "learning_rate": 0.0014928093533172243, + "loss": 2.1642, + "step": 509 + }, + { + "epoch": 0.05379746835443038, + "grad_norm": 0.9713343977928162, + "learning_rate": 0.001492774629890942, + "loss": 2.2312, + "step": 510 + }, + { + "epoch": 0.05390295358649789, + "grad_norm": 0.756997287273407, + "learning_rate": 0.0014927398232330584, + "loss": 2.1979, + "step": 511 + }, + { + "epoch": 0.0540084388185654, + "grad_norm": 1.107666254043579, + "learning_rate": 0.0014927049333474743, + "loss": 2.2241, + "step": 512 + }, + { + "epoch": 0.05411392405063291, + "grad_norm": 1.1888220310211182, + "learning_rate": 0.001492669960238099, + "loss": 2.2253, + "step": 513 + }, + { + "epoch": 0.05421940928270042, + "grad_norm": 0.8283997774124146, + "learning_rate": 
0.001492634903908851, + "loss": 2.2185, + "step": 514 + }, + { + "epoch": 0.054324894514767935, + "grad_norm": 0.7774815559387207, + "learning_rate": 0.001492599764363659, + "loss": 2.1864, + "step": 515 + }, + { + "epoch": 0.05443037974683544, + "grad_norm": 0.9652143120765686, + "learning_rate": 0.0014925645416064605, + "loss": 2.2109, + "step": 516 + }, + { + "epoch": 0.054535864978902955, + "grad_norm": 0.9533340334892273, + "learning_rate": 0.0014925292356412025, + "loss": 2.202, + "step": 517 + }, + { + "epoch": 0.05464135021097046, + "grad_norm": 0.9700469970703125, + "learning_rate": 0.001492493846471841, + "loss": 2.155, + "step": 518 + }, + { + "epoch": 0.054746835443037975, + "grad_norm": 1.0206139087677002, + "learning_rate": 0.0014924583741023417, + "loss": 2.2072, + "step": 519 + }, + { + "epoch": 0.05485232067510549, + "grad_norm": 0.8756183385848999, + "learning_rate": 0.001492422818536679, + "loss": 2.2135, + "step": 520 + }, + { + "epoch": 0.054957805907172995, + "grad_norm": 0.8507488369941711, + "learning_rate": 0.0014923871797788378, + "loss": 2.2017, + "step": 521 + }, + { + "epoch": 0.05506329113924051, + "grad_norm": 1.0308321714401245, + "learning_rate": 0.001492351457832811, + "loss": 2.1806, + "step": 522 + }, + { + "epoch": 0.055168776371308015, + "grad_norm": 0.9579706192016602, + "learning_rate": 0.0014923156527026017, + "loss": 2.2148, + "step": 523 + }, + { + "epoch": 0.05527426160337553, + "grad_norm": 0.7602447271347046, + "learning_rate": 0.001492279764392222, + "loss": 2.2064, + "step": 524 + }, + { + "epoch": 0.055379746835443035, + "grad_norm": 0.7868837714195251, + "learning_rate": 0.0014922437929056934, + "loss": 2.1914, + "step": 525 + }, + { + "epoch": 0.05548523206751055, + "grad_norm": 0.9722076654434204, + "learning_rate": 0.0014922077382470468, + "loss": 2.1704, + "step": 526 + }, + { + "epoch": 0.05559071729957806, + "grad_norm": 0.8349431753158569, + "learning_rate": 0.001492171600420322, + "loss": 2.1679, + "step": 
527 + }, + { + "epoch": 0.05569620253164557, + "grad_norm": 0.8372104167938232, + "learning_rate": 0.0014921353794295684, + "loss": 2.1964, + "step": 528 + }, + { + "epoch": 0.05580168776371308, + "grad_norm": 0.8027809262275696, + "learning_rate": 0.001492099075278845, + "loss": 2.1975, + "step": 529 + }, + { + "epoch": 0.05590717299578059, + "grad_norm": 0.7860098481178284, + "learning_rate": 0.00149206268797222, + "loss": 2.1842, + "step": 530 + }, + { + "epoch": 0.0560126582278481, + "grad_norm": 0.9505695700645447, + "learning_rate": 0.0014920262175137703, + "loss": 2.1938, + "step": 531 + }, + { + "epoch": 0.056118143459915615, + "grad_norm": 1.1932835578918457, + "learning_rate": 0.001491989663907583, + "loss": 2.1517, + "step": 532 + }, + { + "epoch": 0.05622362869198312, + "grad_norm": 1.2619564533233643, + "learning_rate": 0.001491953027157754, + "loss": 2.1814, + "step": 533 + }, + { + "epoch": 0.056329113924050635, + "grad_norm": 0.840788722038269, + "learning_rate": 0.0014919163072683883, + "loss": 2.175, + "step": 534 + }, + { + "epoch": 0.05643459915611814, + "grad_norm": 0.8327469229698181, + "learning_rate": 0.0014918795042436013, + "loss": 2.1812, + "step": 535 + }, + { + "epoch": 0.056540084388185655, + "grad_norm": 0.7572490572929382, + "learning_rate": 0.001491842618087516, + "loss": 2.163, + "step": 536 + }, + { + "epoch": 0.05664556962025316, + "grad_norm": 0.7168274521827698, + "learning_rate": 0.0014918056488042665, + "loss": 2.1464, + "step": 537 + }, + { + "epoch": 0.056751054852320675, + "grad_norm": 0.8444762825965881, + "learning_rate": 0.0014917685963979949, + "loss": 2.1721, + "step": 538 + }, + { + "epoch": 0.05685654008438819, + "grad_norm": 0.9437917470932007, + "learning_rate": 0.0014917314608728536, + "loss": 2.1827, + "step": 539 + }, + { + "epoch": 0.056962025316455694, + "grad_norm": 1.0266352891921997, + "learning_rate": 0.0014916942422330032, + "loss": 2.1852, + "step": 540 + }, + { + "epoch": 0.05706751054852321, + 
"grad_norm": 0.9508644938468933, + "learning_rate": 0.0014916569404826146, + "loss": 2.2092, + "step": 541 + }, + { + "epoch": 0.057172995780590714, + "grad_norm": 0.9399073719978333, + "learning_rate": 0.0014916195556258676, + "loss": 2.1886, + "step": 542 + }, + { + "epoch": 0.05727848101265823, + "grad_norm": 0.7659522891044617, + "learning_rate": 0.0014915820876669514, + "loss": 2.1441, + "step": 543 + }, + { + "epoch": 0.05738396624472574, + "grad_norm": 0.8380123376846313, + "learning_rate": 0.0014915445366100641, + "loss": 2.1846, + "step": 544 + }, + { + "epoch": 0.05748945147679325, + "grad_norm": 0.8105321526527405, + "learning_rate": 0.0014915069024594144, + "loss": 2.1841, + "step": 545 + }, + { + "epoch": 0.05759493670886076, + "grad_norm": 0.8955923318862915, + "learning_rate": 0.0014914691852192183, + "loss": 2.1655, + "step": 546 + }, + { + "epoch": 0.05770042194092827, + "grad_norm": 1.0156301259994507, + "learning_rate": 0.001491431384893703, + "loss": 2.1767, + "step": 547 + }, + { + "epoch": 0.05780590717299578, + "grad_norm": 0.9511234164237976, + "learning_rate": 0.0014913935014871035, + "loss": 2.1889, + "step": 548 + }, + { + "epoch": 0.057911392405063294, + "grad_norm": 0.7168984413146973, + "learning_rate": 0.0014913555350036657, + "loss": 2.1755, + "step": 549 + }, + { + "epoch": 0.0580168776371308, + "grad_norm": 0.724568247795105, + "learning_rate": 0.001491317485447643, + "loss": 2.1389, + "step": 550 + }, + { + "epoch": 0.058122362869198314, + "grad_norm": 0.8156276345252991, + "learning_rate": 0.0014912793528233, + "loss": 2.1302, + "step": 551 + }, + { + "epoch": 0.05822784810126582, + "grad_norm": 0.7771896123886108, + "learning_rate": 0.0014912411371349088, + "loss": 2.1662, + "step": 552 + }, + { + "epoch": 0.058333333333333334, + "grad_norm": 0.8348959684371948, + "learning_rate": 0.0014912028383867522, + "loss": 2.1615, + "step": 553 + }, + { + "epoch": 0.05843881856540084, + "grad_norm": 0.7651283740997314, + "learning_rate": 
0.0014911644565831217, + "loss": 2.0992, + "step": 554 + }, + { + "epoch": 0.058544303797468354, + "grad_norm": 0.8170585632324219, + "learning_rate": 0.001491125991728318, + "loss": 2.1124, + "step": 555 + }, + { + "epoch": 0.05864978902953587, + "grad_norm": 0.8377650380134583, + "learning_rate": 0.001491087443826651, + "loss": 2.1453, + "step": 556 + }, + { + "epoch": 0.058755274261603374, + "grad_norm": 0.8116096258163452, + "learning_rate": 0.0014910488128824409, + "loss": 2.1793, + "step": 557 + }, + { + "epoch": 0.05886075949367089, + "grad_norm": 0.7576544880867004, + "learning_rate": 0.0014910100989000159, + "loss": 2.1346, + "step": 558 + }, + { + "epoch": 0.058966244725738394, + "grad_norm": 1.0041922330856323, + "learning_rate": 0.0014909713018837144, + "loss": 2.1111, + "step": 559 + }, + { + "epoch": 0.05907172995780591, + "grad_norm": 1.1568418741226196, + "learning_rate": 0.0014909324218378838, + "loss": 2.1451, + "step": 560 + }, + { + "epoch": 0.05917721518987342, + "grad_norm": 0.8902949690818787, + "learning_rate": 0.0014908934587668805, + "loss": 2.1401, + "step": 561 + }, + { + "epoch": 0.05928270042194093, + "grad_norm": 0.6587743759155273, + "learning_rate": 0.001490854412675071, + "loss": 2.1281, + "step": 562 + }, + { + "epoch": 0.05938818565400844, + "grad_norm": 0.8118045926094055, + "learning_rate": 0.0014908152835668301, + "loss": 2.1668, + "step": 563 + }, + { + "epoch": 0.05949367088607595, + "grad_norm": 0.9985524415969849, + "learning_rate": 0.0014907760714465428, + "loss": 2.1553, + "step": 564 + }, + { + "epoch": 0.05959915611814346, + "grad_norm": 1.1944894790649414, + "learning_rate": 0.0014907367763186026, + "loss": 2.1572, + "step": 565 + }, + { + "epoch": 0.05970464135021097, + "grad_norm": 0.9586542248725891, + "learning_rate": 0.0014906973981874132, + "loss": 2.1629, + "step": 566 + }, + { + "epoch": 0.05981012658227848, + "grad_norm": 0.8030982613563538, + "learning_rate": 0.0014906579370573868, + "loss": 2.1681, + 
"step": 567 + }, + { + "epoch": 0.059915611814345994, + "grad_norm": 0.7447845935821533, + "learning_rate": 0.0014906183929329455, + "loss": 2.1044, + "step": 568 + }, + { + "epoch": 0.0600210970464135, + "grad_norm": 1.0295255184173584, + "learning_rate": 0.00149057876581852, + "loss": 2.0917, + "step": 569 + }, + { + "epoch": 0.060126582278481014, + "grad_norm": 1.25404953956604, + "learning_rate": 0.0014905390557185508, + "loss": 2.1779, + "step": 570 + }, + { + "epoch": 0.06023206751054852, + "grad_norm": 0.919904887676239, + "learning_rate": 0.0014904992626374879, + "loss": 2.1576, + "step": 571 + }, + { + "epoch": 0.060337552742616034, + "grad_norm": 0.8260601162910461, + "learning_rate": 0.0014904593865797903, + "loss": 2.1602, + "step": 572 + }, + { + "epoch": 0.06044303797468355, + "grad_norm": 1.1029646396636963, + "learning_rate": 0.0014904194275499258, + "loss": 2.1587, + "step": 573 + }, + { + "epoch": 0.060548523206751054, + "grad_norm": 1.2216222286224365, + "learning_rate": 0.0014903793855523726, + "loss": 2.1221, + "step": 574 + }, + { + "epoch": 0.06065400843881857, + "grad_norm": 0.8967699408531189, + "learning_rate": 0.0014903392605916175, + "loss": 2.1728, + "step": 575 + }, + { + "epoch": 0.060759493670886074, + "grad_norm": 1.0319534540176392, + "learning_rate": 0.0014902990526721564, + "loss": 2.1623, + "step": 576 + }, + { + "epoch": 0.06086497890295359, + "grad_norm": 1.2141618728637695, + "learning_rate": 0.0014902587617984951, + "loss": 2.1398, + "step": 577 + }, + { + "epoch": 0.0609704641350211, + "grad_norm": 0.8922154307365417, + "learning_rate": 0.0014902183879751483, + "loss": 2.1405, + "step": 578 + }, + { + "epoch": 0.06107594936708861, + "grad_norm": 1.2008285522460938, + "learning_rate": 0.0014901779312066399, + "loss": 2.1425, + "step": 579 + }, + { + "epoch": 0.06118143459915612, + "grad_norm": 1.415653109550476, + "learning_rate": 0.0014901373914975036, + "loss": 2.1502, + "step": 580 + }, + { + "epoch": 0.06128691983122363, 
+ "grad_norm": 0.8850347399711609, + "learning_rate": 0.0014900967688522818, + "loss": 2.1315, + "step": 581 + }, + { + "epoch": 0.06139240506329114, + "grad_norm": 0.856171190738678, + "learning_rate": 0.0014900560632755265, + "loss": 2.0978, + "step": 582 + }, + { + "epoch": 0.06149789029535865, + "grad_norm": 1.0935858488082886, + "learning_rate": 0.0014900152747717994, + "loss": 2.1514, + "step": 583 + }, + { + "epoch": 0.06160337552742616, + "grad_norm": 0.8829552531242371, + "learning_rate": 0.0014899744033456705, + "loss": 2.1141, + "step": 584 + }, + { + "epoch": 0.061708860759493674, + "grad_norm": 0.6998454332351685, + "learning_rate": 0.0014899334490017198, + "loss": 2.15, + "step": 585 + }, + { + "epoch": 0.06181434599156118, + "grad_norm": 0.8516126871109009, + "learning_rate": 0.0014898924117445367, + "loss": 2.118, + "step": 586 + }, + { + "epoch": 0.061919831223628694, + "grad_norm": 1.0134931802749634, + "learning_rate": 0.0014898512915787192, + "loss": 2.0872, + "step": 587 + }, + { + "epoch": 0.0620253164556962, + "grad_norm": 1.2504044771194458, + "learning_rate": 0.0014898100885088754, + "loss": 2.1077, + "step": 588 + }, + { + "epoch": 0.06213080168776371, + "grad_norm": 0.7985358834266663, + "learning_rate": 0.001489768802539622, + "loss": 2.1264, + "step": 589 + }, + { + "epoch": 0.06223628691983123, + "grad_norm": 0.9122056365013123, + "learning_rate": 0.0014897274336755856, + "loss": 2.0657, + "step": 590 + }, + { + "epoch": 0.06234177215189873, + "grad_norm": 1.3255841732025146, + "learning_rate": 0.0014896859819214018, + "loss": 2.0919, + "step": 591 + }, + { + "epoch": 0.06244725738396625, + "grad_norm": 0.7854761481285095, + "learning_rate": 0.001489644447281715, + "loss": 2.0974, + "step": 592 + }, + { + "epoch": 0.06255274261603376, + "grad_norm": 0.8351320028305054, + "learning_rate": 0.00148960282976118, + "loss": 2.1211, + "step": 593 + }, + { + "epoch": 0.06265822784810127, + "grad_norm": 0.8409185409545898, + "learning_rate": 
0.0014895611293644596, + "loss": 2.0788, + "step": 594 + }, + { + "epoch": 0.06276371308016877, + "grad_norm": 0.7872886061668396, + "learning_rate": 0.0014895193460962271, + "loss": 2.099, + "step": 595 + }, + { + "epoch": 0.0628691983122363, + "grad_norm": 0.7821812033653259, + "learning_rate": 0.001489477479961164, + "loss": 2.0629, + "step": 596 + }, + { + "epoch": 0.0629746835443038, + "grad_norm": 0.7939206957817078, + "learning_rate": 0.0014894355309639621, + "loss": 2.0922, + "step": 597 + }, + { + "epoch": 0.0630801687763713, + "grad_norm": 0.8659226894378662, + "learning_rate": 0.0014893934991093221, + "loss": 2.1426, + "step": 598 + }, + { + "epoch": 0.06318565400843881, + "grad_norm": 0.8322390913963318, + "learning_rate": 0.0014893513844019533, + "loss": 2.0977, + "step": 599 + }, + { + "epoch": 0.06329113924050633, + "grad_norm": 0.9229258298873901, + "learning_rate": 0.001489309186846575, + "loss": 2.0952, + "step": 600 + }, + { + "epoch": 0.06339662447257384, + "grad_norm": 1.0476220846176147, + "learning_rate": 0.001489266906447916, + "loss": 2.1082, + "step": 601 + }, + { + "epoch": 0.06350210970464135, + "grad_norm": 0.8791822791099548, + "learning_rate": 0.0014892245432107138, + "loss": 2.0931, + "step": 602 + }, + { + "epoch": 0.06360759493670887, + "grad_norm": 0.8420791625976562, + "learning_rate": 0.0014891820971397152, + "loss": 2.1282, + "step": 603 + }, + { + "epoch": 0.06371308016877637, + "grad_norm": 0.7653708457946777, + "learning_rate": 0.001489139568239677, + "loss": 2.0653, + "step": 604 + }, + { + "epoch": 0.06381856540084388, + "grad_norm": 0.7430724501609802, + "learning_rate": 0.0014890969565153642, + "loss": 2.0993, + "step": 605 + }, + { + "epoch": 0.06392405063291139, + "grad_norm": 0.9271796345710754, + "learning_rate": 0.0014890542619715522, + "loss": 2.0703, + "step": 606 + }, + { + "epoch": 0.0640295358649789, + "grad_norm": 1.0561275482177734, + "learning_rate": 0.0014890114846130248, + "loss": 2.1246, + "step": 607 + 
}, + { + "epoch": 0.06413502109704641, + "grad_norm": 0.889785647392273, + "learning_rate": 0.0014889686244445755, + "loss": 2.1513, + "step": 608 + }, + { + "epoch": 0.06424050632911392, + "grad_norm": 0.7398675680160522, + "learning_rate": 0.0014889256814710071, + "loss": 2.0975, + "step": 609 + }, + { + "epoch": 0.06434599156118144, + "grad_norm": 0.7498558163642883, + "learning_rate": 0.0014888826556971313, + "loss": 2.0851, + "step": 610 + }, + { + "epoch": 0.06445147679324895, + "grad_norm": 0.8774113655090332, + "learning_rate": 0.0014888395471277698, + "loss": 2.1048, + "step": 611 + }, + { + "epoch": 0.06455696202531645, + "grad_norm": 0.793350100517273, + "learning_rate": 0.0014887963557677526, + "loss": 2.083, + "step": 612 + }, + { + "epoch": 0.06466244725738397, + "grad_norm": 0.8314034938812256, + "learning_rate": 0.00148875308162192, + "loss": 2.076, + "step": 613 + }, + { + "epoch": 0.06476793248945148, + "grad_norm": 0.955161988735199, + "learning_rate": 0.0014887097246951205, + "loss": 2.0569, + "step": 614 + }, + { + "epoch": 0.06487341772151899, + "grad_norm": 1.140178918838501, + "learning_rate": 0.001488666284992213, + "loss": 2.1078, + "step": 615 + }, + { + "epoch": 0.06497890295358649, + "grad_norm": 1.0809495449066162, + "learning_rate": 0.001488622762518065, + "loss": 2.0924, + "step": 616 + }, + { + "epoch": 0.06508438818565401, + "grad_norm": 1.0806293487548828, + "learning_rate": 0.0014885791572775533, + "loss": 2.0943, + "step": 617 + }, + { + "epoch": 0.06518987341772152, + "grad_norm": 1.005719542503357, + "learning_rate": 0.0014885354692755642, + "loss": 2.1184, + "step": 618 + }, + { + "epoch": 0.06529535864978903, + "grad_norm": 0.9364853501319885, + "learning_rate": 0.001488491698516993, + "loss": 2.1236, + "step": 619 + }, + { + "epoch": 0.06540084388185655, + "grad_norm": 1.0211985111236572, + "learning_rate": 0.0014884478450067444, + "loss": 2.0983, + "step": 620 + }, + { + "epoch": 0.06550632911392405, + "grad_norm": 
1.0820415019989014, + "learning_rate": 0.001488403908749733, + "loss": 2.0727, + "step": 621 + }, + { + "epoch": 0.06561181434599156, + "grad_norm": 0.6914173364639282, + "learning_rate": 0.0014883598897508811, + "loss": 2.1079, + "step": 622 + }, + { + "epoch": 0.06571729957805907, + "grad_norm": 1.0765446424484253, + "learning_rate": 0.0014883157880151222, + "loss": 2.0782, + "step": 623 + }, + { + "epoch": 0.06582278481012659, + "grad_norm": 1.3546998500823975, + "learning_rate": 0.0014882716035473974, + "loss": 2.0609, + "step": 624 + }, + { + "epoch": 0.06592827004219409, + "grad_norm": 0.8620421886444092, + "learning_rate": 0.001488227336352658, + "loss": 2.0831, + "step": 625 + }, + { + "epoch": 0.0660337552742616, + "grad_norm": 0.9241822361946106, + "learning_rate": 0.0014881829864358644, + "loss": 2.0623, + "step": 626 + }, + { + "epoch": 0.06613924050632912, + "grad_norm": 0.6854711771011353, + "learning_rate": 0.0014881385538019867, + "loss": 2.0952, + "step": 627 + }, + { + "epoch": 0.06624472573839663, + "grad_norm": 0.7871749401092529, + "learning_rate": 0.0014880940384560028, + "loss": 2.0647, + "step": 628 + }, + { + "epoch": 0.06635021097046413, + "grad_norm": 0.8690797686576843, + "learning_rate": 0.0014880494404029016, + "loss": 2.0929, + "step": 629 + }, + { + "epoch": 0.06645569620253164, + "grad_norm": 0.7756013870239258, + "learning_rate": 0.0014880047596476807, + "loss": 2.0488, + "step": 630 + }, + { + "epoch": 0.06656118143459916, + "grad_norm": 0.9794647097587585, + "learning_rate": 0.0014879599961953461, + "loss": 2.0534, + "step": 631 + }, + { + "epoch": 0.06666666666666667, + "grad_norm": 1.504712462425232, + "learning_rate": 0.0014879151500509142, + "loss": 2.1107, + "step": 632 + }, + { + "epoch": 0.06677215189873417, + "grad_norm": 0.9072020053863525, + "learning_rate": 0.0014878702212194103, + "loss": 2.0839, + "step": 633 + }, + { + "epoch": 0.06687763713080169, + "grad_norm": 1.0631102323532104, + "learning_rate": 
0.0014878252097058685, + "loss": 2.065, + "step": 634 + }, + { + "epoch": 0.0669831223628692, + "grad_norm": 1.2744462490081787, + "learning_rate": 0.001487780115515333, + "loss": 2.1363, + "step": 635 + }, + { + "epoch": 0.0670886075949367, + "grad_norm": 1.0638090372085571, + "learning_rate": 0.0014877349386528565, + "loss": 2.0798, + "step": 636 + }, + { + "epoch": 0.06719409282700423, + "grad_norm": 0.8622269630432129, + "learning_rate": 0.0014876896791235015, + "loss": 2.126, + "step": 637 + }, + { + "epoch": 0.06729957805907173, + "grad_norm": 1.1233619451522827, + "learning_rate": 0.0014876443369323397, + "loss": 2.1054, + "step": 638 + }, + { + "epoch": 0.06740506329113924, + "grad_norm": 1.232654333114624, + "learning_rate": 0.0014875989120844517, + "loss": 2.0895, + "step": 639 + }, + { + "epoch": 0.06751054852320675, + "grad_norm": 0.8006609678268433, + "learning_rate": 0.0014875534045849274, + "loss": 2.0876, + "step": 640 + }, + { + "epoch": 0.06761603375527427, + "grad_norm": 0.8102526664733887, + "learning_rate": 0.0014875078144388665, + "loss": 2.0762, + "step": 641 + }, + { + "epoch": 0.06772151898734177, + "grad_norm": 1.004589319229126, + "learning_rate": 0.0014874621416513774, + "loss": 2.0797, + "step": 642 + }, + { + "epoch": 0.06782700421940928, + "grad_norm": 0.944071352481842, + "learning_rate": 0.001487416386227578, + "loss": 2.0922, + "step": 643 + }, + { + "epoch": 0.0679324894514768, + "grad_norm": 0.7743476629257202, + "learning_rate": 0.0014873705481725952, + "loss": 2.0315, + "step": 644 + }, + { + "epoch": 0.0680379746835443, + "grad_norm": 0.6949995756149292, + "learning_rate": 0.0014873246274915658, + "loss": 2.0428, + "step": 645 + }, + { + "epoch": 0.06814345991561181, + "grad_norm": 0.9288864731788635, + "learning_rate": 0.0014872786241896354, + "loss": 2.0263, + "step": 646 + }, + { + "epoch": 0.06824894514767932, + "grad_norm": 1.0589450597763062, + "learning_rate": 0.0014872325382719587, + "loss": 2.1058, + "step": 647 + }, 
+ { + "epoch": 0.06835443037974684, + "grad_norm": 0.7766799330711365, + "learning_rate": 0.0014871863697436998, + "loss": 2.1175, + "step": 648 + }, + { + "epoch": 0.06845991561181435, + "grad_norm": 0.7851554751396179, + "learning_rate": 0.0014871401186100322, + "loss": 2.0807, + "step": 649 + }, + { + "epoch": 0.06856540084388185, + "grad_norm": 1.0592528581619263, + "learning_rate": 0.0014870937848761388, + "loss": 2.0949, + "step": 650 + }, + { + "epoch": 0.06867088607594937, + "grad_norm": 0.9170325398445129, + "learning_rate": 0.0014870473685472112, + "loss": 2.048, + "step": 651 + }, + { + "epoch": 0.06877637130801688, + "grad_norm": 0.8597625494003296, + "learning_rate": 0.0014870008696284507, + "loss": 2.0612, + "step": 652 + }, + { + "epoch": 0.06888185654008439, + "grad_norm": 0.8671399354934692, + "learning_rate": 0.0014869542881250678, + "loss": 2.0647, + "step": 653 + }, + { + "epoch": 0.0689873417721519, + "grad_norm": 1.062805414199829, + "learning_rate": 0.001486907624042282, + "loss": 2.093, + "step": 654 + }, + { + "epoch": 0.06909282700421941, + "grad_norm": 0.8067552447319031, + "learning_rate": 0.0014868608773853226, + "loss": 2.0676, + "step": 655 + }, + { + "epoch": 0.06919831223628692, + "grad_norm": 0.7604615688323975, + "learning_rate": 0.0014868140481594273, + "loss": 2.0761, + "step": 656 + }, + { + "epoch": 0.06930379746835443, + "grad_norm": 0.968817949295044, + "learning_rate": 0.001486767136369844, + "loss": 2.045, + "step": 657 + }, + { + "epoch": 0.06940928270042195, + "grad_norm": 0.9282968044281006, + "learning_rate": 0.0014867201420218292, + "loss": 2.0356, + "step": 658 + }, + { + "epoch": 0.06951476793248945, + "grad_norm": 0.7253937125205994, + "learning_rate": 0.0014866730651206487, + "loss": 2.0646, + "step": 659 + }, + { + "epoch": 0.06962025316455696, + "grad_norm": 1.0265213251113892, + "learning_rate": 0.001486625905671578, + "loss": 2.0551, + "step": 660 + }, + { + "epoch": 0.06972573839662448, + "grad_norm": 
1.5000754594802856, + "learning_rate": 0.0014865786636799015, + "loss": 2.0472, + "step": 661 + }, + { + "epoch": 0.06983122362869199, + "grad_norm": 0.7789219617843628, + "learning_rate": 0.0014865313391509126, + "loss": 2.0486, + "step": 662 + }, + { + "epoch": 0.06993670886075949, + "grad_norm": 2.084035634994507, + "learning_rate": 0.0014864839320899148, + "loss": 2.0254, + "step": 663 + }, + { + "epoch": 0.070042194092827, + "grad_norm": 1.0057741403579712, + "learning_rate": 0.0014864364425022198, + "loss": 2.0814, + "step": 664 + }, + { + "epoch": 0.07014767932489452, + "grad_norm": 2.3266310691833496, + "learning_rate": 0.001486388870393149, + "loss": 2.1031, + "step": 665 + }, + { + "epoch": 0.07025316455696203, + "grad_norm": 2.112344980239868, + "learning_rate": 0.0014863412157680336, + "loss": 2.0743, + "step": 666 + }, + { + "epoch": 0.07035864978902953, + "grad_norm": 1.1792027950286865, + "learning_rate": 0.0014862934786322131, + "loss": 2.1004, + "step": 667 + }, + { + "epoch": 0.07046413502109705, + "grad_norm": 1.5345449447631836, + "learning_rate": 0.0014862456589910368, + "loss": 2.0462, + "step": 668 + }, + { + "epoch": 0.07056962025316456, + "grad_norm": 1.21785306930542, + "learning_rate": 0.0014861977568498632, + "loss": 2.1065, + "step": 669 + }, + { + "epoch": 0.07067510548523206, + "grad_norm": 1.0236432552337646, + "learning_rate": 0.00148614977221406, + "loss": 2.0824, + "step": 670 + }, + { + "epoch": 0.07078059071729957, + "grad_norm": 1.4998524188995361, + "learning_rate": 0.001486101705089004, + "loss": 2.075, + "step": 671 + }, + { + "epoch": 0.07088607594936709, + "grad_norm": 0.8914380669593811, + "learning_rate": 0.0014860535554800814, + "loss": 2.0505, + "step": 672 + }, + { + "epoch": 0.0709915611814346, + "grad_norm": 1.3908029794692993, + "learning_rate": 0.0014860053233926875, + "loss": 2.0687, + "step": 673 + }, + { + "epoch": 0.0710970464135021, + "grad_norm": 0.9446824193000793, + "learning_rate": 0.0014859570088322273, 
+ "loss": 2.0008, + "step": 674 + }, + { + "epoch": 0.07120253164556962, + "grad_norm": 1.8221971988677979, + "learning_rate": 0.0014859086118041145, + "loss": 2.0702, + "step": 675 + }, + { + "epoch": 0.07130801687763713, + "grad_norm": 1.2512032985687256, + "learning_rate": 0.001485860132313772, + "loss": 2.0611, + "step": 676 + }, + { + "epoch": 0.07141350210970464, + "grad_norm": 1.4974417686462402, + "learning_rate": 0.0014858115703666325, + "loss": 2.0433, + "step": 677 + }, + { + "epoch": 0.07151898734177216, + "grad_norm": 1.299141764640808, + "learning_rate": 0.001485762925968137, + "loss": 2.0686, + "step": 678 + }, + { + "epoch": 0.07162447257383966, + "grad_norm": 1.030820608139038, + "learning_rate": 0.0014857141991237372, + "loss": 2.034, + "step": 679 + }, + { + "epoch": 0.07172995780590717, + "grad_norm": 1.0380555391311646, + "learning_rate": 0.0014856653898388927, + "loss": 2.0579, + "step": 680 + }, + { + "epoch": 0.07183544303797468, + "grad_norm": 0.9340001940727234, + "learning_rate": 0.0014856164981190728, + "loss": 2.0548, + "step": 681 + }, + { + "epoch": 0.0719409282700422, + "grad_norm": 0.9610301852226257, + "learning_rate": 0.0014855675239697564, + "loss": 2.0617, + "step": 682 + }, + { + "epoch": 0.0720464135021097, + "grad_norm": 0.9944568872451782, + "learning_rate": 0.0014855184673964311, + "loss": 2.0389, + "step": 683 + }, + { + "epoch": 0.07215189873417721, + "grad_norm": 0.8253220915794373, + "learning_rate": 0.0014854693284045936, + "loss": 2.0407, + "step": 684 + }, + { + "epoch": 0.07225738396624473, + "grad_norm": 0.7912881374359131, + "learning_rate": 0.0014854201069997505, + "loss": 2.0275, + "step": 685 + }, + { + "epoch": 0.07236286919831224, + "grad_norm": 0.7507191896438599, + "learning_rate": 0.0014853708031874176, + "loss": 2.0632, + "step": 686 + }, + { + "epoch": 0.07246835443037974, + "grad_norm": 0.8588409423828125, + "learning_rate": 0.001485321416973119, + "loss": 2.0389, + "step": 687 + }, + { + "epoch": 
0.07257383966244725, + "grad_norm": 0.9095551371574402, + "learning_rate": 0.0014852719483623893, + "loss": 2.0744, + "step": 688 + }, + { + "epoch": 0.07267932489451477, + "grad_norm": 1.0544146299362183, + "learning_rate": 0.001485222397360771, + "loss": 1.997, + "step": 689 + }, + { + "epoch": 0.07278481012658228, + "grad_norm": 0.8014028072357178, + "learning_rate": 0.001485172763973817, + "loss": 2.0651, + "step": 690 + }, + { + "epoch": 0.07289029535864978, + "grad_norm": 0.8014892339706421, + "learning_rate": 0.0014851230482070892, + "loss": 2.0496, + "step": 691 + }, + { + "epoch": 0.0729957805907173, + "grad_norm": 1.0109673738479614, + "learning_rate": 0.001485073250066158, + "loss": 2.0199, + "step": 692 + }, + { + "epoch": 0.07310126582278481, + "grad_norm": 0.938495934009552, + "learning_rate": 0.0014850233695566034, + "loss": 2.0452, + "step": 693 + }, + { + "epoch": 0.07320675105485232, + "grad_norm": 0.7398389577865601, + "learning_rate": 0.0014849734066840158, + "loss": 2.0734, + "step": 694 + }, + { + "epoch": 0.07331223628691984, + "grad_norm": 0.8043602108955383, + "learning_rate": 0.0014849233614539926, + "loss": 2.0419, + "step": 695 + }, + { + "epoch": 0.07341772151898734, + "grad_norm": 0.7279019951820374, + "learning_rate": 0.001484873233872142, + "loss": 2.0193, + "step": 696 + }, + { + "epoch": 0.07352320675105485, + "grad_norm": 0.851155698299408, + "learning_rate": 0.0014848230239440812, + "loss": 2.044, + "step": 697 + }, + { + "epoch": 0.07362869198312236, + "grad_norm": 0.7703299522399902, + "learning_rate": 0.0014847727316754367, + "loss": 2.0186, + "step": 698 + }, + { + "epoch": 0.07373417721518988, + "grad_norm": 0.8322718739509583, + "learning_rate": 0.0014847223570718436, + "loss": 2.0, + "step": 699 + }, + { + "epoch": 0.07383966244725738, + "grad_norm": 0.6955406069755554, + "learning_rate": 0.0014846719001389466, + "loss": 1.9991, + "step": 700 + }, + { + "epoch": 0.07394514767932489, + "grad_norm": 0.859824001789093, + 
"learning_rate": 0.0014846213608823997, + "loss": 1.9938, + "step": 701 + }, + { + "epoch": 0.07405063291139241, + "grad_norm": 0.995246946811676, + "learning_rate": 0.0014845707393078664, + "loss": 2.007, + "step": 702 + }, + { + "epoch": 0.07415611814345992, + "grad_norm": 0.9352458715438843, + "learning_rate": 0.0014845200354210186, + "loss": 2.0562, + "step": 703 + }, + { + "epoch": 0.07426160337552742, + "grad_norm": 0.7447950839996338, + "learning_rate": 0.0014844692492275385, + "loss": 2.0134, + "step": 704 + }, + { + "epoch": 0.07436708860759493, + "grad_norm": 0.8291091918945312, + "learning_rate": 0.0014844183807331164, + "loss": 2.0083, + "step": 705 + }, + { + "epoch": 0.07447257383966245, + "grad_norm": 0.9824552536010742, + "learning_rate": 0.0014843674299434527, + "loss": 2.0413, + "step": 706 + }, + { + "epoch": 0.07457805907172996, + "grad_norm": 1.1072850227355957, + "learning_rate": 0.0014843163968642566, + "loss": 2.0212, + "step": 707 + }, + { + "epoch": 0.07468354430379746, + "grad_norm": 0.9261953830718994, + "learning_rate": 0.0014842652815012466, + "loss": 2.0425, + "step": 708 + }, + { + "epoch": 0.07478902953586498, + "grad_norm": 0.726503312587738, + "learning_rate": 0.0014842140838601501, + "loss": 2.0307, + "step": 709 + }, + { + "epoch": 0.07489451476793249, + "grad_norm": 0.9648948311805725, + "learning_rate": 0.001484162803946705, + "loss": 2.0294, + "step": 710 + }, + { + "epoch": 0.075, + "grad_norm": 1.0922942161560059, + "learning_rate": 0.0014841114417666564, + "loss": 2.0277, + "step": 711 + }, + { + "epoch": 0.0751054852320675, + "grad_norm": 0.7850067615509033, + "learning_rate": 0.0014840599973257604, + "loss": 2.0382, + "step": 712 + }, + { + "epoch": 0.07521097046413502, + "grad_norm": 0.8855099081993103, + "learning_rate": 0.001484008470629781, + "loss": 2.0286, + "step": 713 + }, + { + "epoch": 0.07531645569620253, + "grad_norm": 1.2824063301086426, + "learning_rate": 0.0014839568616844927, + "loss": 1.9962, + "step": 
714 + }, + { + "epoch": 0.07542194092827004, + "grad_norm": 0.7803816795349121, + "learning_rate": 0.0014839051704956781, + "loss": 2.0304, + "step": 715 + }, + { + "epoch": 0.07552742616033756, + "grad_norm": 1.4273767471313477, + "learning_rate": 0.0014838533970691296, + "loss": 2.0058, + "step": 716 + }, + { + "epoch": 0.07563291139240506, + "grad_norm": 0.8134103417396545, + "learning_rate": 0.0014838015414106486, + "loss": 1.9985, + "step": 717 + }, + { + "epoch": 0.07573839662447257, + "grad_norm": 1.1863408088684082, + "learning_rate": 0.0014837496035260457, + "loss": 1.9733, + "step": 718 + }, + { + "epoch": 0.07584388185654009, + "grad_norm": 1.2571485042572021, + "learning_rate": 0.0014836975834211412, + "loss": 2.0208, + "step": 719 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 0.7453169226646423, + "learning_rate": 0.0014836454811017635, + "loss": 1.9951, + "step": 720 + }, + { + "epoch": 0.0760548523206751, + "grad_norm": 0.8923184275627136, + "learning_rate": 0.0014835932965737517, + "loss": 2.0231, + "step": 721 + }, + { + "epoch": 0.07616033755274261, + "grad_norm": 0.9391566514968872, + "learning_rate": 0.0014835410298429529, + "loss": 2.0116, + "step": 722 + }, + { + "epoch": 0.07626582278481013, + "grad_norm": 0.796238124370575, + "learning_rate": 0.001483488680915224, + "loss": 2.0065, + "step": 723 + }, + { + "epoch": 0.07637130801687764, + "grad_norm": 0.9544135332107544, + "learning_rate": 0.0014834362497964308, + "loss": 1.9813, + "step": 724 + }, + { + "epoch": 0.07647679324894514, + "grad_norm": 1.171861171722412, + "learning_rate": 0.0014833837364924484, + "loss": 2.0139, + "step": 725 + }, + { + "epoch": 0.07658227848101266, + "grad_norm": 0.7357950806617737, + "learning_rate": 0.0014833311410091617, + "loss": 1.9787, + "step": 726 + }, + { + "epoch": 0.07668776371308017, + "grad_norm": 1.3287736177444458, + "learning_rate": 0.0014832784633524638, + "loss": 2.0008, + "step": 727 + }, + { + "epoch": 0.07679324894514768, + 
"grad_norm": 1.1112440824508667, + "learning_rate": 0.0014832257035282577, + "loss": 2.0075, + "step": 728 + }, + { + "epoch": 0.07689873417721518, + "grad_norm": 0.8627713322639465, + "learning_rate": 0.0014831728615424553, + "loss": 2.0093, + "step": 729 + }, + { + "epoch": 0.0770042194092827, + "grad_norm": 0.8479001522064209, + "learning_rate": 0.0014831199374009778, + "loss": 1.9749, + "step": 730 + }, + { + "epoch": 0.07710970464135021, + "grad_norm": 0.7516772747039795, + "learning_rate": 0.0014830669311097554, + "loss": 2.0064, + "step": 731 + }, + { + "epoch": 0.07721518987341772, + "grad_norm": 0.8123369812965393, + "learning_rate": 0.0014830138426747282, + "loss": 2.0066, + "step": 732 + }, + { + "epoch": 0.07732067510548524, + "grad_norm": 0.849873423576355, + "learning_rate": 0.0014829606721018448, + "loss": 2.0102, + "step": 733 + }, + { + "epoch": 0.07742616033755274, + "grad_norm": 0.7998387813568115, + "learning_rate": 0.0014829074193970634, + "loss": 2.0421, + "step": 734 + }, + { + "epoch": 0.07753164556962025, + "grad_norm": 0.7290231585502625, + "learning_rate": 0.0014828540845663507, + "loss": 2.0104, + "step": 735 + }, + { + "epoch": 0.07763713080168777, + "grad_norm": 0.8766542077064514, + "learning_rate": 0.0014828006676156837, + "loss": 1.9786, + "step": 736 + }, + { + "epoch": 0.07774261603375528, + "grad_norm": 0.9682561159133911, + "learning_rate": 0.0014827471685510477, + "loss": 2.0276, + "step": 737 + }, + { + "epoch": 0.07784810126582278, + "grad_norm": 0.8015082478523254, + "learning_rate": 0.0014826935873784378, + "loss": 1.9879, + "step": 738 + }, + { + "epoch": 0.07795358649789029, + "grad_norm": 0.6808128952980042, + "learning_rate": 0.0014826399241038577, + "loss": 2.0262, + "step": 739 + }, + { + "epoch": 0.07805907172995781, + "grad_norm": 0.8359289169311523, + "learning_rate": 0.0014825861787333208, + "loss": 1.9761, + "step": 740 + }, + { + "epoch": 0.07816455696202532, + "grad_norm": 0.7633480429649353, + "learning_rate": 
0.00148253235127285, + "loss": 1.9937, + "step": 741 + }, + { + "epoch": 0.07827004219409282, + "grad_norm": 0.7409210801124573, + "learning_rate": 0.001482478441728476, + "loss": 2.0202, + "step": 742 + }, + { + "epoch": 0.07837552742616034, + "grad_norm": 0.8588515520095825, + "learning_rate": 0.0014824244501062402, + "loss": 1.9859, + "step": 743 + }, + { + "epoch": 0.07848101265822785, + "grad_norm": 0.9817986488342285, + "learning_rate": 0.0014823703764121929, + "loss": 1.9951, + "step": 744 + }, + { + "epoch": 0.07858649789029536, + "grad_norm": 0.8827958703041077, + "learning_rate": 0.0014823162206523926, + "loss": 2.0033, + "step": 745 + }, + { + "epoch": 0.07869198312236286, + "grad_norm": 0.6753709316253662, + "learning_rate": 0.0014822619828329085, + "loss": 1.9961, + "step": 746 + }, + { + "epoch": 0.07879746835443038, + "grad_norm": 0.7976277470588684, + "learning_rate": 0.0014822076629598176, + "loss": 1.9751, + "step": 747 + }, + { + "epoch": 0.07890295358649789, + "grad_norm": 0.7851034998893738, + "learning_rate": 0.001482153261039207, + "loss": 1.9394, + "step": 748 + }, + { + "epoch": 0.0790084388185654, + "grad_norm": 0.7930102944374084, + "learning_rate": 0.0014820987770771726, + "loss": 1.9947, + "step": 749 + }, + { + "epoch": 0.07911392405063292, + "grad_norm": 0.6949541568756104, + "learning_rate": 0.0014820442110798197, + "loss": 1.9698, + "step": 750 + }, + { + "epoch": 0.07921940928270042, + "grad_norm": 0.8928585052490234, + "learning_rate": 0.0014819895630532628, + "loss": 1.9874, + "step": 751 + }, + { + "epoch": 0.07932489451476793, + "grad_norm": 1.0159589052200317, + "learning_rate": 0.0014819348330036251, + "loss": 2.0188, + "step": 752 + }, + { + "epoch": 0.07943037974683544, + "grad_norm": 0.8262913823127747, + "learning_rate": 0.0014818800209370397, + "loss": 1.9868, + "step": 753 + }, + { + "epoch": 0.07953586497890296, + "grad_norm": 0.7219811081886292, + "learning_rate": 0.0014818251268596486, + "loss": 1.9954, + "step": 754 
+ }, + { + "epoch": 0.07964135021097046, + "grad_norm": 1.075239658355713, + "learning_rate": 0.0014817701507776025, + "loss": 2.0046, + "step": 755 + }, + { + "epoch": 0.07974683544303797, + "grad_norm": 0.9047238826751709, + "learning_rate": 0.0014817150926970625, + "loss": 1.9792, + "step": 756 + }, + { + "epoch": 0.07985232067510549, + "grad_norm": 0.721527636051178, + "learning_rate": 0.0014816599526241974, + "loss": 1.9703, + "step": 757 + }, + { + "epoch": 0.079957805907173, + "grad_norm": 1.4943727254867554, + "learning_rate": 0.0014816047305651863, + "loss": 2.0256, + "step": 758 + }, + { + "epoch": 0.0800632911392405, + "grad_norm": 0.7496199011802673, + "learning_rate": 0.0014815494265262169, + "loss": 1.9582, + "step": 759 + }, + { + "epoch": 0.08016877637130802, + "grad_norm": 1.1496256589889526, + "learning_rate": 0.0014814940405134865, + "loss": 1.9816, + "step": 760 + }, + { + "epoch": 0.08027426160337553, + "grad_norm": 1.1217842102050781, + "learning_rate": 0.0014814385725332015, + "loss": 2.0348, + "step": 761 + }, + { + "epoch": 0.08037974683544304, + "grad_norm": 0.7066051959991455, + "learning_rate": 0.001481383022591577, + "loss": 1.9765, + "step": 762 + }, + { + "epoch": 0.08048523206751054, + "grad_norm": 1.129097819328308, + "learning_rate": 0.0014813273906948378, + "loss": 1.9568, + "step": 763 + }, + { + "epoch": 0.08059071729957806, + "grad_norm": 1.0935163497924805, + "learning_rate": 0.0014812716768492177, + "loss": 1.9945, + "step": 764 + }, + { + "epoch": 0.08069620253164557, + "grad_norm": 0.8361796736717224, + "learning_rate": 0.0014812158810609598, + "loss": 1.9758, + "step": 765 + }, + { + "epoch": 0.08080168776371308, + "grad_norm": 0.7219378352165222, + "learning_rate": 0.0014811600033363165, + "loss": 1.9816, + "step": 766 + }, + { + "epoch": 0.0809071729957806, + "grad_norm": 0.8454236388206482, + "learning_rate": 0.0014811040436815486, + "loss": 1.9565, + "step": 767 + }, + { + "epoch": 0.0810126582278481, + "grad_norm": 
0.9749904870986938, + "learning_rate": 0.001481048002102927, + "loss": 1.969, + "step": 768 + }, + { + "epoch": 0.08111814345991561, + "grad_norm": 0.7698752284049988, + "learning_rate": 0.0014809918786067315, + "loss": 2.0067, + "step": 769 + }, + { + "epoch": 0.08122362869198312, + "grad_norm": 0.816430926322937, + "learning_rate": 0.001480935673199251, + "loss": 1.9476, + "step": 770 + }, + { + "epoch": 0.08132911392405064, + "grad_norm": 0.9901277422904968, + "learning_rate": 0.0014808793858867837, + "loss": 1.9525, + "step": 771 + }, + { + "epoch": 0.08143459915611814, + "grad_norm": 0.9887930154800415, + "learning_rate": 0.0014808230166756366, + "loss": 2.0271, + "step": 772 + }, + { + "epoch": 0.08154008438818565, + "grad_norm": 0.8193899989128113, + "learning_rate": 0.0014807665655721261, + "loss": 1.9683, + "step": 773 + }, + { + "epoch": 0.08164556962025317, + "grad_norm": 0.9001303911209106, + "learning_rate": 0.0014807100325825782, + "loss": 1.9791, + "step": 774 + }, + { + "epoch": 0.08175105485232068, + "grad_norm": 1.2176485061645508, + "learning_rate": 0.0014806534177133274, + "loss": 1.9438, + "step": 775 + }, + { + "epoch": 0.08185654008438818, + "grad_norm": 0.9540280103683472, + "learning_rate": 0.0014805967209707178, + "loss": 1.9307, + "step": 776 + }, + { + "epoch": 0.0819620253164557, + "grad_norm": 0.7548363208770752, + "learning_rate": 0.0014805399423611025, + "loss": 1.9712, + "step": 777 + }, + { + "epoch": 0.08206751054852321, + "grad_norm": 1.271173119544983, + "learning_rate": 0.0014804830818908438, + "loss": 2.001, + "step": 778 + }, + { + "epoch": 0.08217299578059072, + "grad_norm": 1.1781439781188965, + "learning_rate": 0.0014804261395663133, + "loss": 2.0144, + "step": 779 + }, + { + "epoch": 0.08227848101265822, + "grad_norm": 0.7881703972816467, + "learning_rate": 0.0014803691153938915, + "loss": 2.0027, + "step": 780 + }, + { + "epoch": 0.08238396624472574, + "grad_norm": 0.7140640020370483, + "learning_rate": 
0.0014803120093799687, + "loss": 1.9471, + "step": 781 + }, + { + "epoch": 0.08248945147679325, + "grad_norm": 0.8765788078308105, + "learning_rate": 0.0014802548215309434, + "loss": 1.9625, + "step": 782 + }, + { + "epoch": 0.08259493670886076, + "grad_norm": 0.9438034892082214, + "learning_rate": 0.001480197551853224, + "loss": 1.9592, + "step": 783 + }, + { + "epoch": 0.08270042194092828, + "grad_norm": 0.7496185302734375, + "learning_rate": 0.0014801402003532277, + "loss": 1.9774, + "step": 784 + }, + { + "epoch": 0.08280590717299578, + "grad_norm": 0.7161319851875305, + "learning_rate": 0.0014800827670373815, + "loss": 1.9865, + "step": 785 + }, + { + "epoch": 0.08291139240506329, + "grad_norm": 0.7397176623344421, + "learning_rate": 0.0014800252519121203, + "loss": 1.9515, + "step": 786 + }, + { + "epoch": 0.0830168776371308, + "grad_norm": 0.7876753211021423, + "learning_rate": 0.0014799676549838898, + "loss": 2.0004, + "step": 787 + }, + { + "epoch": 0.08312236286919832, + "grad_norm": 1.0011625289916992, + "learning_rate": 0.0014799099762591434, + "loss": 1.9991, + "step": 788 + }, + { + "epoch": 0.08322784810126582, + "grad_norm": 1.2221286296844482, + "learning_rate": 0.0014798522157443443, + "loss": 1.965, + "step": 789 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 0.8034047484397888, + "learning_rate": 0.0014797943734459653, + "loss": 2.0165, + "step": 790 + }, + { + "epoch": 0.08343881856540085, + "grad_norm": 0.73537278175354, + "learning_rate": 0.0014797364493704876, + "loss": 1.9896, + "step": 791 + }, + { + "epoch": 0.08354430379746836, + "grad_norm": 0.7380205988883972, + "learning_rate": 0.001479678443524402, + "loss": 1.9692, + "step": 792 + }, + { + "epoch": 0.08364978902953586, + "grad_norm": 0.8861795663833618, + "learning_rate": 0.0014796203559142081, + "loss": 1.9475, + "step": 793 + }, + { + "epoch": 0.08375527426160338, + "grad_norm": 0.8280783295631409, + "learning_rate": 0.0014795621865464155, + "loss": 1.9646, + "step": 794 
+ }, + { + "epoch": 0.08386075949367089, + "grad_norm": 0.8233578205108643, + "learning_rate": 0.0014795039354275417, + "loss": 1.936, + "step": 795 + }, + { + "epoch": 0.0839662447257384, + "grad_norm": 0.8164411187171936, + "learning_rate": 0.0014794456025641143, + "loss": 1.9676, + "step": 796 + }, + { + "epoch": 0.0840717299578059, + "grad_norm": 1.2623236179351807, + "learning_rate": 0.00147938718796267, + "loss": 1.9839, + "step": 797 + }, + { + "epoch": 0.08417721518987342, + "grad_norm": 1.020638108253479, + "learning_rate": 0.001479328691629754, + "loss": 1.9454, + "step": 798 + }, + { + "epoch": 0.08428270042194093, + "grad_norm": 0.7561349272727966, + "learning_rate": 0.0014792701135719214, + "loss": 1.9458, + "step": 799 + }, + { + "epoch": 0.08438818565400844, + "grad_norm": 0.9238168597221375, + "learning_rate": 0.001479211453795736, + "loss": 1.9213, + "step": 800 + }, + { + "epoch": 0.08449367088607596, + "grad_norm": 1.0268235206604004, + "learning_rate": 0.001479152712307771, + "loss": 1.9453, + "step": 801 + }, + { + "epoch": 0.08459915611814346, + "grad_norm": 0.8139402270317078, + "learning_rate": 0.0014790938891146089, + "loss": 1.955, + "step": 802 + }, + { + "epoch": 0.08470464135021097, + "grad_norm": 0.7657691836357117, + "learning_rate": 0.001479034984222841, + "loss": 1.921, + "step": 803 + }, + { + "epoch": 0.08481012658227848, + "grad_norm": 1.001217246055603, + "learning_rate": 0.0014789759976390675, + "loss": 1.9049, + "step": 804 + }, + { + "epoch": 0.084915611814346, + "grad_norm": 0.9925279021263123, + "learning_rate": 0.0014789169293698988, + "loss": 1.9717, + "step": 805 + }, + { + "epoch": 0.0850210970464135, + "grad_norm": 0.7576563358306885, + "learning_rate": 0.0014788577794219533, + "loss": 1.9714, + "step": 806 + }, + { + "epoch": 0.08512658227848101, + "grad_norm": 0.9301592707633972, + "learning_rate": 0.0014787985478018593, + "loss": 1.9526, + "step": 807 + }, + { + "epoch": 0.08523206751054853, + "grad_norm": 
0.8671308755874634, + "learning_rate": 0.0014787392345162538, + "loss": 1.9714, + "step": 808 + }, + { + "epoch": 0.08533755274261604, + "grad_norm": 0.7936947345733643, + "learning_rate": 0.0014786798395717833, + "loss": 1.9153, + "step": 809 + }, + { + "epoch": 0.08544303797468354, + "grad_norm": 0.7263367176055908, + "learning_rate": 0.0014786203629751033, + "loss": 1.9345, + "step": 810 + }, + { + "epoch": 0.08554852320675105, + "grad_norm": 0.7763354182243347, + "learning_rate": 0.001478560804732878, + "loss": 1.9606, + "step": 811 + }, + { + "epoch": 0.08565400843881857, + "grad_norm": 0.8377158045768738, + "learning_rate": 0.001478501164851782, + "loss": 1.9317, + "step": 812 + }, + { + "epoch": 0.08575949367088608, + "grad_norm": 0.7996160387992859, + "learning_rate": 0.0014784414433384977, + "loss": 1.994, + "step": 813 + }, + { + "epoch": 0.08586497890295358, + "grad_norm": 0.7316470146179199, + "learning_rate": 0.0014783816401997174, + "loss": 1.9615, + "step": 814 + }, + { + "epoch": 0.0859704641350211, + "grad_norm": 0.9026787281036377, + "learning_rate": 0.0014783217554421423, + "loss": 1.9402, + "step": 815 + }, + { + "epoch": 0.08607594936708861, + "grad_norm": 0.9258112907409668, + "learning_rate": 0.0014782617890724827, + "loss": 1.94, + "step": 816 + }, + { + "epoch": 0.08618143459915611, + "grad_norm": 0.8521250486373901, + "learning_rate": 0.0014782017410974583, + "loss": 1.9648, + "step": 817 + }, + { + "epoch": 0.08628691983122364, + "grad_norm": 0.7175568342208862, + "learning_rate": 0.0014781416115237976, + "loss": 1.9591, + "step": 818 + }, + { + "epoch": 0.08639240506329114, + "grad_norm": 1.0408341884613037, + "learning_rate": 0.0014780814003582385, + "loss": 1.9467, + "step": 819 + }, + { + "epoch": 0.08649789029535865, + "grad_norm": 1.205456018447876, + "learning_rate": 0.0014780211076075279, + "loss": 1.9377, + "step": 820 + }, + { + "epoch": 0.08660337552742615, + "grad_norm": 0.9052318334579468, + "learning_rate": 
0.001477960733278422, + "loss": 1.956, + "step": 821 + }, + { + "epoch": 0.08670886075949367, + "grad_norm": 0.7298400402069092, + "learning_rate": 0.001477900277377686, + "loss": 1.9587, + "step": 822 + }, + { + "epoch": 0.08681434599156118, + "grad_norm": 0.8972980976104736, + "learning_rate": 0.0014778397399120942, + "loss": 1.9687, + "step": 823 + }, + { + "epoch": 0.08691983122362869, + "grad_norm": 0.9017173647880554, + "learning_rate": 0.0014777791208884304, + "loss": 1.9927, + "step": 824 + }, + { + "epoch": 0.08702531645569621, + "grad_norm": 0.8284468650817871, + "learning_rate": 0.0014777184203134867, + "loss": 1.9309, + "step": 825 + }, + { + "epoch": 0.08713080168776371, + "grad_norm": 0.7504839301109314, + "learning_rate": 0.0014776576381940658, + "loss": 1.9726, + "step": 826 + }, + { + "epoch": 0.08723628691983122, + "grad_norm": 0.74728924036026, + "learning_rate": 0.0014775967745369778, + "loss": 1.952, + "step": 827 + }, + { + "epoch": 0.08734177215189873, + "grad_norm": 0.8446511626243591, + "learning_rate": 0.001477535829349043, + "loss": 1.9409, + "step": 828 + }, + { + "epoch": 0.08744725738396625, + "grad_norm": 0.8540011048316956, + "learning_rate": 0.0014774748026370908, + "loss": 1.9418, + "step": 829 + }, + { + "epoch": 0.08755274261603375, + "grad_norm": 0.847830593585968, + "learning_rate": 0.0014774136944079594, + "loss": 1.962, + "step": 830 + }, + { + "epoch": 0.08765822784810126, + "grad_norm": 0.786203145980835, + "learning_rate": 0.0014773525046684964, + "loss": 1.9108, + "step": 831 + }, + { + "epoch": 0.08776371308016878, + "grad_norm": 0.7765092849731445, + "learning_rate": 0.0014772912334255585, + "loss": 1.9099, + "step": 832 + }, + { + "epoch": 0.08786919831223629, + "grad_norm": 0.7807843685150146, + "learning_rate": 0.0014772298806860111, + "loss": 1.9256, + "step": 833 + }, + { + "epoch": 0.0879746835443038, + "grad_norm": 0.7427983283996582, + "learning_rate": 0.0014771684464567293, + "loss": 1.9013, + "step": 834 + }, 
+ { + "epoch": 0.08808016877637131, + "grad_norm": 0.7351009249687195, + "learning_rate": 0.0014771069307445972, + "loss": 1.9558, + "step": 835 + }, + { + "epoch": 0.08818565400843882, + "grad_norm": 0.674807071685791, + "learning_rate": 0.0014770453335565077, + "loss": 1.9247, + "step": 836 + }, + { + "epoch": 0.08829113924050633, + "grad_norm": 0.7209314107894897, + "learning_rate": 0.0014769836548993631, + "loss": 1.9459, + "step": 837 + }, + { + "epoch": 0.08839662447257383, + "grad_norm": 0.7937597036361694, + "learning_rate": 0.0014769218947800749, + "loss": 1.97, + "step": 838 + }, + { + "epoch": 0.08850210970464135, + "grad_norm": 0.8873916268348694, + "learning_rate": 0.0014768600532055638, + "loss": 1.927, + "step": 839 + }, + { + "epoch": 0.08860759493670886, + "grad_norm": 0.9846102595329285, + "learning_rate": 0.0014767981301827592, + "loss": 1.9504, + "step": 840 + }, + { + "epoch": 0.08871308016877637, + "grad_norm": 0.9854592680931091, + "learning_rate": 0.0014767361257186, + "loss": 1.9458, + "step": 841 + }, + { + "epoch": 0.08881856540084389, + "grad_norm": 1.0084080696105957, + "learning_rate": 0.0014766740398200343, + "loss": 1.8958, + "step": 842 + }, + { + "epoch": 0.0889240506329114, + "grad_norm": 0.8479753136634827, + "learning_rate": 0.0014766118724940185, + "loss": 1.9449, + "step": 843 + }, + { + "epoch": 0.0890295358649789, + "grad_norm": 0.6834438443183899, + "learning_rate": 0.0014765496237475195, + "loss": 1.9288, + "step": 844 + }, + { + "epoch": 0.08913502109704641, + "grad_norm": 0.8389136791229248, + "learning_rate": 0.001476487293587512, + "loss": 1.9418, + "step": 845 + }, + { + "epoch": 0.08924050632911393, + "grad_norm": 0.8059629797935486, + "learning_rate": 0.0014764248820209808, + "loss": 1.9555, + "step": 846 + }, + { + "epoch": 0.08934599156118143, + "grad_norm": 0.8115542531013489, + "learning_rate": 0.0014763623890549193, + "loss": 1.9503, + "step": 847 + }, + { + "epoch": 0.08945147679324894, + "grad_norm": 
1.0103729963302612, + "learning_rate": 0.00147629981469633, + "loss": 1.9527, + "step": 848 + }, + { + "epoch": 0.08955696202531646, + "grad_norm": 0.9218172430992126, + "learning_rate": 0.001476237158952225, + "loss": 1.9092, + "step": 849 + }, + { + "epoch": 0.08966244725738397, + "grad_norm": 0.6924265623092651, + "learning_rate": 0.0014761744218296249, + "loss": 1.8923, + "step": 850 + }, + { + "epoch": 0.08976793248945147, + "grad_norm": 0.8133346438407898, + "learning_rate": 0.0014761116033355597, + "loss": 1.9291, + "step": 851 + }, + { + "epoch": 0.08987341772151898, + "grad_norm": 1.3134002685546875, + "learning_rate": 0.001476048703477069, + "loss": 1.9632, + "step": 852 + }, + { + "epoch": 0.0899789029535865, + "grad_norm": 1.1146644353866577, + "learning_rate": 0.0014759857222612003, + "loss": 1.9295, + "step": 853 + }, + { + "epoch": 0.09008438818565401, + "grad_norm": 0.7377743124961853, + "learning_rate": 0.0014759226596950115, + "loss": 1.9797, + "step": 854 + }, + { + "epoch": 0.09018987341772151, + "grad_norm": 1.1374541521072388, + "learning_rate": 0.0014758595157855687, + "loss": 1.9527, + "step": 855 + }, + { + "epoch": 0.09029535864978903, + "grad_norm": 1.2817232608795166, + "learning_rate": 0.001475796290539948, + "loss": 1.9518, + "step": 856 + }, + { + "epoch": 0.09040084388185654, + "grad_norm": 0.7303783893585205, + "learning_rate": 0.0014757329839652335, + "loss": 1.9155, + "step": 857 + }, + { + "epoch": 0.09050632911392405, + "grad_norm": 0.9100944399833679, + "learning_rate": 0.0014756695960685194, + "loss": 1.9439, + "step": 858 + }, + { + "epoch": 0.09061181434599157, + "grad_norm": 0.8507583737373352, + "learning_rate": 0.0014756061268569086, + "loss": 1.926, + "step": 859 + }, + { + "epoch": 0.09071729957805907, + "grad_norm": 0.7675528526306152, + "learning_rate": 0.001475542576337513, + "loss": 1.8843, + "step": 860 + }, + { + "epoch": 0.09082278481012658, + "grad_norm": 0.8400834798812866, + "learning_rate": 
0.001475478944517454, + "loss": 1.9403, + "step": 861 + }, + { + "epoch": 0.09092827004219409, + "grad_norm": 1.0770355463027954, + "learning_rate": 0.0014754152314038617, + "loss": 1.9187, + "step": 862 + }, + { + "epoch": 0.09103375527426161, + "grad_norm": 0.9115419387817383, + "learning_rate": 0.0014753514370038753, + "loss": 1.929, + "step": 863 + }, + { + "epoch": 0.09113924050632911, + "grad_norm": 0.7001010775566101, + "learning_rate": 0.0014752875613246435, + "loss": 1.9327, + "step": 864 + }, + { + "epoch": 0.09124472573839662, + "grad_norm": 0.7418375015258789, + "learning_rate": 0.001475223604373324, + "loss": 1.9471, + "step": 865 + }, + { + "epoch": 0.09135021097046414, + "grad_norm": 0.7170901298522949, + "learning_rate": 0.0014751595661570832, + "loss": 1.8921, + "step": 866 + }, + { + "epoch": 0.09145569620253165, + "grad_norm": 0.7662613391876221, + "learning_rate": 0.001475095446683097, + "loss": 1.9312, + "step": 867 + }, + { + "epoch": 0.09156118143459915, + "grad_norm": 0.7229713797569275, + "learning_rate": 0.0014750312459585505, + "loss": 1.9496, + "step": 868 + }, + { + "epoch": 0.09166666666666666, + "grad_norm": 0.7620078325271606, + "learning_rate": 0.0014749669639906374, + "loss": 1.8963, + "step": 869 + }, + { + "epoch": 0.09177215189873418, + "grad_norm": 0.7648833394050598, + "learning_rate": 0.001474902600786561, + "loss": 1.9437, + "step": 870 + }, + { + "epoch": 0.09187763713080169, + "grad_norm": 0.7546665668487549, + "learning_rate": 0.0014748381563535337, + "loss": 1.926, + "step": 871 + }, + { + "epoch": 0.0919831223628692, + "grad_norm": 0.8841967582702637, + "learning_rate": 0.0014747736306987764, + "loss": 1.9115, + "step": 872 + }, + { + "epoch": 0.09208860759493671, + "grad_norm": 0.8691505789756775, + "learning_rate": 0.0014747090238295198, + "loss": 1.9102, + "step": 873 + }, + { + "epoch": 0.09219409282700422, + "grad_norm": 0.7517157196998596, + "learning_rate": 0.0014746443357530033, + "loss": 1.9115, + "step": 874 + 
}, + { + "epoch": 0.09229957805907173, + "grad_norm": 0.7571626901626587, + "learning_rate": 0.0014745795664764757, + "loss": 1.9047, + "step": 875 + }, + { + "epoch": 0.09240506329113925, + "grad_norm": 0.7358955144882202, + "learning_rate": 0.0014745147160071944, + "loss": 1.9347, + "step": 876 + }, + { + "epoch": 0.09251054852320675, + "grad_norm": 0.7117266058921814, + "learning_rate": 0.0014744497843524266, + "loss": 1.9103, + "step": 877 + }, + { + "epoch": 0.09261603375527426, + "grad_norm": 0.7733256220817566, + "learning_rate": 0.001474384771519448, + "loss": 1.8969, + "step": 878 + }, + { + "epoch": 0.09272151898734177, + "grad_norm": 0.7456676959991455, + "learning_rate": 0.0014743196775155434, + "loss": 1.9517, + "step": 879 + }, + { + "epoch": 0.09282700421940929, + "grad_norm": 0.8599201440811157, + "learning_rate": 0.0014742545023480075, + "loss": 1.9334, + "step": 880 + }, + { + "epoch": 0.0929324894514768, + "grad_norm": 0.7499380707740784, + "learning_rate": 0.001474189246024143, + "loss": 1.9507, + "step": 881 + }, + { + "epoch": 0.0930379746835443, + "grad_norm": 0.983161985874176, + "learning_rate": 0.0014741239085512624, + "loss": 1.8976, + "step": 882 + }, + { + "epoch": 0.09314345991561182, + "grad_norm": 1.3016536235809326, + "learning_rate": 0.0014740584899366868, + "loss": 1.8594, + "step": 883 + }, + { + "epoch": 0.09324894514767933, + "grad_norm": 0.7097591757774353, + "learning_rate": 0.0014739929901877473, + "loss": 1.9011, + "step": 884 + }, + { + "epoch": 0.09335443037974683, + "grad_norm": 1.2970683574676514, + "learning_rate": 0.001473927409311783, + "loss": 1.9673, + "step": 885 + }, + { + "epoch": 0.09345991561181434, + "grad_norm": 1.1186587810516357, + "learning_rate": 0.0014738617473161425, + "loss": 1.889, + "step": 886 + }, + { + "epoch": 0.09356540084388186, + "grad_norm": 0.759933590888977, + "learning_rate": 0.0014737960042081836, + "loss": 1.8961, + "step": 887 + }, + { + "epoch": 0.09367088607594937, + "grad_norm": 
0.7593050003051758, + "learning_rate": 0.0014737301799952734, + "loss": 1.9347, + "step": 888 + }, + { + "epoch": 0.09377637130801687, + "grad_norm": 0.7195255160331726, + "learning_rate": 0.001473664274684788, + "loss": 1.9044, + "step": 889 + }, + { + "epoch": 0.0938818565400844, + "grad_norm": 0.9628476500511169, + "learning_rate": 0.0014735982882841117, + "loss": 1.9329, + "step": 890 + }, + { + "epoch": 0.0939873417721519, + "grad_norm": 0.946158766746521, + "learning_rate": 0.0014735322208006391, + "loss": 1.9476, + "step": 891 + }, + { + "epoch": 0.0940928270042194, + "grad_norm": 0.8389595746994019, + "learning_rate": 0.0014734660722417734, + "loss": 1.9192, + "step": 892 + }, + { + "epoch": 0.09419831223628691, + "grad_norm": 0.7247409224510193, + "learning_rate": 0.0014733998426149266, + "loss": 1.8975, + "step": 893 + }, + { + "epoch": 0.09430379746835443, + "grad_norm": 0.8504897356033325, + "learning_rate": 0.0014733335319275203, + "loss": 1.9412, + "step": 894 + }, + { + "epoch": 0.09440928270042194, + "grad_norm": 0.7360488772392273, + "learning_rate": 0.001473267140186985, + "loss": 1.891, + "step": 895 + }, + { + "epoch": 0.09451476793248945, + "grad_norm": 0.967298686504364, + "learning_rate": 0.00147320066740076, + "loss": 1.897, + "step": 896 + }, + { + "epoch": 0.09462025316455697, + "grad_norm": 1.5010679960250854, + "learning_rate": 0.001473134113576294, + "loss": 1.9102, + "step": 897 + }, + { + "epoch": 0.09472573839662447, + "grad_norm": 0.8215804100036621, + "learning_rate": 0.0014730674787210448, + "loss": 1.9145, + "step": 898 + }, + { + "epoch": 0.09483122362869198, + "grad_norm": 1.1177204847335815, + "learning_rate": 0.0014730007628424792, + "loss": 1.8867, + "step": 899 + }, + { + "epoch": 0.0949367088607595, + "grad_norm": 1.1839195489883423, + "learning_rate": 0.0014729339659480727, + "loss": 1.9042, + "step": 900 + }, + { + "epoch": 0.095042194092827, + "grad_norm": 0.7512202858924866, + "learning_rate": 0.0014728670880453105, + 
"loss": 1.9198, + "step": 901 + }, + { + "epoch": 0.09514767932489451, + "grad_norm": 0.958102822303772, + "learning_rate": 0.0014728001291416863, + "loss": 1.9157, + "step": 902 + }, + { + "epoch": 0.09525316455696202, + "grad_norm": 0.8522101044654846, + "learning_rate": 0.001472733089244704, + "loss": 1.8967, + "step": 903 + }, + { + "epoch": 0.09535864978902954, + "grad_norm": 0.7154491543769836, + "learning_rate": 0.0014726659683618746, + "loss": 1.8956, + "step": 904 + }, + { + "epoch": 0.09546413502109705, + "grad_norm": 0.7152008414268494, + "learning_rate": 0.0014725987665007202, + "loss": 1.8865, + "step": 905 + }, + { + "epoch": 0.09556962025316455, + "grad_norm": 0.816632866859436, + "learning_rate": 0.0014725314836687708, + "loss": 1.9309, + "step": 906 + }, + { + "epoch": 0.09567510548523207, + "grad_norm": 0.8353370428085327, + "learning_rate": 0.0014724641198735659, + "loss": 1.932, + "step": 907 + }, + { + "epoch": 0.09578059071729958, + "grad_norm": 0.8477990031242371, + "learning_rate": 0.0014723966751226535, + "loss": 1.884, + "step": 908 + }, + { + "epoch": 0.09588607594936709, + "grad_norm": 0.7849442958831787, + "learning_rate": 0.0014723291494235916, + "loss": 1.9075, + "step": 909 + }, + { + "epoch": 0.09599156118143459, + "grad_norm": 0.7701500058174133, + "learning_rate": 0.0014722615427839468, + "loss": 1.9596, + "step": 910 + }, + { + "epoch": 0.09609704641350211, + "grad_norm": 0.7625805139541626, + "learning_rate": 0.0014721938552112943, + "loss": 1.9073, + "step": 911 + }, + { + "epoch": 0.09620253164556962, + "grad_norm": 0.8008151650428772, + "learning_rate": 0.0014721260867132193, + "loss": 1.8951, + "step": 912 + }, + { + "epoch": 0.09630801687763713, + "grad_norm": 0.7727149724960327, + "learning_rate": 0.0014720582372973155, + "loss": 1.8943, + "step": 913 + }, + { + "epoch": 0.09641350210970465, + "grad_norm": 0.9480679631233215, + "learning_rate": 0.0014719903069711857, + "loss": 1.9152, + "step": 914 + }, + { + "epoch": 
0.09651898734177215, + "grad_norm": 1.4313794374465942, + "learning_rate": 0.0014719222957424417, + "loss": 1.9379, + "step": 915 + }, + { + "epoch": 0.09662447257383966, + "grad_norm": 0.8077335953712463, + "learning_rate": 0.0014718542036187049, + "loss": 1.9228, + "step": 916 + }, + { + "epoch": 0.09672995780590718, + "grad_norm": 0.7847627401351929, + "learning_rate": 0.0014717860306076049, + "loss": 1.8773, + "step": 917 + }, + { + "epoch": 0.09683544303797469, + "grad_norm": 0.9104686975479126, + "learning_rate": 0.0014717177767167812, + "loss": 1.8877, + "step": 918 + }, + { + "epoch": 0.09694092827004219, + "grad_norm": 0.8004719614982605, + "learning_rate": 0.0014716494419538815, + "loss": 1.9299, + "step": 919 + }, + { + "epoch": 0.0970464135021097, + "grad_norm": 0.7162675857543945, + "learning_rate": 0.0014715810263265633, + "loss": 1.8496, + "step": 920 + }, + { + "epoch": 0.09715189873417722, + "grad_norm": 0.8379033803939819, + "learning_rate": 0.0014715125298424934, + "loss": 1.9209, + "step": 921 + }, + { + "epoch": 0.09725738396624473, + "grad_norm": 0.7772669792175293, + "learning_rate": 0.0014714439525093466, + "loss": 1.8879, + "step": 922 + }, + { + "epoch": 0.09736286919831223, + "grad_norm": 0.7326444983482361, + "learning_rate": 0.0014713752943348074, + "loss": 1.8702, + "step": 923 + }, + { + "epoch": 0.09746835443037975, + "grad_norm": 0.7257019877433777, + "learning_rate": 0.0014713065553265694, + "loss": 1.8994, + "step": 924 + }, + { + "epoch": 0.09757383966244726, + "grad_norm": 0.8054008483886719, + "learning_rate": 0.001471237735492335, + "loss": 1.8693, + "step": 925 + }, + { + "epoch": 0.09767932489451477, + "grad_norm": 1.124942660331726, + "learning_rate": 0.0014711688348398161, + "loss": 1.895, + "step": 926 + }, + { + "epoch": 0.09778481012658227, + "grad_norm": 0.9967485070228577, + "learning_rate": 0.001471099853376733, + "loss": 1.8963, + "step": 927 + }, + { + "epoch": 0.09789029535864979, + "grad_norm": 
0.7618308663368225, + "learning_rate": 0.0014710307911108159, + "loss": 1.8487, + "step": 928 + }, + { + "epoch": 0.0979957805907173, + "grad_norm": 0.8096457123756409, + "learning_rate": 0.0014709616480498029, + "loss": 1.8998, + "step": 929 + }, + { + "epoch": 0.0981012658227848, + "grad_norm": 0.7934850454330444, + "learning_rate": 0.0014708924242014423, + "loss": 1.8905, + "step": 930 + }, + { + "epoch": 0.09820675105485233, + "grad_norm": 0.7728493213653564, + "learning_rate": 0.001470823119573491, + "loss": 1.8678, + "step": 931 + }, + { + "epoch": 0.09831223628691983, + "grad_norm": 0.7882628440856934, + "learning_rate": 0.0014707537341737149, + "loss": 1.9218, + "step": 932 + }, + { + "epoch": 0.09841772151898734, + "grad_norm": 1.028428316116333, + "learning_rate": 0.0014706842680098887, + "loss": 1.9153, + "step": 933 + }, + { + "epoch": 0.09852320675105486, + "grad_norm": 0.8578744530677795, + "learning_rate": 0.0014706147210897967, + "loss": 1.9093, + "step": 934 + }, + { + "epoch": 0.09862869198312237, + "grad_norm": 0.7174988985061646, + "learning_rate": 0.0014705450934212317, + "loss": 1.8963, + "step": 935 + }, + { + "epoch": 0.09873417721518987, + "grad_norm": 1.073665976524353, + "learning_rate": 0.0014704753850119962, + "loss": 1.8817, + "step": 936 + }, + { + "epoch": 0.09883966244725738, + "grad_norm": 1.2672936916351318, + "learning_rate": 0.001470405595869901, + "loss": 1.9157, + "step": 937 + }, + { + "epoch": 0.0989451476793249, + "grad_norm": 0.7430790066719055, + "learning_rate": 0.0014703357260027667, + "loss": 1.8892, + "step": 938 + }, + { + "epoch": 0.0990506329113924, + "grad_norm": 1.155817985534668, + "learning_rate": 0.0014702657754184225, + "loss": 1.9041, + "step": 939 + }, + { + "epoch": 0.09915611814345991, + "grad_norm": 1.1936633586883545, + "learning_rate": 0.0014701957441247064, + "loss": 1.8891, + "step": 940 + }, + { + "epoch": 0.09926160337552743, + "grad_norm": 0.731796145439148, + "learning_rate": 
0.001470125632129466, + "loss": 1.8665, + "step": 941 + }, + { + "epoch": 0.09936708860759494, + "grad_norm": 1.2533847093582153, + "learning_rate": 0.0014700554394405576, + "loss": 1.9092, + "step": 942 + }, + { + "epoch": 0.09947257383966245, + "grad_norm": 1.3875657320022583, + "learning_rate": 0.0014699851660658469, + "loss": 1.8735, + "step": 943 + }, + { + "epoch": 0.09957805907172995, + "grad_norm": 0.8215686678886414, + "learning_rate": 0.0014699148120132079, + "loss": 1.8479, + "step": 944 + }, + { + "epoch": 0.09968354430379747, + "grad_norm": 1.5812702178955078, + "learning_rate": 0.0014698443772905247, + "loss": 1.8943, + "step": 945 + }, + { + "epoch": 0.09978902953586498, + "grad_norm": 0.8269277811050415, + "learning_rate": 0.0014697738619056891, + "loss": 1.8905, + "step": 946 + }, + { + "epoch": 0.09989451476793249, + "grad_norm": 1.2609508037567139, + "learning_rate": 0.0014697032658666036, + "loss": 1.8417, + "step": 947 + }, + { + "epoch": 0.1, + "grad_norm": 1.076684832572937, + "learning_rate": 0.001469632589181178, + "loss": 1.915, + "step": 948 + }, + { + "epoch": 0.10010548523206751, + "grad_norm": 0.7765666246414185, + "learning_rate": 0.0014695618318573327, + "loss": 1.8621, + "step": 949 + }, + { + "epoch": 0.10021097046413502, + "grad_norm": 0.9273523688316345, + "learning_rate": 0.0014694909939029959, + "loss": 1.8864, + "step": 950 + }, + { + "epoch": 0.10031645569620253, + "grad_norm": 0.8359507918357849, + "learning_rate": 0.0014694200753261057, + "loss": 1.8934, + "step": 951 + }, + { + "epoch": 0.10042194092827005, + "grad_norm": 1.0057463645935059, + "learning_rate": 0.0014693490761346086, + "loss": 1.8927, + "step": 952 + }, + { + "epoch": 0.10052742616033755, + "grad_norm": 0.9644596576690674, + "learning_rate": 0.0014692779963364606, + "loss": 1.9075, + "step": 953 + }, + { + "epoch": 0.10063291139240506, + "grad_norm": 0.9011514782905579, + "learning_rate": 0.0014692068359396264, + "loss": 1.8953, + "step": 954 + }, + { + 
"epoch": 0.10073839662447258, + "grad_norm": 0.9451431632041931, + "learning_rate": 0.00146913559495208, + "loss": 1.8919, + "step": 955 + }, + { + "epoch": 0.10084388185654009, + "grad_norm": 1.0711475610733032, + "learning_rate": 0.001469064273381804, + "loss": 1.8836, + "step": 956 + }, + { + "epoch": 0.10094936708860759, + "grad_norm": 0.8782908916473389, + "learning_rate": 0.0014689928712367907, + "loss": 1.8594, + "step": 957 + }, + { + "epoch": 0.10105485232067511, + "grad_norm": 1.155221700668335, + "learning_rate": 0.0014689213885250411, + "loss": 1.9038, + "step": 958 + }, + { + "epoch": 0.10116033755274262, + "grad_norm": 1.0446572303771973, + "learning_rate": 0.001468849825254565, + "loss": 1.8478, + "step": 959 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 1.0314210653305054, + "learning_rate": 0.0014687781814333814, + "loss": 1.8666, + "step": 960 + }, + { + "epoch": 0.10137130801687763, + "grad_norm": 0.7773676514625549, + "learning_rate": 0.0014687064570695185, + "loss": 1.8808, + "step": 961 + }, + { + "epoch": 0.10147679324894515, + "grad_norm": 0.8623193502426147, + "learning_rate": 0.0014686346521710133, + "loss": 1.8543, + "step": 962 + }, + { + "epoch": 0.10158227848101266, + "grad_norm": 0.9285409450531006, + "learning_rate": 0.0014685627667459118, + "loss": 1.9127, + "step": 963 + }, + { + "epoch": 0.10168776371308016, + "grad_norm": 1.0404728651046753, + "learning_rate": 0.0014684908008022694, + "loss": 1.8728, + "step": 964 + }, + { + "epoch": 0.10179324894514769, + "grad_norm": 0.8223545551300049, + "learning_rate": 0.00146841875434815, + "loss": 1.9246, + "step": 965 + }, + { + "epoch": 0.10189873417721519, + "grad_norm": 0.9597097635269165, + "learning_rate": 0.0014683466273916266, + "loss": 1.8567, + "step": 966 + }, + { + "epoch": 0.1020042194092827, + "grad_norm": 1.2674291133880615, + "learning_rate": 0.0014682744199407817, + "loss": 1.8752, + "step": 967 + }, + { + "epoch": 0.1021097046413502, + "grad_norm": 
0.7515655159950256, + "learning_rate": 0.0014682021320037064, + "loss": 1.9157, + "step": 968 + }, + { + "epoch": 0.10221518987341772, + "grad_norm": 1.434720754623413, + "learning_rate": 0.0014681297635885011, + "loss": 1.8702, + "step": 969 + }, + { + "epoch": 0.10232067510548523, + "grad_norm": 0.9321997165679932, + "learning_rate": 0.0014680573147032746, + "loss": 1.8935, + "step": 970 + }, + { + "epoch": 0.10242616033755274, + "grad_norm": 0.8779327869415283, + "learning_rate": 0.0014679847853561457, + "loss": 1.858, + "step": 971 + }, + { + "epoch": 0.10253164556962026, + "grad_norm": 1.1187217235565186, + "learning_rate": 0.0014679121755552412, + "loss": 1.8608, + "step": 972 + }, + { + "epoch": 0.10263713080168776, + "grad_norm": 0.8770945072174072, + "learning_rate": 0.0014678394853086976, + "loss": 1.8604, + "step": 973 + }, + { + "epoch": 0.10274261603375527, + "grad_norm": 0.7053911089897156, + "learning_rate": 0.0014677667146246604, + "loss": 1.8637, + "step": 974 + }, + { + "epoch": 0.10284810126582279, + "grad_norm": 0.7642235159873962, + "learning_rate": 0.0014676938635112835, + "loss": 1.8554, + "step": 975 + }, + { + "epoch": 0.1029535864978903, + "grad_norm": 0.7571728229522705, + "learning_rate": 0.0014676209319767306, + "loss": 1.8624, + "step": 976 + }, + { + "epoch": 0.1030590717299578, + "grad_norm": 0.7447241544723511, + "learning_rate": 0.0014675479200291738, + "loss": 1.871, + "step": 977 + }, + { + "epoch": 0.10316455696202531, + "grad_norm": 0.7056824564933777, + "learning_rate": 0.0014674748276767944, + "loss": 1.8548, + "step": 978 + }, + { + "epoch": 0.10327004219409283, + "grad_norm": 0.7578874230384827, + "learning_rate": 0.0014674016549277831, + "loss": 1.8531, + "step": 979 + }, + { + "epoch": 0.10337552742616034, + "grad_norm": 0.6854112148284912, + "learning_rate": 0.0014673284017903392, + "loss": 1.8925, + "step": 980 + }, + { + "epoch": 0.10348101265822784, + "grad_norm": 0.7510766983032227, + "learning_rate": 
0.001467255068272671, + "loss": 1.8702, + "step": 981 + }, + { + "epoch": 0.10358649789029536, + "grad_norm": 0.7241171598434448, + "learning_rate": 0.0014671816543829954, + "loss": 1.8762, + "step": 982 + }, + { + "epoch": 0.10369198312236287, + "grad_norm": 0.7509797215461731, + "learning_rate": 0.0014671081601295394, + "loss": 1.8445, + "step": 983 + }, + { + "epoch": 0.10379746835443038, + "grad_norm": 0.7131299376487732, + "learning_rate": 0.0014670345855205384, + "loss": 1.8468, + "step": 984 + }, + { + "epoch": 0.10390295358649788, + "grad_norm": 0.9719008207321167, + "learning_rate": 0.0014669609305642366, + "loss": 1.8617, + "step": 985 + }, + { + "epoch": 0.1040084388185654, + "grad_norm": 0.9067281484603882, + "learning_rate": 0.0014668871952688873, + "loss": 1.8716, + "step": 986 + }, + { + "epoch": 0.10411392405063291, + "grad_norm": 0.6816486120223999, + "learning_rate": 0.0014668133796427532, + "loss": 1.892, + "step": 987 + }, + { + "epoch": 0.10421940928270042, + "grad_norm": 0.7557010054588318, + "learning_rate": 0.0014667394836941055, + "loss": 1.8579, + "step": 988 + }, + { + "epoch": 0.10432489451476794, + "grad_norm": 0.7888644337654114, + "learning_rate": 0.0014666655074312247, + "loss": 1.8501, + "step": 989 + }, + { + "epoch": 0.10443037974683544, + "grad_norm": 0.7500376105308533, + "learning_rate": 0.0014665914508624, + "loss": 1.861, + "step": 990 + }, + { + "epoch": 0.10453586497890295, + "grad_norm": 0.7035607099533081, + "learning_rate": 0.0014665173139959305, + "loss": 1.9211, + "step": 991 + }, + { + "epoch": 0.10464135021097046, + "grad_norm": 0.6809131503105164, + "learning_rate": 0.0014664430968401225, + "loss": 1.8599, + "step": 992 + }, + { + "epoch": 0.10474683544303798, + "grad_norm": 0.6881659030914307, + "learning_rate": 0.0014663687994032931, + "loss": 1.8363, + "step": 993 + }, + { + "epoch": 0.10485232067510548, + "grad_norm": 0.7244149446487427, + "learning_rate": 0.0014662944216937677, + "loss": 1.8947, + "step": 994 + 
}, + { + "epoch": 0.10495780590717299, + "grad_norm": 0.7917876839637756, + "learning_rate": 0.0014662199637198807, + "loss": 1.836, + "step": 995 + }, + { + "epoch": 0.10506329113924051, + "grad_norm": 0.868735134601593, + "learning_rate": 0.0014661454254899754, + "loss": 1.8692, + "step": 996 + }, + { + "epoch": 0.10516877637130802, + "grad_norm": 0.7353900074958801, + "learning_rate": 0.0014660708070124038, + "loss": 1.8356, + "step": 997 + }, + { + "epoch": 0.10527426160337552, + "grad_norm": 0.7687863707542419, + "learning_rate": 0.0014659961082955277, + "loss": 1.8825, + "step": 998 + }, + { + "epoch": 0.10537974683544304, + "grad_norm": 0.8061034083366394, + "learning_rate": 0.0014659213293477177, + "loss": 1.8933, + "step": 999 + }, + { + "epoch": 0.10548523206751055, + "grad_norm": 0.8222795128822327, + "learning_rate": 0.0014658464701773526, + "loss": 1.9031, + "step": 1000 + }, + { + "epoch": 0.10559071729957806, + "grad_norm": 0.7023205161094666, + "learning_rate": 0.0014657715307928212, + "loss": 1.8366, + "step": 1001 + }, + { + "epoch": 0.10569620253164556, + "grad_norm": 0.816092848777771, + "learning_rate": 0.0014656965112025203, + "loss": 1.8664, + "step": 1002 + }, + { + "epoch": 0.10580168776371308, + "grad_norm": 0.9118419885635376, + "learning_rate": 0.0014656214114148567, + "loss": 1.9007, + "step": 1003 + }, + { + "epoch": 0.10590717299578059, + "grad_norm": 0.7233783602714539, + "learning_rate": 0.0014655462314382456, + "loss": 1.8294, + "step": 1004 + }, + { + "epoch": 0.1060126582278481, + "grad_norm": 1.1325019598007202, + "learning_rate": 0.0014654709712811113, + "loss": 1.8691, + "step": 1005 + }, + { + "epoch": 0.10611814345991562, + "grad_norm": 1.2233012914657593, + "learning_rate": 0.0014653956309518866, + "loss": 1.8701, + "step": 1006 + }, + { + "epoch": 0.10622362869198312, + "grad_norm": 0.7650702595710754, + "learning_rate": 0.0014653202104590146, + "loss": 1.9077, + "step": 1007 + }, + { + "epoch": 0.10632911392405063, + 
"grad_norm": 0.8159961104393005, + "learning_rate": 0.0014652447098109458, + "loss": 1.8561, + "step": 1008 + }, + { + "epoch": 0.10643459915611814, + "grad_norm": 1.1680084466934204, + "learning_rate": 0.001465169129016141, + "loss": 1.8788, + "step": 1009 + }, + { + "epoch": 0.10654008438818566, + "grad_norm": 0.8221814036369324, + "learning_rate": 0.0014650934680830688, + "loss": 1.8653, + "step": 1010 + }, + { + "epoch": 0.10664556962025316, + "grad_norm": 0.7106383442878723, + "learning_rate": 0.001465017727020208, + "loss": 1.839, + "step": 1011 + }, + { + "epoch": 0.10675105485232067, + "grad_norm": 0.801090657711029, + "learning_rate": 0.0014649419058360455, + "loss": 1.8418, + "step": 1012 + }, + { + "epoch": 0.10685654008438819, + "grad_norm": 0.8218127489089966, + "learning_rate": 0.0014648660045390772, + "loss": 1.8152, + "step": 1013 + }, + { + "epoch": 0.1069620253164557, + "grad_norm": 0.9082812070846558, + "learning_rate": 0.0014647900231378086, + "loss": 1.8681, + "step": 1014 + }, + { + "epoch": 0.1070675105485232, + "grad_norm": 0.838887631893158, + "learning_rate": 0.0014647139616407539, + "loss": 1.8447, + "step": 1015 + }, + { + "epoch": 0.10717299578059072, + "grad_norm": 0.7410591244697571, + "learning_rate": 0.0014646378200564355, + "loss": 1.8104, + "step": 1016 + }, + { + "epoch": 0.10727848101265823, + "grad_norm": 0.7445988655090332, + "learning_rate": 0.001464561598393386, + "loss": 1.879, + "step": 1017 + }, + { + "epoch": 0.10738396624472574, + "grad_norm": 1.0272256135940552, + "learning_rate": 0.0014644852966601463, + "loss": 1.8813, + "step": 1018 + }, + { + "epoch": 0.10748945147679324, + "grad_norm": 1.6707016229629517, + "learning_rate": 0.0014644089148652664, + "loss": 1.9353, + "step": 1019 + }, + { + "epoch": 0.10759493670886076, + "grad_norm": 0.8264486789703369, + "learning_rate": 0.0014643324530173051, + "loss": 1.8441, + "step": 1020 + }, + { + "epoch": 0.10770042194092827, + "grad_norm": 1.3634072542190552, + 
"learning_rate": 0.0014642559111248306, + "loss": 1.8559, + "step": 1021 + }, + { + "epoch": 0.10780590717299578, + "grad_norm": 0.9944160580635071, + "learning_rate": 0.0014641792891964195, + "loss": 1.8598, + "step": 1022 + }, + { + "epoch": 0.1079113924050633, + "grad_norm": 0.7940081357955933, + "learning_rate": 0.0014641025872406581, + "loss": 1.8365, + "step": 1023 + }, + { + "epoch": 0.1080168776371308, + "grad_norm": 1.3783882856369019, + "learning_rate": 0.0014640258052661405, + "loss": 1.8692, + "step": 1024 + }, + { + "epoch": 0.10812236286919831, + "grad_norm": 0.7794583439826965, + "learning_rate": 0.0014639489432814712, + "loss": 1.8974, + "step": 1025 + }, + { + "epoch": 0.10822784810126582, + "grad_norm": 1.2024257183074951, + "learning_rate": 0.001463872001295263, + "loss": 1.8242, + "step": 1026 + }, + { + "epoch": 0.10833333333333334, + "grad_norm": 1.3404207229614258, + "learning_rate": 0.0014637949793161371, + "loss": 1.8828, + "step": 1027 + }, + { + "epoch": 0.10843881856540084, + "grad_norm": 0.7687327861785889, + "learning_rate": 0.0014637178773527246, + "loss": 1.8276, + "step": 1028 + }, + { + "epoch": 0.10854430379746835, + "grad_norm": 1.1027799844741821, + "learning_rate": 0.001463640695413665, + "loss": 1.8842, + "step": 1029 + }, + { + "epoch": 0.10864978902953587, + "grad_norm": 1.1278355121612549, + "learning_rate": 0.0014635634335076067, + "loss": 1.858, + "step": 1030 + }, + { + "epoch": 0.10875527426160338, + "grad_norm": 0.7419495582580566, + "learning_rate": 0.0014634860916432077, + "loss": 1.823, + "step": 1031 + }, + { + "epoch": 0.10886075949367088, + "grad_norm": 0.9377525448799133, + "learning_rate": 0.0014634086698291345, + "loss": 1.8129, + "step": 1032 + }, + { + "epoch": 0.10896624472573839, + "grad_norm": 0.951326310634613, + "learning_rate": 0.0014633311680740625, + "loss": 1.8608, + "step": 1033 + }, + { + "epoch": 0.10907172995780591, + "grad_norm": 0.7632668614387512, + "learning_rate": 0.0014632535863866756, + 
"loss": 1.8885, + "step": 1034 + }, + { + "epoch": 0.10917721518987342, + "grad_norm": 0.7901451587677002, + "learning_rate": 0.0014631759247756683, + "loss": 1.8678, + "step": 1035 + }, + { + "epoch": 0.10928270042194092, + "grad_norm": 0.9139153361320496, + "learning_rate": 0.0014630981832497421, + "loss": 1.8194, + "step": 1036 + }, + { + "epoch": 0.10938818565400844, + "grad_norm": 0.7502335906028748, + "learning_rate": 0.0014630203618176088, + "loss": 1.8374, + "step": 1037 + }, + { + "epoch": 0.10949367088607595, + "grad_norm": 0.7589531540870667, + "learning_rate": 0.0014629424604879885, + "loss": 1.8604, + "step": 1038 + }, + { + "epoch": 0.10959915611814346, + "grad_norm": 0.8252959847450256, + "learning_rate": 0.0014628644792696105, + "loss": 1.8293, + "step": 1039 + }, + { + "epoch": 0.10970464135021098, + "grad_norm": 0.8295950293540955, + "learning_rate": 0.001462786418171213, + "loss": 1.8438, + "step": 1040 + }, + { + "epoch": 0.10981012658227848, + "grad_norm": 0.7339478135108948, + "learning_rate": 0.0014627082772015428, + "loss": 1.8521, + "step": 1041 + }, + { + "epoch": 0.10991561181434599, + "grad_norm": 1.0018126964569092, + "learning_rate": 0.0014626300563693566, + "loss": 1.8423, + "step": 1042 + }, + { + "epoch": 0.1100210970464135, + "grad_norm": 1.1624867916107178, + "learning_rate": 0.0014625517556834187, + "loss": 1.8339, + "step": 1043 + }, + { + "epoch": 0.11012658227848102, + "grad_norm": 0.7972609996795654, + "learning_rate": 0.0014624733751525036, + "loss": 1.8314, + "step": 1044 + }, + { + "epoch": 0.11023206751054852, + "grad_norm": 0.6554785966873169, + "learning_rate": 0.001462394914785394, + "loss": 1.852, + "step": 1045 + }, + { + "epoch": 0.11033755274261603, + "grad_norm": 0.6744415163993835, + "learning_rate": 0.0014623163745908821, + "loss": 1.8502, + "step": 1046 + }, + { + "epoch": 0.11044303797468355, + "grad_norm": 0.7696368098258972, + "learning_rate": 0.0014622377545777687, + "loss": 1.8437, + "step": 1047 + }, + { 
+ "epoch": 0.11054852320675106, + "grad_norm": 0.797175943851471, + "learning_rate": 0.001462159054754863, + "loss": 1.8478, + "step": 1048 + }, + { + "epoch": 0.11065400843881856, + "grad_norm": 0.7565019130706787, + "learning_rate": 0.0014620802751309841, + "loss": 1.8314, + "step": 1049 + }, + { + "epoch": 0.11075949367088607, + "grad_norm": 1.2034286260604858, + "learning_rate": 0.0014620014157149597, + "loss": 1.8655, + "step": 1050 + }, + { + "epoch": 0.11086497890295359, + "grad_norm": 0.8967472910881042, + "learning_rate": 0.0014619224765156263, + "loss": 1.8375, + "step": 1051 + }, + { + "epoch": 0.1109704641350211, + "grad_norm": 0.882737934589386, + "learning_rate": 0.0014618434575418293, + "loss": 1.8503, + "step": 1052 + }, + { + "epoch": 0.1110759493670886, + "grad_norm": 1.4707130193710327, + "learning_rate": 0.0014617643588024237, + "loss": 1.8787, + "step": 1053 + }, + { + "epoch": 0.11118143459915612, + "grad_norm": 0.8959537148475647, + "learning_rate": 0.001461685180306272, + "loss": 1.8624, + "step": 1054 + }, + { + "epoch": 0.11128691983122363, + "grad_norm": 1.054348111152649, + "learning_rate": 0.0014616059220622475, + "loss": 1.8406, + "step": 1055 + }, + { + "epoch": 0.11139240506329114, + "grad_norm": 1.020711064338684, + "learning_rate": 0.0014615265840792308, + "loss": 1.8165, + "step": 1056 + }, + { + "epoch": 0.11149789029535866, + "grad_norm": 0.9954169988632202, + "learning_rate": 0.0014614471663661123, + "loss": 1.8479, + "step": 1057 + }, + { + "epoch": 0.11160337552742616, + "grad_norm": 0.840684175491333, + "learning_rate": 0.0014613676689317916, + "loss": 1.8256, + "step": 1058 + }, + { + "epoch": 0.11170886075949367, + "grad_norm": 0.8573005795478821, + "learning_rate": 0.001461288091785176, + "loss": 1.8103, + "step": 1059 + }, + { + "epoch": 0.11181434599156118, + "grad_norm": 1.0515198707580566, + "learning_rate": 0.001461208434935183, + "loss": 1.8331, + "step": 1060 + }, + { + "epoch": 0.1119198312236287, + "grad_norm": 
0.9842373132705688, + "learning_rate": 0.0014611286983907384, + "loss": 1.8604, + "step": 1061 + }, + { + "epoch": 0.1120253164556962, + "grad_norm": 0.8456565141677856, + "learning_rate": 0.0014610488821607775, + "loss": 1.8039, + "step": 1062 + }, + { + "epoch": 0.11213080168776371, + "grad_norm": 0.8554563522338867, + "learning_rate": 0.0014609689862542434, + "loss": 1.8546, + "step": 1063 + }, + { + "epoch": 0.11223628691983123, + "grad_norm": 1.1371192932128906, + "learning_rate": 0.0014608890106800893, + "loss": 1.8112, + "step": 1064 + }, + { + "epoch": 0.11234177215189874, + "grad_norm": 1.0181691646575928, + "learning_rate": 0.0014608089554472767, + "loss": 1.8251, + "step": 1065 + }, + { + "epoch": 0.11244725738396624, + "grad_norm": 0.8198114037513733, + "learning_rate": 0.0014607288205647762, + "loss": 1.8105, + "step": 1066 + }, + { + "epoch": 0.11255274261603375, + "grad_norm": 0.7199311852455139, + "learning_rate": 0.0014606486060415673, + "loss": 1.8304, + "step": 1067 + }, + { + "epoch": 0.11265822784810127, + "grad_norm": 0.8209946751594543, + "learning_rate": 0.0014605683118866387, + "loss": 1.8096, + "step": 1068 + }, + { + "epoch": 0.11276371308016878, + "grad_norm": 0.6833330392837524, + "learning_rate": 0.0014604879381089873, + "loss": 1.8424, + "step": 1069 + }, + { + "epoch": 0.11286919831223628, + "grad_norm": 0.8263991475105286, + "learning_rate": 0.0014604074847176197, + "loss": 1.8426, + "step": 1070 + }, + { + "epoch": 0.1129746835443038, + "grad_norm": 1.019362449645996, + "learning_rate": 0.0014603269517215512, + "loss": 1.8033, + "step": 1071 + }, + { + "epoch": 0.11308016877637131, + "grad_norm": 0.9154192209243774, + "learning_rate": 0.0014602463391298055, + "loss": 1.8739, + "step": 1072 + }, + { + "epoch": 0.11318565400843882, + "grad_norm": 0.7804326415061951, + "learning_rate": 0.0014601656469514159, + "loss": 1.8101, + "step": 1073 + }, + { + "epoch": 0.11329113924050632, + "grad_norm": 0.6833314895629883, + "learning_rate": 
0.0014600848751954248, + "loss": 1.7963, + "step": 1074 + }, + { + "epoch": 0.11339662447257384, + "grad_norm": 0.7130392789840698, + "learning_rate": 0.001460004023870882, + "loss": 1.8371, + "step": 1075 + }, + { + "epoch": 0.11350210970464135, + "grad_norm": 0.6996466517448425, + "learning_rate": 0.0014599230929868482, + "loss": 1.8604, + "step": 1076 + }, + { + "epoch": 0.11360759493670886, + "grad_norm": 0.720677375793457, + "learning_rate": 0.0014598420825523918, + "loss": 1.8415, + "step": 1077 + }, + { + "epoch": 0.11371308016877638, + "grad_norm": 0.8400801420211792, + "learning_rate": 0.0014597609925765906, + "loss": 1.7995, + "step": 1078 + }, + { + "epoch": 0.11381856540084388, + "grad_norm": 0.8215530514717102, + "learning_rate": 0.0014596798230685308, + "loss": 1.8445, + "step": 1079 + }, + { + "epoch": 0.11392405063291139, + "grad_norm": 0.8703927397727966, + "learning_rate": 0.0014595985740373082, + "loss": 1.8152, + "step": 1080 + }, + { + "epoch": 0.11402953586497891, + "grad_norm": 0.8383963108062744, + "learning_rate": 0.001459517245492027, + "loss": 1.828, + "step": 1081 + }, + { + "epoch": 0.11413502109704642, + "grad_norm": 0.7095515131950378, + "learning_rate": 0.0014594358374418004, + "loss": 1.8592, + "step": 1082 + }, + { + "epoch": 0.11424050632911392, + "grad_norm": 0.8138594627380371, + "learning_rate": 0.0014593543498957506, + "loss": 1.8156, + "step": 1083 + }, + { + "epoch": 0.11434599156118143, + "grad_norm": 0.7442721128463745, + "learning_rate": 0.0014592727828630088, + "loss": 1.8417, + "step": 1084 + }, + { + "epoch": 0.11445147679324895, + "grad_norm": 0.7152398824691772, + "learning_rate": 0.001459191136352715, + "loss": 1.8015, + "step": 1085 + }, + { + "epoch": 0.11455696202531646, + "grad_norm": 0.7391453385353088, + "learning_rate": 0.0014591094103740179, + "loss": 1.7932, + "step": 1086 + }, + { + "epoch": 0.11466244725738396, + "grad_norm": 0.7144518494606018, + "learning_rate": 0.0014590276049360755, + "loss": 1.8843, 
+ "step": 1087 + }, + { + "epoch": 0.11476793248945148, + "grad_norm": 0.713981568813324, + "learning_rate": 0.0014589457200480543, + "loss": 1.8611, + "step": 1088 + }, + { + "epoch": 0.11487341772151899, + "grad_norm": 0.7636213898658752, + "learning_rate": 0.0014588637557191302, + "loss": 1.8634, + "step": 1089 + }, + { + "epoch": 0.1149789029535865, + "grad_norm": 0.7617779970169067, + "learning_rate": 0.0014587817119584873, + "loss": 1.8392, + "step": 1090 + }, + { + "epoch": 0.115084388185654, + "grad_norm": 0.8286158442497253, + "learning_rate": 0.0014586995887753197, + "loss": 1.7942, + "step": 1091 + }, + { + "epoch": 0.11518987341772152, + "grad_norm": 0.7796362638473511, + "learning_rate": 0.001458617386178829, + "loss": 1.8005, + "step": 1092 + }, + { + "epoch": 0.11529535864978903, + "grad_norm": 0.719281792640686, + "learning_rate": 0.001458535104178227, + "loss": 1.8067, + "step": 1093 + }, + { + "epoch": 0.11540084388185654, + "grad_norm": 0.8510270714759827, + "learning_rate": 0.001458452742782733, + "loss": 1.8401, + "step": 1094 + }, + { + "epoch": 0.11550632911392406, + "grad_norm": 0.7722228169441223, + "learning_rate": 0.0014583703020015768, + "loss": 1.8425, + "step": 1095 + }, + { + "epoch": 0.11561181434599156, + "grad_norm": 0.6780914068222046, + "learning_rate": 0.001458287781843996, + "loss": 1.8443, + "step": 1096 + }, + { + "epoch": 0.11571729957805907, + "grad_norm": 0.8096938729286194, + "learning_rate": 0.0014582051823192374, + "loss": 1.8252, + "step": 1097 + }, + { + "epoch": 0.11582278481012659, + "grad_norm": 0.9519742727279663, + "learning_rate": 0.0014581225034365564, + "loss": 1.8536, + "step": 1098 + }, + { + "epoch": 0.1159282700421941, + "grad_norm": 0.7918270230293274, + "learning_rate": 0.0014580397452052182, + "loss": 1.8247, + "step": 1099 + }, + { + "epoch": 0.1160337552742616, + "grad_norm": 0.7185626029968262, + "learning_rate": 0.001457956907634496, + "loss": 1.8256, + "step": 1100 + }, + { + "epoch": 
0.11613924050632911, + "grad_norm": 0.7837152481079102, + "learning_rate": 0.001457873990733672, + "loss": 1.829, + "step": 1101 + }, + { + "epoch": 0.11624472573839663, + "grad_norm": 1.05426025390625, + "learning_rate": 0.0014577909945120376, + "loss": 1.8158, + "step": 1102 + }, + { + "epoch": 0.11635021097046414, + "grad_norm": 0.9113102555274963, + "learning_rate": 0.001457707918978893, + "loss": 1.8412, + "step": 1103 + }, + { + "epoch": 0.11645569620253164, + "grad_norm": 0.730090320110321, + "learning_rate": 0.0014576247641435469, + "loss": 1.8428, + "step": 1104 + }, + { + "epoch": 0.11656118143459916, + "grad_norm": 0.8351618051528931, + "learning_rate": 0.0014575415300153174, + "loss": 1.8524, + "step": 1105 + }, + { + "epoch": 0.11666666666666667, + "grad_norm": 0.9420020580291748, + "learning_rate": 0.0014574582166035314, + "loss": 1.8349, + "step": 1106 + }, + { + "epoch": 0.11677215189873418, + "grad_norm": 0.832585334777832, + "learning_rate": 0.0014573748239175247, + "loss": 1.8236, + "step": 1107 + }, + { + "epoch": 0.11687763713080168, + "grad_norm": 0.7193986773490906, + "learning_rate": 0.0014572913519666417, + "loss": 1.8211, + "step": 1108 + }, + { + "epoch": 0.1169831223628692, + "grad_norm": 1.0969455242156982, + "learning_rate": 0.0014572078007602355, + "loss": 1.8475, + "step": 1109 + }, + { + "epoch": 0.11708860759493671, + "grad_norm": 1.08275306224823, + "learning_rate": 0.0014571241703076692, + "loss": 1.8021, + "step": 1110 + }, + { + "epoch": 0.11719409282700421, + "grad_norm": 0.8244225978851318, + "learning_rate": 0.0014570404606183132, + "loss": 1.8501, + "step": 1111 + }, + { + "epoch": 0.11729957805907174, + "grad_norm": 0.6836012005805969, + "learning_rate": 0.0014569566717015483, + "loss": 1.7994, + "step": 1112 + }, + { + "epoch": 0.11740506329113924, + "grad_norm": 0.8340814113616943, + "learning_rate": 0.0014568728035667627, + "loss": 1.844, + "step": 1113 + }, + { + "epoch": 0.11751054852320675, + "grad_norm": 
0.7485461235046387, + "learning_rate": 0.001456788856223355, + "loss": 1.7986, + "step": 1114 + }, + { + "epoch": 0.11761603375527427, + "grad_norm": 0.7133552432060242, + "learning_rate": 0.0014567048296807315, + "loss": 1.8416, + "step": 1115 + }, + { + "epoch": 0.11772151898734177, + "grad_norm": 0.7260302305221558, + "learning_rate": 0.0014566207239483078, + "loss": 1.8176, + "step": 1116 + }, + { + "epoch": 0.11782700421940928, + "grad_norm": 0.8957187533378601, + "learning_rate": 0.0014565365390355087, + "loss": 1.8182, + "step": 1117 + }, + { + "epoch": 0.11793248945147679, + "grad_norm": 0.9207319617271423, + "learning_rate": 0.001456452274951767, + "loss": 1.8403, + "step": 1118 + }, + { + "epoch": 0.11803797468354431, + "grad_norm": 0.8739698529243469, + "learning_rate": 0.0014563679317065254, + "loss": 1.8109, + "step": 1119 + }, + { + "epoch": 0.11814345991561181, + "grad_norm": 0.8352541923522949, + "learning_rate": 0.0014562835093092348, + "loss": 1.7914, + "step": 1120 + }, + { + "epoch": 0.11824894514767932, + "grad_norm": 0.6537138819694519, + "learning_rate": 0.0014561990077693553, + "loss": 1.7704, + "step": 1121 + }, + { + "epoch": 0.11835443037974684, + "grad_norm": 1.0870816707611084, + "learning_rate": 0.0014561144270963551, + "loss": 1.8312, + "step": 1122 + }, + { + "epoch": 0.11845991561181435, + "grad_norm": 1.3879342079162598, + "learning_rate": 0.0014560297672997127, + "loss": 1.8143, + "step": 1123 + }, + { + "epoch": 0.11856540084388185, + "grad_norm": 0.6967979073524475, + "learning_rate": 0.001455945028388914, + "loss": 1.7981, + "step": 1124 + }, + { + "epoch": 0.11867088607594936, + "grad_norm": 1.2061856985092163, + "learning_rate": 0.001455860210373455, + "loss": 1.8264, + "step": 1125 + }, + { + "epoch": 0.11877637130801688, + "grad_norm": 0.7234745025634766, + "learning_rate": 0.0014557753132628396, + "loss": 1.7972, + "step": 1126 + }, + { + "epoch": 0.11888185654008439, + "grad_norm": 1.3147799968719482, + "learning_rate": 
0.0014556903370665807, + "loss": 1.8176, + "step": 1127 + }, + { + "epoch": 0.1189873417721519, + "grad_norm": 0.7667887806892395, + "learning_rate": 0.0014556052817942013, + "loss": 1.8163, + "step": 1128 + }, + { + "epoch": 0.11909282700421941, + "grad_norm": 1.360310435295105, + "learning_rate": 0.001455520147455231, + "loss": 1.8371, + "step": 1129 + }, + { + "epoch": 0.11919831223628692, + "grad_norm": 1.0297434329986572, + "learning_rate": 0.0014554349340592104, + "loss": 1.8063, + "step": 1130 + }, + { + "epoch": 0.11930379746835443, + "grad_norm": 0.837645947933197, + "learning_rate": 0.001455349641615688, + "loss": 1.8565, + "step": 1131 + }, + { + "epoch": 0.11940928270042193, + "grad_norm": 0.7313145399093628, + "learning_rate": 0.001455264270134221, + "loss": 1.7939, + "step": 1132 + }, + { + "epoch": 0.11951476793248945, + "grad_norm": 0.8954237699508667, + "learning_rate": 0.0014551788196243754, + "loss": 1.8253, + "step": 1133 + }, + { + "epoch": 0.11962025316455696, + "grad_norm": 1.001744270324707, + "learning_rate": 0.0014550932900957271, + "loss": 1.8389, + "step": 1134 + }, + { + "epoch": 0.11972573839662447, + "grad_norm": 1.2749005556106567, + "learning_rate": 0.0014550076815578595, + "loss": 1.8159, + "step": 1135 + }, + { + "epoch": 0.11983122362869199, + "grad_norm": 0.8522084355354309, + "learning_rate": 0.0014549219940203659, + "loss": 1.7751, + "step": 1136 + }, + { + "epoch": 0.1199367088607595, + "grad_norm": 0.8547834157943726, + "learning_rate": 0.0014548362274928476, + "loss": 1.8405, + "step": 1137 + }, + { + "epoch": 0.120042194092827, + "grad_norm": 1.0666133165359497, + "learning_rate": 0.0014547503819849154, + "loss": 1.8258, + "step": 1138 + }, + { + "epoch": 0.12014767932489452, + "grad_norm": 0.8339992165565491, + "learning_rate": 0.001454664457506189, + "loss": 1.8085, + "step": 1139 + }, + { + "epoch": 0.12025316455696203, + "grad_norm": 0.7824354767799377, + "learning_rate": 0.001454578454066296, + "loss": 1.8293, + 
"step": 1140 + }, + { + "epoch": 0.12035864978902953, + "grad_norm": 0.7009497284889221, + "learning_rate": 0.001454492371674874, + "loss": 1.7432, + "step": 1141 + }, + { + "epoch": 0.12046413502109704, + "grad_norm": 0.9596753120422363, + "learning_rate": 0.0014544062103415687, + "loss": 1.8052, + "step": 1142 + }, + { + "epoch": 0.12056962025316456, + "grad_norm": 1.0404242277145386, + "learning_rate": 0.0014543199700760353, + "loss": 1.8552, + "step": 1143 + }, + { + "epoch": 0.12067510548523207, + "grad_norm": 0.8581358194351196, + "learning_rate": 0.0014542336508879372, + "loss": 1.779, + "step": 1144 + }, + { + "epoch": 0.12078059071729957, + "grad_norm": 0.7535548806190491, + "learning_rate": 0.0014541472527869468, + "loss": 1.8069, + "step": 1145 + }, + { + "epoch": 0.1208860759493671, + "grad_norm": 0.6468865275382996, + "learning_rate": 0.0014540607757827456, + "loss": 1.7973, + "step": 1146 + }, + { + "epoch": 0.1209915611814346, + "grad_norm": 0.8945378065109253, + "learning_rate": 0.0014539742198850234, + "loss": 1.801, + "step": 1147 + }, + { + "epoch": 0.12109704641350211, + "grad_norm": 0.6877255439758301, + "learning_rate": 0.0014538875851034798, + "loss": 1.838, + "step": 1148 + }, + { + "epoch": 0.12120253164556961, + "grad_norm": 0.9881496429443359, + "learning_rate": 0.0014538008714478224, + "loss": 1.8223, + "step": 1149 + }, + { + "epoch": 0.12130801687763713, + "grad_norm": 1.2785940170288086, + "learning_rate": 0.0014537140789277678, + "loss": 1.8077, + "step": 1150 + }, + { + "epoch": 0.12141350210970464, + "grad_norm": 0.7862429022789001, + "learning_rate": 0.0014536272075530417, + "loss": 1.8142, + "step": 1151 + }, + { + "epoch": 0.12151898734177215, + "grad_norm": 1.4669513702392578, + "learning_rate": 0.0014535402573333783, + "loss": 1.87, + "step": 1152 + }, + { + "epoch": 0.12162447257383967, + "grad_norm": 0.887669026851654, + "learning_rate": 0.001453453228278521, + "loss": 1.8085, + "step": 1153 + }, + { + "epoch": 
0.12172995780590717, + "grad_norm": 1.0380301475524902, + "learning_rate": 0.0014533661203982215, + "loss": 1.817, + "step": 1154 + }, + { + "epoch": 0.12183544303797468, + "grad_norm": 1.0781421661376953, + "learning_rate": 0.0014532789337022413, + "loss": 1.8056, + "step": 1155 + }, + { + "epoch": 0.1219409282700422, + "grad_norm": 0.9631609916687012, + "learning_rate": 0.0014531916682003494, + "loss": 1.8212, + "step": 1156 + }, + { + "epoch": 0.12204641350210971, + "grad_norm": 0.8212611079216003, + "learning_rate": 0.0014531043239023247, + "loss": 1.7936, + "step": 1157 + }, + { + "epoch": 0.12215189873417721, + "grad_norm": 0.9175748229026794, + "learning_rate": 0.0014530169008179546, + "loss": 1.8087, + "step": 1158 + }, + { + "epoch": 0.12225738396624472, + "grad_norm": 1.0858362913131714, + "learning_rate": 0.001452929398957035, + "loss": 1.8225, + "step": 1159 + }, + { + "epoch": 0.12236286919831224, + "grad_norm": 0.7701021432876587, + "learning_rate": 0.0014528418183293716, + "loss": 1.7853, + "step": 1160 + }, + { + "epoch": 0.12246835443037975, + "grad_norm": 0.9062806367874146, + "learning_rate": 0.0014527541589447774, + "loss": 1.8302, + "step": 1161 + }, + { + "epoch": 0.12257383966244725, + "grad_norm": 1.4300470352172852, + "learning_rate": 0.0014526664208130756, + "loss": 1.8368, + "step": 1162 + }, + { + "epoch": 0.12267932489451477, + "grad_norm": 0.8709359765052795, + "learning_rate": 0.0014525786039440971, + "loss": 1.8007, + "step": 1163 + }, + { + "epoch": 0.12278481012658228, + "grad_norm": 1.126595377922058, + "learning_rate": 0.001452490708347683, + "loss": 1.8038, + "step": 1164 + }, + { + "epoch": 0.12289029535864979, + "grad_norm": 1.0236226320266724, + "learning_rate": 0.0014524027340336821, + "loss": 1.7968, + "step": 1165 + }, + { + "epoch": 0.1229957805907173, + "grad_norm": 0.8345550894737244, + "learning_rate": 0.0014523146810119525, + "loss": 1.7864, + "step": 1166 + }, + { + "epoch": 0.12310126582278481, + "grad_norm": 
1.258304238319397, + "learning_rate": 0.0014522265492923608, + "loss": 1.8335, + "step": 1167 + }, + { + "epoch": 0.12320675105485232, + "grad_norm": 0.8127961754798889, + "learning_rate": 0.0014521383388847824, + "loss": 1.7759, + "step": 1168 + }, + { + "epoch": 0.12331223628691983, + "grad_norm": 1.5011109113693237, + "learning_rate": 0.0014520500497991022, + "loss": 1.8158, + "step": 1169 + }, + { + "epoch": 0.12341772151898735, + "grad_norm": 0.7582064270973206, + "learning_rate": 0.001451961682045213, + "loss": 1.7749, + "step": 1170 + }, + { + "epoch": 0.12352320675105485, + "grad_norm": 1.5630700588226318, + "learning_rate": 0.001451873235633017, + "loss": 1.8301, + "step": 1171 + }, + { + "epoch": 0.12362869198312236, + "grad_norm": 0.8559879660606384, + "learning_rate": 0.0014517847105724251, + "loss": 1.7966, + "step": 1172 + }, + { + "epoch": 0.12373417721518987, + "grad_norm": 1.0201098918914795, + "learning_rate": 0.0014516961068733569, + "loss": 1.7467, + "step": 1173 + }, + { + "epoch": 0.12383966244725739, + "grad_norm": 0.7982648611068726, + "learning_rate": 0.0014516074245457412, + "loss": 1.8119, + "step": 1174 + }, + { + "epoch": 0.1239451476793249, + "grad_norm": 1.083436131477356, + "learning_rate": 0.001451518663599515, + "loss": 1.7771, + "step": 1175 + }, + { + "epoch": 0.1240506329113924, + "grad_norm": 1.2326340675354004, + "learning_rate": 0.0014514298240446244, + "loss": 1.7363, + "step": 1176 + }, + { + "epoch": 0.12415611814345992, + "grad_norm": 0.683566153049469, + "learning_rate": 0.0014513409058910243, + "loss": 1.7615, + "step": 1177 + }, + { + "epoch": 0.12426160337552743, + "grad_norm": 1.1039146184921265, + "learning_rate": 0.0014512519091486786, + "loss": 1.8092, + "step": 1178 + }, + { + "epoch": 0.12436708860759493, + "grad_norm": 0.9868018627166748, + "learning_rate": 0.0014511628338275597, + "loss": 1.8191, + "step": 1179 + }, + { + "epoch": 0.12447257383966245, + "grad_norm": 0.7748619318008423, + "learning_rate": 
0.001451073679937649, + "loss": 1.8006, + "step": 1180 + }, + { + "epoch": 0.12457805907172996, + "grad_norm": 1.0632961988449097, + "learning_rate": 0.0014509844474889365, + "loss": 1.7731, + "step": 1181 + }, + { + "epoch": 0.12468354430379747, + "grad_norm": 1.008508324623108, + "learning_rate": 0.0014508951364914213, + "loss": 1.7789, + "step": 1182 + }, + { + "epoch": 0.12478902953586497, + "grad_norm": 0.7538714408874512, + "learning_rate": 0.001450805746955111, + "loss": 1.7982, + "step": 1183 + }, + { + "epoch": 0.1248945147679325, + "grad_norm": 0.8062027096748352, + "learning_rate": 0.001450716278890022, + "loss": 1.7978, + "step": 1184 + }, + { + "epoch": 0.125, + "grad_norm": 0.9150775671005249, + "learning_rate": 0.0014506267323061803, + "loss": 1.7955, + "step": 1185 + }, + { + "epoch": 0.12510548523206752, + "grad_norm": 0.7329570651054382, + "learning_rate": 0.0014505371072136195, + "loss": 1.7891, + "step": 1186 + }, + { + "epoch": 0.125210970464135, + "grad_norm": 0.8549013733863831, + "learning_rate": 0.0014504474036223826, + "loss": 1.8024, + "step": 1187 + }, + { + "epoch": 0.12531645569620253, + "grad_norm": 0.9452799558639526, + "learning_rate": 0.0014503576215425212, + "loss": 1.801, + "step": 1188 + }, + { + "epoch": 0.12542194092827005, + "grad_norm": 0.6878595948219299, + "learning_rate": 0.0014502677609840964, + "loss": 1.7649, + "step": 1189 + }, + { + "epoch": 0.12552742616033755, + "grad_norm": 0.8982939720153809, + "learning_rate": 0.0014501778219571766, + "loss": 1.7605, + "step": 1190 + }, + { + "epoch": 0.12563291139240507, + "grad_norm": 0.9380730986595154, + "learning_rate": 0.0014500878044718408, + "loss": 1.7907, + "step": 1191 + }, + { + "epoch": 0.1257383966244726, + "grad_norm": 0.8939931392669678, + "learning_rate": 0.0014499977085381756, + "loss": 1.7671, + "step": 1192 + }, + { + "epoch": 0.12584388185654008, + "grad_norm": 0.7124978303909302, + "learning_rate": 0.0014499075341662764, + "loss": 1.8164, + "step": 1193 + 
}, + { + "epoch": 0.1259493670886076, + "grad_norm": 0.8625569939613342, + "learning_rate": 0.0014498172813662482, + "loss": 1.8486, + "step": 1194 + }, + { + "epoch": 0.1260548523206751, + "grad_norm": 1.3579461574554443, + "learning_rate": 0.0014497269501482037, + "loss": 1.8049, + "step": 1195 + }, + { + "epoch": 0.1261603375527426, + "grad_norm": 0.8892766833305359, + "learning_rate": 0.0014496365405222656, + "loss": 1.783, + "step": 1196 + }, + { + "epoch": 0.12626582278481013, + "grad_norm": 0.733532190322876, + "learning_rate": 0.0014495460524985644, + "loss": 1.7844, + "step": 1197 + }, + { + "epoch": 0.12637130801687763, + "grad_norm": 1.0018692016601562, + "learning_rate": 0.0014494554860872398, + "loss": 1.8144, + "step": 1198 + }, + { + "epoch": 0.12647679324894515, + "grad_norm": 0.8846414089202881, + "learning_rate": 0.00144936484129844, + "loss": 1.8256, + "step": 1199 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 0.7068676352500916, + "learning_rate": 0.0014492741181423225, + "loss": 1.7972, + "step": 1200 + }, + { + "epoch": 0.12668776371308016, + "grad_norm": 0.7895307540893555, + "learning_rate": 0.001449183316629053, + "loss": 1.7918, + "step": 1201 + }, + { + "epoch": 0.12679324894514768, + "grad_norm": 0.8006041646003723, + "learning_rate": 0.0014490924367688066, + "loss": 1.7981, + "step": 1202 + }, + { + "epoch": 0.1268987341772152, + "grad_norm": 0.7066389918327332, + "learning_rate": 0.0014490014785717667, + "loss": 1.7725, + "step": 1203 + }, + { + "epoch": 0.1270042194092827, + "grad_norm": 0.6922296285629272, + "learning_rate": 0.0014489104420481254, + "loss": 1.7869, + "step": 1204 + }, + { + "epoch": 0.1271097046413502, + "grad_norm": 0.7295243144035339, + "learning_rate": 0.001448819327208084, + "loss": 1.826, + "step": 1205 + }, + { + "epoch": 0.12721518987341773, + "grad_norm": 0.7432446479797363, + "learning_rate": 0.0014487281340618526, + "loss": 1.7887, + "step": 1206 + }, + { + "epoch": 0.12732067510548523, + 
"grad_norm": 0.7004541754722595, + "learning_rate": 0.0014486368626196494, + "loss": 1.7824, + "step": 1207 + }, + { + "epoch": 0.12742616033755275, + "grad_norm": 0.7865073084831238, + "learning_rate": 0.001448545512891702, + "loss": 1.7926, + "step": 1208 + }, + { + "epoch": 0.12753164556962027, + "grad_norm": 0.8445515632629395, + "learning_rate": 0.0014484540848882469, + "loss": 1.7904, + "step": 1209 + }, + { + "epoch": 0.12763713080168776, + "grad_norm": 0.8561847805976868, + "learning_rate": 0.0014483625786195285, + "loss": 1.8236, + "step": 1210 + }, + { + "epoch": 0.12774261603375528, + "grad_norm": 0.6555332541465759, + "learning_rate": 0.0014482709940958009, + "loss": 1.7695, + "step": 1211 + }, + { + "epoch": 0.12784810126582277, + "grad_norm": 0.7860832810401917, + "learning_rate": 0.0014481793313273266, + "loss": 1.7599, + "step": 1212 + }, + { + "epoch": 0.1279535864978903, + "grad_norm": 0.7351190447807312, + "learning_rate": 0.0014480875903243766, + "loss": 1.7837, + "step": 1213 + }, + { + "epoch": 0.1280590717299578, + "grad_norm": 0.7722806930541992, + "learning_rate": 0.0014479957710972313, + "loss": 1.7742, + "step": 1214 + }, + { + "epoch": 0.1281645569620253, + "grad_norm": 1.0445855855941772, + "learning_rate": 0.0014479038736561793, + "loss": 1.7762, + "step": 1215 + }, + { + "epoch": 0.12827004219409283, + "grad_norm": 1.0175955295562744, + "learning_rate": 0.001447811898011518, + "loss": 1.7969, + "step": 1216 + }, + { + "epoch": 0.12837552742616035, + "grad_norm": 0.8952299356460571, + "learning_rate": 0.0014477198441735543, + "loss": 1.7934, + "step": 1217 + }, + { + "epoch": 0.12848101265822784, + "grad_norm": 0.7402116060256958, + "learning_rate": 0.0014476277121526027, + "loss": 1.8321, + "step": 1218 + }, + { + "epoch": 0.12858649789029536, + "grad_norm": 0.7755619287490845, + "learning_rate": 0.0014475355019589872, + "loss": 1.762, + "step": 1219 + }, + { + "epoch": 0.12869198312236288, + "grad_norm": 0.7078384757041931, + 
"learning_rate": 0.0014474432136030405, + "loss": 1.7522, + "step": 1220 + }, + { + "epoch": 0.12879746835443037, + "grad_norm": 0.7680009603500366, + "learning_rate": 0.001447350847095104, + "loss": 1.7904, + "step": 1221 + }, + { + "epoch": 0.1289029535864979, + "grad_norm": 0.8148878812789917, + "learning_rate": 0.001447258402445528, + "loss": 1.8068, + "step": 1222 + }, + { + "epoch": 0.1290084388185654, + "grad_norm": 0.7032340168952942, + "learning_rate": 0.0014471658796646708, + "loss": 1.7608, + "step": 1223 + }, + { + "epoch": 0.1291139240506329, + "grad_norm": 0.9310318827629089, + "learning_rate": 0.0014470732787629005, + "loss": 1.807, + "step": 1224 + }, + { + "epoch": 0.12921940928270043, + "grad_norm": 0.8106138706207275, + "learning_rate": 0.0014469805997505932, + "loss": 1.7364, + "step": 1225 + }, + { + "epoch": 0.12932489451476795, + "grad_norm": 0.7410348653793335, + "learning_rate": 0.0014468878426381346, + "loss": 1.7574, + "step": 1226 + }, + { + "epoch": 0.12943037974683544, + "grad_norm": 0.7871372103691101, + "learning_rate": 0.001446795007435918, + "loss": 1.7633, + "step": 1227 + }, + { + "epoch": 0.12953586497890296, + "grad_norm": 0.76044100522995, + "learning_rate": 0.0014467020941543464, + "loss": 1.7558, + "step": 1228 + }, + { + "epoch": 0.12964135021097045, + "grad_norm": 1.1536540985107422, + "learning_rate": 0.0014466091028038314, + "loss": 1.8087, + "step": 1229 + }, + { + "epoch": 0.12974683544303797, + "grad_norm": 0.7635254859924316, + "learning_rate": 0.0014465160333947923, + "loss": 1.7379, + "step": 1230 + }, + { + "epoch": 0.1298523206751055, + "grad_norm": 0.8268280029296875, + "learning_rate": 0.0014464228859376587, + "loss": 1.8055, + "step": 1231 + }, + { + "epoch": 0.12995780590717299, + "grad_norm": 0.9217702746391296, + "learning_rate": 0.001446329660442868, + "loss": 1.8119, + "step": 1232 + }, + { + "epoch": 0.1300632911392405, + "grad_norm": 1.094534158706665, + "learning_rate": 0.0014462363569208666, + "loss": 
1.7695, + "step": 1233 + }, + { + "epoch": 0.13016877637130803, + "grad_norm": 0.8779019713401794, + "learning_rate": 0.00144614297538211, + "loss": 1.8144, + "step": 1234 + }, + { + "epoch": 0.13027426160337552, + "grad_norm": 0.7221916317939758, + "learning_rate": 0.0014460495158370615, + "loss": 1.8179, + "step": 1235 + }, + { + "epoch": 0.13037974683544304, + "grad_norm": 0.7974010109901428, + "learning_rate": 0.0014459559782961937, + "loss": 1.7805, + "step": 1236 + }, + { + "epoch": 0.13048523206751056, + "grad_norm": 0.6997444033622742, + "learning_rate": 0.0014458623627699883, + "loss": 1.7696, + "step": 1237 + }, + { + "epoch": 0.13059071729957805, + "grad_norm": 0.776787281036377, + "learning_rate": 0.0014457686692689355, + "loss": 1.7736, + "step": 1238 + }, + { + "epoch": 0.13069620253164557, + "grad_norm": 0.7869901061058044, + "learning_rate": 0.0014456748978035339, + "loss": 1.7989, + "step": 1239 + }, + { + "epoch": 0.1308016877637131, + "grad_norm": 0.8271242380142212, + "learning_rate": 0.0014455810483842908, + "loss": 1.7606, + "step": 1240 + }, + { + "epoch": 0.13090717299578059, + "grad_norm": 0.7570054531097412, + "learning_rate": 0.0014454871210217229, + "loss": 1.7541, + "step": 1241 + }, + { + "epoch": 0.1310126582278481, + "grad_norm": 0.7352009415626526, + "learning_rate": 0.0014453931157263548, + "loss": 1.8058, + "step": 1242 + }, + { + "epoch": 0.1311181434599156, + "grad_norm": 0.8753525614738464, + "learning_rate": 0.001445299032508721, + "loss": 1.8005, + "step": 1243 + }, + { + "epoch": 0.13122362869198312, + "grad_norm": 0.8869842886924744, + "learning_rate": 0.0014452048713793633, + "loss": 1.7847, + "step": 1244 + }, + { + "epoch": 0.13132911392405064, + "grad_norm": 0.7135405540466309, + "learning_rate": 0.0014451106323488331, + "loss": 1.816, + "step": 1245 + }, + { + "epoch": 0.13143459915611813, + "grad_norm": 0.9258755445480347, + "learning_rate": 0.0014450163154276906, + "loss": 1.8256, + "step": 1246 + }, + { + "epoch": 
0.13154008438818565, + "grad_norm": 1.061652660369873, + "learning_rate": 0.0014449219206265041, + "loss": 1.7832, + "step": 1247 + }, + { + "epoch": 0.13164556962025317, + "grad_norm": 0.9028099179267883, + "learning_rate": 0.0014448274479558513, + "loss": 1.7944, + "step": 1248 + }, + { + "epoch": 0.13175105485232066, + "grad_norm": 0.7117069959640503, + "learning_rate": 0.0014447328974263182, + "loss": 1.7782, + "step": 1249 + }, + { + "epoch": 0.13185654008438819, + "grad_norm": 0.7960436344146729, + "learning_rate": 0.0014446382690484997, + "loss": 1.7851, + "step": 1250 + }, + { + "epoch": 0.1319620253164557, + "grad_norm": 0.7290751338005066, + "learning_rate": 0.0014445435628329993, + "loss": 1.8081, + "step": 1251 + }, + { + "epoch": 0.1320675105485232, + "grad_norm": 0.7657306790351868, + "learning_rate": 0.0014444487787904294, + "loss": 1.7671, + "step": 1252 + }, + { + "epoch": 0.13217299578059072, + "grad_norm": 0.7914220094680786, + "learning_rate": 0.001444353916931411, + "loss": 1.7679, + "step": 1253 + }, + { + "epoch": 0.13227848101265824, + "grad_norm": 0.7564322352409363, + "learning_rate": 0.001444258977266574, + "loss": 1.751, + "step": 1254 + }, + { + "epoch": 0.13238396624472573, + "grad_norm": 0.8258821368217468, + "learning_rate": 0.0014441639598065565, + "loss": 1.7578, + "step": 1255 + }, + { + "epoch": 0.13248945147679325, + "grad_norm": 1.109049677848816, + "learning_rate": 0.001444068864562006, + "loss": 1.7716, + "step": 1256 + }, + { + "epoch": 0.13259493670886077, + "grad_norm": 1.0367786884307861, + "learning_rate": 0.0014439736915435786, + "loss": 1.8003, + "step": 1257 + }, + { + "epoch": 0.13270042194092826, + "grad_norm": 0.7790071964263916, + "learning_rate": 0.001443878440761938, + "loss": 1.7666, + "step": 1258 + }, + { + "epoch": 0.13280590717299579, + "grad_norm": 0.8064179420471191, + "learning_rate": 0.0014437831122277585, + "loss": 1.7833, + "step": 1259 + }, + { + "epoch": 0.13291139240506328, + "grad_norm": 
1.0513782501220703, + "learning_rate": 0.0014436877059517215, + "loss": 1.772, + "step": 1260 + }, + { + "epoch": 0.1330168776371308, + "grad_norm": 0.7219683527946472, + "learning_rate": 0.0014435922219445182, + "loss": 1.7776, + "step": 1261 + }, + { + "epoch": 0.13312236286919832, + "grad_norm": 0.791477620601654, + "learning_rate": 0.0014434966602168478, + "loss": 1.7586, + "step": 1262 + }, + { + "epoch": 0.1332278481012658, + "grad_norm": 0.8902749419212341, + "learning_rate": 0.0014434010207794185, + "loss": 1.7603, + "step": 1263 + }, + { + "epoch": 0.13333333333333333, + "grad_norm": 1.005233645439148, + "learning_rate": 0.0014433053036429474, + "loss": 1.7455, + "step": 1264 + }, + { + "epoch": 0.13343881856540085, + "grad_norm": 1.0005639791488647, + "learning_rate": 0.00144320950881816, + "loss": 1.7711, + "step": 1265 + }, + { + "epoch": 0.13354430379746834, + "grad_norm": 0.7393149137496948, + "learning_rate": 0.0014431136363157902, + "loss": 1.7687, + "step": 1266 + }, + { + "epoch": 0.13364978902953586, + "grad_norm": 0.794333815574646, + "learning_rate": 0.0014430176861465812, + "loss": 1.7957, + "step": 1267 + }, + { + "epoch": 0.13375527426160339, + "grad_norm": 0.822784960269928, + "learning_rate": 0.001442921658321285, + "loss": 1.7338, + "step": 1268 + }, + { + "epoch": 0.13386075949367088, + "grad_norm": 0.7833670377731323, + "learning_rate": 0.0014428255528506617, + "loss": 1.8124, + "step": 1269 + }, + { + "epoch": 0.1339662447257384, + "grad_norm": 1.0421143770217896, + "learning_rate": 0.0014427293697454803, + "loss": 1.7701, + "step": 1270 + }, + { + "epoch": 0.13407172995780592, + "grad_norm": 0.7331686615943909, + "learning_rate": 0.001442633109016519, + "loss": 1.8019, + "step": 1271 + }, + { + "epoch": 0.1341772151898734, + "grad_norm": 0.9354903101921082, + "learning_rate": 0.001442536770674564, + "loss": 1.8359, + "step": 1272 + }, + { + "epoch": 0.13428270042194093, + "grad_norm": 0.9824520349502563, + "learning_rate": 
0.0014424403547304103, + "loss": 1.7619, + "step": 1273 + }, + { + "epoch": 0.13438818565400845, + "grad_norm": 0.7779013514518738, + "learning_rate": 0.0014423438611948624, + "loss": 1.7889, + "step": 1274 + }, + { + "epoch": 0.13449367088607594, + "grad_norm": 0.798350989818573, + "learning_rate": 0.0014422472900787323, + "loss": 1.7847, + "step": 1275 + }, + { + "epoch": 0.13459915611814346, + "grad_norm": 0.9472059607505798, + "learning_rate": 0.0014421506413928415, + "loss": 1.7608, + "step": 1276 + }, + { + "epoch": 0.13470464135021096, + "grad_norm": 0.8495382070541382, + "learning_rate": 0.0014420539151480199, + "loss": 1.7968, + "step": 1277 + }, + { + "epoch": 0.13481012658227848, + "grad_norm": 0.7338272929191589, + "learning_rate": 0.0014419571113551063, + "loss": 1.7688, + "step": 1278 + }, + { + "epoch": 0.134915611814346, + "grad_norm": 0.7279659509658813, + "learning_rate": 0.0014418602300249482, + "loss": 1.7981, + "step": 1279 + }, + { + "epoch": 0.1350210970464135, + "grad_norm": 0.7433304190635681, + "learning_rate": 0.001441763271168401, + "loss": 1.7482, + "step": 1280 + }, + { + "epoch": 0.135126582278481, + "grad_norm": 0.95670485496521, + "learning_rate": 0.00144166623479633, + "loss": 1.7837, + "step": 1281 + }, + { + "epoch": 0.13523206751054853, + "grad_norm": 0.9599905014038086, + "learning_rate": 0.0014415691209196085, + "loss": 1.7253, + "step": 1282 + }, + { + "epoch": 0.13533755274261602, + "grad_norm": 0.8108053803443909, + "learning_rate": 0.0014414719295491184, + "loss": 1.7876, + "step": 1283 + }, + { + "epoch": 0.13544303797468354, + "grad_norm": 0.7007649540901184, + "learning_rate": 0.0014413746606957505, + "loss": 1.7498, + "step": 1284 + }, + { + "epoch": 0.13554852320675106, + "grad_norm": 1.1243233680725098, + "learning_rate": 0.0014412773143704046, + "loss": 1.7498, + "step": 1285 + }, + { + "epoch": 0.13565400843881856, + "grad_norm": 0.9084108471870422, + "learning_rate": 0.0014411798905839884, + "loss": 1.766, + 
"step": 1286 + }, + { + "epoch": 0.13575949367088608, + "grad_norm": 0.784788966178894, + "learning_rate": 0.0014410823893474193, + "loss": 1.7633, + "step": 1287 + }, + { + "epoch": 0.1358649789029536, + "grad_norm": 0.7171452045440674, + "learning_rate": 0.001440984810671622, + "loss": 1.7863, + "step": 1288 + }, + { + "epoch": 0.1359704641350211, + "grad_norm": 0.7020769715309143, + "learning_rate": 0.0014408871545675314, + "loss": 1.8203, + "step": 1289 + }, + { + "epoch": 0.1360759493670886, + "grad_norm": 0.6905579566955566, + "learning_rate": 0.00144078942104609, + "loss": 1.7784, + "step": 1290 + }, + { + "epoch": 0.13618143459915613, + "grad_norm": 0.7203900814056396, + "learning_rate": 0.0014406916101182491, + "loss": 1.7949, + "step": 1291 + }, + { + "epoch": 0.13628691983122362, + "grad_norm": 0.7566185593605042, + "learning_rate": 0.0014405937217949695, + "loss": 1.7624, + "step": 1292 + }, + { + "epoch": 0.13639240506329114, + "grad_norm": 0.7078579664230347, + "learning_rate": 0.0014404957560872197, + "loss": 1.7627, + "step": 1293 + }, + { + "epoch": 0.13649789029535864, + "grad_norm": 0.7657403349876404, + "learning_rate": 0.0014403977130059773, + "loss": 1.7973, + "step": 1294 + }, + { + "epoch": 0.13660337552742616, + "grad_norm": 0.745755672454834, + "learning_rate": 0.0014402995925622284, + "loss": 1.7053, + "step": 1295 + }, + { + "epoch": 0.13670886075949368, + "grad_norm": 0.7208760976791382, + "learning_rate": 0.0014402013947669681, + "loss": 1.773, + "step": 1296 + }, + { + "epoch": 0.13681434599156117, + "grad_norm": 0.8644149303436279, + "learning_rate": 0.0014401031196312, + "loss": 1.7774, + "step": 1297 + }, + { + "epoch": 0.1369198312236287, + "grad_norm": 1.1449702978134155, + "learning_rate": 0.001440004767165936, + "loss": 1.718, + "step": 1298 + }, + { + "epoch": 0.1370253164556962, + "grad_norm": 1.027312159538269, + "learning_rate": 0.0014399063373821972, + "loss": 1.7547, + "step": 1299 + }, + { + "epoch": 0.1371308016877637, 
+ "grad_norm": 0.7334897518157959, + "learning_rate": 0.001439807830291013, + "loss": 1.7734, + "step": 1300 + }, + { + "epoch": 0.13723628691983122, + "grad_norm": 0.8365859389305115, + "learning_rate": 0.001439709245903422, + "loss": 1.7607, + "step": 1301 + }, + { + "epoch": 0.13734177215189874, + "grad_norm": 0.8061856031417847, + "learning_rate": 0.0014396105842304707, + "loss": 1.7876, + "step": 1302 + }, + { + "epoch": 0.13744725738396624, + "grad_norm": 0.6972666382789612, + "learning_rate": 0.0014395118452832146, + "loss": 1.7686, + "step": 1303 + }, + { + "epoch": 0.13755274261603376, + "grad_norm": 0.7251750826835632, + "learning_rate": 0.001439413029072718, + "loss": 1.7612, + "step": 1304 + }, + { + "epoch": 0.13765822784810128, + "grad_norm": 0.7050989270210266, + "learning_rate": 0.001439314135610054, + "loss": 1.7766, + "step": 1305 + }, + { + "epoch": 0.13776371308016877, + "grad_norm": 0.7090973854064941, + "learning_rate": 0.0014392151649063039, + "loss": 1.767, + "step": 1306 + }, + { + "epoch": 0.1378691983122363, + "grad_norm": 0.7685337662696838, + "learning_rate": 0.0014391161169725573, + "loss": 1.7442, + "step": 1307 + }, + { + "epoch": 0.1379746835443038, + "grad_norm": 0.8015944361686707, + "learning_rate": 0.001439016991819914, + "loss": 1.7856, + "step": 1308 + }, + { + "epoch": 0.1380801687763713, + "grad_norm": 0.7322651743888855, + "learning_rate": 0.001438917789459481, + "loss": 1.75, + "step": 1309 + }, + { + "epoch": 0.13818565400843882, + "grad_norm": 0.690895676612854, + "learning_rate": 0.0014388185099023744, + "loss": 1.7441, + "step": 1310 + }, + { + "epoch": 0.13829113924050632, + "grad_norm": 0.7553848624229431, + "learning_rate": 0.001438719153159719, + "loss": 1.7743, + "step": 1311 + }, + { + "epoch": 0.13839662447257384, + "grad_norm": 0.7687403559684753, + "learning_rate": 0.0014386197192426482, + "loss": 1.7587, + "step": 1312 + }, + { + "epoch": 0.13850210970464136, + "grad_norm": 0.8521800637245178, + 
"learning_rate": 0.001438520208162304, + "loss": 1.7897, + "step": 1313 + }, + { + "epoch": 0.13860759493670885, + "grad_norm": 0.7221477031707764, + "learning_rate": 0.0014384206199298374, + "loss": 1.7917, + "step": 1314 + }, + { + "epoch": 0.13871308016877637, + "grad_norm": 0.7549881935119629, + "learning_rate": 0.0014383209545564073, + "loss": 1.793, + "step": 1315 + }, + { + "epoch": 0.1388185654008439, + "grad_norm": 0.8666206002235413, + "learning_rate": 0.001438221212053182, + "loss": 1.7239, + "step": 1316 + }, + { + "epoch": 0.13892405063291138, + "grad_norm": 0.7919970154762268, + "learning_rate": 0.0014381213924313386, + "loss": 1.7542, + "step": 1317 + }, + { + "epoch": 0.1390295358649789, + "grad_norm": 0.8159410357475281, + "learning_rate": 0.0014380214957020613, + "loss": 1.767, + "step": 1318 + }, + { + "epoch": 0.13913502109704642, + "grad_norm": 0.8324804306030273, + "learning_rate": 0.001437921521876545, + "loss": 1.7748, + "step": 1319 + }, + { + "epoch": 0.13924050632911392, + "grad_norm": 1.0432636737823486, + "learning_rate": 0.0014378214709659916, + "loss": 1.7593, + "step": 1320 + }, + { + "epoch": 0.13934599156118144, + "grad_norm": 1.0124508142471313, + "learning_rate": 0.0014377213429816128, + "loss": 1.7413, + "step": 1321 + }, + { + "epoch": 0.13945147679324896, + "grad_norm": 0.9083257913589478, + "learning_rate": 0.0014376211379346282, + "loss": 1.7501, + "step": 1322 + }, + { + "epoch": 0.13955696202531645, + "grad_norm": 0.9196361303329468, + "learning_rate": 0.0014375208558362663, + "loss": 1.7664, + "step": 1323 + }, + { + "epoch": 0.13966244725738397, + "grad_norm": 0.8157097101211548, + "learning_rate": 0.0014374204966977639, + "loss": 1.7615, + "step": 1324 + }, + { + "epoch": 0.13976793248945146, + "grad_norm": 0.7129227519035339, + "learning_rate": 0.0014373200605303674, + "loss": 1.7383, + "step": 1325 + }, + { + "epoch": 0.13987341772151898, + "grad_norm": 0.7971266508102417, + "learning_rate": 0.001437219547345331, + 
"loss": 1.7712, + "step": 1326 + }, + { + "epoch": 0.1399789029535865, + "grad_norm": 0.8752045631408691, + "learning_rate": 0.0014371189571539174, + "loss": 1.7606, + "step": 1327 + }, + { + "epoch": 0.140084388185654, + "grad_norm": 0.7591539621353149, + "learning_rate": 0.0014370182899673982, + "loss": 1.7496, + "step": 1328 + }, + { + "epoch": 0.14018987341772152, + "grad_norm": 0.9207198619842529, + "learning_rate": 0.0014369175457970538, + "loss": 1.7533, + "step": 1329 + }, + { + "epoch": 0.14029535864978904, + "grad_norm": 1.3990358114242554, + "learning_rate": 0.0014368167246541733, + "loss": 1.7865, + "step": 1330 + }, + { + "epoch": 0.14040084388185653, + "grad_norm": 0.7711177468299866, + "learning_rate": 0.0014367158265500537, + "loss": 1.7787, + "step": 1331 + }, + { + "epoch": 0.14050632911392405, + "grad_norm": 1.1077121496200562, + "learning_rate": 0.0014366148514960016, + "loss": 1.7693, + "step": 1332 + }, + { + "epoch": 0.14061181434599157, + "grad_norm": 0.9961812496185303, + "learning_rate": 0.001436513799503332, + "loss": 1.7357, + "step": 1333 + }, + { + "epoch": 0.14071729957805906, + "grad_norm": 0.8232142925262451, + "learning_rate": 0.0014364126705833675, + "loss": 1.7428, + "step": 1334 + }, + { + "epoch": 0.14082278481012658, + "grad_norm": 1.2144005298614502, + "learning_rate": 0.0014363114647474406, + "loss": 1.7336, + "step": 1335 + }, + { + "epoch": 0.1409282700421941, + "grad_norm": 1.0606623888015747, + "learning_rate": 0.0014362101820068918, + "loss": 1.7558, + "step": 1336 + }, + { + "epoch": 0.1410337552742616, + "grad_norm": 0.7582722902297974, + "learning_rate": 0.0014361088223730704, + "loss": 1.781, + "step": 1337 + }, + { + "epoch": 0.14113924050632912, + "grad_norm": 1.1381856203079224, + "learning_rate": 0.0014360073858573341, + "loss": 1.7544, + "step": 1338 + }, + { + "epoch": 0.14124472573839664, + "grad_norm": 0.8779007792472839, + "learning_rate": 0.0014359058724710497, + "loss": 1.8034, + "step": 1339 + }, + { + 
"epoch": 0.14135021097046413, + "grad_norm": 0.7700310349464417, + "learning_rate": 0.0014358042822255918, + "loss": 1.7373, + "step": 1340 + }, + { + "epoch": 0.14145569620253165, + "grad_norm": 0.860141932964325, + "learning_rate": 0.0014357026151323444, + "loss": 1.7671, + "step": 1341 + }, + { + "epoch": 0.14156118143459914, + "grad_norm": 0.9048537611961365, + "learning_rate": 0.0014356008712027, + "loss": 1.796, + "step": 1342 + }, + { + "epoch": 0.14166666666666666, + "grad_norm": 0.7589737176895142, + "learning_rate": 0.0014354990504480592, + "loss": 1.7442, + "step": 1343 + }, + { + "epoch": 0.14177215189873418, + "grad_norm": 0.8478718400001526, + "learning_rate": 0.0014353971528798313, + "loss": 1.7651, + "step": 1344 + }, + { + "epoch": 0.14187763713080168, + "grad_norm": 1.1018155813217163, + "learning_rate": 0.001435295178509435, + "loss": 1.7538, + "step": 1345 + }, + { + "epoch": 0.1419831223628692, + "grad_norm": 0.8580230474472046, + "learning_rate": 0.0014351931273482966, + "loss": 1.7458, + "step": 1346 + }, + { + "epoch": 0.14208860759493672, + "grad_norm": 0.6667075157165527, + "learning_rate": 0.0014350909994078516, + "loss": 1.7583, + "step": 1347 + }, + { + "epoch": 0.1421940928270042, + "grad_norm": 0.6861121654510498, + "learning_rate": 0.0014349887946995441, + "loss": 1.7303, + "step": 1348 + }, + { + "epoch": 0.14229957805907173, + "grad_norm": 0.7855656147003174, + "learning_rate": 0.0014348865132348262, + "loss": 1.7755, + "step": 1349 + }, + { + "epoch": 0.14240506329113925, + "grad_norm": 0.703948438167572, + "learning_rate": 0.0014347841550251597, + "loss": 1.765, + "step": 1350 + }, + { + "epoch": 0.14251054852320674, + "grad_norm": 0.9044631719589233, + "learning_rate": 0.0014346817200820137, + "loss": 1.7388, + "step": 1351 + }, + { + "epoch": 0.14261603375527426, + "grad_norm": 0.7394034266471863, + "learning_rate": 0.0014345792084168672, + "loss": 1.7275, + "step": 1352 + }, + { + "epoch": 0.14272151898734178, + "grad_norm": 
0.7973538041114807, + "learning_rate": 0.0014344766200412062, + "loss": 1.7621, + "step": 1353 + }, + { + "epoch": 0.14282700421940928, + "grad_norm": 0.869996190071106, + "learning_rate": 0.0014343739549665274, + "loss": 1.7753, + "step": 1354 + }, + { + "epoch": 0.1429324894514768, + "grad_norm": 0.8301615118980408, + "learning_rate": 0.0014342712132043342, + "loss": 1.7308, + "step": 1355 + }, + { + "epoch": 0.14303797468354432, + "grad_norm": 0.768605649471283, + "learning_rate": 0.001434168394766139, + "loss": 1.7325, + "step": 1356 + }, + { + "epoch": 0.1431434599156118, + "grad_norm": 0.80864417552948, + "learning_rate": 0.001434065499663464, + "loss": 1.7634, + "step": 1357 + }, + { + "epoch": 0.14324894514767933, + "grad_norm": 0.6782084703445435, + "learning_rate": 0.0014339625279078388, + "loss": 1.7505, + "step": 1358 + }, + { + "epoch": 0.14335443037974682, + "grad_norm": 0.781444787979126, + "learning_rate": 0.0014338594795108017, + "loss": 1.7252, + "step": 1359 + }, + { + "epoch": 0.14345991561181434, + "grad_norm": 0.830514132976532, + "learning_rate": 0.0014337563544838997, + "loss": 1.7752, + "step": 1360 + }, + { + "epoch": 0.14356540084388186, + "grad_norm": 0.7011828422546387, + "learning_rate": 0.0014336531528386888, + "loss": 1.7326, + "step": 1361 + }, + { + "epoch": 0.14367088607594936, + "grad_norm": 0.7500370740890503, + "learning_rate": 0.0014335498745867332, + "loss": 1.7505, + "step": 1362 + }, + { + "epoch": 0.14377637130801688, + "grad_norm": 0.8415970802307129, + "learning_rate": 0.0014334465197396054, + "loss": 1.7607, + "step": 1363 + }, + { + "epoch": 0.1438818565400844, + "grad_norm": 0.7374060750007629, + "learning_rate": 0.0014333430883088877, + "loss": 1.7429, + "step": 1364 + }, + { + "epoch": 0.1439873417721519, + "grad_norm": 0.6930688619613647, + "learning_rate": 0.001433239580306169, + "loss": 1.7307, + "step": 1365 + }, + { + "epoch": 0.1440928270042194, + "grad_norm": 0.7097570300102234, + "learning_rate": 
0.0014331359957430482, + "loss": 1.7453, + "step": 1366 + }, + { + "epoch": 0.14419831223628693, + "grad_norm": 0.7327412962913513, + "learning_rate": 0.001433032334631133, + "loss": 1.7631, + "step": 1367 + }, + { + "epoch": 0.14430379746835442, + "grad_norm": 0.9199993014335632, + "learning_rate": 0.0014329285969820389, + "loss": 1.7405, + "step": 1368 + }, + { + "epoch": 0.14440928270042194, + "grad_norm": 0.9888438582420349, + "learning_rate": 0.00143282478280739, + "loss": 1.7639, + "step": 1369 + }, + { + "epoch": 0.14451476793248946, + "grad_norm": 0.807594358921051, + "learning_rate": 0.001432720892118819, + "loss": 1.7378, + "step": 1370 + }, + { + "epoch": 0.14462025316455696, + "grad_norm": 0.708183765411377, + "learning_rate": 0.0014326169249279683, + "loss": 1.735, + "step": 1371 + }, + { + "epoch": 0.14472573839662448, + "grad_norm": 0.9016063213348389, + "learning_rate": 0.001432512881246487, + "loss": 1.724, + "step": 1372 + }, + { + "epoch": 0.144831223628692, + "grad_norm": 0.9903687834739685, + "learning_rate": 0.0014324087610860339, + "loss": 1.7585, + "step": 1373 + }, + { + "epoch": 0.1449367088607595, + "grad_norm": 0.8389500379562378, + "learning_rate": 0.0014323045644582765, + "loss": 1.7328, + "step": 1374 + }, + { + "epoch": 0.145042194092827, + "grad_norm": 0.7465028166770935, + "learning_rate": 0.0014322002913748902, + "loss": 1.7572, + "step": 1375 + }, + { + "epoch": 0.1451476793248945, + "grad_norm": 1.0979706048965454, + "learning_rate": 0.0014320959418475596, + "loss": 1.7381, + "step": 1376 + }, + { + "epoch": 0.14525316455696202, + "grad_norm": 0.9074468016624451, + "learning_rate": 0.0014319915158879776, + "loss": 1.7126, + "step": 1377 + }, + { + "epoch": 0.14535864978902954, + "grad_norm": 0.7485944628715515, + "learning_rate": 0.0014318870135078452, + "loss": 1.7239, + "step": 1378 + }, + { + "epoch": 0.14546413502109704, + "grad_norm": 1.0982022285461426, + "learning_rate": 0.001431782434718873, + "loss": 1.7856, + "step": 
1379 + }, + { + "epoch": 0.14556962025316456, + "grad_norm": 0.9831777215003967, + "learning_rate": 0.0014316777795327794, + "loss": 1.7317, + "step": 1380 + }, + { + "epoch": 0.14567510548523208, + "grad_norm": 0.7186659574508667, + "learning_rate": 0.0014315730479612914, + "loss": 1.777, + "step": 1381 + }, + { + "epoch": 0.14578059071729957, + "grad_norm": 0.8143289089202881, + "learning_rate": 0.0014314682400161445, + "loss": 1.7451, + "step": 1382 + }, + { + "epoch": 0.1458860759493671, + "grad_norm": 0.9239808320999146, + "learning_rate": 0.0014313633557090834, + "loss": 1.7298, + "step": 1383 + }, + { + "epoch": 0.1459915611814346, + "grad_norm": 0.8531401753425598, + "learning_rate": 0.0014312583950518607, + "loss": 1.7243, + "step": 1384 + }, + { + "epoch": 0.1460970464135021, + "grad_norm": 0.6965650916099548, + "learning_rate": 0.0014311533580562378, + "loss": 1.7726, + "step": 1385 + }, + { + "epoch": 0.14620253164556962, + "grad_norm": 0.9825159907341003, + "learning_rate": 0.0014310482447339845, + "loss": 1.7621, + "step": 1386 + }, + { + "epoch": 0.14630801687763714, + "grad_norm": 1.1348851919174194, + "learning_rate": 0.0014309430550968794, + "loss": 1.748, + "step": 1387 + }, + { + "epoch": 0.14641350210970464, + "grad_norm": 0.7244431972503662, + "learning_rate": 0.0014308377891567095, + "loss": 1.7401, + "step": 1388 + }, + { + "epoch": 0.14651898734177216, + "grad_norm": 1.1722646951675415, + "learning_rate": 0.0014307324469252703, + "loss": 1.7575, + "step": 1389 + }, + { + "epoch": 0.14662447257383968, + "grad_norm": 1.1724995374679565, + "learning_rate": 0.001430627028414366, + "loss": 1.7662, + "step": 1390 + }, + { + "epoch": 0.14672995780590717, + "grad_norm": 0.7699658870697021, + "learning_rate": 0.0014305215336358093, + "loss": 1.775, + "step": 1391 + }, + { + "epoch": 0.1468354430379747, + "grad_norm": 1.4396076202392578, + "learning_rate": 0.0014304159626014213, + "loss": 1.7797, + "step": 1392 + }, + { + "epoch": 
0.14694092827004218, + "grad_norm": 0.7333863973617554, + "learning_rate": 0.0014303103153230322, + "loss": 1.7276, + "step": 1393 + }, + { + "epoch": 0.1470464135021097, + "grad_norm": 1.3409966230392456, + "learning_rate": 0.0014302045918124795, + "loss": 1.7431, + "step": 1394 + }, + { + "epoch": 0.14715189873417722, + "grad_norm": 0.8924731612205505, + "learning_rate": 0.0014300987920816107, + "loss": 1.7641, + "step": 1395 + }, + { + "epoch": 0.14725738396624471, + "grad_norm": 0.9730976223945618, + "learning_rate": 0.0014299929161422807, + "loss": 1.7471, + "step": 1396 + }, + { + "epoch": 0.14736286919831224, + "grad_norm": 1.3873295783996582, + "learning_rate": 0.001429886964006354, + "loss": 1.7053, + "step": 1397 + }, + { + "epoch": 0.14746835443037976, + "grad_norm": 0.7698411345481873, + "learning_rate": 0.0014297809356857026, + "loss": 1.6912, + "step": 1398 + }, + { + "epoch": 0.14757383966244725, + "grad_norm": 1.108999490737915, + "learning_rate": 0.0014296748311922074, + "loss": 1.7388, + "step": 1399 + }, + { + "epoch": 0.14767932489451477, + "grad_norm": 0.8729742169380188, + "learning_rate": 0.0014295686505377586, + "loss": 1.7004, + "step": 1400 + }, + { + "epoch": 0.1477848101265823, + "grad_norm": 0.9977589845657349, + "learning_rate": 0.001429462393734254, + "loss": 1.7568, + "step": 1401 + }, + { + "epoch": 0.14789029535864978, + "grad_norm": 0.8990689516067505, + "learning_rate": 0.0014293560607935999, + "loss": 1.7262, + "step": 1402 + }, + { + "epoch": 0.1479957805907173, + "grad_norm": 0.7874919772148132, + "learning_rate": 0.0014292496517277116, + "loss": 1.7696, + "step": 1403 + }, + { + "epoch": 0.14810126582278482, + "grad_norm": 0.7541494965553284, + "learning_rate": 0.0014291431665485125, + "loss": 1.7341, + "step": 1404 + }, + { + "epoch": 0.14820675105485231, + "grad_norm": 0.7717254161834717, + "learning_rate": 0.0014290366052679352, + "loss": 1.7141, + "step": 1405 + }, + { + "epoch": 0.14831223628691984, + "grad_norm": 
0.955639660358429, + "learning_rate": 0.0014289299678979207, + "loss": 1.7294, + "step": 1406 + }, + { + "epoch": 0.14841772151898736, + "grad_norm": 0.8663560748100281, + "learning_rate": 0.0014288232544504174, + "loss": 1.671, + "step": 1407 + }, + { + "epoch": 0.14852320675105485, + "grad_norm": 1.0008610486984253, + "learning_rate": 0.0014287164649373837, + "loss": 1.7464, + "step": 1408 + }, + { + "epoch": 0.14862869198312237, + "grad_norm": 0.9949718713760376, + "learning_rate": 0.0014286095993707856, + "loss": 1.7668, + "step": 1409 + }, + { + "epoch": 0.14873417721518986, + "grad_norm": 0.7768282294273376, + "learning_rate": 0.0014285026577625982, + "loss": 1.7462, + "step": 1410 + }, + { + "epoch": 0.14883966244725738, + "grad_norm": 0.9254842400550842, + "learning_rate": 0.0014283956401248048, + "loss": 1.7654, + "step": 1411 + }, + { + "epoch": 0.1489451476793249, + "grad_norm": 0.9137781858444214, + "learning_rate": 0.0014282885464693969, + "loss": 1.7172, + "step": 1412 + }, + { + "epoch": 0.1490506329113924, + "grad_norm": 0.7002085447311401, + "learning_rate": 0.001428181376808375, + "loss": 1.7268, + "step": 1413 + }, + { + "epoch": 0.14915611814345991, + "grad_norm": 1.1757274866104126, + "learning_rate": 0.0014280741311537483, + "loss": 1.7492, + "step": 1414 + }, + { + "epoch": 0.14926160337552744, + "grad_norm": 0.8314210772514343, + "learning_rate": 0.001427966809517534, + "loss": 1.7106, + "step": 1415 + }, + { + "epoch": 0.14936708860759493, + "grad_norm": 0.8402689099311829, + "learning_rate": 0.001427859411911758, + "loss": 1.7662, + "step": 1416 + }, + { + "epoch": 0.14947257383966245, + "grad_norm": 1.1976873874664307, + "learning_rate": 0.0014277519383484548, + "loss": 1.7183, + "step": 1417 + }, + { + "epoch": 0.14957805907172997, + "grad_norm": 0.7578982710838318, + "learning_rate": 0.0014276443888396675, + "loss": 1.747, + "step": 1418 + }, + { + "epoch": 0.14968354430379746, + "grad_norm": 1.42343270778656, + "learning_rate": 
0.0014275367633974473, + "loss": 1.7516, + "step": 1419 + }, + { + "epoch": 0.14978902953586498, + "grad_norm": 0.6812211871147156, + "learning_rate": 0.0014274290620338542, + "loss": 1.7535, + "step": 1420 + }, + { + "epoch": 0.1498945147679325, + "grad_norm": 0.8583179116249084, + "learning_rate": 0.0014273212847609566, + "loss": 1.7364, + "step": 1421 + }, + { + "epoch": 0.15, + "grad_norm": 0.718254804611206, + "learning_rate": 0.0014272134315908317, + "loss": 1.7181, + "step": 1422 + }, + { + "epoch": 0.15010548523206751, + "grad_norm": 0.8566754460334778, + "learning_rate": 0.0014271055025355652, + "loss": 1.7272, + "step": 1423 + }, + { + "epoch": 0.150210970464135, + "grad_norm": 1.222701072692871, + "learning_rate": 0.0014269974976072505, + "loss": 1.7323, + "step": 1424 + }, + { + "epoch": 0.15031645569620253, + "grad_norm": 0.8223708868026733, + "learning_rate": 0.0014268894168179903, + "loss": 1.7148, + "step": 1425 + }, + { + "epoch": 0.15042194092827005, + "grad_norm": 1.0104161500930786, + "learning_rate": 0.0014267812601798957, + "loss": 1.7521, + "step": 1426 + }, + { + "epoch": 0.15052742616033754, + "grad_norm": 1.2649123668670654, + "learning_rate": 0.0014266730277050863, + "loss": 1.7734, + "step": 1427 + }, + { + "epoch": 0.15063291139240506, + "grad_norm": 0.857974112033844, + "learning_rate": 0.00142656471940569, + "loss": 1.7351, + "step": 1428 + }, + { + "epoch": 0.15073839662447258, + "grad_norm": 0.7970879077911377, + "learning_rate": 0.001426456335293843, + "loss": 1.7183, + "step": 1429 + }, + { + "epoch": 0.15084388185654007, + "grad_norm": 0.8004881143569946, + "learning_rate": 0.0014263478753816906, + "loss": 1.7063, + "step": 1430 + }, + { + "epoch": 0.1509493670886076, + "grad_norm": 0.7360291481018066, + "learning_rate": 0.0014262393396813863, + "loss": 1.7302, + "step": 1431 + }, + { + "epoch": 0.15105485232067511, + "grad_norm": 0.9482747912406921, + "learning_rate": 0.001426130728205092, + "loss": 1.7455, + "step": 1432 + }, + 
{ + "epoch": 0.1511603375527426, + "grad_norm": 0.8889839053153992, + "learning_rate": 0.001426022040964978, + "loss": 1.6875, + "step": 1433 + }, + { + "epoch": 0.15126582278481013, + "grad_norm": 0.6646797060966492, + "learning_rate": 0.0014259132779732234, + "loss": 1.7101, + "step": 1434 + }, + { + "epoch": 0.15137130801687765, + "grad_norm": 0.7093903422355652, + "learning_rate": 0.0014258044392420155, + "loss": 1.7775, + "step": 1435 + }, + { + "epoch": 0.15147679324894514, + "grad_norm": 0.6837721467018127, + "learning_rate": 0.0014256955247835504, + "loss": 1.7147, + "step": 1436 + }, + { + "epoch": 0.15158227848101266, + "grad_norm": 0.7132874131202698, + "learning_rate": 0.0014255865346100324, + "loss": 1.7459, + "step": 1437 + }, + { + "epoch": 0.15168776371308018, + "grad_norm": 0.7433846592903137, + "learning_rate": 0.0014254774687336744, + "loss": 1.6956, + "step": 1438 + }, + { + "epoch": 0.15179324894514767, + "grad_norm": 0.7339115142822266, + "learning_rate": 0.0014253683271666978, + "loss": 1.7373, + "step": 1439 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 0.7104413509368896, + "learning_rate": 0.0014252591099213326, + "loss": 1.7328, + "step": 1440 + }, + { + "epoch": 0.1520042194092827, + "grad_norm": 0.7532001733779907, + "learning_rate": 0.0014251498170098167, + "loss": 1.6802, + "step": 1441 + }, + { + "epoch": 0.1521097046413502, + "grad_norm": 1.0238569974899292, + "learning_rate": 0.0014250404484443975, + "loss": 1.739, + "step": 1442 + }, + { + "epoch": 0.15221518987341773, + "grad_norm": 0.8643844127655029, + "learning_rate": 0.0014249310042373298, + "loss": 1.728, + "step": 1443 + }, + { + "epoch": 0.15232067510548522, + "grad_norm": 0.787152111530304, + "learning_rate": 0.0014248214844008776, + "loss": 1.7434, + "step": 1444 + }, + { + "epoch": 0.15242616033755274, + "grad_norm": 0.6954615116119385, + "learning_rate": 0.001424711888947313, + "loss": 1.7194, + "step": 1445 + }, + { + "epoch": 0.15253164556962026, + 
"grad_norm": 0.7101428508758545, + "learning_rate": 0.001424602217888917, + "loss": 1.7248, + "step": 1446 + }, + { + "epoch": 0.15263713080168775, + "grad_norm": 0.7866194844245911, + "learning_rate": 0.0014244924712379786, + "loss": 1.7394, + "step": 1447 + }, + { + "epoch": 0.15274261603375527, + "grad_norm": 0.8064047694206238, + "learning_rate": 0.0014243826490067954, + "loss": 1.7242, + "step": 1448 + }, + { + "epoch": 0.1528481012658228, + "grad_norm": 0.7359359860420227, + "learning_rate": 0.0014242727512076736, + "loss": 1.7359, + "step": 1449 + }, + { + "epoch": 0.1529535864978903, + "grad_norm": 0.7662122249603271, + "learning_rate": 0.001424162777852928, + "loss": 1.744, + "step": 1450 + }, + { + "epoch": 0.1530590717299578, + "grad_norm": 0.7494163513183594, + "learning_rate": 0.0014240527289548814, + "loss": 1.7362, + "step": 1451 + }, + { + "epoch": 0.15316455696202533, + "grad_norm": 0.7088572978973389, + "learning_rate": 0.0014239426045258652, + "loss": 1.7426, + "step": 1452 + }, + { + "epoch": 0.15327004219409282, + "grad_norm": 0.9355782866477966, + "learning_rate": 0.0014238324045782198, + "loss": 1.717, + "step": 1453 + }, + { + "epoch": 0.15337552742616034, + "grad_norm": 1.1064172983169556, + "learning_rate": 0.0014237221291242932, + "loss": 1.6926, + "step": 1454 + }, + { + "epoch": 0.15348101265822786, + "grad_norm": 0.7368583679199219, + "learning_rate": 0.0014236117781764425, + "loss": 1.7104, + "step": 1455 + }, + { + "epoch": 0.15358649789029535, + "grad_norm": 0.9319393038749695, + "learning_rate": 0.0014235013517470334, + "loss": 1.7602, + "step": 1456 + }, + { + "epoch": 0.15369198312236287, + "grad_norm": 1.3018933534622192, + "learning_rate": 0.0014233908498484393, + "loss": 1.7122, + "step": 1457 + }, + { + "epoch": 0.15379746835443037, + "grad_norm": 0.7036160230636597, + "learning_rate": 0.0014232802724930427, + "loss": 1.7223, + "step": 1458 + }, + { + "epoch": 0.1539029535864979, + "grad_norm": 1.4678186178207397, + 
"learning_rate": 0.0014231696196932342, + "loss": 1.7321, + "step": 1459 + }, + { + "epoch": 0.1540084388185654, + "grad_norm": 0.7249419689178467, + "learning_rate": 0.0014230588914614134, + "loss": 1.7706, + "step": 1460 + }, + { + "epoch": 0.1541139240506329, + "grad_norm": 1.46665620803833, + "learning_rate": 0.0014229480878099872, + "loss": 1.7496, + "step": 1461 + }, + { + "epoch": 0.15421940928270042, + "grad_norm": 0.7128012180328369, + "learning_rate": 0.0014228372087513725, + "loss": 1.6789, + "step": 1462 + }, + { + "epoch": 0.15432489451476794, + "grad_norm": 1.2167108058929443, + "learning_rate": 0.0014227262542979933, + "loss": 1.7031, + "step": 1463 + }, + { + "epoch": 0.15443037974683543, + "grad_norm": 0.8061521053314209, + "learning_rate": 0.0014226152244622826, + "loss": 1.7221, + "step": 1464 + }, + { + "epoch": 0.15453586497890295, + "grad_norm": 0.78562331199646, + "learning_rate": 0.0014225041192566822, + "loss": 1.7035, + "step": 1465 + }, + { + "epoch": 0.15464135021097047, + "grad_norm": 0.6685592532157898, + "learning_rate": 0.001422392938693642, + "loss": 1.7079, + "step": 1466 + }, + { + "epoch": 0.15474683544303797, + "grad_norm": 0.8260959982872009, + "learning_rate": 0.0014222816827856202, + "loss": 1.7578, + "step": 1467 + }, + { + "epoch": 0.1548523206751055, + "grad_norm": 0.70361328125, + "learning_rate": 0.0014221703515450834, + "loss": 1.7284, + "step": 1468 + }, + { + "epoch": 0.154957805907173, + "grad_norm": 0.7553149461746216, + "learning_rate": 0.001422058944984507, + "loss": 1.7429, + "step": 1469 + }, + { + "epoch": 0.1550632911392405, + "grad_norm": 0.9662527441978455, + "learning_rate": 0.0014219474631163745, + "loss": 1.7276, + "step": 1470 + }, + { + "epoch": 0.15516877637130802, + "grad_norm": 0.6451228857040405, + "learning_rate": 0.0014218359059531783, + "loss": 1.7393, + "step": 1471 + }, + { + "epoch": 0.15527426160337554, + "grad_norm": 0.8695082068443298, + "learning_rate": 0.0014217242735074188, + "loss": 
1.7448, + "step": 1472 + }, + { + "epoch": 0.15537974683544303, + "grad_norm": 0.7386113405227661, + "learning_rate": 0.0014216125657916046, + "loss": 1.7125, + "step": 1473 + }, + { + "epoch": 0.15548523206751055, + "grad_norm": 0.7727784514427185, + "learning_rate": 0.0014215007828182536, + "loss": 1.7108, + "step": 1474 + }, + { + "epoch": 0.15559071729957805, + "grad_norm": 0.9609541296958923, + "learning_rate": 0.0014213889245998917, + "loss": 1.7353, + "step": 1475 + }, + { + "epoch": 0.15569620253164557, + "grad_norm": 0.7453968524932861, + "learning_rate": 0.0014212769911490528, + "loss": 1.7191, + "step": 1476 + }, + { + "epoch": 0.1558016877637131, + "grad_norm": 0.7283174991607666, + "learning_rate": 0.0014211649824782797, + "loss": 1.7244, + "step": 1477 + }, + { + "epoch": 0.15590717299578058, + "grad_norm": 0.7473112344741821, + "learning_rate": 0.0014210528986001237, + "loss": 1.72, + "step": 1478 + }, + { + "epoch": 0.1560126582278481, + "grad_norm": 0.7182976007461548, + "learning_rate": 0.001420940739527144, + "loss": 1.7491, + "step": 1479 + }, + { + "epoch": 0.15611814345991562, + "grad_norm": 0.6627541780471802, + "learning_rate": 0.001420828505271909, + "loss": 1.6765, + "step": 1480 + }, + { + "epoch": 0.1562236286919831, + "grad_norm": 0.7368115186691284, + "learning_rate": 0.001420716195846995, + "loss": 1.7372, + "step": 1481 + }, + { + "epoch": 0.15632911392405063, + "grad_norm": 0.6955598592758179, + "learning_rate": 0.0014206038112649865, + "loss": 1.7224, + "step": 1482 + }, + { + "epoch": 0.15643459915611815, + "grad_norm": 0.8393622040748596, + "learning_rate": 0.0014204913515384772, + "loss": 1.7248, + "step": 1483 + }, + { + "epoch": 0.15654008438818565, + "grad_norm": 0.7769884467124939, + "learning_rate": 0.0014203788166800685, + "loss": 1.744, + "step": 1484 + }, + { + "epoch": 0.15664556962025317, + "grad_norm": 0.7534805536270142, + "learning_rate": 0.0014202662067023708, + "loss": 1.6828, + "step": 1485 + }, + { + "epoch": 
0.1567510548523207, + "grad_norm": 0.8304197192192078, + "learning_rate": 0.0014201535216180024, + "loss": 1.7576, + "step": 1486 + }, + { + "epoch": 0.15685654008438818, + "grad_norm": 0.7032349705696106, + "learning_rate": 0.0014200407614395898, + "loss": 1.6811, + "step": 1487 + }, + { + "epoch": 0.1569620253164557, + "grad_norm": 0.7724673748016357, + "learning_rate": 0.0014199279261797692, + "loss": 1.7476, + "step": 1488 + }, + { + "epoch": 0.15706751054852322, + "grad_norm": 0.7038403153419495, + "learning_rate": 0.0014198150158511837, + "loss": 1.7095, + "step": 1489 + }, + { + "epoch": 0.1571729957805907, + "grad_norm": 0.8289119601249695, + "learning_rate": 0.0014197020304664856, + "loss": 1.7266, + "step": 1490 + }, + { + "epoch": 0.15727848101265823, + "grad_norm": 0.8617633581161499, + "learning_rate": 0.0014195889700383357, + "loss": 1.6951, + "step": 1491 + }, + { + "epoch": 0.15738396624472573, + "grad_norm": 0.734815776348114, + "learning_rate": 0.0014194758345794029, + "loss": 1.7331, + "step": 1492 + }, + { + "epoch": 0.15748945147679325, + "grad_norm": 0.9046897888183594, + "learning_rate": 0.0014193626241023644, + "loss": 1.7182, + "step": 1493 + }, + { + "epoch": 0.15759493670886077, + "grad_norm": 0.9125707745552063, + "learning_rate": 0.001419249338619906, + "loss": 1.717, + "step": 1494 + }, + { + "epoch": 0.15770042194092826, + "grad_norm": 0.7109353542327881, + "learning_rate": 0.0014191359781447223, + "loss": 1.6876, + "step": 1495 + }, + { + "epoch": 0.15780590717299578, + "grad_norm": 0.7701314687728882, + "learning_rate": 0.0014190225426895153, + "loss": 1.7664, + "step": 1496 + }, + { + "epoch": 0.1579113924050633, + "grad_norm": 0.756155788898468, + "learning_rate": 0.0014189090322669967, + "loss": 1.7445, + "step": 1497 + }, + { + "epoch": 0.1580168776371308, + "grad_norm": 0.7378673553466797, + "learning_rate": 0.0014187954468898854, + "loss": 1.7065, + "step": 1498 + }, + { + "epoch": 0.1581223628691983, + "grad_norm": 
0.8152537941932678, + "learning_rate": 0.0014186817865709095, + "loss": 1.7567, + "step": 1499 + }, + { + "epoch": 0.15822784810126583, + "grad_norm": 0.7883657813072205, + "learning_rate": 0.0014185680513228048, + "loss": 1.7204, + "step": 1500 + }, + { + "epoch": 0.15833333333333333, + "grad_norm": 0.6700489521026611, + "learning_rate": 0.0014184542411583162, + "loss": 1.7178, + "step": 1501 + }, + { + "epoch": 0.15843881856540085, + "grad_norm": 0.680573582649231, + "learning_rate": 0.001418340356090197, + "loss": 1.7101, + "step": 1502 + }, + { + "epoch": 0.15854430379746837, + "grad_norm": 0.8090732097625732, + "learning_rate": 0.0014182263961312078, + "loss": 1.7239, + "step": 1503 + }, + { + "epoch": 0.15864978902953586, + "grad_norm": 0.7507702708244324, + "learning_rate": 0.001418112361294119, + "loss": 1.7537, + "step": 1504 + }, + { + "epoch": 0.15875527426160338, + "grad_norm": 0.6994723677635193, + "learning_rate": 0.0014179982515917088, + "loss": 1.726, + "step": 1505 + }, + { + "epoch": 0.15886075949367087, + "grad_norm": 0.7061163783073425, + "learning_rate": 0.0014178840670367634, + "loss": 1.7043, + "step": 1506 + }, + { + "epoch": 0.1589662447257384, + "grad_norm": 0.7614158987998962, + "learning_rate": 0.001417769807642078, + "loss": 1.7629, + "step": 1507 + }, + { + "epoch": 0.1590717299578059, + "grad_norm": 0.7402801513671875, + "learning_rate": 0.0014176554734204557, + "loss": 1.6955, + "step": 1508 + }, + { + "epoch": 0.1591772151898734, + "grad_norm": 0.7337663173675537, + "learning_rate": 0.0014175410643847085, + "loss": 1.7234, + "step": 1509 + }, + { + "epoch": 0.15928270042194093, + "grad_norm": 0.7863836884498596, + "learning_rate": 0.0014174265805476564, + "loss": 1.7278, + "step": 1510 + }, + { + "epoch": 0.15938818565400845, + "grad_norm": 0.7025467157363892, + "learning_rate": 0.001417312021922128, + "loss": 1.7014, + "step": 1511 + }, + { + "epoch": 0.15949367088607594, + "grad_norm": 0.8042990565299988, + "learning_rate": 
0.0014171973885209596, + "loss": 1.7231, + "step": 1512 + }, + { + "epoch": 0.15959915611814346, + "grad_norm": 1.0383039712905884, + "learning_rate": 0.0014170826803569971, + "loss": 1.7514, + "step": 1513 + }, + { + "epoch": 0.15970464135021098, + "grad_norm": 0.9978072047233582, + "learning_rate": 0.0014169678974430941, + "loss": 1.7159, + "step": 1514 + }, + { + "epoch": 0.15981012658227847, + "grad_norm": 0.8237003684043884, + "learning_rate": 0.0014168530397921121, + "loss": 1.7402, + "step": 1515 + }, + { + "epoch": 0.159915611814346, + "grad_norm": 0.6725286841392517, + "learning_rate": 0.0014167381074169218, + "loss": 1.7273, + "step": 1516 + }, + { + "epoch": 0.1600210970464135, + "grad_norm": 0.8686544299125671, + "learning_rate": 0.0014166231003304019, + "loss": 1.7184, + "step": 1517 + }, + { + "epoch": 0.160126582278481, + "grad_norm": 0.8774184584617615, + "learning_rate": 0.0014165080185454396, + "loss": 1.7214, + "step": 1518 + }, + { + "epoch": 0.16023206751054853, + "grad_norm": 0.688323438167572, + "learning_rate": 0.0014163928620749301, + "loss": 1.7306, + "step": 1519 + }, + { + "epoch": 0.16033755274261605, + "grad_norm": 0.8977227210998535, + "learning_rate": 0.0014162776309317778, + "loss": 1.7228, + "step": 1520 + }, + { + "epoch": 0.16044303797468354, + "grad_norm": 1.2134642601013184, + "learning_rate": 0.0014161623251288944, + "loss": 1.731, + "step": 1521 + }, + { + "epoch": 0.16054852320675106, + "grad_norm": 0.682066798210144, + "learning_rate": 0.001416046944679201, + "loss": 1.7001, + "step": 1522 + }, + { + "epoch": 0.16065400843881855, + "grad_norm": 1.2380015850067139, + "learning_rate": 0.0014159314895956258, + "loss": 1.6913, + "step": 1523 + }, + { + "epoch": 0.16075949367088607, + "grad_norm": 0.8521327972412109, + "learning_rate": 0.0014158159598911067, + "loss": 1.6991, + "step": 1524 + }, + { + "epoch": 0.1608649789029536, + "grad_norm": 0.7028538584709167, + "learning_rate": 0.0014157003555785893, + "loss": 1.7219, + 
"step": 1525 + }, + { + "epoch": 0.16097046413502109, + "grad_norm": 0.7261594533920288, + "learning_rate": 0.0014155846766710277, + "loss": 1.652, + "step": 1526 + }, + { + "epoch": 0.1610759493670886, + "grad_norm": 0.6937370300292969, + "learning_rate": 0.0014154689231813838, + "loss": 1.7412, + "step": 1527 + }, + { + "epoch": 0.16118143459915613, + "grad_norm": 0.6821504831314087, + "learning_rate": 0.001415353095122629, + "loss": 1.6793, + "step": 1528 + }, + { + "epoch": 0.16128691983122362, + "grad_norm": 0.6825953125953674, + "learning_rate": 0.0014152371925077423, + "loss": 1.7432, + "step": 1529 + }, + { + "epoch": 0.16139240506329114, + "grad_norm": 0.6900463104248047, + "learning_rate": 0.0014151212153497108, + "loss": 1.6906, + "step": 1530 + }, + { + "epoch": 0.16149789029535866, + "grad_norm": 0.7375921010971069, + "learning_rate": 0.0014150051636615305, + "loss": 1.6637, + "step": 1531 + }, + { + "epoch": 0.16160337552742615, + "grad_norm": 0.9045844674110413, + "learning_rate": 0.0014148890374562056, + "loss": 1.6731, + "step": 1532 + }, + { + "epoch": 0.16170886075949367, + "grad_norm": 0.7976173758506775, + "learning_rate": 0.0014147728367467486, + "loss": 1.7122, + "step": 1533 + }, + { + "epoch": 0.1618143459915612, + "grad_norm": 0.7201755046844482, + "learning_rate": 0.0014146565615461805, + "loss": 1.7095, + "step": 1534 + }, + { + "epoch": 0.16191983122362869, + "grad_norm": 0.8088489174842834, + "learning_rate": 0.0014145402118675302, + "loss": 1.7333, + "step": 1535 + }, + { + "epoch": 0.1620253164556962, + "grad_norm": 0.7074602842330933, + "learning_rate": 0.0014144237877238355, + "loss": 1.6575, + "step": 1536 + }, + { + "epoch": 0.16213080168776373, + "grad_norm": 0.6984399557113647, + "learning_rate": 0.0014143072891281425, + "loss": 1.6786, + "step": 1537 + }, + { + "epoch": 0.16223628691983122, + "grad_norm": 0.7883186340332031, + "learning_rate": 0.001414190716093505, + "loss": 1.7005, + "step": 1538 + }, + { + "epoch": 
0.16234177215189874, + "grad_norm": 0.7204409837722778, + "learning_rate": 0.001414074068632986, + "loss": 1.7032, + "step": 1539 + }, + { + "epoch": 0.16244725738396623, + "grad_norm": 0.6856916546821594, + "learning_rate": 0.0014139573467596561, + "loss": 1.7258, + "step": 1540 + }, + { + "epoch": 0.16255274261603375, + "grad_norm": 0.6750685572624207, + "learning_rate": 0.0014138405504865949, + "loss": 1.7033, + "step": 1541 + }, + { + "epoch": 0.16265822784810127, + "grad_norm": 0.7793522477149963, + "learning_rate": 0.0014137236798268896, + "loss": 1.6588, + "step": 1542 + }, + { + "epoch": 0.16276371308016876, + "grad_norm": 0.8123607039451599, + "learning_rate": 0.0014136067347936363, + "loss": 1.6981, + "step": 1543 + }, + { + "epoch": 0.16286919831223629, + "grad_norm": 0.674791693687439, + "learning_rate": 0.0014134897153999394, + "loss": 1.7205, + "step": 1544 + }, + { + "epoch": 0.1629746835443038, + "grad_norm": 0.9480265974998474, + "learning_rate": 0.0014133726216589114, + "loss": 1.6796, + "step": 1545 + }, + { + "epoch": 0.1630801687763713, + "grad_norm": 0.9583049416542053, + "learning_rate": 0.0014132554535836732, + "loss": 1.6755, + "step": 1546 + }, + { + "epoch": 0.16318565400843882, + "grad_norm": 0.7104970216751099, + "learning_rate": 0.0014131382111873543, + "loss": 1.6988, + "step": 1547 + }, + { + "epoch": 0.16329113924050634, + "grad_norm": 0.9763946533203125, + "learning_rate": 0.0014130208944830923, + "loss": 1.7333, + "step": 1548 + }, + { + "epoch": 0.16339662447257383, + "grad_norm": 1.014245629310608, + "learning_rate": 0.0014129035034840325, + "loss": 1.7275, + "step": 1549 + }, + { + "epoch": 0.16350210970464135, + "grad_norm": 0.6507986783981323, + "learning_rate": 0.00141278603820333, + "loss": 1.6869, + "step": 1550 + }, + { + "epoch": 0.16360759493670887, + "grad_norm": 0.7726278305053711, + "learning_rate": 0.0014126684986541468, + "loss": 1.7103, + "step": 1551 + }, + { + "epoch": 0.16371308016877636, + "grad_norm": 
0.6943826079368591, + "learning_rate": 0.0014125508848496539, + "loss": 1.7147, + "step": 1552 + }, + { + "epoch": 0.16381856540084389, + "grad_norm": 0.8619660139083862, + "learning_rate": 0.0014124331968030307, + "loss": 1.7513, + "step": 1553 + }, + { + "epoch": 0.1639240506329114, + "grad_norm": 0.8739969730377197, + "learning_rate": 0.0014123154345274645, + "loss": 1.6736, + "step": 1554 + }, + { + "epoch": 0.1640295358649789, + "grad_norm": 0.6566855907440186, + "learning_rate": 0.0014121975980361512, + "loss": 1.6847, + "step": 1555 + }, + { + "epoch": 0.16413502109704642, + "grad_norm": 0.8173077702522278, + "learning_rate": 0.0014120796873422952, + "loss": 1.7085, + "step": 1556 + }, + { + "epoch": 0.1642405063291139, + "grad_norm": 0.6923719048500061, + "learning_rate": 0.0014119617024591089, + "loss": 1.7331, + "step": 1557 + }, + { + "epoch": 0.16434599156118143, + "grad_norm": 0.7602263689041138, + "learning_rate": 0.0014118436433998127, + "loss": 1.6832, + "step": 1558 + }, + { + "epoch": 0.16445147679324895, + "grad_norm": 1.01948881149292, + "learning_rate": 0.0014117255101776362, + "loss": 1.6665, + "step": 1559 + }, + { + "epoch": 0.16455696202531644, + "grad_norm": 0.9326398968696594, + "learning_rate": 0.0014116073028058165, + "loss": 1.6901, + "step": 1560 + }, + { + "epoch": 0.16466244725738396, + "grad_norm": 0.7370006442070007, + "learning_rate": 0.0014114890212975997, + "loss": 1.6439, + "step": 1561 + }, + { + "epoch": 0.16476793248945149, + "grad_norm": 0.72734135389328, + "learning_rate": 0.0014113706656662393, + "loss": 1.7287, + "step": 1562 + }, + { + "epoch": 0.16487341772151898, + "grad_norm": 0.8433178663253784, + "learning_rate": 0.001411252235924998, + "loss": 1.6887, + "step": 1563 + }, + { + "epoch": 0.1649789029535865, + "grad_norm": 0.7561265826225281, + "learning_rate": 0.0014111337320871463, + "loss": 1.6864, + "step": 1564 + }, + { + "epoch": 0.16508438818565402, + "grad_norm": 0.657299280166626, + "learning_rate": 
0.0014110151541659633, + "loss": 1.6513, + "step": 1565 + }, + { + "epoch": 0.1651898734177215, + "grad_norm": 0.7236863970756531, + "learning_rate": 0.0014108965021747363, + "loss": 1.7064, + "step": 1566 + }, + { + "epoch": 0.16529535864978903, + "grad_norm": 0.7223225235939026, + "learning_rate": 0.0014107777761267605, + "loss": 1.7014, + "step": 1567 + }, + { + "epoch": 0.16540084388185655, + "grad_norm": 0.6764310598373413, + "learning_rate": 0.00141065897603534, + "loss": 1.738, + "step": 1568 + }, + { + "epoch": 0.16550632911392404, + "grad_norm": 0.7217133045196533, + "learning_rate": 0.001410540101913787, + "loss": 1.6878, + "step": 1569 + }, + { + "epoch": 0.16561181434599156, + "grad_norm": 0.714729905128479, + "learning_rate": 0.0014104211537754217, + "loss": 1.6996, + "step": 1570 + }, + { + "epoch": 0.16571729957805909, + "grad_norm": 0.6701192855834961, + "learning_rate": 0.001410302131633573, + "loss": 1.671, + "step": 1571 + }, + { + "epoch": 0.16582278481012658, + "grad_norm": 0.7062249183654785, + "learning_rate": 0.0014101830355015778, + "loss": 1.7264, + "step": 1572 + }, + { + "epoch": 0.1659282700421941, + "grad_norm": 0.70981365442276, + "learning_rate": 0.0014100638653927816, + "loss": 1.7237, + "step": 1573 + }, + { + "epoch": 0.1660337552742616, + "grad_norm": 0.717598021030426, + "learning_rate": 0.0014099446213205378, + "loss": 1.6855, + "step": 1574 + }, + { + "epoch": 0.1661392405063291, + "grad_norm": 0.7935261130332947, + "learning_rate": 0.0014098253032982086, + "loss": 1.6765, + "step": 1575 + }, + { + "epoch": 0.16624472573839663, + "grad_norm": 0.668037474155426, + "learning_rate": 0.0014097059113391639, + "loss": 1.7171, + "step": 1576 + }, + { + "epoch": 0.16635021097046412, + "grad_norm": 0.7071298360824585, + "learning_rate": 0.0014095864454567821, + "loss": 1.7085, + "step": 1577 + }, + { + "epoch": 0.16645569620253164, + "grad_norm": 0.6713935732841492, + "learning_rate": 0.0014094669056644502, + "loss": 1.6812, + "step": 
1578 + }, + { + "epoch": 0.16656118143459916, + "grad_norm": 0.7373342514038086, + "learning_rate": 0.001409347291975563, + "loss": 1.7415, + "step": 1579 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 0.9113540053367615, + "learning_rate": 0.001409227604403524, + "loss": 1.7032, + "step": 1580 + }, + { + "epoch": 0.16677215189873418, + "grad_norm": 0.7423143982887268, + "learning_rate": 0.0014091078429617448, + "loss": 1.7112, + "step": 1581 + }, + { + "epoch": 0.1668776371308017, + "grad_norm": 0.8553333282470703, + "learning_rate": 0.0014089880076636452, + "loss": 1.7099, + "step": 1582 + }, + { + "epoch": 0.1669831223628692, + "grad_norm": 1.0204716920852661, + "learning_rate": 0.0014088680985226533, + "loss": 1.6759, + "step": 1583 + }, + { + "epoch": 0.1670886075949367, + "grad_norm": 0.7075477242469788, + "learning_rate": 0.0014087481155522056, + "loss": 1.6773, + "step": 1584 + }, + { + "epoch": 0.16719409282700423, + "grad_norm": 0.7543619275093079, + "learning_rate": 0.0014086280587657467, + "loss": 1.7291, + "step": 1585 + }, + { + "epoch": 0.16729957805907172, + "grad_norm": 0.7863457202911377, + "learning_rate": 0.0014085079281767295, + "loss": 1.7105, + "step": 1586 + }, + { + "epoch": 0.16740506329113924, + "grad_norm": 1.0501266717910767, + "learning_rate": 0.0014083877237986153, + "loss": 1.6654, + "step": 1587 + }, + { + "epoch": 0.16751054852320676, + "grad_norm": 0.9072461128234863, + "learning_rate": 0.0014082674456448738, + "loss": 1.6964, + "step": 1588 + }, + { + "epoch": 0.16761603375527426, + "grad_norm": 0.6921250820159912, + "learning_rate": 0.0014081470937289827, + "loss": 1.7162, + "step": 1589 + }, + { + "epoch": 0.16772151898734178, + "grad_norm": 0.9605475068092346, + "learning_rate": 0.0014080266680644277, + "loss": 1.6803, + "step": 1590 + }, + { + "epoch": 0.16782700421940927, + "grad_norm": 1.1459786891937256, + "learning_rate": 0.0014079061686647033, + "loss": 1.6938, + "step": 1591 + }, + { + "epoch": 
0.1679324894514768, + "grad_norm": 0.7685462832450867, + "learning_rate": 0.0014077855955433123, + "loss": 1.6699, + "step": 1592 + }, + { + "epoch": 0.1680379746835443, + "grad_norm": 0.7780585289001465, + "learning_rate": 0.001407664948713765, + "loss": 1.7172, + "step": 1593 + }, + { + "epoch": 0.1681434599156118, + "grad_norm": 1.1219998598098755, + "learning_rate": 0.001407544228189581, + "loss": 1.6954, + "step": 1594 + }, + { + "epoch": 0.16824894514767932, + "grad_norm": 0.7991330623626709, + "learning_rate": 0.0014074234339842874, + "loss": 1.7439, + "step": 1595 + }, + { + "epoch": 0.16835443037974684, + "grad_norm": 0.7747524380683899, + "learning_rate": 0.00140730256611142, + "loss": 1.7186, + "step": 1596 + }, + { + "epoch": 0.16845991561181434, + "grad_norm": 0.8412891626358032, + "learning_rate": 0.001407181624584522, + "loss": 1.6901, + "step": 1597 + }, + { + "epoch": 0.16856540084388186, + "grad_norm": 0.8173542022705078, + "learning_rate": 0.0014070606094171464, + "loss": 1.7083, + "step": 1598 + }, + { + "epoch": 0.16867088607594938, + "grad_norm": 0.814456582069397, + "learning_rate": 0.0014069395206228528, + "loss": 1.6946, + "step": 1599 + }, + { + "epoch": 0.16877637130801687, + "grad_norm": 0.8211082816123962, + "learning_rate": 0.0014068183582152103, + "loss": 1.7236, + "step": 1600 + }, + { + "epoch": 0.1688818565400844, + "grad_norm": 0.8397409915924072, + "learning_rate": 0.0014066971222077955, + "loss": 1.6699, + "step": 1601 + }, + { + "epoch": 0.1689873417721519, + "grad_norm": 0.6999192833900452, + "learning_rate": 0.0014065758126141938, + "loss": 1.7349, + "step": 1602 + }, + { + "epoch": 0.1690928270042194, + "grad_norm": 0.9001176953315735, + "learning_rate": 0.0014064544294479981, + "loss": 1.7181, + "step": 1603 + }, + { + "epoch": 0.16919831223628692, + "grad_norm": 0.7867156267166138, + "learning_rate": 0.0014063329727228102, + "loss": 1.675, + "step": 1604 + }, + { + "epoch": 0.16930379746835442, + "grad_norm": 
0.8208297491073608, + "learning_rate": 0.0014062114424522397, + "loss": 1.6777, + "step": 1605 + }, + { + "epoch": 0.16940928270042194, + "grad_norm": 0.7368332147598267, + "learning_rate": 0.0014060898386499053, + "loss": 1.7182, + "step": 1606 + }, + { + "epoch": 0.16951476793248946, + "grad_norm": 0.8782017827033997, + "learning_rate": 0.0014059681613294327, + "loss": 1.7336, + "step": 1607 + }, + { + "epoch": 0.16962025316455695, + "grad_norm": 1.0875483751296997, + "learning_rate": 0.0014058464105044567, + "loss": 1.68, + "step": 1608 + }, + { + "epoch": 0.16972573839662447, + "grad_norm": 0.8326412439346313, + "learning_rate": 0.0014057245861886201, + "loss": 1.6939, + "step": 1609 + }, + { + "epoch": 0.169831223628692, + "grad_norm": 0.7252690196037292, + "learning_rate": 0.001405602688395574, + "loss": 1.71, + "step": 1610 + }, + { + "epoch": 0.16993670886075948, + "grad_norm": 0.7404804825782776, + "learning_rate": 0.0014054807171389773, + "loss": 1.6401, + "step": 1611 + }, + { + "epoch": 0.170042194092827, + "grad_norm": 0.8364023566246033, + "learning_rate": 0.001405358672432498, + "loss": 1.6264, + "step": 1612 + }, + { + "epoch": 0.17014767932489452, + "grad_norm": 0.8776815533638, + "learning_rate": 0.0014052365542898111, + "loss": 1.6653, + "step": 1613 + }, + { + "epoch": 0.17025316455696202, + "grad_norm": 0.8269844055175781, + "learning_rate": 0.0014051143627246015, + "loss": 1.7005, + "step": 1614 + }, + { + "epoch": 0.17035864978902954, + "grad_norm": 0.7105354070663452, + "learning_rate": 0.0014049920977505608, + "loss": 1.6727, + "step": 1615 + }, + { + "epoch": 0.17046413502109706, + "grad_norm": 0.705833375453949, + "learning_rate": 0.0014048697593813891, + "loss": 1.6927, + "step": 1616 + }, + { + "epoch": 0.17056962025316455, + "grad_norm": 0.8713106513023376, + "learning_rate": 0.0014047473476307955, + "loss": 1.6722, + "step": 1617 + }, + { + "epoch": 0.17067510548523207, + "grad_norm": 0.9303885698318481, + "learning_rate": 
0.001404624862512497, + "loss": 1.6652, + "step": 1618 + }, + { + "epoch": 0.1707805907172996, + "grad_norm": 0.750637948513031, + "learning_rate": 0.001404502304040218, + "loss": 1.685, + "step": 1619 + }, + { + "epoch": 0.17088607594936708, + "grad_norm": 0.6825612783432007, + "learning_rate": 0.0014043796722276924, + "loss": 1.6911, + "step": 1620 + }, + { + "epoch": 0.1709915611814346, + "grad_norm": 0.7913682460784912, + "learning_rate": 0.0014042569670886615, + "loss": 1.6803, + "step": 1621 + }, + { + "epoch": 0.1710970464135021, + "grad_norm": 0.8564597368240356, + "learning_rate": 0.0014041341886368752, + "loss": 1.7348, + "step": 1622 + }, + { + "epoch": 0.17120253164556962, + "grad_norm": 0.6719011068344116, + "learning_rate": 0.0014040113368860908, + "loss": 1.6878, + "step": 1623 + }, + { + "epoch": 0.17130801687763714, + "grad_norm": 1.1054884195327759, + "learning_rate": 0.0014038884118500754, + "loss": 1.6636, + "step": 1624 + }, + { + "epoch": 0.17141350210970463, + "grad_norm": 1.1051424741744995, + "learning_rate": 0.0014037654135426025, + "loss": 1.721, + "step": 1625 + }, + { + "epoch": 0.17151898734177215, + "grad_norm": 0.6447979807853699, + "learning_rate": 0.0014036423419774551, + "loss": 1.6505, + "step": 1626 + }, + { + "epoch": 0.17162447257383967, + "grad_norm": 0.7497978210449219, + "learning_rate": 0.0014035191971684242, + "loss": 1.6669, + "step": 1627 + }, + { + "epoch": 0.17172995780590716, + "grad_norm": 0.7319137454032898, + "learning_rate": 0.0014033959791293082, + "loss": 1.6746, + "step": 1628 + }, + { + "epoch": 0.17183544303797468, + "grad_norm": 0.9076715707778931, + "learning_rate": 0.0014032726878739148, + "loss": 1.6952, + "step": 1629 + }, + { + "epoch": 0.1719409282700422, + "grad_norm": 0.9494011998176575, + "learning_rate": 0.0014031493234160591, + "loss": 1.6675, + "step": 1630 + }, + { + "epoch": 0.1720464135021097, + "grad_norm": 0.8354353904724121, + "learning_rate": 0.001403025885769565, + "loss": 1.7045, + 
"step": 1631 + }, + { + "epoch": 0.17215189873417722, + "grad_norm": 0.6961443424224854, + "learning_rate": 0.001402902374948264, + "loss": 1.6918, + "step": 1632 + }, + { + "epoch": 0.17225738396624474, + "grad_norm": 0.8050990700721741, + "learning_rate": 0.0014027787909659962, + "loss": 1.6866, + "step": 1633 + }, + { + "epoch": 0.17236286919831223, + "grad_norm": 0.7572201490402222, + "learning_rate": 0.0014026551338366098, + "loss": 1.6888, + "step": 1634 + }, + { + "epoch": 0.17246835443037975, + "grad_norm": 0.7290216684341431, + "learning_rate": 0.0014025314035739614, + "loss": 1.6759, + "step": 1635 + }, + { + "epoch": 0.17257383966244727, + "grad_norm": 0.8614826798439026, + "learning_rate": 0.001402407600191915, + "loss": 1.6788, + "step": 1636 + }, + { + "epoch": 0.17267932489451476, + "grad_norm": 1.42624032497406, + "learning_rate": 0.0014022837237043441, + "loss": 1.6958, + "step": 1637 + }, + { + "epoch": 0.17278481012658228, + "grad_norm": 0.8827770352363586, + "learning_rate": 0.0014021597741251295, + "loss": 1.7024, + "step": 1638 + }, + { + "epoch": 0.17289029535864978, + "grad_norm": 0.7573687434196472, + "learning_rate": 0.00140203575146816, + "loss": 1.6927, + "step": 1639 + }, + { + "epoch": 0.1729957805907173, + "grad_norm": 0.9886500835418701, + "learning_rate": 0.0014019116557473332, + "loss": 1.6578, + "step": 1640 + }, + { + "epoch": 0.17310126582278482, + "grad_norm": 0.6978749632835388, + "learning_rate": 0.0014017874869765548, + "loss": 1.6618, + "step": 1641 + }, + { + "epoch": 0.1732067510548523, + "grad_norm": 0.8279091119766235, + "learning_rate": 0.0014016632451697383, + "loss": 1.6591, + "step": 1642 + }, + { + "epoch": 0.17331223628691983, + "grad_norm": 1.162722110748291, + "learning_rate": 0.0014015389303408058, + "loss": 1.6759, + "step": 1643 + }, + { + "epoch": 0.17341772151898735, + "grad_norm": 0.8606261014938354, + "learning_rate": 0.001401414542503687, + "loss": 1.6718, + "step": 1644 + }, + { + "epoch": 
0.17352320675105484, + "grad_norm": 0.7485703229904175, + "learning_rate": 0.001401290081672321, + "loss": 1.6937, + "step": 1645 + }, + { + "epoch": 0.17362869198312236, + "grad_norm": 0.6540822386741638, + "learning_rate": 0.0014011655478606531, + "loss": 1.6745, + "step": 1646 + }, + { + "epoch": 0.17373417721518988, + "grad_norm": 0.7924497723579407, + "learning_rate": 0.001401040941082639, + "loss": 1.6857, + "step": 1647 + }, + { + "epoch": 0.17383966244725738, + "grad_norm": 0.943410336971283, + "learning_rate": 0.001400916261352241, + "loss": 1.6629, + "step": 1648 + }, + { + "epoch": 0.1739451476793249, + "grad_norm": 0.6667643785476685, + "learning_rate": 0.00140079150868343, + "loss": 1.713, + "step": 1649 + }, + { + "epoch": 0.17405063291139242, + "grad_norm": 1.0199024677276611, + "learning_rate": 0.0014006666830901854, + "loss": 1.6912, + "step": 1650 + }, + { + "epoch": 0.1741561181434599, + "grad_norm": 0.8633936643600464, + "learning_rate": 0.0014005417845864945, + "loss": 1.6967, + "step": 1651 + }, + { + "epoch": 0.17426160337552743, + "grad_norm": 0.7102012038230896, + "learning_rate": 0.0014004168131863525, + "loss": 1.6596, + "step": 1652 + }, + { + "epoch": 0.17436708860759495, + "grad_norm": 0.9194730520248413, + "learning_rate": 0.0014002917689037637, + "loss": 1.6535, + "step": 1653 + }, + { + "epoch": 0.17447257383966244, + "grad_norm": 0.8975662589073181, + "learning_rate": 0.0014001666517527392, + "loss": 1.7028, + "step": 1654 + }, + { + "epoch": 0.17457805907172996, + "grad_norm": 0.7649430632591248, + "learning_rate": 0.0014000414617472996, + "loss": 1.6891, + "step": 1655 + }, + { + "epoch": 0.17468354430379746, + "grad_norm": 0.7446131706237793, + "learning_rate": 0.0013999161989014725, + "loss": 1.724, + "step": 1656 + }, + { + "epoch": 0.17478902953586498, + "grad_norm": 0.7410969734191895, + "learning_rate": 0.0013997908632292948, + "loss": 1.664, + "step": 1657 + }, + { + "epoch": 0.1748945147679325, + "grad_norm": 
0.9291269779205322, + "learning_rate": 0.0013996654547448106, + "loss": 1.6697, + "step": 1658 + }, + { + "epoch": 0.175, + "grad_norm": 0.8407784700393677, + "learning_rate": 0.0013995399734620729, + "loss": 1.682, + "step": 1659 + }, + { + "epoch": 0.1751054852320675, + "grad_norm": 0.7539697885513306, + "learning_rate": 0.001399414419395142, + "loss": 1.6637, + "step": 1660 + }, + { + "epoch": 0.17521097046413503, + "grad_norm": 0.7751826643943787, + "learning_rate": 0.0013992887925580874, + "loss": 1.7153, + "step": 1661 + }, + { + "epoch": 0.17531645569620252, + "grad_norm": 0.7759808897972107, + "learning_rate": 0.0013991630929649857, + "loss": 1.6836, + "step": 1662 + }, + { + "epoch": 0.17542194092827004, + "grad_norm": 1.4682105779647827, + "learning_rate": 0.0013990373206299225, + "loss": 1.6725, + "step": 1663 + }, + { + "epoch": 0.17552742616033756, + "grad_norm": 0.7585763335227966, + "learning_rate": 0.0013989114755669912, + "loss": 1.6869, + "step": 1664 + }, + { + "epoch": 0.17563291139240506, + "grad_norm": 1.5893713235855103, + "learning_rate": 0.001398785557790293, + "loss": 1.6999, + "step": 1665 + }, + { + "epoch": 0.17573839662447258, + "grad_norm": 0.7699084281921387, + "learning_rate": 0.0013986595673139382, + "loss": 1.7257, + "step": 1666 + }, + { + "epoch": 0.1758438818565401, + "grad_norm": 1.4923624992370605, + "learning_rate": 0.0013985335041520443, + "loss": 1.711, + "step": 1667 + }, + { + "epoch": 0.1759493670886076, + "grad_norm": 0.7988854646682739, + "learning_rate": 0.0013984073683187374, + "loss": 1.7096, + "step": 1668 + }, + { + "epoch": 0.1760548523206751, + "grad_norm": 1.2062352895736694, + "learning_rate": 0.0013982811598281517, + "loss": 1.6681, + "step": 1669 + }, + { + "epoch": 0.17616033755274263, + "grad_norm": 1.0232994556427002, + "learning_rate": 0.0013981548786944293, + "loss": 1.6484, + "step": 1670 + }, + { + "epoch": 0.17626582278481012, + "grad_norm": 0.9452105164527893, + "learning_rate": 
0.0013980285249317209, + "loss": 1.6457, + "step": 1671 + }, + { + "epoch": 0.17637130801687764, + "grad_norm": 0.7257751226425171, + "learning_rate": 0.0013979020985541847, + "loss": 1.6788, + "step": 1672 + }, + { + "epoch": 0.17647679324894514, + "grad_norm": 0.8554603457450867, + "learning_rate": 0.0013977755995759876, + "loss": 1.6954, + "step": 1673 + }, + { + "epoch": 0.17658227848101266, + "grad_norm": 0.8139756917953491, + "learning_rate": 0.0013976490280113048, + "loss": 1.6818, + "step": 1674 + }, + { + "epoch": 0.17668776371308018, + "grad_norm": 0.7430441379547119, + "learning_rate": 0.0013975223838743188, + "loss": 1.6669, + "step": 1675 + }, + { + "epoch": 0.17679324894514767, + "grad_norm": 0.8491414785385132, + "learning_rate": 0.0013973956671792206, + "loss": 1.694, + "step": 1676 + }, + { + "epoch": 0.1768987341772152, + "grad_norm": 0.697247326374054, + "learning_rate": 0.00139726887794021, + "loss": 1.6313, + "step": 1677 + }, + { + "epoch": 0.1770042194092827, + "grad_norm": 0.9169026613235474, + "learning_rate": 0.001397142016171494, + "loss": 1.7116, + "step": 1678 + }, + { + "epoch": 0.1771097046413502, + "grad_norm": 0.7733203768730164, + "learning_rate": 0.0013970150818872881, + "loss": 1.6492, + "step": 1679 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 0.8478755354881287, + "learning_rate": 0.0013968880751018158, + "loss": 1.6822, + "step": 1680 + }, + { + "epoch": 0.17732067510548524, + "grad_norm": 1.455599069595337, + "learning_rate": 0.0013967609958293091, + "loss": 1.6761, + "step": 1681 + }, + { + "epoch": 0.17742616033755274, + "grad_norm": 0.7645924687385559, + "learning_rate": 0.001396633844084008, + "loss": 1.67, + "step": 1682 + }, + { + "epoch": 0.17753164556962026, + "grad_norm": 1.6062300205230713, + "learning_rate": 0.00139650661988016, + "loss": 1.7251, + "step": 1683 + }, + { + "epoch": 0.17763713080168778, + "grad_norm": 0.8164054751396179, + "learning_rate": 0.0013963793232320216, + "loss": 1.7029, + "step": 
1684 + }, + { + "epoch": 0.17774261603375527, + "grad_norm": 1.451500654220581, + "learning_rate": 0.0013962519541538569, + "loss": 1.6418, + "step": 1685 + }, + { + "epoch": 0.1778481012658228, + "grad_norm": 0.8783779740333557, + "learning_rate": 0.001396124512659938, + "loss": 1.6358, + "step": 1686 + }, + { + "epoch": 0.17795358649789028, + "grad_norm": 1.9689642190933228, + "learning_rate": 0.001395996998764546, + "loss": 1.7103, + "step": 1687 + }, + { + "epoch": 0.1780590717299578, + "grad_norm": 1.4904247522354126, + "learning_rate": 0.0013958694124819688, + "loss": 1.6296, + "step": 1688 + }, + { + "epoch": 0.17816455696202532, + "grad_norm": 1.8891215324401855, + "learning_rate": 0.0013957417538265032, + "loss": 1.7151, + "step": 1689 + }, + { + "epoch": 0.17827004219409281, + "grad_norm": 2.1127071380615234, + "learning_rate": 0.0013956140228124545, + "loss": 1.6839, + "step": 1690 + }, + { + "epoch": 0.17837552742616034, + "grad_norm": 0.8862119913101196, + "learning_rate": 0.001395486219454135, + "loss": 1.7307, + "step": 1691 + }, + { + "epoch": 0.17848101265822786, + "grad_norm": 1.4286534786224365, + "learning_rate": 0.0013953583437658658, + "loss": 1.6903, + "step": 1692 + }, + { + "epoch": 0.17858649789029535, + "grad_norm": 0.8528317213058472, + "learning_rate": 0.0013952303957619763, + "loss": 1.6597, + "step": 1693 + }, + { + "epoch": 0.17869198312236287, + "grad_norm": 0.9875421524047852, + "learning_rate": 0.0013951023754568035, + "loss": 1.6663, + "step": 1694 + }, + { + "epoch": 0.1787974683544304, + "grad_norm": 1.0061475038528442, + "learning_rate": 0.001394974282864693, + "loss": 1.6573, + "step": 1695 + }, + { + "epoch": 0.17890295358649788, + "grad_norm": 0.776001513004303, + "learning_rate": 0.0013948461179999977, + "loss": 1.647, + "step": 1696 + }, + { + "epoch": 0.1790084388185654, + "grad_norm": 1.1073856353759766, + "learning_rate": 0.0013947178808770794, + "loss": 1.6636, + "step": 1697 + }, + { + "epoch": 0.17911392405063292, + 
"grad_norm": 0.6581562757492065, + "learning_rate": 0.0013945895715103077, + "loss": 1.6778, + "step": 1698 + }, + { + "epoch": 0.17921940928270041, + "grad_norm": 1.1530848741531372, + "learning_rate": 0.0013944611899140604, + "loss": 1.7221, + "step": 1699 + }, + { + "epoch": 0.17932489451476794, + "grad_norm": 0.7026259303092957, + "learning_rate": 0.0013943327361027231, + "loss": 1.6923, + "step": 1700 + }, + { + "epoch": 0.17943037974683546, + "grad_norm": 1.2047683000564575, + "learning_rate": 0.0013942042100906899, + "loss": 1.6652, + "step": 1701 + }, + { + "epoch": 0.17953586497890295, + "grad_norm": 0.9628404974937439, + "learning_rate": 0.0013940756118923626, + "loss": 1.6619, + "step": 1702 + }, + { + "epoch": 0.17964135021097047, + "grad_norm": 0.7761884331703186, + "learning_rate": 0.0013939469415221513, + "loss": 1.6282, + "step": 1703 + }, + { + "epoch": 0.17974683544303796, + "grad_norm": 0.9200721383094788, + "learning_rate": 0.0013938181989944741, + "loss": 1.6536, + "step": 1704 + }, + { + "epoch": 0.17985232067510548, + "grad_norm": 0.8476955890655518, + "learning_rate": 0.0013936893843237573, + "loss": 1.6708, + "step": 1705 + }, + { + "epoch": 0.179957805907173, + "grad_norm": 0.685566782951355, + "learning_rate": 0.0013935604975244356, + "loss": 1.6894, + "step": 1706 + }, + { + "epoch": 0.1800632911392405, + "grad_norm": 0.9651691913604736, + "learning_rate": 0.0013934315386109509, + "loss": 1.6311, + "step": 1707 + }, + { + "epoch": 0.18016877637130801, + "grad_norm": 0.8870660662651062, + "learning_rate": 0.0013933025075977539, + "loss": 1.6339, + "step": 1708 + }, + { + "epoch": 0.18027426160337554, + "grad_norm": 0.7023838758468628, + "learning_rate": 0.0013931734044993031, + "loss": 1.642, + "step": 1709 + }, + { + "epoch": 0.18037974683544303, + "grad_norm": 0.9655669331550598, + "learning_rate": 0.0013930442293300649, + "loss": 1.7025, + "step": 1710 + }, + { + "epoch": 0.18048523206751055, + "grad_norm": 0.8400550484657288, + 
"learning_rate": 0.0013929149821045148, + "loss": 1.6903, + "step": 1711 + }, + { + "epoch": 0.18059071729957807, + "grad_norm": 0.7707672119140625, + "learning_rate": 0.0013927856628371347, + "loss": 1.7041, + "step": 1712 + }, + { + "epoch": 0.18069620253164556, + "grad_norm": 1.0762255191802979, + "learning_rate": 0.0013926562715424159, + "loss": 1.6513, + "step": 1713 + }, + { + "epoch": 0.18080168776371308, + "grad_norm": 1.006707787513733, + "learning_rate": 0.0013925268082348576, + "loss": 1.6463, + "step": 1714 + }, + { + "epoch": 0.1809071729957806, + "grad_norm": 0.6881898045539856, + "learning_rate": 0.0013923972729289662, + "loss": 1.6332, + "step": 1715 + }, + { + "epoch": 0.1810126582278481, + "grad_norm": 1.0833543539047241, + "learning_rate": 0.0013922676656392572, + "loss": 1.664, + "step": 1716 + }, + { + "epoch": 0.18111814345991561, + "grad_norm": 0.8344457149505615, + "learning_rate": 0.0013921379863802536, + "loss": 1.6302, + "step": 1717 + }, + { + "epoch": 0.18122362869198314, + "grad_norm": 0.7221484780311584, + "learning_rate": 0.0013920082351664867, + "loss": 1.6544, + "step": 1718 + }, + { + "epoch": 0.18132911392405063, + "grad_norm": 0.7345021367073059, + "learning_rate": 0.0013918784120124956, + "loss": 1.6662, + "step": 1719 + }, + { + "epoch": 0.18143459915611815, + "grad_norm": 0.7035703659057617, + "learning_rate": 0.0013917485169328279, + "loss": 1.6913, + "step": 1720 + }, + { + "epoch": 0.18154008438818564, + "grad_norm": 0.8721500635147095, + "learning_rate": 0.0013916185499420386, + "loss": 1.6526, + "step": 1721 + }, + { + "epoch": 0.18164556962025316, + "grad_norm": 0.9745742678642273, + "learning_rate": 0.0013914885110546916, + "loss": 1.6499, + "step": 1722 + }, + { + "epoch": 0.18175105485232068, + "grad_norm": 0.6547992825508118, + "learning_rate": 0.001391358400285358, + "loss": 1.6478, + "step": 1723 + }, + { + "epoch": 0.18185654008438817, + "grad_norm": 1.0724048614501953, + "learning_rate": 0.0013912282176486177, + 
"loss": 1.6436, + "step": 1724 + }, + { + "epoch": 0.1819620253164557, + "grad_norm": 0.8480265736579895, + "learning_rate": 0.0013910979631590581, + "loss": 1.6534, + "step": 1725 + }, + { + "epoch": 0.18206751054852321, + "grad_norm": 0.6986917853355408, + "learning_rate": 0.001390967636831275, + "loss": 1.691, + "step": 1726 + }, + { + "epoch": 0.1821729957805907, + "grad_norm": 0.7404647469520569, + "learning_rate": 0.0013908372386798717, + "loss": 1.6564, + "step": 1727 + }, + { + "epoch": 0.18227848101265823, + "grad_norm": 0.7808349132537842, + "learning_rate": 0.0013907067687194607, + "loss": 1.6598, + "step": 1728 + }, + { + "epoch": 0.18238396624472575, + "grad_norm": 0.745953381061554, + "learning_rate": 0.0013905762269646614, + "loss": 1.6822, + "step": 1729 + }, + { + "epoch": 0.18248945147679324, + "grad_norm": 0.6797197461128235, + "learning_rate": 0.0013904456134301016, + "loss": 1.6984, + "step": 1730 + }, + { + "epoch": 0.18259493670886076, + "grad_norm": 0.8420961499214172, + "learning_rate": 0.001390314928130417, + "loss": 1.6524, + "step": 1731 + }, + { + "epoch": 0.18270042194092828, + "grad_norm": 0.6611125469207764, + "learning_rate": 0.0013901841710802522, + "loss": 1.649, + "step": 1732 + }, + { + "epoch": 0.18280590717299577, + "grad_norm": 0.7673959732055664, + "learning_rate": 0.0013900533422942585, + "loss": 1.6593, + "step": 1733 + }, + { + "epoch": 0.1829113924050633, + "grad_norm": 0.70577472448349, + "learning_rate": 0.0013899224417870963, + "loss": 1.6604, + "step": 1734 + }, + { + "epoch": 0.18301687763713081, + "grad_norm": 0.7298133373260498, + "learning_rate": 0.0013897914695734336, + "loss": 1.6535, + "step": 1735 + }, + { + "epoch": 0.1831223628691983, + "grad_norm": 0.7327473163604736, + "learning_rate": 0.0013896604256679462, + "loss": 1.698, + "step": 1736 + }, + { + "epoch": 0.18322784810126583, + "grad_norm": 0.7476534247398376, + "learning_rate": 0.0013895293100853188, + "loss": 1.6523, + "step": 1737 + }, + { + 
"epoch": 0.18333333333333332, + "grad_norm": 0.6787974834442139, + "learning_rate": 0.001389398122840243, + "loss": 1.6707, + "step": 1738 + }, + { + "epoch": 0.18343881856540084, + "grad_norm": 0.7723780870437622, + "learning_rate": 0.0013892668639474194, + "loss": 1.6986, + "step": 1739 + }, + { + "epoch": 0.18354430379746836, + "grad_norm": 0.9879834055900574, + "learning_rate": 0.0013891355334215562, + "loss": 1.6846, + "step": 1740 + }, + { + "epoch": 0.18364978902953585, + "grad_norm": 0.7187525629997253, + "learning_rate": 0.001389004131277369, + "loss": 1.6818, + "step": 1741 + }, + { + "epoch": 0.18375527426160337, + "grad_norm": 0.751211404800415, + "learning_rate": 0.0013888726575295826, + "loss": 1.6735, + "step": 1742 + }, + { + "epoch": 0.1838607594936709, + "grad_norm": 0.799956202507019, + "learning_rate": 0.0013887411121929294, + "loss": 1.6726, + "step": 1743 + }, + { + "epoch": 0.1839662447257384, + "grad_norm": 0.8534452319145203, + "learning_rate": 0.0013886094952821496, + "loss": 1.6993, + "step": 1744 + }, + { + "epoch": 0.1840717299578059, + "grad_norm": 0.6849446296691895, + "learning_rate": 0.0013884778068119913, + "loss": 1.6743, + "step": 1745 + }, + { + "epoch": 0.18417721518987343, + "grad_norm": 0.7724353075027466, + "learning_rate": 0.0013883460467972108, + "loss": 1.6948, + "step": 1746 + }, + { + "epoch": 0.18428270042194092, + "grad_norm": 0.8062916398048401, + "learning_rate": 0.0013882142152525732, + "loss": 1.7163, + "step": 1747 + }, + { + "epoch": 0.18438818565400844, + "grad_norm": 0.7523599863052368, + "learning_rate": 0.0013880823121928498, + "loss": 1.6643, + "step": 1748 + }, + { + "epoch": 0.18449367088607596, + "grad_norm": 0.7420204281806946, + "learning_rate": 0.0013879503376328219, + "loss": 1.7086, + "step": 1749 + }, + { + "epoch": 0.18459915611814345, + "grad_norm": 0.8395634293556213, + "learning_rate": 0.0013878182915872776, + "loss": 1.7018, + "step": 1750 + }, + { + "epoch": 0.18470464135021097, + 
"grad_norm": 0.7595760822296143, + "learning_rate": 0.001387686174071013, + "loss": 1.6676, + "step": 1751 + }, + { + "epoch": 0.1848101265822785, + "grad_norm": 0.780832827091217, + "learning_rate": 0.001387553985098833, + "loss": 1.6837, + "step": 1752 + }, + { + "epoch": 0.184915611814346, + "grad_norm": 0.8465923070907593, + "learning_rate": 0.0013874217246855499, + "loss": 1.6899, + "step": 1753 + }, + { + "epoch": 0.1850210970464135, + "grad_norm": 0.874474287033081, + "learning_rate": 0.001387289392845984, + "loss": 1.6782, + "step": 1754 + }, + { + "epoch": 0.185126582278481, + "grad_norm": 0.6978711485862732, + "learning_rate": 0.0013871569895949635, + "loss": 1.6747, + "step": 1755 + }, + { + "epoch": 0.18523206751054852, + "grad_norm": 0.8222792148590088, + "learning_rate": 0.0013870245149473256, + "loss": 1.65, + "step": 1756 + }, + { + "epoch": 0.18533755274261604, + "grad_norm": 0.8474301695823669, + "learning_rate": 0.0013868919689179143, + "loss": 1.6701, + "step": 1757 + }, + { + "epoch": 0.18544303797468353, + "grad_norm": 0.7021660208702087, + "learning_rate": 0.001386759351521582, + "loss": 1.6733, + "step": 1758 + }, + { + "epoch": 0.18554852320675105, + "grad_norm": 1.0447235107421875, + "learning_rate": 0.0013866266627731892, + "loss": 1.6912, + "step": 1759 + }, + { + "epoch": 0.18565400843881857, + "grad_norm": 0.8358891606330872, + "learning_rate": 0.001386493902687604, + "loss": 1.6194, + "step": 1760 + }, + { + "epoch": 0.18575949367088607, + "grad_norm": 0.683719277381897, + "learning_rate": 0.0013863610712797035, + "loss": 1.67, + "step": 1761 + }, + { + "epoch": 0.1858649789029536, + "grad_norm": 0.6804810166358948, + "learning_rate": 0.0013862281685643716, + "loss": 1.6896, + "step": 1762 + }, + { + "epoch": 0.1859704641350211, + "grad_norm": 0.6657385230064392, + "learning_rate": 0.001386095194556501, + "loss": 1.6651, + "step": 1763 + }, + { + "epoch": 0.1860759493670886, + "grad_norm": 0.7224509716033936, + "learning_rate": 
0.001385962149270992, + "loss": 1.653, + "step": 1764 + }, + { + "epoch": 0.18618143459915612, + "grad_norm": 0.689156174659729, + "learning_rate": 0.001385829032722753, + "loss": 1.662, + "step": 1765 + }, + { + "epoch": 0.18628691983122364, + "grad_norm": 0.7101447582244873, + "learning_rate": 0.0013856958449267002, + "loss": 1.6758, + "step": 1766 + }, + { + "epoch": 0.18639240506329113, + "grad_norm": 0.8612897396087646, + "learning_rate": 0.0013855625858977584, + "loss": 1.6945, + "step": 1767 + }, + { + "epoch": 0.18649789029535865, + "grad_norm": 0.9294751882553101, + "learning_rate": 0.0013854292556508593, + "loss": 1.6482, + "step": 1768 + }, + { + "epoch": 0.18660337552742617, + "grad_norm": 0.7934178709983826, + "learning_rate": 0.0013852958542009438, + "loss": 1.6681, + "step": 1769 + }, + { + "epoch": 0.18670886075949367, + "grad_norm": 0.7012536525726318, + "learning_rate": 0.00138516238156296, + "loss": 1.6844, + "step": 1770 + }, + { + "epoch": 0.1868143459915612, + "grad_norm": 0.8752344846725464, + "learning_rate": 0.001385028837751864, + "loss": 1.6791, + "step": 1771 + }, + { + "epoch": 0.18691983122362868, + "grad_norm": 0.7413762807846069, + "learning_rate": 0.0013848952227826202, + "loss": 1.6817, + "step": 1772 + }, + { + "epoch": 0.1870253164556962, + "grad_norm": 0.6752062439918518, + "learning_rate": 0.0013847615366702009, + "loss": 1.6605, + "step": 1773 + }, + { + "epoch": 0.18713080168776372, + "grad_norm": 0.8091282844543457, + "learning_rate": 0.001384627779429586, + "loss": 1.7108, + "step": 1774 + }, + { + "epoch": 0.1872362869198312, + "grad_norm": 0.7532191276550293, + "learning_rate": 0.0013844939510757642, + "loss": 1.7139, + "step": 1775 + }, + { + "epoch": 0.18734177215189873, + "grad_norm": 0.721275806427002, + "learning_rate": 0.0013843600516237312, + "loss": 1.6801, + "step": 1776 + }, + { + "epoch": 0.18744725738396625, + "grad_norm": 0.934272050857544, + "learning_rate": 0.001384226081088491, + "loss": 1.652, + "step": 
1777 + }, + { + "epoch": 0.18755274261603375, + "grad_norm": 0.7672579884529114, + "learning_rate": 0.001384092039485056, + "loss": 1.6114, + "step": 1778 + }, + { + "epoch": 0.18765822784810127, + "grad_norm": 0.6750036478042603, + "learning_rate": 0.0013839579268284461, + "loss": 1.671, + "step": 1779 + }, + { + "epoch": 0.1877637130801688, + "grad_norm": 0.9007228016853333, + "learning_rate": 0.0013838237431336895, + "loss": 1.6609, + "step": 1780 + }, + { + "epoch": 0.18786919831223628, + "grad_norm": 0.9701758027076721, + "learning_rate": 0.0013836894884158217, + "loss": 1.67, + "step": 1781 + }, + { + "epoch": 0.1879746835443038, + "grad_norm": 0.6899674534797668, + "learning_rate": 0.001383555162689887, + "loss": 1.7222, + "step": 1782 + }, + { + "epoch": 0.18808016877637132, + "grad_norm": 1.0132606029510498, + "learning_rate": 0.001383420765970937, + "loss": 1.6671, + "step": 1783 + }, + { + "epoch": 0.1881856540084388, + "grad_norm": 0.7458951473236084, + "learning_rate": 0.0013832862982740318, + "loss": 1.6494, + "step": 1784 + }, + { + "epoch": 0.18829113924050633, + "grad_norm": 1.0472551584243774, + "learning_rate": 0.001383151759614239, + "loss": 1.6774, + "step": 1785 + }, + { + "epoch": 0.18839662447257383, + "grad_norm": 1.7017607688903809, + "learning_rate": 0.0013830171500066343, + "loss": 1.6622, + "step": 1786 + }, + { + "epoch": 0.18850210970464135, + "grad_norm": 0.8456632494926453, + "learning_rate": 0.0013828824694663013, + "loss": 1.6838, + "step": 1787 + }, + { + "epoch": 0.18860759493670887, + "grad_norm": 1.3676953315734863, + "learning_rate": 0.001382747718008332, + "loss": 1.6926, + "step": 1788 + }, + { + "epoch": 0.18871308016877636, + "grad_norm": 1.0841717720031738, + "learning_rate": 0.0013826128956478255, + "loss": 1.6854, + "step": 1789 + }, + { + "epoch": 0.18881856540084388, + "grad_norm": 2.163360357284546, + "learning_rate": 0.0013824780023998899, + "loss": 1.6836, + "step": 1790 + }, + { + "epoch": 0.1889240506329114, + 
"grad_norm": 1.9520195722579956, + "learning_rate": 0.0013823430382796402, + "loss": 1.6607, + "step": 1791 + }, + { + "epoch": 0.1890295358649789, + "grad_norm": 1.3710353374481201, + "learning_rate": 0.0013822080033021997, + "loss": 1.6829, + "step": 1792 + }, + { + "epoch": 0.1891350210970464, + "grad_norm": 1.3712373971939087, + "learning_rate": 0.0013820728974827, + "loss": 1.7035, + "step": 1793 + }, + { + "epoch": 0.18924050632911393, + "grad_norm": 1.22395658493042, + "learning_rate": 0.0013819377208362806, + "loss": 1.743, + "step": 1794 + }, + { + "epoch": 0.18934599156118143, + "grad_norm": 0.9773948192596436, + "learning_rate": 0.0013818024733780881, + "loss": 1.6579, + "step": 1795 + }, + { + "epoch": 0.18945147679324895, + "grad_norm": 1.6021243333816528, + "learning_rate": 0.0013816671551232782, + "loss": 1.7021, + "step": 1796 + }, + { + "epoch": 0.18955696202531647, + "grad_norm": 1.0090693235397339, + "learning_rate": 0.0013815317660870138, + "loss": 1.6496, + "step": 1797 + }, + { + "epoch": 0.18966244725738396, + "grad_norm": 1.637266755104065, + "learning_rate": 0.001381396306284466, + "loss": 1.6665, + "step": 1798 + }, + { + "epoch": 0.18976793248945148, + "grad_norm": 1.3539154529571533, + "learning_rate": 0.0013812607757308134, + "loss": 1.6599, + "step": 1799 + }, + { + "epoch": 0.189873417721519, + "grad_norm": 1.2168667316436768, + "learning_rate": 0.0013811251744412431, + "loss": 1.6492, + "step": 1800 + }, + { + "epoch": 0.1899789029535865, + "grad_norm": 1.1722478866577148, + "learning_rate": 0.0013809895024309501, + "loss": 1.6094, + "step": 1801 + }, + { + "epoch": 0.190084388185654, + "grad_norm": 1.181887149810791, + "learning_rate": 0.001380853759715137, + "loss": 1.6352, + "step": 1802 + }, + { + "epoch": 0.1901898734177215, + "grad_norm": 1.0886744260787964, + "learning_rate": 0.0013807179463090143, + "loss": 1.6522, + "step": 1803 + }, + { + "epoch": 0.19029535864978903, + "grad_norm": 1.477313756942749, + "learning_rate": 
0.0013805820622278008, + "loss": 1.6437, + "step": 1804 + }, + { + "epoch": 0.19040084388185655, + "grad_norm": 1.1741431951522827, + "learning_rate": 0.0013804461074867227, + "loss": 1.6654, + "step": 1805 + }, + { + "epoch": 0.19050632911392404, + "grad_norm": 1.2774296998977661, + "learning_rate": 0.0013803100821010146, + "loss": 1.6517, + "step": 1806 + }, + { + "epoch": 0.19061181434599156, + "grad_norm": 1.3477916717529297, + "learning_rate": 0.0013801739860859188, + "loss": 1.6598, + "step": 1807 + }, + { + "epoch": 0.19071729957805908, + "grad_norm": 1.0035940408706665, + "learning_rate": 0.0013800378194566856, + "loss": 1.6415, + "step": 1808 + }, + { + "epoch": 0.19082278481012657, + "grad_norm": 1.07771897315979, + "learning_rate": 0.001379901582228573, + "loss": 1.6918, + "step": 1809 + }, + { + "epoch": 0.1909282700421941, + "grad_norm": 0.9455654621124268, + "learning_rate": 0.0013797652744168473, + "loss": 1.6219, + "step": 1810 + }, + { + "epoch": 0.1910337552742616, + "grad_norm": 1.02512526512146, + "learning_rate": 0.0013796288960367822, + "loss": 1.6133, + "step": 1811 + }, + { + "epoch": 0.1911392405063291, + "grad_norm": 1.0334981679916382, + "learning_rate": 0.0013794924471036596, + "loss": 1.6514, + "step": 1812 + }, + { + "epoch": 0.19124472573839663, + "grad_norm": 0.8217393159866333, + "learning_rate": 0.0013793559276327695, + "loss": 1.6377, + "step": 1813 + }, + { + "epoch": 0.19135021097046415, + "grad_norm": 1.04657781124115, + "learning_rate": 0.0013792193376394094, + "loss": 1.6092, + "step": 1814 + }, + { + "epoch": 0.19145569620253164, + "grad_norm": 0.8921926617622375, + "learning_rate": 0.001379082677138885, + "loss": 1.6395, + "step": 1815 + }, + { + "epoch": 0.19156118143459916, + "grad_norm": 0.7972114086151123, + "learning_rate": 0.0013789459461465096, + "loss": 1.6509, + "step": 1816 + }, + { + "epoch": 0.19166666666666668, + "grad_norm": 0.7831979393959045, + "learning_rate": 0.001378809144677605, + "loss": 1.6196, + 
"step": 1817 + }, + { + "epoch": 0.19177215189873417, + "grad_norm": 0.9814146757125854, + "learning_rate": 0.0013786722727474998, + "loss": 1.634, + "step": 1818 + }, + { + "epoch": 0.1918776371308017, + "grad_norm": 0.8089895844459534, + "learning_rate": 0.0013785353303715317, + "loss": 1.6625, + "step": 1819 + }, + { + "epoch": 0.19198312236286919, + "grad_norm": 0.9443227648735046, + "learning_rate": 0.0013783983175650457, + "loss": 1.7073, + "step": 1820 + }, + { + "epoch": 0.1920886075949367, + "grad_norm": 0.7873952388763428, + "learning_rate": 0.001378261234343395, + "loss": 1.6448, + "step": 1821 + }, + { + "epoch": 0.19219409282700423, + "grad_norm": 0.7975823283195496, + "learning_rate": 0.0013781240807219399, + "loss": 1.6704, + "step": 1822 + }, + { + "epoch": 0.19229957805907172, + "grad_norm": 0.8072745203971863, + "learning_rate": 0.0013779868567160495, + "loss": 1.6605, + "step": 1823 + }, + { + "epoch": 0.19240506329113924, + "grad_norm": 1.0357986688613892, + "learning_rate": 0.0013778495623411008, + "loss": 1.6698, + "step": 1824 + }, + { + "epoch": 0.19251054852320676, + "grad_norm": 0.7159184813499451, + "learning_rate": 0.0013777121976124775, + "loss": 1.6984, + "step": 1825 + }, + { + "epoch": 0.19261603375527425, + "grad_norm": 0.8203062415122986, + "learning_rate": 0.0013775747625455724, + "loss": 1.6352, + "step": 1826 + }, + { + "epoch": 0.19272151898734177, + "grad_norm": 0.7948002219200134, + "learning_rate": 0.0013774372571557856, + "loss": 1.6397, + "step": 1827 + }, + { + "epoch": 0.1928270042194093, + "grad_norm": 0.8762108087539673, + "learning_rate": 0.0013772996814585261, + "loss": 1.5995, + "step": 1828 + }, + { + "epoch": 0.19293248945147679, + "grad_norm": 0.8641853928565979, + "learning_rate": 0.0013771620354692087, + "loss": 1.6297, + "step": 1829 + }, + { + "epoch": 0.1930379746835443, + "grad_norm": 0.7375999093055725, + "learning_rate": 0.0013770243192032581, + "loss": 1.6195, + "step": 1830 + }, + { + "epoch": 
0.19314345991561183, + "grad_norm": 0.8763672113418579, + "learning_rate": 0.0013768865326761058, + "loss": 1.6233, + "step": 1831 + }, + { + "epoch": 0.19324894514767932, + "grad_norm": 0.6979380249977112, + "learning_rate": 0.0013767486759031918, + "loss": 1.6438, + "step": 1832 + }, + { + "epoch": 0.19335443037974684, + "grad_norm": 0.735023558139801, + "learning_rate": 0.0013766107488999632, + "loss": 1.6787, + "step": 1833 + }, + { + "epoch": 0.19345991561181436, + "grad_norm": 0.7252920269966125, + "learning_rate": 0.0013764727516818757, + "loss": 1.6746, + "step": 1834 + }, + { + "epoch": 0.19356540084388185, + "grad_norm": 0.7217807173728943, + "learning_rate": 0.0013763346842643927, + "loss": 1.6398, + "step": 1835 + }, + { + "epoch": 0.19367088607594937, + "grad_norm": 1.0534640550613403, + "learning_rate": 0.0013761965466629847, + "loss": 1.671, + "step": 1836 + }, + { + "epoch": 0.19377637130801686, + "grad_norm": 0.8358858823776245, + "learning_rate": 0.0013760583388931315, + "loss": 1.6378, + "step": 1837 + }, + { + "epoch": 0.19388185654008439, + "grad_norm": 0.7671049237251282, + "learning_rate": 0.0013759200609703196, + "loss": 1.6374, + "step": 1838 + }, + { + "epoch": 0.1939873417721519, + "grad_norm": 0.9932117462158203, + "learning_rate": 0.0013757817129100437, + "loss": 1.6431, + "step": 1839 + }, + { + "epoch": 0.1940928270042194, + "grad_norm": 0.674439549446106, + "learning_rate": 0.0013756432947278064, + "loss": 1.6385, + "step": 1840 + }, + { + "epoch": 0.19419831223628692, + "grad_norm": 0.7792503833770752, + "learning_rate": 0.0013755048064391182, + "loss": 1.6755, + "step": 1841 + }, + { + "epoch": 0.19430379746835444, + "grad_norm": 0.9476420283317566, + "learning_rate": 0.0013753662480594973, + "loss": 1.6475, + "step": 1842 + }, + { + "epoch": 0.19440928270042193, + "grad_norm": 0.7954926490783691, + "learning_rate": 0.0013752276196044699, + "loss": 1.6195, + "step": 1843 + }, + { + "epoch": 0.19451476793248945, + "grad_norm": 
0.6906411051750183, + "learning_rate": 0.0013750889210895705, + "loss": 1.6784, + "step": 1844 + }, + { + "epoch": 0.19462025316455697, + "grad_norm": 0.7969041466712952, + "learning_rate": 0.0013749501525303401, + "loss": 1.6412, + "step": 1845 + }, + { + "epoch": 0.19472573839662446, + "grad_norm": 0.6904591917991638, + "learning_rate": 0.0013748113139423288, + "loss": 1.6758, + "step": 1846 + }, + { + "epoch": 0.19483122362869199, + "grad_norm": 0.7175238728523254, + "learning_rate": 0.0013746724053410944, + "loss": 1.6709, + "step": 1847 + }, + { + "epoch": 0.1949367088607595, + "grad_norm": 0.8752435445785522, + "learning_rate": 0.001374533426742202, + "loss": 1.6198, + "step": 1848 + }, + { + "epoch": 0.195042194092827, + "grad_norm": 0.7749301195144653, + "learning_rate": 0.0013743943781612251, + "loss": 1.5963, + "step": 1849 + }, + { + "epoch": 0.19514767932489452, + "grad_norm": 0.6507778167724609, + "learning_rate": 0.0013742552596137444, + "loss": 1.6701, + "step": 1850 + }, + { + "epoch": 0.19525316455696204, + "grad_norm": 0.8483625650405884, + "learning_rate": 0.0013741160711153492, + "loss": 1.6255, + "step": 1851 + }, + { + "epoch": 0.19535864978902953, + "grad_norm": 0.9016757607460022, + "learning_rate": 0.0013739768126816358, + "loss": 1.6645, + "step": 1852 + }, + { + "epoch": 0.19546413502109705, + "grad_norm": 0.6863948702812195, + "learning_rate": 0.0013738374843282094, + "loss": 1.6512, + "step": 1853 + }, + { + "epoch": 0.19556962025316454, + "grad_norm": 0.7054228782653809, + "learning_rate": 0.0013736980860706819, + "loss": 1.6699, + "step": 1854 + }, + { + "epoch": 0.19567510548523206, + "grad_norm": 0.733914315700531, + "learning_rate": 0.001373558617924674, + "loss": 1.6568, + "step": 1855 + }, + { + "epoch": 0.19578059071729959, + "grad_norm": 0.9529192447662354, + "learning_rate": 0.0013734190799058136, + "loss": 1.6691, + "step": 1856 + }, + { + "epoch": 0.19588607594936708, + "grad_norm": 0.6501055359840393, + "learning_rate": 
0.0013732794720297367, + "loss": 1.6474, + "step": 1857 + }, + { + "epoch": 0.1959915611814346, + "grad_norm": 0.9115878939628601, + "learning_rate": 0.0013731397943120868, + "loss": 1.6627, + "step": 1858 + }, + { + "epoch": 0.19609704641350212, + "grad_norm": 0.8803216218948364, + "learning_rate": 0.001373000046768516, + "loss": 1.6493, + "step": 1859 + }, + { + "epoch": 0.1962025316455696, + "grad_norm": 0.6943856477737427, + "learning_rate": 0.0013728602294146833, + "loss": 1.6821, + "step": 1860 + }, + { + "epoch": 0.19630801687763713, + "grad_norm": 0.9105629920959473, + "learning_rate": 0.001372720342266256, + "loss": 1.6635, + "step": 1861 + }, + { + "epoch": 0.19641350210970465, + "grad_norm": 0.7556730508804321, + "learning_rate": 0.001372580385338909, + "loss": 1.657, + "step": 1862 + }, + { + "epoch": 0.19651898734177214, + "grad_norm": 0.7750322222709656, + "learning_rate": 0.0013724403586483254, + "loss": 1.7093, + "step": 1863 + }, + { + "epoch": 0.19662447257383966, + "grad_norm": 1.0521831512451172, + "learning_rate": 0.001372300262210196, + "loss": 1.634, + "step": 1864 + }, + { + "epoch": 0.19672995780590719, + "grad_norm": 0.7446916699409485, + "learning_rate": 0.001372160096040219, + "loss": 1.6444, + "step": 1865 + }, + { + "epoch": 0.19683544303797468, + "grad_norm": 0.7070254683494568, + "learning_rate": 0.001372019860154101, + "loss": 1.62, + "step": 1866 + }, + { + "epoch": 0.1969409282700422, + "grad_norm": 0.727117121219635, + "learning_rate": 0.001371879554567556, + "loss": 1.6607, + "step": 1867 + }, + { + "epoch": 0.19704641350210972, + "grad_norm": 0.69407719373703, + "learning_rate": 0.0013717391792963062, + "loss": 1.6205, + "step": 1868 + }, + { + "epoch": 0.1971518987341772, + "grad_norm": 0.6807323694229126, + "learning_rate": 0.0013715987343560804, + "loss": 1.6702, + "step": 1869 + }, + { + "epoch": 0.19725738396624473, + "grad_norm": 0.7018837332725525, + "learning_rate": 0.0013714582197626175, + "loss": 1.6815, + "step": 
1870 + }, + { + "epoch": 0.19736286919831222, + "grad_norm": 0.6804510354995728, + "learning_rate": 0.001371317635531662, + "loss": 1.6189, + "step": 1871 + }, + { + "epoch": 0.19746835443037974, + "grad_norm": 0.6804285645484924, + "learning_rate": 0.001371176981678967, + "loss": 1.64, + "step": 1872 + }, + { + "epoch": 0.19757383966244726, + "grad_norm": 0.6290163993835449, + "learning_rate": 0.001371036258220294, + "loss": 1.616, + "step": 1873 + }, + { + "epoch": 0.19767932489451476, + "grad_norm": 0.7397742867469788, + "learning_rate": 0.0013708954651714116, + "loss": 1.6389, + "step": 1874 + }, + { + "epoch": 0.19778481012658228, + "grad_norm": 0.7774626612663269, + "learning_rate": 0.0013707546025480961, + "loss": 1.6522, + "step": 1875 + }, + { + "epoch": 0.1978902953586498, + "grad_norm": 0.810099720954895, + "learning_rate": 0.001370613670366132, + "loss": 1.6771, + "step": 1876 + }, + { + "epoch": 0.1979957805907173, + "grad_norm": 0.7019131779670715, + "learning_rate": 0.0013704726686413116, + "loss": 1.7067, + "step": 1877 + }, + { + "epoch": 0.1981012658227848, + "grad_norm": 0.9990528225898743, + "learning_rate": 0.0013703315973894346, + "loss": 1.6246, + "step": 1878 + }, + { + "epoch": 0.19820675105485233, + "grad_norm": 0.7908687591552734, + "learning_rate": 0.001370190456626309, + "loss": 1.6072, + "step": 1879 + }, + { + "epoch": 0.19831223628691982, + "grad_norm": 0.8054495453834534, + "learning_rate": 0.0013700492463677501, + "loss": 1.6654, + "step": 1880 + }, + { + "epoch": 0.19841772151898734, + "grad_norm": 1.0821541547775269, + "learning_rate": 0.0013699079666295811, + "loss": 1.6587, + "step": 1881 + }, + { + "epoch": 0.19852320675105486, + "grad_norm": 0.6387237310409546, + "learning_rate": 0.0013697666174276337, + "loss": 1.6375, + "step": 1882 + }, + { + "epoch": 0.19862869198312236, + "grad_norm": 0.8673486113548279, + "learning_rate": 0.001369625198777746, + "loss": 1.6524, + "step": 1883 + }, + { + "epoch": 0.19873417721518988, + 
"grad_norm": 0.6326647400856018, + "learning_rate": 0.0013694837106957654, + "loss": 1.6507, + "step": 1884 + }, + { + "epoch": 0.19883966244725737, + "grad_norm": 1.5266985893249512, + "learning_rate": 0.0013693421531975455, + "loss": 1.652, + "step": 1885 + }, + { + "epoch": 0.1989451476793249, + "grad_norm": 0.7220916748046875, + "learning_rate": 0.0013692005262989496, + "loss": 1.6545, + "step": 1886 + }, + { + "epoch": 0.1990506329113924, + "grad_norm": 1.6320558786392212, + "learning_rate": 0.0013690588300158467, + "loss": 1.6637, + "step": 1887 + }, + { + "epoch": 0.1991561181434599, + "grad_norm": 0.7568427324295044, + "learning_rate": 0.001368917064364115, + "loss": 1.6897, + "step": 1888 + }, + { + "epoch": 0.19926160337552742, + "grad_norm": 1.4813687801361084, + "learning_rate": 0.0013687752293596402, + "loss": 1.666, + "step": 1889 + }, + { + "epoch": 0.19936708860759494, + "grad_norm": 0.9474484324455261, + "learning_rate": 0.0013686333250183154, + "loss": 1.6507, + "step": 1890 + }, + { + "epoch": 0.19947257383966244, + "grad_norm": 1.977271556854248, + "learning_rate": 0.0013684913513560418, + "loss": 1.682, + "step": 1891 + }, + { + "epoch": 0.19957805907172996, + "grad_norm": 1.7193926572799683, + "learning_rate": 0.0013683493083887282, + "loss": 1.6445, + "step": 1892 + }, + { + "epoch": 0.19968354430379748, + "grad_norm": 1.168325424194336, + "learning_rate": 0.0013682071961322914, + "loss": 1.6039, + "step": 1893 + }, + { + "epoch": 0.19978902953586497, + "grad_norm": 1.2239187955856323, + "learning_rate": 0.0013680650146026554, + "loss": 1.6521, + "step": 1894 + }, + { + "epoch": 0.1998945147679325, + "grad_norm": 0.904570996761322, + "learning_rate": 0.0013679227638157523, + "loss": 1.6789, + "step": 1895 + }, + { + "epoch": 0.2, + "grad_norm": 1.119280219078064, + "learning_rate": 0.0013677804437875227, + "loss": 1.6459, + "step": 1896 + }, + { + "epoch": 0.2001054852320675, + "grad_norm": 1.0938661098480225, + "learning_rate": 
0.0013676380545339136, + "loss": 1.6586, + "step": 1897 + }, + { + "epoch": 0.20021097046413502, + "grad_norm": 0.8282173871994019, + "learning_rate": 0.0013674955960708808, + "loss": 1.6607, + "step": 1898 + }, + { + "epoch": 0.20031645569620254, + "grad_norm": 0.979809582233429, + "learning_rate": 0.0013673530684143874, + "loss": 1.6345, + "step": 1899 + }, + { + "epoch": 0.20042194092827004, + "grad_norm": 1.0228255987167358, + "learning_rate": 0.001367210471580404, + "loss": 1.6617, + "step": 1900 + }, + { + "epoch": 0.20052742616033756, + "grad_norm": 0.9489909410476685, + "learning_rate": 0.0013670678055849098, + "loss": 1.6426, + "step": 1901 + }, + { + "epoch": 0.20063291139240505, + "grad_norm": 1.1966103315353394, + "learning_rate": 0.0013669250704438911, + "loss": 1.6073, + "step": 1902 + }, + { + "epoch": 0.20073839662447257, + "grad_norm": 0.7969965934753418, + "learning_rate": 0.0013667822661733418, + "loss": 1.6555, + "step": 1903 + }, + { + "epoch": 0.2008438818565401, + "grad_norm": 1.0098611116409302, + "learning_rate": 0.0013666393927892642, + "loss": 1.6292, + "step": 1904 + }, + { + "epoch": 0.20094936708860758, + "grad_norm": 0.8297820091247559, + "learning_rate": 0.0013664964503076677, + "loss": 1.5862, + "step": 1905 + }, + { + "epoch": 0.2010548523206751, + "grad_norm": 0.7722975015640259, + "learning_rate": 0.0013663534387445696, + "loss": 1.6702, + "step": 1906 + }, + { + "epoch": 0.20116033755274262, + "grad_norm": 0.8964451551437378, + "learning_rate": 0.0013662103581159955, + "loss": 1.6399, + "step": 1907 + }, + { + "epoch": 0.20126582278481012, + "grad_norm": 0.6725467443466187, + "learning_rate": 0.0013660672084379781, + "loss": 1.6444, + "step": 1908 + }, + { + "epoch": 0.20137130801687764, + "grad_norm": 0.9129676818847656, + "learning_rate": 0.001365923989726558, + "loss": 1.6879, + "step": 1909 + }, + { + "epoch": 0.20147679324894516, + "grad_norm": 0.9149845838546753, + "learning_rate": 0.0013657807019977835, + "loss": 1.6526, 
+ "step": 1910 + }, + { + "epoch": 0.20158227848101265, + "grad_norm": 0.6900898218154907, + "learning_rate": 0.0013656373452677107, + "loss": 1.6559, + "step": 1911 + }, + { + "epoch": 0.20168776371308017, + "grad_norm": 0.8219763040542603, + "learning_rate": 0.0013654939195524038, + "loss": 1.6294, + "step": 1912 + }, + { + "epoch": 0.2017932489451477, + "grad_norm": 0.7364716529846191, + "learning_rate": 0.0013653504248679338, + "loss": 1.6281, + "step": 1913 + }, + { + "epoch": 0.20189873417721518, + "grad_norm": 0.7317228317260742, + "learning_rate": 0.0013652068612303803, + "loss": 1.6505, + "step": 1914 + }, + { + "epoch": 0.2020042194092827, + "grad_norm": 0.703464925289154, + "learning_rate": 0.0013650632286558305, + "loss": 1.6174, + "step": 1915 + }, + { + "epoch": 0.20210970464135022, + "grad_norm": 0.7276213765144348, + "learning_rate": 0.001364919527160379, + "loss": 1.6414, + "step": 1916 + }, + { + "epoch": 0.20221518987341772, + "grad_norm": 0.7494603991508484, + "learning_rate": 0.001364775756760128, + "loss": 1.6263, + "step": 1917 + }, + { + "epoch": 0.20232067510548524, + "grad_norm": 0.743166446685791, + "learning_rate": 0.0013646319174711878, + "loss": 1.6467, + "step": 1918 + }, + { + "epoch": 0.20242616033755273, + "grad_norm": 0.884726345539093, + "learning_rate": 0.0013644880093096766, + "loss": 1.6932, + "step": 1919 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 0.9483200311660767, + "learning_rate": 0.0013643440322917198, + "loss": 1.6277, + "step": 1920 + }, + { + "epoch": 0.20263713080168777, + "grad_norm": 0.7053067088127136, + "learning_rate": 0.0013641999864334507, + "loss": 1.6574, + "step": 1921 + }, + { + "epoch": 0.20274261603375526, + "grad_norm": 1.0343208312988281, + "learning_rate": 0.0013640558717510107, + "loss": 1.6159, + "step": 1922 + }, + { + "epoch": 0.20284810126582278, + "grad_norm": 0.6440756320953369, + "learning_rate": 0.0013639116882605481, + "loss": 1.6545, + "step": 1923 + }, + { + "epoch": 
0.2029535864978903, + "grad_norm": 1.0636368989944458, + "learning_rate": 0.0013637674359782196, + "loss": 1.6255, + "step": 1924 + }, + { + "epoch": 0.2030590717299578, + "grad_norm": 0.9693118333816528, + "learning_rate": 0.0013636231149201895, + "loss": 1.6762, + "step": 1925 + }, + { + "epoch": 0.20316455696202532, + "grad_norm": 0.718925952911377, + "learning_rate": 0.0013634787251026296, + "loss": 1.6232, + "step": 1926 + }, + { + "epoch": 0.20327004219409284, + "grad_norm": 1.0835603475570679, + "learning_rate": 0.0013633342665417192, + "loss": 1.6329, + "step": 1927 + }, + { + "epoch": 0.20337552742616033, + "grad_norm": 0.6623822450637817, + "learning_rate": 0.0013631897392536463, + "loss": 1.6706, + "step": 1928 + }, + { + "epoch": 0.20348101265822785, + "grad_norm": 1.3452048301696777, + "learning_rate": 0.001363045143254605, + "loss": 1.6207, + "step": 1929 + }, + { + "epoch": 0.20358649789029537, + "grad_norm": 0.6742807626724243, + "learning_rate": 0.0013629004785607989, + "loss": 1.6676, + "step": 1930 + }, + { + "epoch": 0.20369198312236286, + "grad_norm": 1.0580029487609863, + "learning_rate": 0.0013627557451884374, + "loss": 1.5871, + "step": 1931 + }, + { + "epoch": 0.20379746835443038, + "grad_norm": 0.8387618064880371, + "learning_rate": 0.0013626109431537398, + "loss": 1.6524, + "step": 1932 + }, + { + "epoch": 0.2039029535864979, + "grad_norm": 0.9362020492553711, + "learning_rate": 0.001362466072472931, + "loss": 1.6397, + "step": 1933 + }, + { + "epoch": 0.2040084388185654, + "grad_norm": 1.164847731590271, + "learning_rate": 0.0013623211331622448, + "loss": 1.6529, + "step": 1934 + }, + { + "epoch": 0.20411392405063292, + "grad_norm": 0.8348828554153442, + "learning_rate": 0.0013621761252379221, + "loss": 1.6147, + "step": 1935 + }, + { + "epoch": 0.2042194092827004, + "grad_norm": 1.1712889671325684, + "learning_rate": 0.0013620310487162124, + "loss": 1.6421, + "step": 1936 + }, + { + "epoch": 0.20432489451476793, + "grad_norm": 
0.7402987480163574, + "learning_rate": 0.0013618859036133714, + "loss": 1.6567, + "step": 1937 + }, + { + "epoch": 0.20443037974683545, + "grad_norm": 1.1078624725341797, + "learning_rate": 0.001361740689945664, + "loss": 1.6557, + "step": 1938 + }, + { + "epoch": 0.20453586497890294, + "grad_norm": 0.7911491394042969, + "learning_rate": 0.001361595407729362, + "loss": 1.6478, + "step": 1939 + }, + { + "epoch": 0.20464135021097046, + "grad_norm": 0.984368622303009, + "learning_rate": 0.0013614500569807445, + "loss": 1.6018, + "step": 1940 + }, + { + "epoch": 0.20474683544303798, + "grad_norm": 1.0343034267425537, + "learning_rate": 0.0013613046377160996, + "loss": 1.6409, + "step": 1941 + }, + { + "epoch": 0.20485232067510548, + "grad_norm": 0.790642261505127, + "learning_rate": 0.0013611591499517212, + "loss": 1.6544, + "step": 1942 + }, + { + "epoch": 0.204957805907173, + "grad_norm": 0.8586578965187073, + "learning_rate": 0.001361013593703913, + "loss": 1.626, + "step": 1943 + }, + { + "epoch": 0.20506329113924052, + "grad_norm": 0.8068537712097168, + "learning_rate": 0.0013608679689889847, + "loss": 1.6392, + "step": 1944 + }, + { + "epoch": 0.205168776371308, + "grad_norm": 0.9889823794364929, + "learning_rate": 0.0013607222758232546, + "loss": 1.673, + "step": 1945 + }, + { + "epoch": 0.20527426160337553, + "grad_norm": 0.8979260325431824, + "learning_rate": 0.0013605765142230479, + "loss": 1.5909, + "step": 1946 + }, + { + "epoch": 0.20537974683544305, + "grad_norm": 0.6977877616882324, + "learning_rate": 0.0013604306842046983, + "loss": 1.6625, + "step": 1947 + }, + { + "epoch": 0.20548523206751054, + "grad_norm": 0.7933268547058105, + "learning_rate": 0.0013602847857845466, + "loss": 1.6174, + "step": 1948 + }, + { + "epoch": 0.20559071729957806, + "grad_norm": 0.7726649045944214, + "learning_rate": 0.0013601388189789414, + "loss": 1.6157, + "step": 1949 + }, + { + "epoch": 0.20569620253164558, + "grad_norm": 0.6927939653396606, + "learning_rate": 
0.0013599927838042394, + "loss": 1.6178, + "step": 1950 + }, + { + "epoch": 0.20580168776371308, + "grad_norm": 0.7227939367294312, + "learning_rate": 0.0013598466802768041, + "loss": 1.6731, + "step": 1951 + }, + { + "epoch": 0.2059071729957806, + "grad_norm": 0.7933559417724609, + "learning_rate": 0.0013597005084130072, + "loss": 1.6447, + "step": 1952 + }, + { + "epoch": 0.2060126582278481, + "grad_norm": 0.7392335534095764, + "learning_rate": 0.0013595542682292281, + "loss": 1.6749, + "step": 1953 + }, + { + "epoch": 0.2061181434599156, + "grad_norm": 0.7522357702255249, + "learning_rate": 0.0013594079597418541, + "loss": 1.591, + "step": 1954 + }, + { + "epoch": 0.20622362869198313, + "grad_norm": 0.9170671105384827, + "learning_rate": 0.0013592615829672791, + "loss": 1.6374, + "step": 1955 + }, + { + "epoch": 0.20632911392405062, + "grad_norm": 0.8854087591171265, + "learning_rate": 0.0013591151379219058, + "loss": 1.6036, + "step": 1956 + }, + { + "epoch": 0.20643459915611814, + "grad_norm": 0.6818795800209045, + "learning_rate": 0.0013589686246221438, + "loss": 1.6214, + "step": 1957 + }, + { + "epoch": 0.20654008438818566, + "grad_norm": 0.7197780013084412, + "learning_rate": 0.001358822043084411, + "loss": 1.6443, + "step": 1958 + }, + { + "epoch": 0.20664556962025316, + "grad_norm": 0.675777792930603, + "learning_rate": 0.0013586753933251322, + "loss": 1.6545, + "step": 1959 + }, + { + "epoch": 0.20675105485232068, + "grad_norm": 0.7160118222236633, + "learning_rate": 0.0013585286753607408, + "loss": 1.6093, + "step": 1960 + }, + { + "epoch": 0.2068565400843882, + "grad_norm": 0.6931870579719543, + "learning_rate": 0.0013583818892076765, + "loss": 1.6671, + "step": 1961 + }, + { + "epoch": 0.2069620253164557, + "grad_norm": 0.8130098581314087, + "learning_rate": 0.0013582350348823882, + "loss": 1.6795, + "step": 1962 + }, + { + "epoch": 0.2070675105485232, + "grad_norm": 0.7067359685897827, + "learning_rate": 0.0013580881124013312, + "loss": 1.6651, + 
"step": 1963 + }, + { + "epoch": 0.20717299578059073, + "grad_norm": 0.749526858329773, + "learning_rate": 0.001357941121780969, + "loss": 1.6101, + "step": 1964 + }, + { + "epoch": 0.20727848101265822, + "grad_norm": 0.7881572842597961, + "learning_rate": 0.0013577940630377725, + "loss": 1.6467, + "step": 1965 + }, + { + "epoch": 0.20738396624472574, + "grad_norm": 0.7061471939086914, + "learning_rate": 0.0013576469361882208, + "loss": 1.6354, + "step": 1966 + }, + { + "epoch": 0.20748945147679324, + "grad_norm": 0.9445706605911255, + "learning_rate": 0.0013574997412487996, + "loss": 1.6675, + "step": 1967 + }, + { + "epoch": 0.20759493670886076, + "grad_norm": 0.8979973196983337, + "learning_rate": 0.0013573524782360034, + "loss": 1.6479, + "step": 1968 + }, + { + "epoch": 0.20770042194092828, + "grad_norm": 0.654565691947937, + "learning_rate": 0.0013572051471663332, + "loss": 1.6421, + "step": 1969 + }, + { + "epoch": 0.20780590717299577, + "grad_norm": 0.8387991786003113, + "learning_rate": 0.0013570577480562986, + "loss": 1.6383, + "step": 1970 + }, + { + "epoch": 0.2079113924050633, + "grad_norm": 0.9652854800224304, + "learning_rate": 0.0013569102809224162, + "loss": 1.6447, + "step": 1971 + }, + { + "epoch": 0.2080168776371308, + "grad_norm": 0.7299418449401855, + "learning_rate": 0.0013567627457812105, + "loss": 1.593, + "step": 1972 + }, + { + "epoch": 0.2081223628691983, + "grad_norm": 0.8111629486083984, + "learning_rate": 0.0013566151426492137, + "loss": 1.6382, + "step": 1973 + }, + { + "epoch": 0.20822784810126582, + "grad_norm": 1.2374345064163208, + "learning_rate": 0.0013564674715429651, + "loss": 1.6375, + "step": 1974 + }, + { + "epoch": 0.20833333333333334, + "grad_norm": 0.8506698608398438, + "learning_rate": 0.0013563197324790123, + "loss": 1.6438, + "step": 1975 + }, + { + "epoch": 0.20843881856540084, + "grad_norm": 0.6152286529541016, + "learning_rate": 0.0013561719254739104, + "loss": 1.6203, + "step": 1976 + }, + { + "epoch": 
0.20854430379746836, + "grad_norm": 0.7296903133392334, + "learning_rate": 0.001356024050544221, + "loss": 1.6425, + "step": 1977 + }, + { + "epoch": 0.20864978902953588, + "grad_norm": 0.7153924107551575, + "learning_rate": 0.0013558761077065154, + "loss": 1.6387, + "step": 1978 + }, + { + "epoch": 0.20875527426160337, + "grad_norm": 0.7524582147598267, + "learning_rate": 0.0013557280969773704, + "loss": 1.6012, + "step": 1979 + }, + { + "epoch": 0.2088607594936709, + "grad_norm": 0.7545072436332703, + "learning_rate": 0.0013555800183733717, + "loss": 1.6426, + "step": 1980 + }, + { + "epoch": 0.2089662447257384, + "grad_norm": 0.7109759449958801, + "learning_rate": 0.0013554318719111124, + "loss": 1.6205, + "step": 1981 + }, + { + "epoch": 0.2090717299578059, + "grad_norm": 0.6268584728240967, + "learning_rate": 0.0013552836576071925, + "loss": 1.5805, + "step": 1982 + }, + { + "epoch": 0.20917721518987342, + "grad_norm": 0.7290811538696289, + "learning_rate": 0.0013551353754782211, + "loss": 1.6319, + "step": 1983 + }, + { + "epoch": 0.20928270042194091, + "grad_norm": 0.6491149663925171, + "learning_rate": 0.0013549870255408132, + "loss": 1.6738, + "step": 1984 + }, + { + "epoch": 0.20938818565400844, + "grad_norm": 0.6917279958724976, + "learning_rate": 0.0013548386078115924, + "loss": 1.6211, + "step": 1985 + }, + { + "epoch": 0.20949367088607596, + "grad_norm": 0.6577163338661194, + "learning_rate": 0.0013546901223071893, + "loss": 1.6033, + "step": 1986 + }, + { + "epoch": 0.20959915611814345, + "grad_norm": 0.7218565940856934, + "learning_rate": 0.001354541569044243, + "loss": 1.5992, + "step": 1987 + }, + { + "epoch": 0.20970464135021097, + "grad_norm": 0.8090323209762573, + "learning_rate": 0.0013543929480393994, + "loss": 1.645, + "step": 1988 + }, + { + "epoch": 0.2098101265822785, + "grad_norm": 0.7403135895729065, + "learning_rate": 0.0013542442593093122, + "loss": 1.6583, + "step": 1989 + }, + { + "epoch": 0.20991561181434598, + "grad_norm": 
0.6693066358566284, + "learning_rate": 0.0013540955028706425, + "loss": 1.6419, + "step": 1990 + }, + { + "epoch": 0.2100210970464135, + "grad_norm": 0.6717994809150696, + "learning_rate": 0.0013539466787400598, + "loss": 1.6417, + "step": 1991 + }, + { + "epoch": 0.21012658227848102, + "grad_norm": 0.714327871799469, + "learning_rate": 0.00135379778693424, + "loss": 1.618, + "step": 1992 + }, + { + "epoch": 0.21023206751054851, + "grad_norm": 0.7343468070030212, + "learning_rate": 0.0013536488274698672, + "loss": 1.6561, + "step": 1993 + }, + { + "epoch": 0.21033755274261604, + "grad_norm": 0.8573880791664124, + "learning_rate": 0.0013534998003636332, + "loss": 1.6009, + "step": 1994 + }, + { + "epoch": 0.21044303797468356, + "grad_norm": 0.6787741184234619, + "learning_rate": 0.0013533507056322374, + "loss": 1.6616, + "step": 1995 + }, + { + "epoch": 0.21054852320675105, + "grad_norm": 0.9611364006996155, + "learning_rate": 0.0013532015432923864, + "loss": 1.6146, + "step": 1996 + }, + { + "epoch": 0.21065400843881857, + "grad_norm": 1.0249038934707642, + "learning_rate": 0.0013530523133607948, + "loss": 1.6174, + "step": 1997 + }, + { + "epoch": 0.2107594936708861, + "grad_norm": 0.7961052060127258, + "learning_rate": 0.0013529030158541842, + "loss": 1.6159, + "step": 1998 + }, + { + "epoch": 0.21086497890295358, + "grad_norm": 0.9665228724479675, + "learning_rate": 0.0013527536507892844, + "loss": 1.6118, + "step": 1999 + }, + { + "epoch": 0.2109704641350211, + "grad_norm": 1.2123796939849854, + "learning_rate": 0.0013526042181828324, + "loss": 1.6431, + "step": 2000 + }, + { + "epoch": 0.2110759493670886, + "grad_norm": 0.8140988945960999, + "learning_rate": 0.001352454718051573, + "loss": 1.6816, + "step": 2001 + }, + { + "epoch": 0.21118143459915611, + "grad_norm": 1.3032352924346924, + "learning_rate": 0.0013523051504122584, + "loss": 1.6375, + "step": 2002 + }, + { + "epoch": 0.21128691983122364, + "grad_norm": 0.7830499410629272, + "learning_rate": 
0.0013521555152816481, + "loss": 1.5904, + "step": 2003 + }, + { + "epoch": 0.21139240506329113, + "grad_norm": 0.8186193108558655, + "learning_rate": 0.0013520058126765097, + "loss": 1.6353, + "step": 2004 + }, + { + "epoch": 0.21149789029535865, + "grad_norm": 0.8076159358024597, + "learning_rate": 0.0013518560426136182, + "loss": 1.6655, + "step": 2005 + }, + { + "epoch": 0.21160337552742617, + "grad_norm": 0.8325140476226807, + "learning_rate": 0.001351706205109756, + "loss": 1.6513, + "step": 2006 + }, + { + "epoch": 0.21170886075949366, + "grad_norm": 0.9750213027000427, + "learning_rate": 0.001351556300181713, + "loss": 1.622, + "step": 2007 + }, + { + "epoch": 0.21181434599156118, + "grad_norm": 0.822094202041626, + "learning_rate": 0.001351406327846287, + "loss": 1.6712, + "step": 2008 + }, + { + "epoch": 0.2119198312236287, + "grad_norm": 0.6873660683631897, + "learning_rate": 0.0013512562881202832, + "loss": 1.6084, + "step": 2009 + }, + { + "epoch": 0.2120253164556962, + "grad_norm": 0.7187005877494812, + "learning_rate": 0.0013511061810205143, + "loss": 1.6136, + "step": 2010 + }, + { + "epoch": 0.21213080168776371, + "grad_norm": 0.6484179496765137, + "learning_rate": 0.0013509560065638002, + "loss": 1.6404, + "step": 2011 + }, + { + "epoch": 0.21223628691983124, + "grad_norm": 0.7004154920578003, + "learning_rate": 0.001350805764766969, + "loss": 1.6456, + "step": 2012 + }, + { + "epoch": 0.21234177215189873, + "grad_norm": 0.6836040616035461, + "learning_rate": 0.0013506554556468558, + "loss": 1.6463, + "step": 2013 + }, + { + "epoch": 0.21244725738396625, + "grad_norm": 0.6831625699996948, + "learning_rate": 0.001350505079220304, + "loss": 1.6389, + "step": 2014 + }, + { + "epoch": 0.21255274261603377, + "grad_norm": 0.7480412721633911, + "learning_rate": 0.0013503546355041636, + "loss": 1.6366, + "step": 2015 + }, + { + "epoch": 0.21265822784810126, + "grad_norm": 0.7548952102661133, + "learning_rate": 0.0013502041245152924, + "loss": 1.6755, + 
"step": 2016 + }, + { + "epoch": 0.21276371308016878, + "grad_norm": 0.8741075992584229, + "learning_rate": 0.0013500535462705565, + "loss": 1.6427, + "step": 2017 + }, + { + "epoch": 0.21286919831223627, + "grad_norm": 1.3173564672470093, + "learning_rate": 0.0013499029007868284, + "loss": 1.646, + "step": 2018 + }, + { + "epoch": 0.2129746835443038, + "grad_norm": 0.778488039970398, + "learning_rate": 0.0013497521880809888, + "loss": 1.6648, + "step": 2019 + }, + { + "epoch": 0.21308016877637131, + "grad_norm": 1.802783489227295, + "learning_rate": 0.001349601408169926, + "loss": 1.641, + "step": 2020 + }, + { + "epoch": 0.2131856540084388, + "grad_norm": 1.1860119104385376, + "learning_rate": 0.0013494505610705356, + "loss": 1.6345, + "step": 2021 + }, + { + "epoch": 0.21329113924050633, + "grad_norm": 1.7711691856384277, + "learning_rate": 0.0013492996467997205, + "loss": 1.6219, + "step": 2022 + }, + { + "epoch": 0.21339662447257385, + "grad_norm": 1.824223279953003, + "learning_rate": 0.0013491486653743918, + "loss": 1.7055, + "step": 2023 + }, + { + "epoch": 0.21350210970464134, + "grad_norm": 0.9102794528007507, + "learning_rate": 0.0013489976168114676, + "loss": 1.661, + "step": 2024 + }, + { + "epoch": 0.21360759493670886, + "grad_norm": 1.3216469287872314, + "learning_rate": 0.0013488465011278733, + "loss": 1.6057, + "step": 2025 + }, + { + "epoch": 0.21371308016877638, + "grad_norm": 0.9136369824409485, + "learning_rate": 0.0013486953183405425, + "loss": 1.62, + "step": 2026 + }, + { + "epoch": 0.21381856540084387, + "grad_norm": 1.0990597009658813, + "learning_rate": 0.001348544068466416, + "loss": 1.6134, + "step": 2027 + }, + { + "epoch": 0.2139240506329114, + "grad_norm": 1.169548749923706, + "learning_rate": 0.0013483927515224418, + "loss": 1.6561, + "step": 2028 + }, + { + "epoch": 0.21402953586497891, + "grad_norm": 0.9593425989151001, + "learning_rate": 0.0013482413675255762, + "loss": 1.6462, + "step": 2029 + }, + { + "epoch": 
0.2141350210970464, + "grad_norm": 1.4086284637451172, + "learning_rate": 0.0013480899164927823, + "loss": 1.6202, + "step": 2030 + }, + { + "epoch": 0.21424050632911393, + "grad_norm": 1.028988003730774, + "learning_rate": 0.0013479383984410305, + "loss": 1.6501, + "step": 2031 + }, + { + "epoch": 0.21434599156118145, + "grad_norm": 1.439373254776001, + "learning_rate": 0.0013477868133873001, + "loss": 1.6297, + "step": 2032 + }, + { + "epoch": 0.21445147679324894, + "grad_norm": 1.361315131187439, + "learning_rate": 0.0013476351613485762, + "loss": 1.627, + "step": 2033 + }, + { + "epoch": 0.21455696202531646, + "grad_norm": 1.4421658515930176, + "learning_rate": 0.0013474834423418522, + "loss": 1.6267, + "step": 2034 + }, + { + "epoch": 0.21466244725738395, + "grad_norm": 1.2655214071273804, + "learning_rate": 0.0013473316563841296, + "loss": 1.6251, + "step": 2035 + }, + { + "epoch": 0.21476793248945147, + "grad_norm": 1.2102216482162476, + "learning_rate": 0.0013471798034924158, + "loss": 1.6464, + "step": 2036 + }, + { + "epoch": 0.214873417721519, + "grad_norm": 1.152522325515747, + "learning_rate": 0.0013470278836837275, + "loss": 1.6051, + "step": 2037 + }, + { + "epoch": 0.2149789029535865, + "grad_norm": 1.2388025522232056, + "learning_rate": 0.001346875896975088, + "loss": 1.6232, + "step": 2038 + }, + { + "epoch": 0.215084388185654, + "grad_norm": 1.0452063083648682, + "learning_rate": 0.0013467238433835277, + "loss": 1.6296, + "step": 2039 + }, + { + "epoch": 0.21518987341772153, + "grad_norm": 1.3130394220352173, + "learning_rate": 0.0013465717229260853, + "loss": 1.6452, + "step": 2040 + }, + { + "epoch": 0.21529535864978902, + "grad_norm": 0.9869100451469421, + "learning_rate": 0.0013464195356198065, + "loss": 1.6121, + "step": 2041 + }, + { + "epoch": 0.21540084388185654, + "grad_norm": 1.2029099464416504, + "learning_rate": 0.0013462672814817445, + "loss": 1.5989, + "step": 2042 + }, + { + "epoch": 0.21550632911392406, + "grad_norm": 
0.906936526298523, + "learning_rate": 0.0013461149605289607, + "loss": 1.6303, + "step": 2043 + }, + { + "epoch": 0.21561181434599155, + "grad_norm": 1.1705553531646729, + "learning_rate": 0.001345962572778523, + "loss": 1.6628, + "step": 2044 + }, + { + "epoch": 0.21571729957805907, + "grad_norm": 0.8323335647583008, + "learning_rate": 0.0013458101182475073, + "loss": 1.5949, + "step": 2045 + }, + { + "epoch": 0.2158227848101266, + "grad_norm": 1.2315517663955688, + "learning_rate": 0.0013456575969529967, + "loss": 1.6398, + "step": 2046 + }, + { + "epoch": 0.2159282700421941, + "grad_norm": 0.8694968223571777, + "learning_rate": 0.001345505008912082, + "loss": 1.593, + "step": 2047 + }, + { + "epoch": 0.2160337552742616, + "grad_norm": 1.2372688055038452, + "learning_rate": 0.0013453523541418623, + "loss": 1.6283, + "step": 2048 + }, + { + "epoch": 0.21613924050632913, + "grad_norm": 0.9962382912635803, + "learning_rate": 0.001345199632659442, + "loss": 1.6213, + "step": 2049 + }, + { + "epoch": 0.21624472573839662, + "grad_norm": 1.3055285215377808, + "learning_rate": 0.001345046844481935, + "loss": 1.6276, + "step": 2050 + }, + { + "epoch": 0.21635021097046414, + "grad_norm": 1.1213769912719727, + "learning_rate": 0.0013448939896264622, + "loss": 1.6413, + "step": 2051 + }, + { + "epoch": 0.21645569620253163, + "grad_norm": 1.2308753728866577, + "learning_rate": 0.001344741068110151, + "loss": 1.6579, + "step": 2052 + }, + { + "epoch": 0.21656118143459915, + "grad_norm": 0.9580768346786499, + "learning_rate": 0.001344588079950138, + "loss": 1.6003, + "step": 2053 + }, + { + "epoch": 0.21666666666666667, + "grad_norm": 1.0704164505004883, + "learning_rate": 0.0013444350251635654, + "loss": 1.6274, + "step": 2054 + }, + { + "epoch": 0.21677215189873417, + "grad_norm": 0.8974556922912598, + "learning_rate": 0.0013442819037675843, + "loss": 1.6178, + "step": 2055 + }, + { + "epoch": 0.2168776371308017, + "grad_norm": 0.9850271344184875, + "learning_rate": 
0.0013441287157793522, + "loss": 1.6285, + "step": 2056 + }, + { + "epoch": 0.2169831223628692, + "grad_norm": 0.7565875053405762, + "learning_rate": 0.0013439754612160353, + "loss": 1.6076, + "step": 2057 + }, + { + "epoch": 0.2170886075949367, + "grad_norm": 0.8381140232086182, + "learning_rate": 0.001343822140094806, + "loss": 1.575, + "step": 2058 + }, + { + "epoch": 0.21719409282700422, + "grad_norm": 0.7753020524978638, + "learning_rate": 0.0013436687524328449, + "loss": 1.5892, + "step": 2059 + }, + { + "epoch": 0.21729957805907174, + "grad_norm": 0.7096189856529236, + "learning_rate": 0.0013435152982473396, + "loss": 1.5762, + "step": 2060 + }, + { + "epoch": 0.21740506329113923, + "grad_norm": 0.8011468052864075, + "learning_rate": 0.0013433617775554854, + "loss": 1.6077, + "step": 2061 + }, + { + "epoch": 0.21751054852320675, + "grad_norm": 0.6772448420524597, + "learning_rate": 0.0013432081903744857, + "loss": 1.6365, + "step": 2062 + }, + { + "epoch": 0.21761603375527427, + "grad_norm": 0.824309766292572, + "learning_rate": 0.00134305453672155, + "loss": 1.594, + "step": 2063 + }, + { + "epoch": 0.21772151898734177, + "grad_norm": 0.667262852191925, + "learning_rate": 0.0013429008166138965, + "loss": 1.5924, + "step": 2064 + }, + { + "epoch": 0.2178270042194093, + "grad_norm": 0.7525674104690552, + "learning_rate": 0.0013427470300687498, + "loss": 1.5966, + "step": 2065 + }, + { + "epoch": 0.21793248945147678, + "grad_norm": 0.6358681321144104, + "learning_rate": 0.0013425931771033426, + "loss": 1.6371, + "step": 2066 + }, + { + "epoch": 0.2180379746835443, + "grad_norm": 0.7820482850074768, + "learning_rate": 0.0013424392577349152, + "loss": 1.5914, + "step": 2067 + }, + { + "epoch": 0.21814345991561182, + "grad_norm": 0.6971426010131836, + "learning_rate": 0.001342285271980715, + "loss": 1.6239, + "step": 2068 + }, + { + "epoch": 0.2182489451476793, + "grad_norm": 1.332732915878296, + "learning_rate": 0.0013421312198579963, + "loss": 1.6383, + "step": 
2069 + }, + { + "epoch": 0.21835443037974683, + "grad_norm": 0.6765321493148804, + "learning_rate": 0.0013419771013840217, + "loss": 1.6308, + "step": 2070 + }, + { + "epoch": 0.21845991561181435, + "grad_norm": 1.3549394607543945, + "learning_rate": 0.0013418229165760613, + "loss": 1.6629, + "step": 2071 + }, + { + "epoch": 0.21856540084388185, + "grad_norm": 0.7305866479873657, + "learning_rate": 0.001341668665451392, + "loss": 1.6134, + "step": 2072 + }, + { + "epoch": 0.21867088607594937, + "grad_norm": 1.2995518445968628, + "learning_rate": 0.0013415143480272982, + "loss": 1.6363, + "step": 2073 + }, + { + "epoch": 0.2187763713080169, + "grad_norm": 0.7499101758003235, + "learning_rate": 0.0013413599643210723, + "loss": 1.6059, + "step": 2074 + }, + { + "epoch": 0.21888185654008438, + "grad_norm": 1.1767297983169556, + "learning_rate": 0.0013412055143500136, + "loss": 1.6042, + "step": 2075 + }, + { + "epoch": 0.2189873417721519, + "grad_norm": 0.764022171497345, + "learning_rate": 0.001341050998131429, + "loss": 1.5833, + "step": 2076 + }, + { + "epoch": 0.21909282700421942, + "grad_norm": 1.0882482528686523, + "learning_rate": 0.0013408964156826327, + "loss": 1.6409, + "step": 2077 + }, + { + "epoch": 0.2191983122362869, + "grad_norm": 0.8059864044189453, + "learning_rate": 0.0013407417670209467, + "loss": 1.6048, + "step": 2078 + }, + { + "epoch": 0.21930379746835443, + "grad_norm": 0.8385416865348816, + "learning_rate": 0.0013405870521636999, + "loss": 1.6434, + "step": 2079 + }, + { + "epoch": 0.21940928270042195, + "grad_norm": 0.8396673202514648, + "learning_rate": 0.001340432271128229, + "loss": 1.6475, + "step": 2080 + }, + { + "epoch": 0.21951476793248945, + "grad_norm": 0.655370831489563, + "learning_rate": 0.001340277423931878, + "loss": 1.6139, + "step": 2081 + }, + { + "epoch": 0.21962025316455697, + "grad_norm": 0.7482780814170837, + "learning_rate": 0.0013401225105919982, + "loss": 1.5978, + "step": 2082 + }, + { + "epoch": 0.21972573839662446, 
+ "grad_norm": 0.867310106754303, + "learning_rate": 0.0013399675311259484, + "loss": 1.5904, + "step": 2083 + }, + { + "epoch": 0.21983122362869198, + "grad_norm": 0.6412976980209351, + "learning_rate": 0.0013398124855510951, + "loss": 1.6164, + "step": 2084 + }, + { + "epoch": 0.2199367088607595, + "grad_norm": 0.9410138130187988, + "learning_rate": 0.0013396573738848115, + "loss": 1.623, + "step": 2085 + }, + { + "epoch": 0.220042194092827, + "grad_norm": 0.9012517929077148, + "learning_rate": 0.001339502196144479, + "loss": 1.5833, + "step": 2086 + }, + { + "epoch": 0.2201476793248945, + "grad_norm": 0.6284943222999573, + "learning_rate": 0.0013393469523474858, + "loss": 1.6509, + "step": 2087 + }, + { + "epoch": 0.22025316455696203, + "grad_norm": 0.7080163955688477, + "learning_rate": 0.001339191642511228, + "loss": 1.5794, + "step": 2088 + }, + { + "epoch": 0.22035864978902953, + "grad_norm": 0.7069942951202393, + "learning_rate": 0.0013390362666531085, + "loss": 1.6164, + "step": 2089 + }, + { + "epoch": 0.22046413502109705, + "grad_norm": 0.816108226776123, + "learning_rate": 0.0013388808247905381, + "loss": 1.6397, + "step": 2090 + }, + { + "epoch": 0.22056962025316457, + "grad_norm": 0.7195611000061035, + "learning_rate": 0.0013387253169409351, + "loss": 1.6221, + "step": 2091 + }, + { + "epoch": 0.22067510548523206, + "grad_norm": 0.9123033285140991, + "learning_rate": 0.0013385697431217247, + "loss": 1.6286, + "step": 2092 + }, + { + "epoch": 0.22078059071729958, + "grad_norm": 1.0042873620986938, + "learning_rate": 0.0013384141033503394, + "loss": 1.6083, + "step": 2093 + }, + { + "epoch": 0.2208860759493671, + "grad_norm": 0.6885650753974915, + "learning_rate": 0.0013382583976442198, + "loss": 1.6084, + "step": 2094 + }, + { + "epoch": 0.2209915611814346, + "grad_norm": 1.0452864170074463, + "learning_rate": 0.0013381026260208136, + "loss": 1.5662, + "step": 2095 + }, + { + "epoch": 0.2210970464135021, + "grad_norm": 0.9657107591629028, + 
"learning_rate": 0.0013379467884975756, + "loss": 1.622, + "step": 2096 + }, + { + "epoch": 0.22120253164556963, + "grad_norm": 0.7564265727996826, + "learning_rate": 0.001337790885091968, + "loss": 1.621, + "step": 2097 + }, + { + "epoch": 0.22130801687763713, + "grad_norm": 1.4345651865005493, + "learning_rate": 0.0013376349158214609, + "loss": 1.6587, + "step": 2098 + }, + { + "epoch": 0.22141350210970465, + "grad_norm": 0.8301056027412415, + "learning_rate": 0.0013374788807035314, + "loss": 1.6761, + "step": 2099 + }, + { + "epoch": 0.22151898734177214, + "grad_norm": 1.4141011238098145, + "learning_rate": 0.0013373227797556634, + "loss": 1.6275, + "step": 2100 + }, + { + "epoch": 0.22162447257383966, + "grad_norm": 1.012839436531067, + "learning_rate": 0.0013371666129953497, + "loss": 1.6177, + "step": 2101 + }, + { + "epoch": 0.22172995780590718, + "grad_norm": 1.807425618171692, + "learning_rate": 0.0013370103804400887, + "loss": 1.6672, + "step": 2102 + }, + { + "epoch": 0.22183544303797467, + "grad_norm": 1.6038119792938232, + "learning_rate": 0.001336854082107388, + "loss": 1.6324, + "step": 2103 + }, + { + "epoch": 0.2219409282700422, + "grad_norm": 1.0820517539978027, + "learning_rate": 0.001336697718014761, + "loss": 1.621, + "step": 2104 + }, + { + "epoch": 0.2220464135021097, + "grad_norm": 1.2490415573120117, + "learning_rate": 0.001336541288179729, + "loss": 1.6507, + "step": 2105 + }, + { + "epoch": 0.2221518987341772, + "grad_norm": 0.8013483881950378, + "learning_rate": 0.0013363847926198208, + "loss": 1.6039, + "step": 2106 + }, + { + "epoch": 0.22225738396624473, + "grad_norm": 1.3598099946975708, + "learning_rate": 0.0013362282313525728, + "loss": 1.6146, + "step": 2107 + }, + { + "epoch": 0.22236286919831225, + "grad_norm": 0.9013681411743164, + "learning_rate": 0.001336071604395528, + "loss": 1.5976, + "step": 2108 + }, + { + "epoch": 0.22246835443037974, + "grad_norm": 0.9811347126960754, + "learning_rate": 0.0013359149117662377, + "loss": 
1.5998, + "step": 2109 + }, + { + "epoch": 0.22257383966244726, + "grad_norm": 1.1399840116500854, + "learning_rate": 0.00133575815348226, + "loss": 1.6122, + "step": 2110 + }, + { + "epoch": 0.22267932489451478, + "grad_norm": 0.7315861582756042, + "learning_rate": 0.0013356013295611603, + "loss": 1.5745, + "step": 2111 + }, + { + "epoch": 0.22278481012658227, + "grad_norm": 0.9130045175552368, + "learning_rate": 0.0013354444400205114, + "loss": 1.6045, + "step": 2112 + }, + { + "epoch": 0.2228902953586498, + "grad_norm": 0.7418559789657593, + "learning_rate": 0.0013352874848778938, + "loss": 1.6422, + "step": 2113 + }, + { + "epoch": 0.2229957805907173, + "grad_norm": 0.9314473867416382, + "learning_rate": 0.0013351304641508951, + "loss": 1.6118, + "step": 2114 + }, + { + "epoch": 0.2231012658227848, + "grad_norm": 0.7861246466636658, + "learning_rate": 0.0013349733778571101, + "loss": 1.6098, + "step": 2115 + }, + { + "epoch": 0.22320675105485233, + "grad_norm": 0.8329129219055176, + "learning_rate": 0.0013348162260141412, + "loss": 1.6065, + "step": 2116 + }, + { + "epoch": 0.22331223628691982, + "grad_norm": 0.7339509129524231, + "learning_rate": 0.001334659008639598, + "loss": 1.6128, + "step": 2117 + }, + { + "epoch": 0.22341772151898734, + "grad_norm": 0.7740418314933777, + "learning_rate": 0.0013345017257510975, + "loss": 1.5975, + "step": 2118 + }, + { + "epoch": 0.22352320675105486, + "grad_norm": 0.7440289855003357, + "learning_rate": 0.001334344377366264, + "loss": 1.6205, + "step": 2119 + }, + { + "epoch": 0.22362869198312235, + "grad_norm": 0.6515061855316162, + "learning_rate": 0.0013341869635027292, + "loss": 1.5983, + "step": 2120 + }, + { + "epoch": 0.22373417721518987, + "grad_norm": 0.6410554647445679, + "learning_rate": 0.0013340294841781323, + "loss": 1.6099, + "step": 2121 + }, + { + "epoch": 0.2238396624472574, + "grad_norm": 0.6885988116264343, + "learning_rate": 0.0013338719394101193, + "loss": 1.5836, + "step": 2122 + }, + { + "epoch": 
0.22394514767932489, + "grad_norm": 0.6393587589263916, + "learning_rate": 0.001333714329216344, + "loss": 1.6351, + "step": 2123 + }, + { + "epoch": 0.2240506329113924, + "grad_norm": 0.6384758949279785, + "learning_rate": 0.0013335566536144675, + "loss": 1.6029, + "step": 2124 + }, + { + "epoch": 0.22415611814345993, + "grad_norm": 0.663129448890686, + "learning_rate": 0.0013333989126221581, + "loss": 1.5828, + "step": 2125 + }, + { + "epoch": 0.22426160337552742, + "grad_norm": 0.6738982200622559, + "learning_rate": 0.0013332411062570914, + "loss": 1.5956, + "step": 2126 + }, + { + "epoch": 0.22436708860759494, + "grad_norm": 0.7227692008018494, + "learning_rate": 0.0013330832345369505, + "loss": 1.6582, + "step": 2127 + }, + { + "epoch": 0.22447257383966246, + "grad_norm": 0.6926857829093933, + "learning_rate": 0.0013329252974794256, + "loss": 1.5902, + "step": 2128 + }, + { + "epoch": 0.22457805907172995, + "grad_norm": 0.6197987794876099, + "learning_rate": 0.0013327672951022145, + "loss": 1.6315, + "step": 2129 + }, + { + "epoch": 0.22468354430379747, + "grad_norm": 0.6836928725242615, + "learning_rate": 0.001332609227423022, + "loss": 1.6182, + "step": 2130 + }, + { + "epoch": 0.224789029535865, + "grad_norm": 0.6525380611419678, + "learning_rate": 0.0013324510944595605, + "loss": 1.5884, + "step": 2131 + }, + { + "epoch": 0.22489451476793249, + "grad_norm": 0.7143260836601257, + "learning_rate": 0.0013322928962295492, + "loss": 1.563, + "step": 2132 + }, + { + "epoch": 0.225, + "grad_norm": 0.8215411901473999, + "learning_rate": 0.0013321346327507158, + "loss": 1.5807, + "step": 2133 + }, + { + "epoch": 0.2251054852320675, + "grad_norm": 0.6380265951156616, + "learning_rate": 0.0013319763040407938, + "loss": 1.6403, + "step": 2134 + }, + { + "epoch": 0.22521097046413502, + "grad_norm": 0.7346876859664917, + "learning_rate": 0.0013318179101175246, + "loss": 1.6095, + "step": 2135 + }, + { + "epoch": 0.22531645569620254, + "grad_norm": 0.6627327799797058, + 
"learning_rate": 0.0013316594509986577, + "loss": 1.6391, + "step": 2136 + }, + { + "epoch": 0.22542194092827003, + "grad_norm": 0.6280340552330017, + "learning_rate": 0.0013315009267019487, + "loss": 1.6004, + "step": 2137 + }, + { + "epoch": 0.22552742616033755, + "grad_norm": 0.6449537873268127, + "learning_rate": 0.0013313423372451614, + "loss": 1.6081, + "step": 2138 + }, + { + "epoch": 0.22563291139240507, + "grad_norm": 0.6646926999092102, + "learning_rate": 0.0013311836826460665, + "loss": 1.6347, + "step": 2139 + }, + { + "epoch": 0.22573839662447256, + "grad_norm": 0.6632667779922485, + "learning_rate": 0.0013310249629224417, + "loss": 1.626, + "step": 2140 + }, + { + "epoch": 0.22584388185654009, + "grad_norm": 0.6437148451805115, + "learning_rate": 0.0013308661780920728, + "loss": 1.6051, + "step": 2141 + }, + { + "epoch": 0.2259493670886076, + "grad_norm": 0.6943729519844055, + "learning_rate": 0.0013307073281727518, + "loss": 1.5813, + "step": 2142 + }, + { + "epoch": 0.2260548523206751, + "grad_norm": 0.6557376384735107, + "learning_rate": 0.0013305484131822792, + "loss": 1.6108, + "step": 2143 + }, + { + "epoch": 0.22616033755274262, + "grad_norm": 0.7631944417953491, + "learning_rate": 0.001330389433138462, + "loss": 1.6716, + "step": 2144 + }, + { + "epoch": 0.22626582278481014, + "grad_norm": 0.7280348539352417, + "learning_rate": 0.0013302303880591147, + "loss": 1.6322, + "step": 2145 + }, + { + "epoch": 0.22637130801687763, + "grad_norm": 0.63859623670578, + "learning_rate": 0.0013300712779620593, + "loss": 1.6089, + "step": 2146 + }, + { + "epoch": 0.22647679324894515, + "grad_norm": 0.7568277716636658, + "learning_rate": 0.0013299121028651246, + "loss": 1.6469, + "step": 2147 + }, + { + "epoch": 0.22658227848101264, + "grad_norm": 0.680420994758606, + "learning_rate": 0.001329752862786147, + "loss": 1.6411, + "step": 2148 + }, + { + "epoch": 0.22668776371308016, + "grad_norm": 0.7386844754219055, + "learning_rate": 0.0013295935577429703, + 
"loss": 1.599, + "step": 2149 + }, + { + "epoch": 0.22679324894514769, + "grad_norm": 1.0360543727874756, + "learning_rate": 0.0013294341877534454, + "loss": 1.5896, + "step": 2150 + }, + { + "epoch": 0.22689873417721518, + "grad_norm": 1.0124346017837524, + "learning_rate": 0.0013292747528354304, + "loss": 1.6619, + "step": 2151 + }, + { + "epoch": 0.2270042194092827, + "grad_norm": 0.6927493214607239, + "learning_rate": 0.0013291152530067907, + "loss": 1.6726, + "step": 2152 + }, + { + "epoch": 0.22710970464135022, + "grad_norm": 0.8176259994506836, + "learning_rate": 0.0013289556882853993, + "loss": 1.5997, + "step": 2153 + }, + { + "epoch": 0.2272151898734177, + "grad_norm": 0.7876347303390503, + "learning_rate": 0.0013287960586891362, + "loss": 1.6288, + "step": 2154 + }, + { + "epoch": 0.22732067510548523, + "grad_norm": 0.9981083273887634, + "learning_rate": 0.0013286363642358884, + "loss": 1.6393, + "step": 2155 + }, + { + "epoch": 0.22742616033755275, + "grad_norm": 0.7704718112945557, + "learning_rate": 0.0013284766049435504, + "loss": 1.6015, + "step": 2156 + }, + { + "epoch": 0.22753164556962024, + "grad_norm": 1.410914659500122, + "learning_rate": 0.0013283167808300247, + "loss": 1.6358, + "step": 2157 + }, + { + "epoch": 0.22763713080168776, + "grad_norm": 0.8242373466491699, + "learning_rate": 0.0013281568919132198, + "loss": 1.5939, + "step": 2158 + }, + { + "epoch": 0.22774261603375529, + "grad_norm": 1.5899289846420288, + "learning_rate": 0.0013279969382110524, + "loss": 1.6604, + "step": 2159 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 0.9916908740997314, + "learning_rate": 0.0013278369197414458, + "loss": 1.6283, + "step": 2160 + }, + { + "epoch": 0.2279535864978903, + "grad_norm": 1.5870153903961182, + "learning_rate": 0.0013276768365223306, + "loss": 1.6302, + "step": 2161 + }, + { + "epoch": 0.22805907172995782, + "grad_norm": 1.2683593034744263, + "learning_rate": 0.0013275166885716458, + "loss": 1.6031, + "step": 2162 + }, + { + 
"epoch": 0.2281645569620253, + "grad_norm": 1.3644040822982788, + "learning_rate": 0.0013273564759073361, + "loss": 1.6196, + "step": 2163 + }, + { + "epoch": 0.22827004219409283, + "grad_norm": 1.3423583507537842, + "learning_rate": 0.0013271961985473544, + "loss": 1.6073, + "step": 2164 + }, + { + "epoch": 0.22837552742616032, + "grad_norm": 1.116478681564331, + "learning_rate": 0.0013270358565096606, + "loss": 1.5804, + "step": 2165 + }, + { + "epoch": 0.22848101265822784, + "grad_norm": 1.0255082845687866, + "learning_rate": 0.0013268754498122215, + "loss": 1.6513, + "step": 2166 + }, + { + "epoch": 0.22858649789029536, + "grad_norm": 1.1073294878005981, + "learning_rate": 0.0013267149784730117, + "loss": 1.6566, + "step": 2167 + }, + { + "epoch": 0.22869198312236286, + "grad_norm": 0.7359756827354431, + "learning_rate": 0.0013265544425100128, + "loss": 1.6383, + "step": 2168 + }, + { + "epoch": 0.22879746835443038, + "grad_norm": 0.899054765701294, + "learning_rate": 0.0013263938419412137, + "loss": 1.5978, + "step": 2169 + }, + { + "epoch": 0.2289029535864979, + "grad_norm": 0.7098967432975769, + "learning_rate": 0.0013262331767846104, + "loss": 1.6069, + "step": 2170 + }, + { + "epoch": 0.2290084388185654, + "grad_norm": 1.160605788230896, + "learning_rate": 0.0013260724470582064, + "loss": 1.5935, + "step": 2171 + }, + { + "epoch": 0.2291139240506329, + "grad_norm": 0.6738909482955933, + "learning_rate": 0.001325911652780012, + "loss": 1.5787, + "step": 2172 + }, + { + "epoch": 0.22921940928270043, + "grad_norm": 0.943509042263031, + "learning_rate": 0.0013257507939680453, + "loss": 1.5736, + "step": 2173 + }, + { + "epoch": 0.22932489451476792, + "grad_norm": 0.8233562707901001, + "learning_rate": 0.0013255898706403312, + "loss": 1.5684, + "step": 2174 + }, + { + "epoch": 0.22943037974683544, + "grad_norm": 0.6222773194313049, + "learning_rate": 0.001325428882814902, + "loss": 1.5757, + "step": 2175 + }, + { + "epoch": 0.22953586497890296, + "grad_norm": 
0.7222838997840881, + "learning_rate": 0.001325267830509797, + "loss": 1.6113, + "step": 2176 + }, + { + "epoch": 0.22964135021097046, + "grad_norm": 0.6930871605873108, + "learning_rate": 0.0013251067137430629, + "loss": 1.5708, + "step": 2177 + }, + { + "epoch": 0.22974683544303798, + "grad_norm": 0.8231094479560852, + "learning_rate": 0.001324945532532754, + "loss": 1.6084, + "step": 2178 + }, + { + "epoch": 0.2298523206751055, + "grad_norm": 0.8068329691886902, + "learning_rate": 0.0013247842868969312, + "loss": 1.6133, + "step": 2179 + }, + { + "epoch": 0.229957805907173, + "grad_norm": 0.6636372208595276, + "learning_rate": 0.0013246229768536628, + "loss": 1.6052, + "step": 2180 + }, + { + "epoch": 0.2300632911392405, + "grad_norm": 0.800202488899231, + "learning_rate": 0.0013244616024210246, + "loss": 1.6287, + "step": 2181 + }, + { + "epoch": 0.230168776371308, + "grad_norm": 0.6798205971717834, + "learning_rate": 0.0013243001636170993, + "loss": 1.6057, + "step": 2182 + }, + { + "epoch": 0.23027426160337552, + "grad_norm": 0.7899278998374939, + "learning_rate": 0.0013241386604599772, + "loss": 1.6233, + "step": 2183 + }, + { + "epoch": 0.23037974683544304, + "grad_norm": 0.7776110768318176, + "learning_rate": 0.001323977092967755, + "loss": 1.6154, + "step": 2184 + }, + { + "epoch": 0.23048523206751054, + "grad_norm": 0.7521340250968933, + "learning_rate": 0.0013238154611585375, + "loss": 1.5965, + "step": 2185 + }, + { + "epoch": 0.23059071729957806, + "grad_norm": 0.8825452327728271, + "learning_rate": 0.0013236537650504361, + "loss": 1.632, + "step": 2186 + }, + { + "epoch": 0.23069620253164558, + "grad_norm": 0.6659138798713684, + "learning_rate": 0.00132349200466157, + "loss": 1.5824, + "step": 2187 + }, + { + "epoch": 0.23080168776371307, + "grad_norm": 0.7795585989952087, + "learning_rate": 0.0013233301800100652, + "loss": 1.6042, + "step": 2188 + }, + { + "epoch": 0.2309071729957806, + "grad_norm": 0.6484190225601196, + "learning_rate": 
0.0013231682911140545, + "loss": 1.6029, + "step": 2189 + }, + { + "epoch": 0.2310126582278481, + "grad_norm": 0.9694568514823914, + "learning_rate": 0.001323006337991679, + "loss": 1.6235, + "step": 2190 + }, + { + "epoch": 0.2311181434599156, + "grad_norm": 0.9903103709220886, + "learning_rate": 0.0013228443206610861, + "loss": 1.6197, + "step": 2191 + }, + { + "epoch": 0.23122362869198312, + "grad_norm": 0.6882439255714417, + "learning_rate": 0.0013226822391404305, + "loss": 1.609, + "step": 2192 + }, + { + "epoch": 0.23132911392405064, + "grad_norm": 1.0045292377471924, + "learning_rate": 0.0013225200934478744, + "loss": 1.6566, + "step": 2193 + }, + { + "epoch": 0.23143459915611814, + "grad_norm": 0.7124696373939514, + "learning_rate": 0.0013223578836015868, + "loss": 1.6524, + "step": 2194 + }, + { + "epoch": 0.23154008438818566, + "grad_norm": 0.9085876941680908, + "learning_rate": 0.0013221956096197446, + "loss": 1.5841, + "step": 2195 + }, + { + "epoch": 0.23164556962025318, + "grad_norm": 0.8121490478515625, + "learning_rate": 0.001322033271520531, + "loss": 1.5848, + "step": 2196 + }, + { + "epoch": 0.23175105485232067, + "grad_norm": 0.7580808997154236, + "learning_rate": 0.001321870869322137, + "loss": 1.5994, + "step": 2197 + }, + { + "epoch": 0.2318565400843882, + "grad_norm": 0.8047360181808472, + "learning_rate": 0.0013217084030427604, + "loss": 1.5909, + "step": 2198 + }, + { + "epoch": 0.23196202531645568, + "grad_norm": 0.699310302734375, + "learning_rate": 0.0013215458727006064, + "loss": 1.5904, + "step": 2199 + }, + { + "epoch": 0.2320675105485232, + "grad_norm": 0.7520379424095154, + "learning_rate": 0.0013213832783138873, + "loss": 1.6442, + "step": 2200 + }, + { + "epoch": 0.23217299578059072, + "grad_norm": 0.6907774209976196, + "learning_rate": 0.0013212206199008226, + "loss": 1.6019, + "step": 2201 + }, + { + "epoch": 0.23227848101265822, + "grad_norm": 0.651979386806488, + "learning_rate": 0.0013210578974796393, + "loss": 1.6171, + 
"step": 2202 + }, + { + "epoch": 0.23238396624472574, + "grad_norm": 0.644012451171875, + "learning_rate": 0.001320895111068571, + "loss": 1.598, + "step": 2203 + }, + { + "epoch": 0.23248945147679326, + "grad_norm": 0.8137530088424683, + "learning_rate": 0.0013207322606858588, + "loss": 1.5819, + "step": 2204 + }, + { + "epoch": 0.23259493670886075, + "grad_norm": 0.7074142098426819, + "learning_rate": 0.001320569346349751, + "loss": 1.536, + "step": 2205 + }, + { + "epoch": 0.23270042194092827, + "grad_norm": 0.7096148729324341, + "learning_rate": 0.0013204063680785025, + "loss": 1.6548, + "step": 2206 + }, + { + "epoch": 0.2328059071729958, + "grad_norm": 0.6587634086608887, + "learning_rate": 0.0013202433258903761, + "loss": 1.5954, + "step": 2207 + }, + { + "epoch": 0.23291139240506328, + "grad_norm": 0.6917019486427307, + "learning_rate": 0.001320080219803642, + "loss": 1.6149, + "step": 2208 + }, + { + "epoch": 0.2330168776371308, + "grad_norm": 0.6440833210945129, + "learning_rate": 0.0013199170498365764, + "loss": 1.5827, + "step": 2209 + }, + { + "epoch": 0.23312236286919832, + "grad_norm": 0.8007323145866394, + "learning_rate": 0.0013197538160074633, + "loss": 1.604, + "step": 2210 + }, + { + "epoch": 0.23322784810126582, + "grad_norm": 0.7517332434654236, + "learning_rate": 0.0013195905183345943, + "loss": 1.6095, + "step": 2211 + }, + { + "epoch": 0.23333333333333334, + "grad_norm": 0.6906202435493469, + "learning_rate": 0.0013194271568362673, + "loss": 1.6105, + "step": 2212 + }, + { + "epoch": 0.23343881856540086, + "grad_norm": 0.9119969010353088, + "learning_rate": 0.001319263731530788, + "loss": 1.6062, + "step": 2213 + }, + { + "epoch": 0.23354430379746835, + "grad_norm": 0.8129238486289978, + "learning_rate": 0.0013191002424364693, + "loss": 1.6058, + "step": 2214 + }, + { + "epoch": 0.23364978902953587, + "grad_norm": 0.7113754749298096, + "learning_rate": 0.0013189366895716302, + "loss": 1.6462, + "step": 2215 + }, + { + "epoch": 
0.23375527426160336, + "grad_norm": 0.7839686870574951, + "learning_rate": 0.0013187730729545982, + "loss": 1.6091, + "step": 2216 + }, + { + "epoch": 0.23386075949367088, + "grad_norm": 0.7204231023788452, + "learning_rate": 0.0013186093926037072, + "loss": 1.6038, + "step": 2217 + }, + { + "epoch": 0.2339662447257384, + "grad_norm": 0.6475699543952942, + "learning_rate": 0.0013184456485372986, + "loss": 1.6159, + "step": 2218 + }, + { + "epoch": 0.2340717299578059, + "grad_norm": 0.7907409071922302, + "learning_rate": 0.0013182818407737203, + "loss": 1.5885, + "step": 2219 + }, + { + "epoch": 0.23417721518987342, + "grad_norm": 0.677640974521637, + "learning_rate": 0.0013181179693313283, + "loss": 1.6348, + "step": 2220 + }, + { + "epoch": 0.23428270042194094, + "grad_norm": 0.733856737613678, + "learning_rate": 0.0013179540342284847, + "loss": 1.6072, + "step": 2221 + }, + { + "epoch": 0.23438818565400843, + "grad_norm": 0.7696864008903503, + "learning_rate": 0.0013177900354835598, + "loss": 1.5694, + "step": 2222 + }, + { + "epoch": 0.23449367088607595, + "grad_norm": 0.7969393730163574, + "learning_rate": 0.00131762597311493, + "loss": 1.5915, + "step": 2223 + }, + { + "epoch": 0.23459915611814347, + "grad_norm": 1.0925132036209106, + "learning_rate": 0.0013174618471409793, + "loss": 1.6364, + "step": 2224 + }, + { + "epoch": 0.23470464135021096, + "grad_norm": 0.6721614003181458, + "learning_rate": 0.0013172976575800991, + "loss": 1.6056, + "step": 2225 + }, + { + "epoch": 0.23481012658227848, + "grad_norm": 1.1989890336990356, + "learning_rate": 0.0013171334044506878, + "loss": 1.6379, + "step": 2226 + }, + { + "epoch": 0.234915611814346, + "grad_norm": 0.8742562532424927, + "learning_rate": 0.0013169690877711502, + "loss": 1.5904, + "step": 2227 + }, + { + "epoch": 0.2350210970464135, + "grad_norm": 0.6095036864280701, + "learning_rate": 0.0013168047075598993, + "loss": 1.6153, + "step": 2228 + }, + { + "epoch": 0.23512658227848102, + "grad_norm": 
0.8016342520713806, + "learning_rate": 0.0013166402638353548, + "loss": 1.6206, + "step": 2229 + }, + { + "epoch": 0.23523206751054854, + "grad_norm": 0.7969964146614075, + "learning_rate": 0.0013164757566159428, + "loss": 1.5933, + "step": 2230 + }, + { + "epoch": 0.23533755274261603, + "grad_norm": 0.6577075123786926, + "learning_rate": 0.0013163111859200978, + "loss": 1.597, + "step": 2231 + }, + { + "epoch": 0.23544303797468355, + "grad_norm": 1.1408928632736206, + "learning_rate": 0.0013161465517662603, + "loss": 1.6114, + "step": 2232 + }, + { + "epoch": 0.23554852320675104, + "grad_norm": 0.660457968711853, + "learning_rate": 0.001315981854172879, + "loss": 1.6278, + "step": 2233 + }, + { + "epoch": 0.23565400843881856, + "grad_norm": 0.9223642349243164, + "learning_rate": 0.0013158170931584084, + "loss": 1.5938, + "step": 2234 + }, + { + "epoch": 0.23575949367088608, + "grad_norm": 0.7127107977867126, + "learning_rate": 0.0013156522687413114, + "loss": 1.5884, + "step": 2235 + }, + { + "epoch": 0.23586497890295358, + "grad_norm": 0.8722673058509827, + "learning_rate": 0.0013154873809400568, + "loss": 1.5937, + "step": 2236 + }, + { + "epoch": 0.2359704641350211, + "grad_norm": 0.8434324860572815, + "learning_rate": 0.0013153224297731215, + "loss": 1.5713, + "step": 2237 + }, + { + "epoch": 0.23607594936708862, + "grad_norm": 0.7605053186416626, + "learning_rate": 0.0013151574152589888, + "loss": 1.6282, + "step": 2238 + }, + { + "epoch": 0.2361814345991561, + "grad_norm": 0.8706852197647095, + "learning_rate": 0.00131499233741615, + "loss": 1.5948, + "step": 2239 + }, + { + "epoch": 0.23628691983122363, + "grad_norm": 0.7120388746261597, + "learning_rate": 0.001314827196263102, + "loss": 1.5982, + "step": 2240 + }, + { + "epoch": 0.23639240506329115, + "grad_norm": 0.8341058492660522, + "learning_rate": 0.0013146619918183507, + "loss": 1.5816, + "step": 2241 + }, + { + "epoch": 0.23649789029535864, + "grad_norm": 0.7109008431434631, + "learning_rate": 
0.0013144967241004073, + "loss": 1.5726, + "step": 2242 + }, + { + "epoch": 0.23660337552742616, + "grad_norm": 0.7012761831283569, + "learning_rate": 0.001314331393127791, + "loss": 1.5756, + "step": 2243 + }, + { + "epoch": 0.23670886075949368, + "grad_norm": 0.7334566116333008, + "learning_rate": 0.0013141659989190282, + "loss": 1.5983, + "step": 2244 + }, + { + "epoch": 0.23681434599156118, + "grad_norm": 0.7659544944763184, + "learning_rate": 0.001314000541492652, + "loss": 1.6037, + "step": 2245 + }, + { + "epoch": 0.2369198312236287, + "grad_norm": 0.6481732726097107, + "learning_rate": 0.0013138350208672029, + "loss": 1.6464, + "step": 2246 + }, + { + "epoch": 0.2370253164556962, + "grad_norm": 0.8521044850349426, + "learning_rate": 0.001313669437061228, + "loss": 1.5695, + "step": 2247 + }, + { + "epoch": 0.2371308016877637, + "grad_norm": 0.7935889363288879, + "learning_rate": 0.0013135037900932822, + "loss": 1.6234, + "step": 2248 + }, + { + "epoch": 0.23723628691983123, + "grad_norm": 0.7556452751159668, + "learning_rate": 0.0013133380799819267, + "loss": 1.63, + "step": 2249 + }, + { + "epoch": 0.23734177215189872, + "grad_norm": 1.0755139589309692, + "learning_rate": 0.0013131723067457302, + "loss": 1.5971, + "step": 2250 + }, + { + "epoch": 0.23744725738396624, + "grad_norm": 0.8032243847846985, + "learning_rate": 0.0013130064704032684, + "loss": 1.5722, + "step": 2251 + }, + { + "epoch": 0.23755274261603376, + "grad_norm": 0.6993762254714966, + "learning_rate": 0.0013128405709731245, + "loss": 1.5989, + "step": 2252 + }, + { + "epoch": 0.23765822784810126, + "grad_norm": 0.8055968284606934, + "learning_rate": 0.001312674608473888, + "loss": 1.5968, + "step": 2253 + }, + { + "epoch": 0.23776371308016878, + "grad_norm": 0.6401166319847107, + "learning_rate": 0.0013125085829241558, + "loss": 1.5854, + "step": 2254 + }, + { + "epoch": 0.2378691983122363, + "grad_norm": 0.7716368436813354, + "learning_rate": 0.0013123424943425317, + "loss": 1.5898, + 
"step": 2255 + }, + { + "epoch": 0.2379746835443038, + "grad_norm": 0.6552234888076782, + "learning_rate": 0.0013121763427476273, + "loss": 1.5768, + "step": 2256 + }, + { + "epoch": 0.2380801687763713, + "grad_norm": 1.013262391090393, + "learning_rate": 0.0013120101281580605, + "loss": 1.5656, + "step": 2257 + }, + { + "epoch": 0.23818565400843883, + "grad_norm": 1.0302777290344238, + "learning_rate": 0.0013118438505924563, + "loss": 1.5992, + "step": 2258 + }, + { + "epoch": 0.23829113924050632, + "grad_norm": 0.7516588568687439, + "learning_rate": 0.001311677510069447, + "loss": 1.6208, + "step": 2259 + }, + { + "epoch": 0.23839662447257384, + "grad_norm": 1.0749679803848267, + "learning_rate": 0.0013115111066076721, + "loss": 1.6144, + "step": 2260 + }, + { + "epoch": 0.23850210970464136, + "grad_norm": 0.6713942289352417, + "learning_rate": 0.0013113446402257774, + "loss": 1.5974, + "step": 2261 + }, + { + "epoch": 0.23860759493670886, + "grad_norm": 1.241042137145996, + "learning_rate": 0.001311178110942417, + "loss": 1.598, + "step": 2262 + }, + { + "epoch": 0.23871308016877638, + "grad_norm": 0.7135102152824402, + "learning_rate": 0.0013110115187762506, + "loss": 1.6143, + "step": 2263 + }, + { + "epoch": 0.23881856540084387, + "grad_norm": 0.8381176590919495, + "learning_rate": 0.0013108448637459465, + "loss": 1.6578, + "step": 2264 + }, + { + "epoch": 0.2389240506329114, + "grad_norm": 0.850952684879303, + "learning_rate": 0.0013106781458701784, + "loss": 1.6478, + "step": 2265 + }, + { + "epoch": 0.2390295358649789, + "grad_norm": 0.6933821439743042, + "learning_rate": 0.0013105113651676287, + "loss": 1.6127, + "step": 2266 + }, + { + "epoch": 0.2391350210970464, + "grad_norm": 0.7171997427940369, + "learning_rate": 0.001310344521656985, + "loss": 1.6493, + "step": 2267 + }, + { + "epoch": 0.23924050632911392, + "grad_norm": 0.6451805830001831, + "learning_rate": 0.001310177615356944, + "loss": 1.6166, + "step": 2268 + }, + { + "epoch": 
0.23934599156118144, + "grad_norm": 0.7677789330482483, + "learning_rate": 0.0013100106462862076, + "loss": 1.6111, + "step": 2269 + }, + { + "epoch": 0.23945147679324894, + "grad_norm": 0.7745285034179688, + "learning_rate": 0.0013098436144634862, + "loss": 1.6224, + "step": 2270 + }, + { + "epoch": 0.23955696202531646, + "grad_norm": 0.7059374451637268, + "learning_rate": 0.0013096765199074958, + "loss": 1.6177, + "step": 2271 + }, + { + "epoch": 0.23966244725738398, + "grad_norm": 0.8317598700523376, + "learning_rate": 0.0013095093626369608, + "loss": 1.5762, + "step": 2272 + }, + { + "epoch": 0.23976793248945147, + "grad_norm": 0.6298731565475464, + "learning_rate": 0.0013093421426706117, + "loss": 1.5774, + "step": 2273 + }, + { + "epoch": 0.239873417721519, + "grad_norm": 1.0127137899398804, + "learning_rate": 0.0013091748600271862, + "loss": 1.6234, + "step": 2274 + }, + { + "epoch": 0.2399789029535865, + "grad_norm": 1.1078979969024658, + "learning_rate": 0.0013090075147254294, + "loss": 1.5944, + "step": 2275 + }, + { + "epoch": 0.240084388185654, + "grad_norm": 0.7833831310272217, + "learning_rate": 0.0013088401067840932, + "loss": 1.6247, + "step": 2276 + }, + { + "epoch": 0.24018987341772152, + "grad_norm": 1.3735692501068115, + "learning_rate": 0.0013086726362219363, + "loss": 1.6223, + "step": 2277 + }, + { + "epoch": 0.24029535864978904, + "grad_norm": 0.9315179586410522, + "learning_rate": 0.0013085051030577246, + "loss": 1.6113, + "step": 2278 + }, + { + "epoch": 0.24040084388185654, + "grad_norm": 1.9394822120666504, + "learning_rate": 0.0013083375073102315, + "loss": 1.637, + "step": 2279 + }, + { + "epoch": 0.24050632911392406, + "grad_norm": 1.5022156238555908, + "learning_rate": 0.0013081698489982364, + "loss": 1.629, + "step": 2280 + }, + { + "epoch": 0.24061181434599155, + "grad_norm": 1.430822491645813, + "learning_rate": 0.0013080021281405264, + "loss": 1.611, + "step": 2281 + }, + { + "epoch": 0.24071729957805907, + "grad_norm": 
1.2627028226852417, + "learning_rate": 0.0013078343447558954, + "loss": 1.58, + "step": 2282 + }, + { + "epoch": 0.2408227848101266, + "grad_norm": 1.0306642055511475, + "learning_rate": 0.0013076664988631447, + "loss": 1.5977, + "step": 2283 + }, + { + "epoch": 0.24092827004219408, + "grad_norm": 1.1619000434875488, + "learning_rate": 0.001307498590481082, + "loss": 1.5621, + "step": 2284 + }, + { + "epoch": 0.2410337552742616, + "grad_norm": 1.2066630125045776, + "learning_rate": 0.001307330619628522, + "loss": 1.5945, + "step": 2285 + }, + { + "epoch": 0.24113924050632912, + "grad_norm": 1.0714571475982666, + "learning_rate": 0.0013071625863242875, + "loss": 1.6342, + "step": 2286 + }, + { + "epoch": 0.24124472573839661, + "grad_norm": 1.1005979776382446, + "learning_rate": 0.0013069944905872064, + "loss": 1.6028, + "step": 2287 + }, + { + "epoch": 0.24135021097046414, + "grad_norm": 1.2252156734466553, + "learning_rate": 0.0013068263324361156, + "loss": 1.6063, + "step": 2288 + }, + { + "epoch": 0.24145569620253166, + "grad_norm": 1.0291119813919067, + "learning_rate": 0.0013066581118898574, + "loss": 1.592, + "step": 2289 + }, + { + "epoch": 0.24156118143459915, + "grad_norm": 1.019702672958374, + "learning_rate": 0.001306489828967282, + "loss": 1.598, + "step": 2290 + }, + { + "epoch": 0.24166666666666667, + "grad_norm": 1.0853697061538696, + "learning_rate": 0.0013063214836872465, + "loss": 1.6021, + "step": 2291 + }, + { + "epoch": 0.2417721518987342, + "grad_norm": 0.8971830010414124, + "learning_rate": 0.0013061530760686145, + "loss": 1.5432, + "step": 2292 + }, + { + "epoch": 0.24187763713080168, + "grad_norm": 1.037650227546692, + "learning_rate": 0.0013059846061302574, + "loss": 1.588, + "step": 2293 + }, + { + "epoch": 0.2419831223628692, + "grad_norm": 0.8351383805274963, + "learning_rate": 0.0013058160738910526, + "loss": 1.5802, + "step": 2294 + }, + { + "epoch": 0.24208860759493672, + "grad_norm": 0.9594129920005798, + "learning_rate": 
0.0013056474793698852, + "loss": 1.5898, + "step": 2295 + }, + { + "epoch": 0.24219409282700421, + "grad_norm": 0.7380295395851135, + "learning_rate": 0.001305478822585647, + "loss": 1.5903, + "step": 2296 + }, + { + "epoch": 0.24229957805907174, + "grad_norm": 0.9035357236862183, + "learning_rate": 0.001305310103557237, + "loss": 1.6347, + "step": 2297 + }, + { + "epoch": 0.24240506329113923, + "grad_norm": 0.77594393491745, + "learning_rate": 0.0013051413223035607, + "loss": 1.5782, + "step": 2298 + }, + { + "epoch": 0.24251054852320675, + "grad_norm": 0.8340296745300293, + "learning_rate": 0.0013049724788435312, + "loss": 1.5694, + "step": 2299 + }, + { + "epoch": 0.24261603375527427, + "grad_norm": 0.7776919603347778, + "learning_rate": 0.0013048035731960679, + "loss": 1.5579, + "step": 2300 + }, + { + "epoch": 0.24272151898734176, + "grad_norm": 0.6951138973236084, + "learning_rate": 0.0013046346053800979, + "loss": 1.6027, + "step": 2301 + }, + { + "epoch": 0.24282700421940928, + "grad_norm": 0.7537091374397278, + "learning_rate": 0.0013044655754145546, + "loss": 1.5689, + "step": 2302 + }, + { + "epoch": 0.2429324894514768, + "grad_norm": 0.7213175892829895, + "learning_rate": 0.001304296483318379, + "loss": 1.609, + "step": 2303 + }, + { + "epoch": 0.2430379746835443, + "grad_norm": 0.6947235465049744, + "learning_rate": 0.0013041273291105181, + "loss": 1.5968, + "step": 2304 + }, + { + "epoch": 0.24314345991561181, + "grad_norm": 0.8264784216880798, + "learning_rate": 0.0013039581128099272, + "loss": 1.5846, + "step": 2305 + }, + { + "epoch": 0.24324894514767934, + "grad_norm": 0.7955607175827026, + "learning_rate": 0.0013037888344355673, + "loss": 1.6099, + "step": 2306 + }, + { + "epoch": 0.24335443037974683, + "grad_norm": 0.7793200016021729, + "learning_rate": 0.001303619494006407, + "loss": 1.5718, + "step": 2307 + }, + { + "epoch": 0.24345991561181435, + "grad_norm": 0.6773825883865356, + "learning_rate": 0.0013034500915414218, + "loss": 1.5529, + 
"step": 2308 + }, + { + "epoch": 0.24356540084388187, + "grad_norm": 0.7179785370826721, + "learning_rate": 0.0013032806270595941, + "loss": 1.6019, + "step": 2309 + }, + { + "epoch": 0.24367088607594936, + "grad_norm": 0.6989943981170654, + "learning_rate": 0.0013031111005799133, + "loss": 1.6068, + "step": 2310 + }, + { + "epoch": 0.24377637130801688, + "grad_norm": 0.6744117140769958, + "learning_rate": 0.0013029415121213756, + "loss": 1.5965, + "step": 2311 + }, + { + "epoch": 0.2438818565400844, + "grad_norm": 0.7145010828971863, + "learning_rate": 0.0013027718617029842, + "loss": 1.6262, + "step": 2312 + }, + { + "epoch": 0.2439873417721519, + "grad_norm": 0.7344281673431396, + "learning_rate": 0.0013026021493437495, + "loss": 1.6498, + "step": 2313 + }, + { + "epoch": 0.24409282700421941, + "grad_norm": 0.7538691163063049, + "learning_rate": 0.0013024323750626882, + "loss": 1.6008, + "step": 2314 + }, + { + "epoch": 0.2441983122362869, + "grad_norm": 0.7729992270469666, + "learning_rate": 0.0013022625388788248, + "loss": 1.598, + "step": 2315 + }, + { + "epoch": 0.24430379746835443, + "grad_norm": 0.6620161533355713, + "learning_rate": 0.0013020926408111903, + "loss": 1.6296, + "step": 2316 + }, + { + "epoch": 0.24440928270042195, + "grad_norm": 0.6783233880996704, + "learning_rate": 0.001301922680878822, + "loss": 1.5452, + "step": 2317 + }, + { + "epoch": 0.24451476793248944, + "grad_norm": 0.7110063433647156, + "learning_rate": 0.001301752659100765, + "loss": 1.594, + "step": 2318 + }, + { + "epoch": 0.24462025316455696, + "grad_norm": 0.7639060616493225, + "learning_rate": 0.001301582575496072, + "loss": 1.6096, + "step": 2319 + }, + { + "epoch": 0.24472573839662448, + "grad_norm": 0.6643346548080444, + "learning_rate": 0.0013014124300838004, + "loss": 1.5567, + "step": 2320 + }, + { + "epoch": 0.24483122362869197, + "grad_norm": 0.8011399507522583, + "learning_rate": 0.0013012422228830165, + "loss": 1.5656, + "step": 2321 + }, + { + "epoch": 
0.2449367088607595, + "grad_norm": 0.6922979354858398, + "learning_rate": 0.0013010719539127927, + "loss": 1.6124, + "step": 2322 + }, + { + "epoch": 0.24504219409282701, + "grad_norm": 0.7193114757537842, + "learning_rate": 0.001300901623192209, + "loss": 1.5972, + "step": 2323 + }, + { + "epoch": 0.2451476793248945, + "grad_norm": 0.7262327671051025, + "learning_rate": 0.0013007312307403507, + "loss": 1.6037, + "step": 2324 + }, + { + "epoch": 0.24525316455696203, + "grad_norm": 0.6740649938583374, + "learning_rate": 0.0013005607765763122, + "loss": 1.5723, + "step": 2325 + }, + { + "epoch": 0.24535864978902955, + "grad_norm": 0.7274128198623657, + "learning_rate": 0.0013003902607191934, + "loss": 1.6144, + "step": 2326 + }, + { + "epoch": 0.24546413502109704, + "grad_norm": 0.7522609829902649, + "learning_rate": 0.0013002196831881014, + "loss": 1.5861, + "step": 2327 + }, + { + "epoch": 0.24556962025316456, + "grad_norm": 0.7042447328567505, + "learning_rate": 0.0013000490440021502, + "loss": 1.5805, + "step": 2328 + }, + { + "epoch": 0.24567510548523205, + "grad_norm": 0.6699759364128113, + "learning_rate": 0.0012998783431804608, + "loss": 1.5809, + "step": 2329 + }, + { + "epoch": 0.24578059071729957, + "grad_norm": 0.7195650339126587, + "learning_rate": 0.0012997075807421612, + "loss": 1.5691, + "step": 2330 + }, + { + "epoch": 0.2458860759493671, + "grad_norm": 0.6367641091346741, + "learning_rate": 0.0012995367567063861, + "loss": 1.5419, + "step": 2331 + }, + { + "epoch": 0.2459915611814346, + "grad_norm": 0.6669050455093384, + "learning_rate": 0.001299365871092277, + "loss": 1.621, + "step": 2332 + }, + { + "epoch": 0.2460970464135021, + "grad_norm": 0.6426677703857422, + "learning_rate": 0.0012991949239189826, + "loss": 1.5941, + "step": 2333 + }, + { + "epoch": 0.24620253164556963, + "grad_norm": 0.6628992557525635, + "learning_rate": 0.0012990239152056587, + "loss": 1.605, + "step": 2334 + }, + { + "epoch": 0.24630801687763712, + "grad_norm": 
0.747962474822998, + "learning_rate": 0.0012988528449714672, + "loss": 1.6077, + "step": 2335 + }, + { + "epoch": 0.24641350210970464, + "grad_norm": 0.7093303203582764, + "learning_rate": 0.001298681713235578, + "loss": 1.5905, + "step": 2336 + }, + { + "epoch": 0.24651898734177216, + "grad_norm": 0.6760169863700867, + "learning_rate": 0.0012985105200171664, + "loss": 1.5256, + "step": 2337 + }, + { + "epoch": 0.24662447257383965, + "grad_norm": 0.7909282445907593, + "learning_rate": 0.001298339265335416, + "loss": 1.5676, + "step": 2338 + }, + { + "epoch": 0.24672995780590717, + "grad_norm": 0.6299623250961304, + "learning_rate": 0.0012981679492095166, + "loss": 1.5773, + "step": 2339 + }, + { + "epoch": 0.2468354430379747, + "grad_norm": 0.9317965507507324, + "learning_rate": 0.0012979965716586653, + "loss": 1.5876, + "step": 2340 + }, + { + "epoch": 0.2469409282700422, + "grad_norm": 0.8253819346427917, + "learning_rate": 0.0012978251327020655, + "loss": 1.6154, + "step": 2341 + }, + { + "epoch": 0.2470464135021097, + "grad_norm": 0.7974177002906799, + "learning_rate": 0.0012976536323589278, + "loss": 1.583, + "step": 2342 + }, + { + "epoch": 0.24715189873417723, + "grad_norm": 1.040910243988037, + "learning_rate": 0.0012974820706484697, + "loss": 1.5816, + "step": 2343 + }, + { + "epoch": 0.24725738396624472, + "grad_norm": 0.8095408082008362, + "learning_rate": 0.001297310447589916, + "loss": 1.6067, + "step": 2344 + }, + { + "epoch": 0.24736286919831224, + "grad_norm": 0.8841133713722229, + "learning_rate": 0.0012971387632024968, + "loss": 1.5782, + "step": 2345 + }, + { + "epoch": 0.24746835443037973, + "grad_norm": 1.136755108833313, + "learning_rate": 0.0012969670175054515, + "loss": 1.6012, + "step": 2346 + }, + { + "epoch": 0.24757383966244725, + "grad_norm": 0.7177879810333252, + "learning_rate": 0.0012967952105180243, + "loss": 1.5597, + "step": 2347 + }, + { + "epoch": 0.24767932489451477, + "grad_norm": 0.946736752986908, + "learning_rate": 
0.001296623342259467, + "loss": 1.5565, + "step": 2348 + }, + { + "epoch": 0.24778481012658227, + "grad_norm": 1.2007994651794434, + "learning_rate": 0.0012964514127490388, + "loss": 1.5688, + "step": 2349 + }, + { + "epoch": 0.2478902953586498, + "grad_norm": 0.8283360600471497, + "learning_rate": 0.0012962794220060048, + "loss": 1.5934, + "step": 2350 + }, + { + "epoch": 0.2479957805907173, + "grad_norm": 1.2093982696533203, + "learning_rate": 0.0012961073700496378, + "loss": 1.598, + "step": 2351 + }, + { + "epoch": 0.2481012658227848, + "grad_norm": 0.7973083257675171, + "learning_rate": 0.0012959352568992163, + "loss": 1.6125, + "step": 2352 + }, + { + "epoch": 0.24820675105485232, + "grad_norm": 1.3112307786941528, + "learning_rate": 0.0012957630825740274, + "loss": 1.5815, + "step": 2353 + }, + { + "epoch": 0.24831223628691984, + "grad_norm": 0.9479383826255798, + "learning_rate": 0.0012955908470933637, + "loss": 1.6063, + "step": 2354 + }, + { + "epoch": 0.24841772151898733, + "grad_norm": 1.9709174633026123, + "learning_rate": 0.0012954185504765248, + "loss": 1.6293, + "step": 2355 + }, + { + "epoch": 0.24852320675105485, + "grad_norm": 1.5675593614578247, + "learning_rate": 0.0012952461927428177, + "loss": 1.6111, + "step": 2356 + }, + { + "epoch": 0.24862869198312237, + "grad_norm": 1.0304241180419922, + "learning_rate": 0.001295073773911556, + "loss": 1.6103, + "step": 2357 + }, + { + "epoch": 0.24873417721518987, + "grad_norm": 1.311730980873108, + "learning_rate": 0.0012949012940020599, + "loss": 1.6219, + "step": 2358 + }, + { + "epoch": 0.2488396624472574, + "grad_norm": 1.023891806602478, + "learning_rate": 0.0012947287530336565, + "loss": 1.5826, + "step": 2359 + }, + { + "epoch": 0.2489451476793249, + "grad_norm": 0.9627659916877747, + "learning_rate": 0.0012945561510256801, + "loss": 1.5775, + "step": 2360 + }, + { + "epoch": 0.2490506329113924, + "grad_norm": 1.0899226665496826, + "learning_rate": 0.0012943834879974717, + "loss": 1.5749, + 
"step": 2361 + }, + { + "epoch": 0.24915611814345992, + "grad_norm": 0.8406262397766113, + "learning_rate": 0.001294210763968379, + "loss": 1.5916, + "step": 2362 + }, + { + "epoch": 0.2492616033755274, + "grad_norm": 0.8782409429550171, + "learning_rate": 0.0012940379789577565, + "loss": 1.6009, + "step": 2363 + }, + { + "epoch": 0.24936708860759493, + "grad_norm": 0.7640487551689148, + "learning_rate": 0.0012938651329849654, + "loss": 1.5722, + "step": 2364 + }, + { + "epoch": 0.24947257383966245, + "grad_norm": 0.689403772354126, + "learning_rate": 0.0012936922260693743, + "loss": 1.5605, + "step": 2365 + }, + { + "epoch": 0.24957805907172995, + "grad_norm": 0.8647041320800781, + "learning_rate": 0.0012935192582303582, + "loss": 1.578, + "step": 2366 + }, + { + "epoch": 0.24968354430379747, + "grad_norm": 0.7135575413703918, + "learning_rate": 0.001293346229487299, + "loss": 1.6057, + "step": 2367 + }, + { + "epoch": 0.249789029535865, + "grad_norm": 0.884378969669342, + "learning_rate": 0.0012931731398595854, + "loss": 1.5974, + "step": 2368 + }, + { + "epoch": 0.24989451476793248, + "grad_norm": 0.811915934085846, + "learning_rate": 0.001292999989366613, + "loss": 1.5882, + "step": 2369 + }, + { + "epoch": 0.25, + "grad_norm": 0.7537535429000854, + "learning_rate": 0.001292826778027784, + "loss": 1.597, + "step": 2370 + }, + { + "epoch": 0.2501054852320675, + "grad_norm": 0.7403700351715088, + "learning_rate": 0.001292653505862508, + "loss": 1.5862, + "step": 2371 + }, + { + "epoch": 0.25021097046413504, + "grad_norm": 0.7199975252151489, + "learning_rate": 0.0012924801728902006, + "loss": 1.5847, + "step": 2372 + }, + { + "epoch": 0.25031645569620253, + "grad_norm": 0.7530269622802734, + "learning_rate": 0.0012923067791302848, + "loss": 1.5873, + "step": 2373 + }, + { + "epoch": 0.25042194092827, + "grad_norm": 0.7499523758888245, + "learning_rate": 0.0012921333246021904, + "loss": 1.586, + "step": 2374 + }, + { + "epoch": 0.2505274261603376, + "grad_norm": 
1.1272975206375122, + "learning_rate": 0.0012919598093253533, + "loss": 1.5821, + "step": 2375 + }, + { + "epoch": 0.25063291139240507, + "grad_norm": 0.6990654468536377, + "learning_rate": 0.0012917862333192173, + "loss": 1.6145, + "step": 2376 + }, + { + "epoch": 0.25073839662447256, + "grad_norm": 1.0460474491119385, + "learning_rate": 0.0012916125966032322, + "loss": 1.5649, + "step": 2377 + }, + { + "epoch": 0.2508438818565401, + "grad_norm": 0.6861876249313354, + "learning_rate": 0.001291438899196855, + "loss": 1.5803, + "step": 2378 + }, + { + "epoch": 0.2509493670886076, + "grad_norm": 0.8852860331535339, + "learning_rate": 0.0012912651411195494, + "loss": 1.6217, + "step": 2379 + }, + { + "epoch": 0.2510548523206751, + "grad_norm": 0.7079659104347229, + "learning_rate": 0.0012910913223907856, + "loss": 1.5791, + "step": 2380 + }, + { + "epoch": 0.25116033755274264, + "grad_norm": 0.7527608275413513, + "learning_rate": 0.0012909174430300412, + "loss": 1.6009, + "step": 2381 + }, + { + "epoch": 0.25126582278481013, + "grad_norm": 0.7640373706817627, + "learning_rate": 0.0012907435030567996, + "loss": 1.5831, + "step": 2382 + }, + { + "epoch": 0.2513713080168776, + "grad_norm": 0.9619731307029724, + "learning_rate": 0.0012905695024905525, + "loss": 1.5956, + "step": 2383 + }, + { + "epoch": 0.2514767932489452, + "grad_norm": 0.6535916924476624, + "learning_rate": 0.0012903954413507968, + "loss": 1.5493, + "step": 2384 + }, + { + "epoch": 0.25158227848101267, + "grad_norm": 0.8747962713241577, + "learning_rate": 0.0012902213196570376, + "loss": 1.5983, + "step": 2385 + }, + { + "epoch": 0.25168776371308016, + "grad_norm": 0.6686516404151917, + "learning_rate": 0.0012900471374287855, + "loss": 1.5914, + "step": 2386 + }, + { + "epoch": 0.25179324894514765, + "grad_norm": 0.7026684284210205, + "learning_rate": 0.0012898728946855588, + "loss": 1.6609, + "step": 2387 + }, + { + "epoch": 0.2518987341772152, + "grad_norm": 0.6588168740272522, + "learning_rate": 
0.001289698591446882, + "loss": 1.5542, + "step": 2388 + }, + { + "epoch": 0.2520042194092827, + "grad_norm": 0.7010842561721802, + "learning_rate": 0.0012895242277322872, + "loss": 1.5489, + "step": 2389 + }, + { + "epoch": 0.2521097046413502, + "grad_norm": 0.696877658367157, + "learning_rate": 0.0012893498035613123, + "loss": 1.5892, + "step": 2390 + }, + { + "epoch": 0.25221518987341773, + "grad_norm": 1.0618510246276855, + "learning_rate": 0.0012891753189535023, + "loss": 1.5705, + "step": 2391 + }, + { + "epoch": 0.2523206751054852, + "grad_norm": 0.9092926383018494, + "learning_rate": 0.0012890007739284092, + "loss": 1.5618, + "step": 2392 + }, + { + "epoch": 0.2524261603375527, + "grad_norm": 0.6635982394218445, + "learning_rate": 0.001288826168505592, + "loss": 1.5679, + "step": 2393 + }, + { + "epoch": 0.25253164556962027, + "grad_norm": 0.7727280855178833, + "learning_rate": 0.0012886515027046156, + "loss": 1.576, + "step": 2394 + }, + { + "epoch": 0.25263713080168776, + "grad_norm": 0.6806418895721436, + "learning_rate": 0.0012884767765450524, + "loss": 1.5361, + "step": 2395 + }, + { + "epoch": 0.25274261603375525, + "grad_norm": 0.6961405873298645, + "learning_rate": 0.0012883019900464814, + "loss": 1.6119, + "step": 2396 + }, + { + "epoch": 0.2528481012658228, + "grad_norm": 0.7085536122322083, + "learning_rate": 0.001288127143228488, + "loss": 1.6404, + "step": 2397 + }, + { + "epoch": 0.2529535864978903, + "grad_norm": 0.6383935809135437, + "learning_rate": 0.0012879522361106646, + "loss": 1.6145, + "step": 2398 + }, + { + "epoch": 0.2530590717299578, + "grad_norm": 0.654321551322937, + "learning_rate": 0.0012877772687126111, + "loss": 1.5812, + "step": 2399 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 0.6669245958328247, + "learning_rate": 0.001287602241053933, + "loss": 1.6061, + "step": 2400 + }, + { + "epoch": 0.2532700421940928, + "grad_norm": 0.6180142164230347, + "learning_rate": 0.001287427153154243, + "loss": 1.5819, + "step": 
2401 + }, + { + "epoch": 0.2533755274261603, + "grad_norm": 0.6575337648391724, + "learning_rate": 0.0012872520050331608, + "loss": 1.594, + "step": 2402 + }, + { + "epoch": 0.25348101265822787, + "grad_norm": 0.6652092337608337, + "learning_rate": 0.0012870767967103122, + "loss": 1.6012, + "step": 2403 + }, + { + "epoch": 0.25358649789029536, + "grad_norm": 0.6595035791397095, + "learning_rate": 0.0012869015282053304, + "loss": 1.5423, + "step": 2404 + }, + { + "epoch": 0.25369198312236285, + "grad_norm": 0.7384206056594849, + "learning_rate": 0.0012867261995378554, + "loss": 1.6189, + "step": 2405 + }, + { + "epoch": 0.2537974683544304, + "grad_norm": 0.6338590979576111, + "learning_rate": 0.001286550810727533, + "loss": 1.5806, + "step": 2406 + }, + { + "epoch": 0.2539029535864979, + "grad_norm": 0.7052680253982544, + "learning_rate": 0.0012863753617940172, + "loss": 1.6077, + "step": 2407 + }, + { + "epoch": 0.2540084388185654, + "grad_norm": 0.641475260257721, + "learning_rate": 0.001286199852756967, + "loss": 1.5769, + "step": 2408 + }, + { + "epoch": 0.25411392405063293, + "grad_norm": 1.025480031967163, + "learning_rate": 0.0012860242836360502, + "loss": 1.5608, + "step": 2409 + }, + { + "epoch": 0.2542194092827004, + "grad_norm": 0.881987988948822, + "learning_rate": 0.0012858486544509392, + "loss": 1.6075, + "step": 2410 + }, + { + "epoch": 0.2543248945147679, + "grad_norm": 0.6987824440002441, + "learning_rate": 0.0012856729652213144, + "loss": 1.625, + "step": 2411 + }, + { + "epoch": 0.25443037974683547, + "grad_norm": 0.8570953011512756, + "learning_rate": 0.001285497215966863, + "loss": 1.5847, + "step": 2412 + }, + { + "epoch": 0.25453586497890296, + "grad_norm": 0.7592807412147522, + "learning_rate": 0.0012853214067072782, + "loss": 1.5927, + "step": 2413 + }, + { + "epoch": 0.25464135021097045, + "grad_norm": 0.633867621421814, + "learning_rate": 0.0012851455374622604, + "loss": 1.5572, + "step": 2414 + }, + { + "epoch": 0.254746835443038, + 
"grad_norm": 0.7331764101982117, + "learning_rate": 0.0012849696082515166, + "loss": 1.5764, + "step": 2415 + }, + { + "epoch": 0.2548523206751055, + "grad_norm": 0.8396545052528381, + "learning_rate": 0.0012847936190947605, + "loss": 1.5843, + "step": 2416 + }, + { + "epoch": 0.254957805907173, + "grad_norm": 0.7395906448364258, + "learning_rate": 0.001284617570011713, + "loss": 1.5518, + "step": 2417 + }, + { + "epoch": 0.25506329113924053, + "grad_norm": 0.6698686480522156, + "learning_rate": 0.0012844414610221006, + "loss": 1.5897, + "step": 2418 + }, + { + "epoch": 0.255168776371308, + "grad_norm": 0.6550968885421753, + "learning_rate": 0.0012842652921456576, + "loss": 1.5616, + "step": 2419 + }, + { + "epoch": 0.2552742616033755, + "grad_norm": 0.7067723274230957, + "learning_rate": 0.0012840890634021249, + "loss": 1.6072, + "step": 2420 + }, + { + "epoch": 0.255379746835443, + "grad_norm": 0.703155517578125, + "learning_rate": 0.001283912774811249, + "loss": 1.5556, + "step": 2421 + }, + { + "epoch": 0.25548523206751056, + "grad_norm": 0.7451987266540527, + "learning_rate": 0.0012837364263927843, + "loss": 1.6579, + "step": 2422 + }, + { + "epoch": 0.25559071729957805, + "grad_norm": 0.8642445802688599, + "learning_rate": 0.001283560018166492, + "loss": 1.6221, + "step": 2423 + }, + { + "epoch": 0.25569620253164554, + "grad_norm": 0.6488139033317566, + "learning_rate": 0.0012833835501521386, + "loss": 1.565, + "step": 2424 + }, + { + "epoch": 0.2558016877637131, + "grad_norm": 0.9385361075401306, + "learning_rate": 0.0012832070223694992, + "loss": 1.5529, + "step": 2425 + }, + { + "epoch": 0.2559071729957806, + "grad_norm": 0.8104203939437866, + "learning_rate": 0.0012830304348383538, + "loss": 1.5795, + "step": 2426 + }, + { + "epoch": 0.2560126582278481, + "grad_norm": 0.8405123353004456, + "learning_rate": 0.0012828537875784905, + "loss": 1.6138, + "step": 2427 + }, + { + "epoch": 0.2561181434599156, + "grad_norm": 0.861587643623352, + "learning_rate": 
0.001282677080609703, + "loss": 1.5541, + "step": 2428 + }, + { + "epoch": 0.2562236286919831, + "grad_norm": 0.7726901769638062, + "learning_rate": 0.0012825003139517925, + "loss": 1.5562, + "step": 2429 + }, + { + "epoch": 0.2563291139240506, + "grad_norm": 0.7644317150115967, + "learning_rate": 0.0012823234876245667, + "loss": 1.5641, + "step": 2430 + }, + { + "epoch": 0.25643459915611816, + "grad_norm": 0.7508739233016968, + "learning_rate": 0.0012821466016478395, + "loss": 1.6018, + "step": 2431 + }, + { + "epoch": 0.25654008438818565, + "grad_norm": 0.7691518664360046, + "learning_rate": 0.0012819696560414323, + "loss": 1.6106, + "step": 2432 + }, + { + "epoch": 0.25664556962025314, + "grad_norm": 0.7013019919395447, + "learning_rate": 0.0012817926508251723, + "loss": 1.6398, + "step": 2433 + }, + { + "epoch": 0.2567510548523207, + "grad_norm": 0.7581450939178467, + "learning_rate": 0.0012816155860188938, + "loss": 1.5359, + "step": 2434 + }, + { + "epoch": 0.2568565400843882, + "grad_norm": 0.8822186589241028, + "learning_rate": 0.0012814384616424384, + "loss": 1.5592, + "step": 2435 + }, + { + "epoch": 0.2569620253164557, + "grad_norm": 0.785973072052002, + "learning_rate": 0.0012812612777156533, + "loss": 1.5647, + "step": 2436 + }, + { + "epoch": 0.2570675105485232, + "grad_norm": 0.6914061903953552, + "learning_rate": 0.001281084034258393, + "loss": 1.5971, + "step": 2437 + }, + { + "epoch": 0.2571729957805907, + "grad_norm": 0.8115978837013245, + "learning_rate": 0.0012809067312905182, + "loss": 1.5913, + "step": 2438 + }, + { + "epoch": 0.2572784810126582, + "grad_norm": 0.684647262096405, + "learning_rate": 0.0012807293688318969, + "loss": 1.5888, + "step": 2439 + }, + { + "epoch": 0.25738396624472576, + "grad_norm": 1.1828255653381348, + "learning_rate": 0.0012805519469024035, + "loss": 1.6148, + "step": 2440 + }, + { + "epoch": 0.25748945147679325, + "grad_norm": 0.7423099875450134, + "learning_rate": 0.0012803744655219187, + "loss": 1.5969, + 
"step": 2441 + }, + { + "epoch": 0.25759493670886074, + "grad_norm": 0.9442310333251953, + "learning_rate": 0.0012801969247103306, + "loss": 1.5769, + "step": 2442 + }, + { + "epoch": 0.2577004219409283, + "grad_norm": 1.00467050075531, + "learning_rate": 0.001280019324487533, + "loss": 1.5903, + "step": 2443 + }, + { + "epoch": 0.2578059071729958, + "grad_norm": 0.6647293567657471, + "learning_rate": 0.0012798416648734272, + "loss": 1.5819, + "step": 2444 + }, + { + "epoch": 0.2579113924050633, + "grad_norm": 0.783153772354126, + "learning_rate": 0.001279663945887921, + "loss": 1.6096, + "step": 2445 + }, + { + "epoch": 0.2580168776371308, + "grad_norm": 0.6189470291137695, + "learning_rate": 0.0012794861675509285, + "loss": 1.5513, + "step": 2446 + }, + { + "epoch": 0.2581223628691983, + "grad_norm": 0.785517692565918, + "learning_rate": 0.0012793083298823708, + "loss": 1.5751, + "step": 2447 + }, + { + "epoch": 0.2582278481012658, + "grad_norm": 0.6450029611587524, + "learning_rate": 0.0012791304329021751, + "loss": 1.5657, + "step": 2448 + }, + { + "epoch": 0.25833333333333336, + "grad_norm": 0.7449167370796204, + "learning_rate": 0.001278952476630276, + "loss": 1.5633, + "step": 2449 + }, + { + "epoch": 0.25843881856540085, + "grad_norm": 0.6606023907661438, + "learning_rate": 0.0012787744610866143, + "loss": 1.5517, + "step": 2450 + }, + { + "epoch": 0.25854430379746834, + "grad_norm": 0.7129826545715332, + "learning_rate": 0.0012785963862911376, + "loss": 1.5795, + "step": 2451 + }, + { + "epoch": 0.2586497890295359, + "grad_norm": 0.7472405433654785, + "learning_rate": 0.0012784182522637998, + "loss": 1.6171, + "step": 2452 + }, + { + "epoch": 0.2587552742616034, + "grad_norm": 0.6643209457397461, + "learning_rate": 0.001278240059024562, + "loss": 1.573, + "step": 2453 + }, + { + "epoch": 0.2588607594936709, + "grad_norm": 0.6565025448799133, + "learning_rate": 0.0012780618065933915, + "loss": 1.6239, + "step": 2454 + }, + { + "epoch": 0.25896624472573837, 
+ "grad_norm": 0.6686996817588806, + "learning_rate": 0.0012778834949902626, + "loss": 1.5621, + "step": 2455 + }, + { + "epoch": 0.2590717299578059, + "grad_norm": 0.6679922342300415, + "learning_rate": 0.0012777051242351557, + "loss": 1.5872, + "step": 2456 + }, + { + "epoch": 0.2591772151898734, + "grad_norm": 0.7095039486885071, + "learning_rate": 0.0012775266943480582, + "loss": 1.5833, + "step": 2457 + }, + { + "epoch": 0.2592827004219409, + "grad_norm": 0.6322080492973328, + "learning_rate": 0.0012773482053489642, + "loss": 1.5992, + "step": 2458 + }, + { + "epoch": 0.25938818565400845, + "grad_norm": 0.7035310864448547, + "learning_rate": 0.0012771696572578743, + "loss": 1.6021, + "step": 2459 + }, + { + "epoch": 0.25949367088607594, + "grad_norm": 0.6617456674575806, + "learning_rate": 0.0012769910500947954, + "loss": 1.5737, + "step": 2460 + }, + { + "epoch": 0.25959915611814344, + "grad_norm": 0.6385916471481323, + "learning_rate": 0.0012768123838797414, + "loss": 1.5358, + "step": 2461 + }, + { + "epoch": 0.259704641350211, + "grad_norm": 0.75906902551651, + "learning_rate": 0.0012766336586327333, + "loss": 1.5869, + "step": 2462 + }, + { + "epoch": 0.2598101265822785, + "grad_norm": 0.6518564224243164, + "learning_rate": 0.0012764548743737973, + "loss": 1.5836, + "step": 2463 + }, + { + "epoch": 0.25991561181434597, + "grad_norm": 0.7267652750015259, + "learning_rate": 0.001276276031122968, + "loss": 1.5547, + "step": 2464 + }, + { + "epoch": 0.2600210970464135, + "grad_norm": 0.7266334295272827, + "learning_rate": 0.0012760971289002847, + "loss": 1.611, + "step": 2465 + }, + { + "epoch": 0.260126582278481, + "grad_norm": 0.6750686168670654, + "learning_rate": 0.0012759181677257946, + "loss": 1.5593, + "step": 2466 + }, + { + "epoch": 0.2602320675105485, + "grad_norm": 0.764900267124176, + "learning_rate": 0.0012757391476195517, + "loss": 1.5771, + "step": 2467 + }, + { + "epoch": 0.26033755274261605, + "grad_norm": 0.6680631637573242, + 
"learning_rate": 0.0012755600686016155, + "loss": 1.5851, + "step": 2468 + }, + { + "epoch": 0.26044303797468354, + "grad_norm": 0.7960082292556763, + "learning_rate": 0.0012753809306920532, + "loss": 1.5955, + "step": 2469 + }, + { + "epoch": 0.26054852320675104, + "grad_norm": 0.830490231513977, + "learning_rate": 0.0012752017339109376, + "loss": 1.5957, + "step": 2470 + }, + { + "epoch": 0.2606540084388186, + "grad_norm": 0.701457679271698, + "learning_rate": 0.0012750224782783492, + "loss": 1.6133, + "step": 2471 + }, + { + "epoch": 0.2607594936708861, + "grad_norm": 0.9396066069602966, + "learning_rate": 0.0012748431638143739, + "loss": 1.5673, + "step": 2472 + }, + { + "epoch": 0.26086497890295357, + "grad_norm": 0.8538429141044617, + "learning_rate": 0.0012746637905391048, + "loss": 1.5772, + "step": 2473 + }, + { + "epoch": 0.2609704641350211, + "grad_norm": 0.6814212799072266, + "learning_rate": 0.001274484358472642, + "loss": 1.5819, + "step": 2474 + }, + { + "epoch": 0.2610759493670886, + "grad_norm": 0.895710825920105, + "learning_rate": 0.0012743048676350911, + "loss": 1.5949, + "step": 2475 + }, + { + "epoch": 0.2611814345991561, + "grad_norm": 0.6901353001594543, + "learning_rate": 0.001274125318046566, + "loss": 1.5573, + "step": 2476 + }, + { + "epoch": 0.26128691983122365, + "grad_norm": 1.0844773054122925, + "learning_rate": 0.0012739457097271849, + "loss": 1.6061, + "step": 2477 + }, + { + "epoch": 0.26139240506329114, + "grad_norm": 0.787852942943573, + "learning_rate": 0.0012737660426970748, + "loss": 1.5909, + "step": 2478 + }, + { + "epoch": 0.26149789029535864, + "grad_norm": 0.7478452324867249, + "learning_rate": 0.0012735863169763678, + "loss": 1.5862, + "step": 2479 + }, + { + "epoch": 0.2616033755274262, + "grad_norm": 0.838025689125061, + "learning_rate": 0.0012734065325852029, + "loss": 1.58, + "step": 2480 + }, + { + "epoch": 0.2617088607594937, + "grad_norm": 0.6724877953529358, + "learning_rate": 0.0012732266895437265, + "loss": 
1.5652, + "step": 2481 + }, + { + "epoch": 0.26181434599156117, + "grad_norm": 0.7484658360481262, + "learning_rate": 0.00127304678787209, + "loss": 1.5515, + "step": 2482 + }, + { + "epoch": 0.2619198312236287, + "grad_norm": 0.7298699617385864, + "learning_rate": 0.001272866827590453, + "loss": 1.5852, + "step": 2483 + }, + { + "epoch": 0.2620253164556962, + "grad_norm": 0.6208005547523499, + "learning_rate": 0.001272686808718981, + "loss": 1.5458, + "step": 2484 + }, + { + "epoch": 0.2621308016877637, + "grad_norm": 0.7990036010742188, + "learning_rate": 0.0012725067312778454, + "loss": 1.5827, + "step": 2485 + }, + { + "epoch": 0.2622362869198312, + "grad_norm": 0.8125020265579224, + "learning_rate": 0.0012723265952872252, + "loss": 1.5705, + "step": 2486 + }, + { + "epoch": 0.26234177215189874, + "grad_norm": 0.6924694776535034, + "learning_rate": 0.0012721464007673055, + "loss": 1.5713, + "step": 2487 + }, + { + "epoch": 0.26244725738396624, + "grad_norm": 0.8673000931739807, + "learning_rate": 0.0012719661477382778, + "loss": 1.6122, + "step": 2488 + }, + { + "epoch": 0.26255274261603373, + "grad_norm": 0.8599487543106079, + "learning_rate": 0.0012717858362203407, + "loss": 1.5382, + "step": 2489 + }, + { + "epoch": 0.2626582278481013, + "grad_norm": 0.636349081993103, + "learning_rate": 0.0012716054662336987, + "loss": 1.5878, + "step": 2490 + }, + { + "epoch": 0.26276371308016877, + "grad_norm": 0.9041169285774231, + "learning_rate": 0.001271425037798563, + "loss": 1.5629, + "step": 2491 + }, + { + "epoch": 0.26286919831223626, + "grad_norm": 0.746301531791687, + "learning_rate": 0.0012712445509351518, + "loss": 1.5716, + "step": 2492 + }, + { + "epoch": 0.2629746835443038, + "grad_norm": 0.78575199842453, + "learning_rate": 0.00127106400566369, + "loss": 1.6224, + "step": 2493 + }, + { + "epoch": 0.2630801687763713, + "grad_norm": 1.0046812295913696, + "learning_rate": 0.0012708834020044076, + "loss": 1.5543, + "step": 2494 + }, + { + "epoch": 
0.2631856540084388, + "grad_norm": 0.6690831780433655, + "learning_rate": 0.0012707027399775429, + "loss": 1.5721, + "step": 2495 + }, + { + "epoch": 0.26329113924050634, + "grad_norm": 0.906298816204071, + "learning_rate": 0.0012705220196033396, + "loss": 1.5687, + "step": 2496 + }, + { + "epoch": 0.26339662447257384, + "grad_norm": 0.7363414168357849, + "learning_rate": 0.0012703412409020484, + "loss": 1.6091, + "step": 2497 + }, + { + "epoch": 0.26350210970464133, + "grad_norm": 0.9228997230529785, + "learning_rate": 0.0012701604038939268, + "loss": 1.5678, + "step": 2498 + }, + { + "epoch": 0.2636075949367089, + "grad_norm": 0.8280107378959656, + "learning_rate": 0.0012699795085992379, + "loss": 1.5737, + "step": 2499 + }, + { + "epoch": 0.26371308016877637, + "grad_norm": 0.9170246124267578, + "learning_rate": 0.001269798555038252, + "loss": 1.5638, + "step": 2500 + }, + { + "epoch": 0.26381856540084386, + "grad_norm": 0.8943712711334229, + "learning_rate": 0.0012696175432312465, + "loss": 1.5545, + "step": 2501 + }, + { + "epoch": 0.2639240506329114, + "grad_norm": 0.6755743026733398, + "learning_rate": 0.0012694364731985041, + "loss": 1.5957, + "step": 2502 + }, + { + "epoch": 0.2640295358649789, + "grad_norm": 0.7771424651145935, + "learning_rate": 0.0012692553449603148, + "loss": 1.5766, + "step": 2503 + }, + { + "epoch": 0.2641350210970464, + "grad_norm": 0.7453479170799255, + "learning_rate": 0.0012690741585369748, + "loss": 1.5526, + "step": 2504 + }, + { + "epoch": 0.26424050632911394, + "grad_norm": 0.7056819200515747, + "learning_rate": 0.0012688929139487869, + "loss": 1.6087, + "step": 2505 + }, + { + "epoch": 0.26434599156118144, + "grad_norm": 0.7389658689498901, + "learning_rate": 0.0012687116112160607, + "loss": 1.5571, + "step": 2506 + }, + { + "epoch": 0.26445147679324893, + "grad_norm": 0.7229214906692505, + "learning_rate": 0.0012685302503591118, + "loss": 1.5862, + "step": 2507 + }, + { + "epoch": 0.2645569620253165, + "grad_norm": 
0.6422256231307983, + "learning_rate": 0.0012683488313982628, + "loss": 1.5692, + "step": 2508 + }, + { + "epoch": 0.26466244725738397, + "grad_norm": 0.7579633593559265, + "learning_rate": 0.0012681673543538427, + "loss": 1.5588, + "step": 2509 + }, + { + "epoch": 0.26476793248945146, + "grad_norm": 0.6588929891586304, + "learning_rate": 0.0012679858192461864, + "loss": 1.5949, + "step": 2510 + }, + { + "epoch": 0.264873417721519, + "grad_norm": 0.6882631182670593, + "learning_rate": 0.0012678042260956363, + "loss": 1.6072, + "step": 2511 + }, + { + "epoch": 0.2649789029535865, + "grad_norm": 0.6468110680580139, + "learning_rate": 0.0012676225749225407, + "loss": 1.5808, + "step": 2512 + }, + { + "epoch": 0.265084388185654, + "grad_norm": 0.7737018465995789, + "learning_rate": 0.0012674408657472542, + "loss": 1.5438, + "step": 2513 + }, + { + "epoch": 0.26518987341772154, + "grad_norm": 0.6856210231781006, + "learning_rate": 0.0012672590985901386, + "loss": 1.5693, + "step": 2514 + }, + { + "epoch": 0.26529535864978904, + "grad_norm": 0.7291756868362427, + "learning_rate": 0.001267077273471562, + "loss": 1.567, + "step": 2515 + }, + { + "epoch": 0.26540084388185653, + "grad_norm": 0.6246429681777954, + "learning_rate": 0.0012668953904118984, + "loss": 1.5742, + "step": 2516 + }, + { + "epoch": 0.2655063291139241, + "grad_norm": 0.8616237640380859, + "learning_rate": 0.001266713449431529, + "loss": 1.5897, + "step": 2517 + }, + { + "epoch": 0.26561181434599157, + "grad_norm": 0.7316917181015015, + "learning_rate": 0.0012665314505508406, + "loss": 1.5967, + "step": 2518 + }, + { + "epoch": 0.26571729957805906, + "grad_norm": 0.8766317963600159, + "learning_rate": 0.0012663493937902278, + "loss": 1.5952, + "step": 2519 + }, + { + "epoch": 0.26582278481012656, + "grad_norm": 0.8979650735855103, + "learning_rate": 0.0012661672791700906, + "loss": 1.5826, + "step": 2520 + }, + { + "epoch": 0.2659282700421941, + "grad_norm": 0.7276127338409424, + "learning_rate": 
0.001265985106710836, + "loss": 1.5759, + "step": 2521 + }, + { + "epoch": 0.2660337552742616, + "grad_norm": 0.9184083938598633, + "learning_rate": 0.0012658028764328771, + "loss": 1.5764, + "step": 2522 + }, + { + "epoch": 0.2661392405063291, + "grad_norm": 0.6894850134849548, + "learning_rate": 0.0012656205883566339, + "loss": 1.588, + "step": 2523 + }, + { + "epoch": 0.26624472573839664, + "grad_norm": 0.735515832901001, + "learning_rate": 0.0012654382425025328, + "loss": 1.5302, + "step": 2524 + }, + { + "epoch": 0.26635021097046413, + "grad_norm": 0.6475989818572998, + "learning_rate": 0.0012652558388910062, + "loss": 1.5394, + "step": 2525 + }, + { + "epoch": 0.2664556962025316, + "grad_norm": 0.6883207559585571, + "learning_rate": 0.0012650733775424938, + "loss": 1.5591, + "step": 2526 + }, + { + "epoch": 0.26656118143459917, + "grad_norm": 0.6676151156425476, + "learning_rate": 0.001264890858477441, + "loss": 1.5642, + "step": 2527 + }, + { + "epoch": 0.26666666666666666, + "grad_norm": 0.7325810790061951, + "learning_rate": 0.0012647082817162998, + "loss": 1.5795, + "step": 2528 + }, + { + "epoch": 0.26677215189873416, + "grad_norm": 0.6868779063224792, + "learning_rate": 0.0012645256472795295, + "loss": 1.6522, + "step": 2529 + }, + { + "epoch": 0.2668776371308017, + "grad_norm": 0.7874464392662048, + "learning_rate": 0.0012643429551875945, + "loss": 1.5627, + "step": 2530 + }, + { + "epoch": 0.2669831223628692, + "grad_norm": 0.668770432472229, + "learning_rate": 0.0012641602054609662, + "loss": 1.5776, + "step": 2531 + }, + { + "epoch": 0.2670886075949367, + "grad_norm": 1.1380810737609863, + "learning_rate": 0.0012639773981201238, + "loss": 1.577, + "step": 2532 + }, + { + "epoch": 0.26719409282700424, + "grad_norm": 0.9405377507209778, + "learning_rate": 0.0012637945331855506, + "loss": 1.5111, + "step": 2533 + }, + { + "epoch": 0.26729957805907173, + "grad_norm": 0.7849228382110596, + "learning_rate": 0.0012636116106777382, + "loss": 1.5575, + 
"step": 2534 + }, + { + "epoch": 0.2674050632911392, + "grad_norm": 1.1612542867660522, + "learning_rate": 0.0012634286306171835, + "loss": 1.6194, + "step": 2535 + }, + { + "epoch": 0.26751054852320677, + "grad_norm": 0.6926266551017761, + "learning_rate": 0.0012632455930243907, + "loss": 1.5802, + "step": 2536 + }, + { + "epoch": 0.26761603375527426, + "grad_norm": 1.059706449508667, + "learning_rate": 0.0012630624979198697, + "loss": 1.5799, + "step": 2537 + }, + { + "epoch": 0.26772151898734176, + "grad_norm": 0.935330331325531, + "learning_rate": 0.0012628793453241377, + "loss": 1.6166, + "step": 2538 + }, + { + "epoch": 0.2678270042194093, + "grad_norm": 0.7302536368370056, + "learning_rate": 0.0012626961352577174, + "loss": 1.5869, + "step": 2539 + }, + { + "epoch": 0.2679324894514768, + "grad_norm": 1.2239211797714233, + "learning_rate": 0.0012625128677411388, + "loss": 1.5708, + "step": 2540 + }, + { + "epoch": 0.2680379746835443, + "grad_norm": 0.6356970071792603, + "learning_rate": 0.0012623295427949377, + "loss": 1.5632, + "step": 2541 + }, + { + "epoch": 0.26814345991561184, + "grad_norm": 1.2514971494674683, + "learning_rate": 0.0012621461604396566, + "loss": 1.602, + "step": 2542 + }, + { + "epoch": 0.26824894514767933, + "grad_norm": 0.6907729506492615, + "learning_rate": 0.0012619627206958445, + "loss": 1.5666, + "step": 2543 + }, + { + "epoch": 0.2683544303797468, + "grad_norm": 1.0649675130844116, + "learning_rate": 0.0012617792235840564, + "loss": 1.5432, + "step": 2544 + }, + { + "epoch": 0.26845991561181437, + "grad_norm": 0.7117766737937927, + "learning_rate": 0.0012615956691248544, + "loss": 1.5862, + "step": 2545 + }, + { + "epoch": 0.26856540084388186, + "grad_norm": 0.9948951601982117, + "learning_rate": 0.001261412057338807, + "loss": 1.5576, + "step": 2546 + }, + { + "epoch": 0.26867088607594936, + "grad_norm": 1.0241447687149048, + "learning_rate": 0.0012612283882464882, + "loss": 1.5526, + "step": 2547 + }, + { + "epoch": 
0.2687763713080169, + "grad_norm": 0.7281988859176636, + "learning_rate": 0.0012610446618684793, + "loss": 1.5727, + "step": 2548 + }, + { + "epoch": 0.2688818565400844, + "grad_norm": 1.248826265335083, + "learning_rate": 0.0012608608782253676, + "loss": 1.574, + "step": 2549 + }, + { + "epoch": 0.2689873417721519, + "grad_norm": 0.655188798904419, + "learning_rate": 0.0012606770373377475, + "loss": 1.5651, + "step": 2550 + }, + { + "epoch": 0.26909282700421944, + "grad_norm": 1.0503344535827637, + "learning_rate": 0.0012604931392262186, + "loss": 1.5962, + "step": 2551 + }, + { + "epoch": 0.26919831223628693, + "grad_norm": 0.7266337275505066, + "learning_rate": 0.001260309183911388, + "loss": 1.5839, + "step": 2552 + }, + { + "epoch": 0.2693037974683544, + "grad_norm": 0.9811515212059021, + "learning_rate": 0.0012601251714138683, + "loss": 1.6004, + "step": 2553 + }, + { + "epoch": 0.2694092827004219, + "grad_norm": 1.136507272720337, + "learning_rate": 0.0012599411017542798, + "loss": 1.5625, + "step": 2554 + }, + { + "epoch": 0.26951476793248946, + "grad_norm": 0.6849504113197327, + "learning_rate": 0.0012597569749532482, + "loss": 1.5719, + "step": 2555 + }, + { + "epoch": 0.26962025316455696, + "grad_norm": 0.9800060987472534, + "learning_rate": 0.0012595727910314056, + "loss": 1.5808, + "step": 2556 + }, + { + "epoch": 0.26972573839662445, + "grad_norm": 0.6783215999603271, + "learning_rate": 0.0012593885500093906, + "loss": 1.5597, + "step": 2557 + }, + { + "epoch": 0.269831223628692, + "grad_norm": 1.2724545001983643, + "learning_rate": 0.0012592042519078486, + "loss": 1.5819, + "step": 2558 + }, + { + "epoch": 0.2699367088607595, + "grad_norm": 0.6746913194656372, + "learning_rate": 0.0012590198967474312, + "loss": 1.5672, + "step": 2559 + }, + { + "epoch": 0.270042194092827, + "grad_norm": 1.103001594543457, + "learning_rate": 0.0012588354845487959, + "loss": 1.5408, + "step": 2560 + }, + { + "epoch": 0.27014767932489453, + "grad_norm": 
0.688103973865509, + "learning_rate": 0.0012586510153326075, + "loss": 1.5578, + "step": 2561 + }, + { + "epoch": 0.270253164556962, + "grad_norm": 0.745525062084198, + "learning_rate": 0.0012584664891195365, + "loss": 1.5828, + "step": 2562 + }, + { + "epoch": 0.2703586497890295, + "grad_norm": 0.670349657535553, + "learning_rate": 0.0012582819059302598, + "loss": 1.6052, + "step": 2563 + }, + { + "epoch": 0.27046413502109706, + "grad_norm": 0.7514866590499878, + "learning_rate": 0.001258097265785461, + "loss": 1.5648, + "step": 2564 + }, + { + "epoch": 0.27056962025316456, + "grad_norm": 0.7635443210601807, + "learning_rate": 0.0012579125687058302, + "loss": 1.5425, + "step": 2565 + }, + { + "epoch": 0.27067510548523205, + "grad_norm": 0.749721109867096, + "learning_rate": 0.0012577278147120632, + "loss": 1.5437, + "step": 2566 + }, + { + "epoch": 0.2707805907172996, + "grad_norm": 1.0705678462982178, + "learning_rate": 0.0012575430038248628, + "loss": 1.5926, + "step": 2567 + }, + { + "epoch": 0.2708860759493671, + "grad_norm": 0.7713499069213867, + "learning_rate": 0.001257358136064938, + "loss": 1.5688, + "step": 2568 + }, + { + "epoch": 0.2709915611814346, + "grad_norm": 0.959242582321167, + "learning_rate": 0.001257173211453004, + "loss": 1.5259, + "step": 2569 + }, + { + "epoch": 0.27109704641350213, + "grad_norm": 1.0549201965332031, + "learning_rate": 0.001256988230009783, + "loss": 1.592, + "step": 2570 + }, + { + "epoch": 0.2712025316455696, + "grad_norm": 0.7159736156463623, + "learning_rate": 0.0012568031917560027, + "loss": 1.5394, + "step": 2571 + }, + { + "epoch": 0.2713080168776371, + "grad_norm": 1.100268840789795, + "learning_rate": 0.0012566180967123976, + "loss": 1.5724, + "step": 2572 + }, + { + "epoch": 0.27141350210970466, + "grad_norm": 0.7555873394012451, + "learning_rate": 0.0012564329448997082, + "loss": 1.5651, + "step": 2573 + }, + { + "epoch": 0.27151898734177216, + "grad_norm": 0.8638109564781189, + "learning_rate": 
0.0012562477363386821, + "loss": 1.564, + "step": 2574 + }, + { + "epoch": 0.27162447257383965, + "grad_norm": 0.7734572887420654, + "learning_rate": 0.0012560624710500731, + "loss": 1.5599, + "step": 2575 + }, + { + "epoch": 0.2717299578059072, + "grad_norm": 1.0647709369659424, + "learning_rate": 0.0012558771490546407, + "loss": 1.5443, + "step": 2576 + }, + { + "epoch": 0.2718354430379747, + "grad_norm": 1.0617550611495972, + "learning_rate": 0.0012556917703731509, + "loss": 1.5795, + "step": 2577 + }, + { + "epoch": 0.2719409282700422, + "grad_norm": 0.7222639322280884, + "learning_rate": 0.0012555063350263768, + "loss": 1.6215, + "step": 2578 + }, + { + "epoch": 0.27204641350210973, + "grad_norm": 0.8942621946334839, + "learning_rate": 0.0012553208430350973, + "loss": 1.5761, + "step": 2579 + }, + { + "epoch": 0.2721518987341772, + "grad_norm": 0.808737576007843, + "learning_rate": 0.0012551352944200976, + "loss": 1.5719, + "step": 2580 + }, + { + "epoch": 0.2722573839662447, + "grad_norm": 0.6676040291786194, + "learning_rate": 0.0012549496892021693, + "loss": 1.5466, + "step": 2581 + }, + { + "epoch": 0.27236286919831226, + "grad_norm": 0.7493626475334167, + "learning_rate": 0.0012547640274021103, + "loss": 1.581, + "step": 2582 + }, + { + "epoch": 0.27246835443037976, + "grad_norm": 0.6190323829650879, + "learning_rate": 0.001254578309040725, + "loss": 1.5696, + "step": 2583 + }, + { + "epoch": 0.27257383966244725, + "grad_norm": 0.7634061574935913, + "learning_rate": 0.001254392534138824, + "loss": 1.5742, + "step": 2584 + }, + { + "epoch": 0.27267932489451474, + "grad_norm": 0.698890745639801, + "learning_rate": 0.0012542067027172248, + "loss": 1.5977, + "step": 2585 + }, + { + "epoch": 0.2727848101265823, + "grad_norm": 1.1273051500320435, + "learning_rate": 0.0012540208147967503, + "loss": 1.5622, + "step": 2586 + }, + { + "epoch": 0.2728902953586498, + "grad_norm": 0.9891734719276428, + "learning_rate": 0.00125383487039823, + "loss": 1.6041, + "step": 
2587 + }, + { + "epoch": 0.2729957805907173, + "grad_norm": 0.6534878611564636, + "learning_rate": 0.0012536488695425003, + "loss": 1.5757, + "step": 2588 + }, + { + "epoch": 0.2731012658227848, + "grad_norm": 0.8328507542610168, + "learning_rate": 0.0012534628122504031, + "loss": 1.572, + "step": 2589 + }, + { + "epoch": 0.2732067510548523, + "grad_norm": 0.6907442808151245, + "learning_rate": 0.0012532766985427874, + "loss": 1.5789, + "step": 2590 + }, + { + "epoch": 0.2733122362869198, + "grad_norm": 0.727445662021637, + "learning_rate": 0.0012530905284405083, + "loss": 1.5728, + "step": 2591 + }, + { + "epoch": 0.27341772151898736, + "grad_norm": 0.7052291035652161, + "learning_rate": 0.0012529043019644266, + "loss": 1.5828, + "step": 2592 + }, + { + "epoch": 0.27352320675105485, + "grad_norm": 0.7314464449882507, + "learning_rate": 0.0012527180191354104, + "loss": 1.5765, + "step": 2593 + }, + { + "epoch": 0.27362869198312234, + "grad_norm": 0.7437423467636108, + "learning_rate": 0.0012525316799743332, + "loss": 1.5794, + "step": 2594 + }, + { + "epoch": 0.2737341772151899, + "grad_norm": 0.9850825667381287, + "learning_rate": 0.0012523452845020755, + "loss": 1.6091, + "step": 2595 + }, + { + "epoch": 0.2738396624472574, + "grad_norm": 0.7026557922363281, + "learning_rate": 0.0012521588327395236, + "loss": 1.571, + "step": 2596 + }, + { + "epoch": 0.2739451476793249, + "grad_norm": 0.8555828928947449, + "learning_rate": 0.0012519723247075706, + "loss": 1.5582, + "step": 2597 + }, + { + "epoch": 0.2740506329113924, + "grad_norm": 0.7746468186378479, + "learning_rate": 0.0012517857604271156, + "loss": 1.5336, + "step": 2598 + }, + { + "epoch": 0.2741561181434599, + "grad_norm": 0.6827036738395691, + "learning_rate": 0.001251599139919064, + "loss": 1.556, + "step": 2599 + }, + { + "epoch": 0.2742616033755274, + "grad_norm": 0.9027358293533325, + "learning_rate": 0.0012514124632043272, + "loss": 1.5969, + "step": 2600 + }, + { + "epoch": 0.27436708860759496, + 
"grad_norm": 0.9315114617347717, + "learning_rate": 0.001251225730303824, + "loss": 1.5542, + "step": 2601 + }, + { + "epoch": 0.27447257383966245, + "grad_norm": 0.7167088985443115, + "learning_rate": 0.0012510389412384785, + "loss": 1.5663, + "step": 2602 + }, + { + "epoch": 0.27457805907172994, + "grad_norm": 0.6399233341217041, + "learning_rate": 0.001250852096029221, + "loss": 1.5078, + "step": 2603 + }, + { + "epoch": 0.2746835443037975, + "grad_norm": 0.6913734078407288, + "learning_rate": 0.0012506651946969888, + "loss": 1.579, + "step": 2604 + }, + { + "epoch": 0.274789029535865, + "grad_norm": 0.8000332713127136, + "learning_rate": 0.0012504782372627248, + "loss": 1.6048, + "step": 2605 + }, + { + "epoch": 0.2748945147679325, + "grad_norm": 0.6380992531776428, + "learning_rate": 0.0012502912237473789, + "loss": 1.5872, + "step": 2606 + }, + { + "epoch": 0.275, + "grad_norm": 0.7552348375320435, + "learning_rate": 0.0012501041541719067, + "loss": 1.5902, + "step": 2607 + }, + { + "epoch": 0.2751054852320675, + "grad_norm": 0.6952722072601318, + "learning_rate": 0.0012499170285572702, + "loss": 1.5637, + "step": 2608 + }, + { + "epoch": 0.275210970464135, + "grad_norm": 0.8204019665718079, + "learning_rate": 0.0012497298469244377, + "loss": 1.5936, + "step": 2609 + }, + { + "epoch": 0.27531645569620256, + "grad_norm": 0.6706911325454712, + "learning_rate": 0.0012495426092943842, + "loss": 1.5753, + "step": 2610 + }, + { + "epoch": 0.27542194092827005, + "grad_norm": 0.8297910690307617, + "learning_rate": 0.0012493553156880904, + "loss": 1.6, + "step": 2611 + }, + { + "epoch": 0.27552742616033754, + "grad_norm": 0.879906177520752, + "learning_rate": 0.0012491679661265434, + "loss": 1.5801, + "step": 2612 + }, + { + "epoch": 0.2756329113924051, + "grad_norm": 0.791293740272522, + "learning_rate": 0.0012489805606307367, + "loss": 1.6061, + "step": 2613 + }, + { + "epoch": 0.2757383966244726, + "grad_norm": 0.9210045337677002, + "learning_rate": 
0.00124879309922167, + "loss": 1.5686, + "step": 2614 + }, + { + "epoch": 0.2758438818565401, + "grad_norm": 0.7218326330184937, + "learning_rate": 0.0012486055819203494, + "loss": 1.5798, + "step": 2615 + }, + { + "epoch": 0.2759493670886076, + "grad_norm": 0.8447563648223877, + "learning_rate": 0.001248418008747787, + "loss": 1.5534, + "step": 2616 + }, + { + "epoch": 0.2760548523206751, + "grad_norm": 0.6671553254127502, + "learning_rate": 0.0012482303797250014, + "loss": 1.5554, + "step": 2617 + }, + { + "epoch": 0.2761603375527426, + "grad_norm": 1.0086140632629395, + "learning_rate": 0.0012480426948730174, + "loss": 1.545, + "step": 2618 + }, + { + "epoch": 0.2762658227848101, + "grad_norm": 0.8233963251113892, + "learning_rate": 0.001247854954212866, + "loss": 1.577, + "step": 2619 + }, + { + "epoch": 0.27637130801687765, + "grad_norm": 0.7361007332801819, + "learning_rate": 0.0012476671577655845, + "loss": 1.5496, + "step": 2620 + }, + { + "epoch": 0.27647679324894514, + "grad_norm": 0.8004971742630005, + "learning_rate": 0.001247479305552216, + "loss": 1.5447, + "step": 2621 + }, + { + "epoch": 0.27658227848101263, + "grad_norm": 0.8524491190910339, + "learning_rate": 0.001247291397593811, + "loss": 1.5372, + "step": 2622 + }, + { + "epoch": 0.2766877637130802, + "grad_norm": 0.663224995136261, + "learning_rate": 0.001247103433911425, + "loss": 1.5389, + "step": 2623 + }, + { + "epoch": 0.2767932489451477, + "grad_norm": 0.6938676238059998, + "learning_rate": 0.0012469154145261208, + "loss": 1.5908, + "step": 2624 + }, + { + "epoch": 0.27689873417721517, + "grad_norm": 0.7456339001655579, + "learning_rate": 0.0012467273394589664, + "loss": 1.5486, + "step": 2625 + }, + { + "epoch": 0.2770042194092827, + "grad_norm": 0.6411186456680298, + "learning_rate": 0.0012465392087310366, + "loss": 1.5513, + "step": 2626 + }, + { + "epoch": 0.2771097046413502, + "grad_norm": 0.7330667972564697, + "learning_rate": 0.0012463510223634125, + "loss": 1.5457, + "step": 2627 
+ }, + { + "epoch": 0.2772151898734177, + "grad_norm": 0.6554349064826965, + "learning_rate": 0.0012461627803771812, + "loss": 1.5762, + "step": 2628 + }, + { + "epoch": 0.27732067510548525, + "grad_norm": 0.7174804210662842, + "learning_rate": 0.0012459744827934367, + "loss": 1.5604, + "step": 2629 + }, + { + "epoch": 0.27742616033755274, + "grad_norm": 0.6786907911300659, + "learning_rate": 0.0012457861296332774, + "loss": 1.6024, + "step": 2630 + }, + { + "epoch": 0.27753164556962023, + "grad_norm": 0.7631864547729492, + "learning_rate": 0.0012455977209178109, + "loss": 1.5701, + "step": 2631 + }, + { + "epoch": 0.2776371308016878, + "grad_norm": 0.7481979131698608, + "learning_rate": 0.0012454092566681482, + "loss": 1.5802, + "step": 2632 + }, + { + "epoch": 0.2777426160337553, + "grad_norm": 0.7306005954742432, + "learning_rate": 0.001245220736905408, + "loss": 1.5396, + "step": 2633 + }, + { + "epoch": 0.27784810126582277, + "grad_norm": 1.003538966178894, + "learning_rate": 0.0012450321616507148, + "loss": 1.5873, + "step": 2634 + }, + { + "epoch": 0.2779535864978903, + "grad_norm": 0.7786024808883667, + "learning_rate": 0.0012448435309251995, + "loss": 1.6053, + "step": 2635 + }, + { + "epoch": 0.2780590717299578, + "grad_norm": 0.8942409753799438, + "learning_rate": 0.001244654844749999, + "loss": 1.5624, + "step": 2636 + }, + { + "epoch": 0.2781645569620253, + "grad_norm": 0.9214422106742859, + "learning_rate": 0.0012444661031462566, + "loss": 1.5933, + "step": 2637 + }, + { + "epoch": 0.27827004219409285, + "grad_norm": 0.7166975140571594, + "learning_rate": 0.0012442773061351216, + "loss": 1.5527, + "step": 2638 + }, + { + "epoch": 0.27837552742616034, + "grad_norm": 0.8898508548736572, + "learning_rate": 0.0012440884537377498, + "loss": 1.5632, + "step": 2639 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 0.7046276330947876, + "learning_rate": 0.001243899545975303, + "loss": 1.5533, + "step": 2640 + }, + { + "epoch": 0.2785864978902954, + 
"grad_norm": 0.8511481881141663, + "learning_rate": 0.0012437105828689494, + "loss": 1.578, + "step": 2641 + }, + { + "epoch": 0.2786919831223629, + "grad_norm": 0.957596480846405, + "learning_rate": 0.0012435215644398632, + "loss": 1.5473, + "step": 2642 + }, + { + "epoch": 0.27879746835443037, + "grad_norm": 0.7404864430427551, + "learning_rate": 0.0012433324907092243, + "loss": 1.5538, + "step": 2643 + }, + { + "epoch": 0.2789029535864979, + "grad_norm": 1.0649288892745972, + "learning_rate": 0.0012431433616982204, + "loss": 1.5304, + "step": 2644 + }, + { + "epoch": 0.2790084388185654, + "grad_norm": 0.7162666320800781, + "learning_rate": 0.0012429541774280435, + "loss": 1.5985, + "step": 2645 + }, + { + "epoch": 0.2791139240506329, + "grad_norm": 1.118582844734192, + "learning_rate": 0.0012427649379198932, + "loss": 1.5692, + "step": 2646 + }, + { + "epoch": 0.27921940928270045, + "grad_norm": 0.8393739461898804, + "learning_rate": 0.0012425756431949742, + "loss": 1.5721, + "step": 2647 + }, + { + "epoch": 0.27932489451476794, + "grad_norm": 0.8587276339530945, + "learning_rate": 0.001242386293274498, + "loss": 1.5687, + "step": 2648 + }, + { + "epoch": 0.27943037974683543, + "grad_norm": 1.0668492317199707, + "learning_rate": 0.0012421968881796827, + "loss": 1.5624, + "step": 2649 + }, + { + "epoch": 0.2795358649789029, + "grad_norm": 0.8085271716117859, + "learning_rate": 0.0012420074279317515, + "loss": 1.5723, + "step": 2650 + }, + { + "epoch": 0.2796413502109705, + "grad_norm": 1.0576512813568115, + "learning_rate": 0.001241817912551935, + "loss": 1.5518, + "step": 2651 + }, + { + "epoch": 0.27974683544303797, + "grad_norm": 0.715470552444458, + "learning_rate": 0.0012416283420614686, + "loss": 1.5781, + "step": 2652 + }, + { + "epoch": 0.27985232067510546, + "grad_norm": 0.7801958918571472, + "learning_rate": 0.0012414387164815953, + "loss": 1.5831, + "step": 2653 + }, + { + "epoch": 0.279957805907173, + "grad_norm": 0.7420521974563599, + 
"learning_rate": 0.001241249035833563, + "loss": 1.5785, + "step": 2654 + }, + { + "epoch": 0.2800632911392405, + "grad_norm": 0.760363757610321, + "learning_rate": 0.0012410593001386267, + "loss": 1.5547, + "step": 2655 + }, + { + "epoch": 0.280168776371308, + "grad_norm": 0.7763776779174805, + "learning_rate": 0.0012408695094180474, + "loss": 1.5477, + "step": 2656 + }, + { + "epoch": 0.28027426160337554, + "grad_norm": 0.6976529955863953, + "learning_rate": 0.0012406796636930918, + "loss": 1.5374, + "step": 2657 + }, + { + "epoch": 0.28037974683544303, + "grad_norm": 0.6970902681350708, + "learning_rate": 0.001240489762985033, + "loss": 1.5065, + "step": 2658 + }, + { + "epoch": 0.2804852320675105, + "grad_norm": 0.7243312001228333, + "learning_rate": 0.0012402998073151505, + "loss": 1.569, + "step": 2659 + }, + { + "epoch": 0.2805907172995781, + "grad_norm": 0.6166459918022156, + "learning_rate": 0.0012401097967047298, + "loss": 1.5644, + "step": 2660 + }, + { + "epoch": 0.28069620253164557, + "grad_norm": 0.7252774834632874, + "learning_rate": 0.0012399197311750623, + "loss": 1.5881, + "step": 2661 + }, + { + "epoch": 0.28080168776371306, + "grad_norm": 0.7840885519981384, + "learning_rate": 0.001239729610747446, + "loss": 1.5724, + "step": 2662 + }, + { + "epoch": 0.2809071729957806, + "grad_norm": 0.846058189868927, + "learning_rate": 0.001239539435443185, + "loss": 1.5569, + "step": 2663 + }, + { + "epoch": 0.2810126582278481, + "grad_norm": 0.6437597870826721, + "learning_rate": 0.001239349205283589, + "loss": 1.5496, + "step": 2664 + }, + { + "epoch": 0.2811181434599156, + "grad_norm": 0.7237761616706848, + "learning_rate": 0.0012391589202899746, + "loss": 1.5638, + "step": 2665 + }, + { + "epoch": 0.28122362869198314, + "grad_norm": 0.7092918157577515, + "learning_rate": 0.001238968580483664, + "loss": 1.5511, + "step": 2666 + }, + { + "epoch": 0.28132911392405063, + "grad_norm": 0.7795864939689636, + "learning_rate": 0.0012387781858859857, + "loss": 
1.5575, + "step": 2667 + }, + { + "epoch": 0.2814345991561181, + "grad_norm": 0.6848478317260742, + "learning_rate": 0.0012385877365182743, + "loss": 1.5466, + "step": 2668 + }, + { + "epoch": 0.2815400843881857, + "grad_norm": 0.7708088159561157, + "learning_rate": 0.0012383972324018708, + "loss": 1.5675, + "step": 2669 + }, + { + "epoch": 0.28164556962025317, + "grad_norm": 0.698846161365509, + "learning_rate": 0.001238206673558122, + "loss": 1.5561, + "step": 2670 + }, + { + "epoch": 0.28175105485232066, + "grad_norm": 0.9865152835845947, + "learning_rate": 0.001238016060008381, + "loss": 1.585, + "step": 2671 + }, + { + "epoch": 0.2818565400843882, + "grad_norm": 0.9701254367828369, + "learning_rate": 0.0012378253917740072, + "loss": 1.5734, + "step": 2672 + }, + { + "epoch": 0.2819620253164557, + "grad_norm": 0.6891840100288391, + "learning_rate": 0.0012376346688763656, + "loss": 1.6006, + "step": 2673 + }, + { + "epoch": 0.2820675105485232, + "grad_norm": 0.8368023633956909, + "learning_rate": 0.0012374438913368277, + "loss": 1.5318, + "step": 2674 + }, + { + "epoch": 0.28217299578059074, + "grad_norm": 1.0202275514602661, + "learning_rate": 0.0012372530591767711, + "loss": 1.5559, + "step": 2675 + }, + { + "epoch": 0.28227848101265823, + "grad_norm": 0.6590669751167297, + "learning_rate": 0.0012370621724175797, + "loss": 1.5644, + "step": 2676 + }, + { + "epoch": 0.2823839662447257, + "grad_norm": 0.6364756226539612, + "learning_rate": 0.0012368712310806432, + "loss": 1.5269, + "step": 2677 + }, + { + "epoch": 0.2824894514767933, + "grad_norm": 0.7392777800559998, + "learning_rate": 0.0012366802351873574, + "loss": 1.5286, + "step": 2678 + }, + { + "epoch": 0.28259493670886077, + "grad_norm": 0.8968419432640076, + "learning_rate": 0.0012364891847591246, + "loss": 1.5665, + "step": 2679 + }, + { + "epoch": 0.28270042194092826, + "grad_norm": 0.6944916844367981, + "learning_rate": 0.0012362980798173526, + "loss": 1.5384, + "step": 2680 + }, + { + "epoch": 
0.2828059071729958, + "grad_norm": 0.7169219255447388, + "learning_rate": 0.0012361069203834561, + "loss": 1.5353, + "step": 2681 + }, + { + "epoch": 0.2829113924050633, + "grad_norm": 0.841817319393158, + "learning_rate": 0.0012359157064788548, + "loss": 1.5221, + "step": 2682 + }, + { + "epoch": 0.2830168776371308, + "grad_norm": 0.8008159399032593, + "learning_rate": 0.0012357244381249759, + "loss": 1.5765, + "step": 2683 + }, + { + "epoch": 0.2831223628691983, + "grad_norm": 0.602039098739624, + "learning_rate": 0.0012355331153432517, + "loss": 1.5755, + "step": 2684 + }, + { + "epoch": 0.28322784810126583, + "grad_norm": 0.8129308223724365, + "learning_rate": 0.0012353417381551206, + "loss": 1.535, + "step": 2685 + }, + { + "epoch": 0.2833333333333333, + "grad_norm": 0.6172975897789001, + "learning_rate": 0.001235150306582028, + "loss": 1.5507, + "step": 2686 + }, + { + "epoch": 0.2834388185654008, + "grad_norm": 0.883161187171936, + "learning_rate": 0.001234958820645424, + "loss": 1.5716, + "step": 2687 + }, + { + "epoch": 0.28354430379746837, + "grad_norm": 0.7506101727485657, + "learning_rate": 0.0012347672803667662, + "loss": 1.5566, + "step": 2688 + }, + { + "epoch": 0.28364978902953586, + "grad_norm": 0.681428074836731, + "learning_rate": 0.0012345756857675171, + "loss": 1.5681, + "step": 2689 + }, + { + "epoch": 0.28375527426160335, + "grad_norm": 0.7770109176635742, + "learning_rate": 0.0012343840368691462, + "loss": 1.6059, + "step": 2690 + }, + { + "epoch": 0.2838607594936709, + "grad_norm": 0.599473774433136, + "learning_rate": 0.0012341923336931287, + "loss": 1.5718, + "step": 2691 + }, + { + "epoch": 0.2839662447257384, + "grad_norm": 0.6256713271141052, + "learning_rate": 0.0012340005762609457, + "loss": 1.5454, + "step": 2692 + }, + { + "epoch": 0.2840717299578059, + "grad_norm": 0.8309346437454224, + "learning_rate": 0.0012338087645940847, + "loss": 1.5846, + "step": 2693 + }, + { + "epoch": 0.28417721518987343, + "grad_norm": 
0.8293386697769165, + "learning_rate": 0.001233616898714039, + "loss": 1.5297, + "step": 2694 + }, + { + "epoch": 0.2842827004219409, + "grad_norm": 0.6766045093536377, + "learning_rate": 0.0012334249786423086, + "loss": 1.5676, + "step": 2695 + }, + { + "epoch": 0.2843881856540084, + "grad_norm": 0.7309761047363281, + "learning_rate": 0.0012332330044003987, + "loss": 1.6057, + "step": 2696 + }, + { + "epoch": 0.28449367088607597, + "grad_norm": 0.8214983940124512, + "learning_rate": 0.0012330409760098208, + "loss": 1.5348, + "step": 2697 + }, + { + "epoch": 0.28459915611814346, + "grad_norm": 0.7108471989631653, + "learning_rate": 0.0012328488934920932, + "loss": 1.5543, + "step": 2698 + }, + { + "epoch": 0.28470464135021095, + "grad_norm": 0.7197054624557495, + "learning_rate": 0.001232656756868739, + "loss": 1.5432, + "step": 2699 + }, + { + "epoch": 0.2848101265822785, + "grad_norm": 0.8608608841896057, + "learning_rate": 0.0012324645661612886, + "loss": 1.51, + "step": 2700 + }, + { + "epoch": 0.284915611814346, + "grad_norm": 0.6828679442405701, + "learning_rate": 0.001232272321391278, + "loss": 1.5994, + "step": 2701 + }, + { + "epoch": 0.2850210970464135, + "grad_norm": 0.6844384670257568, + "learning_rate": 0.0012320800225802488, + "loss": 1.5793, + "step": 2702 + }, + { + "epoch": 0.28512658227848103, + "grad_norm": 0.6846449971199036, + "learning_rate": 0.001231887669749749, + "loss": 1.5731, + "step": 2703 + }, + { + "epoch": 0.2852320675105485, + "grad_norm": 0.6606101989746094, + "learning_rate": 0.0012316952629213332, + "loss": 1.5405, + "step": 2704 + }, + { + "epoch": 0.285337552742616, + "grad_norm": 0.6681669354438782, + "learning_rate": 0.001231502802116561, + "loss": 1.5699, + "step": 2705 + }, + { + "epoch": 0.28544303797468357, + "grad_norm": 0.6272997260093689, + "learning_rate": 0.0012313102873569993, + "loss": 1.5627, + "step": 2706 + }, + { + "epoch": 0.28554852320675106, + "grad_norm": 0.6707155108451843, + "learning_rate": 
0.0012311177186642194, + "loss": 1.5572, + "step": 2707 + }, + { + "epoch": 0.28565400843881855, + "grad_norm": 0.733778715133667, + "learning_rate": 0.0012309250960598, + "loss": 1.5347, + "step": 2708 + }, + { + "epoch": 0.2857594936708861, + "grad_norm": 0.7025527358055115, + "learning_rate": 0.0012307324195653256, + "loss": 1.5418, + "step": 2709 + }, + { + "epoch": 0.2858649789029536, + "grad_norm": 0.6995662450790405, + "learning_rate": 0.0012305396892023867, + "loss": 1.5607, + "step": 2710 + }, + { + "epoch": 0.2859704641350211, + "grad_norm": 0.7837663888931274, + "learning_rate": 0.0012303469049925791, + "loss": 1.5694, + "step": 2711 + }, + { + "epoch": 0.28607594936708863, + "grad_norm": 0.6405348777770996, + "learning_rate": 0.001230154066957506, + "loss": 1.5989, + "step": 2712 + }, + { + "epoch": 0.2861814345991561, + "grad_norm": 0.632402241230011, + "learning_rate": 0.001229961175118775, + "loss": 1.5423, + "step": 2713 + }, + { + "epoch": 0.2862869198312236, + "grad_norm": 0.7080779075622559, + "learning_rate": 0.0012297682294980013, + "loss": 1.5863, + "step": 2714 + }, + { + "epoch": 0.28639240506329117, + "grad_norm": 0.6844183802604675, + "learning_rate": 0.0012295752301168048, + "loss": 1.574, + "step": 2715 + }, + { + "epoch": 0.28649789029535866, + "grad_norm": 0.6688645482063293, + "learning_rate": 0.0012293821769968126, + "loss": 1.5676, + "step": 2716 + }, + { + "epoch": 0.28660337552742615, + "grad_norm": 0.6780166029930115, + "learning_rate": 0.001229189070159657, + "loss": 1.5841, + "step": 2717 + }, + { + "epoch": 0.28670886075949364, + "grad_norm": 0.6167871952056885, + "learning_rate": 0.0012289959096269767, + "loss": 1.5302, + "step": 2718 + }, + { + "epoch": 0.2868143459915612, + "grad_norm": 0.6785048842430115, + "learning_rate": 0.0012288026954204165, + "loss": 1.5255, + "step": 2719 + }, + { + "epoch": 0.2869198312236287, + "grad_norm": 0.6530604958534241, + "learning_rate": 0.0012286094275616264, + "loss": 1.5375, + "step": 
2720 + }, + { + "epoch": 0.2870253164556962, + "grad_norm": 0.6820730566978455, + "learning_rate": 0.0012284161060722634, + "loss": 1.5382, + "step": 2721 + }, + { + "epoch": 0.2871308016877637, + "grad_norm": 0.726412832736969, + "learning_rate": 0.00122822273097399, + "loss": 1.5324, + "step": 2722 + }, + { + "epoch": 0.2872362869198312, + "grad_norm": 0.6559046506881714, + "learning_rate": 0.0012280293022884753, + "loss": 1.5583, + "step": 2723 + }, + { + "epoch": 0.2873417721518987, + "grad_norm": 0.836555004119873, + "learning_rate": 0.0012278358200373935, + "loss": 1.5178, + "step": 2724 + }, + { + "epoch": 0.28744725738396626, + "grad_norm": 0.6658849716186523, + "learning_rate": 0.001227642284242425, + "loss": 1.5466, + "step": 2725 + }, + { + "epoch": 0.28755274261603375, + "grad_norm": 0.6525724530220032, + "learning_rate": 0.0012274486949252572, + "loss": 1.5595, + "step": 2726 + }, + { + "epoch": 0.28765822784810124, + "grad_norm": 0.6522358655929565, + "learning_rate": 0.0012272550521075824, + "loss": 1.5416, + "step": 2727 + }, + { + "epoch": 0.2877637130801688, + "grad_norm": 0.6323016285896301, + "learning_rate": 0.0012270613558110993, + "loss": 1.5173, + "step": 2728 + }, + { + "epoch": 0.2878691983122363, + "grad_norm": 0.7045419216156006, + "learning_rate": 0.001226867606057512, + "loss": 1.5634, + "step": 2729 + }, + { + "epoch": 0.2879746835443038, + "grad_norm": 0.6568433046340942, + "learning_rate": 0.0012266738028685318, + "loss": 1.5479, + "step": 2730 + }, + { + "epoch": 0.2880801687763713, + "grad_norm": 0.6718873977661133, + "learning_rate": 0.001226479946265875, + "loss": 1.5538, + "step": 2731 + }, + { + "epoch": 0.2881856540084388, + "grad_norm": 0.857623279094696, + "learning_rate": 0.0012262860362712645, + "loss": 1.5646, + "step": 2732 + }, + { + "epoch": 0.2882911392405063, + "grad_norm": 0.7745062112808228, + "learning_rate": 0.0012260920729064285, + "loss": 1.5557, + "step": 2733 + }, + { + "epoch": 0.28839662447257386, + 
"grad_norm": 0.6937345266342163, + "learning_rate": 0.0012258980561931016, + "loss": 1.5372, + "step": 2734 + }, + { + "epoch": 0.28850210970464135, + "grad_norm": 0.8195507526397705, + "learning_rate": 0.0012257039861530246, + "loss": 1.5245, + "step": 2735 + }, + { + "epoch": 0.28860759493670884, + "grad_norm": 0.7714326977729797, + "learning_rate": 0.0012255098628079439, + "loss": 1.5264, + "step": 2736 + }, + { + "epoch": 0.2887130801687764, + "grad_norm": 0.9742757081985474, + "learning_rate": 0.0012253156861796119, + "loss": 1.5372, + "step": 2737 + }, + { + "epoch": 0.2888185654008439, + "grad_norm": 0.7925447225570679, + "learning_rate": 0.0012251214562897872, + "loss": 1.5675, + "step": 2738 + }, + { + "epoch": 0.2889240506329114, + "grad_norm": 1.1152740716934204, + "learning_rate": 0.0012249271731602342, + "loss": 1.5996, + "step": 2739 + }, + { + "epoch": 0.2890295358649789, + "grad_norm": 1.0198121070861816, + "learning_rate": 0.001224732836812723, + "loss": 1.5916, + "step": 2740 + }, + { + "epoch": 0.2891350210970464, + "grad_norm": 0.7592471837997437, + "learning_rate": 0.0012245384472690302, + "loss": 1.5552, + "step": 2741 + }, + { + "epoch": 0.2892405063291139, + "grad_norm": 0.6682032346725464, + "learning_rate": 0.0012243440045509384, + "loss": 1.5691, + "step": 2742 + }, + { + "epoch": 0.28934599156118146, + "grad_norm": 0.7108476758003235, + "learning_rate": 0.0012241495086802356, + "loss": 1.5276, + "step": 2743 + }, + { + "epoch": 0.28945147679324895, + "grad_norm": 0.7498525381088257, + "learning_rate": 0.0012239549596787158, + "loss": 1.5622, + "step": 2744 + }, + { + "epoch": 0.28955696202531644, + "grad_norm": 0.785966157913208, + "learning_rate": 0.0012237603575681797, + "loss": 1.5701, + "step": 2745 + }, + { + "epoch": 0.289662447257384, + "grad_norm": 0.7373350262641907, + "learning_rate": 0.0012235657023704327, + "loss": 1.5636, + "step": 2746 + }, + { + "epoch": 0.2897679324894515, + "grad_norm": 0.9528790712356567, + 
"learning_rate": 0.001223370994107288, + "loss": 1.5501, + "step": 2747 + }, + { + "epoch": 0.289873417721519, + "grad_norm": 0.6805464625358582, + "learning_rate": 0.0012231762328005623, + "loss": 1.5704, + "step": 2748 + }, + { + "epoch": 0.28997890295358647, + "grad_norm": 0.876691460609436, + "learning_rate": 0.0012229814184720805, + "loss": 1.5771, + "step": 2749 + }, + { + "epoch": 0.290084388185654, + "grad_norm": 0.6786679625511169, + "learning_rate": 0.0012227865511436724, + "loss": 1.5615, + "step": 2750 + }, + { + "epoch": 0.2901898734177215, + "grad_norm": 1.0442060232162476, + "learning_rate": 0.0012225916308371736, + "loss": 1.5382, + "step": 2751 + }, + { + "epoch": 0.290295358649789, + "grad_norm": 0.8850172162055969, + "learning_rate": 0.001222396657574426, + "loss": 1.573, + "step": 2752 + }, + { + "epoch": 0.29040084388185655, + "grad_norm": 0.6887724995613098, + "learning_rate": 0.0012222016313772773, + "loss": 1.5469, + "step": 2753 + }, + { + "epoch": 0.29050632911392404, + "grad_norm": 0.8471478819847107, + "learning_rate": 0.0012220065522675811, + "loss": 1.5787, + "step": 2754 + }, + { + "epoch": 0.29061181434599154, + "grad_norm": 0.660945475101471, + "learning_rate": 0.0012218114202671973, + "loss": 1.5868, + "step": 2755 + }, + { + "epoch": 0.2907172995780591, + "grad_norm": 1.0804190635681152, + "learning_rate": 0.001221616235397991, + "loss": 1.5724, + "step": 2756 + }, + { + "epoch": 0.2908227848101266, + "grad_norm": 0.7156038284301758, + "learning_rate": 0.001221420997681834, + "loss": 1.5405, + "step": 2757 + }, + { + "epoch": 0.29092827004219407, + "grad_norm": 0.7107678651809692, + "learning_rate": 0.0012212257071406037, + "loss": 1.5556, + "step": 2758 + }, + { + "epoch": 0.2910337552742616, + "grad_norm": 0.6941881775856018, + "learning_rate": 0.0012210303637961828, + "loss": 1.6145, + "step": 2759 + }, + { + "epoch": 0.2911392405063291, + "grad_norm": 0.7649319767951965, + "learning_rate": 0.001220834967670461, + "loss": 
1.5605, + "step": 2760 + }, + { + "epoch": 0.2912447257383966, + "grad_norm": 0.8671178221702576, + "learning_rate": 0.0012206395187853334, + "loss": 1.5678, + "step": 2761 + }, + { + "epoch": 0.29135021097046415, + "grad_norm": 0.7132166028022766, + "learning_rate": 0.0012204440171627005, + "loss": 1.5812, + "step": 2762 + }, + { + "epoch": 0.29145569620253164, + "grad_norm": 0.6478742361068726, + "learning_rate": 0.00122024846282447, + "loss": 1.586, + "step": 2763 + }, + { + "epoch": 0.29156118143459914, + "grad_norm": 0.7554233074188232, + "learning_rate": 0.0012200528557925543, + "loss": 1.5176, + "step": 2764 + }, + { + "epoch": 0.2916666666666667, + "grad_norm": 0.8482685685157776, + "learning_rate": 0.0012198571960888721, + "loss": 1.5727, + "step": 2765 + }, + { + "epoch": 0.2917721518987342, + "grad_norm": 0.7302841544151306, + "learning_rate": 0.0012196614837353481, + "loss": 1.5402, + "step": 2766 + }, + { + "epoch": 0.29187763713080167, + "grad_norm": 0.6667806506156921, + "learning_rate": 0.001219465718753913, + "loss": 1.5479, + "step": 2767 + }, + { + "epoch": 0.2919831223628692, + "grad_norm": 0.8929007053375244, + "learning_rate": 0.0012192699011665034, + "loss": 1.5336, + "step": 2768 + }, + { + "epoch": 0.2920886075949367, + "grad_norm": 0.751361608505249, + "learning_rate": 0.0012190740309950612, + "loss": 1.5581, + "step": 2769 + }, + { + "epoch": 0.2921940928270042, + "grad_norm": 0.6707968711853027, + "learning_rate": 0.0012188781082615346, + "loss": 1.5773, + "step": 2770 + }, + { + "epoch": 0.29229957805907175, + "grad_norm": 0.6729738712310791, + "learning_rate": 0.0012186821329878783, + "loss": 1.5588, + "step": 2771 + }, + { + "epoch": 0.29240506329113924, + "grad_norm": 0.6167835593223572, + "learning_rate": 0.0012184861051960517, + "loss": 1.5675, + "step": 2772 + }, + { + "epoch": 0.29251054852320674, + "grad_norm": 0.6649729609489441, + "learning_rate": 0.001218290024908021, + "loss": 1.5613, + "step": 2773 + }, + { + "epoch": 
0.2926160337552743, + "grad_norm": 0.6372507214546204, + "learning_rate": 0.0012180938921457576, + "loss": 1.5567, + "step": 2774 + }, + { + "epoch": 0.2927215189873418, + "grad_norm": 0.6579638123512268, + "learning_rate": 0.00121789770693124, + "loss": 1.5523, + "step": 2775 + }, + { + "epoch": 0.29282700421940927, + "grad_norm": 0.6667876243591309, + "learning_rate": 0.001217701469286451, + "loss": 1.5606, + "step": 2776 + }, + { + "epoch": 0.2929324894514768, + "grad_norm": 0.6624444127082825, + "learning_rate": 0.00121750517923338, + "loss": 1.5773, + "step": 2777 + }, + { + "epoch": 0.2930379746835443, + "grad_norm": 0.6702631115913391, + "learning_rate": 0.0012173088367940228, + "loss": 1.5408, + "step": 2778 + }, + { + "epoch": 0.2931434599156118, + "grad_norm": 0.6434685587882996, + "learning_rate": 0.0012171124419903799, + "loss": 1.5555, + "step": 2779 + }, + { + "epoch": 0.29324894514767935, + "grad_norm": 0.67312091588974, + "learning_rate": 0.0012169159948444588, + "loss": 1.5343, + "step": 2780 + }, + { + "epoch": 0.29335443037974684, + "grad_norm": 0.7171863317489624, + "learning_rate": 0.001216719495378272, + "loss": 1.5604, + "step": 2781 + }, + { + "epoch": 0.29345991561181434, + "grad_norm": 0.6628344655036926, + "learning_rate": 0.0012165229436138388, + "loss": 1.5578, + "step": 2782 + }, + { + "epoch": 0.29356540084388183, + "grad_norm": 0.7110141515731812, + "learning_rate": 0.0012163263395731834, + "loss": 1.5352, + "step": 2783 + }, + { + "epoch": 0.2936708860759494, + "grad_norm": 0.8437027335166931, + "learning_rate": 0.0012161296832783363, + "loss": 1.56, + "step": 2784 + }, + { + "epoch": 0.29377637130801687, + "grad_norm": 0.7414014339447021, + "learning_rate": 0.0012159329747513338, + "loss": 1.549, + "step": 2785 + }, + { + "epoch": 0.29388185654008436, + "grad_norm": 0.592193067073822, + "learning_rate": 0.001215736214014218, + "loss": 1.5128, + "step": 2786 + }, + { + "epoch": 0.2939873417721519, + "grad_norm": 0.7688472270965576, 
+ "learning_rate": 0.001215539401089037, + "loss": 1.544, + "step": 2787 + }, + { + "epoch": 0.2940928270042194, + "grad_norm": 0.8096864223480225, + "learning_rate": 0.0012153425359978452, + "loss": 1.576, + "step": 2788 + }, + { + "epoch": 0.2941983122362869, + "grad_norm": 0.7058255076408386, + "learning_rate": 0.0012151456187627016, + "loss": 1.5741, + "step": 2789 + }, + { + "epoch": 0.29430379746835444, + "grad_norm": 0.6586410403251648, + "learning_rate": 0.001214948649405672, + "loss": 1.5536, + "step": 2790 + }, + { + "epoch": 0.29440928270042194, + "grad_norm": 0.7951847314834595, + "learning_rate": 0.0012147516279488275, + "loss": 1.5718, + "step": 2791 + }, + { + "epoch": 0.29451476793248943, + "grad_norm": 1.037614107131958, + "learning_rate": 0.0012145545544142461, + "loss": 1.5192, + "step": 2792 + }, + { + "epoch": 0.294620253164557, + "grad_norm": 0.7057431936264038, + "learning_rate": 0.00121435742882401, + "loss": 1.5585, + "step": 2793 + }, + { + "epoch": 0.29472573839662447, + "grad_norm": 0.8927338123321533, + "learning_rate": 0.001214160251200209, + "loss": 1.565, + "step": 2794 + }, + { + "epoch": 0.29483122362869196, + "grad_norm": 0.745470404624939, + "learning_rate": 0.0012139630215649369, + "loss": 1.5341, + "step": 2795 + }, + { + "epoch": 0.2949367088607595, + "grad_norm": 0.8155547976493835, + "learning_rate": 0.0012137657399402947, + "loss": 1.5786, + "step": 2796 + }, + { + "epoch": 0.295042194092827, + "grad_norm": 1.0948604345321655, + "learning_rate": 0.0012135684063483891, + "loss": 1.5501, + "step": 2797 + }, + { + "epoch": 0.2951476793248945, + "grad_norm": 0.7592798471450806, + "learning_rate": 0.0012133710208113318, + "loss": 1.571, + "step": 2798 + }, + { + "epoch": 0.29525316455696204, + "grad_norm": 1.0272746086120605, + "learning_rate": 0.0012131735833512411, + "loss": 1.5569, + "step": 2799 + }, + { + "epoch": 0.29535864978902954, + "grad_norm": 0.9437803626060486, + "learning_rate": 0.0012129760939902407, + "loss": 
1.5235, + "step": 2800 + }, + { + "epoch": 0.29546413502109703, + "grad_norm": 0.7075207233428955, + "learning_rate": 0.0012127785527504603, + "loss": 1.5613, + "step": 2801 + }, + { + "epoch": 0.2955696202531646, + "grad_norm": 0.6963232755661011, + "learning_rate": 0.0012125809596540357, + "loss": 1.526, + "step": 2802 + }, + { + "epoch": 0.29567510548523207, + "grad_norm": 0.7464491128921509, + "learning_rate": 0.0012123833147231079, + "loss": 1.5627, + "step": 2803 + }, + { + "epoch": 0.29578059071729956, + "grad_norm": 0.7728781700134277, + "learning_rate": 0.0012121856179798237, + "loss": 1.5602, + "step": 2804 + }, + { + "epoch": 0.2958860759493671, + "grad_norm": 0.8229570388793945, + "learning_rate": 0.0012119878694463366, + "loss": 1.5318, + "step": 2805 + }, + { + "epoch": 0.2959915611814346, + "grad_norm": 0.6174908876419067, + "learning_rate": 0.001211790069144805, + "loss": 1.5491, + "step": 2806 + }, + { + "epoch": 0.2960970464135021, + "grad_norm": 0.7845272421836853, + "learning_rate": 0.0012115922170973935, + "loss": 1.5493, + "step": 2807 + }, + { + "epoch": 0.29620253164556964, + "grad_norm": 0.7002971768379211, + "learning_rate": 0.0012113943133262722, + "loss": 1.5417, + "step": 2808 + }, + { + "epoch": 0.29630801687763714, + "grad_norm": 0.7506234645843506, + "learning_rate": 0.0012111963578536177, + "loss": 1.5795, + "step": 2809 + }, + { + "epoch": 0.29641350210970463, + "grad_norm": 0.7024001479148865, + "learning_rate": 0.0012109983507016114, + "loss": 1.5566, + "step": 2810 + }, + { + "epoch": 0.2965189873417722, + "grad_norm": 0.6907679438591003, + "learning_rate": 0.0012108002918924411, + "loss": 1.5433, + "step": 2811 + }, + { + "epoch": 0.29662447257383967, + "grad_norm": 0.7173947691917419, + "learning_rate": 0.0012106021814483007, + "loss": 1.5418, + "step": 2812 + }, + { + "epoch": 0.29672995780590716, + "grad_norm": 0.6407349705696106, + "learning_rate": 0.0012104040193913884, + "loss": 1.5502, + "step": 2813 + }, + { + "epoch": 
0.2968354430379747, + "grad_norm": 0.7252712249755859, + "learning_rate": 0.0012102058057439104, + "loss": 1.5905, + "step": 2814 + }, + { + "epoch": 0.2969409282700422, + "grad_norm": 0.6646013855934143, + "learning_rate": 0.001210007540528077, + "loss": 1.5543, + "step": 2815 + }, + { + "epoch": 0.2970464135021097, + "grad_norm": 0.7760693430900574, + "learning_rate": 0.0012098092237661049, + "loss": 1.5201, + "step": 2816 + }, + { + "epoch": 0.2971518987341772, + "grad_norm": 0.7249820828437805, + "learning_rate": 0.0012096108554802165, + "loss": 1.5575, + "step": 2817 + }, + { + "epoch": 0.29725738396624474, + "grad_norm": 0.7341113686561584, + "learning_rate": 0.0012094124356926397, + "loss": 1.5903, + "step": 2818 + }, + { + "epoch": 0.29736286919831223, + "grad_norm": 0.9478294253349304, + "learning_rate": 0.001209213964425609, + "loss": 1.5658, + "step": 2819 + }, + { + "epoch": 0.2974683544303797, + "grad_norm": 0.7597594857215881, + "learning_rate": 0.0012090154417013636, + "loss": 1.5857, + "step": 2820 + }, + { + "epoch": 0.29757383966244727, + "grad_norm": 0.7860878705978394, + "learning_rate": 0.0012088168675421487, + "loss": 1.5373, + "step": 2821 + }, + { + "epoch": 0.29767932489451476, + "grad_norm": 1.020045518875122, + "learning_rate": 0.0012086182419702165, + "loss": 1.5367, + "step": 2822 + }, + { + "epoch": 0.29778481012658226, + "grad_norm": 0.706844687461853, + "learning_rate": 0.0012084195650078232, + "loss": 1.5553, + "step": 2823 + }, + { + "epoch": 0.2978902953586498, + "grad_norm": 1.09311842918396, + "learning_rate": 0.001208220836677232, + "loss": 1.5215, + "step": 2824 + }, + { + "epoch": 0.2979957805907173, + "grad_norm": 1.039456844329834, + "learning_rate": 0.0012080220570007108, + "loss": 1.5363, + "step": 2825 + }, + { + "epoch": 0.2981012658227848, + "grad_norm": 0.7109678983688354, + "learning_rate": 0.001207823226000534, + "loss": 1.5411, + "step": 2826 + }, + { + "epoch": 0.29820675105485234, + "grad_norm": 
1.3221694231033325, + "learning_rate": 0.0012076243436989823, + "loss": 1.5092, + "step": 2827 + }, + { + "epoch": 0.29831223628691983, + "grad_norm": 0.8056342005729675, + "learning_rate": 0.0012074254101183408, + "loss": 1.5363, + "step": 2828 + }, + { + "epoch": 0.2984177215189873, + "grad_norm": 0.8442791104316711, + "learning_rate": 0.001207226425280901, + "loss": 1.5387, + "step": 2829 + }, + { + "epoch": 0.29852320675105487, + "grad_norm": 0.9887701869010925, + "learning_rate": 0.0012070273892089605, + "loss": 1.5374, + "step": 2830 + }, + { + "epoch": 0.29862869198312236, + "grad_norm": 0.6883377432823181, + "learning_rate": 0.001206828301924822, + "loss": 1.5202, + "step": 2831 + }, + { + "epoch": 0.29873417721518986, + "grad_norm": 1.052254319190979, + "learning_rate": 0.0012066291634507944, + "loss": 1.5579, + "step": 2832 + }, + { + "epoch": 0.2988396624472574, + "grad_norm": 0.7370201349258423, + "learning_rate": 0.001206429973809192, + "loss": 1.5361, + "step": 2833 + }, + { + "epoch": 0.2989451476793249, + "grad_norm": 0.9314294457435608, + "learning_rate": 0.001206230733022335, + "loss": 1.5846, + "step": 2834 + }, + { + "epoch": 0.2990506329113924, + "grad_norm": 1.3604544401168823, + "learning_rate": 0.0012060314411125497, + "loss": 1.5199, + "step": 2835 + }, + { + "epoch": 0.29915611814345994, + "grad_norm": 0.6937543749809265, + "learning_rate": 0.0012058320981021672, + "loss": 1.5128, + "step": 2836 + }, + { + "epoch": 0.29926160337552743, + "grad_norm": 1.4042043685913086, + "learning_rate": 0.001205632704013525, + "loss": 1.5381, + "step": 2837 + }, + { + "epoch": 0.2993670886075949, + "grad_norm": 0.7008637189865112, + "learning_rate": 0.0012054332588689667, + "loss": 1.5222, + "step": 2838 + }, + { + "epoch": 0.29947257383966247, + "grad_norm": 1.0943379402160645, + "learning_rate": 0.0012052337626908406, + "loss": 1.5296, + "step": 2839 + }, + { + "epoch": 0.29957805907172996, + "grad_norm": 0.7459411025047302, + "learning_rate": 
0.0012050342155015012, + "loss": 1.5688, + "step": 2840 + }, + { + "epoch": 0.29968354430379746, + "grad_norm": 0.7907518744468689, + "learning_rate": 0.0012048346173233091, + "loss": 1.5207, + "step": 2841 + }, + { + "epoch": 0.299789029535865, + "grad_norm": 0.8796839714050293, + "learning_rate": 0.0012046349681786304, + "loss": 1.5698, + "step": 2842 + }, + { + "epoch": 0.2998945147679325, + "grad_norm": 0.7326210141181946, + "learning_rate": 0.001204435268089836, + "loss": 1.5238, + "step": 2843 + }, + { + "epoch": 0.3, + "grad_norm": 0.665722131729126, + "learning_rate": 0.001204235517079304, + "loss": 1.5617, + "step": 2844 + }, + { + "epoch": 0.30010548523206754, + "grad_norm": 0.7092705965042114, + "learning_rate": 0.0012040357151694172, + "loss": 1.5427, + "step": 2845 + }, + { + "epoch": 0.30021097046413503, + "grad_norm": 0.6407773494720459, + "learning_rate": 0.0012038358623825646, + "loss": 1.5485, + "step": 2846 + }, + { + "epoch": 0.3003164556962025, + "grad_norm": 0.6819642782211304, + "learning_rate": 0.0012036359587411405, + "loss": 1.5462, + "step": 2847 + }, + { + "epoch": 0.30042194092827, + "grad_norm": 0.6478820443153381, + "learning_rate": 0.0012034360042675453, + "loss": 1.5113, + "step": 2848 + }, + { + "epoch": 0.30052742616033756, + "grad_norm": 0.7913660407066345, + "learning_rate": 0.0012032359989841849, + "loss": 1.5251, + "step": 2849 + }, + { + "epoch": 0.30063291139240506, + "grad_norm": 0.666246771812439, + "learning_rate": 0.0012030359429134707, + "loss": 1.5422, + "step": 2850 + }, + { + "epoch": 0.30073839662447255, + "grad_norm": 0.9337389469146729, + "learning_rate": 0.00120283583607782, + "loss": 1.5059, + "step": 2851 + }, + { + "epoch": 0.3008438818565401, + "grad_norm": 1.1635243892669678, + "learning_rate": 0.0012026356784996554, + "loss": 1.5482, + "step": 2852 + }, + { + "epoch": 0.3009493670886076, + "grad_norm": 0.6611487865447998, + "learning_rate": 0.0012024354702014066, + "loss": 1.5415, + "step": 2853 + }, + { + 
"epoch": 0.3010548523206751, + "grad_norm": 0.967355489730835, + "learning_rate": 0.0012022352112055071, + "loss": 1.5424, + "step": 2854 + }, + { + "epoch": 0.30116033755274263, + "grad_norm": 0.6852697134017944, + "learning_rate": 0.001202034901534397, + "loss": 1.5894, + "step": 2855 + }, + { + "epoch": 0.3012658227848101, + "grad_norm": 0.8806949853897095, + "learning_rate": 0.0012018345412105223, + "loss": 1.564, + "step": 2856 + }, + { + "epoch": 0.3013713080168776, + "grad_norm": 0.8296533823013306, + "learning_rate": 0.0012016341302563342, + "loss": 1.5513, + "step": 2857 + }, + { + "epoch": 0.30147679324894516, + "grad_norm": 0.795354425907135, + "learning_rate": 0.0012014336686942898, + "loss": 1.5393, + "step": 2858 + }, + { + "epoch": 0.30158227848101266, + "grad_norm": 1.039196491241455, + "learning_rate": 0.0012012331565468518, + "loss": 1.5273, + "step": 2859 + }, + { + "epoch": 0.30168776371308015, + "grad_norm": 0.6827373504638672, + "learning_rate": 0.0012010325938364883, + "loss": 1.5432, + "step": 2860 + }, + { + "epoch": 0.3017932489451477, + "grad_norm": 1.0438569784164429, + "learning_rate": 0.0012008319805856737, + "loss": 1.5517, + "step": 2861 + }, + { + "epoch": 0.3018987341772152, + "grad_norm": 1.1209932565689087, + "learning_rate": 0.0012006313168168878, + "loss": 1.5459, + "step": 2862 + }, + { + "epoch": 0.3020042194092827, + "grad_norm": 0.6604718565940857, + "learning_rate": 0.0012004306025526158, + "loss": 1.5385, + "step": 2863 + }, + { + "epoch": 0.30210970464135023, + "grad_norm": 1.0306836366653442, + "learning_rate": 0.0012002298378153485, + "loss": 1.5187, + "step": 2864 + }, + { + "epoch": 0.3022151898734177, + "grad_norm": 0.7441912889480591, + "learning_rate": 0.001200029022627583, + "loss": 1.5537, + "step": 2865 + }, + { + "epoch": 0.3023206751054852, + "grad_norm": 0.7582967877388, + "learning_rate": 0.0011998281570118213, + "loss": 1.597, + "step": 2866 + }, + { + "epoch": 0.30242616033755276, + "grad_norm": 
0.8933154940605164, + "learning_rate": 0.0011996272409905717, + "loss": 1.5438, + "step": 2867 + }, + { + "epoch": 0.30253164556962026, + "grad_norm": 0.6572579145431519, + "learning_rate": 0.0011994262745863478, + "loss": 1.5347, + "step": 2868 + }, + { + "epoch": 0.30263713080168775, + "grad_norm": 0.834147036075592, + "learning_rate": 0.0011992252578216683, + "loss": 1.4936, + "step": 2869 + }, + { + "epoch": 0.3027426160337553, + "grad_norm": 1.2165489196777344, + "learning_rate": 0.0011990241907190592, + "loss": 1.5853, + "step": 2870 + }, + { + "epoch": 0.3028481012658228, + "grad_norm": 0.639917254447937, + "learning_rate": 0.0011988230733010502, + "loss": 1.5369, + "step": 2871 + }, + { + "epoch": 0.3029535864978903, + "grad_norm": 0.8989267349243164, + "learning_rate": 0.0011986219055901781, + "loss": 1.5437, + "step": 2872 + }, + { + "epoch": 0.30305907172995783, + "grad_norm": 0.5916596055030823, + "learning_rate": 0.0011984206876089842, + "loss": 1.5552, + "step": 2873 + }, + { + "epoch": 0.3031645569620253, + "grad_norm": 1.0687130689620972, + "learning_rate": 0.001198219419380016, + "loss": 1.4852, + "step": 2874 + }, + { + "epoch": 0.3032700421940928, + "grad_norm": 0.7762568593025208, + "learning_rate": 0.0011980181009258273, + "loss": 1.5372, + "step": 2875 + }, + { + "epoch": 0.30337552742616036, + "grad_norm": 0.8354180455207825, + "learning_rate": 0.0011978167322689761, + "loss": 1.5252, + "step": 2876 + }, + { + "epoch": 0.30348101265822786, + "grad_norm": 1.00016450881958, + "learning_rate": 0.001197615313432027, + "loss": 1.529, + "step": 2877 + }, + { + "epoch": 0.30358649789029535, + "grad_norm": 0.6794220209121704, + "learning_rate": 0.00119741384443755, + "loss": 1.5455, + "step": 2878 + }, + { + "epoch": 0.3036919831223629, + "grad_norm": 1.14077627658844, + "learning_rate": 0.001197212325308121, + "loss": 1.5372, + "step": 2879 + }, + { + "epoch": 0.3037974683544304, + "grad_norm": 0.760959267616272, + "learning_rate": 
0.001197010756066321, + "loss": 1.5407, + "step": 2880 + }, + { + "epoch": 0.3039029535864979, + "grad_norm": 0.8234152793884277, + "learning_rate": 0.0011968091367347367, + "loss": 1.5372, + "step": 2881 + }, + { + "epoch": 0.3040084388185654, + "grad_norm": 0.830484926700592, + "learning_rate": 0.0011966074673359602, + "loss": 1.5324, + "step": 2882 + }, + { + "epoch": 0.3041139240506329, + "grad_norm": 0.7051867246627808, + "learning_rate": 0.0011964057478925903, + "loss": 1.5026, + "step": 2883 + }, + { + "epoch": 0.3042194092827004, + "grad_norm": 0.7982526421546936, + "learning_rate": 0.0011962039784272306, + "loss": 1.5367, + "step": 2884 + }, + { + "epoch": 0.3043248945147679, + "grad_norm": 0.7475396394729614, + "learning_rate": 0.0011960021589624897, + "loss": 1.5449, + "step": 2885 + }, + { + "epoch": 0.30443037974683546, + "grad_norm": 0.6697095632553101, + "learning_rate": 0.001195800289520983, + "loss": 1.5424, + "step": 2886 + }, + { + "epoch": 0.30453586497890295, + "grad_norm": 0.765105664730072, + "learning_rate": 0.0011955983701253312, + "loss": 1.5608, + "step": 2887 + }, + { + "epoch": 0.30464135021097044, + "grad_norm": 0.7164781093597412, + "learning_rate": 0.0011953964007981601, + "loss": 1.4979, + "step": 2888 + }, + { + "epoch": 0.304746835443038, + "grad_norm": 1.1808778047561646, + "learning_rate": 0.001195194381562101, + "loss": 1.5735, + "step": 2889 + }, + { + "epoch": 0.3048523206751055, + "grad_norm": 0.6607586145401001, + "learning_rate": 0.0011949923124397917, + "loss": 1.5325, + "step": 2890 + }, + { + "epoch": 0.304957805907173, + "grad_norm": 1.071819543838501, + "learning_rate": 0.0011947901934538747, + "loss": 1.5575, + "step": 2891 + }, + { + "epoch": 0.3050632911392405, + "grad_norm": 0.6146923899650574, + "learning_rate": 0.0011945880246269987, + "loss": 1.545, + "step": 2892 + }, + { + "epoch": 0.305168776371308, + "grad_norm": 1.219101071357727, + "learning_rate": 0.0011943858059818178, + "loss": 1.5201, + "step": 2893 + 
}, + { + "epoch": 0.3052742616033755, + "grad_norm": 0.6956585645675659, + "learning_rate": 0.0011941835375409912, + "loss": 1.547, + "step": 2894 + }, + { + "epoch": 0.30537974683544306, + "grad_norm": 1.0246316194534302, + "learning_rate": 0.0011939812193271844, + "loss": 1.5306, + "step": 2895 + }, + { + "epoch": 0.30548523206751055, + "grad_norm": 0.8957122564315796, + "learning_rate": 0.001193778851363068, + "loss": 1.5422, + "step": 2896 + }, + { + "epoch": 0.30559071729957804, + "grad_norm": 0.8229274153709412, + "learning_rate": 0.0011935764336713187, + "loss": 1.562, + "step": 2897 + }, + { + "epoch": 0.3056962025316456, + "grad_norm": 1.1012674570083618, + "learning_rate": 0.0011933739662746178, + "loss": 1.5583, + "step": 2898 + }, + { + "epoch": 0.3058016877637131, + "grad_norm": 0.6612919569015503, + "learning_rate": 0.0011931714491956531, + "loss": 1.5509, + "step": 2899 + }, + { + "epoch": 0.3059071729957806, + "grad_norm": 1.049758791923523, + "learning_rate": 0.001192968882457118, + "loss": 1.5082, + "step": 2900 + }, + { + "epoch": 0.3060126582278481, + "grad_norm": 0.8551911115646362, + "learning_rate": 0.0011927662660817105, + "loss": 1.5367, + "step": 2901 + }, + { + "epoch": 0.3061181434599156, + "grad_norm": 0.7624301314353943, + "learning_rate": 0.0011925636000921355, + "loss": 1.5655, + "step": 2902 + }, + { + "epoch": 0.3062236286919831, + "grad_norm": 0.9982276558876038, + "learning_rate": 0.0011923608845111017, + "loss": 1.5471, + "step": 2903 + }, + { + "epoch": 0.30632911392405066, + "grad_norm": 0.8458219170570374, + "learning_rate": 0.0011921581193613253, + "loss": 1.5517, + "step": 2904 + }, + { + "epoch": 0.30643459915611815, + "grad_norm": 0.6856141090393066, + "learning_rate": 0.0011919553046655267, + "loss": 1.5652, + "step": 2905 + }, + { + "epoch": 0.30654008438818564, + "grad_norm": 0.675682783126831, + "learning_rate": 0.0011917524404464325, + "loss": 1.57, + "step": 2906 + }, + { + "epoch": 0.3066455696202532, + 
"grad_norm": 0.8347587585449219, + "learning_rate": 0.0011915495267267745, + "loss": 1.5102, + "step": 2907 + }, + { + "epoch": 0.3067510548523207, + "grad_norm": 0.7710554003715515, + "learning_rate": 0.0011913465635292903, + "loss": 1.5413, + "step": 2908 + }, + { + "epoch": 0.3068565400843882, + "grad_norm": 0.7723362445831299, + "learning_rate": 0.001191143550876723, + "loss": 1.5146, + "step": 2909 + }, + { + "epoch": 0.3069620253164557, + "grad_norm": 0.8650389909744263, + "learning_rate": 0.001190940488791821, + "loss": 1.52, + "step": 2910 + }, + { + "epoch": 0.3070675105485232, + "grad_norm": 0.8285431265830994, + "learning_rate": 0.0011907373772973384, + "loss": 1.4997, + "step": 2911 + }, + { + "epoch": 0.3071729957805907, + "grad_norm": 1.2853292226791382, + "learning_rate": 0.001190534216416035, + "loss": 1.5534, + "step": 2912 + }, + { + "epoch": 0.30727848101265826, + "grad_norm": 0.7869629263877869, + "learning_rate": 0.0011903310061706762, + "loss": 1.5203, + "step": 2913 + }, + { + "epoch": 0.30738396624472575, + "grad_norm": 1.4642741680145264, + "learning_rate": 0.0011901277465840323, + "loss": 1.5655, + "step": 2914 + }, + { + "epoch": 0.30748945147679324, + "grad_norm": 0.8930038213729858, + "learning_rate": 0.0011899244376788797, + "loss": 1.5646, + "step": 2915 + }, + { + "epoch": 0.30759493670886073, + "grad_norm": 1.6687711477279663, + "learning_rate": 0.001189721079478, + "loss": 1.5627, + "step": 2916 + }, + { + "epoch": 0.3077004219409283, + "grad_norm": 1.1977434158325195, + "learning_rate": 0.001189517672004181, + "loss": 1.5142, + "step": 2917 + }, + { + "epoch": 0.3078059071729958, + "grad_norm": 1.2322747707366943, + "learning_rate": 0.0011893142152802152, + "loss": 1.5289, + "step": 2918 + }, + { + "epoch": 0.30791139240506327, + "grad_norm": 1.5497817993164062, + "learning_rate": 0.0011891107093289007, + "loss": 1.5835, + "step": 2919 + }, + { + "epoch": 0.3080168776371308, + "grad_norm": 0.8869861364364624, + "learning_rate": 
0.0011889071541730419, + "loss": 1.5367, + "step": 2920 + }, + { + "epoch": 0.3081223628691983, + "grad_norm": 1.1852318048477173, + "learning_rate": 0.0011887035498354475, + "loss": 1.5082, + "step": 2921 + }, + { + "epoch": 0.3082278481012658, + "grad_norm": 0.9787128567695618, + "learning_rate": 0.0011884998963389334, + "loss": 1.5431, + "step": 2922 + }, + { + "epoch": 0.30833333333333335, + "grad_norm": 0.7180287837982178, + "learning_rate": 0.0011882961937063187, + "loss": 1.5382, + "step": 2923 + }, + { + "epoch": 0.30843881856540084, + "grad_norm": 0.7059841156005859, + "learning_rate": 0.0011880924419604305, + "loss": 1.5074, + "step": 2924 + }, + { + "epoch": 0.30854430379746833, + "grad_norm": 0.6919748783111572, + "learning_rate": 0.0011878886411240991, + "loss": 1.5276, + "step": 2925 + }, + { + "epoch": 0.3086497890295359, + "grad_norm": 0.6101765632629395, + "learning_rate": 0.0011876847912201624, + "loss": 1.5294, + "step": 2926 + }, + { + "epoch": 0.3087552742616034, + "grad_norm": 0.7185982465744019, + "learning_rate": 0.0011874808922714623, + "loss": 1.5474, + "step": 2927 + }, + { + "epoch": 0.30886075949367087, + "grad_norm": 0.6258642077445984, + "learning_rate": 0.0011872769443008466, + "loss": 1.5547, + "step": 2928 + }, + { + "epoch": 0.3089662447257384, + "grad_norm": 0.7902642488479614, + "learning_rate": 0.001187072947331169, + "loss": 1.4993, + "step": 2929 + }, + { + "epoch": 0.3090717299578059, + "grad_norm": 0.6555467844009399, + "learning_rate": 0.001186868901385288, + "loss": 1.5056, + "step": 2930 + }, + { + "epoch": 0.3091772151898734, + "grad_norm": 0.637855052947998, + "learning_rate": 0.0011866648064860683, + "loss": 1.5555, + "step": 2931 + }, + { + "epoch": 0.30928270042194095, + "grad_norm": 0.6493734121322632, + "learning_rate": 0.0011864606626563795, + "loss": 1.5725, + "step": 2932 + }, + { + "epoch": 0.30938818565400844, + "grad_norm": 0.6343172788619995, + "learning_rate": 0.0011862564699190972, + "loss": 1.5238, + 
"step": 2933 + }, + { + "epoch": 0.30949367088607593, + "grad_norm": 0.6269710659980774, + "learning_rate": 0.0011860522282971019, + "loss": 1.554, + "step": 2934 + }, + { + "epoch": 0.3095991561181435, + "grad_norm": 0.6711395978927612, + "learning_rate": 0.0011858479378132802, + "loss": 1.5419, + "step": 2935 + }, + { + "epoch": 0.309704641350211, + "grad_norm": 0.6189565062522888, + "learning_rate": 0.0011856435984905237, + "loss": 1.5687, + "step": 2936 + }, + { + "epoch": 0.30981012658227847, + "grad_norm": 0.6874924302101135, + "learning_rate": 0.00118543921035173, + "loss": 1.5033, + "step": 2937 + }, + { + "epoch": 0.309915611814346, + "grad_norm": 0.6866757273674011, + "learning_rate": 0.001185234773419801, + "loss": 1.5266, + "step": 2938 + }, + { + "epoch": 0.3100210970464135, + "grad_norm": 0.670831561088562, + "learning_rate": 0.0011850302877176456, + "loss": 1.4843, + "step": 2939 + }, + { + "epoch": 0.310126582278481, + "grad_norm": 0.7369555234909058, + "learning_rate": 0.001184825753268177, + "loss": 1.5215, + "step": 2940 + }, + { + "epoch": 0.31023206751054855, + "grad_norm": 0.6803542971611023, + "learning_rate": 0.0011846211700943148, + "loss": 1.555, + "step": 2941 + }, + { + "epoch": 0.31033755274261604, + "grad_norm": 0.721152663230896, + "learning_rate": 0.001184416538218983, + "loss": 1.5271, + "step": 2942 + }, + { + "epoch": 0.31044303797468353, + "grad_norm": 0.7083051800727844, + "learning_rate": 0.0011842118576651122, + "loss": 1.5618, + "step": 2943 + }, + { + "epoch": 0.3105485232067511, + "grad_norm": 0.9875057339668274, + "learning_rate": 0.0011840071284556373, + "loss": 1.5764, + "step": 2944 + }, + { + "epoch": 0.3106540084388186, + "grad_norm": 0.8234829306602478, + "learning_rate": 0.0011838023506134997, + "loss": 1.5523, + "step": 2945 + }, + { + "epoch": 0.31075949367088607, + "grad_norm": 0.7499757409095764, + "learning_rate": 0.0011835975241616455, + "loss": 1.522, + "step": 2946 + }, + { + "epoch": 0.31086497890295356, + 
"grad_norm": 1.0770479440689087, + "learning_rate": 0.0011833926491230265, + "loss": 1.5547, + "step": 2947 + }, + { + "epoch": 0.3109704641350211, + "grad_norm": 0.7113229036331177, + "learning_rate": 0.0011831877255206002, + "loss": 1.5592, + "step": 2948 + }, + { + "epoch": 0.3110759493670886, + "grad_norm": 1.432540774345398, + "learning_rate": 0.0011829827533773292, + "loss": 1.5571, + "step": 2949 + }, + { + "epoch": 0.3111814345991561, + "grad_norm": 0.7553693056106567, + "learning_rate": 0.0011827777327161814, + "loss": 1.5113, + "step": 2950 + }, + { + "epoch": 0.31128691983122364, + "grad_norm": 1.619726538658142, + "learning_rate": 0.001182572663560131, + "loss": 1.5066, + "step": 2951 + }, + { + "epoch": 0.31139240506329113, + "grad_norm": 0.7348953485488892, + "learning_rate": 0.0011823675459321564, + "loss": 1.5331, + "step": 2952 + }, + { + "epoch": 0.3114978902953586, + "grad_norm": 1.0935654640197754, + "learning_rate": 0.0011821623798552424, + "loss": 1.5548, + "step": 2953 + }, + { + "epoch": 0.3116033755274262, + "grad_norm": 0.7247912287712097, + "learning_rate": 0.001181957165352379, + "loss": 1.5169, + "step": 2954 + }, + { + "epoch": 0.31170886075949367, + "grad_norm": 0.9191718101501465, + "learning_rate": 0.0011817519024465608, + "loss": 1.521, + "step": 2955 + }, + { + "epoch": 0.31181434599156116, + "grad_norm": 1.2072099447250366, + "learning_rate": 0.0011815465911607893, + "loss": 1.5424, + "step": 2956 + }, + { + "epoch": 0.3119198312236287, + "grad_norm": 0.7098910212516785, + "learning_rate": 0.0011813412315180704, + "loss": 1.5492, + "step": 2957 + }, + { + "epoch": 0.3120253164556962, + "grad_norm": 1.0906376838684082, + "learning_rate": 0.0011811358235414154, + "loss": 1.5084, + "step": 2958 + }, + { + "epoch": 0.3121308016877637, + "grad_norm": 0.6483845114707947, + "learning_rate": 0.0011809303672538417, + "loss": 1.5351, + "step": 2959 + }, + { + "epoch": 0.31223628691983124, + "grad_norm": 1.0814794301986694, + 
"learning_rate": 0.0011807248626783714, + "loss": 1.5284, + "step": 2960 + }, + { + "epoch": 0.31234177215189873, + "grad_norm": 0.650475800037384, + "learning_rate": 0.0011805193098380327, + "loss": 1.4954, + "step": 2961 + }, + { + "epoch": 0.3124472573839662, + "grad_norm": 0.998494029045105, + "learning_rate": 0.0011803137087558584, + "loss": 1.5262, + "step": 2962 + }, + { + "epoch": 0.3125527426160338, + "grad_norm": 0.8141764402389526, + "learning_rate": 0.0011801080594548874, + "loss": 1.518, + "step": 2963 + }, + { + "epoch": 0.31265822784810127, + "grad_norm": 0.6701788902282715, + "learning_rate": 0.0011799023619581638, + "loss": 1.4841, + "step": 2964 + }, + { + "epoch": 0.31276371308016876, + "grad_norm": 0.6752135753631592, + "learning_rate": 0.0011796966162887364, + "loss": 1.5118, + "step": 2965 + }, + { + "epoch": 0.3128691983122363, + "grad_norm": 0.6796728372573853, + "learning_rate": 0.0011794908224696608, + "loss": 1.518, + "step": 2966 + }, + { + "epoch": 0.3129746835443038, + "grad_norm": 0.6907919049263, + "learning_rate": 0.0011792849805239967, + "loss": 1.543, + "step": 2967 + }, + { + "epoch": 0.3130801687763713, + "grad_norm": 0.6649686694145203, + "learning_rate": 0.0011790790904748103, + "loss": 1.5457, + "step": 2968 + }, + { + "epoch": 0.31318565400843884, + "grad_norm": 0.8784547448158264, + "learning_rate": 0.0011788731523451718, + "loss": 1.5061, + "step": 2969 + }, + { + "epoch": 0.31329113924050633, + "grad_norm": 0.6811883449554443, + "learning_rate": 0.0011786671661581584, + "loss": 1.5259, + "step": 2970 + }, + { + "epoch": 0.3133966244725738, + "grad_norm": 0.8324406743049622, + "learning_rate": 0.0011784611319368512, + "loss": 1.4925, + "step": 2971 + }, + { + "epoch": 0.3135021097046414, + "grad_norm": 0.7486146092414856, + "learning_rate": 0.0011782550497043379, + "loss": 1.5037, + "step": 2972 + }, + { + "epoch": 0.31360759493670887, + "grad_norm": 0.92384272813797, + "learning_rate": 0.0011780489194837106, + "loss": 
1.5372, + "step": 2973 + }, + { + "epoch": 0.31371308016877636, + "grad_norm": 1.3711521625518799, + "learning_rate": 0.0011778427412980675, + "loss": 1.554, + "step": 2974 + }, + { + "epoch": 0.3138185654008439, + "grad_norm": 0.8715711832046509, + "learning_rate": 0.0011776365151705119, + "loss": 1.5134, + "step": 2975 + }, + { + "epoch": 0.3139240506329114, + "grad_norm": 1.3869915008544922, + "learning_rate": 0.0011774302411241525, + "loss": 1.5287, + "step": 2976 + }, + { + "epoch": 0.3140295358649789, + "grad_norm": 1.2167623043060303, + "learning_rate": 0.0011772239191821029, + "loss": 1.5448, + "step": 2977 + }, + { + "epoch": 0.31413502109704644, + "grad_norm": 1.4407260417938232, + "learning_rate": 0.0011770175493674827, + "loss": 1.5375, + "step": 2978 + }, + { + "epoch": 0.31424050632911393, + "grad_norm": 1.2211499214172363, + "learning_rate": 0.0011768111317034173, + "loss": 1.5504, + "step": 2979 + }, + { + "epoch": 0.3143459915611814, + "grad_norm": 1.068924903869629, + "learning_rate": 0.001176604666213036, + "loss": 1.5651, + "step": 2980 + }, + { + "epoch": 0.3144514767932489, + "grad_norm": 0.9553629159927368, + "learning_rate": 0.0011763981529194748, + "loss": 1.5619, + "step": 2981 + }, + { + "epoch": 0.31455696202531647, + "grad_norm": 0.9049874544143677, + "learning_rate": 0.001176191591845874, + "loss": 1.4998, + "step": 2982 + }, + { + "epoch": 0.31466244725738396, + "grad_norm": 1.4547661542892456, + "learning_rate": 0.0011759849830153806, + "loss": 1.5258, + "step": 2983 + }, + { + "epoch": 0.31476793248945145, + "grad_norm": 1.1145117282867432, + "learning_rate": 0.0011757783264511456, + "loss": 1.539, + "step": 2984 + }, + { + "epoch": 0.314873417721519, + "grad_norm": 1.041845679283142, + "learning_rate": 0.001175571622176326, + "loss": 1.5243, + "step": 2985 + }, + { + "epoch": 0.3149789029535865, + "grad_norm": 1.0721639394760132, + "learning_rate": 0.0011753648702140837, + "loss": 1.5524, + "step": 2986 + }, + { + "epoch": 
0.315084388185654, + "grad_norm": 0.9924624562263489, + "learning_rate": 0.001175158070587587, + "loss": 1.5677, + "step": 2987 + }, + { + "epoch": 0.31518987341772153, + "grad_norm": 0.9908656477928162, + "learning_rate": 0.0011749512233200081, + "loss": 1.5427, + "step": 2988 + }, + { + "epoch": 0.315295358649789, + "grad_norm": 0.7939729690551758, + "learning_rate": 0.001174744328434526, + "loss": 1.5665, + "step": 2989 + }, + { + "epoch": 0.3154008438818565, + "grad_norm": 0.9545892477035522, + "learning_rate": 0.0011745373859543236, + "loss": 1.5449, + "step": 2990 + }, + { + "epoch": 0.31550632911392407, + "grad_norm": 0.7182574272155762, + "learning_rate": 0.0011743303959025906, + "loss": 1.5062, + "step": 2991 + }, + { + "epoch": 0.31561181434599156, + "grad_norm": 0.8656249642372131, + "learning_rate": 0.0011741233583025205, + "loss": 1.5098, + "step": 2992 + }, + { + "epoch": 0.31571729957805905, + "grad_norm": 0.7275987863540649, + "learning_rate": 0.0011739162731773133, + "loss": 1.4766, + "step": 2993 + }, + { + "epoch": 0.3158227848101266, + "grad_norm": 0.7111440300941467, + "learning_rate": 0.0011737091405501741, + "loss": 1.551, + "step": 2994 + }, + { + "epoch": 0.3159282700421941, + "grad_norm": 0.7334299087524414, + "learning_rate": 0.0011735019604443126, + "loss": 1.5709, + "step": 2995 + }, + { + "epoch": 0.3160337552742616, + "grad_norm": 0.6355679035186768, + "learning_rate": 0.0011732947328829447, + "loss": 1.5006, + "step": 2996 + }, + { + "epoch": 0.31613924050632913, + "grad_norm": 0.7327666282653809, + "learning_rate": 0.0011730874578892913, + "loss": 1.5067, + "step": 2997 + }, + { + "epoch": 0.3162447257383966, + "grad_norm": 0.6658511757850647, + "learning_rate": 0.0011728801354865786, + "loss": 1.5171, + "step": 2998 + }, + { + "epoch": 0.3163502109704641, + "grad_norm": 0.7105266451835632, + "learning_rate": 0.0011726727656980378, + "loss": 1.5502, + "step": 2999 + }, + { + "epoch": 0.31645569620253167, + "grad_norm": 
0.6918742656707764, + "learning_rate": 0.0011724653485469063, + "loss": 1.5172, + "step": 3000 + }, + { + "epoch": 0.31656118143459916, + "grad_norm": 0.6420064568519592, + "learning_rate": 0.0011722578840564256, + "loss": 1.5198, + "step": 3001 + }, + { + "epoch": 0.31666666666666665, + "grad_norm": 0.6940688490867615, + "learning_rate": 0.0011720503722498436, + "loss": 1.4991, + "step": 3002 + }, + { + "epoch": 0.3167721518987342, + "grad_norm": 0.7398155331611633, + "learning_rate": 0.0011718428131504127, + "loss": 1.493, + "step": 3003 + }, + { + "epoch": 0.3168776371308017, + "grad_norm": 0.6864051818847656, + "learning_rate": 0.0011716352067813914, + "loss": 1.568, + "step": 3004 + }, + { + "epoch": 0.3169831223628692, + "grad_norm": 0.642543613910675, + "learning_rate": 0.0011714275531660423, + "loss": 1.5393, + "step": 3005 + }, + { + "epoch": 0.31708860759493673, + "grad_norm": 0.6677579879760742, + "learning_rate": 0.0011712198523276347, + "loss": 1.5488, + "step": 3006 + }, + { + "epoch": 0.3171940928270042, + "grad_norm": 0.6589192748069763, + "learning_rate": 0.0011710121042894425, + "loss": 1.5272, + "step": 3007 + }, + { + "epoch": 0.3172995780590717, + "grad_norm": 0.7291915416717529, + "learning_rate": 0.0011708043090747442, + "loss": 1.5302, + "step": 3008 + }, + { + "epoch": 0.31740506329113927, + "grad_norm": 0.9251818656921387, + "learning_rate": 0.001170596466706825, + "loss": 1.5201, + "step": 3009 + }, + { + "epoch": 0.31751054852320676, + "grad_norm": 0.6675481796264648, + "learning_rate": 0.0011703885772089743, + "loss": 1.531, + "step": 3010 + }, + { + "epoch": 0.31761603375527425, + "grad_norm": 1.004442811012268, + "learning_rate": 0.0011701806406044875, + "loss": 1.5131, + "step": 3011 + }, + { + "epoch": 0.31772151898734174, + "grad_norm": 0.9261484146118164, + "learning_rate": 0.0011699726569166643, + "loss": 1.5305, + "step": 3012 + }, + { + "epoch": 0.3178270042194093, + "grad_norm": 0.9527552723884583, + "learning_rate": 
0.0011697646261688108, + "loss": 1.4981, + "step": 3013 + }, + { + "epoch": 0.3179324894514768, + "grad_norm": 1.3757954835891724, + "learning_rate": 0.0011695565483842382, + "loss": 1.5376, + "step": 3014 + }, + { + "epoch": 0.3180379746835443, + "grad_norm": 1.1330909729003906, + "learning_rate": 0.001169348423586262, + "loss": 1.558, + "step": 3015 + }, + { + "epoch": 0.3181434599156118, + "grad_norm": 1.473752737045288, + "learning_rate": 0.0011691402517982038, + "loss": 1.541, + "step": 3016 + }, + { + "epoch": 0.3182489451476793, + "grad_norm": 1.1968939304351807, + "learning_rate": 0.0011689320330433904, + "loss": 1.5764, + "step": 3017 + }, + { + "epoch": 0.3183544303797468, + "grad_norm": 1.1947311162948608, + "learning_rate": 0.0011687237673451538, + "loss": 1.5578, + "step": 3018 + }, + { + "epoch": 0.31845991561181436, + "grad_norm": 0.8334040641784668, + "learning_rate": 0.0011685154547268312, + "loss": 1.5249, + "step": 3019 + }, + { + "epoch": 0.31856540084388185, + "grad_norm": 0.9519461989402771, + "learning_rate": 0.0011683070952117646, + "loss": 1.5178, + "step": 3020 + }, + { + "epoch": 0.31867088607594934, + "grad_norm": 0.8182008266448975, + "learning_rate": 0.0011680986888233024, + "loss": 1.5004, + "step": 3021 + }, + { + "epoch": 0.3187763713080169, + "grad_norm": 1.406956672668457, + "learning_rate": 0.0011678902355847973, + "loss": 1.5364, + "step": 3022 + }, + { + "epoch": 0.3188818565400844, + "grad_norm": 0.7083514928817749, + "learning_rate": 0.0011676817355196075, + "loss": 1.4858, + "step": 3023 + }, + { + "epoch": 0.3189873417721519, + "grad_norm": 0.8838760256767273, + "learning_rate": 0.0011674731886510967, + "loss": 1.522, + "step": 3024 + }, + { + "epoch": 0.3190928270042194, + "grad_norm": 0.6882526874542236, + "learning_rate": 0.0011672645950026332, + "loss": 1.4856, + "step": 3025 + }, + { + "epoch": 0.3191983122362869, + "grad_norm": 0.9379214644432068, + "learning_rate": 0.001167055954597591, + "loss": 1.5393, + "step": 
3026 + }, + { + "epoch": 0.3193037974683544, + "grad_norm": 0.8876725435256958, + "learning_rate": 0.0011668472674593497, + "loss": 1.5265, + "step": 3027 + }, + { + "epoch": 0.31940928270042196, + "grad_norm": 0.6570401787757874, + "learning_rate": 0.0011666385336112934, + "loss": 1.5395, + "step": 3028 + }, + { + "epoch": 0.31951476793248945, + "grad_norm": 0.7354498505592346, + "learning_rate": 0.0011664297530768117, + "loss": 1.5014, + "step": 3029 + }, + { + "epoch": 0.31962025316455694, + "grad_norm": 0.6717156767845154, + "learning_rate": 0.0011662209258792998, + "loss": 1.5643, + "step": 3030 + }, + { + "epoch": 0.3197257383966245, + "grad_norm": 0.7264850735664368, + "learning_rate": 0.0011660120520421578, + "loss": 1.5315, + "step": 3031 + }, + { + "epoch": 0.319831223628692, + "grad_norm": 0.680975079536438, + "learning_rate": 0.0011658031315887908, + "loss": 1.5181, + "step": 3032 + }, + { + "epoch": 0.3199367088607595, + "grad_norm": 0.7121829390525818, + "learning_rate": 0.0011655941645426096, + "loss": 1.5045, + "step": 3033 + }, + { + "epoch": 0.320042194092827, + "grad_norm": 0.7940396070480347, + "learning_rate": 0.00116538515092703, + "loss": 1.5596, + "step": 3034 + }, + { + "epoch": 0.3201476793248945, + "grad_norm": 0.6544436812400818, + "learning_rate": 0.0011651760907654728, + "loss": 1.5148, + "step": 3035 + }, + { + "epoch": 0.320253164556962, + "grad_norm": 0.9960066676139832, + "learning_rate": 0.0011649669840813645, + "loss": 1.5798, + "step": 3036 + }, + { + "epoch": 0.32035864978902956, + "grad_norm": 0.6388936638832092, + "learning_rate": 0.0011647578308981363, + "loss": 1.5479, + "step": 3037 + }, + { + "epoch": 0.32046413502109705, + "grad_norm": 0.8911318182945251, + "learning_rate": 0.001164548631239225, + "loss": 1.4932, + "step": 3038 + }, + { + "epoch": 0.32056962025316454, + "grad_norm": 0.6385089755058289, + "learning_rate": 0.0011643393851280724, + "loss": 1.5132, + "step": 3039 + }, + { + "epoch": 0.3206751054852321, + 
"grad_norm": 0.8522648215293884, + "learning_rate": 0.0011641300925881257, + "loss": 1.5034, + "step": 3040 + }, + { + "epoch": 0.3207805907172996, + "grad_norm": 0.9062016606330872, + "learning_rate": 0.001163920753642837, + "loss": 1.5197, + "step": 3041 + }, + { + "epoch": 0.3208860759493671, + "grad_norm": 0.6526281237602234, + "learning_rate": 0.001163711368315664, + "loss": 1.5132, + "step": 3042 + }, + { + "epoch": 0.3209915611814346, + "grad_norm": 0.9810452461242676, + "learning_rate": 0.001163501936630069, + "loss": 1.5474, + "step": 3043 + }, + { + "epoch": 0.3210970464135021, + "grad_norm": 0.6497313976287842, + "learning_rate": 0.0011632924586095204, + "loss": 1.5398, + "step": 3044 + }, + { + "epoch": 0.3212025316455696, + "grad_norm": 0.9879642724990845, + "learning_rate": 0.0011630829342774906, + "loss": 1.5307, + "step": 3045 + }, + { + "epoch": 0.3213080168776371, + "grad_norm": 0.8241793513298035, + "learning_rate": 0.0011628733636574586, + "loss": 1.5639, + "step": 3046 + }, + { + "epoch": 0.32141350210970465, + "grad_norm": 0.74004727602005, + "learning_rate": 0.0011626637467729072, + "loss": 1.4833, + "step": 3047 + }, + { + "epoch": 0.32151898734177214, + "grad_norm": 0.690866231918335, + "learning_rate": 0.0011624540836473252, + "loss": 1.5083, + "step": 3048 + }, + { + "epoch": 0.32162447257383964, + "grad_norm": 0.87788325548172, + "learning_rate": 0.0011622443743042065, + "loss": 1.5035, + "step": 3049 + }, + { + "epoch": 0.3217299578059072, + "grad_norm": 0.9866598844528198, + "learning_rate": 0.0011620346187670501, + "loss": 1.5341, + "step": 3050 + }, + { + "epoch": 0.3218354430379747, + "grad_norm": 0.6281954050064087, + "learning_rate": 0.0011618248170593597, + "loss": 1.5144, + "step": 3051 + }, + { + "epoch": 0.32194092827004217, + "grad_norm": 0.845039963722229, + "learning_rate": 0.0011616149692046454, + "loss": 1.5094, + "step": 3052 + }, + { + "epoch": 0.3220464135021097, + "grad_norm": 0.6384650468826294, + "learning_rate": 
0.0011614050752264216, + "loss": 1.5432, + "step": 3053 + }, + { + "epoch": 0.3221518987341772, + "grad_norm": 0.9113030433654785, + "learning_rate": 0.0011611951351482071, + "loss": 1.5288, + "step": 3054 + }, + { + "epoch": 0.3222573839662447, + "grad_norm": 0.7432407736778259, + "learning_rate": 0.0011609851489935274, + "loss": 1.5582, + "step": 3055 + }, + { + "epoch": 0.32236286919831225, + "grad_norm": 0.8016780614852905, + "learning_rate": 0.0011607751167859125, + "loss": 1.5486, + "step": 3056 + }, + { + "epoch": 0.32246835443037974, + "grad_norm": 1.1103399991989136, + "learning_rate": 0.0011605650385488977, + "loss": 1.5399, + "step": 3057 + }, + { + "epoch": 0.32257383966244724, + "grad_norm": 0.6855688095092773, + "learning_rate": 0.0011603549143060225, + "loss": 1.5315, + "step": 3058 + }, + { + "epoch": 0.3226793248945148, + "grad_norm": 1.0352356433868408, + "learning_rate": 0.0011601447440808335, + "loss": 1.54, + "step": 3059 + }, + { + "epoch": 0.3227848101265823, + "grad_norm": 0.666873037815094, + "learning_rate": 0.0011599345278968806, + "loss": 1.5469, + "step": 3060 + }, + { + "epoch": 0.32289029535864977, + "grad_norm": 0.8261487483978271, + "learning_rate": 0.0011597242657777195, + "loss": 1.5161, + "step": 3061 + }, + { + "epoch": 0.3229957805907173, + "grad_norm": 0.6790077090263367, + "learning_rate": 0.0011595139577469115, + "loss": 1.4993, + "step": 3062 + }, + { + "epoch": 0.3231012658227848, + "grad_norm": 0.7003688812255859, + "learning_rate": 0.0011593036038280225, + "loss": 1.5279, + "step": 3063 + }, + { + "epoch": 0.3232067510548523, + "grad_norm": 0.5939058065414429, + "learning_rate": 0.0011590932040446236, + "loss": 1.533, + "step": 3064 + }, + { + "epoch": 0.32331223628691985, + "grad_norm": 0.7258656024932861, + "learning_rate": 0.0011588827584202914, + "loss": 1.5014, + "step": 3065 + }, + { + "epoch": 0.32341772151898734, + "grad_norm": 0.8545254468917847, + "learning_rate": 0.0011586722669786073, + "loss": 1.5266, + 
"step": 3066 + }, + { + "epoch": 0.32352320675105484, + "grad_norm": 0.6134364008903503, + "learning_rate": 0.0011584617297431578, + "loss": 1.5397, + "step": 3067 + }, + { + "epoch": 0.3236286919831224, + "grad_norm": 0.7200748324394226, + "learning_rate": 0.0011582511467375346, + "loss": 1.5201, + "step": 3068 + }, + { + "epoch": 0.3237341772151899, + "grad_norm": 0.6435648798942566, + "learning_rate": 0.001158040517985335, + "loss": 1.5299, + "step": 3069 + }, + { + "epoch": 0.32383966244725737, + "grad_norm": 0.7155391573905945, + "learning_rate": 0.0011578298435101604, + "loss": 1.5466, + "step": 3070 + }, + { + "epoch": 0.3239451476793249, + "grad_norm": 0.6732375621795654, + "learning_rate": 0.0011576191233356181, + "loss": 1.5641, + "step": 3071 + }, + { + "epoch": 0.3240506329113924, + "grad_norm": 0.7379866242408752, + "learning_rate": 0.0011574083574853208, + "loss": 1.4744, + "step": 3072 + }, + { + "epoch": 0.3241561181434599, + "grad_norm": 0.8322383165359497, + "learning_rate": 0.0011571975459828852, + "loss": 1.5256, + "step": 3073 + }, + { + "epoch": 0.32426160337552745, + "grad_norm": 0.6318680644035339, + "learning_rate": 0.0011569866888519343, + "loss": 1.5387, + "step": 3074 + }, + { + "epoch": 0.32436708860759494, + "grad_norm": 0.8920484185218811, + "learning_rate": 0.0011567757861160955, + "loss": 1.4935, + "step": 3075 + }, + { + "epoch": 0.32447257383966244, + "grad_norm": 0.7535182237625122, + "learning_rate": 0.0011565648377990017, + "loss": 1.495, + "step": 3076 + }, + { + "epoch": 0.32457805907173, + "grad_norm": 0.6805022954940796, + "learning_rate": 0.0011563538439242902, + "loss": 1.499, + "step": 3077 + }, + { + "epoch": 0.3246835443037975, + "grad_norm": 0.9398968815803528, + "learning_rate": 0.0011561428045156043, + "loss": 1.5054, + "step": 3078 + }, + { + "epoch": 0.32478902953586497, + "grad_norm": 0.6677874326705933, + "learning_rate": 0.001155931719596592, + "loss": 1.5161, + "step": 3079 + }, + { + "epoch": 
0.32489451476793246, + "grad_norm": 0.7520086765289307, + "learning_rate": 0.0011557205891909062, + "loss": 1.565, + "step": 3080 + }, + { + "epoch": 0.325, + "grad_norm": 0.7382199168205261, + "learning_rate": 0.0011555094133222053, + "loss": 1.4747, + "step": 3081 + }, + { + "epoch": 0.3251054852320675, + "grad_norm": 0.8241503834724426, + "learning_rate": 0.0011552981920141528, + "loss": 1.5346, + "step": 3082 + }, + { + "epoch": 0.325210970464135, + "grad_norm": 0.7098329663276672, + "learning_rate": 0.0011550869252904166, + "loss": 1.5359, + "step": 3083 + }, + { + "epoch": 0.32531645569620254, + "grad_norm": 0.668655514717102, + "learning_rate": 0.0011548756131746706, + "loss": 1.5421, + "step": 3084 + }, + { + "epoch": 0.32542194092827004, + "grad_norm": 0.6569998264312744, + "learning_rate": 0.0011546642556905934, + "loss": 1.5468, + "step": 3085 + }, + { + "epoch": 0.32552742616033753, + "grad_norm": 0.7064712643623352, + "learning_rate": 0.0011544528528618682, + "loss": 1.5336, + "step": 3086 + }, + { + "epoch": 0.3256329113924051, + "grad_norm": 0.6862848401069641, + "learning_rate": 0.0011542414047121842, + "loss": 1.5894, + "step": 3087 + }, + { + "epoch": 0.32573839662447257, + "grad_norm": 0.6644270420074463, + "learning_rate": 0.0011540299112652351, + "loss": 1.5145, + "step": 3088 + }, + { + "epoch": 0.32584388185654006, + "grad_norm": 0.6375104784965515, + "learning_rate": 0.00115381837254472, + "loss": 1.5315, + "step": 3089 + }, + { + "epoch": 0.3259493670886076, + "grad_norm": 0.7043631076812744, + "learning_rate": 0.0011536067885743423, + "loss": 1.5582, + "step": 3090 + }, + { + "epoch": 0.3260548523206751, + "grad_norm": 0.7059652209281921, + "learning_rate": 0.0011533951593778115, + "loss": 1.552, + "step": 3091 + }, + { + "epoch": 0.3261603375527426, + "grad_norm": 0.6409168243408203, + "learning_rate": 0.0011531834849788417, + "loss": 1.5048, + "step": 3092 + }, + { + "epoch": 0.32626582278481014, + "grad_norm": 0.6428069472312927, + 
"learning_rate": 0.0011529717654011518, + "loss": 1.4909, + "step": 3093 + }, + { + "epoch": 0.32637130801687764, + "grad_norm": 0.6800059676170349, + "learning_rate": 0.001152760000668466, + "loss": 1.5116, + "step": 3094 + }, + { + "epoch": 0.32647679324894513, + "grad_norm": 0.6632084250450134, + "learning_rate": 0.001152548190804514, + "loss": 1.5167, + "step": 3095 + }, + { + "epoch": 0.3265822784810127, + "grad_norm": 0.6947028040885925, + "learning_rate": 0.0011523363358330301, + "loss": 1.5175, + "step": 3096 + }, + { + "epoch": 0.32668776371308017, + "grad_norm": 0.7076581716537476, + "learning_rate": 0.0011521244357777533, + "loss": 1.5063, + "step": 3097 + }, + { + "epoch": 0.32679324894514766, + "grad_norm": 0.7735928297042847, + "learning_rate": 0.0011519124906624284, + "loss": 1.5521, + "step": 3098 + }, + { + "epoch": 0.3268987341772152, + "grad_norm": 0.9707397222518921, + "learning_rate": 0.0011517005005108048, + "loss": 1.5082, + "step": 3099 + }, + { + "epoch": 0.3270042194092827, + "grad_norm": 0.6052380204200745, + "learning_rate": 0.001151488465346637, + "loss": 1.5251, + "step": 3100 + }, + { + "epoch": 0.3271097046413502, + "grad_norm": 0.9250460863113403, + "learning_rate": 0.0011512763851936848, + "loss": 1.5277, + "step": 3101 + }, + { + "epoch": 0.32721518987341774, + "grad_norm": 0.6434914469718933, + "learning_rate": 0.0011510642600757123, + "loss": 1.5411, + "step": 3102 + }, + { + "epoch": 0.32732067510548524, + "grad_norm": 1.0183025598526, + "learning_rate": 0.00115085209001649, + "loss": 1.5454, + "step": 3103 + }, + { + "epoch": 0.32742616033755273, + "grad_norm": 0.6917226314544678, + "learning_rate": 0.0011506398750397919, + "loss": 1.5063, + "step": 3104 + }, + { + "epoch": 0.3275316455696203, + "grad_norm": 0.8920472860336304, + "learning_rate": 0.0011504276151693984, + "loss": 1.5221, + "step": 3105 + }, + { + "epoch": 0.32763713080168777, + "grad_norm": 0.8609181046485901, + "learning_rate": 0.0011502153104290937, + "loss": 
1.5374, + "step": 3106 + }, + { + "epoch": 0.32774261603375526, + "grad_norm": 0.6118366718292236, + "learning_rate": 0.0011500029608426676, + "loss": 1.5032, + "step": 3107 + }, + { + "epoch": 0.3278481012658228, + "grad_norm": 0.7345561385154724, + "learning_rate": 0.0011497905664339153, + "loss": 1.5075, + "step": 3108 + }, + { + "epoch": 0.3279535864978903, + "grad_norm": 0.633423924446106, + "learning_rate": 0.0011495781272266366, + "loss": 1.5322, + "step": 3109 + }, + { + "epoch": 0.3280590717299578, + "grad_norm": 0.8255857825279236, + "learning_rate": 0.0011493656432446362, + "loss": 1.5228, + "step": 3110 + }, + { + "epoch": 0.3281645569620253, + "grad_norm": 0.9369515776634216, + "learning_rate": 0.0011491531145117243, + "loss": 1.544, + "step": 3111 + }, + { + "epoch": 0.32827004219409284, + "grad_norm": 0.6398832201957703, + "learning_rate": 0.0011489405410517151, + "loss": 1.5256, + "step": 3112 + }, + { + "epoch": 0.32837552742616033, + "grad_norm": 0.6369377970695496, + "learning_rate": 0.0011487279228884293, + "loss": 1.5168, + "step": 3113 + }, + { + "epoch": 0.3284810126582278, + "grad_norm": 0.6952142715454102, + "learning_rate": 0.0011485152600456913, + "loss": 1.5321, + "step": 3114 + }, + { + "epoch": 0.32858649789029537, + "grad_norm": 0.6936406493186951, + "learning_rate": 0.0011483025525473314, + "loss": 1.5126, + "step": 3115 + }, + { + "epoch": 0.32869198312236286, + "grad_norm": 0.6725615859031677, + "learning_rate": 0.001148089800417184, + "loss": 1.4955, + "step": 3116 + }, + { + "epoch": 0.32879746835443036, + "grad_norm": 0.7122244238853455, + "learning_rate": 0.00114787700367909, + "loss": 1.5232, + "step": 3117 + }, + { + "epoch": 0.3289029535864979, + "grad_norm": 0.7256622910499573, + "learning_rate": 0.0011476641623568934, + "loss": 1.5009, + "step": 3118 + }, + { + "epoch": 0.3290084388185654, + "grad_norm": 0.6664836406707764, + "learning_rate": 0.0011474512764744445, + "loss": 1.5229, + "step": 3119 + }, + { + "epoch": 
0.3291139240506329, + "grad_norm": 0.7585827708244324, + "learning_rate": 0.0011472383460555983, + "loss": 1.574, + "step": 3120 + }, + { + "epoch": 0.32921940928270044, + "grad_norm": 0.779174268245697, + "learning_rate": 0.0011470253711242146, + "loss": 1.5131, + "step": 3121 + }, + { + "epoch": 0.32932489451476793, + "grad_norm": 0.6387424468994141, + "learning_rate": 0.001146812351704158, + "loss": 1.5139, + "step": 3122 + }, + { + "epoch": 0.3294303797468354, + "grad_norm": 0.7719461917877197, + "learning_rate": 0.001146599287819299, + "loss": 1.581, + "step": 3123 + }, + { + "epoch": 0.32953586497890297, + "grad_norm": 0.7411165237426758, + "learning_rate": 0.0011463861794935122, + "loss": 1.5059, + "step": 3124 + }, + { + "epoch": 0.32964135021097046, + "grad_norm": 0.6181056499481201, + "learning_rate": 0.0011461730267506775, + "loss": 1.553, + "step": 3125 + }, + { + "epoch": 0.32974683544303796, + "grad_norm": 0.627004861831665, + "learning_rate": 0.0011459598296146795, + "loss": 1.5401, + "step": 3126 + }, + { + "epoch": 0.3298523206751055, + "grad_norm": 0.9103903770446777, + "learning_rate": 0.001145746588109408, + "loss": 1.533, + "step": 3127 + }, + { + "epoch": 0.329957805907173, + "grad_norm": 0.8644546270370483, + "learning_rate": 0.0011455333022587582, + "loss": 1.5268, + "step": 3128 + }, + { + "epoch": 0.3300632911392405, + "grad_norm": 0.6427549123764038, + "learning_rate": 0.0011453199720866296, + "loss": 1.5512, + "step": 3129 + }, + { + "epoch": 0.33016877637130804, + "grad_norm": 0.7117038369178772, + "learning_rate": 0.001145106597616927, + "loss": 1.5471, + "step": 3130 + }, + { + "epoch": 0.33027426160337553, + "grad_norm": 0.6877314448356628, + "learning_rate": 0.0011448931788735595, + "loss": 1.5104, + "step": 3131 + }, + { + "epoch": 0.330379746835443, + "grad_norm": 0.7401301860809326, + "learning_rate": 0.0011446797158804426, + "loss": 1.5405, + "step": 3132 + }, + { + "epoch": 0.33048523206751057, + "grad_norm": 
0.8072726726531982, + "learning_rate": 0.0011444662086614952, + "loss": 1.477, + "step": 3133 + }, + { + "epoch": 0.33059071729957806, + "grad_norm": 0.7187771797180176, + "learning_rate": 0.0011442526572406422, + "loss": 1.5571, + "step": 3134 + }, + { + "epoch": 0.33069620253164556, + "grad_norm": 0.6380051970481873, + "learning_rate": 0.001144039061641813, + "loss": 1.4846, + "step": 3135 + }, + { + "epoch": 0.3308016877637131, + "grad_norm": 0.7608206272125244, + "learning_rate": 0.0011438254218889422, + "loss": 1.497, + "step": 3136 + }, + { + "epoch": 0.3309071729957806, + "grad_norm": 0.7010186910629272, + "learning_rate": 0.0011436117380059692, + "loss": 1.4847, + "step": 3137 + }, + { + "epoch": 0.3310126582278481, + "grad_norm": 0.9077845215797424, + "learning_rate": 0.0011433980100168382, + "loss": 1.5479, + "step": 3138 + }, + { + "epoch": 0.33111814345991564, + "grad_norm": 1.2812285423278809, + "learning_rate": 0.0011431842379454982, + "loss": 1.547, + "step": 3139 + }, + { + "epoch": 0.33122362869198313, + "grad_norm": 0.657632052898407, + "learning_rate": 0.001142970421815904, + "loss": 1.514, + "step": 3140 + }, + { + "epoch": 0.3313291139240506, + "grad_norm": 0.7112274169921875, + "learning_rate": 0.0011427565616520144, + "loss": 1.5526, + "step": 3141 + }, + { + "epoch": 0.33143459915611817, + "grad_norm": 0.7432538270950317, + "learning_rate": 0.0011425426574777936, + "loss": 1.4965, + "step": 3142 + }, + { + "epoch": 0.33154008438818566, + "grad_norm": 0.895258903503418, + "learning_rate": 0.0011423287093172106, + "loss": 1.537, + "step": 3143 + }, + { + "epoch": 0.33164556962025316, + "grad_norm": 0.6665063500404358, + "learning_rate": 0.0011421147171942398, + "loss": 1.5423, + "step": 3144 + }, + { + "epoch": 0.33175105485232065, + "grad_norm": 0.8400756120681763, + "learning_rate": 0.0011419006811328593, + "loss": 1.5463, + "step": 3145 + }, + { + "epoch": 0.3318565400843882, + "grad_norm": 0.6335294246673584, + "learning_rate": 
0.0011416866011570534, + "loss": 1.5096, + "step": 3146 + }, + { + "epoch": 0.3319620253164557, + "grad_norm": 0.8134473562240601, + "learning_rate": 0.0011414724772908105, + "loss": 1.5011, + "step": 3147 + }, + { + "epoch": 0.3320675105485232, + "grad_norm": 0.6803396940231323, + "learning_rate": 0.0011412583095581248, + "loss": 1.5133, + "step": 3148 + }, + { + "epoch": 0.33217299578059073, + "grad_norm": 1.0194330215454102, + "learning_rate": 0.0011410440979829942, + "loss": 1.5079, + "step": 3149 + }, + { + "epoch": 0.3322784810126582, + "grad_norm": 0.7299574613571167, + "learning_rate": 0.0011408298425894226, + "loss": 1.5094, + "step": 3150 + }, + { + "epoch": 0.3323839662447257, + "grad_norm": 0.9955706000328064, + "learning_rate": 0.0011406155434014185, + "loss": 1.524, + "step": 3151 + }, + { + "epoch": 0.33248945147679326, + "grad_norm": 1.0375969409942627, + "learning_rate": 0.0011404012004429948, + "loss": 1.5171, + "step": 3152 + }, + { + "epoch": 0.33259493670886076, + "grad_norm": 0.7220268845558167, + "learning_rate": 0.00114018681373817, + "loss": 1.5338, + "step": 3153 + }, + { + "epoch": 0.33270042194092825, + "grad_norm": 0.6943667531013489, + "learning_rate": 0.001139972383310967, + "loss": 1.5235, + "step": 3154 + }, + { + "epoch": 0.3328059071729958, + "grad_norm": 0.6873776912689209, + "learning_rate": 0.0011397579091854137, + "loss": 1.5241, + "step": 3155 + }, + { + "epoch": 0.3329113924050633, + "grad_norm": 0.6469470262527466, + "learning_rate": 0.0011395433913855434, + "loss": 1.5187, + "step": 3156 + }, + { + "epoch": 0.3330168776371308, + "grad_norm": 0.637839674949646, + "learning_rate": 0.0011393288299353934, + "loss": 1.5162, + "step": 3157 + }, + { + "epoch": 0.33312236286919833, + "grad_norm": 0.7291017770767212, + "learning_rate": 0.001139114224859007, + "loss": 1.5013, + "step": 3158 + }, + { + "epoch": 0.3332278481012658, + "grad_norm": 0.7359235286712646, + "learning_rate": 0.0011388995761804311, + "loss": 1.5403, + "step": 
3159 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.6520144939422607, + "learning_rate": 0.0011386848839237186, + "loss": 1.5271, + "step": 3160 + }, + { + "epoch": 0.33343881856540086, + "grad_norm": 0.7447418570518494, + "learning_rate": 0.0011384701481129266, + "loss": 1.5254, + "step": 3161 + }, + { + "epoch": 0.33354430379746836, + "grad_norm": 0.8279687166213989, + "learning_rate": 0.0011382553687721174, + "loss": 1.5242, + "step": 3162 + }, + { + "epoch": 0.33364978902953585, + "grad_norm": 0.7678663730621338, + "learning_rate": 0.0011380405459253582, + "loss": 1.5246, + "step": 3163 + }, + { + "epoch": 0.3337552742616034, + "grad_norm": 0.7631957530975342, + "learning_rate": 0.0011378256795967208, + "loss": 1.5274, + "step": 3164 + }, + { + "epoch": 0.3338607594936709, + "grad_norm": 0.937030017375946, + "learning_rate": 0.0011376107698102822, + "loss": 1.5192, + "step": 3165 + }, + { + "epoch": 0.3339662447257384, + "grad_norm": 0.7017852067947388, + "learning_rate": 0.001137395816590124, + "loss": 1.5667, + "step": 3166 + }, + { + "epoch": 0.33407172995780593, + "grad_norm": 0.7834508419036865, + "learning_rate": 0.001137180819960333, + "loss": 1.4983, + "step": 3167 + }, + { + "epoch": 0.3341772151898734, + "grad_norm": 0.8663356900215149, + "learning_rate": 0.0011369657799450005, + "loss": 1.5365, + "step": 3168 + }, + { + "epoch": 0.3342827004219409, + "grad_norm": 0.7620875835418701, + "learning_rate": 0.0011367506965682225, + "loss": 1.5423, + "step": 3169 + }, + { + "epoch": 0.33438818565400846, + "grad_norm": 0.6700520515441895, + "learning_rate": 0.0011365355698541005, + "loss": 1.4819, + "step": 3170 + }, + { + "epoch": 0.33449367088607596, + "grad_norm": 0.6616265177726746, + "learning_rate": 0.0011363203998267406, + "loss": 1.5175, + "step": 3171 + }, + { + "epoch": 0.33459915611814345, + "grad_norm": 0.8500560522079468, + "learning_rate": 0.0011361051865102533, + "loss": 1.502, + "step": 3172 + }, + { + "epoch": 0.334704641350211, + 
"grad_norm": 0.8502007722854614, + "learning_rate": 0.0011358899299287546, + "loss": 1.5269, + "step": 3173 + }, + { + "epoch": 0.3348101265822785, + "grad_norm": 0.7802116274833679, + "learning_rate": 0.0011356746301063652, + "loss": 1.5333, + "step": 3174 + }, + { + "epoch": 0.334915611814346, + "grad_norm": 0.7409927248954773, + "learning_rate": 0.0011354592870672104, + "loss": 1.5197, + "step": 3175 + }, + { + "epoch": 0.33502109704641353, + "grad_norm": 0.6940266489982605, + "learning_rate": 0.0011352439008354201, + "loss": 1.5219, + "step": 3176 + }, + { + "epoch": 0.335126582278481, + "grad_norm": 0.6581469178199768, + "learning_rate": 0.0011350284714351298, + "loss": 1.523, + "step": 3177 + }, + { + "epoch": 0.3352320675105485, + "grad_norm": 0.6531473994255066, + "learning_rate": 0.0011348129988904797, + "loss": 1.5139, + "step": 3178 + }, + { + "epoch": 0.335337552742616, + "grad_norm": 0.6892757415771484, + "learning_rate": 0.0011345974832256138, + "loss": 1.479, + "step": 3179 + }, + { + "epoch": 0.33544303797468356, + "grad_norm": 0.7634063959121704, + "learning_rate": 0.0011343819244646824, + "loss": 1.5595, + "step": 3180 + }, + { + "epoch": 0.33554852320675105, + "grad_norm": 0.9249230623245239, + "learning_rate": 0.0011341663226318395, + "loss": 1.4909, + "step": 3181 + }, + { + "epoch": 0.33565400843881854, + "grad_norm": 1.0548282861709595, + "learning_rate": 0.0011339506777512446, + "loss": 1.4995, + "step": 3182 + }, + { + "epoch": 0.3357594936708861, + "grad_norm": 0.7220276594161987, + "learning_rate": 0.0011337349898470617, + "loss": 1.5194, + "step": 3183 + }, + { + "epoch": 0.3358649789029536, + "grad_norm": 0.7691535353660583, + "learning_rate": 0.0011335192589434597, + "loss": 1.5268, + "step": 3184 + }, + { + "epoch": 0.3359704641350211, + "grad_norm": 0.7303735613822937, + "learning_rate": 0.0011333034850646124, + "loss": 1.5251, + "step": 3185 + }, + { + "epoch": 0.3360759493670886, + "grad_norm": 0.679295539855957, + "learning_rate": 
0.0011330876682346981, + "loss": 1.5163, + "step": 3186 + }, + { + "epoch": 0.3361814345991561, + "grad_norm": 0.7102896571159363, + "learning_rate": 0.0011328718084779004, + "loss": 1.5346, + "step": 3187 + }, + { + "epoch": 0.3362869198312236, + "grad_norm": 0.7258500456809998, + "learning_rate": 0.0011326559058184075, + "loss": 1.5279, + "step": 3188 + }, + { + "epoch": 0.33639240506329116, + "grad_norm": 0.6863203644752502, + "learning_rate": 0.001132439960280412, + "loss": 1.4953, + "step": 3189 + }, + { + "epoch": 0.33649789029535865, + "grad_norm": 0.7879579067230225, + "learning_rate": 0.001132223971888112, + "loss": 1.5462, + "step": 3190 + }, + { + "epoch": 0.33660337552742614, + "grad_norm": 0.6945745348930359, + "learning_rate": 0.0011320079406657102, + "loss": 1.5143, + "step": 3191 + }, + { + "epoch": 0.3367088607594937, + "grad_norm": 0.8093021512031555, + "learning_rate": 0.0011317918666374138, + "loss": 1.495, + "step": 3192 + }, + { + "epoch": 0.3368143459915612, + "grad_norm": 0.757619321346283, + "learning_rate": 0.0011315757498274349, + "loss": 1.5656, + "step": 3193 + }, + { + "epoch": 0.3369198312236287, + "grad_norm": 0.7153559327125549, + "learning_rate": 0.0011313595902599904, + "loss": 1.5435, + "step": 3194 + }, + { + "epoch": 0.3370253164556962, + "grad_norm": 0.7309967279434204, + "learning_rate": 0.0011311433879593023, + "loss": 1.4849, + "step": 3195 + }, + { + "epoch": 0.3371308016877637, + "grad_norm": 0.7512174248695374, + "learning_rate": 0.001130927142949597, + "loss": 1.551, + "step": 3196 + }, + { + "epoch": 0.3372362869198312, + "grad_norm": 0.6642071008682251, + "learning_rate": 0.001130710855255106, + "loss": 1.4597, + "step": 3197 + }, + { + "epoch": 0.33734177215189876, + "grad_norm": 0.8273674249649048, + "learning_rate": 0.001130494524900065, + "loss": 1.5252, + "step": 3198 + }, + { + "epoch": 0.33744725738396625, + "grad_norm": 0.837226390838623, + "learning_rate": 0.0011302781519087154, + "loss": 1.4819, + "step": 
3199 + }, + { + "epoch": 0.33755274261603374, + "grad_norm": 0.688612163066864, + "learning_rate": 0.0011300617363053024, + "loss": 1.5178, + "step": 3200 + }, + { + "epoch": 0.3376582278481013, + "grad_norm": 0.6819198131561279, + "learning_rate": 0.0011298452781140769, + "loss": 1.5193, + "step": 3201 + }, + { + "epoch": 0.3377637130801688, + "grad_norm": 0.7897123694419861, + "learning_rate": 0.0011296287773592938, + "loss": 1.5444, + "step": 3202 + }, + { + "epoch": 0.3378691983122363, + "grad_norm": 0.7305850386619568, + "learning_rate": 0.0011294122340652132, + "loss": 1.5442, + "step": 3203 + }, + { + "epoch": 0.3379746835443038, + "grad_norm": 0.7197477221488953, + "learning_rate": 0.0011291956482561, + "loss": 1.5282, + "step": 3204 + }, + { + "epoch": 0.3380801687763713, + "grad_norm": 0.624173641204834, + "learning_rate": 0.0011289790199562233, + "loss": 1.5346, + "step": 3205 + }, + { + "epoch": 0.3381856540084388, + "grad_norm": 0.687995195388794, + "learning_rate": 0.001128762349189858, + "loss": 1.537, + "step": 3206 + }, + { + "epoch": 0.33829113924050636, + "grad_norm": 0.6278294324874878, + "learning_rate": 0.0011285456359812825, + "loss": 1.5325, + "step": 3207 + }, + { + "epoch": 0.33839662447257385, + "grad_norm": 0.6732079386711121, + "learning_rate": 0.0011283288803547809, + "loss": 1.4943, + "step": 3208 + }, + { + "epoch": 0.33850210970464134, + "grad_norm": 0.777282178401947, + "learning_rate": 0.0011281120823346418, + "loss": 1.5022, + "step": 3209 + }, + { + "epoch": 0.33860759493670883, + "grad_norm": 0.9362792372703552, + "learning_rate": 0.0011278952419451586, + "loss": 1.5192, + "step": 3210 + }, + { + "epoch": 0.3387130801687764, + "grad_norm": 0.7324205040931702, + "learning_rate": 0.0011276783592106291, + "loss": 1.5013, + "step": 3211 + }, + { + "epoch": 0.3388185654008439, + "grad_norm": 0.6489834785461426, + "learning_rate": 0.001127461434155356, + "loss": 1.505, + "step": 3212 + }, + { + "epoch": 0.33892405063291137, + 
"grad_norm": 0.6262931227684021, + "learning_rate": 0.001127244466803647, + "loss": 1.5003, + "step": 3213 + }, + { + "epoch": 0.3390295358649789, + "grad_norm": 0.599428117275238, + "learning_rate": 0.0011270274571798147, + "loss": 1.5534, + "step": 3214 + }, + { + "epoch": 0.3391350210970464, + "grad_norm": 0.6495329141616821, + "learning_rate": 0.0011268104053081755, + "loss": 1.5306, + "step": 3215 + }, + { + "epoch": 0.3392405063291139, + "grad_norm": 0.6080668568611145, + "learning_rate": 0.0011265933112130516, + "loss": 1.5289, + "step": 3216 + }, + { + "epoch": 0.33934599156118145, + "grad_norm": 0.63544100522995, + "learning_rate": 0.0011263761749187693, + "loss": 1.5108, + "step": 3217 + }, + { + "epoch": 0.33945147679324894, + "grad_norm": 0.6840908527374268, + "learning_rate": 0.0011261589964496597, + "loss": 1.5267, + "step": 3218 + }, + { + "epoch": 0.33955696202531643, + "grad_norm": 0.7814382910728455, + "learning_rate": 0.001125941775830059, + "loss": 1.4971, + "step": 3219 + }, + { + "epoch": 0.339662447257384, + "grad_norm": 0.7283262610435486, + "learning_rate": 0.0011257245130843077, + "loss": 1.526, + "step": 3220 + }, + { + "epoch": 0.3397679324894515, + "grad_norm": 0.6417208313941956, + "learning_rate": 0.0011255072082367512, + "loss": 1.5145, + "step": 3221 + }, + { + "epoch": 0.33987341772151897, + "grad_norm": 0.7981081008911133, + "learning_rate": 0.0011252898613117394, + "loss": 1.5276, + "step": 3222 + }, + { + "epoch": 0.3399789029535865, + "grad_norm": 0.7749520540237427, + "learning_rate": 0.0011250724723336273, + "loss": 1.5222, + "step": 3223 + }, + { + "epoch": 0.340084388185654, + "grad_norm": 0.6969601511955261, + "learning_rate": 0.0011248550413267746, + "loss": 1.4916, + "step": 3224 + }, + { + "epoch": 0.3401898734177215, + "grad_norm": 0.9640623331069946, + "learning_rate": 0.001124637568315545, + "loss": 1.5202, + "step": 3225 + }, + { + "epoch": 0.34029535864978905, + "grad_norm": 0.7094578742980957, + "learning_rate": 
0.001124420053324308, + "loss": 1.4898, + "step": 3226 + }, + { + "epoch": 0.34040084388185654, + "grad_norm": 0.7188253402709961, + "learning_rate": 0.001124202496377437, + "loss": 1.4884, + "step": 3227 + }, + { + "epoch": 0.34050632911392403, + "grad_norm": 0.9028517603874207, + "learning_rate": 0.0011239848974993103, + "loss": 1.4936, + "step": 3228 + }, + { + "epoch": 0.3406118143459916, + "grad_norm": 0.6731326580047607, + "learning_rate": 0.0011237672567143107, + "loss": 1.5286, + "step": 3229 + }, + { + "epoch": 0.3407172995780591, + "grad_norm": 0.9405009746551514, + "learning_rate": 0.0011235495740468265, + "loss": 1.5237, + "step": 3230 + }, + { + "epoch": 0.34082278481012657, + "grad_norm": 0.7721452713012695, + "learning_rate": 0.00112333184952125, + "loss": 1.4896, + "step": 3231 + }, + { + "epoch": 0.3409282700421941, + "grad_norm": 0.8174638152122498, + "learning_rate": 0.001123114083161978, + "loss": 1.4942, + "step": 3232 + }, + { + "epoch": 0.3410337552742616, + "grad_norm": 0.9227631092071533, + "learning_rate": 0.0011228962749934123, + "loss": 1.4962, + "step": 3233 + }, + { + "epoch": 0.3411392405063291, + "grad_norm": 0.6713069677352905, + "learning_rate": 0.0011226784250399598, + "loss": 1.4834, + "step": 3234 + }, + { + "epoch": 0.34124472573839665, + "grad_norm": 1.0134245157241821, + "learning_rate": 0.0011224605333260312, + "loss": 1.4802, + "step": 3235 + }, + { + "epoch": 0.34135021097046414, + "grad_norm": 0.923168957233429, + "learning_rate": 0.0011222425998760428, + "loss": 1.5245, + "step": 3236 + }, + { + "epoch": 0.34145569620253163, + "grad_norm": 0.6957815885543823, + "learning_rate": 0.0011220246247144149, + "loss": 1.4863, + "step": 3237 + }, + { + "epoch": 0.3415611814345992, + "grad_norm": 0.7310555577278137, + "learning_rate": 0.0011218066078655725, + "loss": 1.4876, + "step": 3238 + }, + { + "epoch": 0.3416666666666667, + "grad_norm": 0.7358708381652832, + "learning_rate": 0.001121588549353946, + "loss": 1.4753, + "step": 
3239 + }, + { + "epoch": 0.34177215189873417, + "grad_norm": 1.0232446193695068, + "learning_rate": 0.0011213704492039694, + "loss": 1.5194, + "step": 3240 + }, + { + "epoch": 0.3418776371308017, + "grad_norm": 0.7220911383628845, + "learning_rate": 0.0011211523074400823, + "loss": 1.5085, + "step": 3241 + }, + { + "epoch": 0.3419831223628692, + "grad_norm": 1.013839602470398, + "learning_rate": 0.0011209341240867282, + "loss": 1.5318, + "step": 3242 + }, + { + "epoch": 0.3420886075949367, + "grad_norm": 0.7730945348739624, + "learning_rate": 0.001120715899168356, + "loss": 1.4948, + "step": 3243 + }, + { + "epoch": 0.3421940928270042, + "grad_norm": 0.7804702520370483, + "learning_rate": 0.0011204976327094187, + "loss": 1.5346, + "step": 3244 + }, + { + "epoch": 0.34229957805907174, + "grad_norm": 0.7171404957771301, + "learning_rate": 0.0011202793247343742, + "loss": 1.493, + "step": 3245 + }, + { + "epoch": 0.34240506329113923, + "grad_norm": 0.693139374256134, + "learning_rate": 0.001120060975267685, + "loss": 1.5078, + "step": 3246 + }, + { + "epoch": 0.3425105485232067, + "grad_norm": 0.6848937273025513, + "learning_rate": 0.0011198425843338183, + "loss": 1.5251, + "step": 3247 + }, + { + "epoch": 0.3426160337552743, + "grad_norm": 0.7122357487678528, + "learning_rate": 0.0011196241519572457, + "loss": 1.5407, + "step": 3248 + }, + { + "epoch": 0.34272151898734177, + "grad_norm": 0.683201253414154, + "learning_rate": 0.001119405678162444, + "loss": 1.5187, + "step": 3249 + }, + { + "epoch": 0.34282700421940926, + "grad_norm": 0.6900715231895447, + "learning_rate": 0.001119187162973894, + "loss": 1.5336, + "step": 3250 + }, + { + "epoch": 0.3429324894514768, + "grad_norm": 0.7465255856513977, + "learning_rate": 0.0011189686064160811, + "loss": 1.494, + "step": 3251 + }, + { + "epoch": 0.3430379746835443, + "grad_norm": 1.0652883052825928, + "learning_rate": 0.001118750008513496, + "loss": 1.5455, + "step": 3252 + }, + { + "epoch": 0.3431434599156118, + 
"grad_norm": 0.6913561224937439, + "learning_rate": 0.0011185313692906342, + "loss": 1.5355, + "step": 3253 + }, + { + "epoch": 0.34324894514767934, + "grad_norm": 0.9687121510505676, + "learning_rate": 0.0011183126887719945, + "loss": 1.4977, + "step": 3254 + }, + { + "epoch": 0.34335443037974683, + "grad_norm": 0.8660417199134827, + "learning_rate": 0.0011180939669820813, + "loss": 1.4884, + "step": 3255 + }, + { + "epoch": 0.3434599156118143, + "grad_norm": 0.6917083263397217, + "learning_rate": 0.001117875203945404, + "loss": 1.4731, + "step": 3256 + }, + { + "epoch": 0.3435654008438819, + "grad_norm": 0.7494186758995056, + "learning_rate": 0.0011176563996864754, + "loss": 1.5388, + "step": 3257 + }, + { + "epoch": 0.34367088607594937, + "grad_norm": 0.7207738757133484, + "learning_rate": 0.0011174375542298142, + "loss": 1.4944, + "step": 3258 + }, + { + "epoch": 0.34377637130801686, + "grad_norm": 0.9842816591262817, + "learning_rate": 0.0011172186675999425, + "loss": 1.5004, + "step": 3259 + }, + { + "epoch": 0.3438818565400844, + "grad_norm": 0.6482349038124084, + "learning_rate": 0.001116999739821388, + "loss": 1.4928, + "step": 3260 + }, + { + "epoch": 0.3439873417721519, + "grad_norm": 0.8512858152389526, + "learning_rate": 0.0011167807709186828, + "loss": 1.5388, + "step": 3261 + }, + { + "epoch": 0.3440928270042194, + "grad_norm": 0.6863115429878235, + "learning_rate": 0.0011165617609163632, + "loss": 1.5321, + "step": 3262 + }, + { + "epoch": 0.34419831223628694, + "grad_norm": 0.7594689130783081, + "learning_rate": 0.0011163427098389706, + "loss": 1.5345, + "step": 3263 + }, + { + "epoch": 0.34430379746835443, + "grad_norm": 0.8007660508155823, + "learning_rate": 0.0011161236177110504, + "loss": 1.4604, + "step": 3264 + }, + { + "epoch": 0.3444092827004219, + "grad_norm": 0.7029464244842529, + "learning_rate": 0.0011159044845571533, + "loss": 1.5078, + "step": 3265 + }, + { + "epoch": 0.3445147679324895, + "grad_norm": 0.6528224945068359, + 
"learning_rate": 0.0011156853104018342, + "loss": 1.5413, + "step": 3266 + }, + { + "epoch": 0.34462025316455697, + "grad_norm": 0.7483561038970947, + "learning_rate": 0.0011154660952696525, + "loss": 1.5469, + "step": 3267 + }, + { + "epoch": 0.34472573839662446, + "grad_norm": 0.7452855110168457, + "learning_rate": 0.0011152468391851724, + "loss": 1.5321, + "step": 3268 + }, + { + "epoch": 0.344831223628692, + "grad_norm": 0.6305812001228333, + "learning_rate": 0.0011150275421729628, + "loss": 1.5297, + "step": 3269 + }, + { + "epoch": 0.3449367088607595, + "grad_norm": 0.778160035610199, + "learning_rate": 0.0011148082042575968, + "loss": 1.5383, + "step": 3270 + }, + { + "epoch": 0.345042194092827, + "grad_norm": 0.712450385093689, + "learning_rate": 0.0011145888254636526, + "loss": 1.4874, + "step": 3271 + }, + { + "epoch": 0.34514767932489454, + "grad_norm": 0.6904246211051941, + "learning_rate": 0.0011143694058157122, + "loss": 1.5673, + "step": 3272 + }, + { + "epoch": 0.34525316455696203, + "grad_norm": 0.6697987914085388, + "learning_rate": 0.0011141499453383632, + "loss": 1.5239, + "step": 3273 + }, + { + "epoch": 0.3453586497890295, + "grad_norm": 0.7048588395118713, + "learning_rate": 0.001113930444056197, + "loss": 1.5178, + "step": 3274 + }, + { + "epoch": 0.3454641350210971, + "grad_norm": 0.7049442529678345, + "learning_rate": 0.00111371090199381, + "loss": 1.5387, + "step": 3275 + }, + { + "epoch": 0.34556962025316457, + "grad_norm": 0.6854183077812195, + "learning_rate": 0.0011134913191758024, + "loss": 1.5454, + "step": 3276 + }, + { + "epoch": 0.34567510548523206, + "grad_norm": 0.6309434771537781, + "learning_rate": 0.00111327169562678, + "loss": 1.5256, + "step": 3277 + }, + { + "epoch": 0.34578059071729955, + "grad_norm": 0.6266121864318848, + "learning_rate": 0.0011130520313713528, + "loss": 1.477, + "step": 3278 + }, + { + "epoch": 0.3458860759493671, + "grad_norm": 0.6714829206466675, + "learning_rate": 0.0011128323264341352, + "loss": 
1.5305, + "step": 3279 + }, + { + "epoch": 0.3459915611814346, + "grad_norm": 0.6430190801620483, + "learning_rate": 0.0011126125808397461, + "loss": 1.5231, + "step": 3280 + }, + { + "epoch": 0.3460970464135021, + "grad_norm": 0.6101608872413635, + "learning_rate": 0.0011123927946128092, + "loss": 1.4978, + "step": 3281 + }, + { + "epoch": 0.34620253164556963, + "grad_norm": 0.6927487254142761, + "learning_rate": 0.0011121729677779526, + "loss": 1.52, + "step": 3282 + }, + { + "epoch": 0.3463080168776371, + "grad_norm": 0.823867678642273, + "learning_rate": 0.001111953100359809, + "loss": 1.475, + "step": 3283 + }, + { + "epoch": 0.3464135021097046, + "grad_norm": 0.7532235383987427, + "learning_rate": 0.0011117331923830157, + "loss": 1.5337, + "step": 3284 + }, + { + "epoch": 0.34651898734177217, + "grad_norm": 0.6318394541740417, + "learning_rate": 0.0011115132438722143, + "loss": 1.5077, + "step": 3285 + }, + { + "epoch": 0.34662447257383966, + "grad_norm": 0.6647289991378784, + "learning_rate": 0.0011112932548520513, + "loss": 1.5286, + "step": 3286 + }, + { + "epoch": 0.34672995780590715, + "grad_norm": 0.6619812250137329, + "learning_rate": 0.0011110732253471777, + "loss": 1.5735, + "step": 3287 + }, + { + "epoch": 0.3468354430379747, + "grad_norm": 0.6626503467559814, + "learning_rate": 0.0011108531553822485, + "loss": 1.4777, + "step": 3288 + }, + { + "epoch": 0.3469409282700422, + "grad_norm": 0.7732320427894592, + "learning_rate": 0.001110633044981924, + "loss": 1.4807, + "step": 3289 + }, + { + "epoch": 0.3470464135021097, + "grad_norm": 1.0216768980026245, + "learning_rate": 0.0011104128941708683, + "loss": 1.4498, + "step": 3290 + }, + { + "epoch": 0.34715189873417723, + "grad_norm": 0.8629582524299622, + "learning_rate": 0.001110192702973751, + "loss": 1.4881, + "step": 3291 + }, + { + "epoch": 0.3472573839662447, + "grad_norm": 0.6401883959770203, + "learning_rate": 0.001109972471415245, + "loss": 1.5049, + "step": 3292 + }, + { + "epoch": 
0.3473628691983122, + "grad_norm": 0.6863206028938293, + "learning_rate": 0.0011097521995200288, + "loss": 1.5027, + "step": 3293 + }, + { + "epoch": 0.34746835443037977, + "grad_norm": 0.6963392496109009, + "learning_rate": 0.0011095318873127844, + "loss": 1.5137, + "step": 3294 + }, + { + "epoch": 0.34757383966244726, + "grad_norm": 0.626484215259552, + "learning_rate": 0.0011093115348181995, + "loss": 1.5004, + "step": 3295 + }, + { + "epoch": 0.34767932489451475, + "grad_norm": 0.6633526086807251, + "learning_rate": 0.0011090911420609654, + "loss": 1.5562, + "step": 3296 + }, + { + "epoch": 0.3477848101265823, + "grad_norm": 0.6781042814254761, + "learning_rate": 0.0011088707090657784, + "loss": 1.5112, + "step": 3297 + }, + { + "epoch": 0.3478902953586498, + "grad_norm": 0.6743370890617371, + "learning_rate": 0.0011086502358573387, + "loss": 1.4931, + "step": 3298 + }, + { + "epoch": 0.3479957805907173, + "grad_norm": 0.6970534920692444, + "learning_rate": 0.0011084297224603517, + "loss": 1.5269, + "step": 3299 + }, + { + "epoch": 0.34810126582278483, + "grad_norm": 0.6344352960586548, + "learning_rate": 0.001108209168899527, + "loss": 1.5241, + "step": 3300 + }, + { + "epoch": 0.3482067510548523, + "grad_norm": 0.689744770526886, + "learning_rate": 0.0011079885751995788, + "loss": 1.5197, + "step": 3301 + }, + { + "epoch": 0.3483122362869198, + "grad_norm": 0.7785930633544922, + "learning_rate": 0.0011077679413852258, + "loss": 1.4999, + "step": 3302 + }, + { + "epoch": 0.34841772151898737, + "grad_norm": 0.8714576959609985, + "learning_rate": 0.0011075472674811908, + "loss": 1.5455, + "step": 3303 + }, + { + "epoch": 0.34852320675105486, + "grad_norm": 0.9042152762413025, + "learning_rate": 0.0011073265535122016, + "loss": 1.5218, + "step": 3304 + }, + { + "epoch": 0.34862869198312235, + "grad_norm": 0.9612825512886047, + "learning_rate": 0.0011071057995029902, + "loss": 1.5398, + "step": 3305 + }, + { + "epoch": 0.3487341772151899, + "grad_norm": 
0.8131027817726135, + "learning_rate": 0.0011068850054782933, + "loss": 1.4882, + "step": 3306 + }, + { + "epoch": 0.3488396624472574, + "grad_norm": 0.6450968980789185, + "learning_rate": 0.0011066641714628522, + "loss": 1.5119, + "step": 3307 + }, + { + "epoch": 0.3489451476793249, + "grad_norm": 0.6980674266815186, + "learning_rate": 0.001106443297481412, + "loss": 1.5237, + "step": 3308 + }, + { + "epoch": 0.3490506329113924, + "grad_norm": 0.7150322794914246, + "learning_rate": 0.001106222383558723, + "loss": 1.5134, + "step": 3309 + }, + { + "epoch": 0.3491561181434599, + "grad_norm": 0.6315184235572815, + "learning_rate": 0.0011060014297195396, + "loss": 1.4911, + "step": 3310 + }, + { + "epoch": 0.3492616033755274, + "grad_norm": 0.7295318841934204, + "learning_rate": 0.0011057804359886209, + "loss": 1.508, + "step": 3311 + }, + { + "epoch": 0.3493670886075949, + "grad_norm": 0.6289675235748291, + "learning_rate": 0.0011055594023907302, + "loss": 1.544, + "step": 3312 + }, + { + "epoch": 0.34947257383966246, + "grad_norm": 0.772550106048584, + "learning_rate": 0.0011053383289506354, + "loss": 1.4675, + "step": 3313 + }, + { + "epoch": 0.34957805907172995, + "grad_norm": 0.7334232926368713, + "learning_rate": 0.001105117215693109, + "loss": 1.5516, + "step": 3314 + }, + { + "epoch": 0.34968354430379744, + "grad_norm": 0.7576287984848022, + "learning_rate": 0.001104896062642928, + "loss": 1.5067, + "step": 3315 + }, + { + "epoch": 0.349789029535865, + "grad_norm": 0.7954704761505127, + "learning_rate": 0.001104674869824873, + "loss": 1.4884, + "step": 3316 + }, + { + "epoch": 0.3498945147679325, + "grad_norm": 0.6958885788917542, + "learning_rate": 0.0011044536372637307, + "loss": 1.5497, + "step": 3317 + }, + { + "epoch": 0.35, + "grad_norm": 0.7231343388557434, + "learning_rate": 0.001104232364984291, + "loss": 1.5443, + "step": 3318 + }, + { + "epoch": 0.3501054852320675, + "grad_norm": 0.7422758936882019, + "learning_rate": 0.001104011053011348, + "loss": 
1.5152, + "step": 3319 + }, + { + "epoch": 0.350210970464135, + "grad_norm": 0.8102894425392151, + "learning_rate": 0.0011037897013697015, + "loss": 1.5214, + "step": 3320 + }, + { + "epoch": 0.3503164556962025, + "grad_norm": 0.6796724200248718, + "learning_rate": 0.0011035683100841548, + "loss": 1.4582, + "step": 3321 + }, + { + "epoch": 0.35042194092827006, + "grad_norm": 0.8239926099777222, + "learning_rate": 0.0011033468791795161, + "loss": 1.5054, + "step": 3322 + }, + { + "epoch": 0.35052742616033755, + "grad_norm": 0.7097301483154297, + "learning_rate": 0.0011031254086805973, + "loss": 1.5516, + "step": 3323 + }, + { + "epoch": 0.35063291139240504, + "grad_norm": 0.7381468415260315, + "learning_rate": 0.0011029038986122156, + "loss": 1.4875, + "step": 3324 + }, + { + "epoch": 0.3507383966244726, + "grad_norm": 0.6811791658401489, + "learning_rate": 0.0011026823489991924, + "loss": 1.5421, + "step": 3325 + }, + { + "epoch": 0.3508438818565401, + "grad_norm": 0.6773131489753723, + "learning_rate": 0.0011024607598663539, + "loss": 1.4843, + "step": 3326 + }, + { + "epoch": 0.3509493670886076, + "grad_norm": 0.7530147433280945, + "learning_rate": 0.001102239131238529, + "loss": 1.5344, + "step": 3327 + }, + { + "epoch": 0.3510548523206751, + "grad_norm": 0.7123945355415344, + "learning_rate": 0.0011020174631405533, + "loss": 1.5322, + "step": 3328 + }, + { + "epoch": 0.3511603375527426, + "grad_norm": 0.7054051756858826, + "learning_rate": 0.0011017957555972656, + "loss": 1.5236, + "step": 3329 + }, + { + "epoch": 0.3512658227848101, + "grad_norm": 0.6528933644294739, + "learning_rate": 0.0011015740086335092, + "loss": 1.4739, + "step": 3330 + }, + { + "epoch": 0.35137130801687766, + "grad_norm": 0.6821040511131287, + "learning_rate": 0.001101352222274132, + "loss": 1.5063, + "step": 3331 + }, + { + "epoch": 0.35147679324894515, + "grad_norm": 0.7183469533920288, + "learning_rate": 0.0011011303965439863, + "loss": 1.4954, + "step": 3332 + }, + { + "epoch": 
0.35158227848101264, + "grad_norm": 0.9800889492034912, + "learning_rate": 0.0011009085314679287, + "loss": 1.5701, + "step": 3333 + }, + { + "epoch": 0.3516877637130802, + "grad_norm": 1.0010946989059448, + "learning_rate": 0.0011006866270708204, + "loss": 1.4965, + "step": 3334 + }, + { + "epoch": 0.3517932489451477, + "grad_norm": 0.7533735632896423, + "learning_rate": 0.0011004646833775269, + "loss": 1.4794, + "step": 3335 + }, + { + "epoch": 0.3518987341772152, + "grad_norm": 0.7106168866157532, + "learning_rate": 0.0011002427004129184, + "loss": 1.5146, + "step": 3336 + }, + { + "epoch": 0.3520042194092827, + "grad_norm": 0.8976284265518188, + "learning_rate": 0.0011000206782018683, + "loss": 1.5019, + "step": 3337 + }, + { + "epoch": 0.3521097046413502, + "grad_norm": 0.9021469950675964, + "learning_rate": 0.001099798616769256, + "loss": 1.5118, + "step": 3338 + }, + { + "epoch": 0.3522151898734177, + "grad_norm": 0.6628067493438721, + "learning_rate": 0.0010995765161399646, + "loss": 1.5303, + "step": 3339 + }, + { + "epoch": 0.35232067510548526, + "grad_norm": 0.741376519203186, + "learning_rate": 0.0010993543763388814, + "loss": 1.4926, + "step": 3340 + }, + { + "epoch": 0.35242616033755275, + "grad_norm": 0.6567178964614868, + "learning_rate": 0.0010991321973908982, + "loss": 1.4778, + "step": 3341 + }, + { + "epoch": 0.35253164556962024, + "grad_norm": 0.7075969576835632, + "learning_rate": 0.0010989099793209112, + "loss": 1.5301, + "step": 3342 + }, + { + "epoch": 0.35263713080168774, + "grad_norm": 0.8000706434249878, + "learning_rate": 0.0010986877221538214, + "loss": 1.4922, + "step": 3343 + }, + { + "epoch": 0.3527426160337553, + "grad_norm": 0.6288245916366577, + "learning_rate": 0.0010984654259145335, + "loss": 1.5261, + "step": 3344 + }, + { + "epoch": 0.3528481012658228, + "grad_norm": 0.7275409698486328, + "learning_rate": 0.0010982430906279572, + "loss": 1.5324, + "step": 3345 + }, + { + "epoch": 0.35295358649789027, + "grad_norm": 
0.7566996216773987, + "learning_rate": 0.001098020716319006, + "loss": 1.5001, + "step": 3346 + }, + { + "epoch": 0.3530590717299578, + "grad_norm": 0.6528691053390503, + "learning_rate": 0.0010977983030125982, + "loss": 1.5189, + "step": 3347 + }, + { + "epoch": 0.3531645569620253, + "grad_norm": 0.6460501551628113, + "learning_rate": 0.001097575850733656, + "loss": 1.491, + "step": 3348 + }, + { + "epoch": 0.3532700421940928, + "grad_norm": 0.6378699541091919, + "learning_rate": 0.001097353359507107, + "loss": 1.4681, + "step": 3349 + }, + { + "epoch": 0.35337552742616035, + "grad_norm": 0.6511355638504028, + "learning_rate": 0.0010971308293578814, + "loss": 1.4906, + "step": 3350 + }, + { + "epoch": 0.35348101265822784, + "grad_norm": 0.6747869253158569, + "learning_rate": 0.0010969082603109158, + "loss": 1.4962, + "step": 3351 + }, + { + "epoch": 0.35358649789029534, + "grad_norm": 0.7929050922393799, + "learning_rate": 0.00109668565239115, + "loss": 1.5467, + "step": 3352 + }, + { + "epoch": 0.3536919831223629, + "grad_norm": 0.7363895773887634, + "learning_rate": 0.001096463005623528, + "loss": 1.501, + "step": 3353 + }, + { + "epoch": 0.3537974683544304, + "grad_norm": 0.6900444626808167, + "learning_rate": 0.0010962403200329984, + "loss": 1.468, + "step": 3354 + }, + { + "epoch": 0.35390295358649787, + "grad_norm": 0.7532036304473877, + "learning_rate": 0.0010960175956445145, + "loss": 1.523, + "step": 3355 + }, + { + "epoch": 0.3540084388185654, + "grad_norm": 0.6560195088386536, + "learning_rate": 0.0010957948324830337, + "loss": 1.4857, + "step": 3356 + }, + { + "epoch": 0.3541139240506329, + "grad_norm": 0.8880114555358887, + "learning_rate": 0.0010955720305735176, + "loss": 1.5163, + "step": 3357 + }, + { + "epoch": 0.3542194092827004, + "grad_norm": 0.9124129414558411, + "learning_rate": 0.0010953491899409321, + "loss": 1.4847, + "step": 3358 + }, + { + "epoch": 0.35432489451476795, + "grad_norm": 0.6817755699157715, + "learning_rate": 
0.001095126310610248, + "loss": 1.5059, + "step": 3359 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 0.7906236052513123, + "learning_rate": 0.0010949033926064397, + "loss": 1.5312, + "step": 3360 + }, + { + "epoch": 0.35453586497890294, + "grad_norm": 0.64002925157547, + "learning_rate": 0.0010946804359544867, + "loss": 1.513, + "step": 3361 + }, + { + "epoch": 0.3546413502109705, + "grad_norm": 0.7213395833969116, + "learning_rate": 0.001094457440679372, + "loss": 1.5343, + "step": 3362 + }, + { + "epoch": 0.354746835443038, + "grad_norm": 0.734682559967041, + "learning_rate": 0.0010942344068060833, + "loss": 1.5078, + "step": 3363 + }, + { + "epoch": 0.35485232067510547, + "grad_norm": 0.657082200050354, + "learning_rate": 0.001094011334359613, + "loss": 1.5087, + "step": 3364 + }, + { + "epoch": 0.354957805907173, + "grad_norm": 0.712727963924408, + "learning_rate": 0.0010937882233649572, + "loss": 1.4627, + "step": 3365 + }, + { + "epoch": 0.3550632911392405, + "grad_norm": 0.6136417388916016, + "learning_rate": 0.0010935650738471167, + "loss": 1.4982, + "step": 3366 + }, + { + "epoch": 0.355168776371308, + "grad_norm": 0.6637826561927795, + "learning_rate": 0.0010933418858310965, + "loss": 1.519, + "step": 3367 + }, + { + "epoch": 0.35527426160337555, + "grad_norm": 0.659335196018219, + "learning_rate": 0.0010931186593419059, + "loss": 1.5101, + "step": 3368 + }, + { + "epoch": 0.35537974683544304, + "grad_norm": 0.6846908926963806, + "learning_rate": 0.0010928953944045585, + "loss": 1.5385, + "step": 3369 + }, + { + "epoch": 0.35548523206751054, + "grad_norm": 0.7758545279502869, + "learning_rate": 0.0010926720910440725, + "loss": 1.5153, + "step": 3370 + }, + { + "epoch": 0.3555907172995781, + "grad_norm": 0.7112063765525818, + "learning_rate": 0.00109244874928547, + "loss": 1.5066, + "step": 3371 + }, + { + "epoch": 0.3556962025316456, + "grad_norm": 0.6599509119987488, + "learning_rate": 0.0010922253691537773, + "loss": 1.4853, + "step": 3372 + 
}, + { + "epoch": 0.35580168776371307, + "grad_norm": 0.6890398859977722, + "learning_rate": 0.0010920019506740256, + "loss": 1.4837, + "step": 3373 + }, + { + "epoch": 0.35590717299578056, + "grad_norm": 0.7142712473869324, + "learning_rate": 0.00109177849387125, + "loss": 1.5165, + "step": 3374 + }, + { + "epoch": 0.3560126582278481, + "grad_norm": 0.7469745874404907, + "learning_rate": 0.00109155499877049, + "loss": 1.5229, + "step": 3375 + }, + { + "epoch": 0.3561181434599156, + "grad_norm": 0.7772284150123596, + "learning_rate": 0.001091331465396789, + "loss": 1.5352, + "step": 3376 + }, + { + "epoch": 0.3562236286919831, + "grad_norm": 0.7442648410797119, + "learning_rate": 0.0010911078937751954, + "loss": 1.5011, + "step": 3377 + }, + { + "epoch": 0.35632911392405064, + "grad_norm": 0.7943164110183716, + "learning_rate": 0.0010908842839307614, + "loss": 1.5027, + "step": 3378 + }, + { + "epoch": 0.35643459915611814, + "grad_norm": 0.6681036949157715, + "learning_rate": 0.0010906606358885437, + "loss": 1.5111, + "step": 3379 + }, + { + "epoch": 0.35654008438818563, + "grad_norm": 0.8647040128707886, + "learning_rate": 0.001090436949673603, + "loss": 1.5081, + "step": 3380 + }, + { + "epoch": 0.3566455696202532, + "grad_norm": 0.9183293581008911, + "learning_rate": 0.0010902132253110043, + "loss": 1.4969, + "step": 3381 + }, + { + "epoch": 0.35675105485232067, + "grad_norm": 0.6872519254684448, + "learning_rate": 0.0010899894628258174, + "loss": 1.4508, + "step": 3382 + }, + { + "epoch": 0.35685654008438816, + "grad_norm": 0.7281395196914673, + "learning_rate": 0.001089765662243116, + "loss": 1.5309, + "step": 3383 + }, + { + "epoch": 0.3569620253164557, + "grad_norm": 0.8392608761787415, + "learning_rate": 0.0010895418235879776, + "loss": 1.5164, + "step": 3384 + }, + { + "epoch": 0.3570675105485232, + "grad_norm": 0.6712735295295715, + "learning_rate": 0.0010893179468854848, + "loss": 1.4559, + "step": 3385 + }, + { + "epoch": 0.3571729957805907, + 
"grad_norm": 0.6444318890571594, + "learning_rate": 0.0010890940321607245, + "loss": 1.4691, + "step": 3386 + }, + { + "epoch": 0.35727848101265824, + "grad_norm": 0.803979754447937, + "learning_rate": 0.0010888700794387867, + "loss": 1.54, + "step": 3387 + }, + { + "epoch": 0.35738396624472574, + "grad_norm": 0.8717182874679565, + "learning_rate": 0.0010886460887447667, + "loss": 1.5281, + "step": 3388 + }, + { + "epoch": 0.35748945147679323, + "grad_norm": 0.7086305618286133, + "learning_rate": 0.0010884220601037637, + "loss": 1.5285, + "step": 3389 + }, + { + "epoch": 0.3575949367088608, + "grad_norm": 0.6835879683494568, + "learning_rate": 0.0010881979935408815, + "loss": 1.4833, + "step": 3390 + }, + { + "epoch": 0.35770042194092827, + "grad_norm": 0.9166587591171265, + "learning_rate": 0.0010879738890812278, + "loss": 1.5011, + "step": 3391 + }, + { + "epoch": 0.35780590717299576, + "grad_norm": 1.046378254890442, + "learning_rate": 0.0010877497467499146, + "loss": 1.4885, + "step": 3392 + }, + { + "epoch": 0.3579113924050633, + "grad_norm": 0.7152315974235535, + "learning_rate": 0.001087525566572058, + "loss": 1.4874, + "step": 3393 + }, + { + "epoch": 0.3580168776371308, + "grad_norm": 1.420736312866211, + "learning_rate": 0.0010873013485727782, + "loss": 1.4779, + "step": 3394 + }, + { + "epoch": 0.3581223628691983, + "grad_norm": 0.7477501034736633, + "learning_rate": 0.001087077092777201, + "loss": 1.5209, + "step": 3395 + }, + { + "epoch": 0.35822784810126584, + "grad_norm": 1.411468267440796, + "learning_rate": 0.0010868527992104545, + "loss": 1.523, + "step": 3396 + }, + { + "epoch": 0.35833333333333334, + "grad_norm": 0.7543176412582397, + "learning_rate": 0.001086628467897672, + "loss": 1.4969, + "step": 3397 + }, + { + "epoch": 0.35843881856540083, + "grad_norm": 1.0565534830093384, + "learning_rate": 0.0010864040988639912, + "loss": 1.5084, + "step": 3398 + }, + { + "epoch": 0.3585443037974684, + "grad_norm": 0.7956781983375549, + "learning_rate": 
0.0010861796921345537, + "loss": 1.5586, + "step": 3399 + }, + { + "epoch": 0.35864978902953587, + "grad_norm": 0.9604870676994324, + "learning_rate": 0.0010859552477345052, + "loss": 1.5561, + "step": 3400 + }, + { + "epoch": 0.35875527426160336, + "grad_norm": 0.8586600422859192, + "learning_rate": 0.0010857307656889962, + "loss": 1.5287, + "step": 3401 + }, + { + "epoch": 0.3588607594936709, + "grad_norm": 0.7465792298316956, + "learning_rate": 0.0010855062460231807, + "loss": 1.4901, + "step": 3402 + }, + { + "epoch": 0.3589662447257384, + "grad_norm": 0.7592329382896423, + "learning_rate": 0.0010852816887622174, + "loss": 1.5353, + "step": 3403 + }, + { + "epoch": 0.3590717299578059, + "grad_norm": 0.7645116448402405, + "learning_rate": 0.0010850570939312687, + "loss": 1.4996, + "step": 3404 + }, + { + "epoch": 0.35917721518987344, + "grad_norm": 0.6928794980049133, + "learning_rate": 0.0010848324615555024, + "loss": 1.4921, + "step": 3405 + }, + { + "epoch": 0.35928270042194094, + "grad_norm": 0.7253916263580322, + "learning_rate": 0.0010846077916600888, + "loss": 1.5128, + "step": 3406 + }, + { + "epoch": 0.35938818565400843, + "grad_norm": 0.7427808046340942, + "learning_rate": 0.0010843830842702036, + "loss": 1.4827, + "step": 3407 + }, + { + "epoch": 0.3594936708860759, + "grad_norm": 0.6529711484909058, + "learning_rate": 0.0010841583394110266, + "loss": 1.5018, + "step": 3408 + }, + { + "epoch": 0.35959915611814347, + "grad_norm": 0.6766622066497803, + "learning_rate": 0.0010839335571077415, + "loss": 1.5125, + "step": 3409 + }, + { + "epoch": 0.35970464135021096, + "grad_norm": 0.6477968692779541, + "learning_rate": 0.001083708737385536, + "loss": 1.4728, + "step": 3410 + }, + { + "epoch": 0.35981012658227846, + "grad_norm": 0.640113890171051, + "learning_rate": 0.0010834838802696023, + "loss": 1.4958, + "step": 3411 + }, + { + "epoch": 0.359915611814346, + "grad_norm": 0.651015043258667, + "learning_rate": 0.0010832589857851373, + "loss": 1.4795, + 
"step": 3412 + }, + { + "epoch": 0.3600210970464135, + "grad_norm": 0.6669970750808716, + "learning_rate": 0.001083034053957341, + "loss": 1.5025, + "step": 3413 + }, + { + "epoch": 0.360126582278481, + "grad_norm": 0.660350501537323, + "learning_rate": 0.0010828090848114182, + "loss": 1.4673, + "step": 3414 + }, + { + "epoch": 0.36023206751054854, + "grad_norm": 0.7303650379180908, + "learning_rate": 0.001082584078372578, + "loss": 1.4597, + "step": 3415 + }, + { + "epoch": 0.36033755274261603, + "grad_norm": 0.6543876528739929, + "learning_rate": 0.0010823590346660335, + "loss": 1.5049, + "step": 3416 + }, + { + "epoch": 0.3604430379746835, + "grad_norm": 0.6534654498100281, + "learning_rate": 0.0010821339537170015, + "loss": 1.4926, + "step": 3417 + }, + { + "epoch": 0.36054852320675107, + "grad_norm": 0.6749167442321777, + "learning_rate": 0.0010819088355507043, + "loss": 1.465, + "step": 3418 + }, + { + "epoch": 0.36065400843881856, + "grad_norm": 0.6457816958427429, + "learning_rate": 0.0010816836801923666, + "loss": 1.4711, + "step": 3419 + }, + { + "epoch": 0.36075949367088606, + "grad_norm": 0.9591659307479858, + "learning_rate": 0.0010814584876672187, + "loss": 1.4836, + "step": 3420 + }, + { + "epoch": 0.3608649789029536, + "grad_norm": 0.9739683270454407, + "learning_rate": 0.0010812332580004947, + "loss": 1.5283, + "step": 3421 + }, + { + "epoch": 0.3609704641350211, + "grad_norm": 0.6846515536308289, + "learning_rate": 0.0010810079912174323, + "loss": 1.5413, + "step": 3422 + }, + { + "epoch": 0.3610759493670886, + "grad_norm": 1.0372101068496704, + "learning_rate": 0.001080782687343274, + "loss": 1.5117, + "step": 3423 + }, + { + "epoch": 0.36118143459915614, + "grad_norm": 0.6437996625900269, + "learning_rate": 0.0010805573464032659, + "loss": 1.516, + "step": 3424 + }, + { + "epoch": 0.36128691983122363, + "grad_norm": 0.9744899868965149, + "learning_rate": 0.0010803319684226593, + "loss": 1.5146, + "step": 3425 + }, + { + "epoch": 
0.3613924050632911, + "grad_norm": 0.7618891000747681, + "learning_rate": 0.001080106553426708, + "loss": 1.4733, + "step": 3426 + }, + { + "epoch": 0.36149789029535867, + "grad_norm": 0.8006644248962402, + "learning_rate": 0.0010798811014406716, + "loss": 1.5125, + "step": 3427 + }, + { + "epoch": 0.36160337552742616, + "grad_norm": 0.9349137544631958, + "learning_rate": 0.0010796556124898127, + "loss": 1.488, + "step": 3428 + }, + { + "epoch": 0.36170886075949366, + "grad_norm": 0.6524068713188171, + "learning_rate": 0.0010794300865993988, + "loss": 1.4763, + "step": 3429 + }, + { + "epoch": 0.3618143459915612, + "grad_norm": 0.8112552165985107, + "learning_rate": 0.0010792045237947008, + "loss": 1.471, + "step": 3430 + }, + { + "epoch": 0.3619198312236287, + "grad_norm": 0.7198824882507324, + "learning_rate": 0.0010789789241009945, + "loss": 1.5033, + "step": 3431 + }, + { + "epoch": 0.3620253164556962, + "grad_norm": 0.792266309261322, + "learning_rate": 0.0010787532875435593, + "loss": 1.4859, + "step": 3432 + }, + { + "epoch": 0.36213080168776374, + "grad_norm": 0.7408920526504517, + "learning_rate": 0.0010785276141476786, + "loss": 1.5004, + "step": 3433 + }, + { + "epoch": 0.36223628691983123, + "grad_norm": 0.6036134958267212, + "learning_rate": 0.001078301903938641, + "loss": 1.4684, + "step": 3434 + }, + { + "epoch": 0.3623417721518987, + "grad_norm": 0.6907472610473633, + "learning_rate": 0.0010780761569417377, + "loss": 1.5144, + "step": 3435 + }, + { + "epoch": 0.36244725738396627, + "grad_norm": 0.6889050006866455, + "learning_rate": 0.0010778503731822652, + "loss": 1.4788, + "step": 3436 + }, + { + "epoch": 0.36255274261603376, + "grad_norm": 0.7223511934280396, + "learning_rate": 0.0010776245526855235, + "loss": 1.5472, + "step": 3437 + }, + { + "epoch": 0.36265822784810126, + "grad_norm": 0.6643140316009521, + "learning_rate": 0.0010773986954768172, + "loss": 1.5293, + "step": 3438 + }, + { + "epoch": 0.3627637130801688, + "grad_norm": 
0.7182597517967224, + "learning_rate": 0.0010771728015814544, + "loss": 1.4803, + "step": 3439 + }, + { + "epoch": 0.3628691983122363, + "grad_norm": 0.6462011933326721, + "learning_rate": 0.0010769468710247478, + "loss": 1.4865, + "step": 3440 + }, + { + "epoch": 0.3629746835443038, + "grad_norm": 0.7151877284049988, + "learning_rate": 0.0010767209038320138, + "loss": 1.4684, + "step": 3441 + }, + { + "epoch": 0.3630801687763713, + "grad_norm": 0.8053479194641113, + "learning_rate": 0.0010764949000285735, + "loss": 1.5144, + "step": 3442 + }, + { + "epoch": 0.36318565400843883, + "grad_norm": 0.8277683258056641, + "learning_rate": 0.0010762688596397515, + "loss": 1.5153, + "step": 3443 + }, + { + "epoch": 0.3632911392405063, + "grad_norm": 0.6867623329162598, + "learning_rate": 0.001076042782690877, + "loss": 1.4619, + "step": 3444 + }, + { + "epoch": 0.3633966244725738, + "grad_norm": 0.9476373195648193, + "learning_rate": 0.001075816669207283, + "loss": 1.5284, + "step": 3445 + }, + { + "epoch": 0.36350210970464136, + "grad_norm": 0.7445204257965088, + "learning_rate": 0.0010755905192143063, + "loss": 1.5406, + "step": 3446 + }, + { + "epoch": 0.36360759493670886, + "grad_norm": 0.7394459843635559, + "learning_rate": 0.0010753643327372886, + "loss": 1.4731, + "step": 3447 + }, + { + "epoch": 0.36371308016877635, + "grad_norm": 0.6728391647338867, + "learning_rate": 0.0010751381098015747, + "loss": 1.5043, + "step": 3448 + }, + { + "epoch": 0.3638185654008439, + "grad_norm": 0.6758776307106018, + "learning_rate": 0.0010749118504325146, + "loss": 1.4885, + "step": 3449 + }, + { + "epoch": 0.3639240506329114, + "grad_norm": 0.8016533255577087, + "learning_rate": 0.0010746855546554612, + "loss": 1.4734, + "step": 3450 + }, + { + "epoch": 0.3640295358649789, + "grad_norm": 0.7062186002731323, + "learning_rate": 0.0010744592224957727, + "loss": 1.4951, + "step": 3451 + }, + { + "epoch": 0.36413502109704643, + "grad_norm": 0.6749126315116882, + "learning_rate": 
0.00107423285397881, + "loss": 1.4556, + "step": 3452 + }, + { + "epoch": 0.3642405063291139, + "grad_norm": 0.939731776714325, + "learning_rate": 0.0010740064491299398, + "loss": 1.501, + "step": 3453 + }, + { + "epoch": 0.3643459915611814, + "grad_norm": 0.6019579768180847, + "learning_rate": 0.0010737800079745308, + "loss": 1.471, + "step": 3454 + }, + { + "epoch": 0.36445147679324896, + "grad_norm": 0.8220798969268799, + "learning_rate": 0.0010735535305379576, + "loss": 1.535, + "step": 3455 + }, + { + "epoch": 0.36455696202531646, + "grad_norm": 0.6509349942207336, + "learning_rate": 0.001073327016845598, + "loss": 1.4754, + "step": 3456 + }, + { + "epoch": 0.36466244725738395, + "grad_norm": 0.860582709312439, + "learning_rate": 0.001073100466922834, + "loss": 1.4961, + "step": 3457 + }, + { + "epoch": 0.3647679324894515, + "grad_norm": 1.0458903312683105, + "learning_rate": 0.0010728738807950515, + "loss": 1.5118, + "step": 3458 + }, + { + "epoch": 0.364873417721519, + "grad_norm": 0.6897073984146118, + "learning_rate": 0.0010726472584876403, + "loss": 1.4548, + "step": 3459 + }, + { + "epoch": 0.3649789029535865, + "grad_norm": 1.0769054889678955, + "learning_rate": 0.0010724206000259954, + "loss": 1.5079, + "step": 3460 + }, + { + "epoch": 0.36508438818565403, + "grad_norm": 0.7101266980171204, + "learning_rate": 0.0010721939054355145, + "loss": 1.5128, + "step": 3461 + }, + { + "epoch": 0.3651898734177215, + "grad_norm": 0.9157030582427979, + "learning_rate": 0.0010719671747415995, + "loss": 1.5037, + "step": 3462 + }, + { + "epoch": 0.365295358649789, + "grad_norm": 0.8663707375526428, + "learning_rate": 0.0010717404079696575, + "loss": 1.5053, + "step": 3463 + }, + { + "epoch": 0.36540084388185656, + "grad_norm": 0.7059624791145325, + "learning_rate": 0.0010715136051450982, + "loss": 1.5252, + "step": 3464 + }, + { + "epoch": 0.36550632911392406, + "grad_norm": 0.7346908450126648, + "learning_rate": 0.0010712867662933364, + "loss": 1.4953, + "step": 
3465 + }, + { + "epoch": 0.36561181434599155, + "grad_norm": 0.6897137761116028, + "learning_rate": 0.0010710598914397901, + "loss": 1.4803, + "step": 3466 + }, + { + "epoch": 0.3657172995780591, + "grad_norm": 0.8481220602989197, + "learning_rate": 0.0010708329806098822, + "loss": 1.5299, + "step": 3467 + }, + { + "epoch": 0.3658227848101266, + "grad_norm": 0.7384406924247742, + "learning_rate": 0.001070606033829039, + "loss": 1.4617, + "step": 3468 + }, + { + "epoch": 0.3659282700421941, + "grad_norm": 0.7036728858947754, + "learning_rate": 0.001070379051122691, + "loss": 1.5401, + "step": 3469 + }, + { + "epoch": 0.36603375527426163, + "grad_norm": 0.6960628628730774, + "learning_rate": 0.0010701520325162727, + "loss": 1.5117, + "step": 3470 + }, + { + "epoch": 0.3661392405063291, + "grad_norm": 0.7014181613922119, + "learning_rate": 0.001069924978035223, + "loss": 1.4919, + "step": 3471 + }, + { + "epoch": 0.3662447257383966, + "grad_norm": 0.8111971616744995, + "learning_rate": 0.0010696978877049838, + "loss": 1.5372, + "step": 3472 + }, + { + "epoch": 0.3663502109704641, + "grad_norm": 0.7864565253257751, + "learning_rate": 0.0010694707615510023, + "loss": 1.5522, + "step": 3473 + }, + { + "epoch": 0.36645569620253166, + "grad_norm": 0.7565685510635376, + "learning_rate": 0.0010692435995987293, + "loss": 1.5255, + "step": 3474 + }, + { + "epoch": 0.36656118143459915, + "grad_norm": 0.6274101138114929, + "learning_rate": 0.0010690164018736187, + "loss": 1.5419, + "step": 3475 + }, + { + "epoch": 0.36666666666666664, + "grad_norm": 0.6859972476959229, + "learning_rate": 0.0010687891684011295, + "loss": 1.4938, + "step": 3476 + }, + { + "epoch": 0.3667721518987342, + "grad_norm": 0.6486687064170837, + "learning_rate": 0.0010685618992067243, + "loss": 1.4992, + "step": 3477 + }, + { + "epoch": 0.3668776371308017, + "grad_norm": 0.6406609416007996, + "learning_rate": 0.00106833459431587, + "loss": 1.4641, + "step": 3478 + }, + { + "epoch": 0.3669831223628692, + 
"grad_norm": 0.7788352966308594, + "learning_rate": 0.001068107253754037, + "loss": 1.506, + "step": 3479 + }, + { + "epoch": 0.3670886075949367, + "grad_norm": 0.9239599108695984, + "learning_rate": 0.0010678798775467001, + "loss": 1.4836, + "step": 3480 + }, + { + "epoch": 0.3671940928270042, + "grad_norm": 0.6730583906173706, + "learning_rate": 0.0010676524657193378, + "loss": 1.5131, + "step": 3481 + }, + { + "epoch": 0.3672995780590717, + "grad_norm": 0.8979195356369019, + "learning_rate": 0.0010674250182974325, + "loss": 1.5132, + "step": 3482 + }, + { + "epoch": 0.36740506329113926, + "grad_norm": 0.6796730756759644, + "learning_rate": 0.0010671975353064712, + "loss": 1.4786, + "step": 3483 + }, + { + "epoch": 0.36751054852320675, + "grad_norm": 0.8631410002708435, + "learning_rate": 0.0010669700167719443, + "loss": 1.5313, + "step": 3484 + }, + { + "epoch": 0.36761603375527424, + "grad_norm": 0.8330124616622925, + "learning_rate": 0.0010667424627193469, + "loss": 1.5172, + "step": 3485 + }, + { + "epoch": 0.3677215189873418, + "grad_norm": 0.701097846031189, + "learning_rate": 0.0010665148731741768, + "loss": 1.4948, + "step": 3486 + }, + { + "epoch": 0.3678270042194093, + "grad_norm": 0.694823682308197, + "learning_rate": 0.0010662872481619367, + "loss": 1.4834, + "step": 3487 + }, + { + "epoch": 0.3679324894514768, + "grad_norm": 0.7132963538169861, + "learning_rate": 0.0010660595877081335, + "loss": 1.501, + "step": 3488 + }, + { + "epoch": 0.3680379746835443, + "grad_norm": 0.8161367774009705, + "learning_rate": 0.0010658318918382774, + "loss": 1.4809, + "step": 3489 + }, + { + "epoch": 0.3681434599156118, + "grad_norm": 0.8703175187110901, + "learning_rate": 0.0010656041605778832, + "loss": 1.5168, + "step": 3490 + }, + { + "epoch": 0.3682489451476793, + "grad_norm": 0.6306816339492798, + "learning_rate": 0.0010653763939524688, + "loss": 1.4976, + "step": 3491 + }, + { + "epoch": 0.36835443037974686, + "grad_norm": 0.6293425559997559, + 
"learning_rate": 0.0010651485919875568, + "loss": 1.4839, + "step": 3492 + }, + { + "epoch": 0.36845991561181435, + "grad_norm": 0.6564885973930359, + "learning_rate": 0.0010649207547086738, + "loss": 1.4763, + "step": 3493 + }, + { + "epoch": 0.36856540084388184, + "grad_norm": 0.6525143384933472, + "learning_rate": 0.0010646928821413499, + "loss": 1.5187, + "step": 3494 + }, + { + "epoch": 0.3686708860759494, + "grad_norm": 0.6772763729095459, + "learning_rate": 0.0010644649743111192, + "loss": 1.5011, + "step": 3495 + }, + { + "epoch": 0.3687763713080169, + "grad_norm": 0.6601477265357971, + "learning_rate": 0.0010642370312435201, + "loss": 1.4729, + "step": 3496 + }, + { + "epoch": 0.3688818565400844, + "grad_norm": 0.6198678016662598, + "learning_rate": 0.0010640090529640948, + "loss": 1.5091, + "step": 3497 + }, + { + "epoch": 0.3689873417721519, + "grad_norm": 0.6513189673423767, + "learning_rate": 0.0010637810394983893, + "loss": 1.4543, + "step": 3498 + }, + { + "epoch": 0.3690928270042194, + "grad_norm": 0.623833954334259, + "learning_rate": 0.0010635529908719537, + "loss": 1.479, + "step": 3499 + }, + { + "epoch": 0.3691983122362869, + "grad_norm": 0.6932898759841919, + "learning_rate": 0.001063324907110342, + "loss": 1.4477, + "step": 3500 + }, + { + "epoch": 0.36930379746835446, + "grad_norm": 0.9453396201133728, + "learning_rate": 0.001063096788239112, + "loss": 1.4775, + "step": 3501 + }, + { + "epoch": 0.36940928270042195, + "grad_norm": 1.063321828842163, + "learning_rate": 0.0010628686342838253, + "loss": 1.5099, + "step": 3502 + }, + { + "epoch": 0.36951476793248944, + "grad_norm": 0.7658918499946594, + "learning_rate": 0.0010626404452700486, + "loss": 1.5324, + "step": 3503 + }, + { + "epoch": 0.369620253164557, + "grad_norm": 1.321641445159912, + "learning_rate": 0.0010624122212233506, + "loss": 1.5374, + "step": 3504 + }, + { + "epoch": 0.3697257383966245, + "grad_norm": 0.9265565276145935, + "learning_rate": 0.0010621839621693056, + "loss": 
1.4782, + "step": 3505 + }, + { + "epoch": 0.369831223628692, + "grad_norm": 1.3356868028640747, + "learning_rate": 0.0010619556681334909, + "loss": 1.5107, + "step": 3506 + }, + { + "epoch": 0.36993670886075947, + "grad_norm": 1.0601603984832764, + "learning_rate": 0.001061727339141488, + "loss": 1.5328, + "step": 3507 + }, + { + "epoch": 0.370042194092827, + "grad_norm": 1.267107367515564, + "learning_rate": 0.0010614989752188823, + "loss": 1.4853, + "step": 3508 + }, + { + "epoch": 0.3701476793248945, + "grad_norm": 1.132203459739685, + "learning_rate": 0.0010612705763912635, + "loss": 1.5064, + "step": 3509 + }, + { + "epoch": 0.370253164556962, + "grad_norm": 1.4552916288375854, + "learning_rate": 0.0010610421426842241, + "loss": 1.5082, + "step": 3510 + }, + { + "epoch": 0.37035864978902955, + "grad_norm": 1.1355514526367188, + "learning_rate": 0.0010608136741233618, + "loss": 1.5131, + "step": 3511 + }, + { + "epoch": 0.37046413502109704, + "grad_norm": 1.3512934446334839, + "learning_rate": 0.0010605851707342774, + "loss": 1.4841, + "step": 3512 + }, + { + "epoch": 0.37056962025316453, + "grad_norm": 1.1824678182601929, + "learning_rate": 0.0010603566325425758, + "loss": 1.482, + "step": 3513 + }, + { + "epoch": 0.3706751054852321, + "grad_norm": 1.0498274564743042, + "learning_rate": 0.001060128059573866, + "loss": 1.4971, + "step": 3514 + }, + { + "epoch": 0.3707805907172996, + "grad_norm": 0.9212824106216431, + "learning_rate": 0.0010598994518537608, + "loss": 1.4825, + "step": 3515 + }, + { + "epoch": 0.37088607594936707, + "grad_norm": 0.999603271484375, + "learning_rate": 0.0010596708094078766, + "loss": 1.5264, + "step": 3516 + }, + { + "epoch": 0.3709915611814346, + "grad_norm": 0.8857445120811462, + "learning_rate": 0.0010594421322618341, + "loss": 1.5073, + "step": 3517 + }, + { + "epoch": 0.3710970464135021, + "grad_norm": 0.9652879238128662, + "learning_rate": 0.0010592134204412578, + "loss": 1.4892, + "step": 3518 + }, + { + "epoch": 
0.3712025316455696, + "grad_norm": 0.8032265901565552, + "learning_rate": 0.0010589846739717755, + "loss": 1.5096, + "step": 3519 + }, + { + "epoch": 0.37130801687763715, + "grad_norm": 0.8504155278205872, + "learning_rate": 0.00105875589287902, + "loss": 1.5158, + "step": 3520 + }, + { + "epoch": 0.37141350210970464, + "grad_norm": 0.7323578000068665, + "learning_rate": 0.001058527077188627, + "loss": 1.5132, + "step": 3521 + }, + { + "epoch": 0.37151898734177213, + "grad_norm": 0.8096004128456116, + "learning_rate": 0.001058298226926237, + "loss": 1.4948, + "step": 3522 + }, + { + "epoch": 0.3716244725738397, + "grad_norm": 0.8629668951034546, + "learning_rate": 0.0010580693421174928, + "loss": 1.4907, + "step": 3523 + }, + { + "epoch": 0.3717299578059072, + "grad_norm": 0.7852508425712585, + "learning_rate": 0.0010578404227880429, + "loss": 1.457, + "step": 3524 + }, + { + "epoch": 0.37183544303797467, + "grad_norm": 0.869198203086853, + "learning_rate": 0.0010576114689635383, + "loss": 1.5449, + "step": 3525 + }, + { + "epoch": 0.3719409282700422, + "grad_norm": 0.7246788144111633, + "learning_rate": 0.0010573824806696351, + "loss": 1.5075, + "step": 3526 + }, + { + "epoch": 0.3720464135021097, + "grad_norm": 0.8019077181816101, + "learning_rate": 0.001057153457931992, + "loss": 1.506, + "step": 3527 + }, + { + "epoch": 0.3721518987341772, + "grad_norm": 0.7121505737304688, + "learning_rate": 0.0010569244007762723, + "loss": 1.535, + "step": 3528 + }, + { + "epoch": 0.37225738396624475, + "grad_norm": 0.94538414478302, + "learning_rate": 0.0010566953092281432, + "loss": 1.4675, + "step": 3529 + }, + { + "epoch": 0.37236286919831224, + "grad_norm": 0.7595131993293762, + "learning_rate": 0.0010564661833132752, + "loss": 1.5519, + "step": 3530 + }, + { + "epoch": 0.37246835443037973, + "grad_norm": 0.7312641143798828, + "learning_rate": 0.0010562370230573432, + "loss": 1.518, + "step": 3531 + }, + { + "epoch": 0.3725738396624473, + "grad_norm": 0.7689396739006042, 
+ "learning_rate": 0.0010560078284860257, + "loss": 1.5396, + "step": 3532 + }, + { + "epoch": 0.3726793248945148, + "grad_norm": 0.7126878499984741, + "learning_rate": 0.0010557785996250053, + "loss": 1.4847, + "step": 3533 + }, + { + "epoch": 0.37278481012658227, + "grad_norm": 0.9910696148872375, + "learning_rate": 0.0010555493364999679, + "loss": 1.5212, + "step": 3534 + }, + { + "epoch": 0.3728902953586498, + "grad_norm": 0.8251933455467224, + "learning_rate": 0.001055320039136604, + "loss": 1.5246, + "step": 3535 + }, + { + "epoch": 0.3729957805907173, + "grad_norm": 0.7453010082244873, + "learning_rate": 0.001055090707560607, + "loss": 1.4747, + "step": 3536 + }, + { + "epoch": 0.3731012658227848, + "grad_norm": 1.046349287033081, + "learning_rate": 0.0010548613417976748, + "loss": 1.4621, + "step": 3537 + }, + { + "epoch": 0.37320675105485235, + "grad_norm": 0.6647979021072388, + "learning_rate": 0.0010546319418735094, + "loss": 1.5123, + "step": 3538 + }, + { + "epoch": 0.37331223628691984, + "grad_norm": 0.9262690544128418, + "learning_rate": 0.0010544025078138156, + "loss": 1.4944, + "step": 3539 + }, + { + "epoch": 0.37341772151898733, + "grad_norm": 0.8632687330245972, + "learning_rate": 0.001054173039644303, + "loss": 1.4837, + "step": 3540 + }, + { + "epoch": 0.3735232067510548, + "grad_norm": 0.6165754199028015, + "learning_rate": 0.0010539435373906846, + "loss": 1.495, + "step": 3541 + }, + { + "epoch": 0.3736286919831224, + "grad_norm": 0.7094970941543579, + "learning_rate": 0.0010537140010786774, + "loss": 1.5294, + "step": 3542 + }, + { + "epoch": 0.37373417721518987, + "grad_norm": 0.8162989616394043, + "learning_rate": 0.0010534844307340016, + "loss": 1.4864, + "step": 3543 + }, + { + "epoch": 0.37383966244725736, + "grad_norm": 1.0827642679214478, + "learning_rate": 0.0010532548263823822, + "loss": 1.4748, + "step": 3544 + }, + { + "epoch": 0.3739451476793249, + "grad_norm": 0.6685175895690918, + "learning_rate": 0.0010530251880495473, + 
"loss": 1.5158, + "step": 3545 + }, + { + "epoch": 0.3740506329113924, + "grad_norm": 1.1590133905410767, + "learning_rate": 0.0010527955157612291, + "loss": 1.4932, + "step": 3546 + }, + { + "epoch": 0.3741561181434599, + "grad_norm": 0.6360828280448914, + "learning_rate": 0.0010525658095431635, + "loss": 1.4612, + "step": 3547 + }, + { + "epoch": 0.37426160337552744, + "grad_norm": 0.8452709317207336, + "learning_rate": 0.00105233606942109, + "loss": 1.4859, + "step": 3548 + }, + { + "epoch": 0.37436708860759493, + "grad_norm": 0.6704861521720886, + "learning_rate": 0.0010521062954207527, + "loss": 1.4803, + "step": 3549 + }, + { + "epoch": 0.3744725738396624, + "grad_norm": 0.7754371166229248, + "learning_rate": 0.0010518764875678981, + "loss": 1.506, + "step": 3550 + }, + { + "epoch": 0.37457805907173, + "grad_norm": 0.7997182607650757, + "learning_rate": 0.001051646645888278, + "loss": 1.4732, + "step": 3551 + }, + { + "epoch": 0.37468354430379747, + "grad_norm": 0.708045244216919, + "learning_rate": 0.0010514167704076473, + "loss": 1.483, + "step": 3552 + }, + { + "epoch": 0.37478902953586496, + "grad_norm": 1.1802717447280884, + "learning_rate": 0.0010511868611517644, + "loss": 1.5063, + "step": 3553 + }, + { + "epoch": 0.3748945147679325, + "grad_norm": 0.68510502576828, + "learning_rate": 0.0010509569181463916, + "loss": 1.5095, + "step": 3554 + }, + { + "epoch": 0.375, + "grad_norm": 1.14121675491333, + "learning_rate": 0.0010507269414172956, + "loss": 1.493, + "step": 3555 + }, + { + "epoch": 0.3751054852320675, + "grad_norm": 0.677394449710846, + "learning_rate": 0.0010504969309902462, + "loss": 1.5046, + "step": 3556 + }, + { + "epoch": 0.37521097046413504, + "grad_norm": 0.9772894978523254, + "learning_rate": 0.0010502668868910174, + "loss": 1.4968, + "step": 3557 + }, + { + "epoch": 0.37531645569620253, + "grad_norm": 0.6000973582267761, + "learning_rate": 0.0010500368091453864, + "loss": 1.4611, + "step": 3558 + }, + { + "epoch": 0.37542194092827, + 
"grad_norm": 1.0971736907958984, + "learning_rate": 0.001049806697779135, + "loss": 1.5013, + "step": 3559 + }, + { + "epoch": 0.3755274261603376, + "grad_norm": 0.6921679377555847, + "learning_rate": 0.001049576552818048, + "loss": 1.4826, + "step": 3560 + }, + { + "epoch": 0.37563291139240507, + "grad_norm": 1.0226629972457886, + "learning_rate": 0.0010493463742879147, + "loss": 1.4929, + "step": 3561 + }, + { + "epoch": 0.37573839662447256, + "grad_norm": 0.9098431468009949, + "learning_rate": 0.0010491161622145275, + "loss": 1.4712, + "step": 3562 + }, + { + "epoch": 0.3758438818565401, + "grad_norm": 0.852651059627533, + "learning_rate": 0.0010488859166236824, + "loss": 1.4718, + "step": 3563 + }, + { + "epoch": 0.3759493670886076, + "grad_norm": 1.003774881362915, + "learning_rate": 0.0010486556375411803, + "loss": 1.4867, + "step": 3564 + }, + { + "epoch": 0.3760548523206751, + "grad_norm": 0.800825297832489, + "learning_rate": 0.0010484253249928247, + "loss": 1.4865, + "step": 3565 + }, + { + "epoch": 0.37616033755274264, + "grad_norm": 1.3148133754730225, + "learning_rate": 0.0010481949790044234, + "loss": 1.4636, + "step": 3566 + }, + { + "epoch": 0.37626582278481013, + "grad_norm": 0.9250807166099548, + "learning_rate": 0.0010479645996017875, + "loss": 1.4789, + "step": 3567 + }, + { + "epoch": 0.3763713080168776, + "grad_norm": 1.3880162239074707, + "learning_rate": 0.0010477341868107327, + "loss": 1.5188, + "step": 3568 + }, + { + "epoch": 0.3764767932489452, + "grad_norm": 0.9579887390136719, + "learning_rate": 0.0010475037406570775, + "loss": 1.4821, + "step": 3569 + }, + { + "epoch": 0.37658227848101267, + "grad_norm": 1.0605778694152832, + "learning_rate": 0.0010472732611666448, + "loss": 1.5088, + "step": 3570 + }, + { + "epoch": 0.37668776371308016, + "grad_norm": 0.7786879539489746, + "learning_rate": 0.0010470427483652608, + "loss": 1.4648, + "step": 3571 + }, + { + "epoch": 0.37679324894514765, + "grad_norm": 0.9086720943450928, + 
"learning_rate": 0.0010468122022787554, + "loss": 1.478, + "step": 3572 + }, + { + "epoch": 0.3768987341772152, + "grad_norm": 0.7319563031196594, + "learning_rate": 0.001046581622932963, + "loss": 1.497, + "step": 3573 + }, + { + "epoch": 0.3770042194092827, + "grad_norm": 1.1202397346496582, + "learning_rate": 0.001046351010353721, + "loss": 1.5185, + "step": 3574 + }, + { + "epoch": 0.3771097046413502, + "grad_norm": 0.7729990482330322, + "learning_rate": 0.0010461203645668702, + "loss": 1.4935, + "step": 3575 + }, + { + "epoch": 0.37721518987341773, + "grad_norm": 0.763212263584137, + "learning_rate": 0.001045889685598256, + "loss": 1.4986, + "step": 3576 + }, + { + "epoch": 0.3773206751054852, + "grad_norm": 0.6880739331245422, + "learning_rate": 0.0010456589734737273, + "loss": 1.4665, + "step": 3577 + }, + { + "epoch": 0.3774261603375527, + "grad_norm": 0.8761333227157593, + "learning_rate": 0.0010454282282191362, + "loss": 1.4772, + "step": 3578 + }, + { + "epoch": 0.37753164556962027, + "grad_norm": 1.0062121152877808, + "learning_rate": 0.001045197449860339, + "loss": 1.4859, + "step": 3579 + }, + { + "epoch": 0.37763713080168776, + "grad_norm": 0.7025592923164368, + "learning_rate": 0.0010449666384231954, + "loss": 1.5015, + "step": 3580 + }, + { + "epoch": 0.37774261603375525, + "grad_norm": 0.7120627164840698, + "learning_rate": 0.0010447357939335693, + "loss": 1.5064, + "step": 3581 + }, + { + "epoch": 0.3778481012658228, + "grad_norm": 0.7209911346435547, + "learning_rate": 0.001044504916417328, + "loss": 1.4788, + "step": 3582 + }, + { + "epoch": 0.3779535864978903, + "grad_norm": 0.6602431535720825, + "learning_rate": 0.001044274005900342, + "loss": 1.5002, + "step": 3583 + }, + { + "epoch": 0.3780590717299578, + "grad_norm": 0.6446284651756287, + "learning_rate": 0.0010440430624084863, + "loss": 1.4666, + "step": 3584 + }, + { + "epoch": 0.37816455696202533, + "grad_norm": 0.6521733403205872, + "learning_rate": 0.0010438120859676393, + "loss": 
1.531, + "step": 3585 + }, + { + "epoch": 0.3782700421940928, + "grad_norm": 0.6577736139297485, + "learning_rate": 0.0010435810766036828, + "loss": 1.5201, + "step": 3586 + }, + { + "epoch": 0.3783755274261603, + "grad_norm": 0.7150927782058716, + "learning_rate": 0.001043350034342503, + "loss": 1.4762, + "step": 3587 + }, + { + "epoch": 0.37848101265822787, + "grad_norm": 0.6962739825248718, + "learning_rate": 0.001043118959209989, + "loss": 1.4939, + "step": 3588 + }, + { + "epoch": 0.37858649789029536, + "grad_norm": 0.749321460723877, + "learning_rate": 0.001042887851232034, + "loss": 1.5291, + "step": 3589 + }, + { + "epoch": 0.37869198312236285, + "grad_norm": 0.7762422561645508, + "learning_rate": 0.0010426567104345346, + "loss": 1.5009, + "step": 3590 + }, + { + "epoch": 0.3787974683544304, + "grad_norm": 0.6662004590034485, + "learning_rate": 0.0010424255368433916, + "loss": 1.462, + "step": 3591 + }, + { + "epoch": 0.3789029535864979, + "grad_norm": 0.6643801927566528, + "learning_rate": 0.0010421943304845093, + "loss": 1.4877, + "step": 3592 + }, + { + "epoch": 0.3790084388185654, + "grad_norm": 0.7597912549972534, + "learning_rate": 0.0010419630913837948, + "loss": 1.5316, + "step": 3593 + }, + { + "epoch": 0.37911392405063293, + "grad_norm": 0.6283294558525085, + "learning_rate": 0.0010417318195671604, + "loss": 1.4919, + "step": 3594 + }, + { + "epoch": 0.3792194092827004, + "grad_norm": 0.7059802412986755, + "learning_rate": 0.0010415005150605208, + "loss": 1.5083, + "step": 3595 + }, + { + "epoch": 0.3793248945147679, + "grad_norm": 0.7008107304573059, + "learning_rate": 0.001041269177889795, + "loss": 1.4392, + "step": 3596 + }, + { + "epoch": 0.37943037974683547, + "grad_norm": 0.7274407148361206, + "learning_rate": 0.0010410378080809052, + "loss": 1.4732, + "step": 3597 + }, + { + "epoch": 0.37953586497890296, + "grad_norm": 0.93056720495224, + "learning_rate": 0.001040806405659778, + "loss": 1.4904, + "step": 3598 + }, + { + "epoch": 
0.37964135021097045, + "grad_norm": 1.3470996618270874, + "learning_rate": 0.0010405749706523428, + "loss": 1.4708, + "step": 3599 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 1.0376651287078857, + "learning_rate": 0.0010403435030845332, + "loss": 1.4694, + "step": 3600 + }, + { + "epoch": 0.3798523206751055, + "grad_norm": 1.6695551872253418, + "learning_rate": 0.0010401120029822864, + "loss": 1.5009, + "step": 3601 + }, + { + "epoch": 0.379957805907173, + "grad_norm": 1.739208698272705, + "learning_rate": 0.001039880470371543, + "loss": 1.485, + "step": 3602 + }, + { + "epoch": 0.38006329113924053, + "grad_norm": 1.057340383529663, + "learning_rate": 0.0010396489052782473, + "loss": 1.5003, + "step": 3603 + }, + { + "epoch": 0.380168776371308, + "grad_norm": 1.1080920696258545, + "learning_rate": 0.0010394173077283477, + "loss": 1.4731, + "step": 3604 + }, + { + "epoch": 0.3802742616033755, + "grad_norm": 0.8375294208526611, + "learning_rate": 0.0010391856777477954, + "loss": 1.5054, + "step": 3605 + }, + { + "epoch": 0.380379746835443, + "grad_norm": 1.0291216373443604, + "learning_rate": 0.001038954015362546, + "loss": 1.4752, + "step": 3606 + }, + { + "epoch": 0.38048523206751056, + "grad_norm": 0.9058377742767334, + "learning_rate": 0.001038722320598558, + "loss": 1.4694, + "step": 3607 + }, + { + "epoch": 0.38059071729957805, + "grad_norm": 0.9802544116973877, + "learning_rate": 0.001038490593481795, + "loss": 1.5104, + "step": 3608 + }, + { + "epoch": 0.38069620253164554, + "grad_norm": 0.8623273968696594, + "learning_rate": 0.0010382588340382218, + "loss": 1.5006, + "step": 3609 + }, + { + "epoch": 0.3808016877637131, + "grad_norm": 0.9182188510894775, + "learning_rate": 0.0010380270422938093, + "loss": 1.4455, + "step": 3610 + }, + { + "epoch": 0.3809071729957806, + "grad_norm": 1.1505135297775269, + "learning_rate": 0.00103779521827453, + "loss": 1.5147, + "step": 3611 + }, + { + "epoch": 0.3810126582278481, + "grad_norm": 0.7495729923248291, + 
"learning_rate": 0.0010375633620063618, + "loss": 1.4734, + "step": 3612 + }, + { + "epoch": 0.3811181434599156, + "grad_norm": 0.7321133017539978, + "learning_rate": 0.0010373314735152848, + "loss": 1.4811, + "step": 3613 + }, + { + "epoch": 0.3812236286919831, + "grad_norm": 0.8237871527671814, + "learning_rate": 0.0010370995528272836, + "loss": 1.4733, + "step": 3614 + }, + { + "epoch": 0.3813291139240506, + "grad_norm": 0.6773837804794312, + "learning_rate": 0.0010368675999683455, + "loss": 1.4406, + "step": 3615 + }, + { + "epoch": 0.38143459915611816, + "grad_norm": 0.8372664451599121, + "learning_rate": 0.0010366356149644628, + "loss": 1.4962, + "step": 3616 + }, + { + "epoch": 0.38154008438818565, + "grad_norm": 0.665533185005188, + "learning_rate": 0.0010364035978416297, + "loss": 1.5352, + "step": 3617 + }, + { + "epoch": 0.38164556962025314, + "grad_norm": 0.8711310625076294, + "learning_rate": 0.001036171548625846, + "loss": 1.5185, + "step": 3618 + }, + { + "epoch": 0.3817510548523207, + "grad_norm": 0.7039082050323486, + "learning_rate": 0.0010359394673431126, + "loss": 1.4774, + "step": 3619 + }, + { + "epoch": 0.3818565400843882, + "grad_norm": 0.9578801393508911, + "learning_rate": 0.0010357073540194362, + "loss": 1.4533, + "step": 3620 + }, + { + "epoch": 0.3819620253164557, + "grad_norm": 0.805316150188446, + "learning_rate": 0.0010354752086808264, + "loss": 1.4958, + "step": 3621 + }, + { + "epoch": 0.3820675105485232, + "grad_norm": 0.8567383885383606, + "learning_rate": 0.001035243031353296, + "loss": 1.4538, + "step": 3622 + }, + { + "epoch": 0.3821729957805907, + "grad_norm": 0.8702483773231506, + "learning_rate": 0.0010350108220628614, + "loss": 1.5104, + "step": 3623 + }, + { + "epoch": 0.3822784810126582, + "grad_norm": 0.7241293787956238, + "learning_rate": 0.001034778580835543, + "loss": 1.4736, + "step": 3624 + }, + { + "epoch": 0.38238396624472576, + "grad_norm": 0.8113511204719543, + "learning_rate": 0.0010345463076973645, + "loss": 
1.5005, + "step": 3625 + }, + { + "epoch": 0.38248945147679325, + "grad_norm": 0.7173448801040649, + "learning_rate": 0.0010343140026743535, + "loss": 1.4844, + "step": 3626 + }, + { + "epoch": 0.38259493670886074, + "grad_norm": 0.935888946056366, + "learning_rate": 0.0010340816657925407, + "loss": 1.4929, + "step": 3627 + }, + { + "epoch": 0.3827004219409283, + "grad_norm": 0.6807728409767151, + "learning_rate": 0.0010338492970779606, + "loss": 1.5056, + "step": 3628 + }, + { + "epoch": 0.3828059071729958, + "grad_norm": 0.9765699505805969, + "learning_rate": 0.0010336168965566516, + "loss": 1.4605, + "step": 3629 + }, + { + "epoch": 0.3829113924050633, + "grad_norm": 0.8491668701171875, + "learning_rate": 0.001033384464254655, + "loss": 1.4917, + "step": 3630 + }, + { + "epoch": 0.3830168776371308, + "grad_norm": 0.7163394689559937, + "learning_rate": 0.001033152000198016, + "loss": 1.4917, + "step": 3631 + }, + { + "epoch": 0.3831223628691983, + "grad_norm": 0.7505501508712769, + "learning_rate": 0.0010329195044127834, + "loss": 1.4725, + "step": 3632 + }, + { + "epoch": 0.3832278481012658, + "grad_norm": 0.8346563577651978, + "learning_rate": 0.0010326869769250097, + "loss": 1.5114, + "step": 3633 + }, + { + "epoch": 0.38333333333333336, + "grad_norm": 0.7512205839157104, + "learning_rate": 0.0010324544177607508, + "loss": 1.451, + "step": 3634 + }, + { + "epoch": 0.38343881856540085, + "grad_norm": 0.9480777978897095, + "learning_rate": 0.0010322218269460657, + "loss": 1.4864, + "step": 3635 + }, + { + "epoch": 0.38354430379746834, + "grad_norm": 0.7854421734809875, + "learning_rate": 0.001031989204507018, + "loss": 1.4666, + "step": 3636 + }, + { + "epoch": 0.3836497890295359, + "grad_norm": 0.8475116491317749, + "learning_rate": 0.0010317565504696733, + "loss": 1.5609, + "step": 3637 + }, + { + "epoch": 0.3837552742616034, + "grad_norm": 0.8014345169067383, + "learning_rate": 0.0010315238648601025, + "loss": 1.4891, + "step": 3638 + }, + { + "epoch": 
0.3838607594936709, + "grad_norm": 1.0790244340896606, + "learning_rate": 0.0010312911477043784, + "loss": 1.4813, + "step": 3639 + }, + { + "epoch": 0.38396624472573837, + "grad_norm": 1.0101162195205688, + "learning_rate": 0.001031058399028579, + "loss": 1.4507, + "step": 3640 + }, + { + "epoch": 0.3840717299578059, + "grad_norm": 0.799181342124939, + "learning_rate": 0.0010308256188587843, + "loss": 1.4732, + "step": 3641 + }, + { + "epoch": 0.3841772151898734, + "grad_norm": 1.209834337234497, + "learning_rate": 0.0010305928072210787, + "loss": 1.5407, + "step": 3642 + }, + { + "epoch": 0.3842827004219409, + "grad_norm": 0.7904993891716003, + "learning_rate": 0.00103035996414155, + "loss": 1.4954, + "step": 3643 + }, + { + "epoch": 0.38438818565400845, + "grad_norm": 1.0889732837677002, + "learning_rate": 0.0010301270896462893, + "loss": 1.4841, + "step": 3644 + }, + { + "epoch": 0.38449367088607594, + "grad_norm": 0.7599901556968689, + "learning_rate": 0.0010298941837613913, + "loss": 1.4932, + "step": 3645 + }, + { + "epoch": 0.38459915611814344, + "grad_norm": 1.2003995180130005, + "learning_rate": 0.0010296612465129542, + "loss": 1.4602, + "step": 3646 + }, + { + "epoch": 0.384704641350211, + "grad_norm": 0.7131286263465881, + "learning_rate": 0.0010294282779270802, + "loss": 1.4974, + "step": 3647 + }, + { + "epoch": 0.3848101265822785, + "grad_norm": 0.9024287462234497, + "learning_rate": 0.001029195278029874, + "loss": 1.5254, + "step": 3648 + }, + { + "epoch": 0.38491561181434597, + "grad_norm": 0.6533273458480835, + "learning_rate": 0.0010289622468474448, + "loss": 1.5131, + "step": 3649 + }, + { + "epoch": 0.3850210970464135, + "grad_norm": 0.893247663974762, + "learning_rate": 0.001028729184405905, + "loss": 1.5014, + "step": 3650 + }, + { + "epoch": 0.385126582278481, + "grad_norm": 0.7084142565727234, + "learning_rate": 0.00102849609073137, + "loss": 1.5078, + "step": 3651 + }, + { + "epoch": 0.3852320675105485, + "grad_norm": 0.7489818930625916, + 
"learning_rate": 0.0010282629658499593, + "loss": 1.4774, + "step": 3652 + }, + { + "epoch": 0.38533755274261605, + "grad_norm": 0.7552056312561035, + "learning_rate": 0.001028029809787796, + "loss": 1.4745, + "step": 3653 + }, + { + "epoch": 0.38544303797468354, + "grad_norm": 0.8990428447723389, + "learning_rate": 0.001027796622571006, + "loss": 1.5164, + "step": 3654 + }, + { + "epoch": 0.38554852320675104, + "grad_norm": 0.8525082468986511, + "learning_rate": 0.001027563404225719, + "loss": 1.4917, + "step": 3655 + }, + { + "epoch": 0.3856540084388186, + "grad_norm": 0.7620163559913635, + "learning_rate": 0.0010273301547780687, + "loss": 1.4782, + "step": 3656 + }, + { + "epoch": 0.3857594936708861, + "grad_norm": 0.7220757007598877, + "learning_rate": 0.0010270968742541917, + "loss": 1.4583, + "step": 3657 + }, + { + "epoch": 0.38586497890295357, + "grad_norm": 0.7745612859725952, + "learning_rate": 0.0010268635626802282, + "loss": 1.4893, + "step": 3658 + }, + { + "epoch": 0.3859704641350211, + "grad_norm": 0.7665947675704956, + "learning_rate": 0.001026630220082322, + "loss": 1.5045, + "step": 3659 + }, + { + "epoch": 0.3860759493670886, + "grad_norm": 0.8088875412940979, + "learning_rate": 0.0010263968464866201, + "loss": 1.4768, + "step": 3660 + }, + { + "epoch": 0.3861814345991561, + "grad_norm": 0.7503980994224548, + "learning_rate": 0.0010261634419192732, + "loss": 1.4982, + "step": 3661 + }, + { + "epoch": 0.38628691983122365, + "grad_norm": 0.8620875477790833, + "learning_rate": 0.001025930006406436, + "loss": 1.4786, + "step": 3662 + }, + { + "epoch": 0.38639240506329114, + "grad_norm": 0.8860164880752563, + "learning_rate": 0.0010256965399742652, + "loss": 1.4737, + "step": 3663 + }, + { + "epoch": 0.38649789029535864, + "grad_norm": 0.9484544992446899, + "learning_rate": 0.0010254630426489225, + "loss": 1.4679, + "step": 3664 + }, + { + "epoch": 0.3866033755274262, + "grad_norm": 1.2252897024154663, + "learning_rate": 0.0010252295144565725, + 
"loss": 1.4738, + "step": 3665 + }, + { + "epoch": 0.3867088607594937, + "grad_norm": 0.9537423849105835, + "learning_rate": 0.0010249959554233827, + "loss": 1.4855, + "step": 3666 + }, + { + "epoch": 0.38681434599156117, + "grad_norm": 1.3456639051437378, + "learning_rate": 0.001024762365575525, + "loss": 1.5089, + "step": 3667 + }, + { + "epoch": 0.3869198312236287, + "grad_norm": 1.1616673469543457, + "learning_rate": 0.001024528744939174, + "loss": 1.5042, + "step": 3668 + }, + { + "epoch": 0.3870253164556962, + "grad_norm": 1.544301986694336, + "learning_rate": 0.0010242950935405084, + "loss": 1.4807, + "step": 3669 + }, + { + "epoch": 0.3871308016877637, + "grad_norm": 1.6884047985076904, + "learning_rate": 0.0010240614114057098, + "loss": 1.4777, + "step": 3670 + }, + { + "epoch": 0.3872362869198312, + "grad_norm": 0.9825799465179443, + "learning_rate": 0.0010238276985609631, + "loss": 1.4944, + "step": 3671 + }, + { + "epoch": 0.38734177215189874, + "grad_norm": 1.0171315670013428, + "learning_rate": 0.0010235939550324576, + "loss": 1.4876, + "step": 3672 + }, + { + "epoch": 0.38744725738396624, + "grad_norm": 0.8267747163772583, + "learning_rate": 0.0010233601808463852, + "loss": 1.4829, + "step": 3673 + }, + { + "epoch": 0.38755274261603373, + "grad_norm": 0.6913849115371704, + "learning_rate": 0.0010231263760289416, + "loss": 1.4806, + "step": 3674 + }, + { + "epoch": 0.3876582278481013, + "grad_norm": 0.7318279147148132, + "learning_rate": 0.0010228925406063254, + "loss": 1.462, + "step": 3675 + }, + { + "epoch": 0.38776371308016877, + "grad_norm": 0.6507459878921509, + "learning_rate": 0.0010226586746047393, + "loss": 1.4613, + "step": 3676 + }, + { + "epoch": 0.38786919831223626, + "grad_norm": 0.7514415979385376, + "learning_rate": 0.0010224247780503892, + "loss": 1.5003, + "step": 3677 + }, + { + "epoch": 0.3879746835443038, + "grad_norm": 0.628184974193573, + "learning_rate": 0.0010221908509694842, + "loss": 1.497, + "step": 3678 + }, + { + 
"epoch": 0.3880801687763713, + "grad_norm": 0.7577600479125977, + "learning_rate": 0.0010219568933882372, + "loss": 1.5273, + "step": 3679 + }, + { + "epoch": 0.3881856540084388, + "grad_norm": 1.2198106050491333, + "learning_rate": 0.001021722905332864, + "loss": 1.4498, + "step": 3680 + }, + { + "epoch": 0.38829113924050634, + "grad_norm": 0.7651435732841492, + "learning_rate": 0.0010214888868295842, + "loss": 1.494, + "step": 3681 + }, + { + "epoch": 0.38839662447257384, + "grad_norm": 1.0210847854614258, + "learning_rate": 0.0010212548379046214, + "loss": 1.4816, + "step": 3682 + }, + { + "epoch": 0.38850210970464133, + "grad_norm": 0.7287131547927856, + "learning_rate": 0.001021020758584201, + "loss": 1.4992, + "step": 3683 + }, + { + "epoch": 0.3886075949367089, + "grad_norm": 1.0170162916183472, + "learning_rate": 0.0010207866488945532, + "loss": 1.5115, + "step": 3684 + }, + { + "epoch": 0.38871308016877637, + "grad_norm": 0.6949412226676941, + "learning_rate": 0.0010205525088619112, + "loss": 1.529, + "step": 3685 + }, + { + "epoch": 0.38881856540084386, + "grad_norm": 1.1324872970581055, + "learning_rate": 0.0010203183385125115, + "loss": 1.4766, + "step": 3686 + }, + { + "epoch": 0.3889240506329114, + "grad_norm": 0.6830289363861084, + "learning_rate": 0.001020084137872594, + "loss": 1.4666, + "step": 3687 + }, + { + "epoch": 0.3890295358649789, + "grad_norm": 0.9057130813598633, + "learning_rate": 0.0010198499069684023, + "loss": 1.4628, + "step": 3688 + }, + { + "epoch": 0.3891350210970464, + "grad_norm": 0.6813346743583679, + "learning_rate": 0.0010196156458261827, + "loss": 1.5073, + "step": 3689 + }, + { + "epoch": 0.38924050632911394, + "grad_norm": 0.8428582549095154, + "learning_rate": 0.0010193813544721855, + "loss": 1.484, + "step": 3690 + }, + { + "epoch": 0.38934599156118144, + "grad_norm": 0.66568523645401, + "learning_rate": 0.0010191470329326646, + "loss": 1.5543, + "step": 3691 + }, + { + "epoch": 0.38945147679324893, + "grad_norm": 
1.0013045072555542, + "learning_rate": 0.0010189126812338765, + "loss": 1.484, + "step": 3692 + }, + { + "epoch": 0.3895569620253165, + "grad_norm": 0.6104904413223267, + "learning_rate": 0.0010186782994020811, + "loss": 1.4435, + "step": 3693 + }, + { + "epoch": 0.38966244725738397, + "grad_norm": 0.7430548667907715, + "learning_rate": 0.0010184438874635427, + "loss": 1.4692, + "step": 3694 + }, + { + "epoch": 0.38976793248945146, + "grad_norm": 0.6799267530441284, + "learning_rate": 0.0010182094454445282, + "loss": 1.5022, + "step": 3695 + }, + { + "epoch": 0.389873417721519, + "grad_norm": 0.804732620716095, + "learning_rate": 0.001017974973371308, + "loss": 1.4842, + "step": 3696 + }, + { + "epoch": 0.3899789029535865, + "grad_norm": 0.6919838190078735, + "learning_rate": 0.0010177404712701558, + "loss": 1.4969, + "step": 3697 + }, + { + "epoch": 0.390084388185654, + "grad_norm": 0.6669623851776123, + "learning_rate": 0.0010175059391673486, + "loss": 1.5237, + "step": 3698 + }, + { + "epoch": 0.39018987341772154, + "grad_norm": 0.658766508102417, + "learning_rate": 0.0010172713770891673, + "loss": 1.4515, + "step": 3699 + }, + { + "epoch": 0.39029535864978904, + "grad_norm": 0.688721239566803, + "learning_rate": 0.001017036785061895, + "loss": 1.5579, + "step": 3700 + }, + { + "epoch": 0.39040084388185653, + "grad_norm": 0.7694225907325745, + "learning_rate": 0.0010168021631118199, + "loss": 1.4989, + "step": 3701 + }, + { + "epoch": 0.3905063291139241, + "grad_norm": 0.6737179160118103, + "learning_rate": 0.0010165675112652314, + "loss": 1.4734, + "step": 3702 + }, + { + "epoch": 0.39061181434599157, + "grad_norm": 0.6778608560562134, + "learning_rate": 0.0010163328295484245, + "loss": 1.4554, + "step": 3703 + }, + { + "epoch": 0.39071729957805906, + "grad_norm": 0.9413993954658508, + "learning_rate": 0.001016098117987696, + "loss": 1.4972, + "step": 3704 + }, + { + "epoch": 0.39082278481012656, + "grad_norm": 0.8423580527305603, + "learning_rate": 
0.0010158633766093462, + "loss": 1.4713, + "step": 3705 + }, + { + "epoch": 0.3909282700421941, + "grad_norm": 0.6924589276313782, + "learning_rate": 0.0010156286054396795, + "loss": 1.5002, + "step": 3706 + }, + { + "epoch": 0.3910337552742616, + "grad_norm": 0.6988875269889832, + "learning_rate": 0.001015393804505003, + "loss": 1.5368, + "step": 3707 + }, + { + "epoch": 0.3911392405063291, + "grad_norm": 0.6553449034690857, + "learning_rate": 0.0010151589738316275, + "loss": 1.4879, + "step": 3708 + }, + { + "epoch": 0.39124472573839664, + "grad_norm": 0.6547777652740479, + "learning_rate": 0.0010149241134458666, + "loss": 1.4836, + "step": 3709 + }, + { + "epoch": 0.39135021097046413, + "grad_norm": 0.7655200362205505, + "learning_rate": 0.0010146892233740376, + "loss": 1.4307, + "step": 3710 + }, + { + "epoch": 0.3914556962025316, + "grad_norm": 0.8614525198936462, + "learning_rate": 0.0010144543036424616, + "loss": 1.4459, + "step": 3711 + }, + { + "epoch": 0.39156118143459917, + "grad_norm": 0.6544397473335266, + "learning_rate": 0.001014219354277462, + "loss": 1.4388, + "step": 3712 + }, + { + "epoch": 0.39166666666666666, + "grad_norm": 0.6859782934188843, + "learning_rate": 0.0010139843753053663, + "loss": 1.4543, + "step": 3713 + }, + { + "epoch": 0.39177215189873416, + "grad_norm": 0.6344606876373291, + "learning_rate": 0.001013749366752505, + "loss": 1.4819, + "step": 3714 + }, + { + "epoch": 0.3918776371308017, + "grad_norm": 0.6227748394012451, + "learning_rate": 0.0010135143286452118, + "loss": 1.4649, + "step": 3715 + }, + { + "epoch": 0.3919831223628692, + "grad_norm": 0.7312073111534119, + "learning_rate": 0.0010132792610098244, + "loss": 1.4573, + "step": 3716 + }, + { + "epoch": 0.3920886075949367, + "grad_norm": 0.6171767115592957, + "learning_rate": 0.0010130441638726828, + "loss": 1.4929, + "step": 3717 + }, + { + "epoch": 0.39219409282700424, + "grad_norm": 0.6531344652175903, + "learning_rate": 0.001012809037260131, + "loss": 1.4324, + 
"step": 3718 + }, + { + "epoch": 0.39229957805907173, + "grad_norm": 0.737295925617218, + "learning_rate": 0.001012573881198516, + "loss": 1.4992, + "step": 3719 + }, + { + "epoch": 0.3924050632911392, + "grad_norm": 0.7094447016716003, + "learning_rate": 0.0010123386957141883, + "loss": 1.4688, + "step": 3720 + }, + { + "epoch": 0.39251054852320677, + "grad_norm": 0.6847347021102905, + "learning_rate": 0.0010121034808335018, + "loss": 1.4697, + "step": 3721 + }, + { + "epoch": 0.39261603375527426, + "grad_norm": 0.6429101228713989, + "learning_rate": 0.0010118682365828132, + "loss": 1.4856, + "step": 3722 + }, + { + "epoch": 0.39272151898734176, + "grad_norm": 1.0621747970581055, + "learning_rate": 0.0010116329629884827, + "loss": 1.4588, + "step": 3723 + }, + { + "epoch": 0.3928270042194093, + "grad_norm": 0.7693407535552979, + "learning_rate": 0.0010113976600768743, + "loss": 1.5012, + "step": 3724 + }, + { + "epoch": 0.3929324894514768, + "grad_norm": 1.268703579902649, + "learning_rate": 0.0010111623278743547, + "loss": 1.4662, + "step": 3725 + }, + { + "epoch": 0.3930379746835443, + "grad_norm": 0.7723185420036316, + "learning_rate": 0.001010926966407294, + "loss": 1.4936, + "step": 3726 + }, + { + "epoch": 0.39314345991561184, + "grad_norm": 1.1553159952163696, + "learning_rate": 0.0010106915757020654, + "loss": 1.4991, + "step": 3727 + }, + { + "epoch": 0.39324894514767933, + "grad_norm": 0.8166211843490601, + "learning_rate": 0.0010104561557850457, + "loss": 1.502, + "step": 3728 + }, + { + "epoch": 0.3933544303797468, + "grad_norm": 0.8311273455619812, + "learning_rate": 0.0010102207066826155, + "loss": 1.4931, + "step": 3729 + }, + { + "epoch": 0.39345991561181437, + "grad_norm": 0.8567454814910889, + "learning_rate": 0.0010099852284211573, + "loss": 1.4729, + "step": 3730 + }, + { + "epoch": 0.39356540084388186, + "grad_norm": 0.7462763786315918, + "learning_rate": 0.0010097497210270578, + "loss": 1.4807, + "step": 3731 + }, + { + "epoch": 
0.39367088607594936, + "grad_norm": 0.8372796773910522, + "learning_rate": 0.0010095141845267066, + "loss": 1.5178, + "step": 3732 + }, + { + "epoch": 0.3937763713080169, + "grad_norm": 0.7347860336303711, + "learning_rate": 0.0010092786189464975, + "loss": 1.4803, + "step": 3733 + }, + { + "epoch": 0.3938818565400844, + "grad_norm": 0.7812328338623047, + "learning_rate": 0.0010090430243128259, + "loss": 1.4851, + "step": 3734 + }, + { + "epoch": 0.3939873417721519, + "grad_norm": 0.8147075772285461, + "learning_rate": 0.0010088074006520918, + "loss": 1.5027, + "step": 3735 + }, + { + "epoch": 0.39409282700421944, + "grad_norm": 0.8592190742492676, + "learning_rate": 0.0010085717479906978, + "loss": 1.4999, + "step": 3736 + }, + { + "epoch": 0.39419831223628693, + "grad_norm": 0.7433800101280212, + "learning_rate": 0.0010083360663550502, + "loss": 1.4546, + "step": 3737 + }, + { + "epoch": 0.3943037974683544, + "grad_norm": 0.9874478578567505, + "learning_rate": 0.0010081003557715583, + "loss": 1.4625, + "step": 3738 + }, + { + "epoch": 0.3944092827004219, + "grad_norm": 0.9399405121803284, + "learning_rate": 0.0010078646162666345, + "loss": 1.4441, + "step": 3739 + }, + { + "epoch": 0.39451476793248946, + "grad_norm": 0.6854374408721924, + "learning_rate": 0.0010076288478666944, + "loss": 1.5058, + "step": 3740 + }, + { + "epoch": 0.39462025316455696, + "grad_norm": 0.8383798003196716, + "learning_rate": 0.0010073930505981573, + "loss": 1.4979, + "step": 3741 + }, + { + "epoch": 0.39472573839662445, + "grad_norm": 0.6826254725456238, + "learning_rate": 0.0010071572244874456, + "loss": 1.5007, + "step": 3742 + }, + { + "epoch": 0.394831223628692, + "grad_norm": 0.6958991885185242, + "learning_rate": 0.0010069213695609845, + "loss": 1.4668, + "step": 3743 + }, + { + "epoch": 0.3949367088607595, + "grad_norm": 0.6661311388015747, + "learning_rate": 0.0010066854858452028, + "loss": 1.5301, + "step": 3744 + }, + { + "epoch": 0.395042194092827, + "grad_norm": 
0.7488638162612915, + "learning_rate": 0.0010064495733665324, + "loss": 1.4543, + "step": 3745 + }, + { + "epoch": 0.39514767932489453, + "grad_norm": 0.8240493535995483, + "learning_rate": 0.0010062136321514084, + "loss": 1.4924, + "step": 3746 + }, + { + "epoch": 0.395253164556962, + "grad_norm": 0.9695510268211365, + "learning_rate": 0.0010059776622262698, + "loss": 1.5262, + "step": 3747 + }, + { + "epoch": 0.3953586497890295, + "grad_norm": 0.7215347290039062, + "learning_rate": 0.0010057416636175575, + "loss": 1.4701, + "step": 3748 + }, + { + "epoch": 0.39546413502109706, + "grad_norm": 1.1495205163955688, + "learning_rate": 0.0010055056363517162, + "loss": 1.4614, + "step": 3749 + }, + { + "epoch": 0.39556962025316456, + "grad_norm": 0.7103577256202698, + "learning_rate": 0.0010052695804551946, + "loss": 1.4885, + "step": 3750 + }, + { + "epoch": 0.39567510548523205, + "grad_norm": 0.8315786719322205, + "learning_rate": 0.0010050334959544438, + "loss": 1.485, + "step": 3751 + }, + { + "epoch": 0.3957805907172996, + "grad_norm": 0.7113361954689026, + "learning_rate": 0.0010047973828759178, + "loss": 1.5078, + "step": 3752 + }, + { + "epoch": 0.3958860759493671, + "grad_norm": 0.7101591229438782, + "learning_rate": 0.0010045612412460747, + "loss": 1.4953, + "step": 3753 + }, + { + "epoch": 0.3959915611814346, + "grad_norm": 0.7272844910621643, + "learning_rate": 0.0010043250710913747, + "loss": 1.4823, + "step": 3754 + }, + { + "epoch": 0.39609704641350213, + "grad_norm": 0.6862738132476807, + "learning_rate": 0.0010040888724382828, + "loss": 1.4767, + "step": 3755 + }, + { + "epoch": 0.3962025316455696, + "grad_norm": 0.682761013507843, + "learning_rate": 0.0010038526453132655, + "loss": 1.492, + "step": 3756 + }, + { + "epoch": 0.3963080168776371, + "grad_norm": 0.6871645450592041, + "learning_rate": 0.0010036163897427937, + "loss": 1.4811, + "step": 3757 + }, + { + "epoch": 0.39641350210970466, + "grad_norm": 0.6665424108505249, + "learning_rate": 
0.0010033801057533404, + "loss": 1.4659, + "step": 3758 + }, + { + "epoch": 0.39651898734177216, + "grad_norm": 0.7342808246612549, + "learning_rate": 0.001003143793371383, + "loss": 1.4301, + "step": 3759 + }, + { + "epoch": 0.39662447257383965, + "grad_norm": 0.8061913847923279, + "learning_rate": 0.0010029074526234014, + "loss": 1.4631, + "step": 3760 + }, + { + "epoch": 0.3967299578059072, + "grad_norm": 0.7513694167137146, + "learning_rate": 0.0010026710835358786, + "loss": 1.5046, + "step": 3761 + }, + { + "epoch": 0.3968354430379747, + "grad_norm": 0.724391520023346, + "learning_rate": 0.0010024346861353007, + "loss": 1.4695, + "step": 3762 + }, + { + "epoch": 0.3969409282700422, + "grad_norm": 0.6555619239807129, + "learning_rate": 0.0010021982604481575, + "loss": 1.4739, + "step": 3763 + }, + { + "epoch": 0.39704641350210973, + "grad_norm": 0.6746649742126465, + "learning_rate": 0.001001961806500942, + "loss": 1.4637, + "step": 3764 + }, + { + "epoch": 0.3971518987341772, + "grad_norm": 0.7220439314842224, + "learning_rate": 0.0010017253243201495, + "loss": 1.4814, + "step": 3765 + }, + { + "epoch": 0.3972573839662447, + "grad_norm": 0.752842903137207, + "learning_rate": 0.0010014888139322792, + "loss": 1.4826, + "step": 3766 + }, + { + "epoch": 0.39736286919831226, + "grad_norm": 0.6449989676475525, + "learning_rate": 0.001001252275363833, + "loss": 1.5246, + "step": 3767 + }, + { + "epoch": 0.39746835443037976, + "grad_norm": 0.6382114887237549, + "learning_rate": 0.0010010157086413167, + "loss": 1.5356, + "step": 3768 + }, + { + "epoch": 0.39757383966244725, + "grad_norm": 0.6729283332824707, + "learning_rate": 0.0010007791137912386, + "loss": 1.4847, + "step": 3769 + }, + { + "epoch": 0.39767932489451474, + "grad_norm": 0.6164910197257996, + "learning_rate": 0.0010005424908401104, + "loss": 1.4812, + "step": 3770 + }, + { + "epoch": 0.3977848101265823, + "grad_norm": 0.6806765794754028, + "learning_rate": 0.0010003058398144464, + "loss": 1.4469, + 
"step": 3771 + }, + { + "epoch": 0.3978902953586498, + "grad_norm": 0.6527426242828369, + "learning_rate": 0.0010000691607407652, + "loss": 1.4852, + "step": 3772 + }, + { + "epoch": 0.3979957805907173, + "grad_norm": 0.6424415111541748, + "learning_rate": 0.0009998324536455877, + "loss": 1.5014, + "step": 3773 + }, + { + "epoch": 0.3981012658227848, + "grad_norm": 0.7710249423980713, + "learning_rate": 0.0009995957185554378, + "loss": 1.5014, + "step": 3774 + }, + { + "epoch": 0.3982067510548523, + "grad_norm": 0.7662692666053772, + "learning_rate": 0.000999358955496843, + "loss": 1.4711, + "step": 3775 + }, + { + "epoch": 0.3983122362869198, + "grad_norm": 0.6815635561943054, + "learning_rate": 0.000999122164496334, + "loss": 1.4705, + "step": 3776 + }, + { + "epoch": 0.39841772151898736, + "grad_norm": 0.7684950232505798, + "learning_rate": 0.0009988853455804442, + "loss": 1.4559, + "step": 3777 + }, + { + "epoch": 0.39852320675105485, + "grad_norm": 0.6819830536842346, + "learning_rate": 0.0009986484987757102, + "loss": 1.4836, + "step": 3778 + }, + { + "epoch": 0.39862869198312234, + "grad_norm": 0.676868200302124, + "learning_rate": 0.0009984116241086723, + "loss": 1.4829, + "step": 3779 + }, + { + "epoch": 0.3987341772151899, + "grad_norm": 0.6312851905822754, + "learning_rate": 0.0009981747216058728, + "loss": 1.4599, + "step": 3780 + }, + { + "epoch": 0.3988396624472574, + "grad_norm": 0.6512326002120972, + "learning_rate": 0.0009979377912938587, + "loss": 1.4551, + "step": 3781 + }, + { + "epoch": 0.3989451476793249, + "grad_norm": 0.6643367409706116, + "learning_rate": 0.0009977008331991785, + "loss": 1.4702, + "step": 3782 + }, + { + "epoch": 0.3990506329113924, + "grad_norm": 0.6381166577339172, + "learning_rate": 0.000997463847348385, + "loss": 1.5037, + "step": 3783 + }, + { + "epoch": 0.3991561181434599, + "grad_norm": 0.6663507223129272, + "learning_rate": 0.000997226833768033, + "loss": 1.4488, + "step": 3784 + }, + { + "epoch": 
0.3992616033755274, + "grad_norm": 0.7067028880119324, + "learning_rate": 0.0009969897924846818, + "loss": 1.5155, + "step": 3785 + }, + { + "epoch": 0.39936708860759496, + "grad_norm": 0.678489089012146, + "learning_rate": 0.0009967527235248928, + "loss": 1.4993, + "step": 3786 + }, + { + "epoch": 0.39947257383966245, + "grad_norm": 0.8612210750579834, + "learning_rate": 0.0009965156269152308, + "loss": 1.4498, + "step": 3787 + }, + { + "epoch": 0.39957805907172994, + "grad_norm": 0.7572685480117798, + "learning_rate": 0.0009962785026822632, + "loss": 1.5064, + "step": 3788 + }, + { + "epoch": 0.3996835443037975, + "grad_norm": 0.7013511061668396, + "learning_rate": 0.0009960413508525617, + "loss": 1.481, + "step": 3789 + }, + { + "epoch": 0.399789029535865, + "grad_norm": 0.9028255343437195, + "learning_rate": 0.0009958041714526998, + "loss": 1.479, + "step": 3790 + }, + { + "epoch": 0.3998945147679325, + "grad_norm": 1.0023910999298096, + "learning_rate": 0.0009955669645092546, + "loss": 1.4757, + "step": 3791 + }, + { + "epoch": 0.4, + "grad_norm": 0.7519499659538269, + "learning_rate": 0.0009953297300488069, + "loss": 1.4529, + "step": 3792 + }, + { + "epoch": 0.4001054852320675, + "grad_norm": 1.1237328052520752, + "learning_rate": 0.0009950924680979393, + "loss": 1.5222, + "step": 3793 + }, + { + "epoch": 0.400210970464135, + "grad_norm": 0.724747896194458, + "learning_rate": 0.0009948551786832386, + "loss": 1.4962, + "step": 3794 + }, + { + "epoch": 0.40031645569620256, + "grad_norm": 0.8828396201133728, + "learning_rate": 0.0009946178618312942, + "loss": 1.4938, + "step": 3795 + }, + { + "epoch": 0.40042194092827005, + "grad_norm": 0.7572329044342041, + "learning_rate": 0.0009943805175686986, + "loss": 1.5237, + "step": 3796 + }, + { + "epoch": 0.40052742616033754, + "grad_norm": 0.9363591074943542, + "learning_rate": 0.0009941431459220475, + "loss": 1.4692, + "step": 3797 + }, + { + "epoch": 0.4006329113924051, + "grad_norm": 0.74767005443573, + 
"learning_rate": 0.0009939057469179394, + "loss": 1.5167, + "step": 3798 + }, + { + "epoch": 0.4007383966244726, + "grad_norm": 0.7900565266609192, + "learning_rate": 0.0009936683205829762, + "loss": 1.4454, + "step": 3799 + }, + { + "epoch": 0.4008438818565401, + "grad_norm": 0.7825347185134888, + "learning_rate": 0.0009934308669437627, + "loss": 1.4797, + "step": 3800 + }, + { + "epoch": 0.4009493670886076, + "grad_norm": 0.6414241194725037, + "learning_rate": 0.0009931933860269063, + "loss": 1.4338, + "step": 3801 + }, + { + "epoch": 0.4010548523206751, + "grad_norm": 0.7734975814819336, + "learning_rate": 0.0009929558778590188, + "loss": 1.4604, + "step": 3802 + }, + { + "epoch": 0.4011603375527426, + "grad_norm": 1.0803254842758179, + "learning_rate": 0.0009927183424667135, + "loss": 1.5251, + "step": 3803 + }, + { + "epoch": 0.4012658227848101, + "grad_norm": 0.7361491322517395, + "learning_rate": 0.0009924807798766077, + "loss": 1.4987, + "step": 3804 + }, + { + "epoch": 0.40137130801687765, + "grad_norm": 1.0490858554840088, + "learning_rate": 0.0009922431901153213, + "loss": 1.438, + "step": 3805 + }, + { + "epoch": 0.40147679324894514, + "grad_norm": 0.7140102982521057, + "learning_rate": 0.0009920055732094775, + "loss": 1.4316, + "step": 3806 + }, + { + "epoch": 0.40158227848101263, + "grad_norm": 0.8132771849632263, + "learning_rate": 0.0009917679291857027, + "loss": 1.4615, + "step": 3807 + }, + { + "epoch": 0.4016877637130802, + "grad_norm": 0.654556930065155, + "learning_rate": 0.0009915302580706256, + "loss": 1.4749, + "step": 3808 + }, + { + "epoch": 0.4017932489451477, + "grad_norm": 0.8370399475097656, + "learning_rate": 0.0009912925598908788, + "loss": 1.497, + "step": 3809 + }, + { + "epoch": 0.40189873417721517, + "grad_norm": 0.6490349769592285, + "learning_rate": 0.0009910548346730972, + "loss": 1.5076, + "step": 3810 + }, + { + "epoch": 0.4020042194092827, + "grad_norm": 0.9519945979118347, + "learning_rate": 0.00099081708244392, + "loss": 
1.4772, + "step": 3811 + }, + { + "epoch": 0.4021097046413502, + "grad_norm": 0.7156282067298889, + "learning_rate": 0.0009905793032299875, + "loss": 1.4515, + "step": 3812 + }, + { + "epoch": 0.4022151898734177, + "grad_norm": 0.6954716444015503, + "learning_rate": 0.0009903414970579443, + "loss": 1.4514, + "step": 3813 + }, + { + "epoch": 0.40232067510548525, + "grad_norm": 0.6945379972457886, + "learning_rate": 0.000990103663954438, + "loss": 1.4756, + "step": 3814 + }, + { + "epoch": 0.40242616033755274, + "grad_norm": 0.6681906580924988, + "learning_rate": 0.000989865803946119, + "loss": 1.4928, + "step": 3815 + }, + { + "epoch": 0.40253164556962023, + "grad_norm": 0.6251301169395447, + "learning_rate": 0.0009896279170596406, + "loss": 1.4625, + "step": 3816 + }, + { + "epoch": 0.4026371308016878, + "grad_norm": 0.6422755122184753, + "learning_rate": 0.0009893900033216593, + "loss": 1.4469, + "step": 3817 + }, + { + "epoch": 0.4027426160337553, + "grad_norm": 0.6634401082992554, + "learning_rate": 0.0009891520627588342, + "loss": 1.4705, + "step": 3818 + }, + { + "epoch": 0.40284810126582277, + "grad_norm": 0.6181023120880127, + "learning_rate": 0.000988914095397828, + "loss": 1.4913, + "step": 3819 + }, + { + "epoch": 0.4029535864978903, + "grad_norm": 0.7063077092170715, + "learning_rate": 0.0009886761012653062, + "loss": 1.4498, + "step": 3820 + }, + { + "epoch": 0.4030590717299578, + "grad_norm": 0.7413365244865417, + "learning_rate": 0.000988438080387937, + "loss": 1.4737, + "step": 3821 + }, + { + "epoch": 0.4031645569620253, + "grad_norm": 0.6518958806991577, + "learning_rate": 0.000988200032792392, + "loss": 1.4231, + "step": 3822 + }, + { + "epoch": 0.40327004219409285, + "grad_norm": 0.7187268137931824, + "learning_rate": 0.0009879619585053455, + "loss": 1.4746, + "step": 3823 + }, + { + "epoch": 0.40337552742616034, + "grad_norm": 0.6765727996826172, + "learning_rate": 0.0009877238575534749, + "loss": 1.47, + "step": 3824 + }, + { + "epoch": 
0.40348101265822783, + "grad_norm": 0.7324607968330383, + "learning_rate": 0.0009874857299634605, + "loss": 1.4846, + "step": 3825 + }, + { + "epoch": 0.4035864978902954, + "grad_norm": 0.6874995231628418, + "learning_rate": 0.0009872475757619862, + "loss": 1.5006, + "step": 3826 + }, + { + "epoch": 0.4036919831223629, + "grad_norm": 0.7029670476913452, + "learning_rate": 0.000987009394975738, + "loss": 1.4855, + "step": 3827 + }, + { + "epoch": 0.40379746835443037, + "grad_norm": 0.7083728909492493, + "learning_rate": 0.0009867711876314052, + "loss": 1.4511, + "step": 3828 + }, + { + "epoch": 0.4039029535864979, + "grad_norm": 0.8456974029541016, + "learning_rate": 0.00098653295375568, + "loss": 1.4589, + "step": 3829 + }, + { + "epoch": 0.4040084388185654, + "grad_norm": 0.6183338761329651, + "learning_rate": 0.000986294693375258, + "loss": 1.4459, + "step": 3830 + }, + { + "epoch": 0.4041139240506329, + "grad_norm": 0.773489773273468, + "learning_rate": 0.0009860564065168375, + "loss": 1.4849, + "step": 3831 + }, + { + "epoch": 0.40421940928270045, + "grad_norm": 0.6694275140762329, + "learning_rate": 0.0009858180932071192, + "loss": 1.496, + "step": 3832 + }, + { + "epoch": 0.40432489451476794, + "grad_norm": 0.8816345930099487, + "learning_rate": 0.000985579753472808, + "loss": 1.475, + "step": 3833 + }, + { + "epoch": 0.40443037974683543, + "grad_norm": 0.8109209537506104, + "learning_rate": 0.0009853413873406104, + "loss": 1.4663, + "step": 3834 + }, + { + "epoch": 0.4045358649789029, + "grad_norm": 0.7513608336448669, + "learning_rate": 0.000985102994837237, + "loss": 1.465, + "step": 3835 + }, + { + "epoch": 0.4046413502109705, + "grad_norm": 0.6846289038658142, + "learning_rate": 0.0009848645759894005, + "loss": 1.4887, + "step": 3836 + }, + { + "epoch": 0.40474683544303797, + "grad_norm": 0.8060891032218933, + "learning_rate": 0.0009846261308238177, + "loss": 1.451, + "step": 3837 + }, + { + "epoch": 0.40485232067510546, + "grad_norm": 
0.8741810321807861, + "learning_rate": 0.0009843876593672064, + "loss": 1.4765, + "step": 3838 + }, + { + "epoch": 0.404957805907173, + "grad_norm": 0.6430941820144653, + "learning_rate": 0.0009841491616462892, + "loss": 1.5301, + "step": 3839 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 0.6915519833564758, + "learning_rate": 0.000983910637687791, + "loss": 1.5173, + "step": 3840 + }, + { + "epoch": 0.405168776371308, + "grad_norm": 0.6402732133865356, + "learning_rate": 0.0009836720875184394, + "loss": 1.4992, + "step": 3841 + }, + { + "epoch": 0.40527426160337554, + "grad_norm": 0.6798478960990906, + "learning_rate": 0.0009834335111649655, + "loss": 1.4587, + "step": 3842 + }, + { + "epoch": 0.40537974683544303, + "grad_norm": 0.6227434873580933, + "learning_rate": 0.0009831949086541024, + "loss": 1.4664, + "step": 3843 + }, + { + "epoch": 0.4054852320675105, + "grad_norm": 0.7186411619186401, + "learning_rate": 0.0009829562800125868, + "loss": 1.4645, + "step": 3844 + }, + { + "epoch": 0.4055907172995781, + "grad_norm": 0.8084548115730286, + "learning_rate": 0.0009827176252671587, + "loss": 1.4906, + "step": 3845 + }, + { + "epoch": 0.40569620253164557, + "grad_norm": 0.67000412940979, + "learning_rate": 0.0009824789444445603, + "loss": 1.4565, + "step": 3846 + }, + { + "epoch": 0.40580168776371306, + "grad_norm": 0.7546884417533875, + "learning_rate": 0.0009822402375715366, + "loss": 1.448, + "step": 3847 + }, + { + "epoch": 0.4059071729957806, + "grad_norm": 0.6475914716720581, + "learning_rate": 0.0009820015046748366, + "loss": 1.4721, + "step": 3848 + }, + { + "epoch": 0.4060126582278481, + "grad_norm": 0.7942495942115784, + "learning_rate": 0.0009817627457812106, + "loss": 1.4638, + "step": 3849 + }, + { + "epoch": 0.4061181434599156, + "grad_norm": 0.6926468014717102, + "learning_rate": 0.0009815239609174138, + "loss": 1.4655, + "step": 3850 + }, + { + "epoch": 0.40622362869198314, + "grad_norm": 0.6596444249153137, + "learning_rate": 
0.0009812851501102024, + "loss": 1.5223, + "step": 3851 + }, + { + "epoch": 0.40632911392405063, + "grad_norm": 0.6937089562416077, + "learning_rate": 0.0009810463133863368, + "loss": 1.4881, + "step": 3852 + }, + { + "epoch": 0.4064345991561181, + "grad_norm": 0.6256614327430725, + "learning_rate": 0.0009808074507725794, + "loss": 1.4933, + "step": 3853 + }, + { + "epoch": 0.4065400843881857, + "grad_norm": 0.639835774898529, + "learning_rate": 0.0009805685622956966, + "loss": 1.4891, + "step": 3854 + }, + { + "epoch": 0.40664556962025317, + "grad_norm": 0.6494413614273071, + "learning_rate": 0.0009803296479824564, + "loss": 1.474, + "step": 3855 + }, + { + "epoch": 0.40675105485232066, + "grad_norm": 0.6521511077880859, + "learning_rate": 0.0009800907078596308, + "loss": 1.4934, + "step": 3856 + }, + { + "epoch": 0.4068565400843882, + "grad_norm": 0.8302261233329773, + "learning_rate": 0.000979851741953994, + "loss": 1.4752, + "step": 3857 + }, + { + "epoch": 0.4069620253164557, + "grad_norm": 0.8179552555084229, + "learning_rate": 0.0009796127502923232, + "loss": 1.4763, + "step": 3858 + }, + { + "epoch": 0.4070675105485232, + "grad_norm": 0.6482041478157043, + "learning_rate": 0.000979373732901399, + "loss": 1.4526, + "step": 3859 + }, + { + "epoch": 0.40717299578059074, + "grad_norm": 0.8446061611175537, + "learning_rate": 0.0009791346898080043, + "loss": 1.4626, + "step": 3860 + }, + { + "epoch": 0.40727848101265823, + "grad_norm": 0.6257843375205994, + "learning_rate": 0.000978895621038925, + "loss": 1.4601, + "step": 3861 + }, + { + "epoch": 0.4073839662447257, + "grad_norm": 0.6918672919273376, + "learning_rate": 0.0009786565266209496, + "loss": 1.4717, + "step": 3862 + }, + { + "epoch": 0.4074894514767933, + "grad_norm": 0.6276994347572327, + "learning_rate": 0.0009784174065808706, + "loss": 1.4871, + "step": 3863 + }, + { + "epoch": 0.40759493670886077, + "grad_norm": 0.9420344233512878, + "learning_rate": 0.0009781782609454821, + "loss": 1.4415, + 
"step": 3864 + }, + { + "epoch": 0.40770042194092826, + "grad_norm": 1.147099494934082, + "learning_rate": 0.000977939089741582, + "loss": 1.4485, + "step": 3865 + }, + { + "epoch": 0.4078059071729958, + "grad_norm": 0.640975832939148, + "learning_rate": 0.0009776998929959695, + "loss": 1.5071, + "step": 3866 + }, + { + "epoch": 0.4079113924050633, + "grad_norm": 0.8154355883598328, + "learning_rate": 0.0009774606707354493, + "loss": 1.4577, + "step": 3867 + }, + { + "epoch": 0.4080168776371308, + "grad_norm": 0.6199520230293274, + "learning_rate": 0.0009772214229868265, + "loss": 1.4535, + "step": 3868 + }, + { + "epoch": 0.4081223628691983, + "grad_norm": 0.6663382053375244, + "learning_rate": 0.0009769821497769102, + "loss": 1.5055, + "step": 3869 + }, + { + "epoch": 0.40822784810126583, + "grad_norm": 0.6770683526992798, + "learning_rate": 0.0009767428511325122, + "loss": 1.4916, + "step": 3870 + }, + { + "epoch": 0.4083333333333333, + "grad_norm": 0.7476838827133179, + "learning_rate": 0.000976503527080447, + "loss": 1.4509, + "step": 3871 + }, + { + "epoch": 0.4084388185654008, + "grad_norm": 0.6327685117721558, + "learning_rate": 0.0009762641776475322, + "loss": 1.5127, + "step": 3872 + }, + { + "epoch": 0.40854430379746837, + "grad_norm": 0.8032517433166504, + "learning_rate": 0.0009760248028605882, + "loss": 1.4718, + "step": 3873 + }, + { + "epoch": 0.40864978902953586, + "grad_norm": 0.6586158275604248, + "learning_rate": 0.0009757854027464377, + "loss": 1.4812, + "step": 3874 + }, + { + "epoch": 0.40875527426160335, + "grad_norm": 0.6707819700241089, + "learning_rate": 0.000975545977331907, + "loss": 1.4854, + "step": 3875 + }, + { + "epoch": 0.4088607594936709, + "grad_norm": 0.6394398212432861, + "learning_rate": 0.0009753065266438249, + "loss": 1.4424, + "step": 3876 + }, + { + "epoch": 0.4089662447257384, + "grad_norm": 0.6548624634742737, + "learning_rate": 0.0009750670507090233, + "loss": 1.4355, + "step": 3877 + }, + { + "epoch": 
0.4090717299578059, + "grad_norm": 0.8138777017593384, + "learning_rate": 0.000974827549554336, + "loss": 1.4392, + "step": 3878 + }, + { + "epoch": 0.40917721518987343, + "grad_norm": 0.6025513410568237, + "learning_rate": 0.0009745880232066007, + "loss": 1.4827, + "step": 3879 + }, + { + "epoch": 0.4092827004219409, + "grad_norm": 0.7339764833450317, + "learning_rate": 0.0009743484716926576, + "loss": 1.4389, + "step": 3880 + }, + { + "epoch": 0.4093881856540084, + "grad_norm": 0.7989233136177063, + "learning_rate": 0.0009741088950393497, + "loss": 1.4819, + "step": 3881 + }, + { + "epoch": 0.40949367088607597, + "grad_norm": 0.7080419659614563, + "learning_rate": 0.0009738692932735225, + "loss": 1.4651, + "step": 3882 + }, + { + "epoch": 0.40959915611814346, + "grad_norm": 1.039488434791565, + "learning_rate": 0.0009736296664220247, + "loss": 1.4624, + "step": 3883 + }, + { + "epoch": 0.40970464135021095, + "grad_norm": 0.7374864220619202, + "learning_rate": 0.0009733900145117075, + "loss": 1.451, + "step": 3884 + }, + { + "epoch": 0.4098101265822785, + "grad_norm": 0.6798710227012634, + "learning_rate": 0.0009731503375694253, + "loss": 1.4511, + "step": 3885 + }, + { + "epoch": 0.409915611814346, + "grad_norm": 0.6993646621704102, + "learning_rate": 0.0009729106356220352, + "loss": 1.4761, + "step": 3886 + }, + { + "epoch": 0.4100210970464135, + "grad_norm": 0.6379567384719849, + "learning_rate": 0.0009726709086963967, + "loss": 1.4655, + "step": 3887 + }, + { + "epoch": 0.41012658227848103, + "grad_norm": 0.8021030426025391, + "learning_rate": 0.0009724311568193726, + "loss": 1.4915, + "step": 3888 + }, + { + "epoch": 0.4102320675105485, + "grad_norm": 0.6681240200996399, + "learning_rate": 0.0009721913800178281, + "loss": 1.4479, + "step": 3889 + }, + { + "epoch": 0.410337552742616, + "grad_norm": 0.6585062146186829, + "learning_rate": 0.0009719515783186319, + "loss": 1.4385, + "step": 3890 + }, + { + "epoch": 0.41044303797468357, + "grad_norm": 
0.6606306433677673, + "learning_rate": 0.0009717117517486543, + "loss": 1.4607, + "step": 3891 + }, + { + "epoch": 0.41054852320675106, + "grad_norm": 0.6180629134178162, + "learning_rate": 0.0009714719003347693, + "loss": 1.5056, + "step": 3892 + }, + { + "epoch": 0.41065400843881855, + "grad_norm": 0.6898514032363892, + "learning_rate": 0.0009712320241038537, + "loss": 1.4881, + "step": 3893 + }, + { + "epoch": 0.4107594936708861, + "grad_norm": 0.6244657039642334, + "learning_rate": 0.0009709921230827865, + "loss": 1.4666, + "step": 3894 + }, + { + "epoch": 0.4108649789029536, + "grad_norm": 0.6684898138046265, + "learning_rate": 0.00097075219729845, + "loss": 1.4628, + "step": 3895 + }, + { + "epoch": 0.4109704641350211, + "grad_norm": 0.7590639591217041, + "learning_rate": 0.0009705122467777292, + "loss": 1.4277, + "step": 3896 + }, + { + "epoch": 0.41107594936708863, + "grad_norm": 0.6058562994003296, + "learning_rate": 0.0009702722715475113, + "loss": 1.4534, + "step": 3897 + }, + { + "epoch": 0.4111814345991561, + "grad_norm": 0.7689893841743469, + "learning_rate": 0.000970032271634687, + "loss": 1.4621, + "step": 3898 + }, + { + "epoch": 0.4112869198312236, + "grad_norm": 0.8390997052192688, + "learning_rate": 0.0009697922470661497, + "loss": 1.4371, + "step": 3899 + }, + { + "epoch": 0.41139240506329117, + "grad_norm": 0.9440376162528992, + "learning_rate": 0.0009695521978687951, + "loss": 1.4886, + "step": 3900 + }, + { + "epoch": 0.41149789029535866, + "grad_norm": 0.6789002418518066, + "learning_rate": 0.0009693121240695216, + "loss": 1.4514, + "step": 3901 + }, + { + "epoch": 0.41160337552742615, + "grad_norm": 0.8525362014770508, + "learning_rate": 0.0009690720256952314, + "loss": 1.51, + "step": 3902 + }, + { + "epoch": 0.41170886075949364, + "grad_norm": 0.6855630874633789, + "learning_rate": 0.0009688319027728282, + "loss": 1.465, + "step": 3903 + }, + { + "epoch": 0.4118143459915612, + "grad_norm": 1.0394352674484253, + "learning_rate": 
0.0009685917553292192, + "loss": 1.4501, + "step": 3904 + }, + { + "epoch": 0.4119198312236287, + "grad_norm": 0.9471652507781982, + "learning_rate": 0.0009683515833913137, + "loss": 1.4598, + "step": 3905 + }, + { + "epoch": 0.4120253164556962, + "grad_norm": 0.7070785760879517, + "learning_rate": 0.0009681113869860247, + "loss": 1.4729, + "step": 3906 + }, + { + "epoch": 0.4121308016877637, + "grad_norm": 1.0105736255645752, + "learning_rate": 0.0009678711661402672, + "loss": 1.4339, + "step": 3907 + }, + { + "epoch": 0.4122362869198312, + "grad_norm": 0.6375705003738403, + "learning_rate": 0.0009676309208809592, + "loss": 1.4629, + "step": 3908 + }, + { + "epoch": 0.4123417721518987, + "grad_norm": 1.0421210527420044, + "learning_rate": 0.0009673906512350213, + "loss": 1.4593, + "step": 3909 + }, + { + "epoch": 0.41244725738396626, + "grad_norm": 0.6591453552246094, + "learning_rate": 0.0009671503572293767, + "loss": 1.5055, + "step": 3910 + }, + { + "epoch": 0.41255274261603375, + "grad_norm": 1.0322856903076172, + "learning_rate": 0.000966910038890952, + "loss": 1.4717, + "step": 3911 + }, + { + "epoch": 0.41265822784810124, + "grad_norm": 0.668004035949707, + "learning_rate": 0.0009666696962466757, + "loss": 1.5129, + "step": 3912 + }, + { + "epoch": 0.4127637130801688, + "grad_norm": 0.9528229236602783, + "learning_rate": 0.0009664293293234795, + "loss": 1.4708, + "step": 3913 + }, + { + "epoch": 0.4128691983122363, + "grad_norm": 0.922210156917572, + "learning_rate": 0.0009661889381482977, + "loss": 1.4058, + "step": 3914 + }, + { + "epoch": 0.4129746835443038, + "grad_norm": 0.6043683290481567, + "learning_rate": 0.0009659485227480676, + "loss": 1.4471, + "step": 3915 + }, + { + "epoch": 0.4130801687763713, + "grad_norm": 0.742496907711029, + "learning_rate": 0.0009657080831497284, + "loss": 1.46, + "step": 3916 + }, + { + "epoch": 0.4131856540084388, + "grad_norm": 0.6682368516921997, + "learning_rate": 0.0009654676193802232, + "loss": 1.4342, + "step": 
3917 + }, + { + "epoch": 0.4132911392405063, + "grad_norm": 0.7673653364181519, + "learning_rate": 0.0009652271314664966, + "loss": 1.4839, + "step": 3918 + }, + { + "epoch": 0.41339662447257386, + "grad_norm": 0.7623026371002197, + "learning_rate": 0.0009649866194354967, + "loss": 1.4521, + "step": 3919 + }, + { + "epoch": 0.41350210970464135, + "grad_norm": 0.8757008910179138, + "learning_rate": 0.0009647460833141742, + "loss": 1.4826, + "step": 3920 + }, + { + "epoch": 0.41360759493670884, + "grad_norm": 0.6649569869041443, + "learning_rate": 0.0009645055231294823, + "loss": 1.4409, + "step": 3921 + }, + { + "epoch": 0.4137130801687764, + "grad_norm": 0.679530143737793, + "learning_rate": 0.0009642649389083768, + "loss": 1.5107, + "step": 3922 + }, + { + "epoch": 0.4138185654008439, + "grad_norm": 0.6614977717399597, + "learning_rate": 0.0009640243306778162, + "loss": 1.4847, + "step": 3923 + }, + { + "epoch": 0.4139240506329114, + "grad_norm": 0.6424683928489685, + "learning_rate": 0.0009637836984647627, + "loss": 1.4522, + "step": 3924 + }, + { + "epoch": 0.4140295358649789, + "grad_norm": 0.6627458930015564, + "learning_rate": 0.0009635430422961794, + "loss": 1.4195, + "step": 3925 + }, + { + "epoch": 0.4141350210970464, + "grad_norm": 0.6439464092254639, + "learning_rate": 0.0009633023621990334, + "loss": 1.4676, + "step": 3926 + }, + { + "epoch": 0.4142405063291139, + "grad_norm": 0.6393585205078125, + "learning_rate": 0.000963061658200294, + "loss": 1.4574, + "step": 3927 + }, + { + "epoch": 0.41434599156118146, + "grad_norm": 0.6728157997131348, + "learning_rate": 0.0009628209303269335, + "loss": 1.4703, + "step": 3928 + }, + { + "epoch": 0.41445147679324895, + "grad_norm": 0.7755478620529175, + "learning_rate": 0.0009625801786059267, + "loss": 1.4864, + "step": 3929 + }, + { + "epoch": 0.41455696202531644, + "grad_norm": 0.6306868195533752, + "learning_rate": 0.0009623394030642507, + "loss": 1.472, + "step": 3930 + }, + { + "epoch": 0.414662447257384, + 
"grad_norm": 0.753173291683197, + "learning_rate": 0.0009620986037288858, + "loss": 1.4429, + "step": 3931 + }, + { + "epoch": 0.4147679324894515, + "grad_norm": 0.6406272649765015, + "learning_rate": 0.0009618577806268147, + "loss": 1.4487, + "step": 3932 + }, + { + "epoch": 0.414873417721519, + "grad_norm": 0.6980904936790466, + "learning_rate": 0.0009616169337850229, + "loss": 1.4565, + "step": 3933 + }, + { + "epoch": 0.41497890295358647, + "grad_norm": 0.683038055896759, + "learning_rate": 0.0009613760632304985, + "loss": 1.4768, + "step": 3934 + }, + { + "epoch": 0.415084388185654, + "grad_norm": 0.8219226002693176, + "learning_rate": 0.0009611351689902321, + "loss": 1.4792, + "step": 3935 + }, + { + "epoch": 0.4151898734177215, + "grad_norm": 0.6466397047042847, + "learning_rate": 0.000960894251091217, + "loss": 1.4522, + "step": 3936 + }, + { + "epoch": 0.415295358649789, + "grad_norm": 0.7947251796722412, + "learning_rate": 0.0009606533095604499, + "loss": 1.4984, + "step": 3937 + }, + { + "epoch": 0.41540084388185655, + "grad_norm": 0.6888821125030518, + "learning_rate": 0.0009604123444249288, + "loss": 1.4293, + "step": 3938 + }, + { + "epoch": 0.41550632911392404, + "grad_norm": 0.8416332006454468, + "learning_rate": 0.0009601713557116554, + "loss": 1.4812, + "step": 3939 + }, + { + "epoch": 0.41561181434599154, + "grad_norm": 0.884223997592926, + "learning_rate": 0.0009599303434476334, + "loss": 1.4761, + "step": 3940 + }, + { + "epoch": 0.4157172995780591, + "grad_norm": 0.7056865096092224, + "learning_rate": 0.0009596893076598698, + "loss": 1.4731, + "step": 3941 + }, + { + "epoch": 0.4158227848101266, + "grad_norm": 0.9602525234222412, + "learning_rate": 0.0009594482483753736, + "loss": 1.5034, + "step": 3942 + }, + { + "epoch": 0.41592827004219407, + "grad_norm": 0.645514726638794, + "learning_rate": 0.0009592071656211568, + "loss": 1.4658, + "step": 3943 + }, + { + "epoch": 0.4160337552742616, + "grad_norm": 1.06758451461792, + "learning_rate": 
0.0009589660594242338, + "loss": 1.4848, + "step": 3944 + }, + { + "epoch": 0.4161392405063291, + "grad_norm": 0.6525558829307556, + "learning_rate": 0.0009587249298116219, + "loss": 1.4479, + "step": 3945 + }, + { + "epoch": 0.4162447257383966, + "grad_norm": 1.2863065004348755, + "learning_rate": 0.0009584837768103408, + "loss": 1.4361, + "step": 3946 + }, + { + "epoch": 0.41635021097046415, + "grad_norm": 0.7306294441223145, + "learning_rate": 0.0009582426004474129, + "loss": 1.4936, + "step": 3947 + }, + { + "epoch": 0.41645569620253164, + "grad_norm": 1.4769604206085205, + "learning_rate": 0.0009580014007498634, + "loss": 1.4889, + "step": 3948 + }, + { + "epoch": 0.41656118143459914, + "grad_norm": 0.6982805728912354, + "learning_rate": 0.0009577601777447194, + "loss": 1.4547, + "step": 3949 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 0.9208523631095886, + "learning_rate": 0.0009575189314590118, + "loss": 1.4801, + "step": 3950 + }, + { + "epoch": 0.4167721518987342, + "grad_norm": 0.6501477360725403, + "learning_rate": 0.0009572776619197731, + "loss": 1.4783, + "step": 3951 + }, + { + "epoch": 0.41687763713080167, + "grad_norm": 0.8439766764640808, + "learning_rate": 0.0009570363691540387, + "loss": 1.466, + "step": 3952 + }, + { + "epoch": 0.4169831223628692, + "grad_norm": 0.7427982091903687, + "learning_rate": 0.0009567950531888469, + "loss": 1.5054, + "step": 3953 + }, + { + "epoch": 0.4170886075949367, + "grad_norm": 0.7019367814064026, + "learning_rate": 0.0009565537140512381, + "loss": 1.4716, + "step": 3954 + }, + { + "epoch": 0.4171940928270042, + "grad_norm": 0.7678133249282837, + "learning_rate": 0.0009563123517682559, + "loss": 1.4598, + "step": 3955 + }, + { + "epoch": 0.41729957805907175, + "grad_norm": 0.6508509516716003, + "learning_rate": 0.0009560709663669456, + "loss": 1.4436, + "step": 3956 + }, + { + "epoch": 0.41740506329113924, + "grad_norm": 0.8355050683021545, + "learning_rate": 0.0009558295578743559, + "loss": 1.4684, + 
"step": 3957 + }, + { + "epoch": 0.41751054852320674, + "grad_norm": 0.6707166433334351, + "learning_rate": 0.0009555881263175381, + "loss": 1.4928, + "step": 3958 + }, + { + "epoch": 0.4176160337552743, + "grad_norm": 1.0175260305404663, + "learning_rate": 0.0009553466717235456, + "loss": 1.4329, + "step": 3959 + }, + { + "epoch": 0.4177215189873418, + "grad_norm": 0.6813086867332458, + "learning_rate": 0.0009551051941194346, + "loss": 1.4768, + "step": 3960 + }, + { + "epoch": 0.41782700421940927, + "grad_norm": 1.0503877401351929, + "learning_rate": 0.0009548636935322639, + "loss": 1.437, + "step": 3961 + }, + { + "epoch": 0.4179324894514768, + "grad_norm": 1.0875930786132812, + "learning_rate": 0.0009546221699890945, + "loss": 1.4454, + "step": 3962 + }, + { + "epoch": 0.4180379746835443, + "grad_norm": 0.7766265273094177, + "learning_rate": 0.0009543806235169909, + "loss": 1.451, + "step": 3963 + }, + { + "epoch": 0.4181434599156118, + "grad_norm": 1.2726719379425049, + "learning_rate": 0.0009541390541430192, + "loss": 1.4995, + "step": 3964 + }, + { + "epoch": 0.41824894514767935, + "grad_norm": 0.7729300260543823, + "learning_rate": 0.0009538974618942486, + "loss": 1.4881, + "step": 3965 + }, + { + "epoch": 0.41835443037974684, + "grad_norm": 1.1635795831680298, + "learning_rate": 0.0009536558467977505, + "loss": 1.4481, + "step": 3966 + }, + { + "epoch": 0.41845991561181434, + "grad_norm": 1.1024264097213745, + "learning_rate": 0.0009534142088805994, + "loss": 1.4673, + "step": 3967 + }, + { + "epoch": 0.41856540084388183, + "grad_norm": 1.590507984161377, + "learning_rate": 0.0009531725481698719, + "loss": 1.478, + "step": 3968 + }, + { + "epoch": 0.4186708860759494, + "grad_norm": 1.4425760507583618, + "learning_rate": 0.0009529308646926473, + "loss": 1.496, + "step": 3969 + }, + { + "epoch": 0.41877637130801687, + "grad_norm": 0.8819417357444763, + "learning_rate": 0.0009526891584760071, + "loss": 1.4451, + "step": 3970 + }, + { + "epoch": 
0.41888185654008436, + "grad_norm": 1.13009774684906, + "learning_rate": 0.0009524474295470362, + "loss": 1.4811, + "step": 3971 + }, + { + "epoch": 0.4189873417721519, + "grad_norm": 0.7942914962768555, + "learning_rate": 0.0009522056779328214, + "loss": 1.4611, + "step": 3972 + }, + { + "epoch": 0.4190928270042194, + "grad_norm": 1.0235496759414673, + "learning_rate": 0.0009519639036604522, + "loss": 1.4994, + "step": 3973 + }, + { + "epoch": 0.4191983122362869, + "grad_norm": 0.9283861517906189, + "learning_rate": 0.0009517221067570204, + "loss": 1.4672, + "step": 3974 + }, + { + "epoch": 0.41930379746835444, + "grad_norm": 0.8952341079711914, + "learning_rate": 0.0009514802872496205, + "loss": 1.4695, + "step": 3975 + }, + { + "epoch": 0.41940928270042194, + "grad_norm": 1.172163724899292, + "learning_rate": 0.0009512384451653499, + "loss": 1.4619, + "step": 3976 + }, + { + "epoch": 0.41951476793248943, + "grad_norm": 0.7830551862716675, + "learning_rate": 0.000950996580531308, + "loss": 1.4694, + "step": 3977 + }, + { + "epoch": 0.419620253164557, + "grad_norm": 1.0034863948822021, + "learning_rate": 0.000950754693374597, + "loss": 1.4222, + "step": 3978 + }, + { + "epoch": 0.41972573839662447, + "grad_norm": 0.7188722491264343, + "learning_rate": 0.0009505127837223215, + "loss": 1.4749, + "step": 3979 + }, + { + "epoch": 0.41983122362869196, + "grad_norm": 1.0144569873809814, + "learning_rate": 0.0009502708516015889, + "loss": 1.4986, + "step": 3980 + }, + { + "epoch": 0.4199367088607595, + "grad_norm": 0.7771670818328857, + "learning_rate": 0.0009500288970395085, + "loss": 1.453, + "step": 3981 + }, + { + "epoch": 0.420042194092827, + "grad_norm": 1.1229596138000488, + "learning_rate": 0.000949786920063193, + "loss": 1.4712, + "step": 3982 + }, + { + "epoch": 0.4201476793248945, + "grad_norm": 1.0276552438735962, + "learning_rate": 0.0009495449206997568, + "loss": 1.4527, + "step": 3983 + }, + { + "epoch": 0.42025316455696204, + "grad_norm": 
1.2845436334609985, + "learning_rate": 0.0009493028989763171, + "loss": 1.4732, + "step": 3984 + }, + { + "epoch": 0.42035864978902954, + "grad_norm": 1.114293098449707, + "learning_rate": 0.0009490608549199939, + "loss": 1.4567, + "step": 3985 + }, + { + "epoch": 0.42046413502109703, + "grad_norm": 0.9144551753997803, + "learning_rate": 0.0009488187885579092, + "loss": 1.4519, + "step": 3986 + }, + { + "epoch": 0.4205696202531646, + "grad_norm": 0.9293582439422607, + "learning_rate": 0.000948576699917188, + "loss": 1.4452, + "step": 3987 + }, + { + "epoch": 0.42067510548523207, + "grad_norm": 0.8524990081787109, + "learning_rate": 0.0009483345890249571, + "loss": 1.4604, + "step": 3988 + }, + { + "epoch": 0.42078059071729956, + "grad_norm": 1.0114359855651855, + "learning_rate": 0.0009480924559083468, + "loss": 1.4622, + "step": 3989 + }, + { + "epoch": 0.4208860759493671, + "grad_norm": 0.9090972542762756, + "learning_rate": 0.0009478503005944888, + "loss": 1.4811, + "step": 3990 + }, + { + "epoch": 0.4209915611814346, + "grad_norm": 0.8513180017471313, + "learning_rate": 0.0009476081231105183, + "loss": 1.4737, + "step": 3991 + }, + { + "epoch": 0.4210970464135021, + "grad_norm": 0.942046046257019, + "learning_rate": 0.0009473659234835722, + "loss": 1.4739, + "step": 3992 + }, + { + "epoch": 0.42120253164556964, + "grad_norm": 0.6526361703872681, + "learning_rate": 0.00094712370174079, + "loss": 1.4591, + "step": 3993 + }, + { + "epoch": 0.42130801687763714, + "grad_norm": 0.6730970740318298, + "learning_rate": 0.0009468814579093141, + "loss": 1.4688, + "step": 3994 + }, + { + "epoch": 0.42141350210970463, + "grad_norm": 0.6674411296844482, + "learning_rate": 0.0009466391920162894, + "loss": 1.4575, + "step": 3995 + }, + { + "epoch": 0.4215189873417722, + "grad_norm": 0.6338145732879639, + "learning_rate": 0.0009463969040888624, + "loss": 1.474, + "step": 3996 + }, + { + "epoch": 0.42162447257383967, + "grad_norm": 0.7466512322425842, + "learning_rate": 
0.0009461545941541832, + "loss": 1.4885, + "step": 3997 + }, + { + "epoch": 0.42172995780590716, + "grad_norm": 0.6866075992584229, + "learning_rate": 0.0009459122622394033, + "loss": 1.4534, + "step": 3998 + }, + { + "epoch": 0.4218354430379747, + "grad_norm": 0.8315669298171997, + "learning_rate": 0.0009456699083716777, + "loss": 1.4895, + "step": 3999 + }, + { + "epoch": 0.4219409282700422, + "grad_norm": 0.6954001188278198, + "learning_rate": 0.0009454275325781632, + "loss": 1.4763, + "step": 4000 + }, + { + "epoch": 0.4220464135021097, + "grad_norm": 0.9013093113899231, + "learning_rate": 0.0009451851348860191, + "loss": 1.4442, + "step": 4001 + }, + { + "epoch": 0.4221518987341772, + "grad_norm": 0.6554880738258362, + "learning_rate": 0.0009449427153224076, + "loss": 1.4556, + "step": 4002 + }, + { + "epoch": 0.42225738396624474, + "grad_norm": 0.8806166648864746, + "learning_rate": 0.0009447002739144924, + "loss": 1.4378, + "step": 4003 + }, + { + "epoch": 0.42236286919831223, + "grad_norm": 0.7432313561439514, + "learning_rate": 0.0009444578106894408, + "loss": 1.4752, + "step": 4004 + }, + { + "epoch": 0.4224683544303797, + "grad_norm": 0.6972000002861023, + "learning_rate": 0.000944215325674422, + "loss": 1.4528, + "step": 4005 + }, + { + "epoch": 0.42257383966244727, + "grad_norm": 0.8357451558113098, + "learning_rate": 0.0009439728188966074, + "loss": 1.4722, + "step": 4006 + }, + { + "epoch": 0.42267932489451476, + "grad_norm": 0.6296131014823914, + "learning_rate": 0.0009437302903831712, + "loss": 1.4615, + "step": 4007 + }, + { + "epoch": 0.42278481012658226, + "grad_norm": 0.7244961857795715, + "learning_rate": 0.0009434877401612898, + "loss": 1.4401, + "step": 4008 + }, + { + "epoch": 0.4228902953586498, + "grad_norm": 0.6873177886009216, + "learning_rate": 0.0009432451682581424, + "loss": 1.4537, + "step": 4009 + }, + { + "epoch": 0.4229957805907173, + "grad_norm": 0.8001173138618469, + "learning_rate": 0.0009430025747009104, + "loss": 1.4869, + 
"step": 4010 + }, + { + "epoch": 0.4231012658227848, + "grad_norm": 0.6502379179000854, + "learning_rate": 0.0009427599595167776, + "loss": 1.4411, + "step": 4011 + }, + { + "epoch": 0.42320675105485234, + "grad_norm": 1.0047571659088135, + "learning_rate": 0.0009425173227329297, + "loss": 1.4308, + "step": 4012 + }, + { + "epoch": 0.42331223628691983, + "grad_norm": 0.7214546799659729, + "learning_rate": 0.0009422746643765563, + "loss": 1.452, + "step": 4013 + }, + { + "epoch": 0.4234177215189873, + "grad_norm": 0.8159408569335938, + "learning_rate": 0.0009420319844748476, + "loss": 1.4698, + "step": 4014 + }, + { + "epoch": 0.42352320675105487, + "grad_norm": 0.7029444575309753, + "learning_rate": 0.0009417892830549978, + "loss": 1.4894, + "step": 4015 + }, + { + "epoch": 0.42362869198312236, + "grad_norm": 0.7474221587181091, + "learning_rate": 0.0009415465601442023, + "loss": 1.4886, + "step": 4016 + }, + { + "epoch": 0.42373417721518986, + "grad_norm": 0.7046831846237183, + "learning_rate": 0.0009413038157696595, + "loss": 1.4599, + "step": 4017 + }, + { + "epoch": 0.4238396624472574, + "grad_norm": 0.6344519257545471, + "learning_rate": 0.0009410610499585705, + "loss": 1.4771, + "step": 4018 + }, + { + "epoch": 0.4239451476793249, + "grad_norm": 0.6953473687171936, + "learning_rate": 0.000940818262738138, + "loss": 1.4366, + "step": 4019 + }, + { + "epoch": 0.4240506329113924, + "grad_norm": 0.6206953525543213, + "learning_rate": 0.0009405754541355677, + "loss": 1.4963, + "step": 4020 + }, + { + "epoch": 0.42415611814345994, + "grad_norm": 0.7984479665756226, + "learning_rate": 0.0009403326241780674, + "loss": 1.4352, + "step": 4021 + }, + { + "epoch": 0.42426160337552743, + "grad_norm": 0.6621367931365967, + "learning_rate": 0.0009400897728928475, + "loss": 1.4773, + "step": 4022 + }, + { + "epoch": 0.4243670886075949, + "grad_norm": 0.7358695864677429, + "learning_rate": 0.0009398469003071207, + "loss": 1.4379, + "step": 4023 + }, + { + "epoch": 
0.42447257383966247, + "grad_norm": 0.77762371301651, + "learning_rate": 0.0009396040064481021, + "loss": 1.4529, + "step": 4024 + }, + { + "epoch": 0.42457805907172996, + "grad_norm": 0.6910567879676819, + "learning_rate": 0.000939361091343009, + "loss": 1.4823, + "step": 4025 + }, + { + "epoch": 0.42468354430379746, + "grad_norm": 0.7129560708999634, + "learning_rate": 0.0009391181550190615, + "loss": 1.4749, + "step": 4026 + }, + { + "epoch": 0.424789029535865, + "grad_norm": 0.7347972989082336, + "learning_rate": 0.0009388751975034815, + "loss": 1.4337, + "step": 4027 + }, + { + "epoch": 0.4248945147679325, + "grad_norm": 0.7336187958717346, + "learning_rate": 0.0009386322188234941, + "loss": 1.4679, + "step": 4028 + }, + { + "epoch": 0.425, + "grad_norm": 0.7482222318649292, + "learning_rate": 0.0009383892190063256, + "loss": 1.4423, + "step": 4029 + }, + { + "epoch": 0.42510548523206754, + "grad_norm": 0.9067668914794922, + "learning_rate": 0.0009381461980792061, + "loss": 1.4587, + "step": 4030 + }, + { + "epoch": 0.42521097046413503, + "grad_norm": 0.7618240118026733, + "learning_rate": 0.0009379031560693665, + "loss": 1.4369, + "step": 4031 + }, + { + "epoch": 0.4253164556962025, + "grad_norm": 1.2805787324905396, + "learning_rate": 0.0009376600930040417, + "loss": 1.4918, + "step": 4032 + }, + { + "epoch": 0.42542194092827, + "grad_norm": 0.7664522528648376, + "learning_rate": 0.0009374170089104676, + "loss": 1.4802, + "step": 4033 + }, + { + "epoch": 0.42552742616033756, + "grad_norm": 1.224408745765686, + "learning_rate": 0.000937173903815883, + "loss": 1.4638, + "step": 4034 + }, + { + "epoch": 0.42563291139240506, + "grad_norm": 0.7894347906112671, + "learning_rate": 0.0009369307777475293, + "loss": 1.4725, + "step": 4035 + }, + { + "epoch": 0.42573839662447255, + "grad_norm": 0.9640341997146606, + "learning_rate": 0.0009366876307326496, + "loss": 1.4587, + "step": 4036 + }, + { + "epoch": 0.4258438818565401, + "grad_norm": 0.77154541015625, + 
"learning_rate": 0.0009364444627984902, + "loss": 1.488, + "step": 4037 + }, + { + "epoch": 0.4259493670886076, + "grad_norm": 0.8987277150154114, + "learning_rate": 0.000936201273972299, + "loss": 1.4689, + "step": 4038 + }, + { + "epoch": 0.4260548523206751, + "grad_norm": 0.8137081861495972, + "learning_rate": 0.0009359580642813265, + "loss": 1.4417, + "step": 4039 + }, + { + "epoch": 0.42616033755274263, + "grad_norm": 0.8378822803497314, + "learning_rate": 0.0009357148337528256, + "loss": 1.4531, + "step": 4040 + }, + { + "epoch": 0.4262658227848101, + "grad_norm": 0.870857834815979, + "learning_rate": 0.0009354715824140515, + "loss": 1.4621, + "step": 4041 + }, + { + "epoch": 0.4263713080168776, + "grad_norm": 0.7752943634986877, + "learning_rate": 0.0009352283102922619, + "loss": 1.4817, + "step": 4042 + }, + { + "epoch": 0.42647679324894516, + "grad_norm": 0.804477870464325, + "learning_rate": 0.0009349850174147165, + "loss": 1.4405, + "step": 4043 + }, + { + "epoch": 0.42658227848101266, + "grad_norm": 0.6881405115127563, + "learning_rate": 0.0009347417038086772, + "loss": 1.4627, + "step": 4044 + }, + { + "epoch": 0.42668776371308015, + "grad_norm": 0.9269249439239502, + "learning_rate": 0.000934498369501409, + "loss": 1.5149, + "step": 4045 + }, + { + "epoch": 0.4267932489451477, + "grad_norm": 0.7646681070327759, + "learning_rate": 0.0009342550145201786, + "loss": 1.4501, + "step": 4046 + }, + { + "epoch": 0.4268987341772152, + "grad_norm": 0.7286233901977539, + "learning_rate": 0.0009340116388922551, + "loss": 1.4651, + "step": 4047 + }, + { + "epoch": 0.4270042194092827, + "grad_norm": 0.6980843544006348, + "learning_rate": 0.0009337682426449097, + "loss": 1.4773, + "step": 4048 + }, + { + "epoch": 0.42710970464135023, + "grad_norm": 0.9468338489532471, + "learning_rate": 0.0009335248258054162, + "loss": 1.4424, + "step": 4049 + }, + { + "epoch": 0.4272151898734177, + "grad_norm": 0.6528197526931763, + "learning_rate": 0.0009332813884010511, + "loss": 
1.4703, + "step": 4050 + }, + { + "epoch": 0.4273206751054852, + "grad_norm": 1.0093284845352173, + "learning_rate": 0.0009330379304590924, + "loss": 1.4457, + "step": 4051 + }, + { + "epoch": 0.42742616033755276, + "grad_norm": 0.9030142426490784, + "learning_rate": 0.000932794452006821, + "loss": 1.4977, + "step": 4052 + }, + { + "epoch": 0.42753164556962026, + "grad_norm": 0.6249412298202515, + "learning_rate": 0.0009325509530715196, + "loss": 1.4739, + "step": 4053 + }, + { + "epoch": 0.42763713080168775, + "grad_norm": 0.7019919753074646, + "learning_rate": 0.0009323074336804738, + "loss": 1.4691, + "step": 4054 + }, + { + "epoch": 0.4277426160337553, + "grad_norm": 0.679599404335022, + "learning_rate": 0.0009320638938609708, + "loss": 1.4844, + "step": 4055 + }, + { + "epoch": 0.4278481012658228, + "grad_norm": 0.7089268565177917, + "learning_rate": 0.0009318203336403008, + "loss": 1.4515, + "step": 4056 + }, + { + "epoch": 0.4279535864978903, + "grad_norm": 0.8625531792640686, + "learning_rate": 0.0009315767530457556, + "loss": 1.4561, + "step": 4057 + }, + { + "epoch": 0.42805907172995783, + "grad_norm": 0.6931233406066895, + "learning_rate": 0.0009313331521046299, + "loss": 1.4733, + "step": 4058 + }, + { + "epoch": 0.4281645569620253, + "grad_norm": 0.8373854160308838, + "learning_rate": 0.0009310895308442202, + "loss": 1.5001, + "step": 4059 + }, + { + "epoch": 0.4282700421940928, + "grad_norm": 0.9766184687614441, + "learning_rate": 0.0009308458892918259, + "loss": 1.4536, + "step": 4060 + }, + { + "epoch": 0.42837552742616036, + "grad_norm": 0.6942794322967529, + "learning_rate": 0.0009306022274747478, + "loss": 1.4835, + "step": 4061 + }, + { + "epoch": 0.42848101265822786, + "grad_norm": 0.8147565722465515, + "learning_rate": 0.0009303585454202892, + "loss": 1.4703, + "step": 4062 + }, + { + "epoch": 0.42858649789029535, + "grad_norm": 0.8052829504013062, + "learning_rate": 0.0009301148431557565, + "loss": 1.4178, + "step": 4063 + }, + { + "epoch": 
0.4286919831223629, + "grad_norm": 1.1469813585281372, + "learning_rate": 0.0009298711207084575, + "loss": 1.4452, + "step": 4064 + }, + { + "epoch": 0.4287974683544304, + "grad_norm": 0.6810134649276733, + "learning_rate": 0.0009296273781057026, + "loss": 1.4967, + "step": 4065 + }, + { + "epoch": 0.4289029535864979, + "grad_norm": 1.1050074100494385, + "learning_rate": 0.0009293836153748039, + "loss": 1.4587, + "step": 4066 + }, + { + "epoch": 0.4290084388185654, + "grad_norm": 0.6691009998321533, + "learning_rate": 0.0009291398325430771, + "loss": 1.447, + "step": 4067 + }, + { + "epoch": 0.4291139240506329, + "grad_norm": 1.203199028968811, + "learning_rate": 0.0009288960296378386, + "loss": 1.4484, + "step": 4068 + }, + { + "epoch": 0.4292194092827004, + "grad_norm": 0.6721424460411072, + "learning_rate": 0.0009286522066864078, + "loss": 1.5097, + "step": 4069 + }, + { + "epoch": 0.4293248945147679, + "grad_norm": 0.818268895149231, + "learning_rate": 0.0009284083637161064, + "loss": 1.4738, + "step": 4070 + }, + { + "epoch": 0.42943037974683546, + "grad_norm": 0.6563342213630676, + "learning_rate": 0.0009281645007542584, + "loss": 1.4928, + "step": 4071 + }, + { + "epoch": 0.42953586497890295, + "grad_norm": 0.7009510397911072, + "learning_rate": 0.0009279206178281895, + "loss": 1.4515, + "step": 4072 + }, + { + "epoch": 0.42964135021097044, + "grad_norm": 0.7013559341430664, + "learning_rate": 0.0009276767149652284, + "loss": 1.4505, + "step": 4073 + }, + { + "epoch": 0.429746835443038, + "grad_norm": 0.6873618364334106, + "learning_rate": 0.0009274327921927054, + "loss": 1.4515, + "step": 4074 + }, + { + "epoch": 0.4298523206751055, + "grad_norm": 0.7237976789474487, + "learning_rate": 0.0009271888495379529, + "loss": 1.5076, + "step": 4075 + }, + { + "epoch": 0.429957805907173, + "grad_norm": 0.7576857805252075, + "learning_rate": 0.0009269448870283067, + "loss": 1.4614, + "step": 4076 + }, + { + "epoch": 0.4300632911392405, + "grad_norm": 
0.7287431955337524, + "learning_rate": 0.0009267009046911032, + "loss": 1.4806, + "step": 4077 + }, + { + "epoch": 0.430168776371308, + "grad_norm": 0.9463727474212646, + "learning_rate": 0.0009264569025536825, + "loss": 1.4665, + "step": 4078 + }, + { + "epoch": 0.4302742616033755, + "grad_norm": 0.9350462555885315, + "learning_rate": 0.0009262128806433858, + "loss": 1.4738, + "step": 4079 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 1.0167808532714844, + "learning_rate": 0.0009259688389875574, + "loss": 1.4495, + "step": 4080 + }, + { + "epoch": 0.43048523206751055, + "grad_norm": 1.3325135707855225, + "learning_rate": 0.000925724777613543, + "loss": 1.4773, + "step": 4081 + }, + { + "epoch": 0.43059071729957804, + "grad_norm": 1.0059990882873535, + "learning_rate": 0.0009254806965486909, + "loss": 1.499, + "step": 4082 + }, + { + "epoch": 0.4306962025316456, + "grad_norm": 0.9836728572845459, + "learning_rate": 0.0009252365958203518, + "loss": 1.491, + "step": 4083 + }, + { + "epoch": 0.4308016877637131, + "grad_norm": 1.3063454627990723, + "learning_rate": 0.0009249924754558785, + "loss": 1.4382, + "step": 4084 + }, + { + "epoch": 0.4309071729957806, + "grad_norm": 0.8996194005012512, + "learning_rate": 0.0009247483354826255, + "loss": 1.4558, + "step": 4085 + }, + { + "epoch": 0.4310126582278481, + "grad_norm": 1.0764617919921875, + "learning_rate": 0.0009245041759279502, + "loss": 1.4818, + "step": 4086 + }, + { + "epoch": 0.4311181434599156, + "grad_norm": 0.9607557654380798, + "learning_rate": 0.0009242599968192119, + "loss": 1.4888, + "step": 4087 + }, + { + "epoch": 0.4312236286919831, + "grad_norm": 0.7288025617599487, + "learning_rate": 0.000924015798183772, + "loss": 1.4533, + "step": 4088 + }, + { + "epoch": 0.43132911392405066, + "grad_norm": 0.7735080718994141, + "learning_rate": 0.0009237715800489942, + "loss": 1.4355, + "step": 4089 + }, + { + "epoch": 0.43143459915611815, + "grad_norm": 0.7202901840209961, + "learning_rate": 
0.0009235273424422442, + "loss": 1.4382, + "step": 4090 + }, + { + "epoch": 0.43154008438818564, + "grad_norm": 0.7916978597640991, + "learning_rate": 0.0009232830853908904, + "loss": 1.5, + "step": 4091 + }, + { + "epoch": 0.4316455696202532, + "grad_norm": 0.7161177396774292, + "learning_rate": 0.0009230388089223028, + "loss": 1.4534, + "step": 4092 + }, + { + "epoch": 0.4317510548523207, + "grad_norm": 0.728975236415863, + "learning_rate": 0.0009227945130638537, + "loss": 1.466, + "step": 4093 + }, + { + "epoch": 0.4318565400843882, + "grad_norm": 0.7122763395309448, + "learning_rate": 0.0009225501978429177, + "loss": 1.4708, + "step": 4094 + }, + { + "epoch": 0.4319620253164557, + "grad_norm": 0.7959088087081909, + "learning_rate": 0.0009223058632868719, + "loss": 1.4551, + "step": 4095 + }, + { + "epoch": 0.4320675105485232, + "grad_norm": 0.6814939975738525, + "learning_rate": 0.0009220615094230946, + "loss": 1.4646, + "step": 4096 + }, + { + "epoch": 0.4321729957805907, + "grad_norm": 0.8311017751693726, + "learning_rate": 0.0009218171362789674, + "loss": 1.4549, + "step": 4097 + }, + { + "epoch": 0.43227848101265826, + "grad_norm": 0.6342844367027283, + "learning_rate": 0.0009215727438818733, + "loss": 1.467, + "step": 4098 + }, + { + "epoch": 0.43238396624472575, + "grad_norm": 0.7303431034088135, + "learning_rate": 0.0009213283322591977, + "loss": 1.4157, + "step": 4099 + }, + { + "epoch": 0.43248945147679324, + "grad_norm": 0.6358318328857422, + "learning_rate": 0.0009210839014383282, + "loss": 1.4481, + "step": 4100 + }, + { + "epoch": 0.43259493670886073, + "grad_norm": 0.6790050864219666, + "learning_rate": 0.0009208394514466544, + "loss": 1.4453, + "step": 4101 + }, + { + "epoch": 0.4327004219409283, + "grad_norm": 0.7178156971931458, + "learning_rate": 0.0009205949823115681, + "loss": 1.4342, + "step": 4102 + }, + { + "epoch": 0.4328059071729958, + "grad_norm": 0.7828618288040161, + "learning_rate": 0.0009203504940604634, + "loss": 1.392, + "step": 
4103 + }, + { + "epoch": 0.43291139240506327, + "grad_norm": 0.7220349311828613, + "learning_rate": 0.0009201059867207366, + "loss": 1.449, + "step": 4104 + }, + { + "epoch": 0.4330168776371308, + "grad_norm": 0.7232245802879333, + "learning_rate": 0.0009198614603197854, + "loss": 1.4412, + "step": 4105 + }, + { + "epoch": 0.4331223628691983, + "grad_norm": 0.6696179509162903, + "learning_rate": 0.0009196169148850108, + "loss": 1.477, + "step": 4106 + }, + { + "epoch": 0.4332278481012658, + "grad_norm": 0.8325371146202087, + "learning_rate": 0.000919372350443815, + "loss": 1.4768, + "step": 4107 + }, + { + "epoch": 0.43333333333333335, + "grad_norm": 0.8660522103309631, + "learning_rate": 0.000919127767023603, + "loss": 1.4472, + "step": 4108 + }, + { + "epoch": 0.43343881856540084, + "grad_norm": 0.8020641803741455, + "learning_rate": 0.000918883164651781, + "loss": 1.4848, + "step": 4109 + }, + { + "epoch": 0.43354430379746833, + "grad_norm": 0.956502377986908, + "learning_rate": 0.0009186385433557584, + "loss": 1.4817, + "step": 4110 + }, + { + "epoch": 0.4336497890295359, + "grad_norm": 0.7953915596008301, + "learning_rate": 0.0009183939031629462, + "loss": 1.4446, + "step": 4111 + }, + { + "epoch": 0.4337552742616034, + "grad_norm": 1.2211757898330688, + "learning_rate": 0.0009181492441007577, + "loss": 1.4324, + "step": 4112 + }, + { + "epoch": 0.43386075949367087, + "grad_norm": 0.757123589515686, + "learning_rate": 0.0009179045661966075, + "loss": 1.4763, + "step": 4113 + }, + { + "epoch": 0.4339662447257384, + "grad_norm": 1.089949131011963, + "learning_rate": 0.0009176598694779134, + "loss": 1.4613, + "step": 4114 + }, + { + "epoch": 0.4340717299578059, + "grad_norm": 0.7285904288291931, + "learning_rate": 0.0009174151539720953, + "loss": 1.4603, + "step": 4115 + }, + { + "epoch": 0.4341772151898734, + "grad_norm": 1.3258572816848755, + "learning_rate": 0.0009171704197065741, + "loss": 1.4564, + "step": 4116 + }, + { + "epoch": 0.43428270042194095, + 
"grad_norm": 0.6538669466972351, + "learning_rate": 0.0009169256667087738, + "loss": 1.4592, + "step": 4117 + }, + { + "epoch": 0.43438818565400844, + "grad_norm": 0.9790480732917786, + "learning_rate": 0.0009166808950061202, + "loss": 1.4604, + "step": 4118 + }, + { + "epoch": 0.43449367088607593, + "grad_norm": 0.6798054575920105, + "learning_rate": 0.0009164361046260412, + "loss": 1.4809, + "step": 4119 + }, + { + "epoch": 0.4345991561181435, + "grad_norm": 0.7979106903076172, + "learning_rate": 0.0009161912955959668, + "loss": 1.4812, + "step": 4120 + }, + { + "epoch": 0.434704641350211, + "grad_norm": 0.7945855855941772, + "learning_rate": 0.0009159464679433289, + "loss": 1.4608, + "step": 4121 + }, + { + "epoch": 0.43481012658227847, + "grad_norm": 0.8013361096382141, + "learning_rate": 0.0009157016216955618, + "loss": 1.4416, + "step": 4122 + }, + { + "epoch": 0.434915611814346, + "grad_norm": 0.9310688972473145, + "learning_rate": 0.0009154567568801019, + "loss": 1.4251, + "step": 4123 + }, + { + "epoch": 0.4350210970464135, + "grad_norm": 0.7021268606185913, + "learning_rate": 0.0009152118735243871, + "loss": 1.4516, + "step": 4124 + }, + { + "epoch": 0.435126582278481, + "grad_norm": 0.9339877367019653, + "learning_rate": 0.0009149669716558582, + "loss": 1.4313, + "step": 4125 + }, + { + "epoch": 0.43523206751054855, + "grad_norm": 0.7110081911087036, + "learning_rate": 0.0009147220513019577, + "loss": 1.4487, + "step": 4126 + }, + { + "epoch": 0.43533755274261604, + "grad_norm": 0.7097159028053284, + "learning_rate": 0.0009144771124901295, + "loss": 1.4409, + "step": 4127 + }, + { + "epoch": 0.43544303797468353, + "grad_norm": 0.6114322543144226, + "learning_rate": 0.000914232155247821, + "loss": 1.4462, + "step": 4128 + }, + { + "epoch": 0.4355485232067511, + "grad_norm": 0.679984986782074, + "learning_rate": 0.0009139871796024807, + "loss": 1.4641, + "step": 4129 + }, + { + "epoch": 0.4356540084388186, + "grad_norm": 0.6724117398262024, + 
"learning_rate": 0.000913742185581559, + "loss": 1.4644, + "step": 4130 + }, + { + "epoch": 0.43575949367088607, + "grad_norm": 0.7634376287460327, + "learning_rate": 0.0009134971732125088, + "loss": 1.4688, + "step": 4131 + }, + { + "epoch": 0.43586497890295356, + "grad_norm": 0.6402517557144165, + "learning_rate": 0.0009132521425227852, + "loss": 1.442, + "step": 4132 + }, + { + "epoch": 0.4359704641350211, + "grad_norm": 0.8750699162483215, + "learning_rate": 0.0009130070935398451, + "loss": 1.4777, + "step": 4133 + }, + { + "epoch": 0.4360759493670886, + "grad_norm": 0.6603758335113525, + "learning_rate": 0.0009127620262911473, + "loss": 1.4683, + "step": 4134 + }, + { + "epoch": 0.4361814345991561, + "grad_norm": 0.7835839986801147, + "learning_rate": 0.0009125169408041526, + "loss": 1.4238, + "step": 4135 + }, + { + "epoch": 0.43628691983122364, + "grad_norm": 0.8440776467323303, + "learning_rate": 0.0009122718371063247, + "loss": 1.4444, + "step": 4136 + }, + { + "epoch": 0.43639240506329113, + "grad_norm": 0.6180686354637146, + "learning_rate": 0.0009120267152251281, + "loss": 1.441, + "step": 4137 + }, + { + "epoch": 0.4364978902953586, + "grad_norm": 0.7532533407211304, + "learning_rate": 0.0009117815751880301, + "loss": 1.4636, + "step": 4138 + }, + { + "epoch": 0.4366033755274262, + "grad_norm": 0.6378854513168335, + "learning_rate": 0.0009115364170225, + "loss": 1.4669, + "step": 4139 + }, + { + "epoch": 0.43670886075949367, + "grad_norm": 0.6796383857727051, + "learning_rate": 0.0009112912407560086, + "loss": 1.4507, + "step": 4140 + }, + { + "epoch": 0.43681434599156116, + "grad_norm": 0.7180373072624207, + "learning_rate": 0.0009110460464160295, + "loss": 1.4369, + "step": 4141 + }, + { + "epoch": 0.4369198312236287, + "grad_norm": 0.8089341521263123, + "learning_rate": 0.000910800834030038, + "loss": 1.4479, + "step": 4142 + }, + { + "epoch": 0.4370253164556962, + "grad_norm": 0.6837633848190308, + "learning_rate": 0.0009105556036255113, + "loss": 
1.4627, + "step": 4143 + }, + { + "epoch": 0.4371308016877637, + "grad_norm": 0.8559759855270386, + "learning_rate": 0.0009103103552299283, + "loss": 1.481, + "step": 4144 + }, + { + "epoch": 0.43723628691983124, + "grad_norm": 0.6786631345748901, + "learning_rate": 0.0009100650888707709, + "loss": 1.4451, + "step": 4145 + }, + { + "epoch": 0.43734177215189873, + "grad_norm": 0.8642974495887756, + "learning_rate": 0.000909819804575522, + "loss": 1.4423, + "step": 4146 + }, + { + "epoch": 0.4374472573839662, + "grad_norm": 0.6346184015274048, + "learning_rate": 0.0009095745023716671, + "loss": 1.465, + "step": 4147 + }, + { + "epoch": 0.4375527426160338, + "grad_norm": 1.1108994483947754, + "learning_rate": 0.0009093291822866933, + "loss": 1.4973, + "step": 4148 + }, + { + "epoch": 0.43765822784810127, + "grad_norm": 0.6644514203071594, + "learning_rate": 0.0009090838443480903, + "loss": 1.4434, + "step": 4149 + }, + { + "epoch": 0.43776371308016876, + "grad_norm": 0.782443642616272, + "learning_rate": 0.0009088384885833495, + "loss": 1.4572, + "step": 4150 + }, + { + "epoch": 0.4378691983122363, + "grad_norm": 0.6964821815490723, + "learning_rate": 0.0009085931150199638, + "loss": 1.4603, + "step": 4151 + }, + { + "epoch": 0.4379746835443038, + "grad_norm": 0.6846626400947571, + "learning_rate": 0.0009083477236854287, + "loss": 1.4805, + "step": 4152 + }, + { + "epoch": 0.4380801687763713, + "grad_norm": 0.6150874495506287, + "learning_rate": 0.0009081023146072414, + "loss": 1.4621, + "step": 4153 + }, + { + "epoch": 0.43818565400843884, + "grad_norm": 0.7723942399024963, + "learning_rate": 0.0009078568878129018, + "loss": 1.4219, + "step": 4154 + }, + { + "epoch": 0.43829113924050633, + "grad_norm": 0.8001584410667419, + "learning_rate": 0.0009076114433299107, + "loss": 1.4514, + "step": 4155 + }, + { + "epoch": 0.4383966244725738, + "grad_norm": 0.6655614972114563, + "learning_rate": 0.0009073659811857712, + "loss": 1.4722, + "step": 4156 + }, + { + "epoch": 
0.4385021097046414, + "grad_norm": 0.9289842247962952, + "learning_rate": 0.0009071205014079888, + "loss": 1.4729, + "step": 4157 + }, + { + "epoch": 0.43860759493670887, + "grad_norm": 0.7831913828849792, + "learning_rate": 0.0009068750040240709, + "loss": 1.4306, + "step": 4158 + }, + { + "epoch": 0.43871308016877636, + "grad_norm": 0.6693117022514343, + "learning_rate": 0.0009066294890615266, + "loss": 1.4502, + "step": 4159 + }, + { + "epoch": 0.4388185654008439, + "grad_norm": 0.679416835308075, + "learning_rate": 0.000906383956547867, + "loss": 1.4416, + "step": 4160 + }, + { + "epoch": 0.4389240506329114, + "grad_norm": 0.9169327020645142, + "learning_rate": 0.0009061384065106051, + "loss": 1.4475, + "step": 4161 + }, + { + "epoch": 0.4390295358649789, + "grad_norm": 1.0724948644638062, + "learning_rate": 0.0009058928389772564, + "loss": 1.4597, + "step": 4162 + }, + { + "epoch": 0.43913502109704644, + "grad_norm": 0.6373178362846375, + "learning_rate": 0.0009056472539753377, + "loss": 1.4575, + "step": 4163 + }, + { + "epoch": 0.43924050632911393, + "grad_norm": 0.7167089581489563, + "learning_rate": 0.0009054016515323679, + "loss": 1.4132, + "step": 4164 + }, + { + "epoch": 0.4393459915611814, + "grad_norm": 0.6332940459251404, + "learning_rate": 0.0009051560316758684, + "loss": 1.4569, + "step": 4165 + }, + { + "epoch": 0.4394514767932489, + "grad_norm": 0.646899402141571, + "learning_rate": 0.0009049103944333616, + "loss": 1.4471, + "step": 4166 + }, + { + "epoch": 0.43955696202531647, + "grad_norm": 0.648969292640686, + "learning_rate": 0.0009046647398323728, + "loss": 1.4422, + "step": 4167 + }, + { + "epoch": 0.43966244725738396, + "grad_norm": 0.665513813495636, + "learning_rate": 0.0009044190679004286, + "loss": 1.4558, + "step": 4168 + }, + { + "epoch": 0.43976793248945145, + "grad_norm": 0.6446421146392822, + "learning_rate": 0.0009041733786650578, + "loss": 1.4512, + "step": 4169 + }, + { + "epoch": 0.439873417721519, + "grad_norm": 
0.6702010035514832, + "learning_rate": 0.0009039276721537915, + "loss": 1.481, + "step": 4170 + }, + { + "epoch": 0.4399789029535865, + "grad_norm": 0.6906449198722839, + "learning_rate": 0.0009036819483941614, + "loss": 1.4502, + "step": 4171 + }, + { + "epoch": 0.440084388185654, + "grad_norm": 0.6945631504058838, + "learning_rate": 0.0009034362074137032, + "loss": 1.4584, + "step": 4172 + }, + { + "epoch": 0.44018987341772153, + "grad_norm": 0.7389658689498901, + "learning_rate": 0.0009031904492399526, + "loss": 1.486, + "step": 4173 + }, + { + "epoch": 0.440295358649789, + "grad_norm": 0.7787357568740845, + "learning_rate": 0.0009029446739004483, + "loss": 1.4658, + "step": 4174 + }, + { + "epoch": 0.4404008438818565, + "grad_norm": 0.6449801325798035, + "learning_rate": 0.0009026988814227308, + "loss": 1.4318, + "step": 4175 + }, + { + "epoch": 0.44050632911392407, + "grad_norm": 0.6302877068519592, + "learning_rate": 0.0009024530718343418, + "loss": 1.428, + "step": 4176 + }, + { + "epoch": 0.44061181434599156, + "grad_norm": 0.715155303478241, + "learning_rate": 0.0009022072451628263, + "loss": 1.461, + "step": 4177 + }, + { + "epoch": 0.44071729957805905, + "grad_norm": 0.629571259021759, + "learning_rate": 0.0009019614014357298, + "loss": 1.4253, + "step": 4178 + }, + { + "epoch": 0.4408227848101266, + "grad_norm": 0.6654064655303955, + "learning_rate": 0.0009017155406806006, + "loss": 1.459, + "step": 4179 + }, + { + "epoch": 0.4409282700421941, + "grad_norm": 0.621148943901062, + "learning_rate": 0.0009014696629249886, + "loss": 1.429, + "step": 4180 + }, + { + "epoch": 0.4410337552742616, + "grad_norm": 0.6974664330482483, + "learning_rate": 0.0009012237681964454, + "loss": 1.4608, + "step": 4181 + }, + { + "epoch": 0.44113924050632913, + "grad_norm": 0.6353408098220825, + "learning_rate": 0.0009009778565225251, + "loss": 1.4587, + "step": 4182 + }, + { + "epoch": 0.4412447257383966, + "grad_norm": 0.7109920978546143, + "learning_rate": 
0.000900731927930783, + "loss": 1.4411, + "step": 4183 + }, + { + "epoch": 0.4413502109704641, + "grad_norm": 0.6755382418632507, + "learning_rate": 0.0009004859824487769, + "loss": 1.4443, + "step": 4184 + }, + { + "epoch": 0.44145569620253167, + "grad_norm": 0.6315171718597412, + "learning_rate": 0.0009002400201040659, + "loss": 1.4471, + "step": 4185 + }, + { + "epoch": 0.44156118143459916, + "grad_norm": 0.6198553442955017, + "learning_rate": 0.0008999940409242115, + "loss": 1.4389, + "step": 4186 + }, + { + "epoch": 0.44166666666666665, + "grad_norm": 0.7130765318870544, + "learning_rate": 0.0008997480449367771, + "loss": 1.4796, + "step": 4187 + }, + { + "epoch": 0.4417721518987342, + "grad_norm": 0.6949689388275146, + "learning_rate": 0.0008995020321693274, + "loss": 1.4709, + "step": 4188 + }, + { + "epoch": 0.4418776371308017, + "grad_norm": 0.6632195711135864, + "learning_rate": 0.0008992560026494294, + "loss": 1.4558, + "step": 4189 + }, + { + "epoch": 0.4419831223628692, + "grad_norm": 0.8176189064979553, + "learning_rate": 0.0008990099564046522, + "loss": 1.4677, + "step": 4190 + }, + { + "epoch": 0.44208860759493673, + "grad_norm": 0.6197324991226196, + "learning_rate": 0.0008987638934625662, + "loss": 1.4448, + "step": 4191 + }, + { + "epoch": 0.4421940928270042, + "grad_norm": 1.0071884393692017, + "learning_rate": 0.0008985178138507441, + "loss": 1.4731, + "step": 4192 + }, + { + "epoch": 0.4422995780590717, + "grad_norm": 0.895950973033905, + "learning_rate": 0.0008982717175967606, + "loss": 1.4407, + "step": 4193 + }, + { + "epoch": 0.44240506329113927, + "grad_norm": 0.9238412380218506, + "learning_rate": 0.0008980256047281919, + "loss": 1.492, + "step": 4194 + }, + { + "epoch": 0.44251054852320676, + "grad_norm": 0.9375045299530029, + "learning_rate": 0.0008977794752726159, + "loss": 1.4542, + "step": 4195 + }, + { + "epoch": 0.44261603375527425, + "grad_norm": 0.9729557037353516, + "learning_rate": 0.0008975333292576125, + "loss": 1.4335, + 
"step": 4196 + }, + { + "epoch": 0.44272151898734174, + "grad_norm": 1.0880274772644043, + "learning_rate": 0.0008972871667107643, + "loss": 1.4543, + "step": 4197 + }, + { + "epoch": 0.4428270042194093, + "grad_norm": 0.8359451293945312, + "learning_rate": 0.0008970409876596545, + "loss": 1.4608, + "step": 4198 + }, + { + "epoch": 0.4429324894514768, + "grad_norm": 0.9407356977462769, + "learning_rate": 0.0008967947921318689, + "loss": 1.4295, + "step": 4199 + }, + { + "epoch": 0.4430379746835443, + "grad_norm": 0.7690221071243286, + "learning_rate": 0.0008965485801549946, + "loss": 1.4954, + "step": 4200 + }, + { + "epoch": 0.4431434599156118, + "grad_norm": 0.760863184928894, + "learning_rate": 0.0008963023517566213, + "loss": 1.4528, + "step": 4201 + }, + { + "epoch": 0.4432489451476793, + "grad_norm": 0.7197104096412659, + "learning_rate": 0.0008960561069643402, + "loss": 1.4562, + "step": 4202 + }, + { + "epoch": 0.4433544303797468, + "grad_norm": 0.6182852983474731, + "learning_rate": 0.0008958098458057436, + "loss": 1.4244, + "step": 4203 + }, + { + "epoch": 0.44345991561181436, + "grad_norm": 0.6805903315544128, + "learning_rate": 0.000895563568308427, + "loss": 1.4621, + "step": 4204 + }, + { + "epoch": 0.44356540084388185, + "grad_norm": 0.6930450201034546, + "learning_rate": 0.0008953172744999865, + "loss": 1.4181, + "step": 4205 + }, + { + "epoch": 0.44367088607594934, + "grad_norm": 0.6590464115142822, + "learning_rate": 0.000895070964408021, + "loss": 1.4852, + "step": 4206 + }, + { + "epoch": 0.4437763713080169, + "grad_norm": 0.6416075229644775, + "learning_rate": 0.0008948246380601303, + "loss": 1.4223, + "step": 4207 + }, + { + "epoch": 0.4438818565400844, + "grad_norm": 0.6798422932624817, + "learning_rate": 0.000894578295483917, + "loss": 1.4423, + "step": 4208 + }, + { + "epoch": 0.4439873417721519, + "grad_norm": 0.5985584855079651, + "learning_rate": 0.0008943319367069844, + "loss": 1.4467, + "step": 4209 + }, + { + "epoch": 
0.4440928270042194, + "grad_norm": 0.6408816576004028, + "learning_rate": 0.000894085561756939, + "loss": 1.4518, + "step": 4210 + }, + { + "epoch": 0.4441983122362869, + "grad_norm": 0.7095431685447693, + "learning_rate": 0.0008938391706613878, + "loss": 1.4937, + "step": 4211 + }, + { + "epoch": 0.4443037974683544, + "grad_norm": 0.6470734477043152, + "learning_rate": 0.0008935927634479403, + "loss": 1.4375, + "step": 4212 + }, + { + "epoch": 0.44440928270042196, + "grad_norm": 0.7106198072433472, + "learning_rate": 0.0008933463401442073, + "loss": 1.4699, + "step": 4213 + }, + { + "epoch": 0.44451476793248945, + "grad_norm": 0.7801128029823303, + "learning_rate": 0.0008930999007778025, + "loss": 1.4415, + "step": 4214 + }, + { + "epoch": 0.44462025316455694, + "grad_norm": 0.6597306132316589, + "learning_rate": 0.0008928534453763402, + "loss": 1.4433, + "step": 4215 + }, + { + "epoch": 0.4447257383966245, + "grad_norm": 0.7062357664108276, + "learning_rate": 0.0008926069739674369, + "loss": 1.4618, + "step": 4216 + }, + { + "epoch": 0.444831223628692, + "grad_norm": 0.71197509765625, + "learning_rate": 0.000892360486578711, + "loss": 1.4459, + "step": 4217 + }, + { + "epoch": 0.4449367088607595, + "grad_norm": 0.6824907660484314, + "learning_rate": 0.0008921139832377829, + "loss": 1.4172, + "step": 4218 + }, + { + "epoch": 0.445042194092827, + "grad_norm": 0.6785166263580322, + "learning_rate": 0.0008918674639722742, + "loss": 1.472, + "step": 4219 + }, + { + "epoch": 0.4451476793248945, + "grad_norm": 0.6529973745346069, + "learning_rate": 0.0008916209288098088, + "loss": 1.4673, + "step": 4220 + }, + { + "epoch": 0.445253164556962, + "grad_norm": 0.7350890040397644, + "learning_rate": 0.0008913743777780122, + "loss": 1.4545, + "step": 4221 + }, + { + "epoch": 0.44535864978902956, + "grad_norm": 0.6625419855117798, + "learning_rate": 0.0008911278109045114, + "loss": 1.4598, + "step": 4222 + }, + { + "epoch": 0.44546413502109705, + "grad_norm": 
0.7571985125541687, + "learning_rate": 0.0008908812282169359, + "loss": 1.465, + "step": 4223 + }, + { + "epoch": 0.44556962025316454, + "grad_norm": 0.6714229583740234, + "learning_rate": 0.0008906346297429161, + "loss": 1.437, + "step": 4224 + }, + { + "epoch": 0.4456751054852321, + "grad_norm": 0.7826354503631592, + "learning_rate": 0.000890388015510085, + "loss": 1.4523, + "step": 4225 + }, + { + "epoch": 0.4457805907172996, + "grad_norm": 0.7367755770683289, + "learning_rate": 0.0008901413855460764, + "loss": 1.4483, + "step": 4226 + }, + { + "epoch": 0.4458860759493671, + "grad_norm": 0.7319809198379517, + "learning_rate": 0.0008898947398785271, + "loss": 1.4371, + "step": 4227 + }, + { + "epoch": 0.4459915611814346, + "grad_norm": 0.6941118836402893, + "learning_rate": 0.0008896480785350743, + "loss": 1.4693, + "step": 4228 + }, + { + "epoch": 0.4460970464135021, + "grad_norm": 0.8345164656639099, + "learning_rate": 0.0008894014015433582, + "loss": 1.4389, + "step": 4229 + }, + { + "epoch": 0.4462025316455696, + "grad_norm": 0.7928284406661987, + "learning_rate": 0.0008891547089310198, + "loss": 1.4451, + "step": 4230 + }, + { + "epoch": 0.4463080168776371, + "grad_norm": 0.7868247628211975, + "learning_rate": 0.0008889080007257024, + "loss": 1.5005, + "step": 4231 + }, + { + "epoch": 0.44641350210970465, + "grad_norm": 0.9507951140403748, + "learning_rate": 0.0008886612769550508, + "loss": 1.4546, + "step": 4232 + }, + { + "epoch": 0.44651898734177214, + "grad_norm": 0.6801866888999939, + "learning_rate": 0.0008884145376467119, + "loss": 1.4539, + "step": 4233 + }, + { + "epoch": 0.44662447257383964, + "grad_norm": 1.0844662189483643, + "learning_rate": 0.0008881677828283337, + "loss": 1.4844, + "step": 4234 + }, + { + "epoch": 0.4467299578059072, + "grad_norm": 0.7789134383201599, + "learning_rate": 0.0008879210125275664, + "loss": 1.454, + "step": 4235 + }, + { + "epoch": 0.4468354430379747, + "grad_norm": 0.7910667061805725, + "learning_rate": 
0.000887674226772062, + "loss": 1.4196, + "step": 4236 + }, + { + "epoch": 0.44694092827004217, + "grad_norm": 0.8166566491127014, + "learning_rate": 0.000887427425589474, + "loss": 1.4655, + "step": 4237 + }, + { + "epoch": 0.4470464135021097, + "grad_norm": 0.6941398978233337, + "learning_rate": 0.0008871806090074577, + "loss": 1.3993, + "step": 4238 + }, + { + "epoch": 0.4471518987341772, + "grad_norm": 0.7679691910743713, + "learning_rate": 0.0008869337770536699, + "loss": 1.4344, + "step": 4239 + }, + { + "epoch": 0.4472573839662447, + "grad_norm": 0.667112410068512, + "learning_rate": 0.0008866869297557699, + "loss": 1.4342, + "step": 4240 + }, + { + "epoch": 0.44736286919831225, + "grad_norm": 0.6992975473403931, + "learning_rate": 0.0008864400671414177, + "loss": 1.4342, + "step": 4241 + }, + { + "epoch": 0.44746835443037974, + "grad_norm": 0.643361508846283, + "learning_rate": 0.0008861931892382756, + "loss": 1.459, + "step": 4242 + }, + { + "epoch": 0.44757383966244724, + "grad_norm": 0.7508655190467834, + "learning_rate": 0.0008859462960740076, + "loss": 1.4552, + "step": 4243 + }, + { + "epoch": 0.4476793248945148, + "grad_norm": 0.6755492687225342, + "learning_rate": 0.000885699387676279, + "loss": 1.4589, + "step": 4244 + }, + { + "epoch": 0.4477848101265823, + "grad_norm": 0.7333484292030334, + "learning_rate": 0.0008854524640727575, + "loss": 1.4951, + "step": 4245 + }, + { + "epoch": 0.44789029535864977, + "grad_norm": 0.9179319739341736, + "learning_rate": 0.0008852055252911121, + "loss": 1.4296, + "step": 4246 + }, + { + "epoch": 0.4479957805907173, + "grad_norm": 0.6872104406356812, + "learning_rate": 0.0008849585713590134, + "loss": 1.4087, + "step": 4247 + }, + { + "epoch": 0.4481012658227848, + "grad_norm": 1.2669498920440674, + "learning_rate": 0.0008847116023041336, + "loss": 1.4738, + "step": 4248 + }, + { + "epoch": 0.4482067510548523, + "grad_norm": 0.7523173689842224, + "learning_rate": 0.0008844646181541472, + "loss": 1.4765, + "step": 
4249 + }, + { + "epoch": 0.44831223628691985, + "grad_norm": 1.0622243881225586, + "learning_rate": 0.0008842176189367299, + "loss": 1.4683, + "step": 4250 + }, + { + "epoch": 0.44841772151898734, + "grad_norm": 0.9707855582237244, + "learning_rate": 0.000883970604679559, + "loss": 1.453, + "step": 4251 + }, + { + "epoch": 0.44852320675105484, + "grad_norm": 1.1522397994995117, + "learning_rate": 0.0008837235754103136, + "loss": 1.4814, + "step": 4252 + }, + { + "epoch": 0.4486286919831224, + "grad_norm": 1.0732622146606445, + "learning_rate": 0.000883476531156675, + "loss": 1.4502, + "step": 4253 + }, + { + "epoch": 0.4487341772151899, + "grad_norm": 0.8129450082778931, + "learning_rate": 0.0008832294719463256, + "loss": 1.4531, + "step": 4254 + }, + { + "epoch": 0.44883966244725737, + "grad_norm": 1.1663950681686401, + "learning_rate": 0.0008829823978069494, + "loss": 1.4942, + "step": 4255 + }, + { + "epoch": 0.4489451476793249, + "grad_norm": 0.697817862033844, + "learning_rate": 0.0008827353087662326, + "loss": 1.4888, + "step": 4256 + }, + { + "epoch": 0.4490506329113924, + "grad_norm": 1.10069739818573, + "learning_rate": 0.0008824882048518622, + "loss": 1.4107, + "step": 4257 + }, + { + "epoch": 0.4491561181434599, + "grad_norm": 0.6994149088859558, + "learning_rate": 0.0008822410860915281, + "loss": 1.4527, + "step": 4258 + }, + { + "epoch": 0.44926160337552745, + "grad_norm": 0.9528466463088989, + "learning_rate": 0.0008819939525129207, + "loss": 1.4781, + "step": 4259 + }, + { + "epoch": 0.44936708860759494, + "grad_norm": 0.8901872634887695, + "learning_rate": 0.0008817468041437329, + "loss": 1.4265, + "step": 4260 + }, + { + "epoch": 0.44947257383966244, + "grad_norm": 0.785199761390686, + "learning_rate": 0.0008814996410116587, + "loss": 1.4677, + "step": 4261 + }, + { + "epoch": 0.44957805907173, + "grad_norm": 0.8339399099349976, + "learning_rate": 0.0008812524631443938, + "loss": 1.4683, + "step": 4262 + }, + { + "epoch": 0.4496835443037975, + 
"grad_norm": 0.6708350777626038, + "learning_rate": 0.0008810052705696363, + "loss": 1.411, + "step": 4263 + }, + { + "epoch": 0.44978902953586497, + "grad_norm": 0.7237518429756165, + "learning_rate": 0.0008807580633150848, + "loss": 1.4766, + "step": 4264 + }, + { + "epoch": 0.44989451476793246, + "grad_norm": 0.6150097846984863, + "learning_rate": 0.0008805108414084401, + "loss": 1.4417, + "step": 4265 + }, + { + "epoch": 0.45, + "grad_norm": 0.6822790503501892, + "learning_rate": 0.0008802636048774052, + "loss": 1.4474, + "step": 4266 + }, + { + "epoch": 0.4501054852320675, + "grad_norm": 0.7131941318511963, + "learning_rate": 0.0008800163537496837, + "loss": 1.4023, + "step": 4267 + }, + { + "epoch": 0.450210970464135, + "grad_norm": 0.6878585815429688, + "learning_rate": 0.0008797690880529813, + "loss": 1.4312, + "step": 4268 + }, + { + "epoch": 0.45031645569620254, + "grad_norm": 0.6311556696891785, + "learning_rate": 0.0008795218078150056, + "loss": 1.4331, + "step": 4269 + }, + { + "epoch": 0.45042194092827004, + "grad_norm": 0.6761914491653442, + "learning_rate": 0.0008792745130634654, + "loss": 1.469, + "step": 4270 + }, + { + "epoch": 0.45052742616033753, + "grad_norm": 0.6264138221740723, + "learning_rate": 0.0008790272038260715, + "loss": 1.4117, + "step": 4271 + }, + { + "epoch": 0.4506329113924051, + "grad_norm": 0.6379764676094055, + "learning_rate": 0.000878779880130536, + "loss": 1.4548, + "step": 4272 + }, + { + "epoch": 0.45073839662447257, + "grad_norm": 0.7250474095344543, + "learning_rate": 0.0008785325420045727, + "loss": 1.444, + "step": 4273 + }, + { + "epoch": 0.45084388185654006, + "grad_norm": 0.6431525349617004, + "learning_rate": 0.0008782851894758971, + "loss": 1.4419, + "step": 4274 + }, + { + "epoch": 0.4509493670886076, + "grad_norm": 0.7568891644477844, + "learning_rate": 0.0008780378225722264, + "loss": 1.4322, + "step": 4275 + }, + { + "epoch": 0.4510548523206751, + "grad_norm": 0.7094635367393494, + "learning_rate": 
0.0008777904413212794, + "loss": 1.4307, + "step": 4276 + }, + { + "epoch": 0.4511603375527426, + "grad_norm": 0.6413698792457581, + "learning_rate": 0.0008775430457507759, + "loss": 1.4663, + "step": 4277 + }, + { + "epoch": 0.45126582278481014, + "grad_norm": 0.7681580185890198, + "learning_rate": 0.0008772956358884383, + "loss": 1.4341, + "step": 4278 + }, + { + "epoch": 0.45137130801687764, + "grad_norm": 0.829929769039154, + "learning_rate": 0.0008770482117619901, + "loss": 1.4344, + "step": 4279 + }, + { + "epoch": 0.45147679324894513, + "grad_norm": 0.6740018129348755, + "learning_rate": 0.0008768007733991561, + "loss": 1.4823, + "step": 4280 + }, + { + "epoch": 0.4515822784810127, + "grad_norm": 0.7321917414665222, + "learning_rate": 0.0008765533208276632, + "loss": 1.442, + "step": 4281 + }, + { + "epoch": 0.45168776371308017, + "grad_norm": 0.6972896456718445, + "learning_rate": 0.0008763058540752396, + "loss": 1.4901, + "step": 4282 + }, + { + "epoch": 0.45179324894514766, + "grad_norm": 0.801696240901947, + "learning_rate": 0.0008760583731696151, + "loss": 1.469, + "step": 4283 + }, + { + "epoch": 0.4518987341772152, + "grad_norm": 0.6376705765724182, + "learning_rate": 0.0008758108781385216, + "loss": 1.4621, + "step": 4284 + }, + { + "epoch": 0.4520042194092827, + "grad_norm": 0.6771137714385986, + "learning_rate": 0.0008755633690096918, + "loss": 1.4399, + "step": 4285 + }, + { + "epoch": 0.4521097046413502, + "grad_norm": 0.7251279354095459, + "learning_rate": 0.0008753158458108604, + "loss": 1.4232, + "step": 4286 + }, + { + "epoch": 0.45221518987341774, + "grad_norm": 0.7085144519805908, + "learning_rate": 0.0008750683085697632, + "loss": 1.4311, + "step": 4287 + }, + { + "epoch": 0.45232067510548524, + "grad_norm": 0.8877135515213013, + "learning_rate": 0.0008748207573141388, + "loss": 1.4805, + "step": 4288 + }, + { + "epoch": 0.45242616033755273, + "grad_norm": 1.030031442642212, + "learning_rate": 0.000874573192071726, + "loss": 1.4738, + 
"step": 4289 + }, + { + "epoch": 0.4525316455696203, + "grad_norm": 0.6127774119377136, + "learning_rate": 0.0008743256128702658, + "loss": 1.4339, + "step": 4290 + }, + { + "epoch": 0.45263713080168777, + "grad_norm": 0.8160855174064636, + "learning_rate": 0.0008740780197375007, + "loss": 1.4389, + "step": 4291 + }, + { + "epoch": 0.45274261603375526, + "grad_norm": 0.7284292578697205, + "learning_rate": 0.000873830412701175, + "loss": 1.451, + "step": 4292 + }, + { + "epoch": 0.4528481012658228, + "grad_norm": 0.7248976230621338, + "learning_rate": 0.0008735827917890339, + "loss": 1.4966, + "step": 4293 + }, + { + "epoch": 0.4529535864978903, + "grad_norm": 1.135445237159729, + "learning_rate": 0.000873335157028825, + "loss": 1.4554, + "step": 4294 + }, + { + "epoch": 0.4530590717299578, + "grad_norm": 0.8004437685012817, + "learning_rate": 0.0008730875084482964, + "loss": 1.4465, + "step": 4295 + }, + { + "epoch": 0.4531645569620253, + "grad_norm": 1.1768295764923096, + "learning_rate": 0.0008728398460751989, + "loss": 1.4608, + "step": 4296 + }, + { + "epoch": 0.45327004219409284, + "grad_norm": 1.015681266784668, + "learning_rate": 0.0008725921699372839, + "loss": 1.4389, + "step": 4297 + }, + { + "epoch": 0.45337552742616033, + "grad_norm": 0.7673236727714539, + "learning_rate": 0.0008723444800623053, + "loss": 1.4544, + "step": 4298 + }, + { + "epoch": 0.4534810126582278, + "grad_norm": 0.8682942986488342, + "learning_rate": 0.0008720967764780173, + "loss": 1.4482, + "step": 4299 + }, + { + "epoch": 0.45358649789029537, + "grad_norm": 0.663346529006958, + "learning_rate": 0.0008718490592121768, + "loss": 1.4503, + "step": 4300 + }, + { + "epoch": 0.45369198312236286, + "grad_norm": 0.6774370670318604, + "learning_rate": 0.0008716013282925418, + "loss": 1.4899, + "step": 4301 + }, + { + "epoch": 0.45379746835443036, + "grad_norm": 0.6913812756538391, + "learning_rate": 0.0008713535837468714, + "loss": 1.4407, + "step": 4302 + }, + { + "epoch": 
0.4539029535864979, + "grad_norm": 0.734190821647644, + "learning_rate": 0.0008711058256029269, + "loss": 1.4496, + "step": 4303 + }, + { + "epoch": 0.4540084388185654, + "grad_norm": 0.6557974815368652, + "learning_rate": 0.0008708580538884707, + "loss": 1.4419, + "step": 4304 + }, + { + "epoch": 0.4541139240506329, + "grad_norm": 0.7087630033493042, + "learning_rate": 0.0008706102686312668, + "loss": 1.4, + "step": 4305 + }, + { + "epoch": 0.45421940928270044, + "grad_norm": 0.701992928981781, + "learning_rate": 0.0008703624698590811, + "loss": 1.4767, + "step": 4306 + }, + { + "epoch": 0.45432489451476793, + "grad_norm": 0.762848973274231, + "learning_rate": 0.0008701146575996804, + "loss": 1.4805, + "step": 4307 + }, + { + "epoch": 0.4544303797468354, + "grad_norm": 0.6507176160812378, + "learning_rate": 0.0008698668318808334, + "loss": 1.4402, + "step": 4308 + }, + { + "epoch": 0.45453586497890297, + "grad_norm": 0.7025113701820374, + "learning_rate": 0.0008696189927303101, + "loss": 1.4448, + "step": 4309 + }, + { + "epoch": 0.45464135021097046, + "grad_norm": 0.6685600280761719, + "learning_rate": 0.0008693711401758822, + "loss": 1.41, + "step": 4310 + }, + { + "epoch": 0.45474683544303796, + "grad_norm": 0.7687404155731201, + "learning_rate": 0.0008691232742453229, + "loss": 1.4583, + "step": 4311 + }, + { + "epoch": 0.4548523206751055, + "grad_norm": 0.6822163462638855, + "learning_rate": 0.0008688753949664067, + "loss": 1.4365, + "step": 4312 + }, + { + "epoch": 0.454957805907173, + "grad_norm": 0.77659010887146, + "learning_rate": 0.0008686275023669096, + "loss": 1.4527, + "step": 4313 + }, + { + "epoch": 0.4550632911392405, + "grad_norm": 0.7811279892921448, + "learning_rate": 0.0008683795964746094, + "loss": 1.4144, + "step": 4314 + }, + { + "epoch": 0.45516877637130804, + "grad_norm": 0.6948309540748596, + "learning_rate": 0.0008681316773172852, + "loss": 1.4435, + "step": 4315 + }, + { + "epoch": 0.45527426160337553, + "grad_norm": 
0.8585042357444763, + "learning_rate": 0.0008678837449227174, + "loss": 1.418, + "step": 4316 + }, + { + "epoch": 0.455379746835443, + "grad_norm": 0.8249034881591797, + "learning_rate": 0.0008676357993186882, + "loss": 1.4559, + "step": 4317 + }, + { + "epoch": 0.45548523206751057, + "grad_norm": 0.7790976166725159, + "learning_rate": 0.000867387840532981, + "loss": 1.4263, + "step": 4318 + }, + { + "epoch": 0.45559071729957806, + "grad_norm": 0.7319022417068481, + "learning_rate": 0.0008671398685933811, + "loss": 1.4255, + "step": 4319 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 0.9387202858924866, + "learning_rate": 0.0008668918835276747, + "loss": 1.5097, + "step": 4320 + }, + { + "epoch": 0.4558016877637131, + "grad_norm": 0.6780996322631836, + "learning_rate": 0.0008666438853636499, + "loss": 1.4171, + "step": 4321 + }, + { + "epoch": 0.4559071729957806, + "grad_norm": 0.7971130013465881, + "learning_rate": 0.0008663958741290961, + "loss": 1.4261, + "step": 4322 + }, + { + "epoch": 0.4560126582278481, + "grad_norm": 0.710080087184906, + "learning_rate": 0.0008661478498518042, + "loss": 1.5067, + "step": 4323 + }, + { + "epoch": 0.45611814345991564, + "grad_norm": 0.7372080087661743, + "learning_rate": 0.0008658998125595666, + "loss": 1.4358, + "step": 4324 + }, + { + "epoch": 0.45622362869198313, + "grad_norm": 0.8894590735435486, + "learning_rate": 0.0008656517622801771, + "loss": 1.472, + "step": 4325 + }, + { + "epoch": 0.4563291139240506, + "grad_norm": 0.6724890470504761, + "learning_rate": 0.0008654036990414308, + "loss": 1.4281, + "step": 4326 + }, + { + "epoch": 0.45643459915611817, + "grad_norm": 0.9888261556625366, + "learning_rate": 0.0008651556228711247, + "loss": 1.4937, + "step": 4327 + }, + { + "epoch": 0.45654008438818566, + "grad_norm": 0.7065803408622742, + "learning_rate": 0.0008649075337970567, + "loss": 1.4564, + "step": 4328 + }, + { + "epoch": 0.45664556962025316, + "grad_norm": 0.8952890634536743, + "learning_rate": 
0.0008646594318470268, + "loss": 1.4638, + "step": 4329 + }, + { + "epoch": 0.45675105485232065, + "grad_norm": 0.7328623533248901, + "learning_rate": 0.0008644113170488355, + "loss": 1.4269, + "step": 4330 + }, + { + "epoch": 0.4568565400843882, + "grad_norm": 0.7858170866966248, + "learning_rate": 0.0008641631894302858, + "loss": 1.441, + "step": 4331 + }, + { + "epoch": 0.4569620253164557, + "grad_norm": 0.7008242607116699, + "learning_rate": 0.0008639150490191814, + "loss": 1.4584, + "step": 4332 + }, + { + "epoch": 0.4570675105485232, + "grad_norm": 0.7113161087036133, + "learning_rate": 0.0008636668958433279, + "loss": 1.4205, + "step": 4333 + }, + { + "epoch": 0.45717299578059073, + "grad_norm": 0.6523024439811707, + "learning_rate": 0.0008634187299305318, + "loss": 1.4538, + "step": 4334 + }, + { + "epoch": 0.4572784810126582, + "grad_norm": 0.6788266897201538, + "learning_rate": 0.0008631705513086013, + "loss": 1.4334, + "step": 4335 + }, + { + "epoch": 0.4573839662447257, + "grad_norm": 0.8616634011268616, + "learning_rate": 0.0008629223600053465, + "loss": 1.4722, + "step": 4336 + }, + { + "epoch": 0.45748945147679326, + "grad_norm": 0.697888970375061, + "learning_rate": 0.000862674156048578, + "loss": 1.4324, + "step": 4337 + }, + { + "epoch": 0.45759493670886076, + "grad_norm": 0.7876095175743103, + "learning_rate": 0.0008624259394661085, + "loss": 1.4328, + "step": 4338 + }, + { + "epoch": 0.45770042194092825, + "grad_norm": 0.7521372437477112, + "learning_rate": 0.000862177710285752, + "loss": 1.4733, + "step": 4339 + }, + { + "epoch": 0.4578059071729958, + "grad_norm": 0.7320294976234436, + "learning_rate": 0.0008619294685353235, + "loss": 1.4355, + "step": 4340 + }, + { + "epoch": 0.4579113924050633, + "grad_norm": 0.605569064617157, + "learning_rate": 0.00086168121424264, + "loss": 1.4318, + "step": 4341 + }, + { + "epoch": 0.4580168776371308, + "grad_norm": 0.738642692565918, + "learning_rate": 0.0008614329474355196, + "loss": 1.4222, + "step": 
4342 + }, + { + "epoch": 0.45812236286919833, + "grad_norm": 0.6997052431106567, + "learning_rate": 0.0008611846681417818, + "loss": 1.4098, + "step": 4343 + }, + { + "epoch": 0.4582278481012658, + "grad_norm": 0.6716024279594421, + "learning_rate": 0.0008609363763892474, + "loss": 1.4663, + "step": 4344 + }, + { + "epoch": 0.4583333333333333, + "grad_norm": 0.6828845739364624, + "learning_rate": 0.0008606880722057386, + "loss": 1.4547, + "step": 4345 + }, + { + "epoch": 0.45843881856540086, + "grad_norm": 0.76622474193573, + "learning_rate": 0.0008604397556190797, + "loss": 1.4878, + "step": 4346 + }, + { + "epoch": 0.45854430379746836, + "grad_norm": 0.9046446084976196, + "learning_rate": 0.0008601914266570956, + "loss": 1.433, + "step": 4347 + }, + { + "epoch": 0.45864978902953585, + "grad_norm": 0.6926295161247253, + "learning_rate": 0.0008599430853476126, + "loss": 1.4422, + "step": 4348 + }, + { + "epoch": 0.4587552742616034, + "grad_norm": 0.9678751230239868, + "learning_rate": 0.0008596947317184585, + "loss": 1.4612, + "step": 4349 + }, + { + "epoch": 0.4588607594936709, + "grad_norm": 0.8415082693099976, + "learning_rate": 0.0008594463657974627, + "loss": 1.4584, + "step": 4350 + }, + { + "epoch": 0.4589662447257384, + "grad_norm": 0.704569935798645, + "learning_rate": 0.000859197987612456, + "loss": 1.4806, + "step": 4351 + }, + { + "epoch": 0.45907172995780593, + "grad_norm": 0.7377588748931885, + "learning_rate": 0.0008589495971912703, + "loss": 1.4786, + "step": 4352 + }, + { + "epoch": 0.4591772151898734, + "grad_norm": 0.8062375783920288, + "learning_rate": 0.000858701194561739, + "loss": 1.4415, + "step": 4353 + }, + { + "epoch": 0.4592827004219409, + "grad_norm": 0.8158602714538574, + "learning_rate": 0.0008584527797516966, + "loss": 1.4766, + "step": 4354 + }, + { + "epoch": 0.45938818565400846, + "grad_norm": 0.7632367610931396, + "learning_rate": 0.0008582043527889797, + "loss": 1.4787, + "step": 4355 + }, + { + "epoch": 0.45949367088607596, + 
"grad_norm": 0.98036789894104, + "learning_rate": 0.0008579559137014254, + "loss": 1.4408, + "step": 4356 + }, + { + "epoch": 0.45959915611814345, + "grad_norm": 0.7095525860786438, + "learning_rate": 0.0008577074625168725, + "loss": 1.4547, + "step": 4357 + }, + { + "epoch": 0.459704641350211, + "grad_norm": 0.8393321633338928, + "learning_rate": 0.0008574589992631617, + "loss": 1.4749, + "step": 4358 + }, + { + "epoch": 0.4598101265822785, + "grad_norm": 0.8973718881607056, + "learning_rate": 0.0008572105239681338, + "loss": 1.4503, + "step": 4359 + }, + { + "epoch": 0.459915611814346, + "grad_norm": 1.0975415706634521, + "learning_rate": 0.0008569620366596322, + "loss": 1.4421, + "step": 4360 + }, + { + "epoch": 0.46002109704641353, + "grad_norm": 0.7630864977836609, + "learning_rate": 0.0008567135373655012, + "loss": 1.4513, + "step": 4361 + }, + { + "epoch": 0.460126582278481, + "grad_norm": 0.9034954905509949, + "learning_rate": 0.0008564650261135862, + "loss": 1.4194, + "step": 4362 + }, + { + "epoch": 0.4602320675105485, + "grad_norm": 0.8934493064880371, + "learning_rate": 0.0008562165029317339, + "loss": 1.4269, + "step": 4363 + }, + { + "epoch": 0.460337552742616, + "grad_norm": 1.041664481163025, + "learning_rate": 0.0008559679678477929, + "loss": 1.454, + "step": 4364 + }, + { + "epoch": 0.46044303797468356, + "grad_norm": 0.6922926902770996, + "learning_rate": 0.0008557194208896129, + "loss": 1.462, + "step": 4365 + }, + { + "epoch": 0.46054852320675105, + "grad_norm": 0.9974015355110168, + "learning_rate": 0.0008554708620850445, + "loss": 1.4008, + "step": 4366 + }, + { + "epoch": 0.46065400843881854, + "grad_norm": 0.6401768922805786, + "learning_rate": 0.0008552222914619401, + "loss": 1.4501, + "step": 4367 + }, + { + "epoch": 0.4607594936708861, + "grad_norm": 0.8053321242332458, + "learning_rate": 0.0008549737090481532, + "loss": 1.4398, + "step": 4368 + }, + { + "epoch": 0.4608649789029536, + "grad_norm": 0.7581405639648438, + "learning_rate": 
0.0008547251148715386, + "loss": 1.4474, + "step": 4369 + }, + { + "epoch": 0.4609704641350211, + "grad_norm": 0.7131026387214661, + "learning_rate": 0.000854476508959953, + "loss": 1.4408, + "step": 4370 + }, + { + "epoch": 0.4610759493670886, + "grad_norm": 0.8392437100410461, + "learning_rate": 0.0008542278913412535, + "loss": 1.3951, + "step": 4371 + }, + { + "epoch": 0.4611814345991561, + "grad_norm": 0.6352400183677673, + "learning_rate": 0.0008539792620432989, + "loss": 1.4586, + "step": 4372 + }, + { + "epoch": 0.4612869198312236, + "grad_norm": 0.8606449961662292, + "learning_rate": 0.0008537306210939497, + "loss": 1.4163, + "step": 4373 + }, + { + "epoch": 0.46139240506329116, + "grad_norm": 0.7301177978515625, + "learning_rate": 0.0008534819685210668, + "loss": 1.4489, + "step": 4374 + }, + { + "epoch": 0.46149789029535865, + "grad_norm": 0.6194576621055603, + "learning_rate": 0.0008532333043525136, + "loss": 1.4454, + "step": 4375 + }, + { + "epoch": 0.46160337552742614, + "grad_norm": 0.6067698001861572, + "learning_rate": 0.0008529846286161539, + "loss": 1.4609, + "step": 4376 + }, + { + "epoch": 0.4617088607594937, + "grad_norm": 0.6531108617782593, + "learning_rate": 0.000852735941339853, + "loss": 1.4151, + "step": 4377 + }, + { + "epoch": 0.4618143459915612, + "grad_norm": 0.669245719909668, + "learning_rate": 0.0008524872425514775, + "loss": 1.4519, + "step": 4378 + }, + { + "epoch": 0.4619198312236287, + "grad_norm": 0.6464606523513794, + "learning_rate": 0.0008522385322788955, + "loss": 1.4644, + "step": 4379 + }, + { + "epoch": 0.4620253164556962, + "grad_norm": 0.6994924545288086, + "learning_rate": 0.0008519898105499762, + "loss": 1.4519, + "step": 4380 + }, + { + "epoch": 0.4621308016877637, + "grad_norm": 0.6427987813949585, + "learning_rate": 0.00085174107739259, + "loss": 1.4799, + "step": 4381 + }, + { + "epoch": 0.4622362869198312, + "grad_norm": 0.6658562421798706, + "learning_rate": 0.000851492332834609, + "loss": 1.4274, + "step": 
4382 + }, + { + "epoch": 0.46234177215189876, + "grad_norm": 0.6613098382949829, + "learning_rate": 0.0008512435769039055, + "loss": 1.4317, + "step": 4383 + }, + { + "epoch": 0.46244725738396625, + "grad_norm": 0.8461209535598755, + "learning_rate": 0.0008509948096283547, + "loss": 1.4124, + "step": 4384 + }, + { + "epoch": 0.46255274261603374, + "grad_norm": 0.6589515805244446, + "learning_rate": 0.0008507460310358319, + "loss": 1.4567, + "step": 4385 + }, + { + "epoch": 0.4626582278481013, + "grad_norm": 0.9345209002494812, + "learning_rate": 0.0008504972411542138, + "loss": 1.4495, + "step": 4386 + }, + { + "epoch": 0.4627637130801688, + "grad_norm": 0.7097235321998596, + "learning_rate": 0.0008502484400113787, + "loss": 1.4264, + "step": 4387 + }, + { + "epoch": 0.4628691983122363, + "grad_norm": 0.8448627591133118, + "learning_rate": 0.0008499996276352061, + "loss": 1.4306, + "step": 4388 + }, + { + "epoch": 0.4629746835443038, + "grad_norm": 0.7175149917602539, + "learning_rate": 0.0008497508040535766, + "loss": 1.4391, + "step": 4389 + }, + { + "epoch": 0.4630801687763713, + "grad_norm": 1.0838961601257324, + "learning_rate": 0.0008495019692943721, + "loss": 1.435, + "step": 4390 + }, + { + "epoch": 0.4631856540084388, + "grad_norm": 0.9622994661331177, + "learning_rate": 0.0008492531233854757, + "loss": 1.4406, + "step": 4391 + }, + { + "epoch": 0.46329113924050636, + "grad_norm": 0.7334150671958923, + "learning_rate": 0.0008490042663547719, + "loss": 1.4224, + "step": 4392 + }, + { + "epoch": 0.46339662447257385, + "grad_norm": 0.7419310212135315, + "learning_rate": 0.0008487553982301465, + "loss": 1.4344, + "step": 4393 + }, + { + "epoch": 0.46350210970464134, + "grad_norm": 0.6788934469223022, + "learning_rate": 0.0008485065190394863, + "loss": 1.461, + "step": 4394 + }, + { + "epoch": 0.46360759493670883, + "grad_norm": 0.8020852208137512, + "learning_rate": 0.0008482576288106794, + "loss": 1.46, + "step": 4395 + }, + { + "epoch": 0.4637130801687764, + 
"grad_norm": 0.6579435467720032, + "learning_rate": 0.000848008727571615, + "loss": 1.4318, + "step": 4396 + }, + { + "epoch": 0.4638185654008439, + "grad_norm": 0.7581225037574768, + "learning_rate": 0.0008477598153501842, + "loss": 1.4434, + "step": 4397 + }, + { + "epoch": 0.46392405063291137, + "grad_norm": 0.6931782364845276, + "learning_rate": 0.0008475108921742787, + "loss": 1.4768, + "step": 4398 + }, + { + "epoch": 0.4640295358649789, + "grad_norm": 0.8018944263458252, + "learning_rate": 0.0008472619580717914, + "loss": 1.4115, + "step": 4399 + }, + { + "epoch": 0.4641350210970464, + "grad_norm": 0.6610898971557617, + "learning_rate": 0.0008470130130706166, + "loss": 1.4851, + "step": 4400 + }, + { + "epoch": 0.4642405063291139, + "grad_norm": 0.8308686017990112, + "learning_rate": 0.00084676405719865, + "loss": 1.4259, + "step": 4401 + }, + { + "epoch": 0.46434599156118145, + "grad_norm": 0.6948236227035522, + "learning_rate": 0.0008465150904837883, + "loss": 1.4674, + "step": 4402 + }, + { + "epoch": 0.46445147679324894, + "grad_norm": 1.08034086227417, + "learning_rate": 0.0008462661129539296, + "loss": 1.4969, + "step": 4403 + }, + { + "epoch": 0.46455696202531643, + "grad_norm": 0.7507718205451965, + "learning_rate": 0.0008460171246369725, + "loss": 1.48, + "step": 4404 + }, + { + "epoch": 0.464662447257384, + "grad_norm": 1.0238730907440186, + "learning_rate": 0.000845768125560818, + "loss": 1.4476, + "step": 4405 + }, + { + "epoch": 0.4647679324894515, + "grad_norm": 0.7635399699211121, + "learning_rate": 0.0008455191157533677, + "loss": 1.4552, + "step": 4406 + }, + { + "epoch": 0.46487341772151897, + "grad_norm": 0.8367379903793335, + "learning_rate": 0.000845270095242524, + "loss": 1.4423, + "step": 4407 + }, + { + "epoch": 0.4649789029535865, + "grad_norm": 0.829502522945404, + "learning_rate": 0.0008450210640561912, + "loss": 1.4771, + "step": 4408 + }, + { + "epoch": 0.465084388185654, + "grad_norm": 0.6720311045646667, + "learning_rate": 
0.000844772022222274, + "loss": 1.4528, + "step": 4409 + }, + { + "epoch": 0.4651898734177215, + "grad_norm": 0.8260130882263184, + "learning_rate": 0.0008445229697686795, + "loss": 1.4482, + "step": 4410 + }, + { + "epoch": 0.46529535864978905, + "grad_norm": 0.6674537062644958, + "learning_rate": 0.0008442739067233148, + "loss": 1.4443, + "step": 4411 + }, + { + "epoch": 0.46540084388185654, + "grad_norm": 0.8905089497566223, + "learning_rate": 0.0008440248331140888, + "loss": 1.4834, + "step": 4412 + }, + { + "epoch": 0.46550632911392403, + "grad_norm": 0.6441659331321716, + "learning_rate": 0.0008437757489689113, + "loss": 1.3971, + "step": 4413 + }, + { + "epoch": 0.4656118143459916, + "grad_norm": 0.932510495185852, + "learning_rate": 0.0008435266543156935, + "loss": 1.4422, + "step": 4414 + }, + { + "epoch": 0.4657172995780591, + "grad_norm": 0.6756090521812439, + "learning_rate": 0.0008432775491823477, + "loss": 1.4492, + "step": 4415 + }, + { + "epoch": 0.46582278481012657, + "grad_norm": 0.8997355699539185, + "learning_rate": 0.0008430284335967876, + "loss": 1.4742, + "step": 4416 + }, + { + "epoch": 0.4659282700421941, + "grad_norm": 0.8109990954399109, + "learning_rate": 0.0008427793075869275, + "loss": 1.4127, + "step": 4417 + }, + { + "epoch": 0.4660337552742616, + "grad_norm": 0.6880932450294495, + "learning_rate": 0.0008425301711806833, + "loss": 1.4508, + "step": 4418 + }, + { + "epoch": 0.4661392405063291, + "grad_norm": 0.6732782125473022, + "learning_rate": 0.0008422810244059721, + "loss": 1.4495, + "step": 4419 + }, + { + "epoch": 0.46624472573839665, + "grad_norm": 0.6863621473312378, + "learning_rate": 0.0008420318672907119, + "loss": 1.4258, + "step": 4420 + }, + { + "epoch": 0.46635021097046414, + "grad_norm": 0.645616352558136, + "learning_rate": 0.0008417826998628222, + "loss": 1.4386, + "step": 4421 + }, + { + "epoch": 0.46645569620253163, + "grad_norm": 0.713141918182373, + "learning_rate": 0.0008415335221502231, + "loss": 1.4408, + 
"step": 4422 + }, + { + "epoch": 0.4665611814345992, + "grad_norm": 0.7614730000495911, + "learning_rate": 0.0008412843341808365, + "loss": 1.4519, + "step": 4423 + }, + { + "epoch": 0.4666666666666667, + "grad_norm": 0.7965167164802551, + "learning_rate": 0.0008410351359825851, + "loss": 1.4653, + "step": 4424 + }, + { + "epoch": 0.46677215189873417, + "grad_norm": 0.6427363753318787, + "learning_rate": 0.0008407859275833928, + "loss": 1.4298, + "step": 4425 + }, + { + "epoch": 0.4668776371308017, + "grad_norm": 0.7099232077598572, + "learning_rate": 0.0008405367090111845, + "loss": 1.4347, + "step": 4426 + }, + { + "epoch": 0.4669831223628692, + "grad_norm": 0.6255497932434082, + "learning_rate": 0.0008402874802938866, + "loss": 1.4382, + "step": 4427 + }, + { + "epoch": 0.4670886075949367, + "grad_norm": 0.6688752174377441, + "learning_rate": 0.0008400382414594263, + "loss": 1.4526, + "step": 4428 + }, + { + "epoch": 0.4671940928270042, + "grad_norm": 0.6574814915657043, + "learning_rate": 0.000839788992535732, + "loss": 1.4723, + "step": 4429 + }, + { + "epoch": 0.46729957805907174, + "grad_norm": 0.7082003951072693, + "learning_rate": 0.0008395397335507334, + "loss": 1.415, + "step": 4430 + }, + { + "epoch": 0.46740506329113923, + "grad_norm": 0.6392809748649597, + "learning_rate": 0.0008392904645323612, + "loss": 1.4266, + "step": 4431 + }, + { + "epoch": 0.4675105485232067, + "grad_norm": 0.6545090675354004, + "learning_rate": 0.0008390411855085473, + "loss": 1.4678, + "step": 4432 + }, + { + "epoch": 0.4676160337552743, + "grad_norm": 0.6773849725723267, + "learning_rate": 0.0008387918965072244, + "loss": 1.4375, + "step": 4433 + }, + { + "epoch": 0.46772151898734177, + "grad_norm": 0.9438024759292603, + "learning_rate": 0.0008385425975563269, + "loss": 1.4661, + "step": 4434 + }, + { + "epoch": 0.46782700421940926, + "grad_norm": 0.8544812202453613, + "learning_rate": 0.0008382932886837897, + "loss": 1.4074, + "step": 4435 + }, + { + "epoch": 
0.4679324894514768, + "grad_norm": 0.6282941699028015, + "learning_rate": 0.0008380439699175493, + "loss": 1.4067, + "step": 4436 + }, + { + "epoch": 0.4680379746835443, + "grad_norm": 0.7334436774253845, + "learning_rate": 0.000837794641285543, + "loss": 1.4409, + "step": 4437 + }, + { + "epoch": 0.4681434599156118, + "grad_norm": 0.6281618475914001, + "learning_rate": 0.0008375453028157093, + "loss": 1.4474, + "step": 4438 + }, + { + "epoch": 0.46824894514767934, + "grad_norm": 0.7249337434768677, + "learning_rate": 0.000837295954535988, + "loss": 1.4216, + "step": 4439 + }, + { + "epoch": 0.46835443037974683, + "grad_norm": 0.667786717414856, + "learning_rate": 0.0008370465964743196, + "loss": 1.468, + "step": 4440 + }, + { + "epoch": 0.4684599156118143, + "grad_norm": 0.7536304593086243, + "learning_rate": 0.0008367972286586461, + "loss": 1.4303, + "step": 4441 + }, + { + "epoch": 0.4685654008438819, + "grad_norm": 0.8026655912399292, + "learning_rate": 0.0008365478511169103, + "loss": 1.4723, + "step": 4442 + }, + { + "epoch": 0.46867088607594937, + "grad_norm": 0.7477960586547852, + "learning_rate": 0.000836298463877056, + "loss": 1.4425, + "step": 4443 + }, + { + "epoch": 0.46877637130801686, + "grad_norm": 1.1167923212051392, + "learning_rate": 0.0008360490669670288, + "loss": 1.4289, + "step": 4444 + }, + { + "epoch": 0.4688818565400844, + "grad_norm": 0.6792464256286621, + "learning_rate": 0.0008357996604147744, + "loss": 1.4739, + "step": 4445 + }, + { + "epoch": 0.4689873417721519, + "grad_norm": 0.9664304852485657, + "learning_rate": 0.0008355502442482403, + "loss": 1.4207, + "step": 4446 + }, + { + "epoch": 0.4690928270042194, + "grad_norm": 0.6982495188713074, + "learning_rate": 0.0008353008184953748, + "loss": 1.456, + "step": 4447 + }, + { + "epoch": 0.46919831223628694, + "grad_norm": 0.9191277027130127, + "learning_rate": 0.0008350513831841271, + "loss": 1.4758, + "step": 4448 + }, + { + "epoch": 0.46930379746835443, + "grad_norm": 
0.726590633392334, + "learning_rate": 0.0008348019383424479, + "loss": 1.4492, + "step": 4449 + }, + { + "epoch": 0.4694092827004219, + "grad_norm": 0.9087488055229187, + "learning_rate": 0.0008345524839982886, + "loss": 1.4225, + "step": 4450 + }, + { + "epoch": 0.4695147679324895, + "grad_norm": 0.7870250940322876, + "learning_rate": 0.000834303020179602, + "loss": 1.4385, + "step": 4451 + }, + { + "epoch": 0.46962025316455697, + "grad_norm": 0.8045299053192139, + "learning_rate": 0.0008340535469143414, + "loss": 1.4154, + "step": 4452 + }, + { + "epoch": 0.46972573839662446, + "grad_norm": 0.7158856987953186, + "learning_rate": 0.0008338040642304618, + "loss": 1.4596, + "step": 4453 + }, + { + "epoch": 0.469831223628692, + "grad_norm": 1.1726213693618774, + "learning_rate": 0.0008335545721559188, + "loss": 1.4011, + "step": 4454 + }, + { + "epoch": 0.4699367088607595, + "grad_norm": 0.7834528684616089, + "learning_rate": 0.0008333050707186696, + "loss": 1.4273, + "step": 4455 + }, + { + "epoch": 0.470042194092827, + "grad_norm": 0.9361106753349304, + "learning_rate": 0.0008330555599466716, + "loss": 1.4351, + "step": 4456 + }, + { + "epoch": 0.47014767932489454, + "grad_norm": 0.7533807754516602, + "learning_rate": 0.000832806039867884, + "loss": 1.4232, + "step": 4457 + }, + { + "epoch": 0.47025316455696203, + "grad_norm": 0.8598374128341675, + "learning_rate": 0.000832556510510267, + "loss": 1.4192, + "step": 4458 + }, + { + "epoch": 0.4703586497890295, + "grad_norm": 0.8534011244773865, + "learning_rate": 0.0008323069719017812, + "loss": 1.4554, + "step": 4459 + }, + { + "epoch": 0.4704641350210971, + "grad_norm": 0.8163793087005615, + "learning_rate": 0.0008320574240703886, + "loss": 1.4803, + "step": 4460 + }, + { + "epoch": 0.47056962025316457, + "grad_norm": 1.1160898208618164, + "learning_rate": 0.0008318078670440525, + "loss": 1.4466, + "step": 4461 + }, + { + "epoch": 0.47067510548523206, + "grad_norm": 0.7210294008255005, + "learning_rate": 
0.0008315583008507372, + "loss": 1.4379, + "step": 4462 + }, + { + "epoch": 0.47078059071729955, + "grad_norm": 1.3034826517105103, + "learning_rate": 0.0008313087255184074, + "loss": 1.4321, + "step": 4463 + }, + { + "epoch": 0.4708860759493671, + "grad_norm": 0.7311270833015442, + "learning_rate": 0.0008310591410750295, + "loss": 1.4332, + "step": 4464 + }, + { + "epoch": 0.4709915611814346, + "grad_norm": 1.1024552583694458, + "learning_rate": 0.0008308095475485706, + "loss": 1.4201, + "step": 4465 + }, + { + "epoch": 0.4710970464135021, + "grad_norm": 0.9751746654510498, + "learning_rate": 0.0008305599449669989, + "loss": 1.457, + "step": 4466 + }, + { + "epoch": 0.47120253164556963, + "grad_norm": 1.2343671321868896, + "learning_rate": 0.0008303103333582839, + "loss": 1.4278, + "step": 4467 + }, + { + "epoch": 0.4713080168776371, + "grad_norm": 1.0159926414489746, + "learning_rate": 0.0008300607127503952, + "loss": 1.4263, + "step": 4468 + }, + { + "epoch": 0.4714135021097046, + "grad_norm": 0.9928357601165771, + "learning_rate": 0.0008298110831713047, + "loss": 1.4795, + "step": 4469 + }, + { + "epoch": 0.47151898734177217, + "grad_norm": 0.9737125039100647, + "learning_rate": 0.0008295614446489842, + "loss": 1.4693, + "step": 4470 + }, + { + "epoch": 0.47162447257383966, + "grad_norm": 1.0530657768249512, + "learning_rate": 0.0008293117972114074, + "loss": 1.4287, + "step": 4471 + }, + { + "epoch": 0.47172995780590715, + "grad_norm": 0.712058424949646, + "learning_rate": 0.0008290621408865481, + "loss": 1.4261, + "step": 4472 + }, + { + "epoch": 0.4718354430379747, + "grad_norm": 0.753962516784668, + "learning_rate": 0.0008288124757023816, + "loss": 1.4562, + "step": 4473 + }, + { + "epoch": 0.4719409282700422, + "grad_norm": 0.8168962001800537, + "learning_rate": 0.0008285628016868841, + "loss": 1.4731, + "step": 4474 + }, + { + "epoch": 0.4720464135021097, + "grad_norm": 0.9812271595001221, + "learning_rate": 0.0008283131188680332, + "loss": 1.4777, + 
"step": 4475 + }, + { + "epoch": 0.47215189873417723, + "grad_norm": 0.6807776093482971, + "learning_rate": 0.0008280634272738066, + "loss": 1.4126, + "step": 4476 + }, + { + "epoch": 0.4722573839662447, + "grad_norm": 0.9044895172119141, + "learning_rate": 0.0008278137269321837, + "loss": 1.4382, + "step": 4477 + }, + { + "epoch": 0.4723628691983122, + "grad_norm": 0.8391577005386353, + "learning_rate": 0.0008275640178711447, + "loss": 1.4612, + "step": 4478 + }, + { + "epoch": 0.47246835443037977, + "grad_norm": 1.2328282594680786, + "learning_rate": 0.0008273143001186709, + "loss": 1.4452, + "step": 4479 + }, + { + "epoch": 0.47257383966244726, + "grad_norm": 0.8228799700737, + "learning_rate": 0.0008270645737027441, + "loss": 1.4436, + "step": 4480 + }, + { + "epoch": 0.47267932489451475, + "grad_norm": 1.0879409313201904, + "learning_rate": 0.0008268148386513475, + "loss": 1.4147, + "step": 4481 + }, + { + "epoch": 0.4727848101265823, + "grad_norm": 0.724404513835907, + "learning_rate": 0.0008265650949924652, + "loss": 1.4275, + "step": 4482 + }, + { + "epoch": 0.4728902953586498, + "grad_norm": 0.8542022705078125, + "learning_rate": 0.0008263153427540825, + "loss": 1.4202, + "step": 4483 + }, + { + "epoch": 0.4729957805907173, + "grad_norm": 0.8983049392700195, + "learning_rate": 0.0008260655819641849, + "loss": 1.4239, + "step": 4484 + }, + { + "epoch": 0.47310126582278483, + "grad_norm": 0.9991790652275085, + "learning_rate": 0.0008258158126507594, + "loss": 1.4486, + "step": 4485 + }, + { + "epoch": 0.4732067510548523, + "grad_norm": 0.797103762626648, + "learning_rate": 0.0008255660348417944, + "loss": 1.4351, + "step": 4486 + }, + { + "epoch": 0.4733122362869198, + "grad_norm": 1.134954810142517, + "learning_rate": 0.0008253162485652779, + "loss": 1.4269, + "step": 4487 + }, + { + "epoch": 0.47341772151898737, + "grad_norm": 0.9665555953979492, + "learning_rate": 0.0008250664538492006, + "loss": 1.454, + "step": 4488 + }, + { + "epoch": 
0.47352320675105486, + "grad_norm": null, + "learning_rate": 0.0008250664538492006, + "loss": 1.4411, + "step": 4489 + }, + { + "epoch": 0.47362869198312235, + "grad_norm": 1.6289870738983154, + "learning_rate": 0.0008248166507215526, + "loss": 1.485, + "step": 4490 + }, + { + "epoch": 0.4737341772151899, + "grad_norm": 1.457502841949463, + "learning_rate": 0.0008245668392103259, + "loss": 1.4393, + "step": 4491 + }, + { + "epoch": 0.4738396624472574, + "grad_norm": 0.8639112710952759, + "learning_rate": 0.000824317019343513, + "loss": 1.4509, + "step": 4492 + }, + { + "epoch": 0.4739451476793249, + "grad_norm": 1.2777045965194702, + "learning_rate": 0.0008240671911491077, + "loss": 1.4229, + "step": 4493 + }, + { + "epoch": 0.4740506329113924, + "grad_norm": 0.6500400304794312, + "learning_rate": 0.000823817354655104, + "loss": 1.4209, + "step": 4494 + }, + { + "epoch": 0.4741561181434599, + "grad_norm": 1.435761570930481, + "learning_rate": 0.0008235675098894979, + "loss": 1.4716, + "step": 4495 + }, + { + "epoch": 0.4742616033755274, + "grad_norm": 0.8944509029388428, + "learning_rate": 0.0008233176568802851, + "loss": 1.4299, + "step": 4496 + }, + { + "epoch": 0.4743670886075949, + "grad_norm": 0.9180499911308289, + "learning_rate": 0.0008230677956554637, + "loss": 1.4125, + "step": 4497 + }, + { + "epoch": 0.47447257383966246, + "grad_norm": 0.9473007917404175, + "learning_rate": 0.0008228179262430313, + "loss": 1.4452, + "step": 4498 + }, + { + "epoch": 0.47457805907172995, + "grad_norm": 0.6266997456550598, + "learning_rate": 0.0008225680486709871, + "loss": 1.4377, + "step": 4499 + }, + { + "epoch": 0.47468354430379744, + "grad_norm": 1.1750192642211914, + "learning_rate": 0.0008223181629673312, + "loss": 1.4359, + "step": 4500 + }, + { + "epoch": 0.474789029535865, + "grad_norm": 0.6906416416168213, + "learning_rate": 0.0008220682691600645, + "loss": 1.4572, + "step": 4501 + }, + { + "epoch": 0.4748945147679325, + "grad_norm": 1.155094861984253, + 
"learning_rate": 0.0008218183672771889, + "loss": 1.4671, + "step": 4502 + }, + { + "epoch": 0.475, + "grad_norm": 0.792525589466095, + "learning_rate": 0.0008215684573467071, + "loss": 1.4351, + "step": 4503 + }, + { + "epoch": 0.4751054852320675, + "grad_norm": 0.9314962029457092, + "learning_rate": 0.0008213185393966229, + "loss": 1.4464, + "step": 4504 + }, + { + "epoch": 0.475210970464135, + "grad_norm": 0.9046427607536316, + "learning_rate": 0.0008210686134549406, + "loss": 1.4342, + "step": 4505 + }, + { + "epoch": 0.4753164556962025, + "grad_norm": 1.1076122522354126, + "learning_rate": 0.0008208186795496657, + "loss": 1.4369, + "step": 4506 + }, + { + "epoch": 0.47542194092827006, + "grad_norm": 0.8312665820121765, + "learning_rate": 0.0008205687377088048, + "loss": 1.4638, + "step": 4507 + }, + { + "epoch": 0.47552742616033755, + "grad_norm": 0.941737174987793, + "learning_rate": 0.000820318787960365, + "loss": 1.4209, + "step": 4508 + }, + { + "epoch": 0.47563291139240504, + "grad_norm": 0.8747421503067017, + "learning_rate": 0.0008200688303323542, + "loss": 1.4377, + "step": 4509 + }, + { + "epoch": 0.4757383966244726, + "grad_norm": 0.9611195921897888, + "learning_rate": 0.0008198188648527818, + "loss": 1.4032, + "step": 4510 + }, + { + "epoch": 0.4758438818565401, + "grad_norm": 0.8277146816253662, + "learning_rate": 0.0008195688915496571, + "loss": 1.4229, + "step": 4511 + }, + { + "epoch": 0.4759493670886076, + "grad_norm": 0.8024994730949402, + "learning_rate": 0.0008193189104509915, + "loss": 1.4192, + "step": 4512 + }, + { + "epoch": 0.4760548523206751, + "grad_norm": 0.8625160455703735, + "learning_rate": 0.0008190689215847963, + "loss": 1.4408, + "step": 4513 + }, + { + "epoch": 0.4761603375527426, + "grad_norm": 0.6985374093055725, + "learning_rate": 0.0008188189249790838, + "loss": 1.4424, + "step": 4514 + }, + { + "epoch": 0.4762658227848101, + "grad_norm": 0.9923935532569885, + "learning_rate": 0.0008185689206618677, + "loss": 1.4516, + 
"step": 4515 + }, + { + "epoch": 0.47637130801687766, + "grad_norm": 0.8337643146514893, + "learning_rate": 0.0008183189086611623, + "loss": 1.4229, + "step": 4516 + }, + { + "epoch": 0.47647679324894515, + "grad_norm": 0.8121418952941895, + "learning_rate": 0.0008180688890049823, + "loss": 1.4141, + "step": 4517 + }, + { + "epoch": 0.47658227848101264, + "grad_norm": 0.6960178017616272, + "learning_rate": 0.000817818861721344, + "loss": 1.3908, + "step": 4518 + }, + { + "epoch": 0.4766877637130802, + "grad_norm": 0.9720804691314697, + "learning_rate": 0.0008175688268382639, + "loss": 1.39, + "step": 4519 + }, + { + "epoch": 0.4767932489451477, + "grad_norm": 0.7609580159187317, + "learning_rate": 0.00081731878438376, + "loss": 1.4114, + "step": 4520 + }, + { + "epoch": 0.4768987341772152, + "grad_norm": 0.7522091865539551, + "learning_rate": 0.0008170687343858506, + "loss": 1.4598, + "step": 4521 + }, + { + "epoch": 0.4770042194092827, + "grad_norm": 0.6813508868217468, + "learning_rate": 0.000816818676872555, + "loss": 1.4654, + "step": 4522 + }, + { + "epoch": 0.4771097046413502, + "grad_norm": 1.0248245000839233, + "learning_rate": 0.0008165686118718935, + "loss": 1.4387, + "step": 4523 + }, + { + "epoch": 0.4772151898734177, + "grad_norm": 0.6347531080245972, + "learning_rate": 0.000816318539411887, + "loss": 1.4361, + "step": 4524 + }, + { + "epoch": 0.47732067510548526, + "grad_norm": 1.1516518592834473, + "learning_rate": 0.0008160684595205577, + "loss": 1.4126, + "step": 4525 + }, + { + "epoch": 0.47742616033755275, + "grad_norm": 0.6943604946136475, + "learning_rate": 0.000815818372225928, + "loss": 1.4655, + "step": 4526 + }, + { + "epoch": 0.47753164556962024, + "grad_norm": 1.255549669265747, + "learning_rate": 0.0008155682775560215, + "loss": 1.4095, + "step": 4527 + }, + { + "epoch": 0.47763713080168774, + "grad_norm": 0.7061770558357239, + "learning_rate": 0.0008153181755388624, + "loss": 1.438, + "step": 4528 + }, + { + "epoch": 0.4777426160337553, 
+ "grad_norm": 0.740763783454895, + "learning_rate": 0.0008150680662024761, + "loss": 1.4265, + "step": 4529 + }, + { + "epoch": 0.4778481012658228, + "grad_norm": 0.9896188974380493, + "learning_rate": 0.0008148179495748885, + "loss": 1.4514, + "step": 4530 + }, + { + "epoch": 0.47795358649789027, + "grad_norm": 0.807020366191864, + "learning_rate": 0.0008145678256841265, + "loss": 1.4224, + "step": 4531 + }, + { + "epoch": 0.4780590717299578, + "grad_norm": 0.8448647260665894, + "learning_rate": 0.0008143176945582175, + "loss": 1.431, + "step": 4532 + }, + { + "epoch": 0.4781645569620253, + "grad_norm": 0.6703131198883057, + "learning_rate": 0.0008140675562251904, + "loss": 1.471, + "step": 4533 + }, + { + "epoch": 0.4782700421940928, + "grad_norm": 0.6671913266181946, + "learning_rate": 0.0008138174107130739, + "loss": 1.4602, + "step": 4534 + }, + { + "epoch": 0.47837552742616035, + "grad_norm": 0.7782979011535645, + "learning_rate": 0.0008135672580498984, + "loss": 1.4159, + "step": 4535 + }, + { + "epoch": 0.47848101265822784, + "grad_norm": 0.6666164398193359, + "learning_rate": 0.0008133170982636946, + "loss": 1.4318, + "step": 4536 + }, + { + "epoch": 0.47858649789029534, + "grad_norm": 0.7531194090843201, + "learning_rate": 0.0008130669313824944, + "loss": 1.4405, + "step": 4537 + }, + { + "epoch": 0.4786919831223629, + "grad_norm": 0.689154326915741, + "learning_rate": 0.0008128167574343299, + "loss": 1.4456, + "step": 4538 + }, + { + "epoch": 0.4787974683544304, + "grad_norm": 0.6728829145431519, + "learning_rate": 0.0008125665764472345, + "loss": 1.4395, + "step": 4539 + }, + { + "epoch": 0.47890295358649787, + "grad_norm": 0.7019101977348328, + "learning_rate": 0.0008123163884492422, + "loss": 1.4595, + "step": 4540 + }, + { + "epoch": 0.4790084388185654, + "grad_norm": 0.9549896717071533, + "learning_rate": 0.0008120661934683879, + "loss": 1.4121, + "step": 4541 + }, + { + "epoch": 0.4791139240506329, + "grad_norm": 0.6464274525642395, + 
"learning_rate": 0.0008118159915327072, + "loss": 1.4128, + "step": 4542 + }, + { + "epoch": 0.4792194092827004, + "grad_norm": 0.8776606321334839, + "learning_rate": 0.0008115657826702364, + "loss": 1.4549, + "step": 4543 + }, + { + "epoch": 0.47932489451476795, + "grad_norm": 0.6135542392730713, + "learning_rate": 0.0008113155669090124, + "loss": 1.4463, + "step": 4544 + }, + { + "epoch": 0.47943037974683544, + "grad_norm": 0.9547217488288879, + "learning_rate": 0.0008110653442770736, + "loss": 1.446, + "step": 4545 + }, + { + "epoch": 0.47953586497890294, + "grad_norm": 0.6425114870071411, + "learning_rate": 0.0008108151148024584, + "loss": 1.4391, + "step": 4546 + }, + { + "epoch": 0.4796413502109705, + "grad_norm": 0.8506060838699341, + "learning_rate": 0.0008105648785132065, + "loss": 1.4464, + "step": 4547 + }, + { + "epoch": 0.479746835443038, + "grad_norm": 0.662864089012146, + "learning_rate": 0.0008103146354373577, + "loss": 1.4618, + "step": 4548 + }, + { + "epoch": 0.47985232067510547, + "grad_norm": 1.0700041055679321, + "learning_rate": 0.0008100643856029534, + "loss": 1.4596, + "step": 4549 + }, + { + "epoch": 0.479957805907173, + "grad_norm": 0.743222713470459, + "learning_rate": 0.0008098141290380353, + "loss": 1.4022, + "step": 4550 + }, + { + "epoch": 0.4800632911392405, + "grad_norm": 0.914020836353302, + "learning_rate": 0.0008095638657706456, + "loss": 1.4196, + "step": 4551 + }, + { + "epoch": 0.480168776371308, + "grad_norm": 0.7148547768592834, + "learning_rate": 0.0008093135958288278, + "loss": 1.4198, + "step": 4552 + }, + { + "epoch": 0.48027426160337555, + "grad_norm": 0.8262978196144104, + "learning_rate": 0.0008090633192406256, + "loss": 1.4048, + "step": 4553 + }, + { + "epoch": 0.48037974683544304, + "grad_norm": 0.6972702741622925, + "learning_rate": 0.0008088130360340843, + "loss": 1.4425, + "step": 4554 + }, + { + "epoch": 0.48048523206751054, + "grad_norm": 0.8094848394393921, + "learning_rate": 0.0008085627462372489, + "loss": 
1.4007, + "step": 4555 + }, + { + "epoch": 0.4805907172995781, + "grad_norm": 0.8025990724563599, + "learning_rate": 0.0008083124498781658, + "loss": 1.4529, + "step": 4556 + }, + { + "epoch": 0.4806962025316456, + "grad_norm": 0.6986115574836731, + "learning_rate": 0.0008080621469848817, + "loss": 1.4266, + "step": 4557 + }, + { + "epoch": 0.48080168776371307, + "grad_norm": 0.6725935935974121, + "learning_rate": 0.0008078118375854449, + "loss": 1.4384, + "step": 4558 + }, + { + "epoch": 0.48090717299578056, + "grad_norm": 0.9526024460792542, + "learning_rate": 0.000807561521707903, + "loss": 1.4473, + "step": 4559 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 0.7435904741287231, + "learning_rate": 0.000807311199380306, + "loss": 1.4415, + "step": 4560 + }, + { + "epoch": 0.4811181434599156, + "grad_norm": 0.937390148639679, + "learning_rate": 0.000807060870630703, + "loss": 1.3922, + "step": 4561 + }, + { + "epoch": 0.4812236286919831, + "grad_norm": 0.895479142665863, + "learning_rate": 0.0008068105354871449, + "loss": 1.3987, + "step": 4562 + }, + { + "epoch": 0.48132911392405064, + "grad_norm": 0.7209329605102539, + "learning_rate": 0.0008065601939776833, + "loss": 1.4084, + "step": 4563 + }, + { + "epoch": 0.48143459915611814, + "grad_norm": 0.7759113907814026, + "learning_rate": 0.0008063098461303698, + "loss": 1.4305, + "step": 4564 + }, + { + "epoch": 0.48154008438818563, + "grad_norm": 0.7453292012214661, + "learning_rate": 0.0008060594919732572, + "loss": 1.4038, + "step": 4565 + }, + { + "epoch": 0.4816455696202532, + "grad_norm": 0.7481597661972046, + "learning_rate": 0.0008058091315343988, + "loss": 1.4506, + "step": 4566 + }, + { + "epoch": 0.48175105485232067, + "grad_norm": 0.690994381904602, + "learning_rate": 0.0008055587648418492, + "loss": 1.409, + "step": 4567 + }, + { + "epoch": 0.48185654008438816, + "grad_norm": 0.6667223572731018, + "learning_rate": 0.000805308391923663, + "loss": 1.419, + "step": 4568 + }, + { + "epoch": 
0.4819620253164557, + "grad_norm": 0.836010754108429, + "learning_rate": 0.0008050580128078957, + "loss": 1.3874, + "step": 4569 + }, + { + "epoch": 0.4820675105485232, + "grad_norm": 0.7562402486801147, + "learning_rate": 0.0008048076275226032, + "loss": 1.4198, + "step": 4570 + }, + { + "epoch": 0.4821729957805907, + "grad_norm": 0.8154714703559875, + "learning_rate": 0.000804557236095843, + "loss": 1.413, + "step": 4571 + }, + { + "epoch": 0.48227848101265824, + "grad_norm": 0.7081707715988159, + "learning_rate": 0.0008043068385556725, + "loss": 1.3973, + "step": 4572 + }, + { + "epoch": 0.48238396624472574, + "grad_norm": 0.9025362730026245, + "learning_rate": 0.0008040564349301498, + "loss": 1.404, + "step": 4573 + }, + { + "epoch": 0.48248945147679323, + "grad_norm": 0.8409237265586853, + "learning_rate": 0.0008038060252473339, + "loss": 1.4881, + "step": 4574 + }, + { + "epoch": 0.4825949367088608, + "grad_norm": 0.7266778945922852, + "learning_rate": 0.0008035556095352847, + "loss": 1.4118, + "step": 4575 + }, + { + "epoch": 0.48270042194092827, + "grad_norm": 0.8457436561584473, + "learning_rate": 0.0008033051878220624, + "loss": 1.4701, + "step": 4576 + }, + { + "epoch": 0.48280590717299576, + "grad_norm": 0.7232792973518372, + "learning_rate": 0.0008030547601357281, + "loss": 1.4041, + "step": 4577 + }, + { + "epoch": 0.4829113924050633, + "grad_norm": 0.8824338316917419, + "learning_rate": 0.0008028043265043434, + "loss": 1.4357, + "step": 4578 + }, + { + "epoch": 0.4830168776371308, + "grad_norm": 0.7419396042823792, + "learning_rate": 0.0008025538869559703, + "loss": 1.4711, + "step": 4579 + }, + { + "epoch": 0.4831223628691983, + "grad_norm": 0.8683390617370605, + "learning_rate": 0.0008023034415186725, + "loss": 1.4609, + "step": 4580 + }, + { + "epoch": 0.48322784810126584, + "grad_norm": 0.6923568844795227, + "learning_rate": 0.0008020529902205129, + "loss": 1.3809, + "step": 4581 + }, + { + "epoch": 0.48333333333333334, + "grad_norm": 
0.7637879848480225, + "learning_rate": 0.0008018025330895566, + "loss": 1.4104, + "step": 4582 + }, + { + "epoch": 0.48343881856540083, + "grad_norm": 0.6778084635734558, + "learning_rate": 0.0008015520701538677, + "loss": 1.4204, + "step": 4583 + }, + { + "epoch": 0.4835443037974684, + "grad_norm": 0.723313570022583, + "learning_rate": 0.0008013016014415126, + "loss": 1.455, + "step": 4584 + }, + { + "epoch": 0.48364978902953587, + "grad_norm": 0.7020635604858398, + "learning_rate": 0.0008010511269805571, + "loss": 1.4393, + "step": 4585 + }, + { + "epoch": 0.48375527426160336, + "grad_norm": 0.7499696612358093, + "learning_rate": 0.0008008006467990684, + "loss": 1.424, + "step": 4586 + }, + { + "epoch": 0.4838607594936709, + "grad_norm": 0.7446900010108948, + "learning_rate": 0.0008005501609251136, + "loss": 1.4439, + "step": 4587 + }, + { + "epoch": 0.4839662447257384, + "grad_norm": 0.7448982000350952, + "learning_rate": 0.0008002996693867615, + "loss": 1.3842, + "step": 4588 + }, + { + "epoch": 0.4840717299578059, + "grad_norm": 0.7199206948280334, + "learning_rate": 0.0008000491722120806, + "loss": 1.4176, + "step": 4589 + }, + { + "epoch": 0.48417721518987344, + "grad_norm": 0.7855405211448669, + "learning_rate": 0.0007997986694291404, + "loss": 1.4236, + "step": 4590 + }, + { + "epoch": 0.48428270042194094, + "grad_norm": 0.6804451942443848, + "learning_rate": 0.0007995481610660108, + "loss": 1.4433, + "step": 4591 + }, + { + "epoch": 0.48438818565400843, + "grad_norm": 0.6399988532066345, + "learning_rate": 0.0007992976471507628, + "loss": 1.4263, + "step": 4592 + }, + { + "epoch": 0.4844936708860759, + "grad_norm": 0.7028741240501404, + "learning_rate": 0.0007990471277114676, + "loss": 1.4266, + "step": 4593 + }, + { + "epoch": 0.48459915611814347, + "grad_norm": 0.6612092852592468, + "learning_rate": 0.0007987966027761972, + "loss": 1.4017, + "step": 4594 + }, + { + "epoch": 0.48470464135021096, + "grad_norm": 0.6610286831855774, + "learning_rate": 
0.0007985460723730242, + "loss": 1.4545, + "step": 4595 + }, + { + "epoch": 0.48481012658227846, + "grad_norm": 0.6986663937568665, + "learning_rate": 0.0007982955365300214, + "loss": 1.4511, + "step": 4596 + }, + { + "epoch": 0.484915611814346, + "grad_norm": 0.8270006775856018, + "learning_rate": 0.0007980449952752633, + "loss": 1.4151, + "step": 4597 + }, + { + "epoch": 0.4850210970464135, + "grad_norm": 0.7281879782676697, + "learning_rate": 0.0007977944486368237, + "loss": 1.4453, + "step": 4598 + }, + { + "epoch": 0.485126582278481, + "grad_norm": 0.9818108081817627, + "learning_rate": 0.0007975438966427778, + "loss": 1.4875, + "step": 4599 + }, + { + "epoch": 0.48523206751054854, + "grad_norm": 0.6565176844596863, + "learning_rate": 0.0007972933393212012, + "loss": 1.4531, + "step": 4600 + }, + { + "epoch": 0.48533755274261603, + "grad_norm": 1.3365147113800049, + "learning_rate": 0.0007970427767001702, + "loss": 1.4746, + "step": 4601 + }, + { + "epoch": 0.4854430379746835, + "grad_norm": 0.664700448513031, + "learning_rate": 0.0007967922088077615, + "loss": 1.4451, + "step": 4602 + }, + { + "epoch": 0.48554852320675107, + "grad_norm": 1.2235504388809204, + "learning_rate": 0.0007965416356720524, + "loss": 1.3961, + "step": 4603 + }, + { + "epoch": 0.48565400843881856, + "grad_norm": 0.8496304154396057, + "learning_rate": 0.000796291057321121, + "loss": 1.4308, + "step": 4604 + }, + { + "epoch": 0.48575949367088606, + "grad_norm": 1.1878173351287842, + "learning_rate": 0.0007960404737830457, + "loss": 1.458, + "step": 4605 + }, + { + "epoch": 0.4858649789029536, + "grad_norm": 0.8845499753952026, + "learning_rate": 0.0007957898850859058, + "loss": 1.4621, + "step": 4606 + }, + { + "epoch": 0.4859704641350211, + "grad_norm": 0.766895592212677, + "learning_rate": 0.000795539291257781, + "loss": 1.4365, + "step": 4607 + }, + { + "epoch": 0.4860759493670886, + "grad_norm": 1.0140380859375, + "learning_rate": 0.0007952886923267516, + "loss": 1.4322, + "step": 
4608 + }, + { + "epoch": 0.48618143459915614, + "grad_norm": 0.6410446763038635, + "learning_rate": 0.0007950380883208981, + "loss": 1.4355, + "step": 4609 + }, + { + "epoch": 0.48628691983122363, + "grad_norm": 1.1449785232543945, + "learning_rate": 0.0007947874792683025, + "loss": 1.4683, + "step": 4610 + }, + { + "epoch": 0.4863924050632911, + "grad_norm": 0.7138764262199402, + "learning_rate": 0.0007945368651970464, + "loss": 1.4046, + "step": 4611 + }, + { + "epoch": 0.48649789029535867, + "grad_norm": 0.8452233672142029, + "learning_rate": 0.0007942862461352125, + "loss": 1.4218, + "step": 4612 + }, + { + "epoch": 0.48660337552742616, + "grad_norm": 0.7582920789718628, + "learning_rate": 0.0007940356221108837, + "loss": 1.4029, + "step": 4613 + }, + { + "epoch": 0.48670886075949366, + "grad_norm": 0.7891407012939453, + "learning_rate": 0.0007937849931521441, + "loss": 1.4135, + "step": 4614 + }, + { + "epoch": 0.4868143459915612, + "grad_norm": 0.6842870712280273, + "learning_rate": 0.0007935343592870778, + "loss": 1.464, + "step": 4615 + }, + { + "epoch": 0.4869198312236287, + "grad_norm": 0.7821921110153198, + "learning_rate": 0.0007932837205437692, + "loss": 1.4436, + "step": 4616 + }, + { + "epoch": 0.4870253164556962, + "grad_norm": 0.7139105796813965, + "learning_rate": 0.000793033076950304, + "loss": 1.4492, + "step": 4617 + }, + { + "epoch": 0.48713080168776374, + "grad_norm": 0.7559991478919983, + "learning_rate": 0.0007927824285347678, + "loss": 1.3851, + "step": 4618 + }, + { + "epoch": 0.48723628691983123, + "grad_norm": 0.758359968662262, + "learning_rate": 0.0007925317753252473, + "loss": 1.3624, + "step": 4619 + }, + { + "epoch": 0.4873417721518987, + "grad_norm": 0.7745559811592102, + "learning_rate": 0.0007922811173498293, + "loss": 1.4263, + "step": 4620 + }, + { + "epoch": 0.48744725738396627, + "grad_norm": 0.6942875385284424, + "learning_rate": 0.0007920304546366013, + "loss": 1.451, + "step": 4621 + }, + { + "epoch": 0.48755274261603376, 
+ "grad_norm": 0.668445348739624, + "learning_rate": 0.0007917797872136511, + "loss": 1.3976, + "step": 4622 + }, + { + "epoch": 0.48765822784810126, + "grad_norm": 0.8516966104507446, + "learning_rate": 0.0007915291151090676, + "loss": 1.4163, + "step": 4623 + }, + { + "epoch": 0.4877637130801688, + "grad_norm": 0.7396148443222046, + "learning_rate": 0.0007912784383509396, + "loss": 1.424, + "step": 4624 + }, + { + "epoch": 0.4878691983122363, + "grad_norm": 0.7385764122009277, + "learning_rate": 0.0007910277569673568, + "loss": 1.4316, + "step": 4625 + }, + { + "epoch": 0.4879746835443038, + "grad_norm": 0.6686953902244568, + "learning_rate": 0.000790777070986409, + "loss": 1.4652, + "step": 4626 + }, + { + "epoch": 0.4880801687763713, + "grad_norm": 0.8494482040405273, + "learning_rate": 0.0007905263804361873, + "loss": 1.4649, + "step": 4627 + }, + { + "epoch": 0.48818565400843883, + "grad_norm": 0.6437564492225647, + "learning_rate": 0.0007902756853447824, + "loss": 1.4283, + "step": 4628 + }, + { + "epoch": 0.4882911392405063, + "grad_norm": 0.8176875114440918, + "learning_rate": 0.0007900249857402863, + "loss": 1.414, + "step": 4629 + }, + { + "epoch": 0.4883966244725738, + "grad_norm": 0.7398815155029297, + "learning_rate": 0.000789774281650791, + "loss": 1.4623, + "step": 4630 + }, + { + "epoch": 0.48850210970464136, + "grad_norm": 0.7535821795463562, + "learning_rate": 0.000789523573104389, + "loss": 1.4364, + "step": 4631 + }, + { + "epoch": 0.48860759493670886, + "grad_norm": 0.8105677366256714, + "learning_rate": 0.0007892728601291737, + "loss": 1.4273, + "step": 4632 + }, + { + "epoch": 0.48871308016877635, + "grad_norm": 0.6311827301979065, + "learning_rate": 0.0007890221427532384, + "loss": 1.4382, + "step": 4633 + }, + { + "epoch": 0.4888185654008439, + "grad_norm": 0.7238463163375854, + "learning_rate": 0.0007887714210046775, + "loss": 1.4279, + "step": 4634 + }, + { + "epoch": 0.4889240506329114, + "grad_norm": 0.7869098782539368, + 
"learning_rate": 0.0007885206949115855, + "loss": 1.4242, + "step": 4635 + }, + { + "epoch": 0.4890295358649789, + "grad_norm": 0.7448087930679321, + "learning_rate": 0.0007882699645020577, + "loss": 1.4453, + "step": 4636 + }, + { + "epoch": 0.48913502109704643, + "grad_norm": 0.6977633833885193, + "learning_rate": 0.0007880192298041893, + "loss": 1.4369, + "step": 4637 + }, + { + "epoch": 0.4892405063291139, + "grad_norm": 0.6689139008522034, + "learning_rate": 0.0007877684908460768, + "loss": 1.441, + "step": 4638 + }, + { + "epoch": 0.4893459915611814, + "grad_norm": 0.7141353487968445, + "learning_rate": 0.0007875177476558165, + "loss": 1.432, + "step": 4639 + }, + { + "epoch": 0.48945147679324896, + "grad_norm": 0.7562928795814514, + "learning_rate": 0.0007872670002615056, + "loss": 1.4633, + "step": 4640 + }, + { + "epoch": 0.48955696202531646, + "grad_norm": 0.6957457661628723, + "learning_rate": 0.0007870162486912414, + "loss": 1.4543, + "step": 4641 + }, + { + "epoch": 0.48966244725738395, + "grad_norm": 0.8705501556396484, + "learning_rate": 0.0007867654929731221, + "loss": 1.4111, + "step": 4642 + }, + { + "epoch": 0.4897679324894515, + "grad_norm": 0.7472569346427917, + "learning_rate": 0.0007865147331352457, + "loss": 1.4334, + "step": 4643 + }, + { + "epoch": 0.489873417721519, + "grad_norm": 0.7630128264427185, + "learning_rate": 0.0007862639692057115, + "loss": 1.4387, + "step": 4644 + }, + { + "epoch": 0.4899789029535865, + "grad_norm": 0.7201412320137024, + "learning_rate": 0.0007860132012126187, + "loss": 1.4704, + "step": 4645 + }, + { + "epoch": 0.49008438818565403, + "grad_norm": 0.7271934151649475, + "learning_rate": 0.0007857624291840672, + "loss": 1.4278, + "step": 4646 + }, + { + "epoch": 0.4901898734177215, + "grad_norm": 0.6743425726890564, + "learning_rate": 0.0007855116531481572, + "loss": 1.4554, + "step": 4647 + }, + { + "epoch": 0.490295358649789, + "grad_norm": 1.1665090322494507, + "learning_rate": 0.0007852608731329893, + 
"loss": 1.4332, + "step": 4648 + }, + { + "epoch": 0.49040084388185656, + "grad_norm": 0.6956673264503479, + "learning_rate": 0.0007850100891666648, + "loss": 1.4112, + "step": 4649 + }, + { + "epoch": 0.49050632911392406, + "grad_norm": 0.9300810098648071, + "learning_rate": 0.0007847593012772852, + "loss": 1.4499, + "step": 4650 + }, + { + "epoch": 0.49061181434599155, + "grad_norm": 0.75099778175354, + "learning_rate": 0.0007845085094929527, + "loss": 1.4446, + "step": 4651 + }, + { + "epoch": 0.4907172995780591, + "grad_norm": 0.847026526927948, + "learning_rate": 0.0007842577138417695, + "loss": 1.4247, + "step": 4652 + }, + { + "epoch": 0.4908227848101266, + "grad_norm": 0.8513079285621643, + "learning_rate": 0.0007840069143518386, + "loss": 1.4049, + "step": 4653 + }, + { + "epoch": 0.4909282700421941, + "grad_norm": 0.847732663154602, + "learning_rate": 0.0007837561110512635, + "loss": 1.4604, + "step": 4654 + }, + { + "epoch": 0.49103375527426163, + "grad_norm": 0.7961540222167969, + "learning_rate": 0.0007835053039681476, + "loss": 1.4163, + "step": 4655 + }, + { + "epoch": 0.4911392405063291, + "grad_norm": 1.02321457862854, + "learning_rate": 0.0007832544931305956, + "loss": 1.4202, + "step": 4656 + }, + { + "epoch": 0.4912447257383966, + "grad_norm": 0.8576701283454895, + "learning_rate": 0.0007830036785667116, + "loss": 1.4187, + "step": 4657 + }, + { + "epoch": 0.4913502109704641, + "grad_norm": 0.9348928332328796, + "learning_rate": 0.000782752860304601, + "loss": 1.4322, + "step": 4658 + }, + { + "epoch": 0.49145569620253166, + "grad_norm": 0.8308172821998596, + "learning_rate": 0.0007825020383723692, + "loss": 1.4146, + "step": 4659 + }, + { + "epoch": 0.49156118143459915, + "grad_norm": 0.9122442007064819, + "learning_rate": 0.0007822512127981218, + "loss": 1.4402, + "step": 4660 + }, + { + "epoch": 0.49166666666666664, + "grad_norm": 0.8986720442771912, + "learning_rate": 0.0007820003836099649, + "loss": 1.4443, + "step": 4661 + }, + { + 
"epoch": 0.4917721518987342, + "grad_norm": 0.6953029632568359, + "learning_rate": 0.0007817495508360057, + "loss": 1.409, + "step": 4662 + }, + { + "epoch": 0.4918776371308017, + "grad_norm": 0.7774820327758789, + "learning_rate": 0.0007814987145043511, + "loss": 1.4146, + "step": 4663 + }, + { + "epoch": 0.4919831223628692, + "grad_norm": 0.7342221736907959, + "learning_rate": 0.0007812478746431085, + "loss": 1.4227, + "step": 4664 + }, + { + "epoch": 0.4920886075949367, + "grad_norm": 0.7390682101249695, + "learning_rate": 0.0007809970312803855, + "loss": 1.429, + "step": 4665 + }, + { + "epoch": 0.4921940928270042, + "grad_norm": 0.6894100308418274, + "learning_rate": 0.0007807461844442906, + "loss": 1.4189, + "step": 4666 + }, + { + "epoch": 0.4922995780590717, + "grad_norm": 0.9105950593948364, + "learning_rate": 0.0007804953341629326, + "loss": 1.451, + "step": 4667 + }, + { + "epoch": 0.49240506329113926, + "grad_norm": 0.7295313477516174, + "learning_rate": 0.0007802444804644202, + "loss": 1.4308, + "step": 4668 + }, + { + "epoch": 0.49251054852320675, + "grad_norm": 0.7489637732505798, + "learning_rate": 0.0007799936233768632, + "loss": 1.4391, + "step": 4669 + }, + { + "epoch": 0.49261603375527424, + "grad_norm": 0.7014515995979309, + "learning_rate": 0.0007797427629283708, + "loss": 1.4192, + "step": 4670 + }, + { + "epoch": 0.4927215189873418, + "grad_norm": 0.7162891030311584, + "learning_rate": 0.0007794918991470537, + "loss": 1.4315, + "step": 4671 + }, + { + "epoch": 0.4928270042194093, + "grad_norm": 0.7578434348106384, + "learning_rate": 0.0007792410320610222, + "loss": 1.4049, + "step": 4672 + }, + { + "epoch": 0.4929324894514768, + "grad_norm": 0.6665034294128418, + "learning_rate": 0.0007789901616983872, + "loss": 1.4418, + "step": 4673 + }, + { + "epoch": 0.4930379746835443, + "grad_norm": 0.7396875023841858, + "learning_rate": 0.0007787392880872601, + "loss": 1.4727, + "step": 4674 + }, + { + "epoch": 0.4931434599156118, + "grad_norm": 
0.7259019017219543, + "learning_rate": 0.0007784884112557524, + "loss": 1.4147, + "step": 4675 + }, + { + "epoch": 0.4932489451476793, + "grad_norm": 0.685201108455658, + "learning_rate": 0.0007782375312319761, + "loss": 1.4042, + "step": 4676 + }, + { + "epoch": 0.49335443037974686, + "grad_norm": 0.7101973295211792, + "learning_rate": 0.0007779866480440437, + "loss": 1.4269, + "step": 4677 + }, + { + "epoch": 0.49345991561181435, + "grad_norm": 0.7048077583312988, + "learning_rate": 0.0007777357617200679, + "loss": 1.415, + "step": 4678 + }, + { + "epoch": 0.49356540084388184, + "grad_norm": 0.6940801739692688, + "learning_rate": 0.0007774848722881616, + "loss": 1.4434, + "step": 4679 + }, + { + "epoch": 0.4936708860759494, + "grad_norm": 0.6912481784820557, + "learning_rate": 0.0007772339797764385, + "loss": 1.403, + "step": 4680 + }, + { + "epoch": 0.4937763713080169, + "grad_norm": 0.6813130974769592, + "learning_rate": 0.0007769830842130119, + "loss": 1.4152, + "step": 4681 + }, + { + "epoch": 0.4938818565400844, + "grad_norm": 0.676161527633667, + "learning_rate": 0.0007767321856259963, + "loss": 1.4003, + "step": 4682 + }, + { + "epoch": 0.4939873417721519, + "grad_norm": 0.6770591139793396, + "learning_rate": 0.0007764812840435058, + "loss": 1.4254, + "step": 4683 + }, + { + "epoch": 0.4940928270042194, + "grad_norm": 0.7016900181770325, + "learning_rate": 0.0007762303794936556, + "loss": 1.39, + "step": 4684 + }, + { + "epoch": 0.4941983122362869, + "grad_norm": 0.6903887391090393, + "learning_rate": 0.0007759794720045606, + "loss": 1.3829, + "step": 4685 + }, + { + "epoch": 0.49430379746835446, + "grad_norm": 0.8764941096305847, + "learning_rate": 0.0007757285616043363, + "loss": 1.4256, + "step": 4686 + }, + { + "epoch": 0.49440928270042195, + "grad_norm": 0.8179246783256531, + "learning_rate": 0.0007754776483210981, + "loss": 1.464, + "step": 4687 + }, + { + "epoch": 0.49451476793248944, + "grad_norm": 1.0445151329040527, + "learning_rate": 
0.0007752267321829624, + "loss": 1.4155, + "step": 4688 + }, + { + "epoch": 0.494620253164557, + "grad_norm": 0.9390841722488403, + "learning_rate": 0.0007749758132180459, + "loss": 1.4023, + "step": 4689 + }, + { + "epoch": 0.4947257383966245, + "grad_norm": 1.1933585405349731, + "learning_rate": 0.0007747248914544646, + "loss": 1.4661, + "step": 4690 + }, + { + "epoch": 0.494831223628692, + "grad_norm": 1.086400032043457, + "learning_rate": 0.0007744739669203361, + "loss": 1.4579, + "step": 4691 + }, + { + "epoch": 0.49493670886075947, + "grad_norm": 0.9516783952713013, + "learning_rate": 0.0007742230396437775, + "loss": 1.4616, + "step": 4692 + }, + { + "epoch": 0.495042194092827, + "grad_norm": 1.272527813911438, + "learning_rate": 0.0007739721096529066, + "loss": 1.4391, + "step": 4693 + }, + { + "epoch": 0.4951476793248945, + "grad_norm": 0.7849406599998474, + "learning_rate": 0.0007737211769758412, + "loss": 1.447, + "step": 4694 + }, + { + "epoch": 0.495253164556962, + "grad_norm": 0.9713467359542847, + "learning_rate": 0.0007734702416406997, + "loss": 1.4148, + "step": 4695 + }, + { + "epoch": 0.49535864978902955, + "grad_norm": 0.6532008051872253, + "learning_rate": 0.0007732193036756006, + "loss": 1.441, + "step": 4696 + }, + { + "epoch": 0.49546413502109704, + "grad_norm": 1.1410515308380127, + "learning_rate": 0.0007729683631086627, + "loss": 1.4161, + "step": 4697 + }, + { + "epoch": 0.49556962025316453, + "grad_norm": 0.7604749798774719, + "learning_rate": 0.0007727174199680051, + "loss": 1.3926, + "step": 4698 + }, + { + "epoch": 0.4956751054852321, + "grad_norm": 1.1352980136871338, + "learning_rate": 0.0007724664742817475, + "loss": 1.4353, + "step": 4699 + }, + { + "epoch": 0.4957805907172996, + "grad_norm": 0.9469410181045532, + "learning_rate": 0.0007722155260780093, + "loss": 1.4013, + "step": 4700 + }, + { + "epoch": 0.49588607594936707, + "grad_norm": 0.7809967398643494, + "learning_rate": 0.0007719645753849108, + "loss": 1.4229, + "step": 
4701 + }, + { + "epoch": 0.4959915611814346, + "grad_norm": 1.2025829553604126, + "learning_rate": 0.0007717136222305718, + "loss": 1.444, + "step": 4702 + }, + { + "epoch": 0.4960970464135021, + "grad_norm": 0.804617702960968, + "learning_rate": 0.0007714626666431134, + "loss": 1.4039, + "step": 4703 + }, + { + "epoch": 0.4962025316455696, + "grad_norm": 0.8246406316757202, + "learning_rate": 0.000771211708650656, + "loss": 1.4375, + "step": 4704 + }, + { + "epoch": 0.49630801687763715, + "grad_norm": 0.7863566875457764, + "learning_rate": 0.000770960748281321, + "loss": 1.4132, + "step": 4705 + }, + { + "epoch": 0.49641350210970464, + "grad_norm": 0.7153175473213196, + "learning_rate": 0.0007707097855632297, + "loss": 1.4184, + "step": 4706 + }, + { + "epoch": 0.49651898734177213, + "grad_norm": 0.8163058757781982, + "learning_rate": 0.0007704588205245034, + "loss": 1.47, + "step": 4707 + }, + { + "epoch": 0.4966244725738397, + "grad_norm": 0.769882082939148, + "learning_rate": 0.0007702078531932645, + "loss": 1.4438, + "step": 4708 + }, + { + "epoch": 0.4967299578059072, + "grad_norm": 0.718605637550354, + "learning_rate": 0.0007699568835976348, + "loss": 1.4101, + "step": 4709 + }, + { + "epoch": 0.49683544303797467, + "grad_norm": 0.7513044476509094, + "learning_rate": 0.0007697059117657368, + "loss": 1.4292, + "step": 4710 + }, + { + "epoch": 0.4969409282700422, + "grad_norm": 0.7315669059753418, + "learning_rate": 0.0007694549377256932, + "loss": 1.4443, + "step": 4711 + }, + { + "epoch": 0.4970464135021097, + "grad_norm": 0.8093006014823914, + "learning_rate": 0.0007692039615056264, + "loss": 1.4219, + "step": 4712 + }, + { + "epoch": 0.4971518987341772, + "grad_norm": 0.8585615158081055, + "learning_rate": 0.0007689529831336604, + "loss": 1.3937, + "step": 4713 + }, + { + "epoch": 0.49725738396624475, + "grad_norm": 0.8300880193710327, + "learning_rate": 0.0007687020026379181, + "loss": 1.4452, + "step": 4714 + }, + { + "epoch": 0.49736286919831224, + 
"grad_norm": 0.8463894128799438, + "learning_rate": 0.0007684510200465231, + "loss": 1.4198, + "step": 4715 + }, + { + "epoch": 0.49746835443037973, + "grad_norm": 0.7120779752731323, + "learning_rate": 0.0007682000353875992, + "loss": 1.4067, + "step": 4716 + }, + { + "epoch": 0.4975738396624473, + "grad_norm": 1.1614967584609985, + "learning_rate": 0.0007679490486892705, + "loss": 1.4135, + "step": 4717 + }, + { + "epoch": 0.4976793248945148, + "grad_norm": 0.8540139198303223, + "learning_rate": 0.0007676980599796616, + "loss": 1.4735, + "step": 4718 + }, + { + "epoch": 0.49778481012658227, + "grad_norm": 1.1718367338180542, + "learning_rate": 0.0007674470692868967, + "loss": 1.442, + "step": 4719 + }, + { + "epoch": 0.4978902953586498, + "grad_norm": 0.8077955842018127, + "learning_rate": 0.0007671960766391008, + "loss": 1.4, + "step": 4720 + }, + { + "epoch": 0.4979957805907173, + "grad_norm": 0.7899685502052307, + "learning_rate": 0.0007669450820643987, + "loss": 1.4175, + "step": 4721 + }, + { + "epoch": 0.4981012658227848, + "grad_norm": 0.892180860042572, + "learning_rate": 0.0007666940855909155, + "loss": 1.4391, + "step": 4722 + }, + { + "epoch": 0.49820675105485235, + "grad_norm": 0.8622490167617798, + "learning_rate": 0.000766443087246777, + "loss": 1.4343, + "step": 4723 + }, + { + "epoch": 0.49831223628691984, + "grad_norm": 0.7962700128555298, + "learning_rate": 0.0007661920870601085, + "loss": 1.4407, + "step": 4724 + }, + { + "epoch": 0.49841772151898733, + "grad_norm": 0.9330347776412964, + "learning_rate": 0.000765941085059036, + "loss": 1.426, + "step": 4725 + }, + { + "epoch": 0.4985232067510548, + "grad_norm": 0.6905992031097412, + "learning_rate": 0.0007656900812716853, + "loss": 1.425, + "step": 4726 + }, + { + "epoch": 0.4986286919831224, + "grad_norm": 0.7519301176071167, + "learning_rate": 0.0007654390757261827, + "loss": 1.399, + "step": 4727 + }, + { + "epoch": 0.49873417721518987, + "grad_norm": 0.7045724987983704, + "learning_rate": 
0.0007651880684506548, + "loss": 1.4513, + "step": 4728 + }, + { + "epoch": 0.49883966244725736, + "grad_norm": 0.7821600437164307, + "learning_rate": 0.0007649370594732282, + "loss": 1.4006, + "step": 4729 + }, + { + "epoch": 0.4989451476793249, + "grad_norm": 0.6876749396324158, + "learning_rate": 0.0007646860488220293, + "loss": 1.4062, + "step": 4730 + }, + { + "epoch": 0.4990506329113924, + "grad_norm": 0.7698221802711487, + "learning_rate": 0.0007644350365251855, + "loss": 1.4375, + "step": 4731 + }, + { + "epoch": 0.4991561181434599, + "grad_norm": 0.7498847246170044, + "learning_rate": 0.0007641840226108241, + "loss": 1.4101, + "step": 4732 + }, + { + "epoch": 0.49926160337552744, + "grad_norm": 0.81513512134552, + "learning_rate": 0.000763933007107072, + "loss": 1.4492, + "step": 4733 + }, + { + "epoch": 0.49936708860759493, + "grad_norm": 0.7050167322158813, + "learning_rate": 0.0007636819900420572, + "loss": 1.4291, + "step": 4734 + }, + { + "epoch": 0.4994725738396624, + "grad_norm": 0.9180414080619812, + "learning_rate": 0.0007634309714439069, + "loss": 1.4442, + "step": 4735 + }, + { + "epoch": 0.49957805907173, + "grad_norm": 0.638550877571106, + "learning_rate": 0.0007631799513407495, + "loss": 1.4246, + "step": 4736 + }, + { + "epoch": 0.49968354430379747, + "grad_norm": 0.7389904260635376, + "learning_rate": 0.0007629289297607127, + "loss": 1.4, + "step": 4737 + }, + { + "epoch": 0.49978902953586496, + "grad_norm": 0.663836658000946, + "learning_rate": 0.0007626779067319251, + "loss": 1.3784, + "step": 4738 + }, + { + "epoch": 0.4998945147679325, + "grad_norm": 0.7821851968765259, + "learning_rate": 0.0007624268822825145, + "loss": 1.4175, + "step": 4739 + }, + { + "epoch": 0.5, + "grad_norm": 0.7226592898368835, + "learning_rate": 0.00076217585644061, + "loss": 1.4233, + "step": 4740 + }, + { + "epoch": 0.5001054852320675, + "grad_norm": 0.6334653496742249, + "learning_rate": 0.0007619248292343399, + "loss": 1.4473, + "step": 4741 + }, + { + 
"epoch": 0.500210970464135, + "grad_norm": 0.6531209349632263, + "learning_rate": 0.0007616738006918334, + "loss": 1.4138, + "step": 4742 + }, + { + "epoch": 0.5003164556962025, + "grad_norm": 0.7639381885528564, + "learning_rate": 0.0007614227708412191, + "loss": 1.4169, + "step": 4743 + }, + { + "epoch": 0.5004219409282701, + "grad_norm": 0.6438692808151245, + "learning_rate": 0.0007611717397106265, + "loss": 1.4717, + "step": 4744 + }, + { + "epoch": 0.5005274261603375, + "grad_norm": 0.731940507888794, + "learning_rate": 0.0007609207073281848, + "loss": 1.411, + "step": 4745 + }, + { + "epoch": 0.5006329113924051, + "grad_norm": 0.6440172791481018, + "learning_rate": 0.0007606696737220233, + "loss": 1.4394, + "step": 4746 + }, + { + "epoch": 0.5007383966244726, + "grad_norm": 0.7227556109428406, + "learning_rate": 0.000760418638920272, + "loss": 1.4419, + "step": 4747 + }, + { + "epoch": 0.50084388185654, + "grad_norm": 0.6447212100028992, + "learning_rate": 0.0007601676029510597, + "loss": 1.4565, + "step": 4748 + }, + { + "epoch": 0.5009493670886076, + "grad_norm": 0.8178848624229431, + "learning_rate": 0.000759916565842517, + "loss": 1.4615, + "step": 4749 + }, + { + "epoch": 0.5010548523206751, + "grad_norm": 0.614560604095459, + "learning_rate": 0.0007596655276227739, + "loss": 1.4385, + "step": 4750 + }, + { + "epoch": 0.5011603375527426, + "grad_norm": 0.7042905688285828, + "learning_rate": 0.0007594144883199599, + "loss": 1.4459, + "step": 4751 + }, + { + "epoch": 0.5012658227848101, + "grad_norm": 0.6648653149604797, + "learning_rate": 0.0007591634479622056, + "loss": 1.4304, + "step": 4752 + }, + { + "epoch": 0.5013713080168777, + "grad_norm": 0.7095407843589783, + "learning_rate": 0.0007589124065776414, + "loss": 1.4444, + "step": 4753 + }, + { + "epoch": 0.5014767932489451, + "grad_norm": 0.665195643901825, + "learning_rate": 0.0007586613641943976, + "loss": 1.4323, + "step": 4754 + }, + { + "epoch": 0.5015822784810127, + "grad_norm": 
0.6978325247764587, + "learning_rate": 0.0007584103208406048, + "loss": 1.4052, + "step": 4755 + }, + { + "epoch": 0.5016877637130802, + "grad_norm": 0.6800966262817383, + "learning_rate": 0.0007581592765443933, + "loss": 1.3847, + "step": 4756 + }, + { + "epoch": 0.5017932489451477, + "grad_norm": 0.7506575584411621, + "learning_rate": 0.0007579082313338943, + "loss": 1.4404, + "step": 4757 + }, + { + "epoch": 0.5018987341772152, + "grad_norm": 0.6349396705627441, + "learning_rate": 0.0007576571852372386, + "loss": 1.4108, + "step": 4758 + }, + { + "epoch": 0.5020042194092827, + "grad_norm": 0.7053964138031006, + "learning_rate": 0.0007574061382825572, + "loss": 1.406, + "step": 4759 + }, + { + "epoch": 0.5021097046413502, + "grad_norm": 0.6692227125167847, + "learning_rate": 0.0007571550904979812, + "loss": 1.4449, + "step": 4760 + }, + { + "epoch": 0.5022151898734177, + "grad_norm": 0.7772524952888489, + "learning_rate": 0.0007569040419116413, + "loss": 1.4326, + "step": 4761 + }, + { + "epoch": 0.5023206751054853, + "grad_norm": 0.689189076423645, + "learning_rate": 0.0007566529925516692, + "loss": 1.452, + "step": 4762 + }, + { + "epoch": 0.5024261603375527, + "grad_norm": 0.6904187798500061, + "learning_rate": 0.0007564019424461962, + "loss": 1.405, + "step": 4763 + }, + { + "epoch": 0.5025316455696203, + "grad_norm": 0.6617719531059265, + "learning_rate": 0.0007561508916233535, + "loss": 1.423, + "step": 4764 + }, + { + "epoch": 0.5026371308016878, + "grad_norm": 0.6716655492782593, + "learning_rate": 0.0007558998401112727, + "loss": 1.4025, + "step": 4765 + }, + { + "epoch": 0.5027426160337553, + "grad_norm": 0.7489088177680969, + "learning_rate": 0.0007556487879380856, + "loss": 1.388, + "step": 4766 + }, + { + "epoch": 0.5028481012658228, + "grad_norm": 0.6715736389160156, + "learning_rate": 0.0007553977351319235, + "loss": 1.43, + "step": 4767 + }, + { + "epoch": 0.5029535864978903, + "grad_norm": 0.8808643221855164, + "learning_rate": 
0.0007551466817209183, + "loss": 1.41, + "step": 4768 + }, + { + "epoch": 0.5030590717299578, + "grad_norm": 0.6243277788162231, + "learning_rate": 0.0007548956277332016, + "loss": 1.4542, + "step": 4769 + }, + { + "epoch": 0.5031645569620253, + "grad_norm": 0.6878921389579773, + "learning_rate": 0.0007546445731969056, + "loss": 1.4286, + "step": 4770 + }, + { + "epoch": 0.5032700421940929, + "grad_norm": 0.6771445274353027, + "learning_rate": 0.000754393518140162, + "loss": 1.419, + "step": 4771 + }, + { + "epoch": 0.5033755274261603, + "grad_norm": 0.6579036116600037, + "learning_rate": 0.0007541424625911026, + "loss": 1.4881, + "step": 4772 + }, + { + "epoch": 0.5034810126582279, + "grad_norm": 0.7082417607307434, + "learning_rate": 0.0007538914065778598, + "loss": 1.4345, + "step": 4773 + }, + { + "epoch": 0.5035864978902953, + "grad_norm": 0.9689401984214783, + "learning_rate": 0.0007536403501285653, + "loss": 1.408, + "step": 4774 + }, + { + "epoch": 0.5036919831223629, + "grad_norm": 0.6816084980964661, + "learning_rate": 0.0007533892932713517, + "loss": 1.4557, + "step": 4775 + }, + { + "epoch": 0.5037974683544304, + "grad_norm": 1.0757466554641724, + "learning_rate": 0.0007531382360343507, + "loss": 1.4473, + "step": 4776 + }, + { + "epoch": 0.5039029535864978, + "grad_norm": 0.7128094434738159, + "learning_rate": 0.0007528871784456948, + "loss": 1.4177, + "step": 4777 + }, + { + "epoch": 0.5040084388185654, + "grad_norm": 0.664467990398407, + "learning_rate": 0.0007526361205335159, + "loss": 1.453, + "step": 4778 + }, + { + "epoch": 0.5041139240506329, + "grad_norm": 0.7586062550544739, + "learning_rate": 0.0007523850623259469, + "loss": 1.4173, + "step": 4779 + }, + { + "epoch": 0.5042194092827004, + "grad_norm": 0.8560940027236938, + "learning_rate": 0.0007521340038511196, + "loss": 1.4123, + "step": 4780 + }, + { + "epoch": 0.5043248945147679, + "grad_norm": 0.7063254117965698, + "learning_rate": 0.0007518829451371665, + "loss": 1.3947, + "step": 4781 
+ }, + { + "epoch": 0.5044303797468355, + "grad_norm": 0.8352259397506714, + "learning_rate": 0.0007516318862122199, + "loss": 1.4515, + "step": 4782 + }, + { + "epoch": 0.5045358649789029, + "grad_norm": 0.6808869242668152, + "learning_rate": 0.0007513808271044125, + "loss": 1.4278, + "step": 4783 + }, + { + "epoch": 0.5046413502109705, + "grad_norm": 0.6380367875099182, + "learning_rate": 0.0007511297678418766, + "loss": 1.4048, + "step": 4784 + }, + { + "epoch": 0.504746835443038, + "grad_norm": 0.865281879901886, + "learning_rate": 0.0007508787084527445, + "loss": 1.4256, + "step": 4785 + }, + { + "epoch": 0.5048523206751054, + "grad_norm": 0.6509671807289124, + "learning_rate": 0.0007506276489651489, + "loss": 1.3977, + "step": 4786 + }, + { + "epoch": 0.504957805907173, + "grad_norm": 0.7907978892326355, + "learning_rate": 0.0007503765894072217, + "loss": 1.4184, + "step": 4787 + }, + { + "epoch": 0.5050632911392405, + "grad_norm": 0.6459627151489258, + "learning_rate": 0.000750125529807096, + "loss": 1.4112, + "step": 4788 + }, + { + "epoch": 0.505168776371308, + "grad_norm": 0.9450064301490784, + "learning_rate": 0.0007498744701929041, + "loss": 1.3655, + "step": 4789 + }, + { + "epoch": 0.5052742616033755, + "grad_norm": 0.7417000532150269, + "learning_rate": 0.0007496234105927785, + "loss": 1.4628, + "step": 4790 + }, + { + "epoch": 0.5053797468354431, + "grad_norm": 0.7226696014404297, + "learning_rate": 0.0007493723510348516, + "loss": 1.4538, + "step": 4791 + }, + { + "epoch": 0.5054852320675105, + "grad_norm": 0.8740532994270325, + "learning_rate": 0.0007491212915472557, + "loss": 1.3899, + "step": 4792 + }, + { + "epoch": 0.505590717299578, + "grad_norm": 0.6592686772346497, + "learning_rate": 0.0007488702321581234, + "loss": 1.4511, + "step": 4793 + }, + { + "epoch": 0.5056962025316456, + "grad_norm": 0.8515461087226868, + "learning_rate": 0.0007486191728955873, + "loss": 1.494, + "step": 4794 + }, + { + "epoch": 0.505801687763713, + "grad_norm": 
0.6726322174072266, + "learning_rate": 0.00074836811378778, + "loss": 1.4176, + "step": 4795 + }, + { + "epoch": 0.5059071729957806, + "grad_norm": 0.7780492305755615, + "learning_rate": 0.0007481170548628335, + "loss": 1.4127, + "step": 4796 + }, + { + "epoch": 0.5060126582278481, + "grad_norm": 0.68145751953125, + "learning_rate": 0.0007478659961488805, + "loss": 1.4069, + "step": 4797 + }, + { + "epoch": 0.5061181434599156, + "grad_norm": 0.6369461417198181, + "learning_rate": 0.0007476149376740533, + "loss": 1.3791, + "step": 4798 + }, + { + "epoch": 0.5062236286919831, + "grad_norm": 0.7671969532966614, + "learning_rate": 0.0007473638794664841, + "loss": 1.4205, + "step": 4799 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 0.7682017087936401, + "learning_rate": 0.0007471128215543056, + "loss": 1.411, + "step": 4800 + }, + { + "epoch": 0.5064345991561181, + "grad_norm": 0.645331084728241, + "learning_rate": 0.0007468617639656496, + "loss": 1.4039, + "step": 4801 + }, + { + "epoch": 0.5065400843881857, + "grad_norm": 0.7209265232086182, + "learning_rate": 0.0007466107067286483, + "loss": 1.4288, + "step": 4802 + }, + { + "epoch": 0.5066455696202532, + "grad_norm": 0.6830179691314697, + "learning_rate": 0.0007463596498714346, + "loss": 1.4221, + "step": 4803 + }, + { + "epoch": 0.5067510548523206, + "grad_norm": 0.7364936470985413, + "learning_rate": 0.0007461085934221402, + "loss": 1.3994, + "step": 4804 + }, + { + "epoch": 0.5068565400843882, + "grad_norm": 0.7624307870864868, + "learning_rate": 0.0007458575374088974, + "loss": 1.4237, + "step": 4805 + }, + { + "epoch": 0.5069620253164557, + "grad_norm": 0.6677145957946777, + "learning_rate": 0.0007456064818598382, + "loss": 1.4191, + "step": 4806 + }, + { + "epoch": 0.5070675105485232, + "grad_norm": 0.6963493824005127, + "learning_rate": 0.0007453554268030946, + "loss": 1.4422, + "step": 4807 + }, + { + "epoch": 0.5071729957805907, + "grad_norm": 0.683713972568512, + "learning_rate": 
0.0007451043722667985, + "loss": 1.4271, + "step": 4808 + }, + { + "epoch": 0.5072784810126583, + "grad_norm": 0.7482369542121887, + "learning_rate": 0.000744853318279082, + "loss": 1.4257, + "step": 4809 + }, + { + "epoch": 0.5073839662447257, + "grad_norm": 0.6293342709541321, + "learning_rate": 0.0007446022648680768, + "loss": 1.4198, + "step": 4810 + }, + { + "epoch": 0.5074894514767933, + "grad_norm": 0.709611177444458, + "learning_rate": 0.0007443512120619144, + "loss": 1.4113, + "step": 4811 + }, + { + "epoch": 0.5075949367088608, + "grad_norm": 0.6345851421356201, + "learning_rate": 0.0007441001598887273, + "loss": 1.4191, + "step": 4812 + }, + { + "epoch": 0.5077004219409282, + "grad_norm": 0.8177386522293091, + "learning_rate": 0.0007438491083766465, + "loss": 1.4686, + "step": 4813 + }, + { + "epoch": 0.5078059071729958, + "grad_norm": 0.7459893226623535, + "learning_rate": 0.000743598057553804, + "loss": 1.4582, + "step": 4814 + }, + { + "epoch": 0.5079113924050633, + "grad_norm": 0.8503395915031433, + "learning_rate": 0.0007433470074483309, + "loss": 1.4322, + "step": 4815 + }, + { + "epoch": 0.5080168776371308, + "grad_norm": 0.7310387492179871, + "learning_rate": 0.0007430959580883589, + "loss": 1.4057, + "step": 4816 + }, + { + "epoch": 0.5081223628691983, + "grad_norm": 0.699527382850647, + "learning_rate": 0.0007428449095020192, + "loss": 1.4259, + "step": 4817 + }, + { + "epoch": 0.5082278481012659, + "grad_norm": 0.86371910572052, + "learning_rate": 0.000742593861717443, + "loss": 1.486, + "step": 4818 + }, + { + "epoch": 0.5083333333333333, + "grad_norm": 0.7504265308380127, + "learning_rate": 0.0007423428147627613, + "loss": 1.4596, + "step": 4819 + }, + { + "epoch": 0.5084388185654009, + "grad_norm": 0.8054905533790588, + "learning_rate": 0.0007420917686661055, + "loss": 1.4275, + "step": 4820 + }, + { + "epoch": 0.5085443037974684, + "grad_norm": 0.7517632842063904, + "learning_rate": 0.0007418407234556067, + "loss": 1.4537, + "step": 4821 + 
}, + { + "epoch": 0.5086497890295358, + "grad_norm": 0.7092738747596741, + "learning_rate": 0.0007415896791593955, + "loss": 1.4104, + "step": 4822 + }, + { + "epoch": 0.5087552742616034, + "grad_norm": 0.7787160873413086, + "learning_rate": 0.0007413386358056025, + "loss": 1.4377, + "step": 4823 + }, + { + "epoch": 0.5088607594936709, + "grad_norm": 0.8277494311332703, + "learning_rate": 0.0007410875934223588, + "loss": 1.4442, + "step": 4824 + }, + { + "epoch": 0.5089662447257384, + "grad_norm": 0.6845177412033081, + "learning_rate": 0.0007408365520377945, + "loss": 1.4327, + "step": 4825 + }, + { + "epoch": 0.5090717299578059, + "grad_norm": 0.8213304877281189, + "learning_rate": 0.0007405855116800403, + "loss": 1.3904, + "step": 4826 + }, + { + "epoch": 0.5091772151898735, + "grad_norm": 0.7059573531150818, + "learning_rate": 0.0007403344723772265, + "loss": 1.4, + "step": 4827 + }, + { + "epoch": 0.5092827004219409, + "grad_norm": 0.7011231780052185, + "learning_rate": 0.0007400834341574829, + "loss": 1.4295, + "step": 4828 + }, + { + "epoch": 0.5093881856540085, + "grad_norm": 0.6337414979934692, + "learning_rate": 0.0007398323970489402, + "loss": 1.4131, + "step": 4829 + }, + { + "epoch": 0.509493670886076, + "grad_norm": 0.7152306437492371, + "learning_rate": 0.0007395813610797283, + "loss": 1.4155, + "step": 4830 + }, + { + "epoch": 0.5095991561181434, + "grad_norm": 0.8581161499023438, + "learning_rate": 0.0007393303262779767, + "loss": 1.4284, + "step": 4831 + }, + { + "epoch": 0.509704641350211, + "grad_norm": 0.8766769766807556, + "learning_rate": 0.0007390792926718153, + "loss": 1.4318, + "step": 4832 + }, + { + "epoch": 0.5098101265822785, + "grad_norm": 0.7462860941886902, + "learning_rate": 0.0007388282602893737, + "loss": 1.3805, + "step": 4833 + }, + { + "epoch": 0.509915611814346, + "grad_norm": 1.1493957042694092, + "learning_rate": 0.000738577229158781, + "loss": 1.4141, + "step": 4834 + }, + { + "epoch": 0.5100210970464135, + "grad_norm": 
0.7406513094902039, + "learning_rate": 0.000738326199308167, + "loss": 1.4133, + "step": 4835 + }, + { + "epoch": 0.5101265822784811, + "grad_norm": 1.1460944414138794, + "learning_rate": 0.0007380751707656603, + "loss": 1.397, + "step": 4836 + }, + { + "epoch": 0.5102320675105485, + "grad_norm": 0.8175053596496582, + "learning_rate": 0.0007378241435593901, + "loss": 1.3933, + "step": 4837 + }, + { + "epoch": 0.510337552742616, + "grad_norm": 1.0734773874282837, + "learning_rate": 0.0007375731177174855, + "loss": 1.4424, + "step": 4838 + }, + { + "epoch": 0.5104430379746835, + "grad_norm": 0.8201318383216858, + "learning_rate": 0.0007373220932680751, + "loss": 1.4145, + "step": 4839 + }, + { + "epoch": 0.510548523206751, + "grad_norm": 1.121429443359375, + "learning_rate": 0.0007370710702392873, + "loss": 1.4658, + "step": 4840 + }, + { + "epoch": 0.5106540084388186, + "grad_norm": 0.8257076144218445, + "learning_rate": 0.0007368200486592507, + "loss": 1.4185, + "step": 4841 + }, + { + "epoch": 0.510759493670886, + "grad_norm": 0.8507822751998901, + "learning_rate": 0.0007365690285560932, + "loss": 1.4241, + "step": 4842 + }, + { + "epoch": 0.5108649789029536, + "grad_norm": 0.9065518975257874, + "learning_rate": 0.0007363180099579431, + "loss": 1.4309, + "step": 4843 + }, + { + "epoch": 0.5109704641350211, + "grad_norm": 0.7296246886253357, + "learning_rate": 0.0007360669928929282, + "loss": 1.4292, + "step": 4844 + }, + { + "epoch": 0.5110759493670886, + "grad_norm": 1.0070278644561768, + "learning_rate": 0.000735815977389176, + "loss": 1.4235, + "step": 4845 + }, + { + "epoch": 0.5111814345991561, + "grad_norm": 0.8008151054382324, + "learning_rate": 0.0007355649634748143, + "loss": 1.447, + "step": 4846 + }, + { + "epoch": 0.5112869198312237, + "grad_norm": 0.987079381942749, + "learning_rate": 0.0007353139511779707, + "loss": 1.4187, + "step": 4847 + }, + { + "epoch": 0.5113924050632911, + "grad_norm": 0.8930948376655579, + "learning_rate": 
0.000735062940526772, + "loss": 1.4477, + "step": 4848 + }, + { + "epoch": 0.5114978902953586, + "grad_norm": 0.8229268789291382, + "learning_rate": 0.0007348119315493453, + "loss": 1.4173, + "step": 4849 + }, + { + "epoch": 0.5116033755274262, + "grad_norm": 1.0125408172607422, + "learning_rate": 0.0007345609242738173, + "loss": 1.4422, + "step": 4850 + }, + { + "epoch": 0.5117088607594936, + "grad_norm": 0.7311812043190002, + "learning_rate": 0.0007343099187283149, + "loss": 1.4677, + "step": 4851 + }, + { + "epoch": 0.5118143459915612, + "grad_norm": 0.7481174468994141, + "learning_rate": 0.0007340589149409644, + "loss": 1.4457, + "step": 4852 + }, + { + "epoch": 0.5119198312236287, + "grad_norm": 0.822070300579071, + "learning_rate": 0.0007338079129398917, + "loss": 1.3872, + "step": 4853 + }, + { + "epoch": 0.5120253164556962, + "grad_norm": 0.7711800932884216, + "learning_rate": 0.0007335569127532231, + "loss": 1.4565, + "step": 4854 + }, + { + "epoch": 0.5121308016877637, + "grad_norm": 0.945068359375, + "learning_rate": 0.0007333059144090845, + "loss": 1.4104, + "step": 4855 + }, + { + "epoch": 0.5122362869198313, + "grad_norm": 0.7425305843353271, + "learning_rate": 0.0007330549179356014, + "loss": 1.3986, + "step": 4856 + }, + { + "epoch": 0.5123417721518987, + "grad_norm": 0.9810259938240051, + "learning_rate": 0.0007328039233608993, + "loss": 1.4123, + "step": 4857 + }, + { + "epoch": 0.5124472573839662, + "grad_norm": 0.7305166721343994, + "learning_rate": 0.0007325529307131034, + "loss": 1.3945, + "step": 4858 + }, + { + "epoch": 0.5125527426160338, + "grad_norm": 0.9541597366333008, + "learning_rate": 0.0007323019400203386, + "loss": 1.3827, + "step": 4859 + }, + { + "epoch": 0.5126582278481012, + "grad_norm": 0.62334805727005, + "learning_rate": 0.0007320509513107296, + "loss": 1.4234, + "step": 4860 + }, + { + "epoch": 0.5127637130801688, + "grad_norm": 1.118996262550354, + "learning_rate": 0.0007317999646124011, + "loss": 1.4065, + "step": 4861 + 
}, + { + "epoch": 0.5128691983122363, + "grad_norm": 0.6522722244262695, + "learning_rate": 0.0007315489799534772, + "loss": 1.3948, + "step": 4862 + }, + { + "epoch": 0.5129746835443038, + "grad_norm": 1.1074997186660767, + "learning_rate": 0.000731297997362082, + "loss": 1.4275, + "step": 4863 + }, + { + "epoch": 0.5130801687763713, + "grad_norm": 0.7397345304489136, + "learning_rate": 0.0007310470168663397, + "loss": 1.4338, + "step": 4864 + }, + { + "epoch": 0.5131856540084389, + "grad_norm": 0.8869359493255615, + "learning_rate": 0.0007307960384943736, + "loss": 1.3979, + "step": 4865 + }, + { + "epoch": 0.5132911392405063, + "grad_norm": 0.7166852951049805, + "learning_rate": 0.000730545062274307, + "loss": 1.4115, + "step": 4866 + }, + { + "epoch": 0.5133966244725738, + "grad_norm": 0.6671982407569885, + "learning_rate": 0.0007302940882342634, + "loss": 1.3861, + "step": 4867 + }, + { + "epoch": 0.5135021097046414, + "grad_norm": 0.7960548996925354, + "learning_rate": 0.0007300431164023653, + "loss": 1.4181, + "step": 4868 + }, + { + "epoch": 0.5136075949367088, + "grad_norm": 0.7395235896110535, + "learning_rate": 0.0007297921468067357, + "loss": 1.409, + "step": 4869 + }, + { + "epoch": 0.5137130801687764, + "grad_norm": 0.7252451777458191, + "learning_rate": 0.0007295411794754967, + "loss": 1.4204, + "step": 4870 + }, + { + "epoch": 0.5138185654008439, + "grad_norm": 0.7517849802970886, + "learning_rate": 0.0007292902144367704, + "loss": 1.4437, + "step": 4871 + }, + { + "epoch": 0.5139240506329114, + "grad_norm": 0.6742880344390869, + "learning_rate": 0.0007290392517186791, + "loss": 1.4051, + "step": 4872 + }, + { + "epoch": 0.5140295358649789, + "grad_norm": 1.1704210042953491, + "learning_rate": 0.000728788291349344, + "loss": 1.4755, + "step": 4873 + }, + { + "epoch": 0.5141350210970465, + "grad_norm": 0.8805853724479675, + "learning_rate": 0.0007285373333568868, + "loss": 1.4315, + "step": 4874 + }, + { + "epoch": 0.5142405063291139, + "grad_norm": 
0.8318775296211243, + "learning_rate": 0.0007282863777694283, + "loss": 1.3901, + "step": 4875 + }, + { + "epoch": 0.5143459915611814, + "grad_norm": 0.757864773273468, + "learning_rate": 0.0007280354246150894, + "loss": 1.4406, + "step": 4876 + }, + { + "epoch": 0.514451476793249, + "grad_norm": 0.8855127096176147, + "learning_rate": 0.0007277844739219908, + "loss": 1.4297, + "step": 4877 + }, + { + "epoch": 0.5145569620253164, + "grad_norm": 1.03273606300354, + "learning_rate": 0.0007275335257182526, + "loss": 1.4588, + "step": 4878 + }, + { + "epoch": 0.514662447257384, + "grad_norm": 0.687064528465271, + "learning_rate": 0.000727282580031995, + "loss": 1.451, + "step": 4879 + }, + { + "epoch": 0.5147679324894515, + "grad_norm": 0.825936496257782, + "learning_rate": 0.0007270316368913374, + "loss": 1.3973, + "step": 4880 + }, + { + "epoch": 0.514873417721519, + "grad_norm": 0.6263144016265869, + "learning_rate": 0.0007267806963243995, + "loss": 1.4445, + "step": 4881 + }, + { + "epoch": 0.5149789029535865, + "grad_norm": 0.8696973919868469, + "learning_rate": 0.0007265297583593003, + "loss": 1.4255, + "step": 4882 + }, + { + "epoch": 0.515084388185654, + "grad_norm": 0.7130003571510315, + "learning_rate": 0.0007262788230241588, + "loss": 1.4282, + "step": 4883 + }, + { + "epoch": 0.5151898734177215, + "grad_norm": 0.9217080473899841, + "learning_rate": 0.0007260278903470935, + "loss": 1.4236, + "step": 4884 + }, + { + "epoch": 0.515295358649789, + "grad_norm": 0.7667673230171204, + "learning_rate": 0.0007257769603562227, + "loss": 1.3988, + "step": 4885 + }, + { + "epoch": 0.5154008438818566, + "grad_norm": 0.9505096673965454, + "learning_rate": 0.0007255260330796639, + "loss": 1.4316, + "step": 4886 + }, + { + "epoch": 0.515506329113924, + "grad_norm": 0.8128397464752197, + "learning_rate": 0.0007252751085455355, + "loss": 1.4093, + "step": 4887 + }, + { + "epoch": 0.5156118143459916, + "grad_norm": 0.8148605227470398, + "learning_rate": 0.0007250241867819544, 
+ "loss": 1.444, + "step": 4888 + }, + { + "epoch": 0.5157172995780591, + "grad_norm": 0.6925582885742188, + "learning_rate": 0.0007247732678170375, + "loss": 1.3943, + "step": 4889 + }, + { + "epoch": 0.5158227848101266, + "grad_norm": 0.9743139743804932, + "learning_rate": 0.0007245223516789019, + "loss": 1.3942, + "step": 4890 + }, + { + "epoch": 0.5159282700421941, + "grad_norm": 0.6585618257522583, + "learning_rate": 0.0007242714383956639, + "loss": 1.3738, + "step": 4891 + }, + { + "epoch": 0.5160337552742617, + "grad_norm": 0.9486732482910156, + "learning_rate": 0.0007240205279954395, + "loss": 1.454, + "step": 4892 + }, + { + "epoch": 0.5161392405063291, + "grad_norm": 0.6510959267616272, + "learning_rate": 0.0007237696205063444, + "loss": 1.4251, + "step": 4893 + }, + { + "epoch": 0.5162447257383966, + "grad_norm": 0.6982353329658508, + "learning_rate": 0.0007235187159564942, + "loss": 1.4369, + "step": 4894 + }, + { + "epoch": 0.5163502109704642, + "grad_norm": 0.6970493197441101, + "learning_rate": 0.0007232678143740038, + "loss": 1.3958, + "step": 4895 + }, + { + "epoch": 0.5164556962025316, + "grad_norm": 0.7273386716842651, + "learning_rate": 0.0007230169157869882, + "loss": 1.464, + "step": 4896 + }, + { + "epoch": 0.5165611814345992, + "grad_norm": 0.7861851453781128, + "learning_rate": 0.0007227660202235616, + "loss": 1.4225, + "step": 4897 + }, + { + "epoch": 0.5166666666666667, + "grad_norm": 0.6771386861801147, + "learning_rate": 0.0007225151277118384, + "loss": 1.4504, + "step": 4898 + }, + { + "epoch": 0.5167721518987342, + "grad_norm": 0.7197140455245972, + "learning_rate": 0.0007222642382799322, + "loss": 1.4204, + "step": 4899 + }, + { + "epoch": 0.5168776371308017, + "grad_norm": 0.7300542593002319, + "learning_rate": 0.0007220133519559563, + "loss": 1.4255, + "step": 4900 + }, + { + "epoch": 0.5169831223628693, + "grad_norm": 0.7236276268959045, + "learning_rate": 0.000721762468768024, + "loss": 1.4516, + "step": 4901 + }, + { + "epoch": 
0.5170886075949367, + "grad_norm": 0.7960496544837952, + "learning_rate": 0.0007215115887442478, + "loss": 1.3981, + "step": 4902 + }, + { + "epoch": 0.5171940928270042, + "grad_norm": 0.7226382493972778, + "learning_rate": 0.0007212607119127402, + "loss": 1.4066, + "step": 4903 + }, + { + "epoch": 0.5172995780590718, + "grad_norm": 0.7344061136245728, + "learning_rate": 0.000721009838301613, + "loss": 1.4276, + "step": 4904 + }, + { + "epoch": 0.5174050632911392, + "grad_norm": 0.7243438959121704, + "learning_rate": 0.000720758967938978, + "loss": 1.4432, + "step": 4905 + }, + { + "epoch": 0.5175105485232068, + "grad_norm": 1.0177100896835327, + "learning_rate": 0.0007205081008529463, + "loss": 1.41, + "step": 4906 + }, + { + "epoch": 0.5176160337552742, + "grad_norm": 0.6996870040893555, + "learning_rate": 0.0007202572370716292, + "loss": 1.3625, + "step": 4907 + }, + { + "epoch": 0.5177215189873418, + "grad_norm": 0.976451575756073, + "learning_rate": 0.000720006376623137, + "loss": 1.4456, + "step": 4908 + }, + { + "epoch": 0.5178270042194093, + "grad_norm": 0.6772083044052124, + "learning_rate": 0.0007197555195355799, + "loss": 1.4027, + "step": 4909 + }, + { + "epoch": 0.5179324894514767, + "grad_norm": 1.0483055114746094, + "learning_rate": 0.0007195046658370675, + "loss": 1.4707, + "step": 4910 + }, + { + "epoch": 0.5180379746835443, + "grad_norm": 0.6269174814224243, + "learning_rate": 0.0007192538155557094, + "loss": 1.409, + "step": 4911 + }, + { + "epoch": 0.5181434599156118, + "grad_norm": 0.8311445713043213, + "learning_rate": 0.0007190029687196148, + "loss": 1.3977, + "step": 4912 + }, + { + "epoch": 0.5182489451476793, + "grad_norm": 0.6536158919334412, + "learning_rate": 0.0007187521253568919, + "loss": 1.394, + "step": 4913 + }, + { + "epoch": 0.5183544303797468, + "grad_norm": 0.6651986837387085, + "learning_rate": 0.0007185012854956491, + "loss": 1.3788, + "step": 4914 + }, + { + "epoch": 0.5184599156118144, + "grad_norm": 0.7914029359817505, + 
"learning_rate": 0.0007182504491639942, + "loss": 1.4147, + "step": 4915 + }, + { + "epoch": 0.5185654008438818, + "grad_norm": 0.702215313911438, + "learning_rate": 0.000717999616390035, + "loss": 1.4372, + "step": 4916 + }, + { + "epoch": 0.5186708860759494, + "grad_norm": 0.6706388592720032, + "learning_rate": 0.0007177487872018784, + "loss": 1.4262, + "step": 4917 + }, + { + "epoch": 0.5187763713080169, + "grad_norm": 0.7625278234481812, + "learning_rate": 0.000717497961627631, + "loss": 1.392, + "step": 4918 + }, + { + "epoch": 0.5188818565400843, + "grad_norm": 0.7811239361763, + "learning_rate": 0.0007172471396953991, + "loss": 1.4052, + "step": 4919 + }, + { + "epoch": 0.5189873417721519, + "grad_norm": 0.8237462639808655, + "learning_rate": 0.0007169963214332885, + "loss": 1.4409, + "step": 4920 + }, + { + "epoch": 0.5190928270042194, + "grad_norm": 0.7158022522926331, + "learning_rate": 0.0007167455068694046, + "loss": 1.4382, + "step": 4921 + }, + { + "epoch": 0.5191983122362869, + "grad_norm": 0.6993364691734314, + "learning_rate": 0.0007164946960318525, + "loss": 1.4592, + "step": 4922 + }, + { + "epoch": 0.5193037974683544, + "grad_norm": 0.6671636700630188, + "learning_rate": 0.0007162438889487365, + "loss": 1.4316, + "step": 4923 + }, + { + "epoch": 0.519409282700422, + "grad_norm": 0.7109612822532654, + "learning_rate": 0.0007159930856481614, + "loss": 1.3733, + "step": 4924 + }, + { + "epoch": 0.5195147679324894, + "grad_norm": 0.6266855597496033, + "learning_rate": 0.0007157422861582306, + "loss": 1.4124, + "step": 4925 + }, + { + "epoch": 0.519620253164557, + "grad_norm": 0.6493484377861023, + "learning_rate": 0.0007154914905070475, + "loss": 1.4262, + "step": 4926 + }, + { + "epoch": 0.5197257383966245, + "grad_norm": 0.6698410511016846, + "learning_rate": 0.0007152406987227149, + "loss": 1.397, + "step": 4927 + }, + { + "epoch": 0.5198312236286919, + "grad_norm": 0.7869201898574829, + "learning_rate": 0.0007149899108333354, + "loss": 1.4322, + 
"step": 4928 + }, + { + "epoch": 0.5199367088607595, + "grad_norm": 0.6523434519767761, + "learning_rate": 0.0007147391268670109, + "loss": 1.422, + "step": 4929 + }, + { + "epoch": 0.520042194092827, + "grad_norm": 1.1503511667251587, + "learning_rate": 0.000714488346851843, + "loss": 1.4102, + "step": 4930 + }, + { + "epoch": 0.5201476793248945, + "grad_norm": 0.6897754073143005, + "learning_rate": 0.000714237570815933, + "loss": 1.4232, + "step": 4931 + }, + { + "epoch": 0.520253164556962, + "grad_norm": 1.153412938117981, + "learning_rate": 0.0007139867987873812, + "loss": 1.4347, + "step": 4932 + }, + { + "epoch": 0.5203586497890296, + "grad_norm": 0.7253311276435852, + "learning_rate": 0.0007137360307942885, + "loss": 1.4523, + "step": 4933 + }, + { + "epoch": 0.520464135021097, + "grad_norm": 1.1717942953109741, + "learning_rate": 0.0007134852668647543, + "loss": 1.4094, + "step": 4934 + }, + { + "epoch": 0.5205696202531646, + "grad_norm": 0.7748826742172241, + "learning_rate": 0.0007132345070268781, + "loss": 1.3731, + "step": 4935 + }, + { + "epoch": 0.5206751054852321, + "grad_norm": 1.0341068506240845, + "learning_rate": 0.0007129837513087587, + "loss": 1.4166, + "step": 4936 + }, + { + "epoch": 0.5207805907172995, + "grad_norm": 0.8377732634544373, + "learning_rate": 0.0007127329997384946, + "loss": 1.4173, + "step": 4937 + }, + { + "epoch": 0.5208860759493671, + "grad_norm": 0.7837262153625488, + "learning_rate": 0.0007124822523441837, + "loss": 1.4222, + "step": 4938 + }, + { + "epoch": 0.5209915611814346, + "grad_norm": 0.8280486464500427, + "learning_rate": 0.0007122315091539234, + "loss": 1.4391, + "step": 4939 + }, + { + "epoch": 0.5210970464135021, + "grad_norm": 0.7385804057121277, + "learning_rate": 0.000711980770195811, + "loss": 1.405, + "step": 4940 + }, + { + "epoch": 0.5212025316455696, + "grad_norm": 0.6984983086585999, + "learning_rate": 0.0007117300354979423, + "loss": 1.4516, + "step": 4941 + }, + { + "epoch": 0.5213080168776372, + 
"grad_norm": 0.7164515256881714, + "learning_rate": 0.0007114793050884145, + "loss": 1.4292, + "step": 4942 + }, + { + "epoch": 0.5214135021097046, + "grad_norm": 0.9104859232902527, + "learning_rate": 0.0007112285789953226, + "loss": 1.4123, + "step": 4943 + }, + { + "epoch": 0.5215189873417722, + "grad_norm": 0.7688770294189453, + "learning_rate": 0.0007109778572467616, + "loss": 1.4224, + "step": 4944 + }, + { + "epoch": 0.5216244725738397, + "grad_norm": 0.7102062702178955, + "learning_rate": 0.0007107271398708266, + "loss": 1.3984, + "step": 4945 + }, + { + "epoch": 0.5217299578059071, + "grad_norm": 0.6880765557289124, + "learning_rate": 0.0007104764268956111, + "loss": 1.4136, + "step": 4946 + }, + { + "epoch": 0.5218354430379747, + "grad_norm": 0.9311950206756592, + "learning_rate": 0.0007102257183492092, + "loss": 1.4301, + "step": 4947 + }, + { + "epoch": 0.5219409282700422, + "grad_norm": 0.6469693183898926, + "learning_rate": 0.0007099750142597138, + "loss": 1.414, + "step": 4948 + }, + { + "epoch": 0.5220464135021097, + "grad_norm": 0.999125063419342, + "learning_rate": 0.0007097243146552175, + "loss": 1.4221, + "step": 4949 + }, + { + "epoch": 0.5221518987341772, + "grad_norm": 0.6726298928260803, + "learning_rate": 0.0007094736195638128, + "loss": 1.4255, + "step": 4950 + }, + { + "epoch": 0.5222573839662448, + "grad_norm": 0.9582846760749817, + "learning_rate": 0.000709222929013591, + "loss": 1.4296, + "step": 4951 + }, + { + "epoch": 0.5223628691983122, + "grad_norm": 0.6580064296722412, + "learning_rate": 0.0007089722430326434, + "loss": 1.4052, + "step": 4952 + }, + { + "epoch": 0.5224683544303798, + "grad_norm": 1.0403331518173218, + "learning_rate": 0.0007087215616490606, + "loss": 1.3621, + "step": 4953 + }, + { + "epoch": 0.5225738396624473, + "grad_norm": 0.6695725917816162, + "learning_rate": 0.0007084708848909326, + "loss": 1.4095, + "step": 4954 + }, + { + "epoch": 0.5226793248945147, + "grad_norm": 0.8680985569953918, + "learning_rate": 
0.000708220212786349, + "loss": 1.4277, + "step": 4955 + }, + { + "epoch": 0.5227848101265823, + "grad_norm": 0.8235695958137512, + "learning_rate": 0.000707969545363399, + "loss": 1.4119, + "step": 4956 + }, + { + "epoch": 0.5228902953586498, + "grad_norm": 0.5971724987030029, + "learning_rate": 0.000707718882650171, + "loss": 1.4294, + "step": 4957 + }, + { + "epoch": 0.5229957805907173, + "grad_norm": 0.6747058033943176, + "learning_rate": 0.0007074682246747526, + "loss": 1.4102, + "step": 4958 + }, + { + "epoch": 0.5231012658227848, + "grad_norm": 0.6986768841743469, + "learning_rate": 0.0007072175714652321, + "loss": 1.3775, + "step": 4959 + }, + { + "epoch": 0.5232067510548524, + "grad_norm": 0.7083244323730469, + "learning_rate": 0.0007069669230496961, + "loss": 1.4123, + "step": 4960 + }, + { + "epoch": 0.5233122362869198, + "grad_norm": 0.789608359336853, + "learning_rate": 0.0007067162794562309, + "loss": 1.3946, + "step": 4961 + }, + { + "epoch": 0.5234177215189874, + "grad_norm": 0.6549357771873474, + "learning_rate": 0.0007064656407129224, + "loss": 1.4139, + "step": 4962 + }, + { + "epoch": 0.5235232067510549, + "grad_norm": 0.7543911933898926, + "learning_rate": 0.000706215006847856, + "loss": 1.4358, + "step": 4963 + }, + { + "epoch": 0.5236286919831223, + "grad_norm": 0.7024418711662292, + "learning_rate": 0.0007059643778891164, + "loss": 1.4059, + "step": 4964 + }, + { + "epoch": 0.5237341772151899, + "grad_norm": 0.7335165143013, + "learning_rate": 0.0007057137538647878, + "loss": 1.4813, + "step": 4965 + }, + { + "epoch": 0.5238396624472574, + "grad_norm": 0.9118576049804688, + "learning_rate": 0.0007054631348029539, + "loss": 1.4011, + "step": 4966 + }, + { + "epoch": 0.5239451476793249, + "grad_norm": 0.6874716877937317, + "learning_rate": 0.0007052125207316975, + "loss": 1.3726, + "step": 4967 + }, + { + "epoch": 0.5240506329113924, + "grad_norm": 1.0861566066741943, + "learning_rate": 0.0007049619116791019, + "loss": 1.3656, + "step": 4968 + 
}, + { + "epoch": 0.52415611814346, + "grad_norm": 0.6770743131637573, + "learning_rate": 0.0007047113076732485, + "loss": 1.414, + "step": 4969 + }, + { + "epoch": 0.5242616033755274, + "grad_norm": 1.2851618528366089, + "learning_rate": 0.0007044607087422191, + "loss": 1.4498, + "step": 4970 + }, + { + "epoch": 0.524367088607595, + "grad_norm": 0.685655415058136, + "learning_rate": 0.0007042101149140943, + "loss": 1.4783, + "step": 4971 + }, + { + "epoch": 0.5244725738396624, + "grad_norm": 0.841391921043396, + "learning_rate": 0.0007039595262169544, + "loss": 1.3615, + "step": 4972 + }, + { + "epoch": 0.5245780590717299, + "grad_norm": 0.6619018316268921, + "learning_rate": 0.0007037089426788792, + "loss": 1.4083, + "step": 4973 + }, + { + "epoch": 0.5246835443037975, + "grad_norm": 0.6961378455162048, + "learning_rate": 0.0007034583643279479, + "loss": 1.4083, + "step": 4974 + }, + { + "epoch": 0.5247890295358649, + "grad_norm": 0.7064463496208191, + "learning_rate": 0.0007032077911922384, + "loss": 1.4386, + "step": 4975 + }, + { + "epoch": 0.5248945147679325, + "grad_norm": 0.7796102166175842, + "learning_rate": 0.0007029572232998298, + "loss": 1.4137, + "step": 4976 + }, + { + "epoch": 0.525, + "grad_norm": 0.6909579634666443, + "learning_rate": 0.0007027066606787988, + "loss": 1.4149, + "step": 4977 + }, + { + "epoch": 0.5251054852320675, + "grad_norm": 0.8030523657798767, + "learning_rate": 0.0007024561033572223, + "loss": 1.4158, + "step": 4978 + }, + { + "epoch": 0.525210970464135, + "grad_norm": 0.7313268780708313, + "learning_rate": 0.0007022055513631764, + "loss": 1.3719, + "step": 4979 + }, + { + "epoch": 0.5253164556962026, + "grad_norm": 1.0302358865737915, + "learning_rate": 0.000701955004724737, + "loss": 1.3896, + "step": 4980 + }, + { + "epoch": 0.52542194092827, + "grad_norm": 0.6597002148628235, + "learning_rate": 0.0007017044634699787, + "loss": 1.4368, + "step": 4981 + }, + { + "epoch": 0.5255274261603375, + "grad_norm": 0.9059293866157532, 
+ "learning_rate": 0.0007014539276269762, + "loss": 1.3939, + "step": 4982 + }, + { + "epoch": 0.5256329113924051, + "grad_norm": 0.6892435550689697, + "learning_rate": 0.0007012033972238031, + "loss": 1.4035, + "step": 4983 + }, + { + "epoch": 0.5257383966244725, + "grad_norm": 0.6970540881156921, + "learning_rate": 0.0007009528722885323, + "loss": 1.3975, + "step": 4984 + }, + { + "epoch": 0.5258438818565401, + "grad_norm": 0.6883819103240967, + "learning_rate": 0.0007007023528492372, + "loss": 1.4117, + "step": 4985 + }, + { + "epoch": 0.5259493670886076, + "grad_norm": 0.7255256175994873, + "learning_rate": 0.0007004518389339893, + "loss": 1.396, + "step": 4986 + }, + { + "epoch": 0.5260548523206751, + "grad_norm": 0.6679804921150208, + "learning_rate": 0.0007002013305708598, + "loss": 1.3727, + "step": 4987 + }, + { + "epoch": 0.5261603375527426, + "grad_norm": 0.6417363286018372, + "learning_rate": 0.0006999508277879196, + "loss": 1.4131, + "step": 4988 + }, + { + "epoch": 0.5262658227848102, + "grad_norm": 0.6882642507553101, + "learning_rate": 0.0006997003306132386, + "loss": 1.456, + "step": 4989 + }, + { + "epoch": 0.5263713080168776, + "grad_norm": 0.6520658135414124, + "learning_rate": 0.0006994498390748865, + "loss": 1.4135, + "step": 4990 + }, + { + "epoch": 0.5264767932489451, + "grad_norm": 0.7551886439323425, + "learning_rate": 0.0006991993532009319, + "loss": 1.4148, + "step": 4991 + }, + { + "epoch": 0.5265822784810127, + "grad_norm": 0.6985572576522827, + "learning_rate": 0.0006989488730194432, + "loss": 1.4088, + "step": 4992 + }, + { + "epoch": 0.5266877637130801, + "grad_norm": 0.6666409373283386, + "learning_rate": 0.0006986983985584874, + "loss": 1.4159, + "step": 4993 + }, + { + "epoch": 0.5267932489451477, + "grad_norm": 0.8049547076225281, + "learning_rate": 0.0006984479298461323, + "loss": 1.4528, + "step": 4994 + }, + { + "epoch": 0.5268987341772152, + "grad_norm": 0.6436857581138611, + "learning_rate": 0.0006981974669104436, + "loss": 
1.4277, + "step": 4995 + }, + { + "epoch": 0.5270042194092827, + "grad_norm": 0.713212788105011, + "learning_rate": 0.0006979470097794871, + "loss": 1.4167, + "step": 4996 + }, + { + "epoch": 0.5271097046413502, + "grad_norm": 0.6655982732772827, + "learning_rate": 0.0006976965584813277, + "loss": 1.3921, + "step": 4997 + }, + { + "epoch": 0.5272151898734178, + "grad_norm": 0.7040588855743408, + "learning_rate": 0.0006974461130440298, + "loss": 1.4313, + "step": 4998 + }, + { + "epoch": 0.5273206751054852, + "grad_norm": 0.7486143112182617, + "learning_rate": 0.0006971956734956569, + "loss": 1.4493, + "step": 4999 + }, + { + "epoch": 0.5274261603375527, + "grad_norm": 0.6468159556388855, + "learning_rate": 0.0006969452398642721, + "loss": 1.4117, + "step": 5000 + }, + { + "epoch": 0.5275316455696203, + "grad_norm": 0.669723391532898, + "learning_rate": 0.0006966948121779378, + "loss": 1.4481, + "step": 5001 + }, + { + "epoch": 0.5276371308016877, + "grad_norm": 0.7074394226074219, + "learning_rate": 0.0006964443904647152, + "loss": 1.4408, + "step": 5002 + }, + { + "epoch": 0.5277426160337553, + "grad_norm": 0.6956746578216553, + "learning_rate": 0.0006961939747526661, + "loss": 1.4144, + "step": 5003 + }, + { + "epoch": 0.5278481012658228, + "grad_norm": 0.6767659783363342, + "learning_rate": 0.0006959435650698504, + "loss": 1.4077, + "step": 5004 + }, + { + "epoch": 0.5279535864978903, + "grad_norm": 0.6879178881645203, + "learning_rate": 0.0006956931614443278, + "loss": 1.3944, + "step": 5005 + }, + { + "epoch": 0.5280590717299578, + "grad_norm": 0.6969015002250671, + "learning_rate": 0.0006954427639041572, + "loss": 1.3799, + "step": 5006 + }, + { + "epoch": 0.5281645569620254, + "grad_norm": 0.6527873277664185, + "learning_rate": 0.000695192372477397, + "loss": 1.4023, + "step": 5007 + }, + { + "epoch": 0.5282700421940928, + "grad_norm": 0.6439434885978699, + "learning_rate": 0.0006949419871921047, + "loss": 1.4246, + "step": 5008 + }, + { + "epoch": 
0.5283755274261603, + "grad_norm": 0.734341025352478, + "learning_rate": 0.0006946916080763373, + "loss": 1.4121, + "step": 5009 + }, + { + "epoch": 0.5284810126582279, + "grad_norm": 0.6388939619064331, + "learning_rate": 0.0006944412351581506, + "loss": 1.4163, + "step": 5010 + }, + { + "epoch": 0.5285864978902953, + "grad_norm": 0.6951176524162292, + "learning_rate": 0.000694190868465601, + "loss": 1.3831, + "step": 5011 + }, + { + "epoch": 0.5286919831223629, + "grad_norm": 0.6847085952758789, + "learning_rate": 0.0006939405080267428, + "loss": 1.4385, + "step": 5012 + }, + { + "epoch": 0.5287974683544304, + "grad_norm": 0.7009779810905457, + "learning_rate": 0.0006936901538696303, + "loss": 1.4051, + "step": 5013 + }, + { + "epoch": 0.5289029535864979, + "grad_norm": 0.6498489379882812, + "learning_rate": 0.0006934398060223168, + "loss": 1.3979, + "step": 5014 + }, + { + "epoch": 0.5290084388185654, + "grad_norm": 0.7094370722770691, + "learning_rate": 0.0006931894645128551, + "loss": 1.3555, + "step": 5015 + }, + { + "epoch": 0.529113924050633, + "grad_norm": 0.6607550382614136, + "learning_rate": 0.0006929391293692972, + "loss": 1.4322, + "step": 5016 + }, + { + "epoch": 0.5292194092827004, + "grad_norm": 0.7258709073066711, + "learning_rate": 0.0006926888006196944, + "loss": 1.4378, + "step": 5017 + }, + { + "epoch": 0.5293248945147679, + "grad_norm": 0.6616054773330688, + "learning_rate": 0.0006924384782920971, + "loss": 1.408, + "step": 5018 + }, + { + "epoch": 0.5294303797468355, + "grad_norm": 0.6928311586380005, + "learning_rate": 0.0006921881624145554, + "loss": 1.3892, + "step": 5019 + }, + { + "epoch": 0.5295358649789029, + "grad_norm": 0.6769385933876038, + "learning_rate": 0.0006919378530151182, + "loss": 1.4222, + "step": 5020 + }, + { + "epoch": 0.5296413502109705, + "grad_norm": 0.7320187091827393, + "learning_rate": 0.0006916875501218343, + "loss": 1.3978, + "step": 5021 + }, + { + "epoch": 0.529746835443038, + "grad_norm": 0.6890569925308228, 
+ "learning_rate": 0.0006914372537627512, + "loss": 1.41, + "step": 5022 + }, + { + "epoch": 0.5298523206751055, + "grad_norm": 0.6549115180969238, + "learning_rate": 0.0006911869639659159, + "loss": 1.4018, + "step": 5023 + }, + { + "epoch": 0.529957805907173, + "grad_norm": 0.661676824092865, + "learning_rate": 0.0006909366807593744, + "loss": 1.3991, + "step": 5024 + }, + { + "epoch": 0.5300632911392406, + "grad_norm": 0.7060753703117371, + "learning_rate": 0.0006906864041711725, + "loss": 1.3794, + "step": 5025 + }, + { + "epoch": 0.530168776371308, + "grad_norm": 0.6451223492622375, + "learning_rate": 0.0006904361342293546, + "loss": 1.394, + "step": 5026 + }, + { + "epoch": 0.5302742616033755, + "grad_norm": 0.7025732398033142, + "learning_rate": 0.000690185870961965, + "loss": 1.4232, + "step": 5027 + }, + { + "epoch": 0.5303797468354431, + "grad_norm": 0.607715368270874, + "learning_rate": 0.0006899356143970467, + "loss": 1.3941, + "step": 5028 + }, + { + "epoch": 0.5304852320675105, + "grad_norm": 0.7202771306037903, + "learning_rate": 0.0006896853645626424, + "loss": 1.3696, + "step": 5029 + }, + { + "epoch": 0.5305907172995781, + "grad_norm": 0.7331110239028931, + "learning_rate": 0.0006894351214867937, + "loss": 1.3803, + "step": 5030 + }, + { + "epoch": 0.5306962025316456, + "grad_norm": 0.6622663140296936, + "learning_rate": 0.0006891848851975416, + "loss": 1.4187, + "step": 5031 + }, + { + "epoch": 0.5308016877637131, + "grad_norm": 0.6541109085083008, + "learning_rate": 0.0006889346557229265, + "loss": 1.4384, + "step": 5032 + }, + { + "epoch": 0.5309071729957806, + "grad_norm": 0.7499716877937317, + "learning_rate": 0.0006886844330909877, + "loss": 1.4207, + "step": 5033 + }, + { + "epoch": 0.5310126582278482, + "grad_norm": 0.6720448732376099, + "learning_rate": 0.0006884342173297639, + "loss": 1.4215, + "step": 5034 + }, + { + "epoch": 0.5311181434599156, + "grad_norm": 0.6923902034759521, + "learning_rate": 0.000688184008467293, + "loss": 
1.4206, + "step": 5035 + }, + { + "epoch": 0.5312236286919831, + "grad_norm": 0.64171302318573, + "learning_rate": 0.0006879338065316122, + "loss": 1.4217, + "step": 5036 + }, + { + "epoch": 0.5313291139240506, + "grad_norm": 0.7664328813552856, + "learning_rate": 0.0006876836115507579, + "loss": 1.3887, + "step": 5037 + }, + { + "epoch": 0.5314345991561181, + "grad_norm": 0.7779372334480286, + "learning_rate": 0.0006874334235527657, + "loss": 1.4561, + "step": 5038 + }, + { + "epoch": 0.5315400843881857, + "grad_norm": 0.7073723673820496, + "learning_rate": 0.0006871832425656702, + "loss": 1.4277, + "step": 5039 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 0.6451733708381653, + "learning_rate": 0.0006869330686175058, + "loss": 1.3898, + "step": 5040 + }, + { + "epoch": 0.5317510548523207, + "grad_norm": 0.7410340309143066, + "learning_rate": 0.0006866829017363054, + "loss": 1.4152, + "step": 5041 + }, + { + "epoch": 0.5318565400843882, + "grad_norm": 0.6607666611671448, + "learning_rate": 0.0006864327419501017, + "loss": 1.4147, + "step": 5042 + }, + { + "epoch": 0.5319620253164556, + "grad_norm": 0.9245789647102356, + "learning_rate": 0.0006861825892869262, + "loss": 1.3754, + "step": 5043 + }, + { + "epoch": 0.5320675105485232, + "grad_norm": 0.6382090449333191, + "learning_rate": 0.0006859324437748099, + "loss": 1.4123, + "step": 5044 + }, + { + "epoch": 0.5321729957805907, + "grad_norm": 0.90790194272995, + "learning_rate": 0.0006856823054417825, + "loss": 1.3874, + "step": 5045 + }, + { + "epoch": 0.5322784810126582, + "grad_norm": 0.6356422901153564, + "learning_rate": 0.0006854321743158737, + "loss": 1.447, + "step": 5046 + }, + { + "epoch": 0.5323839662447257, + "grad_norm": 0.8217293620109558, + "learning_rate": 0.0006851820504251117, + "loss": 1.4038, + "step": 5047 + }, + { + "epoch": 0.5324894514767933, + "grad_norm": 0.6523387432098389, + "learning_rate": 0.0006849319337975242, + "loss": 1.3743, + "step": 5048 + }, + { + "epoch": 
0.5325949367088607, + "grad_norm": 0.7533002495765686, + "learning_rate": 0.0006846818244611376, + "loss": 1.4304, + "step": 5049 + }, + { + "epoch": 0.5327004219409283, + "grad_norm": 0.6754258275032043, + "learning_rate": 0.0006844317224439788, + "loss": 1.4052, + "step": 5050 + }, + { + "epoch": 0.5328059071729958, + "grad_norm": 0.6593883037567139, + "learning_rate": 0.0006841816277740722, + "loss": 1.421, + "step": 5051 + }, + { + "epoch": 0.5329113924050632, + "grad_norm": 0.6713997721672058, + "learning_rate": 0.0006839315404794424, + "loss": 1.4211, + "step": 5052 + }, + { + "epoch": 0.5330168776371308, + "grad_norm": 0.7029459476470947, + "learning_rate": 0.0006836814605881131, + "loss": 1.4189, + "step": 5053 + }, + { + "epoch": 0.5331223628691983, + "grad_norm": 0.701063334941864, + "learning_rate": 0.0006834313881281066, + "loss": 1.4265, + "step": 5054 + }, + { + "epoch": 0.5332278481012658, + "grad_norm": 0.6859737038612366, + "learning_rate": 0.0006831813231274451, + "loss": 1.4218, + "step": 5055 + }, + { + "epoch": 0.5333333333333333, + "grad_norm": 0.7039437294006348, + "learning_rate": 0.0006829312656141496, + "loss": 1.4218, + "step": 5056 + }, + { + "epoch": 0.5334388185654009, + "grad_norm": 0.9734662771224976, + "learning_rate": 0.0006826812156162401, + "loss": 1.44, + "step": 5057 + }, + { + "epoch": 0.5335443037974683, + "grad_norm": 0.7329657673835754, + "learning_rate": 0.0006824311731617363, + "loss": 1.4245, + "step": 5058 + }, + { + "epoch": 0.5336497890295359, + "grad_norm": 1.1695228815078735, + "learning_rate": 0.0006821811382786561, + "loss": 1.4037, + "step": 5059 + }, + { + "epoch": 0.5337552742616034, + "grad_norm": 0.8419864773750305, + "learning_rate": 0.0006819311109950177, + "loss": 1.3979, + "step": 5060 + }, + { + "epoch": 0.5338607594936708, + "grad_norm": 1.0220141410827637, + "learning_rate": 0.0006816810913388379, + "loss": 1.442, + "step": 5061 + }, + { + "epoch": 0.5339662447257384, + "grad_norm": 0.9535608291625977, 
+ "learning_rate": 0.0006814310793381322, + "loss": 1.3892, + "step": 5062 + }, + { + "epoch": 0.5340717299578059, + "grad_norm": 1.1654311418533325, + "learning_rate": 0.0006811810750209161, + "loss": 1.396, + "step": 5063 + }, + { + "epoch": 0.5341772151898734, + "grad_norm": 0.8948180675506592, + "learning_rate": 0.0006809310784152039, + "loss": 1.4437, + "step": 5064 + }, + { + "epoch": 0.5342827004219409, + "grad_norm": 0.6937078833580017, + "learning_rate": 0.0006806810895490087, + "loss": 1.3883, + "step": 5065 + }, + { + "epoch": 0.5343881856540085, + "grad_norm": 1.2373156547546387, + "learning_rate": 0.000680431108450343, + "loss": 1.4095, + "step": 5066 + }, + { + "epoch": 0.5344936708860759, + "grad_norm": 0.7528419494628906, + "learning_rate": 0.0006801811351472185, + "loss": 1.3936, + "step": 5067 + }, + { + "epoch": 0.5345991561181435, + "grad_norm": 1.2156625986099243, + "learning_rate": 0.000679931169667646, + "loss": 1.4477, + "step": 5068 + }, + { + "epoch": 0.534704641350211, + "grad_norm": 0.967000424861908, + "learning_rate": 0.0006796812120396351, + "loss": 1.3931, + "step": 5069 + }, + { + "epoch": 0.5348101265822784, + "grad_norm": 1.1576319932937622, + "learning_rate": 0.0006794312622911953, + "loss": 1.3704, + "step": 5070 + }, + { + "epoch": 0.534915611814346, + "grad_norm": 1.0860360860824585, + "learning_rate": 0.0006791813204503342, + "loss": 1.417, + "step": 5071 + }, + { + "epoch": 0.5350210970464135, + "grad_norm": 0.8885681629180908, + "learning_rate": 0.0006789313865450594, + "loss": 1.4337, + "step": 5072 + }, + { + "epoch": 0.535126582278481, + "grad_norm": 1.2900629043579102, + "learning_rate": 0.0006786814606033773, + "loss": 1.3985, + "step": 5073 + }, + { + "epoch": 0.5352320675105485, + "grad_norm": 0.7074217796325684, + "learning_rate": 0.0006784315426532929, + "loss": 1.4541, + "step": 5074 + }, + { + "epoch": 0.5353375527426161, + "grad_norm": 1.3574059009552002, + "learning_rate": 0.0006781816327228112, + "loss": 
1.4444, + "step": 5075 + }, + { + "epoch": 0.5354430379746835, + "grad_norm": 0.7338024973869324, + "learning_rate": 0.0006779317308399357, + "loss": 1.4098, + "step": 5076 + }, + { + "epoch": 0.5355485232067511, + "grad_norm": 1.1590943336486816, + "learning_rate": 0.000677681837032669, + "loss": 1.409, + "step": 5077 + }, + { + "epoch": 0.5356540084388186, + "grad_norm": 0.7004222869873047, + "learning_rate": 0.0006774319513290132, + "loss": 1.4269, + "step": 5078 + }, + { + "epoch": 0.535759493670886, + "grad_norm": 0.8437802791595459, + "learning_rate": 0.0006771820737569689, + "loss": 1.3971, + "step": 5079 + }, + { + "epoch": 0.5358649789029536, + "grad_norm": 0.830391526222229, + "learning_rate": 0.0006769322043445363, + "loss": 1.4134, + "step": 5080 + }, + { + "epoch": 0.5359704641350211, + "grad_norm": 0.8171951174736023, + "learning_rate": 0.0006766823431197147, + "loss": 1.3953, + "step": 5081 + }, + { + "epoch": 0.5360759493670886, + "grad_norm": 0.7000376582145691, + "learning_rate": 0.0006764324901105022, + "loss": 1.4171, + "step": 5082 + }, + { + "epoch": 0.5361814345991561, + "grad_norm": 0.8018508553504944, + "learning_rate": 0.000676182645344896, + "loss": 1.3851, + "step": 5083 + }, + { + "epoch": 0.5362869198312237, + "grad_norm": 0.6495301127433777, + "learning_rate": 0.0006759328088508925, + "loss": 1.4219, + "step": 5084 + }, + { + "epoch": 0.5363924050632911, + "grad_norm": 0.6691861748695374, + "learning_rate": 0.0006756829806564872, + "loss": 1.4203, + "step": 5085 + }, + { + "epoch": 0.5364978902953587, + "grad_norm": 0.772855281829834, + "learning_rate": 0.0006754331607896742, + "loss": 1.3691, + "step": 5086 + }, + { + "epoch": 0.5366033755274262, + "grad_norm": 0.6178820729255676, + "learning_rate": 0.0006751833492784476, + "loss": 1.3675, + "step": 5087 + }, + { + "epoch": 0.5367088607594936, + "grad_norm": 0.771080493927002, + "learning_rate": 0.0006749335461507995, + "loss": 1.4483, + "step": 5088 + }, + { + "epoch": 
0.5368143459915612, + "grad_norm": 0.6420205235481262, + "learning_rate": 0.000674683751434722, + "loss": 1.4123, + "step": 5089 + }, + { + "epoch": 0.5369198312236287, + "grad_norm": 0.7296054363250732, + "learning_rate": 0.0006744339651582059, + "loss": 1.4472, + "step": 5090 + }, + { + "epoch": 0.5370253164556962, + "grad_norm": 0.667548418045044, + "learning_rate": 0.0006741841873492406, + "loss": 1.4348, + "step": 5091 + }, + { + "epoch": 0.5371308016877637, + "grad_norm": 0.8346918225288391, + "learning_rate": 0.0006739344180358153, + "loss": 1.3384, + "step": 5092 + }, + { + "epoch": 0.5372362869198313, + "grad_norm": 0.6514577865600586, + "learning_rate": 0.0006736846572459178, + "loss": 1.386, + "step": 5093 + }, + { + "epoch": 0.5373417721518987, + "grad_norm": 0.9260113835334778, + "learning_rate": 0.0006734349050075348, + "loss": 1.3874, + "step": 5094 + }, + { + "epoch": 0.5374472573839663, + "grad_norm": 0.7480331063270569, + "learning_rate": 0.0006731851613486526, + "loss": 1.3944, + "step": 5095 + }, + { + "epoch": 0.5375527426160338, + "grad_norm": 0.8728125095367432, + "learning_rate": 0.0006729354262972561, + "loss": 1.4008, + "step": 5096 + }, + { + "epoch": 0.5376582278481012, + "grad_norm": 0.7251887321472168, + "learning_rate": 0.0006726856998813291, + "loss": 1.4166, + "step": 5097 + }, + { + "epoch": 0.5377637130801688, + "grad_norm": 0.7554314732551575, + "learning_rate": 0.0006724359821288552, + "loss": 1.4052, + "step": 5098 + }, + { + "epoch": 0.5378691983122363, + "grad_norm": 0.6467447280883789, + "learning_rate": 0.0006721862730678164, + "loss": 1.4131, + "step": 5099 + }, + { + "epoch": 0.5379746835443038, + "grad_norm": 0.6923406720161438, + "learning_rate": 0.0006719365727261935, + "loss": 1.3618, + "step": 5100 + }, + { + "epoch": 0.5380801687763713, + "grad_norm": 0.638073205947876, + "learning_rate": 0.0006716868811319671, + "loss": 1.4229, + "step": 5101 + }, + { + "epoch": 0.5381856540084389, + "grad_norm": 
0.7282451391220093, + "learning_rate": 0.000671437198313116, + "loss": 1.4264, + "step": 5102 + }, + { + "epoch": 0.5382911392405063, + "grad_norm": 0.8132507801055908, + "learning_rate": 0.0006711875242976187, + "loss": 1.4104, + "step": 5103 + }, + { + "epoch": 0.5383966244725739, + "grad_norm": 0.7101955413818359, + "learning_rate": 0.0006709378591134523, + "loss": 1.3881, + "step": 5104 + }, + { + "epoch": 0.5385021097046413, + "grad_norm": 0.8344826698303223, + "learning_rate": 0.0006706882027885929, + "loss": 1.3736, + "step": 5105 + }, + { + "epoch": 0.5386075949367088, + "grad_norm": 0.674444854259491, + "learning_rate": 0.0006704385553510156, + "loss": 1.4213, + "step": 5106 + }, + { + "epoch": 0.5387130801687764, + "grad_norm": 0.9710882902145386, + "learning_rate": 0.0006701889168286953, + "loss": 1.4507, + "step": 5107 + }, + { + "epoch": 0.5388185654008438, + "grad_norm": 0.6616508960723877, + "learning_rate": 0.0006699392872496048, + "loss": 1.4141, + "step": 5108 + }, + { + "epoch": 0.5389240506329114, + "grad_norm": 0.9690060019493103, + "learning_rate": 0.0006696896666417163, + "loss": 1.423, + "step": 5109 + }, + { + "epoch": 0.5390295358649789, + "grad_norm": 0.6504380106925964, + "learning_rate": 0.0006694400550330013, + "loss": 1.3639, + "step": 5110 + }, + { + "epoch": 0.5391350210970464, + "grad_norm": 0.895553469657898, + "learning_rate": 0.0006691904524514297, + "loss": 1.4242, + "step": 5111 + }, + { + "epoch": 0.5392405063291139, + "grad_norm": 0.7423070073127747, + "learning_rate": 0.0006689408589249709, + "loss": 1.4, + "step": 5112 + }, + { + "epoch": 0.5393459915611815, + "grad_norm": 0.9113427996635437, + "learning_rate": 0.000668691274481593, + "loss": 1.4344, + "step": 5113 + }, + { + "epoch": 0.5394514767932489, + "grad_norm": 0.7099983096122742, + "learning_rate": 0.0006684416991492629, + "loss": 1.4018, + "step": 5114 + }, + { + "epoch": 0.5395569620253164, + "grad_norm": 0.6831557750701904, + "learning_rate": 
0.0006681921329559475, + "loss": 1.3881, + "step": 5115 + }, + { + "epoch": 0.539662447257384, + "grad_norm": 0.6560385823249817, + "learning_rate": 0.0006679425759296114, + "loss": 1.4078, + "step": 5116 + }, + { + "epoch": 0.5397679324894514, + "grad_norm": 0.736088216304779, + "learning_rate": 0.000667693028098219, + "loss": 1.3956, + "step": 5117 + }, + { + "epoch": 0.539873417721519, + "grad_norm": 0.736411988735199, + "learning_rate": 0.0006674434894897332, + "loss": 1.4326, + "step": 5118 + }, + { + "epoch": 0.5399789029535865, + "grad_norm": 0.8178707957267761, + "learning_rate": 0.000667193960132116, + "loss": 1.4104, + "step": 5119 + }, + { + "epoch": 0.540084388185654, + "grad_norm": 0.6839601397514343, + "learning_rate": 0.0006669444400533286, + "loss": 1.3813, + "step": 5120 + }, + { + "epoch": 0.5401898734177215, + "grad_norm": 0.8854724168777466, + "learning_rate": 0.0006666949292813306, + "loss": 1.402, + "step": 5121 + }, + { + "epoch": 0.5402953586497891, + "grad_norm": 0.8216031789779663, + "learning_rate": 0.0006664454278440813, + "loss": 1.3955, + "step": 5122 + }, + { + "epoch": 0.5404008438818565, + "grad_norm": 0.7185940742492676, + "learning_rate": 0.0006661959357695382, + "loss": 1.3917, + "step": 5123 + }, + { + "epoch": 0.540506329113924, + "grad_norm": 0.7559511661529541, + "learning_rate": 0.0006659464530856587, + "loss": 1.4166, + "step": 5124 + }, + { + "epoch": 0.5406118143459916, + "grad_norm": 0.7224497199058533, + "learning_rate": 0.0006656969798203982, + "loss": 1.4051, + "step": 5125 + }, + { + "epoch": 0.540717299578059, + "grad_norm": 0.7111749649047852, + "learning_rate": 0.0006654475160017115, + "loss": 1.3998, + "step": 5126 + }, + { + "epoch": 0.5408227848101266, + "grad_norm": 0.6804084181785583, + "learning_rate": 0.0006651980616575522, + "loss": 1.4107, + "step": 5127 + }, + { + "epoch": 0.5409282700421941, + "grad_norm": 0.7046949863433838, + "learning_rate": 0.0006649486168158731, + "loss": 1.4089, + "step": 5128 + 
}, + { + "epoch": 0.5410337552742616, + "grad_norm": 0.733573853969574, + "learning_rate": 0.0006646991815046254, + "loss": 1.4121, + "step": 5129 + }, + { + "epoch": 0.5411392405063291, + "grad_norm": 0.7217696905136108, + "learning_rate": 0.0006644497557517599, + "loss": 1.4161, + "step": 5130 + }, + { + "epoch": 0.5412447257383967, + "grad_norm": 0.8269341588020325, + "learning_rate": 0.0006642003395852258, + "loss": 1.4388, + "step": 5131 + }, + { + "epoch": 0.5413502109704641, + "grad_norm": 0.7110166549682617, + "learning_rate": 0.0006639509330329713, + "loss": 1.4481, + "step": 5132 + }, + { + "epoch": 0.5414556962025316, + "grad_norm": 0.7222434878349304, + "learning_rate": 0.0006637015361229438, + "loss": 1.4054, + "step": 5133 + }, + { + "epoch": 0.5415611814345992, + "grad_norm": 1.1009751558303833, + "learning_rate": 0.0006634521488830898, + "loss": 1.4227, + "step": 5134 + }, + { + "epoch": 0.5416666666666666, + "grad_norm": 0.7662981152534485, + "learning_rate": 0.0006632027713413541, + "loss": 1.4023, + "step": 5135 + }, + { + "epoch": 0.5417721518987342, + "grad_norm": 0.9105427861213684, + "learning_rate": 0.0006629534035256805, + "loss": 1.3996, + "step": 5136 + }, + { + "epoch": 0.5418776371308017, + "grad_norm": 0.6575705409049988, + "learning_rate": 0.0006627040454640123, + "loss": 1.4154, + "step": 5137 + }, + { + "epoch": 0.5419831223628692, + "grad_norm": 1.040065884590149, + "learning_rate": 0.0006624546971842909, + "loss": 1.4145, + "step": 5138 + }, + { + "epoch": 0.5420886075949367, + "grad_norm": 0.6919463872909546, + "learning_rate": 0.0006622053587144572, + "loss": 1.4149, + "step": 5139 + }, + { + "epoch": 0.5421940928270043, + "grad_norm": 0.7106877565383911, + "learning_rate": 0.0006619560300824507, + "loss": 1.413, + "step": 5140 + }, + { + "epoch": 0.5422995780590717, + "grad_norm": 0.8215839862823486, + "learning_rate": 0.0006617067113162103, + "loss": 1.4021, + "step": 5141 + }, + { + "epoch": 0.5424050632911392, + "grad_norm": 
0.7772276997566223, + "learning_rate": 0.0006614574024436732, + "loss": 1.4386, + "step": 5142 + }, + { + "epoch": 0.5425105485232068, + "grad_norm": 0.878695011138916, + "learning_rate": 0.0006612081034927756, + "loss": 1.4349, + "step": 5143 + }, + { + "epoch": 0.5426160337552742, + "grad_norm": 0.8178412318229675, + "learning_rate": 0.0006609588144914528, + "loss": 1.4008, + "step": 5144 + }, + { + "epoch": 0.5427215189873418, + "grad_norm": 0.7235819101333618, + "learning_rate": 0.0006607095354676389, + "loss": 1.4278, + "step": 5145 + }, + { + "epoch": 0.5428270042194093, + "grad_norm": 0.7688640356063843, + "learning_rate": 0.0006604602664492667, + "loss": 1.4069, + "step": 5146 + }, + { + "epoch": 0.5429324894514768, + "grad_norm": 0.6715136766433716, + "learning_rate": 0.0006602110074642682, + "loss": 1.436, + "step": 5147 + }, + { + "epoch": 0.5430379746835443, + "grad_norm": 0.6703335642814636, + "learning_rate": 0.000659961758540574, + "loss": 1.3842, + "step": 5148 + }, + { + "epoch": 0.5431434599156119, + "grad_norm": 0.6856567859649658, + "learning_rate": 0.0006597125197061133, + "loss": 1.4096, + "step": 5149 + }, + { + "epoch": 0.5432489451476793, + "grad_norm": 0.7218381762504578, + "learning_rate": 0.0006594632909888154, + "loss": 1.4341, + "step": 5150 + }, + { + "epoch": 0.5433544303797468, + "grad_norm": 0.7818481922149658, + "learning_rate": 0.0006592140724166073, + "loss": 1.3851, + "step": 5151 + }, + { + "epoch": 0.5434599156118144, + "grad_norm": 0.671804666519165, + "learning_rate": 0.000658964864017415, + "loss": 1.4254, + "step": 5152 + }, + { + "epoch": 0.5435654008438818, + "grad_norm": 0.7437461018562317, + "learning_rate": 0.0006587156658191635, + "loss": 1.3995, + "step": 5153 + }, + { + "epoch": 0.5436708860759494, + "grad_norm": 0.6406316757202148, + "learning_rate": 0.0006584664778497771, + "loss": 1.4183, + "step": 5154 + }, + { + "epoch": 0.5437763713080169, + "grad_norm": 0.6918665170669556, + "learning_rate": 
0.0006582173001371781, + "loss": 1.4405, + "step": 5155 + }, + { + "epoch": 0.5438818565400844, + "grad_norm": 0.7614834308624268, + "learning_rate": 0.0006579681327092883, + "loss": 1.3756, + "step": 5156 + }, + { + "epoch": 0.5439873417721519, + "grad_norm": 0.6662439107894897, + "learning_rate": 0.0006577189755940282, + "loss": 1.4091, + "step": 5157 + }, + { + "epoch": 0.5440928270042195, + "grad_norm": 0.9165757298469543, + "learning_rate": 0.0006574698288193166, + "loss": 1.3962, + "step": 5158 + }, + { + "epoch": 0.5441983122362869, + "grad_norm": 0.7102365493774414, + "learning_rate": 0.0006572206924130725, + "loss": 1.4258, + "step": 5159 + }, + { + "epoch": 0.5443037974683544, + "grad_norm": 0.8116244673728943, + "learning_rate": 0.0006569715664032124, + "loss": 1.4105, + "step": 5160 + }, + { + "epoch": 0.544409282700422, + "grad_norm": 0.8397398591041565, + "learning_rate": 0.0006567224508176523, + "loss": 1.4404, + "step": 5161 + }, + { + "epoch": 0.5445147679324894, + "grad_norm": 0.6657224297523499, + "learning_rate": 0.0006564733456843067, + "loss": 1.4385, + "step": 5162 + }, + { + "epoch": 0.544620253164557, + "grad_norm": 0.7483649849891663, + "learning_rate": 0.000656224251031089, + "loss": 1.3747, + "step": 5163 + }, + { + "epoch": 0.5447257383966245, + "grad_norm": 0.6739777326583862, + "learning_rate": 0.0006559751668859115, + "loss": 1.41, + "step": 5164 + }, + { + "epoch": 0.544831223628692, + "grad_norm": 0.7164890766143799, + "learning_rate": 0.0006557260932766855, + "loss": 1.4304, + "step": 5165 + }, + { + "epoch": 0.5449367088607595, + "grad_norm": 0.9170278310775757, + "learning_rate": 0.0006554770302313205, + "loss": 1.3672, + "step": 5166 + }, + { + "epoch": 0.5450421940928271, + "grad_norm": 0.7016962170600891, + "learning_rate": 0.0006552279777777258, + "loss": 1.416, + "step": 5167 + }, + { + "epoch": 0.5451476793248945, + "grad_norm": 0.7338467836380005, + "learning_rate": 0.000654978935943809, + "loss": 1.4111, + "step": 5168 + 
}, + { + "epoch": 0.545253164556962, + "grad_norm": 0.7336734533309937, + "learning_rate": 0.0006547299047574761, + "loss": 1.4313, + "step": 5169 + }, + { + "epoch": 0.5453586497890295, + "grad_norm": 0.6824188828468323, + "learning_rate": 0.0006544808842466324, + "loss": 1.3568, + "step": 5170 + }, + { + "epoch": 0.545464135021097, + "grad_norm": 0.8796314001083374, + "learning_rate": 0.0006542318744391821, + "loss": 1.3897, + "step": 5171 + }, + { + "epoch": 0.5455696202531646, + "grad_norm": 0.7183203101158142, + "learning_rate": 0.0006539828753630276, + "loss": 1.4113, + "step": 5172 + }, + { + "epoch": 0.545675105485232, + "grad_norm": 0.9865493178367615, + "learning_rate": 0.0006537338870460708, + "loss": 1.4236, + "step": 5173 + }, + { + "epoch": 0.5457805907172996, + "grad_norm": 0.7908132076263428, + "learning_rate": 0.000653484909516212, + "loss": 1.4375, + "step": 5174 + }, + { + "epoch": 0.5458860759493671, + "grad_norm": 0.8189553618431091, + "learning_rate": 0.00065323594280135, + "loss": 1.4138, + "step": 5175 + }, + { + "epoch": 0.5459915611814345, + "grad_norm": 0.8113191723823547, + "learning_rate": 0.0006529869869293834, + "loss": 1.4, + "step": 5176 + }, + { + "epoch": 0.5460970464135021, + "grad_norm": 0.7204833626747131, + "learning_rate": 0.0006527380419282088, + "loss": 1.4355, + "step": 5177 + }, + { + "epoch": 0.5462025316455696, + "grad_norm": 0.7696975469589233, + "learning_rate": 0.0006524891078257215, + "loss": 1.4585, + "step": 5178 + }, + { + "epoch": 0.5463080168776371, + "grad_norm": 0.8380425572395325, + "learning_rate": 0.000652240184649816, + "loss": 1.3927, + "step": 5179 + }, + { + "epoch": 0.5464135021097046, + "grad_norm": 0.9510295391082764, + "learning_rate": 0.0006519912724283851, + "loss": 1.427, + "step": 5180 + }, + { + "epoch": 0.5465189873417722, + "grad_norm": 0.6544305682182312, + "learning_rate": 0.0006517423711893209, + "loss": 1.4052, + "step": 5181 + }, + { + "epoch": 0.5466244725738396, + "grad_norm": 
0.6592302918434143, + "learning_rate": 0.000651493480960514, + "loss": 1.404, + "step": 5182 + }, + { + "epoch": 0.5467299578059072, + "grad_norm": 0.6705622673034668, + "learning_rate": 0.0006512446017698537, + "loss": 1.388, + "step": 5183 + }, + { + "epoch": 0.5468354430379747, + "grad_norm": 0.7493470907211304, + "learning_rate": 0.0006509957336452279, + "loss": 1.4235, + "step": 5184 + }, + { + "epoch": 0.5469409282700421, + "grad_norm": 0.708165168762207, + "learning_rate": 0.0006507468766145242, + "loss": 1.3816, + "step": 5185 + }, + { + "epoch": 0.5470464135021097, + "grad_norm": 0.7250615954399109, + "learning_rate": 0.000650498030705628, + "loss": 1.4399, + "step": 5186 + }, + { + "epoch": 0.5471518987341772, + "grad_norm": 0.6608795523643494, + "learning_rate": 0.0006502491959464235, + "loss": 1.3898, + "step": 5187 + }, + { + "epoch": 0.5472573839662447, + "grad_norm": 1.0127125978469849, + "learning_rate": 0.000650000372364794, + "loss": 1.3583, + "step": 5188 + }, + { + "epoch": 0.5473628691983122, + "grad_norm": 0.7481963634490967, + "learning_rate": 0.0006497515599886214, + "loss": 1.4052, + "step": 5189 + }, + { + "epoch": 0.5474683544303798, + "grad_norm": 0.7324149012565613, + "learning_rate": 0.0006495027588457864, + "loss": 1.387, + "step": 5190 + }, + { + "epoch": 0.5475738396624472, + "grad_norm": 0.654126763343811, + "learning_rate": 0.0006492539689641685, + "loss": 1.4107, + "step": 5191 + }, + { + "epoch": 0.5476793248945148, + "grad_norm": 0.832060694694519, + "learning_rate": 0.0006490051903716454, + "loss": 1.4084, + "step": 5192 + }, + { + "epoch": 0.5477848101265823, + "grad_norm": 0.6646851897239685, + "learning_rate": 0.0006487564230960944, + "loss": 1.3992, + "step": 5193 + }, + { + "epoch": 0.5478902953586497, + "grad_norm": 0.7815333604812622, + "learning_rate": 0.0006485076671653913, + "loss": 1.4241, + "step": 5194 + }, + { + "epoch": 0.5479957805907173, + "grad_norm": 0.7204762101173401, + "learning_rate": 
0.00064825892260741, + "loss": 1.4195, + "step": 5195 + }, + { + "epoch": 0.5481012658227848, + "grad_norm": 0.7487678527832031, + "learning_rate": 0.0006480101894500239, + "loss": 1.431, + "step": 5196 + }, + { + "epoch": 0.5482067510548523, + "grad_norm": 0.7413361072540283, + "learning_rate": 0.0006477614677211046, + "loss": 1.419, + "step": 5197 + }, + { + "epoch": 0.5483122362869198, + "grad_norm": 0.630885899066925, + "learning_rate": 0.0006475127574485226, + "loss": 1.3908, + "step": 5198 + }, + { + "epoch": 0.5484177215189874, + "grad_norm": 0.883934497833252, + "learning_rate": 0.0006472640586601472, + "loss": 1.404, + "step": 5199 + }, + { + "epoch": 0.5485232067510548, + "grad_norm": 0.7980557084083557, + "learning_rate": 0.0006470153713838463, + "loss": 1.4025, + "step": 5200 + }, + { + "epoch": 0.5486286919831224, + "grad_norm": 0.6150339245796204, + "learning_rate": 0.0006467666956474865, + "loss": 1.3548, + "step": 5201 + }, + { + "epoch": 0.5487341772151899, + "grad_norm": 0.6800041794776917, + "learning_rate": 0.0006465180314789332, + "loss": 1.414, + "step": 5202 + }, + { + "epoch": 0.5488396624472573, + "grad_norm": 0.6982999444007874, + "learning_rate": 0.0006462693789060505, + "loss": 1.3979, + "step": 5203 + }, + { + "epoch": 0.5489451476793249, + "grad_norm": 0.714028000831604, + "learning_rate": 0.0006460207379567011, + "loss": 1.3977, + "step": 5204 + }, + { + "epoch": 0.5490506329113924, + "grad_norm": 0.6602289080619812, + "learning_rate": 0.0006457721086587468, + "loss": 1.4109, + "step": 5205 + }, + { + "epoch": 0.5491561181434599, + "grad_norm": 0.6118313670158386, + "learning_rate": 0.0006455234910400472, + "loss": 1.3882, + "step": 5206 + }, + { + "epoch": 0.5492616033755274, + "grad_norm": 0.7576491832733154, + "learning_rate": 0.0006452748851284615, + "loss": 1.3762, + "step": 5207 + }, + { + "epoch": 0.549367088607595, + "grad_norm": 0.6649750471115112, + "learning_rate": 0.0006450262909518471, + "loss": 1.422, + "step": 5208 + }, 
+ { + "epoch": 0.5494725738396624, + "grad_norm": 0.7087109088897705, + "learning_rate": 0.0006447777085380603, + "loss": 1.4006, + "step": 5209 + }, + { + "epoch": 0.54957805907173, + "grad_norm": 0.6927189230918884, + "learning_rate": 0.0006445291379149556, + "loss": 1.3942, + "step": 5210 + }, + { + "epoch": 0.5496835443037975, + "grad_norm": 0.6799608469009399, + "learning_rate": 0.0006442805791103873, + "loss": 1.4264, + "step": 5211 + }, + { + "epoch": 0.549789029535865, + "grad_norm": 0.7967249751091003, + "learning_rate": 0.0006440320321522071, + "loss": 1.3618, + "step": 5212 + }, + { + "epoch": 0.5498945147679325, + "grad_norm": 0.8502678275108337, + "learning_rate": 0.0006437834970682661, + "loss": 1.4374, + "step": 5213 + }, + { + "epoch": 0.55, + "grad_norm": 0.6959104537963867, + "learning_rate": 0.000643534973886414, + "loss": 1.4221, + "step": 5214 + }, + { + "epoch": 0.5501054852320675, + "grad_norm": 0.7348280549049377, + "learning_rate": 0.0006432864626344989, + "loss": 1.4212, + "step": 5215 + }, + { + "epoch": 0.550210970464135, + "grad_norm": 0.7714657783508301, + "learning_rate": 0.0006430379633403679, + "loss": 1.4283, + "step": 5216 + }, + { + "epoch": 0.5503164556962026, + "grad_norm": 0.6970704793930054, + "learning_rate": 0.0006427894760318664, + "loss": 1.4024, + "step": 5217 + }, + { + "epoch": 0.55042194092827, + "grad_norm": 0.7483640313148499, + "learning_rate": 0.0006425410007368385, + "loss": 1.398, + "step": 5218 + }, + { + "epoch": 0.5505274261603376, + "grad_norm": 0.7180496454238892, + "learning_rate": 0.0006422925374831275, + "loss": 1.4297, + "step": 5219 + }, + { + "epoch": 0.5506329113924051, + "grad_norm": 0.709416925907135, + "learning_rate": 0.0006420440862985748, + "loss": 1.374, + "step": 5220 + }, + { + "epoch": 0.5507383966244725, + "grad_norm": 0.6598813533782959, + "learning_rate": 0.0006417956472110205, + "loss": 1.3883, + "step": 5221 + }, + { + "epoch": 0.5508438818565401, + "grad_norm": 0.6877507567405701, + 
"learning_rate": 0.0006415472202483034, + "loss": 1.3771, + "step": 5222 + }, + { + "epoch": 0.5509493670886076, + "grad_norm": 0.6672431826591492, + "learning_rate": 0.0006412988054382611, + "loss": 1.3697, + "step": 5223 + }, + { + "epoch": 0.5510548523206751, + "grad_norm": 0.7435854077339172, + "learning_rate": 0.0006410504028087297, + "loss": 1.4266, + "step": 5224 + }, + { + "epoch": 0.5511603375527426, + "grad_norm": 0.7608259916305542, + "learning_rate": 0.000640802012387544, + "loss": 1.4152, + "step": 5225 + }, + { + "epoch": 0.5512658227848102, + "grad_norm": 0.8224394917488098, + "learning_rate": 0.0006405536342025374, + "loss": 1.3992, + "step": 5226 + }, + { + "epoch": 0.5513713080168776, + "grad_norm": 0.803557813167572, + "learning_rate": 0.0006403052682815415, + "loss": 1.3707, + "step": 5227 + }, + { + "epoch": 0.5514767932489452, + "grad_norm": 0.9858005046844482, + "learning_rate": 0.0006400569146523875, + "loss": 1.4321, + "step": 5228 + }, + { + "epoch": 0.5515822784810127, + "grad_norm": 0.8735037446022034, + "learning_rate": 0.0006398085733429045, + "loss": 1.4061, + "step": 5229 + }, + { + "epoch": 0.5516877637130801, + "grad_norm": 1.0093806982040405, + "learning_rate": 0.0006395602443809203, + "loss": 1.4184, + "step": 5230 + }, + { + "epoch": 0.5517932489451477, + "grad_norm": 0.7327403426170349, + "learning_rate": 0.0006393119277942614, + "loss": 1.4307, + "step": 5231 + }, + { + "epoch": 0.5518987341772152, + "grad_norm": 1.0154674053192139, + "learning_rate": 0.0006390636236107528, + "loss": 1.3997, + "step": 5232 + }, + { + "epoch": 0.5520042194092827, + "grad_norm": 0.6322083473205566, + "learning_rate": 0.0006388153318582185, + "loss": 1.3626, + "step": 5233 + }, + { + "epoch": 0.5521097046413502, + "grad_norm": 1.0569167137145996, + "learning_rate": 0.0006385670525644806, + "loss": 1.4047, + "step": 5234 + }, + { + "epoch": 0.5522151898734177, + "grad_norm": 0.6721194386482239, + "learning_rate": 0.0006383187857573601, + "loss": 
1.4194, + "step": 5235 + }, + { + "epoch": 0.5523206751054852, + "grad_norm": 0.8609488606452942, + "learning_rate": 0.0006380705314646765, + "loss": 1.4142, + "step": 5236 + }, + { + "epoch": 0.5524261603375528, + "grad_norm": 0.7943341732025146, + "learning_rate": 0.0006378222897142482, + "loss": 1.4019, + "step": 5237 + }, + { + "epoch": 0.5525316455696202, + "grad_norm": 1.027715802192688, + "learning_rate": 0.0006375740605338916, + "loss": 1.3942, + "step": 5238 + }, + { + "epoch": 0.5526371308016877, + "grad_norm": 0.748354434967041, + "learning_rate": 0.0006373258439514221, + "loss": 1.4188, + "step": 5239 + }, + { + "epoch": 0.5527426160337553, + "grad_norm": 0.8387768268585205, + "learning_rate": 0.0006370776399946536, + "loss": 1.4348, + "step": 5240 + }, + { + "epoch": 0.5528481012658227, + "grad_norm": 0.7384750247001648, + "learning_rate": 0.0006368294486913987, + "loss": 1.4108, + "step": 5241 + }, + { + "epoch": 0.5529535864978903, + "grad_norm": 0.6996367573738098, + "learning_rate": 0.0006365812700694683, + "loss": 1.3948, + "step": 5242 + }, + { + "epoch": 0.5530590717299578, + "grad_norm": 0.8565415143966675, + "learning_rate": 0.0006363331041566723, + "loss": 1.3854, + "step": 5243 + }, + { + "epoch": 0.5531645569620253, + "grad_norm": 0.68348228931427, + "learning_rate": 0.0006360849509808184, + "loss": 1.3806, + "step": 5244 + }, + { + "epoch": 0.5532700421940928, + "grad_norm": 0.7771276831626892, + "learning_rate": 0.0006358368105697142, + "loss": 1.4109, + "step": 5245 + }, + { + "epoch": 0.5533755274261604, + "grad_norm": 0.762232780456543, + "learning_rate": 0.0006355886829511645, + "loss": 1.4072, + "step": 5246 + }, + { + "epoch": 0.5534810126582278, + "grad_norm": 0.9330772757530212, + "learning_rate": 0.0006353405681529734, + "loss": 1.402, + "step": 5247 + }, + { + "epoch": 0.5535864978902953, + "grad_norm": 0.8510139584541321, + "learning_rate": 0.0006350924662029433, + "loss": 1.401, + "step": 5248 + }, + { + "epoch": 
0.5536919831223629, + "grad_norm": 1.1665887832641602, + "learning_rate": 0.0006348443771288755, + "loss": 1.3822, + "step": 5249 + }, + { + "epoch": 0.5537974683544303, + "grad_norm": 0.8187341690063477, + "learning_rate": 0.0006345963009585694, + "loss": 1.4324, + "step": 5250 + }, + { + "epoch": 0.5539029535864979, + "grad_norm": 1.170572280883789, + "learning_rate": 0.0006343482377198232, + "loss": 1.4214, + "step": 5251 + }, + { + "epoch": 0.5540084388185654, + "grad_norm": 0.858672559261322, + "learning_rate": 0.0006341001874404335, + "loss": 1.3834, + "step": 5252 + }, + { + "epoch": 0.5541139240506329, + "grad_norm": 1.2026820182800293, + "learning_rate": 0.0006338521501481957, + "loss": 1.3949, + "step": 5253 + }, + { + "epoch": 0.5542194092827004, + "grad_norm": 1.0411823987960815, + "learning_rate": 0.0006336041258709039, + "loss": 1.4223, + "step": 5254 + }, + { + "epoch": 0.554324894514768, + "grad_norm": 1.3228683471679688, + "learning_rate": 0.0006333561146363502, + "loss": 1.4198, + "step": 5255 + }, + { + "epoch": 0.5544303797468354, + "grad_norm": 1.094818353652954, + "learning_rate": 0.0006331081164723253, + "loss": 1.3725, + "step": 5256 + }, + { + "epoch": 0.554535864978903, + "grad_norm": 0.6658511757850647, + "learning_rate": 0.000632860131406619, + "loss": 1.4129, + "step": 5257 + }, + { + "epoch": 0.5546413502109705, + "grad_norm": 1.685613989830017, + "learning_rate": 0.0006326121594670191, + "loss": 1.4148, + "step": 5258 + }, + { + "epoch": 0.5547468354430379, + "grad_norm": 0.8013893365859985, + "learning_rate": 0.000632364200681312, + "loss": 1.4279, + "step": 5259 + }, + { + "epoch": 0.5548523206751055, + "grad_norm": 1.12322998046875, + "learning_rate": 0.0006321162550772829, + "loss": 1.3895, + "step": 5260 + }, + { + "epoch": 0.554957805907173, + "grad_norm": 0.9685990810394287, + "learning_rate": 0.0006318683226827151, + "loss": 1.4068, + "step": 5261 + }, + { + "epoch": 0.5550632911392405, + "grad_norm": 1.0057584047317505, + 
"learning_rate": 0.0006316204035253906, + "loss": 1.4205, + "step": 5262 + }, + { + "epoch": 0.555168776371308, + "grad_norm": 0.9574763774871826, + "learning_rate": 0.0006313724976330904, + "loss": 1.4069, + "step": 5263 + }, + { + "epoch": 0.5552742616033756, + "grad_norm": 0.8126172423362732, + "learning_rate": 0.0006311246050335934, + "loss": 1.3858, + "step": 5264 + }, + { + "epoch": 0.555379746835443, + "grad_norm": 0.8380897045135498, + "learning_rate": 0.0006308767257546772, + "loss": 1.3962, + "step": 5265 + }, + { + "epoch": 0.5554852320675105, + "grad_norm": 0.7334390878677368, + "learning_rate": 0.0006306288598241179, + "loss": 1.3962, + "step": 5266 + }, + { + "epoch": 0.5555907172995781, + "grad_norm": 0.8011385798454285, + "learning_rate": 0.00063038100726969, + "loss": 1.4132, + "step": 5267 + }, + { + "epoch": 0.5556962025316455, + "grad_norm": 0.7063050866127014, + "learning_rate": 0.0006301331681191668, + "loss": 1.4192, + "step": 5268 + }, + { + "epoch": 0.5558016877637131, + "grad_norm": 0.859683632850647, + "learning_rate": 0.0006298853424003199, + "loss": 1.4085, + "step": 5269 + }, + { + "epoch": 0.5559071729957806, + "grad_norm": 0.8140813112258911, + "learning_rate": 0.0006296375301409187, + "loss": 1.421, + "step": 5270 + }, + { + "epoch": 0.5560126582278481, + "grad_norm": 0.9982941746711731, + "learning_rate": 0.0006293897313687331, + "loss": 1.4106, + "step": 5271 + }, + { + "epoch": 0.5561181434599156, + "grad_norm": 0.6987082362174988, + "learning_rate": 0.0006291419461115293, + "loss": 1.4224, + "step": 5272 + }, + { + "epoch": 0.5562236286919832, + "grad_norm": 0.6809787154197693, + "learning_rate": 0.0006288941743970732, + "loss": 1.365, + "step": 5273 + }, + { + "epoch": 0.5563291139240506, + "grad_norm": 0.9184640645980835, + "learning_rate": 0.0006286464162531287, + "loss": 1.3763, + "step": 5274 + }, + { + "epoch": 0.5564345991561181, + "grad_norm": 0.7414993643760681, + "learning_rate": 0.0006283986717074585, + "loss": 
1.3809, + "step": 5275 + }, + { + "epoch": 0.5565400843881857, + "grad_norm": 0.8378595113754272, + "learning_rate": 0.0006281509407878232, + "loss": 1.4124, + "step": 5276 + }, + { + "epoch": 0.5566455696202531, + "grad_norm": 0.7583740949630737, + "learning_rate": 0.0006279032235219829, + "loss": 1.4247, + "step": 5277 + }, + { + "epoch": 0.5567510548523207, + "grad_norm": 0.6829453706741333, + "learning_rate": 0.0006276555199376951, + "loss": 1.3965, + "step": 5278 + }, + { + "epoch": 0.5568565400843882, + "grad_norm": 0.7382926940917969, + "learning_rate": 0.000627407830062716, + "loss": 1.4138, + "step": 5279 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 0.7201187014579773, + "learning_rate": 0.0006271601539248012, + "loss": 1.3879, + "step": 5280 + }, + { + "epoch": 0.5570675105485232, + "grad_norm": 0.6946626901626587, + "learning_rate": 0.0006269124915517037, + "loss": 1.4203, + "step": 5281 + }, + { + "epoch": 0.5571729957805908, + "grad_norm": 0.700740396976471, + "learning_rate": 0.0006266648429711753, + "loss": 1.3917, + "step": 5282 + }, + { + "epoch": 0.5572784810126582, + "grad_norm": 0.6991609334945679, + "learning_rate": 0.0006264172082109661, + "loss": 1.3763, + "step": 5283 + }, + { + "epoch": 0.5573839662447257, + "grad_norm": 0.7385745644569397, + "learning_rate": 0.0006261695872988252, + "loss": 1.4074, + "step": 5284 + }, + { + "epoch": 0.5574894514767933, + "grad_norm": 0.7783645987510681, + "learning_rate": 0.0006259219802624994, + "loss": 1.3944, + "step": 5285 + }, + { + "epoch": 0.5575949367088607, + "grad_norm": 0.713005542755127, + "learning_rate": 0.0006256743871297344, + "loss": 1.3938, + "step": 5286 + }, + { + "epoch": 0.5577004219409283, + "grad_norm": 0.646266520023346, + "learning_rate": 0.0006254268079282743, + "loss": 1.3746, + "step": 5287 + }, + { + "epoch": 0.5578059071729958, + "grad_norm": 1.2629677057266235, + "learning_rate": 0.0006251792426858612, + "loss": 1.412, + "step": 5288 + }, + { + "epoch": 
0.5579113924050633, + "grad_norm": 0.6748760342597961, + "learning_rate": 0.0006249316914302368, + "loss": 1.4233, + "step": 5289 + }, + { + "epoch": 0.5580168776371308, + "grad_norm": 1.0798879861831665, + "learning_rate": 0.0006246841541891399, + "loss": 1.4424, + "step": 5290 + }, + { + "epoch": 0.5581223628691984, + "grad_norm": 0.7148851752281189, + "learning_rate": 0.0006244366309903084, + "loss": 1.4142, + "step": 5291 + }, + { + "epoch": 0.5582278481012658, + "grad_norm": 1.0152547359466553, + "learning_rate": 0.0006241891218614786, + "loss": 1.3793, + "step": 5292 + }, + { + "epoch": 0.5583333333333333, + "grad_norm": 0.6237466931343079, + "learning_rate": 0.0006239416268303849, + "loss": 1.3991, + "step": 5293 + }, + { + "epoch": 0.5584388185654009, + "grad_norm": 0.6990357041358948, + "learning_rate": 0.0006236941459247606, + "loss": 1.4172, + "step": 5294 + }, + { + "epoch": 0.5585443037974683, + "grad_norm": 0.8643997311592102, + "learning_rate": 0.0006234466791723371, + "loss": 1.4019, + "step": 5295 + }, + { + "epoch": 0.5586497890295359, + "grad_norm": 0.6501394510269165, + "learning_rate": 0.0006231992266008438, + "loss": 1.4204, + "step": 5296 + }, + { + "epoch": 0.5587552742616034, + "grad_norm": 0.8727348446846008, + "learning_rate": 0.00062295178823801, + "loss": 1.3857, + "step": 5297 + }, + { + "epoch": 0.5588607594936709, + "grad_norm": 0.6793578863143921, + "learning_rate": 0.0006227043641115616, + "loss": 1.3818, + "step": 5298 + }, + { + "epoch": 0.5589662447257384, + "grad_norm": 0.6570491194725037, + "learning_rate": 0.0006224569542492241, + "loss": 1.4225, + "step": 5299 + }, + { + "epoch": 0.5590717299578059, + "grad_norm": 0.8784142136573792, + "learning_rate": 0.0006222095586787208, + "loss": 1.4204, + "step": 5300 + }, + { + "epoch": 0.5591772151898734, + "grad_norm": 0.6972753405570984, + "learning_rate": 0.0006219621774277737, + "loss": 1.3817, + "step": 5301 + }, + { + "epoch": 0.559282700421941, + "grad_norm": 
0.9388535022735596, + "learning_rate": 0.000621714810524103, + "loss": 1.3922, + "step": 5302 + }, + { + "epoch": 0.5593881856540084, + "grad_norm": 0.7125361561775208, + "learning_rate": 0.0006214674579954276, + "loss": 1.4069, + "step": 5303 + }, + { + "epoch": 0.5594936708860759, + "grad_norm": 0.6720613241195679, + "learning_rate": 0.0006212201198694643, + "loss": 1.3889, + "step": 5304 + }, + { + "epoch": 0.5595991561181435, + "grad_norm": 0.7367134690284729, + "learning_rate": 0.0006209727961739286, + "loss": 1.4183, + "step": 5305 + }, + { + "epoch": 0.5597046413502109, + "grad_norm": 0.6937751770019531, + "learning_rate": 0.0006207254869365346, + "loss": 1.413, + "step": 5306 + }, + { + "epoch": 0.5598101265822785, + "grad_norm": 0.6726415157318115, + "learning_rate": 0.0006204781921849945, + "loss": 1.3966, + "step": 5307 + }, + { + "epoch": 0.559915611814346, + "grad_norm": 0.642645001411438, + "learning_rate": 0.0006202309119470188, + "loss": 1.3544, + "step": 5308 + }, + { + "epoch": 0.5600210970464135, + "grad_norm": 0.7323907613754272, + "learning_rate": 0.0006199836462503166, + "loss": 1.3885, + "step": 5309 + }, + { + "epoch": 0.560126582278481, + "grad_norm": 0.7562379240989685, + "learning_rate": 0.0006197363951225951, + "loss": 1.3929, + "step": 5310 + }, + { + "epoch": 0.5602320675105485, + "grad_norm": 0.6722811460494995, + "learning_rate": 0.00061948915859156, + "loss": 1.3999, + "step": 5311 + }, + { + "epoch": 0.560337552742616, + "grad_norm": 0.6729975938796997, + "learning_rate": 0.0006192419366849155, + "loss": 1.3885, + "step": 5312 + }, + { + "epoch": 0.5604430379746835, + "grad_norm": 0.6383692622184753, + "learning_rate": 0.0006189947294303641, + "loss": 1.3782, + "step": 5313 + }, + { + "epoch": 0.5605485232067511, + "grad_norm": 0.7201743125915527, + "learning_rate": 0.000618747536855606, + "loss": 1.3768, + "step": 5314 + }, + { + "epoch": 0.5606540084388185, + "grad_norm": 0.6427522301673889, + "learning_rate": 
0.0006185003589883413, + "loss": 1.3904, + "step": 5315 + }, + { + "epoch": 0.5607594936708861, + "grad_norm": 0.6784898638725281, + "learning_rate": 0.0006182531958562672, + "loss": 1.4107, + "step": 5316 + }, + { + "epoch": 0.5608649789029536, + "grad_norm": 0.6285962462425232, + "learning_rate": 0.0006180060474870793, + "loss": 1.3663, + "step": 5317 + }, + { + "epoch": 0.560970464135021, + "grad_norm": 0.672042191028595, + "learning_rate": 0.0006177589139084721, + "loss": 1.4222, + "step": 5318 + }, + { + "epoch": 0.5610759493670886, + "grad_norm": 0.6749352812767029, + "learning_rate": 0.000617511795148138, + "loss": 1.3964, + "step": 5319 + }, + { + "epoch": 0.5611814345991561, + "grad_norm": 0.6503754258155823, + "learning_rate": 0.0006172646912337678, + "loss": 1.3935, + "step": 5320 + }, + { + "epoch": 0.5612869198312236, + "grad_norm": 0.6436938047409058, + "learning_rate": 0.0006170176021930509, + "loss": 1.4299, + "step": 5321 + }, + { + "epoch": 0.5613924050632911, + "grad_norm": 0.6408564448356628, + "learning_rate": 0.0006167705280536745, + "loss": 1.4338, + "step": 5322 + }, + { + "epoch": 0.5614978902953587, + "grad_norm": 0.6751163005828857, + "learning_rate": 0.000616523468843325, + "loss": 1.4061, + "step": 5323 + }, + { + "epoch": 0.5616033755274261, + "grad_norm": 0.6704219579696655, + "learning_rate": 0.0006162764245896863, + "loss": 1.4044, + "step": 5324 + }, + { + "epoch": 0.5617088607594937, + "grad_norm": 0.7445951104164124, + "learning_rate": 0.0006160293953204412, + "loss": 1.3937, + "step": 5325 + }, + { + "epoch": 0.5618143459915612, + "grad_norm": 0.9398880004882812, + "learning_rate": 0.0006157823810632704, + "loss": 1.4051, + "step": 5326 + }, + { + "epoch": 0.5619198312236287, + "grad_norm": 0.6349620223045349, + "learning_rate": 0.000615535381845853, + "loss": 1.373, + "step": 5327 + }, + { + "epoch": 0.5620253164556962, + "grad_norm": 0.6650243401527405, + "learning_rate": 0.0006152883976958665, + "loss": 1.3767, + "step": 5328 
+ }, + { + "epoch": 0.5621308016877637, + "grad_norm": 1.0553816556930542, + "learning_rate": 0.0006150414286409869, + "loss": 1.4096, + "step": 5329 + }, + { + "epoch": 0.5622362869198312, + "grad_norm": 0.7074335217475891, + "learning_rate": 0.0006147944747088881, + "loss": 1.4044, + "step": 5330 + }, + { + "epoch": 0.5623417721518987, + "grad_norm": 0.9856396913528442, + "learning_rate": 0.0006145475359272424, + "loss": 1.4058, + "step": 5331 + }, + { + "epoch": 0.5624472573839663, + "grad_norm": 0.6850534677505493, + "learning_rate": 0.0006143006123237208, + "loss": 1.4047, + "step": 5332 + }, + { + "epoch": 0.5625527426160337, + "grad_norm": 0.7375887036323547, + "learning_rate": 0.0006140537039259925, + "loss": 1.3644, + "step": 5333 + }, + { + "epoch": 0.5626582278481013, + "grad_norm": 0.6903479695320129, + "learning_rate": 0.0006138068107617244, + "loss": 1.4126, + "step": 5334 + }, + { + "epoch": 0.5627637130801688, + "grad_norm": 0.7081104516983032, + "learning_rate": 0.0006135599328585824, + "loss": 1.3992, + "step": 5335 + }, + { + "epoch": 0.5628691983122363, + "grad_norm": 0.7458735108375549, + "learning_rate": 0.0006133130702442302, + "loss": 1.4081, + "step": 5336 + }, + { + "epoch": 0.5629746835443038, + "grad_norm": 0.6937315464019775, + "learning_rate": 0.0006130662229463301, + "loss": 1.3871, + "step": 5337 + }, + { + "epoch": 0.5630801687763713, + "grad_norm": 0.6573540568351746, + "learning_rate": 0.0006128193909925425, + "loss": 1.3762, + "step": 5338 + }, + { + "epoch": 0.5631856540084388, + "grad_norm": 0.7015396952629089, + "learning_rate": 0.0006125725744105263, + "loss": 1.4039, + "step": 5339 + }, + { + "epoch": 0.5632911392405063, + "grad_norm": 0.6269904375076294, + "learning_rate": 0.000612325773227938, + "loss": 1.408, + "step": 5340 + }, + { + "epoch": 0.5633966244725739, + "grad_norm": 0.7388302683830261, + "learning_rate": 0.0006120789874724336, + "loss": 1.4385, + "step": 5341 + }, + { + "epoch": 0.5635021097046413, + 
"grad_norm": 0.6892909407615662, + "learning_rate": 0.0006118322171716665, + "loss": 1.3854, + "step": 5342 + }, + { + "epoch": 0.5636075949367089, + "grad_norm": 0.6652457118034363, + "learning_rate": 0.0006115854623532884, + "loss": 1.4138, + "step": 5343 + }, + { + "epoch": 0.5637130801687764, + "grad_norm": 0.7734463810920715, + "learning_rate": 0.0006113387230449493, + "loss": 1.3806, + "step": 5344 + }, + { + "epoch": 0.5638185654008439, + "grad_norm": 0.6586437821388245, + "learning_rate": 0.0006110919992742978, + "loss": 1.4086, + "step": 5345 + }, + { + "epoch": 0.5639240506329114, + "grad_norm": 0.8925288915634155, + "learning_rate": 0.0006108452910689804, + "loss": 1.3855, + "step": 5346 + }, + { + "epoch": 0.564029535864979, + "grad_norm": 0.6426056027412415, + "learning_rate": 0.0006105985984566421, + "loss": 1.3464, + "step": 5347 + }, + { + "epoch": 0.5641350210970464, + "grad_norm": 0.7447285652160645, + "learning_rate": 0.0006103519214649256, + "loss": 1.3803, + "step": 5348 + }, + { + "epoch": 0.5642405063291139, + "grad_norm": 0.7279619574546814, + "learning_rate": 0.000610105260121473, + "loss": 1.3935, + "step": 5349 + }, + { + "epoch": 0.5643459915611815, + "grad_norm": 0.670150876045227, + "learning_rate": 0.0006098586144539235, + "loss": 1.371, + "step": 5350 + }, + { + "epoch": 0.5644514767932489, + "grad_norm": 0.8870493173599243, + "learning_rate": 0.0006096119844899151, + "loss": 1.383, + "step": 5351 + }, + { + "epoch": 0.5645569620253165, + "grad_norm": 0.7050657272338867, + "learning_rate": 0.000609365370257084, + "loss": 1.3871, + "step": 5352 + }, + { + "epoch": 0.564662447257384, + "grad_norm": 0.7147752046585083, + "learning_rate": 0.0006091187717830643, + "loss": 1.4131, + "step": 5353 + }, + { + "epoch": 0.5647679324894515, + "grad_norm": 0.6775675415992737, + "learning_rate": 0.0006088721890954887, + "loss": 1.4007, + "step": 5354 + }, + { + "epoch": 0.564873417721519, + "grad_norm": 0.6795251965522766, + "learning_rate": 
0.0006086256222219881, + "loss": 1.3965, + "step": 5355 + }, + { + "epoch": 0.5649789029535865, + "grad_norm": 0.7270863652229309, + "learning_rate": 0.0006083790711901915, + "loss": 1.3665, + "step": 5356 + }, + { + "epoch": 0.565084388185654, + "grad_norm": 0.6442669630050659, + "learning_rate": 0.0006081325360277257, + "loss": 1.3903, + "step": 5357 + }, + { + "epoch": 0.5651898734177215, + "grad_norm": 0.7260444760322571, + "learning_rate": 0.0006078860167622171, + "loss": 1.4114, + "step": 5358 + }, + { + "epoch": 0.5652953586497891, + "grad_norm": 0.6660505533218384, + "learning_rate": 0.000607639513421289, + "loss": 1.4142, + "step": 5359 + }, + { + "epoch": 0.5654008438818565, + "grad_norm": 0.6614176034927368, + "learning_rate": 0.0006073930260325632, + "loss": 1.4368, + "step": 5360 + }, + { + "epoch": 0.5655063291139241, + "grad_norm": 0.629261314868927, + "learning_rate": 0.0006071465546236601, + "loss": 1.4005, + "step": 5361 + }, + { + "epoch": 0.5656118143459916, + "grad_norm": 0.6635757088661194, + "learning_rate": 0.0006069000992221977, + "loss": 1.3969, + "step": 5362 + }, + { + "epoch": 0.565717299578059, + "grad_norm": 0.6654855012893677, + "learning_rate": 0.0006066536598557927, + "loss": 1.4562, + "step": 5363 + }, + { + "epoch": 0.5658227848101266, + "grad_norm": 0.6679890155792236, + "learning_rate": 0.0006064072365520601, + "loss": 1.3921, + "step": 5364 + }, + { + "epoch": 0.5659282700421941, + "grad_norm": 0.6442721486091614, + "learning_rate": 0.0006061608293386126, + "loss": 1.3867, + "step": 5365 + }, + { + "epoch": 0.5660337552742616, + "grad_norm": 0.7033657431602478, + "learning_rate": 0.0006059144382430612, + "loss": 1.4066, + "step": 5366 + }, + { + "epoch": 0.5661392405063291, + "grad_norm": 0.8465014696121216, + "learning_rate": 0.0006056680632930154, + "loss": 1.3885, + "step": 5367 + }, + { + "epoch": 0.5662447257383966, + "grad_norm": 0.6523124575614929, + "learning_rate": 0.0006054217045160831, + "loss": 1.3947, + "step": 
5368 + }, + { + "epoch": 0.5663502109704641, + "grad_norm": 0.8063421249389648, + "learning_rate": 0.0006051753619398697, + "loss": 1.3628, + "step": 5369 + }, + { + "epoch": 0.5664556962025317, + "grad_norm": 0.6755914092063904, + "learning_rate": 0.0006049290355919792, + "loss": 1.4011, + "step": 5370 + }, + { + "epoch": 0.5665611814345991, + "grad_norm": 0.7082235813140869, + "learning_rate": 0.0006046827255000135, + "loss": 1.4044, + "step": 5371 + }, + { + "epoch": 0.5666666666666667, + "grad_norm": 0.7035396099090576, + "learning_rate": 0.0006044364316915733, + "loss": 1.3951, + "step": 5372 + }, + { + "epoch": 0.5667721518987342, + "grad_norm": 0.7077887058258057, + "learning_rate": 0.0006041901541942565, + "loss": 1.3612, + "step": 5373 + }, + { + "epoch": 0.5668776371308016, + "grad_norm": 0.775062620639801, + "learning_rate": 0.0006039438930356601, + "loss": 1.4055, + "step": 5374 + }, + { + "epoch": 0.5669831223628692, + "grad_norm": 0.6433625817298889, + "learning_rate": 0.0006036976482433787, + "loss": 1.3888, + "step": 5375 + }, + { + "epoch": 0.5670886075949367, + "grad_norm": 0.822785496711731, + "learning_rate": 0.0006034514198450053, + "loss": 1.3553, + "step": 5376 + }, + { + "epoch": 0.5671940928270042, + "grad_norm": 0.6663935780525208, + "learning_rate": 0.0006032052078681312, + "loss": 1.4401, + "step": 5377 + }, + { + "epoch": 0.5672995780590717, + "grad_norm": 0.8586695194244385, + "learning_rate": 0.0006029590123403456, + "loss": 1.346, + "step": 5378 + }, + { + "epoch": 0.5674050632911393, + "grad_norm": 0.7171367406845093, + "learning_rate": 0.0006027128332892358, + "loss": 1.4169, + "step": 5379 + }, + { + "epoch": 0.5675105485232067, + "grad_norm": 0.6873763799667358, + "learning_rate": 0.0006024666707423875, + "loss": 1.3975, + "step": 5380 + }, + { + "epoch": 0.5676160337552743, + "grad_norm": 0.6659673452377319, + "learning_rate": 0.0006022205247273845, + "loss": 1.3542, + "step": 5381 + }, + { + "epoch": 0.5677215189873418, + 
"grad_norm": 0.7083982229232788, + "learning_rate": 0.0006019743952718085, + "loss": 1.3534, + "step": 5382 + }, + { + "epoch": 0.5678270042194092, + "grad_norm": 0.6873295903205872, + "learning_rate": 0.0006017282824032394, + "loss": 1.394, + "step": 5383 + }, + { + "epoch": 0.5679324894514768, + "grad_norm": 0.7925366759300232, + "learning_rate": 0.0006014821861492559, + "loss": 1.4003, + "step": 5384 + }, + { + "epoch": 0.5680379746835443, + "grad_norm": 0.7300980091094971, + "learning_rate": 0.0006012361065374339, + "loss": 1.3858, + "step": 5385 + }, + { + "epoch": 0.5681434599156118, + "grad_norm": 0.6352285146713257, + "learning_rate": 0.0006009900435953478, + "loss": 1.4114, + "step": 5386 + }, + { + "epoch": 0.5682489451476793, + "grad_norm": 0.68449866771698, + "learning_rate": 0.0006007439973505707, + "loss": 1.4046, + "step": 5387 + }, + { + "epoch": 0.5683544303797469, + "grad_norm": 0.6592658162117004, + "learning_rate": 0.0006004979678306729, + "loss": 1.3977, + "step": 5388 + }, + { + "epoch": 0.5684599156118143, + "grad_norm": 0.6655882596969604, + "learning_rate": 0.0006002519550632232, + "loss": 1.398, + "step": 5389 + }, + { + "epoch": 0.5685654008438819, + "grad_norm": 0.7091255784034729, + "learning_rate": 0.0006000059590757886, + "loss": 1.3988, + "step": 5390 + }, + { + "epoch": 0.5686708860759494, + "grad_norm": 0.6726558208465576, + "learning_rate": 0.0005997599798959343, + "loss": 1.3822, + "step": 5391 + }, + { + "epoch": 0.5687763713080168, + "grad_norm": 0.6887083053588867, + "learning_rate": 0.0005995140175512233, + "loss": 1.3881, + "step": 5392 + }, + { + "epoch": 0.5688818565400844, + "grad_norm": 0.6512153744697571, + "learning_rate": 0.000599268072069217, + "loss": 1.4181, + "step": 5393 + }, + { + "epoch": 0.5689873417721519, + "grad_norm": 0.685824453830719, + "learning_rate": 0.0005990221434774751, + "loss": 1.4002, + "step": 5394 + }, + { + "epoch": 0.5690928270042194, + "grad_norm": 0.6290827393531799, + "learning_rate": 
0.0005987762318035546, + "loss": 1.3791, + "step": 5395 + }, + { + "epoch": 0.5691983122362869, + "grad_norm": 0.6750633120536804, + "learning_rate": 0.0005985303370750115, + "loss": 1.3939, + "step": 5396 + }, + { + "epoch": 0.5693037974683545, + "grad_norm": 0.7379575371742249, + "learning_rate": 0.0005982844593193995, + "loss": 1.4226, + "step": 5397 + }, + { + "epoch": 0.5694092827004219, + "grad_norm": 0.6484873294830322, + "learning_rate": 0.0005980385985642703, + "loss": 1.3893, + "step": 5398 + }, + { + "epoch": 0.5695147679324895, + "grad_norm": 0.9043927192687988, + "learning_rate": 0.000597792754837174, + "loss": 1.3762, + "step": 5399 + }, + { + "epoch": 0.569620253164557, + "grad_norm": 0.6823144555091858, + "learning_rate": 0.0005975469281656581, + "loss": 1.3653, + "step": 5400 + }, + { + "epoch": 0.5697257383966244, + "grad_norm": 0.7024640440940857, + "learning_rate": 0.0005973011185772694, + "loss": 1.3946, + "step": 5401 + }, + { + "epoch": 0.569831223628692, + "grad_norm": 0.7248722910881042, + "learning_rate": 0.0005970553260995517, + "loss": 1.3968, + "step": 5402 + }, + { + "epoch": 0.5699367088607595, + "grad_norm": 0.7112752795219421, + "learning_rate": 0.0005968095507600476, + "loss": 1.4044, + "step": 5403 + }, + { + "epoch": 0.570042194092827, + "grad_norm": 0.7135589718818665, + "learning_rate": 0.000596563792586297, + "loss": 1.4466, + "step": 5404 + }, + { + "epoch": 0.5701476793248945, + "grad_norm": 0.7296105623245239, + "learning_rate": 0.0005963180516058386, + "loss": 1.383, + "step": 5405 + }, + { + "epoch": 0.5702531645569621, + "grad_norm": 0.7002021670341492, + "learning_rate": 0.0005960723278462086, + "loss": 1.3639, + "step": 5406 + }, + { + "epoch": 0.5703586497890295, + "grad_norm": 0.7388964891433716, + "learning_rate": 0.0005958266213349422, + "loss": 1.3822, + "step": 5407 + }, + { + "epoch": 0.570464135021097, + "grad_norm": 0.7172393798828125, + "learning_rate": 0.0005955809320995714, + "loss": 1.3888, + "step": 5408 
+ }, + { + "epoch": 0.5705696202531646, + "grad_norm": 0.7080879807472229, + "learning_rate": 0.0005953352601676272, + "loss": 1.4071, + "step": 5409 + }, + { + "epoch": 0.570675105485232, + "grad_norm": 0.7052099108695984, + "learning_rate": 0.0005950896055666384, + "loss": 1.3774, + "step": 5410 + }, + { + "epoch": 0.5707805907172996, + "grad_norm": 0.731828510761261, + "learning_rate": 0.0005948439683241318, + "loss": 1.393, + "step": 5411 + }, + { + "epoch": 0.5708860759493671, + "grad_norm": 0.6735844016075134, + "learning_rate": 0.0005945983484676321, + "loss": 1.4003, + "step": 5412 + }, + { + "epoch": 0.5709915611814346, + "grad_norm": 0.693645715713501, + "learning_rate": 0.0005943527460246625, + "loss": 1.4062, + "step": 5413 + }, + { + "epoch": 0.5710970464135021, + "grad_norm": 0.734569787979126, + "learning_rate": 0.0005941071610227437, + "loss": 1.3473, + "step": 5414 + }, + { + "epoch": 0.5712025316455697, + "grad_norm": 0.6523593068122864, + "learning_rate": 0.000593861593489395, + "loss": 1.4182, + "step": 5415 + }, + { + "epoch": 0.5713080168776371, + "grad_norm": 0.6255670189857483, + "learning_rate": 0.000593616043452133, + "loss": 1.3801, + "step": 5416 + }, + { + "epoch": 0.5714135021097047, + "grad_norm": 0.6620481014251709, + "learning_rate": 0.0005933705109384735, + "loss": 1.4038, + "step": 5417 + }, + { + "epoch": 0.5715189873417722, + "grad_norm": 0.6752240061759949, + "learning_rate": 0.000593124995975929, + "loss": 1.3982, + "step": 5418 + }, + { + "epoch": 0.5716244725738396, + "grad_norm": 0.6579616069793701, + "learning_rate": 0.000592879498592011, + "loss": 1.366, + "step": 5419 + }, + { + "epoch": 0.5717299578059072, + "grad_norm": 0.6236806511878967, + "learning_rate": 0.0005926340188142289, + "loss": 1.3774, + "step": 5420 + }, + { + "epoch": 0.5718354430379747, + "grad_norm": 0.676558256149292, + "learning_rate": 0.0005923885566700896, + "loss": 1.4147, + "step": 5421 + }, + { + "epoch": 0.5719409282700422, + "grad_norm": 
0.7086194753646851, + "learning_rate": 0.0005921431121870984, + "loss": 1.3977, + "step": 5422 + }, + { + "epoch": 0.5720464135021097, + "grad_norm": 0.7185627222061157, + "learning_rate": 0.0005918976853927586, + "loss": 1.3478, + "step": 5423 + }, + { + "epoch": 0.5721518987341773, + "grad_norm": 0.692703127861023, + "learning_rate": 0.0005916522763145715, + "loss": 1.357, + "step": 5424 + }, + { + "epoch": 0.5722573839662447, + "grad_norm": 0.8138614296913147, + "learning_rate": 0.0005914068849800365, + "loss": 1.3906, + "step": 5425 + }, + { + "epoch": 0.5723628691983123, + "grad_norm": 0.6987360119819641, + "learning_rate": 0.0005911615114166508, + "loss": 1.3971, + "step": 5426 + }, + { + "epoch": 0.5724683544303798, + "grad_norm": 0.7069118022918701, + "learning_rate": 0.0005909161556519096, + "loss": 1.42, + "step": 5427 + }, + { + "epoch": 0.5725738396624472, + "grad_norm": 0.6628559827804565, + "learning_rate": 0.0005906708177133066, + "loss": 1.3883, + "step": 5428 + }, + { + "epoch": 0.5726793248945148, + "grad_norm": 0.6844552159309387, + "learning_rate": 0.0005904254976283331, + "loss": 1.3921, + "step": 5429 + }, + { + "epoch": 0.5727848101265823, + "grad_norm": 0.6624218225479126, + "learning_rate": 0.0005901801954244782, + "loss": 1.3587, + "step": 5430 + }, + { + "epoch": 0.5728902953586498, + "grad_norm": 0.7679488658905029, + "learning_rate": 0.0005899349111292293, + "loss": 1.4107, + "step": 5431 + }, + { + "epoch": 0.5729957805907173, + "grad_norm": 0.6474065780639648, + "learning_rate": 0.0005896896447700718, + "loss": 1.3673, + "step": 5432 + }, + { + "epoch": 0.5731012658227848, + "grad_norm": 0.9679906368255615, + "learning_rate": 0.0005894443963744891, + "loss": 1.3671, + "step": 5433 + }, + { + "epoch": 0.5732067510548523, + "grad_norm": 0.7596959471702576, + "learning_rate": 0.0005891991659699622, + "loss": 1.4403, + "step": 5434 + }, + { + "epoch": 0.5733122362869199, + "grad_norm": 0.8727274537086487, + "learning_rate": 
0.0005889539535839704, + "loss": 1.3509, + "step": 5435 + }, + { + "epoch": 0.5734177215189873, + "grad_norm": 0.786971926689148, + "learning_rate": 0.0005887087592439914, + "loss": 1.3703, + "step": 5436 + }, + { + "epoch": 0.5735232067510548, + "grad_norm": 0.6701396703720093, + "learning_rate": 0.0005884635829775002, + "loss": 1.3639, + "step": 5437 + }, + { + "epoch": 0.5736286919831224, + "grad_norm": 0.6716476082801819, + "learning_rate": 0.00058821842481197, + "loss": 1.4015, + "step": 5438 + }, + { + "epoch": 0.5737341772151898, + "grad_norm": 0.7215219736099243, + "learning_rate": 0.0005879732847748721, + "loss": 1.3993, + "step": 5439 + }, + { + "epoch": 0.5738396624472574, + "grad_norm": 0.6998829245567322, + "learning_rate": 0.0005877281628936756, + "loss": 1.3868, + "step": 5440 + }, + { + "epoch": 0.5739451476793249, + "grad_norm": 0.8445325493812561, + "learning_rate": 0.0005874830591958474, + "loss": 1.4039, + "step": 5441 + }, + { + "epoch": 0.5740506329113924, + "grad_norm": 0.6754697561264038, + "learning_rate": 0.000587237973708853, + "loss": 1.3995, + "step": 5442 + }, + { + "epoch": 0.5741561181434599, + "grad_norm": 0.6504505276679993, + "learning_rate": 0.0005869929064601551, + "loss": 1.394, + "step": 5443 + }, + { + "epoch": 0.5742616033755275, + "grad_norm": 0.9157344698905945, + "learning_rate": 0.0005867478574772147, + "loss": 1.3902, + "step": 5444 + }, + { + "epoch": 0.5743670886075949, + "grad_norm": 0.6474612951278687, + "learning_rate": 0.0005865028267874911, + "loss": 1.3715, + "step": 5445 + }, + { + "epoch": 0.5744725738396624, + "grad_norm": 0.898643434047699, + "learning_rate": 0.0005862578144184412, + "loss": 1.396, + "step": 5446 + }, + { + "epoch": 0.57457805907173, + "grad_norm": 0.6937795877456665, + "learning_rate": 0.0005860128203975196, + "loss": 1.4059, + "step": 5447 + }, + { + "epoch": 0.5746835443037974, + "grad_norm": 0.7393977642059326, + "learning_rate": 0.0005857678447521791, + "loss": 1.3993, + "step": 5448 + 
}, + { + "epoch": 0.574789029535865, + "grad_norm": 0.737991452217102, + "learning_rate": 0.0005855228875098706, + "loss": 1.3908, + "step": 5449 + }, + { + "epoch": 0.5748945147679325, + "grad_norm": 0.7911285758018494, + "learning_rate": 0.0005852779486980427, + "loss": 1.3705, + "step": 5450 + }, + { + "epoch": 0.575, + "grad_norm": 0.7419139742851257, + "learning_rate": 0.000585033028344142, + "loss": 1.3882, + "step": 5451 + }, + { + "epoch": 0.5751054852320675, + "grad_norm": 0.7037606239318848, + "learning_rate": 0.0005847881264756131, + "loss": 1.3546, + "step": 5452 + }, + { + "epoch": 0.575210970464135, + "grad_norm": 0.7464966773986816, + "learning_rate": 0.0005845432431198981, + "loss": 1.4185, + "step": 5453 + }, + { + "epoch": 0.5753164556962025, + "grad_norm": 0.7299268841743469, + "learning_rate": 0.0005842983783044381, + "loss": 1.3977, + "step": 5454 + }, + { + "epoch": 0.57542194092827, + "grad_norm": 0.7253472208976746, + "learning_rate": 0.0005840535320566711, + "loss": 1.442, + "step": 5455 + }, + { + "epoch": 0.5755274261603376, + "grad_norm": 0.747340977191925, + "learning_rate": 0.0005838087044040334, + "loss": 1.3684, + "step": 5456 + }, + { + "epoch": 0.575632911392405, + "grad_norm": 0.6816190481185913, + "learning_rate": 0.0005835638953739589, + "loss": 1.4062, + "step": 5457 + }, + { + "epoch": 0.5757383966244726, + "grad_norm": 0.647643506526947, + "learning_rate": 0.00058331910499388, + "loss": 1.3807, + "step": 5458 + }, + { + "epoch": 0.5758438818565401, + "grad_norm": 0.6834579706192017, + "learning_rate": 0.0005830743332912264, + "loss": 1.377, + "step": 5459 + }, + { + "epoch": 0.5759493670886076, + "grad_norm": 0.6728641390800476, + "learning_rate": 0.0005828295802934263, + "loss": 1.4022, + "step": 5460 + }, + { + "epoch": 0.5760548523206751, + "grad_norm": 0.7252381443977356, + "learning_rate": 0.0005825848460279048, + "loss": 1.3575, + "step": 5461 + }, + { + "epoch": 0.5761603375527427, + "grad_norm": 0.767842173576355, + 
"learning_rate": 0.0005823401305220865, + "loss": 1.3569, + "step": 5462 + }, + { + "epoch": 0.5762658227848101, + "grad_norm": 0.6995462775230408, + "learning_rate": 0.0005820954338033925, + "loss": 1.3929, + "step": 5463 + }, + { + "epoch": 0.5763713080168776, + "grad_norm": 0.7932540774345398, + "learning_rate": 0.0005818507558992426, + "loss": 1.3805, + "step": 5464 + }, + { + "epoch": 0.5764767932489452, + "grad_norm": 0.7374727129936218, + "learning_rate": 0.0005816060968370538, + "loss": 1.4281, + "step": 5465 + }, + { + "epoch": 0.5765822784810126, + "grad_norm": 0.7629088759422302, + "learning_rate": 0.0005813614566442416, + "loss": 1.412, + "step": 5466 + }, + { + "epoch": 0.5766877637130802, + "grad_norm": 0.686621904373169, + "learning_rate": 0.0005811168353482191, + "loss": 1.4038, + "step": 5467 + }, + { + "epoch": 0.5767932489451477, + "grad_norm": 0.7167893648147583, + "learning_rate": 0.0005808722329763974, + "loss": 1.4028, + "step": 5468 + }, + { + "epoch": 0.5768987341772152, + "grad_norm": 0.7302418947219849, + "learning_rate": 0.0005806276495561852, + "loss": 1.4072, + "step": 5469 + }, + { + "epoch": 0.5770042194092827, + "grad_norm": 0.8128111958503723, + "learning_rate": 0.0005803830851149892, + "loss": 1.3928, + "step": 5470 + }, + { + "epoch": 0.5771097046413503, + "grad_norm": 0.7854123115539551, + "learning_rate": 0.0005801385396802146, + "loss": 1.4088, + "step": 5471 + }, + { + "epoch": 0.5772151898734177, + "grad_norm": 0.8283343315124512, + "learning_rate": 0.0005798940132792636, + "loss": 1.3881, + "step": 5472 + }, + { + "epoch": 0.5773206751054852, + "grad_norm": 0.7754322290420532, + "learning_rate": 0.0005796495059395367, + "loss": 1.3664, + "step": 5473 + }, + { + "epoch": 0.5774261603375528, + "grad_norm": 0.8291491866111755, + "learning_rate": 0.0005794050176884321, + "loss": 1.3712, + "step": 5474 + }, + { + "epoch": 0.5775316455696202, + "grad_norm": 0.6883587837219238, + "learning_rate": 0.0005791605485533459, + "loss": 
1.3831, + "step": 5475 + }, + { + "epoch": 0.5776371308016878, + "grad_norm": 0.8085895776748657, + "learning_rate": 0.0005789160985616721, + "loss": 1.3742, + "step": 5476 + }, + { + "epoch": 0.5777426160337553, + "grad_norm": 0.6637422442436218, + "learning_rate": 0.0005786716677408025, + "loss": 1.4213, + "step": 5477 + }, + { + "epoch": 0.5778481012658228, + "grad_norm": 0.7134640216827393, + "learning_rate": 0.0005784272561181269, + "loss": 1.407, + "step": 5478 + }, + { + "epoch": 0.5779535864978903, + "grad_norm": 0.7846036553382874, + "learning_rate": 0.0005781828637210325, + "loss": 1.4033, + "step": 5479 + }, + { + "epoch": 0.5780590717299579, + "grad_norm": 0.6611788868904114, + "learning_rate": 0.0005779384905769053, + "loss": 1.4278, + "step": 5480 + }, + { + "epoch": 0.5781645569620253, + "grad_norm": 0.7747367024421692, + "learning_rate": 0.0005776941367131282, + "loss": 1.3551, + "step": 5481 + }, + { + "epoch": 0.5782700421940928, + "grad_norm": 0.8550449013710022, + "learning_rate": 0.0005774498021570824, + "loss": 1.4028, + "step": 5482 + }, + { + "epoch": 0.5783755274261604, + "grad_norm": 0.6654477715492249, + "learning_rate": 0.0005772054869361465, + "loss": 1.4016, + "step": 5483 + }, + { + "epoch": 0.5784810126582278, + "grad_norm": 0.9350137114524841, + "learning_rate": 0.0005769611910776975, + "loss": 1.4009, + "step": 5484 + }, + { + "epoch": 0.5785864978902954, + "grad_norm": 0.6662137508392334, + "learning_rate": 0.0005767169146091098, + "loss": 1.3942, + "step": 5485 + }, + { + "epoch": 0.5786919831223629, + "grad_norm": 0.8201909065246582, + "learning_rate": 0.0005764726575577559, + "loss": 1.381, + "step": 5486 + }, + { + "epoch": 0.5787974683544304, + "grad_norm": 0.7256902456283569, + "learning_rate": 0.0005762284199510059, + "loss": 1.4145, + "step": 5487 + }, + { + "epoch": 0.5789029535864979, + "grad_norm": 0.7012301683425903, + "learning_rate": 0.000575984201816228, + "loss": 1.392, + "step": 5488 + }, + { + "epoch": 
0.5790084388185655, + "grad_norm": 0.9852876663208008, + "learning_rate": 0.0005757400031807881, + "loss": 1.4493, + "step": 5489 + }, + { + "epoch": 0.5791139240506329, + "grad_norm": 0.7548601627349854, + "learning_rate": 0.0005754958240720498, + "loss": 1.3807, + "step": 5490 + }, + { + "epoch": 0.5792194092827004, + "grad_norm": 0.986919105052948, + "learning_rate": 0.0005752516645173745, + "loss": 1.3841, + "step": 5491 + }, + { + "epoch": 0.579324894514768, + "grad_norm": 0.6619555354118347, + "learning_rate": 0.0005750075245441218, + "loss": 1.386, + "step": 5492 + }, + { + "epoch": 0.5794303797468354, + "grad_norm": 0.9628007411956787, + "learning_rate": 0.0005747634041796484, + "loss": 1.4037, + "step": 5493 + }, + { + "epoch": 0.579535864978903, + "grad_norm": 0.7301965951919556, + "learning_rate": 0.0005745193034513092, + "loss": 1.375, + "step": 5494 + }, + { + "epoch": 0.5796413502109705, + "grad_norm": 0.7675021886825562, + "learning_rate": 0.0005742752223864573, + "loss": 1.4112, + "step": 5495 + }, + { + "epoch": 0.579746835443038, + "grad_norm": 0.652184009552002, + "learning_rate": 0.0005740311610124427, + "loss": 1.3648, + "step": 5496 + }, + { + "epoch": 0.5798523206751055, + "grad_norm": 0.7237139940261841, + "learning_rate": 0.0005737871193566141, + "loss": 1.3774, + "step": 5497 + }, + { + "epoch": 0.5799578059071729, + "grad_norm": 0.8201614022254944, + "learning_rate": 0.0005735430974463175, + "loss": 1.3833, + "step": 5498 + }, + { + "epoch": 0.5800632911392405, + "grad_norm": 0.9431794285774231, + "learning_rate": 0.0005732990953088968, + "loss": 1.4279, + "step": 5499 + }, + { + "epoch": 0.580168776371308, + "grad_norm": 0.9232782125473022, + "learning_rate": 0.0005730551129716936, + "loss": 1.3644, + "step": 5500 + }, + { + "epoch": 0.5802742616033755, + "grad_norm": 1.1876574754714966, + "learning_rate": 0.0005728111504620472, + "loss": 1.3749, + "step": 5501 + }, + { + "epoch": 0.580379746835443, + "grad_norm": 1.0440293550491333, + 
"learning_rate": 0.000572567207807295, + "loss": 1.4189, + "step": 5502 + }, + { + "epoch": 0.5804852320675106, + "grad_norm": 0.9308492541313171, + "learning_rate": 0.000572323285034772, + "loss": 1.4047, + "step": 5503 + }, + { + "epoch": 0.580590717299578, + "grad_norm": 1.4647676944732666, + "learning_rate": 0.0005720793821718108, + "loss": 1.3815, + "step": 5504 + }, + { + "epoch": 0.5806962025316456, + "grad_norm": 0.6392360329627991, + "learning_rate": 0.0005718354992457417, + "loss": 1.3662, + "step": 5505 + }, + { + "epoch": 0.5808016877637131, + "grad_norm": 1.3475843667984009, + "learning_rate": 0.0005715916362838936, + "loss": 1.427, + "step": 5506 + }, + { + "epoch": 0.5809071729957805, + "grad_norm": 0.9196277260780334, + "learning_rate": 0.0005713477933135923, + "loss": 1.4178, + "step": 5507 + }, + { + "epoch": 0.5810126582278481, + "grad_norm": 1.4624749422073364, + "learning_rate": 0.0005711039703621616, + "loss": 1.3785, + "step": 5508 + }, + { + "epoch": 0.5811181434599156, + "grad_norm": 1.0691524744033813, + "learning_rate": 0.0005708601674569232, + "loss": 1.403, + "step": 5509 + }, + { + "epoch": 0.5812236286919831, + "grad_norm": 0.7909343242645264, + "learning_rate": 0.0005706163846251961, + "loss": 1.3803, + "step": 5510 + }, + { + "epoch": 0.5813291139240506, + "grad_norm": 1.1469240188598633, + "learning_rate": 0.0005703726218942976, + "loss": 1.3584, + "step": 5511 + }, + { + "epoch": 0.5814345991561182, + "grad_norm": 0.7577003836631775, + "learning_rate": 0.0005701288792915427, + "loss": 1.3903, + "step": 5512 + }, + { + "epoch": 0.5815400843881856, + "grad_norm": 1.1654030084609985, + "learning_rate": 0.0005698851568442434, + "loss": 1.4009, + "step": 5513 + }, + { + "epoch": 0.5816455696202532, + "grad_norm": 0.7245790362358093, + "learning_rate": 0.0005696414545797108, + "loss": 1.405, + "step": 5514 + }, + { + "epoch": 0.5817510548523207, + "grad_norm": 0.8939972519874573, + "learning_rate": 0.0005693977725252525, + "loss": 
1.3915, + "step": 5515 + }, + { + "epoch": 0.5818565400843881, + "grad_norm": 0.7667882442474365, + "learning_rate": 0.0005691541107081743, + "loss": 1.3472, + "step": 5516 + }, + { + "epoch": 0.5819620253164557, + "grad_norm": 0.7426298260688782, + "learning_rate": 0.0005689104691557798, + "loss": 1.3667, + "step": 5517 + }, + { + "epoch": 0.5820675105485232, + "grad_norm": 1.0266950130462646, + "learning_rate": 0.0005686668478953702, + "loss": 1.405, + "step": 5518 + }, + { + "epoch": 0.5821729957805907, + "grad_norm": 0.7885024547576904, + "learning_rate": 0.0005684232469542446, + "loss": 1.3863, + "step": 5519 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 0.7607517838478088, + "learning_rate": 0.0005681796663596996, + "loss": 1.4163, + "step": 5520 + }, + { + "epoch": 0.5823839662447258, + "grad_norm": 0.7522769570350647, + "learning_rate": 0.0005679361061390295, + "loss": 1.3624, + "step": 5521 + }, + { + "epoch": 0.5824894514767932, + "grad_norm": 1.0042976140975952, + "learning_rate": 0.0005676925663195263, + "loss": 1.3719, + "step": 5522 + }, + { + "epoch": 0.5825949367088608, + "grad_norm": 0.7602242827415466, + "learning_rate": 0.0005674490469284805, + "loss": 1.3618, + "step": 5523 + }, + { + "epoch": 0.5827004219409283, + "grad_norm": 0.8286980986595154, + "learning_rate": 0.0005672055479931791, + "loss": 1.3912, + "step": 5524 + }, + { + "epoch": 0.5828059071729957, + "grad_norm": 0.9181353449821472, + "learning_rate": 0.0005669620695409076, + "loss": 1.3711, + "step": 5525 + }, + { + "epoch": 0.5829113924050633, + "grad_norm": 0.690788209438324, + "learning_rate": 0.000566718611598949, + "loss": 1.3876, + "step": 5526 + }, + { + "epoch": 0.5830168776371308, + "grad_norm": 1.0007014274597168, + "learning_rate": 0.0005664751741945839, + "loss": 1.3812, + "step": 5527 + }, + { + "epoch": 0.5831223628691983, + "grad_norm": 0.6508147120475769, + "learning_rate": 0.0005662317573550906, + "loss": 1.3984, + "step": 5528 + }, + { + "epoch": 
0.5832278481012658, + "grad_norm": 0.9458132982254028, + "learning_rate": 0.0005659883611077453, + "loss": 1.3687, + "step": 5529 + }, + { + "epoch": 0.5833333333333334, + "grad_norm": 0.6604735255241394, + "learning_rate": 0.0005657449854798216, + "loss": 1.4144, + "step": 5530 + }, + { + "epoch": 0.5834388185654008, + "grad_norm": 0.814017653465271, + "learning_rate": 0.0005655016304985908, + "loss": 1.4043, + "step": 5531 + }, + { + "epoch": 0.5835443037974684, + "grad_norm": 0.7375466227531433, + "learning_rate": 0.0005652582961913227, + "loss": 1.3577, + "step": 5532 + }, + { + "epoch": 0.5836497890295359, + "grad_norm": 0.6597685217857361, + "learning_rate": 0.0005650149825852836, + "loss": 1.3573, + "step": 5533 + }, + { + "epoch": 0.5837552742616033, + "grad_norm": 0.8054673671722412, + "learning_rate": 0.0005647716897077382, + "loss": 1.3544, + "step": 5534 + }, + { + "epoch": 0.5838607594936709, + "grad_norm": 0.8557998538017273, + "learning_rate": 0.0005645284175859486, + "loss": 1.3727, + "step": 5535 + }, + { + "epoch": 0.5839662447257384, + "grad_norm": 0.723142683506012, + "learning_rate": 0.0005642851662471745, + "loss": 1.3726, + "step": 5536 + }, + { + "epoch": 0.5840717299578059, + "grad_norm": 0.8156350255012512, + "learning_rate": 0.0005640419357186738, + "loss": 1.3714, + "step": 5537 + }, + { + "epoch": 0.5841772151898734, + "grad_norm": 0.6839673519134521, + "learning_rate": 0.0005637987260277013, + "loss": 1.4292, + "step": 5538 + }, + { + "epoch": 0.584282700421941, + "grad_norm": 0.7457057237625122, + "learning_rate": 0.0005635555372015099, + "loss": 1.3763, + "step": 5539 + }, + { + "epoch": 0.5843881856540084, + "grad_norm": 0.7263725996017456, + "learning_rate": 0.0005633123692673503, + "loss": 1.4082, + "step": 5540 + }, + { + "epoch": 0.584493670886076, + "grad_norm": 0.670842170715332, + "learning_rate": 0.0005630692222524709, + "loss": 1.3989, + "step": 5541 + }, + { + "epoch": 0.5845991561181435, + "grad_norm": 0.6849950551986694, 
+ "learning_rate": 0.0005628260961841171, + "loss": 1.3932, + "step": 5542 + }, + { + "epoch": 0.5847046413502109, + "grad_norm": 0.7166295647621155, + "learning_rate": 0.0005625829910895325, + "loss": 1.3981, + "step": 5543 + }, + { + "epoch": 0.5848101265822785, + "grad_norm": 0.6629204750061035, + "learning_rate": 0.0005623399069959585, + "loss": 1.3863, + "step": 5544 + }, + { + "epoch": 0.584915611814346, + "grad_norm": 0.6753358840942383, + "learning_rate": 0.0005620968439306335, + "loss": 1.3997, + "step": 5545 + }, + { + "epoch": 0.5850210970464135, + "grad_norm": 0.6584798693656921, + "learning_rate": 0.0005618538019207943, + "loss": 1.3814, + "step": 5546 + }, + { + "epoch": 0.585126582278481, + "grad_norm": 0.7124102711677551, + "learning_rate": 0.0005616107809936746, + "loss": 1.3704, + "step": 5547 + }, + { + "epoch": 0.5852320675105486, + "grad_norm": 0.7050707936286926, + "learning_rate": 0.0005613677811765062, + "loss": 1.4129, + "step": 5548 + }, + { + "epoch": 0.585337552742616, + "grad_norm": 0.6635227799415588, + "learning_rate": 0.0005611248024965186, + "loss": 1.4008, + "step": 5549 + }, + { + "epoch": 0.5854430379746836, + "grad_norm": 0.7069467306137085, + "learning_rate": 0.0005608818449809387, + "loss": 1.4252, + "step": 5550 + }, + { + "epoch": 0.5855485232067511, + "grad_norm": 0.7189291715621948, + "learning_rate": 0.0005606389086569911, + "loss": 1.4259, + "step": 5551 + }, + { + "epoch": 0.5856540084388185, + "grad_norm": 0.7274332642555237, + "learning_rate": 0.0005603959935518981, + "loss": 1.4495, + "step": 5552 + }, + { + "epoch": 0.5857594936708861, + "grad_norm": 0.7437248826026917, + "learning_rate": 0.0005601530996928795, + "loss": 1.3849, + "step": 5553 + }, + { + "epoch": 0.5858649789029536, + "grad_norm": 0.7834799289703369, + "learning_rate": 0.0005599102271071527, + "loss": 1.4022, + "step": 5554 + }, + { + "epoch": 0.5859704641350211, + "grad_norm": 0.8934963345527649, + "learning_rate": 0.0005596673758219327, + "loss": 
1.4001, + "step": 5555 + }, + { + "epoch": 0.5860759493670886, + "grad_norm": 0.7854800820350647, + "learning_rate": 0.0005594245458644325, + "loss": 1.3753, + "step": 5556 + }, + { + "epoch": 0.5861814345991562, + "grad_norm": 0.7811126708984375, + "learning_rate": 0.0005591817372618621, + "loss": 1.3756, + "step": 5557 + }, + { + "epoch": 0.5862869198312236, + "grad_norm": 0.7372151613235474, + "learning_rate": 0.0005589389500414296, + "loss": 1.4271, + "step": 5558 + }, + { + "epoch": 0.5863924050632912, + "grad_norm": 0.6688711047172546, + "learning_rate": 0.0005586961842303405, + "loss": 1.3724, + "step": 5559 + }, + { + "epoch": 0.5864978902953587, + "grad_norm": 0.8586269021034241, + "learning_rate": 0.0005584534398557977, + "loss": 1.3877, + "step": 5560 + }, + { + "epoch": 0.5866033755274261, + "grad_norm": 0.7386944890022278, + "learning_rate": 0.0005582107169450023, + "loss": 1.3847, + "step": 5561 + }, + { + "epoch": 0.5867088607594937, + "grad_norm": 0.7729990482330322, + "learning_rate": 0.0005579680155251524, + "loss": 1.4001, + "step": 5562 + }, + { + "epoch": 0.5868143459915611, + "grad_norm": 0.6977526545524597, + "learning_rate": 0.0005577253356234439, + "loss": 1.3723, + "step": 5563 + }, + { + "epoch": 0.5869198312236287, + "grad_norm": 0.7260572910308838, + "learning_rate": 0.0005574826772670703, + "loss": 1.396, + "step": 5564 + }, + { + "epoch": 0.5870253164556962, + "grad_norm": 0.6708013415336609, + "learning_rate": 0.0005572400404832226, + "loss": 1.3838, + "step": 5565 + }, + { + "epoch": 0.5871308016877637, + "grad_norm": 0.809980034828186, + "learning_rate": 0.0005569974252990896, + "loss": 1.3956, + "step": 5566 + }, + { + "epoch": 0.5872362869198312, + "grad_norm": 0.6320539712905884, + "learning_rate": 0.0005567548317418576, + "loss": 1.425, + "step": 5567 + }, + { + "epoch": 0.5873417721518988, + "grad_norm": 0.75371915102005, + "learning_rate": 0.0005565122598387103, + "loss": 1.3596, + "step": 5568 + }, + { + "epoch": 
0.5874472573839662, + "grad_norm": 0.6708369851112366, + "learning_rate": 0.0005562697096168289, + "loss": 1.3573, + "step": 5569 + }, + { + "epoch": 0.5875527426160337, + "grad_norm": 0.6763561964035034, + "learning_rate": 0.0005560271811033928, + "loss": 1.3729, + "step": 5570 + }, + { + "epoch": 0.5876582278481013, + "grad_norm": 0.7292065024375916, + "learning_rate": 0.0005557846743255783, + "loss": 1.3768, + "step": 5571 + }, + { + "epoch": 0.5877637130801687, + "grad_norm": 0.7360208034515381, + "learning_rate": 0.0005555421893105593, + "loss": 1.3877, + "step": 5572 + }, + { + "epoch": 0.5878691983122363, + "grad_norm": 1.0675349235534668, + "learning_rate": 0.0005552997260855077, + "loss": 1.3824, + "step": 5573 + }, + { + "epoch": 0.5879746835443038, + "grad_norm": 0.7946959733963013, + "learning_rate": 0.0005550572846775927, + "loss": 1.3888, + "step": 5574 + }, + { + "epoch": 0.5880801687763713, + "grad_norm": 0.9413056969642639, + "learning_rate": 0.0005548148651139809, + "loss": 1.3964, + "step": 5575 + }, + { + "epoch": 0.5881856540084388, + "grad_norm": 0.892327606678009, + "learning_rate": 0.0005545724674218368, + "loss": 1.3787, + "step": 5576 + }, + { + "epoch": 0.5882911392405064, + "grad_norm": 0.7449572086334229, + "learning_rate": 0.0005543300916283223, + "loss": 1.3997, + "step": 5577 + }, + { + "epoch": 0.5883966244725738, + "grad_norm": 0.9709592461585999, + "learning_rate": 0.0005540877377605968, + "loss": 1.3752, + "step": 5578 + }, + { + "epoch": 0.5885021097046413, + "grad_norm": 0.6493974328041077, + "learning_rate": 0.0005538454058458171, + "loss": 1.3984, + "step": 5579 + }, + { + "epoch": 0.5886075949367089, + "grad_norm": 1.1468669176101685, + "learning_rate": 0.0005536030959111377, + "loss": 1.3502, + "step": 5580 + }, + { + "epoch": 0.5887130801687763, + "grad_norm": 0.7777971029281616, + "learning_rate": 0.0005533608079837109, + "loss": 1.3858, + "step": 5581 + }, + { + "epoch": 0.5888185654008439, + "grad_norm": 
1.0360891819000244, + "learning_rate": 0.0005531185420906859, + "loss": 1.3924, + "step": 5582 + }, + { + "epoch": 0.5889240506329114, + "grad_norm": 0.9802560210227966, + "learning_rate": 0.0005528762982592101, + "loss": 1.3643, + "step": 5583 + }, + { + "epoch": 0.5890295358649789, + "grad_norm": 0.9088213443756104, + "learning_rate": 0.000552634076516428, + "loss": 1.3864, + "step": 5584 + }, + { + "epoch": 0.5891350210970464, + "grad_norm": 1.0103867053985596, + "learning_rate": 0.0005523918768894819, + "loss": 1.4119, + "step": 5585 + }, + { + "epoch": 0.589240506329114, + "grad_norm": 0.8692920804023743, + "learning_rate": 0.0005521496994055112, + "loss": 1.3852, + "step": 5586 + }, + { + "epoch": 0.5893459915611814, + "grad_norm": 0.8983875513076782, + "learning_rate": 0.0005519075440916534, + "loss": 1.3772, + "step": 5587 + }, + { + "epoch": 0.5894514767932489, + "grad_norm": 0.7636449337005615, + "learning_rate": 0.000551665410975043, + "loss": 1.4219, + "step": 5588 + }, + { + "epoch": 0.5895569620253165, + "grad_norm": 0.8188800811767578, + "learning_rate": 0.0005514233000828121, + "loss": 1.375, + "step": 5589 + }, + { + "epoch": 0.5896624472573839, + "grad_norm": 0.6894319653511047, + "learning_rate": 0.0005511812114420908, + "loss": 1.3772, + "step": 5590 + }, + { + "epoch": 0.5897679324894515, + "grad_norm": 0.8358606696128845, + "learning_rate": 0.0005509391450800061, + "loss": 1.369, + "step": 5591 + }, + { + "epoch": 0.589873417721519, + "grad_norm": 0.7842937707901001, + "learning_rate": 0.0005506971010236829, + "loss": 1.4069, + "step": 5592 + }, + { + "epoch": 0.5899789029535865, + "grad_norm": 0.7160394191741943, + "learning_rate": 0.0005504550793002433, + "loss": 1.3646, + "step": 5593 + }, + { + "epoch": 0.590084388185654, + "grad_norm": 0.7088705897331238, + "learning_rate": 0.000550213079936807, + "loss": 1.3751, + "step": 5594 + }, + { + "epoch": 0.5901898734177216, + "grad_norm": 0.7409645318984985, + "learning_rate": 
0.0005499711029604915, + "loss": 1.3862, + "step": 5595 + }, + { + "epoch": 0.590295358649789, + "grad_norm": 0.703123927116394, + "learning_rate": 0.0005497291483984113, + "loss": 1.3647, + "step": 5596 + }, + { + "epoch": 0.5904008438818565, + "grad_norm": 0.6564064025878906, + "learning_rate": 0.0005494872162776786, + "loss": 1.3623, + "step": 5597 + }, + { + "epoch": 0.5905063291139241, + "grad_norm": 0.6708796620368958, + "learning_rate": 0.0005492453066254032, + "loss": 1.3921, + "step": 5598 + }, + { + "epoch": 0.5906118143459915, + "grad_norm": 0.6984439492225647, + "learning_rate": 0.000549003419468692, + "loss": 1.3958, + "step": 5599 + }, + { + "epoch": 0.5907172995780591, + "grad_norm": 0.6731021404266357, + "learning_rate": 0.0005487615548346502, + "loss": 1.4063, + "step": 5600 + }, + { + "epoch": 0.5908227848101266, + "grad_norm": 0.7028473019599915, + "learning_rate": 0.0005485197127503795, + "loss": 1.4045, + "step": 5601 + }, + { + "epoch": 0.5909282700421941, + "grad_norm": 0.6593266129493713, + "learning_rate": 0.0005482778932429798, + "loss": 1.41, + "step": 5602 + }, + { + "epoch": 0.5910337552742616, + "grad_norm": 0.7481752038002014, + "learning_rate": 0.000548036096339548, + "loss": 1.4043, + "step": 5603 + }, + { + "epoch": 0.5911392405063292, + "grad_norm": 0.7019838094711304, + "learning_rate": 0.0005477943220671786, + "loss": 1.3671, + "step": 5604 + }, + { + "epoch": 0.5912447257383966, + "grad_norm": 0.7202194333076477, + "learning_rate": 0.0005475525704529638, + "loss": 1.3583, + "step": 5605 + }, + { + "epoch": 0.5913502109704641, + "grad_norm": 0.7094945907592773, + "learning_rate": 0.0005473108415239929, + "loss": 1.3889, + "step": 5606 + }, + { + "epoch": 0.5914556962025317, + "grad_norm": 0.6583877205848694, + "learning_rate": 0.0005470691353073531, + "loss": 1.3873, + "step": 5607 + }, + { + "epoch": 0.5915611814345991, + "grad_norm": 0.7471681833267212, + "learning_rate": 0.0005468274518301284, + "loss": 1.3947, + "step": 5608 
+ }, + { + "epoch": 0.5916666666666667, + "grad_norm": 0.7007555365562439, + "learning_rate": 0.0005465857911194006, + "loss": 1.38, + "step": 5609 + }, + { + "epoch": 0.5917721518987342, + "grad_norm": 0.6929042339324951, + "learning_rate": 0.0005463441532022495, + "loss": 1.3931, + "step": 5610 + }, + { + "epoch": 0.5918776371308017, + "grad_norm": 0.9837354421615601, + "learning_rate": 0.0005461025381057516, + "loss": 1.3888, + "step": 5611 + }, + { + "epoch": 0.5919831223628692, + "grad_norm": 1.0415139198303223, + "learning_rate": 0.000545860945856981, + "loss": 1.4075, + "step": 5612 + }, + { + "epoch": 0.5920886075949368, + "grad_norm": 0.7583218216896057, + "learning_rate": 0.0005456193764830093, + "loss": 1.3867, + "step": 5613 + }, + { + "epoch": 0.5921940928270042, + "grad_norm": 0.915955662727356, + "learning_rate": 0.0005453778300109056, + "loss": 1.402, + "step": 5614 + }, + { + "epoch": 0.5922995780590717, + "grad_norm": 0.9195017218589783, + "learning_rate": 0.0005451363064677365, + "loss": 1.4052, + "step": 5615 + }, + { + "epoch": 0.5924050632911393, + "grad_norm": 0.9299368262290955, + "learning_rate": 0.0005448948058805657, + "loss": 1.3943, + "step": 5616 + }, + { + "epoch": 0.5925105485232067, + "grad_norm": 0.9382524490356445, + "learning_rate": 0.0005446533282764543, + "loss": 1.3767, + "step": 5617 + }, + { + "epoch": 0.5926160337552743, + "grad_norm": 0.6989529728889465, + "learning_rate": 0.0005444118736824617, + "loss": 1.4011, + "step": 5618 + }, + { + "epoch": 0.5927215189873418, + "grad_norm": 1.1067359447479248, + "learning_rate": 0.000544170442125644, + "loss": 1.3973, + "step": 5619 + }, + { + "epoch": 0.5928270042194093, + "grad_norm": 0.6633231043815613, + "learning_rate": 0.0005439290336330545, + "loss": 1.4056, + "step": 5620 + }, + { + "epoch": 0.5929324894514768, + "grad_norm": 0.7043577432632446, + "learning_rate": 0.0005436876482317444, + "loss": 1.4017, + "step": 5621 + }, + { + "epoch": 0.5930379746835444, + "grad_norm": 
0.6799861788749695, + "learning_rate": 0.000543446285948762, + "loss": 1.4148, + "step": 5622 + }, + { + "epoch": 0.5931434599156118, + "grad_norm": 0.6551592946052551, + "learning_rate": 0.0005432049468111534, + "loss": 1.3445, + "step": 5623 + }, + { + "epoch": 0.5932489451476793, + "grad_norm": 0.7807994484901428, + "learning_rate": 0.0005429636308459614, + "loss": 1.401, + "step": 5624 + }, + { + "epoch": 0.5933544303797469, + "grad_norm": 0.6964461803436279, + "learning_rate": 0.0005427223380802272, + "loss": 1.4027, + "step": 5625 + }, + { + "epoch": 0.5934599156118143, + "grad_norm": 0.6821000576019287, + "learning_rate": 0.0005424810685409881, + "loss": 1.3929, + "step": 5626 + }, + { + "epoch": 0.5935654008438819, + "grad_norm": 0.7046117186546326, + "learning_rate": 0.0005422398222552806, + "loss": 1.415, + "step": 5627 + }, + { + "epoch": 0.5936708860759494, + "grad_norm": 0.7648535370826721, + "learning_rate": 0.0005419985992501367, + "loss": 1.3587, + "step": 5628 + }, + { + "epoch": 0.5937763713080169, + "grad_norm": 1.0074220895767212, + "learning_rate": 0.0005417573995525871, + "loss": 1.3842, + "step": 5629 + }, + { + "epoch": 0.5938818565400844, + "grad_norm": 0.6681768298149109, + "learning_rate": 0.0005415162231896593, + "loss": 1.3709, + "step": 5630 + }, + { + "epoch": 0.5939873417721518, + "grad_norm": 0.975053071975708, + "learning_rate": 0.0005412750701883782, + "loss": 1.3985, + "step": 5631 + }, + { + "epoch": 0.5940928270042194, + "grad_norm": 0.6432140469551086, + "learning_rate": 0.0005410339405757665, + "loss": 1.3955, + "step": 5632 + }, + { + "epoch": 0.5941983122362869, + "grad_norm": 0.9459828734397888, + "learning_rate": 0.0005407928343788435, + "loss": 1.3707, + "step": 5633 + }, + { + "epoch": 0.5943037974683544, + "grad_norm": 0.7271698117256165, + "learning_rate": 0.0005405517516246267, + "loss": 1.3952, + "step": 5634 + }, + { + "epoch": 0.5944092827004219, + "grad_norm": 0.884108304977417, + "learning_rate": 
0.0005403106923401302, + "loss": 1.3894, + "step": 5635 + }, + { + "epoch": 0.5945147679324895, + "grad_norm": 0.660134494304657, + "learning_rate": 0.0005400696565523666, + "loss": 1.3828, + "step": 5636 + }, + { + "epoch": 0.5946202531645569, + "grad_norm": 1.137641429901123, + "learning_rate": 0.0005398286442883448, + "loss": 1.3852, + "step": 5637 + }, + { + "epoch": 0.5947257383966245, + "grad_norm": 0.850109338760376, + "learning_rate": 0.0005395876555750712, + "loss": 1.4372, + "step": 5638 + }, + { + "epoch": 0.594831223628692, + "grad_norm": 1.2803575992584229, + "learning_rate": 0.0005393466904395503, + "loss": 1.3932, + "step": 5639 + }, + { + "epoch": 0.5949367088607594, + "grad_norm": 1.2639997005462646, + "learning_rate": 0.000539105748908783, + "loss": 1.4142, + "step": 5640 + }, + { + "epoch": 0.595042194092827, + "grad_norm": 0.7611759901046753, + "learning_rate": 0.0005388648310097682, + "loss": 1.4114, + "step": 5641 + }, + { + "epoch": 0.5951476793248945, + "grad_norm": 1.217403769493103, + "learning_rate": 0.0005386239367695018, + "loss": 1.3846, + "step": 5642 + }, + { + "epoch": 0.595253164556962, + "grad_norm": 0.8403786420822144, + "learning_rate": 0.0005383830662149771, + "loss": 1.3933, + "step": 5643 + }, + { + "epoch": 0.5953586497890295, + "grad_norm": 0.9770432114601135, + "learning_rate": 0.0005381422193731853, + "loss": 1.3998, + "step": 5644 + }, + { + "epoch": 0.5954641350210971, + "grad_norm": 1.123399019241333, + "learning_rate": 0.0005379013962711143, + "loss": 1.3962, + "step": 5645 + }, + { + "epoch": 0.5955696202531645, + "grad_norm": 0.8105776309967041, + "learning_rate": 0.0005376605969357494, + "loss": 1.3767, + "step": 5646 + }, + { + "epoch": 0.5956751054852321, + "grad_norm": 0.9721289277076721, + "learning_rate": 0.0005374198213940734, + "loss": 1.3906, + "step": 5647 + }, + { + "epoch": 0.5957805907172996, + "grad_norm": 0.8196500539779663, + "learning_rate": 0.0005371790696730665, + "loss": 1.3906, + "step": 5648 + 
}, + { + "epoch": 0.595886075949367, + "grad_norm": 0.8356267809867859, + "learning_rate": 0.000536938341799706, + "loss": 1.3982, + "step": 5649 + }, + { + "epoch": 0.5959915611814346, + "grad_norm": 1.094180703163147, + "learning_rate": 0.0005366976378009668, + "loss": 1.3819, + "step": 5650 + }, + { + "epoch": 0.5960970464135021, + "grad_norm": 0.7673808336257935, + "learning_rate": 0.000536456957703821, + "loss": 1.3749, + "step": 5651 + }, + { + "epoch": 0.5962025316455696, + "grad_norm": 0.8765784502029419, + "learning_rate": 0.0005362163015352374, + "loss": 1.3771, + "step": 5652 + }, + { + "epoch": 0.5963080168776371, + "grad_norm": 0.7717766761779785, + "learning_rate": 0.0005359756693221836, + "loss": 1.3465, + "step": 5653 + }, + { + "epoch": 0.5964135021097047, + "grad_norm": 0.7685056924819946, + "learning_rate": 0.0005357350610916233, + "loss": 1.3737, + "step": 5654 + }, + { + "epoch": 0.5965189873417721, + "grad_norm": 0.9057910442352295, + "learning_rate": 0.0005354944768705179, + "loss": 1.4051, + "step": 5655 + }, + { + "epoch": 0.5966244725738397, + "grad_norm": 0.7694523334503174, + "learning_rate": 0.0005352539166858258, + "loss": 1.4, + "step": 5656 + }, + { + "epoch": 0.5967299578059072, + "grad_norm": 0.8307443261146545, + "learning_rate": 0.0005350133805645034, + "loss": 1.4127, + "step": 5657 + }, + { + "epoch": 0.5968354430379746, + "grad_norm": 0.890816867351532, + "learning_rate": 0.0005347728685335036, + "loss": 1.4319, + "step": 5658 + }, + { + "epoch": 0.5969409282700422, + "grad_norm": 0.8479238152503967, + "learning_rate": 0.0005345323806197771, + "loss": 1.3707, + "step": 5659 + }, + { + "epoch": 0.5970464135021097, + "grad_norm": 0.6696478724479675, + "learning_rate": 0.0005342919168502717, + "loss": 1.3745, + "step": 5660 + }, + { + "epoch": 0.5971518987341772, + "grad_norm": 0.9601258635520935, + "learning_rate": 0.0005340514772519324, + "loss": 1.4183, + "step": 5661 + }, + { + "epoch": 0.5972573839662447, + "grad_norm": 
0.8503170609474182, + "learning_rate": 0.0005338110618517022, + "loss": 1.3624, + "step": 5662 + }, + { + "epoch": 0.5973628691983123, + "grad_norm": 0.8513110876083374, + "learning_rate": 0.0005335706706765205, + "loss": 1.413, + "step": 5663 + }, + { + "epoch": 0.5974683544303797, + "grad_norm": 0.9493113160133362, + "learning_rate": 0.0005333303037533244, + "loss": 1.3911, + "step": 5664 + }, + { + "epoch": 0.5975738396624473, + "grad_norm": 0.7396041750907898, + "learning_rate": 0.0005330899611090482, + "loss": 1.396, + "step": 5665 + }, + { + "epoch": 0.5976793248945148, + "grad_norm": 1.0850187540054321, + "learning_rate": 0.0005328496427706235, + "loss": 1.3582, + "step": 5666 + }, + { + "epoch": 0.5977848101265822, + "grad_norm": 0.8770308494567871, + "learning_rate": 0.000532609348764979, + "loss": 1.4165, + "step": 5667 + }, + { + "epoch": 0.5978902953586498, + "grad_norm": 0.7475267648696899, + "learning_rate": 0.0005323690791190412, + "loss": 1.4229, + "step": 5668 + }, + { + "epoch": 0.5979957805907173, + "grad_norm": 0.9588388800621033, + "learning_rate": 0.0005321288338597327, + "loss": 1.3977, + "step": 5669 + }, + { + "epoch": 0.5981012658227848, + "grad_norm": 0.7361645698547363, + "learning_rate": 0.0005318886130139753, + "loss": 1.3661, + "step": 5670 + }, + { + "epoch": 0.5982067510548523, + "grad_norm": 0.7850918173789978, + "learning_rate": 0.0005316484166086863, + "loss": 1.3951, + "step": 5671 + }, + { + "epoch": 0.5983122362869199, + "grad_norm": 0.7557613253593445, + "learning_rate": 0.0005314082446707811, + "loss": 1.3802, + "step": 5672 + }, + { + "epoch": 0.5984177215189873, + "grad_norm": 0.7293479442596436, + "learning_rate": 0.000531168097227172, + "loss": 1.4202, + "step": 5673 + }, + { + "epoch": 0.5985232067510549, + "grad_norm": 0.6823287010192871, + "learning_rate": 0.0005309279743047687, + "loss": 1.4133, + "step": 5674 + }, + { + "epoch": 0.5986286919831224, + "grad_norm": 0.8248780965805054, + "learning_rate": 
0.0005306878759304785, + "loss": 1.377, + "step": 5675 + }, + { + "epoch": 0.5987341772151898, + "grad_norm": 0.7616843581199646, + "learning_rate": 0.0005304478021312053, + "loss": 1.4085, + "step": 5676 + }, + { + "epoch": 0.5988396624472574, + "grad_norm": 0.7031829953193665, + "learning_rate": 0.0005302077529338507, + "loss": 1.3487, + "step": 5677 + }, + { + "epoch": 0.5989451476793249, + "grad_norm": 0.7182310223579407, + "learning_rate": 0.0005299677283653128, + "loss": 1.4178, + "step": 5678 + }, + { + "epoch": 0.5990506329113924, + "grad_norm": 0.7164528369903564, + "learning_rate": 0.0005297277284524888, + "loss": 1.3576, + "step": 5679 + }, + { + "epoch": 0.5991561181434599, + "grad_norm": 0.6926912069320679, + "learning_rate": 0.0005294877532222709, + "loss": 1.4033, + "step": 5680 + }, + { + "epoch": 0.5992616033755275, + "grad_norm": 0.6735715270042419, + "learning_rate": 0.00052924780270155, + "loss": 1.3794, + "step": 5681 + }, + { + "epoch": 0.5993670886075949, + "grad_norm": 0.810062050819397, + "learning_rate": 0.0005290078769172135, + "loss": 1.3746, + "step": 5682 + }, + { + "epoch": 0.5994725738396625, + "grad_norm": 0.6372467875480652, + "learning_rate": 0.0005287679758961465, + "loss": 1.3673, + "step": 5683 + }, + { + "epoch": 0.59957805907173, + "grad_norm": 0.8153367638587952, + "learning_rate": 0.0005285280996652308, + "loss": 1.3558, + "step": 5684 + }, + { + "epoch": 0.5996835443037974, + "grad_norm": 0.756057620048523, + "learning_rate": 0.0005282882482513459, + "loss": 1.3698, + "step": 5685 + }, + { + "epoch": 0.599789029535865, + "grad_norm": 0.6630066633224487, + "learning_rate": 0.0005280484216813686, + "loss": 1.4049, + "step": 5686 + }, + { + "epoch": 0.5998945147679325, + "grad_norm": 1.044120192527771, + "learning_rate": 0.0005278086199821718, + "loss": 1.3753, + "step": 5687 + }, + { + "epoch": 0.6, + "grad_norm": 0.7095268964767456, + "learning_rate": 0.0005275688431806274, + "loss": 1.3629, + "step": 5688 + }, + { + 
"epoch": 0.6001054852320675, + "grad_norm": 1.0710117816925049, + "learning_rate": 0.0005273290913036033, + "loss": 1.3507, + "step": 5689 + }, + { + "epoch": 0.6002109704641351, + "grad_norm": 0.6918399333953857, + "learning_rate": 0.0005270893643779649, + "loss": 1.3814, + "step": 5690 + }, + { + "epoch": 0.6003164556962025, + "grad_norm": 0.7996891140937805, + "learning_rate": 0.0005268496624305747, + "loss": 1.4036, + "step": 5691 + }, + { + "epoch": 0.6004219409282701, + "grad_norm": 0.6740238070487976, + "learning_rate": 0.0005266099854882927, + "loss": 1.3431, + "step": 5692 + }, + { + "epoch": 0.6005274261603376, + "grad_norm": 0.7142125368118286, + "learning_rate": 0.0005263703335779755, + "loss": 1.3961, + "step": 5693 + }, + { + "epoch": 0.600632911392405, + "grad_norm": 0.6781421899795532, + "learning_rate": 0.0005261307067264778, + "loss": 1.358, + "step": 5694 + }, + { + "epoch": 0.6007383966244726, + "grad_norm": 0.6251583099365234, + "learning_rate": 0.0005258911049606503, + "loss": 1.3432, + "step": 5695 + }, + { + "epoch": 0.60084388185654, + "grad_norm": 0.749600887298584, + "learning_rate": 0.0005256515283073422, + "loss": 1.3943, + "step": 5696 + }, + { + "epoch": 0.6009493670886076, + "grad_norm": 0.7294971942901611, + "learning_rate": 0.0005254119767933992, + "loss": 1.3881, + "step": 5697 + }, + { + "epoch": 0.6010548523206751, + "grad_norm": 0.6917129755020142, + "learning_rate": 0.0005251724504456641, + "loss": 1.3852, + "step": 5698 + }, + { + "epoch": 0.6011603375527426, + "grad_norm": 0.7677839398384094, + "learning_rate": 0.000524932949290977, + "loss": 1.3513, + "step": 5699 + }, + { + "epoch": 0.6012658227848101, + "grad_norm": 0.8449642658233643, + "learning_rate": 0.0005246934733561751, + "loss": 1.4103, + "step": 5700 + }, + { + "epoch": 0.6013713080168777, + "grad_norm": 0.6656381487846375, + "learning_rate": 0.0005244540226680931, + "loss": 1.3675, + "step": 5701 + }, + { + "epoch": 0.6014767932489451, + "grad_norm": 
0.8228135704994202, + "learning_rate": 0.0005242145972535625, + "loss": 1.3905, + "step": 5702 + }, + { + "epoch": 0.6015822784810126, + "grad_norm": 0.7064121961593628, + "learning_rate": 0.0005239751971394122, + "loss": 1.4072, + "step": 5703 + }, + { + "epoch": 0.6016877637130802, + "grad_norm": 0.6947120428085327, + "learning_rate": 0.0005237358223524678, + "loss": 1.3685, + "step": 5704 + }, + { + "epoch": 0.6017932489451476, + "grad_norm": 0.7808952927589417, + "learning_rate": 0.000523496472919553, + "loss": 1.3893, + "step": 5705 + }, + { + "epoch": 0.6018987341772152, + "grad_norm": 0.680780827999115, + "learning_rate": 0.000523257148867488, + "loss": 1.3513, + "step": 5706 + }, + { + "epoch": 0.6020042194092827, + "grad_norm": 0.8507663011550903, + "learning_rate": 0.00052301785022309, + "loss": 1.3582, + "step": 5707 + }, + { + "epoch": 0.6021097046413502, + "grad_norm": 0.6862066984176636, + "learning_rate": 0.0005227785770131737, + "loss": 1.3722, + "step": 5708 + }, + { + "epoch": 0.6022151898734177, + "grad_norm": 0.8134487271308899, + "learning_rate": 0.0005225393292645509, + "loss": 1.381, + "step": 5709 + }, + { + "epoch": 0.6023206751054853, + "grad_norm": 0.7201959490776062, + "learning_rate": 0.0005223001070040305, + "loss": 1.4266, + "step": 5710 + }, + { + "epoch": 0.6024261603375527, + "grad_norm": 0.7155699133872986, + "learning_rate": 0.0005220609102584185, + "loss": 1.404, + "step": 5711 + }, + { + "epoch": 0.6025316455696202, + "grad_norm": 0.6771776080131531, + "learning_rate": 0.0005218217390545181, + "loss": 1.3744, + "step": 5712 + }, + { + "epoch": 0.6026371308016878, + "grad_norm": 0.773507297039032, + "learning_rate": 0.0005215825934191293, + "loss": 1.3878, + "step": 5713 + }, + { + "epoch": 0.6027426160337552, + "grad_norm": 0.6755473613739014, + "learning_rate": 0.0005213434733790503, + "loss": 1.3705, + "step": 5714 + }, + { + "epoch": 0.6028481012658228, + "grad_norm": 0.6909639835357666, + "learning_rate": 
0.0005211043789610752, + "loss": 1.3668, + "step": 5715 + }, + { + "epoch": 0.6029535864978903, + "grad_norm": 0.7333071827888489, + "learning_rate": 0.0005208653101919959, + "loss": 1.409, + "step": 5716 + }, + { + "epoch": 0.6030590717299578, + "grad_norm": 0.6650283932685852, + "learning_rate": 0.0005206262670986012, + "loss": 1.4084, + "step": 5717 + }, + { + "epoch": 0.6031645569620253, + "grad_norm": 0.7321755886077881, + "learning_rate": 0.0005203872497076768, + "loss": 1.3749, + "step": 5718 + }, + { + "epoch": 0.6032700421940929, + "grad_norm": 0.6033060550689697, + "learning_rate": 0.0005201482580460063, + "loss": 1.3753, + "step": 5719 + }, + { + "epoch": 0.6033755274261603, + "grad_norm": 0.7018299698829651, + "learning_rate": 0.0005199092921403696, + "loss": 1.3865, + "step": 5720 + }, + { + "epoch": 0.6034810126582278, + "grad_norm": 0.7016337513923645, + "learning_rate": 0.0005196703520175437, + "loss": 1.3815, + "step": 5721 + }, + { + "epoch": 0.6035864978902954, + "grad_norm": 0.6442493796348572, + "learning_rate": 0.0005194314377043037, + "loss": 1.3901, + "step": 5722 + }, + { + "epoch": 0.6036919831223628, + "grad_norm": 0.7426306009292603, + "learning_rate": 0.0005191925492274205, + "loss": 1.3763, + "step": 5723 + }, + { + "epoch": 0.6037974683544304, + "grad_norm": 0.7386847138404846, + "learning_rate": 0.0005189536866136634, + "loss": 1.3988, + "step": 5724 + }, + { + "epoch": 0.6039029535864979, + "grad_norm": 0.7162504196166992, + "learning_rate": 0.0005187148498897977, + "loss": 1.3731, + "step": 5725 + }, + { + "epoch": 0.6040084388185654, + "grad_norm": 0.7346771955490112, + "learning_rate": 0.0005184760390825865, + "loss": 1.3823, + "step": 5726 + }, + { + "epoch": 0.6041139240506329, + "grad_norm": 0.7100133895874023, + "learning_rate": 0.0005182372542187895, + "loss": 1.3626, + "step": 5727 + }, + { + "epoch": 0.6042194092827005, + "grad_norm": 0.9203206896781921, + "learning_rate": 0.0005179984953251639, + "loss": 1.3738, + "step": 
5728 + }, + { + "epoch": 0.6043248945147679, + "grad_norm": 0.6319573521614075, + "learning_rate": 0.0005177597624284637, + "loss": 1.3898, + "step": 5729 + }, + { + "epoch": 0.6044303797468354, + "grad_norm": 0.6649993658065796, + "learning_rate": 0.00051752105555544, + "loss": 1.4005, + "step": 5730 + }, + { + "epoch": 0.604535864978903, + "grad_norm": 0.7294961214065552, + "learning_rate": 0.0005172823747328415, + "loss": 1.3881, + "step": 5731 + }, + { + "epoch": 0.6046413502109704, + "grad_norm": 0.7815496921539307, + "learning_rate": 0.0005170437199874132, + "loss": 1.3724, + "step": 5732 + }, + { + "epoch": 0.604746835443038, + "grad_norm": 0.6439515948295593, + "learning_rate": 0.0005168050913458977, + "loss": 1.3892, + "step": 5733 + }, + { + "epoch": 0.6048523206751055, + "grad_norm": 1.0799697637557983, + "learning_rate": 0.0005165664888350347, + "loss": 1.3554, + "step": 5734 + }, + { + "epoch": 0.604957805907173, + "grad_norm": 0.6551617980003357, + "learning_rate": 0.0005163279124815605, + "loss": 1.3745, + "step": 5735 + }, + { + "epoch": 0.6050632911392405, + "grad_norm": 0.7422943711280823, + "learning_rate": 0.000516089362312209, + "loss": 1.4047, + "step": 5736 + }, + { + "epoch": 0.6051687763713081, + "grad_norm": 0.7498365640640259, + "learning_rate": 0.0005158508383537109, + "loss": 1.3671, + "step": 5737 + }, + { + "epoch": 0.6052742616033755, + "grad_norm": 0.676952600479126, + "learning_rate": 0.0005156123406327938, + "loss": 1.3778, + "step": 5738 + }, + { + "epoch": 0.605379746835443, + "grad_norm": 0.8993784189224243, + "learning_rate": 0.0005153738691761826, + "loss": 1.4271, + "step": 5739 + }, + { + "epoch": 0.6054852320675106, + "grad_norm": 0.6343795657157898, + "learning_rate": 0.0005151354240105994, + "loss": 1.3556, + "step": 5740 + }, + { + "epoch": 0.605590717299578, + "grad_norm": 1.0133804082870483, + "learning_rate": 0.0005148970051627632, + "loss": 1.3625, + "step": 5741 + }, + { + "epoch": 0.6056962025316456, + 
"grad_norm": 0.6469434499740601, + "learning_rate": 0.0005146586126593898, + "loss": 1.3823, + "step": 5742 + }, + { + "epoch": 0.6058016877637131, + "grad_norm": 0.8967620730400085, + "learning_rate": 0.0005144202465271922, + "loss": 1.3467, + "step": 5743 + }, + { + "epoch": 0.6059071729957806, + "grad_norm": 0.7316012382507324, + "learning_rate": 0.000514181906792881, + "loss": 1.3518, + "step": 5744 + }, + { + "epoch": 0.6060126582278481, + "grad_norm": 0.6935506463050842, + "learning_rate": 0.0005139435934831628, + "loss": 1.3388, + "step": 5745 + }, + { + "epoch": 0.6061181434599157, + "grad_norm": 0.8378331661224365, + "learning_rate": 0.0005137053066247421, + "loss": 1.3574, + "step": 5746 + }, + { + "epoch": 0.6062236286919831, + "grad_norm": 0.6845329999923706, + "learning_rate": 0.00051346704624432, + "loss": 1.3523, + "step": 5747 + }, + { + "epoch": 0.6063291139240506, + "grad_norm": 0.7852297425270081, + "learning_rate": 0.000513228812368595, + "loss": 1.367, + "step": 5748 + }, + { + "epoch": 0.6064345991561182, + "grad_norm": 0.6707380414009094, + "learning_rate": 0.0005129906050242622, + "loss": 1.3829, + "step": 5749 + }, + { + "epoch": 0.6065400843881856, + "grad_norm": 0.694103479385376, + "learning_rate": 0.0005127524242380139, + "loss": 1.322, + "step": 5750 + }, + { + "epoch": 0.6066455696202532, + "grad_norm": 0.7925467491149902, + "learning_rate": 0.0005125142700365394, + "loss": 1.3385, + "step": 5751 + }, + { + "epoch": 0.6067510548523207, + "grad_norm": 0.6937247514724731, + "learning_rate": 0.0005122761424465254, + "loss": 1.3506, + "step": 5752 + }, + { + "epoch": 0.6068565400843882, + "grad_norm": 0.6815968751907349, + "learning_rate": 0.0005120380414946546, + "loss": 1.3861, + "step": 5753 + }, + { + "epoch": 0.6069620253164557, + "grad_norm": 0.7101688981056213, + "learning_rate": 0.0005117999672076081, + "loss": 1.3543, + "step": 5754 + }, + { + "epoch": 0.6070675105485233, + "grad_norm": 0.827311098575592, + "learning_rate": 
0.0005115619196120632, + "loss": 1.3919, + "step": 5755 + }, + { + "epoch": 0.6071729957805907, + "grad_norm": 0.6600461602210999, + "learning_rate": 0.0005113238987346939, + "loss": 1.3536, + "step": 5756 + }, + { + "epoch": 0.6072784810126582, + "grad_norm": 0.6488266587257385, + "learning_rate": 0.000511085904602172, + "loss": 1.3396, + "step": 5757 + }, + { + "epoch": 0.6073839662447258, + "grad_norm": 0.7405415177345276, + "learning_rate": 0.0005108479372411658, + "loss": 1.3849, + "step": 5758 + }, + { + "epoch": 0.6074894514767932, + "grad_norm": 0.7426075339317322, + "learning_rate": 0.0005106099966783409, + "loss": 1.373, + "step": 5759 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 0.7054737210273743, + "learning_rate": 0.0005103720829403594, + "loss": 1.3824, + "step": 5760 + }, + { + "epoch": 0.6077004219409282, + "grad_norm": 0.6354666948318481, + "learning_rate": 0.000510134196053881, + "loss": 1.3357, + "step": 5761 + }, + { + "epoch": 0.6078059071729958, + "grad_norm": 0.7680556774139404, + "learning_rate": 0.000509896336045562, + "loss": 1.3826, + "step": 5762 + }, + { + "epoch": 0.6079113924050633, + "grad_norm": 0.719870388507843, + "learning_rate": 0.0005096585029420556, + "loss": 1.3637, + "step": 5763 + }, + { + "epoch": 0.6080168776371307, + "grad_norm": 0.7203357219696045, + "learning_rate": 0.0005094206967700127, + "loss": 1.3643, + "step": 5764 + }, + { + "epoch": 0.6081223628691983, + "grad_norm": 0.7299761176109314, + "learning_rate": 0.0005091829175560801, + "loss": 1.3561, + "step": 5765 + }, + { + "epoch": 0.6082278481012658, + "grad_norm": 0.6929482221603394, + "learning_rate": 0.0005089451653269026, + "loss": 1.3851, + "step": 5766 + }, + { + "epoch": 0.6083333333333333, + "grad_norm": 0.7079092860221863, + "learning_rate": 0.0005087074401091212, + "loss": 1.3694, + "step": 5767 + }, + { + "epoch": 0.6084388185654008, + "grad_norm": 0.821198582649231, + "learning_rate": 0.0005084697419293746, + "loss": 1.3583, + "step": 5768 
+ }, + { + "epoch": 0.6085443037974684, + "grad_norm": 0.8242297768592834, + "learning_rate": 0.0005082320708142975, + "loss": 1.3695, + "step": 5769 + }, + { + "epoch": 0.6086497890295358, + "grad_norm": 0.7953400015830994, + "learning_rate": 0.0005079944267905226, + "loss": 1.3861, + "step": 5770 + }, + { + "epoch": 0.6087552742616034, + "grad_norm": 0.843505859375, + "learning_rate": 0.0005077568098846789, + "loss": 1.3488, + "step": 5771 + }, + { + "epoch": 0.6088607594936709, + "grad_norm": 0.6311532855033875, + "learning_rate": 0.0005075192201233924, + "loss": 1.3778, + "step": 5772 + }, + { + "epoch": 0.6089662447257383, + "grad_norm": 0.6294226050376892, + "learning_rate": 0.0005072816575332864, + "loss": 1.3902, + "step": 5773 + }, + { + "epoch": 0.6090717299578059, + "grad_norm": 0.7680147290229797, + "learning_rate": 0.0005070441221409811, + "loss": 1.3714, + "step": 5774 + }, + { + "epoch": 0.6091772151898734, + "grad_norm": 0.6537689566612244, + "learning_rate": 0.0005068066139730936, + "loss": 1.3556, + "step": 5775 + }, + { + "epoch": 0.6092827004219409, + "grad_norm": 0.7415584325790405, + "learning_rate": 0.0005065691330562375, + "loss": 1.3916, + "step": 5776 + }, + { + "epoch": 0.6093881856540084, + "grad_norm": 0.691333532333374, + "learning_rate": 0.0005063316794170239, + "loss": 1.3671, + "step": 5777 + }, + { + "epoch": 0.609493670886076, + "grad_norm": 0.7629533410072327, + "learning_rate": 0.0005060942530820607, + "loss": 1.389, + "step": 5778 + }, + { + "epoch": 0.6095991561181434, + "grad_norm": 0.6382713913917542, + "learning_rate": 0.0005058568540779526, + "loss": 1.3608, + "step": 5779 + }, + { + "epoch": 0.609704641350211, + "grad_norm": 0.6452603340148926, + "learning_rate": 0.0005056194824313015, + "loss": 1.3826, + "step": 5780 + }, + { + "epoch": 0.6098101265822785, + "grad_norm": 0.665996789932251, + "learning_rate": 0.000505382138168706, + "loss": 1.3778, + "step": 5781 + }, + { + "epoch": 0.609915611814346, + "grad_norm": 
0.6672236919403076, + "learning_rate": 0.0005051448213167614, + "loss": 1.3747, + "step": 5782 + }, + { + "epoch": 0.6100210970464135, + "grad_norm": 0.6321627497673035, + "learning_rate": 0.0005049075319020608, + "loss": 1.3929, + "step": 5783 + }, + { + "epoch": 0.610126582278481, + "grad_norm": 0.7126784920692444, + "learning_rate": 0.0005046702699511933, + "loss": 1.4068, + "step": 5784 + }, + { + "epoch": 0.6102320675105485, + "grad_norm": 0.6405827403068542, + "learning_rate": 0.0005044330354907454, + "loss": 1.3521, + "step": 5785 + }, + { + "epoch": 0.610337552742616, + "grad_norm": 0.6771809458732605, + "learning_rate": 0.0005041958285473005, + "loss": 1.3469, + "step": 5786 + }, + { + "epoch": 0.6104430379746836, + "grad_norm": 0.7058507204055786, + "learning_rate": 0.0005039586491474386, + "loss": 1.3452, + "step": 5787 + }, + { + "epoch": 0.610548523206751, + "grad_norm": 0.6856660842895508, + "learning_rate": 0.000503721497317737, + "loss": 1.3649, + "step": 5788 + }, + { + "epoch": 0.6106540084388186, + "grad_norm": 0.6611692905426025, + "learning_rate": 0.0005034843730847696, + "loss": 1.3861, + "step": 5789 + }, + { + "epoch": 0.6107594936708861, + "grad_norm": 0.662730872631073, + "learning_rate": 0.0005032472764751074, + "loss": 1.3571, + "step": 5790 + }, + { + "epoch": 0.6108649789029535, + "grad_norm": 0.703396737575531, + "learning_rate": 0.0005030102075153181, + "loss": 1.4064, + "step": 5791 + }, + { + "epoch": 0.6109704641350211, + "grad_norm": 0.7105530500411987, + "learning_rate": 0.000502773166231967, + "loss": 1.3769, + "step": 5792 + }, + { + "epoch": 0.6110759493670886, + "grad_norm": 0.8340848088264465, + "learning_rate": 0.0005025361526516151, + "loss": 1.3971, + "step": 5793 + }, + { + "epoch": 0.6111814345991561, + "grad_norm": 0.6870110034942627, + "learning_rate": 0.0005022991668008216, + "loss": 1.3758, + "step": 5794 + }, + { + "epoch": 0.6112869198312236, + "grad_norm": 0.6955728530883789, + "learning_rate": 
0.0005020622087061415, + "loss": 1.3632, + "step": 5795 + }, + { + "epoch": 0.6113924050632912, + "grad_norm": 0.7042303681373596, + "learning_rate": 0.0005018252783941273, + "loss": 1.3659, + "step": 5796 + }, + { + "epoch": 0.6114978902953586, + "grad_norm": 0.809837281703949, + "learning_rate": 0.0005015883758913281, + "loss": 1.3783, + "step": 5797 + }, + { + "epoch": 0.6116033755274262, + "grad_norm": 0.6763957142829895, + "learning_rate": 0.0005013515012242901, + "loss": 1.3381, + "step": 5798 + }, + { + "epoch": 0.6117088607594937, + "grad_norm": 0.6813017129898071, + "learning_rate": 0.0005011146544195559, + "loss": 1.365, + "step": 5799 + }, + { + "epoch": 0.6118143459915611, + "grad_norm": 0.78061842918396, + "learning_rate": 0.000500877835503666, + "loss": 1.391, + "step": 5800 + }, + { + "epoch": 0.6119198312236287, + "grad_norm": 0.7221613526344299, + "learning_rate": 0.0005006410445031569, + "loss": 1.3856, + "step": 5801 + }, + { + "epoch": 0.6120253164556962, + "grad_norm": 0.8302212953567505, + "learning_rate": 0.0005004042814445622, + "loss": 1.3755, + "step": 5802 + }, + { + "epoch": 0.6121308016877637, + "grad_norm": 0.6695998311042786, + "learning_rate": 0.0005001675463544125, + "loss": 1.354, + "step": 5803 + }, + { + "epoch": 0.6122362869198312, + "grad_norm": 0.9332215785980225, + "learning_rate": 0.0004999308392592349, + "loss": 1.37, + "step": 5804 + }, + { + "epoch": 0.6123417721518988, + "grad_norm": 0.7039677500724792, + "learning_rate": 0.0004996941601855536, + "loss": 1.3583, + "step": 5805 + }, + { + "epoch": 0.6124472573839662, + "grad_norm": 0.8328734636306763, + "learning_rate": 0.0004994575091598898, + "loss": 1.3972, + "step": 5806 + }, + { + "epoch": 0.6125527426160338, + "grad_norm": 0.8295986652374268, + "learning_rate": 0.0004992208862087616, + "loss": 1.3966, + "step": 5807 + }, + { + "epoch": 0.6126582278481013, + "grad_norm": 0.7195462584495544, + "learning_rate": 0.0004989842913586832, + "loss": 1.3982, + "step": 5808 + 
}, + { + "epoch": 0.6127637130801687, + "grad_norm": 0.9067789912223816, + "learning_rate": 0.000498747724636167, + "loss": 1.4047, + "step": 5809 + }, + { + "epoch": 0.6128691983122363, + "grad_norm": 0.6931321024894714, + "learning_rate": 0.000498511186067721, + "loss": 1.3849, + "step": 5810 + }, + { + "epoch": 0.6129746835443038, + "grad_norm": 0.6881266832351685, + "learning_rate": 0.0004982746756798507, + "loss": 1.4133, + "step": 5811 + }, + { + "epoch": 0.6130801687763713, + "grad_norm": 0.6858708262443542, + "learning_rate": 0.0004980381934990583, + "loss": 1.3844, + "step": 5812 + }, + { + "epoch": 0.6131856540084388, + "grad_norm": 0.6533380150794983, + "learning_rate": 0.0004978017395518425, + "loss": 1.3676, + "step": 5813 + }, + { + "epoch": 0.6132911392405064, + "grad_norm": 0.6692208051681519, + "learning_rate": 0.0004975653138646994, + "loss": 1.3764, + "step": 5814 + }, + { + "epoch": 0.6133966244725738, + "grad_norm": 0.6456486582756042, + "learning_rate": 0.0004973289164641217, + "loss": 1.4152, + "step": 5815 + }, + { + "epoch": 0.6135021097046414, + "grad_norm": 0.7166901230812073, + "learning_rate": 0.0004970925473765988, + "loss": 1.4094, + "step": 5816 + }, + { + "epoch": 0.6136075949367089, + "grad_norm": 0.6874385476112366, + "learning_rate": 0.0004968562066286168, + "loss": 1.3927, + "step": 5817 + }, + { + "epoch": 0.6137130801687763, + "grad_norm": 0.8729189038276672, + "learning_rate": 0.0004966198942466595, + "loss": 1.4127, + "step": 5818 + }, + { + "epoch": 0.6138185654008439, + "grad_norm": 0.6435405611991882, + "learning_rate": 0.0004963836102572065, + "loss": 1.3764, + "step": 5819 + }, + { + "epoch": 0.6139240506329114, + "grad_norm": 0.8195585012435913, + "learning_rate": 0.0004961473546867346, + "loss": 1.3832, + "step": 5820 + }, + { + "epoch": 0.6140295358649789, + "grad_norm": 0.6590105295181274, + "learning_rate": 0.0004959111275617174, + "loss": 1.412, + "step": 5821 + }, + { + "epoch": 0.6141350210970464, + "grad_norm": 
0.7525282502174377, + "learning_rate": 0.0004956749289086254, + "loss": 1.3595, + "step": 5822 + }, + { + "epoch": 0.614240506329114, + "grad_norm": 0.6802932620048523, + "learning_rate": 0.0004954387587539257, + "loss": 1.3669, + "step": 5823 + }, + { + "epoch": 0.6143459915611814, + "grad_norm": 0.8052703738212585, + "learning_rate": 0.0004952026171240826, + "loss": 1.3901, + "step": 5824 + }, + { + "epoch": 0.614451476793249, + "grad_norm": 0.7692441940307617, + "learning_rate": 0.0004949665040455566, + "loss": 1.3764, + "step": 5825 + }, + { + "epoch": 0.6145569620253165, + "grad_norm": 0.6795695424079895, + "learning_rate": 0.0004947304195448052, + "loss": 1.3613, + "step": 5826 + }, + { + "epoch": 0.614662447257384, + "grad_norm": 0.6874719262123108, + "learning_rate": 0.0004944943636482836, + "loss": 1.377, + "step": 5827 + }, + { + "epoch": 0.6147679324894515, + "grad_norm": 0.7662628889083862, + "learning_rate": 0.0004942583363824428, + "loss": 1.3527, + "step": 5828 + }, + { + "epoch": 0.6148734177215189, + "grad_norm": 0.7018500566482544, + "learning_rate": 0.0004940223377737304, + "loss": 1.3925, + "step": 5829 + }, + { + "epoch": 0.6149789029535865, + "grad_norm": 0.8186096549034119, + "learning_rate": 0.0004937863678485915, + "loss": 1.364, + "step": 5830 + }, + { + "epoch": 0.615084388185654, + "grad_norm": 0.7241255640983582, + "learning_rate": 0.0004935504266334677, + "loss": 1.3814, + "step": 5831 + }, + { + "epoch": 0.6151898734177215, + "grad_norm": 0.668744683265686, + "learning_rate": 0.0004933145141547975, + "loss": 1.4022, + "step": 5832 + }, + { + "epoch": 0.615295358649789, + "grad_norm": 0.7607747316360474, + "learning_rate": 0.0004930786304390158, + "loss": 1.3619, + "step": 5833 + }, + { + "epoch": 0.6154008438818566, + "grad_norm": 0.7154970169067383, + "learning_rate": 0.0004928427755125544, + "loss": 1.3981, + "step": 5834 + }, + { + "epoch": 0.615506329113924, + "grad_norm": 0.7475058436393738, + "learning_rate": 
0.0004926069494018427, + "loss": 1.3625, + "step": 5835 + }, + { + "epoch": 0.6156118143459915, + "grad_norm": 0.6279102563858032, + "learning_rate": 0.0004923711521333056, + "loss": 1.4196, + "step": 5836 + }, + { + "epoch": 0.6157172995780591, + "grad_norm": 0.68039870262146, + "learning_rate": 0.0004921353837333657, + "loss": 1.3561, + "step": 5837 + }, + { + "epoch": 0.6158227848101265, + "grad_norm": 0.7078441381454468, + "learning_rate": 0.0004918996442284419, + "loss": 1.3977, + "step": 5838 + }, + { + "epoch": 0.6159282700421941, + "grad_norm": 0.679903507232666, + "learning_rate": 0.0004916639336449499, + "loss": 1.3864, + "step": 5839 + }, + { + "epoch": 0.6160337552742616, + "grad_norm": 0.8626116514205933, + "learning_rate": 0.0004914282520093023, + "loss": 1.4047, + "step": 5840 + }, + { + "epoch": 0.6161392405063291, + "grad_norm": 0.6571637392044067, + "learning_rate": 0.0004911925993479085, + "loss": 1.4002, + "step": 5841 + }, + { + "epoch": 0.6162447257383966, + "grad_norm": 0.7111542820930481, + "learning_rate": 0.0004909569756871745, + "loss": 1.4069, + "step": 5842 + }, + { + "epoch": 0.6163502109704642, + "grad_norm": 0.6606494784355164, + "learning_rate": 0.0004907213810535026, + "loss": 1.343, + "step": 5843 + }, + { + "epoch": 0.6164556962025316, + "grad_norm": 0.6449426412582397, + "learning_rate": 0.0004904858154732932, + "loss": 1.391, + "step": 5844 + }, + { + "epoch": 0.6165611814345991, + "grad_norm": 0.6933168768882751, + "learning_rate": 0.0004902502789729424, + "loss": 1.4034, + "step": 5845 + }, + { + "epoch": 0.6166666666666667, + "grad_norm": 0.6607386469841003, + "learning_rate": 0.0004900147715788429, + "loss": 1.3703, + "step": 5846 + }, + { + "epoch": 0.6167721518987341, + "grad_norm": 0.6496079564094543, + "learning_rate": 0.0004897792933173847, + "loss": 1.3827, + "step": 5847 + }, + { + "epoch": 0.6168776371308017, + "grad_norm": 0.6993774175643921, + "learning_rate": 0.0004895438442149542, + "loss": 1.4102, + "step": 
5848 + }, + { + "epoch": 0.6169831223628692, + "grad_norm": 0.7795572876930237, + "learning_rate": 0.0004893084242979348, + "loss": 1.4412, + "step": 5849 + }, + { + "epoch": 0.6170886075949367, + "grad_norm": 0.6858840584754944, + "learning_rate": 0.0004890730335927063, + "loss": 1.3724, + "step": 5850 + }, + { + "epoch": 0.6171940928270042, + "grad_norm": 0.6984849572181702, + "learning_rate": 0.0004888376721256456, + "loss": 1.3846, + "step": 5851 + }, + { + "epoch": 0.6172995780590718, + "grad_norm": 0.7902636528015137, + "learning_rate": 0.0004886023399231255, + "loss": 1.3678, + "step": 5852 + }, + { + "epoch": 0.6174050632911392, + "grad_norm": 0.732243537902832, + "learning_rate": 0.0004883670370115173, + "loss": 1.3656, + "step": 5853 + }, + { + "epoch": 0.6175105485232067, + "grad_norm": 0.7183970808982849, + "learning_rate": 0.00048813176341718693, + "loss": 1.3436, + "step": 5854 + }, + { + "epoch": 0.6176160337552743, + "grad_norm": 0.6698121428489685, + "learning_rate": 0.0004878965191664983, + "loss": 1.4109, + "step": 5855 + }, + { + "epoch": 0.6177215189873417, + "grad_norm": 0.6740438342094421, + "learning_rate": 0.0004876613042858118, + "loss": 1.3707, + "step": 5856 + }, + { + "epoch": 0.6178270042194093, + "grad_norm": 0.6260630488395691, + "learning_rate": 0.0004874261188014842, + "loss": 1.3559, + "step": 5857 + }, + { + "epoch": 0.6179324894514768, + "grad_norm": 0.7343763113021851, + "learning_rate": 0.00048719096273986925, + "loss": 1.3955, + "step": 5858 + }, + { + "epoch": 0.6180379746835443, + "grad_norm": 0.6728991866111755, + "learning_rate": 0.0004869558361273175, + "loss": 1.3874, + "step": 5859 + }, + { + "epoch": 0.6181434599156118, + "grad_norm": 0.7280457615852356, + "learning_rate": 0.00048672073899017564, + "loss": 1.3826, + "step": 5860 + }, + { + "epoch": 0.6182489451476794, + "grad_norm": 0.8857213854789734, + "learning_rate": 0.00048648567135478805, + "loss": 1.4007, + "step": 5861 + }, + { + "epoch": 0.6183544303797468, + 
"grad_norm": 0.8009088635444641, + "learning_rate": 0.0004862506332474951, + "loss": 1.4389, + "step": 5862 + }, + { + "epoch": 0.6184599156118143, + "grad_norm": 0.6299470663070679, + "learning_rate": 0.0004860156246946338, + "loss": 1.3382, + "step": 5863 + }, + { + "epoch": 0.6185654008438819, + "grad_norm": 0.7095376253128052, + "learning_rate": 0.0004857806457225381, + "loss": 1.3449, + "step": 5864 + }, + { + "epoch": 0.6186708860759493, + "grad_norm": 0.7592669129371643, + "learning_rate": 0.00048554569635753857, + "loss": 1.378, + "step": 5865 + }, + { + "epoch": 0.6187763713080169, + "grad_norm": 0.7110881805419922, + "learning_rate": 0.00048531077662596246, + "loss": 1.3843, + "step": 5866 + }, + { + "epoch": 0.6188818565400844, + "grad_norm": 0.7127704620361328, + "learning_rate": 0.00048507588655413367, + "loss": 1.3187, + "step": 5867 + }, + { + "epoch": 0.6189873417721519, + "grad_norm": 0.6756796836853027, + "learning_rate": 0.00048484102616837277, + "loss": 1.3508, + "step": 5868 + }, + { + "epoch": 0.6190928270042194, + "grad_norm": 0.7331464290618896, + "learning_rate": 0.000484606195494997, + "loss": 1.3821, + "step": 5869 + }, + { + "epoch": 0.619198312236287, + "grad_norm": 0.6586403846740723, + "learning_rate": 0.0004843713945603205, + "loss": 1.3618, + "step": 5870 + }, + { + "epoch": 0.6193037974683544, + "grad_norm": 0.711609959602356, + "learning_rate": 0.0004841366233906538, + "loss": 1.4108, + "step": 5871 + }, + { + "epoch": 0.619409282700422, + "grad_norm": 0.6807374358177185, + "learning_rate": 0.0004839018820123042, + "loss": 1.3753, + "step": 5872 + }, + { + "epoch": 0.6195147679324895, + "grad_norm": 0.6953888535499573, + "learning_rate": 0.0004836671704515756, + "loss": 1.3658, + "step": 5873 + }, + { + "epoch": 0.6196202531645569, + "grad_norm": 0.6858312487602234, + "learning_rate": 0.00048343248873476853, + "loss": 1.363, + "step": 5874 + }, + { + "epoch": 0.6197257383966245, + "grad_norm": 0.6958180665969849, + 
"learning_rate": 0.00048319783688818043, + "loss": 1.4346, + "step": 5875 + }, + { + "epoch": 0.619831223628692, + "grad_norm": 0.7493340969085693, + "learning_rate": 0.00048296321493810507, + "loss": 1.3378, + "step": 5876 + }, + { + "epoch": 0.6199367088607595, + "grad_norm": 0.6734772324562073, + "learning_rate": 0.0004827286229108331, + "loss": 1.3899, + "step": 5877 + }, + { + "epoch": 0.620042194092827, + "grad_norm": 0.7650784254074097, + "learning_rate": 0.00048249406083265123, + "loss": 1.3874, + "step": 5878 + }, + { + "epoch": 0.6201476793248946, + "grad_norm": 0.7325068116188049, + "learning_rate": 0.0004822595287298442, + "loss": 1.3592, + "step": 5879 + }, + { + "epoch": 0.620253164556962, + "grad_norm": 0.7223153710365295, + "learning_rate": 0.00048202502662869195, + "loss": 1.3829, + "step": 5880 + }, + { + "epoch": 0.6203586497890295, + "grad_norm": 0.6746701002120972, + "learning_rate": 0.0004817905545554717, + "loss": 1.3642, + "step": 5881 + }, + { + "epoch": 0.6204641350210971, + "grad_norm": 0.6903307437896729, + "learning_rate": 0.00048155611253645727, + "loss": 1.3794, + "step": 5882 + }, + { + "epoch": 0.6205696202531645, + "grad_norm": 0.7220345139503479, + "learning_rate": 0.0004813217005979191, + "loss": 1.3571, + "step": 5883 + }, + { + "epoch": 0.6206751054852321, + "grad_norm": 0.6857538223266602, + "learning_rate": 0.000481087318766124, + "loss": 1.3385, + "step": 5884 + }, + { + "epoch": 0.6207805907172996, + "grad_norm": 0.6674337387084961, + "learning_rate": 0.0004808529670673358, + "loss": 1.3548, + "step": 5885 + }, + { + "epoch": 0.6208860759493671, + "grad_norm": 0.8412179946899414, + "learning_rate": 0.00048061864552781456, + "loss": 1.3967, + "step": 5886 + }, + { + "epoch": 0.6209915611814346, + "grad_norm": 0.7025663256645203, + "learning_rate": 0.0004803843541738173, + "loss": 1.3982, + "step": 5887 + }, + { + "epoch": 0.6210970464135022, + "grad_norm": 0.7035099864006042, + "learning_rate": 0.0004801500930315978, + 
"loss": 1.3473, + "step": 5888 + }, + { + "epoch": 0.6212025316455696, + "grad_norm": 0.7130693197250366, + "learning_rate": 0.000479915862127406, + "loss": 1.3794, + "step": 5889 + }, + { + "epoch": 0.6213080168776371, + "grad_norm": 0.7567024230957031, + "learning_rate": 0.0004796816614874885, + "loss": 1.4003, + "step": 5890 + }, + { + "epoch": 0.6214135021097047, + "grad_norm": 0.7082668542861938, + "learning_rate": 0.00047944749113808884, + "loss": 1.3502, + "step": 5891 + }, + { + "epoch": 0.6215189873417721, + "grad_norm": 0.7806409597396851, + "learning_rate": 0.0004792133511054469, + "loss": 1.3918, + "step": 5892 + }, + { + "epoch": 0.6216244725738397, + "grad_norm": 0.7505583167076111, + "learning_rate": 0.0004789792414157992, + "loss": 1.339, + "step": 5893 + }, + { + "epoch": 0.6217299578059071, + "grad_norm": 0.6687648296356201, + "learning_rate": 0.000478745162095379, + "loss": 1.347, + "step": 5894 + }, + { + "epoch": 0.6218354430379747, + "grad_norm": 0.6789942979812622, + "learning_rate": 0.0004785111131704157, + "loss": 1.3856, + "step": 5895 + }, + { + "epoch": 0.6219409282700422, + "grad_norm": 0.6976656317710876, + "learning_rate": 0.0004782770946671362, + "loss": 1.3803, + "step": 5896 + }, + { + "epoch": 0.6220464135021097, + "grad_norm": 0.7909019589424133, + "learning_rate": 0.0004780431066117629, + "loss": 1.4015, + "step": 5897 + }, + { + "epoch": 0.6221518987341772, + "grad_norm": 0.6975985765457153, + "learning_rate": 0.0004778091490305159, + "loss": 1.4018, + "step": 5898 + }, + { + "epoch": 0.6222573839662447, + "grad_norm": 0.6818307042121887, + "learning_rate": 0.0004775752219496109, + "loss": 1.3537, + "step": 5899 + }, + { + "epoch": 0.6223628691983122, + "grad_norm": 0.7634577751159668, + "learning_rate": 0.00047734132539526086, + "loss": 1.3922, + "step": 5900 + }, + { + "epoch": 0.6224683544303797, + "grad_norm": 0.6750212907791138, + "learning_rate": 0.00047710745939367474, + "loss": 1.3707, + "step": 5901 + }, + { + "epoch": 
0.6225738396624473, + "grad_norm": 0.6584954857826233, + "learning_rate": 0.00047687362397105863, + "loss": 1.4178, + "step": 5902 + }, + { + "epoch": 0.6226793248945147, + "grad_norm": 0.782103955745697, + "learning_rate": 0.0004766398191536149, + "loss": 1.3395, + "step": 5903 + }, + { + "epoch": 0.6227848101265823, + "grad_norm": 0.7253423929214478, + "learning_rate": 0.00047640604496754235, + "loss": 1.3752, + "step": 5904 + }, + { + "epoch": 0.6228902953586498, + "grad_norm": 0.756072998046875, + "learning_rate": 0.000476172301439037, + "loss": 1.3483, + "step": 5905 + }, + { + "epoch": 0.6229957805907173, + "grad_norm": 0.6767773032188416, + "learning_rate": 0.00047593858859429035, + "loss": 1.3712, + "step": 5906 + }, + { + "epoch": 0.6231012658227848, + "grad_norm": 0.6818384528160095, + "learning_rate": 0.00047570490645949175, + "loss": 1.3378, + "step": 5907 + }, + { + "epoch": 0.6232067510548523, + "grad_norm": 0.7838733792304993, + "learning_rate": 0.000475471255060826, + "loss": 1.3651, + "step": 5908 + }, + { + "epoch": 0.6233122362869198, + "grad_norm": 0.739591121673584, + "learning_rate": 0.0004752376344244752, + "loss": 1.3741, + "step": 5909 + }, + { + "epoch": 0.6234177215189873, + "grad_norm": 1.0673471689224243, + "learning_rate": 0.00047500404457661747, + "loss": 1.375, + "step": 5910 + }, + { + "epoch": 0.6235232067510549, + "grad_norm": 0.8021429181098938, + "learning_rate": 0.0004747704855434278, + "loss": 1.3234, + "step": 5911 + }, + { + "epoch": 0.6236286919831223, + "grad_norm": 0.9069942235946655, + "learning_rate": 0.0004745369573510775, + "loss": 1.3938, + "step": 5912 + }, + { + "epoch": 0.6237341772151899, + "grad_norm": 0.7870131731033325, + "learning_rate": 0.0004743034600257348, + "loss": 1.3463, + "step": 5913 + }, + { + "epoch": 0.6238396624472574, + "grad_norm": 0.6775513887405396, + "learning_rate": 0.0004740699935935643, + "loss": 1.3488, + "step": 5914 + }, + { + "epoch": 0.6239451476793249, + "grad_norm": 
0.8173260688781738, + "learning_rate": 0.0004738365580807268, + "loss": 1.3514, + "step": 5915 + }, + { + "epoch": 0.6240506329113924, + "grad_norm": 0.6943689584732056, + "learning_rate": 0.0004736031535133799, + "loss": 1.3596, + "step": 5916 + }, + { + "epoch": 0.62415611814346, + "grad_norm": 0.6809765696525574, + "learning_rate": 0.0004733697799176781, + "loss": 1.3921, + "step": 5917 + }, + { + "epoch": 0.6242616033755274, + "grad_norm": 0.8093090653419495, + "learning_rate": 0.0004731364373197718, + "loss": 1.3757, + "step": 5918 + }, + { + "epoch": 0.6243670886075949, + "grad_norm": 0.753294050693512, + "learning_rate": 0.00047290312574580835, + "loss": 1.3696, + "step": 5919 + }, + { + "epoch": 0.6244725738396625, + "grad_norm": 0.9917935729026794, + "learning_rate": 0.00047266984522193134, + "loss": 1.3652, + "step": 5920 + }, + { + "epoch": 0.6245780590717299, + "grad_norm": 0.7689763903617859, + "learning_rate": 0.0004724365957742809, + "loss": 1.3826, + "step": 5921 + }, + { + "epoch": 0.6246835443037975, + "grad_norm": 0.8510125279426575, + "learning_rate": 0.0004722033774289941, + "loss": 1.3536, + "step": 5922 + }, + { + "epoch": 0.624789029535865, + "grad_norm": 0.68907231092453, + "learning_rate": 0.0004719701902122041, + "loss": 1.383, + "step": 5923 + }, + { + "epoch": 0.6248945147679325, + "grad_norm": 0.9583951234817505, + "learning_rate": 0.00047173703415004066, + "loss": 1.3898, + "step": 5924 + }, + { + "epoch": 0.625, + "grad_norm": 0.7674715518951416, + "learning_rate": 0.0004715039092686302, + "loss": 1.3861, + "step": 5925 + }, + { + "epoch": 0.6251054852320675, + "grad_norm": 1.0247588157653809, + "learning_rate": 0.0004712708155940951, + "loss": 1.3166, + "step": 5926 + }, + { + "epoch": 0.625210970464135, + "grad_norm": 1.1654638051986694, + "learning_rate": 0.0004710377531525552, + "loss": 1.3773, + "step": 5927 + }, + { + "epoch": 0.6253164556962025, + "grad_norm": 0.7712395191192627, + "learning_rate": 0.000470804721970126, + 
"loss": 1.3518, + "step": 5928 + }, + { + "epoch": 0.6254219409282701, + "grad_norm": 1.5674586296081543, + "learning_rate": 0.00047057172207292004, + "loss": 1.3701, + "step": 5929 + }, + { + "epoch": 0.6255274261603375, + "grad_norm": 0.6653510332107544, + "learning_rate": 0.00047033875348704576, + "loss": 1.3815, + "step": 5930 + }, + { + "epoch": 0.6256329113924051, + "grad_norm": 0.8742679357528687, + "learning_rate": 0.00047010581623860883, + "loss": 1.3723, + "step": 5931 + }, + { + "epoch": 0.6257383966244726, + "grad_norm": 0.7504332661628723, + "learning_rate": 0.0004698729103537109, + "loss": 1.3891, + "step": 5932 + }, + { + "epoch": 0.62584388185654, + "grad_norm": 0.6900975108146667, + "learning_rate": 0.0004696400358584501, + "loss": 1.4, + "step": 5933 + }, + { + "epoch": 0.6259493670886076, + "grad_norm": 0.6808052659034729, + "learning_rate": 0.00046940719277892143, + "loss": 1.3929, + "step": 5934 + }, + { + "epoch": 0.6260548523206751, + "grad_norm": 0.6553429365158081, + "learning_rate": 0.0004691743811412159, + "loss": 1.3708, + "step": 5935 + }, + { + "epoch": 0.6261603375527426, + "grad_norm": 0.6264590620994568, + "learning_rate": 0.00046894160097142113, + "loss": 1.3709, + "step": 5936 + }, + { + "epoch": 0.6262658227848101, + "grad_norm": 0.668124258518219, + "learning_rate": 0.00046870885229562153, + "loss": 1.3868, + "step": 5937 + }, + { + "epoch": 0.6263713080168777, + "grad_norm": 0.7933694124221802, + "learning_rate": 0.0004684761351398976, + "loss": 1.3357, + "step": 5938 + }, + { + "epoch": 0.6264767932489451, + "grad_norm": 0.7368065118789673, + "learning_rate": 0.0004682434495303267, + "loss": 1.3194, + "step": 5939 + }, + { + "epoch": 0.6265822784810127, + "grad_norm": 0.8395361304283142, + "learning_rate": 0.00046801079549298224, + "loss": 1.4073, + "step": 5940 + }, + { + "epoch": 0.6266877637130802, + "grad_norm": 0.6646409034729004, + "learning_rate": 0.0004677781730539342, + "loss": 1.3958, + "step": 5941 + }, + { + 
"epoch": 0.6267932489451477, + "grad_norm": 0.8993902206420898, + "learning_rate": 0.00046754558223924926, + "loss": 1.3741, + "step": 5942 + }, + { + "epoch": 0.6268987341772152, + "grad_norm": 0.7177978754043579, + "learning_rate": 0.00046731302307499023, + "loss": 1.3289, + "step": 5943 + }, + { + "epoch": 0.6270042194092827, + "grad_norm": 0.9800382852554321, + "learning_rate": 0.0004670804955872166, + "loss": 1.3748, + "step": 5944 + }, + { + "epoch": 0.6271097046413502, + "grad_norm": 0.7115187048912048, + "learning_rate": 0.00046684799980198415, + "loss": 1.3435, + "step": 5945 + }, + { + "epoch": 0.6272151898734177, + "grad_norm": 0.6842699646949768, + "learning_rate": 0.0004666155357453451, + "loss": 1.3809, + "step": 5946 + }, + { + "epoch": 0.6273206751054853, + "grad_norm": 0.8740156292915344, + "learning_rate": 0.00046638310344334835, + "loss": 1.3563, + "step": 5947 + }, + { + "epoch": 0.6274261603375527, + "grad_norm": 0.7332566976547241, + "learning_rate": 0.0004661507029220393, + "loss": 1.3567, + "step": 5948 + }, + { + "epoch": 0.6275316455696203, + "grad_norm": 0.6450368165969849, + "learning_rate": 0.0004659183342074594, + "loss": 1.3687, + "step": 5949 + }, + { + "epoch": 0.6276371308016878, + "grad_norm": 0.7080195546150208, + "learning_rate": 0.0004656859973256466, + "loss": 1.3702, + "step": 5950 + }, + { + "epoch": 0.6277426160337553, + "grad_norm": 0.7372121214866638, + "learning_rate": 0.0004654536923026356, + "loss": 1.414, + "step": 5951 + }, + { + "epoch": 0.6278481012658228, + "grad_norm": 0.7152130007743835, + "learning_rate": 0.00046522141916445725, + "loss": 1.395, + "step": 5952 + }, + { + "epoch": 0.6279535864978903, + "grad_norm": 0.6720043420791626, + "learning_rate": 0.0004649891779371389, + "loss": 1.3682, + "step": 5953 + }, + { + "epoch": 0.6280590717299578, + "grad_norm": 0.6810348629951477, + "learning_rate": 0.0004647569686467043, + "loss": 1.3473, + "step": 5954 + }, + { + "epoch": 0.6281645569620253, + "grad_norm": 
0.6379370093345642, + "learning_rate": 0.00046452479131917383, + "loss": 1.3789, + "step": 5955 + }, + { + "epoch": 0.6282700421940929, + "grad_norm": 0.7346146106719971, + "learning_rate": 0.0004642926459805636, + "loss": 1.3839, + "step": 5956 + }, + { + "epoch": 0.6283755274261603, + "grad_norm": 0.6996516585350037, + "learning_rate": 0.0004640605326568874, + "loss": 1.3502, + "step": 5957 + }, + { + "epoch": 0.6284810126582279, + "grad_norm": 0.9058144092559814, + "learning_rate": 0.00046382845137415437, + "loss": 1.3581, + "step": 5958 + }, + { + "epoch": 0.6285864978902953, + "grad_norm": 0.6966539025306702, + "learning_rate": 0.0004635964021583703, + "loss": 1.363, + "step": 5959 + }, + { + "epoch": 0.6286919831223629, + "grad_norm": 0.7860754132270813, + "learning_rate": 0.00046336438503553754, + "loss": 1.3782, + "step": 5960 + }, + { + "epoch": 0.6287974683544304, + "grad_norm": 0.6408897042274475, + "learning_rate": 0.00046313240003165466, + "loss": 1.3796, + "step": 5961 + }, + { + "epoch": 0.6289029535864978, + "grad_norm": 0.7097799777984619, + "learning_rate": 0.00046290044717271685, + "loss": 1.3631, + "step": 5962 + }, + { + "epoch": 0.6290084388185654, + "grad_norm": 0.6835980415344238, + "learning_rate": 0.00046266852648471553, + "loss": 1.3799, + "step": 5963 + }, + { + "epoch": 0.6291139240506329, + "grad_norm": 0.6769852042198181, + "learning_rate": 0.0004624366379936383, + "loss": 1.3728, + "step": 5964 + }, + { + "epoch": 0.6292194092827004, + "grad_norm": 0.6942307949066162, + "learning_rate": 0.00046220478172546997, + "loss": 1.4142, + "step": 5965 + }, + { + "epoch": 0.6293248945147679, + "grad_norm": 0.6874144077301025, + "learning_rate": 0.00046197295770619105, + "loss": 1.3732, + "step": 5966 + }, + { + "epoch": 0.6294303797468355, + "grad_norm": 0.6733388900756836, + "learning_rate": 0.00046174116596177833, + "loss": 1.3676, + "step": 5967 + }, + { + "epoch": 0.6295358649789029, + "grad_norm": 0.6411116123199463, + "learning_rate": 
0.00046150940651820536, + "loss": 1.3399, + "step": 5968 + }, + { + "epoch": 0.6296413502109705, + "grad_norm": 0.6475054621696472, + "learning_rate": 0.0004612776794014419, + "loss": 1.3419, + "step": 5969 + }, + { + "epoch": 0.629746835443038, + "grad_norm": 0.7058001756668091, + "learning_rate": 0.00046104598463745424, + "loss": 1.3982, + "step": 5970 + }, + { + "epoch": 0.6298523206751054, + "grad_norm": 0.6269000172615051, + "learning_rate": 0.0004608143222522048, + "loss": 1.3871, + "step": 5971 + }, + { + "epoch": 0.629957805907173, + "grad_norm": 0.6651265025138855, + "learning_rate": 0.00046058269227165256, + "loss": 1.3281, + "step": 5972 + }, + { + "epoch": 0.6300632911392405, + "grad_norm": 0.668925404548645, + "learning_rate": 0.0004603510947217526, + "loss": 1.3762, + "step": 5973 + }, + { + "epoch": 0.630168776371308, + "grad_norm": 0.7203687429428101, + "learning_rate": 0.000460119529628457, + "loss": 1.3769, + "step": 5974 + }, + { + "epoch": 0.6302742616033755, + "grad_norm": 0.6657797694206238, + "learning_rate": 0.00045988799701771364, + "loss": 1.3625, + "step": 5975 + }, + { + "epoch": 0.6303797468354431, + "grad_norm": 0.670214056968689, + "learning_rate": 0.0004596564969154668, + "loss": 1.3543, + "step": 5976 + }, + { + "epoch": 0.6304852320675105, + "grad_norm": 0.6484906673431396, + "learning_rate": 0.00045942502934765735, + "loss": 1.3301, + "step": 5977 + }, + { + "epoch": 0.630590717299578, + "grad_norm": 0.643893301486969, + "learning_rate": 0.0004591935943402222, + "loss": 1.3695, + "step": 5978 + }, + { + "epoch": 0.6306962025316456, + "grad_norm": 0.7120513319969177, + "learning_rate": 0.00045896219191909486, + "loss": 1.3241, + "step": 5979 + }, + { + "epoch": 0.630801687763713, + "grad_norm": 0.7422221302986145, + "learning_rate": 0.0004587308221102053, + "loss": 1.3413, + "step": 5980 + }, + { + "epoch": 0.6309071729957806, + "grad_norm": 0.6662958860397339, + "learning_rate": 0.0004584994849394795, + "loss": 1.3787, + "step": 
5981 + }, + { + "epoch": 0.6310126582278481, + "grad_norm": 0.7877079248428345, + "learning_rate": 0.0004582681804328396, + "loss": 1.3742, + "step": 5982 + }, + { + "epoch": 0.6311181434599156, + "grad_norm": 0.6854775547981262, + "learning_rate": 0.0004580369086162051, + "loss": 1.3358, + "step": 5983 + }, + { + "epoch": 0.6312236286919831, + "grad_norm": 0.6531469225883484, + "learning_rate": 0.0004578056695154909, + "loss": 1.377, + "step": 5984 + }, + { + "epoch": 0.6313291139240507, + "grad_norm": 0.6901594996452332, + "learning_rate": 0.0004575744631566083, + "loss": 1.3719, + "step": 5985 + }, + { + "epoch": 0.6314345991561181, + "grad_norm": 0.6955669522285461, + "learning_rate": 0.0004573432895654654, + "loss": 1.3872, + "step": 5986 + }, + { + "epoch": 0.6315400843881857, + "grad_norm": 0.6534554362297058, + "learning_rate": 0.00045711214876796623, + "loss": 1.3626, + "step": 5987 + }, + { + "epoch": 0.6316455696202532, + "grad_norm": 0.8217992782592773, + "learning_rate": 0.0004568810407900112, + "loss": 1.3927, + "step": 5988 + }, + { + "epoch": 0.6317510548523206, + "grad_norm": 0.7489247918128967, + "learning_rate": 0.00045664996565749716, + "loss": 1.4017, + "step": 5989 + }, + { + "epoch": 0.6318565400843882, + "grad_norm": 0.7784095406532288, + "learning_rate": 0.00045641892339631703, + "loss": 1.378, + "step": 5990 + }, + { + "epoch": 0.6319620253164557, + "grad_norm": 0.6412631273269653, + "learning_rate": 0.0004561879140323607, + "loss": 1.3771, + "step": 5991 + }, + { + "epoch": 0.6320675105485232, + "grad_norm": 0.737859308719635, + "learning_rate": 0.0004559569375915137, + "loss": 1.3592, + "step": 5992 + }, + { + "epoch": 0.6321729957805907, + "grad_norm": 0.7737705111503601, + "learning_rate": 0.00045572599409965804, + "loss": 1.4401, + "step": 5993 + }, + { + "epoch": 0.6322784810126583, + "grad_norm": 0.8040459156036377, + "learning_rate": 0.00045549508358267224, + "loss": 1.3922, + "step": 5994 + }, + { + "epoch": 0.6323839662447257, + 
"grad_norm": 0.6814796328544617, + "learning_rate": 0.0004552642060664307, + "loss": 1.3483, + "step": 5995 + }, + { + "epoch": 0.6324894514767933, + "grad_norm": 0.7544494271278381, + "learning_rate": 0.00045503336157680466, + "loss": 1.3783, + "step": 5996 + }, + { + "epoch": 0.6325949367088608, + "grad_norm": 0.6829193830490112, + "learning_rate": 0.00045480255013966123, + "loss": 1.3792, + "step": 5997 + }, + { + "epoch": 0.6327004219409282, + "grad_norm": 0.780666172504425, + "learning_rate": 0.00045457177178086407, + "loss": 1.3702, + "step": 5998 + }, + { + "epoch": 0.6328059071729958, + "grad_norm": 0.7945857644081116, + "learning_rate": 0.0004543410265262727, + "loss": 1.3814, + "step": 5999 + }, + { + "epoch": 0.6329113924050633, + "grad_norm": 0.7124767303466797, + "learning_rate": 0.000454110314401744, + "loss": 1.3928, + "step": 6000 + }, + { + "epoch": 0.6330168776371308, + "grad_norm": 0.6688746809959412, + "learning_rate": 0.0004538796354331298, + "loss": 1.3395, + "step": 6001 + }, + { + "epoch": 0.6331223628691983, + "grad_norm": 0.6974355578422546, + "learning_rate": 0.0004536489896462792, + "loss": 1.3396, + "step": 6002 + }, + { + "epoch": 0.6332278481012659, + "grad_norm": 0.7234956622123718, + "learning_rate": 0.0004534183770670371, + "loss": 1.3739, + "step": 6003 + }, + { + "epoch": 0.6333333333333333, + "grad_norm": 0.6986801624298096, + "learning_rate": 0.0004531877977212446, + "loss": 1.4109, + "step": 6004 + }, + { + "epoch": 0.6334388185654009, + "grad_norm": 0.8240973353385925, + "learning_rate": 0.00045295725163473945, + "loss": 1.3691, + "step": 6005 + }, + { + "epoch": 0.6335443037974684, + "grad_norm": 0.7398058176040649, + "learning_rate": 0.0004527267388333555, + "loss": 1.3677, + "step": 6006 + }, + { + "epoch": 0.6336497890295358, + "grad_norm": 0.7428410649299622, + "learning_rate": 0.0004524962593429227, + "loss": 1.3688, + "step": 6007 + }, + { + "epoch": 0.6337552742616034, + "grad_norm": 0.6740618944168091, + 
"learning_rate": 0.00045226581318926737, + "loss": 1.378, + "step": 6008 + }, + { + "epoch": 0.6338607594936709, + "grad_norm": 0.7175842523574829, + "learning_rate": 0.0004520354003982125, + "loss": 1.393, + "step": 6009 + }, + { + "epoch": 0.6339662447257384, + "grad_norm": 0.7314635515213013, + "learning_rate": 0.00045180502099557686, + "loss": 1.3441, + "step": 6010 + }, + { + "epoch": 0.6340717299578059, + "grad_norm": 0.7323969006538391, + "learning_rate": 0.0004515746750071754, + "loss": 1.3473, + "step": 6011 + }, + { + "epoch": 0.6341772151898735, + "grad_norm": 0.9064114093780518, + "learning_rate": 0.00045134436245881986, + "loss": 1.3901, + "step": 6012 + }, + { + "epoch": 0.6342827004219409, + "grad_norm": 0.6674203276634216, + "learning_rate": 0.0004511140833763177, + "loss": 1.3837, + "step": 6013 + }, + { + "epoch": 0.6343881856540085, + "grad_norm": 0.8249063491821289, + "learning_rate": 0.00045088383778547284, + "loss": 1.3857, + "step": 6014 + }, + { + "epoch": 0.634493670886076, + "grad_norm": 0.663036584854126, + "learning_rate": 0.0004506536257120856, + "loss": 1.3736, + "step": 6015 + }, + { + "epoch": 0.6345991561181434, + "grad_norm": 0.8447476625442505, + "learning_rate": 0.0004504234471819518, + "loss": 1.3695, + "step": 6016 + }, + { + "epoch": 0.634704641350211, + "grad_norm": 0.6715084910392761, + "learning_rate": 0.0004501933022208649, + "loss": 1.3954, + "step": 6017 + }, + { + "epoch": 0.6348101265822785, + "grad_norm": 0.8944247364997864, + "learning_rate": 0.00044996319085461353, + "loss": 1.3408, + "step": 6018 + }, + { + "epoch": 0.634915611814346, + "grad_norm": 0.6567977070808411, + "learning_rate": 0.00044973311310898275, + "loss": 1.3431, + "step": 6019 + }, + { + "epoch": 0.6350210970464135, + "grad_norm": 0.7071217894554138, + "learning_rate": 0.00044950306900975377, + "loss": 1.3808, + "step": 6020 + }, + { + "epoch": 0.6351265822784811, + "grad_norm": 0.7975397706031799, + "learning_rate": 0.0004492730585827046, + 
"loss": 1.3408, + "step": 6021 + }, + { + "epoch": 0.6352320675105485, + "grad_norm": 0.6843515634536743, + "learning_rate": 0.0004490430818536085, + "loss": 1.3367, + "step": 6022 + }, + { + "epoch": 0.635337552742616, + "grad_norm": 0.8446115851402283, + "learning_rate": 0.0004488131388482359, + "loss": 1.3762, + "step": 6023 + }, + { + "epoch": 0.6354430379746835, + "grad_norm": 0.7431327700614929, + "learning_rate": 0.000448583229592353, + "loss": 1.3296, + "step": 6024 + }, + { + "epoch": 0.635548523206751, + "grad_norm": 0.7007524967193604, + "learning_rate": 0.0004483533541117218, + "loss": 1.3643, + "step": 6025 + }, + { + "epoch": 0.6356540084388186, + "grad_norm": 0.7635689377784729, + "learning_rate": 0.0004481235124321018, + "loss": 1.3556, + "step": 6026 + }, + { + "epoch": 0.635759493670886, + "grad_norm": 0.7473506927490234, + "learning_rate": 0.0004478937045792474, + "loss": 1.37, + "step": 6027 + }, + { + "epoch": 0.6358649789029536, + "grad_norm": 0.826234757900238, + "learning_rate": 0.00044766393057891, + "loss": 1.3805, + "step": 6028 + }, + { + "epoch": 0.6359704641350211, + "grad_norm": 0.9857551455497742, + "learning_rate": 0.00044743419045683674, + "loss": 1.342, + "step": 6029 + }, + { + "epoch": 0.6360759493670886, + "grad_norm": 0.7615806460380554, + "learning_rate": 0.00044720448423877113, + "loss": 1.3464, + "step": 6030 + }, + { + "epoch": 0.6361814345991561, + "grad_norm": 1.132103681564331, + "learning_rate": 0.0004469748119504529, + "loss": 1.4001, + "step": 6031 + }, + { + "epoch": 0.6362869198312237, + "grad_norm": 0.68757164478302, + "learning_rate": 0.000446745173617618, + "loss": 1.3716, + "step": 6032 + }, + { + "epoch": 0.6363924050632911, + "grad_norm": 0.8449698686599731, + "learning_rate": 0.00044651556926599863, + "loss": 1.3599, + "step": 6033 + }, + { + "epoch": 0.6364978902953586, + "grad_norm": 1.067588210105896, + "learning_rate": 0.0004462859989213227, + "loss": 1.3928, + "step": 6034 + }, + { + "epoch": 
0.6366033755274262, + "grad_norm": 0.7575163841247559, + "learning_rate": 0.0004460564626093154, + "loss": 1.3733, + "step": 6035 + }, + { + "epoch": 0.6367088607594936, + "grad_norm": 0.9930274486541748, + "learning_rate": 0.00044582696035569695, + "loss": 1.3574, + "step": 6036 + }, + { + "epoch": 0.6368143459915612, + "grad_norm": 0.7205777764320374, + "learning_rate": 0.00044559749218618444, + "loss": 1.3927, + "step": 6037 + }, + { + "epoch": 0.6369198312236287, + "grad_norm": 0.8428406715393066, + "learning_rate": 0.0004453680581264908, + "loss": 1.3706, + "step": 6038 + }, + { + "epoch": 0.6370253164556962, + "grad_norm": 0.887904167175293, + "learning_rate": 0.00044513865820232525, + "loss": 1.3501, + "step": 6039 + }, + { + "epoch": 0.6371308016877637, + "grad_norm": 0.8534832000732422, + "learning_rate": 0.0004449092924393933, + "loss": 1.3866, + "step": 6040 + }, + { + "epoch": 0.6372362869198313, + "grad_norm": 0.6942139267921448, + "learning_rate": 0.0004446799608633964, + "loss": 1.3378, + "step": 6041 + }, + { + "epoch": 0.6373417721518987, + "grad_norm": 0.7484821081161499, + "learning_rate": 0.00044445066350003203, + "loss": 1.361, + "step": 6042 + }, + { + "epoch": 0.6374472573839662, + "grad_norm": 0.7633310556411743, + "learning_rate": 0.00044422140037499473, + "loss": 1.3551, + "step": 6043 + }, + { + "epoch": 0.6375527426160338, + "grad_norm": 0.7211193442344666, + "learning_rate": 0.0004439921715139743, + "loss": 1.3498, + "step": 6044 + }, + { + "epoch": 0.6376582278481012, + "grad_norm": 0.7762455940246582, + "learning_rate": 0.00044376297694265687, + "loss": 1.3513, + "step": 6045 + }, + { + "epoch": 0.6377637130801688, + "grad_norm": 0.7285361289978027, + "learning_rate": 0.000443533816686725, + "loss": 1.3448, + "step": 6046 + }, + { + "epoch": 0.6378691983122363, + "grad_norm": 0.7416214942932129, + "learning_rate": 0.0004433046907718571, + "loss": 1.3632, + "step": 6047 + }, + { + "epoch": 0.6379746835443038, + "grad_norm": 
0.7205978631973267, + "learning_rate": 0.0004430755992237278, + "loss": 1.3783, + "step": 6048 + }, + { + "epoch": 0.6380801687763713, + "grad_norm": 0.6663371920585632, + "learning_rate": 0.00044284654206800826, + "loss": 1.3413, + "step": 6049 + }, + { + "epoch": 0.6381856540084389, + "grad_norm": 0.7947536110877991, + "learning_rate": 0.00044261751933036525, + "loss": 1.3538, + "step": 6050 + }, + { + "epoch": 0.6382911392405063, + "grad_norm": 0.743706226348877, + "learning_rate": 0.00044238853103646154, + "loss": 1.3656, + "step": 6051 + }, + { + "epoch": 0.6383966244725738, + "grad_norm": 0.7769945859909058, + "learning_rate": 0.0004421595772119573, + "loss": 1.4047, + "step": 6052 + }, + { + "epoch": 0.6385021097046414, + "grad_norm": 1.0796643495559692, + "learning_rate": 0.0004419306578825073, + "loss": 1.3428, + "step": 6053 + }, + { + "epoch": 0.6386075949367088, + "grad_norm": 0.793050229549408, + "learning_rate": 0.0004417017730737633, + "loss": 1.3943, + "step": 6054 + }, + { + "epoch": 0.6387130801687764, + "grad_norm": 1.1645010709762573, + "learning_rate": 0.00044147292281137293, + "loss": 1.3383, + "step": 6055 + }, + { + "epoch": 0.6388185654008439, + "grad_norm": 0.6350115537643433, + "learning_rate": 0.00044124410712098014, + "loss": 1.3459, + "step": 6056 + }, + { + "epoch": 0.6389240506329114, + "grad_norm": 1.0648895502090454, + "learning_rate": 0.0004410153260282246, + "loss": 1.3819, + "step": 6057 + }, + { + "epoch": 0.6390295358649789, + "grad_norm": 0.7532727718353271, + "learning_rate": 0.00044078657955874245, + "loss": 1.3806, + "step": 6058 + }, + { + "epoch": 0.6391350210970465, + "grad_norm": 0.8333994150161743, + "learning_rate": 0.0004405578677381661, + "loss": 1.3819, + "step": 6059 + }, + { + "epoch": 0.6392405063291139, + "grad_norm": 1.0569937229156494, + "learning_rate": 0.0004403291905921233, + "loss": 1.3932, + "step": 6060 + }, + { + "epoch": 0.6393459915611814, + "grad_norm": 0.8421439528465271, + "learning_rate": 
0.00044010054814623925, + "loss": 1.3641, + "step": 6061 + }, + { + "epoch": 0.639451476793249, + "grad_norm": 0.8377203941345215, + "learning_rate": 0.00043987194042613393, + "loss": 1.3556, + "step": 6062 + }, + { + "epoch": 0.6395569620253164, + "grad_norm": 0.6552917957305908, + "learning_rate": 0.0004396433674574242, + "loss": 1.3498, + "step": 6063 + }, + { + "epoch": 0.639662447257384, + "grad_norm": 0.7409442663192749, + "learning_rate": 0.00043941482926572277, + "loss": 1.4172, + "step": 6064 + }, + { + "epoch": 0.6397679324894515, + "grad_norm": 0.7774975299835205, + "learning_rate": 0.0004391863258766384, + "loss": 1.3523, + "step": 6065 + }, + { + "epoch": 0.639873417721519, + "grad_norm": 0.7491084933280945, + "learning_rate": 0.00043895785731577606, + "loss": 1.3405, + "step": 6066 + }, + { + "epoch": 0.6399789029535865, + "grad_norm": 0.7161375880241394, + "learning_rate": 0.0004387294236087368, + "loss": 1.3545, + "step": 6067 + }, + { + "epoch": 0.640084388185654, + "grad_norm": 0.7113397121429443, + "learning_rate": 0.00043850102478111764, + "loss": 1.3451, + "step": 6068 + }, + { + "epoch": 0.6401898734177215, + "grad_norm": 0.773125946521759, + "learning_rate": 0.00043827266085851203, + "loss": 1.4079, + "step": 6069 + }, + { + "epoch": 0.640295358649789, + "grad_norm": 0.7314280271530151, + "learning_rate": 0.00043804433186650916, + "loss": 1.3653, + "step": 6070 + }, + { + "epoch": 0.6404008438818566, + "grad_norm": 0.7585614919662476, + "learning_rate": 0.0004378160378306944, + "loss": 1.3619, + "step": 6071 + }, + { + "epoch": 0.640506329113924, + "grad_norm": 0.6510088443756104, + "learning_rate": 0.0004375877787766495, + "loss": 1.3626, + "step": 6072 + }, + { + "epoch": 0.6406118143459916, + "grad_norm": 0.696243405342102, + "learning_rate": 0.0004373595547299517, + "loss": 1.3868, + "step": 6073 + }, + { + "epoch": 0.6407172995780591, + "grad_norm": 0.7971287369728088, + "learning_rate": 0.00043713136571617474, + "loss": 1.3905, + 
"step": 6074 + }, + { + "epoch": 0.6408227848101266, + "grad_norm": 0.7655066847801208, + "learning_rate": 0.00043690321176088843, + "loss": 1.3368, + "step": 6075 + }, + { + "epoch": 0.6409282700421941, + "grad_norm": 0.7584236860275269, + "learning_rate": 0.00043667509288965845, + "loss": 1.3336, + "step": 6076 + }, + { + "epoch": 0.6410337552742617, + "grad_norm": 0.7590962648391724, + "learning_rate": 0.0004364470091280463, + "loss": 1.3637, + "step": 6077 + }, + { + "epoch": 0.6411392405063291, + "grad_norm": 0.6708555221557617, + "learning_rate": 0.0004362189605016107, + "loss": 1.3627, + "step": 6078 + }, + { + "epoch": 0.6412447257383966, + "grad_norm": 0.6844414472579956, + "learning_rate": 0.00043599094703590524, + "loss": 1.3879, + "step": 6079 + }, + { + "epoch": 0.6413502109704642, + "grad_norm": 0.7304306626319885, + "learning_rate": 0.00043576296875647984, + "loss": 1.3336, + "step": 6080 + }, + { + "epoch": 0.6414556962025316, + "grad_norm": 0.6880146861076355, + "learning_rate": 0.00043553502568888095, + "loss": 1.3502, + "step": 6081 + }, + { + "epoch": 0.6415611814345992, + "grad_norm": 0.6724720597267151, + "learning_rate": 0.00043530711785865026, + "loss": 1.3349, + "step": 6082 + }, + { + "epoch": 0.6416666666666667, + "grad_norm": 0.8720074892044067, + "learning_rate": 0.00043507924529132637, + "loss": 1.3989, + "step": 6083 + }, + { + "epoch": 0.6417721518987342, + "grad_norm": 0.6565287113189697, + "learning_rate": 0.0004348514080124432, + "loss": 1.3475, + "step": 6084 + }, + { + "epoch": 0.6418776371308017, + "grad_norm": 1.1213605403900146, + "learning_rate": 0.0004346236060475314, + "loss": 1.3545, + "step": 6085 + }, + { + "epoch": 0.6419831223628693, + "grad_norm": 0.6381583213806152, + "learning_rate": 0.00043439583942211674, + "loss": 1.3791, + "step": 6086 + }, + { + "epoch": 0.6420886075949367, + "grad_norm": 0.7049232125282288, + "learning_rate": 0.00043416810816172244, + "loss": 1.382, + "step": 6087 + }, + { + "epoch": 
0.6421940928270042, + "grad_norm": 0.7069041728973389, + "learning_rate": 0.0004339404122918664, + "loss": 1.3396, + "step": 6088 + }, + { + "epoch": 0.6422995780590718, + "grad_norm": 0.6966137290000916, + "learning_rate": 0.0004337127518380632, + "loss": 1.3469, + "step": 6089 + }, + { + "epoch": 0.6424050632911392, + "grad_norm": 0.6557683348655701, + "learning_rate": 0.0004334851268258234, + "loss": 1.376, + "step": 6090 + }, + { + "epoch": 0.6425105485232068, + "grad_norm": 0.6763143539428711, + "learning_rate": 0.0004332575372806534, + "loss": 1.3884, + "step": 6091 + }, + { + "epoch": 0.6426160337552742, + "grad_norm": 0.7155510783195496, + "learning_rate": 0.00043302998322805564, + "loss": 1.3575, + "step": 6092 + }, + { + "epoch": 0.6427215189873418, + "grad_norm": 0.6809484958648682, + "learning_rate": 0.0004328024646935289, + "loss": 1.3683, + "step": 6093 + }, + { + "epoch": 0.6428270042194093, + "grad_norm": 0.6885896921157837, + "learning_rate": 0.00043257498170256735, + "loss": 1.3578, + "step": 6094 + }, + { + "epoch": 0.6429324894514767, + "grad_norm": 0.6756695508956909, + "learning_rate": 0.0004323475342806622, + "loss": 1.3226, + "step": 6095 + }, + { + "epoch": 0.6430379746835443, + "grad_norm": 0.7043845653533936, + "learning_rate": 0.00043212012245329986, + "loss": 1.3408, + "step": 6096 + }, + { + "epoch": 0.6431434599156118, + "grad_norm": 0.7019138336181641, + "learning_rate": 0.0004318927462459629, + "loss": 1.3411, + "step": 6097 + }, + { + "epoch": 0.6432489451476793, + "grad_norm": 0.9170623421669006, + "learning_rate": 0.0004316654056841299, + "loss": 1.3807, + "step": 6098 + }, + { + "epoch": 0.6433544303797468, + "grad_norm": 0.6596401333808899, + "learning_rate": 0.0004314381007932756, + "loss": 1.3332, + "step": 6099 + }, + { + "epoch": 0.6434599156118144, + "grad_norm": 0.7188556790351868, + "learning_rate": 0.00043121083159887056, + "loss": 1.3677, + "step": 6100 + }, + { + "epoch": 0.6435654008438818, + "grad_norm": 
0.7053860425949097, + "learning_rate": 0.00043098359812638145, + "loss": 1.4, + "step": 6101 + }, + { + "epoch": 0.6436708860759494, + "grad_norm": 0.7250891327857971, + "learning_rate": 0.000430756400401271, + "loss": 1.3374, + "step": 6102 + }, + { + "epoch": 0.6437763713080169, + "grad_norm": 0.9319062829017639, + "learning_rate": 0.00043052923844899733, + "loss": 1.3517, + "step": 6103 + }, + { + "epoch": 0.6438818565400843, + "grad_norm": 0.6808270812034607, + "learning_rate": 0.000430302112295016, + "loss": 1.3686, + "step": 6104 + }, + { + "epoch": 0.6439873417721519, + "grad_norm": 0.7392550110816956, + "learning_rate": 0.00043007502196477703, + "loss": 1.3751, + "step": 6105 + }, + { + "epoch": 0.6440928270042194, + "grad_norm": 0.8509371280670166, + "learning_rate": 0.00042984796748372716, + "loss": 1.3696, + "step": 6106 + }, + { + "epoch": 0.6441983122362869, + "grad_norm": 0.6670138239860535, + "learning_rate": 0.000429620948877309, + "loss": 1.4034, + "step": 6107 + }, + { + "epoch": 0.6443037974683544, + "grad_norm": 0.7961129546165466, + "learning_rate": 0.000429393966170961, + "loss": 1.3643, + "step": 6108 + }, + { + "epoch": 0.644409282700422, + "grad_norm": 0.7220608592033386, + "learning_rate": 0.00042916701939011787, + "loss": 1.3839, + "step": 6109 + }, + { + "epoch": 0.6445147679324894, + "grad_norm": 0.6691188216209412, + "learning_rate": 0.00042894010856020997, + "loss": 1.3575, + "step": 6110 + }, + { + "epoch": 0.644620253164557, + "grad_norm": 1.0455830097198486, + "learning_rate": 0.00042871323370666383, + "loss": 1.3697, + "step": 6111 + }, + { + "epoch": 0.6447257383966245, + "grad_norm": 0.7328425645828247, + "learning_rate": 0.00042848639485490165, + "loss": 1.3402, + "step": 6112 + }, + { + "epoch": 0.6448312236286919, + "grad_norm": 0.7351486682891846, + "learning_rate": 0.0004282595920303425, + "loss": 1.3926, + "step": 6113 + }, + { + "epoch": 0.6449367088607595, + "grad_norm": 0.7091797590255737, + "learning_rate": 
0.00042803282525840036, + "loss": 1.3584, + "step": 6114 + }, + { + "epoch": 0.645042194092827, + "grad_norm": 0.6599289774894714, + "learning_rate": 0.0004278060945644856, + "loss": 1.3664, + "step": 6115 + }, + { + "epoch": 0.6451476793248945, + "grad_norm": 0.9124534726142883, + "learning_rate": 0.0004275793999740046, + "loss": 1.3974, + "step": 6116 + }, + { + "epoch": 0.645253164556962, + "grad_norm": 0.6259053349494934, + "learning_rate": 0.00042735274151235953, + "loss": 1.347, + "step": 6117 + }, + { + "epoch": 0.6453586497890296, + "grad_norm": 0.8601148128509521, + "learning_rate": 0.00042712611920494865, + "loss": 1.3874, + "step": 6118 + }, + { + "epoch": 0.645464135021097, + "grad_norm": 0.6695570349693298, + "learning_rate": 0.0004268995330771661, + "loss": 1.3521, + "step": 6119 + }, + { + "epoch": 0.6455696202531646, + "grad_norm": 0.7895935773849487, + "learning_rate": 0.0004266729831544017, + "loss": 1.3334, + "step": 6120 + }, + { + "epoch": 0.6456751054852321, + "grad_norm": 0.7232438921928406, + "learning_rate": 0.0004264464694620421, + "loss": 1.3547, + "step": 6121 + }, + { + "epoch": 0.6457805907172995, + "grad_norm": 0.7047086954116821, + "learning_rate": 0.00042621999202546897, + "loss": 1.3816, + "step": 6122 + }, + { + "epoch": 0.6458860759493671, + "grad_norm": 0.701577365398407, + "learning_rate": 0.0004259935508700603, + "loss": 1.3637, + "step": 6123 + }, + { + "epoch": 0.6459915611814346, + "grad_norm": 0.73112553358078, + "learning_rate": 0.0004257671460211898, + "loss": 1.3663, + "step": 6124 + }, + { + "epoch": 0.6460970464135021, + "grad_norm": 0.6491398215293884, + "learning_rate": 0.00042554077750422736, + "loss": 1.3587, + "step": 6125 + }, + { + "epoch": 0.6462025316455696, + "grad_norm": 0.6762859225273132, + "learning_rate": 0.00042531444534453885, + "loss": 1.3553, + "step": 6126 + }, + { + "epoch": 0.6463080168776372, + "grad_norm": 0.7185043096542358, + "learning_rate": 0.0004250881495674855, + "loss": 1.3474, + "step": 
6127 + }, + { + "epoch": 0.6464135021097046, + "grad_norm": 0.6532772779464722, + "learning_rate": 0.00042486189019842535, + "loss": 1.3429, + "step": 6128 + }, + { + "epoch": 0.6465189873417722, + "grad_norm": 0.6537446975708008, + "learning_rate": 0.00042463566726271137, + "loss": 1.3675, + "step": 6129 + }, + { + "epoch": 0.6466244725738397, + "grad_norm": 0.7064982652664185, + "learning_rate": 0.0004244094807856936, + "loss": 1.4024, + "step": 6130 + }, + { + "epoch": 0.6467299578059071, + "grad_norm": 0.7440698146820068, + "learning_rate": 0.000424183330792717, + "loss": 1.4001, + "step": 6131 + }, + { + "epoch": 0.6468354430379747, + "grad_norm": 0.6495948433876038, + "learning_rate": 0.0004239572173091229, + "loss": 1.3777, + "step": 6132 + }, + { + "epoch": 0.6469409282700422, + "grad_norm": 0.7299064993858337, + "learning_rate": 0.0004237311403602484, + "loss": 1.3513, + "step": 6133 + }, + { + "epoch": 0.6470464135021097, + "grad_norm": 0.7585771083831787, + "learning_rate": 0.0004235050999714265, + "loss": 1.3757, + "step": 6134 + }, + { + "epoch": 0.6471518987341772, + "grad_norm": 0.6603680849075317, + "learning_rate": 0.00042327909616798616, + "loss": 1.3284, + "step": 6135 + }, + { + "epoch": 0.6472573839662448, + "grad_norm": 0.8091616034507751, + "learning_rate": 0.0004230531289752523, + "loss": 1.3862, + "step": 6136 + }, + { + "epoch": 0.6473628691983122, + "grad_norm": 0.7475970983505249, + "learning_rate": 0.00042282719841854567, + "loss": 1.3652, + "step": 6137 + }, + { + "epoch": 0.6474683544303798, + "grad_norm": 0.7008522748947144, + "learning_rate": 0.0004226013045231826, + "loss": 1.3726, + "step": 6138 + }, + { + "epoch": 0.6475738396624473, + "grad_norm": 0.6937210559844971, + "learning_rate": 0.00042237544731447616, + "loss": 1.3717, + "step": 6139 + }, + { + "epoch": 0.6476793248945147, + "grad_norm": 0.8740338087081909, + "learning_rate": 0.00042214962681773457, + "loss": 1.297, + "step": 6140 + }, + { + "epoch": 0.6477848101265823, 
+ "grad_norm": 0.749339759349823, + "learning_rate": 0.0004219238430582621, + "loss": 1.3763, + "step": 6141 + }, + { + "epoch": 0.6478902953586498, + "grad_norm": 0.8170101046562195, + "learning_rate": 0.00042169809606135893, + "loss": 1.3412, + "step": 6142 + }, + { + "epoch": 0.6479957805907173, + "grad_norm": 0.6795600056648254, + "learning_rate": 0.0004214723858523212, + "loss": 1.3434, + "step": 6143 + }, + { + "epoch": 0.6481012658227848, + "grad_norm": 0.7469543814659119, + "learning_rate": 0.00042124671245644086, + "loss": 1.3839, + "step": 6144 + }, + { + "epoch": 0.6482067510548524, + "grad_norm": 0.726058840751648, + "learning_rate": 0.0004210210758990056, + "loss": 1.3906, + "step": 6145 + }, + { + "epoch": 0.6483122362869198, + "grad_norm": 0.8720397353172302, + "learning_rate": 0.00042079547620529927, + "loss": 1.3694, + "step": 6146 + }, + { + "epoch": 0.6484177215189874, + "grad_norm": 0.7082130312919617, + "learning_rate": 0.0004205699134006011, + "loss": 1.3821, + "step": 6147 + }, + { + "epoch": 0.6485232067510549, + "grad_norm": 0.6713295578956604, + "learning_rate": 0.0004203443875101871, + "loss": 1.355, + "step": 6148 + }, + { + "epoch": 0.6486286919831223, + "grad_norm": 0.7958627343177795, + "learning_rate": 0.0004201188985593283, + "loss": 1.3822, + "step": 6149 + }, + { + "epoch": 0.6487341772151899, + "grad_norm": 0.6922683715820312, + "learning_rate": 0.00041989344657329187, + "loss": 1.4044, + "step": 6150 + }, + { + "epoch": 0.6488396624472574, + "grad_norm": 0.6796203851699829, + "learning_rate": 0.0004196680315773408, + "loss": 1.3694, + "step": 6151 + }, + { + "epoch": 0.6489451476793249, + "grad_norm": 0.8616799712181091, + "learning_rate": 0.0004194426535967339, + "loss": 1.3952, + "step": 6152 + }, + { + "epoch": 0.6490506329113924, + "grad_norm": 0.6798238158226013, + "learning_rate": 0.00041921731265672613, + "loss": 1.356, + "step": 6153 + }, + { + "epoch": 0.64915611814346, + "grad_norm": 0.8699831366539001, + 
"learning_rate": 0.0004189920087825678, + "loss": 1.3506, + "step": 6154 + }, + { + "epoch": 0.6492616033755274, + "grad_norm": 0.7417158484458923, + "learning_rate": 0.00041876674199950545, + "loss": 1.3689, + "step": 6155 + }, + { + "epoch": 0.649367088607595, + "grad_norm": 0.9127528667449951, + "learning_rate": 0.0004185415123327813, + "loss": 1.3412, + "step": 6156 + }, + { + "epoch": 0.6494725738396624, + "grad_norm": 0.6490997076034546, + "learning_rate": 0.00041831631980763324, + "loss": 1.3638, + "step": 6157 + }, + { + "epoch": 0.6495780590717299, + "grad_norm": 0.7380533218383789, + "learning_rate": 0.00041809116444929586, + "loss": 1.3954, + "step": 6158 + }, + { + "epoch": 0.6496835443037975, + "grad_norm": 0.805001974105835, + "learning_rate": 0.00041786604628299846, + "loss": 1.3293, + "step": 6159 + }, + { + "epoch": 0.6497890295358649, + "grad_norm": 0.745599091053009, + "learning_rate": 0.00041764096533396667, + "loss": 1.3893, + "step": 6160 + }, + { + "epoch": 0.6498945147679325, + "grad_norm": 0.9029660224914551, + "learning_rate": 0.00041741592162742214, + "loss": 1.364, + "step": 6161 + }, + { + "epoch": 0.65, + "grad_norm": 0.6792778372764587, + "learning_rate": 0.0004171909151885819, + "loss": 1.3424, + "step": 6162 + }, + { + "epoch": 0.6501054852320675, + "grad_norm": 1.1244703531265259, + "learning_rate": 0.0004169659460426592, + "loss": 1.3042, + "step": 6163 + }, + { + "epoch": 0.650210970464135, + "grad_norm": 0.695449709892273, + "learning_rate": 0.00041674101421486294, + "loss": 1.3245, + "step": 6164 + }, + { + "epoch": 0.6503164556962026, + "grad_norm": 0.8482882976531982, + "learning_rate": 0.00041651611973039776, + "loss": 1.3572, + "step": 6165 + }, + { + "epoch": 0.65042194092827, + "grad_norm": 0.7231329083442688, + "learning_rate": 0.0004162912626144642, + "loss": 1.3495, + "step": 6166 + }, + { + "epoch": 0.6505274261603375, + "grad_norm": 0.7173487544059753, + "learning_rate": 0.0004160664428922586, + "loss": 1.3348, + 
"step": 6167 + }, + { + "epoch": 0.6506329113924051, + "grad_norm": 0.6722855567932129, + "learning_rate": 0.00041584166058897324, + "loss": 1.3416, + "step": 6168 + }, + { + "epoch": 0.6507383966244725, + "grad_norm": 0.7109314799308777, + "learning_rate": 0.00041561691572979624, + "loss": 1.3906, + "step": 6169 + }, + { + "epoch": 0.6508438818565401, + "grad_norm": 0.6344064474105835, + "learning_rate": 0.00041539220833991124, + "loss": 1.3374, + "step": 6170 + }, + { + "epoch": 0.6509493670886076, + "grad_norm": 0.6740775108337402, + "learning_rate": 0.0004151675384444978, + "loss": 1.3567, + "step": 6171 + }, + { + "epoch": 0.6510548523206751, + "grad_norm": 0.7819066047668457, + "learning_rate": 0.0004149429060687312, + "loss": 1.3336, + "step": 6172 + }, + { + "epoch": 0.6511603375527426, + "grad_norm": 0.8305350542068481, + "learning_rate": 0.00041471831123778284, + "loss": 1.378, + "step": 6173 + }, + { + "epoch": 0.6512658227848102, + "grad_norm": 0.6637952327728271, + "learning_rate": 0.0004144937539768195, + "loss": 1.3843, + "step": 6174 + }, + { + "epoch": 0.6513713080168776, + "grad_norm": 0.6661664247512817, + "learning_rate": 0.00041426923431100396, + "loss": 1.3466, + "step": 6175 + }, + { + "epoch": 0.6514767932489451, + "grad_norm": 0.6549107432365417, + "learning_rate": 0.0004140447522654946, + "loss": 1.3745, + "step": 6176 + }, + { + "epoch": 0.6515822784810127, + "grad_norm": 0.6381188631057739, + "learning_rate": 0.0004138203078654463, + "loss": 1.3739, + "step": 6177 + }, + { + "epoch": 0.6516877637130801, + "grad_norm": 0.6520124077796936, + "learning_rate": 0.0004135959011360088, + "loss": 1.3622, + "step": 6178 + }, + { + "epoch": 0.6517932489451477, + "grad_norm": 0.7173768877983093, + "learning_rate": 0.000413371532102328, + "loss": 1.3808, + "step": 6179 + }, + { + "epoch": 0.6518987341772152, + "grad_norm": 0.6609131097793579, + "learning_rate": 0.0004131472007895457, + "loss": 1.3681, + "step": 6180 + }, + { + "epoch": 
0.6520042194092827, + "grad_norm": 0.7669507265090942, + "learning_rate": 0.00041292290722279914, + "loss": 1.3915, + "step": 6181 + }, + { + "epoch": 0.6521097046413502, + "grad_norm": 0.679756224155426, + "learning_rate": 0.00041269865142722176, + "loss": 1.379, + "step": 6182 + }, + { + "epoch": 0.6522151898734178, + "grad_norm": 0.836276650428772, + "learning_rate": 0.0004124744334279424, + "loss": 1.3651, + "step": 6183 + }, + { + "epoch": 0.6523206751054852, + "grad_norm": 0.6737832427024841, + "learning_rate": 0.0004122502532500858, + "loss": 1.3699, + "step": 6184 + }, + { + "epoch": 0.6524261603375527, + "grad_norm": 0.652202844619751, + "learning_rate": 0.0004120261109187724, + "loss": 1.3662, + "step": 6185 + }, + { + "epoch": 0.6525316455696203, + "grad_norm": 0.7591854929924011, + "learning_rate": 0.0004118020064591184, + "loss": 1.3819, + "step": 6186 + }, + { + "epoch": 0.6526371308016877, + "grad_norm": 0.6974359750747681, + "learning_rate": 0.00041157793989623625, + "loss": 1.3407, + "step": 6187 + }, + { + "epoch": 0.6527426160337553, + "grad_norm": 0.838176429271698, + "learning_rate": 0.0004113539112552334, + "loss": 1.3546, + "step": 6188 + }, + { + "epoch": 0.6528481012658228, + "grad_norm": 0.674755334854126, + "learning_rate": 0.0004111299205612135, + "loss": 1.3375, + "step": 6189 + }, + { + "epoch": 0.6529535864978903, + "grad_norm": 0.6827499270439148, + "learning_rate": 0.00041090596783927583, + "loss": 1.3897, + "step": 6190 + }, + { + "epoch": 0.6530590717299578, + "grad_norm": 0.7276760339736938, + "learning_rate": 0.00041068205311451517, + "loss": 1.3844, + "step": 6191 + }, + { + "epoch": 0.6531645569620254, + "grad_norm": 0.7128865718841553, + "learning_rate": 0.00041045817641202257, + "loss": 1.3353, + "step": 6192 + }, + { + "epoch": 0.6532700421940928, + "grad_norm": 0.7654020190238953, + "learning_rate": 0.00041023433775688435, + "loss": 1.3664, + "step": 6193 + }, + { + "epoch": 0.6533755274261603, + "grad_norm": 
0.8867407441139221, + "learning_rate": 0.00041001053717418283, + "loss": 1.3282, + "step": 6194 + }, + { + "epoch": 0.6534810126582279, + "grad_norm": 0.6925840377807617, + "learning_rate": 0.000409786774688996, + "loss": 1.3624, + "step": 6195 + }, + { + "epoch": 0.6535864978902953, + "grad_norm": 0.7282606959342957, + "learning_rate": 0.00040956305032639723, + "loss": 1.3429, + "step": 6196 + }, + { + "epoch": 0.6536919831223629, + "grad_norm": 0.816646933555603, + "learning_rate": 0.0004093393641114565, + "loss": 1.358, + "step": 6197 + }, + { + "epoch": 0.6537974683544304, + "grad_norm": 0.6523333787918091, + "learning_rate": 0.00040911571606923867, + "loss": 1.3773, + "step": 6198 + }, + { + "epoch": 0.6539029535864979, + "grad_norm": 0.8620696663856506, + "learning_rate": 0.00040889210622480467, + "loss": 1.3941, + "step": 6199 + }, + { + "epoch": 0.6540084388185654, + "grad_norm": 0.6655833125114441, + "learning_rate": 0.0004086685346032111, + "loss": 1.3703, + "step": 6200 + }, + { + "epoch": 0.654113924050633, + "grad_norm": 0.7278186082839966, + "learning_rate": 0.00040844500122951026, + "loss": 1.3351, + "step": 6201 + }, + { + "epoch": 0.6542194092827004, + "grad_norm": 0.6669450998306274, + "learning_rate": 0.0004082215061287502, + "loss": 1.3594, + "step": 6202 + }, + { + "epoch": 0.6543248945147679, + "grad_norm": 0.8795672655105591, + "learning_rate": 0.00040799804932597464, + "loss": 1.3455, + "step": 6203 + }, + { + "epoch": 0.6544303797468355, + "grad_norm": 0.741908609867096, + "learning_rate": 0.00040777463084622304, + "loss": 1.3602, + "step": 6204 + }, + { + "epoch": 0.6545358649789029, + "grad_norm": 0.8381674885749817, + "learning_rate": 0.00040755125071453055, + "loss": 1.3595, + "step": 6205 + }, + { + "epoch": 0.6546413502109705, + "grad_norm": 0.6451119780540466, + "learning_rate": 0.00040732790895592764, + "loss": 1.3807, + "step": 6206 + }, + { + "epoch": 0.654746835443038, + "grad_norm": 0.7766493558883667, + "learning_rate": 
0.00040710460559544167, + "loss": 1.3751, + "step": 6207 + }, + { + "epoch": 0.6548523206751055, + "grad_norm": 0.9080121517181396, + "learning_rate": 0.0004068813406580944, + "loss": 1.3663, + "step": 6208 + }, + { + "epoch": 0.654957805907173, + "grad_norm": 0.6915358901023865, + "learning_rate": 0.0004066581141689038, + "loss": 1.3555, + "step": 6209 + }, + { + "epoch": 0.6550632911392406, + "grad_norm": 0.8747574090957642, + "learning_rate": 0.00040643492615288367, + "loss": 1.35, + "step": 6210 + }, + { + "epoch": 0.655168776371308, + "grad_norm": 0.8679912090301514, + "learning_rate": 0.00040621177663504313, + "loss": 1.3723, + "step": 6211 + }, + { + "epoch": 0.6552742616033755, + "grad_norm": 0.8027997016906738, + "learning_rate": 0.0004059886656403874, + "loss": 1.3607, + "step": 6212 + }, + { + "epoch": 0.6553797468354431, + "grad_norm": 0.8875987529754639, + "learning_rate": 0.00040576559319391704, + "loss": 1.3699, + "step": 6213 + }, + { + "epoch": 0.6554852320675105, + "grad_norm": 0.6871267557144165, + "learning_rate": 0.0004055425593206285, + "loss": 1.344, + "step": 6214 + }, + { + "epoch": 0.6555907172995781, + "grad_norm": 0.7317743897438049, + "learning_rate": 0.0004053195640455137, + "loss": 1.3821, + "step": 6215 + }, + { + "epoch": 0.6556962025316456, + "grad_norm": 0.8504091501235962, + "learning_rate": 0.0004050966073935602, + "loss": 1.3942, + "step": 6216 + }, + { + "epoch": 0.6558016877637131, + "grad_norm": 0.6986285448074341, + "learning_rate": 0.00040487368938975214, + "loss": 1.3773, + "step": 6217 + }, + { + "epoch": 0.6559071729957806, + "grad_norm": 0.7912895679473877, + "learning_rate": 0.00040465081005906805, + "loss": 1.3799, + "step": 6218 + }, + { + "epoch": 0.6560126582278482, + "grad_norm": 0.7776387333869934, + "learning_rate": 0.00040442796942648273, + "loss": 1.3454, + "step": 6219 + }, + { + "epoch": 0.6561181434599156, + "grad_norm": 0.7725447416305542, + "learning_rate": 0.00040420516751696664, + "loss": 1.3406, + 
"step": 6220 + }, + { + "epoch": 0.6562236286919831, + "grad_norm": 0.8972671627998352, + "learning_rate": 0.00040398240435548583, + "loss": 1.3643, + "step": 6221 + }, + { + "epoch": 0.6563291139240506, + "grad_norm": 0.7044982314109802, + "learning_rate": 0.000403759679967002, + "loss": 1.3256, + "step": 6222 + }, + { + "epoch": 0.6564345991561181, + "grad_norm": 0.810449481010437, + "learning_rate": 0.00040353699437647257, + "loss": 1.3532, + "step": 6223 + }, + { + "epoch": 0.6565400843881857, + "grad_norm": 0.7202931642532349, + "learning_rate": 0.0004033143476088504, + "loss": 1.3463, + "step": 6224 + }, + { + "epoch": 0.6566455696202531, + "grad_norm": 0.8220978379249573, + "learning_rate": 0.00040309173968908413, + "loss": 1.3538, + "step": 6225 + }, + { + "epoch": 0.6567510548523207, + "grad_norm": 0.6816374659538269, + "learning_rate": 0.0004028691706421185, + "loss": 1.3714, + "step": 6226 + }, + { + "epoch": 0.6568565400843882, + "grad_norm": 0.6842511296272278, + "learning_rate": 0.00040264664049289336, + "loss": 1.354, + "step": 6227 + }, + { + "epoch": 0.6569620253164556, + "grad_norm": 0.7109050154685974, + "learning_rate": 0.00040242414926634415, + "loss": 1.3501, + "step": 6228 + }, + { + "epoch": 0.6570675105485232, + "grad_norm": 0.7109183669090271, + "learning_rate": 0.0004022016969874023, + "loss": 1.3524, + "step": 6229 + }, + { + "epoch": 0.6571729957805907, + "grad_norm": 0.6633743047714233, + "learning_rate": 0.00040197928368099445, + "loss": 1.3613, + "step": 6230 + }, + { + "epoch": 0.6572784810126582, + "grad_norm": 0.810761570930481, + "learning_rate": 0.00040175690937204324, + "loss": 1.3555, + "step": 6231 + }, + { + "epoch": 0.6573839662447257, + "grad_norm": 0.7341049313545227, + "learning_rate": 0.0004015345740854668, + "loss": 1.3695, + "step": 6232 + }, + { + "epoch": 0.6574894514767933, + "grad_norm": 0.7370947003364563, + "learning_rate": 0.00040131227784617876, + "loss": 1.3904, + "step": 6233 + }, + { + "epoch": 
0.6575949367088607, + "grad_norm": 0.6880258321762085, + "learning_rate": 0.000401090020679089, + "loss": 1.3841, + "step": 6234 + }, + { + "epoch": 0.6577004219409283, + "grad_norm": 0.6626034379005432, + "learning_rate": 0.00040086780260910213, + "loss": 1.368, + "step": 6235 + }, + { + "epoch": 0.6578059071729958, + "grad_norm": 0.7487751245498657, + "learning_rate": 0.000400645623661119, + "loss": 1.3433, + "step": 6236 + }, + { + "epoch": 0.6579113924050632, + "grad_norm": 0.6526100039482117, + "learning_rate": 0.0004004234838600357, + "loss": 1.3336, + "step": 6237 + }, + { + "epoch": 0.6580168776371308, + "grad_norm": 0.8297781348228455, + "learning_rate": 0.00040020138323074427, + "loss": 1.3527, + "step": 6238 + }, + { + "epoch": 0.6581223628691983, + "grad_norm": 0.6211318373680115, + "learning_rate": 0.00039997932179813205, + "loss": 1.3345, + "step": 6239 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 0.8651831746101379, + "learning_rate": 0.00039975729958708223, + "loss": 1.3494, + "step": 6240 + }, + { + "epoch": 0.6583333333333333, + "grad_norm": 0.7648287415504456, + "learning_rate": 0.00039953531662247343, + "loss": 1.3483, + "step": 6241 + }, + { + "epoch": 0.6584388185654009, + "grad_norm": 0.7461156249046326, + "learning_rate": 0.00039931337292917966, + "loss": 1.3657, + "step": 6242 + }, + { + "epoch": 0.6585443037974683, + "grad_norm": 0.8753233551979065, + "learning_rate": 0.0003990914685320714, + "loss": 1.3978, + "step": 6243 + }, + { + "epoch": 0.6586497890295359, + "grad_norm": 0.7193900942802429, + "learning_rate": 0.00039886960345601394, + "loss": 1.3563, + "step": 6244 + }, + { + "epoch": 0.6587552742616034, + "grad_norm": 0.9479084610939026, + "learning_rate": 0.00039864777772586826, + "loss": 1.3477, + "step": 6245 + }, + { + "epoch": 0.6588607594936708, + "grad_norm": 0.7073331475257874, + "learning_rate": 0.00039842599136649117, + "loss": 1.3406, + "step": 6246 + }, + { + "epoch": 0.6589662447257384, + "grad_norm": 
0.8952034115791321, + "learning_rate": 0.00039820424440273474, + "loss": 1.3734, + "step": 6247 + }, + { + "epoch": 0.6590717299578059, + "grad_norm": 0.8614504337310791, + "learning_rate": 0.000397982536859447, + "loss": 1.3461, + "step": 6248 + }, + { + "epoch": 0.6591772151898734, + "grad_norm": 0.6870449781417847, + "learning_rate": 0.00039776086876147133, + "loss": 1.3924, + "step": 6249 + }, + { + "epoch": 0.6592827004219409, + "grad_norm": 1.0891315937042236, + "learning_rate": 0.0003975392401336468, + "loss": 1.3961, + "step": 6250 + }, + { + "epoch": 0.6593881856540085, + "grad_norm": 0.963509202003479, + "learning_rate": 0.0003973176510008075, + "loss": 1.3466, + "step": 6251 + }, + { + "epoch": 0.6594936708860759, + "grad_norm": 1.005608320236206, + "learning_rate": 0.00039709610138778445, + "loss": 1.3616, + "step": 6252 + }, + { + "epoch": 0.6595991561181435, + "grad_norm": 0.9407158493995667, + "learning_rate": 0.0003968745913194029, + "loss": 1.34, + "step": 6253 + }, + { + "epoch": 0.659704641350211, + "grad_norm": 0.6797810792922974, + "learning_rate": 0.0003966531208204842, + "loss": 1.3674, + "step": 6254 + }, + { + "epoch": 0.6598101265822784, + "grad_norm": 0.9336441159248352, + "learning_rate": 0.0003964316899158454, + "loss": 1.375, + "step": 6255 + }, + { + "epoch": 0.659915611814346, + "grad_norm": 0.8333449959754944, + "learning_rate": 0.00039621029863029874, + "loss": 1.3522, + "step": 6256 + }, + { + "epoch": 0.6600210970464135, + "grad_norm": 0.7301167845726013, + "learning_rate": 0.00039598894698865216, + "loss": 1.3201, + "step": 6257 + }, + { + "epoch": 0.660126582278481, + "grad_norm": 0.7733619213104248, + "learning_rate": 0.00039576763501570944, + "loss": 1.3724, + "step": 6258 + }, + { + "epoch": 0.6602320675105485, + "grad_norm": 0.7313828468322754, + "learning_rate": 0.0003955463627362694, + "loss": 1.3715, + "step": 6259 + }, + { + "epoch": 0.6603375527426161, + "grad_norm": 0.7480108141899109, + "learning_rate": 
0.00039532513017512694, + "loss": 1.374, + "step": 6260 + }, + { + "epoch": 0.6604430379746835, + "grad_norm": 0.6372901797294617, + "learning_rate": 0.00039510393735707233, + "loss": 1.3706, + "step": 6261 + }, + { + "epoch": 0.6605485232067511, + "grad_norm": 0.6914093494415283, + "learning_rate": 0.00039488278430689123, + "loss": 1.3735, + "step": 6262 + }, + { + "epoch": 0.6606540084388186, + "grad_norm": 0.6925854086875916, + "learning_rate": 0.0003946616710493649, + "loss": 1.3262, + "step": 6263 + }, + { + "epoch": 0.660759493670886, + "grad_norm": 0.7067632675170898, + "learning_rate": 0.0003944405976092702, + "loss": 1.4095, + "step": 6264 + }, + { + "epoch": 0.6608649789029536, + "grad_norm": 0.650996744632721, + "learning_rate": 0.0003942195640113795, + "loss": 1.3661, + "step": 6265 + }, + { + "epoch": 0.6609704641350211, + "grad_norm": 0.7077556848526001, + "learning_rate": 0.00039399857028046066, + "loss": 1.3669, + "step": 6266 + }, + { + "epoch": 0.6610759493670886, + "grad_norm": 0.6427075266838074, + "learning_rate": 0.0003937776164412773, + "loss": 1.3953, + "step": 6267 + }, + { + "epoch": 0.6611814345991561, + "grad_norm": 0.6813622117042542, + "learning_rate": 0.00039355670251858805, + "loss": 1.3415, + "step": 6268 + }, + { + "epoch": 0.6612869198312237, + "grad_norm": 0.629961371421814, + "learning_rate": 0.00039333582853714793, + "loss": 1.3878, + "step": 6269 + }, + { + "epoch": 0.6613924050632911, + "grad_norm": 0.6941843032836914, + "learning_rate": 0.00039311499452170665, + "loss": 1.3256, + "step": 6270 + }, + { + "epoch": 0.6614978902953587, + "grad_norm": 0.6935383081436157, + "learning_rate": 0.00039289420049700986, + "loss": 1.3442, + "step": 6271 + }, + { + "epoch": 0.6616033755274262, + "grad_norm": 0.669829249382019, + "learning_rate": 0.0003926734464877986, + "loss": 1.3709, + "step": 6272 + }, + { + "epoch": 0.6617088607594936, + "grad_norm": 0.9144964814186096, + "learning_rate": 0.0003924527325188095, + "loss": 1.4044, + 
"step": 6273 + }, + { + "epoch": 0.6618143459915612, + "grad_norm": 0.6605710387229919, + "learning_rate": 0.00039223205861477455, + "loss": 1.3566, + "step": 6274 + }, + { + "epoch": 0.6619198312236287, + "grad_norm": 0.6764668226242065, + "learning_rate": 0.00039201142480042145, + "loss": 1.3687, + "step": 6275 + }, + { + "epoch": 0.6620253164556962, + "grad_norm": 0.6919807195663452, + "learning_rate": 0.0003917908311004732, + "loss": 1.3624, + "step": 6276 + }, + { + "epoch": 0.6621308016877637, + "grad_norm": 0.6691452264785767, + "learning_rate": 0.0003915702775396483, + "loss": 1.3417, + "step": 6277 + }, + { + "epoch": 0.6622362869198313, + "grad_norm": 0.6806024312973022, + "learning_rate": 0.0003913497641426614, + "loss": 1.3791, + "step": 6278 + }, + { + "epoch": 0.6623417721518987, + "grad_norm": 0.7281175851821899, + "learning_rate": 0.00039112929093422185, + "loss": 1.3535, + "step": 6279 + }, + { + "epoch": 0.6624472573839663, + "grad_norm": 0.7128005027770996, + "learning_rate": 0.0003909088579390347, + "loss": 1.37, + "step": 6280 + }, + { + "epoch": 0.6625527426160338, + "grad_norm": 0.6466816663742065, + "learning_rate": 0.0003906884651818006, + "loss": 1.3092, + "step": 6281 + }, + { + "epoch": 0.6626582278481012, + "grad_norm": 0.7283273935317993, + "learning_rate": 0.0003904681126872157, + "loss": 1.3507, + "step": 6282 + }, + { + "epoch": 0.6627637130801688, + "grad_norm": 0.6789945960044861, + "learning_rate": 0.00039024780047997157, + "loss": 1.3462, + "step": 6283 + }, + { + "epoch": 0.6628691983122363, + "grad_norm": 0.7066200971603394, + "learning_rate": 0.00039002752858475527, + "loss": 1.3911, + "step": 6284 + }, + { + "epoch": 0.6629746835443038, + "grad_norm": 0.7964605689048767, + "learning_rate": 0.00038980729702624896, + "loss": 1.3822, + "step": 6285 + }, + { + "epoch": 0.6630801687763713, + "grad_norm": 0.7009385228157043, + "learning_rate": 0.00038958710582913153, + "loss": 1.3675, + "step": 6286 + }, + { + "epoch": 
0.6631856540084389, + "grad_norm": 0.759072482585907, + "learning_rate": 0.0003893669550180761, + "loss": 1.3741, + "step": 6287 + }, + { + "epoch": 0.6632911392405063, + "grad_norm": 0.6864868998527527, + "learning_rate": 0.00038914684461775154, + "loss": 1.3867, + "step": 6288 + }, + { + "epoch": 0.6633966244725739, + "grad_norm": 0.7868582010269165, + "learning_rate": 0.0003889267746528225, + "loss": 1.3701, + "step": 6289 + }, + { + "epoch": 0.6635021097046413, + "grad_norm": 0.8487674593925476, + "learning_rate": 0.00038870674514794877, + "loss": 1.3388, + "step": 6290 + }, + { + "epoch": 0.6636075949367088, + "grad_norm": 0.7984734177589417, + "learning_rate": 0.00038848675612778577, + "loss": 1.3237, + "step": 6291 + }, + { + "epoch": 0.6637130801687764, + "grad_norm": 1.0030255317687988, + "learning_rate": 0.0003882668076169846, + "loss": 1.3787, + "step": 6292 + }, + { + "epoch": 0.6638185654008438, + "grad_norm": 0.674531102180481, + "learning_rate": 0.0003880468996401912, + "loss": 1.3636, + "step": 6293 + }, + { + "epoch": 0.6639240506329114, + "grad_norm": 0.8094853162765503, + "learning_rate": 0.0003878270322220474, + "loss": 1.3736, + "step": 6294 + }, + { + "epoch": 0.6640295358649789, + "grad_norm": 1.0017973184585571, + "learning_rate": 0.00038760720538719086, + "loss": 1.3565, + "step": 6295 + }, + { + "epoch": 0.6641350210970464, + "grad_norm": 0.7283895015716553, + "learning_rate": 0.0003873874191602539, + "loss": 1.3626, + "step": 6296 + }, + { + "epoch": 0.6642405063291139, + "grad_norm": 1.0529190301895142, + "learning_rate": 0.00038716767356586487, + "loss": 1.3628, + "step": 6297 + }, + { + "epoch": 0.6643459915611815, + "grad_norm": 0.6593754887580872, + "learning_rate": 0.00038694796862864724, + "loss": 1.3693, + "step": 6298 + }, + { + "epoch": 0.6644514767932489, + "grad_norm": 0.8029528856277466, + "learning_rate": 0.00038672830437322007, + "loss": 1.3325, + "step": 6299 + }, + { + "epoch": 0.6645569620253164, + "grad_norm": 
0.6743192076683044, + "learning_rate": 0.0003865086808241979, + "loss": 1.3256, + "step": 6300 + }, + { + "epoch": 0.664662447257384, + "grad_norm": 0.7576407194137573, + "learning_rate": 0.00038628909800619046, + "loss": 1.3583, + "step": 6301 + }, + { + "epoch": 0.6647679324894514, + "grad_norm": 0.7714362144470215, + "learning_rate": 0.00038606955594380326, + "loss": 1.3432, + "step": 6302 + }, + { + "epoch": 0.664873417721519, + "grad_norm": 0.6552845239639282, + "learning_rate": 0.0003858500546616368, + "loss": 1.3243, + "step": 6303 + }, + { + "epoch": 0.6649789029535865, + "grad_norm": 0.7238420248031616, + "learning_rate": 0.0003856305941842878, + "loss": 1.3478, + "step": 6304 + }, + { + "epoch": 0.665084388185654, + "grad_norm": 0.6692920327186584, + "learning_rate": 0.0003854111745363476, + "loss": 1.3507, + "step": 6305 + }, + { + "epoch": 0.6651898734177215, + "grad_norm": 0.6756670475006104, + "learning_rate": 0.00038519179574240324, + "loss": 1.3374, + "step": 6306 + }, + { + "epoch": 0.6652953586497891, + "grad_norm": 0.7262660264968872, + "learning_rate": 0.0003849724578270374, + "loss": 1.3895, + "step": 6307 + }, + { + "epoch": 0.6654008438818565, + "grad_norm": 0.6948968768119812, + "learning_rate": 0.0003847531608148277, + "loss": 1.37, + "step": 6308 + }, + { + "epoch": 0.665506329113924, + "grad_norm": 0.6739389300346375, + "learning_rate": 0.0003845339047303477, + "loss": 1.3413, + "step": 6309 + }, + { + "epoch": 0.6656118143459916, + "grad_norm": 0.7137086391448975, + "learning_rate": 0.0003843146895981661, + "loss": 1.375, + "step": 6310 + }, + { + "epoch": 0.665717299578059, + "grad_norm": 0.7343080639839172, + "learning_rate": 0.0003840955154428467, + "loss": 1.3769, + "step": 6311 + }, + { + "epoch": 0.6658227848101266, + "grad_norm": 0.7091630697250366, + "learning_rate": 0.0003838763822889495, + "loss": 1.3184, + "step": 6312 + }, + { + "epoch": 0.6659282700421941, + "grad_norm": 0.9599053859710693, + "learning_rate": 
0.0003836572901610295, + "loss": 1.3586, + "step": 6313 + }, + { + "epoch": 0.6660337552742616, + "grad_norm": 0.8094688653945923, + "learning_rate": 0.0003834382390836368, + "loss": 1.3306, + "step": 6314 + }, + { + "epoch": 0.6661392405063291, + "grad_norm": 0.9883232116699219, + "learning_rate": 0.00038321922908131736, + "loss": 1.334, + "step": 6315 + }, + { + "epoch": 0.6662447257383967, + "grad_norm": 0.9510544538497925, + "learning_rate": 0.0003830002601786121, + "loss": 1.3514, + "step": 6316 + }, + { + "epoch": 0.6663502109704641, + "grad_norm": 0.7265557646751404, + "learning_rate": 0.0003827813324000578, + "loss": 1.383, + "step": 6317 + }, + { + "epoch": 0.6664556962025316, + "grad_norm": 1.0222556591033936, + "learning_rate": 0.0003825624457701863, + "loss": 1.3568, + "step": 6318 + }, + { + "epoch": 0.6665611814345992, + "grad_norm": 0.7966063618659973, + "learning_rate": 0.00038234360031352485, + "loss": 1.3695, + "step": 6319 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.8849994540214539, + "learning_rate": 0.00038212479605459617, + "loss": 1.3288, + "step": 6320 + }, + { + "epoch": 0.6667721518987342, + "grad_norm": 0.8937309980392456, + "learning_rate": 0.00038190603301791864, + "loss": 1.3684, + "step": 6321 + }, + { + "epoch": 0.6668776371308017, + "grad_norm": 0.6688173413276672, + "learning_rate": 0.0003816873112280056, + "loss": 1.3563, + "step": 6322 + }, + { + "epoch": 0.6669831223628692, + "grad_norm": 0.8467441201210022, + "learning_rate": 0.00038146863070936607, + "loss": 1.3475, + "step": 6323 + }, + { + "epoch": 0.6670886075949367, + "grad_norm": 0.8157018423080444, + "learning_rate": 0.0003812499914865039, + "loss": 1.3972, + "step": 6324 + }, + { + "epoch": 0.6671940928270043, + "grad_norm": 0.8850915431976318, + "learning_rate": 0.00038103139358391914, + "loss": 1.367, + "step": 6325 + }, + { + "epoch": 0.6672995780590717, + "grad_norm": 0.7206857800483704, + "learning_rate": 0.0003808128370261065, + "loss": 1.3432, + 
"step": 6326 + }, + { + "epoch": 0.6674050632911392, + "grad_norm": 0.7271013855934143, + "learning_rate": 0.00038059432183755633, + "loss": 1.3713, + "step": 6327 + }, + { + "epoch": 0.6675105485232068, + "grad_norm": 0.669590175151825, + "learning_rate": 0.0003803758480427544, + "loss": 1.302, + "step": 6328 + }, + { + "epoch": 0.6676160337552742, + "grad_norm": 0.7888107895851135, + "learning_rate": 0.0003801574156661817, + "loss": 1.3578, + "step": 6329 + }, + { + "epoch": 0.6677215189873418, + "grad_norm": 0.8525625467300415, + "learning_rate": 0.000379939024732315, + "loss": 1.394, + "step": 6330 + }, + { + "epoch": 0.6678270042194093, + "grad_norm": 0.7798763513565063, + "learning_rate": 0.0003797206752656258, + "loss": 1.3142, + "step": 6331 + }, + { + "epoch": 0.6679324894514768, + "grad_norm": 0.7660180926322937, + "learning_rate": 0.0003795023672905814, + "loss": 1.3868, + "step": 6332 + }, + { + "epoch": 0.6680379746835443, + "grad_norm": 0.6993639469146729, + "learning_rate": 0.00037928410083164416, + "loss": 1.3673, + "step": 6333 + }, + { + "epoch": 0.6681434599156119, + "grad_norm": 0.664628267288208, + "learning_rate": 0.0003790658759132719, + "loss": 1.3388, + "step": 6334 + }, + { + "epoch": 0.6682489451476793, + "grad_norm": 0.8041455745697021, + "learning_rate": 0.0003788476925599181, + "loss": 1.3358, + "step": 6335 + }, + { + "epoch": 0.6683544303797468, + "grad_norm": 0.7494514584541321, + "learning_rate": 0.00037862955079603086, + "loss": 1.3834, + "step": 6336 + }, + { + "epoch": 0.6684599156118144, + "grad_norm": 0.7062026858329773, + "learning_rate": 0.00037841145064605416, + "loss": 1.351, + "step": 6337 + }, + { + "epoch": 0.6685654008438818, + "grad_norm": 0.8985625505447388, + "learning_rate": 0.00037819339213442744, + "loss": 1.3408, + "step": 6338 + }, + { + "epoch": 0.6686708860759494, + "grad_norm": 0.6798101663589478, + "learning_rate": 0.0003779753752855853, + "loss": 1.3669, + "step": 6339 + }, + { + "epoch": 
0.6687763713080169, + "grad_norm": 0.7399622201919556, + "learning_rate": 0.0003777574001239573, + "loss": 1.3389, + "step": 6340 + }, + { + "epoch": 0.6688818565400844, + "grad_norm": 0.9534660577774048, + "learning_rate": 0.0003775394666739688, + "loss": 1.3475, + "step": 6341 + }, + { + "epoch": 0.6689873417721519, + "grad_norm": 0.6913794875144958, + "learning_rate": 0.0003773215749600404, + "loss": 1.3856, + "step": 6342 + }, + { + "epoch": 0.6690928270042195, + "grad_norm": 0.975497841835022, + "learning_rate": 0.0003771037250065878, + "loss": 1.3735, + "step": 6343 + }, + { + "epoch": 0.6691983122362869, + "grad_norm": 0.7259306907653809, + "learning_rate": 0.0003768859168380223, + "loss": 1.3469, + "step": 6344 + }, + { + "epoch": 0.6693037974683544, + "grad_norm": 0.7796319127082825, + "learning_rate": 0.0003766681504787503, + "loss": 1.3695, + "step": 6345 + }, + { + "epoch": 0.669409282700422, + "grad_norm": 0.8162688612937927, + "learning_rate": 0.0003764504259531734, + "loss": 1.3683, + "step": 6346 + }, + { + "epoch": 0.6695147679324894, + "grad_norm": 0.7349051833152771, + "learning_rate": 0.0003762327432856892, + "loss": 1.3446, + "step": 6347 + }, + { + "epoch": 0.669620253164557, + "grad_norm": 0.8816800713539124, + "learning_rate": 0.00037601510250068984, + "loss": 1.3407, + "step": 6348 + }, + { + "epoch": 0.6697257383966245, + "grad_norm": 0.6973425149917603, + "learning_rate": 0.0003757975036225632, + "loss": 1.3338, + "step": 6349 + }, + { + "epoch": 0.669831223628692, + "grad_norm": 0.8056532740592957, + "learning_rate": 0.00037557994667569217, + "loss": 1.3591, + "step": 6350 + }, + { + "epoch": 0.6699367088607595, + "grad_norm": 0.7258418798446655, + "learning_rate": 0.00037536243168445507, + "loss": 1.3583, + "step": 6351 + }, + { + "epoch": 0.6700421940928271, + "grad_norm": 0.7051467895507812, + "learning_rate": 0.0003751449586732257, + "loss": 1.3464, + "step": 6352 + }, + { + "epoch": 0.6701476793248945, + "grad_norm": 
0.6681436896324158, + "learning_rate": 0.0003749275276663729, + "loss": 1.3455, + "step": 6353 + }, + { + "epoch": 0.670253164556962, + "grad_norm": 0.7684869766235352, + "learning_rate": 0.0003747101386882609, + "loss": 1.389, + "step": 6354 + }, + { + "epoch": 0.6703586497890295, + "grad_norm": 0.6316916942596436, + "learning_rate": 0.0003744927917632489, + "loss": 1.31, + "step": 6355 + }, + { + "epoch": 0.670464135021097, + "grad_norm": 0.6576414108276367, + "learning_rate": 0.00037427548691569237, + "loss": 1.3419, + "step": 6356 + }, + { + "epoch": 0.6705696202531646, + "grad_norm": 0.6311424970626831, + "learning_rate": 0.000374058224169941, + "loss": 1.3598, + "step": 6357 + }, + { + "epoch": 0.670675105485232, + "grad_norm": 0.6930806636810303, + "learning_rate": 0.00037384100355034033, + "loss": 1.3888, + "step": 6358 + }, + { + "epoch": 0.6707805907172996, + "grad_norm": 0.8126099705696106, + "learning_rate": 0.0003736238250812308, + "loss": 1.3449, + "step": 6359 + }, + { + "epoch": 0.6708860759493671, + "grad_norm": 0.6765420436859131, + "learning_rate": 0.0003734066887869485, + "loss": 1.3659, + "step": 6360 + }, + { + "epoch": 0.6709915611814345, + "grad_norm": 0.739333987236023, + "learning_rate": 0.0003731895946918246, + "loss": 1.3396, + "step": 6361 + }, + { + "epoch": 0.6710970464135021, + "grad_norm": 0.7833899259567261, + "learning_rate": 0.0003729725428201856, + "loss": 1.362, + "step": 6362 + }, + { + "epoch": 0.6712025316455696, + "grad_norm": 0.7036701440811157, + "learning_rate": 0.00037275553319635285, + "loss": 1.3657, + "step": 6363 + }, + { + "epoch": 0.6713080168776371, + "grad_norm": 0.8642899394035339, + "learning_rate": 0.000372538565844644, + "loss": 1.3279, + "step": 6364 + }, + { + "epoch": 0.6714135021097046, + "grad_norm": 0.644781231880188, + "learning_rate": 0.00037232164078937106, + "loss": 1.32, + "step": 6365 + }, + { + "epoch": 0.6715189873417722, + "grad_norm": 0.7100293040275574, + "learning_rate": 
0.00037210475805484156, + "loss": 1.3198, + "step": 6366 + }, + { + "epoch": 0.6716244725738396, + "grad_norm": 0.6873351335525513, + "learning_rate": 0.00037188791766535825, + "loss": 1.2937, + "step": 6367 + }, + { + "epoch": 0.6717299578059072, + "grad_norm": 0.6386294364929199, + "learning_rate": 0.0003716711196452192, + "loss": 1.337, + "step": 6368 + }, + { + "epoch": 0.6718354430379747, + "grad_norm": 0.6741834878921509, + "learning_rate": 0.0003714543640187177, + "loss": 1.3684, + "step": 6369 + }, + { + "epoch": 0.6719409282700421, + "grad_norm": 0.669513463973999, + "learning_rate": 0.0003712376508101424, + "loss": 1.3554, + "step": 6370 + }, + { + "epoch": 0.6720464135021097, + "grad_norm": 0.8447721600532532, + "learning_rate": 0.0003710209800437769, + "loss": 1.3135, + "step": 6371 + }, + { + "epoch": 0.6721518987341772, + "grad_norm": 0.6445410251617432, + "learning_rate": 0.00037080435174390014, + "loss": 1.3751, + "step": 6372 + }, + { + "epoch": 0.6722573839662447, + "grad_norm": 0.7083387970924377, + "learning_rate": 0.00037058776593478675, + "loss": 1.3702, + "step": 6373 + }, + { + "epoch": 0.6723628691983122, + "grad_norm": 0.8078057169914246, + "learning_rate": 0.00037037122264070625, + "loss": 1.3368, + "step": 6374 + }, + { + "epoch": 0.6724683544303798, + "grad_norm": 0.8835979700088501, + "learning_rate": 0.0003701547218859232, + "loss": 1.3241, + "step": 6375 + }, + { + "epoch": 0.6725738396624472, + "grad_norm": 0.8780420422554016, + "learning_rate": 0.0003699382636946977, + "loss": 1.3308, + "step": 6376 + }, + { + "epoch": 0.6726793248945148, + "grad_norm": 0.7164220213890076, + "learning_rate": 0.0003697218480912848, + "loss": 1.3658, + "step": 6377 + }, + { + "epoch": 0.6727848101265823, + "grad_norm": 0.945472240447998, + "learning_rate": 0.0003695054750999352, + "loss": 1.3527, + "step": 6378 + }, + { + "epoch": 0.6728902953586497, + "grad_norm": 0.701445996761322, + "learning_rate": 0.0003692891447448943, + "loss": 1.3554, + 
"step": 6379 + }, + { + "epoch": 0.6729957805907173, + "grad_norm": 0.7466229200363159, + "learning_rate": 0.0003690728570504032, + "loss": 1.3318, + "step": 6380 + }, + { + "epoch": 0.6731012658227848, + "grad_norm": 1.1507426500320435, + "learning_rate": 0.00036885661204069767, + "loss": 1.3527, + "step": 6381 + }, + { + "epoch": 0.6732067510548523, + "grad_norm": 0.8568866848945618, + "learning_rate": 0.00036864040974000955, + "loss": 1.3349, + "step": 6382 + }, + { + "epoch": 0.6733122362869198, + "grad_norm": 0.8015507459640503, + "learning_rate": 0.0003684242501725652, + "loss": 1.4056, + "step": 6383 + }, + { + "epoch": 0.6734177215189874, + "grad_norm": 0.6797661185264587, + "learning_rate": 0.00036820813336258624, + "loss": 1.3422, + "step": 6384 + }, + { + "epoch": 0.6735232067510548, + "grad_norm": 0.7461336255073547, + "learning_rate": 0.0003679920593342898, + "loss": 1.3809, + "step": 6385 + }, + { + "epoch": 0.6736286919831224, + "grad_norm": 0.8575366139411926, + "learning_rate": 0.0003677760281118879, + "loss": 1.3854, + "step": 6386 + }, + { + "epoch": 0.6737341772151899, + "grad_norm": 0.7490885853767395, + "learning_rate": 0.0003675600397195881, + "loss": 1.3487, + "step": 6387 + }, + { + "epoch": 0.6738396624472573, + "grad_norm": 0.8129202127456665, + "learning_rate": 0.0003673440941815928, + "loss": 1.3503, + "step": 6388 + }, + { + "epoch": 0.6739451476793249, + "grad_norm": 0.711126446723938, + "learning_rate": 0.00036712819152209954, + "loss": 1.3671, + "step": 6389 + }, + { + "epoch": 0.6740506329113924, + "grad_norm": 0.6892576217651367, + "learning_rate": 0.00036691233176530197, + "loss": 1.3015, + "step": 6390 + }, + { + "epoch": 0.6741561181434599, + "grad_norm": 1.0032721757888794, + "learning_rate": 0.0003666965149353878, + "loss": 1.3639, + "step": 6391 + }, + { + "epoch": 0.6742616033755274, + "grad_norm": 0.6205555200576782, + "learning_rate": 0.00036648074105654043, + "loss": 1.3925, + "step": 6392 + }, + { + "epoch": 
0.674367088607595, + "grad_norm": 0.8889051675796509, + "learning_rate": 0.0003662650101529385, + "loss": 1.3507, + "step": 6393 + }, + { + "epoch": 0.6744725738396624, + "grad_norm": 0.6944653987884521, + "learning_rate": 0.00036604932224875564, + "loss": 1.3496, + "step": 6394 + }, + { + "epoch": 0.67457805907173, + "grad_norm": 0.9626950025558472, + "learning_rate": 0.0003658336773681607, + "loss": 1.3386, + "step": 6395 + }, + { + "epoch": 0.6746835443037975, + "grad_norm": 0.7048802971839905, + "learning_rate": 0.0003656180755353179, + "loss": 1.3485, + "step": 6396 + }, + { + "epoch": 0.674789029535865, + "grad_norm": 0.6877619028091431, + "learning_rate": 0.0003654025167743864, + "loss": 1.3935, + "step": 6397 + }, + { + "epoch": 0.6748945147679325, + "grad_norm": 0.9159237742424011, + "learning_rate": 0.0003651870011095204, + "loss": 1.3315, + "step": 6398 + }, + { + "epoch": 0.675, + "grad_norm": 0.7584328651428223, + "learning_rate": 0.0003649715285648701, + "loss": 1.3389, + "step": 6399 + }, + { + "epoch": 0.6751054852320675, + "grad_norm": 0.8802615404129028, + "learning_rate": 0.00036475609916457996, + "loss": 1.3673, + "step": 6400 + }, + { + "epoch": 0.675210970464135, + "grad_norm": 0.7767268419265747, + "learning_rate": 0.0003645407129327898, + "loss": 1.3237, + "step": 6401 + }, + { + "epoch": 0.6753164556962026, + "grad_norm": 0.721648633480072, + "learning_rate": 0.0003643253698936349, + "loss": 1.3552, + "step": 6402 + }, + { + "epoch": 0.67542194092827, + "grad_norm": 0.7739242911338806, + "learning_rate": 0.00036411007007124547, + "loss": 1.3882, + "step": 6403 + }, + { + "epoch": 0.6755274261603376, + "grad_norm": 0.7219763994216919, + "learning_rate": 0.0003638948134897469, + "loss": 1.3876, + "step": 6404 + }, + { + "epoch": 0.6756329113924051, + "grad_norm": 0.7353412508964539, + "learning_rate": 0.0003636796001732597, + "loss": 1.3617, + "step": 6405 + }, + { + "epoch": 0.6757383966244725, + "grad_norm": 0.716986358165741, + 
"learning_rate": 0.00036346443014589983, + "loss": 1.3647, + "step": 6406 + }, + { + "epoch": 0.6758438818565401, + "grad_norm": 0.6764306426048279, + "learning_rate": 0.00036324930343177754, + "loss": 1.3473, + "step": 6407 + }, + { + "epoch": 0.6759493670886076, + "grad_norm": 0.6531209349632263, + "learning_rate": 0.0003630342200549997, + "loss": 1.3305, + "step": 6408 + }, + { + "epoch": 0.6760548523206751, + "grad_norm": 0.7286219000816345, + "learning_rate": 0.000362819180039667, + "loss": 1.3348, + "step": 6409 + }, + { + "epoch": 0.6761603375527426, + "grad_norm": 0.7866173386573792, + "learning_rate": 0.000362604183409876, + "loss": 1.3307, + "step": 6410 + }, + { + "epoch": 0.6762658227848102, + "grad_norm": 0.6802971363067627, + "learning_rate": 0.00036238923018971783, + "loss": 1.3668, + "step": 6411 + }, + { + "epoch": 0.6763713080168776, + "grad_norm": 0.733454704284668, + "learning_rate": 0.00036217432040327926, + "loss": 1.3244, + "step": 6412 + }, + { + "epoch": 0.6764767932489452, + "grad_norm": 0.6676431894302368, + "learning_rate": 0.000361959454074642, + "loss": 1.3309, + "step": 6413 + }, + { + "epoch": 0.6765822784810127, + "grad_norm": 0.6912530660629272, + "learning_rate": 0.00036174463122788273, + "loss": 1.3275, + "step": 6414 + }, + { + "epoch": 0.6766877637130801, + "grad_norm": 0.7625917792320251, + "learning_rate": 0.00036152985188707344, + "loss": 1.3598, + "step": 6415 + }, + { + "epoch": 0.6767932489451477, + "grad_norm": 0.6284039616584778, + "learning_rate": 0.0003613151160762815, + "loss": 1.3118, + "step": 6416 + }, + { + "epoch": 0.6768987341772152, + "grad_norm": 0.9041814804077148, + "learning_rate": 0.00036110042381956895, + "loss": 1.3588, + "step": 6417 + }, + { + "epoch": 0.6770042194092827, + "grad_norm": 0.7154663801193237, + "learning_rate": 0.00036088577514099325, + "loss": 1.3672, + "step": 6418 + }, + { + "epoch": 0.6771097046413502, + "grad_norm": 0.7754157781600952, + "learning_rate": 0.0003606711700646067, + 
"loss": 1.319, + "step": 6419 + }, + { + "epoch": 0.6772151898734177, + "grad_norm": 0.6546618342399597, + "learning_rate": 0.00036045660861445684, + "loss": 1.3625, + "step": 6420 + }, + { + "epoch": 0.6773206751054852, + "grad_norm": 0.6470060348510742, + "learning_rate": 0.0003602420908145865, + "loss": 1.3599, + "step": 6421 + }, + { + "epoch": 0.6774261603375528, + "grad_norm": 0.7089857459068298, + "learning_rate": 0.00036002761668903335, + "loss": 1.3407, + "step": 6422 + }, + { + "epoch": 0.6775316455696202, + "grad_norm": 0.7471163272857666, + "learning_rate": 0.0003598131862618304, + "loss": 1.3369, + "step": 6423 + }, + { + "epoch": 0.6776371308016877, + "grad_norm": 0.6703023910522461, + "learning_rate": 0.0003595987995570052, + "loss": 1.3749, + "step": 6424 + }, + { + "epoch": 0.6777426160337553, + "grad_norm": 0.7338800430297852, + "learning_rate": 0.0003593844565985815, + "loss": 1.3832, + "step": 6425 + }, + { + "epoch": 0.6778481012658227, + "grad_norm": 0.7117764353752136, + "learning_rate": 0.00035917015741057727, + "loss": 1.3478, + "step": 6426 + }, + { + "epoch": 0.6779535864978903, + "grad_norm": 0.6916072368621826, + "learning_rate": 0.0003589559020170058, + "loss": 1.3683, + "step": 6427 + }, + { + "epoch": 0.6780590717299578, + "grad_norm": 0.6769193410873413, + "learning_rate": 0.00035874169044187537, + "loss": 1.354, + "step": 6428 + }, + { + "epoch": 0.6781645569620253, + "grad_norm": 0.7163969874382019, + "learning_rate": 0.00035852752270918955, + "loss": 1.3105, + "step": 6429 + }, + { + "epoch": 0.6782700421940928, + "grad_norm": 0.676753044128418, + "learning_rate": 0.0003583133988429468, + "loss": 1.3635, + "step": 6430 + }, + { + "epoch": 0.6783755274261604, + "grad_norm": 0.7944344282150269, + "learning_rate": 0.00035809931886714093, + "loss": 1.3282, + "step": 6431 + }, + { + "epoch": 0.6784810126582278, + "grad_norm": 0.7192618250846863, + "learning_rate": 0.00035788528280576053, + "loss": 1.3556, + "step": 6432 + }, + { + 
"epoch": 0.6785864978902953, + "grad_norm": 0.7072911262512207, + "learning_rate": 0.0003576712906827892, + "loss": 1.3232, + "step": 6433 + }, + { + "epoch": 0.6786919831223629, + "grad_norm": 0.7164477705955505, + "learning_rate": 0.00035745734252220633, + "loss": 1.3489, + "step": 6434 + }, + { + "epoch": 0.6787974683544303, + "grad_norm": 0.7845733761787415, + "learning_rate": 0.00035724343834798566, + "loss": 1.323, + "step": 6435 + }, + { + "epoch": 0.6789029535864979, + "grad_norm": 0.6884161233901978, + "learning_rate": 0.00035702957818409606, + "loss": 1.3472, + "step": 6436 + }, + { + "epoch": 0.6790084388185654, + "grad_norm": 0.6618589162826538, + "learning_rate": 0.0003568157620545019, + "loss": 1.3305, + "step": 6437 + }, + { + "epoch": 0.6791139240506329, + "grad_norm": 0.7782530188560486, + "learning_rate": 0.00035660198998316213, + "loss": 1.3561, + "step": 6438 + }, + { + "epoch": 0.6792194092827004, + "grad_norm": 0.695024311542511, + "learning_rate": 0.00035638826199403103, + "loss": 1.3431, + "step": 6439 + }, + { + "epoch": 0.679324894514768, + "grad_norm": 0.8222241401672363, + "learning_rate": 0.0003561745781110579, + "loss": 1.352, + "step": 6440 + }, + { + "epoch": 0.6794303797468354, + "grad_norm": 0.6887189149856567, + "learning_rate": 0.00035596093835818683, + "loss": 1.3732, + "step": 6441 + }, + { + "epoch": 0.679535864978903, + "grad_norm": 0.6108000874519348, + "learning_rate": 0.0003557473427593578, + "loss": 1.3078, + "step": 6442 + }, + { + "epoch": 0.6796413502109705, + "grad_norm": 0.7861247062683105, + "learning_rate": 0.0003555337913385048, + "loss": 1.3527, + "step": 6443 + }, + { + "epoch": 0.6797468354430379, + "grad_norm": 0.6956799030303955, + "learning_rate": 0.0003553202841195576, + "loss": 1.3632, + "step": 6444 + }, + { + "epoch": 0.6798523206751055, + "grad_norm": 0.6499341726303101, + "learning_rate": 0.00035510682112644055, + "loss": 1.3509, + "step": 6445 + }, + { + "epoch": 0.679957805907173, + "grad_norm": 
0.6749141812324524, + "learning_rate": 0.00035489340238307326, + "loss": 1.3391, + "step": 6446 + }, + { + "epoch": 0.6800632911392405, + "grad_norm": 0.648410439491272, + "learning_rate": 0.00035468002791337047, + "loss": 1.3381, + "step": 6447 + }, + { + "epoch": 0.680168776371308, + "grad_norm": 0.673168420791626, + "learning_rate": 0.0003544666977412418, + "loss": 1.3185, + "step": 6448 + }, + { + "epoch": 0.6802742616033756, + "grad_norm": 0.708993673324585, + "learning_rate": 0.000354253411890592, + "loss": 1.3177, + "step": 6449 + }, + { + "epoch": 0.680379746835443, + "grad_norm": 0.6317639350891113, + "learning_rate": 0.00035404017038532045, + "loss": 1.3515, + "step": 6450 + }, + { + "epoch": 0.6804852320675105, + "grad_norm": 0.6701706647872925, + "learning_rate": 0.00035382697324932245, + "loss": 1.3367, + "step": 6451 + }, + { + "epoch": 0.6805907172995781, + "grad_norm": 0.7000930309295654, + "learning_rate": 0.0003536138205064877, + "loss": 1.3861, + "step": 6452 + }, + { + "epoch": 0.6806962025316455, + "grad_norm": 0.6511358618736267, + "learning_rate": 0.0003534007121807009, + "loss": 1.3816, + "step": 6453 + }, + { + "epoch": 0.6808016877637131, + "grad_norm": 0.7008346915245056, + "learning_rate": 0.00035318764829584185, + "loss": 1.3259, + "step": 6454 + }, + { + "epoch": 0.6809071729957806, + "grad_norm": 0.6346901059150696, + "learning_rate": 0.0003529746288757856, + "loss": 1.3456, + "step": 6455 + }, + { + "epoch": 0.6810126582278481, + "grad_norm": 0.6826952695846558, + "learning_rate": 0.0003527616539444019, + "loss": 1.3346, + "step": 6456 + }, + { + "epoch": 0.6811181434599156, + "grad_norm": 0.7917857766151428, + "learning_rate": 0.0003525487235255556, + "loss": 1.3681, + "step": 6457 + }, + { + "epoch": 0.6812236286919832, + "grad_norm": 0.6683038473129272, + "learning_rate": 0.0003523358376431068, + "loss": 1.3477, + "step": 6458 + }, + { + "epoch": 0.6813291139240506, + "grad_norm": 0.7160599827766418, + "learning_rate": 
0.00035212299632090996, + "loss": 1.3691, + "step": 6459 + }, + { + "epoch": 0.6814345991561181, + "grad_norm": 0.9699233174324036, + "learning_rate": 0.00035191019958281575, + "loss": 1.3676, + "step": 6460 + }, + { + "epoch": 0.6815400843881857, + "grad_norm": 0.6689322590827942, + "learning_rate": 0.00035169744745266866, + "loss": 1.3418, + "step": 6461 + }, + { + "epoch": 0.6816455696202531, + "grad_norm": 0.7637701630592346, + "learning_rate": 0.0003514847399543087, + "loss": 1.357, + "step": 6462 + }, + { + "epoch": 0.6817510548523207, + "grad_norm": 0.8393051028251648, + "learning_rate": 0.00035127207711157084, + "loss": 1.3388, + "step": 6463 + }, + { + "epoch": 0.6818565400843882, + "grad_norm": 0.6530219912528992, + "learning_rate": 0.00035105945894828495, + "loss": 1.3584, + "step": 6464 + }, + { + "epoch": 0.6819620253164557, + "grad_norm": 1.1129449605941772, + "learning_rate": 0.000350846885488276, + "loss": 1.3485, + "step": 6465 + }, + { + "epoch": 0.6820675105485232, + "grad_norm": 0.6983477473258972, + "learning_rate": 0.00035063435675536386, + "loss": 1.3171, + "step": 6466 + }, + { + "epoch": 0.6821729957805908, + "grad_norm": 0.6560412645339966, + "learning_rate": 0.00035042187277336325, + "loss": 1.3442, + "step": 6467 + }, + { + "epoch": 0.6822784810126582, + "grad_norm": 0.8186971545219421, + "learning_rate": 0.00035020943356608444, + "loss": 1.3709, + "step": 6468 + }, + { + "epoch": 0.6823839662447257, + "grad_norm": 0.8880643844604492, + "learning_rate": 0.0003499970391573322, + "loss": 1.3383, + "step": 6469 + }, + { + "epoch": 0.6824894514767933, + "grad_norm": 0.6930777430534363, + "learning_rate": 0.00034978468957090635, + "loss": 1.385, + "step": 6470 + }, + { + "epoch": 0.6825949367088607, + "grad_norm": 0.9592317938804626, + "learning_rate": 0.0003495723848306017, + "loss": 1.3265, + "step": 6471 + }, + { + "epoch": 0.6827004219409283, + "grad_norm": 0.8406945466995239, + "learning_rate": 0.000349360124960208, + "loss": 1.3932, + 
"step": 6472 + }, + { + "epoch": 0.6828059071729958, + "grad_norm": 0.8497843742370605, + "learning_rate": 0.00034914790998351005, + "loss": 1.3661, + "step": 6473 + }, + { + "epoch": 0.6829113924050633, + "grad_norm": 0.8645557761192322, + "learning_rate": 0.0003489357399242876, + "loss": 1.3383, + "step": 6474 + }, + { + "epoch": 0.6830168776371308, + "grad_norm": 0.8433873653411865, + "learning_rate": 0.0003487236148063154, + "loss": 1.3653, + "step": 6475 + }, + { + "epoch": 0.6831223628691984, + "grad_norm": 0.7290499806404114, + "learning_rate": 0.0003485115346533629, + "loss": 1.371, + "step": 6476 + }, + { + "epoch": 0.6832278481012658, + "grad_norm": 1.0309748649597168, + "learning_rate": 0.00034829949948919517, + "loss": 1.3428, + "step": 6477 + }, + { + "epoch": 0.6833333333333333, + "grad_norm": 0.9414817690849304, + "learning_rate": 0.00034808750933757154, + "loss": 1.3581, + "step": 6478 + }, + { + "epoch": 0.6834388185654009, + "grad_norm": 0.6726256012916565, + "learning_rate": 0.0003478755642222466, + "loss": 1.341, + "step": 6479 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 1.4059226512908936, + "learning_rate": 0.0003476636641669699, + "loss": 1.3575, + "step": 6480 + }, + { + "epoch": 0.6836497890295359, + "grad_norm": 1.006642460823059, + "learning_rate": 0.0003474518091954859, + "loss": 1.3631, + "step": 6481 + }, + { + "epoch": 0.6837552742616034, + "grad_norm": 0.7512980699539185, + "learning_rate": 0.00034723999933153387, + "loss": 1.3061, + "step": 6482 + }, + { + "epoch": 0.6838607594936709, + "grad_norm": 1.0789027214050293, + "learning_rate": 0.00034702823459884836, + "loss": 1.3101, + "step": 6483 + }, + { + "epoch": 0.6839662447257384, + "grad_norm": 0.8368441462516785, + "learning_rate": 0.0003468165150211585, + "loss": 1.3795, + "step": 6484 + }, + { + "epoch": 0.6840717299578059, + "grad_norm": 0.7338466048240662, + "learning_rate": 0.0003466048406221883, + "loss": 1.3531, + "step": 6485 + }, + { + "epoch": 
0.6841772151898734, + "grad_norm": 1.1000579595565796, + "learning_rate": 0.0003463932114256576, + "loss": 1.3575, + "step": 6486 + }, + { + "epoch": 0.684282700421941, + "grad_norm": 0.7644850611686707, + "learning_rate": 0.00034618162745528, + "loss": 1.3589, + "step": 6487 + }, + { + "epoch": 0.6843881856540084, + "grad_norm": 0.8402924537658691, + "learning_rate": 0.00034597008873476473, + "loss": 1.3801, + "step": 6488 + }, + { + "epoch": 0.6844936708860759, + "grad_norm": 0.7471780776977539, + "learning_rate": 0.0003457585952878156, + "loss": 1.3519, + "step": 6489 + }, + { + "epoch": 0.6845991561181435, + "grad_norm": 0.7467726469039917, + "learning_rate": 0.0003455471471381318, + "loss": 1.3559, + "step": 6490 + }, + { + "epoch": 0.6847046413502109, + "grad_norm": 0.7797772288322449, + "learning_rate": 0.0003453357443094068, + "loss": 1.373, + "step": 6491 + }, + { + "epoch": 0.6848101265822785, + "grad_norm": 0.6721264719963074, + "learning_rate": 0.0003451243868253294, + "loss": 1.3845, + "step": 6492 + }, + { + "epoch": 0.684915611814346, + "grad_norm": 0.7413660287857056, + "learning_rate": 0.0003449130747095835, + "loss": 1.3575, + "step": 6493 + }, + { + "epoch": 0.6850210970464135, + "grad_norm": 0.7213349342346191, + "learning_rate": 0.0003447018079858472, + "loss": 1.3464, + "step": 6494 + }, + { + "epoch": 0.685126582278481, + "grad_norm": 0.8235604166984558, + "learning_rate": 0.0003444905866777946, + "loss": 1.3815, + "step": 6495 + }, + { + "epoch": 0.6852320675105485, + "grad_norm": 0.7061792016029358, + "learning_rate": 0.0003442794108090938, + "loss": 1.3508, + "step": 6496 + }, + { + "epoch": 0.685337552742616, + "grad_norm": 0.8878853917121887, + "learning_rate": 0.0003440682804034081, + "loss": 1.3908, + "step": 6497 + }, + { + "epoch": 0.6854430379746835, + "grad_norm": 0.7498077154159546, + "learning_rate": 0.00034385719548439585, + "loss": 1.3394, + "step": 6498 + }, + { + "epoch": 0.6855485232067511, + "grad_norm": 0.8679275512695312, 
+ "learning_rate": 0.00034364615607570994, + "loss": 1.3825, + "step": 6499 + }, + { + "epoch": 0.6856540084388185, + "grad_norm": 0.8990713357925415, + "learning_rate": 0.0003434351622009985, + "loss": 1.3495, + "step": 6500 + }, + { + "epoch": 0.6857594936708861, + "grad_norm": 0.6841896176338196, + "learning_rate": 0.00034322421388390456, + "loss": 1.363, + "step": 6501 + }, + { + "epoch": 0.6858649789029536, + "grad_norm": 0.889380156993866, + "learning_rate": 0.00034301331114806573, + "loss": 1.3451, + "step": 6502 + }, + { + "epoch": 0.685970464135021, + "grad_norm": 0.9102359414100647, + "learning_rate": 0.0003428024540171148, + "loss": 1.3939, + "step": 6503 + }, + { + "epoch": 0.6860759493670886, + "grad_norm": 0.6504446268081665, + "learning_rate": 0.0003425916425146791, + "loss": 1.3762, + "step": 6504 + }, + { + "epoch": 0.6861814345991561, + "grad_norm": 0.9117252230644226, + "learning_rate": 0.0003423808766643817, + "loss": 1.3704, + "step": 6505 + }, + { + "epoch": 0.6862869198312236, + "grad_norm": 1.0312509536743164, + "learning_rate": 0.00034217015648983957, + "loss": 1.3792, + "step": 6506 + }, + { + "epoch": 0.6863924050632911, + "grad_norm": 0.687157392501831, + "learning_rate": 0.0003419594820146652, + "loss": 1.358, + "step": 6507 + }, + { + "epoch": 0.6864978902953587, + "grad_norm": 1.2652952671051025, + "learning_rate": 0.0003417488532624653, + "loss": 1.3383, + "step": 6508 + }, + { + "epoch": 0.6866033755274261, + "grad_norm": 0.9112887978553772, + "learning_rate": 0.00034153827025684225, + "loss": 1.3744, + "step": 6509 + }, + { + "epoch": 0.6867088607594937, + "grad_norm": 0.7051622271537781, + "learning_rate": 0.0003413277330213928, + "loss": 1.3495, + "step": 6510 + }, + { + "epoch": 0.6868143459915612, + "grad_norm": 1.0251502990722656, + "learning_rate": 0.0003411172415797087, + "loss": 1.356, + "step": 6511 + }, + { + "epoch": 0.6869198312236287, + "grad_norm": 0.839400053024292, + "learning_rate": 0.00034090679595537646, + 
"loss": 1.3557, + "step": 6512 + }, + { + "epoch": 0.6870253164556962, + "grad_norm": 0.7997100353240967, + "learning_rate": 0.0003406963961719778, + "loss": 1.3567, + "step": 6513 + }, + { + "epoch": 0.6871308016877637, + "grad_norm": 0.8181828260421753, + "learning_rate": 0.00034048604225308854, + "loss": 1.3618, + "step": 6514 + }, + { + "epoch": 0.6872362869198312, + "grad_norm": 0.8043246269226074, + "learning_rate": 0.00034027573422228054, + "loss": 1.3354, + "step": 6515 + }, + { + "epoch": 0.6873417721518987, + "grad_norm": 0.794401228427887, + "learning_rate": 0.00034006547210311964, + "loss": 1.3613, + "step": 6516 + }, + { + "epoch": 0.6874472573839663, + "grad_norm": 0.9534015655517578, + "learning_rate": 0.0003398552559191667, + "loss": 1.3917, + "step": 6517 + }, + { + "epoch": 0.6875527426160337, + "grad_norm": 0.6916554570198059, + "learning_rate": 0.00033964508569397743, + "loss": 1.3265, + "step": 6518 + }, + { + "epoch": 0.6876582278481013, + "grad_norm": 0.7908419966697693, + "learning_rate": 0.0003394349614511026, + "loss": 1.3057, + "step": 6519 + }, + { + "epoch": 0.6877637130801688, + "grad_norm": 0.7678713202476501, + "learning_rate": 0.0003392248832140876, + "loss": 1.3385, + "step": 6520 + }, + { + "epoch": 0.6878691983122363, + "grad_norm": 0.7589985728263855, + "learning_rate": 0.0003390148510064727, + "loss": 1.3627, + "step": 6521 + }, + { + "epoch": 0.6879746835443038, + "grad_norm": 0.8163257837295532, + "learning_rate": 0.00033880486485179305, + "loss": 1.3213, + "step": 6522 + }, + { + "epoch": 0.6880801687763713, + "grad_norm": 0.6873239278793335, + "learning_rate": 0.0003385949247735786, + "loss": 1.3113, + "step": 6523 + }, + { + "epoch": 0.6881856540084388, + "grad_norm": 0.8482858538627625, + "learning_rate": 0.00033838503079535435, + "loss": 1.3479, + "step": 6524 + }, + { + "epoch": 0.6882911392405063, + "grad_norm": 0.7161694765090942, + "learning_rate": 0.00033817518294064003, + "loss": 1.358, + "step": 6525 + }, + { + 
"epoch": 0.6883966244725739, + "grad_norm": 0.7169045209884644, + "learning_rate": 0.00033796538123294996, + "loss": 1.3516, + "step": 6526 + }, + { + "epoch": 0.6885021097046413, + "grad_norm": 0.8610522747039795, + "learning_rate": 0.0003377556256957936, + "loss": 1.3505, + "step": 6527 + }, + { + "epoch": 0.6886075949367089, + "grad_norm": 0.7184323072433472, + "learning_rate": 0.0003375459163526749, + "loss": 1.3424, + "step": 6528 + }, + { + "epoch": 0.6887130801687764, + "grad_norm": 0.9129148721694946, + "learning_rate": 0.000337336253227093, + "loss": 1.332, + "step": 6529 + }, + { + "epoch": 0.6888185654008439, + "grad_norm": 0.7115538716316223, + "learning_rate": 0.00033712663634254163, + "loss": 1.3386, + "step": 6530 + }, + { + "epoch": 0.6889240506329114, + "grad_norm": 0.658371090888977, + "learning_rate": 0.0003369170657225094, + "loss": 1.3025, + "step": 6531 + }, + { + "epoch": 0.689029535864979, + "grad_norm": 0.8048012256622314, + "learning_rate": 0.0003367075413904799, + "loss": 1.3315, + "step": 6532 + }, + { + "epoch": 0.6891350210970464, + "grad_norm": 0.6541045904159546, + "learning_rate": 0.00033649806336993085, + "loss": 1.3334, + "step": 6533 + }, + { + "epoch": 0.6892405063291139, + "grad_norm": 0.6621049046516418, + "learning_rate": 0.0003362886316843361, + "loss": 1.3448, + "step": 6534 + }, + { + "epoch": 0.6893459915611815, + "grad_norm": 0.7894169092178345, + "learning_rate": 0.000336079246357163, + "loss": 1.3727, + "step": 6535 + }, + { + "epoch": 0.6894514767932489, + "grad_norm": 0.7185949683189392, + "learning_rate": 0.00033586990741187446, + "loss": 1.3647, + "step": 6536 + }, + { + "epoch": 0.6895569620253165, + "grad_norm": 0.7000490427017212, + "learning_rate": 0.0003356606148719277, + "loss": 1.3754, + "step": 6537 + }, + { + "epoch": 0.689662447257384, + "grad_norm": 0.7950170040130615, + "learning_rate": 0.00033545136876077524, + "loss": 1.3598, + "step": 6538 + }, + { + "epoch": 0.6897679324894515, + "grad_norm": 
0.7931535243988037, + "learning_rate": 0.00033524216910186394, + "loss": 1.3839, + "step": 6539 + }, + { + "epoch": 0.689873417721519, + "grad_norm": 0.7643699049949646, + "learning_rate": 0.00033503301591863586, + "loss": 1.3495, + "step": 6540 + }, + { + "epoch": 0.6899789029535865, + "grad_norm": 0.8147913217544556, + "learning_rate": 0.0003348239092345275, + "loss": 1.3398, + "step": 6541 + }, + { + "epoch": 0.690084388185654, + "grad_norm": 0.7079659104347229, + "learning_rate": 0.00033461484907297036, + "loss": 1.3498, + "step": 6542 + }, + { + "epoch": 0.6901898734177215, + "grad_norm": 0.8102227449417114, + "learning_rate": 0.00033440583545739046, + "loss": 1.3116, + "step": 6543 + }, + { + "epoch": 0.6902953586497891, + "grad_norm": 0.6573535799980164, + "learning_rate": 0.00033419686841120925, + "loss": 1.3736, + "step": 6544 + }, + { + "epoch": 0.6904008438818565, + "grad_norm": 0.643921434879303, + "learning_rate": 0.00033398794795784227, + "loss": 1.3447, + "step": 6545 + }, + { + "epoch": 0.6905063291139241, + "grad_norm": 0.7151456475257874, + "learning_rate": 0.0003337790741207003, + "loss": 1.3603, + "step": 6546 + }, + { + "epoch": 0.6906118143459916, + "grad_norm": 0.6813206076622009, + "learning_rate": 0.0003335702469231884, + "loss": 1.3736, + "step": 6547 + }, + { + "epoch": 0.690717299578059, + "grad_norm": 0.6806785464286804, + "learning_rate": 0.00033336146638870685, + "loss": 1.3531, + "step": 6548 + }, + { + "epoch": 0.6908227848101266, + "grad_norm": 0.6458054780960083, + "learning_rate": 0.0003331527325406506, + "loss": 1.338, + "step": 6549 + }, + { + "epoch": 0.6909282700421941, + "grad_norm": 0.7103319764137268, + "learning_rate": 0.0003329440454024092, + "loss": 1.3111, + "step": 6550 + }, + { + "epoch": 0.6910337552742616, + "grad_norm": 0.7158697247505188, + "learning_rate": 0.0003327354049973672, + "loss": 1.3765, + "step": 6551 + }, + { + "epoch": 0.6911392405063291, + "grad_norm": 0.7254961729049683, + "learning_rate": 
0.00033252681134890373, + "loss": 1.3235, + "step": 6552 + }, + { + "epoch": 0.6912447257383966, + "grad_norm": 0.6737014651298523, + "learning_rate": 0.00033231826448039246, + "loss": 1.3444, + "step": 6553 + }, + { + "epoch": 0.6913502109704641, + "grad_norm": 0.7331088185310364, + "learning_rate": 0.0003321097644152027, + "loss": 1.3627, + "step": 6554 + }, + { + "epoch": 0.6914556962025317, + "grad_norm": 0.8435728549957275, + "learning_rate": 0.00033190131117669753, + "loss": 1.33, + "step": 6555 + }, + { + "epoch": 0.6915611814345991, + "grad_norm": 0.7506133317947388, + "learning_rate": 0.0003316929047882354, + "loss": 1.3622, + "step": 6556 + }, + { + "epoch": 0.6916666666666667, + "grad_norm": 0.8476306796073914, + "learning_rate": 0.0003314845452731691, + "loss": 1.3464, + "step": 6557 + }, + { + "epoch": 0.6917721518987342, + "grad_norm": 1.001125454902649, + "learning_rate": 0.00033127623265484643, + "loss": 1.3509, + "step": 6558 + }, + { + "epoch": 0.6918776371308016, + "grad_norm": 0.7020590305328369, + "learning_rate": 0.00033106796695660983, + "loss": 1.3267, + "step": 6559 + }, + { + "epoch": 0.6919831223628692, + "grad_norm": 1.1846123933792114, + "learning_rate": 0.0003308597482017965, + "loss": 1.3499, + "step": 6560 + }, + { + "epoch": 0.6920886075949367, + "grad_norm": 0.9742792248725891, + "learning_rate": 0.00033065157641373847, + "loss": 1.3705, + "step": 6561 + }, + { + "epoch": 0.6921940928270042, + "grad_norm": 0.6731942296028137, + "learning_rate": 0.00033044345161576224, + "loss": 1.3375, + "step": 6562 + }, + { + "epoch": 0.6922995780590717, + "grad_norm": 1.318389892578125, + "learning_rate": 0.00033023537383118916, + "loss": 1.3037, + "step": 6563 + }, + { + "epoch": 0.6924050632911393, + "grad_norm": 1.150465726852417, + "learning_rate": 0.0003300273430833358, + "loss": 1.333, + "step": 6564 + }, + { + "epoch": 0.6925105485232067, + "grad_norm": 0.7129090428352356, + "learning_rate": 0.00032981935939551294, + "loss": 1.3571, + 
"step": 6565 + }, + { + "epoch": 0.6926160337552743, + "grad_norm": 1.0190807580947876, + "learning_rate": 0.000329611422791026, + "loss": 1.3436, + "step": 6566 + }, + { + "epoch": 0.6927215189873418, + "grad_norm": 0.952193558216095, + "learning_rate": 0.00032940353329317533, + "loss": 1.3675, + "step": 6567 + }, + { + "epoch": 0.6928270042194092, + "grad_norm": 0.7190878391265869, + "learning_rate": 0.0003291956909252561, + "loss": 1.3479, + "step": 6568 + }, + { + "epoch": 0.6929324894514768, + "grad_norm": 1.1422123908996582, + "learning_rate": 0.00032898789571055796, + "loss": 1.3659, + "step": 6569 + }, + { + "epoch": 0.6930379746835443, + "grad_norm": 0.8945153951644897, + "learning_rate": 0.0003287801476723656, + "loss": 1.335, + "step": 6570 + }, + { + "epoch": 0.6931434599156118, + "grad_norm": 0.9006514549255371, + "learning_rate": 0.0003285724468339576, + "loss": 1.3563, + "step": 6571 + }, + { + "epoch": 0.6932489451476793, + "grad_norm": 1.1803321838378906, + "learning_rate": 0.00032836479321860884, + "loss": 1.3509, + "step": 6572 + }, + { + "epoch": 0.6933544303797469, + "grad_norm": 0.7358508110046387, + "learning_rate": 0.00032815718684958727, + "loss": 1.3348, + "step": 6573 + }, + { + "epoch": 0.6934599156118143, + "grad_norm": 0.8437590003013611, + "learning_rate": 0.00032794962775015656, + "loss": 1.3618, + "step": 6574 + }, + { + "epoch": 0.6935654008438819, + "grad_norm": 1.2936931848526, + "learning_rate": 0.0003277421159435745, + "loss": 1.3549, + "step": 6575 + }, + { + "epoch": 0.6936708860759494, + "grad_norm": 0.7992663979530334, + "learning_rate": 0.000327534651453094, + "loss": 1.2959, + "step": 6576 + }, + { + "epoch": 0.6937763713080168, + "grad_norm": 1.1382378339767456, + "learning_rate": 0.00032732723430196236, + "loss": 1.3357, + "step": 6577 + }, + { + "epoch": 0.6938818565400844, + "grad_norm": 0.7145035862922668, + "learning_rate": 0.0003271198645134218, + "loss": 1.3397, + "step": 6578 + }, + { + "epoch": 
0.6939873417721519, + "grad_norm": 0.8306401968002319, + "learning_rate": 0.0003269125421107091, + "loss": 1.3254, + "step": 6579 + }, + { + "epoch": 0.6940928270042194, + "grad_norm": 0.7941994071006775, + "learning_rate": 0.00032670526711705536, + "loss": 1.3793, + "step": 6580 + }, + { + "epoch": 0.6941983122362869, + "grad_norm": 1.0194544792175293, + "learning_rate": 0.00032649803955568755, + "loss": 1.3251, + "step": 6581 + }, + { + "epoch": 0.6943037974683545, + "grad_norm": 0.7041123509407043, + "learning_rate": 0.0003262908594498262, + "loss": 1.3406, + "step": 6582 + }, + { + "epoch": 0.6944092827004219, + "grad_norm": 0.8447715044021606, + "learning_rate": 0.0003260837268226868, + "loss": 1.3439, + "step": 6583 + }, + { + "epoch": 0.6945147679324895, + "grad_norm": 0.7479461431503296, + "learning_rate": 0.0003258766416974796, + "loss": 1.3366, + "step": 6584 + }, + { + "epoch": 0.694620253164557, + "grad_norm": 0.70717453956604, + "learning_rate": 0.0003256696040974097, + "loss": 1.352, + "step": 6585 + }, + { + "epoch": 0.6947257383966244, + "grad_norm": 0.6887946128845215, + "learning_rate": 0.00032546261404567644, + "loss": 1.3239, + "step": 6586 + }, + { + "epoch": 0.694831223628692, + "grad_norm": 0.7126222848892212, + "learning_rate": 0.0003252556715654743, + "loss": 1.3482, + "step": 6587 + }, + { + "epoch": 0.6949367088607595, + "grad_norm": 0.7008379101753235, + "learning_rate": 0.00032504877667999206, + "loss": 1.3446, + "step": 6588 + }, + { + "epoch": 0.695042194092827, + "grad_norm": 0.7616559267044067, + "learning_rate": 0.00032484192941241316, + "loss": 1.341, + "step": 6589 + }, + { + "epoch": 0.6951476793248945, + "grad_norm": 0.7411245107650757, + "learning_rate": 0.0003246351297859164, + "loss": 1.4028, + "step": 6590 + }, + { + "epoch": 0.6952531645569621, + "grad_norm": 0.7135361433029175, + "learning_rate": 0.00032442837782367434, + "loss": 1.3384, + "step": 6591 + }, + { + "epoch": 0.6953586497890295, + "grad_norm": 
0.7659331560134888, + "learning_rate": 0.00032422167354885463, + "loss": 1.3632, + "step": 6592 + }, + { + "epoch": 0.695464135021097, + "grad_norm": 0.6704236268997192, + "learning_rate": 0.0003240150169846196, + "loss": 1.3622, + "step": 6593 + }, + { + "epoch": 0.6955696202531646, + "grad_norm": 0.6528254151344299, + "learning_rate": 0.00032380840815412603, + "loss": 1.3336, + "step": 6594 + }, + { + "epoch": 0.695675105485232, + "grad_norm": 0.7441567182540894, + "learning_rate": 0.00032360184708052554, + "loss": 1.3267, + "step": 6595 + }, + { + "epoch": 0.6957805907172996, + "grad_norm": 0.6742079257965088, + "learning_rate": 0.00032339533378696424, + "loss": 1.3675, + "step": 6596 + }, + { + "epoch": 0.6958860759493671, + "grad_norm": 0.6885363459587097, + "learning_rate": 0.00032318886829658277, + "loss": 1.3642, + "step": 6597 + }, + { + "epoch": 0.6959915611814346, + "grad_norm": 0.8566861748695374, + "learning_rate": 0.0003229824506325172, + "loss": 1.3544, + "step": 6598 + }, + { + "epoch": 0.6960970464135021, + "grad_norm": 0.6737650632858276, + "learning_rate": 0.0003227760808178973, + "loss": 1.354, + "step": 6599 + }, + { + "epoch": 0.6962025316455697, + "grad_norm": 0.8699102401733398, + "learning_rate": 0.00032256975887584783, + "loss": 1.3697, + "step": 6600 + }, + { + "epoch": 0.6963080168776371, + "grad_norm": 0.794671893119812, + "learning_rate": 0.0003223634848294883, + "loss": 1.324, + "step": 6601 + }, + { + "epoch": 0.6964135021097047, + "grad_norm": 0.7154747247695923, + "learning_rate": 0.0003221572587019327, + "loss": 1.3398, + "step": 6602 + }, + { + "epoch": 0.6965189873417722, + "grad_norm": 1.1095257997512817, + "learning_rate": 0.0003219510805162896, + "loss": 1.3611, + "step": 6603 + }, + { + "epoch": 0.6966244725738396, + "grad_norm": 0.673861026763916, + "learning_rate": 0.0003217449502956624, + "loss": 1.3471, + "step": 6604 + }, + { + "epoch": 0.6967299578059072, + "grad_norm": 0.9027205109596252, + "learning_rate": 
0.0003215388680631491, + "loss": 1.354, + "step": 6605 + }, + { + "epoch": 0.6968354430379747, + "grad_norm": 1.0077909231185913, + "learning_rate": 0.00032133283384184173, + "loss": 1.3037, + "step": 6606 + }, + { + "epoch": 0.6969409282700422, + "grad_norm": 0.677034854888916, + "learning_rate": 0.00032112684765482814, + "loss": 1.3396, + "step": 6607 + }, + { + "epoch": 0.6970464135021097, + "grad_norm": 0.7418259382247925, + "learning_rate": 0.00032092090952518996, + "loss": 1.2898, + "step": 6608 + }, + { + "epoch": 0.6971518987341773, + "grad_norm": 1.0616626739501953, + "learning_rate": 0.00032071501947600334, + "loss": 1.3321, + "step": 6609 + }, + { + "epoch": 0.6972573839662447, + "grad_norm": 0.7090403437614441, + "learning_rate": 0.00032050917753033935, + "loss": 1.3878, + "step": 6610 + }, + { + "epoch": 0.6973628691983123, + "grad_norm": 0.7633395195007324, + "learning_rate": 0.00032030338371126374, + "loss": 1.3133, + "step": 6611 + }, + { + "epoch": 0.6974683544303798, + "grad_norm": 1.0273178815841675, + "learning_rate": 0.0003200976380418366, + "loss": 1.3559, + "step": 6612 + }, + { + "epoch": 0.6975738396624472, + "grad_norm": 0.7010444402694702, + "learning_rate": 0.00031989194054511276, + "loss": 1.4009, + "step": 6613 + }, + { + "epoch": 0.6976793248945148, + "grad_norm": 0.9063969254493713, + "learning_rate": 0.0003196862912441418, + "loss": 1.3515, + "step": 6614 + }, + { + "epoch": 0.6977848101265823, + "grad_norm": 0.8219656348228455, + "learning_rate": 0.0003194806901619673, + "loss": 1.3326, + "step": 6615 + }, + { + "epoch": 0.6978902953586498, + "grad_norm": 0.7269443869590759, + "learning_rate": 0.00031927513732162856, + "loss": 1.3648, + "step": 6616 + }, + { + "epoch": 0.6979957805907173, + "grad_norm": 0.8381291031837463, + "learning_rate": 0.00031906963274615837, + "loss": 1.3735, + "step": 6617 + }, + { + "epoch": 0.6981012658227848, + "grad_norm": 0.7375606894493103, + "learning_rate": 0.00031886417645858475, + "loss": 1.329, + 
"step": 6618 + }, + { + "epoch": 0.6982067510548523, + "grad_norm": 0.7260634899139404, + "learning_rate": 0.00031865876848192993, + "loss": 1.3402, + "step": 6619 + }, + { + "epoch": 0.6983122362869199, + "grad_norm": 0.814452052116394, + "learning_rate": 0.000318453408839211, + "loss": 1.3402, + "step": 6620 + }, + { + "epoch": 0.6984177215189873, + "grad_norm": 0.7954577207565308, + "learning_rate": 0.0003182480975534395, + "loss": 1.3428, + "step": 6621 + }, + { + "epoch": 0.6985232067510548, + "grad_norm": 1.0581568479537964, + "learning_rate": 0.0003180428346476215, + "loss": 1.3693, + "step": 6622 + }, + { + "epoch": 0.6986286919831224, + "grad_norm": 0.8118441700935364, + "learning_rate": 0.0003178376201447576, + "loss": 1.3272, + "step": 6623 + }, + { + "epoch": 0.6987341772151898, + "grad_norm": 0.7157741189002991, + "learning_rate": 0.00031763245406784364, + "loss": 1.3387, + "step": 6624 + }, + { + "epoch": 0.6988396624472574, + "grad_norm": 0.8536592125892639, + "learning_rate": 0.0003174273364398691, + "loss": 1.352, + "step": 6625 + }, + { + "epoch": 0.6989451476793249, + "grad_norm": 0.7294368147850037, + "learning_rate": 0.00031722226728381854, + "loss": 1.3116, + "step": 6626 + }, + { + "epoch": 0.6990506329113924, + "grad_norm": 0.6786358952522278, + "learning_rate": 0.00031701724662267097, + "loss": 1.339, + "step": 6627 + }, + { + "epoch": 0.6991561181434599, + "grad_norm": 0.8026394248008728, + "learning_rate": 0.00031681227447939996, + "loss": 1.3499, + "step": 6628 + }, + { + "epoch": 0.6992616033755275, + "grad_norm": 0.7997431755065918, + "learning_rate": 0.00031660735087697363, + "loss": 1.3295, + "step": 6629 + }, + { + "epoch": 0.6993670886075949, + "grad_norm": 0.6777182221412659, + "learning_rate": 0.0003164024758383548, + "loss": 1.338, + "step": 6630 + }, + { + "epoch": 0.6994725738396624, + "grad_norm": 1.1775909662246704, + "learning_rate": 0.00031619764938650057, + "loss": 1.3466, + "step": 6631 + }, + { + "epoch": 
0.69957805907173, + "grad_norm": 0.7964673638343811, + "learning_rate": 0.00031599287154436263, + "loss": 1.3433, + "step": 6632 + }, + { + "epoch": 0.6996835443037974, + "grad_norm": 0.8911005854606628, + "learning_rate": 0.0003157881423348879, + "loss": 1.3427, + "step": 6633 + }, + { + "epoch": 0.699789029535865, + "grad_norm": 0.711287260055542, + "learning_rate": 0.00031558346178101694, + "loss": 1.369, + "step": 6634 + }, + { + "epoch": 0.6998945147679325, + "grad_norm": 0.7004973292350769, + "learning_rate": 0.00031537882990568535, + "loss": 1.3497, + "step": 6635 + }, + { + "epoch": 0.7, + "grad_norm": 0.6872787475585938, + "learning_rate": 0.000315174246731823, + "loss": 1.3087, + "step": 6636 + }, + { + "epoch": 0.7001054852320675, + "grad_norm": 0.7374347448348999, + "learning_rate": 0.00031496971228235464, + "loss": 1.382, + "step": 6637 + }, + { + "epoch": 0.700210970464135, + "grad_norm": 0.7710724472999573, + "learning_rate": 0.00031476522658019916, + "loss": 1.3549, + "step": 6638 + }, + { + "epoch": 0.7003164556962025, + "grad_norm": 0.7258262634277344, + "learning_rate": 0.0003145607896482704, + "loss": 1.3329, + "step": 6639 + }, + { + "epoch": 0.70042194092827, + "grad_norm": 0.9412587285041809, + "learning_rate": 0.00031435640150947645, + "loss": 1.3314, + "step": 6640 + }, + { + "epoch": 0.7005274261603376, + "grad_norm": 0.674812376499176, + "learning_rate": 0.0003141520621867197, + "loss": 1.3441, + "step": 6641 + }, + { + "epoch": 0.700632911392405, + "grad_norm": 0.9067224860191345, + "learning_rate": 0.00031394777170289806, + "loss": 1.3424, + "step": 6642 + }, + { + "epoch": 0.7007383966244726, + "grad_norm": 0.8538034558296204, + "learning_rate": 0.00031374353008090285, + "loss": 1.3625, + "step": 6643 + }, + { + "epoch": 0.7008438818565401, + "grad_norm": 0.7196929454803467, + "learning_rate": 0.0003135393373436206, + "loss": 1.3086, + "step": 6644 + }, + { + "epoch": 0.7009493670886076, + "grad_norm": 0.8861596584320068, + 
"learning_rate": 0.0003133351935139319, + "loss": 1.3164, + "step": 6645 + }, + { + "epoch": 0.7010548523206751, + "grad_norm": 0.7066764831542969, + "learning_rate": 0.00031313109861471223, + "loss": 1.3249, + "step": 6646 + }, + { + "epoch": 0.7011603375527427, + "grad_norm": 0.6867622137069702, + "learning_rate": 0.0003129270526688313, + "loss": 1.3588, + "step": 6647 + }, + { + "epoch": 0.7012658227848101, + "grad_norm": 0.846947431564331, + "learning_rate": 0.0003127230556991536, + "loss": 1.384, + "step": 6648 + }, + { + "epoch": 0.7013713080168776, + "grad_norm": 0.6649790406227112, + "learning_rate": 0.000312519107728538, + "loss": 1.293, + "step": 6649 + }, + { + "epoch": 0.7014767932489452, + "grad_norm": 0.6996504664421082, + "learning_rate": 0.0003123152087798376, + "loss": 1.3655, + "step": 6650 + }, + { + "epoch": 0.7015822784810126, + "grad_norm": 0.7329871654510498, + "learning_rate": 0.00031211135887590074, + "loss": 1.3354, + "step": 6651 + }, + { + "epoch": 0.7016877637130802, + "grad_norm": 0.7470362186431885, + "learning_rate": 0.0003119075580395697, + "loss": 1.3674, + "step": 6652 + }, + { + "epoch": 0.7017932489451477, + "grad_norm": 0.7193159461021423, + "learning_rate": 0.0003117038062936813, + "loss": 1.3233, + "step": 6653 + }, + { + "epoch": 0.7018987341772152, + "grad_norm": 0.7121776938438416, + "learning_rate": 0.0003115001036610669, + "loss": 1.3539, + "step": 6654 + }, + { + "epoch": 0.7020042194092827, + "grad_norm": 0.7297849059104919, + "learning_rate": 0.0003112964501645525, + "loss": 1.3413, + "step": 6655 + }, + { + "epoch": 0.7021097046413503, + "grad_norm": 0.7600197792053223, + "learning_rate": 0.0003110928458269584, + "loss": 1.3222, + "step": 6656 + }, + { + "epoch": 0.7022151898734177, + "grad_norm": 0.7216821312904358, + "learning_rate": 0.00031088929067109945, + "loss": 1.3435, + "step": 6657 + }, + { + "epoch": 0.7023206751054852, + "grad_norm": 0.7107782959938049, + "learning_rate": 0.0003106857847197849, + "loss": 
1.372, + "step": 6658 + }, + { + "epoch": 0.7024261603375528, + "grad_norm": 0.7286760210990906, + "learning_rate": 0.0003104823279958191, + "loss": 1.339, + "step": 6659 + }, + { + "epoch": 0.7025316455696202, + "grad_norm": 0.6875558495521545, + "learning_rate": 0.00031027892052200003, + "loss": 1.317, + "step": 6660 + }, + { + "epoch": 0.7026371308016878, + "grad_norm": 0.7208929061889648, + "learning_rate": 0.0003100755623211205, + "loss": 1.3155, + "step": 6661 + }, + { + "epoch": 0.7027426160337553, + "grad_norm": 0.6967840790748596, + "learning_rate": 0.000309872253415968, + "loss": 1.3503, + "step": 6662 + }, + { + "epoch": 0.7028481012658228, + "grad_norm": 0.6720214486122131, + "learning_rate": 0.00030966899382932404, + "loss": 1.3409, + "step": 6663 + }, + { + "epoch": 0.7029535864978903, + "grad_norm": 0.669586718082428, + "learning_rate": 0.0003094657835839651, + "loss": 1.3213, + "step": 6664 + }, + { + "epoch": 0.7030590717299579, + "grad_norm": 0.6862291693687439, + "learning_rate": 0.00030926262270266177, + "loss": 1.3402, + "step": 6665 + }, + { + "epoch": 0.7031645569620253, + "grad_norm": 0.7047966718673706, + "learning_rate": 0.00030905951120817934, + "loss": 1.3427, + "step": 6666 + }, + { + "epoch": 0.7032700421940928, + "grad_norm": 0.692488431930542, + "learning_rate": 0.00030885644912327713, + "loss": 1.3387, + "step": 6667 + }, + { + "epoch": 0.7033755274261604, + "grad_norm": 0.667799174785614, + "learning_rate": 0.0003086534364707097, + "loss": 1.3406, + "step": 6668 + }, + { + "epoch": 0.7034810126582278, + "grad_norm": 0.6907200217247009, + "learning_rate": 0.00030845047327322556, + "loss": 1.3754, + "step": 6669 + }, + { + "epoch": 0.7035864978902954, + "grad_norm": 0.6694311499595642, + "learning_rate": 0.0003082475595535677, + "loss": 1.3655, + "step": 6670 + }, + { + "epoch": 0.7036919831223629, + "grad_norm": 0.725721001625061, + "learning_rate": 0.0003080446953344735, + "loss": 1.3784, + "step": 6671 + }, + { + "epoch": 
0.7037974683544304, + "grad_norm": 0.7143324017524719, + "learning_rate": 0.000307841880638675, + "loss": 1.3354, + "step": 6672 + }, + { + "epoch": 0.7039029535864979, + "grad_norm": 0.8279668688774109, + "learning_rate": 0.0003076391154888985, + "loss": 1.3657, + "step": 6673 + }, + { + "epoch": 0.7040084388185655, + "grad_norm": 0.6760342717170715, + "learning_rate": 0.000307436399907865, + "loss": 1.3792, + "step": 6674 + }, + { + "epoch": 0.7041139240506329, + "grad_norm": 0.7371913194656372, + "learning_rate": 0.00030723373391828966, + "loss": 1.3166, + "step": 6675 + }, + { + "epoch": 0.7042194092827004, + "grad_norm": 0.6686314940452576, + "learning_rate": 0.00030703111754288204, + "loss": 1.3467, + "step": 6676 + }, + { + "epoch": 0.704324894514768, + "grad_norm": 0.7261573076248169, + "learning_rate": 0.0003068285508043467, + "loss": 1.3416, + "step": 6677 + }, + { + "epoch": 0.7044303797468354, + "grad_norm": 0.7308847904205322, + "learning_rate": 0.00030662603372538224, + "loss": 1.355, + "step": 6678 + }, + { + "epoch": 0.704535864978903, + "grad_norm": 0.7195906639099121, + "learning_rate": 0.0003064235663286815, + "loss": 1.3023, + "step": 6679 + }, + { + "epoch": 0.7046413502109705, + "grad_norm": 0.6755917072296143, + "learning_rate": 0.00030622114863693205, + "loss": 1.3171, + "step": 6680 + }, + { + "epoch": 0.704746835443038, + "grad_norm": 0.6675183176994324, + "learning_rate": 0.00030601878067281575, + "loss": 1.3424, + "step": 6681 + }, + { + "epoch": 0.7048523206751055, + "grad_norm": 0.7260431051254272, + "learning_rate": 0.00030581646245900895, + "loss": 1.3482, + "step": 6682 + }, + { + "epoch": 0.7049578059071729, + "grad_norm": 0.7447408437728882, + "learning_rate": 0.0003056141940181825, + "loss": 1.3181, + "step": 6683 + }, + { + "epoch": 0.7050632911392405, + "grad_norm": 0.6649128198623657, + "learning_rate": 0.0003054119753730012, + "loss": 1.2993, + "step": 6684 + }, + { + "epoch": 0.705168776371308, + "grad_norm": 
0.6646566390991211, + "learning_rate": 0.00030520980654612527, + "loss": 1.3473, + "step": 6685 + }, + { + "epoch": 0.7052742616033755, + "grad_norm": 0.6800806522369385, + "learning_rate": 0.0003050076875602084, + "loss": 1.3457, + "step": 6686 + }, + { + "epoch": 0.705379746835443, + "grad_norm": 0.6932005286216736, + "learning_rate": 0.0003048056184378991, + "loss": 1.3423, + "step": 6687 + }, + { + "epoch": 0.7054852320675106, + "grad_norm": 0.6778120994567871, + "learning_rate": 0.0003046035992018402, + "loss": 1.3147, + "step": 6688 + }, + { + "epoch": 0.705590717299578, + "grad_norm": 0.6643600463867188, + "learning_rate": 0.00030440162987466896, + "loss": 1.3713, + "step": 6689 + }, + { + "epoch": 0.7056962025316456, + "grad_norm": 0.7982460856437683, + "learning_rate": 0.00030419971047901704, + "loss": 1.3338, + "step": 6690 + }, + { + "epoch": 0.7058016877637131, + "grad_norm": 0.6420358419418335, + "learning_rate": 0.00030399784103751044, + "loss": 1.3428, + "step": 6691 + }, + { + "epoch": 0.7059071729957805, + "grad_norm": 0.6781007647514343, + "learning_rate": 0.0003037960215727699, + "loss": 1.3505, + "step": 6692 + }, + { + "epoch": 0.7060126582278481, + "grad_norm": 0.7151222229003906, + "learning_rate": 0.0003035942521074097, + "loss": 1.361, + "step": 6693 + }, + { + "epoch": 0.7061181434599156, + "grad_norm": 0.6733508706092834, + "learning_rate": 0.0003033925326640398, + "loss": 1.3507, + "step": 6694 + }, + { + "epoch": 0.7062236286919831, + "grad_norm": 0.720792829990387, + "learning_rate": 0.00030319086326526364, + "loss": 1.3319, + "step": 6695 + }, + { + "epoch": 0.7063291139240506, + "grad_norm": 0.7530568242073059, + "learning_rate": 0.00030298924393367923, + "loss": 1.346, + "step": 6696 + }, + { + "epoch": 0.7064345991561182, + "grad_norm": 0.7020817399024963, + "learning_rate": 0.0003027876746918791, + "loss": 1.3163, + "step": 6697 + }, + { + "epoch": 0.7065400843881856, + "grad_norm": 0.7001156806945801, + "learning_rate": 
0.00030258615556244995, + "loss": 1.364, + "step": 6698 + }, + { + "epoch": 0.7066455696202532, + "grad_norm": 0.7061696648597717, + "learning_rate": 0.0003023846865679731, + "loss": 1.3, + "step": 6699 + }, + { + "epoch": 0.7067510548523207, + "grad_norm": 0.6464085578918457, + "learning_rate": 0.00030218326773102407, + "loss": 1.3394, + "step": 6700 + }, + { + "epoch": 0.7068565400843881, + "grad_norm": 0.6611272096633911, + "learning_rate": 0.000301981899074173, + "loss": 1.3635, + "step": 6701 + }, + { + "epoch": 0.7069620253164557, + "grad_norm": 0.6904994249343872, + "learning_rate": 0.00030178058061998387, + "loss": 1.3567, + "step": 6702 + }, + { + "epoch": 0.7070675105485232, + "grad_norm": 0.6708048582077026, + "learning_rate": 0.00030157931239101595, + "loss": 1.326, + "step": 6703 + }, + { + "epoch": 0.7071729957805907, + "grad_norm": 0.6767542958259583, + "learning_rate": 0.00030137809440982207, + "loss": 1.3227, + "step": 6704 + }, + { + "epoch": 0.7072784810126582, + "grad_norm": 0.6846287846565247, + "learning_rate": 0.0003011769266989498, + "loss": 1.3625, + "step": 6705 + }, + { + "epoch": 0.7073839662447258, + "grad_norm": 0.7589588761329651, + "learning_rate": 0.0003009758092809409, + "loss": 1.3341, + "step": 6706 + }, + { + "epoch": 0.7074894514767932, + "grad_norm": 0.6722277998924255, + "learning_rate": 0.00030077474217833167, + "loss": 1.3522, + "step": 6707 + }, + { + "epoch": 0.7075949367088608, + "grad_norm": 0.7763364911079407, + "learning_rate": 0.0003005737254136525, + "loss": 1.3225, + "step": 6708 + }, + { + "epoch": 0.7077004219409283, + "grad_norm": 0.6865065097808838, + "learning_rate": 0.0003003727590094285, + "loss": 1.3472, + "step": 6709 + }, + { + "epoch": 0.7078059071729957, + "grad_norm": 0.6746013760566711, + "learning_rate": 0.00030017184298817873, + "loss": 1.3226, + "step": 6710 + }, + { + "epoch": 0.7079113924050633, + "grad_norm": 0.6497450470924377, + "learning_rate": 0.0002999709773724171, + "loss": 1.3148, + 
"step": 6711 + }, + { + "epoch": 0.7080168776371308, + "grad_norm": 0.6966383457183838, + "learning_rate": 0.00029977016218465154, + "loss": 1.3354, + "step": 6712 + }, + { + "epoch": 0.7081223628691983, + "grad_norm": 0.7672020196914673, + "learning_rate": 0.0002995693974473844, + "loss": 1.3374, + "step": 6713 + }, + { + "epoch": 0.7082278481012658, + "grad_norm": 0.7459972500801086, + "learning_rate": 0.00029936868318311235, + "loss": 1.3351, + "step": 6714 + }, + { + "epoch": 0.7083333333333334, + "grad_norm": 0.7400045394897461, + "learning_rate": 0.00029916801941432637, + "loss": 1.2857, + "step": 6715 + }, + { + "epoch": 0.7084388185654008, + "grad_norm": 0.8035005927085876, + "learning_rate": 0.00029896740616351187, + "loss": 1.3048, + "step": 6716 + }, + { + "epoch": 0.7085443037974684, + "grad_norm": 0.767734944820404, + "learning_rate": 0.00029876684345314853, + "loss": 1.3744, + "step": 6717 + }, + { + "epoch": 0.7086497890295359, + "grad_norm": 0.8328319787979126, + "learning_rate": 0.00029856633130571046, + "loss": 1.3067, + "step": 6718 + }, + { + "epoch": 0.7087552742616033, + "grad_norm": 0.6826788783073425, + "learning_rate": 0.00029836586974366574, + "loss": 1.3469, + "step": 6719 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 0.811160147190094, + "learning_rate": 0.00029816545878947763, + "loss": 1.3619, + "step": 6720 + }, + { + "epoch": 0.7089662447257384, + "grad_norm": 0.7497610449790955, + "learning_rate": 0.00029796509846560294, + "loss": 1.3334, + "step": 6721 + }, + { + "epoch": 0.7090717299578059, + "grad_norm": 0.6742927432060242, + "learning_rate": 0.00029776478879449305, + "loss": 1.3716, + "step": 6722 + }, + { + "epoch": 0.7091772151898734, + "grad_norm": 0.6665036082267761, + "learning_rate": 0.0002975645297985935, + "loss": 1.3398, + "step": 6723 + }, + { + "epoch": 0.709282700421941, + "grad_norm": 0.9047197699546814, + "learning_rate": 0.0002973643215003445, + "loss": 1.3343, + "step": 6724 + }, + { + "epoch": 
0.7093881856540084, + "grad_norm": 0.705837607383728, + "learning_rate": 0.0002971641639221804, + "loss": 1.3279, + "step": 6725 + }, + { + "epoch": 0.709493670886076, + "grad_norm": 0.7633739113807678, + "learning_rate": 0.00029696405708652966, + "loss": 1.3468, + "step": 6726 + }, + { + "epoch": 0.7095991561181435, + "grad_norm": 0.6783986687660217, + "learning_rate": 0.00029676400101581545, + "loss": 1.3368, + "step": 6727 + }, + { + "epoch": 0.7097046413502109, + "grad_norm": 0.6655752062797546, + "learning_rate": 0.0002965639957324546, + "loss": 1.3574, + "step": 6728 + }, + { + "epoch": 0.7098101265822785, + "grad_norm": 0.6935710310935974, + "learning_rate": 0.00029636404125885936, + "loss": 1.3343, + "step": 6729 + }, + { + "epoch": 0.709915611814346, + "grad_norm": 0.7092604041099548, + "learning_rate": 0.00029616413761743537, + "loss": 1.3269, + "step": 6730 + }, + { + "epoch": 0.7100210970464135, + "grad_norm": 0.710844874382019, + "learning_rate": 0.0002959642848305828, + "loss": 1.3574, + "step": 6731 + }, + { + "epoch": 0.710126582278481, + "grad_norm": 0.6646975874900818, + "learning_rate": 0.0002957644829206961, + "loss": 1.3383, + "step": 6732 + }, + { + "epoch": 0.7102320675105486, + "grad_norm": 0.8473820686340332, + "learning_rate": 0.0002955647319101641, + "loss": 1.3475, + "step": 6733 + }, + { + "epoch": 0.710337552742616, + "grad_norm": 0.6841672658920288, + "learning_rate": 0.00029536503182137, + "loss": 1.3174, + "step": 6734 + }, + { + "epoch": 0.7104430379746836, + "grad_norm": 0.6553299427032471, + "learning_rate": 0.00029516538267669096, + "loss": 1.344, + "step": 6735 + }, + { + "epoch": 0.7105485232067511, + "grad_norm": 0.842616856098175, + "learning_rate": 0.00029496578449849867, + "loss": 1.3582, + "step": 6736 + }, + { + "epoch": 0.7106540084388185, + "grad_norm": 0.7105711102485657, + "learning_rate": 0.00029476623730915943, + "loss": 1.3179, + "step": 6737 + }, + { + "epoch": 0.7107594936708861, + "grad_norm": 
0.6595862507820129, + "learning_rate": 0.00029456674113103335, + "loss": 1.3472, + "step": 6738 + }, + { + "epoch": 0.7108649789029536, + "grad_norm": 0.7161896228790283, + "learning_rate": 0.00029436729598647483, + "loss": 1.3755, + "step": 6739 + }, + { + "epoch": 0.7109704641350211, + "grad_norm": 0.8191161155700684, + "learning_rate": 0.00029416790189783286, + "loss": 1.3315, + "step": 6740 + }, + { + "epoch": 0.7110759493670886, + "grad_norm": 0.7397555708885193, + "learning_rate": 0.00029396855888745045, + "loss": 1.3394, + "step": 6741 + }, + { + "epoch": 0.7111814345991562, + "grad_norm": 0.6892467141151428, + "learning_rate": 0.00029376926697766495, + "loss": 1.3439, + "step": 6742 + }, + { + "epoch": 0.7112869198312236, + "grad_norm": 0.6617898941040039, + "learning_rate": 0.00029357002619080814, + "loss": 1.324, + "step": 6743 + }, + { + "epoch": 0.7113924050632912, + "grad_norm": 0.7001317739486694, + "learning_rate": 0.0002933708365492058, + "loss": 1.3142, + "step": 6744 + }, + { + "epoch": 0.7114978902953587, + "grad_norm": 0.6644605994224548, + "learning_rate": 0.00029317169807517785, + "loss": 1.378, + "step": 6745 + }, + { + "epoch": 0.7116033755274261, + "grad_norm": 0.7040655612945557, + "learning_rate": 0.00029297261079103945, + "loss": 1.3601, + "step": 6746 + }, + { + "epoch": 0.7117088607594937, + "grad_norm": 0.6650591492652893, + "learning_rate": 0.000292773574719099, + "loss": 1.3569, + "step": 6747 + }, + { + "epoch": 0.7118143459915611, + "grad_norm": 0.7119704484939575, + "learning_rate": 0.0002925745898816594, + "loss": 1.3431, + "step": 6748 + }, + { + "epoch": 0.7119198312236287, + "grad_norm": 0.6876219511032104, + "learning_rate": 0.0002923756563010179, + "loss": 1.3746, + "step": 6749 + }, + { + "epoch": 0.7120253164556962, + "grad_norm": 0.6692550182342529, + "learning_rate": 0.000292176773999466, + "loss": 1.3401, + "step": 6750 + }, + { + "epoch": 0.7121308016877637, + "grad_norm": 0.7636682987213135, + "learning_rate": 
0.0002919779429992895, + "loss": 1.3148, + "step": 6751 + }, + { + "epoch": 0.7122362869198312, + "grad_norm": 0.6432097554206848, + "learning_rate": 0.0002917791633227685, + "loss": 1.3234, + "step": 6752 + }, + { + "epoch": 0.7123417721518988, + "grad_norm": 0.6831169128417969, + "learning_rate": 0.000291580434992177, + "loss": 1.3289, + "step": 6753 + }, + { + "epoch": 0.7124472573839662, + "grad_norm": 0.7837585806846619, + "learning_rate": 0.00029138175802978343, + "loss": 1.385, + "step": 6754 + }, + { + "epoch": 0.7125527426160337, + "grad_norm": 0.6702820658683777, + "learning_rate": 0.00029118313245785104, + "loss": 1.3399, + "step": 6755 + }, + { + "epoch": 0.7126582278481013, + "grad_norm": 0.8149198293685913, + "learning_rate": 0.00029098455829863653, + "loss": 1.304, + "step": 6756 + }, + { + "epoch": 0.7127637130801687, + "grad_norm": 0.6894854307174683, + "learning_rate": 0.0002907860355743911, + "loss": 1.3537, + "step": 6757 + }, + { + "epoch": 0.7128691983122363, + "grad_norm": 0.7407640218734741, + "learning_rate": 0.00029058756430736025, + "loss": 1.3611, + "step": 6758 + }, + { + "epoch": 0.7129746835443038, + "grad_norm": 0.7014791369438171, + "learning_rate": 0.0002903891445197836, + "loss": 1.3544, + "step": 6759 + }, + { + "epoch": 0.7130801687763713, + "grad_norm": 0.7537114024162292, + "learning_rate": 0.0002901907762338952, + "loss": 1.3237, + "step": 6760 + }, + { + "epoch": 0.7131856540084388, + "grad_norm": 0.7278181314468384, + "learning_rate": 0.0002899924594719231, + "loss": 1.3567, + "step": 6761 + }, + { + "epoch": 0.7132911392405064, + "grad_norm": 0.6522688269615173, + "learning_rate": 0.0002897941942560894, + "loss": 1.3292, + "step": 6762 + }, + { + "epoch": 0.7133966244725738, + "grad_norm": 0.7334771156311035, + "learning_rate": 0.0002895959806086114, + "loss": 1.3498, + "step": 6763 + }, + { + "epoch": 0.7135021097046413, + "grad_norm": 0.9502294063568115, + "learning_rate": 0.0002893978185516995, + "loss": 1.3513, + 
"step": 6764 + }, + { + "epoch": 0.7136075949367089, + "grad_norm": 0.6888124346733093, + "learning_rate": 0.00028919970810755883, + "loss": 1.3571, + "step": 6765 + }, + { + "epoch": 0.7137130801687763, + "grad_norm": 0.9627870917320251, + "learning_rate": 0.0002890016492983886, + "loss": 1.3053, + "step": 6766 + }, + { + "epoch": 0.7138185654008439, + "grad_norm": 0.6996039152145386, + "learning_rate": 0.0002888036421463823, + "loss": 1.3285, + "step": 6767 + }, + { + "epoch": 0.7139240506329114, + "grad_norm": 0.7271669507026672, + "learning_rate": 0.0002886056866737277, + "loss": 1.3417, + "step": 6768 + }, + { + "epoch": 0.7140295358649789, + "grad_norm": 0.7866869568824768, + "learning_rate": 0.0002884077829026066, + "loss": 1.3561, + "step": 6769 + }, + { + "epoch": 0.7141350210970464, + "grad_norm": 0.6726711988449097, + "learning_rate": 0.0002882099308551951, + "loss": 1.3428, + "step": 6770 + }, + { + "epoch": 0.714240506329114, + "grad_norm": 0.6982660889625549, + "learning_rate": 0.00028801213055366335, + "loss": 1.399, + "step": 6771 + }, + { + "epoch": 0.7143459915611814, + "grad_norm": 0.670466423034668, + "learning_rate": 0.00028781438202017613, + "loss": 1.3313, + "step": 6772 + }, + { + "epoch": 0.7144514767932489, + "grad_norm": 0.6880061030387878, + "learning_rate": 0.0002876166852768923, + "loss": 1.3596, + "step": 6773 + }, + { + "epoch": 0.7145569620253165, + "grad_norm": 0.7545713782310486, + "learning_rate": 0.0002874190403459644, + "loss": 1.3041, + "step": 6774 + }, + { + "epoch": 0.7146624472573839, + "grad_norm": 0.6517192125320435, + "learning_rate": 0.0002872214472495397, + "loss": 1.3231, + "step": 6775 + }, + { + "epoch": 0.7147679324894515, + "grad_norm": 0.7664575576782227, + "learning_rate": 0.00028702390600975937, + "loss": 1.3121, + "step": 6776 + }, + { + "epoch": 0.714873417721519, + "grad_norm": 0.6818080544471741, + "learning_rate": 0.0002868264166487591, + "loss": 1.3382, + "step": 6777 + }, + { + "epoch": 
0.7149789029535865, + "grad_norm": 0.6515604257583618, + "learning_rate": 0.0002866289791886684, + "loss": 1.3529, + "step": 6778 + }, + { + "epoch": 0.715084388185654, + "grad_norm": 0.7634026408195496, + "learning_rate": 0.00028643159365161113, + "loss": 1.3405, + "step": 6779 + }, + { + "epoch": 0.7151898734177216, + "grad_norm": 0.6865857839584351, + "learning_rate": 0.00028623426005970517, + "loss": 1.3479, + "step": 6780 + }, + { + "epoch": 0.715295358649789, + "grad_norm": 0.6865618824958801, + "learning_rate": 0.00028603697843506315, + "loss": 1.3677, + "step": 6781 + }, + { + "epoch": 0.7154008438818565, + "grad_norm": 0.7267800569534302, + "learning_rate": 0.00028583974879979113, + "loss": 1.3303, + "step": 6782 + }, + { + "epoch": 0.7155063291139241, + "grad_norm": 0.6934226751327515, + "learning_rate": 0.00028564257117598993, + "loss": 1.3239, + "step": 6783 + }, + { + "epoch": 0.7156118143459915, + "grad_norm": 0.9101791977882385, + "learning_rate": 0.00028544544558575395, + "loss": 1.3765, + "step": 6784 + }, + { + "epoch": 0.7157172995780591, + "grad_norm": 0.7432423233985901, + "learning_rate": 0.0002852483720511724, + "loss": 1.3553, + "step": 6785 + }, + { + "epoch": 0.7158227848101266, + "grad_norm": 0.9506931304931641, + "learning_rate": 0.0002850513505943281, + "loss": 1.3077, + "step": 6786 + }, + { + "epoch": 0.7159282700421941, + "grad_norm": 0.8364480137825012, + "learning_rate": 0.0002848543812372986, + "loss": 1.3275, + "step": 6787 + }, + { + "epoch": 0.7160337552742616, + "grad_norm": 0.6796891689300537, + "learning_rate": 0.00028465746400215463, + "loss": 1.3481, + "step": 6788 + }, + { + "epoch": 0.7161392405063292, + "grad_norm": 0.726295530796051, + "learning_rate": 0.00028446059891096265, + "loss": 1.327, + "step": 6789 + }, + { + "epoch": 0.7162447257383966, + "grad_norm": 0.8656126856803894, + "learning_rate": 0.00028426378598578187, + "loss": 1.3813, + "step": 6790 + }, + { + "epoch": 0.7163502109704641, + "grad_norm": 
0.6643029451370239, + "learning_rate": 0.0002840670252486662, + "loss": 1.3322, + "step": 6791 + }, + { + "epoch": 0.7164556962025317, + "grad_norm": 0.8265358805656433, + "learning_rate": 0.00028387031672166385, + "loss": 1.3379, + "step": 6792 + }, + { + "epoch": 0.7165611814345991, + "grad_norm": 0.7817733287811279, + "learning_rate": 0.0002836736604268167, + "loss": 1.3499, + "step": 6793 + }, + { + "epoch": 0.7166666666666667, + "grad_norm": 0.7717934846878052, + "learning_rate": 0.0002834770563861613, + "loss": 1.3275, + "step": 6794 + }, + { + "epoch": 0.7167721518987342, + "grad_norm": 0.664434015750885, + "learning_rate": 0.000283280504621728, + "loss": 1.3363, + "step": 6795 + }, + { + "epoch": 0.7168776371308017, + "grad_norm": 0.7503387928009033, + "learning_rate": 0.0002830840051555414, + "loss": 1.2922, + "step": 6796 + }, + { + "epoch": 0.7169831223628692, + "grad_norm": 0.7137969732284546, + "learning_rate": 0.00028288755800962, + "loss": 1.3351, + "step": 6797 + }, + { + "epoch": 0.7170886075949368, + "grad_norm": 0.6570005416870117, + "learning_rate": 0.00028269116320597733, + "loss": 1.3381, + "step": 6798 + }, + { + "epoch": 0.7171940928270042, + "grad_norm": 0.8839870691299438, + "learning_rate": 0.0002824948207666199, + "loss": 1.3646, + "step": 6799 + }, + { + "epoch": 0.7172995780590717, + "grad_norm": 0.6736231446266174, + "learning_rate": 0.0002822985307135491, + "loss": 1.3056, + "step": 6800 + }, + { + "epoch": 0.7174050632911393, + "grad_norm": 0.8214830756187439, + "learning_rate": 0.00028210229306876, + "loss": 1.3759, + "step": 6801 + }, + { + "epoch": 0.7175105485232067, + "grad_norm": 0.7100083827972412, + "learning_rate": 0.0002819061078542422, + "loss": 1.3242, + "step": 6802 + }, + { + "epoch": 0.7176160337552743, + "grad_norm": 0.6726813316345215, + "learning_rate": 0.0002817099750919791, + "loss": 1.3428, + "step": 6803 + }, + { + "epoch": 0.7177215189873418, + "grad_norm": 0.6960775256156921, + "learning_rate": 
0.0002815138948039485, + "loss": 1.2993, + "step": 6804 + }, + { + "epoch": 0.7178270042194093, + "grad_norm": 0.7249703407287598, + "learning_rate": 0.000281317867012122, + "loss": 1.3597, + "step": 6805 + }, + { + "epoch": 0.7179324894514768, + "grad_norm": 0.7405743598937988, + "learning_rate": 0.0002811218917384652, + "loss": 1.3542, + "step": 6806 + }, + { + "epoch": 0.7180379746835444, + "grad_norm": 0.7836902141571045, + "learning_rate": 0.00028092596900493885, + "loss": 1.3212, + "step": 6807 + }, + { + "epoch": 0.7181434599156118, + "grad_norm": 1.009305715560913, + "learning_rate": 0.00028073009883349665, + "loss": 1.3637, + "step": 6808 + }, + { + "epoch": 0.7182489451476793, + "grad_norm": 0.6932045221328735, + "learning_rate": 0.00028053428124608684, + "loss": 1.3437, + "step": 6809 + }, + { + "epoch": 0.7183544303797469, + "grad_norm": 0.8421967029571533, + "learning_rate": 0.0002803385162646518, + "loss": 1.281, + "step": 6810 + }, + { + "epoch": 0.7184599156118143, + "grad_norm": 0.725018322467804, + "learning_rate": 0.0002801428039111279, + "loss": 1.3645, + "step": 6811 + }, + { + "epoch": 0.7185654008438819, + "grad_norm": 0.7439923882484436, + "learning_rate": 0.0002799471442074459, + "loss": 1.359, + "step": 6812 + }, + { + "epoch": 0.7186708860759494, + "grad_norm": 0.8386185169219971, + "learning_rate": 0.00027975153717553014, + "loss": 1.3475, + "step": 6813 + }, + { + "epoch": 0.7187763713080169, + "grad_norm": 0.6986480355262756, + "learning_rate": 0.00027955598283729936, + "loss": 1.3142, + "step": 6814 + }, + { + "epoch": 0.7188818565400844, + "grad_norm": 0.6610730886459351, + "learning_rate": 0.00027936048121466673, + "loss": 1.3518, + "step": 6815 + }, + { + "epoch": 0.7189873417721518, + "grad_norm": 0.7606436014175415, + "learning_rate": 0.00027916503232953895, + "loss": 1.3334, + "step": 6816 + }, + { + "epoch": 0.7190928270042194, + "grad_norm": 0.7210558652877808, + "learning_rate": 0.0002789696362038172, + "loss": 1.3357, + 
"step": 6817 + }, + { + "epoch": 0.7191983122362869, + "grad_norm": 0.694450855255127, + "learning_rate": 0.0002787742928593965, + "loss": 1.3258, + "step": 6818 + }, + { + "epoch": 0.7193037974683544, + "grad_norm": 0.7453288435935974, + "learning_rate": 0.00027857900231816594, + "loss": 1.3254, + "step": 6819 + }, + { + "epoch": 0.7194092827004219, + "grad_norm": 0.7144908905029297, + "learning_rate": 0.0002783837646020089, + "loss": 1.3433, + "step": 6820 + }, + { + "epoch": 0.7195147679324895, + "grad_norm": 0.6988887786865234, + "learning_rate": 0.00027818857973280274, + "loss": 1.3462, + "step": 6821 + }, + { + "epoch": 0.7196202531645569, + "grad_norm": 0.9473550319671631, + "learning_rate": 0.0002779934477324189, + "loss": 1.393, + "step": 6822 + }, + { + "epoch": 0.7197257383966245, + "grad_norm": 0.6871075630187988, + "learning_rate": 0.0002777983686227226, + "loss": 1.3012, + "step": 6823 + }, + { + "epoch": 0.719831223628692, + "grad_norm": 0.738423228263855, + "learning_rate": 0.00027760334242557397, + "loss": 1.3371, + "step": 6824 + }, + { + "epoch": 0.7199367088607594, + "grad_norm": 0.825249433517456, + "learning_rate": 0.00027740836916282643, + "loss": 1.3414, + "step": 6825 + }, + { + "epoch": 0.720042194092827, + "grad_norm": 0.7514417767524719, + "learning_rate": 0.00027721344885632765, + "loss": 1.3126, + "step": 6826 + }, + { + "epoch": 0.7201476793248945, + "grad_norm": 0.7421851754188538, + "learning_rate": 0.0002770185815279195, + "loss": 1.3734, + "step": 6827 + }, + { + "epoch": 0.720253164556962, + "grad_norm": 0.6649591326713562, + "learning_rate": 0.0002768237671994377, + "loss": 1.3338, + "step": 6828 + }, + { + "epoch": 0.7203586497890295, + "grad_norm": 0.7038993835449219, + "learning_rate": 0.0002766290058927123, + "loss": 1.3215, + "step": 6829 + }, + { + "epoch": 0.7204641350210971, + "grad_norm": 0.6709404587745667, + "learning_rate": 0.0002764342976295673, + "loss": 1.3341, + "step": 6830 + }, + { + "epoch": 
0.7205696202531645, + "grad_norm": 0.7334896326065063, + "learning_rate": 0.0002762396424318206, + "loss": 1.3462, + "step": 6831 + }, + { + "epoch": 0.7206751054852321, + "grad_norm": 0.7224386930465698, + "learning_rate": 0.000276045040321284, + "loss": 1.3168, + "step": 6832 + }, + { + "epoch": 0.7207805907172996, + "grad_norm": 0.6879796385765076, + "learning_rate": 0.0002758504913197644, + "loss": 1.3289, + "step": 6833 + }, + { + "epoch": 0.720886075949367, + "grad_norm": 0.6520503163337708, + "learning_rate": 0.0002756559954490615, + "loss": 1.3209, + "step": 6834 + }, + { + "epoch": 0.7209915611814346, + "grad_norm": 0.7964606285095215, + "learning_rate": 0.0002754615527309696, + "loss": 1.3288, + "step": 6835 + }, + { + "epoch": 0.7210970464135021, + "grad_norm": 0.7710965275764465, + "learning_rate": 0.000275267163187277, + "loss": 1.3228, + "step": 6836 + }, + { + "epoch": 0.7212025316455696, + "grad_norm": 0.7303925156593323, + "learning_rate": 0.00027507282683976594, + "loss": 1.3236, + "step": 6837 + }, + { + "epoch": 0.7213080168776371, + "grad_norm": 0.6611430644989014, + "learning_rate": 0.0002748785437102129, + "loss": 1.3457, + "step": 6838 + }, + { + "epoch": 0.7214135021097047, + "grad_norm": 0.6862860321998596, + "learning_rate": 0.00027468431382038816, + "loss": 1.3176, + "step": 6839 + }, + { + "epoch": 0.7215189873417721, + "grad_norm": 0.8282113075256348, + "learning_rate": 0.00027449013719205623, + "loss": 1.314, + "step": 6840 + }, + { + "epoch": 0.7216244725738397, + "grad_norm": 0.7165990471839905, + "learning_rate": 0.00027429601384697526, + "loss": 1.3219, + "step": 6841 + }, + { + "epoch": 0.7217299578059072, + "grad_norm": 0.7700639367103577, + "learning_rate": 0.00027410194380689826, + "loss": 1.3394, + "step": 6842 + }, + { + "epoch": 0.7218354430379746, + "grad_norm": 0.7984527945518494, + "learning_rate": 0.00027390792709357155, + "loss": 1.3273, + "step": 6843 + }, + { + "epoch": 0.7219409282700422, + "grad_norm": 
0.6936763525009155, + "learning_rate": 0.00027371396372873557, + "loss": 1.3518, + "step": 6844 + }, + { + "epoch": 0.7220464135021097, + "grad_norm": 0.79227614402771, + "learning_rate": 0.00027352005373412487, + "loss": 1.3352, + "step": 6845 + }, + { + "epoch": 0.7221518987341772, + "grad_norm": 0.702794075012207, + "learning_rate": 0.00027332619713146816, + "loss": 1.3559, + "step": 6846 + }, + { + "epoch": 0.7222573839662447, + "grad_norm": 0.676999032497406, + "learning_rate": 0.000273132393942488, + "loss": 1.3449, + "step": 6847 + }, + { + "epoch": 0.7223628691983123, + "grad_norm": 0.7484561800956726, + "learning_rate": 0.000272938644188901, + "loss": 1.317, + "step": 6848 + }, + { + "epoch": 0.7224683544303797, + "grad_norm": 0.708398699760437, + "learning_rate": 0.00027274494789241766, + "loss": 1.3449, + "step": 6849 + }, + { + "epoch": 0.7225738396624473, + "grad_norm": 0.6499912142753601, + "learning_rate": 0.00027255130507474276, + "loss": 1.3119, + "step": 6850 + }, + { + "epoch": 0.7226793248945148, + "grad_norm": 0.7858952283859253, + "learning_rate": 0.00027235771575757466, + "loss": 1.3467, + "step": 6851 + }, + { + "epoch": 0.7227848101265822, + "grad_norm": 0.6999127864837646, + "learning_rate": 0.00027216417996260654, + "loss": 1.353, + "step": 6852 + }, + { + "epoch": 0.7228902953586498, + "grad_norm": 0.7746074795722961, + "learning_rate": 0.00027197069771152464, + "loss": 1.3215, + "step": 6853 + }, + { + "epoch": 0.7229957805907173, + "grad_norm": 0.6709668636322021, + "learning_rate": 0.0002717772690260098, + "loss": 1.3326, + "step": 6854 + }, + { + "epoch": 0.7231012658227848, + "grad_norm": 0.7159369587898254, + "learning_rate": 0.0002715838939277366, + "loss": 1.2912, + "step": 6855 + }, + { + "epoch": 0.7232067510548523, + "grad_norm": 0.6679193377494812, + "learning_rate": 0.0002713905724383737, + "loss": 1.3274, + "step": 6856 + }, + { + "epoch": 0.7233122362869199, + "grad_norm": 0.7449219226837158, + "learning_rate": 
0.00027119730457958376, + "loss": 1.2874, + "step": 6857 + }, + { + "epoch": 0.7234177215189873, + "grad_norm": 0.7574183940887451, + "learning_rate": 0.0002710040903730233, + "loss": 1.3629, + "step": 6858 + }, + { + "epoch": 0.7235232067510549, + "grad_norm": 0.656396210193634, + "learning_rate": 0.00027081092984034303, + "loss": 1.3618, + "step": 6859 + }, + { + "epoch": 0.7236286919831224, + "grad_norm": 0.7165781855583191, + "learning_rate": 0.00027061782300318726, + "loss": 1.3181, + "step": 6860 + }, + { + "epoch": 0.7237341772151898, + "grad_norm": 0.7117987871170044, + "learning_rate": 0.0002704247698831951, + "loss": 1.2912, + "step": 6861 + }, + { + "epoch": 0.7238396624472574, + "grad_norm": 0.7078778147697449, + "learning_rate": 0.00027023177050199885, + "loss": 1.3653, + "step": 6862 + }, + { + "epoch": 0.7239451476793249, + "grad_norm": 0.80256587266922, + "learning_rate": 0.00027003882488122507, + "loss": 1.3387, + "step": 6863 + }, + { + "epoch": 0.7240506329113924, + "grad_norm": 0.6978937983512878, + "learning_rate": 0.0002698459330424942, + "loss": 1.3581, + "step": 6864 + }, + { + "epoch": 0.7241561181434599, + "grad_norm": 0.6878658533096313, + "learning_rate": 0.0002696530950074208, + "loss": 1.3303, + "step": 6865 + }, + { + "epoch": 0.7242616033755275, + "grad_norm": 0.6931847333908081, + "learning_rate": 0.00026946031079761346, + "loss": 1.321, + "step": 6866 + }, + { + "epoch": 0.7243670886075949, + "grad_norm": 0.6873785257339478, + "learning_rate": 0.00026926758043467435, + "loss": 1.3175, + "step": 6867 + }, + { + "epoch": 0.7244725738396625, + "grad_norm": 0.6500967741012573, + "learning_rate": 0.00026907490394020004, + "loss": 1.3343, + "step": 6868 + }, + { + "epoch": 0.72457805907173, + "grad_norm": 0.6775810718536377, + "learning_rate": 0.00026888228133578086, + "loss": 1.3069, + "step": 6869 + }, + { + "epoch": 0.7246835443037974, + "grad_norm": 0.72084641456604, + "learning_rate": 0.0002686897126430009, + "loss": 1.3168, + 
"step": 6870 + }, + { + "epoch": 0.724789029535865, + "grad_norm": 0.6642606854438782, + "learning_rate": 0.0002684971978834389, + "loss": 1.3364, + "step": 6871 + }, + { + "epoch": 0.7248945147679325, + "grad_norm": 0.653588593006134, + "learning_rate": 0.00026830473707866684, + "loss": 1.3398, + "step": 6872 + }, + { + "epoch": 0.725, + "grad_norm": 0.7602398991584778, + "learning_rate": 0.00026811233025025096, + "loss": 1.3291, + "step": 6873 + }, + { + "epoch": 0.7251054852320675, + "grad_norm": 0.7114037871360779, + "learning_rate": 0.00026791997741975134, + "loss": 1.3321, + "step": 6874 + }, + { + "epoch": 0.7252109704641351, + "grad_norm": 0.8714654445648193, + "learning_rate": 0.00026772767860872216, + "loss": 1.3285, + "step": 6875 + }, + { + "epoch": 0.7253164556962025, + "grad_norm": 0.7026551365852356, + "learning_rate": 0.00026753543383871143, + "loss": 1.3274, + "step": 6876 + }, + { + "epoch": 0.7254219409282701, + "grad_norm": 0.6976476311683655, + "learning_rate": 0.0002673432431312611, + "loss": 1.3205, + "step": 6877 + }, + { + "epoch": 0.7255274261603376, + "grad_norm": 0.9343953132629395, + "learning_rate": 0.0002671511065079071, + "loss": 1.335, + "step": 6878 + }, + { + "epoch": 0.725632911392405, + "grad_norm": 0.7330653667449951, + "learning_rate": 0.00026695902399017935, + "loss": 1.3668, + "step": 6879 + }, + { + "epoch": 0.7257383966244726, + "grad_norm": 0.7529540061950684, + "learning_rate": 0.00026676699559960145, + "loss": 1.3407, + "step": 6880 + }, + { + "epoch": 0.72584388185654, + "grad_norm": 0.7241541743278503, + "learning_rate": 0.0002665750213576914, + "loss": 1.3286, + "step": 6881 + }, + { + "epoch": 0.7259493670886076, + "grad_norm": 0.6807400584220886, + "learning_rate": 0.0002663831012859609, + "loss": 1.3294, + "step": 6882 + }, + { + "epoch": 0.7260548523206751, + "grad_norm": 0.8603132963180542, + "learning_rate": 0.0002661912354059154, + "loss": 1.3259, + "step": 6883 + }, + { + "epoch": 0.7261603375527426, + 
"grad_norm": 0.7059510350227356, + "learning_rate": 0.0002659994237390545, + "loss": 1.3277, + "step": 6884 + }, + { + "epoch": 0.7262658227848101, + "grad_norm": 0.8036796450614929, + "learning_rate": 0.0002658076663068715, + "loss": 1.3638, + "step": 6885 + }, + { + "epoch": 0.7263713080168777, + "grad_norm": 0.9450075626373291, + "learning_rate": 0.00026561596313085396, + "loss": 1.3, + "step": 6886 + }, + { + "epoch": 0.7264767932489451, + "grad_norm": 0.7622621059417725, + "learning_rate": 0.00026542431423248313, + "loss": 1.3321, + "step": 6887 + }, + { + "epoch": 0.7265822784810126, + "grad_norm": 0.8093775510787964, + "learning_rate": 0.00026523271963323414, + "loss": 1.3405, + "step": 6888 + }, + { + "epoch": 0.7266877637130802, + "grad_norm": 0.7148992419242859, + "learning_rate": 0.0002650411793545763, + "loss": 1.3136, + "step": 6889 + }, + { + "epoch": 0.7267932489451476, + "grad_norm": 0.6813251376152039, + "learning_rate": 0.00026484969341797224, + "loss": 1.3001, + "step": 6890 + }, + { + "epoch": 0.7268987341772152, + "grad_norm": 0.7324780225753784, + "learning_rate": 0.0002646582618448794, + "loss": 1.3373, + "step": 6891 + }, + { + "epoch": 0.7270042194092827, + "grad_norm": 0.8165459632873535, + "learning_rate": 0.00026446688465674845, + "loss": 1.35, + "step": 6892 + }, + { + "epoch": 0.7271097046413502, + "grad_norm": 0.6848811507225037, + "learning_rate": 0.0002642755618750242, + "loss": 1.334, + "step": 6893 + }, + { + "epoch": 0.7272151898734177, + "grad_norm": 0.8305774331092834, + "learning_rate": 0.0002640842935211453, + "loss": 1.3053, + "step": 6894 + }, + { + "epoch": 0.7273206751054853, + "grad_norm": 0.9923475980758667, + "learning_rate": 0.0002638930796165443, + "loss": 1.2994, + "step": 6895 + }, + { + "epoch": 0.7274261603375527, + "grad_norm": 0.7047543525695801, + "learning_rate": 0.00026370192018264766, + "loss": 1.3209, + "step": 6896 + }, + { + "epoch": 0.7275316455696202, + "grad_norm": 0.7896862030029297, + 
"learning_rate": 0.00026351081524087573, + "loss": 1.3347, + "step": 6897 + }, + { + "epoch": 0.7276371308016878, + "grad_norm": 0.9751632213592529, + "learning_rate": 0.0002633197648126429, + "loss": 1.3497, + "step": 6898 + }, + { + "epoch": 0.7277426160337552, + "grad_norm": 0.8060071468353271, + "learning_rate": 0.0002631287689193571, + "loss": 1.3239, + "step": 6899 + }, + { + "epoch": 0.7278481012658228, + "grad_norm": 0.6359317898750305, + "learning_rate": 0.0002629378275824204, + "loss": 1.3217, + "step": 6900 + }, + { + "epoch": 0.7279535864978903, + "grad_norm": 0.6959629058837891, + "learning_rate": 0.00026274694082322896, + "loss": 1.3543, + "step": 6901 + }, + { + "epoch": 0.7280590717299578, + "grad_norm": 0.713252604007721, + "learning_rate": 0.00026255610866317253, + "loss": 1.3376, + "step": 6902 + }, + { + "epoch": 0.7281645569620253, + "grad_norm": 0.7153170704841614, + "learning_rate": 0.0002623653311236347, + "loss": 1.3584, + "step": 6903 + }, + { + "epoch": 0.7282700421940929, + "grad_norm": 0.7132186889648438, + "learning_rate": 0.0002621746082259931, + "loss": 1.3278, + "step": 6904 + }, + { + "epoch": 0.7283755274261603, + "grad_norm": 0.7391581535339355, + "learning_rate": 0.0002619839399916192, + "loss": 1.3355, + "step": 6905 + }, + { + "epoch": 0.7284810126582278, + "grad_norm": 0.7701289653778076, + "learning_rate": 0.0002617933264418782, + "loss": 1.3529, + "step": 6906 + }, + { + "epoch": 0.7285864978902954, + "grad_norm": 0.6833544969558716, + "learning_rate": 0.00026160276759812953, + "loss": 1.35, + "step": 6907 + }, + { + "epoch": 0.7286919831223628, + "grad_norm": 0.9117355942726135, + "learning_rate": 0.00026141226348172595, + "loss": 1.3491, + "step": 6908 + }, + { + "epoch": 0.7287974683544304, + "grad_norm": 0.7634113430976868, + "learning_rate": 0.00026122181411401444, + "loss": 1.3984, + "step": 6909 + }, + { + "epoch": 0.7289029535864979, + "grad_norm": 0.6835811138153076, + "learning_rate": 0.00026103141951633617, + 
"loss": 1.3432, + "step": 6910 + }, + { + "epoch": 0.7290084388185654, + "grad_norm": 0.7903118133544922, + "learning_rate": 0.0002608410797100255, + "loss": 1.357, + "step": 6911 + }, + { + "epoch": 0.7291139240506329, + "grad_norm": 0.8549637794494629, + "learning_rate": 0.000260650794716411, + "loss": 1.3074, + "step": 6912 + }, + { + "epoch": 0.7292194092827005, + "grad_norm": 0.7031216621398926, + "learning_rate": 0.00026046056455681515, + "loss": 1.3438, + "step": 6913 + }, + { + "epoch": 0.7293248945147679, + "grad_norm": 0.8296332359313965, + "learning_rate": 0.00026027038925255407, + "loss": 1.3195, + "step": 6914 + }, + { + "epoch": 0.7294303797468354, + "grad_norm": 0.6612951755523682, + "learning_rate": 0.00026008026882493783, + "loss": 1.3391, + "step": 6915 + }, + { + "epoch": 0.729535864978903, + "grad_norm": 0.7010314464569092, + "learning_rate": 0.00025989020329527057, + "loss": 1.3231, + "step": 6916 + }, + { + "epoch": 0.7296413502109704, + "grad_norm": 0.6472342610359192, + "learning_rate": 0.0002597001926848498, + "loss": 1.3406, + "step": 6917 + }, + { + "epoch": 0.729746835443038, + "grad_norm": 0.7606971859931946, + "learning_rate": 0.00025951023701496713, + "loss": 1.3409, + "step": 6918 + }, + { + "epoch": 0.7298523206751055, + "grad_norm": 0.718278169631958, + "learning_rate": 0.0002593203363069084, + "loss": 1.3324, + "step": 6919 + }, + { + "epoch": 0.729957805907173, + "grad_norm": 0.6966518759727478, + "learning_rate": 0.00025913049058195277, + "loss": 1.326, + "step": 6920 + }, + { + "epoch": 0.7300632911392405, + "grad_norm": 0.8592552542686462, + "learning_rate": 0.0002589406998613733, + "loss": 1.3446, + "step": 6921 + }, + { + "epoch": 0.7301687763713081, + "grad_norm": 0.673882246017456, + "learning_rate": 0.0002587509641664372, + "loss": 1.3194, + "step": 6922 + }, + { + "epoch": 0.7302742616033755, + "grad_norm": 0.7634445428848267, + "learning_rate": 0.0002585612835184051, + "loss": 1.3385, + "step": 6923 + }, + { + "epoch": 
0.730379746835443, + "grad_norm": 0.8340779542922974, + "learning_rate": 0.00025837165793853164, + "loss": 1.3389, + "step": 6924 + }, + { + "epoch": 0.7304852320675106, + "grad_norm": 0.6710879802703857, + "learning_rate": 0.0002581820874480654, + "loss": 1.3329, + "step": 6925 + }, + { + "epoch": 0.730590717299578, + "grad_norm": 0.6998624801635742, + "learning_rate": 0.0002579925720682487, + "loss": 1.3535, + "step": 6926 + }, + { + "epoch": 0.7306962025316456, + "grad_norm": 0.6689631938934326, + "learning_rate": 0.0002578031118203174, + "loss": 1.3264, + "step": 6927 + }, + { + "epoch": 0.7308016877637131, + "grad_norm": 0.6497957706451416, + "learning_rate": 0.00025761370672550203, + "loss": 1.3217, + "step": 6928 + }, + { + "epoch": 0.7309071729957806, + "grad_norm": 0.6849656105041504, + "learning_rate": 0.0002574243568050261, + "loss": 1.3429, + "step": 6929 + }, + { + "epoch": 0.7310126582278481, + "grad_norm": 0.6662910580635071, + "learning_rate": 0.0002572350620801072, + "loss": 1.3721, + "step": 6930 + }, + { + "epoch": 0.7311181434599157, + "grad_norm": 0.6953473091125488, + "learning_rate": 0.0002570458225719567, + "loss": 1.3516, + "step": 6931 + }, + { + "epoch": 0.7312236286919831, + "grad_norm": 0.6482794880867004, + "learning_rate": 0.0002568566383017799, + "loss": 1.3077, + "step": 6932 + }, + { + "epoch": 0.7313291139240506, + "grad_norm": 0.7131636142730713, + "learning_rate": 0.0002566675092907757, + "loss": 1.3685, + "step": 6933 + }, + { + "epoch": 0.7314345991561182, + "grad_norm": 0.7586805820465088, + "learning_rate": 0.0002564784355601372, + "loss": 1.3317, + "step": 6934 + }, + { + "epoch": 0.7315400843881856, + "grad_norm": 0.7300961017608643, + "learning_rate": 0.0002562894171310508, + "loss": 1.3791, + "step": 6935 + }, + { + "epoch": 0.7316455696202532, + "grad_norm": 0.7161966562271118, + "learning_rate": 0.00025610045402469695, + "loss": 1.2897, + "step": 6936 + }, + { + "epoch": 0.7317510548523207, + "grad_norm": 
0.7850849032402039, + "learning_rate": 0.0002559115462622503, + "loss": 1.3192, + "step": 6937 + }, + { + "epoch": 0.7318565400843882, + "grad_norm": 0.8455963134765625, + "learning_rate": 0.00025572269386487853, + "loss": 1.3452, + "step": 6938 + }, + { + "epoch": 0.7319620253164557, + "grad_norm": 0.7809805870056152, + "learning_rate": 0.0002555338968537436, + "loss": 1.3511, + "step": 6939 + }, + { + "epoch": 0.7320675105485233, + "grad_norm": 0.7323690056800842, + "learning_rate": 0.0002553451552500012, + "loss": 1.3622, + "step": 6940 + }, + { + "epoch": 0.7321729957805907, + "grad_norm": 0.8063166737556458, + "learning_rate": 0.00025515646907480074, + "loss": 1.3603, + "step": 6941 + }, + { + "epoch": 0.7322784810126582, + "grad_norm": 0.6948785781860352, + "learning_rate": 0.0002549678383492854, + "loss": 1.3018, + "step": 6942 + }, + { + "epoch": 0.7323839662447258, + "grad_norm": 0.6615836024284363, + "learning_rate": 0.00025477926309459224, + "loss": 1.3168, + "step": 6943 + }, + { + "epoch": 0.7324894514767932, + "grad_norm": 0.731411337852478, + "learning_rate": 0.00025459074333185176, + "loss": 1.3248, + "step": 6944 + }, + { + "epoch": 0.7325949367088608, + "grad_norm": 0.6722842454910278, + "learning_rate": 0.0002544022790821891, + "loss": 1.3427, + "step": 6945 + }, + { + "epoch": 0.7327004219409282, + "grad_norm": 0.7167717814445496, + "learning_rate": 0.0002542138703667224, + "loss": 1.3099, + "step": 6946 + }, + { + "epoch": 0.7328059071729958, + "grad_norm": 0.6815075278282166, + "learning_rate": 0.00025402551720656366, + "loss": 1.3421, + "step": 6947 + }, + { + "epoch": 0.7329113924050633, + "grad_norm": 0.6556058526039124, + "learning_rate": 0.0002538372196228189, + "loss": 1.3624, + "step": 6948 + }, + { + "epoch": 0.7330168776371307, + "grad_norm": 0.6521414518356323, + "learning_rate": 0.00025364897763658777, + "loss": 1.3719, + "step": 6949 + }, + { + "epoch": 0.7331223628691983, + "grad_norm": 0.7006349563598633, + "learning_rate": 
0.0002534607912689637, + "loss": 1.3354, + "step": 6950 + }, + { + "epoch": 0.7332278481012658, + "grad_norm": 0.7185226678848267, + "learning_rate": 0.00025327266054103395, + "loss": 1.3396, + "step": 6951 + }, + { + "epoch": 0.7333333333333333, + "grad_norm": 0.6556349396705627, + "learning_rate": 0.0002530845854738796, + "loss": 1.3291, + "step": 6952 + }, + { + "epoch": 0.7334388185654008, + "grad_norm": 0.6488032937049866, + "learning_rate": 0.0002528965660885749, + "loss": 1.3226, + "step": 6953 + }, + { + "epoch": 0.7335443037974684, + "grad_norm": 0.6444821953773499, + "learning_rate": 0.00025270860240618904, + "loss": 1.3329, + "step": 6954 + }, + { + "epoch": 0.7336497890295358, + "grad_norm": 0.7485703229904175, + "learning_rate": 0.000252520694447784, + "loss": 1.3348, + "step": 6955 + }, + { + "epoch": 0.7337552742616034, + "grad_norm": 0.6850400567054749, + "learning_rate": 0.0002523328422344158, + "loss": 1.3153, + "step": 6956 + }, + { + "epoch": 0.7338607594936709, + "grad_norm": 0.6686691641807556, + "learning_rate": 0.0002521450457871343, + "loss": 1.3887, + "step": 6957 + }, + { + "epoch": 0.7339662447257383, + "grad_norm": 0.724795401096344, + "learning_rate": 0.0002519573051269828, + "loss": 1.3059, + "step": 6958 + }, + { + "epoch": 0.7340717299578059, + "grad_norm": 0.7548190951347351, + "learning_rate": 0.0002517696202749988, + "loss": 1.299, + "step": 6959 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 0.6744855046272278, + "learning_rate": 0.00025158199125221325, + "loss": 1.3377, + "step": 6960 + }, + { + "epoch": 0.7342827004219409, + "grad_norm": 0.6788802146911621, + "learning_rate": 0.0002513944180796509, + "loss": 1.37, + "step": 6961 + }, + { + "epoch": 0.7343881856540084, + "grad_norm": 0.6781960129737854, + "learning_rate": 0.0002512069007783301, + "loss": 1.3092, + "step": 6962 + }, + { + "epoch": 0.734493670886076, + "grad_norm": 0.7114258408546448, + "learning_rate": 0.00025101943936926347, + "loss": 1.282, + "step": 
6963 + }, + { + "epoch": 0.7345991561181434, + "grad_norm": 0.7287135720252991, + "learning_rate": 0.0002508320338734568, + "loss": 1.3464, + "step": 6964 + }, + { + "epoch": 0.734704641350211, + "grad_norm": 0.6743736863136292, + "learning_rate": 0.00025064468431190977, + "loss": 1.3091, + "step": 6965 + }, + { + "epoch": 0.7348101265822785, + "grad_norm": 0.6619522571563721, + "learning_rate": 0.0002504573907056159, + "loss": 1.3323, + "step": 6966 + }, + { + "epoch": 0.734915611814346, + "grad_norm": 0.6910430192947388, + "learning_rate": 0.00025027015307556234, + "loss": 1.3016, + "step": 6967 + }, + { + "epoch": 0.7350210970464135, + "grad_norm": 0.6777719259262085, + "learning_rate": 0.00025008297144273, + "loss": 1.3282, + "step": 6968 + }, + { + "epoch": 0.735126582278481, + "grad_norm": 0.6515904664993286, + "learning_rate": 0.0002498958458280936, + "loss": 1.3171, + "step": 6969 + }, + { + "epoch": 0.7352320675105485, + "grad_norm": 0.6774920225143433, + "learning_rate": 0.0002497087762526211, + "loss": 1.3284, + "step": 6970 + }, + { + "epoch": 0.735337552742616, + "grad_norm": 0.7388257384300232, + "learning_rate": 0.0002495217627372752, + "loss": 1.3141, + "step": 6971 + }, + { + "epoch": 0.7354430379746836, + "grad_norm": 0.6678743362426758, + "learning_rate": 0.0002493348053030113, + "loss": 1.3264, + "step": 6972 + }, + { + "epoch": 0.735548523206751, + "grad_norm": 0.6553627252578735, + "learning_rate": 0.0002491479039707791, + "loss": 1.3145, + "step": 6973 + }, + { + "epoch": 0.7356540084388186, + "grad_norm": 0.6900156140327454, + "learning_rate": 0.00024896105876152165, + "loss": 1.3184, + "step": 6974 + }, + { + "epoch": 0.7357594936708861, + "grad_norm": 0.7825663089752197, + "learning_rate": 0.0002487742696961761, + "loss": 1.366, + "step": 6975 + }, + { + "epoch": 0.7358649789029535, + "grad_norm": 0.6498423218727112, + "learning_rate": 0.0002485875367956729, + "loss": 1.3049, + "step": 6976 + }, + { + "epoch": 0.7359704641350211, + 
"grad_norm": 0.6598316431045532, + "learning_rate": 0.00024840086008093645, + "loss": 1.3585, + "step": 6977 + }, + { + "epoch": 0.7360759493670886, + "grad_norm": 0.6690289974212646, + "learning_rate": 0.0002482142395728848, + "loss": 1.3368, + "step": 6978 + }, + { + "epoch": 0.7361814345991561, + "grad_norm": 0.7843031883239746, + "learning_rate": 0.0002480276752924295, + "loss": 1.336, + "step": 6979 + }, + { + "epoch": 0.7362869198312236, + "grad_norm": 0.7073907256126404, + "learning_rate": 0.0002478411672604766, + "loss": 1.3161, + "step": 6980 + }, + { + "epoch": 0.7363924050632912, + "grad_norm": 0.6911773681640625, + "learning_rate": 0.0002476547154979248, + "loss": 1.3442, + "step": 6981 + }, + { + "epoch": 0.7364978902953586, + "grad_norm": 0.894743800163269, + "learning_rate": 0.00024746832002566703, + "loss": 1.3217, + "step": 6982 + }, + { + "epoch": 0.7366033755274262, + "grad_norm": 0.8307846784591675, + "learning_rate": 0.0002472819808645899, + "loss": 1.3025, + "step": 6983 + }, + { + "epoch": 0.7367088607594937, + "grad_norm": 0.7197129130363464, + "learning_rate": 0.0002470956980355735, + "loss": 1.3423, + "step": 6984 + }, + { + "epoch": 0.7368143459915611, + "grad_norm": 1.0179744958877563, + "learning_rate": 0.00024690947155949194, + "loss": 1.3473, + "step": 6985 + }, + { + "epoch": 0.7369198312236287, + "grad_norm": 0.6916355490684509, + "learning_rate": 0.0002467233014572127, + "loss": 1.2982, + "step": 6986 + }, + { + "epoch": 0.7370253164556962, + "grad_norm": 0.757317304611206, + "learning_rate": 0.00024653718774959713, + "loss": 1.3032, + "step": 6987 + }, + { + "epoch": 0.7371308016877637, + "grad_norm": 0.7843238711357117, + "learning_rate": 0.00024635113045749985, + "loss": 1.3503, + "step": 6988 + }, + { + "epoch": 0.7372362869198312, + "grad_norm": 0.8615266680717468, + "learning_rate": 0.00024616512960177014, + "loss": 1.3748, + "step": 6989 + }, + { + "epoch": 0.7373417721518988, + "grad_norm": 0.6747849583625793, + 
"learning_rate": 0.00024597918520324994, + "loss": 1.3054, + "step": 6990 + }, + { + "epoch": 0.7374472573839662, + "grad_norm": 0.7834898829460144, + "learning_rate": 0.00024579329728277534, + "loss": 1.3573, + "step": 6991 + }, + { + "epoch": 0.7375527426160338, + "grad_norm": 0.9038985967636108, + "learning_rate": 0.00024560746586117603, + "loss": 1.3679, + "step": 6992 + }, + { + "epoch": 0.7376582278481013, + "grad_norm": 0.6883530020713806, + "learning_rate": 0.00024542169095927526, + "loss": 1.3154, + "step": 6993 + }, + { + "epoch": 0.7377637130801687, + "grad_norm": 0.8405253291130066, + "learning_rate": 0.00024523597259789004, + "loss": 1.3264, + "step": 6994 + }, + { + "epoch": 0.7378691983122363, + "grad_norm": 0.7407637238502502, + "learning_rate": 0.0002450503107978311, + "loss": 1.3431, + "step": 6995 + }, + { + "epoch": 0.7379746835443038, + "grad_norm": 0.6728624105453491, + "learning_rate": 0.00024486470557990247, + "loss": 1.3385, + "step": 6996 + }, + { + "epoch": 0.7380801687763713, + "grad_norm": 0.7006783485412598, + "learning_rate": 0.0002446791569649027, + "loss": 1.3343, + "step": 6997 + }, + { + "epoch": 0.7381856540084388, + "grad_norm": 0.8338941335678101, + "learning_rate": 0.0002444936649736232, + "loss": 1.3139, + "step": 6998 + }, + { + "epoch": 0.7382911392405064, + "grad_norm": 0.8841831684112549, + "learning_rate": 0.00024430822962684905, + "loss": 1.3783, + "step": 6999 + }, + { + "epoch": 0.7383966244725738, + "grad_norm": 0.7423043251037598, + "learning_rate": 0.00024412285094535952, + "loss": 1.3241, + "step": 7000 + }, + { + "epoch": 0.7385021097046414, + "grad_norm": 1.0566983222961426, + "learning_rate": 0.00024393752894992708, + "loss": 1.3123, + "step": 7001 + }, + { + "epoch": 0.7386075949367089, + "grad_norm": 0.7025294899940491, + "learning_rate": 0.00024375226366131787, + "loss": 1.3271, + "step": 7002 + }, + { + "epoch": 0.7387130801687763, + "grad_norm": 0.8033367991447449, + "learning_rate": 
0.00024356705510029196, + "loss": 1.3286, + "step": 7003 + }, + { + "epoch": 0.7388185654008439, + "grad_norm": 0.9270261526107788, + "learning_rate": 0.00024338190328760282, + "loss": 1.3008, + "step": 7004 + }, + { + "epoch": 0.7389240506329114, + "grad_norm": 0.6602017283439636, + "learning_rate": 0.00024319680824399736, + "loss": 1.292, + "step": 7005 + }, + { + "epoch": 0.7390295358649789, + "grad_norm": 0.9068267941474915, + "learning_rate": 0.00024301176999021702, + "loss": 1.304, + "step": 7006 + }, + { + "epoch": 0.7391350210970464, + "grad_norm": 0.7453782558441162, + "learning_rate": 0.00024282678854699592, + "loss": 1.3217, + "step": 7007 + }, + { + "epoch": 0.739240506329114, + "grad_norm": 0.6809092164039612, + "learning_rate": 0.00024264186393506206, + "loss": 1.2931, + "step": 7008 + }, + { + "epoch": 0.7393459915611814, + "grad_norm": 0.8692222833633423, + "learning_rate": 0.00024245699617513733, + "loss": 1.3258, + "step": 7009 + }, + { + "epoch": 0.739451476793249, + "grad_norm": 0.7441030740737915, + "learning_rate": 0.00024227218528793696, + "loss": 1.3191, + "step": 7010 + }, + { + "epoch": 0.7395569620253165, + "grad_norm": 0.7553856372833252, + "learning_rate": 0.00024208743129417004, + "loss": 1.339, + "step": 7011 + }, + { + "epoch": 0.739662447257384, + "grad_norm": 0.8869059085845947, + "learning_rate": 0.00024190273421453913, + "loss": 1.3056, + "step": 7012 + }, + { + "epoch": 0.7397679324894515, + "grad_norm": 0.6620796322822571, + "learning_rate": 0.00024171809406974047, + "loss": 1.322, + "step": 7013 + }, + { + "epoch": 0.7398734177215189, + "grad_norm": 0.7578260898590088, + "learning_rate": 0.0002415335108804636, + "loss": 1.3078, + "step": 7014 + }, + { + "epoch": 0.7399789029535865, + "grad_norm": 0.7419329285621643, + "learning_rate": 0.0002413489846673925, + "loss": 1.3385, + "step": 7015 + }, + { + "epoch": 0.740084388185654, + "grad_norm": 0.7216989994049072, + "learning_rate": 0.0002411645154512041, + "loss": 1.3483, + 
"step": 7016 + }, + { + "epoch": 0.7401898734177215, + "grad_norm": 0.6737266778945923, + "learning_rate": 0.00024098010325256897, + "loss": 1.3217, + "step": 7017 + }, + { + "epoch": 0.740295358649789, + "grad_norm": 0.8574821352958679, + "learning_rate": 0.00024079574809215149, + "loss": 1.3426, + "step": 7018 + }, + { + "epoch": 0.7404008438818566, + "grad_norm": 1.0833011865615845, + "learning_rate": 0.00024061144999060956, + "loss": 1.2832, + "step": 7019 + }, + { + "epoch": 0.740506329113924, + "grad_norm": 0.799399197101593, + "learning_rate": 0.00024042720896859471, + "loss": 1.3294, + "step": 7020 + }, + { + "epoch": 0.7406118143459915, + "grad_norm": 1.0755566358566284, + "learning_rate": 0.00024024302504675206, + "loss": 1.338, + "step": 7021 + }, + { + "epoch": 0.7407172995780591, + "grad_norm": 0.8282331824302673, + "learning_rate": 0.00024005889824572004, + "loss": 1.3297, + "step": 7022 + }, + { + "epoch": 0.7408227848101265, + "grad_norm": 0.6628104448318481, + "learning_rate": 0.00023987482858613154, + "loss": 1.2897, + "step": 7023 + }, + { + "epoch": 0.7409282700421941, + "grad_norm": 1.093538761138916, + "learning_rate": 0.0002396908160886123, + "loss": 1.3106, + "step": 7024 + }, + { + "epoch": 0.7410337552742616, + "grad_norm": 0.8291923403739929, + "learning_rate": 0.0002395068607737816, + "loss": 1.3125, + "step": 7025 + }, + { + "epoch": 0.7411392405063291, + "grad_norm": 0.6756449341773987, + "learning_rate": 0.0002393229626622528, + "loss": 1.3354, + "step": 7026 + }, + { + "epoch": 0.7412447257383966, + "grad_norm": 0.9910904169082642, + "learning_rate": 0.00023913912177463248, + "loss": 1.3706, + "step": 7027 + }, + { + "epoch": 0.7413502109704642, + "grad_norm": 0.7754594087600708, + "learning_rate": 0.0002389553381315209, + "loss": 1.3604, + "step": 7028 + }, + { + "epoch": 0.7414556962025316, + "grad_norm": 0.664727509021759, + "learning_rate": 0.00023877161175351206, + "loss": 1.3591, + "step": 7029 + }, + { + "epoch": 
0.7415611814345991, + "grad_norm": 0.6787317991256714, + "learning_rate": 0.00023858794266119323, + "loss": 1.3103, + "step": 7030 + }, + { + "epoch": 0.7416666666666667, + "grad_norm": 0.8438594341278076, + "learning_rate": 0.0002384043308751454, + "loss": 1.3205, + "step": 7031 + }, + { + "epoch": 0.7417721518987341, + "grad_norm": 0.6983551383018494, + "learning_rate": 0.0002382207764159436, + "loss": 1.364, + "step": 7032 + }, + { + "epoch": 0.7418776371308017, + "grad_norm": 0.7057783007621765, + "learning_rate": 0.00023803727930415568, + "loss": 1.356, + "step": 7033 + }, + { + "epoch": 0.7419831223628692, + "grad_norm": 0.9420859813690186, + "learning_rate": 0.00023785383956034353, + "loss": 1.3175, + "step": 7034 + }, + { + "epoch": 0.7420886075949367, + "grad_norm": 0.8078567981719971, + "learning_rate": 0.00023767045720506243, + "loss": 1.3477, + "step": 7035 + }, + { + "epoch": 0.7421940928270042, + "grad_norm": 0.8679049015045166, + "learning_rate": 0.00023748713225886137, + "loss": 1.3194, + "step": 7036 + }, + { + "epoch": 0.7422995780590718, + "grad_norm": 1.0633506774902344, + "learning_rate": 0.0002373038647422827, + "loss": 1.3286, + "step": 7037 + }, + { + "epoch": 0.7424050632911392, + "grad_norm": 0.7202442288398743, + "learning_rate": 0.00023712065467586252, + "loss": 1.3346, + "step": 7038 + }, + { + "epoch": 0.7425105485232067, + "grad_norm": 0.7838741540908813, + "learning_rate": 0.00023693750208013045, + "loss": 1.3649, + "step": 7039 + }, + { + "epoch": 0.7426160337552743, + "grad_norm": 1.0808053016662598, + "learning_rate": 0.00023675440697560943, + "loss": 1.3064, + "step": 7040 + }, + { + "epoch": 0.7427215189873417, + "grad_norm": 0.6875027418136597, + "learning_rate": 0.00023657136938281653, + "loss": 1.2899, + "step": 7041 + }, + { + "epoch": 0.7428270042194093, + "grad_norm": 0.7470600605010986, + "learning_rate": 0.00023638838932226196, + "loss": 1.382, + "step": 7042 + }, + { + "epoch": 0.7429324894514768, + "grad_norm": 
0.7910398840904236, + "learning_rate": 0.00023620546681444942, + "loss": 1.3444, + "step": 7043 + }, + { + "epoch": 0.7430379746835443, + "grad_norm": 0.7568538188934326, + "learning_rate": 0.00023602260187987635, + "loss": 1.3059, + "step": 7044 + }, + { + "epoch": 0.7431434599156118, + "grad_norm": 0.6822150945663452, + "learning_rate": 0.0002358397945390336, + "loss": 1.3323, + "step": 7045 + }, + { + "epoch": 0.7432489451476794, + "grad_norm": 0.965222954750061, + "learning_rate": 0.0002356570448124058, + "loss": 1.3281, + "step": 7046 + }, + { + "epoch": 0.7433544303797468, + "grad_norm": 0.7118803858757019, + "learning_rate": 0.00023547435272047083, + "loss": 1.2996, + "step": 7047 + }, + { + "epoch": 0.7434599156118143, + "grad_norm": 0.6674203276634216, + "learning_rate": 0.00023529171828370033, + "loss": 1.3203, + "step": 7048 + }, + { + "epoch": 0.7435654008438819, + "grad_norm": 0.8611128330230713, + "learning_rate": 0.0002351091415225591, + "loss": 1.3233, + "step": 7049 + }, + { + "epoch": 0.7436708860759493, + "grad_norm": 0.7589429020881653, + "learning_rate": 0.0002349266224575063, + "loss": 1.2971, + "step": 7050 + }, + { + "epoch": 0.7437763713080169, + "grad_norm": 0.696897566318512, + "learning_rate": 0.00023474416110899377, + "loss": 1.3326, + "step": 7051 + }, + { + "epoch": 0.7438818565400844, + "grad_norm": 0.7454007267951965, + "learning_rate": 0.00023456175749746736, + "loss": 1.2935, + "step": 7052 + }, + { + "epoch": 0.7439873417721519, + "grad_norm": 0.6826305389404297, + "learning_rate": 0.0002343794116433662, + "loss": 1.2884, + "step": 7053 + }, + { + "epoch": 0.7440928270042194, + "grad_norm": 0.7005605101585388, + "learning_rate": 0.00023419712356712307, + "loss": 1.3182, + "step": 7054 + }, + { + "epoch": 0.744198312236287, + "grad_norm": 0.6899558901786804, + "learning_rate": 0.00023401489328916432, + "loss": 1.358, + "step": 7055 + }, + { + "epoch": 0.7443037974683544, + "grad_norm": 0.7683312892913818, + "learning_rate": 
0.00023383272082990963, + "loss": 1.3698, + "step": 7056 + }, + { + "epoch": 0.744409282700422, + "grad_norm": 0.7297269105911255, + "learning_rate": 0.00023365060620977223, + "loss": 1.3121, + "step": 7057 + }, + { + "epoch": 0.7445147679324895, + "grad_norm": 0.7066762447357178, + "learning_rate": 0.00023346854944915937, + "loss": 1.35, + "step": 7058 + }, + { + "epoch": 0.7446202531645569, + "grad_norm": 0.7876212000846863, + "learning_rate": 0.00023328655056847124, + "loss": 1.3308, + "step": 7059 + }, + { + "epoch": 0.7447257383966245, + "grad_norm": 0.7128951549530029, + "learning_rate": 0.0002331046095881017, + "loss": 1.336, + "step": 7060 + }, + { + "epoch": 0.744831223628692, + "grad_norm": 0.6939775943756104, + "learning_rate": 0.00023292272652843807, + "loss": 1.3845, + "step": 7061 + }, + { + "epoch": 0.7449367088607595, + "grad_norm": 0.8807617425918579, + "learning_rate": 0.00023274090140986138, + "loss": 1.306, + "step": 7062 + }, + { + "epoch": 0.745042194092827, + "grad_norm": 0.7204841375350952, + "learning_rate": 0.00023255913425274588, + "loss": 1.3082, + "step": 7063 + }, + { + "epoch": 0.7451476793248946, + "grad_norm": 0.6792747378349304, + "learning_rate": 0.00023237742507745964, + "loss": 1.3328, + "step": 7064 + }, + { + "epoch": 0.745253164556962, + "grad_norm": 0.809299647808075, + "learning_rate": 0.00023219577390436397, + "loss": 1.3183, + "step": 7065 + }, + { + "epoch": 0.7453586497890295, + "grad_norm": 0.8431228399276733, + "learning_rate": 0.00023201418075381364, + "loss": 1.3201, + "step": 7066 + }, + { + "epoch": 0.7454641350210971, + "grad_norm": 0.7249903082847595, + "learning_rate": 0.00023183264564615756, + "loss": 1.3328, + "step": 7067 + }, + { + "epoch": 0.7455696202531645, + "grad_norm": 0.7188300490379333, + "learning_rate": 0.00023165116860173726, + "loss": 1.3539, + "step": 7068 + }, + { + "epoch": 0.7456751054852321, + "grad_norm": 0.652993381023407, + "learning_rate": 0.00023146974964088825, + "loss": 1.3414, + 
"step": 7069 + }, + { + "epoch": 0.7457805907172996, + "grad_norm": 0.7076045274734497, + "learning_rate": 0.00023128838878393946, + "loss": 1.3334, + "step": 7070 + }, + { + "epoch": 0.7458860759493671, + "grad_norm": 0.7367606163024902, + "learning_rate": 0.00023110708605121317, + "loss": 1.3532, + "step": 7071 + }, + { + "epoch": 0.7459915611814346, + "grad_norm": 0.7192530632019043, + "learning_rate": 0.00023092584146302539, + "loss": 1.3473, + "step": 7072 + }, + { + "epoch": 0.7460970464135022, + "grad_norm": 0.7109804153442383, + "learning_rate": 0.0002307446550396854, + "loss": 1.3246, + "step": 7073 + }, + { + "epoch": 0.7462025316455696, + "grad_norm": 0.6603081822395325, + "learning_rate": 0.0002305635268014961, + "loss": 1.3232, + "step": 7074 + }, + { + "epoch": 0.7463080168776371, + "grad_norm": 0.7585765719413757, + "learning_rate": 0.0002303824567687534, + "loss": 1.3134, + "step": 7075 + }, + { + "epoch": 0.7464135021097047, + "grad_norm": 0.6834245920181274, + "learning_rate": 0.00023020144496174781, + "loss": 1.3093, + "step": 7076 + }, + { + "epoch": 0.7465189873417721, + "grad_norm": 0.658704936504364, + "learning_rate": 0.0002300204914007622, + "loss": 1.3055, + "step": 7077 + }, + { + "epoch": 0.7466244725738397, + "grad_norm": 0.8314408659934998, + "learning_rate": 0.00022983959610607338, + "loss": 1.3387, + "step": 7078 + }, + { + "epoch": 0.7467299578059071, + "grad_norm": 0.6981645822525024, + "learning_rate": 0.00022965875909795164, + "loss": 1.3457, + "step": 7079 + }, + { + "epoch": 0.7468354430379747, + "grad_norm": 0.6822185516357422, + "learning_rate": 0.00022947798039666051, + "loss": 1.3534, + "step": 7080 + }, + { + "epoch": 0.7469409282700422, + "grad_norm": 0.6976431608200073, + "learning_rate": 0.00022929726002245728, + "loss": 1.3067, + "step": 7081 + }, + { + "epoch": 0.7470464135021097, + "grad_norm": 0.7066746354103088, + "learning_rate": 0.00022911659799559254, + "loss": 1.3567, + "step": 7082 + }, + { + "epoch": 
0.7471518987341772, + "grad_norm": 0.677930474281311, + "learning_rate": 0.00022893599433631014, + "loss": 1.3352, + "step": 7083 + }, + { + "epoch": 0.7472573839662447, + "grad_norm": 0.6618035435676575, + "learning_rate": 0.00022875544906484797, + "loss": 1.3075, + "step": 7084 + }, + { + "epoch": 0.7473628691983122, + "grad_norm": 0.6744056940078735, + "learning_rate": 0.00022857496220143696, + "loss": 1.3328, + "step": 7085 + }, + { + "epoch": 0.7474683544303797, + "grad_norm": 0.6927992105484009, + "learning_rate": 0.00022839453376630149, + "loss": 1.3168, + "step": 7086 + }, + { + "epoch": 0.7475738396624473, + "grad_norm": 0.7258657217025757, + "learning_rate": 0.00022821416377965948, + "loss": 1.3881, + "step": 7087 + }, + { + "epoch": 0.7476793248945147, + "grad_norm": 0.714501678943634, + "learning_rate": 0.00022803385226172226, + "loss": 1.3249, + "step": 7088 + }, + { + "epoch": 0.7477848101265823, + "grad_norm": 0.7105880975723267, + "learning_rate": 0.0002278535992326947, + "loss": 1.3577, + "step": 7089 + }, + { + "epoch": 0.7478902953586498, + "grad_norm": 0.8032772541046143, + "learning_rate": 0.00022767340471277492, + "loss": 1.2715, + "step": 7090 + }, + { + "epoch": 0.7479957805907173, + "grad_norm": 0.6882555484771729, + "learning_rate": 0.00022749326872215472, + "loss": 1.3001, + "step": 7091 + }, + { + "epoch": 0.7481012658227848, + "grad_norm": 0.740087628364563, + "learning_rate": 0.00022731319128101906, + "loss": 1.3092, + "step": 7092 + }, + { + "epoch": 0.7482067510548523, + "grad_norm": 0.7928578853607178, + "learning_rate": 0.0002271331724095468, + "loss": 1.3378, + "step": 7093 + }, + { + "epoch": 0.7483122362869198, + "grad_norm": 0.7092121243476868, + "learning_rate": 0.0002269532121279099, + "loss": 1.3399, + "step": 7094 + }, + { + "epoch": 0.7484177215189873, + "grad_norm": 0.7313840985298157, + "learning_rate": 0.00022677331045627366, + "loss": 1.3437, + "step": 7095 + }, + { + "epoch": 0.7485232067510549, + "grad_norm": 
0.8056222796440125, + "learning_rate": 0.00022659346741479708, + "loss": 1.3466, + "step": 7096 + }, + { + "epoch": 0.7486286919831223, + "grad_norm": 0.6644865870475769, + "learning_rate": 0.00022641368302363235, + "loss": 1.325, + "step": 7097 + }, + { + "epoch": 0.7487341772151899, + "grad_norm": 0.705980658531189, + "learning_rate": 0.00022623395730292538, + "loss": 1.3529, + "step": 7098 + }, + { + "epoch": 0.7488396624472574, + "grad_norm": 0.6943669319152832, + "learning_rate": 0.0002260542902728151, + "loss": 1.3185, + "step": 7099 + }, + { + "epoch": 0.7489451476793249, + "grad_norm": 0.6829901933670044, + "learning_rate": 0.00022587468195343436, + "loss": 1.2923, + "step": 7100 + }, + { + "epoch": 0.7490506329113924, + "grad_norm": 0.6922453045845032, + "learning_rate": 0.0002256951323649087, + "loss": 1.3577, + "step": 7101 + }, + { + "epoch": 0.74915611814346, + "grad_norm": 0.6814213991165161, + "learning_rate": 0.00022551564152735814, + "loss": 1.3658, + "step": 7102 + }, + { + "epoch": 0.7492616033755274, + "grad_norm": 0.7084421515464783, + "learning_rate": 0.00022533620946089524, + "loss": 1.3307, + "step": 7103 + }, + { + "epoch": 0.7493670886075949, + "grad_norm": 0.7096202373504639, + "learning_rate": 0.00022515683618562626, + "loss": 1.2902, + "step": 7104 + }, + { + "epoch": 0.7494725738396625, + "grad_norm": 0.6780527234077454, + "learning_rate": 0.00022497752172165095, + "loss": 1.343, + "step": 7105 + }, + { + "epoch": 0.7495780590717299, + "grad_norm": 0.6789690852165222, + "learning_rate": 0.0002247982660890623, + "loss": 1.3486, + "step": 7106 + }, + { + "epoch": 0.7496835443037975, + "grad_norm": 0.705478847026825, + "learning_rate": 0.00022461906930794687, + "loss": 1.3374, + "step": 7107 + }, + { + "epoch": 0.749789029535865, + "grad_norm": 0.7325201630592346, + "learning_rate": 0.00022443993139838447, + "loss": 1.318, + "step": 7108 + }, + { + "epoch": 0.7498945147679325, + "grad_norm": 0.7482960224151611, + "learning_rate": 
0.00022426085238044823, + "loss": 1.3145, + "step": 7109 + }, + { + "epoch": 0.75, + "grad_norm": 0.7301803231239319, + "learning_rate": 0.00022408183227420528, + "loss": 1.3477, + "step": 7110 + }, + { + "epoch": 0.7501054852320675, + "grad_norm": 0.7397965788841248, + "learning_rate": 0.00022390287109971547, + "loss": 1.3773, + "step": 7111 + }, + { + "epoch": 0.750210970464135, + "grad_norm": 0.7184997797012329, + "learning_rate": 0.00022372396887703234, + "loss": 1.3285, + "step": 7112 + }, + { + "epoch": 0.7503164556962025, + "grad_norm": 0.8052500486373901, + "learning_rate": 0.00022354512562620268, + "loss": 1.391, + "step": 7113 + }, + { + "epoch": 0.7504219409282701, + "grad_norm": 0.8186091780662537, + "learning_rate": 0.0002233663413672669, + "loss": 1.3139, + "step": 7114 + }, + { + "epoch": 0.7505274261603375, + "grad_norm": 0.7584077715873718, + "learning_rate": 0.00022318761612025856, + "loss": 1.3222, + "step": 7115 + }, + { + "epoch": 0.7506329113924051, + "grad_norm": 0.7889503240585327, + "learning_rate": 0.00022300894990520478, + "loss": 1.299, + "step": 7116 + }, + { + "epoch": 0.7507383966244726, + "grad_norm": 0.9680467844009399, + "learning_rate": 0.000222830342742126, + "loss": 1.3336, + "step": 7117 + }, + { + "epoch": 0.75084388185654, + "grad_norm": 0.70809406042099, + "learning_rate": 0.00022265179465103574, + "loss": 1.2932, + "step": 7118 + }, + { + "epoch": 0.7509493670886076, + "grad_norm": 0.8365845680236816, + "learning_rate": 0.00022247330565194171, + "loss": 1.2818, + "step": 7119 + }, + { + "epoch": 0.7510548523206751, + "grad_norm": 0.8568906188011169, + "learning_rate": 0.0002222948757648443, + "loss": 1.365, + "step": 7120 + }, + { + "epoch": 0.7511603375527426, + "grad_norm": 0.7198000550270081, + "learning_rate": 0.00022211650500973746, + "loss": 1.3805, + "step": 7121 + }, + { + "epoch": 0.7512658227848101, + "grad_norm": 0.8625149130821228, + "learning_rate": 0.0002219381934066084, + "loss": 1.3352, + "step": 7122 + }, + 
{ + "epoch": 0.7513713080168777, + "grad_norm": 0.7745174169540405, + "learning_rate": 0.00022175994097543806, + "loss": 1.3246, + "step": 7123 + }, + { + "epoch": 0.7514767932489451, + "grad_norm": 0.7339627742767334, + "learning_rate": 0.0002215817477362003, + "loss": 1.3434, + "step": 7124 + }, + { + "epoch": 0.7515822784810127, + "grad_norm": 0.8341085314750671, + "learning_rate": 0.00022140361370886265, + "loss": 1.3514, + "step": 7125 + }, + { + "epoch": 0.7516877637130802, + "grad_norm": 0.736623227596283, + "learning_rate": 0.00022122553891338586, + "loss": 1.3567, + "step": 7126 + }, + { + "epoch": 0.7517932489451477, + "grad_norm": 0.7441138625144958, + "learning_rate": 0.00022104752336972396, + "loss": 1.343, + "step": 7127 + }, + { + "epoch": 0.7518987341772152, + "grad_norm": 0.7351277470588684, + "learning_rate": 0.00022086956709782495, + "loss": 1.3325, + "step": 7128 + }, + { + "epoch": 0.7520042194092827, + "grad_norm": 0.7864279747009277, + "learning_rate": 0.0002206916701176293, + "loss": 1.2813, + "step": 7129 + }, + { + "epoch": 0.7521097046413502, + "grad_norm": 0.6997153759002686, + "learning_rate": 0.00022051383244907143, + "loss": 1.333, + "step": 7130 + }, + { + "epoch": 0.7522151898734177, + "grad_norm": 0.7892683744430542, + "learning_rate": 0.0002203360541120789, + "loss": 1.3198, + "step": 7131 + }, + { + "epoch": 0.7523206751054853, + "grad_norm": 0.7501006722450256, + "learning_rate": 0.00022015833512657268, + "loss": 1.3046, + "step": 7132 + }, + { + "epoch": 0.7524261603375527, + "grad_norm": 0.6976683139801025, + "learning_rate": 0.000219980675512467, + "loss": 1.3041, + "step": 7133 + }, + { + "epoch": 0.7525316455696203, + "grad_norm": 0.7369154095649719, + "learning_rate": 0.00021980307528966962, + "loss": 1.2939, + "step": 7134 + }, + { + "epoch": 0.7526371308016878, + "grad_norm": 0.7533292174339294, + "learning_rate": 0.00021962553447808108, + "loss": 1.2927, + "step": 7135 + }, + { + "epoch": 0.7527426160337553, + 
"grad_norm": 0.7162905931472778, + "learning_rate": 0.00021944805309759643, + "loss": 1.343, + "step": 7136 + }, + { + "epoch": 0.7528481012658228, + "grad_norm": 0.7445090413093567, + "learning_rate": 0.000219270631168103, + "loss": 1.3619, + "step": 7137 + }, + { + "epoch": 0.7529535864978903, + "grad_norm": 0.7035163044929504, + "learning_rate": 0.0002190932687094818, + "loss": 1.3389, + "step": 7138 + }, + { + "epoch": 0.7530590717299578, + "grad_norm": 0.7389053702354431, + "learning_rate": 0.00021891596574160715, + "loss": 1.3256, + "step": 7139 + }, + { + "epoch": 0.7531645569620253, + "grad_norm": 0.6706537008285522, + "learning_rate": 0.0002187387222843467, + "loss": 1.3299, + "step": 7140 + }, + { + "epoch": 0.7532700421940929, + "grad_norm": 0.6375851631164551, + "learning_rate": 0.00021856153835756164, + "loss": 1.3322, + "step": 7141 + }, + { + "epoch": 0.7533755274261603, + "grad_norm": 0.7198580503463745, + "learning_rate": 0.00021838441398110617, + "loss": 1.322, + "step": 7142 + }, + { + "epoch": 0.7534810126582279, + "grad_norm": 0.6574905514717102, + "learning_rate": 0.000218207349174828, + "loss": 1.3029, + "step": 7143 + }, + { + "epoch": 0.7535864978902953, + "grad_norm": 0.7190489768981934, + "learning_rate": 0.0002180303439585678, + "loss": 1.3394, + "step": 7144 + }, + { + "epoch": 0.7536919831223629, + "grad_norm": 0.6996555924415588, + "learning_rate": 0.0002178533983521605, + "loss": 1.3069, + "step": 7145 + }, + { + "epoch": 0.7537974683544304, + "grad_norm": 0.6915375590324402, + "learning_rate": 0.0002176765123754334, + "loss": 1.3382, + "step": 7146 + }, + { + "epoch": 0.7539029535864978, + "grad_norm": 0.6938357353210449, + "learning_rate": 0.00021749968604820754, + "loss": 1.3377, + "step": 7147 + }, + { + "epoch": 0.7540084388185654, + "grad_norm": 0.6675061583518982, + "learning_rate": 0.00021732291939029712, + "loss": 1.2911, + "step": 7148 + }, + { + "epoch": 0.7541139240506329, + "grad_norm": 0.7343496680259705, + 
"learning_rate": 0.00021714621242150973, + "loss": 1.2845, + "step": 7149 + }, + { + "epoch": 0.7542194092827004, + "grad_norm": 0.7274339199066162, + "learning_rate": 0.0002169695651616463, + "loss": 1.3336, + "step": 7150 + }, + { + "epoch": 0.7543248945147679, + "grad_norm": 0.7097326517105103, + "learning_rate": 0.00021679297763050104, + "loss": 1.3192, + "step": 7151 + }, + { + "epoch": 0.7544303797468355, + "grad_norm": 0.6498091220855713, + "learning_rate": 0.00021661644984786142, + "loss": 1.3197, + "step": 7152 + }, + { + "epoch": 0.7545358649789029, + "grad_norm": 0.842009961605072, + "learning_rate": 0.00021643998183350802, + "loss": 1.3072, + "step": 7153 + }, + { + "epoch": 0.7546413502109705, + "grad_norm": 0.6540582180023193, + "learning_rate": 0.00021626357360721556, + "loss": 1.297, + "step": 7154 + }, + { + "epoch": 0.754746835443038, + "grad_norm": 0.8908199071884155, + "learning_rate": 0.0002160872251887511, + "loss": 1.3698, + "step": 7155 + }, + { + "epoch": 0.7548523206751054, + "grad_norm": 0.7614254355430603, + "learning_rate": 0.00021591093659787528, + "loss": 1.3602, + "step": 7156 + }, + { + "epoch": 0.754957805907173, + "grad_norm": 0.6550901532173157, + "learning_rate": 0.00021573470785434237, + "loss": 1.279, + "step": 7157 + }, + { + "epoch": 0.7550632911392405, + "grad_norm": 0.8552526235580444, + "learning_rate": 0.00021555853897789942, + "loss": 1.3271, + "step": 7158 + }, + { + "epoch": 0.755168776371308, + "grad_norm": 0.8760701417922974, + "learning_rate": 0.0002153824299882872, + "loss": 1.3306, + "step": 7159 + }, + { + "epoch": 0.7552742616033755, + "grad_norm": 0.6798653602600098, + "learning_rate": 0.00021520638090523955, + "loss": 1.3676, + "step": 7160 + }, + { + "epoch": 0.7553797468354431, + "grad_norm": 0.8907802700996399, + "learning_rate": 0.0002150303917484834, + "loss": 1.3395, + "step": 7161 + }, + { + "epoch": 0.7554852320675105, + "grad_norm": 0.8424414396286011, + "learning_rate": 0.00021485446253773966, + 
"loss": 1.3307, + "step": 7162 + }, + { + "epoch": 0.755590717299578, + "grad_norm": 0.6496010422706604, + "learning_rate": 0.00021467859329272188, + "loss": 1.3185, + "step": 7163 + }, + { + "epoch": 0.7556962025316456, + "grad_norm": 0.9280717372894287, + "learning_rate": 0.00021450278403313707, + "loss": 1.3421, + "step": 7164 + }, + { + "epoch": 0.755801687763713, + "grad_norm": 0.8859043717384338, + "learning_rate": 0.0002143270347786856, + "loss": 1.3424, + "step": 7165 + }, + { + "epoch": 0.7559071729957806, + "grad_norm": 0.653667688369751, + "learning_rate": 0.0002141513455490609, + "loss": 1.2874, + "step": 7166 + }, + { + "epoch": 0.7560126582278481, + "grad_norm": 0.9720617532730103, + "learning_rate": 0.00021397571636394991, + "loss": 1.33, + "step": 7167 + }, + { + "epoch": 0.7561181434599156, + "grad_norm": 0.8870658278465271, + "learning_rate": 0.00021380014724303286, + "loss": 1.3452, + "step": 7168 + }, + { + "epoch": 0.7562236286919831, + "grad_norm": 0.6549645662307739, + "learning_rate": 0.00021362463820598297, + "loss": 1.353, + "step": 7169 + }, + { + "epoch": 0.7563291139240507, + "grad_norm": 0.8932201266288757, + "learning_rate": 0.00021344918927246678, + "loss": 1.3645, + "step": 7170 + }, + { + "epoch": 0.7564345991561181, + "grad_norm": 0.9053828716278076, + "learning_rate": 0.0002132738004621446, + "loss": 1.3199, + "step": 7171 + }, + { + "epoch": 0.7565400843881857, + "grad_norm": 0.6656216979026794, + "learning_rate": 0.0002130984717946695, + "loss": 1.323, + "step": 7172 + }, + { + "epoch": 0.7566455696202532, + "grad_norm": 0.9120240807533264, + "learning_rate": 0.00021292320328968783, + "loss": 1.3332, + "step": 7173 + }, + { + "epoch": 0.7567510548523206, + "grad_norm": 0.7102605104446411, + "learning_rate": 0.0002127479949668393, + "loss": 1.2984, + "step": 7174 + }, + { + "epoch": 0.7568565400843882, + "grad_norm": 0.6489943265914917, + "learning_rate": 0.000212572846845757, + "loss": 1.3365, + "step": 7175 + }, + { + "epoch": 
0.7569620253164557, + "grad_norm": 0.8028745651245117, + "learning_rate": 0.000212397758946067, + "loss": 1.3129, + "step": 7176 + }, + { + "epoch": 0.7570675105485232, + "grad_norm": 0.8690047264099121, + "learning_rate": 0.0002122227312873889, + "loss": 1.3227, + "step": 7177 + }, + { + "epoch": 0.7571729957805907, + "grad_norm": 0.6666791439056396, + "learning_rate": 0.00021204776388933534, + "loss": 1.2706, + "step": 7178 + }, + { + "epoch": 0.7572784810126583, + "grad_norm": 0.8091560006141663, + "learning_rate": 0.00021187285677151205, + "loss": 1.2936, + "step": 7179 + }, + { + "epoch": 0.7573839662447257, + "grad_norm": 0.8992524147033691, + "learning_rate": 0.00021169800995351874, + "loss": 1.3223, + "step": 7180 + }, + { + "epoch": 0.7574894514767933, + "grad_norm": 0.7749942541122437, + "learning_rate": 0.00021152322345494763, + "loss": 1.3139, + "step": 7181 + }, + { + "epoch": 0.7575949367088608, + "grad_norm": 0.8663312792778015, + "learning_rate": 0.00021134849729538438, + "loss": 1.3322, + "step": 7182 + }, + { + "epoch": 0.7577004219409282, + "grad_norm": 0.9857014417648315, + "learning_rate": 0.00021117383149440801, + "loss": 1.2879, + "step": 7183 + }, + { + "epoch": 0.7578059071729958, + "grad_norm": 0.6936389803886414, + "learning_rate": 0.00021099922607159064, + "loss": 1.3427, + "step": 7184 + }, + { + "epoch": 0.7579113924050633, + "grad_norm": 0.7647762894630432, + "learning_rate": 0.00021082468104649773, + "loss": 1.3654, + "step": 7185 + }, + { + "epoch": 0.7580168776371308, + "grad_norm": 0.7440724968910217, + "learning_rate": 0.00021065019643868785, + "loss": 1.2855, + "step": 7186 + }, + { + "epoch": 0.7581223628691983, + "grad_norm": 0.6537662148475647, + "learning_rate": 0.00021047577226771292, + "loss": 1.3435, + "step": 7187 + }, + { + "epoch": 0.7582278481012659, + "grad_norm": 0.6640272736549377, + "learning_rate": 0.00021030140855311772, + "loss": 1.2922, + "step": 7188 + }, + { + "epoch": 0.7583333333333333, + "grad_norm": 
0.7013300061225891, + "learning_rate": 0.00021012710531444112, + "loss": 1.3197, + "step": 7189 + }, + { + "epoch": 0.7584388185654009, + "grad_norm": 0.6814765334129333, + "learning_rate": 0.00020995286257121453, + "loss": 1.3029, + "step": 7190 + }, + { + "epoch": 0.7585443037974684, + "grad_norm": 0.6927322149276733, + "learning_rate": 0.00020977868034296253, + "loss": 1.3137, + "step": 7191 + }, + { + "epoch": 0.7586497890295358, + "grad_norm": 0.7021428346633911, + "learning_rate": 0.0002096045586492031, + "loss": 1.3138, + "step": 7192 + }, + { + "epoch": 0.7587552742616034, + "grad_norm": 0.6792346835136414, + "learning_rate": 0.00020943049750944768, + "loss": 1.327, + "step": 7193 + }, + { + "epoch": 0.7588607594936709, + "grad_norm": 0.6719871163368225, + "learning_rate": 0.00020925649694320046, + "loss": 1.3017, + "step": 7194 + }, + { + "epoch": 0.7589662447257384, + "grad_norm": 0.677707314491272, + "learning_rate": 0.0002090825569699591, + "loss": 1.3494, + "step": 7195 + }, + { + "epoch": 0.7590717299578059, + "grad_norm": 0.7050117254257202, + "learning_rate": 0.0002089086776092146, + "loss": 1.3636, + "step": 7196 + }, + { + "epoch": 0.7591772151898735, + "grad_norm": 0.6873813271522522, + "learning_rate": 0.0002087348588804505, + "loss": 1.3018, + "step": 7197 + }, + { + "epoch": 0.7592827004219409, + "grad_norm": 0.7388730645179749, + "learning_rate": 0.0002085611008031449, + "loss": 1.3302, + "step": 7198 + }, + { + "epoch": 0.7593881856540085, + "grad_norm": 0.7124366164207458, + "learning_rate": 0.00020838740339676763, + "loss": 1.3242, + "step": 7199 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 0.6686258912086487, + "learning_rate": 0.00020821376668078264, + "loss": 1.3058, + "step": 7200 + }, + { + "epoch": 0.7595991561181434, + "grad_norm": 0.7014123797416687, + "learning_rate": 0.00020804019067464667, + "loss": 1.2933, + "step": 7201 + }, + { + "epoch": 0.759704641350211, + "grad_norm": 0.7181581258773804, + "learning_rate": 
0.00020786667539780977, + "loss": 1.3184, + "step": 7202 + }, + { + "epoch": 0.7598101265822785, + "grad_norm": 0.7486109733581543, + "learning_rate": 0.00020769322086971524, + "loss": 1.3409, + "step": 7203 + }, + { + "epoch": 0.759915611814346, + "grad_norm": 0.6854384541511536, + "learning_rate": 0.00020751982710979944, + "loss": 1.3189, + "step": 7204 + }, + { + "epoch": 0.7600210970464135, + "grad_norm": 0.7154693007469177, + "learning_rate": 0.0002073464941374921, + "loss": 1.353, + "step": 7205 + }, + { + "epoch": 0.7601265822784811, + "grad_norm": 0.8215274810791016, + "learning_rate": 0.000207173221972216, + "loss": 1.3425, + "step": 7206 + }, + { + "epoch": 0.7602320675105485, + "grad_norm": 0.7922046780586243, + "learning_rate": 0.00020700001063338696, + "loss": 1.3097, + "step": 7207 + }, + { + "epoch": 0.760337552742616, + "grad_norm": 0.8183408975601196, + "learning_rate": 0.00020682686014041458, + "loss": 1.3421, + "step": 7208 + }, + { + "epoch": 0.7604430379746835, + "grad_norm": 0.760103702545166, + "learning_rate": 0.00020665377051270095, + "loss": 1.3108, + "step": 7209 + }, + { + "epoch": 0.760548523206751, + "grad_norm": 0.815078616142273, + "learning_rate": 0.00020648074176964182, + "loss": 1.3237, + "step": 7210 + }, + { + "epoch": 0.7606540084388186, + "grad_norm": 0.710818886756897, + "learning_rate": 0.00020630777393062575, + "loss": 1.3546, + "step": 7211 + }, + { + "epoch": 0.760759493670886, + "grad_norm": 0.8924205899238586, + "learning_rate": 0.00020613486701503473, + "loss": 1.3223, + "step": 7212 + }, + { + "epoch": 0.7608649789029536, + "grad_norm": 0.8510847091674805, + "learning_rate": 0.00020596202104224376, + "loss": 1.344, + "step": 7213 + }, + { + "epoch": 0.7609704641350211, + "grad_norm": 0.7137897610664368, + "learning_rate": 0.0002057892360316212, + "loss": 1.3482, + "step": 7214 + }, + { + "epoch": 0.7610759493670886, + "grad_norm": 0.9587517380714417, + "learning_rate": 0.00020561651200252836, + "loss": 1.344, + 
"step": 7215 + }, + { + "epoch": 0.7611814345991561, + "grad_norm": 0.6524397134780884, + "learning_rate": 0.00020544384897431997, + "loss": 1.331, + "step": 7216 + }, + { + "epoch": 0.7612869198312237, + "grad_norm": 0.7114567160606384, + "learning_rate": 0.00020527124696634343, + "loss": 1.3561, + "step": 7217 + }, + { + "epoch": 0.7613924050632911, + "grad_norm": 0.7686005234718323, + "learning_rate": 0.00020509870599794022, + "loss": 1.3518, + "step": 7218 + }, + { + "epoch": 0.7614978902953586, + "grad_norm": 0.7476716041564941, + "learning_rate": 0.0002049262260884441, + "loss": 1.3195, + "step": 7219 + }, + { + "epoch": 0.7616033755274262, + "grad_norm": 0.6903195381164551, + "learning_rate": 0.00020475380725718228, + "loss": 1.3271, + "step": 7220 + }, + { + "epoch": 0.7617088607594936, + "grad_norm": 0.7576693296432495, + "learning_rate": 0.00020458144952347523, + "loss": 1.3442, + "step": 7221 + }, + { + "epoch": 0.7618143459915612, + "grad_norm": 0.6707242131233215, + "learning_rate": 0.0002044091529066365, + "loss": 1.3274, + "step": 7222 + }, + { + "epoch": 0.7619198312236287, + "grad_norm": 0.7069321274757385, + "learning_rate": 0.00020423691742597273, + "loss": 1.3117, + "step": 7223 + }, + { + "epoch": 0.7620253164556962, + "grad_norm": 0.6876087188720703, + "learning_rate": 0.0002040647431007837, + "loss": 1.3191, + "step": 7224 + }, + { + "epoch": 0.7621308016877637, + "grad_norm": 0.6940487027168274, + "learning_rate": 0.00020389262995036263, + "loss": 1.3528, + "step": 7225 + }, + { + "epoch": 0.7622362869198313, + "grad_norm": 0.6722114682197571, + "learning_rate": 0.00020372057799399534, + "loss": 1.334, + "step": 7226 + }, + { + "epoch": 0.7623417721518987, + "grad_norm": 0.7169630527496338, + "learning_rate": 0.00020354858725096122, + "loss": 1.3142, + "step": 7227 + }, + { + "epoch": 0.7624472573839662, + "grad_norm": 0.7783400416374207, + "learning_rate": 0.00020337665774053284, + "loss": 1.3293, + "step": 7228 + }, + { + "epoch": 
0.7625527426160338, + "grad_norm": 0.701961874961853, + "learning_rate": 0.0002032047894819758, + "loss": 1.381, + "step": 7229 + }, + { + "epoch": 0.7626582278481012, + "grad_norm": 0.7554301023483276, + "learning_rate": 0.00020303298249454857, + "loss": 1.345, + "step": 7230 + }, + { + "epoch": 0.7627637130801688, + "grad_norm": 0.664858877658844, + "learning_rate": 0.00020286123679750314, + "loss": 1.3523, + "step": 7231 + }, + { + "epoch": 0.7628691983122363, + "grad_norm": 0.6662150025367737, + "learning_rate": 0.00020268955241008437, + "loss": 1.3218, + "step": 7232 + }, + { + "epoch": 0.7629746835443038, + "grad_norm": 0.7357528209686279, + "learning_rate": 0.00020251792935153037, + "loss": 1.316, + "step": 7233 + }, + { + "epoch": 0.7630801687763713, + "grad_norm": 0.7207895517349243, + "learning_rate": 0.0002023463676410724, + "loss": 1.3191, + "step": 7234 + }, + { + "epoch": 0.7631856540084389, + "grad_norm": 0.7231186628341675, + "learning_rate": 0.0002021748672979348, + "loss": 1.3482, + "step": 7235 + }, + { + "epoch": 0.7632911392405063, + "grad_norm": 0.6996248960494995, + "learning_rate": 0.00020200342834133497, + "loss": 1.3092, + "step": 7236 + }, + { + "epoch": 0.7633966244725738, + "grad_norm": 0.8122640252113342, + "learning_rate": 0.00020183205079048338, + "loss": 1.3527, + "step": 7237 + }, + { + "epoch": 0.7635021097046414, + "grad_norm": 0.6799023151397705, + "learning_rate": 0.0002016607346645841, + "loss": 1.3589, + "step": 7238 + }, + { + "epoch": 0.7636075949367088, + "grad_norm": 0.6867202520370483, + "learning_rate": 0.00020148947998283381, + "loss": 1.3109, + "step": 7239 + }, + { + "epoch": 0.7637130801687764, + "grad_norm": 0.7276530861854553, + "learning_rate": 0.00020131828676442237, + "loss": 1.3155, + "step": 7240 + }, + { + "epoch": 0.7638185654008439, + "grad_norm": 0.775681734085083, + "learning_rate": 0.00020114715502853292, + "loss": 1.2857, + "step": 7241 + }, + { + "epoch": 0.7639240506329114, + "grad_norm": 
0.652664065361023, + "learning_rate": 0.00020097608479434153, + "loss": 1.3704, + "step": 7242 + }, + { + "epoch": 0.7640295358649789, + "grad_norm": 0.6881588697433472, + "learning_rate": 0.00020080507608101757, + "loss": 1.3467, + "step": 7243 + }, + { + "epoch": 0.7641350210970465, + "grad_norm": 0.7459666728973389, + "learning_rate": 0.0002006341289077233, + "loss": 1.3224, + "step": 7244 + }, + { + "epoch": 0.7642405063291139, + "grad_norm": 0.6699653267860413, + "learning_rate": 0.00020046324329361432, + "loss": 1.2988, + "step": 7245 + }, + { + "epoch": 0.7643459915611814, + "grad_norm": 0.6649810075759888, + "learning_rate": 0.00020029241925783908, + "loss": 1.2989, + "step": 7246 + }, + { + "epoch": 0.764451476793249, + "grad_norm": 0.7101086974143982, + "learning_rate": 0.00020012165681953923, + "loss": 1.3309, + "step": 7247 + }, + { + "epoch": 0.7645569620253164, + "grad_norm": 0.696987509727478, + "learning_rate": 0.00019995095599784985, + "loss": 1.3047, + "step": 7248 + }, + { + "epoch": 0.764662447257384, + "grad_norm": 0.8058615922927856, + "learning_rate": 0.00019978031681189864, + "loss": 1.3654, + "step": 7249 + }, + { + "epoch": 0.7647679324894515, + "grad_norm": 0.7396281957626343, + "learning_rate": 0.00019960973928080666, + "loss": 1.3209, + "step": 7250 + }, + { + "epoch": 0.764873417721519, + "grad_norm": 0.680537223815918, + "learning_rate": 0.0001994392234236878, + "loss": 1.3344, + "step": 7251 + }, + { + "epoch": 0.7649789029535865, + "grad_norm": 0.6568896770477295, + "learning_rate": 0.00019926876925964928, + "loss": 1.2994, + "step": 7252 + }, + { + "epoch": 0.765084388185654, + "grad_norm": 0.6899178624153137, + "learning_rate": 0.00019909837680779141, + "loss": 1.33, + "step": 7253 + }, + { + "epoch": 0.7651898734177215, + "grad_norm": 0.7971598505973816, + "learning_rate": 0.00019892804608720747, + "loss": 1.3371, + "step": 7254 + }, + { + "epoch": 0.765295358649789, + "grad_norm": 0.6491876840591431, + "learning_rate": 
0.00019875777711698384, + "loss": 1.3369, + "step": 7255 + }, + { + "epoch": 0.7654008438818566, + "grad_norm": 0.7039133310317993, + "learning_rate": 0.00019858756991619978, + "loss": 1.3288, + "step": 7256 + }, + { + "epoch": 0.765506329113924, + "grad_norm": 0.8211657404899597, + "learning_rate": 0.00019841742450392837, + "loss": 1.3434, + "step": 7257 + }, + { + "epoch": 0.7656118143459916, + "grad_norm": 0.6880935430526733, + "learning_rate": 0.0001982473408992349, + "loss": 1.314, + "step": 7258 + }, + { + "epoch": 0.7657172995780591, + "grad_norm": 0.721005380153656, + "learning_rate": 0.00019807731912117828, + "loss": 1.3459, + "step": 7259 + }, + { + "epoch": 0.7658227848101266, + "grad_norm": 0.6518349647521973, + "learning_rate": 0.0001979073591888101, + "loss": 1.2712, + "step": 7260 + }, + { + "epoch": 0.7659282700421941, + "grad_norm": 0.6554935574531555, + "learning_rate": 0.0001977374611211754, + "loss": 1.2851, + "step": 7261 + }, + { + "epoch": 0.7660337552742617, + "grad_norm": 0.6600521206855774, + "learning_rate": 0.00019756762493731192, + "loss": 1.3056, + "step": 7262 + }, + { + "epoch": 0.7661392405063291, + "grad_norm": 0.6587800979614258, + "learning_rate": 0.00019739785065625077, + "loss": 1.3218, + "step": 7263 + }, + { + "epoch": 0.7662447257383966, + "grad_norm": 0.7029625177383423, + "learning_rate": 0.00019722813829701593, + "loss": 1.3592, + "step": 7264 + }, + { + "epoch": 0.7663502109704642, + "grad_norm": 0.7478629350662231, + "learning_rate": 0.0001970584878786244, + "loss": 1.3019, + "step": 7265 + }, + { + "epoch": 0.7664556962025316, + "grad_norm": 0.7051143050193787, + "learning_rate": 0.0001968888994200868, + "loss": 1.2838, + "step": 7266 + }, + { + "epoch": 0.7665611814345992, + "grad_norm": 0.6582384705543518, + "learning_rate": 0.00019671937294040595, + "loss": 1.3149, + "step": 7267 + }, + { + "epoch": 0.7666666666666667, + "grad_norm": 0.8329594135284424, + "learning_rate": 0.00019654990845857832, + "loss": 1.3464, + 
"step": 7268 + }, + { + "epoch": 0.7667721518987342, + "grad_norm": 0.7059938311576843, + "learning_rate": 0.00019638050599359326, + "loss": 1.3345, + "step": 7269 + }, + { + "epoch": 0.7668776371308017, + "grad_norm": 0.732908308506012, + "learning_rate": 0.000196211165564433, + "loss": 1.3639, + "step": 7270 + }, + { + "epoch": 0.7669831223628693, + "grad_norm": 0.696702241897583, + "learning_rate": 0.00019604188719007313, + "loss": 1.3327, + "step": 7271 + }, + { + "epoch": 0.7670886075949367, + "grad_norm": 0.7032861113548279, + "learning_rate": 0.00019587267088948214, + "loss": 1.3274, + "step": 7272 + }, + { + "epoch": 0.7671940928270042, + "grad_norm": 0.7002531290054321, + "learning_rate": 0.00019570351668162143, + "loss": 1.3307, + "step": 7273 + }, + { + "epoch": 0.7672995780590718, + "grad_norm": 0.7574492692947388, + "learning_rate": 0.00019553442458544542, + "loss": 1.285, + "step": 7274 + }, + { + "epoch": 0.7674050632911392, + "grad_norm": 0.7084521651268005, + "learning_rate": 0.00019536539461990224, + "loss": 1.3202, + "step": 7275 + }, + { + "epoch": 0.7675105485232068, + "grad_norm": 0.7185905575752258, + "learning_rate": 0.0001951964268039322, + "loss": 1.3629, + "step": 7276 + }, + { + "epoch": 0.7676160337552742, + "grad_norm": 0.7648557424545288, + "learning_rate": 0.00019502752115646901, + "loss": 1.3048, + "step": 7277 + }, + { + "epoch": 0.7677215189873418, + "grad_norm": 0.7194989323616028, + "learning_rate": 0.00019485867769643945, + "loss": 1.3491, + "step": 7278 + }, + { + "epoch": 0.7678270042194093, + "grad_norm": 0.706494927406311, + "learning_rate": 0.0001946898964427633, + "loss": 1.3231, + "step": 7279 + }, + { + "epoch": 0.7679324894514767, + "grad_norm": 0.7458300590515137, + "learning_rate": 0.00019452117741435314, + "loss": 1.3026, + "step": 7280 + }, + { + "epoch": 0.7680379746835443, + "grad_norm": 0.7076130509376526, + "learning_rate": 0.00019435252063011504, + "loss": 1.3668, + "step": 7281 + }, + { + "epoch": 
0.7681434599156118, + "grad_norm": 0.6538662314414978, + "learning_rate": 0.00019418392610894768, + "loss": 1.3741, + "step": 7282 + }, + { + "epoch": 0.7682489451476793, + "grad_norm": 0.703188419342041, + "learning_rate": 0.0001940153938697427, + "loss": 1.3203, + "step": 7283 + }, + { + "epoch": 0.7683544303797468, + "grad_norm": 0.7760046124458313, + "learning_rate": 0.0001938469239313855, + "loss": 1.3128, + "step": 7284 + }, + { + "epoch": 0.7684599156118144, + "grad_norm": 0.9191363453865051, + "learning_rate": 0.00019367851631275362, + "loss": 1.3498, + "step": 7285 + }, + { + "epoch": 0.7685654008438818, + "grad_norm": 0.6997398734092712, + "learning_rate": 0.00019351017103271805, + "loss": 1.3155, + "step": 7286 + }, + { + "epoch": 0.7686708860759494, + "grad_norm": 0.9524333477020264, + "learning_rate": 0.00019334188811014278, + "loss": 1.3141, + "step": 7287 + }, + { + "epoch": 0.7687763713080169, + "grad_norm": 0.7794013619422913, + "learning_rate": 0.00019317366756388477, + "loss": 1.32, + "step": 7288 + }, + { + "epoch": 0.7688818565400843, + "grad_norm": 0.8320857286453247, + "learning_rate": 0.0001930055094127938, + "loss": 1.3521, + "step": 7289 + }, + { + "epoch": 0.7689873417721519, + "grad_norm": 0.6735365986824036, + "learning_rate": 0.00019283741367571294, + "loss": 1.3233, + "step": 7290 + }, + { + "epoch": 0.7690928270042194, + "grad_norm": 0.9142791032791138, + "learning_rate": 0.0001926693803714779, + "loss": 1.3966, + "step": 7291 + }, + { + "epoch": 0.7691983122362869, + "grad_norm": 0.6585968732833862, + "learning_rate": 0.00019250140951891813, + "loss": 1.3418, + "step": 7292 + }, + { + "epoch": 0.7693037974683544, + "grad_norm": 0.7258818745613098, + "learning_rate": 0.00019233350113685536, + "loss": 1.3035, + "step": 7293 + }, + { + "epoch": 0.769409282700422, + "grad_norm": 0.819872260093689, + "learning_rate": 0.00019216565524410455, + "loss": 1.3065, + "step": 7294 + }, + { + "epoch": 0.7695147679324894, + "grad_norm": 
0.7510308623313904, + "learning_rate": 0.0001919978718594738, + "loss": 1.3651, + "step": 7295 + }, + { + "epoch": 0.769620253164557, + "grad_norm": 0.6579196453094482, + "learning_rate": 0.0001918301510017638, + "loss": 1.3447, + "step": 7296 + }, + { + "epoch": 0.7697257383966245, + "grad_norm": 0.8072502017021179, + "learning_rate": 0.0001916624926897687, + "loss": 1.3238, + "step": 7297 + }, + { + "epoch": 0.7698312236286919, + "grad_norm": 0.7326222658157349, + "learning_rate": 0.0001914948969422755, + "loss": 1.3047, + "step": 7298 + }, + { + "epoch": 0.7699367088607595, + "grad_norm": 0.6730518937110901, + "learning_rate": 0.00019132736377806394, + "loss": 1.3311, + "step": 7299 + }, + { + "epoch": 0.770042194092827, + "grad_norm": 0.7848259806632996, + "learning_rate": 0.00019115989321590694, + "loss": 1.3711, + "step": 7300 + }, + { + "epoch": 0.7701476793248945, + "grad_norm": 0.6557775139808655, + "learning_rate": 0.00019099248527457068, + "loss": 1.3334, + "step": 7301 + }, + { + "epoch": 0.770253164556962, + "grad_norm": 0.698181688785553, + "learning_rate": 0.00019082513997281398, + "loss": 1.2859, + "step": 7302 + }, + { + "epoch": 0.7703586497890296, + "grad_norm": 0.7683056592941284, + "learning_rate": 0.0001906578573293886, + "loss": 1.3299, + "step": 7303 + }, + { + "epoch": 0.770464135021097, + "grad_norm": 0.6758607029914856, + "learning_rate": 0.00019049063736303946, + "loss": 1.3303, + "step": 7304 + }, + { + "epoch": 0.7705696202531646, + "grad_norm": 0.654451310634613, + "learning_rate": 0.00019032348009250433, + "loss": 1.3, + "step": 7305 + }, + { + "epoch": 0.7706751054852321, + "grad_norm": 0.6807926297187805, + "learning_rate": 0.0001901563855365141, + "loss": 1.3242, + "step": 7306 + }, + { + "epoch": 0.7707805907172995, + "grad_norm": 0.7053987383842468, + "learning_rate": 0.00018998935371379252, + "loss": 1.3382, + "step": 7307 + }, + { + "epoch": 0.7708860759493671, + "grad_norm": 0.6648688912391663, + "learning_rate": 
0.00018982238464305623, + "loss": 1.3242, + "step": 7308 + }, + { + "epoch": 0.7709915611814346, + "grad_norm": 0.7123002409934998, + "learning_rate": 0.0001896554783430149, + "loss": 1.3311, + "step": 7309 + }, + { + "epoch": 0.7710970464135021, + "grad_norm": 0.6457261443138123, + "learning_rate": 0.00018948863483237154, + "loss": 1.3114, + "step": 7310 + }, + { + "epoch": 0.7712025316455696, + "grad_norm": 0.6410099864006042, + "learning_rate": 0.0001893218541298216, + "loss": 1.3218, + "step": 7311 + }, + { + "epoch": 0.7713080168776372, + "grad_norm": 0.70705246925354, + "learning_rate": 0.00018915513625405374, + "loss": 1.3335, + "step": 7312 + }, + { + "epoch": 0.7714135021097046, + "grad_norm": 0.7436321377754211, + "learning_rate": 0.00018898848122374942, + "loss": 1.3329, + "step": 7313 + }, + { + "epoch": 0.7715189873417722, + "grad_norm": 0.6718683838844299, + "learning_rate": 0.00018882188905758326, + "loss": 1.3072, + "step": 7314 + }, + { + "epoch": 0.7716244725738397, + "grad_norm": 0.6328548192977905, + "learning_rate": 0.00018865535977422273, + "loss": 1.2968, + "step": 7315 + }, + { + "epoch": 0.7717299578059071, + "grad_norm": 0.6841711401939392, + "learning_rate": 0.00018848889339232833, + "loss": 1.335, + "step": 7316 + }, + { + "epoch": 0.7718354430379747, + "grad_norm": 0.7574557065963745, + "learning_rate": 0.00018832248993055304, + "loss": 1.2849, + "step": 7317 + }, + { + "epoch": 0.7719409282700422, + "grad_norm": 0.6441839933395386, + "learning_rate": 0.00018815614940754377, + "loss": 1.2949, + "step": 7318 + }, + { + "epoch": 0.7720464135021097, + "grad_norm": 0.6497164368629456, + "learning_rate": 0.00018798987184193963, + "loss": 1.3283, + "step": 7319 + }, + { + "epoch": 0.7721518987341772, + "grad_norm": 0.7560223937034607, + "learning_rate": 0.00018782365725237272, + "loss": 1.3186, + "step": 7320 + }, + { + "epoch": 0.7722573839662448, + "grad_norm": 0.6941583752632141, + "learning_rate": 0.00018765750565746827, + "loss": 1.3577, 
+ "step": 7321 + }, + { + "epoch": 0.7723628691983122, + "grad_norm": 0.7120454907417297, + "learning_rate": 0.00018749141707584443, + "loss": 1.304, + "step": 7322 + }, + { + "epoch": 0.7724683544303798, + "grad_norm": 0.6471433639526367, + "learning_rate": 0.0001873253915261123, + "loss": 1.3033, + "step": 7323 + }, + { + "epoch": 0.7725738396624473, + "grad_norm": 0.6954871416091919, + "learning_rate": 0.00018715942902687566, + "loss": 1.3165, + "step": 7324 + }, + { + "epoch": 0.7726793248945147, + "grad_norm": 0.7293204069137573, + "learning_rate": 0.00018699352959673172, + "loss": 1.3403, + "step": 7325 + }, + { + "epoch": 0.7727848101265823, + "grad_norm": 0.6743858456611633, + "learning_rate": 0.00018682769325426986, + "loss": 1.3094, + "step": 7326 + }, + { + "epoch": 0.7728902953586498, + "grad_norm": 0.6725485920906067, + "learning_rate": 0.00018666192001807344, + "loss": 1.2897, + "step": 7327 + }, + { + "epoch": 0.7729957805907173, + "grad_norm": 0.6629202961921692, + "learning_rate": 0.00018649620990671798, + "loss": 1.3149, + "step": 7328 + }, + { + "epoch": 0.7731012658227848, + "grad_norm": 0.7574886083602905, + "learning_rate": 0.00018633056293877203, + "loss": 1.2931, + "step": 7329 + }, + { + "epoch": 0.7732067510548524, + "grad_norm": 0.7287840843200684, + "learning_rate": 0.00018616497913279728, + "loss": 1.3781, + "step": 7330 + }, + { + "epoch": 0.7733122362869198, + "grad_norm": 0.6962081789970398, + "learning_rate": 0.00018599945850734812, + "loss": 1.3076, + "step": 7331 + }, + { + "epoch": 0.7734177215189874, + "grad_norm": 0.7233271598815918, + "learning_rate": 0.00018583400108097194, + "loss": 1.3309, + "step": 7332 + }, + { + "epoch": 0.7735232067510549, + "grad_norm": 0.6805657148361206, + "learning_rate": 0.00018566860687220922, + "loss": 1.3293, + "step": 7333 + }, + { + "epoch": 0.7736286919831223, + "grad_norm": 0.7084033489227295, + "learning_rate": 0.00018550327589959308, + "loss": 1.3116, + "step": 7334 + }, + { + "epoch": 
0.7737341772151899, + "grad_norm": 0.7232285737991333, + "learning_rate": 0.00018533800818164943, + "loss": 1.3555, + "step": 7335 + }, + { + "epoch": 0.7738396624472574, + "grad_norm": 0.7143020629882812, + "learning_rate": 0.00018517280373689789, + "loss": 1.2948, + "step": 7336 + }, + { + "epoch": 0.7739451476793249, + "grad_norm": 0.715471088886261, + "learning_rate": 0.0001850076625838502, + "loss": 1.3034, + "step": 7337 + }, + { + "epoch": 0.7740506329113924, + "grad_norm": 0.6941644549369812, + "learning_rate": 0.0001848425847410112, + "loss": 1.3181, + "step": 7338 + }, + { + "epoch": 0.77415611814346, + "grad_norm": 0.6664291024208069, + "learning_rate": 0.00018467757022687864, + "loss": 1.3359, + "step": 7339 + }, + { + "epoch": 0.7742616033755274, + "grad_norm": 0.7286629676818848, + "learning_rate": 0.0001845126190599434, + "loss": 1.3062, + "step": 7340 + }, + { + "epoch": 0.774367088607595, + "grad_norm": 0.6886101961135864, + "learning_rate": 0.00018434773125868895, + "loss": 1.3096, + "step": 7341 + }, + { + "epoch": 0.7744725738396624, + "grad_norm": 0.6635888814926147, + "learning_rate": 0.00018418290684159175, + "loss": 1.3173, + "step": 7342 + }, + { + "epoch": 0.7745780590717299, + "grad_norm": 0.6976878046989441, + "learning_rate": 0.00018401814582712103, + "loss": 1.291, + "step": 7343 + }, + { + "epoch": 0.7746835443037975, + "grad_norm": 0.7181142568588257, + "learning_rate": 0.0001838534482337396, + "loss": 1.3517, + "step": 7344 + }, + { + "epoch": 0.7747890295358649, + "grad_norm": 0.6885108351707458, + "learning_rate": 0.0001836888140799023, + "loss": 1.3121, + "step": 7345 + }, + { + "epoch": 0.7748945147679325, + "grad_norm": 0.7077135443687439, + "learning_rate": 0.0001835242433840573, + "loss": 1.2583, + "step": 7346 + }, + { + "epoch": 0.775, + "grad_norm": 0.7465144991874695, + "learning_rate": 0.00018335973616464554, + "loss": 1.3326, + "step": 7347 + }, + { + "epoch": 0.7751054852320675, + "grad_norm": 0.7384396195411682, + 
"learning_rate": 0.00018319529244010082, + "loss": 1.3413, + "step": 7348 + }, + { + "epoch": 0.775210970464135, + "grad_norm": 0.7506087422370911, + "learning_rate": 0.00018303091222884998, + "loss": 1.3232, + "step": 7349 + }, + { + "epoch": 0.7753164556962026, + "grad_norm": 0.6957434415817261, + "learning_rate": 0.00018286659554931254, + "loss": 1.2948, + "step": 7350 + }, + { + "epoch": 0.77542194092827, + "grad_norm": 0.6678857207298279, + "learning_rate": 0.00018270234241990108, + "loss": 1.3015, + "step": 7351 + }, + { + "epoch": 0.7755274261603375, + "grad_norm": 0.7000881433486938, + "learning_rate": 0.00018253815285902074, + "loss": 1.3391, + "step": 7352 + }, + { + "epoch": 0.7756329113924051, + "grad_norm": 0.7071524262428284, + "learning_rate": 0.0001823740268850702, + "loss": 1.3259, + "step": 7353 + }, + { + "epoch": 0.7757383966244725, + "grad_norm": 0.7226134538650513, + "learning_rate": 0.0001822099645164404, + "loss": 1.295, + "step": 7354 + }, + { + "epoch": 0.7758438818565401, + "grad_norm": 1.014111042022705, + "learning_rate": 0.00018204596577151534, + "loss": 1.3243, + "step": 7355 + }, + { + "epoch": 0.7759493670886076, + "grad_norm": 0.6802889704704285, + "learning_rate": 0.00018188203066867178, + "loss": 1.3472, + "step": 7356 + }, + { + "epoch": 0.7760548523206751, + "grad_norm": 0.8533450961112976, + "learning_rate": 0.00018171815922627974, + "loss": 1.3278, + "step": 7357 + }, + { + "epoch": 0.7761603375527426, + "grad_norm": 0.7585994601249695, + "learning_rate": 0.00018155435146270158, + "loss": 1.3451, + "step": 7358 + }, + { + "epoch": 0.7762658227848102, + "grad_norm": 0.7085068821907043, + "learning_rate": 0.00018139060739629287, + "loss": 1.3249, + "step": 7359 + }, + { + "epoch": 0.7763713080168776, + "grad_norm": 0.7076596021652222, + "learning_rate": 0.00018122692704540194, + "loss": 1.3081, + "step": 7360 + }, + { + "epoch": 0.7764767932489451, + "grad_norm": 0.805507481098175, + "learning_rate": 0.0001810633104283698, + 
"loss": 1.3556, + "step": 7361 + }, + { + "epoch": 0.7765822784810127, + "grad_norm": 0.6653522253036499, + "learning_rate": 0.00018089975756353083, + "loss": 1.3466, + "step": 7362 + }, + { + "epoch": 0.7766877637130801, + "grad_norm": 0.7810810804367065, + "learning_rate": 0.0001807362684692119, + "loss": 1.2842, + "step": 7363 + }, + { + "epoch": 0.7767932489451477, + "grad_norm": 0.6719419360160828, + "learning_rate": 0.00018057284316373267, + "loss": 1.2908, + "step": 7364 + }, + { + "epoch": 0.7768987341772152, + "grad_norm": 0.6975407004356384, + "learning_rate": 0.00018040948166540586, + "loss": 1.3129, + "step": 7365 + }, + { + "epoch": 0.7770042194092827, + "grad_norm": 0.6830663084983826, + "learning_rate": 0.0001802461839925368, + "loss": 1.3064, + "step": 7366 + }, + { + "epoch": 0.7771097046413502, + "grad_norm": 0.695005476474762, + "learning_rate": 0.00018008295016342383, + "loss": 1.3195, + "step": 7367 + }, + { + "epoch": 0.7772151898734178, + "grad_norm": 0.7135804295539856, + "learning_rate": 0.00017991978019635819, + "loss": 1.2894, + "step": 7368 + }, + { + "epoch": 0.7773206751054852, + "grad_norm": 0.7204443216323853, + "learning_rate": 0.00017975667410962366, + "loss": 1.3447, + "step": 7369 + }, + { + "epoch": 0.7774261603375527, + "grad_norm": 0.6855598092079163, + "learning_rate": 0.00017959363192149752, + "loss": 1.3628, + "step": 7370 + }, + { + "epoch": 0.7775316455696203, + "grad_norm": 0.6798372268676758, + "learning_rate": 0.0001794306536502492, + "loss": 1.3303, + "step": 7371 + }, + { + "epoch": 0.7776371308016877, + "grad_norm": 0.7607227563858032, + "learning_rate": 0.0001792677393141412, + "loss": 1.342, + "step": 7372 + }, + { + "epoch": 0.7777426160337553, + "grad_norm": 0.6582652926445007, + "learning_rate": 0.00017910488893142903, + "loss": 1.2773, + "step": 7373 + }, + { + "epoch": 0.7778481012658228, + "grad_norm": 0.659331202507019, + "learning_rate": 0.00017894210252036069, + "loss": 1.3189, + "step": 7374 + }, + { + 
"epoch": 0.7779535864978903, + "grad_norm": 0.6587789058685303, + "learning_rate": 0.0001787793800991774, + "loss": 1.3535, + "step": 7375 + }, + { + "epoch": 0.7780590717299578, + "grad_norm": 0.6687893271446228, + "learning_rate": 0.00017861672168611293, + "loss": 1.2868, + "step": 7376 + }, + { + "epoch": 0.7781645569620254, + "grad_norm": 0.6741248369216919, + "learning_rate": 0.0001784541272993939, + "loss": 1.2999, + "step": 7377 + }, + { + "epoch": 0.7782700421940928, + "grad_norm": 0.7008171081542969, + "learning_rate": 0.00017829159695723973, + "loss": 1.2812, + "step": 7378 + }, + { + "epoch": 0.7783755274261603, + "grad_norm": 0.6581507325172424, + "learning_rate": 0.00017812913067786313, + "loss": 1.3106, + "step": 7379 + }, + { + "epoch": 0.7784810126582279, + "grad_norm": 0.6771066188812256, + "learning_rate": 0.00017796672847946905, + "loss": 1.316, + "step": 7380 + }, + { + "epoch": 0.7785864978902953, + "grad_norm": 0.6430746912956238, + "learning_rate": 0.0001778043903802555, + "loss": 1.3201, + "step": 7381 + }, + { + "epoch": 0.7786919831223629, + "grad_norm": 0.7222340106964111, + "learning_rate": 0.00017764211639841312, + "loss": 1.3937, + "step": 7382 + }, + { + "epoch": 0.7787974683544304, + "grad_norm": 0.7413226366043091, + "learning_rate": 0.0001774799065521257, + "loss": 1.3204, + "step": 7383 + }, + { + "epoch": 0.7789029535864979, + "grad_norm": 0.7029499411582947, + "learning_rate": 0.0001773177608595696, + "loss": 1.307, + "step": 7384 + }, + { + "epoch": 0.7790084388185654, + "grad_norm": 0.7194016575813293, + "learning_rate": 0.00017715567933891405, + "loss": 1.3154, + "step": 7385 + }, + { + "epoch": 0.779113924050633, + "grad_norm": 0.8785111904144287, + "learning_rate": 0.0001769936620083211, + "loss": 1.2711, + "step": 7386 + }, + { + "epoch": 0.7792194092827004, + "grad_norm": 0.6592636704444885, + "learning_rate": 0.0001768317088859453, + "loss": 1.3364, + "step": 7387 + }, + { + "epoch": 0.7793248945147679, + "grad_norm": 
0.7825360298156738, + "learning_rate": 0.0001766698199899349, + "loss": 1.3062, + "step": 7388 + }, + { + "epoch": 0.7794303797468355, + "grad_norm": 0.6784241199493408, + "learning_rate": 0.00017650799533842996, + "loss": 1.3146, + "step": 7389 + }, + { + "epoch": 0.7795358649789029, + "grad_norm": 0.6420895457267761, + "learning_rate": 0.0001763462349495639, + "loss": 1.3117, + "step": 7390 + }, + { + "epoch": 0.7796413502109705, + "grad_norm": 0.6838799715042114, + "learning_rate": 0.0001761845388414627, + "loss": 1.3146, + "step": 7391 + }, + { + "epoch": 0.779746835443038, + "grad_norm": 0.7478772401809692, + "learning_rate": 0.00017602290703224525, + "loss": 1.3454, + "step": 7392 + }, + { + "epoch": 0.7798523206751055, + "grad_norm": 0.6165337562561035, + "learning_rate": 0.00017586133954002308, + "loss": 1.3204, + "step": 7393 + }, + { + "epoch": 0.779957805907173, + "grad_norm": 0.6775294542312622, + "learning_rate": 0.00017569983638290084, + "loss": 1.3186, + "step": 7394 + }, + { + "epoch": 0.7800632911392406, + "grad_norm": 0.7411644458770752, + "learning_rate": 0.0001755383975789754, + "loss": 1.3121, + "step": 7395 + }, + { + "epoch": 0.780168776371308, + "grad_norm": 0.6817713379859924, + "learning_rate": 0.00017537702314633722, + "loss": 1.3706, + "step": 7396 + }, + { + "epoch": 0.7802742616033755, + "grad_norm": 0.712638795375824, + "learning_rate": 0.00017521571310306889, + "loss": 1.3352, + "step": 7397 + }, + { + "epoch": 0.7803797468354431, + "grad_norm": 0.7289531826972961, + "learning_rate": 0.0001750544674672461, + "loss": 1.3014, + "step": 7398 + }, + { + "epoch": 0.7804852320675105, + "grad_norm": 0.7088237404823303, + "learning_rate": 0.00017489328625693715, + "loss": 1.3203, + "step": 7399 + }, + { + "epoch": 0.7805907172995781, + "grad_norm": 0.7455496788024902, + "learning_rate": 0.00017473216949020326, + "loss": 1.3396, + "step": 7400 + }, + { + "epoch": 0.7806962025316456, + "grad_norm": 0.7046405076980591, + "learning_rate": 
0.00017457111718509831, + "loss": 1.2832, + "step": 7401 + }, + { + "epoch": 0.7808016877637131, + "grad_norm": 0.6847984790802002, + "learning_rate": 0.00017441012935966898, + "loss": 1.3198, + "step": 7402 + }, + { + "epoch": 0.7809071729957806, + "grad_norm": 0.6523292064666748, + "learning_rate": 0.00017424920603195483, + "loss": 1.2974, + "step": 7403 + }, + { + "epoch": 0.7810126582278482, + "grad_norm": 0.6515406370162964, + "learning_rate": 0.0001740883472199879, + "loss": 1.3123, + "step": 7404 + }, + { + "epoch": 0.7811181434599156, + "grad_norm": 0.7378098368644714, + "learning_rate": 0.00017392755294179363, + "loss": 1.3101, + "step": 7405 + }, + { + "epoch": 0.7812236286919831, + "grad_norm": 0.6583898067474365, + "learning_rate": 0.0001737668232153896, + "loss": 1.3026, + "step": 7406 + }, + { + "epoch": 0.7813291139240506, + "grad_norm": 0.712043046951294, + "learning_rate": 0.00017360615805878636, + "loss": 1.3064, + "step": 7407 + }, + { + "epoch": 0.7814345991561181, + "grad_norm": 0.7909241318702698, + "learning_rate": 0.00017344555748998727, + "loss": 1.3214, + "step": 7408 + }, + { + "epoch": 0.7815400843881857, + "grad_norm": 0.6597241163253784, + "learning_rate": 0.0001732850215269885, + "loss": 1.3297, + "step": 7409 + }, + { + "epoch": 0.7816455696202531, + "grad_norm": 0.6971682906150818, + "learning_rate": 0.0001731245501877787, + "loss": 1.2968, + "step": 7410 + }, + { + "epoch": 0.7817510548523207, + "grad_norm": 0.8281659483909607, + "learning_rate": 0.00017296414349033976, + "loss": 1.33, + "step": 7411 + }, + { + "epoch": 0.7818565400843882, + "grad_norm": 0.688751757144928, + "learning_rate": 0.0001728038014526458, + "loss": 1.3287, + "step": 7412 + }, + { + "epoch": 0.7819620253164556, + "grad_norm": 0.737083375453949, + "learning_rate": 0.00017264352409266385, + "loss": 1.3176, + "step": 7413 + }, + { + "epoch": 0.7820675105485232, + "grad_norm": 0.8326061367988586, + "learning_rate": 0.0001724833114283542, + "loss": 1.3236, + 
"step": 7414 + }, + { + "epoch": 0.7821729957805907, + "grad_norm": 0.6422482132911682, + "learning_rate": 0.0001723231634776693, + "loss": 1.2829, + "step": 7415 + }, + { + "epoch": 0.7822784810126582, + "grad_norm": 0.6597452759742737, + "learning_rate": 0.0001721630802585545, + "loss": 1.2724, + "step": 7416 + }, + { + "epoch": 0.7823839662447257, + "grad_norm": 0.7020402550697327, + "learning_rate": 0.00017200306178894785, + "loss": 1.2975, + "step": 7417 + }, + { + "epoch": 0.7824894514767933, + "grad_norm": 0.732501208782196, + "learning_rate": 0.00017184310808678028, + "loss": 1.3527, + "step": 7418 + }, + { + "epoch": 0.7825949367088607, + "grad_norm": 0.7734465599060059, + "learning_rate": 0.00017168321916997547, + "loss": 1.3431, + "step": 7419 + }, + { + "epoch": 0.7827004219409283, + "grad_norm": 0.7581454515457153, + "learning_rate": 0.00017152339505644963, + "loss": 1.2963, + "step": 7420 + }, + { + "epoch": 0.7828059071729958, + "grad_norm": 0.685950756072998, + "learning_rate": 0.00017136363576411172, + "loss": 1.3127, + "step": 7421 + }, + { + "epoch": 0.7829113924050632, + "grad_norm": 0.7448627352714539, + "learning_rate": 0.00017120394131086398, + "loss": 1.3094, + "step": 7422 + }, + { + "epoch": 0.7830168776371308, + "grad_norm": 0.6514400243759155, + "learning_rate": 0.00017104431171460077, + "loss": 1.2814, + "step": 7423 + }, + { + "epoch": 0.7831223628691983, + "grad_norm": 0.7152001857757568, + "learning_rate": 0.0001708847469932093, + "loss": 1.3176, + "step": 7424 + }, + { + "epoch": 0.7832278481012658, + "grad_norm": 0.6769898533821106, + "learning_rate": 0.00017072524716456975, + "loss": 1.3065, + "step": 7425 + }, + { + "epoch": 0.7833333333333333, + "grad_norm": 0.7049786448478699, + "learning_rate": 0.00017056581224655473, + "loss": 1.2609, + "step": 7426 + }, + { + "epoch": 0.7834388185654009, + "grad_norm": 0.6929465532302856, + "learning_rate": 0.0001704064422570298, + "loss": 1.2947, + "step": 7427 + }, + { + "epoch": 
0.7835443037974683, + "grad_norm": 0.680397093296051, + "learning_rate": 0.0001702471372138531, + "loss": 1.2973, + "step": 7428 + }, + { + "epoch": 0.7836497890295359, + "grad_norm": 0.6477676630020142, + "learning_rate": 0.00017008789713487558, + "loss": 1.3153, + "step": 7429 + }, + { + "epoch": 0.7837552742616034, + "grad_norm": 0.6751905083656311, + "learning_rate": 0.0001699287220379407, + "loss": 1.3172, + "step": 7430 + }, + { + "epoch": 0.7838607594936708, + "grad_norm": 0.6629983186721802, + "learning_rate": 0.00016976961194088526, + "loss": 1.3012, + "step": 7431 + }, + { + "epoch": 0.7839662447257384, + "grad_norm": 0.6730597019195557, + "learning_rate": 0.000169610566861538, + "loss": 1.2985, + "step": 7432 + }, + { + "epoch": 0.7840717299578059, + "grad_norm": 0.6775293350219727, + "learning_rate": 0.0001694515868177209, + "loss": 1.2987, + "step": 7433 + }, + { + "epoch": 0.7841772151898734, + "grad_norm": 0.692372739315033, + "learning_rate": 0.0001692926718272483, + "loss": 1.3228, + "step": 7434 + }, + { + "epoch": 0.7842827004219409, + "grad_norm": 0.7010045647621155, + "learning_rate": 0.00016913382190792754, + "loss": 1.2964, + "step": 7435 + }, + { + "epoch": 0.7843881856540085, + "grad_norm": 0.6876017451286316, + "learning_rate": 0.0001689750370775584, + "loss": 1.2739, + "step": 7436 + }, + { + "epoch": 0.7844936708860759, + "grad_norm": 0.6678821444511414, + "learning_rate": 0.00016881631735393368, + "loss": 1.3281, + "step": 7437 + }, + { + "epoch": 0.7845991561181435, + "grad_norm": 0.6904488801956177, + "learning_rate": 0.00016865766275483865, + "loss": 1.2877, + "step": 7438 + }, + { + "epoch": 0.784704641350211, + "grad_norm": 0.6667417883872986, + "learning_rate": 0.00016849907329805118, + "loss": 1.3331, + "step": 7439 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 0.7011683583259583, + "learning_rate": 0.00016834054900134228, + "loss": 1.3062, + "step": 7440 + }, + { + "epoch": 0.784915611814346, + "grad_norm": 
0.6921257972717285, + "learning_rate": 0.00016818208988247533, + "loss": 1.3266, + "step": 7441 + }, + { + "epoch": 0.7850210970464135, + "grad_norm": 0.6603653430938721, + "learning_rate": 0.00016802369595920647, + "loss": 1.3037, + "step": 7442 + }, + { + "epoch": 0.785126582278481, + "grad_norm": 0.6572083234786987, + "learning_rate": 0.00016786536724928432, + "loss": 1.274, + "step": 7443 + }, + { + "epoch": 0.7852320675105485, + "grad_norm": 0.6536577343940735, + "learning_rate": 0.00016770710377045074, + "loss": 1.295, + "step": 7444 + }, + { + "epoch": 0.7853375527426161, + "grad_norm": 0.7013777494430542, + "learning_rate": 0.00016754890554043965, + "loss": 1.3111, + "step": 7445 + }, + { + "epoch": 0.7854430379746835, + "grad_norm": 0.6796959042549133, + "learning_rate": 0.00016739077257697804, + "loss": 1.308, + "step": 7446 + }, + { + "epoch": 0.7855485232067511, + "grad_norm": 0.6885698437690735, + "learning_rate": 0.0001672327048977856, + "loss": 1.3271, + "step": 7447 + }, + { + "epoch": 0.7856540084388186, + "grad_norm": 0.7636042237281799, + "learning_rate": 0.00016707470252057423, + "loss": 1.2983, + "step": 7448 + }, + { + "epoch": 0.785759493670886, + "grad_norm": 0.6565546989440918, + "learning_rate": 0.00016691676546304936, + "loss": 1.3188, + "step": 7449 + }, + { + "epoch": 0.7858649789029536, + "grad_norm": 0.6837613582611084, + "learning_rate": 0.00016675889374290852, + "loss": 1.3142, + "step": 7450 + }, + { + "epoch": 0.7859704641350211, + "grad_norm": 0.6868768334388733, + "learning_rate": 0.0001666010873778419, + "loss": 1.3283, + "step": 7451 + }, + { + "epoch": 0.7860759493670886, + "grad_norm": 0.7038612961769104, + "learning_rate": 0.0001664433463855325, + "loss": 1.3211, + "step": 7452 + }, + { + "epoch": 0.7861814345991561, + "grad_norm": 0.7118485569953918, + "learning_rate": 0.00016628567078365612, + "loss": 1.3104, + "step": 7453 + }, + { + "epoch": 0.7862869198312237, + "grad_norm": 0.7031296491622925, + "learning_rate": 
0.00016612806058988088, + "loss": 1.3219, + "step": 7454 + }, + { + "epoch": 0.7863924050632911, + "grad_norm": 0.7098848819732666, + "learning_rate": 0.0001659705158218679, + "loss": 1.3213, + "step": 7455 + }, + { + "epoch": 0.7864978902953587, + "grad_norm": 0.6821326613426208, + "learning_rate": 0.00016581303649727076, + "loss": 1.3241, + "step": 7456 + }, + { + "epoch": 0.7866033755274262, + "grad_norm": 0.7308175563812256, + "learning_rate": 0.000165655622633736, + "loss": 1.3128, + "step": 7457 + }, + { + "epoch": 0.7867088607594936, + "grad_norm": 0.6777307987213135, + "learning_rate": 0.00016549827424890257, + "loss": 1.3137, + "step": 7458 + }, + { + "epoch": 0.7868143459915612, + "grad_norm": 0.8413142561912537, + "learning_rate": 0.00016534099136040207, + "loss": 1.3266, + "step": 7459 + }, + { + "epoch": 0.7869198312236287, + "grad_norm": 0.7205766439437866, + "learning_rate": 0.0001651837739858589, + "loss": 1.2975, + "step": 7460 + }, + { + "epoch": 0.7870253164556962, + "grad_norm": 0.7120696306228638, + "learning_rate": 0.00016502662214289, + "loss": 1.3233, + "step": 7461 + }, + { + "epoch": 0.7871308016877637, + "grad_norm": 0.7004076838493347, + "learning_rate": 0.000164869535849105, + "loss": 1.3096, + "step": 7462 + }, + { + "epoch": 0.7872362869198313, + "grad_norm": 0.6838104724884033, + "learning_rate": 0.00016471251512210626, + "loss": 1.314, + "step": 7463 + }, + { + "epoch": 0.7873417721518987, + "grad_norm": 0.6700828075408936, + "learning_rate": 0.00016455555997948868, + "loss": 1.2906, + "step": 7464 + }, + { + "epoch": 0.7874472573839663, + "grad_norm": 0.6490572094917297, + "learning_rate": 0.0001643986704388397, + "loss": 1.3191, + "step": 7465 + }, + { + "epoch": 0.7875527426160338, + "grad_norm": 0.6780567169189453, + "learning_rate": 0.00016424184651773997, + "loss": 1.3312, + "step": 7466 + }, + { + "epoch": 0.7876582278481012, + "grad_norm": 0.6891033053398132, + "learning_rate": 0.0001640850882337622, + "loss": 1.3027, + 
"step": 7467 + }, + { + "epoch": 0.7877637130801688, + "grad_norm": 0.667777955532074, + "learning_rate": 0.00016392839560447196, + "loss": 1.3204, + "step": 7468 + }, + { + "epoch": 0.7878691983122363, + "grad_norm": 0.7182723879814148, + "learning_rate": 0.00016377176864742734, + "loss": 1.2965, + "step": 7469 + }, + { + "epoch": 0.7879746835443038, + "grad_norm": 0.6714968681335449, + "learning_rate": 0.00016361520738017934, + "loss": 1.313, + "step": 7470 + }, + { + "epoch": 0.7880801687763713, + "grad_norm": 0.701464831829071, + "learning_rate": 0.00016345871182027124, + "loss": 1.3122, + "step": 7471 + }, + { + "epoch": 0.7881856540084389, + "grad_norm": 0.7053104043006897, + "learning_rate": 0.00016330228198523927, + "loss": 1.2852, + "step": 7472 + }, + { + "epoch": 0.7882911392405063, + "grad_norm": 0.7096008658409119, + "learning_rate": 0.00016314591789261216, + "loss": 1.3442, + "step": 7473 + }, + { + "epoch": 0.7883966244725739, + "grad_norm": 0.692331075668335, + "learning_rate": 0.00016298961955991105, + "loss": 1.3068, + "step": 7474 + }, + { + "epoch": 0.7885021097046413, + "grad_norm": 0.647940456867218, + "learning_rate": 0.00016283338700465034, + "loss": 1.3122, + "step": 7475 + }, + { + "epoch": 0.7886075949367088, + "grad_norm": 0.7403746843338013, + "learning_rate": 0.00016267722024433654, + "loss": 1.3244, + "step": 7476 + }, + { + "epoch": 0.7887130801687764, + "grad_norm": 0.6634947061538696, + "learning_rate": 0.0001625211192964688, + "loss": 1.3398, + "step": 7477 + }, + { + "epoch": 0.7888185654008438, + "grad_norm": 0.7221820950508118, + "learning_rate": 0.00016236508417853917, + "loss": 1.3211, + "step": 7478 + }, + { + "epoch": 0.7889240506329114, + "grad_norm": 0.6275408864021301, + "learning_rate": 0.00016220911490803206, + "loss": 1.2855, + "step": 7479 + }, + { + "epoch": 0.7890295358649789, + "grad_norm": 0.6949054002761841, + "learning_rate": 0.00016205321150242454, + "loss": 1.3225, + "step": 7480 + }, + { + "epoch": 
0.7891350210970464, + "grad_norm": 0.7150138020515442, + "learning_rate": 0.00016189737397918653, + "loss": 1.3288, + "step": 7481 + }, + { + "epoch": 0.7892405063291139, + "grad_norm": 0.6853651404380798, + "learning_rate": 0.00016174160235578, + "loss": 1.354, + "step": 7482 + }, + { + "epoch": 0.7893459915611815, + "grad_norm": 0.6429782509803772, + "learning_rate": 0.00016158589664966053, + "loss": 1.3107, + "step": 7483 + }, + { + "epoch": 0.7894514767932489, + "grad_norm": 0.7855679392814636, + "learning_rate": 0.00016143025687827538, + "loss": 1.3398, + "step": 7484 + }, + { + "epoch": 0.7895569620253164, + "grad_norm": 0.6738308668136597, + "learning_rate": 0.0001612746830590649, + "loss": 1.33, + "step": 7485 + }, + { + "epoch": 0.789662447257384, + "grad_norm": 0.6497506499290466, + "learning_rate": 0.00016111917520946175, + "loss": 1.2957, + "step": 7486 + }, + { + "epoch": 0.7897679324894514, + "grad_norm": 0.6781902313232422, + "learning_rate": 0.00016096373334689154, + "loss": 1.3169, + "step": 7487 + }, + { + "epoch": 0.789873417721519, + "grad_norm": 0.6875866651535034, + "learning_rate": 0.00016080835748877214, + "loss": 1.3215, + "step": 7488 + }, + { + "epoch": 0.7899789029535865, + "grad_norm": 0.6674945950508118, + "learning_rate": 0.00016065304765251423, + "loss": 1.3232, + "step": 7489 + }, + { + "epoch": 0.790084388185654, + "grad_norm": 0.6667591333389282, + "learning_rate": 0.00016049780385552113, + "loss": 1.3222, + "step": 7490 + }, + { + "epoch": 0.7901898734177215, + "grad_norm": 0.6429251432418823, + "learning_rate": 0.0001603426261151884, + "loss": 1.2946, + "step": 7491 + }, + { + "epoch": 0.7902953586497891, + "grad_norm": 0.6901772022247314, + "learning_rate": 0.000160187514448905, + "loss": 1.3443, + "step": 7492 + }, + { + "epoch": 0.7904008438818565, + "grad_norm": 0.6555588841438293, + "learning_rate": 0.0001600324688740516, + "loss": 1.3179, + "step": 7493 + }, + { + "epoch": 0.790506329113924, + "grad_norm": 
0.6866097450256348, + "learning_rate": 0.00015987748940800186, + "loss": 1.3117, + "step": 7494 + }, + { + "epoch": 0.7906118143459916, + "grad_norm": 0.6636956334114075, + "learning_rate": 0.0001597225760681221, + "loss": 1.3561, + "step": 7495 + }, + { + "epoch": 0.790717299578059, + "grad_norm": 0.6562795042991638, + "learning_rate": 0.00015956772887177115, + "loss": 1.2801, + "step": 7496 + }, + { + "epoch": 0.7908227848101266, + "grad_norm": 0.6444453001022339, + "learning_rate": 0.00015941294783630022, + "loss": 1.301, + "step": 7497 + }, + { + "epoch": 0.7909282700421941, + "grad_norm": 0.6643431782722473, + "learning_rate": 0.00015925823297905346, + "loss": 1.3091, + "step": 7498 + }, + { + "epoch": 0.7910337552742616, + "grad_norm": 0.6619279980659485, + "learning_rate": 0.00015910358431736745, + "loss": 1.2868, + "step": 7499 + }, + { + "epoch": 0.7911392405063291, + "grad_norm": 0.7151127457618713, + "learning_rate": 0.00015894900186857105, + "loss": 1.3603, + "step": 7500 + }, + { + "epoch": 0.7912447257383967, + "grad_norm": 0.9398965239524841, + "learning_rate": 0.00015879448564998648, + "loss": 1.374, + "step": 7501 + }, + { + "epoch": 0.7913502109704641, + "grad_norm": 0.6679266095161438, + "learning_rate": 0.00015864003567892776, + "loss": 1.3255, + "step": 7502 + }, + { + "epoch": 0.7914556962025316, + "grad_norm": 0.717357873916626, + "learning_rate": 0.00015848565197270175, + "loss": 1.3135, + "step": 7503 + }, + { + "epoch": 0.7915611814345992, + "grad_norm": 0.7972176671028137, + "learning_rate": 0.00015833133454860814, + "loss": 1.3768, + "step": 7504 + }, + { + "epoch": 0.7916666666666666, + "grad_norm": 0.7599050402641296, + "learning_rate": 0.00015817708342393878, + "loss": 1.3116, + "step": 7505 + }, + { + "epoch": 0.7917721518987342, + "grad_norm": 0.7122862935066223, + "learning_rate": 0.0001580228986159783, + "loss": 1.324, + "step": 7506 + }, + { + "epoch": 0.7918776371308017, + "grad_norm": 0.6743869185447693, + "learning_rate": 
0.00015786878014200387, + "loss": 1.3501, + "step": 7507 + }, + { + "epoch": 0.7919831223628692, + "grad_norm": 0.9819631576538086, + "learning_rate": 0.0001577147280192851, + "loss": 1.3018, + "step": 7508 + }, + { + "epoch": 0.7920886075949367, + "grad_norm": 0.7122269868850708, + "learning_rate": 0.0001575607422650846, + "loss": 1.2654, + "step": 7509 + }, + { + "epoch": 0.7921940928270043, + "grad_norm": 0.6701552271842957, + "learning_rate": 0.00015740682289665714, + "loss": 1.311, + "step": 7510 + }, + { + "epoch": 0.7922995780590717, + "grad_norm": 0.77093106508255, + "learning_rate": 0.0001572529699312501, + "loss": 1.3035, + "step": 7511 + }, + { + "epoch": 0.7924050632911392, + "grad_norm": 0.8969048857688904, + "learning_rate": 0.0001570991833861035, + "loss": 1.3085, + "step": 7512 + }, + { + "epoch": 0.7925105485232068, + "grad_norm": 0.8169012069702148, + "learning_rate": 0.00015694546327844986, + "loss": 1.3015, + "step": 7513 + }, + { + "epoch": 0.7926160337552742, + "grad_norm": 0.7808084487915039, + "learning_rate": 0.00015679180962551435, + "loss": 1.3212, + "step": 7514 + }, + { + "epoch": 0.7927215189873418, + "grad_norm": 0.8683890700340271, + "learning_rate": 0.00015663822244451446, + "loss": 1.282, + "step": 7515 + }, + { + "epoch": 0.7928270042194093, + "grad_norm": 0.76680988073349, + "learning_rate": 0.00015648470175266057, + "loss": 1.3541, + "step": 7516 + }, + { + "epoch": 0.7929324894514768, + "grad_norm": 0.6472588181495667, + "learning_rate": 0.00015633124756715523, + "loss": 1.2912, + "step": 7517 + }, + { + "epoch": 0.7930379746835443, + "grad_norm": 0.7101832628250122, + "learning_rate": 0.00015617785990519403, + "loss": 1.3113, + "step": 7518 + }, + { + "epoch": 0.7931434599156119, + "grad_norm": 0.8347263932228088, + "learning_rate": 0.00015602453878396479, + "loss": 1.3127, + "step": 7519 + }, + { + "epoch": 0.7932489451476793, + "grad_norm": 0.670112133026123, + "learning_rate": 0.0001558712842206477, + "loss": 1.2957, + 
"step": 7520 + }, + { + "epoch": 0.7933544303797468, + "grad_norm": 0.6619836688041687, + "learning_rate": 0.0001557180962324158, + "loss": 1.2886, + "step": 7521 + }, + { + "epoch": 0.7934599156118144, + "grad_norm": 0.7179598212242126, + "learning_rate": 0.00015556497483643466, + "loss": 1.3184, + "step": 7522 + }, + { + "epoch": 0.7935654008438818, + "grad_norm": 0.6660670638084412, + "learning_rate": 0.00015541192004986222, + "loss": 1.2977, + "step": 7523 + }, + { + "epoch": 0.7936708860759494, + "grad_norm": 0.6623356342315674, + "learning_rate": 0.00015525893188984898, + "loss": 1.3262, + "step": 7524 + }, + { + "epoch": 0.7937763713080169, + "grad_norm": 0.7213552594184875, + "learning_rate": 0.00015510601037353804, + "loss": 1.3307, + "step": 7525 + }, + { + "epoch": 0.7938818565400844, + "grad_norm": 0.672336757183075, + "learning_rate": 0.00015495315551806486, + "loss": 1.3021, + "step": 7526 + }, + { + "epoch": 0.7939873417721519, + "grad_norm": 0.6456335186958313, + "learning_rate": 0.000154800367340558, + "loss": 1.3078, + "step": 7527 + }, + { + "epoch": 0.7940928270042195, + "grad_norm": 0.6888944506645203, + "learning_rate": 0.00015464764585813783, + "loss": 1.3042, + "step": 7528 + }, + { + "epoch": 0.7941983122362869, + "grad_norm": 0.7296013236045837, + "learning_rate": 0.0001544949910879177, + "loss": 1.3161, + "step": 7529 + }, + { + "epoch": 0.7943037974683544, + "grad_norm": 0.6627529263496399, + "learning_rate": 0.00015434240304700332, + "loss": 1.3105, + "step": 7530 + }, + { + "epoch": 0.794409282700422, + "grad_norm": 0.7811995148658752, + "learning_rate": 0.00015418988175249282, + "loss": 1.3254, + "step": 7531 + }, + { + "epoch": 0.7945147679324894, + "grad_norm": 0.6564275622367859, + "learning_rate": 0.00015403742722147707, + "loss": 1.3099, + "step": 7532 + }, + { + "epoch": 0.794620253164557, + "grad_norm": 0.7245955467224121, + "learning_rate": 0.00015388503947103937, + "loss": 1.3214, + "step": 7533 + }, + { + "epoch": 
0.7947257383966245, + "grad_norm": 0.680638313293457, + "learning_rate": 0.00015373271851825527, + "loss": 1.3315, + "step": 7534 + }, + { + "epoch": 0.794831223628692, + "grad_norm": 0.7600327134132385, + "learning_rate": 0.00015358046438019356, + "loss": 1.2679, + "step": 7535 + }, + { + "epoch": 0.7949367088607595, + "grad_norm": 0.6967231035232544, + "learning_rate": 0.00015342827707391475, + "loss": 1.2881, + "step": 7536 + }, + { + "epoch": 0.7950421940928271, + "grad_norm": 0.7879817485809326, + "learning_rate": 0.0001532761566164723, + "loss": 1.3217, + "step": 7537 + }, + { + "epoch": 0.7951476793248945, + "grad_norm": 0.6847689151763916, + "learning_rate": 0.0001531241030249121, + "loss": 1.3038, + "step": 7538 + }, + { + "epoch": 0.795253164556962, + "grad_norm": 0.672204315662384, + "learning_rate": 0.00015297211631627234, + "loss": 1.287, + "step": 7539 + }, + { + "epoch": 0.7953586497890295, + "grad_norm": 0.7164720296859741, + "learning_rate": 0.0001528201965075841, + "loss": 1.3192, + "step": 7540 + }, + { + "epoch": 0.795464135021097, + "grad_norm": 0.6973363757133484, + "learning_rate": 0.00015266834361587063, + "loss": 1.2743, + "step": 7541 + }, + { + "epoch": 0.7955696202531646, + "grad_norm": 0.7085703015327454, + "learning_rate": 0.00015251655765814777, + "loss": 1.3078, + "step": 7542 + }, + { + "epoch": 0.795675105485232, + "grad_norm": 0.8185384273529053, + "learning_rate": 0.000152364838651424, + "loss": 1.3053, + "step": 7543 + }, + { + "epoch": 0.7957805907172996, + "grad_norm": 0.6975228190422058, + "learning_rate": 0.00015221318661269985, + "loss": 1.3048, + "step": 7544 + }, + { + "epoch": 0.7958860759493671, + "grad_norm": 0.6661352515220642, + "learning_rate": 0.00015206160155896924, + "loss": 1.3237, + "step": 7545 + }, + { + "epoch": 0.7959915611814345, + "grad_norm": 0.6745050549507141, + "learning_rate": 0.00015191008350721772, + "loss": 1.3238, + "step": 7546 + }, + { + "epoch": 0.7960970464135021, + "grad_norm": 
0.6804004311561584, + "learning_rate": 0.00015175863247442374, + "loss": 1.2866, + "step": 7547 + }, + { + "epoch": 0.7962025316455696, + "grad_norm": 0.7068785429000854, + "learning_rate": 0.00015160724847755806, + "loss": 1.2977, + "step": 7548 + }, + { + "epoch": 0.7963080168776371, + "grad_norm": 0.7676887512207031, + "learning_rate": 0.00015145593153358412, + "loss": 1.3062, + "step": 7549 + }, + { + "epoch": 0.7964135021097046, + "grad_norm": 0.6809113621711731, + "learning_rate": 0.0001513046816594575, + "loss": 1.3174, + "step": 7550 + }, + { + "epoch": 0.7965189873417722, + "grad_norm": 0.8355563282966614, + "learning_rate": 0.00015115349887212678, + "loss": 1.3021, + "step": 7551 + }, + { + "epoch": 0.7966244725738396, + "grad_norm": 0.7546358108520508, + "learning_rate": 0.00015100238318853262, + "loss": 1.3387, + "step": 7552 + }, + { + "epoch": 0.7967299578059072, + "grad_norm": 0.6981265544891357, + "learning_rate": 0.00015085133462560833, + "loss": 1.3449, + "step": 7553 + }, + { + "epoch": 0.7968354430379747, + "grad_norm": 0.6652794480323792, + "learning_rate": 0.00015070035320027933, + "loss": 1.2898, + "step": 7554 + }, + { + "epoch": 0.7969409282700421, + "grad_norm": 0.6841363310813904, + "learning_rate": 0.00015054943892946446, + "loss": 1.2546, + "step": 7555 + }, + { + "epoch": 0.7970464135021097, + "grad_norm": 0.8333359360694885, + "learning_rate": 0.000150398591830074, + "loss": 1.3528, + "step": 7556 + }, + { + "epoch": 0.7971518987341772, + "grad_norm": 0.7547590136528015, + "learning_rate": 0.00015024781191901122, + "loss": 1.3117, + "step": 7557 + }, + { + "epoch": 0.7972573839662447, + "grad_norm": 0.7392070293426514, + "learning_rate": 0.00015009709921317172, + "loss": 1.2843, + "step": 7558 + }, + { + "epoch": 0.7973628691983122, + "grad_norm": 0.7599650025367737, + "learning_rate": 0.00014994645372944367, + "loss": 1.3017, + "step": 7559 + }, + { + "epoch": 0.7974683544303798, + "grad_norm": 0.7542463541030884, + "learning_rate": 
0.0001497958754847076, + "loss": 1.3124, + "step": 7560 + }, + { + "epoch": 0.7975738396624472, + "grad_norm": 0.7287628054618835, + "learning_rate": 0.00014964536449583657, + "loss": 1.3041, + "step": 7561 + }, + { + "epoch": 0.7976793248945148, + "grad_norm": 0.7509565949440002, + "learning_rate": 0.0001494949207796961, + "loss": 1.3426, + "step": 7562 + }, + { + "epoch": 0.7977848101265823, + "grad_norm": 0.9506324529647827, + "learning_rate": 0.00014934454435314417, + "loss": 1.3213, + "step": 7563 + }, + { + "epoch": 0.7978902953586497, + "grad_norm": 0.6565811634063721, + "learning_rate": 0.00014919423523303095, + "loss": 1.3164, + "step": 7564 + }, + { + "epoch": 0.7979957805907173, + "grad_norm": 0.8608470559120178, + "learning_rate": 0.00014904399343619972, + "loss": 1.3288, + "step": 7565 + }, + { + "epoch": 0.7981012658227848, + "grad_norm": 0.8725191950798035, + "learning_rate": 0.00014889381897948575, + "loss": 1.2872, + "step": 7566 + }, + { + "epoch": 0.7982067510548523, + "grad_norm": 0.7125952839851379, + "learning_rate": 0.00014874371187971672, + "loss": 1.3467, + "step": 7567 + }, + { + "epoch": 0.7983122362869198, + "grad_norm": 0.7584227919578552, + "learning_rate": 0.00014859367215371293, + "loss": 1.3152, + "step": 7568 + }, + { + "epoch": 0.7984177215189874, + "grad_norm": 0.8799015879631042, + "learning_rate": 0.00014844369981828698, + "loss": 1.3391, + "step": 7569 + }, + { + "epoch": 0.7985232067510548, + "grad_norm": 0.7402393817901611, + "learning_rate": 0.00014829379489024415, + "loss": 1.3043, + "step": 7570 + }, + { + "epoch": 0.7986286919831224, + "grad_norm": 0.6287904381752014, + "learning_rate": 0.00014814395738638195, + "loss": 1.2762, + "step": 7571 + }, + { + "epoch": 0.7987341772151899, + "grad_norm": 0.8624987006187439, + "learning_rate": 0.0001479941873234905, + "loss": 1.325, + "step": 7572 + }, + { + "epoch": 0.7988396624472573, + "grad_norm": 0.6548227667808533, + "learning_rate": 0.00014784448471835224, + "loss": 
1.3048, + "step": 7573 + }, + { + "epoch": 0.7989451476793249, + "grad_norm": 0.6883224844932556, + "learning_rate": 0.0001476948495877418, + "loss": 1.3278, + "step": 7574 + }, + { + "epoch": 0.7990506329113924, + "grad_norm": 0.7907189726829529, + "learning_rate": 0.00014754528194842707, + "loss": 1.3036, + "step": 7575 + }, + { + "epoch": 0.7991561181434599, + "grad_norm": 0.684829592704773, + "learning_rate": 0.00014739578181716765, + "loss": 1.3153, + "step": 7576 + }, + { + "epoch": 0.7992616033755274, + "grad_norm": 0.6370785236358643, + "learning_rate": 0.00014724634921071573, + "loss": 1.3265, + "step": 7577 + }, + { + "epoch": 0.799367088607595, + "grad_norm": 0.6789904832839966, + "learning_rate": 0.0001470969841458159, + "loss": 1.3247, + "step": 7578 + }, + { + "epoch": 0.7994725738396624, + "grad_norm": 0.698068380355835, + "learning_rate": 0.00014694768663920537, + "loss": 1.3379, + "step": 7579 + }, + { + "epoch": 0.79957805907173, + "grad_norm": 0.739780604839325, + "learning_rate": 0.0001467984567076137, + "loss": 1.3104, + "step": 7580 + }, + { + "epoch": 0.7996835443037975, + "grad_norm": 0.7008093595504761, + "learning_rate": 0.00014664929436776278, + "loss": 1.2838, + "step": 7581 + }, + { + "epoch": 0.799789029535865, + "grad_norm": 0.700270414352417, + "learning_rate": 0.00014650019963636696, + "loss": 1.3384, + "step": 7582 + }, + { + "epoch": 0.7998945147679325, + "grad_norm": 0.6415829062461853, + "learning_rate": 0.0001463511725301331, + "loss": 1.3241, + "step": 7583 + }, + { + "epoch": 0.8, + "grad_norm": 0.7154666781425476, + "learning_rate": 0.00014620221306576027, + "loss": 1.3069, + "step": 7584 + }, + { + "epoch": 0.8001054852320675, + "grad_norm": 0.7108946442604065, + "learning_rate": 0.00014605332125994038, + "loss": 1.3196, + "step": 7585 + }, + { + "epoch": 0.800210970464135, + "grad_norm": 0.6825931072235107, + "learning_rate": 0.0001459044971293575, + "loss": 1.2951, + "step": 7586 + }, + { + "epoch": 0.8003164556962026, + 
"grad_norm": 0.6856725811958313, + "learning_rate": 0.000145755740690688, + "loss": 1.2954, + "step": 7587 + }, + { + "epoch": 0.80042194092827, + "grad_norm": 0.6460059881210327, + "learning_rate": 0.00014560705196060074, + "loss": 1.3074, + "step": 7588 + }, + { + "epoch": 0.8005274261603376, + "grad_norm": 0.6543325185775757, + "learning_rate": 0.00014545843095575709, + "loss": 1.3255, + "step": 7589 + }, + { + "epoch": 0.8006329113924051, + "grad_norm": 0.7242670059204102, + "learning_rate": 0.00014530987769281075, + "loss": 1.2764, + "step": 7590 + }, + { + "epoch": 0.8007383966244725, + "grad_norm": 0.7024568319320679, + "learning_rate": 0.00014516139218840788, + "loss": 1.3133, + "step": 7591 + }, + { + "epoch": 0.8008438818565401, + "grad_norm": 0.7145184874534607, + "learning_rate": 0.00014501297445918703, + "loss": 1.3231, + "step": 7592 + }, + { + "epoch": 0.8009493670886076, + "grad_norm": 0.6684396862983704, + "learning_rate": 0.00014486462452177896, + "loss": 1.3501, + "step": 7593 + }, + { + "epoch": 0.8010548523206751, + "grad_norm": 0.6888110637664795, + "learning_rate": 0.0001447163423928073, + "loss": 1.3022, + "step": 7594 + }, + { + "epoch": 0.8011603375527426, + "grad_norm": 0.7140849828720093, + "learning_rate": 0.00014456812808888775, + "loss": 1.3665, + "step": 7595 + }, + { + "epoch": 0.8012658227848102, + "grad_norm": 0.6840276122093201, + "learning_rate": 0.00014441998162662847, + "loss": 1.2904, + "step": 7596 + }, + { + "epoch": 0.8013713080168776, + "grad_norm": 0.7032005786895752, + "learning_rate": 0.00014427190302262989, + "loss": 1.2598, + "step": 7597 + }, + { + "epoch": 0.8014767932489452, + "grad_norm": 0.6599680185317993, + "learning_rate": 0.00014412389229348494, + "loss": 1.2953, + "step": 7598 + }, + { + "epoch": 0.8015822784810127, + "grad_norm": 0.6543871164321899, + "learning_rate": 0.00014397594945577912, + "loss": 1.3118, + "step": 7599 + }, + { + "epoch": 0.8016877637130801, + "grad_norm": 0.6772938370704651, + 
"learning_rate": 0.00014382807452609003, + "loss": 1.295, + "step": 7600 + }, + { + "epoch": 0.8017932489451477, + "grad_norm": 0.7108297348022461, + "learning_rate": 0.00014368026752098782, + "loss": 1.3426, + "step": 7601 + }, + { + "epoch": 0.8018987341772152, + "grad_norm": 0.763025164604187, + "learning_rate": 0.00014353252845703506, + "loss": 1.299, + "step": 7602 + }, + { + "epoch": 0.8020042194092827, + "grad_norm": 0.6913374066352844, + "learning_rate": 0.00014338485735078632, + "loss": 1.3572, + "step": 7603 + }, + { + "epoch": 0.8021097046413502, + "grad_norm": 0.6780367493629456, + "learning_rate": 0.0001432372542187895, + "loss": 1.3386, + "step": 7604 + }, + { + "epoch": 0.8022151898734177, + "grad_norm": 0.685626745223999, + "learning_rate": 0.00014308971907758383, + "loss": 1.311, + "step": 7605 + }, + { + "epoch": 0.8023206751054852, + "grad_norm": 0.6816147565841675, + "learning_rate": 0.00014294225194370154, + "loss": 1.3188, + "step": 7606 + }, + { + "epoch": 0.8024261603375528, + "grad_norm": 0.663231611251831, + "learning_rate": 0.00014279485283366696, + "loss": 1.2945, + "step": 7607 + }, + { + "epoch": 0.8025316455696202, + "grad_norm": 0.6467674970626831, + "learning_rate": 0.00014264752176399687, + "loss": 1.2963, + "step": 7608 + }, + { + "epoch": 0.8026371308016877, + "grad_norm": 0.6872180104255676, + "learning_rate": 0.0001425002587512005, + "loss": 1.3363, + "step": 7609 + }, + { + "epoch": 0.8027426160337553, + "grad_norm": 0.6783179640769958, + "learning_rate": 0.00014235306381177952, + "loss": 1.3391, + "step": 7610 + }, + { + "epoch": 0.8028481012658227, + "grad_norm": 0.6525648236274719, + "learning_rate": 0.00014220593696222768, + "loss": 1.2746, + "step": 7611 + }, + { + "epoch": 0.8029535864978903, + "grad_norm": 0.6650452613830566, + "learning_rate": 0.00014205887821903105, + "loss": 1.3203, + "step": 7612 + }, + { + "epoch": 0.8030590717299578, + "grad_norm": 0.7083962559700012, + "learning_rate": 0.00014191188759866887, + 
"loss": 1.2917, + "step": 7613 + }, + { + "epoch": 0.8031645569620253, + "grad_norm": 0.6349161267280579, + "learning_rate": 0.00014176496511761192, + "loss": 1.2808, + "step": 7614 + }, + { + "epoch": 0.8032700421940928, + "grad_norm": 0.6728173494338989, + "learning_rate": 0.0001416181107923235, + "loss": 1.3164, + "step": 7615 + }, + { + "epoch": 0.8033755274261604, + "grad_norm": 0.68346107006073, + "learning_rate": 0.0001414713246392594, + "loss": 1.3122, + "step": 7616 + }, + { + "epoch": 0.8034810126582278, + "grad_norm": 0.6343701481819153, + "learning_rate": 0.0001413246066748678, + "loss": 1.2777, + "step": 7617 + }, + { + "epoch": 0.8035864978902953, + "grad_norm": 0.6990145444869995, + "learning_rate": 0.00014117795691558915, + "loss": 1.2915, + "step": 7618 + }, + { + "epoch": 0.8036919831223629, + "grad_norm": 0.7615051865577698, + "learning_rate": 0.00014103137537785633, + "loss": 1.3113, + "step": 7619 + }, + { + "epoch": 0.8037974683544303, + "grad_norm": 0.6472484469413757, + "learning_rate": 0.00014088486207809449, + "loss": 1.2816, + "step": 7620 + }, + { + "epoch": 0.8039029535864979, + "grad_norm": 0.692581832408905, + "learning_rate": 0.00014073841703272092, + "loss": 1.3728, + "step": 7621 + }, + { + "epoch": 0.8040084388185654, + "grad_norm": 0.7561002969741821, + "learning_rate": 0.00014059204025814603, + "loss": 1.2936, + "step": 7622 + }, + { + "epoch": 0.8041139240506329, + "grad_norm": 0.7460399270057678, + "learning_rate": 0.0001404457317707718, + "loss": 1.2969, + "step": 7623 + }, + { + "epoch": 0.8042194092827004, + "grad_norm": 0.6978618502616882, + "learning_rate": 0.00014029949158699285, + "loss": 1.3322, + "step": 7624 + }, + { + "epoch": 0.804324894514768, + "grad_norm": 0.7334622144699097, + "learning_rate": 0.00014015331972319606, + "loss": 1.3019, + "step": 7625 + }, + { + "epoch": 0.8044303797468354, + "grad_norm": 0.653109073638916, + "learning_rate": 0.00014000721619576077, + "loss": 1.2966, + "step": 7626 + }, + { + 
"epoch": 0.804535864978903, + "grad_norm": 0.6619391441345215, + "learning_rate": 0.0001398611810210586, + "loss": 1.3148, + "step": 7627 + }, + { + "epoch": 0.8046413502109705, + "grad_norm": 0.6698680520057678, + "learning_rate": 0.0001397152142154536, + "loss": 1.3234, + "step": 7628 + }, + { + "epoch": 0.8047468354430379, + "grad_norm": 0.8523586988449097, + "learning_rate": 0.00013956931579530194, + "loss": 1.3221, + "step": 7629 + }, + { + "epoch": 0.8048523206751055, + "grad_norm": 0.6905794739723206, + "learning_rate": 0.0001394234857769521, + "loss": 1.3473, + "step": 7630 + }, + { + "epoch": 0.804957805907173, + "grad_norm": 0.6558629274368286, + "learning_rate": 0.00013927772417674558, + "loss": 1.3484, + "step": 7631 + }, + { + "epoch": 0.8050632911392405, + "grad_norm": 0.6944186687469482, + "learning_rate": 0.00013913203101101532, + "loss": 1.2899, + "step": 7632 + }, + { + "epoch": 0.805168776371308, + "grad_norm": 0.6862301826477051, + "learning_rate": 0.0001389864062960871, + "loss": 1.3147, + "step": 7633 + }, + { + "epoch": 0.8052742616033756, + "grad_norm": 0.703761875629425, + "learning_rate": 0.00013884085004827883, + "loss": 1.3237, + "step": 7634 + }, + { + "epoch": 0.805379746835443, + "grad_norm": 0.6951779723167419, + "learning_rate": 0.0001386953622839008, + "loss": 1.3428, + "step": 7635 + }, + { + "epoch": 0.8054852320675105, + "grad_norm": 0.6441842317581177, + "learning_rate": 0.0001385499430192557, + "loss": 1.2897, + "step": 7636 + }, + { + "epoch": 0.8055907172995781, + "grad_norm": 0.6569348573684692, + "learning_rate": 0.00013840459227063842, + "loss": 1.2871, + "step": 7637 + }, + { + "epoch": 0.8056962025316455, + "grad_norm": 0.6623474359512329, + "learning_rate": 0.00013825931005433605, + "loss": 1.3074, + "step": 7638 + }, + { + "epoch": 0.8058016877637131, + "grad_norm": 0.6646659970283508, + "learning_rate": 0.00013811409638662858, + "loss": 1.3184, + "step": 7639 + }, + { + "epoch": 0.8059071729957806, + "grad_norm": 
0.7410973310470581, + "learning_rate": 0.0001379689512837878, + "loss": 1.2898, + "step": 7640 + }, + { + "epoch": 0.8060126582278481, + "grad_norm": 0.6648349165916443, + "learning_rate": 0.00013782387476207788, + "loss": 1.3239, + "step": 7641 + }, + { + "epoch": 0.8061181434599156, + "grad_norm": 0.6701328158378601, + "learning_rate": 0.0001376788668377554, + "loss": 1.3584, + "step": 7642 + }, + { + "epoch": 0.8062236286919832, + "grad_norm": 0.7286360263824463, + "learning_rate": 0.0001375339275270692, + "loss": 1.2864, + "step": 7643 + }, + { + "epoch": 0.8063291139240506, + "grad_norm": 0.6586554050445557, + "learning_rate": 0.00013738905684626044, + "loss": 1.3376, + "step": 7644 + }, + { + "epoch": 0.8064345991561181, + "grad_norm": 0.6860744953155518, + "learning_rate": 0.00013724425481156263, + "loss": 1.3248, + "step": 7645 + }, + { + "epoch": 0.8065400843881857, + "grad_norm": 0.6901468634605408, + "learning_rate": 0.00013709952143920148, + "loss": 1.3602, + "step": 7646 + }, + { + "epoch": 0.8066455696202531, + "grad_norm": 0.671943724155426, + "learning_rate": 0.000136954856745395, + "loss": 1.3181, + "step": 7647 + }, + { + "epoch": 0.8067510548523207, + "grad_norm": 0.7921189665794373, + "learning_rate": 0.000136810260746354, + "loss": 1.2957, + "step": 7648 + }, + { + "epoch": 0.8068565400843882, + "grad_norm": 0.7137415409088135, + "learning_rate": 0.00013666573345828083, + "loss": 1.3098, + "step": 7649 + }, + { + "epoch": 0.8069620253164557, + "grad_norm": 0.6813004612922668, + "learning_rate": 0.00013652127489737067, + "loss": 1.3511, + "step": 7650 + }, + { + "epoch": 0.8070675105485232, + "grad_norm": 0.843944251537323, + "learning_rate": 0.00013637688507981064, + "loss": 1.3188, + "step": 7651 + }, + { + "epoch": 0.8071729957805908, + "grad_norm": 0.7078913450241089, + "learning_rate": 0.0001362325640217805, + "loss": 1.2399, + "step": 7652 + }, + { + "epoch": 0.8072784810126582, + "grad_norm": 0.6855044364929199, + "learning_rate": 
0.00013608831173945207, + "loss": 1.3143, + "step": 7653 + }, + { + "epoch": 0.8073839662447257, + "grad_norm": 0.7517228722572327, + "learning_rate": 0.0001359441282489895, + "loss": 1.341, + "step": 7654 + }, + { + "epoch": 0.8074894514767933, + "grad_norm": 0.6715016961097717, + "learning_rate": 0.0001358000135665494, + "loss": 1.2823, + "step": 7655 + }, + { + "epoch": 0.8075949367088607, + "grad_norm": 0.7544242739677429, + "learning_rate": 0.00013565596770828025, + "loss": 1.2792, + "step": 7656 + }, + { + "epoch": 0.8077004219409283, + "grad_norm": 0.6884590983390808, + "learning_rate": 0.00013551199069032348, + "loss": 1.3379, + "step": 7657 + }, + { + "epoch": 0.8078059071729958, + "grad_norm": 0.7594544887542725, + "learning_rate": 0.0001353680825288123, + "loss": 1.3298, + "step": 7658 + }, + { + "epoch": 0.8079113924050633, + "grad_norm": 0.6949738264083862, + "learning_rate": 0.0001352242432398723, + "loss": 1.3102, + "step": 7659 + }, + { + "epoch": 0.8080168776371308, + "grad_norm": 0.6734158992767334, + "learning_rate": 0.00013508047283962137, + "loss": 1.2904, + "step": 7660 + }, + { + "epoch": 0.8081223628691984, + "grad_norm": 0.6493299603462219, + "learning_rate": 0.0001349367713441697, + "loss": 1.2886, + "step": 7661 + }, + { + "epoch": 0.8082278481012658, + "grad_norm": 0.7036700248718262, + "learning_rate": 0.0001347931387696198, + "loss": 1.312, + "step": 7662 + }, + { + "epoch": 0.8083333333333333, + "grad_norm": 0.7135137319564819, + "learning_rate": 0.0001346495751320664, + "loss": 1.3269, + "step": 7663 + }, + { + "epoch": 0.8084388185654009, + "grad_norm": 0.7100498676300049, + "learning_rate": 0.00013450608044759634, + "loss": 1.3031, + "step": 7664 + }, + { + "epoch": 0.8085443037974683, + "grad_norm": 0.6929436922073364, + "learning_rate": 0.00013436265473228926, + "loss": 1.3243, + "step": 7665 + }, + { + "epoch": 0.8086497890295359, + "grad_norm": 0.6778643131256104, + "learning_rate": 0.0001342192980022166, + "loss": 1.2674, + 
"step": 7666 + }, + { + "epoch": 0.8087552742616034, + "grad_norm": 0.6614496111869812, + "learning_rate": 0.00013407601027344213, + "loss": 1.3057, + "step": 7667 + }, + { + "epoch": 0.8088607594936709, + "grad_norm": 0.6942973732948303, + "learning_rate": 0.00013393279156202197, + "loss": 1.3475, + "step": 7668 + }, + { + "epoch": 0.8089662447257384, + "grad_norm": 0.6810179352760315, + "learning_rate": 0.00013378964188400457, + "loss": 1.3157, + "step": 7669 + }, + { + "epoch": 0.8090717299578059, + "grad_norm": 0.6625216007232666, + "learning_rate": 0.00013364656125543044, + "loss": 1.3088, + "step": 7670 + }, + { + "epoch": 0.8091772151898734, + "grad_norm": 0.7163141965866089, + "learning_rate": 0.0001335035496923326, + "loss": 1.3263, + "step": 7671 + }, + { + "epoch": 0.809282700421941, + "grad_norm": 0.7089129686355591, + "learning_rate": 0.00013336060721073608, + "loss": 1.3525, + "step": 7672 + }, + { + "epoch": 0.8093881856540084, + "grad_norm": 0.6749106645584106, + "learning_rate": 0.00013321773382665822, + "loss": 1.3443, + "step": 7673 + }, + { + "epoch": 0.8094936708860759, + "grad_norm": 0.6618996262550354, + "learning_rate": 0.00013307492955610896, + "loss": 1.3344, + "step": 7674 + }, + { + "epoch": 0.8095991561181435, + "grad_norm": 0.6548619270324707, + "learning_rate": 0.0001329321944150902, + "loss": 1.2832, + "step": 7675 + }, + { + "epoch": 0.8097046413502109, + "grad_norm": 0.6870028972625732, + "learning_rate": 0.000132789528419596, + "loss": 1.3193, + "step": 7676 + }, + { + "epoch": 0.8098101265822785, + "grad_norm": 0.6806946992874146, + "learning_rate": 0.0001326469315856128, + "loss": 1.299, + "step": 7677 + }, + { + "epoch": 0.809915611814346, + "grad_norm": 0.661008894443512, + "learning_rate": 0.00013250440392911927, + "loss": 1.3431, + "step": 7678 + }, + { + "epoch": 0.8100210970464135, + "grad_norm": 0.6609967947006226, + "learning_rate": 0.00013236194546608645, + "loss": 1.2949, + "step": 7679 + }, + { + "epoch": 
0.810126582278481, + "grad_norm": 0.7954158782958984, + "learning_rate": 0.00013221955621247749, + "loss": 1.346, + "step": 7680 + }, + { + "epoch": 0.8102320675105485, + "grad_norm": 0.6839767694473267, + "learning_rate": 0.0001320772361842478, + "loss": 1.3114, + "step": 7681 + }, + { + "epoch": 0.810337552742616, + "grad_norm": 0.8326310515403748, + "learning_rate": 0.00013193498539734478, + "loss": 1.3032, + "step": 7682 + }, + { + "epoch": 0.8104430379746835, + "grad_norm": 0.664008617401123, + "learning_rate": 0.00013179280386770885, + "loss": 1.2834, + "step": 7683 + }, + { + "epoch": 0.8105485232067511, + "grad_norm": 0.7041043043136597, + "learning_rate": 0.00013165069161127183, + "loss": 1.2957, + "step": 7684 + }, + { + "epoch": 0.8106540084388185, + "grad_norm": 0.6995480060577393, + "learning_rate": 0.00013150864864395825, + "loss": 1.3251, + "step": 7685 + }, + { + "epoch": 0.8107594936708861, + "grad_norm": 0.6848492622375488, + "learning_rate": 0.00013136667498168464, + "loss": 1.3178, + "step": 7686 + }, + { + "epoch": 0.8108649789029536, + "grad_norm": 0.6860167384147644, + "learning_rate": 0.00013122477064035992, + "loss": 1.3127, + "step": 7687 + }, + { + "epoch": 0.810970464135021, + "grad_norm": 0.6690546274185181, + "learning_rate": 0.00013108293563588504, + "loss": 1.2809, + "step": 7688 + }, + { + "epoch": 0.8110759493670886, + "grad_norm": 0.6576233506202698, + "learning_rate": 0.00013094116998415358, + "loss": 1.2991, + "step": 7689 + }, + { + "epoch": 0.8111814345991561, + "grad_norm": 0.6687653660774231, + "learning_rate": 0.00013079947370105057, + "loss": 1.2603, + "step": 7690 + }, + { + "epoch": 0.8112869198312236, + "grad_norm": 0.638394832611084, + "learning_rate": 0.00013065784680245442, + "loss": 1.3219, + "step": 7691 + }, + { + "epoch": 0.8113924050632911, + "grad_norm": 0.6527536511421204, + "learning_rate": 0.00013051628930423485, + "loss": 1.3252, + "step": 7692 + }, + { + "epoch": 0.8114978902953587, + "grad_norm": 
0.6858869791030884, + "learning_rate": 0.00013037480122225412, + "loss": 1.3007, + "step": 7693 + }, + { + "epoch": 0.8116033755274261, + "grad_norm": 0.6817305088043213, + "learning_rate": 0.00013023338257236655, + "loss": 1.2966, + "step": 7694 + }, + { + "epoch": 0.8117088607594937, + "grad_norm": 0.6545733213424683, + "learning_rate": 0.00013009203337041898, + "loss": 1.29, + "step": 7695 + }, + { + "epoch": 0.8118143459915612, + "grad_norm": 0.6673735976219177, + "learning_rate": 0.0001299507536322502, + "loss": 1.3343, + "step": 7696 + }, + { + "epoch": 0.8119198312236287, + "grad_norm": 0.7408354878425598, + "learning_rate": 0.00012980954337369133, + "loss": 1.2918, + "step": 7697 + }, + { + "epoch": 0.8120253164556962, + "grad_norm": 0.6735548973083496, + "learning_rate": 0.00012966840261056562, + "loss": 1.3025, + "step": 7698 + }, + { + "epoch": 0.8121308016877637, + "grad_norm": 0.6808425784111023, + "learning_rate": 0.0001295273313586885, + "loss": 1.2997, + "step": 7699 + }, + { + "epoch": 0.8122362869198312, + "grad_norm": 0.658164918422699, + "learning_rate": 0.00012938632963386808, + "loss": 1.325, + "step": 7700 + }, + { + "epoch": 0.8123417721518987, + "grad_norm": 0.7412241697311401, + "learning_rate": 0.00012924539745190402, + "loss": 1.2902, + "step": 7701 + }, + { + "epoch": 0.8124472573839663, + "grad_norm": 0.816356897354126, + "learning_rate": 0.0001291045348285885, + "loss": 1.3205, + "step": 7702 + }, + { + "epoch": 0.8125527426160337, + "grad_norm": 0.6489275097846985, + "learning_rate": 0.00012896374177970602, + "loss": 1.2781, + "step": 7703 + }, + { + "epoch": 0.8126582278481013, + "grad_norm": 0.688605546951294, + "learning_rate": 0.00012882301832103297, + "loss": 1.2998, + "step": 7704 + }, + { + "epoch": 0.8127637130801688, + "grad_norm": 0.681311309337616, + "learning_rate": 0.0001286823644683382, + "loss": 1.2949, + "step": 7705 + }, + { + "epoch": 0.8128691983122363, + "grad_norm": 0.7164801359176636, + "learning_rate": 
0.0001285417802373827, + "loss": 1.2858, + "step": 7706 + }, + { + "epoch": 0.8129746835443038, + "grad_norm": 0.7103505730628967, + "learning_rate": 0.00012840126564391961, + "loss": 1.3225, + "step": 7707 + }, + { + "epoch": 0.8130801687763713, + "grad_norm": 0.6963397860527039, + "learning_rate": 0.00012826082070369402, + "loss": 1.3041, + "step": 7708 + }, + { + "epoch": 0.8131856540084388, + "grad_norm": 0.6861756443977356, + "learning_rate": 0.00012812044543244395, + "loss": 1.3476, + "step": 7709 + }, + { + "epoch": 0.8132911392405063, + "grad_norm": 0.8007830381393433, + "learning_rate": 0.00012798013984589894, + "loss": 1.301, + "step": 7710 + }, + { + "epoch": 0.8133966244725739, + "grad_norm": 0.6664379835128784, + "learning_rate": 0.0001278399039597809, + "loss": 1.3409, + "step": 7711 + }, + { + "epoch": 0.8135021097046413, + "grad_norm": 0.6769894361495972, + "learning_rate": 0.00012769973778980405, + "loss": 1.3055, + "step": 7712 + }, + { + "epoch": 0.8136075949367089, + "grad_norm": 0.6897506713867188, + "learning_rate": 0.00012755964135167464, + "loss": 1.2919, + "step": 7713 + }, + { + "epoch": 0.8137130801687764, + "grad_norm": 0.7270781397819519, + "learning_rate": 0.00012741961466109113, + "loss": 1.3184, + "step": 7714 + }, + { + "epoch": 0.8138185654008439, + "grad_norm": 0.6409034132957458, + "learning_rate": 0.00012727965773374434, + "loss": 1.3021, + "step": 7715 + }, + { + "epoch": 0.8139240506329114, + "grad_norm": 0.6968061327934265, + "learning_rate": 0.00012713977058531685, + "loss": 1.3211, + "step": 7716 + }, + { + "epoch": 0.814029535864979, + "grad_norm": 0.7030057311058044, + "learning_rate": 0.0001269999532314841, + "loss": 1.3594, + "step": 7717 + }, + { + "epoch": 0.8141350210970464, + "grad_norm": 0.6715369820594788, + "learning_rate": 0.00012686020568791311, + "loss": 1.2634, + "step": 7718 + }, + { + "epoch": 0.8142405063291139, + "grad_norm": 0.6943840384483337, + "learning_rate": 0.00012672052797026344, + "loss": 1.2793, 
+ "step": 7719 + }, + { + "epoch": 0.8143459915611815, + "grad_norm": 0.724038302898407, + "learning_rate": 0.00012658092009418652, + "loss": 1.3061, + "step": 7720 + }, + { + "epoch": 0.8144514767932489, + "grad_norm": 0.6719893217086792, + "learning_rate": 0.0001264413820753261, + "loss": 1.3403, + "step": 7721 + }, + { + "epoch": 0.8145569620253165, + "grad_norm": 0.7065761685371399, + "learning_rate": 0.0001263019139293182, + "loss": 1.294, + "step": 7722 + }, + { + "epoch": 0.814662447257384, + "grad_norm": 0.6658100485801697, + "learning_rate": 0.0001261625156717909, + "loss": 1.3334, + "step": 7723 + }, + { + "epoch": 0.8147679324894515, + "grad_norm": 0.7489784955978394, + "learning_rate": 0.0001260231873183644, + "loss": 1.2843, + "step": 7724 + }, + { + "epoch": 0.814873417721519, + "grad_norm": 0.6700938940048218, + "learning_rate": 0.00012588392888465103, + "loss": 1.352, + "step": 7725 + }, + { + "epoch": 0.8149789029535865, + "grad_norm": 0.6699407696723938, + "learning_rate": 0.0001257447403862557, + "loss": 1.3179, + "step": 7726 + }, + { + "epoch": 0.815084388185654, + "grad_norm": 0.6939325332641602, + "learning_rate": 0.00012560562183877507, + "loss": 1.304, + "step": 7727 + }, + { + "epoch": 0.8151898734177215, + "grad_norm": 0.7277683615684509, + "learning_rate": 0.00012546657325779805, + "loss": 1.3001, + "step": 7728 + }, + { + "epoch": 0.8152953586497891, + "grad_norm": 0.6288520097732544, + "learning_rate": 0.00012532759465890567, + "loss": 1.3072, + "step": 7729 + }, + { + "epoch": 0.8154008438818565, + "grad_norm": 0.6469722390174866, + "learning_rate": 0.00012518868605767118, + "loss": 1.3288, + "step": 7730 + }, + { + "epoch": 0.8155063291139241, + "grad_norm": 0.6808873414993286, + "learning_rate": 0.00012504984746966003, + "loss": 1.2924, + "step": 7731 + }, + { + "epoch": 0.8156118143459916, + "grad_norm": 0.7068135142326355, + "learning_rate": 0.0001249110789104298, + "loss": 1.3296, + "step": 7732 + }, + { + "epoch": 
0.815717299578059, + "grad_norm": 0.7447822690010071, + "learning_rate": 0.00012477238039553006, + "loss": 1.3107, + "step": 7733 + }, + { + "epoch": 0.8158227848101266, + "grad_norm": 0.6732677221298218, + "learning_rate": 0.00012463375194050267, + "loss": 1.2965, + "step": 7734 + }, + { + "epoch": 0.8159282700421941, + "grad_norm": 0.668741762638092, + "learning_rate": 0.00012449519356088192, + "loss": 1.3081, + "step": 7735 + }, + { + "epoch": 0.8160337552742616, + "grad_norm": 0.6904820799827576, + "learning_rate": 0.0001243567052721937, + "loss": 1.3245, + "step": 7736 + }, + { + "epoch": 0.8161392405063291, + "grad_norm": 0.7090840935707092, + "learning_rate": 0.00012421828708995649, + "loss": 1.2846, + "step": 7737 + }, + { + "epoch": 0.8162447257383966, + "grad_norm": 0.6774603724479675, + "learning_rate": 0.00012407993902968057, + "loss": 1.3291, + "step": 7738 + }, + { + "epoch": 0.8163502109704641, + "grad_norm": 0.663112998008728, + "learning_rate": 0.00012394166110686857, + "loss": 1.3495, + "step": 7739 + }, + { + "epoch": 0.8164556962025317, + "grad_norm": 0.6445118188858032, + "learning_rate": 0.0001238034533370153, + "loss": 1.324, + "step": 7740 + }, + { + "epoch": 0.8165611814345991, + "grad_norm": 0.6405184268951416, + "learning_rate": 0.00012366531573560754, + "loss": 1.281, + "step": 7741 + }, + { + "epoch": 0.8166666666666667, + "grad_norm": 0.6491638422012329, + "learning_rate": 0.00012352724831812424, + "loss": 1.2734, + "step": 7742 + }, + { + "epoch": 0.8167721518987342, + "grad_norm": 0.723696231842041, + "learning_rate": 0.0001233892511000368, + "loss": 1.3452, + "step": 7743 + }, + { + "epoch": 0.8168776371308016, + "grad_norm": 0.74579918384552, + "learning_rate": 0.00012325132409680829, + "loss": 1.3282, + "step": 7744 + }, + { + "epoch": 0.8169831223628692, + "grad_norm": 0.6726058125495911, + "learning_rate": 0.00012311346732389418, + "loss": 1.2667, + "step": 7745 + }, + { + "epoch": 0.8170886075949367, + "grad_norm": 
0.6290761232376099, + "learning_rate": 0.000122975680796742, + "loss": 1.2973, + "step": 7746 + }, + { + "epoch": 0.8171940928270042, + "grad_norm": 0.6492972373962402, + "learning_rate": 0.00012283796453079146, + "loss": 1.2883, + "step": 7747 + }, + { + "epoch": 0.8172995780590717, + "grad_norm": 0.8246744871139526, + "learning_rate": 0.00012270031854147426, + "loss": 1.2878, + "step": 7748 + }, + { + "epoch": 0.8174050632911393, + "grad_norm": 0.7179677486419678, + "learning_rate": 0.0001225627428442143, + "loss": 1.3473, + "step": 7749 + }, + { + "epoch": 0.8175105485232067, + "grad_norm": 0.6841184496879578, + "learning_rate": 0.0001224252374544278, + "loss": 1.3091, + "step": 7750 + }, + { + "epoch": 0.8176160337552743, + "grad_norm": 0.9285070300102234, + "learning_rate": 0.00012228780238752264, + "loss": 1.3145, + "step": 7751 + }, + { + "epoch": 0.8177215189873418, + "grad_norm": 0.6679925322532654, + "learning_rate": 0.00012215043765889932, + "loss": 1.2614, + "step": 7752 + }, + { + "epoch": 0.8178270042194092, + "grad_norm": 0.6876408457756042, + "learning_rate": 0.00012201314328395032, + "loss": 1.2927, + "step": 7753 + }, + { + "epoch": 0.8179324894514768, + "grad_norm": 0.6602131128311157, + "learning_rate": 0.00012187591927806, + "loss": 1.3024, + "step": 7754 + }, + { + "epoch": 0.8180379746835443, + "grad_norm": 0.84895920753479, + "learning_rate": 0.0001217387656566051, + "loss": 1.3161, + "step": 7755 + }, + { + "epoch": 0.8181434599156118, + "grad_norm": 0.6666951179504395, + "learning_rate": 0.0001216016824349542, + "loss": 1.299, + "step": 7756 + }, + { + "epoch": 0.8182489451476793, + "grad_norm": 0.6518150568008423, + "learning_rate": 0.00012146466962846833, + "loss": 1.3037, + "step": 7757 + }, + { + "epoch": 0.8183544303797469, + "grad_norm": 0.9211503863334656, + "learning_rate": 0.00012132772725250038, + "loss": 1.2754, + "step": 7758 + }, + { + "epoch": 0.8184599156118143, + "grad_norm": 0.7057801485061646, + "learning_rate": 
0.0001211908553223954, + "loss": 1.3384, + "step": 7759 + }, + { + "epoch": 0.8185654008438819, + "grad_norm": 0.8045841455459595, + "learning_rate": 0.00012105405385349047, + "loss": 1.3321, + "step": 7760 + }, + { + "epoch": 0.8186708860759494, + "grad_norm": 0.6674516797065735, + "learning_rate": 0.00012091732286111514, + "loss": 1.2825, + "step": 7761 + }, + { + "epoch": 0.8187763713080168, + "grad_norm": 0.6631309390068054, + "learning_rate": 0.00012078066236059068, + "loss": 1.2779, + "step": 7762 + }, + { + "epoch": 0.8188818565400844, + "grad_norm": 0.707173764705658, + "learning_rate": 0.00012064407236723066, + "loss": 1.3428, + "step": 7763 + }, + { + "epoch": 0.8189873417721519, + "grad_norm": 0.7105407118797302, + "learning_rate": 0.00012050755289634049, + "loss": 1.336, + "step": 7764 + }, + { + "epoch": 0.8190928270042194, + "grad_norm": 0.9474321603775024, + "learning_rate": 0.00012037110396321796, + "loss": 1.2976, + "step": 7765 + }, + { + "epoch": 0.8191983122362869, + "grad_norm": 0.7364041209220886, + "learning_rate": 0.0001202347255831529, + "loss": 1.3388, + "step": 7766 + }, + { + "epoch": 0.8193037974683545, + "grad_norm": 0.8532997369766235, + "learning_rate": 0.0001200984177714271, + "loss": 1.3196, + "step": 7767 + }, + { + "epoch": 0.8194092827004219, + "grad_norm": 0.8285003900527954, + "learning_rate": 0.00011996218054331434, + "loss": 1.3116, + "step": 7768 + }, + { + "epoch": 0.8195147679324895, + "grad_norm": 0.6876896619796753, + "learning_rate": 0.00011982601391408115, + "loss": 1.3498, + "step": 7769 + }, + { + "epoch": 0.819620253164557, + "grad_norm": 0.6529660820960999, + "learning_rate": 0.00011968991789898533, + "loss": 1.2594, + "step": 7770 + }, + { + "epoch": 0.8197257383966244, + "grad_norm": 0.7282302975654602, + "learning_rate": 0.00011955389251327737, + "loss": 1.2911, + "step": 7771 + }, + { + "epoch": 0.819831223628692, + "grad_norm": 0.697611927986145, + "learning_rate": 0.00011941793777219937, + "loss": 1.2777, + 
"step": 7772 + }, + { + "epoch": 0.8199367088607595, + "grad_norm": 0.7067075967788696, + "learning_rate": 0.00011928205369098574, + "loss": 1.2924, + "step": 7773 + }, + { + "epoch": 0.820042194092827, + "grad_norm": 0.6582878828048706, + "learning_rate": 0.00011914624028486315, + "loss": 1.2588, + "step": 7774 + }, + { + "epoch": 0.8201476793248945, + "grad_norm": 0.6811896562576294, + "learning_rate": 0.00011901049756905, + "loss": 1.3268, + "step": 7775 + }, + { + "epoch": 0.8202531645569621, + "grad_norm": 0.7126224637031555, + "learning_rate": 0.00011887482555875695, + "loss": 1.3319, + "step": 7776 + }, + { + "epoch": 0.8203586497890295, + "grad_norm": 0.7241086363792419, + "learning_rate": 0.00011873922426918668, + "loss": 1.3303, + "step": 7777 + }, + { + "epoch": 0.820464135021097, + "grad_norm": 0.6355699300765991, + "learning_rate": 0.0001186036937155342, + "loss": 1.3132, + "step": 7778 + }, + { + "epoch": 0.8205696202531646, + "grad_norm": 0.672258734703064, + "learning_rate": 0.00011846823391298628, + "loss": 1.2815, + "step": 7779 + }, + { + "epoch": 0.820675105485232, + "grad_norm": 0.9154344797134399, + "learning_rate": 0.00011833284487672185, + "loss": 1.3339, + "step": 7780 + }, + { + "epoch": 0.8207805907172996, + "grad_norm": 0.6697807312011719, + "learning_rate": 0.00011819752662191197, + "loss": 1.3539, + "step": 7781 + }, + { + "epoch": 0.8208860759493671, + "grad_norm": 0.6493597030639648, + "learning_rate": 0.00011806227916371964, + "loss": 1.3158, + "step": 7782 + }, + { + "epoch": 0.8209915611814346, + "grad_norm": 0.6990022659301758, + "learning_rate": 0.0001179271025173001, + "loss": 1.3128, + "step": 7783 + }, + { + "epoch": 0.8210970464135021, + "grad_norm": 0.6939128041267395, + "learning_rate": 0.00011779199669780046, + "loss": 1.3343, + "step": 7784 + }, + { + "epoch": 0.8212025316455697, + "grad_norm": 0.6871060729026794, + "learning_rate": 0.00011765696172036006, + "loss": 1.2985, + "step": 7785 + }, + { + "epoch": 
0.8213080168776371, + "grad_norm": 0.6813386082649231, + "learning_rate": 0.00011752199760011017, + "loss": 1.3404, + "step": 7786 + }, + { + "epoch": 0.8214135021097047, + "grad_norm": 0.6575993895530701, + "learning_rate": 0.00011738710435217431, + "loss": 1.3384, + "step": 7787 + }, + { + "epoch": 0.8215189873417722, + "grad_norm": 0.8124431371688843, + "learning_rate": 0.00011725228199166805, + "loss": 1.3096, + "step": 7788 + }, + { + "epoch": 0.8216244725738396, + "grad_norm": 0.7231502532958984, + "learning_rate": 0.00011711753053369861, + "loss": 1.2939, + "step": 7789 + }, + { + "epoch": 0.8217299578059072, + "grad_norm": 0.7590978145599365, + "learning_rate": 0.00011698284999336578, + "loss": 1.3035, + "step": 7790 + }, + { + "epoch": 0.8218354430379747, + "grad_norm": 0.6898192167282104, + "learning_rate": 0.00011684824038576115, + "loss": 1.3103, + "step": 7791 + }, + { + "epoch": 0.8219409282700422, + "grad_norm": 0.8122782111167908, + "learning_rate": 0.00011671370172596829, + "loss": 1.355, + "step": 7792 + }, + { + "epoch": 0.8220464135021097, + "grad_norm": 0.8006315231323242, + "learning_rate": 0.00011657923402906309, + "loss": 1.3217, + "step": 7793 + }, + { + "epoch": 0.8221518987341773, + "grad_norm": 0.6745288372039795, + "learning_rate": 0.000116444837310113, + "loss": 1.2823, + "step": 7794 + }, + { + "epoch": 0.8222573839662447, + "grad_norm": 0.7740788459777832, + "learning_rate": 0.00011631051158417828, + "loss": 1.2816, + "step": 7795 + }, + { + "epoch": 0.8223628691983123, + "grad_norm": 0.7136624455451965, + "learning_rate": 0.00011617625686631056, + "loss": 1.3143, + "step": 7796 + }, + { + "epoch": 0.8224683544303798, + "grad_norm": 0.6801926493644714, + "learning_rate": 0.00011604207317155383, + "loss": 1.3128, + "step": 7797 + }, + { + "epoch": 0.8225738396624472, + "grad_norm": 0.7329780459403992, + "learning_rate": 0.00011590796051494395, + "loss": 1.2965, + "step": 7798 + }, + { + "epoch": 0.8226793248945148, + "grad_norm": 
0.7936345338821411, + "learning_rate": 0.00011577391891150901, + "loss": 1.3477, + "step": 7799 + }, + { + "epoch": 0.8227848101265823, + "grad_norm": 0.697975218296051, + "learning_rate": 0.00011563994837626898, + "loss": 1.282, + "step": 7800 + }, + { + "epoch": 0.8228902953586498, + "grad_norm": 1.0356578826904297, + "learning_rate": 0.00011550604892423593, + "loss": 1.3245, + "step": 7801 + }, + { + "epoch": 0.8229957805907173, + "grad_norm": 0.6786567568778992, + "learning_rate": 0.00011537222057041396, + "loss": 1.3014, + "step": 7802 + }, + { + "epoch": 0.8231012658227848, + "grad_norm": 0.8468500971794128, + "learning_rate": 0.00011523846332979907, + "loss": 1.3026, + "step": 7803 + }, + { + "epoch": 0.8232067510548523, + "grad_norm": 0.8745244741439819, + "learning_rate": 0.00011510477721737974, + "loss": 1.3139, + "step": 7804 + }, + { + "epoch": 0.8233122362869199, + "grad_norm": 0.765455424785614, + "learning_rate": 0.00011497116224813604, + "loss": 1.2786, + "step": 7805 + }, + { + "epoch": 0.8234177215189873, + "grad_norm": 0.6769886016845703, + "learning_rate": 0.0001148376184370401, + "loss": 1.3504, + "step": 7806 + }, + { + "epoch": 0.8235232067510548, + "grad_norm": 0.7268636226654053, + "learning_rate": 0.00011470414579905617, + "loss": 1.3189, + "step": 7807 + }, + { + "epoch": 0.8236286919831224, + "grad_norm": 0.7597456574440002, + "learning_rate": 0.00011457074434914067, + "loss": 1.2634, + "step": 7808 + }, + { + "epoch": 0.8237341772151898, + "grad_norm": 0.821140468120575, + "learning_rate": 0.00011443741410224173, + "loss": 1.3184, + "step": 7809 + }, + { + "epoch": 0.8238396624472574, + "grad_norm": 0.711650013923645, + "learning_rate": 0.00011430415507329975, + "loss": 1.2963, + "step": 7810 + }, + { + "epoch": 0.8239451476793249, + "grad_norm": 0.7475688457489014, + "learning_rate": 0.0001141709672772471, + "loss": 1.294, + "step": 7811 + }, + { + "epoch": 0.8240506329113924, + "grad_norm": 0.7138768434524536, + "learning_rate": 
0.00011403785072900793, + "loss": 1.3082, + "step": 7812 + }, + { + "epoch": 0.8241561181434599, + "grad_norm": 0.7406415939331055, + "learning_rate": 0.00011390480544349891, + "loss": 1.2986, + "step": 7813 + }, + { + "epoch": 0.8242616033755275, + "grad_norm": 0.6601818203926086, + "learning_rate": 0.00011377183143562833, + "loss": 1.3526, + "step": 7814 + }, + { + "epoch": 0.8243670886075949, + "grad_norm": 0.6633701324462891, + "learning_rate": 0.00011363892872029655, + "loss": 1.3079, + "step": 7815 + }, + { + "epoch": 0.8244725738396624, + "grad_norm": 0.6462123394012451, + "learning_rate": 0.00011350609731239597, + "loss": 1.2837, + "step": 7816 + }, + { + "epoch": 0.82457805907173, + "grad_norm": 0.6967359781265259, + "learning_rate": 0.00011337333722681104, + "loss": 1.2955, + "step": 7817 + }, + { + "epoch": 0.8246835443037974, + "grad_norm": 0.7091721892356873, + "learning_rate": 0.00011324064847841817, + "loss": 1.347, + "step": 7818 + }, + { + "epoch": 0.824789029535865, + "grad_norm": 0.6832412481307983, + "learning_rate": 0.00011310803108208581, + "loss": 1.3233, + "step": 7819 + }, + { + "epoch": 0.8248945147679325, + "grad_norm": 0.6709529161453247, + "learning_rate": 0.00011297548505267424, + "loss": 1.3463, + "step": 7820 + }, + { + "epoch": 0.825, + "grad_norm": 0.7090879678726196, + "learning_rate": 0.00011284301040503625, + "loss": 1.3273, + "step": 7821 + }, + { + "epoch": 0.8251054852320675, + "grad_norm": 0.6947682499885559, + "learning_rate": 0.00011271060715401604, + "loss": 1.2911, + "step": 7822 + }, + { + "epoch": 0.825210970464135, + "grad_norm": 0.6882447004318237, + "learning_rate": 0.00011257827531445017, + "loss": 1.3031, + "step": 7823 + }, + { + "epoch": 0.8253164556962025, + "grad_norm": 0.7436911463737488, + "learning_rate": 0.00011244601490116693, + "loss": 1.284, + "step": 7824 + }, + { + "epoch": 0.82542194092827, + "grad_norm": 0.6468577980995178, + "learning_rate": 0.00011231382592898698, + "loss": 1.2965, + "step": 7825 
+ }, + { + "epoch": 0.8255274261603376, + "grad_norm": 0.6991388201713562, + "learning_rate": 0.00011218170841272254, + "loss": 1.3039, + "step": 7826 + }, + { + "epoch": 0.825632911392405, + "grad_norm": 0.693621814250946, + "learning_rate": 0.00011204966236717811, + "loss": 1.3401, + "step": 7827 + }, + { + "epoch": 0.8257383966244726, + "grad_norm": 0.7046767473220825, + "learning_rate": 0.0001119176878071502, + "loss": 1.3057, + "step": 7828 + }, + { + "epoch": 0.8258438818565401, + "grad_norm": 0.6949167847633362, + "learning_rate": 0.00011178578474742687, + "loss": 1.3241, + "step": 7829 + }, + { + "epoch": 0.8259493670886076, + "grad_norm": 0.7160187363624573, + "learning_rate": 0.00011165395320278898, + "loss": 1.2613, + "step": 7830 + }, + { + "epoch": 0.8260548523206751, + "grad_norm": 0.6780179142951965, + "learning_rate": 0.0001115221931880088, + "loss": 1.3069, + "step": 7831 + }, + { + "epoch": 0.8261603375527427, + "grad_norm": 0.6796903610229492, + "learning_rate": 0.00011139050471785051, + "loss": 1.2973, + "step": 7832 + }, + { + "epoch": 0.8262658227848101, + "grad_norm": 0.6672101020812988, + "learning_rate": 0.00011125888780707064, + "loss": 1.2932, + "step": 7833 + }, + { + "epoch": 0.8263713080168776, + "grad_norm": 0.7115203738212585, + "learning_rate": 0.00011112734247041739, + "loss": 1.2975, + "step": 7834 + }, + { + "epoch": 0.8264767932489452, + "grad_norm": 0.6660970449447632, + "learning_rate": 0.00011099586872263107, + "loss": 1.2713, + "step": 7835 + }, + { + "epoch": 0.8265822784810126, + "grad_norm": 0.6854487657546997, + "learning_rate": 0.00011086446657844412, + "loss": 1.2583, + "step": 7836 + }, + { + "epoch": 0.8266877637130802, + "grad_norm": 0.7173907160758972, + "learning_rate": 0.0001107331360525807, + "loss": 1.315, + "step": 7837 + }, + { + "epoch": 0.8267932489451477, + "grad_norm": 0.7396365404129028, + "learning_rate": 0.00011060187715975686, + "loss": 1.3306, + "step": 7838 + }, + { + "epoch": 0.8268987341772152, + 
"grad_norm": 0.7185232043266296, + "learning_rate": 0.00011047068991468118, + "loss": 1.244, + "step": 7839 + }, + { + "epoch": 0.8270042194092827, + "grad_norm": 0.6480611562728882, + "learning_rate": 0.00011033957433205364, + "loss": 1.2917, + "step": 7840 + }, + { + "epoch": 0.8271097046413503, + "grad_norm": 0.685246467590332, + "learning_rate": 0.00011020853042656648, + "loss": 1.3118, + "step": 7841 + }, + { + "epoch": 0.8272151898734177, + "grad_norm": 0.7291903495788574, + "learning_rate": 0.00011007755821290371, + "loss": 1.3083, + "step": 7842 + }, + { + "epoch": 0.8273206751054852, + "grad_norm": 0.6661370396614075, + "learning_rate": 0.00010994665770574162, + "loss": 1.3153, + "step": 7843 + }, + { + "epoch": 0.8274261603375528, + "grad_norm": 0.6808461546897888, + "learning_rate": 0.000109815828919748, + "loss": 1.271, + "step": 7844 + }, + { + "epoch": 0.8275316455696202, + "grad_norm": 0.6983056664466858, + "learning_rate": 0.00010968507186958302, + "loss": 1.2855, + "step": 7845 + }, + { + "epoch": 0.8276371308016878, + "grad_norm": 0.8096519112586975, + "learning_rate": 0.00010955438656989849, + "loss": 1.303, + "step": 7846 + }, + { + "epoch": 0.8277426160337553, + "grad_norm": 0.6798051595687866, + "learning_rate": 0.00010942377303533865, + "loss": 1.3109, + "step": 7847 + }, + { + "epoch": 0.8278481012658228, + "grad_norm": 0.7814558744430542, + "learning_rate": 0.00010929323128053927, + "loss": 1.3145, + "step": 7848 + }, + { + "epoch": 0.8279535864978903, + "grad_norm": 0.6743497252464294, + "learning_rate": 0.00010916276132012818, + "loss": 1.2722, + "step": 7849 + }, + { + "epoch": 0.8280590717299579, + "grad_norm": 0.6526651382446289, + "learning_rate": 0.00010903236316872514, + "loss": 1.291, + "step": 7850 + }, + { + "epoch": 0.8281645569620253, + "grad_norm": 0.6667588949203491, + "learning_rate": 0.000108902036840942, + "loss": 1.3146, + "step": 7851 + }, + { + "epoch": 0.8282700421940928, + "grad_norm": 0.6776042580604553, + 
"learning_rate": 0.00010877178235138239, + "loss": 1.272, + "step": 7852 + }, + { + "epoch": 0.8283755274261604, + "grad_norm": 0.8932902216911316, + "learning_rate": 0.00010864159971464205, + "loss": 1.3059, + "step": 7853 + }, + { + "epoch": 0.8284810126582278, + "grad_norm": 0.6239233613014221, + "learning_rate": 0.00010851148894530858, + "loss": 1.3117, + "step": 7854 + }, + { + "epoch": 0.8285864978902954, + "grad_norm": 0.6654038429260254, + "learning_rate": 0.00010838145005796138, + "loss": 1.2929, + "step": 7855 + }, + { + "epoch": 0.8286919831223629, + "grad_norm": 0.7244188785552979, + "learning_rate": 0.00010825148306717222, + "loss": 1.2779, + "step": 7856 + }, + { + "epoch": 0.8287974683544304, + "grad_norm": 0.6803661584854126, + "learning_rate": 0.00010812158798750438, + "loss": 1.2833, + "step": 7857 + }, + { + "epoch": 0.8289029535864979, + "grad_norm": 0.7777215838432312, + "learning_rate": 0.00010799176483351337, + "loss": 1.267, + "step": 7858 + }, + { + "epoch": 0.8290084388185655, + "grad_norm": 0.6388925313949585, + "learning_rate": 0.00010786201361974646, + "loss": 1.2721, + "step": 7859 + }, + { + "epoch": 0.8291139240506329, + "grad_norm": 0.7341331839561462, + "learning_rate": 0.00010773233436074287, + "loss": 1.2764, + "step": 7860 + }, + { + "epoch": 0.8292194092827004, + "grad_norm": 0.6995307207107544, + "learning_rate": 0.00010760272707103389, + "loss": 1.3184, + "step": 7861 + }, + { + "epoch": 0.829324894514768, + "grad_norm": 0.7123779654502869, + "learning_rate": 0.00010747319176514264, + "loss": 1.3215, + "step": 7862 + }, + { + "epoch": 0.8294303797468354, + "grad_norm": 0.6868489384651184, + "learning_rate": 0.00010734372845758411, + "loss": 1.3005, + "step": 7863 + }, + { + "epoch": 0.829535864978903, + "grad_norm": 0.648676335811615, + "learning_rate": 0.00010721433716286527, + "loss": 1.3066, + "step": 7864 + }, + { + "epoch": 0.8296413502109705, + "grad_norm": 0.727490246295929, + "learning_rate": 0.00010708501789548527, + 
"loss": 1.2636, + "step": 7865 + }, + { + "epoch": 0.829746835443038, + "grad_norm": 0.680036187171936, + "learning_rate": 0.00010695577066993495, + "loss": 1.3416, + "step": 7866 + }, + { + "epoch": 0.8298523206751055, + "grad_norm": 0.6612994074821472, + "learning_rate": 0.00010682659550069704, + "loss": 1.2712, + "step": 7867 + }, + { + "epoch": 0.8299578059071729, + "grad_norm": 0.7312012314796448, + "learning_rate": 0.00010669749240224621, + "loss": 1.272, + "step": 7868 + }, + { + "epoch": 0.8300632911392405, + "grad_norm": 0.6856641173362732, + "learning_rate": 0.00010656846138904916, + "loss": 1.3298, + "step": 7869 + }, + { + "epoch": 0.830168776371308, + "grad_norm": 0.6959646344184875, + "learning_rate": 0.00010643950247556447, + "loss": 1.3066, + "step": 7870 + }, + { + "epoch": 0.8302742616033755, + "grad_norm": 0.7382462620735168, + "learning_rate": 0.00010631061567624259, + "loss": 1.3612, + "step": 7871 + }, + { + "epoch": 0.830379746835443, + "grad_norm": 0.6670410633087158, + "learning_rate": 0.00010618180100552596, + "loss": 1.321, + "step": 7872 + }, + { + "epoch": 0.8304852320675106, + "grad_norm": 0.6533447504043579, + "learning_rate": 0.00010605305847784871, + "loss": 1.2888, + "step": 7873 + }, + { + "epoch": 0.830590717299578, + "grad_norm": 0.7087827324867249, + "learning_rate": 0.00010592438810763747, + "loss": 1.3274, + "step": 7874 + }, + { + "epoch": 0.8306962025316456, + "grad_norm": 0.6595481634140015, + "learning_rate": 0.00010579578990931019, + "loss": 1.3073, + "step": 7875 + }, + { + "epoch": 0.8308016877637131, + "grad_norm": 0.7166001796722412, + "learning_rate": 0.00010566726389727693, + "loss": 1.3261, + "step": 7876 + }, + { + "epoch": 0.8309071729957805, + "grad_norm": 0.6598710417747498, + "learning_rate": 0.00010553881008593969, + "loss": 1.3313, + "step": 7877 + }, + { + "epoch": 0.8310126582278481, + "grad_norm": 0.7099452614784241, + "learning_rate": 0.00010541042848969235, + "loss": 1.3248, + "step": 7878 + }, + { + 
"epoch": 0.8311181434599156, + "grad_norm": 0.6890414357185364, + "learning_rate": 0.00010528211912292066, + "loss": 1.3302, + "step": 7879 + }, + { + "epoch": 0.8312236286919831, + "grad_norm": 0.7224909067153931, + "learning_rate": 0.00010515388200000245, + "loss": 1.3031, + "step": 7880 + }, + { + "epoch": 0.8313291139240506, + "grad_norm": 0.6717213988304138, + "learning_rate": 0.00010502571713530706, + "loss": 1.3107, + "step": 7881 + }, + { + "epoch": 0.8314345991561182, + "grad_norm": 0.6912724375724792, + "learning_rate": 0.00010489762454319634, + "loss": 1.2991, + "step": 7882 + }, + { + "epoch": 0.8315400843881856, + "grad_norm": 0.6893671751022339, + "learning_rate": 0.00010476960423802356, + "loss": 1.2964, + "step": 7883 + }, + { + "epoch": 0.8316455696202532, + "grad_norm": 0.7106570601463318, + "learning_rate": 0.00010464165623413408, + "loss": 1.3494, + "step": 7884 + }, + { + "epoch": 0.8317510548523207, + "grad_norm": 0.6936958432197571, + "learning_rate": 0.00010451378054586508, + "loss": 1.3463, + "step": 7885 + }, + { + "epoch": 0.8318565400843881, + "grad_norm": 0.666041910648346, + "learning_rate": 0.00010438597718754561, + "loss": 1.3385, + "step": 7886 + }, + { + "epoch": 0.8319620253164557, + "grad_norm": 0.6925376057624817, + "learning_rate": 0.00010425824617349671, + "loss": 1.335, + "step": 7887 + }, + { + "epoch": 0.8320675105485232, + "grad_norm": 0.7321807146072388, + "learning_rate": 0.00010413058751803129, + "loss": 1.2867, + "step": 7888 + }, + { + "epoch": 0.8321729957805907, + "grad_norm": 0.6563854813575745, + "learning_rate": 0.0001040030012354542, + "loss": 1.3079, + "step": 7889 + }, + { + "epoch": 0.8322784810126582, + "grad_norm": 0.6980109810829163, + "learning_rate": 0.00010387548734006195, + "loss": 1.3085, + "step": 7890 + }, + { + "epoch": 0.8323839662447258, + "grad_norm": 0.7312310338020325, + "learning_rate": 0.00010374804584614308, + "loss": 1.2763, + "step": 7891 + }, + { + "epoch": 0.8324894514767932, + 
"grad_norm": 0.6773526668548584, + "learning_rate": 0.00010362067676797837, + "loss": 1.2673, + "step": 7892 + }, + { + "epoch": 0.8325949367088608, + "grad_norm": 0.6838180422782898, + "learning_rate": 0.00010349338011983998, + "loss": 1.3058, + "step": 7893 + }, + { + "epoch": 0.8327004219409283, + "grad_norm": 0.6804721355438232, + "learning_rate": 0.00010336615591599204, + "loss": 1.272, + "step": 7894 + }, + { + "epoch": 0.8328059071729957, + "grad_norm": 0.6738136410713196, + "learning_rate": 0.00010323900417069079, + "loss": 1.3369, + "step": 7895 + }, + { + "epoch": 0.8329113924050633, + "grad_norm": 0.6877273321151733, + "learning_rate": 0.00010311192489818421, + "loss": 1.2976, + "step": 7896 + }, + { + "epoch": 0.8330168776371308, + "grad_norm": 0.6597690582275391, + "learning_rate": 0.0001029849181127121, + "loss": 1.287, + "step": 7897 + }, + { + "epoch": 0.8331223628691983, + "grad_norm": 0.7035146951675415, + "learning_rate": 0.00010285798382850614, + "loss": 1.3173, + "step": 7898 + }, + { + "epoch": 0.8332278481012658, + "grad_norm": 0.8160134553909302, + "learning_rate": 0.00010273112205979012, + "loss": 1.2967, + "step": 7899 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.6627113819122314, + "learning_rate": 0.00010260433282077944, + "loss": 1.3163, + "step": 7900 + }, + { + "epoch": 0.8334388185654008, + "grad_norm": 0.6560868620872498, + "learning_rate": 0.00010247761612568129, + "loss": 1.3066, + "step": 7901 + }, + { + "epoch": 0.8335443037974684, + "grad_norm": 0.7227388620376587, + "learning_rate": 0.00010235097198869525, + "loss": 1.3242, + "step": 7902 + }, + { + "epoch": 0.8336497890295359, + "grad_norm": 0.9347479343414307, + "learning_rate": 0.0001022244004240123, + "loss": 1.27, + "step": 7903 + }, + { + "epoch": 0.8337552742616033, + "grad_norm": 0.6839740872383118, + "learning_rate": 0.00010209790144581533, + "loss": 1.2839, + "step": 7904 + }, + { + "epoch": 0.8338607594936709, + "grad_norm": 0.6965194940567017, + 
"learning_rate": 0.00010197147506827925, + "loss": 1.3205, + "step": 7905 + }, + { + "epoch": 0.8339662447257384, + "grad_norm": 0.7400017976760864, + "learning_rate": 0.00010184512130557074, + "loss": 1.3295, + "step": 7906 + }, + { + "epoch": 0.8340717299578059, + "grad_norm": 0.7253174185752869, + "learning_rate": 0.0001017188401718484, + "loss": 1.3023, + "step": 7907 + }, + { + "epoch": 0.8341772151898734, + "grad_norm": 0.8500030636787415, + "learning_rate": 0.00010159263168126265, + "loss": 1.3145, + "step": 7908 + }, + { + "epoch": 0.834282700421941, + "grad_norm": 0.6869648098945618, + "learning_rate": 0.00010146649584795575, + "loss": 1.3153, + "step": 7909 + }, + { + "epoch": 0.8343881856540084, + "grad_norm": 0.7871703505516052, + "learning_rate": 0.00010134043268606191, + "loss": 1.3168, + "step": 7910 + }, + { + "epoch": 0.834493670886076, + "grad_norm": 0.8158009648323059, + "learning_rate": 0.0001012144422097069, + "loss": 1.3078, + "step": 7911 + }, + { + "epoch": 0.8345991561181435, + "grad_norm": 0.6593579053878784, + "learning_rate": 0.00010108852443300895, + "loss": 1.302, + "step": 7912 + }, + { + "epoch": 0.8347046413502109, + "grad_norm": 0.6670273542404175, + "learning_rate": 0.00010096267937007758, + "loss": 1.3096, + "step": 7913 + }, + { + "epoch": 0.8348101265822785, + "grad_norm": 0.6637856364250183, + "learning_rate": 0.00010083690703501445, + "loss": 1.2677, + "step": 7914 + }, + { + "epoch": 0.834915611814346, + "grad_norm": 0.6637610793113708, + "learning_rate": 0.00010071120744191284, + "loss": 1.3309, + "step": 7915 + }, + { + "epoch": 0.8350210970464135, + "grad_norm": 0.678839921951294, + "learning_rate": 0.0001005855806048581, + "loss": 1.3119, + "step": 7916 + }, + { + "epoch": 0.835126582278481, + "grad_norm": 0.6931543350219727, + "learning_rate": 0.00010046002653792726, + "loss": 1.2994, + "step": 7917 + }, + { + "epoch": 0.8352320675105486, + "grad_norm": 0.6669533848762512, + "learning_rate": 0.00010033454525518945, + 
"loss": 1.3077, + "step": 7918 + }, + { + "epoch": 0.835337552742616, + "grad_norm": 0.6674361824989319, + "learning_rate": 0.0001002091367707053, + "loss": 1.2964, + "step": 7919 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 0.8698583245277405, + "learning_rate": 0.00010008380109852752, + "loss": 1.3183, + "step": 7920 + }, + { + "epoch": 0.8355485232067511, + "grad_norm": 0.7692226767539978, + "learning_rate": 9.995853825270052e-05, + "loss": 1.3257, + "step": 7921 + }, + { + "epoch": 0.8356540084388185, + "grad_norm": 0.8382233381271362, + "learning_rate": 9.983334824726081e-05, + "loss": 1.3175, + "step": 7922 + }, + { + "epoch": 0.8357594936708861, + "grad_norm": 0.7643097043037415, + "learning_rate": 9.970823109623644e-05, + "loss": 1.339, + "step": 7923 + }, + { + "epoch": 0.8358649789029536, + "grad_norm": 0.6785779595375061, + "learning_rate": 9.958318681364745e-05, + "loss": 1.3224, + "step": 7924 + }, + { + "epoch": 0.8359704641350211, + "grad_norm": 0.6900779008865356, + "learning_rate": 9.94582154135056e-05, + "loss": 1.3043, + "step": 7925 + }, + { + "epoch": 0.8360759493670886, + "grad_norm": 0.7546674609184265, + "learning_rate": 9.933331690981473e-05, + "loss": 1.3338, + "step": 7926 + }, + { + "epoch": 0.8361814345991562, + "grad_norm": 0.6486369967460632, + "learning_rate": 9.920849131657011e-05, + "loss": 1.2801, + "step": 7927 + }, + { + "epoch": 0.8362869198312236, + "grad_norm": 0.6574782729148865, + "learning_rate": 9.908373864775915e-05, + "loss": 1.3162, + "step": 7928 + }, + { + "epoch": 0.8363924050632912, + "grad_norm": 0.6552813649177551, + "learning_rate": 9.895905891736118e-05, + "loss": 1.3205, + "step": 7929 + }, + { + "epoch": 0.8364978902953587, + "grad_norm": 0.6789878010749817, + "learning_rate": 9.883445213934675e-05, + "loss": 1.2911, + "step": 7930 + }, + { + "epoch": 0.8366033755274261, + "grad_norm": 0.6988880038261414, + "learning_rate": 9.870991832767919e-05, + "loss": 1.3082, + "step": 7931 + }, + { + "epoch": 
0.8367088607594937, + "grad_norm": 0.6919073462486267, + "learning_rate": 9.858545749631287e-05, + "loss": 1.2998, + "step": 7932 + }, + { + "epoch": 0.8368143459915611, + "grad_norm": 0.7599198818206787, + "learning_rate": 9.846106965919427e-05, + "loss": 1.322, + "step": 7933 + }, + { + "epoch": 0.8369198312236287, + "grad_norm": 0.7612559795379639, + "learning_rate": 9.833675483026175e-05, + "loss": 1.3048, + "step": 7934 + }, + { + "epoch": 0.8370253164556962, + "grad_norm": 0.6529632210731506, + "learning_rate": 9.821251302344525e-05, + "loss": 1.2966, + "step": 7935 + }, + { + "epoch": 0.8371308016877637, + "grad_norm": 0.7141567468643188, + "learning_rate": 9.80883442526668e-05, + "loss": 1.3062, + "step": 7936 + }, + { + "epoch": 0.8372362869198312, + "grad_norm": 0.6753206849098206, + "learning_rate": 9.79642485318401e-05, + "loss": 1.2836, + "step": 7937 + }, + { + "epoch": 0.8373417721518988, + "grad_norm": 0.6750956773757935, + "learning_rate": 9.78402258748708e-05, + "loss": 1.3088, + "step": 7938 + }, + { + "epoch": 0.8374472573839662, + "grad_norm": 0.7189840078353882, + "learning_rate": 9.771627629565599e-05, + "loss": 1.316, + "step": 7939 + }, + { + "epoch": 0.8375527426160337, + "grad_norm": 0.6841548681259155, + "learning_rate": 9.759239980808494e-05, + "loss": 1.3244, + "step": 7940 + }, + { + "epoch": 0.8376582278481013, + "grad_norm": 0.6557881236076355, + "learning_rate": 9.746859642603884e-05, + "loss": 1.3196, + "step": 7941 + }, + { + "epoch": 0.8377637130801687, + "grad_norm": 0.8116043210029602, + "learning_rate": 9.734486616339027e-05, + "loss": 1.3239, + "step": 7942 + }, + { + "epoch": 0.8378691983122363, + "grad_norm": 0.6598987579345703, + "learning_rate": 9.722120903400392e-05, + "loss": 1.3073, + "step": 7943 + }, + { + "epoch": 0.8379746835443038, + "grad_norm": 0.6827925443649292, + "learning_rate": 9.709762505173617e-05, + "loss": 1.3129, + "step": 7944 + }, + { + "epoch": 0.8380801687763713, + "grad_norm": 0.7054020166397095, 
+ "learning_rate": 9.697411423043521e-05, + "loss": 1.2881, + "step": 7945 + }, + { + "epoch": 0.8381856540084388, + "grad_norm": 0.6673740148544312, + "learning_rate": 9.685067658394095e-05, + "loss": 1.3123, + "step": 7946 + }, + { + "epoch": 0.8382911392405064, + "grad_norm": 0.729158341884613, + "learning_rate": 9.672731212608535e-05, + "loss": 1.3177, + "step": 7947 + }, + { + "epoch": 0.8383966244725738, + "grad_norm": 0.6557486653327942, + "learning_rate": 9.660402087069192e-05, + "loss": 1.2837, + "step": 7948 + }, + { + "epoch": 0.8385021097046413, + "grad_norm": 0.6692977547645569, + "learning_rate": 9.648080283157604e-05, + "loss": 1.314, + "step": 7949 + }, + { + "epoch": 0.8386075949367089, + "grad_norm": 0.6619338989257812, + "learning_rate": 9.635765802254482e-05, + "loss": 1.3187, + "step": 7950 + }, + { + "epoch": 0.8387130801687763, + "grad_norm": 0.6873814463615417, + "learning_rate": 9.623458645739755e-05, + "loss": 1.3168, + "step": 7951 + }, + { + "epoch": 0.8388185654008439, + "grad_norm": 0.7178770303726196, + "learning_rate": 9.611158814992479e-05, + "loss": 1.2627, + "step": 7952 + }, + { + "epoch": 0.8389240506329114, + "grad_norm": 0.6840099692344666, + "learning_rate": 9.598866311390919e-05, + "loss": 1.3118, + "step": 7953 + }, + { + "epoch": 0.8390295358649789, + "grad_norm": 0.6550894379615784, + "learning_rate": 9.586581136312506e-05, + "loss": 1.2654, + "step": 7954 + }, + { + "epoch": 0.8391350210970464, + "grad_norm": 0.685653567314148, + "learning_rate": 9.574303291133862e-05, + "loss": 1.2741, + "step": 7955 + }, + { + "epoch": 0.839240506329114, + "grad_norm": 0.8205893039703369, + "learning_rate": 9.562032777230772e-05, + "loss": 1.2921, + "step": 7956 + }, + { + "epoch": 0.8393459915611814, + "grad_norm": 0.6665263772010803, + "learning_rate": 9.549769595978211e-05, + "loss": 1.3049, + "step": 7957 + }, + { + "epoch": 0.8394514767932489, + "grad_norm": 0.7128443717956543, + "learning_rate": 9.537513748750337e-05, + "loss": 
1.3247, + "step": 7958 + }, + { + "epoch": 0.8395569620253165, + "grad_norm": 0.6908828020095825, + "learning_rate": 9.525265236920452e-05, + "loss": 1.3165, + "step": 7959 + }, + { + "epoch": 0.8396624472573839, + "grad_norm": 0.6555570363998413, + "learning_rate": 9.5130240618611e-05, + "loss": 1.303, + "step": 7960 + }, + { + "epoch": 0.8397679324894515, + "grad_norm": 0.6533353328704834, + "learning_rate": 9.50079022494395e-05, + "loss": 1.2756, + "step": 7961 + }, + { + "epoch": 0.839873417721519, + "grad_norm": 0.7289899587631226, + "learning_rate": 9.488563727539864e-05, + "loss": 1.2965, + "step": 7962 + }, + { + "epoch": 0.8399789029535865, + "grad_norm": 0.7126057744026184, + "learning_rate": 9.47634457101888e-05, + "loss": 1.299, + "step": 7963 + }, + { + "epoch": 0.840084388185654, + "grad_norm": 0.6636520028114319, + "learning_rate": 9.464132756750218e-05, + "loss": 1.3139, + "step": 7964 + }, + { + "epoch": 0.8401898734177216, + "grad_norm": 0.7398289442062378, + "learning_rate": 9.451928286102277e-05, + "loss": 1.3046, + "step": 7965 + }, + { + "epoch": 0.840295358649789, + "grad_norm": 0.6606161594390869, + "learning_rate": 9.439731160442619e-05, + "loss": 1.3021, + "step": 7966 + }, + { + "epoch": 0.8404008438818565, + "grad_norm": 0.6382386684417725, + "learning_rate": 9.427541381138002e-05, + "loss": 1.3033, + "step": 7967 + }, + { + "epoch": 0.8405063291139241, + "grad_norm": 0.8152931928634644, + "learning_rate": 9.415358949554326e-05, + "loss": 1.3041, + "step": 7968 + }, + { + "epoch": 0.8406118143459915, + "grad_norm": 0.6654432415962219, + "learning_rate": 9.40318386705673e-05, + "loss": 1.2682, + "step": 7969 + }, + { + "epoch": 0.8407172995780591, + "grad_norm": 0.7115824222564697, + "learning_rate": 9.391016135009484e-05, + "loss": 1.2842, + "step": 7970 + }, + { + "epoch": 0.8408227848101266, + "grad_norm": 1.0643283128738403, + "learning_rate": 9.378855754776028e-05, + "loss": 1.3194, + "step": 7971 + }, + { + "epoch": 
0.8409282700421941, + "grad_norm": 0.820583701133728, + "learning_rate": 9.366702727719006e-05, + "loss": 1.3255, + "step": 7972 + }, + { + "epoch": 0.8410337552742616, + "grad_norm": 0.6337803602218628, + "learning_rate": 9.354557055200214e-05, + "loss": 1.2928, + "step": 7973 + }, + { + "epoch": 0.8411392405063292, + "grad_norm": 0.7278228998184204, + "learning_rate": 9.342418738580652e-05, + "loss": 1.2725, + "step": 7974 + }, + { + "epoch": 0.8412447257383966, + "grad_norm": 0.9663093686103821, + "learning_rate": 9.330287779220459e-05, + "loss": 1.3104, + "step": 7975 + }, + { + "epoch": 0.8413502109704641, + "grad_norm": 0.7055742740631104, + "learning_rate": 9.31816417847898e-05, + "loss": 1.3091, + "step": 7976 + }, + { + "epoch": 0.8414556962025317, + "grad_norm": 0.6801798939704895, + "learning_rate": 9.306047937714713e-05, + "loss": 1.2776, + "step": 7977 + }, + { + "epoch": 0.8415611814345991, + "grad_norm": 0.6371762752532959, + "learning_rate": 9.29393905828537e-05, + "loss": 1.2907, + "step": 7978 + }, + { + "epoch": 0.8416666666666667, + "grad_norm": 0.6666377186775208, + "learning_rate": 9.281837541547791e-05, + "loss": 1.2977, + "step": 7979 + }, + { + "epoch": 0.8417721518987342, + "grad_norm": 0.6754435300827026, + "learning_rate": 9.269743388858019e-05, + "loss": 1.2667, + "step": 7980 + }, + { + "epoch": 0.8418776371308017, + "grad_norm": 0.7144915461540222, + "learning_rate": 9.257656601571266e-05, + "loss": 1.2941, + "step": 7981 + }, + { + "epoch": 0.8419831223628692, + "grad_norm": 0.6991250514984131, + "learning_rate": 9.245577181041901e-05, + "loss": 1.2875, + "step": 7982 + }, + { + "epoch": 0.8420886075949368, + "grad_norm": 0.6632868647575378, + "learning_rate": 9.233505128623499e-05, + "loss": 1.3365, + "step": 7983 + }, + { + "epoch": 0.8421940928270042, + "grad_norm": 0.8146706223487854, + "learning_rate": 9.221440445668794e-05, + "loss": 1.2756, + "step": 7984 + }, + { + "epoch": 0.8422995780590717, + "grad_norm": 
0.6934949159622192, + "learning_rate": 9.209383133529664e-05, + "loss": 1.3352, + "step": 7985 + }, + { + "epoch": 0.8424050632911393, + "grad_norm": 0.6797367334365845, + "learning_rate": 9.197333193557237e-05, + "loss": 1.3166, + "step": 7986 + }, + { + "epoch": 0.8425105485232067, + "grad_norm": 0.7742732167243958, + "learning_rate": 9.185290627101747e-05, + "loss": 1.321, + "step": 7987 + }, + { + "epoch": 0.8426160337552743, + "grad_norm": 0.7483085989952087, + "learning_rate": 9.173255435512617e-05, + "loss": 1.3503, + "step": 7988 + }, + { + "epoch": 0.8427215189873418, + "grad_norm": 0.6908089518547058, + "learning_rate": 9.161227620138468e-05, + "loss": 1.284, + "step": 7989 + }, + { + "epoch": 0.8428270042194093, + "grad_norm": 0.7167856693267822, + "learning_rate": 9.149207182327054e-05, + "loss": 1.3005, + "step": 7990 + }, + { + "epoch": 0.8429324894514768, + "grad_norm": 0.6432213187217712, + "learning_rate": 9.137194123425349e-05, + "loss": 1.3037, + "step": 7991 + }, + { + "epoch": 0.8430379746835444, + "grad_norm": 0.679578423500061, + "learning_rate": 9.125188444779458e-05, + "loss": 1.2799, + "step": 7992 + }, + { + "epoch": 0.8431434599156118, + "grad_norm": 0.6810556054115295, + "learning_rate": 9.113190147734682e-05, + "loss": 1.318, + "step": 7993 + }, + { + "epoch": 0.8432489451476793, + "grad_norm": 0.8980641961097717, + "learning_rate": 9.101199233635477e-05, + "loss": 1.2924, + "step": 7994 + }, + { + "epoch": 0.8433544303797469, + "grad_norm": 0.6683472990989685, + "learning_rate": 9.089215703825519e-05, + "loss": 1.289, + "step": 7995 + }, + { + "epoch": 0.8434599156118143, + "grad_norm": 0.6797450184822083, + "learning_rate": 9.077239559647591e-05, + "loss": 1.3306, + "step": 7996 + }, + { + "epoch": 0.8435654008438819, + "grad_norm": 0.6508178114891052, + "learning_rate": 9.065270802443704e-05, + "loss": 1.3045, + "step": 7997 + }, + { + "epoch": 0.8436708860759494, + "grad_norm": 0.6475904583930969, + "learning_rate": 
9.053309433554993e-05, + "loss": 1.3014, + "step": 7998 + }, + { + "epoch": 0.8437763713080169, + "grad_norm": 0.7009424567222595, + "learning_rate": 9.041355454321803e-05, + "loss": 1.3031, + "step": 7999 + }, + { + "epoch": 0.8438818565400844, + "grad_norm": 0.6989547610282898, + "learning_rate": 9.029408866083638e-05, + "loss": 1.3018, + "step": 8000 + }, + { + "epoch": 0.8439873417721518, + "grad_norm": 0.6926971673965454, + "learning_rate": 9.017469670179168e-05, + "loss": 1.3151, + "step": 8001 + }, + { + "epoch": 0.8440928270042194, + "grad_norm": 0.7143039703369141, + "learning_rate": 9.00553786794624e-05, + "loss": 1.3414, + "step": 8002 + }, + { + "epoch": 0.8441983122362869, + "grad_norm": 0.6415383815765381, + "learning_rate": 8.99361346072185e-05, + "loss": 1.3213, + "step": 8003 + }, + { + "epoch": 0.8443037974683544, + "grad_norm": 0.6711907386779785, + "learning_rate": 8.98169644984223e-05, + "loss": 1.2793, + "step": 8004 + }, + { + "epoch": 0.8444092827004219, + "grad_norm": 0.7627303004264832, + "learning_rate": 8.96978683664272e-05, + "loss": 1.2926, + "step": 8005 + }, + { + "epoch": 0.8445147679324895, + "grad_norm": 0.7628506422042847, + "learning_rate": 8.957884622457854e-05, + "loss": 1.316, + "step": 8006 + }, + { + "epoch": 0.8446202531645569, + "grad_norm": 0.6672184467315674, + "learning_rate": 8.945989808621321e-05, + "loss": 1.3496, + "step": 8007 + }, + { + "epoch": 0.8447257383966245, + "grad_norm": 0.725650429725647, + "learning_rate": 8.934102396466016e-05, + "loss": 1.2969, + "step": 8008 + }, + { + "epoch": 0.844831223628692, + "grad_norm": 0.6409232020378113, + "learning_rate": 8.92222238732397e-05, + "loss": 1.2819, + "step": 8009 + }, + { + "epoch": 0.8449367088607594, + "grad_norm": 0.6444311738014221, + "learning_rate": 8.910349782526394e-05, + "loss": 1.2931, + "step": 8010 + }, + { + "epoch": 0.845042194092827, + "grad_norm": 0.7769061326980591, + "learning_rate": 8.898484583403668e-05, + "loss": 1.3106, + "step": 8011 + 
}, + { + "epoch": 0.8451476793248945, + "grad_norm": 0.6682472229003906, + "learning_rate": 8.886626791285369e-05, + "loss": 1.3022, + "step": 8012 + }, + { + "epoch": 0.845253164556962, + "grad_norm": 0.648542046546936, + "learning_rate": 8.874776407500206e-05, + "loss": 1.299, + "step": 8013 + }, + { + "epoch": 0.8453586497890295, + "grad_norm": 0.6770468950271606, + "learning_rate": 8.86293343337608e-05, + "loss": 1.3079, + "step": 8014 + }, + { + "epoch": 0.8454641350210971, + "grad_norm": 0.6932340860366821, + "learning_rate": 8.851097870240051e-05, + "loss": 1.2925, + "step": 8015 + }, + { + "epoch": 0.8455696202531645, + "grad_norm": 0.7029168605804443, + "learning_rate": 8.839269719418361e-05, + "loss": 1.3, + "step": 8016 + }, + { + "epoch": 0.8456751054852321, + "grad_norm": 0.6671229600906372, + "learning_rate": 8.827448982236397e-05, + "loss": 1.2519, + "step": 8017 + }, + { + "epoch": 0.8457805907172996, + "grad_norm": 0.6668541431427002, + "learning_rate": 8.815635660018742e-05, + "loss": 1.3353, + "step": 8018 + }, + { + "epoch": 0.845886075949367, + "grad_norm": 0.6798185706138611, + "learning_rate": 8.803829754089138e-05, + "loss": 1.3254, + "step": 8019 + }, + { + "epoch": 0.8459915611814346, + "grad_norm": 0.7087720036506653, + "learning_rate": 8.792031265770475e-05, + "loss": 1.3107, + "step": 8020 + }, + { + "epoch": 0.8460970464135021, + "grad_norm": 0.6692889928817749, + "learning_rate": 8.780240196384873e-05, + "loss": 1.3027, + "step": 8021 + }, + { + "epoch": 0.8462025316455696, + "grad_norm": 0.6685927510261536, + "learning_rate": 8.768456547253556e-05, + "loss": 1.2836, + "step": 8022 + }, + { + "epoch": 0.8463080168776371, + "grad_norm": 0.6867372393608093, + "learning_rate": 8.756680319696945e-05, + "loss": 1.2708, + "step": 8023 + }, + { + "epoch": 0.8464135021097047, + "grad_norm": 0.6806610226631165, + "learning_rate": 8.744911515034623e-05, + "loss": 1.3334, + "step": 8024 + }, + { + "epoch": 0.8465189873417721, + "grad_norm": 
0.7182865738868713, + "learning_rate": 8.733150134585338e-05, + "loss": 1.3258, + "step": 8025 + }, + { + "epoch": 0.8466244725738397, + "grad_norm": 0.6507943272590637, + "learning_rate": 8.721396179667019e-05, + "loss": 1.2888, + "step": 8026 + }, + { + "epoch": 0.8467299578059072, + "grad_norm": 0.7242476940155029, + "learning_rate": 8.709649651596752e-05, + "loss": 1.2799, + "step": 8027 + }, + { + "epoch": 0.8468354430379746, + "grad_norm": 0.7544969320297241, + "learning_rate": 8.697910551690802e-05, + "loss": 1.2811, + "step": 8028 + }, + { + "epoch": 0.8469409282700422, + "grad_norm": 0.7074359059333801, + "learning_rate": 8.686178881264568e-05, + "loss": 1.3286, + "step": 8029 + }, + { + "epoch": 0.8470464135021097, + "grad_norm": 0.6240119934082031, + "learning_rate": 8.67445464163267e-05, + "loss": 1.2946, + "step": 8030 + }, + { + "epoch": 0.8471518987341772, + "grad_norm": 0.6839638948440552, + "learning_rate": 8.662737834108861e-05, + "loss": 1.3223, + "step": 8031 + }, + { + "epoch": 0.8472573839662447, + "grad_norm": 0.7237693071365356, + "learning_rate": 8.651028460006072e-05, + "loss": 1.2717, + "step": 8032 + }, + { + "epoch": 0.8473628691983123, + "grad_norm": 0.7934074401855469, + "learning_rate": 8.639326520636387e-05, + "loss": 1.3245, + "step": 8033 + }, + { + "epoch": 0.8474683544303797, + "grad_norm": 0.6952659487724304, + "learning_rate": 8.627632017311065e-05, + "loss": 1.2761, + "step": 8034 + }, + { + "epoch": 0.8475738396624473, + "grad_norm": 0.6880431771278381, + "learning_rate": 8.615944951340543e-05, + "loss": 1.3037, + "step": 8035 + }, + { + "epoch": 0.8476793248945148, + "grad_norm": 0.6690424680709839, + "learning_rate": 8.604265324034405e-05, + "loss": 1.3236, + "step": 8036 + }, + { + "epoch": 0.8477848101265822, + "grad_norm": 0.6823037266731262, + "learning_rate": 8.592593136701404e-05, + "loss": 1.3083, + "step": 8037 + }, + { + "epoch": 0.8478902953586498, + "grad_norm": 0.6574535369873047, + "learning_rate": 
8.580928390649496e-05, + "loss": 1.3446, + "step": 8038 + }, + { + "epoch": 0.8479957805907173, + "grad_norm": 0.6997185945510864, + "learning_rate": 8.569271087185756e-05, + "loss": 1.3383, + "step": 8039 + }, + { + "epoch": 0.8481012658227848, + "grad_norm": 0.6734520196914673, + "learning_rate": 8.557621227616444e-05, + "loss": 1.3125, + "step": 8040 + }, + { + "epoch": 0.8482067510548523, + "grad_norm": 0.682710587978363, + "learning_rate": 8.545978813246987e-05, + "loss": 1.2836, + "step": 8041 + }, + { + "epoch": 0.8483122362869199, + "grad_norm": 0.6629582643508911, + "learning_rate": 8.53434384538197e-05, + "loss": 1.2546, + "step": 8042 + }, + { + "epoch": 0.8484177215189873, + "grad_norm": 0.6832284927368164, + "learning_rate": 8.522716325325155e-05, + "loss": 1.3291, + "step": 8043 + }, + { + "epoch": 0.8485232067510549, + "grad_norm": 0.6471917033195496, + "learning_rate": 8.51109625437946e-05, + "loss": 1.2962, + "step": 8044 + }, + { + "epoch": 0.8486286919831224, + "grad_norm": 0.6484545469284058, + "learning_rate": 8.499483633846977e-05, + "loss": 1.2805, + "step": 8045 + }, + { + "epoch": 0.8487341772151898, + "grad_norm": 0.6782088279724121, + "learning_rate": 8.48787846502893e-05, + "loss": 1.2841, + "step": 8046 + }, + { + "epoch": 0.8488396624472574, + "grad_norm": 0.666575014591217, + "learning_rate": 8.476280749225782e-05, + "loss": 1.2684, + "step": 8047 + }, + { + "epoch": 0.8489451476793249, + "grad_norm": 0.6396165490150452, + "learning_rate": 8.464690487737098e-05, + "loss": 1.2666, + "step": 8048 + }, + { + "epoch": 0.8490506329113924, + "grad_norm": 0.6636260747909546, + "learning_rate": 8.453107681861616e-05, + "loss": 1.3476, + "step": 8049 + }, + { + "epoch": 0.8491561181434599, + "grad_norm": 0.658654510974884, + "learning_rate": 8.441532332897248e-05, + "loss": 1.316, + "step": 8050 + }, + { + "epoch": 0.8492616033755275, + "grad_norm": 0.6511270999908447, + "learning_rate": 8.429964442141072e-05, + "loss": 1.3131, + "step": 8051 
+ }, + { + "epoch": 0.8493670886075949, + "grad_norm": 0.65986168384552, + "learning_rate": 8.418404010889336e-05, + "loss": 1.2813, + "step": 8052 + }, + { + "epoch": 0.8494725738396625, + "grad_norm": 0.6700445413589478, + "learning_rate": 8.406851040437426e-05, + "loss": 1.3216, + "step": 8053 + }, + { + "epoch": 0.84957805907173, + "grad_norm": 0.652511715888977, + "learning_rate": 8.395305532079928e-05, + "loss": 1.2937, + "step": 8054 + }, + { + "epoch": 0.8496835443037974, + "grad_norm": 0.6768162846565247, + "learning_rate": 8.383767487110552e-05, + "loss": 1.3141, + "step": 8055 + }, + { + "epoch": 0.849789029535865, + "grad_norm": 0.7326894998550415, + "learning_rate": 8.372236906822217e-05, + "loss": 1.2918, + "step": 8056 + }, + { + "epoch": 0.8498945147679325, + "grad_norm": 0.6585832238197327, + "learning_rate": 8.360713792506971e-05, + "loss": 1.3163, + "step": 8057 + }, + { + "epoch": 0.85, + "grad_norm": 0.6430966258049011, + "learning_rate": 8.349198145456049e-05, + "loss": 1.3285, + "step": 8058 + }, + { + "epoch": 0.8501054852320675, + "grad_norm": 0.672265350818634, + "learning_rate": 8.337689966959819e-05, + "loss": 1.2601, + "step": 8059 + }, + { + "epoch": 0.8502109704641351, + "grad_norm": 0.6628679037094116, + "learning_rate": 8.326189258307832e-05, + "loss": 1.2986, + "step": 8060 + }, + { + "epoch": 0.8503164556962025, + "grad_norm": 0.7234412431716919, + "learning_rate": 8.314696020788806e-05, + "loss": 1.3053, + "step": 8061 + }, + { + "epoch": 0.8504219409282701, + "grad_norm": 0.6612638235092163, + "learning_rate": 8.303210255690622e-05, + "loss": 1.3094, + "step": 8062 + }, + { + "epoch": 0.8505274261603376, + "grad_norm": 0.7055232524871826, + "learning_rate": 8.29173196430029e-05, + "loss": 1.2683, + "step": 8063 + }, + { + "epoch": 0.850632911392405, + "grad_norm": 0.7046555280685425, + "learning_rate": 8.280261147904039e-05, + "loss": 1.3237, + "step": 8064 + }, + { + "epoch": 0.8507383966244726, + "grad_norm": 
0.6941623091697693, + "learning_rate": 8.268797807787226e-05, + "loss": 1.3549, + "step": 8065 + }, + { + "epoch": 0.85084388185654, + "grad_norm": 0.6671575903892517, + "learning_rate": 8.257341945234365e-05, + "loss": 1.2927, + "step": 8066 + }, + { + "epoch": 0.8509493670886076, + "grad_norm": 0.6722782254219055, + "learning_rate": 8.245893561529153e-05, + "loss": 1.2958, + "step": 8067 + }, + { + "epoch": 0.8510548523206751, + "grad_norm": 0.6600736975669861, + "learning_rate": 8.23445265795443e-05, + "loss": 1.3163, + "step": 8068 + }, + { + "epoch": 0.8511603375527426, + "grad_norm": 0.6474952101707458, + "learning_rate": 8.223019235792214e-05, + "loss": 1.3199, + "step": 8069 + }, + { + "epoch": 0.8512658227848101, + "grad_norm": 0.6932263970375061, + "learning_rate": 8.211593296323672e-05, + "loss": 1.2998, + "step": 8070 + }, + { + "epoch": 0.8513713080168777, + "grad_norm": 0.7446356415748596, + "learning_rate": 8.200174840829136e-05, + "loss": 1.2803, + "step": 8071 + }, + { + "epoch": 0.8514767932489451, + "grad_norm": 0.6639788150787354, + "learning_rate": 8.188763870588092e-05, + "loss": 1.3007, + "step": 8072 + }, + { + "epoch": 0.8515822784810126, + "grad_norm": 0.668778121471405, + "learning_rate": 8.177360386879217e-05, + "loss": 1.3124, + "step": 8073 + }, + { + "epoch": 0.8516877637130802, + "grad_norm": 0.709334135055542, + "learning_rate": 8.165964390980316e-05, + "loss": 1.3175, + "step": 8074 + }, + { + "epoch": 0.8517932489451476, + "grad_norm": 0.7745427489280701, + "learning_rate": 8.15457588416838e-05, + "loss": 1.3071, + "step": 8075 + }, + { + "epoch": 0.8518987341772152, + "grad_norm": 0.7520619630813599, + "learning_rate": 8.143194867719534e-05, + "loss": 1.3581, + "step": 8076 + }, + { + "epoch": 0.8520042194092827, + "grad_norm": 0.6649918556213379, + "learning_rate": 8.131821342909071e-05, + "loss": 1.3198, + "step": 8077 + }, + { + "epoch": 0.8521097046413502, + "grad_norm": 0.6961092948913574, + "learning_rate": 
8.120455311011473e-05, + "loss": 1.2716, + "step": 8078 + }, + { + "epoch": 0.8522151898734177, + "grad_norm": 0.7025794386863708, + "learning_rate": 8.109096773300348e-05, + "loss": 1.3008, + "step": 8079 + }, + { + "epoch": 0.8523206751054853, + "grad_norm": 0.658119797706604, + "learning_rate": 8.097745731048475e-05, + "loss": 1.3279, + "step": 8080 + }, + { + "epoch": 0.8524261603375527, + "grad_norm": 0.6448274850845337, + "learning_rate": 8.08640218552778e-05, + "loss": 1.2532, + "step": 8081 + }, + { + "epoch": 0.8525316455696202, + "grad_norm": 0.7191643714904785, + "learning_rate": 8.075066138009396e-05, + "loss": 1.2395, + "step": 8082 + }, + { + "epoch": 0.8526371308016878, + "grad_norm": 0.7126110792160034, + "learning_rate": 8.063737589763573e-05, + "loss": 1.2733, + "step": 8083 + }, + { + "epoch": 0.8527426160337552, + "grad_norm": 0.673247754573822, + "learning_rate": 8.05241654205973e-05, + "loss": 1.2971, + "step": 8084 + }, + { + "epoch": 0.8528481012658228, + "grad_norm": 0.6715707778930664, + "learning_rate": 8.041102996166442e-05, + "loss": 1.3132, + "step": 8085 + }, + { + "epoch": 0.8529535864978903, + "grad_norm": 0.6805595755577087, + "learning_rate": 8.029796953351445e-05, + "loss": 1.3022, + "step": 8086 + }, + { + "epoch": 0.8530590717299578, + "grad_norm": 0.6727222800254822, + "learning_rate": 8.018498414881645e-05, + "loss": 1.2937, + "step": 8087 + }, + { + "epoch": 0.8531645569620253, + "grad_norm": 0.7301657795906067, + "learning_rate": 8.007207382023102e-05, + "loss": 1.3188, + "step": 8088 + }, + { + "epoch": 0.8532700421940929, + "grad_norm": 0.6599039435386658, + "learning_rate": 7.995923856041013e-05, + "loss": 1.3461, + "step": 8089 + }, + { + "epoch": 0.8533755274261603, + "grad_norm": 0.6769800782203674, + "learning_rate": 7.984647838199773e-05, + "loss": 1.3242, + "step": 8090 + }, + { + "epoch": 0.8534810126582278, + "grad_norm": 0.68125981092453, + "learning_rate": 7.973379329762925e-05, + "loss": 1.3019, + "step": 8091 
+ }, + { + "epoch": 0.8535864978902954, + "grad_norm": 0.8717138767242432, + "learning_rate": 7.96211833199314e-05, + "loss": 1.305, + "step": 8092 + }, + { + "epoch": 0.8536919831223628, + "grad_norm": 0.6750802397727966, + "learning_rate": 7.950864846152284e-05, + "loss": 1.2974, + "step": 8093 + }, + { + "epoch": 0.8537974683544304, + "grad_norm": 0.6918360590934753, + "learning_rate": 7.939618873501356e-05, + "loss": 1.2839, + "step": 8094 + }, + { + "epoch": 0.8539029535864979, + "grad_norm": 0.6337310075759888, + "learning_rate": 7.928380415300523e-05, + "loss": 1.2498, + "step": 8095 + }, + { + "epoch": 0.8540084388185654, + "grad_norm": 0.7113246321678162, + "learning_rate": 7.917149472809113e-05, + "loss": 1.2744, + "step": 8096 + }, + { + "epoch": 0.8541139240506329, + "grad_norm": 0.7771851420402527, + "learning_rate": 7.905926047285616e-05, + "loss": 1.3131, + "step": 8097 + }, + { + "epoch": 0.8542194092827005, + "grad_norm": 0.7091829776763916, + "learning_rate": 7.894710139987645e-05, + "loss": 1.2802, + "step": 8098 + }, + { + "epoch": 0.8543248945147679, + "grad_norm": 0.6813285946846008, + "learning_rate": 7.883501752172038e-05, + "loss": 1.3048, + "step": 8099 + }, + { + "epoch": 0.8544303797468354, + "grad_norm": 0.7125821113586426, + "learning_rate": 7.872300885094736e-05, + "loss": 1.308, + "step": 8100 + }, + { + "epoch": 0.854535864978903, + "grad_norm": 0.7093461751937866, + "learning_rate": 7.861107540010845e-05, + "loss": 1.3118, + "step": 8101 + }, + { + "epoch": 0.8546413502109704, + "grad_norm": 0.7045931816101074, + "learning_rate": 7.849921718174638e-05, + "loss": 1.3466, + "step": 8102 + }, + { + "epoch": 0.854746835443038, + "grad_norm": 0.6882233619689941, + "learning_rate": 7.838743420839544e-05, + "loss": 1.3036, + "step": 8103 + }, + { + "epoch": 0.8548523206751055, + "grad_norm": 0.7116303443908691, + "learning_rate": 7.827572649258147e-05, + "loss": 1.3338, + "step": 8104 + }, + { + "epoch": 0.854957805907173, + "grad_norm": 
0.6559901237487793, + "learning_rate": 7.816409404682185e-05, + "loss": 1.308, + "step": 8105 + }, + { + "epoch": 0.8550632911392405, + "grad_norm": 0.710013210773468, + "learning_rate": 7.805253688362557e-05, + "loss": 1.2985, + "step": 8106 + }, + { + "epoch": 0.8551687763713081, + "grad_norm": 0.6713419556617737, + "learning_rate": 7.794105501549306e-05, + "loss": 1.2755, + "step": 8107 + }, + { + "epoch": 0.8552742616033755, + "grad_norm": 0.8488417267799377, + "learning_rate": 7.782964845491666e-05, + "loss": 1.2722, + "step": 8108 + }, + { + "epoch": 0.855379746835443, + "grad_norm": 0.7277877330780029, + "learning_rate": 7.771831721437989e-05, + "loss": 1.3063, + "step": 8109 + }, + { + "epoch": 0.8554852320675106, + "grad_norm": 0.6530169248580933, + "learning_rate": 7.760706130635792e-05, + "loss": 1.3432, + "step": 8110 + }, + { + "epoch": 0.855590717299578, + "grad_norm": 0.7218785881996155, + "learning_rate": 7.749588074331762e-05, + "loss": 1.3056, + "step": 8111 + }, + { + "epoch": 0.8556962025316456, + "grad_norm": 0.7435027956962585, + "learning_rate": 7.738477553771727e-05, + "loss": 1.3286, + "step": 8112 + }, + { + "epoch": 0.8558016877637131, + "grad_norm": 0.7644481062889099, + "learning_rate": 7.727374570200685e-05, + "loss": 1.2591, + "step": 8113 + }, + { + "epoch": 0.8559071729957806, + "grad_norm": 0.6717890501022339, + "learning_rate": 7.716279124862771e-05, + "loss": 1.3267, + "step": 8114 + }, + { + "epoch": 0.8560126582278481, + "grad_norm": 0.6589681506156921, + "learning_rate": 7.705191219001267e-05, + "loss": 1.3042, + "step": 8115 + }, + { + "epoch": 0.8561181434599157, + "grad_norm": 0.653434693813324, + "learning_rate": 7.694110853858671e-05, + "loss": 1.3093, + "step": 8116 + }, + { + "epoch": 0.8562236286919831, + "grad_norm": 0.8265036940574646, + "learning_rate": 7.683038030676573e-05, + "loss": 1.3175, + "step": 8117 + }, + { + "epoch": 0.8563291139240506, + "grad_norm": 0.6672627925872803, + "learning_rate": 
7.67197275069573e-05, + "loss": 1.2785, + "step": 8118 + }, + { + "epoch": 0.8564345991561182, + "grad_norm": 0.6450141072273254, + "learning_rate": 7.660915015156067e-05, + "loss": 1.2914, + "step": 8119 + }, + { + "epoch": 0.8565400843881856, + "grad_norm": 0.6573497653007507, + "learning_rate": 7.649864825296669e-05, + "loss": 1.27, + "step": 8120 + }, + { + "epoch": 0.8566455696202532, + "grad_norm": 0.725169837474823, + "learning_rate": 7.63882218235575e-05, + "loss": 1.2872, + "step": 8121 + }, + { + "epoch": 0.8567510548523207, + "grad_norm": 0.6681546568870544, + "learning_rate": 7.627787087570692e-05, + "loss": 1.2935, + "step": 8122 + }, + { + "epoch": 0.8568565400843882, + "grad_norm": 0.6349011063575745, + "learning_rate": 7.616759542178045e-05, + "loss": 1.3005, + "step": 8123 + }, + { + "epoch": 0.8569620253164557, + "grad_norm": 0.6965759992599487, + "learning_rate": 7.605739547413487e-05, + "loss": 1.2918, + "step": 8124 + }, + { + "epoch": 0.8570675105485233, + "grad_norm": 0.6829814314842224, + "learning_rate": 7.594727104511873e-05, + "loss": 1.3017, + "step": 8125 + }, + { + "epoch": 0.8571729957805907, + "grad_norm": 0.6775792837142944, + "learning_rate": 7.583722214707206e-05, + "loss": 1.3075, + "step": 8126 + }, + { + "epoch": 0.8572784810126582, + "grad_norm": 0.6442824006080627, + "learning_rate": 7.572724879232634e-05, + "loss": 1.2997, + "step": 8127 + }, + { + "epoch": 0.8573839662447258, + "grad_norm": 0.6519848704338074, + "learning_rate": 7.561735099320463e-05, + "loss": 1.3087, + "step": 8128 + }, + { + "epoch": 0.8574894514767932, + "grad_norm": 0.6881347298622131, + "learning_rate": 7.55075287620215e-05, + "loss": 1.344, + "step": 8129 + }, + { + "epoch": 0.8575949367088608, + "grad_norm": 0.6864803433418274, + "learning_rate": 7.539778211108309e-05, + "loss": 1.2774, + "step": 8130 + }, + { + "epoch": 0.8577004219409282, + "grad_norm": 0.6511780619621277, + "learning_rate": 7.528811105268699e-05, + "loss": 1.2683, + "step": 8131 
+ }, + { + "epoch": 0.8578059071729958, + "grad_norm": 0.6605892181396484, + "learning_rate": 7.517851559912254e-05, + "loss": 1.3026, + "step": 8132 + }, + { + "epoch": 0.8579113924050633, + "grad_norm": 0.7129273414611816, + "learning_rate": 7.506899576267023e-05, + "loss": 1.3054, + "step": 8133 + }, + { + "epoch": 0.8580168776371307, + "grad_norm": 0.6834847331047058, + "learning_rate": 7.495955155560261e-05, + "loss": 1.2866, + "step": 8134 + }, + { + "epoch": 0.8581223628691983, + "grad_norm": 0.6845118403434753, + "learning_rate": 7.485018299018326e-05, + "loss": 1.2924, + "step": 8135 + }, + { + "epoch": 0.8582278481012658, + "grad_norm": 0.6963246464729309, + "learning_rate": 7.474089007866756e-05, + "loss": 1.3194, + "step": 8136 + }, + { + "epoch": 0.8583333333333333, + "grad_norm": 0.6514201164245605, + "learning_rate": 7.463167283330227e-05, + "loss": 1.2944, + "step": 8137 + }, + { + "epoch": 0.8584388185654008, + "grad_norm": 0.6499342322349548, + "learning_rate": 7.452253126632564e-05, + "loss": 1.3104, + "step": 8138 + }, + { + "epoch": 0.8585443037974684, + "grad_norm": 0.6614143252372742, + "learning_rate": 7.441346538996769e-05, + "loss": 1.308, + "step": 8139 + }, + { + "epoch": 0.8586497890295358, + "grad_norm": 0.6524242758750916, + "learning_rate": 7.430447521644973e-05, + "loss": 1.2969, + "step": 8140 + }, + { + "epoch": 0.8587552742616034, + "grad_norm": 0.644797682762146, + "learning_rate": 7.41955607579845e-05, + "loss": 1.258, + "step": 8141 + }, + { + "epoch": 0.8588607594936709, + "grad_norm": 0.6524240374565125, + "learning_rate": 7.408672202677666e-05, + "loss": 1.2918, + "step": 8142 + }, + { + "epoch": 0.8589662447257383, + "grad_norm": 0.6719551682472229, + "learning_rate": 7.397795903502202e-05, + "loss": 1.3141, + "step": 8143 + }, + { + "epoch": 0.8590717299578059, + "grad_norm": 0.6708241701126099, + "learning_rate": 7.386927179490801e-05, + "loss": 1.2701, + "step": 8144 + }, + { + "epoch": 0.8591772151898734, + 
"grad_norm": 0.6772025227546692, + "learning_rate": 7.376066031861364e-05, + "loss": 1.2837, + "step": 8145 + }, + { + "epoch": 0.8592827004219409, + "grad_norm": 0.6557523012161255, + "learning_rate": 7.365212461830933e-05, + "loss": 1.3351, + "step": 8146 + }, + { + "epoch": 0.8593881856540084, + "grad_norm": 0.6621993184089661, + "learning_rate": 7.354366470615695e-05, + "loss": 1.2925, + "step": 8147 + }, + { + "epoch": 0.859493670886076, + "grad_norm": 0.7676102519035339, + "learning_rate": 7.343528059431009e-05, + "loss": 1.2841, + "step": 8148 + }, + { + "epoch": 0.8595991561181434, + "grad_norm": 0.6479567885398865, + "learning_rate": 7.332697229491373e-05, + "loss": 1.2631, + "step": 8149 + }, + { + "epoch": 0.859704641350211, + "grad_norm": 0.7352471947669983, + "learning_rate": 7.321873982010422e-05, + "loss": 1.3264, + "step": 8150 + }, + { + "epoch": 0.8598101265822785, + "grad_norm": 0.7161554098129272, + "learning_rate": 7.311058318200969e-05, + "loss": 1.3069, + "step": 8151 + }, + { + "epoch": 0.859915611814346, + "grad_norm": 0.6545277833938599, + "learning_rate": 7.300250239274964e-05, + "loss": 1.2822, + "step": 8152 + }, + { + "epoch": 0.8600210970464135, + "grad_norm": 0.6990761756896973, + "learning_rate": 7.289449746443494e-05, + "loss": 1.3372, + "step": 8153 + }, + { + "epoch": 0.860126582278481, + "grad_norm": 0.7043113708496094, + "learning_rate": 7.278656840916825e-05, + "loss": 1.32, + "step": 8154 + }, + { + "epoch": 0.8602320675105485, + "grad_norm": 0.7143555283546448, + "learning_rate": 7.26787152390434e-05, + "loss": 1.3158, + "step": 8155 + }, + { + "epoch": 0.860337552742616, + "grad_norm": 0.7246772646903992, + "learning_rate": 7.257093796614597e-05, + "loss": 1.3253, + "step": 8156 + }, + { + "epoch": 0.8604430379746836, + "grad_norm": 0.6957210302352905, + "learning_rate": 7.246323660255289e-05, + "loss": 1.2874, + "step": 8157 + }, + { + "epoch": 0.860548523206751, + "grad_norm": 0.7181717753410339, + "learning_rate": 
7.235561116033265e-05, + "loss": 1.3078, + "step": 8158 + }, + { + "epoch": 0.8606540084388186, + "grad_norm": 0.6408311128616333, + "learning_rate": 7.224806165154504e-05, + "loss": 1.2726, + "step": 8159 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 0.6668227910995483, + "learning_rate": 7.214058808824192e-05, + "loss": 1.2968, + "step": 8160 + }, + { + "epoch": 0.8608649789029535, + "grad_norm": 0.7260586619377136, + "learning_rate": 7.203319048246599e-05, + "loss": 1.2705, + "step": 8161 + }, + { + "epoch": 0.8609704641350211, + "grad_norm": 0.6904289126396179, + "learning_rate": 7.192586884625169e-05, + "loss": 1.3183, + "step": 8162 + }, + { + "epoch": 0.8610759493670886, + "grad_norm": 0.697938859462738, + "learning_rate": 7.1818623191625e-05, + "loss": 1.3206, + "step": 8163 + }, + { + "epoch": 0.8611814345991561, + "grad_norm": 0.6485952138900757, + "learning_rate": 7.17114535306033e-05, + "loss": 1.2658, + "step": 8164 + }, + { + "epoch": 0.8612869198312236, + "grad_norm": 0.6991468071937561, + "learning_rate": 7.16043598751954e-05, + "loss": 1.2985, + "step": 8165 + }, + { + "epoch": 0.8613924050632912, + "grad_norm": 0.7320560216903687, + "learning_rate": 7.149734223740187e-05, + "loss": 1.3397, + "step": 8166 + }, + { + "epoch": 0.8614978902953586, + "grad_norm": 0.6297632455825806, + "learning_rate": 7.139040062921428e-05, + "loss": 1.296, + "step": 8167 + }, + { + "epoch": 0.8616033755274262, + "grad_norm": 0.6346927285194397, + "learning_rate": 7.128353506261631e-05, + "loss": 1.3023, + "step": 8168 + }, + { + "epoch": 0.8617088607594937, + "grad_norm": 0.7444086670875549, + "learning_rate": 7.117674554958253e-05, + "loss": 1.2729, + "step": 8169 + }, + { + "epoch": 0.8618143459915611, + "grad_norm": 0.6951671838760376, + "learning_rate": 7.107003210207947e-05, + "loss": 1.269, + "step": 8170 + }, + { + "epoch": 0.8619198312236287, + "grad_norm": 0.6751917004585266, + "learning_rate": 7.096339473206471e-05, + "loss": 1.3026, + "step": 8171 
+ }, + { + "epoch": 0.8620253164556962, + "grad_norm": 0.6630054116249084, + "learning_rate": 7.085683345148753e-05, + "loss": 1.2925, + "step": 8172 + }, + { + "epoch": 0.8621308016877637, + "grad_norm": 0.6654002666473389, + "learning_rate": 7.075034827228862e-05, + "loss": 1.338, + "step": 8173 + }, + { + "epoch": 0.8622362869198312, + "grad_norm": 0.6550641655921936, + "learning_rate": 7.064393920640031e-05, + "loss": 1.3071, + "step": 8174 + }, + { + "epoch": 0.8623417721518988, + "grad_norm": 0.7023673057556152, + "learning_rate": 7.053760626574618e-05, + "loss": 1.261, + "step": 8175 + }, + { + "epoch": 0.8624472573839662, + "grad_norm": 0.6452482342720032, + "learning_rate": 7.043134946224123e-05, + "loss": 1.3084, + "step": 8176 + }, + { + "epoch": 0.8625527426160338, + "grad_norm": 0.7523324489593506, + "learning_rate": 7.032516880779233e-05, + "loss": 1.3228, + "step": 8177 + }, + { + "epoch": 0.8626582278481013, + "grad_norm": 0.6456693410873413, + "learning_rate": 7.021906431429747e-05, + "loss": 1.2867, + "step": 8178 + }, + { + "epoch": 0.8627637130801687, + "grad_norm": 0.6367841958999634, + "learning_rate": 7.011303599364608e-05, + "loss": 1.2787, + "step": 8179 + }, + { + "epoch": 0.8628691983122363, + "grad_norm": 0.794101357460022, + "learning_rate": 7.000708385771928e-05, + "loss": 1.3402, + "step": 8180 + }, + { + "epoch": 0.8629746835443038, + "grad_norm": 0.7148482799530029, + "learning_rate": 6.990120791838953e-05, + "loss": 1.2984, + "step": 8181 + }, + { + "epoch": 0.8630801687763713, + "grad_norm": 0.670676589012146, + "learning_rate": 6.979540818752064e-05, + "loss": 1.3042, + "step": 8182 + }, + { + "epoch": 0.8631856540084388, + "grad_norm": 0.703589916229248, + "learning_rate": 6.968968467696806e-05, + "loss": 1.2772, + "step": 8183 + }, + { + "epoch": 0.8632911392405064, + "grad_norm": 0.6366937756538391, + "learning_rate": 6.958403739857866e-05, + "loss": 1.3082, + "step": 8184 + }, + { + "epoch": 0.8633966244725738, + "grad_norm": 
0.6600885391235352, + "learning_rate": 6.947846636419061e-05, + "loss": 1.3012, + "step": 8185 + }, + { + "epoch": 0.8635021097046414, + "grad_norm": 0.6541845202445984, + "learning_rate": 6.937297158563389e-05, + "loss": 1.3076, + "step": 8186 + }, + { + "epoch": 0.8636075949367089, + "grad_norm": 0.8358847498893738, + "learning_rate": 6.926755307472968e-05, + "loss": 1.2908, + "step": 8187 + }, + { + "epoch": 0.8637130801687763, + "grad_norm": 0.7072727680206299, + "learning_rate": 6.916221084329055e-05, + "loss": 1.2835, + "step": 8188 + }, + { + "epoch": 0.8638185654008439, + "grad_norm": 0.6594629883766174, + "learning_rate": 6.905694490312064e-05, + "loss": 1.3049, + "step": 8189 + }, + { + "epoch": 0.8639240506329114, + "grad_norm": 0.8218790292739868, + "learning_rate": 6.89517552660156e-05, + "loss": 1.2948, + "step": 8190 + }, + { + "epoch": 0.8640295358649789, + "grad_norm": 0.749469518661499, + "learning_rate": 6.884664194376233e-05, + "loss": 1.3059, + "step": 8191 + }, + { + "epoch": 0.8641350210970464, + "grad_norm": 0.6781343817710876, + "learning_rate": 6.874160494813942e-05, + "loss": 1.2892, + "step": 8192 + }, + { + "epoch": 0.864240506329114, + "grad_norm": 0.6458573937416077, + "learning_rate": 6.86366442909166e-05, + "loss": 1.2861, + "step": 8193 + }, + { + "epoch": 0.8643459915611814, + "grad_norm": 0.742816686630249, + "learning_rate": 6.853175998385547e-05, + "loss": 1.3242, + "step": 8194 + }, + { + "epoch": 0.864451476793249, + "grad_norm": 0.7333568334579468, + "learning_rate": 6.842695203870872e-05, + "loss": 1.2825, + "step": 8195 + }, + { + "epoch": 0.8645569620253165, + "grad_norm": 0.6784447431564331, + "learning_rate": 6.832222046722069e-05, + "loss": 1.3386, + "step": 8196 + }, + { + "epoch": 0.864662447257384, + "grad_norm": 0.6568927764892578, + "learning_rate": 6.821756528112693e-05, + "loss": 1.3101, + "step": 8197 + }, + { + "epoch": 0.8647679324894515, + "grad_norm": 0.6490128636360168, + "learning_rate": 
6.811298649215472e-05, + "loss": 1.3069, + "step": 8198 + }, + { + "epoch": 0.8648734177215189, + "grad_norm": 0.6738365888595581, + "learning_rate": 6.80084841120226e-05, + "loss": 1.3309, + "step": 8199 + }, + { + "epoch": 0.8649789029535865, + "grad_norm": 0.6705824732780457, + "learning_rate": 6.790405815244044e-05, + "loss": 1.2862, + "step": 8200 + }, + { + "epoch": 0.865084388185654, + "grad_norm": 0.6208506226539612, + "learning_rate": 6.779970862510989e-05, + "loss": 1.3088, + "step": 8201 + }, + { + "epoch": 0.8651898734177215, + "grad_norm": 0.6571778655052185, + "learning_rate": 6.769543554172361e-05, + "loss": 1.3422, + "step": 8202 + }, + { + "epoch": 0.865295358649789, + "grad_norm": 0.6465214490890503, + "learning_rate": 6.759123891396615e-05, + "loss": 1.2917, + "step": 8203 + }, + { + "epoch": 0.8654008438818566, + "grad_norm": 0.6667494177818298, + "learning_rate": 6.748711875351318e-05, + "loss": 1.2788, + "step": 8204 + }, + { + "epoch": 0.865506329113924, + "grad_norm": 0.6428148746490479, + "learning_rate": 6.738307507203187e-05, + "loss": 1.3021, + "step": 8205 + }, + { + "epoch": 0.8656118143459915, + "grad_norm": 0.639746904373169, + "learning_rate": 6.72791078811808e-05, + "loss": 1.3, + "step": 8206 + }, + { + "epoch": 0.8657172995780591, + "grad_norm": 0.6574333906173706, + "learning_rate": 6.717521719261016e-05, + "loss": 1.3199, + "step": 8207 + }, + { + "epoch": 0.8658227848101265, + "grad_norm": 0.6498188376426697, + "learning_rate": 6.707140301796122e-05, + "loss": 1.3209, + "step": 8208 + }, + { + "epoch": 0.8659282700421941, + "grad_norm": 0.640815794467926, + "learning_rate": 6.696766536886692e-05, + "loss": 1.2687, + "step": 8209 + }, + { + "epoch": 0.8660337552742616, + "grad_norm": 0.6555420160293579, + "learning_rate": 6.686400425695171e-05, + "loss": 1.2837, + "step": 8210 + }, + { + "epoch": 0.8661392405063291, + "grad_norm": 0.7819722294807434, + "learning_rate": 6.676041969383107e-05, + "loss": 1.3062, + "step": 8211 + 
}, + { + "epoch": 0.8662447257383966, + "grad_norm": 0.7697882056236267, + "learning_rate": 6.665691169111244e-05, + "loss": 1.2852, + "step": 8212 + }, + { + "epoch": 0.8663502109704642, + "grad_norm": 0.7351301908493042, + "learning_rate": 6.655348026039437e-05, + "loss": 1.294, + "step": 8213 + }, + { + "epoch": 0.8664556962025316, + "grad_norm": 0.6388496160507202, + "learning_rate": 6.645012541326678e-05, + "loss": 1.3075, + "step": 8214 + }, + { + "epoch": 0.8665611814345991, + "grad_norm": 0.6830363869667053, + "learning_rate": 6.634684716131114e-05, + "loss": 1.3231, + "step": 8215 + }, + { + "epoch": 0.8666666666666667, + "grad_norm": 0.6815956830978394, + "learning_rate": 6.62436455161003e-05, + "loss": 1.2969, + "step": 8216 + }, + { + "epoch": 0.8667721518987341, + "grad_norm": 0.895568311214447, + "learning_rate": 6.614052048919847e-05, + "loss": 1.3234, + "step": 8217 + }, + { + "epoch": 0.8668776371308017, + "grad_norm": 0.6812606453895569, + "learning_rate": 6.603747209216135e-05, + "loss": 1.2766, + "step": 8218 + }, + { + "epoch": 0.8669831223628692, + "grad_norm": 0.6949084401130676, + "learning_rate": 6.593450033653586e-05, + "loss": 1.3293, + "step": 8219 + }, + { + "epoch": 0.8670886075949367, + "grad_norm": 0.7055903077125549, + "learning_rate": 6.583160523386086e-05, + "loss": 1.3394, + "step": 8220 + }, + { + "epoch": 0.8671940928270042, + "grad_norm": 0.7037987112998962, + "learning_rate": 6.572878679566605e-05, + "loss": 1.2614, + "step": 8221 + }, + { + "epoch": 0.8672995780590718, + "grad_norm": 0.8252525329589844, + "learning_rate": 6.562604503347277e-05, + "loss": 1.2881, + "step": 8222 + }, + { + "epoch": 0.8674050632911392, + "grad_norm": 0.6669589281082153, + "learning_rate": 6.552337995879368e-05, + "loss": 1.3244, + "step": 8223 + }, + { + "epoch": 0.8675105485232067, + "grad_norm": 0.6447718739509583, + "learning_rate": 6.542079158313305e-05, + "loss": 1.3079, + "step": 8224 + }, + { + "epoch": 0.8676160337552743, + "grad_norm": 
0.7527243494987488, + "learning_rate": 6.531827991798628e-05, + "loss": 1.3297, + "step": 8225 + }, + { + "epoch": 0.8677215189873417, + "grad_norm": 0.7434130311012268, + "learning_rate": 6.521584497484043e-05, + "loss": 1.2971, + "step": 8226 + }, + { + "epoch": 0.8678270042194093, + "grad_norm": 0.654345691204071, + "learning_rate": 6.511348676517373e-05, + "loss": 1.2754, + "step": 8227 + }, + { + "epoch": 0.8679324894514768, + "grad_norm": 0.6729066371917725, + "learning_rate": 6.501120530045593e-05, + "loss": 1.3197, + "step": 8228 + }, + { + "epoch": 0.8680379746835443, + "grad_norm": 0.6513105630874634, + "learning_rate": 6.490900059214836e-05, + "loss": 1.2991, + "step": 8229 + }, + { + "epoch": 0.8681434599156118, + "grad_norm": 0.7205623388290405, + "learning_rate": 6.480687265170342e-05, + "loss": 1.3303, + "step": 8230 + }, + { + "epoch": 0.8682489451476794, + "grad_norm": 0.6911213994026184, + "learning_rate": 6.470482149056509e-05, + "loss": 1.3049, + "step": 8231 + }, + { + "epoch": 0.8683544303797468, + "grad_norm": 0.6476954817771912, + "learning_rate": 6.460284712016868e-05, + "loss": 1.2749, + "step": 8232 + }, + { + "epoch": 0.8684599156118143, + "grad_norm": 0.6549925208091736, + "learning_rate": 6.450094955194096e-05, + "loss": 1.3132, + "step": 8233 + }, + { + "epoch": 0.8685654008438819, + "grad_norm": 0.640694797039032, + "learning_rate": 6.439912879730009e-05, + "loss": 1.3151, + "step": 8234 + }, + { + "epoch": 0.8686708860759493, + "grad_norm": 0.6614891290664673, + "learning_rate": 6.429738486765548e-05, + "loss": 1.2958, + "step": 8235 + }, + { + "epoch": 0.8687763713080169, + "grad_norm": 0.6716648936271667, + "learning_rate": 6.419571777440814e-05, + "loss": 1.3159, + "step": 8236 + }, + { + "epoch": 0.8688818565400844, + "grad_norm": 0.6474946737289429, + "learning_rate": 6.409412752895041e-05, + "loss": 1.3243, + "step": 8237 + }, + { + "epoch": 0.8689873417721519, + "grad_norm": 0.7180396318435669, + "learning_rate": 
6.399261414266571e-05, + "loss": 1.3173, + "step": 8238 + }, + { + "epoch": 0.8690928270042194, + "grad_norm": 0.6934117674827576, + "learning_rate": 6.389117762692952e-05, + "loss": 1.2563, + "step": 8239 + }, + { + "epoch": 0.869198312236287, + "grad_norm": 0.6248839497566223, + "learning_rate": 6.37898179931081e-05, + "loss": 1.2961, + "step": 8240 + }, + { + "epoch": 0.8693037974683544, + "grad_norm": 0.6561504006385803, + "learning_rate": 6.368853525255942e-05, + "loss": 1.3242, + "step": 8241 + }, + { + "epoch": 0.869409282700422, + "grad_norm": 0.660967230796814, + "learning_rate": 6.358732941663248e-05, + "loss": 1.2972, + "step": 8242 + }, + { + "epoch": 0.8695147679324895, + "grad_norm": 0.6654126048088074, + "learning_rate": 6.348620049666815e-05, + "loss": 1.316, + "step": 8243 + }, + { + "epoch": 0.8696202531645569, + "grad_norm": 0.7141154408454895, + "learning_rate": 6.338514850399826e-05, + "loss": 1.309, + "step": 8244 + }, + { + "epoch": 0.8697257383966245, + "grad_norm": 0.6706464290618896, + "learning_rate": 6.328417344994627e-05, + "loss": 1.2934, + "step": 8245 + }, + { + "epoch": 0.869831223628692, + "grad_norm": 0.6422244310379028, + "learning_rate": 6.318327534582688e-05, + "loss": 1.3123, + "step": 8246 + }, + { + "epoch": 0.8699367088607595, + "grad_norm": 0.6807412505149841, + "learning_rate": 6.308245420294636e-05, + "loss": 1.3283, + "step": 8247 + }, + { + "epoch": 0.870042194092827, + "grad_norm": 0.715387761592865, + "learning_rate": 6.298171003260194e-05, + "loss": 1.3322, + "step": 8248 + }, + { + "epoch": 0.8701476793248946, + "grad_norm": 0.6464476585388184, + "learning_rate": 6.288104284608284e-05, + "loss": 1.3138, + "step": 8249 + }, + { + "epoch": 0.870253164556962, + "grad_norm": 0.6695284843444824, + "learning_rate": 6.278045265466911e-05, + "loss": 1.3282, + "step": 8250 + }, + { + "epoch": 0.8703586497890295, + "grad_norm": 0.6957433223724365, + "learning_rate": 6.267993946963249e-05, + "loss": 1.3339, + "step": 8251 + 
}, + { + "epoch": 0.8704641350210971, + "grad_norm": 0.6623613238334656, + "learning_rate": 6.257950330223597e-05, + "loss": 1.2752, + "step": 8252 + }, + { + "epoch": 0.8705696202531645, + "grad_norm": 0.6772637367248535, + "learning_rate": 6.247914416373387e-05, + "loss": 1.298, + "step": 8253 + }, + { + "epoch": 0.8706751054852321, + "grad_norm": 0.7333714365959167, + "learning_rate": 6.237886206537197e-05, + "loss": 1.2873, + "step": 8254 + }, + { + "epoch": 0.8707805907172996, + "grad_norm": 0.6750921607017517, + "learning_rate": 6.227865701838733e-05, + "loss": 1.3567, + "step": 8255 + }, + { + "epoch": 0.8708860759493671, + "grad_norm": 0.7456309795379639, + "learning_rate": 6.217852903400841e-05, + "loss": 1.3155, + "step": 8256 + }, + { + "epoch": 0.8709915611814346, + "grad_norm": 0.6976007223129272, + "learning_rate": 6.207847812345524e-05, + "loss": 1.3104, + "step": 8257 + }, + { + "epoch": 0.8710970464135022, + "grad_norm": 0.7423688769340515, + "learning_rate": 6.197850429793866e-05, + "loss": 1.2884, + "step": 8258 + }, + { + "epoch": 0.8712025316455696, + "grad_norm": 0.6588340997695923, + "learning_rate": 6.187860756866157e-05, + "loss": 1.313, + "step": 8259 + }, + { + "epoch": 0.8713080168776371, + "grad_norm": 0.6758650541305542, + "learning_rate": 6.177878794681782e-05, + "loss": 1.2775, + "step": 8260 + }, + { + "epoch": 0.8714135021097047, + "grad_norm": 0.6423763632774353, + "learning_rate": 6.167904544359265e-05, + "loss": 1.2998, + "step": 8261 + }, + { + "epoch": 0.8715189873417721, + "grad_norm": 0.6962431073188782, + "learning_rate": 6.157938007016279e-05, + "loss": 1.294, + "step": 8262 + }, + { + "epoch": 0.8716244725738397, + "grad_norm": 0.6676281690597534, + "learning_rate": 6.147979183769602e-05, + "loss": 1.3243, + "step": 8263 + }, + { + "epoch": 0.8717299578059071, + "grad_norm": 0.6559962034225464, + "learning_rate": 6.138028075735196e-05, + "loss": 1.2737, + "step": 8264 + }, + { + "epoch": 0.8718354430379747, + "grad_norm": 
0.680476725101471, + "learning_rate": 6.128084684028118e-05, + "loss": 1.2786, + "step": 8265 + }, + { + "epoch": 0.8719409282700422, + "grad_norm": 0.6937569379806519, + "learning_rate": 6.118149009762574e-05, + "loss": 1.304, + "step": 8266 + }, + { + "epoch": 0.8720464135021097, + "grad_norm": 0.6477030515670776, + "learning_rate": 6.108221054051902e-05, + "loss": 1.306, + "step": 8267 + }, + { + "epoch": 0.8721518987341772, + "grad_norm": 0.6650908589363098, + "learning_rate": 6.0983008180086005e-05, + "loss": 1.2809, + "step": 8268 + }, + { + "epoch": 0.8722573839662447, + "grad_norm": 0.6687201857566833, + "learning_rate": 6.088388302744266e-05, + "loss": 1.3225, + "step": 8269 + }, + { + "epoch": 0.8723628691983122, + "grad_norm": 0.6866051554679871, + "learning_rate": 6.078483509369642e-05, + "loss": 1.324, + "step": 8270 + }, + { + "epoch": 0.8724683544303797, + "grad_norm": 0.7654911875724792, + "learning_rate": 6.068586438994617e-05, + "loss": 1.3418, + "step": 8271 + }, + { + "epoch": 0.8725738396624473, + "grad_norm": 0.6405658721923828, + "learning_rate": 6.058697092728202e-05, + "loss": 1.2698, + "step": 8272 + }, + { + "epoch": 0.8726793248945147, + "grad_norm": 0.6480641961097717, + "learning_rate": 6.048815471678554e-05, + "loss": 1.2831, + "step": 8273 + }, + { + "epoch": 0.8727848101265823, + "grad_norm": 0.6765572428703308, + "learning_rate": 6.038941576952952e-05, + "loss": 1.2813, + "step": 8274 + }, + { + "epoch": 0.8728902953586498, + "grad_norm": 0.6692928075790405, + "learning_rate": 6.029075409657822e-05, + "loss": 1.2664, + "step": 8275 + }, + { + "epoch": 0.8729957805907173, + "grad_norm": 0.7379283308982849, + "learning_rate": 6.0192169708987026e-05, + "loss": 1.3034, + "step": 8276 + }, + { + "epoch": 0.8731012658227848, + "grad_norm": 0.6747475862503052, + "learning_rate": 6.009366261780286e-05, + "loss": 1.2741, + "step": 8277 + }, + { + "epoch": 0.8732067510548523, + "grad_norm": 0.6713967323303223, + "learning_rate": 
5.999523283406405e-05, + "loss": 1.2574, + "step": 8278 + }, + { + "epoch": 0.8733122362869198, + "grad_norm": 0.7116917967796326, + "learning_rate": 5.9896880368800115e-05, + "loss": 1.3001, + "step": 8279 + }, + { + "epoch": 0.8734177215189873, + "grad_norm": 0.6352666616439819, + "learning_rate": 5.9798605233031904e-05, + "loss": 1.2637, + "step": 8280 + }, + { + "epoch": 0.8735232067510549, + "grad_norm": 0.6546698808670044, + "learning_rate": 5.970040743777161e-05, + "loss": 1.2883, + "step": 8281 + }, + { + "epoch": 0.8736286919831223, + "grad_norm": 0.7004307508468628, + "learning_rate": 5.960228699402284e-05, + "loss": 1.3094, + "step": 8282 + }, + { + "epoch": 0.8737341772151899, + "grad_norm": 0.666735053062439, + "learning_rate": 5.9504243912780474e-05, + "loss": 1.304, + "step": 8283 + }, + { + "epoch": 0.8738396624472574, + "grad_norm": 0.6435979008674622, + "learning_rate": 5.940627820503064e-05, + "loss": 1.2607, + "step": 8284 + }, + { + "epoch": 0.8739451476793249, + "grad_norm": 0.685779869556427, + "learning_rate": 5.930838988175097e-05, + "loss": 1.2582, + "step": 8285 + }, + { + "epoch": 0.8740506329113924, + "grad_norm": 0.6526361703872681, + "learning_rate": 5.921057895391027e-05, + "loss": 1.2659, + "step": 8286 + }, + { + "epoch": 0.87415611814346, + "grad_norm": 0.73401939868927, + "learning_rate": 5.91128454324687e-05, + "loss": 1.3077, + "step": 8287 + }, + { + "epoch": 0.8742616033755274, + "grad_norm": 0.6843074560165405, + "learning_rate": 5.901518932837799e-05, + "loss": 1.3004, + "step": 8288 + }, + { + "epoch": 0.8743670886075949, + "grad_norm": 0.6361926198005676, + "learning_rate": 5.891761065258089e-05, + "loss": 1.2928, + "step": 8289 + }, + { + "epoch": 0.8744725738396625, + "grad_norm": 0.6678342819213867, + "learning_rate": 5.8820109416011485e-05, + "loss": 1.254, + "step": 8290 + }, + { + "epoch": 0.8745780590717299, + "grad_norm": 0.6895197033882141, + "learning_rate": 5.8722685629595454e-05, + "loss": 1.306, + "step": 
8291 + }, + { + "epoch": 0.8746835443037975, + "grad_norm": 0.6636000275611877, + "learning_rate": 5.862533930424949e-05, + "loss": 1.3261, + "step": 8292 + }, + { + "epoch": 0.874789029535865, + "grad_norm": 0.6967983841896057, + "learning_rate": 5.852807045088177e-05, + "loss": 1.2757, + "step": 8293 + }, + { + "epoch": 0.8748945147679325, + "grad_norm": 0.7692165374755859, + "learning_rate": 5.843087908039166e-05, + "loss": 1.3137, + "step": 8294 + }, + { + "epoch": 0.875, + "grad_norm": 0.642680287361145, + "learning_rate": 5.833376520367012e-05, + "loss": 1.3339, + "step": 8295 + }, + { + "epoch": 0.8751054852320675, + "grad_norm": 0.6870511770248413, + "learning_rate": 5.823672883159911e-05, + "loss": 1.2946, + "step": 8296 + }, + { + "epoch": 0.875210970464135, + "grad_norm": 0.6469195485115051, + "learning_rate": 5.813976997505202e-05, + "loss": 1.3229, + "step": 8297 + }, + { + "epoch": 0.8753164556962025, + "grad_norm": 0.6345282196998596, + "learning_rate": 5.804288864489366e-05, + "loss": 1.3066, + "step": 8298 + }, + { + "epoch": 0.8754219409282701, + "grad_norm": 0.6977478265762329, + "learning_rate": 5.794608485198008e-05, + "loss": 1.3278, + "step": 8299 + }, + { + "epoch": 0.8755274261603375, + "grad_norm": 0.6439079642295837, + "learning_rate": 5.784935860715862e-05, + "loss": 1.3097, + "step": 8300 + }, + { + "epoch": 0.8756329113924051, + "grad_norm": 0.6782199144363403, + "learning_rate": 5.7752709921267855e-05, + "loss": 1.316, + "step": 8301 + }, + { + "epoch": 0.8757383966244726, + "grad_norm": 0.7421549558639526, + "learning_rate": 5.7656138805137785e-05, + "loss": 1.2852, + "step": 8302 + }, + { + "epoch": 0.87584388185654, + "grad_norm": 0.6369873285293579, + "learning_rate": 5.7559645269589764e-05, + "loss": 1.2675, + "step": 8303 + }, + { + "epoch": 0.8759493670886076, + "grad_norm": 0.6932855248451233, + "learning_rate": 5.746322932543621e-05, + "loss": 1.2941, + "step": 8304 + }, + { + "epoch": 0.8760548523206751, + "grad_norm": 
0.7172894477844238, + "learning_rate": 5.736689098348125e-05, + "loss": 1.2687, + "step": 8305 + }, + { + "epoch": 0.8761603375527426, + "grad_norm": 0.7366466522216797, + "learning_rate": 5.727063025451973e-05, + "loss": 1.3052, + "step": 8306 + }, + { + "epoch": 0.8762658227848101, + "grad_norm": 0.6369022727012634, + "learning_rate": 5.717444714933845e-05, + "loss": 1.2758, + "step": 8307 + }, + { + "epoch": 0.8763713080168777, + "grad_norm": 0.6547600030899048, + "learning_rate": 5.707834167871512e-05, + "loss": 1.3089, + "step": 8308 + }, + { + "epoch": 0.8764767932489451, + "grad_norm": 0.6720584630966187, + "learning_rate": 5.698231385341887e-05, + "loss": 1.2799, + "step": 8309 + }, + { + "epoch": 0.8765822784810127, + "grad_norm": 0.7194197773933411, + "learning_rate": 5.6886363684210016e-05, + "loss": 1.2964, + "step": 8310 + }, + { + "epoch": 0.8766877637130802, + "grad_norm": 0.7786123156547546, + "learning_rate": 5.6790491181840294e-05, + "loss": 1.3441, + "step": 8311 + }, + { + "epoch": 0.8767932489451477, + "grad_norm": 0.7474740147590637, + "learning_rate": 5.6694696357052685e-05, + "loss": 1.3164, + "step": 8312 + }, + { + "epoch": 0.8768987341772152, + "grad_norm": 0.6790052652359009, + "learning_rate": 5.6598979220581434e-05, + "loss": 1.3238, + "step": 8313 + }, + { + "epoch": 0.8770042194092827, + "grad_norm": 0.6604540348052979, + "learning_rate": 5.650333978315223e-05, + "loss": 1.2941, + "step": 8314 + }, + { + "epoch": 0.8771097046413502, + "grad_norm": 0.8631206750869751, + "learning_rate": 5.640777805548181e-05, + "loss": 1.3078, + "step": 8315 + }, + { + "epoch": 0.8772151898734177, + "grad_norm": 0.6813403964042664, + "learning_rate": 5.631229404827845e-05, + "loss": 1.3006, + "step": 8316 + }, + { + "epoch": 0.8773206751054853, + "grad_norm": 0.6983660459518433, + "learning_rate": 5.6216887772241596e-05, + "loss": 1.2915, + "step": 8317 + }, + { + "epoch": 0.8774261603375527, + "grad_norm": 0.6747589111328125, + "learning_rate": 
5.612155923806203e-05, + "loss": 1.3063, + "step": 8318 + }, + { + "epoch": 0.8775316455696203, + "grad_norm": 0.6942027807235718, + "learning_rate": 5.60263084564217e-05, + "loss": 1.306, + "step": 8319 + }, + { + "epoch": 0.8776371308016878, + "grad_norm": 0.7446079850196838, + "learning_rate": 5.5931135437993994e-05, + "loss": 1.2873, + "step": 8320 + }, + { + "epoch": 0.8777426160337553, + "grad_norm": 0.6991958618164062, + "learning_rate": 5.583604019344354e-05, + "loss": 1.2746, + "step": 8321 + }, + { + "epoch": 0.8778481012658228, + "grad_norm": 0.6339028477668762, + "learning_rate": 5.574102273342616e-05, + "loss": 1.3193, + "step": 8322 + }, + { + "epoch": 0.8779535864978903, + "grad_norm": 0.6583927273750305, + "learning_rate": 5.5646083068589065e-05, + "loss": 1.3127, + "step": 8323 + }, + { + "epoch": 0.8780590717299578, + "grad_norm": 0.6820333003997803, + "learning_rate": 5.5551221209570596e-05, + "loss": 1.3226, + "step": 8324 + }, + { + "epoch": 0.8781645569620253, + "grad_norm": 0.6583959460258484, + "learning_rate": 5.5456437167000746e-05, + "loss": 1.3025, + "step": 8325 + }, + { + "epoch": 0.8782700421940929, + "grad_norm": 0.7336207032203674, + "learning_rate": 5.536173095150043e-05, + "loss": 1.3039, + "step": 8326 + }, + { + "epoch": 0.8783755274261603, + "grad_norm": 0.6823206543922424, + "learning_rate": 5.526710257368192e-05, + "loss": 1.3178, + "step": 8327 + }, + { + "epoch": 0.8784810126582279, + "grad_norm": 0.6337628960609436, + "learning_rate": 5.517255204414889e-05, + "loss": 1.2882, + "step": 8328 + }, + { + "epoch": 0.8785864978902953, + "grad_norm": 0.6215229630470276, + "learning_rate": 5.507807937349604e-05, + "loss": 1.276, + "step": 8329 + }, + { + "epoch": 0.8786919831223629, + "grad_norm": 0.669689416885376, + "learning_rate": 5.498368457230965e-05, + "loss": 1.3073, + "step": 8330 + }, + { + "epoch": 0.8787974683544304, + "grad_norm": 0.6911442875862122, + "learning_rate": 5.4889367651167007e-05, + "loss": 1.323, + 
"step": 8331 + }, + { + "epoch": 0.8789029535864978, + "grad_norm": 0.6624521613121033, + "learning_rate": 5.479512862063674e-05, + "loss": 1.3274, + "step": 8332 + }, + { + "epoch": 0.8790084388185654, + "grad_norm": 0.6283166408538818, + "learning_rate": 5.470096749127906e-05, + "loss": 1.2859, + "step": 8333 + }, + { + "epoch": 0.8791139240506329, + "grad_norm": 0.6925534605979919, + "learning_rate": 5.460688427364505e-05, + "loss": 1.2799, + "step": 8334 + }, + { + "epoch": 0.8792194092827004, + "grad_norm": 0.6920391321182251, + "learning_rate": 5.451287897827725e-05, + "loss": 1.318, + "step": 8335 + }, + { + "epoch": 0.8793248945147679, + "grad_norm": 0.6340937614440918, + "learning_rate": 5.441895161570934e-05, + "loss": 1.2808, + "step": 8336 + }, + { + "epoch": 0.8794303797468355, + "grad_norm": 0.7124657034873962, + "learning_rate": 5.43251021964663e-05, + "loss": 1.2965, + "step": 8337 + }, + { + "epoch": 0.8795358649789029, + "grad_norm": 0.646905243396759, + "learning_rate": 5.423133073106457e-05, + "loss": 1.2956, + "step": 8338 + }, + { + "epoch": 0.8796413502109705, + "grad_norm": 0.6491025686264038, + "learning_rate": 5.413763723001164e-05, + "loss": 1.2167, + "step": 8339 + }, + { + "epoch": 0.879746835443038, + "grad_norm": 0.6901640892028809, + "learning_rate": 5.4044021703806375e-05, + "loss": 1.261, + "step": 8340 + }, + { + "epoch": 0.8798523206751054, + "grad_norm": 0.6549571752548218, + "learning_rate": 5.3950484162938714e-05, + "loss": 1.2874, + "step": 8341 + }, + { + "epoch": 0.879957805907173, + "grad_norm": 0.6440114974975586, + "learning_rate": 5.385702461789019e-05, + "loss": 1.2858, + "step": 8342 + }, + { + "epoch": 0.8800632911392405, + "grad_norm": 0.650628387928009, + "learning_rate": 5.376364307913334e-05, + "loss": 1.2592, + "step": 8343 + }, + { + "epoch": 0.880168776371308, + "grad_norm": 0.6580369472503662, + "learning_rate": 5.3670339557132045e-05, + "loss": 1.2714, + "step": 8344 + }, + { + "epoch": 0.8802742616033755, + 
"grad_norm": 0.6664779782295227, + "learning_rate": 5.3577114062341446e-05, + "loss": 1.3082, + "step": 8345 + }, + { + "epoch": 0.8803797468354431, + "grad_norm": 0.7088260650634766, + "learning_rate": 5.348396660520785e-05, + "loss": 1.2921, + "step": 8346 + }, + { + "epoch": 0.8804852320675105, + "grad_norm": 0.6921047568321228, + "learning_rate": 5.339089719616891e-05, + "loss": 1.3365, + "step": 8347 + }, + { + "epoch": 0.880590717299578, + "grad_norm": 0.6783115267753601, + "learning_rate": 5.329790584565361e-05, + "loss": 1.2937, + "step": 8348 + }, + { + "epoch": 0.8806962025316456, + "grad_norm": 0.656164824962616, + "learning_rate": 5.320499256408204e-05, + "loss": 1.2739, + "step": 8349 + }, + { + "epoch": 0.880801687763713, + "grad_norm": 0.6834188103675842, + "learning_rate": 5.311215736186536e-05, + "loss": 1.319, + "step": 8350 + }, + { + "epoch": 0.8809071729957806, + "grad_norm": 0.6278703808784485, + "learning_rate": 5.3019400249406686e-05, + "loss": 1.3129, + "step": 8351 + }, + { + "epoch": 0.8810126582278481, + "grad_norm": 0.6444242596626282, + "learning_rate": 5.29267212370996e-05, + "loss": 1.2662, + "step": 8352 + }, + { + "epoch": 0.8811181434599156, + "grad_norm": 0.6540027856826782, + "learning_rate": 5.283412033532939e-05, + "loss": 1.2466, + "step": 8353 + }, + { + "epoch": 0.8812236286919831, + "grad_norm": 0.689304769039154, + "learning_rate": 5.274159755447233e-05, + "loss": 1.2797, + "step": 8354 + }, + { + "epoch": 0.8813291139240507, + "grad_norm": 0.7279322743415833, + "learning_rate": 5.264915290489614e-05, + "loss": 1.2802, + "step": 8355 + }, + { + "epoch": 0.8814345991561181, + "grad_norm": 0.6419795155525208, + "learning_rate": 5.25567863969596e-05, + "loss": 1.2977, + "step": 8356 + }, + { + "epoch": 0.8815400843881857, + "grad_norm": 0.6538902521133423, + "learning_rate": 5.246449804101294e-05, + "loss": 1.3297, + "step": 8357 + }, + { + "epoch": 0.8816455696202532, + "grad_norm": 0.6645005345344543, + "learning_rate": 
5.237228784739739e-05, + "loss": 1.2947, + "step": 8358 + }, + { + "epoch": 0.8817510548523206, + "grad_norm": 0.6880297064781189, + "learning_rate": 5.228015582644585e-05, + "loss": 1.3392, + "step": 8359 + }, + { + "epoch": 0.8818565400843882, + "grad_norm": 0.6490352749824524, + "learning_rate": 5.21881019884819e-05, + "loss": 1.2796, + "step": 8360 + }, + { + "epoch": 0.8819620253164557, + "grad_norm": 0.6858810782432556, + "learning_rate": 5.209612634382077e-05, + "loss": 1.3051, + "step": 8361 + }, + { + "epoch": 0.8820675105485232, + "grad_norm": 0.6777538657188416, + "learning_rate": 5.2004228902768815e-05, + "loss": 1.3433, + "step": 8362 + }, + { + "epoch": 0.8821729957805907, + "grad_norm": 0.6574166417121887, + "learning_rate": 5.191240967562347e-05, + "loss": 1.301, + "step": 8363 + }, + { + "epoch": 0.8822784810126583, + "grad_norm": 0.621586799621582, + "learning_rate": 5.182066867267357e-05, + "loss": 1.3128, + "step": 8364 + }, + { + "epoch": 0.8823839662447257, + "grad_norm": 0.6887949705123901, + "learning_rate": 5.172900590419915e-05, + "loss": 1.3235, + "step": 8365 + }, + { + "epoch": 0.8824894514767933, + "grad_norm": 0.685856282711029, + "learning_rate": 5.1637421380471586e-05, + "loss": 1.2807, + "step": 8366 + }, + { + "epoch": 0.8825949367088608, + "grad_norm": 0.6991561055183411, + "learning_rate": 5.154591511175316e-05, + "loss": 1.2892, + "step": 8367 + }, + { + "epoch": 0.8827004219409282, + "grad_norm": 0.6686751246452332, + "learning_rate": 5.1454487108297924e-05, + "loss": 1.3017, + "step": 8368 + }, + { + "epoch": 0.8828059071729958, + "grad_norm": 0.6445844769477844, + "learning_rate": 5.136313738035059e-05, + "loss": 1.2809, + "step": 8369 + }, + { + "epoch": 0.8829113924050633, + "grad_norm": 0.6447262763977051, + "learning_rate": 5.127186593814748e-05, + "loss": 1.2559, + "step": 8370 + }, + { + "epoch": 0.8830168776371308, + "grad_norm": 0.6673168540000916, + "learning_rate": 5.118067279191599e-05, + "loss": 1.2821, + "step": 
8371 + }, + { + "epoch": 0.8831223628691983, + "grad_norm": 0.6568210124969482, + "learning_rate": 5.1089557951874696e-05, + "loss": 1.3224, + "step": 8372 + }, + { + "epoch": 0.8832278481012659, + "grad_norm": 0.6387734413146973, + "learning_rate": 5.0998521428233526e-05, + "loss": 1.2865, + "step": 8373 + }, + { + "epoch": 0.8833333333333333, + "grad_norm": 0.6355252861976624, + "learning_rate": 5.0907563231193556e-05, + "loss": 1.2841, + "step": 8374 + }, + { + "epoch": 0.8834388185654009, + "grad_norm": 0.7058017253875732, + "learning_rate": 5.081668337094713e-05, + "loss": 1.326, + "step": 8375 + }, + { + "epoch": 0.8835443037974684, + "grad_norm": 0.7099622488021851, + "learning_rate": 5.072588185767763e-05, + "loss": 1.3429, + "step": 8376 + }, + { + "epoch": 0.8836497890295358, + "grad_norm": 0.6625787615776062, + "learning_rate": 5.063515870156013e-05, + "loss": 1.2988, + "step": 8377 + }, + { + "epoch": 0.8837552742616034, + "grad_norm": 0.6522236466407776, + "learning_rate": 5.054451391276035e-05, + "loss": 1.2831, + "step": 8378 + }, + { + "epoch": 0.8838607594936709, + "grad_norm": 0.656394898891449, + "learning_rate": 5.045394750143567e-05, + "loss": 1.2889, + "step": 8379 + }, + { + "epoch": 0.8839662447257384, + "grad_norm": 0.6519041061401367, + "learning_rate": 5.0363459477734464e-05, + "loss": 1.3352, + "step": 8380 + }, + { + "epoch": 0.8840717299578059, + "grad_norm": 0.6539250016212463, + "learning_rate": 5.0273049851796205e-05, + "loss": 1.2869, + "step": 8381 + }, + { + "epoch": 0.8841772151898735, + "grad_norm": 0.6319943070411682, + "learning_rate": 5.0182718633751954e-05, + "loss": 1.319, + "step": 8382 + }, + { + "epoch": 0.8842827004219409, + "grad_norm": 0.757417619228363, + "learning_rate": 5.009246583372362e-05, + "loss": 1.2828, + "step": 8383 + }, + { + "epoch": 0.8843881856540085, + "grad_norm": 0.7050114274024963, + "learning_rate": 5.000229146182453e-05, + "loss": 1.2929, + "step": 8384 + }, + { + "epoch": 0.884493670886076, + 
"grad_norm": 0.6514630317687988, + "learning_rate": 4.9912195528159174e-05, + "loss": 1.319, + "step": 8385 + }, + { + "epoch": 0.8845991561181434, + "grad_norm": 0.6785515546798706, + "learning_rate": 4.982217804282332e-05, + "loss": 1.2969, + "step": 8386 + }, + { + "epoch": 0.884704641350211, + "grad_norm": 0.6863059997558594, + "learning_rate": 4.973223901590382e-05, + "loss": 1.2923, + "step": 8387 + }, + { + "epoch": 0.8848101265822785, + "grad_norm": 0.6524113416671753, + "learning_rate": 4.9642378457478847e-05, + "loss": 1.2639, + "step": 8388 + }, + { + "epoch": 0.884915611814346, + "grad_norm": 0.6451035737991333, + "learning_rate": 4.955259637761761e-05, + "loss": 1.2789, + "step": 8389 + }, + { + "epoch": 0.8850210970464135, + "grad_norm": 0.6399926543235779, + "learning_rate": 4.946289278638064e-05, + "loss": 1.3068, + "step": 8390 + }, + { + "epoch": 0.8851265822784811, + "grad_norm": 0.7188538312911987, + "learning_rate": 4.9373267693819805e-05, + "loss": 1.3146, + "step": 8391 + }, + { + "epoch": 0.8852320675105485, + "grad_norm": 0.6961350440979004, + "learning_rate": 4.928372110997792e-05, + "loss": 1.2728, + "step": 8392 + }, + { + "epoch": 0.885337552742616, + "grad_norm": 0.7096091508865356, + "learning_rate": 4.9194253044889117e-05, + "loss": 1.2948, + "step": 8393 + }, + { + "epoch": 0.8854430379746835, + "grad_norm": 0.687950074672699, + "learning_rate": 4.910486350857887e-05, + "loss": 1.2899, + "step": 8394 + }, + { + "epoch": 0.885548523206751, + "grad_norm": 0.6953470706939697, + "learning_rate": 4.90155525110636e-05, + "loss": 1.2827, + "step": 8395 + }, + { + "epoch": 0.8856540084388186, + "grad_norm": 0.6572344303131104, + "learning_rate": 4.89263200623512e-05, + "loss": 1.3011, + "step": 8396 + }, + { + "epoch": 0.885759493670886, + "grad_norm": 0.640799343585968, + "learning_rate": 4.883716617244044e-05, + "loss": 1.3033, + "step": 8397 + }, + { + "epoch": 0.8858649789029536, + "grad_norm": 0.6860338449478149, + "learning_rate": 
4.874809085132148e-05, + "loss": 1.2802, + "step": 8398 + }, + { + "epoch": 0.8859704641350211, + "grad_norm": 0.7047885656356812, + "learning_rate": 4.865909410897576e-05, + "loss": 1.3109, + "step": 8399 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 0.6564874053001404, + "learning_rate": 4.8570175955375715e-05, + "loss": 1.2934, + "step": 8400 + }, + { + "epoch": 0.8861814345991561, + "grad_norm": 0.673367977142334, + "learning_rate": 4.848133640048513e-05, + "loss": 1.3071, + "step": 8401 + }, + { + "epoch": 0.8862869198312237, + "grad_norm": 0.6990137100219727, + "learning_rate": 4.839257545425879e-05, + "loss": 1.2979, + "step": 8402 + }, + { + "epoch": 0.8863924050632911, + "grad_norm": 0.6551023125648499, + "learning_rate": 4.830389312664299e-05, + "loss": 1.3194, + "step": 8403 + }, + { + "epoch": 0.8864978902953586, + "grad_norm": 0.6541658639907837, + "learning_rate": 4.821528942757494e-05, + "loss": 1.3165, + "step": 8404 + }, + { + "epoch": 0.8866033755274262, + "grad_norm": 0.8181291222572327, + "learning_rate": 4.8126764366983126e-05, + "loss": 1.3102, + "step": 8405 + }, + { + "epoch": 0.8867088607594936, + "grad_norm": 0.7482951879501343, + "learning_rate": 4.803831795478719e-05, + "loss": 1.3262, + "step": 8406 + }, + { + "epoch": 0.8868143459915612, + "grad_norm": 0.6664313077926636, + "learning_rate": 4.794995020089804e-05, + "loss": 1.3331, + "step": 8407 + }, + { + "epoch": 0.8869198312236287, + "grad_norm": 0.6648594737052917, + "learning_rate": 4.7861661115217754e-05, + "loss": 1.3018, + "step": 8408 + }, + { + "epoch": 0.8870253164556962, + "grad_norm": 0.70369952917099, + "learning_rate": 4.7773450707639414e-05, + "loss": 1.3336, + "step": 8409 + }, + { + "epoch": 0.8871308016877637, + "grad_norm": 0.6996369361877441, + "learning_rate": 4.768531898804754e-05, + "loss": 1.2786, + "step": 8410 + }, + { + "epoch": 0.8872362869198313, + "grad_norm": 0.6751964688301086, + "learning_rate": 4.75972659663178e-05, + "loss": 1.3259, + 
"step": 8411 + }, + { + "epoch": 0.8873417721518987, + "grad_norm": 0.6775730848312378, + "learning_rate": 4.75092916523169e-05, + "loss": 1.2855, + "step": 8412 + }, + { + "epoch": 0.8874472573839662, + "grad_norm": 0.6546880006790161, + "learning_rate": 4.742139605590279e-05, + "loss": 1.3181, + "step": 8413 + }, + { + "epoch": 0.8875527426160338, + "grad_norm": 0.6618191003799438, + "learning_rate": 4.733357918692466e-05, + "loss": 1.3108, + "step": 8414 + }, + { + "epoch": 0.8876582278481012, + "grad_norm": 0.6604642868041992, + "learning_rate": 4.7245841055222726e-05, + "loss": 1.3109, + "step": 8415 + }, + { + "epoch": 0.8877637130801688, + "grad_norm": 0.6475123167037964, + "learning_rate": 4.715818167062863e-05, + "loss": 1.2694, + "step": 8416 + }, + { + "epoch": 0.8878691983122363, + "grad_norm": 0.6478164792060852, + "learning_rate": 4.7070601042964925e-05, + "loss": 1.2907, + "step": 8417 + }, + { + "epoch": 0.8879746835443038, + "grad_norm": 0.6438318490982056, + "learning_rate": 4.698309918204552e-05, + "loss": 1.3016, + "step": 8418 + }, + { + "epoch": 0.8880801687763713, + "grad_norm": 0.7245948910713196, + "learning_rate": 4.6895676097675225e-05, + "loss": 1.2865, + "step": 8419 + }, + { + "epoch": 0.8881856540084389, + "grad_norm": 0.6411357522010803, + "learning_rate": 4.680833179965063e-05, + "loss": 1.3092, + "step": 8420 + }, + { + "epoch": 0.8882911392405063, + "grad_norm": 0.6403063535690308, + "learning_rate": 4.672106629775882e-05, + "loss": 1.3095, + "step": 8421 + }, + { + "epoch": 0.8883966244725738, + "grad_norm": 0.6454327702522278, + "learning_rate": 4.663387960177848e-05, + "loss": 1.308, + "step": 8422 + }, + { + "epoch": 0.8885021097046414, + "grad_norm": 0.6586141586303711, + "learning_rate": 4.654677172147912e-05, + "loss": 1.3227, + "step": 8423 + }, + { + "epoch": 0.8886075949367088, + "grad_norm": 0.6620680689811707, + "learning_rate": 4.645974266662176e-05, + "loss": 1.2667, + "step": 8424 + }, + { + "epoch": 
0.8887130801687764, + "grad_norm": 0.6238624453544617, + "learning_rate": 4.637279244695844e-05, + "loss": 1.2777, + "step": 8425 + }, + { + "epoch": 0.8888185654008439, + "grad_norm": 0.7120482325553894, + "learning_rate": 4.628592107223229e-05, + "loss": 1.2723, + "step": 8426 + }, + { + "epoch": 0.8889240506329114, + "grad_norm": 0.6473460793495178, + "learning_rate": 4.6199128552177756e-05, + "loss": 1.3392, + "step": 8427 + }, + { + "epoch": 0.8890295358649789, + "grad_norm": 0.6656672954559326, + "learning_rate": 4.611241489652016e-05, + "loss": 1.2912, + "step": 8428 + }, + { + "epoch": 0.8891350210970465, + "grad_norm": 0.6583484411239624, + "learning_rate": 4.6025780114976545e-05, + "loss": 1.3153, + "step": 8429 + }, + { + "epoch": 0.8892405063291139, + "grad_norm": 0.6852595210075378, + "learning_rate": 4.5939224217254574e-05, + "loss": 1.2888, + "step": 8430 + }, + { + "epoch": 0.8893459915611814, + "grad_norm": 0.6687048077583313, + "learning_rate": 4.585274721305333e-05, + "loss": 1.2699, + "step": 8431 + }, + { + "epoch": 0.889451476793249, + "grad_norm": 0.6577207446098328, + "learning_rate": 4.576634911206296e-05, + "loss": 1.2778, + "step": 8432 + }, + { + "epoch": 0.8895569620253164, + "grad_norm": 0.6756909489631653, + "learning_rate": 4.5680029923964724e-05, + "loss": 1.2654, + "step": 8433 + }, + { + "epoch": 0.889662447257384, + "grad_norm": 0.6409329771995544, + "learning_rate": 4.559378965843122e-05, + "loss": 1.2713, + "step": 8434 + }, + { + "epoch": 0.8897679324894515, + "grad_norm": 0.6471333503723145, + "learning_rate": 4.5507628325126144e-05, + "loss": 1.313, + "step": 8435 + }, + { + "epoch": 0.889873417721519, + "grad_norm": 0.6615880727767944, + "learning_rate": 4.542154593370401e-05, + "loss": 1.3132, + "step": 8436 + }, + { + "epoch": 0.8899789029535865, + "grad_norm": 0.7176505923271179, + "learning_rate": 4.533554249381119e-05, + "loss": 1.2752, + "step": 8437 + }, + { + "epoch": 0.890084388185654, + "grad_norm": 
0.6485661864280701, + "learning_rate": 4.524961801508456e-05, + "loss": 1.3167, + "step": 8438 + }, + { + "epoch": 0.8901898734177215, + "grad_norm": 0.654228925704956, + "learning_rate": 4.5163772507152425e-05, + "loss": 1.3014, + "step": 8439 + }, + { + "epoch": 0.890295358649789, + "grad_norm": 0.6535524129867554, + "learning_rate": 4.507800597963424e-05, + "loss": 1.3059, + "step": 8440 + }, + { + "epoch": 0.8904008438818566, + "grad_norm": 0.6437348127365112, + "learning_rate": 4.4992318442140575e-05, + "loss": 1.2864, + "step": 8441 + }, + { + "epoch": 0.890506329113924, + "grad_norm": 0.6673814058303833, + "learning_rate": 4.490670990427309e-05, + "loss": 1.2839, + "step": 8442 + }, + { + "epoch": 0.8906118143459916, + "grad_norm": 0.6528987288475037, + "learning_rate": 4.4821180375624684e-05, + "loss": 1.3026, + "step": 8443 + }, + { + "epoch": 0.8907172995780591, + "grad_norm": 0.6500974893569946, + "learning_rate": 4.473572986577928e-05, + "loss": 1.2848, + "step": 8444 + }, + { + "epoch": 0.8908227848101266, + "grad_norm": 0.6505913734436035, + "learning_rate": 4.4650358384312056e-05, + "loss": 1.2734, + "step": 8445 + }, + { + "epoch": 0.8909282700421941, + "grad_norm": 0.7074286937713623, + "learning_rate": 4.4565065940789515e-05, + "loss": 1.2958, + "step": 8446 + }, + { + "epoch": 0.8910337552742617, + "grad_norm": 0.7638880014419556, + "learning_rate": 4.447985254476894e-05, + "loss": 1.3311, + "step": 8447 + }, + { + "epoch": 0.8911392405063291, + "grad_norm": 0.6542088985443115, + "learning_rate": 4.439471820579885e-05, + "loss": 1.32, + "step": 8448 + }, + { + "epoch": 0.8912447257383966, + "grad_norm": 0.7355243563652039, + "learning_rate": 4.430966293341912e-05, + "loss": 1.3205, + "step": 8449 + }, + { + "epoch": 0.8913502109704642, + "grad_norm": 0.7908052802085876, + "learning_rate": 4.422468673716054e-05, + "loss": 1.2862, + "step": 8450 + }, + { + "epoch": 0.8914556962025316, + "grad_norm": 0.6578970551490784, + "learning_rate": 
4.413978962654508e-05, + "loss": 1.2903, + "step": 8451 + }, + { + "epoch": 0.8915611814345992, + "grad_norm": 0.6493092775344849, + "learning_rate": 4.405497161108596e-05, + "loss": 1.3033, + "step": 8452 + }, + { + "epoch": 0.8916666666666667, + "grad_norm": 0.6917029619216919, + "learning_rate": 4.397023270028749e-05, + "loss": 1.2932, + "step": 8453 + }, + { + "epoch": 0.8917721518987342, + "grad_norm": 0.6444376111030579, + "learning_rate": 4.388557290364484e-05, + "loss": 1.3173, + "step": 8454 + }, + { + "epoch": 0.8918776371308017, + "grad_norm": 0.738902747631073, + "learning_rate": 4.3800992230644904e-05, + "loss": 1.2541, + "step": 8455 + }, + { + "epoch": 0.8919831223628693, + "grad_norm": 0.6723805665969849, + "learning_rate": 4.3716490690765194e-05, + "loss": 1.3111, + "step": 8456 + }, + { + "epoch": 0.8920886075949367, + "grad_norm": 0.6534838676452637, + "learning_rate": 4.3632068293474545e-05, + "loss": 1.2771, + "step": 8457 + }, + { + "epoch": 0.8921940928270042, + "grad_norm": 0.6685818433761597, + "learning_rate": 4.35477250482329e-05, + "loss": 1.2884, + "step": 8458 + }, + { + "epoch": 0.8922995780590718, + "grad_norm": 0.6645937561988831, + "learning_rate": 4.346346096449136e-05, + "loss": 1.3089, + "step": 8459 + }, + { + "epoch": 0.8924050632911392, + "grad_norm": 0.701856255531311, + "learning_rate": 4.337927605169212e-05, + "loss": 1.3197, + "step": 8460 + }, + { + "epoch": 0.8925105485232068, + "grad_norm": 0.7135605812072754, + "learning_rate": 4.3295170319268554e-05, + "loss": 1.3163, + "step": 8461 + }, + { + "epoch": 0.8926160337552742, + "grad_norm": 0.6557718515396118, + "learning_rate": 4.321114377664495e-05, + "loss": 1.2969, + "step": 8462 + }, + { + "epoch": 0.8927215189873418, + "grad_norm": 0.6460669040679932, + "learning_rate": 4.3127196433237205e-05, + "loss": 1.2923, + "step": 8463 + }, + { + "epoch": 0.8928270042194093, + "grad_norm": 0.6624032855033875, + "learning_rate": 4.304332829845187e-05, + "loss": 1.2853, + 
"step": 8464 + }, + { + "epoch": 0.8929324894514767, + "grad_norm": 0.660780131816864, + "learning_rate": 4.2959539381686843e-05, + "loss": 1.2962, + "step": 8465 + }, + { + "epoch": 0.8930379746835443, + "grad_norm": 0.7050625085830688, + "learning_rate": 4.287582969233103e-05, + "loss": 1.2863, + "step": 8466 + }, + { + "epoch": 0.8931434599156118, + "grad_norm": 0.6553258895874023, + "learning_rate": 4.279219923976452e-05, + "loss": 1.2971, + "step": 8467 + }, + { + "epoch": 0.8932489451476793, + "grad_norm": 0.6529601216316223, + "learning_rate": 4.2708648033358554e-05, + "loss": 1.2883, + "step": 8468 + }, + { + "epoch": 0.8933544303797468, + "grad_norm": 0.6787359714508057, + "learning_rate": 4.26251760824754e-05, + "loss": 1.309, + "step": 8469 + }, + { + "epoch": 0.8934599156118144, + "grad_norm": 0.6336966156959534, + "learning_rate": 4.2541783396468584e-05, + "loss": 1.3092, + "step": 8470 + }, + { + "epoch": 0.8935654008438818, + "grad_norm": 0.8202620148658752, + "learning_rate": 4.245846998468261e-05, + "loss": 1.2878, + "step": 8471 + }, + { + "epoch": 0.8936708860759494, + "grad_norm": 0.6780153512954712, + "learning_rate": 4.2375235856453197e-05, + "loss": 1.2911, + "step": 8472 + }, + { + "epoch": 0.8937763713080169, + "grad_norm": 0.651413083076477, + "learning_rate": 4.229208102110721e-05, + "loss": 1.3192, + "step": 8473 + }, + { + "epoch": 0.8938818565400843, + "grad_norm": 0.6272668838500977, + "learning_rate": 4.220900548796244e-05, + "loss": 1.2843, + "step": 8474 + }, + { + "epoch": 0.8939873417721519, + "grad_norm": 0.7090466022491455, + "learning_rate": 4.212600926632804e-05, + "loss": 1.3102, + "step": 8475 + }, + { + "epoch": 0.8940928270042194, + "grad_norm": 0.6219647526741028, + "learning_rate": 4.204309236550405e-05, + "loss": 1.2559, + "step": 8476 + }, + { + "epoch": 0.8941983122362869, + "grad_norm": 0.6708633899688721, + "learning_rate": 4.1960254794781714e-05, + "loss": 1.2995, + "step": 8477 + }, + { + "epoch": 
0.8943037974683544, + "grad_norm": 0.6878829598426819, + "learning_rate": 4.1877496563443446e-05, + "loss": 1.2973, + "step": 8478 + }, + { + "epoch": 0.894409282700422, + "grad_norm": 0.7225746512413025, + "learning_rate": 4.179481768076274e-05, + "loss": 1.2825, + "step": 8479 + }, + { + "epoch": 0.8945147679324894, + "grad_norm": 0.6201696395874023, + "learning_rate": 4.1712218156004014e-05, + "loss": 1.2889, + "step": 8480 + }, + { + "epoch": 0.894620253164557, + "grad_norm": 0.6513847708702087, + "learning_rate": 4.16296979984232e-05, + "loss": 1.3132, + "step": 8481 + }, + { + "epoch": 0.8947257383966245, + "grad_norm": 0.6443249583244324, + "learning_rate": 4.154725721726699e-05, + "loss": 1.322, + "step": 8482 + }, + { + "epoch": 0.8948312236286919, + "grad_norm": 0.6362040042877197, + "learning_rate": 4.1464895821773235e-05, + "loss": 1.3008, + "step": 8483 + }, + { + "epoch": 0.8949367088607595, + "grad_norm": 0.6573773622512817, + "learning_rate": 4.138261382117098e-05, + "loss": 1.2886, + "step": 8484 + }, + { + "epoch": 0.895042194092827, + "grad_norm": 0.6406295895576477, + "learning_rate": 4.130041122468042e-05, + "loss": 1.2902, + "step": 8485 + }, + { + "epoch": 0.8951476793248945, + "grad_norm": 0.6499406099319458, + "learning_rate": 4.1218288041512534e-05, + "loss": 1.307, + "step": 8486 + }, + { + "epoch": 0.895253164556962, + "grad_norm": 0.6699963212013245, + "learning_rate": 4.113624428086987e-05, + "loss": 1.319, + "step": 8487 + }, + { + "epoch": 0.8953586497890296, + "grad_norm": 0.6278684139251709, + "learning_rate": 4.105427995194566e-05, + "loss": 1.279, + "step": 8488 + }, + { + "epoch": 0.895464135021097, + "grad_norm": 0.6862331628799438, + "learning_rate": 4.0972395063924554e-05, + "loss": 1.2909, + "step": 8489 + }, + { + "epoch": 0.8955696202531646, + "grad_norm": 0.6614750027656555, + "learning_rate": 4.089058962598213e-05, + "loss": 1.2704, + "step": 8490 + }, + { + "epoch": 0.8956751054852321, + "grad_norm": 0.6273866295814514, 
+ "learning_rate": 4.080886364728506e-05, + "loss": 1.2828, + "step": 8491 + }, + { + "epoch": 0.8957805907172995, + "grad_norm": 0.7845770120620728, + "learning_rate": 4.072721713699118e-05, + "loss": 1.2614, + "step": 8492 + }, + { + "epoch": 0.8958860759493671, + "grad_norm": 0.687372088432312, + "learning_rate": 4.064565010424942e-05, + "loss": 1.2497, + "step": 8493 + }, + { + "epoch": 0.8959915611814346, + "grad_norm": 0.7129165530204773, + "learning_rate": 4.056416255819964e-05, + "loss": 1.2672, + "step": 8494 + }, + { + "epoch": 0.8960970464135021, + "grad_norm": 0.6453121304512024, + "learning_rate": 4.048275450797312e-05, + "loss": 1.3477, + "step": 8495 + }, + { + "epoch": 0.8962025316455696, + "grad_norm": 0.7114900946617126, + "learning_rate": 4.0401425962691804e-05, + "loss": 1.3033, + "step": 8496 + }, + { + "epoch": 0.8963080168776372, + "grad_norm": 0.6953406929969788, + "learning_rate": 4.032017693146908e-05, + "loss": 1.3012, + "step": 8497 + }, + { + "epoch": 0.8964135021097046, + "grad_norm": 0.6165673136711121, + "learning_rate": 4.023900742340941e-05, + "loss": 1.2781, + "step": 8498 + }, + { + "epoch": 0.8965189873417722, + "grad_norm": 0.6324517130851746, + "learning_rate": 4.015791744760811e-05, + "loss": 1.2896, + "step": 8499 + }, + { + "epoch": 0.8966244725738397, + "grad_norm": 0.6398531198501587, + "learning_rate": 4.0076907013151726e-05, + "loss": 1.2889, + "step": 8500 + }, + { + "epoch": 0.8967299578059071, + "grad_norm": 0.673603892326355, + "learning_rate": 3.999597612911793e-05, + "loss": 1.3073, + "step": 8501 + }, + { + "epoch": 0.8968354430379747, + "grad_norm": 0.6921299695968628, + "learning_rate": 3.991512480457546e-05, + "loss": 1.3198, + "step": 8502 + }, + { + "epoch": 0.8969409282700422, + "grad_norm": 0.6812261939048767, + "learning_rate": 3.9834353048583984e-05, + "loss": 1.2787, + "step": 8503 + }, + { + "epoch": 0.8970464135021097, + "grad_norm": 0.6186887621879578, + "learning_rate": 3.9753660870194524e-05, + 
"loss": 1.3073, + "step": 8504 + }, + { + "epoch": 0.8971518987341772, + "grad_norm": 0.706139087677002, + "learning_rate": 3.967304827844892e-05, + "loss": 1.2802, + "step": 8505 + }, + { + "epoch": 0.8972573839662448, + "grad_norm": 0.6641781330108643, + "learning_rate": 3.95925152823802e-05, + "loss": 1.272, + "step": 8506 + }, + { + "epoch": 0.8973628691983122, + "grad_norm": 0.643637478351593, + "learning_rate": 3.9512061891012643e-05, + "loss": 1.3043, + "step": 8507 + }, + { + "epoch": 0.8974683544303798, + "grad_norm": 0.6426804065704346, + "learning_rate": 3.943168811336137e-05, + "loss": 1.3264, + "step": 8508 + }, + { + "epoch": 0.8975738396624473, + "grad_norm": 0.6553860306739807, + "learning_rate": 3.93513939584326e-05, + "loss": 1.312, + "step": 8509 + }, + { + "epoch": 0.8976793248945147, + "grad_norm": 0.6336636543273926, + "learning_rate": 3.927117943522379e-05, + "loss": 1.2794, + "step": 8510 + }, + { + "epoch": 0.8977848101265823, + "grad_norm": 0.6436659097671509, + "learning_rate": 3.9191044552723345e-05, + "loss": 1.278, + "step": 8511 + }, + { + "epoch": 0.8978902953586498, + "grad_norm": 0.6809067130088806, + "learning_rate": 3.911098931991075e-05, + "loss": 1.2913, + "step": 8512 + }, + { + "epoch": 0.8979957805907173, + "grad_norm": 0.656670331954956, + "learning_rate": 3.9031013745756655e-05, + "loss": 1.2817, + "step": 8513 + }, + { + "epoch": 0.8981012658227848, + "grad_norm": 0.6668751835823059, + "learning_rate": 3.895111783922256e-05, + "loss": 1.2618, + "step": 8514 + }, + { + "epoch": 0.8982067510548524, + "grad_norm": 0.6530550718307495, + "learning_rate": 3.887130160926139e-05, + "loss": 1.3004, + "step": 8515 + }, + { + "epoch": 0.8983122362869198, + "grad_norm": 0.6362914443016052, + "learning_rate": 3.879156506481699e-05, + "loss": 1.3201, + "step": 8516 + }, + { + "epoch": 0.8984177215189874, + "grad_norm": 0.6630980372428894, + "learning_rate": 3.8711908214824035e-05, + "loss": 1.3109, + "step": 8517 + }, + { + "epoch": 
0.8985232067510549, + "grad_norm": 0.6946395635604858, + "learning_rate": 3.863233106820857e-05, + "loss": 1.3079, + "step": 8518 + }, + { + "epoch": 0.8986286919831223, + "grad_norm": 0.6490069627761841, + "learning_rate": 3.855283363388762e-05, + "loss": 1.2679, + "step": 8519 + }, + { + "epoch": 0.8987341772151899, + "grad_norm": 0.6598498821258545, + "learning_rate": 3.8473415920769304e-05, + "loss": 1.3219, + "step": 8520 + }, + { + "epoch": 0.8988396624472574, + "grad_norm": 0.6582968831062317, + "learning_rate": 3.839407793775268e-05, + "loss": 1.2978, + "step": 8521 + }, + { + "epoch": 0.8989451476793249, + "grad_norm": 0.6472191214561462, + "learning_rate": 3.8314819693727966e-05, + "loss": 1.3092, + "step": 8522 + }, + { + "epoch": 0.8990506329113924, + "grad_norm": 0.6875693202018738, + "learning_rate": 3.823564119757647e-05, + "loss": 1.3151, + "step": 8523 + }, + { + "epoch": 0.89915611814346, + "grad_norm": 0.6786731481552124, + "learning_rate": 3.81565424581706e-05, + "loss": 1.2777, + "step": 8524 + }, + { + "epoch": 0.8992616033755274, + "grad_norm": 0.7061293721199036, + "learning_rate": 3.8077523484373764e-05, + "loss": 1.328, + "step": 8525 + }, + { + "epoch": 0.899367088607595, + "grad_norm": 0.6899933218955994, + "learning_rate": 3.79985842850403e-05, + "loss": 1.3439, + "step": 8526 + }, + { + "epoch": 0.8994725738396624, + "grad_norm": 0.6416152715682983, + "learning_rate": 3.791972486901596e-05, + "loss": 1.2942, + "step": 8527 + }, + { + "epoch": 0.8995780590717299, + "grad_norm": 0.6791837811470032, + "learning_rate": 3.784094524513709e-05, + "loss": 1.3125, + "step": 8528 + }, + { + "epoch": 0.8996835443037975, + "grad_norm": 0.676744282245636, + "learning_rate": 3.7762245422231476e-05, + "loss": 1.2791, + "step": 8529 + }, + { + "epoch": 0.8997890295358649, + "grad_norm": 0.6469886898994446, + "learning_rate": 3.768362540911788e-05, + "loss": 1.3205, + "step": 8530 + }, + { + "epoch": 0.8998945147679325, + "grad_norm": 
0.6775825023651123, + "learning_rate": 3.760508521460584e-05, + "loss": 1.2996, + "step": 8531 + }, + { + "epoch": 0.9, + "grad_norm": 0.6805945038795471, + "learning_rate": 3.7526624847496335e-05, + "loss": 1.2971, + "step": 8532 + }, + { + "epoch": 0.9001054852320675, + "grad_norm": 0.6610656976699829, + "learning_rate": 3.744824431658131e-05, + "loss": 1.2763, + "step": 8533 + }, + { + "epoch": 0.900210970464135, + "grad_norm": 0.6898884177207947, + "learning_rate": 3.736994363064358e-05, + "loss": 1.3263, + "step": 8534 + }, + { + "epoch": 0.9003164556962026, + "grad_norm": 0.6572892069816589, + "learning_rate": 3.7291722798457215e-05, + "loss": 1.2879, + "step": 8535 + }, + { + "epoch": 0.90042194092827, + "grad_norm": 0.725019097328186, + "learning_rate": 3.72135818287872e-05, + "loss": 1.3239, + "step": 8536 + }, + { + "epoch": 0.9005274261603375, + "grad_norm": 0.6349507570266724, + "learning_rate": 3.713552073038953e-05, + "loss": 1.3368, + "step": 8537 + }, + { + "epoch": 0.9006329113924051, + "grad_norm": 0.6619454026222229, + "learning_rate": 3.705753951201146e-05, + "loss": 1.2993, + "step": 8538 + }, + { + "epoch": 0.9007383966244725, + "grad_norm": 0.6463711857795715, + "learning_rate": 3.697963818239117e-05, + "loss": 1.2762, + "step": 8539 + }, + { + "epoch": 0.9008438818565401, + "grad_norm": 0.6336432695388794, + "learning_rate": 3.690181675025775e-05, + "loss": 1.3099, + "step": 8540 + }, + { + "epoch": 0.9009493670886076, + "grad_norm": 0.6576512455940247, + "learning_rate": 3.682407522433173e-05, + "loss": 1.2995, + "step": 8541 + }, + { + "epoch": 0.9010548523206751, + "grad_norm": 0.6453399658203125, + "learning_rate": 3.674641361332423e-05, + "loss": 1.3021, + "step": 8542 + }, + { + "epoch": 0.9011603375527426, + "grad_norm": 0.649519145488739, + "learning_rate": 3.66688319259377e-05, + "loss": 1.2771, + "step": 8543 + }, + { + "epoch": 0.9012658227848102, + "grad_norm": 0.674566388130188, + "learning_rate": 3.6591330170865524e-05, + 
"loss": 1.3226, + "step": 8544 + }, + { + "epoch": 0.9013713080168776, + "grad_norm": 0.6551771759986877, + "learning_rate": 3.6513908356792244e-05, + "loss": 1.2584, + "step": 8545 + }, + { + "epoch": 0.9014767932489451, + "grad_norm": 0.6654418706893921, + "learning_rate": 3.643656649239327e-05, + "loss": 1.2941, + "step": 8546 + }, + { + "epoch": 0.9015822784810127, + "grad_norm": 0.70919269323349, + "learning_rate": 3.635930458633516e-05, + "loss": 1.2765, + "step": 8547 + }, + { + "epoch": 0.9016877637130801, + "grad_norm": 0.6431152820587158, + "learning_rate": 3.628212264727548e-05, + "loss": 1.3202, + "step": 8548 + }, + { + "epoch": 0.9017932489451477, + "grad_norm": 0.6577557325363159, + "learning_rate": 3.6205020683862836e-05, + "loss": 1.2619, + "step": 8549 + }, + { + "epoch": 0.9018987341772152, + "grad_norm": 0.7003525495529175, + "learning_rate": 3.612799870473696e-05, + "loss": 1.3214, + "step": 8550 + }, + { + "epoch": 0.9020042194092827, + "grad_norm": 0.6926751136779785, + "learning_rate": 3.605105671852854e-05, + "loss": 1.2786, + "step": 8551 + }, + { + "epoch": 0.9021097046413502, + "grad_norm": 0.6847266554832458, + "learning_rate": 3.597419473385935e-05, + "loss": 1.2723, + "step": 8552 + }, + { + "epoch": 0.9022151898734178, + "grad_norm": 0.7679013013839722, + "learning_rate": 3.5897412759342e-05, + "loss": 1.2998, + "step": 8553 + }, + { + "epoch": 0.9023206751054852, + "grad_norm": 0.6746289134025574, + "learning_rate": 3.582071080358043e-05, + "loss": 1.3164, + "step": 8554 + }, + { + "epoch": 0.9024261603375527, + "grad_norm": 0.6477152705192566, + "learning_rate": 3.5744088875169446e-05, + "loss": 1.2944, + "step": 8555 + }, + { + "epoch": 0.9025316455696203, + "grad_norm": 0.6646168231964111, + "learning_rate": 3.566754698269492e-05, + "loss": 1.3063, + "step": 8556 + }, + { + "epoch": 0.9026371308016877, + "grad_norm": 0.7035142183303833, + "learning_rate": 3.5591085134733666e-05, + "loss": 1.231, + "step": 8557 + }, + { + "epoch": 
0.9027426160337553, + "grad_norm": 0.6725677251815796, + "learning_rate": 3.5514703339853656e-05, + "loss": 1.2727, + "step": 8558 + }, + { + "epoch": 0.9028481012658228, + "grad_norm": 0.6462684273719788, + "learning_rate": 3.543840160661396e-05, + "loss": 1.3038, + "step": 8559 + }, + { + "epoch": 0.9029535864978903, + "grad_norm": 0.6282719373703003, + "learning_rate": 3.5362179943564496e-05, + "loss": 1.2735, + "step": 8560 + }, + { + "epoch": 0.9030590717299578, + "grad_norm": 0.6558697819709778, + "learning_rate": 3.528603835924626e-05, + "loss": 1.3041, + "step": 8561 + }, + { + "epoch": 0.9031645569620254, + "grad_norm": 0.625732958316803, + "learning_rate": 3.520997686219127e-05, + "loss": 1.3233, + "step": 8562 + }, + { + "epoch": 0.9032700421940928, + "grad_norm": 0.6765335202217102, + "learning_rate": 3.513399546092269e-05, + "loss": 1.2924, + "step": 8563 + }, + { + "epoch": 0.9033755274261603, + "grad_norm": 0.7976239919662476, + "learning_rate": 3.5058094163954556e-05, + "loss": 1.2828, + "step": 8564 + }, + { + "epoch": 0.9034810126582279, + "grad_norm": 0.7245626449584961, + "learning_rate": 3.498227297979198e-05, + "loss": 1.3021, + "step": 8565 + }, + { + "epoch": 0.9035864978902953, + "grad_norm": 0.6421371102333069, + "learning_rate": 3.4906531916931075e-05, + "loss": 1.3289, + "step": 8566 + }, + { + "epoch": 0.9036919831223629, + "grad_norm": 0.6632510423660278, + "learning_rate": 3.483087098385906e-05, + "loss": 1.31, + "step": 8567 + }, + { + "epoch": 0.9037974683544304, + "grad_norm": 0.6500763893127441, + "learning_rate": 3.475529018905416e-05, + "loss": 1.2792, + "step": 8568 + }, + { + "epoch": 0.9039029535864979, + "grad_norm": 0.6713687777519226, + "learning_rate": 3.467978954098549e-05, + "loss": 1.3002, + "step": 8569 + }, + { + "epoch": 0.9040084388185654, + "grad_norm": 0.785160481929779, + "learning_rate": 3.46043690481134e-05, + "loss": 1.3162, + "step": 8570 + }, + { + "epoch": 0.904113924050633, + "grad_norm": 
0.6934322118759155, + "learning_rate": 3.4529028718888935e-05, + "loss": 1.2899, + "step": 8571 + }, + { + "epoch": 0.9042194092827004, + "grad_norm": 0.6128583550453186, + "learning_rate": 3.4453768561754525e-05, + "loss": 1.2731, + "step": 8572 + }, + { + "epoch": 0.9043248945147679, + "grad_norm": 0.6586999893188477, + "learning_rate": 3.437858858514334e-05, + "loss": 1.2921, + "step": 8573 + }, + { + "epoch": 0.9044303797468355, + "grad_norm": 0.6610689759254456, + "learning_rate": 3.43034887974798e-05, + "loss": 1.3105, + "step": 8574 + }, + { + "epoch": 0.9045358649789029, + "grad_norm": 0.701533854007721, + "learning_rate": 3.422846920717893e-05, + "loss": 1.3069, + "step": 8575 + }, + { + "epoch": 0.9046413502109705, + "grad_norm": 0.8480714559555054, + "learning_rate": 3.4153529822647414e-05, + "loss": 1.2712, + "step": 8576 + }, + { + "epoch": 0.904746835443038, + "grad_norm": 0.6663806438446045, + "learning_rate": 3.4078670652282374e-05, + "loss": 1.3226, + "step": 8577 + }, + { + "epoch": 0.9048523206751055, + "grad_norm": 0.6713228225708008, + "learning_rate": 3.400389170447218e-05, + "loss": 1.2674, + "step": 8578 + }, + { + "epoch": 0.904957805907173, + "grad_norm": 0.6596994400024414, + "learning_rate": 3.392919298759623e-05, + "loss": 1.3234, + "step": 8579 + }, + { + "epoch": 0.9050632911392406, + "grad_norm": 0.6692514419555664, + "learning_rate": 3.38545745100248e-05, + "loss": 1.3007, + "step": 8580 + }, + { + "epoch": 0.905168776371308, + "grad_norm": 0.6452614068984985, + "learning_rate": 3.378003628011938e-05, + "loss": 1.2851, + "step": 8581 + }, + { + "epoch": 0.9052742616033755, + "grad_norm": 0.7133308053016663, + "learning_rate": 3.3705578306232224e-05, + "loss": 1.3152, + "step": 8582 + }, + { + "epoch": 0.9053797468354431, + "grad_norm": 0.8342210650444031, + "learning_rate": 3.363120059670688e-05, + "loss": 1.3099, + "step": 8583 + }, + { + "epoch": 0.9054852320675105, + "grad_norm": 0.7290394902229309, + "learning_rate": 
3.355690315987761e-05, + "loss": 1.2849, + "step": 8584 + }, + { + "epoch": 0.9055907172995781, + "grad_norm": 0.7123720645904541, + "learning_rate": 3.3482686004069755e-05, + "loss": 1.2964, + "step": 8585 + }, + { + "epoch": 0.9056962025316456, + "grad_norm": 0.6523795127868652, + "learning_rate": 3.340854913759983e-05, + "loss": 1.2827, + "step": 8586 + }, + { + "epoch": 0.9058016877637131, + "grad_norm": 0.6959734559059143, + "learning_rate": 3.3334492568775355e-05, + "loss": 1.2972, + "step": 8587 + }, + { + "epoch": 0.9059071729957806, + "grad_norm": 0.6817085146903992, + "learning_rate": 3.3260516305894526e-05, + "loss": 1.2847, + "step": 8588 + }, + { + "epoch": 0.9060126582278482, + "grad_norm": 0.7576567530632019, + "learning_rate": 3.318662035724679e-05, + "loss": 1.3189, + "step": 8589 + }, + { + "epoch": 0.9061181434599156, + "grad_norm": 0.7556055188179016, + "learning_rate": 3.31128047311127e-05, + "loss": 1.3387, + "step": 8590 + }, + { + "epoch": 0.9062236286919831, + "grad_norm": 0.6578317880630493, + "learning_rate": 3.303906943576346e-05, + "loss": 1.2885, + "step": 8591 + }, + { + "epoch": 0.9063291139240506, + "grad_norm": 0.7240660786628723, + "learning_rate": 3.296541447946164e-05, + "loss": 1.2948, + "step": 8592 + }, + { + "epoch": 0.9064345991561181, + "grad_norm": 0.6374247074127197, + "learning_rate": 3.2891839870460546e-05, + "loss": 1.3107, + "step": 8593 + }, + { + "epoch": 0.9065400843881857, + "grad_norm": 0.6854918599128723, + "learning_rate": 3.281834561700467e-05, + "loss": 1.2636, + "step": 8594 + }, + { + "epoch": 0.9066455696202531, + "grad_norm": 0.7448713779449463, + "learning_rate": 3.274493172732926e-05, + "loss": 1.3617, + "step": 8595 + }, + { + "epoch": 0.9067510548523207, + "grad_norm": Infinity, + "learning_rate": 3.274493172732926e-05, + "loss": 1.2321, + "step": 8596 + }, + { + "epoch": 0.9068565400843882, + "grad_norm": 0.6824391484260559, + "learning_rate": 3.26715982096609e-05, + "loss": 1.2583, + "step": 8597 + 
}, + { + "epoch": 0.9069620253164556, + "grad_norm": 0.719792366027832, + "learning_rate": 3.259834507221684e-05, + "loss": 1.317, + "step": 8598 + }, + { + "epoch": 0.9070675105485232, + "grad_norm": 0.7142028212547302, + "learning_rate": 3.2525172323205535e-05, + "loss": 1.2871, + "step": 8599 + }, + { + "epoch": 0.9071729957805907, + "grad_norm": 0.6519612669944763, + "learning_rate": 3.2452079970826335e-05, + "loss": 1.3141, + "step": 8600 + }, + { + "epoch": 0.9072784810126582, + "grad_norm": 0.6710625886917114, + "learning_rate": 3.237906802326951e-05, + "loss": 1.3221, + "step": 8601 + }, + { + "epoch": 0.9073839662447257, + "grad_norm": 0.6294894814491272, + "learning_rate": 3.230613648871661e-05, + "loss": 1.295, + "step": 8602 + }, + { + "epoch": 0.9074894514767933, + "grad_norm": 0.654015302658081, + "learning_rate": 3.223328537533976e-05, + "loss": 1.279, + "step": 8603 + }, + { + "epoch": 0.9075949367088607, + "grad_norm": 0.8260496258735657, + "learning_rate": 3.216051469130243e-05, + "loss": 1.3154, + "step": 8604 + }, + { + "epoch": 0.9077004219409283, + "grad_norm": 0.7503252625465393, + "learning_rate": 3.208782444475894e-05, + "loss": 1.2774, + "step": 8605 + }, + { + "epoch": 0.9078059071729958, + "grad_norm": 0.6484048962593079, + "learning_rate": 3.201521464385443e-05, + "loss": 1.281, + "step": 8606 + }, + { + "epoch": 0.9079113924050632, + "grad_norm": 0.6476485729217529, + "learning_rate": 3.194268529672539e-05, + "loss": 1.2808, + "step": 8607 + }, + { + "epoch": 0.9080168776371308, + "grad_norm": 0.6574289798736572, + "learning_rate": 3.187023641149908e-05, + "loss": 1.2821, + "step": 8608 + }, + { + "epoch": 0.9081223628691983, + "grad_norm": 0.6779351234436035, + "learning_rate": 3.1797867996293663e-05, + "loss": 1.3137, + "step": 8609 + }, + { + "epoch": 0.9082278481012658, + "grad_norm": 0.702333927154541, + "learning_rate": 3.172558005921841e-05, + "loss": 1.2935, + "step": 8610 + }, + { + "epoch": 0.9083333333333333, + "grad_norm": 
0.6561712026596069, + "learning_rate": 3.165337260837351e-05, + "loss": 1.3027, + "step": 8611 + }, + { + "epoch": 0.9084388185654009, + "grad_norm": 0.8286715149879456, + "learning_rate": 3.158124565185022e-05, + "loss": 1.3, + "step": 8612 + }, + { + "epoch": 0.9085443037974683, + "grad_norm": 0.641632080078125, + "learning_rate": 3.1509199197730765e-05, + "loss": 1.2752, + "step": 8613 + }, + { + "epoch": 0.9086497890295359, + "grad_norm": 0.646466851234436, + "learning_rate": 3.143723325408826e-05, + "loss": 1.3132, + "step": 8614 + }, + { + "epoch": 0.9087552742616034, + "grad_norm": 0.652873694896698, + "learning_rate": 3.136534782898667e-05, + "loss": 1.2502, + "step": 8615 + }, + { + "epoch": 0.9088607594936708, + "grad_norm": 0.6753138899803162, + "learning_rate": 3.129354293048148e-05, + "loss": 1.3231, + "step": 8616 + }, + { + "epoch": 0.9089662447257384, + "grad_norm": 0.6539096236228943, + "learning_rate": 3.122181856661857e-05, + "loss": 1.2698, + "step": 8617 + }, + { + "epoch": 0.9090717299578059, + "grad_norm": 0.7198141813278198, + "learning_rate": 3.1150174745435026e-05, + "loss": 1.3097, + "step": 8618 + }, + { + "epoch": 0.9091772151898734, + "grad_norm": 0.7864772081375122, + "learning_rate": 3.107861147495891e-05, + "loss": 1.2607, + "step": 8619 + }, + { + "epoch": 0.9092827004219409, + "grad_norm": 0.6881057024002075, + "learning_rate": 3.100712876320924e-05, + "loss": 1.31, + "step": 8620 + }, + { + "epoch": 0.9093881856540085, + "grad_norm": 0.6165547966957092, + "learning_rate": 3.093572661819602e-05, + "loss": 1.2984, + "step": 8621 + }, + { + "epoch": 0.9094936708860759, + "grad_norm": 0.6451809406280518, + "learning_rate": 3.086440504792026e-05, + "loss": 1.2583, + "step": 8622 + }, + { + "epoch": 0.9095991561181435, + "grad_norm": 0.7388421297073364, + "learning_rate": 3.079316406037375e-05, + "loss": 1.3032, + "step": 8623 + }, + { + "epoch": 0.909704641350211, + "grad_norm": 0.6358308792114258, + "learning_rate": 
3.072200366353958e-05, + "loss": 1.2753, + "step": 8624 + }, + { + "epoch": 0.9098101265822784, + "grad_norm": 0.7155098915100098, + "learning_rate": 3.0650923865391395e-05, + "loss": 1.3104, + "step": 8625 + }, + { + "epoch": 0.909915611814346, + "grad_norm": 0.6371184587478638, + "learning_rate": 3.057992467389431e-05, + "loss": 1.3097, + "step": 8626 + }, + { + "epoch": 0.9100210970464135, + "grad_norm": 0.6411905288696289, + "learning_rate": 3.0509006097004048e-05, + "loss": 1.3098, + "step": 8627 + }, + { + "epoch": 0.910126582278481, + "grad_norm": 0.6512603163719177, + "learning_rate": 3.043816814266734e-05, + "loss": 1.258, + "step": 8628 + }, + { + "epoch": 0.9102320675105485, + "grad_norm": 0.6894474625587463, + "learning_rate": 3.0367410818821913e-05, + "loss": 1.3246, + "step": 8629 + }, + { + "epoch": 0.9103375527426161, + "grad_norm": 0.6845337748527527, + "learning_rate": 3.029673413339651e-05, + "loss": 1.3469, + "step": 8630 + }, + { + "epoch": 0.9104430379746835, + "grad_norm": 0.6702587604522705, + "learning_rate": 3.022613809431088e-05, + "loss": 1.3028, + "step": 8631 + }, + { + "epoch": 0.9105485232067511, + "grad_norm": 0.6455851197242737, + "learning_rate": 3.015562270947553e-05, + "loss": 1.2657, + "step": 8632 + }, + { + "epoch": 0.9106540084388186, + "grad_norm": 0.6399766802787781, + "learning_rate": 3.0085187986792136e-05, + "loss": 1.2625, + "step": 8633 + }, + { + "epoch": 0.910759493670886, + "grad_norm": 0.6619231104850769, + "learning_rate": 3.00148339341533e-05, + "loss": 1.315, + "step": 8634 + }, + { + "epoch": 0.9108649789029536, + "grad_norm": 0.6672126054763794, + "learning_rate": 2.994456055944231e-05, + "loss": 1.2972, + "step": 8635 + }, + { + "epoch": 0.9109704641350211, + "grad_norm": 0.6796934008598328, + "learning_rate": 2.9874367870534018e-05, + "loss": 1.2598, + "step": 8636 + }, + { + "epoch": 0.9110759493670886, + "grad_norm": 0.7203946709632874, + "learning_rate": 2.9804255875293645e-05, + "loss": 1.3104, + 
"step": 8637 + }, + { + "epoch": 0.9111814345991561, + "grad_norm": 0.6814306974411011, + "learning_rate": 2.9734224581577568e-05, + "loss": 1.2711, + "step": 8638 + }, + { + "epoch": 0.9112869198312237, + "grad_norm": 0.6496993899345398, + "learning_rate": 2.966427399723326e-05, + "loss": 1.2759, + "step": 8639 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 0.7925711870193481, + "learning_rate": 2.959440413009895e-05, + "loss": 1.271, + "step": 8640 + }, + { + "epoch": 0.9114978902953587, + "grad_norm": 0.7064648270606995, + "learning_rate": 2.952461498800388e-05, + "loss": 1.278, + "step": 8641 + }, + { + "epoch": 0.9116033755274262, + "grad_norm": 0.6562111377716064, + "learning_rate": 2.945490657876837e-05, + "loss": 1.2949, + "step": 8642 + }, + { + "epoch": 0.9117088607594936, + "grad_norm": 0.6930472254753113, + "learning_rate": 2.938527891020351e-05, + "loss": 1.2821, + "step": 8643 + }, + { + "epoch": 0.9118143459915612, + "grad_norm": 0.6493368744850159, + "learning_rate": 2.931573199011148e-05, + "loss": 1.2664, + "step": 8644 + }, + { + "epoch": 0.9119198312236287, + "grad_norm": 0.657123863697052, + "learning_rate": 2.92462658262852e-05, + "loss": 1.2275, + "step": 8645 + }, + { + "epoch": 0.9120253164556962, + "grad_norm": 0.6919341087341309, + "learning_rate": 2.9176880426508957e-05, + "loss": 1.3007, + "step": 8646 + }, + { + "epoch": 0.9121308016877637, + "grad_norm": 0.6913860440254211, + "learning_rate": 2.9107575798557605e-05, + "loss": 1.2698, + "step": 8647 + }, + { + "epoch": 0.9122362869198313, + "grad_norm": 0.6598399877548218, + "learning_rate": 2.9038351950197107e-05, + "loss": 1.3404, + "step": 8648 + }, + { + "epoch": 0.9123417721518987, + "grad_norm": 0.7191439270973206, + "learning_rate": 2.8969208889184335e-05, + "loss": 1.2987, + "step": 8649 + }, + { + "epoch": 0.9124472573839663, + "grad_norm": 0.6472629904747009, + "learning_rate": 2.890014662326701e-05, + "loss": 1.313, + "step": 8650 + }, + { + "epoch": 
0.9125527426160338, + "grad_norm": 0.6562347412109375, + "learning_rate": 2.8831165160184024e-05, + "loss": 1.294, + "step": 8651 + }, + { + "epoch": 0.9126582278481012, + "grad_norm": 0.7965092062950134, + "learning_rate": 2.8762264507665113e-05, + "loss": 1.3183, + "step": 8652 + }, + { + "epoch": 0.9127637130801688, + "grad_norm": 0.6373055577278137, + "learning_rate": 2.869344467343077e-05, + "loss": 1.2978, + "step": 8653 + }, + { + "epoch": 0.9128691983122363, + "grad_norm": 0.6433712840080261, + "learning_rate": 2.862470566519265e-05, + "loss": 1.3232, + "step": 8654 + }, + { + "epoch": 0.9129746835443038, + "grad_norm": 0.6581855416297913, + "learning_rate": 2.855604749065352e-05, + "loss": 1.2665, + "step": 8655 + }, + { + "epoch": 0.9130801687763713, + "grad_norm": 0.7671604752540588, + "learning_rate": 2.8487470157506633e-05, + "loss": 1.2708, + "step": 8656 + }, + { + "epoch": 0.9131856540084389, + "grad_norm": 0.7812369465827942, + "learning_rate": 2.84189736734366e-05, + "loss": 1.2472, + "step": 8657 + }, + { + "epoch": 0.9132911392405063, + "grad_norm": 0.7052977085113525, + "learning_rate": 2.8350558046118607e-05, + "loss": 1.2848, + "step": 8658 + }, + { + "epoch": 0.9133966244725739, + "grad_norm": 0.6998008489608765, + "learning_rate": 2.828222328321911e-05, + "loss": 1.2708, + "step": 8659 + }, + { + "epoch": 0.9135021097046413, + "grad_norm": 0.7283758521080017, + "learning_rate": 2.8213969392395233e-05, + "loss": 1.3035, + "step": 8660 + }, + { + "epoch": 0.9136075949367088, + "grad_norm": 0.6224330067634583, + "learning_rate": 2.8145796381295276e-05, + "loss": 1.2599, + "step": 8661 + }, + { + "epoch": 0.9137130801687764, + "grad_norm": 0.651337742805481, + "learning_rate": 2.807770425755829e-05, + "loss": 1.2854, + "step": 8662 + }, + { + "epoch": 0.9138185654008438, + "grad_norm": 0.7043775916099548, + "learning_rate": 2.800969302881434e-05, + "loss": 1.3166, + "step": 8663 + }, + { + "epoch": 0.9139240506329114, + "grad_norm": 
0.647373616695404, + "learning_rate": 2.7941762702684503e-05, + "loss": 1.318, + "step": 8664 + }, + { + "epoch": 0.9140295358649789, + "grad_norm": 0.7116578221321106, + "learning_rate": 2.7873913286780683e-05, + "loss": 1.3109, + "step": 8665 + }, + { + "epoch": 0.9141350210970464, + "grad_norm": 0.678847074508667, + "learning_rate": 2.7806144788705718e-05, + "loss": 1.2687, + "step": 8666 + }, + { + "epoch": 0.9142405063291139, + "grad_norm": 0.6540934443473816, + "learning_rate": 2.7738457216053447e-05, + "loss": 1.2913, + "step": 8667 + }, + { + "epoch": 0.9143459915611815, + "grad_norm": 0.6275559067726135, + "learning_rate": 2.7670850576408556e-05, + "loss": 1.272, + "step": 8668 + }, + { + "epoch": 0.9144514767932489, + "grad_norm": 0.6350340247154236, + "learning_rate": 2.7603324877346653e-05, + "loss": 1.3039, + "step": 8669 + }, + { + "epoch": 0.9145569620253164, + "grad_norm": 0.6713284254074097, + "learning_rate": 2.7535880126434433e-05, + "loss": 1.329, + "step": 8670 + }, + { + "epoch": 0.914662447257384, + "grad_norm": 0.6577053666114807, + "learning_rate": 2.7468516331229432e-05, + "loss": 1.3392, + "step": 8671 + }, + { + "epoch": 0.9147679324894514, + "grad_norm": 0.6372117400169373, + "learning_rate": 2.7401233499279866e-05, + "loss": 1.3294, + "step": 8672 + }, + { + "epoch": 0.914873417721519, + "grad_norm": 0.6929393410682678, + "learning_rate": 2.7334031638125367e-05, + "loss": 1.281, + "step": 8673 + }, + { + "epoch": 0.9149789029535865, + "grad_norm": 0.6388505697250366, + "learning_rate": 2.726691075529625e-05, + "loss": 1.2875, + "step": 8674 + }, + { + "epoch": 0.915084388185654, + "grad_norm": 0.7683546543121338, + "learning_rate": 2.7199870858313574e-05, + "loss": 1.3074, + "step": 8675 + }, + { + "epoch": 0.9151898734177215, + "grad_norm": 0.6415644884109497, + "learning_rate": 2.7132911954689672e-05, + "loss": 1.2988, + "step": 8676 + }, + { + "epoch": 0.9152953586497891, + "grad_norm": 0.6611455678939819, + "learning_rate": 
2.706603405192745e-05, + "loss": 1.2897, + "step": 8677 + }, + { + "epoch": 0.9154008438818565, + "grad_norm": 0.6181951761245728, + "learning_rate": 2.6999237157521005e-05, + "loss": 1.3172, + "step": 8678 + }, + { + "epoch": 0.915506329113924, + "grad_norm": 0.6540100574493408, + "learning_rate": 2.6932521278955262e-05, + "loss": 1.3335, + "step": 8679 + }, + { + "epoch": 0.9156118143459916, + "grad_norm": 0.6310561299324036, + "learning_rate": 2.686588642370591e-05, + "loss": 1.2635, + "step": 8680 + }, + { + "epoch": 0.915717299578059, + "grad_norm": 0.7019557356834412, + "learning_rate": 2.6799332599239974e-05, + "loss": 1.3366, + "step": 8681 + }, + { + "epoch": 0.9158227848101266, + "grad_norm": 0.7073654532432556, + "learning_rate": 2.6732859813014987e-05, + "loss": 1.2992, + "step": 8682 + }, + { + "epoch": 0.9159282700421941, + "grad_norm": 0.7044061422348022, + "learning_rate": 2.666646807247966e-05, + "loss": 1.2773, + "step": 8683 + }, + { + "epoch": 0.9160337552742616, + "grad_norm": 0.7019182443618774, + "learning_rate": 2.660015738507346e-05, + "loss": 1.3067, + "step": 8684 + }, + { + "epoch": 0.9161392405063291, + "grad_norm": 0.6419481039047241, + "learning_rate": 2.653392775822677e-05, + "loss": 1.2889, + "step": 8685 + }, + { + "epoch": 0.9162447257383967, + "grad_norm": 0.6880226135253906, + "learning_rate": 2.6467779199361e-05, + "loss": 1.2968, + "step": 8686 + }, + { + "epoch": 0.9163502109704641, + "grad_norm": 0.717553436756134, + "learning_rate": 2.6401711715888454e-05, + "loss": 1.2998, + "step": 8687 + }, + { + "epoch": 0.9164556962025316, + "grad_norm": 0.6162384748458862, + "learning_rate": 2.6335725315212304e-05, + "loss": 1.3005, + "step": 8688 + }, + { + "epoch": 0.9165611814345992, + "grad_norm": 0.6269016265869141, + "learning_rate": 2.626982000472655e-05, + "loss": 1.3243, + "step": 8689 + }, + { + "epoch": 0.9166666666666666, + "grad_norm": 0.6392075419425964, + "learning_rate": 2.6203995791816372e-05, + "loss": 1.2671, + 
"step": 8690 + }, + { + "epoch": 0.9167721518987342, + "grad_norm": 0.6887584328651428, + "learning_rate": 2.6138252683857693e-05, + "loss": 1.3374, + "step": 8691 + }, + { + "epoch": 0.9168776371308017, + "grad_norm": 0.6574378609657288, + "learning_rate": 2.607259068821721e-05, + "loss": 1.2919, + "step": 8692 + }, + { + "epoch": 0.9169831223628692, + "grad_norm": 0.6128501892089844, + "learning_rate": 2.6007009812252875e-05, + "loss": 1.2876, + "step": 8693 + }, + { + "epoch": 0.9170886075949367, + "grad_norm": 0.6469999551773071, + "learning_rate": 2.594151006331322e-05, + "loss": 1.3071, + "step": 8694 + }, + { + "epoch": 0.9171940928270043, + "grad_norm": 0.6321470737457275, + "learning_rate": 2.5876091448737788e-05, + "loss": 1.2647, + "step": 8695 + }, + { + "epoch": 0.9172995780590717, + "grad_norm": 0.6459198594093323, + "learning_rate": 2.5810753975857136e-05, + "loss": 1.2998, + "step": 8696 + }, + { + "epoch": 0.9174050632911392, + "grad_norm": 0.662045419216156, + "learning_rate": 2.5745497651992662e-05, + "loss": 1.282, + "step": 8697 + }, + { + "epoch": 0.9175105485232068, + "grad_norm": 0.6677088737487793, + "learning_rate": 2.568032248445651e-05, + "loss": 1.2875, + "step": 8698 + }, + { + "epoch": 0.9176160337552742, + "grad_norm": 0.6326224207878113, + "learning_rate": 2.561522848055217e-05, + "loss": 1.3206, + "step": 8699 + }, + { + "epoch": 0.9177215189873418, + "grad_norm": 0.654550313949585, + "learning_rate": 2.5550215647573482e-05, + "loss": 1.2841, + "step": 8700 + }, + { + "epoch": 0.9178270042194093, + "grad_norm": 0.6296983957290649, + "learning_rate": 2.5485283992805615e-05, + "loss": 1.2939, + "step": 8701 + }, + { + "epoch": 0.9179324894514768, + "grad_norm": 0.6644524335861206, + "learning_rate": 2.5420433523524493e-05, + "loss": 1.2625, + "step": 8702 + }, + { + "epoch": 0.9180379746835443, + "grad_norm": 0.6390732526779175, + "learning_rate": 2.5355664246996813e-05, + "loss": 1.3099, + "step": 8703 + }, + { + "epoch": 
0.9181434599156119, + "grad_norm": 0.642070472240448, + "learning_rate": 2.5290976170480346e-05, + "loss": 1.2773, + "step": 8704 + }, + { + "epoch": 0.9182489451476793, + "grad_norm": 0.721962571144104, + "learning_rate": 2.522636930122371e-05, + "loss": 1.3195, + "step": 8705 + }, + { + "epoch": 0.9183544303797468, + "grad_norm": 0.6558160781860352, + "learning_rate": 2.516184364646637e-05, + "loss": 1.2785, + "step": 8706 + }, + { + "epoch": 0.9184599156118144, + "grad_norm": 0.6375119686126709, + "learning_rate": 2.5097399213438955e-05, + "loss": 1.3064, + "step": 8707 + }, + { + "epoch": 0.9185654008438818, + "grad_norm": 0.6398553848266602, + "learning_rate": 2.50330360093626e-05, + "loss": 1.2827, + "step": 8708 + }, + { + "epoch": 0.9186708860759494, + "grad_norm": 0.6359660625457764, + "learning_rate": 2.4968754041449633e-05, + "loss": 1.3049, + "step": 8709 + }, + { + "epoch": 0.9187763713080169, + "grad_norm": 0.662051260471344, + "learning_rate": 2.490455331690303e-05, + "loss": 1.3196, + "step": 8710 + }, + { + "epoch": 0.9188818565400844, + "grad_norm": 0.6447607278823853, + "learning_rate": 2.4840433842916872e-05, + "loss": 1.3106, + "step": 8711 + }, + { + "epoch": 0.9189873417721519, + "grad_norm": 0.6297264099121094, + "learning_rate": 2.4776395626676162e-05, + "loss": 1.2799, + "step": 8712 + }, + { + "epoch": 0.9190928270042195, + "grad_norm": 0.6347301602363586, + "learning_rate": 2.471243867535658e-05, + "loss": 1.3064, + "step": 8713 + }, + { + "epoch": 0.9191983122362869, + "grad_norm": 0.7061124444007874, + "learning_rate": 2.4648562996124806e-05, + "loss": 1.2846, + "step": 8714 + }, + { + "epoch": 0.9193037974683544, + "grad_norm": 0.6604891419410706, + "learning_rate": 2.4584768596138452e-05, + "loss": 1.2895, + "step": 8715 + }, + { + "epoch": 0.919409282700422, + "grad_norm": 0.6547966599464417, + "learning_rate": 2.4521055482546046e-05, + "loss": 1.2729, + "step": 8716 + }, + { + "epoch": 0.9195147679324894, + "grad_norm": 
0.6604992151260376, + "learning_rate": 2.4457423662486962e-05, + "loss": 1.2365, + "step": 8717 + }, + { + "epoch": 0.919620253164557, + "grad_norm": 0.6430307626724243, + "learning_rate": 2.4393873143091495e-05, + "loss": 1.3137, + "step": 8718 + }, + { + "epoch": 0.9197257383966245, + "grad_norm": 0.6667742133140564, + "learning_rate": 2.43304039314807e-05, + "loss": 1.299, + "step": 8719 + }, + { + "epoch": 0.919831223628692, + "grad_norm": 0.6624470949172974, + "learning_rate": 2.4267016034766637e-05, + "loss": 1.2982, + "step": 8720 + }, + { + "epoch": 0.9199367088607595, + "grad_norm": 0.6930283904075623, + "learning_rate": 2.4203709460052292e-05, + "loss": 1.3138, + "step": 8721 + }, + { + "epoch": 0.9200421940928271, + "grad_norm": 0.7309690713882446, + "learning_rate": 2.414048421443141e-05, + "loss": 1.3029, + "step": 8722 + }, + { + "epoch": 0.9201476793248945, + "grad_norm": 0.6373124718666077, + "learning_rate": 2.407734030498873e-05, + "loss": 1.3154, + "step": 8723 + }, + { + "epoch": 0.920253164556962, + "grad_norm": 0.6930968165397644, + "learning_rate": 2.4014277738799774e-05, + "loss": 1.3464, + "step": 8724 + }, + { + "epoch": 0.9203586497890295, + "grad_norm": 0.6553529500961304, + "learning_rate": 2.395129652293121e-05, + "loss": 1.2796, + "step": 8725 + }, + { + "epoch": 0.920464135021097, + "grad_norm": 0.6629611253738403, + "learning_rate": 2.3888396664440232e-05, + "loss": 1.2474, + "step": 8726 + }, + { + "epoch": 0.9205696202531646, + "grad_norm": 0.6469148993492126, + "learning_rate": 2.38255781703752e-05, + "loss": 1.2962, + "step": 8727 + }, + { + "epoch": 0.920675105485232, + "grad_norm": 0.6272135972976685, + "learning_rate": 2.3762841047775068e-05, + "loss": 1.2575, + "step": 8728 + }, + { + "epoch": 0.9207805907172996, + "grad_norm": 0.6384276151657104, + "learning_rate": 2.3700185303670046e-05, + "loss": 1.353, + "step": 8729 + }, + { + "epoch": 0.9208860759493671, + "grad_norm": 0.6558836698532104, + "learning_rate": 
2.363761094508085e-05, + "loss": 1.2901, + "step": 8730 + }, + { + "epoch": 0.9209915611814345, + "grad_norm": 0.6272590756416321, + "learning_rate": 2.357511797901929e-05, + "loss": 1.2797, + "step": 8731 + }, + { + "epoch": 0.9210970464135021, + "grad_norm": 0.7000598907470703, + "learning_rate": 2.3512706412488012e-05, + "loss": 1.295, + "step": 8732 + }, + { + "epoch": 0.9212025316455696, + "grad_norm": 0.6340290307998657, + "learning_rate": 2.345037625248067e-05, + "loss": 1.3158, + "step": 8733 + }, + { + "epoch": 0.9213080168776371, + "grad_norm": 0.6799741983413696, + "learning_rate": 2.3388127505981515e-05, + "loss": 1.2648, + "step": 8734 + }, + { + "epoch": 0.9214135021097046, + "grad_norm": 0.6648841500282288, + "learning_rate": 2.3325960179965967e-05, + "loss": 1.271, + "step": 8735 + }, + { + "epoch": 0.9215189873417722, + "grad_norm": 0.7634294629096985, + "learning_rate": 2.3263874281400034e-05, + "loss": 1.2781, + "step": 8736 + }, + { + "epoch": 0.9216244725738396, + "grad_norm": 0.676110029220581, + "learning_rate": 2.3201869817240817e-05, + "loss": 1.2762, + "step": 8737 + }, + { + "epoch": 0.9217299578059072, + "grad_norm": 0.6797227263450623, + "learning_rate": 2.313994679443626e-05, + "loss": 1.311, + "step": 8738 + }, + { + "epoch": 0.9218354430379747, + "grad_norm": 0.6393612623214722, + "learning_rate": 2.307810521992515e-05, + "loss": 1.3026, + "step": 8739 + }, + { + "epoch": 0.9219409282700421, + "grad_norm": 0.6732264757156372, + "learning_rate": 2.301634510063702e-05, + "loss": 1.2899, + "step": 8740 + }, + { + "epoch": 0.9220464135021097, + "grad_norm": 0.6586934328079224, + "learning_rate": 2.2954666443492505e-05, + "loss": 1.2882, + "step": 8741 + }, + { + "epoch": 0.9221518987341772, + "grad_norm": 0.6305115818977356, + "learning_rate": 2.2893069255402993e-05, + "loss": 1.2705, + "step": 8742 + }, + { + "epoch": 0.9222573839662447, + "grad_norm": 0.6515875458717346, + "learning_rate": 2.2831553543270793e-05, + "loss": 1.2229, + 
"step": 8743 + }, + { + "epoch": 0.9223628691983122, + "grad_norm": 0.6770565509796143, + "learning_rate": 2.277011931398898e-05, + "loss": 1.2908, + "step": 8744 + }, + { + "epoch": 0.9224683544303798, + "grad_norm": 0.700370192527771, + "learning_rate": 2.2708766574441626e-05, + "loss": 1.3318, + "step": 8745 + }, + { + "epoch": 0.9225738396624472, + "grad_norm": 0.6876305937767029, + "learning_rate": 2.2647495331503565e-05, + "loss": 1.3047, + "step": 8746 + }, + { + "epoch": 0.9226793248945148, + "grad_norm": 0.6861106157302856, + "learning_rate": 2.2586305592040558e-05, + "loss": 1.2876, + "step": 8747 + }, + { + "epoch": 0.9227848101265823, + "grad_norm": 0.62627112865448, + "learning_rate": 2.2525197362909282e-05, + "loss": 1.277, + "step": 8748 + }, + { + "epoch": 0.9228902953586497, + "grad_norm": 0.644791305065155, + "learning_rate": 2.24641706509571e-05, + "loss": 1.2943, + "step": 8749 + }, + { + "epoch": 0.9229957805907173, + "grad_norm": 0.6515937447547913, + "learning_rate": 2.2403225463022288e-05, + "loss": 1.3124, + "step": 8750 + }, + { + "epoch": 0.9231012658227848, + "grad_norm": 0.6207574605941772, + "learning_rate": 2.2342361805934297e-05, + "loss": 1.2618, + "step": 8751 + }, + { + "epoch": 0.9232067510548523, + "grad_norm": 0.6734946966171265, + "learning_rate": 2.2281579686513176e-05, + "loss": 1.3073, + "step": 8752 + }, + { + "epoch": 0.9233122362869198, + "grad_norm": 0.6251261234283447, + "learning_rate": 2.2220879111569725e-05, + "loss": 1.2663, + "step": 8753 + }, + { + "epoch": 0.9234177215189874, + "grad_norm": 0.6605691909790039, + "learning_rate": 2.2160260087905753e-05, + "loss": 1.2845, + "step": 8754 + }, + { + "epoch": 0.9235232067510548, + "grad_norm": 0.6732035279273987, + "learning_rate": 2.2099722622314078e-05, + "loss": 1.2902, + "step": 8755 + }, + { + "epoch": 0.9236286919831224, + "grad_norm": 0.6248368620872498, + "learning_rate": 2.203926672157802e-05, + "loss": 1.2853, + "step": 8756 + }, + { + "epoch": 
0.9237341772151899, + "grad_norm": 0.7231845259666443, + "learning_rate": 2.1978892392472085e-05, + "loss": 1.2852, + "step": 8757 + }, + { + "epoch": 0.9238396624472573, + "grad_norm": 0.6817112565040588, + "learning_rate": 2.1918599641761517e-05, + "loss": 1.2258, + "step": 8758 + }, + { + "epoch": 0.9239451476793249, + "grad_norm": 0.633173406124115, + "learning_rate": 2.185838847620242e-05, + "loss": 1.2994, + "step": 8759 + }, + { + "epoch": 0.9240506329113924, + "grad_norm": 0.6621851325035095, + "learning_rate": 2.1798258902541723e-05, + "loss": 1.2712, + "step": 8760 + }, + { + "epoch": 0.9241561181434599, + "grad_norm": 0.6575422286987305, + "learning_rate": 2.173821092751721e-05, + "loss": 1.3143, + "step": 8761 + }, + { + "epoch": 0.9242616033755274, + "grad_norm": 0.6650277376174927, + "learning_rate": 2.1678244557857663e-05, + "loss": 1.2541, + "step": 8762 + }, + { + "epoch": 0.924367088607595, + "grad_norm": 0.7058783769607544, + "learning_rate": 2.161835980028254e-05, + "loss": 1.3408, + "step": 8763 + }, + { + "epoch": 0.9244725738396624, + "grad_norm": 0.6186298727989197, + "learning_rate": 2.1558556661502222e-05, + "loss": 1.299, + "step": 8764 + }, + { + "epoch": 0.92457805907173, + "grad_norm": 0.6853761672973633, + "learning_rate": 2.1498835148218017e-05, + "loss": 1.2571, + "step": 8765 + }, + { + "epoch": 0.9246835443037975, + "grad_norm": 0.6792542338371277, + "learning_rate": 2.1439195267121902e-05, + "loss": 1.2834, + "step": 8766 + }, + { + "epoch": 0.924789029535865, + "grad_norm": 0.7595261335372925, + "learning_rate": 2.137963702489687e-05, + "loss": 1.2895, + "step": 8767 + }, + { + "epoch": 0.9248945147679325, + "grad_norm": 0.6392688155174255, + "learning_rate": 2.132016042821683e-05, + "loss": 1.2778, + "step": 8768 + }, + { + "epoch": 0.925, + "grad_norm": 0.6491480469703674, + "learning_rate": 2.1260765483746282e-05, + "loss": 1.2782, + "step": 8769 + }, + { + "epoch": 0.9251054852320675, + "grad_norm": 0.618364155292511, + 
"learning_rate": 2.120145219814082e-05, + "loss": 1.2624, + "step": 8770 + }, + { + "epoch": 0.925210970464135, + "grad_norm": 0.642127275466919, + "learning_rate": 2.1142220578046712e-05, + "loss": 1.3251, + "step": 8771 + }, + { + "epoch": 0.9253164556962026, + "grad_norm": 0.6392276883125305, + "learning_rate": 2.1083070630101232e-05, + "loss": 1.268, + "step": 8772 + }, + { + "epoch": 0.92542194092827, + "grad_norm": 0.6623163223266602, + "learning_rate": 2.102400236093241e-05, + "loss": 1.3095, + "step": 8773 + }, + { + "epoch": 0.9255274261603376, + "grad_norm": 0.6468362212181091, + "learning_rate": 2.096501577715912e-05, + "loss": 1.2685, + "step": 8774 + }, + { + "epoch": 0.9256329113924051, + "grad_norm": 0.6505231857299805, + "learning_rate": 2.0906110885391072e-05, + "loss": 1.2988, + "step": 8775 + }, + { + "epoch": 0.9257383966244725, + "grad_norm": 0.6288246512413025, + "learning_rate": 2.0847287692228905e-05, + "loss": 1.2907, + "step": 8776 + }, + { + "epoch": 0.9258438818565401, + "grad_norm": 0.6515727639198303, + "learning_rate": 2.0788546204264013e-05, + "loss": 1.2893, + "step": 8777 + }, + { + "epoch": 0.9259493670886076, + "grad_norm": 0.6384720802307129, + "learning_rate": 2.0729886428078716e-05, + "loss": 1.2837, + "step": 8778 + }, + { + "epoch": 0.9260548523206751, + "grad_norm": 0.6518926620483398, + "learning_rate": 2.0671308370246167e-05, + "loss": 1.3129, + "step": 8779 + }, + { + "epoch": 0.9261603375527426, + "grad_norm": 0.652438759803772, + "learning_rate": 2.0612812037330202e-05, + "loss": 1.305, + "step": 8780 + }, + { + "epoch": 0.9262658227848102, + "grad_norm": 0.6527673602104187, + "learning_rate": 2.0554397435885746e-05, + "loss": 1.3042, + "step": 8781 + }, + { + "epoch": 0.9263713080168776, + "grad_norm": 0.6762939691543579, + "learning_rate": 2.0496064572458395e-05, + "loss": 1.3268, + "step": 8782 + }, + { + "epoch": 0.9264767932489452, + "grad_norm": 0.6261479258537292, + "learning_rate": 2.043781345358467e-05, + 
"loss": 1.2535, + "step": 8783 + }, + { + "epoch": 0.9265822784810127, + "grad_norm": 0.6744138598442078, + "learning_rate": 2.0379644085791767e-05, + "loss": 1.2765, + "step": 8784 + }, + { + "epoch": 0.9266877637130801, + "grad_norm": 0.6861742734909058, + "learning_rate": 2.032155647559805e-05, + "loss": 1.3019, + "step": 8785 + }, + { + "epoch": 0.9267932489451477, + "grad_norm": 0.651098370552063, + "learning_rate": 2.0263550629512406e-05, + "loss": 1.2715, + "step": 8786 + }, + { + "epoch": 0.9268987341772152, + "grad_norm": 0.6742833852767944, + "learning_rate": 2.0205626554034713e-05, + "loss": 1.2601, + "step": 8787 + }, + { + "epoch": 0.9270042194092827, + "grad_norm": 0.6415863633155823, + "learning_rate": 2.0147784255655692e-05, + "loss": 1.2909, + "step": 8788 + }, + { + "epoch": 0.9271097046413502, + "grad_norm": 0.6445305943489075, + "learning_rate": 2.009002374085675e-05, + "loss": 1.2946, + "step": 8789 + }, + { + "epoch": 0.9272151898734177, + "grad_norm": 0.6274454593658447, + "learning_rate": 2.003234501611037e-05, + "loss": 1.304, + "step": 8790 + }, + { + "epoch": 0.9273206751054852, + "grad_norm": 0.6290265917778015, + "learning_rate": 1.9974748087879636e-05, + "loss": 1.309, + "step": 8791 + }, + { + "epoch": 0.9274261603375528, + "grad_norm": 0.6422339677810669, + "learning_rate": 1.991723296261863e-05, + "loss": 1.2747, + "step": 8792 + }, + { + "epoch": 0.9275316455696202, + "grad_norm": 0.6277301907539368, + "learning_rate": 1.985979964677212e-05, + "loss": 1.2915, + "step": 8793 + }, + { + "epoch": 0.9276371308016877, + "grad_norm": 0.6119152307510376, + "learning_rate": 1.9802448146775953e-05, + "loss": 1.2713, + "step": 8794 + }, + { + "epoch": 0.9277426160337553, + "grad_norm": 0.6365792751312256, + "learning_rate": 1.9745178469056575e-05, + "loss": 1.2759, + "step": 8795 + }, + { + "epoch": 0.9278481012658227, + "grad_norm": 0.6542288661003113, + "learning_rate": 1.9687990620031266e-05, + "loss": 1.303, + "step": 8796 + }, + { + 
"epoch": 0.9279535864978903, + "grad_norm": 0.7019672989845276, + "learning_rate": 1.963088460610832e-05, + "loss": 1.2945, + "step": 8797 + }, + { + "epoch": 0.9280590717299578, + "grad_norm": 0.659018337726593, + "learning_rate": 1.9573860433686696e-05, + "loss": 1.2891, + "step": 8798 + }, + { + "epoch": 0.9281645569620253, + "grad_norm": 0.6344911456108093, + "learning_rate": 1.9516918109156206e-05, + "loss": 1.2672, + "step": 8799 + }, + { + "epoch": 0.9282700421940928, + "grad_norm": 0.6486894488334656, + "learning_rate": 1.9460057638897578e-05, + "loss": 1.2897, + "step": 8800 + }, + { + "epoch": 0.9283755274261604, + "grad_norm": 0.6594616770744324, + "learning_rate": 1.9403279029282376e-05, + "loss": 1.3141, + "step": 8801 + }, + { + "epoch": 0.9284810126582278, + "grad_norm": 0.7131558060646057, + "learning_rate": 1.9346582286672686e-05, + "loss": 1.2862, + "step": 8802 + }, + { + "epoch": 0.9285864978902953, + "grad_norm": 0.6758350133895874, + "learning_rate": 1.9289967417421922e-05, + "loss": 1.2985, + "step": 8803 + }, + { + "epoch": 0.9286919831223629, + "grad_norm": 0.6371634006500244, + "learning_rate": 1.9233434427873924e-05, + "loss": 1.2884, + "step": 8804 + }, + { + "epoch": 0.9287974683544303, + "grad_norm": 0.6484395265579224, + "learning_rate": 1.9176983324363545e-05, + "loss": 1.2938, + "step": 8805 + }, + { + "epoch": 0.9289029535864979, + "grad_norm": 0.6984053254127502, + "learning_rate": 1.912061411321639e-05, + "loss": 1.2983, + "step": 8806 + }, + { + "epoch": 0.9290084388185654, + "grad_norm": 0.6278958320617676, + "learning_rate": 1.9064326800748906e-05, + "loss": 1.2985, + "step": 8807 + }, + { + "epoch": 0.9291139240506329, + "grad_norm": 0.6755838394165039, + "learning_rate": 1.9008121393268462e-05, + "loss": 1.3111, + "step": 8808 + }, + { + "epoch": 0.9292194092827004, + "grad_norm": 0.6650537252426147, + "learning_rate": 1.8951997897072943e-05, + "loss": 1.326, + "step": 8809 + }, + { + "epoch": 0.929324894514768, + 
"grad_norm": 0.6477304100990295, + "learning_rate": 1.8895956318451398e-05, + "loss": 1.343, + "step": 8810 + }, + { + "epoch": 0.9294303797468354, + "grad_norm": 0.6568678021430969, + "learning_rate": 1.8839996663683635e-05, + "loss": 1.299, + "step": 8811 + }, + { + "epoch": 0.929535864978903, + "grad_norm": 0.6205330491065979, + "learning_rate": 1.878411893904014e-05, + "loss": 1.2903, + "step": 8812 + }, + { + "epoch": 0.9296413502109705, + "grad_norm": 0.639682412147522, + "learning_rate": 1.872832315078224e-05, + "loss": 1.3, + "step": 8813 + }, + { + "epoch": 0.9297468354430379, + "grad_norm": 0.6328705549240112, + "learning_rate": 1.8672609305162263e-05, + "loss": 1.3052, + "step": 8814 + }, + { + "epoch": 0.9298523206751055, + "grad_norm": 0.6304113864898682, + "learning_rate": 1.8616977408423053e-05, + "loss": 1.2848, + "step": 8815 + }, + { + "epoch": 0.929957805907173, + "grad_norm": 0.6599707007408142, + "learning_rate": 1.856142746679862e-05, + "loss": 1.2983, + "step": 8816 + }, + { + "epoch": 0.9300632911392405, + "grad_norm": 0.6294321417808533, + "learning_rate": 1.8505959486513485e-05, + "loss": 1.3065, + "step": 8817 + }, + { + "epoch": 0.930168776371308, + "grad_norm": 0.6387044191360474, + "learning_rate": 1.8450573473783094e-05, + "loss": 1.2887, + "step": 8818 + }, + { + "epoch": 0.9302742616033756, + "grad_norm": 0.6572495102882385, + "learning_rate": 1.8395269434813733e-05, + "loss": 1.2736, + "step": 8819 + }, + { + "epoch": 0.930379746835443, + "grad_norm": 0.626339852809906, + "learning_rate": 1.8340047375802693e-05, + "loss": 1.2747, + "step": 8820 + }, + { + "epoch": 0.9304852320675105, + "grad_norm": 0.6193078756332397, + "learning_rate": 1.8284907302937608e-05, + "loss": 1.3035, + "step": 8821 + }, + { + "epoch": 0.9305907172995781, + "grad_norm": 0.6447814702987671, + "learning_rate": 1.822984922239737e-05, + "loss": 1.3662, + "step": 8822 + }, + { + "epoch": 0.9306962025316455, + "grad_norm": 0.6729254126548767, + "learning_rate": 
1.8174873140351544e-05, + "loss": 1.2906, + "step": 8823 + }, + { + "epoch": 0.9308016877637131, + "grad_norm": 0.7099180817604065, + "learning_rate": 1.8119979062960286e-05, + "loss": 1.3311, + "step": 8824 + }, + { + "epoch": 0.9309071729957806, + "grad_norm": 0.6160340309143066, + "learning_rate": 1.806516699637492e-05, + "loss": 1.2994, + "step": 8825 + }, + { + "epoch": 0.9310126582278481, + "grad_norm": 0.6485100388526917, + "learning_rate": 1.8010436946737292e-05, + "loss": 1.3033, + "step": 8826 + }, + { + "epoch": 0.9311181434599156, + "grad_norm": 0.6546235084533691, + "learning_rate": 1.7955788920180238e-05, + "loss": 1.3017, + "step": 8827 + }, + { + "epoch": 0.9312236286919832, + "grad_norm": 0.630158007144928, + "learning_rate": 1.7901222922827282e-05, + "loss": 1.3271, + "step": 8828 + }, + { + "epoch": 0.9313291139240506, + "grad_norm": 0.706145167350769, + "learning_rate": 1.7846738960792945e-05, + "loss": 1.2889, + "step": 8829 + }, + { + "epoch": 0.9314345991561181, + "grad_norm": 0.6686950922012329, + "learning_rate": 1.7792337040182434e-05, + "loss": 1.2635, + "step": 8830 + }, + { + "epoch": 0.9315400843881857, + "grad_norm": 0.6501719951629639, + "learning_rate": 1.773801716709153e-05, + "loss": 1.2515, + "step": 8831 + }, + { + "epoch": 0.9316455696202531, + "grad_norm": 0.6068227887153625, + "learning_rate": 1.7683779347607286e-05, + "loss": 1.3217, + "step": 8832 + }, + { + "epoch": 0.9317510548523207, + "grad_norm": 0.646128237247467, + "learning_rate": 1.7629623587807175e-05, + "loss": 1.2796, + "step": 8833 + }, + { + "epoch": 0.9318565400843882, + "grad_norm": 0.6427727341651917, + "learning_rate": 1.7575549893759756e-05, + "loss": 1.2836, + "step": 8834 + }, + { + "epoch": 0.9319620253164557, + "grad_norm": 0.6217547059059143, + "learning_rate": 1.7521558271524103e-05, + "loss": 1.2929, + "step": 8835 + }, + { + "epoch": 0.9320675105485232, + "grad_norm": 0.7631289958953857, + "learning_rate": 1.7467648727150202e-05, + "loss": 1.3091, 
+ "step": 8836 + }, + { + "epoch": 0.9321729957805908, + "grad_norm": 0.6873257756233215, + "learning_rate": 1.741382126667915e-05, + "loss": 1.318, + "step": 8837 + }, + { + "epoch": 0.9322784810126582, + "grad_norm": 0.6299095749855042, + "learning_rate": 1.7360075896142357e-05, + "loss": 1.302, + "step": 8838 + }, + { + "epoch": 0.9323839662447257, + "grad_norm": 0.6446636915206909, + "learning_rate": 1.7306412621562352e-05, + "loss": 1.256, + "step": 8839 + }, + { + "epoch": 0.9324894514767933, + "grad_norm": 0.6372004151344299, + "learning_rate": 1.72528314489524e-05, + "loss": 1.3001, + "step": 8840 + }, + { + "epoch": 0.9325949367088607, + "grad_norm": 0.6337741613388062, + "learning_rate": 1.719933238431645e-05, + "loss": 1.2652, + "step": 8841 + }, + { + "epoch": 0.9327004219409283, + "grad_norm": 0.6509608030319214, + "learning_rate": 1.714591543364938e-05, + "loss": 1.341, + "step": 8842 + }, + { + "epoch": 0.9328059071729958, + "grad_norm": 0.6718770265579224, + "learning_rate": 1.7092580602936807e-05, + "loss": 1.29, + "step": 8843 + }, + { + "epoch": 0.9329113924050633, + "grad_norm": 0.6530804634094238, + "learning_rate": 1.703932789815521e-05, + "loss": 1.2904, + "step": 8844 + }, + { + "epoch": 0.9330168776371308, + "grad_norm": 0.6238329410552979, + "learning_rate": 1.6986157325271727e-05, + "loss": 1.2819, + "step": 8845 + }, + { + "epoch": 0.9331223628691984, + "grad_norm": 0.7240874171257019, + "learning_rate": 1.6933068890244595e-05, + "loss": 1.2907, + "step": 8846 + }, + { + "epoch": 0.9332278481012658, + "grad_norm": 0.6216747760772705, + "learning_rate": 1.688006259902239e-05, + "loss": 1.245, + "step": 8847 + }, + { + "epoch": 0.9333333333333333, + "grad_norm": 0.6678140759468079, + "learning_rate": 1.6827138457544854e-05, + "loss": 1.318, + "step": 8848 + }, + { + "epoch": 0.9334388185654009, + "grad_norm": 0.6668659448623657, + "learning_rate": 1.677429647174242e-05, + "loss": 1.3182, + "step": 8849 + }, + { + "epoch": 
0.9335443037974683, + "grad_norm": 0.702039361000061, + "learning_rate": 1.6721536647536255e-05, + "loss": 1.268, + "step": 8850 + }, + { + "epoch": 0.9336497890295359, + "grad_norm": 0.6385174989700317, + "learning_rate": 1.666885899083831e-05, + "loss": 1.3048, + "step": 8851 + }, + { + "epoch": 0.9337552742616034, + "grad_norm": 0.627306342124939, + "learning_rate": 1.6616263507551437e-05, + "loss": 1.295, + "step": 8852 + }, + { + "epoch": 0.9338607594936709, + "grad_norm": 0.6790322065353394, + "learning_rate": 1.656375020356926e-05, + "loss": 1.2766, + "step": 8853 + }, + { + "epoch": 0.9339662447257384, + "grad_norm": 0.6774879693984985, + "learning_rate": 1.6511319084776073e-05, + "loss": 1.2806, + "step": 8854 + }, + { + "epoch": 0.9340717299578059, + "grad_norm": 0.673404335975647, + "learning_rate": 1.645897015704709e-05, + "loss": 1.3214, + "step": 8855 + }, + { + "epoch": 0.9341772151898734, + "grad_norm": 0.6353392601013184, + "learning_rate": 1.6406703426248366e-05, + "loss": 1.2898, + "step": 8856 + }, + { + "epoch": 0.934282700421941, + "grad_norm": 0.6273800134658813, + "learning_rate": 1.6354518898236472e-05, + "loss": 1.2573, + "step": 8857 + }, + { + "epoch": 0.9343881856540084, + "grad_norm": 0.6198554635047913, + "learning_rate": 1.630241657885906e-05, + "loss": 1.3082, + "step": 8858 + }, + { + "epoch": 0.9344936708860759, + "grad_norm": 0.6407731175422668, + "learning_rate": 1.6250396473954377e-05, + "loss": 1.3071, + "step": 8859 + }, + { + "epoch": 0.9345991561181435, + "grad_norm": 0.7126111388206482, + "learning_rate": 1.6198458589351595e-05, + "loss": 1.2624, + "step": 8860 + }, + { + "epoch": 0.9347046413502109, + "grad_norm": 0.6424674987792969, + "learning_rate": 1.614660293087056e-05, + "loss": 1.2657, + "step": 8861 + }, + { + "epoch": 0.9348101265822785, + "grad_norm": 0.6175163388252258, + "learning_rate": 1.609482950432195e-05, + "loss": 1.3248, + "step": 8862 + }, + { + "epoch": 0.934915611814346, + "grad_norm": 
0.6703386306762695, + "learning_rate": 1.6043138315507382e-05, + "loss": 1.3011, + "step": 8863 + }, + { + "epoch": 0.9350210970464135, + "grad_norm": 0.6956830024719238, + "learning_rate": 1.5991529370218887e-05, + "loss": 1.2767, + "step": 8864 + }, + { + "epoch": 0.935126582278481, + "grad_norm": 0.6625862717628479, + "learning_rate": 1.5940002674239756e-05, + "loss": 1.314, + "step": 8865 + }, + { + "epoch": 0.9352320675105485, + "grad_norm": 0.7016151547431946, + "learning_rate": 1.588855823334362e-05, + "loss": 1.3037, + "step": 8866 + }, + { + "epoch": 0.935337552742616, + "grad_norm": 0.6482671499252319, + "learning_rate": 1.5837196053295117e-05, + "loss": 1.3201, + "step": 8867 + }, + { + "epoch": 0.9354430379746835, + "grad_norm": 0.6590088605880737, + "learning_rate": 1.5785916139849725e-05, + "loss": 1.2933, + "step": 8868 + }, + { + "epoch": 0.9355485232067511, + "grad_norm": 0.6276624202728271, + "learning_rate": 1.573471849875352e-05, + "loss": 1.2867, + "step": 8869 + }, + { + "epoch": 0.9356540084388185, + "grad_norm": 0.6405860185623169, + "learning_rate": 1.568360313574349e-05, + "loss": 1.2601, + "step": 8870 + }, + { + "epoch": 0.9357594936708861, + "grad_norm": 0.6453024744987488, + "learning_rate": 1.5632570056547308e-05, + "loss": 1.3241, + "step": 8871 + }, + { + "epoch": 0.9358649789029536, + "grad_norm": 0.6304652690887451, + "learning_rate": 1.5581619266883563e-05, + "loss": 1.2523, + "step": 8872 + }, + { + "epoch": 0.935970464135021, + "grad_norm": 0.6493123173713684, + "learning_rate": 1.5530750772461522e-05, + "loss": 1.3156, + "step": 8873 + }, + { + "epoch": 0.9360759493670886, + "grad_norm": 0.645220160484314, + "learning_rate": 1.5479964578981293e-05, + "loss": 1.2819, + "step": 8874 + }, + { + "epoch": 0.9361814345991561, + "grad_norm": 0.8058980703353882, + "learning_rate": 1.5429260692133656e-05, + "loss": 1.274, + "step": 8875 + }, + { + "epoch": 0.9362869198312236, + "grad_norm": 0.6567893028259277, + "learning_rate": 
1.5378639117600234e-05, + "loss": 1.3089, + "step": 8876 + }, + { + "epoch": 0.9363924050632911, + "grad_norm": 0.6442247033119202, + "learning_rate": 1.532809986105349e-05, + "loss": 1.2857, + "step": 8877 + }, + { + "epoch": 0.9364978902953587, + "grad_norm": 0.6348556876182556, + "learning_rate": 1.527764292815656e-05, + "loss": 1.314, + "step": 8878 + }, + { + "epoch": 0.9366033755274261, + "grad_norm": 0.6380431652069092, + "learning_rate": 1.522726832456342e-05, + "loss": 1.3071, + "step": 8879 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 0.7035972476005554, + "learning_rate": 1.517697605591864e-05, + "loss": 1.3012, + "step": 8880 + }, + { + "epoch": 0.9368143459915612, + "grad_norm": 0.7702103853225708, + "learning_rate": 1.512676612785796e-05, + "loss": 1.3029, + "step": 8881 + }, + { + "epoch": 0.9369198312236287, + "grad_norm": 0.6809871792793274, + "learning_rate": 1.5076638546007548e-05, + "loss": 1.3309, + "step": 8882 + }, + { + "epoch": 0.9370253164556962, + "grad_norm": 0.7176591157913208, + "learning_rate": 1.502659331598441e-05, + "loss": 1.2711, + "step": 8883 + }, + { + "epoch": 0.9371308016877637, + "grad_norm": 0.681861400604248, + "learning_rate": 1.4976630443396395e-05, + "loss": 1.3076, + "step": 8884 + }, + { + "epoch": 0.9372362869198312, + "grad_norm": 0.7158468961715698, + "learning_rate": 1.4926749933842187e-05, + "loss": 1.2797, + "step": 8885 + }, + { + "epoch": 0.9373417721518987, + "grad_norm": 0.6354989409446716, + "learning_rate": 1.4876951792910987e-05, + "loss": 1.3076, + "step": 8886 + }, + { + "epoch": 0.9374472573839663, + "grad_norm": 0.6345798373222351, + "learning_rate": 1.4827236026182994e-05, + "loss": 1.2818, + "step": 8887 + }, + { + "epoch": 0.9375527426160337, + "grad_norm": 0.6349202990531921, + "learning_rate": 1.4777602639229004e-05, + "loss": 1.3009, + "step": 8888 + }, + { + "epoch": 0.9376582278481013, + "grad_norm": 0.6453039050102234, + "learning_rate": 1.4728051637610902e-05, + "loss": 1.3006, + 
"step": 8889 + }, + { + "epoch": 0.9377637130801688, + "grad_norm": 0.6579549908638, + "learning_rate": 1.4678583026880993e-05, + "loss": 1.2741, + "step": 8890 + }, + { + "epoch": 0.9378691983122363, + "grad_norm": 0.7612799406051636, + "learning_rate": 1.4629196812582513e-05, + "loss": 1.2914, + "step": 8891 + }, + { + "epoch": 0.9379746835443038, + "grad_norm": 0.6332908868789673, + "learning_rate": 1.457989300024945e-05, + "loss": 1.2991, + "step": 8892 + }, + { + "epoch": 0.9380801687763713, + "grad_norm": 0.6452050805091858, + "learning_rate": 1.4530671595406469e-05, + "loss": 1.2705, + "step": 8893 + }, + { + "epoch": 0.9381856540084388, + "grad_norm": 0.6734269261360168, + "learning_rate": 1.4481532603569076e-05, + "loss": 1.2723, + "step": 8894 + }, + { + "epoch": 0.9382911392405063, + "grad_norm": 0.6415935158729553, + "learning_rate": 1.4432476030243696e-05, + "loss": 1.2989, + "step": 8895 + }, + { + "epoch": 0.9383966244725739, + "grad_norm": 0.7096937298774719, + "learning_rate": 1.4383501880927103e-05, + "loss": 1.2827, + "step": 8896 + }, + { + "epoch": 0.9385021097046413, + "grad_norm": 0.6522164344787598, + "learning_rate": 1.433461016110732e-05, + "loss": 1.3001, + "step": 8897 + }, + { + "epoch": 0.9386075949367089, + "grad_norm": 0.6308552622795105, + "learning_rate": 1.42858008762628e-05, + "loss": 1.2637, + "step": 8898 + }, + { + "epoch": 0.9387130801687764, + "grad_norm": 0.786566972732544, + "learning_rate": 1.4237074031862918e-05, + "loss": 1.3178, + "step": 8899 + }, + { + "epoch": 0.9388185654008439, + "grad_norm": 0.62906414270401, + "learning_rate": 1.4188429633367721e-05, + "loss": 1.2893, + "step": 8900 + }, + { + "epoch": 0.9389240506329114, + "grad_norm": 0.6601457595825195, + "learning_rate": 1.4139867686228102e-05, + "loss": 1.3076, + "step": 8901 + }, + { + "epoch": 0.939029535864979, + "grad_norm": 0.6498968601226807, + "learning_rate": 1.4091388195885625e-05, + "loss": 1.269, + "step": 8902 + }, + { + "epoch": 
0.9391350210970464, + "grad_norm": 0.6329008340835571, + "learning_rate": 1.404299116777269e-05, + "loss": 1.295, + "step": 8903 + }, + { + "epoch": 0.9392405063291139, + "grad_norm": 0.6256287097930908, + "learning_rate": 1.3994676607312379e-05, + "loss": 1.2959, + "step": 8904 + }, + { + "epoch": 0.9393459915611815, + "grad_norm": 0.6441547870635986, + "learning_rate": 1.3946444519918611e-05, + "loss": 1.297, + "step": 8905 + }, + { + "epoch": 0.9394514767932489, + "grad_norm": 0.6476017236709595, + "learning_rate": 1.3898294910995979e-05, + "loss": 1.3228, + "step": 8906 + }, + { + "epoch": 0.9395569620253165, + "grad_norm": 0.6579238176345825, + "learning_rate": 1.385022778594e-05, + "loss": 1.2741, + "step": 8907 + }, + { + "epoch": 0.939662447257384, + "grad_norm": 0.6296886801719666, + "learning_rate": 1.3802243150136784e-05, + "loss": 1.2776, + "step": 8908 + }, + { + "epoch": 0.9397679324894515, + "grad_norm": 0.6482178568840027, + "learning_rate": 1.3754341008963194e-05, + "loss": 1.2784, + "step": 8909 + }, + { + "epoch": 0.939873417721519, + "grad_norm": 0.6842902302742004, + "learning_rate": 1.370652136778694e-05, + "loss": 1.2585, + "step": 8910 + }, + { + "epoch": 0.9399789029535865, + "grad_norm": 0.6515993475914001, + "learning_rate": 1.3658784231966481e-05, + "loss": 1.2784, + "step": 8911 + }, + { + "epoch": 0.940084388185654, + "grad_norm": 0.6142240166664124, + "learning_rate": 1.3611129606851041e-05, + "loss": 1.316, + "step": 8912 + }, + { + "epoch": 0.9401898734177215, + "grad_norm": 0.6389456987380981, + "learning_rate": 1.3563557497780432e-05, + "loss": 1.3011, + "step": 8913 + }, + { + "epoch": 0.9402953586497891, + "grad_norm": 0.6490915417671204, + "learning_rate": 1.3516067910085306e-05, + "loss": 1.2955, + "step": 8914 + }, + { + "epoch": 0.9404008438818565, + "grad_norm": 0.6589459180831909, + "learning_rate": 1.3468660849087322e-05, + "loss": 1.2825, + "step": 8915 + }, + { + "epoch": 0.9405063291139241, + "grad_norm": 
0.6719186902046204, + "learning_rate": 1.3421336320098565e-05, + "loss": 1.3411, + "step": 8916 + }, + { + "epoch": 0.9406118143459916, + "grad_norm": 0.6859983801841736, + "learning_rate": 1.3374094328422043e-05, + "loss": 1.3055, + "step": 8917 + }, + { + "epoch": 0.940717299578059, + "grad_norm": 0.7024413347244263, + "learning_rate": 1.3326934879351272e-05, + "loss": 1.2541, + "step": 8918 + }, + { + "epoch": 0.9408227848101266, + "grad_norm": 0.6571445465087891, + "learning_rate": 1.327985797817094e-05, + "loss": 1.2824, + "step": 8919 + }, + { + "epoch": 0.9409282700421941, + "grad_norm": 0.6402243971824646, + "learning_rate": 1.3232863630156077e-05, + "loss": 1.2634, + "step": 8920 + }, + { + "epoch": 0.9410337552742616, + "grad_norm": 0.6720501184463501, + "learning_rate": 1.3185951840572723e-05, + "loss": 1.2949, + "step": 8921 + }, + { + "epoch": 0.9411392405063291, + "grad_norm": 0.6298735737800598, + "learning_rate": 1.313912261467759e-05, + "loss": 1.2853, + "step": 8922 + }, + { + "epoch": 0.9412447257383966, + "grad_norm": 0.6442456841468811, + "learning_rate": 1.3092375957717978e-05, + "loss": 1.2886, + "step": 8923 + }, + { + "epoch": 0.9413502109704641, + "grad_norm": 0.6501312851905823, + "learning_rate": 1.3045711874932281e-05, + "loss": 1.2795, + "step": 8924 + }, + { + "epoch": 0.9414556962025317, + "grad_norm": 0.6483657360076904, + "learning_rate": 1.2999130371549318e-05, + "loss": 1.3191, + "step": 8925 + }, + { + "epoch": 0.9415611814345991, + "grad_norm": 0.6309420466423035, + "learning_rate": 1.2952631452788826e-05, + "loss": 1.2645, + "step": 8926 + }, + { + "epoch": 0.9416666666666667, + "grad_norm": 0.6453174948692322, + "learning_rate": 1.2906215123861226e-05, + "loss": 1.3115, + "step": 8927 + }, + { + "epoch": 0.9417721518987342, + "grad_norm": 0.6412608623504639, + "learning_rate": 1.2859881389967687e-05, + "loss": 1.3097, + "step": 8928 + }, + { + "epoch": 0.9418776371308016, + "grad_norm": 0.6194884777069092, + "learning_rate": 
1.2813630256300224e-05, + "loss": 1.2627, + "step": 8929 + }, + { + "epoch": 0.9419831223628692, + "grad_norm": 0.6396390199661255, + "learning_rate": 1.2767461728041357e-05, + "loss": 1.273, + "step": 8930 + }, + { + "epoch": 0.9420886075949367, + "grad_norm": 0.6599783897399902, + "learning_rate": 1.2721375810364616e-05, + "loss": 1.2619, + "step": 8931 + }, + { + "epoch": 0.9421940928270042, + "grad_norm": 0.6427937150001526, + "learning_rate": 1.267537250843412e-05, + "loss": 1.2819, + "step": 8932 + }, + { + "epoch": 0.9422995780590717, + "grad_norm": 0.6201310753822327, + "learning_rate": 1.2629451827404659e-05, + "loss": 1.2884, + "step": 8933 + }, + { + "epoch": 0.9424050632911393, + "grad_norm": 0.6709611415863037, + "learning_rate": 1.258361377242212e-05, + "loss": 1.307, + "step": 8934 + }, + { + "epoch": 0.9425105485232067, + "grad_norm": 0.6638684868812561, + "learning_rate": 1.2537858348622728e-05, + "loss": 1.3049, + "step": 8935 + }, + { + "epoch": 0.9426160337552743, + "grad_norm": 0.6238030195236206, + "learning_rate": 1.2492185561133545e-05, + "loss": 1.2741, + "step": 8936 + }, + { + "epoch": 0.9427215189873418, + "grad_norm": 0.6426069140434265, + "learning_rate": 1.2446595415072565e-05, + "loss": 1.2916, + "step": 8937 + }, + { + "epoch": 0.9428270042194092, + "grad_norm": 0.6365111470222473, + "learning_rate": 1.2401087915548365e-05, + "loss": 1.2722, + "step": 8938 + }, + { + "epoch": 0.9429324894514768, + "grad_norm": 0.6403106451034546, + "learning_rate": 1.2355663067660283e-05, + "loss": 1.3089, + "step": 8939 + }, + { + "epoch": 0.9430379746835443, + "grad_norm": 0.6470937132835388, + "learning_rate": 1.2310320876498333e-05, + "loss": 1.2991, + "step": 8940 + }, + { + "epoch": 0.9431434599156118, + "grad_norm": 0.6233829259872437, + "learning_rate": 1.2265061347143447e-05, + "loss": 1.2597, + "step": 8941 + }, + { + "epoch": 0.9432489451476793, + "grad_norm": 0.6121683716773987, + "learning_rate": 1.2219884484667071e-05, + "loss": 
1.2814, + "step": 8942 + }, + { + "epoch": 0.9433544303797469, + "grad_norm": 0.6794543266296387, + "learning_rate": 1.2174790294131405e-05, + "loss": 1.2889, + "step": 8943 + }, + { + "epoch": 0.9434599156118143, + "grad_norm": 0.6617560982704163, + "learning_rate": 1.2129778780589823e-05, + "loss": 1.3232, + "step": 8944 + }, + { + "epoch": 0.9435654008438819, + "grad_norm": 0.6266298294067383, + "learning_rate": 1.2084849949085791e-05, + "loss": 1.2819, + "step": 8945 + }, + { + "epoch": 0.9436708860759494, + "grad_norm": 0.6783742904663086, + "learning_rate": 1.2040003804653864e-05, + "loss": 1.2956, + "step": 8946 + }, + { + "epoch": 0.9437763713080168, + "grad_norm": 0.6271641254425049, + "learning_rate": 1.199524035231936e-05, + "loss": 1.2798, + "step": 8947 + }, + { + "epoch": 0.9438818565400844, + "grad_norm": 0.63860023021698, + "learning_rate": 1.195055959709826e-05, + "loss": 1.2929, + "step": 8948 + }, + { + "epoch": 0.9439873417721519, + "grad_norm": 0.6969582438468933, + "learning_rate": 1.1905961543997147e-05, + "loss": 1.3228, + "step": 8949 + }, + { + "epoch": 0.9440928270042194, + "grad_norm": 0.6074025630950928, + "learning_rate": 1.186144619801352e-05, + "loss": 1.2928, + "step": 8950 + }, + { + "epoch": 0.9441983122362869, + "grad_norm": 0.6289506554603577, + "learning_rate": 1.1817013564135475e-05, + "loss": 1.2777, + "step": 8951 + }, + { + "epoch": 0.9443037974683545, + "grad_norm": 0.6431355476379395, + "learning_rate": 1.1772663647341947e-05, + "loss": 1.2966, + "step": 8952 + }, + { + "epoch": 0.9444092827004219, + "grad_norm": 0.6640464663505554, + "learning_rate": 1.1728396452602708e-05, + "loss": 1.3158, + "step": 8953 + }, + { + "epoch": 0.9445147679324895, + "grad_norm": 0.6774585843086243, + "learning_rate": 1.1684211984877957e-05, + "loss": 1.2924, + "step": 8954 + }, + { + "epoch": 0.944620253164557, + "grad_norm": 0.6292241215705872, + "learning_rate": 1.1640110249118818e-05, + "loss": 1.3277, + "step": 8955 + }, + { + "epoch": 
0.9447257383966244, + "grad_norm": 0.6690202355384827, + "learning_rate": 1.1596091250267171e-05, + "loss": 1.2829, + "step": 8956 + }, + { + "epoch": 0.944831223628692, + "grad_norm": 0.6799870133399963, + "learning_rate": 1.1552154993255488e-05, + "loss": 1.3061, + "step": 8957 + }, + { + "epoch": 0.9449367088607595, + "grad_norm": 0.6180528402328491, + "learning_rate": 1.1508301483007078e-05, + "loss": 1.2649, + "step": 8958 + }, + { + "epoch": 0.945042194092827, + "grad_norm": 0.6245583295822144, + "learning_rate": 1.1464530724435928e-05, + "loss": 1.2757, + "step": 8959 + }, + { + "epoch": 0.9451476793248945, + "grad_norm": 0.6303438544273376, + "learning_rate": 1.14208427224467e-05, + "loss": 1.3068, + "step": 8960 + }, + { + "epoch": 0.9452531645569621, + "grad_norm": 0.6522407531738281, + "learning_rate": 1.137723748193506e-05, + "loss": 1.279, + "step": 8961 + }, + { + "epoch": 0.9453586497890295, + "grad_norm": 0.6364803910255432, + "learning_rate": 1.1333715007786932e-05, + "loss": 1.2873, + "step": 8962 + }, + { + "epoch": 0.945464135021097, + "grad_norm": 0.6227708458900452, + "learning_rate": 1.12902753048795e-05, + "loss": 1.3043, + "step": 8963 + }, + { + "epoch": 0.9455696202531646, + "grad_norm": 0.6719104647636414, + "learning_rate": 1.1246918378080202e-05, + "loss": 1.2876, + "step": 8964 + }, + { + "epoch": 0.945675105485232, + "grad_norm": 0.6841028928756714, + "learning_rate": 1.12036442322474e-05, + "loss": 1.3079, + "step": 8965 + }, + { + "epoch": 0.9457805907172996, + "grad_norm": 0.6629080176353455, + "learning_rate": 1.1160452872230303e-05, + "loss": 1.335, + "step": 8966 + }, + { + "epoch": 0.9458860759493671, + "grad_norm": 0.6472931504249573, + "learning_rate": 1.111734430286862e-05, + "loss": 1.2541, + "step": 8967 + }, + { + "epoch": 0.9459915611814346, + "grad_norm": 0.7966212034225464, + "learning_rate": 1.1074318528992905e-05, + "loss": 1.2972, + "step": 8968 + }, + { + "epoch": 0.9460970464135021, + "grad_norm": 
0.6411071419715881, + "learning_rate": 1.1031375555424466e-05, + "loss": 1.3095, + "step": 8969 + }, + { + "epoch": 0.9462025316455697, + "grad_norm": 0.6964038610458374, + "learning_rate": 1.0988515386975206e-05, + "loss": 1.2943, + "step": 8970 + }, + { + "epoch": 0.9463080168776371, + "grad_norm": 0.6495730876922607, + "learning_rate": 1.0945738028447783e-05, + "loss": 1.3059, + "step": 8971 + }, + { + "epoch": 0.9464135021097047, + "grad_norm": 0.6218165159225464, + "learning_rate": 1.0903043484635694e-05, + "loss": 1.2679, + "step": 8972 + }, + { + "epoch": 0.9465189873417722, + "grad_norm": 0.7002026438713074, + "learning_rate": 1.0860431760323032e-05, + "loss": 1.2457, + "step": 8973 + }, + { + "epoch": 0.9466244725738396, + "grad_norm": 0.6328230500221252, + "learning_rate": 1.0817902860284723e-05, + "loss": 1.2811, + "step": 8974 + }, + { + "epoch": 0.9467299578059072, + "grad_norm": 0.6602873206138611, + "learning_rate": 1.0775456789286291e-05, + "loss": 1.3193, + "step": 8975 + }, + { + "epoch": 0.9468354430379747, + "grad_norm": 0.6682476997375488, + "learning_rate": 1.0733093552084016e-05, + "loss": 1.3088, + "step": 8976 + }, + { + "epoch": 0.9469409282700422, + "grad_norm": 0.6591200232505798, + "learning_rate": 1.0690813153425016e-05, + "loss": 1.2875, + "step": 8977 + }, + { + "epoch": 0.9470464135021097, + "grad_norm": 0.6394771933555603, + "learning_rate": 1.0648615598046834e-05, + "loss": 1.3173, + "step": 8978 + }, + { + "epoch": 0.9471518987341773, + "grad_norm": 0.6557291150093079, + "learning_rate": 1.0606500890678023e-05, + "loss": 1.2573, + "step": 8979 + }, + { + "epoch": 0.9472573839662447, + "grad_norm": 0.7013487219810486, + "learning_rate": 1.0564469036037722e-05, + "loss": 1.3111, + "step": 8980 + }, + { + "epoch": 0.9473628691983123, + "grad_norm": 0.6661459803581238, + "learning_rate": 1.0522520038835831e-05, + "loss": 1.2965, + "step": 8981 + }, + { + "epoch": 0.9474683544303798, + "grad_norm": 0.6501221656799316, + 
"learning_rate": 1.0480653903772924e-05, + "loss": 1.2772, + "step": 8982 + }, + { + "epoch": 0.9475738396624472, + "grad_norm": 0.656578540802002, + "learning_rate": 1.0438870635540332e-05, + "loss": 1.3131, + "step": 8983 + }, + { + "epoch": 0.9476793248945148, + "grad_norm": 0.6982983350753784, + "learning_rate": 1.0397170238820142e-05, + "loss": 1.3101, + "step": 8984 + }, + { + "epoch": 0.9477848101265823, + "grad_norm": 0.6149391531944275, + "learning_rate": 1.0355552718284949e-05, + "loss": 1.3126, + "step": 8985 + }, + { + "epoch": 0.9478902953586498, + "grad_norm": 0.6242126822471619, + "learning_rate": 1.0314018078598275e-05, + "loss": 1.2918, + "step": 8986 + }, + { + "epoch": 0.9479957805907173, + "grad_norm": 0.6372401714324951, + "learning_rate": 1.0272566324414313e-05, + "loss": 1.2896, + "step": 8987 + }, + { + "epoch": 0.9481012658227848, + "grad_norm": 0.6471521258354187, + "learning_rate": 1.0231197460377845e-05, + "loss": 1.2819, + "step": 8988 + }, + { + "epoch": 0.9482067510548523, + "grad_norm": 0.6353458166122437, + "learning_rate": 1.0189911491124582e-05, + "loss": 1.2929, + "step": 8989 + }, + { + "epoch": 0.9483122362869199, + "grad_norm": 0.6495447158813477, + "learning_rate": 1.0148708421280822e-05, + "loss": 1.2811, + "step": 8990 + }, + { + "epoch": 0.9484177215189873, + "grad_norm": 0.6442285180091858, + "learning_rate": 1.0107588255463373e-05, + "loss": 1.2611, + "step": 8991 + }, + { + "epoch": 0.9485232067510548, + "grad_norm": 0.7421669960021973, + "learning_rate": 1.0066550998280132e-05, + "loss": 1.2669, + "step": 8992 + }, + { + "epoch": 0.9486286919831224, + "grad_norm": 0.6400871872901917, + "learning_rate": 1.0025596654329504e-05, + "loss": 1.2815, + "step": 8993 + }, + { + "epoch": 0.9487341772151898, + "grad_norm": 0.657497763633728, + "learning_rate": 9.984725228200654e-06, + "loss": 1.2871, + "step": 8994 + }, + { + "epoch": 0.9488396624472574, + "grad_norm": 0.62260901927948, + "learning_rate": 9.943936724473412e-06, + 
"loss": 1.3388, + "step": 8995 + }, + { + "epoch": 0.9489451476793249, + "grad_norm": 0.6164034605026245, + "learning_rate": 9.903231147718294e-06, + "loss": 1.2702, + "step": 8996 + }, + { + "epoch": 0.9490506329113924, + "grad_norm": 0.6391909718513489, + "learning_rate": 9.862608502496568e-06, + "loss": 1.293, + "step": 8997 + }, + { + "epoch": 0.9491561181434599, + "grad_norm": 0.6082350611686707, + "learning_rate": 9.822068793360172e-06, + "loss": 1.3109, + "step": 8998 + }, + { + "epoch": 0.9492616033755275, + "grad_norm": 0.6453732252120972, + "learning_rate": 9.781612024851893e-06, + "loss": 1.2899, + "step": 8999 + }, + { + "epoch": 0.9493670886075949, + "grad_norm": 0.6176613569259644, + "learning_rate": 9.74123820150502e-06, + "loss": 1.2744, + "step": 9000 + }, + { + "epoch": 0.9494725738396624, + "grad_norm": 0.6334360241889954, + "learning_rate": 9.700947327843685e-06, + "loss": 1.2914, + "step": 9001 + }, + { + "epoch": 0.94957805907173, + "grad_norm": 0.6836419105529785, + "learning_rate": 9.660739408382608e-06, + "loss": 1.2981, + "step": 9002 + }, + { + "epoch": 0.9496835443037974, + "grad_norm": 0.6452469825744629, + "learning_rate": 9.620614447627435e-06, + "loss": 1.303, + "step": 9003 + }, + { + "epoch": 0.949789029535865, + "grad_norm": 0.6374314427375793, + "learning_rate": 9.580572450074237e-06, + "loss": 1.2697, + "step": 9004 + }, + { + "epoch": 0.9498945147679325, + "grad_norm": 0.6346445679664612, + "learning_rate": 9.540613420209927e-06, + "loss": 1.2472, + "step": 9005 + }, + { + "epoch": 0.95, + "grad_norm": 0.6765795946121216, + "learning_rate": 9.500737362512168e-06, + "loss": 1.2848, + "step": 9006 + }, + { + "epoch": 0.9501054852320675, + "grad_norm": 0.6407663226127625, + "learning_rate": 9.460944281449307e-06, + "loss": 1.3106, + "step": 9007 + }, + { + "epoch": 0.950210970464135, + "grad_norm": 0.7086664438247681, + "learning_rate": 9.421234181480275e-06, + "loss": 1.3106, + "step": 9008 + }, + { + "epoch": 0.9503164556962025, 
+ "grad_norm": 0.622385561466217, + "learning_rate": 9.381607067054764e-06, + "loss": 1.2739, + "step": 9009 + }, + { + "epoch": 0.95042194092827, + "grad_norm": 0.6910991668701172, + "learning_rate": 9.342062942613222e-06, + "loss": 1.2805, + "step": 9010 + }, + { + "epoch": 0.9505274261603376, + "grad_norm": 0.6641107201576233, + "learning_rate": 9.302601812586852e-06, + "loss": 1.262, + "step": 9011 + }, + { + "epoch": 0.950632911392405, + "grad_norm": 0.6506165266036987, + "learning_rate": 9.26322368139737e-06, + "loss": 1.3045, + "step": 9012 + }, + { + "epoch": 0.9507383966244726, + "grad_norm": 0.643765926361084, + "learning_rate": 9.223928553457328e-06, + "loss": 1.3032, + "step": 9013 + }, + { + "epoch": 0.9508438818565401, + "grad_norm": 0.6980738043785095, + "learning_rate": 9.184716433169955e-06, + "loss": 1.2952, + "step": 9014 + }, + { + "epoch": 0.9509493670886076, + "grad_norm": 0.7325343489646912, + "learning_rate": 9.145587324929066e-06, + "loss": 1.2811, + "step": 9015 + }, + { + "epoch": 0.9510548523206751, + "grad_norm": 0.6427857875823975, + "learning_rate": 9.106541233119409e-06, + "loss": 1.2391, + "step": 9016 + }, + { + "epoch": 0.9511603375527427, + "grad_norm": 0.6501355171203613, + "learning_rate": 9.06757816211623e-06, + "loss": 1.2627, + "step": 9017 + }, + { + "epoch": 0.9512658227848101, + "grad_norm": 0.6207700967788696, + "learning_rate": 9.028698116285538e-06, + "loss": 1.2961, + "step": 9018 + }, + { + "epoch": 0.9513713080168776, + "grad_norm": 0.6164586544036865, + "learning_rate": 8.989901099984016e-06, + "loss": 1.2891, + "step": 9019 + }, + { + "epoch": 0.9514767932489452, + "grad_norm": 0.6522576808929443, + "learning_rate": 8.9511871175591e-06, + "loss": 1.3341, + "step": 9020 + }, + { + "epoch": 0.9515822784810126, + "grad_norm": 0.643582284450531, + "learning_rate": 8.912556173348907e-06, + "loss": 1.2978, + "step": 9021 + }, + { + "epoch": 0.9516877637130802, + "grad_norm": 0.6911383271217346, + "learning_rate": 
8.874008271682222e-06, + "loss": 1.3416, + "step": 9022 + }, + { + "epoch": 0.9517932489451477, + "grad_norm": 0.6485402584075928, + "learning_rate": 8.835543416878422e-06, + "loss": 1.3238, + "step": 9023 + }, + { + "epoch": 0.9518987341772152, + "grad_norm": 0.6263551115989685, + "learning_rate": 8.797161613247728e-06, + "loss": 1.2705, + "step": 9024 + }, + { + "epoch": 0.9520042194092827, + "grad_norm": 0.6547183394432068, + "learning_rate": 8.758862865091117e-06, + "loss": 1.3174, + "step": 9025 + }, + { + "epoch": 0.9521097046413503, + "grad_norm": 0.6561456322669983, + "learning_rate": 8.72064717670007e-06, + "loss": 1.3432, + "step": 9026 + }, + { + "epoch": 0.9522151898734177, + "grad_norm": 0.6592156887054443, + "learning_rate": 8.68251455235683e-06, + "loss": 1.2825, + "step": 9027 + }, + { + "epoch": 0.9523206751054852, + "grad_norm": 0.6280444860458374, + "learning_rate": 8.644464996334395e-06, + "loss": 1.2921, + "step": 9028 + }, + { + "epoch": 0.9524261603375528, + "grad_norm": 0.6640043258666992, + "learning_rate": 8.606498512896438e-06, + "loss": 1.3227, + "step": 9029 + }, + { + "epoch": 0.9525316455696202, + "grad_norm": 0.6502074003219604, + "learning_rate": 8.568615106297223e-06, + "loss": 1.2856, + "step": 9030 + }, + { + "epoch": 0.9526371308016878, + "grad_norm": 0.6029249429702759, + "learning_rate": 8.53081478078177e-06, + "loss": 1.2492, + "step": 9031 + }, + { + "epoch": 0.9527426160337553, + "grad_norm": 0.662053108215332, + "learning_rate": 8.493097540585775e-06, + "loss": 1.3213, + "step": 9032 + }, + { + "epoch": 0.9528481012658228, + "grad_norm": 0.6623145937919617, + "learning_rate": 8.455463389935774e-06, + "loss": 1.3024, + "step": 9033 + }, + { + "epoch": 0.9529535864978903, + "grad_norm": 0.6460937261581421, + "learning_rate": 8.417912333048727e-06, + "loss": 1.3246, + "step": 9034 + }, + { + "epoch": 0.9530590717299579, + "grad_norm": 0.6372278928756714, + "learning_rate": 8.380444374132517e-06, + "loss": 1.2664, + "step": 
9035 + }, + { + "epoch": 0.9531645569620253, + "grad_norm": 0.6857845783233643, + "learning_rate": 8.343059517385454e-06, + "loss": 1.2504, + "step": 9036 + }, + { + "epoch": 0.9532700421940928, + "grad_norm": 0.6457746624946594, + "learning_rate": 8.305757766996935e-06, + "loss": 1.2417, + "step": 9037 + }, + { + "epoch": 0.9533755274261604, + "grad_norm": 0.6257385611534119, + "learning_rate": 8.268539127146619e-06, + "loss": 1.2786, + "step": 9038 + }, + { + "epoch": 0.9534810126582278, + "grad_norm": 0.6950221657752991, + "learning_rate": 8.231403602005083e-06, + "loss": 1.2872, + "step": 9039 + }, + { + "epoch": 0.9535864978902954, + "grad_norm": 0.6425091028213501, + "learning_rate": 8.194351195733585e-06, + "loss": 1.3055, + "step": 9040 + }, + { + "epoch": 0.9536919831223629, + "grad_norm": 0.655342698097229, + "learning_rate": 8.157381912484053e-06, + "loss": 1.3178, + "step": 9041 + }, + { + "epoch": 0.9537974683544304, + "grad_norm": 0.6736946702003479, + "learning_rate": 8.120495756399005e-06, + "loss": 1.3012, + "step": 9042 + }, + { + "epoch": 0.9539029535864979, + "grad_norm": 0.6512265205383301, + "learning_rate": 8.08369273161172e-06, + "loss": 1.2657, + "step": 9043 + }, + { + "epoch": 0.9540084388185655, + "grad_norm": 0.6175602078437805, + "learning_rate": 8.046972842246147e-06, + "loss": 1.2913, + "step": 9044 + }, + { + "epoch": 0.9541139240506329, + "grad_norm": 0.6951375603675842, + "learning_rate": 8.01033609241708e-06, + "loss": 1.3028, + "step": 9045 + }, + { + "epoch": 0.9542194092827004, + "grad_norm": 0.6148128509521484, + "learning_rate": 7.973782486229737e-06, + "loss": 1.2766, + "step": 9046 + }, + { + "epoch": 0.954324894514768, + "grad_norm": 0.6511274576187134, + "learning_rate": 7.937312027780169e-06, + "loss": 1.3431, + "step": 9047 + }, + { + "epoch": 0.9544303797468354, + "grad_norm": 0.6338659524917603, + "learning_rate": 7.900924721154945e-06, + "loss": 1.2747, + "step": 9048 + }, + { + "epoch": 0.954535864978903, + 
"grad_norm": 0.6341341733932495, + "learning_rate": 7.864620570431635e-06, + "loss": 1.2919, + "step": 9049 + }, + { + "epoch": 0.9546413502109705, + "grad_norm": 0.6155084371566772, + "learning_rate": 7.828399579678153e-06, + "loss": 1.2773, + "step": 9050 + }, + { + "epoch": 0.954746835443038, + "grad_norm": 0.6365568041801453, + "learning_rate": 7.792261752953333e-06, + "loss": 1.3101, + "step": 9051 + }, + { + "epoch": 0.9548523206751055, + "grad_norm": 0.6335793733596802, + "learning_rate": 7.756207094306605e-06, + "loss": 1.2935, + "step": 9052 + }, + { + "epoch": 0.9549578059071729, + "grad_norm": 0.6382148861885071, + "learning_rate": 7.720235607777987e-06, + "loss": 1.2797, + "step": 9053 + }, + { + "epoch": 0.9550632911392405, + "grad_norm": 0.649518609046936, + "learning_rate": 7.684347297398254e-06, + "loss": 1.3184, + "step": 9054 + }, + { + "epoch": 0.955168776371308, + "grad_norm": 0.6331948637962341, + "learning_rate": 7.648542167189021e-06, + "loss": 1.2751, + "step": 9055 + }, + { + "epoch": 0.9552742616033755, + "grad_norm": 0.6545992493629456, + "learning_rate": 7.612820221162331e-06, + "loss": 1.323, + "step": 9056 + }, + { + "epoch": 0.955379746835443, + "grad_norm": 0.6614078283309937, + "learning_rate": 7.577181463320981e-06, + "loss": 1.2911, + "step": 9057 + }, + { + "epoch": 0.9554852320675106, + "grad_norm": 0.6459037661552429, + "learning_rate": 7.541625897658444e-06, + "loss": 1.3048, + "step": 9058 + }, + { + "epoch": 0.955590717299578, + "grad_norm": 0.6369062066078186, + "learning_rate": 7.506153528159032e-06, + "loss": 1.276, + "step": 9059 + }, + { + "epoch": 0.9556962025316456, + "grad_norm": 0.6399331092834473, + "learning_rate": 7.470764358797566e-06, + "loss": 1.2765, + "step": 9060 + }, + { + "epoch": 0.9558016877637131, + "grad_norm": 0.661858081817627, + "learning_rate": 7.435458393539457e-06, + "loss": 1.317, + "step": 9061 + }, + { + "epoch": 0.9559071729957805, + "grad_norm": 0.6325413584709167, + "learning_rate": 
7.400235636340957e-06, + "loss": 1.3093, + "step": 9062 + }, + { + "epoch": 0.9560126582278481, + "grad_norm": 0.6180809140205383, + "learning_rate": 7.3650960911490764e-06, + "loss": 1.2944, + "step": 9063 + }, + { + "epoch": 0.9561181434599156, + "grad_norm": 0.6436099410057068, + "learning_rate": 7.330039761901247e-06, + "loss": 1.2814, + "step": 9064 + }, + { + "epoch": 0.9562236286919831, + "grad_norm": 0.648955762386322, + "learning_rate": 7.295066652525828e-06, + "loss": 1.3172, + "step": 9065 + }, + { + "epoch": 0.9563291139240506, + "grad_norm": 0.6253346800804138, + "learning_rate": 7.260176766941601e-06, + "loss": 1.2717, + "step": 9066 + }, + { + "epoch": 0.9564345991561182, + "grad_norm": 0.638270914554596, + "learning_rate": 7.225370109058188e-06, + "loss": 1.3136, + "step": 9067 + }, + { + "epoch": 0.9565400843881856, + "grad_norm": 0.611098051071167, + "learning_rate": 7.190646682775886e-06, + "loss": 1.2839, + "step": 9068 + }, + { + "epoch": 0.9566455696202532, + "grad_norm": 0.6453043818473816, + "learning_rate": 7.1560064919855835e-06, + "loss": 1.296, + "step": 9069 + }, + { + "epoch": 0.9567510548523207, + "grad_norm": 0.6397384405136108, + "learning_rate": 7.121449540568842e-06, + "loss": 1.2715, + "step": 9070 + }, + { + "epoch": 0.9568565400843881, + "grad_norm": 0.6234082579612732, + "learning_rate": 7.086975832398146e-06, + "loss": 1.3229, + "step": 9071 + }, + { + "epoch": 0.9569620253164557, + "grad_norm": 0.69614577293396, + "learning_rate": 7.0525853713362395e-06, + "loss": 1.2715, + "step": 9072 + }, + { + "epoch": 0.9570675105485232, + "grad_norm": 0.6532238721847534, + "learning_rate": 7.018278161236791e-06, + "loss": 1.3153, + "step": 9073 + }, + { + "epoch": 0.9571729957805907, + "grad_norm": 0.6583420038223267, + "learning_rate": 6.984054205944141e-06, + "loss": 1.317, + "step": 9074 + }, + { + "epoch": 0.9572784810126582, + "grad_norm": 0.6464735865592957, + "learning_rate": 6.949913509293221e-06, + "loss": 1.2809, + "step": 
9075 + }, + { + "epoch": 0.9573839662447258, + "grad_norm": 0.65342116355896, + "learning_rate": 6.915856075109722e-06, + "loss": 1.2614, + "step": 9076 + }, + { + "epoch": 0.9574894514767932, + "grad_norm": 0.6394591927528381, + "learning_rate": 6.881881907209841e-06, + "loss": 1.3204, + "step": 9077 + }, + { + "epoch": 0.9575949367088608, + "grad_norm": 0.6329336762428284, + "learning_rate": 6.847991009400617e-06, + "loss": 1.3093, + "step": 9078 + }, + { + "epoch": 0.9577004219409283, + "grad_norm": 0.6268478035926819, + "learning_rate": 6.814183385479677e-06, + "loss": 1.2538, + "step": 9079 + }, + { + "epoch": 0.9578059071729957, + "grad_norm": 0.6542370319366455, + "learning_rate": 6.780459039235409e-06, + "loss": 1.3194, + "step": 9080 + }, + { + "epoch": 0.9579113924050633, + "grad_norm": 0.6893998980522156, + "learning_rate": 6.746817974446706e-06, + "loss": 1.314, + "step": 9081 + }, + { + "epoch": 0.9580168776371308, + "grad_norm": 0.6976578831672668, + "learning_rate": 6.71326019488322e-06, + "loss": 1.2949, + "step": 9082 + }, + { + "epoch": 0.9581223628691983, + "grad_norm": 0.6690940856933594, + "learning_rate": 6.679785704305358e-06, + "loss": 1.2955, + "step": 9083 + }, + { + "epoch": 0.9582278481012658, + "grad_norm": 0.6316529512405396, + "learning_rate": 6.6463945064639544e-06, + "loss": 1.2913, + "step": 9084 + }, + { + "epoch": 0.9583333333333334, + "grad_norm": 0.6446517109870911, + "learning_rate": 6.6130866051007654e-06, + "loss": 1.2618, + "step": 9085 + }, + { + "epoch": 0.9584388185654008, + "grad_norm": 0.6518252491950989, + "learning_rate": 6.57986200394814e-06, + "loss": 1.2912, + "step": 9086 + }, + { + "epoch": 0.9585443037974684, + "grad_norm": 0.645938515663147, + "learning_rate": 6.546720706728931e-06, + "loss": 1.2799, + "step": 9087 + }, + { + "epoch": 0.9586497890295359, + "grad_norm": 0.640209972858429, + "learning_rate": 6.513662717156838e-06, + "loss": 1.3118, + "step": 9088 + }, + { + "epoch": 0.9587552742616033, + 
"grad_norm": 0.6352262496948242, + "learning_rate": 6.480688038936311e-06, + "loss": 1.2921, + "step": 9089 + }, + { + "epoch": 0.9588607594936709, + "grad_norm": 0.6646974682807922, + "learning_rate": 6.447796675762146e-06, + "loss": 1.3212, + "step": 9090 + }, + { + "epoch": 0.9589662447257384, + "grad_norm": 0.6403957009315491, + "learning_rate": 6.414988631320062e-06, + "loss": 1.2611, + "step": 9091 + }, + { + "epoch": 0.9590717299578059, + "grad_norm": 0.7067971229553223, + "learning_rate": 6.3822639092862846e-06, + "loss": 1.2777, + "step": 9092 + }, + { + "epoch": 0.9591772151898734, + "grad_norm": 0.6518049240112305, + "learning_rate": 6.349622513327963e-06, + "loss": 1.285, + "step": 9093 + }, + { + "epoch": 0.959282700421941, + "grad_norm": 0.6435345411300659, + "learning_rate": 6.317064447102505e-06, + "loss": 1.2627, + "step": 9094 + }, + { + "epoch": 0.9593881856540084, + "grad_norm": 0.6668466329574585, + "learning_rate": 6.28458971425841e-06, + "loss": 1.2776, + "step": 9095 + }, + { + "epoch": 0.959493670886076, + "grad_norm": 0.6636325120925903, + "learning_rate": 6.252198318434432e-06, + "loss": 1.3015, + "step": 9096 + }, + { + "epoch": 0.9595991561181435, + "grad_norm": 0.6576698422431946, + "learning_rate": 6.219890263260336e-06, + "loss": 1.3189, + "step": 9097 + }, + { + "epoch": 0.9597046413502109, + "grad_norm": 0.635312557220459, + "learning_rate": 6.187665552356392e-06, + "loss": 1.2585, + "step": 9098 + }, + { + "epoch": 0.9598101265822785, + "grad_norm": 0.6680043339729309, + "learning_rate": 6.155524189333461e-06, + "loss": 1.2345, + "step": 9099 + }, + { + "epoch": 0.959915611814346, + "grad_norm": 0.6865218877792358, + "learning_rate": 6.123466177793247e-06, + "loss": 1.2895, + "step": 9100 + }, + { + "epoch": 0.9600210970464135, + "grad_norm": 0.5827442407608032, + "learning_rate": 6.091491521327958e-06, + "loss": 1.3053, + "step": 9101 + }, + { + "epoch": 0.960126582278481, + "grad_norm": 0.6497006416320801, + "learning_rate": 
6.059600223520478e-06, + "loss": 1.2805, + "step": 9102 + }, + { + "epoch": 0.9602320675105486, + "grad_norm": 0.657821536064148, + "learning_rate": 6.027792287944367e-06, + "loss": 1.2777, + "step": 9103 + }, + { + "epoch": 0.960337552742616, + "grad_norm": 0.6253600716590881, + "learning_rate": 5.996067718163939e-06, + "loss": 1.2934, + "step": 9104 + }, + { + "epoch": 0.9604430379746836, + "grad_norm": 0.6738739013671875, + "learning_rate": 5.964426517734101e-06, + "loss": 1.2869, + "step": 9105 + }, + { + "epoch": 0.9605485232067511, + "grad_norm": 0.6520026326179504, + "learning_rate": 5.932868690200266e-06, + "loss": 1.3074, + "step": 9106 + }, + { + "epoch": 0.9606540084388185, + "grad_norm": 0.6210452318191528, + "learning_rate": 5.901394239098856e-06, + "loss": 1.2821, + "step": 9107 + }, + { + "epoch": 0.9607594936708861, + "grad_norm": 0.6183045506477356, + "learning_rate": 5.870003167956634e-06, + "loss": 1.2658, + "step": 9108 + }, + { + "epoch": 0.9608649789029536, + "grad_norm": 0.637030839920044, + "learning_rate": 5.838695480291034e-06, + "loss": 1.3102, + "step": 9109 + }, + { + "epoch": 0.9609704641350211, + "grad_norm": 0.6395982503890991, + "learning_rate": 5.807471179610418e-06, + "loss": 1.27, + "step": 9110 + }, + { + "epoch": 0.9610759493670886, + "grad_norm": 0.6139439344406128, + "learning_rate": 5.776330269413488e-06, + "loss": 1.2957, + "step": 9111 + }, + { + "epoch": 0.9611814345991562, + "grad_norm": 0.6462023258209229, + "learning_rate": 5.745272753189784e-06, + "loss": 1.2957, + "step": 9112 + }, + { + "epoch": 0.9612869198312236, + "grad_norm": 0.648419201374054, + "learning_rate": 5.714298634419524e-06, + "loss": 1.277, + "step": 9113 + }, + { + "epoch": 0.9613924050632912, + "grad_norm": 0.6271203756332397, + "learning_rate": 5.6834079165733464e-06, + "loss": 1.2906, + "step": 9114 + }, + { + "epoch": 0.9614978902953587, + "grad_norm": 0.6451466083526611, + "learning_rate": 5.652600603112818e-06, + "loss": 1.2947, + "step": 9115 
+ }, + { + "epoch": 0.9616033755274261, + "grad_norm": 0.6648434996604919, + "learning_rate": 5.6218766974900915e-06, + "loss": 1.3342, + "step": 9116 + }, + { + "epoch": 0.9617088607594937, + "grad_norm": 0.6491971611976624, + "learning_rate": 5.591236203147915e-06, + "loss": 1.2997, + "step": 9117 + }, + { + "epoch": 0.9618143459915611, + "grad_norm": 0.6548794507980347, + "learning_rate": 5.560679123519624e-06, + "loss": 1.3308, + "step": 9118 + }, + { + "epoch": 0.9619198312236287, + "grad_norm": 0.7028245329856873, + "learning_rate": 5.530205462029314e-06, + "loss": 1.3149, + "step": 9119 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 0.6231363415718079, + "learning_rate": 5.499815222091836e-06, + "loss": 1.2412, + "step": 9120 + }, + { + "epoch": 0.9621308016877637, + "grad_norm": 0.655929446220398, + "learning_rate": 5.469508407112467e-06, + "loss": 1.3001, + "step": 9121 + }, + { + "epoch": 0.9622362869198312, + "grad_norm": 0.6573889255523682, + "learning_rate": 5.439285020487156e-06, + "loss": 1.27, + "step": 9122 + }, + { + "epoch": 0.9623417721518988, + "grad_norm": 0.6673445105552673, + "learning_rate": 5.409145065602694e-06, + "loss": 1.2913, + "step": 9123 + }, + { + "epoch": 0.9624472573839662, + "grad_norm": 0.6208800077438354, + "learning_rate": 5.379088545836464e-06, + "loss": 1.2684, + "step": 9124 + }, + { + "epoch": 0.9625527426160337, + "grad_norm": 0.6374074220657349, + "learning_rate": 5.349115464556354e-06, + "loss": 1.2671, + "step": 9125 + }, + { + "epoch": 0.9626582278481013, + "grad_norm": 0.6701366901397705, + "learning_rate": 5.319225825120927e-06, + "loss": 1.263, + "step": 9126 + }, + { + "epoch": 0.9627637130801687, + "grad_norm": 0.6460908055305481, + "learning_rate": 5.289419630879672e-06, + "loss": 1.2925, + "step": 9127 + }, + { + "epoch": 0.9628691983122363, + "grad_norm": 0.6532015800476074, + "learning_rate": 5.2596968851724155e-06, + "loss": 1.2617, + "step": 9128 + }, + { + "epoch": 0.9629746835443038, + 
"grad_norm": 0.6502644419670105, + "learning_rate": 5.230057591329662e-06, + "loss": 1.2549, + "step": 9129 + }, + { + "epoch": 0.9630801687763713, + "grad_norm": 0.695527195930481, + "learning_rate": 5.200501752672754e-06, + "loss": 1.346, + "step": 9130 + }, + { + "epoch": 0.9631856540084388, + "grad_norm": 0.6516790390014648, + "learning_rate": 5.171029372513458e-06, + "loss": 1.3112, + "step": 9131 + }, + { + "epoch": 0.9632911392405064, + "grad_norm": 0.6298621892929077, + "learning_rate": 5.141640454154467e-06, + "loss": 1.3312, + "step": 9132 + }, + { + "epoch": 0.9633966244725738, + "grad_norm": 0.7105998992919922, + "learning_rate": 5.112335000888813e-06, + "loss": 1.3446, + "step": 9133 + }, + { + "epoch": 0.9635021097046413, + "grad_norm": 0.6330274343490601, + "learning_rate": 5.083113016000368e-06, + "loss": 1.2637, + "step": 9134 + }, + { + "epoch": 0.9636075949367089, + "grad_norm": 0.6531079411506653, + "learning_rate": 5.053974502763681e-06, + "loss": 1.2767, + "step": 9135 + }, + { + "epoch": 0.9637130801687763, + "grad_norm": 0.6188520193099976, + "learning_rate": 5.024919464443723e-06, + "loss": 1.2977, + "step": 9136 + }, + { + "epoch": 0.9638185654008439, + "grad_norm": 0.6359450817108154, + "learning_rate": 4.995947904296305e-06, + "loss": 1.2958, + "step": 9137 + }, + { + "epoch": 0.9639240506329114, + "grad_norm": 0.6263090968132019, + "learning_rate": 4.967059825567832e-06, + "loss": 1.2926, + "step": 9138 + }, + { + "epoch": 0.9640295358649789, + "grad_norm": 0.675390362739563, + "learning_rate": 4.938255231495464e-06, + "loss": 1.3031, + "step": 9139 + }, + { + "epoch": 0.9641350210970464, + "grad_norm": 0.6897875666618347, + "learning_rate": 4.909534125306702e-06, + "loss": 1.2992, + "step": 9140 + }, + { + "epoch": 0.964240506329114, + "grad_norm": 0.7634959816932678, + "learning_rate": 4.880896510220056e-06, + "loss": 1.2721, + "step": 9141 + }, + { + "epoch": 0.9643459915611814, + "grad_norm": 0.6295509338378906, + "learning_rate": 
4.852342389444458e-06, + "loss": 1.2795, + "step": 9142 + }, + { + "epoch": 0.9644514767932489, + "grad_norm": 0.6360350847244263, + "learning_rate": 4.823871766179516e-06, + "loss": 1.2917, + "step": 9143 + }, + { + "epoch": 0.9645569620253165, + "grad_norm": 0.6414978504180908, + "learning_rate": 4.7954846436155104e-06, + "loss": 1.2986, + "step": 9144 + }, + { + "epoch": 0.9646624472573839, + "grad_norm": 0.6269999742507935, + "learning_rate": 4.767181024933398e-06, + "loss": 1.2775, + "step": 9145 + }, + { + "epoch": 0.9647679324894515, + "grad_norm": 0.6771408319473267, + "learning_rate": 4.738960913304724e-06, + "loss": 1.3033, + "step": 9146 + }, + { + "epoch": 0.964873417721519, + "grad_norm": 0.6466595530509949, + "learning_rate": 4.710824311891709e-06, + "loss": 1.3138, + "step": 9147 + }, + { + "epoch": 0.9649789029535865, + "grad_norm": 0.6468120813369751, + "learning_rate": 4.682771223847166e-06, + "loss": 1.2831, + "step": 9148 + }, + { + "epoch": 0.965084388185654, + "grad_norm": 0.6507291793823242, + "learning_rate": 4.654801652314577e-06, + "loss": 1.2949, + "step": 9149 + }, + { + "epoch": 0.9651898734177216, + "grad_norm": 0.6576620936393738, + "learning_rate": 4.626915600428105e-06, + "loss": 1.2782, + "step": 9150 + }, + { + "epoch": 0.965295358649789, + "grad_norm": 0.6714293956756592, + "learning_rate": 4.5991130713124995e-06, + "loss": 1.261, + "step": 9151 + }, + { + "epoch": 0.9654008438818565, + "grad_norm": 0.6469624042510986, + "learning_rate": 4.571394068083185e-06, + "loss": 1.3195, + "step": 9152 + }, + { + "epoch": 0.9655063291139241, + "grad_norm": 0.6603587865829468, + "learning_rate": 4.543758593846175e-06, + "loss": 1.3135, + "step": 9153 + }, + { + "epoch": 0.9656118143459915, + "grad_norm": 0.6387156844139099, + "learning_rate": 4.516206651698246e-06, + "loss": 1.3031, + "step": 9154 + }, + { + "epoch": 0.9657172995780591, + "grad_norm": 0.6129004955291748, + "learning_rate": 4.488738244726593e-06, + "loss": 1.3091, + "step": 
9155 + }, + { + "epoch": 0.9658227848101266, + "grad_norm": 0.6350065469741821, + "learning_rate": 4.4613533760093365e-06, + "loss": 1.2843, + "step": 9156 + }, + { + "epoch": 0.9659282700421941, + "grad_norm": 0.6633793115615845, + "learning_rate": 4.434052048615022e-06, + "loss": 1.2806, + "step": 9157 + }, + { + "epoch": 0.9660337552742616, + "grad_norm": 0.6701376438140869, + "learning_rate": 4.4068342656028715e-06, + "loss": 1.3199, + "step": 9158 + }, + { + "epoch": 0.9661392405063292, + "grad_norm": 0.6642991900444031, + "learning_rate": 4.37970003002286e-06, + "loss": 1.2715, + "step": 9159 + }, + { + "epoch": 0.9662447257383966, + "grad_norm": 0.6331567168235779, + "learning_rate": 4.352649344915471e-06, + "loss": 1.3037, + "step": 9160 + }, + { + "epoch": 0.9663502109704641, + "grad_norm": 0.6198595762252808, + "learning_rate": 4.325682213311782e-06, + "loss": 1.2498, + "step": 9161 + }, + { + "epoch": 0.9664556962025317, + "grad_norm": 0.6437236070632935, + "learning_rate": 4.298798638233709e-06, + "loss": 1.2809, + "step": 9162 + }, + { + "epoch": 0.9665611814345991, + "grad_norm": 0.664297878742218, + "learning_rate": 4.271998622693674e-06, + "loss": 1.2694, + "step": 9163 + }, + { + "epoch": 0.9666666666666667, + "grad_norm": 0.6923469305038452, + "learning_rate": 4.245282169694692e-06, + "loss": 1.3373, + "step": 9164 + }, + { + "epoch": 0.9667721518987342, + "grad_norm": 0.6318064332008362, + "learning_rate": 4.218649282230536e-06, + "loss": 1.3391, + "step": 9165 + }, + { + "epoch": 0.9668776371308017, + "grad_norm": 0.7040508985519409, + "learning_rate": 4.192099963285484e-06, + "loss": 1.3148, + "step": 9166 + }, + { + "epoch": 0.9669831223628692, + "grad_norm": 0.63563072681427, + "learning_rate": 4.165634215834574e-06, + "loss": 1.3004, + "step": 9167 + }, + { + "epoch": 0.9670886075949368, + "grad_norm": 0.6669729948043823, + "learning_rate": 4.139252042843517e-06, + "loss": 1.2918, + "step": 9168 + }, + { + "epoch": 0.9671940928270042, + 
"grad_norm": 0.6131215691566467, + "learning_rate": 4.112953447268364e-06, + "loss": 1.3221, + "step": 9169 + }, + { + "epoch": 0.9672995780590717, + "grad_norm": 0.6513232588768005, + "learning_rate": 4.086738432056092e-06, + "loss": 1.2606, + "step": 9170 + }, + { + "epoch": 0.9674050632911393, + "grad_norm": 0.6658788919448853, + "learning_rate": 4.060607000144351e-06, + "loss": 1.2511, + "step": 9171 + }, + { + "epoch": 0.9675105485232067, + "grad_norm": 0.6517140865325928, + "learning_rate": 4.034559154461049e-06, + "loss": 1.3365, + "step": 9172 + }, + { + "epoch": 0.9676160337552743, + "grad_norm": 0.6517356634140015, + "learning_rate": 4.008594897925183e-06, + "loss": 1.3126, + "step": 9173 + }, + { + "epoch": 0.9677215189873418, + "grad_norm": 0.6820710301399231, + "learning_rate": 3.982714233446094e-06, + "loss": 1.293, + "step": 9174 + }, + { + "epoch": 0.9678270042194093, + "grad_norm": 0.6385257840156555, + "learning_rate": 3.956917163923879e-06, + "loss": 1.2785, + "step": 9175 + }, + { + "epoch": 0.9679324894514768, + "grad_norm": 0.6371485590934753, + "learning_rate": 3.931203692249141e-06, + "loss": 1.2695, + "step": 9176 + }, + { + "epoch": 0.9680379746835444, + "grad_norm": 0.6403883695602417, + "learning_rate": 3.905573821303327e-06, + "loss": 1.2985, + "step": 9177 + }, + { + "epoch": 0.9681434599156118, + "grad_norm": 0.644416093826294, + "learning_rate": 3.880027553958304e-06, + "loss": 1.3169, + "step": 9178 + }, + { + "epoch": 0.9682489451476793, + "grad_norm": 0.6472262144088745, + "learning_rate": 3.8545648930767005e-06, + "loss": 1.3018, + "step": 9179 + }, + { + "epoch": 0.9683544303797469, + "grad_norm": 0.6423091292381287, + "learning_rate": 3.8291858415117344e-06, + "loss": 1.2662, + "step": 9180 + }, + { + "epoch": 0.9684599156118143, + "grad_norm": 0.6689353585243225, + "learning_rate": 3.803890402107213e-06, + "loss": 1.2996, + "step": 9181 + }, + { + "epoch": 0.9685654008438819, + "grad_norm": 0.6649501919746399, + 
"learning_rate": 3.7786785776976198e-06, + "loss": 1.2868, + "step": 9182 + }, + { + "epoch": 0.9686708860759494, + "grad_norm": 0.625525712966919, + "learning_rate": 3.7535503711080276e-06, + "loss": 1.214, + "step": 9183 + }, + { + "epoch": 0.9687763713080169, + "grad_norm": 0.7053998112678528, + "learning_rate": 3.7285057851543515e-06, + "loss": 1.3071, + "step": 9184 + }, + { + "epoch": 0.9688818565400844, + "grad_norm": 0.6995223760604858, + "learning_rate": 3.703544822642846e-06, + "loss": 1.3035, + "step": 9185 + }, + { + "epoch": 0.9689873417721518, + "grad_norm": 0.6531097292900085, + "learning_rate": 3.6786674863704406e-06, + "loss": 1.2789, + "step": 9186 + }, + { + "epoch": 0.9690928270042194, + "grad_norm": 0.6362310647964478, + "learning_rate": 3.6538737791249053e-06, + "loss": 1.3038, + "step": 9187 + }, + { + "epoch": 0.9691983122362869, + "grad_norm": 0.6245777606964111, + "learning_rate": 3.629163703684352e-06, + "loss": 1.3068, + "step": 9188 + }, + { + "epoch": 0.9693037974683544, + "grad_norm": 0.6433390378952026, + "learning_rate": 3.604537262817814e-06, + "loss": 1.3143, + "step": 9189 + }, + { + "epoch": 0.9694092827004219, + "grad_norm": 0.6272900700569153, + "learning_rate": 3.579994459284752e-06, + "loss": 1.2673, + "step": 9190 + }, + { + "epoch": 0.9695147679324895, + "grad_norm": 0.6630595922470093, + "learning_rate": 3.555535295835216e-06, + "loss": 1.3275, + "step": 9191 + }, + { + "epoch": 0.9696202531645569, + "grad_norm": 0.6326593160629272, + "learning_rate": 3.5311597752100964e-06, + "loss": 1.2885, + "step": 9192 + }, + { + "epoch": 0.9697257383966245, + "grad_norm": 0.6317233443260193, + "learning_rate": 3.506867900140792e-06, + "loss": 1.2693, + "step": 9193 + }, + { + "epoch": 0.969831223628692, + "grad_norm": 0.6475933194160461, + "learning_rate": 3.4826596733492087e-06, + "loss": 1.2994, + "step": 9194 + }, + { + "epoch": 0.9699367088607594, + "grad_norm": 0.6918490529060364, + "learning_rate": 3.4585350975481766e-06, + 
"loss": 1.2785, + "step": 9195 + }, + { + "epoch": 0.970042194092827, + "grad_norm": 0.6317195296287537, + "learning_rate": 3.4344941754408663e-06, + "loss": 1.2946, + "step": 9196 + }, + { + "epoch": 0.9701476793248945, + "grad_norm": 0.6485316157341003, + "learning_rate": 3.4105369097211238e-06, + "loss": 1.2965, + "step": 9197 + }, + { + "epoch": 0.970253164556962, + "grad_norm": 0.6291550993919373, + "learning_rate": 3.386663303073634e-06, + "loss": 1.2629, + "step": 9198 + }, + { + "epoch": 0.9703586497890295, + "grad_norm": 0.6634153127670288, + "learning_rate": 3.362873358173424e-06, + "loss": 1.3211, + "step": 9199 + }, + { + "epoch": 0.9704641350210971, + "grad_norm": 0.7121901512145996, + "learning_rate": 3.339167077686278e-06, + "loss": 1.2624, + "step": 9200 + }, + { + "epoch": 0.9705696202531645, + "grad_norm": 0.6568794846534729, + "learning_rate": 3.3155444642687384e-06, + "loss": 1.2976, + "step": 9201 + }, + { + "epoch": 0.9706751054852321, + "grad_norm": 0.6485229134559631, + "learning_rate": 3.2920055205676867e-06, + "loss": 1.3109, + "step": 9202 + }, + { + "epoch": 0.9707805907172996, + "grad_norm": 0.6110551357269287, + "learning_rate": 3.2685502492208475e-06, + "loss": 1.2627, + "step": 9203 + }, + { + "epoch": 0.970886075949367, + "grad_norm": 0.6375066637992859, + "learning_rate": 3.245178652856534e-06, + "loss": 1.3047, + "step": 9204 + }, + { + "epoch": 0.9709915611814346, + "grad_norm": 0.632014811038971, + "learning_rate": 3.221890734093569e-06, + "loss": 1.3176, + "step": 9205 + }, + { + "epoch": 0.9710970464135021, + "grad_norm": 0.6381463408470154, + "learning_rate": 3.198686495541531e-06, + "loss": 1.2594, + "step": 9206 + }, + { + "epoch": 0.9712025316455696, + "grad_norm": 0.6620228290557861, + "learning_rate": 3.1755659398005066e-06, + "loss": 1.3067, + "step": 9207 + }, + { + "epoch": 0.9713080168776371, + "grad_norm": 0.6467686295509338, + "learning_rate": 3.152529069461424e-06, + "loss": 1.2783, + "step": 9208 + }, + { + 
"epoch": 0.9714135021097047, + "grad_norm": 0.6449750065803528, + "learning_rate": 3.129575887105468e-06, + "loss": 1.2674, + "step": 9209 + }, + { + "epoch": 0.9715189873417721, + "grad_norm": 0.6378680467605591, + "learning_rate": 3.1067063953048313e-06, + "loss": 1.2815, + "step": 9210 + }, + { + "epoch": 0.9716244725738397, + "grad_norm": 0.6598891019821167, + "learning_rate": 3.0839205966220474e-06, + "loss": 1.2985, + "step": 9211 + }, + { + "epoch": 0.9717299578059072, + "grad_norm": 0.6581178903579712, + "learning_rate": 3.06121849361049e-06, + "loss": 1.3121, + "step": 9212 + }, + { + "epoch": 0.9718354430379746, + "grad_norm": 0.6305561661720276, + "learning_rate": 3.0386000888139588e-06, + "loss": 1.326, + "step": 9213 + }, + { + "epoch": 0.9719409282700422, + "grad_norm": 0.6378142237663269, + "learning_rate": 3.0160653847669252e-06, + "loss": 1.2765, + "step": 9214 + }, + { + "epoch": 0.9720464135021097, + "grad_norm": 0.671758770942688, + "learning_rate": 2.9936143839946193e-06, + "loss": 1.2815, + "step": 9215 + }, + { + "epoch": 0.9721518987341772, + "grad_norm": 0.6555628180503845, + "learning_rate": 2.9712470890126962e-06, + "loss": 1.2658, + "step": 9216 + }, + { + "epoch": 0.9722573839662447, + "grad_norm": 0.6423410177230835, + "learning_rate": 2.9489635023275676e-06, + "loss": 1.2925, + "step": 9217 + }, + { + "epoch": 0.9723628691983123, + "grad_norm": 0.6470122933387756, + "learning_rate": 2.9267636264361517e-06, + "loss": 1.2548, + "step": 9218 + }, + { + "epoch": 0.9724683544303797, + "grad_norm": 0.6558071374893188, + "learning_rate": 2.90464746382621e-06, + "loss": 1.3004, + "step": 9219 + }, + { + "epoch": 0.9725738396624473, + "grad_norm": 0.6364114284515381, + "learning_rate": 2.8826150169758425e-06, + "loss": 1.2942, + "step": 9220 + }, + { + "epoch": 0.9726793248945148, + "grad_norm": 0.6424758434295654, + "learning_rate": 2.8606662883539082e-06, + "loss": 1.3078, + "step": 9221 + }, + { + "epoch": 0.9727848101265822, + "grad_norm": 
0.6306723952293396, + "learning_rate": 2.838801280419856e-06, + "loss": 1.2608, + "step": 9222 + }, + { + "epoch": 0.9728902953586498, + "grad_norm": 0.6402534246444702, + "learning_rate": 2.817019995623893e-06, + "loss": 1.2906, + "step": 9223 + }, + { + "epoch": 0.9729957805907173, + "grad_norm": 0.7116954326629639, + "learning_rate": 2.7953224364065667e-06, + "loss": 1.2745, + "step": 9224 + }, + { + "epoch": 0.9731012658227848, + "grad_norm": 0.6322745680809021, + "learning_rate": 2.7737086051992653e-06, + "loss": 1.2776, + "step": 9225 + }, + { + "epoch": 0.9732067510548523, + "grad_norm": 0.7115598320960999, + "learning_rate": 2.752178504423969e-06, + "loss": 1.2903, + "step": 9226 + }, + { + "epoch": 0.9733122362869199, + "grad_norm": 0.6404543519020081, + "learning_rate": 2.7307321364930804e-06, + "loss": 1.2572, + "step": 9227 + }, + { + "epoch": 0.9734177215189873, + "grad_norm": 0.6922749280929565, + "learning_rate": 2.7093695038099277e-06, + "loss": 1.2846, + "step": 9228 + }, + { + "epoch": 0.9735232067510549, + "grad_norm": 0.6807360649108887, + "learning_rate": 2.6880906087682622e-06, + "loss": 1.2588, + "step": 9229 + }, + { + "epoch": 0.9736286919831224, + "grad_norm": 0.6397135853767395, + "learning_rate": 2.66689545375251e-06, + "loss": 1.3096, + "step": 9230 + }, + { + "epoch": 0.9737341772151898, + "grad_norm": 0.661342442035675, + "learning_rate": 2.6457840411376888e-06, + "loss": 1.3036, + "step": 9231 + }, + { + "epoch": 0.9738396624472574, + "grad_norm": 0.6332105994224548, + "learning_rate": 2.624756373289322e-06, + "loss": 1.2396, + "step": 9232 + }, + { + "epoch": 0.9739451476793249, + "grad_norm": 0.6309587955474854, + "learning_rate": 2.603812452563775e-06, + "loss": 1.2674, + "step": 9233 + }, + { + "epoch": 0.9740506329113924, + "grad_norm": 0.6486936211585999, + "learning_rate": 2.5829522813079207e-06, + "loss": 1.2897, + "step": 9234 + }, + { + "epoch": 0.9741561181434599, + "grad_norm": 0.6422932744026184, + "learning_rate": 
2.5621758618591394e-06, + "loss": 1.2895, + "step": 9235 + }, + { + "epoch": 0.9742616033755275, + "grad_norm": 0.6401110291481018, + "learning_rate": 2.541483196545735e-06, + "loss": 1.2671, + "step": 9236 + }, + { + "epoch": 0.9743670886075949, + "grad_norm": 0.626599907875061, + "learning_rate": 2.52087428768627e-06, + "loss": 1.2709, + "step": 9237 + }, + { + "epoch": 0.9744725738396625, + "grad_norm": 0.6361992955207825, + "learning_rate": 2.5003491375900633e-06, + "loss": 1.2875, + "step": 9238 + }, + { + "epoch": 0.97457805907173, + "grad_norm": 0.6189406514167786, + "learning_rate": 2.4799077485571087e-06, + "loss": 1.2442, + "step": 9239 + }, + { + "epoch": 0.9746835443037974, + "grad_norm": 0.6558420062065125, + "learning_rate": 2.4595501228779906e-06, + "loss": 1.2869, + "step": 9240 + }, + { + "epoch": 0.974789029535865, + "grad_norm": 0.6033783555030823, + "learning_rate": 2.4392762628338838e-06, + "loss": 1.2634, + "step": 9241 + }, + { + "epoch": 0.9748945147679325, + "grad_norm": 0.6373922824859619, + "learning_rate": 2.419086170696472e-06, + "loss": 1.3187, + "step": 9242 + }, + { + "epoch": 0.975, + "grad_norm": 0.641022801399231, + "learning_rate": 2.3989798487282776e-06, + "loss": 1.2682, + "step": 9243 + }, + { + "epoch": 0.9751054852320675, + "grad_norm": 0.7209365367889404, + "learning_rate": 2.3789572991822495e-06, + "loss": 1.2997, + "step": 9244 + }, + { + "epoch": 0.9752109704641351, + "grad_norm": 0.673014760017395, + "learning_rate": 2.3590185243020092e-06, + "loss": 1.2397, + "step": 9245 + }, + { + "epoch": 0.9753164556962025, + "grad_norm": 0.6279931664466858, + "learning_rate": 2.3391635263218526e-06, + "loss": 1.3, + "step": 9246 + }, + { + "epoch": 0.9754219409282701, + "grad_norm": 0.6472793817520142, + "learning_rate": 2.3193923074665834e-06, + "loss": 1.3151, + "step": 9247 + }, + { + "epoch": 0.9755274261603376, + "grad_norm": 0.6960034966468811, + "learning_rate": 2.299704869951763e-06, + "loss": 1.2815, + "step": 9248 + }, + 
{ + "epoch": 0.975632911392405, + "grad_norm": 0.6340432167053223, + "learning_rate": 2.2801012159832933e-06, + "loss": 1.3081, + "step": 9249 + }, + { + "epoch": 0.9757383966244726, + "grad_norm": 0.6282653212547302, + "learning_rate": 2.2605813477579172e-06, + "loss": 1.277, + "step": 9250 + }, + { + "epoch": 0.97584388185654, + "grad_norm": 0.644497275352478, + "learning_rate": 2.2411452674630517e-06, + "loss": 1.2697, + "step": 9251 + }, + { + "epoch": 0.9759493670886076, + "grad_norm": 0.666607677936554, + "learning_rate": 2.2217929772764545e-06, + "loss": 1.2905, + "step": 9252 + }, + { + "epoch": 0.9760548523206751, + "grad_norm": 0.6427170634269714, + "learning_rate": 2.2025244793667242e-06, + "loss": 1.3001, + "step": 9253 + }, + { + "epoch": 0.9761603375527426, + "grad_norm": 0.6468278765678406, + "learning_rate": 2.1833397758929674e-06, + "loss": 1.2916, + "step": 9254 + }, + { + "epoch": 0.9762658227848101, + "grad_norm": 0.6543973088264465, + "learning_rate": 2.1642388690049643e-06, + "loss": 1.2672, + "step": 9255 + }, + { + "epoch": 0.9763713080168777, + "grad_norm": 0.633226752281189, + "learning_rate": 2.1452217608430857e-06, + "loss": 1.2737, + "step": 9256 + }, + { + "epoch": 0.9764767932489451, + "grad_norm": 0.6825886964797974, + "learning_rate": 2.126288453538211e-06, + "loss": 1.3268, + "step": 9257 + }, + { + "epoch": 0.9765822784810126, + "grad_norm": 0.6364591121673584, + "learning_rate": 2.107438949211976e-06, + "loss": 1.276, + "step": 9258 + }, + { + "epoch": 0.9766877637130802, + "grad_norm": 0.6414640545845032, + "learning_rate": 2.0886732499764416e-06, + "loss": 1.2758, + "step": 9259 + }, + { + "epoch": 0.9767932489451476, + "grad_norm": 0.6363616585731506, + "learning_rate": 2.069991357934592e-06, + "loss": 1.2721, + "step": 9260 + }, + { + "epoch": 0.9768987341772152, + "grad_norm": 0.6546957492828369, + "learning_rate": 2.0513932751796695e-06, + "loss": 1.2941, + "step": 9261 + }, + { + "epoch": 0.9770042194092827, + "grad_norm": 
0.6884585618972778, + "learning_rate": 2.0328790037957568e-06, + "loss": 1.2958, + "step": 9262 + }, + { + "epoch": 0.9771097046413502, + "grad_norm": 0.6373911499977112, + "learning_rate": 2.0144485458574446e-06, + "loss": 1.2994, + "step": 9263 + }, + { + "epoch": 0.9772151898734177, + "grad_norm": 0.7087931632995605, + "learning_rate": 1.9961019034299976e-06, + "loss": 1.3202, + "step": 9264 + }, + { + "epoch": 0.9773206751054853, + "grad_norm": 0.6644478440284729, + "learning_rate": 1.977839078569188e-06, + "loss": 1.2785, + "step": 9265 + }, + { + "epoch": 0.9774261603375527, + "grad_norm": 0.6501903533935547, + "learning_rate": 1.959660073321545e-06, + "loss": 1.3112, + "step": 9266 + }, + { + "epoch": 0.9775316455696202, + "grad_norm": 0.6364040374755859, + "learning_rate": 1.94156488972394e-06, + "loss": 1.2878, + "step": 9267 + }, + { + "epoch": 0.9776371308016878, + "grad_norm": 0.6516813635826111, + "learning_rate": 1.9235535298042506e-06, + "loss": 1.2842, + "step": 9268 + }, + { + "epoch": 0.9777426160337552, + "grad_norm": 0.668470561504364, + "learning_rate": 1.905625995580612e-06, + "loss": 1.2885, + "step": 9269 + }, + { + "epoch": 0.9778481012658228, + "grad_norm": 0.6492795348167419, + "learning_rate": 1.8877822890618346e-06, + "loss": 1.3031, + "step": 9270 + }, + { + "epoch": 0.9779535864978903, + "grad_norm": 0.6409626007080078, + "learning_rate": 1.8700224122475683e-06, + "loss": 1.2573, + "step": 9271 + }, + { + "epoch": 0.9780590717299578, + "grad_norm": 0.6338777542114258, + "learning_rate": 1.8523463671278052e-06, + "loss": 1.2895, + "step": 9272 + }, + { + "epoch": 0.9781645569620253, + "grad_norm": 0.6054602861404419, + "learning_rate": 1.8347541556832104e-06, + "loss": 1.2917, + "step": 9273 + }, + { + "epoch": 0.9782700421940929, + "grad_norm": 0.6518710851669312, + "learning_rate": 1.8172457798850407e-06, + "loss": 1.3027, + "step": 9274 + }, + { + "epoch": 0.9783755274261603, + "grad_norm": 0.6286507248878479, + "learning_rate": 
1.7998212416953096e-06, + "loss": 1.299, + "step": 9275 + }, + { + "epoch": 0.9784810126582278, + "grad_norm": 0.6832780241966248, + "learning_rate": 1.782480543066456e-06, + "loss": 1.264, + "step": 9276 + }, + { + "epoch": 0.9785864978902954, + "grad_norm": 0.6388874650001526, + "learning_rate": 1.7652236859416748e-06, + "loss": 1.2711, + "step": 9277 + }, + { + "epoch": 0.9786919831223628, + "grad_norm": 0.6543870568275452, + "learning_rate": 1.7480506722545864e-06, + "loss": 1.3083, + "step": 9278 + }, + { + "epoch": 0.9787974683544304, + "grad_norm": 0.6778031587600708, + "learning_rate": 1.7309615039294847e-06, + "loss": 1.2882, + "step": 9279 + }, + { + "epoch": 0.9789029535864979, + "grad_norm": 0.6584538221359253, + "learning_rate": 1.7139561828813377e-06, + "loss": 1.2506, + "step": 9280 + }, + { + "epoch": 0.9790084388185654, + "grad_norm": 0.697892963886261, + "learning_rate": 1.6970347110157879e-06, + "loss": 1.2773, + "step": 9281 + }, + { + "epoch": 0.9791139240506329, + "grad_norm": 0.6276612281799316, + "learning_rate": 1.6801970902288188e-06, + "loss": 1.2786, + "step": 9282 + }, + { + "epoch": 0.9792194092827005, + "grad_norm": 0.6208446025848389, + "learning_rate": 1.6634433224072543e-06, + "loss": 1.2787, + "step": 9283 + }, + { + "epoch": 0.9793248945147679, + "grad_norm": 0.6272377967834473, + "learning_rate": 1.6467734094283427e-06, + "loss": 1.2991, + "step": 9284 + }, + { + "epoch": 0.9794303797468354, + "grad_norm": 0.6567824482917786, + "learning_rate": 1.630187353160173e-06, + "loss": 1.3271, + "step": 9285 + }, + { + "epoch": 0.979535864978903, + "grad_norm": 0.6372593641281128, + "learning_rate": 1.6136851554611753e-06, + "loss": 1.271, + "step": 9286 + }, + { + "epoch": 0.9796413502109704, + "grad_norm": 0.6192654371261597, + "learning_rate": 1.5972668181805373e-06, + "loss": 1.2958, + "step": 9287 + }, + { + "epoch": 0.979746835443038, + "grad_norm": 0.6258891820907593, + "learning_rate": 1.580932343158037e-06, + "loss": 1.2904, + 
"step": 9288 + }, + { + "epoch": 0.9798523206751055, + "grad_norm": 0.6421869397163391, + "learning_rate": 1.5646817322240436e-06, + "loss": 1.2838, + "step": 9289 + }, + { + "epoch": 0.979957805907173, + "grad_norm": 0.6185668110847473, + "learning_rate": 1.5485149871995175e-06, + "loss": 1.2881, + "step": 9290 + }, + { + "epoch": 0.9800632911392405, + "grad_norm": 0.6099067330360413, + "learning_rate": 1.532432109895926e-06, + "loss": 1.2677, + "step": 9291 + }, + { + "epoch": 0.9801687763713081, + "grad_norm": 0.6380749940872192, + "learning_rate": 1.5164331021155774e-06, + "loss": 1.2655, + "step": 9292 + }, + { + "epoch": 0.9802742616033755, + "grad_norm": 0.6423078179359436, + "learning_rate": 1.5005179656511213e-06, + "loss": 1.2972, + "step": 9293 + }, + { + "epoch": 0.980379746835443, + "grad_norm": 0.648646354675293, + "learning_rate": 1.4846867022860477e-06, + "loss": 1.3087, + "step": 9294 + }, + { + "epoch": 0.9804852320675106, + "grad_norm": 0.635659396648407, + "learning_rate": 1.4689393137941876e-06, + "loss": 1.2925, + "step": 9295 + }, + { + "epoch": 0.980590717299578, + "grad_norm": 0.6364109516143799, + "learning_rate": 1.4532758019402958e-06, + "loss": 1.2826, + "step": 9296 + }, + { + "epoch": 0.9806962025316456, + "grad_norm": 0.641211986541748, + "learning_rate": 1.4376961684793854e-06, + "loss": 1.2835, + "step": 9297 + }, + { + "epoch": 0.9808016877637131, + "grad_norm": 0.6186099052429199, + "learning_rate": 1.4222004151572265e-06, + "loss": 1.3037, + "step": 9298 + }, + { + "epoch": 0.9809071729957806, + "grad_norm": 0.6724885106086731, + "learning_rate": 1.4067885437103467e-06, + "loss": 1.283, + "step": 9299 + }, + { + "epoch": 0.9810126582278481, + "grad_norm": 0.6529941558837891, + "learning_rate": 1.3914605558656146e-06, + "loss": 1.3083, + "step": 9300 + }, + { + "epoch": 0.9811181434599157, + "grad_norm": 0.6553106904029846, + "learning_rate": 1.376216453340573e-06, + "loss": 1.2612, + "step": 9301 + }, + { + "epoch": 
0.9812236286919831, + "grad_norm": 0.6234880685806274, + "learning_rate": 1.3610562378435221e-06, + "loss": 1.2855, + "step": 9302 + }, + { + "epoch": 0.9813291139240506, + "grad_norm": 0.6682955026626587, + "learning_rate": 1.345979911073103e-06, + "loss": 1.3117, + "step": 9303 + }, + { + "epoch": 0.9814345991561182, + "grad_norm": 0.6391987800598145, + "learning_rate": 1.3309874747187978e-06, + "loss": 1.2656, + "step": 9304 + }, + { + "epoch": 0.9815400843881856, + "grad_norm": 0.6273784041404724, + "learning_rate": 1.3160789304605958e-06, + "loss": 1.3014, + "step": 9305 + }, + { + "epoch": 0.9816455696202532, + "grad_norm": 0.6295777559280396, + "learning_rate": 1.3012542799689108e-06, + "loss": 1.2974, + "step": 9306 + }, + { + "epoch": 0.9817510548523207, + "grad_norm": 0.6395066976547241, + "learning_rate": 1.286513524905164e-06, + "loss": 1.3239, + "step": 9307 + }, + { + "epoch": 0.9818565400843882, + "grad_norm": 0.6329478025436401, + "learning_rate": 1.2718566669208675e-06, + "loss": 1.2981, + "step": 9308 + }, + { + "epoch": 0.9819620253164557, + "grad_norm": 0.6157691478729248, + "learning_rate": 1.2572837076586241e-06, + "loss": 1.2432, + "step": 9309 + }, + { + "epoch": 0.9820675105485233, + "grad_norm": 0.6334161162376404, + "learning_rate": 1.2427946487512941e-06, + "loss": 1.2982, + "step": 9310 + }, + { + "epoch": 0.9821729957805907, + "grad_norm": 0.6469703316688538, + "learning_rate": 1.2283894918224125e-06, + "loss": 1.2815, + "step": 9311 + }, + { + "epoch": 0.9822784810126582, + "grad_norm": 0.6313638687133789, + "learning_rate": 1.2140682384862712e-06, + "loss": 1.2951, + "step": 9312 + }, + { + "epoch": 0.9823839662447258, + "grad_norm": 0.6270200610160828, + "learning_rate": 1.199830890347503e-06, + "loss": 1.2821, + "step": 9313 + }, + { + "epoch": 0.9824894514767932, + "grad_norm": 0.6246111392974854, + "learning_rate": 1.185677449001582e-06, + "loss": 1.3201, + "step": 9314 + }, + { + "epoch": 0.9825949367088608, + "grad_norm": 
0.6382172703742981, + "learning_rate": 1.1716079160344061e-06, + "loss": 1.2611, + "step": 9315 + }, + { + "epoch": 0.9827004219409282, + "grad_norm": 0.6173139214515686, + "learning_rate": 1.1576222930225478e-06, + "loss": 1.3093, + "step": 9316 + }, + { + "epoch": 0.9828059071729958, + "grad_norm": 0.6548547744750977, + "learning_rate": 1.143720581533253e-06, + "loss": 1.272, + "step": 9317 + }, + { + "epoch": 0.9829113924050633, + "grad_norm": 0.6818305850028992, + "learning_rate": 1.1299027831241094e-06, + "loss": 1.2634, + "step": 9318 + }, + { + "epoch": 0.9830168776371307, + "grad_norm": 0.7117757201194763, + "learning_rate": 1.1161688993435449e-06, + "loss": 1.2873, + "step": 9319 + }, + { + "epoch": 0.9831223628691983, + "grad_norm": 0.6463271379470825, + "learning_rate": 1.1025189317305784e-06, + "loss": 1.2737, + "step": 9320 + }, + { + "epoch": 0.9832278481012658, + "grad_norm": 0.6609259843826294, + "learning_rate": 1.0889528818147366e-06, + "loss": 1.3151, + "step": 9321 + }, + { + "epoch": 0.9833333333333333, + "grad_norm": 0.6530489921569824, + "learning_rate": 1.0754707511161365e-06, + "loss": 1.3023, + "step": 9322 + }, + { + "epoch": 0.9834388185654008, + "grad_norm": 0.6360712647438049, + "learning_rate": 1.0620725411454868e-06, + "loss": 1.3496, + "step": 9323 + }, + { + "epoch": 0.9835443037974684, + "grad_norm": 0.6301083564758301, + "learning_rate": 1.0487582534040863e-06, + "loss": 1.2964, + "step": 9324 + }, + { + "epoch": 0.9836497890295358, + "grad_norm": 0.6334710717201233, + "learning_rate": 1.0355278893839915e-06, + "loss": 1.2937, + "step": 9325 + }, + { + "epoch": 0.9837552742616034, + "grad_norm": 0.6545096635818481, + "learning_rate": 1.0223814505676832e-06, + "loss": 1.3054, + "step": 9326 + }, + { + "epoch": 0.9838607594936709, + "grad_norm": 0.6377995610237122, + "learning_rate": 1.009318938428233e-06, + "loss": 1.3032, + "step": 9327 + }, + { + "epoch": 0.9839662447257383, + "grad_norm": 0.6432526707649231, + "learning_rate": 
9.963403544294702e-07, + "loss": 1.2587, + "step": 9328 + }, + { + "epoch": 0.9840717299578059, + "grad_norm": 0.735296905040741, + "learning_rate": 9.834457000255647e-07, + "loss": 1.2757, + "step": 9329 + }, + { + "epoch": 0.9841772151898734, + "grad_norm": 0.6314533948898315, + "learning_rate": 9.706349766615275e-07, + "loss": 1.2715, + "step": 9330 + }, + { + "epoch": 0.9842827004219409, + "grad_norm": 0.6441712379455566, + "learning_rate": 9.579081857728766e-07, + "loss": 1.2998, + "step": 9331 + }, + { + "epoch": 0.9843881856540084, + "grad_norm": 0.614608883857727, + "learning_rate": 9.452653287856383e-07, + "loss": 1.272, + "step": 9332 + }, + { + "epoch": 0.984493670886076, + "grad_norm": 0.6458448767662048, + "learning_rate": 9.327064071165126e-07, + "loss": 1.3145, + "step": 9333 + }, + { + "epoch": 0.9845991561181434, + "grad_norm": 0.6802945137023926, + "learning_rate": 9.202314221728735e-07, + "loss": 1.3087, + "step": 9334 + }, + { + "epoch": 0.984704641350211, + "grad_norm": 0.6255787014961243, + "learning_rate": 9.078403753525199e-07, + "loss": 1.2668, + "step": 9335 + }, + { + "epoch": 0.9848101265822785, + "grad_norm": 0.6371053457260132, + "learning_rate": 8.955332680440076e-07, + "loss": 1.2947, + "step": 9336 + }, + { + "epoch": 0.984915611814346, + "grad_norm": 0.6310151815414429, + "learning_rate": 8.833101016263168e-07, + "loss": 1.2982, + "step": 9337 + }, + { + "epoch": 0.9850210970464135, + "grad_norm": 0.6432595252990723, + "learning_rate": 8.711708774691851e-07, + "loss": 1.2837, + "step": 9338 + }, + { + "epoch": 0.985126582278481, + "grad_norm": 0.6738355755805969, + "learning_rate": 8.591155969327746e-07, + "loss": 1.2944, + "step": 9339 + }, + { + "epoch": 0.9852320675105485, + "grad_norm": 0.6326969265937805, + "learning_rate": 8.47144261368088e-07, + "loss": 1.3182, + "step": 9340 + }, + { + "epoch": 0.985337552742616, + "grad_norm": 0.6324338316917419, + "learning_rate": 8.352568721165521e-07, + "loss": 1.3096, + "step": 9341 + 
}, + { + "epoch": 0.9854430379746836, + "grad_norm": 0.6554070115089417, + "learning_rate": 8.234534305101015e-07, + "loss": 1.3059, + "step": 9342 + }, + { + "epoch": 0.985548523206751, + "grad_norm": 0.6472079157829285, + "learning_rate": 8.117339378714283e-07, + "loss": 1.3284, + "step": 9343 + }, + { + "epoch": 0.9856540084388186, + "grad_norm": 0.6252392530441284, + "learning_rate": 8.00098395513732e-07, + "loss": 1.2822, + "step": 9344 + }, + { + "epoch": 0.9857594936708861, + "grad_norm": 0.6815301775932312, + "learning_rate": 7.885468047408862e-07, + "loss": 1.2931, + "step": 9345 + }, + { + "epoch": 0.9858649789029535, + "grad_norm": 0.6835627555847168, + "learning_rate": 7.770791668472721e-07, + "loss": 1.3077, + "step": 9346 + }, + { + "epoch": 0.9859704641350211, + "grad_norm": 0.6876530051231384, + "learning_rate": 7.656954831178619e-07, + "loss": 1.3166, + "step": 9347 + }, + { + "epoch": 0.9860759493670886, + "grad_norm": 0.6278359889984131, + "learning_rate": 7.543957548283021e-07, + "loss": 1.3221, + "step": 9348 + }, + { + "epoch": 0.9861814345991561, + "grad_norm": 0.6592279076576233, + "learning_rate": 7.431799832448294e-07, + "loss": 1.2937, + "step": 9349 + }, + { + "epoch": 0.9862869198312236, + "grad_norm": 0.6224283576011658, + "learning_rate": 7.320481696241887e-07, + "loss": 1.2696, + "step": 9350 + }, + { + "epoch": 0.9863924050632912, + "grad_norm": 0.6765812039375305, + "learning_rate": 7.210003152136324e-07, + "loss": 1.3298, + "step": 9351 + }, + { + "epoch": 0.9864978902953586, + "grad_norm": 0.6327465772628784, + "learning_rate": 7.100364212513367e-07, + "loss": 1.2934, + "step": 9352 + }, + { + "epoch": 0.9866033755274262, + "grad_norm": 0.6829757690429688, + "learning_rate": 6.991564889656521e-07, + "loss": 1.313, + "step": 9353 + }, + { + "epoch": 0.9867088607594937, + "grad_norm": 0.6824861168861389, + "learning_rate": 6.883605195759369e-07, + "loss": 1.2777, + "step": 9354 + }, + { + "epoch": 0.9868143459915611, + "grad_norm": 
0.6205384135246277, + "learning_rate": 6.776485142918065e-07, + "loss": 1.3084, + "step": 9355 + }, + { + "epoch": 0.9869198312236287, + "grad_norm": 0.6522344946861267, + "learning_rate": 6.67020474313551e-07, + "loss": 1.2845, + "step": 9356 + }, + { + "epoch": 0.9870253164556962, + "grad_norm": 0.6945660710334778, + "learning_rate": 6.564764008322177e-07, + "loss": 1.2796, + "step": 9357 + }, + { + "epoch": 0.9871308016877637, + "grad_norm": 0.6622933745384216, + "learning_rate": 6.460162950292781e-07, + "loss": 1.27, + "step": 9358 + }, + { + "epoch": 0.9872362869198312, + "grad_norm": 0.6612011194229126, + "learning_rate": 6.356401580767945e-07, + "loss": 1.3219, + "step": 9359 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 0.6472548246383667, + "learning_rate": 6.253479911375037e-07, + "loss": 1.2883, + "step": 9360 + }, + { + "epoch": 0.9874472573839662, + "grad_norm": 0.611268162727356, + "learning_rate": 6.151397953647331e-07, + "loss": 1.2945, + "step": 9361 + }, + { + "epoch": 0.9875527426160338, + "grad_norm": 0.6518707871437073, + "learning_rate": 6.050155719023176e-07, + "loss": 1.3006, + "step": 9362 + }, + { + "epoch": 0.9876582278481013, + "grad_norm": 0.6113879680633545, + "learning_rate": 5.949753218846832e-07, + "loss": 1.292, + "step": 9363 + }, + { + "epoch": 0.9877637130801687, + "grad_norm": 0.6448774337768555, + "learning_rate": 5.850190464369298e-07, + "loss": 1.2982, + "step": 9364 + }, + { + "epoch": 0.9878691983122363, + "grad_norm": 0.641599178314209, + "learning_rate": 5.751467466747484e-07, + "loss": 1.2942, + "step": 9365 + }, + { + "epoch": 0.9879746835443038, + "grad_norm": 0.6225180625915527, + "learning_rate": 5.653584237043374e-07, + "loss": 1.3121, + "step": 9366 + }, + { + "epoch": 0.9880801687763713, + "grad_norm": 0.6334304809570312, + "learning_rate": 5.556540786224862e-07, + "loss": 1.2553, + "step": 9367 + }, + { + "epoch": 0.9881856540084388, + "grad_norm": 0.6864491701126099, + "learning_rate": 
5.460337125167414e-07, + "loss": 1.305, + "step": 9368 + }, + { + "epoch": 0.9882911392405064, + "grad_norm": 0.6534795165061951, + "learning_rate": 5.364973264649908e-07, + "loss": 1.2743, + "step": 9369 + }, + { + "epoch": 0.9883966244725738, + "grad_norm": 0.6500478982925415, + "learning_rate": 5.270449215358797e-07, + "loss": 1.3331, + "step": 9370 + }, + { + "epoch": 0.9885021097046414, + "grad_norm": 0.628669261932373, + "learning_rate": 5.176764987885607e-07, + "loss": 1.2774, + "step": 9371 + }, + { + "epoch": 0.9886075949367089, + "grad_norm": 0.6115462183952332, + "learning_rate": 5.08392059272944e-07, + "loss": 1.2744, + "step": 9372 + }, + { + "epoch": 0.9887130801687763, + "grad_norm": 0.6281651258468628, + "learning_rate": 4.991916040291977e-07, + "loss": 1.2997, + "step": 9373 + }, + { + "epoch": 0.9888185654008439, + "grad_norm": 0.6102859377861023, + "learning_rate": 4.900751340884135e-07, + "loss": 1.2342, + "step": 9374 + }, + { + "epoch": 0.9889240506329114, + "grad_norm": 0.6482998728752136, + "learning_rate": 4.810426504721077e-07, + "loss": 1.3119, + "step": 9375 + }, + { + "epoch": 0.9890295358649789, + "grad_norm": 0.609038233757019, + "learning_rate": 4.720941541923873e-07, + "loss": 1.2498, + "step": 9376 + }, + { + "epoch": 0.9891350210970464, + "grad_norm": 0.6575981974601746, + "learning_rate": 4.632296462520336e-07, + "loss": 1.2453, + "step": 9377 + }, + { + "epoch": 0.989240506329114, + "grad_norm": 0.6420463919639587, + "learning_rate": 4.544491276443352e-07, + "loss": 1.3236, + "step": 9378 + }, + { + "epoch": 0.9893459915611814, + "grad_norm": 0.6354494094848633, + "learning_rate": 4.457525993531719e-07, + "loss": 1.292, + "step": 9379 + }, + { + "epoch": 0.989451476793249, + "grad_norm": 0.6438401937484741, + "learning_rate": 4.371400623530142e-07, + "loss": 1.2809, + "step": 9380 + }, + { + "epoch": 0.9895569620253165, + "grad_norm": 0.6486477851867676, + "learning_rate": 4.2861151760900665e-07, + "loss": 1.245, + "step": 9381 
+ }, + { + "epoch": 0.989662447257384, + "grad_norm": 0.6367839574813843, + "learning_rate": 4.2016696607680147e-07, + "loss": 1.3021, + "step": 9382 + }, + { + "epoch": 0.9897679324894515, + "grad_norm": 0.6425636410713196, + "learning_rate": 4.118064087025586e-07, + "loss": 1.2977, + "step": 9383 + }, + { + "epoch": 0.9898734177215189, + "grad_norm": 0.6433894038200378, + "learning_rate": 4.035298464232784e-07, + "loss": 1.2846, + "step": 9384 + }, + { + "epoch": 0.9899789029535865, + "grad_norm": 0.6561232209205627, + "learning_rate": 3.953372801662192e-07, + "loss": 1.2755, + "step": 9385 + }, + { + "epoch": 0.990084388185654, + "grad_norm": 0.6257612705230713, + "learning_rate": 3.8722871084956313e-07, + "loss": 1.2792, + "step": 9386 + }, + { + "epoch": 0.9901898734177215, + "grad_norm": 0.6446977853775024, + "learning_rate": 3.7920413938175027e-07, + "loss": 1.3116, + "step": 9387 + }, + { + "epoch": 0.990295358649789, + "grad_norm": 0.6415311694145203, + "learning_rate": 3.7126356666214447e-07, + "loss": 1.2655, + "step": 9388 + }, + { + "epoch": 0.9904008438818566, + "grad_norm": 0.700625479221344, + "learning_rate": 3.6340699358036743e-07, + "loss": 1.2911, + "step": 9389 + }, + { + "epoch": 0.990506329113924, + "grad_norm": 0.643754243850708, + "learning_rate": 3.5563442101696486e-07, + "loss": 1.2922, + "step": 9390 + }, + { + "epoch": 0.9906118143459915, + "grad_norm": 0.6332276463508606, + "learning_rate": 3.479458498426569e-07, + "loss": 1.2964, + "step": 9391 + }, + { + "epoch": 0.9907172995780591, + "grad_norm": 0.6820589900016785, + "learning_rate": 3.4034128091917085e-07, + "loss": 1.2967, + "step": 9392 + }, + { + "epoch": 0.9908227848101265, + "grad_norm": 0.6562886238098145, + "learning_rate": 3.328207150986584e-07, + "loss": 1.2935, + "step": 9393 + }, + { + "epoch": 0.9909282700421941, + "grad_norm": 0.6736752390861511, + "learning_rate": 3.2538415322369563e-07, + "loss": 1.2726, + "step": 9394 + }, + { + "epoch": 0.9910337552742616, + 
"grad_norm": 0.6725515127182007, + "learning_rate": 3.180315961276159e-07, + "loss": 1.3393, + "step": 9395 + }, + { + "epoch": 0.9911392405063291, + "grad_norm": 0.7160547375679016, + "learning_rate": 3.107630446344267e-07, + "loss": 1.2733, + "step": 9396 + }, + { + "epoch": 0.9912447257383966, + "grad_norm": 0.6248162984848022, + "learning_rate": 3.035784995584767e-07, + "loss": 1.3001, + "step": 9397 + }, + { + "epoch": 0.9913502109704642, + "grad_norm": 0.654301106929779, + "learning_rate": 2.964779617049551e-07, + "loss": 1.2992, + "step": 9398 + }, + { + "epoch": 0.9914556962025316, + "grad_norm": 0.6266123056411743, + "learning_rate": 2.8946143186930896e-07, + "loss": 1.3134, + "step": 9399 + }, + { + "epoch": 0.9915611814345991, + "grad_norm": 0.6299537420272827, + "learning_rate": 2.825289108379925e-07, + "loss": 1.2955, + "step": 9400 + }, + { + "epoch": 0.9916666666666667, + "grad_norm": 0.6288341879844666, + "learning_rate": 2.756803993877177e-07, + "loss": 1.2497, + "step": 9401 + }, + { + "epoch": 0.9917721518987341, + "grad_norm": 0.6181665658950806, + "learning_rate": 2.689158982859541e-07, + "loss": 1.2942, + "step": 9402 + }, + { + "epoch": 0.9918776371308017, + "grad_norm": 0.6769506931304932, + "learning_rate": 2.622354082905953e-07, + "loss": 1.2395, + "step": 9403 + }, + { + "epoch": 0.9919831223628692, + "grad_norm": 0.6782415509223938, + "learning_rate": 2.556389301502926e-07, + "loss": 1.2765, + "step": 9404 + }, + { + "epoch": 0.9920886075949367, + "grad_norm": 0.6581709980964661, + "learning_rate": 2.491264646042879e-07, + "loss": 1.2644, + "step": 9405 + }, + { + "epoch": 0.9921940928270042, + "grad_norm": 0.6285138130187988, + "learning_rate": 2.426980123821643e-07, + "loss": 1.2714, + "step": 9406 + }, + { + "epoch": 0.9922995780590718, + "grad_norm": 0.6478481888771057, + "learning_rate": 2.3635357420442872e-07, + "loss": 1.2649, + "step": 9407 + }, + { + "epoch": 0.9924050632911392, + "grad_norm": 0.6524617671966553, + 
"learning_rate": 2.3009315078192926e-07, + "loss": 1.291, + "step": 9408 + }, + { + "epoch": 0.9925105485232067, + "grad_norm": 0.6337723135948181, + "learning_rate": 2.2391674281610486e-07, + "loss": 1.2945, + "step": 9409 + }, + { + "epoch": 0.9926160337552743, + "grad_norm": 0.6563867330551147, + "learning_rate": 2.1782435099923503e-07, + "loss": 1.2782, + "step": 9410 + }, + { + "epoch": 0.9927215189873417, + "grad_norm": 0.6480786204338074, + "learning_rate": 2.1181597601385716e-07, + "loss": 1.3314, + "step": 9411 + }, + { + "epoch": 0.9928270042194093, + "grad_norm": 0.6253181099891663, + "learning_rate": 2.05891618533266e-07, + "loss": 1.2532, + "step": 9412 + }, + { + "epoch": 0.9929324894514768, + "grad_norm": 0.6444860100746155, + "learning_rate": 2.0005127922134713e-07, + "loss": 1.2641, + "step": 9413 + }, + { + "epoch": 0.9930379746835443, + "grad_norm": 0.6213670969009399, + "learning_rate": 1.942949587324938e-07, + "loss": 1.316, + "step": 9414 + }, + { + "epoch": 0.9931434599156118, + "grad_norm": 0.6745802760124207, + "learning_rate": 1.8862265771177333e-07, + "loss": 1.3121, + "step": 9415 + }, + { + "epoch": 0.9932489451476794, + "grad_norm": 0.6585988402366638, + "learning_rate": 1.8303437679476065e-07, + "loss": 1.2849, + "step": 9416 + }, + { + "epoch": 0.9933544303797468, + "grad_norm": 0.6427037715911865, + "learning_rate": 1.775301166077048e-07, + "loss": 1.3188, + "step": 9417 + }, + { + "epoch": 0.9934599156118143, + "grad_norm": 0.6339648365974426, + "learning_rate": 1.7210987776736243e-07, + "loss": 1.2781, + "step": 9418 + }, + { + "epoch": 0.9935654008438819, + "grad_norm": 0.6431618928909302, + "learning_rate": 1.6677366088099777e-07, + "loss": 1.3146, + "step": 9419 + }, + { + "epoch": 0.9936708860759493, + "grad_norm": 0.6454148888587952, + "learning_rate": 1.6152146654671573e-07, + "loss": 1.2519, + "step": 9420 + }, + { + "epoch": 0.9937763713080169, + "grad_norm": 0.6315946578979492, + "learning_rate": 1.5635329535304554e-07, + 
"loss": 1.3264, + "step": 9421 + }, + { + "epoch": 0.9938818565400844, + "grad_norm": 0.6354814767837524, + "learning_rate": 1.5126914787894074e-07, + "loss": 1.3084, + "step": 9422 + }, + { + "epoch": 0.9939873417721519, + "grad_norm": 0.6677966713905334, + "learning_rate": 1.4626902469427882e-07, + "loss": 1.2793, + "step": 9423 + }, + { + "epoch": 0.9940928270042194, + "grad_norm": 0.679157555103302, + "learning_rate": 1.4135292635927832e-07, + "loss": 1.2694, + "step": 9424 + }, + { + "epoch": 0.994198312236287, + "grad_norm": 0.619487464427948, + "learning_rate": 1.365208534248319e-07, + "loss": 1.2781, + "step": 9425 + }, + { + "epoch": 0.9943037974683544, + "grad_norm": 0.6222397089004517, + "learning_rate": 1.3177280643233979e-07, + "loss": 1.3212, + "step": 9426 + }, + { + "epoch": 0.994409282700422, + "grad_norm": 0.6524842977523804, + "learning_rate": 1.271087859138764e-07, + "loss": 1.3142, + "step": 9427 + }, + { + "epoch": 0.9945147679324895, + "grad_norm": 0.6176474690437317, + "learning_rate": 1.2252879239210702e-07, + "loss": 1.2799, + "step": 9428 + }, + { + "epoch": 0.9946202531645569, + "grad_norm": 0.6307728886604309, + "learning_rate": 1.1803282638020441e-07, + "loss": 1.3164, + "step": 9429 + }, + { + "epoch": 0.9947257383966245, + "grad_norm": 0.6293161511421204, + "learning_rate": 1.1362088838193229e-07, + "loss": 1.2542, + "step": 9430 + }, + { + "epoch": 0.994831223628692, + "grad_norm": 0.6539080739021301, + "learning_rate": 1.0929297889172852e-07, + "loss": 1.2773, + "step": 9431 + }, + { + "epoch": 0.9949367088607595, + "grad_norm": 0.6174216270446777, + "learning_rate": 1.0504909839462173e-07, + "loss": 1.3124, + "step": 9432 + }, + { + "epoch": 0.995042194092827, + "grad_norm": 0.6271525621414185, + "learning_rate": 1.008892473659817e-07, + "loss": 1.2855, + "step": 9433 + }, + { + "epoch": 0.9951476793248946, + "grad_norm": 0.6262660026550293, + "learning_rate": 9.68134262721021e-08, + "loss": 1.2849, + "step": 9434 + }, + { + 
"epoch": 0.995253164556962, + "grad_norm": 0.6394692659378052, + "learning_rate": 9.282163556953437e-08, + "loss": 1.2821, + "step": 9435 + }, + { + "epoch": 0.9953586497890295, + "grad_norm": 0.6491167545318604, + "learning_rate": 8.891387570575393e-08, + "loss": 1.2737, + "step": 9436 + }, + { + "epoch": 0.9954641350210971, + "grad_norm": 0.6151066422462463, + "learning_rate": 8.509014711857721e-08, + "loss": 1.2749, + "step": 9437 + }, + { + "epoch": 0.9955696202531645, + "grad_norm": 0.6432328820228577, + "learning_rate": 8.135045023641152e-08, + "loss": 1.2848, + "step": 9438 + }, + { + "epoch": 0.9956751054852321, + "grad_norm": 0.6380372643470764, + "learning_rate": 7.769478547842157e-08, + "loss": 1.3251, + "step": 9439 + }, + { + "epoch": 0.9957805907172996, + "grad_norm": 0.6378102898597717, + "learning_rate": 7.412315325411312e-08, + "loss": 1.2902, + "step": 9440 + }, + { + "epoch": 0.9958860759493671, + "grad_norm": 0.6246368288993835, + "learning_rate": 7.063555396383259e-08, + "loss": 1.2944, + "step": 9441 + }, + { + "epoch": 0.9959915611814346, + "grad_norm": 0.6307075619697571, + "learning_rate": 6.723198799826746e-08, + "loss": 1.3066, + "step": 9442 + }, + { + "epoch": 0.9960970464135022, + "grad_norm": 0.6309693455696106, + "learning_rate": 6.391245573894588e-08, + "loss": 1.2693, + "step": 9443 + }, + { + "epoch": 0.9962025316455696, + "grad_norm": 0.6560496687889099, + "learning_rate": 6.067695755765379e-08, + "loss": 1.2958, + "step": 9444 + }, + { + "epoch": 0.9963080168776371, + "grad_norm": 0.643822193145752, + "learning_rate": 5.7525493817101035e-08, + "loss": 1.2977, + "step": 9445 + }, + { + "epoch": 0.9964135021097047, + "grad_norm": 0.6737205982208252, + "learning_rate": 5.4458064870338553e-08, + "loss": 1.2862, + "step": 9446 + }, + { + "epoch": 0.9965189873417721, + "grad_norm": 0.7465996146202087, + "learning_rate": 5.147467106117465e-08, + "loss": 1.3001, + "step": 9447 + }, + { + "epoch": 0.9966244725738397, + "grad_norm": 
0.6036069989204407, + "learning_rate": 4.85753127237587e-08, + "loss": 1.3022, + "step": 9448 + }, + { + "epoch": 0.9967299578059071, + "grad_norm": 0.6353800296783447, + "learning_rate": 4.575999018316401e-08, + "loss": 1.3215, + "step": 9449 + }, + { + "epoch": 0.9968354430379747, + "grad_norm": 0.6332311630249023, + "learning_rate": 4.302870375472168e-08, + "loss": 1.318, + "step": 9450 + }, + { + "epoch": 0.9969409282700422, + "grad_norm": 0.6269175410270691, + "learning_rate": 4.038145374460345e-08, + "loss": 1.2967, + "step": 9451 + }, + { + "epoch": 0.9970464135021097, + "grad_norm": 0.6788895726203918, + "learning_rate": 3.781824044932214e-08, + "loss": 1.3373, + "step": 9452 + }, + { + "epoch": 0.9971518987341772, + "grad_norm": 0.6452693939208984, + "learning_rate": 3.533906415614796e-08, + "loss": 1.2793, + "step": 9453 + }, + { + "epoch": 0.9972573839662447, + "grad_norm": 0.6619746088981628, + "learning_rate": 3.294392514285871e-08, + "loss": 1.2767, + "step": 9454 + }, + { + "epoch": 0.9973628691983122, + "grad_norm": 0.6345202326774597, + "learning_rate": 3.0632823677906316e-08, + "loss": 1.2729, + "step": 9455 + }, + { + "epoch": 0.9974683544303797, + "grad_norm": 0.6180998682975769, + "learning_rate": 2.8405760020250304e-08, + "loss": 1.2772, + "step": 9456 + }, + { + "epoch": 0.9975738396624473, + "grad_norm": 0.6508226990699768, + "learning_rate": 2.6262734419441047e-08, + "loss": 1.2693, + "step": 9457 + }, + { + "epoch": 0.9976793248945147, + "grad_norm": 0.652187168598175, + "learning_rate": 2.420374711561979e-08, + "loss": 1.2832, + "step": 9458 + }, + { + "epoch": 0.9977848101265823, + "grad_norm": 0.6221712231636047, + "learning_rate": 2.2228798339435363e-08, + "loss": 1.255, + "step": 9459 + }, + { + "epoch": 0.9978902953586498, + "grad_norm": 0.6260695457458496, + "learning_rate": 2.0337888312210727e-08, + "loss": 1.2995, + "step": 9460 + }, + { + "epoch": 0.9979957805907173, + "grad_norm": 0.6160565614700317, + "learning_rate": 
1.8531017245942972e-08, + "loss": 1.291, + "step": 9461 + }, + { + "epoch": 0.9981012658227848, + "grad_norm": 0.6190848350524902, + "learning_rate": 1.6808185342970238e-08, + "loss": 1.2968, + "step": 9462 + }, + { + "epoch": 0.9982067510548523, + "grad_norm": 0.8318657875061035, + "learning_rate": 1.516939279638807e-08, + "loss": 1.3036, + "step": 9463 + }, + { + "epoch": 0.9983122362869198, + "grad_norm": 0.6260751485824585, + "learning_rate": 1.3614639789882866e-08, + "loss": 1.3056, + "step": 9464 + }, + { + "epoch": 0.9984177215189873, + "grad_norm": 0.6026712656021118, + "learning_rate": 1.214392649756535e-08, + "loss": 1.3281, + "step": 9465 + }, + { + "epoch": 0.9985232067510549, + "grad_norm": 0.6214699149131775, + "learning_rate": 1.075725308438691e-08, + "loss": 1.2766, + "step": 9466 + }, + { + "epoch": 0.9986286919831223, + "grad_norm": 0.611592710018158, + "learning_rate": 9.454619705556722e-09, + "loss": 1.2765, + "step": 9467 + }, + { + "epoch": 0.9987341772151899, + "grad_norm": 0.6759645342826843, + "learning_rate": 8.236026507124628e-09, + "loss": 1.2883, + "step": 9468 + }, + { + "epoch": 0.9988396624472574, + "grad_norm": 0.6379421949386597, + "learning_rate": 7.101473625648058e-09, + "loss": 1.28, + "step": 9469 + }, + { + "epoch": 0.9989451476793249, + "grad_norm": 0.6191271543502808, + "learning_rate": 6.050961188358573e-09, + "loss": 1.3009, + "step": 9470 + }, + { + "epoch": 0.9990506329113924, + "grad_norm": 0.6771349310874939, + "learning_rate": 5.084489312745521e-09, + "loss": 1.3034, + "step": 9471 + }, + { + "epoch": 0.99915611814346, + "grad_norm": 0.6469061374664307, + "learning_rate": 4.202058107305451e-09, + "loss": 1.2615, + "step": 9472 + }, + { + "epoch": 0.9992616033755274, + "grad_norm": 0.6486461758613586, + "learning_rate": 3.403667670792698e-09, + "loss": 1.2998, + "step": 9473 + }, + { + "epoch": 0.9993670886075949, + "grad_norm": 0.6250303983688354, + "learning_rate": 2.689318092718995e-09, + "loss": 1.3034, + "step": 
9474 + }, + { + "epoch": 0.9994725738396625, + "grad_norm": 0.6263360977172852, + "learning_rate": 2.059009453103666e-09, + "loss": 1.2839, + "step": 9475 + }, + { + "epoch": 0.9995780590717299, + "grad_norm": 0.6649500131607056, + "learning_rate": 1.5127418226401623e-09, + "loss": 1.2814, + "step": 9476 + }, + { + "epoch": 0.9996835443037975, + "grad_norm": 0.6585116386413574, + "learning_rate": 1.0505152625295278e-09, + "loss": 1.2841, + "step": 9477 + }, + { + "epoch": 0.999789029535865, + "grad_norm": 0.6414634585380554, + "learning_rate": 6.723298245636666e-10, + "loss": 1.3098, + "step": 9478 + }, + { + "epoch": 0.9998945147679325, + "grad_norm": 0.6560307145118713, + "learning_rate": 3.781855510420762e-10, + "loss": 1.2884, + "step": 9479 + }, + { + "epoch": 1.0, + "grad_norm": 1.8646347522735596, + "learning_rate": 1.6808247493838026e-10, + "loss": 1.239, + "step": 9480 + } + ], + "logging_steps": 1, + "max_steps": 9480, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.832308198648013e+16, + "train_batch_size": 1024, + "trial_name": null, + "trial_params": null +} diff --git a/saves-llama-cosine/checkpoint-9480/training_args.bin b/saves-llama-cosine/checkpoint-9480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ff2474f884cfde4d0a172d7dcc8b1902908a2b74 --- /dev/null +++ b/saves-llama-cosine/checkpoint-9480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70bc143c989a24bde4123158c92fd3dd5c721ab6a8986ba7ee409d5df94683b6 +size 5176 diff --git a/saves-llama-cosine/config.json b/saves-llama-cosine/config.json new file mode 100644 index 
0000000000000000000000000000000000000000..d241c1935481613c5259df93a97d2cacb314defb --- /dev/null +++ b/saves-llama-cosine/config.json @@ -0,0 +1,28 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 768, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-llama-cosine/generation_config.json b/saves-llama-cosine/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b2fc224da8a3685f78c733a0ef85e67242c17b5a --- /dev/null +++ b/saves-llama-cosine/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.42.4" +} diff --git a/saves-llama-cosine/model.safetensors b/saves-llama-cosine/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7d4dbb9e44bb90fe03de5cf81edd5f8613b69244 --- /dev/null +++ b/saves-llama-cosine/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de33d4bf5107f5e47dc96c97470aad1b526688636f37d68afb095cf11e6613ea +size 8346712 diff --git a/saves-llama-cosine/result.log b/saves-llama-cosine/result.log new file mode 100644 index 0000000000000000000000000000000000000000..7a85354c21a9e5276a0099e5b81fc956402d298b --- /dev/null +++ b/saves-llama-cosine/result.log @@ -0,0 +1 @@ +{'train_runtime': 2015.1004, 'train_samples_per_second': 4816.933, 'train_steps_per_second': 4.704, 'train_loss': 1.5626397716848157, 
'epoch': 1.0} \ No newline at end of file diff --git a/saves-llama-cosine/special_tokens_map.json b/saves-llama-cosine/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-llama-cosine/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-llama-cosine/tokenizer.json b/saves-llama-cosine/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-llama-cosine/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + 
}, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + 
"Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + 
"è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + 
"Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + 
"éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + 
"ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 
872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 
1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 
1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + 
"Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, 
+ "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + 
"Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + 
"arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + 
"ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 
1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-llama-cosine/tokenizer_config.json b/saves-llama-cosine/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-llama-cosine/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + 
"model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-llama/checkpoint-9480/config.json b/saves-llama/checkpoint-9480/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d241c1935481613c5259df93a97d2cacb314defb --- /dev/null +++ b/saves-llama/checkpoint-9480/config.json @@ -0,0 +1,28 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 768, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-llama/checkpoint-9480/generation_config.json b/saves-llama/checkpoint-9480/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b2fc224da8a3685f78c733a0ef85e67242c17b5a --- /dev/null +++ b/saves-llama/checkpoint-9480/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.42.4" +} diff --git a/saves-llama/checkpoint-9480/model.safetensors b/saves-llama/checkpoint-9480/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7ce218d499446a2f18b5c96f827124e4f6a26868 --- /dev/null +++ b/saves-llama/checkpoint-9480/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23d8cb4fb2fa20d2a09ab95512bb7b9a4bc4ade52e7db7d83a476f8391aa6034 +size 8346712 diff --git a/saves-llama/checkpoint-9480/optimizer.pt 
b/saves-llama/checkpoint-9480/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..8f0554b8431ab4af7f1323395f6efc2892f17099 --- /dev/null +++ b/saves-llama/checkpoint-9480/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83850cbd4b18f864a5d04779274a53a9941b9983753bd2a7f34c393bddf1372d +size 16706402 diff --git a/saves-llama/checkpoint-9480/rng_state.pth b/saves-llama/checkpoint-9480/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f8291cd6ce87668b786a72f3e93d072fbe54902 --- /dev/null +++ b/saves-llama/checkpoint-9480/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c917636c7a58af68a29056522a757e9f9b99005b776641aa157c536967817d +size 14244 diff --git a/saves-llama/checkpoint-9480/scheduler.pt b/saves-llama/checkpoint-9480/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d4e146fb9369424bca1e920276a86162b00d56fd --- /dev/null +++ b/saves-llama/checkpoint-9480/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c33e6451a8a4598628b3479890d40774857cdcb0d8604c19f1bee5bdefe1e2f9 +size 1064 diff --git a/saves-llama/checkpoint-9480/special_tokens_map.json b/saves-llama/checkpoint-9480/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-llama/checkpoint-9480/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-llama/checkpoint-9480/tokenizer.json 
b/saves-llama/checkpoint-9480/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-llama/checkpoint-9480/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, 
+ "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 
197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 
348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 
495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 
641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + 
"æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + 
"now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 
1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, 
+ "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + 
"åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + 
"Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + 
",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 
1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 
1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + 
"Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-llama/checkpoint-9480/tokenizer_config.json b/saves-llama/checkpoint-9480/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-llama/checkpoint-9480/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", 
+ "errors": "replace", + "model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-llama/checkpoint-9480/trainer_state.json b/saves-llama/checkpoint-9480/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..092473f8c82065a051e351af7ab3d02d05c6da84 --- /dev/null +++ b/saves-llama/checkpoint-9480/trainer_state.json @@ -0,0 +1,6669 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 9480, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0010548523206751054, + "grad_norm": 1.2895276546478271, + "learning_rate": 0.00015822784810126583, + "loss": 7.517, + "step": 10 + }, + { + "epoch": 0.002109704641350211, + "grad_norm": 1.1934658288955688, + "learning_rate": 0.00031645569620253165, + "loss": 6.9161, + "step": 20 + }, + { + "epoch": 0.0031645569620253164, + "grad_norm": 0.8518206477165222, + "learning_rate": 0.00047468354430379745, + "loss": 6.2601, + "step": 30 + }, + { + "epoch": 0.004219409282700422, + "grad_norm": 0.8079057931900024, + "learning_rate": 0.0006329113924050633, + "loss": 5.7759, + "step": 40 + }, + { + "epoch": 0.005274261603375527, + "grad_norm": 0.9002494812011719, + "learning_rate": 0.0007911392405063291, + "loss": 5.312, + "step": 50 + }, + { + "epoch": 0.006329113924050633, + "grad_norm": 0.8760701417922974, + "learning_rate": 0.0009493670886075949, + "loss": 4.7974, + "step": 60 + }, + { + "epoch": 0.007383966244725738, + "grad_norm": 0.9227352738380432, + "learning_rate": 0.0011075949367088608, + "loss": 4.3956, + "step": 70 + }, + { + "epoch": 0.008438818565400843, + "grad_norm": 1.3695945739746094, + "learning_rate": 0.0012658227848101266, + "loss": 4.1306, + "step": 80 + }, + { + "epoch": 0.00949367088607595, + "grad_norm": 0.852247953414917, + 
"learning_rate": 0.0014240506329113926, + "loss": 3.9224, + "step": 90 + }, + { + "epoch": 0.010548523206751054, + "grad_norm": 1.1712802648544312, + "learning_rate": 0.0015, + "loss": 3.7704, + "step": 100 + }, + { + "epoch": 0.011603375527426161, + "grad_norm": 0.7504526972770691, + "learning_rate": 0.0015, + "loss": 3.6178, + "step": 110 + }, + { + "epoch": 0.012658227848101266, + "grad_norm": 1.0794694423675537, + "learning_rate": 0.0015, + "loss": 3.5071, + "step": 120 + }, + { + "epoch": 0.013713080168776372, + "grad_norm": 0.5889136791229248, + "learning_rate": 0.0015, + "loss": 3.3954, + "step": 130 + }, + { + "epoch": 0.014767932489451477, + "grad_norm": 1.0176206827163696, + "learning_rate": 0.0015, + "loss": 3.2981, + "step": 140 + }, + { + "epoch": 0.015822784810126583, + "grad_norm": 0.9184654951095581, + "learning_rate": 0.0015, + "loss": 3.2218, + "step": 150 + }, + { + "epoch": 0.016877637130801686, + "grad_norm": 0.7661342024803162, + "learning_rate": 0.0015, + "loss": 3.1579, + "step": 160 + }, + { + "epoch": 0.017932489451476793, + "grad_norm": 0.7326615452766418, + "learning_rate": 0.0015, + "loss": 3.0868, + "step": 170 + }, + { + "epoch": 0.0189873417721519, + "grad_norm": 0.7881671190261841, + "learning_rate": 0.0015, + "loss": 3.0431, + "step": 180 + }, + { + "epoch": 0.020042194092827006, + "grad_norm": 0.7601766586303711, + "learning_rate": 0.0015, + "loss": 2.9836, + "step": 190 + }, + { + "epoch": 0.02109704641350211, + "grad_norm": 0.8240087628364563, + "learning_rate": 0.0015, + "loss": 2.9313, + "step": 200 + }, + { + "epoch": 0.022151898734177215, + "grad_norm": 0.8446990251541138, + "learning_rate": 0.0015, + "loss": 2.8977, + "step": 210 + }, + { + "epoch": 0.023206751054852322, + "grad_norm": 0.8233978748321533, + "learning_rate": 0.0015, + "loss": 2.8663, + "step": 220 + }, + { + "epoch": 0.024261603375527425, + "grad_norm": 0.665172815322876, + "learning_rate": 0.0015, + "loss": 2.8137, + "step": 230 + }, + { + "epoch": 
0.02531645569620253, + "grad_norm": 1.2146613597869873, + "learning_rate": 0.0015, + "loss": 2.7762, + "step": 240 + }, + { + "epoch": 0.026371308016877638, + "grad_norm": 0.7693014740943909, + "learning_rate": 0.0015, + "loss": 2.7423, + "step": 250 + }, + { + "epoch": 0.027426160337552744, + "grad_norm": 0.6787528991699219, + "learning_rate": 0.0015, + "loss": 2.7056, + "step": 260 + }, + { + "epoch": 0.028481012658227847, + "grad_norm": 1.0836946964263916, + "learning_rate": 0.0015, + "loss": 2.6826, + "step": 270 + }, + { + "epoch": 0.029535864978902954, + "grad_norm": 1.054208517074585, + "learning_rate": 0.0015, + "loss": 2.6498, + "step": 280 + }, + { + "epoch": 0.03059071729957806, + "grad_norm": 0.8280333876609802, + "learning_rate": 0.0015, + "loss": 2.6239, + "step": 290 + }, + { + "epoch": 0.03164556962025317, + "grad_norm": 0.9097690582275391, + "learning_rate": 0.0015, + "loss": 2.5936, + "step": 300 + }, + { + "epoch": 0.03270042194092827, + "grad_norm": 0.7872275114059448, + "learning_rate": 0.0015, + "loss": 2.5714, + "step": 310 + }, + { + "epoch": 0.03375527426160337, + "grad_norm": 0.867725670337677, + "learning_rate": 0.0015, + "loss": 2.541, + "step": 320 + }, + { + "epoch": 0.03481012658227848, + "grad_norm": 1.0791499614715576, + "learning_rate": 0.0015, + "loss": 2.5345, + "step": 330 + }, + { + "epoch": 0.035864978902953586, + "grad_norm": 0.8318942189216614, + "learning_rate": 0.0015, + "loss": 2.5054, + "step": 340 + }, + { + "epoch": 0.03691983122362869, + "grad_norm": 0.8537107706069946, + "learning_rate": 0.0015, + "loss": 2.4696, + "step": 350 + }, + { + "epoch": 0.0379746835443038, + "grad_norm": 0.8908383846282959, + "learning_rate": 0.0015, + "loss": 2.4476, + "step": 360 + }, + { + "epoch": 0.039029535864978905, + "grad_norm": 0.9335813522338867, + "learning_rate": 0.0015, + "loss": 2.436, + "step": 370 + }, + { + "epoch": 0.04008438818565401, + "grad_norm": 0.9983330368995667, + "learning_rate": 0.0015, + "loss": 2.4292, + 
"step": 380 + }, + { + "epoch": 0.04113924050632911, + "grad_norm": 1.0786769390106201, + "learning_rate": 0.0015, + "loss": 2.4068, + "step": 390 + }, + { + "epoch": 0.04219409282700422, + "grad_norm": 1.0811774730682373, + "learning_rate": 0.0015, + "loss": 2.3768, + "step": 400 + }, + { + "epoch": 0.043248945147679324, + "grad_norm": 0.8649297952651978, + "learning_rate": 0.0015, + "loss": 2.3678, + "step": 410 + }, + { + "epoch": 0.04430379746835443, + "grad_norm": 0.7537570595741272, + "learning_rate": 0.0015, + "loss": 2.3521, + "step": 420 + }, + { + "epoch": 0.04535864978902954, + "grad_norm": 0.926679790019989, + "learning_rate": 0.0015, + "loss": 2.3297, + "step": 430 + }, + { + "epoch": 0.046413502109704644, + "grad_norm": 0.7216717004776001, + "learning_rate": 0.0015, + "loss": 2.3084, + "step": 440 + }, + { + "epoch": 0.04746835443037975, + "grad_norm": 1.0042897462844849, + "learning_rate": 0.0015, + "loss": 2.3136, + "step": 450 + }, + { + "epoch": 0.04852320675105485, + "grad_norm": 0.8097466826438904, + "learning_rate": 0.0015, + "loss": 2.2885, + "step": 460 + }, + { + "epoch": 0.049578059071729956, + "grad_norm": 0.8045905232429504, + "learning_rate": 0.0015, + "loss": 2.2683, + "step": 470 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 0.7282854914665222, + "learning_rate": 0.0015, + "loss": 2.2647, + "step": 480 + }, + { + "epoch": 0.05168776371308017, + "grad_norm": 0.7544787526130676, + "learning_rate": 0.0015, + "loss": 2.2406, + "step": 490 + }, + { + "epoch": 0.052742616033755275, + "grad_norm": 0.8302231431007385, + "learning_rate": 0.0015, + "loss": 2.2308, + "step": 500 + }, + { + "epoch": 0.05379746835443038, + "grad_norm": 0.8412373661994934, + "learning_rate": 0.0015, + "loss": 2.2221, + "step": 510 + }, + { + "epoch": 0.05485232067510549, + "grad_norm": 1.0343395471572876, + "learning_rate": 0.0015, + "loss": 2.2048, + "step": 520 + }, + { + "epoch": 0.05590717299578059, + "grad_norm": 0.7773603200912476, + "learning_rate": 
0.0015, + "loss": 2.1894, + "step": 530 + }, + { + "epoch": 0.056962025316455694, + "grad_norm": 1.1590663194656372, + "learning_rate": 0.0015, + "loss": 2.1731, + "step": 540 + }, + { + "epoch": 0.0580168776371308, + "grad_norm": 0.8059956431388855, + "learning_rate": 0.0015, + "loss": 2.1756, + "step": 550 + }, + { + "epoch": 0.05907172995780591, + "grad_norm": 0.8376686573028564, + "learning_rate": 0.0015, + "loss": 2.1363, + "step": 560 + }, + { + "epoch": 0.060126582278481014, + "grad_norm": 0.7209805846214294, + "learning_rate": 0.0015, + "loss": 2.1488, + "step": 570 + }, + { + "epoch": 0.06118143459915612, + "grad_norm": 1.020957112312317, + "learning_rate": 0.0015, + "loss": 2.143, + "step": 580 + }, + { + "epoch": 0.06223628691983123, + "grad_norm": 0.791378915309906, + "learning_rate": 0.0015, + "loss": 2.1135, + "step": 590 + }, + { + "epoch": 0.06329113924050633, + "grad_norm": 0.7916855216026306, + "learning_rate": 0.0015, + "loss": 2.0994, + "step": 600 + }, + { + "epoch": 0.06434599156118144, + "grad_norm": 0.8433970212936401, + "learning_rate": 0.0015, + "loss": 2.1023, + "step": 610 + }, + { + "epoch": 0.06540084388185655, + "grad_norm": 0.7047837972640991, + "learning_rate": 0.0015, + "loss": 2.087, + "step": 620 + }, + { + "epoch": 0.06645569620253164, + "grad_norm": 0.7030632495880127, + "learning_rate": 0.0015, + "loss": 2.0761, + "step": 630 + }, + { + "epoch": 0.06751054852320675, + "grad_norm": 0.8371254801750183, + "learning_rate": 0.0015, + "loss": 2.0831, + "step": 640 + }, + { + "epoch": 0.06856540084388185, + "grad_norm": 0.8732612729072571, + "learning_rate": 0.0015, + "loss": 2.0722, + "step": 650 + }, + { + "epoch": 0.06962025316455696, + "grad_norm": 1.0649760961532593, + "learning_rate": 0.0015, + "loss": 2.0583, + "step": 660 + }, + { + "epoch": 0.07067510548523206, + "grad_norm": 0.8621139526367188, + "learning_rate": 0.0015, + "loss": 2.0427, + "step": 670 + }, + { + "epoch": 0.07172995780590717, + "grad_norm": 
0.7794877290725708, + "learning_rate": 0.0015, + "loss": 2.0327, + "step": 680 + }, + { + "epoch": 0.07278481012658228, + "grad_norm": 1.0803486108779907, + "learning_rate": 0.0015, + "loss": 2.0538, + "step": 690 + }, + { + "epoch": 0.07383966244725738, + "grad_norm": 0.839363694190979, + "learning_rate": 0.0015, + "loss": 2.034, + "step": 700 + }, + { + "epoch": 0.07489451476793249, + "grad_norm": 0.7595516443252563, + "learning_rate": 0.0015, + "loss": 2.0166, + "step": 710 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 0.9506072998046875, + "learning_rate": 0.0015, + "loss": 1.9997, + "step": 720 + }, + { + "epoch": 0.0770042194092827, + "grad_norm": 1.1804136037826538, + "learning_rate": 0.0015, + "loss": 1.9879, + "step": 730 + }, + { + "epoch": 0.07805907172995781, + "grad_norm": 0.8969766497612, + "learning_rate": 0.0015, + "loss": 2.0016, + "step": 740 + }, + { + "epoch": 0.07911392405063292, + "grad_norm": 0.8268862962722778, + "learning_rate": 0.0015, + "loss": 1.9804, + "step": 750 + }, + { + "epoch": 0.08016877637130802, + "grad_norm": 1.1649792194366455, + "learning_rate": 0.0015, + "loss": 1.9767, + "step": 760 + }, + { + "epoch": 0.08122362869198312, + "grad_norm": 1.0339699983596802, + "learning_rate": 0.0015, + "loss": 1.9721, + "step": 770 + }, + { + "epoch": 0.08227848101265822, + "grad_norm": 0.8496246933937073, + "learning_rate": 0.0015, + "loss": 1.968, + "step": 780 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 1.0010546445846558, + "learning_rate": 0.0015, + "loss": 1.9687, + "step": 790 + }, + { + "epoch": 0.08438818565400844, + "grad_norm": 0.9121392965316772, + "learning_rate": 0.0015, + "loss": 1.9478, + "step": 800 + }, + { + "epoch": 0.08544303797468354, + "grad_norm": 0.7203763127326965, + "learning_rate": 0.0015, + "loss": 1.9338, + "step": 810 + }, + { + "epoch": 0.08649789029535865, + "grad_norm": 0.8383666276931763, + "learning_rate": 0.0015, + "loss": 1.9452, + "step": 820 + }, + { + "epoch": 
0.08755274261603375, + "grad_norm": 0.971264123916626, + "learning_rate": 0.0015, + "loss": 1.9576, + "step": 830 + }, + { + "epoch": 0.08860759493670886, + "grad_norm": 0.7471380829811096, + "learning_rate": 0.0015, + "loss": 1.921, + "step": 840 + }, + { + "epoch": 0.08966244725738397, + "grad_norm": 0.7995803952217102, + "learning_rate": 0.0015, + "loss": 1.9197, + "step": 850 + }, + { + "epoch": 0.09071729957805907, + "grad_norm": 0.6843324899673462, + "learning_rate": 0.0015, + "loss": 1.9173, + "step": 860 + }, + { + "epoch": 0.09177215189873418, + "grad_norm": 0.7582018375396729, + "learning_rate": 0.0015, + "loss": 1.92, + "step": 870 + }, + { + "epoch": 0.09282700421940929, + "grad_norm": 0.6837054491043091, + "learning_rate": 0.0015, + "loss": 1.9105, + "step": 880 + }, + { + "epoch": 0.0938818565400844, + "grad_norm": 1.2546217441558838, + "learning_rate": 0.0015, + "loss": 1.9047, + "step": 890 + }, + { + "epoch": 0.0949367088607595, + "grad_norm": 0.8935483694076538, + "learning_rate": 0.0015, + "loss": 1.8991, + "step": 900 + }, + { + "epoch": 0.09599156118143459, + "grad_norm": 0.9508107900619507, + "learning_rate": 0.0015, + "loss": 1.9049, + "step": 910 + }, + { + "epoch": 0.0970464135021097, + "grad_norm": 0.8027315139770508, + "learning_rate": 0.0015, + "loss": 1.895, + "step": 920 + }, + { + "epoch": 0.0981012658227848, + "grad_norm": 0.8933790922164917, + "learning_rate": 0.0015, + "loss": 1.8736, + "step": 930 + }, + { + "epoch": 0.09915611814345991, + "grad_norm": 0.7129293084144592, + "learning_rate": 0.0015, + "loss": 1.8881, + "step": 940 + }, + { + "epoch": 0.10021097046413502, + "grad_norm": 0.6845495700836182, + "learning_rate": 0.0015, + "loss": 1.8655, + "step": 950 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 0.8358139395713806, + "learning_rate": 0.0015, + "loss": 1.8765, + "step": 960 + }, + { + "epoch": 0.10232067510548523, + "grad_norm": 0.7595527768135071, + "learning_rate": 0.0015, + "loss": 1.8794, + "step": 970 + 
}, + { + "epoch": 0.10337552742616034, + "grad_norm": 1.3538849353790283, + "learning_rate": 0.0015, + "loss": 1.8578, + "step": 980 + }, + { + "epoch": 0.10443037974683544, + "grad_norm": 0.8079885840415955, + "learning_rate": 0.0015, + "loss": 1.8542, + "step": 990 + }, + { + "epoch": 0.10548523206751055, + "grad_norm": 0.6989109516143799, + "learning_rate": 0.0015, + "loss": 1.8611, + "step": 1000 + }, + { + "epoch": 0.10654008438818566, + "grad_norm": 0.6828711628913879, + "learning_rate": 0.0015, + "loss": 1.852, + "step": 1010 + }, + { + "epoch": 0.10759493670886076, + "grad_norm": 1.3393311500549316, + "learning_rate": 0.0015, + "loss": 1.8454, + "step": 1020 + }, + { + "epoch": 0.10864978902953587, + "grad_norm": 0.8985198140144348, + "learning_rate": 0.0015, + "loss": 1.8538, + "step": 1030 + }, + { + "epoch": 0.10970464135021098, + "grad_norm": 0.7504292726516724, + "learning_rate": 0.0015, + "loss": 1.8381, + "step": 1040 + }, + { + "epoch": 0.11075949367088607, + "grad_norm": 1.3631010055541992, + "learning_rate": 0.0015, + "loss": 1.8438, + "step": 1050 + }, + { + "epoch": 0.11181434599156118, + "grad_norm": 0.7131859660148621, + "learning_rate": 0.0015, + "loss": 1.8301, + "step": 1060 + }, + { + "epoch": 0.11286919831223628, + "grad_norm": 0.6771515011787415, + "learning_rate": 0.0015, + "loss": 1.8174, + "step": 1070 + }, + { + "epoch": 0.11392405063291139, + "grad_norm": 0.7526267170906067, + "learning_rate": 0.0015, + "loss": 1.8176, + "step": 1080 + }, + { + "epoch": 0.1149789029535865, + "grad_norm": 1.0048303604125977, + "learning_rate": 0.0015, + "loss": 1.8288, + "step": 1090 + }, + { + "epoch": 0.1160337552742616, + "grad_norm": 0.8215150237083435, + "learning_rate": 0.0015, + "loss": 1.8159, + "step": 1100 + }, + { + "epoch": 0.11708860759493671, + "grad_norm": 0.6539852619171143, + "learning_rate": 0.0015, + "loss": 1.817, + "step": 1110 + }, + { + "epoch": 0.11814345991561181, + "grad_norm": 0.891223669052124, + "learning_rate": 0.0015, + 
"loss": 1.8109, + "step": 1120 + }, + { + "epoch": 0.11919831223628692, + "grad_norm": 0.965599775314331, + "learning_rate": 0.0015, + "loss": 1.7988, + "step": 1130 + }, + { + "epoch": 0.12025316455696203, + "grad_norm": 0.9836862087249756, + "learning_rate": 0.0015, + "loss": 1.813, + "step": 1140 + }, + { + "epoch": 0.12130801687763713, + "grad_norm": 1.464642882347107, + "learning_rate": 0.0015, + "loss": 1.7969, + "step": 1150 + }, + { + "epoch": 0.12236286919831224, + "grad_norm": 1.1968554258346558, + "learning_rate": 0.0015, + "loss": 1.8133, + "step": 1160 + }, + { + "epoch": 0.12341772151898735, + "grad_norm": 0.8986308574676514, + "learning_rate": 0.0015, + "loss": 1.7902, + "step": 1170 + }, + { + "epoch": 0.12447257383966245, + "grad_norm": 0.793096661567688, + "learning_rate": 0.0015, + "loss": 1.7777, + "step": 1180 + }, + { + "epoch": 0.12552742616033755, + "grad_norm": 0.7369785308837891, + "learning_rate": 0.0015, + "loss": 1.7791, + "step": 1190 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 0.7001211047172546, + "learning_rate": 0.0015, + "loss": 1.7933, + "step": 1200 + }, + { + "epoch": 0.12763713080168776, + "grad_norm": 0.7702583074569702, + "learning_rate": 0.0015, + "loss": 1.7857, + "step": 1210 + }, + { + "epoch": 0.12869198312236288, + "grad_norm": 0.6666435599327087, + "learning_rate": 0.0015, + "loss": 1.7676, + "step": 1220 + }, + { + "epoch": 0.12974683544303797, + "grad_norm": 0.8952589631080627, + "learning_rate": 0.0015, + "loss": 1.7612, + "step": 1230 + }, + { + "epoch": 0.1308016877637131, + "grad_norm": 0.8088576197624207, + "learning_rate": 0.0015, + "loss": 1.7765, + "step": 1240 + }, + { + "epoch": 0.13185654008438819, + "grad_norm": 0.727730929851532, + "learning_rate": 0.0015, + "loss": 1.7782, + "step": 1250 + }, + { + "epoch": 0.13291139240506328, + "grad_norm": 1.0173707008361816, + "learning_rate": 0.0015, + "loss": 1.7639, + "step": 1260 + }, + { + "epoch": 0.1339662447257384, + "grad_norm": 
0.8644598126411438, + "learning_rate": 0.0015, + "loss": 1.7598, + "step": 1270 + }, + { + "epoch": 0.1350210970464135, + "grad_norm": 0.7914327383041382, + "learning_rate": 0.0015, + "loss": 1.7724, + "step": 1280 + }, + { + "epoch": 0.1360759493670886, + "grad_norm": 0.9528142213821411, + "learning_rate": 0.0015, + "loss": 1.7609, + "step": 1290 + }, + { + "epoch": 0.1371308016877637, + "grad_norm": 1.3115551471710205, + "learning_rate": 0.0015, + "loss": 1.7522, + "step": 1300 + }, + { + "epoch": 0.13818565400843882, + "grad_norm": 0.6554073095321655, + "learning_rate": 0.0015, + "loss": 1.7559, + "step": 1310 + }, + { + "epoch": 0.13924050632911392, + "grad_norm": 1.0573222637176514, + "learning_rate": 0.0015, + "loss": 1.7536, + "step": 1320 + }, + { + "epoch": 0.14029535864978904, + "grad_norm": 1.0416539907455444, + "learning_rate": 0.0015, + "loss": 1.7445, + "step": 1330 + }, + { + "epoch": 0.14135021097046413, + "grad_norm": 0.8454880118370056, + "learning_rate": 0.0015, + "loss": 1.7461, + "step": 1340 + }, + { + "epoch": 0.14240506329113925, + "grad_norm": 0.6951704621315002, + "learning_rate": 0.0015, + "loss": 1.7546, + "step": 1350 + }, + { + "epoch": 0.14345991561181434, + "grad_norm": 0.7804940938949585, + "learning_rate": 0.0015, + "loss": 1.7406, + "step": 1360 + }, + { + "epoch": 0.14451476793248946, + "grad_norm": 0.8311479091644287, + "learning_rate": 0.0015, + "loss": 1.7398, + "step": 1370 + }, + { + "epoch": 0.14556962025316456, + "grad_norm": 0.6383234858512878, + "learning_rate": 0.0015, + "loss": 1.7301, + "step": 1380 + }, + { + "epoch": 0.14662447257383968, + "grad_norm": 0.9428786635398865, + "learning_rate": 0.0015, + "loss": 1.7417, + "step": 1390 + }, + { + "epoch": 0.14767932489451477, + "grad_norm": 0.6423138380050659, + "learning_rate": 0.0015, + "loss": 1.7234, + "step": 1400 + }, + { + "epoch": 0.14873417721518986, + "grad_norm": 0.7015560269355774, + "learning_rate": 0.0015, + "loss": 1.7239, + "step": 1410 + }, + { + 
"epoch": 0.14978902953586498, + "grad_norm": 0.8056492805480957, + "learning_rate": 0.0015, + "loss": 1.7325, + "step": 1420 + }, + { + "epoch": 0.15084388185654007, + "grad_norm": 0.8110372424125671, + "learning_rate": 0.0015, + "loss": 1.7259, + "step": 1430 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 0.6866753101348877, + "learning_rate": 0.0015, + "loss": 1.723, + "step": 1440 + }, + { + "epoch": 0.1529535864978903, + "grad_norm": 0.7652639150619507, + "learning_rate": 0.0015, + "loss": 1.7205, + "step": 1450 + }, + { + "epoch": 0.1540084388185654, + "grad_norm": 0.6864675879478455, + "learning_rate": 0.0015, + "loss": 1.7119, + "step": 1460 + }, + { + "epoch": 0.1550632911392405, + "grad_norm": 1.2522108554840088, + "learning_rate": 0.0015, + "loss": 1.7139, + "step": 1470 + }, + { + "epoch": 0.15611814345991562, + "grad_norm": 1.1031931638717651, + "learning_rate": 0.0015, + "loss": 1.7194, + "step": 1480 + }, + { + "epoch": 0.1571729957805907, + "grad_norm": 1.0775736570358276, + "learning_rate": 0.0015, + "loss": 1.7151, + "step": 1490 + }, + { + "epoch": 0.15822784810126583, + "grad_norm": 0.64118891954422, + "learning_rate": 0.0015, + "loss": 1.7085, + "step": 1500 + }, + { + "epoch": 0.15928270042194093, + "grad_norm": 0.6656480431556702, + "learning_rate": 0.0015, + "loss": 1.7086, + "step": 1510 + }, + { + "epoch": 0.16033755274261605, + "grad_norm": 0.6896774172782898, + "learning_rate": 0.0015, + "loss": 1.7112, + "step": 1520 + }, + { + "epoch": 0.16139240506329114, + "grad_norm": 0.8207387924194336, + "learning_rate": 0.0015, + "loss": 1.6972, + "step": 1530 + }, + { + "epoch": 0.16244725738396623, + "grad_norm": 0.6354748010635376, + "learning_rate": 0.0015, + "loss": 1.6883, + "step": 1540 + }, + { + "epoch": 0.16350210970464135, + "grad_norm": 0.8455018997192383, + "learning_rate": 0.0015, + "loss": 1.688, + "step": 1550 + }, + { + "epoch": 0.16455696202531644, + "grad_norm": 0.7910233736038208, + "learning_rate": 0.0015, + "loss": 
1.6912, + "step": 1560 + }, + { + "epoch": 0.16561181434599156, + "grad_norm": 0.5973400473594666, + "learning_rate": 0.0015, + "loss": 1.6839, + "step": 1570 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 0.652214765548706, + "learning_rate": 0.0015, + "loss": 1.6958, + "step": 1580 + }, + { + "epoch": 0.16772151898734178, + "grad_norm": 1.1195935010910034, + "learning_rate": 0.0015, + "loss": 1.6929, + "step": 1590 + }, + { + "epoch": 0.16877637130801687, + "grad_norm": 0.759602963924408, + "learning_rate": 0.0015, + "loss": 1.6955, + "step": 1600 + }, + { + "epoch": 0.169831223628692, + "grad_norm": 0.6735444068908691, + "learning_rate": 0.0015, + "loss": 1.6936, + "step": 1610 + }, + { + "epoch": 0.17088607594936708, + "grad_norm": 0.632819652557373, + "learning_rate": 0.0015, + "loss": 1.6643, + "step": 1620 + }, + { + "epoch": 0.1719409282700422, + "grad_norm": 0.7378846406936646, + "learning_rate": 0.0015, + "loss": 1.6745, + "step": 1630 + }, + { + "epoch": 0.1729957805907173, + "grad_norm": 0.7184489965438843, + "learning_rate": 0.0015, + "loss": 1.6778, + "step": 1640 + }, + { + "epoch": 0.17405063291139242, + "grad_norm": 0.8162944316864014, + "learning_rate": 0.0015, + "loss": 1.6696, + "step": 1650 + }, + { + "epoch": 0.1751054852320675, + "grad_norm": 0.664391040802002, + "learning_rate": 0.0015, + "loss": 1.6735, + "step": 1660 + }, + { + "epoch": 0.17616033755274263, + "grad_norm": 0.6707069873809814, + "learning_rate": 0.0015, + "loss": 1.6806, + "step": 1670 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 1.696792483329773, + "learning_rate": 0.0015, + "loss": 1.6684, + "step": 1680 + }, + { + "epoch": 0.17827004219409281, + "grad_norm": 1.604297399520874, + "learning_rate": 0.0015, + "loss": 1.6746, + "step": 1690 + }, + { + "epoch": 0.17932489451476794, + "grad_norm": 0.8932088017463684, + "learning_rate": 0.0015, + "loss": 1.6749, + "step": 1700 + }, + { + "epoch": 0.18037974683544303, + "grad_norm": 0.8063291907310486, + 
"learning_rate": 0.0015, + "loss": 1.6467, + "step": 1710 + }, + { + "epoch": 0.18143459915611815, + "grad_norm": 0.7670196890830994, + "learning_rate": 0.0015, + "loss": 1.6502, + "step": 1720 + }, + { + "epoch": 0.18248945147679324, + "grad_norm": 0.6955190300941467, + "learning_rate": 0.0015, + "loss": 1.6571, + "step": 1730 + }, + { + "epoch": 0.18354430379746836, + "grad_norm": 0.6111567616462708, + "learning_rate": 0.0015, + "loss": 1.6594, + "step": 1740 + }, + { + "epoch": 0.18459915611814345, + "grad_norm": 0.6208847165107727, + "learning_rate": 0.0015, + "loss": 1.6808, + "step": 1750 + }, + { + "epoch": 0.18565400843881857, + "grad_norm": 0.6162334084510803, + "learning_rate": 0.0015, + "loss": 1.6614, + "step": 1760 + }, + { + "epoch": 0.18670886075949367, + "grad_norm": 0.7094123959541321, + "learning_rate": 0.0015, + "loss": 1.6642, + "step": 1770 + }, + { + "epoch": 0.1877637130801688, + "grad_norm": 0.757205069065094, + "learning_rate": 0.0015, + "loss": 1.6643, + "step": 1780 + }, + { + "epoch": 0.18881856540084388, + "grad_norm": 1.2804656028747559, + "learning_rate": 0.0015, + "loss": 1.6646, + "step": 1790 + }, + { + "epoch": 0.189873417721519, + "grad_norm": 1.031036615371704, + "learning_rate": 0.0015, + "loss": 1.6636, + "step": 1800 + }, + { + "epoch": 0.1909282700421941, + "grad_norm": 0.6760911345481873, + "learning_rate": 0.0015, + "loss": 1.6413, + "step": 1810 + }, + { + "epoch": 0.19198312236286919, + "grad_norm": 0.7321951389312744, + "learning_rate": 0.0015, + "loss": 1.6417, + "step": 1820 + }, + { + "epoch": 0.1930379746835443, + "grad_norm": 0.6221210360527039, + "learning_rate": 0.0015, + "loss": 1.6415, + "step": 1830 + }, + { + "epoch": 0.1940928270042194, + "grad_norm": 0.6461858153343201, + "learning_rate": 0.0015, + "loss": 1.644, + "step": 1840 + }, + { + "epoch": 0.19514767932489452, + "grad_norm": 0.8800684809684753, + "learning_rate": 0.0015, + "loss": 1.6467, + "step": 1850 + }, + { + "epoch": 0.1962025316455696, + 
"grad_norm": 0.7143259048461914, + "learning_rate": 0.0015, + "loss": 1.6529, + "step": 1860 + }, + { + "epoch": 0.19725738396624473, + "grad_norm": 0.7958574891090393, + "learning_rate": 0.0015, + "loss": 1.6512, + "step": 1870 + }, + { + "epoch": 0.19831223628691982, + "grad_norm": 0.6521611213684082, + "learning_rate": 0.0015, + "loss": 1.6373, + "step": 1880 + }, + { + "epoch": 0.19936708860759494, + "grad_norm": 0.5893529653549194, + "learning_rate": 0.0015, + "loss": 1.6418, + "step": 1890 + }, + { + "epoch": 0.20042194092827004, + "grad_norm": 1.071047067642212, + "learning_rate": 0.0015, + "loss": 1.6403, + "step": 1900 + }, + { + "epoch": 0.20147679324894516, + "grad_norm": 0.9781458973884583, + "learning_rate": 0.0015, + "loss": 1.6401, + "step": 1910 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 0.7105679512023926, + "learning_rate": 0.0015, + "loss": 1.6377, + "step": 1920 + }, + { + "epoch": 0.20358649789029537, + "grad_norm": 0.8399671316146851, + "learning_rate": 0.0015, + "loss": 1.6377, + "step": 1930 + }, + { + "epoch": 0.20464135021097046, + "grad_norm": 0.6633486747741699, + "learning_rate": 0.0015, + "loss": 1.63, + "step": 1940 + }, + { + "epoch": 0.20569620253164558, + "grad_norm": 0.8309907913208008, + "learning_rate": 0.0015, + "loss": 1.6264, + "step": 1950 + }, + { + "epoch": 0.20675105485232068, + "grad_norm": 0.689829409122467, + "learning_rate": 0.0015, + "loss": 1.6295, + "step": 1960 + }, + { + "epoch": 0.20780590717299577, + "grad_norm": 0.5847307443618774, + "learning_rate": 0.0015, + "loss": 1.6429, + "step": 1970 + }, + { + "epoch": 0.2088607594936709, + "grad_norm": 0.5803424715995789, + "learning_rate": 0.0015, + "loss": 1.6234, + "step": 1980 + }, + { + "epoch": 0.20991561181434598, + "grad_norm": 0.6408569812774658, + "learning_rate": 0.0015, + "loss": 1.6221, + "step": 1990 + }, + { + "epoch": 0.2109704641350211, + "grad_norm": 0.6490920782089233, + "learning_rate": 0.0015, + "loss": 1.6195, + "step": 2000 + }, + { 
+ "epoch": 0.2120253164556962, + "grad_norm": 0.9579010605812073, + "learning_rate": 0.0015, + "loss": 1.6264, + "step": 2010 + }, + { + "epoch": 0.21308016877637131, + "grad_norm": 0.9444113969802856, + "learning_rate": 0.0015, + "loss": 1.6383, + "step": 2020 + }, + { + "epoch": 0.2141350210970464, + "grad_norm": 1.0114694833755493, + "learning_rate": 0.0015, + "loss": 1.6353, + "step": 2030 + }, + { + "epoch": 0.21518987341772153, + "grad_norm": 0.6270862221717834, + "learning_rate": 0.0015, + "loss": 1.6189, + "step": 2040 + }, + { + "epoch": 0.21624472573839662, + "grad_norm": 0.7058157324790955, + "learning_rate": 0.0015, + "loss": 1.6206, + "step": 2050 + }, + { + "epoch": 0.21729957805907174, + "grad_norm": 0.6812050342559814, + "learning_rate": 0.0015, + "loss": 1.6127, + "step": 2060 + }, + { + "epoch": 0.21835443037974683, + "grad_norm": 0.6866752505302429, + "learning_rate": 0.0015, + "loss": 1.6182, + "step": 2070 + }, + { + "epoch": 0.21940928270042195, + "grad_norm": 0.7980395555496216, + "learning_rate": 0.0015, + "loss": 1.6201, + "step": 2080 + }, + { + "epoch": 0.22046413502109705, + "grad_norm": 0.5818979740142822, + "learning_rate": 0.0015, + "loss": 1.6097, + "step": 2090 + }, + { + "epoch": 0.22151898734177214, + "grad_norm": 0.6750056743621826, + "learning_rate": 0.0015, + "loss": 1.6152, + "step": 2100 + }, + { + "epoch": 0.22257383966244726, + "grad_norm": 1.0068755149841309, + "learning_rate": 0.0015, + "loss": 1.6098, + "step": 2110 + }, + { + "epoch": 0.22362869198312235, + "grad_norm": 0.7054839134216309, + "learning_rate": 0.0015, + "loss": 1.6061, + "step": 2120 + }, + { + "epoch": 0.22468354430379747, + "grad_norm": 0.8757228255271912, + "learning_rate": 0.0015, + "loss": 1.612, + "step": 2130 + }, + { + "epoch": 0.22573839662447256, + "grad_norm": 0.6026748418807983, + "learning_rate": 0.0015, + "loss": 1.6014, + "step": 2140 + }, + { + "epoch": 0.22679324894514769, + "grad_norm": 0.8974719643592834, + "learning_rate": 0.0015, + 
"loss": 1.6121, + "step": 2150 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 0.8309603929519653, + "learning_rate": 0.0015, + "loss": 1.6228, + "step": 2160 + }, + { + "epoch": 0.2289029535864979, + "grad_norm": 0.649837076663971, + "learning_rate": 0.0015, + "loss": 1.6106, + "step": 2170 + }, + { + "epoch": 0.229957805907173, + "grad_norm": 0.6219896078109741, + "learning_rate": 0.0015, + "loss": 1.5917, + "step": 2180 + }, + { + "epoch": 0.2310126582278481, + "grad_norm": 0.6073859333992004, + "learning_rate": 0.0015, + "loss": 1.6079, + "step": 2190 + }, + { + "epoch": 0.2320675105485232, + "grad_norm": 0.8608683347702026, + "learning_rate": 0.0015, + "loss": 1.6086, + "step": 2200 + }, + { + "epoch": 0.23312236286919832, + "grad_norm": 0.6667501330375671, + "learning_rate": 0.0015, + "loss": 1.5948, + "step": 2210 + }, + { + "epoch": 0.23417721518987342, + "grad_norm": 0.7498992085456848, + "learning_rate": 0.0015, + "loss": 1.6081, + "step": 2220 + }, + { + "epoch": 0.23523206751054854, + "grad_norm": 0.9965426921844482, + "learning_rate": 0.0015, + "loss": 1.6008, + "step": 2230 + }, + { + "epoch": 0.23628691983122363, + "grad_norm": 0.7309927344322205, + "learning_rate": 0.0015, + "loss": 1.6001, + "step": 2240 + }, + { + "epoch": 0.23734177215189872, + "grad_norm": 0.7164555788040161, + "learning_rate": 0.0015, + "loss": 1.5945, + "step": 2250 + }, + { + "epoch": 0.23839662447257384, + "grad_norm": 0.6513476371765137, + "learning_rate": 0.0015, + "loss": 1.5855, + "step": 2260 + }, + { + "epoch": 0.23945147679324894, + "grad_norm": 0.6131182312965393, + "learning_rate": 0.0015, + "loss": 1.6185, + "step": 2270 + }, + { + "epoch": 0.24050632911392406, + "grad_norm": 0.9695400595664978, + "learning_rate": 0.0015, + "loss": 1.6028, + "step": 2280 + }, + { + "epoch": 0.24156118143459915, + "grad_norm": 0.8890817761421204, + "learning_rate": 0.0015, + "loss": 1.5952, + "step": 2290 + }, + { + "epoch": 0.24261603375527427, + "grad_norm": 
1.3072495460510254, + "learning_rate": 0.0015, + "loss": 1.596, + "step": 2300 + }, + { + "epoch": 0.24367088607594936, + "grad_norm": 0.832031786441803, + "learning_rate": 0.0015, + "loss": 1.5854, + "step": 2310 + }, + { + "epoch": 0.24472573839662448, + "grad_norm": 0.7027363777160645, + "learning_rate": 0.0015, + "loss": 1.5973, + "step": 2320 + }, + { + "epoch": 0.24578059071729957, + "grad_norm": 0.7463565468788147, + "learning_rate": 0.0015, + "loss": 1.5812, + "step": 2330 + }, + { + "epoch": 0.2468354430379747, + "grad_norm": 0.8284775614738464, + "learning_rate": 0.0015, + "loss": 1.576, + "step": 2340 + }, + { + "epoch": 0.2478902953586498, + "grad_norm": 0.5679264664649963, + "learning_rate": 0.0015, + "loss": 1.5785, + "step": 2350 + }, + { + "epoch": 0.2489451476793249, + "grad_norm": 0.7692309021949768, + "learning_rate": 0.0015, + "loss": 1.5884, + "step": 2360 + }, + { + "epoch": 0.25, + "grad_norm": 0.7318735718727112, + "learning_rate": 0.0015, + "loss": 1.5868, + "step": 2370 + }, + { + "epoch": 0.2510548523206751, + "grad_norm": 0.6649794578552246, + "learning_rate": 0.0015, + "loss": 1.5831, + "step": 2380 + }, + { + "epoch": 0.2521097046413502, + "grad_norm": 0.7323163747787476, + "learning_rate": 0.0015, + "loss": 1.5833, + "step": 2390 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 0.6826845407485962, + "learning_rate": 0.0015, + "loss": 1.5846, + "step": 2400 + }, + { + "epoch": 0.2542194092827004, + "grad_norm": 0.7481759190559387, + "learning_rate": 0.0015, + "loss": 1.5869, + "step": 2410 + }, + { + "epoch": 0.2552742616033755, + "grad_norm": 0.6638614535331726, + "learning_rate": 0.0015, + "loss": 1.5798, + "step": 2420 + }, + { + "epoch": 0.2563291139240506, + "grad_norm": 1.000096082687378, + "learning_rate": 0.0015, + "loss": 1.5772, + "step": 2430 + }, + { + "epoch": 0.25738396624472576, + "grad_norm": 0.7527396082878113, + "learning_rate": 0.0015, + "loss": 1.588, + "step": 2440 + }, + { + "epoch": 0.25843881856540085, + 
"grad_norm": 0.7518728375434875, + "learning_rate": 0.0015, + "loss": 1.5728, + "step": 2450 + }, + { + "epoch": 0.25949367088607594, + "grad_norm": 0.6301272511482239, + "learning_rate": 0.0015, + "loss": 1.5891, + "step": 2460 + }, + { + "epoch": 0.26054852320675104, + "grad_norm": 0.93257737159729, + "learning_rate": 0.0015, + "loss": 1.5776, + "step": 2470 + }, + { + "epoch": 0.2616033755274262, + "grad_norm": 0.9046885967254639, + "learning_rate": 0.0015, + "loss": 1.586, + "step": 2480 + }, + { + "epoch": 0.2626582278481013, + "grad_norm": 0.5742714405059814, + "learning_rate": 0.0015, + "loss": 1.5693, + "step": 2490 + }, + { + "epoch": 0.26371308016877637, + "grad_norm": 0.7036783695220947, + "learning_rate": 0.0015, + "loss": 1.5742, + "step": 2500 + }, + { + "epoch": 0.26476793248945146, + "grad_norm": 0.5699503421783447, + "learning_rate": 0.0015, + "loss": 1.5751, + "step": 2510 + }, + { + "epoch": 0.26582278481012656, + "grad_norm": 0.9667662382125854, + "learning_rate": 0.0015, + "loss": 1.5781, + "step": 2520 + }, + { + "epoch": 0.2668776371308017, + "grad_norm": 0.7388290762901306, + "learning_rate": 0.0015, + "loss": 1.5693, + "step": 2530 + }, + { + "epoch": 0.2679324894514768, + "grad_norm": 1.6020649671554565, + "learning_rate": 0.0015, + "loss": 1.5778, + "step": 2540 + }, + { + "epoch": 0.2689873417721519, + "grad_norm": 0.9777061939239502, + "learning_rate": 0.0015, + "loss": 1.5772, + "step": 2550 + }, + { + "epoch": 0.270042194092827, + "grad_norm": 0.6025921702384949, + "learning_rate": 0.0015, + "loss": 1.5709, + "step": 2560 + }, + { + "epoch": 0.27109704641350213, + "grad_norm": 0.7348845601081848, + "learning_rate": 0.0015, + "loss": 1.5691, + "step": 2570 + }, + { + "epoch": 0.2721518987341772, + "grad_norm": 0.6214437484741211, + "learning_rate": 0.0015, + "loss": 1.5676, + "step": 2580 + }, + { + "epoch": 0.2732067510548523, + "grad_norm": 0.6454372406005859, + "learning_rate": 0.0015, + "loss": 1.5748, + "step": 2590 + }, + { + 
"epoch": 0.2742616033755274, + "grad_norm": 0.6785824298858643, + "learning_rate": 0.0015, + "loss": 1.5735, + "step": 2600 + }, + { + "epoch": 0.27531645569620256, + "grad_norm": 0.6077165603637695, + "learning_rate": 0.0015, + "loss": 1.5724, + "step": 2610 + }, + { + "epoch": 0.27637130801687765, + "grad_norm": 0.8761322498321533, + "learning_rate": 0.0015, + "loss": 1.5711, + "step": 2620 + }, + { + "epoch": 0.27742616033755274, + "grad_norm": 0.6504769921302795, + "learning_rate": 0.0015, + "loss": 1.5601, + "step": 2630 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 0.579383909702301, + "learning_rate": 0.0015, + "loss": 1.5752, + "step": 2640 + }, + { + "epoch": 0.2795358649789029, + "grad_norm": 0.7237796783447266, + "learning_rate": 0.0015, + "loss": 1.5657, + "step": 2650 + }, + { + "epoch": 0.2805907172995781, + "grad_norm": 0.694334089756012, + "learning_rate": 0.0015, + "loss": 1.558, + "step": 2660 + }, + { + "epoch": 0.28164556962025317, + "grad_norm": 0.5767725110054016, + "learning_rate": 0.0015, + "loss": 1.5589, + "step": 2670 + }, + { + "epoch": 0.28270042194092826, + "grad_norm": 0.5653151869773865, + "learning_rate": 0.0015, + "loss": 1.5609, + "step": 2680 + }, + { + "epoch": 0.28375527426160335, + "grad_norm": 0.6030099987983704, + "learning_rate": 0.0015, + "loss": 1.5621, + "step": 2690 + }, + { + "epoch": 0.2848101265822785, + "grad_norm": 0.7564494609832764, + "learning_rate": 0.0015, + "loss": 1.5556, + "step": 2700 + }, + { + "epoch": 0.2858649789029536, + "grad_norm": 0.613181471824646, + "learning_rate": 0.0015, + "loss": 1.5609, + "step": 2710 + }, + { + "epoch": 0.2869198312236287, + "grad_norm": 0.6615495681762695, + "learning_rate": 0.0015, + "loss": 1.5642, + "step": 2720 + }, + { + "epoch": 0.2879746835443038, + "grad_norm": 0.624372661113739, + "learning_rate": 0.0015, + "loss": 1.5418, + "step": 2730 + }, + { + "epoch": 0.2890295358649789, + "grad_norm": 0.9245483875274658, + "learning_rate": 0.0015, + "loss": 
1.5589, + "step": 2740 + }, + { + "epoch": 0.290084388185654, + "grad_norm": 0.8437123894691467, + "learning_rate": 0.0015, + "loss": 1.563, + "step": 2750 + }, + { + "epoch": 0.2911392405063291, + "grad_norm": 0.917031466960907, + "learning_rate": 0.0015, + "loss": 1.5701, + "step": 2760 + }, + { + "epoch": 0.2921940928270042, + "grad_norm": 0.5686878561973572, + "learning_rate": 0.0015, + "loss": 1.5614, + "step": 2770 + }, + { + "epoch": 0.29324894514767935, + "grad_norm": 0.6746931076049805, + "learning_rate": 0.0015, + "loss": 1.5585, + "step": 2780 + }, + { + "epoch": 0.29430379746835444, + "grad_norm": 0.698357105255127, + "learning_rate": 0.0015, + "loss": 1.5576, + "step": 2790 + }, + { + "epoch": 0.29535864978902954, + "grad_norm": 0.7661951184272766, + "learning_rate": 0.0015, + "loss": 1.5555, + "step": 2800 + }, + { + "epoch": 0.29641350210970463, + "grad_norm": 0.5810350179672241, + "learning_rate": 0.0015, + "loss": 1.5572, + "step": 2810 + }, + { + "epoch": 0.2974683544303797, + "grad_norm": 0.6435417532920837, + "learning_rate": 0.0015, + "loss": 1.5624, + "step": 2820 + }, + { + "epoch": 0.29852320675105487, + "grad_norm": 0.8342466950416565, + "learning_rate": 0.0015, + "loss": 1.5397, + "step": 2830 + }, + { + "epoch": 0.29957805907172996, + "grad_norm": 0.9760372638702393, + "learning_rate": 0.0015, + "loss": 1.5461, + "step": 2840 + }, + { + "epoch": 0.30063291139240506, + "grad_norm": 0.9893860816955566, + "learning_rate": 0.0015, + "loss": 1.5463, + "step": 2850 + }, + { + "epoch": 0.30168776371308015, + "grad_norm": 0.9626775979995728, + "learning_rate": 0.0015, + "loss": 1.5523, + "step": 2860 + }, + { + "epoch": 0.3027426160337553, + "grad_norm": 0.6983375549316406, + "learning_rate": 0.0015, + "loss": 1.549, + "step": 2870 + }, + { + "epoch": 0.3037974683544304, + "grad_norm": 0.6338869333267212, + "learning_rate": 0.0015, + "loss": 1.539, + "step": 2880 + }, + { + "epoch": 0.3048523206751055, + "grad_norm": 0.6026023626327515, + 
"learning_rate": 0.0015, + "loss": 1.5422, + "step": 2890 + }, + { + "epoch": 0.3059071729957806, + "grad_norm": 0.6701565384864807, + "learning_rate": 0.0015, + "loss": 1.5504, + "step": 2900 + }, + { + "epoch": 0.3069620253164557, + "grad_norm": 0.6777899265289307, + "learning_rate": 0.0015, + "loss": 1.5476, + "step": 2910 + }, + { + "epoch": 0.3080168776371308, + "grad_norm": 0.7234910130500793, + "learning_rate": 0.0015, + "loss": 1.5449, + "step": 2920 + }, + { + "epoch": 0.3090717299578059, + "grad_norm": 0.5603019595146179, + "learning_rate": 0.0015, + "loss": 1.5383, + "step": 2930 + }, + { + "epoch": 0.310126582278481, + "grad_norm": 0.6655024290084839, + "learning_rate": 0.0015, + "loss": 1.547, + "step": 2940 + }, + { + "epoch": 0.3111814345991561, + "grad_norm": 0.8914442658424377, + "learning_rate": 0.0015, + "loss": 1.5568, + "step": 2950 + }, + { + "epoch": 0.31223628691983124, + "grad_norm": 0.676430881023407, + "learning_rate": 0.0015, + "loss": 1.5438, + "step": 2960 + }, + { + "epoch": 0.31329113924050633, + "grad_norm": 0.7790401577949524, + "learning_rate": 0.0015, + "loss": 1.528, + "step": 2970 + }, + { + "epoch": 0.3143459915611814, + "grad_norm": 0.6461721658706665, + "learning_rate": 0.0015, + "loss": 1.5297, + "step": 2980 + }, + { + "epoch": 0.3154008438818565, + "grad_norm": 0.6958960890769958, + "learning_rate": 0.0015, + "loss": 1.5484, + "step": 2990 + }, + { + "epoch": 0.31645569620253167, + "grad_norm": 0.5840914249420166, + "learning_rate": 0.0015, + "loss": 1.5294, + "step": 3000 + }, + { + "epoch": 0.31751054852320676, + "grad_norm": 0.641467273235321, + "learning_rate": 0.0015, + "loss": 1.5399, + "step": 3010 + }, + { + "epoch": 0.31856540084388185, + "grad_norm": 0.6739155650138855, + "learning_rate": 0.0015, + "loss": 1.5402, + "step": 3020 + }, + { + "epoch": 0.31962025316455694, + "grad_norm": 0.5983861684799194, + "learning_rate": 0.0015, + "loss": 1.5284, + "step": 3030 + }, + { + "epoch": 0.3206751054852321, + 
"grad_norm": 0.6415347456932068, + "learning_rate": 0.0015, + "loss": 1.5337, + "step": 3040 + }, + { + "epoch": 0.3217299578059072, + "grad_norm": 0.5555444359779358, + "learning_rate": 0.0015, + "loss": 1.5322, + "step": 3050 + }, + { + "epoch": 0.3227848101265823, + "grad_norm": 0.6042733788490295, + "learning_rate": 0.0015, + "loss": 1.5398, + "step": 3060 + }, + { + "epoch": 0.32383966244725737, + "grad_norm": 0.5883864760398865, + "learning_rate": 0.0015, + "loss": 1.5334, + "step": 3070 + }, + { + "epoch": 0.32489451476793246, + "grad_norm": 0.5852831602096558, + "learning_rate": 0.0015, + "loss": 1.5243, + "step": 3080 + }, + { + "epoch": 0.3259493670886076, + "grad_norm": 0.6446427702903748, + "learning_rate": 0.0015, + "loss": 1.5401, + "step": 3090 + }, + { + "epoch": 0.3270042194092827, + "grad_norm": 0.7933229207992554, + "learning_rate": 0.0015, + "loss": 1.5255, + "step": 3100 + }, + { + "epoch": 0.3280590717299578, + "grad_norm": 0.6084532141685486, + "learning_rate": 0.0015, + "loss": 1.5316, + "step": 3110 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 0.5957900881767273, + "learning_rate": 0.0015, + "loss": 1.5329, + "step": 3120 + }, + { + "epoch": 0.33016877637130804, + "grad_norm": 0.5894820094108582, + "learning_rate": 0.0015, + "loss": 1.545, + "step": 3130 + }, + { + "epoch": 0.33122362869198313, + "grad_norm": 0.7152115106582642, + "learning_rate": 0.0015, + "loss": 1.5219, + "step": 3140 + }, + { + "epoch": 0.3322784810126582, + "grad_norm": 0.5920620560646057, + "learning_rate": 0.0015, + "loss": 1.5301, + "step": 3150 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.559782087802887, + "learning_rate": 0.0015, + "loss": 1.5301, + "step": 3160 + }, + { + "epoch": 0.33438818565400846, + "grad_norm": 0.6018707156181335, + "learning_rate": 0.0015, + "loss": 1.5341, + "step": 3170 + }, + { + "epoch": 0.33544303797468356, + "grad_norm": 0.6770259737968445, + "learning_rate": 0.0015, + "loss": 1.5295, + "step": 3180 + }, + { + 
"epoch": 0.33649789029535865, + "grad_norm": 0.636032223701477, + "learning_rate": 0.0015, + "loss": 1.5308, + "step": 3190 + }, + { + "epoch": 0.33755274261603374, + "grad_norm": 0.8031036853790283, + "learning_rate": 0.0015, + "loss": 1.5216, + "step": 3200 + }, + { + "epoch": 0.33860759493670883, + "grad_norm": 0.768401563167572, + "learning_rate": 0.0015, + "loss": 1.5342, + "step": 3210 + }, + { + "epoch": 0.339662447257384, + "grad_norm": 0.7709994316101074, + "learning_rate": 0.0015, + "loss": 1.5267, + "step": 3220 + }, + { + "epoch": 0.3407172995780591, + "grad_norm": 0.7158469557762146, + "learning_rate": 0.0015, + "loss": 1.5224, + "step": 3230 + }, + { + "epoch": 0.34177215189873417, + "grad_norm": 0.6034460663795471, + "learning_rate": 0.0015, + "loss": 1.5013, + "step": 3240 + }, + { + "epoch": 0.34282700421940926, + "grad_norm": 0.6172865629196167, + "learning_rate": 0.0015, + "loss": 1.5299, + "step": 3250 + }, + { + "epoch": 0.3438818565400844, + "grad_norm": 0.5750550031661987, + "learning_rate": 0.0015, + "loss": 1.5171, + "step": 3260 + }, + { + "epoch": 0.3449367088607595, + "grad_norm": 0.6256264448165894, + "learning_rate": 0.0015, + "loss": 1.5346, + "step": 3270 + }, + { + "epoch": 0.3459915611814346, + "grad_norm": 0.7018879055976868, + "learning_rate": 0.0015, + "loss": 1.5344, + "step": 3280 + }, + { + "epoch": 0.3470464135021097, + "grad_norm": 0.6707902550697327, + "learning_rate": 0.0015, + "loss": 1.5163, + "step": 3290 + }, + { + "epoch": 0.34810126582278483, + "grad_norm": 0.6164948344230652, + "learning_rate": 0.0015, + "loss": 1.5261, + "step": 3300 + }, + { + "epoch": 0.3491561181434599, + "grad_norm": 0.6536199450492859, + "learning_rate": 0.0015, + "loss": 1.5256, + "step": 3310 + }, + { + "epoch": 0.350210970464135, + "grad_norm": 0.5821692943572998, + "learning_rate": 0.0015, + "loss": 1.5302, + "step": 3320 + }, + { + "epoch": 0.3512658227848101, + "grad_norm": 0.5878994464874268, + "learning_rate": 0.0015, + "loss": 
1.5179, + "step": 3330 + }, + { + "epoch": 0.35232067510548526, + "grad_norm": 0.6048735976219177, + "learning_rate": 0.0015, + "loss": 1.5198, + "step": 3340 + }, + { + "epoch": 0.35337552742616035, + "grad_norm": 0.5959447026252747, + "learning_rate": 0.0015, + "loss": 1.5133, + "step": 3350 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 0.666780412197113, + "learning_rate": 0.0015, + "loss": 1.5154, + "step": 3360 + }, + { + "epoch": 0.35548523206751054, + "grad_norm": 0.5980917811393738, + "learning_rate": 0.0015, + "loss": 1.5214, + "step": 3370 + }, + { + "epoch": 0.35654008438818563, + "grad_norm": 0.6130801439285278, + "learning_rate": 0.0015, + "loss": 1.518, + "step": 3380 + }, + { + "epoch": 0.3575949367088608, + "grad_norm": 0.7602121829986572, + "learning_rate": 0.0015, + "loss": 1.512, + "step": 3390 + }, + { + "epoch": 0.35864978902953587, + "grad_norm": 0.5873205661773682, + "learning_rate": 0.0015, + "loss": 1.5219, + "step": 3400 + }, + { + "epoch": 0.35970464135021096, + "grad_norm": 0.6906177997589111, + "learning_rate": 0.0015, + "loss": 1.5183, + "step": 3410 + }, + { + "epoch": 0.36075949367088606, + "grad_norm": 1.0668418407440186, + "learning_rate": 0.0015, + "loss": 1.5, + "step": 3420 + }, + { + "epoch": 0.3618143459915612, + "grad_norm": 0.7267111539840698, + "learning_rate": 0.0015, + "loss": 1.5165, + "step": 3430 + }, + { + "epoch": 0.3628691983122363, + "grad_norm": 0.5980613827705383, + "learning_rate": 0.0015, + "loss": 1.5143, + "step": 3440 + }, + { + "epoch": 0.3639240506329114, + "grad_norm": 0.7561492919921875, + "learning_rate": 0.0015, + "loss": 1.5102, + "step": 3450 + }, + { + "epoch": 0.3649789029535865, + "grad_norm": 0.9539973139762878, + "learning_rate": 0.0015, + "loss": 1.5069, + "step": 3460 + }, + { + "epoch": 0.36603375527426163, + "grad_norm": 0.607132077217102, + "learning_rate": 0.0015, + "loss": 1.5191, + "step": 3470 + }, + { + "epoch": 0.3670886075949367, + "grad_norm": 0.6048598289489746, + 
"learning_rate": 0.0015, + "loss": 1.5232, + "step": 3480 + }, + { + "epoch": 0.3681434599156118, + "grad_norm": 0.6812525391578674, + "learning_rate": 0.0015, + "loss": 1.5164, + "step": 3490 + }, + { + "epoch": 0.3691983122362869, + "grad_norm": 0.6144943237304688, + "learning_rate": 0.0015, + "loss": 1.4973, + "step": 3500 + }, + { + "epoch": 0.370253164556962, + "grad_norm": 0.586285412311554, + "learning_rate": 0.0015, + "loss": 1.5191, + "step": 3510 + }, + { + "epoch": 0.37130801687763715, + "grad_norm": 0.6103049516677856, + "learning_rate": 0.0015, + "loss": 1.5153, + "step": 3520 + }, + { + "epoch": 0.37236286919831224, + "grad_norm": 0.7319056391716003, + "learning_rate": 0.0015, + "loss": 1.5249, + "step": 3530 + }, + { + "epoch": 0.37341772151898733, + "grad_norm": 1.117719292640686, + "learning_rate": 0.0015, + "loss": 1.5211, + "step": 3540 + }, + { + "epoch": 0.3744725738396624, + "grad_norm": 0.8086877465248108, + "learning_rate": 0.0015, + "loss": 1.5085, + "step": 3550 + }, + { + "epoch": 0.3755274261603376, + "grad_norm": 0.6862393021583557, + "learning_rate": 0.0015, + "loss": 1.5064, + "step": 3560 + }, + { + "epoch": 0.37658227848101267, + "grad_norm": 0.5833669304847717, + "learning_rate": 0.0015, + "loss": 1.5024, + "step": 3570 + }, + { + "epoch": 0.37763713080168776, + "grad_norm": 0.5645660161972046, + "learning_rate": 0.0015, + "loss": 1.5051, + "step": 3580 + }, + { + "epoch": 0.37869198312236285, + "grad_norm": 0.669995129108429, + "learning_rate": 0.0015, + "loss": 1.5135, + "step": 3590 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 0.7627750635147095, + "learning_rate": 0.0015, + "loss": 1.4987, + "step": 3600 + }, + { + "epoch": 0.3808016877637131, + "grad_norm": 0.6655398607254028, + "learning_rate": 0.0015, + "loss": 1.504, + "step": 3610 + }, + { + "epoch": 0.3818565400843882, + "grad_norm": 0.6024653911590576, + "learning_rate": 0.0015, + "loss": 1.5036, + "step": 3620 + }, + { + "epoch": 0.3829113924050633, + 
"grad_norm": 0.7302735447883606, + "learning_rate": 0.0015, + "loss": 1.5056, + "step": 3630 + }, + { + "epoch": 0.38396624472573837, + "grad_norm": 0.7307831645011902, + "learning_rate": 0.0015, + "loss": 1.5032, + "step": 3640 + }, + { + "epoch": 0.3850210970464135, + "grad_norm": 0.7583593130111694, + "learning_rate": 0.0015, + "loss": 1.5139, + "step": 3650 + }, + { + "epoch": 0.3860759493670886, + "grad_norm": 0.97417813539505, + "learning_rate": 0.0015, + "loss": 1.5061, + "step": 3660 + }, + { + "epoch": 0.3871308016877637, + "grad_norm": 0.6113054752349854, + "learning_rate": 0.0015, + "loss": 1.5012, + "step": 3670 + }, + { + "epoch": 0.3881856540084388, + "grad_norm": 0.7671633362770081, + "learning_rate": 0.0015, + "loss": 1.5051, + "step": 3680 + }, + { + "epoch": 0.38924050632911394, + "grad_norm": 0.7023954391479492, + "learning_rate": 0.0015, + "loss": 1.5124, + "step": 3690 + }, + { + "epoch": 0.39029535864978904, + "grad_norm": 0.8504228591918945, + "learning_rate": 0.0015, + "loss": 1.5165, + "step": 3700 + }, + { + "epoch": 0.39135021097046413, + "grad_norm": 0.5729485750198364, + "learning_rate": 0.0015, + "loss": 1.503, + "step": 3710 + }, + { + "epoch": 0.3924050632911392, + "grad_norm": 0.5483493804931641, + "learning_rate": 0.0015, + "loss": 1.4809, + "step": 3720 + }, + { + "epoch": 0.39345991561181437, + "grad_norm": 0.610343337059021, + "learning_rate": 0.0015, + "loss": 1.5009, + "step": 3730 + }, + { + "epoch": 0.39451476793248946, + "grad_norm": 0.6620327234268188, + "learning_rate": 0.0015, + "loss": 1.4999, + "step": 3740 + }, + { + "epoch": 0.39556962025316456, + "grad_norm": 0.7655419111251831, + "learning_rate": 0.0015, + "loss": 1.5091, + "step": 3750 + }, + { + "epoch": 0.39662447257383965, + "grad_norm": 1.024680256843567, + "learning_rate": 0.0015, + "loss": 1.4957, + "step": 3760 + }, + { + "epoch": 0.39767932489451474, + "grad_norm": 0.6346902847290039, + "learning_rate": 0.0015, + "loss": 1.5109, + "step": 3770 + }, + { + 
"epoch": 0.3987341772151899, + "grad_norm": 0.5585358738899231, + "learning_rate": 0.0015, + "loss": 1.4933, + "step": 3780 + }, + { + "epoch": 0.399789029535865, + "grad_norm": 0.6738066077232361, + "learning_rate": 0.0015, + "loss": 1.4997, + "step": 3790 + }, + { + "epoch": 0.4008438818565401, + "grad_norm": 0.8015758991241455, + "learning_rate": 0.0015, + "loss": 1.5062, + "step": 3800 + }, + { + "epoch": 0.40189873417721517, + "grad_norm": 0.5939581990242004, + "learning_rate": 0.0015, + "loss": 1.4959, + "step": 3810 + }, + { + "epoch": 0.4029535864978903, + "grad_norm": 0.5677154660224915, + "learning_rate": 0.0015, + "loss": 1.4867, + "step": 3820 + }, + { + "epoch": 0.4040084388185654, + "grad_norm": 0.5126594305038452, + "learning_rate": 0.0015, + "loss": 1.4902, + "step": 3830 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 0.569953203201294, + "learning_rate": 0.0015, + "loss": 1.5075, + "step": 3840 + }, + { + "epoch": 0.4061181434599156, + "grad_norm": 0.6603114604949951, + "learning_rate": 0.0015, + "loss": 1.4899, + "step": 3850 + }, + { + "epoch": 0.40717299578059074, + "grad_norm": 0.755079448223114, + "learning_rate": 0.0015, + "loss": 1.5021, + "step": 3860 + }, + { + "epoch": 0.40822784810126583, + "grad_norm": 0.7609731554985046, + "learning_rate": 0.0015, + "loss": 1.4919, + "step": 3870 + }, + { + "epoch": 0.4092827004219409, + "grad_norm": 0.5657498240470886, + "learning_rate": 0.0015, + "loss": 1.4869, + "step": 3880 + }, + { + "epoch": 0.410337552742616, + "grad_norm": 0.6761792302131653, + "learning_rate": 0.0015, + "loss": 1.4867, + "step": 3890 + }, + { + "epoch": 0.41139240506329117, + "grad_norm": 0.7019993662834167, + "learning_rate": 0.0015, + "loss": 1.4892, + "step": 3900 + }, + { + "epoch": 0.41244725738396626, + "grad_norm": 0.5951997637748718, + "learning_rate": 0.0015, + "loss": 1.4883, + "step": 3910 + }, + { + "epoch": 0.41350210970464135, + "grad_norm": 0.5679964423179626, + "learning_rate": 0.0015, + "loss": 
1.4842, + "step": 3920 + }, + { + "epoch": 0.41455696202531644, + "grad_norm": 0.7679352164268494, + "learning_rate": 0.0015, + "loss": 1.4926, + "step": 3930 + }, + { + "epoch": 0.41561181434599154, + "grad_norm": 0.6341979503631592, + "learning_rate": 0.0015, + "loss": 1.4853, + "step": 3940 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 0.7205622792243958, + "learning_rate": 0.0015, + "loss": 1.4923, + "step": 3950 + }, + { + "epoch": 0.4177215189873418, + "grad_norm": 0.5378568768501282, + "learning_rate": 0.0015, + "loss": 1.4933, + "step": 3960 + }, + { + "epoch": 0.41877637130801687, + "grad_norm": 0.6372908353805542, + "learning_rate": 0.0015, + "loss": 1.4844, + "step": 3970 + }, + { + "epoch": 0.41983122362869196, + "grad_norm": 0.5489225387573242, + "learning_rate": 0.0015, + "loss": 1.4922, + "step": 3980 + }, + { + "epoch": 0.4208860759493671, + "grad_norm": 0.7630021572113037, + "learning_rate": 0.0015, + "loss": 1.4819, + "step": 3990 + }, + { + "epoch": 0.4219409282700422, + "grad_norm": 0.5850704908370972, + "learning_rate": 0.0015, + "loss": 1.4985, + "step": 4000 + }, + { + "epoch": 0.4229957805907173, + "grad_norm": 0.5904955267906189, + "learning_rate": 0.0015, + "loss": 1.4842, + "step": 4010 + }, + { + "epoch": 0.4240506329113924, + "grad_norm": 0.5306342840194702, + "learning_rate": 0.0015, + "loss": 1.4887, + "step": 4020 + }, + { + "epoch": 0.42510548523206754, + "grad_norm": 0.6891077756881714, + "learning_rate": 0.0015, + "loss": 1.4778, + "step": 4030 + }, + { + "epoch": 0.42616033755274263, + "grad_norm": 0.5779524445533752, + "learning_rate": 0.0015, + "loss": 1.4879, + "step": 4040 + }, + { + "epoch": 0.4272151898734177, + "grad_norm": 0.5809128880500793, + "learning_rate": 0.0015, + "loss": 1.4896, + "step": 4050 + }, + { + "epoch": 0.4282700421940928, + "grad_norm": 1.1024632453918457, + "learning_rate": 0.0015, + "loss": 1.4951, + "step": 4060 + }, + { + "epoch": 0.4293248945147679, + "grad_norm": 0.7641704678535461, + 
"learning_rate": 0.0015, + "loss": 1.4893, + "step": 4070 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 0.7490102648735046, + "learning_rate": 0.0015, + "loss": 1.497, + "step": 4080 + }, + { + "epoch": 0.43143459915611815, + "grad_norm": 0.8326114416122437, + "learning_rate": 0.0015, + "loss": 1.4943, + "step": 4090 + }, + { + "epoch": 0.43248945147679324, + "grad_norm": 0.6151942610740662, + "learning_rate": 0.0015, + "loss": 1.4852, + "step": 4100 + }, + { + "epoch": 0.43354430379746833, + "grad_norm": 0.8160589933395386, + "learning_rate": 0.0015, + "loss": 1.4764, + "step": 4110 + }, + { + "epoch": 0.4345991561181435, + "grad_norm": 0.6273179054260254, + "learning_rate": 0.0015, + "loss": 1.4857, + "step": 4120 + }, + { + "epoch": 0.4356540084388186, + "grad_norm": 0.6272881627082825, + "learning_rate": 0.0015, + "loss": 1.4751, + "step": 4130 + }, + { + "epoch": 0.43670886075949367, + "grad_norm": 0.7416477203369141, + "learning_rate": 0.0015, + "loss": 1.4773, + "step": 4140 + }, + { + "epoch": 0.43776371308016876, + "grad_norm": 0.624068021774292, + "learning_rate": 0.0015, + "loss": 1.4845, + "step": 4150 + }, + { + "epoch": 0.4388185654008439, + "grad_norm": 0.6187600493431091, + "learning_rate": 0.0015, + "loss": 1.4776, + "step": 4160 + }, + { + "epoch": 0.439873417721519, + "grad_norm": 0.5595556497573853, + "learning_rate": 0.0015, + "loss": 1.477, + "step": 4170 + }, + { + "epoch": 0.4409282700421941, + "grad_norm": 0.5256155133247375, + "learning_rate": 0.0015, + "loss": 1.4738, + "step": 4180 + }, + { + "epoch": 0.4419831223628692, + "grad_norm": 0.5809005498886108, + "learning_rate": 0.0015, + "loss": 1.4816, + "step": 4190 + }, + { + "epoch": 0.4430379746835443, + "grad_norm": 0.6279202699661255, + "learning_rate": 0.0015, + "loss": 1.4841, + "step": 4200 + }, + { + "epoch": 0.4440928270042194, + "grad_norm": 0.5803406238555908, + "learning_rate": 0.0015, + "loss": 1.4711, + "step": 4210 + }, + { + "epoch": 0.4451476793248945, + 
"grad_norm": 0.6868331432342529, + "learning_rate": 0.0015, + "loss": 1.4829, + "step": 4220 + }, + { + "epoch": 0.4462025316455696, + "grad_norm": 1.102671504020691, + "learning_rate": 0.0015, + "loss": 1.4806, + "step": 4230 + }, + { + "epoch": 0.4472573839662447, + "grad_norm": 0.8444005250930786, + "learning_rate": 0.0015, + "loss": 1.4786, + "step": 4240 + }, + { + "epoch": 0.44831223628691985, + "grad_norm": 0.596646249294281, + "learning_rate": 0.0015, + "loss": 1.482, + "step": 4250 + }, + { + "epoch": 0.44936708860759494, + "grad_norm": 0.671666145324707, + "learning_rate": 0.0015, + "loss": 1.4852, + "step": 4260 + }, + { + "epoch": 0.45042194092827004, + "grad_norm": 0.535335898399353, + "learning_rate": 0.0015, + "loss": 1.4766, + "step": 4270 + }, + { + "epoch": 0.45147679324894513, + "grad_norm": 0.7683068513870239, + "learning_rate": 0.0015, + "loss": 1.4693, + "step": 4280 + }, + { + "epoch": 0.4525316455696203, + "grad_norm": 0.9041469097137451, + "learning_rate": 0.0015, + "loss": 1.4826, + "step": 4290 + }, + { + "epoch": 0.45358649789029537, + "grad_norm": 0.7702934741973877, + "learning_rate": 0.0015, + "loss": 1.4805, + "step": 4300 + }, + { + "epoch": 0.45464135021097046, + "grad_norm": 0.5070308446884155, + "learning_rate": 0.0015, + "loss": 1.4761, + "step": 4310 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 0.8397290110588074, + "learning_rate": 0.0015, + "loss": 1.4738, + "step": 4320 + }, + { + "epoch": 0.45675105485232065, + "grad_norm": 0.62149977684021, + "learning_rate": 0.0015, + "loss": 1.4813, + "step": 4330 + }, + { + "epoch": 0.4578059071729958, + "grad_norm": 0.7054444551467896, + "learning_rate": 0.0015, + "loss": 1.4729, + "step": 4340 + }, + { + "epoch": 0.4588607594936709, + "grad_norm": 0.6459339261054993, + "learning_rate": 0.0015, + "loss": 1.4727, + "step": 4350 + }, + { + "epoch": 0.459915611814346, + "grad_norm": 0.5699241757392883, + "learning_rate": 0.0015, + "loss": 1.491, + "step": 4360 + }, + { + 
"epoch": 0.4609704641350211, + "grad_norm": 0.5244947671890259, + "learning_rate": 0.0015, + "loss": 1.4642, + "step": 4370 + }, + { + "epoch": 0.4620253164556962, + "grad_norm": 0.5413959622383118, + "learning_rate": 0.0015, + "loss": 1.474, + "step": 4380 + }, + { + "epoch": 0.4630801687763713, + "grad_norm": 0.61199551820755, + "learning_rate": 0.0015, + "loss": 1.4686, + "step": 4390 + }, + { + "epoch": 0.4641350210970464, + "grad_norm": 0.5997664332389832, + "learning_rate": 0.0015, + "loss": 1.4746, + "step": 4400 + }, + { + "epoch": 0.4651898734177215, + "grad_norm": 0.5737345218658447, + "learning_rate": 0.0015, + "loss": 1.4906, + "step": 4410 + }, + { + "epoch": 0.46624472573839665, + "grad_norm": 0.6178412437438965, + "learning_rate": 0.0015, + "loss": 1.4733, + "step": 4420 + }, + { + "epoch": 0.46729957805907174, + "grad_norm": 0.6506589651107788, + "learning_rate": 0.0015, + "loss": 1.4786, + "step": 4430 + }, + { + "epoch": 0.46835443037974683, + "grad_norm": 0.6712284088134766, + "learning_rate": 0.0015, + "loss": 1.4734, + "step": 4440 + }, + { + "epoch": 0.4694092827004219, + "grad_norm": 0.6803669929504395, + "learning_rate": 0.0015, + "loss": 1.4789, + "step": 4450 + }, + { + "epoch": 0.4704641350210971, + "grad_norm": 0.521842360496521, + "learning_rate": 0.0015, + "loss": 1.4667, + "step": 4460 + }, + { + "epoch": 0.47151898734177217, + "grad_norm": 0.5516724586486816, + "learning_rate": 0.0015, + "loss": 1.4779, + "step": 4470 + }, + { + "epoch": 0.47257383966244726, + "grad_norm": 0.5905670523643494, + "learning_rate": 0.0015, + "loss": 1.4818, + "step": 4480 + }, + { + "epoch": 0.47362869198312235, + "grad_norm": 0.7067710757255554, + "learning_rate": 0.0015, + "loss": 1.4685, + "step": 4490 + }, + { + "epoch": 0.47468354430379744, + "grad_norm": 0.6001560091972351, + "learning_rate": 0.0015, + "loss": 1.4676, + "step": 4500 + }, + { + "epoch": 0.4757383966244726, + "grad_norm": 0.6225304007530212, + "learning_rate": 0.0015, + "loss": 
1.4719, + "step": 4510 + }, + { + "epoch": 0.4767932489451477, + "grad_norm": 0.5559874176979065, + "learning_rate": 0.0015, + "loss": 1.4545, + "step": 4520 + }, + { + "epoch": 0.4778481012658228, + "grad_norm": 0.6309592723846436, + "learning_rate": 0.0015, + "loss": 1.4736, + "step": 4530 + }, + { + "epoch": 0.47890295358649787, + "grad_norm": 0.7160559892654419, + "learning_rate": 0.0015, + "loss": 1.4743, + "step": 4540 + }, + { + "epoch": 0.479957805907173, + "grad_norm": 0.8767718076705933, + "learning_rate": 0.0015, + "loss": 1.4742, + "step": 4550 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 0.7151129245758057, + "learning_rate": 0.0015, + "loss": 1.464, + "step": 4560 + }, + { + "epoch": 0.4820675105485232, + "grad_norm": 0.9302311539649963, + "learning_rate": 0.0015, + "loss": 1.4478, + "step": 4570 + }, + { + "epoch": 0.4831223628691983, + "grad_norm": 0.6355986595153809, + "learning_rate": 0.0015, + "loss": 1.4666, + "step": 4580 + }, + { + "epoch": 0.48417721518987344, + "grad_norm": 0.6565022468566895, + "learning_rate": 0.0015, + "loss": 1.4548, + "step": 4590 + }, + { + "epoch": 0.48523206751054854, + "grad_norm": 0.6305771470069885, + "learning_rate": 0.0015, + "loss": 1.4732, + "step": 4600 + }, + { + "epoch": 0.48628691983122363, + "grad_norm": 0.875548779964447, + "learning_rate": 0.0015, + "loss": 1.4776, + "step": 4610 + }, + { + "epoch": 0.4873417721518987, + "grad_norm": 0.5566856265068054, + "learning_rate": 0.0015, + "loss": 1.4538, + "step": 4620 + }, + { + "epoch": 0.4883966244725738, + "grad_norm": 0.584717333316803, + "learning_rate": 0.0015, + "loss": 1.471, + "step": 4630 + }, + { + "epoch": 0.48945147679324896, + "grad_norm": 0.7148219347000122, + "learning_rate": 0.0015, + "loss": 1.4742, + "step": 4640 + }, + { + "epoch": 0.49050632911392406, + "grad_norm": 0.6339941620826721, + "learning_rate": 0.0015, + "loss": 1.474, + "step": 4650 + }, + { + "epoch": 0.49156118143459915, + "grad_norm": 0.6222759485244751, + 
"learning_rate": 0.0015, + "loss": 1.464, + "step": 4660 + }, + { + "epoch": 0.49261603375527424, + "grad_norm": 0.6205152869224548, + "learning_rate": 0.0015, + "loss": 1.4614, + "step": 4670 + }, + { + "epoch": 0.4936708860759494, + "grad_norm": 0.5359929203987122, + "learning_rate": 0.0015, + "loss": 1.4582, + "step": 4680 + }, + { + "epoch": 0.4947257383966245, + "grad_norm": 0.6826288104057312, + "learning_rate": 0.0015, + "loss": 1.4538, + "step": 4690 + }, + { + "epoch": 0.4957805907172996, + "grad_norm": 0.5032361745834351, + "learning_rate": 0.0015, + "loss": 1.4639, + "step": 4700 + }, + { + "epoch": 0.49683544303797467, + "grad_norm": 0.5172774195671082, + "learning_rate": 0.0015, + "loss": 1.4666, + "step": 4710 + }, + { + "epoch": 0.4978902953586498, + "grad_norm": 0.8674235939979553, + "learning_rate": 0.0015, + "loss": 1.4621, + "step": 4720 + }, + { + "epoch": 0.4989451476793249, + "grad_norm": 0.8208064436912537, + "learning_rate": 0.0015, + "loss": 1.46, + "step": 4730 + }, + { + "epoch": 0.5, + "grad_norm": 0.9781603813171387, + "learning_rate": 0.0015, + "loss": 1.4599, + "step": 4740 + }, + { + "epoch": 0.5010548523206751, + "grad_norm": 0.571770966053009, + "learning_rate": 0.0015, + "loss": 1.4759, + "step": 4750 + }, + { + "epoch": 0.5021097046413502, + "grad_norm": 0.598699152469635, + "learning_rate": 0.0015, + "loss": 1.4627, + "step": 4760 + }, + { + "epoch": 0.5031645569620253, + "grad_norm": 0.7230565547943115, + "learning_rate": 0.0015, + "loss": 1.4604, + "step": 4770 + }, + { + "epoch": 0.5042194092827004, + "grad_norm": 0.5015134215354919, + "learning_rate": 0.0015, + "loss": 1.4722, + "step": 4780 + }, + { + "epoch": 0.5052742616033755, + "grad_norm": 0.7695246338844299, + "learning_rate": 0.0015, + "loss": 1.4521, + "step": 4790 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 0.6588584780693054, + "learning_rate": 0.0015, + "loss": 1.4594, + "step": 4800 + }, + { + "epoch": 0.5073839662447257, + "grad_norm": 
0.5185235738754272, + "learning_rate": 0.0015, + "loss": 1.4598, + "step": 4810 + }, + { + "epoch": 0.5084388185654009, + "grad_norm": 0.545920193195343, + "learning_rate": 0.0015, + "loss": 1.4773, + "step": 4820 + }, + { + "epoch": 0.509493670886076, + "grad_norm": 0.688926637172699, + "learning_rate": 0.0015, + "loss": 1.4607, + "step": 4830 + }, + { + "epoch": 0.510548523206751, + "grad_norm": 0.5832346081733704, + "learning_rate": 0.0015, + "loss": 1.4542, + "step": 4840 + }, + { + "epoch": 0.5116033755274262, + "grad_norm": 0.5630446672439575, + "learning_rate": 0.0015, + "loss": 1.4683, + "step": 4850 + }, + { + "epoch": 0.5126582278481012, + "grad_norm": 0.6929224133491516, + "learning_rate": 0.0015, + "loss": 1.4565, + "step": 4860 + }, + { + "epoch": 0.5137130801687764, + "grad_norm": 0.9718674421310425, + "learning_rate": 0.0015, + "loss": 1.4505, + "step": 4870 + }, + { + "epoch": 0.5147679324894515, + "grad_norm": 0.5524126887321472, + "learning_rate": 0.0015, + "loss": 1.4718, + "step": 4880 + }, + { + "epoch": 0.5158227848101266, + "grad_norm": 0.5327380895614624, + "learning_rate": 0.0015, + "loss": 1.46, + "step": 4890 + }, + { + "epoch": 0.5168776371308017, + "grad_norm": 0.6507748365402222, + "learning_rate": 0.0015, + "loss": 1.4669, + "step": 4900 + }, + { + "epoch": 0.5179324894514767, + "grad_norm": 0.8093947172164917, + "learning_rate": 0.0015, + "loss": 1.4616, + "step": 4910 + }, + { + "epoch": 0.5189873417721519, + "grad_norm": 0.5995306372642517, + "learning_rate": 0.0015, + "loss": 1.4508, + "step": 4920 + }, + { + "epoch": 0.520042194092827, + "grad_norm": 0.6007446050643921, + "learning_rate": 0.0015, + "loss": 1.4641, + "step": 4930 + }, + { + "epoch": 0.5210970464135021, + "grad_norm": 0.6167561411857605, + "learning_rate": 0.0015, + "loss": 1.4567, + "step": 4940 + }, + { + "epoch": 0.5221518987341772, + "grad_norm": 0.863328754901886, + "learning_rate": 0.0015, + "loss": 1.4665, + "step": 4950 + }, + { + "epoch": 
0.5232067510548524, + "grad_norm": 0.7674944400787354, + "learning_rate": 0.0015, + "loss": 1.4498, + "step": 4960 + }, + { + "epoch": 0.5242616033755274, + "grad_norm": 0.5564290285110474, + "learning_rate": 0.0015, + "loss": 1.4544, + "step": 4970 + }, + { + "epoch": 0.5253164556962026, + "grad_norm": 0.6430163383483887, + "learning_rate": 0.0015, + "loss": 1.4512, + "step": 4980 + }, + { + "epoch": 0.5263713080168776, + "grad_norm": 0.851726770401001, + "learning_rate": 0.0015, + "loss": 1.454, + "step": 4990 + }, + { + "epoch": 0.5274261603375527, + "grad_norm": 0.671866238117218, + "learning_rate": 0.0015, + "loss": 1.4654, + "step": 5000 + }, + { + "epoch": 0.5284810126582279, + "grad_norm": 0.5586315989494324, + "learning_rate": 0.0015, + "loss": 1.4573, + "step": 5010 + }, + { + "epoch": 0.5295358649789029, + "grad_norm": 0.6230441331863403, + "learning_rate": 0.0015, + "loss": 1.4518, + "step": 5020 + }, + { + "epoch": 0.5305907172995781, + "grad_norm": 0.5090010166168213, + "learning_rate": 0.0015, + "loss": 1.4352, + "step": 5030 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 0.5542201995849609, + "learning_rate": 0.0015, + "loss": 1.4629, + "step": 5040 + }, + { + "epoch": 0.5327004219409283, + "grad_norm": 0.5656949281692505, + "learning_rate": 0.0015, + "loss": 1.4523, + "step": 5050 + }, + { + "epoch": 0.5337552742616034, + "grad_norm": 0.6557237505912781, + "learning_rate": 0.0015, + "loss": 1.4672, + "step": 5060 + }, + { + "epoch": 0.5348101265822784, + "grad_norm": 0.7103344202041626, + "learning_rate": 0.0015, + "loss": 1.4506, + "step": 5070 + }, + { + "epoch": 0.5358649789029536, + "grad_norm": 0.6108971834182739, + "learning_rate": 0.0015, + "loss": 1.4638, + "step": 5080 + }, + { + "epoch": 0.5369198312236287, + "grad_norm": 0.5387849807739258, + "learning_rate": 0.0015, + "loss": 1.4553, + "step": 5090 + }, + { + "epoch": 0.5379746835443038, + "grad_norm": 0.7689235806465149, + "learning_rate": 0.0015, + "loss": 1.4404, + "step": 
5100 + }, + { + "epoch": 0.5390295358649789, + "grad_norm": 0.6011940836906433, + "learning_rate": 0.0015, + "loss": 1.4542, + "step": 5110 + }, + { + "epoch": 0.540084388185654, + "grad_norm": 0.7255417704582214, + "learning_rate": 0.0015, + "loss": 1.4506, + "step": 5120 + }, + { + "epoch": 0.5411392405063291, + "grad_norm": 0.578315019607544, + "learning_rate": 0.0015, + "loss": 1.4458, + "step": 5130 + }, + { + "epoch": 0.5421940928270043, + "grad_norm": 0.560231626033783, + "learning_rate": 0.0015, + "loss": 1.4629, + "step": 5140 + }, + { + "epoch": 0.5432489451476793, + "grad_norm": 0.6060218811035156, + "learning_rate": 0.0015, + "loss": 1.4599, + "step": 5150 + }, + { + "epoch": 0.5443037974683544, + "grad_norm": 0.5653533339500427, + "learning_rate": 0.0015, + "loss": 1.4549, + "step": 5160 + }, + { + "epoch": 0.5453586497890295, + "grad_norm": 0.5796107053756714, + "learning_rate": 0.0015, + "loss": 1.4537, + "step": 5170 + }, + { + "epoch": 0.5464135021097046, + "grad_norm": 0.8535887598991394, + "learning_rate": 0.0015, + "loss": 1.4646, + "step": 5180 + }, + { + "epoch": 0.5474683544303798, + "grad_norm": 0.8061315417289734, + "learning_rate": 0.0015, + "loss": 1.4458, + "step": 5190 + }, + { + "epoch": 0.5485232067510548, + "grad_norm": 0.8464187979698181, + "learning_rate": 0.0015, + "loss": 1.4581, + "step": 5200 + }, + { + "epoch": 0.54957805907173, + "grad_norm": 0.5930094122886658, + "learning_rate": 0.0015, + "loss": 1.4461, + "step": 5210 + }, + { + "epoch": 0.5506329113924051, + "grad_norm": 0.5810009837150574, + "learning_rate": 0.0015, + "loss": 1.4544, + "step": 5220 + }, + { + "epoch": 0.5516877637130801, + "grad_norm": 0.588116466999054, + "learning_rate": 0.0015, + "loss": 1.4452, + "step": 5230 + }, + { + "epoch": 0.5527426160337553, + "grad_norm": 0.6483892798423767, + "learning_rate": 0.0015, + "loss": 1.4533, + "step": 5240 + }, + { + "epoch": 0.5537974683544303, + "grad_norm": 0.6050438284873962, + "learning_rate": 0.0015, + 
"loss": 1.4438, + "step": 5250 + }, + { + "epoch": 0.5548523206751055, + "grad_norm": 0.5505537390708923, + "learning_rate": 0.0015, + "loss": 1.4502, + "step": 5260 + }, + { + "epoch": 0.5559071729957806, + "grad_norm": 0.518280565738678, + "learning_rate": 0.0015, + "loss": 1.4543, + "step": 5270 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 0.7972303628921509, + "learning_rate": 0.0015, + "loss": 1.4493, + "step": 5280 + }, + { + "epoch": 0.5580168776371308, + "grad_norm": 0.5991816520690918, + "learning_rate": 0.0015, + "loss": 1.4502, + "step": 5290 + }, + { + "epoch": 0.5590717299578059, + "grad_norm": 0.6275891065597534, + "learning_rate": 0.0015, + "loss": 1.4557, + "step": 5300 + }, + { + "epoch": 0.560126582278481, + "grad_norm": 0.9002463817596436, + "learning_rate": 0.0015, + "loss": 1.4398, + "step": 5310 + }, + { + "epoch": 0.5611814345991561, + "grad_norm": 0.6013373732566833, + "learning_rate": 0.0015, + "loss": 1.4444, + "step": 5320 + }, + { + "epoch": 0.5622362869198312, + "grad_norm": 0.6320314407348633, + "learning_rate": 0.0015, + "loss": 1.4516, + "step": 5330 + }, + { + "epoch": 0.5632911392405063, + "grad_norm": 0.6931146383285522, + "learning_rate": 0.0015, + "loss": 1.4441, + "step": 5340 + }, + { + "epoch": 0.5643459915611815, + "grad_norm": 0.5166217088699341, + "learning_rate": 0.0015, + "loss": 1.4379, + "step": 5350 + }, + { + "epoch": 0.5654008438818565, + "grad_norm": 0.6747555732727051, + "learning_rate": 0.0015, + "loss": 1.4493, + "step": 5360 + }, + { + "epoch": 0.5664556962025317, + "grad_norm": 0.6006925106048584, + "learning_rate": 0.0015, + "loss": 1.449, + "step": 5370 + }, + { + "epoch": 0.5675105485232067, + "grad_norm": 0.5262987017631531, + "learning_rate": 0.0015, + "loss": 1.4423, + "step": 5380 + }, + { + "epoch": 0.5685654008438819, + "grad_norm": 0.5369434356689453, + "learning_rate": 0.0015, + "loss": 1.4412, + "step": 5390 + }, + { + "epoch": 0.569620253164557, + "grad_norm": 0.6034254431724548, + 
"learning_rate": 0.0015, + "loss": 1.4429, + "step": 5400 + }, + { + "epoch": 0.570675105485232, + "grad_norm": 0.5636699795722961, + "learning_rate": 0.0015, + "loss": 1.4442, + "step": 5410 + }, + { + "epoch": 0.5717299578059072, + "grad_norm": 0.4923035800457001, + "learning_rate": 0.0015, + "loss": 1.4403, + "step": 5420 + }, + { + "epoch": 0.5727848101265823, + "grad_norm": 0.5754071474075317, + "learning_rate": 0.0015, + "loss": 1.4355, + "step": 5430 + }, + { + "epoch": 0.5738396624472574, + "grad_norm": 0.5764600038528442, + "learning_rate": 0.0015, + "loss": 1.4324, + "step": 5440 + }, + { + "epoch": 0.5748945147679325, + "grad_norm": 0.6578461527824402, + "learning_rate": 0.0015, + "loss": 1.4427, + "step": 5450 + }, + { + "epoch": 0.5759493670886076, + "grad_norm": 0.6753664016723633, + "learning_rate": 0.0015, + "loss": 1.4454, + "step": 5460 + }, + { + "epoch": 0.5770042194092827, + "grad_norm": 0.6659565567970276, + "learning_rate": 0.0015, + "loss": 1.4435, + "step": 5470 + }, + { + "epoch": 0.5780590717299579, + "grad_norm": 0.6190072298049927, + "learning_rate": 0.0015, + "loss": 1.4453, + "step": 5480 + }, + { + "epoch": 0.5791139240506329, + "grad_norm": 0.4922817349433899, + "learning_rate": 0.0015, + "loss": 1.4528, + "step": 5490 + }, + { + "epoch": 0.580168776371308, + "grad_norm": 0.6177582740783691, + "learning_rate": 0.0015, + "loss": 1.4392, + "step": 5500 + }, + { + "epoch": 0.5812236286919831, + "grad_norm": 0.8453856110572815, + "learning_rate": 0.0015, + "loss": 1.4474, + "step": 5510 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 0.5570701956748962, + "learning_rate": 0.0015, + "loss": 1.4367, + "step": 5520 + }, + { + "epoch": 0.5833333333333334, + "grad_norm": 0.81148761510849, + "learning_rate": 0.0015, + "loss": 1.4338, + "step": 5530 + }, + { + "epoch": 0.5843881856540084, + "grad_norm": 0.5960834622383118, + "learning_rate": 0.0015, + "loss": 1.4334, + "step": 5540 + }, + { + "epoch": 0.5854430379746836, + "grad_norm": 
0.5733485221862793, + "learning_rate": 0.0015, + "loss": 1.4517, + "step": 5550 + }, + { + "epoch": 0.5864978902953587, + "grad_norm": 0.529215395450592, + "learning_rate": 0.0015, + "loss": 1.4534, + "step": 5560 + }, + { + "epoch": 0.5875527426160337, + "grad_norm": 0.5399163365364075, + "learning_rate": 0.0015, + "loss": 1.4381, + "step": 5570 + }, + { + "epoch": 0.5886075949367089, + "grad_norm": 0.5851649045944214, + "learning_rate": 0.0015, + "loss": 1.4391, + "step": 5580 + }, + { + "epoch": 0.5896624472573839, + "grad_norm": 0.6026334762573242, + "learning_rate": 0.0015, + "loss": 1.4412, + "step": 5590 + }, + { + "epoch": 0.5907172995780591, + "grad_norm": 0.6509226560592651, + "learning_rate": 0.0015, + "loss": 1.4355, + "step": 5600 + }, + { + "epoch": 0.5917721518987342, + "grad_norm": 0.5954581499099731, + "learning_rate": 0.0015, + "loss": 1.4416, + "step": 5610 + }, + { + "epoch": 0.5928270042194093, + "grad_norm": 0.5819922685623169, + "learning_rate": 0.0015, + "loss": 1.4511, + "step": 5620 + }, + { + "epoch": 0.5938818565400844, + "grad_norm": 0.848669707775116, + "learning_rate": 0.0015, + "loss": 1.4442, + "step": 5630 + }, + { + "epoch": 0.5949367088607594, + "grad_norm": 0.5753160715103149, + "learning_rate": 0.0015, + "loss": 1.4554, + "step": 5640 + }, + { + "epoch": 0.5959915611814346, + "grad_norm": 0.5640377402305603, + "learning_rate": 0.0015, + "loss": 1.444, + "step": 5650 + }, + { + "epoch": 0.5970464135021097, + "grad_norm": 0.6906534433364868, + "learning_rate": 0.0015, + "loss": 1.4425, + "step": 5660 + }, + { + "epoch": 0.5981012658227848, + "grad_norm": 0.6102475523948669, + "learning_rate": 0.0015, + "loss": 1.4484, + "step": 5670 + }, + { + "epoch": 0.5991561181434599, + "grad_norm": 0.6447153091430664, + "learning_rate": 0.0015, + "loss": 1.4501, + "step": 5680 + }, + { + "epoch": 0.6002109704641351, + "grad_norm": 0.8873614072799683, + "learning_rate": 0.0015, + "loss": 1.4287, + "step": 5690 + }, + { + "epoch": 
0.6012658227848101, + "grad_norm": 0.585718035697937, + "learning_rate": 0.0015, + "loss": 1.4338, + "step": 5700 + }, + { + "epoch": 0.6023206751054853, + "grad_norm": 0.7442241311073303, + "learning_rate": 0.0015, + "loss": 1.4366, + "step": 5710 + }, + { + "epoch": 0.6033755274261603, + "grad_norm": 0.5508397817611694, + "learning_rate": 0.0015, + "loss": 1.4415, + "step": 5720 + }, + { + "epoch": 0.6044303797468354, + "grad_norm": 0.7039012312889099, + "learning_rate": 0.0015, + "loss": 1.4382, + "step": 5730 + }, + { + "epoch": 0.6054852320675106, + "grad_norm": 0.5600535869598389, + "learning_rate": 0.0015, + "loss": 1.4362, + "step": 5740 + }, + { + "epoch": 0.6065400843881856, + "grad_norm": 0.589037299156189, + "learning_rate": 0.0015, + "loss": 1.4137, + "step": 5750 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 0.555253267288208, + "learning_rate": 0.0015, + "loss": 1.4226, + "step": 5760 + }, + { + "epoch": 0.6086497890295358, + "grad_norm": 0.6245922446250916, + "learning_rate": 0.0015, + "loss": 1.4256, + "step": 5770 + }, + { + "epoch": 0.609704641350211, + "grad_norm": 0.5162799954414368, + "learning_rate": 0.0015, + "loss": 1.4335, + "step": 5780 + }, + { + "epoch": 0.6107594936708861, + "grad_norm": 0.7023717164993286, + "learning_rate": 0.0015, + "loss": 1.4298, + "step": 5790 + }, + { + "epoch": 0.6118143459915611, + "grad_norm": 0.5719383358955383, + "learning_rate": 0.0015, + "loss": 1.4356, + "step": 5800 + }, + { + "epoch": 0.6128691983122363, + "grad_norm": 0.5629717707633972, + "learning_rate": 0.0015, + "loss": 1.4403, + "step": 5810 + }, + { + "epoch": 0.6139240506329114, + "grad_norm": 0.6027096509933472, + "learning_rate": 0.0015, + "loss": 1.451, + "step": 5820 + }, + { + "epoch": 0.6149789029535865, + "grad_norm": 0.6054722666740417, + "learning_rate": 0.0015, + "loss": 1.4349, + "step": 5830 + }, + { + "epoch": 0.6160337552742616, + "grad_norm": 0.6634573340415955, + "learning_rate": 0.0015, + "loss": 1.4461, + "step": 5840 
+ }, + { + "epoch": 0.6170886075949367, + "grad_norm": 0.6132192611694336, + "learning_rate": 0.0015, + "loss": 1.4522, + "step": 5850 + }, + { + "epoch": 0.6181434599156118, + "grad_norm": 0.5512849688529968, + "learning_rate": 0.0015, + "loss": 1.4397, + "step": 5860 + }, + { + "epoch": 0.619198312236287, + "grad_norm": 0.5478540658950806, + "learning_rate": 0.0015, + "loss": 1.432, + "step": 5870 + }, + { + "epoch": 0.620253164556962, + "grad_norm": 0.5170896053314209, + "learning_rate": 0.0015, + "loss": 1.4417, + "step": 5880 + }, + { + "epoch": 0.6213080168776371, + "grad_norm": 0.5410272479057312, + "learning_rate": 0.0015, + "loss": 1.4315, + "step": 5890 + }, + { + "epoch": 0.6223628691983122, + "grad_norm": 0.633662760257721, + "learning_rate": 0.0015, + "loss": 1.4304, + "step": 5900 + }, + { + "epoch": 0.6234177215189873, + "grad_norm": 0.5839591026306152, + "learning_rate": 0.0015, + "loss": 1.4293, + "step": 5910 + }, + { + "epoch": 0.6244725738396625, + "grad_norm": 0.5846387147903442, + "learning_rate": 0.0015, + "loss": 1.4241, + "step": 5920 + }, + { + "epoch": 0.6255274261603375, + "grad_norm": 0.7573683857917786, + "learning_rate": 0.0015, + "loss": 1.4321, + "step": 5930 + }, + { + "epoch": 0.6265822784810127, + "grad_norm": 0.5895115733146667, + "learning_rate": 0.0015, + "loss": 1.4382, + "step": 5940 + }, + { + "epoch": 0.6276371308016878, + "grad_norm": 0.5857595205307007, + "learning_rate": 0.0015, + "loss": 1.4265, + "step": 5950 + }, + { + "epoch": 0.6286919831223629, + "grad_norm": 0.6862102150917053, + "learning_rate": 0.0015, + "loss": 1.4328, + "step": 5960 + }, + { + "epoch": 0.629746835443038, + "grad_norm": 0.5491304397583008, + "learning_rate": 0.0015, + "loss": 1.4367, + "step": 5970 + }, + { + "epoch": 0.630801687763713, + "grad_norm": 0.5281641483306885, + "learning_rate": 0.0015, + "loss": 1.4197, + "step": 5980 + }, + { + "epoch": 0.6318565400843882, + "grad_norm": 0.7536759972572327, + "learning_rate": 0.0015, + "loss": 
1.4402, + "step": 5990 + }, + { + "epoch": 0.6329113924050633, + "grad_norm": 0.7013608813285828, + "learning_rate": 0.0015, + "loss": 1.4479, + "step": 6000 + }, + { + "epoch": 0.6339662447257384, + "grad_norm": 0.5345012545585632, + "learning_rate": 0.0015, + "loss": 1.4335, + "step": 6010 + }, + { + "epoch": 0.6350210970464135, + "grad_norm": 0.8666704297065735, + "learning_rate": 0.0015, + "loss": 1.431, + "step": 6020 + }, + { + "epoch": 0.6360759493670886, + "grad_norm": 0.5606105327606201, + "learning_rate": 0.0015, + "loss": 1.4166, + "step": 6030 + }, + { + "epoch": 0.6371308016877637, + "grad_norm": 0.6502395272254944, + "learning_rate": 0.0015, + "loss": 1.437, + "step": 6040 + }, + { + "epoch": 0.6381856540084389, + "grad_norm": 0.5042735934257507, + "learning_rate": 0.0015, + "loss": 1.4191, + "step": 6050 + }, + { + "epoch": 0.6392405063291139, + "grad_norm": 0.6597897410392761, + "learning_rate": 0.0015, + "loss": 1.4384, + "step": 6060 + }, + { + "epoch": 0.640295358649789, + "grad_norm": 0.7267991900444031, + "learning_rate": 0.0015, + "loss": 1.4313, + "step": 6070 + }, + { + "epoch": 0.6413502109704642, + "grad_norm": 0.49978339672088623, + "learning_rate": 0.0015, + "loss": 1.4287, + "step": 6080 + }, + { + "epoch": 0.6424050632911392, + "grad_norm": 0.6802087426185608, + "learning_rate": 0.0015, + "loss": 1.4296, + "step": 6090 + }, + { + "epoch": 0.6434599156118144, + "grad_norm": 0.5565413236618042, + "learning_rate": 0.0015, + "loss": 1.4216, + "step": 6100 + }, + { + "epoch": 0.6445147679324894, + "grad_norm": 0.5923188924789429, + "learning_rate": 0.0015, + "loss": 1.4347, + "step": 6110 + }, + { + "epoch": 0.6455696202531646, + "grad_norm": 0.5441958904266357, + "learning_rate": 0.0015, + "loss": 1.4289, + "step": 6120 + }, + { + "epoch": 0.6466244725738397, + "grad_norm": 0.5620650053024292, + "learning_rate": 0.0015, + "loss": 1.4276, + "step": 6130 + }, + { + "epoch": 0.6476793248945147, + "grad_norm": 0.6061014533042908, + 
"learning_rate": 0.0015, + "loss": 1.4265, + "step": 6140 + }, + { + "epoch": 0.6487341772151899, + "grad_norm": 0.5683525800704956, + "learning_rate": 0.0015, + "loss": 1.4364, + "step": 6150 + }, + { + "epoch": 0.6497890295358649, + "grad_norm": 0.5564813613891602, + "learning_rate": 0.0015, + "loss": 1.4328, + "step": 6160 + }, + { + "epoch": 0.6508438818565401, + "grad_norm": 0.5868654847145081, + "learning_rate": 0.0014854972418331944, + "loss": 1.411, + "step": 6170 + }, + { + "epoch": 0.6518987341772152, + "grad_norm": 0.5802990794181824, + "learning_rate": 0.0014650219182191931, + "loss": 1.4332, + "step": 6180 + }, + { + "epoch": 0.6529535864978903, + "grad_norm": 0.5038129091262817, + "learning_rate": 0.001444828815847542, + "loss": 1.4332, + "step": 6190 + }, + { + "epoch": 0.6540084388185654, + "grad_norm": 0.6030346155166626, + "learning_rate": 0.0014249140447269945, + "loss": 1.4255, + "step": 6200 + }, + { + "epoch": 0.6550632911392406, + "grad_norm": 0.5199887156486511, + "learning_rate": 0.0014052737684839257, + "loss": 1.4219, + "step": 6210 + }, + { + "epoch": 0.6561181434599156, + "grad_norm": 0.5322880744934082, + "learning_rate": 0.0013859042036232954, + "loss": 1.4278, + "step": 6220 + }, + { + "epoch": 0.6571729957805907, + "grad_norm": 0.5880237817764282, + "learning_rate": 0.001366801618799797, + "loss": 1.4132, + "step": 6230 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 0.571148157119751, + "learning_rate": 0.001347962334099052, + "loss": 1.4179, + "step": 6240 + }, + { + "epoch": 0.6592827004219409, + "grad_norm": 0.5341891050338745, + "learning_rate": 0.0013293827203287143, + "loss": 1.4219, + "step": 6250 + }, + { + "epoch": 0.6603375527426161, + "grad_norm": 0.5636604428291321, + "learning_rate": 0.0013110591983193423, + "loss": 1.4127, + "step": 6260 + }, + { + "epoch": 0.6613924050632911, + "grad_norm": 0.5311402082443237, + "learning_rate": 0.0012929882382349102, + "loss": 1.4199, + "step": 6270 + }, + { + "epoch": 
0.6624472573839663, + "grad_norm": 0.6288583278656006, + "learning_rate": 0.0012751663588928214, + "loss": 1.4187, + "step": 6280 + }, + { + "epoch": 0.6635021097046413, + "grad_norm": 0.5141438245773315, + "learning_rate": 0.0012575901270932943, + "loss": 1.4128, + "step": 6290 + }, + { + "epoch": 0.6645569620253164, + "grad_norm": 0.7027204036712646, + "learning_rate": 0.0012402561569579936, + "loss": 1.4055, + "step": 6300 + }, + { + "epoch": 0.6656118143459916, + "grad_norm": 0.5865928530693054, + "learning_rate": 0.0012231611092777745, + "loss": 1.4031, + "step": 6310 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.5040086507797241, + "learning_rate": 0.0012063016908694193, + "loss": 1.4031, + "step": 6320 + }, + { + "epoch": 0.6677215189873418, + "grad_norm": 0.5940446853637695, + "learning_rate": 0.0011896746539412405, + "loss": 1.4091, + "step": 6330 + }, + { + "epoch": 0.6687763713080169, + "grad_norm": 0.6378319263458252, + "learning_rate": 0.0011732767954674265, + "loss": 1.4006, + "step": 6340 + }, + { + "epoch": 0.669831223628692, + "grad_norm": 0.5189829468727112, + "learning_rate": 0.0011571049565710122, + "loss": 1.4057, + "step": 6350 + }, + { + "epoch": 0.6708860759493671, + "grad_norm": 0.7055829763412476, + "learning_rate": 0.001141156021915355, + "loss": 1.402, + "step": 6360 + }, + { + "epoch": 0.6719409282700421, + "grad_norm": 0.6014175415039062, + "learning_rate": 0.001125426919103997, + "loss": 1.3816, + "step": 6370 + }, + { + "epoch": 0.6729957805907173, + "grad_norm": 0.4926266372203827, + "learning_rate": 0.001109914618088799, + "loss": 1.3875, + "step": 6380 + }, + { + "epoch": 0.6740506329113924, + "grad_norm": 0.8465086817741394, + "learning_rate": 0.0010946161305862348, + "loss": 1.4016, + "step": 6390 + }, + { + "epoch": 0.6751054852320675, + "grad_norm": 0.6302617788314819, + "learning_rate": 0.001079528509501728, + "loss": 1.3986, + "step": 6400 + }, + { + "epoch": 0.6761603375527426, + "grad_norm": 0.5034769773483276, 
+ "learning_rate": 0.0010646488483619261, + "loss": 1.3933, + "step": 6410 + }, + { + "epoch": 0.6772151898734177, + "grad_norm": 0.5093826055526733, + "learning_rate": 0.0010499742807547976, + "loss": 1.381, + "step": 6420 + }, + { + "epoch": 0.6782700421940928, + "grad_norm": 0.5489924550056458, + "learning_rate": 0.0010355019797774478, + "loss": 1.3922, + "step": 6430 + }, + { + "epoch": 0.679324894514768, + "grad_norm": 0.5430496335029602, + "learning_rate": 0.001021229157491546, + "loss": 1.3736, + "step": 6440 + }, + { + "epoch": 0.680379746835443, + "grad_norm": 0.5950986742973328, + "learning_rate": 0.0010071530643862578, + "loss": 1.3788, + "step": 6450 + }, + { + "epoch": 0.6814345991561181, + "grad_norm": 0.49053946137428284, + "learning_rate": 0.000993270988848579, + "loss": 1.3898, + "step": 6460 + }, + { + "epoch": 0.6824894514767933, + "grad_norm": 0.5982795357704163, + "learning_rate": 0.0009795802566409742, + "loss": 1.3844, + "step": 6470 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 0.6431863307952881, + "learning_rate": 0.0009660782303862109, + "loss": 1.3909, + "step": 6480 + }, + { + "epoch": 0.6845991561181435, + "grad_norm": 0.49055856466293335, + "learning_rate": 0.0009527623090592963, + "loss": 1.3854, + "step": 6490 + }, + { + "epoch": 0.6856540084388185, + "grad_norm": 0.6076632738113403, + "learning_rate": 0.0009396299274864177, + "loss": 1.3974, + "step": 6500 + }, + { + "epoch": 0.6867088607594937, + "grad_norm": 0.4661622643470764, + "learning_rate": 0.0009266785558507877, + "loss": 1.3964, + "step": 6510 + }, + { + "epoch": 0.6877637130801688, + "grad_norm": 0.47550806403160095, + "learning_rate": 0.0009139056992053016, + "loss": 1.3775, + "step": 6520 + }, + { + "epoch": 0.6888185654008439, + "grad_norm": 0.516701877117157, + "learning_rate": 0.000901308896991912, + "loss": 1.3726, + "step": 6530 + }, + { + "epoch": 0.689873417721519, + "grad_norm": 0.6510137319564819, + "learning_rate": 0.000888885722567627, + "loss": 
1.3807, + "step": 6540 + }, + { + "epoch": 0.6909282700421941, + "grad_norm": 0.5474238395690918, + "learning_rate": 0.0008766337827370438, + "loss": 1.3753, + "step": 6550 + }, + { + "epoch": 0.6919831223628692, + "grad_norm": 0.6826621294021606, + "learning_rate": 0.000864550717291324, + "loss": 1.3759, + "step": 6560 + }, + { + "epoch": 0.6930379746835443, + "grad_norm": 0.4997193217277527, + "learning_rate": 0.0008526341985535229, + "loss": 1.3708, + "step": 6570 + }, + { + "epoch": 0.6940928270042194, + "grad_norm": 0.49890807271003723, + "learning_rate": 0.0008408819309301891, + "loss": 1.3674, + "step": 6580 + }, + { + "epoch": 0.6951476793248945, + "grad_norm": 0.5390087366104126, + "learning_rate": 0.0008292916504691397, + "loss": 1.3704, + "step": 6590 + }, + { + "epoch": 0.6962025316455697, + "grad_norm": 0.5142775177955627, + "learning_rate": 0.0008178611244233354, + "loss": 1.3783, + "step": 6600 + }, + { + "epoch": 0.6972573839662447, + "grad_norm": 0.4918661415576935, + "learning_rate": 0.0008065881508207637, + "loss": 1.3635, + "step": 6610 + }, + { + "epoch": 0.6983122362869199, + "grad_norm": 0.49260514974594116, + "learning_rate": 0.0007954705580402523, + "loss": 1.3686, + "step": 6620 + }, + { + "epoch": 0.6993670886075949, + "grad_norm": 0.5528978705406189, + "learning_rate": 0.0007845062043931298, + "loss": 1.365, + "step": 6630 + }, + { + "epoch": 0.70042194092827, + "grad_norm": 0.5975780487060547, + "learning_rate": 0.0007736929777106497, + "loss": 1.3651, + "step": 6640 + }, + { + "epoch": 0.7014767932489452, + "grad_norm": 0.5800184011459351, + "learning_rate": 0.000763028794937105, + "loss": 1.3597, + "step": 6650 + }, + { + "epoch": 0.7025316455696202, + "grad_norm": 0.5489034652709961, + "learning_rate": 0.0007525116017285476, + "loss": 1.3609, + "step": 6660 + }, + { + "epoch": 0.7035864978902954, + "grad_norm": 0.546373724937439, + "learning_rate": 0.0007421393720570417, + "loss": 1.361, + "step": 6670 + }, + { + "epoch": 
0.7046413502109705, + "grad_norm": 0.472165584564209, + "learning_rate": 0.0007319101078203694, + "loss": 1.3636, + "step": 6680 + }, + { + "epoch": 0.7056962025316456, + "grad_norm": 0.5557143688201904, + "learning_rate": 0.0007218218384571178, + "loss": 1.3552, + "step": 6690 + }, + { + "epoch": 0.7067510548523207, + "grad_norm": 0.5475256443023682, + "learning_rate": 0.0007118726205670703, + "loss": 1.357, + "step": 6700 + }, + { + "epoch": 0.7078059071729957, + "grad_norm": 0.47926536202430725, + "learning_rate": 0.0007020605375368316, + "loss": 1.3585, + "step": 6710 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 0.5325424075126648, + "learning_rate": 0.000692383699170611, + "loss": 1.3481, + "step": 6720 + }, + { + "epoch": 0.709915611814346, + "grad_norm": 0.5231983065605164, + "learning_rate": 0.0006828402413260966, + "loss": 1.3566, + "step": 6730 + }, + { + "epoch": 0.7109704641350211, + "grad_norm": 0.4812193512916565, + "learning_rate": 0.0006734283255553471, + "loss": 1.3573, + "step": 6740 + }, + { + "epoch": 0.7120253164556962, + "grad_norm": 0.48826074600219727, + "learning_rate": 0.0006641461387506347, + "loss": 1.3578, + "step": 6750 + }, + { + "epoch": 0.7130801687763713, + "grad_norm": 0.6144198179244995, + "learning_rate": 0.0006549918927951678, + "loss": 1.3541, + "step": 6760 + }, + { + "epoch": 0.7141350210970464, + "grad_norm": 0.551744818687439, + "learning_rate": 0.0006459638242186297, + "loss": 1.3572, + "step": 6770 + }, + { + "epoch": 0.7151898734177216, + "grad_norm": 0.5477951765060425, + "learning_rate": 0.0006370601938574639, + "loss": 1.3553, + "step": 6780 + }, + { + "epoch": 0.7162447257383966, + "grad_norm": 0.49401840567588806, + "learning_rate": 0.0006282792865198421, + "loss": 1.3583, + "step": 6790 + }, + { + "epoch": 0.7172995780590717, + "grad_norm": 0.45498543977737427, + "learning_rate": 0.0006196194106552512, + "loss": 1.3437, + "step": 6800 + }, + { + "epoch": 0.7183544303797469, + "grad_norm": 
0.7727215886116028, + "learning_rate": 0.0006110788980286328, + "loss": 1.3486, + "step": 6810 + }, + { + "epoch": 0.7194092827004219, + "grad_norm": 0.5224341750144958, + "learning_rate": 0.0006026561033990158, + "loss": 1.35, + "step": 6820 + }, + { + "epoch": 0.7204641350210971, + "grad_norm": 0.6160230040550232, + "learning_rate": 0.000594349404202577, + "loss": 1.3468, + "step": 6830 + }, + { + "epoch": 0.7215189873417721, + "grad_norm": 0.7007388472557068, + "learning_rate": 0.0005861572002400716, + "loss": 1.336, + "step": 6840 + }, + { + "epoch": 0.7225738396624473, + "grad_norm": 0.4681401550769806, + "learning_rate": 0.0005780779133685717, + "loss": 1.3459, + "step": 6850 + }, + { + "epoch": 0.7236286919831224, + "grad_norm": 0.5841922760009766, + "learning_rate": 0.0005701099871974524, + "loss": 1.3404, + "step": 6860 + }, + { + "epoch": 0.7246835443037974, + "grad_norm": 0.5203360319137573, + "learning_rate": 0.0005622518867885708, + "loss": 1.3364, + "step": 6870 + }, + { + "epoch": 0.7257383966244726, + "grad_norm": 0.5727945566177368, + "learning_rate": 0.0005545020983605748, + "loss": 1.3438, + "step": 6880 + }, + { + "epoch": 0.7267932489451476, + "grad_norm": 0.5776407718658447, + "learning_rate": 0.0005468591289972898, + "loss": 1.3333, + "step": 6890 + }, + { + "epoch": 0.7278481012658228, + "grad_norm": 0.5081267952919006, + "learning_rate": 0.0005393215063601232, + "loss": 1.3352, + "step": 6900 + }, + { + "epoch": 0.7289029535864979, + "grad_norm": 0.49716392159461975, + "learning_rate": 0.0005318877784044343, + "loss": 1.3565, + "step": 6910 + }, + { + "epoch": 0.729957805907173, + "grad_norm": 0.590747594833374, + "learning_rate": 0.0005245565130998126, + "loss": 1.3375, + "step": 6920 + }, + { + "epoch": 0.7310126582278481, + "grad_norm": 0.4804704487323761, + "learning_rate": 0.000517326298154212, + "loss": 1.3436, + "step": 6930 + }, + { + "epoch": 0.7320675105485233, + "grad_norm": 0.4979119598865509, + "learning_rate": 
0.0005101957407418877, + "loss": 1.3459, + "step": 6940 + }, + { + "epoch": 0.7331223628691983, + "grad_norm": 0.4929143190383911, + "learning_rate": 0.0005031634672350829, + "loss": 1.3443, + "step": 6950 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 0.4825648367404938, + "learning_rate": 0.0004962281229394129, + "loss": 1.3326, + "step": 6960 + }, + { + "epoch": 0.7352320675105485, + "grad_norm": 0.5000534653663635, + "learning_rate": 0.0004893883718328983, + "loss": 1.3279, + "step": 6970 + }, + { + "epoch": 0.7362869198312236, + "grad_norm": 0.48189786076545715, + "learning_rate": 0.0004826428963085938, + "loss": 1.3317, + "step": 6980 + }, + { + "epoch": 0.7373417721518988, + "grad_norm": 0.5176799893379211, + "learning_rate": 0.00047599039692076457, + "loss": 1.3317, + "step": 6990 + }, + { + "epoch": 0.7383966244725738, + "grad_norm": 0.49340298771858215, + "learning_rate": 0.0004694295921345622, + "loss": 1.3432, + "step": 7000 + }, + { + "epoch": 0.739451476793249, + "grad_norm": 0.47781771421432495, + "learning_rate": 0.00046295921807915015, + "loss": 1.3144, + "step": 7010 + }, + { + "epoch": 0.740506329113924, + "grad_norm": 0.6368991136550903, + "learning_rate": 0.00045657802830423164, + "loss": 1.3287, + "step": 7020 + }, + { + "epoch": 0.7415611814345991, + "grad_norm": 0.48899218440055847, + "learning_rate": 0.00045028479353993473, + "loss": 1.3336, + "step": 7030 + }, + { + "epoch": 0.7426160337552743, + "grad_norm": 0.6996035575866699, + "learning_rate": 0.00044407830146000587, + "loss": 1.3365, + "step": 7040 + }, + { + "epoch": 0.7436708860759493, + "grad_norm": 0.5863538980484009, + "learning_rate": 0.0004379573564482676, + "loss": 1.3263, + "step": 7050 + }, + { + "epoch": 0.7447257383966245, + "grad_norm": 0.5679978728294373, + "learning_rate": 0.0004319207793682963, + "loss": 1.3303, + "step": 7060 + }, + { + "epoch": 0.7457805907172996, + "grad_norm": 0.5467365384101868, + "learning_rate": 0.0004259674073362731, + "loss": 1.3368, + 
"step": 7070 + }, + { + "epoch": 0.7468354430379747, + "grad_norm": 0.4840575158596039, + "learning_rate": 0.00042009609349696626, + "loss": 1.3318, + "step": 7080 + }, + { + "epoch": 0.7478902953586498, + "grad_norm": 0.49811118841171265, + "learning_rate": 0.00041430570680280233, + "loss": 1.3278, + "step": 7090 + }, + { + "epoch": 0.7489451476793249, + "grad_norm": 0.5084867477416992, + "learning_rate": 0.0004085951317959809, + "loss": 1.3256, + "step": 7100 + }, + { + "epoch": 0.75, + "grad_norm": 0.541933536529541, + "learning_rate": 0.00040296326839359315, + "loss": 1.3352, + "step": 7110 + }, + { + "epoch": 0.7510548523206751, + "grad_norm": 0.6174397468566895, + "learning_rate": 0.000397409031675703, + "loss": 1.3273, + "step": 7120 + }, + { + "epoch": 0.7521097046413502, + "grad_norm": 0.4871473014354706, + "learning_rate": 0.00039193135167634786, + "loss": 1.3371, + "step": 7130 + }, + { + "epoch": 0.7531645569620253, + "grad_norm": 0.4805721342563629, + "learning_rate": 0.00038652917317742123, + "loss": 1.32, + "step": 7140 + }, + { + "epoch": 0.7542194092827004, + "grad_norm": 0.4865748882293701, + "learning_rate": 0.0003812014555053956, + "loss": 1.3166, + "step": 7150 + }, + { + "epoch": 0.7552742616033755, + "grad_norm": 0.48043888807296753, + "learning_rate": 0.00037594717233084774, + "loss": 1.3256, + "step": 7160 + }, + { + "epoch": 0.7563291139240507, + "grad_norm": 0.651296854019165, + "learning_rate": 0.0003707653114707471, + "loss": 1.3346, + "step": 7170 + }, + { + "epoch": 0.7573839662447257, + "grad_norm": 0.5951259732246399, + "learning_rate": 0.00036565487469346906, + "loss": 1.3115, + "step": 7180 + }, + { + "epoch": 0.7584388185654009, + "grad_norm": 0.48794758319854736, + "learning_rate": 0.0003606148775264958, + "loss": 1.3138, + "step": 7190 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 0.5261918306350708, + "learning_rate": 0.0003556443490667684, + "loss": 1.3192, + "step": 7200 + }, + { + "epoch": 0.760548523206751, + 
"grad_norm": 0.5287507176399231, + "learning_rate": 0.0003507423317936521, + "loss": 1.3237, + "step": 7210 + }, + { + "epoch": 0.7616033755274262, + "grad_norm": 0.5026518702507019, + "learning_rate": 0.00034590788138448006, + "loss": 1.3365, + "step": 7220 + }, + { + "epoch": 0.7626582278481012, + "grad_norm": 0.5466956496238708, + "learning_rate": 0.0003411400665326393, + "loss": 1.3308, + "step": 7230 + }, + { + "epoch": 0.7637130801687764, + "grad_norm": 0.5188835263252258, + "learning_rate": 0.00033643796876816424, + "loss": 1.3272, + "step": 7240 + }, + { + "epoch": 0.7647679324894515, + "grad_norm": 0.47899413108825684, + "learning_rate": 0.000331800682280803, + "loss": 1.3216, + "step": 7250 + }, + { + "epoch": 0.7658227848101266, + "grad_norm": 0.4771794378757477, + "learning_rate": 0.0003272273137455226, + "loss": 1.3193, + "step": 7260 + }, + { + "epoch": 0.7668776371308017, + "grad_norm": 0.4901445209980011, + "learning_rate": 0.00032271698215041863, + "loss": 1.3172, + "step": 7270 + }, + { + "epoch": 0.7679324894514767, + "grad_norm": 0.5075931549072266, + "learning_rate": 0.0003182688186269984, + "loss": 1.3193, + "step": 7280 + }, + { + "epoch": 0.7689873417721519, + "grad_norm": 0.4759884774684906, + "learning_rate": 0.0003138819662828017, + "loss": 1.3291, + "step": 7290 + }, + { + "epoch": 0.770042194092827, + "grad_norm": 0.5074654221534729, + "learning_rate": 0.00030955558003632966, + "loss": 1.3315, + "step": 7300 + }, + { + "epoch": 0.7710970464135021, + "grad_norm": 0.4769397974014282, + "learning_rate": 0.0003052888264542483, + "loss": 1.3141, + "step": 7310 + }, + { + "epoch": 0.7721518987341772, + "grad_norm": 0.5517778396606445, + "learning_rate": 0.0003010808835908368, + "loss": 1.3114, + "step": 7320 + }, + { + "epoch": 0.7732067510548524, + "grad_norm": 0.5297053456306458, + "learning_rate": 0.00029693094082964785, + "loss": 1.3157, + "step": 7330 + }, + { + "epoch": 0.7742616033755274, + "grad_norm": 0.5409188866615295, + 
"learning_rate": 0.0002928381987273508, + "loss": 1.3108, + "step": 7340 + }, + { + "epoch": 0.7753164556962026, + "grad_norm": 0.538703441619873, + "learning_rate": 0.0002888018688597272, + "loss": 1.3089, + "step": 7350 + }, + { + "epoch": 0.7763713080168776, + "grad_norm": 0.46452927589416504, + "learning_rate": 0.0002848211736697894, + "loss": 1.3155, + "step": 7360 + }, + { + "epoch": 0.7774261603375527, + "grad_norm": 0.5383497476577759, + "learning_rate": 0.00028089534631799183, + "loss": 1.3171, + "step": 7370 + }, + { + "epoch": 0.7784810126582279, + "grad_norm": 0.4903510510921478, + "learning_rate": 0.0002770236305345076, + "loss": 1.3045, + "step": 7380 + }, + { + "epoch": 0.7795358649789029, + "grad_norm": 0.4975186586380005, + "learning_rate": 0.00027320528047354093, + "loss": 1.3134, + "step": 7390 + }, + { + "epoch": 0.7805907172995781, + "grad_norm": 0.504026472568512, + "learning_rate": 0.00026943956056964773, + "loss": 1.321, + "step": 7400 + }, + { + "epoch": 0.7816455696202531, + "grad_norm": 0.4713297188282013, + "learning_rate": 0.0002657257453960364, + "loss": 1.2991, + "step": 7410 + }, + { + "epoch": 0.7827004219409283, + "grad_norm": 0.5451270937919617, + "learning_rate": 0.0002620631195248222, + "loss": 1.3088, + "step": 7420 + }, + { + "epoch": 0.7837552742616034, + "grad_norm": 0.4575710594654083, + "learning_rate": 0.00025845097738920735, + "loss": 1.2934, + "step": 7430 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 0.46422040462493896, + "learning_rate": 0.0002548886231475606, + "loss": 1.2987, + "step": 7440 + }, + { + "epoch": 0.7858649789029536, + "grad_norm": 0.4658612012863159, + "learning_rate": 0.0002513753705493713, + "loss": 1.2999, + "step": 7450 + }, + { + "epoch": 0.7869198312236287, + "grad_norm": 0.4573189616203308, + "learning_rate": 0.0002479105428030497, + "loss": 1.3112, + "step": 7460 + }, + { + "epoch": 0.7879746835443038, + "grad_norm": 0.5342480540275574, + "learning_rate": 0.00024449347244555043, + 
"loss": 1.3018, + "step": 7470 + }, + { + "epoch": 0.7890295358649789, + "grad_norm": 0.5524665117263794, + "learning_rate": 0.00024112350121379254, + "loss": 1.3097, + "step": 7480 + }, + { + "epoch": 0.790084388185654, + "grad_norm": 0.4582677483558655, + "learning_rate": 0.000237799979917852, + "loss": 1.3151, + "step": 7490 + }, + { + "epoch": 0.7911392405063291, + "grad_norm": 0.5325183272361755, + "learning_rate": 0.00023452226831590227, + "loss": 1.3108, + "step": 7500 + }, + { + "epoch": 0.7921940928270043, + "grad_norm": 0.5005542039871216, + "learning_rate": 0.00023128973499087779, + "loss": 1.3185, + "step": 7510 + }, + { + "epoch": 0.7932489451476793, + "grad_norm": 0.47845563292503357, + "learning_rate": 0.00022810175722883858, + "loss": 1.3009, + "step": 7520 + }, + { + "epoch": 0.7943037974683544, + "grad_norm": 0.4878840446472168, + "learning_rate": 0.0002249577208990106, + "loss": 1.3028, + "step": 7530 + }, + { + "epoch": 0.7953586497890295, + "grad_norm": 0.5707334876060486, + "learning_rate": 0.00022185702033547996, + "loss": 1.3012, + "step": 7540 + }, + { + "epoch": 0.7964135021097046, + "grad_norm": 0.5070146918296814, + "learning_rate": 0.00021879905822051756, + "loss": 1.2966, + "step": 7550 + }, + { + "epoch": 0.7974683544303798, + "grad_norm": 0.529530942440033, + "learning_rate": 0.00021578324546951222, + "loss": 1.2998, + "step": 7560 + }, + { + "epoch": 0.7985232067510548, + "grad_norm": 0.5156906843185425, + "learning_rate": 0.00021280900111748948, + "loss": 1.3119, + "step": 7570 + }, + { + "epoch": 0.79957805907173, + "grad_norm": 0.4774062931537628, + "learning_rate": 0.00020987575220719483, + "loss": 1.3072, + "step": 7580 + }, + { + "epoch": 0.8006329113924051, + "grad_norm": 0.49267953634262085, + "learning_rate": 0.00020698293367871933, + "loss": 1.2976, + "step": 7590 + }, + { + "epoch": 0.8016877637130801, + "grad_norm": 0.4676821827888489, + "learning_rate": 0.00020412998826064692, + "loss": 1.3019, + "step": 7600 + }, + { + 
"epoch": 0.8027426160337553, + "grad_norm": 0.4811369478702545, + "learning_rate": 0.00020131636636270178, + "loss": 1.3145, + "step": 7610 + }, + { + "epoch": 0.8037974683544303, + "grad_norm": 0.4719107747077942, + "learning_rate": 0.00019854152596987523, + "loss": 1.2886, + "step": 7620 + }, + { + "epoch": 0.8048523206751055, + "grad_norm": 0.48394128680229187, + "learning_rate": 0.00019580493253801255, + "loss": 1.3082, + "step": 7630 + }, + { + "epoch": 0.8059071729957806, + "grad_norm": 0.5540241599082947, + "learning_rate": 0.00019310605889083838, + "loss": 1.3008, + "step": 7640 + }, + { + "epoch": 0.8069620253164557, + "grad_norm": 0.4780868887901306, + "learning_rate": 0.0001904443851184018, + "loss": 1.32, + "step": 7650 + }, + { + "epoch": 0.8080168776371308, + "grad_norm": 0.47522732615470886, + "learning_rate": 0.00018781939847692096, + "loss": 1.294, + "step": 7660 + }, + { + "epoch": 0.8090717299578059, + "grad_norm": 0.4818137586116791, + "learning_rate": 0.00018523059329000844, + "loss": 1.3017, + "step": 7670 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 0.5102220177650452, + "learning_rate": 0.0001826774708512579, + "loss": 1.3157, + "step": 7680 + }, + { + "epoch": 0.8111814345991561, + "grad_norm": 0.5076879262924194, + "learning_rate": 0.00018015953932817348, + "loss": 1.2892, + "step": 7690 + }, + { + "epoch": 0.8122362869198312, + "grad_norm": 0.49528968334198, + "learning_rate": 0.00017767631366742332, + "loss": 1.2969, + "step": 7700 + }, + { + "epoch": 0.8132911392405063, + "grad_norm": 0.5853897929191589, + "learning_rate": 0.00017522731550139922, + "loss": 1.2964, + "step": 7710 + }, + { + "epoch": 0.8143459915611815, + "grad_norm": 0.4707849323749542, + "learning_rate": 0.00017281207305606407, + "loss": 1.3001, + "step": 7720 + }, + { + "epoch": 0.8154008438818565, + "grad_norm": 0.49931663274765015, + "learning_rate": 0.00017043012106006926, + "loss": 1.3061, + "step": 7730 + }, + { + "epoch": 0.8164556962025317, + 
"grad_norm": 0.4601503014564514, + "learning_rate": 0.00016808100065512528, + "loss": 1.3069, + "step": 7740 + }, + { + "epoch": 0.8175105485232067, + "grad_norm": 0.518934428691864, + "learning_rate": 0.00016576425930760734, + "loss": 1.2923, + "step": 7750 + }, + { + "epoch": 0.8185654008438819, + "grad_norm": 0.5301703214645386, + "learning_rate": 0.00016347945072137934, + "loss": 1.2922, + "step": 7760 + }, + { + "epoch": 0.819620253164557, + "grad_norm": 0.4881986677646637, + "learning_rate": 0.00016122613475181977, + "loss": 1.2989, + "step": 7770 + }, + { + "epoch": 0.820675105485232, + "grad_norm": 0.6081076264381409, + "learning_rate": 0.00015900387732103232, + "loss": 1.2943, + "step": 7780 + }, + { + "epoch": 0.8217299578059072, + "grad_norm": 0.5877121090888977, + "learning_rate": 0.00015681225033422526, + "loss": 1.3067, + "step": 7790 + }, + { + "epoch": 0.8227848101265823, + "grad_norm": 0.45483383536338806, + "learning_rate": 0.00015465083159724345, + "loss": 1.2985, + "step": 7800 + }, + { + "epoch": 0.8238396624472574, + "grad_norm": 0.562230110168457, + "learning_rate": 0.0001525192047352371, + "loss": 1.297, + "step": 7810 + }, + { + "epoch": 0.8248945147679325, + "grad_norm": 0.4690268039703369, + "learning_rate": 0.00015041695911245136, + "loss": 1.3049, + "step": 7820 + }, + { + "epoch": 0.8259493670886076, + "grad_norm": 0.48680195212364197, + "learning_rate": 0.00014834368975312172, + "loss": 1.2954, + "step": 7830 + }, + { + "epoch": 0.8270042194092827, + "grad_norm": 0.46910569071769714, + "learning_rate": 0.00014629899726345958, + "loss": 1.2818, + "step": 7840 + }, + { + "epoch": 0.8280590717299579, + "grad_norm": 0.47541576623916626, + "learning_rate": 0.00014428248775471316, + "loss": 1.2909, + "step": 7850 + }, + { + "epoch": 0.8291139240506329, + "grad_norm": 0.46656283736228943, + "learning_rate": 0.000142293772767289, + "loss": 1.2787, + "step": 7860 + }, + { + "epoch": 0.830168776371308, + "grad_norm": 0.5693825483322144, + 
"learning_rate": 0.00014033246919591922, + "loss": 1.2894, + "step": 7870 + }, + { + "epoch": 0.8312236286919831, + "grad_norm": 0.49619945883750916, + "learning_rate": 0.00013839819921586025, + "loss": 1.3101, + "step": 7880 + }, + { + "epoch": 0.8322784810126582, + "grad_norm": 0.4710785448551178, + "learning_rate": 0.00013649059021010894, + "loss": 1.3072, + "step": 7890 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.464539498090744, + "learning_rate": 0.00013460927469762155, + "loss": 1.285, + "step": 7900 + }, + { + "epoch": 0.8343881856540084, + "grad_norm": 0.5771812200546265, + "learning_rate": 0.00013275389026252255, + "loss": 1.2952, + "step": 7910 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 0.5432382225990295, + "learning_rate": 0.0001309240794842889, + "loss": 1.2942, + "step": 7920 + }, + { + "epoch": 0.8364978902953587, + "grad_norm": 0.4678005278110504, + "learning_rate": 0.00012911948986889664, + "loss": 1.3045, + "step": 7930 + }, + { + "epoch": 0.8375527426160337, + "grad_norm": 0.45135894417762756, + "learning_rate": 0.00012733977378091664, + "loss": 1.2992, + "step": 7940 + }, + { + "epoch": 0.8386075949367089, + "grad_norm": 0.49784210324287415, + "learning_rate": 0.00012558458837654633, + "loss": 1.3015, + "step": 7950 + }, + { + "epoch": 0.8396624472573839, + "grad_norm": 0.4665963053703308, + "learning_rate": 0.00012385359553756422, + "loss": 1.2853, + "step": 7960 + }, + { + "epoch": 0.8407172995780591, + "grad_norm": 0.47964969277381897, + "learning_rate": 0.0001221464618061951, + "loss": 1.2861, + "step": 7970 + }, + { + "epoch": 0.8417721518987342, + "grad_norm": 0.4370293915271759, + "learning_rate": 0.0001204628583208727, + "loss": 1.2861, + "step": 7980 + }, + { + "epoch": 0.8428270042194093, + "grad_norm": 0.5059381723403931, + "learning_rate": 0.00011880246075288824, + "loss": 1.2978, + "step": 7990 + }, + { + "epoch": 0.8438818565400844, + "grad_norm": 0.522208034992218, + "learning_rate": 
0.00011716494924391148, + "loss": 1.2916, + "step": 8000 + }, + { + "epoch": 0.8449367088607594, + "grad_norm": 0.5110442042350769, + "learning_rate": 0.00011555000834437363, + "loss": 1.2972, + "step": 8010 + }, + { + "epoch": 0.8459915611814346, + "grad_norm": 0.5108746886253357, + "learning_rate": 0.00011395732695269907, + "loss": 1.2922, + "step": 8020 + }, + { + "epoch": 0.8470464135021097, + "grad_norm": 0.4836578369140625, + "learning_rate": 0.00011238659825537507, + "loss": 1.2904, + "step": 8030 + }, + { + "epoch": 0.8481012658227848, + "grad_norm": 0.48861879110336304, + "learning_rate": 0.00011083751966784716, + "loss": 1.3011, + "step": 8040 + }, + { + "epoch": 0.8491561181434599, + "grad_norm": 0.46914857625961304, + "learning_rate": 0.00010930979277622952, + "loss": 1.2827, + "step": 8050 + }, + { + "epoch": 0.8502109704641351, + "grad_norm": 0.48656436800956726, + "learning_rate": 0.00010780312327981853, + "loss": 1.2949, + "step": 8060 + }, + { + "epoch": 0.8512658227848101, + "grad_norm": 0.4590684473514557, + "learning_rate": 0.0001063172209343989, + "loss": 1.296, + "step": 8070 + }, + { + "epoch": 0.8523206751054853, + "grad_norm": 0.463483065366745, + "learning_rate": 0.000104851799496331, + "loss": 1.2982, + "step": 8080 + }, + { + "epoch": 0.8533755274261603, + "grad_norm": 0.4805615246295929, + "learning_rate": 0.00010340657666740917, + "loss": 1.2848, + "step": 8090 + }, + { + "epoch": 0.8544303797468354, + "grad_norm": 0.4917280972003937, + "learning_rate": 0.00010198127404047976, + "loss": 1.2802, + "step": 8100 + }, + { + "epoch": 0.8554852320675106, + "grad_norm": 0.5165645480155945, + "learning_rate": 0.00010057561704580898, + "loss": 1.2968, + "step": 8110 + }, + { + "epoch": 0.8565400843881856, + "grad_norm": 0.48266443610191345, + "learning_rate": 9.918933489818986e-05, + "loss": 1.2887, + "step": 8120 + }, + { + "epoch": 0.8575949367088608, + "grad_norm": 0.5153070688247681, + "learning_rate": 9.782216054477828e-05, + "loss": 
1.2883, + "step": 8130 + }, + { + "epoch": 0.8586497890295358, + "grad_norm": 0.46915605664253235, + "learning_rate": 9.647383061364803e-05, + "loss": 1.2857, + "step": 8140 + }, + { + "epoch": 0.859704641350211, + "grad_norm": 0.46430787444114685, + "learning_rate": 9.514408536305497e-05, + "loss": 1.2838, + "step": 8150 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 0.45965510606765747, + "learning_rate": 9.383266863140043e-05, + "loss": 1.295, + "step": 8160 + }, + { + "epoch": 0.8618143459915611, + "grad_norm": 0.4957108199596405, + "learning_rate": 9.25393277878844e-05, + "loss": 1.2855, + "step": 8170 + }, + { + "epoch": 0.8628691983122363, + "grad_norm": 0.5112273097038269, + "learning_rate": 9.126381368383881e-05, + "loss": 1.2931, + "step": 8180 + }, + { + "epoch": 0.8639240506329114, + "grad_norm": 0.4927181899547577, + "learning_rate": 9.000588060473158e-05, + "loss": 1.2866, + "step": 8190 + }, + { + "epoch": 0.8649789029535865, + "grad_norm": 0.46800509095191956, + "learning_rate": 8.876528622283232e-05, + "loss": 1.2973, + "step": 8200 + }, + { + "epoch": 0.8660337552742616, + "grad_norm": 0.4920525848865509, + "learning_rate": 8.754179155053052e-05, + "loss": 1.2897, + "step": 8210 + }, + { + "epoch": 0.8670886075949367, + "grad_norm": 0.4669337868690491, + "learning_rate": 8.63351608942968e-05, + "loss": 1.2974, + "step": 8220 + }, + { + "epoch": 0.8681434599156118, + "grad_norm": 0.4857999086380005, + "learning_rate": 8.514516180927926e-05, + "loss": 1.2947, + "step": 8230 + }, + { + "epoch": 0.869198312236287, + "grad_norm": 0.47452864050865173, + "learning_rate": 8.397156505452524e-05, + "loss": 1.2901, + "step": 8240 + }, + { + "epoch": 0.870253164556962, + "grad_norm": 0.4561096131801605, + "learning_rate": 8.28141445488205e-05, + "loss": 1.3045, + "step": 8250 + }, + { + "epoch": 0.8713080168776371, + "grad_norm": 0.45552825927734375, + "learning_rate": 8.167267732713705e-05, + "loss": 1.2944, + "step": 8260 + }, + { + "epoch": 
0.8723628691983122, + "grad_norm": 0.48459377884864807, + "learning_rate": 8.054694349768114e-05, + "loss": 1.2902, + "step": 8270 + }, + { + "epoch": 0.8734177215189873, + "grad_norm": 0.4631972908973694, + "learning_rate": 7.943672619953359e-05, + "loss": 1.2746, + "step": 8280 + }, + { + "epoch": 0.8744725738396625, + "grad_norm": 0.48390087485313416, + "learning_rate": 7.834181156087357e-05, + "loss": 1.276, + "step": 8290 + }, + { + "epoch": 0.8755274261603375, + "grad_norm": 0.48202580213546753, + "learning_rate": 7.726198865777852e-05, + "loss": 1.2998, + "step": 8300 + }, + { + "epoch": 0.8765822784810127, + "grad_norm": 0.47279173135757446, + "learning_rate": 7.61970494735919e-05, + "loss": 1.2784, + "step": 8310 + }, + { + "epoch": 0.8776371308016878, + "grad_norm": 0.4512956440448761, + "learning_rate": 7.514678885885086e-05, + "loss": 1.2988, + "step": 8320 + }, + { + "epoch": 0.8786919831223629, + "grad_norm": 0.47342824935913086, + "learning_rate": 7.411100449176634e-05, + "loss": 1.2899, + "step": 8330 + }, + { + "epoch": 0.879746835443038, + "grad_norm": 0.4541882276535034, + "learning_rate": 7.308949683924792e-05, + "loss": 1.2777, + "step": 8340 + }, + { + "epoch": 0.880801687763713, + "grad_norm": 0.4587934911251068, + "learning_rate": 7.208206911846581e-05, + "loss": 1.2806, + "step": 8350 + }, + { + "epoch": 0.8818565400843882, + "grad_norm": 0.4704788625240326, + "learning_rate": 7.10885272589427e-05, + "loss": 1.2791, + "step": 8360 + }, + { + "epoch": 0.8829113924050633, + "grad_norm": 0.43899935483932495, + "learning_rate": 7.010867986516811e-05, + "loss": 1.2871, + "step": 8370 + }, + { + "epoch": 0.8839662447257384, + "grad_norm": 0.44122639298439026, + "learning_rate": 6.914233817972799e-05, + "loss": 1.295, + "step": 8380 + }, + { + "epoch": 0.8850210970464135, + "grad_norm": 0.4458458423614502, + "learning_rate": 6.818931604694264e-05, + "loss": 1.2837, + "step": 8390 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 
0.474127858877182, + "learning_rate": 6.724942987700563e-05, + "loss": 1.2832, + "step": 8400 + }, + { + "epoch": 0.8871308016877637, + "grad_norm": 0.48170867562294006, + "learning_rate": 6.632249861061733e-05, + "loss": 1.303, + "step": 8410 + }, + { + "epoch": 0.8881856540084389, + "grad_norm": 0.453289270401001, + "learning_rate": 6.540834368410549e-05, + "loss": 1.2888, + "step": 8420 + }, + { + "epoch": 0.8892405063291139, + "grad_norm": 0.48818162083625793, + "learning_rate": 6.4506788995027e-05, + "loss": 1.2853, + "step": 8430 + }, + { + "epoch": 0.890295358649789, + "grad_norm": 0.4627540111541748, + "learning_rate": 6.361766086824344e-05, + "loss": 1.2797, + "step": 8440 + }, + { + "epoch": 0.8913502109704642, + "grad_norm": 0.4897553324699402, + "learning_rate": 6.274078802246449e-05, + "loss": 1.2896, + "step": 8450 + }, + { + "epoch": 0.8924050632911392, + "grad_norm": 0.5023053288459778, + "learning_rate": 6.187600153725223e-05, + "loss": 1.2838, + "step": 8460 + }, + { + "epoch": 0.8934599156118144, + "grad_norm": 0.4631926715373993, + "learning_rate": 6.1023134820480546e-05, + "loss": 1.2868, + "step": 8470 + }, + { + "epoch": 0.8945147679324894, + "grad_norm": 0.4571332633495331, + "learning_rate": 6.0182023576242725e-05, + "loss": 1.2797, + "step": 8480 + }, + { + "epoch": 0.8955696202531646, + "grad_norm": 0.4773905575275421, + "learning_rate": 5.9352505773201664e-05, + "loss": 1.2876, + "step": 8490 + }, + { + "epoch": 0.8966244725738397, + "grad_norm": 0.4614555537700653, + "learning_rate": 5.8534421613376175e-05, + "loss": 1.2776, + "step": 8500 + }, + { + "epoch": 0.8976793248945147, + "grad_norm": 0.48313337564468384, + "learning_rate": 5.772761350135759e-05, + "loss": 1.2866, + "step": 8510 + }, + { + "epoch": 0.8987341772151899, + "grad_norm": 0.4554440975189209, + "learning_rate": 5.6931926013950586e-05, + "loss": 1.2839, + "step": 8520 + }, + { + "epoch": 0.8997890295358649, + "grad_norm": 0.456855833530426, + "learning_rate": 
5.61472058702326e-05, + "loss": 1.2986, + "step": 8530 + }, + { + "epoch": 0.9008438818565401, + "grad_norm": 0.45017388463020325, + "learning_rate": 5.53733019020258e-05, + "loss": 1.2908, + "step": 8540 + }, + { + "epoch": 0.9018987341772152, + "grad_norm": 0.4595263600349426, + "learning_rate": 5.4610065024776125e-05, + "loss": 1.2845, + "step": 8550 + }, + { + "epoch": 0.9029535864978903, + "grad_norm": 0.49134576320648193, + "learning_rate": 5.38573482088337e-05, + "loss": 1.2754, + "step": 8560 + }, + { + "epoch": 0.9040084388185654, + "grad_norm": 0.5356613397598267, + "learning_rate": 5.3115006451129075e-05, + "loss": 1.2955, + "step": 8570 + }, + { + "epoch": 0.9050632911392406, + "grad_norm": 0.4758250415325165, + "learning_rate": 5.2382896747239935e-05, + "loss": 1.2877, + "step": 8580 + }, + { + "epoch": 0.9061181434599156, + "grad_norm": 0.5160989761352539, + "learning_rate": 5.166087806384275e-05, + "loss": 1.2898, + "step": 8590 + }, + { + "epoch": 0.9071729957805907, + "grad_norm": 0.4644686281681061, + "learning_rate": 5.0948811311544186e-05, + "loss": 1.2823, + "step": 8600 + }, + { + "epoch": 0.9082278481012658, + "grad_norm": 0.49802398681640625, + "learning_rate": 5.024655931808697e-05, + "loss": 1.2852, + "step": 8610 + }, + { + "epoch": 0.9092827004219409, + "grad_norm": 0.47131872177124023, + "learning_rate": 4.955398680192509e-05, + "loss": 1.2807, + "step": 8620 + }, + { + "epoch": 0.9103375527426161, + "grad_norm": 0.46453210711479187, + "learning_rate": 4.887096034616319e-05, + "loss": 1.2911, + "step": 8630 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 0.5512585639953613, + "learning_rate": 4.819734837285529e-05, + "loss": 1.2717, + "step": 8640 + }, + { + "epoch": 0.9124472573839663, + "grad_norm": 0.46803995966911316, + "learning_rate": 4.7533021117657475e-05, + "loss": 1.2775, + "step": 8650 + }, + { + "epoch": 0.9135021097046413, + "grad_norm": 0.5185009241104126, + "learning_rate": 4.687785060483031e-05, + "loss": 1.2789, 
+ "step": 8660 + }, + { + "epoch": 0.9145569620253164, + "grad_norm": 0.4847567081451416, + "learning_rate": 4.623171062258557e-05, + "loss": 1.2859, + "step": 8670 + }, + { + "epoch": 0.9156118143459916, + "grad_norm": 0.44923222064971924, + "learning_rate": 4.559447669877288e-05, + "loss": 1.2936, + "step": 8680 + }, + { + "epoch": 0.9166666666666666, + "grad_norm": 0.4748402535915375, + "learning_rate": 4.496602607690141e-05, + "loss": 1.2865, + "step": 8690 + }, + { + "epoch": 0.9177215189873418, + "grad_norm": 0.4809083342552185, + "learning_rate": 4.434623769249217e-05, + "loss": 1.2847, + "step": 8700 + }, + { + "epoch": 0.9187763713080169, + "grad_norm": 0.4840593934059143, + "learning_rate": 4.373499214975615e-05, + "loss": 1.2837, + "step": 8710 + }, + { + "epoch": 0.919831223628692, + "grad_norm": 0.46942973136901855, + "learning_rate": 4.313217169859397e-05, + "loss": 1.2782, + "step": 8720 + }, + { + "epoch": 0.9208860759493671, + "grad_norm": 0.46955642104148865, + "learning_rate": 4.253766021191256e-05, + "loss": 1.2901, + "step": 8730 + }, + { + "epoch": 0.9219409282700421, + "grad_norm": 0.5096837282180786, + "learning_rate": 4.19513431632545e-05, + "loss": 1.2797, + "step": 8740 + }, + { + "epoch": 0.9229957805907173, + "grad_norm": 0.47795701026916504, + "learning_rate": 4.1373107604735626e-05, + "loss": 1.2779, + "step": 8750 + }, + { + "epoch": 0.9240506329113924, + "grad_norm": 0.536361813545227, + "learning_rate": 4.0802842145286876e-05, + "loss": 1.2652, + "step": 8760 + }, + { + "epoch": 0.9251054852320675, + "grad_norm": 0.4470536708831787, + "learning_rate": 4.024043692919589e-05, + "loss": 1.2773, + "step": 8770 + }, + { + "epoch": 0.9261603375527426, + "grad_norm": 0.4594772160053253, + "learning_rate": 3.968578361494449e-05, + "loss": 1.2848, + "step": 8780 + }, + { + "epoch": 0.9272151898734177, + "grad_norm": 0.4696202874183655, + "learning_rate": 3.91387753543378e-05, + "loss": 1.2777, + "step": 8790 + }, + { + "epoch": 
0.9282700421940928, + "grad_norm": 0.4612068235874176, + "learning_rate": 3.859930677192103e-05, + "loss": 1.2739, + "step": 8800 + }, + { + "epoch": 0.929324894514768, + "grad_norm": 0.44385606050491333, + "learning_rate": 3.806727394468005e-05, + "loss": 1.2954, + "step": 8810 + }, + { + "epoch": 0.930379746835443, + "grad_norm": 0.4509481191635132, + "learning_rate": 3.7542574382021635e-05, + "loss": 1.2819, + "step": 8820 + }, + { + "epoch": 0.9314345991561181, + "grad_norm": 0.4618759751319885, + "learning_rate": 3.702510700602975e-05, + "loss": 1.298, + "step": 8830 + }, + { + "epoch": 0.9324894514767933, + "grad_norm": 0.4771023690700531, + "learning_rate": 3.651477213199394e-05, + "loss": 1.2818, + "step": 8840 + }, + { + "epoch": 0.9335443037974683, + "grad_norm": 0.5133115649223328, + "learning_rate": 3.601147144920609e-05, + "loss": 1.2819, + "step": 8850 + }, + { + "epoch": 0.9345991561181435, + "grad_norm": 0.5078469514846802, + "learning_rate": 3.5515108002021946e-05, + "loss": 1.2812, + "step": 8860 + }, + { + "epoch": 0.9356540084388185, + "grad_norm": 0.49372270703315735, + "learning_rate": 3.502558617118352e-05, + "loss": 1.2853, + "step": 8870 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 0.4601893126964569, + "learning_rate": 3.454281165539913e-05, + "loss": 1.2868, + "step": 8880 + }, + { + "epoch": 0.9377637130801688, + "grad_norm": 0.48302769660949707, + "learning_rate": 3.406669145317717e-05, + "loss": 1.2866, + "step": 8890 + }, + { + "epoch": 0.9388185654008439, + "grad_norm": 0.47342342138290405, + "learning_rate": 3.359713384491036e-05, + "loss": 1.2768, + "step": 8900 + }, + { + "epoch": 0.939873417721519, + "grad_norm": 0.5074547529220581, + "learning_rate": 3.313404837520694e-05, + "loss": 1.2764, + "step": 8910 + }, + { + "epoch": 0.9409282700421941, + "grad_norm": 0.4459538161754608, + "learning_rate": 3.267734583546536e-05, + "loss": 1.2833, + "step": 8920 + }, + { + "epoch": 0.9419831223628692, + "grad_norm": 
0.45098063349723816, + "learning_rate": 3.222693824668916e-05, + "loss": 1.2783, + "step": 8930 + }, + { + "epoch": 0.9430379746835443, + "grad_norm": 0.4661492705345154, + "learning_rate": 3.178273884253874e-05, + "loss": 1.2797, + "step": 8940 + }, + { + "epoch": 0.9440928270042194, + "grad_norm": 0.4472538232803345, + "learning_rate": 3.134466205261674e-05, + "loss": 1.2823, + "step": 8950 + }, + { + "epoch": 0.9451476793248945, + "grad_norm": 0.4738203287124634, + "learning_rate": 3.0912623485983774e-05, + "loss": 1.2857, + "step": 8960 + }, + { + "epoch": 0.9462025316455697, + "grad_norm": 0.49671709537506104, + "learning_rate": 3.048653991490141e-05, + "loss": 1.2825, + "step": 8970 + }, + { + "epoch": 0.9472573839662447, + "grad_norm": 0.5088520646095276, + "learning_rate": 3.0066329258799184e-05, + "loss": 1.28, + "step": 8980 + }, + { + "epoch": 0.9483122362869199, + "grad_norm": 0.4633517861366272, + "learning_rate": 2.965191056846266e-05, + "loss": 1.2847, + "step": 8990 + }, + { + "epoch": 0.9493670886075949, + "grad_norm": 0.4449499845504761, + "learning_rate": 2.9243204010439396e-05, + "loss": 1.2788, + "step": 9000 + }, + { + "epoch": 0.95042194092827, + "grad_norm": 0.4768332839012146, + "learning_rate": 2.8840130851659852e-05, + "loss": 1.2753, + "step": 9010 + }, + { + "epoch": 0.9514767932489452, + "grad_norm": 0.4613446593284607, + "learning_rate": 2.844261344427029e-05, + "loss": 1.2779, + "step": 9020 + }, + { + "epoch": 0.9525316455696202, + "grad_norm": 0.48574671149253845, + "learning_rate": 2.805057521067472e-05, + "loss": 1.2969, + "step": 9030 + }, + { + "epoch": 0.9535864978902954, + "grad_norm": 0.46719831228256226, + "learning_rate": 2.766394062878302e-05, + "loss": 1.2725, + "step": 9040 + }, + { + "epoch": 0.9546413502109705, + "grad_norm": 0.45361828804016113, + "learning_rate": 2.7282635217462405e-05, + "loss": 1.2845, + "step": 9050 + }, + { + "epoch": 0.9556962025316456, + "grad_norm": 0.445158451795578, + "learning_rate": 
2.6906585522189378e-05, + "loss": 1.2835, + "step": 9060 + }, + { + "epoch": 0.9567510548523207, + "grad_norm": 0.45497003197669983, + "learning_rate": 2.653571910089951e-05, + "loss": 1.2843, + "step": 9070 + }, + { + "epoch": 0.9578059071729957, + "grad_norm": 0.4976121783256531, + "learning_rate": 2.6169964510032243e-05, + "loss": 1.2842, + "step": 9080 + }, + { + "epoch": 0.9588607594936709, + "grad_norm": 0.476207435131073, + "learning_rate": 2.580925129076798e-05, + "loss": 1.2854, + "step": 9090 + }, + { + "epoch": 0.959915611814346, + "grad_norm": 0.4796762764453888, + "learning_rate": 2.5453509955454954e-05, + "loss": 1.2659, + "step": 9100 + }, + { + "epoch": 0.9609704641350211, + "grad_norm": 0.45383167266845703, + "learning_rate": 2.510267197422317e-05, + "loss": 1.2769, + "step": 9110 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 0.47720637917518616, + "learning_rate": 2.4756669761782806e-05, + "loss": 1.288, + "step": 9120 + }, + { + "epoch": 0.9630801687763713, + "grad_norm": 0.4557831287384033, + "learning_rate": 2.4415436664404643e-05, + "loss": 1.2724, + "step": 9130 + }, + { + "epoch": 0.9641350210970464, + "grad_norm": 0.5000823140144348, + "learning_rate": 2.4078906947079882e-05, + "loss": 1.2913, + "step": 9140 + }, + { + "epoch": 0.9651898734177216, + "grad_norm": 0.4777922034263611, + "learning_rate": 2.3747015780857007e-05, + "loss": 1.28, + "step": 9150 + }, + { + "epoch": 0.9662447257383966, + "grad_norm": 0.44898125529289246, + "learning_rate": 2.3419699230353144e-05, + "loss": 1.2861, + "step": 9160 + }, + { + "epoch": 0.9672995780590717, + "grad_norm": 0.4562678635120392, + "learning_rate": 2.3096894241437583e-05, + "loss": 1.2838, + "step": 9170 + }, + { + "epoch": 0.9683544303797469, + "grad_norm": 0.4760551452636719, + "learning_rate": 2.2778538629085057e-05, + "loss": 1.2784, + "step": 9180 + }, + { + "epoch": 0.9694092827004219, + "grad_norm": 0.45998191833496094, + "learning_rate": 2.2464571065396428e-05, + "loss": 
1.276, + "step": 9190 + }, + { + "epoch": 0.9704641350210971, + "grad_norm": 0.4722311794757843, + "learning_rate": 2.2154931067784525e-05, + "loss": 1.2804, + "step": 9200 + }, + { + "epoch": 0.9715189873417721, + "grad_norm": 0.46130430698394775, + "learning_rate": 2.1849558987322783e-05, + "loss": 1.2802, + "step": 9210 + }, + { + "epoch": 0.9725738396624473, + "grad_norm": 0.47175753116607666, + "learning_rate": 2.1548395997254516e-05, + "loss": 1.277, + "step": 9220 + }, + { + "epoch": 0.9736286919831224, + "grad_norm": 0.4584023058414459, + "learning_rate": 2.1251384081660546e-05, + "loss": 1.2692, + "step": 9230 + }, + { + "epoch": 0.9746835443037974, + "grad_norm": 0.4726228713989258, + "learning_rate": 2.0958466024283035e-05, + "loss": 1.2642, + "step": 9240 + }, + { + "epoch": 0.9757383966244726, + "grad_norm": 0.4451380968093872, + "learning_rate": 2.0669585397503362e-05, + "loss": 1.2749, + "step": 9250 + }, + { + "epoch": 0.9767932489451476, + "grad_norm": 0.4665733575820923, + "learning_rate": 2.0384686551471954e-05, + "loss": 1.2716, + "step": 9260 + }, + { + "epoch": 0.9778481012658228, + "grad_norm": 0.44663551449775696, + "learning_rate": 2.0103714603387898e-05, + "loss": 1.2839, + "step": 9270 + }, + { + "epoch": 0.9789029535864979, + "grad_norm": 0.44450730085372925, + "learning_rate": 1.9826615426926342e-05, + "loss": 1.2724, + "step": 9280 + }, + { + "epoch": 0.979957805907173, + "grad_norm": 0.45374828577041626, + "learning_rate": 1.9553335641811623e-05, + "loss": 1.2792, + "step": 9290 + }, + { + "epoch": 0.9810126582278481, + "grad_norm": 0.4566164016723633, + "learning_rate": 1.9283822603534143e-05, + "loss": 1.2772, + "step": 9300 + }, + { + "epoch": 0.9820675105485233, + "grad_norm": 0.4880983233451843, + "learning_rate": 1.90180243932089e-05, + "loss": 1.277, + "step": 9310 + }, + { + "epoch": 0.9831223628691983, + "grad_norm": 0.4619382619857788, + "learning_rate": 1.8755889807573868e-05, + "loss": 1.2764, + "step": 9320 + }, + { + 
"epoch": 0.9841772151898734, + "grad_norm": 0.43980950117111206, + "learning_rate": 1.8497368349126255e-05, + "loss": 1.2869, + "step": 9330 + }, + { + "epoch": 0.9852320675105485, + "grad_norm": 0.4788510203361511, + "learning_rate": 1.824241021639465e-05, + "loss": 1.283, + "step": 9340 + }, + { + "epoch": 0.9862869198312236, + "grad_norm": 0.4611131548881531, + "learning_rate": 1.799096629434529e-05, + "loss": 1.295, + "step": 9350 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 0.4426988363265991, + "learning_rate": 1.7742988144920578e-05, + "loss": 1.2865, + "step": 9360 + }, + { + "epoch": 0.9883966244725738, + "grad_norm": 0.4616834819316864, + "learning_rate": 1.7498427997707978e-05, + "loss": 1.2855, + "step": 9370 + }, + { + "epoch": 0.989451476793249, + "grad_norm": 0.4642038643360138, + "learning_rate": 1.7257238740737548e-05, + "loss": 1.2697, + "step": 9380 + }, + { + "epoch": 0.990506329113924, + "grad_norm": 0.48394331336021423, + "learning_rate": 1.7019373911406307e-05, + "loss": 1.2731, + "step": 9390 + }, + { + "epoch": 0.9915611814345991, + "grad_norm": 0.4615224301815033, + "learning_rate": 1.67847876875277e-05, + "loss": 1.2887, + "step": 9400 + }, + { + "epoch": 0.9926160337552743, + "grad_norm": 0.5010892152786255, + "learning_rate": 1.655343487850443e-05, + "loss": 1.2635, + "step": 9410 + }, + { + "epoch": 0.9936708860759493, + "grad_norm": 0.4526462256908417, + "learning_rate": 1.6325270916622947e-05, + "loss": 1.2817, + "step": 9420 + }, + { + "epoch": 0.9947257383966245, + "grad_norm": 0.44873127341270447, + "learning_rate": 1.610025184846797e-05, + "loss": 1.2822, + "step": 9430 + }, + { + "epoch": 0.9957805907172996, + "grad_norm": 0.4450811743736267, + "learning_rate": 1.587833432645528e-05, + "loss": 1.278, + "step": 9440 + }, + { + "epoch": 0.9968354430379747, + "grad_norm": 0.4788590967655182, + "learning_rate": 1.5659475600481297e-05, + "loss": 1.288, + "step": 9450 + }, + { + "epoch": 0.9978902953586498, + "grad_norm": 
0.4514161944389343, + "learning_rate": 1.544363350968769e-05, + "loss": 1.2714, + "step": 9460 + }, + { + "epoch": 0.9989451476793249, + "grad_norm": 0.44287604093551636, + "learning_rate": 1.523076647433954e-05, + "loss": 1.2835, + "step": 9470 + }, + { + "epoch": 1.0, + "grad_norm": 1.3349804878234863, + "learning_rate": 1.5020833487815421e-05, + "loss": 1.274, + "step": 9480 + } + ], + "logging_steps": 10, + "max_steps": 9480, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.832308198648013e+16, + "train_batch_size": 1024, + "trial_name": null, + "trial_params": null +} diff --git a/saves-llama/checkpoint-9480/training_args.bin b/saves-llama/checkpoint-9480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..6e1434d5699de9c201210a001ec773b06f6a8231 --- /dev/null +++ b/saves-llama/checkpoint-9480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:232d4a1f1f4364dcee0cec3bf2c9e6d360b9c10ff0d6701806434d99e5c39d28 +size 5112 diff --git a/saves-llama/config.json b/saves-llama/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d241c1935481613c5259df93a97d2cacb314defb --- /dev/null +++ b/saves-llama/config.json @@ -0,0 +1,28 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 768, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-06, + "rope_scaling": 
null, + "rope_theta": 10000.0, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-llama/generation_config.json b/saves-llama/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b2fc224da8a3685f78c733a0ef85e67242c17b5a --- /dev/null +++ b/saves-llama/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.42.4" +} diff --git a/saves-llama/model.safetensors b/saves-llama/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7ce218d499446a2f18b5c96f827124e4f6a26868 --- /dev/null +++ b/saves-llama/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23d8cb4fb2fa20d2a09ab95512bb7b9a4bc4ade52e7db7d83a476f8391aa6034 +size 8346712 diff --git a/saves-llama/result.log b/saves-llama/result.log new file mode 100644 index 0000000000000000000000000000000000000000..c2c03475aa654c0e8e705e767fa347f2c77cde7e --- /dev/null +++ b/saves-llama/result.log @@ -0,0 +1 @@ +{'train_runtime': 2179.4308, 'train_samples_per_second': 4453.733, 'train_steps_per_second': 4.35, 'train_loss': 1.5734189760835864, 'epoch': 1.0} \ No newline at end of file diff --git a/saves-llama/special_tokens_map.json b/saves-llama/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-llama/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false + } +} diff --git a/saves-llama/tokenizer.json b/saves-llama/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-llama/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 
22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, 
+ "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + 
"Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + 
"ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 
639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + 
"so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + 
"åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 
1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 
1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + 
"Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 
1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 
1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + 
"ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + 
"ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + 
"ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-llama/tokenizer_config.json b/saves-llama/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-llama/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "model_max_length": 
4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-mistral-bf16/checkpoint-9480/config.json b/saves-mistral-bf16/checkpoint-9480/config.json new file mode 100644 index 0000000000000000000000000000000000000000..30ccb5f053665b9e670549b808a9653faa5efa34 --- /dev/null +++ b/saves-mistral-bf16/checkpoint-9480/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "MistralForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 768, + "max_position_embeddings": 131072, + "model_type": "mistral", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "rms_norm_eps": 1e-06, + "rope_theta": 10000.0, + "sliding_window": 4096, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.42.0", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-mistral-bf16/checkpoint-9480/generation_config.json b/saves-mistral-bf16/checkpoint-9480/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..59c0f3c6815a220b6b4e852c51be873503df2ce0 --- /dev/null +++ b/saves-mistral-bf16/checkpoint-9480/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.42.0" +} diff --git a/saves-mistral-bf16/checkpoint-9480/model.safetensors b/saves-mistral-bf16/checkpoint-9480/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3eeb6ef252acf50b39b3ed75976b84912b92be7b --- /dev/null +++ b/saves-mistral-bf16/checkpoint-9480/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4939ffe6e400c9ae21c94dc5efb24d9e4fd3a81f8b30076bbd50a07906bad5e0 +size 8346712 diff --git a/saves-mistral-bf16/checkpoint-9480/optimizer.pt 
b/saves-mistral-bf16/checkpoint-9480/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..afae58ab798d5b0a4ceae79cd749317836707970 --- /dev/null +++ b/saves-mistral-bf16/checkpoint-9480/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27d12d71b9bebd85baf010d607a7d338807c9323b1392271da8842ed8856c974 +size 16706402 diff --git a/saves-mistral-bf16/checkpoint-9480/rng_state.pth b/saves-mistral-bf16/checkpoint-9480/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f8291cd6ce87668b786a72f3e93d072fbe54902 --- /dev/null +++ b/saves-mistral-bf16/checkpoint-9480/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c917636c7a58af68a29056522a757e9f9b99005b776641aa157c536967817d +size 14244 diff --git a/saves-mistral-bf16/checkpoint-9480/scheduler.pt b/saves-mistral-bf16/checkpoint-9480/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..63473f23a031ab0f869bb406d5cf89839262f03d --- /dev/null +++ b/saves-mistral-bf16/checkpoint-9480/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbb2bea2f7536b844ad9bb1bf6c3877fce0b1eb4d96764e140560dbf207ce6aa +size 1064 diff --git a/saves-mistral-bf16/checkpoint-9480/special_tokens_map.json b/saves-mistral-bf16/checkpoint-9480/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-mistral-bf16/checkpoint-9480/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false 
+ } +} diff --git a/saves-mistral-bf16/checkpoint-9480/tokenizer.json b/saves-mistral-bf16/checkpoint-9480/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-mistral-bf16/checkpoint-9480/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + 
"0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + 
"Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + 
"Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + 
"æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, 
+ "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, 
+ "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 
921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + 
"è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 
1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + 
"nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, 
+ "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 
1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + 
"Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 
1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + 
"Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-mistral-bf16/checkpoint-9480/tokenizer_config.json b/saves-mistral-bf16/checkpoint-9480/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-mistral-bf16/checkpoint-9480/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + 
"eos_token": "<|im_end|>", + "errors": "replace", + "model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-mistral-bf16/checkpoint-9480/trainer_state.json b/saves-mistral-bf16/checkpoint-9480/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..916a00edace28070103b2214e7b2b57632df2268 --- /dev/null +++ b/saves-mistral-bf16/checkpoint-9480/trainer_state.json @@ -0,0 +1,6669 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 9480, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0010548523206751054, + "grad_norm": 1.2911568880081177, + "learning_rate": 0.00015822784810126583, + "loss": 7.5122, + "step": 10 + }, + { + "epoch": 0.002109704641350211, + "grad_norm": 1.1530958414077759, + "learning_rate": 0.00031645569620253165, + "loss": 6.906, + "step": 20 + }, + { + "epoch": 0.0031645569620253164, + "grad_norm": 0.8528544306755066, + "learning_rate": 0.00047468354430379745, + "loss": 6.2587, + "step": 30 + }, + { + "epoch": 0.004219409282700422, + "grad_norm": 0.9695652723312378, + "learning_rate": 0.0006329113924050633, + "loss": 5.7729, + "step": 40 + }, + { + "epoch": 0.005274261603375527, + "grad_norm": 0.8636988401412964, + "learning_rate": 0.0007911392405063291, + "loss": 5.3159, + "step": 50 + }, + { + "epoch": 0.006329113924050633, + "grad_norm": 0.9786996841430664, + "learning_rate": 0.0009493670886075949, + "loss": 4.8059, + "step": 60 + }, + { + "epoch": 0.007383966244725738, + "grad_norm": 1.5117980241775513, + "learning_rate": 0.0011075949367088608, + "loss": 4.3993, + "step": 70 + }, + { + "epoch": 0.008438818565400843, + "grad_norm": 0.8246951699256897, + "learning_rate": 0.0012658227848101266, + "loss": 4.1436, + "step": 80 + }, + { + "epoch": 0.00949367088607595, + 
"grad_norm": 1.2078777551651, + "learning_rate": 0.0014240506329113926, + "loss": 3.9317, + "step": 90 + }, + { + "epoch": 0.010548523206751054, + "grad_norm": 0.9894590377807617, + "learning_rate": 0.0015, + "loss": 3.7802, + "step": 100 + }, + { + "epoch": 0.011603375527426161, + "grad_norm": 0.8762866854667664, + "learning_rate": 0.0015, + "loss": 3.622, + "step": 110 + }, + { + "epoch": 0.012658227848101266, + "grad_norm": 0.9929517507553101, + "learning_rate": 0.0015, + "loss": 3.5117, + "step": 120 + }, + { + "epoch": 0.013713080168776372, + "grad_norm": 0.7622191905975342, + "learning_rate": 0.0015, + "loss": 3.4092, + "step": 130 + }, + { + "epoch": 0.014767932489451477, + "grad_norm": 0.7740503549575806, + "learning_rate": 0.0015, + "loss": 3.3058, + "step": 140 + }, + { + "epoch": 0.015822784810126583, + "grad_norm": 0.8201466202735901, + "learning_rate": 0.0015, + "loss": 3.2344, + "step": 150 + }, + { + "epoch": 0.016877637130801686, + "grad_norm": 0.829488217830658, + "learning_rate": 0.0015, + "loss": 3.175, + "step": 160 + }, + { + "epoch": 0.017932489451476793, + "grad_norm": 1.1429024934768677, + "learning_rate": 0.0015, + "loss": 3.0982, + "step": 170 + }, + { + "epoch": 0.0189873417721519, + "grad_norm": 0.7297139167785645, + "learning_rate": 0.0015, + "loss": 3.0552, + "step": 180 + }, + { + "epoch": 0.020042194092827006, + "grad_norm": 0.8383708000183105, + "learning_rate": 0.0015, + "loss": 2.9936, + "step": 190 + }, + { + "epoch": 0.02109704641350211, + "grad_norm": 0.7045958638191223, + "learning_rate": 0.0015, + "loss": 2.9414, + "step": 200 + }, + { + "epoch": 0.022151898734177215, + "grad_norm": 0.9524938464164734, + "learning_rate": 0.0015, + "loss": 2.9054, + "step": 210 + }, + { + "epoch": 0.023206751054852322, + "grad_norm": 0.8136796951293945, + "learning_rate": 0.0015, + "loss": 2.8717, + "step": 220 + }, + { + "epoch": 0.024261603375527425, + "grad_norm": 0.7210149765014648, + "learning_rate": 0.0015, + "loss": 2.8125, + "step": 
230 + }, + { + "epoch": 0.02531645569620253, + "grad_norm": 1.000649333000183, + "learning_rate": 0.0015, + "loss": 2.7796, + "step": 240 + }, + { + "epoch": 0.026371308016877638, + "grad_norm": 0.9027130007743835, + "learning_rate": 0.0015, + "loss": 2.7438, + "step": 250 + }, + { + "epoch": 0.027426160337552744, + "grad_norm": 0.8297377824783325, + "learning_rate": 0.0015, + "loss": 2.7113, + "step": 260 + }, + { + "epoch": 0.028481012658227847, + "grad_norm": 0.9188632965087891, + "learning_rate": 0.0015, + "loss": 2.6782, + "step": 270 + }, + { + "epoch": 0.029535864978902954, + "grad_norm": 0.7633031606674194, + "learning_rate": 0.0015, + "loss": 2.6398, + "step": 280 + }, + { + "epoch": 0.03059071729957806, + "grad_norm": 0.9104907512664795, + "learning_rate": 0.0015, + "loss": 2.6154, + "step": 290 + }, + { + "epoch": 0.03164556962025317, + "grad_norm": 0.8751909732818604, + "learning_rate": 0.0015, + "loss": 2.5923, + "step": 300 + }, + { + "epoch": 0.03270042194092827, + "grad_norm": 0.8580324053764343, + "learning_rate": 0.0015, + "loss": 2.5698, + "step": 310 + }, + { + "epoch": 0.03375527426160337, + "grad_norm": 0.9248947501182556, + "learning_rate": 0.0015, + "loss": 2.5412, + "step": 320 + }, + { + "epoch": 0.03481012658227848, + "grad_norm": 0.7492154836654663, + "learning_rate": 0.0015, + "loss": 2.5276, + "step": 330 + }, + { + "epoch": 0.035864978902953586, + "grad_norm": 0.8280085325241089, + "learning_rate": 0.0015, + "loss": 2.5025, + "step": 340 + }, + { + "epoch": 0.03691983122362869, + "grad_norm": 1.2397363185882568, + "learning_rate": 0.0015, + "loss": 2.4612, + "step": 350 + }, + { + "epoch": 0.0379746835443038, + "grad_norm": 0.8337180614471436, + "learning_rate": 0.0015, + "loss": 2.4443, + "step": 360 + }, + { + "epoch": 0.039029535864978905, + "grad_norm": 0.8829106092453003, + "learning_rate": 0.0015, + "loss": 2.4333, + "step": 370 + }, + { + "epoch": 0.04008438818565401, + "grad_norm": 0.7179862856864929, + "learning_rate": 
0.0015, + "loss": 2.4126, + "step": 380 + }, + { + "epoch": 0.04113924050632911, + "grad_norm": 1.0350315570831299, + "learning_rate": 0.0015, + "loss": 2.3976, + "step": 390 + }, + { + "epoch": 0.04219409282700422, + "grad_norm": 1.0989201068878174, + "learning_rate": 0.0015, + "loss": 2.3746, + "step": 400 + }, + { + "epoch": 0.043248945147679324, + "grad_norm": 0.8930501937866211, + "learning_rate": 0.0015, + "loss": 2.3621, + "step": 410 + }, + { + "epoch": 0.04430379746835443, + "grad_norm": 0.8837328553199768, + "learning_rate": 0.0015, + "loss": 2.3439, + "step": 420 + }, + { + "epoch": 0.04535864978902954, + "grad_norm": 0.7845432162284851, + "learning_rate": 0.0015, + "loss": 2.3147, + "step": 430 + }, + { + "epoch": 0.046413502109704644, + "grad_norm": 0.9344269037246704, + "learning_rate": 0.0015, + "loss": 2.3019, + "step": 440 + }, + { + "epoch": 0.04746835443037975, + "grad_norm": 0.8818278908729553, + "learning_rate": 0.0015, + "loss": 2.3035, + "step": 450 + }, + { + "epoch": 0.04852320675105485, + "grad_norm": 0.6915808916091919, + "learning_rate": 0.0015, + "loss": 2.2788, + "step": 460 + }, + { + "epoch": 0.049578059071729956, + "grad_norm": 0.7943204045295715, + "learning_rate": 0.0015, + "loss": 2.2654, + "step": 470 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 1.4367399215698242, + "learning_rate": 0.0015, + "loss": 2.2561, + "step": 480 + }, + { + "epoch": 0.05168776371308017, + "grad_norm": 0.7440197467803955, + "learning_rate": 0.0015, + "loss": 2.2299, + "step": 490 + }, + { + "epoch": 0.052742616033755275, + "grad_norm": 1.1008273363113403, + "learning_rate": 0.0015, + "loss": 2.2256, + "step": 500 + }, + { + "epoch": 0.05379746835443038, + "grad_norm": 1.3533852100372314, + "learning_rate": 0.0015, + "loss": 2.2155, + "step": 510 + }, + { + "epoch": 0.05485232067510549, + "grad_norm": 0.7687584161758423, + "learning_rate": 0.0015, + "loss": 2.1925, + "step": 520 + }, + { + "epoch": 0.05590717299578059, + "grad_norm": 
0.7902509570121765, + "learning_rate": 0.0015, + "loss": 2.1772, + "step": 530 + }, + { + "epoch": 0.056962025316455694, + "grad_norm": 1.0160045623779297, + "learning_rate": 0.0015, + "loss": 2.1684, + "step": 540 + }, + { + "epoch": 0.0580168776371308, + "grad_norm": 0.7253097295761108, + "learning_rate": 0.0015, + "loss": 2.1657, + "step": 550 + }, + { + "epoch": 0.05907172995780591, + "grad_norm": 0.730516791343689, + "learning_rate": 0.0015, + "loss": 2.1272, + "step": 560 + }, + { + "epoch": 0.060126582278481014, + "grad_norm": 0.8411962389945984, + "learning_rate": 0.0015, + "loss": 2.1316, + "step": 570 + }, + { + "epoch": 0.06118143459915612, + "grad_norm": 0.9713028073310852, + "learning_rate": 0.0015, + "loss": 2.1361, + "step": 580 + }, + { + "epoch": 0.06223628691983123, + "grad_norm": 1.1778079271316528, + "learning_rate": 0.0015, + "loss": 2.1091, + "step": 590 + }, + { + "epoch": 0.06329113924050633, + "grad_norm": 0.6379977464675903, + "learning_rate": 0.0015, + "loss": 2.0929, + "step": 600 + }, + { + "epoch": 0.06434599156118144, + "grad_norm": 0.8067770004272461, + "learning_rate": 0.0015, + "loss": 2.0945, + "step": 610 + }, + { + "epoch": 0.06540084388185655, + "grad_norm": 0.8583511114120483, + "learning_rate": 0.0015, + "loss": 2.082, + "step": 620 + }, + { + "epoch": 0.06645569620253164, + "grad_norm": 1.134140133857727, + "learning_rate": 0.0015, + "loss": 2.0674, + "step": 630 + }, + { + "epoch": 0.06751054852320675, + "grad_norm": 0.8797304630279541, + "learning_rate": 0.0015, + "loss": 2.0776, + "step": 640 + }, + { + "epoch": 0.06856540084388185, + "grad_norm": 1.2700276374816895, + "learning_rate": 0.0015, + "loss": 2.0655, + "step": 650 + }, + { + "epoch": 0.06962025316455696, + "grad_norm": 0.8297256231307983, + "learning_rate": 0.0015, + "loss": 2.0615, + "step": 660 + }, + { + "epoch": 0.07067510548523206, + "grad_norm": 0.8372782468795776, + "learning_rate": 0.0015, + "loss": 2.0386, + "step": 670 + }, + { + "epoch": 
0.07172995780590717, + "grad_norm": 0.7951764464378357, + "learning_rate": 0.0015, + "loss": 2.032, + "step": 680 + }, + { + "epoch": 0.07278481012658228, + "grad_norm": 0.766379177570343, + "learning_rate": 0.0015, + "loss": 2.0374, + "step": 690 + }, + { + "epoch": 0.07383966244725738, + "grad_norm": 0.7371763586997986, + "learning_rate": 0.0015, + "loss": 2.0164, + "step": 700 + }, + { + "epoch": 0.07489451476793249, + "grad_norm": 1.0791587829589844, + "learning_rate": 0.0015, + "loss": 2.0091, + "step": 710 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 0.7908256649971008, + "learning_rate": 0.0015, + "loss": 1.9936, + "step": 720 + }, + { + "epoch": 0.0770042194092827, + "grad_norm": 0.8527359962463379, + "learning_rate": 0.0015, + "loss": 1.9844, + "step": 730 + }, + { + "epoch": 0.07805907172995781, + "grad_norm": 0.8309198021888733, + "learning_rate": 0.0015, + "loss": 1.9956, + "step": 740 + }, + { + "epoch": 0.07911392405063292, + "grad_norm": 1.3767690658569336, + "learning_rate": 0.0015, + "loss": 1.978, + "step": 750 + }, + { + "epoch": 0.08016877637130802, + "grad_norm": 0.713165819644928, + "learning_rate": 0.0015, + "loss": 1.9813, + "step": 760 + }, + { + "epoch": 0.08122362869198312, + "grad_norm": 0.8718229532241821, + "learning_rate": 0.0015, + "loss": 1.9639, + "step": 770 + }, + { + "epoch": 0.08227848101265822, + "grad_norm": 0.8156098127365112, + "learning_rate": 0.0015, + "loss": 1.9645, + "step": 780 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 0.9464802742004395, + "learning_rate": 0.0015, + "loss": 1.9661, + "step": 790 + }, + { + "epoch": 0.08438818565400844, + "grad_norm": 0.7087975740432739, + "learning_rate": 0.0015, + "loss": 1.9405, + "step": 800 + }, + { + "epoch": 0.08544303797468354, + "grad_norm": 0.6990103125572205, + "learning_rate": 0.0015, + "loss": 1.931, + "step": 810 + }, + { + "epoch": 0.08649789029535865, + "grad_norm": 1.6295113563537598, + "learning_rate": 0.0015, + "loss": 1.9414, + "step": 820 + 
}, + { + "epoch": 0.08755274261603375, + "grad_norm": 0.7534579038619995, + "learning_rate": 0.0015, + "loss": 1.9424, + "step": 830 + }, + { + "epoch": 0.08860759493670886, + "grad_norm": 0.9456313252449036, + "learning_rate": 0.0015, + "loss": 1.9182, + "step": 840 + }, + { + "epoch": 0.08966244725738397, + "grad_norm": 0.770246148109436, + "learning_rate": 0.0015, + "loss": 1.9216, + "step": 850 + }, + { + "epoch": 0.09071729957805907, + "grad_norm": 0.8989337682723999, + "learning_rate": 0.0015, + "loss": 1.9174, + "step": 860 + }, + { + "epoch": 0.09177215189873418, + "grad_norm": 0.8580651879310608, + "learning_rate": 0.0015, + "loss": 1.9165, + "step": 870 + }, + { + "epoch": 0.09282700421940929, + "grad_norm": 0.8543643951416016, + "learning_rate": 0.0015, + "loss": 1.9084, + "step": 880 + }, + { + "epoch": 0.0938818565400844, + "grad_norm": 1.1457914113998413, + "learning_rate": 0.0015, + "loss": 1.9017, + "step": 890 + }, + { + "epoch": 0.0949367088607595, + "grad_norm": 0.7464300394058228, + "learning_rate": 0.0015, + "loss": 1.8976, + "step": 900 + }, + { + "epoch": 0.09599156118143459, + "grad_norm": 0.8035700917243958, + "learning_rate": 0.0015, + "loss": 1.9015, + "step": 910 + }, + { + "epoch": 0.0970464135021097, + "grad_norm": 0.7010158896446228, + "learning_rate": 0.0015, + "loss": 1.8904, + "step": 920 + }, + { + "epoch": 0.0981012658227848, + "grad_norm": 0.8178808093070984, + "learning_rate": 0.0015, + "loss": 1.8712, + "step": 930 + }, + { + "epoch": 0.09915611814345991, + "grad_norm": 1.0780892372131348, + "learning_rate": 0.0015, + "loss": 1.8812, + "step": 940 + }, + { + "epoch": 0.10021097046413502, + "grad_norm": 1.793686032295227, + "learning_rate": 0.0015, + "loss": 1.8672, + "step": 950 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 1.1407757997512817, + "learning_rate": 0.0015, + "loss": 1.8798, + "step": 960 + }, + { + "epoch": 0.10232067510548523, + "grad_norm": 1.027924656867981, + "learning_rate": 0.0015, + "loss": 
1.8681, + "step": 970 + }, + { + "epoch": 0.10337552742616034, + "grad_norm": 0.9612833261489868, + "learning_rate": 0.0015, + "loss": 1.8474, + "step": 980 + }, + { + "epoch": 0.10443037974683544, + "grad_norm": 0.7732949256896973, + "learning_rate": 0.0015, + "loss": 1.8529, + "step": 990 + }, + { + "epoch": 0.10548523206751055, + "grad_norm": 0.9289379119873047, + "learning_rate": 0.0015, + "loss": 1.8605, + "step": 1000 + }, + { + "epoch": 0.10654008438818566, + "grad_norm": 0.7720140814781189, + "learning_rate": 0.0015, + "loss": 1.8526, + "step": 1010 + }, + { + "epoch": 0.10759493670886076, + "grad_norm": 0.910430908203125, + "learning_rate": 0.0015, + "loss": 1.8438, + "step": 1020 + }, + { + "epoch": 0.10864978902953587, + "grad_norm": 0.7728487849235535, + "learning_rate": 0.0015, + "loss": 1.8445, + "step": 1030 + }, + { + "epoch": 0.10970464135021098, + "grad_norm": 0.8599476218223572, + "learning_rate": 0.0015, + "loss": 1.8354, + "step": 1040 + }, + { + "epoch": 0.11075949367088607, + "grad_norm": 1.2211686372756958, + "learning_rate": 0.0015, + "loss": 1.839, + "step": 1050 + }, + { + "epoch": 0.11181434599156118, + "grad_norm": 0.7726693749427795, + "learning_rate": 0.0015, + "loss": 1.8263, + "step": 1060 + }, + { + "epoch": 0.11286919831223628, + "grad_norm": 0.8299482464790344, + "learning_rate": 0.0015, + "loss": 1.8151, + "step": 1070 + }, + { + "epoch": 0.11392405063291139, + "grad_norm": 0.6944489479064941, + "learning_rate": 0.0015, + "loss": 1.8128, + "step": 1080 + }, + { + "epoch": 0.1149789029535865, + "grad_norm": 0.671230673789978, + "learning_rate": 0.0015, + "loss": 1.8235, + "step": 1090 + }, + { + "epoch": 0.1160337552742616, + "grad_norm": 0.7142427563667297, + "learning_rate": 0.0015, + "loss": 1.8081, + "step": 1100 + }, + { + "epoch": 0.11708860759493671, + "grad_norm": 0.6354637145996094, + "learning_rate": 0.0015, + "loss": 1.8143, + "step": 1110 + }, + { + "epoch": 0.11814345991561181, + "grad_norm": 0.7774524092674255, + 
"learning_rate": 0.0015, + "loss": 1.8084, + "step": 1120 + }, + { + "epoch": 0.11919831223628692, + "grad_norm": 1.088733196258545, + "learning_rate": 0.0015, + "loss": 1.7934, + "step": 1130 + }, + { + "epoch": 0.12025316455696203, + "grad_norm": 0.82485032081604, + "learning_rate": 0.0015, + "loss": 1.8074, + "step": 1140 + }, + { + "epoch": 0.12130801687763713, + "grad_norm": 1.128315806388855, + "learning_rate": 0.0015, + "loss": 1.7956, + "step": 1150 + }, + { + "epoch": 0.12236286919831224, + "grad_norm": 0.7950155138969421, + "learning_rate": 0.0015, + "loss": 1.8044, + "step": 1160 + }, + { + "epoch": 0.12341772151898735, + "grad_norm": 0.7386237382888794, + "learning_rate": 0.0015, + "loss": 1.7874, + "step": 1170 + }, + { + "epoch": 0.12447257383966245, + "grad_norm": 0.7935805916786194, + "learning_rate": 0.0015, + "loss": 1.7747, + "step": 1180 + }, + { + "epoch": 0.12552742616033755, + "grad_norm": 0.7353824973106384, + "learning_rate": 0.0015, + "loss": 1.7793, + "step": 1190 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 1.8419713973999023, + "learning_rate": 0.0015, + "loss": 1.7993, + "step": 1200 + }, + { + "epoch": 0.12763713080168776, + "grad_norm": 0.7695147395133972, + "learning_rate": 0.0015, + "loss": 1.7884, + "step": 1210 + }, + { + "epoch": 0.12869198312236288, + "grad_norm": 0.6916271448135376, + "learning_rate": 0.0015, + "loss": 1.7601, + "step": 1220 + }, + { + "epoch": 0.12974683544303797, + "grad_norm": 0.6987491846084595, + "learning_rate": 0.0015, + "loss": 1.7529, + "step": 1230 + }, + { + "epoch": 0.1308016877637131, + "grad_norm": 0.7663790583610535, + "learning_rate": 0.0015, + "loss": 1.7754, + "step": 1240 + }, + { + "epoch": 0.13185654008438819, + "grad_norm": 0.7070717811584473, + "learning_rate": 0.0015, + "loss": 1.7734, + "step": 1250 + }, + { + "epoch": 0.13291139240506328, + "grad_norm": 0.7695779800415039, + "learning_rate": 0.0015, + "loss": 1.7583, + "step": 1260 + }, + { + "epoch": 0.1339662447257384, + 
"grad_norm": 0.8380961418151855, + "learning_rate": 0.0015, + "loss": 1.7568, + "step": 1270 + }, + { + "epoch": 0.1350210970464135, + "grad_norm": 0.741341233253479, + "learning_rate": 0.0015, + "loss": 1.7734, + "step": 1280 + }, + { + "epoch": 0.1360759493670886, + "grad_norm": 0.8968631625175476, + "learning_rate": 0.0015, + "loss": 1.7575, + "step": 1290 + }, + { + "epoch": 0.1371308016877637, + "grad_norm": 0.6770614385604858, + "learning_rate": 0.0015, + "loss": 1.7471, + "step": 1300 + }, + { + "epoch": 0.13818565400843882, + "grad_norm": 0.7884116172790527, + "learning_rate": 0.0015, + "loss": 1.7506, + "step": 1310 + }, + { + "epoch": 0.13924050632911392, + "grad_norm": 0.8187959790229797, + "learning_rate": 0.0015, + "loss": 1.7519, + "step": 1320 + }, + { + "epoch": 0.14029535864978904, + "grad_norm": 1.047227382659912, + "learning_rate": 0.0015, + "loss": 1.7434, + "step": 1330 + }, + { + "epoch": 0.14135021097046413, + "grad_norm": 0.729209303855896, + "learning_rate": 0.0015, + "loss": 1.7457, + "step": 1340 + }, + { + "epoch": 0.14240506329113925, + "grad_norm": 0.6964226961135864, + "learning_rate": 0.0015, + "loss": 1.749, + "step": 1350 + }, + { + "epoch": 0.14345991561181434, + "grad_norm": 0.6814247965812683, + "learning_rate": 0.0015, + "loss": 1.7365, + "step": 1360 + }, + { + "epoch": 0.14451476793248946, + "grad_norm": 0.7576081156730652, + "learning_rate": 0.0015, + "loss": 1.7334, + "step": 1370 + }, + { + "epoch": 0.14556962025316456, + "grad_norm": 0.7002779841423035, + "learning_rate": 0.0015, + "loss": 1.7256, + "step": 1380 + }, + { + "epoch": 0.14662447257383968, + "grad_norm": 0.832123875617981, + "learning_rate": 0.0015, + "loss": 1.7396, + "step": 1390 + }, + { + "epoch": 0.14767932489451477, + "grad_norm": 0.670310378074646, + "learning_rate": 0.0015, + "loss": 1.7194, + "step": 1400 + }, + { + "epoch": 0.14873417721518986, + "grad_norm": 0.7912747263908386, + "learning_rate": 0.0015, + "loss": 1.724, + "step": 1410 + }, + { + 
"epoch": 0.14978902953586498, + "grad_norm": 0.752036988735199, + "learning_rate": 0.0015, + "loss": 1.7272, + "step": 1420 + }, + { + "epoch": 0.15084388185654007, + "grad_norm": 0.8257012963294983, + "learning_rate": 0.0015, + "loss": 1.7195, + "step": 1430 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 0.6395012140274048, + "learning_rate": 0.0015, + "loss": 1.7203, + "step": 1440 + }, + { + "epoch": 0.1529535864978903, + "grad_norm": 1.1045129299163818, + "learning_rate": 0.0015, + "loss": 1.7222, + "step": 1450 + }, + { + "epoch": 0.1540084388185654, + "grad_norm": 0.6789548397064209, + "learning_rate": 0.0015, + "loss": 1.7086, + "step": 1460 + }, + { + "epoch": 0.1550632911392405, + "grad_norm": 0.8164582848548889, + "learning_rate": 0.0015, + "loss": 1.7098, + "step": 1470 + }, + { + "epoch": 0.15611814345991562, + "grad_norm": 0.6820360422134399, + "learning_rate": 0.0015, + "loss": 1.713, + "step": 1480 + }, + { + "epoch": 0.1571729957805907, + "grad_norm": 0.6760832071304321, + "learning_rate": 0.0015, + "loss": 1.7102, + "step": 1490 + }, + { + "epoch": 0.15822784810126583, + "grad_norm": 0.7806017398834229, + "learning_rate": 0.0015, + "loss": 1.7094, + "step": 1500 + }, + { + "epoch": 0.15928270042194093, + "grad_norm": 0.8451957106590271, + "learning_rate": 0.0015, + "loss": 1.7126, + "step": 1510 + }, + { + "epoch": 0.16033755274261605, + "grad_norm": 0.7115667462348938, + "learning_rate": 0.0015, + "loss": 1.7092, + "step": 1520 + }, + { + "epoch": 0.16139240506329114, + "grad_norm": 0.6442527174949646, + "learning_rate": 0.0015, + "loss": 1.6909, + "step": 1530 + }, + { + "epoch": 0.16244725738396623, + "grad_norm": 0.6934787034988403, + "learning_rate": 0.0015, + "loss": 1.6835, + "step": 1540 + }, + { + "epoch": 0.16350210970464135, + "grad_norm": 0.8956286311149597, + "learning_rate": 0.0015, + "loss": 1.6842, + "step": 1550 + }, + { + "epoch": 0.16455696202531644, + "grad_norm": 0.6710405349731445, + "learning_rate": 0.0015, + "loss": 
1.6887, + "step": 1560 + }, + { + "epoch": 0.16561181434599156, + "grad_norm": 0.6764975786209106, + "learning_rate": 0.0015, + "loss": 1.6835, + "step": 1570 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 0.6724684834480286, + "learning_rate": 0.0015, + "loss": 1.6908, + "step": 1580 + }, + { + "epoch": 0.16772151898734178, + "grad_norm": 0.859164834022522, + "learning_rate": 0.0015, + "loss": 1.6862, + "step": 1590 + }, + { + "epoch": 0.16877637130801687, + "grad_norm": 0.7298034429550171, + "learning_rate": 0.0015, + "loss": 1.6919, + "step": 1600 + }, + { + "epoch": 0.169831223628692, + "grad_norm": 0.6594526171684265, + "learning_rate": 0.0015, + "loss": 1.6892, + "step": 1610 + }, + { + "epoch": 0.17088607594936708, + "grad_norm": 0.6169994473457336, + "learning_rate": 0.0015, + "loss": 1.6609, + "step": 1620 + }, + { + "epoch": 0.1719409282700422, + "grad_norm": 0.7296561002731323, + "learning_rate": 0.0015, + "loss": 1.6726, + "step": 1630 + }, + { + "epoch": 0.1729957805907173, + "grad_norm": 0.6978626847267151, + "learning_rate": 0.0015, + "loss": 1.6756, + "step": 1640 + }, + { + "epoch": 0.17405063291139242, + "grad_norm": 1.0857014656066895, + "learning_rate": 0.0015, + "loss": 1.6678, + "step": 1650 + }, + { + "epoch": 0.1751054852320675, + "grad_norm": 0.6385810375213623, + "learning_rate": 0.0015, + "loss": 1.6669, + "step": 1660 + }, + { + "epoch": 0.17616033755274263, + "grad_norm": 0.9220226407051086, + "learning_rate": 0.0015, + "loss": 1.6769, + "step": 1670 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 1.4121848344802856, + "learning_rate": 0.0015, + "loss": 1.6633, + "step": 1680 + }, + { + "epoch": 0.17827004219409281, + "grad_norm": 0.7881584167480469, + "learning_rate": 0.0015, + "loss": 1.6546, + "step": 1690 + }, + { + "epoch": 0.17932489451476794, + "grad_norm": 0.6409121155738831, + "learning_rate": 0.0015, + "loss": 1.6718, + "step": 1700 + }, + { + "epoch": 0.18037974683544303, + "grad_norm": 0.8101775050163269, + 
"learning_rate": 0.0015, + "loss": 1.6544, + "step": 1710 + }, + { + "epoch": 0.18143459915611815, + "grad_norm": 0.6343249678611755, + "learning_rate": 0.0015, + "loss": 1.651, + "step": 1720 + }, + { + "epoch": 0.18248945147679324, + "grad_norm": 0.7380338907241821, + "learning_rate": 0.0015, + "loss": 1.6558, + "step": 1730 + }, + { + "epoch": 0.18354430379746836, + "grad_norm": 0.7978950142860413, + "learning_rate": 0.0015, + "loss": 1.6561, + "step": 1740 + }, + { + "epoch": 0.18459915611814345, + "grad_norm": 0.9163487553596497, + "learning_rate": 0.0015, + "loss": 1.6779, + "step": 1750 + }, + { + "epoch": 0.18565400843881857, + "grad_norm": 0.6927811503410339, + "learning_rate": 0.0015, + "loss": 1.6588, + "step": 1760 + }, + { + "epoch": 0.18670886075949367, + "grad_norm": 0.6966419816017151, + "learning_rate": 0.0015, + "loss": 1.6612, + "step": 1770 + }, + { + "epoch": 0.1877637130801688, + "grad_norm": 0.6320984959602356, + "learning_rate": 0.0015, + "loss": 1.657, + "step": 1780 + }, + { + "epoch": 0.18881856540084388, + "grad_norm": 0.6386429667472839, + "learning_rate": 0.0015, + "loss": 1.6593, + "step": 1790 + }, + { + "epoch": 0.189873417721519, + "grad_norm": 0.6478818655014038, + "learning_rate": 0.0015, + "loss": 1.6558, + "step": 1800 + }, + { + "epoch": 0.1909282700421941, + "grad_norm": 0.6771355271339417, + "learning_rate": 0.0015, + "loss": 1.6437, + "step": 1810 + }, + { + "epoch": 0.19198312236286919, + "grad_norm": 0.7388620376586914, + "learning_rate": 0.0015, + "loss": 1.6406, + "step": 1820 + }, + { + "epoch": 0.1930379746835443, + "grad_norm": 0.7418689131736755, + "learning_rate": 0.0015, + "loss": 1.6384, + "step": 1830 + }, + { + "epoch": 0.1940928270042194, + "grad_norm": 0.6168630123138428, + "learning_rate": 0.0015, + "loss": 1.6368, + "step": 1840 + }, + { + "epoch": 0.19514767932489452, + "grad_norm": 0.7633735537528992, + "learning_rate": 0.0015, + "loss": 1.6434, + "step": 1850 + }, + { + "epoch": 0.1962025316455696, + 
"grad_norm": 0.6480771899223328, + "learning_rate": 0.0015, + "loss": 1.6476, + "step": 1860 + }, + { + "epoch": 0.19725738396624473, + "grad_norm": 0.6744736433029175, + "learning_rate": 0.0015, + "loss": 1.6479, + "step": 1870 + }, + { + "epoch": 0.19831223628691982, + "grad_norm": 0.625220000743866, + "learning_rate": 0.0015, + "loss": 1.6316, + "step": 1880 + }, + { + "epoch": 0.19936708860759494, + "grad_norm": 0.636688768863678, + "learning_rate": 0.0015, + "loss": 1.6384, + "step": 1890 + }, + { + "epoch": 0.20042194092827004, + "grad_norm": 0.9341202974319458, + "learning_rate": 0.0015, + "loss": 1.637, + "step": 1900 + }, + { + "epoch": 0.20147679324894516, + "grad_norm": 1.0301650762557983, + "learning_rate": 0.0015, + "loss": 1.6372, + "step": 1910 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 0.6672254800796509, + "learning_rate": 0.0015, + "loss": 1.6312, + "step": 1920 + }, + { + "epoch": 0.20358649789029537, + "grad_norm": 0.8981499075889587, + "learning_rate": 0.0015, + "loss": 1.6329, + "step": 1930 + }, + { + "epoch": 0.20464135021097046, + "grad_norm": 0.6990149021148682, + "learning_rate": 0.0015, + "loss": 1.6252, + "step": 1940 + }, + { + "epoch": 0.20569620253164558, + "grad_norm": 0.6157594919204712, + "learning_rate": 0.0015, + "loss": 1.6239, + "step": 1950 + }, + { + "epoch": 0.20675105485232068, + "grad_norm": 0.6597276329994202, + "learning_rate": 0.0015, + "loss": 1.6248, + "step": 1960 + }, + { + "epoch": 0.20780590717299577, + "grad_norm": 0.6546605825424194, + "learning_rate": 0.0015, + "loss": 1.6354, + "step": 1970 + }, + { + "epoch": 0.2088607594936709, + "grad_norm": 0.7146171927452087, + "learning_rate": 0.0015, + "loss": 1.6207, + "step": 1980 + }, + { + "epoch": 0.20991561181434598, + "grad_norm": 0.6842172741889954, + "learning_rate": 0.0015, + "loss": 1.6175, + "step": 1990 + }, + { + "epoch": 0.2109704641350211, + "grad_norm": 0.7051606774330139, + "learning_rate": 0.0015, + "loss": 1.6161, + "step": 2000 + }, + 
{ + "epoch": 0.2120253164556962, + "grad_norm": 1.0707893371582031, + "learning_rate": 0.0015, + "loss": 1.6239, + "step": 2010 + }, + { + "epoch": 0.21308016877637131, + "grad_norm": 0.7594018578529358, + "learning_rate": 0.0015, + "loss": 1.6324, + "step": 2020 + }, + { + "epoch": 0.2141350210970464, + "grad_norm": 0.6623409390449524, + "learning_rate": 0.0015, + "loss": 1.6194, + "step": 2030 + }, + { + "epoch": 0.21518987341772153, + "grad_norm": 0.6320188641548157, + "learning_rate": 0.0015, + "loss": 1.6173, + "step": 2040 + }, + { + "epoch": 0.21624472573839662, + "grad_norm": 0.756416380405426, + "learning_rate": 0.0015, + "loss": 1.6166, + "step": 2050 + }, + { + "epoch": 0.21729957805907174, + "grad_norm": 0.7982528805732727, + "learning_rate": 0.0015, + "loss": 1.6063, + "step": 2060 + }, + { + "epoch": 0.21835443037974683, + "grad_norm": 0.83033686876297, + "learning_rate": 0.0015, + "loss": 1.6108, + "step": 2070 + }, + { + "epoch": 0.21940928270042195, + "grad_norm": 0.7017702460289001, + "learning_rate": 0.0015, + "loss": 1.6147, + "step": 2080 + }, + { + "epoch": 0.22046413502109705, + "grad_norm": 0.8277395963668823, + "learning_rate": 0.0015, + "loss": 1.608, + "step": 2090 + }, + { + "epoch": 0.22151898734177214, + "grad_norm": 0.8305484056472778, + "learning_rate": 0.0015, + "loss": 1.6151, + "step": 2100 + }, + { + "epoch": 0.22257383966244726, + "grad_norm": 0.8680278658866882, + "learning_rate": 0.0015, + "loss": 1.6059, + "step": 2110 + }, + { + "epoch": 0.22362869198312235, + "grad_norm": 0.6655151844024658, + "learning_rate": 0.0015, + "loss": 1.6058, + "step": 2120 + }, + { + "epoch": 0.22468354430379747, + "grad_norm": 0.7406314611434937, + "learning_rate": 0.0015, + "loss": 1.6052, + "step": 2130 + }, + { + "epoch": 0.22573839662447256, + "grad_norm": 0.590162992477417, + "learning_rate": 0.0015, + "loss": 1.6005, + "step": 2140 + }, + { + "epoch": 0.22679324894514769, + "grad_norm": 0.7729781866073608, + "learning_rate": 0.0015, + 
"loss": 1.6058, + "step": 2150 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 1.0980957746505737, + "learning_rate": 0.0015, + "loss": 1.6208, + "step": 2160 + }, + { + "epoch": 0.2289029535864979, + "grad_norm": 0.7007785439491272, + "learning_rate": 0.0015, + "loss": 1.6039, + "step": 2170 + }, + { + "epoch": 0.229957805907173, + "grad_norm": 0.7019352912902832, + "learning_rate": 0.0015, + "loss": 1.5898, + "step": 2180 + }, + { + "epoch": 0.2310126582278481, + "grad_norm": 0.6203840374946594, + "learning_rate": 0.0015, + "loss": 1.6022, + "step": 2190 + }, + { + "epoch": 0.2320675105485232, + "grad_norm": 0.6874725222587585, + "learning_rate": 0.0015, + "loss": 1.6034, + "step": 2200 + }, + { + "epoch": 0.23312236286919832, + "grad_norm": 0.5788426399230957, + "learning_rate": 0.0015, + "loss": 1.5894, + "step": 2210 + }, + { + "epoch": 0.23417721518987342, + "grad_norm": 0.7042095065116882, + "learning_rate": 0.0015, + "loss": 1.6029, + "step": 2220 + }, + { + "epoch": 0.23523206751054854, + "grad_norm": 0.9149144887924194, + "learning_rate": 0.0015, + "loss": 1.5979, + "step": 2230 + }, + { + "epoch": 0.23628691983122363, + "grad_norm": 0.8104167580604553, + "learning_rate": 0.0015, + "loss": 1.5993, + "step": 2240 + }, + { + "epoch": 0.23734177215189872, + "grad_norm": 1.0500192642211914, + "learning_rate": 0.0015, + "loss": 1.5904, + "step": 2250 + }, + { + "epoch": 0.23839662447257384, + "grad_norm": 1.0872136354446411, + "learning_rate": 0.0015, + "loss": 1.5838, + "step": 2260 + }, + { + "epoch": 0.23945147679324894, + "grad_norm": 0.855541467666626, + "learning_rate": 0.0015, + "loss": 1.6158, + "step": 2270 + }, + { + "epoch": 0.24050632911392406, + "grad_norm": 1.3494621515274048, + "learning_rate": 0.0015, + "loss": 1.6022, + "step": 2280 + }, + { + "epoch": 0.24156118143459915, + "grad_norm": 0.9795144200325012, + "learning_rate": 0.0015, + "loss": 1.5896, + "step": 2290 + }, + { + "epoch": 0.24261603375527427, + "grad_norm": 
0.6512905359268188, + "learning_rate": 0.0015, + "loss": 1.5793, + "step": 2300 + }, + { + "epoch": 0.24367088607594936, + "grad_norm": 0.6993908286094666, + "learning_rate": 0.0015, + "loss": 1.5821, + "step": 2310 + }, + { + "epoch": 0.24472573839662448, + "grad_norm": 0.6173799633979797, + "learning_rate": 0.0015, + "loss": 1.5959, + "step": 2320 + }, + { + "epoch": 0.24578059071729957, + "grad_norm": 0.7689816355705261, + "learning_rate": 0.0015, + "loss": 1.577, + "step": 2330 + }, + { + "epoch": 0.2468354430379747, + "grad_norm": 0.8228141665458679, + "learning_rate": 0.0015, + "loss": 1.5746, + "step": 2340 + }, + { + "epoch": 0.2478902953586498, + "grad_norm": 0.6704361438751221, + "learning_rate": 0.0015, + "loss": 1.5789, + "step": 2350 + }, + { + "epoch": 0.2489451476793249, + "grad_norm": 0.8982261419296265, + "learning_rate": 0.0015, + "loss": 1.5865, + "step": 2360 + }, + { + "epoch": 0.25, + "grad_norm": 0.7105319499969482, + "learning_rate": 0.0015, + "loss": 1.5854, + "step": 2370 + }, + { + "epoch": 0.2510548523206751, + "grad_norm": 0.590654194355011, + "learning_rate": 0.0015, + "loss": 1.5799, + "step": 2380 + }, + { + "epoch": 0.2521097046413502, + "grad_norm": 0.60832679271698, + "learning_rate": 0.0015, + "loss": 1.582, + "step": 2390 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 0.6442340612411499, + "learning_rate": 0.0015, + "loss": 1.5801, + "step": 2400 + }, + { + "epoch": 0.2542194092827004, + "grad_norm": 0.7528612017631531, + "learning_rate": 0.0015, + "loss": 1.5829, + "step": 2410 + }, + { + "epoch": 0.2552742616033755, + "grad_norm": 0.9050551652908325, + "learning_rate": 0.0015, + "loss": 1.5761, + "step": 2420 + }, + { + "epoch": 0.2563291139240506, + "grad_norm": 0.7395772933959961, + "learning_rate": 0.0015, + "loss": 1.5739, + "step": 2430 + }, + { + "epoch": 0.25738396624472576, + "grad_norm": 0.758362352848053, + "learning_rate": 0.0015, + "loss": 1.5848, + "step": 2440 + }, + { + "epoch": 0.25843881856540085, + 
"grad_norm": 0.5878888964653015, + "learning_rate": 0.0015, + "loss": 1.5685, + "step": 2450 + }, + { + "epoch": 0.25949367088607594, + "grad_norm": 0.6303932666778564, + "learning_rate": 0.0015, + "loss": 1.5837, + "step": 2460 + }, + { + "epoch": 0.26054852320675104, + "grad_norm": 1.1393781900405884, + "learning_rate": 0.0015, + "loss": 1.5771, + "step": 2470 + }, + { + "epoch": 0.2616033755274262, + "grad_norm": 0.7777864336967468, + "learning_rate": 0.0015, + "loss": 1.5828, + "step": 2480 + }, + { + "epoch": 0.2626582278481013, + "grad_norm": 0.5970431566238403, + "learning_rate": 0.0015, + "loss": 1.5623, + "step": 2490 + }, + { + "epoch": 0.26371308016877637, + "grad_norm": 0.8533217906951904, + "learning_rate": 0.0015, + "loss": 1.5695, + "step": 2500 + }, + { + "epoch": 0.26476793248945146, + "grad_norm": 0.5901464223861694, + "learning_rate": 0.0015, + "loss": 1.5704, + "step": 2510 + }, + { + "epoch": 0.26582278481012656, + "grad_norm": 0.7525666356086731, + "learning_rate": 0.0015, + "loss": 1.5735, + "step": 2520 + }, + { + "epoch": 0.2668776371308017, + "grad_norm": 0.6917023658752441, + "learning_rate": 0.0015, + "loss": 1.5696, + "step": 2530 + }, + { + "epoch": 0.2679324894514768, + "grad_norm": 0.7417186498641968, + "learning_rate": 0.0015, + "loss": 1.5712, + "step": 2540 + }, + { + "epoch": 0.2689873417721519, + "grad_norm": 0.6252340078353882, + "learning_rate": 0.0015, + "loss": 1.5657, + "step": 2550 + }, + { + "epoch": 0.270042194092827, + "grad_norm": 0.7001520395278931, + "learning_rate": 0.0015, + "loss": 1.5735, + "step": 2560 + }, + { + "epoch": 0.27109704641350213, + "grad_norm": 0.6428219676017761, + "learning_rate": 0.0015, + "loss": 1.5655, + "step": 2570 + }, + { + "epoch": 0.2721518987341772, + "grad_norm": 0.6188051104545593, + "learning_rate": 0.0015, + "loss": 1.5654, + "step": 2580 + }, + { + "epoch": 0.2732067510548523, + "grad_norm": 0.6137269735336304, + "learning_rate": 0.0015, + "loss": 1.5704, + "step": 2590 + }, + { + 
"epoch": 0.2742616033755274, + "grad_norm": 0.6634780764579773, + "learning_rate": 0.0015, + "loss": 1.5707, + "step": 2600 + }, + { + "epoch": 0.27531645569620256, + "grad_norm": 0.6164573431015015, + "learning_rate": 0.0015, + "loss": 1.567, + "step": 2610 + }, + { + "epoch": 0.27637130801687765, + "grad_norm": 0.67591792345047, + "learning_rate": 0.0015, + "loss": 1.5696, + "step": 2620 + }, + { + "epoch": 0.27742616033755274, + "grad_norm": 0.9030578136444092, + "learning_rate": 0.0015, + "loss": 1.5592, + "step": 2630 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 0.6329770088195801, + "learning_rate": 0.0015, + "loss": 1.5689, + "step": 2640 + }, + { + "epoch": 0.2795358649789029, + "grad_norm": 0.7755404114723206, + "learning_rate": 0.0015, + "loss": 1.5585, + "step": 2650 + }, + { + "epoch": 0.2805907172995781, + "grad_norm": 0.6837880611419678, + "learning_rate": 0.0015, + "loss": 1.5538, + "step": 2660 + }, + { + "epoch": 0.28164556962025317, + "grad_norm": 0.6190932989120483, + "learning_rate": 0.0015, + "loss": 1.5561, + "step": 2670 + }, + { + "epoch": 0.28270042194092826, + "grad_norm": 0.6166747808456421, + "learning_rate": 0.0015, + "loss": 1.5544, + "step": 2680 + }, + { + "epoch": 0.28375527426160335, + "grad_norm": 0.68338543176651, + "learning_rate": 0.0015, + "loss": 1.5552, + "step": 2690 + }, + { + "epoch": 0.2848101265822785, + "grad_norm": 0.6845906376838684, + "learning_rate": 0.0015, + "loss": 1.5486, + "step": 2700 + }, + { + "epoch": 0.2858649789029536, + "grad_norm": 0.5682319402694702, + "learning_rate": 0.0015, + "loss": 1.5566, + "step": 2710 + }, + { + "epoch": 0.2869198312236287, + "grad_norm": 0.5855002999305725, + "learning_rate": 0.0015, + "loss": 1.5574, + "step": 2720 + }, + { + "epoch": 0.2879746835443038, + "grad_norm": 0.6031274199485779, + "learning_rate": 0.0015, + "loss": 1.54, + "step": 2730 + }, + { + "epoch": 0.2890295358649789, + "grad_norm": 1.0569102764129639, + "learning_rate": 0.0015, + "loss": 1.5523, 
+ "step": 2740 + }, + { + "epoch": 0.290084388185654, + "grad_norm": 0.6331525444984436, + "learning_rate": 0.0015, + "loss": 1.5547, + "step": 2750 + }, + { + "epoch": 0.2911392405063291, + "grad_norm": 0.7460474967956543, + "learning_rate": 0.0015, + "loss": 1.5646, + "step": 2760 + }, + { + "epoch": 0.2921940928270042, + "grad_norm": 0.6277437806129456, + "learning_rate": 0.0015, + "loss": 1.5591, + "step": 2770 + }, + { + "epoch": 0.29324894514767935, + "grad_norm": 0.6590365767478943, + "learning_rate": 0.0015, + "loss": 1.5562, + "step": 2780 + }, + { + "epoch": 0.29430379746835444, + "grad_norm": 0.6113288998603821, + "learning_rate": 0.0015, + "loss": 1.5555, + "step": 2790 + }, + { + "epoch": 0.29535864978902954, + "grad_norm": 0.5620890855789185, + "learning_rate": 0.0015, + "loss": 1.5516, + "step": 2800 + }, + { + "epoch": 0.29641350210970463, + "grad_norm": 0.6174807548522949, + "learning_rate": 0.0015, + "loss": 1.5501, + "step": 2810 + }, + { + "epoch": 0.2974683544303797, + "grad_norm": 0.6747241616249084, + "learning_rate": 0.0015, + "loss": 1.5571, + "step": 2820 + }, + { + "epoch": 0.29852320675105487, + "grad_norm": 0.9310271143913269, + "learning_rate": 0.0015, + "loss": 1.5321, + "step": 2830 + }, + { + "epoch": 0.29957805907172996, + "grad_norm": 1.3230280876159668, + "learning_rate": 0.0015, + "loss": 1.5384, + "step": 2840 + }, + { + "epoch": 0.30063291139240506, + "grad_norm": 1.2144172191619873, + "learning_rate": 0.0015, + "loss": 1.5487, + "step": 2850 + }, + { + "epoch": 0.30168776371308015, + "grad_norm": 0.6515137553215027, + "learning_rate": 0.0015, + "loss": 1.5432, + "step": 2860 + }, + { + "epoch": 0.3027426160337553, + "grad_norm": 0.7877812385559082, + "learning_rate": 0.0015, + "loss": 1.5425, + "step": 2870 + }, + { + "epoch": 0.3037974683544304, + "grad_norm": 0.6689189672470093, + "learning_rate": 0.0015, + "loss": 1.5313, + "step": 2880 + }, + { + "epoch": 0.3048523206751055, + "grad_norm": 0.6395917534828186, + 
"learning_rate": 0.0015, + "loss": 1.5345, + "step": 2890 + }, + { + "epoch": 0.3059071729957806, + "grad_norm": 0.7754723429679871, + "learning_rate": 0.0015, + "loss": 1.5428, + "step": 2900 + }, + { + "epoch": 0.3069620253164557, + "grad_norm": 0.7293116450309753, + "learning_rate": 0.0015, + "loss": 1.5438, + "step": 2910 + }, + { + "epoch": 0.3080168776371308, + "grad_norm": 1.249470829963684, + "learning_rate": 0.0015, + "loss": 1.5401, + "step": 2920 + }, + { + "epoch": 0.3090717299578059, + "grad_norm": 0.6346191763877869, + "learning_rate": 0.0015, + "loss": 1.5335, + "step": 2930 + }, + { + "epoch": 0.310126582278481, + "grad_norm": 0.6461648941040039, + "learning_rate": 0.0015, + "loss": 1.5374, + "step": 2940 + }, + { + "epoch": 0.3111814345991561, + "grad_norm": 0.7199176549911499, + "learning_rate": 0.0015, + "loss": 1.5476, + "step": 2950 + }, + { + "epoch": 0.31223628691983124, + "grad_norm": 0.7429535388946533, + "learning_rate": 0.0015, + "loss": 1.529, + "step": 2960 + }, + { + "epoch": 0.31329113924050633, + "grad_norm": 0.9656321406364441, + "learning_rate": 0.0015, + "loss": 1.5238, + "step": 2970 + }, + { + "epoch": 0.3143459915611814, + "grad_norm": 1.1244347095489502, + "learning_rate": 0.0015, + "loss": 1.5349, + "step": 2980 + }, + { + "epoch": 0.3154008438818565, + "grad_norm": 0.7225420475006104, + "learning_rate": 0.0015, + "loss": 1.5484, + "step": 2990 + }, + { + "epoch": 0.31645569620253167, + "grad_norm": 0.6689170598983765, + "learning_rate": 0.0015, + "loss": 1.5242, + "step": 3000 + }, + { + "epoch": 0.31751054852320676, + "grad_norm": 0.7187021374702454, + "learning_rate": 0.0015, + "loss": 1.5339, + "step": 3010 + }, + { + "epoch": 0.31856540084388185, + "grad_norm": 0.6817131042480469, + "learning_rate": 0.0015, + "loss": 1.5356, + "step": 3020 + }, + { + "epoch": 0.31962025316455694, + "grad_norm": 0.7035455703735352, + "learning_rate": 0.0015, + "loss": 1.5229, + "step": 3030 + }, + { + "epoch": 0.3206751054852321, + 
"grad_norm": 0.7125651240348816, + "learning_rate": 0.0015, + "loss": 1.5336, + "step": 3040 + }, + { + "epoch": 0.3217299578059072, + "grad_norm": 0.6795884966850281, + "learning_rate": 0.0015, + "loss": 1.5301, + "step": 3050 + }, + { + "epoch": 0.3227848101265823, + "grad_norm": 0.6996033787727356, + "learning_rate": 0.0015, + "loss": 1.5408, + "step": 3060 + }, + { + "epoch": 0.32383966244725737, + "grad_norm": 0.5682607889175415, + "learning_rate": 0.0015, + "loss": 1.5276, + "step": 3070 + }, + { + "epoch": 0.32489451476793246, + "grad_norm": 0.5759983658790588, + "learning_rate": 0.0015, + "loss": 1.5197, + "step": 3080 + }, + { + "epoch": 0.3259493670886076, + "grad_norm": 0.582955539226532, + "learning_rate": 0.0015, + "loss": 1.5363, + "step": 3090 + }, + { + "epoch": 0.3270042194092827, + "grad_norm": 0.6314529180526733, + "learning_rate": 0.0015, + "loss": 1.5232, + "step": 3100 + }, + { + "epoch": 0.3280590717299578, + "grad_norm": 0.6088457703590393, + "learning_rate": 0.0015, + "loss": 1.5263, + "step": 3110 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 0.5606082677841187, + "learning_rate": 0.0015, + "loss": 1.526, + "step": 3120 + }, + { + "epoch": 0.33016877637130804, + "grad_norm": 0.7216711044311523, + "learning_rate": 0.0015, + "loss": 1.5379, + "step": 3130 + }, + { + "epoch": 0.33122362869198313, + "grad_norm": 0.7543506622314453, + "learning_rate": 0.0015, + "loss": 1.5192, + "step": 3140 + }, + { + "epoch": 0.3322784810126582, + "grad_norm": 0.5943099856376648, + "learning_rate": 0.0015, + "loss": 1.5272, + "step": 3150 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.60477215051651, + "learning_rate": 0.0015, + "loss": 1.5282, + "step": 3160 + }, + { + "epoch": 0.33438818565400846, + "grad_norm": 0.6148793697357178, + "learning_rate": 0.0015, + "loss": 1.5293, + "step": 3170 + }, + { + "epoch": 0.33544303797468356, + "grad_norm": 0.591863751411438, + "learning_rate": 0.0015, + "loss": 1.5238, + "step": 3180 + }, + { + 
"epoch": 0.33649789029535865, + "grad_norm": 0.6953649520874023, + "learning_rate": 0.0015, + "loss": 1.5245, + "step": 3190 + }, + { + "epoch": 0.33755274261603374, + "grad_norm": 0.74885493516922, + "learning_rate": 0.0015, + "loss": 1.5195, + "step": 3200 + }, + { + "epoch": 0.33860759493670883, + "grad_norm": 0.7037512063980103, + "learning_rate": 0.0015, + "loss": 1.5322, + "step": 3210 + }, + { + "epoch": 0.339662447257384, + "grad_norm": 0.725911021232605, + "learning_rate": 0.0015, + "loss": 1.5245, + "step": 3220 + }, + { + "epoch": 0.3407172995780591, + "grad_norm": 0.6004663705825806, + "learning_rate": 0.0015, + "loss": 1.5144, + "step": 3230 + }, + { + "epoch": 0.34177215189873417, + "grad_norm": 0.7108480930328369, + "learning_rate": 0.0015, + "loss": 1.4998, + "step": 3240 + }, + { + "epoch": 0.34282700421940926, + "grad_norm": 0.6768283843994141, + "learning_rate": 0.0015, + "loss": 1.5249, + "step": 3250 + }, + { + "epoch": 0.3438818565400844, + "grad_norm": 0.5562488436698914, + "learning_rate": 0.0015, + "loss": 1.5131, + "step": 3260 + }, + { + "epoch": 0.3449367088607595, + "grad_norm": 0.631421685218811, + "learning_rate": 0.0015, + "loss": 1.5294, + "step": 3270 + }, + { + "epoch": 0.3459915611814346, + "grad_norm": 0.7008868455886841, + "learning_rate": 0.0015, + "loss": 1.529, + "step": 3280 + }, + { + "epoch": 0.3470464135021097, + "grad_norm": 0.775509238243103, + "learning_rate": 0.0015, + "loss": 1.5112, + "step": 3290 + }, + { + "epoch": 0.34810126582278483, + "grad_norm": 0.5601686835289001, + "learning_rate": 0.0015, + "loss": 1.5219, + "step": 3300 + }, + { + "epoch": 0.3491561181434599, + "grad_norm": 0.5643460750579834, + "learning_rate": 0.0015, + "loss": 1.5197, + "step": 3310 + }, + { + "epoch": 0.350210970464135, + "grad_norm": 0.5928805470466614, + "learning_rate": 0.0015, + "loss": 1.5228, + "step": 3320 + }, + { + "epoch": 0.3512658227848101, + "grad_norm": 0.5419443249702454, + "learning_rate": 0.0015, + "loss": 1.5147, + 
"step": 3330 + }, + { + "epoch": 0.35232067510548526, + "grad_norm": 0.7151007056236267, + "learning_rate": 0.0015, + "loss": 1.5166, + "step": 3340 + }, + { + "epoch": 0.35337552742616035, + "grad_norm": 0.5484126210212708, + "learning_rate": 0.0015, + "loss": 1.5121, + "step": 3350 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 0.6396426558494568, + "learning_rate": 0.0015, + "loss": 1.5102, + "step": 3360 + }, + { + "epoch": 0.35548523206751054, + "grad_norm": 0.6092063188552856, + "learning_rate": 0.0015, + "loss": 1.5189, + "step": 3370 + }, + { + "epoch": 0.35654008438818563, + "grad_norm": 0.6935619711875916, + "learning_rate": 0.0015, + "loss": 1.5133, + "step": 3380 + }, + { + "epoch": 0.3575949367088608, + "grad_norm": 0.551314115524292, + "learning_rate": 0.0015, + "loss": 1.505, + "step": 3390 + }, + { + "epoch": 0.35864978902953587, + "grad_norm": 0.5668650269508362, + "learning_rate": 0.0015, + "loss": 1.5166, + "step": 3400 + }, + { + "epoch": 0.35970464135021096, + "grad_norm": 0.6797813177108765, + "learning_rate": 0.0015, + "loss": 1.5137, + "step": 3410 + }, + { + "epoch": 0.36075949367088606, + "grad_norm": 0.7667569518089294, + "learning_rate": 0.0015, + "loss": 1.4977, + "step": 3420 + }, + { + "epoch": 0.3618143459915612, + "grad_norm": 0.7580966353416443, + "learning_rate": 0.0015, + "loss": 1.5138, + "step": 3430 + }, + { + "epoch": 0.3628691983122363, + "grad_norm": 0.5931734442710876, + "learning_rate": 0.0015, + "loss": 1.5105, + "step": 3440 + }, + { + "epoch": 0.3639240506329114, + "grad_norm": 0.5610995292663574, + "learning_rate": 0.0015, + "loss": 1.505, + "step": 3450 + }, + { + "epoch": 0.3649789029535865, + "grad_norm": 0.712026059627533, + "learning_rate": 0.0015, + "loss": 1.4982, + "step": 3460 + }, + { + "epoch": 0.36603375527426163, + "grad_norm": 0.9137971997261047, + "learning_rate": 0.0015, + "loss": 1.5166, + "step": 3470 + }, + { + "epoch": 0.3670886075949367, + "grad_norm": 0.764611005783081, + 
"learning_rate": 0.0015, + "loss": 1.5218, + "step": 3480 + }, + { + "epoch": 0.3681434599156118, + "grad_norm": 0.5769204497337341, + "learning_rate": 0.0015, + "loss": 1.5108, + "step": 3490 + }, + { + "epoch": 0.3691983122362869, + "grad_norm": 0.6009048223495483, + "learning_rate": 0.0015, + "loss": 1.4939, + "step": 3500 + }, + { + "epoch": 0.370253164556962, + "grad_norm": 0.7048670053482056, + "learning_rate": 0.0015, + "loss": 1.5146, + "step": 3510 + }, + { + "epoch": 0.37130801687763715, + "grad_norm": 0.6143278479576111, + "learning_rate": 0.0015, + "loss": 1.5105, + "step": 3520 + }, + { + "epoch": 0.37236286919831224, + "grad_norm": 0.5736284852027893, + "learning_rate": 0.0015, + "loss": 1.5183, + "step": 3530 + }, + { + "epoch": 0.37341772151898733, + "grad_norm": 0.741503894329071, + "learning_rate": 0.0015, + "loss": 1.5123, + "step": 3540 + }, + { + "epoch": 0.3744725738396624, + "grad_norm": 0.6641824245452881, + "learning_rate": 0.0015, + "loss": 1.5028, + "step": 3550 + }, + { + "epoch": 0.3755274261603376, + "grad_norm": 0.568838357925415, + "learning_rate": 0.0015, + "loss": 1.5011, + "step": 3560 + }, + { + "epoch": 0.37658227848101267, + "grad_norm": 0.7397016286849976, + "learning_rate": 0.0015, + "loss": 1.4981, + "step": 3570 + }, + { + "epoch": 0.37763713080168776, + "grad_norm": 0.5978692173957825, + "learning_rate": 0.0015, + "loss": 1.5027, + "step": 3580 + }, + { + "epoch": 0.37869198312236285, + "grad_norm": 0.5610095262527466, + "learning_rate": 0.0015, + "loss": 1.5129, + "step": 3590 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 0.7330253720283508, + "learning_rate": 0.0015, + "loss": 1.4975, + "step": 3600 + }, + { + "epoch": 0.3808016877637131, + "grad_norm": 0.5927225351333618, + "learning_rate": 0.0015, + "loss": 1.5004, + "step": 3610 + }, + { + "epoch": 0.3818565400843882, + "grad_norm": 0.5715808868408203, + "learning_rate": 0.0015, + "loss": 1.503, + "step": 3620 + }, + { + "epoch": 0.3829113924050633, + 
"grad_norm": 0.6043118834495544, + "learning_rate": 0.0015, + "loss": 1.4984, + "step": 3630 + }, + { + "epoch": 0.38396624472573837, + "grad_norm": 0.7284471392631531, + "learning_rate": 0.0015, + "loss": 1.5008, + "step": 3640 + }, + { + "epoch": 0.3850210970464135, + "grad_norm": 0.6941373944282532, + "learning_rate": 0.0015, + "loss": 1.5129, + "step": 3650 + }, + { + "epoch": 0.3860759493670886, + "grad_norm": 0.9151371717453003, + "learning_rate": 0.0015, + "loss": 1.5016, + "step": 3660 + }, + { + "epoch": 0.3871308016877637, + "grad_norm": 0.5350086092948914, + "learning_rate": 0.0015, + "loss": 1.4956, + "step": 3670 + }, + { + "epoch": 0.3881856540084388, + "grad_norm": 0.9535840749740601, + "learning_rate": 0.0015, + "loss": 1.4972, + "step": 3680 + }, + { + "epoch": 0.38924050632911394, + "grad_norm": 0.6288740038871765, + "learning_rate": 0.0015, + "loss": 1.5085, + "step": 3690 + }, + { + "epoch": 0.39029535864978904, + "grad_norm": 0.6044665575027466, + "learning_rate": 0.0015, + "loss": 1.5114, + "step": 3700 + }, + { + "epoch": 0.39135021097046413, + "grad_norm": 0.6169572472572327, + "learning_rate": 0.0015, + "loss": 1.4977, + "step": 3710 + }, + { + "epoch": 0.3924050632911392, + "grad_norm": 0.572669267654419, + "learning_rate": 0.0015, + "loss": 1.4784, + "step": 3720 + }, + { + "epoch": 0.39345991561181437, + "grad_norm": 0.5647141933441162, + "learning_rate": 0.0015, + "loss": 1.4965, + "step": 3730 + }, + { + "epoch": 0.39451476793248946, + "grad_norm": 0.604538083076477, + "learning_rate": 0.0015, + "loss": 1.4967, + "step": 3740 + }, + { + "epoch": 0.39556962025316456, + "grad_norm": 0.729308545589447, + "learning_rate": 0.0015, + "loss": 1.5039, + "step": 3750 + }, + { + "epoch": 0.39662447257383965, + "grad_norm": 0.9094111323356628, + "learning_rate": 0.0015, + "loss": 1.4912, + "step": 3760 + }, + { + "epoch": 0.39767932489451474, + "grad_norm": 0.6112377643585205, + "learning_rate": 0.0015, + "loss": 1.5059, + "step": 3770 + }, + { + 
"epoch": 0.3987341772151899, + "grad_norm": 0.5163501501083374, + "learning_rate": 0.0015, + "loss": 1.4938, + "step": 3780 + }, + { + "epoch": 0.399789029535865, + "grad_norm": 0.683885931968689, + "learning_rate": 0.0015, + "loss": 1.4966, + "step": 3790 + }, + { + "epoch": 0.4008438818565401, + "grad_norm": 0.5952377319335938, + "learning_rate": 0.0015, + "loss": 1.5046, + "step": 3800 + }, + { + "epoch": 0.40189873417721517, + "grad_norm": 0.6416290998458862, + "learning_rate": 0.0015, + "loss": 1.4908, + "step": 3810 + }, + { + "epoch": 0.4029535864978903, + "grad_norm": 0.6121982336044312, + "learning_rate": 0.0015, + "loss": 1.4825, + "step": 3820 + }, + { + "epoch": 0.4040084388185654, + "grad_norm": 0.6151083707809448, + "learning_rate": 0.0015, + "loss": 1.4865, + "step": 3830 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 0.7092777490615845, + "learning_rate": 0.0015, + "loss": 1.5029, + "step": 3840 + }, + { + "epoch": 0.4061181434599156, + "grad_norm": 0.6845822334289551, + "learning_rate": 0.0015, + "loss": 1.4841, + "step": 3850 + }, + { + "epoch": 0.40717299578059074, + "grad_norm": 0.8743574619293213, + "learning_rate": 0.0015, + "loss": 1.4983, + "step": 3860 + }, + { + "epoch": 0.40822784810126583, + "grad_norm": 0.7992430925369263, + "learning_rate": 0.0015, + "loss": 1.4876, + "step": 3870 + }, + { + "epoch": 0.4092827004219409, + "grad_norm": 0.6235871315002441, + "learning_rate": 0.0015, + "loss": 1.4798, + "step": 3880 + }, + { + "epoch": 0.410337552742616, + "grad_norm": 0.6806468367576599, + "learning_rate": 0.0015, + "loss": 1.4806, + "step": 3890 + }, + { + "epoch": 0.41139240506329117, + "grad_norm": 1.147836446762085, + "learning_rate": 0.0015, + "loss": 1.4805, + "step": 3900 + }, + { + "epoch": 0.41244725738396626, + "grad_norm": 0.6263267993927002, + "learning_rate": 0.0015, + "loss": 1.4856, + "step": 3910 + }, + { + "epoch": 0.41350210970464135, + "grad_norm": 0.6188958883285522, + "learning_rate": 0.0015, + "loss": 
1.4809, + "step": 3920 + }, + { + "epoch": 0.41455696202531644, + "grad_norm": 0.5184334516525269, + "learning_rate": 0.0015, + "loss": 1.4862, + "step": 3930 + }, + { + "epoch": 0.41561181434599154, + "grad_norm": 0.6880916357040405, + "learning_rate": 0.0015, + "loss": 1.4813, + "step": 3940 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 0.8529372811317444, + "learning_rate": 0.0015, + "loss": 1.4879, + "step": 3950 + }, + { + "epoch": 0.4177215189873418, + "grad_norm": 0.6398452520370483, + "learning_rate": 0.0015, + "loss": 1.4903, + "step": 3960 + }, + { + "epoch": 0.41877637130801687, + "grad_norm": 0.7702900171279907, + "learning_rate": 0.0015, + "loss": 1.4788, + "step": 3970 + }, + { + "epoch": 0.41983122362869196, + "grad_norm": 0.6239493489265442, + "learning_rate": 0.0015, + "loss": 1.4908, + "step": 3980 + }, + { + "epoch": 0.4208860759493671, + "grad_norm": 0.6391934752464294, + "learning_rate": 0.0015, + "loss": 1.4797, + "step": 3990 + }, + { + "epoch": 0.4219409282700422, + "grad_norm": 0.7288190722465515, + "learning_rate": 0.0015, + "loss": 1.4941, + "step": 4000 + }, + { + "epoch": 0.4229957805907173, + "grad_norm": 0.5728703737258911, + "learning_rate": 0.0015, + "loss": 1.4841, + "step": 4010 + }, + { + "epoch": 0.4240506329113924, + "grad_norm": 0.5602556467056274, + "learning_rate": 0.0015, + "loss": 1.4835, + "step": 4020 + }, + { + "epoch": 0.42510548523206754, + "grad_norm": 0.7178170084953308, + "learning_rate": 0.0015, + "loss": 1.4751, + "step": 4030 + }, + { + "epoch": 0.42616033755274263, + "grad_norm": 0.5581310391426086, + "learning_rate": 0.0015, + "loss": 1.4853, + "step": 4040 + }, + { + "epoch": 0.4272151898734177, + "grad_norm": 0.5198979377746582, + "learning_rate": 0.0015, + "loss": 1.4871, + "step": 4050 + }, + { + "epoch": 0.4282700421940928, + "grad_norm": 0.6657981276512146, + "learning_rate": 0.0015, + "loss": 1.4923, + "step": 4060 + }, + { + "epoch": 0.4293248945147679, + "grad_norm": 0.765648603439331, + 
"learning_rate": 0.0015, + "loss": 1.4845, + "step": 4070 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 0.6785349249839783, + "learning_rate": 0.0015, + "loss": 1.4892, + "step": 4080 + }, + { + "epoch": 0.43143459915611815, + "grad_norm": 0.5718680620193481, + "learning_rate": 0.0015, + "loss": 1.4837, + "step": 4090 + }, + { + "epoch": 0.43248945147679324, + "grad_norm": 0.553119421005249, + "learning_rate": 0.0015, + "loss": 1.4814, + "step": 4100 + }, + { + "epoch": 0.43354430379746833, + "grad_norm": 0.5533855557441711, + "learning_rate": 0.0015, + "loss": 1.4721, + "step": 4110 + }, + { + "epoch": 0.4345991561181435, + "grad_norm": 0.6695696115493774, + "learning_rate": 0.0015, + "loss": 1.4782, + "step": 4120 + }, + { + "epoch": 0.4356540084388186, + "grad_norm": 0.5840980410575867, + "learning_rate": 0.0015, + "loss": 1.4717, + "step": 4130 + }, + { + "epoch": 0.43670886075949367, + "grad_norm": 0.6174877285957336, + "learning_rate": 0.0015, + "loss": 1.4761, + "step": 4140 + }, + { + "epoch": 0.43776371308016876, + "grad_norm": 0.6185278296470642, + "learning_rate": 0.0015, + "loss": 1.4792, + "step": 4150 + }, + { + "epoch": 0.4388185654008439, + "grad_norm": 0.5936148762702942, + "learning_rate": 0.0015, + "loss": 1.4735, + "step": 4160 + }, + { + "epoch": 0.439873417721519, + "grad_norm": 0.6119903326034546, + "learning_rate": 0.0015, + "loss": 1.4731, + "step": 4170 + }, + { + "epoch": 0.4409282700421941, + "grad_norm": 0.5228918194770813, + "learning_rate": 0.0015, + "loss": 1.4704, + "step": 4180 + }, + { + "epoch": 0.4419831223628692, + "grad_norm": 0.6289933323860168, + "learning_rate": 0.0015, + "loss": 1.4762, + "step": 4190 + }, + { + "epoch": 0.4430379746835443, + "grad_norm": 0.6278483271598816, + "learning_rate": 0.0015, + "loss": 1.4773, + "step": 4200 + }, + { + "epoch": 0.4440928270042194, + "grad_norm": 0.7192302942276001, + "learning_rate": 0.0015, + "loss": 1.4685, + "step": 4210 + }, + { + "epoch": 0.4451476793248945, + 
"grad_norm": 0.7284325957298279, + "learning_rate": 0.0015, + "loss": 1.4787, + "step": 4220 + }, + { + "epoch": 0.4462025316455696, + "grad_norm": 0.8943430781364441, + "learning_rate": 0.0015, + "loss": 1.4741, + "step": 4230 + }, + { + "epoch": 0.4472573839662447, + "grad_norm": 0.6659002304077148, + "learning_rate": 0.0015, + "loss": 1.4737, + "step": 4240 + }, + { + "epoch": 0.44831223628691985, + "grad_norm": 0.642861008644104, + "learning_rate": 0.0015, + "loss": 1.4774, + "step": 4250 + }, + { + "epoch": 0.44936708860759494, + "grad_norm": 0.7793336510658264, + "learning_rate": 0.0015, + "loss": 1.4825, + "step": 4260 + }, + { + "epoch": 0.45042194092827004, + "grad_norm": 0.5228441953659058, + "learning_rate": 0.0015, + "loss": 1.4727, + "step": 4270 + }, + { + "epoch": 0.45147679324894513, + "grad_norm": 0.9414927959442139, + "learning_rate": 0.0015, + "loss": 1.4664, + "step": 4280 + }, + { + "epoch": 0.4525316455696203, + "grad_norm": 0.8323007822036743, + "learning_rate": 0.0015, + "loss": 1.4782, + "step": 4290 + }, + { + "epoch": 0.45358649789029537, + "grad_norm": 0.9512622356414795, + "learning_rate": 0.0015, + "loss": 1.4791, + "step": 4300 + }, + { + "epoch": 0.45464135021097046, + "grad_norm": 0.5568503141403198, + "learning_rate": 0.0015, + "loss": 1.4762, + "step": 4310 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 0.8442806005477905, + "learning_rate": 0.0015, + "loss": 1.4698, + "step": 4320 + }, + { + "epoch": 0.45675105485232065, + "grad_norm": 0.6293421387672424, + "learning_rate": 0.0015, + "loss": 1.4765, + "step": 4330 + }, + { + "epoch": 0.4578059071729958, + "grad_norm": 0.7461580634117126, + "learning_rate": 0.0015, + "loss": 1.4684, + "step": 4340 + }, + { + "epoch": 0.4588607594936709, + "grad_norm": 0.8717284798622131, + "learning_rate": 0.0015, + "loss": 1.4717, + "step": 4350 + }, + { + "epoch": 0.459915611814346, + "grad_norm": 0.5873362421989441, + "learning_rate": 0.0015, + "loss": 1.4875, + "step": 4360 + }, + { + 
"epoch": 0.4609704641350211, + "grad_norm": 0.5259530544281006, + "learning_rate": 0.0015, + "loss": 1.4619, + "step": 4370 + }, + { + "epoch": 0.4620253164556962, + "grad_norm": 0.5751098394393921, + "learning_rate": 0.0015, + "loss": 1.4669, + "step": 4380 + }, + { + "epoch": 0.4630801687763713, + "grad_norm": 0.5525999069213867, + "learning_rate": 0.0015, + "loss": 1.4649, + "step": 4390 + }, + { + "epoch": 0.4641350210970464, + "grad_norm": 0.579777717590332, + "learning_rate": 0.0015, + "loss": 1.4694, + "step": 4400 + }, + { + "epoch": 0.4651898734177215, + "grad_norm": 0.6130363941192627, + "learning_rate": 0.0015, + "loss": 1.4834, + "step": 4410 + }, + { + "epoch": 0.46624472573839665, + "grad_norm": 0.6964607834815979, + "learning_rate": 0.0015, + "loss": 1.4687, + "step": 4420 + }, + { + "epoch": 0.46729957805907174, + "grad_norm": 0.5204871892929077, + "learning_rate": 0.0015, + "loss": 1.4713, + "step": 4430 + }, + { + "epoch": 0.46835443037974683, + "grad_norm": 0.801157534122467, + "learning_rate": 0.0015, + "loss": 1.4676, + "step": 4440 + }, + { + "epoch": 0.4694092827004219, + "grad_norm": 0.7642245292663574, + "learning_rate": 0.0015, + "loss": 1.4765, + "step": 4450 + }, + { + "epoch": 0.4704641350210971, + "grad_norm": 0.5725940465927124, + "learning_rate": 0.0015, + "loss": 1.4615, + "step": 4460 + }, + { + "epoch": 0.47151898734177217, + "grad_norm": 0.5430575609207153, + "learning_rate": 0.0015, + "loss": 1.4712, + "step": 4470 + }, + { + "epoch": 0.47257383966244726, + "grad_norm": 0.6782746911048889, + "learning_rate": 0.0015, + "loss": 1.4767, + "step": 4480 + }, + { + "epoch": 0.47362869198312235, + "grad_norm": 0.9323136806488037, + "learning_rate": 0.0015, + "loss": 1.4633, + "step": 4490 + }, + { + "epoch": 0.47468354430379744, + "grad_norm": 0.6096838116645813, + "learning_rate": 0.0015, + "loss": 1.464, + "step": 4500 + }, + { + "epoch": 0.4757383966244726, + "grad_norm": 0.5575011372566223, + "learning_rate": 0.0015, + "loss": 
1.4688, + "step": 4510 + }, + { + "epoch": 0.4767932489451477, + "grad_norm": 0.6049365401268005, + "learning_rate": 0.0015, + "loss": 1.4522, + "step": 4520 + }, + { + "epoch": 0.4778481012658228, + "grad_norm": 0.6802952289581299, + "learning_rate": 0.0015, + "loss": 1.4695, + "step": 4530 + }, + { + "epoch": 0.47890295358649787, + "grad_norm": 0.8433916568756104, + "learning_rate": 0.0015, + "loss": 1.4752, + "step": 4540 + }, + { + "epoch": 0.479957805907173, + "grad_norm": 0.8293811082839966, + "learning_rate": 0.0015, + "loss": 1.4695, + "step": 4550 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 0.7346205115318298, + "learning_rate": 0.0015, + "loss": 1.4581, + "step": 4560 + }, + { + "epoch": 0.4820675105485232, + "grad_norm": 0.570183515548706, + "learning_rate": 0.0015, + "loss": 1.4408, + "step": 4570 + }, + { + "epoch": 0.4831223628691983, + "grad_norm": 0.7086605429649353, + "learning_rate": 0.0015, + "loss": 1.4648, + "step": 4580 + }, + { + "epoch": 0.48417721518987344, + "grad_norm": 0.619171142578125, + "learning_rate": 0.0015, + "loss": 1.452, + "step": 4590 + }, + { + "epoch": 0.48523206751054854, + "grad_norm": 0.6127137541770935, + "learning_rate": 0.0015, + "loss": 1.4723, + "step": 4600 + }, + { + "epoch": 0.48628691983122363, + "grad_norm": 0.734324038028717, + "learning_rate": 0.0015, + "loss": 1.4763, + "step": 4610 + }, + { + "epoch": 0.4873417721518987, + "grad_norm": 0.7030718922615051, + "learning_rate": 0.0015, + "loss": 1.4489, + "step": 4620 + }, + { + "epoch": 0.4883966244725738, + "grad_norm": 0.5352341532707214, + "learning_rate": 0.0015, + "loss": 1.4678, + "step": 4630 + }, + { + "epoch": 0.48945147679324896, + "grad_norm": 0.6763170957565308, + "learning_rate": 0.0015, + "loss": 1.4677, + "step": 4640 + }, + { + "epoch": 0.49050632911392406, + "grad_norm": 0.6143730282783508, + "learning_rate": 0.0015, + "loss": 1.4707, + "step": 4650 + }, + { + "epoch": 0.49156118143459915, + "grad_norm": 0.5258755087852478, + 
"learning_rate": 0.0015, + "loss": 1.4559, + "step": 4660 + }, + { + "epoch": 0.49261603375527424, + "grad_norm": 0.67002272605896, + "learning_rate": 0.0015, + "loss": 1.456, + "step": 4670 + }, + { + "epoch": 0.4936708860759494, + "grad_norm": 0.5338280200958252, + "learning_rate": 0.0015, + "loss": 1.4567, + "step": 4680 + }, + { + "epoch": 0.4947257383966245, + "grad_norm": 0.6864109635353088, + "learning_rate": 0.0015, + "loss": 1.4524, + "step": 4690 + }, + { + "epoch": 0.4957805907172996, + "grad_norm": 0.5267427563667297, + "learning_rate": 0.0015, + "loss": 1.4605, + "step": 4700 + }, + { + "epoch": 0.49683544303797467, + "grad_norm": 0.5892050862312317, + "learning_rate": 0.0015, + "loss": 1.4631, + "step": 4710 + }, + { + "epoch": 0.4978902953586498, + "grad_norm": 0.7090893983840942, + "learning_rate": 0.0015, + "loss": 1.4585, + "step": 4720 + }, + { + "epoch": 0.4989451476793249, + "grad_norm": 0.5960710048675537, + "learning_rate": 0.0015, + "loss": 1.4561, + "step": 4730 + }, + { + "epoch": 0.5, + "grad_norm": 0.6754327416419983, + "learning_rate": 0.0015, + "loss": 1.4544, + "step": 4740 + }, + { + "epoch": 0.5010548523206751, + "grad_norm": 0.6936537623405457, + "learning_rate": 0.0015, + "loss": 1.4756, + "step": 4750 + }, + { + "epoch": 0.5021097046413502, + "grad_norm": 0.5348150730133057, + "learning_rate": 0.0015, + "loss": 1.459, + "step": 4760 + }, + { + "epoch": 0.5031645569620253, + "grad_norm": 0.6094486117362976, + "learning_rate": 0.0015, + "loss": 1.4567, + "step": 4770 + }, + { + "epoch": 0.5042194092827004, + "grad_norm": 0.6765019297599792, + "learning_rate": 0.0015, + "loss": 1.4678, + "step": 4780 + }, + { + "epoch": 0.5052742616033755, + "grad_norm": 0.5961644053459167, + "learning_rate": 0.0015, + "loss": 1.4503, + "step": 4790 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 0.6130753755569458, + "learning_rate": 0.0015, + "loss": 1.4536, + "step": 4800 + }, + { + "epoch": 0.5073839662447257, + "grad_norm": 
0.5550441145896912, + "learning_rate": 0.0015, + "loss": 1.4551, + "step": 4810 + }, + { + "epoch": 0.5084388185654009, + "grad_norm": 0.6500513553619385, + "learning_rate": 0.0015, + "loss": 1.4735, + "step": 4820 + }, + { + "epoch": 0.509493670886076, + "grad_norm": 0.7694984674453735, + "learning_rate": 0.0015, + "loss": 1.4563, + "step": 4830 + }, + { + "epoch": 0.510548523206751, + "grad_norm": 0.6021876335144043, + "learning_rate": 0.0015, + "loss": 1.4505, + "step": 4840 + }, + { + "epoch": 0.5116033755274262, + "grad_norm": 0.5139393210411072, + "learning_rate": 0.0015, + "loss": 1.4624, + "step": 4850 + }, + { + "epoch": 0.5126582278481012, + "grad_norm": 0.610668957233429, + "learning_rate": 0.0015, + "loss": 1.4502, + "step": 4860 + }, + { + "epoch": 0.5137130801687764, + "grad_norm": 0.8146686553955078, + "learning_rate": 0.0015, + "loss": 1.4455, + "step": 4870 + }, + { + "epoch": 0.5147679324894515, + "grad_norm": 0.706887423992157, + "learning_rate": 0.0015, + "loss": 1.4652, + "step": 4880 + }, + { + "epoch": 0.5158227848101266, + "grad_norm": 0.6309559941291809, + "learning_rate": 0.0015, + "loss": 1.4552, + "step": 4890 + }, + { + "epoch": 0.5168776371308017, + "grad_norm": 0.6033481955528259, + "learning_rate": 0.0015, + "loss": 1.4637, + "step": 4900 + }, + { + "epoch": 0.5179324894514767, + "grad_norm": 0.5887914896011353, + "learning_rate": 0.0015, + "loss": 1.4594, + "step": 4910 + }, + { + "epoch": 0.5189873417721519, + "grad_norm": 0.5958083271980286, + "learning_rate": 0.0015, + "loss": 1.4467, + "step": 4920 + }, + { + "epoch": 0.520042194092827, + "grad_norm": 0.6756103038787842, + "learning_rate": 0.0015, + "loss": 1.457, + "step": 4930 + }, + { + "epoch": 0.5210970464135021, + "grad_norm": 0.5370129346847534, + "learning_rate": 0.0015, + "loss": 1.4581, + "step": 4940 + }, + { + "epoch": 0.5221518987341772, + "grad_norm": 0.6108761429786682, + "learning_rate": 0.0015, + "loss": 1.4629, + "step": 4950 + }, + { + "epoch": 
0.5232067510548524, + "grad_norm": 0.8404567241668701, + "learning_rate": 0.0015, + "loss": 1.4434, + "step": 4960 + }, + { + "epoch": 0.5242616033755274, + "grad_norm": 0.5708929300308228, + "learning_rate": 0.0015, + "loss": 1.4523, + "step": 4970 + }, + { + "epoch": 0.5253164556962026, + "grad_norm": 0.5647048354148865, + "learning_rate": 0.0015, + "loss": 1.446, + "step": 4980 + }, + { + "epoch": 0.5263713080168776, + "grad_norm": 0.5417590141296387, + "learning_rate": 0.0015, + "loss": 1.4491, + "step": 4990 + }, + { + "epoch": 0.5274261603375527, + "grad_norm": 0.7352495193481445, + "learning_rate": 0.0015, + "loss": 1.4607, + "step": 5000 + }, + { + "epoch": 0.5284810126582279, + "grad_norm": 0.7228708267211914, + "learning_rate": 0.0015, + "loss": 1.4534, + "step": 5010 + }, + { + "epoch": 0.5295358649789029, + "grad_norm": 0.6637216210365295, + "learning_rate": 0.0015, + "loss": 1.4468, + "step": 5020 + }, + { + "epoch": 0.5305907172995781, + "grad_norm": 0.5074453353881836, + "learning_rate": 0.0015, + "loss": 1.4306, + "step": 5030 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 0.5190367698669434, + "learning_rate": 0.0015, + "loss": 1.4592, + "step": 5040 + }, + { + "epoch": 0.5327004219409283, + "grad_norm": 0.5237929224967957, + "learning_rate": 0.0015, + "loss": 1.4468, + "step": 5050 + }, + { + "epoch": 0.5337552742616034, + "grad_norm": 0.5514045357704163, + "learning_rate": 0.0015, + "loss": 1.4597, + "step": 5060 + }, + { + "epoch": 0.5348101265822784, + "grad_norm": 0.7031023502349854, + "learning_rate": 0.0015, + "loss": 1.4477, + "step": 5070 + }, + { + "epoch": 0.5358649789029536, + "grad_norm": 0.5926287174224854, + "learning_rate": 0.0015, + "loss": 1.4608, + "step": 5080 + }, + { + "epoch": 0.5369198312236287, + "grad_norm": 0.5033077001571655, + "learning_rate": 0.0015, + "loss": 1.4493, + "step": 5090 + }, + { + "epoch": 0.5379746835443038, + "grad_norm": 0.5126532316207886, + "learning_rate": 0.0015, + "loss": 1.4342, + "step": 
5100 + }, + { + "epoch": 0.5390295358649789, + "grad_norm": 0.6754027009010315, + "learning_rate": 0.0015, + "loss": 1.4507, + "step": 5110 + }, + { + "epoch": 0.540084388185654, + "grad_norm": 0.6079409718513489, + "learning_rate": 0.0015, + "loss": 1.4433, + "step": 5120 + }, + { + "epoch": 0.5411392405063291, + "grad_norm": 0.5625218749046326, + "learning_rate": 0.0015, + "loss": 1.4438, + "step": 5130 + }, + { + "epoch": 0.5421940928270043, + "grad_norm": 0.8679294586181641, + "learning_rate": 0.0015, + "loss": 1.4604, + "step": 5140 + }, + { + "epoch": 0.5432489451476793, + "grad_norm": 0.5662575364112854, + "learning_rate": 0.0015, + "loss": 1.4583, + "step": 5150 + }, + { + "epoch": 0.5443037974683544, + "grad_norm": 0.5976355671882629, + "learning_rate": 0.0015, + "loss": 1.4506, + "step": 5160 + }, + { + "epoch": 0.5453586497890295, + "grad_norm": 0.5095018148422241, + "learning_rate": 0.0015, + "loss": 1.4506, + "step": 5170 + }, + { + "epoch": 0.5464135021097046, + "grad_norm": 0.8036375045776367, + "learning_rate": 0.0015, + "loss": 1.4592, + "step": 5180 + }, + { + "epoch": 0.5474683544303798, + "grad_norm": 0.5669030547142029, + "learning_rate": 0.0015, + "loss": 1.4434, + "step": 5190 + }, + { + "epoch": 0.5485232067510548, + "grad_norm": 0.7651669979095459, + "learning_rate": 0.0015, + "loss": 1.4566, + "step": 5200 + }, + { + "epoch": 0.54957805907173, + "grad_norm": 0.5880611538887024, + "learning_rate": 0.0015, + "loss": 1.4392, + "step": 5210 + }, + { + "epoch": 0.5506329113924051, + "grad_norm": 0.6016876697540283, + "learning_rate": 0.0015, + "loss": 1.4502, + "step": 5220 + }, + { + "epoch": 0.5516877637130801, + "grad_norm": 0.5552685856819153, + "learning_rate": 0.0015, + "loss": 1.4418, + "step": 5230 + }, + { + "epoch": 0.5527426160337553, + "grad_norm": 0.68308025598526, + "learning_rate": 0.0015, + "loss": 1.4491, + "step": 5240 + }, + { + "epoch": 0.5537974683544303, + "grad_norm": 0.6933003067970276, + "learning_rate": 0.0015, + 
"loss": 1.4413, + "step": 5250 + }, + { + "epoch": 0.5548523206751055, + "grad_norm": 0.5555622577667236, + "learning_rate": 0.0015, + "loss": 1.4483, + "step": 5260 + }, + { + "epoch": 0.5559071729957806, + "grad_norm": 0.6394757032394409, + "learning_rate": 0.0015, + "loss": 1.4559, + "step": 5270 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 0.704420268535614, + "learning_rate": 0.0015, + "loss": 1.4484, + "step": 5280 + }, + { + "epoch": 0.5580168776371308, + "grad_norm": 0.5235179662704468, + "learning_rate": 0.0015, + "loss": 1.4487, + "step": 5290 + }, + { + "epoch": 0.5590717299578059, + "grad_norm": 0.610462486743927, + "learning_rate": 0.0015, + "loss": 1.4505, + "step": 5300 + }, + { + "epoch": 0.560126582278481, + "grad_norm": 0.7268403172492981, + "learning_rate": 0.0015, + "loss": 1.4379, + "step": 5310 + }, + { + "epoch": 0.5611814345991561, + "grad_norm": 0.6289941668510437, + "learning_rate": 0.0015, + "loss": 1.439, + "step": 5320 + }, + { + "epoch": 0.5622362869198312, + "grad_norm": 0.6171261072158813, + "learning_rate": 0.0015, + "loss": 1.4493, + "step": 5330 + }, + { + "epoch": 0.5632911392405063, + "grad_norm": 0.5034967064857483, + "learning_rate": 0.0015, + "loss": 1.442, + "step": 5340 + }, + { + "epoch": 0.5643459915611815, + "grad_norm": 0.5746512413024902, + "learning_rate": 0.0015, + "loss": 1.4342, + "step": 5350 + }, + { + "epoch": 0.5654008438818565, + "grad_norm": 0.8130653500556946, + "learning_rate": 0.0015, + "loss": 1.4459, + "step": 5360 + }, + { + "epoch": 0.5664556962025317, + "grad_norm": 0.7359591722488403, + "learning_rate": 0.0015, + "loss": 1.4428, + "step": 5370 + }, + { + "epoch": 0.5675105485232067, + "grad_norm": 0.543518602848053, + "learning_rate": 0.0015, + "loss": 1.438, + "step": 5380 + }, + { + "epoch": 0.5685654008438819, + "grad_norm": 0.5612589716911316, + "learning_rate": 0.0015, + "loss": 1.4356, + "step": 5390 + }, + { + "epoch": 0.569620253164557, + "grad_norm": 0.678749144077301, + 
"learning_rate": 0.0015, + "loss": 1.437, + "step": 5400 + }, + { + "epoch": 0.570675105485232, + "grad_norm": 0.5381031036376953, + "learning_rate": 0.0015, + "loss": 1.4393, + "step": 5410 + }, + { + "epoch": 0.5717299578059072, + "grad_norm": 0.5442678928375244, + "learning_rate": 0.0015, + "loss": 1.4348, + "step": 5420 + }, + { + "epoch": 0.5727848101265823, + "grad_norm": 0.643040657043457, + "learning_rate": 0.0015, + "loss": 1.43, + "step": 5430 + }, + { + "epoch": 0.5738396624472574, + "grad_norm": 0.5506795048713684, + "learning_rate": 0.0015, + "loss": 1.4304, + "step": 5440 + }, + { + "epoch": 0.5748945147679325, + "grad_norm": 0.6098572015762329, + "learning_rate": 0.0015, + "loss": 1.4348, + "step": 5450 + }, + { + "epoch": 0.5759493670886076, + "grad_norm": 0.6758444905281067, + "learning_rate": 0.0015, + "loss": 1.4392, + "step": 5460 + }, + { + "epoch": 0.5770042194092827, + "grad_norm": 0.5184838771820068, + "learning_rate": 0.0015, + "loss": 1.4396, + "step": 5470 + }, + { + "epoch": 0.5780590717299579, + "grad_norm": 0.9065413475036621, + "learning_rate": 0.0015, + "loss": 1.4399, + "step": 5480 + }, + { + "epoch": 0.5791139240506329, + "grad_norm": 0.5394531488418579, + "learning_rate": 0.0015, + "loss": 1.4473, + "step": 5490 + }, + { + "epoch": 0.580168776371308, + "grad_norm": 0.6227542757987976, + "learning_rate": 0.0015, + "loss": 1.4391, + "step": 5500 + }, + { + "epoch": 0.5812236286919831, + "grad_norm": 0.8134665489196777, + "learning_rate": 0.0015, + "loss": 1.4447, + "step": 5510 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 0.5177081823348999, + "learning_rate": 0.0015, + "loss": 1.436, + "step": 5520 + }, + { + "epoch": 0.5833333333333334, + "grad_norm": 0.6241568326950073, + "learning_rate": 0.0015, + "loss": 1.4298, + "step": 5530 + }, + { + "epoch": 0.5843881856540084, + "grad_norm": 0.5243352651596069, + "learning_rate": 0.0015, + "loss": 1.4286, + "step": 5540 + }, + { + "epoch": 0.5854430379746836, + "grad_norm": 
0.5504492521286011, + "learning_rate": 0.0015, + "loss": 1.4453, + "step": 5550 + }, + { + "epoch": 0.5864978902953587, + "grad_norm": 0.5448554158210754, + "learning_rate": 0.0015, + "loss": 1.4487, + "step": 5560 + }, + { + "epoch": 0.5875527426160337, + "grad_norm": 0.5740591287612915, + "learning_rate": 0.0015, + "loss": 1.4324, + "step": 5570 + }, + { + "epoch": 0.5886075949367089, + "grad_norm": 0.7025555968284607, + "learning_rate": 0.0015, + "loss": 1.4334, + "step": 5580 + }, + { + "epoch": 0.5896624472573839, + "grad_norm": 0.5911626815795898, + "learning_rate": 0.0015, + "loss": 1.4366, + "step": 5590 + }, + { + "epoch": 0.5907172995780591, + "grad_norm": 0.503633439540863, + "learning_rate": 0.0015, + "loss": 1.4313, + "step": 5600 + }, + { + "epoch": 0.5917721518987342, + "grad_norm": 0.6805056929588318, + "learning_rate": 0.0015, + "loss": 1.4384, + "step": 5610 + }, + { + "epoch": 0.5928270042194093, + "grad_norm": 0.5583463907241821, + "learning_rate": 0.0015, + "loss": 1.4463, + "step": 5620 + }, + { + "epoch": 0.5938818565400844, + "grad_norm": 0.6833731532096863, + "learning_rate": 0.0015, + "loss": 1.4413, + "step": 5630 + }, + { + "epoch": 0.5949367088607594, + "grad_norm": 0.5771982073783875, + "learning_rate": 0.0015, + "loss": 1.4511, + "step": 5640 + }, + { + "epoch": 0.5959915611814346, + "grad_norm": 0.6087093353271484, + "learning_rate": 0.0015, + "loss": 1.4457, + "step": 5650 + }, + { + "epoch": 0.5970464135021097, + "grad_norm": 0.7789549231529236, + "learning_rate": 0.0015, + "loss": 1.4377, + "step": 5660 + }, + { + "epoch": 0.5981012658227848, + "grad_norm": 0.6058619022369385, + "learning_rate": 0.0015, + "loss": 1.4469, + "step": 5670 + }, + { + "epoch": 0.5991561181434599, + "grad_norm": 0.5375464558601379, + "learning_rate": 0.0015, + "loss": 1.4455, + "step": 5680 + }, + { + "epoch": 0.6002109704641351, + "grad_norm": 0.8218992352485657, + "learning_rate": 0.0015, + "loss": 1.4239, + "step": 5690 + }, + { + "epoch": 
0.6012658227848101, + "grad_norm": 0.5948262810707092, + "learning_rate": 0.0015, + "loss": 1.43, + "step": 5700 + }, + { + "epoch": 0.6023206751054853, + "grad_norm": 0.5964241623878479, + "learning_rate": 0.0015, + "loss": 1.4337, + "step": 5710 + }, + { + "epoch": 0.6033755274261603, + "grad_norm": 0.5453081727027893, + "learning_rate": 0.0015, + "loss": 1.4377, + "step": 5720 + }, + { + "epoch": 0.6044303797468354, + "grad_norm": 0.5510696768760681, + "learning_rate": 0.0015, + "loss": 1.4355, + "step": 5730 + }, + { + "epoch": 0.6054852320675106, + "grad_norm": 0.5511655211448669, + "learning_rate": 0.0015, + "loss": 1.4331, + "step": 5740 + }, + { + "epoch": 0.6065400843881856, + "grad_norm": 0.6737241744995117, + "learning_rate": 0.0015, + "loss": 1.4107, + "step": 5750 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 0.5429701805114746, + "learning_rate": 0.0015, + "loss": 1.4199, + "step": 5760 + }, + { + "epoch": 0.6086497890295358, + "grad_norm": 0.7445785403251648, + "learning_rate": 0.0015, + "loss": 1.4221, + "step": 5770 + }, + { + "epoch": 0.609704641350211, + "grad_norm": 0.5367471575737, + "learning_rate": 0.0015, + "loss": 1.4298, + "step": 5780 + }, + { + "epoch": 0.6107594936708861, + "grad_norm": 0.6427586674690247, + "learning_rate": 0.0015, + "loss": 1.4255, + "step": 5790 + }, + { + "epoch": 0.6118143459915611, + "grad_norm": 0.6622743606567383, + "learning_rate": 0.0015, + "loss": 1.4321, + "step": 5800 + }, + { + "epoch": 0.6128691983122363, + "grad_norm": 0.5611438751220703, + "learning_rate": 0.0015, + "loss": 1.4336, + "step": 5810 + }, + { + "epoch": 0.6139240506329114, + "grad_norm": 0.5816289782524109, + "learning_rate": 0.0015, + "loss": 1.4464, + "step": 5820 + }, + { + "epoch": 0.6149789029535865, + "grad_norm": 0.5909755825996399, + "learning_rate": 0.0015, + "loss": 1.4297, + "step": 5830 + }, + { + "epoch": 0.6160337552742616, + "grad_norm": 0.7232508659362793, + "learning_rate": 0.0015, + "loss": 1.4431, + "step": 5840 
+ }, + { + "epoch": 0.6170886075949367, + "grad_norm": 0.8321460485458374, + "learning_rate": 0.0015, + "loss": 1.4451, + "step": 5850 + }, + { + "epoch": 0.6181434599156118, + "grad_norm": 0.5028564929962158, + "learning_rate": 0.0015, + "loss": 1.4327, + "step": 5860 + }, + { + "epoch": 0.619198312236287, + "grad_norm": 0.7685743570327759, + "learning_rate": 0.0015, + "loss": 1.4256, + "step": 5870 + }, + { + "epoch": 0.620253164556962, + "grad_norm": 0.5092529058456421, + "learning_rate": 0.0015, + "loss": 1.4391, + "step": 5880 + }, + { + "epoch": 0.6213080168776371, + "grad_norm": 0.5273023247718811, + "learning_rate": 0.0015, + "loss": 1.428, + "step": 5890 + }, + { + "epoch": 0.6223628691983122, + "grad_norm": 0.5593839883804321, + "learning_rate": 0.0015, + "loss": 1.4306, + "step": 5900 + }, + { + "epoch": 0.6234177215189873, + "grad_norm": 0.5306974053382874, + "learning_rate": 0.0015, + "loss": 1.425, + "step": 5910 + }, + { + "epoch": 0.6244725738396625, + "grad_norm": 0.5432724356651306, + "learning_rate": 0.0015, + "loss": 1.4189, + "step": 5920 + }, + { + "epoch": 0.6255274261603375, + "grad_norm": 1.200668215751648, + "learning_rate": 0.0015, + "loss": 1.4307, + "step": 5930 + }, + { + "epoch": 0.6265822784810127, + "grad_norm": 0.6720802187919617, + "learning_rate": 0.0015, + "loss": 1.4346, + "step": 5940 + }, + { + "epoch": 0.6276371308016878, + "grad_norm": 0.5483372807502747, + "learning_rate": 0.0015, + "loss": 1.4231, + "step": 5950 + }, + { + "epoch": 0.6286919831223629, + "grad_norm": 0.8048787117004395, + "learning_rate": 0.0015, + "loss": 1.4273, + "step": 5960 + }, + { + "epoch": 0.629746835443038, + "grad_norm": 0.5510703325271606, + "learning_rate": 0.0015, + "loss": 1.4342, + "step": 5970 + }, + { + "epoch": 0.630801687763713, + "grad_norm": 0.5123088955879211, + "learning_rate": 0.0015, + "loss": 1.4146, + "step": 5980 + }, + { + "epoch": 0.6318565400843882, + "grad_norm": 0.6412525177001953, + "learning_rate": 0.0015, + "loss": 
1.4331, + "step": 5990 + }, + { + "epoch": 0.6329113924050633, + "grad_norm": 0.5405699610710144, + "learning_rate": 0.0015, + "loss": 1.4421, + "step": 6000 + }, + { + "epoch": 0.6339662447257384, + "grad_norm": 0.5397794842720032, + "learning_rate": 0.0015, + "loss": 1.4293, + "step": 6010 + }, + { + "epoch": 0.6350210970464135, + "grad_norm": 0.6345342397689819, + "learning_rate": 0.0015, + "loss": 1.4332, + "step": 6020 + }, + { + "epoch": 0.6360759493670886, + "grad_norm": 0.5723614692687988, + "learning_rate": 0.0015, + "loss": 1.4158, + "step": 6030 + }, + { + "epoch": 0.6371308016877637, + "grad_norm": 0.5299696326255798, + "learning_rate": 0.0015, + "loss": 1.4341, + "step": 6040 + }, + { + "epoch": 0.6381856540084389, + "grad_norm": 0.7609160542488098, + "learning_rate": 0.0015, + "loss": 1.4174, + "step": 6050 + }, + { + "epoch": 0.6392405063291139, + "grad_norm": 0.5781378746032715, + "learning_rate": 0.0015, + "loss": 1.4343, + "step": 6060 + }, + { + "epoch": 0.640295358649789, + "grad_norm": 0.7221751809120178, + "learning_rate": 0.0015, + "loss": 1.4247, + "step": 6070 + }, + { + "epoch": 0.6413502109704642, + "grad_norm": 0.49339455366134644, + "learning_rate": 0.0015, + "loss": 1.4251, + "step": 6080 + }, + { + "epoch": 0.6424050632911392, + "grad_norm": 0.5464122295379639, + "learning_rate": 0.0015, + "loss": 1.4235, + "step": 6090 + }, + { + "epoch": 0.6434599156118144, + "grad_norm": 0.6804472804069519, + "learning_rate": 0.0015, + "loss": 1.4191, + "step": 6100 + }, + { + "epoch": 0.6445147679324894, + "grad_norm": 0.6100871562957764, + "learning_rate": 0.0015, + "loss": 1.4308, + "step": 6110 + }, + { + "epoch": 0.6455696202531646, + "grad_norm": 0.49249380826950073, + "learning_rate": 0.0015, + "loss": 1.4239, + "step": 6120 + }, + { + "epoch": 0.6466244725738397, + "grad_norm": 0.5322859883308411, + "learning_rate": 0.0015, + "loss": 1.4243, + "step": 6130 + }, + { + "epoch": 0.6476793248945147, + "grad_norm": 0.6617424488067627, + 
"learning_rate": 0.0015, + "loss": 1.4211, + "step": 6140 + }, + { + "epoch": 0.6487341772151899, + "grad_norm": 0.5216740369796753, + "learning_rate": 0.0015, + "loss": 1.4342, + "step": 6150 + }, + { + "epoch": 0.6497890295358649, + "grad_norm": 0.6193856596946716, + "learning_rate": 0.0015, + "loss": 1.4267, + "step": 6160 + }, + { + "epoch": 0.6508438818565401, + "grad_norm": 0.5639455318450928, + "learning_rate": 0.0014834368975312174, + "loss": 1.4098, + "step": 6170 + }, + { + "epoch": 0.6518987341772152, + "grad_norm": 0.5887807011604309, + "learning_rate": 0.0014629899726345957, + "loss": 1.4291, + "step": 6180 + }, + { + "epoch": 0.6529535864978903, + "grad_norm": 0.5052474737167358, + "learning_rate": 0.0014428248775471316, + "loss": 1.43, + "step": 6190 + }, + { + "epoch": 0.6540084388185654, + "grad_norm": 0.5447661280632019, + "learning_rate": 0.00142293772767289, + "loss": 1.4228, + "step": 6200 + }, + { + "epoch": 0.6550632911392406, + "grad_norm": 0.5228587985038757, + "learning_rate": 0.001403324691959192, + "loss": 1.4163, + "step": 6210 + }, + { + "epoch": 0.6561181434599156, + "grad_norm": 0.5889528393745422, + "learning_rate": 0.0013839819921586025, + "loss": 1.4231, + "step": 6220 + }, + { + "epoch": 0.6571729957805907, + "grad_norm": 0.5708619952201843, + "learning_rate": 0.0013649059021010894, + "loss": 1.4109, + "step": 6230 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 0.6014156937599182, + "learning_rate": 0.0013460927469762154, + "loss": 1.4135, + "step": 6240 + }, + { + "epoch": 0.6592827004219409, + "grad_norm": 0.5244723558425903, + "learning_rate": 0.0013275389026252255, + "loss": 1.4207, + "step": 6250 + }, + { + "epoch": 0.6603375527426161, + "grad_norm": 0.5485060811042786, + "learning_rate": 0.0013092407948428887, + "loss": 1.4081, + "step": 6260 + }, + { + "epoch": 0.6613924050632911, + "grad_norm": 0.7510374188423157, + "learning_rate": 0.001291194898688966, + "loss": 1.4185, + "step": 6270 + }, + { + "epoch": 
0.6624472573839663, + "grad_norm": 0.49165019392967224, + "learning_rate": 0.001273397737809166, + "loss": 1.4151, + "step": 6280 + }, + { + "epoch": 0.6635021097046413, + "grad_norm": 0.5156455039978027, + "learning_rate": 0.001255845883765463, + "loss": 1.4084, + "step": 6290 + }, + { + "epoch": 0.6645569620253164, + "grad_norm": 0.6343047618865967, + "learning_rate": 0.001238535955375642, + "loss": 1.4036, + "step": 6300 + }, + { + "epoch": 0.6656118143459916, + "grad_norm": 0.6259722709655762, + "learning_rate": 0.0012214646180619506, + "loss": 1.3995, + "step": 6310 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.47512054443359375, + "learning_rate": 0.001204628583208727, + "loss": 1.3935, + "step": 6320 + }, + { + "epoch": 0.6677215189873418, + "grad_norm": 0.6797612309455872, + "learning_rate": 0.0011880246075288827, + "loss": 1.4032, + "step": 6330 + }, + { + "epoch": 0.6687763713080169, + "grad_norm": 0.6804598569869995, + "learning_rate": 0.001171649492439115, + "loss": 1.3936, + "step": 6340 + }, + { + "epoch": 0.669831223628692, + "grad_norm": 0.5329886674880981, + "learning_rate": 0.0011555000834437364, + "loss": 1.4002, + "step": 6350 + }, + { + "epoch": 0.6708860759493671, + "grad_norm": 0.6307658553123474, + "learning_rate": 0.0011395732695269908, + "loss": 1.3984, + "step": 6360 + }, + { + "epoch": 0.6719409282700421, + "grad_norm": 0.5046476125717163, + "learning_rate": 0.0011238659825537505, + "loss": 1.3772, + "step": 6370 + }, + { + "epoch": 0.6729957805907173, + "grad_norm": 0.5106565356254578, + "learning_rate": 0.0011083751966784717, + "loss": 1.381, + "step": 6380 + }, + { + "epoch": 0.6740506329113924, + "grad_norm": 0.9580277800559998, + "learning_rate": 0.0010930979277622953, + "loss": 1.3967, + "step": 6390 + }, + { + "epoch": 0.6751054852320675, + "grad_norm": 0.5464169979095459, + "learning_rate": 0.0010780312327981854, + "loss": 1.395, + "step": 6400 + }, + { + "epoch": 0.6761603375527426, + "grad_norm": 0.503548800945282, + 
"learning_rate": 0.0010631722093439888, + "loss": 1.3895, + "step": 6410 + }, + { + "epoch": 0.6772151898734177, + "grad_norm": 0.5025346279144287, + "learning_rate": 0.00104851799496331, + "loss": 1.3778, + "step": 6420 + }, + { + "epoch": 0.6782700421940928, + "grad_norm": 0.4954620599746704, + "learning_rate": 0.0010340657666740914, + "loss": 1.3883, + "step": 6430 + }, + { + "epoch": 0.679324894514768, + "grad_norm": 0.56938636302948, + "learning_rate": 0.0010198127404047975, + "loss": 1.372, + "step": 6440 + }, + { + "epoch": 0.680379746835443, + "grad_norm": 0.5315554738044739, + "learning_rate": 0.0010057561704580897, + "loss": 1.3757, + "step": 6450 + }, + { + "epoch": 0.6814345991561181, + "grad_norm": 0.5042809844017029, + "learning_rate": 0.0009918933489818985, + "loss": 1.3863, + "step": 6460 + }, + { + "epoch": 0.6824894514767933, + "grad_norm": 0.5185731649398804, + "learning_rate": 0.0009782216054477827, + "loss": 1.3798, + "step": 6470 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 0.6783595681190491, + "learning_rate": 0.0009647383061364801, + "loss": 1.386, + "step": 6480 + }, + { + "epoch": 0.6845991561181435, + "grad_norm": 0.5089995265007019, + "learning_rate": 0.0009514408536305495, + "loss": 1.3809, + "step": 6490 + }, + { + "epoch": 0.6856540084388185, + "grad_norm": 0.480932354927063, + "learning_rate": 0.0009383266863140042, + "loss": 1.3939, + "step": 6500 + }, + { + "epoch": 0.6867088607594937, + "grad_norm": 0.4996776878833771, + "learning_rate": 0.000925393277878844, + "loss": 1.3939, + "step": 6510 + }, + { + "epoch": 0.6877637130801688, + "grad_norm": 0.4868309497833252, + "learning_rate": 0.0009126381368383879, + "loss": 1.3754, + "step": 6520 + }, + { + "epoch": 0.6888185654008439, + "grad_norm": 0.5652695894241333, + "learning_rate": 0.0009000588060473156, + "loss": 1.3678, + "step": 6530 + }, + { + "epoch": 0.689873417721519, + "grad_norm": 0.5811905264854431, + "learning_rate": 0.0008876528622283235, + "loss": 1.3757, + 
"step": 6540 + }, + { + "epoch": 0.6909282700421941, + "grad_norm": 0.5382415056228638, + "learning_rate": 0.0008754179155053053, + "loss": 1.3714, + "step": 6550 + }, + { + "epoch": 0.6919831223628692, + "grad_norm": 0.7246244549751282, + "learning_rate": 0.0008633516089429683, + "loss": 1.3727, + "step": 6560 + }, + { + "epoch": 0.6930379746835443, + "grad_norm": 0.49916544556617737, + "learning_rate": 0.0008514516180927928, + "loss": 1.3682, + "step": 6570 + }, + { + "epoch": 0.6940928270042194, + "grad_norm": 0.5470711588859558, + "learning_rate": 0.0008397156505452524, + "loss": 1.3623, + "step": 6580 + }, + { + "epoch": 0.6951476793248945, + "grad_norm": 0.6230381727218628, + "learning_rate": 0.0008281414454882051, + "loss": 1.3648, + "step": 6590 + }, + { + "epoch": 0.6962025316455697, + "grad_norm": 0.5245243906974792, + "learning_rate": 0.0008167267732713704, + "loss": 1.3725, + "step": 6600 + }, + { + "epoch": 0.6972573839662447, + "grad_norm": 0.505135178565979, + "learning_rate": 0.0008054694349768117, + "loss": 1.3576, + "step": 6610 + }, + { + "epoch": 0.6983122362869199, + "grad_norm": 0.4960167109966278, + "learning_rate": 0.0007943672619953359, + "loss": 1.3668, + "step": 6620 + }, + { + "epoch": 0.6993670886075949, + "grad_norm": 0.5264673829078674, + "learning_rate": 0.0007834181156087356, + "loss": 1.3599, + "step": 6630 + }, + { + "epoch": 0.70042194092827, + "grad_norm": 0.6166502833366394, + "learning_rate": 0.0007726198865777852, + "loss": 1.3636, + "step": 6640 + }, + { + "epoch": 0.7014767932489452, + "grad_norm": 0.536520779132843, + "learning_rate": 0.0007619704947359191, + "loss": 1.3552, + "step": 6650 + }, + { + "epoch": 0.7025316455696202, + "grad_norm": 0.495047390460968, + "learning_rate": 0.0007514678885885087, + "loss": 1.3559, + "step": 6660 + }, + { + "epoch": 0.7035864978902954, + "grad_norm": 0.47605499625205994, + "learning_rate": 0.0007411100449176633, + "loss": 1.3585, + "step": 6670 + }, + { + "epoch": 0.7046413502109705, 
+ "grad_norm": 0.4973078668117523, + "learning_rate": 0.0007308949683924791, + "loss": 1.3604, + "step": 6680 + }, + { + "epoch": 0.7056962025316456, + "grad_norm": 0.5651484131813049, + "learning_rate": 0.000720820691184658, + "loss": 1.3511, + "step": 6690 + }, + { + "epoch": 0.7067510548523207, + "grad_norm": 0.46959301829338074, + "learning_rate": 0.0007108852725894269, + "loss": 1.3531, + "step": 6700 + }, + { + "epoch": 0.7078059071729957, + "grad_norm": 0.5081770420074463, + "learning_rate": 0.000701086798651681, + "loss": 1.3544, + "step": 6710 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 0.5139282941818237, + "learning_rate": 0.0006914233817972798, + "loss": 1.3427, + "step": 6720 + }, + { + "epoch": 0.709915611814346, + "grad_norm": 0.49106651544570923, + "learning_rate": 0.0006818931604694261, + "loss": 1.3531, + "step": 6730 + }, + { + "epoch": 0.7109704641350211, + "grad_norm": 0.46146702766418457, + "learning_rate": 0.0006724942987700563, + "loss": 1.3528, + "step": 6740 + }, + { + "epoch": 0.7120253164556962, + "grad_norm": 0.4885563552379608, + "learning_rate": 0.0006632249861061732, + "loss": 1.3578, + "step": 6750 + }, + { + "epoch": 0.7130801687763713, + "grad_norm": 0.6136794090270996, + "learning_rate": 0.0006540834368410549, + "loss": 1.3502, + "step": 6760 + }, + { + "epoch": 0.7141350210970464, + "grad_norm": 0.5482812523841858, + "learning_rate": 0.0006450678899502701, + "loss": 1.3528, + "step": 6770 + }, + { + "epoch": 0.7151898734177216, + "grad_norm": 0.5637302398681641, + "learning_rate": 0.0006361766086824345, + "loss": 1.3499, + "step": 6780 + }, + { + "epoch": 0.7162447257383966, + "grad_norm": 0.46481969952583313, + "learning_rate": 0.000627407880224645, + "loss": 1.3556, + "step": 6790 + }, + { + "epoch": 0.7172995780590717, + "grad_norm": 0.4623727798461914, + "learning_rate": 0.0006187600153725225, + "loss": 1.3395, + "step": 6800 + }, + { + "epoch": 0.7183544303797469, + "grad_norm": 0.8997368216514587, + 
"learning_rate": 0.0006102313482048055, + "loss": 1.3446, + "step": 6810 + }, + { + "epoch": 0.7194092827004219, + "grad_norm": 0.5417001247406006, + "learning_rate": 0.0006018202357624274, + "loss": 1.3472, + "step": 6820 + }, + { + "epoch": 0.7204641350210971, + "grad_norm": 0.6152294874191284, + "learning_rate": 0.0005935250577320168, + "loss": 1.3418, + "step": 6830 + }, + { + "epoch": 0.7215189873417721, + "grad_norm": 0.672227144241333, + "learning_rate": 0.0005853442161337618, + "loss": 1.3325, + "step": 6840 + }, + { + "epoch": 0.7225738396624473, + "grad_norm": 0.4651714265346527, + "learning_rate": 0.0005772761350135759, + "loss": 1.3408, + "step": 6850 + }, + { + "epoch": 0.7236286919831224, + "grad_norm": 0.5990096926689148, + "learning_rate": 0.0005693192601395058, + "loss": 1.3335, + "step": 6860 + }, + { + "epoch": 0.7246835443037974, + "grad_norm": 0.4861851632595062, + "learning_rate": 0.000561472058702326, + "loss": 1.331, + "step": 6870 + }, + { + "epoch": 0.7257383966244726, + "grad_norm": 0.5941367745399475, + "learning_rate": 0.000553733019020258, + "loss": 1.337, + "step": 6880 + }, + { + "epoch": 0.7267932489451476, + "grad_norm": 0.5600623488426208, + "learning_rate": 0.0005461006502477612, + "loss": 1.3305, + "step": 6890 + }, + { + "epoch": 0.7278481012658228, + "grad_norm": 0.5032252669334412, + "learning_rate": 0.0005385734820883369, + "loss": 1.3289, + "step": 6900 + }, + { + "epoch": 0.7289029535864979, + "grad_norm": 0.5026607513427734, + "learning_rate": 0.0005311500645112907, + "loss": 1.355, + "step": 6910 + }, + { + "epoch": 0.729957805907173, + "grad_norm": 0.5264232158660889, + "learning_rate": 0.0005238289674723993, + "loss": 1.3353, + "step": 6920 + }, + { + "epoch": 0.7310126582278481, + "grad_norm": 0.47649210691452026, + "learning_rate": 0.0005166087806384274, + "loss": 1.3377, + "step": 6930 + }, + { + "epoch": 0.7320675105485233, + "grad_norm": 0.4772520363330841, + "learning_rate": 0.0005094881131154418, + "loss": 
1.3419, + "step": 6940 + }, + { + "epoch": 0.7331223628691983, + "grad_norm": 0.5586071610450745, + "learning_rate": 0.0005024655931808696, + "loss": 1.3374, + "step": 6950 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 0.49496790766716003, + "learning_rate": 0.0004955398680192508, + "loss": 1.328, + "step": 6960 + }, + { + "epoch": 0.7352320675105485, + "grad_norm": 0.5294111967086792, + "learning_rate": 0.000488709603461632, + "loss": 1.3248, + "step": 6970 + }, + { + "epoch": 0.7362869198312236, + "grad_norm": 0.47923532128334045, + "learning_rate": 0.000481973483728553, + "loss": 1.3276, + "step": 6980 + }, + { + "epoch": 0.7373417721518988, + "grad_norm": 0.4925960600376129, + "learning_rate": 0.0004753302111765748, + "loss": 1.3268, + "step": 6990 + }, + { + "epoch": 0.7383966244725738, + "grad_norm": 0.4940263032913208, + "learning_rate": 0.0004687785060483032, + "loss": 1.3384, + "step": 7000 + }, + { + "epoch": 0.739451476793249, + "grad_norm": 0.5015807151794434, + "learning_rate": 0.0004623171062258558, + "loss": 1.3094, + "step": 7010 + }, + { + "epoch": 0.740506329113924, + "grad_norm": 0.7085549235343933, + "learning_rate": 0.0004559447669877288, + "loss": 1.3253, + "step": 7020 + }, + { + "epoch": 0.7415611814345991, + "grad_norm": 0.4822874367237091, + "learning_rate": 0.00044966026076901413, + "loss": 1.3294, + "step": 7030 + }, + { + "epoch": 0.7426160337552743, + "grad_norm": 0.6149103045463562, + "learning_rate": 0.00044346237692492177, + "loss": 1.334, + "step": 7040 + }, + { + "epoch": 0.7436708860759493, + "grad_norm": 0.5246915221214294, + "learning_rate": 0.0004373499214975615, + "loss": 1.3194, + "step": 7050 + }, + { + "epoch": 0.7447257383966245, + "grad_norm": 0.5026849508285522, + "learning_rate": 0.0004313217169859396, + "loss": 1.324, + "step": 7060 + }, + { + "epoch": 0.7457805907172996, + "grad_norm": 0.585382878780365, + "learning_rate": 0.0004253766021191256, + "loss": 1.3323, + "step": 7070 + }, + { + "epoch": 
0.7468354430379747, + "grad_norm": 0.4607876241207123, + "learning_rate": 0.00041951343163254497, + "loss": 1.3266, + "step": 7080 + }, + { + "epoch": 0.7478902953586498, + "grad_norm": 0.5416311621665955, + "learning_rate": 0.00041373107604735626, + "loss": 1.3255, + "step": 7090 + }, + { + "epoch": 0.7489451476793249, + "grad_norm": 0.48268038034439087, + "learning_rate": 0.0004080284214528687, + "loss": 1.3225, + "step": 7100 + }, + { + "epoch": 0.75, + "grad_norm": 0.5381031036376953, + "learning_rate": 0.0004024043692919589, + "loss": 1.3282, + "step": 7110 + }, + { + "epoch": 0.7510548523206751, + "grad_norm": 0.5726626515388489, + "learning_rate": 0.0003968578361494449, + "loss": 1.325, + "step": 7120 + }, + { + "epoch": 0.7521097046413502, + "grad_norm": 0.4642830789089203, + "learning_rate": 0.000391387753543378, + "loss": 1.3323, + "step": 7130 + }, + { + "epoch": 0.7531645569620253, + "grad_norm": 0.4678974747657776, + "learning_rate": 0.00038599306771921023, + "loss": 1.3162, + "step": 7140 + }, + { + "epoch": 0.7542194092827004, + "grad_norm": 0.5471410751342773, + "learning_rate": 0.0003806727394468004, + "loss": 1.3134, + "step": 7150 + }, + { + "epoch": 0.7552742616033755, + "grad_norm": 0.46361368894577026, + "learning_rate": 0.0003754257438202162, + "loss": 1.3225, + "step": 7160 + }, + { + "epoch": 0.7563291139240507, + "grad_norm": 0.6638835668563843, + "learning_rate": 0.0003702510700602974, + "loss": 1.3296, + "step": 7170 + }, + { + "epoch": 0.7573839662447257, + "grad_norm": 0.5422773957252502, + "learning_rate": 0.0003651477213199393, + "loss": 1.3072, + "step": 7180 + }, + { + "epoch": 0.7584388185654009, + "grad_norm": 0.49227139353752136, + "learning_rate": 0.000360114714492061, + "loss": 1.3101, + "step": 7190 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 0.49915429949760437, + "learning_rate": 0.0003551510800202195, + "loss": 1.315, + "step": 7200 + }, + { + "epoch": 0.760548523206751, + "grad_norm": 0.5297178030014038, + 
"learning_rate": 0.0003502558617118353, + "loss": 1.319, + "step": 7210 + }, + { + "epoch": 0.7616033755274262, + "grad_norm": 0.55345618724823, + "learning_rate": 0.0003454281165539914, + "loss": 1.3311, + "step": 7220 + }, + { + "epoch": 0.7626582278481012, + "grad_norm": 0.5680271983146667, + "learning_rate": 0.00034066691453177176, + "loss": 1.3288, + "step": 7230 + }, + { + "epoch": 0.7637130801687764, + "grad_norm": 0.47741663455963135, + "learning_rate": 0.0003359713384491037, + "loss": 1.3229, + "step": 7240 + }, + { + "epoch": 0.7647679324894515, + "grad_norm": 0.5046437978744507, + "learning_rate": 0.00033134048375206944, + "loss": 1.3176, + "step": 7250 + }, + { + "epoch": 0.7658227848101266, + "grad_norm": 0.457300066947937, + "learning_rate": 0.0003267734583546536, + "loss": 1.3159, + "step": 7260 + }, + { + "epoch": 0.7668776371308017, + "grad_norm": 0.5015650987625122, + "learning_rate": 0.00032226938246689157, + "loss": 1.3147, + "step": 7270 + }, + { + "epoch": 0.7679324894514767, + "grad_norm": 0.5395479202270508, + "learning_rate": 0.0003178273884253874, + "loss": 1.3158, + "step": 7280 + }, + { + "epoch": 0.7689873417721519, + "grad_norm": 0.4769648015499115, + "learning_rate": 0.0003134466205261674, + "loss": 1.3257, + "step": 7290 + }, + { + "epoch": 0.770042194092827, + "grad_norm": 0.5480552315711975, + "learning_rate": 0.0003091262348598378, + "loss": 1.3288, + "step": 7300 + }, + { + "epoch": 0.7710970464135021, + "grad_norm": 0.47319650650024414, + "learning_rate": 0.0003048653991490141, + "loss": 1.3094, + "step": 7310 + }, + { + "epoch": 0.7721518987341772, + "grad_norm": 0.5190073847770691, + "learning_rate": 0.00030066329258799187, + "loss": 1.3068, + "step": 7320 + }, + { + "epoch": 0.7732067510548524, + "grad_norm": 0.5128898620605469, + "learning_rate": 0.0002965191056846266, + "loss": 1.3107, + "step": 7330 + }, + { + "epoch": 0.7742616033755274, + "grad_norm": 0.5712609887123108, + "learning_rate": 0.000292432040104394, + "loss": 
1.3076, + "step": 7340 + }, + { + "epoch": 0.7753164556962026, + "grad_norm": 0.4749037027359009, + "learning_rate": 0.00028840130851659853, + "loss": 1.3045, + "step": 7350 + }, + { + "epoch": 0.7763713080168776, + "grad_norm": 0.48418837785720825, + "learning_rate": 0.0002844261344427028, + "loss": 1.3137, + "step": 7360 + }, + { + "epoch": 0.7774261603375527, + "grad_norm": 0.47260743379592896, + "learning_rate": 0.0002805057521067471, + "loss": 1.3122, + "step": 7370 + }, + { + "epoch": 0.7784810126582279, + "grad_norm": 0.47214198112487793, + "learning_rate": 0.00027663940628783017, + "loss": 1.3006, + "step": 7380 + }, + { + "epoch": 0.7795358649789029, + "grad_norm": 0.5273362398147583, + "learning_rate": 0.00027282635217462393, + "loss": 1.3119, + "step": 7390 + }, + { + "epoch": 0.7805907172995781, + "grad_norm": 0.5290525555610657, + "learning_rate": 0.0002690658552218937, + "loss": 1.3165, + "step": 7400 + }, + { + "epoch": 0.7816455696202531, + "grad_norm": 0.48313039541244507, + "learning_rate": 0.00026535719100899516, + "loss": 1.2975, + "step": 7410 + }, + { + "epoch": 0.7827004219409283, + "grad_norm": 0.49278801679611206, + "learning_rate": 0.00026169964510032245, + "loss": 1.3032, + "step": 7420 + }, + { + "epoch": 0.7837552742616034, + "grad_norm": 0.4592038691043854, + "learning_rate": 0.00025809251290767984, + "loss": 1.2909, + "step": 7430 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 0.47983959317207336, + "learning_rate": 0.00025453509955454957, + "loss": 1.2932, + "step": 7440 + }, + { + "epoch": 0.7858649789029536, + "grad_norm": 0.5009822845458984, + "learning_rate": 0.00025102671974223175, + "loss": 1.2955, + "step": 7450 + }, + { + "epoch": 0.7869198312236287, + "grad_norm": 0.47341182827949524, + "learning_rate": 0.00024756669761782815, + "loss": 1.3073, + "step": 7460 + }, + { + "epoch": 0.7879746835443038, + "grad_norm": 0.4908011555671692, + "learning_rate": 0.0002441543666440464, + "loss": 1.2989, + "step": 7470 + }, + { + 
"epoch": 0.7890295358649789, + "grad_norm": 0.5158747434616089, + "learning_rate": 0.00024078906947079878, + "loss": 1.3039, + "step": 7480 + }, + { + "epoch": 0.790084388185654, + "grad_norm": 0.469517320394516, + "learning_rate": 0.00023747015780857005, + "loss": 1.3101, + "step": 7490 + }, + { + "epoch": 0.7911392405063291, + "grad_norm": 0.47917693853378296, + "learning_rate": 0.00023419699230353144, + "loss": 1.3061, + "step": 7500 + }, + { + "epoch": 0.7921940928270043, + "grad_norm": 0.4800731837749481, + "learning_rate": 0.00023096894241437586, + "loss": 1.312, + "step": 7510 + }, + { + "epoch": 0.7932489451476793, + "grad_norm": 0.48870769143104553, + "learning_rate": 0.00022778538629085056, + "loss": 1.2957, + "step": 7520 + }, + { + "epoch": 0.7943037974683544, + "grad_norm": 0.48116040229797363, + "learning_rate": 0.00022464571065396427, + "loss": 1.2967, + "step": 7530 + }, + { + "epoch": 0.7953586497890295, + "grad_norm": 0.5820254683494568, + "learning_rate": 0.00022154931067784521, + "loss": 1.2962, + "step": 7540 + }, + { + "epoch": 0.7964135021097046, + "grad_norm": 0.4812745451927185, + "learning_rate": 0.00021849558987322782, + "loss": 1.2924, + "step": 7550 + }, + { + "epoch": 0.7974683544303798, + "grad_norm": 0.5120149254798889, + "learning_rate": 0.0002154839599725452, + "loss": 1.2939, + "step": 7560 + }, + { + "epoch": 0.7985232067510548, + "grad_norm": 0.4970279633998871, + "learning_rate": 0.00021251384081660544, + "loss": 1.307, + "step": 7570 + }, + { + "epoch": 0.79957805907173, + "grad_norm": 0.4871574938297272, + "learning_rate": 0.0002095846602428303, + "loss": 1.3018, + "step": 7580 + }, + { + "epoch": 0.8006329113924051, + "grad_norm": 0.4676864445209503, + "learning_rate": 0.00020669585397503358, + "loss": 1.293, + "step": 7590 + }, + { + "epoch": 0.8016877637130801, + "grad_norm": 0.4470655620098114, + "learning_rate": 0.0002038468655147195, + "loss": 1.2965, + "step": 7600 + }, + { + "epoch": 0.8027426160337553, + "grad_norm": 
0.5174440741539001, + "learning_rate": 0.00020103714603387894, + "loss": 1.3112, + "step": 7610 + }, + { + "epoch": 0.8037974683544303, + "grad_norm": 0.4520186483860016, + "learning_rate": 0.00019826615426926338, + "loss": 1.2835, + "step": 7620 + }, + { + "epoch": 0.8048523206751055, + "grad_norm": 0.45952776074409485, + "learning_rate": 0.00019553335641811625, + "loss": 1.3055, + "step": 7630 + }, + { + "epoch": 0.8059071729957806, + "grad_norm": 0.5378235578536987, + "learning_rate": 0.0001928382260353415, + "loss": 1.2962, + "step": 7640 + }, + { + "epoch": 0.8069620253164557, + "grad_norm": 0.4733293056488037, + "learning_rate": 0.00019018024393208902, + "loss": 1.3121, + "step": 7650 + }, + { + "epoch": 0.8080168776371308, + "grad_norm": 0.4595848321914673, + "learning_rate": 0.00018755889807573872, + "loss": 1.2913, + "step": 7660 + }, + { + "epoch": 0.8090717299578059, + "grad_norm": 0.4874734580516815, + "learning_rate": 0.00018497368349126262, + "loss": 1.2967, + "step": 7670 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 0.5490835309028625, + "learning_rate": 0.00018242410216394648, + "loss": 1.3108, + "step": 7680 + }, + { + "epoch": 0.8111814345991561, + "grad_norm": 0.44863075017929077, + "learning_rate": 0.0001799096629434529, + "loss": 1.2844, + "step": 7690 + }, + { + "epoch": 0.8122362869198312, + "grad_norm": 0.4595239758491516, + "learning_rate": 0.00017742988144920578, + "loss": 1.2936, + "step": 7700 + }, + { + "epoch": 0.8132911392405063, + "grad_norm": 0.6216907501220703, + "learning_rate": 0.00017498427997707976, + "loss": 1.2918, + "step": 7710 + }, + { + "epoch": 0.8143459915611815, + "grad_norm": 0.4747745394706726, + "learning_rate": 0.00017257238740737548, + "loss": 1.2967, + "step": 7720 + }, + { + "epoch": 0.8154008438818565, + "grad_norm": 0.49981772899627686, + "learning_rate": 0.00017019373911406307, + "loss": 1.3045, + "step": 7730 + }, + { + "epoch": 0.8164556962025317, + "grad_norm": 0.46517640352249146, + 
"learning_rate": 0.000167847876875277, + "loss": 1.2998, + "step": 7740 + }, + { + "epoch": 0.8175105485232067, + "grad_norm": 0.4871491491794586, + "learning_rate": 0.00016553434878504428, + "loss": 1.2859, + "step": 7750 + }, + { + "epoch": 0.8185654008438819, + "grad_norm": 0.5478642582893372, + "learning_rate": 0.00016325270916622947, + "loss": 1.2875, + "step": 7760 + }, + { + "epoch": 0.819620253164557, + "grad_norm": 0.4661363959312439, + "learning_rate": 0.00016100251848467966, + "loss": 1.2952, + "step": 7770 + }, + { + "epoch": 0.820675105485232, + "grad_norm": 0.5517392158508301, + "learning_rate": 0.0001587833432645528, + "loss": 1.2882, + "step": 7780 + }, + { + "epoch": 0.8217299578059072, + "grad_norm": 0.5190399289131165, + "learning_rate": 0.00015659475600481292, + "loss": 1.3051, + "step": 7790 + }, + { + "epoch": 0.8227848101265823, + "grad_norm": 0.46546727418899536, + "learning_rate": 0.00015443633509687688, + "loss": 1.2928, + "step": 7800 + }, + { + "epoch": 0.8238396624472574, + "grad_norm": 0.5459626913070679, + "learning_rate": 0.00015230766474339536, + "loss": 1.293, + "step": 7810 + }, + { + "epoch": 0.8248945147679325, + "grad_norm": 0.4505077302455902, + "learning_rate": 0.00015020833487815416, + "loss": 1.2994, + "step": 7820 + }, + { + "epoch": 0.8259493670886076, + "grad_norm": 0.47471731901168823, + "learning_rate": 0.0001481379410870792, + "loss": 1.2903, + "step": 7830 + }, + { + "epoch": 0.8270042194092827, + "grad_norm": 0.4919489324092865, + "learning_rate": 0.00014609608453033013, + "loss": 1.2758, + "step": 7840 + }, + { + "epoch": 0.8280590717299579, + "grad_norm": 0.48681142926216125, + "learning_rate": 0.00014408237186546807, + "loss": 1.2849, + "step": 7850 + }, + { + "epoch": 0.8291139240506329, + "grad_norm": 0.4562098979949951, + "learning_rate": 0.00014209641517168273, + "loss": 1.2734, + "step": 7860 + }, + { + "epoch": 0.830168776371308, + "grad_norm": 0.5935469269752502, + "learning_rate": 0.00014013783187506265, 
+ "loss": 1.2873, + "step": 7870 + }, + { + "epoch": 0.8312236286919831, + "grad_norm": 0.47083306312561035, + "learning_rate": 0.00013820624467489697, + "loss": 1.3074, + "step": 7880 + }, + { + "epoch": 0.8322784810126582, + "grad_norm": 0.4802079498767853, + "learning_rate": 0.00013630128147099213, + "loss": 1.3035, + "step": 7890 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.4673043489456177, + "learning_rate": 0.00013442257529199068, + "loss": 1.2809, + "step": 7900 + }, + { + "epoch": 0.8343881856540084, + "grad_norm": 0.521275520324707, + "learning_rate": 0.00013256976422467803, + "loss": 1.2926, + "step": 7910 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 0.5621090531349182, + "learning_rate": 0.00013074249134426366, + "loss": 1.2899, + "step": 7920 + }, + { + "epoch": 0.8364978902953587, + "grad_norm": 0.46371445059776306, + "learning_rate": 0.0001289404046456233, + "loss": 1.3009, + "step": 7930 + }, + { + "epoch": 0.8375527426160337, + "grad_norm": 0.4880622625350952, + "learning_rate": 0.0001271631569754887, + "loss": 1.291, + "step": 7940 + }, + { + "epoch": 0.8386075949367089, + "grad_norm": 0.49742773175239563, + "learning_rate": 0.0001254104059655723, + "loss": 1.2959, + "step": 7950 + }, + { + "epoch": 0.8396624472573839, + "grad_norm": 0.45721614360809326, + "learning_rate": 0.00012368181396661337, + "loss": 1.2823, + "step": 7960 + }, + { + "epoch": 0.8407172995780591, + "grad_norm": 0.46568551659584045, + "learning_rate": 0.00012197704798333364, + "loss": 1.2831, + "step": 7970 + }, + { + "epoch": 0.8417721518987342, + "grad_norm": 0.47091469168663025, + "learning_rate": 0.00012029577961028894, + "loss": 1.282, + "step": 7980 + }, + { + "epoch": 0.8428270042194093, + "grad_norm": 0.5390154719352722, + "learning_rate": 0.00011863768496860542, + "loss": 1.2927, + "step": 7990 + }, + { + "epoch": 0.8438818565400844, + "grad_norm": 0.577823281288147, + "learning_rate": 0.00011700244464358777, + "loss": 1.2881, + "step": 8000 + }, 
+ { + "epoch": 0.8449367088607594, + "grad_norm": 0.47146788239479065, + "learning_rate": 0.00011538974362318715, + "loss": 1.2935, + "step": 8010 + }, + { + "epoch": 0.8459915611814346, + "grad_norm": 0.49528032541275024, + "learning_rate": 0.00011379927123731737, + "loss": 1.2876, + "step": 8020 + }, + { + "epoch": 0.8470464135021097, + "grad_norm": 0.48834657669067383, + "learning_rate": 0.0001122307210980077, + "loss": 1.2853, + "step": 8030 + }, + { + "epoch": 0.8481012658227848, + "grad_norm": 0.45491474866867065, + "learning_rate": 0.00011068379104038026, + "loss": 1.2962, + "step": 8040 + }, + { + "epoch": 0.8491561181434599, + "grad_norm": 0.45627567172050476, + "learning_rate": 0.00010915818306444116, + "loss": 1.2778, + "step": 8050 + }, + { + "epoch": 0.8502109704641351, + "grad_norm": 0.4637642800807953, + "learning_rate": 0.00010765360327767384, + "loss": 1.2853, + "step": 8060 + }, + { + "epoch": 0.8512658227848101, + "grad_norm": 0.4641653299331665, + "learning_rate": 0.00010616976183842376, + "loss": 1.291, + "step": 8070 + }, + { + "epoch": 0.8523206751054853, + "grad_norm": 0.4908168911933899, + "learning_rate": 0.00010470637290006365, + "loss": 1.2926, + "step": 8080 + }, + { + "epoch": 0.8533755274261603, + "grad_norm": 0.4617968797683716, + "learning_rate": 0.00010326315455592764, + "loss": 1.28, + "step": 8090 + }, + { + "epoch": 0.8544303797468354, + "grad_norm": 0.47194594144821167, + "learning_rate": 0.0001018398287850053, + "loss": 1.2752, + "step": 8100 + }, + { + "epoch": 0.8554852320675106, + "grad_norm": 0.5317877531051636, + "learning_rate": 0.00010043612139838357, + "loss": 1.2938, + "step": 8110 + }, + { + "epoch": 0.8565400843881856, + "grad_norm": 0.46418246626853943, + "learning_rate": 9.905176198642719e-05, + "loss": 1.2834, + "step": 8120 + }, + { + "epoch": 0.8575949367088608, + "grad_norm": 0.49755966663360596, + "learning_rate": 9.76864838666871e-05, + "loss": 1.2832, + "step": 8130 + }, + { + "epoch": 0.8586497890295358, + 
"grad_norm": 0.46786341071128845, + "learning_rate": 9.634002403252676e-05, + "loss": 1.2836, + "step": 8140 + }, + { + "epoch": 0.859704641350211, + "grad_norm": 0.4702818989753723, + "learning_rate": 9.501212310245681e-05, + "loss": 1.2783, + "step": 8150 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 0.44937238097190857, + "learning_rate": 9.370252527016777e-05, + "loss": 1.2899, + "step": 8160 + }, + { + "epoch": 0.8618143459915611, + "grad_norm": 0.49406906962394714, + "learning_rate": 9.241097825525163e-05, + "loss": 1.2807, + "step": 8170 + }, + { + "epoch": 0.8628691983122363, + "grad_norm": 0.5151875019073486, + "learning_rate": 9.113723325460276e-05, + "loss": 1.288, + "step": 8180 + }, + { + "epoch": 0.8639240506329114, + "grad_norm": 0.5420994162559509, + "learning_rate": 8.988104489448849e-05, + "loss": 1.2815, + "step": 8190 + }, + { + "epoch": 0.8649789029535865, + "grad_norm": 0.4622182250022888, + "learning_rate": 8.864217118328042e-05, + "loss": 1.2922, + "step": 8200 + }, + { + "epoch": 0.8660337552742616, + "grad_norm": 0.48794785141944885, + "learning_rate": 8.742037346483729e-05, + "loss": 1.2854, + "step": 8210 + }, + { + "epoch": 0.8670886075949367, + "grad_norm": 0.4837190508842468, + "learning_rate": 8.62154163725303e-05, + "loss": 1.2934, + "step": 8220 + }, + { + "epoch": 0.8681434599156118, + "grad_norm": 0.47257596254348755, + "learning_rate": 8.502706778390219e-05, + "loss": 1.29, + "step": 8230 + }, + { + "epoch": 0.869198312236287, + "grad_norm": 0.46022605895996094, + "learning_rate": 8.38550987759513e-05, + "loss": 1.2858, + "step": 8240 + }, + { + "epoch": 0.870253164556962, + "grad_norm": 0.4648395776748657, + "learning_rate": 8.269928358103191e-05, + "loss": 1.3001, + "step": 8250 + }, + { + "epoch": 0.8713080168776371, + "grad_norm": 0.4802897870540619, + "learning_rate": 8.155939954336243e-05, + "loss": 1.2892, + "step": 8260 + }, + { + "epoch": 0.8723628691983122, + "grad_norm": 0.49936237931251526, + 
"learning_rate": 8.043522707613312e-05, + "loss": 1.2858, + "step": 8270 + }, + { + "epoch": 0.8734177215189873, + "grad_norm": 0.4365840256214142, + "learning_rate": 7.932654961920486e-05, + "loss": 1.2715, + "step": 8280 + }, + { + "epoch": 0.8744725738396625, + "grad_norm": 0.4833032488822937, + "learning_rate": 7.823315359739135e-05, + "loss": 1.272, + "step": 8290 + }, + { + "epoch": 0.8755274261603375, + "grad_norm": 0.48729777336120605, + "learning_rate": 7.715482837931577e-05, + "loss": 1.2958, + "step": 8300 + }, + { + "epoch": 0.8765822784810127, + "grad_norm": 0.4626319110393524, + "learning_rate": 7.6091366236835e-05, + "loss": 1.2749, + "step": 8310 + }, + { + "epoch": 0.8776371308016878, + "grad_norm": 0.46448102593421936, + "learning_rate": 7.504256230502289e-05, + "loss": 1.2919, + "step": 8320 + }, + { + "epoch": 0.8786919831223629, + "grad_norm": 0.4604092538356781, + "learning_rate": 7.400821454270524e-05, + "loss": 1.2859, + "step": 8330 + }, + { + "epoch": 0.879746835443038, + "grad_norm": 0.45476552844047546, + "learning_rate": 7.29881236935386e-05, + "loss": 1.274, + "step": 8340 + }, + { + "epoch": 0.880801687763713, + "grad_norm": 0.46123555302619934, + "learning_rate": 7.198209324762562e-05, + "loss": 1.2777, + "step": 8350 + }, + { + "epoch": 0.8818565400843882, + "grad_norm": 0.4466295838356018, + "learning_rate": 7.098992940365946e-05, + "loss": 1.2739, + "step": 8360 + }, + { + "epoch": 0.8829113924050633, + "grad_norm": 0.4567776024341583, + "learning_rate": 7.001144103159e-05, + "loss": 1.2824, + "step": 8370 + }, + { + "epoch": 0.8839662447257384, + "grad_norm": 0.47256141901016235, + "learning_rate": 6.904643963580461e-05, + "loss": 1.2882, + "step": 8380 + }, + { + "epoch": 0.8850210970464135, + "grad_norm": 0.5056165456771851, + "learning_rate": 6.809473931881644e-05, + "loss": 1.2796, + "step": 8390 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 0.4639674127101898, + "learning_rate": 6.71561567454532e-05, + "loss": 
1.2782, + "step": 8400 + }, + { + "epoch": 0.8871308016877637, + "grad_norm": 0.47839340567588806, + "learning_rate": 6.623051110753948e-05, + "loss": 1.2982, + "step": 8410 + }, + { + "epoch": 0.8881856540084389, + "grad_norm": 0.46292224526405334, + "learning_rate": 6.531762408906607e-05, + "loss": 1.2858, + "step": 8420 + }, + { + "epoch": 0.8892405063291139, + "grad_norm": 0.4787479639053345, + "learning_rate": 6.441731983183912e-05, + "loss": 1.2826, + "step": 8430 + }, + { + "epoch": 0.890295358649789, + "grad_norm": 0.46926361322402954, + "learning_rate": 6.352942490160292e-05, + "loss": 1.2751, + "step": 8440 + }, + { + "epoch": 0.8913502109704642, + "grad_norm": 0.5352609157562256, + "learning_rate": 6.265376825462966e-05, + "loss": 1.2856, + "step": 8450 + }, + { + "epoch": 0.8924050632911392, + "grad_norm": 0.536734402179718, + "learning_rate": 6.179018120476945e-05, + "loss": 1.2794, + "step": 8460 + }, + { + "epoch": 0.8934599156118144, + "grad_norm": 0.44269171357154846, + "learning_rate": 6.0938497390954946e-05, + "loss": 1.2805, + "step": 8470 + }, + { + "epoch": 0.8945147679324894, + "grad_norm": 0.4609244763851166, + "learning_rate": 6.009855274515339e-05, + "loss": 1.2765, + "step": 8480 + }, + { + "epoch": 0.8955696202531646, + "grad_norm": 0.48474302887916565, + "learning_rate": 5.9270185460760735e-05, + "loss": 1.281, + "step": 8490 + }, + { + "epoch": 0.8966244725738397, + "grad_norm": 0.45089036226272583, + "learning_rate": 5.8453235961431225e-05, + "loss": 1.2712, + "step": 8500 + }, + { + "epoch": 0.8976793248945147, + "grad_norm": 0.4711897671222687, + "learning_rate": 5.764754687033678e-05, + "loss": 1.2824, + "step": 8510 + }, + { + "epoch": 0.8987341772151899, + "grad_norm": 0.47829535603523254, + "learning_rate": 5.6852962979849836e-05, + "loss": 1.2769, + "step": 8520 + }, + { + "epoch": 0.8997890295358649, + "grad_norm": 0.4775094985961914, + "learning_rate": 5.6069331221644284e-05, + "loss": 1.2932, + "step": 8530 + }, + { + 
"epoch": 0.9008438818565401, + "grad_norm": 0.4563244581222534, + "learning_rate": 5.529650063720842e-05, + "loss": 1.2872, + "step": 8540 + }, + { + "epoch": 0.9018987341772152, + "grad_norm": 0.4800124764442444, + "learning_rate": 5.453432234876445e-05, + "loss": 1.2797, + "step": 8550 + }, + { + "epoch": 0.9029535864978903, + "grad_norm": 0.46603870391845703, + "learning_rate": 5.37826495305886e-05, + "loss": 1.272, + "step": 8560 + }, + { + "epoch": 0.9040084388185654, + "grad_norm": 0.5261629223823547, + "learning_rate": 5.304133738072674e-05, + "loss": 1.2907, + "step": 8570 + }, + { + "epoch": 0.9050632911392406, + "grad_norm": 0.47402966022491455, + "learning_rate": 5.2310243093099814e-05, + "loss": 1.2805, + "step": 8580 + }, + { + "epoch": 0.9061181434599156, + "grad_norm": 0.5603070259094238, + "learning_rate": 5.158922582999368e-05, + "loss": 1.2857, + "step": 8590 + }, + { + "epoch": 0.9071729957805907, + "grad_norm": 0.4486725628376007, + "learning_rate": 5.087814669492819e-05, + "loss": 1.2796, + "step": 8600 + }, + { + "epoch": 0.9082278481012658, + "grad_norm": 0.4944787919521332, + "learning_rate": 5.017686870590028e-05, + "loss": 1.2788, + "step": 8610 + }, + { + "epoch": 0.9092827004219409, + "grad_norm": 0.4802660048007965, + "learning_rate": 4.948525676899577e-05, + "loss": 1.2749, + "step": 8620 + }, + { + "epoch": 0.9103375527426161, + "grad_norm": 0.48038867115974426, + "learning_rate": 4.880317765236493e-05, + "loss": 1.2865, + "step": 8630 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 0.5557894706726074, + "learning_rate": 4.8130499960556755e-05, + "loss": 1.2679, + "step": 8640 + }, + { + "epoch": 0.9124472573839663, + "grad_norm": 0.4686918258666992, + "learning_rate": 4.746709410920699e-05, + "loss": 1.2718, + "step": 8650 + }, + { + "epoch": 0.9135021097046413, + "grad_norm": 0.5099261999130249, + "learning_rate": 4.681283230007507e-05, + "loss": 1.2728, + "step": 8660 + }, + { + "epoch": 0.9145569620253164, + "grad_norm": 
0.508106529712677, + "learning_rate": 4.616758849642509e-05, + "loss": 1.2793, + "step": 8670 + }, + { + "epoch": 0.9156118143459916, + "grad_norm": 0.4402479827404022, + "learning_rate": 4.553123839874615e-05, + "loss": 1.2907, + "step": 8680 + }, + { + "epoch": 0.9166666666666666, + "grad_norm": 0.48903051018714905, + "learning_rate": 4.490365942080736e-05, + "loss": 1.2817, + "step": 8690 + }, + { + "epoch": 0.9177215189873418, + "grad_norm": 0.45789381861686707, + "learning_rate": 4.428473066604285e-05, + "loss": 1.2809, + "step": 8700 + }, + { + "epoch": 0.9187763713080169, + "grad_norm": 0.4754452407360077, + "learning_rate": 4.367433290426233e-05, + "loss": 1.2802, + "step": 8710 + }, + { + "epoch": 0.919831223628692, + "grad_norm": 0.4568568468093872, + "learning_rate": 4.3072348548682595e-05, + "loss": 1.2731, + "step": 8720 + }, + { + "epoch": 0.9208860759493671, + "grad_norm": 0.4637247622013092, + "learning_rate": 4.247866163327575e-05, + "loss": 1.2856, + "step": 8730 + }, + { + "epoch": 0.9219409282700421, + "grad_norm": 0.4735919237136841, + "learning_rate": 4.1893157790429404e-05, + "loss": 1.273, + "step": 8740 + }, + { + "epoch": 0.9229957805907173, + "grad_norm": 0.4588661789894104, + "learning_rate": 4.1315724228915066e-05, + "loss": 1.2726, + "step": 8750 + }, + { + "epoch": 0.9240506329113924, + "grad_norm": 0.46978574991226196, + "learning_rate": 4.074624971216005e-05, + "loss": 1.2616, + "step": 8760 + }, + { + "epoch": 0.9251054852320675, + "grad_norm": 0.4512912333011627, + "learning_rate": 4.018462453681889e-05, + "loss": 1.2716, + "step": 8770 + }, + { + "epoch": 0.9261603375527426, + "grad_norm": 0.45371580123901367, + "learning_rate": 3.963074051164014e-05, + "loss": 1.2799, + "step": 8780 + }, + { + "epoch": 0.9272151898734177, + "grad_norm": 0.4601062834262848, + "learning_rate": 3.908449093662446e-05, + "loss": 1.2732, + "step": 8790 + }, + { + "epoch": 0.9282700421940928, + "grad_norm": 0.461967408657074, + "learning_rate": 
3.854577058246998e-05, + "loss": 1.2708, + "step": 8800 + }, + { + "epoch": 0.929324894514768, + "grad_norm": 0.45294222235679626, + "learning_rate": 3.801447567030094e-05, + "loss": 1.2897, + "step": 8810 + }, + { + "epoch": 0.930379746835443, + "grad_norm": 0.4439496397972107, + "learning_rate": 3.7490503851675777e-05, + "loss": 1.2757, + "step": 8820 + }, + { + "epoch": 0.9314345991561181, + "grad_norm": 0.4653451442718506, + "learning_rate": 3.6973754188870806e-05, + "loss": 1.2939, + "step": 8830 + }, + { + "epoch": 0.9324894514767933, + "grad_norm": 0.4448198676109314, + "learning_rate": 3.6464127135435536e-05, + "loss": 1.2775, + "step": 8840 + }, + { + "epoch": 0.9335443037974683, + "grad_norm": 0.46878382563591003, + "learning_rate": 3.596152451701616e-05, + "loss": 1.2777, + "step": 8850 + }, + { + "epoch": 0.9345991561181435, + "grad_norm": 0.5037729144096375, + "learning_rate": 3.5465849512443226e-05, + "loss": 1.2766, + "step": 8860 + }, + { + "epoch": 0.9356540084388185, + "grad_norm": 0.46913567185401917, + "learning_rate": 3.4977006635080086e-05, + "loss": 1.2801, + "step": 8870 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 0.5404587388038635, + "learning_rate": 3.449490171442838e-05, + "loss": 1.282, + "step": 8880 + }, + { + "epoch": 0.9377637130801688, + "grad_norm": 0.4438877999782562, + "learning_rate": 3.401944187798702e-05, + "loss": 1.283, + "step": 8890 + }, + { + "epoch": 0.9388185654008439, + "grad_norm": 0.45989537239074707, + "learning_rate": 3.355053553336137e-05, + "loss": 1.2742, + "step": 8900 + }, + { + "epoch": 0.939873417721519, + "grad_norm": 0.4791684150695801, + "learning_rate": 3.308809235061882e-05, + "loss": 1.272, + "step": 8910 + }, + { + "epoch": 0.9409282700421941, + "grad_norm": 0.4399260878562927, + "learning_rate": 3.263202324488772e-05, + "loss": 1.2796, + "step": 8920 + }, + { + "epoch": 0.9419831223628692, + "grad_norm": 0.4575617015361786, + "learning_rate": 3.218224035919609e-05, + "loss": 1.2766, + 
"step": 8930 + }, + { + "epoch": 0.9430379746835443, + "grad_norm": 0.4506317377090454, + "learning_rate": 3.173865704754688e-05, + "loss": 1.2765, + "step": 8940 + }, + { + "epoch": 0.9440928270042194, + "grad_norm": 0.45531153678894043, + "learning_rate": 3.130118785822657e-05, + "loss": 1.2748, + "step": 8950 + }, + { + "epoch": 0.9451476793248945, + "grad_norm": 0.4633644223213196, + "learning_rate": 3.08697485173437e-05, + "loss": 1.2816, + "step": 8960 + }, + { + "epoch": 0.9462025316455697, + "grad_norm": 0.5337445735931396, + "learning_rate": 3.0444255912594442e-05, + "loss": 1.2772, + "step": 8970 + }, + { + "epoch": 0.9472573839662447, + "grad_norm": 0.5188563466072083, + "learning_rate": 3.002462807725185e-05, + "loss": 1.275, + "step": 8980 + }, + { + "epoch": 0.9483122362869199, + "grad_norm": 0.48296037316322327, + "learning_rate": 2.9610784174375868e-05, + "loss": 1.2797, + "step": 8990 + }, + { + "epoch": 0.9493670886075949, + "grad_norm": 0.46105465292930603, + "learning_rate": 2.920264448124087e-05, + "loss": 1.2758, + "step": 9000 + }, + { + "epoch": 0.95042194092827, + "grad_norm": 0.45554500818252563, + "learning_rate": 2.8800130373977936e-05, + "loss": 1.2726, + "step": 9010 + }, + { + "epoch": 0.9514767932489452, + "grad_norm": 0.4668406844139099, + "learning_rate": 2.84031643124288e-05, + "loss": 1.2734, + "step": 9020 + }, + { + "epoch": 0.9525316455696202, + "grad_norm": 0.4780922532081604, + "learning_rate": 2.8011669825208517e-05, + "loss": 1.2923, + "step": 9030 + }, + { + "epoch": 0.9535864978902954, + "grad_norm": 0.4693585932254791, + "learning_rate": 2.762557149497405e-05, + "loss": 1.2676, + "step": 9040 + }, + { + "epoch": 0.9546413502109705, + "grad_norm": 0.4524311423301697, + "learning_rate": 2.724479494389592e-05, + "loss": 1.2813, + "step": 9050 + }, + { + "epoch": 0.9556962025316456, + "grad_norm": 0.44265279173851013, + "learning_rate": 2.6869266819330058e-05, + "loss": 1.2797, + "step": 9060 + }, + { + "epoch": 
0.9567510548523207, + "grad_norm": 0.4572458863258362, + "learning_rate": 2.6498914779687228e-05, + "loss": 1.2814, + "step": 9070 + }, + { + "epoch": 0.9578059071729957, + "grad_norm": 0.4525168836116791, + "learning_rate": 2.6133667480497115e-05, + "loss": 1.2802, + "step": 9080 + }, + { + "epoch": 0.9588607594936709, + "grad_norm": 0.4815879464149475, + "learning_rate": 2.5773454560664597e-05, + "loss": 1.2822, + "step": 9090 + }, + { + "epoch": 0.959915611814346, + "grad_norm": 0.44361522793769836, + "learning_rate": 2.541820662891541e-05, + "loss": 1.2632, + "step": 9100 + }, + { + "epoch": 0.9609704641350211, + "grad_norm": 0.4554971754550934, + "learning_rate": 2.5067855250428616e-05, + "loss": 1.2706, + "step": 9110 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 0.520605742931366, + "learning_rate": 2.472233293365335e-05, + "loss": 1.2838, + "step": 9120 + }, + { + "epoch": 0.9630801687763713, + "grad_norm": 0.4688946604728699, + "learning_rate": 2.4381573117307307e-05, + "loss": 1.268, + "step": 9130 + }, + { + "epoch": 0.9641350210970464, + "grad_norm": 0.5332078337669373, + "learning_rate": 2.4045510157554362e-05, + "loss": 1.2868, + "step": 9140 + }, + { + "epoch": 0.9651898734177216, + "grad_norm": 0.47867295145988464, + "learning_rate": 2.3714079315358985e-05, + "loss": 1.2729, + "step": 9150 + }, + { + "epoch": 0.9662447257383966, + "grad_norm": 0.46423396468162537, + "learning_rate": 2.338721674401494e-05, + "loss": 1.2813, + "step": 9160 + }, + { + "epoch": 0.9672995780590717, + "grad_norm": 0.46052441000938416, + "learning_rate": 2.30648594768459e-05, + "loss": 1.2796, + "step": 9170 + }, + { + "epoch": 0.9683544303797469, + "grad_norm": 0.4625004529953003, + "learning_rate": 2.2746945415075523e-05, + "loss": 1.276, + "step": 9180 + }, + { + "epoch": 0.9694092827004219, + "grad_norm": 0.46688348054885864, + "learning_rate": 2.2433413315864803e-05, + "loss": 1.2731, + "step": 9190 + }, + { + "epoch": 0.9704641350210971, + "grad_norm": 
0.4896974563598633, + "learning_rate": 2.2124202780514277e-05, + "loss": 1.2757, + "step": 9200 + }, + { + "epoch": 0.9715189873417721, + "grad_norm": 0.4933200180530548, + "learning_rate": 2.1819254242828815e-05, + "loss": 1.2764, + "step": 9210 + }, + { + "epoch": 0.9725738396624473, + "grad_norm": 0.4900830388069153, + "learning_rate": 2.151850895764285e-05, + "loss": 1.2717, + "step": 9220 + }, + { + "epoch": 0.9736286919831224, + "grad_norm": 0.45735400915145874, + "learning_rate": 2.12219089895037e-05, + "loss": 1.2646, + "step": 9230 + }, + { + "epoch": 0.9746835443037974, + "grad_norm": 0.48359930515289307, + "learning_rate": 2.092939720151092e-05, + "loss": 1.2612, + "step": 9240 + }, + { + "epoch": 0.9757383966244726, + "grad_norm": 0.4569590985774994, + "learning_rate": 2.064091724430947e-05, + "loss": 1.2719, + "step": 9250 + }, + { + "epoch": 0.9767932489451476, + "grad_norm": 0.47614622116088867, + "learning_rate": 2.0356413545234603e-05, + "loss": 1.2671, + "step": 9260 + }, + { + "epoch": 0.9778481012658228, + "grad_norm": 0.48315489292144775, + "learning_rate": 2.0075831297606357e-05, + "loss": 1.2791, + "step": 9270 + }, + { + "epoch": 0.9789029535864979, + "grad_norm": 0.4540780484676361, + "learning_rate": 1.9799116450171627e-05, + "loss": 1.2692, + "step": 9280 + }, + { + "epoch": 0.979957805907173, + "grad_norm": 0.465520977973938, + "learning_rate": 1.952621569669175e-05, + "loss": 1.2733, + "step": 9290 + }, + { + "epoch": 0.9810126582278481, + "grad_norm": 0.4556669294834137, + "learning_rate": 1.9257076465673605e-05, + "loss": 1.2719, + "step": 9300 + }, + { + "epoch": 0.9820675105485233, + "grad_norm": 0.46988987922668457, + "learning_rate": 1.899164691024229e-05, + "loss": 1.2741, + "step": 9310 + }, + { + "epoch": 0.9831223628691983, + "grad_norm": 0.48423823714256287, + "learning_rate": 1.872987589815331e-05, + "loss": 1.2691, + "step": 9320 + }, + { + "epoch": 0.9841772151898734, + "grad_norm": 0.4487670063972473, + "learning_rate": 
1.8471713001942538e-05, + "loss": 1.2821, + "step": 9330 + }, + { + "epoch": 0.9852320675105485, + "grad_norm": 0.4768301546573639, + "learning_rate": 1.8217108489211845e-05, + "loss": 1.2784, + "step": 9340 + }, + { + "epoch": 0.9862869198312236, + "grad_norm": 0.462040513753891, + "learning_rate": 1.7966013313048696e-05, + "loss": 1.2883, + "step": 9350 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 0.4433370530605316, + "learning_rate": 1.7718379102577752e-05, + "loss": 1.2808, + "step": 9360 + }, + { + "epoch": 0.9883966244725738, + "grad_norm": 0.4689382314682007, + "learning_rate": 1.7474158153642745e-05, + "loss": 1.2792, + "step": 9370 + }, + { + "epoch": 0.989451476793249, + "grad_norm": 0.4566853940486908, + "learning_rate": 1.7233303419616745e-05, + "loss": 1.2648, + "step": 9380 + }, + { + "epoch": 0.990506329113924, + "grad_norm": 0.48965659737586975, + "learning_rate": 1.699576850233916e-05, + "loss": 1.27, + "step": 9390 + }, + { + "epoch": 0.9915611814345991, + "grad_norm": 0.45181959867477417, + "learning_rate": 1.6761507643177553e-05, + "loss": 1.2827, + "step": 9400 + }, + { + "epoch": 0.9926160337552743, + "grad_norm": 0.5077662467956543, + "learning_rate": 1.6530475714212752e-05, + "loss": 1.2588, + "step": 9410 + }, + { + "epoch": 0.9936708860759493, + "grad_norm": 0.4643486440181732, + "learning_rate": 1.6302628209545423e-05, + "loss": 1.2787, + "step": 9420 + }, + { + "epoch": 0.9947257383966245, + "grad_norm": 0.4604697525501251, + "learning_rate": 1.6077921236722464e-05, + "loss": 1.2783, + "step": 9430 + }, + { + "epoch": 0.9957805907172996, + "grad_norm": 0.44910043478012085, + "learning_rate": 1.5856311508281594e-05, + "loss": 1.2722, + "step": 9440 + }, + { + "epoch": 0.9968354430379747, + "grad_norm": 0.45268765091896057, + "learning_rate": 1.5637756333412454e-05, + "loss": 1.2814, + "step": 9450 + }, + { + "epoch": 0.9978902953586498, + "grad_norm": 0.4484785199165344, + "learning_rate": 1.542221360973268e-05, + "loss": 
1.2664, + "step": 9460 + }, + { + "epoch": 0.9989451476793249, + "grad_norm": 0.4483862519264221, + "learning_rate": 1.5209641815177312e-05, + "loss": 1.2798, + "step": 9470 + }, + { + "epoch": 1.0, + "grad_norm": 1.4878731966018677, + "learning_rate": 1.5e-05, + "loss": 1.2718, + "step": 9480 + } + ], + "logging_steps": 10, + "max_steps": 9480, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.832308198648013e+16, + "train_batch_size": 1024, + "trial_name": null, + "trial_params": null +} diff --git a/saves-mistral-bf16/checkpoint-9480/training_args.bin b/saves-mistral-bf16/checkpoint-9480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..aa3c35ed3fbe3812cc39e620a246556a678cf1b9 --- /dev/null +++ b/saves-mistral-bf16/checkpoint-9480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bc8b55a983f75c0f4eab6e47fd9864b7e3f9612fef049243208499e7b96333a +size 5112 diff --git a/saves-mistral-bf16/config.json b/saves-mistral-bf16/config.json new file mode 100644 index 0000000000000000000000000000000000000000..30ccb5f053665b9e670549b808a9653faa5efa34 --- /dev/null +++ b/saves-mistral-bf16/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "MistralForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 768, + "max_position_embeddings": 131072, + "model_type": "mistral", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "rms_norm_eps": 1e-06, + "rope_theta": 10000.0, + "sliding_window": 4096, + "tie_word_embeddings": true, + "torch_dtype": "float32", + 
"transformers_version": "4.42.0", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-mistral-bf16/generation_config.json b/saves-mistral-bf16/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..59c0f3c6815a220b6b4e852c51be873503df2ce0 --- /dev/null +++ b/saves-mistral-bf16/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.42.0" +} diff --git a/saves-mistral-bf16/model.safetensors b/saves-mistral-bf16/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3eeb6ef252acf50b39b3ed75976b84912b92be7b --- /dev/null +++ b/saves-mistral-bf16/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4939ffe6e400c9ae21c94dc5efb24d9e4fd3a81f8b30076bbd50a07906bad5e0 +size 8346712 diff --git a/saves-mistral-bf16/result.log b/saves-mistral-bf16/result.log new file mode 100644 index 0000000000000000000000000000000000000000..ab7cadc53bea052d3fd06302318d7dda2ad85976 --- /dev/null +++ b/saves-mistral-bf16/result.log @@ -0,0 +1 @@ +{'train_runtime': 5673.0952, 'train_samples_per_second': 1710.989, 'train_steps_per_second': 1.671, 'train_loss': 1.569398320274514, 'epoch': 1.0} \ No newline at end of file diff --git a/saves-mistral-bf16/special_tokens_map.json b/saves-mistral-bf16/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-mistral-bf16/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false + } +} diff --git a/saves-mistral-bf16/tokenizer.json b/saves-mistral-bf16/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-mistral-bf16/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + 
"4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 
194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 
345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, 
+ "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + 
"çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 
783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + 
"åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 
1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 
1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + 
"Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 
1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 
1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + 
"ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + 
"ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + 
"ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-mistral-bf16/tokenizer_config.json b/saves-mistral-bf16/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-mistral-bf16/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + 
"model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-mistral-cosine/checkpoint-9480/config.json b/saves-mistral-cosine/checkpoint-9480/config.json new file mode 100644 index 0000000000000000000000000000000000000000..27c3ad5440dbe5ee26df317f757bddbf9946d6e3 --- /dev/null +++ b/saves-mistral-cosine/checkpoint-9480/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "MistralForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 768, + "max_position_embeddings": 131072, + "model_type": "mistral", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "rms_norm_eps": 1e-06, + "rope_theta": 10000.0, + "sliding_window": 4096, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-mistral-cosine/checkpoint-9480/generation_config.json b/saves-mistral-cosine/checkpoint-9480/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b2fc224da8a3685f78c733a0ef85e67242c17b5a --- /dev/null +++ b/saves-mistral-cosine/checkpoint-9480/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.42.4" +} diff --git a/saves-mistral-cosine/checkpoint-9480/model.safetensors b/saves-mistral-cosine/checkpoint-9480/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1619dff2170727887ccbc223ae41474a23873267 --- /dev/null +++ b/saves-mistral-cosine/checkpoint-9480/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4f13e3e899449d568b0f4ac09529011492b1eae3c3e9acefb7f156d5b1a782f +size 8346712 diff --git 
a/saves-mistral-cosine/checkpoint-9480/optimizer.pt b/saves-mistral-cosine/checkpoint-9480/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..768612d87164cd4e76d618070aa58ec71c2ddce2 --- /dev/null +++ b/saves-mistral-cosine/checkpoint-9480/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:970344683a44793d52d6baedc20ca13c228674f3894a256be961a8520b642e57 +size 16706530 diff --git a/saves-mistral-cosine/checkpoint-9480/rng_state.pth b/saves-mistral-cosine/checkpoint-9480/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f8291cd6ce87668b786a72f3e93d072fbe54902 --- /dev/null +++ b/saves-mistral-cosine/checkpoint-9480/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c917636c7a58af68a29056522a757e9f9b99005b776641aa157c536967817d +size 14244 diff --git a/saves-mistral-cosine/checkpoint-9480/scheduler.pt b/saves-mistral-cosine/checkpoint-9480/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..74b04fc48c12ecef4ed191c0e0bab93fab8eb99a --- /dev/null +++ b/saves-mistral-cosine/checkpoint-9480/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cec59c7f60583f92116a8c17261df6f5e6643e0df2f9a66b3c4ae6ce50b33704 +size 1064 diff --git a/saves-mistral-cosine/checkpoint-9480/special_tokens_map.json b/saves-mistral-cosine/checkpoint-9480/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-mistral-cosine/checkpoint-9480/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-mistral-cosine/checkpoint-9480/tokenizer.json b/saves-mistral-cosine/checkpoint-9480/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-mistral-cosine/checkpoint-9480/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + 
"(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, 
+ "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 
335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + 
"ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + 
"计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 
773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 
915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 
1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + 
"?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + 
"ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 
1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + 
"åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + 
"åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 
1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 
1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-mistral-cosine/checkpoint-9480/tokenizer_config.json b/saves-mistral-cosine/checkpoint-9480/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-mistral-cosine/checkpoint-9480/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + 
"eos_token": "<|im_end|>", + "errors": "replace", + "model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-mistral-cosine/checkpoint-9480/trainer_state.json b/saves-mistral-cosine/checkpoint-9480/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1f9a1493b8f94d5777f878eb7db6b1c39854054b --- /dev/null +++ b/saves-mistral-cosine/checkpoint-9480/trainer_state.json @@ -0,0 +1,6669 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 9480, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0010548523206751054, + "grad_norm": 1.3174139261245728, + "learning_rate": 0.00015789473684210527, + "loss": 7.4948, + "step": 10 + }, + { + "epoch": 0.002109704641350211, + "grad_norm": 1.1707481145858765, + "learning_rate": 0.00031578947368421053, + "loss": 6.8902, + "step": 20 + }, + { + "epoch": 0.0031645569620253164, + "grad_norm": 0.8488827347755432, + "learning_rate": 0.00047368421052631577, + "loss": 6.2417, + "step": 30 + }, + { + "epoch": 0.004219409282700422, + "grad_norm": 0.9104025363922119, + "learning_rate": 0.0006315789473684211, + "loss": 5.758, + "step": 40 + }, + { + "epoch": 0.005274261603375527, + "grad_norm": 0.8210574388504028, + "learning_rate": 0.0007894736842105263, + "loss": 5.2979, + "step": 50 + }, + { + "epoch": 0.006329113924050633, + "grad_norm": 0.5913366675376892, + "learning_rate": 0.0009473684210526315, + "loss": 4.7834, + "step": 60 + }, + { + "epoch": 0.007383966244725738, + "grad_norm": 1.2791945934295654, + "learning_rate": 0.0011052631578947368, + "loss": 4.3813, + "step": 70 + }, + { + "epoch": 0.008438818565400843, + "grad_norm": 0.7251872420310974, + "learning_rate": 0.0012631578947368421, + "loss": 4.1281, + "step": 80 + }, + { + "epoch": 
0.00949367088607595, + "grad_norm": 0.8138174414634705, + "learning_rate": 0.0014210526315789472, + "loss": 3.9337, + "step": 90 + }, + { + "epoch": 0.010548523206751054, + "grad_norm": 1.1868605613708496, + "learning_rate": 0.0014999989494847376, + "loss": 3.7761, + "step": 100 + }, + { + "epoch": 0.011603375527426161, + "grad_norm": 0.6601371169090271, + "learning_rate": 0.0014999905453802946, + "loss": 3.6199, + "step": 110 + }, + { + "epoch": 0.012658227848101266, + "grad_norm": 1.1237516403198242, + "learning_rate": 0.0014999737372655805, + "loss": 3.5052, + "step": 120 + }, + { + "epoch": 0.013713080168776372, + "grad_norm": 0.9068588018417358, + "learning_rate": 0.0014999485253289388, + "loss": 3.4008, + "step": 130 + }, + { + "epoch": 0.014767932489451477, + "grad_norm": 0.8038877844810486, + "learning_rate": 0.0014999149098528814, + "loss": 3.3029, + "step": 140 + }, + { + "epoch": 0.015822784810126583, + "grad_norm": 0.9885222911834717, + "learning_rate": 0.0014998728912140862, + "loss": 3.223, + "step": 150 + }, + { + "epoch": 0.016877637130801686, + "grad_norm": 0.5853537321090698, + "learning_rate": 0.0014998224698833922, + "loss": 3.1588, + "step": 160 + }, + { + "epoch": 0.017932489451476793, + "grad_norm": 0.7909514904022217, + "learning_rate": 0.0014997636464257956, + "loss": 3.0831, + "step": 170 + }, + { + "epoch": 0.0189873417721519, + "grad_norm": 0.9314720630645752, + "learning_rate": 0.0014996964215004416, + "loss": 3.0371, + "step": 180 + }, + { + "epoch": 0.020042194092827006, + "grad_norm": 0.7645787596702576, + "learning_rate": 0.0014996207958606182, + "loss": 2.9771, + "step": 190 + }, + { + "epoch": 0.02109704641350211, + "grad_norm": 0.9222848415374756, + "learning_rate": 0.001499536770353748, + "loss": 2.9321, + "step": 200 + }, + { + "epoch": 0.022151898734177215, + "grad_norm": 0.8581476211547852, + "learning_rate": 0.0014994443459213774, + "loss": 2.8951, + "step": 210 + }, + { + "epoch": 0.023206751054852322, + "grad_norm": 
0.772230863571167, + "learning_rate": 0.001499343523599168, + "loss": 2.8576, + "step": 220 + }, + { + "epoch": 0.024261603375527425, + "grad_norm": 0.6938006281852722, + "learning_rate": 0.0014992343045168823, + "loss": 2.8074, + "step": 230 + }, + { + "epoch": 0.02531645569620253, + "grad_norm": 0.9736182689666748, + "learning_rate": 0.0014991166898983739, + "loss": 2.765, + "step": 240 + }, + { + "epoch": 0.026371308016877638, + "grad_norm": 0.8435516953468323, + "learning_rate": 0.001498990681061572, + "loss": 2.7312, + "step": 250 + }, + { + "epoch": 0.027426160337552744, + "grad_norm": 0.8736611604690552, + "learning_rate": 0.001498856279418467, + "loss": 2.6974, + "step": 260 + }, + { + "epoch": 0.028481012658227847, + "grad_norm": 0.8827903866767883, + "learning_rate": 0.0014987134864750948, + "loss": 2.6672, + "step": 270 + }, + { + "epoch": 0.029535864978902954, + "grad_norm": 0.9826215505599976, + "learning_rate": 0.0014985623038315206, + "loss": 2.6284, + "step": 280 + }, + { + "epoch": 0.03059071729957806, + "grad_norm": 0.8384682536125183, + "learning_rate": 0.0014984027331818193, + "loss": 2.6063, + "step": 290 + }, + { + "epoch": 0.03164556962025317, + "grad_norm": 0.8106757998466492, + "learning_rate": 0.0014982347763140584, + "loss": 2.5831, + "step": 300 + }, + { + "epoch": 0.03270042194092827, + "grad_norm": 0.8059612512588501, + "learning_rate": 0.0014980584351102762, + "loss": 2.5577, + "step": 310 + }, + { + "epoch": 0.03375527426160337, + "grad_norm": 0.9752203226089478, + "learning_rate": 0.001497873711546462, + "loss": 2.528, + "step": 320 + }, + { + "epoch": 0.03481012658227848, + "grad_norm": 0.8949389457702637, + "learning_rate": 0.0014976806076925334, + "loss": 2.5183, + "step": 330 + }, + { + "epoch": 0.035864978902953586, + "grad_norm": 1.296714186668396, + "learning_rate": 0.0014974791257123137, + "loss": 2.5017, + "step": 340 + }, + { + "epoch": 0.03691983122362869, + "grad_norm": 0.9222632646560669, + "learning_rate": 
0.001497269267863507, + "loss": 2.4567, + "step": 350 + }, + { + "epoch": 0.0379746835443038, + "grad_norm": 0.9824777245521545, + "learning_rate": 0.0014970510364976724, + "loss": 2.4348, + "step": 360 + }, + { + "epoch": 0.039029535864978905, + "grad_norm": 0.8166004419326782, + "learning_rate": 0.0014968244340601996, + "loss": 2.423, + "step": 370 + }, + { + "epoch": 0.04008438818565401, + "grad_norm": 0.7333206534385681, + "learning_rate": 0.001496589463090279, + "loss": 2.4143, + "step": 380 + }, + { + "epoch": 0.04113924050632911, + "grad_norm": 1.539781093597412, + "learning_rate": 0.001496346126220875, + "loss": 2.3937, + "step": 390 + }, + { + "epoch": 0.04219409282700422, + "grad_norm": 0.7138977646827698, + "learning_rate": 0.0014960944261786966, + "loss": 2.368, + "step": 400 + }, + { + "epoch": 0.043248945147679324, + "grad_norm": 0.8108177185058594, + "learning_rate": 0.0014958343657841655, + "loss": 2.3592, + "step": 410 + }, + { + "epoch": 0.04430379746835443, + "grad_norm": 0.8652350902557373, + "learning_rate": 0.001495565947951385, + "loss": 2.3376, + "step": 420 + }, + { + "epoch": 0.04535864978902954, + "grad_norm": 0.8376107215881348, + "learning_rate": 0.0014952891756881085, + "loss": 2.3139, + "step": 430 + }, + { + "epoch": 0.046413502109704644, + "grad_norm": 0.8863264322280884, + "learning_rate": 0.0014950040520957037, + "loss": 2.2978, + "step": 440 + }, + { + "epoch": 0.04746835443037975, + "grad_norm": 0.8651350140571594, + "learning_rate": 0.0014947105803691204, + "loss": 2.2996, + "step": 450 + }, + { + "epoch": 0.04852320675105485, + "grad_norm": 0.813372015953064, + "learning_rate": 0.0014944087637968522, + "loss": 2.2764, + "step": 460 + }, + { + "epoch": 0.049578059071729956, + "grad_norm": 0.7915297150611877, + "learning_rate": 0.0014940986057609012, + "loss": 2.2578, + "step": 470 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 1.3394159078598022, + "learning_rate": 0.0014937801097367396, + "loss": 2.2565, + "step": 480 
+ }, + { + "epoch": 0.05168776371308017, + "grad_norm": 0.8625537753105164, + "learning_rate": 0.001493453279293271, + "loss": 2.2273, + "step": 490 + }, + { + "epoch": 0.052742616033755275, + "grad_norm": 0.795900821685791, + "learning_rate": 0.0014931181180927902, + "loss": 2.2188, + "step": 500 + }, + { + "epoch": 0.05379746835443038, + "grad_norm": 0.8610779643058777, + "learning_rate": 0.001492774629890942, + "loss": 2.2162, + "step": 510 + }, + { + "epoch": 0.05485232067510549, + "grad_norm": 0.785186767578125, + "learning_rate": 0.001492422818536679, + "loss": 2.2027, + "step": 520 + }, + { + "epoch": 0.05590717299578059, + "grad_norm": 0.7343863844871521, + "learning_rate": 0.00149206268797222, + "loss": 2.1821, + "step": 530 + }, + { + "epoch": 0.056962025316455694, + "grad_norm": 0.7887017130851746, + "learning_rate": 0.0014916942422330032, + "loss": 2.1646, + "step": 540 + }, + { + "epoch": 0.0580168776371308, + "grad_norm": 0.763201892375946, + "learning_rate": 0.001491317485447643, + "loss": 2.1675, + "step": 550 + }, + { + "epoch": 0.05907172995780591, + "grad_norm": 0.8176493644714355, + "learning_rate": 0.0014909324218378838, + "loss": 2.1303, + "step": 560 + }, + { + "epoch": 0.060126582278481014, + "grad_norm": 0.9729762673377991, + "learning_rate": 0.0014905390557185508, + "loss": 2.1389, + "step": 570 + }, + { + "epoch": 0.06118143459915612, + "grad_norm": 0.8628072142601013, + "learning_rate": 0.0014901373914975036, + "loss": 2.1402, + "step": 580 + }, + { + "epoch": 0.06223628691983123, + "grad_norm": 0.7356904745101929, + "learning_rate": 0.0014897274336755856, + "loss": 2.113, + "step": 590 + }, + { + "epoch": 0.06329113924050633, + "grad_norm": 0.7409104108810425, + "learning_rate": 0.001489309186846575, + "loss": 2.0935, + "step": 600 + }, + { + "epoch": 0.06434599156118144, + "grad_norm": 0.9244835376739502, + "learning_rate": 0.0014888826556971313, + "loss": 2.0987, + "step": 610 + }, + { + "epoch": 0.06540084388185655, + "grad_norm": 
0.6698127388954163, + "learning_rate": 0.0014884478450067444, + "loss": 2.0877, + "step": 620 + }, + { + "epoch": 0.06645569620253164, + "grad_norm": 0.9830562472343445, + "learning_rate": 0.0014880047596476807, + "loss": 2.0735, + "step": 630 + }, + { + "epoch": 0.06751054852320675, + "grad_norm": 0.693690836429596, + "learning_rate": 0.0014875534045849274, + "loss": 2.0836, + "step": 640 + }, + { + "epoch": 0.06856540084388185, + "grad_norm": 0.9646028876304626, + "learning_rate": 0.0014870937848761388, + "loss": 2.0705, + "step": 650 + }, + { + "epoch": 0.06962025316455696, + "grad_norm": 1.1930804252624512, + "learning_rate": 0.001486625905671578, + "loss": 2.0608, + "step": 660 + }, + { + "epoch": 0.07067510548523206, + "grad_norm": 0.8071310520172119, + "learning_rate": 0.00148614977221406, + "loss": 2.0418, + "step": 670 + }, + { + "epoch": 0.07172995780590717, + "grad_norm": 0.8942294716835022, + "learning_rate": 0.0014856653898388927, + "loss": 2.0329, + "step": 680 + }, + { + "epoch": 0.07278481012658228, + "grad_norm": 1.592276692390442, + "learning_rate": 0.001485172763973817, + "loss": 2.0493, + "step": 690 + }, + { + "epoch": 0.07383966244725738, + "grad_norm": 1.0186961889266968, + "learning_rate": 0.0014846719001389466, + "loss": 2.0422, + "step": 700 + }, + { + "epoch": 0.07489451476793249, + "grad_norm": 0.8879486918449402, + "learning_rate": 0.001484162803946705, + "loss": 2.0117, + "step": 710 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 0.8518250584602356, + "learning_rate": 0.0014836454811017635, + "loss": 1.9981, + "step": 720 + }, + { + "epoch": 0.0770042194092827, + "grad_norm": 0.8049193620681763, + "learning_rate": 0.0014831199374009778, + "loss": 1.9949, + "step": 730 + }, + { + "epoch": 0.07805907172995781, + "grad_norm": 0.6881877779960632, + "learning_rate": 0.0014825861787333208, + "loss": 2.0068, + "step": 740 + }, + { + "epoch": 0.07911392405063292, + "grad_norm": 0.8521316647529602, + "learning_rate": 
0.0014820442110798197, + "loss": 1.9842, + "step": 750 + }, + { + "epoch": 0.08016877637130802, + "grad_norm": 0.9349928498268127, + "learning_rate": 0.0014814940405134865, + "loss": 1.984, + "step": 760 + }, + { + "epoch": 0.08122362869198312, + "grad_norm": 1.0040972232818604, + "learning_rate": 0.001480935673199251, + "loss": 1.9721, + "step": 770 + }, + { + "epoch": 0.08227848101265822, + "grad_norm": 0.7317735552787781, + "learning_rate": 0.0014803691153938915, + "loss": 1.9711, + "step": 780 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 1.0402263402938843, + "learning_rate": 0.0014797943734459653, + "loss": 1.977, + "step": 790 + }, + { + "epoch": 0.08438818565400844, + "grad_norm": 1.1987184286117554, + "learning_rate": 0.001479211453795736, + "loss": 1.9627, + "step": 800 + }, + { + "epoch": 0.08544303797468354, + "grad_norm": 0.7736979126930237, + "learning_rate": 0.0014786203629751033, + "loss": 1.9357, + "step": 810 + }, + { + "epoch": 0.08649789029535865, + "grad_norm": 0.9265856742858887, + "learning_rate": 0.0014780211076075279, + "loss": 1.945, + "step": 820 + }, + { + "epoch": 0.08755274261603375, + "grad_norm": 0.7357726693153381, + "learning_rate": 0.0014774136944079594, + "loss": 1.9499, + "step": 830 + }, + { + "epoch": 0.08860759493670886, + "grad_norm": 0.7189483642578125, + "learning_rate": 0.0014767981301827592, + "loss": 1.9271, + "step": 840 + }, + { + "epoch": 0.08966244725738397, + "grad_norm": 0.8008044362068176, + "learning_rate": 0.0014761744218296249, + "loss": 1.926, + "step": 850 + }, + { + "epoch": 0.09071729957805907, + "grad_norm": 0.6457218527793884, + "learning_rate": 0.001475542576337513, + "loss": 1.9239, + "step": 860 + }, + { + "epoch": 0.09177215189873418, + "grad_norm": 0.7206600904464722, + "learning_rate": 0.001474902600786561, + "loss": 1.9246, + "step": 870 + }, + { + "epoch": 0.09282700421940929, + "grad_norm": 0.9191156029701233, + "learning_rate": 0.0014742545023480075, + "loss": 1.9214, + "step": 880 + 
}, + { + "epoch": 0.0938818565400844, + "grad_norm": 0.7292855381965637, + "learning_rate": 0.0014735982882841117, + "loss": 1.9111, + "step": 890 + }, + { + "epoch": 0.0949367088607595, + "grad_norm": 0.7191150784492493, + "learning_rate": 0.0014729339659480727, + "loss": 1.9057, + "step": 900 + }, + { + "epoch": 0.09599156118143459, + "grad_norm": 0.9848440885543823, + "learning_rate": 0.0014722615427839468, + "loss": 1.9138, + "step": 910 + }, + { + "epoch": 0.0970464135021097, + "grad_norm": 0.6940897107124329, + "learning_rate": 0.0014715810263265633, + "loss": 1.901, + "step": 920 + }, + { + "epoch": 0.0981012658227848, + "grad_norm": 0.7699787616729736, + "learning_rate": 0.0014708924242014423, + "loss": 1.8826, + "step": 930 + }, + { + "epoch": 0.09915611814345991, + "grad_norm": 0.7795881628990173, + "learning_rate": 0.0014701957441247064, + "loss": 1.8914, + "step": 940 + }, + { + "epoch": 0.10021097046413502, + "grad_norm": 0.7712106108665466, + "learning_rate": 0.0014694909939029959, + "loss": 1.8731, + "step": 950 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 0.7384747862815857, + "learning_rate": 0.0014687781814333814, + "loss": 1.8794, + "step": 960 + }, + { + "epoch": 0.10232067510548523, + "grad_norm": 0.9972801208496094, + "learning_rate": 0.0014680573147032746, + "loss": 1.8806, + "step": 970 + }, + { + "epoch": 0.10337552742616034, + "grad_norm": 0.7561640739440918, + "learning_rate": 0.0014673284017903392, + "loss": 1.858, + "step": 980 + }, + { + "epoch": 0.10443037974683544, + "grad_norm": 0.6888183951377869, + "learning_rate": 0.0014665914508624, + "loss": 1.8559, + "step": 990 + }, + { + "epoch": 0.10548523206751055, + "grad_norm": 1.424579381942749, + "learning_rate": 0.0014658464701773526, + "loss": 1.8695, + "step": 1000 + }, + { + "epoch": 0.10654008438818566, + "grad_norm": 0.8946407437324524, + "learning_rate": 0.0014650934680830688, + "loss": 1.8773, + "step": 1010 + }, + { + "epoch": 0.10759493670886076, + "grad_norm": 
0.7477951645851135, + "learning_rate": 0.0014643324530173051, + "loss": 1.8468, + "step": 1020 + }, + { + "epoch": 0.10864978902953587, + "grad_norm": 0.7701631188392639, + "learning_rate": 0.0014635634335076067, + "loss": 1.8514, + "step": 1030 + }, + { + "epoch": 0.10970464135021098, + "grad_norm": 0.7846369743347168, + "learning_rate": 0.001462786418171213, + "loss": 1.843, + "step": 1040 + }, + { + "epoch": 0.11075949367088607, + "grad_norm": 1.0196762084960938, + "learning_rate": 0.0014620014157149597, + "loss": 1.8439, + "step": 1050 + }, + { + "epoch": 0.11181434599156118, + "grad_norm": 0.8147345185279846, + "learning_rate": 0.001461208434935183, + "loss": 1.833, + "step": 1060 + }, + { + "epoch": 0.11286919831223628, + "grad_norm": 0.8321821093559265, + "learning_rate": 0.0014604074847176197, + "loss": 1.8237, + "step": 1070 + }, + { + "epoch": 0.11392405063291139, + "grad_norm": 0.7946534156799316, + "learning_rate": 0.0014595985740373082, + "loss": 1.8246, + "step": 1080 + }, + { + "epoch": 0.1149789029535865, + "grad_norm": 0.8706448078155518, + "learning_rate": 0.0014587817119584873, + "loss": 1.8332, + "step": 1090 + }, + { + "epoch": 0.1160337552742616, + "grad_norm": 0.7565662860870361, + "learning_rate": 0.001457956907634496, + "loss": 1.8209, + "step": 1100 + }, + { + "epoch": 0.11708860759493671, + "grad_norm": 0.6479936838150024, + "learning_rate": 0.0014571241703076692, + "loss": 1.8273, + "step": 1110 + }, + { + "epoch": 0.11814345991561181, + "grad_norm": 0.8128470182418823, + "learning_rate": 0.0014562835093092348, + "loss": 1.8183, + "step": 1120 + }, + { + "epoch": 0.11919831223628692, + "grad_norm": 0.8394371271133423, + "learning_rate": 0.0014554349340592104, + "loss": 1.8051, + "step": 1130 + }, + { + "epoch": 0.12025316455696203, + "grad_norm": 1.036134958267212, + "learning_rate": 0.001454578454066296, + "loss": 1.8215, + "step": 1140 + }, + { + "epoch": 0.12130801687763713, + "grad_norm": 1.2242012023925781, + "learning_rate": 
0.0014537140789277678, + "loss": 1.8029, + "step": 1150 + }, + { + "epoch": 0.12236286919831224, + "grad_norm": 0.638702392578125, + "learning_rate": 0.0014528418183293716, + "loss": 1.8108, + "step": 1160 + }, + { + "epoch": 0.12341772151898735, + "grad_norm": 0.6552984118461609, + "learning_rate": 0.001451961682045213, + "loss": 1.7987, + "step": 1170 + }, + { + "epoch": 0.12447257383966245, + "grad_norm": 0.8539090752601624, + "learning_rate": 0.001451073679937649, + "loss": 1.7812, + "step": 1180 + }, + { + "epoch": 0.12552742616033755, + "grad_norm": 0.6735822558403015, + "learning_rate": 0.0014501778219571766, + "loss": 1.7851, + "step": 1190 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 0.8044090270996094, + "learning_rate": 0.0014492741181423225, + "loss": 1.7978, + "step": 1200 + }, + { + "epoch": 0.12763713080168776, + "grad_norm": 0.700934112071991, + "learning_rate": 0.0014483625786195285, + "loss": 1.7911, + "step": 1210 + }, + { + "epoch": 0.12869198312236288, + "grad_norm": 0.6910656690597534, + "learning_rate": 0.0014474432136030405, + "loss": 1.7758, + "step": 1220 + }, + { + "epoch": 0.12974683544303797, + "grad_norm": 0.7860173583030701, + "learning_rate": 0.0014465160333947923, + "loss": 1.7652, + "step": 1230 + }, + { + "epoch": 0.1308016877637131, + "grad_norm": 0.7052719593048096, + "learning_rate": 0.0014455810483842908, + "loss": 1.7813, + "step": 1240 + }, + { + "epoch": 0.13185654008438819, + "grad_norm": 0.7991353869438171, + "learning_rate": 0.0014446382690484997, + "loss": 1.7841, + "step": 1250 + }, + { + "epoch": 0.13291139240506328, + "grad_norm": 1.1499314308166504, + "learning_rate": 0.0014436877059517215, + "loss": 1.7676, + "step": 1260 + }, + { + "epoch": 0.1339662447257384, + "grad_norm": 0.754125714302063, + "learning_rate": 0.0014427293697454803, + "loss": 1.7666, + "step": 1270 + }, + { + "epoch": 0.1350210970464135, + "grad_norm": 0.6787992715835571, + "learning_rate": 0.001441763271168401, + "loss": 1.78, + 
"step": 1280 + }, + { + "epoch": 0.1360759493670886, + "grad_norm": 0.9125069975852966, + "learning_rate": 0.00144078942104609, + "loss": 1.7651, + "step": 1290 + }, + { + "epoch": 0.1371308016877637, + "grad_norm": 0.680433452129364, + "learning_rate": 0.001439807830291013, + "loss": 1.7552, + "step": 1300 + }, + { + "epoch": 0.13818565400843882, + "grad_norm": 0.7715925574302673, + "learning_rate": 0.0014388185099023744, + "loss": 1.7574, + "step": 1310 + }, + { + "epoch": 0.13924050632911392, + "grad_norm": 1.009745717048645, + "learning_rate": 0.0014378214709659916, + "loss": 1.7594, + "step": 1320 + }, + { + "epoch": 0.14029535864978904, + "grad_norm": 0.8896642923355103, + "learning_rate": 0.0014368167246541733, + "loss": 1.7528, + "step": 1330 + }, + { + "epoch": 0.14135021097046413, + "grad_norm": 0.9303349256515503, + "learning_rate": 0.0014358042822255918, + "loss": 1.7525, + "step": 1340 + }, + { + "epoch": 0.14240506329113925, + "grad_norm": 0.6688674092292786, + "learning_rate": 0.0014347841550251597, + "loss": 1.7581, + "step": 1350 + }, + { + "epoch": 0.14345991561181434, + "grad_norm": 0.7160698771476746, + "learning_rate": 0.0014337563544838997, + "loss": 1.7449, + "step": 1360 + }, + { + "epoch": 0.14451476793248946, + "grad_norm": 0.8008187413215637, + "learning_rate": 0.001432720892118819, + "loss": 1.7441, + "step": 1370 + }, + { + "epoch": 0.14556962025316456, + "grad_norm": 0.8115729093551636, + "learning_rate": 0.0014316777795327794, + "loss": 1.7362, + "step": 1380 + }, + { + "epoch": 0.14662447257383968, + "grad_norm": 0.816256582736969, + "learning_rate": 0.001430627028414366, + "loss": 1.7438, + "step": 1390 + }, + { + "epoch": 0.14767932489451477, + "grad_norm": 0.7473534345626831, + "learning_rate": 0.0014295686505377586, + "loss": 1.7269, + "step": 1400 + }, + { + "epoch": 0.14873417721518986, + "grad_norm": 0.7106541395187378, + "learning_rate": 0.0014285026577625982, + "loss": 1.729, + "step": 1410 + }, + { + "epoch": 
0.14978902953586498, + "grad_norm": 0.7982098460197449, + "learning_rate": 0.0014274290620338542, + "loss": 1.736, + "step": 1420 + }, + { + "epoch": 0.15084388185654007, + "grad_norm": 0.8625721335411072, + "learning_rate": 0.0014263478753816906, + "loss": 1.732, + "step": 1430 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 0.6355438828468323, + "learning_rate": 0.0014252591099213326, + "loss": 1.7274, + "step": 1440 + }, + { + "epoch": 0.1529535864978903, + "grad_norm": 0.751062273979187, + "learning_rate": 0.001424162777852928, + "loss": 1.7259, + "step": 1450 + }, + { + "epoch": 0.1540084388185654, + "grad_norm": 0.8999553322792053, + "learning_rate": 0.0014230588914614134, + "loss": 1.7245, + "step": 1460 + }, + { + "epoch": 0.1550632911392405, + "grad_norm": 1.2298568487167358, + "learning_rate": 0.0014219474631163745, + "loss": 1.7261, + "step": 1470 + }, + { + "epoch": 0.15611814345991562, + "grad_norm": 0.7692976593971252, + "learning_rate": 0.001420828505271909, + "loss": 1.7239, + "step": 1480 + }, + { + "epoch": 0.1571729957805907, + "grad_norm": 0.6384195685386658, + "learning_rate": 0.0014197020304664856, + "loss": 1.7177, + "step": 1490 + }, + { + "epoch": 0.15822784810126583, + "grad_norm": 0.6885557770729065, + "learning_rate": 0.0014185680513228048, + "loss": 1.7168, + "step": 1500 + }, + { + "epoch": 0.15928270042194093, + "grad_norm": 0.8404626846313477, + "learning_rate": 0.0014174265805476564, + "loss": 1.7171, + "step": 1510 + }, + { + "epoch": 0.16033755274261605, + "grad_norm": 0.7215640544891357, + "learning_rate": 0.0014162776309317778, + "loss": 1.7173, + "step": 1520 + }, + { + "epoch": 0.16139240506329114, + "grad_norm": 0.6734811663627625, + "learning_rate": 0.0014151212153497108, + "loss": 1.702, + "step": 1530 + }, + { + "epoch": 0.16244725738396623, + "grad_norm": 0.6385595798492432, + "learning_rate": 0.0014139573467596561, + "loss": 1.6875, + "step": 1540 + }, + { + "epoch": 0.16350210970464135, + "grad_norm": 
0.6776177287101746, + "learning_rate": 0.00141278603820333, + "loss": 1.6919, + "step": 1550 + }, + { + "epoch": 0.16455696202531644, + "grad_norm": 0.6473292708396912, + "learning_rate": 0.0014116073028058165, + "loss": 1.6976, + "step": 1560 + }, + { + "epoch": 0.16561181434599156, + "grad_norm": 0.7719363570213318, + "learning_rate": 0.0014104211537754217, + "loss": 1.6909, + "step": 1570 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 0.6944288015365601, + "learning_rate": 0.001409227604403524, + "loss": 1.6958, + "step": 1580 + }, + { + "epoch": 0.16772151898734178, + "grad_norm": 1.185227632522583, + "learning_rate": 0.0014080266680644277, + "loss": 1.693, + "step": 1590 + }, + { + "epoch": 0.16877637130801687, + "grad_norm": 0.700070321559906, + "learning_rate": 0.0014068183582152103, + "loss": 1.6987, + "step": 1600 + }, + { + "epoch": 0.169831223628692, + "grad_norm": 0.6996198296546936, + "learning_rate": 0.001405602688395574, + "loss": 1.6937, + "step": 1610 + }, + { + "epoch": 0.17088607594936708, + "grad_norm": 0.7854952216148376, + "learning_rate": 0.0014043796722276924, + "loss": 1.6681, + "step": 1620 + }, + { + "epoch": 0.1719409282700422, + "grad_norm": 0.7488117218017578, + "learning_rate": 0.0014031493234160591, + "loss": 1.6801, + "step": 1630 + }, + { + "epoch": 0.1729957805907173, + "grad_norm": 0.7356165051460266, + "learning_rate": 0.0014019116557473332, + "loss": 1.6837, + "step": 1640 + }, + { + "epoch": 0.17405063291139242, + "grad_norm": 0.7980801463127136, + "learning_rate": 0.0014006666830901854, + "loss": 1.6753, + "step": 1650 + }, + { + "epoch": 0.1751054852320675, + "grad_norm": 0.6883916854858398, + "learning_rate": 0.001399414419395142, + "loss": 1.6771, + "step": 1660 + }, + { + "epoch": 0.17616033755274263, + "grad_norm": 0.7319534420967102, + "learning_rate": 0.0013981548786944293, + "loss": 1.6861, + "step": 1670 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 1.24504554271698, + "learning_rate": 
0.0013968880751018158, + "loss": 1.6705, + "step": 1680 + }, + { + "epoch": 0.17827004219409281, + "grad_norm": 0.7516098022460938, + "learning_rate": 0.0013956140228124545, + "loss": 1.6643, + "step": 1690 + }, + { + "epoch": 0.17932489451476794, + "grad_norm": 0.8416666984558105, + "learning_rate": 0.0013943327361027231, + "loss": 1.68, + "step": 1700 + }, + { + "epoch": 0.18037974683544303, + "grad_norm": 0.7274603247642517, + "learning_rate": 0.0013930442293300649, + "loss": 1.6585, + "step": 1710 + }, + { + "epoch": 0.18143459915611815, + "grad_norm": 0.650302529335022, + "learning_rate": 0.0013917485169328279, + "loss": 1.6561, + "step": 1720 + }, + { + "epoch": 0.18248945147679324, + "grad_norm": 0.6359012126922607, + "learning_rate": 0.0013904456134301016, + "loss": 1.6629, + "step": 1730 + }, + { + "epoch": 0.18354430379746836, + "grad_norm": 0.7389732003211975, + "learning_rate": 0.0013891355334215562, + "loss": 1.6637, + "step": 1740 + }, + { + "epoch": 0.18459915611814345, + "grad_norm": 0.6535093188285828, + "learning_rate": 0.0013878182915872776, + "loss": 1.6832, + "step": 1750 + }, + { + "epoch": 0.18565400843881857, + "grad_norm": 0.671858012676239, + "learning_rate": 0.001386493902687604, + "loss": 1.6612, + "step": 1760 + }, + { + "epoch": 0.18670886075949367, + "grad_norm": 0.6506868004798889, + "learning_rate": 0.00138516238156296, + "loss": 1.6666, + "step": 1770 + }, + { + "epoch": 0.1877637130801688, + "grad_norm": 0.8278342485427856, + "learning_rate": 0.0013838237431336895, + "loss": 1.6677, + "step": 1780 + }, + { + "epoch": 0.18881856540084388, + "grad_norm": 1.19625723361969, + "learning_rate": 0.0013824780023998899, + "loss": 1.6686, + "step": 1790 + }, + { + "epoch": 0.189873417721519, + "grad_norm": 1.3451343774795532, + "learning_rate": 0.0013811251744412431, + "loss": 1.6628, + "step": 1800 + }, + { + "epoch": 0.1909282700421941, + "grad_norm": 0.6782642602920532, + "learning_rate": 0.0013797652744168473, + "loss": 1.6436, + 
"step": 1810 + }, + { + "epoch": 0.19198312236286919, + "grad_norm": 0.6068716645240784, + "learning_rate": 0.0013783983175650457, + "loss": 1.644, + "step": 1820 + }, + { + "epoch": 0.1930379746835443, + "grad_norm": 0.6921589970588684, + "learning_rate": 0.0013770243192032581, + "loss": 1.6456, + "step": 1830 + }, + { + "epoch": 0.1940928270042194, + "grad_norm": 0.8523951768875122, + "learning_rate": 0.0013756432947278064, + "loss": 1.6464, + "step": 1840 + }, + { + "epoch": 0.19514767932489452, + "grad_norm": 0.6872298121452332, + "learning_rate": 0.0013742552596137444, + "loss": 1.6496, + "step": 1850 + }, + { + "epoch": 0.1962025316455696, + "grad_norm": 0.7062289714813232, + "learning_rate": 0.0013728602294146833, + "loss": 1.649, + "step": 1860 + }, + { + "epoch": 0.19725738396624473, + "grad_norm": 0.7503275275230408, + "learning_rate": 0.0013714582197626175, + "loss": 1.6528, + "step": 1870 + }, + { + "epoch": 0.19831223628691982, + "grad_norm": 0.6693542003631592, + "learning_rate": 0.0013700492463677501, + "loss": 1.638, + "step": 1880 + }, + { + "epoch": 0.19936708860759494, + "grad_norm": 0.9530947804450989, + "learning_rate": 0.0013686333250183154, + "loss": 1.6472, + "step": 1890 + }, + { + "epoch": 0.20042194092827004, + "grad_norm": 0.8508546948432922, + "learning_rate": 0.001367210471580404, + "loss": 1.6372, + "step": 1900 + }, + { + "epoch": 0.20147679324894516, + "grad_norm": 0.8324335217475891, + "learning_rate": 0.0013657807019977835, + "loss": 1.6426, + "step": 1910 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 0.7004092335700989, + "learning_rate": 0.0013643440322917198, + "loss": 1.6357, + "step": 1920 + }, + { + "epoch": 0.20358649789029537, + "grad_norm": 0.7364173531532288, + "learning_rate": 0.0013629004785607989, + "loss": 1.6402, + "step": 1930 + }, + { + "epoch": 0.20464135021097046, + "grad_norm": 0.9684028625488281, + "learning_rate": 0.0013614500569807445, + "loss": 1.6387, + "step": 1940 + }, + { + "epoch": 
0.20569620253164558, + "grad_norm": 0.5940460562705994, + "learning_rate": 0.0013599927838042394, + "loss": 1.6231, + "step": 1950 + }, + { + "epoch": 0.20675105485232068, + "grad_norm": 0.7447016835212708, + "learning_rate": 0.0013585286753607408, + "loss": 1.6268, + "step": 1960 + }, + { + "epoch": 0.20780590717299577, + "grad_norm": 0.7407899498939514, + "learning_rate": 0.0013570577480562986, + "loss": 1.6404, + "step": 1970 + }, + { + "epoch": 0.2088607594936709, + "grad_norm": 0.6907680630683899, + "learning_rate": 0.0013555800183733717, + "loss": 1.6216, + "step": 1980 + }, + { + "epoch": 0.20991561181434598, + "grad_norm": 0.7425059080123901, + "learning_rate": 0.0013540955028706425, + "loss": 1.6241, + "step": 1990 + }, + { + "epoch": 0.2109704641350211, + "grad_norm": 0.7894456386566162, + "learning_rate": 0.0013526042181828324, + "loss": 1.6219, + "step": 2000 + }, + { + "epoch": 0.2120253164556962, + "grad_norm": 0.733400821685791, + "learning_rate": 0.0013511061810205143, + "loss": 1.6304, + "step": 2010 + }, + { + "epoch": 0.21308016877637131, + "grad_norm": 1.1410382986068726, + "learning_rate": 0.001349601408169926, + "loss": 1.6392, + "step": 2020 + }, + { + "epoch": 0.2141350210970464, + "grad_norm": 0.6176708340644836, + "learning_rate": 0.0013480899164927823, + "loss": 1.625, + "step": 2030 + }, + { + "epoch": 0.21518987341772153, + "grad_norm": 0.6949132680892944, + "learning_rate": 0.0013465717229260853, + "loss": 1.6207, + "step": 2040 + }, + { + "epoch": 0.21624472573839662, + "grad_norm": 0.6405869126319885, + "learning_rate": 0.001345046844481935, + "loss": 1.6226, + "step": 2050 + }, + { + "epoch": 0.21729957805907174, + "grad_norm": 0.600914478302002, + "learning_rate": 0.0013435152982473396, + "loss": 1.6126, + "step": 2060 + }, + { + "epoch": 0.21835443037974683, + "grad_norm": 0.7879353761672974, + "learning_rate": 0.0013419771013840217, + "loss": 1.6185, + "step": 2070 + }, + { + "epoch": 0.21940928270042195, + "grad_norm": 
0.826435387134552, + "learning_rate": 0.001340432271128229, + "loss": 1.6222, + "step": 2080 + }, + { + "epoch": 0.22046413502109705, + "grad_norm": 0.7783187031745911, + "learning_rate": 0.0013388808247905381, + "loss": 1.61, + "step": 2090 + }, + { + "epoch": 0.22151898734177214, + "grad_norm": 1.530731439590454, + "learning_rate": 0.0013373227797556634, + "loss": 1.6215, + "step": 2100 + }, + { + "epoch": 0.22257383966244726, + "grad_norm": 1.11808443069458, + "learning_rate": 0.00133575815348226, + "loss": 1.6147, + "step": 2110 + }, + { + "epoch": 0.22362869198312235, + "grad_norm": 0.6055777072906494, + "learning_rate": 0.0013341869635027292, + "loss": 1.5999, + "step": 2120 + }, + { + "epoch": 0.22468354430379747, + "grad_norm": 0.6124173998832703, + "learning_rate": 0.001332609227423022, + "loss": 1.606, + "step": 2130 + }, + { + "epoch": 0.22573839662447256, + "grad_norm": 0.619021475315094, + "learning_rate": 0.0013310249629224417, + "loss": 1.6028, + "step": 2140 + }, + { + "epoch": 0.22679324894514769, + "grad_norm": 0.8506957292556763, + "learning_rate": 0.0013294341877534454, + "loss": 1.6109, + "step": 2150 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 1.125178575515747, + "learning_rate": 0.0013278369197414458, + "loss": 1.6248, + "step": 2160 + }, + { + "epoch": 0.2289029535864979, + "grad_norm": 0.7912428975105286, + "learning_rate": 0.0013262331767846104, + "loss": 1.6079, + "step": 2170 + }, + { + "epoch": 0.229957805907173, + "grad_norm": 0.6803474426269531, + "learning_rate": 0.0013246229768536628, + "loss": 1.593, + "step": 2180 + }, + { + "epoch": 0.2310126582278481, + "grad_norm": 1.0238145589828491, + "learning_rate": 0.001323006337991679, + "loss": 1.6108, + "step": 2190 + }, + { + "epoch": 0.2320675105485232, + "grad_norm": 0.6804127097129822, + "learning_rate": 0.0013213832783138873, + "loss": 1.6083, + "step": 2200 + }, + { + "epoch": 0.23312236286919832, + "grad_norm": 0.8013482689857483, + "learning_rate": 
0.0013197538160074633, + "loss": 1.5953, + "step": 2210 + }, + { + "epoch": 0.23417721518987342, + "grad_norm": 0.6808459162712097, + "learning_rate": 0.0013181179693313283, + "loss": 1.609, + "step": 2220 + }, + { + "epoch": 0.23523206751054854, + "grad_norm": 0.6706727743148804, + "learning_rate": 0.0013164757566159428, + "loss": 1.5996, + "step": 2230 + }, + { + "epoch": 0.23628691983122363, + "grad_norm": 0.7597745060920715, + "learning_rate": 0.001314827196263102, + "loss": 1.5949, + "step": 2240 + }, + { + "epoch": 0.23734177215189872, + "grad_norm": 0.9154698252677917, + "learning_rate": 0.0013131723067457302, + "loss": 1.5922, + "step": 2250 + }, + { + "epoch": 0.23839662447257384, + "grad_norm": 0.8039065003395081, + "learning_rate": 0.0013115111066076721, + "loss": 1.5838, + "step": 2260 + }, + { + "epoch": 0.23945147679324894, + "grad_norm": 0.7580074667930603, + "learning_rate": 0.0013098436144634862, + "loss": 1.6156, + "step": 2270 + }, + { + "epoch": 0.24050632911392406, + "grad_norm": 0.656127393245697, + "learning_rate": 0.0013081698489982364, + "loss": 1.6013, + "step": 2280 + }, + { + "epoch": 0.24156118143459915, + "grad_norm": 0.6302765607833862, + "learning_rate": 0.001306489828967282, + "loss": 1.5848, + "step": 2290 + }, + { + "epoch": 0.24261603375527427, + "grad_norm": 0.630262017250061, + "learning_rate": 0.0013048035731960679, + "loss": 1.5865, + "step": 2300 + }, + { + "epoch": 0.24367088607594936, + "grad_norm": 0.6741836071014404, + "learning_rate": 0.0013031111005799133, + "loss": 1.5904, + "step": 2310 + }, + { + "epoch": 0.24472573839662448, + "grad_norm": 0.6615102291107178, + "learning_rate": 0.0013014124300838004, + "loss": 1.6002, + "step": 2320 + }, + { + "epoch": 0.24578059071729957, + "grad_norm": 0.8633708357810974, + "learning_rate": 0.0012997075807421612, + "loss": 1.5795, + "step": 2330 + }, + { + "epoch": 0.2468354430379747, + "grad_norm": 0.9371483325958252, + "learning_rate": 0.0012979965716586653, + "loss": 1.5755, + 
"step": 2340 + }, + { + "epoch": 0.2478902953586498, + "grad_norm": 0.6783944964408875, + "learning_rate": 0.0012962794220060048, + "loss": 1.5765, + "step": 2350 + }, + { + "epoch": 0.2489451476793249, + "grad_norm": 0.6279230713844299, + "learning_rate": 0.0012945561510256801, + "loss": 1.5873, + "step": 2360 + }, + { + "epoch": 0.25, + "grad_norm": 0.6697786450386047, + "learning_rate": 0.001292826778027784, + "loss": 1.586, + "step": 2370 + }, + { + "epoch": 0.2510548523206751, + "grad_norm": 0.7381576299667358, + "learning_rate": 0.0012910913223907856, + "loss": 1.5814, + "step": 2380 + }, + { + "epoch": 0.2521097046413502, + "grad_norm": 0.6456592679023743, + "learning_rate": 0.0012893498035613123, + "loss": 1.5813, + "step": 2390 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 0.6246742606163025, + "learning_rate": 0.001287602241053933, + "loss": 1.5794, + "step": 2400 + }, + { + "epoch": 0.2542194092827004, + "grad_norm": 0.7302229404449463, + "learning_rate": 0.0012858486544509392, + "loss": 1.5835, + "step": 2410 + }, + { + "epoch": 0.2552742616033755, + "grad_norm": 0.900379478931427, + "learning_rate": 0.0012840890634021249, + "loss": 1.5777, + "step": 2420 + }, + { + "epoch": 0.2563291139240506, + "grad_norm": 0.8365148305892944, + "learning_rate": 0.0012823234876245667, + "loss": 1.5765, + "step": 2430 + }, + { + "epoch": 0.25738396624472576, + "grad_norm": 0.9398805499076843, + "learning_rate": 0.0012805519469024035, + "loss": 1.585, + "step": 2440 + }, + { + "epoch": 0.25843881856540085, + "grad_norm": 0.6113522052764893, + "learning_rate": 0.0012787744610866143, + "loss": 1.5702, + "step": 2450 + }, + { + "epoch": 0.25949367088607594, + "grad_norm": 0.5784794092178345, + "learning_rate": 0.0012769910500947954, + "loss": 1.5856, + "step": 2460 + }, + { + "epoch": 0.26054852320675104, + "grad_norm": 0.8569198846817017, + "learning_rate": 0.0012752017339109376, + "loss": 1.5736, + "step": 2470 + }, + { + "epoch": 0.2616033755274262, + 
"grad_norm": 0.6998064517974854, + "learning_rate": 0.0012734065325852029, + "loss": 1.5819, + "step": 2480 + }, + { + "epoch": 0.2626582278481013, + "grad_norm": 0.6383525729179382, + "learning_rate": 0.0012716054662336987, + "loss": 1.5632, + "step": 2490 + }, + { + "epoch": 0.26371308016877637, + "grad_norm": 0.6893065571784973, + "learning_rate": 0.001269798555038252, + "loss": 1.5667, + "step": 2500 + }, + { + "epoch": 0.26476793248945146, + "grad_norm": 0.6477349400520325, + "learning_rate": 0.0012679858192461864, + "loss": 1.5698, + "step": 2510 + }, + { + "epoch": 0.26582278481012656, + "grad_norm": 0.6793829202651978, + "learning_rate": 0.0012661672791700906, + "loss": 1.5722, + "step": 2520 + }, + { + "epoch": 0.2668776371308017, + "grad_norm": 0.6277351379394531, + "learning_rate": 0.0012643429551875945, + "loss": 1.5676, + "step": 2530 + }, + { + "epoch": 0.2679324894514768, + "grad_norm": 1.0240154266357422, + "learning_rate": 0.0012625128677411388, + "loss": 1.568, + "step": 2540 + }, + { + "epoch": 0.2689873417721519, + "grad_norm": 1.028918743133545, + "learning_rate": 0.0012606770373377475, + "loss": 1.5679, + "step": 2550 + }, + { + "epoch": 0.270042194092827, + "grad_norm": 0.7178888320922852, + "learning_rate": 0.0012588354845487959, + "loss": 1.5743, + "step": 2560 + }, + { + "epoch": 0.27109704641350213, + "grad_norm": 0.6488509178161621, + "learning_rate": 0.001256988230009783, + "loss": 1.5691, + "step": 2570 + }, + { + "epoch": 0.2721518987341772, + "grad_norm": 0.7239912748336792, + "learning_rate": 0.0012551352944200976, + "loss": 1.5643, + "step": 2580 + }, + { + "epoch": 0.2732067510548523, + "grad_norm": 0.6325228810310364, + "learning_rate": 0.0012532766985427874, + "loss": 1.5705, + "step": 2590 + }, + { + "epoch": 0.2742616033755274, + "grad_norm": 0.6219179630279541, + "learning_rate": 0.0012514124632043272, + "loss": 1.5702, + "step": 2600 + }, + { + "epoch": 0.27531645569620256, + "grad_norm": 0.6535031199455261, + 
"learning_rate": 0.0012495426092943842, + "loss": 1.5669, + "step": 2610 + }, + { + "epoch": 0.27637130801687765, + "grad_norm": 0.9514474272727966, + "learning_rate": 0.0012476671577655845, + "loss": 1.5674, + "step": 2620 + }, + { + "epoch": 0.27742616033755274, + "grad_norm": 0.7504703998565674, + "learning_rate": 0.0012457861296332774, + "loss": 1.5542, + "step": 2630 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 0.6771042346954346, + "learning_rate": 0.001243899545975303, + "loss": 1.5669, + "step": 2640 + }, + { + "epoch": 0.2795358649789029, + "grad_norm": 0.6987870931625366, + "learning_rate": 0.0012420074279317515, + "loss": 1.5586, + "step": 2650 + }, + { + "epoch": 0.2805907172995781, + "grad_norm": 0.5987036228179932, + "learning_rate": 0.0012401097967047298, + "loss": 1.5547, + "step": 2660 + }, + { + "epoch": 0.28164556962025317, + "grad_norm": 0.7077672481536865, + "learning_rate": 0.001238206673558122, + "loss": 1.5538, + "step": 2670 + }, + { + "epoch": 0.28270042194092826, + "grad_norm": 0.6049537658691406, + "learning_rate": 0.0012362980798173526, + "loss": 1.5542, + "step": 2680 + }, + { + "epoch": 0.28375527426160335, + "grad_norm": 0.7271889448165894, + "learning_rate": 0.0012343840368691462, + "loss": 1.5543, + "step": 2690 + }, + { + "epoch": 0.2848101265822785, + "grad_norm": 0.7115278840065002, + "learning_rate": 0.0012324645661612886, + "loss": 1.5501, + "step": 2700 + }, + { + "epoch": 0.2858649789029536, + "grad_norm": 0.8059226870536804, + "learning_rate": 0.0012305396892023867, + "loss": 1.5567, + "step": 2710 + }, + { + "epoch": 0.2869198312236287, + "grad_norm": 0.6068459749221802, + "learning_rate": 0.0012286094275616264, + "loss": 1.5552, + "step": 2720 + }, + { + "epoch": 0.2879746835443038, + "grad_norm": 0.6605356335639954, + "learning_rate": 0.0012266738028685318, + "loss": 1.5375, + "step": 2730 + }, + { + "epoch": 0.2890295358649789, + "grad_norm": 0.6931618452072144, + "learning_rate": 0.001224732836812723, + 
"loss": 1.5475, + "step": 2740 + }, + { + "epoch": 0.290084388185654, + "grad_norm": 0.6594390869140625, + "learning_rate": 0.0012227865511436724, + "loss": 1.5537, + "step": 2750 + }, + { + "epoch": 0.2911392405063291, + "grad_norm": 0.8647083640098572, + "learning_rate": 0.001220834967670461, + "loss": 1.5619, + "step": 2760 + }, + { + "epoch": 0.2921940928270042, + "grad_norm": 0.6316749453544617, + "learning_rate": 0.0012188781082615346, + "loss": 1.5564, + "step": 2770 + }, + { + "epoch": 0.29324894514767935, + "grad_norm": 0.6600530743598938, + "learning_rate": 0.0012169159948444588, + "loss": 1.5543, + "step": 2780 + }, + { + "epoch": 0.29430379746835444, + "grad_norm": 0.6457939743995667, + "learning_rate": 0.001214948649405672, + "loss": 1.5495, + "step": 2790 + }, + { + "epoch": 0.29535864978902954, + "grad_norm": 0.7187444567680359, + "learning_rate": 0.0012129760939902407, + "loss": 1.5432, + "step": 2800 + }, + { + "epoch": 0.29641350210970463, + "grad_norm": 0.5995075702667236, + "learning_rate": 0.0012109983507016114, + "loss": 1.5483, + "step": 2810 + }, + { + "epoch": 0.2974683544303797, + "grad_norm": 0.7687663435935974, + "learning_rate": 0.0012090154417013636, + "loss": 1.5531, + "step": 2820 + }, + { + "epoch": 0.29852320675105487, + "grad_norm": 0.6923853158950806, + "learning_rate": 0.0012070273892089605, + "loss": 1.5267, + "step": 2830 + }, + { + "epoch": 0.29957805907172996, + "grad_norm": 1.1382488012313843, + "learning_rate": 0.0012050342155015012, + "loss": 1.5324, + "step": 2840 + }, + { + "epoch": 0.30063291139240506, + "grad_norm": 0.6795493364334106, + "learning_rate": 0.0012030359429134707, + "loss": 1.5381, + "step": 2850 + }, + { + "epoch": 0.30168776371308015, + "grad_norm": 0.647104799747467, + "learning_rate": 0.0012010325938364883, + "loss": 1.5428, + "step": 2860 + }, + { + "epoch": 0.3027426160337553, + "grad_norm": 1.0964915752410889, + "learning_rate": 0.0011990241907190592, + "loss": 1.5431, + "step": 2870 + }, + { + 
"epoch": 0.3037974683544304, + "grad_norm": 0.7998847961425781, + "learning_rate": 0.001197010756066321, + "loss": 1.5294, + "step": 2880 + }, + { + "epoch": 0.3048523206751055, + "grad_norm": 0.724804699420929, + "learning_rate": 0.0011949923124397917, + "loss": 1.5357, + "step": 2890 + }, + { + "epoch": 0.3059071729957806, + "grad_norm": 0.6442999839782715, + "learning_rate": 0.001192968882457118, + "loss": 1.5392, + "step": 2900 + }, + { + "epoch": 0.3069620253164557, + "grad_norm": 0.6547338962554932, + "learning_rate": 0.001190940488791821, + "loss": 1.5379, + "step": 2910 + }, + { + "epoch": 0.3080168776371308, + "grad_norm": 0.6400957703590393, + "learning_rate": 0.0011889071541730419, + "loss": 1.5324, + "step": 2920 + }, + { + "epoch": 0.3090717299578059, + "grad_norm": 0.673004150390625, + "learning_rate": 0.001186868901385288, + "loss": 1.5296, + "step": 2930 + }, + { + "epoch": 0.310126582278481, + "grad_norm": 0.6449806094169617, + "learning_rate": 0.001184825753268177, + "loss": 1.5368, + "step": 2940 + }, + { + "epoch": 0.3111814345991561, + "grad_norm": 0.6577426791191101, + "learning_rate": 0.0011827777327161814, + "loss": 1.5472, + "step": 2950 + }, + { + "epoch": 0.31223628691983124, + "grad_norm": 0.781528890132904, + "learning_rate": 0.0011807248626783714, + "loss": 1.5336, + "step": 2960 + }, + { + "epoch": 0.31329113924050633, + "grad_norm": 0.9285182356834412, + "learning_rate": 0.0011786671661581584, + "loss": 1.5161, + "step": 2970 + }, + { + "epoch": 0.3143459915611814, + "grad_norm": 0.6776363253593445, + "learning_rate": 0.001176604666213036, + "loss": 1.5218, + "step": 2980 + }, + { + "epoch": 0.3154008438818565, + "grad_norm": 0.745066225528717, + "learning_rate": 0.0011745373859543236, + "loss": 1.5418, + "step": 2990 + }, + { + "epoch": 0.31645569620253167, + "grad_norm": 0.7665883302688599, + "learning_rate": 0.0011724653485469063, + "loss": 1.5196, + "step": 3000 + }, + { + "epoch": 0.31751054852320676, + "grad_norm": 
0.622420072555542, + "learning_rate": 0.0011703885772089743, + "loss": 1.5322, + "step": 3010 + }, + { + "epoch": 0.31856540084388185, + "grad_norm": 0.7399662137031555, + "learning_rate": 0.0011683070952117646, + "loss": 1.5294, + "step": 3020 + }, + { + "epoch": 0.31962025316455694, + "grad_norm": 0.6748687028884888, + "learning_rate": 0.0011662209258792998, + "loss": 1.5208, + "step": 3030 + }, + { + "epoch": 0.3206751054852321, + "grad_norm": 0.7089667916297913, + "learning_rate": 0.0011641300925881257, + "loss": 1.5266, + "step": 3040 + }, + { + "epoch": 0.3217299578059072, + "grad_norm": 0.6806653738021851, + "learning_rate": 0.0011620346187670501, + "loss": 1.5214, + "step": 3050 + }, + { + "epoch": 0.3227848101265823, + "grad_norm": 0.5722612142562866, + "learning_rate": 0.0011599345278968806, + "loss": 1.5314, + "step": 3060 + }, + { + "epoch": 0.32383966244725737, + "grad_norm": 0.6367582082748413, + "learning_rate": 0.0011578298435101604, + "loss": 1.5224, + "step": 3070 + }, + { + "epoch": 0.32489451476793246, + "grad_norm": 0.6236495971679688, + "learning_rate": 0.0011557205891909062, + "loss": 1.5128, + "step": 3080 + }, + { + "epoch": 0.3259493670886076, + "grad_norm": 0.5943174958229065, + "learning_rate": 0.0011536067885743423, + "loss": 1.5281, + "step": 3090 + }, + { + "epoch": 0.3270042194092827, + "grad_norm": 0.5894270539283752, + "learning_rate": 0.001151488465346637, + "loss": 1.5156, + "step": 3100 + }, + { + "epoch": 0.3280590717299578, + "grad_norm": 0.638626217842102, + "learning_rate": 0.0011493656432446362, + "loss": 1.5194, + "step": 3110 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 0.5813705921173096, + "learning_rate": 0.0011472383460555983, + "loss": 1.5192, + "step": 3120 + }, + { + "epoch": 0.33016877637130804, + "grad_norm": 0.6146628856658936, + "learning_rate": 0.001145106597616927, + "loss": 1.5336, + "step": 3130 + }, + { + "epoch": 0.33122362869198313, + "grad_norm": 0.5674701929092407, + "learning_rate": 
0.001142970421815904, + "loss": 1.5111, + "step": 3140 + }, + { + "epoch": 0.3322784810126582, + "grad_norm": 0.6109512448310852, + "learning_rate": 0.0011408298425894226, + "loss": 1.5198, + "step": 3150 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.6579158306121826, + "learning_rate": 0.0011386848839237186, + "loss": 1.5191, + "step": 3160 + }, + { + "epoch": 0.33438818565400846, + "grad_norm": 0.6389912366867065, + "learning_rate": 0.0011365355698541005, + "loss": 1.5213, + "step": 3170 + }, + { + "epoch": 0.33544303797468356, + "grad_norm": 0.6595737934112549, + "learning_rate": 0.0011343819244646824, + "loss": 1.516, + "step": 3180 + }, + { + "epoch": 0.33649789029535865, + "grad_norm": 0.7528008222579956, + "learning_rate": 0.001132223971888112, + "loss": 1.5168, + "step": 3190 + }, + { + "epoch": 0.33755274261603374, + "grad_norm": 0.65729820728302, + "learning_rate": 0.0011300617363053024, + "loss": 1.5117, + "step": 3200 + }, + { + "epoch": 0.33860759493670883, + "grad_norm": 0.7902970314025879, + "learning_rate": 0.0011278952419451586, + "loss": 1.5206, + "step": 3210 + }, + { + "epoch": 0.339662447257384, + "grad_norm": 0.6076300144195557, + "learning_rate": 0.0011257245130843077, + "loss": 1.5135, + "step": 3220 + }, + { + "epoch": 0.3407172995780591, + "grad_norm": 0.649088442325592, + "learning_rate": 0.0011235495740468265, + "loss": 1.5075, + "step": 3230 + }, + { + "epoch": 0.34177215189873417, + "grad_norm": 0.7617542147636414, + "learning_rate": 0.0011213704492039694, + "loss": 1.488, + "step": 3240 + }, + { + "epoch": 0.34282700421940926, + "grad_norm": 0.5706382989883423, + "learning_rate": 0.001119187162973894, + "loss": 1.5151, + "step": 3250 + }, + { + "epoch": 0.3438818565400844, + "grad_norm": 0.5654509663581848, + "learning_rate": 0.001116999739821388, + "loss": 1.5016, + "step": 3260 + }, + { + "epoch": 0.3449367088607595, + "grad_norm": 0.6327301859855652, + "learning_rate": 0.0011148082042575968, + "loss": 1.5222, + "step": 
3270 + }, + { + "epoch": 0.3459915611814346, + "grad_norm": 0.6377241015434265, + "learning_rate": 0.0011126125808397461, + "loss": 1.5191, + "step": 3280 + }, + { + "epoch": 0.3470464135021097, + "grad_norm": 0.9808812141418457, + "learning_rate": 0.0011104128941708683, + "loss": 1.5012, + "step": 3290 + }, + { + "epoch": 0.34810126582278483, + "grad_norm": 0.5883389115333557, + "learning_rate": 0.001108209168899527, + "loss": 1.5102, + "step": 3300 + }, + { + "epoch": 0.3491561181434599, + "grad_norm": 0.6944339871406555, + "learning_rate": 0.0011060014297195396, + "loss": 1.5109, + "step": 3310 + }, + { + "epoch": 0.350210970464135, + "grad_norm": 0.6329933404922485, + "learning_rate": 0.0011037897013697015, + "loss": 1.5144, + "step": 3320 + }, + { + "epoch": 0.3512658227848101, + "grad_norm": 0.647352933883667, + "learning_rate": 0.0011015740086335092, + "loss": 1.5038, + "step": 3330 + }, + { + "epoch": 0.35232067510548526, + "grad_norm": 0.6054313778877258, + "learning_rate": 0.0010993543763388814, + "loss": 1.5071, + "step": 3340 + }, + { + "epoch": 0.35337552742616035, + "grad_norm": 0.6633821725845337, + "learning_rate": 0.0010971308293578814, + "loss": 1.5, + "step": 3350 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 0.5898052453994751, + "learning_rate": 0.0010949033926064397, + "loss": 1.501, + "step": 3360 + }, + { + "epoch": 0.35548523206751054, + "grad_norm": 0.8046050667762756, + "learning_rate": 0.0010926720910440725, + "loss": 1.5065, + "step": 3370 + }, + { + "epoch": 0.35654008438818563, + "grad_norm": 0.8917463421821594, + "learning_rate": 0.001090436949673603, + "loss": 1.5034, + "step": 3380 + }, + { + "epoch": 0.3575949367088608, + "grad_norm": 0.6223356127738953, + "learning_rate": 0.0010881979935408815, + "loss": 1.4968, + "step": 3390 + }, + { + "epoch": 0.35864978902953587, + "grad_norm": 0.6322743892669678, + "learning_rate": 0.0010859552477345052, + "loss": 1.5045, + "step": 3400 + }, + { + "epoch": 0.35970464135021096, + 
"grad_norm": 0.6608303785324097, + "learning_rate": 0.001083708737385536, + "loss": 1.5011, + "step": 3410 + }, + { + "epoch": 0.36075949367088606, + "grad_norm": 0.6565998196601868, + "learning_rate": 0.0010814584876672187, + "loss": 1.4837, + "step": 3420 + }, + { + "epoch": 0.3618143459915612, + "grad_norm": 0.640568196773529, + "learning_rate": 0.0010792045237947008, + "loss": 1.5025, + "step": 3430 + }, + { + "epoch": 0.3628691983122363, + "grad_norm": 0.62236487865448, + "learning_rate": 0.0010769468710247478, + "loss": 1.4966, + "step": 3440 + }, + { + "epoch": 0.3639240506329114, + "grad_norm": 0.6582492589950562, + "learning_rate": 0.0010746855546554612, + "loss": 1.4951, + "step": 3450 + }, + { + "epoch": 0.3649789029535865, + "grad_norm": 0.7884146571159363, + "learning_rate": 0.0010724206000259954, + "loss": 1.4882, + "step": 3460 + }, + { + "epoch": 0.36603375527426163, + "grad_norm": 0.6781127452850342, + "learning_rate": 0.0010701520325162727, + "loss": 1.5038, + "step": 3470 + }, + { + "epoch": 0.3670886075949367, + "grad_norm": 0.607782244682312, + "learning_rate": 0.0010678798775467001, + "loss": 1.5089, + "step": 3480 + }, + { + "epoch": 0.3681434599156118, + "grad_norm": 0.6141951680183411, + "learning_rate": 0.0010656041605778832, + "loss": 1.5007, + "step": 3490 + }, + { + "epoch": 0.3691983122362869, + "grad_norm": 0.6759193539619446, + "learning_rate": 0.001063324907110342, + "loss": 1.4823, + "step": 3500 + }, + { + "epoch": 0.370253164556962, + "grad_norm": 0.659016489982605, + "learning_rate": 0.0010610421426842241, + "loss": 1.5021, + "step": 3510 + }, + { + "epoch": 0.37130801687763715, + "grad_norm": 0.7625165581703186, + "learning_rate": 0.00105875589287902, + "loss": 1.4998, + "step": 3520 + }, + { + "epoch": 0.37236286919831224, + "grad_norm": 0.6951397061347961, + "learning_rate": 0.0010564661833132752, + "loss": 1.5091, + "step": 3530 + }, + { + "epoch": 0.37341772151898733, + "grad_norm": 0.7422622442245483, + "learning_rate": 
0.001054173039644303, + "loss": 1.4988, + "step": 3540 + }, + { + "epoch": 0.3744725738396624, + "grad_norm": 0.6339021325111389, + "learning_rate": 0.0010518764875678981, + "loss": 1.4886, + "step": 3550 + }, + { + "epoch": 0.3755274261603376, + "grad_norm": 0.7282052040100098, + "learning_rate": 0.001049576552818048, + "loss": 1.4909, + "step": 3560 + }, + { + "epoch": 0.37658227848101267, + "grad_norm": 0.6053773164749146, + "learning_rate": 0.0010472732611666448, + "loss": 1.4834, + "step": 3570 + }, + { + "epoch": 0.37763713080168776, + "grad_norm": 1.010981798171997, + "learning_rate": 0.0010449666384231954, + "loss": 1.4894, + "step": 3580 + }, + { + "epoch": 0.37869198312236285, + "grad_norm": 0.7876071333885193, + "learning_rate": 0.0010426567104345346, + "loss": 1.4995, + "step": 3590 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 0.5915725827217102, + "learning_rate": 0.0010403435030845332, + "loss": 1.4818, + "step": 3600 + }, + { + "epoch": 0.3808016877637131, + "grad_norm": 0.6278473138809204, + "learning_rate": 0.0010380270422938093, + "loss": 1.4839, + "step": 3610 + }, + { + "epoch": 0.3818565400843882, + "grad_norm": 0.5703684687614441, + "learning_rate": 0.0010357073540194362, + "loss": 1.4873, + "step": 3620 + }, + { + "epoch": 0.3829113924050633, + "grad_norm": 0.8193210959434509, + "learning_rate": 0.001033384464254655, + "loss": 1.4891, + "step": 3630 + }, + { + "epoch": 0.38396624472573837, + "grad_norm": 0.9068028926849365, + "learning_rate": 0.001031058399028579, + "loss": 1.4831, + "step": 3640 + }, + { + "epoch": 0.3850210970464135, + "grad_norm": 0.666253924369812, + "learning_rate": 0.001028729184405905, + "loss": 1.4946, + "step": 3650 + }, + { + "epoch": 0.3860759493670886, + "grad_norm": 0.6131936311721802, + "learning_rate": 0.0010263968464866201, + "loss": 1.488, + "step": 3660 + }, + { + "epoch": 0.3871308016877637, + "grad_norm": 0.6458554267883301, + "learning_rate": 0.0010240614114057098, + "loss": 1.4806, + "step": 
3670 + }, + { + "epoch": 0.3881856540084388, + "grad_norm": 1.1987972259521484, + "learning_rate": 0.001021722905332864, + "loss": 1.4852, + "step": 3680 + }, + { + "epoch": 0.38924050632911394, + "grad_norm": 0.6627452373504639, + "learning_rate": 0.0010193813544721855, + "loss": 1.4931, + "step": 3690 + }, + { + "epoch": 0.39029535864978904, + "grad_norm": 0.636959969997406, + "learning_rate": 0.001017036785061895, + "loss": 1.4943, + "step": 3700 + }, + { + "epoch": 0.39135021097046413, + "grad_norm": 0.6383914351463318, + "learning_rate": 0.0010146892233740376, + "loss": 1.4813, + "step": 3710 + }, + { + "epoch": 0.3924050632911392, + "grad_norm": 0.9093825817108154, + "learning_rate": 0.0010123386957141883, + "loss": 1.4637, + "step": 3720 + }, + { + "epoch": 0.39345991561181437, + "grad_norm": 0.7391069531440735, + "learning_rate": 0.0010099852284211573, + "loss": 1.4843, + "step": 3730 + }, + { + "epoch": 0.39451476793248946, + "grad_norm": 0.7567445635795593, + "learning_rate": 0.0010076288478666944, + "loss": 1.4807, + "step": 3740 + }, + { + "epoch": 0.39556962025316456, + "grad_norm": 0.5907058715820312, + "learning_rate": 0.0010052695804551946, + "loss": 1.4843, + "step": 3750 + }, + { + "epoch": 0.39662447257383965, + "grad_norm": 0.5827977061271667, + "learning_rate": 0.0010029074526234014, + "loss": 1.4748, + "step": 3760 + }, + { + "epoch": 0.39767932489451474, + "grad_norm": 0.6169179081916809, + "learning_rate": 0.0010005424908401104, + "loss": 1.4903, + "step": 3770 + }, + { + "epoch": 0.3987341772151899, + "grad_norm": 0.7003010511398315, + "learning_rate": 0.0009981747216058728, + "loss": 1.4755, + "step": 3780 + }, + { + "epoch": 0.399789029535865, + "grad_norm": 0.9711059331893921, + "learning_rate": 0.0009958041714526998, + "loss": 1.4781, + "step": 3790 + }, + { + "epoch": 0.4008438818565401, + "grad_norm": 0.6193206906318665, + "learning_rate": 0.0009934308669437627, + "loss": 1.4839, + "step": 3800 + }, + { + "epoch": 0.40189873417721517, 
+ "grad_norm": 0.674976646900177, + "learning_rate": 0.0009910548346730972, + "loss": 1.4729, + "step": 3810 + }, + { + "epoch": 0.4029535864978903, + "grad_norm": 0.5678077340126038, + "learning_rate": 0.0009886761012653062, + "loss": 1.4647, + "step": 3820 + }, + { + "epoch": 0.4040084388185654, + "grad_norm": 0.590964674949646, + "learning_rate": 0.000986294693375258, + "loss": 1.4663, + "step": 3830 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 0.5683673620223999, + "learning_rate": 0.000983910637687791, + "loss": 1.4832, + "step": 3840 + }, + { + "epoch": 0.4061181434599156, + "grad_norm": 0.6708618998527527, + "learning_rate": 0.0009815239609174138, + "loss": 1.4685, + "step": 3850 + }, + { + "epoch": 0.40717299578059074, + "grad_norm": 0.8948426246643066, + "learning_rate": 0.0009791346898080043, + "loss": 1.4826, + "step": 3860 + }, + { + "epoch": 0.40822784810126583, + "grad_norm": 0.9062578082084656, + "learning_rate": 0.0009767428511325122, + "loss": 1.4718, + "step": 3870 + }, + { + "epoch": 0.4092827004219409, + "grad_norm": 0.6340427398681641, + "learning_rate": 0.0009743484716926576, + "loss": 1.4642, + "step": 3880 + }, + { + "epoch": 0.410337552742616, + "grad_norm": 0.5997183918952942, + "learning_rate": 0.0009719515783186319, + "loss": 1.4612, + "step": 3890 + }, + { + "epoch": 0.41139240506329117, + "grad_norm": 0.7104225754737854, + "learning_rate": 0.0009695521978687951, + "loss": 1.4625, + "step": 3900 + }, + { + "epoch": 0.41244725738396626, + "grad_norm": 0.8072808980941772, + "learning_rate": 0.0009671503572293767, + "loss": 1.465, + "step": 3910 + }, + { + "epoch": 0.41350210970464135, + "grad_norm": 0.6142177581787109, + "learning_rate": 0.0009647460833141742, + "loss": 1.4614, + "step": 3920 + }, + { + "epoch": 0.41455696202531644, + "grad_norm": 0.6179855465888977, + "learning_rate": 0.0009623394030642507, + "loss": 1.466, + "step": 3930 + }, + { + "epoch": 0.41561181434599154, + "grad_norm": 0.5846281051635742, + 
"learning_rate": 0.0009599303434476334, + "loss": 1.4671, + "step": 3940 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 0.6077613234519958, + "learning_rate": 0.0009575189314590118, + "loss": 1.467, + "step": 3950 + }, + { + "epoch": 0.4177215189873418, + "grad_norm": 0.6063594818115234, + "learning_rate": 0.0009551051941194346, + "loss": 1.4699, + "step": 3960 + }, + { + "epoch": 0.41877637130801687, + "grad_norm": 1.084765911102295, + "learning_rate": 0.0009526891584760071, + "loss": 1.4632, + "step": 3970 + }, + { + "epoch": 0.41983122362869196, + "grad_norm": 0.9172192215919495, + "learning_rate": 0.0009502708516015889, + "loss": 1.4708, + "step": 3980 + }, + { + "epoch": 0.4208860759493671, + "grad_norm": 0.7447502613067627, + "learning_rate": 0.0009478503005944888, + "loss": 1.4584, + "step": 3990 + }, + { + "epoch": 0.4219409282700422, + "grad_norm": 1.0701297521591187, + "learning_rate": 0.0009454275325781632, + "loss": 1.4714, + "step": 4000 + }, + { + "epoch": 0.4229957805907173, + "grad_norm": 0.5956206321716309, + "learning_rate": 0.0009430025747009104, + "loss": 1.4588, + "step": 4010 + }, + { + "epoch": 0.4240506329113924, + "grad_norm": 0.6094381809234619, + "learning_rate": 0.0009405754541355677, + "loss": 1.4653, + "step": 4020 + }, + { + "epoch": 0.42510548523206754, + "grad_norm": 0.6705819964408875, + "learning_rate": 0.0009381461980792061, + "loss": 1.4574, + "step": 4030 + }, + { + "epoch": 0.42616033755274263, + "grad_norm": 0.6066882610321045, + "learning_rate": 0.0009357148337528256, + "loss": 1.4668, + "step": 4040 + }, + { + "epoch": 0.4272151898734177, + "grad_norm": 0.6710831522941589, + "learning_rate": 0.0009332813884010511, + "loss": 1.4673, + "step": 4050 + }, + { + "epoch": 0.4282700421940928, + "grad_norm": 0.6543538570404053, + "learning_rate": 0.0009308458892918259, + "loss": 1.4692, + "step": 4060 + }, + { + "epoch": 0.4293248945147679, + "grad_norm": 0.7042601704597473, + "learning_rate": 0.0009284083637161064, + 
"loss": 1.4626, + "step": 4070 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 0.800094485282898, + "learning_rate": 0.0009259688389875574, + "loss": 1.4692, + "step": 4080 + }, + { + "epoch": 0.43143459915611815, + "grad_norm": 0.7962571978569031, + "learning_rate": 0.0009235273424422442, + "loss": 1.4602, + "step": 4090 + }, + { + "epoch": 0.43248945147679324, + "grad_norm": 0.7111530900001526, + "learning_rate": 0.0009210839014383282, + "loss": 1.462, + "step": 4100 + }, + { + "epoch": 0.43354430379746833, + "grad_norm": 0.7922571897506714, + "learning_rate": 0.0009186385433557584, + "loss": 1.4547, + "step": 4110 + }, + { + "epoch": 0.4345991561181435, + "grad_norm": 0.6563818454742432, + "learning_rate": 0.0009161912955959668, + "loss": 1.4581, + "step": 4120 + }, + { + "epoch": 0.4356540084388186, + "grad_norm": 0.646672785282135, + "learning_rate": 0.000913742185581559, + "loss": 1.4504, + "step": 4130 + }, + { + "epoch": 0.43670886075949367, + "grad_norm": 0.5600813627243042, + "learning_rate": 0.0009112912407560086, + "loss": 1.4527, + "step": 4140 + }, + { + "epoch": 0.43776371308016876, + "grad_norm": 0.6336472630500793, + "learning_rate": 0.0009088384885833495, + "loss": 1.459, + "step": 4150 + }, + { + "epoch": 0.4388185654008439, + "grad_norm": 0.7688009738922119, + "learning_rate": 0.000906383956547867, + "loss": 1.4515, + "step": 4160 + }, + { + "epoch": 0.439873417721519, + "grad_norm": 0.7262480854988098, + "learning_rate": 0.0009039276721537915, + "loss": 1.4538, + "step": 4170 + }, + { + "epoch": 0.4409282700421941, + "grad_norm": 0.6007500886917114, + "learning_rate": 0.0009014696629249886, + "loss": 1.4494, + "step": 4180 + }, + { + "epoch": 0.4419831223628692, + "grad_norm": 0.6820017695426941, + "learning_rate": 0.0008990099564046522, + "loss": 1.4547, + "step": 4190 + }, + { + "epoch": 0.4430379746835443, + "grad_norm": 0.7682816982269287, + "learning_rate": 0.0008965485801549946, + "loss": 1.4578, + "step": 4200 + }, + { + "epoch": 
0.4440928270042194, + "grad_norm": 0.5682747960090637, + "learning_rate": 0.000894085561756939, + "loss": 1.4436, + "step": 4210 + }, + { + "epoch": 0.4451476793248945, + "grad_norm": 0.624946117401123, + "learning_rate": 0.0008916209288098088, + "loss": 1.4561, + "step": 4220 + }, + { + "epoch": 0.4462025316455696, + "grad_norm": 0.6534101963043213, + "learning_rate": 0.0008891547089310198, + "loss": 1.4483, + "step": 4230 + }, + { + "epoch": 0.4472573839662447, + "grad_norm": 0.6529582738876343, + "learning_rate": 0.0008866869297557699, + "loss": 1.4489, + "step": 4240 + }, + { + "epoch": 0.44831223628691985, + "grad_norm": 1.0107979774475098, + "learning_rate": 0.0008842176189367299, + "loss": 1.4531, + "step": 4250 + }, + { + "epoch": 0.44936708860759494, + "grad_norm": 0.5868625044822693, + "learning_rate": 0.0008817468041437329, + "loss": 1.4579, + "step": 4260 + }, + { + "epoch": 0.45042194092827004, + "grad_norm": 0.6682906746864319, + "learning_rate": 0.0008792745130634654, + "loss": 1.4481, + "step": 4270 + }, + { + "epoch": 0.45147679324894513, + "grad_norm": 0.6011154651641846, + "learning_rate": 0.0008768007733991561, + "loss": 1.4427, + "step": 4280 + }, + { + "epoch": 0.4525316455696203, + "grad_norm": 0.5905972719192505, + "learning_rate": 0.0008743256128702658, + "loss": 1.4518, + "step": 4290 + }, + { + "epoch": 0.45358649789029537, + "grad_norm": 0.6142104864120483, + "learning_rate": 0.0008718490592121768, + "loss": 1.4532, + "step": 4300 + }, + { + "epoch": 0.45464135021097046, + "grad_norm": 0.5816906690597534, + "learning_rate": 0.0008693711401758822, + "loss": 1.4481, + "step": 4310 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 0.7089439630508423, + "learning_rate": 0.0008668918835276747, + "loss": 1.444, + "step": 4320 + }, + { + "epoch": 0.45675105485232065, + "grad_norm": 0.6805419921875, + "learning_rate": 0.0008644113170488355, + "loss": 1.4529, + "step": 4330 + }, + { + "epoch": 0.4578059071729958, + "grad_norm": 
0.5991624593734741, + "learning_rate": 0.0008619294685353235, + "loss": 1.4436, + "step": 4340 + }, + { + "epoch": 0.4588607594936709, + "grad_norm": 0.5728858709335327, + "learning_rate": 0.0008594463657974627, + "loss": 1.4454, + "step": 4350 + }, + { + "epoch": 0.459915611814346, + "grad_norm": 0.6120371222496033, + "learning_rate": 0.0008569620366596322, + "loss": 1.4581, + "step": 4360 + }, + { + "epoch": 0.4609704641350211, + "grad_norm": 0.5799906253814697, + "learning_rate": 0.000854476508959953, + "loss": 1.4365, + "step": 4370 + }, + { + "epoch": 0.4620253164556962, + "grad_norm": 0.5929632782936096, + "learning_rate": 0.0008519898105499762, + "loss": 1.4435, + "step": 4380 + }, + { + "epoch": 0.4630801687763713, + "grad_norm": 1.1050008535385132, + "learning_rate": 0.0008495019692943721, + "loss": 1.4386, + "step": 4390 + }, + { + "epoch": 0.4641350210970464, + "grad_norm": 0.782640278339386, + "learning_rate": 0.0008470130130706166, + "loss": 1.4447, + "step": 4400 + }, + { + "epoch": 0.4651898734177215, + "grad_norm": 0.616293728351593, + "learning_rate": 0.0008445229697686795, + "loss": 1.4559, + "step": 4410 + }, + { + "epoch": 0.46624472573839665, + "grad_norm": 0.5817123055458069, + "learning_rate": 0.0008420318672907119, + "loss": 1.4396, + "step": 4420 + }, + { + "epoch": 0.46729957805907174, + "grad_norm": 0.8112937211990356, + "learning_rate": 0.0008395397335507334, + "loss": 1.4451, + "step": 4430 + }, + { + "epoch": 0.46835443037974683, + "grad_norm": 0.6168907284736633, + "learning_rate": 0.0008370465964743196, + "loss": 1.4378, + "step": 4440 + }, + { + "epoch": 0.4694092827004219, + "grad_norm": 0.66729336977005, + "learning_rate": 0.0008345524839982886, + "loss": 1.4477, + "step": 4450 + }, + { + "epoch": 0.4704641350210971, + "grad_norm": 0.6310428380966187, + "learning_rate": 0.0008320574240703886, + "loss": 1.4339, + "step": 4460 + }, + { + "epoch": 0.47151898734177217, + "grad_norm": 0.6348204612731934, + "learning_rate": 
0.0008295614446489842, + "loss": 1.4448, + "step": 4470 + }, + { + "epoch": 0.47257383966244726, + "grad_norm": 0.6154766082763672, + "learning_rate": 0.0008270645737027441, + "loss": 1.4483, + "step": 4480 + }, + { + "epoch": 0.47362869198312235, + "grad_norm": 0.5712194442749023, + "learning_rate": 0.0008245668392103259, + "loss": 1.4367, + "step": 4490 + }, + { + "epoch": 0.47468354430379744, + "grad_norm": 0.6578401923179626, + "learning_rate": 0.0008220682691600645, + "loss": 1.435, + "step": 4500 + }, + { + "epoch": 0.4757383966244726, + "grad_norm": 0.6393753290176392, + "learning_rate": 0.0008195688915496571, + "loss": 1.4425, + "step": 4510 + }, + { + "epoch": 0.4767932489451477, + "grad_norm": 0.6196770668029785, + "learning_rate": 0.0008170687343858506, + "loss": 1.4245, + "step": 4520 + }, + { + "epoch": 0.4778481012658228, + "grad_norm": 0.6908407807350159, + "learning_rate": 0.0008145678256841265, + "loss": 1.4425, + "step": 4530 + }, + { + "epoch": 0.47890295358649787, + "grad_norm": 0.6425343751907349, + "learning_rate": 0.0008120661934683879, + "loss": 1.4424, + "step": 4540 + }, + { + "epoch": 0.479957805907173, + "grad_norm": 0.8336207270622253, + "learning_rate": 0.0008095638657706456, + "loss": 1.4399, + "step": 4550 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 0.6580730080604553, + "learning_rate": 0.000807060870630703, + "loss": 1.4289, + "step": 4560 + }, + { + "epoch": 0.4820675105485232, + "grad_norm": 0.5974985957145691, + "learning_rate": 0.000804557236095843, + "loss": 1.4112, + "step": 4570 + }, + { + "epoch": 0.4831223628691983, + "grad_norm": 1.026724934577942, + "learning_rate": 0.0008020529902205129, + "loss": 1.4378, + "step": 4580 + }, + { + "epoch": 0.48417721518987344, + "grad_norm": 0.751319408416748, + "learning_rate": 0.0007995481610660108, + "loss": 1.4243, + "step": 4590 + }, + { + "epoch": 0.48523206751054854, + "grad_norm": 0.5636345744132996, + "learning_rate": 0.0007970427767001702, + "loss": 1.4386, + 
"step": 4600 + }, + { + "epoch": 0.48628691983122363, + "grad_norm": 0.7921223044395447, + "learning_rate": 0.0007945368651970464, + "loss": 1.4418, + "step": 4610 + }, + { + "epoch": 0.4873417721518987, + "grad_norm": 0.634220540523529, + "learning_rate": 0.0007920304546366013, + "loss": 1.4184, + "step": 4620 + }, + { + "epoch": 0.4883966244725738, + "grad_norm": 0.6106100082397461, + "learning_rate": 0.000789523573104389, + "loss": 1.4387, + "step": 4630 + }, + { + "epoch": 0.48945147679324896, + "grad_norm": 0.6244425773620605, + "learning_rate": 0.0007870162486912414, + "loss": 1.4397, + "step": 4640 + }, + { + "epoch": 0.49050632911392406, + "grad_norm": 0.6222022175788879, + "learning_rate": 0.0007845085094929527, + "loss": 1.4409, + "step": 4650 + }, + { + "epoch": 0.49156118143459915, + "grad_norm": 0.6542600393295288, + "learning_rate": 0.0007820003836099649, + "loss": 1.4276, + "step": 4660 + }, + { + "epoch": 0.49261603375527424, + "grad_norm": 0.6065540313720703, + "learning_rate": 0.0007794918991470537, + "loss": 1.4265, + "step": 4670 + }, + { + "epoch": 0.4936708860759494, + "grad_norm": 0.6150094866752625, + "learning_rate": 0.0007769830842130119, + "loss": 1.4241, + "step": 4680 + }, + { + "epoch": 0.4947257383966245, + "grad_norm": 0.7492828369140625, + "learning_rate": 0.0007744739669203361, + "loss": 1.4199, + "step": 4690 + }, + { + "epoch": 0.4957805907172996, + "grad_norm": 0.7529119849205017, + "learning_rate": 0.0007719645753849108, + "loss": 1.4315, + "step": 4700 + }, + { + "epoch": 0.49683544303797467, + "grad_norm": 0.5703341364860535, + "learning_rate": 0.0007694549377256932, + "loss": 1.4325, + "step": 4710 + }, + { + "epoch": 0.4978902953586498, + "grad_norm": 0.7048012018203735, + "learning_rate": 0.0007669450820643987, + "loss": 1.4272, + "step": 4720 + }, + { + "epoch": 0.4989451476793249, + "grad_norm": 0.6270100474357605, + "learning_rate": 0.0007644350365251855, + "loss": 1.4236, + "step": 4730 + }, + { + "epoch": 0.5, + 
"grad_norm": 0.6480305194854736, + "learning_rate": 0.0007619248292343399, + "loss": 1.4216, + "step": 4740 + }, + { + "epoch": 0.5010548523206751, + "grad_norm": 0.6933591365814209, + "learning_rate": 0.0007594144883199599, + "loss": 1.4425, + "step": 4750 + }, + { + "epoch": 0.5021097046413502, + "grad_norm": 0.6051285266876221, + "learning_rate": 0.0007569040419116413, + "loss": 1.428, + "step": 4760 + }, + { + "epoch": 0.5031645569620253, + "grad_norm": 0.5682891607284546, + "learning_rate": 0.000754393518140162, + "loss": 1.4213, + "step": 4770 + }, + { + "epoch": 0.5042194092827004, + "grad_norm": 0.6518904566764832, + "learning_rate": 0.0007518829451371665, + "loss": 1.4351, + "step": 4780 + }, + { + "epoch": 0.5052742616033755, + "grad_norm": 0.6126288771629333, + "learning_rate": 0.0007493723510348516, + "loss": 1.4177, + "step": 4790 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 0.5965525507926941, + "learning_rate": 0.0007468617639656496, + "loss": 1.4219, + "step": 4800 + }, + { + "epoch": 0.5073839662447257, + "grad_norm": 0.64556485414505, + "learning_rate": 0.0007443512120619144, + "loss": 1.4217, + "step": 4810 + }, + { + "epoch": 0.5084388185654009, + "grad_norm": 0.7484649419784546, + "learning_rate": 0.0007418407234556067, + "loss": 1.4417, + "step": 4820 + }, + { + "epoch": 0.509493670886076, + "grad_norm": 0.5707499980926514, + "learning_rate": 0.0007393303262779767, + "loss": 1.4222, + "step": 4830 + }, + { + "epoch": 0.510548523206751, + "grad_norm": 1.0076894760131836, + "learning_rate": 0.0007368200486592507, + "loss": 1.4196, + "step": 4840 + }, + { + "epoch": 0.5116033755274262, + "grad_norm": 0.5676963925361633, + "learning_rate": 0.0007343099187283149, + "loss": 1.4301, + "step": 4850 + }, + { + "epoch": 0.5126582278481012, + "grad_norm": 0.6073578000068665, + "learning_rate": 0.0007317999646124011, + "loss": 1.4178, + "step": 4860 + }, + { + "epoch": 0.5137130801687764, + "grad_norm": 0.6999790072441101, + "learning_rate": 
0.0007292902144367704, + "loss": 1.4135, + "step": 4870 + }, + { + "epoch": 0.5147679324894515, + "grad_norm": 0.7007207274436951, + "learning_rate": 0.0007267806963243995, + "loss": 1.4312, + "step": 4880 + }, + { + "epoch": 0.5158227848101266, + "grad_norm": 0.7890148758888245, + "learning_rate": 0.0007242714383956639, + "loss": 1.421, + "step": 4890 + }, + { + "epoch": 0.5168776371308017, + "grad_norm": 0.6128184199333191, + "learning_rate": 0.000721762468768024, + "loss": 1.4276, + "step": 4900 + }, + { + "epoch": 0.5179324894514767, + "grad_norm": 0.6349893808364868, + "learning_rate": 0.0007192538155557094, + "loss": 1.4202, + "step": 4910 + }, + { + "epoch": 0.5189873417721519, + "grad_norm": 0.934368371963501, + "learning_rate": 0.0007167455068694046, + "loss": 1.4126, + "step": 4920 + }, + { + "epoch": 0.520042194092827, + "grad_norm": 0.7725550532341003, + "learning_rate": 0.000714237570815933, + "loss": 1.4217, + "step": 4930 + }, + { + "epoch": 0.5210970464135021, + "grad_norm": 0.6636747121810913, + "learning_rate": 0.0007117300354979423, + "loss": 1.4201, + "step": 4940 + }, + { + "epoch": 0.5221518987341772, + "grad_norm": 0.5626820921897888, + "learning_rate": 0.000709222929013591, + "loss": 1.4265, + "step": 4950 + }, + { + "epoch": 0.5232067510548524, + "grad_norm": 0.643591582775116, + "learning_rate": 0.0007067162794562309, + "loss": 1.4078, + "step": 4960 + }, + { + "epoch": 0.5242616033755274, + "grad_norm": 0.9623111486434937, + "learning_rate": 0.0007042101149140943, + "loss": 1.4155, + "step": 4970 + }, + { + "epoch": 0.5253164556962026, + "grad_norm": 0.648608922958374, + "learning_rate": 0.0007017044634699787, + "loss": 1.4098, + "step": 4980 + }, + { + "epoch": 0.5263713080168776, + "grad_norm": 0.6090520620346069, + "learning_rate": 0.0006991993532009319, + "loss": 1.4095, + "step": 4990 + }, + { + "epoch": 0.5274261603375527, + "grad_norm": 0.5978587865829468, + "learning_rate": 0.0006966948121779378, + "loss": 1.4236, + "step": 5000 + 
}, + { + "epoch": 0.5284810126582279, + "grad_norm": 0.5725797414779663, + "learning_rate": 0.000694190868465601, + "loss": 1.4146, + "step": 5010 + }, + { + "epoch": 0.5295358649789029, + "grad_norm": 0.6230322122573853, + "learning_rate": 0.0006916875501218343, + "loss": 1.409, + "step": 5020 + }, + { + "epoch": 0.5305907172995781, + "grad_norm": 0.6581682562828064, + "learning_rate": 0.0006891848851975416, + "loss": 1.3963, + "step": 5030 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 0.6331873536109924, + "learning_rate": 0.0006866829017363054, + "loss": 1.4225, + "step": 5040 + }, + { + "epoch": 0.5327004219409283, + "grad_norm": 0.5603877902030945, + "learning_rate": 0.0006841816277740722, + "loss": 1.4105, + "step": 5050 + }, + { + "epoch": 0.5337552742616034, + "grad_norm": 0.7254918813705444, + "learning_rate": 0.0006816810913388379, + "loss": 1.4222, + "step": 5060 + }, + { + "epoch": 0.5348101265822784, + "grad_norm": 0.8332406878471375, + "learning_rate": 0.0006791813204503342, + "loss": 1.4088, + "step": 5070 + }, + { + "epoch": 0.5358649789029536, + "grad_norm": 0.6715386509895325, + "learning_rate": 0.0006766823431197147, + "loss": 1.4254, + "step": 5080 + }, + { + "epoch": 0.5369198312236287, + "grad_norm": 0.6891464591026306, + "learning_rate": 0.0006741841873492406, + "loss": 1.4156, + "step": 5090 + }, + { + "epoch": 0.5379746835443038, + "grad_norm": 0.5675250291824341, + "learning_rate": 0.0006716868811319671, + "loss": 1.3974, + "step": 5100 + }, + { + "epoch": 0.5390295358649789, + "grad_norm": 0.6305409073829651, + "learning_rate": 0.0006691904524514297, + "loss": 1.4124, + "step": 5110 + }, + { + "epoch": 0.540084388185654, + "grad_norm": 0.7041273713111877, + "learning_rate": 0.0006666949292813306, + "loss": 1.4067, + "step": 5120 + }, + { + "epoch": 0.5411392405063291, + "grad_norm": 0.7549657821655273, + "learning_rate": 0.0006642003395852258, + "loss": 1.4097, + "step": 5130 + }, + { + "epoch": 0.5421940928270043, + "grad_norm": 
0.5689598917961121, + "learning_rate": 0.0006617067113162103, + "loss": 1.4233, + "step": 5140 + }, + { + "epoch": 0.5432489451476793, + "grad_norm": 0.6756426692008972, + "learning_rate": 0.0006592140724166073, + "loss": 1.4189, + "step": 5150 + }, + { + "epoch": 0.5443037974683544, + "grad_norm": 0.7814314961433411, + "learning_rate": 0.0006567224508176523, + "loss": 1.4125, + "step": 5160 + }, + { + "epoch": 0.5453586497890295, + "grad_norm": 0.5864002704620361, + "learning_rate": 0.0006542318744391821, + "loss": 1.4097, + "step": 5170 + }, + { + "epoch": 0.5464135021097046, + "grad_norm": 0.6386482119560242, + "learning_rate": 0.0006517423711893209, + "loss": 1.4208, + "step": 5180 + }, + { + "epoch": 0.5474683544303798, + "grad_norm": 0.7241588830947876, + "learning_rate": 0.0006492539689641685, + "loss": 1.4013, + "step": 5190 + }, + { + "epoch": 0.5485232067510548, + "grad_norm": 0.7079835534095764, + "learning_rate": 0.0006467666956474865, + "loss": 1.4146, + "step": 5200 + }, + { + "epoch": 0.54957805907173, + "grad_norm": 0.5927901864051819, + "learning_rate": 0.0006442805791103873, + "loss": 1.4011, + "step": 5210 + }, + { + "epoch": 0.5506329113924051, + "grad_norm": 0.6164871454238892, + "learning_rate": 0.0006417956472110205, + "loss": 1.4103, + "step": 5220 + }, + { + "epoch": 0.5516877637130801, + "grad_norm": 1.0653833150863647, + "learning_rate": 0.0006393119277942614, + "loss": 1.4028, + "step": 5230 + }, + { + "epoch": 0.5527426160337553, + "grad_norm": 1.0406842231750488, + "learning_rate": 0.0006368294486913987, + "loss": 1.4098, + "step": 5240 + }, + { + "epoch": 0.5537974683544303, + "grad_norm": 0.7087342143058777, + "learning_rate": 0.0006343482377198232, + "loss": 1.4005, + "step": 5250 + }, + { + "epoch": 0.5548523206751055, + "grad_norm": 0.5774527788162231, + "learning_rate": 0.0006318683226827151, + "loss": 1.4044, + "step": 5260 + }, + { + "epoch": 0.5559071729957806, + "grad_norm": 0.6040995121002197, + "learning_rate": 
0.0006293897313687331, + "loss": 1.4117, + "step": 5270 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 0.658976674079895, + "learning_rate": 0.0006269124915517037, + "loss": 1.4021, + "step": 5280 + }, + { + "epoch": 0.5580168776371308, + "grad_norm": 0.879693865776062, + "learning_rate": 0.0006244366309903084, + "loss": 1.4051, + "step": 5290 + }, + { + "epoch": 0.5590717299578059, + "grad_norm": 0.6642392873764038, + "learning_rate": 0.0006219621774277737, + "loss": 1.4082, + "step": 5300 + }, + { + "epoch": 0.560126582278481, + "grad_norm": 0.7217620611190796, + "learning_rate": 0.00061948915859156, + "loss": 1.3969, + "step": 5310 + }, + { + "epoch": 0.5611814345991561, + "grad_norm": 0.5699112415313721, + "learning_rate": 0.0006170176021930509, + "loss": 1.3978, + "step": 5320 + }, + { + "epoch": 0.5622362869198312, + "grad_norm": 0.6053035259246826, + "learning_rate": 0.0006145475359272424, + "loss": 1.4057, + "step": 5330 + }, + { + "epoch": 0.5632911392405063, + "grad_norm": 0.6998172998428345, + "learning_rate": 0.0006120789874724336, + "loss": 1.3996, + "step": 5340 + }, + { + "epoch": 0.5643459915611815, + "grad_norm": 0.6016806960105896, + "learning_rate": 0.0006096119844899151, + "loss": 1.3932, + "step": 5350 + }, + { + "epoch": 0.5654008438818565, + "grad_norm": 0.616733729839325, + "learning_rate": 0.0006071465546236601, + "loss": 1.4006, + "step": 5360 + }, + { + "epoch": 0.5664556962025317, + "grad_norm": 0.6303740739822388, + "learning_rate": 0.0006046827255000135, + "loss": 1.3999, + "step": 5370 + }, + { + "epoch": 0.5675105485232067, + "grad_norm": 0.663960874080658, + "learning_rate": 0.0006022205247273845, + "loss": 1.3972, + "step": 5380 + }, + { + "epoch": 0.5685654008438819, + "grad_norm": 0.6034201383590698, + "learning_rate": 0.0005997599798959343, + "loss": 1.3952, + "step": 5390 + }, + { + "epoch": 0.569620253164557, + "grad_norm": 0.72989821434021, + "learning_rate": 0.0005973011185772694, + "loss": 1.3968, + "step": 5400 + 
}, + { + "epoch": 0.570675105485232, + "grad_norm": 0.5725027918815613, + "learning_rate": 0.0005948439683241318, + "loss": 1.3984, + "step": 5410 + }, + { + "epoch": 0.5717299578059072, + "grad_norm": 0.6143150329589844, + "learning_rate": 0.0005923885566700896, + "loss": 1.3924, + "step": 5420 + }, + { + "epoch": 0.5727848101265823, + "grad_norm": 0.6189152598381042, + "learning_rate": 0.0005899349111292293, + "loss": 1.3879, + "step": 5430 + }, + { + "epoch": 0.5738396624472574, + "grad_norm": 0.566215991973877, + "learning_rate": 0.0005874830591958474, + "loss": 1.3857, + "step": 5440 + }, + { + "epoch": 0.5748945147679325, + "grad_norm": 0.6087139248847961, + "learning_rate": 0.000585033028344142, + "loss": 1.3925, + "step": 5450 + }, + { + "epoch": 0.5759493670886076, + "grad_norm": 0.5976879596710205, + "learning_rate": 0.0005825848460279048, + "loss": 1.3942, + "step": 5460 + }, + { + "epoch": 0.5770042194092827, + "grad_norm": 0.6875972151756287, + "learning_rate": 0.0005801385396802146, + "loss": 1.395, + "step": 5470 + }, + { + "epoch": 0.5780590717299579, + "grad_norm": 0.7841419577598572, + "learning_rate": 0.0005776941367131282, + "loss": 1.3987, + "step": 5480 + }, + { + "epoch": 0.5791139240506329, + "grad_norm": 0.5791902542114258, + "learning_rate": 0.0005752516645173745, + "loss": 1.4014, + "step": 5490 + }, + { + "epoch": 0.580168776371308, + "grad_norm": 0.8053807616233826, + "learning_rate": 0.0005728111504620472, + "loss": 1.3918, + "step": 5500 + }, + { + "epoch": 0.5812236286919831, + "grad_norm": 0.6655654907226562, + "learning_rate": 0.0005703726218942976, + "loss": 1.3982, + "step": 5510 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 0.5464580059051514, + "learning_rate": 0.0005679361061390295, + "loss": 1.3906, + "step": 5520 + }, + { + "epoch": 0.5833333333333334, + "grad_norm": 0.621429979801178, + "learning_rate": 0.0005655016304985908, + "loss": 1.3839, + "step": 5530 + }, + { + "epoch": 0.5843881856540084, + "grad_norm": 
0.5755382180213928, + "learning_rate": 0.0005630692222524709, + "loss": 1.381, + "step": 5540 + }, + { + "epoch": 0.5854430379746836, + "grad_norm": 0.6354694366455078, + "learning_rate": 0.0005606389086569911, + "loss": 1.3987, + "step": 5550 + }, + { + "epoch": 0.5864978902953587, + "grad_norm": 0.7283250093460083, + "learning_rate": 0.0005582107169450023, + "loss": 1.4041, + "step": 5560 + }, + { + "epoch": 0.5875527426160337, + "grad_norm": 0.5821378827095032, + "learning_rate": 0.0005557846743255783, + "loss": 1.3899, + "step": 5570 + }, + { + "epoch": 0.5886075949367089, + "grad_norm": 0.6803022027015686, + "learning_rate": 0.0005533608079837109, + "loss": 1.3876, + "step": 5580 + }, + { + "epoch": 0.5896624472573839, + "grad_norm": 0.5731063485145569, + "learning_rate": 0.0005509391450800061, + "loss": 1.3894, + "step": 5590 + }, + { + "epoch": 0.5907172995780591, + "grad_norm": 0.5965539813041687, + "learning_rate": 0.0005485197127503795, + "loss": 1.3832, + "step": 5600 + }, + { + "epoch": 0.5917721518987342, + "grad_norm": 0.5898882746696472, + "learning_rate": 0.0005461025381057516, + "loss": 1.39, + "step": 5610 + }, + { + "epoch": 0.5928270042194093, + "grad_norm": 0.6075681447982788, + "learning_rate": 0.0005436876482317444, + "loss": 1.3982, + "step": 5620 + }, + { + "epoch": 0.5938818565400844, + "grad_norm": 0.6221696138381958, + "learning_rate": 0.0005412750701883782, + "loss": 1.3915, + "step": 5630 + }, + { + "epoch": 0.5949367088607594, + "grad_norm": 0.7819286584854126, + "learning_rate": 0.0005388648310097682, + "loss": 1.3995, + "step": 5640 + }, + { + "epoch": 0.5959915611814346, + "grad_norm": 0.8376879096031189, + "learning_rate": 0.000536456957703821, + "loss": 1.3942, + "step": 5650 + }, + { + "epoch": 0.5970464135021097, + "grad_norm": 0.6225863695144653, + "learning_rate": 0.0005340514772519324, + "loss": 1.3897, + "step": 5660 + }, + { + "epoch": 0.5981012658227848, + "grad_norm": 0.6216580271720886, + "learning_rate": 
0.0005316484166086863, + "loss": 1.3952, + "step": 5670 + }, + { + "epoch": 0.5991561181434599, + "grad_norm": 0.6364496350288391, + "learning_rate": 0.00052924780270155, + "loss": 1.3961, + "step": 5680 + }, + { + "epoch": 0.6002109704641351, + "grad_norm": 0.6016349196434021, + "learning_rate": 0.0005268496624305747, + "loss": 1.3755, + "step": 5690 + }, + { + "epoch": 0.6012658227848101, + "grad_norm": 0.6804255247116089, + "learning_rate": 0.0005244540226680931, + "loss": 1.3831, + "step": 5700 + }, + { + "epoch": 0.6023206751054853, + "grad_norm": 0.633782684803009, + "learning_rate": 0.0005220609102584185, + "loss": 1.384, + "step": 5710 + }, + { + "epoch": 0.6033755274261603, + "grad_norm": 0.5835444927215576, + "learning_rate": 0.0005196703520175437, + "loss": 1.3921, + "step": 5720 + }, + { + "epoch": 0.6044303797468354, + "grad_norm": 0.722180962562561, + "learning_rate": 0.0005172823747328415, + "loss": 1.3879, + "step": 5730 + }, + { + "epoch": 0.6054852320675106, + "grad_norm": 0.642779529094696, + "learning_rate": 0.0005148970051627632, + "loss": 1.3847, + "step": 5740 + }, + { + "epoch": 0.6065400843881856, + "grad_norm": 0.6276348233222961, + "learning_rate": 0.0005125142700365394, + "loss": 1.3586, + "step": 5750 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 0.5951650738716125, + "learning_rate": 0.000510134196053881, + "loss": 1.3693, + "step": 5760 + }, + { + "epoch": 0.6086497890295358, + "grad_norm": 0.8358609676361084, + "learning_rate": 0.0005077568098846789, + "loss": 1.3709, + "step": 5770 + }, + { + "epoch": 0.609704641350211, + "grad_norm": 0.5760234594345093, + "learning_rate": 0.000505382138168706, + "loss": 1.3784, + "step": 5780 + }, + { + "epoch": 0.6107594936708861, + "grad_norm": 0.6525946855545044, + "learning_rate": 0.0005030102075153181, + "loss": 1.3745, + "step": 5790 + }, + { + "epoch": 0.6118143459915611, + "grad_norm": 0.816611647605896, + "learning_rate": 0.0005006410445031569, + "loss": 1.3786, + "step": 5800 + 
}, + { + "epoch": 0.6128691983122363, + "grad_norm": 0.5947552919387817, + "learning_rate": 0.0004982746756798507, + "loss": 1.3843, + "step": 5810 + }, + { + "epoch": 0.6139240506329114, + "grad_norm": 0.6092637181282043, + "learning_rate": 0.0004959111275617174, + "loss": 1.3962, + "step": 5820 + }, + { + "epoch": 0.6149789029535865, + "grad_norm": 0.6072807908058167, + "learning_rate": 0.0004935504266334677, + "loss": 1.3784, + "step": 5830 + }, + { + "epoch": 0.6160337552742616, + "grad_norm": 0.6584548354148865, + "learning_rate": 0.0004911925993479085, + "loss": 1.392, + "step": 5840 + }, + { + "epoch": 0.6170886075949367, + "grad_norm": 0.6108152866363525, + "learning_rate": 0.0004888376721256456, + "loss": 1.396, + "step": 5850 + }, + { + "epoch": 0.6181434599156118, + "grad_norm": 0.6438764333724976, + "learning_rate": 0.00048648567135478805, + "loss": 1.38, + "step": 5860 + }, + { + "epoch": 0.619198312236287, + "grad_norm": 0.5953920483589172, + "learning_rate": 0.0004841366233906538, + "loss": 1.3759, + "step": 5870 + }, + { + "epoch": 0.620253164556962, + "grad_norm": 0.5765502452850342, + "learning_rate": 0.0004817905545554717, + "loss": 1.3866, + "step": 5880 + }, + { + "epoch": 0.6213080168776371, + "grad_norm": 0.6278926134109497, + "learning_rate": 0.00047944749113808884, + "loss": 1.3758, + "step": 5890 + }, + { + "epoch": 0.6223628691983122, + "grad_norm": 0.6677291989326477, + "learning_rate": 0.00047710745939367474, + "loss": 1.3764, + "step": 5900 + }, + { + "epoch": 0.6234177215189873, + "grad_norm": 0.8307394981384277, + "learning_rate": 0.0004747704855434278, + "loss": 1.3723, + "step": 5910 + }, + { + "epoch": 0.6244725738396625, + "grad_norm": 0.9721304178237915, + "learning_rate": 0.0004724365957742809, + "loss": 1.3696, + "step": 5920 + }, + { + "epoch": 0.6255274261603375, + "grad_norm": 0.5986323356628418, + "learning_rate": 0.00047010581623860883, + "loss": 1.3735, + "step": 5930 + }, + { + "epoch": 0.6265822784810127, + 
"grad_norm": 0.7444632649421692, + "learning_rate": 0.0004677781730539342, + "loss": 1.3797, + "step": 5940 + }, + { + "epoch": 0.6276371308016878, + "grad_norm": 0.7583586573600769, + "learning_rate": 0.0004654536923026356, + "loss": 1.3698, + "step": 5950 + }, + { + "epoch": 0.6286919831223629, + "grad_norm": 0.6169868111610413, + "learning_rate": 0.00046313240003165466, + "loss": 1.3761, + "step": 5960 + }, + { + "epoch": 0.629746835443038, + "grad_norm": 0.6253679394721985, + "learning_rate": 0.0004608143222522048, + "loss": 1.3759, + "step": 5970 + }, + { + "epoch": 0.630801687763713, + "grad_norm": 0.5868454575538635, + "learning_rate": 0.0004584994849394795, + "loss": 1.3588, + "step": 5980 + }, + { + "epoch": 0.6318565400843882, + "grad_norm": 0.7159611582756042, + "learning_rate": 0.0004561879140323607, + "loss": 1.3778, + "step": 5990 + }, + { + "epoch": 0.6329113924050633, + "grad_norm": 0.6684842109680176, + "learning_rate": 0.0004538796354331298, + "loss": 1.3837, + "step": 6000 + }, + { + "epoch": 0.6339662447257384, + "grad_norm": 0.5983527898788452, + "learning_rate": 0.0004515746750071754, + "loss": 1.3728, + "step": 6010 + }, + { + "epoch": 0.6350210970464135, + "grad_norm": 0.6036336421966553, + "learning_rate": 0.0004492730585827046, + "loss": 1.3764, + "step": 6020 + }, + { + "epoch": 0.6360759493670886, + "grad_norm": 0.6091444492340088, + "learning_rate": 0.0004469748119504529, + "loss": 1.3593, + "step": 6030 + }, + { + "epoch": 0.6371308016877637, + "grad_norm": 0.6025694012641907, + "learning_rate": 0.0004446799608633964, + "loss": 1.378, + "step": 6040 + }, + { + "epoch": 0.6381856540084389, + "grad_norm": 0.6018792986869812, + "learning_rate": 0.00044238853103646154, + "loss": 1.3606, + "step": 6050 + }, + { + "epoch": 0.6392405063291139, + "grad_norm": 0.8229264616966248, + "learning_rate": 0.00044010054814623925, + "loss": 1.3788, + "step": 6060 + }, + { + "epoch": 0.640295358649789, + "grad_norm": 0.689720869064331, + "learning_rate": 
0.0004378160378306944, + "loss": 1.3686, + "step": 6070 + }, + { + "epoch": 0.6413502109704642, + "grad_norm": 0.6157822608947754, + "learning_rate": 0.00043553502568888095, + "loss": 1.3679, + "step": 6080 + }, + { + "epoch": 0.6424050632911392, + "grad_norm": 0.6356511116027832, + "learning_rate": 0.0004332575372806534, + "loss": 1.3678, + "step": 6090 + }, + { + "epoch": 0.6434599156118144, + "grad_norm": 0.7170621156692505, + "learning_rate": 0.00043098359812638145, + "loss": 1.3631, + "step": 6100 + }, + { + "epoch": 0.6445147679324894, + "grad_norm": 0.5794965624809265, + "learning_rate": 0.00042871323370666383, + "loss": 1.3751, + "step": 6110 + }, + { + "epoch": 0.6455696202531646, + "grad_norm": 0.6166805028915405, + "learning_rate": 0.0004264464694620421, + "loss": 1.3702, + "step": 6120 + }, + { + "epoch": 0.6466244725738397, + "grad_norm": 0.6133190393447876, + "learning_rate": 0.000424183330792717, + "loss": 1.368, + "step": 6130 + }, + { + "epoch": 0.6476793248945147, + "grad_norm": 0.7885080575942993, + "learning_rate": 0.0004219238430582621, + "loss": 1.3688, + "step": 6140 + }, + { + "epoch": 0.6487341772151899, + "grad_norm": 0.5877053737640381, + "learning_rate": 0.0004196680315773408, + "loss": 1.3774, + "step": 6150 + }, + { + "epoch": 0.6497890295358649, + "grad_norm": 0.5834600925445557, + "learning_rate": 0.00041741592162742214, + "loss": 1.3687, + "step": 6160 + }, + { + "epoch": 0.6508438818565401, + "grad_norm": 0.6741209030151367, + "learning_rate": 0.0004151675384444978, + "loss": 1.3504, + "step": 6170 + }, + { + "epoch": 0.6518987341772152, + "grad_norm": 0.580945611000061, + "learning_rate": 0.00041292290722279914, + "loss": 1.3705, + "step": 6180 + }, + { + "epoch": 0.6529535864978903, + "grad_norm": 0.6574515700340271, + "learning_rate": 0.00041068205311451517, + "loss": 1.375, + "step": 6190 + }, + { + "epoch": 0.6540084388185654, + "grad_norm": 0.5988293290138245, + "learning_rate": 0.00040844500122951026, + "loss": 1.3666, + 
"step": 6200 + }, + { + "epoch": 0.6550632911392406, + "grad_norm": 0.587431788444519, + "learning_rate": 0.00040621177663504313, + "loss": 1.3632, + "step": 6210 + }, + { + "epoch": 0.6561181434599156, + "grad_norm": 0.6055265665054321, + "learning_rate": 0.00040398240435548583, + "loss": 1.3706, + "step": 6220 + }, + { + "epoch": 0.6571729957805907, + "grad_norm": 0.622055172920227, + "learning_rate": 0.00040175690937204324, + "loss": 1.3583, + "step": 6230 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 0.589766263961792, + "learning_rate": 0.00039953531662247343, + "loss": 1.3631, + "step": 6240 + }, + { + "epoch": 0.6592827004219409, + "grad_norm": 0.5884185433387756, + "learning_rate": 0.0003973176510008075, + "loss": 1.3716, + "step": 6250 + }, + { + "epoch": 0.6603375527426161, + "grad_norm": 0.6426213979721069, + "learning_rate": 0.00039510393735707233, + "loss": 1.3639, + "step": 6260 + }, + { + "epoch": 0.6613924050632911, + "grad_norm": 0.5970112681388855, + "learning_rate": 0.00039289420049700986, + "loss": 1.3709, + "step": 6270 + }, + { + "epoch": 0.6624472573839663, + "grad_norm": 0.5859561562538147, + "learning_rate": 0.0003906884651818006, + "loss": 1.3699, + "step": 6280 + }, + { + "epoch": 0.6635021097046413, + "grad_norm": 0.6624287962913513, + "learning_rate": 0.00038848675612778577, + "loss": 1.3665, + "step": 6290 + }, + { + "epoch": 0.6645569620253164, + "grad_norm": 0.5863662958145142, + "learning_rate": 0.00038628909800619046, + "loss": 1.3603, + "step": 6300 + }, + { + "epoch": 0.6656118143459916, + "grad_norm": 0.658142626285553, + "learning_rate": 0.0003840955154428467, + "loss": 1.3599, + "step": 6310 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.6615561842918396, + "learning_rate": 0.00038190603301791864, + "loss": 1.3551, + "step": 6320 + }, + { + "epoch": 0.6677215189873418, + "grad_norm": 0.6761365532875061, + "learning_rate": 0.0003797206752656258, + "loss": 1.3642, + "step": 6330 + }, + { + "epoch": 
0.6687763713080169, + "grad_norm": 0.6258945465087891, + "learning_rate": 0.0003775394666739688, + "loss": 1.3578, + "step": 6340 + }, + { + "epoch": 0.669831223628692, + "grad_norm": 0.5698710680007935, + "learning_rate": 0.00037536243168445507, + "loss": 1.3612, + "step": 6350 + }, + { + "epoch": 0.6708860759493671, + "grad_norm": 0.5901042222976685, + "learning_rate": 0.0003731895946918246, + "loss": 1.3641, + "step": 6360 + }, + { + "epoch": 0.6719409282700421, + "grad_norm": 0.5920996069908142, + "learning_rate": 0.0003710209800437769, + "loss": 1.3437, + "step": 6370 + }, + { + "epoch": 0.6729957805907173, + "grad_norm": 0.5895184874534607, + "learning_rate": 0.00036885661204069767, + "loss": 1.3484, + "step": 6380 + }, + { + "epoch": 0.6740506329113924, + "grad_norm": 0.6524982452392578, + "learning_rate": 0.0003666965149353878, + "loss": 1.3634, + "step": 6390 + }, + { + "epoch": 0.6751054852320675, + "grad_norm": 0.7138853669166565, + "learning_rate": 0.0003645407129327898, + "loss": 1.3637, + "step": 6400 + }, + { + "epoch": 0.6761603375527426, + "grad_norm": 0.7341338396072388, + "learning_rate": 0.00036238923018971783, + "loss": 1.3585, + "step": 6410 + }, + { + "epoch": 0.6772151898734177, + "grad_norm": 0.5982068181037903, + "learning_rate": 0.00036045660861445684, + "loss": 1.3505, + "step": 6420 + }, + { + "epoch": 0.6782700421940928, + "grad_norm": 0.7215583324432373, + "learning_rate": 0.0003583133988429468, + "loss": 1.3597, + "step": 6430 + }, + { + "epoch": 0.679324894514768, + "grad_norm": 0.5957006812095642, + "learning_rate": 0.0003561745781110579, + "loss": 1.341, + "step": 6440 + }, + { + "epoch": 0.680379746835443, + "grad_norm": 0.593417763710022, + "learning_rate": 0.00035404017038532045, + "loss": 1.3483, + "step": 6450 + }, + { + "epoch": 0.6814345991561181, + "grad_norm": 0.9390660524368286, + "learning_rate": 0.00035191019958281575, + "loss": 1.3604, + "step": 6460 + }, + { + "epoch": 0.6824894514767933, + "grad_norm": 
0.5950254201889038, + "learning_rate": 0.00034978468957090635, + "loss": 1.3566, + "step": 6470 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 1.1260429620742798, + "learning_rate": 0.0003476636641669699, + "loss": 1.3618, + "step": 6480 + }, + { + "epoch": 0.6845991561181435, + "grad_norm": 0.7333889007568359, + "learning_rate": 0.0003455471471381318, + "loss": 1.3589, + "step": 6490 + }, + { + "epoch": 0.6856540084388185, + "grad_norm": 0.7040262222290039, + "learning_rate": 0.0003434351622009985, + "loss": 1.3718, + "step": 6500 + }, + { + "epoch": 0.6867088607594937, + "grad_norm": 0.569573700428009, + "learning_rate": 0.0003413277330213928, + "loss": 1.3705, + "step": 6510 + }, + { + "epoch": 0.6877637130801688, + "grad_norm": 0.6399335861206055, + "learning_rate": 0.0003392248832140876, + "loss": 1.3573, + "step": 6520 + }, + { + "epoch": 0.6888185654008439, + "grad_norm": 0.619164764881134, + "learning_rate": 0.00033712663634254163, + "loss": 1.3487, + "step": 6530 + }, + { + "epoch": 0.689873417721519, + "grad_norm": 0.5800164341926575, + "learning_rate": 0.00033503301591863586, + "loss": 1.3558, + "step": 6540 + }, + { + "epoch": 0.6909282700421941, + "grad_norm": 0.6002144813537598, + "learning_rate": 0.0003329440454024092, + "loss": 1.3516, + "step": 6550 + }, + { + "epoch": 0.6919831223628692, + "grad_norm": 0.977389395236969, + "learning_rate": 0.0003308597482017965, + "loss": 1.3534, + "step": 6560 + }, + { + "epoch": 0.6930379746835443, + "grad_norm": 0.6972374320030212, + "learning_rate": 0.0003287801476723656, + "loss": 1.3511, + "step": 6570 + }, + { + "epoch": 0.6940928270042194, + "grad_norm": 0.660836398601532, + "learning_rate": 0.00032670526711705536, + "loss": 1.3488, + "step": 6580 + }, + { + "epoch": 0.6951476793248945, + "grad_norm": 0.8059242367744446, + "learning_rate": 0.0003246351297859164, + "loss": 1.3534, + "step": 6590 + }, + { + "epoch": 0.6962025316455697, + "grad_norm": 0.6628442406654358, + "learning_rate": 
0.00032256975887584783, + "loss": 1.359, + "step": 6600 + }, + { + "epoch": 0.6972573839662447, + "grad_norm": 0.7931041717529297, + "learning_rate": 0.00032050917753033935, + "loss": 1.347, + "step": 6610 + }, + { + "epoch": 0.6983122362869199, + "grad_norm": 0.696062445640564, + "learning_rate": 0.000318453408839211, + "loss": 1.3549, + "step": 6620 + }, + { + "epoch": 0.6993670886075949, + "grad_norm": 0.6515058875083923, + "learning_rate": 0.0003164024758383548, + "loss": 1.3476, + "step": 6630 + }, + { + "epoch": 0.70042194092827, + "grad_norm": 0.85069340467453, + "learning_rate": 0.00031435640150947645, + "loss": 1.3521, + "step": 6640 + }, + { + "epoch": 0.7014767932489452, + "grad_norm": 0.6602151393890381, + "learning_rate": 0.0003123152087798376, + "loss": 1.3448, + "step": 6650 + }, + { + "epoch": 0.7025316455696202, + "grad_norm": 0.6225120425224304, + "learning_rate": 0.00031027892052200003, + "loss": 1.3455, + "step": 6660 + }, + { + "epoch": 0.7035864978902954, + "grad_norm": 0.6053715944290161, + "learning_rate": 0.0003082475595535677, + "loss": 1.3496, + "step": 6670 + }, + { + "epoch": 0.7046413502109705, + "grad_norm": 0.6123716235160828, + "learning_rate": 0.00030622114863693205, + "loss": 1.3501, + "step": 6680 + }, + { + "epoch": 0.7056962025316456, + "grad_norm": 0.5887876152992249, + "learning_rate": 0.00030419971047901704, + "loss": 1.3465, + "step": 6690 + }, + { + "epoch": 0.7067510548523207, + "grad_norm": 0.5915765166282654, + "learning_rate": 0.00030218326773102407, + "loss": 1.3494, + "step": 6700 + }, + { + "epoch": 0.7078059071729957, + "grad_norm": 0.592790424823761, + "learning_rate": 0.00030017184298817873, + "loss": 1.3474, + "step": 6710 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 0.5996489524841309, + "learning_rate": 0.00029816545878947763, + "loss": 1.3381, + "step": 6720 + }, + { + "epoch": 0.709915611814346, + "grad_norm": 0.6113138198852539, + "learning_rate": 0.00029616413761743537, + "loss": 1.3475, + 
"step": 6730 + }, + { + "epoch": 0.7109704641350211, + "grad_norm": 0.8245947360992432, + "learning_rate": 0.00029416790189783286, + "loss": 1.3497, + "step": 6740 + }, + { + "epoch": 0.7120253164556962, + "grad_norm": 0.5755251049995422, + "learning_rate": 0.000292176773999466, + "loss": 1.3535, + "step": 6750 + }, + { + "epoch": 0.7130801687763713, + "grad_norm": 0.5993833541870117, + "learning_rate": 0.0002901907762338952, + "loss": 1.3469, + "step": 6760 + }, + { + "epoch": 0.7141350210970464, + "grad_norm": 0.5684300065040588, + "learning_rate": 0.0002882099308551951, + "loss": 1.3486, + "step": 6770 + }, + { + "epoch": 0.7151898734177216, + "grad_norm": 0.581282913684845, + "learning_rate": 0.00028623426005970517, + "loss": 1.3458, + "step": 6780 + }, + { + "epoch": 0.7162447257383966, + "grad_norm": 0.5974403619766235, + "learning_rate": 0.00028426378598578187, + "loss": 1.3521, + "step": 6790 + }, + { + "epoch": 0.7172995780590717, + "grad_norm": 0.583656907081604, + "learning_rate": 0.0002822985307135491, + "loss": 1.3395, + "step": 6800 + }, + { + "epoch": 0.7183544303797469, + "grad_norm": 0.6583023071289062, + "learning_rate": 0.0002803385162646518, + "loss": 1.3434, + "step": 6810 + }, + { + "epoch": 0.7194092827004219, + "grad_norm": 0.6488555073738098, + "learning_rate": 0.0002783837646020089, + "loss": 1.3456, + "step": 6820 + }, + { + "epoch": 0.7204641350210971, + "grad_norm": 0.6238535642623901, + "learning_rate": 0.0002764342976295673, + "loss": 1.344, + "step": 6830 + }, + { + "epoch": 0.7215189873417721, + "grad_norm": 0.7702028155326843, + "learning_rate": 0.00027449013719205623, + "loss": 1.3334, + "step": 6840 + }, + { + "epoch": 0.7225738396624473, + "grad_norm": 0.6324392557144165, + "learning_rate": 0.00027255130507474276, + "loss": 1.3412, + "step": 6850 + }, + { + "epoch": 0.7236286919831224, + "grad_norm": 0.6360041499137878, + "learning_rate": 0.00027061782300318726, + "loss": 1.3358, + "step": 6860 + }, + { + "epoch": 
0.7246835443037974, + "grad_norm": 0.5927872061729431, + "learning_rate": 0.0002686897126430009, + "loss": 1.3353, + "step": 6870 + }, + { + "epoch": 0.7257383966244726, + "grad_norm": 0.5667581558227539, + "learning_rate": 0.00026676699559960145, + "loss": 1.3456, + "step": 6880 + }, + { + "epoch": 0.7267932489451476, + "grad_norm": 0.6000430583953857, + "learning_rate": 0.00026484969341797224, + "loss": 1.3309, + "step": 6890 + }, + { + "epoch": 0.7278481012658228, + "grad_norm": 0.601155698299408, + "learning_rate": 0.0002629378275824204, + "loss": 1.3326, + "step": 6900 + }, + { + "epoch": 0.7289029535864979, + "grad_norm": 0.6132747530937195, + "learning_rate": 0.00026103141951633617, + "loss": 1.3596, + "step": 6910 + }, + { + "epoch": 0.729957805907173, + "grad_norm": 0.6056342720985413, + "learning_rate": 0.00025913049058195277, + "loss": 1.3384, + "step": 6920 + }, + { + "epoch": 0.7310126582278481, + "grad_norm": 0.662994384765625, + "learning_rate": 0.0002572350620801072, + "loss": 1.3451, + "step": 6930 + }, + { + "epoch": 0.7320675105485233, + "grad_norm": 0.5929667353630066, + "learning_rate": 0.0002553451552500012, + "loss": 1.3481, + "step": 6940 + }, + { + "epoch": 0.7331223628691983, + "grad_norm": 0.6371379494667053, + "learning_rate": 0.0002534607912689637, + "loss": 1.3458, + "step": 6950 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 0.5961392521858215, + "learning_rate": 0.00025158199125221325, + "loss": 1.3367, + "step": 6960 + }, + { + "epoch": 0.7352320675105485, + "grad_norm": 0.6059876084327698, + "learning_rate": 0.0002497087762526211, + "loss": 1.3282, + "step": 6970 + }, + { + "epoch": 0.7362869198312236, + "grad_norm": 0.5926563739776611, + "learning_rate": 0.0002478411672604766, + "loss": 1.3359, + "step": 6980 + }, + { + "epoch": 0.7373417721518988, + "grad_norm": 0.7766771912574768, + "learning_rate": 0.00024597918520324994, + "loss": 1.3362, + "step": 6990 + }, + { + "epoch": 0.7383966244725738, + "grad_norm": 
0.6712296009063721, + "learning_rate": 0.00024412285094535952, + "loss": 1.3491, + "step": 7000 + }, + { + "epoch": 0.739451476793249, + "grad_norm": 0.5986452102661133, + "learning_rate": 0.00024227218528793696, + "loss": 1.3176, + "step": 7010 + }, + { + "epoch": 0.740506329113924, + "grad_norm": 0.6893114447593689, + "learning_rate": 0.00024042720896859471, + "loss": 1.3348, + "step": 7020 + }, + { + "epoch": 0.7415611814345991, + "grad_norm": 0.57623690366745, + "learning_rate": 0.00023858794266119323, + "loss": 1.3374, + "step": 7030 + }, + { + "epoch": 0.7426160337552743, + "grad_norm": 0.7655357718467712, + "learning_rate": 0.00023675440697560943, + "loss": 1.3439, + "step": 7040 + }, + { + "epoch": 0.7436708860759493, + "grad_norm": 0.6630870699882507, + "learning_rate": 0.0002349266224575063, + "loss": 1.331, + "step": 7050 + }, + { + "epoch": 0.7447257383966245, + "grad_norm": 0.6774138808250427, + "learning_rate": 0.0002331046095881017, + "loss": 1.3344, + "step": 7060 + }, + { + "epoch": 0.7457805907172996, + "grad_norm": 0.6856870651245117, + "learning_rate": 0.00023128838878393946, + "loss": 1.342, + "step": 7070 + }, + { + "epoch": 0.7468354430379747, + "grad_norm": 0.6007643342018127, + "learning_rate": 0.00022947798039666051, + "loss": 1.3395, + "step": 7080 + }, + { + "epoch": 0.7478902953586498, + "grad_norm": 0.6101679801940918, + "learning_rate": 0.00022767340471277492, + "loss": 1.337, + "step": 7090 + }, + { + "epoch": 0.7489451476793249, + "grad_norm": 0.603408932685852, + "learning_rate": 0.00022587468195343436, + "loss": 1.3325, + "step": 7100 + }, + { + "epoch": 0.75, + "grad_norm": 0.6391726136207581, + "learning_rate": 0.00022408183227420528, + "loss": 1.3429, + "step": 7110 + }, + { + "epoch": 0.7510548523206751, + "grad_norm": 0.7452594637870789, + "learning_rate": 0.0002222948757648443, + "loss": 1.3381, + "step": 7120 + }, + { + "epoch": 0.7521097046413502, + "grad_norm": 0.6168571710586548, + "learning_rate": 
0.00022051383244907143, + "loss": 1.3464, + "step": 7130 + }, + { + "epoch": 0.7531645569620253, + "grad_norm": 0.6086563467979431, + "learning_rate": 0.0002187387222843467, + "loss": 1.3271, + "step": 7140 + }, + { + "epoch": 0.7542194092827004, + "grad_norm": 0.6698595881462097, + "learning_rate": 0.0002169695651616463, + "loss": 1.3255, + "step": 7150 + }, + { + "epoch": 0.7552742616033755, + "grad_norm": 0.6219019889831543, + "learning_rate": 0.00021520638090523955, + "loss": 1.3368, + "step": 7160 + }, + { + "epoch": 0.7563291139240507, + "grad_norm": 0.5978521704673767, + "learning_rate": 0.00021344918927246678, + "loss": 1.3437, + "step": 7170 + }, + { + "epoch": 0.7573839662447257, + "grad_norm": 0.6183485388755798, + "learning_rate": 0.00021169800995351874, + "loss": 1.3198, + "step": 7180 + }, + { + "epoch": 0.7584388185654009, + "grad_norm": 0.5797772407531738, + "learning_rate": 0.00020995286257121453, + "loss": 1.3217, + "step": 7190 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 0.6062895655632019, + "learning_rate": 0.00020821376668078264, + "loss": 1.3305, + "step": 7200 + }, + { + "epoch": 0.760548523206751, + "grad_norm": 0.8143233060836792, + "learning_rate": 0.00020648074176964182, + "loss": 1.3353, + "step": 7210 + }, + { + "epoch": 0.7616033755274262, + "grad_norm": 0.6031996011734009, + "learning_rate": 0.00020475380725718228, + "loss": 1.3481, + "step": 7220 + }, + { + "epoch": 0.7626582278481012, + "grad_norm": 0.6227566003799438, + "learning_rate": 0.00020303298249454857, + "loss": 1.3422, + "step": 7230 + }, + { + "epoch": 0.7637130801687764, + "grad_norm": 0.5790601968765259, + "learning_rate": 0.00020131828676442237, + "loss": 1.3397, + "step": 7240 + }, + { + "epoch": 0.7647679324894515, + "grad_norm": 0.6626167297363281, + "learning_rate": 0.00019960973928080666, + "loss": 1.334, + "step": 7250 + }, + { + "epoch": 0.7658227848101266, + "grad_norm": 0.5787220001220703, + "learning_rate": 0.0001979073591888101, + "loss": 1.3318, 
+ "step": 7260 + }, + { + "epoch": 0.7668776371308017, + "grad_norm": 0.611733078956604, + "learning_rate": 0.000196211165564433, + "loss": 1.3315, + "step": 7270 + }, + { + "epoch": 0.7679324894514767, + "grad_norm": 0.6100620031356812, + "learning_rate": 0.00019452117741435314, + "loss": 1.33, + "step": 7280 + }, + { + "epoch": 0.7689873417721519, + "grad_norm": 0.6300510168075562, + "learning_rate": 0.00019283741367571294, + "loss": 1.3418, + "step": 7290 + }, + { + "epoch": 0.770042194092827, + "grad_norm": 0.6390256881713867, + "learning_rate": 0.00019115989321590694, + "loss": 1.3466, + "step": 7300 + }, + { + "epoch": 0.7710970464135021, + "grad_norm": 0.6182680130004883, + "learning_rate": 0.00018948863483237154, + "loss": 1.3281, + "step": 7310 + }, + { + "epoch": 0.7721518987341772, + "grad_norm": 0.6285327076911926, + "learning_rate": 0.00018782365725237272, + "loss": 1.3212, + "step": 7320 + }, + { + "epoch": 0.7732067510548524, + "grad_norm": 0.6002419590950012, + "learning_rate": 0.00018616497913279728, + "loss": 1.3279, + "step": 7330 + }, + { + "epoch": 0.7742616033755274, + "grad_norm": 0.6095607280731201, + "learning_rate": 0.0001845126190599434, + "loss": 1.3232, + "step": 7340 + }, + { + "epoch": 0.7753164556962026, + "grad_norm": 0.5531589388847351, + "learning_rate": 0.00018286659554931254, + "loss": 1.3225, + "step": 7350 + }, + { + "epoch": 0.7763713080168776, + "grad_norm": 0.5768207907676697, + "learning_rate": 0.00018122692704540194, + "loss": 1.3317, + "step": 7360 + }, + { + "epoch": 0.7774261603375527, + "grad_norm": 0.6630592942237854, + "learning_rate": 0.00017959363192149752, + "loss": 1.3295, + "step": 7370 + }, + { + "epoch": 0.7784810126582279, + "grad_norm": 0.6313626170158386, + "learning_rate": 0.00017796672847946905, + "loss": 1.3207, + "step": 7380 + }, + { + "epoch": 0.7795358649789029, + "grad_norm": 0.5856813788414001, + "learning_rate": 0.0001763462349495639, + "loss": 1.3304, + "step": 7390 + }, + { + "epoch": 
0.7805907172995781, + "grad_norm": 0.6395465731620789, + "learning_rate": 0.00017473216949020326, + "loss": 1.3379, + "step": 7400 + }, + { + "epoch": 0.7816455696202531, + "grad_norm": 0.5684066414833069, + "learning_rate": 0.0001731245501877787, + "loss": 1.3185, + "step": 7410 + }, + { + "epoch": 0.7827004219409283, + "grad_norm": 0.6146547794342041, + "learning_rate": 0.00017152339505644963, + "loss": 1.3218, + "step": 7420 + }, + { + "epoch": 0.7837552742616034, + "grad_norm": 0.5705689191818237, + "learning_rate": 0.0001699287220379407, + "loss": 1.3112, + "step": 7430 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 0.6025727987289429, + "learning_rate": 0.00016834054900134228, + "loss": 1.3146, + "step": 7440 + }, + { + "epoch": 0.7858649789029536, + "grad_norm": 0.589371919631958, + "learning_rate": 0.00016675889374290852, + "loss": 1.3186, + "step": 7450 + }, + { + "epoch": 0.7869198312236287, + "grad_norm": 0.579790472984314, + "learning_rate": 0.0001651837739858589, + "loss": 1.3284, + "step": 7460 + }, + { + "epoch": 0.7879746835443038, + "grad_norm": 0.5928617119789124, + "learning_rate": 0.00016361520738017934, + "loss": 1.3187, + "step": 7470 + }, + { + "epoch": 0.7890295358649789, + "grad_norm": 0.6531867980957031, + "learning_rate": 0.00016205321150242454, + "loss": 1.3278, + "step": 7480 + }, + { + "epoch": 0.790084388185654, + "grad_norm": 0.5766332745552063, + "learning_rate": 0.00016049780385552113, + "loss": 1.3313, + "step": 7490 + }, + { + "epoch": 0.7911392405063291, + "grad_norm": 0.6336475610733032, + "learning_rate": 0.00015894900186857105, + "loss": 1.3245, + "step": 7500 + }, + { + "epoch": 0.7921940928270043, + "grad_norm": 0.5933490991592407, + "learning_rate": 0.00015740682289665714, + "loss": 1.3327, + "step": 7510 + }, + { + "epoch": 0.7932489451476793, + "grad_norm": 0.6021314263343811, + "learning_rate": 0.0001558712842206477, + "loss": 1.3157, + "step": 7520 + }, + { + "epoch": 0.7943037974683544, + "grad_norm": 
0.5703232288360596, + "learning_rate": 0.00015434240304700332, + "loss": 1.3196, + "step": 7530 + }, + { + "epoch": 0.7953586497890295, + "grad_norm": 0.6134620308876038, + "learning_rate": 0.0001528201965075841, + "loss": 1.316, + "step": 7540 + }, + { + "epoch": 0.7964135021097046, + "grad_norm": 0.580632746219635, + "learning_rate": 0.0001513046816594575, + "loss": 1.3137, + "step": 7550 + }, + { + "epoch": 0.7974683544303798, + "grad_norm": 0.6811385750770569, + "learning_rate": 0.0001497958754847076, + "loss": 1.3171, + "step": 7560 + }, + { + "epoch": 0.7985232067510548, + "grad_norm": 0.5898327827453613, + "learning_rate": 0.00014829379489024415, + "loss": 1.3291, + "step": 7570 + }, + { + "epoch": 0.79957805907173, + "grad_norm": 0.5669706463813782, + "learning_rate": 0.0001467984567076137, + "loss": 1.3225, + "step": 7580 + }, + { + "epoch": 0.8006329113924051, + "grad_norm": 0.5978590846061707, + "learning_rate": 0.00014530987769281075, + "loss": 1.3157, + "step": 7590 + }, + { + "epoch": 0.8016877637130801, + "grad_norm": 0.5772284865379333, + "learning_rate": 0.00014382807452609003, + "loss": 1.3185, + "step": 7600 + }, + { + "epoch": 0.8027426160337553, + "grad_norm": 0.6116049289703369, + "learning_rate": 0.00014235306381177952, + "loss": 1.332, + "step": 7610 + }, + { + "epoch": 0.8037974683544303, + "grad_norm": 0.5752907395362854, + "learning_rate": 0.00014088486207809449, + "loss": 1.3053, + "step": 7620 + }, + { + "epoch": 0.8048523206751055, + "grad_norm": 0.5809873938560486, + "learning_rate": 0.0001394234857769521, + "loss": 1.3285, + "step": 7630 + }, + { + "epoch": 0.8059071729957806, + "grad_norm": 0.6350799798965454, + "learning_rate": 0.0001379689512837878, + "loss": 1.3197, + "step": 7640 + }, + { + "epoch": 0.8069620253164557, + "grad_norm": 0.6040031313896179, + "learning_rate": 0.00013652127489737067, + "loss": 1.3352, + "step": 7650 + }, + { + "epoch": 0.8080168776371308, + "grad_norm": 0.5897592902183533, + "learning_rate": 
0.00013508047283962137, + "loss": 1.314, + "step": 7660 + }, + { + "epoch": 0.8090717299578059, + "grad_norm": 0.5742112994194031, + "learning_rate": 0.00013364656125543044, + "loss": 1.3211, + "step": 7670 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 0.6643802523612976, + "learning_rate": 0.00013221955621247749, + "loss": 1.3341, + "step": 7680 + }, + { + "epoch": 0.8111814345991561, + "grad_norm": 0.6589248180389404, + "learning_rate": 0.00013079947370105057, + "loss": 1.3079, + "step": 7690 + }, + { + "epoch": 0.8122362869198312, + "grad_norm": 0.5998160243034363, + "learning_rate": 0.00012938632963386808, + "loss": 1.3179, + "step": 7700 + }, + { + "epoch": 0.8132911392405063, + "grad_norm": 0.6544971466064453, + "learning_rate": 0.00012798013984589894, + "loss": 1.316, + "step": 7710 + }, + { + "epoch": 0.8143459915611815, + "grad_norm": 0.5767165422439575, + "learning_rate": 0.00012658092009418652, + "loss": 1.3191, + "step": 7720 + }, + { + "epoch": 0.8154008438818565, + "grad_norm": 0.6148854494094849, + "learning_rate": 0.00012518868605767118, + "loss": 1.3282, + "step": 7730 + }, + { + "epoch": 0.8164556962025317, + "grad_norm": 0.5716215968132019, + "learning_rate": 0.0001238034533370153, + "loss": 1.3263, + "step": 7740 + }, + { + "epoch": 0.8175105485232067, + "grad_norm": 0.6007248163223267, + "learning_rate": 0.0001224252374544278, + "loss": 1.3087, + "step": 7750 + }, + { + "epoch": 0.8185654008438819, + "grad_norm": 0.7543167471885681, + "learning_rate": 0.00012105405385349047, + "loss": 1.3121, + "step": 7760 + }, + { + "epoch": 0.819620253164557, + "grad_norm": 0.5806342959403992, + "learning_rate": 0.00011968991789898533, + "loss": 1.3217, + "step": 7770 + }, + { + "epoch": 0.820675105485232, + "grad_norm": 0.7105804085731506, + "learning_rate": 0.00011833284487672185, + "loss": 1.3127, + "step": 7780 + }, + { + "epoch": 0.8217299578059072, + "grad_norm": 0.6819510459899902, + "learning_rate": 0.00011698284999336578, + "loss": 1.3289, + 
"step": 7790 + }, + { + "epoch": 0.8227848101265823, + "grad_norm": 0.5882327556610107, + "learning_rate": 0.00011563994837626898, + "loss": 1.3181, + "step": 7800 + }, + { + "epoch": 0.8238396624472574, + "grad_norm": 0.6050553917884827, + "learning_rate": 0.00011430415507329975, + "loss": 1.3175, + "step": 7810 + }, + { + "epoch": 0.8248945147679325, + "grad_norm": 0.580377995967865, + "learning_rate": 0.00011297548505267424, + "loss": 1.324, + "step": 7820 + }, + { + "epoch": 0.8259493670886076, + "grad_norm": 0.6204248070716858, + "learning_rate": 0.00011165395320278898, + "loss": 1.3132, + "step": 7830 + }, + { + "epoch": 0.8270042194092827, + "grad_norm": 0.5612184405326843, + "learning_rate": 0.00011033957433205364, + "loss": 1.2985, + "step": 7840 + }, + { + "epoch": 0.8280590717299579, + "grad_norm": 0.5730684399604797, + "learning_rate": 0.00010903236316872514, + "loss": 1.3066, + "step": 7850 + }, + { + "epoch": 0.8291139240506329, + "grad_norm": 0.565626323223114, + "learning_rate": 0.00010773233436074287, + "loss": 1.2967, + "step": 7860 + }, + { + "epoch": 0.830168776371308, + "grad_norm": 0.5974787473678589, + "learning_rate": 0.00010643950247556447, + "loss": 1.309, + "step": 7870 + }, + { + "epoch": 0.8312236286919831, + "grad_norm": 0.6489875912666321, + "learning_rate": 0.00010515388200000245, + "loss": 1.3303, + "step": 7880 + }, + { + "epoch": 0.8322784810126582, + "grad_norm": 0.5832009315490723, + "learning_rate": 0.00010387548734006195, + "loss": 1.3273, + "step": 7890 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.5801990032196045, + "learning_rate": 0.00010260433282077944, + "loss": 1.306, + "step": 7900 + }, + { + "epoch": 0.8343881856540084, + "grad_norm": 0.6794956922531128, + "learning_rate": 0.00010134043268606191, + "loss": 1.319, + "step": 7910 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 0.6669871211051941, + "learning_rate": 0.00010008380109852752, + "loss": 1.3169, + "step": 7920 + }, + { + "epoch": 
0.8364978902953587, + "grad_norm": 0.5737297534942627, + "learning_rate": 9.883445213934675e-05, + "loss": 1.3251, + "step": 7930 + }, + { + "epoch": 0.8375527426160337, + "grad_norm": 0.5905636548995972, + "learning_rate": 9.759239980808494e-05, + "loss": 1.3172, + "step": 7940 + }, + { + "epoch": 0.8386075949367089, + "grad_norm": 0.6105249524116516, + "learning_rate": 9.635765802254482e-05, + "loss": 1.321, + "step": 7950 + }, + { + "epoch": 0.8396624472573839, + "grad_norm": 0.5987375378608704, + "learning_rate": 9.5130240618611e-05, + "loss": 1.3062, + "step": 7960 + }, + { + "epoch": 0.8407172995780591, + "grad_norm": 0.6174719929695129, + "learning_rate": 9.391016135009484e-05, + "loss": 1.3078, + "step": 7970 + }, + { + "epoch": 0.8417721518987342, + "grad_norm": 0.5737168788909912, + "learning_rate": 9.269743388858019e-05, + "loss": 1.3051, + "step": 7980 + }, + { + "epoch": 0.8428270042194093, + "grad_norm": 0.6316167116165161, + "learning_rate": 9.149207182327054e-05, + "loss": 1.3175, + "step": 7990 + }, + { + "epoch": 0.8438818565400844, + "grad_norm": 0.6307011246681213, + "learning_rate": 9.029408866083638e-05, + "loss": 1.3135, + "step": 8000 + }, + { + "epoch": 0.8449367088607594, + "grad_norm": 0.6022709012031555, + "learning_rate": 8.910349782526394e-05, + "loss": 1.3194, + "step": 8010 + }, + { + "epoch": 0.8459915611814346, + "grad_norm": 0.6304408311843872, + "learning_rate": 8.792031265770475e-05, + "loss": 1.3107, + "step": 8020 + }, + { + "epoch": 0.8470464135021097, + "grad_norm": 0.6313706636428833, + "learning_rate": 8.67445464163267e-05, + "loss": 1.3091, + "step": 8030 + }, + { + "epoch": 0.8481012658227848, + "grad_norm": 0.6577855348587036, + "learning_rate": 8.557621227616444e-05, + "loss": 1.3223, + "step": 8040 + }, + { + "epoch": 0.8491561181434599, + "grad_norm": 0.5904058218002319, + "learning_rate": 8.441532332897248e-05, + "loss": 1.3029, + "step": 8050 + }, + { + "epoch": 0.8502109704641351, + "grad_norm": 
0.5858882069587708, + "learning_rate": 8.326189258307832e-05, + "loss": 1.3127, + "step": 8060 + }, + { + "epoch": 0.8512658227848101, + "grad_norm": 0.5945744514465332, + "learning_rate": 8.211593296323672e-05, + "loss": 1.3174, + "step": 8070 + }, + { + "epoch": 0.8523206751054853, + "grad_norm": 0.5655848383903503, + "learning_rate": 8.097745731048475e-05, + "loss": 1.3209, + "step": 8080 + }, + { + "epoch": 0.8533755274261603, + "grad_norm": 0.5851868987083435, + "learning_rate": 7.984647838199773e-05, + "loss": 1.307, + "step": 8090 + }, + { + "epoch": 0.8544303797468354, + "grad_norm": 0.6638721823692322, + "learning_rate": 7.872300885094736e-05, + "loss": 1.2987, + "step": 8100 + }, + { + "epoch": 0.8554852320675106, + "grad_norm": 0.5966708064079285, + "learning_rate": 7.760706130635792e-05, + "loss": 1.3192, + "step": 8110 + }, + { + "epoch": 0.8565400843881856, + "grad_norm": 0.5872936844825745, + "learning_rate": 7.649864825296669e-05, + "loss": 1.307, + "step": 8120 + }, + { + "epoch": 0.8575949367088608, + "grad_norm": 0.603786289691925, + "learning_rate": 7.539778211108309e-05, + "loss": 1.3103, + "step": 8130 + }, + { + "epoch": 0.8586497890295358, + "grad_norm": 0.5888696908950806, + "learning_rate": 7.430447521644973e-05, + "loss": 1.3078, + "step": 8140 + }, + { + "epoch": 0.859704641350211, + "grad_norm": 0.5880342721939087, + "learning_rate": 7.321873982010422e-05, + "loss": 1.3042, + "step": 8150 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 0.5697059631347656, + "learning_rate": 7.214058808824192e-05, + "loss": 1.316, + "step": 8160 + }, + { + "epoch": 0.8618143459915611, + "grad_norm": 0.6288982629776001, + "learning_rate": 7.107003210207947e-05, + "loss": 1.3049, + "step": 8170 + }, + { + "epoch": 0.8628691983122363, + "grad_norm": 0.5889933705329895, + "learning_rate": 7.000708385771928e-05, + "loss": 1.3148, + "step": 8180 + }, + { + "epoch": 0.8639240506329114, + "grad_norm": 0.6553212404251099, + "learning_rate": 
6.89517552660156e-05, + "loss": 1.3042, + "step": 8190 + }, + { + "epoch": 0.8649789029535865, + "grad_norm": 0.5648336410522461, + "learning_rate": 6.790405815244044e-05, + "loss": 1.3169, + "step": 8200 + }, + { + "epoch": 0.8660337552742616, + "grad_norm": 0.565113365650177, + "learning_rate": 6.686400425695171e-05, + "loss": 1.3109, + "step": 8210 + }, + { + "epoch": 0.8670886075949367, + "grad_norm": 0.5684016942977905, + "learning_rate": 6.583160523386086e-05, + "loss": 1.3187, + "step": 8220 + }, + { + "epoch": 0.8681434599156118, + "grad_norm": 0.5970169901847839, + "learning_rate": 6.480687265170342e-05, + "loss": 1.3125, + "step": 8230 + }, + { + "epoch": 0.869198312236287, + "grad_norm": 0.5650521516799927, + "learning_rate": 6.37898179931081e-05, + "loss": 1.3087, + "step": 8240 + }, + { + "epoch": 0.870253164556962, + "grad_norm": 0.5817661285400391, + "learning_rate": 6.278045265466911e-05, + "loss": 1.3244, + "step": 8250 + }, + { + "epoch": 0.8713080168776371, + "grad_norm": 0.6008373498916626, + "learning_rate": 6.177878794681782e-05, + "loss": 1.3162, + "step": 8260 + }, + { + "epoch": 0.8723628691983122, + "grad_norm": 0.6469988822937012, + "learning_rate": 6.078483509369642e-05, + "loss": 1.3124, + "step": 8270 + }, + { + "epoch": 0.8734177215189873, + "grad_norm": 0.5787304043769836, + "learning_rate": 5.9798605233031904e-05, + "loss": 1.2969, + "step": 8280 + }, + { + "epoch": 0.8744725738396625, + "grad_norm": 0.6522219181060791, + "learning_rate": 5.8820109416011485e-05, + "loss": 1.297, + "step": 8290 + }, + { + "epoch": 0.8755274261603375, + "grad_norm": 0.5949900150299072, + "learning_rate": 5.784935860715862e-05, + "loss": 1.3213, + "step": 8300 + }, + { + "epoch": 0.8765822784810127, + "grad_norm": 0.624856173992157, + "learning_rate": 5.6886363684210016e-05, + "loss": 1.3, + "step": 8310 + }, + { + "epoch": 0.8776371308016878, + "grad_norm": 0.5555047988891602, + "learning_rate": 5.5931135437993994e-05, + "loss": 1.3204, + "step": 8320 
+ }, + { + "epoch": 0.8786919831223629, + "grad_norm": 0.573746919631958, + "learning_rate": 5.498368457230965e-05, + "loss": 1.3132, + "step": 8330 + }, + { + "epoch": 0.879746835443038, + "grad_norm": 0.5944172143936157, + "learning_rate": 5.4044021703806375e-05, + "loss": 1.2996, + "step": 8340 + }, + { + "epoch": 0.880801687763713, + "grad_norm": 0.580845296382904, + "learning_rate": 5.311215736186536e-05, + "loss": 1.3055, + "step": 8350 + }, + { + "epoch": 0.8818565400843882, + "grad_norm": 0.5658819079399109, + "learning_rate": 5.21881019884819e-05, + "loss": 1.3001, + "step": 8360 + }, + { + "epoch": 0.8829113924050633, + "grad_norm": 0.5697470903396606, + "learning_rate": 5.127186593814748e-05, + "loss": 1.3069, + "step": 8370 + }, + { + "epoch": 0.8839662447257384, + "grad_norm": 0.5860816240310669, + "learning_rate": 5.0363459477734464e-05, + "loss": 1.3164, + "step": 8380 + }, + { + "epoch": 0.8850210970464135, + "grad_norm": 0.5837072730064392, + "learning_rate": 4.946289278638064e-05, + "loss": 1.3036, + "step": 8390 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 0.5782831311225891, + "learning_rate": 4.8570175955375715e-05, + "loss": 1.3053, + "step": 8400 + }, + { + "epoch": 0.8871308016877637, + "grad_norm": 0.6137297749519348, + "learning_rate": 4.768531898804754e-05, + "loss": 1.3218, + "step": 8410 + }, + { + "epoch": 0.8881856540084389, + "grad_norm": 0.5723269581794739, + "learning_rate": 4.680833179965063e-05, + "loss": 1.3105, + "step": 8420 + }, + { + "epoch": 0.8892405063291139, + "grad_norm": 0.5757145285606384, + "learning_rate": 4.5939224217254574e-05, + "loss": 1.3077, + "step": 8430 + }, + { + "epoch": 0.890295358649789, + "grad_norm": 0.5817318558692932, + "learning_rate": 4.507800597963424e-05, + "loss": 1.3015, + "step": 8440 + }, + { + "epoch": 0.8913502109704642, + "grad_norm": 0.6427285671234131, + "learning_rate": 4.422468673716054e-05, + "loss": 1.3098, + "step": 8450 + }, + { + "epoch": 0.8924050632911392, + 
"grad_norm": 0.6045941710472107, + "learning_rate": 4.337927605169212e-05, + "loss": 1.304, + "step": 8460 + }, + { + "epoch": 0.8934599156118144, + "grad_norm": 0.5604165196418762, + "learning_rate": 4.2541783396468584e-05, + "loss": 1.3058, + "step": 8470 + }, + { + "epoch": 0.8945147679324894, + "grad_norm": 0.5670589208602905, + "learning_rate": 4.1712218156004014e-05, + "loss": 1.3005, + "step": 8480 + }, + { + "epoch": 0.8955696202531646, + "grad_norm": 0.613858163356781, + "learning_rate": 4.089058962598213e-05, + "loss": 1.3074, + "step": 8490 + }, + { + "epoch": 0.8966244725738397, + "grad_norm": 0.6006304025650024, + "learning_rate": 4.0076907013151726e-05, + "loss": 1.2959, + "step": 8500 + }, + { + "epoch": 0.8976793248945147, + "grad_norm": 0.5667493343353271, + "learning_rate": 3.927117943522379e-05, + "loss": 1.3064, + "step": 8510 + }, + { + "epoch": 0.8987341772151899, + "grad_norm": 0.5587700009346008, + "learning_rate": 3.8473415920769304e-05, + "loss": 1.3029, + "step": 8520 + }, + { + "epoch": 0.8997890295358649, + "grad_norm": 0.5939697623252869, + "learning_rate": 3.768362540911788e-05, + "loss": 1.3187, + "step": 8530 + }, + { + "epoch": 0.9008438818565401, + "grad_norm": 0.5569849610328674, + "learning_rate": 3.690181675025775e-05, + "loss": 1.313, + "step": 8540 + }, + { + "epoch": 0.9018987341772152, + "grad_norm": 0.5981857180595398, + "learning_rate": 3.612799870473696e-05, + "loss": 1.3037, + "step": 8550 + }, + { + "epoch": 0.9029535864978903, + "grad_norm": 0.573263943195343, + "learning_rate": 3.5362179943564496e-05, + "loss": 1.296, + "step": 8560 + }, + { + "epoch": 0.9040084388185654, + "grad_norm": 0.7049583196640015, + "learning_rate": 3.46043690481134e-05, + "loss": 1.3152, + "step": 8570 + }, + { + "epoch": 0.9050632911392406, + "grad_norm": 0.5500507354736328, + "learning_rate": 3.38545745100248e-05, + "loss": 1.3068, + "step": 8580 + }, + { + "epoch": 0.9061181434599156, + "grad_norm": 0.6633813381195068, + "learning_rate": 
3.31128047311127e-05, + "loss": 1.3121, + "step": 8590 + }, + { + "epoch": 0.9071729957805907, + "grad_norm": 0.5680724382400513, + "learning_rate": 3.237906802326951e-05, + "loss": 1.3027, + "step": 8600 + }, + { + "epoch": 0.9082278481012658, + "grad_norm": 0.6080586314201355, + "learning_rate": 3.165337260837351e-05, + "loss": 1.3044, + "step": 8610 + }, + { + "epoch": 0.9092827004219409, + "grad_norm": 0.603796660900116, + "learning_rate": 3.093572661819602e-05, + "loss": 1.3019, + "step": 8620 + }, + { + "epoch": 0.9103375527426161, + "grad_norm": 0.6002474427223206, + "learning_rate": 3.022613809431088e-05, + "loss": 1.3095, + "step": 8630 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 0.6760857105255127, + "learning_rate": 2.952461498800388e-05, + "loss": 1.2926, + "step": 8640 + }, + { + "epoch": 0.9124472573839663, + "grad_norm": 0.5631442070007324, + "learning_rate": 2.8831165160184024e-05, + "loss": 1.297, + "step": 8650 + }, + { + "epoch": 0.9135021097046413, + "grad_norm": 0.5859106779098511, + "learning_rate": 2.8145796381295276e-05, + "loss": 1.2996, + "step": 8660 + }, + { + "epoch": 0.9145569620253164, + "grad_norm": 0.5785754919052124, + "learning_rate": 2.7468516331229432e-05, + "loss": 1.3068, + "step": 8670 + }, + { + "epoch": 0.9156118143459916, + "grad_norm": 0.5460254549980164, + "learning_rate": 2.6799332599239974e-05, + "loss": 1.3154, + "step": 8680 + }, + { + "epoch": 0.9166666666666666, + "grad_norm": 0.5864307284355164, + "learning_rate": 2.6138252683857693e-05, + "loss": 1.3088, + "step": 8690 + }, + { + "epoch": 0.9177215189873418, + "grad_norm": 0.5521966814994812, + "learning_rate": 2.5485283992805615e-05, + "loss": 1.3078, + "step": 8700 + }, + { + "epoch": 0.9187763713080169, + "grad_norm": 0.5626932382583618, + "learning_rate": 2.4840433842916872e-05, + "loss": 1.3048, + "step": 8710 + }, + { + "epoch": 0.919831223628692, + "grad_norm": 0.5465701818466187, + "learning_rate": 2.4203709460052292e-05, + "loss": 1.299, + 
"step": 8720 + }, + { + "epoch": 0.9208860759493671, + "grad_norm": 0.5713563561439514, + "learning_rate": 2.357511797901929e-05, + "loss": 1.3127, + "step": 8730 + }, + { + "epoch": 0.9219409282700421, + "grad_norm": 0.5865451693534851, + "learning_rate": 2.2954666443492505e-05, + "loss": 1.2992, + "step": 8740 + }, + { + "epoch": 0.9229957805907173, + "grad_norm": 0.645768404006958, + "learning_rate": 2.2342361805934297e-05, + "loss": 1.2961, + "step": 8750 + }, + { + "epoch": 0.9240506329113924, + "grad_norm": 0.5648680925369263, + "learning_rate": 2.173821092751721e-05, + "loss": 1.2872, + "step": 8760 + }, + { + "epoch": 0.9251054852320675, + "grad_norm": 0.576321542263031, + "learning_rate": 2.1142220578046712e-05, + "loss": 1.2964, + "step": 8770 + }, + { + "epoch": 0.9261603375527426, + "grad_norm": 0.5759575366973877, + "learning_rate": 2.0554397435885746e-05, + "loss": 1.3056, + "step": 8780 + }, + { + "epoch": 0.9272151898734177, + "grad_norm": 0.5627743601799011, + "learning_rate": 1.9974748087879636e-05, + "loss": 1.2991, + "step": 8790 + }, + { + "epoch": 0.9282700421940928, + "grad_norm": 0.5651407241821289, + "learning_rate": 1.9403279029282376e-05, + "loss": 1.2973, + "step": 8800 + }, + { + "epoch": 0.929324894514768, + "grad_norm": 0.5628427863121033, + "learning_rate": 1.8839996663683635e-05, + "loss": 1.3161, + "step": 8810 + }, + { + "epoch": 0.930379746835443, + "grad_norm": 0.5518158078193665, + "learning_rate": 1.8284907302937608e-05, + "loss": 1.3014, + "step": 8820 + }, + { + "epoch": 0.9314345991561181, + "grad_norm": 0.5657414197921753, + "learning_rate": 1.773801716709153e-05, + "loss": 1.3192, + "step": 8830 + }, + { + "epoch": 0.9324894514767933, + "grad_norm": 0.5501078963279724, + "learning_rate": 1.719933238431645e-05, + "loss": 1.3019, + "step": 8840 + }, + { + "epoch": 0.9335443037974683, + "grad_norm": 0.6224268078804016, + "learning_rate": 1.666885899083831e-05, + "loss": 1.3038, + "step": 8850 + }, + { + "epoch": 
0.9345991561181435, + "grad_norm": 0.5688881278038025, + "learning_rate": 1.614660293087056e-05, + "loss": 1.3016, + "step": 8860 + }, + { + "epoch": 0.9356540084388185, + "grad_norm": 0.5819045305252075, + "learning_rate": 1.5632570056547308e-05, + "loss": 1.3038, + "step": 8870 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 0.6100286841392517, + "learning_rate": 1.512676612785796e-05, + "loss": 1.3096, + "step": 8880 + }, + { + "epoch": 0.9377637130801688, + "grad_norm": 0.5607612729072571, + "learning_rate": 1.4629196812582513e-05, + "loss": 1.3074, + "step": 8890 + }, + { + "epoch": 0.9388185654008439, + "grad_norm": 0.5665546655654907, + "learning_rate": 1.4139867686228102e-05, + "loss": 1.2954, + "step": 8900 + }, + { + "epoch": 0.939873417721519, + "grad_norm": 0.60845947265625, + "learning_rate": 1.3658784231966481e-05, + "loss": 1.2977, + "step": 8910 + }, + { + "epoch": 0.9409282700421941, + "grad_norm": 0.5687810778617859, + "learning_rate": 1.3185951840572723e-05, + "loss": 1.3038, + "step": 8920 + }, + { + "epoch": 0.9419831223628692, + "grad_norm": 0.572655975818634, + "learning_rate": 1.2721375810364616e-05, + "loss": 1.3011, + "step": 8930 + }, + { + "epoch": 0.9430379746835443, + "grad_norm": 0.5606327056884766, + "learning_rate": 1.2265061347143447e-05, + "loss": 1.302, + "step": 8940 + }, + { + "epoch": 0.9440928270042194, + "grad_norm": 0.5624728202819824, + "learning_rate": 1.1817013564135475e-05, + "loss": 1.3036, + "step": 8950 + }, + { + "epoch": 0.9451476793248945, + "grad_norm": 0.5751215815544128, + "learning_rate": 1.137723748193506e-05, + "loss": 1.3062, + "step": 8960 + }, + { + "epoch": 0.9462025316455697, + "grad_norm": 0.5990484356880188, + "learning_rate": 1.0945738028447783e-05, + "loss": 1.3045, + "step": 8970 + }, + { + "epoch": 0.9472573839662447, + "grad_norm": 0.6358960270881653, + "learning_rate": 1.0522520038835831e-05, + "loss": 1.3002, + "step": 8980 + }, + { + "epoch": 0.9483122362869199, + "grad_norm": 
0.5532749891281128, + "learning_rate": 1.0107588255463373e-05, + "loss": 1.3073, + "step": 8990 + }, + { + "epoch": 0.9493670886075949, + "grad_norm": 0.5553548336029053, + "learning_rate": 9.700947327843685e-06, + "loss": 1.2993, + "step": 9000 + }, + { + "epoch": 0.95042194092827, + "grad_norm": 0.5885767340660095, + "learning_rate": 9.302601812586852e-06, + "loss": 1.298, + "step": 9010 + }, + { + "epoch": 0.9514767932489452, + "grad_norm": 0.5714589953422546, + "learning_rate": 8.912556173348907e-06, + "loss": 1.2978, + "step": 9020 + }, + { + "epoch": 0.9525316455696202, + "grad_norm": 0.5884906649589539, + "learning_rate": 8.53081478078177e-06, + "loss": 1.3192, + "step": 9030 + }, + { + "epoch": 0.9535864978902954, + "grad_norm": 0.5623753070831299, + "learning_rate": 8.157381912484053e-06, + "loss": 1.2929, + "step": 9040 + }, + { + "epoch": 0.9546413502109705, + "grad_norm": 0.5589457750320435, + "learning_rate": 7.792261752953333e-06, + "loss": 1.3054, + "step": 9050 + }, + { + "epoch": 0.9556962025316456, + "grad_norm": 0.539983868598938, + "learning_rate": 7.435458393539457e-06, + "loss": 1.304, + "step": 9060 + }, + { + "epoch": 0.9567510548523207, + "grad_norm": 0.5631902813911438, + "learning_rate": 7.086975832398146e-06, + "loss": 1.3062, + "step": 9070 + }, + { + "epoch": 0.9578059071729957, + "grad_norm": 0.5820996165275574, + "learning_rate": 6.746817974446706e-06, + "loss": 1.3072, + "step": 9080 + }, + { + "epoch": 0.9588607594936709, + "grad_norm": 0.5756253004074097, + "learning_rate": 6.414988631320062e-06, + "loss": 1.3093, + "step": 9090 + }, + { + "epoch": 0.959915611814346, + "grad_norm": 0.5730090141296387, + "learning_rate": 6.091491521327958e-06, + "loss": 1.2881, + "step": 9100 + }, + { + "epoch": 0.9609704641350211, + "grad_norm": 0.5700660347938538, + "learning_rate": 5.776330269413488e-06, + "loss": 1.2993, + "step": 9110 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 0.558659553527832, + "learning_rate": 
5.469508407112467e-06, + "loss": 1.3092, + "step": 9120 + }, + { + "epoch": 0.9630801687763713, + "grad_norm": 0.578532874584198, + "learning_rate": 5.171029372513458e-06, + "loss": 1.2949, + "step": 9130 + }, + { + "epoch": 0.9641350210970464, + "grad_norm": 0.5965441465377808, + "learning_rate": 4.880896510220056e-06, + "loss": 1.3113, + "step": 9140 + }, + { + "epoch": 0.9651898734177216, + "grad_norm": 0.5753445625305176, + "learning_rate": 4.5991130713124995e-06, + "loss": 1.301, + "step": 9150 + }, + { + "epoch": 0.9662447257383966, + "grad_norm": 0.5625138282775879, + "learning_rate": 4.325682213311782e-06, + "loss": 1.3088, + "step": 9160 + }, + { + "epoch": 0.9672995780590717, + "grad_norm": 0.5745208263397217, + "learning_rate": 4.060607000144351e-06, + "loss": 1.3059, + "step": 9170 + }, + { + "epoch": 0.9683544303797469, + "grad_norm": 0.5613098740577698, + "learning_rate": 3.803890402107213e-06, + "loss": 1.3035, + "step": 9180 + }, + { + "epoch": 0.9694092827004219, + "grad_norm": 0.5401063561439514, + "learning_rate": 3.555535295835216e-06, + "loss": 1.2982, + "step": 9190 + }, + { + "epoch": 0.9704641350210971, + "grad_norm": 0.6082156300544739, + "learning_rate": 3.3155444642687384e-06, + "loss": 1.3024, + "step": 9200 + }, + { + "epoch": 0.9715189873417721, + "grad_norm": 0.5557345151901245, + "learning_rate": 3.0839205966220474e-06, + "loss": 1.3003, + "step": 9210 + }, + { + "epoch": 0.9725738396624473, + "grad_norm": 0.5726261734962463, + "learning_rate": 2.8606662883539082e-06, + "loss": 1.2992, + "step": 9220 + }, + { + "epoch": 0.9736286919831224, + "grad_norm": 0.5471786856651306, + "learning_rate": 2.6457840411376888e-06, + "loss": 1.2929, + "step": 9230 + }, + { + "epoch": 0.9746835443037974, + "grad_norm": 0.5730890035629272, + "learning_rate": 2.4392762628338838e-06, + "loss": 1.2847, + "step": 9240 + }, + { + "epoch": 0.9757383966244726, + "grad_norm": 0.549816370010376, + "learning_rate": 2.2411452674630517e-06, + "loss": 1.2992, + 
"step": 9250 + }, + { + "epoch": 0.9767932489451476, + "grad_norm": 0.580393373966217, + "learning_rate": 2.0513932751796695e-06, + "loss": 1.2947, + "step": 9260 + }, + { + "epoch": 0.9778481012658228, + "grad_norm": 0.5714750289916992, + "learning_rate": 1.8700224122475683e-06, + "loss": 1.3065, + "step": 9270 + }, + { + "epoch": 0.9789029535864979, + "grad_norm": 0.5562458038330078, + "learning_rate": 1.6970347110157879e-06, + "loss": 1.2941, + "step": 9280 + }, + { + "epoch": 0.979957805907173, + "grad_norm": 0.5664286017417908, + "learning_rate": 1.532432109895926e-06, + "loss": 1.3001, + "step": 9290 + }, + { + "epoch": 0.9810126582278481, + "grad_norm": 0.576139509677887, + "learning_rate": 1.376216453340573e-06, + "loss": 1.2997, + "step": 9300 + }, + { + "epoch": 0.9820675105485233, + "grad_norm": 0.5583192110061646, + "learning_rate": 1.2283894918224125e-06, + "loss": 1.3016, + "step": 9310 + }, + { + "epoch": 0.9831223628691983, + "grad_norm": 0.5727718472480774, + "learning_rate": 1.0889528818147366e-06, + "loss": 1.2963, + "step": 9320 + }, + { + "epoch": 0.9841772151898734, + "grad_norm": 0.5543742179870605, + "learning_rate": 9.579081857728766e-07, + "loss": 1.3079, + "step": 9330 + }, + { + "epoch": 0.9852320675105485, + "grad_norm": 0.5506760478019714, + "learning_rate": 8.352568721165521e-07, + "loss": 1.3039, + "step": 9340 + }, + { + "epoch": 0.9862869198312236, + "grad_norm": 0.5552864074707031, + "learning_rate": 7.210003152136324e-07, + "loss": 1.3141, + "step": 9350 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 0.5546798706054688, + "learning_rate": 6.151397953647331e-07, + "loss": 1.3102, + "step": 9360 + }, + { + "epoch": 0.9883966244725738, + "grad_norm": 0.5990240573883057, + "learning_rate": 5.176764987885607e-07, + "loss": 1.307, + "step": 9370 + }, + { + "epoch": 0.989451476793249, + "grad_norm": 0.57259601354599, + "learning_rate": 4.2861151760900665e-07, + "loss": 1.2916, + "step": 9380 + }, + { + "epoch": 
0.990506329113924, + "grad_norm": 0.5830367207527161, + "learning_rate": 3.479458498426569e-07, + "loss": 1.296, + "step": 9390 + }, + { + "epoch": 0.9915611814345991, + "grad_norm": 0.5781258940696716, + "learning_rate": 2.756803993877177e-07, + "loss": 1.3099, + "step": 9400 + }, + { + "epoch": 0.9926160337552743, + "grad_norm": 0.5923910140991211, + "learning_rate": 2.1181597601385716e-07, + "loss": 1.2859, + "step": 9410 + }, + { + "epoch": 0.9936708860759493, + "grad_norm": 0.5599663853645325, + "learning_rate": 1.5635329535304554e-07, + "loss": 1.3031, + "step": 9420 + }, + { + "epoch": 0.9947257383966245, + "grad_norm": 0.5727519392967224, + "learning_rate": 1.0929297889172852e-07, + "loss": 1.3045, + "step": 9430 + }, + { + "epoch": 0.9957805907172996, + "grad_norm": 0.5572170615196228, + "learning_rate": 7.063555396383259e-08, + "loss": 1.3002, + "step": 9440 + }, + { + "epoch": 0.9968354430379747, + "grad_norm": 0.5752370953559875, + "learning_rate": 4.038145374460345e-08, + "loss": 1.309, + "step": 9450 + }, + { + "epoch": 0.9978902953586498, + "grad_norm": 0.5557172894477844, + "learning_rate": 1.8531017245942972e-08, + "loss": 1.2934, + "step": 9460 + }, + { + "epoch": 0.9989451476793249, + "grad_norm": 0.5520392656326294, + "learning_rate": 5.084489312745521e-09, + "loss": 1.3054, + "step": 9470 + }, + { + "epoch": 1.0, + "grad_norm": 1.635014533996582, + "learning_rate": 4.202061990032924e-11, + "loss": 1.2965, + "step": 9480 + } + ], + "logging_steps": 10, + "max_steps": 9480, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.832308198648013e+16, + "train_batch_size": 1024, + "trial_name": null, + "trial_params": null +} diff --git 
a/saves-mistral-cosine/checkpoint-9480/training_args.bin b/saves-mistral-cosine/checkpoint-9480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..6e0847cd4d114cbb9e88c673a5963084928b04a9 --- /dev/null +++ b/saves-mistral-cosine/checkpoint-9480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94525ae38b2a20ca9f7330ff13c1357dd18cf6ae9dec1d921a2d6b87c3352533 +size 5176 diff --git a/saves-mistral-cosine/config.json b/saves-mistral-cosine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..27c3ad5440dbe5ee26df317f757bddbf9946d6e3 --- /dev/null +++ b/saves-mistral-cosine/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "MistralForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 768, + "max_position_embeddings": 131072, + "model_type": "mistral", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "rms_norm_eps": 1e-06, + "rope_theta": 10000.0, + "sliding_window": 4096, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-mistral-cosine/generation_config.json b/saves-mistral-cosine/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b2fc224da8a3685f78c733a0ef85e67242c17b5a --- /dev/null +++ b/saves-mistral-cosine/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.42.4" +} diff --git a/saves-mistral-cosine/model.safetensors b/saves-mistral-cosine/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1619dff2170727887ccbc223ae41474a23873267 --- /dev/null +++ b/saves-mistral-cosine/model.safetensors @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:a4f13e3e899449d568b0f4ac09529011492b1eae3c3e9acefb7f156d5b1a782f +size 8346712 diff --git a/saves-mistral-cosine/special_tokens_map.json b/saves-mistral-cosine/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-mistral-cosine/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-mistral-cosine/tokenizer.json b/saves-mistral-cosine/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-mistral-cosine/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": 
"ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + 
"¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + 
"ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + 
"ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + 
"è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + 
"éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + 
"åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + 
"æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, 
+ "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, 
+ "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 
1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + 
"å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 
1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + 
"æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 
1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i 
on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + 
"es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", + "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-mistral-cosine/tokenizer_config.json b/saves-mistral-cosine/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-mistral-cosine/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": 
"replace", + "model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-mistral/checkpoint-9480/config.json b/saves-mistral/checkpoint-9480/config.json new file mode 100644 index 0000000000000000000000000000000000000000..27c3ad5440dbe5ee26df317f757bddbf9946d6e3 --- /dev/null +++ b/saves-mistral/checkpoint-9480/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "MistralForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 768, + "max_position_embeddings": 131072, + "model_type": "mistral", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "rms_norm_eps": 1e-06, + "rope_theta": 10000.0, + "sliding_window": 4096, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-mistral/checkpoint-9480/generation_config.json b/saves-mistral/checkpoint-9480/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b2fc224da8a3685f78c733a0ef85e67242c17b5a --- /dev/null +++ b/saves-mistral/checkpoint-9480/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.42.4" +} diff --git a/saves-mistral/checkpoint-9480/model.safetensors b/saves-mistral/checkpoint-9480/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fa9e3c118f6037392d9cfa26a160c6d59ddd6159 --- /dev/null +++ b/saves-mistral/checkpoint-9480/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2bb958b87ab0f1c22dc19ea247e5d65a6e3bd054f83b734e3dd9c6ddc7537b7 +size 8346712 diff --git a/saves-mistral/checkpoint-9480/optimizer.pt 
b/saves-mistral/checkpoint-9480/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ee509d3399290b699284df3ca7fa0bbf3f0134cf --- /dev/null +++ b/saves-mistral/checkpoint-9480/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:790f8ee7c2f688f2e9d1da335a1c9cb44cb84f665a7dc0622bfed75ed0e4a347 +size 16706402 diff --git a/saves-mistral/checkpoint-9480/rng_state.pth b/saves-mistral/checkpoint-9480/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f8291cd6ce87668b786a72f3e93d072fbe54902 --- /dev/null +++ b/saves-mistral/checkpoint-9480/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c917636c7a58af68a29056522a757e9f9b99005b776641aa157c536967817d +size 14244 diff --git a/saves-mistral/checkpoint-9480/scheduler.pt b/saves-mistral/checkpoint-9480/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d4e146fb9369424bca1e920276a86162b00d56fd --- /dev/null +++ b/saves-mistral/checkpoint-9480/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c33e6451a8a4598628b3479890d40774857cdcb0d8604c19f1bee5bdefe1e2f9 +size 1064 diff --git a/saves-mistral/checkpoint-9480/special_tokens_map.json b/saves-mistral/checkpoint-9480/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-mistral/checkpoint-9480/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git 
a/saves-mistral/checkpoint-9480/tokenizer.json b/saves-mistral/checkpoint-9480/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-mistral/checkpoint-9480/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + 
"3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 
193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 
344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, 
+ "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + 
"被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 
782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + 
"Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + 
"ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 
1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + 
"çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + 
"Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + 
"Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 
1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 
1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 
1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-mistral/checkpoint-9480/tokenizer_config.json b/saves-mistral/checkpoint-9480/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-mistral/checkpoint-9480/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": 
"<|im_end|>", + "errors": "replace", + "model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-mistral/checkpoint-9480/trainer_state.json b/saves-mistral/checkpoint-9480/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3a894b044004ab0bac178f3346f24585623e6309 --- /dev/null +++ b/saves-mistral/checkpoint-9480/trainer_state.json @@ -0,0 +1,6669 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 9480, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0010548523206751054, + "grad_norm": 1.2691798210144043, + "learning_rate": 0.00015822784810126583, + "loss": 7.522, + "step": 10 + }, + { + "epoch": 0.002109704641350211, + "grad_norm": 1.190048336982727, + "learning_rate": 0.00031645569620253165, + "loss": 6.9155, + "step": 20 + }, + { + "epoch": 0.0031645569620253164, + "grad_norm": 0.8505806922912598, + "learning_rate": 0.00047468354430379745, + "loss": 6.2513, + "step": 30 + }, + { + "epoch": 0.004219409282700422, + "grad_norm": 0.8368041515350342, + "learning_rate": 0.0006329113924050633, + "loss": 5.7561, + "step": 40 + }, + { + "epoch": 0.005274261603375527, + "grad_norm": 0.820473849773407, + "learning_rate": 0.0007911392405063291, + "loss": 5.2893, + "step": 50 + }, + { + "epoch": 0.006329113924050633, + "grad_norm": 1.0409610271453857, + "learning_rate": 0.0009493670886075949, + "loss": 4.7736, + "step": 60 + }, + { + "epoch": 0.007383966244725738, + "grad_norm": 1.1511647701263428, + "learning_rate": 0.0011075949367088608, + "loss": 4.3722, + "step": 70 + }, + { + "epoch": 0.008438818565400843, + "grad_norm": 1.1112607717514038, + "learning_rate": 0.0012658227848101266, + "loss": 4.1169, + "step": 80 + }, + { + "epoch": 0.00949367088607595, + "grad_norm": 
0.9545691609382629, + "learning_rate": 0.0014240506329113926, + "loss": 3.9197, + "step": 90 + }, + { + "epoch": 0.010548523206751054, + "grad_norm": 0.5341774225234985, + "learning_rate": 0.0015, + "loss": 3.7698, + "step": 100 + }, + { + "epoch": 0.011603375527426161, + "grad_norm": 1.0685570240020752, + "learning_rate": 0.0015, + "loss": 3.619, + "step": 110 + }, + { + "epoch": 0.012658227848101266, + "grad_norm": 0.8480899333953857, + "learning_rate": 0.0015, + "loss": 3.5075, + "step": 120 + }, + { + "epoch": 0.013713080168776372, + "grad_norm": 0.6460363864898682, + "learning_rate": 0.0015, + "loss": 3.3988, + "step": 130 + }, + { + "epoch": 0.014767932489451477, + "grad_norm": 0.7429052591323853, + "learning_rate": 0.0015, + "loss": 3.3032, + "step": 140 + }, + { + "epoch": 0.015822784810126583, + "grad_norm": 0.9308001399040222, + "learning_rate": 0.0015, + "loss": 3.2196, + "step": 150 + }, + { + "epoch": 0.016877637130801686, + "grad_norm": 0.6899569630622864, + "learning_rate": 0.0015, + "loss": 3.1532, + "step": 160 + }, + { + "epoch": 0.017932489451476793, + "grad_norm": 0.9509626626968384, + "learning_rate": 0.0015, + "loss": 3.0805, + "step": 170 + }, + { + "epoch": 0.0189873417721519, + "grad_norm": 0.776450514793396, + "learning_rate": 0.0015, + "loss": 3.0375, + "step": 180 + }, + { + "epoch": 0.020042194092827006, + "grad_norm": 0.7692133784294128, + "learning_rate": 0.0015, + "loss": 2.975, + "step": 190 + }, + { + "epoch": 0.02109704641350211, + "grad_norm": 0.8211736679077148, + "learning_rate": 0.0015, + "loss": 2.9283, + "step": 200 + }, + { + "epoch": 0.022151898734177215, + "grad_norm": 0.7810904383659363, + "learning_rate": 0.0015, + "loss": 2.889, + "step": 210 + }, + { + "epoch": 0.023206751054852322, + "grad_norm": 0.8243034482002258, + "learning_rate": 0.0015, + "loss": 2.8562, + "step": 220 + }, + { + "epoch": 0.024261603375527425, + "grad_norm": 0.768045961856842, + "learning_rate": 0.0015, + "loss": 2.8091, + "step": 230 + }, + { + 
"epoch": 0.02531645569620253, + "grad_norm": 0.9104819297790527, + "learning_rate": 0.0015, + "loss": 2.7694, + "step": 240 + }, + { + "epoch": 0.026371308016877638, + "grad_norm": 1.0683870315551758, + "learning_rate": 0.0015, + "loss": 2.7353, + "step": 250 + }, + { + "epoch": 0.027426160337552744, + "grad_norm": 0.7473042607307434, + "learning_rate": 0.0015, + "loss": 2.7033, + "step": 260 + }, + { + "epoch": 0.028481012658227847, + "grad_norm": 0.8680592775344849, + "learning_rate": 0.0015, + "loss": 2.6715, + "step": 270 + }, + { + "epoch": 0.029535864978902954, + "grad_norm": 0.7758686542510986, + "learning_rate": 0.0015, + "loss": 2.6387, + "step": 280 + }, + { + "epoch": 0.03059071729957806, + "grad_norm": 1.1664143800735474, + "learning_rate": 0.0015, + "loss": 2.613, + "step": 290 + }, + { + "epoch": 0.03164556962025317, + "grad_norm": 1.0230486392974854, + "learning_rate": 0.0015, + "loss": 2.5867, + "step": 300 + }, + { + "epoch": 0.03270042194092827, + "grad_norm": 0.7810431122779846, + "learning_rate": 0.0015, + "loss": 2.5615, + "step": 310 + }, + { + "epoch": 0.03375527426160337, + "grad_norm": 1.0520604848861694, + "learning_rate": 0.0015, + "loss": 2.539, + "step": 320 + }, + { + "epoch": 0.03481012658227848, + "grad_norm": 0.9819400310516357, + "learning_rate": 0.0015, + "loss": 2.5296, + "step": 330 + }, + { + "epoch": 0.035864978902953586, + "grad_norm": 1.0482884645462036, + "learning_rate": 0.0015, + "loss": 2.5035, + "step": 340 + }, + { + "epoch": 0.03691983122362869, + "grad_norm": 0.8537718653678894, + "learning_rate": 0.0015, + "loss": 2.4623, + "step": 350 + }, + { + "epoch": 0.0379746835443038, + "grad_norm": 0.8536440134048462, + "learning_rate": 0.0015, + "loss": 2.4484, + "step": 360 + }, + { + "epoch": 0.039029535864978905, + "grad_norm": 0.8123664259910583, + "learning_rate": 0.0015, + "loss": 2.4335, + "step": 370 + }, + { + "epoch": 0.04008438818565401, + "grad_norm": 0.8474407196044922, + "learning_rate": 0.0015, + "loss": 
2.4246, + "step": 380 + }, + { + "epoch": 0.04113924050632911, + "grad_norm": 0.7529342174530029, + "learning_rate": 0.0015, + "loss": 2.3984, + "step": 390 + }, + { + "epoch": 0.04219409282700422, + "grad_norm": 0.986809253692627, + "learning_rate": 0.0015, + "loss": 2.3729, + "step": 400 + }, + { + "epoch": 0.043248945147679324, + "grad_norm": 0.8170089721679688, + "learning_rate": 0.0015, + "loss": 2.3652, + "step": 410 + }, + { + "epoch": 0.04430379746835443, + "grad_norm": 0.7728642821311951, + "learning_rate": 0.0015, + "loss": 2.3442, + "step": 420 + }, + { + "epoch": 0.04535864978902954, + "grad_norm": 0.9778690934181213, + "learning_rate": 0.0015, + "loss": 2.3212, + "step": 430 + }, + { + "epoch": 0.046413502109704644, + "grad_norm": 0.7992797493934631, + "learning_rate": 0.0015, + "loss": 2.3086, + "step": 440 + }, + { + "epoch": 0.04746835443037975, + "grad_norm": 0.9398708343505859, + "learning_rate": 0.0015, + "loss": 2.3091, + "step": 450 + }, + { + "epoch": 0.04852320675105485, + "grad_norm": 0.8730383515357971, + "learning_rate": 0.0015, + "loss": 2.2856, + "step": 460 + }, + { + "epoch": 0.049578059071729956, + "grad_norm": 0.8130483627319336, + "learning_rate": 0.0015, + "loss": 2.267, + "step": 470 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 0.8602993488311768, + "learning_rate": 0.0015, + "loss": 2.2583, + "step": 480 + }, + { + "epoch": 0.05168776371308017, + "grad_norm": 1.0511897802352905, + "learning_rate": 0.0015, + "loss": 2.2402, + "step": 490 + }, + { + "epoch": 0.052742616033755275, + "grad_norm": 0.9979124069213867, + "learning_rate": 0.0015, + "loss": 2.2346, + "step": 500 + }, + { + "epoch": 0.05379746835443038, + "grad_norm": 0.9300376772880554, + "learning_rate": 0.0015, + "loss": 2.2198, + "step": 510 + }, + { + "epoch": 0.05485232067510549, + "grad_norm": 0.8899880647659302, + "learning_rate": 0.0015, + "loss": 2.206, + "step": 520 + }, + { + "epoch": 0.05590717299578059, + "grad_norm": 0.9637308716773987, + 
"learning_rate": 0.0015, + "loss": 2.1915, + "step": 530 + }, + { + "epoch": 0.056962025316455694, + "grad_norm": 0.8068028688430786, + "learning_rate": 0.0015, + "loss": 2.1731, + "step": 540 + }, + { + "epoch": 0.0580168776371308, + "grad_norm": 0.8182671666145325, + "learning_rate": 0.0015, + "loss": 2.1743, + "step": 550 + }, + { + "epoch": 0.05907172995780591, + "grad_norm": 0.704035758972168, + "learning_rate": 0.0015, + "loss": 2.1413, + "step": 560 + }, + { + "epoch": 0.060126582278481014, + "grad_norm": 0.8451551795005798, + "learning_rate": 0.0015, + "loss": 2.1429, + "step": 570 + }, + { + "epoch": 0.06118143459915612, + "grad_norm": 0.9787982106208801, + "learning_rate": 0.0015, + "loss": 2.1469, + "step": 580 + }, + { + "epoch": 0.06223628691983123, + "grad_norm": 0.784288763999939, + "learning_rate": 0.0015, + "loss": 2.1196, + "step": 590 + }, + { + "epoch": 0.06329113924050633, + "grad_norm": 0.7017039656639099, + "learning_rate": 0.0015, + "loss": 2.0997, + "step": 600 + }, + { + "epoch": 0.06434599156118144, + "grad_norm": 0.7353077530860901, + "learning_rate": 0.0015, + "loss": 2.1061, + "step": 610 + }, + { + "epoch": 0.06540084388185655, + "grad_norm": 0.7414589524269104, + "learning_rate": 0.0015, + "loss": 2.0927, + "step": 620 + }, + { + "epoch": 0.06645569620253164, + "grad_norm": 0.8267672061920166, + "learning_rate": 0.0015, + "loss": 2.0754, + "step": 630 + }, + { + "epoch": 0.06751054852320675, + "grad_norm": 0.7181387543678284, + "learning_rate": 0.0015, + "loss": 2.0883, + "step": 640 + }, + { + "epoch": 0.06856540084388185, + "grad_norm": 1.069088101387024, + "learning_rate": 0.0015, + "loss": 2.0758, + "step": 650 + }, + { + "epoch": 0.06962025316455696, + "grad_norm": 0.9701215624809265, + "learning_rate": 0.0015, + "loss": 2.0672, + "step": 660 + }, + { + "epoch": 0.07067510548523206, + "grad_norm": 0.7990618348121643, + "learning_rate": 0.0015, + "loss": 2.0503, + "step": 670 + }, + { + "epoch": 0.07172995780590717, + 
"grad_norm": 0.8165965676307678, + "learning_rate": 0.0015, + "loss": 2.0366, + "step": 680 + }, + { + "epoch": 0.07278481012658228, + "grad_norm": 0.7588340044021606, + "learning_rate": 0.0015, + "loss": 2.0473, + "step": 690 + }, + { + "epoch": 0.07383966244725738, + "grad_norm": 0.7811934351921082, + "learning_rate": 0.0015, + "loss": 2.0292, + "step": 700 + }, + { + "epoch": 0.07489451476793249, + "grad_norm": 0.8503795266151428, + "learning_rate": 0.0015, + "loss": 2.0208, + "step": 710 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 1.1329801082611084, + "learning_rate": 0.0015, + "loss": 1.9997, + "step": 720 + }, + { + "epoch": 0.0770042194092827, + "grad_norm": 0.95328688621521, + "learning_rate": 0.0015, + "loss": 1.9936, + "step": 730 + }, + { + "epoch": 0.07805907172995781, + "grad_norm": 0.8979207277297974, + "learning_rate": 0.0015, + "loss": 2.0033, + "step": 740 + }, + { + "epoch": 0.07911392405063292, + "grad_norm": 0.8836649656295776, + "learning_rate": 0.0015, + "loss": 1.988, + "step": 750 + }, + { + "epoch": 0.08016877637130802, + "grad_norm": 1.0178972482681274, + "learning_rate": 0.0015, + "loss": 1.9851, + "step": 760 + }, + { + "epoch": 0.08122362869198312, + "grad_norm": 0.8203606009483337, + "learning_rate": 0.0015, + "loss": 1.9739, + "step": 770 + }, + { + "epoch": 0.08227848101265822, + "grad_norm": 0.776364803314209, + "learning_rate": 0.0015, + "loss": 1.9693, + "step": 780 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 0.7987720966339111, + "learning_rate": 0.0015, + "loss": 1.9736, + "step": 790 + }, + { + "epoch": 0.08438818565400844, + "grad_norm": 0.7319262027740479, + "learning_rate": 0.0015, + "loss": 1.9482, + "step": 800 + }, + { + "epoch": 0.08544303797468354, + "grad_norm": 0.8773535490036011, + "learning_rate": 0.0015, + "loss": 1.9367, + "step": 810 + }, + { + "epoch": 0.08649789029535865, + "grad_norm": 0.7392305731773376, + "learning_rate": 0.0015, + "loss": 1.9461, + "step": 820 + }, + { + "epoch": 
0.08755274261603375, + "grad_norm": 1.023331642150879, + "learning_rate": 0.0015, + "loss": 1.9494, + "step": 830 + }, + { + "epoch": 0.08860759493670886, + "grad_norm": 0.9687519669532776, + "learning_rate": 0.0015, + "loss": 1.9237, + "step": 840 + }, + { + "epoch": 0.08966244725738397, + "grad_norm": 0.9138364195823669, + "learning_rate": 0.0015, + "loss": 1.9254, + "step": 850 + }, + { + "epoch": 0.09071729957805907, + "grad_norm": 0.7576828598976135, + "learning_rate": 0.0015, + "loss": 1.9254, + "step": 860 + }, + { + "epoch": 0.09177215189873418, + "grad_norm": 0.7307135462760925, + "learning_rate": 0.0015, + "loss": 1.9204, + "step": 870 + }, + { + "epoch": 0.09282700421940929, + "grad_norm": 0.738254189491272, + "learning_rate": 0.0015, + "loss": 1.9153, + "step": 880 + }, + { + "epoch": 0.0938818565400844, + "grad_norm": 0.9347200989723206, + "learning_rate": 0.0015, + "loss": 1.9064, + "step": 890 + }, + { + "epoch": 0.0949367088607595, + "grad_norm": 1.0709482431411743, + "learning_rate": 0.0015, + "loss": 1.9023, + "step": 900 + }, + { + "epoch": 0.09599156118143459, + "grad_norm": 0.7532578706741333, + "learning_rate": 0.0015, + "loss": 1.9069, + "step": 910 + }, + { + "epoch": 0.0970464135021097, + "grad_norm": 0.7279590964317322, + "learning_rate": 0.0015, + "loss": 1.8944, + "step": 920 + }, + { + "epoch": 0.0981012658227848, + "grad_norm": 0.7947350144386292, + "learning_rate": 0.0015, + "loss": 1.8804, + "step": 930 + }, + { + "epoch": 0.09915611814345991, + "grad_norm": 0.9116294384002686, + "learning_rate": 0.0015, + "loss": 1.8878, + "step": 940 + }, + { + "epoch": 0.10021097046413502, + "grad_norm": 0.9219313263893127, + "learning_rate": 0.0015, + "loss": 1.8726, + "step": 950 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 0.7465686798095703, + "learning_rate": 0.0015, + "loss": 1.8755, + "step": 960 + }, + { + "epoch": 0.10232067510548523, + "grad_norm": 0.8785697817802429, + "learning_rate": 0.0015, + "loss": 1.8777, + "step": 970 
+ }, + { + "epoch": 0.10337552742616034, + "grad_norm": 0.7366559505462646, + "learning_rate": 0.0015, + "loss": 1.859, + "step": 980 + }, + { + "epoch": 0.10443037974683544, + "grad_norm": 0.7610734701156616, + "learning_rate": 0.0015, + "loss": 1.8594, + "step": 990 + }, + { + "epoch": 0.10548523206751055, + "grad_norm": 1.1366569995880127, + "learning_rate": 0.0015, + "loss": 1.8687, + "step": 1000 + }, + { + "epoch": 0.10654008438818566, + "grad_norm": 0.7980130910873413, + "learning_rate": 0.0015, + "loss": 1.8575, + "step": 1010 + }, + { + "epoch": 0.10759493670886076, + "grad_norm": 0.887115478515625, + "learning_rate": 0.0015, + "loss": 1.8452, + "step": 1020 + }, + { + "epoch": 0.10864978902953587, + "grad_norm": 0.8321040868759155, + "learning_rate": 0.0015, + "loss": 1.8486, + "step": 1030 + }, + { + "epoch": 0.10970464135021098, + "grad_norm": 0.750380277633667, + "learning_rate": 0.0015, + "loss": 1.838, + "step": 1040 + }, + { + "epoch": 0.11075949367088607, + "grad_norm": 0.9794703125953674, + "learning_rate": 0.0015, + "loss": 1.8429, + "step": 1050 + }, + { + "epoch": 0.11181434599156118, + "grad_norm": 0.8354634642601013, + "learning_rate": 0.0015, + "loss": 1.8325, + "step": 1060 + }, + { + "epoch": 0.11286919831223628, + "grad_norm": 0.6737495064735413, + "learning_rate": 0.0015, + "loss": 1.8227, + "step": 1070 + }, + { + "epoch": 0.11392405063291139, + "grad_norm": 0.9093047380447388, + "learning_rate": 0.0015, + "loss": 1.8232, + "step": 1080 + }, + { + "epoch": 0.1149789029535865, + "grad_norm": 1.0107520818710327, + "learning_rate": 0.0015, + "loss": 1.829, + "step": 1090 + }, + { + "epoch": 0.1160337552742616, + "grad_norm": 0.848486602306366, + "learning_rate": 0.0015, + "loss": 1.8167, + "step": 1100 + }, + { + "epoch": 0.11708860759493671, + "grad_norm": 0.7347695231437683, + "learning_rate": 0.0015, + "loss": 1.8224, + "step": 1110 + }, + { + "epoch": 0.11814345991561181, + "grad_norm": 0.9195750951766968, + "learning_rate": 0.0015, + 
"loss": 1.8149, + "step": 1120 + }, + { + "epoch": 0.11919831223628692, + "grad_norm": 0.7962725758552551, + "learning_rate": 0.0015, + "loss": 1.8007, + "step": 1130 + }, + { + "epoch": 0.12025316455696203, + "grad_norm": 0.8207571506500244, + "learning_rate": 0.0015, + "loss": 1.8139, + "step": 1140 + }, + { + "epoch": 0.12130801687763713, + "grad_norm": 1.2019542455673218, + "learning_rate": 0.0015, + "loss": 1.7987, + "step": 1150 + }, + { + "epoch": 0.12236286919831224, + "grad_norm": 0.7293469309806824, + "learning_rate": 0.0015, + "loss": 1.8107, + "step": 1160 + }, + { + "epoch": 0.12341772151898735, + "grad_norm": 0.9432297348976135, + "learning_rate": 0.0015, + "loss": 1.7945, + "step": 1170 + }, + { + "epoch": 0.12447257383966245, + "grad_norm": 0.7298336625099182, + "learning_rate": 0.0015, + "loss": 1.7808, + "step": 1180 + }, + { + "epoch": 0.12552742616033755, + "grad_norm": 0.6769946217536926, + "learning_rate": 0.0015, + "loss": 1.7828, + "step": 1190 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 0.7276390194892883, + "learning_rate": 0.0015, + "loss": 1.7998, + "step": 1200 + }, + { + "epoch": 0.12763713080168776, + "grad_norm": 0.6962103247642517, + "learning_rate": 0.0015, + "loss": 1.789, + "step": 1210 + }, + { + "epoch": 0.12869198312236288, + "grad_norm": 0.6777324080467224, + "learning_rate": 0.0015, + "loss": 1.7748, + "step": 1220 + }, + { + "epoch": 0.12974683544303797, + "grad_norm": 0.6568731665611267, + "learning_rate": 0.0015, + "loss": 1.7628, + "step": 1230 + }, + { + "epoch": 0.1308016877637131, + "grad_norm": 0.7241616249084473, + "learning_rate": 0.0015, + "loss": 1.7801, + "step": 1240 + }, + { + "epoch": 0.13185654008438819, + "grad_norm": 0.7091214656829834, + "learning_rate": 0.0015, + "loss": 1.7821, + "step": 1250 + }, + { + "epoch": 0.13291139240506328, + "grad_norm": 0.9228652715682983, + "learning_rate": 0.0015, + "loss": 1.7668, + "step": 1260 + }, + { + "epoch": 0.1339662447257384, + "grad_norm": 
0.7739994525909424, + "learning_rate": 0.0015, + "loss": 1.7643, + "step": 1270 + }, + { + "epoch": 0.1350210970464135, + "grad_norm": 0.7284632325172424, + "learning_rate": 0.0015, + "loss": 1.7771, + "step": 1280 + }, + { + "epoch": 0.1360759493670886, + "grad_norm": 0.9050618410110474, + "learning_rate": 0.0015, + "loss": 1.7646, + "step": 1290 + }, + { + "epoch": 0.1371308016877637, + "grad_norm": 1.594030499458313, + "learning_rate": 0.0015, + "loss": 1.7542, + "step": 1300 + }, + { + "epoch": 0.13818565400843882, + "grad_norm": 1.271827220916748, + "learning_rate": 0.0015, + "loss": 1.7756, + "step": 1310 + }, + { + "epoch": 0.13924050632911392, + "grad_norm": 1.489267110824585, + "learning_rate": 0.0015, + "loss": 1.7589, + "step": 1320 + }, + { + "epoch": 0.14029535864978904, + "grad_norm": 0.9819322824478149, + "learning_rate": 0.0015, + "loss": 1.7434, + "step": 1330 + }, + { + "epoch": 0.14135021097046413, + "grad_norm": 0.6417033076286316, + "learning_rate": 0.0015, + "loss": 1.7429, + "step": 1340 + }, + { + "epoch": 0.14240506329113925, + "grad_norm": 0.7139493823051453, + "learning_rate": 0.0015, + "loss": 1.7507, + "step": 1350 + }, + { + "epoch": 0.14345991561181434, + "grad_norm": 0.7161443829536438, + "learning_rate": 0.0015, + "loss": 1.7446, + "step": 1360 + }, + { + "epoch": 0.14451476793248946, + "grad_norm": 0.7190232276916504, + "learning_rate": 0.0015, + "loss": 1.7409, + "step": 1370 + }, + { + "epoch": 0.14556962025316456, + "grad_norm": 0.6767439842224121, + "learning_rate": 0.0015, + "loss": 1.7308, + "step": 1380 + }, + { + "epoch": 0.14662447257383968, + "grad_norm": 0.8346786499023438, + "learning_rate": 0.0015, + "loss": 1.7453, + "step": 1390 + }, + { + "epoch": 0.14767932489451477, + "grad_norm": 0.6780781149864197, + "learning_rate": 0.0015, + "loss": 1.7296, + "step": 1400 + }, + { + "epoch": 0.14873417721518986, + "grad_norm": 0.6594715118408203, + "learning_rate": 0.0015, + "loss": 1.7307, + "step": 1410 + }, + { + "epoch": 
0.14978902953586498, + "grad_norm": 0.740116536617279, + "learning_rate": 0.0015, + "loss": 1.737, + "step": 1420 + }, + { + "epoch": 0.15084388185654007, + "grad_norm": 0.7160284519195557, + "learning_rate": 0.0015, + "loss": 1.7317, + "step": 1430 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 0.7835481762886047, + "learning_rate": 0.0015, + "loss": 1.7289, + "step": 1440 + }, + { + "epoch": 0.1529535864978903, + "grad_norm": 0.7016604542732239, + "learning_rate": 0.0015, + "loss": 1.723, + "step": 1450 + }, + { + "epoch": 0.1540084388185654, + "grad_norm": 0.707410991191864, + "learning_rate": 0.0015, + "loss": 1.721, + "step": 1460 + }, + { + "epoch": 0.1550632911392405, + "grad_norm": 1.1425987482070923, + "learning_rate": 0.0015, + "loss": 1.72, + "step": 1470 + }, + { + "epoch": 0.15611814345991562, + "grad_norm": 0.624231219291687, + "learning_rate": 0.0015, + "loss": 1.7235, + "step": 1480 + }, + { + "epoch": 0.1571729957805907, + "grad_norm": 0.8226703405380249, + "learning_rate": 0.0015, + "loss": 1.7204, + "step": 1490 + }, + { + "epoch": 0.15822784810126583, + "grad_norm": 0.6356648802757263, + "learning_rate": 0.0015, + "loss": 1.7213, + "step": 1500 + }, + { + "epoch": 0.15928270042194093, + "grad_norm": 0.7041848301887512, + "learning_rate": 0.0015, + "loss": 1.7161, + "step": 1510 + }, + { + "epoch": 0.16033755274261605, + "grad_norm": 0.6434488892555237, + "learning_rate": 0.0015, + "loss": 1.7198, + "step": 1520 + }, + { + "epoch": 0.16139240506329114, + "grad_norm": 0.6322984099388123, + "learning_rate": 0.0015, + "loss": 1.7017, + "step": 1530 + }, + { + "epoch": 0.16244725738396623, + "grad_norm": 0.6440200805664062, + "learning_rate": 0.0015, + "loss": 1.6916, + "step": 1540 + }, + { + "epoch": 0.16350210970464135, + "grad_norm": 0.7490743398666382, + "learning_rate": 0.0015, + "loss": 1.6954, + "step": 1550 + }, + { + "epoch": 0.16455696202531644, + "grad_norm": 0.7250597476959229, + "learning_rate": 0.0015, + "loss": 1.7001, + 
"step": 1560 + }, + { + "epoch": 0.16561181434599156, + "grad_norm": 0.640522301197052, + "learning_rate": 0.0015, + "loss": 1.6894, + "step": 1570 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 0.719833493232727, + "learning_rate": 0.0015, + "loss": 1.7001, + "step": 1580 + }, + { + "epoch": 0.16772151898734178, + "grad_norm": 1.0009751319885254, + "learning_rate": 0.0015, + "loss": 1.6927, + "step": 1590 + }, + { + "epoch": 0.16877637130801687, + "grad_norm": 0.7644470930099487, + "learning_rate": 0.0015, + "loss": 1.701, + "step": 1600 + }, + { + "epoch": 0.169831223628692, + "grad_norm": 0.7390194535255432, + "learning_rate": 0.0015, + "loss": 1.6982, + "step": 1610 + }, + { + "epoch": 0.17088607594936708, + "grad_norm": 0.639541506767273, + "learning_rate": 0.0015, + "loss": 1.6695, + "step": 1620 + }, + { + "epoch": 0.1719409282700422, + "grad_norm": 0.7821475267410278, + "learning_rate": 0.0015, + "loss": 1.6799, + "step": 1630 + }, + { + "epoch": 0.1729957805907173, + "grad_norm": 0.7226126790046692, + "learning_rate": 0.0015, + "loss": 1.6847, + "step": 1640 + }, + { + "epoch": 0.17405063291139242, + "grad_norm": 0.93052738904953, + "learning_rate": 0.0015, + "loss": 1.6786, + "step": 1650 + }, + { + "epoch": 0.1751054852320675, + "grad_norm": 0.7509690523147583, + "learning_rate": 0.0015, + "loss": 1.6768, + "step": 1660 + }, + { + "epoch": 0.17616033755274263, + "grad_norm": 0.7351312041282654, + "learning_rate": 0.0015, + "loss": 1.6843, + "step": 1670 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 1.383955717086792, + "learning_rate": 0.0015, + "loss": 1.673, + "step": 1680 + }, + { + "epoch": 0.17827004219409281, + "grad_norm": 0.7214679718017578, + "learning_rate": 0.0015, + "loss": 1.6649, + "step": 1690 + }, + { + "epoch": 0.17932489451476794, + "grad_norm": 0.6363106369972229, + "learning_rate": 0.0015, + "loss": 1.6816, + "step": 1700 + }, + { + "epoch": 0.18037974683544303, + "grad_norm": 0.7506694793701172, + "learning_rate": 
0.0015, + "loss": 1.6629, + "step": 1710 + }, + { + "epoch": 0.18143459915611815, + "grad_norm": 0.6698363423347473, + "learning_rate": 0.0015, + "loss": 1.6614, + "step": 1720 + }, + { + "epoch": 0.18248945147679324, + "grad_norm": 0.8435531854629517, + "learning_rate": 0.0015, + "loss": 1.6663, + "step": 1730 + }, + { + "epoch": 0.18354430379746836, + "grad_norm": 0.7420846819877625, + "learning_rate": 0.0015, + "loss": 1.6672, + "step": 1740 + }, + { + "epoch": 0.18459915611814345, + "grad_norm": 0.6367708444595337, + "learning_rate": 0.0015, + "loss": 1.6847, + "step": 1750 + }, + { + "epoch": 0.18565400843881857, + "grad_norm": 0.7232658267021179, + "learning_rate": 0.0015, + "loss": 1.6643, + "step": 1760 + }, + { + "epoch": 0.18670886075949367, + "grad_norm": 0.8593874573707581, + "learning_rate": 0.0015, + "loss": 1.6674, + "step": 1770 + }, + { + "epoch": 0.1877637130801688, + "grad_norm": 0.6410152912139893, + "learning_rate": 0.0015, + "loss": 1.6697, + "step": 1780 + }, + { + "epoch": 0.18881856540084388, + "grad_norm": 1.2279235124588013, + "learning_rate": 0.0015, + "loss": 1.6695, + "step": 1790 + }, + { + "epoch": 0.189873417721519, + "grad_norm": 1.0295931100845337, + "learning_rate": 0.0015, + "loss": 1.6695, + "step": 1800 + }, + { + "epoch": 0.1909282700421941, + "grad_norm": 0.6923174262046814, + "learning_rate": 0.0015, + "loss": 1.6462, + "step": 1810 + }, + { + "epoch": 0.19198312236286919, + "grad_norm": 0.7438858151435852, + "learning_rate": 0.0015, + "loss": 1.6477, + "step": 1820 + }, + { + "epoch": 0.1930379746835443, + "grad_norm": 0.7807824611663818, + "learning_rate": 0.0015, + "loss": 1.6483, + "step": 1830 + }, + { + "epoch": 0.1940928270042194, + "grad_norm": 0.7299506068229675, + "learning_rate": 0.0015, + "loss": 1.6473, + "step": 1840 + }, + { + "epoch": 0.19514767932489452, + "grad_norm": 0.857148289680481, + "learning_rate": 0.0015, + "loss": 1.6516, + "step": 1850 + }, + { + "epoch": 0.1962025316455696, + "grad_norm": 
0.7330800294876099, + "learning_rate": 0.0015, + "loss": 1.6532, + "step": 1860 + }, + { + "epoch": 0.19725738396624473, + "grad_norm": 0.6870981454849243, + "learning_rate": 0.0015, + "loss": 1.6546, + "step": 1870 + }, + { + "epoch": 0.19831223628691982, + "grad_norm": 0.7019320130348206, + "learning_rate": 0.0015, + "loss": 1.6396, + "step": 1880 + }, + { + "epoch": 0.19936708860759494, + "grad_norm": 0.696793794631958, + "learning_rate": 0.0015, + "loss": 1.6491, + "step": 1890 + }, + { + "epoch": 0.20042194092827004, + "grad_norm": 0.9838576912879944, + "learning_rate": 0.0015, + "loss": 1.6449, + "step": 1900 + }, + { + "epoch": 0.20147679324894516, + "grad_norm": 1.0997356176376343, + "learning_rate": 0.0015, + "loss": 1.6475, + "step": 1910 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 0.6624078750610352, + "learning_rate": 0.0015, + "loss": 1.6404, + "step": 1920 + }, + { + "epoch": 0.20358649789029537, + "grad_norm": 1.037745714187622, + "learning_rate": 0.0015, + "loss": 1.6412, + "step": 1930 + }, + { + "epoch": 0.20464135021097046, + "grad_norm": 0.6833543181419373, + "learning_rate": 0.0015, + "loss": 1.6325, + "step": 1940 + }, + { + "epoch": 0.20569620253164558, + "grad_norm": 0.6265554428100586, + "learning_rate": 0.0015, + "loss": 1.6297, + "step": 1950 + }, + { + "epoch": 0.20675105485232068, + "grad_norm": 0.7579610347747803, + "learning_rate": 0.0015, + "loss": 1.6345, + "step": 1960 + }, + { + "epoch": 0.20780590717299577, + "grad_norm": 0.5994946956634521, + "learning_rate": 0.0015, + "loss": 1.6464, + "step": 1970 + }, + { + "epoch": 0.2088607594936709, + "grad_norm": 0.8627594113349915, + "learning_rate": 0.0015, + "loss": 1.6257, + "step": 1980 + }, + { + "epoch": 0.20991561181434598, + "grad_norm": 0.6660335659980774, + "learning_rate": 0.0015, + "loss": 1.6272, + "step": 1990 + }, + { + "epoch": 0.2109704641350211, + "grad_norm": 0.6796160936355591, + "learning_rate": 0.0015, + "loss": 1.624, + "step": 2000 + }, + { + "epoch": 
0.2120253164556962, + "grad_norm": 0.9388406872749329, + "learning_rate": 0.0015, + "loss": 1.6321, + "step": 2010 + }, + { + "epoch": 0.21308016877637131, + "grad_norm": 1.0387760400772095, + "learning_rate": 0.0015, + "loss": 1.6426, + "step": 2020 + }, + { + "epoch": 0.2141350210970464, + "grad_norm": 0.6553621888160706, + "learning_rate": 0.0015, + "loss": 1.627, + "step": 2030 + }, + { + "epoch": 0.21518987341772153, + "grad_norm": 0.6387179493904114, + "learning_rate": 0.0015, + "loss": 1.6276, + "step": 2040 + }, + { + "epoch": 0.21624472573839662, + "grad_norm": 0.7289672493934631, + "learning_rate": 0.0015, + "loss": 1.6261, + "step": 2050 + }, + { + "epoch": 0.21729957805907174, + "grad_norm": 0.6390879154205322, + "learning_rate": 0.0015, + "loss": 1.6166, + "step": 2060 + }, + { + "epoch": 0.21835443037974683, + "grad_norm": 0.884262204170227, + "learning_rate": 0.0015, + "loss": 1.6222, + "step": 2070 + }, + { + "epoch": 0.21940928270042195, + "grad_norm": 0.7829119563102722, + "learning_rate": 0.0015, + "loss": 1.6234, + "step": 2080 + }, + { + "epoch": 0.22046413502109705, + "grad_norm": 0.6269692182540894, + "learning_rate": 0.0015, + "loss": 1.613, + "step": 2090 + }, + { + "epoch": 0.22151898734177214, + "grad_norm": 0.6027466654777527, + "learning_rate": 0.0015, + "loss": 1.6198, + "step": 2100 + }, + { + "epoch": 0.22257383966244726, + "grad_norm": 1.0918283462524414, + "learning_rate": 0.0015, + "loss": 1.6171, + "step": 2110 + }, + { + "epoch": 0.22362869198312235, + "grad_norm": 0.6074740290641785, + "learning_rate": 0.0015, + "loss": 1.6093, + "step": 2120 + }, + { + "epoch": 0.22468354430379747, + "grad_norm": 0.7111483812332153, + "learning_rate": 0.0015, + "loss": 1.6141, + "step": 2130 + }, + { + "epoch": 0.22573839662447256, + "grad_norm": 0.6221132278442383, + "learning_rate": 0.0015, + "loss": 1.6089, + "step": 2140 + }, + { + "epoch": 0.22679324894514769, + "grad_norm": 0.9400107264518738, + "learning_rate": 0.0015, + "loss": 1.6158, 
+ "step": 2150 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 1.103214144706726, + "learning_rate": 0.0015, + "loss": 1.6291, + "step": 2160 + }, + { + "epoch": 0.2289029535864979, + "grad_norm": 0.7383182644844055, + "learning_rate": 0.0015, + "loss": 1.6138, + "step": 2170 + }, + { + "epoch": 0.229957805907173, + "grad_norm": 0.713843584060669, + "learning_rate": 0.0015, + "loss": 1.5957, + "step": 2180 + }, + { + "epoch": 0.2310126582278481, + "grad_norm": 1.0974149703979492, + "learning_rate": 0.0015, + "loss": 1.6112, + "step": 2190 + }, + { + "epoch": 0.2320675105485232, + "grad_norm": 0.723660945892334, + "learning_rate": 0.0015, + "loss": 1.6146, + "step": 2200 + }, + { + "epoch": 0.23312236286919832, + "grad_norm": 0.782029390335083, + "learning_rate": 0.0015, + "loss": 1.5973, + "step": 2210 + }, + { + "epoch": 0.23417721518987342, + "grad_norm": 0.6948687434196472, + "learning_rate": 0.0015, + "loss": 1.6127, + "step": 2220 + }, + { + "epoch": 0.23523206751054854, + "grad_norm": 1.1345460414886475, + "learning_rate": 0.0015, + "loss": 1.6054, + "step": 2230 + }, + { + "epoch": 0.23628691983122363, + "grad_norm": 1.0687614679336548, + "learning_rate": 0.0015, + "loss": 1.6031, + "step": 2240 + }, + { + "epoch": 0.23734177215189872, + "grad_norm": 0.6718165278434753, + "learning_rate": 0.0015, + "loss": 1.5958, + "step": 2250 + }, + { + "epoch": 0.23839662447257384, + "grad_norm": 0.6679635047912598, + "learning_rate": 0.0015, + "loss": 1.5853, + "step": 2260 + }, + { + "epoch": 0.23945147679324894, + "grad_norm": 0.8055697679519653, + "learning_rate": 0.0015, + "loss": 1.62, + "step": 2270 + }, + { + "epoch": 0.24050632911392406, + "grad_norm": 0.9656091332435608, + "learning_rate": 0.0015, + "loss": 1.6072, + "step": 2280 + }, + { + "epoch": 0.24156118143459915, + "grad_norm": 0.9608715772628784, + "learning_rate": 0.0015, + "loss": 1.5968, + "step": 2290 + }, + { + "epoch": 0.24261603375527427, + "grad_norm": 0.8238662481307983, + 
"learning_rate": 0.0015, + "loss": 1.5948, + "step": 2300 + }, + { + "epoch": 0.24367088607594936, + "grad_norm": 0.7834868431091309, + "learning_rate": 0.0015, + "loss": 1.5952, + "step": 2310 + }, + { + "epoch": 0.24472573839662448, + "grad_norm": 0.6714757680892944, + "learning_rate": 0.0015, + "loss": 1.6039, + "step": 2320 + }, + { + "epoch": 0.24578059071729957, + "grad_norm": 0.6760356426239014, + "learning_rate": 0.0015, + "loss": 1.5854, + "step": 2330 + }, + { + "epoch": 0.2468354430379747, + "grad_norm": 0.7482396960258484, + "learning_rate": 0.0015, + "loss": 1.5804, + "step": 2340 + }, + { + "epoch": 0.2478902953586498, + "grad_norm": 0.8608502745628357, + "learning_rate": 0.0015, + "loss": 1.5825, + "step": 2350 + }, + { + "epoch": 0.2489451476793249, + "grad_norm": 0.6435486674308777, + "learning_rate": 0.0015, + "loss": 1.5942, + "step": 2360 + }, + { + "epoch": 0.25, + "grad_norm": 0.5963234901428223, + "learning_rate": 0.0015, + "loss": 1.5931, + "step": 2370 + }, + { + "epoch": 0.2510548523206751, + "grad_norm": 0.6503880023956299, + "learning_rate": 0.0015, + "loss": 1.5901, + "step": 2380 + }, + { + "epoch": 0.2521097046413502, + "grad_norm": 0.6955623030662537, + "learning_rate": 0.0015, + "loss": 1.5884, + "step": 2390 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 0.6004531383514404, + "learning_rate": 0.0015, + "loss": 1.5883, + "step": 2400 + }, + { + "epoch": 0.2542194092827004, + "grad_norm": 0.607153594493866, + "learning_rate": 0.0015, + "loss": 1.5892, + "step": 2410 + }, + { + "epoch": 0.2552742616033755, + "grad_norm": 0.9378383755683899, + "learning_rate": 0.0015, + "loss": 1.5851, + "step": 2420 + }, + { + "epoch": 0.2563291139240506, + "grad_norm": 0.7985408902168274, + "learning_rate": 0.0015, + "loss": 1.583, + "step": 2430 + }, + { + "epoch": 0.25738396624472576, + "grad_norm": 0.8261814117431641, + "learning_rate": 0.0015, + "loss": 1.5893, + "step": 2440 + }, + { + "epoch": 0.25843881856540085, + "grad_norm": 
0.6244887709617615, + "learning_rate": 0.0015, + "loss": 1.575, + "step": 2450 + }, + { + "epoch": 0.25949367088607594, + "grad_norm": 0.6061866283416748, + "learning_rate": 0.0015, + "loss": 1.5909, + "step": 2460 + }, + { + "epoch": 0.26054852320675104, + "grad_norm": 0.9484696388244629, + "learning_rate": 0.0015, + "loss": 1.5829, + "step": 2470 + }, + { + "epoch": 0.2616033755274262, + "grad_norm": 0.7839576601982117, + "learning_rate": 0.0015, + "loss": 1.5884, + "step": 2480 + }, + { + "epoch": 0.2626582278481013, + "grad_norm": 0.6155287623405457, + "learning_rate": 0.0015, + "loss": 1.573, + "step": 2490 + }, + { + "epoch": 0.26371308016877637, + "grad_norm": 0.7642000317573547, + "learning_rate": 0.0015, + "loss": 1.5784, + "step": 2500 + }, + { + "epoch": 0.26476793248945146, + "grad_norm": 0.6023858785629272, + "learning_rate": 0.0015, + "loss": 1.5767, + "step": 2510 + }, + { + "epoch": 0.26582278481012656, + "grad_norm": 0.725964367389679, + "learning_rate": 0.0015, + "loss": 1.5816, + "step": 2520 + }, + { + "epoch": 0.2668776371308017, + "grad_norm": 0.7024165987968445, + "learning_rate": 0.0015, + "loss": 1.5737, + "step": 2530 + }, + { + "epoch": 0.2679324894514768, + "grad_norm": 0.7889173626899719, + "learning_rate": 0.0015, + "loss": 1.5793, + "step": 2540 + }, + { + "epoch": 0.2689873417721519, + "grad_norm": 0.6713038086891174, + "learning_rate": 0.0015, + "loss": 1.5748, + "step": 2550 + }, + { + "epoch": 0.270042194092827, + "grad_norm": 0.6811700463294983, + "learning_rate": 0.0015, + "loss": 1.5819, + "step": 2560 + }, + { + "epoch": 0.27109704641350213, + "grad_norm": 0.6109973192214966, + "learning_rate": 0.0015, + "loss": 1.5745, + "step": 2570 + }, + { + "epoch": 0.2721518987341772, + "grad_norm": 0.5974403619766235, + "learning_rate": 0.0015, + "loss": 1.5716, + "step": 2580 + }, + { + "epoch": 0.2732067510548523, + "grad_norm": 0.7356406450271606, + "learning_rate": 0.0015, + "loss": 1.582, + "step": 2590 + }, + { + "epoch": 
0.2742616033755274, + "grad_norm": 0.6180926561355591, + "learning_rate": 0.0015, + "loss": 1.5783, + "step": 2600 + }, + { + "epoch": 0.27531645569620256, + "grad_norm": 0.5976831316947937, + "learning_rate": 0.0015, + "loss": 1.5754, + "step": 2610 + }, + { + "epoch": 0.27637130801687765, + "grad_norm": 0.6535393595695496, + "learning_rate": 0.0015, + "loss": 1.5743, + "step": 2620 + }, + { + "epoch": 0.27742616033755274, + "grad_norm": 0.7285734415054321, + "learning_rate": 0.0015, + "loss": 1.5647, + "step": 2630 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 0.6646901965141296, + "learning_rate": 0.0015, + "loss": 1.5788, + "step": 2640 + }, + { + "epoch": 0.2795358649789029, + "grad_norm": 0.7660537958145142, + "learning_rate": 0.0015, + "loss": 1.5706, + "step": 2650 + }, + { + "epoch": 0.2805907172995781, + "grad_norm": 0.8967321515083313, + "learning_rate": 0.0015, + "loss": 1.5631, + "step": 2660 + }, + { + "epoch": 0.28164556962025317, + "grad_norm": 0.6177824139595032, + "learning_rate": 0.0015, + "loss": 1.5653, + "step": 2670 + }, + { + "epoch": 0.28270042194092826, + "grad_norm": 0.6162772178649902, + "learning_rate": 0.0015, + "loss": 1.5656, + "step": 2680 + }, + { + "epoch": 0.28375527426160335, + "grad_norm": 0.7549130320549011, + "learning_rate": 0.0015, + "loss": 1.5648, + "step": 2690 + }, + { + "epoch": 0.2848101265822785, + "grad_norm": 0.6640269756317139, + "learning_rate": 0.0015, + "loss": 1.5581, + "step": 2700 + }, + { + "epoch": 0.2858649789029536, + "grad_norm": 0.7048502564430237, + "learning_rate": 0.0015, + "loss": 1.5683, + "step": 2710 + }, + { + "epoch": 0.2869198312236287, + "grad_norm": 0.615058958530426, + "learning_rate": 0.0015, + "loss": 1.5679, + "step": 2720 + }, + { + "epoch": 0.2879746835443038, + "grad_norm": 0.5851198434829712, + "learning_rate": 0.0015, + "loss": 1.546, + "step": 2730 + }, + { + "epoch": 0.2890295358649789, + "grad_norm": 1.017960548400879, + "learning_rate": 0.0015, + "loss": 1.5574, + 
"step": 2740 + }, + { + "epoch": 0.290084388185654, + "grad_norm": 0.6042937636375427, + "learning_rate": 0.0015, + "loss": 1.5665, + "step": 2750 + }, + { + "epoch": 0.2911392405063291, + "grad_norm": 0.926185667514801, + "learning_rate": 0.0015, + "loss": 1.5707, + "step": 2760 + }, + { + "epoch": 0.2921940928270042, + "grad_norm": 0.6197679042816162, + "learning_rate": 0.0015, + "loss": 1.5672, + "step": 2770 + }, + { + "epoch": 0.29324894514767935, + "grad_norm": 0.6802936792373657, + "learning_rate": 0.0015, + "loss": 1.564, + "step": 2780 + }, + { + "epoch": 0.29430379746835444, + "grad_norm": 0.7331050038337708, + "learning_rate": 0.0015, + "loss": 1.5602, + "step": 2790 + }, + { + "epoch": 0.29535864978902954, + "grad_norm": 0.721390962600708, + "learning_rate": 0.0015, + "loss": 1.5589, + "step": 2800 + }, + { + "epoch": 0.29641350210970463, + "grad_norm": 0.6204345226287842, + "learning_rate": 0.0015, + "loss": 1.5564, + "step": 2810 + }, + { + "epoch": 0.2974683544303797, + "grad_norm": 0.6381465196609497, + "learning_rate": 0.0015, + "loss": 1.5651, + "step": 2820 + }, + { + "epoch": 0.29852320675105487, + "grad_norm": 1.0429388284683228, + "learning_rate": 0.0015, + "loss": 1.5408, + "step": 2830 + }, + { + "epoch": 0.29957805907172996, + "grad_norm": 0.8542698621749878, + "learning_rate": 0.0015, + "loss": 1.5455, + "step": 2840 + }, + { + "epoch": 0.30063291139240506, + "grad_norm": 0.6300585865974426, + "learning_rate": 0.0015, + "loss": 1.5481, + "step": 2850 + }, + { + "epoch": 0.30168776371308015, + "grad_norm": 0.6176901459693909, + "learning_rate": 0.0015, + "loss": 1.5525, + "step": 2860 + }, + { + "epoch": 0.3027426160337553, + "grad_norm": 0.9087796807289124, + "learning_rate": 0.0015, + "loss": 1.5546, + "step": 2870 + }, + { + "epoch": 0.3037974683544304, + "grad_norm": 0.7421715259552002, + "learning_rate": 0.0015, + "loss": 1.5441, + "step": 2880 + }, + { + "epoch": 0.3048523206751055, + "grad_norm": 0.6909385919570923, + 
"learning_rate": 0.0015, + "loss": 1.547, + "step": 2890 + }, + { + "epoch": 0.3059071729957806, + "grad_norm": 0.7861342430114746, + "learning_rate": 0.0015, + "loss": 1.5522, + "step": 2900 + }, + { + "epoch": 0.3069620253164557, + "grad_norm": 0.9386938214302063, + "learning_rate": 0.0015, + "loss": 1.5546, + "step": 2910 + }, + { + "epoch": 0.3080168776371308, + "grad_norm": 0.7081777453422546, + "learning_rate": 0.0015, + "loss": 1.5461, + "step": 2920 + }, + { + "epoch": 0.3090717299578059, + "grad_norm": 0.6430122256278992, + "learning_rate": 0.0015, + "loss": 1.5435, + "step": 2930 + }, + { + "epoch": 0.310126582278481, + "grad_norm": 0.8484134078025818, + "learning_rate": 0.0015, + "loss": 1.5533, + "step": 2940 + }, + { + "epoch": 0.3111814345991561, + "grad_norm": 0.8623349666595459, + "learning_rate": 0.0015, + "loss": 1.5589, + "step": 2950 + }, + { + "epoch": 0.31223628691983124, + "grad_norm": 0.9112716317176819, + "learning_rate": 0.0015, + "loss": 1.55, + "step": 2960 + }, + { + "epoch": 0.31329113924050633, + "grad_norm": 0.8495725989341736, + "learning_rate": 0.0015, + "loss": 1.5305, + "step": 2970 + }, + { + "epoch": 0.3143459915611814, + "grad_norm": 0.5744346976280212, + "learning_rate": 0.0015, + "loss": 1.5354, + "step": 2980 + }, + { + "epoch": 0.3154008438818565, + "grad_norm": 0.6701624393463135, + "learning_rate": 0.0015, + "loss": 1.55, + "step": 2990 + }, + { + "epoch": 0.31645569620253167, + "grad_norm": 0.6425880789756775, + "learning_rate": 0.0015, + "loss": 1.533, + "step": 3000 + }, + { + "epoch": 0.31751054852320676, + "grad_norm": 0.6229080557823181, + "learning_rate": 0.0015, + "loss": 1.5425, + "step": 3010 + }, + { + "epoch": 0.31856540084388185, + "grad_norm": 0.755316972732544, + "learning_rate": 0.0015, + "loss": 1.5447, + "step": 3020 + }, + { + "epoch": 0.31962025316455694, + "grad_norm": 0.6249800324440002, + "learning_rate": 0.0015, + "loss": 1.5311, + "step": 3030 + }, + { + "epoch": 0.3206751054852321, + 
"grad_norm": 0.7024720311164856, + "learning_rate": 0.0015, + "loss": 1.5398, + "step": 3040 + }, + { + "epoch": 0.3217299578059072, + "grad_norm": 0.6813156604766846, + "learning_rate": 0.0015, + "loss": 1.5375, + "step": 3050 + }, + { + "epoch": 0.3227848101265823, + "grad_norm": 0.6011126637458801, + "learning_rate": 0.0015, + "loss": 1.5471, + "step": 3060 + }, + { + "epoch": 0.32383966244725737, + "grad_norm": 0.5836905837059021, + "learning_rate": 0.0015, + "loss": 1.536, + "step": 3070 + }, + { + "epoch": 0.32489451476793246, + "grad_norm": 0.7479152679443359, + "learning_rate": 0.0015, + "loss": 1.5288, + "step": 3080 + }, + { + "epoch": 0.3259493670886076, + "grad_norm": 0.5793021321296692, + "learning_rate": 0.0015, + "loss": 1.5433, + "step": 3090 + }, + { + "epoch": 0.3270042194092827, + "grad_norm": 0.6929194331169128, + "learning_rate": 0.0015, + "loss": 1.5294, + "step": 3100 + }, + { + "epoch": 0.3280590717299578, + "grad_norm": 0.6408833265304565, + "learning_rate": 0.0015, + "loss": 1.5349, + "step": 3110 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 0.721777617931366, + "learning_rate": 0.0015, + "loss": 1.5372, + "step": 3120 + }, + { + "epoch": 0.33016877637130804, + "grad_norm": 0.6097099781036377, + "learning_rate": 0.0015, + "loss": 1.5493, + "step": 3130 + }, + { + "epoch": 0.33122362869198313, + "grad_norm": 0.7727136611938477, + "learning_rate": 0.0015, + "loss": 1.5257, + "step": 3140 + }, + { + "epoch": 0.3322784810126582, + "grad_norm": 0.5993808507919312, + "learning_rate": 0.0015, + "loss": 1.5368, + "step": 3150 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.6082150340080261, + "learning_rate": 0.0015, + "loss": 1.5364, + "step": 3160 + }, + { + "epoch": 0.33438818565400846, + "grad_norm": 0.647101640701294, + "learning_rate": 0.0015, + "loss": 1.5358, + "step": 3170 + }, + { + "epoch": 0.33544303797468356, + "grad_norm": 0.6560900211334229, + "learning_rate": 0.0015, + "loss": 1.532, + "step": 3180 + }, + { + 
"epoch": 0.33649789029535865, + "grad_norm": 0.715570867061615, + "learning_rate": 0.0015, + "loss": 1.5334, + "step": 3190 + }, + { + "epoch": 0.33755274261603374, + "grad_norm": 0.7903317809104919, + "learning_rate": 0.0015, + "loss": 1.5259, + "step": 3200 + }, + { + "epoch": 0.33860759493670883, + "grad_norm": 0.6264176964759827, + "learning_rate": 0.0015, + "loss": 1.5385, + "step": 3210 + }, + { + "epoch": 0.339662447257384, + "grad_norm": 0.6412289142608643, + "learning_rate": 0.0015, + "loss": 1.5304, + "step": 3220 + }, + { + "epoch": 0.3407172995780591, + "grad_norm": 0.8620365262031555, + "learning_rate": 0.0015, + "loss": 1.525, + "step": 3230 + }, + { + "epoch": 0.34177215189873417, + "grad_norm": 0.6095845103263855, + "learning_rate": 0.0015, + "loss": 1.5059, + "step": 3240 + }, + { + "epoch": 0.34282700421940926, + "grad_norm": 0.6399675607681274, + "learning_rate": 0.0015, + "loss": 1.5331, + "step": 3250 + }, + { + "epoch": 0.3438818565400844, + "grad_norm": 0.5802530646324158, + "learning_rate": 0.0015, + "loss": 1.5188, + "step": 3260 + }, + { + "epoch": 0.3449367088607595, + "grad_norm": 0.591494083404541, + "learning_rate": 0.0015, + "loss": 1.5383, + "step": 3270 + }, + { + "epoch": 0.3459915611814346, + "grad_norm": 0.665297269821167, + "learning_rate": 0.0015, + "loss": 1.5373, + "step": 3280 + }, + { + "epoch": 0.3470464135021097, + "grad_norm": 1.0794377326965332, + "learning_rate": 0.0015, + "loss": 1.5198, + "step": 3290 + }, + { + "epoch": 0.34810126582278483, + "grad_norm": 0.6363126039505005, + "learning_rate": 0.0015, + "loss": 1.5279, + "step": 3300 + }, + { + "epoch": 0.3491561181434599, + "grad_norm": 0.612177312374115, + "learning_rate": 0.0015, + "loss": 1.5271, + "step": 3310 + }, + { + "epoch": 0.350210970464135, + "grad_norm": 0.748126745223999, + "learning_rate": 0.0015, + "loss": 1.5367, + "step": 3320 + }, + { + "epoch": 0.3512658227848101, + "grad_norm": 0.5856114029884338, + "learning_rate": 0.0015, + "loss": 1.522, + 
"step": 3330 + }, + { + "epoch": 0.35232067510548526, + "grad_norm": 0.6275367140769958, + "learning_rate": 0.0015, + "loss": 1.5244, + "step": 3340 + }, + { + "epoch": 0.35337552742616035, + "grad_norm": 0.6555677056312561, + "learning_rate": 0.0015, + "loss": 1.5165, + "step": 3350 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 0.6494587063789368, + "learning_rate": 0.0015, + "loss": 1.5181, + "step": 3360 + }, + { + "epoch": 0.35548523206751054, + "grad_norm": 0.806426465511322, + "learning_rate": 0.0015, + "loss": 1.5264, + "step": 3370 + }, + { + "epoch": 0.35654008438818563, + "grad_norm": 0.6121223568916321, + "learning_rate": 0.0015, + "loss": 1.5209, + "step": 3380 + }, + { + "epoch": 0.3575949367088608, + "grad_norm": 0.7112828493118286, + "learning_rate": 0.0015, + "loss": 1.5158, + "step": 3390 + }, + { + "epoch": 0.35864978902953587, + "grad_norm": 0.5737324953079224, + "learning_rate": 0.0015, + "loss": 1.5272, + "step": 3400 + }, + { + "epoch": 0.35970464135021096, + "grad_norm": 0.6188103556632996, + "learning_rate": 0.0015, + "loss": 1.5238, + "step": 3410 + }, + { + "epoch": 0.36075949367088606, + "grad_norm": 0.7734886407852173, + "learning_rate": 0.0015, + "loss": 1.5043, + "step": 3420 + }, + { + "epoch": 0.3618143459915612, + "grad_norm": 0.6218626499176025, + "learning_rate": 0.0015, + "loss": 1.5183, + "step": 3430 + }, + { + "epoch": 0.3628691983122363, + "grad_norm": 0.6316360235214233, + "learning_rate": 0.0015, + "loss": 1.5152, + "step": 3440 + }, + { + "epoch": 0.3639240506329114, + "grad_norm": 0.5388330221176147, + "learning_rate": 0.0015, + "loss": 1.5151, + "step": 3450 + }, + { + "epoch": 0.3649789029535865, + "grad_norm": 0.6421667337417603, + "learning_rate": 0.0015, + "loss": 1.5065, + "step": 3460 + }, + { + "epoch": 0.36603375527426163, + "grad_norm": 0.8086821436882019, + "learning_rate": 0.0015, + "loss": 1.5244, + "step": 3470 + }, + { + "epoch": 0.3670886075949367, + "grad_norm": 0.5916324853897095, + 
"learning_rate": 0.0015, + "loss": 1.5269, + "step": 3480 + }, + { + "epoch": 0.3681434599156118, + "grad_norm": 0.5721726417541504, + "learning_rate": 0.0015, + "loss": 1.5208, + "step": 3490 + }, + { + "epoch": 0.3691983122362869, + "grad_norm": 0.6366936564445496, + "learning_rate": 0.0015, + "loss": 1.5009, + "step": 3500 + }, + { + "epoch": 0.370253164556962, + "grad_norm": 0.7353329062461853, + "learning_rate": 0.0015, + "loss": 1.5208, + "step": 3510 + }, + { + "epoch": 0.37130801687763715, + "grad_norm": 0.6907286643981934, + "learning_rate": 0.0015, + "loss": 1.5211, + "step": 3520 + }, + { + "epoch": 0.37236286919831224, + "grad_norm": 0.6311991214752197, + "learning_rate": 0.0015, + "loss": 1.5299, + "step": 3530 + }, + { + "epoch": 0.37341772151898733, + "grad_norm": 0.7826913595199585, + "learning_rate": 0.0015, + "loss": 1.5213, + "step": 3540 + }, + { + "epoch": 0.3744725738396624, + "grad_norm": 0.7268832325935364, + "learning_rate": 0.0015, + "loss": 1.5108, + "step": 3550 + }, + { + "epoch": 0.3755274261603376, + "grad_norm": 0.6679678559303284, + "learning_rate": 0.0015, + "loss": 1.5111, + "step": 3560 + }, + { + "epoch": 0.37658227848101267, + "grad_norm": 0.9584214091300964, + "learning_rate": 0.0015, + "loss": 1.5064, + "step": 3570 + }, + { + "epoch": 0.37763713080168776, + "grad_norm": 0.5330318212509155, + "learning_rate": 0.0015, + "loss": 1.5075, + "step": 3580 + }, + { + "epoch": 0.37869198312236285, + "grad_norm": 0.6112387180328369, + "learning_rate": 0.0015, + "loss": 1.5196, + "step": 3590 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 0.55507493019104, + "learning_rate": 0.0015, + "loss": 1.5052, + "step": 3600 + }, + { + "epoch": 0.3808016877637131, + "grad_norm": 0.576727569103241, + "learning_rate": 0.0015, + "loss": 1.5055, + "step": 3610 + }, + { + "epoch": 0.3818565400843882, + "grad_norm": 0.7007277607917786, + "learning_rate": 0.0015, + "loss": 1.507, + "step": 3620 + }, + { + "epoch": 0.3829113924050633, + 
"grad_norm": 0.6003903746604919, + "learning_rate": 0.0015, + "loss": 1.5091, + "step": 3630 + }, + { + "epoch": 0.38396624472573837, + "grad_norm": 0.7698028087615967, + "learning_rate": 0.0015, + "loss": 1.5063, + "step": 3640 + }, + { + "epoch": 0.3850210970464135, + "grad_norm": 0.6928577423095703, + "learning_rate": 0.0015, + "loss": 1.5187, + "step": 3650 + }, + { + "epoch": 0.3860759493670886, + "grad_norm": 0.8100811243057251, + "learning_rate": 0.0015, + "loss": 1.5111, + "step": 3660 + }, + { + "epoch": 0.3871308016877637, + "grad_norm": 0.7094262838363647, + "learning_rate": 0.0015, + "loss": 1.5058, + "step": 3670 + }, + { + "epoch": 0.3881856540084388, + "grad_norm": 0.681793212890625, + "learning_rate": 0.0015, + "loss": 1.5039, + "step": 3680 + }, + { + "epoch": 0.38924050632911394, + "grad_norm": 0.6533880233764648, + "learning_rate": 0.0015, + "loss": 1.5156, + "step": 3690 + }, + { + "epoch": 0.39029535864978904, + "grad_norm": 0.6184443831443787, + "learning_rate": 0.0015, + "loss": 1.5196, + "step": 3700 + }, + { + "epoch": 0.39135021097046413, + "grad_norm": 0.6186136603355408, + "learning_rate": 0.0015, + "loss": 1.5064, + "step": 3710 + }, + { + "epoch": 0.3924050632911392, + "grad_norm": 0.6013845205307007, + "learning_rate": 0.0015, + "loss": 1.4854, + "step": 3720 + }, + { + "epoch": 0.39345991561181437, + "grad_norm": 0.7819629311561584, + "learning_rate": 0.0015, + "loss": 1.5063, + "step": 3730 + }, + { + "epoch": 0.39451476793248946, + "grad_norm": 0.5571489930152893, + "learning_rate": 0.0015, + "loss": 1.5021, + "step": 3740 + }, + { + "epoch": 0.39556962025316456, + "grad_norm": 0.7706273794174194, + "learning_rate": 0.0015, + "loss": 1.5108, + "step": 3750 + }, + { + "epoch": 0.39662447257383965, + "grad_norm": 1.0314881801605225, + "learning_rate": 0.0015, + "loss": 1.4991, + "step": 3760 + }, + { + "epoch": 0.39767932489451474, + "grad_norm": 0.6681764125823975, + "learning_rate": 0.0015, + "loss": 1.5114, + "step": 3770 + }, + { 
+ "epoch": 0.3987341772151899, + "grad_norm": 0.5619609951972961, + "learning_rate": 0.0015, + "loss": 1.5023, + "step": 3780 + }, + { + "epoch": 0.399789029535865, + "grad_norm": 0.7922562956809998, + "learning_rate": 0.0015, + "loss": 1.5028, + "step": 3790 + }, + { + "epoch": 0.4008438818565401, + "grad_norm": 0.5868403315544128, + "learning_rate": 0.0015, + "loss": 1.5112, + "step": 3800 + }, + { + "epoch": 0.40189873417721517, + "grad_norm": 0.5683006644248962, + "learning_rate": 0.0015, + "loss": 1.4971, + "step": 3810 + }, + { + "epoch": 0.4029535864978903, + "grad_norm": 0.629831850528717, + "learning_rate": 0.0015, + "loss": 1.4916, + "step": 3820 + }, + { + "epoch": 0.4040084388185654, + "grad_norm": 0.5975214242935181, + "learning_rate": 0.0015, + "loss": 1.4943, + "step": 3830 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 0.6330074071884155, + "learning_rate": 0.0015, + "loss": 1.5121, + "step": 3840 + }, + { + "epoch": 0.4061181434599156, + "grad_norm": 0.6558031439781189, + "learning_rate": 0.0015, + "loss": 1.4951, + "step": 3850 + }, + { + "epoch": 0.40717299578059074, + "grad_norm": 0.5945179462432861, + "learning_rate": 0.0015, + "loss": 1.5067, + "step": 3860 + }, + { + "epoch": 0.40822784810126583, + "grad_norm": 0.5857622623443604, + "learning_rate": 0.0015, + "loss": 1.4945, + "step": 3870 + }, + { + "epoch": 0.4092827004219409, + "grad_norm": 0.6149414777755737, + "learning_rate": 0.0015, + "loss": 1.4876, + "step": 3880 + }, + { + "epoch": 0.410337552742616, + "grad_norm": 0.631554901599884, + "learning_rate": 0.0015, + "loss": 1.486, + "step": 3890 + }, + { + "epoch": 0.41139240506329117, + "grad_norm": 0.9400520920753479, + "learning_rate": 0.0015, + "loss": 1.4882, + "step": 3900 + }, + { + "epoch": 0.41244725738396626, + "grad_norm": 0.7286548614501953, + "learning_rate": 0.0015, + "loss": 1.4935, + "step": 3910 + }, + { + "epoch": 0.41350210970464135, + "grad_norm": 0.5828094482421875, + "learning_rate": 0.0015, + "loss": 
1.4896, + "step": 3920 + }, + { + "epoch": 0.41455696202531644, + "grad_norm": 0.6287121772766113, + "learning_rate": 0.0015, + "loss": 1.4948, + "step": 3930 + }, + { + "epoch": 0.41561181434599154, + "grad_norm": 0.7357299327850342, + "learning_rate": 0.0015, + "loss": 1.4903, + "step": 3940 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 0.5453791618347168, + "learning_rate": 0.0015, + "loss": 1.4958, + "step": 3950 + }, + { + "epoch": 0.4177215189873418, + "grad_norm": 0.5658535361289978, + "learning_rate": 0.0015, + "loss": 1.4963, + "step": 3960 + }, + { + "epoch": 0.41877637130801687, + "grad_norm": 0.5815938115119934, + "learning_rate": 0.0015, + "loss": 1.4849, + "step": 3970 + }, + { + "epoch": 0.41983122362869196, + "grad_norm": 0.5606580376625061, + "learning_rate": 0.0015, + "loss": 1.496, + "step": 3980 + }, + { + "epoch": 0.4208860759493671, + "grad_norm": 0.585895836353302, + "learning_rate": 0.0015, + "loss": 1.4854, + "step": 3990 + }, + { + "epoch": 0.4219409282700422, + "grad_norm": 0.5472810864448547, + "learning_rate": 0.0015, + "loss": 1.503, + "step": 4000 + }, + { + "epoch": 0.4229957805907173, + "grad_norm": 0.6179347038269043, + "learning_rate": 0.0015, + "loss": 1.4866, + "step": 4010 + }, + { + "epoch": 0.4240506329113924, + "grad_norm": 0.6303660273551941, + "learning_rate": 0.0015, + "loss": 1.491, + "step": 4020 + }, + { + "epoch": 0.42510548523206754, + "grad_norm": 0.707388162612915, + "learning_rate": 0.0015, + "loss": 1.4812, + "step": 4030 + }, + { + "epoch": 0.42616033755274263, + "grad_norm": 0.6531679630279541, + "learning_rate": 0.0015, + "loss": 1.4921, + "step": 4040 + }, + { + "epoch": 0.4272151898734177, + "grad_norm": 0.5899868011474609, + "learning_rate": 0.0015, + "loss": 1.4932, + "step": 4050 + }, + { + "epoch": 0.4282700421940928, + "grad_norm": 0.8273098468780518, + "learning_rate": 0.0015, + "loss": 1.4965, + "step": 4060 + }, + { + "epoch": 0.4293248945147679, + "grad_norm": 0.7142001986503601, + 
"learning_rate": 0.0015, + "loss": 1.4916, + "step": 4070 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 0.6741710305213928, + "learning_rate": 0.0015, + "loss": 1.501, + "step": 4080 + }, + { + "epoch": 0.43143459915611815, + "grad_norm": 0.8062102198600769, + "learning_rate": 0.0015, + "loss": 1.4922, + "step": 4090 + }, + { + "epoch": 0.43248945147679324, + "grad_norm": 0.7213407158851624, + "learning_rate": 0.0015, + "loss": 1.489, + "step": 4100 + }, + { + "epoch": 0.43354430379746833, + "grad_norm": 0.6004040241241455, + "learning_rate": 0.0015, + "loss": 1.4825, + "step": 4110 + }, + { + "epoch": 0.4345991561181435, + "grad_norm": 0.921000063419342, + "learning_rate": 0.0015, + "loss": 1.4895, + "step": 4120 + }, + { + "epoch": 0.4356540084388186, + "grad_norm": 0.759276270866394, + "learning_rate": 0.0015, + "loss": 1.4822, + "step": 4130 + }, + { + "epoch": 0.43670886075949367, + "grad_norm": 0.5667396187782288, + "learning_rate": 0.0015, + "loss": 1.4836, + "step": 4140 + }, + { + "epoch": 0.43776371308016876, + "grad_norm": 0.5702187418937683, + "learning_rate": 0.0015, + "loss": 1.4897, + "step": 4150 + }, + { + "epoch": 0.4388185654008439, + "grad_norm": 0.5534017086029053, + "learning_rate": 0.0015, + "loss": 1.4823, + "step": 4160 + }, + { + "epoch": 0.439873417721519, + "grad_norm": 0.6381139755249023, + "learning_rate": 0.0015, + "loss": 1.4792, + "step": 4170 + }, + { + "epoch": 0.4409282700421941, + "grad_norm": 0.6257467865943909, + "learning_rate": 0.0015, + "loss": 1.4813, + "step": 4180 + }, + { + "epoch": 0.4419831223628692, + "grad_norm": 0.6904028058052063, + "learning_rate": 0.0015, + "loss": 1.4856, + "step": 4190 + }, + { + "epoch": 0.4430379746835443, + "grad_norm": 0.7427784204483032, + "learning_rate": 0.0015, + "loss": 1.4891, + "step": 4200 + }, + { + "epoch": 0.4440928270042194, + "grad_norm": 0.5929211378097534, + "learning_rate": 0.0015, + "loss": 1.4776, + "step": 4210 + }, + { + "epoch": 0.4451476793248945, + 
"grad_norm": 0.6614937782287598, + "learning_rate": 0.0015, + "loss": 1.4844, + "step": 4220 + }, + { + "epoch": 0.4462025316455696, + "grad_norm": 0.8259798884391785, + "learning_rate": 0.0015, + "loss": 1.4816, + "step": 4230 + }, + { + "epoch": 0.4472573839662447, + "grad_norm": 0.6505470871925354, + "learning_rate": 0.0015, + "loss": 1.4803, + "step": 4240 + }, + { + "epoch": 0.44831223628691985, + "grad_norm": 0.5709390640258789, + "learning_rate": 0.0015, + "loss": 1.484, + "step": 4250 + }, + { + "epoch": 0.44936708860759494, + "grad_norm": 0.651495635509491, + "learning_rate": 0.0015, + "loss": 1.4919, + "step": 4260 + }, + { + "epoch": 0.45042194092827004, + "grad_norm": 0.6256493926048279, + "learning_rate": 0.0015, + "loss": 1.4789, + "step": 4270 + }, + { + "epoch": 0.45147679324894513, + "grad_norm": 0.7242328524589539, + "learning_rate": 0.0015, + "loss": 1.475, + "step": 4280 + }, + { + "epoch": 0.4525316455696203, + "grad_norm": 0.7946824431419373, + "learning_rate": 0.0015, + "loss": 1.4838, + "step": 4290 + }, + { + "epoch": 0.45358649789029537, + "grad_norm": 0.7557520270347595, + "learning_rate": 0.0015, + "loss": 1.4837, + "step": 4300 + }, + { + "epoch": 0.45464135021097046, + "grad_norm": 0.6960965394973755, + "learning_rate": 0.0015, + "loss": 1.4838, + "step": 4310 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 0.9885152578353882, + "learning_rate": 0.0015, + "loss": 1.476, + "step": 4320 + }, + { + "epoch": 0.45675105485232065, + "grad_norm": 0.6563693881034851, + "learning_rate": 0.0015, + "loss": 1.4868, + "step": 4330 + }, + { + "epoch": 0.4578059071729958, + "grad_norm": 0.8019644021987915, + "learning_rate": 0.0015, + "loss": 1.4758, + "step": 4340 + }, + { + "epoch": 0.4588607594936709, + "grad_norm": 0.6949400901794434, + "learning_rate": 0.0015, + "loss": 1.4792, + "step": 4350 + }, + { + "epoch": 0.459915611814346, + "grad_norm": 0.6423824429512024, + "learning_rate": 0.0015, + "loss": 1.4921, + "step": 4360 + }, + { + 
"epoch": 0.4609704641350211, + "grad_norm": 0.6390447616577148, + "learning_rate": 0.0015, + "loss": 1.4679, + "step": 4370 + }, + { + "epoch": 0.4620253164556962, + "grad_norm": 0.5630648732185364, + "learning_rate": 0.0015, + "loss": 1.4748, + "step": 4380 + }, + { + "epoch": 0.4630801687763713, + "grad_norm": 0.669380784034729, + "learning_rate": 0.0015, + "loss": 1.472, + "step": 4390 + }, + { + "epoch": 0.4641350210970464, + "grad_norm": 0.7069663405418396, + "learning_rate": 0.0015, + "loss": 1.4778, + "step": 4400 + }, + { + "epoch": 0.4651898734177215, + "grad_norm": 0.6067928075790405, + "learning_rate": 0.0015, + "loss": 1.494, + "step": 4410 + }, + { + "epoch": 0.46624472573839665, + "grad_norm": 0.6364792585372925, + "learning_rate": 0.0015, + "loss": 1.4749, + "step": 4420 + }, + { + "epoch": 0.46729957805907174, + "grad_norm": 0.6240522861480713, + "learning_rate": 0.0015, + "loss": 1.4811, + "step": 4430 + }, + { + "epoch": 0.46835443037974683, + "grad_norm": 0.6970255374908447, + "learning_rate": 0.0015, + "loss": 1.4754, + "step": 4440 + }, + { + "epoch": 0.4694092827004219, + "grad_norm": 0.5970498919487, + "learning_rate": 0.0015, + "loss": 1.4823, + "step": 4450 + }, + { + "epoch": 0.4704641350210971, + "grad_norm": 0.567321240901947, + "learning_rate": 0.0015, + "loss": 1.4697, + "step": 4460 + }, + { + "epoch": 0.47151898734177217, + "grad_norm": 0.6129364371299744, + "learning_rate": 0.0015, + "loss": 1.4778, + "step": 4470 + }, + { + "epoch": 0.47257383966244726, + "grad_norm": 0.6318622827529907, + "learning_rate": 0.0015, + "loss": 1.4832, + "step": 4480 + }, + { + "epoch": 0.47362869198312235, + "grad_norm": 0.6397720575332642, + "learning_rate": 0.0015, + "loss": 1.4717, + "step": 4490 + }, + { + "epoch": 0.47468354430379744, + "grad_norm": 0.7977009415626526, + "learning_rate": 0.0015, + "loss": 1.4705, + "step": 4500 + }, + { + "epoch": 0.4757383966244726, + "grad_norm": 0.5972973108291626, + "learning_rate": 0.0015, + "loss": 1.4744, 
+ "step": 4510 + }, + { + "epoch": 0.4767932489451477, + "grad_norm": 0.5378081202507019, + "learning_rate": 0.0015, + "loss": 1.4592, + "step": 4520 + }, + { + "epoch": 0.4778481012658228, + "grad_norm": 0.8785254955291748, + "learning_rate": 0.0015, + "loss": 1.4799, + "step": 4530 + }, + { + "epoch": 0.47890295358649787, + "grad_norm": 0.5821215510368347, + "learning_rate": 0.0015, + "loss": 1.4795, + "step": 4540 + }, + { + "epoch": 0.479957805907173, + "grad_norm": 0.7573177814483643, + "learning_rate": 0.0015, + "loss": 1.4753, + "step": 4550 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 0.5414453148841858, + "learning_rate": 0.0015, + "loss": 1.4654, + "step": 4560 + }, + { + "epoch": 0.4820675105485232, + "grad_norm": 0.5165130496025085, + "learning_rate": 0.0015, + "loss": 1.4481, + "step": 4570 + }, + { + "epoch": 0.4831223628691983, + "grad_norm": 0.7440067529678345, + "learning_rate": 0.0015, + "loss": 1.4733, + "step": 4580 + }, + { + "epoch": 0.48417721518987344, + "grad_norm": 0.7288051247596741, + "learning_rate": 0.0015, + "loss": 1.4609, + "step": 4590 + }, + { + "epoch": 0.48523206751054854, + "grad_norm": 0.6948131918907166, + "learning_rate": 0.0015, + "loss": 1.4802, + "step": 4600 + }, + { + "epoch": 0.48628691983122363, + "grad_norm": Infinity, + "learning_rate": 0.0015, + "loss": 1.4841, + "step": 4610 + }, + { + "epoch": 0.4873417721518987, + "grad_norm": 1.2940268516540527, + "learning_rate": 0.0015, + "loss": 1.4655, + "step": 4620 + }, + { + "epoch": 0.4883966244725738, + "grad_norm": 0.6071725487709045, + "learning_rate": 0.0015, + "loss": 1.4767, + "step": 4630 + }, + { + "epoch": 0.48945147679324896, + "grad_norm": 0.5999435782432556, + "learning_rate": 0.0015, + "loss": 1.472, + "step": 4640 + }, + { + "epoch": 0.49050632911392406, + "grad_norm": 0.5505561232566833, + "learning_rate": 0.0015, + "loss": 1.4758, + "step": 4650 + }, + { + "epoch": 0.49156118143459915, + "grad_norm": 0.6503564715385437, + "learning_rate": 
0.0015, + "loss": 1.464, + "step": 4660 + }, + { + "epoch": 0.49261603375527424, + "grad_norm": 0.7435416579246521, + "learning_rate": 0.0015, + "loss": 1.465, + "step": 4670 + }, + { + "epoch": 0.4936708860759494, + "grad_norm": 0.5838521122932434, + "learning_rate": 0.0015, + "loss": 1.4636, + "step": 4680 + }, + { + "epoch": 0.4947257383966245, + "grad_norm": 0.5816810131072998, + "learning_rate": 0.0015, + "loss": 1.4578, + "step": 4690 + }, + { + "epoch": 0.4957805907172996, + "grad_norm": 0.5804526209831238, + "learning_rate": 0.0015, + "loss": 1.466, + "step": 4700 + }, + { + "epoch": 0.49683544303797467, + "grad_norm": 0.6030544638633728, + "learning_rate": 0.0015, + "loss": 1.4703, + "step": 4710 + }, + { + "epoch": 0.4978902953586498, + "grad_norm": 0.6989827752113342, + "learning_rate": 0.0015, + "loss": 1.4654, + "step": 4720 + }, + { + "epoch": 0.4989451476793249, + "grad_norm": 0.5915131568908691, + "learning_rate": 0.0015, + "loss": 1.4624, + "step": 4730 + }, + { + "epoch": 0.5, + "grad_norm": 0.6471115946769714, + "learning_rate": 0.0015, + "loss": 1.4624, + "step": 4740 + }, + { + "epoch": 0.5010548523206751, + "grad_norm": 0.6063437461853027, + "learning_rate": 0.0015, + "loss": 1.4808, + "step": 4750 + }, + { + "epoch": 0.5021097046413502, + "grad_norm": 0.518700361251831, + "learning_rate": 0.0015, + "loss": 1.4661, + "step": 4760 + }, + { + "epoch": 0.5031645569620253, + "grad_norm": 0.543921947479248, + "learning_rate": 0.0015, + "loss": 1.4641, + "step": 4770 + }, + { + "epoch": 0.5042194092827004, + "grad_norm": 0.5837245583534241, + "learning_rate": 0.0015, + "loss": 1.4732, + "step": 4780 + }, + { + "epoch": 0.5052742616033755, + "grad_norm": 0.760454535484314, + "learning_rate": 0.0015, + "loss": 1.4582, + "step": 4790 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 0.6135420799255371, + "learning_rate": 0.0015, + "loss": 1.4664, + "step": 4800 + }, + { + "epoch": 0.5073839662447257, + "grad_norm": 0.571808397769928, + 
"learning_rate": 0.0015, + "loss": 1.4618, + "step": 4810 + }, + { + "epoch": 0.5084388185654009, + "grad_norm": 0.6032215356826782, + "learning_rate": 0.0015, + "loss": 1.4817, + "step": 4820 + }, + { + "epoch": 0.509493670886076, + "grad_norm": 0.6733672618865967, + "learning_rate": 0.0015, + "loss": 1.4631, + "step": 4830 + }, + { + "epoch": 0.510548523206751, + "grad_norm": 0.6539626121520996, + "learning_rate": 0.0015, + "loss": 1.458, + "step": 4840 + }, + { + "epoch": 0.5116033755274262, + "grad_norm": 0.5375906825065613, + "learning_rate": 0.0015, + "loss": 1.4696, + "step": 4850 + }, + { + "epoch": 0.5126582278481012, + "grad_norm": 0.6148267984390259, + "learning_rate": 0.0015, + "loss": 1.4584, + "step": 4860 + }, + { + "epoch": 0.5137130801687764, + "grad_norm": 0.6515779495239258, + "learning_rate": 0.0015, + "loss": 1.4512, + "step": 4870 + }, + { + "epoch": 0.5147679324894515, + "grad_norm": 0.593586802482605, + "learning_rate": 0.0015, + "loss": 1.4719, + "step": 4880 + }, + { + "epoch": 0.5158227848101266, + "grad_norm": 0.5916845202445984, + "learning_rate": 0.0015, + "loss": 1.4612, + "step": 4890 + }, + { + "epoch": 0.5168776371308017, + "grad_norm": 0.5250903367996216, + "learning_rate": 0.0015, + "loss": 1.4677, + "step": 4900 + }, + { + "epoch": 0.5179324894514767, + "grad_norm": 0.6175042986869812, + "learning_rate": 0.0015, + "loss": 1.4622, + "step": 4910 + }, + { + "epoch": 0.5189873417721519, + "grad_norm": 0.6020768880844116, + "learning_rate": 0.0015, + "loss": 1.4574, + "step": 4920 + }, + { + "epoch": 0.520042194092827, + "grad_norm": 0.6732794046401978, + "learning_rate": 0.0015, + "loss": 1.466, + "step": 4930 + }, + { + "epoch": 0.5210970464135021, + "grad_norm": 0.7224721908569336, + "learning_rate": 0.0015, + "loss": 1.4652, + "step": 4940 + }, + { + "epoch": 0.5221518987341772, + "grad_norm": 0.8541702032089233, + "learning_rate": 0.0015, + "loss": 1.4727, + "step": 4950 + }, + { + "epoch": 0.5232067510548524, + "grad_norm": 
0.730940580368042, + "learning_rate": 0.0015, + "loss": 1.4516, + "step": 4960 + }, + { + "epoch": 0.5242616033755274, + "grad_norm": 0.5959996581077576, + "learning_rate": 0.0015, + "loss": 1.4595, + "step": 4970 + }, + { + "epoch": 0.5253164556962026, + "grad_norm": 0.5917717218399048, + "learning_rate": 0.0015, + "loss": 1.4548, + "step": 4980 + }, + { + "epoch": 0.5263713080168776, + "grad_norm": 0.5279277563095093, + "learning_rate": 0.0015, + "loss": 1.4564, + "step": 4990 + }, + { + "epoch": 0.5274261603375527, + "grad_norm": 0.639276385307312, + "learning_rate": 0.0015, + "loss": 1.4688, + "step": 5000 + }, + { + "epoch": 0.5284810126582279, + "grad_norm": 0.6213117241859436, + "learning_rate": 0.0015, + "loss": 1.4593, + "step": 5010 + }, + { + "epoch": 0.5295358649789029, + "grad_norm": 0.5400778651237488, + "learning_rate": 0.0015, + "loss": 1.454, + "step": 5020 + }, + { + "epoch": 0.5305907172995781, + "grad_norm": 0.5232504606246948, + "learning_rate": 0.0015, + "loss": 1.4387, + "step": 5030 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 0.5442522764205933, + "learning_rate": 0.0015, + "loss": 1.4666, + "step": 5040 + }, + { + "epoch": 0.5327004219409283, + "grad_norm": 0.6611676812171936, + "learning_rate": 0.0015, + "loss": 1.4558, + "step": 5050 + }, + { + "epoch": 0.5337552742616034, + "grad_norm": 0.535550057888031, + "learning_rate": 0.0015, + "loss": 1.4664, + "step": 5060 + }, + { + "epoch": 0.5348101265822784, + "grad_norm": 0.8105790019035339, + "learning_rate": 0.0015, + "loss": 1.4542, + "step": 5070 + }, + { + "epoch": 0.5358649789029536, + "grad_norm": 0.5619795322418213, + "learning_rate": 0.0015, + "loss": 1.4706, + "step": 5080 + }, + { + "epoch": 0.5369198312236287, + "grad_norm": 0.6469224095344543, + "learning_rate": 0.0015, + "loss": 1.4604, + "step": 5090 + }, + { + "epoch": 0.5379746835443038, + "grad_norm": 0.5533626675605774, + "learning_rate": 0.0015, + "loss": 1.4437, + "step": 5100 + }, + { + "epoch": 
0.5390295358649789, + "grad_norm": 0.5283637642860413, + "learning_rate": 0.0015, + "loss": 1.4566, + "step": 5110 + }, + { + "epoch": 0.540084388185654, + "grad_norm": 0.6227394938468933, + "learning_rate": 0.0015, + "loss": 1.4553, + "step": 5120 + }, + { + "epoch": 0.5411392405063291, + "grad_norm": 0.5344183444976807, + "learning_rate": 0.0015, + "loss": 1.4506, + "step": 5130 + }, + { + "epoch": 0.5421940928270043, + "grad_norm": 0.588984489440918, + "learning_rate": 0.0015, + "loss": 1.4678, + "step": 5140 + }, + { + "epoch": 0.5432489451476793, + "grad_norm": 0.5949463844299316, + "learning_rate": 0.0015, + "loss": 1.464, + "step": 5150 + }, + { + "epoch": 0.5443037974683544, + "grad_norm": 0.557900071144104, + "learning_rate": 0.0015, + "loss": 1.4578, + "step": 5160 + }, + { + "epoch": 0.5453586497890295, + "grad_norm": 0.5850265026092529, + "learning_rate": 0.0015, + "loss": 1.4564, + "step": 5170 + }, + { + "epoch": 0.5464135021097046, + "grad_norm": 0.7179521322250366, + "learning_rate": 0.0015, + "loss": 1.4667, + "step": 5180 + }, + { + "epoch": 0.5474683544303798, + "grad_norm": 0.6477261781692505, + "learning_rate": 0.0015, + "loss": 1.4478, + "step": 5190 + }, + { + "epoch": 0.5485232067510548, + "grad_norm": 0.7852134108543396, + "learning_rate": 0.0015, + "loss": 1.4621, + "step": 5200 + }, + { + "epoch": 0.54957805907173, + "grad_norm": 0.7401909232139587, + "learning_rate": 0.0015, + "loss": 1.4471, + "step": 5210 + }, + { + "epoch": 0.5506329113924051, + "grad_norm": 0.6288110613822937, + "learning_rate": 0.0015, + "loss": 1.4581, + "step": 5220 + }, + { + "epoch": 0.5516877637130801, + "grad_norm": 0.5337368249893188, + "learning_rate": 0.0015, + "loss": 1.4498, + "step": 5230 + }, + { + "epoch": 0.5527426160337553, + "grad_norm": 0.6970085501670837, + "learning_rate": 0.0015, + "loss": 1.4552, + "step": 5240 + }, + { + "epoch": 0.5537974683544303, + "grad_norm": 0.634314239025116, + "learning_rate": 0.0015, + "loss": 1.4472, + "step": 5250 + 
}, + { + "epoch": 0.5548523206751055, + "grad_norm": 0.5549997687339783, + "learning_rate": 0.0015, + "loss": 1.4535, + "step": 5260 + }, + { + "epoch": 0.5559071729957806, + "grad_norm": 0.5504727959632874, + "learning_rate": 0.0015, + "loss": 1.4627, + "step": 5270 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 0.8286926746368408, + "learning_rate": 0.0015, + "loss": 1.4535, + "step": 5280 + }, + { + "epoch": 0.5580168776371308, + "grad_norm": 0.6269571781158447, + "learning_rate": 0.0015, + "loss": 1.4547, + "step": 5290 + }, + { + "epoch": 0.5590717299578059, + "grad_norm": 0.8340073227882385, + "learning_rate": 0.0015, + "loss": 1.4577, + "step": 5300 + }, + { + "epoch": 0.560126582278481, + "grad_norm": 0.9701064229011536, + "learning_rate": 0.0015, + "loss": 1.448, + "step": 5310 + }, + { + "epoch": 0.5611814345991561, + "grad_norm": 0.5321536660194397, + "learning_rate": 0.0015, + "loss": 1.4444, + "step": 5320 + }, + { + "epoch": 0.5622362869198312, + "grad_norm": 0.5817542672157288, + "learning_rate": 0.0015, + "loss": 1.4547, + "step": 5330 + }, + { + "epoch": 0.5632911392405063, + "grad_norm": 0.74298095703125, + "learning_rate": 0.0015, + "loss": 1.4479, + "step": 5340 + }, + { + "epoch": 0.5643459915611815, + "grad_norm": 0.5651299357414246, + "learning_rate": 0.0015, + "loss": 1.4435, + "step": 5350 + }, + { + "epoch": 0.5654008438818565, + "grad_norm": 0.723031222820282, + "learning_rate": 0.0015, + "loss": 1.4524, + "step": 5360 + }, + { + "epoch": 0.5664556962025317, + "grad_norm": 0.6351364254951477, + "learning_rate": 0.0015, + "loss": 1.4532, + "step": 5370 + }, + { + "epoch": 0.5675105485232067, + "grad_norm": 0.5029274225234985, + "learning_rate": 0.0015, + "loss": 1.4441, + "step": 5380 + }, + { + "epoch": 0.5685654008438819, + "grad_norm": 0.5667977929115295, + "learning_rate": 0.0015, + "loss": 1.4432, + "step": 5390 + }, + { + "epoch": 0.569620253164557, + "grad_norm": 0.5874744057655334, + "learning_rate": 0.0015, + "loss": 
1.4434, + "step": 5400 + }, + { + "epoch": 0.570675105485232, + "grad_norm": 0.5383337736129761, + "learning_rate": 0.0015, + "loss": 1.4468, + "step": 5410 + }, + { + "epoch": 0.5717299578059072, + "grad_norm": 0.6023250818252563, + "learning_rate": 0.0015, + "loss": 1.4395, + "step": 5420 + }, + { + "epoch": 0.5727848101265823, + "grad_norm": 0.6562930345535278, + "learning_rate": 0.0015, + "loss": 1.44, + "step": 5430 + }, + { + "epoch": 0.5738396624472574, + "grad_norm": 0.528099000453949, + "learning_rate": 0.0015, + "loss": 1.4375, + "step": 5440 + }, + { + "epoch": 0.5748945147679325, + "grad_norm": 0.6635940074920654, + "learning_rate": 0.0015, + "loss": 1.4468, + "step": 5450 + }, + { + "epoch": 0.5759493670886076, + "grad_norm": 0.5587949156761169, + "learning_rate": 0.0015, + "loss": 1.4493, + "step": 5460 + }, + { + "epoch": 0.5770042194092827, + "grad_norm": 0.6516025066375732, + "learning_rate": 0.0015, + "loss": 1.447, + "step": 5470 + }, + { + "epoch": 0.5780590717299579, + "grad_norm": 0.6126524806022644, + "learning_rate": 0.0015, + "loss": 1.4485, + "step": 5480 + }, + { + "epoch": 0.5791139240506329, + "grad_norm": 0.5466421842575073, + "learning_rate": 0.0015, + "loss": 1.454, + "step": 5490 + }, + { + "epoch": 0.580168776371308, + "grad_norm": 0.5659489035606384, + "learning_rate": 0.0015, + "loss": 1.4445, + "step": 5500 + }, + { + "epoch": 0.5812236286919831, + "grad_norm": 0.6079151630401611, + "learning_rate": 0.0015, + "loss": 1.4508, + "step": 5510 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 0.5349658727645874, + "learning_rate": 0.0015, + "loss": 1.439, + "step": 5520 + }, + { + "epoch": 0.5833333333333334, + "grad_norm": 0.5877987742424011, + "learning_rate": 0.0015, + "loss": 1.4345, + "step": 5530 + }, + { + "epoch": 0.5843881856540084, + "grad_norm": 0.5495848655700684, + "learning_rate": 0.0015, + "loss": 1.4355, + "step": 5540 + }, + { + "epoch": 0.5854430379746836, + "grad_norm": 0.517038881778717, + "learning_rate": 
0.0015, + "loss": 1.4517, + "step": 5550 + }, + { + "epoch": 0.5864978902953587, + "grad_norm": 0.5200017690658569, + "learning_rate": 0.0015, + "loss": 1.4566, + "step": 5560 + }, + { + "epoch": 0.5875527426160337, + "grad_norm": 0.5127320289611816, + "learning_rate": 0.0015, + "loss": 1.4427, + "step": 5570 + }, + { + "epoch": 0.5886075949367089, + "grad_norm": 0.6458799242973328, + "learning_rate": 0.0015, + "loss": 1.4405, + "step": 5580 + }, + { + "epoch": 0.5896624472573839, + "grad_norm": 0.5785557627677917, + "learning_rate": 0.0015, + "loss": 1.4433, + "step": 5590 + }, + { + "epoch": 0.5907172995780591, + "grad_norm": 0.6503692269325256, + "learning_rate": 0.0015, + "loss": 1.4393, + "step": 5600 + }, + { + "epoch": 0.5917721518987342, + "grad_norm": 0.5408360958099365, + "learning_rate": 0.0015, + "loss": 1.4452, + "step": 5610 + }, + { + "epoch": 0.5928270042194093, + "grad_norm": 0.6215591430664062, + "learning_rate": 0.0015, + "loss": 1.4518, + "step": 5620 + }, + { + "epoch": 0.5938818565400844, + "grad_norm": 0.7795698642730713, + "learning_rate": 0.0015, + "loss": 1.4454, + "step": 5630 + }, + { + "epoch": 0.5949367088607594, + "grad_norm": 0.5660273432731628, + "learning_rate": 0.0015, + "loss": 1.4559, + "step": 5640 + }, + { + "epoch": 0.5959915611814346, + "grad_norm": 0.7219056487083435, + "learning_rate": 0.0015, + "loss": 1.4496, + "step": 5650 + }, + { + "epoch": 0.5970464135021097, + "grad_norm": 0.741102397441864, + "learning_rate": 0.0015, + "loss": 1.4453, + "step": 5660 + }, + { + "epoch": 0.5981012658227848, + "grad_norm": 0.5923230051994324, + "learning_rate": 0.0015, + "loss": 1.451, + "step": 5670 + }, + { + "epoch": 0.5991561181434599, + "grad_norm": 0.5904003977775574, + "learning_rate": 0.0015, + "loss": 1.4518, + "step": 5680 + }, + { + "epoch": 0.6002109704641351, + "grad_norm": 0.7221293449401855, + "learning_rate": 0.0015, + "loss": 1.433, + "step": 5690 + }, + { + "epoch": 0.6012658227848101, + "grad_norm": 
0.6381110548973083, + "learning_rate": 0.0015, + "loss": 1.4378, + "step": 5700 + }, + { + "epoch": 0.6023206751054853, + "grad_norm": 0.5245643258094788, + "learning_rate": 0.0015, + "loss": 1.4406, + "step": 5710 + }, + { + "epoch": 0.6033755274261603, + "grad_norm": 0.5776628851890564, + "learning_rate": 0.0015, + "loss": 1.4452, + "step": 5720 + }, + { + "epoch": 0.6044303797468354, + "grad_norm": 0.6253775954246521, + "learning_rate": 0.0015, + "loss": 1.4406, + "step": 5730 + }, + { + "epoch": 0.6054852320675106, + "grad_norm": 0.5543458461761475, + "learning_rate": 0.0015, + "loss": 1.4396, + "step": 5740 + }, + { + "epoch": 0.6065400843881856, + "grad_norm": 0.7039231657981873, + "learning_rate": 0.0015, + "loss": 1.4164, + "step": 5750 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 0.5481839776039124, + "learning_rate": 0.0015, + "loss": 1.4243, + "step": 5760 + }, + { + "epoch": 0.6086497890295358, + "grad_norm": 0.6938220262527466, + "learning_rate": 0.0015, + "loss": 1.4292, + "step": 5770 + }, + { + "epoch": 0.609704641350211, + "grad_norm": 0.5412605404853821, + "learning_rate": 0.0015, + "loss": 1.4366, + "step": 5780 + }, + { + "epoch": 0.6107594936708861, + "grad_norm": 0.7932698130607605, + "learning_rate": 0.0015, + "loss": 1.4333, + "step": 5790 + }, + { + "epoch": 0.6118143459915611, + "grad_norm": 0.539391815662384, + "learning_rate": 0.0015, + "loss": 1.4368, + "step": 5800 + }, + { + "epoch": 0.6128691983122363, + "grad_norm": 0.6700359582901001, + "learning_rate": 0.0015, + "loss": 1.4403, + "step": 5810 + }, + { + "epoch": 0.6139240506329114, + "grad_norm": 0.5859553813934326, + "learning_rate": 0.0015, + "loss": 1.4561, + "step": 5820 + }, + { + "epoch": 0.6149789029535865, + "grad_norm": 0.6671823263168335, + "learning_rate": 0.0015, + "loss": 1.4381, + "step": 5830 + }, + { + "epoch": 0.6160337552742616, + "grad_norm": 0.7624210119247437, + "learning_rate": 0.0015, + "loss": 1.4509, + "step": 5840 + }, + { + "epoch": 
0.6170886075949367, + "grad_norm": 0.7688072919845581, + "learning_rate": 0.0015, + "loss": 1.4541, + "step": 5850 + }, + { + "epoch": 0.6181434599156118, + "grad_norm": 0.5609461069107056, + "learning_rate": 0.0015, + "loss": 1.4402, + "step": 5860 + }, + { + "epoch": 0.619198312236287, + "grad_norm": 0.5772584676742554, + "learning_rate": 0.0015, + "loss": 1.4337, + "step": 5870 + }, + { + "epoch": 0.620253164556962, + "grad_norm": 0.6252260804176331, + "learning_rate": 0.0015, + "loss": 1.4432, + "step": 5880 + }, + { + "epoch": 0.6213080168776371, + "grad_norm": 0.5394101738929749, + "learning_rate": 0.0015, + "loss": 1.4349, + "step": 5890 + }, + { + "epoch": 0.6223628691983122, + "grad_norm": 0.5544285178184509, + "learning_rate": 0.0015, + "loss": 1.4356, + "step": 5900 + }, + { + "epoch": 0.6234177215189873, + "grad_norm": 0.6994200944900513, + "learning_rate": 0.0015, + "loss": 1.4331, + "step": 5910 + }, + { + "epoch": 0.6244725738396625, + "grad_norm": 0.580315887928009, + "learning_rate": 0.0015, + "loss": 1.4266, + "step": 5920 + }, + { + "epoch": 0.6255274261603375, + "grad_norm": 0.8660300374031067, + "learning_rate": 0.0015, + "loss": 1.4376, + "step": 5930 + }, + { + "epoch": 0.6265822784810127, + "grad_norm": 0.6988215446472168, + "learning_rate": 0.0015, + "loss": 1.4417, + "step": 5940 + }, + { + "epoch": 0.6276371308016878, + "grad_norm": 0.6541784405708313, + "learning_rate": 0.0015, + "loss": 1.4309, + "step": 5950 + }, + { + "epoch": 0.6286919831223629, + "grad_norm": 0.6802022457122803, + "learning_rate": 0.0015, + "loss": 1.4372, + "step": 5960 + }, + { + "epoch": 0.629746835443038, + "grad_norm": 0.5214741826057434, + "learning_rate": 0.0015, + "loss": 1.4372, + "step": 5970 + }, + { + "epoch": 0.630801687763713, + "grad_norm": 0.5116277933120728, + "learning_rate": 0.0015, + "loss": 1.4204, + "step": 5980 + }, + { + "epoch": 0.6318565400843882, + "grad_norm": 0.5941348075866699, + "learning_rate": 0.0015, + "loss": 1.4408, + "step": 5990 
+ }, + { + "epoch": 0.6329113924050633, + "grad_norm": 0.54196697473526, + "learning_rate": 0.0015, + "loss": 1.4481, + "step": 6000 + }, + { + "epoch": 0.6339662447257384, + "grad_norm": 0.5527881979942322, + "learning_rate": 0.0015, + "loss": 1.4356, + "step": 6010 + }, + { + "epoch": 0.6350210970464135, + "grad_norm": 0.7087220549583435, + "learning_rate": 0.0015, + "loss": 1.438, + "step": 6020 + }, + { + "epoch": 0.6360759493670886, + "grad_norm": 0.547170877456665, + "learning_rate": 0.0015, + "loss": 1.4182, + "step": 6030 + }, + { + "epoch": 0.6371308016877637, + "grad_norm": 0.6261120438575745, + "learning_rate": 0.0015, + "loss": 1.4384, + "step": 6040 + }, + { + "epoch": 0.6381856540084389, + "grad_norm": 0.5071873068809509, + "learning_rate": 0.0015, + "loss": 1.421, + "step": 6050 + }, + { + "epoch": 0.6392405063291139, + "grad_norm": 0.5573424100875854, + "learning_rate": 0.0015, + "loss": 1.441, + "step": 6060 + }, + { + "epoch": 0.640295358649789, + "grad_norm": 0.6330763101577759, + "learning_rate": 0.0015, + "loss": 1.43, + "step": 6070 + }, + { + "epoch": 0.6413502109704642, + "grad_norm": 0.584770143032074, + "learning_rate": 0.0015, + "loss": 1.431, + "step": 6080 + }, + { + "epoch": 0.6424050632911392, + "grad_norm": 0.5460183620452881, + "learning_rate": 0.0015, + "loss": 1.4307, + "step": 6090 + }, + { + "epoch": 0.6434599156118144, + "grad_norm": 0.6885669231414795, + "learning_rate": 0.0015, + "loss": 1.4254, + "step": 6100 + }, + { + "epoch": 0.6445147679324894, + "grad_norm": 0.7244248986244202, + "learning_rate": 0.0015, + "loss": 1.4402, + "step": 6110 + }, + { + "epoch": 0.6455696202531646, + "grad_norm": 0.639764666557312, + "learning_rate": 0.0015, + "loss": 1.4336, + "step": 6120 + }, + { + "epoch": 0.6466244725738397, + "grad_norm": 0.6047524213790894, + "learning_rate": 0.0015, + "loss": 1.4299, + "step": 6130 + }, + { + "epoch": 0.6476793248945147, + "grad_norm": 0.9635192155838013, + "learning_rate": 0.0015, + "loss": 1.4311, + 
"step": 6140 + }, + { + "epoch": 0.6487341772151899, + "grad_norm": 0.604000449180603, + "learning_rate": 0.0015, + "loss": 1.4419, + "step": 6150 + }, + { + "epoch": 0.6497890295358649, + "grad_norm": 0.6478769183158875, + "learning_rate": 0.0015, + "loss": 1.4322, + "step": 6160 + }, + { + "epoch": 0.6508438818565401, + "grad_norm": 0.5845364928245544, + "learning_rate": 0.0014854972418331944, + "loss": 1.4125, + "step": 6170 + }, + { + "epoch": 0.6518987341772152, + "grad_norm": 0.9109283685684204, + "learning_rate": 0.0014650219182191931, + "loss": 1.4363, + "step": 6180 + }, + { + "epoch": 0.6529535864978903, + "grad_norm": 0.5506412386894226, + "learning_rate": 0.001444828815847542, + "loss": 1.4375, + "step": 6190 + }, + { + "epoch": 0.6540084388185654, + "grad_norm": 0.69488126039505, + "learning_rate": 0.0014249140447269945, + "loss": 1.4264, + "step": 6200 + }, + { + "epoch": 0.6550632911392406, + "grad_norm": 0.5653802752494812, + "learning_rate": 0.0014052737684839257, + "loss": 1.4226, + "step": 6210 + }, + { + "epoch": 0.6561181434599156, + "grad_norm": 0.5073166489601135, + "learning_rate": 0.0013859042036232954, + "loss": 1.4305, + "step": 6220 + }, + { + "epoch": 0.6571729957805907, + "grad_norm": 0.5020584464073181, + "learning_rate": 0.001366801618799797, + "loss": 1.4168, + "step": 6230 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 0.5227780342102051, + "learning_rate": 0.001347962334099052, + "loss": 1.4184, + "step": 6240 + }, + { + "epoch": 0.6592827004219409, + "grad_norm": 0.4819777309894562, + "learning_rate": 0.0013293827203287143, + "loss": 1.4247, + "step": 6250 + }, + { + "epoch": 0.6603375527426161, + "grad_norm": 0.5877610445022583, + "learning_rate": 0.0013110591983193423, + "loss": 1.4156, + "step": 6260 + }, + { + "epoch": 0.6613924050632911, + "grad_norm": 0.5603451132774353, + "learning_rate": 0.0012929882382349102, + "loss": 1.4227, + "step": 6270 + }, + { + "epoch": 0.6624472573839663, + "grad_norm": 
0.5968028903007507, + "learning_rate": 0.0012751663588928214, + "loss": 1.4207, + "step": 6280 + }, + { + "epoch": 0.6635021097046413, + "grad_norm": 0.5048508644104004, + "learning_rate": 0.0012575901270932943, + "loss": 1.4161, + "step": 6290 + }, + { + "epoch": 0.6645569620253164, + "grad_norm": 0.7932562232017517, + "learning_rate": 0.0012402561569579936, + "loss": 1.4107, + "step": 6300 + }, + { + "epoch": 0.6656118143459916, + "grad_norm": 0.6583470702171326, + "learning_rate": 0.0012231611092777745, + "loss": 1.4086, + "step": 6310 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.5934231281280518, + "learning_rate": 0.0012063016908694193, + "loss": 1.4025, + "step": 6320 + }, + { + "epoch": 0.6677215189873418, + "grad_norm": 0.5164998173713684, + "learning_rate": 0.0011896746539412405, + "loss": 1.4091, + "step": 6330 + }, + { + "epoch": 0.6687763713080169, + "grad_norm": 0.5378324389457703, + "learning_rate": 0.0011732767954674265, + "loss": 1.4017, + "step": 6340 + }, + { + "epoch": 0.669831223628692, + "grad_norm": 0.5045227408409119, + "learning_rate": 0.0011571049565710122, + "loss": 1.4052, + "step": 6350 + }, + { + "epoch": 0.6708860759493671, + "grad_norm": 0.6413946747779846, + "learning_rate": 0.001141156021915355, + "loss": 1.4033, + "step": 6360 + }, + { + "epoch": 0.6719409282700421, + "grad_norm": 0.5191912055015564, + "learning_rate": 0.001125426919103997, + "loss": 1.3832, + "step": 6370 + }, + { + "epoch": 0.6729957805907173, + "grad_norm": 0.5992634892463684, + "learning_rate": 0.001109914618088799, + "loss": 1.3866, + "step": 6380 + }, + { + "epoch": 0.6740506329113924, + "grad_norm": 0.7955918312072754, + "learning_rate": 0.0010946161305862348, + "loss": 1.4026, + "step": 6390 + }, + { + "epoch": 0.6751054852320675, + "grad_norm": 0.7773348689079285, + "learning_rate": 0.001079528509501728, + "loss": 1.4016, + "step": 6400 + }, + { + "epoch": 0.6761603375527426, + "grad_norm": 0.5571773052215576, + "learning_rate": 
0.0010646488483619261, + "loss": 1.3965, + "step": 6410 + }, + { + "epoch": 0.6772151898734177, + "grad_norm": 0.4990285038948059, + "learning_rate": 0.0010499742807547976, + "loss": 1.3841, + "step": 6420 + }, + { + "epoch": 0.6782700421940928, + "grad_norm": 0.5601270198822021, + "learning_rate": 0.0010355019797774478, + "loss": 1.3932, + "step": 6430 + }, + { + "epoch": 0.679324894514768, + "grad_norm": 0.5050643682479858, + "learning_rate": 0.001021229157491546, + "loss": 1.3765, + "step": 6440 + }, + { + "epoch": 0.680379746835443, + "grad_norm": 0.5214822888374329, + "learning_rate": 0.0010071530643862578, + "loss": 1.3791, + "step": 6450 + }, + { + "epoch": 0.6814345991561181, + "grad_norm": 0.5144291520118713, + "learning_rate": 0.000993270988848579, + "loss": 1.3921, + "step": 6460 + }, + { + "epoch": 0.6824894514767933, + "grad_norm": 0.5946586728096008, + "learning_rate": 0.0009795802566409742, + "loss": 1.3861, + "step": 6470 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 0.7176449298858643, + "learning_rate": 0.0009660782303862109, + "loss": 1.3913, + "step": 6480 + }, + { + "epoch": 0.6845991561181435, + "grad_norm": 0.45808374881744385, + "learning_rate": 0.0009527623090592963, + "loss": 1.3844, + "step": 6490 + }, + { + "epoch": 0.6856540084388185, + "grad_norm": 0.5587730407714844, + "learning_rate": 0.0009396299274864177, + "loss": 1.397, + "step": 6500 + }, + { + "epoch": 0.6867088607594937, + "grad_norm": 0.5175219774246216, + "learning_rate": 0.0009266785558507877, + "loss": 1.3975, + "step": 6510 + }, + { + "epoch": 0.6877637130801688, + "grad_norm": 0.4646693766117096, + "learning_rate": 0.0009139056992053016, + "loss": 1.3819, + "step": 6520 + }, + { + "epoch": 0.6888185654008439, + "grad_norm": 0.5937052965164185, + "learning_rate": 0.000901308896991912, + "loss": 1.3725, + "step": 6530 + }, + { + "epoch": 0.689873417721519, + "grad_norm": 0.5852144956588745, + "learning_rate": 0.000888885722567627, + "loss": 1.3832, + "step": 6540 
+ }, + { + "epoch": 0.6909282700421941, + "grad_norm": 0.4718400835990906, + "learning_rate": 0.0008766337827370438, + "loss": 1.3758, + "step": 6550 + }, + { + "epoch": 0.6919831223628692, + "grad_norm": 0.5763649344444275, + "learning_rate": 0.000864550717291324, + "loss": 1.3747, + "step": 6560 + }, + { + "epoch": 0.6930379746835443, + "grad_norm": 0.541046679019928, + "learning_rate": 0.0008526341985535229, + "loss": 1.3712, + "step": 6570 + }, + { + "epoch": 0.6940928270042194, + "grad_norm": 0.5396777391433716, + "learning_rate": 0.0008408819309301891, + "loss": 1.3694, + "step": 6580 + }, + { + "epoch": 0.6951476793248945, + "grad_norm": 0.5645665526390076, + "learning_rate": 0.0008292916504691397, + "loss": 1.3736, + "step": 6590 + }, + { + "epoch": 0.6962025316455697, + "grad_norm": 0.5329201817512512, + "learning_rate": 0.0008178611244233354, + "loss": 1.3778, + "step": 6600 + }, + { + "epoch": 0.6972573839662447, + "grad_norm": 0.47563907504081726, + "learning_rate": 0.0008065881508207637, + "loss": 1.3632, + "step": 6610 + }, + { + "epoch": 0.6983122362869199, + "grad_norm": 0.48661863803863525, + "learning_rate": 0.0007954705580402523, + "loss": 1.3717, + "step": 6620 + }, + { + "epoch": 0.6993670886075949, + "grad_norm": 0.5526289939880371, + "learning_rate": 0.0007845062043931298, + "loss": 1.366, + "step": 6630 + }, + { + "epoch": 0.70042194092827, + "grad_norm": 0.6463859677314758, + "learning_rate": 0.0007736929777106497, + "loss": 1.3674, + "step": 6640 + }, + { + "epoch": 0.7014767932489452, + "grad_norm": 0.4902038276195526, + "learning_rate": 0.000763028794937105, + "loss": 1.3637, + "step": 6650 + }, + { + "epoch": 0.7025316455696202, + "grad_norm": 0.5209696888923645, + "learning_rate": 0.0007525116017285476, + "loss": 1.3631, + "step": 6660 + }, + { + "epoch": 0.7035864978902954, + "grad_norm": 0.5268991589546204, + "learning_rate": 0.0007421393720570417, + "loss": 1.3648, + "step": 6670 + }, + { + "epoch": 0.7046413502109705, + 
"grad_norm": 0.5123580694198608, + "learning_rate": 0.0007319101078203694, + "loss": 1.366, + "step": 6680 + }, + { + "epoch": 0.7056962025316456, + "grad_norm": 0.48749664425849915, + "learning_rate": 0.0007218218384571178, + "loss": 1.3582, + "step": 6690 + }, + { + "epoch": 0.7067510548523207, + "grad_norm": 0.4711751639842987, + "learning_rate": 0.0007118726205670703, + "loss": 1.3581, + "step": 6700 + }, + { + "epoch": 0.7078059071729957, + "grad_norm": 0.5960403084754944, + "learning_rate": 0.0007020605375368316, + "loss": 1.3619, + "step": 6710 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 0.6335268616676331, + "learning_rate": 0.000692383699170611, + "loss": 1.3499, + "step": 6720 + }, + { + "epoch": 0.709915611814346, + "grad_norm": 0.5655462145805359, + "learning_rate": 0.0006828402413260966, + "loss": 1.3591, + "step": 6730 + }, + { + "epoch": 0.7109704641350211, + "grad_norm": 0.5002266764640808, + "learning_rate": 0.0006734283255553471, + "loss": 1.3602, + "step": 6740 + }, + { + "epoch": 0.7120253164556962, + "grad_norm": 0.5185707807540894, + "learning_rate": 0.0006641461387506347, + "loss": 1.3641, + "step": 6750 + }, + { + "epoch": 0.7130801687763713, + "grad_norm": 0.614739716053009, + "learning_rate": 0.0006549918927951678, + "loss": 1.3574, + "step": 6760 + }, + { + "epoch": 0.7141350210970464, + "grad_norm": 0.49084967374801636, + "learning_rate": 0.0006459638242186297, + "loss": 1.3588, + "step": 6770 + }, + { + "epoch": 0.7151898734177216, + "grad_norm": 0.5136387348175049, + "learning_rate": 0.0006370601938574639, + "loss": 1.3551, + "step": 6780 + }, + { + "epoch": 0.7162447257383966, + "grad_norm": 0.5223336815834045, + "learning_rate": 0.0006282792865198421, + "loss": 1.3584, + "step": 6790 + }, + { + "epoch": 0.7172995780590717, + "grad_norm": 0.4992089867591858, + "learning_rate": 0.0006196194106552512, + "loss": 1.3447, + "step": 6800 + }, + { + "epoch": 0.7183544303797469, + "grad_norm": 0.5769894123077393, + "learning_rate": 
0.0006110788980286328, + "loss": 1.3504, + "step": 6810 + }, + { + "epoch": 0.7194092827004219, + "grad_norm": 0.5668891668319702, + "learning_rate": 0.0006026561033990158, + "loss": 1.3509, + "step": 6820 + }, + { + "epoch": 0.7204641350210971, + "grad_norm": 0.6820117235183716, + "learning_rate": 0.000594349404202577, + "loss": 1.3497, + "step": 6830 + }, + { + "epoch": 0.7215189873417721, + "grad_norm": 0.5896968245506287, + "learning_rate": 0.0005861572002400716, + "loss": 1.3386, + "step": 6840 + }, + { + "epoch": 0.7225738396624473, + "grad_norm": 0.5106127262115479, + "learning_rate": 0.0005780779133685717, + "loss": 1.3461, + "step": 6850 + }, + { + "epoch": 0.7236286919831224, + "grad_norm": 0.5448651909828186, + "learning_rate": 0.0005701099871974524, + "loss": 1.3422, + "step": 6860 + }, + { + "epoch": 0.7246835443037974, + "grad_norm": 0.5113927125930786, + "learning_rate": 0.0005622518867885708, + "loss": 1.3395, + "step": 6870 + }, + { + "epoch": 0.7257383966244726, + "grad_norm": 0.5165741443634033, + "learning_rate": 0.0005545020983605748, + "loss": 1.3465, + "step": 6880 + }, + { + "epoch": 0.7267932489451476, + "grad_norm": 0.4909664988517761, + "learning_rate": 0.0005468591289972898, + "loss": 1.3362, + "step": 6890 + }, + { + "epoch": 0.7278481012658228, + "grad_norm": 0.5048149824142456, + "learning_rate": 0.0005393215063601232, + "loss": 1.3356, + "step": 6900 + }, + { + "epoch": 0.7289029535864979, + "grad_norm": 0.48986268043518066, + "learning_rate": 0.0005318877784044343, + "loss": 1.3586, + "step": 6910 + }, + { + "epoch": 0.729957805907173, + "grad_norm": 0.5755218267440796, + "learning_rate": 0.0005245565130998126, + "loss": 1.3396, + "step": 6920 + }, + { + "epoch": 0.7310126582278481, + "grad_norm": 0.5522173047065735, + "learning_rate": 0.000517326298154212, + "loss": 1.3455, + "step": 6930 + }, + { + "epoch": 0.7320675105485233, + "grad_norm": 0.5478484630584717, + "learning_rate": 0.0005101957407418877, + "loss": 1.3482, + "step": 
6940 + }, + { + "epoch": 0.7331223628691983, + "grad_norm": 0.4900250732898712, + "learning_rate": 0.0005031634672350829, + "loss": 1.3445, + "step": 6950 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 0.473868727684021, + "learning_rate": 0.0004962281229394129, + "loss": 1.3346, + "step": 6960 + }, + { + "epoch": 0.7352320675105485, + "grad_norm": 0.5402085185050964, + "learning_rate": 0.0004893883718328983, + "loss": 1.3276, + "step": 6970 + }, + { + "epoch": 0.7362869198312236, + "grad_norm": 0.5062165260314941, + "learning_rate": 0.0004826428963085938, + "loss": 1.3313, + "step": 6980 + }, + { + "epoch": 0.7373417721518988, + "grad_norm": 0.5080792903900146, + "learning_rate": 0.00047599039692076457, + "loss": 1.3331, + "step": 6990 + }, + { + "epoch": 0.7383966244725738, + "grad_norm": 0.541022777557373, + "learning_rate": 0.0004694295921345622, + "loss": 1.3436, + "step": 7000 + }, + { + "epoch": 0.739451476793249, + "grad_norm": 0.47802749276161194, + "learning_rate": 0.00046295921807915015, + "loss": 1.3156, + "step": 7010 + }, + { + "epoch": 0.740506329113924, + "grad_norm": 0.6114453077316284, + "learning_rate": 0.00045657802830423164, + "loss": 1.3308, + "step": 7020 + }, + { + "epoch": 0.7415611814345991, + "grad_norm": 0.5069577693939209, + "learning_rate": 0.00045028479353993473, + "loss": 1.3348, + "step": 7030 + }, + { + "epoch": 0.7426160337552743, + "grad_norm": 0.6038907170295715, + "learning_rate": 0.00044407830146000587, + "loss": 1.3362, + "step": 7040 + }, + { + "epoch": 0.7436708860759493, + "grad_norm": 0.5295746326446533, + "learning_rate": 0.0004379573564482676, + "loss": 1.3257, + "step": 7050 + }, + { + "epoch": 0.7447257383966245, + "grad_norm": 0.5159029960632324, + "learning_rate": 0.0004319207793682963, + "loss": 1.3319, + "step": 7060 + }, + { + "epoch": 0.7457805907172996, + "grad_norm": 0.5625802874565125, + "learning_rate": 0.0004259674073362731, + "loss": 1.3373, + "step": 7070 + }, + { + "epoch": 0.7468354430379747, + 
"grad_norm": 0.5231423377990723, + "learning_rate": 0.00042009609349696626, + "loss": 1.3331, + "step": 7080 + }, + { + "epoch": 0.7478902953586498, + "grad_norm": 0.5272135734558105, + "learning_rate": 0.00041430570680280233, + "loss": 1.3304, + "step": 7090 + }, + { + "epoch": 0.7489451476793249, + "grad_norm": 0.5432569980621338, + "learning_rate": 0.0004085951317959809, + "loss": 1.3266, + "step": 7100 + }, + { + "epoch": 0.75, + "grad_norm": 0.5289936065673828, + "learning_rate": 0.00040296326839359315, + "loss": 1.3385, + "step": 7110 + }, + { + "epoch": 0.7510548523206751, + "grad_norm": 0.5625152587890625, + "learning_rate": 0.000397409031675703, + "loss": 1.3296, + "step": 7120 + }, + { + "epoch": 0.7521097046413502, + "grad_norm": 0.511777937412262, + "learning_rate": 0.00039193135167634786, + "loss": 1.3387, + "step": 7130 + }, + { + "epoch": 0.7531645569620253, + "grad_norm": 0.5690923929214478, + "learning_rate": 0.00038652917317742123, + "loss": 1.3217, + "step": 7140 + }, + { + "epoch": 0.7542194092827004, + "grad_norm": 0.46988624334335327, + "learning_rate": 0.0003812014555053956, + "loss": 1.3181, + "step": 7150 + }, + { + "epoch": 0.7552742616033755, + "grad_norm": 0.4778980314731598, + "learning_rate": 0.00037594717233084774, + "loss": 1.3273, + "step": 7160 + }, + { + "epoch": 0.7563291139240507, + "grad_norm": 0.6266793012619019, + "learning_rate": 0.0003707653114707471, + "loss": 1.3357, + "step": 7170 + }, + { + "epoch": 0.7573839662447257, + "grad_norm": 0.5043386816978455, + "learning_rate": 0.00036565487469346906, + "loss": 1.3142, + "step": 7180 + }, + { + "epoch": 0.7584388185654009, + "grad_norm": 0.49880918860435486, + "learning_rate": 0.0003606148775264958, + "loss": 1.3154, + "step": 7190 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 0.5304259061813354, + "learning_rate": 0.0003556443490667684, + "loss": 1.3211, + "step": 7200 + }, + { + "epoch": 0.760548523206751, + "grad_norm": 0.5800580382347107, + "learning_rate": 
0.0003507423317936521, + "loss": 1.324, + "step": 7210 + }, + { + "epoch": 0.7616033755274262, + "grad_norm": 0.5011001825332642, + "learning_rate": 0.00034590788138448006, + "loss": 1.3374, + "step": 7220 + }, + { + "epoch": 0.7626582278481012, + "grad_norm": 0.5401927828788757, + "learning_rate": 0.0003411400665326393, + "loss": 1.3331, + "step": 7230 + }, + { + "epoch": 0.7637130801687764, + "grad_norm": 0.487700492143631, + "learning_rate": 0.00033643796876816424, + "loss": 1.3285, + "step": 7240 + }, + { + "epoch": 0.7647679324894515, + "grad_norm": 0.49600911140441895, + "learning_rate": 0.000331800682280803, + "loss": 1.3238, + "step": 7250 + }, + { + "epoch": 0.7658227848101266, + "grad_norm": 0.48997899889945984, + "learning_rate": 0.0003272273137455226, + "loss": 1.3196, + "step": 7260 + }, + { + "epoch": 0.7668776371308017, + "grad_norm": 0.4990381896495819, + "learning_rate": 0.00032271698215041863, + "loss": 1.318, + "step": 7270 + }, + { + "epoch": 0.7679324894514767, + "grad_norm": 0.5055996775627136, + "learning_rate": 0.0003182688186269984, + "loss": 1.321, + "step": 7280 + }, + { + "epoch": 0.7689873417721519, + "grad_norm": 0.5182024240493774, + "learning_rate": 0.0003138819662828017, + "loss": 1.3316, + "step": 7290 + }, + { + "epoch": 0.770042194092827, + "grad_norm": 0.5547622442245483, + "learning_rate": 0.00030955558003632966, + "loss": 1.335, + "step": 7300 + }, + { + "epoch": 0.7710970464135021, + "grad_norm": 0.4912651777267456, + "learning_rate": 0.0003052888264542483, + "loss": 1.315, + "step": 7310 + }, + { + "epoch": 0.7721518987341772, + "grad_norm": 0.509490966796875, + "learning_rate": 0.0003010808835908368, + "loss": 1.3106, + "step": 7320 + }, + { + "epoch": 0.7732067510548524, + "grad_norm": 0.5316436290740967, + "learning_rate": 0.00029693094082964785, + "loss": 1.3152, + "step": 7330 + }, + { + "epoch": 0.7742616033755274, + "grad_norm": 0.5913136601448059, + "learning_rate": 0.0002928381987273508, + "loss": 1.3138, + "step": 
7340 + }, + { + "epoch": 0.7753164556962026, + "grad_norm": 0.4977995753288269, + "learning_rate": 0.0002888018688597272, + "loss": 1.3106, + "step": 7350 + }, + { + "epoch": 0.7763713080168776, + "grad_norm": 0.48362046480178833, + "learning_rate": 0.0002848211736697894, + "loss": 1.3176, + "step": 7360 + }, + { + "epoch": 0.7774261603375527, + "grad_norm": 0.5586822032928467, + "learning_rate": 0.00028089534631799183, + "loss": 1.3199, + "step": 7370 + }, + { + "epoch": 0.7784810126582279, + "grad_norm": 0.47931405901908875, + "learning_rate": 0.0002770236305345076, + "loss": 1.3068, + "step": 7380 + }, + { + "epoch": 0.7795358649789029, + "grad_norm": 0.4615863859653473, + "learning_rate": 0.00027320528047354093, + "loss": 1.317, + "step": 7390 + }, + { + "epoch": 0.7805907172995781, + "grad_norm": 0.4860086441040039, + "learning_rate": 0.00026943956056964773, + "loss": 1.3204, + "step": 7400 + }, + { + "epoch": 0.7816455696202531, + "grad_norm": 0.4814727008342743, + "learning_rate": 0.0002657257453960364, + "loss": 1.3026, + "step": 7410 + }, + { + "epoch": 0.7827004219409283, + "grad_norm": 0.5079910755157471, + "learning_rate": 0.0002620631195248222, + "loss": 1.3086, + "step": 7420 + }, + { + "epoch": 0.7837552742616034, + "grad_norm": 0.4841192364692688, + "learning_rate": 0.00025845097738920735, + "loss": 1.295, + "step": 7430 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 0.47682246565818787, + "learning_rate": 0.0002548886231475606, + "loss": 1.2986, + "step": 7440 + }, + { + "epoch": 0.7858649789029536, + "grad_norm": 0.5059146285057068, + "learning_rate": 0.0002513753705493713, + "loss": 1.3018, + "step": 7450 + }, + { + "epoch": 0.7869198312236287, + "grad_norm": 0.4751178026199341, + "learning_rate": 0.0002479105428030497, + "loss": 1.311, + "step": 7460 + }, + { + "epoch": 0.7879746835443038, + "grad_norm": 0.46254920959472656, + "learning_rate": 0.00024449347244555043, + "loss": 1.3038, + "step": 7470 + }, + { + "epoch": 
0.7890295358649789, + "grad_norm": 0.49197128415107727, + "learning_rate": 0.00024112350121379254, + "loss": 1.3107, + "step": 7480 + }, + { + "epoch": 0.790084388185654, + "grad_norm": 0.4641369879245758, + "learning_rate": 0.000237799979917852, + "loss": 1.3175, + "step": 7490 + }, + { + "epoch": 0.7911392405063291, + "grad_norm": 0.5277642011642456, + "learning_rate": 0.00023452226831590227, + "loss": 1.3112, + "step": 7500 + }, + { + "epoch": 0.7921940928270043, + "grad_norm": 0.4912530779838562, + "learning_rate": 0.00023128973499087779, + "loss": 1.318, + "step": 7510 + }, + { + "epoch": 0.7932489451476793, + "grad_norm": 0.4632818102836609, + "learning_rate": 0.00022810175722883858, + "loss": 1.3012, + "step": 7520 + }, + { + "epoch": 0.7943037974683544, + "grad_norm": 0.48173755407333374, + "learning_rate": 0.0002249577208990106, + "loss": 1.3033, + "step": 7530 + }, + { + "epoch": 0.7953586497890295, + "grad_norm": 0.5092982053756714, + "learning_rate": 0.00022185702033547996, + "loss": 1.2993, + "step": 7540 + }, + { + "epoch": 0.7964135021097046, + "grad_norm": 0.518355667591095, + "learning_rate": 0.00021879905822051756, + "loss": 1.2987, + "step": 7550 + }, + { + "epoch": 0.7974683544303798, + "grad_norm": 0.4870567321777344, + "learning_rate": 0.00021578324546951222, + "loss": 1.3004, + "step": 7560 + }, + { + "epoch": 0.7985232067510548, + "grad_norm": 0.5034220218658447, + "learning_rate": 0.00021280900111748948, + "loss": 1.3119, + "step": 7570 + }, + { + "epoch": 0.79957805907173, + "grad_norm": 0.49648088216781616, + "learning_rate": 0.00020987575220719483, + "loss": 1.3082, + "step": 7580 + }, + { + "epoch": 0.8006329113924051, + "grad_norm": 0.48447710275650024, + "learning_rate": 0.00020698293367871933, + "loss": 1.2997, + "step": 7590 + }, + { + "epoch": 0.8016877637130801, + "grad_norm": 0.48734259605407715, + "learning_rate": 0.00020412998826064692, + "loss": 1.3055, + "step": 7600 + }, + { + "epoch": 0.8027426160337553, + "grad_norm": 
0.48913803696632385, + "learning_rate": 0.00020131636636270178, + "loss": 1.3137, + "step": 7610 + }, + { + "epoch": 0.8037974683544303, + "grad_norm": 0.4835722744464874, + "learning_rate": 0.00019854152596987523, + "loss": 1.2881, + "step": 7620 + }, + { + "epoch": 0.8048523206751055, + "grad_norm": 0.5292708873748779, + "learning_rate": 0.00019580493253801255, + "loss": 1.3117, + "step": 7630 + }, + { + "epoch": 0.8059071729957806, + "grad_norm": 0.49038606882095337, + "learning_rate": 0.00019310605889083838, + "loss": 1.3026, + "step": 7640 + }, + { + "epoch": 0.8069620253164557, + "grad_norm": 0.5049091577529907, + "learning_rate": 0.0001904443851184018, + "loss": 1.3194, + "step": 7650 + }, + { + "epoch": 0.8080168776371308, + "grad_norm": 0.48894670605659485, + "learning_rate": 0.00018781939847692096, + "loss": 1.2952, + "step": 7660 + }, + { + "epoch": 0.8090717299578059, + "grad_norm": 0.469951331615448, + "learning_rate": 0.00018523059329000844, + "loss": 1.3035, + "step": 7670 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 0.5279447436332703, + "learning_rate": 0.0001826774708512579, + "loss": 1.3162, + "step": 7680 + }, + { + "epoch": 0.8111814345991561, + "grad_norm": 0.5621384382247925, + "learning_rate": 0.00018015953932817348, + "loss": 1.2877, + "step": 7690 + }, + { + "epoch": 0.8122362869198312, + "grad_norm": 0.48656702041625977, + "learning_rate": 0.00017767631366742332, + "loss": 1.3003, + "step": 7700 + }, + { + "epoch": 0.8132911392405063, + "grad_norm": 0.5730150938034058, + "learning_rate": 0.00017522731550139922, + "loss": 1.2989, + "step": 7710 + }, + { + "epoch": 0.8143459915611815, + "grad_norm": 0.5166862607002258, + "learning_rate": 0.00017281207305606407, + "loss": 1.3014, + "step": 7720 + }, + { + "epoch": 0.8154008438818565, + "grad_norm": 0.5328842997550964, + "learning_rate": 0.00017043012106006926, + "loss": 1.3076, + "step": 7730 + }, + { + "epoch": 0.8164556962025317, + "grad_norm": 0.46243101358413696, + 
"learning_rate": 0.00016808100065512528, + "loss": 1.3081, + "step": 7740 + }, + { + "epoch": 0.8175105485232067, + "grad_norm": 0.4814881384372711, + "learning_rate": 0.00016576425930760734, + "loss": 1.2896, + "step": 7750 + }, + { + "epoch": 0.8185654008438819, + "grad_norm": 0.5162342190742493, + "learning_rate": 0.00016347945072137934, + "loss": 1.2925, + "step": 7760 + }, + { + "epoch": 0.819620253164557, + "grad_norm": 0.46444493532180786, + "learning_rate": 0.00016122613475181977, + "loss": 1.2996, + "step": 7770 + }, + { + "epoch": 0.820675105485232, + "grad_norm": 0.5196223258972168, + "learning_rate": 0.00015900387732103232, + "loss": 1.2922, + "step": 7780 + }, + { + "epoch": 0.8217299578059072, + "grad_norm": 0.5249384641647339, + "learning_rate": 0.00015681225033422526, + "loss": 1.3112, + "step": 7790 + }, + { + "epoch": 0.8227848101265823, + "grad_norm": 0.4600869417190552, + "learning_rate": 0.00015465083159724345, + "loss": 1.3001, + "step": 7800 + }, + { + "epoch": 0.8238396624472574, + "grad_norm": 0.5109081268310547, + "learning_rate": 0.0001525192047352371, + "loss": 1.2955, + "step": 7810 + }, + { + "epoch": 0.8248945147679325, + "grad_norm": 0.4703962206840515, + "learning_rate": 0.00015041695911245136, + "loss": 1.3057, + "step": 7820 + }, + { + "epoch": 0.8259493670886076, + "grad_norm": 0.49680525064468384, + "learning_rate": 0.00014834368975312172, + "loss": 1.2954, + "step": 7830 + }, + { + "epoch": 0.8270042194092827, + "grad_norm": 0.4776685833930969, + "learning_rate": 0.00014629899726345958, + "loss": 1.2821, + "step": 7840 + }, + { + "epoch": 0.8280590717299579, + "grad_norm": 0.5237215757369995, + "learning_rate": 0.00014428248775471316, + "loss": 1.2897, + "step": 7850 + }, + { + "epoch": 0.8291139240506329, + "grad_norm": 0.45696312189102173, + "learning_rate": 0.000142293772767289, + "loss": 1.2787, + "step": 7860 + }, + { + "epoch": 0.830168776371308, + "grad_norm": 0.5189747214317322, + "learning_rate": 
0.00014033246919591922, + "loss": 1.2911, + "step": 7870 + }, + { + "epoch": 0.8312236286919831, + "grad_norm": 0.5011923313140869, + "learning_rate": 0.00013839819921586025, + "loss": 1.3128, + "step": 7880 + }, + { + "epoch": 0.8322784810126582, + "grad_norm": 0.4645780026912689, + "learning_rate": 0.00013649059021010894, + "loss": 1.3079, + "step": 7890 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.5004777312278748, + "learning_rate": 0.00013460927469762155, + "loss": 1.2872, + "step": 7900 + }, + { + "epoch": 0.8343881856540084, + "grad_norm": 0.5973535776138306, + "learning_rate": 0.00013275389026252255, + "loss": 1.2976, + "step": 7910 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 0.5669199824333191, + "learning_rate": 0.0001309240794842889, + "loss": 1.296, + "step": 7920 + }, + { + "epoch": 0.8364978902953587, + "grad_norm": 0.46773242950439453, + "learning_rate": 0.00012911948986889664, + "loss": 1.3041, + "step": 7930 + }, + { + "epoch": 0.8375527426160337, + "grad_norm": 0.48976677656173706, + "learning_rate": 0.00012733977378091664, + "loss": 1.2981, + "step": 7940 + }, + { + "epoch": 0.8386075949367089, + "grad_norm": 0.47129586338996887, + "learning_rate": 0.00012558458837654633, + "loss": 1.3007, + "step": 7950 + }, + { + "epoch": 0.8396624472573839, + "grad_norm": 0.4685057997703552, + "learning_rate": 0.00012385359553756422, + "loss": 1.2874, + "step": 7960 + }, + { + "epoch": 0.8407172995780591, + "grad_norm": 0.47045186161994934, + "learning_rate": 0.0001221464618061951, + "loss": 1.2871, + "step": 7970 + }, + { + "epoch": 0.8417721518987342, + "grad_norm": 0.4687444567680359, + "learning_rate": 0.0001204628583208727, + "loss": 1.2839, + "step": 7980 + }, + { + "epoch": 0.8428270042194093, + "grad_norm": 0.6013496518135071, + "learning_rate": 0.00011880246075288824, + "loss": 1.2983, + "step": 7990 + }, + { + "epoch": 0.8438818565400844, + "grad_norm": 0.5217690467834473, + "learning_rate": 0.00011716494924391148, + "loss": 
1.2925, + "step": 8000 + }, + { + "epoch": 0.8449367088607594, + "grad_norm": 0.5072504281997681, + "learning_rate": 0.00011555000834437363, + "loss": 1.2992, + "step": 8010 + }, + { + "epoch": 0.8459915611814346, + "grad_norm": 0.47519269585609436, + "learning_rate": 0.00011395732695269907, + "loss": 1.2932, + "step": 8020 + }, + { + "epoch": 0.8470464135021097, + "grad_norm": 0.5145894289016724, + "learning_rate": 0.00011238659825537507, + "loss": 1.2904, + "step": 8030 + }, + { + "epoch": 0.8481012658227848, + "grad_norm": 0.4669525623321533, + "learning_rate": 0.00011083751966784716, + "loss": 1.3018, + "step": 8040 + }, + { + "epoch": 0.8491561181434599, + "grad_norm": 0.4746662676334381, + "learning_rate": 0.00010930979277622952, + "loss": 1.2822, + "step": 8050 + }, + { + "epoch": 0.8502109704641351, + "grad_norm": 0.5146121978759766, + "learning_rate": 0.00010780312327981853, + "loss": 1.2903, + "step": 8060 + }, + { + "epoch": 0.8512658227848101, + "grad_norm": 0.465825617313385, + "learning_rate": 0.0001063172209343989, + "loss": 1.2965, + "step": 8070 + }, + { + "epoch": 0.8523206751054853, + "grad_norm": 0.4787508547306061, + "learning_rate": 0.000104851799496331, + "loss": 1.2973, + "step": 8080 + }, + { + "epoch": 0.8533755274261603, + "grad_norm": 0.512465238571167, + "learning_rate": 0.00010340657666740917, + "loss": 1.2871, + "step": 8090 + }, + { + "epoch": 0.8544303797468354, + "grad_norm": 0.5024791955947876, + "learning_rate": 0.00010198127404047976, + "loss": 1.2792, + "step": 8100 + }, + { + "epoch": 0.8554852320675106, + "grad_norm": 0.5367498397827148, + "learning_rate": 0.00010057561704580898, + "loss": 1.2985, + "step": 8110 + }, + { + "epoch": 0.8565400843881856, + "grad_norm": 0.4867110848426819, + "learning_rate": 9.918933489818986e-05, + "loss": 1.2879, + "step": 8120 + }, + { + "epoch": 0.8575949367088608, + "grad_norm": 0.48658135533332825, + "learning_rate": 9.782216054477828e-05, + "loss": 1.2897, + "step": 8130 + }, + { + 
"epoch": 0.8586497890295358, + "grad_norm": 0.5055621266365051, + "learning_rate": 9.647383061364803e-05, + "loss": 1.2872, + "step": 8140 + }, + { + "epoch": 0.859704641350211, + "grad_norm": 0.4670827388763428, + "learning_rate": 9.514408536305497e-05, + "loss": 1.2841, + "step": 8150 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 0.49410444498062134, + "learning_rate": 9.383266863140043e-05, + "loss": 1.2955, + "step": 8160 + }, + { + "epoch": 0.8618143459915611, + "grad_norm": 0.47974035143852234, + "learning_rate": 9.25393277878844e-05, + "loss": 1.2855, + "step": 8170 + }, + { + "epoch": 0.8628691983122363, + "grad_norm": 0.4761445224285126, + "learning_rate": 9.126381368383881e-05, + "loss": 1.293, + "step": 8180 + }, + { + "epoch": 0.8639240506329114, + "grad_norm": 0.48335903882980347, + "learning_rate": 9.000588060473158e-05, + "loss": 1.286, + "step": 8190 + }, + { + "epoch": 0.8649789029535865, + "grad_norm": 0.4784451425075531, + "learning_rate": 8.876528622283232e-05, + "loss": 1.2991, + "step": 8200 + }, + { + "epoch": 0.8660337552742616, + "grad_norm": 0.46803441643714905, + "learning_rate": 8.754179155053052e-05, + "loss": 1.2922, + "step": 8210 + }, + { + "epoch": 0.8670886075949367, + "grad_norm": 0.4703162610530853, + "learning_rate": 8.63351608942968e-05, + "loss": 1.2985, + "step": 8220 + }, + { + "epoch": 0.8681434599156118, + "grad_norm": 0.4672531485557556, + "learning_rate": 8.514516180927926e-05, + "loss": 1.2918, + "step": 8230 + }, + { + "epoch": 0.869198312236287, + "grad_norm": 0.4863806664943695, + "learning_rate": 8.397156505452524e-05, + "loss": 1.2909, + "step": 8240 + }, + { + "epoch": 0.870253164556962, + "grad_norm": 0.47227323055267334, + "learning_rate": 8.28141445488205e-05, + "loss": 1.3038, + "step": 8250 + }, + { + "epoch": 0.8713080168776371, + "grad_norm": 0.5004388689994812, + "learning_rate": 8.167267732713705e-05, + "loss": 1.2945, + "step": 8260 + }, + { + "epoch": 0.8723628691983122, + "grad_norm": 
0.5001670718193054, + "learning_rate": 8.054694349768114e-05, + "loss": 1.2913, + "step": 8270 + }, + { + "epoch": 0.8734177215189873, + "grad_norm": 0.4512779414653778, + "learning_rate": 7.943672619953359e-05, + "loss": 1.2755, + "step": 8280 + }, + { + "epoch": 0.8744725738396625, + "grad_norm": 0.5033439993858337, + "learning_rate": 7.834181156087357e-05, + "loss": 1.2741, + "step": 8290 + }, + { + "epoch": 0.8755274261603375, + "grad_norm": 0.4707455039024353, + "learning_rate": 7.726198865777852e-05, + "loss": 1.3006, + "step": 8300 + }, + { + "epoch": 0.8765822784810127, + "grad_norm": 0.4731435775756836, + "learning_rate": 7.61970494735919e-05, + "loss": 1.2797, + "step": 8310 + }, + { + "epoch": 0.8776371308016878, + "grad_norm": 0.4715854823589325, + "learning_rate": 7.514678885885086e-05, + "loss": 1.2986, + "step": 8320 + }, + { + "epoch": 0.8786919831223629, + "grad_norm": 0.4568154215812683, + "learning_rate": 7.411100449176634e-05, + "loss": 1.2921, + "step": 8330 + }, + { + "epoch": 0.879746835443038, + "grad_norm": 0.4706604480743408, + "learning_rate": 7.308949683924792e-05, + "loss": 1.2792, + "step": 8340 + }, + { + "epoch": 0.880801687763713, + "grad_norm": 0.483224093914032, + "learning_rate": 7.208206911846581e-05, + "loss": 1.2834, + "step": 8350 + }, + { + "epoch": 0.8818565400843882, + "grad_norm": 0.4954493045806885, + "learning_rate": 7.10885272589427e-05, + "loss": 1.2805, + "step": 8360 + }, + { + "epoch": 0.8829113924050633, + "grad_norm": 0.4545811116695404, + "learning_rate": 7.010867986516811e-05, + "loss": 1.2882, + "step": 8370 + }, + { + "epoch": 0.8839662447257384, + "grad_norm": 0.49659305810928345, + "learning_rate": 6.914233817972799e-05, + "loss": 1.296, + "step": 8380 + }, + { + "epoch": 0.8850210970464135, + "grad_norm": 0.4791646897792816, + "learning_rate": 6.818931604694264e-05, + "loss": 1.2845, + "step": 8390 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 0.4629514217376709, + "learning_rate": 
6.724942987700563e-05, + "loss": 1.2866, + "step": 8400 + }, + { + "epoch": 0.8871308016877637, + "grad_norm": 0.452927827835083, + "learning_rate": 6.632249861061733e-05, + "loss": 1.3022, + "step": 8410 + }, + { + "epoch": 0.8881856540084389, + "grad_norm": 0.4618093967437744, + "learning_rate": 6.540834368410549e-05, + "loss": 1.2893, + "step": 8420 + }, + { + "epoch": 0.8892405063291139, + "grad_norm": 0.48160403966903687, + "learning_rate": 6.4506788995027e-05, + "loss": 1.2874, + "step": 8430 + }, + { + "epoch": 0.890295358649789, + "grad_norm": 0.4891617000102997, + "learning_rate": 6.361766086824344e-05, + "loss": 1.2814, + "step": 8440 + }, + { + "epoch": 0.8913502109704642, + "grad_norm": 0.5169182419776917, + "learning_rate": 6.274078802246449e-05, + "loss": 1.2893, + "step": 8450 + }, + { + "epoch": 0.8924050632911392, + "grad_norm": 0.5058215856552124, + "learning_rate": 6.187600153725223e-05, + "loss": 1.2871, + "step": 8460 + }, + { + "epoch": 0.8934599156118144, + "grad_norm": 0.460235059261322, + "learning_rate": 6.1023134820480546e-05, + "loss": 1.2875, + "step": 8470 + }, + { + "epoch": 0.8945147679324894, + "grad_norm": 0.4831646978855133, + "learning_rate": 6.0182023576242725e-05, + "loss": 1.2804, + "step": 8480 + }, + { + "epoch": 0.8955696202531646, + "grad_norm": 0.46499505639076233, + "learning_rate": 5.9352505773201664e-05, + "loss": 1.2869, + "step": 8490 + }, + { + "epoch": 0.8966244725738397, + "grad_norm": 0.47788241505622864, + "learning_rate": 5.8534421613376175e-05, + "loss": 1.2784, + "step": 8500 + }, + { + "epoch": 0.8976793248945147, + "grad_norm": 0.46817654371261597, + "learning_rate": 5.772761350135759e-05, + "loss": 1.2869, + "step": 8510 + }, + { + "epoch": 0.8987341772151899, + "grad_norm": 0.48694300651550293, + "learning_rate": 5.6931926013950586e-05, + "loss": 1.2842, + "step": 8520 + }, + { + "epoch": 0.8997890295358649, + "grad_norm": 0.4783858358860016, + "learning_rate": 5.61472058702326e-05, + "loss": 1.2958, + 
"step": 8530 + }, + { + "epoch": 0.9008438818565401, + "grad_norm": 0.4681575298309326, + "learning_rate": 5.53733019020258e-05, + "loss": 1.2928, + "step": 8540 + }, + { + "epoch": 0.9018987341772152, + "grad_norm": 0.5110870003700256, + "learning_rate": 5.4610065024776125e-05, + "loss": 1.2857, + "step": 8550 + }, + { + "epoch": 0.9029535864978903, + "grad_norm": 0.47763773798942566, + "learning_rate": 5.38573482088337e-05, + "loss": 1.2751, + "step": 8560 + }, + { + "epoch": 0.9040084388185654, + "grad_norm": 0.527594268321991, + "learning_rate": 5.3115006451129075e-05, + "loss": 1.2961, + "step": 8570 + }, + { + "epoch": 0.9050632911392406, + "grad_norm": 0.45547598600387573, + "learning_rate": 5.2382896747239935e-05, + "loss": 1.2859, + "step": 8580 + }, + { + "epoch": 0.9061181434599156, + "grad_norm": 0.5116627812385559, + "learning_rate": 5.166087806384275e-05, + "loss": 1.291, + "step": 8590 + }, + { + "epoch": 0.9071729957805907, + "grad_norm": 0.453938752412796, + "learning_rate": 5.0948811311544186e-05, + "loss": 1.2832, + "step": 8600 + }, + { + "epoch": 0.9082278481012658, + "grad_norm": 0.5115503668785095, + "learning_rate": 5.024655931808697e-05, + "loss": 1.2866, + "step": 8610 + }, + { + "epoch": 0.9092827004219409, + "grad_norm": 0.4982527494430542, + "learning_rate": 4.955398680192509e-05, + "loss": 1.2828, + "step": 8620 + }, + { + "epoch": 0.9103375527426161, + "grad_norm": 0.4778030216693878, + "learning_rate": 4.887096034616319e-05, + "loss": 1.2913, + "step": 8630 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 0.5285346508026123, + "learning_rate": 4.819734837285529e-05, + "loss": 1.2752, + "step": 8640 + }, + { + "epoch": 0.9124472573839663, + "grad_norm": 0.4840639531612396, + "learning_rate": 4.7533021117657475e-05, + "loss": 1.2772, + "step": 8650 + }, + { + "epoch": 0.9135021097046413, + "grad_norm": 0.5008677840232849, + "learning_rate": 4.687785060483031e-05, + "loss": 1.2799, + "step": 8660 + }, + { + "epoch": 
0.9145569620253164, + "grad_norm": 0.46262115240097046, + "learning_rate": 4.623171062258557e-05, + "loss": 1.2864, + "step": 8670 + }, + { + "epoch": 0.9156118143459916, + "grad_norm": 0.460462361574173, + "learning_rate": 4.559447669877288e-05, + "loss": 1.2941, + "step": 8680 + }, + { + "epoch": 0.9166666666666666, + "grad_norm": 0.47785452008247375, + "learning_rate": 4.496602607690141e-05, + "loss": 1.2868, + "step": 8690 + }, + { + "epoch": 0.9177215189873418, + "grad_norm": 0.45095816254615784, + "learning_rate": 4.434623769249217e-05, + "loss": 1.2859, + "step": 8700 + }, + { + "epoch": 0.9187763713080169, + "grad_norm": 0.46034693717956543, + "learning_rate": 4.373499214975615e-05, + "loss": 1.2843, + "step": 8710 + }, + { + "epoch": 0.919831223628692, + "grad_norm": 0.4498767554759979, + "learning_rate": 4.313217169859397e-05, + "loss": 1.2785, + "step": 8720 + }, + { + "epoch": 0.9208860759493671, + "grad_norm": 0.4550316333770752, + "learning_rate": 4.253766021191256e-05, + "loss": 1.2917, + "step": 8730 + }, + { + "epoch": 0.9219409282700421, + "grad_norm": 0.4780294597148895, + "learning_rate": 4.19513431632545e-05, + "loss": 1.2802, + "step": 8740 + }, + { + "epoch": 0.9229957805907173, + "grad_norm": 0.45463302731513977, + "learning_rate": 4.1373107604735626e-05, + "loss": 1.2751, + "step": 8750 + }, + { + "epoch": 0.9240506329113924, + "grad_norm": 0.4685260057449341, + "learning_rate": 4.0802842145286876e-05, + "loss": 1.2658, + "step": 8760 + }, + { + "epoch": 0.9251054852320675, + "grad_norm": 0.4742572605609894, + "learning_rate": 4.024043692919589e-05, + "loss": 1.2785, + "step": 8770 + }, + { + "epoch": 0.9261603375527426, + "grad_norm": 0.48642367124557495, + "learning_rate": 3.968578361494449e-05, + "loss": 1.2836, + "step": 8780 + }, + { + "epoch": 0.9272151898734177, + "grad_norm": 0.44754165410995483, + "learning_rate": 3.91387753543378e-05, + "loss": 1.2781, + "step": 8790 + }, + { + "epoch": 0.9282700421940928, + "grad_norm": 
0.4758548438549042, + "learning_rate": 3.859930677192103e-05, + "loss": 1.274, + "step": 8800 + }, + { + "epoch": 0.929324894514768, + "grad_norm": 0.4646531641483307, + "learning_rate": 3.806727394468005e-05, + "loss": 1.2955, + "step": 8810 + }, + { + "epoch": 0.930379746835443, + "grad_norm": 0.4570622444152832, + "learning_rate": 3.7542574382021635e-05, + "loss": 1.2805, + "step": 8820 + }, + { + "epoch": 0.9314345991561181, + "grad_norm": 0.48418715596199036, + "learning_rate": 3.702510700602975e-05, + "loss": 1.2977, + "step": 8830 + }, + { + "epoch": 0.9324894514767933, + "grad_norm": 0.4781525433063507, + "learning_rate": 3.651477213199394e-05, + "loss": 1.2817, + "step": 8840 + }, + { + "epoch": 0.9335443037974683, + "grad_norm": 0.4933951497077942, + "learning_rate": 3.601147144920609e-05, + "loss": 1.2811, + "step": 8850 + }, + { + "epoch": 0.9345991561181435, + "grad_norm": 0.5042923092842102, + "learning_rate": 3.5515108002021946e-05, + "loss": 1.2817, + "step": 8860 + }, + { + "epoch": 0.9356540084388185, + "grad_norm": 0.4611527919769287, + "learning_rate": 3.502558617118352e-05, + "loss": 1.2844, + "step": 8870 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 0.48543456196784973, + "learning_rate": 3.454281165539913e-05, + "loss": 1.2881, + "step": 8880 + }, + { + "epoch": 0.9377637130801688, + "grad_norm": 0.4693308472633362, + "learning_rate": 3.406669145317717e-05, + "loss": 1.2877, + "step": 8890 + }, + { + "epoch": 0.9388185654008439, + "grad_norm": 0.4632687568664551, + "learning_rate": 3.359713384491036e-05, + "loss": 1.2776, + "step": 8900 + }, + { + "epoch": 0.939873417721519, + "grad_norm": 0.5444868206977844, + "learning_rate": 3.313404837520694e-05, + "loss": 1.2757, + "step": 8910 + }, + { + "epoch": 0.9409282700421941, + "grad_norm": 0.44671541452407837, + "learning_rate": 3.267734583546536e-05, + "loss": 1.2827, + "step": 8920 + }, + { + "epoch": 0.9419831223628692, + "grad_norm": 0.46076348423957825, + "learning_rate": 
3.222693824668916e-05, + "loss": 1.2806, + "step": 8930 + }, + { + "epoch": 0.9430379746835443, + "grad_norm": 0.49430784583091736, + "learning_rate": 3.178273884253874e-05, + "loss": 1.2814, + "step": 8940 + }, + { + "epoch": 0.9440928270042194, + "grad_norm": 0.45694178342819214, + "learning_rate": 3.134466205261674e-05, + "loss": 1.2829, + "step": 8950 + }, + { + "epoch": 0.9451476793248945, + "grad_norm": 0.5009331703186035, + "learning_rate": 3.0912623485983774e-05, + "loss": 1.2863, + "step": 8960 + }, + { + "epoch": 0.9462025316455697, + "grad_norm": 0.5236653685569763, + "learning_rate": 3.048653991490141e-05, + "loss": 1.2828, + "step": 8970 + }, + { + "epoch": 0.9472573839662447, + "grad_norm": 0.5181384682655334, + "learning_rate": 3.0066329258799184e-05, + "loss": 1.2812, + "step": 8980 + }, + { + "epoch": 0.9483122362869199, + "grad_norm": 0.4782514274120331, + "learning_rate": 2.965191056846266e-05, + "loss": 1.2875, + "step": 8990 + }, + { + "epoch": 0.9493670886075949, + "grad_norm": 0.4620096683502197, + "learning_rate": 2.9243204010439396e-05, + "loss": 1.2805, + "step": 9000 + }, + { + "epoch": 0.95042194092827, + "grad_norm": 0.4873718321323395, + "learning_rate": 2.8840130851659852e-05, + "loss": 1.2786, + "step": 9010 + }, + { + "epoch": 0.9514767932489452, + "grad_norm": 0.48899489641189575, + "learning_rate": 2.844261344427029e-05, + "loss": 1.2766, + "step": 9020 + }, + { + "epoch": 0.9525316455696202, + "grad_norm": 0.481893390417099, + "learning_rate": 2.805057521067472e-05, + "loss": 1.2981, + "step": 9030 + }, + { + "epoch": 0.9535864978902954, + "grad_norm": 0.4656616449356079, + "learning_rate": 2.766394062878302e-05, + "loss": 1.2737, + "step": 9040 + }, + { + "epoch": 0.9546413502109705, + "grad_norm": 0.4694961607456207, + "learning_rate": 2.7282635217462405e-05, + "loss": 1.285, + "step": 9050 + }, + { + "epoch": 0.9556962025316456, + "grad_norm": 0.45946142077445984, + "learning_rate": 2.6906585522189378e-05, + "loss": 1.2851, + 
"step": 9060 + }, + { + "epoch": 0.9567510548523207, + "grad_norm": 0.46677711606025696, + "learning_rate": 2.653571910089951e-05, + "loss": 1.288, + "step": 9070 + }, + { + "epoch": 0.9578059071729957, + "grad_norm": 0.45549505949020386, + "learning_rate": 2.6169964510032243e-05, + "loss": 1.2878, + "step": 9080 + }, + { + "epoch": 0.9588607594936709, + "grad_norm": 0.47640255093574524, + "learning_rate": 2.580925129076798e-05, + "loss": 1.2882, + "step": 9090 + }, + { + "epoch": 0.959915611814346, + "grad_norm": 0.462722510099411, + "learning_rate": 2.5453509955454954e-05, + "loss": 1.2666, + "step": 9100 + }, + { + "epoch": 0.9609704641350211, + "grad_norm": 0.46981674432754517, + "learning_rate": 2.510267197422317e-05, + "loss": 1.279, + "step": 9110 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 0.4761793315410614, + "learning_rate": 2.4756669761782806e-05, + "loss": 1.288, + "step": 9120 + }, + { + "epoch": 0.9630801687763713, + "grad_norm": 0.4618094861507416, + "learning_rate": 2.4415436664404643e-05, + "loss": 1.2744, + "step": 9130 + }, + { + "epoch": 0.9641350210970464, + "grad_norm": 0.49426230788230896, + "learning_rate": 2.4078906947079882e-05, + "loss": 1.2917, + "step": 9140 + }, + { + "epoch": 0.9651898734177216, + "grad_norm": 0.49430301785469055, + "learning_rate": 2.3747015780857007e-05, + "loss": 1.2808, + "step": 9150 + }, + { + "epoch": 0.9662447257383966, + "grad_norm": 0.46488291025161743, + "learning_rate": 2.3419699230353144e-05, + "loss": 1.2862, + "step": 9160 + }, + { + "epoch": 0.9672995780590717, + "grad_norm": 0.47296518087387085, + "learning_rate": 2.3096894241437583e-05, + "loss": 1.2852, + "step": 9170 + }, + { + "epoch": 0.9683544303797469, + "grad_norm": 0.46403154730796814, + "learning_rate": 2.2778538629085057e-05, + "loss": 1.2819, + "step": 9180 + }, + { + "epoch": 0.9694092827004219, + "grad_norm": 0.5006576180458069, + "learning_rate": 2.2464571065396428e-05, + "loss": 1.2761, + "step": 9190 + }, + { + "epoch": 
0.9704641350210971, + "grad_norm": 0.47130194306373596, + "learning_rate": 2.2154931067784525e-05, + "loss": 1.2807, + "step": 9200 + }, + { + "epoch": 0.9715189873417721, + "grad_norm": 0.46654370427131653, + "learning_rate": 2.1849558987322783e-05, + "loss": 1.2812, + "step": 9210 + }, + { + "epoch": 0.9725738396624473, + "grad_norm": 0.46959954500198364, + "learning_rate": 2.1548395997254516e-05, + "loss": 1.279, + "step": 9220 + }, + { + "epoch": 0.9736286919831224, + "grad_norm": 0.44776010513305664, + "learning_rate": 2.1251384081660546e-05, + "loss": 1.2699, + "step": 9230 + }, + { + "epoch": 0.9746835443037974, + "grad_norm": 0.476149320602417, + "learning_rate": 2.0958466024283035e-05, + "loss": 1.2654, + "step": 9240 + }, + { + "epoch": 0.9757383966244726, + "grad_norm": 0.4552365839481354, + "learning_rate": 2.0669585397503362e-05, + "loss": 1.2774, + "step": 9250 + }, + { + "epoch": 0.9767932489451476, + "grad_norm": 0.46575242280960083, + "learning_rate": 2.0384686551471954e-05, + "loss": 1.2737, + "step": 9260 + }, + { + "epoch": 0.9778481012658228, + "grad_norm": 0.4691290855407715, + "learning_rate": 2.0103714603387898e-05, + "loss": 1.2821, + "step": 9270 + }, + { + "epoch": 0.9789029535864979, + "grad_norm": 0.47082453966140747, + "learning_rate": 1.9826615426926342e-05, + "loss": 1.2719, + "step": 9280 + }, + { + "epoch": 0.979957805907173, + "grad_norm": 0.4488515853881836, + "learning_rate": 1.9553335641811623e-05, + "loss": 1.2788, + "step": 9290 + }, + { + "epoch": 0.9810126582278481, + "grad_norm": 0.4794052541255951, + "learning_rate": 1.9283822603534143e-05, + "loss": 1.2789, + "step": 9300 + }, + { + "epoch": 0.9820675105485233, + "grad_norm": 0.4959319531917572, + "learning_rate": 1.90180243932089e-05, + "loss": 1.2785, + "step": 9310 + }, + { + "epoch": 0.9831223628691983, + "grad_norm": 0.48605209589004517, + "learning_rate": 1.8755889807573868e-05, + "loss": 1.2741, + "step": 9320 + }, + { + "epoch": 0.9841772151898734, + "grad_norm": 
0.4825482964515686, + "learning_rate": 1.8497368349126255e-05, + "loss": 1.2858, + "step": 9330 + }, + { + "epoch": 0.9852320675105485, + "grad_norm": 0.47821491956710815, + "learning_rate": 1.824241021639465e-05, + "loss": 1.2851, + "step": 9340 + }, + { + "epoch": 0.9862869198312236, + "grad_norm": 0.44771477580070496, + "learning_rate": 1.799096629434529e-05, + "loss": 1.2944, + "step": 9350 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 0.46373650431632996, + "learning_rate": 1.7742988144920578e-05, + "loss": 1.2858, + "step": 9360 + }, + { + "epoch": 0.9883966244725738, + "grad_norm": 0.46483108401298523, + "learning_rate": 1.7498427997707978e-05, + "loss": 1.2862, + "step": 9370 + }, + { + "epoch": 0.989451476793249, + "grad_norm": 0.4672599732875824, + "learning_rate": 1.7257238740737548e-05, + "loss": 1.2671, + "step": 9380 + }, + { + "epoch": 0.990506329113924, + "grad_norm": 0.4882128834724426, + "learning_rate": 1.7019373911406307e-05, + "loss": 1.2739, + "step": 9390 + }, + { + "epoch": 0.9915611814345991, + "grad_norm": 0.4712848365306854, + "learning_rate": 1.67847876875277e-05, + "loss": 1.2876, + "step": 9400 + }, + { + "epoch": 0.9926160337552743, + "grad_norm": 0.4877878427505493, + "learning_rate": 1.655343487850443e-05, + "loss": 1.2627, + "step": 9410 + }, + { + "epoch": 0.9936708860759493, + "grad_norm": 0.4719924032688141, + "learning_rate": 1.6325270916622947e-05, + "loss": 1.2839, + "step": 9420 + }, + { + "epoch": 0.9947257383966245, + "grad_norm": 0.45898473262786865, + "learning_rate": 1.610025184846797e-05, + "loss": 1.2839, + "step": 9430 + }, + { + "epoch": 0.9957805907172996, + "grad_norm": 0.46861326694488525, + "learning_rate": 1.587833432645528e-05, + "loss": 1.2782, + "step": 9440 + }, + { + "epoch": 0.9968354430379747, + "grad_norm": 0.45154401659965515, + "learning_rate": 1.5659475600481297e-05, + "loss": 1.2865, + "step": 9450 + }, + { + "epoch": 0.9978902953586498, + "grad_norm": 0.4655771851539612, + "learning_rate": 
1.544363350968769e-05, + "loss": 1.2716, + "step": 9460 + }, + { + "epoch": 0.9989451476793249, + "grad_norm": 0.4726909101009369, + "learning_rate": 1.523076647433954e-05, + "loss": 1.2829, + "step": 9470 + }, + { + "epoch": 1.0, + "grad_norm": 1.4056248664855957, + "learning_rate": 1.5020833487815421e-05, + "loss": 1.2754, + "step": 9480 + } + ], + "logging_steps": 10, + "max_steps": 9480, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.832308198648013e+16, + "train_batch_size": 1024, + "trial_name": null, + "trial_params": null +} diff --git a/saves-mistral/checkpoint-9480/training_args.bin b/saves-mistral/checkpoint-9480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d1faa1c48c7f60ba47caed8764494750f8beff2a --- /dev/null +++ b/saves-mistral/checkpoint-9480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f901fa42c9c31b84daa832116f28b5ac09fc294fbfd8b28c9852fb42d560552 +size 5112 diff --git a/saves-mistral/config.json b/saves-mistral/config.json new file mode 100644 index 0000000000000000000000000000000000000000..27c3ad5440dbe5ee26df317f757bddbf9946d6e3 --- /dev/null +++ b/saves-mistral/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "MistralForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 768, + "max_position_embeddings": 131072, + "model_type": "mistral", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "rms_norm_eps": 1e-06, + "rope_theta": 10000.0, + "sliding_window": 4096, + "tie_word_embeddings": true, + "torch_dtype": "float32", + 
"transformers_version": "4.42.4", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-mistral/generation_config.json b/saves-mistral/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b2fc224da8a3685f78c733a0ef85e67242c17b5a --- /dev/null +++ b/saves-mistral/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.42.4" +} diff --git a/saves-mistral/model.safetensors b/saves-mistral/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fa9e3c118f6037392d9cfa26a160c6d59ddd6159 --- /dev/null +++ b/saves-mistral/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2bb958b87ab0f1c22dc19ea247e5d65a6e3bd054f83b734e3dd9c6ddc7537b7 +size 8346712 diff --git a/saves-mistral/result.log b/saves-mistral/result.log new file mode 100644 index 0000000000000000000000000000000000000000..60fd087d9ae2757dee0c723dd39f27cad0aa175d --- /dev/null +++ b/saves-mistral/result.log @@ -0,0 +1 @@ +{'train_runtime': 2136.997, 'train_samples_per_second': 4542.17, 'train_steps_per_second': 4.436, 'train_loss': 1.575562152580873, 'epoch': 1.0} \ No newline at end of file diff --git a/saves-mistral/special_tokens_map.json b/saves-mistral/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-mistral/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-mistral/tokenizer.json 
b/saves-mistral/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-mistral/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + 
":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + 
"er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + 
"å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + 
"el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + 
"å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 
787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 
928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, 
+ "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 
1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 
1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 
1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + 
"çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, 
+ "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 
1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + 
"çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-mistral/tokenizer_config.json b/saves-mistral/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-mistral/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + 
"model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-mpt-cosine/checkpoint-9480/config.json b/saves-mpt-cosine/checkpoint-9480/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a7bae8bd28a02ff350c658f15c2c052270df91b7 --- /dev/null +++ b/saves-mpt-cosine/checkpoint-9480/config.json @@ -0,0 +1,32 @@ +{ + "architectures": [ + "MptForCausalLM" + ], + "attn_config": { + "model_type": "" + }, + "d_model": 256, + "emb_pdrop": 0.0, + "embedding_fraction": 1.0, + "expansion_ratio": 4, + "hidden_act": "gelu", + "init_device": "cpu", + "initializer_range": 0.02, + "intermediate_size": 1024, + "layer_norm_epsilon": 1e-05, + "learned_pos_emb": true, + "logit_scale": null, + "max_seq_len": 2048, + "model_type": "mpt", + "n_heads": 8, + "n_layers": 2, + "no_bias": true, + "norm_type": "low_precision_layernorm", + "num_key_value_heads": 8, + "resid_pdrop": 0.0, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": false, + "verbose": 0, + "vocab_size": 2000 +} diff --git a/saves-mpt-cosine/checkpoint-9480/generation_config.json b/saves-mpt-cosine/checkpoint-9480/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..805bc20f96323ae6bf455904f78f359bf95ce35b --- /dev/null +++ b/saves-mpt-cosine/checkpoint-9480/generation_config.json @@ -0,0 +1,5 @@ +{ + "_from_model_config": true, + "transformers_version": "4.42.4", + "use_cache": false +} diff --git a/saves-mpt-cosine/checkpoint-9480/model.safetensors b/saves-mpt-cosine/checkpoint-9480/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9da5bf88089d4fb214e66f23ab2c5712c957eccd --- /dev/null +++ b/saves-mpt-cosine/checkpoint-9480/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6d732944cce2bcceb1c14eb977d09d5633b7dcc02fb8a2d325207fb87d34197 +size 8346072 diff 
--git a/saves-mpt-cosine/checkpoint-9480/optimizer.pt b/saves-mpt-cosine/checkpoint-9480/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..899f733f45c5e3156f8ad869aa3ca135e0dcb9ca --- /dev/null +++ b/saves-mpt-cosine/checkpoint-9480/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abe6ccc5d602c4b6a4f562f99657d5a5d612b8ffc97f99990b0879114766e63a +size 16701492 diff --git a/saves-mpt-cosine/checkpoint-9480/rng_state.pth b/saves-mpt-cosine/checkpoint-9480/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f8291cd6ce87668b786a72f3e93d072fbe54902 --- /dev/null +++ b/saves-mpt-cosine/checkpoint-9480/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c917636c7a58af68a29056522a757e9f9b99005b776641aa157c536967817d +size 14244 diff --git a/saves-mpt-cosine/checkpoint-9480/scheduler.pt b/saves-mpt-cosine/checkpoint-9480/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..74b04fc48c12ecef4ed191c0e0bab93fab8eb99a --- /dev/null +++ b/saves-mpt-cosine/checkpoint-9480/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cec59c7f60583f92116a8c17261df6f5e6643e0df2f9a66b3c4ae6ce50b33704 +size 1064 diff --git a/saves-mpt-cosine/checkpoint-9480/special_tokens_map.json b/saves-mpt-cosine/checkpoint-9480/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-mpt-cosine/checkpoint-9480/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false + } +} diff --git a/saves-mpt-cosine/checkpoint-9480/tokenizer.json b/saves-mpt-cosine/checkpoint-9480/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-mpt-cosine/checkpoint-9480/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + 
"-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 
188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 
339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + 
"æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + 
"Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, 
+ "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 
919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + 
"çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 
1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + 
"äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, 
+ ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 
1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 
1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + 
"Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + 
"Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-mpt-cosine/checkpoint-9480/tokenizer_config.json b/saves-mpt-cosine/checkpoint-9480/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-mpt-cosine/checkpoint-9480/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": 
"<|im_end|>", + "errors": "replace", + "model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-mpt-cosine/checkpoint-9480/trainer_state.json b/saves-mpt-cosine/checkpoint-9480/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..52f72cd15d95a04fce94fbd34f1630e61e5eccdd --- /dev/null +++ b/saves-mpt-cosine/checkpoint-9480/trainer_state.json @@ -0,0 +1,66393 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 9480, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00010548523206751055, + "grad_norm": 1.7449902296066284, + "learning_rate": 1.5789473684210526e-05, + "loss": 7.6289, + "step": 1 + }, + { + "epoch": 0.0002109704641350211, + "grad_norm": 1.7338628768920898, + "learning_rate": 3.157894736842105e-05, + "loss": 7.6291, + "step": 2 + }, + { + "epoch": 0.00031645569620253165, + "grad_norm": 1.716973900794983, + "learning_rate": 4.736842105263158e-05, + "loss": 7.611, + "step": 3 + }, + { + "epoch": 0.0004219409282700422, + "grad_norm": 1.7201451063156128, + "learning_rate": 6.31578947368421e-05, + "loss": 7.572, + "step": 4 + }, + { + "epoch": 0.0005274261603375527, + "grad_norm": 1.6881887912750244, + "learning_rate": 7.894736842105263e-05, + "loss": 7.5116, + "step": 5 + }, + { + "epoch": 0.0006329113924050633, + "grad_norm": 1.54430091381073, + "learning_rate": 9.473684210526316e-05, + "loss": 7.4509, + "step": 6 + }, + { + "epoch": 0.0007383966244725738, + "grad_norm": 1.4471763372421265, + "learning_rate": 0.00011052631578947368, + "loss": 7.3629, + "step": 7 + }, + { + "epoch": 0.0008438818565400844, + "grad_norm": 1.337325096130371, + "learning_rate": 0.0001263157894736842, + "loss": 7.2907, + "step": 8 + }, + { + "epoch": 0.0009493670886075949, + "grad_norm": 
1.2918668985366821, + "learning_rate": 0.00014210526315789474, + "loss": 7.204, + "step": 9 + }, + { + "epoch": 0.0010548523206751054, + "grad_norm": 1.240515112876892, + "learning_rate": 0.00015789473684210527, + "loss": 7.1351, + "step": 10 + }, + { + "epoch": 0.001160337552742616, + "grad_norm": 1.2354230880737305, + "learning_rate": 0.0001736842105263158, + "loss": 7.0535, + "step": 11 + }, + { + "epoch": 0.0012658227848101266, + "grad_norm": 1.210645079612732, + "learning_rate": 0.00018947368421052632, + "loss": 6.9832, + "step": 12 + }, + { + "epoch": 0.0013713080168776372, + "grad_norm": 1.2006981372833252, + "learning_rate": 0.00020526315789473685, + "loss": 6.9151, + "step": 13 + }, + { + "epoch": 0.0014767932489451476, + "grad_norm": 1.1963897943496704, + "learning_rate": 0.00022105263157894735, + "loss": 6.8483, + "step": 14 + }, + { + "epoch": 0.0015822784810126582, + "grad_norm": 1.171834111213684, + "learning_rate": 0.00023684210526315788, + "loss": 6.7854, + "step": 15 + }, + { + "epoch": 0.0016877637130801688, + "grad_norm": 1.144079327583313, + "learning_rate": 0.0002526315789473684, + "loss": 6.7298, + "step": 16 + }, + { + "epoch": 0.0017932489451476794, + "grad_norm": 1.1358823776245117, + "learning_rate": 0.00026842105263157897, + "loss": 6.649, + "step": 17 + }, + { + "epoch": 0.0018987341772151898, + "grad_norm": 1.1222927570343018, + "learning_rate": 0.00028421052631578947, + "loss": 6.5873, + "step": 18 + }, + { + "epoch": 0.0020042194092827004, + "grad_norm": 1.1203171014785767, + "learning_rate": 0.00030000000000000003, + "loss": 6.515, + "step": 19 + }, + { + "epoch": 0.002109704641350211, + "grad_norm": 1.0888824462890625, + "learning_rate": 0.00031578947368421053, + "loss": 6.4597, + "step": 20 + }, + { + "epoch": 0.0022151898734177216, + "grad_norm": 1.0599946975708008, + "learning_rate": 0.00033157894736842103, + "loss": 6.3966, + "step": 21 + }, + { + "epoch": 0.002320675105485232, + "grad_norm": 1.0464988946914673, + 
"learning_rate": 0.0003473684210526316, + "loss": 6.3349, + "step": 22 + }, + { + "epoch": 0.002426160337552743, + "grad_norm": 1.0240097045898438, + "learning_rate": 0.0003631578947368421, + "loss": 6.2705, + "step": 23 + }, + { + "epoch": 0.002531645569620253, + "grad_norm": 1.0054209232330322, + "learning_rate": 0.00037894736842105265, + "loss": 6.2096, + "step": 24 + }, + { + "epoch": 0.0026371308016877636, + "grad_norm": 0.9734827280044556, + "learning_rate": 0.00039473684210526315, + "loss": 6.1676, + "step": 25 + }, + { + "epoch": 0.0027426160337552744, + "grad_norm": 0.9612790942192078, + "learning_rate": 0.0004105263157894737, + "loss": 6.1043, + "step": 26 + }, + { + "epoch": 0.002848101265822785, + "grad_norm": 0.912335991859436, + "learning_rate": 0.0004263157894736842, + "loss": 6.0772, + "step": 27 + }, + { + "epoch": 0.002953586497890295, + "grad_norm": 0.90127032995224, + "learning_rate": 0.0004421052631578947, + "loss": 6.0061, + "step": 28 + }, + { + "epoch": 0.003059071729957806, + "grad_norm": 0.8752526044845581, + "learning_rate": 0.00045789473684210527, + "loss": 5.9622, + "step": 29 + }, + { + "epoch": 0.0031645569620253164, + "grad_norm": 0.8095108866691589, + "learning_rate": 0.00047368421052631577, + "loss": 5.9396, + "step": 30 + }, + { + "epoch": 0.003270042194092827, + "grad_norm": 0.7956899404525757, + "learning_rate": 0.0004894736842105264, + "loss": 5.8864, + "step": 31 + }, + { + "epoch": 0.0033755274261603376, + "grad_norm": 0.7796978950500488, + "learning_rate": 0.0005052631578947368, + "loss": 5.8281, + "step": 32 + }, + { + "epoch": 0.003481012658227848, + "grad_norm": 0.7578688859939575, + "learning_rate": 0.0005210526315789474, + "loss": 5.7744, + "step": 33 + }, + { + "epoch": 0.003586497890295359, + "grad_norm": 0.7146009206771851, + "learning_rate": 0.0005368421052631579, + "loss": 5.7591, + "step": 34 + }, + { + "epoch": 0.003691983122362869, + "grad_norm": 0.7075921297073364, + "learning_rate": 0.0005526315789473684, + 
"loss": 5.6787, + "step": 35 + }, + { + "epoch": 0.0037974683544303796, + "grad_norm": 0.6726436018943787, + "learning_rate": 0.0005684210526315789, + "loss": 5.662, + "step": 36 + }, + { + "epoch": 0.0039029535864978904, + "grad_norm": 0.6884089708328247, + "learning_rate": 0.0005842105263157895, + "loss": 5.5979, + "step": 37 + }, + { + "epoch": 0.004008438818565401, + "grad_norm": 0.6276026368141174, + "learning_rate": 0.0006000000000000001, + "loss": 5.5751, + "step": 38 + }, + { + "epoch": 0.004113924050632912, + "grad_norm": 0.6278057098388672, + "learning_rate": 0.0006157894736842105, + "loss": 5.4973, + "step": 39 + }, + { + "epoch": 0.004219409282700422, + "grad_norm": 0.6028639674186707, + "learning_rate": 0.0006315789473684211, + "loss": 5.4805, + "step": 40 + }, + { + "epoch": 0.004324894514767932, + "grad_norm": 0.6729767918586731, + "learning_rate": 0.0006473684210526316, + "loss": 5.4415, + "step": 41 + }, + { + "epoch": 0.004430379746835443, + "grad_norm": 0.8156175017356873, + "learning_rate": 0.0006631578947368421, + "loss": 5.4071, + "step": 42 + }, + { + "epoch": 0.004535864978902953, + "grad_norm": 1.0230156183242798, + "learning_rate": 0.0006789473684210526, + "loss": 5.3306, + "step": 43 + }, + { + "epoch": 0.004641350210970464, + "grad_norm": 0.7600889205932617, + "learning_rate": 0.0006947368421052632, + "loss": 5.3349, + "step": 44 + }, + { + "epoch": 0.004746835443037975, + "grad_norm": 0.5166876316070557, + "learning_rate": 0.0007105263157894736, + "loss": 5.2416, + "step": 45 + }, + { + "epoch": 0.004852320675105486, + "grad_norm": 0.8702167272567749, + "learning_rate": 0.0007263157894736842, + "loss": 5.2023, + "step": 46 + }, + { + "epoch": 0.004957805907172996, + "grad_norm": 0.7771248817443848, + "learning_rate": 0.0007421052631578947, + "loss": 5.1714, + "step": 47 + }, + { + "epoch": 0.005063291139240506, + "grad_norm": 0.5871554017066956, + "learning_rate": 0.0007578947368421053, + "loss": 5.1208, + "step": 48 + }, + { + "epoch": 
0.005168776371308017, + "grad_norm": 0.7715412974357605, + "learning_rate": 0.0007736842105263159, + "loss": 5.0558, + "step": 49 + }, + { + "epoch": 0.005274261603375527, + "grad_norm": 0.7553368210792542, + "learning_rate": 0.0007894736842105263, + "loss": 5.0294, + "step": 50 + }, + { + "epoch": 0.005379746835443038, + "grad_norm": 0.6124551296234131, + "learning_rate": 0.0008052631578947369, + "loss": 4.9747, + "step": 51 + }, + { + "epoch": 0.005485232067510549, + "grad_norm": 0.679780125617981, + "learning_rate": 0.0008210526315789474, + "loss": 4.9187, + "step": 52 + }, + { + "epoch": 0.005590717299578059, + "grad_norm": 0.5326610803604126, + "learning_rate": 0.0008368421052631579, + "loss": 4.8824, + "step": 53 + }, + { + "epoch": 0.00569620253164557, + "grad_norm": 0.5514304041862488, + "learning_rate": 0.0008526315789473684, + "loss": 4.8024, + "step": 54 + }, + { + "epoch": 0.0058016877637130804, + "grad_norm": 0.5211371779441833, + "learning_rate": 0.000868421052631579, + "loss": 4.7724, + "step": 55 + }, + { + "epoch": 0.00590717299578059, + "grad_norm": 0.4795949161052704, + "learning_rate": 0.0008842105263157894, + "loss": 4.708, + "step": 56 + }, + { + "epoch": 0.006012658227848101, + "grad_norm": 0.5955512523651123, + "learning_rate": 0.0009, + "loss": 4.7012, + "step": 57 + }, + { + "epoch": 0.006118143459915612, + "grad_norm": 0.7217339873313904, + "learning_rate": 0.0009157894736842105, + "loss": 4.6721, + "step": 58 + }, + { + "epoch": 0.006223628691983122, + "grad_norm": 0.8777037858963013, + "learning_rate": 0.0009315789473684211, + "loss": 4.6172, + "step": 59 + }, + { + "epoch": 0.006329113924050633, + "grad_norm": 0.8285601735115051, + "learning_rate": 0.0009473684210526315, + "loss": 4.5655, + "step": 60 + }, + { + "epoch": 0.006434599156118144, + "grad_norm": 0.9089198708534241, + "learning_rate": 0.0009631578947368421, + "loss": 4.5524, + "step": 61 + }, + { + "epoch": 0.006540084388185654, + "grad_norm": 1.1070879697799683, + 
"learning_rate": 0.0009789473684210528, + "loss": 4.4981, + "step": 62 + }, + { + "epoch": 0.006645569620253164, + "grad_norm": 1.0586819648742676, + "learning_rate": 0.000994736842105263, + "loss": 4.4752, + "step": 63 + }, + { + "epoch": 0.006751054852320675, + "grad_norm": 0.9307000041007996, + "learning_rate": 0.0010105263157894737, + "loss": 4.4389, + "step": 64 + }, + { + "epoch": 0.006856540084388186, + "grad_norm": 0.6454178690910339, + "learning_rate": 0.0010263157894736842, + "loss": 4.4157, + "step": 65 + }, + { + "epoch": 0.006962025316455696, + "grad_norm": 0.714447021484375, + "learning_rate": 0.0010421052631578948, + "loss": 4.3694, + "step": 66 + }, + { + "epoch": 0.007067510548523207, + "grad_norm": 0.5636608004570007, + "learning_rate": 0.0010578947368421053, + "loss": 4.3479, + "step": 67 + }, + { + "epoch": 0.007172995780590718, + "grad_norm": 0.6415608525276184, + "learning_rate": 0.0010736842105263159, + "loss": 4.3142, + "step": 68 + }, + { + "epoch": 0.007278481012658228, + "grad_norm": 0.7091131806373596, + "learning_rate": 0.0010894736842105264, + "loss": 4.2493, + "step": 69 + }, + { + "epoch": 0.007383966244725738, + "grad_norm": 1.0693920850753784, + "learning_rate": 0.0011052631578947368, + "loss": 4.2392, + "step": 70 + }, + { + "epoch": 0.007489451476793249, + "grad_norm": 1.3666249513626099, + "learning_rate": 0.0011210526315789473, + "loss": 4.2674, + "step": 71 + }, + { + "epoch": 0.007594936708860759, + "grad_norm": 1.1438789367675781, + "learning_rate": 0.0011368421052631579, + "loss": 4.2183, + "step": 72 + }, + { + "epoch": 0.00770042194092827, + "grad_norm": 1.14469313621521, + "learning_rate": 0.0011526315789473684, + "loss": 4.1865, + "step": 73 + }, + { + "epoch": 0.007805907172995781, + "grad_norm": 0.9215512275695801, + "learning_rate": 0.001168421052631579, + "loss": 4.1557, + "step": 74 + }, + { + "epoch": 0.007911392405063292, + "grad_norm": 1.0509916543960571, + "learning_rate": 0.0011842105263157896, + "loss": 
4.1356, + "step": 75 + }, + { + "epoch": 0.008016877637130802, + "grad_norm": 0.7129611968994141, + "learning_rate": 0.0012000000000000001, + "loss": 4.1133, + "step": 76 + }, + { + "epoch": 0.008122362869198312, + "grad_norm": 0.6992345452308655, + "learning_rate": 0.0012157894736842105, + "loss": 4.0978, + "step": 77 + }, + { + "epoch": 0.008227848101265823, + "grad_norm": 0.6869945526123047, + "learning_rate": 0.001231578947368421, + "loss": 4.0684, + "step": 78 + }, + { + "epoch": 0.008333333333333333, + "grad_norm": 0.9028368592262268, + "learning_rate": 0.0012473684210526316, + "loss": 4.0426, + "step": 79 + }, + { + "epoch": 0.008438818565400843, + "grad_norm": 1.2915066480636597, + "learning_rate": 0.0012631578947368421, + "loss": 4.0491, + "step": 80 + }, + { + "epoch": 0.008544303797468355, + "grad_norm": 0.7310708165168762, + "learning_rate": 0.0012789473684210527, + "loss": 4.012, + "step": 81 + }, + { + "epoch": 0.008649789029535865, + "grad_norm": 1.0205239057540894, + "learning_rate": 0.0012947368421052632, + "loss": 3.9973, + "step": 82 + }, + { + "epoch": 0.008755274261603375, + "grad_norm": 1.4164910316467285, + "learning_rate": 0.0013105263157894738, + "loss": 3.985, + "step": 83 + }, + { + "epoch": 0.008860759493670886, + "grad_norm": 0.7533506155014038, + "learning_rate": 0.0013263157894736841, + "loss": 3.9303, + "step": 84 + }, + { + "epoch": 0.008966244725738396, + "grad_norm": 0.791569709777832, + "learning_rate": 0.0013421052631578947, + "loss": 3.9227, + "step": 85 + }, + { + "epoch": 0.009071729957805906, + "grad_norm": 0.681761622428894, + "learning_rate": 0.0013578947368421052, + "loss": 3.9313, + "step": 86 + }, + { + "epoch": 0.009177215189873418, + "grad_norm": 0.8412415385246277, + "learning_rate": 0.0013736842105263158, + "loss": 3.9217, + "step": 87 + }, + { + "epoch": 0.009282700421940928, + "grad_norm": 0.8644207119941711, + "learning_rate": 0.0013894736842105264, + "loss": 3.8452, + "step": 88 + }, + { + "epoch": 
0.009388185654008438, + "grad_norm": 1.0778595209121704, + "learning_rate": 0.001405263157894737, + "loss": 3.8645, + "step": 89 + }, + { + "epoch": 0.00949367088607595, + "grad_norm": 1.128861904144287, + "learning_rate": 0.0014210526315789472, + "loss": 3.8562, + "step": 90 + }, + { + "epoch": 0.00959915611814346, + "grad_norm": 1.4605854749679565, + "learning_rate": 0.0014368421052631578, + "loss": 3.8593, + "step": 91 + }, + { + "epoch": 0.009704641350210971, + "grad_norm": 0.8654713034629822, + "learning_rate": 0.0014526315789473684, + "loss": 3.8271, + "step": 92 + }, + { + "epoch": 0.009810126582278481, + "grad_norm": 1.052846074104309, + "learning_rate": 0.0014684210526315791, + "loss": 3.8126, + "step": 93 + }, + { + "epoch": 0.009915611814345991, + "grad_norm": 1.1422046422958374, + "learning_rate": 0.0014842105263157895, + "loss": 3.8476, + "step": 94 + }, + { + "epoch": 0.010021097046413503, + "grad_norm": 0.8188043236732483, + "learning_rate": 0.0015, + "loss": 3.7875, + "step": 95 + }, + { + "epoch": 0.010126582278481013, + "grad_norm": 0.9520319104194641, + "learning_rate": 0.00149999995797938, + "loss": 3.7896, + "step": 96 + }, + { + "epoch": 0.010232067510548523, + "grad_norm": 1.0652656555175781, + "learning_rate": 0.001499999831917525, + "loss": 3.7584, + "step": 97 + }, + { + "epoch": 0.010337552742616034, + "grad_norm": 1.3198622465133667, + "learning_rate": 0.001499999621814449, + "loss": 3.7555, + "step": 98 + }, + { + "epoch": 0.010443037974683544, + "grad_norm": 0.8654085397720337, + "learning_rate": 0.0014999993276701756, + "loss": 3.7755, + "step": 99 + }, + { + "epoch": 0.010548523206751054, + "grad_norm": 0.6829955577850342, + "learning_rate": 0.0014999989494847376, + "loss": 3.7139, + "step": 100 + }, + { + "epoch": 0.010654008438818566, + "grad_norm": 0.9426255822181702, + "learning_rate": 0.0014999984872581774, + "loss": 3.7199, + "step": 101 + }, + { + "epoch": 0.010759493670886076, + "grad_norm": 0.9915966987609863, + 
"learning_rate": 0.0014999979409905469, + "loss": 3.6799, + "step": 102 + }, + { + "epoch": 0.010864978902953586, + "grad_norm": 1.029162049293518, + "learning_rate": 0.0014999973106819074, + "loss": 3.6747, + "step": 103 + }, + { + "epoch": 0.010970464135021098, + "grad_norm": 1.0809555053710938, + "learning_rate": 0.0014999965963323294, + "loss": 3.6581, + "step": 104 + }, + { + "epoch": 0.011075949367088608, + "grad_norm": 0.9141569137573242, + "learning_rate": 0.0014999957979418927, + "loss": 3.6373, + "step": 105 + }, + { + "epoch": 0.011181434599156118, + "grad_norm": 0.9079375267028809, + "learning_rate": 0.0014999949155106874, + "loss": 3.6415, + "step": 106 + }, + { + "epoch": 0.01128691983122363, + "grad_norm": 0.8968284726142883, + "learning_rate": 0.0014999939490388115, + "loss": 3.6428, + "step": 107 + }, + { + "epoch": 0.01139240506329114, + "grad_norm": 0.7162794470787048, + "learning_rate": 0.0014999928985263743, + "loss": 3.6101, + "step": 108 + }, + { + "epoch": 0.01149789029535865, + "grad_norm": 0.7564324736595154, + "learning_rate": 0.001499991763973493, + "loss": 3.621, + "step": 109 + }, + { + "epoch": 0.011603375527426161, + "grad_norm": 0.6546834707260132, + "learning_rate": 0.0014999905453802946, + "loss": 3.5727, + "step": 110 + }, + { + "epoch": 0.01170886075949367, + "grad_norm": 0.6213345527648926, + "learning_rate": 0.0014999892427469156, + "loss": 3.5805, + "step": 111 + }, + { + "epoch": 0.01181434599156118, + "grad_norm": 0.6635833382606506, + "learning_rate": 0.0014999878560735024, + "loss": 3.5753, + "step": 112 + }, + { + "epoch": 0.011919831223628692, + "grad_norm": 0.7797887921333313, + "learning_rate": 0.0014999863853602101, + "loss": 3.5457, + "step": 113 + }, + { + "epoch": 0.012025316455696202, + "grad_norm": 0.9612129330635071, + "learning_rate": 0.0014999848306072037, + "loss": 3.573, + "step": 114 + }, + { + "epoch": 0.012130801687763712, + "grad_norm": 1.0195062160491943, + "learning_rate": 0.0014999831918146571, + 
"loss": 3.5633, + "step": 115 + }, + { + "epoch": 0.012236286919831224, + "grad_norm": 1.2233003377914429, + "learning_rate": 0.001499981468982754, + "loss": 3.5435, + "step": 116 + }, + { + "epoch": 0.012341772151898734, + "grad_norm": 0.7325146198272705, + "learning_rate": 0.001499979662111688, + "loss": 3.5526, + "step": 117 + }, + { + "epoch": 0.012447257383966244, + "grad_norm": 0.919965386390686, + "learning_rate": 0.0014999777712016607, + "loss": 3.5256, + "step": 118 + }, + { + "epoch": 0.012552742616033756, + "grad_norm": 1.0901516675949097, + "learning_rate": 0.0014999757962528846, + "loss": 3.4886, + "step": 119 + }, + { + "epoch": 0.012658227848101266, + "grad_norm": 0.9566196203231812, + "learning_rate": 0.0014999737372655805, + "loss": 3.5064, + "step": 120 + }, + { + "epoch": 0.012763713080168776, + "grad_norm": 0.9801931381225586, + "learning_rate": 0.0014999715942399798, + "loss": 3.5122, + "step": 121 + }, + { + "epoch": 0.012869198312236287, + "grad_norm": 0.7311195135116577, + "learning_rate": 0.001499969367176322, + "loss": 3.4947, + "step": 122 + }, + { + "epoch": 0.012974683544303797, + "grad_norm": 0.8884874582290649, + "learning_rate": 0.0014999670560748573, + "loss": 3.4599, + "step": 123 + }, + { + "epoch": 0.013080168776371307, + "grad_norm": 0.9854843020439148, + "learning_rate": 0.001499964660935844, + "loss": 3.454, + "step": 124 + }, + { + "epoch": 0.013185654008438819, + "grad_norm": 1.1178979873657227, + "learning_rate": 0.0014999621817595509, + "loss": 3.4734, + "step": 125 + }, + { + "epoch": 0.013291139240506329, + "grad_norm": 0.9195834398269653, + "learning_rate": 0.0014999596185462556, + "loss": 3.4436, + "step": 126 + }, + { + "epoch": 0.01339662447257384, + "grad_norm": 0.9643999338150024, + "learning_rate": 0.0014999569712962452, + "loss": 3.4614, + "step": 127 + }, + { + "epoch": 0.01350210970464135, + "grad_norm": 0.9799708724021912, + "learning_rate": 0.0014999542400098169, + "loss": 3.4246, + "step": 128 + }, + { + 
"epoch": 0.01360759493670886, + "grad_norm": 0.8561819195747375, + "learning_rate": 0.0014999514246872762, + "loss": 3.4144, + "step": 129 + }, + { + "epoch": 0.013713080168776372, + "grad_norm": 0.8281694650650024, + "learning_rate": 0.0014999485253289388, + "loss": 3.4325, + "step": 130 + }, + { + "epoch": 0.013818565400843882, + "grad_norm": 1.3589009046554565, + "learning_rate": 0.0014999455419351297, + "loss": 3.3991, + "step": 131 + }, + { + "epoch": 0.013924050632911392, + "grad_norm": 0.9681662917137146, + "learning_rate": 0.001499942474506183, + "loss": 3.4486, + "step": 132 + }, + { + "epoch": 0.014029535864978904, + "grad_norm": 0.8271952867507935, + "learning_rate": 0.0014999393230424422, + "loss": 3.4166, + "step": 133 + }, + { + "epoch": 0.014135021097046414, + "grad_norm": 1.016274333000183, + "learning_rate": 0.001499936087544261, + "loss": 3.3614, + "step": 134 + }, + { + "epoch": 0.014240506329113924, + "grad_norm": 0.9969285130500793, + "learning_rate": 0.001499932768012002, + "loss": 3.3817, + "step": 135 + }, + { + "epoch": 0.014345991561181435, + "grad_norm": 0.9318091869354248, + "learning_rate": 0.0014999293644460362, + "loss": 3.3661, + "step": 136 + }, + { + "epoch": 0.014451476793248945, + "grad_norm": 1.208132028579712, + "learning_rate": 0.0014999258768467459, + "loss": 3.3827, + "step": 137 + }, + { + "epoch": 0.014556962025316455, + "grad_norm": 0.9803891777992249, + "learning_rate": 0.0014999223052145215, + "loss": 3.3407, + "step": 138 + }, + { + "epoch": 0.014662447257383967, + "grad_norm": 1.0232402086257935, + "learning_rate": 0.0014999186495497636, + "loss": 3.3286, + "step": 139 + }, + { + "epoch": 0.014767932489451477, + "grad_norm": 1.0897318124771118, + "learning_rate": 0.0014999149098528814, + "loss": 3.3331, + "step": 140 + }, + { + "epoch": 0.014873417721518987, + "grad_norm": 1.0326924324035645, + "learning_rate": 0.0014999110861242944, + "loss": 3.341, + "step": 141 + }, + { + "epoch": 0.014978902953586498, + 
"grad_norm": 0.7731376886367798, + "learning_rate": 0.0014999071783644306, + "loss": 3.3213, + "step": 142 + }, + { + "epoch": 0.015084388185654008, + "grad_norm": 0.7237089276313782, + "learning_rate": 0.001499903186573728, + "loss": 3.307, + "step": 143 + }, + { + "epoch": 0.015189873417721518, + "grad_norm": 0.8910592794418335, + "learning_rate": 0.001499899110752634, + "loss": 3.3199, + "step": 144 + }, + { + "epoch": 0.01529535864978903, + "grad_norm": 0.7983212471008301, + "learning_rate": 0.0014998949509016054, + "loss": 3.3293, + "step": 145 + }, + { + "epoch": 0.01540084388185654, + "grad_norm": 0.8508373498916626, + "learning_rate": 0.0014998907070211084, + "loss": 3.2842, + "step": 146 + }, + { + "epoch": 0.01550632911392405, + "grad_norm": 0.7900875210762024, + "learning_rate": 0.0014998863791116182, + "loss": 3.3113, + "step": 147 + }, + { + "epoch": 0.015611814345991562, + "grad_norm": 0.6923654675483704, + "learning_rate": 0.0014998819671736198, + "loss": 3.2521, + "step": 148 + }, + { + "epoch": 0.015717299578059073, + "grad_norm": 0.6420534253120422, + "learning_rate": 0.001499877471207608, + "loss": 3.2711, + "step": 149 + }, + { + "epoch": 0.015822784810126583, + "grad_norm": 0.6418945789337158, + "learning_rate": 0.0014998728912140862, + "loss": 3.2671, + "step": 150 + }, + { + "epoch": 0.015928270042194093, + "grad_norm": 0.6371487975120544, + "learning_rate": 0.0014998682271935677, + "loss": 3.3004, + "step": 151 + }, + { + "epoch": 0.016033755274261603, + "grad_norm": 0.792375922203064, + "learning_rate": 0.0014998634791465752, + "loss": 3.2312, + "step": 152 + }, + { + "epoch": 0.016139240506329113, + "grad_norm": 0.9434076547622681, + "learning_rate": 0.001499858647073641, + "loss": 3.2927, + "step": 153 + }, + { + "epoch": 0.016244725738396623, + "grad_norm": 1.125985860824585, + "learning_rate": 0.0014998537309753057, + "loss": 3.2413, + "step": 154 + }, + { + "epoch": 0.016350210970464137, + "grad_norm": 1.0840271711349487, + 
"learning_rate": 0.001499848730852121, + "loss": 3.2529, + "step": 155 + }, + { + "epoch": 0.016455696202531647, + "grad_norm": 1.471110463142395, + "learning_rate": 0.001499843646704647, + "loss": 3.2508, + "step": 156 + }, + { + "epoch": 0.016561181434599156, + "grad_norm": 0.948409378528595, + "learning_rate": 0.0014998384785334532, + "loss": 3.2609, + "step": 157 + }, + { + "epoch": 0.016666666666666666, + "grad_norm": 0.883063793182373, + "learning_rate": 0.0014998332263391192, + "loss": 3.2226, + "step": 158 + }, + { + "epoch": 0.016772151898734176, + "grad_norm": 0.8024165034294128, + "learning_rate": 0.0014998278901222327, + "loss": 3.226, + "step": 159 + }, + { + "epoch": 0.016877637130801686, + "grad_norm": 0.8328113555908203, + "learning_rate": 0.0014998224698833922, + "loss": 3.2249, + "step": 160 + }, + { + "epoch": 0.0169831223628692, + "grad_norm": 0.8236071467399597, + "learning_rate": 0.0014998169656232053, + "loss": 3.1797, + "step": 161 + }, + { + "epoch": 0.01708860759493671, + "grad_norm": 0.8530570268630981, + "learning_rate": 0.0014998113773422883, + "loss": 3.2283, + "step": 162 + }, + { + "epoch": 0.01719409282700422, + "grad_norm": 1.046746850013733, + "learning_rate": 0.0014998057050412674, + "loss": 3.214, + "step": 163 + }, + { + "epoch": 0.01729957805907173, + "grad_norm": 0.9293539524078369, + "learning_rate": 0.0014997999487207786, + "loss": 3.1937, + "step": 164 + }, + { + "epoch": 0.01740506329113924, + "grad_norm": 0.7571192979812622, + "learning_rate": 0.0014997941083814666, + "loss": 3.2023, + "step": 165 + }, + { + "epoch": 0.01751054852320675, + "grad_norm": 0.9504626989364624, + "learning_rate": 0.001499788184023986, + "loss": 3.1936, + "step": 166 + }, + { + "epoch": 0.017616033755274263, + "grad_norm": 0.9948880076408386, + "learning_rate": 0.0014997821756490008, + "loss": 3.1606, + "step": 167 + }, + { + "epoch": 0.017721518987341773, + "grad_norm": 0.855860710144043, + "learning_rate": 0.0014997760832571839, + "loss": 
3.1552, + "step": 168 + }, + { + "epoch": 0.017827004219409283, + "grad_norm": 0.9467235803604126, + "learning_rate": 0.001499769906849218, + "loss": 3.1409, + "step": 169 + }, + { + "epoch": 0.017932489451476793, + "grad_norm": 1.059126377105713, + "learning_rate": 0.0014997636464257956, + "loss": 3.1568, + "step": 170 + }, + { + "epoch": 0.018037974683544303, + "grad_norm": 1.2082563638687134, + "learning_rate": 0.0014997573019876179, + "loss": 3.1462, + "step": 171 + }, + { + "epoch": 0.018143459915611813, + "grad_norm": 1.1381077766418457, + "learning_rate": 0.0014997508735353957, + "loss": 3.1906, + "step": 172 + }, + { + "epoch": 0.018248945147679326, + "grad_norm": 1.0192127227783203, + "learning_rate": 0.0014997443610698497, + "loss": 3.1718, + "step": 173 + }, + { + "epoch": 0.018354430379746836, + "grad_norm": 0.8140352964401245, + "learning_rate": 0.0014997377645917095, + "loss": 3.1154, + "step": 174 + }, + { + "epoch": 0.018459915611814346, + "grad_norm": 0.9200620055198669, + "learning_rate": 0.001499731084101714, + "loss": 3.1716, + "step": 175 + }, + { + "epoch": 0.018565400843881856, + "grad_norm": 0.7317301630973816, + "learning_rate": 0.0014997243196006125, + "loss": 3.1404, + "step": 176 + }, + { + "epoch": 0.018670886075949366, + "grad_norm": 0.7657119035720825, + "learning_rate": 0.001499717471089162, + "loss": 3.1464, + "step": 177 + }, + { + "epoch": 0.018776371308016876, + "grad_norm": 0.9229580760002136, + "learning_rate": 0.0014997105385681306, + "loss": 3.1328, + "step": 178 + }, + { + "epoch": 0.01888185654008439, + "grad_norm": 1.0139609575271606, + "learning_rate": 0.001499703522038295, + "loss": 3.1525, + "step": 179 + }, + { + "epoch": 0.0189873417721519, + "grad_norm": 1.03373384475708, + "learning_rate": 0.0014996964215004416, + "loss": 3.127, + "step": 180 + }, + { + "epoch": 0.01909282700421941, + "grad_norm": 1.160645604133606, + "learning_rate": 0.0014996892369553655, + "loss": 3.1409, + "step": 181 + }, + { + "epoch": 
0.01919831223628692, + "grad_norm": 0.9137207269668579, + "learning_rate": 0.0014996819684038726, + "loss": 3.1218, + "step": 182 + }, + { + "epoch": 0.01930379746835443, + "grad_norm": 0.9125514626502991, + "learning_rate": 0.0014996746158467762, + "loss": 3.0961, + "step": 183 + }, + { + "epoch": 0.019409282700421943, + "grad_norm": 0.8952777981758118, + "learning_rate": 0.0014996671792849015, + "loss": 3.1264, + "step": 184 + }, + { + "epoch": 0.019514767932489453, + "grad_norm": 1.0569825172424316, + "learning_rate": 0.001499659658719081, + "loss": 3.0838, + "step": 185 + }, + { + "epoch": 0.019620253164556962, + "grad_norm": 1.2028380632400513, + "learning_rate": 0.0014996520541501574, + "loss": 3.0729, + "step": 186 + }, + { + "epoch": 0.019725738396624472, + "grad_norm": 1.1114720106124878, + "learning_rate": 0.0014996443655789832, + "loss": 3.0613, + "step": 187 + }, + { + "epoch": 0.019831223628691982, + "grad_norm": 0.726711094379425, + "learning_rate": 0.0014996365930064197, + "loss": 3.0578, + "step": 188 + }, + { + "epoch": 0.019936708860759492, + "grad_norm": 0.8056529760360718, + "learning_rate": 0.001499628736433338, + "loss": 3.0649, + "step": 189 + }, + { + "epoch": 0.020042194092827006, + "grad_norm": 0.9793965816497803, + "learning_rate": 0.0014996207958606182, + "loss": 3.0912, + "step": 190 + }, + { + "epoch": 0.020147679324894516, + "grad_norm": 0.9926120042800903, + "learning_rate": 0.0014996127712891504, + "loss": 3.0485, + "step": 191 + }, + { + "epoch": 0.020253164556962026, + "grad_norm": 1.2016924619674683, + "learning_rate": 0.0014996046627198337, + "loss": 3.102, + "step": 192 + }, + { + "epoch": 0.020358649789029536, + "grad_norm": 1.0392396450042725, + "learning_rate": 0.0014995964701535768, + "loss": 3.0328, + "step": 193 + }, + { + "epoch": 0.020464135021097046, + "grad_norm": 1.150130033493042, + "learning_rate": 0.0014995881935912973, + "loss": 3.0741, + "step": 194 + }, + { + "epoch": 0.020569620253164556, + "grad_norm": 
0.9841833710670471, + "learning_rate": 0.0014995798330339233, + "loss": 3.0444, + "step": 195 + }, + { + "epoch": 0.02067510548523207, + "grad_norm": 0.9405685067176819, + "learning_rate": 0.001499571388482391, + "loss": 3.0383, + "step": 196 + }, + { + "epoch": 0.02078059071729958, + "grad_norm": 1.025615930557251, + "learning_rate": 0.001499562859937647, + "loss": 3.0365, + "step": 197 + }, + { + "epoch": 0.02088607594936709, + "grad_norm": 1.0500154495239258, + "learning_rate": 0.001499554247400647, + "loss": 3.056, + "step": 198 + }, + { + "epoch": 0.0209915611814346, + "grad_norm": 0.9123498797416687, + "learning_rate": 0.0014995455508723557, + "loss": 3.0539, + "step": 199 + }, + { + "epoch": 0.02109704641350211, + "grad_norm": 0.7662825584411621, + "learning_rate": 0.001499536770353748, + "loss": 2.9924, + "step": 200 + }, + { + "epoch": 0.02120253164556962, + "grad_norm": 0.8258485198020935, + "learning_rate": 0.0014995279058458075, + "loss": 3.0295, + "step": 201 + }, + { + "epoch": 0.021308016877637132, + "grad_norm": 0.8039643168449402, + "learning_rate": 0.001499518957349528, + "loss": 3.019, + "step": 202 + }, + { + "epoch": 0.021413502109704642, + "grad_norm": 0.7900151610374451, + "learning_rate": 0.0014995099248659115, + "loss": 3.0207, + "step": 203 + }, + { + "epoch": 0.021518987341772152, + "grad_norm": 1.0187426805496216, + "learning_rate": 0.001499500808395971, + "loss": 3.0161, + "step": 204 + }, + { + "epoch": 0.021624472573839662, + "grad_norm": 1.2213716506958008, + "learning_rate": 0.0014994916079407272, + "loss": 2.998, + "step": 205 + }, + { + "epoch": 0.021729957805907172, + "grad_norm": 1.0322155952453613, + "learning_rate": 0.0014994823235012114, + "loss": 2.9705, + "step": 206 + }, + { + "epoch": 0.021835443037974682, + "grad_norm": 0.8874404430389404, + "learning_rate": 0.0014994729550784642, + "loss": 2.9919, + "step": 207 + }, + { + "epoch": 0.021940928270042195, + "grad_norm": 0.8917050361633301, + "learning_rate": 
0.001499463502673535, + "loss": 2.9676, + "step": 208 + }, + { + "epoch": 0.022046413502109705, + "grad_norm": 0.853066623210907, + "learning_rate": 0.0014994539662874832, + "loss": 2.9772, + "step": 209 + }, + { + "epoch": 0.022151898734177215, + "grad_norm": 0.7352718710899353, + "learning_rate": 0.0014994443459213774, + "loss": 3.0185, + "step": 210 + }, + { + "epoch": 0.022257383966244725, + "grad_norm": 0.7172362804412842, + "learning_rate": 0.0014994346415762956, + "loss": 2.9693, + "step": 211 + }, + { + "epoch": 0.022362869198312235, + "grad_norm": 0.9413065910339355, + "learning_rate": 0.0014994248532533253, + "loss": 2.9696, + "step": 212 + }, + { + "epoch": 0.022468354430379745, + "grad_norm": 1.2420397996902466, + "learning_rate": 0.001499414980953563, + "loss": 2.9994, + "step": 213 + }, + { + "epoch": 0.02257383966244726, + "grad_norm": 1.0437681674957275, + "learning_rate": 0.0014994050246781153, + "loss": 2.9756, + "step": 214 + }, + { + "epoch": 0.02267932489451477, + "grad_norm": 1.0030962228775024, + "learning_rate": 0.0014993949844280977, + "loss": 2.9346, + "step": 215 + }, + { + "epoch": 0.02278481012658228, + "grad_norm": 0.9040054082870483, + "learning_rate": 0.0014993848602046355, + "loss": 2.9625, + "step": 216 + }, + { + "epoch": 0.02289029535864979, + "grad_norm": 0.9244067668914795, + "learning_rate": 0.0014993746520088626, + "loss": 2.9478, + "step": 217 + }, + { + "epoch": 0.0229957805907173, + "grad_norm": 0.9604316353797913, + "learning_rate": 0.0014993643598419234, + "loss": 2.9745, + "step": 218 + }, + { + "epoch": 0.023101265822784812, + "grad_norm": 0.8594542145729065, + "learning_rate": 0.0014993539837049707, + "loss": 2.9654, + "step": 219 + }, + { + "epoch": 0.023206751054852322, + "grad_norm": 0.8930016756057739, + "learning_rate": 0.001499343523599168, + "loss": 2.9243, + "step": 220 + }, + { + "epoch": 0.02331223628691983, + "grad_norm": 1.065418004989624, + "learning_rate": 0.0014993329795256864, + "loss": 2.934, + 
"step": 221 + }, + { + "epoch": 0.02341772151898734, + "grad_norm": 1.2418255805969238, + "learning_rate": 0.0014993223514857081, + "loss": 2.9131, + "step": 222 + }, + { + "epoch": 0.02352320675105485, + "grad_norm": 1.0544636249542236, + "learning_rate": 0.001499311639480424, + "loss": 2.9247, + "step": 223 + }, + { + "epoch": 0.02362869198312236, + "grad_norm": 1.0941312313079834, + "learning_rate": 0.0014993008435110345, + "loss": 2.9115, + "step": 224 + }, + { + "epoch": 0.023734177215189875, + "grad_norm": 1.1603505611419678, + "learning_rate": 0.0014992899635787487, + "loss": 2.9077, + "step": 225 + }, + { + "epoch": 0.023839662447257385, + "grad_norm": 0.8226144313812256, + "learning_rate": 0.0014992789996847863, + "loss": 2.9441, + "step": 226 + }, + { + "epoch": 0.023945147679324895, + "grad_norm": 0.9195393919944763, + "learning_rate": 0.0014992679518303761, + "loss": 2.8946, + "step": 227 + }, + { + "epoch": 0.024050632911392405, + "grad_norm": 1.0850510597229004, + "learning_rate": 0.001499256820016755, + "loss": 2.9029, + "step": 228 + }, + { + "epoch": 0.024156118143459915, + "grad_norm": 0.964654803276062, + "learning_rate": 0.0014992456042451717, + "loss": 2.8735, + "step": 229 + }, + { + "epoch": 0.024261603375527425, + "grad_norm": 1.1028965711593628, + "learning_rate": 0.0014992343045168823, + "loss": 2.9071, + "step": 230 + }, + { + "epoch": 0.024367088607594938, + "grad_norm": 0.919209361076355, + "learning_rate": 0.0014992229208331527, + "loss": 2.8852, + "step": 231 + }, + { + "epoch": 0.024472573839662448, + "grad_norm": 0.8909591436386108, + "learning_rate": 0.0014992114531952592, + "loss": 2.903, + "step": 232 + }, + { + "epoch": 0.024578059071729958, + "grad_norm": 0.8053556680679321, + "learning_rate": 0.0014991999016044865, + "loss": 2.8513, + "step": 233 + }, + { + "epoch": 0.024683544303797468, + "grad_norm": 0.9048803448677063, + "learning_rate": 0.0014991882660621285, + "loss": 2.8975, + "step": 234 + }, + { + "epoch": 
0.024789029535864978, + "grad_norm": 1.027725338935852, + "learning_rate": 0.0014991765465694898, + "loss": 2.8317, + "step": 235 + }, + { + "epoch": 0.024894514767932488, + "grad_norm": 1.5395450592041016, + "learning_rate": 0.0014991647431278835, + "loss": 2.8806, + "step": 236 + }, + { + "epoch": 0.025, + "grad_norm": 1.0383232831954956, + "learning_rate": 0.001499152855738632, + "loss": 2.8554, + "step": 237 + }, + { + "epoch": 0.02510548523206751, + "grad_norm": 0.9904637932777405, + "learning_rate": 0.0014991408844030672, + "loss": 2.846, + "step": 238 + }, + { + "epoch": 0.02521097046413502, + "grad_norm": 1.0582256317138672, + "learning_rate": 0.0014991288291225308, + "loss": 2.876, + "step": 239 + }, + { + "epoch": 0.02531645569620253, + "grad_norm": 0.8040115833282471, + "learning_rate": 0.0014991166898983739, + "loss": 2.8756, + "step": 240 + }, + { + "epoch": 0.02542194092827004, + "grad_norm": 0.9016910195350647, + "learning_rate": 0.001499104466731956, + "loss": 2.8246, + "step": 241 + }, + { + "epoch": 0.02552742616033755, + "grad_norm": 1.1995376348495483, + "learning_rate": 0.0014990921596246475, + "loss": 2.8616, + "step": 242 + }, + { + "epoch": 0.025632911392405065, + "grad_norm": 1.2860021591186523, + "learning_rate": 0.0014990797685778272, + "loss": 2.8766, + "step": 243 + }, + { + "epoch": 0.025738396624472575, + "grad_norm": 0.9626737236976624, + "learning_rate": 0.0014990672935928835, + "loss": 2.861, + "step": 244 + }, + { + "epoch": 0.025843881856540084, + "grad_norm": 0.9582166075706482, + "learning_rate": 0.0014990547346712144, + "loss": 2.8416, + "step": 245 + }, + { + "epoch": 0.025949367088607594, + "grad_norm": 1.0527467727661133, + "learning_rate": 0.0014990420918142271, + "loss": 2.8432, + "step": 246 + }, + { + "epoch": 0.026054852320675104, + "grad_norm": 1.0649561882019043, + "learning_rate": 0.0014990293650233384, + "loss": 2.8024, + "step": 247 + }, + { + "epoch": 0.026160337552742614, + "grad_norm": 1.0202749967575073, + 
"learning_rate": 0.0014990165542999746, + "loss": 2.8613, + "step": 248 + }, + { + "epoch": 0.026265822784810128, + "grad_norm": 1.0614309310913086, + "learning_rate": 0.0014990036596455706, + "loss": 2.8024, + "step": 249 + }, + { + "epoch": 0.026371308016877638, + "grad_norm": 1.4857145547866821, + "learning_rate": 0.001498990681061572, + "loss": 2.8019, + "step": 250 + }, + { + "epoch": 0.026476793248945148, + "grad_norm": 1.0893112421035767, + "learning_rate": 0.0014989776185494322, + "loss": 2.8418, + "step": 251 + }, + { + "epoch": 0.026582278481012658, + "grad_norm": 0.9493918418884277, + "learning_rate": 0.001498964472110616, + "loss": 2.8049, + "step": 252 + }, + { + "epoch": 0.026687763713080168, + "grad_norm": 0.9483439326286316, + "learning_rate": 0.001498951241746596, + "loss": 2.8042, + "step": 253 + }, + { + "epoch": 0.02679324894514768, + "grad_norm": 0.7811446785926819, + "learning_rate": 0.0014989379274588546, + "loss": 2.7807, + "step": 254 + }, + { + "epoch": 0.02689873417721519, + "grad_norm": 0.8921216726303101, + "learning_rate": 0.0014989245292488839, + "loss": 2.7797, + "step": 255 + }, + { + "epoch": 0.0270042194092827, + "grad_norm": 1.3596833944320679, + "learning_rate": 0.0014989110471181853, + "loss": 2.7597, + "step": 256 + }, + { + "epoch": 0.02710970464135021, + "grad_norm": 1.2362720966339111, + "learning_rate": 0.0014988974810682695, + "loss": 2.8017, + "step": 257 + }, + { + "epoch": 0.02721518987341772, + "grad_norm": 1.0060396194458008, + "learning_rate": 0.0014988838311006565, + "loss": 2.8115, + "step": 258 + }, + { + "epoch": 0.02732067510548523, + "grad_norm": 0.8425025343894958, + "learning_rate": 0.0014988700972168758, + "loss": 2.8028, + "step": 259 + }, + { + "epoch": 0.027426160337552744, + "grad_norm": 0.8978121876716614, + "learning_rate": 0.001498856279418467, + "loss": 2.7433, + "step": 260 + }, + { + "epoch": 0.027531645569620254, + "grad_norm": 1.0432380437850952, + "learning_rate": 0.0014988423777069775, + 
"loss": 2.7808, + "step": 261 + }, + { + "epoch": 0.027637130801687764, + "grad_norm": 1.1375938653945923, + "learning_rate": 0.0014988283920839658, + "loss": 2.7603, + "step": 262 + }, + { + "epoch": 0.027742616033755274, + "grad_norm": 1.0427558422088623, + "learning_rate": 0.0014988143225509983, + "loss": 2.7579, + "step": 263 + }, + { + "epoch": 0.027848101265822784, + "grad_norm": 1.2168338298797607, + "learning_rate": 0.0014988001691096525, + "loss": 2.7729, + "step": 264 + }, + { + "epoch": 0.027953586497890294, + "grad_norm": 1.2269620895385742, + "learning_rate": 0.0014987859317615137, + "loss": 2.7781, + "step": 265 + }, + { + "epoch": 0.028059071729957807, + "grad_norm": 1.0744543075561523, + "learning_rate": 0.0014987716105081775, + "loss": 2.7707, + "step": 266 + }, + { + "epoch": 0.028164556962025317, + "grad_norm": 0.9745099544525146, + "learning_rate": 0.001498757205351249, + "loss": 2.7468, + "step": 267 + }, + { + "epoch": 0.028270042194092827, + "grad_norm": 0.8737015724182129, + "learning_rate": 0.0014987427162923416, + "loss": 2.7169, + "step": 268 + }, + { + "epoch": 0.028375527426160337, + "grad_norm": 0.9074501991271973, + "learning_rate": 0.001498728143333079, + "loss": 2.7516, + "step": 269 + }, + { + "epoch": 0.028481012658227847, + "grad_norm": 1.268550992012024, + "learning_rate": 0.0014987134864750948, + "loss": 2.7635, + "step": 270 + }, + { + "epoch": 0.028586497890295357, + "grad_norm": 1.0853469371795654, + "learning_rate": 0.0014986987457200312, + "loss": 2.7459, + "step": 271 + }, + { + "epoch": 0.02869198312236287, + "grad_norm": 1.236781120300293, + "learning_rate": 0.0014986839210695394, + "loss": 2.7353, + "step": 272 + }, + { + "epoch": 0.02879746835443038, + "grad_norm": 1.035058617591858, + "learning_rate": 0.0014986690125252814, + "loss": 2.7164, + "step": 273 + }, + { + "epoch": 0.02890295358649789, + "grad_norm": 1.0760972499847412, + "learning_rate": 0.001498654020088927, + "loss": 2.7252, + "step": 274 + }, + { + 
"epoch": 0.0290084388185654, + "grad_norm": 1.1209136247634888, + "learning_rate": 0.0014986389437621566, + "loss": 2.7559, + "step": 275 + }, + { + "epoch": 0.02911392405063291, + "grad_norm": 1.2214523553848267, + "learning_rate": 0.0014986237835466596, + "loss": 2.7055, + "step": 276 + }, + { + "epoch": 0.02921940928270042, + "grad_norm": 1.0056999921798706, + "learning_rate": 0.0014986085394441343, + "loss": 2.7288, + "step": 277 + }, + { + "epoch": 0.029324894514767934, + "grad_norm": 0.9270488023757935, + "learning_rate": 0.0014985932114562896, + "loss": 2.6592, + "step": 278 + }, + { + "epoch": 0.029430379746835444, + "grad_norm": 0.9012102484703064, + "learning_rate": 0.0014985777995848428, + "loss": 2.7101, + "step": 279 + }, + { + "epoch": 0.029535864978902954, + "grad_norm": 1.07014000415802, + "learning_rate": 0.0014985623038315206, + "loss": 2.6911, + "step": 280 + }, + { + "epoch": 0.029641350210970464, + "grad_norm": 1.2546824216842651, + "learning_rate": 0.0014985467241980597, + "loss": 2.6695, + "step": 281 + }, + { + "epoch": 0.029746835443037974, + "grad_norm": 1.0099031925201416, + "learning_rate": 0.0014985310606862058, + "loss": 2.7361, + "step": 282 + }, + { + "epoch": 0.029852320675105484, + "grad_norm": 0.9561591148376465, + "learning_rate": 0.0014985153132977141, + "loss": 2.6583, + "step": 283 + }, + { + "epoch": 0.029957805907172997, + "grad_norm": 0.7750890851020813, + "learning_rate": 0.0014984994820343488, + "loss": 2.6654, + "step": 284 + }, + { + "epoch": 0.030063291139240507, + "grad_norm": 0.916837751865387, + "learning_rate": 0.0014984835668978844, + "loss": 2.7234, + "step": 285 + }, + { + "epoch": 0.030168776371308017, + "grad_norm": 0.8950905799865723, + "learning_rate": 0.0014984675678901042, + "loss": 2.6952, + "step": 286 + }, + { + "epoch": 0.030274261603375527, + "grad_norm": 0.9260502457618713, + "learning_rate": 0.0014984514850128006, + "loss": 2.6892, + "step": 287 + }, + { + "epoch": 0.030379746835443037, + 
"grad_norm": 1.1801204681396484, + "learning_rate": 0.0014984353182677759, + "loss": 2.6891, + "step": 288 + }, + { + "epoch": 0.03048523206751055, + "grad_norm": 1.1376386880874634, + "learning_rate": 0.001498419067656842, + "loss": 2.6845, + "step": 289 + }, + { + "epoch": 0.03059071729957806, + "grad_norm": 1.1935220956802368, + "learning_rate": 0.0014984027331818193, + "loss": 2.669, + "step": 290 + }, + { + "epoch": 0.03069620253164557, + "grad_norm": 1.3063431978225708, + "learning_rate": 0.0014983863148445389, + "loss": 2.6803, + "step": 291 + }, + { + "epoch": 0.03080168776371308, + "grad_norm": 1.0460715293884277, + "learning_rate": 0.0014983698126468398, + "loss": 2.7029, + "step": 292 + }, + { + "epoch": 0.03090717299578059, + "grad_norm": 0.8229396939277649, + "learning_rate": 0.0014983532265905716, + "loss": 2.6714, + "step": 293 + }, + { + "epoch": 0.0310126582278481, + "grad_norm": 0.8519484400749207, + "learning_rate": 0.0014983365566775928, + "loss": 2.6218, + "step": 294 + }, + { + "epoch": 0.031118143459915613, + "grad_norm": 0.8794186115264893, + "learning_rate": 0.0014983198029097711, + "loss": 2.6301, + "step": 295 + }, + { + "epoch": 0.031223628691983123, + "grad_norm": 1.283088207244873, + "learning_rate": 0.0014983029652889843, + "loss": 2.6845, + "step": 296 + }, + { + "epoch": 0.03132911392405063, + "grad_norm": 1.3446671962738037, + "learning_rate": 0.0014982860438171187, + "loss": 2.6995, + "step": 297 + }, + { + "epoch": 0.03143459915611815, + "grad_norm": 1.0857517719268799, + "learning_rate": 0.0014982690384960705, + "loss": 2.663, + "step": 298 + }, + { + "epoch": 0.03154008438818565, + "grad_norm": 1.1367014646530151, + "learning_rate": 0.0014982519493277455, + "loss": 2.6326, + "step": 299 + }, + { + "epoch": 0.03164556962025317, + "grad_norm": 1.0733908414840698, + "learning_rate": 0.0014982347763140584, + "loss": 2.6341, + "step": 300 + }, + { + "epoch": 0.03175105485232067, + "grad_norm": 1.2120695114135742, + "learning_rate": 
0.0014982175194569337, + "loss": 2.6125, + "step": 301 + }, + { + "epoch": 0.03185654008438819, + "grad_norm": 1.1324199438095093, + "learning_rate": 0.0014982001787583047, + "loss": 2.6296, + "step": 302 + }, + { + "epoch": 0.03196202531645569, + "grad_norm": 0.8142815828323364, + "learning_rate": 0.001498182754220115, + "loss": 2.6257, + "step": 303 + }, + { + "epoch": 0.032067510548523206, + "grad_norm": 0.9081944823265076, + "learning_rate": 0.001498165245844317, + "loss": 2.634, + "step": 304 + }, + { + "epoch": 0.03217299578059072, + "grad_norm": 0.9077059626579285, + "learning_rate": 0.0014981476536328722, + "loss": 2.6392, + "step": 305 + }, + { + "epoch": 0.032278481012658226, + "grad_norm": 0.8216769695281982, + "learning_rate": 0.0014981299775877525, + "loss": 2.6062, + "step": 306 + }, + { + "epoch": 0.03238396624472574, + "grad_norm": 1.132973551750183, + "learning_rate": 0.0014981122177109383, + "loss": 2.6433, + "step": 307 + }, + { + "epoch": 0.032489451476793246, + "grad_norm": 1.2976328134536743, + "learning_rate": 0.0014980943740044196, + "loss": 2.6166, + "step": 308 + }, + { + "epoch": 0.03259493670886076, + "grad_norm": 1.019552230834961, + "learning_rate": 0.0014980764464701958, + "loss": 2.6361, + "step": 309 + }, + { + "epoch": 0.03270042194092827, + "grad_norm": 1.09702730178833, + "learning_rate": 0.0014980584351102762, + "loss": 2.6321, + "step": 310 + }, + { + "epoch": 0.03280590717299578, + "grad_norm": 1.3344144821166992, + "learning_rate": 0.0014980403399266786, + "loss": 2.6048, + "step": 311 + }, + { + "epoch": 0.03291139240506329, + "grad_norm": 1.3199617862701416, + "learning_rate": 0.0014980221609214308, + "loss": 2.6243, + "step": 312 + }, + { + "epoch": 0.0330168776371308, + "grad_norm": 0.949766218662262, + "learning_rate": 0.0014980038980965701, + "loss": 2.5865, + "step": 313 + }, + { + "epoch": 0.03312236286919831, + "grad_norm": 1.1385403871536255, + "learning_rate": 0.0014979855514541424, + "loss": 2.5867, + "step": 314 
+ }, + { + "epoch": 0.03322784810126582, + "grad_norm": 1.6385608911514282, + "learning_rate": 0.0014979671209962044, + "loss": 2.6212, + "step": 315 + }, + { + "epoch": 0.03333333333333333, + "grad_norm": 0.8570581078529358, + "learning_rate": 0.0014979486067248204, + "loss": 2.573, + "step": 316 + }, + { + "epoch": 0.033438818565400846, + "grad_norm": 1.4652845859527588, + "learning_rate": 0.0014979300086420655, + "loss": 2.6246, + "step": 317 + }, + { + "epoch": 0.03354430379746835, + "grad_norm": 1.3002411127090454, + "learning_rate": 0.0014979113267500235, + "loss": 2.6007, + "step": 318 + }, + { + "epoch": 0.033649789029535866, + "grad_norm": 1.0077307224273682, + "learning_rate": 0.0014978925610507879, + "loss": 2.5925, + "step": 319 + }, + { + "epoch": 0.03375527426160337, + "grad_norm": 0.9268769025802612, + "learning_rate": 0.001497873711546462, + "loss": 2.6006, + "step": 320 + }, + { + "epoch": 0.033860759493670886, + "grad_norm": 0.997338056564331, + "learning_rate": 0.001497854778239157, + "loss": 2.6043, + "step": 321 + }, + { + "epoch": 0.0339662447257384, + "grad_norm": 0.9850685000419617, + "learning_rate": 0.0014978357611309951, + "loss": 2.5982, + "step": 322 + }, + { + "epoch": 0.034071729957805906, + "grad_norm": 0.9306216835975647, + "learning_rate": 0.0014978166602241068, + "loss": 2.6129, + "step": 323 + }, + { + "epoch": 0.03417721518987342, + "grad_norm": 1.0908622741699219, + "learning_rate": 0.0014977974755206334, + "loss": 2.6414, + "step": 324 + }, + { + "epoch": 0.034282700421940926, + "grad_norm": 0.8892245292663574, + "learning_rate": 0.0014977782070227236, + "loss": 2.5673, + "step": 325 + }, + { + "epoch": 0.03438818565400844, + "grad_norm": 0.9929649829864502, + "learning_rate": 0.001497758854732537, + "loss": 2.5806, + "step": 326 + }, + { + "epoch": 0.03449367088607595, + "grad_norm": 1.3797552585601807, + "learning_rate": 0.001497739418652242, + "loss": 2.6077, + "step": 327 + }, + { + "epoch": 0.03459915611814346, + 
"grad_norm": 1.0671138763427734, + "learning_rate": 0.0014977198987840168, + "loss": 2.5613, + "step": 328 + }, + { + "epoch": 0.03470464135021097, + "grad_norm": 1.1433043479919434, + "learning_rate": 0.0014977002951300483, + "loss": 2.5677, + "step": 329 + }, + { + "epoch": 0.03481012658227848, + "grad_norm": 0.8440949320793152, + "learning_rate": 0.0014976806076925334, + "loss": 2.543, + "step": 330 + }, + { + "epoch": 0.03491561181434599, + "grad_norm": 0.8378992080688477, + "learning_rate": 0.0014976608364736781, + "loss": 2.5406, + "step": 331 + }, + { + "epoch": 0.0350210970464135, + "grad_norm": 0.8893612623214722, + "learning_rate": 0.001497640981475698, + "loss": 2.5535, + "step": 332 + }, + { + "epoch": 0.03512658227848101, + "grad_norm": 1.1259419918060303, + "learning_rate": 0.0014976210427008177, + "loss": 2.598, + "step": 333 + }, + { + "epoch": 0.035232067510548526, + "grad_norm": 1.1516916751861572, + "learning_rate": 0.0014976010201512718, + "loss": 2.5728, + "step": 334 + }, + { + "epoch": 0.03533755274261603, + "grad_norm": 1.2896422147750854, + "learning_rate": 0.0014975809138293036, + "loss": 2.5654, + "step": 335 + }, + { + "epoch": 0.035443037974683546, + "grad_norm": 1.6328109502792358, + "learning_rate": 0.0014975607237371663, + "loss": 2.5698, + "step": 336 + }, + { + "epoch": 0.03554852320675105, + "grad_norm": 0.9485942125320435, + "learning_rate": 0.0014975404498771222, + "loss": 2.5589, + "step": 337 + }, + { + "epoch": 0.035654008438818566, + "grad_norm": 1.1837468147277832, + "learning_rate": 0.0014975200922514428, + "loss": 2.5815, + "step": 338 + }, + { + "epoch": 0.03575949367088608, + "grad_norm": 1.8870047330856323, + "learning_rate": 0.00149749965086241, + "loss": 2.6032, + "step": 339 + }, + { + "epoch": 0.035864978902953586, + "grad_norm": 0.9463247060775757, + "learning_rate": 0.0014974791257123137, + "loss": 2.5674, + "step": 340 + }, + { + "epoch": 0.0359704641350211, + "grad_norm": 1.62657630443573, + "learning_rate": 
0.0014974585168034543, + "loss": 2.5476, + "step": 341 + }, + { + "epoch": 0.036075949367088606, + "grad_norm": 1.084662675857544, + "learning_rate": 0.0014974378241381409, + "loss": 2.5202, + "step": 342 + }, + { + "epoch": 0.03618143459915612, + "grad_norm": 1.0021307468414307, + "learning_rate": 0.001497417047718692, + "loss": 2.5467, + "step": 343 + }, + { + "epoch": 0.036286919831223625, + "grad_norm": 1.395853042602539, + "learning_rate": 0.0014973961875474364, + "loss": 2.5363, + "step": 344 + }, + { + "epoch": 0.03639240506329114, + "grad_norm": 1.0814522504806519, + "learning_rate": 0.0014973752436267106, + "loss": 2.5484, + "step": 345 + }, + { + "epoch": 0.03649789029535865, + "grad_norm": 0.9900476932525635, + "learning_rate": 0.0014973542159588623, + "loss": 2.5019, + "step": 346 + }, + { + "epoch": 0.03660337552742616, + "grad_norm": 1.3664792776107788, + "learning_rate": 0.0014973331045462475, + "loss": 2.5569, + "step": 347 + }, + { + "epoch": 0.03670886075949367, + "grad_norm": 0.9384375810623169, + "learning_rate": 0.0014973119093912317, + "loss": 2.5292, + "step": 348 + }, + { + "epoch": 0.03681434599156118, + "grad_norm": 1.0815021991729736, + "learning_rate": 0.00149729063049619, + "loss": 2.4959, + "step": 349 + }, + { + "epoch": 0.03691983122362869, + "grad_norm": 1.2254843711853027, + "learning_rate": 0.001497269267863507, + "loss": 2.5094, + "step": 350 + }, + { + "epoch": 0.037025316455696206, + "grad_norm": 0.934872031211853, + "learning_rate": 0.0014972478214955762, + "loss": 2.4974, + "step": 351 + }, + { + "epoch": 0.03713080168776371, + "grad_norm": 1.0753122568130493, + "learning_rate": 0.0014972262913948008, + "loss": 2.5234, + "step": 352 + }, + { + "epoch": 0.037236286919831225, + "grad_norm": 1.258839726448059, + "learning_rate": 0.0014972046775635934, + "loss": 2.5464, + "step": 353 + }, + { + "epoch": 0.03734177215189873, + "grad_norm": 1.0882951021194458, + "learning_rate": 0.0014971829800043762, + "loss": 2.4666, + "step": 
354 + }, + { + "epoch": 0.037447257383966245, + "grad_norm": 1.3751091957092285, + "learning_rate": 0.0014971611987195802, + "loss": 2.5102, + "step": 355 + }, + { + "epoch": 0.03755274261603375, + "grad_norm": 0.9528074860572815, + "learning_rate": 0.0014971393337116462, + "loss": 2.5226, + "step": 356 + }, + { + "epoch": 0.037658227848101265, + "grad_norm": 0.7720966935157776, + "learning_rate": 0.0014971173849830243, + "loss": 2.4614, + "step": 357 + }, + { + "epoch": 0.03776371308016878, + "grad_norm": 0.8414681553840637, + "learning_rate": 0.0014970953525361738, + "loss": 2.4903, + "step": 358 + }, + { + "epoch": 0.037869198312236285, + "grad_norm": 0.8028154373168945, + "learning_rate": 0.001497073236373564, + "loss": 2.465, + "step": 359 + }, + { + "epoch": 0.0379746835443038, + "grad_norm": 0.943872332572937, + "learning_rate": 0.0014970510364976724, + "loss": 2.515, + "step": 360 + }, + { + "epoch": 0.038080168776371305, + "grad_norm": 1.2961680889129639, + "learning_rate": 0.0014970287529109873, + "loss": 2.4938, + "step": 361 + }, + { + "epoch": 0.03818565400843882, + "grad_norm": 1.2172638177871704, + "learning_rate": 0.0014970063856160054, + "loss": 2.5239, + "step": 362 + }, + { + "epoch": 0.03829113924050633, + "grad_norm": 1.337454080581665, + "learning_rate": 0.0014969839346152332, + "loss": 2.4872, + "step": 363 + }, + { + "epoch": 0.03839662447257384, + "grad_norm": 1.1448066234588623, + "learning_rate": 0.001496961399911186, + "loss": 2.4841, + "step": 364 + }, + { + "epoch": 0.03850210970464135, + "grad_norm": 0.9388923048973083, + "learning_rate": 0.0014969387815063897, + "loss": 2.5071, + "step": 365 + }, + { + "epoch": 0.03860759493670886, + "grad_norm": 1.0893713235855103, + "learning_rate": 0.0014969160794033778, + "loss": 2.4758, + "step": 366 + }, + { + "epoch": 0.03871308016877637, + "grad_norm": 1.2458139657974243, + "learning_rate": 0.0014968932936046953, + "loss": 2.4835, + "step": 367 + }, + { + "epoch": 0.038818565400843885, + 
"grad_norm": 1.191352128982544, + "learning_rate": 0.0014968704241128947, + "loss": 2.5155, + "step": 368 + }, + { + "epoch": 0.03892405063291139, + "grad_norm": 1.0802515745162964, + "learning_rate": 0.0014968474709305384, + "loss": 2.4985, + "step": 369 + }, + { + "epoch": 0.039029535864978905, + "grad_norm": 0.9673506021499634, + "learning_rate": 0.0014968244340601996, + "loss": 2.4577, + "step": 370 + }, + { + "epoch": 0.03913502109704641, + "grad_norm": 1.0400539636611938, + "learning_rate": 0.0014968013135044586, + "loss": 2.4693, + "step": 371 + }, + { + "epoch": 0.039240506329113925, + "grad_norm": 1.3257502317428589, + "learning_rate": 0.0014967781092659065, + "loss": 2.4589, + "step": 372 + }, + { + "epoch": 0.03934599156118143, + "grad_norm": 1.5541125535964966, + "learning_rate": 0.0014967548213471436, + "loss": 2.5057, + "step": 373 + }, + { + "epoch": 0.039451476793248945, + "grad_norm": 0.8203284740447998, + "learning_rate": 0.0014967314497507792, + "loss": 2.5084, + "step": 374 + }, + { + "epoch": 0.03955696202531646, + "grad_norm": 0.8262734413146973, + "learning_rate": 0.0014967079944794323, + "loss": 2.4391, + "step": 375 + }, + { + "epoch": 0.039662447257383965, + "grad_norm": 1.3534080982208252, + "learning_rate": 0.0014966844555357314, + "loss": 2.4774, + "step": 376 + }, + { + "epoch": 0.03976793248945148, + "grad_norm": 1.2121087312698364, + "learning_rate": 0.0014966608329223137, + "loss": 2.4799, + "step": 377 + }, + { + "epoch": 0.039873417721518985, + "grad_norm": 0.8483554124832153, + "learning_rate": 0.0014966371266418267, + "loss": 2.4546, + "step": 378 + }, + { + "epoch": 0.0399789029535865, + "grad_norm": 0.839928925037384, + "learning_rate": 0.0014966133366969264, + "loss": 2.4767, + "step": 379 + }, + { + "epoch": 0.04008438818565401, + "grad_norm": 1.0660420656204224, + "learning_rate": 0.001496589463090279, + "loss": 2.4521, + "step": 380 + }, + { + "epoch": 0.04018987341772152, + "grad_norm": 1.360600471496582, + 
"learning_rate": 0.0014965655058245592, + "loss": 2.468, + "step": 381 + }, + { + "epoch": 0.04029535864978903, + "grad_norm": 1.0799797773361206, + "learning_rate": 0.001496541464902452, + "loss": 2.4763, + "step": 382 + }, + { + "epoch": 0.04040084388185654, + "grad_norm": 0.8181172609329224, + "learning_rate": 0.001496517340326651, + "loss": 2.4661, + "step": 383 + }, + { + "epoch": 0.04050632911392405, + "grad_norm": 0.9945867657661438, + "learning_rate": 0.0014964931320998593, + "loss": 2.4566, + "step": 384 + }, + { + "epoch": 0.04061181434599156, + "grad_norm": 1.309409260749817, + "learning_rate": 0.00149646884022479, + "loss": 2.4439, + "step": 385 + }, + { + "epoch": 0.04071729957805907, + "grad_norm": 1.052648663520813, + "learning_rate": 0.0014964444647041647, + "loss": 2.4253, + "step": 386 + }, + { + "epoch": 0.040822784810126585, + "grad_norm": 1.1055716276168823, + "learning_rate": 0.0014964200055407153, + "loss": 2.4664, + "step": 387 + }, + { + "epoch": 0.04092827004219409, + "grad_norm": 1.1337897777557373, + "learning_rate": 0.0014963954627371823, + "loss": 2.4452, + "step": 388 + }, + { + "epoch": 0.041033755274261605, + "grad_norm": 0.9457775354385376, + "learning_rate": 0.0014963708362963157, + "loss": 2.4255, + "step": 389 + }, + { + "epoch": 0.04113924050632911, + "grad_norm": 1.4046257734298706, + "learning_rate": 0.001496346126220875, + "loss": 2.4448, + "step": 390 + }, + { + "epoch": 0.041244725738396625, + "grad_norm": 1.3065813779830933, + "learning_rate": 0.0014963213325136296, + "loss": 2.4474, + "step": 391 + }, + { + "epoch": 0.04135021097046414, + "grad_norm": 0.7920165061950684, + "learning_rate": 0.0014962964551773572, + "loss": 2.4287, + "step": 392 + }, + { + "epoch": 0.041455696202531644, + "grad_norm": 0.8692680597305298, + "learning_rate": 0.0014962714942148457, + "loss": 2.4117, + "step": 393 + }, + { + "epoch": 0.04156118143459916, + "grad_norm": 1.2590408325195312, + "learning_rate": 0.001496246449628892, + "loss": 
2.4137, + "step": 394 + }, + { + "epoch": 0.041666666666666664, + "grad_norm": 1.3338404893875122, + "learning_rate": 0.0014962213214223025, + "loss": 2.4164, + "step": 395 + }, + { + "epoch": 0.04177215189873418, + "grad_norm": 1.0365607738494873, + "learning_rate": 0.001496196109597893, + "loss": 2.4174, + "step": 396 + }, + { + "epoch": 0.04187763713080169, + "grad_norm": 0.9470320343971252, + "learning_rate": 0.0014961708141584885, + "loss": 2.4038, + "step": 397 + }, + { + "epoch": 0.0419831223628692, + "grad_norm": 0.8841492533683777, + "learning_rate": 0.0014961454351069233, + "loss": 2.4133, + "step": 398 + }, + { + "epoch": 0.04208860759493671, + "grad_norm": 0.9154709577560425, + "learning_rate": 0.0014961199724460418, + "loss": 2.4091, + "step": 399 + }, + { + "epoch": 0.04219409282700422, + "grad_norm": 1.0864479541778564, + "learning_rate": 0.0014960944261786966, + "loss": 2.4253, + "step": 400 + }, + { + "epoch": 0.04229957805907173, + "grad_norm": 1.486473560333252, + "learning_rate": 0.001496068796307751, + "loss": 2.4241, + "step": 401 + }, + { + "epoch": 0.04240506329113924, + "grad_norm": 0.9453651905059814, + "learning_rate": 0.0014960430828360762, + "loss": 2.3876, + "step": 402 + }, + { + "epoch": 0.04251054852320675, + "grad_norm": 0.8527156710624695, + "learning_rate": 0.001496017285766554, + "loss": 2.4359, + "step": 403 + }, + { + "epoch": 0.042616033755274264, + "grad_norm": 1.0296834707260132, + "learning_rate": 0.0014959914051020748, + "loss": 2.4283, + "step": 404 + }, + { + "epoch": 0.04272151898734177, + "grad_norm": 1.6746788024902344, + "learning_rate": 0.001495965440845539, + "loss": 2.3999, + "step": 405 + }, + { + "epoch": 0.042827004219409284, + "grad_norm": 0.9232812523841858, + "learning_rate": 0.0014959393929998557, + "loss": 2.4299, + "step": 406 + }, + { + "epoch": 0.04293248945147679, + "grad_norm": 0.9611865282058716, + "learning_rate": 0.001495913261567944, + "loss": 2.4506, + "step": 407 + }, + { + "epoch": 
0.043037974683544304, + "grad_norm": 1.43991219997406, + "learning_rate": 0.0014958870465527317, + "loss": 2.4205, + "step": 408 + }, + { + "epoch": 0.04314345991561182, + "grad_norm": 1.1359418630599976, + "learning_rate": 0.0014958607479571564, + "loss": 2.4052, + "step": 409 + }, + { + "epoch": 0.043248945147679324, + "grad_norm": 0.929975152015686, + "learning_rate": 0.0014958343657841655, + "loss": 2.3984, + "step": 410 + }, + { + "epoch": 0.04335443037974684, + "grad_norm": 0.8108623623847961, + "learning_rate": 0.0014958079000367147, + "loss": 2.385, + "step": 411 + }, + { + "epoch": 0.043459915611814344, + "grad_norm": 0.8337361216545105, + "learning_rate": 0.0014957813507177696, + "loss": 2.395, + "step": 412 + }, + { + "epoch": 0.04356540084388186, + "grad_norm": 0.8966922163963318, + "learning_rate": 0.0014957547178303054, + "loss": 2.4003, + "step": 413 + }, + { + "epoch": 0.043670886075949364, + "grad_norm": 0.8751859664916992, + "learning_rate": 0.0014957280013773065, + "loss": 2.3752, + "step": 414 + }, + { + "epoch": 0.04377637130801688, + "grad_norm": 1.363107442855835, + "learning_rate": 0.0014957012013617663, + "loss": 2.4121, + "step": 415 + }, + { + "epoch": 0.04388185654008439, + "grad_norm": 1.1707082986831665, + "learning_rate": 0.0014956743177866882, + "loss": 2.3931, + "step": 416 + }, + { + "epoch": 0.0439873417721519, + "grad_norm": 1.1500344276428223, + "learning_rate": 0.0014956473506550845, + "loss": 2.4038, + "step": 417 + }, + { + "epoch": 0.04409282700421941, + "grad_norm": 0.9956418871879578, + "learning_rate": 0.0014956202999699773, + "loss": 2.4178, + "step": 418 + }, + { + "epoch": 0.04419831223628692, + "grad_norm": 0.973657488822937, + "learning_rate": 0.001495593165734397, + "loss": 2.3995, + "step": 419 + }, + { + "epoch": 0.04430379746835443, + "grad_norm": 0.9981339573860168, + "learning_rate": 0.001495565947951385, + "loss": 2.358, + "step": 420 + }, + { + "epoch": 0.044409282700421944, + "grad_norm": 0.8396971225738525, 
+ "learning_rate": 0.0014955386466239907, + "loss": 2.379, + "step": 421 + }, + { + "epoch": 0.04451476793248945, + "grad_norm": 0.7592825889587402, + "learning_rate": 0.0014955112617552734, + "loss": 2.3662, + "step": 422 + }, + { + "epoch": 0.044620253164556964, + "grad_norm": 0.950877845287323, + "learning_rate": 0.001495483793348302, + "loss": 2.3775, + "step": 423 + }, + { + "epoch": 0.04472573839662447, + "grad_norm": 0.9075492024421692, + "learning_rate": 0.0014954562414061538, + "loss": 2.3758, + "step": 424 + }, + { + "epoch": 0.044831223628691984, + "grad_norm": 1.042842984199524, + "learning_rate": 0.0014954286059319167, + "loss": 2.3463, + "step": 425 + }, + { + "epoch": 0.04493670886075949, + "grad_norm": 1.4204667806625366, + "learning_rate": 0.0014954008869286876, + "loss": 2.3919, + "step": 426 + }, + { + "epoch": 0.045042194092827004, + "grad_norm": 1.1118993759155273, + "learning_rate": 0.001495373084399572, + "loss": 2.3808, + "step": 427 + }, + { + "epoch": 0.04514767932489452, + "grad_norm": 0.9536285996437073, + "learning_rate": 0.0014953451983476854, + "loss": 2.3734, + "step": 428 + }, + { + "epoch": 0.045253164556962024, + "grad_norm": 0.8951354026794434, + "learning_rate": 0.0014953172287761529, + "loss": 2.3533, + "step": 429 + }, + { + "epoch": 0.04535864978902954, + "grad_norm": 0.8978113532066345, + "learning_rate": 0.0014952891756881085, + "loss": 2.3332, + "step": 430 + }, + { + "epoch": 0.045464135021097044, + "grad_norm": 1.247363805770874, + "learning_rate": 0.0014952610390866954, + "loss": 2.3176, + "step": 431 + }, + { + "epoch": 0.04556962025316456, + "grad_norm": 1.439074158668518, + "learning_rate": 0.0014952328189750666, + "loss": 2.3589, + "step": 432 + }, + { + "epoch": 0.04567510548523207, + "grad_norm": 0.9334609508514404, + "learning_rate": 0.0014952045153563845, + "loss": 2.3469, + "step": 433 + }, + { + "epoch": 0.04578059071729958, + "grad_norm": 0.7827621102333069, + "learning_rate": 0.0014951761282338205, + "loss": 
2.358, + "step": 434 + }, + { + "epoch": 0.04588607594936709, + "grad_norm": 0.785163164138794, + "learning_rate": 0.0014951476576105555, + "loss": 2.3472, + "step": 435 + }, + { + "epoch": 0.0459915611814346, + "grad_norm": 0.7828155159950256, + "learning_rate": 0.00149511910348978, + "loss": 2.3507, + "step": 436 + }, + { + "epoch": 0.04609704641350211, + "grad_norm": 0.7457301616668701, + "learning_rate": 0.0014950904658746933, + "loss": 2.3371, + "step": 437 + }, + { + "epoch": 0.046202531645569624, + "grad_norm": 0.7861217260360718, + "learning_rate": 0.0014950617447685047, + "loss": 2.3663, + "step": 438 + }, + { + "epoch": 0.04630801687763713, + "grad_norm": 0.7720435857772827, + "learning_rate": 0.001495032940174432, + "loss": 2.3201, + "step": 439 + }, + { + "epoch": 0.046413502109704644, + "grad_norm": 0.9602258205413818, + "learning_rate": 0.0014950040520957037, + "loss": 2.3564, + "step": 440 + }, + { + "epoch": 0.04651898734177215, + "grad_norm": 1.7887842655181885, + "learning_rate": 0.0014949750805355563, + "loss": 2.3611, + "step": 441 + }, + { + "epoch": 0.04662447257383966, + "grad_norm": 0.9026731848716736, + "learning_rate": 0.0014949460254972363, + "loss": 2.3616, + "step": 442 + }, + { + "epoch": 0.04672995780590717, + "grad_norm": 0.980740487575531, + "learning_rate": 0.0014949168869839997, + "loss": 2.3321, + "step": 443 + }, + { + "epoch": 0.04683544303797468, + "grad_norm": 1.20274817943573, + "learning_rate": 0.0014948876649991112, + "loss": 2.357, + "step": 444 + }, + { + "epoch": 0.0469409282700422, + "grad_norm": 1.447656273841858, + "learning_rate": 0.0014948583595458455, + "loss": 2.3501, + "step": 445 + }, + { + "epoch": 0.0470464135021097, + "grad_norm": 1.2681281566619873, + "learning_rate": 0.0014948289706274865, + "loss": 2.3762, + "step": 446 + }, + { + "epoch": 0.04715189873417722, + "grad_norm": 0.930767834186554, + "learning_rate": 0.0014947994982473273, + "loss": 2.3539, + "step": 447 + }, + { + "epoch": 
0.04725738396624472, + "grad_norm": 0.9533438682556152, + "learning_rate": 0.0014947699424086704, + "loss": 2.3239, + "step": 448 + }, + { + "epoch": 0.04736286919831224, + "grad_norm": 0.9127470254898071, + "learning_rate": 0.0014947403031148278, + "loss": 2.3652, + "step": 449 + }, + { + "epoch": 0.04746835443037975, + "grad_norm": 1.127302885055542, + "learning_rate": 0.0014947105803691204, + "loss": 2.3331, + "step": 450 + }, + { + "epoch": 0.047573839662447256, + "grad_norm": 0.8813461661338806, + "learning_rate": 0.0014946807741748791, + "loss": 2.3377, + "step": 451 + }, + { + "epoch": 0.04767932489451477, + "grad_norm": 1.0821599960327148, + "learning_rate": 0.001494650884535444, + "loss": 2.3146, + "step": 452 + }, + { + "epoch": 0.047784810126582276, + "grad_norm": 1.20368492603302, + "learning_rate": 0.0014946209114541636, + "loss": 2.3783, + "step": 453 + }, + { + "epoch": 0.04789029535864979, + "grad_norm": 1.365116834640503, + "learning_rate": 0.0014945908549343974, + "loss": 2.3574, + "step": 454 + }, + { + "epoch": 0.047995780590717296, + "grad_norm": 0.9531636238098145, + "learning_rate": 0.001494560714979513, + "loss": 2.3273, + "step": 455 + }, + { + "epoch": 0.04810126582278481, + "grad_norm": 0.9679948091506958, + "learning_rate": 0.0014945304915928875, + "loss": 2.2929, + "step": 456 + }, + { + "epoch": 0.04820675105485232, + "grad_norm": 1.059843897819519, + "learning_rate": 0.0014945001847779082, + "loss": 2.3088, + "step": 457 + }, + { + "epoch": 0.04831223628691983, + "grad_norm": 0.8817211985588074, + "learning_rate": 0.0014944697945379708, + "loss": 2.2968, + "step": 458 + }, + { + "epoch": 0.04841772151898734, + "grad_norm": 0.8478899598121643, + "learning_rate": 0.0014944393208764805, + "loss": 2.2928, + "step": 459 + }, + { + "epoch": 0.04852320675105485, + "grad_norm": 0.8140817880630493, + "learning_rate": 0.0014944087637968522, + "loss": 2.3398, + "step": 460 + }, + { + "epoch": 0.04862869198312236, + "grad_norm": 
0.943507969379425, + "learning_rate": 0.00149437812330251, + "loss": 2.3089, + "step": 461 + }, + { + "epoch": 0.048734177215189876, + "grad_norm": 0.9551388621330261, + "learning_rate": 0.0014943473993968871, + "loss": 2.3241, + "step": 462 + }, + { + "epoch": 0.04883966244725738, + "grad_norm": 0.8838210105895996, + "learning_rate": 0.0014943165920834266, + "loss": 2.2959, + "step": 463 + }, + { + "epoch": 0.048945147679324896, + "grad_norm": 0.8404055833816528, + "learning_rate": 0.0014942857013655806, + "loss": 2.2921, + "step": 464 + }, + { + "epoch": 0.0490506329113924, + "grad_norm": 1.2387079000473022, + "learning_rate": 0.0014942547272468103, + "loss": 2.3118, + "step": 465 + }, + { + "epoch": 0.049156118143459916, + "grad_norm": 1.2832729816436768, + "learning_rate": 0.0014942236697305866, + "loss": 2.2899, + "step": 466 + }, + { + "epoch": 0.04926160337552743, + "grad_norm": 1.2614774703979492, + "learning_rate": 0.0014941925288203897, + "loss": 2.3325, + "step": 467 + }, + { + "epoch": 0.049367088607594936, + "grad_norm": 1.1121723651885986, + "learning_rate": 0.001494161304519709, + "loss": 2.3153, + "step": 468 + }, + { + "epoch": 0.04947257383966245, + "grad_norm": 0.9853944182395935, + "learning_rate": 0.0014941299968320434, + "loss": 2.2899, + "step": 469 + }, + { + "epoch": 0.049578059071729956, + "grad_norm": 1.002018928527832, + "learning_rate": 0.0014940986057609012, + "loss": 2.28, + "step": 470 + }, + { + "epoch": 0.04968354430379747, + "grad_norm": 1.3494192361831665, + "learning_rate": 0.0014940671313097998, + "loss": 2.2995, + "step": 471 + }, + { + "epoch": 0.049789029535864976, + "grad_norm": 1.049818754196167, + "learning_rate": 0.001494035573482266, + "loss": 2.2883, + "step": 472 + }, + { + "epoch": 0.04989451476793249, + "grad_norm": 0.8760460615158081, + "learning_rate": 0.0014940039322818362, + "loss": 2.3237, + "step": 473 + }, + { + "epoch": 0.05, + "grad_norm": 0.8347345590591431, + "learning_rate": 0.0014939722077120558, + 
"loss": 2.3138, + "step": 474 + }, + { + "epoch": 0.05010548523206751, + "grad_norm": 0.8713938593864441, + "learning_rate": 0.0014939403997764795, + "loss": 2.3059, + "step": 475 + }, + { + "epoch": 0.05021097046413502, + "grad_norm": 1.0673741102218628, + "learning_rate": 0.001493908508478672, + "loss": 2.2894, + "step": 476 + }, + { + "epoch": 0.05031645569620253, + "grad_norm": 1.2919914722442627, + "learning_rate": 0.0014938765338222068, + "loss": 2.2673, + "step": 477 + }, + { + "epoch": 0.05042194092827004, + "grad_norm": 1.1538162231445312, + "learning_rate": 0.0014938444758106665, + "loss": 2.3501, + "step": 478 + }, + { + "epoch": 0.050527426160337556, + "grad_norm": 1.0773078203201294, + "learning_rate": 0.0014938123344476436, + "loss": 2.2824, + "step": 479 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 1.0545172691345215, + "learning_rate": 0.0014937801097367396, + "loss": 2.292, + "step": 480 + }, + { + "epoch": 0.050738396624472576, + "grad_norm": 1.3733197450637817, + "learning_rate": 0.0014937478016815657, + "loss": 2.2623, + "step": 481 + }, + { + "epoch": 0.05084388185654008, + "grad_norm": 1.1418392658233643, + "learning_rate": 0.0014937154102857416, + "loss": 2.2997, + "step": 482 + }, + { + "epoch": 0.050949367088607596, + "grad_norm": 0.8705460429191589, + "learning_rate": 0.0014936829355528976, + "loss": 2.3197, + "step": 483 + }, + { + "epoch": 0.0510548523206751, + "grad_norm": 0.7907448410987854, + "learning_rate": 0.0014936503774866721, + "loss": 2.2726, + "step": 484 + }, + { + "epoch": 0.051160337552742616, + "grad_norm": 1.04575514793396, + "learning_rate": 0.0014936177360907138, + "loss": 2.2371, + "step": 485 + }, + { + "epoch": 0.05126582278481013, + "grad_norm": 1.2633823156356812, + "learning_rate": 0.00149358501136868, + "loss": 2.248, + "step": 486 + }, + { + "epoch": 0.051371308016877636, + "grad_norm": 1.2141749858856201, + "learning_rate": 0.0014935522033242379, + "loss": 2.2988, + "step": 487 + }, + { + "epoch": 
0.05147679324894515, + "grad_norm": 1.2107560634613037, + "learning_rate": 0.0014935193119610638, + "loss": 2.2568, + "step": 488 + }, + { + "epoch": 0.051582278481012656, + "grad_norm": 0.9247601628303528, + "learning_rate": 0.0014934863372828432, + "loss": 2.2614, + "step": 489 + }, + { + "epoch": 0.05168776371308017, + "grad_norm": 0.8669816255569458, + "learning_rate": 0.001493453279293271, + "loss": 2.2846, + "step": 490 + }, + { + "epoch": 0.05179324894514768, + "grad_norm": 0.8475592732429504, + "learning_rate": 0.001493420137996052, + "loss": 2.2873, + "step": 491 + }, + { + "epoch": 0.05189873417721519, + "grad_norm": 1.2943617105484009, + "learning_rate": 0.0014933869133948992, + "loss": 2.2803, + "step": 492 + }, + { + "epoch": 0.0520042194092827, + "grad_norm": 1.7053472995758057, + "learning_rate": 0.0014933536054935362, + "loss": 2.2791, + "step": 493 + }, + { + "epoch": 0.05210970464135021, + "grad_norm": 0.841754674911499, + "learning_rate": 0.0014933202142956947, + "loss": 2.2795, + "step": 494 + }, + { + "epoch": 0.05221518987341772, + "grad_norm": 1.110651969909668, + "learning_rate": 0.0014932867398051168, + "loss": 2.2647, + "step": 495 + }, + { + "epoch": 0.05232067510548523, + "grad_norm": 1.7646479606628418, + "learning_rate": 0.0014932531820255534, + "loss": 2.2577, + "step": 496 + }, + { + "epoch": 0.05242616033755274, + "grad_norm": 0.8262982368469238, + "learning_rate": 0.0014932195409607645, + "loss": 2.2873, + "step": 497 + }, + { + "epoch": 0.052531645569620256, + "grad_norm": 1.7849030494689941, + "learning_rate": 0.0014931858166145203, + "loss": 2.2613, + "step": 498 + }, + { + "epoch": 0.05263713080168776, + "grad_norm": 0.8727604150772095, + "learning_rate": 0.0014931520089905993, + "loss": 2.2867, + "step": 499 + }, + { + "epoch": 0.052742616033755275, + "grad_norm": 1.8944811820983887, + "learning_rate": 0.0014931181180927902, + "loss": 2.2811, + "step": 500 + }, + { + "epoch": 0.05284810126582278, + "grad_norm": 
0.9346913695335388, + "learning_rate": 0.0014930841439248904, + "loss": 2.2711, + "step": 501 + }, + { + "epoch": 0.052953586497890295, + "grad_norm": 1.2265738248825073, + "learning_rate": 0.0014930500864907066, + "loss": 2.305, + "step": 502 + }, + { + "epoch": 0.05305907172995781, + "grad_norm": 1.288548231124878, + "learning_rate": 0.001493015945794056, + "loss": 2.2406, + "step": 503 + }, + { + "epoch": 0.053164556962025315, + "grad_norm": 0.8754206895828247, + "learning_rate": 0.0014929817218387632, + "loss": 2.2446, + "step": 504 + }, + { + "epoch": 0.05327004219409283, + "grad_norm": 1.2048087120056152, + "learning_rate": 0.0014929474146286638, + "loss": 2.2605, + "step": 505 + }, + { + "epoch": 0.053375527426160335, + "grad_norm": 1.3326847553253174, + "learning_rate": 0.001492913024167602, + "loss": 2.2947, + "step": 506 + }, + { + "epoch": 0.05348101265822785, + "grad_norm": 0.9137036800384521, + "learning_rate": 0.001492878550459431, + "loss": 2.257, + "step": 507 + }, + { + "epoch": 0.05358649789029536, + "grad_norm": 1.084608793258667, + "learning_rate": 0.0014928439935080143, + "loss": 2.2487, + "step": 508 + }, + { + "epoch": 0.05369198312236287, + "grad_norm": 1.1577681303024292, + "learning_rate": 0.0014928093533172243, + "loss": 2.202, + "step": 509 + }, + { + "epoch": 0.05379746835443038, + "grad_norm": 1.0054048299789429, + "learning_rate": 0.001492774629890942, + "loss": 2.2735, + "step": 510 + }, + { + "epoch": 0.05390295358649789, + "grad_norm": 0.9157722592353821, + "learning_rate": 0.0014927398232330584, + "loss": 2.2307, + "step": 511 + }, + { + "epoch": 0.0540084388185654, + "grad_norm": 0.9924795627593994, + "learning_rate": 0.0014927049333474743, + "loss": 2.2441, + "step": 512 + }, + { + "epoch": 0.05411392405063291, + "grad_norm": 1.0030115842819214, + "learning_rate": 0.001492669960238099, + "loss": 2.2478, + "step": 513 + }, + { + "epoch": 0.05421940928270042, + "grad_norm": 0.8482292890548706, + "learning_rate": 
0.001492634903908851, + "loss": 2.2519, + "step": 514 + }, + { + "epoch": 0.054324894514767935, + "grad_norm": 0.9209276437759399, + "learning_rate": 0.001492599764363659, + "loss": 2.2097, + "step": 515 + }, + { + "epoch": 0.05443037974683544, + "grad_norm": 1.051873803138733, + "learning_rate": 0.0014925645416064605, + "loss": 2.2442, + "step": 516 + }, + { + "epoch": 0.054535864978902955, + "grad_norm": 0.8542136549949646, + "learning_rate": 0.0014925292356412025, + "loss": 2.2411, + "step": 517 + }, + { + "epoch": 0.05464135021097046, + "grad_norm": 0.791685163974762, + "learning_rate": 0.001492493846471841, + "loss": 2.1907, + "step": 518 + }, + { + "epoch": 0.054746835443037975, + "grad_norm": 1.0013729333877563, + "learning_rate": 0.0014924583741023417, + "loss": 2.2528, + "step": 519 + }, + { + "epoch": 0.05485232067510549, + "grad_norm": 1.2253073453903198, + "learning_rate": 0.001492422818536679, + "loss": 2.262, + "step": 520 + }, + { + "epoch": 0.054957805907172995, + "grad_norm": 0.8693603277206421, + "learning_rate": 0.0014923871797788378, + "loss": 2.2419, + "step": 521 + }, + { + "epoch": 0.05506329113924051, + "grad_norm": 0.7842398285865784, + "learning_rate": 0.001492351457832811, + "loss": 2.2004, + "step": 522 + }, + { + "epoch": 0.055168776371308015, + "grad_norm": 0.93869549036026, + "learning_rate": 0.0014923156527026017, + "loss": 2.2488, + "step": 523 + }, + { + "epoch": 0.05527426160337553, + "grad_norm": 0.8990471363067627, + "learning_rate": 0.001492279764392222, + "loss": 2.2503, + "step": 524 + }, + { + "epoch": 0.055379746835443035, + "grad_norm": 0.8321981430053711, + "learning_rate": 0.0014922437929056934, + "loss": 2.2274, + "step": 525 + }, + { + "epoch": 0.05548523206751055, + "grad_norm": 1.0824365615844727, + "learning_rate": 0.0014922077382470468, + "loss": 2.225, + "step": 526 + }, + { + "epoch": 0.05559071729957806, + "grad_norm": 1.2773563861846924, + "learning_rate": 0.001492171600420322, + "loss": 2.2103, + "step": 527 + 
}, + { + "epoch": 0.05569620253164557, + "grad_norm": 0.9253377914428711, + "learning_rate": 0.0014921353794295684, + "loss": 2.2391, + "step": 528 + }, + { + "epoch": 0.05580168776371308, + "grad_norm": 0.9512479901313782, + "learning_rate": 0.001492099075278845, + "loss": 2.2408, + "step": 529 + }, + { + "epoch": 0.05590717299578059, + "grad_norm": 0.9813757538795471, + "learning_rate": 0.00149206268797222, + "loss": 2.2201, + "step": 530 + }, + { + "epoch": 0.0560126582278481, + "grad_norm": 1.1339943408966064, + "learning_rate": 0.0014920262175137703, + "loss": 2.2282, + "step": 531 + }, + { + "epoch": 0.056118143459915615, + "grad_norm": 1.2923930883407593, + "learning_rate": 0.001491989663907583, + "loss": 2.194, + "step": 532 + }, + { + "epoch": 0.05622362869198312, + "grad_norm": 0.8682931065559387, + "learning_rate": 0.001491953027157754, + "loss": 2.2174, + "step": 533 + }, + { + "epoch": 0.056329113924050635, + "grad_norm": 0.8394701480865479, + "learning_rate": 0.0014919163072683883, + "loss": 2.2074, + "step": 534 + }, + { + "epoch": 0.05643459915611814, + "grad_norm": 1.2486443519592285, + "learning_rate": 0.0014918795042436013, + "loss": 2.2112, + "step": 535 + }, + { + "epoch": 0.056540084388185655, + "grad_norm": 1.269212245941162, + "learning_rate": 0.001491842618087516, + "loss": 2.2163, + "step": 536 + }, + { + "epoch": 0.05664556962025316, + "grad_norm": 0.876746416091919, + "learning_rate": 0.0014918056488042665, + "loss": 2.1706, + "step": 537 + }, + { + "epoch": 0.056751054852320675, + "grad_norm": 0.7463722825050354, + "learning_rate": 0.0014917685963979949, + "loss": 2.2042, + "step": 538 + }, + { + "epoch": 0.05685654008438819, + "grad_norm": 0.8344563841819763, + "learning_rate": 0.0014917314608728536, + "loss": 2.2189, + "step": 539 + }, + { + "epoch": 0.056962025316455694, + "grad_norm": 1.253917932510376, + "learning_rate": 0.0014916942422330032, + "loss": 2.2228, + "step": 540 + }, + { + "epoch": 0.05706751054852321, + "grad_norm": 
1.1135059595108032, + "learning_rate": 0.0014916569404826146, + "loss": 2.2496, + "step": 541 + }, + { + "epoch": 0.057172995780590714, + "grad_norm": 0.940627932548523, + "learning_rate": 0.0014916195556258676, + "loss": 2.2208, + "step": 542 + }, + { + "epoch": 0.05727848101265823, + "grad_norm": 0.8965739011764526, + "learning_rate": 0.0014915820876669514, + "loss": 2.1859, + "step": 543 + }, + { + "epoch": 0.05738396624472574, + "grad_norm": 0.8374440670013428, + "learning_rate": 0.0014915445366100641, + "loss": 2.1958, + "step": 544 + }, + { + "epoch": 0.05748945147679325, + "grad_norm": 0.8810030817985535, + "learning_rate": 0.0014915069024594144, + "loss": 2.2204, + "step": 545 + }, + { + "epoch": 0.05759493670886076, + "grad_norm": 1.0695466995239258, + "learning_rate": 0.0014914691852192183, + "loss": 2.2058, + "step": 546 + }, + { + "epoch": 0.05770042194092827, + "grad_norm": 1.2655460834503174, + "learning_rate": 0.001491431384893703, + "loss": 2.2143, + "step": 547 + }, + { + "epoch": 0.05780590717299578, + "grad_norm": 0.8625794649124146, + "learning_rate": 0.0014913935014871035, + "loss": 2.2225, + "step": 548 + }, + { + "epoch": 0.057911392405063294, + "grad_norm": 0.9526817202568054, + "learning_rate": 0.0014913555350036657, + "loss": 2.2124, + "step": 549 + }, + { + "epoch": 0.0580168776371308, + "grad_norm": 0.9690890908241272, + "learning_rate": 0.001491317485447643, + "loss": 2.1759, + "step": 550 + }, + { + "epoch": 0.058122362869198314, + "grad_norm": 0.9259915947914124, + "learning_rate": 0.0014912793528233, + "loss": 2.1753, + "step": 551 + }, + { + "epoch": 0.05822784810126582, + "grad_norm": 1.0174555778503418, + "learning_rate": 0.0014912411371349088, + "loss": 2.1995, + "step": 552 + }, + { + "epoch": 0.058333333333333334, + "grad_norm": 1.3493752479553223, + "learning_rate": 0.0014912028383867522, + "loss": 2.2, + "step": 553 + }, + { + "epoch": 0.05843881856540084, + "grad_norm": 1.090035319328308, + "learning_rate": 
0.0014911644565831217, + "loss": 2.1321, + "step": 554 + }, + { + "epoch": 0.058544303797468354, + "grad_norm": 1.3238784074783325, + "learning_rate": 0.001491125991728318, + "loss": 2.1567, + "step": 555 + }, + { + "epoch": 0.05864978902953587, + "grad_norm": 0.8985042572021484, + "learning_rate": 0.001491087443826651, + "loss": 2.2042, + "step": 556 + }, + { + "epoch": 0.058755274261603374, + "grad_norm": 0.8639411926269531, + "learning_rate": 0.0014910488128824409, + "loss": 2.2248, + "step": 557 + }, + { + "epoch": 0.05886075949367089, + "grad_norm": 0.8204599022865295, + "learning_rate": 0.0014910100989000159, + "loss": 2.1738, + "step": 558 + }, + { + "epoch": 0.058966244725738394, + "grad_norm": 0.9084746837615967, + "learning_rate": 0.0014909713018837144, + "loss": 2.1446, + "step": 559 + }, + { + "epoch": 0.05907172995780591, + "grad_norm": 1.3453181982040405, + "learning_rate": 0.0014909324218378838, + "loss": 2.1813, + "step": 560 + }, + { + "epoch": 0.05917721518987342, + "grad_norm": 1.1253858804702759, + "learning_rate": 0.0014908934587668805, + "loss": 2.1817, + "step": 561 + }, + { + "epoch": 0.05928270042194093, + "grad_norm": 0.815824568271637, + "learning_rate": 0.001490854412675071, + "loss": 2.1666, + "step": 562 + }, + { + "epoch": 0.05938818565400844, + "grad_norm": 0.7732762098312378, + "learning_rate": 0.0014908152835668301, + "loss": 2.2055, + "step": 563 + }, + { + "epoch": 0.05949367088607595, + "grad_norm": 0.7473286390304565, + "learning_rate": 0.0014907760714465428, + "loss": 2.1801, + "step": 564 + }, + { + "epoch": 0.05959915611814346, + "grad_norm": 0.8543726801872253, + "learning_rate": 0.0014907367763186026, + "loss": 2.183, + "step": 565 + }, + { + "epoch": 0.05970464135021097, + "grad_norm": 1.0209376811981201, + "learning_rate": 0.0014906973981874132, + "loss": 2.1949, + "step": 566 + }, + { + "epoch": 0.05981012658227848, + "grad_norm": 1.235073208808899, + "learning_rate": 0.0014906579370573868, + "loss": 2.2046, + "step": 
567 + }, + { + "epoch": 0.059915611814345994, + "grad_norm": 1.1444753408432007, + "learning_rate": 0.0014906183929329455, + "loss": 2.1446, + "step": 568 + }, + { + "epoch": 0.0600210970464135, + "grad_norm": 0.9577688574790955, + "learning_rate": 0.00149057876581852, + "loss": 2.1166, + "step": 569 + }, + { + "epoch": 0.060126582278481014, + "grad_norm": 0.907133162021637, + "learning_rate": 0.0014905390557185508, + "loss": 2.2081, + "step": 570 + }, + { + "epoch": 0.06023206751054852, + "grad_norm": 1.0217584371566772, + "learning_rate": 0.0014904992626374879, + "loss": 2.1942, + "step": 571 + }, + { + "epoch": 0.060337552742616034, + "grad_norm": 1.1450847387313843, + "learning_rate": 0.0014904593865797903, + "loss": 2.1773, + "step": 572 + }, + { + "epoch": 0.06044303797468355, + "grad_norm": 1.208625078201294, + "learning_rate": 0.0014904194275499258, + "loss": 2.1945, + "step": 573 + }, + { + "epoch": 0.060548523206751054, + "grad_norm": 1.1545144319534302, + "learning_rate": 0.0014903793855523726, + "loss": 2.1552, + "step": 574 + }, + { + "epoch": 0.06065400843881857, + "grad_norm": 0.8768011331558228, + "learning_rate": 0.0014903392605916175, + "loss": 2.206, + "step": 575 + }, + { + "epoch": 0.060759493670886074, + "grad_norm": 0.8840968608856201, + "learning_rate": 0.0014902990526721564, + "loss": 2.1787, + "step": 576 + }, + { + "epoch": 0.06086497890295359, + "grad_norm": 0.861977756023407, + "learning_rate": 0.0014902587617984951, + "loss": 2.1647, + "step": 577 + }, + { + "epoch": 0.0609704641350211, + "grad_norm": 1.0897611379623413, + "learning_rate": 0.0014902183879751483, + "loss": 2.1708, + "step": 578 + }, + { + "epoch": 0.06107594936708861, + "grad_norm": 1.0341901779174805, + "learning_rate": 0.0014901779312066399, + "loss": 2.1711, + "step": 579 + }, + { + "epoch": 0.06118143459915612, + "grad_norm": 1.0672699213027954, + "learning_rate": 0.0014901373914975036, + "loss": 2.1801, + "step": 580 + }, + { + "epoch": 0.06128691983122363, + 
"grad_norm": 1.2755062580108643, + "learning_rate": 0.0014900967688522818, + "loss": 2.1853, + "step": 581 + }, + { + "epoch": 0.06139240506329114, + "grad_norm": 0.9460834860801697, + "learning_rate": 0.0014900560632755265, + "loss": 2.1372, + "step": 582 + }, + { + "epoch": 0.06149789029535865, + "grad_norm": 1.1709284782409668, + "learning_rate": 0.0014900152747717994, + "loss": 2.1878, + "step": 583 + }, + { + "epoch": 0.06160337552742616, + "grad_norm": 0.8594476580619812, + "learning_rate": 0.0014899744033456705, + "loss": 2.1515, + "step": 584 + }, + { + "epoch": 0.061708860759493674, + "grad_norm": 0.7174265384674072, + "learning_rate": 0.0014899334490017198, + "loss": 2.1798, + "step": 585 + }, + { + "epoch": 0.06181434599156118, + "grad_norm": 0.9231157898902893, + "learning_rate": 0.0014898924117445367, + "loss": 2.1591, + "step": 586 + }, + { + "epoch": 0.061919831223628694, + "grad_norm": 1.15786612033844, + "learning_rate": 0.0014898512915787192, + "loss": 2.1274, + "step": 587 + }, + { + "epoch": 0.0620253164556962, + "grad_norm": 1.252722144126892, + "learning_rate": 0.0014898100885088754, + "loss": 2.139, + "step": 588 + }, + { + "epoch": 0.06213080168776371, + "grad_norm": 0.9509036540985107, + "learning_rate": 0.001489768802539622, + "loss": 2.1681, + "step": 589 + }, + { + "epoch": 0.06223628691983123, + "grad_norm": 0.7998105883598328, + "learning_rate": 0.0014897274336755856, + "loss": 2.1166, + "step": 590 + }, + { + "epoch": 0.06234177215189873, + "grad_norm": 0.7956495881080627, + "learning_rate": 0.0014896859819214018, + "loss": 2.1303, + "step": 591 + }, + { + "epoch": 0.06244725738396625, + "grad_norm": 0.7286757826805115, + "learning_rate": 0.001489644447281715, + "loss": 2.13, + "step": 592 + }, + { + "epoch": 0.06255274261603376, + "grad_norm": 0.8013424873352051, + "learning_rate": 0.00148960282976118, + "loss": 2.1534, + "step": 593 + }, + { + "epoch": 0.06265822784810127, + "grad_norm": 0.9863945841789246, + "learning_rate": 
0.0014895611293644596, + "loss": 2.115, + "step": 594 + }, + { + "epoch": 0.06276371308016877, + "grad_norm": 1.508673906326294, + "learning_rate": 0.0014895193460962271, + "loss": 2.1487, + "step": 595 + }, + { + "epoch": 0.0628691983122363, + "grad_norm": 0.8519280552864075, + "learning_rate": 0.001489477479961164, + "loss": 2.1058, + "step": 596 + }, + { + "epoch": 0.0629746835443038, + "grad_norm": 1.0871610641479492, + "learning_rate": 0.0014894355309639621, + "loss": 2.1435, + "step": 597 + }, + { + "epoch": 0.0630801687763713, + "grad_norm": 1.472081184387207, + "learning_rate": 0.0014893934991093221, + "loss": 2.1928, + "step": 598 + }, + { + "epoch": 0.06318565400843881, + "grad_norm": 0.8907502889633179, + "learning_rate": 0.0014893513844019533, + "loss": 2.138, + "step": 599 + }, + { + "epoch": 0.06329113924050633, + "grad_norm": 0.9362555146217346, + "learning_rate": 0.001489309186846575, + "loss": 2.1378, + "step": 600 + }, + { + "epoch": 0.06339662447257384, + "grad_norm": 1.3416175842285156, + "learning_rate": 0.001489266906447916, + "loss": 2.1525, + "step": 601 + }, + { + "epoch": 0.06350210970464135, + "grad_norm": 0.9855182766914368, + "learning_rate": 0.0014892245432107138, + "loss": 2.1471, + "step": 602 + }, + { + "epoch": 0.06360759493670887, + "grad_norm": 0.8860292434692383, + "learning_rate": 0.0014891820971397152, + "loss": 2.1628, + "step": 603 + }, + { + "epoch": 0.06371308016877637, + "grad_norm": 0.8896629214286804, + "learning_rate": 0.001489139568239677, + "loss": 2.1027, + "step": 604 + }, + { + "epoch": 0.06381856540084388, + "grad_norm": 0.8222452998161316, + "learning_rate": 0.0014890969565153642, + "loss": 2.1414, + "step": 605 + }, + { + "epoch": 0.06392405063291139, + "grad_norm": 1.0252532958984375, + "learning_rate": 0.0014890542619715522, + "loss": 2.1171, + "step": 606 + }, + { + "epoch": 0.0640295358649789, + "grad_norm": 1.384576439857483, + "learning_rate": 0.0014890114846130248, + "loss": 2.1655, + "step": 607 + }, + 
{ + "epoch": 0.06413502109704641, + "grad_norm": 0.8224595785140991, + "learning_rate": 0.0014889686244445755, + "loss": 2.1959, + "step": 608 + }, + { + "epoch": 0.06424050632911392, + "grad_norm": 0.919645369052887, + "learning_rate": 0.0014889256814710071, + "loss": 2.1318, + "step": 609 + }, + { + "epoch": 0.06434599156118144, + "grad_norm": 1.5191885232925415, + "learning_rate": 0.0014888826556971313, + "loss": 2.1443, + "step": 610 + }, + { + "epoch": 0.06445147679324895, + "grad_norm": 0.8383588790893555, + "learning_rate": 0.0014888395471277698, + "loss": 2.1386, + "step": 611 + }, + { + "epoch": 0.06455696202531645, + "grad_norm": 1.0460540056228638, + "learning_rate": 0.0014887963557677526, + "loss": 2.1357, + "step": 612 + }, + { + "epoch": 0.06466244725738397, + "grad_norm": 1.9410914182662964, + "learning_rate": 0.00148875308162192, + "loss": 2.1445, + "step": 613 + }, + { + "epoch": 0.06476793248945148, + "grad_norm": 0.9633649587631226, + "learning_rate": 0.0014887097246951205, + "loss": 2.0894, + "step": 614 + }, + { + "epoch": 0.06487341772151899, + "grad_norm": 2.7085015773773193, + "learning_rate": 0.001488666284992213, + "loss": 2.1858, + "step": 615 + }, + { + "epoch": 0.06497890295358649, + "grad_norm": 1.6525925397872925, + "learning_rate": 0.001488622762518065, + "loss": 2.1731, + "step": 616 + }, + { + "epoch": 0.06508438818565401, + "grad_norm": 2.1567559242248535, + "learning_rate": 0.0014885791572775533, + "loss": 2.182, + "step": 617 + }, + { + "epoch": 0.06518987341772152, + "grad_norm": 1.92631196975708, + "learning_rate": 0.0014885354692755642, + "loss": 2.2057, + "step": 618 + }, + { + "epoch": 0.06529535864978903, + "grad_norm": 1.1746656894683838, + "learning_rate": 0.001488491698516993, + "loss": 2.1988, + "step": 619 + }, + { + "epoch": 0.06540084388185655, + "grad_norm": 1.1664940118789673, + "learning_rate": 0.0014884478450067444, + "loss": 2.1575, + "step": 620 + }, + { + "epoch": 0.06550632911392405, + "grad_norm": 
1.240893006324768, + "learning_rate": 0.001488403908749733, + "loss": 2.1301, + "step": 621 + }, + { + "epoch": 0.06561181434599156, + "grad_norm": 0.7965457439422607, + "learning_rate": 0.0014883598897508811, + "loss": 2.144, + "step": 622 + }, + { + "epoch": 0.06571729957805907, + "grad_norm": 1.3289674520492554, + "learning_rate": 0.0014883157880151222, + "loss": 2.1211, + "step": 623 + }, + { + "epoch": 0.06582278481012659, + "grad_norm": 0.8707451820373535, + "learning_rate": 0.0014882716035473974, + "loss": 2.0814, + "step": 624 + }, + { + "epoch": 0.06592827004219409, + "grad_norm": 1.020910382270813, + "learning_rate": 0.001488227336352658, + "loss": 2.114, + "step": 625 + }, + { + "epoch": 0.0660337552742616, + "grad_norm": 1.2227314710617065, + "learning_rate": 0.0014881829864358644, + "loss": 2.1099, + "step": 626 + }, + { + "epoch": 0.06613924050632912, + "grad_norm": 0.8931713104248047, + "learning_rate": 0.0014881385538019867, + "loss": 2.1267, + "step": 627 + }, + { + "epoch": 0.06624472573839663, + "grad_norm": 0.9250797033309937, + "learning_rate": 0.0014880940384560028, + "loss": 2.1112, + "step": 628 + }, + { + "epoch": 0.06635021097046413, + "grad_norm": 0.9276837706565857, + "learning_rate": 0.0014880494404029016, + "loss": 2.1267, + "step": 629 + }, + { + "epoch": 0.06645569620253164, + "grad_norm": 0.6908464431762695, + "learning_rate": 0.0014880047596476807, + "loss": 2.0757, + "step": 630 + }, + { + "epoch": 0.06656118143459916, + "grad_norm": 0.9262060523033142, + "learning_rate": 0.0014879599961953461, + "loss": 2.0901, + "step": 631 + }, + { + "epoch": 0.06666666666666667, + "grad_norm": 1.0176233053207397, + "learning_rate": 0.0014879151500509142, + "loss": 2.13, + "step": 632 + }, + { + "epoch": 0.06677215189873417, + "grad_norm": 0.7936601042747498, + "learning_rate": 0.0014878702212194103, + "loss": 2.1178, + "step": 633 + }, + { + "epoch": 0.06687763713080169, + "grad_norm": 0.796466588973999, + "learning_rate": 
0.0014878252097058685, + "loss": 2.0867, + "step": 634 + }, + { + "epoch": 0.0669831223628692, + "grad_norm": 0.8000777959823608, + "learning_rate": 0.001487780115515333, + "loss": 2.1553, + "step": 635 + }, + { + "epoch": 0.0670886075949367, + "grad_norm": 0.7119607925415039, + "learning_rate": 0.0014877349386528565, + "loss": 2.1042, + "step": 636 + }, + { + "epoch": 0.06719409282700423, + "grad_norm": 0.8629552125930786, + "learning_rate": 0.0014876896791235015, + "loss": 2.1663, + "step": 637 + }, + { + "epoch": 0.06729957805907173, + "grad_norm": 0.8312908411026001, + "learning_rate": 0.0014876443369323397, + "loss": 2.1266, + "step": 638 + }, + { + "epoch": 0.06740506329113924, + "grad_norm": 0.7729882597923279, + "learning_rate": 0.0014875989120844517, + "loss": 2.1163, + "step": 639 + }, + { + "epoch": 0.06751054852320675, + "grad_norm": 0.8617845177650452, + "learning_rate": 0.0014875534045849274, + "loss": 2.123, + "step": 640 + }, + { + "epoch": 0.06761603375527427, + "grad_norm": 0.8141010403633118, + "learning_rate": 0.0014875078144388665, + "loss": 2.1053, + "step": 641 + }, + { + "epoch": 0.06772151898734177, + "grad_norm": 0.9151472449302673, + "learning_rate": 0.0014874621416513774, + "loss": 2.1256, + "step": 642 + }, + { + "epoch": 0.06782700421940928, + "grad_norm": 1.0169949531555176, + "learning_rate": 0.001487416386227578, + "loss": 2.1297, + "step": 643 + }, + { + "epoch": 0.0679324894514768, + "grad_norm": 1.2091503143310547, + "learning_rate": 0.0014873705481725952, + "loss": 2.0892, + "step": 644 + }, + { + "epoch": 0.0680379746835443, + "grad_norm": 0.9129445552825928, + "learning_rate": 0.0014873246274915658, + "loss": 2.0952, + "step": 645 + }, + { + "epoch": 0.06814345991561181, + "grad_norm": 0.7807171940803528, + "learning_rate": 0.0014872786241896354, + "loss": 2.0714, + "step": 646 + }, + { + "epoch": 0.06824894514767932, + "grad_norm": 0.7391737699508667, + "learning_rate": 0.0014872325382719587, + "loss": 2.1482, + "step": 647 + 
}, + { + "epoch": 0.06835443037974684, + "grad_norm": 0.7790183424949646, + "learning_rate": 0.0014871863697436998, + "loss": 2.1602, + "step": 648 + }, + { + "epoch": 0.06845991561181435, + "grad_norm": 0.879579484462738, + "learning_rate": 0.0014871401186100322, + "loss": 2.1199, + "step": 649 + }, + { + "epoch": 0.06856540084388185, + "grad_norm": 0.9690933227539062, + "learning_rate": 0.0014870937848761388, + "loss": 2.1369, + "step": 650 + }, + { + "epoch": 0.06867088607594937, + "grad_norm": 0.9486793875694275, + "learning_rate": 0.0014870473685472112, + "loss": 2.0974, + "step": 651 + }, + { + "epoch": 0.06877637130801688, + "grad_norm": 0.9694644808769226, + "learning_rate": 0.0014870008696284507, + "loss": 2.1112, + "step": 652 + }, + { + "epoch": 0.06888185654008439, + "grad_norm": 0.8347042202949524, + "learning_rate": 0.0014869542881250678, + "loss": 2.1086, + "step": 653 + }, + { + "epoch": 0.0689873417721519, + "grad_norm": 0.7752441167831421, + "learning_rate": 0.001486907624042282, + "loss": 2.121, + "step": 654 + }, + { + "epoch": 0.06909282700421941, + "grad_norm": 1.0011439323425293, + "learning_rate": 0.0014868608773853226, + "loss": 2.1068, + "step": 655 + }, + { + "epoch": 0.06919831223628692, + "grad_norm": 0.8925149440765381, + "learning_rate": 0.0014868140481594273, + "loss": 2.1163, + "step": 656 + }, + { + "epoch": 0.06930379746835443, + "grad_norm": 0.8752669095993042, + "learning_rate": 0.001486767136369844, + "loss": 2.089, + "step": 657 + }, + { + "epoch": 0.06940928270042195, + "grad_norm": 1.11385977268219, + "learning_rate": 0.0014867201420218292, + "loss": 2.0876, + "step": 658 + }, + { + "epoch": 0.06951476793248945, + "grad_norm": 1.2447515726089478, + "learning_rate": 0.0014866730651206487, + "loss": 2.129, + "step": 659 + }, + { + "epoch": 0.06962025316455696, + "grad_norm": 0.8722012042999268, + "learning_rate": 0.001486625905671578, + "loss": 2.1046, + "step": 660 + }, + { + "epoch": 0.06972573839662448, + "grad_norm": 
0.8304708003997803, + "learning_rate": 0.0014865786636799015, + "loss": 2.0786, + "step": 661 + }, + { + "epoch": 0.06983122362869199, + "grad_norm": 0.946027934551239, + "learning_rate": 0.0014865313391509126, + "loss": 2.0896, + "step": 662 + }, + { + "epoch": 0.06993670886075949, + "grad_norm": 1.423495888710022, + "learning_rate": 0.0014864839320899148, + "loss": 2.0549, + "step": 663 + }, + { + "epoch": 0.070042194092827, + "grad_norm": 1.0090769529342651, + "learning_rate": 0.0014864364425022198, + "loss": 2.1215, + "step": 664 + }, + { + "epoch": 0.07014767932489452, + "grad_norm": 0.8582932949066162, + "learning_rate": 0.001486388870393149, + "loss": 2.0974, + "step": 665 + }, + { + "epoch": 0.07025316455696203, + "grad_norm": 0.8281795382499695, + "learning_rate": 0.0014863412157680336, + "loss": 2.0674, + "step": 666 + }, + { + "epoch": 0.07035864978902953, + "grad_norm": 1.1328200101852417, + "learning_rate": 0.0014862934786322131, + "loss": 2.1132, + "step": 667 + }, + { + "epoch": 0.07046413502109705, + "grad_norm": 1.2562497854232788, + "learning_rate": 0.0014862456589910368, + "loss": 2.08, + "step": 668 + }, + { + "epoch": 0.07056962025316456, + "grad_norm": 0.8626007437705994, + "learning_rate": 0.0014861977568498632, + "loss": 2.1288, + "step": 669 + }, + { + "epoch": 0.07067510548523206, + "grad_norm": 0.9340394735336304, + "learning_rate": 0.00148614977221406, + "loss": 2.114, + "step": 670 + }, + { + "epoch": 0.07078059071729957, + "grad_norm": 1.0415736436843872, + "learning_rate": 0.001486101705089004, + "loss": 2.1097, + "step": 671 + }, + { + "epoch": 0.07088607594936709, + "grad_norm": 1.6029143333435059, + "learning_rate": 0.0014860535554800814, + "loss": 2.0973, + "step": 672 + }, + { + "epoch": 0.0709915611814346, + "grad_norm": 0.7852091789245605, + "learning_rate": 0.0014860053233926875, + "loss": 2.115, + "step": 673 + }, + { + "epoch": 0.0710970464135021, + "grad_norm": 1.6863166093826294, + "learning_rate": 0.0014859570088322273, + 
"loss": 2.0516, + "step": 674 + }, + { + "epoch": 0.07120253164556962, + "grad_norm": 1.212555170059204, + "learning_rate": 0.0014859086118041145, + "loss": 2.109, + "step": 675 + }, + { + "epoch": 0.07130801687763713, + "grad_norm": 0.721516489982605, + "learning_rate": 0.001485860132313772, + "loss": 2.0842, + "step": 676 + }, + { + "epoch": 0.07141350210970464, + "grad_norm": 0.8842225670814514, + "learning_rate": 0.0014858115703666325, + "loss": 2.0584, + "step": 677 + }, + { + "epoch": 0.07151898734177216, + "grad_norm": 1.0598859786987305, + "learning_rate": 0.001485762925968137, + "loss": 2.0942, + "step": 678 + }, + { + "epoch": 0.07162447257383966, + "grad_norm": 0.9351316094398499, + "learning_rate": 0.0014857141991237372, + "loss": 2.0661, + "step": 679 + }, + { + "epoch": 0.07172995780590717, + "grad_norm": 0.7659926414489746, + "learning_rate": 0.0014856653898388927, + "loss": 2.0964, + "step": 680 + }, + { + "epoch": 0.07183544303797468, + "grad_norm": 0.7767763733863831, + "learning_rate": 0.0014856164981190728, + "loss": 2.104, + "step": 681 + }, + { + "epoch": 0.0719409282700422, + "grad_norm": 0.7761382460594177, + "learning_rate": 0.0014855675239697564, + "loss": 2.1091, + "step": 682 + }, + { + "epoch": 0.0720464135021097, + "grad_norm": 0.7690410017967224, + "learning_rate": 0.0014855184673964311, + "loss": 2.096, + "step": 683 + }, + { + "epoch": 0.07215189873417721, + "grad_norm": 0.9066241979598999, + "learning_rate": 0.0014854693284045936, + "loss": 2.098, + "step": 684 + }, + { + "epoch": 0.07225738396624473, + "grad_norm": 1.0239850282669067, + "learning_rate": 0.0014854201069997505, + "loss": 2.063, + "step": 685 + }, + { + "epoch": 0.07236286919831224, + "grad_norm": 1.0527372360229492, + "learning_rate": 0.0014853708031874176, + "loss": 2.1242, + "step": 686 + }, + { + "epoch": 0.07246835443037974, + "grad_norm": 1.064555287361145, + "learning_rate": 0.001485321416973119, + "loss": 2.0879, + "step": 687 + }, + { + "epoch": 
0.07257383966244725, + "grad_norm": 0.8515613079071045, + "learning_rate": 0.0014852719483623893, + "loss": 2.1161, + "step": 688 + }, + { + "epoch": 0.07267932489451477, + "grad_norm": 0.7750493884086609, + "learning_rate": 0.001485222397360771, + "loss": 2.031, + "step": 689 + }, + { + "epoch": 0.07278481012658228, + "grad_norm": 0.8085260987281799, + "learning_rate": 0.001485172763973817, + "loss": 2.1109, + "step": 690 + }, + { + "epoch": 0.07289029535864978, + "grad_norm": 0.8057886958122253, + "learning_rate": 0.0014851230482070892, + "loss": 2.0964, + "step": 691 + }, + { + "epoch": 0.0729957805907173, + "grad_norm": 0.8714965581893921, + "learning_rate": 0.001485073250066158, + "loss": 2.0631, + "step": 692 + }, + { + "epoch": 0.07310126582278481, + "grad_norm": 0.8654559850692749, + "learning_rate": 0.0014850233695566034, + "loss": 2.0966, + "step": 693 + }, + { + "epoch": 0.07320675105485232, + "grad_norm": 0.7178478240966797, + "learning_rate": 0.0014849734066840158, + "loss": 2.1225, + "step": 694 + }, + { + "epoch": 0.07331223628691984, + "grad_norm": 0.8273146748542786, + "learning_rate": 0.0014849233614539926, + "loss": 2.0771, + "step": 695 + }, + { + "epoch": 0.07341772151898734, + "grad_norm": 1.045142650604248, + "learning_rate": 0.001484873233872142, + "loss": 2.0672, + "step": 696 + }, + { + "epoch": 0.07352320675105485, + "grad_norm": 1.1057885885238647, + "learning_rate": 0.0014848230239440812, + "loss": 2.087, + "step": 697 + }, + { + "epoch": 0.07362869198312236, + "grad_norm": 1.0068143606185913, + "learning_rate": 0.0014847727316754367, + "loss": 2.0547, + "step": 698 + }, + { + "epoch": 0.07373417721518988, + "grad_norm": 0.8035816550254822, + "learning_rate": 0.0014847223570718436, + "loss": 2.037, + "step": 699 + }, + { + "epoch": 0.07383966244725738, + "grad_norm": 0.8202975988388062, + "learning_rate": 0.0014846719001389466, + "loss": 2.0303, + "step": 700 + }, + { + "epoch": 0.07394514767932489, + "grad_norm": 0.8291400074958801, + 
"learning_rate": 0.0014846213608823997, + "loss": 2.0313, + "step": 701 + }, + { + "epoch": 0.07405063291139241, + "grad_norm": 0.7621009349822998, + "learning_rate": 0.0014845707393078664, + "loss": 2.0487, + "step": 702 + }, + { + "epoch": 0.07415611814345992, + "grad_norm": 0.8081808090209961, + "learning_rate": 0.0014845200354210186, + "loss": 2.0922, + "step": 703 + }, + { + "epoch": 0.07426160337552742, + "grad_norm": 0.7606956362724304, + "learning_rate": 0.0014844692492275385, + "loss": 2.0663, + "step": 704 + }, + { + "epoch": 0.07436708860759493, + "grad_norm": 0.8493958115577698, + "learning_rate": 0.0014844183807331164, + "loss": 2.062, + "step": 705 + }, + { + "epoch": 0.07447257383966245, + "grad_norm": 0.8721317052841187, + "learning_rate": 0.0014843674299434527, + "loss": 2.0757, + "step": 706 + }, + { + "epoch": 0.07457805907172996, + "grad_norm": 0.865729033946991, + "learning_rate": 0.0014843163968642566, + "loss": 2.054, + "step": 707 + }, + { + "epoch": 0.07468354430379746, + "grad_norm": 1.0106197595596313, + "learning_rate": 0.0014842652815012466, + "loss": 2.0818, + "step": 708 + }, + { + "epoch": 0.07478902953586498, + "grad_norm": 1.2174444198608398, + "learning_rate": 0.0014842140838601501, + "loss": 2.074, + "step": 709 + }, + { + "epoch": 0.07489451476793249, + "grad_norm": 1.1006883382797241, + "learning_rate": 0.001484162803946705, + "loss": 2.0704, + "step": 710 + }, + { + "epoch": 0.075, + "grad_norm": 0.865328848361969, + "learning_rate": 0.0014841114417666564, + "loss": 2.0594, + "step": 711 + }, + { + "epoch": 0.0751054852320675, + "grad_norm": 0.8959242701530457, + "learning_rate": 0.0014840599973257604, + "loss": 2.0763, + "step": 712 + }, + { + "epoch": 0.07521097046413502, + "grad_norm": 0.7798354625701904, + "learning_rate": 0.001484008470629781, + "loss": 2.0633, + "step": 713 + }, + { + "epoch": 0.07531645569620253, + "grad_norm": 0.7657397389411926, + "learning_rate": 0.0014839568616844927, + "loss": 2.039, + "step": 714 
+ }, + { + "epoch": 0.07542194092827004, + "grad_norm": 0.7751082181930542, + "learning_rate": 0.0014839051704956781, + "loss": 2.0634, + "step": 715 + }, + { + "epoch": 0.07552742616033756, + "grad_norm": 0.8362546563148499, + "learning_rate": 0.0014838533970691296, + "loss": 2.0473, + "step": 716 + }, + { + "epoch": 0.07563291139240506, + "grad_norm": 0.7961884140968323, + "learning_rate": 0.0014838015414106486, + "loss": 2.0365, + "step": 717 + }, + { + "epoch": 0.07573839662447257, + "grad_norm": 0.7485106587409973, + "learning_rate": 0.0014837496035260457, + "loss": 2.0026, + "step": 718 + }, + { + "epoch": 0.07584388185654009, + "grad_norm": 0.9232888221740723, + "learning_rate": 0.0014836975834211412, + "loss": 2.0594, + "step": 719 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 0.8813710808753967, + "learning_rate": 0.0014836454811017635, + "loss": 2.0416, + "step": 720 + }, + { + "epoch": 0.0760548523206751, + "grad_norm": 1.0385135412216187, + "learning_rate": 0.0014835932965737517, + "loss": 2.074, + "step": 721 + }, + { + "epoch": 0.07616033755274261, + "grad_norm": 1.4035593271255493, + "learning_rate": 0.0014835410298429529, + "loss": 2.0636, + "step": 722 + }, + { + "epoch": 0.07626582278481013, + "grad_norm": 1.1344470977783203, + "learning_rate": 0.001483488680915224, + "loss": 2.0593, + "step": 723 + }, + { + "epoch": 0.07637130801687764, + "grad_norm": 0.8368368744850159, + "learning_rate": 0.0014834362497964308, + "loss": 2.0191, + "step": 724 + }, + { + "epoch": 0.07647679324894514, + "grad_norm": 0.8818578124046326, + "learning_rate": 0.0014833837364924484, + "loss": 2.0495, + "step": 725 + }, + { + "epoch": 0.07658227848101266, + "grad_norm": 1.0669822692871094, + "learning_rate": 0.0014833311410091617, + "loss": 2.044, + "step": 726 + }, + { + "epoch": 0.07668776371308017, + "grad_norm": 1.5602134466171265, + "learning_rate": 0.0014832784633524638, + "loss": 2.0463, + "step": 727 + }, + { + "epoch": 0.07679324894514768, + 
"grad_norm": 0.8080726861953735, + "learning_rate": 0.0014832257035282577, + "loss": 2.0629, + "step": 728 + }, + { + "epoch": 0.07689873417721518, + "grad_norm": 0.7921810746192932, + "learning_rate": 0.0014831728615424553, + "loss": 2.0432, + "step": 729 + }, + { + "epoch": 0.0770042194092827, + "grad_norm": 1.01386559009552, + "learning_rate": 0.0014831199374009778, + "loss": 2.0224, + "step": 730 + }, + { + "epoch": 0.07710970464135021, + "grad_norm": 1.2584646940231323, + "learning_rate": 0.0014830669311097554, + "loss": 2.0583, + "step": 731 + }, + { + "epoch": 0.07721518987341772, + "grad_norm": 1.114546775817871, + "learning_rate": 0.0014830138426747282, + "loss": 2.0531, + "step": 732 + }, + { + "epoch": 0.07732067510548524, + "grad_norm": 1.0333647727966309, + "learning_rate": 0.0014829606721018448, + "loss": 2.0647, + "step": 733 + }, + { + "epoch": 0.07742616033755274, + "grad_norm": 0.8713186383247375, + "learning_rate": 0.0014829074193970634, + "loss": 2.0855, + "step": 734 + }, + { + "epoch": 0.07753164556962025, + "grad_norm": 0.8113733530044556, + "learning_rate": 0.0014828540845663507, + "loss": 2.0507, + "step": 735 + }, + { + "epoch": 0.07763713080168777, + "grad_norm": 0.8446391820907593, + "learning_rate": 0.0014828006676156837, + "loss": 2.0206, + "step": 736 + }, + { + "epoch": 0.07774261603375528, + "grad_norm": 0.8235129117965698, + "learning_rate": 0.0014827471685510477, + "loss": 2.06, + "step": 737 + }, + { + "epoch": 0.07784810126582278, + "grad_norm": 0.7852071523666382, + "learning_rate": 0.0014826935873784378, + "loss": 2.0316, + "step": 738 + }, + { + "epoch": 0.07795358649789029, + "grad_norm": 0.8069316744804382, + "learning_rate": 0.0014826399241038577, + "loss": 2.0775, + "step": 739 + }, + { + "epoch": 0.07805907172995781, + "grad_norm": 1.0519003868103027, + "learning_rate": 0.0014825861787333208, + "loss": 2.0248, + "step": 740 + }, + { + "epoch": 0.07816455696202532, + "grad_norm": 1.0177693367004395, + "learning_rate": 
0.00148253235127285, + "loss": 2.0369, + "step": 741 + }, + { + "epoch": 0.07827004219409282, + "grad_norm": 1.100839614868164, + "learning_rate": 0.001482478441728476, + "loss": 2.0734, + "step": 742 + }, + { + "epoch": 0.07837552742616034, + "grad_norm": 0.9824693202972412, + "learning_rate": 0.0014824244501062402, + "loss": 2.0396, + "step": 743 + }, + { + "epoch": 0.07848101265822785, + "grad_norm": 0.8526871800422668, + "learning_rate": 0.0014823703764121929, + "loss": 2.0514, + "step": 744 + }, + { + "epoch": 0.07858649789029536, + "grad_norm": 0.7379761934280396, + "learning_rate": 0.0014823162206523926, + "loss": 2.0486, + "step": 745 + }, + { + "epoch": 0.07869198312236286, + "grad_norm": 0.7075592875480652, + "learning_rate": 0.0014822619828329085, + "loss": 2.0396, + "step": 746 + }, + { + "epoch": 0.07879746835443038, + "grad_norm": 0.7206206321716309, + "learning_rate": 0.0014822076629598176, + "loss": 2.015, + "step": 747 + }, + { + "epoch": 0.07890295358649789, + "grad_norm": 0.7520971894264221, + "learning_rate": 0.001482153261039207, + "loss": 1.9823, + "step": 748 + }, + { + "epoch": 0.0790084388185654, + "grad_norm": 0.6925330758094788, + "learning_rate": 0.0014820987770771726, + "loss": 2.019, + "step": 749 + }, + { + "epoch": 0.07911392405063292, + "grad_norm": 0.792007327079773, + "learning_rate": 0.0014820442110798197, + "loss": 2.0155, + "step": 750 + }, + { + "epoch": 0.07921940928270042, + "grad_norm": 1.2560354471206665, + "learning_rate": 0.0014819895630532628, + "loss": 2.0406, + "step": 751 + }, + { + "epoch": 0.07932489451476793, + "grad_norm": 1.6409591436386108, + "learning_rate": 0.0014819348330036251, + "loss": 2.0743, + "step": 752 + }, + { + "epoch": 0.07943037974683544, + "grad_norm": 0.8316405415534973, + "learning_rate": 0.0014818800209370397, + "loss": 2.0303, + "step": 753 + }, + { + "epoch": 0.07953586497890296, + "grad_norm": 1.1001383066177368, + "learning_rate": 0.0014818251268596486, + "loss": 2.0394, + "step": 754 + 
}, + { + "epoch": 0.07964135021097046, + "grad_norm": 2.0293633937835693, + "learning_rate": 0.0014817701507776025, + "loss": 2.0686, + "step": 755 + }, + { + "epoch": 0.07974683544303797, + "grad_norm": 0.9674520492553711, + "learning_rate": 0.0014817150926970625, + "loss": 2.028, + "step": 756 + }, + { + "epoch": 0.07985232067510549, + "grad_norm": 2.865534782409668, + "learning_rate": 0.0014816599526241974, + "loss": 2.0705, + "step": 757 + }, + { + "epoch": 0.079957805907173, + "grad_norm": 1.949581265449524, + "learning_rate": 0.0014816047305651863, + "loss": 2.1251, + "step": 758 + }, + { + "epoch": 0.0800632911392405, + "grad_norm": 1.744071125984192, + "learning_rate": 0.0014815494265262169, + "loss": 2.0456, + "step": 759 + }, + { + "epoch": 0.08016877637130802, + "grad_norm": 1.6561661958694458, + "learning_rate": 0.0014814940405134865, + "loss": 2.0571, + "step": 760 + }, + { + "epoch": 0.08027426160337553, + "grad_norm": 1.265213131904602, + "learning_rate": 0.0014814385725332015, + "loss": 2.099, + "step": 761 + }, + { + "epoch": 0.08037974683544304, + "grad_norm": 1.2245393991470337, + "learning_rate": 0.001481383022591577, + "loss": 2.0387, + "step": 762 + }, + { + "epoch": 0.08048523206751054, + "grad_norm": 1.160888671875, + "learning_rate": 0.0014813273906948378, + "loss": 2.0098, + "step": 763 + }, + { + "epoch": 0.08059071729957806, + "grad_norm": 1.0517593622207642, + "learning_rate": 0.0014812716768492177, + "loss": 2.028, + "step": 764 + }, + { + "epoch": 0.08069620253164557, + "grad_norm": 1.5984841585159302, + "learning_rate": 0.0014812158810609598, + "loss": 2.0329, + "step": 765 + }, + { + "epoch": 0.08080168776371308, + "grad_norm": 1.0598139762878418, + "learning_rate": 0.0014811600033363165, + "loss": 2.0361, + "step": 766 + }, + { + "epoch": 0.0809071729957806, + "grad_norm": 1.881298303604126, + "learning_rate": 0.0014811040436815486, + "loss": 2.0235, + "step": 767 + }, + { + "epoch": 0.0810126582278481, + "grad_norm": 
1.29306960105896, + "learning_rate": 0.001481048002102927, + "loss": 2.033, + "step": 768 + }, + { + "epoch": 0.08111814345991561, + "grad_norm": 1.5166429281234741, + "learning_rate": 0.0014809918786067315, + "loss": 2.0589, + "step": 769 + }, + { + "epoch": 0.08122362869198312, + "grad_norm": 1.2242231369018555, + "learning_rate": 0.001480935673199251, + "loss": 2.0068, + "step": 770 + }, + { + "epoch": 0.08132911392405064, + "grad_norm": 1.4611306190490723, + "learning_rate": 0.0014808793858867837, + "loss": 2.0148, + "step": 771 + }, + { + "epoch": 0.08143459915611814, + "grad_norm": 1.1701828241348267, + "learning_rate": 0.0014808230166756366, + "loss": 2.0903, + "step": 772 + }, + { + "epoch": 0.08154008438818565, + "grad_norm": 1.2128311395645142, + "learning_rate": 0.0014807665655721261, + "loss": 2.0228, + "step": 773 + }, + { + "epoch": 0.08164556962025317, + "grad_norm": 1.0850919485092163, + "learning_rate": 0.0014807100325825782, + "loss": 2.0281, + "step": 774 + }, + { + "epoch": 0.08175105485232068, + "grad_norm": 1.1375519037246704, + "learning_rate": 0.0014806534177133274, + "loss": 1.9864, + "step": 775 + }, + { + "epoch": 0.08185654008438818, + "grad_norm": 0.9810986518859863, + "learning_rate": 0.0014805967209707178, + "loss": 1.9733, + "step": 776 + }, + { + "epoch": 0.0819620253164557, + "grad_norm": 1.4002084732055664, + "learning_rate": 0.0014805399423611025, + "loss": 2.0171, + "step": 777 + }, + { + "epoch": 0.08206751054852321, + "grad_norm": 0.9798365235328674, + "learning_rate": 0.0014804830818908438, + "loss": 2.0397, + "step": 778 + }, + { + "epoch": 0.08217299578059072, + "grad_norm": 1.2090212106704712, + "learning_rate": 0.0014804261395663133, + "loss": 2.0647, + "step": 779 + }, + { + "epoch": 0.08227848101265822, + "grad_norm": 0.974387526512146, + "learning_rate": 0.0014803691153938915, + "loss": 2.0383, + "step": 780 + }, + { + "epoch": 0.08238396624472574, + "grad_norm": 1.2509952783584595, + "learning_rate": 
0.0014803120093799687, + "loss": 1.9968, + "step": 781 + }, + { + "epoch": 0.08248945147679325, + "grad_norm": 0.9546487927436829, + "learning_rate": 0.0014802548215309434, + "loss": 2.0168, + "step": 782 + }, + { + "epoch": 0.08259493670886076, + "grad_norm": 1.3256502151489258, + "learning_rate": 0.001480197551853224, + "loss": 2.0199, + "step": 783 + }, + { + "epoch": 0.08270042194092828, + "grad_norm": 0.8904930949211121, + "learning_rate": 0.0014801402003532277, + "loss": 2.0239, + "step": 784 + }, + { + "epoch": 0.08280590717299578, + "grad_norm": 1.0246342420578003, + "learning_rate": 0.0014800827670373815, + "loss": 2.0319, + "step": 785 + }, + { + "epoch": 0.08291139240506329, + "grad_norm": 0.8444899916648865, + "learning_rate": 0.0014800252519121203, + "loss": 1.9992, + "step": 786 + }, + { + "epoch": 0.0830168776371308, + "grad_norm": 1.15000581741333, + "learning_rate": 0.0014799676549838898, + "loss": 2.0616, + "step": 787 + }, + { + "epoch": 0.08312236286919832, + "grad_norm": 0.9025602340698242, + "learning_rate": 0.0014799099762591434, + "loss": 2.038, + "step": 788 + }, + { + "epoch": 0.08322784810126582, + "grad_norm": 0.8153927326202393, + "learning_rate": 0.0014798522157443443, + "loss": 2.0144, + "step": 789 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 0.9251173138618469, + "learning_rate": 0.0014797943734459653, + "loss": 2.074, + "step": 790 + }, + { + "epoch": 0.08343881856540085, + "grad_norm": 0.7228313684463501, + "learning_rate": 0.0014797364493704876, + "loss": 2.0385, + "step": 791 + }, + { + "epoch": 0.08354430379746836, + "grad_norm": 0.9697998762130737, + "learning_rate": 0.001479678443524402, + "loss": 2.022, + "step": 792 + }, + { + "epoch": 0.08364978902953586, + "grad_norm": 0.8954012989997864, + "learning_rate": 0.0014796203559142081, + "loss": 1.9854, + "step": 793 + }, + { + "epoch": 0.08375527426160338, + "grad_norm": 0.7926121354103088, + "learning_rate": 0.0014795621865464155, + "loss": 2.0025, + "step": 794 + 
}, + { + "epoch": 0.08386075949367089, + "grad_norm": 0.785712718963623, + "learning_rate": 0.0014795039354275417, + "loss": 1.9792, + "step": 795 + }, + { + "epoch": 0.0839662447257384, + "grad_norm": 0.7563058733940125, + "learning_rate": 0.0014794456025641143, + "loss": 2.0066, + "step": 796 + }, + { + "epoch": 0.0840717299578059, + "grad_norm": 0.9606810212135315, + "learning_rate": 0.00147938718796267, + "loss": 2.036, + "step": 797 + }, + { + "epoch": 0.08417721518987342, + "grad_norm": 1.3615429401397705, + "learning_rate": 0.001479328691629754, + "loss": 2.0051, + "step": 798 + }, + { + "epoch": 0.08428270042194093, + "grad_norm": 0.8458890914916992, + "learning_rate": 0.0014792701135719214, + "loss": 1.9944, + "step": 799 + }, + { + "epoch": 0.08438818565400844, + "grad_norm": 1.1538286209106445, + "learning_rate": 0.001479211453795736, + "loss": 1.9846, + "step": 800 + }, + { + "epoch": 0.08449367088607596, + "grad_norm": 1.545150876045227, + "learning_rate": 0.001479152712307771, + "loss": 2.0019, + "step": 801 + }, + { + "epoch": 0.08459915611814346, + "grad_norm": 0.8827580213546753, + "learning_rate": 0.0014790938891146089, + "loss": 2.0146, + "step": 802 + }, + { + "epoch": 0.08470464135021097, + "grad_norm": 2.012512683868408, + "learning_rate": 0.001479034984222841, + "loss": 2.0082, + "step": 803 + }, + { + "epoch": 0.08481012658227848, + "grad_norm": 1.197212815284729, + "learning_rate": 0.0014789759976390675, + "loss": 1.9695, + "step": 804 + }, + { + "epoch": 0.084915611814346, + "grad_norm": 2.078927516937256, + "learning_rate": 0.0014789169293698988, + "loss": 2.0506, + "step": 805 + }, + { + "epoch": 0.0850210970464135, + "grad_norm": 1.7835816144943237, + "learning_rate": 0.0014788577794219533, + "loss": 2.0816, + "step": 806 + }, + { + "epoch": 0.08512658227848101, + "grad_norm": 1.1964116096496582, + "learning_rate": 0.0014787985478018593, + "loss": 2.0144, + "step": 807 + }, + { + "epoch": 0.08523206751054853, + "grad_norm": 
1.3016327619552612, + "learning_rate": 0.0014787392345162538, + "loss": 2.0485, + "step": 808 + }, + { + "epoch": 0.08533755274261604, + "grad_norm": 1.0853155851364136, + "learning_rate": 0.0014786798395717833, + "loss": 1.9702, + "step": 809 + }, + { + "epoch": 0.08544303797468354, + "grad_norm": 0.9616756439208984, + "learning_rate": 0.0014786203629751033, + "loss": 1.9883, + "step": 810 + }, + { + "epoch": 0.08554852320675105, + "grad_norm": 1.2879036664962769, + "learning_rate": 0.001478560804732878, + "loss": 2.0183, + "step": 811 + }, + { + "epoch": 0.08565400843881857, + "grad_norm": 0.8033225536346436, + "learning_rate": 0.001478501164851782, + "loss": 1.9805, + "step": 812 + }, + { + "epoch": 0.08575949367088608, + "grad_norm": 1.183927059173584, + "learning_rate": 0.0014784414433384977, + "loss": 2.0411, + "step": 813 + }, + { + "epoch": 0.08586497890295358, + "grad_norm": 0.8274829983711243, + "learning_rate": 0.0014783816401997174, + "loss": 2.0084, + "step": 814 + }, + { + "epoch": 0.0859704641350211, + "grad_norm": 1.0632599592208862, + "learning_rate": 0.0014783217554421423, + "loss": 1.985, + "step": 815 + }, + { + "epoch": 0.08607594936708861, + "grad_norm": 0.9875366687774658, + "learning_rate": 0.0014782617890724827, + "loss": 1.9882, + "step": 816 + }, + { + "epoch": 0.08618143459915611, + "grad_norm": 0.7915925979614258, + "learning_rate": 0.0014782017410974583, + "loss": 1.9948, + "step": 817 + }, + { + "epoch": 0.08628691983122364, + "grad_norm": 0.8526202440261841, + "learning_rate": 0.0014781416115237976, + "loss": 1.9981, + "step": 818 + }, + { + "epoch": 0.08639240506329114, + "grad_norm": 0.8535162210464478, + "learning_rate": 0.0014780814003582385, + "loss": 1.9661, + "step": 819 + }, + { + "epoch": 0.08649789029535865, + "grad_norm": 0.7484958171844482, + "learning_rate": 0.0014780211076075279, + "loss": 1.9676, + "step": 820 + }, + { + "epoch": 0.08660337552742615, + "grad_norm": 0.7553471326828003, + "learning_rate": 
0.001477960733278422, + "loss": 1.9941, + "step": 821 + }, + { + "epoch": 0.08670886075949367, + "grad_norm": 0.7360389232635498, + "learning_rate": 0.001477900277377686, + "loss": 2.002, + "step": 822 + }, + { + "epoch": 0.08681434599156118, + "grad_norm": 0.7117512226104736, + "learning_rate": 0.0014778397399120942, + "loss": 2.0019, + "step": 823 + }, + { + "epoch": 0.08691983122362869, + "grad_norm": 0.9297134280204773, + "learning_rate": 0.0014777791208884304, + "loss": 2.047, + "step": 824 + }, + { + "epoch": 0.08702531645569621, + "grad_norm": 0.9333713054656982, + "learning_rate": 0.0014777184203134867, + "loss": 1.9789, + "step": 825 + }, + { + "epoch": 0.08713080168776371, + "grad_norm": 0.7375414967536926, + "learning_rate": 0.0014776576381940658, + "loss": 2.0131, + "step": 826 + }, + { + "epoch": 0.08723628691983122, + "grad_norm": 0.9693120718002319, + "learning_rate": 0.0014775967745369778, + "loss": 1.9965, + "step": 827 + }, + { + "epoch": 0.08734177215189873, + "grad_norm": 0.9842984080314636, + "learning_rate": 0.001477535829349043, + "loss": 1.9872, + "step": 828 + }, + { + "epoch": 0.08744725738396625, + "grad_norm": 0.7348518967628479, + "learning_rate": 0.0014774748026370908, + "loss": 1.9884, + "step": 829 + }, + { + "epoch": 0.08755274261603375, + "grad_norm": 0.8084006905555725, + "learning_rate": 0.0014774136944079594, + "loss": 1.9995, + "step": 830 + }, + { + "epoch": 0.08765822784810126, + "grad_norm": 0.7598916888237, + "learning_rate": 0.0014773525046684964, + "loss": 1.9599, + "step": 831 + }, + { + "epoch": 0.08776371308016878, + "grad_norm": 0.7162100076675415, + "learning_rate": 0.0014772912334255585, + "loss": 1.9653, + "step": 832 + }, + { + "epoch": 0.08786919831223629, + "grad_norm": 0.9068751931190491, + "learning_rate": 0.0014772298806860111, + "loss": 1.9823, + "step": 833 + }, + { + "epoch": 0.0879746835443038, + "grad_norm": 0.9478791952133179, + "learning_rate": 0.0014771684464567293, + "loss": 1.9478, + "step": 834 + 
}, + { + "epoch": 0.08808016877637131, + "grad_norm": 0.6991375684738159, + "learning_rate": 0.0014771069307445972, + "loss": 2.0, + "step": 835 + }, + { + "epoch": 0.08818565400843882, + "grad_norm": 0.8604863882064819, + "learning_rate": 0.0014770453335565077, + "loss": 1.975, + "step": 836 + }, + { + "epoch": 0.08829113924050633, + "grad_norm": 1.1100908517837524, + "learning_rate": 0.0014769836548993631, + "loss": 2.0049, + "step": 837 + }, + { + "epoch": 0.08839662447257383, + "grad_norm": 0.8229016661643982, + "learning_rate": 0.0014769218947800749, + "loss": 2.0169, + "step": 838 + }, + { + "epoch": 0.08850210970464135, + "grad_norm": 0.7901437282562256, + "learning_rate": 0.0014768600532055638, + "loss": 1.9842, + "step": 839 + }, + { + "epoch": 0.08860759493670886, + "grad_norm": 1.0930571556091309, + "learning_rate": 0.0014767981301827592, + "loss": 2.0112, + "step": 840 + }, + { + "epoch": 0.08871308016877637, + "grad_norm": 1.076067328453064, + "learning_rate": 0.0014767361257186, + "loss": 2.0, + "step": 841 + }, + { + "epoch": 0.08881856540084389, + "grad_norm": 0.7977123260498047, + "learning_rate": 0.0014766740398200343, + "loss": 1.9495, + "step": 842 + }, + { + "epoch": 0.0889240506329114, + "grad_norm": 0.9745168089866638, + "learning_rate": 0.0014766118724940185, + "loss": 1.9994, + "step": 843 + }, + { + "epoch": 0.0890295358649789, + "grad_norm": 1.2153669595718384, + "learning_rate": 0.0014765496237475195, + "loss": 1.9901, + "step": 844 + }, + { + "epoch": 0.08913502109704641, + "grad_norm": 0.7874230742454529, + "learning_rate": 0.001476487293587512, + "loss": 1.9933, + "step": 845 + }, + { + "epoch": 0.08924050632911393, + "grad_norm": 1.208825945854187, + "learning_rate": 0.0014764248820209808, + "loss": 2.0072, + "step": 846 + }, + { + "epoch": 0.08934599156118143, + "grad_norm": 1.023760437965393, + "learning_rate": 0.0014763623890549193, + "loss": 2.0092, + "step": 847 + }, + { + "epoch": 0.08945147679324894, + "grad_norm": 
0.9951703548431396, + "learning_rate": 0.00147629981469633, + "loss": 2.0062, + "step": 848 + }, + { + "epoch": 0.08955696202531646, + "grad_norm": 0.7812696099281311, + "learning_rate": 0.001476237158952225, + "loss": 1.959, + "step": 849 + }, + { + "epoch": 0.08966244725738397, + "grad_norm": 0.8475250005722046, + "learning_rate": 0.0014761744218296249, + "loss": 1.9412, + "step": 850 + }, + { + "epoch": 0.08976793248945147, + "grad_norm": 0.8027808666229248, + "learning_rate": 0.0014761116033355597, + "loss": 1.9849, + "step": 851 + }, + { + "epoch": 0.08987341772151898, + "grad_norm": 0.7007187604904175, + "learning_rate": 0.001476048703477069, + "loss": 2.0059, + "step": 852 + }, + { + "epoch": 0.0899789029535865, + "grad_norm": 0.7540977001190186, + "learning_rate": 0.0014759857222612003, + "loss": 1.9648, + "step": 853 + }, + { + "epoch": 0.09008438818565401, + "grad_norm": 0.7011328339576721, + "learning_rate": 0.0014759226596950115, + "loss": 2.0165, + "step": 854 + }, + { + "epoch": 0.09018987341772151, + "grad_norm": 0.7513875961303711, + "learning_rate": 0.0014758595157855687, + "loss": 1.9803, + "step": 855 + }, + { + "epoch": 0.09029535864978903, + "grad_norm": 0.9416045546531677, + "learning_rate": 0.001475796290539948, + "loss": 1.9973, + "step": 856 + }, + { + "epoch": 0.09040084388185654, + "grad_norm": 1.0127289295196533, + "learning_rate": 0.0014757329839652335, + "loss": 1.9689, + "step": 857 + }, + { + "epoch": 0.09050632911392405, + "grad_norm": 1.0179680585861206, + "learning_rate": 0.0014756695960685194, + "loss": 1.9859, + "step": 858 + }, + { + "epoch": 0.09061181434599157, + "grad_norm": 0.9597452282905579, + "learning_rate": 0.0014756061268569086, + "loss": 1.9821, + "step": 859 + }, + { + "epoch": 0.09071729957805907, + "grad_norm": 0.7076016068458557, + "learning_rate": 0.001475542576337513, + "loss": 1.9314, + "step": 860 + }, + { + "epoch": 0.09082278481012658, + "grad_norm": 0.7875258326530457, + "learning_rate": 
0.001475478944517454, + "loss": 1.9894, + "step": 861 + }, + { + "epoch": 0.09092827004219409, + "grad_norm": 0.8867344260215759, + "learning_rate": 0.0014754152314038617, + "loss": 1.9666, + "step": 862 + }, + { + "epoch": 0.09103375527426161, + "grad_norm": 0.7320569753646851, + "learning_rate": 0.0014753514370038753, + "loss": 1.9787, + "step": 863 + }, + { + "epoch": 0.09113924050632911, + "grad_norm": 0.7529007792472839, + "learning_rate": 0.0014752875613246435, + "loss": 1.9915, + "step": 864 + }, + { + "epoch": 0.09124472573839662, + "grad_norm": 0.9473827481269836, + "learning_rate": 0.001475223604373324, + "loss": 1.9864, + "step": 865 + }, + { + "epoch": 0.09135021097046414, + "grad_norm": 0.9038528203964233, + "learning_rate": 0.0014751595661570832, + "loss": 1.9342, + "step": 866 + }, + { + "epoch": 0.09145569620253165, + "grad_norm": 0.766322672367096, + "learning_rate": 0.001475095446683097, + "loss": 1.9819, + "step": 867 + }, + { + "epoch": 0.09156118143459915, + "grad_norm": 0.733440101146698, + "learning_rate": 0.0014750312459585505, + "loss": 2.0054, + "step": 868 + }, + { + "epoch": 0.09166666666666666, + "grad_norm": 1.0222501754760742, + "learning_rate": 0.0014749669639906374, + "loss": 1.954, + "step": 869 + }, + { + "epoch": 0.09177215189873418, + "grad_norm": 1.1542280912399292, + "learning_rate": 0.001474902600786561, + "loss": 1.9938, + "step": 870 + }, + { + "epoch": 0.09187763713080169, + "grad_norm": 0.7863971590995789, + "learning_rate": 0.0014748381563535337, + "loss": 1.9768, + "step": 871 + }, + { + "epoch": 0.0919831223628692, + "grad_norm": 0.750859260559082, + "learning_rate": 0.0014747736306987764, + "loss": 1.9752, + "step": 872 + }, + { + "epoch": 0.09208860759493671, + "grad_norm": 0.813593864440918, + "learning_rate": 0.0014747090238295198, + "loss": 1.9664, + "step": 873 + }, + { + "epoch": 0.09219409282700422, + "grad_norm": 0.7085866928100586, + "learning_rate": 0.0014746443357530033, + "loss": 1.9718, + "step": 874 + }, 
+ { + "epoch": 0.09229957805907173, + "grad_norm": 0.8297715187072754, + "learning_rate": 0.0014745795664764757, + "loss": 1.9625, + "step": 875 + }, + { + "epoch": 0.09240506329113925, + "grad_norm": 0.9409791827201843, + "learning_rate": 0.0014745147160071944, + "loss": 1.9833, + "step": 876 + }, + { + "epoch": 0.09251054852320675, + "grad_norm": 0.8908016085624695, + "learning_rate": 0.0014744497843524266, + "loss": 1.9664, + "step": 877 + }, + { + "epoch": 0.09261603375527426, + "grad_norm": 0.810802161693573, + "learning_rate": 0.001474384771519448, + "loss": 1.9535, + "step": 878 + }, + { + "epoch": 0.09272151898734177, + "grad_norm": 0.7044139504432678, + "learning_rate": 0.0014743196775155434, + "loss": 2.002, + "step": 879 + }, + { + "epoch": 0.09282700421940929, + "grad_norm": 0.773737370967865, + "learning_rate": 0.0014742545023480075, + "loss": 1.9801, + "step": 880 + }, + { + "epoch": 0.0929324894514768, + "grad_norm": 0.9235855937004089, + "learning_rate": 0.001474189246024143, + "loss": 2.008, + "step": 881 + }, + { + "epoch": 0.0930379746835443, + "grad_norm": 1.1201812028884888, + "learning_rate": 0.0014741239085512624, + "loss": 1.9529, + "step": 882 + }, + { + "epoch": 0.09314345991561182, + "grad_norm": 1.0189557075500488, + "learning_rate": 0.0014740584899366868, + "loss": 1.9167, + "step": 883 + }, + { + "epoch": 0.09324894514767933, + "grad_norm": 0.7264218926429749, + "learning_rate": 0.0014739929901877473, + "loss": 1.9481, + "step": 884 + }, + { + "epoch": 0.09335443037974683, + "grad_norm": 1.094464659690857, + "learning_rate": 0.001473927409311783, + "loss": 2.0175, + "step": 885 + }, + { + "epoch": 0.09345991561181434, + "grad_norm": 1.5549826622009277, + "learning_rate": 0.0014738617473161425, + "loss": 1.9456, + "step": 886 + }, + { + "epoch": 0.09356540084388186, + "grad_norm": 0.7962160110473633, + "learning_rate": 0.0014737960042081836, + "loss": 1.9583, + "step": 887 + }, + { + "epoch": 0.09367088607594937, + "grad_norm": 
1.5148277282714844, + "learning_rate": 0.0014737301799952734, + "loss": 1.9996, + "step": 888 + }, + { + "epoch": 0.09377637130801687, + "grad_norm": 0.8878936171531677, + "learning_rate": 0.001473664274684788, + "loss": 1.9714, + "step": 889 + }, + { + "epoch": 0.0938818565400844, + "grad_norm": 0.811007559299469, + "learning_rate": 0.0014735982882841117, + "loss": 1.983, + "step": 890 + }, + { + "epoch": 0.0939873417721519, + "grad_norm": 0.9040500521659851, + "learning_rate": 0.0014735322208006391, + "loss": 2.0146, + "step": 891 + }, + { + "epoch": 0.0940928270042194, + "grad_norm": 0.9184568524360657, + "learning_rate": 0.0014734660722417734, + "loss": 1.9616, + "step": 892 + }, + { + "epoch": 0.09419831223628691, + "grad_norm": 0.7640373110771179, + "learning_rate": 0.0014733998426149266, + "loss": 1.942, + "step": 893 + }, + { + "epoch": 0.09430379746835443, + "grad_norm": 0.754875898361206, + "learning_rate": 0.0014733335319275203, + "loss": 1.991, + "step": 894 + }, + { + "epoch": 0.09440928270042194, + "grad_norm": 0.8229932188987732, + "learning_rate": 0.001473267140186985, + "loss": 1.947, + "step": 895 + }, + { + "epoch": 0.09451476793248945, + "grad_norm": 1.094293475151062, + "learning_rate": 0.00147320066740076, + "loss": 1.9562, + "step": 896 + }, + { + "epoch": 0.09462025316455697, + "grad_norm": 1.1114026308059692, + "learning_rate": 0.001473134113576294, + "loss": 1.96, + "step": 897 + }, + { + "epoch": 0.09472573839662447, + "grad_norm": 0.7743099331855774, + "learning_rate": 0.0014730674787210448, + "loss": 1.9604, + "step": 898 + }, + { + "epoch": 0.09483122362869198, + "grad_norm": 0.9337162971496582, + "learning_rate": 0.0014730007628424792, + "loss": 1.9344, + "step": 899 + }, + { + "epoch": 0.0949367088607595, + "grad_norm": 1.3683785200119019, + "learning_rate": 0.0014729339659480727, + "loss": 1.9614, + "step": 900 + }, + { + "epoch": 0.095042194092827, + "grad_norm": 0.8284376859664917, + "learning_rate": 0.0014728670880453105, + 
"loss": 1.9789, + "step": 901 + }, + { + "epoch": 0.09514767932489451, + "grad_norm": 0.8244580626487732, + "learning_rate": 0.0014728001291416863, + "loss": 1.9734, + "step": 902 + }, + { + "epoch": 0.09525316455696202, + "grad_norm": 1.2106457948684692, + "learning_rate": 0.001472733089244704, + "loss": 1.9692, + "step": 903 + }, + { + "epoch": 0.09535864978902954, + "grad_norm": 0.8426507711410522, + "learning_rate": 0.0014726659683618746, + "loss": 1.965, + "step": 904 + }, + { + "epoch": 0.09546413502109705, + "grad_norm": 0.9278005957603455, + "learning_rate": 0.0014725987665007202, + "loss": 1.9492, + "step": 905 + }, + { + "epoch": 0.09556962025316455, + "grad_norm": 1.3644222021102905, + "learning_rate": 0.0014725314836687708, + "loss": 1.9833, + "step": 906 + }, + { + "epoch": 0.09567510548523207, + "grad_norm": 0.8199100494384766, + "learning_rate": 0.0014724641198735659, + "loss": 1.9954, + "step": 907 + }, + { + "epoch": 0.09578059071729958, + "grad_norm": 1.1375141143798828, + "learning_rate": 0.0014723966751226535, + "loss": 1.9533, + "step": 908 + }, + { + "epoch": 0.09588607594936709, + "grad_norm": 1.151368498802185, + "learning_rate": 0.0014723291494235916, + "loss": 1.9782, + "step": 909 + }, + { + "epoch": 0.09599156118143459, + "grad_norm": 0.8980810642242432, + "learning_rate": 0.0014722615427839468, + "loss": 2.0227, + "step": 910 + }, + { + "epoch": 0.09609704641350211, + "grad_norm": 0.8359660506248474, + "learning_rate": 0.0014721938552112943, + "loss": 1.9619, + "step": 911 + }, + { + "epoch": 0.09620253164556962, + "grad_norm": 0.9187983274459839, + "learning_rate": 0.0014721260867132193, + "loss": 1.9527, + "step": 912 + }, + { + "epoch": 0.09630801687763713, + "grad_norm": 0.8415696024894714, + "learning_rate": 0.0014720582372973155, + "loss": 1.9395, + "step": 913 + }, + { + "epoch": 0.09641350210970465, + "grad_norm": 0.7496194839477539, + "learning_rate": 0.0014719903069711857, + "loss": 1.9765, + "step": 914 + }, + { + "epoch": 
0.09651898734177215, + "grad_norm": 0.7602097392082214, + "learning_rate": 0.0014719222957424417, + "loss": 1.9925, + "step": 915 + }, + { + "epoch": 0.09662447257383966, + "grad_norm": 0.7705942392349243, + "learning_rate": 0.0014718542036187049, + "loss": 1.9715, + "step": 916 + }, + { + "epoch": 0.09672995780590718, + "grad_norm": 0.962562084197998, + "learning_rate": 0.0014717860306076049, + "loss": 1.9409, + "step": 917 + }, + { + "epoch": 0.09683544303797469, + "grad_norm": 1.0055153369903564, + "learning_rate": 0.0014717177767167812, + "loss": 1.9438, + "step": 918 + }, + { + "epoch": 0.09694092827004219, + "grad_norm": 0.7313381433486938, + "learning_rate": 0.0014716494419538815, + "loss": 1.9754, + "step": 919 + }, + { + "epoch": 0.0970464135021097, + "grad_norm": 0.7872666716575623, + "learning_rate": 0.0014715810263265633, + "loss": 1.8959, + "step": 920 + }, + { + "epoch": 0.09715189873417722, + "grad_norm": 1.0041792392730713, + "learning_rate": 0.0014715125298424934, + "loss": 1.9766, + "step": 921 + }, + { + "epoch": 0.09725738396624473, + "grad_norm": 1.1317445039749146, + "learning_rate": 0.0014714439525093466, + "loss": 1.9571, + "step": 922 + }, + { + "epoch": 0.09736286919831223, + "grad_norm": 0.7573800086975098, + "learning_rate": 0.0014713752943348074, + "loss": 1.9193, + "step": 923 + }, + { + "epoch": 0.09746835443037975, + "grad_norm": 0.8204968571662903, + "learning_rate": 0.0014713065553265694, + "loss": 1.9581, + "step": 924 + }, + { + "epoch": 0.09757383966244726, + "grad_norm": 1.208304762840271, + "learning_rate": 0.001471237735492335, + "loss": 1.9292, + "step": 925 + }, + { + "epoch": 0.09767932489451477, + "grad_norm": 0.9103373885154724, + "learning_rate": 0.0014711688348398161, + "loss": 1.951, + "step": 926 + }, + { + "epoch": 0.09778481012658227, + "grad_norm": 0.7377431392669678, + "learning_rate": 0.001471099853376733, + "loss": 1.9515, + "step": 927 + }, + { + "epoch": 0.09789029535864979, + "grad_norm": 1.261988878250122, 
+ "learning_rate": 0.0014710307911108159, + "loss": 1.9121, + "step": 928 + }, + { + "epoch": 0.0979957805907173, + "grad_norm": 1.0393929481506348, + "learning_rate": 0.0014709616480498029, + "loss": 1.9634, + "step": 929 + }, + { + "epoch": 0.0981012658227848, + "grad_norm": 0.714392900466919, + "learning_rate": 0.0014708924242014423, + "loss": 1.9458, + "step": 930 + }, + { + "epoch": 0.09820675105485233, + "grad_norm": 0.9038212895393372, + "learning_rate": 0.001470823119573491, + "loss": 1.9261, + "step": 931 + }, + { + "epoch": 0.09831223628691983, + "grad_norm": 1.3091992139816284, + "learning_rate": 0.0014707537341737149, + "loss": 1.9844, + "step": 932 + }, + { + "epoch": 0.09841772151898734, + "grad_norm": 0.7651053071022034, + "learning_rate": 0.0014706842680098887, + "loss": 1.9669, + "step": 933 + }, + { + "epoch": 0.09852320675105486, + "grad_norm": 0.9429594874382019, + "learning_rate": 0.0014706147210897967, + "loss": 1.9678, + "step": 934 + }, + { + "epoch": 0.09862869198312237, + "grad_norm": 1.3386077880859375, + "learning_rate": 0.0014705450934212317, + "loss": 1.9688, + "step": 935 + }, + { + "epoch": 0.09873417721518987, + "grad_norm": 0.7105293273925781, + "learning_rate": 0.0014704753850119962, + "loss": 1.9327, + "step": 936 + }, + { + "epoch": 0.09883966244725738, + "grad_norm": 1.4846140146255493, + "learning_rate": 0.001470405595869901, + "loss": 1.973, + "step": 937 + }, + { + "epoch": 0.0989451476793249, + "grad_norm": 0.8131608963012695, + "learning_rate": 0.0014703357260027667, + "loss": 1.9457, + "step": 938 + }, + { + "epoch": 0.0990506329113924, + "grad_norm": 0.9219775795936584, + "learning_rate": 0.0014702657754184225, + "loss": 1.955, + "step": 939 + }, + { + "epoch": 0.09915611814345991, + "grad_norm": 0.8620164394378662, + "learning_rate": 0.0014701957441247064, + "loss": 1.9349, + "step": 940 + }, + { + "epoch": 0.09926160337552743, + "grad_norm": 0.73787522315979, + "learning_rate": 0.001470125632129466, + "loss": 1.9147, + 
"step": 941 + }, + { + "epoch": 0.09936708860759494, + "grad_norm": 0.9574810266494751, + "learning_rate": 0.0014700554394405576, + "loss": 1.9611, + "step": 942 + }, + { + "epoch": 0.09947257383966245, + "grad_norm": 0.7807037234306335, + "learning_rate": 0.0014699851660658469, + "loss": 1.9112, + "step": 943 + }, + { + "epoch": 0.09957805907172995, + "grad_norm": 0.829177975654602, + "learning_rate": 0.0014699148120132079, + "loss": 1.9039, + "step": 944 + }, + { + "epoch": 0.09968354430379747, + "grad_norm": 0.9019765257835388, + "learning_rate": 0.0014698443772905247, + "loss": 1.9368, + "step": 945 + }, + { + "epoch": 0.09978902953586498, + "grad_norm": 0.764459490776062, + "learning_rate": 0.0014697738619056891, + "loss": 1.9402, + "step": 946 + }, + { + "epoch": 0.09989451476793249, + "grad_norm": 0.9456490278244019, + "learning_rate": 0.0014697032658666036, + "loss": 1.8995, + "step": 947 + }, + { + "epoch": 0.1, + "grad_norm": 0.8438132405281067, + "learning_rate": 0.001469632589181178, + "loss": 1.9732, + "step": 948 + }, + { + "epoch": 0.10010548523206751, + "grad_norm": 0.7316262722015381, + "learning_rate": 0.0014695618318573327, + "loss": 1.9119, + "step": 949 + }, + { + "epoch": 0.10021097046413502, + "grad_norm": 0.9175291657447815, + "learning_rate": 0.0014694909939029959, + "loss": 1.9436, + "step": 950 + }, + { + "epoch": 0.10031645569620253, + "grad_norm": 0.8727970719337463, + "learning_rate": 0.0014694200753261057, + "loss": 1.9629, + "step": 951 + }, + { + "epoch": 0.10042194092827005, + "grad_norm": 0.7307848334312439, + "learning_rate": 0.0014693490761346086, + "loss": 1.9491, + "step": 952 + }, + { + "epoch": 0.10052742616033755, + "grad_norm": 0.7603338360786438, + "learning_rate": 0.0014692779963364606, + "loss": 1.9611, + "step": 953 + }, + { + "epoch": 0.10063291139240506, + "grad_norm": 0.9281390905380249, + "learning_rate": 0.0014692068359396264, + "loss": 1.957, + "step": 954 + }, + { + "epoch": 0.10073839662447258, + "grad_norm": 
0.885502815246582, + "learning_rate": 0.00146913559495208, + "loss": 1.9468, + "step": 955 + }, + { + "epoch": 0.10084388185654009, + "grad_norm": 0.7401512265205383, + "learning_rate": 0.001469064273381804, + "loss": 1.9277, + "step": 956 + }, + { + "epoch": 0.10094936708860759, + "grad_norm": 0.7650275230407715, + "learning_rate": 0.0014689928712367907, + "loss": 1.9081, + "step": 957 + }, + { + "epoch": 0.10105485232067511, + "grad_norm": 0.7920807600021362, + "learning_rate": 0.0014689213885250411, + "loss": 1.9627, + "step": 958 + }, + { + "epoch": 0.10116033755274262, + "grad_norm": 0.9023548364639282, + "learning_rate": 0.001468849825254565, + "loss": 1.9029, + "step": 959 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 0.9114206433296204, + "learning_rate": 0.0014687781814333814, + "loss": 1.9274, + "step": 960 + }, + { + "epoch": 0.10137130801687763, + "grad_norm": 0.9302749037742615, + "learning_rate": 0.0014687064570695185, + "loss": 1.9474, + "step": 961 + }, + { + "epoch": 0.10147679324894515, + "grad_norm": 0.8110483288764954, + "learning_rate": 0.0014686346521710133, + "loss": 1.9123, + "step": 962 + }, + { + "epoch": 0.10158227848101266, + "grad_norm": 0.6880903244018555, + "learning_rate": 0.0014685627667459118, + "loss": 1.9571, + "step": 963 + }, + { + "epoch": 0.10168776371308016, + "grad_norm": 0.7300034761428833, + "learning_rate": 0.0014684908008022694, + "loss": 1.9209, + "step": 964 + }, + { + "epoch": 0.10179324894514769, + "grad_norm": 0.7196750044822693, + "learning_rate": 0.00146841875434815, + "loss": 1.9782, + "step": 965 + }, + { + "epoch": 0.10189873417721519, + "grad_norm": 0.881179690361023, + "learning_rate": 0.0014683466273916266, + "loss": 1.9143, + "step": 966 + }, + { + "epoch": 0.1020042194092827, + "grad_norm": 1.238966464996338, + "learning_rate": 0.0014682744199407817, + "loss": 1.9403, + "step": 967 + }, + { + "epoch": 0.1021097046413502, + "grad_norm": 0.7588626742362976, + "learning_rate": 
0.0014682021320037064, + "loss": 1.9719, + "step": 968 + }, + { + "epoch": 0.10221518987341772, + "grad_norm": 1.098222017288208, + "learning_rate": 0.0014681297635885011, + "loss": 1.9282, + "step": 969 + }, + { + "epoch": 0.10232067510548523, + "grad_norm": 1.427003264427185, + "learning_rate": 0.0014680573147032746, + "loss": 1.9582, + "step": 970 + }, + { + "epoch": 0.10242616033755274, + "grad_norm": 0.7662996053695679, + "learning_rate": 0.0014679847853561457, + "loss": 1.9226, + "step": 971 + }, + { + "epoch": 0.10253164556962026, + "grad_norm": 1.568458914756775, + "learning_rate": 0.0014679121755552412, + "loss": 1.9304, + "step": 972 + }, + { + "epoch": 0.10263713080168776, + "grad_norm": 0.8240239024162292, + "learning_rate": 0.0014678394853086976, + "loss": 1.9234, + "step": 973 + }, + { + "epoch": 0.10274261603375527, + "grad_norm": 1.0468767881393433, + "learning_rate": 0.0014677667146246604, + "loss": 1.94, + "step": 974 + }, + { + "epoch": 0.10284810126582279, + "grad_norm": 1.3487763404846191, + "learning_rate": 0.0014676938635112835, + "loss": 1.928, + "step": 975 + }, + { + "epoch": 0.1029535864978903, + "grad_norm": 0.7491296529769897, + "learning_rate": 0.0014676209319767306, + "loss": 1.9247, + "step": 976 + }, + { + "epoch": 0.1030590717299578, + "grad_norm": 1.1662602424621582, + "learning_rate": 0.0014675479200291738, + "loss": 1.9373, + "step": 977 + }, + { + "epoch": 0.10316455696202531, + "grad_norm": 0.8769709467887878, + "learning_rate": 0.0014674748276767944, + "loss": 1.915, + "step": 978 + }, + { + "epoch": 0.10327004219409283, + "grad_norm": 0.8672403693199158, + "learning_rate": 0.0014674016549277831, + "loss": 1.9067, + "step": 979 + }, + { + "epoch": 0.10337552742616034, + "grad_norm": 0.8313528299331665, + "learning_rate": 0.0014673284017903392, + "loss": 1.9598, + "step": 980 + }, + { + "epoch": 0.10348101265822784, + "grad_norm": 0.9144379496574402, + "learning_rate": 0.001467255068272671, + "loss": 1.9265, + "step": 981 + }, 
+ { + "epoch": 0.10358649789029536, + "grad_norm": 0.9855427145957947, + "learning_rate": 0.0014671816543829954, + "loss": 1.9454, + "step": 982 + }, + { + "epoch": 0.10369198312236287, + "grad_norm": 0.8965082168579102, + "learning_rate": 0.0014671081601295394, + "loss": 1.8994, + "step": 983 + }, + { + "epoch": 0.10379746835443038, + "grad_norm": 0.7614032030105591, + "learning_rate": 0.0014670345855205384, + "loss": 1.9082, + "step": 984 + }, + { + "epoch": 0.10390295358649788, + "grad_norm": 0.9031005501747131, + "learning_rate": 0.0014669609305642366, + "loss": 1.9148, + "step": 985 + }, + { + "epoch": 0.1040084388185654, + "grad_norm": 1.034157156944275, + "learning_rate": 0.0014668871952688873, + "loss": 1.9127, + "step": 986 + }, + { + "epoch": 0.10411392405063291, + "grad_norm": 0.8076095581054688, + "learning_rate": 0.0014668133796427532, + "loss": 1.9537, + "step": 987 + }, + { + "epoch": 0.10421940928270042, + "grad_norm": 0.7174389362335205, + "learning_rate": 0.0014667394836941055, + "loss": 1.9058, + "step": 988 + }, + { + "epoch": 0.10432489451476794, + "grad_norm": 0.7461249232292175, + "learning_rate": 0.0014666655074312247, + "loss": 1.9013, + "step": 989 + }, + { + "epoch": 0.10443037974683544, + "grad_norm": 0.7113910913467407, + "learning_rate": 0.0014665914508624, + "loss": 1.9098, + "step": 990 + }, + { + "epoch": 0.10453586497890295, + "grad_norm": 0.8877383470535278, + "learning_rate": 0.0014665173139959305, + "loss": 1.9853, + "step": 991 + }, + { + "epoch": 0.10464135021097046, + "grad_norm": 1.0026034116744995, + "learning_rate": 0.0014664430968401225, + "loss": 1.9234, + "step": 992 + }, + { + "epoch": 0.10474683544303798, + "grad_norm": 0.8386844396591187, + "learning_rate": 0.0014663687994032931, + "loss": 1.9106, + "step": 993 + }, + { + "epoch": 0.10485232067510548, + "grad_norm": 0.7554658055305481, + "learning_rate": 0.0014662944216937677, + "loss": 1.9449, + "step": 994 + }, + { + "epoch": 0.10495780590717299, + "grad_norm": 
0.8492615818977356, + "learning_rate": 0.0014662199637198807, + "loss": 1.9025, + "step": 995 + }, + { + "epoch": 0.10506329113924051, + "grad_norm": 0.736452579498291, + "learning_rate": 0.0014661454254899754, + "loss": 1.9264, + "step": 996 + }, + { + "epoch": 0.10516877637130802, + "grad_norm": 0.74644535779953, + "learning_rate": 0.0014660708070124038, + "loss": 1.8988, + "step": 997 + }, + { + "epoch": 0.10527426160337552, + "grad_norm": 0.71762615442276, + "learning_rate": 0.0014659961082955277, + "loss": 1.9409, + "step": 998 + }, + { + "epoch": 0.10537974683544304, + "grad_norm": 0.7280663251876831, + "learning_rate": 0.0014659213293477177, + "loss": 1.9445, + "step": 999 + }, + { + "epoch": 0.10548523206751055, + "grad_norm": 0.9488875865936279, + "learning_rate": 0.0014658464701773526, + "loss": 1.9451, + "step": 1000 + }, + { + "epoch": 0.10559071729957806, + "grad_norm": 0.7421017289161682, + "learning_rate": 0.0014657715307928212, + "loss": 1.8955, + "step": 1001 + }, + { + "epoch": 0.10569620253164556, + "grad_norm": 0.7753511071205139, + "learning_rate": 0.0014656965112025203, + "loss": 1.8987, + "step": 1002 + }, + { + "epoch": 0.10580168776371308, + "grad_norm": 0.8213240504264832, + "learning_rate": 0.0014656214114148567, + "loss": 1.9568, + "step": 1003 + }, + { + "epoch": 0.10590717299578059, + "grad_norm": 0.8699269890785217, + "learning_rate": 0.0014655462314382456, + "loss": 1.8762, + "step": 1004 + }, + { + "epoch": 0.1060126582278481, + "grad_norm": 0.9731547832489014, + "learning_rate": 0.0014654709712811113, + "loss": 1.9028, + "step": 1005 + }, + { + "epoch": 0.10611814345991562, + "grad_norm": 1.0164216756820679, + "learning_rate": 0.0014653956309518866, + "loss": 1.9209, + "step": 1006 + }, + { + "epoch": 0.10622362869198312, + "grad_norm": 0.811352014541626, + "learning_rate": 0.0014653202104590146, + "loss": 1.951, + "step": 1007 + }, + { + "epoch": 0.10632911392405063, + "grad_norm": 0.7972273230552673, + "learning_rate": 
0.0014652447098109458, + "loss": 1.9094, + "step": 1008 + }, + { + "epoch": 0.10643459915611814, + "grad_norm": 1.1691479682922363, + "learning_rate": 0.001465169129016141, + "loss": 1.9291, + "step": 1009 + }, + { + "epoch": 0.10654008438818566, + "grad_norm": 1.0754554271697998, + "learning_rate": 0.0014650934680830688, + "loss": 1.9364, + "step": 1010 + }, + { + "epoch": 0.10664556962025316, + "grad_norm": 0.6810041069984436, + "learning_rate": 0.001465017727020208, + "loss": 1.897, + "step": 1011 + }, + { + "epoch": 0.10675105485232067, + "grad_norm": 0.8493066430091858, + "learning_rate": 0.0014649419058360455, + "loss": 1.8916, + "step": 1012 + }, + { + "epoch": 0.10685654008438819, + "grad_norm": 1.0545035600662231, + "learning_rate": 0.0014648660045390772, + "loss": 1.8729, + "step": 1013 + }, + { + "epoch": 0.1069620253164557, + "grad_norm": 0.8649955987930298, + "learning_rate": 0.0014647900231378086, + "loss": 1.9271, + "step": 1014 + }, + { + "epoch": 0.1070675105485232, + "grad_norm": 0.7901596426963806, + "learning_rate": 0.0014647139616407539, + "loss": 1.9041, + "step": 1015 + }, + { + "epoch": 0.10717299578059072, + "grad_norm": 1.213867425918579, + "learning_rate": 0.0014646378200564355, + "loss": 1.8694, + "step": 1016 + }, + { + "epoch": 0.10727848101265823, + "grad_norm": 1.214119791984558, + "learning_rate": 0.001464561598393386, + "loss": 1.9411, + "step": 1017 + }, + { + "epoch": 0.10738396624472574, + "grad_norm": 0.7162253856658936, + "learning_rate": 0.0014644852966601463, + "loss": 1.9465, + "step": 1018 + }, + { + "epoch": 0.10748945147679324, + "grad_norm": 1.4188722372055054, + "learning_rate": 0.0014644089148652664, + "loss": 1.9986, + "step": 1019 + }, + { + "epoch": 0.10759493670886076, + "grad_norm": 1.1966915130615234, + "learning_rate": 0.0014643324530173051, + "loss": 1.911, + "step": 1020 + }, + { + "epoch": 0.10770042194092827, + "grad_norm": 0.862467348575592, + "learning_rate": 0.0014642559111248306, + "loss": 1.913, + 
"step": 1021 + }, + { + "epoch": 0.10780590717299578, + "grad_norm": 1.161858081817627, + "learning_rate": 0.0014641792891964195, + "loss": 1.9173, + "step": 1022 + }, + { + "epoch": 0.1079113924050633, + "grad_norm": 0.9471576809883118, + "learning_rate": 0.0014641025872406581, + "loss": 1.8983, + "step": 1023 + }, + { + "epoch": 0.1080168776371308, + "grad_norm": 0.9642608165740967, + "learning_rate": 0.0014640258052661405, + "loss": 1.9181, + "step": 1024 + }, + { + "epoch": 0.10812236286919831, + "grad_norm": 0.8017929792404175, + "learning_rate": 0.0014639489432814712, + "loss": 1.9478, + "step": 1025 + }, + { + "epoch": 0.10822784810126582, + "grad_norm": 0.80342036485672, + "learning_rate": 0.001463872001295263, + "loss": 1.887, + "step": 1026 + }, + { + "epoch": 0.10833333333333334, + "grad_norm": 0.8779991269111633, + "learning_rate": 0.0014637949793161371, + "loss": 1.9289, + "step": 1027 + }, + { + "epoch": 0.10843881856540084, + "grad_norm": 0.8562830686569214, + "learning_rate": 0.0014637178773527246, + "loss": 1.8863, + "step": 1028 + }, + { + "epoch": 0.10854430379746835, + "grad_norm": 0.972145140171051, + "learning_rate": 0.001463640695413665, + "loss": 1.926, + "step": 1029 + }, + { + "epoch": 0.10864978902953587, + "grad_norm": 1.1361868381500244, + "learning_rate": 0.0014635634335076067, + "loss": 1.9211, + "step": 1030 + }, + { + "epoch": 0.10875527426160338, + "grad_norm": 0.7558760046958923, + "learning_rate": 0.0014634860916432077, + "loss": 1.8916, + "step": 1031 + }, + { + "epoch": 0.10886075949367088, + "grad_norm": 0.8338297605514526, + "learning_rate": 0.0014634086698291345, + "loss": 1.8702, + "step": 1032 + }, + { + "epoch": 0.10896624472573839, + "grad_norm": 0.8229060769081116, + "learning_rate": 0.0014633311680740625, + "loss": 1.8962, + "step": 1033 + }, + { + "epoch": 0.10907172995780591, + "grad_norm": 1.0593117475509644, + "learning_rate": 0.0014632535863866756, + "loss": 1.9492, + "step": 1034 + }, + { + "epoch": 
0.10917721518987342, + "grad_norm": 0.8677767515182495, + "learning_rate": 0.0014631759247756683, + "loss": 1.933, + "step": 1035 + }, + { + "epoch": 0.10928270042194092, + "grad_norm": 0.6995624899864197, + "learning_rate": 0.0014630981832497421, + "loss": 1.8735, + "step": 1036 + }, + { + "epoch": 0.10938818565400844, + "grad_norm": 0.8191702961921692, + "learning_rate": 0.0014630203618176088, + "loss": 1.9043, + "step": 1037 + }, + { + "epoch": 0.10949367088607595, + "grad_norm": 0.869900643825531, + "learning_rate": 0.0014629424604879885, + "loss": 1.9346, + "step": 1038 + }, + { + "epoch": 0.10959915611814346, + "grad_norm": 0.8660871386528015, + "learning_rate": 0.0014628644792696105, + "loss": 1.89, + "step": 1039 + }, + { + "epoch": 0.10970464135021098, + "grad_norm": 0.7353335618972778, + "learning_rate": 0.001462786418171213, + "loss": 1.8986, + "step": 1040 + }, + { + "epoch": 0.10981012658227848, + "grad_norm": 0.6760181188583374, + "learning_rate": 0.0014627082772015428, + "loss": 1.9146, + "step": 1041 + }, + { + "epoch": 0.10991561181434599, + "grad_norm": 0.7706218957901001, + "learning_rate": 0.0014626300563693566, + "loss": 1.9146, + "step": 1042 + }, + { + "epoch": 0.1100210970464135, + "grad_norm": 1.0490531921386719, + "learning_rate": 0.0014625517556834187, + "loss": 1.9087, + "step": 1043 + }, + { + "epoch": 0.11012658227848102, + "grad_norm": 1.2363831996917725, + "learning_rate": 0.0014624733751525036, + "loss": 1.8984, + "step": 1044 + }, + { + "epoch": 0.11023206751054852, + "grad_norm": 0.764866828918457, + "learning_rate": 0.001462394914785394, + "loss": 1.9102, + "step": 1045 + }, + { + "epoch": 0.11033755274261603, + "grad_norm": 0.8138965368270874, + "learning_rate": 0.0014623163745908821, + "loss": 1.9334, + "step": 1046 + }, + { + "epoch": 0.11044303797468355, + "grad_norm": 1.0330348014831543, + "learning_rate": 0.0014622377545777687, + "loss": 1.9146, + "step": 1047 + }, + { + "epoch": 0.11054852320675106, + "grad_norm": 
1.0037801265716553, + "learning_rate": 0.001462159054754863, + "loss": 1.9178, + "step": 1048 + }, + { + "epoch": 0.11065400843881856, + "grad_norm": 0.9186831116676331, + "learning_rate": 0.0014620802751309841, + "loss": 1.9001, + "step": 1049 + }, + { + "epoch": 0.11075949367088607, + "grad_norm": 0.7696121335029602, + "learning_rate": 0.0014620014157149597, + "loss": 1.93, + "step": 1050 + }, + { + "epoch": 0.11086497890295359, + "grad_norm": 0.723135769367218, + "learning_rate": 0.0014619224765156263, + "loss": 1.9012, + "step": 1051 + }, + { + "epoch": 0.1109704641350211, + "grad_norm": 0.8412973284721375, + "learning_rate": 0.0014618434575418293, + "loss": 1.9174, + "step": 1052 + }, + { + "epoch": 0.1110759493670886, + "grad_norm": 1.1051959991455078, + "learning_rate": 0.0014617643588024237, + "loss": 1.9294, + "step": 1053 + }, + { + "epoch": 0.11118143459915612, + "grad_norm": 0.8600526452064514, + "learning_rate": 0.001461685180306272, + "loss": 1.9262, + "step": 1054 + }, + { + "epoch": 0.11128691983122363, + "grad_norm": 0.762802004814148, + "learning_rate": 0.0014616059220622475, + "loss": 1.8917, + "step": 1055 + }, + { + "epoch": 0.11139240506329114, + "grad_norm": 1.3881192207336426, + "learning_rate": 0.0014615265840792308, + "loss": 1.8824, + "step": 1056 + }, + { + "epoch": 0.11149789029535866, + "grad_norm": 0.8138700723648071, + "learning_rate": 0.0014614471663661123, + "loss": 1.9128, + "step": 1057 + }, + { + "epoch": 0.11160337552742616, + "grad_norm": 0.8121997117996216, + "learning_rate": 0.0014613676689317916, + "loss": 1.9027, + "step": 1058 + }, + { + "epoch": 0.11170886075949367, + "grad_norm": 0.9810976386070251, + "learning_rate": 0.001461288091785176, + "loss": 1.8693, + "step": 1059 + }, + { + "epoch": 0.11181434599156118, + "grad_norm": 1.0123711824417114, + "learning_rate": 0.001461208434935183, + "loss": 1.9025, + "step": 1060 + }, + { + "epoch": 0.1119198312236287, + "grad_norm": 0.9526937007904053, + "learning_rate": 
0.0014611286983907384, + "loss": 1.9249, + "step": 1061 + }, + { + "epoch": 0.1120253164556962, + "grad_norm": 0.8683575391769409, + "learning_rate": 0.0014610488821607775, + "loss": 1.8616, + "step": 1062 + }, + { + "epoch": 0.11213080168776371, + "grad_norm": 0.7932003736495972, + "learning_rate": 0.0014609689862542434, + "loss": 1.9196, + "step": 1063 + }, + { + "epoch": 0.11223628691983123, + "grad_norm": 0.7800149321556091, + "learning_rate": 0.0014608890106800893, + "loss": 1.8627, + "step": 1064 + }, + { + "epoch": 0.11234177215189874, + "grad_norm": 1.0157136917114258, + "learning_rate": 0.0014608089554472767, + "loss": 1.8816, + "step": 1065 + }, + { + "epoch": 0.11244725738396624, + "grad_norm": 1.2331678867340088, + "learning_rate": 0.0014607288205647762, + "loss": 1.8671, + "step": 1066 + }, + { + "epoch": 0.11255274261603375, + "grad_norm": 0.8605229258537292, + "learning_rate": 0.0014606486060415673, + "loss": 1.9014, + "step": 1067 + }, + { + "epoch": 0.11265822784810127, + "grad_norm": 0.6830189824104309, + "learning_rate": 0.0014605683118866387, + "loss": 1.87, + "step": 1068 + }, + { + "epoch": 0.11276371308016878, + "grad_norm": 0.7945063710212708, + "learning_rate": 0.0014604879381089873, + "loss": 1.8936, + "step": 1069 + }, + { + "epoch": 0.11286919831223628, + "grad_norm": 0.810616672039032, + "learning_rate": 0.0014604074847176197, + "loss": 1.9124, + "step": 1070 + }, + { + "epoch": 0.1129746835443038, + "grad_norm": 0.6590541005134583, + "learning_rate": 0.0014603269517215512, + "loss": 1.863, + "step": 1071 + }, + { + "epoch": 0.11308016877637131, + "grad_norm": 0.7632009983062744, + "learning_rate": 0.0014602463391298055, + "loss": 1.9279, + "step": 1072 + }, + { + "epoch": 0.11318565400843882, + "grad_norm": 0.7055690884590149, + "learning_rate": 0.0014601656469514159, + "loss": 1.8744, + "step": 1073 + }, + { + "epoch": 0.11329113924050632, + "grad_norm": 0.7830638289451599, + "learning_rate": 0.0014600848751954248, + "loss": 1.8564, + 
"step": 1074 + }, + { + "epoch": 0.11339662447257384, + "grad_norm": 0.8189989924430847, + "learning_rate": 0.001460004023870882, + "loss": 1.8938, + "step": 1075 + }, + { + "epoch": 0.11350210970464135, + "grad_norm": 0.7221280932426453, + "learning_rate": 0.0014599230929868482, + "loss": 1.9226, + "step": 1076 + }, + { + "epoch": 0.11360759493670886, + "grad_norm": 0.723273515701294, + "learning_rate": 0.0014598420825523918, + "loss": 1.9009, + "step": 1077 + }, + { + "epoch": 0.11371308016877638, + "grad_norm": 0.688467800617218, + "learning_rate": 0.0014597609925765906, + "loss": 1.8591, + "step": 1078 + }, + { + "epoch": 0.11381856540084388, + "grad_norm": 0.7285111546516418, + "learning_rate": 0.0014596798230685308, + "loss": 1.8999, + "step": 1079 + }, + { + "epoch": 0.11392405063291139, + "grad_norm": 0.7482600808143616, + "learning_rate": 0.0014595985740373082, + "loss": 1.8745, + "step": 1080 + }, + { + "epoch": 0.11402953586497891, + "grad_norm": 0.7125475406646729, + "learning_rate": 0.001459517245492027, + "loss": 1.8905, + "step": 1081 + }, + { + "epoch": 0.11413502109704642, + "grad_norm": 0.714189350605011, + "learning_rate": 0.0014594358374418004, + "loss": 1.9101, + "step": 1082 + }, + { + "epoch": 0.11424050632911392, + "grad_norm": 0.7363860011100769, + "learning_rate": 0.0014593543498957506, + "loss": 1.8675, + "step": 1083 + }, + { + "epoch": 0.11434599156118143, + "grad_norm": 0.7624891400337219, + "learning_rate": 0.0014592727828630088, + "loss": 1.9021, + "step": 1084 + }, + { + "epoch": 0.11445147679324895, + "grad_norm": 0.7180051207542419, + "learning_rate": 0.001459191136352715, + "loss": 1.867, + "step": 1085 + }, + { + "epoch": 0.11455696202531646, + "grad_norm": 0.7767930030822754, + "learning_rate": 0.0014591094103740179, + "loss": 1.8598, + "step": 1086 + }, + { + "epoch": 0.11466244725738396, + "grad_norm": 0.7427185773849487, + "learning_rate": 0.0014590276049360755, + "loss": 1.9371, + "step": 1087 + }, + { + "epoch": 
0.11476793248945148, + "grad_norm": 0.8511559963226318, + "learning_rate": 0.0014589457200480543, + "loss": 1.9234, + "step": 1088 + }, + { + "epoch": 0.11487341772151899, + "grad_norm": 0.7273308038711548, + "learning_rate": 0.0014588637557191302, + "loss": 1.9223, + "step": 1089 + }, + { + "epoch": 0.1149789029535865, + "grad_norm": 0.805201530456543, + "learning_rate": 0.0014587817119584873, + "loss": 1.8944, + "step": 1090 + }, + { + "epoch": 0.115084388185654, + "grad_norm": 0.9723814725875854, + "learning_rate": 0.0014586995887753197, + "loss": 1.8551, + "step": 1091 + }, + { + "epoch": 0.11518987341772152, + "grad_norm": 0.9568383097648621, + "learning_rate": 0.001458617386178829, + "loss": 1.859, + "step": 1092 + }, + { + "epoch": 0.11529535864978903, + "grad_norm": 1.1083741188049316, + "learning_rate": 0.001458535104178227, + "loss": 1.867, + "step": 1093 + }, + { + "epoch": 0.11540084388185654, + "grad_norm": 0.9695031046867371, + "learning_rate": 0.001458452742782733, + "loss": 1.9075, + "step": 1094 + }, + { + "epoch": 0.11550632911392406, + "grad_norm": 0.7541027665138245, + "learning_rate": 0.0014583703020015768, + "loss": 1.899, + "step": 1095 + }, + { + "epoch": 0.11561181434599156, + "grad_norm": 0.7069128751754761, + "learning_rate": 0.001458287781843996, + "loss": 1.911, + "step": 1096 + }, + { + "epoch": 0.11571729957805907, + "grad_norm": 0.9184324145317078, + "learning_rate": 0.0014582051823192374, + "loss": 1.8829, + "step": 1097 + }, + { + "epoch": 0.11582278481012659, + "grad_norm": 1.02887761592865, + "learning_rate": 0.0014581225034365564, + "loss": 1.9116, + "step": 1098 + }, + { + "epoch": 0.1159282700421941, + "grad_norm": 0.7440295815467834, + "learning_rate": 0.0014580397452052182, + "loss": 1.8848, + "step": 1099 + }, + { + "epoch": 0.1160337552742616, + "grad_norm": 0.8049793839454651, + "learning_rate": 0.001457956907634496, + "loss": 1.8778, + "step": 1100 + }, + { + "epoch": 0.11613924050632911, + "grad_norm": 
1.2501120567321777, + "learning_rate": 0.001457873990733672, + "loss": 1.8944, + "step": 1101 + }, + { + "epoch": 0.11624472573839663, + "grad_norm": 1.0324928760528564, + "learning_rate": 0.0014577909945120376, + "loss": 1.8761, + "step": 1102 + }, + { + "epoch": 0.11635021097046414, + "grad_norm": 0.7581961750984192, + "learning_rate": 0.001457707918978893, + "loss": 1.8899, + "step": 1103 + }, + { + "epoch": 0.11645569620253164, + "grad_norm": 0.7634273171424866, + "learning_rate": 0.0014576247641435469, + "loss": 1.9025, + "step": 1104 + }, + { + "epoch": 0.11656118143459916, + "grad_norm": 0.8175299763679504, + "learning_rate": 0.0014575415300153174, + "loss": 1.9121, + "step": 1105 + }, + { + "epoch": 0.11666666666666667, + "grad_norm": 0.7725415229797363, + "learning_rate": 0.0014574582166035314, + "loss": 1.8957, + "step": 1106 + }, + { + "epoch": 0.11677215189873418, + "grad_norm": 0.6838293075561523, + "learning_rate": 0.0014573748239175247, + "loss": 1.8716, + "step": 1107 + }, + { + "epoch": 0.11687763713080168, + "grad_norm": 0.7362942695617676, + "learning_rate": 0.0014572913519666417, + "loss": 1.8729, + "step": 1108 + }, + { + "epoch": 0.1169831223628692, + "grad_norm": 0.7791047692298889, + "learning_rate": 0.0014572078007602355, + "loss": 1.91, + "step": 1109 + }, + { + "epoch": 0.11708860759493671, + "grad_norm": 0.7164736390113831, + "learning_rate": 0.0014571241703076692, + "loss": 1.8653, + "step": 1110 + }, + { + "epoch": 0.11719409282700421, + "grad_norm": 0.6913135647773743, + "learning_rate": 0.0014570404606183132, + "loss": 1.9122, + "step": 1111 + }, + { + "epoch": 0.11729957805907174, + "grad_norm": 0.7453840970993042, + "learning_rate": 0.0014569566717015483, + "loss": 1.8609, + "step": 1112 + }, + { + "epoch": 0.11740506329113924, + "grad_norm": 0.8508825898170471, + "learning_rate": 0.0014568728035667627, + "loss": 1.9042, + "step": 1113 + }, + { + "epoch": 0.11751054852320675, + "grad_norm": 0.8610578179359436, + "learning_rate": 
0.001456788856223355, + "loss": 1.8683, + "step": 1114 + }, + { + "epoch": 0.11761603375527427, + "grad_norm": 0.9674621820449829, + "learning_rate": 0.0014567048296807315, + "loss": 1.909, + "step": 1115 + }, + { + "epoch": 0.11772151898734177, + "grad_norm": 1.1482352018356323, + "learning_rate": 0.0014566207239483078, + "loss": 1.8881, + "step": 1116 + }, + { + "epoch": 0.11782700421940928, + "grad_norm": 0.9682254791259766, + "learning_rate": 0.0014565365390355087, + "loss": 1.8778, + "step": 1117 + }, + { + "epoch": 0.11793248945147679, + "grad_norm": 0.8148679137229919, + "learning_rate": 0.001456452274951767, + "loss": 1.907, + "step": 1118 + }, + { + "epoch": 0.11803797468354431, + "grad_norm": 0.7349154949188232, + "learning_rate": 0.0014563679317065254, + "loss": 1.871, + "step": 1119 + }, + { + "epoch": 0.11814345991561181, + "grad_norm": 0.6759890913963318, + "learning_rate": 0.0014562835093092348, + "loss": 1.8442, + "step": 1120 + }, + { + "epoch": 0.11824894514767932, + "grad_norm": 0.7035323977470398, + "learning_rate": 0.0014561990077693553, + "loss": 1.8402, + "step": 1121 + }, + { + "epoch": 0.11835443037974684, + "grad_norm": 0.9967879056930542, + "learning_rate": 0.0014561144270963551, + "loss": 1.8936, + "step": 1122 + }, + { + "epoch": 0.11845991561181435, + "grad_norm": 1.3393194675445557, + "learning_rate": 0.0014560297672997127, + "loss": 1.8865, + "step": 1123 + }, + { + "epoch": 0.11856540084388185, + "grad_norm": 0.9149357080459595, + "learning_rate": 0.001455945028388914, + "loss": 1.8515, + "step": 1124 + }, + { + "epoch": 0.11867088607594936, + "grad_norm": 0.7343166470527649, + "learning_rate": 0.001455860210373455, + "loss": 1.8796, + "step": 1125 + }, + { + "epoch": 0.11877637130801688, + "grad_norm": 0.8970596194267273, + "learning_rate": 0.0014557753132628396, + "loss": 1.8595, + "step": 1126 + }, + { + "epoch": 0.11888185654008439, + "grad_norm": 1.1826671361923218, + "learning_rate": 0.0014556903370665807, + "loss": 1.8855, + 
"step": 1127 + }, + { + "epoch": 0.1189873417721519, + "grad_norm": 0.8844490051269531, + "learning_rate": 0.0014556052817942013, + "loss": 1.885, + "step": 1128 + }, + { + "epoch": 0.11909282700421941, + "grad_norm": 0.7994623184204102, + "learning_rate": 0.001455520147455231, + "loss": 1.8837, + "step": 1129 + }, + { + "epoch": 0.11919831223628692, + "grad_norm": 1.194509506225586, + "learning_rate": 0.0014554349340592104, + "loss": 1.8652, + "step": 1130 + }, + { + "epoch": 0.11930379746835443, + "grad_norm": 1.1485731601715088, + "learning_rate": 0.001455349641615688, + "loss": 1.9383, + "step": 1131 + }, + { + "epoch": 0.11940928270042193, + "grad_norm": 0.8996093273162842, + "learning_rate": 0.001455264270134221, + "loss": 1.862, + "step": 1132 + }, + { + "epoch": 0.11951476793248945, + "grad_norm": 0.7816439867019653, + "learning_rate": 0.0014551788196243754, + "loss": 1.877, + "step": 1133 + }, + { + "epoch": 0.11962025316455696, + "grad_norm": 0.896456241607666, + "learning_rate": 0.0014550932900957271, + "loss": 1.9102, + "step": 1134 + }, + { + "epoch": 0.11972573839662447, + "grad_norm": 0.9533631205558777, + "learning_rate": 0.0014550076815578595, + "loss": 1.8757, + "step": 1135 + }, + { + "epoch": 0.11983122362869199, + "grad_norm": 1.0941636562347412, + "learning_rate": 0.0014549219940203659, + "loss": 1.8372, + "step": 1136 + }, + { + "epoch": 0.1199367088607595, + "grad_norm": 0.8346354365348816, + "learning_rate": 0.0014548362274928476, + "loss": 1.9003, + "step": 1137 + }, + { + "epoch": 0.120042194092827, + "grad_norm": 0.8940152525901794, + "learning_rate": 0.0014547503819849154, + "loss": 1.8877, + "step": 1138 + }, + { + "epoch": 0.12014767932489452, + "grad_norm": 1.0796623229980469, + "learning_rate": 0.001454664457506189, + "loss": 1.8739, + "step": 1139 + }, + { + "epoch": 0.12025316455696203, + "grad_norm": 1.056484341621399, + "learning_rate": 0.001454578454066296, + "loss": 1.9025, + "step": 1140 + }, + { + "epoch": 
0.12035864978902953, + "grad_norm": 0.9134669899940491, + "learning_rate": 0.001454492371674874, + "loss": 1.8226, + "step": 1141 + }, + { + "epoch": 0.12046413502109704, + "grad_norm": 0.9229429364204407, + "learning_rate": 0.0014544062103415687, + "loss": 1.8759, + "step": 1142 + }, + { + "epoch": 0.12056962025316456, + "grad_norm": 0.8840562701225281, + "learning_rate": 0.0014543199700760353, + "loss": 1.9136, + "step": 1143 + }, + { + "epoch": 0.12067510548523207, + "grad_norm": 0.793646514415741, + "learning_rate": 0.0014542336508879372, + "loss": 1.8295, + "step": 1144 + }, + { + "epoch": 0.12078059071729957, + "grad_norm": 0.7215383648872375, + "learning_rate": 0.0014541472527869468, + "loss": 1.8747, + "step": 1145 + }, + { + "epoch": 0.1208860759493671, + "grad_norm": 0.8043617606163025, + "learning_rate": 0.0014540607757827456, + "loss": 1.865, + "step": 1146 + }, + { + "epoch": 0.1209915611814346, + "grad_norm": 1.1848480701446533, + "learning_rate": 0.0014539742198850234, + "loss": 1.8516, + "step": 1147 + }, + { + "epoch": 0.12109704641350211, + "grad_norm": 1.01466965675354, + "learning_rate": 0.0014538875851034798, + "loss": 1.9256, + "step": 1148 + }, + { + "epoch": 0.12120253164556961, + "grad_norm": 0.7128393054008484, + "learning_rate": 0.0014538008714478224, + "loss": 1.8772, + "step": 1149 + }, + { + "epoch": 0.12130801687763713, + "grad_norm": 0.8702637553215027, + "learning_rate": 0.0014537140789277678, + "loss": 1.8681, + "step": 1150 + }, + { + "epoch": 0.12141350210970464, + "grad_norm": 1.112856388092041, + "learning_rate": 0.0014536272075530417, + "loss": 1.872, + "step": 1151 + }, + { + "epoch": 0.12151898734177215, + "grad_norm": 0.9551786780357361, + "learning_rate": 0.0014535402573333783, + "loss": 1.922, + "step": 1152 + }, + { + "epoch": 0.12162447257383967, + "grad_norm": 0.8481540083885193, + "learning_rate": 0.001453453228278521, + "loss": 1.8576, + "step": 1153 + }, + { + "epoch": 0.12172995780590717, + "grad_norm": 
0.74005126953125, + "learning_rate": 0.0014533661203982215, + "loss": 1.8703, + "step": 1154 + }, + { + "epoch": 0.12183544303797468, + "grad_norm": 0.80185467004776, + "learning_rate": 0.0014532789337022413, + "loss": 1.8733, + "step": 1155 + }, + { + "epoch": 0.1219409282700422, + "grad_norm": 1.018252968788147, + "learning_rate": 0.0014531916682003494, + "loss": 1.8896, + "step": 1156 + }, + { + "epoch": 0.12204641350210971, + "grad_norm": 0.8627280592918396, + "learning_rate": 0.0014531043239023247, + "loss": 1.8587, + "step": 1157 + }, + { + "epoch": 0.12215189873417721, + "grad_norm": 0.7386232614517212, + "learning_rate": 0.0014530169008179546, + "loss": 1.8753, + "step": 1158 + }, + { + "epoch": 0.12225738396624472, + "grad_norm": 0.7604181170463562, + "learning_rate": 0.001452929398957035, + "loss": 1.883, + "step": 1159 + }, + { + "epoch": 0.12236286919831224, + "grad_norm": 0.6775388121604919, + "learning_rate": 0.0014528418183293716, + "loss": 1.8533, + "step": 1160 + }, + { + "epoch": 0.12246835443037975, + "grad_norm": 0.7043300271034241, + "learning_rate": 0.0014527541589447774, + "loss": 1.89, + "step": 1161 + }, + { + "epoch": 0.12257383966244725, + "grad_norm": 0.7844849228858948, + "learning_rate": 0.0014526664208130756, + "loss": 1.899, + "step": 1162 + }, + { + "epoch": 0.12267932489451477, + "grad_norm": 0.7975372076034546, + "learning_rate": 0.0014525786039440971, + "loss": 1.8548, + "step": 1163 + }, + { + "epoch": 0.12278481012658228, + "grad_norm": 0.936034619808197, + "learning_rate": 0.001452490708347683, + "loss": 1.8632, + "step": 1164 + }, + { + "epoch": 0.12289029535864979, + "grad_norm": 1.1872025728225708, + "learning_rate": 0.0014524027340336821, + "loss": 1.8622, + "step": 1165 + }, + { + "epoch": 0.1229957805907173, + "grad_norm": 1.0530734062194824, + "learning_rate": 0.0014523146810119525, + "loss": 1.8478, + "step": 1166 + }, + { + "epoch": 0.12310126582278481, + "grad_norm": 0.8886854648590088, + "learning_rate": 
0.0014522265492923608, + "loss": 1.9072, + "step": 1167 + }, + { + "epoch": 0.12320675105485232, + "grad_norm": 0.682741105556488, + "learning_rate": 0.0014521383388847824, + "loss": 1.8285, + "step": 1168 + }, + { + "epoch": 0.12331223628691983, + "grad_norm": 1.0574209690093994, + "learning_rate": 0.0014520500497991022, + "loss": 1.8739, + "step": 1169 + }, + { + "epoch": 0.12341772151898735, + "grad_norm": 1.0627895593643188, + "learning_rate": 0.001451961682045213, + "loss": 1.8345, + "step": 1170 + }, + { + "epoch": 0.12352320675105485, + "grad_norm": 0.7757817506790161, + "learning_rate": 0.001451873235633017, + "loss": 1.8948, + "step": 1171 + }, + { + "epoch": 0.12362869198312236, + "grad_norm": 0.7930995225906372, + "learning_rate": 0.0014517847105724251, + "loss": 1.8621, + "step": 1172 + }, + { + "epoch": 0.12373417721518987, + "grad_norm": 0.8430611491203308, + "learning_rate": 0.0014516961068733569, + "loss": 1.8068, + "step": 1173 + }, + { + "epoch": 0.12383966244725739, + "grad_norm": 0.892241895198822, + "learning_rate": 0.0014516074245457412, + "loss": 1.8744, + "step": 1174 + }, + { + "epoch": 0.1239451476793249, + "grad_norm": 0.9607864022254944, + "learning_rate": 0.001451518663599515, + "loss": 1.835, + "step": 1175 + }, + { + "epoch": 0.1240506329113924, + "grad_norm": 0.8766289949417114, + "learning_rate": 0.0014514298240446244, + "loss": 1.7952, + "step": 1176 + }, + { + "epoch": 0.12415611814345992, + "grad_norm": 0.7038484811782837, + "learning_rate": 0.0014513409058910243, + "loss": 1.8312, + "step": 1177 + }, + { + "epoch": 0.12426160337552743, + "grad_norm": 0.7582265138626099, + "learning_rate": 0.0014512519091486786, + "loss": 1.8764, + "step": 1178 + }, + { + "epoch": 0.12436708860759493, + "grad_norm": 1.0074708461761475, + "learning_rate": 0.0014511628338275597, + "loss": 1.8884, + "step": 1179 + }, + { + "epoch": 0.12447257383966245, + "grad_norm": 1.1203702688217163, + "learning_rate": 0.001451073679937649, + "loss": 1.8605, + 
"step": 1180 + }, + { + "epoch": 0.12457805907172996, + "grad_norm": 0.7781455516815186, + "learning_rate": 0.0014509844474889365, + "loss": 1.8476, + "step": 1181 + }, + { + "epoch": 0.12468354430379747, + "grad_norm": 0.8911297917366028, + "learning_rate": 0.0014508951364914213, + "loss": 1.8507, + "step": 1182 + }, + { + "epoch": 0.12478902953586497, + "grad_norm": 1.0801479816436768, + "learning_rate": 0.001450805746955111, + "loss": 1.8777, + "step": 1183 + }, + { + "epoch": 0.1248945147679325, + "grad_norm": 0.8749620318412781, + "learning_rate": 0.001450716278890022, + "loss": 1.8765, + "step": 1184 + }, + { + "epoch": 0.125, + "grad_norm": 0.6979150772094727, + "learning_rate": 0.0014506267323061803, + "loss": 1.8543, + "step": 1185 + }, + { + "epoch": 0.12510548523206752, + "grad_norm": 0.7427628040313721, + "learning_rate": 0.0014505371072136195, + "loss": 1.8568, + "step": 1186 + }, + { + "epoch": 0.125210970464135, + "grad_norm": 0.8229203820228577, + "learning_rate": 0.0014504474036223826, + "loss": 1.8842, + "step": 1187 + }, + { + "epoch": 0.12531645569620253, + "grad_norm": 0.9062377214431763, + "learning_rate": 0.0014503576215425212, + "loss": 1.8681, + "step": 1188 + }, + { + "epoch": 0.12542194092827005, + "grad_norm": 0.8313186764717102, + "learning_rate": 0.0014502677609840964, + "loss": 1.8335, + "step": 1189 + }, + { + "epoch": 0.12552742616033755, + "grad_norm": 0.6972150802612305, + "learning_rate": 0.0014501778219571766, + "loss": 1.8216, + "step": 1190 + }, + { + "epoch": 0.12563291139240507, + "grad_norm": 0.7361453771591187, + "learning_rate": 0.0014500878044718408, + "loss": 1.8557, + "step": 1191 + }, + { + "epoch": 0.1257383966244726, + "grad_norm": 0.7206668853759766, + "learning_rate": 0.0014499977085381756, + "loss": 1.8352, + "step": 1192 + }, + { + "epoch": 0.12584388185654008, + "grad_norm": 0.7169440388679504, + "learning_rate": 0.0014499075341662764, + "loss": 1.8871, + "step": 1193 + }, + { + "epoch": 0.1259493670886076, + 
"grad_norm": 0.6685090065002441, + "learning_rate": 0.0014498172813662482, + "loss": 1.9065, + "step": 1194 + }, + { + "epoch": 0.1260548523206751, + "grad_norm": 0.7272512316703796, + "learning_rate": 0.0014497269501482037, + "loss": 1.8511, + "step": 1195 + }, + { + "epoch": 0.1261603375527426, + "grad_norm": 0.938203752040863, + "learning_rate": 0.0014496365405222656, + "loss": 1.849, + "step": 1196 + }, + { + "epoch": 0.12626582278481013, + "grad_norm": 1.1306114196777344, + "learning_rate": 0.0014495460524985644, + "loss": 1.8478, + "step": 1197 + }, + { + "epoch": 0.12637130801687763, + "grad_norm": 0.9540188312530518, + "learning_rate": 0.0014494554860872398, + "loss": 1.8773, + "step": 1198 + }, + { + "epoch": 0.12647679324894515, + "grad_norm": 0.9077197909355164, + "learning_rate": 0.00144936484129844, + "loss": 1.8915, + "step": 1199 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 0.7219662666320801, + "learning_rate": 0.0014492741181423225, + "loss": 1.8513, + "step": 1200 + }, + { + "epoch": 0.12668776371308016, + "grad_norm": 0.715249240398407, + "learning_rate": 0.001449183316629053, + "loss": 1.8605, + "step": 1201 + }, + { + "epoch": 0.12679324894514768, + "grad_norm": 0.8628213405609131, + "learning_rate": 0.0014490924367688066, + "loss": 1.8586, + "step": 1202 + }, + { + "epoch": 0.1268987341772152, + "grad_norm": 0.9234069585800171, + "learning_rate": 0.0014490014785717667, + "loss": 1.8458, + "step": 1203 + }, + { + "epoch": 0.1270042194092827, + "grad_norm": 0.8407811522483826, + "learning_rate": 0.0014489104420481254, + "loss": 1.8549, + "step": 1204 + }, + { + "epoch": 0.1271097046413502, + "grad_norm": 0.818147599697113, + "learning_rate": 0.001448819327208084, + "loss": 1.8861, + "step": 1205 + }, + { + "epoch": 0.12721518987341773, + "grad_norm": 0.7287795543670654, + "learning_rate": 0.0014487281340618526, + "loss": 1.858, + "step": 1206 + }, + { + "epoch": 0.12732067510548523, + "grad_norm": 0.7393110394477844, + 
"learning_rate": 0.0014486368626196494, + "loss": 1.8438, + "step": 1207 + }, + { + "epoch": 0.12742616033755275, + "grad_norm": 0.7775914669036865, + "learning_rate": 0.001448545512891702, + "loss": 1.8584, + "step": 1208 + }, + { + "epoch": 0.12753164556962027, + "grad_norm": 0.7950329780578613, + "learning_rate": 0.0014484540848882469, + "loss": 1.8667, + "step": 1209 + }, + { + "epoch": 0.12763713080168776, + "grad_norm": 0.8521889448165894, + "learning_rate": 0.0014483625786195285, + "loss": 1.8879, + "step": 1210 + }, + { + "epoch": 0.12774261603375528, + "grad_norm": 0.8098447918891907, + "learning_rate": 0.0014482709940958009, + "loss": 1.8322, + "step": 1211 + }, + { + "epoch": 0.12784810126582277, + "grad_norm": 0.9775342345237732, + "learning_rate": 0.0014481793313273266, + "loss": 1.834, + "step": 1212 + }, + { + "epoch": 0.1279535864978903, + "grad_norm": 1.3006258010864258, + "learning_rate": 0.0014480875903243766, + "loss": 1.8437, + "step": 1213 + }, + { + "epoch": 0.1280590717299578, + "grad_norm": 1.0342538356781006, + "learning_rate": 0.0014479957710972313, + "loss": 1.8472, + "step": 1214 + }, + { + "epoch": 0.1281645569620253, + "grad_norm": 0.9770470261573792, + "learning_rate": 0.0014479038736561793, + "loss": 1.8346, + "step": 1215 + }, + { + "epoch": 0.12827004219409283, + "grad_norm": 1.056506633758545, + "learning_rate": 0.001447811898011518, + "loss": 1.8706, + "step": 1216 + }, + { + "epoch": 0.12837552742616035, + "grad_norm": 0.9311084747314453, + "learning_rate": 0.0014477198441735543, + "loss": 1.8581, + "step": 1217 + }, + { + "epoch": 0.12848101265822784, + "grad_norm": 0.802361011505127, + "learning_rate": 0.0014476277121526027, + "loss": 1.9018, + "step": 1218 + }, + { + "epoch": 0.12858649789029536, + "grad_norm": 0.7156220078468323, + "learning_rate": 0.0014475355019589872, + "loss": 1.8234, + "step": 1219 + }, + { + "epoch": 0.12869198312236288, + "grad_norm": 0.7085161805152893, + "learning_rate": 0.0014474432136030405, + 
"loss": 1.8147, + "step": 1220 + }, + { + "epoch": 0.12879746835443037, + "grad_norm": 0.9474769830703735, + "learning_rate": 0.001447350847095104, + "loss": 1.8496, + "step": 1221 + }, + { + "epoch": 0.1289029535864979, + "grad_norm": 1.3103190660476685, + "learning_rate": 0.001447258402445528, + "loss": 1.8705, + "step": 1222 + }, + { + "epoch": 0.1290084388185654, + "grad_norm": 0.7421398162841797, + "learning_rate": 0.0014471658796646708, + "loss": 1.8323, + "step": 1223 + }, + { + "epoch": 0.1291139240506329, + "grad_norm": 0.9503397941589355, + "learning_rate": 0.0014470732787629005, + "loss": 1.8729, + "step": 1224 + }, + { + "epoch": 0.12921940928270043, + "grad_norm": 1.1311473846435547, + "learning_rate": 0.0014469805997505932, + "loss": 1.8072, + "step": 1225 + }, + { + "epoch": 0.12932489451476795, + "grad_norm": 1.0746935606002808, + "learning_rate": 0.0014468878426381346, + "loss": 1.8191, + "step": 1226 + }, + { + "epoch": 0.12943037974683544, + "grad_norm": 0.8885477781295776, + "learning_rate": 0.001446795007435918, + "loss": 1.8292, + "step": 1227 + }, + { + "epoch": 0.12953586497890296, + "grad_norm": 0.7039929628372192, + "learning_rate": 0.0014467020941543464, + "loss": 1.8293, + "step": 1228 + }, + { + "epoch": 0.12964135021097045, + "grad_norm": 0.6865050196647644, + "learning_rate": 0.0014466091028038314, + "loss": 1.8638, + "step": 1229 + }, + { + "epoch": 0.12974683544303797, + "grad_norm": 0.8031682372093201, + "learning_rate": 0.0014465160333947923, + "loss": 1.7981, + "step": 1230 + }, + { + "epoch": 0.1298523206751055, + "grad_norm": 0.9198803901672363, + "learning_rate": 0.0014464228859376587, + "loss": 1.8626, + "step": 1231 + }, + { + "epoch": 0.12995780590717299, + "grad_norm": 0.7928988933563232, + "learning_rate": 0.001446329660442868, + "loss": 1.8877, + "step": 1232 + }, + { + "epoch": 0.1300632911392405, + "grad_norm": 0.7825950384140015, + "learning_rate": 0.0014462363569208666, + "loss": 1.8127, + "step": 1233 + }, + { + 
"epoch": 0.13016877637130803, + "grad_norm": 0.7811450362205505, + "learning_rate": 0.00144614297538211, + "loss": 1.8736, + "step": 1234 + }, + { + "epoch": 0.13027426160337552, + "grad_norm": 0.7090624570846558, + "learning_rate": 0.0014460495158370615, + "loss": 1.867, + "step": 1235 + }, + { + "epoch": 0.13037974683544304, + "grad_norm": 0.7868795990943909, + "learning_rate": 0.0014459559782961937, + "loss": 1.844, + "step": 1236 + }, + { + "epoch": 0.13048523206751056, + "grad_norm": 0.9338590502738953, + "learning_rate": 0.0014458623627699883, + "loss": 1.8324, + "step": 1237 + }, + { + "epoch": 0.13059071729957805, + "grad_norm": 1.226667881011963, + "learning_rate": 0.0014457686692689355, + "loss": 1.845, + "step": 1238 + }, + { + "epoch": 0.13069620253164557, + "grad_norm": 0.8656629323959351, + "learning_rate": 0.0014456748978035339, + "loss": 1.8796, + "step": 1239 + }, + { + "epoch": 0.1308016877637131, + "grad_norm": 0.7429167628288269, + "learning_rate": 0.0014455810483842908, + "loss": 1.8297, + "step": 1240 + }, + { + "epoch": 0.13090717299578059, + "grad_norm": 0.904303789138794, + "learning_rate": 0.0014454871210217229, + "loss": 1.8115, + "step": 1241 + }, + { + "epoch": 0.1310126582278481, + "grad_norm": 0.9809480309486389, + "learning_rate": 0.0014453931157263548, + "loss": 1.8673, + "step": 1242 + }, + { + "epoch": 0.1311181434599156, + "grad_norm": 1.0109357833862305, + "learning_rate": 0.001445299032508721, + "loss": 1.8565, + "step": 1243 + }, + { + "epoch": 0.13122362869198312, + "grad_norm": 0.7752487063407898, + "learning_rate": 0.0014452048713793633, + "loss": 1.8488, + "step": 1244 + }, + { + "epoch": 0.13132911392405064, + "grad_norm": 0.7565234899520874, + "learning_rate": 0.0014451106323488331, + "loss": 1.8758, + "step": 1245 + }, + { + "epoch": 0.13143459915611813, + "grad_norm": 0.9419227242469788, + "learning_rate": 0.0014450163154276906, + "loss": 1.8836, + "step": 1246 + }, + { + "epoch": 0.13154008438818565, + "grad_norm": 
0.8761952519416809, + "learning_rate": 0.0014449219206265041, + "loss": 1.8399, + "step": 1247 + }, + { + "epoch": 0.13164556962025317, + "grad_norm": 0.7974103689193726, + "learning_rate": 0.0014448274479558513, + "loss": 1.8543, + "step": 1248 + }, + { + "epoch": 0.13175105485232066, + "grad_norm": 0.7015957236289978, + "learning_rate": 0.0014447328974263182, + "loss": 1.8386, + "step": 1249 + }, + { + "epoch": 0.13185654008438819, + "grad_norm": 0.8122264742851257, + "learning_rate": 0.0014446382690484997, + "loss": 1.8424, + "step": 1250 + }, + { + "epoch": 0.1319620253164557, + "grad_norm": 1.1072255373001099, + "learning_rate": 0.0014445435628329993, + "loss": 1.8793, + "step": 1251 + }, + { + "epoch": 0.1320675105485232, + "grad_norm": 0.937644898891449, + "learning_rate": 0.0014444487787904294, + "loss": 1.8349, + "step": 1252 + }, + { + "epoch": 0.13217299578059072, + "grad_norm": 0.9043557047843933, + "learning_rate": 0.001444353916931411, + "loss": 1.8248, + "step": 1253 + }, + { + "epoch": 0.13227848101265824, + "grad_norm": 0.7565386891365051, + "learning_rate": 0.001444258977266574, + "loss": 1.8116, + "step": 1254 + }, + { + "epoch": 0.13238396624472573, + "grad_norm": 0.8076348900794983, + "learning_rate": 0.0014441639598065565, + "loss": 1.8222, + "step": 1255 + }, + { + "epoch": 0.13248945147679325, + "grad_norm": 0.7431336045265198, + "learning_rate": 0.001444068864562006, + "loss": 1.8258, + "step": 1256 + }, + { + "epoch": 0.13259493670886077, + "grad_norm": 0.7702952027320862, + "learning_rate": 0.0014439736915435786, + "loss": 1.8603, + "step": 1257 + }, + { + "epoch": 0.13270042194092826, + "grad_norm": 0.7576225399971008, + "learning_rate": 0.001443878440761938, + "loss": 1.84, + "step": 1258 + }, + { + "epoch": 0.13280590717299579, + "grad_norm": 0.7777151465415955, + "learning_rate": 0.0014437831122277585, + "loss": 1.8484, + "step": 1259 + }, + { + "epoch": 0.13291139240506328, + "grad_norm": 0.8406563997268677, + "learning_rate": 
0.0014436877059517215, + "loss": 1.8379, + "step": 1260 + }, + { + "epoch": 0.1330168776371308, + "grad_norm": 0.8506481647491455, + "learning_rate": 0.0014435922219445182, + "loss": 1.853, + "step": 1261 + }, + { + "epoch": 0.13312236286919832, + "grad_norm": 0.7482781410217285, + "learning_rate": 0.0014434966602168478, + "loss": 1.8285, + "step": 1262 + }, + { + "epoch": 0.1332278481012658, + "grad_norm": 0.9213885068893433, + "learning_rate": 0.0014434010207794185, + "loss": 1.8232, + "step": 1263 + }, + { + "epoch": 0.13333333333333333, + "grad_norm": 0.8794370293617249, + "learning_rate": 0.0014433053036429474, + "loss": 1.8141, + "step": 1264 + }, + { + "epoch": 0.13343881856540085, + "grad_norm": 1.021648645401001, + "learning_rate": 0.00144320950881816, + "loss": 1.8409, + "step": 1265 + }, + { + "epoch": 0.13354430379746834, + "grad_norm": 1.0359309911727905, + "learning_rate": 0.0014431136363157902, + "loss": 1.8433, + "step": 1266 + }, + { + "epoch": 0.13364978902953586, + "grad_norm": 0.9645574688911438, + "learning_rate": 0.0014430176861465812, + "loss": 1.8723, + "step": 1267 + }, + { + "epoch": 0.13375527426160339, + "grad_norm": 0.9081580638885498, + "learning_rate": 0.001442921658321285, + "loss": 1.821, + "step": 1268 + }, + { + "epoch": 0.13386075949367088, + "grad_norm": 1.0830297470092773, + "learning_rate": 0.0014428255528506617, + "loss": 1.8771, + "step": 1269 + }, + { + "epoch": 0.1339662447257384, + "grad_norm": 0.8803933262825012, + "learning_rate": 0.0014427293697454803, + "loss": 1.8484, + "step": 1270 + }, + { + "epoch": 0.13407172995780592, + "grad_norm": 0.8912661075592041, + "learning_rate": 0.001442633109016519, + "loss": 1.8771, + "step": 1271 + }, + { + "epoch": 0.1341772151898734, + "grad_norm": 1.603084921836853, + "learning_rate": 0.001442536770674564, + "loss": 1.9155, + "step": 1272 + }, + { + "epoch": 0.13428270042194093, + "grad_norm": 0.8027404546737671, + "learning_rate": 0.0014424403547304103, + "loss": 1.8282, + 
"step": 1273 + }, + { + "epoch": 0.13438818565400845, + "grad_norm": 1.051067590713501, + "learning_rate": 0.0014423438611948624, + "loss": 1.8593, + "step": 1274 + }, + { + "epoch": 0.13449367088607594, + "grad_norm": 1.3968749046325684, + "learning_rate": 0.0014422472900787323, + "loss": 1.8698, + "step": 1275 + }, + { + "epoch": 0.13459915611814346, + "grad_norm": 0.7612778544425964, + "learning_rate": 0.0014421506413928415, + "loss": 1.8328, + "step": 1276 + }, + { + "epoch": 0.13470464135021096, + "grad_norm": 1.4181158542633057, + "learning_rate": 0.0014420539151480199, + "loss": 1.8862, + "step": 1277 + }, + { + "epoch": 0.13481012658227848, + "grad_norm": 1.0417275428771973, + "learning_rate": 0.0014419571113551063, + "loss": 1.839, + "step": 1278 + }, + { + "epoch": 0.134915611814346, + "grad_norm": 0.7226706147193909, + "learning_rate": 0.0014418602300249482, + "loss": 1.8735, + "step": 1279 + }, + { + "epoch": 0.1350210970464135, + "grad_norm": 1.1738286018371582, + "learning_rate": 0.001441763271168401, + "loss": 1.8113, + "step": 1280 + }, + { + "epoch": 0.135126582278481, + "grad_norm": 0.9902945160865784, + "learning_rate": 0.00144166623479633, + "loss": 1.8461, + "step": 1281 + }, + { + "epoch": 0.13523206751054853, + "grad_norm": 0.7159084677696228, + "learning_rate": 0.0014415691209196085, + "loss": 1.7914, + "step": 1282 + }, + { + "epoch": 0.13533755274261602, + "grad_norm": 0.709564745426178, + "learning_rate": 0.0014414719295491184, + "loss": 1.8547, + "step": 1283 + }, + { + "epoch": 0.13544303797468354, + "grad_norm": 0.7577031850814819, + "learning_rate": 0.0014413746606957505, + "loss": 1.8076, + "step": 1284 + }, + { + "epoch": 0.13554852320675106, + "grad_norm": 0.8331753015518188, + "learning_rate": 0.0014412773143704046, + "loss": 1.8106, + "step": 1285 + }, + { + "epoch": 0.13565400843881856, + "grad_norm": 0.7483537197113037, + "learning_rate": 0.0014411798905839884, + "loss": 1.8244, + "step": 1286 + }, + { + "epoch": 
0.13575949367088608, + "grad_norm": 0.7385319471359253, + "learning_rate": 0.0014410823893474193, + "loss": 1.8244, + "step": 1287 + }, + { + "epoch": 0.1358649789029536, + "grad_norm": 1.1561002731323242, + "learning_rate": 0.001440984810671622, + "loss": 1.8526, + "step": 1288 + }, + { + "epoch": 0.1359704641350211, + "grad_norm": 1.2346051931381226, + "learning_rate": 0.0014408871545675314, + "loss": 1.8872, + "step": 1289 + }, + { + "epoch": 0.1360759493670886, + "grad_norm": 0.784325897693634, + "learning_rate": 0.00144078942104609, + "loss": 1.86, + "step": 1290 + }, + { + "epoch": 0.13618143459915613, + "grad_norm": 0.7522047162055969, + "learning_rate": 0.0014406916101182491, + "loss": 1.8577, + "step": 1291 + }, + { + "epoch": 0.13628691983122362, + "grad_norm": 1.0029914379119873, + "learning_rate": 0.0014405937217949695, + "loss": 1.8262, + "step": 1292 + }, + { + "epoch": 0.13639240506329114, + "grad_norm": 1.0353062152862549, + "learning_rate": 0.0014404957560872197, + "loss": 1.8293, + "step": 1293 + }, + { + "epoch": 0.13649789029535864, + "grad_norm": 0.8862709999084473, + "learning_rate": 0.0014403977130059773, + "loss": 1.8539, + "step": 1294 + }, + { + "epoch": 0.13660337552742616, + "grad_norm": 0.8435131311416626, + "learning_rate": 0.0014402995925622284, + "loss": 1.7692, + "step": 1295 + }, + { + "epoch": 0.13670886075949368, + "grad_norm": 0.7148916721343994, + "learning_rate": 0.0014402013947669681, + "loss": 1.8309, + "step": 1296 + }, + { + "epoch": 0.13681434599156117, + "grad_norm": 0.756952702999115, + "learning_rate": 0.0014401031196312, + "loss": 1.8308, + "step": 1297 + }, + { + "epoch": 0.1369198312236287, + "grad_norm": 0.9148671627044678, + "learning_rate": 0.001440004767165936, + "loss": 1.776, + "step": 1298 + }, + { + "epoch": 0.1370253164556962, + "grad_norm": 1.1292259693145752, + "learning_rate": 0.0014399063373821972, + "loss": 1.8151, + "step": 1299 + }, + { + "epoch": 0.1371308016877637, + "grad_norm": 
0.8694108724594116, + "learning_rate": 0.001439807830291013, + "loss": 1.8237, + "step": 1300 + }, + { + "epoch": 0.13723628691983122, + "grad_norm": 0.6765185594558716, + "learning_rate": 0.001439709245903422, + "loss": 1.8251, + "step": 1301 + }, + { + "epoch": 0.13734177215189874, + "grad_norm": 0.9260671734809875, + "learning_rate": 0.0014396105842304707, + "loss": 1.8502, + "step": 1302 + }, + { + "epoch": 0.13744725738396624, + "grad_norm": 1.1612194776535034, + "learning_rate": 0.0014395118452832146, + "loss": 1.8376, + "step": 1303 + }, + { + "epoch": 0.13755274261603376, + "grad_norm": 0.7939783334732056, + "learning_rate": 0.001439413029072718, + "loss": 1.8249, + "step": 1304 + }, + { + "epoch": 0.13765822784810128, + "grad_norm": 0.67336505651474, + "learning_rate": 0.001439314135610054, + "loss": 1.8426, + "step": 1305 + }, + { + "epoch": 0.13776371308016877, + "grad_norm": 0.7437554001808167, + "learning_rate": 0.0014392151649063039, + "loss": 1.819, + "step": 1306 + }, + { + "epoch": 0.1378691983122363, + "grad_norm": 0.8352715969085693, + "learning_rate": 0.0014391161169725573, + "loss": 1.8088, + "step": 1307 + }, + { + "epoch": 0.1379746835443038, + "grad_norm": 0.9070545434951782, + "learning_rate": 0.001439016991819914, + "loss": 1.852, + "step": 1308 + }, + { + "epoch": 0.1380801687763713, + "grad_norm": 0.9120774269104004, + "learning_rate": 0.001438917789459481, + "loss": 1.8158, + "step": 1309 + }, + { + "epoch": 0.13818565400843882, + "grad_norm": 0.8207044005393982, + "learning_rate": 0.0014388185099023744, + "loss": 1.8079, + "step": 1310 + }, + { + "epoch": 0.13829113924050632, + "grad_norm": 0.7265280485153198, + "learning_rate": 0.001438719153159719, + "loss": 1.8227, + "step": 1311 + }, + { + "epoch": 0.13839662447257384, + "grad_norm": 0.6807964444160461, + "learning_rate": 0.0014386197192426482, + "loss": 1.8193, + "step": 1312 + }, + { + "epoch": 0.13850210970464136, + "grad_norm": 0.8306171298027039, + "learning_rate": 
0.001438520208162304, + "loss": 1.8486, + "step": 1313 + }, + { + "epoch": 0.13860759493670885, + "grad_norm": 0.736253559589386, + "learning_rate": 0.0014384206199298374, + "loss": 1.8536, + "step": 1314 + }, + { + "epoch": 0.13871308016877637, + "grad_norm": 0.6831203103065491, + "learning_rate": 0.0014383209545564073, + "loss": 1.8515, + "step": 1315 + }, + { + "epoch": 0.1388185654008439, + "grad_norm": 0.7448873519897461, + "learning_rate": 0.001438221212053182, + "loss": 1.7865, + "step": 1316 + }, + { + "epoch": 0.13892405063291138, + "grad_norm": 0.7462145686149597, + "learning_rate": 0.0014381213924313386, + "loss": 1.8196, + "step": 1317 + }, + { + "epoch": 0.1390295358649789, + "grad_norm": 0.8142414689064026, + "learning_rate": 0.0014380214957020613, + "loss": 1.8198, + "step": 1318 + }, + { + "epoch": 0.13913502109704642, + "grad_norm": 1.0314310789108276, + "learning_rate": 0.001437921521876545, + "loss": 1.8445, + "step": 1319 + }, + { + "epoch": 0.13924050632911392, + "grad_norm": 1.2293202877044678, + "learning_rate": 0.0014378214709659916, + "loss": 1.8252, + "step": 1320 + }, + { + "epoch": 0.13934599156118144, + "grad_norm": 0.9079253077507019, + "learning_rate": 0.0014377213429816128, + "loss": 1.8076, + "step": 1321 + }, + { + "epoch": 0.13945147679324896, + "grad_norm": 0.9450768828392029, + "learning_rate": 0.0014376211379346282, + "loss": 1.8239, + "step": 1322 + }, + { + "epoch": 0.13955696202531645, + "grad_norm": 0.8127079010009766, + "learning_rate": 0.0014375208558362663, + "loss": 1.8337, + "step": 1323 + }, + { + "epoch": 0.13966244725738397, + "grad_norm": 0.7903156876564026, + "learning_rate": 0.0014374204966977639, + "loss": 1.8363, + "step": 1324 + }, + { + "epoch": 0.13976793248945146, + "grad_norm": 0.9080058336257935, + "learning_rate": 0.0014373200605303674, + "loss": 1.8086, + "step": 1325 + }, + { + "epoch": 0.13987341772151898, + "grad_norm": 0.7254908084869385, + "learning_rate": 0.001437219547345331, + "loss": 1.8367, + 
"step": 1326 + }, + { + "epoch": 0.1399789029535865, + "grad_norm": 0.9146758913993835, + "learning_rate": 0.0014371189571539174, + "loss": 1.8198, + "step": 1327 + }, + { + "epoch": 0.140084388185654, + "grad_norm": 0.9812776446342468, + "learning_rate": 0.0014370182899673982, + "loss": 1.8114, + "step": 1328 + }, + { + "epoch": 0.14018987341772152, + "grad_norm": 0.7649427652359009, + "learning_rate": 0.0014369175457970538, + "loss": 1.8306, + "step": 1329 + }, + { + "epoch": 0.14029535864978904, + "grad_norm": 0.8404338955879211, + "learning_rate": 0.0014368167246541733, + "loss": 1.852, + "step": 1330 + }, + { + "epoch": 0.14040084388185653, + "grad_norm": 1.1790398359298706, + "learning_rate": 0.0014367158265500537, + "loss": 1.8455, + "step": 1331 + }, + { + "epoch": 0.14050632911392405, + "grad_norm": 0.9462620615959167, + "learning_rate": 0.0014366148514960016, + "loss": 1.8287, + "step": 1332 + }, + { + "epoch": 0.14061181434599157, + "grad_norm": 0.7867636680603027, + "learning_rate": 0.001436513799503332, + "loss": 1.7929, + "step": 1333 + }, + { + "epoch": 0.14071729957805906, + "grad_norm": 0.7641672492027283, + "learning_rate": 0.0014364126705833675, + "loss": 1.8095, + "step": 1334 + }, + { + "epoch": 0.14082278481012658, + "grad_norm": 0.8680098652839661, + "learning_rate": 0.0014363114647474406, + "loss": 1.7919, + "step": 1335 + }, + { + "epoch": 0.1409282700421941, + "grad_norm": 0.8723886013031006, + "learning_rate": 0.0014362101820068918, + "loss": 1.8197, + "step": 1336 + }, + { + "epoch": 0.1410337552742616, + "grad_norm": 0.6791922450065613, + "learning_rate": 0.0014361088223730704, + "loss": 1.8463, + "step": 1337 + }, + { + "epoch": 0.14113924050632912, + "grad_norm": 1.08528733253479, + "learning_rate": 0.0014360073858573341, + "loss": 1.8382, + "step": 1338 + }, + { + "epoch": 0.14124472573839664, + "grad_norm": 1.0779986381530762, + "learning_rate": 0.0014359058724710497, + "loss": 1.8731, + "step": 1339 + }, + { + "epoch": 
0.14135021097046413, + "grad_norm": 0.9356694221496582, + "learning_rate": 0.0014358042822255918, + "loss": 1.8071, + "step": 1340 + }, + { + "epoch": 0.14145569620253165, + "grad_norm": 0.8863275051116943, + "learning_rate": 0.0014357026151323444, + "loss": 1.8446, + "step": 1341 + }, + { + "epoch": 0.14156118143459914, + "grad_norm": 1.1210741996765137, + "learning_rate": 0.0014356008712027, + "loss": 1.8719, + "step": 1342 + }, + { + "epoch": 0.14166666666666666, + "grad_norm": 1.0850670337677002, + "learning_rate": 0.0014354990504480592, + "loss": 1.8087, + "step": 1343 + }, + { + "epoch": 0.14177215189873418, + "grad_norm": 0.822909951210022, + "learning_rate": 0.0014353971528798313, + "loss": 1.8402, + "step": 1344 + }, + { + "epoch": 0.14187763713080168, + "grad_norm": 0.7836199998855591, + "learning_rate": 0.001435295178509435, + "loss": 1.8181, + "step": 1345 + }, + { + "epoch": 0.1419831223628692, + "grad_norm": 0.840388834476471, + "learning_rate": 0.0014351931273482966, + "loss": 1.8128, + "step": 1346 + }, + { + "epoch": 0.14208860759493672, + "grad_norm": 0.9273520708084106, + "learning_rate": 0.0014350909994078516, + "loss": 1.8311, + "step": 1347 + }, + { + "epoch": 0.1421940928270042, + "grad_norm": 1.027754306793213, + "learning_rate": 0.0014349887946995441, + "loss": 1.8098, + "step": 1348 + }, + { + "epoch": 0.14229957805907173, + "grad_norm": 0.9560449123382568, + "learning_rate": 0.0014348865132348262, + "loss": 1.8371, + "step": 1349 + }, + { + "epoch": 0.14240506329113925, + "grad_norm": 0.7078729867935181, + "learning_rate": 0.0014347841550251597, + "loss": 1.8193, + "step": 1350 + }, + { + "epoch": 0.14251054852320674, + "grad_norm": 0.8572754263877869, + "learning_rate": 0.0014346817200820137, + "loss": 1.8156, + "step": 1351 + }, + { + "epoch": 0.14261603375527426, + "grad_norm": 0.9332448244094849, + "learning_rate": 0.0014345792084168672, + "loss": 1.7936, + "step": 1352 + }, + { + "epoch": 0.14272151898734178, + "grad_norm": 
0.9034005403518677, + "learning_rate": 0.0014344766200412062, + "loss": 1.8395, + "step": 1353 + }, + { + "epoch": 0.14282700421940928, + "grad_norm": 0.7438182234764099, + "learning_rate": 0.0014343739549665274, + "loss": 1.8458, + "step": 1354 + }, + { + "epoch": 0.1429324894514768, + "grad_norm": 0.8259159922599792, + "learning_rate": 0.0014342712132043342, + "loss": 1.8161, + "step": 1355 + }, + { + "epoch": 0.14303797468354432, + "grad_norm": 1.0151033401489258, + "learning_rate": 0.001434168394766139, + "loss": 1.8096, + "step": 1356 + }, + { + "epoch": 0.1431434599156118, + "grad_norm": 0.9227373600006104, + "learning_rate": 0.001434065499663464, + "loss": 1.8462, + "step": 1357 + }, + { + "epoch": 0.14324894514767933, + "grad_norm": 0.7570807933807373, + "learning_rate": 0.0014339625279078388, + "loss": 1.8253, + "step": 1358 + }, + { + "epoch": 0.14335443037974682, + "grad_norm": 0.7245476841926575, + "learning_rate": 0.0014338594795108017, + "loss": 1.7947, + "step": 1359 + }, + { + "epoch": 0.14345991561181434, + "grad_norm": 0.7970989346504211, + "learning_rate": 0.0014337563544838997, + "loss": 1.8417, + "step": 1360 + }, + { + "epoch": 0.14356540084388186, + "grad_norm": 1.0802357196807861, + "learning_rate": 0.0014336531528386888, + "loss": 1.8047, + "step": 1361 + }, + { + "epoch": 0.14367088607594936, + "grad_norm": 0.9135968685150146, + "learning_rate": 0.0014335498745867332, + "loss": 1.8231, + "step": 1362 + }, + { + "epoch": 0.14377637130801688, + "grad_norm": 0.7015502452850342, + "learning_rate": 0.0014334465197396054, + "loss": 1.8239, + "step": 1363 + }, + { + "epoch": 0.1438818565400844, + "grad_norm": 1.0331732034683228, + "learning_rate": 0.0014333430883088877, + "loss": 1.8164, + "step": 1364 + }, + { + "epoch": 0.1439873417721519, + "grad_norm": 1.1173673868179321, + "learning_rate": 0.001433239580306169, + "loss": 1.8075, + "step": 1365 + }, + { + "epoch": 0.1440928270042194, + "grad_norm": 0.7286204099655151, + "learning_rate": 
0.0014331359957430482, + "loss": 1.813, + "step": 1366 + }, + { + "epoch": 0.14419831223628693, + "grad_norm": 0.7752498388290405, + "learning_rate": 0.001433032334631133, + "loss": 1.8303, + "step": 1367 + }, + { + "epoch": 0.14430379746835442, + "grad_norm": 0.9174172282218933, + "learning_rate": 0.0014329285969820389, + "loss": 1.8127, + "step": 1368 + }, + { + "epoch": 0.14440928270042194, + "grad_norm": 0.9906301498413086, + "learning_rate": 0.00143282478280739, + "loss": 1.8353, + "step": 1369 + }, + { + "epoch": 0.14451476793248946, + "grad_norm": 1.101486086845398, + "learning_rate": 0.001432720892118819, + "loss": 1.8094, + "step": 1370 + }, + { + "epoch": 0.14462025316455696, + "grad_norm": 0.8762015700340271, + "learning_rate": 0.0014326169249279683, + "loss": 1.7984, + "step": 1371 + }, + { + "epoch": 0.14472573839662448, + "grad_norm": 0.6572405099868774, + "learning_rate": 0.001432512881246487, + "loss": 1.7914, + "step": 1372 + }, + { + "epoch": 0.144831223628692, + "grad_norm": 0.9006447792053223, + "learning_rate": 0.0014324087610860339, + "loss": 1.8141, + "step": 1373 + }, + { + "epoch": 0.1449367088607595, + "grad_norm": 1.0030016899108887, + "learning_rate": 0.0014323045644582765, + "loss": 1.7989, + "step": 1374 + }, + { + "epoch": 0.145042194092827, + "grad_norm": 0.8561321496963501, + "learning_rate": 0.0014322002913748902, + "loss": 1.816, + "step": 1375 + }, + { + "epoch": 0.1451476793248945, + "grad_norm": 0.6894409656524658, + "learning_rate": 0.0014320959418475596, + "loss": 1.8021, + "step": 1376 + }, + { + "epoch": 0.14525316455696202, + "grad_norm": 0.7009186744689941, + "learning_rate": 0.0014319915158879776, + "loss": 1.769, + "step": 1377 + }, + { + "epoch": 0.14535864978902954, + "grad_norm": 0.684551477432251, + "learning_rate": 0.0014318870135078452, + "loss": 1.7771, + "step": 1378 + }, + { + "epoch": 0.14546413502109704, + "grad_norm": 0.6557650566101074, + "learning_rate": 0.001431782434718873, + "loss": 1.8529, + "step": 
1379 + }, + { + "epoch": 0.14556962025316456, + "grad_norm": 0.6573826670646667, + "learning_rate": 0.0014316777795327794, + "loss": 1.7873, + "step": 1380 + }, + { + "epoch": 0.14567510548523208, + "grad_norm": 0.8142624497413635, + "learning_rate": 0.0014315730479612914, + "loss": 1.8436, + "step": 1381 + }, + { + "epoch": 0.14578059071729957, + "grad_norm": 0.7776772379875183, + "learning_rate": 0.0014314682400161445, + "loss": 1.801, + "step": 1382 + }, + { + "epoch": 0.1458860759493671, + "grad_norm": 0.6987738013267517, + "learning_rate": 0.0014313633557090834, + "loss": 1.8077, + "step": 1383 + }, + { + "epoch": 0.1459915611814346, + "grad_norm": 0.6793323159217834, + "learning_rate": 0.0014312583950518607, + "loss": 1.791, + "step": 1384 + }, + { + "epoch": 0.1460970464135021, + "grad_norm": 0.6964205503463745, + "learning_rate": 0.0014311533580562378, + "loss": 1.8373, + "step": 1385 + }, + { + "epoch": 0.14620253164556962, + "grad_norm": 0.6815252900123596, + "learning_rate": 0.0014310482447339845, + "loss": 1.8127, + "step": 1386 + }, + { + "epoch": 0.14630801687763714, + "grad_norm": 0.7421700954437256, + "learning_rate": 0.0014309430550968794, + "loss": 1.8135, + "step": 1387 + }, + { + "epoch": 0.14641350210970464, + "grad_norm": 0.7072603702545166, + "learning_rate": 0.0014308377891567095, + "loss": 1.8092, + "step": 1388 + }, + { + "epoch": 0.14651898734177216, + "grad_norm": 0.7433549761772156, + "learning_rate": 0.0014307324469252703, + "loss": 1.8179, + "step": 1389 + }, + { + "epoch": 0.14662447257383968, + "grad_norm": 0.8338775038719177, + "learning_rate": 0.001430627028414366, + "loss": 1.8299, + "step": 1390 + }, + { + "epoch": 0.14672995780590717, + "grad_norm": 0.8964146971702576, + "learning_rate": 0.0014305215336358093, + "loss": 1.8398, + "step": 1391 + }, + { + "epoch": 0.1468354430379747, + "grad_norm": 1.040199875831604, + "learning_rate": 0.0014304159626014213, + "loss": 1.8268, + "step": 1392 + }, + { + "epoch": 
0.14694092827004218, + "grad_norm": 0.9061245918273926, + "learning_rate": 0.0014303103153230322, + "loss": 1.8047, + "step": 1393 + }, + { + "epoch": 0.1470464135021097, + "grad_norm": 0.7978073358535767, + "learning_rate": 0.0014302045918124795, + "loss": 1.8005, + "step": 1394 + }, + { + "epoch": 0.14715189873417722, + "grad_norm": 0.7624102830886841, + "learning_rate": 0.0014300987920816107, + "loss": 1.8252, + "step": 1395 + }, + { + "epoch": 0.14725738396624471, + "grad_norm": 0.6870443224906921, + "learning_rate": 0.0014299929161422807, + "loss": 1.8104, + "step": 1396 + }, + { + "epoch": 0.14736286919831224, + "grad_norm": 1.0597504377365112, + "learning_rate": 0.001429886964006354, + "loss": 1.7729, + "step": 1397 + }, + { + "epoch": 0.14746835443037976, + "grad_norm": 1.5714269876480103, + "learning_rate": 0.0014297809356857026, + "loss": 1.7734, + "step": 1398 + }, + { + "epoch": 0.14757383966244725, + "grad_norm": 0.8631510734558105, + "learning_rate": 0.0014296748311922074, + "loss": 1.817, + "step": 1399 + }, + { + "epoch": 0.14767932489451477, + "grad_norm": 1.6420420408248901, + "learning_rate": 0.0014295686505377586, + "loss": 1.791, + "step": 1400 + }, + { + "epoch": 0.1477848101265823, + "grad_norm": 0.9433822631835938, + "learning_rate": 0.001429462393734254, + "loss": 1.8341, + "step": 1401 + }, + { + "epoch": 0.14789029535864978, + "grad_norm": 1.6634620428085327, + "learning_rate": 0.0014293560607935999, + "loss": 1.8116, + "step": 1402 + }, + { + "epoch": 0.1479957805907173, + "grad_norm": 1.140648365020752, + "learning_rate": 0.0014292496517277116, + "loss": 1.8637, + "step": 1403 + }, + { + "epoch": 0.14810126582278482, + "grad_norm": 1.6591262817382812, + "learning_rate": 0.0014291431665485125, + "loss": 1.8316, + "step": 1404 + }, + { + "epoch": 0.14820675105485231, + "grad_norm": 1.369357943534851, + "learning_rate": 0.0014290366052679352, + "loss": 1.8172, + "step": 1405 + }, + { + "epoch": 0.14831223628691984, + "grad_norm": 
1.1823511123657227, + "learning_rate": 0.0014289299678979207, + "loss": 1.8137, + "step": 1406 + }, + { + "epoch": 0.14841772151898736, + "grad_norm": 1.0276343822479248, + "learning_rate": 0.0014288232544504174, + "loss": 1.7503, + "step": 1407 + }, + { + "epoch": 0.14852320675105485, + "grad_norm": 1.0917946100234985, + "learning_rate": 0.0014287164649373837, + "loss": 1.8297, + "step": 1408 + }, + { + "epoch": 0.14862869198312237, + "grad_norm": 1.0181547403335571, + "learning_rate": 0.0014286095993707856, + "loss": 1.8458, + "step": 1409 + }, + { + "epoch": 0.14873417721518986, + "grad_norm": 1.0214604139328003, + "learning_rate": 0.0014285026577625982, + "loss": 1.8409, + "step": 1410 + }, + { + "epoch": 0.14883966244725738, + "grad_norm": 0.9402409791946411, + "learning_rate": 0.0014283956401248048, + "loss": 1.8355, + "step": 1411 + }, + { + "epoch": 0.1489451476793249, + "grad_norm": 0.8506402373313904, + "learning_rate": 0.0014282885464693969, + "loss": 1.7926, + "step": 1412 + }, + { + "epoch": 0.1490506329113924, + "grad_norm": 0.9423339366912842, + "learning_rate": 0.001428181376808375, + "loss": 1.8104, + "step": 1413 + }, + { + "epoch": 0.14915611814345991, + "grad_norm": 0.9923974871635437, + "learning_rate": 0.0014280741311537483, + "loss": 1.8261, + "step": 1414 + }, + { + "epoch": 0.14926160337552744, + "grad_norm": 0.9954825639724731, + "learning_rate": 0.001427966809517534, + "loss": 1.7737, + "step": 1415 + }, + { + "epoch": 0.14936708860759493, + "grad_norm": 0.7509697079658508, + "learning_rate": 0.001427859411911758, + "loss": 1.8481, + "step": 1416 + }, + { + "epoch": 0.14947257383966245, + "grad_norm": 0.9333043098449707, + "learning_rate": 0.0014277519383484548, + "loss": 1.7851, + "step": 1417 + }, + { + "epoch": 0.14957805907172997, + "grad_norm": 1.0519899129867554, + "learning_rate": 0.0014276443888396675, + "loss": 1.8218, + "step": 1418 + }, + { + "epoch": 0.14968354430379746, + "grad_norm": 0.8751907348632812, + "learning_rate": 
0.0014275367633974473, + "loss": 1.8139, + "step": 1419 + }, + { + "epoch": 0.14978902953586498, + "grad_norm": 0.7943668961524963, + "learning_rate": 0.0014274290620338542, + "loss": 1.8241, + "step": 1420 + }, + { + "epoch": 0.1498945147679325, + "grad_norm": 1.1156803369522095, + "learning_rate": 0.0014273212847609566, + "loss": 1.8149, + "step": 1421 + }, + { + "epoch": 0.15, + "grad_norm": 0.7849866151809692, + "learning_rate": 0.0014272134315908317, + "loss": 1.7941, + "step": 1422 + }, + { + "epoch": 0.15010548523206751, + "grad_norm": 0.875567615032196, + "learning_rate": 0.0014271055025355652, + "loss": 1.813, + "step": 1423 + }, + { + "epoch": 0.150210970464135, + "grad_norm": 0.8688913583755493, + "learning_rate": 0.0014269974976072505, + "loss": 1.8025, + "step": 1424 + }, + { + "epoch": 0.15031645569620253, + "grad_norm": 0.7478482723236084, + "learning_rate": 0.0014268894168179903, + "loss": 1.7848, + "step": 1425 + }, + { + "epoch": 0.15042194092827005, + "grad_norm": 0.6770508289337158, + "learning_rate": 0.0014267812601798957, + "loss": 1.8202, + "step": 1426 + }, + { + "epoch": 0.15052742616033754, + "grad_norm": 0.7673293948173523, + "learning_rate": 0.0014266730277050863, + "loss": 1.8399, + "step": 1427 + }, + { + "epoch": 0.15063291139240506, + "grad_norm": 0.7616725564002991, + "learning_rate": 0.00142656471940569, + "loss": 1.7922, + "step": 1428 + }, + { + "epoch": 0.15073839662447258, + "grad_norm": 0.7662429809570312, + "learning_rate": 0.001426456335293843, + "loss": 1.7841, + "step": 1429 + }, + { + "epoch": 0.15084388185654007, + "grad_norm": 0.7225232720375061, + "learning_rate": 0.0014263478753816906, + "loss": 1.7827, + "step": 1430 + }, + { + "epoch": 0.1509493670886076, + "grad_norm": 0.7219555974006653, + "learning_rate": 0.0014262393396813863, + "loss": 1.8071, + "step": 1431 + }, + { + "epoch": 0.15105485232067511, + "grad_norm": 0.7094619274139404, + "learning_rate": 0.001426130728205092, + "loss": 1.8065, + "step": 1432 + }, 
+ { + "epoch": 0.1511603375527426, + "grad_norm": 0.6785722374916077, + "learning_rate": 0.001426022040964978, + "loss": 1.7549, + "step": 1433 + }, + { + "epoch": 0.15126582278481013, + "grad_norm": 0.7303653955459595, + "learning_rate": 0.0014259132779732234, + "loss": 1.7822, + "step": 1434 + }, + { + "epoch": 0.15137130801687765, + "grad_norm": 0.9247709512710571, + "learning_rate": 0.0014258044392420155, + "loss": 1.8399, + "step": 1435 + }, + { + "epoch": 0.15147679324894514, + "grad_norm": 1.1930744647979736, + "learning_rate": 0.0014256955247835504, + "loss": 1.7893, + "step": 1436 + }, + { + "epoch": 0.15158227848101266, + "grad_norm": 0.7547588348388672, + "learning_rate": 0.0014255865346100324, + "loss": 1.8239, + "step": 1437 + }, + { + "epoch": 0.15168776371308018, + "grad_norm": 0.8839133977890015, + "learning_rate": 0.0014254774687336744, + "loss": 1.7713, + "step": 1438 + }, + { + "epoch": 0.15179324894514767, + "grad_norm": 1.291165828704834, + "learning_rate": 0.0014253683271666978, + "loss": 1.8087, + "step": 1439 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 0.8591769337654114, + "learning_rate": 0.0014252591099213326, + "loss": 1.8147, + "step": 1440 + }, + { + "epoch": 0.1520042194092827, + "grad_norm": 0.7773945331573486, + "learning_rate": 0.0014251498170098167, + "loss": 1.7705, + "step": 1441 + }, + { + "epoch": 0.1521097046413502, + "grad_norm": 1.1012020111083984, + "learning_rate": 0.0014250404484443975, + "loss": 1.8134, + "step": 1442 + }, + { + "epoch": 0.15221518987341773, + "grad_norm": 1.0120774507522583, + "learning_rate": 0.0014249310042373298, + "loss": 1.81, + "step": 1443 + }, + { + "epoch": 0.15232067510548522, + "grad_norm": 0.7731288075447083, + "learning_rate": 0.0014248214844008776, + "loss": 1.8012, + "step": 1444 + }, + { + "epoch": 0.15242616033755274, + "grad_norm": 0.6917873620986938, + "learning_rate": 0.001424711888947313, + "loss": 1.7839, + "step": 1445 + }, + { + "epoch": 0.15253164556962026, + 
"grad_norm": 0.6886259913444519, + "learning_rate": 0.001424602217888917, + "loss": 1.7779, + "step": 1446 + }, + { + "epoch": 0.15263713080168775, + "grad_norm": 0.7234246134757996, + "learning_rate": 0.0014244924712379786, + "loss": 1.8059, + "step": 1447 + }, + { + "epoch": 0.15274261603375527, + "grad_norm": 0.7999804615974426, + "learning_rate": 0.0014243826490067954, + "loss": 1.8013, + "step": 1448 + }, + { + "epoch": 0.1528481012658228, + "grad_norm": 0.7066593766212463, + "learning_rate": 0.0014242727512076736, + "loss": 1.8143, + "step": 1449 + }, + { + "epoch": 0.1529535864978903, + "grad_norm": 0.7660966515541077, + "learning_rate": 0.001424162777852928, + "loss": 1.8164, + "step": 1450 + }, + { + "epoch": 0.1530590717299578, + "grad_norm": 0.6992313265800476, + "learning_rate": 0.0014240527289548814, + "loss": 1.807, + "step": 1451 + }, + { + "epoch": 0.15316455696202533, + "grad_norm": 0.6635377407073975, + "learning_rate": 0.0014239426045258652, + "loss": 1.7942, + "step": 1452 + }, + { + "epoch": 0.15327004219409282, + "grad_norm": 0.6798397302627563, + "learning_rate": 0.0014238324045782198, + "loss": 1.7757, + "step": 1453 + }, + { + "epoch": 0.15337552742616034, + "grad_norm": 0.6438177227973938, + "learning_rate": 0.0014237221291242932, + "loss": 1.7521, + "step": 1454 + }, + { + "epoch": 0.15348101265822786, + "grad_norm": 0.7160118818283081, + "learning_rate": 0.0014236117781764425, + "loss": 1.7728, + "step": 1455 + }, + { + "epoch": 0.15358649789029535, + "grad_norm": 0.7813863158226013, + "learning_rate": 0.0014235013517470334, + "loss": 1.8298, + "step": 1456 + }, + { + "epoch": 0.15369198312236287, + "grad_norm": 0.9953465461730957, + "learning_rate": 0.0014233908498484393, + "loss": 1.7764, + "step": 1457 + }, + { + "epoch": 0.15379746835443037, + "grad_norm": 1.2269017696380615, + "learning_rate": 0.0014232802724930427, + "loss": 1.7857, + "step": 1458 + }, + { + "epoch": 0.1539029535864979, + "grad_norm": 0.8028475642204285, + 
"learning_rate": 0.0014231696196932342, + "loss": 1.7994, + "step": 1459 + }, + { + "epoch": 0.1540084388185654, + "grad_norm": 0.6898902058601379, + "learning_rate": 0.0014230588914614134, + "loss": 1.845, + "step": 1460 + }, + { + "epoch": 0.1541139240506329, + "grad_norm": 1.034566044807434, + "learning_rate": 0.0014229480878099872, + "loss": 1.8105, + "step": 1461 + }, + { + "epoch": 0.15421940928270042, + "grad_norm": 1.4240292310714722, + "learning_rate": 0.0014228372087513725, + "loss": 1.7613, + "step": 1462 + }, + { + "epoch": 0.15432489451476794, + "grad_norm": 0.6997073888778687, + "learning_rate": 0.0014227262542979933, + "loss": 1.7647, + "step": 1463 + }, + { + "epoch": 0.15443037974683543, + "grad_norm": 1.1891988515853882, + "learning_rate": 0.0014226152244622826, + "loss": 1.7968, + "step": 1464 + }, + { + "epoch": 0.15453586497890295, + "grad_norm": 0.9829100370407104, + "learning_rate": 0.0014225041192566822, + "loss": 1.7805, + "step": 1465 + }, + { + "epoch": 0.15464135021097047, + "grad_norm": 0.7397308945655823, + "learning_rate": 0.001422392938693642, + "loss": 1.7862, + "step": 1466 + }, + { + "epoch": 0.15474683544303797, + "grad_norm": 1.0379630327224731, + "learning_rate": 0.0014222816827856202, + "loss": 1.8304, + "step": 1467 + }, + { + "epoch": 0.1548523206751055, + "grad_norm": 1.2298884391784668, + "learning_rate": 0.0014221703515450834, + "loss": 1.8162, + "step": 1468 + }, + { + "epoch": 0.154957805907173, + "grad_norm": 0.685638964176178, + "learning_rate": 0.001422058944984507, + "loss": 1.8067, + "step": 1469 + }, + { + "epoch": 0.1550632911392405, + "grad_norm": 1.2940245866775513, + "learning_rate": 0.0014219474631163745, + "loss": 1.8006, + "step": 1470 + }, + { + "epoch": 0.15516877637130802, + "grad_norm": 0.9126536250114441, + "learning_rate": 0.0014218359059531783, + "loss": 1.8174, + "step": 1471 + }, + { + "epoch": 0.15527426160337554, + "grad_norm": 0.7461686730384827, + "learning_rate": 0.0014217242735074188, + 
"loss": 1.8173, + "step": 1472 + }, + { + "epoch": 0.15537974683544303, + "grad_norm": 1.0144294500350952, + "learning_rate": 0.0014216125657916046, + "loss": 1.7786, + "step": 1473 + }, + { + "epoch": 0.15548523206751055, + "grad_norm": 0.9673574566841125, + "learning_rate": 0.0014215007828182536, + "loss": 1.7928, + "step": 1474 + }, + { + "epoch": 0.15559071729957805, + "grad_norm": 0.6872503757476807, + "learning_rate": 0.0014213889245998917, + "loss": 1.7996, + "step": 1475 + }, + { + "epoch": 0.15569620253164557, + "grad_norm": 0.7874978184700012, + "learning_rate": 0.0014212769911490528, + "loss": 1.7943, + "step": 1476 + }, + { + "epoch": 0.1558016877637131, + "grad_norm": 0.9379853010177612, + "learning_rate": 0.0014211649824782797, + "loss": 1.7936, + "step": 1477 + }, + { + "epoch": 0.15590717299578058, + "grad_norm": 0.8965063691139221, + "learning_rate": 0.0014210528986001237, + "loss": 1.7908, + "step": 1478 + }, + { + "epoch": 0.1560126582278481, + "grad_norm": 0.8640350103378296, + "learning_rate": 0.001420940739527144, + "loss": 1.8208, + "step": 1479 + }, + { + "epoch": 0.15611814345991562, + "grad_norm": 0.7872280478477478, + "learning_rate": 0.001420828505271909, + "loss": 1.7516, + "step": 1480 + }, + { + "epoch": 0.1562236286919831, + "grad_norm": 0.6795399785041809, + "learning_rate": 0.001420716195846995, + "loss": 1.802, + "step": 1481 + }, + { + "epoch": 0.15632911392405063, + "grad_norm": 0.7222543954849243, + "learning_rate": 0.0014206038112649865, + "loss": 1.7862, + "step": 1482 + }, + { + "epoch": 0.15643459915611815, + "grad_norm": 0.795738160610199, + "learning_rate": 0.0014204913515384772, + "loss": 1.7906, + "step": 1483 + }, + { + "epoch": 0.15654008438818565, + "grad_norm": 0.8476166725158691, + "learning_rate": 0.0014203788166800685, + "loss": 1.8196, + "step": 1484 + }, + { + "epoch": 0.15664556962025317, + "grad_norm": 0.7764453887939453, + "learning_rate": 0.0014202662067023708, + "loss": 1.7559, + "step": 1485 + }, + { + 
"epoch": 0.1567510548523207, + "grad_norm": 0.7646467089653015, + "learning_rate": 0.0014201535216180024, + "loss": 1.8278, + "step": 1486 + }, + { + "epoch": 0.15685654008438818, + "grad_norm": 0.7507727146148682, + "learning_rate": 0.0014200407614395898, + "loss": 1.7535, + "step": 1487 + }, + { + "epoch": 0.1569620253164557, + "grad_norm": 0.946268618106842, + "learning_rate": 0.0014199279261797692, + "loss": 1.8133, + "step": 1488 + }, + { + "epoch": 0.15706751054852322, + "grad_norm": 0.989761471748352, + "learning_rate": 0.0014198150158511837, + "loss": 1.7782, + "step": 1489 + }, + { + "epoch": 0.1571729957805907, + "grad_norm": 0.8014369606971741, + "learning_rate": 0.0014197020304664856, + "loss": 1.7935, + "step": 1490 + }, + { + "epoch": 0.15727848101265823, + "grad_norm": 0.7899013757705688, + "learning_rate": 0.0014195889700383357, + "loss": 1.7554, + "step": 1491 + }, + { + "epoch": 0.15738396624472573, + "grad_norm": 0.8889644145965576, + "learning_rate": 0.0014194758345794029, + "loss": 1.7952, + "step": 1492 + }, + { + "epoch": 0.15748945147679325, + "grad_norm": 1.1241872310638428, + "learning_rate": 0.0014193626241023644, + "loss": 1.7869, + "step": 1493 + }, + { + "epoch": 0.15759493670886077, + "grad_norm": 1.0292199850082397, + "learning_rate": 0.001419249338619906, + "loss": 1.7849, + "step": 1494 + }, + { + "epoch": 0.15770042194092826, + "grad_norm": 0.8019005060195923, + "learning_rate": 0.0014191359781447223, + "loss": 1.7716, + "step": 1495 + }, + { + "epoch": 0.15780590717299578, + "grad_norm": 0.7598795890808105, + "learning_rate": 0.0014190225426895153, + "loss": 1.8387, + "step": 1496 + }, + { + "epoch": 0.1579113924050633, + "grad_norm": 0.6907316446304321, + "learning_rate": 0.0014189090322669967, + "loss": 1.8098, + "step": 1497 + }, + { + "epoch": 0.1580168776371308, + "grad_norm": 0.6657751202583313, + "learning_rate": 0.0014187954468898854, + "loss": 1.7696, + "step": 1498 + }, + { + "epoch": 0.1581223628691983, + "grad_norm": 
0.637987494468689, + "learning_rate": 0.0014186817865709095, + "loss": 1.8117, + "step": 1499 + }, + { + "epoch": 0.15822784810126583, + "grad_norm": 0.6753990650177002, + "learning_rate": 0.0014185680513228048, + "loss": 1.7846, + "step": 1500 + }, + { + "epoch": 0.15833333333333333, + "grad_norm": 0.6743104457855225, + "learning_rate": 0.0014184542411583162, + "loss": 1.7811, + "step": 1501 + }, + { + "epoch": 0.15843881856540085, + "grad_norm": 0.7574287056922913, + "learning_rate": 0.001418340356090197, + "loss": 1.773, + "step": 1502 + }, + { + "epoch": 0.15854430379746837, + "grad_norm": 0.9662079811096191, + "learning_rate": 0.0014182263961312078, + "loss": 1.7821, + "step": 1503 + }, + { + "epoch": 0.15864978902953586, + "grad_norm": 0.9094187617301941, + "learning_rate": 0.001418112361294119, + "loss": 1.8299, + "step": 1504 + }, + { + "epoch": 0.15875527426160338, + "grad_norm": 0.7148818373680115, + "learning_rate": 0.0014179982515917088, + "loss": 1.7867, + "step": 1505 + }, + { + "epoch": 0.15886075949367087, + "grad_norm": 0.7476170063018799, + "learning_rate": 0.0014178840670367634, + "loss": 1.7694, + "step": 1506 + }, + { + "epoch": 0.1589662447257384, + "grad_norm": 1.0196008682250977, + "learning_rate": 0.001417769807642078, + "loss": 1.8323, + "step": 1507 + }, + { + "epoch": 0.1590717299578059, + "grad_norm": 1.103872299194336, + "learning_rate": 0.0014176554734204557, + "loss": 1.7655, + "step": 1508 + }, + { + "epoch": 0.1591772151898734, + "grad_norm": 0.668396532535553, + "learning_rate": 0.0014175410643847085, + "loss": 1.789, + "step": 1509 + }, + { + "epoch": 0.15928270042194093, + "grad_norm": 0.9200111627578735, + "learning_rate": 0.0014174265805476564, + "loss": 1.7937, + "step": 1510 + }, + { + "epoch": 0.15938818565400845, + "grad_norm": 1.3058662414550781, + "learning_rate": 0.001417312021922128, + "loss": 1.775, + "step": 1511 + }, + { + "epoch": 0.15949367088607594, + "grad_norm": 0.7710741758346558, + "learning_rate": 
0.0014171973885209596, + "loss": 1.7985, + "step": 1512 + }, + { + "epoch": 0.15959915611814346, + "grad_norm": 1.0396358966827393, + "learning_rate": 0.0014170826803569971, + "loss": 1.807, + "step": 1513 + }, + { + "epoch": 0.15970464135021098, + "grad_norm": 1.593971610069275, + "learning_rate": 0.0014169678974430941, + "loss": 1.7998, + "step": 1514 + }, + { + "epoch": 0.15981012658227847, + "grad_norm": 0.7344173192977905, + "learning_rate": 0.0014168530397921121, + "loss": 1.8118, + "step": 1515 + }, + { + "epoch": 0.159915611814346, + "grad_norm": 1.8260334730148315, + "learning_rate": 0.0014167381074169218, + "loss": 1.8205, + "step": 1516 + }, + { + "epoch": 0.1600210970464135, + "grad_norm": 0.7788342833518982, + "learning_rate": 0.0014166231003304019, + "loss": 1.79, + "step": 1517 + }, + { + "epoch": 0.160126582278481, + "grad_norm": 1.5833603143692017, + "learning_rate": 0.0014165080185454396, + "loss": 1.8077, + "step": 1518 + }, + { + "epoch": 0.16023206751054853, + "grad_norm": 0.8203885555267334, + "learning_rate": 0.0014163928620749301, + "loss": 1.7984, + "step": 1519 + }, + { + "epoch": 0.16033755274261605, + "grad_norm": 1.415331482887268, + "learning_rate": 0.0014162776309317778, + "loss": 1.7956, + "step": 1520 + }, + { + "epoch": 0.16044303797468354, + "grad_norm": 0.7998037934303284, + "learning_rate": 0.0014161623251288944, + "loss": 1.8034, + "step": 1521 + }, + { + "epoch": 0.16054852320675106, + "grad_norm": 1.5905874967575073, + "learning_rate": 0.001416046944679201, + "loss": 1.7853, + "step": 1522 + }, + { + "epoch": 0.16065400843881855, + "grad_norm": 0.8481216430664062, + "learning_rate": 0.0014159314895956258, + "loss": 1.7704, + "step": 1523 + }, + { + "epoch": 0.16075949367088607, + "grad_norm": 1.7705613374710083, + "learning_rate": 0.0014158159598911067, + "loss": 1.7731, + "step": 1524 + }, + { + "epoch": 0.1608649789029536, + "grad_norm": 0.8800458908081055, + "learning_rate": 0.0014157003555785893, + "loss": 1.7993, + 
"step": 1525 + }, + { + "epoch": 0.16097046413502109, + "grad_norm": 1.142540454864502, + "learning_rate": 0.0014155846766710277, + "loss": 1.7281, + "step": 1526 + }, + { + "epoch": 0.1610759493670886, + "grad_norm": 0.9837116599082947, + "learning_rate": 0.0014154689231813838, + "loss": 1.8106, + "step": 1527 + }, + { + "epoch": 0.16118143459915613, + "grad_norm": 0.6699659824371338, + "learning_rate": 0.001415353095122629, + "loss": 1.7456, + "step": 1528 + }, + { + "epoch": 0.16128691983122362, + "grad_norm": 0.742780327796936, + "learning_rate": 0.0014152371925077423, + "loss": 1.8139, + "step": 1529 + }, + { + "epoch": 0.16139240506329114, + "grad_norm": 0.7861393094062805, + "learning_rate": 0.0014151212153497108, + "loss": 1.7459, + "step": 1530 + }, + { + "epoch": 0.16149789029535866, + "grad_norm": 0.7332009077072144, + "learning_rate": 0.0014150051636615305, + "loss": 1.7284, + "step": 1531 + }, + { + "epoch": 0.16160337552742615, + "grad_norm": 0.7253724336624146, + "learning_rate": 0.0014148890374562056, + "loss": 1.7397, + "step": 1532 + }, + { + "epoch": 0.16170886075949367, + "grad_norm": 0.7695915102958679, + "learning_rate": 0.0014147728367467486, + "loss": 1.7785, + "step": 1533 + }, + { + "epoch": 0.1618143459915612, + "grad_norm": 0.7373976111412048, + "learning_rate": 0.0014146565615461805, + "loss": 1.7692, + "step": 1534 + }, + { + "epoch": 0.16191983122362869, + "grad_norm": 0.9723045825958252, + "learning_rate": 0.0014145402118675302, + "loss": 1.8019, + "step": 1535 + }, + { + "epoch": 0.1620253164556962, + "grad_norm": 1.0119552612304688, + "learning_rate": 0.0014144237877238355, + "loss": 1.7349, + "step": 1536 + }, + { + "epoch": 0.16213080168776373, + "grad_norm": 0.7821741104125977, + "learning_rate": 0.0014143072891281425, + "loss": 1.7435, + "step": 1537 + }, + { + "epoch": 0.16223628691983122, + "grad_norm": 0.786466121673584, + "learning_rate": 0.001414190716093505, + "loss": 1.7591, + "step": 1538 + }, + { + "epoch": 
0.16234177215189874, + "grad_norm": 1.1308854818344116, + "learning_rate": 0.001414074068632986, + "loss": 1.7757, + "step": 1539 + }, + { + "epoch": 0.16244725738396623, + "grad_norm": 0.8576799035072327, + "learning_rate": 0.0014139573467596561, + "loss": 1.7885, + "step": 1540 + }, + { + "epoch": 0.16255274261603375, + "grad_norm": 0.8230303525924683, + "learning_rate": 0.0014138405504865949, + "loss": 1.7727, + "step": 1541 + }, + { + "epoch": 0.16265822784810127, + "grad_norm": 0.9325475692749023, + "learning_rate": 0.0014137236798268896, + "loss": 1.7376, + "step": 1542 + }, + { + "epoch": 0.16276371308016876, + "grad_norm": 0.8823547959327698, + "learning_rate": 0.0014136067347936363, + "loss": 1.7656, + "step": 1543 + }, + { + "epoch": 0.16286919831223629, + "grad_norm": 0.7363696694374084, + "learning_rate": 0.0014134897153999394, + "loss": 1.7807, + "step": 1544 + }, + { + "epoch": 0.1629746835443038, + "grad_norm": 0.7699634432792664, + "learning_rate": 0.0014133726216589114, + "loss": 1.7444, + "step": 1545 + }, + { + "epoch": 0.1630801687763713, + "grad_norm": 0.9539462924003601, + "learning_rate": 0.0014132554535836732, + "loss": 1.7356, + "step": 1546 + }, + { + "epoch": 0.16318565400843882, + "grad_norm": 0.7377378344535828, + "learning_rate": 0.0014131382111873543, + "loss": 1.7655, + "step": 1547 + }, + { + "epoch": 0.16329113924050634, + "grad_norm": 0.8107068538665771, + "learning_rate": 0.0014130208944830923, + "loss": 1.8076, + "step": 1548 + }, + { + "epoch": 0.16339662447257383, + "grad_norm": 0.7905434966087341, + "learning_rate": 0.0014129035034840325, + "loss": 1.7971, + "step": 1549 + }, + { + "epoch": 0.16350210970464135, + "grad_norm": 0.6842740178108215, + "learning_rate": 0.00141278603820333, + "loss": 1.7596, + "step": 1550 + }, + { + "epoch": 0.16360759493670887, + "grad_norm": 0.8161755800247192, + "learning_rate": 0.0014126684986541468, + "loss": 1.7799, + "step": 1551 + }, + { + "epoch": 0.16371308016877636, + "grad_norm": 
0.7704566717147827, + "learning_rate": 0.0014125508848496539, + "loss": 1.7835, + "step": 1552 + }, + { + "epoch": 0.16381856540084389, + "grad_norm": 0.7595329284667969, + "learning_rate": 0.0014124331968030307, + "loss": 1.8179, + "step": 1553 + }, + { + "epoch": 0.1639240506329114, + "grad_norm": 0.7498695850372314, + "learning_rate": 0.0014123154345274645, + "loss": 1.7415, + "step": 1554 + }, + { + "epoch": 0.1640295358649789, + "grad_norm": 0.6836254596710205, + "learning_rate": 0.0014121975980361512, + "loss": 1.7569, + "step": 1555 + }, + { + "epoch": 0.16413502109704642, + "grad_norm": 0.7432002425193787, + "learning_rate": 0.0014120796873422952, + "loss": 1.7713, + "step": 1556 + }, + { + "epoch": 0.1642405063291139, + "grad_norm": 0.7565982937812805, + "learning_rate": 0.0014119617024591089, + "loss": 1.7898, + "step": 1557 + }, + { + "epoch": 0.16434599156118143, + "grad_norm": 0.8619751334190369, + "learning_rate": 0.0014118436433998127, + "loss": 1.7481, + "step": 1558 + }, + { + "epoch": 0.16445147679324895, + "grad_norm": 0.925166666507721, + "learning_rate": 0.0014117255101776362, + "loss": 1.7384, + "step": 1559 + }, + { + "epoch": 0.16455696202531644, + "grad_norm": 0.7232134342193604, + "learning_rate": 0.0014116073028058165, + "loss": 1.7625, + "step": 1560 + }, + { + "epoch": 0.16466244725738396, + "grad_norm": 0.677100658416748, + "learning_rate": 0.0014114890212975997, + "loss": 1.7073, + "step": 1561 + }, + { + "epoch": 0.16476793248945149, + "grad_norm": 0.9270104765892029, + "learning_rate": 0.0014113706656662393, + "loss": 1.7999, + "step": 1562 + }, + { + "epoch": 0.16487341772151898, + "grad_norm": 0.865465521812439, + "learning_rate": 0.001411252235924998, + "loss": 1.7555, + "step": 1563 + }, + { + "epoch": 0.1649789029535865, + "grad_norm": 0.7262866497039795, + "learning_rate": 0.0014111337320871463, + "loss": 1.7531, + "step": 1564 + }, + { + "epoch": 0.16508438818565402, + "grad_norm": 0.7132624387741089, + "learning_rate": 
0.0014110151541659633, + "loss": 1.7262, + "step": 1565 + }, + { + "epoch": 0.1651898734177215, + "grad_norm": 0.7910416126251221, + "learning_rate": 0.0014108965021747363, + "loss": 1.7803, + "step": 1566 + }, + { + "epoch": 0.16529535864978903, + "grad_norm": 0.7453775405883789, + "learning_rate": 0.0014107777761267605, + "loss": 1.7677, + "step": 1567 + }, + { + "epoch": 0.16540084388185655, + "grad_norm": 0.750866711139679, + "learning_rate": 0.00141065897603534, + "loss": 1.8046, + "step": 1568 + }, + { + "epoch": 0.16550632911392404, + "grad_norm": 0.8254522085189819, + "learning_rate": 0.001410540101913787, + "loss": 1.7601, + "step": 1569 + }, + { + "epoch": 0.16561181434599156, + "grad_norm": 0.8468154072761536, + "learning_rate": 0.0014104211537754217, + "loss": 1.778, + "step": 1570 + }, + { + "epoch": 0.16571729957805909, + "grad_norm": 0.7022504806518555, + "learning_rate": 0.001410302131633573, + "loss": 1.7389, + "step": 1571 + }, + { + "epoch": 0.16582278481012658, + "grad_norm": 0.7319542169570923, + "learning_rate": 0.0014101830355015778, + "loss": 1.8034, + "step": 1572 + }, + { + "epoch": 0.1659282700421941, + "grad_norm": 0.8048992156982422, + "learning_rate": 0.0014100638653927816, + "loss": 1.7885, + "step": 1573 + }, + { + "epoch": 0.1660337552742616, + "grad_norm": 1.0447649955749512, + "learning_rate": 0.0014099446213205378, + "loss": 1.7526, + "step": 1574 + }, + { + "epoch": 0.1661392405063291, + "grad_norm": 0.9529373645782471, + "learning_rate": 0.0014098253032982086, + "loss": 1.7619, + "step": 1575 + }, + { + "epoch": 0.16624472573839663, + "grad_norm": 0.6626268625259399, + "learning_rate": 0.0014097059113391639, + "loss": 1.7873, + "step": 1576 + }, + { + "epoch": 0.16635021097046412, + "grad_norm": 0.8689193725585938, + "learning_rate": 0.0014095864454567821, + "loss": 1.7901, + "step": 1577 + }, + { + "epoch": 0.16645569620253164, + "grad_norm": 1.0272941589355469, + "learning_rate": 0.0014094669056644502, + "loss": 1.7603, + 
"step": 1578 + }, + { + "epoch": 0.16656118143459916, + "grad_norm": 0.9356609582901001, + "learning_rate": 0.001409347291975563, + "loss": 1.8145, + "step": 1579 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 0.899937093257904, + "learning_rate": 0.001409227604403524, + "loss": 1.7739, + "step": 1580 + }, + { + "epoch": 0.16677215189873418, + "grad_norm": 0.7018307447433472, + "learning_rate": 0.0014091078429617448, + "loss": 1.7837, + "step": 1581 + }, + { + "epoch": 0.1668776371308017, + "grad_norm": 0.8009684681892395, + "learning_rate": 0.0014089880076636452, + "loss": 1.7837, + "step": 1582 + }, + { + "epoch": 0.1669831223628692, + "grad_norm": 0.861017107963562, + "learning_rate": 0.0014088680985226533, + "loss": 1.7521, + "step": 1583 + }, + { + "epoch": 0.1670886075949367, + "grad_norm": 0.6855072975158691, + "learning_rate": 0.0014087481155522056, + "loss": 1.7549, + "step": 1584 + }, + { + "epoch": 0.16719409282700423, + "grad_norm": 0.7806000709533691, + "learning_rate": 0.0014086280587657467, + "loss": 1.7993, + "step": 1585 + }, + { + "epoch": 0.16729957805907172, + "grad_norm": 0.7895873188972473, + "learning_rate": 0.0014085079281767295, + "loss": 1.7844, + "step": 1586 + }, + { + "epoch": 0.16740506329113924, + "grad_norm": 0.6654752492904663, + "learning_rate": 0.0014083877237986153, + "loss": 1.7356, + "step": 1587 + }, + { + "epoch": 0.16751054852320676, + "grad_norm": 0.6881792545318604, + "learning_rate": 0.0014082674456448738, + "loss": 1.7624, + "step": 1588 + }, + { + "epoch": 0.16761603375527426, + "grad_norm": 0.8876838684082031, + "learning_rate": 0.0014081470937289827, + "loss": 1.7906, + "step": 1589 + }, + { + "epoch": 0.16772151898734178, + "grad_norm": 1.0083403587341309, + "learning_rate": 0.0014080266680644277, + "loss": 1.7474, + "step": 1590 + }, + { + "epoch": 0.16782700421940927, + "grad_norm": 0.9896491765975952, + "learning_rate": 0.0014079061686647033, + "loss": 1.7721, + "step": 1591 + }, + { + "epoch": 
0.1679324894514768, + "grad_norm": 0.8419106602668762, + "learning_rate": 0.0014077855955433123, + "loss": 1.7388, + "step": 1592 + }, + { + "epoch": 0.1680379746835443, + "grad_norm": 0.7096689939498901, + "learning_rate": 0.001407664948713765, + "loss": 1.7905, + "step": 1593 + }, + { + "epoch": 0.1681434599156118, + "grad_norm": 0.8168047666549683, + "learning_rate": 0.001407544228189581, + "loss": 1.755, + "step": 1594 + }, + { + "epoch": 0.16824894514767932, + "grad_norm": 1.1311228275299072, + "learning_rate": 0.0014074234339842874, + "loss": 1.8112, + "step": 1595 + }, + { + "epoch": 0.16835443037974684, + "grad_norm": 1.0729269981384277, + "learning_rate": 0.00140730256611142, + "loss": 1.7979, + "step": 1596 + }, + { + "epoch": 0.16845991561181434, + "grad_norm": 0.7659058570861816, + "learning_rate": 0.001407181624584522, + "loss": 1.763, + "step": 1597 + }, + { + "epoch": 0.16856540084388186, + "grad_norm": 0.7779613137245178, + "learning_rate": 0.0014070606094171464, + "loss": 1.7707, + "step": 1598 + }, + { + "epoch": 0.16867088607594938, + "grad_norm": 0.872779905796051, + "learning_rate": 0.0014069395206228528, + "loss": 1.7743, + "step": 1599 + }, + { + "epoch": 0.16877637130801687, + "grad_norm": 0.775273859500885, + "learning_rate": 0.0014068183582152103, + "loss": 1.787, + "step": 1600 + }, + { + "epoch": 0.1688818565400844, + "grad_norm": 0.8313384652137756, + "learning_rate": 0.0014066971222077955, + "loss": 1.7367, + "step": 1601 + }, + { + "epoch": 0.1689873417721519, + "grad_norm": 0.7122327089309692, + "learning_rate": 0.0014065758126141938, + "loss": 1.8165, + "step": 1602 + }, + { + "epoch": 0.1690928270042194, + "grad_norm": 0.7227259278297424, + "learning_rate": 0.0014064544294479981, + "loss": 1.7836, + "step": 1603 + }, + { + "epoch": 0.16919831223628692, + "grad_norm": 0.6701783537864685, + "learning_rate": 0.0014063329727228102, + "loss": 1.7519, + "step": 1604 + }, + { + "epoch": 0.16930379746835442, + "grad_norm": 
0.6737704873085022, + "learning_rate": 0.0014062114424522397, + "loss": 1.7414, + "step": 1605 + }, + { + "epoch": 0.16940928270042194, + "grad_norm": 0.7520363330841064, + "learning_rate": 0.0014060898386499053, + "loss": 1.7956, + "step": 1606 + }, + { + "epoch": 0.16951476793248946, + "grad_norm": 0.7570095658302307, + "learning_rate": 0.0014059681613294327, + "loss": 1.8014, + "step": 1607 + }, + { + "epoch": 0.16962025316455695, + "grad_norm": 0.9226526021957397, + "learning_rate": 0.0014058464105044567, + "loss": 1.7459, + "step": 1608 + }, + { + "epoch": 0.16972573839662447, + "grad_norm": 0.8738808035850525, + "learning_rate": 0.0014057245861886201, + "loss": 1.7739, + "step": 1609 + }, + { + "epoch": 0.169831223628692, + "grad_norm": 0.8323056697845459, + "learning_rate": 0.001405602688395574, + "loss": 1.7781, + "step": 1610 + }, + { + "epoch": 0.16993670886075948, + "grad_norm": 0.7914292812347412, + "learning_rate": 0.0014054807171389773, + "loss": 1.7223, + "step": 1611 + }, + { + "epoch": 0.170042194092827, + "grad_norm": 0.7224916219711304, + "learning_rate": 0.001405358672432498, + "loss": 1.6912, + "step": 1612 + }, + { + "epoch": 0.17014767932489452, + "grad_norm": 0.830104410648346, + "learning_rate": 0.0014052365542898111, + "loss": 1.7428, + "step": 1613 + }, + { + "epoch": 0.17025316455696202, + "grad_norm": 0.9388877749443054, + "learning_rate": 0.0014051143627246015, + "loss": 1.7754, + "step": 1614 + }, + { + "epoch": 0.17035864978902954, + "grad_norm": 0.8652804493904114, + "learning_rate": 0.0014049920977505608, + "loss": 1.7536, + "step": 1615 + }, + { + "epoch": 0.17046413502109706, + "grad_norm": 0.723893940448761, + "learning_rate": 0.0014048697593813891, + "loss": 1.7636, + "step": 1616 + }, + { + "epoch": 0.17056962025316455, + "grad_norm": 0.7559022903442383, + "learning_rate": 0.0014047473476307955, + "loss": 1.7599, + "step": 1617 + }, + { + "epoch": 0.17067510548523207, + "grad_norm": 0.8565883636474609, + "learning_rate": 
0.001404624862512497, + "loss": 1.7403, + "step": 1618 + }, + { + "epoch": 0.1707805907172996, + "grad_norm": 1.0962861776351929, + "learning_rate": 0.001404502304040218, + "loss": 1.7528, + "step": 1619 + }, + { + "epoch": 0.17088607594936708, + "grad_norm": 1.1745723485946655, + "learning_rate": 0.0014043796722276924, + "loss": 1.7624, + "step": 1620 + }, + { + "epoch": 0.1709915611814346, + "grad_norm": 0.8444562554359436, + "learning_rate": 0.0014042569670886615, + "loss": 1.7728, + "step": 1621 + }, + { + "epoch": 0.1710970464135021, + "grad_norm": 0.6753362417221069, + "learning_rate": 0.0014041341886368752, + "loss": 1.8107, + "step": 1622 + }, + { + "epoch": 0.17120253164556962, + "grad_norm": 0.7373920679092407, + "learning_rate": 0.0014040113368860908, + "loss": 1.7662, + "step": 1623 + }, + { + "epoch": 0.17130801687763714, + "grad_norm": 0.6235541105270386, + "learning_rate": 0.0014038884118500754, + "loss": 1.7423, + "step": 1624 + }, + { + "epoch": 0.17141350210970463, + "grad_norm": 0.7894159555435181, + "learning_rate": 0.0014037654135426025, + "loss": 1.7987, + "step": 1625 + }, + { + "epoch": 0.17151898734177215, + "grad_norm": 0.9448953866958618, + "learning_rate": 0.0014036423419774551, + "loss": 1.724, + "step": 1626 + }, + { + "epoch": 0.17162447257383967, + "grad_norm": 1.221705675125122, + "learning_rate": 0.0014035191971684242, + "loss": 1.7423, + "step": 1627 + }, + { + "epoch": 0.17172995780590716, + "grad_norm": 0.8607805967330933, + "learning_rate": 0.0014033959791293082, + "loss": 1.7447, + "step": 1628 + }, + { + "epoch": 0.17183544303797468, + "grad_norm": 0.7272552847862244, + "learning_rate": 0.0014032726878739148, + "loss": 1.7727, + "step": 1629 + }, + { + "epoch": 0.1719409282700422, + "grad_norm": 1.3156992197036743, + "learning_rate": 0.0014031493234160591, + "loss": 1.7437, + "step": 1630 + }, + { + "epoch": 0.1720464135021097, + "grad_norm": 1.0124140977859497, + "learning_rate": 0.001403025885769565, + "loss": 1.7822, + 
"step": 1631 + }, + { + "epoch": 0.17215189873417722, + "grad_norm": 0.7597020268440247, + "learning_rate": 0.001402902374948264, + "loss": 1.7654, + "step": 1632 + }, + { + "epoch": 0.17225738396624474, + "grad_norm": 0.8838198781013489, + "learning_rate": 0.0014027787909659962, + "loss": 1.7596, + "step": 1633 + }, + { + "epoch": 0.17236286919831223, + "grad_norm": 1.2640994787216187, + "learning_rate": 0.0014026551338366098, + "loss": 1.7709, + "step": 1634 + }, + { + "epoch": 0.17246835443037975, + "grad_norm": 0.8786333203315735, + "learning_rate": 0.0014025314035739614, + "loss": 1.7474, + "step": 1635 + }, + { + "epoch": 0.17257383966244727, + "grad_norm": 0.6995647549629211, + "learning_rate": 0.001402407600191915, + "loss": 1.7535, + "step": 1636 + }, + { + "epoch": 0.17267932489451476, + "grad_norm": 0.9324917793273926, + "learning_rate": 0.0014022837237043441, + "loss": 1.767, + "step": 1637 + }, + { + "epoch": 0.17278481012658228, + "grad_norm": 0.9627678990364075, + "learning_rate": 0.0014021597741251295, + "loss": 1.7733, + "step": 1638 + }, + { + "epoch": 0.17289029535864978, + "grad_norm": 0.7979447841644287, + "learning_rate": 0.00140203575146816, + "loss": 1.7661, + "step": 1639 + }, + { + "epoch": 0.1729957805907173, + "grad_norm": 0.8349020481109619, + "learning_rate": 0.0014019116557473332, + "loss": 1.7328, + "step": 1640 + }, + { + "epoch": 0.17310126582278482, + "grad_norm": 0.7408688068389893, + "learning_rate": 0.0014017874869765548, + "loss": 1.7299, + "step": 1641 + }, + { + "epoch": 0.1732067510548523, + "grad_norm": 0.7241823077201843, + "learning_rate": 0.0014016632451697383, + "loss": 1.7301, + "step": 1642 + }, + { + "epoch": 0.17331223628691983, + "grad_norm": 0.6876802444458008, + "learning_rate": 0.0014015389303408058, + "loss": 1.7374, + "step": 1643 + }, + { + "epoch": 0.17341772151898735, + "grad_norm": 0.770102858543396, + "learning_rate": 0.001401414542503687, + "loss": 1.7453, + "step": 1644 + }, + { + "epoch": 
0.17352320675105484, + "grad_norm": 0.9142905473709106, + "learning_rate": 0.001401290081672321, + "loss": 1.7525, + "step": 1645 + }, + { + "epoch": 0.17362869198312236, + "grad_norm": 1.1934207677841187, + "learning_rate": 0.0014011655478606531, + "loss": 1.7627, + "step": 1646 + }, + { + "epoch": 0.17373417721518988, + "grad_norm": 1.117079496383667, + "learning_rate": 0.001401040941082639, + "loss": 1.7547, + "step": 1647 + }, + { + "epoch": 0.17383966244725738, + "grad_norm": 0.7713111639022827, + "learning_rate": 0.001400916261352241, + "loss": 1.7419, + "step": 1648 + }, + { + "epoch": 0.1739451476793249, + "grad_norm": 0.7779275178909302, + "learning_rate": 0.00140079150868343, + "loss": 1.7922, + "step": 1649 + }, + { + "epoch": 0.17405063291139242, + "grad_norm": 1.1987534761428833, + "learning_rate": 0.0014006666830901854, + "loss": 1.7686, + "step": 1650 + }, + { + "epoch": 0.1741561181434599, + "grad_norm": 0.7709859609603882, + "learning_rate": 0.0014005417845864945, + "loss": 1.755, + "step": 1651 + }, + { + "epoch": 0.17426160337552743, + "grad_norm": 0.80942702293396, + "learning_rate": 0.0014004168131863525, + "loss": 1.7323, + "step": 1652 + }, + { + "epoch": 0.17436708860759495, + "grad_norm": 1.1601324081420898, + "learning_rate": 0.0014002917689037637, + "loss": 1.726, + "step": 1653 + }, + { + "epoch": 0.17447257383966244, + "grad_norm": 0.7431904673576355, + "learning_rate": 0.0014001666517527392, + "loss": 1.768, + "step": 1654 + }, + { + "epoch": 0.17457805907172996, + "grad_norm": 0.7419869899749756, + "learning_rate": 0.0014000414617472996, + "loss": 1.7483, + "step": 1655 + }, + { + "epoch": 0.17468354430379746, + "grad_norm": 0.8115363717079163, + "learning_rate": 0.0013999161989014725, + "loss": 1.7883, + "step": 1656 + }, + { + "epoch": 0.17478902953586498, + "grad_norm": 0.6547898054122925, + "learning_rate": 0.0013997908632292948, + "loss": 1.7292, + "step": 1657 + }, + { + "epoch": 0.1748945147679325, + "grad_norm": 
0.8526284694671631, + "learning_rate": 0.0013996654547448106, + "loss": 1.7351, + "step": 1658 + }, + { + "epoch": 0.175, + "grad_norm": 1.1451259851455688, + "learning_rate": 0.0013995399734620729, + "loss": 1.7524, + "step": 1659 + }, + { + "epoch": 0.1751054852320675, + "grad_norm": 1.0670439004898071, + "learning_rate": 0.001399414419395142, + "loss": 1.7425, + "step": 1660 + }, + { + "epoch": 0.17521097046413503, + "grad_norm": 0.7587547898292542, + "learning_rate": 0.0013992887925580874, + "loss": 1.7893, + "step": 1661 + }, + { + "epoch": 0.17531645569620252, + "grad_norm": 0.8335049152374268, + "learning_rate": 0.0013991630929649857, + "loss": 1.7396, + "step": 1662 + }, + { + "epoch": 0.17542194092827004, + "grad_norm": 1.7220485210418701, + "learning_rate": 0.0013990373206299225, + "loss": 1.7593, + "step": 1663 + }, + { + "epoch": 0.17552742616033756, + "grad_norm": 0.8240562677383423, + "learning_rate": 0.0013989114755669912, + "loss": 1.7554, + "step": 1664 + }, + { + "epoch": 0.17563291139240506, + "grad_norm": 2.4016990661621094, + "learning_rate": 0.001398785557790293, + "loss": 1.8007, + "step": 1665 + }, + { + "epoch": 0.17573839662447258, + "grad_norm": 1.3821533918380737, + "learning_rate": 0.0013986595673139382, + "loss": 1.8073, + "step": 1666 + }, + { + "epoch": 0.1758438818565401, + "grad_norm": 2.1217947006225586, + "learning_rate": 0.0013985335041520443, + "loss": 1.8049, + "step": 1667 + }, + { + "epoch": 0.1759493670886076, + "grad_norm": 2.0518813133239746, + "learning_rate": 0.0013984073683187374, + "loss": 1.8139, + "step": 1668 + }, + { + "epoch": 0.1760548523206751, + "grad_norm": 0.9033864736557007, + "learning_rate": 0.0013982811598281517, + "loss": 1.7558, + "step": 1669 + }, + { + "epoch": 0.17616033755274263, + "grad_norm": 1.5714832544326782, + "learning_rate": 0.0013981548786944293, + "loss": 1.722, + "step": 1670 + }, + { + "epoch": 0.17626582278481012, + "grad_norm": 0.9259054064750671, + "learning_rate": 
0.0013980285249317209, + "loss": 1.7298, + "step": 1671 + }, + { + "epoch": 0.17637130801687764, + "grad_norm": 1.2536653280258179, + "learning_rate": 0.0013979020985541847, + "loss": 1.7636, + "step": 1672 + }, + { + "epoch": 0.17647679324894514, + "grad_norm": 0.9883777499198914, + "learning_rate": 0.0013977755995759876, + "loss": 1.7781, + "step": 1673 + }, + { + "epoch": 0.17658227848101266, + "grad_norm": 1.0763797760009766, + "learning_rate": 0.0013976490280113048, + "loss": 1.777, + "step": 1674 + }, + { + "epoch": 0.17668776371308018, + "grad_norm": 0.9502475261688232, + "learning_rate": 0.0013975223838743188, + "loss": 1.7491, + "step": 1675 + }, + { + "epoch": 0.17679324894514767, + "grad_norm": 0.9625199437141418, + "learning_rate": 0.0013973956671792206, + "loss": 1.7638, + "step": 1676 + }, + { + "epoch": 0.1768987341772152, + "grad_norm": 0.9252185821533203, + "learning_rate": 0.00139726887794021, + "loss": 1.712, + "step": 1677 + }, + { + "epoch": 0.1770042194092827, + "grad_norm": 0.9986273050308228, + "learning_rate": 0.001397142016171494, + "loss": 1.7748, + "step": 1678 + }, + { + "epoch": 0.1771097046413502, + "grad_norm": 1.0807664394378662, + "learning_rate": 0.0013970150818872881, + "loss": 1.7251, + "step": 1679 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 0.9184367060661316, + "learning_rate": 0.0013968880751018158, + "loss": 1.7519, + "step": 1680 + }, + { + "epoch": 0.17732067510548524, + "grad_norm": 1.62092125415802, + "learning_rate": 0.0013967609958293091, + "loss": 1.7469, + "step": 1681 + }, + { + "epoch": 0.17742616033755274, + "grad_norm": 0.9476982355117798, + "learning_rate": 0.001396633844084008, + "loss": 1.7365, + "step": 1682 + }, + { + "epoch": 0.17753164556962026, + "grad_norm": 1.5559287071228027, + "learning_rate": 0.00139650661988016, + "loss": 1.7939, + "step": 1683 + }, + { + "epoch": 0.17763713080168778, + "grad_norm": 1.1918830871582031, + "learning_rate": 0.0013963793232320216, + "loss": 1.7901, + 
"step": 1684 + }, + { + "epoch": 0.17774261603375527, + "grad_norm": 1.4200773239135742, + "learning_rate": 0.0013962519541538569, + "loss": 1.7121, + "step": 1685 + }, + { + "epoch": 0.1778481012658228, + "grad_norm": 1.207051396369934, + "learning_rate": 0.001396124512659938, + "loss": 1.7103, + "step": 1686 + }, + { + "epoch": 0.17795358649789028, + "grad_norm": 1.4440861940383911, + "learning_rate": 0.001395996998764546, + "loss": 1.7647, + "step": 1687 + }, + { + "epoch": 0.1780590717299578, + "grad_norm": 1.0257434844970703, + "learning_rate": 0.0013958694124819688, + "loss": 1.7059, + "step": 1688 + }, + { + "epoch": 0.17816455696202532, + "grad_norm": 1.262793779373169, + "learning_rate": 0.0013957417538265032, + "loss": 1.767, + "step": 1689 + }, + { + "epoch": 0.17827004219409281, + "grad_norm": 1.0009833574295044, + "learning_rate": 0.0013956140228124545, + "loss": 1.7343, + "step": 1690 + }, + { + "epoch": 0.17837552742616034, + "grad_norm": 1.0606180429458618, + "learning_rate": 0.001395486219454135, + "loss": 1.7935, + "step": 1691 + }, + { + "epoch": 0.17848101265822786, + "grad_norm": 0.8480616807937622, + "learning_rate": 0.0013953583437658658, + "loss": 1.7549, + "step": 1692 + }, + { + "epoch": 0.17858649789029535, + "grad_norm": 1.1206985712051392, + "learning_rate": 0.0013952303957619763, + "loss": 1.7268, + "step": 1693 + }, + { + "epoch": 0.17869198312236287, + "grad_norm": 1.1642671823501587, + "learning_rate": 0.0013951023754568035, + "loss": 1.7466, + "step": 1694 + }, + { + "epoch": 0.1787974683544304, + "grad_norm": 0.8711743950843811, + "learning_rate": 0.001394974282864693, + "loss": 1.7253, + "step": 1695 + }, + { + "epoch": 0.17890295358649788, + "grad_norm": 0.8018665909767151, + "learning_rate": 0.0013948461179999977, + "loss": 1.7425, + "step": 1696 + }, + { + "epoch": 0.1790084388185654, + "grad_norm": 0.8941536545753479, + "learning_rate": 0.0013947178808770794, + "loss": 1.7343, + "step": 1697 + }, + { + "epoch": 
0.17911392405063292, + "grad_norm": 0.9376354217529297, + "learning_rate": 0.0013945895715103077, + "loss": 1.7591, + "step": 1698 + }, + { + "epoch": 0.17921940928270041, + "grad_norm": 0.7711248993873596, + "learning_rate": 0.0013944611899140604, + "loss": 1.7944, + "step": 1699 + }, + { + "epoch": 0.17932489451476794, + "grad_norm": 0.8404346108436584, + "learning_rate": 0.0013943327361027231, + "loss": 1.7654, + "step": 1700 + }, + { + "epoch": 0.17943037974683546, + "grad_norm": 0.9329455494880676, + "learning_rate": 0.0013942042100906899, + "loss": 1.7359, + "step": 1701 + }, + { + "epoch": 0.17953586497890295, + "grad_norm": 0.8032305240631104, + "learning_rate": 0.0013940756118923626, + "loss": 1.733, + "step": 1702 + }, + { + "epoch": 0.17964135021097047, + "grad_norm": 0.7327213287353516, + "learning_rate": 0.0013939469415221513, + "loss": 1.6962, + "step": 1703 + }, + { + "epoch": 0.17974683544303796, + "grad_norm": 0.6294657588005066, + "learning_rate": 0.0013938181989944741, + "loss": 1.725, + "step": 1704 + }, + { + "epoch": 0.17985232067510548, + "grad_norm": 0.6925255060195923, + "learning_rate": 0.0013936893843237573, + "loss": 1.7347, + "step": 1705 + }, + { + "epoch": 0.179957805907173, + "grad_norm": 0.7764161825180054, + "learning_rate": 0.0013935604975244356, + "loss": 1.7549, + "step": 1706 + }, + { + "epoch": 0.1800632911392405, + "grad_norm": 0.6730543375015259, + "learning_rate": 0.0013934315386109509, + "loss": 1.7008, + "step": 1707 + }, + { + "epoch": 0.18016877637130801, + "grad_norm": 0.726164698600769, + "learning_rate": 0.0013933025075977539, + "loss": 1.7122, + "step": 1708 + }, + { + "epoch": 0.18027426160337554, + "grad_norm": 0.8454710245132446, + "learning_rate": 0.0013931734044993031, + "loss": 1.7198, + "step": 1709 + }, + { + "epoch": 0.18037974683544303, + "grad_norm": 0.7821708917617798, + "learning_rate": 0.0013930442293300649, + "loss": 1.7669, + "step": 1710 + }, + { + "epoch": 0.18048523206751055, + "grad_norm": 
0.7029886245727539, + "learning_rate": 0.0013929149821045148, + "loss": 1.7583, + "step": 1711 + }, + { + "epoch": 0.18059071729957807, + "grad_norm": 0.7988204956054688, + "learning_rate": 0.0013927856628371347, + "loss": 1.7758, + "step": 1712 + }, + { + "epoch": 0.18069620253164556, + "grad_norm": 0.7354252934455872, + "learning_rate": 0.0013926562715424159, + "loss": 1.7269, + "step": 1713 + }, + { + "epoch": 0.18080168776371308, + "grad_norm": 0.6789473295211792, + "learning_rate": 0.0013925268082348576, + "loss": 1.7172, + "step": 1714 + }, + { + "epoch": 0.1809071729957806, + "grad_norm": 0.7800036668777466, + "learning_rate": 0.0013923972729289662, + "loss": 1.7218, + "step": 1715 + }, + { + "epoch": 0.1810126582278481, + "grad_norm": 0.8185677528381348, + "learning_rate": 0.0013922676656392572, + "loss": 1.7251, + "step": 1716 + }, + { + "epoch": 0.18111814345991561, + "grad_norm": 0.8112431168556213, + "learning_rate": 0.0013921379863802536, + "loss": 1.6971, + "step": 1717 + }, + { + "epoch": 0.18122362869198314, + "grad_norm": 0.6941943168640137, + "learning_rate": 0.0013920082351664867, + "loss": 1.7287, + "step": 1718 + }, + { + "epoch": 0.18132911392405063, + "grad_norm": 1.049030065536499, + "learning_rate": 0.0013918784120124956, + "loss": 1.7374, + "step": 1719 + }, + { + "epoch": 0.18143459915611815, + "grad_norm": 1.1573176383972168, + "learning_rate": 0.0013917485169328279, + "loss": 1.7689, + "step": 1720 + }, + { + "epoch": 0.18154008438818564, + "grad_norm": 0.8728642463684082, + "learning_rate": 0.0013916185499420386, + "loss": 1.7298, + "step": 1721 + }, + { + "epoch": 0.18164556962025316, + "grad_norm": 0.6963617205619812, + "learning_rate": 0.0013914885110546916, + "loss": 1.7263, + "step": 1722 + }, + { + "epoch": 0.18175105485232068, + "grad_norm": 0.7472816109657288, + "learning_rate": 0.001391358400285358, + "loss": 1.7229, + "step": 1723 + }, + { + "epoch": 0.18185654008438817, + "grad_norm": 0.8032366037368774, + "learning_rate": 
0.0013912282176486177, + "loss": 1.714, + "step": 1724 + }, + { + "epoch": 0.1819620253164557, + "grad_norm": 0.6851471066474915, + "learning_rate": 0.0013910979631590581, + "loss": 1.7173, + "step": 1725 + }, + { + "epoch": 0.18206751054852321, + "grad_norm": 0.7116028666496277, + "learning_rate": 0.001390967636831275, + "loss": 1.7609, + "step": 1726 + }, + { + "epoch": 0.1821729957805907, + "grad_norm": 0.9115207195281982, + "learning_rate": 0.0013908372386798717, + "loss": 1.723, + "step": 1727 + }, + { + "epoch": 0.18227848101265823, + "grad_norm": 0.7241288423538208, + "learning_rate": 0.0013907067687194607, + "loss": 1.736, + "step": 1728 + }, + { + "epoch": 0.18238396624472575, + "grad_norm": 0.7679779529571533, + "learning_rate": 0.0013905762269646614, + "loss": 1.757, + "step": 1729 + }, + { + "epoch": 0.18248945147679324, + "grad_norm": 0.7520509362220764, + "learning_rate": 0.0013904456134301016, + "loss": 1.7753, + "step": 1730 + }, + { + "epoch": 0.18259493670886076, + "grad_norm": 0.7841134667396545, + "learning_rate": 0.001390314928130417, + "loss": 1.731, + "step": 1731 + }, + { + "epoch": 0.18270042194092828, + "grad_norm": 0.721804141998291, + "learning_rate": 0.0013901841710802522, + "loss": 1.7282, + "step": 1732 + }, + { + "epoch": 0.18280590717299577, + "grad_norm": 0.7132200002670288, + "learning_rate": 0.0013900533422942585, + "loss": 1.7198, + "step": 1733 + }, + { + "epoch": 0.1829113924050633, + "grad_norm": 0.6780728101730347, + "learning_rate": 0.0013899224417870963, + "loss": 1.7437, + "step": 1734 + }, + { + "epoch": 0.18301687763713081, + "grad_norm": 0.6842831969261169, + "learning_rate": 0.0013897914695734336, + "loss": 1.7272, + "step": 1735 + }, + { + "epoch": 0.1831223628691983, + "grad_norm": 0.7051767706871033, + "learning_rate": 0.0013896604256679462, + "loss": 1.7623, + "step": 1736 + }, + { + "epoch": 0.18322784810126583, + "grad_norm": 0.6974624395370483, + "learning_rate": 0.0013895293100853188, + "loss": 1.7196, + 
"step": 1737 + }, + { + "epoch": 0.18333333333333332, + "grad_norm": 0.8593233227729797, + "learning_rate": 0.001389398122840243, + "loss": 1.7343, + "step": 1738 + }, + { + "epoch": 0.18343881856540084, + "grad_norm": 0.8465375900268555, + "learning_rate": 0.0013892668639474194, + "loss": 1.7671, + "step": 1739 + }, + { + "epoch": 0.18354430379746836, + "grad_norm": 0.7442417740821838, + "learning_rate": 0.0013891355334215562, + "loss": 1.7671, + "step": 1740 + }, + { + "epoch": 0.18364978902953585, + "grad_norm": 0.81801837682724, + "learning_rate": 0.001389004131277369, + "loss": 1.7607, + "step": 1741 + }, + { + "epoch": 0.18375527426160337, + "grad_norm": 0.9362391829490662, + "learning_rate": 0.0013888726575295826, + "loss": 1.7532, + "step": 1742 + }, + { + "epoch": 0.1838607594936709, + "grad_norm": 1.064470648765564, + "learning_rate": 0.0013887411121929294, + "loss": 1.7598, + "step": 1743 + }, + { + "epoch": 0.1839662447257384, + "grad_norm": 1.057011365890503, + "learning_rate": 0.0013886094952821496, + "loss": 1.7619, + "step": 1744 + }, + { + "epoch": 0.1840717299578059, + "grad_norm": 0.8175992369651794, + "learning_rate": 0.0013884778068119913, + "loss": 1.7469, + "step": 1745 + }, + { + "epoch": 0.18417721518987343, + "grad_norm": 0.7028190493583679, + "learning_rate": 0.0013883460467972108, + "loss": 1.7647, + "step": 1746 + }, + { + "epoch": 0.18428270042194092, + "grad_norm": 0.6748763918876648, + "learning_rate": 0.0013882142152525732, + "loss": 1.7905, + "step": 1747 + }, + { + "epoch": 0.18438818565400844, + "grad_norm": 0.7403474450111389, + "learning_rate": 0.0013880823121928498, + "loss": 1.7347, + "step": 1748 + }, + { + "epoch": 0.18449367088607596, + "grad_norm": 0.7543158531188965, + "learning_rate": 0.0013879503376328219, + "loss": 1.7842, + "step": 1749 + }, + { + "epoch": 0.18459915611814345, + "grad_norm": 0.665582001209259, + "learning_rate": 0.0013878182915872776, + "loss": 1.7774, + "step": 1750 + }, + { + "epoch": 
0.18470464135021097, + "grad_norm": 1.016636610031128, + "learning_rate": 0.001387686174071013, + "loss": 1.738, + "step": 1751 + }, + { + "epoch": 0.1848101265822785, + "grad_norm": 1.239404559135437, + "learning_rate": 0.001387553985098833, + "loss": 1.7556, + "step": 1752 + }, + { + "epoch": 0.184915611814346, + "grad_norm": 0.884087085723877, + "learning_rate": 0.0013874217246855499, + "loss": 1.7537, + "step": 1753 + }, + { + "epoch": 0.1850210970464135, + "grad_norm": 0.8518487215042114, + "learning_rate": 0.001387289392845984, + "loss": 1.7491, + "step": 1754 + }, + { + "epoch": 0.185126582278481, + "grad_norm": 1.2506965398788452, + "learning_rate": 0.0013871569895949635, + "loss": 1.7655, + "step": 1755 + }, + { + "epoch": 0.18523206751054852, + "grad_norm": 0.8242591619491577, + "learning_rate": 0.0013870245149473256, + "loss": 1.7142, + "step": 1756 + }, + { + "epoch": 0.18533755274261604, + "grad_norm": 0.7729036211967468, + "learning_rate": 0.0013868919689179143, + "loss": 1.7334, + "step": 1757 + }, + { + "epoch": 0.18544303797468353, + "grad_norm": 0.9772310853004456, + "learning_rate": 0.001386759351521582, + "loss": 1.7607, + "step": 1758 + }, + { + "epoch": 0.18554852320675105, + "grad_norm": 1.21804678440094, + "learning_rate": 0.0013866266627731892, + "loss": 1.7474, + "step": 1759 + }, + { + "epoch": 0.18565400843881857, + "grad_norm": 0.673929750919342, + "learning_rate": 0.001386493902687604, + "loss": 1.6983, + "step": 1760 + }, + { + "epoch": 0.18575949367088607, + "grad_norm": 0.9135398268699646, + "learning_rate": 0.0013863610712797035, + "loss": 1.739, + "step": 1761 + }, + { + "epoch": 0.1858649789029536, + "grad_norm": 0.9285421967506409, + "learning_rate": 0.0013862281685643716, + "loss": 1.7597, + "step": 1762 + }, + { + "epoch": 0.1859704641350211, + "grad_norm": 0.7920629382133484, + "learning_rate": 0.001386095194556501, + "loss": 1.7355, + "step": 1763 + }, + { + "epoch": 0.1860759493670886, + "grad_norm": 0.7963035702705383, + 
"learning_rate": 0.001385962149270992, + "loss": 1.7166, + "step": 1764 + }, + { + "epoch": 0.18618143459915612, + "grad_norm": 1.0677900314331055, + "learning_rate": 0.001385829032722753, + "loss": 1.7401, + "step": 1765 + }, + { + "epoch": 0.18628691983122364, + "grad_norm": 0.8028519153594971, + "learning_rate": 0.0013856958449267002, + "loss": 1.7427, + "step": 1766 + }, + { + "epoch": 0.18639240506329113, + "grad_norm": 0.8589402437210083, + "learning_rate": 0.0013855625858977584, + "loss": 1.7544, + "step": 1767 + }, + { + "epoch": 0.18649789029535865, + "grad_norm": 1.3844349384307861, + "learning_rate": 0.0013854292556508593, + "loss": 1.7269, + "step": 1768 + }, + { + "epoch": 0.18660337552742617, + "grad_norm": 0.7995085716247559, + "learning_rate": 0.0013852958542009438, + "loss": 1.7618, + "step": 1769 + }, + { + "epoch": 0.18670886075949367, + "grad_norm": 0.9628868699073792, + "learning_rate": 0.00138516238156296, + "loss": 1.7552, + "step": 1770 + }, + { + "epoch": 0.1868143459915612, + "grad_norm": 1.0164662599563599, + "learning_rate": 0.001385028837751864, + "loss": 1.7522, + "step": 1771 + }, + { + "epoch": 0.18691983122362868, + "grad_norm": 0.7339825630187988, + "learning_rate": 0.0013848952227826202, + "loss": 1.7465, + "step": 1772 + }, + { + "epoch": 0.1870253164556962, + "grad_norm": 1.007260799407959, + "learning_rate": 0.0013847615366702009, + "loss": 1.7388, + "step": 1773 + }, + { + "epoch": 0.18713080168776372, + "grad_norm": 1.035135269165039, + "learning_rate": 0.001384627779429586, + "loss": 1.7817, + "step": 1774 + }, + { + "epoch": 0.1872362869198312, + "grad_norm": 0.8375556468963623, + "learning_rate": 0.0013844939510757642, + "loss": 1.7947, + "step": 1775 + }, + { + "epoch": 0.18734177215189873, + "grad_norm": 0.706091582775116, + "learning_rate": 0.0013843600516237312, + "loss": 1.7443, + "step": 1776 + }, + { + "epoch": 0.18744725738396625, + "grad_norm": 0.8893895149230957, + "learning_rate": 0.001384226081088491, + "loss": 
1.7211, + "step": 1777 + }, + { + "epoch": 0.18755274261603375, + "grad_norm": 0.8006113767623901, + "learning_rate": 0.001384092039485056, + "loss": 1.6832, + "step": 1778 + }, + { + "epoch": 0.18765822784810127, + "grad_norm": 0.7186245322227478, + "learning_rate": 0.0013839579268284461, + "loss": 1.7319, + "step": 1779 + }, + { + "epoch": 0.1877637130801688, + "grad_norm": 0.8055110573768616, + "learning_rate": 0.0013838237431336895, + "loss": 1.7348, + "step": 1780 + }, + { + "epoch": 0.18786919831223628, + "grad_norm": 0.8897551894187927, + "learning_rate": 0.0013836894884158217, + "loss": 1.7334, + "step": 1781 + }, + { + "epoch": 0.1879746835443038, + "grad_norm": 0.7141485214233398, + "learning_rate": 0.001383555162689887, + "loss": 1.7948, + "step": 1782 + }, + { + "epoch": 0.18808016877637132, + "grad_norm": 0.9107764959335327, + "learning_rate": 0.001383420765970937, + "loss": 1.7518, + "step": 1783 + }, + { + "epoch": 0.1881856540084388, + "grad_norm": 1.0374594926834106, + "learning_rate": 0.0013832862982740318, + "loss": 1.7144, + "step": 1784 + }, + { + "epoch": 0.18829113924050633, + "grad_norm": 0.727969229221344, + "learning_rate": 0.001383151759614239, + "loss": 1.7534, + "step": 1785 + }, + { + "epoch": 0.18839662447257383, + "grad_norm": 1.364011526107788, + "learning_rate": 0.0013830171500066343, + "loss": 1.7226, + "step": 1786 + }, + { + "epoch": 0.18850210970464135, + "grad_norm": 0.9982249140739441, + "learning_rate": 0.0013828824694663013, + "loss": 1.7452, + "step": 1787 + }, + { + "epoch": 0.18860759493670887, + "grad_norm": 0.7078720927238464, + "learning_rate": 0.001382747718008332, + "loss": 1.759, + "step": 1788 + }, + { + "epoch": 0.18871308016877636, + "grad_norm": 0.8998008370399475, + "learning_rate": 0.0013826128956478255, + "loss": 1.745, + "step": 1789 + }, + { + "epoch": 0.18881856540084388, + "grad_norm": 0.6802699565887451, + "learning_rate": 0.0013824780023998899, + "loss": 1.7282, + "step": 1790 + }, + { + "epoch": 
0.1889240506329114, + "grad_norm": 0.8458264470100403, + "learning_rate": 0.0013823430382796402, + "loss": 1.7002, + "step": 1791 + }, + { + "epoch": 0.1890295358649789, + "grad_norm": 1.1582791805267334, + "learning_rate": 0.0013822080033021997, + "loss": 1.7387, + "step": 1792 + }, + { + "epoch": 0.1891350210970464, + "grad_norm": 0.7963630557060242, + "learning_rate": 0.0013820728974827, + "loss": 1.7524, + "step": 1793 + }, + { + "epoch": 0.18924050632911393, + "grad_norm": 0.728833019733429, + "learning_rate": 0.0013819377208362806, + "loss": 1.8031, + "step": 1794 + }, + { + "epoch": 0.18934599156118143, + "grad_norm": 1.0251619815826416, + "learning_rate": 0.0013818024733780881, + "loss": 1.7203, + "step": 1795 + }, + { + "epoch": 0.18945147679324895, + "grad_norm": 0.9746160507202148, + "learning_rate": 0.0013816671551232782, + "loss": 1.774, + "step": 1796 + }, + { + "epoch": 0.18955696202531647, + "grad_norm": 0.6995503306388855, + "learning_rate": 0.0013815317660870138, + "loss": 1.7229, + "step": 1797 + }, + { + "epoch": 0.18966244725738396, + "grad_norm": 0.7225999236106873, + "learning_rate": 0.001381396306284466, + "loss": 1.7329, + "step": 1798 + }, + { + "epoch": 0.18976793248945148, + "grad_norm": 0.8503145575523376, + "learning_rate": 0.0013812607757308134, + "loss": 1.7282, + "step": 1799 + }, + { + "epoch": 0.189873417721519, + "grad_norm": 0.8033541440963745, + "learning_rate": 0.0013811251744412431, + "loss": 1.7289, + "step": 1800 + }, + { + "epoch": 0.1899789029535865, + "grad_norm": 0.6668581962585449, + "learning_rate": 0.0013809895024309501, + "loss": 1.6841, + "step": 1801 + }, + { + "epoch": 0.190084388185654, + "grad_norm": 0.6927618980407715, + "learning_rate": 0.001380853759715137, + "loss": 1.7136, + "step": 1802 + }, + { + "epoch": 0.1901898734177215, + "grad_norm": 0.7392303347587585, + "learning_rate": 0.0013807179463090143, + "loss": 1.7354, + "step": 1803 + }, + { + "epoch": 0.19029535864978903, + "grad_norm": 
0.7314698696136475, + "learning_rate": 0.0013805820622278008, + "loss": 1.7263, + "step": 1804 + }, + { + "epoch": 0.19040084388185655, + "grad_norm": 0.7891994118690491, + "learning_rate": 0.0013804461074867227, + "loss": 1.7207, + "step": 1805 + }, + { + "epoch": 0.19050632911392404, + "grad_norm": 0.6967216730117798, + "learning_rate": 0.0013803100821010146, + "loss": 1.7222, + "step": 1806 + }, + { + "epoch": 0.19061181434599156, + "grad_norm": 0.677146852016449, + "learning_rate": 0.0013801739860859188, + "loss": 1.7307, + "step": 1807 + }, + { + "epoch": 0.19071729957805908, + "grad_norm": 0.8083590269088745, + "learning_rate": 0.0013800378194566856, + "loss": 1.7176, + "step": 1808 + }, + { + "epoch": 0.19082278481012657, + "grad_norm": 0.7718316912651062, + "learning_rate": 0.001379901582228573, + "loss": 1.7696, + "step": 1809 + }, + { + "epoch": 0.1909282700421941, + "grad_norm": 0.7550231218338013, + "learning_rate": 0.0013797652744168473, + "loss": 1.7036, + "step": 1810 + }, + { + "epoch": 0.1910337552742616, + "grad_norm": 1.0522822141647339, + "learning_rate": 0.0013796288960367822, + "loss": 1.6828, + "step": 1811 + }, + { + "epoch": 0.1911392405063291, + "grad_norm": 1.0956426858901978, + "learning_rate": 0.0013794924471036596, + "loss": 1.7415, + "step": 1812 + }, + { + "epoch": 0.19124472573839663, + "grad_norm": 0.733767569065094, + "learning_rate": 0.0013793559276327695, + "loss": 1.7161, + "step": 1813 + }, + { + "epoch": 0.19135021097046415, + "grad_norm": 0.942251443862915, + "learning_rate": 0.0013792193376394094, + "loss": 1.7, + "step": 1814 + }, + { + "epoch": 0.19145569620253164, + "grad_norm": 1.268876552581787, + "learning_rate": 0.001379082677138885, + "loss": 1.7284, + "step": 1815 + }, + { + "epoch": 0.19156118143459916, + "grad_norm": 0.8536684513092041, + "learning_rate": 0.0013789459461465096, + "loss": 1.737, + "step": 1816 + }, + { + "epoch": 0.19166666666666668, + "grad_norm": 0.7624783515930176, + "learning_rate": 
0.001378809144677605, + "loss": 1.7031, + "step": 1817 + }, + { + "epoch": 0.19177215189873417, + "grad_norm": 0.9973534345626831, + "learning_rate": 0.0013786722727474998, + "loss": 1.7218, + "step": 1818 + }, + { + "epoch": 0.1918776371308017, + "grad_norm": 0.924683153629303, + "learning_rate": 0.0013785353303715317, + "loss": 1.7458, + "step": 1819 + }, + { + "epoch": 0.19198312236286919, + "grad_norm": 0.7389134168624878, + "learning_rate": 0.0013783983175650457, + "loss": 1.7808, + "step": 1820 + }, + { + "epoch": 0.1920886075949367, + "grad_norm": 0.7557039856910706, + "learning_rate": 0.001378261234343395, + "loss": 1.716, + "step": 1821 + }, + { + "epoch": 0.19219409282700423, + "grad_norm": 1.144822120666504, + "learning_rate": 0.0013781240807219399, + "loss": 1.7621, + "step": 1822 + }, + { + "epoch": 0.19229957805907172, + "grad_norm": 0.7838624119758606, + "learning_rate": 0.0013779868567160495, + "loss": 1.7488, + "step": 1823 + }, + { + "epoch": 0.19240506329113924, + "grad_norm": 0.7398967146873474, + "learning_rate": 0.0013778495623411008, + "loss": 1.7377, + "step": 1824 + }, + { + "epoch": 0.19251054852320676, + "grad_norm": 0.7167696952819824, + "learning_rate": 0.0013777121976124775, + "loss": 1.7674, + "step": 1825 + }, + { + "epoch": 0.19261603375527425, + "grad_norm": 0.6798158288002014, + "learning_rate": 0.0013775747625455724, + "loss": 1.712, + "step": 1826 + }, + { + "epoch": 0.19272151898734177, + "grad_norm": 0.7130699157714844, + "learning_rate": 0.0013774372571557856, + "loss": 1.7125, + "step": 1827 + }, + { + "epoch": 0.1928270042194093, + "grad_norm": 0.6951613426208496, + "learning_rate": 0.0013772996814585261, + "loss": 1.672, + "step": 1828 + }, + { + "epoch": 0.19293248945147679, + "grad_norm": 0.7474777698516846, + "learning_rate": 0.0013771620354692087, + "loss": 1.6941, + "step": 1829 + }, + { + "epoch": 0.1930379746835443, + "grad_norm": 0.7462137937545776, + "learning_rate": 0.0013770243192032581, + "loss": 1.6982, + 
"step": 1830 + }, + { + "epoch": 0.19314345991561183, + "grad_norm": 0.9565681219100952, + "learning_rate": 0.0013768865326761058, + "loss": 1.7151, + "step": 1831 + }, + { + "epoch": 0.19324894514767932, + "grad_norm": 0.9654238224029541, + "learning_rate": 0.0013767486759031918, + "loss": 1.721, + "step": 1832 + }, + { + "epoch": 0.19335443037974684, + "grad_norm": 0.9777080416679382, + "learning_rate": 0.0013766107488999632, + "loss": 1.7581, + "step": 1833 + }, + { + "epoch": 0.19345991561181436, + "grad_norm": 0.8674164414405823, + "learning_rate": 0.0013764727516818757, + "loss": 1.7582, + "step": 1834 + }, + { + "epoch": 0.19356540084388185, + "grad_norm": 0.7524341940879822, + "learning_rate": 0.0013763346842643927, + "loss": 1.7072, + "step": 1835 + }, + { + "epoch": 0.19367088607594937, + "grad_norm": 0.7051586508750916, + "learning_rate": 0.0013761965466629847, + "loss": 1.735, + "step": 1836 + }, + { + "epoch": 0.19377637130801686, + "grad_norm": 0.7337453961372375, + "learning_rate": 0.0013760583388931315, + "loss": 1.6977, + "step": 1837 + }, + { + "epoch": 0.19388185654008439, + "grad_norm": 0.6992490291595459, + "learning_rate": 0.0013759200609703196, + "loss": 1.7123, + "step": 1838 + }, + { + "epoch": 0.1939873417721519, + "grad_norm": 0.8042798042297363, + "learning_rate": 0.0013757817129100437, + "loss": 1.7231, + "step": 1839 + }, + { + "epoch": 0.1940928270042194, + "grad_norm": 0.6742119789123535, + "learning_rate": 0.0013756432947278064, + "loss": 1.7259, + "step": 1840 + }, + { + "epoch": 0.19419831223628692, + "grad_norm": 0.8066233396530151, + "learning_rate": 0.0013755048064391182, + "loss": 1.7481, + "step": 1841 + }, + { + "epoch": 0.19430379746835444, + "grad_norm": 0.8270861506462097, + "learning_rate": 0.0013753662480594973, + "loss": 1.7183, + "step": 1842 + }, + { + "epoch": 0.19440928270042193, + "grad_norm": 0.9463607668876648, + "learning_rate": 0.0013752276196044699, + "loss": 1.7091, + "step": 1843 + }, + { + "epoch": 
0.19451476793248945, + "grad_norm": 0.8910336494445801, + "learning_rate": 0.0013750889210895705, + "loss": 1.7566, + "step": 1844 + }, + { + "epoch": 0.19462025316455697, + "grad_norm": 0.8924692869186401, + "learning_rate": 0.0013749501525303401, + "loss": 1.7185, + "step": 1845 + }, + { + "epoch": 0.19472573839662446, + "grad_norm": 0.7748115062713623, + "learning_rate": 0.0013748113139423288, + "loss": 1.7602, + "step": 1846 + }, + { + "epoch": 0.19483122362869199, + "grad_norm": 0.7652146816253662, + "learning_rate": 0.0013746724053410944, + "loss": 1.7483, + "step": 1847 + }, + { + "epoch": 0.1949367088607595, + "grad_norm": 0.7639327049255371, + "learning_rate": 0.001374533426742202, + "loss": 1.6886, + "step": 1848 + }, + { + "epoch": 0.195042194092827, + "grad_norm": 1.102126121520996, + "learning_rate": 0.0013743943781612251, + "loss": 1.6834, + "step": 1849 + }, + { + "epoch": 0.19514767932489452, + "grad_norm": 1.1086560487747192, + "learning_rate": 0.0013742552596137444, + "loss": 1.7487, + "step": 1850 + }, + { + "epoch": 0.19525316455696204, + "grad_norm": 0.7383568286895752, + "learning_rate": 0.0013741160711153492, + "loss": 1.7011, + "step": 1851 + }, + { + "epoch": 0.19535864978902953, + "grad_norm": 1.0879368782043457, + "learning_rate": 0.0013739768126816358, + "loss": 1.741, + "step": 1852 + }, + { + "epoch": 0.19546413502109705, + "grad_norm": 1.3076605796813965, + "learning_rate": 0.0013738374843282094, + "loss": 1.7305, + "step": 1853 + }, + { + "epoch": 0.19556962025316454, + "grad_norm": 0.7350767850875854, + "learning_rate": 0.0013736980860706819, + "loss": 1.738, + "step": 1854 + }, + { + "epoch": 0.19567510548523206, + "grad_norm": 1.1638553142547607, + "learning_rate": 0.001373558617924674, + "loss": 1.7359, + "step": 1855 + }, + { + "epoch": 0.19578059071729959, + "grad_norm": 1.2243679761886597, + "learning_rate": 0.0013734190799058136, + "loss": 1.7478, + "step": 1856 + }, + { + "epoch": 0.19588607594936708, + "grad_norm": 
0.7830071449279785, + "learning_rate": 0.0013732794720297367, + "loss": 1.7232, + "step": 1857 + }, + { + "epoch": 0.1959915611814346, + "grad_norm": 1.4343202114105225, + "learning_rate": 0.0013731397943120868, + "loss": 1.7361, + "step": 1858 + }, + { + "epoch": 0.19609704641350212, + "grad_norm": 0.7234956622123718, + "learning_rate": 0.001373000046768516, + "loss": 1.7218, + "step": 1859 + }, + { + "epoch": 0.1962025316455696, + "grad_norm": 1.0234639644622803, + "learning_rate": 0.0013728602294146833, + "loss": 1.7536, + "step": 1860 + }, + { + "epoch": 0.19630801687763713, + "grad_norm": 1.3283703327178955, + "learning_rate": 0.001372720342266256, + "loss": 1.7388, + "step": 1861 + }, + { + "epoch": 0.19641350210970465, + "grad_norm": 0.6829545497894287, + "learning_rate": 0.001372580385338909, + "loss": 1.7213, + "step": 1862 + }, + { + "epoch": 0.19651898734177214, + "grad_norm": 1.2009776830673218, + "learning_rate": 0.0013724403586483254, + "loss": 1.7992, + "step": 1863 + }, + { + "epoch": 0.19662447257383966, + "grad_norm": 0.931550920009613, + "learning_rate": 0.001372300262210196, + "loss": 1.7087, + "step": 1864 + }, + { + "epoch": 0.19672995780590719, + "grad_norm": 0.6441112756729126, + "learning_rate": 0.001372160096040219, + "loss": 1.7102, + "step": 1865 + }, + { + "epoch": 0.19683544303797468, + "grad_norm": 0.8743553757667542, + "learning_rate": 0.001372019860154101, + "loss": 1.6946, + "step": 1866 + }, + { + "epoch": 0.1969409282700422, + "grad_norm": 0.7881885766983032, + "learning_rate": 0.001371879554567556, + "loss": 1.7336, + "step": 1867 + }, + { + "epoch": 0.19704641350210972, + "grad_norm": 0.6738293766975403, + "learning_rate": 0.0013717391792963062, + "loss": 1.6988, + "step": 1868 + }, + { + "epoch": 0.1971518987341772, + "grad_norm": 0.8838372230529785, + "learning_rate": 0.0013715987343560804, + "loss": 1.7421, + "step": 1869 + }, + { + "epoch": 0.19725738396624473, + "grad_norm": 0.885265588760376, + "learning_rate": 
0.0013714582197626175, + "loss": 1.751, + "step": 1870 + }, + { + "epoch": 0.19736286919831222, + "grad_norm": 0.6846805810928345, + "learning_rate": 0.001371317635531662, + "loss": 1.6772, + "step": 1871 + }, + { + "epoch": 0.19746835443037974, + "grad_norm": 0.7496611475944519, + "learning_rate": 0.001371176981678967, + "loss": 1.6884, + "step": 1872 + }, + { + "epoch": 0.19757383966244726, + "grad_norm": 0.7249540090560913, + "learning_rate": 0.001371036258220294, + "loss": 1.6799, + "step": 1873 + }, + { + "epoch": 0.19767932489451476, + "grad_norm": 0.774915874004364, + "learning_rate": 0.0013708954651714116, + "loss": 1.6997, + "step": 1874 + }, + { + "epoch": 0.19778481012658228, + "grad_norm": 0.7303387522697449, + "learning_rate": 0.0013707546025480961, + "loss": 1.7191, + "step": 1875 + }, + { + "epoch": 0.1978902953586498, + "grad_norm": 1.1347702741622925, + "learning_rate": 0.001370613670366132, + "loss": 1.7473, + "step": 1876 + }, + { + "epoch": 0.1979957805907173, + "grad_norm": 0.8618520498275757, + "learning_rate": 0.0013704726686413116, + "loss": 1.7736, + "step": 1877 + }, + { + "epoch": 0.1981012658227848, + "grad_norm": 0.8512491583824158, + "learning_rate": 0.0013703315973894346, + "loss": 1.6904, + "step": 1878 + }, + { + "epoch": 0.19820675105485233, + "grad_norm": 1.3614270687103271, + "learning_rate": 0.001370190456626309, + "loss": 1.683, + "step": 1879 + }, + { + "epoch": 0.19831223628691982, + "grad_norm": 0.7100306749343872, + "learning_rate": 0.0013700492463677501, + "loss": 1.74, + "step": 1880 + }, + { + "epoch": 0.19841772151898734, + "grad_norm": 1.0686612129211426, + "learning_rate": 0.0013699079666295811, + "loss": 1.7344, + "step": 1881 + }, + { + "epoch": 0.19852320675105486, + "grad_norm": 1.1762845516204834, + "learning_rate": 0.0013697666174276337, + "loss": 1.7022, + "step": 1882 + }, + { + "epoch": 0.19862869198312236, + "grad_norm": 0.6906229853630066, + "learning_rate": 0.001369625198777746, + "loss": 1.7213, + "step": 
1883 + }, + { + "epoch": 0.19873417721518988, + "grad_norm": 0.6772141456604004, + "learning_rate": 0.0013694837106957654, + "loss": 1.7214, + "step": 1884 + }, + { + "epoch": 0.19883966244725737, + "grad_norm": 0.6661077737808228, + "learning_rate": 0.0013693421531975455, + "loss": 1.6967, + "step": 1885 + }, + { + "epoch": 0.1989451476793249, + "grad_norm": 0.6766542792320251, + "learning_rate": 0.0013692005262989496, + "loss": 1.7184, + "step": 1886 + }, + { + "epoch": 0.1990506329113924, + "grad_norm": 0.6991927027702332, + "learning_rate": 0.0013690588300158467, + "loss": 1.7155, + "step": 1887 + }, + { + "epoch": 0.1991561181434599, + "grad_norm": 0.6834880709648132, + "learning_rate": 0.001368917064364115, + "loss": 1.7521, + "step": 1888 + }, + { + "epoch": 0.19926160337552742, + "grad_norm": 0.8066041469573975, + "learning_rate": 0.0013687752293596402, + "loss": 1.7067, + "step": 1889 + }, + { + "epoch": 0.19936708860759494, + "grad_norm": 0.8059431314468384, + "learning_rate": 0.0013686333250183154, + "loss": 1.7065, + "step": 1890 + }, + { + "epoch": 0.19947257383966244, + "grad_norm": 0.8374693393707275, + "learning_rate": 0.0013684913513560418, + "loss": 1.7312, + "step": 1891 + }, + { + "epoch": 0.19957805907172996, + "grad_norm": 0.7275794744491577, + "learning_rate": 0.0013683493083887282, + "loss": 1.686, + "step": 1892 + }, + { + "epoch": 0.19968354430379748, + "grad_norm": 0.684088408946991, + "learning_rate": 0.0013682071961322914, + "loss": 1.6666, + "step": 1893 + }, + { + "epoch": 0.19978902953586497, + "grad_norm": 0.7127245664596558, + "learning_rate": 0.0013680650146026554, + "loss": 1.7115, + "step": 1894 + }, + { + "epoch": 0.1998945147679325, + "grad_norm": 0.679622232913971, + "learning_rate": 0.0013679227638157523, + "loss": 1.7494, + "step": 1895 + }, + { + "epoch": 0.2, + "grad_norm": 0.673174262046814, + "learning_rate": 0.0013677804437875227, + "loss": 1.7198, + "step": 1896 + }, + { + "epoch": 0.2001054852320675, + "grad_norm": 
0.7438588738441467, + "learning_rate": 0.0013676380545339136, + "loss": 1.7252, + "step": 1897 + }, + { + "epoch": 0.20021097046413502, + "grad_norm": 0.7491299510002136, + "learning_rate": 0.0013674955960708808, + "loss": 1.7331, + "step": 1898 + }, + { + "epoch": 0.20031645569620254, + "grad_norm": 0.7383769154548645, + "learning_rate": 0.0013673530684143874, + "loss": 1.7184, + "step": 1899 + }, + { + "epoch": 0.20042194092827004, + "grad_norm": 0.7300867438316345, + "learning_rate": 0.001367210471580404, + "loss": 1.7316, + "step": 1900 + }, + { + "epoch": 0.20052742616033756, + "grad_norm": 0.779679536819458, + "learning_rate": 0.0013670678055849098, + "loss": 1.7103, + "step": 1901 + }, + { + "epoch": 0.20063291139240505, + "grad_norm": 0.6432147026062012, + "learning_rate": 0.0013669250704438911, + "loss": 1.6712, + "step": 1902 + }, + { + "epoch": 0.20073839662447257, + "grad_norm": 0.7199810743331909, + "learning_rate": 0.0013667822661733418, + "loss": 1.7302, + "step": 1903 + }, + { + "epoch": 0.2008438818565401, + "grad_norm": 0.7207539677619934, + "learning_rate": 0.0013666393927892642, + "loss": 1.7018, + "step": 1904 + }, + { + "epoch": 0.20094936708860758, + "grad_norm": 0.8268831372261047, + "learning_rate": 0.0013664964503076677, + "loss": 1.6671, + "step": 1905 + }, + { + "epoch": 0.2010548523206751, + "grad_norm": 1.0413949489593506, + "learning_rate": 0.0013663534387445696, + "loss": 1.7488, + "step": 1906 + }, + { + "epoch": 0.20116033755274262, + "grad_norm": 0.8984591364860535, + "learning_rate": 0.0013662103581159955, + "loss": 1.71, + "step": 1907 + }, + { + "epoch": 0.20126582278481012, + "grad_norm": 0.6967827081680298, + "learning_rate": 0.0013660672084379781, + "loss": 1.7329, + "step": 1908 + }, + { + "epoch": 0.20137130801687764, + "grad_norm": 0.9720147252082825, + "learning_rate": 0.001365923989726558, + "loss": 1.7619, + "step": 1909 + }, + { + "epoch": 0.20147679324894516, + "grad_norm": 1.3310585021972656, + "learning_rate": 
0.0013657807019977835, + "loss": 1.7373, + "step": 1910 + }, + { + "epoch": 0.20158227848101265, + "grad_norm": 0.7319318056106567, + "learning_rate": 0.0013656373452677107, + "loss": 1.7399, + "step": 1911 + }, + { + "epoch": 0.20168776371308017, + "grad_norm": 0.8556087613105774, + "learning_rate": 0.0013654939195524038, + "loss": 1.7081, + "step": 1912 + }, + { + "epoch": 0.2017932489451477, + "grad_norm": 0.9914360046386719, + "learning_rate": 0.0013653504248679338, + "loss": 1.7008, + "step": 1913 + }, + { + "epoch": 0.20189873417721518, + "grad_norm": 1.1340121030807495, + "learning_rate": 0.0013652068612303803, + "loss": 1.7288, + "step": 1914 + }, + { + "epoch": 0.2020042194092827, + "grad_norm": 0.882554829120636, + "learning_rate": 0.0013650632286558305, + "loss": 1.6937, + "step": 1915 + }, + { + "epoch": 0.20210970464135022, + "grad_norm": 0.8325515985488892, + "learning_rate": 0.001364919527160379, + "loss": 1.7187, + "step": 1916 + }, + { + "epoch": 0.20221518987341772, + "grad_norm": 0.7468682527542114, + "learning_rate": 0.001364775756760128, + "loss": 1.696, + "step": 1917 + }, + { + "epoch": 0.20232067510548524, + "grad_norm": 0.7923012375831604, + "learning_rate": 0.0013646319174711878, + "loss": 1.721, + "step": 1918 + }, + { + "epoch": 0.20242616033755273, + "grad_norm": 0.8816736936569214, + "learning_rate": 0.0013644880093096766, + "loss": 1.7675, + "step": 1919 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 0.7137885689735413, + "learning_rate": 0.0013643440322917198, + "loss": 1.6909, + "step": 1920 + }, + { + "epoch": 0.20263713080168777, + "grad_norm": 0.7821003794670105, + "learning_rate": 0.0013641999864334507, + "loss": 1.7383, + "step": 1921 + }, + { + "epoch": 0.20274261603375526, + "grad_norm": 1.2549324035644531, + "learning_rate": 0.0013640558717510107, + "loss": 1.6903, + "step": 1922 + }, + { + "epoch": 0.20284810126582278, + "grad_norm": 0.692743718624115, + "learning_rate": 0.0013639116882605481, + "loss": 1.7271, + 
"step": 1923 + }, + { + "epoch": 0.2029535864978903, + "grad_norm": 1.0527983903884888, + "learning_rate": 0.0013637674359782196, + "loss": 1.7036, + "step": 1924 + }, + { + "epoch": 0.2030590717299578, + "grad_norm": 1.3300303220748901, + "learning_rate": 0.0013636231149201895, + "loss": 1.7707, + "step": 1925 + }, + { + "epoch": 0.20316455696202532, + "grad_norm": 0.6958779096603394, + "learning_rate": 0.0013634787251026296, + "loss": 1.7065, + "step": 1926 + }, + { + "epoch": 0.20327004219409284, + "grad_norm": 1.213886022567749, + "learning_rate": 0.0013633342665417192, + "loss": 1.7062, + "step": 1927 + }, + { + "epoch": 0.20337552742616033, + "grad_norm": 0.7662460207939148, + "learning_rate": 0.0013631897392536463, + "loss": 1.7558, + "step": 1928 + }, + { + "epoch": 0.20348101265822785, + "grad_norm": 1.1003443002700806, + "learning_rate": 0.001363045143254605, + "loss": 1.6989, + "step": 1929 + }, + { + "epoch": 0.20358649789029537, + "grad_norm": 1.3721284866333008, + "learning_rate": 0.0013629004785607989, + "loss": 1.737, + "step": 1930 + }, + { + "epoch": 0.20369198312236286, + "grad_norm": 0.7752100825309753, + "learning_rate": 0.0013627557451884374, + "loss": 1.6625, + "step": 1931 + }, + { + "epoch": 0.20379746835443038, + "grad_norm": 1.2047680616378784, + "learning_rate": 0.0013626109431537398, + "loss": 1.7229, + "step": 1932 + }, + { + "epoch": 0.2039029535864979, + "grad_norm": 0.7096976637840271, + "learning_rate": 0.001362466072472931, + "loss": 1.7121, + "step": 1933 + }, + { + "epoch": 0.2040084388185654, + "grad_norm": 0.79989093542099, + "learning_rate": 0.0013623211331622448, + "loss": 1.7189, + "step": 1934 + }, + { + "epoch": 0.20411392405063292, + "grad_norm": 0.7396421432495117, + "learning_rate": 0.0013621761252379221, + "loss": 1.6923, + "step": 1935 + }, + { + "epoch": 0.2042194092827004, + "grad_norm": 0.8233966827392578, + "learning_rate": 0.0013620310487162124, + "loss": 1.708, + "step": 1936 + }, + { + "epoch": 
0.20432489451476793, + "grad_norm": 0.7290173768997192, + "learning_rate": 0.0013618859036133714, + "loss": 1.7227, + "step": 1937 + }, + { + "epoch": 0.20443037974683545, + "grad_norm": 0.7066577076911926, + "learning_rate": 0.001361740689945664, + "loss": 1.7303, + "step": 1938 + }, + { + "epoch": 0.20453586497890294, + "grad_norm": 0.7711053490638733, + "learning_rate": 0.001361595407729362, + "loss": 1.7121, + "step": 1939 + }, + { + "epoch": 0.20464135021097046, + "grad_norm": 0.7405821681022644, + "learning_rate": 0.0013614500569807445, + "loss": 1.6794, + "step": 1940 + }, + { + "epoch": 0.20474683544303798, + "grad_norm": 0.6971741318702698, + "learning_rate": 0.0013613046377160996, + "loss": 1.7109, + "step": 1941 + }, + { + "epoch": 0.20485232067510548, + "grad_norm": 0.786941409111023, + "learning_rate": 0.0013611591499517212, + "loss": 1.7302, + "step": 1942 + }, + { + "epoch": 0.204957805907173, + "grad_norm": 0.7232295870780945, + "learning_rate": 0.001361013593703913, + "loss": 1.6982, + "step": 1943 + }, + { + "epoch": 0.20506329113924052, + "grad_norm": 0.7321121692657471, + "learning_rate": 0.0013608679689889847, + "loss": 1.7038, + "step": 1944 + }, + { + "epoch": 0.205168776371308, + "grad_norm": 1.0313951969146729, + "learning_rate": 0.0013607222758232546, + "loss": 1.7492, + "step": 1945 + }, + { + "epoch": 0.20527426160337553, + "grad_norm": 1.2009357213974, + "learning_rate": 0.0013605765142230479, + "loss": 1.6731, + "step": 1946 + }, + { + "epoch": 0.20537974683544305, + "grad_norm": 0.7572897672653198, + "learning_rate": 0.0013604306842046983, + "loss": 1.7331, + "step": 1947 + }, + { + "epoch": 0.20548523206751054, + "grad_norm": 0.751259446144104, + "learning_rate": 0.0013602847857845466, + "loss": 1.6914, + "step": 1948 + }, + { + "epoch": 0.20559071729957806, + "grad_norm": 1.0502233505249023, + "learning_rate": 0.0013601388189789414, + "loss": 1.6891, + "step": 1949 + }, + { + "epoch": 0.20569620253164558, + "grad_norm": 
0.8083106875419617, + "learning_rate": 0.0013599927838042394, + "loss": 1.6792, + "step": 1950 + }, + { + "epoch": 0.20580168776371308, + "grad_norm": 0.6450133919715881, + "learning_rate": 0.0013598466802768041, + "loss": 1.744, + "step": 1951 + }, + { + "epoch": 0.2059071729957806, + "grad_norm": 0.8665514588356018, + "learning_rate": 0.0013597005084130072, + "loss": 1.7148, + "step": 1952 + }, + { + "epoch": 0.2060126582278481, + "grad_norm": 0.9379560947418213, + "learning_rate": 0.0013595542682292281, + "loss": 1.7391, + "step": 1953 + }, + { + "epoch": 0.2061181434599156, + "grad_norm": 0.9356887340545654, + "learning_rate": 0.0013594079597418541, + "loss": 1.6612, + "step": 1954 + }, + { + "epoch": 0.20622362869198313, + "grad_norm": 0.8913035988807678, + "learning_rate": 0.0013592615829672791, + "loss": 1.7183, + "step": 1955 + }, + { + "epoch": 0.20632911392405062, + "grad_norm": 0.8160530924797058, + "learning_rate": 0.0013591151379219058, + "loss": 1.6681, + "step": 1956 + }, + { + "epoch": 0.20643459915611814, + "grad_norm": 0.8431174159049988, + "learning_rate": 0.0013589686246221438, + "loss": 1.7056, + "step": 1957 + }, + { + "epoch": 0.20654008438818566, + "grad_norm": 1.071945071220398, + "learning_rate": 0.001358822043084411, + "loss": 1.7147, + "step": 1958 + }, + { + "epoch": 0.20664556962025316, + "grad_norm": 0.8916067481040955, + "learning_rate": 0.0013586753933251322, + "loss": 1.7334, + "step": 1959 + }, + { + "epoch": 0.20675105485232068, + "grad_norm": 0.7169811725616455, + "learning_rate": 0.0013585286753607408, + "loss": 1.6899, + "step": 1960 + }, + { + "epoch": 0.2068565400843882, + "grad_norm": 0.706357479095459, + "learning_rate": 0.0013583818892076765, + "loss": 1.744, + "step": 1961 + }, + { + "epoch": 0.2069620253164557, + "grad_norm": 0.7012400031089783, + "learning_rate": 0.0013582350348823882, + "loss": 1.7539, + "step": 1962 + }, + { + "epoch": 0.2070675105485232, + "grad_norm": 0.7794974446296692, + "learning_rate": 
0.0013580881124013312, + "loss": 1.7305, + "step": 1963 + }, + { + "epoch": 0.20717299578059073, + "grad_norm": 0.7428677082061768, + "learning_rate": 0.001357941121780969, + "loss": 1.6863, + "step": 1964 + }, + { + "epoch": 0.20727848101265822, + "grad_norm": 0.7112969756126404, + "learning_rate": 0.0013577940630377725, + "loss": 1.7179, + "step": 1965 + }, + { + "epoch": 0.20738396624472574, + "grad_norm": 0.6479958295822144, + "learning_rate": 0.0013576469361882208, + "loss": 1.702, + "step": 1966 + }, + { + "epoch": 0.20748945147679324, + "grad_norm": 0.7010189294815063, + "learning_rate": 0.0013574997412487996, + "loss": 1.7332, + "step": 1967 + }, + { + "epoch": 0.20759493670886076, + "grad_norm": 0.6216477751731873, + "learning_rate": 0.0013573524782360034, + "loss": 1.7026, + "step": 1968 + }, + { + "epoch": 0.20770042194092828, + "grad_norm": 1.0770529508590698, + "learning_rate": 0.0013572051471663332, + "loss": 1.7116, + "step": 1969 + }, + { + "epoch": 0.20780590717299577, + "grad_norm": 1.0246919393539429, + "learning_rate": 0.0013570577480562986, + "loss": 1.7134, + "step": 1970 + }, + { + "epoch": 0.2079113924050633, + "grad_norm": 0.732853889465332, + "learning_rate": 0.0013569102809224162, + "loss": 1.7143, + "step": 1971 + }, + { + "epoch": 0.2080168776371308, + "grad_norm": 0.9493834972381592, + "learning_rate": 0.0013567627457812105, + "loss": 1.6694, + "step": 1972 + }, + { + "epoch": 0.2081223628691983, + "grad_norm": 1.0634267330169678, + "learning_rate": 0.0013566151426492137, + "loss": 1.7137, + "step": 1973 + }, + { + "epoch": 0.20822784810126582, + "grad_norm": 0.7595221400260925, + "learning_rate": 0.0013564674715429651, + "loss": 1.6984, + "step": 1974 + }, + { + "epoch": 0.20833333333333334, + "grad_norm": 1.260229229927063, + "learning_rate": 0.0013563197324790123, + "loss": 1.7295, + "step": 1975 + }, + { + "epoch": 0.20843881856540084, + "grad_norm": 1.183432698249817, + "learning_rate": 0.0013561719254739104, + "loss": 1.7058, + 
"step": 1976 + }, + { + "epoch": 0.20854430379746836, + "grad_norm": 0.7262617349624634, + "learning_rate": 0.001356024050544221, + "loss": 1.7246, + "step": 1977 + }, + { + "epoch": 0.20864978902953588, + "grad_norm": 0.7098615765571594, + "learning_rate": 0.0013558761077065154, + "loss": 1.6935, + "step": 1978 + }, + { + "epoch": 0.20875527426160337, + "grad_norm": 0.7875899076461792, + "learning_rate": 0.0013557280969773704, + "loss": 1.6749, + "step": 1979 + }, + { + "epoch": 0.2088607594936709, + "grad_norm": 0.705172598361969, + "learning_rate": 0.0013555800183733717, + "loss": 1.7183, + "step": 1980 + }, + { + "epoch": 0.2089662447257384, + "grad_norm": 0.749589741230011, + "learning_rate": 0.0013554318719111124, + "loss": 1.6791, + "step": 1981 + }, + { + "epoch": 0.2090717299578059, + "grad_norm": 1.0610191822052002, + "learning_rate": 0.0013552836576071925, + "loss": 1.6599, + "step": 1982 + }, + { + "epoch": 0.20917721518987342, + "grad_norm": 0.9439182281494141, + "learning_rate": 0.0013551353754782211, + "loss": 1.7032, + "step": 1983 + }, + { + "epoch": 0.20928270042194091, + "grad_norm": 0.7144791483879089, + "learning_rate": 0.0013549870255408132, + "loss": 1.7406, + "step": 1984 + }, + { + "epoch": 0.20938818565400844, + "grad_norm": 0.7207260131835938, + "learning_rate": 0.0013548386078115924, + "loss": 1.7027, + "step": 1985 + }, + { + "epoch": 0.20949367088607596, + "grad_norm": 0.726900577545166, + "learning_rate": 0.0013546901223071893, + "loss": 1.6711, + "step": 1986 + }, + { + "epoch": 0.20959915611814345, + "grad_norm": 0.664188027381897, + "learning_rate": 0.001354541569044243, + "loss": 1.6665, + "step": 1987 + }, + { + "epoch": 0.20970464135021097, + "grad_norm": 0.6815235018730164, + "learning_rate": 0.0013543929480393994, + "loss": 1.7187, + "step": 1988 + }, + { + "epoch": 0.2098101265822785, + "grad_norm": 0.7962552309036255, + "learning_rate": 0.0013542442593093122, + "loss": 1.7269, + "step": 1989 + }, + { + "epoch": 
0.20991561181434598, + "grad_norm": 0.7301221489906311, + "learning_rate": 0.0013540955028706425, + "loss": 1.709, + "step": 1990 + }, + { + "epoch": 0.2100210970464135, + "grad_norm": 0.6269825100898743, + "learning_rate": 0.0013539466787400598, + "loss": 1.7313, + "step": 1991 + }, + { + "epoch": 0.21012658227848102, + "grad_norm": 0.7904106378555298, + "learning_rate": 0.00135379778693424, + "loss": 1.6894, + "step": 1992 + }, + { + "epoch": 0.21023206751054851, + "grad_norm": 0.9329445958137512, + "learning_rate": 0.0013536488274698672, + "loss": 1.7248, + "step": 1993 + }, + { + "epoch": 0.21033755274261604, + "grad_norm": 0.8090283870697021, + "learning_rate": 0.0013534998003636332, + "loss": 1.6669, + "step": 1994 + }, + { + "epoch": 0.21044303797468356, + "grad_norm": 0.7560129165649414, + "learning_rate": 0.0013533507056322374, + "loss": 1.7428, + "step": 1995 + }, + { + "epoch": 0.21054852320675105, + "grad_norm": 0.6775130033493042, + "learning_rate": 0.0013532015432923864, + "loss": 1.6841, + "step": 1996 + }, + { + "epoch": 0.21065400843881857, + "grad_norm": 0.8837060332298279, + "learning_rate": 0.0013530523133607948, + "loss": 1.7005, + "step": 1997 + }, + { + "epoch": 0.2107594936708861, + "grad_norm": 1.0978809595108032, + "learning_rate": 0.0013529030158541842, + "loss": 1.6859, + "step": 1998 + }, + { + "epoch": 0.21086497890295358, + "grad_norm": 0.7232955098152161, + "learning_rate": 0.0013527536507892844, + "loss": 1.6845, + "step": 1999 + }, + { + "epoch": 0.2109704641350211, + "grad_norm": 0.7284314036369324, + "learning_rate": 0.0013526042181828324, + "loss": 1.7231, + "step": 2000 + }, + { + "epoch": 0.2110759493670886, + "grad_norm": 1.019104242324829, + "learning_rate": 0.001352454718051573, + "loss": 1.7442, + "step": 2001 + }, + { + "epoch": 0.21118143459915611, + "grad_norm": 0.8879207968711853, + "learning_rate": 0.0013523051504122584, + "loss": 1.7113, + "step": 2002 + }, + { + "epoch": 0.21128691983122364, + "grad_norm": 
0.6414099931716919, + "learning_rate": 0.0013521555152816481, + "loss": 1.6538, + "step": 2003 + }, + { + "epoch": 0.21139240506329113, + "grad_norm": 1.1022038459777832, + "learning_rate": 0.0013520058126765097, + "loss": 1.7051, + "step": 2004 + }, + { + "epoch": 0.21149789029535865, + "grad_norm": 1.3093273639678955, + "learning_rate": 0.0013518560426136182, + "loss": 1.7455, + "step": 2005 + }, + { + "epoch": 0.21160337552742617, + "grad_norm": 0.700279176235199, + "learning_rate": 0.001351706205109756, + "loss": 1.7127, + "step": 2006 + }, + { + "epoch": 0.21170886075949366, + "grad_norm": 0.9771433472633362, + "learning_rate": 0.001351556300181713, + "loss": 1.6969, + "step": 2007 + }, + { + "epoch": 0.21181434599156118, + "grad_norm": 0.9016830921173096, + "learning_rate": 0.001351406327846287, + "loss": 1.7418, + "step": 2008 + }, + { + "epoch": 0.2119198312236287, + "grad_norm": 0.6705576181411743, + "learning_rate": 0.0013512562881202832, + "loss": 1.6827, + "step": 2009 + }, + { + "epoch": 0.2120253164556962, + "grad_norm": 0.8668855428695679, + "learning_rate": 0.0013511061810205143, + "loss": 1.6804, + "step": 2010 + }, + { + "epoch": 0.21213080168776371, + "grad_norm": 1.0481418371200562, + "learning_rate": 0.0013509560065638002, + "loss": 1.7182, + "step": 2011 + }, + { + "epoch": 0.21223628691983124, + "grad_norm": 0.7315172553062439, + "learning_rate": 0.001350805764766969, + "loss": 1.7205, + "step": 2012 + }, + { + "epoch": 0.21234177215189873, + "grad_norm": 0.8118634819984436, + "learning_rate": 0.0013506554556468558, + "loss": 1.7179, + "step": 2013 + }, + { + "epoch": 0.21244725738396625, + "grad_norm": 1.181410789489746, + "learning_rate": 0.001350505079220304, + "loss": 1.7147, + "step": 2014 + }, + { + "epoch": 0.21255274261603377, + "grad_norm": 0.7354782223701477, + "learning_rate": 0.0013503546355041636, + "loss": 1.7182, + "step": 2015 + }, + { + "epoch": 0.21265822784810126, + "grad_norm": 0.8045042753219604, + "learning_rate": 
0.0013502041245152924, + "loss": 1.743, + "step": 2016 + }, + { + "epoch": 0.21276371308016878, + "grad_norm": 1.179741621017456, + "learning_rate": 0.0013500535462705565, + "loss": 1.7108, + "step": 2017 + }, + { + "epoch": 0.21286919831223627, + "grad_norm": 1.0224745273590088, + "learning_rate": 0.0013499029007868284, + "loss": 1.7146, + "step": 2018 + }, + { + "epoch": 0.2129746835443038, + "grad_norm": 0.6193869113922119, + "learning_rate": 0.0013497521880809888, + "loss": 1.7241, + "step": 2019 + }, + { + "epoch": 0.21308016877637131, + "grad_norm": 1.1506884098052979, + "learning_rate": 0.001349601408169926, + "loss": 1.7043, + "step": 2020 + }, + { + "epoch": 0.2131856540084388, + "grad_norm": 0.8990529179573059, + "learning_rate": 0.0013494505610705356, + "loss": 1.6933, + "step": 2021 + }, + { + "epoch": 0.21329113924050633, + "grad_norm": 0.6916031241416931, + "learning_rate": 0.0013492996467997205, + "loss": 1.6791, + "step": 2022 + }, + { + "epoch": 0.21339662447257385, + "grad_norm": 1.0579508543014526, + "learning_rate": 0.0013491486653743918, + "loss": 1.758, + "step": 2023 + }, + { + "epoch": 0.21350210970464134, + "grad_norm": 1.0333296060562134, + "learning_rate": 0.0013489976168114676, + "loss": 1.73, + "step": 2024 + }, + { + "epoch": 0.21360759493670886, + "grad_norm": 0.7353625297546387, + "learning_rate": 0.0013488465011278733, + "loss": 1.6642, + "step": 2025 + }, + { + "epoch": 0.21371308016877638, + "grad_norm": 0.6540570855140686, + "learning_rate": 0.0013486953183405425, + "loss": 1.6786, + "step": 2026 + }, + { + "epoch": 0.21381856540084387, + "grad_norm": 0.7348955869674683, + "learning_rate": 0.001348544068466416, + "loss": 1.6815, + "step": 2027 + }, + { + "epoch": 0.2139240506329114, + "grad_norm": 0.7371835112571716, + "learning_rate": 0.0013483927515224418, + "loss": 1.7175, + "step": 2028 + }, + { + "epoch": 0.21402953586497891, + "grad_norm": 0.7274240851402283, + "learning_rate": 0.0013482413675255762, + "loss": 1.7234, + 
"step": 2029 + }, + { + "epoch": 0.2141350210970464, + "grad_norm": 0.6917459964752197, + "learning_rate": 0.0013480899164927823, + "loss": 1.6869, + "step": 2030 + }, + { + "epoch": 0.21424050632911393, + "grad_norm": 0.6687062978744507, + "learning_rate": 0.0013479383984410305, + "loss": 1.7167, + "step": 2031 + }, + { + "epoch": 0.21434599156118145, + "grad_norm": 0.8059474229812622, + "learning_rate": 0.0013477868133873001, + "loss": 1.7045, + "step": 2032 + }, + { + "epoch": 0.21445147679324894, + "grad_norm": 0.7239556908607483, + "learning_rate": 0.0013476351613485762, + "loss": 1.6939, + "step": 2033 + }, + { + "epoch": 0.21455696202531646, + "grad_norm": 0.7880952954292297, + "learning_rate": 0.0013474834423418522, + "loss": 1.6911, + "step": 2034 + }, + { + "epoch": 0.21466244725738395, + "grad_norm": 0.735971212387085, + "learning_rate": 0.0013473316563841296, + "loss": 1.6901, + "step": 2035 + }, + { + "epoch": 0.21476793248945147, + "grad_norm": 0.6842973828315735, + "learning_rate": 0.0013471798034924158, + "loss": 1.7131, + "step": 2036 + }, + { + "epoch": 0.214873417721519, + "grad_norm": 0.8862730860710144, + "learning_rate": 0.0013470278836837275, + "loss": 1.6861, + "step": 2037 + }, + { + "epoch": 0.2149789029535865, + "grad_norm": 0.9055429697036743, + "learning_rate": 0.001346875896975088, + "loss": 1.7011, + "step": 2038 + }, + { + "epoch": 0.215084388185654, + "grad_norm": 0.7255764007568359, + "learning_rate": 0.0013467238433835277, + "loss": 1.6988, + "step": 2039 + }, + { + "epoch": 0.21518987341772153, + "grad_norm": 0.7462263703346252, + "learning_rate": 0.0013465717229260853, + "loss": 1.7177, + "step": 2040 + }, + { + "epoch": 0.21529535864978902, + "grad_norm": 0.7240472435951233, + "learning_rate": 0.0013464195356198065, + "loss": 1.689, + "step": 2041 + }, + { + "epoch": 0.21540084388185654, + "grad_norm": 0.7087171673774719, + "learning_rate": 0.0013462672814817445, + "loss": 1.6791, + "step": 2042 + }, + { + "epoch": 
0.21550632911392406, + "grad_norm": 0.7705201506614685, + "learning_rate": 0.0013461149605289607, + "loss": 1.7177, + "step": 2043 + }, + { + "epoch": 0.21561181434599155, + "grad_norm": 0.7767625451087952, + "learning_rate": 0.001345962572778523, + "loss": 1.7445, + "step": 2044 + }, + { + "epoch": 0.21571729957805907, + "grad_norm": 0.7874317169189453, + "learning_rate": 0.0013458101182475073, + "loss": 1.6856, + "step": 2045 + }, + { + "epoch": 0.2158227848101266, + "grad_norm": 0.787982702255249, + "learning_rate": 0.0013456575969529967, + "loss": 1.7168, + "step": 2046 + }, + { + "epoch": 0.2159282700421941, + "grad_norm": 0.7708723545074463, + "learning_rate": 0.001345505008912082, + "loss": 1.68, + "step": 2047 + }, + { + "epoch": 0.2160337552742616, + "grad_norm": 1.0203640460968018, + "learning_rate": 0.0013453523541418623, + "loss": 1.6974, + "step": 2048 + }, + { + "epoch": 0.21613924050632913, + "grad_norm": 1.4832055568695068, + "learning_rate": 0.001345199632659442, + "loss": 1.7087, + "step": 2049 + }, + { + "epoch": 0.21624472573839662, + "grad_norm": 0.7408728003501892, + "learning_rate": 0.001345046844481935, + "loss": 1.7136, + "step": 2050 + }, + { + "epoch": 0.21635021097046414, + "grad_norm": 1.540625810623169, + "learning_rate": 0.0013448939896264622, + "loss": 1.7308, + "step": 2051 + }, + { + "epoch": 0.21645569620253163, + "grad_norm": 0.7784832119941711, + "learning_rate": 0.001344741068110151, + "loss": 1.7407, + "step": 2052 + }, + { + "epoch": 0.21656118143459915, + "grad_norm": 0.9747315645217896, + "learning_rate": 0.001344588079950138, + "loss": 1.6941, + "step": 2053 + }, + { + "epoch": 0.21666666666666667, + "grad_norm": 0.8125371336936951, + "learning_rate": 0.0013444350251635654, + "loss": 1.7029, + "step": 2054 + }, + { + "epoch": 0.21677215189873417, + "grad_norm": 0.830982506275177, + "learning_rate": 0.0013442819037675843, + "loss": 1.7073, + "step": 2055 + }, + { + "epoch": 0.2168776371308017, + "grad_norm": 
0.7414536476135254, + "learning_rate": 0.0013441287157793522, + "loss": 1.7077, + "step": 2056 + }, + { + "epoch": 0.2169831223628692, + "grad_norm": 0.7634840607643127, + "learning_rate": 0.0013439754612160353, + "loss": 1.686, + "step": 2057 + }, + { + "epoch": 0.2170886075949367, + "grad_norm": 0.6957299709320068, + "learning_rate": 0.001343822140094806, + "loss": 1.6598, + "step": 2058 + }, + { + "epoch": 0.21719409282700422, + "grad_norm": 0.7677130699157715, + "learning_rate": 0.0013436687524328449, + "loss": 1.6634, + "step": 2059 + }, + { + "epoch": 0.21729957805907174, + "grad_norm": 0.6584020256996155, + "learning_rate": 0.0013435152982473396, + "loss": 1.6642, + "step": 2060 + }, + { + "epoch": 0.21740506329113923, + "grad_norm": 0.72737717628479, + "learning_rate": 0.0013433617775554854, + "loss": 1.6886, + "step": 2061 + }, + { + "epoch": 0.21751054852320675, + "grad_norm": 0.7139946222305298, + "learning_rate": 0.0013432081903744857, + "loss": 1.7199, + "step": 2062 + }, + { + "epoch": 0.21761603375527427, + "grad_norm": 0.9219478368759155, + "learning_rate": 0.00134305453672155, + "loss": 1.6746, + "step": 2063 + }, + { + "epoch": 0.21772151898734177, + "grad_norm": 0.7889354825019836, + "learning_rate": 0.0013429008166138965, + "loss": 1.6676, + "step": 2064 + }, + { + "epoch": 0.2178270042194093, + "grad_norm": 0.6869561672210693, + "learning_rate": 0.0013427470300687498, + "loss": 1.6723, + "step": 2065 + }, + { + "epoch": 0.21793248945147678, + "grad_norm": 0.6832531690597534, + "learning_rate": 0.0013425931771033426, + "loss": 1.719, + "step": 2066 + }, + { + "epoch": 0.2180379746835443, + "grad_norm": 0.6725070476531982, + "learning_rate": 0.0013424392577349152, + "loss": 1.6669, + "step": 2067 + }, + { + "epoch": 0.21814345991561182, + "grad_norm": 0.7334038019180298, + "learning_rate": 0.001342285271980715, + "loss": 1.7039, + "step": 2068 + }, + { + "epoch": 0.2182489451476793, + "grad_norm": 0.6690826416015625, + "learning_rate": 
0.0013421312198579963, + "loss": 1.7103, + "step": 2069 + }, + { + "epoch": 0.21835443037974683, + "grad_norm": 0.6701409816741943, + "learning_rate": 0.0013419771013840217, + "loss": 1.6897, + "step": 2070 + }, + { + "epoch": 0.21845991561181435, + "grad_norm": 0.6492443680763245, + "learning_rate": 0.0013418229165760613, + "loss": 1.7179, + "step": 2071 + }, + { + "epoch": 0.21856540084388185, + "grad_norm": 0.7301512360572815, + "learning_rate": 0.001341668665451392, + "loss": 1.6895, + "step": 2072 + }, + { + "epoch": 0.21867088607594937, + "grad_norm": 0.7302676439285278, + "learning_rate": 0.0013415143480272982, + "loss": 1.707, + "step": 2073 + }, + { + "epoch": 0.2187763713080169, + "grad_norm": 0.6876764297485352, + "learning_rate": 0.0013413599643210723, + "loss": 1.6833, + "step": 2074 + }, + { + "epoch": 0.21888185654008438, + "grad_norm": 0.6440564393997192, + "learning_rate": 0.0013412055143500136, + "loss": 1.6777, + "step": 2075 + }, + { + "epoch": 0.2189873417721519, + "grad_norm": 0.6523862481117249, + "learning_rate": 0.001341050998131429, + "loss": 1.6625, + "step": 2076 + }, + { + "epoch": 0.21909282700421942, + "grad_norm": 0.6346778273582458, + "learning_rate": 0.0013408964156826327, + "loss": 1.7095, + "step": 2077 + }, + { + "epoch": 0.2191983122362869, + "grad_norm": 0.6939264535903931, + "learning_rate": 0.0013407417670209467, + "loss": 1.6732, + "step": 2078 + }, + { + "epoch": 0.21930379746835443, + "grad_norm": 0.7111228704452515, + "learning_rate": 0.0013405870521636999, + "loss": 1.7146, + "step": 2079 + }, + { + "epoch": 0.21940928270042195, + "grad_norm": 0.6867181658744812, + "learning_rate": 0.001340432271128229, + "loss": 1.7173, + "step": 2080 + }, + { + "epoch": 0.21951476793248945, + "grad_norm": 0.9025027751922607, + "learning_rate": 0.001340277423931878, + "loss": 1.6849, + "step": 2081 + }, + { + "epoch": 0.21962025316455697, + "grad_norm": 1.341454267501831, + "learning_rate": 0.0013401225105919982, + "loss": 1.6835, + 
"step": 2082 + }, + { + "epoch": 0.21972573839662446, + "grad_norm": 0.9771789312362671, + "learning_rate": 0.0013399675311259484, + "loss": 1.6727, + "step": 2083 + }, + { + "epoch": 0.21983122362869198, + "grad_norm": 0.8870276212692261, + "learning_rate": 0.0013398124855510951, + "loss": 1.6864, + "step": 2084 + }, + { + "epoch": 0.2199367088607595, + "grad_norm": 0.7793949842453003, + "learning_rate": 0.0013396573738848115, + "loss": 1.7006, + "step": 2085 + }, + { + "epoch": 0.220042194092827, + "grad_norm": 1.0531845092773438, + "learning_rate": 0.001339502196144479, + "loss": 1.653, + "step": 2086 + }, + { + "epoch": 0.2201476793248945, + "grad_norm": 1.1238453388214111, + "learning_rate": 0.0013393469523474858, + "loss": 1.7424, + "step": 2087 + }, + { + "epoch": 0.22025316455696203, + "grad_norm": 1.0602532625198364, + "learning_rate": 0.001339191642511228, + "loss": 1.6573, + "step": 2088 + }, + { + "epoch": 0.22035864978902953, + "grad_norm": 0.8835633397102356, + "learning_rate": 0.0013390362666531085, + "loss": 1.7048, + "step": 2089 + }, + { + "epoch": 0.22046413502109705, + "grad_norm": 0.7268829345703125, + "learning_rate": 0.0013388808247905381, + "loss": 1.7109, + "step": 2090 + }, + { + "epoch": 0.22056962025316457, + "grad_norm": 0.8352702856063843, + "learning_rate": 0.0013387253169409351, + "loss": 1.6942, + "step": 2091 + }, + { + "epoch": 0.22067510548523206, + "grad_norm": 0.8249679803848267, + "learning_rate": 0.0013385697431217247, + "loss": 1.7031, + "step": 2092 + }, + { + "epoch": 0.22078059071729958, + "grad_norm": 0.7727959156036377, + "learning_rate": 0.0013384141033503394, + "loss": 1.6843, + "step": 2093 + }, + { + "epoch": 0.2208860759493671, + "grad_norm": 0.767487645149231, + "learning_rate": 0.0013382583976442198, + "loss": 1.6903, + "step": 2094 + }, + { + "epoch": 0.2209915611814346, + "grad_norm": 0.6412201523780823, + "learning_rate": 0.0013381026260208136, + "loss": 1.637, + "step": 2095 + }, + { + "epoch": 
0.2210970464135021, + "grad_norm": 0.7857904434204102, + "learning_rate": 0.0013379467884975756, + "loss": 1.7005, + "step": 2096 + }, + { + "epoch": 0.22120253164556963, + "grad_norm": 0.7897433638572693, + "learning_rate": 0.001337790885091968, + "loss": 1.7003, + "step": 2097 + }, + { + "epoch": 0.22130801687763713, + "grad_norm": 0.708665132522583, + "learning_rate": 0.0013376349158214609, + "loss": 1.7267, + "step": 2098 + }, + { + "epoch": 0.22141350210970465, + "grad_norm": 0.7874838709831238, + "learning_rate": 0.0013374788807035314, + "loss": 1.7378, + "step": 2099 + }, + { + "epoch": 0.22151898734177214, + "grad_norm": 0.8761564493179321, + "learning_rate": 0.0013373227797556634, + "loss": 1.6927, + "step": 2100 + }, + { + "epoch": 0.22162447257383966, + "grad_norm": 0.7902648448944092, + "learning_rate": 0.0013371666129953497, + "loss": 1.683, + "step": 2101 + }, + { + "epoch": 0.22172995780590718, + "grad_norm": 0.7585179805755615, + "learning_rate": 0.0013370103804400887, + "loss": 1.7248, + "step": 2102 + }, + { + "epoch": 0.22183544303797467, + "grad_norm": 0.8421981930732727, + "learning_rate": 0.001336854082107388, + "loss": 1.6862, + "step": 2103 + }, + { + "epoch": 0.2219409282700422, + "grad_norm": 0.9493434429168701, + "learning_rate": 0.001336697718014761, + "loss": 1.6902, + "step": 2104 + }, + { + "epoch": 0.2220464135021097, + "grad_norm": 0.8141738176345825, + "learning_rate": 0.001336541288179729, + "loss": 1.7074, + "step": 2105 + }, + { + "epoch": 0.2221518987341772, + "grad_norm": 0.7914167046546936, + "learning_rate": 0.0013363847926198208, + "loss": 1.6618, + "step": 2106 + }, + { + "epoch": 0.22225738396624473, + "grad_norm": 0.8910229802131653, + "learning_rate": 0.0013362282313525728, + "loss": 1.6856, + "step": 2107 + }, + { + "epoch": 0.22236286919831225, + "grad_norm": 1.047402024269104, + "learning_rate": 0.001336071604395528, + "loss": 1.6663, + "step": 2108 + }, + { + "epoch": 0.22246835443037974, + "grad_norm": 
0.8325074911117554, + "learning_rate": 0.0013359149117662377, + "loss": 1.6834, + "step": 2109 + }, + { + "epoch": 0.22257383966244726, + "grad_norm": 0.7217932939529419, + "learning_rate": 0.00133575815348226, + "loss": 1.6918, + "step": 2110 + }, + { + "epoch": 0.22267932489451478, + "grad_norm": 0.8378740549087524, + "learning_rate": 0.0013356013295611603, + "loss": 1.6478, + "step": 2111 + }, + { + "epoch": 0.22278481012658227, + "grad_norm": 1.2772005796432495, + "learning_rate": 0.0013354444400205114, + "loss": 1.6958, + "step": 2112 + }, + { + "epoch": 0.2228902953586498, + "grad_norm": 0.8510180711746216, + "learning_rate": 0.0013352874848778938, + "loss": 1.7147, + "step": 2113 + }, + { + "epoch": 0.2229957805907173, + "grad_norm": 0.7494268417358398, + "learning_rate": 0.0013351304641508951, + "loss": 1.6847, + "step": 2114 + }, + { + "epoch": 0.2231012658227848, + "grad_norm": 0.8048422336578369, + "learning_rate": 0.0013349733778571101, + "loss": 1.695, + "step": 2115 + }, + { + "epoch": 0.22320675105485233, + "grad_norm": 0.9114487767219543, + "learning_rate": 0.0013348162260141412, + "loss": 1.6906, + "step": 2116 + }, + { + "epoch": 0.22331223628691982, + "grad_norm": 0.7489985823631287, + "learning_rate": 0.001334659008639598, + "loss": 1.6983, + "step": 2117 + }, + { + "epoch": 0.22341772151898734, + "grad_norm": 0.8249994516372681, + "learning_rate": 0.0013345017257510975, + "loss": 1.6848, + "step": 2118 + }, + { + "epoch": 0.22352320675105486, + "grad_norm": 0.9318634271621704, + "learning_rate": 0.001334344377366264, + "loss": 1.7119, + "step": 2119 + }, + { + "epoch": 0.22362869198312235, + "grad_norm": 1.097084403038025, + "learning_rate": 0.0013341869635027292, + "loss": 1.677, + "step": 2120 + }, + { + "epoch": 0.22373417721518987, + "grad_norm": 1.2067053318023682, + "learning_rate": 0.0013340294841781323, + "loss": 1.6909, + "step": 2121 + }, + { + "epoch": 0.2238396624472574, + "grad_norm": 0.6951210498809814, + "learning_rate": 
0.0013338719394101193, + "loss": 1.6525, + "step": 2122 + }, + { + "epoch": 0.22394514767932489, + "grad_norm": 0.8217737674713135, + "learning_rate": 0.001333714329216344, + "loss": 1.7248, + "step": 2123 + }, + { + "epoch": 0.2240506329113924, + "grad_norm": 0.9890689253807068, + "learning_rate": 0.0013335566536144675, + "loss": 1.6871, + "step": 2124 + }, + { + "epoch": 0.22415611814345993, + "grad_norm": 0.9237351417541504, + "learning_rate": 0.0013333989126221581, + "loss": 1.6676, + "step": 2125 + }, + { + "epoch": 0.22426160337552742, + "grad_norm": 0.6882431507110596, + "learning_rate": 0.0013332411062570914, + "loss": 1.6716, + "step": 2126 + }, + { + "epoch": 0.22436708860759494, + "grad_norm": 0.8858751654624939, + "learning_rate": 0.0013330832345369505, + "loss": 1.7272, + "step": 2127 + }, + { + "epoch": 0.22447257383966246, + "grad_norm": 1.0885907411575317, + "learning_rate": 0.0013329252974794256, + "loss": 1.671, + "step": 2128 + }, + { + "epoch": 0.22457805907172995, + "grad_norm": 0.7304031252861023, + "learning_rate": 0.0013327672951022145, + "loss": 1.7097, + "step": 2129 + }, + { + "epoch": 0.22468354430379747, + "grad_norm": 0.7143399119377136, + "learning_rate": 0.001332609227423022, + "loss": 1.6852, + "step": 2130 + }, + { + "epoch": 0.224789029535865, + "grad_norm": 0.9987714886665344, + "learning_rate": 0.0013324510944595605, + "loss": 1.6589, + "step": 2131 + }, + { + "epoch": 0.22489451476793249, + "grad_norm": 1.121476650238037, + "learning_rate": 0.0013322928962295492, + "loss": 1.6334, + "step": 2132 + }, + { + "epoch": 0.225, + "grad_norm": 0.7422921061515808, + "learning_rate": 0.0013321346327507158, + "loss": 1.6616, + "step": 2133 + }, + { + "epoch": 0.2251054852320675, + "grad_norm": 0.828737199306488, + "learning_rate": 0.0013319763040407938, + "loss": 1.7075, + "step": 2134 + }, + { + "epoch": 0.22521097046413502, + "grad_norm": 0.9609590172767639, + "learning_rate": 0.0013318179101175246, + "loss": 1.6884, + "step": 2135 + 
}, + { + "epoch": 0.22531645569620254, + "grad_norm": 0.7084512114524841, + "learning_rate": 0.0013316594509986577, + "loss": 1.7129, + "step": 2136 + }, + { + "epoch": 0.22542194092827003, + "grad_norm": 0.7091960906982422, + "learning_rate": 0.0013315009267019487, + "loss": 1.6718, + "step": 2137 + }, + { + "epoch": 0.22552742616033755, + "grad_norm": 0.7956909537315369, + "learning_rate": 0.0013313423372451614, + "loss": 1.6793, + "step": 2138 + }, + { + "epoch": 0.22563291139240507, + "grad_norm": 0.766071081161499, + "learning_rate": 0.0013311836826460665, + "loss": 1.706, + "step": 2139 + }, + { + "epoch": 0.22573839662447256, + "grad_norm": 0.6688433289527893, + "learning_rate": 0.0013310249629224417, + "loss": 1.7049, + "step": 2140 + }, + { + "epoch": 0.22584388185654009, + "grad_norm": 0.6819438934326172, + "learning_rate": 0.0013308661780920728, + "loss": 1.677, + "step": 2141 + }, + { + "epoch": 0.2259493670886076, + "grad_norm": 0.6818499565124512, + "learning_rate": 0.0013307073281727518, + "loss": 1.6501, + "step": 2142 + }, + { + "epoch": 0.2260548523206751, + "grad_norm": 0.6854677200317383, + "learning_rate": 0.0013305484131822792, + "loss": 1.6817, + "step": 2143 + }, + { + "epoch": 0.22616033755274262, + "grad_norm": 0.6594569087028503, + "learning_rate": 0.001330389433138462, + "loss": 1.7384, + "step": 2144 + }, + { + "epoch": 0.22626582278481014, + "grad_norm": 0.6857029795646667, + "learning_rate": 0.0013302303880591147, + "loss": 1.7055, + "step": 2145 + }, + { + "epoch": 0.22637130801687763, + "grad_norm": 0.723915696144104, + "learning_rate": 0.0013300712779620593, + "loss": 1.682, + "step": 2146 + }, + { + "epoch": 0.22647679324894515, + "grad_norm": 0.8507179021835327, + "learning_rate": 0.0013299121028651246, + "loss": 1.7177, + "step": 2147 + }, + { + "epoch": 0.22658227848101264, + "grad_norm": 0.8650104403495789, + "learning_rate": 0.001329752862786147, + "loss": 1.7122, + "step": 2148 + }, + { + "epoch": 0.22668776371308016, + 
"grad_norm": 0.7042638063430786, + "learning_rate": 0.0013295935577429703, + "loss": 1.6747, + "step": 2149 + }, + { + "epoch": 0.22679324894514769, + "grad_norm": 0.712619960308075, + "learning_rate": 0.0013294341877534454, + "loss": 1.6619, + "step": 2150 + }, + { + "epoch": 0.22689873417721518, + "grad_norm": 0.6927481889724731, + "learning_rate": 0.0013292747528354304, + "loss": 1.7354, + "step": 2151 + }, + { + "epoch": 0.2270042194092827, + "grad_norm": 0.657899022102356, + "learning_rate": 0.0013291152530067907, + "loss": 1.7428, + "step": 2152 + }, + { + "epoch": 0.22710970464135022, + "grad_norm": 0.6894488334655762, + "learning_rate": 0.0013289556882853993, + "loss": 1.6687, + "step": 2153 + }, + { + "epoch": 0.2272151898734177, + "grad_norm": 0.7863173484802246, + "learning_rate": 0.0013287960586891362, + "loss": 1.6978, + "step": 2154 + }, + { + "epoch": 0.22732067510548523, + "grad_norm": 0.871900200843811, + "learning_rate": 0.0013286363642358884, + "loss": 1.7002, + "step": 2155 + }, + { + "epoch": 0.22742616033755275, + "grad_norm": 0.772587239742279, + "learning_rate": 0.0013284766049435504, + "loss": 1.6713, + "step": 2156 + }, + { + "epoch": 0.22753164556962024, + "grad_norm": 0.8684716820716858, + "learning_rate": 0.0013283167808300247, + "loss": 1.6997, + "step": 2157 + }, + { + "epoch": 0.22763713080168776, + "grad_norm": 1.0030254125595093, + "learning_rate": 0.0013281568919132198, + "loss": 1.6564, + "step": 2158 + }, + { + "epoch": 0.22774261603375529, + "grad_norm": 0.7432823777198792, + "learning_rate": 0.0013279969382110524, + "loss": 1.7371, + "step": 2159 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 0.7450642585754395, + "learning_rate": 0.0013278369197414458, + "loss": 1.6897, + "step": 2160 + }, + { + "epoch": 0.2279535864978903, + "grad_norm": 0.9178093075752258, + "learning_rate": 0.0013276768365223306, + "loss": 1.6904, + "step": 2161 + }, + { + "epoch": 0.22805907172995782, + "grad_norm": 1.0636448860168457, + 
"learning_rate": 0.0013275166885716458, + "loss": 1.6723, + "step": 2162 + }, + { + "epoch": 0.2281645569620253, + "grad_norm": 0.8251701593399048, + "learning_rate": 0.0013273564759073361, + "loss": 1.6897, + "step": 2163 + }, + { + "epoch": 0.22827004219409283, + "grad_norm": 0.7345632910728455, + "learning_rate": 0.0013271961985473544, + "loss": 1.6683, + "step": 2164 + }, + { + "epoch": 0.22837552742616032, + "grad_norm": 1.177236795425415, + "learning_rate": 0.0013270358565096606, + "loss": 1.6489, + "step": 2165 + }, + { + "epoch": 0.22848101265822784, + "grad_norm": 0.9681594967842102, + "learning_rate": 0.0013268754498122215, + "loss": 1.7254, + "step": 2166 + }, + { + "epoch": 0.22858649789029536, + "grad_norm": 0.7249543070793152, + "learning_rate": 0.0013267149784730117, + "loss": 1.7312, + "step": 2167 + }, + { + "epoch": 0.22869198312236286, + "grad_norm": 1.1086076498031616, + "learning_rate": 0.0013265544425100128, + "loss": 1.715, + "step": 2168 + }, + { + "epoch": 0.22879746835443038, + "grad_norm": 1.275010108947754, + "learning_rate": 0.0013263938419412137, + "loss": 1.6805, + "step": 2169 + }, + { + "epoch": 0.2289029535864979, + "grad_norm": 0.7401876449584961, + "learning_rate": 0.0013262331767846104, + "loss": 1.6843, + "step": 2170 + }, + { + "epoch": 0.2290084388185654, + "grad_norm": 0.7918276190757751, + "learning_rate": 0.0013260724470582064, + "loss": 1.6704, + "step": 2171 + }, + { + "epoch": 0.2291139240506329, + "grad_norm": 0.9838792085647583, + "learning_rate": 0.001325911652780012, + "loss": 1.6602, + "step": 2172 + }, + { + "epoch": 0.22921940928270043, + "grad_norm": 0.8524264693260193, + "learning_rate": 0.0013257507939680453, + "loss": 1.6469, + "step": 2173 + }, + { + "epoch": 0.22932489451476792, + "grad_norm": 0.716371476650238, + "learning_rate": 0.0013255898706403312, + "loss": 1.6582, + "step": 2174 + }, + { + "epoch": 0.22943037974683544, + "grad_norm": 0.7201961874961853, + "learning_rate": 0.001325428882814902, + 
"loss": 1.6575, + "step": 2175 + }, + { + "epoch": 0.22953586497890296, + "grad_norm": 0.804246723651886, + "learning_rate": 0.001325267830509797, + "loss": 1.6851, + "step": 2176 + }, + { + "epoch": 0.22964135021097046, + "grad_norm": 0.9099164605140686, + "learning_rate": 0.0013251067137430629, + "loss": 1.6589, + "step": 2177 + }, + { + "epoch": 0.22974683544303798, + "grad_norm": 0.9651268720626831, + "learning_rate": 0.001324945532532754, + "loss": 1.6894, + "step": 2178 + }, + { + "epoch": 0.2298523206751055, + "grad_norm": 0.9788320064544678, + "learning_rate": 0.0013247842868969312, + "loss": 1.6925, + "step": 2179 + }, + { + "epoch": 0.229957805907173, + "grad_norm": 0.8352435231208801, + "learning_rate": 0.0013246229768536628, + "loss": 1.6866, + "step": 2180 + }, + { + "epoch": 0.2300632911392405, + "grad_norm": 0.6915081143379211, + "learning_rate": 0.0013244616024210246, + "loss": 1.7045, + "step": 2181 + }, + { + "epoch": 0.230168776371308, + "grad_norm": 0.9020565748214722, + "learning_rate": 0.0013243001636170993, + "loss": 1.6948, + "step": 2182 + }, + { + "epoch": 0.23027426160337552, + "grad_norm": 0.8535006046295166, + "learning_rate": 0.0013241386604599772, + "loss": 1.7048, + "step": 2183 + }, + { + "epoch": 0.23037974683544304, + "grad_norm": 0.8151533603668213, + "learning_rate": 0.001323977092967755, + "loss": 1.6844, + "step": 2184 + }, + { + "epoch": 0.23048523206751054, + "grad_norm": 0.7210182547569275, + "learning_rate": 0.0013238154611585375, + "loss": 1.6589, + "step": 2185 + }, + { + "epoch": 0.23059071729957806, + "grad_norm": 0.841837465763092, + "learning_rate": 0.0013236537650504361, + "loss": 1.7067, + "step": 2186 + }, + { + "epoch": 0.23069620253164558, + "grad_norm": 0.7271419167518616, + "learning_rate": 0.00132349200466157, + "loss": 1.6547, + "step": 2187 + }, + { + "epoch": 0.23080168776371307, + "grad_norm": 0.7574219703674316, + "learning_rate": 0.0013233301800100652, + "loss": 1.6828, + "step": 2188 + }, + { + 
"epoch": 0.2309071729957806, + "grad_norm": 0.6297644972801208, + "learning_rate": 0.0013231682911140545, + "loss": 1.6892, + "step": 2189 + }, + { + "epoch": 0.2310126582278481, + "grad_norm": 0.6509144306182861, + "learning_rate": 0.001323006337991679, + "loss": 1.687, + "step": 2190 + }, + { + "epoch": 0.2311181434599156, + "grad_norm": 0.6991715431213379, + "learning_rate": 0.0013228443206610861, + "loss": 1.6879, + "step": 2191 + }, + { + "epoch": 0.23122362869198312, + "grad_norm": 0.7651558518409729, + "learning_rate": 0.0013226822391404305, + "loss": 1.6837, + "step": 2192 + }, + { + "epoch": 0.23132911392405064, + "grad_norm": 0.711936891078949, + "learning_rate": 0.0013225200934478744, + "loss": 1.734, + "step": 2193 + }, + { + "epoch": 0.23143459915611814, + "grad_norm": 0.6811397075653076, + "learning_rate": 0.0013223578836015868, + "loss": 1.7158, + "step": 2194 + }, + { + "epoch": 0.23154008438818566, + "grad_norm": 0.7964632511138916, + "learning_rate": 0.0013221956096197446, + "loss": 1.6651, + "step": 2195 + }, + { + "epoch": 0.23164556962025318, + "grad_norm": 0.7923198938369751, + "learning_rate": 0.001322033271520531, + "loss": 1.6596, + "step": 2196 + }, + { + "epoch": 0.23175105485232067, + "grad_norm": 0.8104314804077148, + "learning_rate": 0.001321870869322137, + "loss": 1.6795, + "step": 2197 + }, + { + "epoch": 0.2318565400843882, + "grad_norm": 0.6918461322784424, + "learning_rate": 0.0013217084030427604, + "loss": 1.658, + "step": 2198 + }, + { + "epoch": 0.23196202531645568, + "grad_norm": 1.0207080841064453, + "learning_rate": 0.0013215458727006064, + "loss": 1.6639, + "step": 2199 + }, + { + "epoch": 0.2320675105485232, + "grad_norm": 1.0670534372329712, + "learning_rate": 0.0013213832783138873, + "loss": 1.7276, + "step": 2200 + }, + { + "epoch": 0.23217299578059072, + "grad_norm": 0.7524067163467407, + "learning_rate": 0.0013212206199008226, + "loss": 1.6805, + "step": 2201 + }, + { + "epoch": 0.23227848101265822, + "grad_norm": 
0.6623596549034119, + "learning_rate": 0.0013210578974796393, + "loss": 1.6876, + "step": 2202 + }, + { + "epoch": 0.23238396624472574, + "grad_norm": 0.7034754157066345, + "learning_rate": 0.001320895111068571, + "loss": 1.6665, + "step": 2203 + }, + { + "epoch": 0.23248945147679326, + "grad_norm": 0.693537175655365, + "learning_rate": 0.0013207322606858588, + "loss": 1.6628, + "step": 2204 + }, + { + "epoch": 0.23259493670886075, + "grad_norm": 0.6720616221427917, + "learning_rate": 0.001320569346349751, + "loss": 1.6208, + "step": 2205 + }, + { + "epoch": 0.23270042194092827, + "grad_norm": 0.7410646080970764, + "learning_rate": 0.0013204063680785025, + "loss": 1.7246, + "step": 2206 + }, + { + "epoch": 0.2328059071729958, + "grad_norm": 0.8171406388282776, + "learning_rate": 0.0013202433258903761, + "loss": 1.6628, + "step": 2207 + }, + { + "epoch": 0.23291139240506328, + "grad_norm": 0.8520695567131042, + "learning_rate": 0.001320080219803642, + "loss": 1.6801, + "step": 2208 + }, + { + "epoch": 0.2330168776371308, + "grad_norm": 0.8768017888069153, + "learning_rate": 0.0013199170498365764, + "loss": 1.6581, + "step": 2209 + }, + { + "epoch": 0.23312236286919832, + "grad_norm": 0.7640048265457153, + "learning_rate": 0.0013197538160074633, + "loss": 1.6817, + "step": 2210 + }, + { + "epoch": 0.23322784810126582, + "grad_norm": 0.7040453553199768, + "learning_rate": 0.0013195905183345943, + "loss": 1.6711, + "step": 2211 + }, + { + "epoch": 0.23333333333333334, + "grad_norm": 1.0890722274780273, + "learning_rate": 0.0013194271568362673, + "loss": 1.6936, + "step": 2212 + }, + { + "epoch": 0.23343881856540086, + "grad_norm": 0.9560673832893372, + "learning_rate": 0.001319263731530788, + "loss": 1.6705, + "step": 2213 + }, + { + "epoch": 0.23354430379746835, + "grad_norm": 0.6959980726242065, + "learning_rate": 0.0013191002424364693, + "loss": 1.6831, + "step": 2214 + }, + { + "epoch": 0.23364978902953587, + "grad_norm": 0.6521531343460083, + "learning_rate": 
0.0013189366895716302, + "loss": 1.7045, + "step": 2215 + }, + { + "epoch": 0.23375527426160336, + "grad_norm": 0.7520129084587097, + "learning_rate": 0.0013187730729545982, + "loss": 1.6762, + "step": 2216 + }, + { + "epoch": 0.23386075949367088, + "grad_norm": 0.671442985534668, + "learning_rate": 0.0013186093926037072, + "loss": 1.6879, + "step": 2217 + }, + { + "epoch": 0.2339662447257384, + "grad_norm": 0.7404191493988037, + "learning_rate": 0.0013184456485372986, + "loss": 1.702, + "step": 2218 + }, + { + "epoch": 0.2340717299578059, + "grad_norm": 1.016361117362976, + "learning_rate": 0.0013182818407737203, + "loss": 1.6671, + "step": 2219 + }, + { + "epoch": 0.23417721518987342, + "grad_norm": 0.9428738355636597, + "learning_rate": 0.0013181179693313283, + "loss": 1.7032, + "step": 2220 + }, + { + "epoch": 0.23428270042194094, + "grad_norm": 0.6993369460105896, + "learning_rate": 0.0013179540342284847, + "loss": 1.6709, + "step": 2221 + }, + { + "epoch": 0.23438818565400843, + "grad_norm": 0.949020266532898, + "learning_rate": 0.0013177900354835598, + "loss": 1.6421, + "step": 2222 + }, + { + "epoch": 0.23449367088607595, + "grad_norm": 1.1590007543563843, + "learning_rate": 0.00131762597311493, + "loss": 1.6757, + "step": 2223 + }, + { + "epoch": 0.23459915611814347, + "grad_norm": 0.7403213381767273, + "learning_rate": 0.0013174618471409793, + "loss": 1.7083, + "step": 2224 + }, + { + "epoch": 0.23470464135021096, + "grad_norm": 0.808319628238678, + "learning_rate": 0.0013172976575800991, + "loss": 1.6716, + "step": 2225 + }, + { + "epoch": 0.23481012658227848, + "grad_norm": 1.0501219034194946, + "learning_rate": 0.0013171334044506878, + "loss": 1.7029, + "step": 2226 + }, + { + "epoch": 0.234915611814346, + "grad_norm": 1.224710464477539, + "learning_rate": 0.0013169690877711502, + "loss": 1.6692, + "step": 2227 + }, + { + "epoch": 0.2350210970464135, + "grad_norm": 0.8451716899871826, + "learning_rate": 0.0013168047075598993, + "loss": 1.6818, + 
"step": 2228 + }, + { + "epoch": 0.23512658227848102, + "grad_norm": 0.7384949326515198, + "learning_rate": 0.0013166402638353548, + "loss": 1.6923, + "step": 2229 + }, + { + "epoch": 0.23523206751054854, + "grad_norm": 1.1804237365722656, + "learning_rate": 0.0013164757566159428, + "loss": 1.6689, + "step": 2230 + }, + { + "epoch": 0.23533755274261603, + "grad_norm": 0.7365193963050842, + "learning_rate": 0.0013163111859200978, + "loss": 1.6698, + "step": 2231 + }, + { + "epoch": 0.23544303797468355, + "grad_norm": 1.0361509323120117, + "learning_rate": 0.0013161465517662603, + "loss": 1.681, + "step": 2232 + }, + { + "epoch": 0.23554852320675104, + "grad_norm": 1.0377280712127686, + "learning_rate": 0.001315981854172879, + "loss": 1.7057, + "step": 2233 + }, + { + "epoch": 0.23565400843881856, + "grad_norm": 0.9205273389816284, + "learning_rate": 0.0013158170931584084, + "loss": 1.6724, + "step": 2234 + }, + { + "epoch": 0.23575949367088608, + "grad_norm": 0.6692016124725342, + "learning_rate": 0.0013156522687413114, + "loss": 1.6455, + "step": 2235 + }, + { + "epoch": 0.23586497890295358, + "grad_norm": 0.8016514778137207, + "learning_rate": 0.0013154873809400568, + "loss": 1.6665, + "step": 2236 + }, + { + "epoch": 0.2359704641350211, + "grad_norm": 0.9313954710960388, + "learning_rate": 0.0013153224297731215, + "loss": 1.6475, + "step": 2237 + }, + { + "epoch": 0.23607594936708862, + "grad_norm": 0.7346334457397461, + "learning_rate": 0.0013151574152589888, + "loss": 1.6958, + "step": 2238 + }, + { + "epoch": 0.2361814345991561, + "grad_norm": 0.7934582829475403, + "learning_rate": 0.00131499233741615, + "loss": 1.6621, + "step": 2239 + }, + { + "epoch": 0.23628691983122363, + "grad_norm": 1.1093356609344482, + "learning_rate": 0.001314827196263102, + "loss": 1.673, + "step": 2240 + }, + { + "epoch": 0.23639240506329115, + "grad_norm": 0.8811218738555908, + "learning_rate": 0.0013146619918183507, + "loss": 1.6664, + "step": 2241 + }, + { + "epoch": 
0.23649789029535864, + "grad_norm": 0.819142758846283, + "learning_rate": 0.0013144967241004073, + "loss": 1.645, + "step": 2242 + }, + { + "epoch": 0.23660337552742616, + "grad_norm": 1.5083472728729248, + "learning_rate": 0.001314331393127791, + "loss": 1.6557, + "step": 2243 + }, + { + "epoch": 0.23670886075949368, + "grad_norm": 0.8308227062225342, + "learning_rate": 0.0013141659989190282, + "loss": 1.6639, + "step": 2244 + }, + { + "epoch": 0.23681434599156118, + "grad_norm": 0.7359859943389893, + "learning_rate": 0.001314000541492652, + "loss": 1.6764, + "step": 2245 + }, + { + "epoch": 0.2369198312236287, + "grad_norm": 0.8649410605430603, + "learning_rate": 0.0013138350208672029, + "loss": 1.7363, + "step": 2246 + }, + { + "epoch": 0.2370253164556962, + "grad_norm": 0.9741116762161255, + "learning_rate": 0.001313669437061228, + "loss": 1.6435, + "step": 2247 + }, + { + "epoch": 0.2371308016877637, + "grad_norm": 0.7931279540061951, + "learning_rate": 0.0013135037900932822, + "loss": 1.6999, + "step": 2248 + }, + { + "epoch": 0.23723628691983123, + "grad_norm": 1.0431009531021118, + "learning_rate": 0.0013133380799819267, + "loss": 1.7028, + "step": 2249 + }, + { + "epoch": 0.23734177215189872, + "grad_norm": 1.084334135055542, + "learning_rate": 0.0013131723067457302, + "loss": 1.6709, + "step": 2250 + }, + { + "epoch": 0.23744725738396624, + "grad_norm": 1.1444836854934692, + "learning_rate": 0.0013130064704032684, + "loss": 1.6576, + "step": 2251 + }, + { + "epoch": 0.23755274261603376, + "grad_norm": 0.7079692482948303, + "learning_rate": 0.0013128405709731245, + "loss": 1.6717, + "step": 2252 + }, + { + "epoch": 0.23765822784810126, + "grad_norm": 0.9345234036445618, + "learning_rate": 0.001312674608473888, + "loss": 1.6678, + "step": 2253 + }, + { + "epoch": 0.23776371308016878, + "grad_norm": 1.0634424686431885, + "learning_rate": 0.0013125085829241558, + "loss": 1.6656, + "step": 2254 + }, + { + "epoch": 0.2378691983122363, + "grad_norm": 
0.7294056415557861, + "learning_rate": 0.0013123424943425317, + "loss": 1.6589, + "step": 2255 + }, + { + "epoch": 0.2379746835443038, + "grad_norm": 0.8709049224853516, + "learning_rate": 0.0013121763427476273, + "loss": 1.6609, + "step": 2256 + }, + { + "epoch": 0.2380801687763713, + "grad_norm": 0.6888116002082825, + "learning_rate": 0.0013120101281580605, + "loss": 1.6328, + "step": 2257 + }, + { + "epoch": 0.23818565400843883, + "grad_norm": 0.7187904715538025, + "learning_rate": 0.0013118438505924563, + "loss": 1.6594, + "step": 2258 + }, + { + "epoch": 0.23829113924050632, + "grad_norm": 0.7715730667114258, + "learning_rate": 0.001311677510069447, + "loss": 1.6943, + "step": 2259 + }, + { + "epoch": 0.23839662447257384, + "grad_norm": 0.7155634164810181, + "learning_rate": 0.0013115111066076721, + "loss": 1.684, + "step": 2260 + }, + { + "epoch": 0.23850210970464136, + "grad_norm": 0.7186439037322998, + "learning_rate": 0.0013113446402257774, + "loss": 1.6608, + "step": 2261 + }, + { + "epoch": 0.23860759493670886, + "grad_norm": 0.6686859130859375, + "learning_rate": 0.001311178110942417, + "loss": 1.6631, + "step": 2262 + }, + { + "epoch": 0.23871308016877638, + "grad_norm": 0.747749388217926, + "learning_rate": 0.0013110115187762506, + "loss": 1.6869, + "step": 2263 + }, + { + "epoch": 0.23881856540084387, + "grad_norm": 0.7314268350601196, + "learning_rate": 0.0013108448637459465, + "loss": 1.7191, + "step": 2264 + }, + { + "epoch": 0.2389240506329114, + "grad_norm": 0.7372973561286926, + "learning_rate": 0.0013106781458701784, + "loss": 1.7331, + "step": 2265 + }, + { + "epoch": 0.2390295358649789, + "grad_norm": 0.7236735224723816, + "learning_rate": 0.0013105113651676287, + "loss": 1.6826, + "step": 2266 + }, + { + "epoch": 0.2391350210970464, + "grad_norm": 0.7611958384513855, + "learning_rate": 0.001310344521656985, + "loss": 1.7199, + "step": 2267 + }, + { + "epoch": 0.23924050632911392, + "grad_norm": 0.6478826999664307, + "learning_rate": 
0.001310177615356944, + "loss": 1.6909, + "step": 2268 + }, + { + "epoch": 0.23934599156118144, + "grad_norm": 0.7545151710510254, + "learning_rate": 0.0013100106462862076, + "loss": 1.6869, + "step": 2269 + }, + { + "epoch": 0.23945147679324894, + "grad_norm": 0.7297106385231018, + "learning_rate": 0.0013098436144634862, + "loss": 1.6981, + "step": 2270 + }, + { + "epoch": 0.23955696202531646, + "grad_norm": 0.7045753598213196, + "learning_rate": 0.0013096765199074958, + "loss": 1.6872, + "step": 2271 + }, + { + "epoch": 0.23966244725738398, + "grad_norm": 0.6816421151161194, + "learning_rate": 0.0013095093626369608, + "loss": 1.6555, + "step": 2272 + }, + { + "epoch": 0.23976793248945147, + "grad_norm": 0.6959068179130554, + "learning_rate": 0.0013093421426706117, + "loss": 1.6505, + "step": 2273 + }, + { + "epoch": 0.239873417721519, + "grad_norm": 0.723565936088562, + "learning_rate": 0.0013091748600271862, + "loss": 1.6911, + "step": 2274 + }, + { + "epoch": 0.2399789029535865, + "grad_norm": 1.0572328567504883, + "learning_rate": 0.0013090075147254294, + "loss": 1.6698, + "step": 2275 + }, + { + "epoch": 0.240084388185654, + "grad_norm": 0.7834790945053101, + "learning_rate": 0.0013088401067840932, + "loss": 1.699, + "step": 2276 + }, + { + "epoch": 0.24018987341772152, + "grad_norm": 0.687852680683136, + "learning_rate": 0.0013086726362219363, + "loss": 1.6812, + "step": 2277 + }, + { + "epoch": 0.24029535864978904, + "grad_norm": 0.6773978471755981, + "learning_rate": 0.0013085051030577246, + "loss": 1.6775, + "step": 2278 + }, + { + "epoch": 0.24040084388185654, + "grad_norm": 0.6702667474746704, + "learning_rate": 0.0013083375073102315, + "loss": 1.6888, + "step": 2279 + }, + { + "epoch": 0.24050632911392406, + "grad_norm": 0.6954066157341003, + "learning_rate": 0.0013081698489982364, + "loss": 1.6914, + "step": 2280 + }, + { + "epoch": 0.24061181434599155, + "grad_norm": 0.6906900405883789, + "learning_rate": 0.0013080021281405264, + "loss": 1.6711, + 
"step": 2281 + }, + { + "epoch": 0.24071729957805907, + "grad_norm": 0.8775025606155396, + "learning_rate": 0.0013078343447558954, + "loss": 1.642, + "step": 2282 + }, + { + "epoch": 0.2408227848101266, + "grad_norm": 1.1817222833633423, + "learning_rate": 0.0013076664988631447, + "loss": 1.6782, + "step": 2283 + }, + { + "epoch": 0.24092827004219408, + "grad_norm": 1.0530728101730347, + "learning_rate": 0.001307498590481082, + "loss": 1.6395, + "step": 2284 + }, + { + "epoch": 0.2410337552742616, + "grad_norm": 0.6859136819839478, + "learning_rate": 0.001307330619628522, + "loss": 1.6589, + "step": 2285 + }, + { + "epoch": 0.24113924050632912, + "grad_norm": 0.8616005778312683, + "learning_rate": 0.0013071625863242875, + "loss": 1.7047, + "step": 2286 + }, + { + "epoch": 0.24124472573839661, + "grad_norm": 0.8737305402755737, + "learning_rate": 0.0013069944905872064, + "loss": 1.6882, + "step": 2287 + }, + { + "epoch": 0.24135021097046414, + "grad_norm": 0.6956271529197693, + "learning_rate": 0.0013068263324361156, + "loss": 1.6842, + "step": 2288 + }, + { + "epoch": 0.24145569620253166, + "grad_norm": 0.6764815449714661, + "learning_rate": 0.0013066581118898574, + "loss": 1.6497, + "step": 2289 + }, + { + "epoch": 0.24156118143459915, + "grad_norm": 0.7808052897453308, + "learning_rate": 0.001306489828967282, + "loss": 1.6876, + "step": 2290 + }, + { + "epoch": 0.24166666666666667, + "grad_norm": 0.8137451410293579, + "learning_rate": 0.0013063214836872465, + "loss": 1.6783, + "step": 2291 + }, + { + "epoch": 0.2417721518987342, + "grad_norm": 0.7655509114265442, + "learning_rate": 0.0013061530760686145, + "loss": 1.6253, + "step": 2292 + }, + { + "epoch": 0.24187763713080168, + "grad_norm": 0.7383624911308289, + "learning_rate": 0.0013059846061302574, + "loss": 1.6735, + "step": 2293 + }, + { + "epoch": 0.2419831223628692, + "grad_norm": 0.6313809752464294, + "learning_rate": 0.0013058160738910526, + "loss": 1.6771, + "step": 2294 + }, + { + "epoch": 
0.24208860759493672, + "grad_norm": 0.6832754611968994, + "learning_rate": 0.0013056474793698852, + "loss": 1.6704, + "step": 2295 + }, + { + "epoch": 0.24219409282700421, + "grad_norm": 0.7055528163909912, + "learning_rate": 0.001305478822585647, + "loss": 1.6764, + "step": 2296 + }, + { + "epoch": 0.24229957805907174, + "grad_norm": 0.6764196753501892, + "learning_rate": 0.001305310103557237, + "loss": 1.7249, + "step": 2297 + }, + { + "epoch": 0.24240506329113923, + "grad_norm": 0.6958417892456055, + "learning_rate": 0.0013051413223035607, + "loss": 1.6602, + "step": 2298 + }, + { + "epoch": 0.24251054852320675, + "grad_norm": 0.6194214820861816, + "learning_rate": 0.0013049724788435312, + "loss": 1.6565, + "step": 2299 + }, + { + "epoch": 0.24261603375527427, + "grad_norm": 0.6972980499267578, + "learning_rate": 0.0013048035731960679, + "loss": 1.6381, + "step": 2300 + }, + { + "epoch": 0.24272151898734176, + "grad_norm": 0.7368130087852478, + "learning_rate": 0.0013046346053800979, + "loss": 1.6828, + "step": 2301 + }, + { + "epoch": 0.24282700421940928, + "grad_norm": 0.8719221353530884, + "learning_rate": 0.0013044655754145546, + "loss": 1.635, + "step": 2302 + }, + { + "epoch": 0.2429324894514768, + "grad_norm": 0.7082338333129883, + "learning_rate": 0.001304296483318379, + "loss": 1.6914, + "step": 2303 + }, + { + "epoch": 0.2430379746835443, + "grad_norm": 0.7057151198387146, + "learning_rate": 0.0013041273291105181, + "loss": 1.6719, + "step": 2304 + }, + { + "epoch": 0.24314345991561181, + "grad_norm": 0.6929590106010437, + "learning_rate": 0.0013039581128099272, + "loss": 1.6592, + "step": 2305 + }, + { + "epoch": 0.24324894514767934, + "grad_norm": 0.7683284878730774, + "learning_rate": 0.0013037888344355673, + "loss": 1.6879, + "step": 2306 + }, + { + "epoch": 0.24335443037974683, + "grad_norm": 1.1442360877990723, + "learning_rate": 0.001303619494006407, + "loss": 1.6497, + "step": 2307 + }, + { + "epoch": 0.24345991561181435, + "grad_norm": 
1.0288599729537964, + "learning_rate": 0.0013034500915414218, + "loss": 1.632, + "step": 2308 + }, + { + "epoch": 0.24356540084388187, + "grad_norm": 0.7286476492881775, + "learning_rate": 0.0013032806270595941, + "loss": 1.68, + "step": 2309 + }, + { + "epoch": 0.24367088607594936, + "grad_norm": 0.802225649356842, + "learning_rate": 0.0013031111005799133, + "loss": 1.688, + "step": 2310 + }, + { + "epoch": 0.24377637130801688, + "grad_norm": 0.7976981401443481, + "learning_rate": 0.0013029415121213756, + "loss": 1.663, + "step": 2311 + }, + { + "epoch": 0.2438818565400844, + "grad_norm": 0.8118452429771423, + "learning_rate": 0.0013027718617029842, + "loss": 1.6953, + "step": 2312 + }, + { + "epoch": 0.2439873417721519, + "grad_norm": 0.7371177673339844, + "learning_rate": 0.0013026021493437495, + "loss": 1.7245, + "step": 2313 + }, + { + "epoch": 0.24409282700421941, + "grad_norm": 0.7439901232719421, + "learning_rate": 0.0013024323750626882, + "loss": 1.6702, + "step": 2314 + }, + { + "epoch": 0.2441983122362869, + "grad_norm": 1.08975088596344, + "learning_rate": 0.0013022625388788248, + "loss": 1.6768, + "step": 2315 + }, + { + "epoch": 0.24430379746835443, + "grad_norm": 1.282416582107544, + "learning_rate": 0.0013020926408111903, + "loss": 1.7115, + "step": 2316 + }, + { + "epoch": 0.24440928270042195, + "grad_norm": 0.652447521686554, + "learning_rate": 0.001301922680878822, + "loss": 1.6301, + "step": 2317 + }, + { + "epoch": 0.24451476793248944, + "grad_norm": 1.1928966045379639, + "learning_rate": 0.001301752659100765, + "loss": 1.6717, + "step": 2318 + }, + { + "epoch": 0.24462025316455696, + "grad_norm": 1.2368344068527222, + "learning_rate": 0.001301582575496072, + "loss": 1.698, + "step": 2319 + }, + { + "epoch": 0.24472573839662448, + "grad_norm": 0.6524198055267334, + "learning_rate": 0.0013014124300838004, + "loss": 1.6309, + "step": 2320 + }, + { + "epoch": 0.24483122362869197, + "grad_norm": 1.1366405487060547, + "learning_rate": 
0.0013012422228830165, + "loss": 1.6386, + "step": 2321 + }, + { + "epoch": 0.2449367088607595, + "grad_norm": 1.0795437097549438, + "learning_rate": 0.0013010719539127927, + "loss": 1.6943, + "step": 2322 + }, + { + "epoch": 0.24504219409282701, + "grad_norm": 0.6479732990264893, + "learning_rate": 0.001300901623192209, + "loss": 1.6695, + "step": 2323 + }, + { + "epoch": 0.2451476793248945, + "grad_norm": 1.0214406251907349, + "learning_rate": 0.0013007312307403507, + "loss": 1.6598, + "step": 2324 + }, + { + "epoch": 0.24525316455696203, + "grad_norm": 0.8637327551841736, + "learning_rate": 0.0013005607765763122, + "loss": 1.636, + "step": 2325 + }, + { + "epoch": 0.24535864978902955, + "grad_norm": 0.6732713580131531, + "learning_rate": 0.0013003902607191934, + "loss": 1.6754, + "step": 2326 + }, + { + "epoch": 0.24546413502109704, + "grad_norm": 0.9716955423355103, + "learning_rate": 0.0013002196831881014, + "loss": 1.6575, + "step": 2327 + }, + { + "epoch": 0.24556962025316456, + "grad_norm": 1.158198356628418, + "learning_rate": 0.0013000490440021502, + "loss": 1.6581, + "step": 2328 + }, + { + "epoch": 0.24567510548523205, + "grad_norm": 0.7434486746788025, + "learning_rate": 0.0012998783431804608, + "loss": 1.6561, + "step": 2329 + }, + { + "epoch": 0.24578059071729957, + "grad_norm": 0.8002890944480896, + "learning_rate": 0.0012997075807421612, + "loss": 1.6415, + "step": 2330 + }, + { + "epoch": 0.2458860759493671, + "grad_norm": 1.2969961166381836, + "learning_rate": 0.0012995367567063861, + "loss": 1.6245, + "step": 2331 + }, + { + "epoch": 0.2459915611814346, + "grad_norm": 0.6723568439483643, + "learning_rate": 0.001299365871092277, + "loss": 1.6879, + "step": 2332 + }, + { + "epoch": 0.2460970464135021, + "grad_norm": 1.1214392185211182, + "learning_rate": 0.0012991949239189826, + "loss": 1.6695, + "step": 2333 + }, + { + "epoch": 0.24620253164556963, + "grad_norm": 0.9845348000526428, + "learning_rate": 0.0012990239152056587, + "loss": 1.6854, + 
"step": 2334 + }, + { + "epoch": 0.24630801687763712, + "grad_norm": 0.6560221314430237, + "learning_rate": 0.0012988528449714672, + "loss": 1.6725, + "step": 2335 + }, + { + "epoch": 0.24641350210970464, + "grad_norm": 1.1302063465118408, + "learning_rate": 0.001298681713235578, + "loss": 1.668, + "step": 2336 + }, + { + "epoch": 0.24651898734177216, + "grad_norm": 0.98395174741745, + "learning_rate": 0.0012985105200171664, + "loss": 1.5946, + "step": 2337 + }, + { + "epoch": 0.24662447257383965, + "grad_norm": 0.7084511518478394, + "learning_rate": 0.001298339265335416, + "loss": 1.6387, + "step": 2338 + }, + { + "epoch": 0.24672995780590717, + "grad_norm": 0.9622241258621216, + "learning_rate": 0.0012981679492095166, + "loss": 1.6599, + "step": 2339 + }, + { + "epoch": 0.2468354430379747, + "grad_norm": 1.0915008783340454, + "learning_rate": 0.0012979965716586653, + "loss": 1.6553, + "step": 2340 + }, + { + "epoch": 0.2469409282700422, + "grad_norm": 0.6862816214561462, + "learning_rate": 0.0012978251327020655, + "loss": 1.6753, + "step": 2341 + }, + { + "epoch": 0.2470464135021097, + "grad_norm": 0.7606744170188904, + "learning_rate": 0.0012976536323589278, + "loss": 1.6501, + "step": 2342 + }, + { + "epoch": 0.24715189873417723, + "grad_norm": 0.8630692362785339, + "learning_rate": 0.0012974820706484697, + "loss": 1.6503, + "step": 2343 + }, + { + "epoch": 0.24725738396624472, + "grad_norm": 0.6613379716873169, + "learning_rate": 0.001297310447589916, + "loss": 1.6706, + "step": 2344 + }, + { + "epoch": 0.24736286919831224, + "grad_norm": 0.7647914886474609, + "learning_rate": 0.0012971387632024968, + "loss": 1.6393, + "step": 2345 + }, + { + "epoch": 0.24746835443037973, + "grad_norm": 0.743462324142456, + "learning_rate": 0.0012969670175054515, + "loss": 1.6602, + "step": 2346 + }, + { + "epoch": 0.24757383966244725, + "grad_norm": 0.7169549465179443, + "learning_rate": 0.0012967952105180243, + "loss": 1.6324, + "step": 2347 + }, + { + "epoch": 
0.24767932489451477, + "grad_norm": 0.7304895520210266, + "learning_rate": 0.001296623342259467, + "loss": 1.6252, + "step": 2348 + }, + { + "epoch": 0.24778481012658227, + "grad_norm": 0.7511199712753296, + "learning_rate": 0.0012964514127490388, + "loss": 1.6384, + "step": 2349 + }, + { + "epoch": 0.2478902953586498, + "grad_norm": 0.9569240808486938, + "learning_rate": 0.0012962794220060048, + "loss": 1.6705, + "step": 2350 + }, + { + "epoch": 0.2479957805907173, + "grad_norm": 0.9161232709884644, + "learning_rate": 0.0012961073700496378, + "loss": 1.6646, + "step": 2351 + }, + { + "epoch": 0.2481012658227848, + "grad_norm": 0.7094020843505859, + "learning_rate": 0.0012959352568992163, + "loss": 1.6832, + "step": 2352 + }, + { + "epoch": 0.24820675105485232, + "grad_norm": 0.8542672395706177, + "learning_rate": 0.0012957630825740274, + "loss": 1.6396, + "step": 2353 + }, + { + "epoch": 0.24831223628691984, + "grad_norm": 0.9127027988433838, + "learning_rate": 0.0012955908470933637, + "loss": 1.6719, + "step": 2354 + }, + { + "epoch": 0.24841772151898733, + "grad_norm": 1.011279821395874, + "learning_rate": 0.0012954185504765248, + "loss": 1.6929, + "step": 2355 + }, + { + "epoch": 0.24852320675105485, + "grad_norm": 1.1627048254013062, + "learning_rate": 0.0012952461927428177, + "loss": 1.6537, + "step": 2356 + }, + { + "epoch": 0.24862869198312237, + "grad_norm": 0.6880981922149658, + "learning_rate": 0.001295073773911556, + "loss": 1.6683, + "step": 2357 + }, + { + "epoch": 0.24873417721518987, + "grad_norm": 0.9987649917602539, + "learning_rate": 0.0012949012940020599, + "loss": 1.6913, + "step": 2358 + }, + { + "epoch": 0.2488396624472574, + "grad_norm": 1.0093494653701782, + "learning_rate": 0.0012947287530336565, + "loss": 1.6549, + "step": 2359 + }, + { + "epoch": 0.2489451476793249, + "grad_norm": 0.9650765061378479, + "learning_rate": 0.0012945561510256801, + "loss": 1.6544, + "step": 2360 + }, + { + "epoch": 0.2490506329113924, + "grad_norm": 
0.7495661377906799, + "learning_rate": 0.0012943834879974717, + "loss": 1.6415, + "step": 2361 + }, + { + "epoch": 0.24915611814345992, + "grad_norm": 0.7085552215576172, + "learning_rate": 0.001294210763968379, + "loss": 1.6719, + "step": 2362 + }, + { + "epoch": 0.2492616033755274, + "grad_norm": 0.7122513651847839, + "learning_rate": 0.0012940379789577565, + "loss": 1.6713, + "step": 2363 + }, + { + "epoch": 0.24936708860759493, + "grad_norm": 0.6951025128364563, + "learning_rate": 0.0012938651329849654, + "loss": 1.6509, + "step": 2364 + }, + { + "epoch": 0.24947257383966245, + "grad_norm": 0.7669538259506226, + "learning_rate": 0.0012936922260693743, + "loss": 1.6388, + "step": 2365 + }, + { + "epoch": 0.24957805907172995, + "grad_norm": 0.8098183274269104, + "learning_rate": 0.0012935192582303582, + "loss": 1.6528, + "step": 2366 + }, + { + "epoch": 0.24968354430379747, + "grad_norm": 0.7658627033233643, + "learning_rate": 0.001293346229487299, + "loss": 1.6792, + "step": 2367 + }, + { + "epoch": 0.249789029535865, + "grad_norm": 0.6637319326400757, + "learning_rate": 0.0012931731398595854, + "loss": 1.6756, + "step": 2368 + }, + { + "epoch": 0.24989451476793248, + "grad_norm": 0.7871140241622925, + "learning_rate": 0.001292999989366613, + "loss": 1.6642, + "step": 2369 + }, + { + "epoch": 0.25, + "grad_norm": 0.6871216893196106, + "learning_rate": 0.001292826778027784, + "loss": 1.6721, + "step": 2370 + }, + { + "epoch": 0.2501054852320675, + "grad_norm": 0.8020870089530945, + "learning_rate": 0.001292653505862508, + "loss": 1.6632, + "step": 2371 + }, + { + "epoch": 0.25021097046413504, + "grad_norm": 0.7164187431335449, + "learning_rate": 0.0012924801728902006, + "loss": 1.6534, + "step": 2372 + }, + { + "epoch": 0.25031645569620253, + "grad_norm": 0.7230563163757324, + "learning_rate": 0.0012923067791302848, + "loss": 1.6487, + "step": 2373 + }, + { + "epoch": 0.25042194092827, + "grad_norm": 0.7005426287651062, + "learning_rate": 0.0012921333246021904, + 
"loss": 1.6657, + "step": 2374 + }, + { + "epoch": 0.2505274261603376, + "grad_norm": 0.8939343690872192, + "learning_rate": 0.0012919598093253533, + "loss": 1.6421, + "step": 2375 + }, + { + "epoch": 0.25063291139240507, + "grad_norm": 0.9125320911407471, + "learning_rate": 0.0012917862333192173, + "loss": 1.6999, + "step": 2376 + }, + { + "epoch": 0.25073839662447256, + "grad_norm": 0.76955246925354, + "learning_rate": 0.0012916125966032322, + "loss": 1.6257, + "step": 2377 + }, + { + "epoch": 0.2508438818565401, + "grad_norm": 0.7116618156433105, + "learning_rate": 0.001291438899196855, + "loss": 1.6602, + "step": 2378 + }, + { + "epoch": 0.2509493670886076, + "grad_norm": 0.7037491798400879, + "learning_rate": 0.0012912651411195494, + "loss": 1.6924, + "step": 2379 + }, + { + "epoch": 0.2510548523206751, + "grad_norm": 0.6634914875030518, + "learning_rate": 0.0012910913223907856, + "loss": 1.6431, + "step": 2380 + }, + { + "epoch": 0.25116033755274264, + "grad_norm": 0.7754793763160706, + "learning_rate": 0.0012909174430300412, + "loss": 1.6842, + "step": 2381 + }, + { + "epoch": 0.25126582278481013, + "grad_norm": 0.7476556301116943, + "learning_rate": 0.0012907435030567996, + "loss": 1.6496, + "step": 2382 + }, + { + "epoch": 0.2513713080168776, + "grad_norm": 0.722328245639801, + "learning_rate": 0.0012905695024905525, + "loss": 1.6678, + "step": 2383 + }, + { + "epoch": 0.2514767932489452, + "grad_norm": 0.6710751056671143, + "learning_rate": 0.0012903954413507968, + "loss": 1.6348, + "step": 2384 + }, + { + "epoch": 0.25158227848101267, + "grad_norm": 0.8761717081069946, + "learning_rate": 0.0012902213196570376, + "loss": 1.672, + "step": 2385 + }, + { + "epoch": 0.25168776371308016, + "grad_norm": 1.1407588720321655, + "learning_rate": 0.0012900471374287855, + "loss": 1.6684, + "step": 2386 + }, + { + "epoch": 0.25179324894514765, + "grad_norm": 0.9495872259140015, + "learning_rate": 0.0012898728946855588, + "loss": 1.7282, + "step": 2387 + }, + { + 
"epoch": 0.2518987341772152, + "grad_norm": 0.7313913702964783, + "learning_rate": 0.001289698591446882, + "loss": 1.6313, + "step": 2388 + }, + { + "epoch": 0.2520042194092827, + "grad_norm": 0.6649331450462341, + "learning_rate": 0.0012895242277322872, + "loss": 1.6273, + "step": 2389 + }, + { + "epoch": 0.2521097046413502, + "grad_norm": 0.7971710562705994, + "learning_rate": 0.0012893498035613123, + "loss": 1.6597, + "step": 2390 + }, + { + "epoch": 0.25221518987341773, + "grad_norm": 1.0204741954803467, + "learning_rate": 0.0012891753189535023, + "loss": 1.638, + "step": 2391 + }, + { + "epoch": 0.2523206751054852, + "grad_norm": 1.0780761241912842, + "learning_rate": 0.0012890007739284092, + "loss": 1.6333, + "step": 2392 + }, + { + "epoch": 0.2524261603375527, + "grad_norm": 0.9599758386611938, + "learning_rate": 0.001288826168505592, + "loss": 1.6418, + "step": 2393 + }, + { + "epoch": 0.25253164556962027, + "grad_norm": 0.7273114323616028, + "learning_rate": 0.0012886515027046156, + "loss": 1.6541, + "step": 2394 + }, + { + "epoch": 0.25263713080168776, + "grad_norm": 0.7627483010292053, + "learning_rate": 0.0012884767765450524, + "loss": 1.5985, + "step": 2395 + }, + { + "epoch": 0.25274261603375525, + "grad_norm": 1.0846034288406372, + "learning_rate": 0.0012883019900464814, + "loss": 1.6861, + "step": 2396 + }, + { + "epoch": 0.2528481012658228, + "grad_norm": 0.8005185127258301, + "learning_rate": 0.001288127143228488, + "loss": 1.7174, + "step": 2397 + }, + { + "epoch": 0.2529535864978903, + "grad_norm": 0.7190537452697754, + "learning_rate": 0.0012879522361106646, + "loss": 1.6871, + "step": 2398 + }, + { + "epoch": 0.2530590717299578, + "grad_norm": 1.0913289785385132, + "learning_rate": 0.0012877772687126111, + "loss": 1.6498, + "step": 2399 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 0.9703072905540466, + "learning_rate": 0.001287602241053933, + "loss": 1.6724, + "step": 2400 + }, + { + "epoch": 0.2532700421940928, + "grad_norm": 
0.7724683880805969, + "learning_rate": 0.001287427153154243, + "loss": 1.6604, + "step": 2401 + }, + { + "epoch": 0.2533755274261603, + "grad_norm": 0.7974680066108704, + "learning_rate": 0.0012872520050331608, + "loss": 1.6732, + "step": 2402 + }, + { + "epoch": 0.25348101265822787, + "grad_norm": 1.2008014917373657, + "learning_rate": 0.0012870767967103122, + "loss": 1.6796, + "step": 2403 + }, + { + "epoch": 0.25358649789029536, + "grad_norm": 0.9234564304351807, + "learning_rate": 0.0012869015282053304, + "loss": 1.6243, + "step": 2404 + }, + { + "epoch": 0.25369198312236285, + "grad_norm": 0.7350353598594666, + "learning_rate": 0.0012867261995378554, + "loss": 1.6925, + "step": 2405 + }, + { + "epoch": 0.2537974683544304, + "grad_norm": 1.1459323167800903, + "learning_rate": 0.001286550810727533, + "loss": 1.6549, + "step": 2406 + }, + { + "epoch": 0.2539029535864979, + "grad_norm": 0.8205280900001526, + "learning_rate": 0.0012863753617940172, + "loss": 1.6808, + "step": 2407 + }, + { + "epoch": 0.2540084388185654, + "grad_norm": 0.8433129191398621, + "learning_rate": 0.001286199852756967, + "loss": 1.6483, + "step": 2408 + }, + { + "epoch": 0.25411392405063293, + "grad_norm": 1.5922964811325073, + "learning_rate": 0.0012860242836360502, + "loss": 1.638, + "step": 2409 + }, + { + "epoch": 0.2542194092827004, + "grad_norm": 0.726746141910553, + "learning_rate": 0.0012858486544509392, + "loss": 1.6927, + "step": 2410 + }, + { + "epoch": 0.2543248945147679, + "grad_norm": 1.0322366952896118, + "learning_rate": 0.0012856729652213144, + "loss": 1.7032, + "step": 2411 + }, + { + "epoch": 0.25443037974683547, + "grad_norm": 1.1674890518188477, + "learning_rate": 0.001285497215966863, + "loss": 1.6499, + "step": 2412 + }, + { + "epoch": 0.25453586497890296, + "grad_norm": 0.7083207368850708, + "learning_rate": 0.0012853214067072782, + "loss": 1.6672, + "step": 2413 + }, + { + "epoch": 0.25464135021097045, + "grad_norm": 1.175304651260376, + "learning_rate": 
0.0012851455374622604, + "loss": 1.6234, + "step": 2414 + }, + { + "epoch": 0.254746835443038, + "grad_norm": 0.7133437991142273, + "learning_rate": 0.0012849696082515166, + "loss": 1.6426, + "step": 2415 + }, + { + "epoch": 0.2548523206751055, + "grad_norm": 1.1257070302963257, + "learning_rate": 0.0012847936190947605, + "loss": 1.6617, + "step": 2416 + }, + { + "epoch": 0.254957805907173, + "grad_norm": 0.9257820844650269, + "learning_rate": 0.001284617570011713, + "loss": 1.6326, + "step": 2417 + }, + { + "epoch": 0.25506329113924053, + "grad_norm": 0.6812304854393005, + "learning_rate": 0.0012844414610221006, + "loss": 1.6553, + "step": 2418 + }, + { + "epoch": 0.255168776371308, + "grad_norm": 0.6758115291595459, + "learning_rate": 0.0012842652921456576, + "loss": 1.6183, + "step": 2419 + }, + { + "epoch": 0.2552742616033755, + "grad_norm": 0.6753324866294861, + "learning_rate": 0.0012840890634021249, + "loss": 1.6842, + "step": 2420 + }, + { + "epoch": 0.255379746835443, + "grad_norm": 0.7053277492523193, + "learning_rate": 0.001283912774811249, + "loss": 1.6276, + "step": 2421 + }, + { + "epoch": 0.25548523206751056, + "grad_norm": 0.6779065728187561, + "learning_rate": 0.0012837364263927843, + "loss": 1.7269, + "step": 2422 + }, + { + "epoch": 0.25559071729957805, + "grad_norm": 0.8208703994750977, + "learning_rate": 0.001283560018166492, + "loss": 1.6896, + "step": 2423 + }, + { + "epoch": 0.25569620253164554, + "grad_norm": 0.7189366221427917, + "learning_rate": 0.0012833835501521386, + "loss": 1.634, + "step": 2424 + }, + { + "epoch": 0.2558016877637131, + "grad_norm": 0.6610392928123474, + "learning_rate": 0.0012832070223694992, + "loss": 1.6173, + "step": 2425 + }, + { + "epoch": 0.2559071729957806, + "grad_norm": 0.7939918637275696, + "learning_rate": 0.0012830304348383538, + "loss": 1.6509, + "step": 2426 + }, + { + "epoch": 0.2560126582278481, + "grad_norm": 0.7501705884933472, + "learning_rate": 0.0012828537875784905, + "loss": 1.6839, + "step": 
2427 + }, + { + "epoch": 0.2561181434599156, + "grad_norm": 0.6843963265419006, + "learning_rate": 0.001282677080609703, + "loss": 1.6215, + "step": 2428 + }, + { + "epoch": 0.2562236286919831, + "grad_norm": 0.7524582147598267, + "learning_rate": 0.0012825003139517925, + "loss": 1.6284, + "step": 2429 + }, + { + "epoch": 0.2563291139240506, + "grad_norm": 0.7165771722793579, + "learning_rate": 0.0012823234876245667, + "loss": 1.6252, + "step": 2430 + }, + { + "epoch": 0.25643459915611816, + "grad_norm": 0.7165809869766235, + "learning_rate": 0.0012821466016478395, + "loss": 1.6741, + "step": 2431 + }, + { + "epoch": 0.25654008438818565, + "grad_norm": 0.6831546425819397, + "learning_rate": 0.0012819696560414323, + "loss": 1.6807, + "step": 2432 + }, + { + "epoch": 0.25664556962025314, + "grad_norm": 0.6998558640480042, + "learning_rate": 0.0012817926508251723, + "loss": 1.699, + "step": 2433 + }, + { + "epoch": 0.2567510548523207, + "grad_norm": 0.8316354751586914, + "learning_rate": 0.0012816155860188938, + "loss": 1.6095, + "step": 2434 + }, + { + "epoch": 0.2568565400843882, + "grad_norm": 1.146774411201477, + "learning_rate": 0.0012814384616424384, + "loss": 1.6268, + "step": 2435 + }, + { + "epoch": 0.2569620253164557, + "grad_norm": 0.7889807820320129, + "learning_rate": 0.0012812612777156533, + "loss": 1.6318, + "step": 2436 + }, + { + "epoch": 0.2570675105485232, + "grad_norm": 0.7296469211578369, + "learning_rate": 0.001281084034258393, + "loss": 1.666, + "step": 2437 + }, + { + "epoch": 0.2571729957805907, + "grad_norm": 0.7729725241661072, + "learning_rate": 0.0012809067312905182, + "loss": 1.6685, + "step": 2438 + }, + { + "epoch": 0.2572784810126582, + "grad_norm": 0.6930496096611023, + "learning_rate": 0.0012807293688318969, + "loss": 1.6468, + "step": 2439 + }, + { + "epoch": 0.25738396624472576, + "grad_norm": 0.7852159142494202, + "learning_rate": 0.0012805519469024035, + "loss": 1.6896, + "step": 2440 + }, + { + "epoch": 0.25748945147679325, + 
"grad_norm": 0.8878028988838196, + "learning_rate": 0.0012803744655219187, + "loss": 1.6592, + "step": 2441 + }, + { + "epoch": 0.25759493670886074, + "grad_norm": 0.8151628971099854, + "learning_rate": 0.0012801969247103306, + "loss": 1.653, + "step": 2442 + }, + { + "epoch": 0.2577004219409283, + "grad_norm": 0.7451884746551514, + "learning_rate": 0.001280019324487533, + "loss": 1.6548, + "step": 2443 + }, + { + "epoch": 0.2578059071729958, + "grad_norm": 0.8131077289581299, + "learning_rate": 0.0012798416648734272, + "loss": 1.6528, + "step": 2444 + }, + { + "epoch": 0.2579113924050633, + "grad_norm": 1.0860791206359863, + "learning_rate": 0.001279663945887921, + "loss": 1.6922, + "step": 2445 + }, + { + "epoch": 0.2580168776371308, + "grad_norm": 0.7647292017936707, + "learning_rate": 0.0012794861675509285, + "loss": 1.6218, + "step": 2446 + }, + { + "epoch": 0.2581223628691983, + "grad_norm": 0.8131096363067627, + "learning_rate": 0.0012793083298823708, + "loss": 1.6496, + "step": 2447 + }, + { + "epoch": 0.2582278481012658, + "grad_norm": 1.1202915906906128, + "learning_rate": 0.0012791304329021751, + "loss": 1.6551, + "step": 2448 + }, + { + "epoch": 0.25833333333333336, + "grad_norm": 0.7687726020812988, + "learning_rate": 0.001278952476630276, + "loss": 1.6328, + "step": 2449 + }, + { + "epoch": 0.25843881856540085, + "grad_norm": 0.7969813346862793, + "learning_rate": 0.0012787744610866143, + "loss": 1.632, + "step": 2450 + }, + { + "epoch": 0.25854430379746834, + "grad_norm": 1.1886993646621704, + "learning_rate": 0.0012785963862911376, + "loss": 1.6569, + "step": 2451 + }, + { + "epoch": 0.2586497890295359, + "grad_norm": 0.8044098019599915, + "learning_rate": 0.0012784182522637998, + "loss": 1.6896, + "step": 2452 + }, + { + "epoch": 0.2587552742616034, + "grad_norm": 0.7480721473693848, + "learning_rate": 0.001278240059024562, + "loss": 1.6462, + "step": 2453 + }, + { + "epoch": 0.2588607594936709, + "grad_norm": 0.9140893816947937, + "learning_rate": 
0.0012780618065933915, + "loss": 1.6875, + "step": 2454 + }, + { + "epoch": 0.25896624472573837, + "grad_norm": 0.8088756799697876, + "learning_rate": 0.0012778834949902626, + "loss": 1.6391, + "step": 2455 + }, + { + "epoch": 0.2590717299578059, + "grad_norm": 0.6690123081207275, + "learning_rate": 0.0012777051242351557, + "loss": 1.6606, + "step": 2456 + }, + { + "epoch": 0.2591772151898734, + "grad_norm": 0.7884414792060852, + "learning_rate": 0.0012775266943480582, + "loss": 1.6471, + "step": 2457 + }, + { + "epoch": 0.2592827004219409, + "grad_norm": 0.9357655644416809, + "learning_rate": 0.0012773482053489642, + "loss": 1.6801, + "step": 2458 + }, + { + "epoch": 0.25938818565400845, + "grad_norm": 0.7977480292320251, + "learning_rate": 0.0012771696572578743, + "loss": 1.6762, + "step": 2459 + }, + { + "epoch": 0.25949367088607594, + "grad_norm": 0.681511640548706, + "learning_rate": 0.0012769910500947954, + "loss": 1.6412, + "step": 2460 + }, + { + "epoch": 0.25959915611814344, + "grad_norm": 0.8468360304832458, + "learning_rate": 0.0012768123838797414, + "loss": 1.6087, + "step": 2461 + }, + { + "epoch": 0.259704641350211, + "grad_norm": 0.979762077331543, + "learning_rate": 0.0012766336586327333, + "loss": 1.6639, + "step": 2462 + }, + { + "epoch": 0.2598101265822785, + "grad_norm": 0.7820468544960022, + "learning_rate": 0.0012764548743737973, + "loss": 1.6548, + "step": 2463 + }, + { + "epoch": 0.25991561181434597, + "grad_norm": 0.7340630292892456, + "learning_rate": 0.001276276031122968, + "loss": 1.6147, + "step": 2464 + }, + { + "epoch": 0.2600210970464135, + "grad_norm": 0.8798895478248596, + "learning_rate": 0.0012760971289002847, + "loss": 1.6869, + "step": 2465 + }, + { + "epoch": 0.260126582278481, + "grad_norm": 0.7153478264808655, + "learning_rate": 0.0012759181677257946, + "loss": 1.6348, + "step": 2466 + }, + { + "epoch": 0.2602320675105485, + "grad_norm": 0.7346628904342651, + "learning_rate": 0.0012757391476195517, + "loss": 1.6453, + 
"step": 2467 + }, + { + "epoch": 0.26033755274261605, + "grad_norm": 0.8889830708503723, + "learning_rate": 0.0012755600686016155, + "loss": 1.6632, + "step": 2468 + }, + { + "epoch": 0.26044303797468354, + "grad_norm": 0.6384941339492798, + "learning_rate": 0.0012753809306920532, + "loss": 1.6623, + "step": 2469 + }, + { + "epoch": 0.26054852320675104, + "grad_norm": 0.9406026601791382, + "learning_rate": 0.0012752017339109376, + "loss": 1.6778, + "step": 2470 + }, + { + "epoch": 0.2606540084388186, + "grad_norm": 1.007405161857605, + "learning_rate": 0.0012750224782783492, + "loss": 1.6857, + "step": 2471 + }, + { + "epoch": 0.2607594936708861, + "grad_norm": 0.7171160578727722, + "learning_rate": 0.0012748431638143739, + "loss": 1.6393, + "step": 2472 + }, + { + "epoch": 0.26086497890295357, + "grad_norm": 0.785631537437439, + "learning_rate": 0.0012746637905391048, + "loss": 1.6523, + "step": 2473 + }, + { + "epoch": 0.2609704641350211, + "grad_norm": 0.8420215845108032, + "learning_rate": 0.001274484358472642, + "loss": 1.6606, + "step": 2474 + }, + { + "epoch": 0.2610759493670886, + "grad_norm": 0.6966327428817749, + "learning_rate": 0.0012743048676350911, + "loss": 1.6523, + "step": 2475 + }, + { + "epoch": 0.2611814345991561, + "grad_norm": 0.6861903071403503, + "learning_rate": 0.001274125318046566, + "loss": 1.6296, + "step": 2476 + }, + { + "epoch": 0.26128691983122365, + "grad_norm": 0.7084726095199585, + "learning_rate": 0.0012739457097271849, + "loss": 1.6705, + "step": 2477 + }, + { + "epoch": 0.26139240506329114, + "grad_norm": 0.6493290662765503, + "learning_rate": 0.0012737660426970748, + "loss": 1.6508, + "step": 2478 + }, + { + "epoch": 0.26149789029535864, + "grad_norm": 0.6384016275405884, + "learning_rate": 0.0012735863169763678, + "loss": 1.6451, + "step": 2479 + }, + { + "epoch": 0.2616033755274262, + "grad_norm": 0.6779707074165344, + "learning_rate": 0.0012734065325852029, + "loss": 1.6573, + "step": 2480 + }, + { + "epoch": 
0.2617088607594937, + "grad_norm": 0.8163171410560608, + "learning_rate": 0.0012732266895437265, + "loss": 1.6405, + "step": 2481 + }, + { + "epoch": 0.26181434599156117, + "grad_norm": 0.9689272046089172, + "learning_rate": 0.00127304678787209, + "loss": 1.625, + "step": 2482 + }, + { + "epoch": 0.2619198312236287, + "grad_norm": 0.9777700901031494, + "learning_rate": 0.001272866827590453, + "loss": 1.6625, + "step": 2483 + }, + { + "epoch": 0.2620253164556962, + "grad_norm": 0.7405293583869934, + "learning_rate": 0.001272686808718981, + "loss": 1.6125, + "step": 2484 + }, + { + "epoch": 0.2621308016877637, + "grad_norm": 0.7093461155891418, + "learning_rate": 0.0012725067312778454, + "loss": 1.6519, + "step": 2485 + }, + { + "epoch": 0.2622362869198312, + "grad_norm": 1.0192828178405762, + "learning_rate": 0.0012723265952872252, + "loss": 1.6371, + "step": 2486 + }, + { + "epoch": 0.26234177215189874, + "grad_norm": 0.8662915229797363, + "learning_rate": 0.0012721464007673055, + "loss": 1.6472, + "step": 2487 + }, + { + "epoch": 0.26244725738396624, + "grad_norm": 0.7166410088539124, + "learning_rate": 0.0012719661477382778, + "loss": 1.6762, + "step": 2488 + }, + { + "epoch": 0.26255274261603373, + "grad_norm": 0.7818217873573303, + "learning_rate": 0.0012717858362203407, + "loss": 1.6073, + "step": 2489 + }, + { + "epoch": 0.2626582278481013, + "grad_norm": 0.921868622303009, + "learning_rate": 0.0012716054662336987, + "loss": 1.6726, + "step": 2490 + }, + { + "epoch": 0.26276371308016877, + "grad_norm": 0.8518598079681396, + "learning_rate": 0.001271425037798563, + "loss": 1.6322, + "step": 2491 + }, + { + "epoch": 0.26286919831223626, + "grad_norm": 0.6639887094497681, + "learning_rate": 0.0012712445509351518, + "loss": 1.6429, + "step": 2492 + }, + { + "epoch": 0.2629746835443038, + "grad_norm": 0.816261887550354, + "learning_rate": 0.00127106400566369, + "loss": 1.6965, + "step": 2493 + }, + { + "epoch": 0.2630801687763713, + "grad_norm": 
0.9634572267532349, + "learning_rate": 0.0012708834020044076, + "loss": 1.6393, + "step": 2494 + }, + { + "epoch": 0.2631856540084388, + "grad_norm": 0.6845805644989014, + "learning_rate": 0.0012707027399775429, + "loss": 1.6475, + "step": 2495 + }, + { + "epoch": 0.26329113924050634, + "grad_norm": 0.7408917546272278, + "learning_rate": 0.0012705220196033396, + "loss": 1.6388, + "step": 2496 + }, + { + "epoch": 0.26339662447257384, + "grad_norm": 0.8793673515319824, + "learning_rate": 0.0012703412409020484, + "loss": 1.6847, + "step": 2497 + }, + { + "epoch": 0.26350210970464133, + "grad_norm": 0.703798770904541, + "learning_rate": 0.0012701604038939268, + "loss": 1.6348, + "step": 2498 + }, + { + "epoch": 0.2636075949367089, + "grad_norm": 0.7472414970397949, + "learning_rate": 0.0012699795085992379, + "loss": 1.6554, + "step": 2499 + }, + { + "epoch": 0.26371308016877637, + "grad_norm": 0.8386152386665344, + "learning_rate": 0.001269798555038252, + "loss": 1.6253, + "step": 2500 + }, + { + "epoch": 0.26381856540084386, + "grad_norm": 0.6596741676330566, + "learning_rate": 0.0012696175432312465, + "loss": 1.6284, + "step": 2501 + }, + { + "epoch": 0.2639240506329114, + "grad_norm": 0.6862741112709045, + "learning_rate": 0.0012694364731985041, + "loss": 1.6743, + "step": 2502 + }, + { + "epoch": 0.2640295358649789, + "grad_norm": 0.8004752993583679, + "learning_rate": 0.0012692553449603148, + "loss": 1.653, + "step": 2503 + }, + { + "epoch": 0.2641350210970464, + "grad_norm": 0.8445788025856018, + "learning_rate": 0.0012690741585369748, + "loss": 1.6237, + "step": 2504 + }, + { + "epoch": 0.26424050632911394, + "grad_norm": 0.8107191324234009, + "learning_rate": 0.0012688929139487869, + "loss": 1.6798, + "step": 2505 + }, + { + "epoch": 0.26434599156118144, + "grad_norm": 0.6566908359527588, + "learning_rate": 0.0012687116112160607, + "loss": 1.6149, + "step": 2506 + }, + { + "epoch": 0.26445147679324893, + "grad_norm": 0.7817515134811401, + "learning_rate": 
0.0012685302503591118, + "loss": 1.6584, + "step": 2507 + }, + { + "epoch": 0.2645569620253165, + "grad_norm": 0.8772174119949341, + "learning_rate": 0.0012683488313982628, + "loss": 1.6355, + "step": 2508 + }, + { + "epoch": 0.26466244725738397, + "grad_norm": 0.783196747303009, + "learning_rate": 0.0012681673543538427, + "loss": 1.6297, + "step": 2509 + }, + { + "epoch": 0.26476793248945146, + "grad_norm": 0.6242533326148987, + "learning_rate": 0.0012679858192461864, + "loss": 1.6748, + "step": 2510 + }, + { + "epoch": 0.264873417721519, + "grad_norm": 0.757429838180542, + "learning_rate": 0.0012678042260956363, + "loss": 1.6802, + "step": 2511 + }, + { + "epoch": 0.2649789029535865, + "grad_norm": 0.8071131110191345, + "learning_rate": 0.0012676225749225407, + "loss": 1.6551, + "step": 2512 + }, + { + "epoch": 0.265084388185654, + "grad_norm": 0.684274435043335, + "learning_rate": 0.0012674408657472542, + "loss": 1.6193, + "step": 2513 + }, + { + "epoch": 0.26518987341772154, + "grad_norm": 0.6977252960205078, + "learning_rate": 0.0012672590985901386, + "loss": 1.638, + "step": 2514 + }, + { + "epoch": 0.26529535864978904, + "grad_norm": 0.7712351083755493, + "learning_rate": 0.001267077273471562, + "loss": 1.6324, + "step": 2515 + }, + { + "epoch": 0.26540084388185653, + "grad_norm": 0.714293360710144, + "learning_rate": 0.0012668953904118984, + "loss": 1.6546, + "step": 2516 + }, + { + "epoch": 0.2655063291139241, + "grad_norm": 0.6730122566223145, + "learning_rate": 0.001266713449431529, + "loss": 1.6597, + "step": 2517 + }, + { + "epoch": 0.26561181434599157, + "grad_norm": 0.7242845296859741, + "learning_rate": 0.0012665314505508406, + "loss": 1.6643, + "step": 2518 + }, + { + "epoch": 0.26571729957805906, + "grad_norm": 0.6487644910812378, + "learning_rate": 0.0012663493937902278, + "loss": 1.6593, + "step": 2519 + }, + { + "epoch": 0.26582278481012656, + "grad_norm": 0.6883183121681213, + "learning_rate": 0.0012661672791700906, + "loss": 1.6579, + "step": 
2520 + }, + { + "epoch": 0.2659282700421941, + "grad_norm": 0.7534651160240173, + "learning_rate": 0.001265985106710836, + "loss": 1.6528, + "step": 2521 + }, + { + "epoch": 0.2660337552742616, + "grad_norm": 0.9104098081588745, + "learning_rate": 0.0012658028764328771, + "loss": 1.6402, + "step": 2522 + }, + { + "epoch": 0.2661392405063291, + "grad_norm": 1.088137149810791, + "learning_rate": 0.0012656205883566339, + "loss": 1.6761, + "step": 2523 + }, + { + "epoch": 0.26624472573839664, + "grad_norm": 0.769451916217804, + "learning_rate": 0.0012654382425025328, + "loss": 1.5975, + "step": 2524 + }, + { + "epoch": 0.26635021097046413, + "grad_norm": 0.7039145827293396, + "learning_rate": 0.0012652558388910062, + "loss": 1.6091, + "step": 2525 + }, + { + "epoch": 0.2664556962025316, + "grad_norm": 0.9848747253417969, + "learning_rate": 0.0012650733775424938, + "loss": 1.6334, + "step": 2526 + }, + { + "epoch": 0.26656118143459917, + "grad_norm": 0.8245536684989929, + "learning_rate": 0.001264890858477441, + "loss": 1.6414, + "step": 2527 + }, + { + "epoch": 0.26666666666666666, + "grad_norm": 0.668037474155426, + "learning_rate": 0.0012647082817162998, + "loss": 1.6486, + "step": 2528 + }, + { + "epoch": 0.26677215189873416, + "grad_norm": 1.1848117113113403, + "learning_rate": 0.0012645256472795295, + "loss": 1.7401, + "step": 2529 + }, + { + "epoch": 0.2668776371308017, + "grad_norm": 1.1622365713119507, + "learning_rate": 0.0012643429551875945, + "loss": 1.6397, + "step": 2530 + }, + { + "epoch": 0.2669831223628692, + "grad_norm": 0.6911751627922058, + "learning_rate": 0.0012641602054609662, + "loss": 1.6368, + "step": 2531 + }, + { + "epoch": 0.2670886075949367, + "grad_norm": 1.4435443878173828, + "learning_rate": 0.0012639773981201238, + "loss": 1.647, + "step": 2532 + }, + { + "epoch": 0.26719409282700424, + "grad_norm": 0.7966960072517395, + "learning_rate": 0.0012637945331855506, + "loss": 1.5829, + "step": 2533 + }, + { + "epoch": 0.26729957805907173, + 
"grad_norm": 0.9690499305725098, + "learning_rate": 0.0012636116106777382, + "loss": 1.634, + "step": 2534 + }, + { + "epoch": 0.2674050632911392, + "grad_norm": 1.162021279335022, + "learning_rate": 0.0012634286306171835, + "loss": 1.6944, + "step": 2535 + }, + { + "epoch": 0.26751054852320677, + "grad_norm": 0.7967457175254822, + "learning_rate": 0.0012632455930243907, + "loss": 1.6444, + "step": 2536 + }, + { + "epoch": 0.26761603375527426, + "grad_norm": 1.019643783569336, + "learning_rate": 0.0012630624979198697, + "loss": 1.6487, + "step": 2537 + }, + { + "epoch": 0.26772151898734176, + "grad_norm": 1.1604700088500977, + "learning_rate": 0.0012628793453241377, + "loss": 1.6986, + "step": 2538 + }, + { + "epoch": 0.2678270042194093, + "grad_norm": 0.7793677449226379, + "learning_rate": 0.0012626961352577174, + "loss": 1.6573, + "step": 2539 + }, + { + "epoch": 0.2679324894514768, + "grad_norm": 1.3199818134307861, + "learning_rate": 0.0012625128677411388, + "loss": 1.6417, + "step": 2540 + }, + { + "epoch": 0.2680379746835443, + "grad_norm": 0.932346761226654, + "learning_rate": 0.0012623295427949377, + "loss": 1.6413, + "step": 2541 + }, + { + "epoch": 0.26814345991561184, + "grad_norm": 0.884177565574646, + "learning_rate": 0.0012621461604396566, + "loss": 1.6749, + "step": 2542 + }, + { + "epoch": 0.26824894514767933, + "grad_norm": 1.1009846925735474, + "learning_rate": 0.0012619627206958445, + "loss": 1.6382, + "step": 2543 + }, + { + "epoch": 0.2683544303797468, + "grad_norm": 0.9186540842056274, + "learning_rate": 0.0012617792235840564, + "loss": 1.6183, + "step": 2544 + }, + { + "epoch": 0.26845991561181437, + "grad_norm": 0.6290854811668396, + "learning_rate": 0.0012615956691248544, + "loss": 1.6493, + "step": 2545 + }, + { + "epoch": 0.26856540084388186, + "grad_norm": 0.7433156371116638, + "learning_rate": 0.001261412057338807, + "loss": 1.6377, + "step": 2546 + }, + { + "epoch": 0.26867088607594936, + "grad_norm": 0.7771925926208496, + 
"learning_rate": 0.0012612283882464882, + "loss": 1.63, + "step": 2547 + }, + { + "epoch": 0.2687763713080169, + "grad_norm": 0.7061017751693726, + "learning_rate": 0.0012610446618684793, + "loss": 1.6434, + "step": 2548 + }, + { + "epoch": 0.2688818565400844, + "grad_norm": 0.7132526636123657, + "learning_rate": 0.0012608608782253676, + "loss": 1.6462, + "step": 2549 + }, + { + "epoch": 0.2689873417721519, + "grad_norm": 0.7587828636169434, + "learning_rate": 0.0012606770373377475, + "loss": 1.633, + "step": 2550 + }, + { + "epoch": 0.26909282700421944, + "grad_norm": 0.6880494356155396, + "learning_rate": 0.0012604931392262186, + "loss": 1.6643, + "step": 2551 + }, + { + "epoch": 0.26919831223628693, + "grad_norm": 0.9326207637786865, + "learning_rate": 0.001260309183911388, + "loss": 1.6731, + "step": 2552 + }, + { + "epoch": 0.2693037974683544, + "grad_norm": 1.0161691904067993, + "learning_rate": 0.0012601251714138683, + "loss": 1.6828, + "step": 2553 + }, + { + "epoch": 0.2694092827004219, + "grad_norm": 0.7566787004470825, + "learning_rate": 0.0012599411017542798, + "loss": 1.6255, + "step": 2554 + }, + { + "epoch": 0.26951476793248946, + "grad_norm": 0.7152695059776306, + "learning_rate": 0.0012597569749532482, + "loss": 1.6472, + "step": 2555 + }, + { + "epoch": 0.26962025316455696, + "grad_norm": 0.738936185836792, + "learning_rate": 0.0012595727910314056, + "loss": 1.6584, + "step": 2556 + }, + { + "epoch": 0.26972573839662445, + "grad_norm": 0.7279407978057861, + "learning_rate": 0.0012593885500093906, + "loss": 1.6347, + "step": 2557 + }, + { + "epoch": 0.269831223628692, + "grad_norm": 0.7134959697723389, + "learning_rate": 0.0012592042519078486, + "loss": 1.6485, + "step": 2558 + }, + { + "epoch": 0.2699367088607595, + "grad_norm": 0.9286974668502808, + "learning_rate": 0.0012590198967474312, + "loss": 1.6549, + "step": 2559 + }, + { + "epoch": 0.270042194092827, + "grad_norm": 0.8098654747009277, + "learning_rate": 0.0012588354845487959, + "loss": 
1.603, + "step": 2560 + }, + { + "epoch": 0.27014767932489453, + "grad_norm": 0.717017650604248, + "learning_rate": 0.0012586510153326075, + "loss": 1.6229, + "step": 2561 + }, + { + "epoch": 0.270253164556962, + "grad_norm": 0.9235054850578308, + "learning_rate": 0.0012584664891195365, + "loss": 1.6675, + "step": 2562 + }, + { + "epoch": 0.2703586497890295, + "grad_norm": 0.7850826382637024, + "learning_rate": 0.0012582819059302598, + "loss": 1.6841, + "step": 2563 + }, + { + "epoch": 0.27046413502109706, + "grad_norm": 0.6891984343528748, + "learning_rate": 0.001258097265785461, + "loss": 1.6311, + "step": 2564 + }, + { + "epoch": 0.27056962025316456, + "grad_norm": 0.684668242931366, + "learning_rate": 0.0012579125687058302, + "loss": 1.6036, + "step": 2565 + }, + { + "epoch": 0.27067510548523205, + "grad_norm": 0.7330654859542847, + "learning_rate": 0.0012577278147120632, + "loss": 1.6198, + "step": 2566 + }, + { + "epoch": 0.2707805907172996, + "grad_norm": 0.7428572177886963, + "learning_rate": 0.0012575430038248628, + "loss": 1.6713, + "step": 2567 + }, + { + "epoch": 0.2708860759493671, + "grad_norm": 0.9125249981880188, + "learning_rate": 0.001257358136064938, + "loss": 1.6386, + "step": 2568 + }, + { + "epoch": 0.2709915611814346, + "grad_norm": 0.8294754028320312, + "learning_rate": 0.001257173211453004, + "loss": 1.5967, + "step": 2569 + }, + { + "epoch": 0.27109704641350213, + "grad_norm": 0.683096170425415, + "learning_rate": 0.001256988230009783, + "loss": 1.6702, + "step": 2570 + }, + { + "epoch": 0.2712025316455696, + "grad_norm": 0.6866984963417053, + "learning_rate": 0.0012568031917560027, + "loss": 1.6065, + "step": 2571 + }, + { + "epoch": 0.2713080168776371, + "grad_norm": 0.6915073990821838, + "learning_rate": 0.0012566180967123976, + "loss": 1.6417, + "step": 2572 + }, + { + "epoch": 0.27141350210970466, + "grad_norm": 0.6664041876792908, + "learning_rate": 0.0012564329448997082, + "loss": 1.6422, + "step": 2573 + }, + { + "epoch": 
0.27151898734177216, + "grad_norm": 0.6869789361953735, + "learning_rate": 0.0012562477363386821, + "loss": 1.6397, + "step": 2574 + }, + { + "epoch": 0.27162447257383965, + "grad_norm": 0.7732247710227966, + "learning_rate": 0.0012560624710500731, + "loss": 1.6387, + "step": 2575 + }, + { + "epoch": 0.2717299578059072, + "grad_norm": 0.7859042286872864, + "learning_rate": 0.0012558771490546407, + "loss": 1.6086, + "step": 2576 + }, + { + "epoch": 0.2718354430379747, + "grad_norm": 0.7394629716873169, + "learning_rate": 0.0012556917703731509, + "loss": 1.6532, + "step": 2577 + }, + { + "epoch": 0.2719409282700422, + "grad_norm": 0.657219648361206, + "learning_rate": 0.0012555063350263768, + "loss": 1.6997, + "step": 2578 + }, + { + "epoch": 0.27204641350210973, + "grad_norm": 0.7112653851509094, + "learning_rate": 0.0012553208430350973, + "loss": 1.6428, + "step": 2579 + }, + { + "epoch": 0.2721518987341772, + "grad_norm": 0.6749873161315918, + "learning_rate": 0.0012551352944200976, + "loss": 1.6387, + "step": 2580 + }, + { + "epoch": 0.2722573839662447, + "grad_norm": 0.6700681447982788, + "learning_rate": 0.0012549496892021693, + "loss": 1.6214, + "step": 2581 + }, + { + "epoch": 0.27236286919831226, + "grad_norm": 0.774787425994873, + "learning_rate": 0.0012547640274021103, + "loss": 1.6503, + "step": 2582 + }, + { + "epoch": 0.27246835443037976, + "grad_norm": 0.7524164319038391, + "learning_rate": 0.001254578309040725, + "loss": 1.6433, + "step": 2583 + }, + { + "epoch": 0.27257383966244725, + "grad_norm": 0.8102473616600037, + "learning_rate": 0.001254392534138824, + "loss": 1.6521, + "step": 2584 + }, + { + "epoch": 0.27267932489451474, + "grad_norm": 0.7593845725059509, + "learning_rate": 0.0012542067027172248, + "loss": 1.6679, + "step": 2585 + }, + { + "epoch": 0.2727848101265823, + "grad_norm": 0.7499566674232483, + "learning_rate": 0.0012540208147967503, + "loss": 1.6374, + "step": 2586 + }, + { + "epoch": 0.2728902953586498, + "grad_norm": 
1.1159844398498535, + "learning_rate": 0.00125383487039823, + "loss": 1.6661, + "step": 2587 + }, + { + "epoch": 0.2729957805907173, + "grad_norm": 1.2443692684173584, + "learning_rate": 0.0012536488695425003, + "loss": 1.6619, + "step": 2588 + }, + { + "epoch": 0.2731012658227848, + "grad_norm": 0.7559158205986023, + "learning_rate": 0.0012534628122504031, + "loss": 1.6369, + "step": 2589 + }, + { + "epoch": 0.2732067510548523, + "grad_norm": 0.9439697861671448, + "learning_rate": 0.0012532766985427874, + "loss": 1.6555, + "step": 2590 + }, + { + "epoch": 0.2733122362869198, + "grad_norm": 1.1905267238616943, + "learning_rate": 0.0012530905284405083, + "loss": 1.6523, + "step": 2591 + }, + { + "epoch": 0.27341772151898736, + "grad_norm": 0.7238471508026123, + "learning_rate": 0.0012529043019644266, + "loss": 1.6644, + "step": 2592 + }, + { + "epoch": 0.27352320675105485, + "grad_norm": 0.9727035164833069, + "learning_rate": 0.0012527180191354104, + "loss": 1.6516, + "step": 2593 + }, + { + "epoch": 0.27362869198312234, + "grad_norm": 1.075106143951416, + "learning_rate": 0.0012525316799743332, + "loss": 1.6607, + "step": 2594 + }, + { + "epoch": 0.2737341772151899, + "grad_norm": 0.6668490171432495, + "learning_rate": 0.0012523452845020755, + "loss": 1.6776, + "step": 2595 + }, + { + "epoch": 0.2738396624472574, + "grad_norm": 0.8466182947158813, + "learning_rate": 0.0012521588327395236, + "loss": 1.6465, + "step": 2596 + }, + { + "epoch": 0.2739451476793249, + "grad_norm": 1.0160082578659058, + "learning_rate": 0.0012519723247075706, + "loss": 1.6327, + "step": 2597 + }, + { + "epoch": 0.2740506329113924, + "grad_norm": 0.8699989914894104, + "learning_rate": 0.0012517857604271156, + "loss": 1.6083, + "step": 2598 + }, + { + "epoch": 0.2741561181434599, + "grad_norm": 0.695787787437439, + "learning_rate": 0.001251599139919064, + "loss": 1.6248, + "step": 2599 + }, + { + "epoch": 0.2742616033755274, + "grad_norm": 0.705179750919342, + "learning_rate": 
0.0012514124632043272, + "loss": 1.6722, + "step": 2600 + }, + { + "epoch": 0.27436708860759496, + "grad_norm": 0.6471294164657593, + "learning_rate": 0.001251225730303824, + "loss": 1.6189, + "step": 2601 + }, + { + "epoch": 0.27447257383966245, + "grad_norm": 0.6597792506217957, + "learning_rate": 0.0012510389412384785, + "loss": 1.6383, + "step": 2602 + }, + { + "epoch": 0.27457805907172994, + "grad_norm": 0.6893908977508545, + "learning_rate": 0.001250852096029221, + "loss": 1.5708, + "step": 2603 + }, + { + "epoch": 0.2746835443037975, + "grad_norm": 0.7250192165374756, + "learning_rate": 0.0012506651946969888, + "loss": 1.6525, + "step": 2604 + }, + { + "epoch": 0.274789029535865, + "grad_norm": 0.8630037903785706, + "learning_rate": 0.0012504782372627248, + "loss": 1.6779, + "step": 2605 + }, + { + "epoch": 0.2748945147679325, + "grad_norm": 0.709363579750061, + "learning_rate": 0.0012502912237473789, + "loss": 1.6647, + "step": 2606 + }, + { + "epoch": 0.275, + "grad_norm": 0.7350055575370789, + "learning_rate": 0.0012501041541719067, + "loss": 1.6619, + "step": 2607 + }, + { + "epoch": 0.2751054852320675, + "grad_norm": 0.8259351253509521, + "learning_rate": 0.0012499170285572702, + "loss": 1.6324, + "step": 2608 + }, + { + "epoch": 0.275210970464135, + "grad_norm": 0.6789529919624329, + "learning_rate": 0.0012497298469244377, + "loss": 1.657, + "step": 2609 + }, + { + "epoch": 0.27531645569620256, + "grad_norm": 0.7896199822425842, + "learning_rate": 0.0012495426092943842, + "loss": 1.6506, + "step": 2610 + }, + { + "epoch": 0.27542194092827005, + "grad_norm": 0.7265750765800476, + "learning_rate": 0.0012493553156880904, + "loss": 1.6654, + "step": 2611 + }, + { + "epoch": 0.27552742616033754, + "grad_norm": 0.802940309047699, + "learning_rate": 0.0012491679661265434, + "loss": 1.6567, + "step": 2612 + }, + { + "epoch": 0.2756329113924051, + "grad_norm": 0.7558470964431763, + "learning_rate": 0.0012489805606307367, + "loss": 1.6692, + "step": 2613 + }, + 
{ + "epoch": 0.2757383966244726, + "grad_norm": 0.7041638493537903, + "learning_rate": 0.00124879309922167, + "loss": 1.6394, + "step": 2614 + }, + { + "epoch": 0.2758438818565401, + "grad_norm": 0.785492479801178, + "learning_rate": 0.0012486055819203494, + "loss": 1.6437, + "step": 2615 + }, + { + "epoch": 0.2759493670886076, + "grad_norm": 0.6599269509315491, + "learning_rate": 0.001248418008747787, + "loss": 1.6227, + "step": 2616 + }, + { + "epoch": 0.2760548523206751, + "grad_norm": 0.6886391043663025, + "learning_rate": 0.0012482303797250014, + "loss": 1.6301, + "step": 2617 + }, + { + "epoch": 0.2761603375527426, + "grad_norm": 0.8918367028236389, + "learning_rate": 0.0012480426948730174, + "loss": 1.6221, + "step": 2618 + }, + { + "epoch": 0.2762658227848101, + "grad_norm": 1.1233196258544922, + "learning_rate": 0.001247854954212866, + "loss": 1.649, + "step": 2619 + }, + { + "epoch": 0.27637130801687765, + "grad_norm": 0.8642522692680359, + "learning_rate": 0.0012476671577655845, + "loss": 1.625, + "step": 2620 + }, + { + "epoch": 0.27647679324894514, + "grad_norm": 0.6912023425102234, + "learning_rate": 0.001247479305552216, + "loss": 1.6236, + "step": 2621 + }, + { + "epoch": 0.27658227848101263, + "grad_norm": 1.0860774517059326, + "learning_rate": 0.001247291397593811, + "loss": 1.603, + "step": 2622 + }, + { + "epoch": 0.2766877637130802, + "grad_norm": 1.0685538053512573, + "learning_rate": 0.001247103433911425, + "loss": 1.6161, + "step": 2623 + }, + { + "epoch": 0.2767932489451477, + "grad_norm": 0.8379772901535034, + "learning_rate": 0.0012469154145261208, + "loss": 1.6778, + "step": 2624 + }, + { + "epoch": 0.27689873417721517, + "grad_norm": 0.8333815336227417, + "learning_rate": 0.0012467273394589664, + "loss": 1.6222, + "step": 2625 + }, + { + "epoch": 0.2770042194092827, + "grad_norm": 1.1944139003753662, + "learning_rate": 0.0012465392087310366, + "loss": 1.6249, + "step": 2626 + }, + { + "epoch": 0.2771097046413502, + "grad_norm": 
0.8926601409912109, + "learning_rate": 0.0012463510223634125, + "loss": 1.6262, + "step": 2627 + }, + { + "epoch": 0.2772151898734177, + "grad_norm": 0.7094196081161499, + "learning_rate": 0.0012461627803771812, + "loss": 1.6593, + "step": 2628 + }, + { + "epoch": 0.27732067510548525, + "grad_norm": 0.8786176443099976, + "learning_rate": 0.0012459744827934367, + "loss": 1.6296, + "step": 2629 + }, + { + "epoch": 0.27742616033755274, + "grad_norm": 1.2719541788101196, + "learning_rate": 0.0012457861296332774, + "loss": 1.691, + "step": 2630 + }, + { + "epoch": 0.27753164556962023, + "grad_norm": 0.7598257064819336, + "learning_rate": 0.0012455977209178109, + "loss": 1.6506, + "step": 2631 + }, + { + "epoch": 0.2776371308016878, + "grad_norm": 1.2018969058990479, + "learning_rate": 0.0012454092566681482, + "loss": 1.6549, + "step": 2632 + }, + { + "epoch": 0.2777426160337553, + "grad_norm": 0.9543249011039734, + "learning_rate": 0.001245220736905408, + "loss": 1.619, + "step": 2633 + }, + { + "epoch": 0.27784810126582277, + "grad_norm": 0.9191215634346008, + "learning_rate": 0.0012450321616507148, + "loss": 1.6583, + "step": 2634 + }, + { + "epoch": 0.2779535864978903, + "grad_norm": 1.545577049255371, + "learning_rate": 0.0012448435309251995, + "loss": 1.6993, + "step": 2635 + }, + { + "epoch": 0.2780590717299578, + "grad_norm": 0.9537524580955505, + "learning_rate": 0.001244654844749999, + "loss": 1.6366, + "step": 2636 + }, + { + "epoch": 0.2781645569620253, + "grad_norm": 1.9738924503326416, + "learning_rate": 0.0012444661031462566, + "loss": 1.6779, + "step": 2637 + }, + { + "epoch": 0.27827004219409285, + "grad_norm": 1.8190100193023682, + "learning_rate": 0.0012442773061351216, + "loss": 1.6355, + "step": 2638 + }, + { + "epoch": 0.27837552742616034, + "grad_norm": 1.5301991701126099, + "learning_rate": 0.0012440884537377498, + "loss": 1.649, + "step": 2639 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 1.5671275854110718, + "learning_rate": 
0.001243899545975303, + "loss": 1.6443, + "step": 2640 + }, + { + "epoch": 0.2785864978902954, + "grad_norm": 1.2427258491516113, + "learning_rate": 0.0012437105828689494, + "loss": 1.6616, + "step": 2641 + }, + { + "epoch": 0.2786919831223629, + "grad_norm": 1.1113601922988892, + "learning_rate": 0.0012435215644398632, + "loss": 1.6153, + "step": 2642 + }, + { + "epoch": 0.27879746835443037, + "grad_norm": 1.5303798913955688, + "learning_rate": 0.0012433324907092243, + "loss": 1.6244, + "step": 2643 + }, + { + "epoch": 0.2789029535864979, + "grad_norm": 1.1876270771026611, + "learning_rate": 0.0012431433616982204, + "loss": 1.597, + "step": 2644 + }, + { + "epoch": 0.2790084388185654, + "grad_norm": 1.772317886352539, + "learning_rate": 0.0012429541774280435, + "loss": 1.6859, + "step": 2645 + }, + { + "epoch": 0.2791139240506329, + "grad_norm": 1.4478439092636108, + "learning_rate": 0.0012427649379198932, + "loss": 1.6452, + "step": 2646 + }, + { + "epoch": 0.27921940928270045, + "grad_norm": 1.393757939338684, + "learning_rate": 0.0012425756431949742, + "loss": 1.6509, + "step": 2647 + }, + { + "epoch": 0.27932489451476794, + "grad_norm": 1.176690697669983, + "learning_rate": 0.001242386293274498, + "loss": 1.6493, + "step": 2648 + }, + { + "epoch": 0.27943037974683543, + "grad_norm": 1.2514933347702026, + "learning_rate": 0.0012421968881796827, + "loss": 1.6375, + "step": 2649 + }, + { + "epoch": 0.2795358649789029, + "grad_norm": 1.0940608978271484, + "learning_rate": 0.0012420074279317515, + "loss": 1.6286, + "step": 2650 + }, + { + "epoch": 0.2796413502109705, + "grad_norm": 1.3726834058761597, + "learning_rate": 0.001241817912551935, + "loss": 1.6114, + "step": 2651 + }, + { + "epoch": 0.27974683544303797, + "grad_norm": 0.9643866419792175, + "learning_rate": 0.0012416283420614686, + "loss": 1.6436, + "step": 2652 + }, + { + "epoch": 0.27985232067510546, + "grad_norm": 1.233285665512085, + "learning_rate": 0.0012414387164815953, + "loss": 1.6463, + "step": 
2653 + }, + { + "epoch": 0.279957805907173, + "grad_norm": 0.9728710055351257, + "learning_rate": 0.001241249035833563, + "loss": 1.6496, + "step": 2654 + }, + { + "epoch": 0.2800632911392405, + "grad_norm": 1.3090026378631592, + "learning_rate": 0.0012410593001386267, + "loss": 1.6153, + "step": 2655 + }, + { + "epoch": 0.280168776371308, + "grad_norm": 0.9553221464157104, + "learning_rate": 0.0012408695094180474, + "loss": 1.6189, + "step": 2656 + }, + { + "epoch": 0.28027426160337554, + "grad_norm": 1.226815104484558, + "learning_rate": 0.0012406796636930918, + "loss": 1.5974, + "step": 2657 + }, + { + "epoch": 0.28037974683544303, + "grad_norm": 0.8489522337913513, + "learning_rate": 0.001240489762985033, + "loss": 1.5712, + "step": 2658 + }, + { + "epoch": 0.2804852320675105, + "grad_norm": 1.1510286331176758, + "learning_rate": 0.0012402998073151505, + "loss": 1.6428, + "step": 2659 + }, + { + "epoch": 0.2805907172995781, + "grad_norm": 0.7797077298164368, + "learning_rate": 0.0012401097967047298, + "loss": 1.6274, + "step": 2660 + }, + { + "epoch": 0.28069620253164557, + "grad_norm": 0.9535449743270874, + "learning_rate": 0.0012399197311750623, + "loss": 1.6493, + "step": 2661 + }, + { + "epoch": 0.28080168776371306, + "grad_norm": 0.8129039406776428, + "learning_rate": 0.001239729610747446, + "loss": 1.6337, + "step": 2662 + }, + { + "epoch": 0.2809071729957806, + "grad_norm": 0.8036606311798096, + "learning_rate": 0.001239539435443185, + "loss": 1.6169, + "step": 2663 + }, + { + "epoch": 0.2810126582278481, + "grad_norm": 0.7693716883659363, + "learning_rate": 0.001239349205283589, + "loss": 1.6115, + "step": 2664 + }, + { + "epoch": 0.2811181434599156, + "grad_norm": 0.7182550430297852, + "learning_rate": 0.0012391589202899746, + "loss": 1.6269, + "step": 2665 + }, + { + "epoch": 0.28122362869198314, + "grad_norm": 0.7046446800231934, + "learning_rate": 0.001238968580483664, + "loss": 1.6275, + "step": 2666 + }, + { + "epoch": 0.28132911392405063, + 
"grad_norm": 0.7625762820243835, + "learning_rate": 0.0012387781858859857, + "loss": 1.6144, + "step": 2667 + }, + { + "epoch": 0.2814345991561181, + "grad_norm": 0.7335032224655151, + "learning_rate": 0.0012385877365182743, + "loss": 1.6179, + "step": 2668 + }, + { + "epoch": 0.2815400843881857, + "grad_norm": 0.70329350233078, + "learning_rate": 0.0012383972324018708, + "loss": 1.6352, + "step": 2669 + }, + { + "epoch": 0.28164556962025317, + "grad_norm": 0.7936944961547852, + "learning_rate": 0.001238206673558122, + "loss": 1.6173, + "step": 2670 + }, + { + "epoch": 0.28175105485232066, + "grad_norm": 0.719795823097229, + "learning_rate": 0.001238016060008381, + "loss": 1.6567, + "step": 2671 + }, + { + "epoch": 0.2818565400843882, + "grad_norm": 0.7076047658920288, + "learning_rate": 0.0012378253917740072, + "loss": 1.6351, + "step": 2672 + }, + { + "epoch": 0.2819620253164557, + "grad_norm": 1.146682858467102, + "learning_rate": 0.0012376346688763656, + "loss": 1.6788, + "step": 2673 + }, + { + "epoch": 0.2820675105485232, + "grad_norm": 0.739061713218689, + "learning_rate": 0.0012374438913368277, + "loss": 1.6007, + "step": 2674 + }, + { + "epoch": 0.28217299578059074, + "grad_norm": 0.885874330997467, + "learning_rate": 0.0012372530591767711, + "loss": 1.6285, + "step": 2675 + }, + { + "epoch": 0.28227848101265823, + "grad_norm": 1.1954102516174316, + "learning_rate": 0.0012370621724175797, + "loss": 1.6409, + "step": 2676 + }, + { + "epoch": 0.2823839662447257, + "grad_norm": 0.6706337928771973, + "learning_rate": 0.0012368712310806432, + "loss": 1.5978, + "step": 2677 + }, + { + "epoch": 0.2824894514767933, + "grad_norm": 0.8663415312767029, + "learning_rate": 0.0012366802351873574, + "loss": 1.5974, + "step": 2678 + }, + { + "epoch": 0.28259493670886077, + "grad_norm": 0.7844992280006409, + "learning_rate": 0.0012364891847591246, + "loss": 1.6446, + "step": 2679 + }, + { + "epoch": 0.28270042194092826, + "grad_norm": 0.7213519811630249, + "learning_rate": 
0.0012362980798173526, + "loss": 1.6145, + "step": 2680 + }, + { + "epoch": 0.2828059071729958, + "grad_norm": 0.7322648763656616, + "learning_rate": 0.0012361069203834561, + "loss": 1.6062, + "step": 2681 + }, + { + "epoch": 0.2829113924050633, + "grad_norm": 0.6675441861152649, + "learning_rate": 0.0012359157064788548, + "loss": 1.5847, + "step": 2682 + }, + { + "epoch": 0.2830168776371308, + "grad_norm": 0.7546653747558594, + "learning_rate": 0.0012357244381249759, + "loss": 1.6488, + "step": 2683 + }, + { + "epoch": 0.2831223628691983, + "grad_norm": 0.8150287866592407, + "learning_rate": 0.0012355331153432517, + "loss": 1.647, + "step": 2684 + }, + { + "epoch": 0.28322784810126583, + "grad_norm": 0.6966265439987183, + "learning_rate": 0.0012353417381551206, + "loss": 1.6033, + "step": 2685 + }, + { + "epoch": 0.2833333333333333, + "grad_norm": 0.6496126651763916, + "learning_rate": 0.001235150306582028, + "loss": 1.6242, + "step": 2686 + }, + { + "epoch": 0.2834388185654008, + "grad_norm": 0.7338182330131531, + "learning_rate": 0.001234958820645424, + "loss": 1.6432, + "step": 2687 + }, + { + "epoch": 0.28354430379746837, + "grad_norm": 0.7376387119293213, + "learning_rate": 0.0012347672803667662, + "loss": 1.624, + "step": 2688 + }, + { + "epoch": 0.28364978902953586, + "grad_norm": 0.6296065449714661, + "learning_rate": 0.0012345756857675171, + "loss": 1.6383, + "step": 2689 + }, + { + "epoch": 0.28375527426160335, + "grad_norm": 0.6635739207267761, + "learning_rate": 0.0012343840368691462, + "loss": 1.6754, + "step": 2690 + }, + { + "epoch": 0.2838607594936709, + "grad_norm": 0.6771047711372375, + "learning_rate": 0.0012341923336931287, + "loss": 1.6389, + "step": 2691 + }, + { + "epoch": 0.2839662447257384, + "grad_norm": 0.6565672159194946, + "learning_rate": 0.0012340005762609457, + "loss": 1.6109, + "step": 2692 + }, + { + "epoch": 0.2840717299578059, + "grad_norm": 0.931950032711029, + "learning_rate": 0.0012338087645940847, + "loss": 1.6598, + "step": 
2693 + }, + { + "epoch": 0.28417721518987343, + "grad_norm": 1.0284841060638428, + "learning_rate": 0.001233616898714039, + "loss": 1.6005, + "step": 2694 + }, + { + "epoch": 0.2842827004219409, + "grad_norm": 0.8755816221237183, + "learning_rate": 0.0012334249786423086, + "loss": 1.6486, + "step": 2695 + }, + { + "epoch": 0.2843881856540084, + "grad_norm": 0.6699270009994507, + "learning_rate": 0.0012332330044003987, + "loss": 1.6683, + "step": 2696 + }, + { + "epoch": 0.28449367088607597, + "grad_norm": 0.8623033165931702, + "learning_rate": 0.0012330409760098208, + "loss": 1.6136, + "step": 2697 + }, + { + "epoch": 0.28459915611814346, + "grad_norm": 0.7619473338127136, + "learning_rate": 0.0012328488934920932, + "loss": 1.6311, + "step": 2698 + }, + { + "epoch": 0.28470464135021095, + "grad_norm": 0.6894973516464233, + "learning_rate": 0.001232656756868739, + "loss": 1.6133, + "step": 2699 + }, + { + "epoch": 0.2848101265822785, + "grad_norm": 1.1020275354385376, + "learning_rate": 0.0012324645661612886, + "loss": 1.5926, + "step": 2700 + }, + { + "epoch": 0.284915611814346, + "grad_norm": 1.1548385620117188, + "learning_rate": 0.001232272321391278, + "loss": 1.666, + "step": 2701 + }, + { + "epoch": 0.2850210970464135, + "grad_norm": 0.6654823422431946, + "learning_rate": 0.0012320800225802488, + "loss": 1.6463, + "step": 2702 + }, + { + "epoch": 0.28512658227848103, + "grad_norm": 0.7321997880935669, + "learning_rate": 0.001231887669749749, + "loss": 1.644, + "step": 2703 + }, + { + "epoch": 0.2852320675105485, + "grad_norm": 0.6755750775337219, + "learning_rate": 0.0012316952629213332, + "loss": 1.6102, + "step": 2704 + }, + { + "epoch": 0.285337552742616, + "grad_norm": 0.7617190480232239, + "learning_rate": 0.001231502802116561, + "loss": 1.6472, + "step": 2705 + }, + { + "epoch": 0.28544303797468357, + "grad_norm": 0.6864871382713318, + "learning_rate": 0.0012313102873569993, + "loss": 1.628, + "step": 2706 + }, + { + "epoch": 0.28554852320675106, + 
"grad_norm": 0.7773123383522034, + "learning_rate": 0.0012311177186642194, + "loss": 1.6286, + "step": 2707 + }, + { + "epoch": 0.28565400843881855, + "grad_norm": 0.6974926590919495, + "learning_rate": 0.0012309250960598, + "loss": 1.6078, + "step": 2708 + }, + { + "epoch": 0.2857594936708861, + "grad_norm": 0.8668237924575806, + "learning_rate": 0.0012307324195653256, + "loss": 1.6132, + "step": 2709 + }, + { + "epoch": 0.2858649789029536, + "grad_norm": 0.77423495054245, + "learning_rate": 0.0012305396892023867, + "loss": 1.637, + "step": 2710 + }, + { + "epoch": 0.2859704641350211, + "grad_norm": 0.7951544523239136, + "learning_rate": 0.0012303469049925791, + "loss": 1.6338, + "step": 2711 + }, + { + "epoch": 0.28607594936708863, + "grad_norm": 0.8779679536819458, + "learning_rate": 0.001230154066957506, + "loss": 1.6717, + "step": 2712 + }, + { + "epoch": 0.2861814345991561, + "grad_norm": 0.6584620475769043, + "learning_rate": 0.001229961175118775, + "loss": 1.6175, + "step": 2713 + }, + { + "epoch": 0.2862869198312236, + "grad_norm": 0.8629369735717773, + "learning_rate": 0.0012297682294980013, + "loss": 1.6584, + "step": 2714 + }, + { + "epoch": 0.28639240506329117, + "grad_norm": 0.8503738641738892, + "learning_rate": 0.0012295752301168048, + "loss": 1.6493, + "step": 2715 + }, + { + "epoch": 0.28649789029535866, + "grad_norm": 0.7552770972251892, + "learning_rate": 0.0012293821769968126, + "loss": 1.6337, + "step": 2716 + }, + { + "epoch": 0.28660337552742615, + "grad_norm": 0.9436476826667786, + "learning_rate": 0.001229189070159657, + "loss": 1.6647, + "step": 2717 + }, + { + "epoch": 0.28670886075949364, + "grad_norm": 0.6887471675872803, + "learning_rate": 0.0012289959096269767, + "loss": 1.6002, + "step": 2718 + }, + { + "epoch": 0.2868143459915612, + "grad_norm": 0.7617764472961426, + "learning_rate": 0.0012288026954204165, + "loss": 1.6037, + "step": 2719 + }, + { + "epoch": 0.2869198312236287, + "grad_norm": 0.8882134556770325, + "learning_rate": 
0.0012286094275616264, + "loss": 1.6098, + "step": 2720 + }, + { + "epoch": 0.2870253164556962, + "grad_norm": 0.839887261390686, + "learning_rate": 0.0012284161060722634, + "loss": 1.6161, + "step": 2721 + }, + { + "epoch": 0.2871308016877637, + "grad_norm": 0.810077965259552, + "learning_rate": 0.00122822273097399, + "loss": 1.6011, + "step": 2722 + }, + { + "epoch": 0.2872362869198312, + "grad_norm": 0.851706326007843, + "learning_rate": 0.0012280293022884753, + "loss": 1.6382, + "step": 2723 + }, + { + "epoch": 0.2873417721518987, + "grad_norm": 0.7951095104217529, + "learning_rate": 0.0012278358200373935, + "loss": 1.5842, + "step": 2724 + }, + { + "epoch": 0.28744725738396626, + "grad_norm": 0.7145553231239319, + "learning_rate": 0.001227642284242425, + "loss": 1.6184, + "step": 2725 + }, + { + "epoch": 0.28755274261603375, + "grad_norm": 0.7010661959648132, + "learning_rate": 0.0012274486949252572, + "loss": 1.6393, + "step": 2726 + }, + { + "epoch": 0.28765822784810124, + "grad_norm": 0.6945564150810242, + "learning_rate": 0.0012272550521075824, + "loss": 1.623, + "step": 2727 + }, + { + "epoch": 0.2877637130801688, + "grad_norm": 0.7284974455833435, + "learning_rate": 0.0012270613558110993, + "loss": 1.5984, + "step": 2728 + }, + { + "epoch": 0.2878691983122363, + "grad_norm": 0.8270639181137085, + "learning_rate": 0.001226867606057512, + "loss": 1.6373, + "step": 2729 + }, + { + "epoch": 0.2879746835443038, + "grad_norm": 0.8443357348442078, + "learning_rate": 0.0012266738028685318, + "loss": 1.6291, + "step": 2730 + }, + { + "epoch": 0.2880801687763713, + "grad_norm": 0.7627793550491333, + "learning_rate": 0.001226479946265875, + "loss": 1.6368, + "step": 2731 + }, + { + "epoch": 0.2881856540084388, + "grad_norm": 0.8721659183502197, + "learning_rate": 0.0012262860362712645, + "loss": 1.6402, + "step": 2732 + }, + { + "epoch": 0.2882911392405063, + "grad_norm": 0.9444838762283325, + "learning_rate": 0.0012260920729064285, + "loss": 1.6269, + "step": 2733 
+ }, + { + "epoch": 0.28839662447257386, + "grad_norm": 0.6343170404434204, + "learning_rate": 0.0012258980561931016, + "loss": 1.6133, + "step": 2734 + }, + { + "epoch": 0.28850210970464135, + "grad_norm": 0.811345100402832, + "learning_rate": 0.0012257039861530246, + "loss": 1.5977, + "step": 2735 + }, + { + "epoch": 0.28860759493670884, + "grad_norm": 0.7546283006668091, + "learning_rate": 0.0012255098628079439, + "loss": 1.6034, + "step": 2736 + }, + { + "epoch": 0.2887130801687764, + "grad_norm": 0.6987741589546204, + "learning_rate": 0.0012253156861796119, + "loss": 1.6051, + "step": 2737 + }, + { + "epoch": 0.2888185654008439, + "grad_norm": 0.7356367707252502, + "learning_rate": 0.0012251214562897872, + "loss": 1.6406, + "step": 2738 + }, + { + "epoch": 0.2889240506329114, + "grad_norm": 0.8317294716835022, + "learning_rate": 0.0012249271731602342, + "loss": 1.6553, + "step": 2739 + }, + { + "epoch": 0.2890295358649789, + "grad_norm": 0.884003758430481, + "learning_rate": 0.001224732836812723, + "loss": 1.6701, + "step": 2740 + }, + { + "epoch": 0.2891350210970464, + "grad_norm": 0.9512714147567749, + "learning_rate": 0.0012245384472690302, + "loss": 1.6212, + "step": 2741 + }, + { + "epoch": 0.2892405063291139, + "grad_norm": 0.7853246927261353, + "learning_rate": 0.0012243440045509384, + "loss": 1.6431, + "step": 2742 + }, + { + "epoch": 0.28934599156118146, + "grad_norm": 0.6707215309143066, + "learning_rate": 0.0012241495086802356, + "loss": 1.6047, + "step": 2743 + }, + { + "epoch": 0.28945147679324895, + "grad_norm": 0.8586227297782898, + "learning_rate": 0.0012239549596787158, + "loss": 1.6439, + "step": 2744 + }, + { + "epoch": 0.28955696202531644, + "grad_norm": 0.8879833221435547, + "learning_rate": 0.0012237603575681797, + "loss": 1.6426, + "step": 2745 + }, + { + "epoch": 0.289662447257384, + "grad_norm": 0.6875707507133484, + "learning_rate": 0.0012235657023704327, + "loss": 1.6374, + "step": 2746 + }, + { + "epoch": 0.2897679324894515, + 
"grad_norm": 0.7488815188407898, + "learning_rate": 0.001223370994107288, + "loss": 1.6191, + "step": 2747 + }, + { + "epoch": 0.289873417721519, + "grad_norm": 0.6836351156234741, + "learning_rate": 0.0012231762328005623, + "loss": 1.6415, + "step": 2748 + }, + { + "epoch": 0.28997890295358647, + "grad_norm": 0.7042399048805237, + "learning_rate": 0.0012229814184720805, + "loss": 1.657, + "step": 2749 + }, + { + "epoch": 0.290084388185654, + "grad_norm": 0.6556987166404724, + "learning_rate": 0.0012227865511436724, + "loss": 1.6218, + "step": 2750 + }, + { + "epoch": 0.2901898734177215, + "grad_norm": 0.6746068000793457, + "learning_rate": 0.0012225916308371736, + "loss": 1.6149, + "step": 2751 + }, + { + "epoch": 0.290295358649789, + "grad_norm": 0.7582192420959473, + "learning_rate": 0.001222396657574426, + "loss": 1.6476, + "step": 2752 + }, + { + "epoch": 0.29040084388185655, + "grad_norm": 0.7710169553756714, + "learning_rate": 0.0012222016313772773, + "loss": 1.6123, + "step": 2753 + }, + { + "epoch": 0.29050632911392404, + "grad_norm": 0.7131015062332153, + "learning_rate": 0.0012220065522675811, + "loss": 1.654, + "step": 2754 + }, + { + "epoch": 0.29061181434599154, + "grad_norm": 0.6998159885406494, + "learning_rate": 0.0012218114202671973, + "loss": 1.6637, + "step": 2755 + }, + { + "epoch": 0.2907172995780591, + "grad_norm": 0.7233477830886841, + "learning_rate": 0.001221616235397991, + "loss": 1.642, + "step": 2756 + }, + { + "epoch": 0.2908227848101266, + "grad_norm": 0.7871367931365967, + "learning_rate": 0.001221420997681834, + "loss": 1.6154, + "step": 2757 + }, + { + "epoch": 0.29092827004219407, + "grad_norm": 0.8512035012245178, + "learning_rate": 0.0012212257071406037, + "loss": 1.6265, + "step": 2758 + }, + { + "epoch": 0.2910337552742616, + "grad_norm": 0.884881854057312, + "learning_rate": 0.0012210303637961828, + "loss": 1.6892, + "step": 2759 + }, + { + "epoch": 0.2911392405063291, + "grad_norm": 0.8364807963371277, + "learning_rate": 
0.001220834967670461, + "loss": 1.6282, + "step": 2760 + }, + { + "epoch": 0.2912447257383966, + "grad_norm": 0.7049006819725037, + "learning_rate": 0.0012206395187853334, + "loss": 1.6505, + "step": 2761 + }, + { + "epoch": 0.29135021097046415, + "grad_norm": 0.826811671257019, + "learning_rate": 0.0012204440171627005, + "loss": 1.6634, + "step": 2762 + }, + { + "epoch": 0.29145569620253164, + "grad_norm": 0.7234025001525879, + "learning_rate": 0.00122024846282447, + "loss": 1.6623, + "step": 2763 + }, + { + "epoch": 0.29156118143459914, + "grad_norm": 0.8023393750190735, + "learning_rate": 0.0012200528557925543, + "loss": 1.5981, + "step": 2764 + }, + { + "epoch": 0.2916666666666667, + "grad_norm": 1.0209184885025024, + "learning_rate": 0.0012198571960888721, + "loss": 1.6493, + "step": 2765 + }, + { + "epoch": 0.2917721518987342, + "grad_norm": 0.9799633026123047, + "learning_rate": 0.0012196614837353481, + "loss": 1.623, + "step": 2766 + }, + { + "epoch": 0.29187763713080167, + "grad_norm": 0.80025714635849, + "learning_rate": 0.001219465718753913, + "loss": 1.6293, + "step": 2767 + }, + { + "epoch": 0.2919831223628692, + "grad_norm": 0.748406171798706, + "learning_rate": 0.0012192699011665034, + "loss": 1.6069, + "step": 2768 + }, + { + "epoch": 0.2920886075949367, + "grad_norm": 0.6499134302139282, + "learning_rate": 0.0012190740309950612, + "loss": 1.6343, + "step": 2769 + }, + { + "epoch": 0.2921940928270042, + "grad_norm": 0.7308557629585266, + "learning_rate": 0.0012188781082615346, + "loss": 1.6573, + "step": 2770 + }, + { + "epoch": 0.29229957805907175, + "grad_norm": 0.6848729848861694, + "learning_rate": 0.0012186821329878783, + "loss": 1.6341, + "step": 2771 + }, + { + "epoch": 0.29240506329113924, + "grad_norm": 0.6609756946563721, + "learning_rate": 0.0012184861051960517, + "loss": 1.6527, + "step": 2772 + }, + { + "epoch": 0.29251054852320674, + "grad_norm": 0.8009521961212158, + "learning_rate": 0.001218290024908021, + "loss": 1.6345, + "step": 
2773 + }, + { + "epoch": 0.2926160337552743, + "grad_norm": 0.7606810927391052, + "learning_rate": 0.0012180938921457576, + "loss": 1.6263, + "step": 2774 + }, + { + "epoch": 0.2927215189873418, + "grad_norm": 0.6851853132247925, + "learning_rate": 0.00121789770693124, + "loss": 1.6164, + "step": 2775 + }, + { + "epoch": 0.29282700421940927, + "grad_norm": 0.6578633785247803, + "learning_rate": 0.001217701469286451, + "loss": 1.6349, + "step": 2776 + }, + { + "epoch": 0.2929324894514768, + "grad_norm": 0.6971039175987244, + "learning_rate": 0.00121750517923338, + "loss": 1.6387, + "step": 2777 + }, + { + "epoch": 0.2930379746835443, + "grad_norm": 0.6745924353599548, + "learning_rate": 0.0012173088367940228, + "loss": 1.6184, + "step": 2778 + }, + { + "epoch": 0.2931434599156118, + "grad_norm": 0.7943026423454285, + "learning_rate": 0.0012171124419903799, + "loss": 1.6373, + "step": 2779 + }, + { + "epoch": 0.29324894514767935, + "grad_norm": 0.7099438309669495, + "learning_rate": 0.0012169159948444588, + "loss": 1.6116, + "step": 2780 + }, + { + "epoch": 0.29335443037974684, + "grad_norm": 0.849506676197052, + "learning_rate": 0.001216719495378272, + "loss": 1.6312, + "step": 2781 + }, + { + "epoch": 0.29345991561181434, + "grad_norm": 0.8876755237579346, + "learning_rate": 0.0012165229436138388, + "loss": 1.6337, + "step": 2782 + }, + { + "epoch": 0.29356540084388183, + "grad_norm": 0.7696475982666016, + "learning_rate": 0.0012163263395731834, + "loss": 1.6197, + "step": 2783 + }, + { + "epoch": 0.2936708860759494, + "grad_norm": 0.7333899736404419, + "learning_rate": 0.0012161296832783363, + "loss": 1.6303, + "step": 2784 + }, + { + "epoch": 0.29377637130801687, + "grad_norm": 0.861324667930603, + "learning_rate": 0.0012159329747513338, + "loss": 1.6294, + "step": 2785 + }, + { + "epoch": 0.29388185654008436, + "grad_norm": 1.1407444477081299, + "learning_rate": 0.001215736214014218, + "loss": 1.5954, + "step": 2786 + }, + { + "epoch": 0.2939873417721519, + 
"grad_norm": 0.7827795147895813, + "learning_rate": 0.001215539401089037, + "loss": 1.6191, + "step": 2787 + }, + { + "epoch": 0.2940928270042194, + "grad_norm": 0.725460946559906, + "learning_rate": 0.0012153425359978452, + "loss": 1.6516, + "step": 2788 + }, + { + "epoch": 0.2941983122362869, + "grad_norm": 1.0486637353897095, + "learning_rate": 0.0012151456187627016, + "loss": 1.6581, + "step": 2789 + }, + { + "epoch": 0.29430379746835444, + "grad_norm": 1.1834450960159302, + "learning_rate": 0.001214948649405672, + "loss": 1.6312, + "step": 2790 + }, + { + "epoch": 0.29440928270042194, + "grad_norm": 0.7165926098823547, + "learning_rate": 0.0012147516279488275, + "loss": 1.6477, + "step": 2791 + }, + { + "epoch": 0.29451476793248943, + "grad_norm": 0.9313712120056152, + "learning_rate": 0.0012145545544142461, + "loss": 1.5948, + "step": 2792 + }, + { + "epoch": 0.294620253164557, + "grad_norm": 1.175876498222351, + "learning_rate": 0.00121435742882401, + "loss": 1.6356, + "step": 2793 + }, + { + "epoch": 0.29472573839662447, + "grad_norm": 0.6638351678848267, + "learning_rate": 0.001214160251200209, + "loss": 1.6409, + "step": 2794 + }, + { + "epoch": 0.29483122362869196, + "grad_norm": 1.1977322101593018, + "learning_rate": 0.0012139630215649369, + "loss": 1.6123, + "step": 2795 + }, + { + "epoch": 0.2949367088607595, + "grad_norm": 0.8269637227058411, + "learning_rate": 0.0012137657399402947, + "loss": 1.6627, + "step": 2796 + }, + { + "epoch": 0.295042194092827, + "grad_norm": 0.7425240278244019, + "learning_rate": 0.0012135684063483891, + "loss": 1.6323, + "step": 2797 + }, + { + "epoch": 0.2951476793248945, + "grad_norm": 1.1747558116912842, + "learning_rate": 0.0012133710208113318, + "loss": 1.6495, + "step": 2798 + }, + { + "epoch": 0.29525316455696204, + "grad_norm": 0.992513120174408, + "learning_rate": 0.0012131735833512411, + "loss": 1.6336, + "step": 2799 + }, + { + "epoch": 0.29535864978902954, + "grad_norm": 0.6781622171401978, + "learning_rate": 
0.0012129760939902407, + "loss": 1.5883, + "step": 2800 + }, + { + "epoch": 0.29546413502109703, + "grad_norm": 0.9226973652839661, + "learning_rate": 0.0012127785527504603, + "loss": 1.6351, + "step": 2801 + }, + { + "epoch": 0.2955696202531646, + "grad_norm": 1.1647703647613525, + "learning_rate": 0.0012125809596540357, + "loss": 1.6084, + "step": 2802 + }, + { + "epoch": 0.29567510548523207, + "grad_norm": 0.6967902779579163, + "learning_rate": 0.0012123833147231079, + "loss": 1.6375, + "step": 2803 + }, + { + "epoch": 0.29578059071729956, + "grad_norm": 0.849210262298584, + "learning_rate": 0.0012121856179798237, + "loss": 1.6377, + "step": 2804 + }, + { + "epoch": 0.2958860759493671, + "grad_norm": 0.9073466062545776, + "learning_rate": 0.0012119878694463366, + "loss": 1.6042, + "step": 2805 + }, + { + "epoch": 0.2959915611814346, + "grad_norm": 0.7191405892372131, + "learning_rate": 0.001211790069144805, + "loss": 1.6278, + "step": 2806 + }, + { + "epoch": 0.2960970464135021, + "grad_norm": 0.7737127542495728, + "learning_rate": 0.0012115922170973935, + "loss": 1.6311, + "step": 2807 + }, + { + "epoch": 0.29620253164556964, + "grad_norm": 0.9117769002914429, + "learning_rate": 0.0012113943133262722, + "loss": 1.6141, + "step": 2808 + }, + { + "epoch": 0.29630801687763714, + "grad_norm": 0.9149655699729919, + "learning_rate": 0.0012111963578536177, + "loss": 1.6529, + "step": 2809 + }, + { + "epoch": 0.29641350210970463, + "grad_norm": 0.7528342604637146, + "learning_rate": 0.0012109983507016114, + "loss": 1.6252, + "step": 2810 + }, + { + "epoch": 0.2965189873417722, + "grad_norm": 0.8505373597145081, + "learning_rate": 0.0012108002918924411, + "loss": 1.6238, + "step": 2811 + }, + { + "epoch": 0.29662447257383967, + "grad_norm": 0.9905965328216553, + "learning_rate": 0.0012106021814483007, + "loss": 1.6168, + "step": 2812 + }, + { + "epoch": 0.29672995780590716, + "grad_norm": 0.7602477669715881, + "learning_rate": 0.0012104040193913884, + "loss": 1.6258, + 
"step": 2813 + }, + { + "epoch": 0.2968354430379747, + "grad_norm": 0.7356570959091187, + "learning_rate": 0.0012102058057439104, + "loss": 1.6663, + "step": 2814 + }, + { + "epoch": 0.2969409282700422, + "grad_norm": 0.9693005084991455, + "learning_rate": 0.001210007540528077, + "loss": 1.6386, + "step": 2815 + }, + { + "epoch": 0.2970464135021097, + "grad_norm": 0.8016921281814575, + "learning_rate": 0.0012098092237661049, + "loss": 1.5934, + "step": 2816 + }, + { + "epoch": 0.2971518987341772, + "grad_norm": 0.6700240969657898, + "learning_rate": 0.0012096108554802165, + "loss": 1.6315, + "step": 2817 + }, + { + "epoch": 0.29725738396624474, + "grad_norm": 0.8062793016433716, + "learning_rate": 0.0012094124356926397, + "loss": 1.6616, + "step": 2818 + }, + { + "epoch": 0.29736286919831223, + "grad_norm": 0.7591367959976196, + "learning_rate": 0.001209213964425609, + "loss": 1.6365, + "step": 2819 + }, + { + "epoch": 0.2974683544303797, + "grad_norm": 0.6977962851524353, + "learning_rate": 0.0012090154417013636, + "loss": 1.6606, + "step": 2820 + }, + { + "epoch": 0.29757383966244727, + "grad_norm": 0.7375084757804871, + "learning_rate": 0.0012088168675421487, + "loss": 1.5958, + "step": 2821 + }, + { + "epoch": 0.29767932489451476, + "grad_norm": 0.6917998194694519, + "learning_rate": 0.0012086182419702165, + "loss": 1.5917, + "step": 2822 + }, + { + "epoch": 0.29778481012658226, + "grad_norm": 0.6293259859085083, + "learning_rate": 0.0012084195650078232, + "loss": 1.6266, + "step": 2823 + }, + { + "epoch": 0.2978902953586498, + "grad_norm": 0.6965408325195312, + "learning_rate": 0.001208220836677232, + "loss": 1.5933, + "step": 2824 + }, + { + "epoch": 0.2979957805907173, + "grad_norm": 0.6604257225990295, + "learning_rate": 0.0012080220570007108, + "loss": 1.598, + "step": 2825 + }, + { + "epoch": 0.2981012658227848, + "grad_norm": 0.6697686314582825, + "learning_rate": 0.001207823226000534, + "loss": 1.6157, + "step": 2826 + }, + { + "epoch": 
0.29820675105485234, + "grad_norm": 0.6759828329086304, + "learning_rate": 0.0012076243436989823, + "loss": 1.5712, + "step": 2827 + }, + { + "epoch": 0.29831223628691983, + "grad_norm": 0.9203044772148132, + "learning_rate": 0.0012074254101183408, + "loss": 1.6072, + "step": 2828 + }, + { + "epoch": 0.2984177215189873, + "grad_norm": 1.0256153345108032, + "learning_rate": 0.001207226425280901, + "loss": 1.6254, + "step": 2829 + }, + { + "epoch": 0.29852320675105487, + "grad_norm": 0.7486294507980347, + "learning_rate": 0.0012070273892089605, + "loss": 1.6089, + "step": 2830 + }, + { + "epoch": 0.29862869198312236, + "grad_norm": 0.7653860449790955, + "learning_rate": 0.001206828301924822, + "loss": 1.5917, + "step": 2831 + }, + { + "epoch": 0.29873417721518986, + "grad_norm": 1.0308483839035034, + "learning_rate": 0.0012066291634507944, + "loss": 1.6401, + "step": 2832 + }, + { + "epoch": 0.2988396624472574, + "grad_norm": 0.9453378915786743, + "learning_rate": 0.001206429973809192, + "loss": 1.6093, + "step": 2833 + }, + { + "epoch": 0.2989451476793249, + "grad_norm": 0.7903749942779541, + "learning_rate": 0.001206230733022335, + "loss": 1.6694, + "step": 2834 + }, + { + "epoch": 0.2990506329113924, + "grad_norm": 0.6939392685890198, + "learning_rate": 0.0012060314411125497, + "loss": 1.584, + "step": 2835 + }, + { + "epoch": 0.29915611814345994, + "grad_norm": 0.7264984846115112, + "learning_rate": 0.0012058320981021672, + "loss": 1.5851, + "step": 2836 + }, + { + "epoch": 0.29926160337552743, + "grad_norm": 0.8615245819091797, + "learning_rate": 0.001205632704013525, + "loss": 1.6133, + "step": 2837 + }, + { + "epoch": 0.2993670886075949, + "grad_norm": 0.8284863829612732, + "learning_rate": 0.0012054332588689667, + "loss": 1.5931, + "step": 2838 + }, + { + "epoch": 0.29947257383966247, + "grad_norm": 0.6400719285011292, + "learning_rate": 0.0012052337626908406, + "loss": 1.5986, + "step": 2839 + }, + { + "epoch": 0.29957805907172996, + "grad_norm": 
0.8828486800193787, + "learning_rate": 0.0012050342155015012, + "loss": 1.6341, + "step": 2840 + }, + { + "epoch": 0.29968354430379746, + "grad_norm": 0.7950198650360107, + "learning_rate": 0.0012048346173233091, + "loss": 1.5951, + "step": 2841 + }, + { + "epoch": 0.299789029535865, + "grad_norm": 0.7388678193092346, + "learning_rate": 0.0012046349681786304, + "loss": 1.6417, + "step": 2842 + }, + { + "epoch": 0.2998945147679325, + "grad_norm": 0.8328035473823547, + "learning_rate": 0.001204435268089836, + "loss": 1.6039, + "step": 2843 + }, + { + "epoch": 0.3, + "grad_norm": 1.0897471904754639, + "learning_rate": 0.001204235517079304, + "loss": 1.644, + "step": 2844 + }, + { + "epoch": 0.30010548523206754, + "grad_norm": 0.657543957233429, + "learning_rate": 0.0012040357151694172, + "loss": 1.6216, + "step": 2845 + }, + { + "epoch": 0.30021097046413503, + "grad_norm": 0.9036891460418701, + "learning_rate": 0.0012038358623825646, + "loss": 1.6278, + "step": 2846 + }, + { + "epoch": 0.3003164556962025, + "grad_norm": 1.0898886919021606, + "learning_rate": 0.0012036359587411405, + "loss": 1.6236, + "step": 2847 + }, + { + "epoch": 0.30042194092827, + "grad_norm": 0.6594389081001282, + "learning_rate": 0.0012034360042675453, + "loss": 1.5875, + "step": 2848 + }, + { + "epoch": 0.30052742616033756, + "grad_norm": 1.4525192975997925, + "learning_rate": 0.0012032359989841849, + "loss": 1.6141, + "step": 2849 + }, + { + "epoch": 0.30063291139240506, + "grad_norm": 0.7072875499725342, + "learning_rate": 0.0012030359429134707, + "loss": 1.624, + "step": 2850 + }, + { + "epoch": 0.30073839662447255, + "grad_norm": 1.1452791690826416, + "learning_rate": 0.00120283583607782, + "loss": 1.5814, + "step": 2851 + }, + { + "epoch": 0.3008438818565401, + "grad_norm": 1.0669625997543335, + "learning_rate": 0.0012026356784996554, + "loss": 1.6326, + "step": 2852 + }, + { + "epoch": 0.3009493670886076, + "grad_norm": 0.63181471824646, + "learning_rate": 0.0012024354702014066, + 
"loss": 1.6152, + "step": 2853 + }, + { + "epoch": 0.3010548523206751, + "grad_norm": 0.8501518964767456, + "learning_rate": 0.0012022352112055071, + "loss": 1.6211, + "step": 2854 + }, + { + "epoch": 0.30116033755274263, + "grad_norm": 0.6803345680236816, + "learning_rate": 0.001202034901534397, + "loss": 1.6676, + "step": 2855 + }, + { + "epoch": 0.3012658227848101, + "grad_norm": 0.8203641176223755, + "learning_rate": 0.0012018345412105223, + "loss": 1.646, + "step": 2856 + }, + { + "epoch": 0.3013713080168776, + "grad_norm": 1.0445876121520996, + "learning_rate": 0.0012016341302563342, + "loss": 1.6214, + "step": 2857 + }, + { + "epoch": 0.30147679324894516, + "grad_norm": 0.73976069688797, + "learning_rate": 0.0012014336686942898, + "loss": 1.6072, + "step": 2858 + }, + { + "epoch": 0.30158227848101266, + "grad_norm": 0.9045741558074951, + "learning_rate": 0.0012012331565468518, + "loss": 1.5906, + "step": 2859 + }, + { + "epoch": 0.30168776371308015, + "grad_norm": 1.13524329662323, + "learning_rate": 0.0012010325938364883, + "loss": 1.6222, + "step": 2860 + }, + { + "epoch": 0.3017932489451477, + "grad_norm": 0.6813820600509644, + "learning_rate": 0.0012008319805856737, + "loss": 1.631, + "step": 2861 + }, + { + "epoch": 0.3018987341772152, + "grad_norm": 1.3356380462646484, + "learning_rate": 0.0012006313168168878, + "loss": 1.6282, + "step": 2862 + }, + { + "epoch": 0.3020042194092827, + "grad_norm": 0.8539438843727112, + "learning_rate": 0.0012004306025526158, + "loss": 1.6082, + "step": 2863 + }, + { + "epoch": 0.30210970464135023, + "grad_norm": 0.6831079721450806, + "learning_rate": 0.0012002298378153485, + "loss": 1.5916, + "step": 2864 + }, + { + "epoch": 0.3022151898734177, + "grad_norm": 0.8413076996803284, + "learning_rate": 0.001200029022627583, + "loss": 1.6263, + "step": 2865 + }, + { + "epoch": 0.3023206751054852, + "grad_norm": 0.7010160684585571, + "learning_rate": 0.0011998281570118213, + "loss": 1.673, + "step": 2866 + }, + { + "epoch": 
0.30242616033755276, + "grad_norm": 0.7444138526916504, + "learning_rate": 0.0011996272409905717, + "loss": 1.6183, + "step": 2867 + }, + { + "epoch": 0.30253164556962026, + "grad_norm": 0.7955793142318726, + "learning_rate": 0.0011994262745863478, + "loss": 1.6124, + "step": 2868 + }, + { + "epoch": 0.30263713080168775, + "grad_norm": 0.7815811038017273, + "learning_rate": 0.0011992252578216683, + "loss": 1.5554, + "step": 2869 + }, + { + "epoch": 0.3027426160337553, + "grad_norm": 1.138445496559143, + "learning_rate": 0.0011990241907190592, + "loss": 1.6686, + "step": 2870 + }, + { + "epoch": 0.3028481012658228, + "grad_norm": 1.2373981475830078, + "learning_rate": 0.0011988230733010502, + "loss": 1.6099, + "step": 2871 + }, + { + "epoch": 0.3029535864978903, + "grad_norm": 0.6844114065170288, + "learning_rate": 0.0011986219055901781, + "loss": 1.6126, + "step": 2872 + }, + { + "epoch": 0.30305907172995783, + "grad_norm": 0.860765814781189, + "learning_rate": 0.0011984206876089842, + "loss": 1.6307, + "step": 2873 + }, + { + "epoch": 0.3031645569620253, + "grad_norm": 0.7952919602394104, + "learning_rate": 0.001198219419380016, + "loss": 1.5577, + "step": 2874 + }, + { + "epoch": 0.3032700421940928, + "grad_norm": 0.683307409286499, + "learning_rate": 0.0011980181009258273, + "loss": 1.6097, + "step": 2875 + }, + { + "epoch": 0.30337552742616036, + "grad_norm": 0.7299294471740723, + "learning_rate": 0.0011978167322689761, + "loss": 1.5919, + "step": 2876 + }, + { + "epoch": 0.30348101265822786, + "grad_norm": 0.7499287724494934, + "learning_rate": 0.001197615313432027, + "loss": 1.5872, + "step": 2877 + }, + { + "epoch": 0.30358649789029535, + "grad_norm": 0.6849638223648071, + "learning_rate": 0.00119741384443755, + "loss": 1.6221, + "step": 2878 + }, + { + "epoch": 0.3036919831223629, + "grad_norm": 0.6848465800285339, + "learning_rate": 0.001197212325308121, + "loss": 1.6043, + "step": 2879 + }, + { + "epoch": 0.3037974683544304, + "grad_norm": 
0.7513267397880554, + "learning_rate": 0.001197010756066321, + "loss": 1.6094, + "step": 2880 + }, + { + "epoch": 0.3039029535864979, + "grad_norm": 0.6913958787918091, + "learning_rate": 0.0011968091367347367, + "loss": 1.6083, + "step": 2881 + }, + { + "epoch": 0.3040084388185654, + "grad_norm": 0.7012288570404053, + "learning_rate": 0.0011966074673359602, + "loss": 1.6159, + "step": 2882 + }, + { + "epoch": 0.3041139240506329, + "grad_norm": 0.662302553653717, + "learning_rate": 0.0011964057478925903, + "loss": 1.5743, + "step": 2883 + }, + { + "epoch": 0.3042194092827004, + "grad_norm": 0.6862317323684692, + "learning_rate": 0.0011962039784272306, + "loss": 1.6109, + "step": 2884 + }, + { + "epoch": 0.3043248945147679, + "grad_norm": 0.6972416639328003, + "learning_rate": 0.0011960021589624897, + "loss": 1.6171, + "step": 2885 + }, + { + "epoch": 0.30443037974683546, + "grad_norm": 0.7676247358322144, + "learning_rate": 0.001195800289520983, + "loss": 1.6221, + "step": 2886 + }, + { + "epoch": 0.30453586497890295, + "grad_norm": 0.7390176057815552, + "learning_rate": 0.0011955983701253312, + "loss": 1.6372, + "step": 2887 + }, + { + "epoch": 0.30464135021097044, + "grad_norm": 0.7550668716430664, + "learning_rate": 0.0011953964007981601, + "loss": 1.5764, + "step": 2888 + }, + { + "epoch": 0.304746835443038, + "grad_norm": 1.0807167291641235, + "learning_rate": 0.001195194381562101, + "loss": 1.6501, + "step": 2889 + }, + { + "epoch": 0.3048523206751055, + "grad_norm": 0.7804428339004517, + "learning_rate": 0.0011949923124397917, + "loss": 1.6052, + "step": 2890 + }, + { + "epoch": 0.304957805907173, + "grad_norm": 0.8013474345207214, + "learning_rate": 0.0011947901934538747, + "loss": 1.6373, + "step": 2891 + }, + { + "epoch": 0.3050632911392405, + "grad_norm": 0.7479227185249329, + "learning_rate": 0.0011945880246269987, + "loss": 1.6188, + "step": 2892 + }, + { + "epoch": 0.305168776371308, + "grad_norm": 0.7550715208053589, + "learning_rate": 
0.0011943858059818178, + "loss": 1.5945, + "step": 2893 + }, + { + "epoch": 0.3052742616033755, + "grad_norm": 0.8344272971153259, + "learning_rate": 0.0011941835375409912, + "loss": 1.6197, + "step": 2894 + }, + { + "epoch": 0.30537974683544306, + "grad_norm": 0.7725407481193542, + "learning_rate": 0.0011939812193271844, + "loss": 1.606, + "step": 2895 + }, + { + "epoch": 0.30548523206751055, + "grad_norm": 0.7047918438911438, + "learning_rate": 0.001193778851363068, + "loss": 1.6123, + "step": 2896 + }, + { + "epoch": 0.30559071729957804, + "grad_norm": 0.7010198831558228, + "learning_rate": 0.0011935764336713187, + "loss": 1.6421, + "step": 2897 + }, + { + "epoch": 0.3056962025316456, + "grad_norm": 0.776695966720581, + "learning_rate": 0.0011933739662746178, + "loss": 1.6297, + "step": 2898 + }, + { + "epoch": 0.3058016877637131, + "grad_norm": 0.736539900302887, + "learning_rate": 0.0011931714491956531, + "loss": 1.6192, + "step": 2899 + }, + { + "epoch": 0.3059071729957806, + "grad_norm": 0.7264310121536255, + "learning_rate": 0.001192968882457118, + "loss": 1.6005, + "step": 2900 + }, + { + "epoch": 0.3060126582278481, + "grad_norm": 0.9560233950614929, + "learning_rate": 0.0011927662660817105, + "loss": 1.6292, + "step": 2901 + }, + { + "epoch": 0.3061181434599156, + "grad_norm": 0.8782718181610107, + "learning_rate": 0.0011925636000921355, + "loss": 1.6489, + "step": 2902 + }, + { + "epoch": 0.3062236286919831, + "grad_norm": 0.7440299391746521, + "learning_rate": 0.0011923608845111017, + "loss": 1.6237, + "step": 2903 + }, + { + "epoch": 0.30632911392405066, + "grad_norm": 1.054153561592102, + "learning_rate": 0.0011921581193613253, + "loss": 1.6374, + "step": 2904 + }, + { + "epoch": 0.30643459915611815, + "grad_norm": 1.453239917755127, + "learning_rate": 0.0011919553046655267, + "loss": 1.648, + "step": 2905 + }, + { + "epoch": 0.30654008438818564, + "grad_norm": 0.7154324650764465, + "learning_rate": 0.0011917524404464325, + "loss": 1.6348, + "step": 
2906 + }, + { + "epoch": 0.3066455696202532, + "grad_norm": 1.8584699630737305, + "learning_rate": 0.0011915495267267745, + "loss": 1.5991, + "step": 2907 + }, + { + "epoch": 0.3067510548523207, + "grad_norm": 0.8106597661972046, + "learning_rate": 0.0011913465635292903, + "loss": 1.6138, + "step": 2908 + }, + { + "epoch": 0.3068565400843882, + "grad_norm": 1.7453864812850952, + "learning_rate": 0.001191143550876723, + "loss": 1.6123, + "step": 2909 + }, + { + "epoch": 0.3069620253164557, + "grad_norm": 1.0785211324691772, + "learning_rate": 0.001190940488791821, + "loss": 1.6037, + "step": 2910 + }, + { + "epoch": 0.3070675105485232, + "grad_norm": 2.145998001098633, + "learning_rate": 0.0011907373772973384, + "loss": 1.5963, + "step": 2911 + }, + { + "epoch": 0.3071729957805907, + "grad_norm": 1.8108482360839844, + "learning_rate": 0.001190534216416035, + "loss": 1.6416, + "step": 2912 + }, + { + "epoch": 0.30727848101265826, + "grad_norm": 1.3545210361480713, + "learning_rate": 0.0011903310061706762, + "loss": 1.6112, + "step": 2913 + }, + { + "epoch": 0.30738396624472575, + "grad_norm": 1.2573870420455933, + "learning_rate": 0.0011901277465840323, + "loss": 1.6433, + "step": 2914 + }, + { + "epoch": 0.30748945147679324, + "grad_norm": 1.0550734996795654, + "learning_rate": 0.0011899244376788797, + "loss": 1.6451, + "step": 2915 + }, + { + "epoch": 0.30759493670886073, + "grad_norm": 1.1441153287887573, + "learning_rate": 0.001189721079478, + "loss": 1.6293, + "step": 2916 + }, + { + "epoch": 0.3077004219409283, + "grad_norm": 1.219871163368225, + "learning_rate": 0.001189517672004181, + "loss": 1.5825, + "step": 2917 + }, + { + "epoch": 0.3078059071729958, + "grad_norm": 0.87290358543396, + "learning_rate": 0.0011893142152802152, + "loss": 1.5916, + "step": 2918 + }, + { + "epoch": 0.30791139240506327, + "grad_norm": 1.358223795890808, + "learning_rate": 0.0011891107093289007, + "loss": 1.6375, + "step": 2919 + }, + { + "epoch": 0.3080168776371308, + 
"grad_norm": 0.8848815560340881, + "learning_rate": 0.0011889071541730419, + "loss": 1.5896, + "step": 2920 + }, + { + "epoch": 0.3081223628691983, + "grad_norm": 1.6990283727645874, + "learning_rate": 0.0011887035498354475, + "loss": 1.5799, + "step": 2921 + }, + { + "epoch": 0.3082278481012658, + "grad_norm": 1.1411890983581543, + "learning_rate": 0.0011884998963389334, + "loss": 1.618, + "step": 2922 + }, + { + "epoch": 0.30833333333333335, + "grad_norm": 1.4832454919815063, + "learning_rate": 0.0011882961937063187, + "loss": 1.6293, + "step": 2923 + }, + { + "epoch": 0.30843881856540084, + "grad_norm": 1.5843409299850464, + "learning_rate": 0.0011880924419604305, + "loss": 1.6022, + "step": 2924 + }, + { + "epoch": 0.30854430379746833, + "grad_norm": 1.1526682376861572, + "learning_rate": 0.0011878886411240991, + "loss": 1.5952, + "step": 2925 + }, + { + "epoch": 0.3086497890295359, + "grad_norm": 1.0553909540176392, + "learning_rate": 0.0011876847912201624, + "loss": 1.6132, + "step": 2926 + }, + { + "epoch": 0.3087552742616034, + "grad_norm": 1.291288137435913, + "learning_rate": 0.0011874808922714623, + "loss": 1.6274, + "step": 2927 + }, + { + "epoch": 0.30886075949367087, + "grad_norm": 0.8099333643913269, + "learning_rate": 0.0011872769443008466, + "loss": 1.6346, + "step": 2928 + }, + { + "epoch": 0.3089662447257384, + "grad_norm": 1.0981724262237549, + "learning_rate": 0.001187072947331169, + "loss": 1.5807, + "step": 2929 + }, + { + "epoch": 0.3090717299578059, + "grad_norm": 0.7064989805221558, + "learning_rate": 0.001186868901385288, + "loss": 1.5713, + "step": 2930 + }, + { + "epoch": 0.3091772151898734, + "grad_norm": 1.290012001991272, + "learning_rate": 0.0011866648064860683, + "loss": 1.6333, + "step": 2931 + }, + { + "epoch": 0.30928270042194095, + "grad_norm": 0.7158396244049072, + "learning_rate": 0.0011864606626563795, + "loss": 1.6507, + "step": 2932 + }, + { + "epoch": 0.30938818565400844, + "grad_norm": 1.141839623451233, + 
"learning_rate": 0.0011862564699190972, + "loss": 1.6024, + "step": 2933 + }, + { + "epoch": 0.30949367088607593, + "grad_norm": 0.774097204208374, + "learning_rate": 0.0011860522282971019, + "loss": 1.6272, + "step": 2934 + }, + { + "epoch": 0.3095991561181435, + "grad_norm": 0.8854445219039917, + "learning_rate": 0.0011858479378132802, + "loss": 1.6088, + "step": 2935 + }, + { + "epoch": 0.309704641350211, + "grad_norm": 0.9116334915161133, + "learning_rate": 0.0011856435984905237, + "loss": 1.6409, + "step": 2936 + }, + { + "epoch": 0.30981012658227847, + "grad_norm": 0.7188323140144348, + "learning_rate": 0.00118543921035173, + "loss": 1.5663, + "step": 2937 + }, + { + "epoch": 0.309915611814346, + "grad_norm": 0.9420689344406128, + "learning_rate": 0.001185234773419801, + "loss": 1.5894, + "step": 2938 + }, + { + "epoch": 0.3100210970464135, + "grad_norm": 0.7351541519165039, + "learning_rate": 0.0011850302877176456, + "loss": 1.556, + "step": 2939 + }, + { + "epoch": 0.310126582278481, + "grad_norm": 0.9007724523544312, + "learning_rate": 0.001184825753268177, + "loss": 1.5846, + "step": 2940 + }, + { + "epoch": 0.31023206751054855, + "grad_norm": 0.985805332660675, + "learning_rate": 0.0011846211700943148, + "loss": 1.6276, + "step": 2941 + }, + { + "epoch": 0.31033755274261604, + "grad_norm": 0.6676756739616394, + "learning_rate": 0.001184416538218983, + "loss": 1.6057, + "step": 2942 + }, + { + "epoch": 0.31044303797468353, + "grad_norm": 0.8494294881820679, + "learning_rate": 0.0011842118576651122, + "loss": 1.6319, + "step": 2943 + }, + { + "epoch": 0.3105485232067511, + "grad_norm": 0.924845278263092, + "learning_rate": 0.0011840071284556373, + "loss": 1.6485, + "step": 2944 + }, + { + "epoch": 0.3106540084388186, + "grad_norm": 0.6884049773216248, + "learning_rate": 0.0011838023506134997, + "loss": 1.6246, + "step": 2945 + }, + { + "epoch": 0.31075949367088607, + "grad_norm": 0.7656282186508179, + "learning_rate": 0.0011835975241616455, + "loss": 
1.5948, + "step": 2946 + }, + { + "epoch": 0.31086497890295356, + "grad_norm": 0.9436163306236267, + "learning_rate": 0.0011833926491230265, + "loss": 1.6199, + "step": 2947 + }, + { + "epoch": 0.3109704641350211, + "grad_norm": 0.7179803252220154, + "learning_rate": 0.0011831877255206002, + "loss": 1.6264, + "step": 2948 + }, + { + "epoch": 0.3110759493670886, + "grad_norm": 0.9810307621955872, + "learning_rate": 0.0011829827533773292, + "loss": 1.6195, + "step": 2949 + }, + { + "epoch": 0.3111814345991561, + "grad_norm": 1.2799290418624878, + "learning_rate": 0.0011827777327161814, + "loss": 1.591, + "step": 2950 + }, + { + "epoch": 0.31128691983122364, + "grad_norm": 0.8571782112121582, + "learning_rate": 0.001182572663560131, + "loss": 1.5796, + "step": 2951 + }, + { + "epoch": 0.31139240506329113, + "grad_norm": 1.761060357093811, + "learning_rate": 0.0011823675459321564, + "loss": 1.617, + "step": 2952 + }, + { + "epoch": 0.3114978902953586, + "grad_norm": 1.0878909826278687, + "learning_rate": 0.0011821623798552424, + "loss": 1.6292, + "step": 2953 + }, + { + "epoch": 0.3116033755274262, + "grad_norm": 2.250284194946289, + "learning_rate": 0.001181957165352379, + "loss": 1.6071, + "step": 2954 + }, + { + "epoch": 0.31170886075949367, + "grad_norm": 2.1789653301239014, + "learning_rate": 0.0011817519024465608, + "loss": 1.6183, + "step": 2955 + }, + { + "epoch": 0.31181434599156116, + "grad_norm": 1.1920247077941895, + "learning_rate": 0.0011815465911607893, + "loss": 1.6316, + "step": 2956 + }, + { + "epoch": 0.3119198312236287, + "grad_norm": 1.1668347120285034, + "learning_rate": 0.0011813412315180704, + "loss": 1.6278, + "step": 2957 + }, + { + "epoch": 0.3120253164556962, + "grad_norm": 0.9986442923545837, + "learning_rate": 0.0011811358235414154, + "loss": 1.5817, + "step": 2958 + }, + { + "epoch": 0.3121308016877637, + "grad_norm": 0.8979436159133911, + "learning_rate": 0.0011809303672538417, + "loss": 1.6078, + "step": 2959 + }, + { + "epoch": 
0.31223628691983124, + "grad_norm": 1.0765050649642944, + "learning_rate": 0.0011807248626783714, + "loss": 1.6115, + "step": 2960 + }, + { + "epoch": 0.31234177215189873, + "grad_norm": 0.7781408429145813, + "learning_rate": 0.0011805193098380327, + "loss": 1.5733, + "step": 2961 + }, + { + "epoch": 0.3124472573839662, + "grad_norm": 1.1668667793273926, + "learning_rate": 0.0011803137087558584, + "loss": 1.5957, + "step": 2962 + }, + { + "epoch": 0.3125527426160338, + "grad_norm": 0.7240769267082214, + "learning_rate": 0.0011801080594548874, + "loss": 1.592, + "step": 2963 + }, + { + "epoch": 0.31265822784810127, + "grad_norm": 1.0710575580596924, + "learning_rate": 0.0011799023619581638, + "loss": 1.563, + "step": 2964 + }, + { + "epoch": 0.31276371308016876, + "grad_norm": 0.7380533218383789, + "learning_rate": 0.0011796966162887364, + "loss": 1.5908, + "step": 2965 + }, + { + "epoch": 0.3128691983122363, + "grad_norm": 0.8350695371627808, + "learning_rate": 0.0011794908224696608, + "loss": 1.5941, + "step": 2966 + }, + { + "epoch": 0.3129746835443038, + "grad_norm": 0.6763006448745728, + "learning_rate": 0.0011792849805239967, + "loss": 1.6137, + "step": 2967 + }, + { + "epoch": 0.3130801687763713, + "grad_norm": 1.0662107467651367, + "learning_rate": 0.0011790790904748103, + "loss": 1.6244, + "step": 2968 + }, + { + "epoch": 0.31318565400843884, + "grad_norm": 0.6753528714179993, + "learning_rate": 0.0011788731523451718, + "loss": 1.5795, + "step": 2969 + }, + { + "epoch": 0.31329113924050633, + "grad_norm": 0.8865507245063782, + "learning_rate": 0.0011786671661581584, + "loss": 1.5978, + "step": 2970 + }, + { + "epoch": 0.3133966244725738, + "grad_norm": 0.7988911271095276, + "learning_rate": 0.0011784611319368512, + "loss": 1.5546, + "step": 2971 + }, + { + "epoch": 0.3135021097046414, + "grad_norm": 0.7270774245262146, + "learning_rate": 0.0011782550497043379, + "loss": 1.5722, + "step": 2972 + }, + { + "epoch": 0.31360759493670887, + "grad_norm": 
0.7638905644416809, + "learning_rate": 0.0011780489194837106, + "loss": 1.609, + "step": 2973 + }, + { + "epoch": 0.31371308016877636, + "grad_norm": 0.7377998232841492, + "learning_rate": 0.0011778427412980675, + "loss": 1.6124, + "step": 2974 + }, + { + "epoch": 0.3138185654008439, + "grad_norm": 0.8501233458518982, + "learning_rate": 0.0011776365151705119, + "loss": 1.5818, + "step": 2975 + }, + { + "epoch": 0.3139240506329114, + "grad_norm": 0.7047660946846008, + "learning_rate": 0.0011774302411241525, + "loss": 1.6071, + "step": 2976 + }, + { + "epoch": 0.3140295358649789, + "grad_norm": 0.7249424457550049, + "learning_rate": 0.0011772239191821029, + "loss": 1.6197, + "step": 2977 + }, + { + "epoch": 0.31413502109704644, + "grad_norm": 0.7706026434898376, + "learning_rate": 0.0011770175493674827, + "loss": 1.612, + "step": 2978 + }, + { + "epoch": 0.31424050632911393, + "grad_norm": 0.8568241596221924, + "learning_rate": 0.0011768111317034173, + "loss": 1.6209, + "step": 2979 + }, + { + "epoch": 0.3143459915611814, + "grad_norm": 0.7717640995979309, + "learning_rate": 0.001176604666213036, + "loss": 1.6294, + "step": 2980 + }, + { + "epoch": 0.3144514767932489, + "grad_norm": 0.7155718207359314, + "learning_rate": 0.0011763981529194748, + "loss": 1.6409, + "step": 2981 + }, + { + "epoch": 0.31455696202531647, + "grad_norm": 0.9177278280258179, + "learning_rate": 0.001176191591845874, + "loss": 1.5744, + "step": 2982 + }, + { + "epoch": 0.31466244725738396, + "grad_norm": 0.7463675737380981, + "learning_rate": 0.0011759849830153806, + "loss": 1.6031, + "step": 2983 + }, + { + "epoch": 0.31476793248945145, + "grad_norm": 0.7206559181213379, + "learning_rate": 0.0011757783264511456, + "loss": 1.603, + "step": 2984 + }, + { + "epoch": 0.314873417721519, + "grad_norm": 0.7212887406349182, + "learning_rate": 0.001175571622176326, + "loss": 1.588, + "step": 2985 + }, + { + "epoch": 0.3149789029535865, + "grad_norm": 0.6895021200180054, + "learning_rate": 
0.0011753648702140837, + "loss": 1.6339, + "step": 2986 + }, + { + "epoch": 0.315084388185654, + "grad_norm": 0.792165994644165, + "learning_rate": 0.001175158070587587, + "loss": 1.6376, + "step": 2987 + }, + { + "epoch": 0.31518987341772153, + "grad_norm": 0.6949630975723267, + "learning_rate": 0.0011749512233200081, + "loss": 1.6296, + "step": 2988 + }, + { + "epoch": 0.315295358649789, + "grad_norm": 0.7343791723251343, + "learning_rate": 0.001174744328434526, + "loss": 1.6545, + "step": 2989 + }, + { + "epoch": 0.3154008438818565, + "grad_norm": 0.7410194277763367, + "learning_rate": 0.0011745373859543236, + "loss": 1.6269, + "step": 2990 + }, + { + "epoch": 0.31550632911392407, + "grad_norm": 0.713087797164917, + "learning_rate": 0.0011743303959025906, + "loss": 1.5798, + "step": 2991 + }, + { + "epoch": 0.31561181434599156, + "grad_norm": 0.712968111038208, + "learning_rate": 0.0011741233583025205, + "loss": 1.5752, + "step": 2992 + }, + { + "epoch": 0.31571729957805905, + "grad_norm": 0.675612211227417, + "learning_rate": 0.0011739162731773133, + "loss": 1.5594, + "step": 2993 + }, + { + "epoch": 0.3158227848101266, + "grad_norm": 0.687455952167511, + "learning_rate": 0.0011737091405501741, + "loss": 1.6326, + "step": 2994 + }, + { + "epoch": 0.3159282700421941, + "grad_norm": 0.6676709055900574, + "learning_rate": 0.0011735019604443126, + "loss": 1.6458, + "step": 2995 + }, + { + "epoch": 0.3160337552742616, + "grad_norm": 0.6087421178817749, + "learning_rate": 0.0011732947328829447, + "loss": 1.5861, + "step": 2996 + }, + { + "epoch": 0.31613924050632913, + "grad_norm": 0.6523327231407166, + "learning_rate": 0.0011730874578892913, + "loss": 1.5848, + "step": 2997 + }, + { + "epoch": 0.3162447257383966, + "grad_norm": 0.6576799750328064, + "learning_rate": 0.0011728801354865786, + "loss": 1.588, + "step": 2998 + }, + { + "epoch": 0.3163502109704641, + "grad_norm": 0.6479187607765198, + "learning_rate": 0.0011726727656980378, + "loss": 1.619, + "step": 2999 
+ }, + { + "epoch": 0.31645569620253167, + "grad_norm": 0.8956044912338257, + "learning_rate": 0.0011724653485469063, + "loss": 1.6041, + "step": 3000 + }, + { + "epoch": 0.31656118143459916, + "grad_norm": 0.8102289438247681, + "learning_rate": 0.0011722578840564256, + "loss": 1.5988, + "step": 3001 + }, + { + "epoch": 0.31666666666666665, + "grad_norm": 0.694053053855896, + "learning_rate": 0.0011720503722498436, + "loss": 1.5816, + "step": 3002 + }, + { + "epoch": 0.3167721518987342, + "grad_norm": 0.9262793064117432, + "learning_rate": 0.0011718428131504127, + "loss": 1.5724, + "step": 3003 + }, + { + "epoch": 0.3168776371308017, + "grad_norm": 0.9012019634246826, + "learning_rate": 0.0011716352067813914, + "loss": 1.6617, + "step": 3004 + }, + { + "epoch": 0.3169831223628692, + "grad_norm": 0.7937500476837158, + "learning_rate": 0.0011714275531660423, + "loss": 1.6227, + "step": 3005 + }, + { + "epoch": 0.31708860759493673, + "grad_norm": 0.8278478980064392, + "learning_rate": 0.0011712198523276347, + "loss": 1.6202, + "step": 3006 + }, + { + "epoch": 0.3171940928270042, + "grad_norm": 0.8286076784133911, + "learning_rate": 0.0011710121042894425, + "loss": 1.5999, + "step": 3007 + }, + { + "epoch": 0.3172995780590717, + "grad_norm": 0.6727810502052307, + "learning_rate": 0.0011708043090747442, + "loss": 1.5985, + "step": 3008 + }, + { + "epoch": 0.31740506329113927, + "grad_norm": 1.1538615226745605, + "learning_rate": 0.001170596466706825, + "loss": 1.6045, + "step": 3009 + }, + { + "epoch": 0.31751054852320676, + "grad_norm": 0.8626999855041504, + "learning_rate": 0.0011703885772089743, + "loss": 1.6076, + "step": 3010 + }, + { + "epoch": 0.31761603375527425, + "grad_norm": 0.7984665036201477, + "learning_rate": 0.0011701806406044875, + "loss": 1.5982, + "step": 3011 + }, + { + "epoch": 0.31772151898734174, + "grad_norm": 1.012645959854126, + "learning_rate": 0.0011699726569166643, + "loss": 1.5981, + "step": 3012 + }, + { + "epoch": 0.3178270042194093, + 
"grad_norm": 0.8330660462379456, + "learning_rate": 0.0011697646261688108, + "loss": 1.5856, + "step": 3013 + }, + { + "epoch": 0.3179324894514768, + "grad_norm": 1.0910956859588623, + "learning_rate": 0.0011695565483842382, + "loss": 1.6124, + "step": 3014 + }, + { + "epoch": 0.3180379746835443, + "grad_norm": 0.9000475406646729, + "learning_rate": 0.001169348423586262, + "loss": 1.6288, + "step": 3015 + }, + { + "epoch": 0.3181434599156118, + "grad_norm": 0.8238571286201477, + "learning_rate": 0.0011691402517982038, + "loss": 1.6005, + "step": 3016 + }, + { + "epoch": 0.3182489451476793, + "grad_norm": 0.8521479964256287, + "learning_rate": 0.0011689320330433904, + "loss": 1.6358, + "step": 3017 + }, + { + "epoch": 0.3183544303797468, + "grad_norm": 0.9162194132804871, + "learning_rate": 0.0011687237673451538, + "loss": 1.634, + "step": 3018 + }, + { + "epoch": 0.31845991561181436, + "grad_norm": 0.8086808919906616, + "learning_rate": 0.0011685154547268312, + "loss": 1.594, + "step": 3019 + }, + { + "epoch": 0.31856540084388185, + "grad_norm": 0.925930380821228, + "learning_rate": 0.0011683070952117646, + "loss": 1.5951, + "step": 3020 + }, + { + "epoch": 0.31867088607594934, + "grad_norm": 0.9188184142112732, + "learning_rate": 0.0011680986888233024, + "loss": 1.5719, + "step": 3021 + }, + { + "epoch": 0.3187763713080169, + "grad_norm": 0.7733421921730042, + "learning_rate": 0.0011678902355847973, + "loss": 1.6116, + "step": 3022 + }, + { + "epoch": 0.3188818565400844, + "grad_norm": 0.825961709022522, + "learning_rate": 0.0011676817355196075, + "loss": 1.5676, + "step": 3023 + }, + { + "epoch": 0.3189873417721519, + "grad_norm": 0.6842225790023804, + "learning_rate": 0.0011674731886510967, + "loss": 1.5863, + "step": 3024 + }, + { + "epoch": 0.3190928270042194, + "grad_norm": 0.8417406678199768, + "learning_rate": 0.0011672645950026332, + "loss": 1.5598, + "step": 3025 + }, + { + "epoch": 0.3191983122362869, + "grad_norm": 0.7788323163986206, + "learning_rate": 
0.001167055954597591, + "loss": 1.6169, + "step": 3026 + }, + { + "epoch": 0.3193037974683544, + "grad_norm": 0.8153231739997864, + "learning_rate": 0.0011668472674593497, + "loss": 1.6065, + "step": 3027 + }, + { + "epoch": 0.31940928270042196, + "grad_norm": 0.8258696794509888, + "learning_rate": 0.0011666385336112934, + "loss": 1.6239, + "step": 3028 + }, + { + "epoch": 0.31951476793248945, + "grad_norm": 0.8419963121414185, + "learning_rate": 0.0011664297530768117, + "loss": 1.5913, + "step": 3029 + }, + { + "epoch": 0.31962025316455694, + "grad_norm": 0.6912118196487427, + "learning_rate": 0.0011662209258792998, + "loss": 1.6339, + "step": 3030 + }, + { + "epoch": 0.3197257383966245, + "grad_norm": 0.7191562056541443, + "learning_rate": 0.0011660120520421578, + "loss": 1.6083, + "step": 3031 + }, + { + "epoch": 0.319831223628692, + "grad_norm": 0.6793178915977478, + "learning_rate": 0.0011658031315887908, + "loss": 1.6003, + "step": 3032 + }, + { + "epoch": 0.3199367088607595, + "grad_norm": 0.7912036776542664, + "learning_rate": 0.0011655941645426096, + "loss": 1.5772, + "step": 3033 + }, + { + "epoch": 0.320042194092827, + "grad_norm": 0.8914179801940918, + "learning_rate": 0.00116538515092703, + "loss": 1.6417, + "step": 3034 + }, + { + "epoch": 0.3201476793248945, + "grad_norm": 0.6789207458496094, + "learning_rate": 0.0011651760907654728, + "loss": 1.5978, + "step": 3035 + }, + { + "epoch": 0.320253164556962, + "grad_norm": 0.6960229873657227, + "learning_rate": 0.0011649669840813645, + "loss": 1.6552, + "step": 3036 + }, + { + "epoch": 0.32035864978902956, + "grad_norm": 0.786503791809082, + "learning_rate": 0.0011647578308981363, + "loss": 1.6335, + "step": 3037 + }, + { + "epoch": 0.32046413502109705, + "grad_norm": 0.642805814743042, + "learning_rate": 0.001164548631239225, + "loss": 1.5706, + "step": 3038 + }, + { + "epoch": 0.32056962025316454, + "grad_norm": 0.8487593531608582, + "learning_rate": 0.0011643393851280724, + "loss": 1.5898, + "step": 
3039 + }, + { + "epoch": 0.3206751054852321, + "grad_norm": 0.771835207939148, + "learning_rate": 0.0011641300925881257, + "loss": 1.5777, + "step": 3040 + }, + { + "epoch": 0.3207805907172996, + "grad_norm": 0.7386382818222046, + "learning_rate": 0.001163920753642837, + "loss": 1.6038, + "step": 3041 + }, + { + "epoch": 0.3208860759493671, + "grad_norm": 0.9656798243522644, + "learning_rate": 0.001163711368315664, + "loss": 1.59, + "step": 3042 + }, + { + "epoch": 0.3209915611814346, + "grad_norm": 0.6898447275161743, + "learning_rate": 0.001163501936630069, + "loss": 1.6249, + "step": 3043 + }, + { + "epoch": 0.3210970464135021, + "grad_norm": 0.8696707487106323, + "learning_rate": 0.0011632924586095204, + "loss": 1.6045, + "step": 3044 + }, + { + "epoch": 0.3212025316455696, + "grad_norm": 0.7985130548477173, + "learning_rate": 0.0011630829342774906, + "loss": 1.6093, + "step": 3045 + }, + { + "epoch": 0.3213080168776371, + "grad_norm": 0.7982620000839233, + "learning_rate": 0.0011628733636574586, + "loss": 1.6465, + "step": 3046 + }, + { + "epoch": 0.32141350210970465, + "grad_norm": 1.055997371673584, + "learning_rate": 0.0011626637467729072, + "loss": 1.5605, + "step": 3047 + }, + { + "epoch": 0.32151898734177214, + "grad_norm": 0.8975897431373596, + "learning_rate": 0.0011624540836473252, + "loss": 1.595, + "step": 3048 + }, + { + "epoch": 0.32162447257383964, + "grad_norm": 0.6933494806289673, + "learning_rate": 0.0011622443743042065, + "loss": 1.5799, + "step": 3049 + }, + { + "epoch": 0.3217299578059072, + "grad_norm": 0.6982442140579224, + "learning_rate": 0.0011620346187670501, + "loss": 1.6031, + "step": 3050 + }, + { + "epoch": 0.3218354430379747, + "grad_norm": 0.7291011214256287, + "learning_rate": 0.0011618248170593597, + "loss": 1.5931, + "step": 3051 + }, + { + "epoch": 0.32194092827004217, + "grad_norm": 0.8365522623062134, + "learning_rate": 0.0011616149692046454, + "loss": 1.583, + "step": 3052 + }, + { + "epoch": 0.3220464135021097, + 
"grad_norm": 0.6830744743347168, + "learning_rate": 0.0011614050752264216, + "loss": 1.6158, + "step": 3053 + }, + { + "epoch": 0.3221518987341772, + "grad_norm": 0.7515076398849487, + "learning_rate": 0.0011611951351482071, + "loss": 1.6066, + "step": 3054 + }, + { + "epoch": 0.3222573839662447, + "grad_norm": 0.7684136629104614, + "learning_rate": 0.0011609851489935274, + "loss": 1.6436, + "step": 3055 + }, + { + "epoch": 0.32236286919831225, + "grad_norm": 0.6421042084693909, + "learning_rate": 0.0011607751167859125, + "loss": 1.6176, + "step": 3056 + }, + { + "epoch": 0.32246835443037974, + "grad_norm": 1.0065962076187134, + "learning_rate": 0.0011605650385488977, + "loss": 1.6139, + "step": 3057 + }, + { + "epoch": 0.32257383966244724, + "grad_norm": 1.2025588750839233, + "learning_rate": 0.0011603549143060225, + "loss": 1.6039, + "step": 3058 + }, + { + "epoch": 0.3226793248945148, + "grad_norm": 0.6928632855415344, + "learning_rate": 0.0011601447440808335, + "loss": 1.6144, + "step": 3059 + }, + { + "epoch": 0.3227848101265823, + "grad_norm": 1.0921978950500488, + "learning_rate": 0.0011599345278968806, + "loss": 1.6214, + "step": 3060 + }, + { + "epoch": 0.32289029535864977, + "grad_norm": 0.826137125492096, + "learning_rate": 0.0011597242657777195, + "loss": 1.5866, + "step": 3061 + }, + { + "epoch": 0.3229957805907173, + "grad_norm": 0.8176423907279968, + "learning_rate": 0.0011595139577469115, + "loss": 1.5774, + "step": 3062 + }, + { + "epoch": 0.3231012658227848, + "grad_norm": 1.277761697769165, + "learning_rate": 0.0011593036038280225, + "loss": 1.6068, + "step": 3063 + }, + { + "epoch": 0.3232067510548523, + "grad_norm": 0.6623169183731079, + "learning_rate": 0.0011590932040446236, + "loss": 1.6152, + "step": 3064 + }, + { + "epoch": 0.32331223628691985, + "grad_norm": 0.9208691120147705, + "learning_rate": 0.0011588827584202914, + "loss": 1.5788, + "step": 3065 + }, + { + "epoch": 0.32341772151898734, + "grad_norm": 0.7585914134979248, + 
"learning_rate": 0.0011586722669786073, + "loss": 1.5995, + "step": 3066 + }, + { + "epoch": 0.32352320675105484, + "grad_norm": 0.8928055167198181, + "learning_rate": 0.0011584617297431578, + "loss": 1.6115, + "step": 3067 + }, + { + "epoch": 0.3236286919831224, + "grad_norm": 1.0386213064193726, + "learning_rate": 0.0011582511467375346, + "loss": 1.6001, + "step": 3068 + }, + { + "epoch": 0.3237341772151899, + "grad_norm": 0.7036697268486023, + "learning_rate": 0.001158040517985335, + "loss": 1.6152, + "step": 3069 + }, + { + "epoch": 0.32383966244725737, + "grad_norm": 0.7237119674682617, + "learning_rate": 0.0011578298435101604, + "loss": 1.618, + "step": 3070 + }, + { + "epoch": 0.3239451476793249, + "grad_norm": 0.9000774621963501, + "learning_rate": 0.0011576191233356181, + "loss": 1.638, + "step": 3071 + }, + { + "epoch": 0.3240506329113924, + "grad_norm": 0.6683383584022522, + "learning_rate": 0.0011574083574853208, + "loss": 1.5429, + "step": 3072 + }, + { + "epoch": 0.3241561181434599, + "grad_norm": 0.8452928066253662, + "learning_rate": 0.0011571975459828852, + "loss": 1.5954, + "step": 3073 + }, + { + "epoch": 0.32426160337552745, + "grad_norm": 0.8670528531074524, + "learning_rate": 0.0011569866888519343, + "loss": 1.6132, + "step": 3074 + }, + { + "epoch": 0.32436708860759494, + "grad_norm": 0.7886408567428589, + "learning_rate": 0.0011567757861160955, + "loss": 1.5702, + "step": 3075 + }, + { + "epoch": 0.32447257383966244, + "grad_norm": 0.6979184746742249, + "learning_rate": 0.0011565648377990017, + "loss": 1.5641, + "step": 3076 + }, + { + "epoch": 0.32457805907173, + "grad_norm": 0.6900529265403748, + "learning_rate": 0.0011563538439242902, + "loss": 1.5694, + "step": 3077 + }, + { + "epoch": 0.3246835443037975, + "grad_norm": 0.8007985353469849, + "learning_rate": 0.0011561428045156043, + "loss": 1.58, + "step": 3078 + }, + { + "epoch": 0.32478902953586497, + "grad_norm": 0.7090700268745422, + "learning_rate": 0.001155931719596592, + "loss": 
1.5898, + "step": 3079 + }, + { + "epoch": 0.32489451476793246, + "grad_norm": 0.7322080135345459, + "learning_rate": 0.0011557205891909062, + "loss": 1.6377, + "step": 3080 + }, + { + "epoch": 0.325, + "grad_norm": 0.7438018918037415, + "learning_rate": 0.0011555094133222053, + "loss": 1.545, + "step": 3081 + }, + { + "epoch": 0.3251054852320675, + "grad_norm": 0.6804894208908081, + "learning_rate": 0.0011552981920141528, + "loss": 1.6061, + "step": 3082 + }, + { + "epoch": 0.325210970464135, + "grad_norm": 0.7551174759864807, + "learning_rate": 0.0011550869252904166, + "loss": 1.6063, + "step": 3083 + }, + { + "epoch": 0.32531645569620254, + "grad_norm": 0.670036256313324, + "learning_rate": 0.0011548756131746706, + "loss": 1.604, + "step": 3084 + }, + { + "epoch": 0.32542194092827004, + "grad_norm": 0.7210105657577515, + "learning_rate": 0.0011546642556905934, + "loss": 1.6268, + "step": 3085 + }, + { + "epoch": 0.32552742616033753, + "grad_norm": 0.8959476351737976, + "learning_rate": 0.0011544528528618682, + "loss": 1.6014, + "step": 3086 + }, + { + "epoch": 0.3256329113924051, + "grad_norm": 0.8564140796661377, + "learning_rate": 0.0011542414047121842, + "loss": 1.6658, + "step": 3087 + }, + { + "epoch": 0.32573839662447257, + "grad_norm": 0.7462244629859924, + "learning_rate": 0.0011540299112652351, + "loss": 1.5902, + "step": 3088 + }, + { + "epoch": 0.32584388185654006, + "grad_norm": 0.7580071687698364, + "learning_rate": 0.00115381837254472, + "loss": 1.6066, + "step": 3089 + }, + { + "epoch": 0.3259493670886076, + "grad_norm": 0.8331851959228516, + "learning_rate": 0.0011536067885743423, + "loss": 1.6291, + "step": 3090 + }, + { + "epoch": 0.3260548523206751, + "grad_norm": 0.7595875263214111, + "learning_rate": 0.0011533951593778115, + "loss": 1.6259, + "step": 3091 + }, + { + "epoch": 0.3261603375527426, + "grad_norm": 0.6715078353881836, + "learning_rate": 0.0011531834849788417, + "loss": 1.5736, + "step": 3092 + }, + { + "epoch": 
0.32626582278481014, + "grad_norm": 0.6639381051063538, + "learning_rate": 0.0011529717654011518, + "loss": 1.5601, + "step": 3093 + }, + { + "epoch": 0.32637130801687764, + "grad_norm": 0.6993007063865662, + "learning_rate": 0.001152760000668466, + "loss": 1.5874, + "step": 3094 + }, + { + "epoch": 0.32647679324894513, + "grad_norm": 0.6717429161071777, + "learning_rate": 0.001152548190804514, + "loss": 1.5891, + "step": 3095 + }, + { + "epoch": 0.3265822784810127, + "grad_norm": 0.6243254542350769, + "learning_rate": 0.0011523363358330301, + "loss": 1.5991, + "step": 3096 + }, + { + "epoch": 0.32668776371308017, + "grad_norm": 0.6975856423377991, + "learning_rate": 0.0011521244357777533, + "loss": 1.5835, + "step": 3097 + }, + { + "epoch": 0.32679324894514766, + "grad_norm": 0.656753659248352, + "learning_rate": 0.0011519124906624284, + "loss": 1.619, + "step": 3098 + }, + { + "epoch": 0.3268987341772152, + "grad_norm": 0.6539972424507141, + "learning_rate": 0.0011517005005108048, + "loss": 1.5827, + "step": 3099 + }, + { + "epoch": 0.3270042194092827, + "grad_norm": 0.7129844427108765, + "learning_rate": 0.001151488465346637, + "loss": 1.6111, + "step": 3100 + }, + { + "epoch": 0.3271097046413502, + "grad_norm": 0.6780965924263, + "learning_rate": 0.0011512763851936848, + "loss": 1.6033, + "step": 3101 + }, + { + "epoch": 0.32721518987341774, + "grad_norm": 0.694021463394165, + "learning_rate": 0.0011510642600757123, + "loss": 1.6157, + "step": 3102 + }, + { + "epoch": 0.32732067510548524, + "grad_norm": 0.7651616334915161, + "learning_rate": 0.00115085209001649, + "loss": 1.6071, + "step": 3103 + }, + { + "epoch": 0.32742616033755273, + "grad_norm": 0.6810897588729858, + "learning_rate": 0.0011506398750397919, + "loss": 1.5769, + "step": 3104 + }, + { + "epoch": 0.3275316455696203, + "grad_norm": 0.6827921867370605, + "learning_rate": 0.0011504276151693984, + "loss": 1.5807, + "step": 3105 + }, + { + "epoch": 0.32763713080168777, + "grad_norm": 
0.6761003136634827, + "learning_rate": 0.0011502153104290937, + "loss": 1.6127, + "step": 3106 + }, + { + "epoch": 0.32774261603375526, + "grad_norm": 0.7223131060600281, + "learning_rate": 0.0011500029608426676, + "loss": 1.5736, + "step": 3107 + }, + { + "epoch": 0.3278481012658228, + "grad_norm": 0.6852105855941772, + "learning_rate": 0.0011497905664339153, + "loss": 1.5717, + "step": 3108 + }, + { + "epoch": 0.3279535864978903, + "grad_norm": 0.6612615585327148, + "learning_rate": 0.0011495781272266366, + "loss": 1.6097, + "step": 3109 + }, + { + "epoch": 0.3280590717299578, + "grad_norm": 0.770760178565979, + "learning_rate": 0.0011493656432446362, + "loss": 1.5968, + "step": 3110 + }, + { + "epoch": 0.3281645569620253, + "grad_norm": 0.6618778705596924, + "learning_rate": 0.0011491531145117243, + "loss": 1.6052, + "step": 3111 + }, + { + "epoch": 0.32827004219409284, + "grad_norm": 0.6091045141220093, + "learning_rate": 0.0011489405410517151, + "loss": 1.6001, + "step": 3112 + }, + { + "epoch": 0.32837552742616033, + "grad_norm": 0.7482385635375977, + "learning_rate": 0.0011487279228884293, + "loss": 1.6003, + "step": 3113 + }, + { + "epoch": 0.3284810126582278, + "grad_norm": 0.9266412854194641, + "learning_rate": 0.0011485152600456913, + "loss": 1.6081, + "step": 3114 + }, + { + "epoch": 0.32858649789029537, + "grad_norm": 0.9291563034057617, + "learning_rate": 0.0011483025525473314, + "loss": 1.5915, + "step": 3115 + }, + { + "epoch": 0.32869198312236286, + "grad_norm": 0.6621690392494202, + "learning_rate": 0.001148089800417184, + "loss": 1.5705, + "step": 3116 + }, + { + "epoch": 0.32879746835443036, + "grad_norm": 0.7722126245498657, + "learning_rate": 0.00114787700367909, + "loss": 1.5987, + "step": 3117 + }, + { + "epoch": 0.3289029535864979, + "grad_norm": 0.7310183644294739, + "learning_rate": 0.0011476641623568934, + "loss": 1.5657, + "step": 3118 + }, + { + "epoch": 0.3290084388185654, + "grad_norm": 0.6689575910568237, + "learning_rate": 
0.0011474512764744445, + "loss": 1.59, + "step": 3119 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 0.7859111428260803, + "learning_rate": 0.0011472383460555983, + "loss": 1.6425, + "step": 3120 + }, + { + "epoch": 0.32921940928270044, + "grad_norm": 0.7730305790901184, + "learning_rate": 0.0011470253711242146, + "loss": 1.5921, + "step": 3121 + }, + { + "epoch": 0.32932489451476793, + "grad_norm": 0.6885017156600952, + "learning_rate": 0.001146812351704158, + "loss": 1.5858, + "step": 3122 + }, + { + "epoch": 0.3294303797468354, + "grad_norm": 0.6857640147209167, + "learning_rate": 0.001146599287819299, + "loss": 1.6541, + "step": 3123 + }, + { + "epoch": 0.32953586497890297, + "grad_norm": 0.6714873909950256, + "learning_rate": 0.0011463861794935122, + "loss": 1.5746, + "step": 3124 + }, + { + "epoch": 0.32964135021097046, + "grad_norm": 0.7246956825256348, + "learning_rate": 0.0011461730267506775, + "loss": 1.6294, + "step": 3125 + }, + { + "epoch": 0.32974683544303796, + "grad_norm": 0.7815225720405579, + "learning_rate": 0.0011459598296146795, + "loss": 1.6144, + "step": 3126 + }, + { + "epoch": 0.3298523206751055, + "grad_norm": 0.8871744871139526, + "learning_rate": 0.001145746588109408, + "loss": 1.6151, + "step": 3127 + }, + { + "epoch": 0.329957805907173, + "grad_norm": 0.7395591139793396, + "learning_rate": 0.0011455333022587582, + "loss": 1.5956, + "step": 3128 + }, + { + "epoch": 0.3300632911392405, + "grad_norm": 0.7969853281974792, + "learning_rate": 0.0011453199720866296, + "loss": 1.6351, + "step": 3129 + }, + { + "epoch": 0.33016877637130804, + "grad_norm": 0.681366503238678, + "learning_rate": 0.001145106597616927, + "loss": 1.6184, + "step": 3130 + }, + { + "epoch": 0.33027426160337553, + "grad_norm": 0.7303506135940552, + "learning_rate": 0.0011448931788735595, + "loss": 1.5885, + "step": 3131 + }, + { + "epoch": 0.330379746835443, + "grad_norm": 0.6731773018836975, + "learning_rate": 0.0011446797158804426, + "loss": 1.6184, + "step": 
3132 + }, + { + "epoch": 0.33048523206751057, + "grad_norm": 0.9437779784202576, + "learning_rate": 0.0011444662086614952, + "loss": 1.5399, + "step": 3133 + }, + { + "epoch": 0.33059071729957806, + "grad_norm": 1.3056329488754272, + "learning_rate": 0.0011442526572406422, + "loss": 1.6407, + "step": 3134 + }, + { + "epoch": 0.33069620253164556, + "grad_norm": 0.6540379524230957, + "learning_rate": 0.001144039061641813, + "loss": 1.5518, + "step": 3135 + }, + { + "epoch": 0.3308016877637131, + "grad_norm": 1.0655856132507324, + "learning_rate": 0.0011438254218889422, + "loss": 1.5812, + "step": 3136 + }, + { + "epoch": 0.3309071729957806, + "grad_norm": 0.849429190158844, + "learning_rate": 0.0011436117380059692, + "loss": 1.5562, + "step": 3137 + }, + { + "epoch": 0.3310126582278481, + "grad_norm": 0.721428632736206, + "learning_rate": 0.0011433980100168382, + "loss": 1.6217, + "step": 3138 + }, + { + "epoch": 0.33111814345991564, + "grad_norm": 1.2363234758377075, + "learning_rate": 0.0011431842379454982, + "loss": 1.613, + "step": 3139 + }, + { + "epoch": 0.33122362869198313, + "grad_norm": 1.0920205116271973, + "learning_rate": 0.001142970421815904, + "loss": 1.5841, + "step": 3140 + }, + { + "epoch": 0.3313291139240506, + "grad_norm": 0.6989731192588806, + "learning_rate": 0.0011427565616520144, + "loss": 1.6168, + "step": 3141 + }, + { + "epoch": 0.33143459915611817, + "grad_norm": 0.7118903398513794, + "learning_rate": 0.0011425426574777936, + "loss": 1.5668, + "step": 3142 + }, + { + "epoch": 0.33154008438818566, + "grad_norm": 0.9210342168807983, + "learning_rate": 0.0011423287093172106, + "loss": 1.6115, + "step": 3143 + }, + { + "epoch": 0.33164556962025316, + "grad_norm": 0.8570569753646851, + "learning_rate": 0.0011421147171942398, + "loss": 1.624, + "step": 3144 + }, + { + "epoch": 0.33175105485232065, + "grad_norm": 0.6654054522514343, + "learning_rate": 0.0011419006811328593, + "loss": 1.607, + "step": 3145 + }, + { + "epoch": 0.3318565400843882, + 
"grad_norm": 0.9263875484466553, + "learning_rate": 0.0011416866011570534, + "loss": 1.5984, + "step": 3146 + }, + { + "epoch": 0.3319620253164557, + "grad_norm": 0.939127504825592, + "learning_rate": 0.0011414724772908105, + "loss": 1.5756, + "step": 3147 + }, + { + "epoch": 0.3320675105485232, + "grad_norm": 0.6734333634376526, + "learning_rate": 0.0011412583095581248, + "loss": 1.584, + "step": 3148 + }, + { + "epoch": 0.33217299578059073, + "grad_norm": 1.2160972356796265, + "learning_rate": 0.0011410440979829942, + "loss": 1.5796, + "step": 3149 + }, + { + "epoch": 0.3322784810126582, + "grad_norm": 0.7493484616279602, + "learning_rate": 0.0011408298425894226, + "loss": 1.5939, + "step": 3150 + }, + { + "epoch": 0.3323839662447257, + "grad_norm": 1.0517373085021973, + "learning_rate": 0.0011406155434014185, + "loss": 1.6018, + "step": 3151 + }, + { + "epoch": 0.33248945147679326, + "grad_norm": 1.278167724609375, + "learning_rate": 0.0011404012004429948, + "loss": 1.5959, + "step": 3152 + }, + { + "epoch": 0.33259493670886076, + "grad_norm": 0.6815617680549622, + "learning_rate": 0.00114018681373817, + "loss": 1.6132, + "step": 3153 + }, + { + "epoch": 0.33270042194092825, + "grad_norm": 0.9259607195854187, + "learning_rate": 0.001139972383310967, + "loss": 1.6027, + "step": 3154 + }, + { + "epoch": 0.3328059071729958, + "grad_norm": 0.7598952651023865, + "learning_rate": 0.0011397579091854137, + "loss": 1.5998, + "step": 3155 + }, + { + "epoch": 0.3329113924050633, + "grad_norm": 0.6905957460403442, + "learning_rate": 0.0011395433913855434, + "loss": 1.5976, + "step": 3156 + }, + { + "epoch": 0.3330168776371308, + "grad_norm": 0.7355316877365112, + "learning_rate": 0.0011393288299353934, + "loss": 1.5929, + "step": 3157 + }, + { + "epoch": 0.33312236286919833, + "grad_norm": 0.7028221487998962, + "learning_rate": 0.001139114224859007, + "loss": 1.5747, + "step": 3158 + }, + { + "epoch": 0.3332278481012658, + "grad_norm": 0.7798256874084473, + "learning_rate": 
0.0011388995761804311, + "loss": 1.601, + "step": 3159 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.9116944074630737, + "learning_rate": 0.0011386848839237186, + "loss": 1.6008, + "step": 3160 + }, + { + "epoch": 0.33343881856540086, + "grad_norm": 1.0170785188674927, + "learning_rate": 0.0011384701481129266, + "loss": 1.6026, + "step": 3161 + }, + { + "epoch": 0.33354430379746836, + "grad_norm": 0.787625789642334, + "learning_rate": 0.0011382553687721174, + "loss": 1.5874, + "step": 3162 + }, + { + "epoch": 0.33364978902953585, + "grad_norm": 0.6939123272895813, + "learning_rate": 0.0011380405459253582, + "loss": 1.595, + "step": 3163 + }, + { + "epoch": 0.3337552742616034, + "grad_norm": 0.964408278465271, + "learning_rate": 0.0011378256795967208, + "loss": 1.6079, + "step": 3164 + }, + { + "epoch": 0.3338607594936709, + "grad_norm": 0.9530452489852905, + "learning_rate": 0.0011376107698102822, + "loss": 1.5817, + "step": 3165 + }, + { + "epoch": 0.3339662447257384, + "grad_norm": 0.7117488384246826, + "learning_rate": 0.001137395816590124, + "loss": 1.6338, + "step": 3166 + }, + { + "epoch": 0.33407172995780593, + "grad_norm": 0.9362091422080994, + "learning_rate": 0.001137180819960333, + "loss": 1.5661, + "step": 3167 + }, + { + "epoch": 0.3341772151898734, + "grad_norm": 0.940995454788208, + "learning_rate": 0.0011369657799450005, + "loss": 1.6147, + "step": 3168 + }, + { + "epoch": 0.3342827004219409, + "grad_norm": 0.7860775589942932, + "learning_rate": 0.0011367506965682225, + "loss": 1.6232, + "step": 3169 + }, + { + "epoch": 0.33438818565400846, + "grad_norm": 0.7188010215759277, + "learning_rate": 0.0011365355698541005, + "loss": 1.5538, + "step": 3170 + }, + { + "epoch": 0.33449367088607596, + "grad_norm": 0.642544150352478, + "learning_rate": 0.0011363203998267406, + "loss": 1.5864, + "step": 3171 + }, + { + "epoch": 0.33459915611814345, + "grad_norm": 0.7007741928100586, + "learning_rate": 0.0011361051865102533, + "loss": 1.5684, + "step": 
3172 + }, + { + "epoch": 0.334704641350211, + "grad_norm": 0.6862145066261292, + "learning_rate": 0.0011358899299287546, + "loss": 1.5979, + "step": 3173 + }, + { + "epoch": 0.3348101265822785, + "grad_norm": 0.8996045589447021, + "learning_rate": 0.0011356746301063652, + "loss": 1.6066, + "step": 3174 + }, + { + "epoch": 0.334915611814346, + "grad_norm": 1.210575819015503, + "learning_rate": 0.0011354592870672104, + "loss": 1.5913, + "step": 3175 + }, + { + "epoch": 0.33502109704641353, + "grad_norm": 0.67326420545578, + "learning_rate": 0.0011352439008354201, + "loss": 1.5889, + "step": 3176 + }, + { + "epoch": 0.335126582278481, + "grad_norm": 0.796511173248291, + "learning_rate": 0.0011350284714351298, + "loss": 1.5861, + "step": 3177 + }, + { + "epoch": 0.3352320675105485, + "grad_norm": 0.8070449233055115, + "learning_rate": 0.0011348129988904797, + "loss": 1.5875, + "step": 3178 + }, + { + "epoch": 0.335337552742616, + "grad_norm": 0.6580392718315125, + "learning_rate": 0.0011345974832256138, + "loss": 1.5575, + "step": 3179 + }, + { + "epoch": 0.33544303797468356, + "grad_norm": 0.823894202709198, + "learning_rate": 0.0011343819244646824, + "loss": 1.626, + "step": 3180 + }, + { + "epoch": 0.33554852320675105, + "grad_norm": 0.7126208543777466, + "learning_rate": 0.0011341663226318395, + "loss": 1.5564, + "step": 3181 + }, + { + "epoch": 0.33565400843881854, + "grad_norm": 1.1310843229293823, + "learning_rate": 0.0011339506777512446, + "loss": 1.5803, + "step": 3182 + }, + { + "epoch": 0.3357594936708861, + "grad_norm": 0.8011541962623596, + "learning_rate": 0.0011337349898470617, + "loss": 1.5927, + "step": 3183 + }, + { + "epoch": 0.3358649789029536, + "grad_norm": 0.7744584083557129, + "learning_rate": 0.0011335192589434597, + "loss": 1.5977, + "step": 3184 + }, + { + "epoch": 0.3359704641350211, + "grad_norm": 0.9158652424812317, + "learning_rate": 0.0011333034850646124, + "loss": 1.602, + "step": 3185 + }, + { + "epoch": 0.3360759493670886, + 
"grad_norm": 0.8521155118942261, + "learning_rate": 0.0011330876682346981, + "loss": 1.5826, + "step": 3186 + }, + { + "epoch": 0.3361814345991561, + "grad_norm": 0.7275968790054321, + "learning_rate": 0.0011328718084779004, + "loss": 1.6173, + "step": 3187 + }, + { + "epoch": 0.3362869198312236, + "grad_norm": 0.8458832502365112, + "learning_rate": 0.0011326559058184075, + "loss": 1.5972, + "step": 3188 + }, + { + "epoch": 0.33639240506329116, + "grad_norm": 1.0256805419921875, + "learning_rate": 0.001132439960280412, + "loss": 1.5724, + "step": 3189 + }, + { + "epoch": 0.33649789029535865, + "grad_norm": 0.8543221950531006, + "learning_rate": 0.001132223971888112, + "loss": 1.6265, + "step": 3190 + }, + { + "epoch": 0.33660337552742614, + "grad_norm": 0.7348299622535706, + "learning_rate": 0.0011320079406657102, + "loss": 1.5899, + "step": 3191 + }, + { + "epoch": 0.3367088607594937, + "grad_norm": 0.9025410413742065, + "learning_rate": 0.0011317918666374138, + "loss": 1.5588, + "step": 3192 + }, + { + "epoch": 0.3368143459915612, + "grad_norm": 0.7692383527755737, + "learning_rate": 0.0011315757498274349, + "loss": 1.6354, + "step": 3193 + }, + { + "epoch": 0.3369198312236287, + "grad_norm": 0.7230756878852844, + "learning_rate": 0.0011313595902599904, + "loss": 1.6118, + "step": 3194 + }, + { + "epoch": 0.3370253164556962, + "grad_norm": 0.7268738150596619, + "learning_rate": 0.0011311433879593023, + "loss": 1.559, + "step": 3195 + }, + { + "epoch": 0.3371308016877637, + "grad_norm": 0.7904716730117798, + "learning_rate": 0.001130927142949597, + "loss": 1.6218, + "step": 3196 + }, + { + "epoch": 0.3372362869198312, + "grad_norm": 0.7146595120429993, + "learning_rate": 0.001130710855255106, + "loss": 1.5344, + "step": 3197 + }, + { + "epoch": 0.33734177215189876, + "grad_norm": 0.8766056299209595, + "learning_rate": 0.001130494524900065, + "loss": 1.5954, + "step": 3198 + }, + { + "epoch": 0.33744725738396625, + "grad_norm": 0.8738231658935547, + 
"learning_rate": 0.0011302781519087154, + "loss": 1.5601, + "step": 3199 + }, + { + "epoch": 0.33755274261603374, + "grad_norm": 0.6834279298782349, + "learning_rate": 0.0011300617363053024, + "loss": 1.5952, + "step": 3200 + }, + { + "epoch": 0.3376582278481013, + "grad_norm": 0.7676968574523926, + "learning_rate": 0.0011298452781140769, + "loss": 1.5909, + "step": 3201 + }, + { + "epoch": 0.3377637130801688, + "grad_norm": 0.8864360451698303, + "learning_rate": 0.0011296287773592938, + "loss": 1.6256, + "step": 3202 + }, + { + "epoch": 0.3378691983122363, + "grad_norm": 1.1105132102966309, + "learning_rate": 0.0011294122340652132, + "loss": 1.6196, + "step": 3203 + }, + { + "epoch": 0.3379746835443038, + "grad_norm": 0.9397533535957336, + "learning_rate": 0.0011291956482561, + "loss": 1.6096, + "step": 3204 + }, + { + "epoch": 0.3380801687763713, + "grad_norm": 0.6950457692146301, + "learning_rate": 0.0011289790199562233, + "loss": 1.6145, + "step": 3205 + }, + { + "epoch": 0.3381856540084388, + "grad_norm": 0.7728614211082458, + "learning_rate": 0.001128762349189858, + "loss": 1.6146, + "step": 3206 + }, + { + "epoch": 0.33829113924050636, + "grad_norm": 0.701910138130188, + "learning_rate": 0.0011285456359812825, + "loss": 1.6178, + "step": 3207 + }, + { + "epoch": 0.33839662447257385, + "grad_norm": 0.6808938980102539, + "learning_rate": 0.0011283288803547809, + "loss": 1.5651, + "step": 3208 + }, + { + "epoch": 0.33850210970464134, + "grad_norm": 0.6662678122520447, + "learning_rate": 0.0011281120823346418, + "loss": 1.5778, + "step": 3209 + }, + { + "epoch": 0.33860759493670883, + "grad_norm": 0.7429592609405518, + "learning_rate": 0.0011278952419451586, + "loss": 1.6042, + "step": 3210 + }, + { + "epoch": 0.3387130801687764, + "grad_norm": 0.6695657968521118, + "learning_rate": 0.0011276783592106291, + "loss": 1.5812, + "step": 3211 + }, + { + "epoch": 0.3388185654008439, + "grad_norm": 0.7108159065246582, + "learning_rate": 0.001127461434155356, + "loss": 
1.577, + "step": 3212 + }, + { + "epoch": 0.33892405063291137, + "grad_norm": 0.6480672955513, + "learning_rate": 0.001127244466803647, + "loss": 1.5635, + "step": 3213 + }, + { + "epoch": 0.3390295358649789, + "grad_norm": 0.6807981729507446, + "learning_rate": 0.0011270274571798147, + "loss": 1.6234, + "step": 3214 + }, + { + "epoch": 0.3391350210970464, + "grad_norm": 0.7342066764831543, + "learning_rate": 0.0011268104053081755, + "loss": 1.5988, + "step": 3215 + }, + { + "epoch": 0.3392405063291139, + "grad_norm": 0.7091277837753296, + "learning_rate": 0.0011265933112130516, + "loss": 1.5999, + "step": 3216 + }, + { + "epoch": 0.33934599156118145, + "grad_norm": 0.6736048460006714, + "learning_rate": 0.0011263761749187693, + "loss": 1.5871, + "step": 3217 + }, + { + "epoch": 0.33945147679324894, + "grad_norm": 0.8016487956047058, + "learning_rate": 0.0011261589964496597, + "loss": 1.5942, + "step": 3218 + }, + { + "epoch": 0.33955696202531643, + "grad_norm": 1.0946990251541138, + "learning_rate": 0.001125941775830059, + "loss": 1.5698, + "step": 3219 + }, + { + "epoch": 0.339662447257384, + "grad_norm": 0.6899036765098572, + "learning_rate": 0.0011257245130843077, + "loss": 1.6062, + "step": 3220 + }, + { + "epoch": 0.3397679324894515, + "grad_norm": 0.8788459300994873, + "learning_rate": 0.0011255072082367512, + "loss": 1.5905, + "step": 3221 + }, + { + "epoch": 0.33987341772151897, + "grad_norm": 1.0593100786209106, + "learning_rate": 0.0011252898613117394, + "loss": 1.5996, + "step": 3222 + }, + { + "epoch": 0.3399789029535865, + "grad_norm": 0.7591010928153992, + "learning_rate": 0.0011250724723336273, + "loss": 1.5952, + "step": 3223 + }, + { + "epoch": 0.340084388185654, + "grad_norm": 0.8378796577453613, + "learning_rate": 0.0011248550413267746, + "loss": 1.5631, + "step": 3224 + }, + { + "epoch": 0.3401898734177215, + "grad_norm": 1.0722426176071167, + "learning_rate": 0.001124637568315545, + "loss": 1.6073, + "step": 3225 + }, + { + "epoch": 
0.34029535864978905, + "grad_norm": 0.8408674597740173, + "learning_rate": 0.001124420053324308, + "loss": 1.5677, + "step": 3226 + }, + { + "epoch": 0.34040084388185654, + "grad_norm": 0.6982042193412781, + "learning_rate": 0.001124202496377437, + "loss": 1.5502, + "step": 3227 + }, + { + "epoch": 0.34050632911392403, + "grad_norm": 0.7120385766029358, + "learning_rate": 0.0011239848974993103, + "loss": 1.5685, + "step": 3228 + }, + { + "epoch": 0.3406118143459916, + "grad_norm": 0.780621349811554, + "learning_rate": 0.0011237672567143107, + "loss": 1.5999, + "step": 3229 + }, + { + "epoch": 0.3407172995780591, + "grad_norm": 0.648231565952301, + "learning_rate": 0.0011235495740468265, + "loss": 1.6018, + "step": 3230 + }, + { + "epoch": 0.34082278481012657, + "grad_norm": 0.7143185138702393, + "learning_rate": 0.00112333184952125, + "loss": 1.5668, + "step": 3231 + }, + { + "epoch": 0.3409282700421941, + "grad_norm": 0.7490081787109375, + "learning_rate": 0.001123114083161978, + "loss": 1.5577, + "step": 3232 + }, + { + "epoch": 0.3410337552742616, + "grad_norm": 0.7288658022880554, + "learning_rate": 0.0011228962749934123, + "loss": 1.5724, + "step": 3233 + }, + { + "epoch": 0.3411392405063291, + "grad_norm": 0.9019172787666321, + "learning_rate": 0.0011226784250399598, + "loss": 1.5546, + "step": 3234 + }, + { + "epoch": 0.34124472573839665, + "grad_norm": 0.8146930932998657, + "learning_rate": 0.0011224605333260312, + "loss": 1.5534, + "step": 3235 + }, + { + "epoch": 0.34135021097046414, + "grad_norm": 0.6561857461929321, + "learning_rate": 0.0011222425998760428, + "loss": 1.5917, + "step": 3236 + }, + { + "epoch": 0.34145569620253163, + "grad_norm": 0.7667781114578247, + "learning_rate": 0.0011220246247144149, + "loss": 1.5527, + "step": 3237 + }, + { + "epoch": 0.3415611814345992, + "grad_norm": 0.9169859886169434, + "learning_rate": 0.0011218066078655725, + "loss": 1.5684, + "step": 3238 + }, + { + "epoch": 0.3416666666666667, + "grad_norm": 
0.8719794154167175, + "learning_rate": 0.001121588549353946, + "loss": 1.5514, + "step": 3239 + }, + { + "epoch": 0.34177215189873417, + "grad_norm": 0.6798635721206665, + "learning_rate": 0.0011213704492039694, + "loss": 1.5928, + "step": 3240 + }, + { + "epoch": 0.3418776371308017, + "grad_norm": 0.9462136030197144, + "learning_rate": 0.0011211523074400823, + "loss": 1.5674, + "step": 3241 + }, + { + "epoch": 0.3419831223628692, + "grad_norm": 1.2175477743148804, + "learning_rate": 0.0011209341240867282, + "loss": 1.6108, + "step": 3242 + }, + { + "epoch": 0.3420886075949367, + "grad_norm": 0.667378306388855, + "learning_rate": 0.001120715899168356, + "loss": 1.5605, + "step": 3243 + }, + { + "epoch": 0.3421940928270042, + "grad_norm": 1.2481290102005005, + "learning_rate": 0.0011204976327094187, + "loss": 1.6001, + "step": 3244 + }, + { + "epoch": 0.34229957805907174, + "grad_norm": 0.67437344789505, + "learning_rate": 0.0011202793247343742, + "loss": 1.5613, + "step": 3245 + }, + { + "epoch": 0.34240506329113923, + "grad_norm": 1.191724181175232, + "learning_rate": 0.001120060975267685, + "loss": 1.587, + "step": 3246 + }, + { + "epoch": 0.3425105485232067, + "grad_norm": 0.7727429866790771, + "learning_rate": 0.0011198425843338183, + "loss": 1.6139, + "step": 3247 + }, + { + "epoch": 0.3426160337552743, + "grad_norm": 0.9065296649932861, + "learning_rate": 0.0011196241519572457, + "loss": 1.6026, + "step": 3248 + }, + { + "epoch": 0.34272151898734177, + "grad_norm": 1.2060061693191528, + "learning_rate": 0.001119405678162444, + "loss": 1.6006, + "step": 3249 + }, + { + "epoch": 0.34282700421940926, + "grad_norm": 0.731829822063446, + "learning_rate": 0.001119187162973894, + "loss": 1.6037, + "step": 3250 + }, + { + "epoch": 0.3429324894514768, + "grad_norm": 1.4818177223205566, + "learning_rate": 0.0011189686064160811, + "loss": 1.5726, + "step": 3251 + }, + { + "epoch": 0.3430379746835443, + "grad_norm": 0.7902392745018005, + "learning_rate": 
0.001118750008513496, + "loss": 1.6132, + "step": 3252 + }, + { + "epoch": 0.3431434599156118, + "grad_norm": 0.8896999955177307, + "learning_rate": 0.0011185313692906342, + "loss": 1.6148, + "step": 3253 + }, + { + "epoch": 0.34324894514767934, + "grad_norm": 0.7684738636016846, + "learning_rate": 0.0011183126887719945, + "loss": 1.5773, + "step": 3254 + }, + { + "epoch": 0.34335443037974683, + "grad_norm": 0.8566173315048218, + "learning_rate": 0.0011180939669820813, + "loss": 1.5513, + "step": 3255 + }, + { + "epoch": 0.3434599156118143, + "grad_norm": 0.9108127355575562, + "learning_rate": 0.001117875203945404, + "loss": 1.5398, + "step": 3256 + }, + { + "epoch": 0.3435654008438819, + "grad_norm": 0.7610388398170471, + "learning_rate": 0.0011176563996864754, + "loss": 1.6089, + "step": 3257 + }, + { + "epoch": 0.34367088607594937, + "grad_norm": 1.0858160257339478, + "learning_rate": 0.0011174375542298142, + "loss": 1.5725, + "step": 3258 + }, + { + "epoch": 0.34377637130801686, + "grad_norm": 1.1792410612106323, + "learning_rate": 0.0011172186675999425, + "loss": 1.5814, + "step": 3259 + }, + { + "epoch": 0.3438818565400844, + "grad_norm": 0.7290763258934021, + "learning_rate": 0.001116999739821388, + "loss": 1.5623, + "step": 3260 + }, + { + "epoch": 0.3439873417721519, + "grad_norm": 1.0995831489562988, + "learning_rate": 0.0011167807709186828, + "loss": 1.6196, + "step": 3261 + }, + { + "epoch": 0.3440928270042194, + "grad_norm": 0.796717643737793, + "learning_rate": 0.0011165617609163632, + "loss": 1.6057, + "step": 3262 + }, + { + "epoch": 0.34419831223628694, + "grad_norm": 0.8245158195495605, + "learning_rate": 0.0011163427098389706, + "loss": 1.5954, + "step": 3263 + }, + { + "epoch": 0.34430379746835443, + "grad_norm": 1.1002838611602783, + "learning_rate": 0.0011161236177110504, + "loss": 1.5265, + "step": 3264 + }, + { + "epoch": 0.3444092827004219, + "grad_norm": 0.8379111886024475, + "learning_rate": 0.0011159044845571533, + "loss": 1.5711, + 
"step": 3265 + }, + { + "epoch": 0.3445147679324895, + "grad_norm": 0.6376960873603821, + "learning_rate": 0.0011156853104018342, + "loss": 1.6081, + "step": 3266 + }, + { + "epoch": 0.34462025316455697, + "grad_norm": 0.7350111603736877, + "learning_rate": 0.0011154660952696525, + "loss": 1.6154, + "step": 3267 + }, + { + "epoch": 0.34472573839662446, + "grad_norm": 0.6737897992134094, + "learning_rate": 0.0011152468391851724, + "loss": 1.605, + "step": 3268 + }, + { + "epoch": 0.344831223628692, + "grad_norm": 0.6995928287506104, + "learning_rate": 0.0011150275421729628, + "loss": 1.5922, + "step": 3269 + }, + { + "epoch": 0.3449367088607595, + "grad_norm": 0.7922198176383972, + "learning_rate": 0.0011148082042575968, + "loss": 1.6029, + "step": 3270 + }, + { + "epoch": 0.345042194092827, + "grad_norm": 0.6486632823944092, + "learning_rate": 0.0011145888254636526, + "loss": 1.5594, + "step": 3271 + }, + { + "epoch": 0.34514767932489454, + "grad_norm": 1.0123589038848877, + "learning_rate": 0.0011143694058157122, + "loss": 1.6346, + "step": 3272 + }, + { + "epoch": 0.34525316455696203, + "grad_norm": 1.0343881845474243, + "learning_rate": 0.0011141499453383632, + "loss": 1.6022, + "step": 3273 + }, + { + "epoch": 0.3453586497890295, + "grad_norm": 0.6650921106338501, + "learning_rate": 0.001113930444056197, + "loss": 1.5879, + "step": 3274 + }, + { + "epoch": 0.3454641350210971, + "grad_norm": 0.9576541185379028, + "learning_rate": 0.00111371090199381, + "loss": 1.6049, + "step": 3275 + }, + { + "epoch": 0.34556962025316457, + "grad_norm": 0.8670870661735535, + "learning_rate": 0.0011134913191758024, + "loss": 1.6162, + "step": 3276 + }, + { + "epoch": 0.34567510548523206, + "grad_norm": 0.7880740165710449, + "learning_rate": 0.00111327169562678, + "loss": 1.5918, + "step": 3277 + }, + { + "epoch": 0.34578059071729955, + "grad_norm": 0.9882268905639648, + "learning_rate": 0.0011130520313713528, + "loss": 1.5567, + "step": 3278 + }, + { + "epoch": 
0.3458860759493671, + "grad_norm": 0.740964949131012, + "learning_rate": 0.0011128323264341352, + "loss": 1.597, + "step": 3279 + }, + { + "epoch": 0.3459915611814346, + "grad_norm": 0.839352011680603, + "learning_rate": 0.0011126125808397461, + "loss": 1.5899, + "step": 3280 + }, + { + "epoch": 0.3460970464135021, + "grad_norm": 0.8526003360748291, + "learning_rate": 0.0011123927946128092, + "loss": 1.5724, + "step": 3281 + }, + { + "epoch": 0.34620253164556963, + "grad_norm": 0.9130859375, + "learning_rate": 0.0011121729677779526, + "loss": 1.5902, + "step": 3282 + }, + { + "epoch": 0.3463080168776371, + "grad_norm": 1.316423773765564, + "learning_rate": 0.001111953100359809, + "loss": 1.5475, + "step": 3283 + }, + { + "epoch": 0.3464135021097046, + "grad_norm": 0.8765917420387268, + "learning_rate": 0.0011117331923830157, + "loss": 1.5983, + "step": 3284 + }, + { + "epoch": 0.34651898734177217, + "grad_norm": 0.7321099042892456, + "learning_rate": 0.0011115132438722143, + "loss": 1.5929, + "step": 3285 + }, + { + "epoch": 0.34662447257383966, + "grad_norm": 0.9118958115577698, + "learning_rate": 0.0011112932548520513, + "loss": 1.6064, + "step": 3286 + }, + { + "epoch": 0.34672995780590715, + "grad_norm": 0.7161591053009033, + "learning_rate": 0.0011110732253471777, + "loss": 1.6484, + "step": 3287 + }, + { + "epoch": 0.3468354430379747, + "grad_norm": 0.763673722743988, + "learning_rate": 0.0011108531553822485, + "loss": 1.5484, + "step": 3288 + }, + { + "epoch": 0.3469409282700422, + "grad_norm": 0.693870484828949, + "learning_rate": 0.001110633044981924, + "loss": 1.5566, + "step": 3289 + }, + { + "epoch": 0.3470464135021097, + "grad_norm": 0.8718399405479431, + "learning_rate": 0.0011104128941708683, + "loss": 1.5153, + "step": 3290 + }, + { + "epoch": 0.34715189873417723, + "grad_norm": 1.0853921175003052, + "learning_rate": 0.001110192702973751, + "loss": 1.5598, + "step": 3291 + }, + { + "epoch": 0.3472573839662447, + "grad_norm": 0.8791952133178711, + 
"learning_rate": 0.001109972471415245, + "loss": 1.5752, + "step": 3292 + }, + { + "epoch": 0.3473628691983122, + "grad_norm": 0.6844127178192139, + "learning_rate": 0.0011097521995200288, + "loss": 1.574, + "step": 3293 + }, + { + "epoch": 0.34746835443037977, + "grad_norm": 0.8423905968666077, + "learning_rate": 0.0011095318873127844, + "loss": 1.5812, + "step": 3294 + }, + { + "epoch": 0.34757383966244726, + "grad_norm": 1.150478720664978, + "learning_rate": 0.0011093115348181995, + "loss": 1.5732, + "step": 3295 + }, + { + "epoch": 0.34767932489451475, + "grad_norm": 0.7870450615882874, + "learning_rate": 0.0011090911420609654, + "loss": 1.6331, + "step": 3296 + }, + { + "epoch": 0.3477848101265823, + "grad_norm": 0.7976176142692566, + "learning_rate": 0.0011088707090657784, + "loss": 1.5887, + "step": 3297 + }, + { + "epoch": 0.3478902953586498, + "grad_norm": 1.1108342409133911, + "learning_rate": 0.0011086502358573387, + "loss": 1.5768, + "step": 3298 + }, + { + "epoch": 0.3479957805907173, + "grad_norm": 0.7600010633468628, + "learning_rate": 0.0011084297224603517, + "loss": 1.607, + "step": 3299 + }, + { + "epoch": 0.34810126582278483, + "grad_norm": 0.8985463380813599, + "learning_rate": 0.001108209168899527, + "loss": 1.6043, + "step": 3300 + }, + { + "epoch": 0.3482067510548523, + "grad_norm": 1.2361915111541748, + "learning_rate": 0.0011079885751995788, + "loss": 1.5912, + "step": 3301 + }, + { + "epoch": 0.3483122362869198, + "grad_norm": 0.6376525163650513, + "learning_rate": 0.0011077679413852258, + "loss": 1.5717, + "step": 3302 + }, + { + "epoch": 0.34841772151898737, + "grad_norm": 0.9041579365730286, + "learning_rate": 0.0011075472674811908, + "loss": 1.6222, + "step": 3303 + }, + { + "epoch": 0.34852320675105486, + "grad_norm": 0.6833534240722656, + "learning_rate": 0.0011073265535122016, + "loss": 1.598, + "step": 3304 + }, + { + "epoch": 0.34862869198312235, + "grad_norm": 1.3646377325057983, + "learning_rate": 0.0011071057995029902, + 
"loss": 1.6198, + "step": 3305 + }, + { + "epoch": 0.3487341772151899, + "grad_norm": 0.8067207336425781, + "learning_rate": 0.0011068850054782933, + "loss": 1.5624, + "step": 3306 + }, + { + "epoch": 0.3488396624472574, + "grad_norm": 0.9936261177062988, + "learning_rate": 0.0011066641714628522, + "loss": 1.5819, + "step": 3307 + }, + { + "epoch": 0.3489451476793249, + "grad_norm": 0.8461071848869324, + "learning_rate": 0.001106443297481412, + "loss": 1.6044, + "step": 3308 + }, + { + "epoch": 0.3490506329113924, + "grad_norm": 0.7331381440162659, + "learning_rate": 0.001106222383558723, + "loss": 1.5871, + "step": 3309 + }, + { + "epoch": 0.3491561181434599, + "grad_norm": 0.6753408908843994, + "learning_rate": 0.0011060014297195396, + "loss": 1.5628, + "step": 3310 + }, + { + "epoch": 0.3492616033755274, + "grad_norm": 0.7204347848892212, + "learning_rate": 0.0011057804359886209, + "loss": 1.57, + "step": 3311 + }, + { + "epoch": 0.3493670886075949, + "grad_norm": 0.7116643190383911, + "learning_rate": 0.0011055594023907302, + "loss": 1.6244, + "step": 3312 + }, + { + "epoch": 0.34947257383966246, + "grad_norm": 0.8085433840751648, + "learning_rate": 0.0011053383289506354, + "loss": 1.5383, + "step": 3313 + }, + { + "epoch": 0.34957805907172995, + "grad_norm": 0.7241156697273254, + "learning_rate": 0.001105117215693109, + "loss": 1.6151, + "step": 3314 + }, + { + "epoch": 0.34968354430379744, + "grad_norm": 0.6573028564453125, + "learning_rate": 0.001104896062642928, + "loss": 1.5736, + "step": 3315 + }, + { + "epoch": 0.349789029535865, + "grad_norm": 0.7576993703842163, + "learning_rate": 0.001104674869824873, + "loss": 1.5583, + "step": 3316 + }, + { + "epoch": 0.3498945147679325, + "grad_norm": 0.6863471865653992, + "learning_rate": 0.0011044536372637307, + "loss": 1.6276, + "step": 3317 + }, + { + "epoch": 0.35, + "grad_norm": 0.6723792552947998, + "learning_rate": 0.001104232364984291, + "loss": 1.6183, + "step": 3318 + }, + { + "epoch": 
0.3501054852320675, + "grad_norm": 0.7878588438034058, + "learning_rate": 0.001104011053011348, + "loss": 1.5883, + "step": 3319 + }, + { + "epoch": 0.350210970464135, + "grad_norm": 0.7143304944038391, + "learning_rate": 0.0011037897013697015, + "loss": 1.5923, + "step": 3320 + }, + { + "epoch": 0.3503164556962025, + "grad_norm": 0.8204415440559387, + "learning_rate": 0.0011035683100841548, + "loss": 1.5332, + "step": 3321 + }, + { + "epoch": 0.35042194092827006, + "grad_norm": 1.1425844430923462, + "learning_rate": 0.0011033468791795161, + "loss": 1.5753, + "step": 3322 + }, + { + "epoch": 0.35052742616033755, + "grad_norm": 0.6537514925003052, + "learning_rate": 0.0011031254086805973, + "loss": 1.6269, + "step": 3323 + }, + { + "epoch": 0.35063291139240504, + "grad_norm": 1.1918712854385376, + "learning_rate": 0.0011029038986122156, + "loss": 1.5649, + "step": 3324 + }, + { + "epoch": 0.3507383966244726, + "grad_norm": 0.7245627045631409, + "learning_rate": 0.0011026823489991924, + "loss": 1.6157, + "step": 3325 + }, + { + "epoch": 0.3508438818565401, + "grad_norm": 0.941327691078186, + "learning_rate": 0.0011024607598663539, + "loss": 1.5564, + "step": 3326 + }, + { + "epoch": 0.3509493670886076, + "grad_norm": 1.0986944437026978, + "learning_rate": 0.001102239131238529, + "loss": 1.6049, + "step": 3327 + }, + { + "epoch": 0.3510548523206751, + "grad_norm": 0.7175740003585815, + "learning_rate": 0.0011020174631405533, + "loss": 1.608, + "step": 3328 + }, + { + "epoch": 0.3511603375527426, + "grad_norm": 1.013954997062683, + "learning_rate": 0.0011017957555972656, + "loss": 1.5955, + "step": 3329 + }, + { + "epoch": 0.3512658227848101, + "grad_norm": 0.7482179403305054, + "learning_rate": 0.0011015740086335092, + "loss": 1.5555, + "step": 3330 + }, + { + "epoch": 0.35137130801687766, + "grad_norm": 0.7186481356620789, + "learning_rate": 0.001101352222274132, + "loss": 1.5754, + "step": 3331 + }, + { + "epoch": 0.35147679324894515, + "grad_norm": 
0.6674057841300964, + "learning_rate": 0.0011011303965439863, + "loss": 1.5709, + "step": 3332 + }, + { + "epoch": 0.35158227848101264, + "grad_norm": 1.0057973861694336, + "learning_rate": 0.0011009085314679287, + "loss": 1.6452, + "step": 3333 + }, + { + "epoch": 0.3516877637130802, + "grad_norm": 1.1494314670562744, + "learning_rate": 0.0011006866270708204, + "loss": 1.5631, + "step": 3334 + }, + { + "epoch": 0.3517932489451477, + "grad_norm": 0.7519131898880005, + "learning_rate": 0.0011004646833775269, + "loss": 1.5632, + "step": 3335 + }, + { + "epoch": 0.3518987341772152, + "grad_norm": 0.9409007430076599, + "learning_rate": 0.0011002427004129184, + "loss": 1.5816, + "step": 3336 + }, + { + "epoch": 0.3520042194092827, + "grad_norm": 1.1490789651870728, + "learning_rate": 0.0011000206782018683, + "loss": 1.5767, + "step": 3337 + }, + { + "epoch": 0.3521097046413502, + "grad_norm": 0.7404630780220032, + "learning_rate": 0.001099798616769256, + "loss": 1.5854, + "step": 3338 + }, + { + "epoch": 0.3522151898734177, + "grad_norm": 0.7371399998664856, + "learning_rate": 0.0010995765161399646, + "loss": 1.6046, + "step": 3339 + }, + { + "epoch": 0.35232067510548526, + "grad_norm": 0.7604857683181763, + "learning_rate": 0.0010993543763388814, + "loss": 1.5619, + "step": 3340 + }, + { + "epoch": 0.35242616033755275, + "grad_norm": 0.6846300959587097, + "learning_rate": 0.0010991321973908982, + "loss": 1.5532, + "step": 3341 + }, + { + "epoch": 0.35253164556962024, + "grad_norm": 0.727995753288269, + "learning_rate": 0.0010989099793209112, + "loss": 1.6011, + "step": 3342 + }, + { + "epoch": 0.35263713080168774, + "grad_norm": 0.7050769329071045, + "learning_rate": 0.0010986877221538214, + "loss": 1.5561, + "step": 3343 + }, + { + "epoch": 0.3527426160337553, + "grad_norm": 0.6893633604049683, + "learning_rate": 0.0010984654259145335, + "loss": 1.5979, + "step": 3344 + }, + { + "epoch": 0.3528481012658228, + "grad_norm": 0.8054896593093872, + "learning_rate": 
0.0010982430906279572, + "loss": 1.6094, + "step": 3345 + }, + { + "epoch": 0.35295358649789027, + "grad_norm": 0.769532322883606, + "learning_rate": 0.001098020716319006, + "loss": 1.5774, + "step": 3346 + }, + { + "epoch": 0.3530590717299578, + "grad_norm": 0.6619129180908203, + "learning_rate": 0.0010977983030125982, + "loss": 1.5971, + "step": 3347 + }, + { + "epoch": 0.3531645569620253, + "grad_norm": 0.7037948369979858, + "learning_rate": 0.001097575850733656, + "loss": 1.5618, + "step": 3348 + }, + { + "epoch": 0.3532700421940928, + "grad_norm": 0.686381459236145, + "learning_rate": 0.001097353359507107, + "loss": 1.5296, + "step": 3349 + }, + { + "epoch": 0.35337552742616035, + "grad_norm": 0.7160583138465881, + "learning_rate": 0.0010971308293578814, + "loss": 1.5673, + "step": 3350 + }, + { + "epoch": 0.35348101265822784, + "grad_norm": 0.6722518801689148, + "learning_rate": 0.0010969082603109158, + "loss": 1.5638, + "step": 3351 + }, + { + "epoch": 0.35358649789029534, + "grad_norm": 0.6956047415733337, + "learning_rate": 0.00109668565239115, + "loss": 1.6103, + "step": 3352 + }, + { + "epoch": 0.3536919831223629, + "grad_norm": 0.7233831286430359, + "learning_rate": 0.001096463005623528, + "loss": 1.5668, + "step": 3353 + }, + { + "epoch": 0.3537974683544304, + "grad_norm": 0.6531406044960022, + "learning_rate": 0.0010962403200329984, + "loss": 1.5329, + "step": 3354 + }, + { + "epoch": 0.35390295358649787, + "grad_norm": 0.7239596843719482, + "learning_rate": 0.0010960175956445145, + "loss": 1.6004, + "step": 3355 + }, + { + "epoch": 0.3540084388185654, + "grad_norm": 0.7007479071617126, + "learning_rate": 0.0010957948324830337, + "loss": 1.5648, + "step": 3356 + }, + { + "epoch": 0.3541139240506329, + "grad_norm": 0.7700148224830627, + "learning_rate": 0.0010955720305735176, + "loss": 1.5888, + "step": 3357 + }, + { + "epoch": 0.3542194092827004, + "grad_norm": 0.7907053232192993, + "learning_rate": 0.0010953491899409321, + "loss": 1.5542, + "step": 
3358 + }, + { + "epoch": 0.35432489451476795, + "grad_norm": 0.6905774474143982, + "learning_rate": 0.001095126310610248, + "loss": 1.5693, + "step": 3359 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 0.7603866457939148, + "learning_rate": 0.0010949033926064397, + "loss": 1.6079, + "step": 3360 + }, + { + "epoch": 0.35453586497890294, + "grad_norm": 0.7055365443229675, + "learning_rate": 0.0010946804359544867, + "loss": 1.5863, + "step": 3361 + }, + { + "epoch": 0.3546413502109705, + "grad_norm": 0.8609740138053894, + "learning_rate": 0.001094457440679372, + "loss": 1.6045, + "step": 3362 + }, + { + "epoch": 0.354746835443038, + "grad_norm": 0.8570699095726013, + "learning_rate": 0.0010942344068060833, + "loss": 1.5837, + "step": 3363 + }, + { + "epoch": 0.35485232067510547, + "grad_norm": 0.7035276293754578, + "learning_rate": 0.001094011334359613, + "loss": 1.5865, + "step": 3364 + }, + { + "epoch": 0.354957805907173, + "grad_norm": 0.7488978505134583, + "learning_rate": 0.0010937882233649572, + "loss": 1.5345, + "step": 3365 + }, + { + "epoch": 0.3550632911392405, + "grad_norm": 0.7258545160293579, + "learning_rate": 0.0010935650738471167, + "loss": 1.5763, + "step": 3366 + }, + { + "epoch": 0.355168776371308, + "grad_norm": 0.6651206612586975, + "learning_rate": 0.0010933418858310965, + "loss": 1.5937, + "step": 3367 + }, + { + "epoch": 0.35527426160337555, + "grad_norm": 0.6692237854003906, + "learning_rate": 0.0010931186593419059, + "loss": 1.5954, + "step": 3368 + }, + { + "epoch": 0.35537974683544304, + "grad_norm": 0.7444521188735962, + "learning_rate": 0.0010928953944045585, + "loss": 1.6053, + "step": 3369 + }, + { + "epoch": 0.35548523206751054, + "grad_norm": 0.6910592913627625, + "learning_rate": 0.0010926720910440725, + "loss": 1.5825, + "step": 3370 + }, + { + "epoch": 0.3555907172995781, + "grad_norm": 0.7717630863189697, + "learning_rate": 0.00109244874928547, + "loss": 1.5874, + "step": 3371 + }, + { + "epoch": 0.3556962025316456, + 
"grad_norm": 0.6566793322563171, + "learning_rate": 0.0010922253691537773, + "loss": 1.5609, + "step": 3372 + }, + { + "epoch": 0.35580168776371307, + "grad_norm": 0.7156280875205994, + "learning_rate": 0.0010920019506740256, + "loss": 1.5586, + "step": 3373 + }, + { + "epoch": 0.35590717299578056, + "grad_norm": 0.6819287538528442, + "learning_rate": 0.00109177849387125, + "loss": 1.5884, + "step": 3374 + }, + { + "epoch": 0.3560126582278481, + "grad_norm": 0.7155669331550598, + "learning_rate": 0.00109155499877049, + "loss": 1.5943, + "step": 3375 + }, + { + "epoch": 0.3561181434599156, + "grad_norm": 0.7611526846885681, + "learning_rate": 0.001091331465396789, + "loss": 1.5987, + "step": 3376 + }, + { + "epoch": 0.3562236286919831, + "grad_norm": 0.6825032234191895, + "learning_rate": 0.0010911078937751954, + "loss": 1.5656, + "step": 3377 + }, + { + "epoch": 0.35632911392405064, + "grad_norm": 1.115450382232666, + "learning_rate": 0.0010908842839307614, + "loss": 1.5792, + "step": 3378 + }, + { + "epoch": 0.35643459915611814, + "grad_norm": 0.9159544706344604, + "learning_rate": 0.0010906606358885437, + "loss": 1.5851, + "step": 3379 + }, + { + "epoch": 0.35654008438818563, + "grad_norm": 0.6679187417030334, + "learning_rate": 0.001090436949673603, + "loss": 1.5849, + "step": 3380 + }, + { + "epoch": 0.3566455696202532, + "grad_norm": 0.8706899881362915, + "learning_rate": 0.0010902132253110043, + "loss": 1.5663, + "step": 3381 + }, + { + "epoch": 0.35675105485232067, + "grad_norm": 0.8416879773139954, + "learning_rate": 0.0010899894628258174, + "loss": 1.523, + "step": 3382 + }, + { + "epoch": 0.35685654008438816, + "grad_norm": 0.7293644547462463, + "learning_rate": 0.001089765662243116, + "loss": 1.6106, + "step": 3383 + }, + { + "epoch": 0.3569620253164557, + "grad_norm": 0.6407566070556641, + "learning_rate": 0.0010895418235879776, + "loss": 1.5951, + "step": 3384 + }, + { + "epoch": 0.3570675105485232, + "grad_norm": 0.725942075252533, + "learning_rate": 
0.0010893179468854848, + "loss": 1.5217, + "step": 3385 + }, + { + "epoch": 0.3571729957805907, + "grad_norm": 0.7917702794075012, + "learning_rate": 0.0010890940321607245, + "loss": 1.5533, + "step": 3386 + }, + { + "epoch": 0.35727848101265824, + "grad_norm": 0.652707040309906, + "learning_rate": 0.0010888700794387867, + "loss": 1.6057, + "step": 3387 + }, + { + "epoch": 0.35738396624472574, + "grad_norm": 0.8608785271644592, + "learning_rate": 0.0010886460887447667, + "loss": 1.6013, + "step": 3388 + }, + { + "epoch": 0.35748945147679323, + "grad_norm": 1.0850006341934204, + "learning_rate": 0.0010884220601037637, + "loss": 1.6177, + "step": 3389 + }, + { + "epoch": 0.3575949367088608, + "grad_norm": 0.8526700139045715, + "learning_rate": 0.0010881979935408815, + "loss": 1.5623, + "step": 3390 + }, + { + "epoch": 0.35770042194092827, + "grad_norm": 0.7700873613357544, + "learning_rate": 0.0010879738890812278, + "loss": 1.5672, + "step": 3391 + }, + { + "epoch": 0.35780590717299576, + "grad_norm": 1.296494483947754, + "learning_rate": 0.0010877497467499146, + "loss": 1.5563, + "step": 3392 + }, + { + "epoch": 0.3579113924050633, + "grad_norm": 0.6871975064277649, + "learning_rate": 0.001087525566572058, + "loss": 1.5618, + "step": 3393 + }, + { + "epoch": 0.3580168776371308, + "grad_norm": 1.028193712234497, + "learning_rate": 0.0010873013485727782, + "loss": 1.5618, + "step": 3394 + }, + { + "epoch": 0.3581223628691983, + "grad_norm": 0.9528758525848389, + "learning_rate": 0.001087077092777201, + "loss": 1.6049, + "step": 3395 + }, + { + "epoch": 0.35822784810126584, + "grad_norm": 0.6827400326728821, + "learning_rate": 0.0010868527992104545, + "loss": 1.5847, + "step": 3396 + }, + { + "epoch": 0.35833333333333334, + "grad_norm": 1.025891661643982, + "learning_rate": 0.001086628467897672, + "loss": 1.5705, + "step": 3397 + }, + { + "epoch": 0.35843881856540083, + "grad_norm": 0.8430443406105042, + "learning_rate": 0.0010864040988639912, + "loss": 1.5756, + 
"step": 3398 + }, + { + "epoch": 0.3585443037974684, + "grad_norm": 0.7402358055114746, + "learning_rate": 0.0010861796921345537, + "loss": 1.6312, + "step": 3399 + }, + { + "epoch": 0.35864978902953587, + "grad_norm": 0.9604320526123047, + "learning_rate": 0.0010859552477345052, + "loss": 1.6305, + "step": 3400 + }, + { + "epoch": 0.35875527426160336, + "grad_norm": 0.8654452562332153, + "learning_rate": 0.0010857307656889962, + "loss": 1.5994, + "step": 3401 + }, + { + "epoch": 0.3588607594936709, + "grad_norm": 0.686408281326294, + "learning_rate": 0.0010855062460231807, + "loss": 1.5717, + "step": 3402 + }, + { + "epoch": 0.3589662447257384, + "grad_norm": 1.0286822319030762, + "learning_rate": 0.0010852816887622174, + "loss": 1.6099, + "step": 3403 + }, + { + "epoch": 0.3590717299578059, + "grad_norm": 1.1313564777374268, + "learning_rate": 0.0010850570939312687, + "loss": 1.5823, + "step": 3404 + }, + { + "epoch": 0.35917721518987344, + "grad_norm": 0.6781228184700012, + "learning_rate": 0.0010848324615555024, + "loss": 1.5714, + "step": 3405 + }, + { + "epoch": 0.35928270042194094, + "grad_norm": 0.9651365876197815, + "learning_rate": 0.0010846077916600888, + "loss": 1.5927, + "step": 3406 + }, + { + "epoch": 0.35938818565400843, + "grad_norm": 0.902193009853363, + "learning_rate": 0.0010843830842702036, + "loss": 1.5602, + "step": 3407 + }, + { + "epoch": 0.3594936708860759, + "grad_norm": 0.6781826019287109, + "learning_rate": 0.0010841583394110266, + "loss": 1.5743, + "step": 3408 + }, + { + "epoch": 0.35959915611814347, + "grad_norm": 1.0436489582061768, + "learning_rate": 0.0010839335571077415, + "loss": 1.5883, + "step": 3409 + }, + { + "epoch": 0.35970464135021096, + "grad_norm": 0.9589829444885254, + "learning_rate": 0.001083708737385536, + "loss": 1.5546, + "step": 3410 + }, + { + "epoch": 0.35981012658227846, + "grad_norm": 0.6643384695053101, + "learning_rate": 0.0010834838802696023, + "loss": 1.5698, + "step": 3411 + }, + { + "epoch": 
0.359915611814346, + "grad_norm": 0.7797092795372009, + "learning_rate": 0.0010832589857851373, + "loss": 1.5553, + "step": 3412 + }, + { + "epoch": 0.3600210970464135, + "grad_norm": 0.7601721286773682, + "learning_rate": 0.001083034053957341, + "loss": 1.5695, + "step": 3413 + }, + { + "epoch": 0.360126582278481, + "grad_norm": 0.7349407076835632, + "learning_rate": 0.0010828090848114182, + "loss": 1.5503, + "step": 3414 + }, + { + "epoch": 0.36023206751054854, + "grad_norm": 0.8146485686302185, + "learning_rate": 0.001082584078372578, + "loss": 1.5408, + "step": 3415 + }, + { + "epoch": 0.36033755274261603, + "grad_norm": 0.737078845500946, + "learning_rate": 0.0010823590346660335, + "loss": 1.5777, + "step": 3416 + }, + { + "epoch": 0.3604430379746835, + "grad_norm": 0.6917500495910645, + "learning_rate": 0.0010821339537170015, + "loss": 1.5656, + "step": 3417 + }, + { + "epoch": 0.36054852320675107, + "grad_norm": 0.6485565304756165, + "learning_rate": 0.0010819088355507043, + "loss": 1.541, + "step": 3418 + }, + { + "epoch": 0.36065400843881856, + "grad_norm": 0.703639566898346, + "learning_rate": 0.0010816836801923666, + "loss": 1.5324, + "step": 3419 + }, + { + "epoch": 0.36075949367088606, + "grad_norm": 0.7529979348182678, + "learning_rate": 0.0010814584876672187, + "loss": 1.569, + "step": 3420 + }, + { + "epoch": 0.3608649789029536, + "grad_norm": 0.7051487565040588, + "learning_rate": 0.0010812332580004947, + "loss": 1.5925, + "step": 3421 + }, + { + "epoch": 0.3609704641350211, + "grad_norm": 0.8537776470184326, + "learning_rate": 0.0010810079912174323, + "loss": 1.6249, + "step": 3422 + }, + { + "epoch": 0.3610759493670886, + "grad_norm": 0.8032751679420471, + "learning_rate": 0.001080782687343274, + "loss": 1.5787, + "step": 3423 + }, + { + "epoch": 0.36118143459915614, + "grad_norm": 0.7838888764381409, + "learning_rate": 0.0010805573464032659, + "loss": 1.5966, + "step": 3424 + }, + { + "epoch": 0.36128691983122363, + "grad_norm": 
0.768490731716156, + "learning_rate": 0.0010803319684226593, + "loss": 1.5951, + "step": 3425 + }, + { + "epoch": 0.3613924050632911, + "grad_norm": 0.7668374180793762, + "learning_rate": 0.001080106553426708, + "loss": 1.5442, + "step": 3426 + }, + { + "epoch": 0.36149789029535867, + "grad_norm": 0.8190905451774597, + "learning_rate": 0.0010798811014406716, + "loss": 1.5899, + "step": 3427 + }, + { + "epoch": 0.36160337552742616, + "grad_norm": 0.7929321527481079, + "learning_rate": 0.0010796556124898127, + "loss": 1.5715, + "step": 3428 + }, + { + "epoch": 0.36170886075949366, + "grad_norm": 0.7207669615745544, + "learning_rate": 0.0010794300865993988, + "loss": 1.5536, + "step": 3429 + }, + { + "epoch": 0.3618143459915612, + "grad_norm": 0.6779942512512207, + "learning_rate": 0.0010792045237947008, + "loss": 1.5484, + "step": 3430 + }, + { + "epoch": 0.3619198312236287, + "grad_norm": 0.8067245483398438, + "learning_rate": 0.0010789789241009945, + "loss": 1.5821, + "step": 3431 + }, + { + "epoch": 0.3620253164556962, + "grad_norm": 0.7098557353019714, + "learning_rate": 0.0010787532875435593, + "loss": 1.5579, + "step": 3432 + }, + { + "epoch": 0.36213080168776374, + "grad_norm": 0.7303187847137451, + "learning_rate": 0.0010785276141476786, + "loss": 1.5848, + "step": 3433 + }, + { + "epoch": 0.36223628691983123, + "grad_norm": 0.668641209602356, + "learning_rate": 0.001078301903938641, + "loss": 1.543, + "step": 3434 + }, + { + "epoch": 0.3623417721518987, + "grad_norm": 0.6749128699302673, + "learning_rate": 0.0010780761569417377, + "loss": 1.5869, + "step": 3435 + }, + { + "epoch": 0.36244725738396627, + "grad_norm": 0.6978009939193726, + "learning_rate": 0.0010778503731822652, + "loss": 1.5507, + "step": 3436 + }, + { + "epoch": 0.36255274261603376, + "grad_norm": 0.6659159660339355, + "learning_rate": 0.0010776245526855235, + "loss": 1.6229, + "step": 3437 + }, + { + "epoch": 0.36265822784810126, + "grad_norm": 0.80623859167099, + "learning_rate": 
0.0010773986954768172, + "loss": 1.6066, + "step": 3438 + }, + { + "epoch": 0.3627637130801688, + "grad_norm": 0.7342515587806702, + "learning_rate": 0.0010771728015814544, + "loss": 1.5423, + "step": 3439 + }, + { + "epoch": 0.3628691983122363, + "grad_norm": 0.7145922780036926, + "learning_rate": 0.0010769468710247478, + "loss": 1.56, + "step": 3440 + }, + { + "epoch": 0.3629746835443038, + "grad_norm": 0.6753692030906677, + "learning_rate": 0.0010767209038320138, + "loss": 1.5381, + "step": 3441 + }, + { + "epoch": 0.3630801687763713, + "grad_norm": 0.7777947187423706, + "learning_rate": 0.0010764949000285735, + "loss": 1.5811, + "step": 3442 + }, + { + "epoch": 0.36318565400843883, + "grad_norm": 0.6799565553665161, + "learning_rate": 0.0010762688596397515, + "loss": 1.5971, + "step": 3443 + }, + { + "epoch": 0.3632911392405063, + "grad_norm": 0.7399224638938904, + "learning_rate": 0.001076042782690877, + "loss": 1.542, + "step": 3444 + }, + { + "epoch": 0.3633966244725738, + "grad_norm": 0.7277860045433044, + "learning_rate": 0.001075816669207283, + "loss": 1.5978, + "step": 3445 + }, + { + "epoch": 0.36350210970464136, + "grad_norm": 0.7697589993476868, + "learning_rate": 0.0010755905192143063, + "loss": 1.6123, + "step": 3446 + }, + { + "epoch": 0.36360759493670886, + "grad_norm": 0.8141958713531494, + "learning_rate": 0.0010753643327372886, + "loss": 1.5479, + "step": 3447 + }, + { + "epoch": 0.36371308016877635, + "grad_norm": 0.7804858684539795, + "learning_rate": 0.0010751381098015747, + "loss": 1.5783, + "step": 3448 + }, + { + "epoch": 0.3638185654008439, + "grad_norm": 0.7137274146080017, + "learning_rate": 0.0010749118504325146, + "loss": 1.5683, + "step": 3449 + }, + { + "epoch": 0.3639240506329114, + "grad_norm": 0.7047985196113586, + "learning_rate": 0.0010746855546554612, + "loss": 1.5411, + "step": 3450 + }, + { + "epoch": 0.3640295358649789, + "grad_norm": 0.6936703324317932, + "learning_rate": 0.0010744592224957727, + "loss": 1.5589, + "step": 
3451 + }, + { + "epoch": 0.36413502109704643, + "grad_norm": 0.6585363149642944, + "learning_rate": 0.00107423285397881, + "loss": 1.5242, + "step": 3452 + }, + { + "epoch": 0.3642405063291139, + "grad_norm": 0.6780238151550293, + "learning_rate": 0.0010740064491299398, + "loss": 1.574, + "step": 3453 + }, + { + "epoch": 0.3643459915611814, + "grad_norm": 0.775230348110199, + "learning_rate": 0.0010737800079745308, + "loss": 1.5502, + "step": 3454 + }, + { + "epoch": 0.36445147679324896, + "grad_norm": 0.703164279460907, + "learning_rate": 0.0010735535305379576, + "loss": 1.6071, + "step": 3455 + }, + { + "epoch": 0.36455696202531646, + "grad_norm": 0.8603801727294922, + "learning_rate": 0.001073327016845598, + "loss": 1.5617, + "step": 3456 + }, + { + "epoch": 0.36466244725738395, + "grad_norm": 0.7347352504730225, + "learning_rate": 0.001073100466922834, + "loss": 1.5718, + "step": 3457 + }, + { + "epoch": 0.3647679324894515, + "grad_norm": 0.9296137094497681, + "learning_rate": 0.0010728738807950515, + "loss": 1.5891, + "step": 3458 + }, + { + "epoch": 0.364873417721519, + "grad_norm": 1.1572057008743286, + "learning_rate": 0.0010726472584876403, + "loss": 1.5347, + "step": 3459 + }, + { + "epoch": 0.3649789029535865, + "grad_norm": 0.7260011434555054, + "learning_rate": 0.0010724206000259954, + "loss": 1.5808, + "step": 3460 + }, + { + "epoch": 0.36508438818565403, + "grad_norm": 1.0850508213043213, + "learning_rate": 0.0010721939054355145, + "loss": 1.5949, + "step": 3461 + }, + { + "epoch": 0.3651898734177215, + "grad_norm": 0.7135341167449951, + "learning_rate": 0.0010719671747415995, + "loss": 1.5718, + "step": 3462 + }, + { + "epoch": 0.365295358649789, + "grad_norm": 0.9293360710144043, + "learning_rate": 0.0010717404079696575, + "loss": 1.5806, + "step": 3463 + }, + { + "epoch": 0.36540084388185656, + "grad_norm": 0.9994378685951233, + "learning_rate": 0.0010715136051450982, + "loss": 1.6022, + "step": 3464 + }, + { + "epoch": 0.36550632911392406, + 
"grad_norm": 0.9333184361457825, + "learning_rate": 0.0010712867662933364, + "loss": 1.5713, + "step": 3465 + }, + { + "epoch": 0.36561181434599155, + "grad_norm": 0.9384711384773254, + "learning_rate": 0.0010710598914397901, + "loss": 1.5501, + "step": 3466 + }, + { + "epoch": 0.3657172995780591, + "grad_norm": 0.7620593905448914, + "learning_rate": 0.0010708329806098822, + "loss": 1.6037, + "step": 3467 + }, + { + "epoch": 0.3658227848101266, + "grad_norm": 0.8934109807014465, + "learning_rate": 0.001070606033829039, + "loss": 1.5476, + "step": 3468 + }, + { + "epoch": 0.3659282700421941, + "grad_norm": 0.8710029721260071, + "learning_rate": 0.001070379051122691, + "loss": 1.6173, + "step": 3469 + }, + { + "epoch": 0.36603375527426163, + "grad_norm": 0.7169710397720337, + "learning_rate": 0.0010701520325162727, + "loss": 1.5821, + "step": 3470 + }, + { + "epoch": 0.3661392405063291, + "grad_norm": 1.1962218284606934, + "learning_rate": 0.001069924978035223, + "loss": 1.5688, + "step": 3471 + }, + { + "epoch": 0.3662447257383966, + "grad_norm": 0.8851218223571777, + "learning_rate": 0.0010696978877049838, + "loss": 1.6184, + "step": 3472 + }, + { + "epoch": 0.3663502109704641, + "grad_norm": 0.7049900889396667, + "learning_rate": 0.0010694707615510023, + "loss": 1.6298, + "step": 3473 + }, + { + "epoch": 0.36645569620253166, + "grad_norm": 0.7368196249008179, + "learning_rate": 0.0010692435995987293, + "loss": 1.601, + "step": 3474 + }, + { + "epoch": 0.36656118143459915, + "grad_norm": 0.6998977065086365, + "learning_rate": 0.0010690164018736187, + "loss": 1.6171, + "step": 3475 + }, + { + "epoch": 0.36666666666666664, + "grad_norm": 0.7323729395866394, + "learning_rate": 0.0010687891684011295, + "loss": 1.568, + "step": 3476 + }, + { + "epoch": 0.3667721518987342, + "grad_norm": 0.68841552734375, + "learning_rate": 0.0010685618992067243, + "loss": 1.5777, + "step": 3477 + }, + { + "epoch": 0.3668776371308017, + "grad_norm": 0.7300235033035278, + "learning_rate": 
0.00106833459431587, + "loss": 1.5323, + "step": 3478 + }, + { + "epoch": 0.3669831223628692, + "grad_norm": 0.666501522064209, + "learning_rate": 0.001068107253754037, + "loss": 1.575, + "step": 3479 + }, + { + "epoch": 0.3670886075949367, + "grad_norm": 0.6814805269241333, + "learning_rate": 0.0010678798775467001, + "loss": 1.5522, + "step": 3480 + }, + { + "epoch": 0.3671940928270042, + "grad_norm": 0.6884227991104126, + "learning_rate": 0.0010676524657193378, + "loss": 1.5861, + "step": 3481 + }, + { + "epoch": 0.3672995780590717, + "grad_norm": 0.643268883228302, + "learning_rate": 0.0010674250182974325, + "loss": 1.5903, + "step": 3482 + }, + { + "epoch": 0.36740506329113926, + "grad_norm": 0.7909756302833557, + "learning_rate": 0.0010671975353064712, + "loss": 1.5516, + "step": 3483 + }, + { + "epoch": 0.36751054852320675, + "grad_norm": 0.6672321557998657, + "learning_rate": 0.0010669700167719443, + "loss": 1.6108, + "step": 3484 + }, + { + "epoch": 0.36761603375527424, + "grad_norm": 0.6969887614250183, + "learning_rate": 0.0010667424627193469, + "loss": 1.5914, + "step": 3485 + }, + { + "epoch": 0.3677215189873418, + "grad_norm": 0.7354982495307922, + "learning_rate": 0.0010665148731741768, + "loss": 1.5657, + "step": 3486 + }, + { + "epoch": 0.3678270042194093, + "grad_norm": 0.6628140211105347, + "learning_rate": 0.0010662872481619367, + "loss": 1.5616, + "step": 3487 + }, + { + "epoch": 0.3679324894514768, + "grad_norm": 0.6368685364723206, + "learning_rate": 0.0010660595877081335, + "loss": 1.5651, + "step": 3488 + }, + { + "epoch": 0.3680379746835443, + "grad_norm": 0.6479616165161133, + "learning_rate": 0.0010658318918382774, + "loss": 1.5681, + "step": 3489 + }, + { + "epoch": 0.3681434599156118, + "grad_norm": 0.6656640768051147, + "learning_rate": 0.0010656041605778832, + "loss": 1.5943, + "step": 3490 + }, + { + "epoch": 0.3682489451476793, + "grad_norm": 0.6604931950569153, + "learning_rate": 0.0010653763939524688, + "loss": 1.5662, + "step": 
3491 + }, + { + "epoch": 0.36835443037974686, + "grad_norm": 0.6808927059173584, + "learning_rate": 0.0010651485919875568, + "loss": 1.5594, + "step": 3492 + }, + { + "epoch": 0.36845991561181435, + "grad_norm": 0.6682624220848083, + "learning_rate": 0.0010649207547086738, + "loss": 1.5545, + "step": 3493 + }, + { + "epoch": 0.36856540084388184, + "grad_norm": 0.6909341812133789, + "learning_rate": 0.0010646928821413499, + "loss": 1.5924, + "step": 3494 + }, + { + "epoch": 0.3686708860759494, + "grad_norm": 0.7096406817436218, + "learning_rate": 0.0010644649743111192, + "loss": 1.579, + "step": 3495 + }, + { + "epoch": 0.3687763713080169, + "grad_norm": 0.6541176438331604, + "learning_rate": 0.0010642370312435201, + "loss": 1.5358, + "step": 3496 + }, + { + "epoch": 0.3688818565400844, + "grad_norm": 0.7604002952575684, + "learning_rate": 0.0010640090529640948, + "loss": 1.5755, + "step": 3497 + }, + { + "epoch": 0.3689873417721519, + "grad_norm": 1.0986945629119873, + "learning_rate": 0.0010637810394983893, + "loss": 1.5309, + "step": 3498 + }, + { + "epoch": 0.3690928270042194, + "grad_norm": 0.8597815632820129, + "learning_rate": 0.0010635529908719537, + "loss": 1.5605, + "step": 3499 + }, + { + "epoch": 0.3691983122362869, + "grad_norm": 0.7147064208984375, + "learning_rate": 0.001063324907110342, + "loss": 1.5203, + "step": 3500 + }, + { + "epoch": 0.36930379746835446, + "grad_norm": 0.8041125535964966, + "learning_rate": 0.001063096788239112, + "loss": 1.5548, + "step": 3501 + }, + { + "epoch": 0.36940928270042195, + "grad_norm": 0.6383129358291626, + "learning_rate": 0.0010628686342838253, + "loss": 1.5842, + "step": 3502 + }, + { + "epoch": 0.36951476793248944, + "grad_norm": 0.792730450630188, + "learning_rate": 0.0010626404452700486, + "loss": 1.606, + "step": 3503 + }, + { + "epoch": 0.369620253164557, + "grad_norm": 0.7808341383934021, + "learning_rate": 0.0010624122212233506, + "loss": 1.6026, + "step": 3504 + }, + { + "epoch": 0.3697257383966245, + 
"grad_norm": 0.7334787845611572, + "learning_rate": 0.0010621839621693056, + "loss": 1.5463, + "step": 3505 + }, + { + "epoch": 0.369831223628692, + "grad_norm": 0.7064646482467651, + "learning_rate": 0.0010619556681334909, + "loss": 1.5856, + "step": 3506 + }, + { + "epoch": 0.36993670886075947, + "grad_norm": 0.7723857760429382, + "learning_rate": 0.001061727339141488, + "loss": 1.6004, + "step": 3507 + }, + { + "epoch": 0.370042194092827, + "grad_norm": 0.715022623538971, + "learning_rate": 0.0010614989752188823, + "loss": 1.5506, + "step": 3508 + }, + { + "epoch": 0.3701476793248945, + "grad_norm": 0.9541425704956055, + "learning_rate": 0.0010612705763912635, + "loss": 1.5877, + "step": 3509 + }, + { + "epoch": 0.370253164556962, + "grad_norm": 1.2524274587631226, + "learning_rate": 0.0010610421426842241, + "loss": 1.5843, + "step": 3510 + }, + { + "epoch": 0.37035864978902955, + "grad_norm": 0.6764320731163025, + "learning_rate": 0.0010608136741233618, + "loss": 1.585, + "step": 3511 + }, + { + "epoch": 0.37046413502109704, + "grad_norm": 1.0435811281204224, + "learning_rate": 0.0010605851707342774, + "loss": 1.5618, + "step": 3512 + }, + { + "epoch": 0.37056962025316453, + "grad_norm": 1.0111600160598755, + "learning_rate": 0.0010603566325425758, + "loss": 1.5535, + "step": 3513 + }, + { + "epoch": 0.3706751054852321, + "grad_norm": 0.6572437286376953, + "learning_rate": 0.001060128059573866, + "loss": 1.5703, + "step": 3514 + }, + { + "epoch": 0.3707805907172996, + "grad_norm": 0.93827885389328, + "learning_rate": 0.0010598994518537608, + "loss": 1.5589, + "step": 3515 + }, + { + "epoch": 0.37088607594936707, + "grad_norm": 0.8185520172119141, + "learning_rate": 0.0010596708094078766, + "loss": 1.603, + "step": 3516 + }, + { + "epoch": 0.3709915611814346, + "grad_norm": 0.7157159447669983, + "learning_rate": 0.0010594421322618341, + "loss": 1.5787, + "step": 3517 + }, + { + "epoch": 0.3710970464135021, + "grad_norm": 0.890509843826294, + "learning_rate": 
0.0010592134204412578, + "loss": 1.5578, + "step": 3518 + }, + { + "epoch": 0.3712025316455696, + "grad_norm": 0.6750526428222656, + "learning_rate": 0.0010589846739717755, + "loss": 1.585, + "step": 3519 + }, + { + "epoch": 0.37130801687763715, + "grad_norm": 0.8499715924263, + "learning_rate": 0.00105875589287902, + "loss": 1.5927, + "step": 3520 + }, + { + "epoch": 0.37141350210970464, + "grad_norm": 0.734464168548584, + "learning_rate": 0.001058527077188627, + "loss": 1.5903, + "step": 3521 + }, + { + "epoch": 0.37151898734177213, + "grad_norm": 0.7359299659729004, + "learning_rate": 0.001058298226926237, + "loss": 1.569, + "step": 3522 + }, + { + "epoch": 0.3716244725738397, + "grad_norm": 0.8024036884307861, + "learning_rate": 0.0010580693421174928, + "loss": 1.5691, + "step": 3523 + }, + { + "epoch": 0.3717299578059072, + "grad_norm": 0.8831567168235779, + "learning_rate": 0.0010578404227880429, + "loss": 1.5361, + "step": 3524 + }, + { + "epoch": 0.37183544303797467, + "grad_norm": 0.6938677430152893, + "learning_rate": 0.0010576114689635383, + "loss": 1.6097, + "step": 3525 + }, + { + "epoch": 0.3719409282700422, + "grad_norm": 0.7865781784057617, + "learning_rate": 0.0010573824806696351, + "loss": 1.5934, + "step": 3526 + }, + { + "epoch": 0.3720464135021097, + "grad_norm": 0.7842490673065186, + "learning_rate": 0.001057153457931992, + "loss": 1.5728, + "step": 3527 + }, + { + "epoch": 0.3721518987341772, + "grad_norm": 0.811928391456604, + "learning_rate": 0.0010569244007762723, + "loss": 1.6133, + "step": 3528 + }, + { + "epoch": 0.37225738396624475, + "grad_norm": 0.7388095259666443, + "learning_rate": 0.0010566953092281432, + "loss": 1.5499, + "step": 3529 + }, + { + "epoch": 0.37236286919831224, + "grad_norm": 1.0586607456207275, + "learning_rate": 0.0010564661833132752, + "loss": 1.6433, + "step": 3530 + }, + { + "epoch": 0.37246835443037973, + "grad_norm": 0.7915521264076233, + "learning_rate": 0.0010562370230573432, + "loss": 1.5863, + "step": 
3531 + }, + { + "epoch": 0.3725738396624473, + "grad_norm": 0.7204082608222961, + "learning_rate": 0.0010560078284860257, + "loss": 1.6118, + "step": 3532 + }, + { + "epoch": 0.3726793248945148, + "grad_norm": 0.8217669725418091, + "learning_rate": 0.0010557785996250053, + "loss": 1.5587, + "step": 3533 + }, + { + "epoch": 0.37278481012658227, + "grad_norm": 0.7496677041053772, + "learning_rate": 0.0010555493364999679, + "loss": 1.5934, + "step": 3534 + }, + { + "epoch": 0.3728902953586498, + "grad_norm": 1.3094440698623657, + "learning_rate": 0.001055320039136604, + "loss": 1.5969, + "step": 3535 + }, + { + "epoch": 0.3729957805907173, + "grad_norm": 0.7500072121620178, + "learning_rate": 0.001055090707560607, + "loss": 1.5527, + "step": 3536 + }, + { + "epoch": 0.3731012658227848, + "grad_norm": 1.0009034872055054, + "learning_rate": 0.0010548613417976748, + "loss": 1.5386, + "step": 3537 + }, + { + "epoch": 0.37320675105485235, + "grad_norm": 1.2653918266296387, + "learning_rate": 0.0010546319418735094, + "loss": 1.6086, + "step": 3538 + }, + { + "epoch": 0.37331223628691984, + "grad_norm": 0.7403241395950317, + "learning_rate": 0.0010544025078138156, + "loss": 1.5717, + "step": 3539 + }, + { + "epoch": 0.37341772151898733, + "grad_norm": 1.2869960069656372, + "learning_rate": 0.001054173039644303, + "loss": 1.5698, + "step": 3540 + }, + { + "epoch": 0.3735232067510548, + "grad_norm": 0.6501700282096863, + "learning_rate": 0.0010539435373906846, + "loss": 1.5645, + "step": 3541 + }, + { + "epoch": 0.3736286919831224, + "grad_norm": 1.0709211826324463, + "learning_rate": 0.0010537140010786774, + "loss": 1.6, + "step": 3542 + }, + { + "epoch": 0.37373417721518987, + "grad_norm": 0.6877700686454773, + "learning_rate": 0.0010534844307340016, + "loss": 1.5622, + "step": 3543 + }, + { + "epoch": 0.37383966244725736, + "grad_norm": 1.216977596282959, + "learning_rate": 0.0010532548263823822, + "loss": 1.5502, + "step": 3544 + }, + { + "epoch": 0.3739451476793249, + 
"grad_norm": 0.7277476787567139, + "learning_rate": 0.0010530251880495473, + "loss": 1.5919, + "step": 3545 + }, + { + "epoch": 0.3740506329113924, + "grad_norm": 1.1187546253204346, + "learning_rate": 0.0010527955157612291, + "loss": 1.5577, + "step": 3546 + }, + { + "epoch": 0.3741561181434599, + "grad_norm": 1.098911166191101, + "learning_rate": 0.0010525658095431635, + "loss": 1.5336, + "step": 3547 + }, + { + "epoch": 0.37426160337552744, + "grad_norm": 0.6868484020233154, + "learning_rate": 0.00105233606942109, + "loss": 1.5534, + "step": 3548 + }, + { + "epoch": 0.37436708860759493, + "grad_norm": 0.9820857644081116, + "learning_rate": 0.0010521062954207527, + "loss": 1.5692, + "step": 3549 + }, + { + "epoch": 0.3744725738396624, + "grad_norm": 0.8987963795661926, + "learning_rate": 0.0010518764875678981, + "loss": 1.5795, + "step": 3550 + }, + { + "epoch": 0.37457805907173, + "grad_norm": 0.7954782843589783, + "learning_rate": 0.001051646645888278, + "loss": 1.5579, + "step": 3551 + }, + { + "epoch": 0.37468354430379747, + "grad_norm": 0.9505137801170349, + "learning_rate": 0.0010514167704076473, + "loss": 1.5623, + "step": 3552 + }, + { + "epoch": 0.37478902953586496, + "grad_norm": 0.7878676056861877, + "learning_rate": 0.0010511868611517644, + "loss": 1.5788, + "step": 3553 + }, + { + "epoch": 0.3748945147679325, + "grad_norm": 1.2213748693466187, + "learning_rate": 0.0010509569181463916, + "loss": 1.5722, + "step": 3554 + }, + { + "epoch": 0.375, + "grad_norm": 0.7628247141838074, + "learning_rate": 0.0010507269414172956, + "loss": 1.5766, + "step": 3555 + }, + { + "epoch": 0.3751054852320675, + "grad_norm": 0.9318822622299194, + "learning_rate": 0.0010504969309902462, + "loss": 1.5716, + "step": 3556 + }, + { + "epoch": 0.37521097046413504, + "grad_norm": 0.9249821901321411, + "learning_rate": 0.0010502668868910174, + "loss": 1.5758, + "step": 3557 + }, + { + "epoch": 0.37531645569620253, + "grad_norm": 0.666480302810669, + "learning_rate": 
0.0010500368091453864, + "loss": 1.5438, + "step": 3558 + }, + { + "epoch": 0.37542194092827, + "grad_norm": 0.7162619233131409, + "learning_rate": 0.001049806697779135, + "loss": 1.5694, + "step": 3559 + }, + { + "epoch": 0.3755274261603376, + "grad_norm": 0.7550562620162964, + "learning_rate": 0.001049576552818048, + "loss": 1.5603, + "step": 3560 + }, + { + "epoch": 0.37563291139240507, + "grad_norm": 0.7061163187026978, + "learning_rate": 0.0010493463742879147, + "loss": 1.5722, + "step": 3561 + }, + { + "epoch": 0.37573839662447256, + "grad_norm": 0.6910367012023926, + "learning_rate": 0.0010491161622145275, + "loss": 1.547, + "step": 3562 + }, + { + "epoch": 0.3758438818565401, + "grad_norm": 0.8399795293807983, + "learning_rate": 0.0010488859166236824, + "loss": 1.5579, + "step": 3563 + }, + { + "epoch": 0.3759493670886076, + "grad_norm": 0.7210567593574524, + "learning_rate": 0.0010486556375411803, + "loss": 1.5587, + "step": 3564 + }, + { + "epoch": 0.3760548523206751, + "grad_norm": 0.7326685190200806, + "learning_rate": 0.0010484253249928247, + "loss": 1.5675, + "step": 3565 + }, + { + "epoch": 0.37616033755274264, + "grad_norm": 0.920733630657196, + "learning_rate": 0.0010481949790044234, + "loss": 1.5413, + "step": 3566 + }, + { + "epoch": 0.37626582278481013, + "grad_norm": 0.7439384460449219, + "learning_rate": 0.0010479645996017875, + "loss": 1.5527, + "step": 3567 + }, + { + "epoch": 0.3763713080168776, + "grad_norm": 0.9990704655647278, + "learning_rate": 0.0010477341868107327, + "loss": 1.594, + "step": 3568 + }, + { + "epoch": 0.3764767932489452, + "grad_norm": 1.2516640424728394, + "learning_rate": 0.0010475037406570775, + "loss": 1.554, + "step": 3569 + }, + { + "epoch": 0.37658227848101267, + "grad_norm": 0.7980985641479492, + "learning_rate": 0.0010472732611666448, + "loss": 1.5856, + "step": 3570 + }, + { + "epoch": 0.37668776371308016, + "grad_norm": 1.0694468021392822, + "learning_rate": 0.0010470427483652608, + "loss": 1.5258, + "step": 
3571 + }, + { + "epoch": 0.37679324894514765, + "grad_norm": 1.1398589611053467, + "learning_rate": 0.0010468122022787554, + "loss": 1.5605, + "step": 3572 + }, + { + "epoch": 0.3768987341772152, + "grad_norm": 0.7561160326004028, + "learning_rate": 0.001046581622932963, + "loss": 1.57, + "step": 3573 + }, + { + "epoch": 0.3770042194092827, + "grad_norm": 0.8989192247390747, + "learning_rate": 0.001046351010353721, + "loss": 1.5937, + "step": 3574 + }, + { + "epoch": 0.3771097046413502, + "grad_norm": 0.808480978012085, + "learning_rate": 0.0010461203645668702, + "loss": 1.5836, + "step": 3575 + }, + { + "epoch": 0.37721518987341773, + "grad_norm": 0.7817773818969727, + "learning_rate": 0.001045889685598256, + "loss": 1.5735, + "step": 3576 + }, + { + "epoch": 0.3773206751054852, + "grad_norm": 0.862443208694458, + "learning_rate": 0.0010456589734737273, + "loss": 1.5514, + "step": 3577 + }, + { + "epoch": 0.3774261603375527, + "grad_norm": 0.7069909572601318, + "learning_rate": 0.0010454282282191362, + "loss": 1.5601, + "step": 3578 + }, + { + "epoch": 0.37753164556962027, + "grad_norm": 0.7933304309844971, + "learning_rate": 0.001045197449860339, + "loss": 1.5572, + "step": 3579 + }, + { + "epoch": 0.37763713080168776, + "grad_norm": 0.6414366364479065, + "learning_rate": 0.0010449666384231954, + "loss": 1.5808, + "step": 3580 + }, + { + "epoch": 0.37774261603375525, + "grad_norm": 0.9058160781860352, + "learning_rate": 0.0010447357939335693, + "loss": 1.576, + "step": 3581 + }, + { + "epoch": 0.3778481012658228, + "grad_norm": 0.7347251176834106, + "learning_rate": 0.001044504916417328, + "loss": 1.552, + "step": 3582 + }, + { + "epoch": 0.3779535864978903, + "grad_norm": 0.741729736328125, + "learning_rate": 0.001044274005900342, + "loss": 1.5731, + "step": 3583 + }, + { + "epoch": 0.3780590717299578, + "grad_norm": 1.0531587600708008, + "learning_rate": 0.0010440430624084863, + "loss": 1.5515, + "step": 3584 + }, + { + "epoch": 0.37816455696202533, + 
"grad_norm": 0.7183282375335693, + "learning_rate": 0.0010438120859676393, + "loss": 1.6124, + "step": 3585 + }, + { + "epoch": 0.3782700421940928, + "grad_norm": 0.9894569516181946, + "learning_rate": 0.0010435810766036828, + "loss": 1.5977, + "step": 3586 + }, + { + "epoch": 0.3783755274261603, + "grad_norm": 0.978705108165741, + "learning_rate": 0.001043350034342503, + "loss": 1.5559, + "step": 3587 + }, + { + "epoch": 0.37848101265822787, + "grad_norm": 0.7706182599067688, + "learning_rate": 0.001043118959209989, + "loss": 1.5753, + "step": 3588 + }, + { + "epoch": 0.37858649789029536, + "grad_norm": 0.9661357998847961, + "learning_rate": 0.001042887851232034, + "loss": 1.6089, + "step": 3589 + }, + { + "epoch": 0.37869198312236285, + "grad_norm": 0.9368203282356262, + "learning_rate": 0.0010426567104345346, + "loss": 1.5841, + "step": 3590 + }, + { + "epoch": 0.3787974683544304, + "grad_norm": 0.6770279407501221, + "learning_rate": 0.0010424255368433916, + "loss": 1.5389, + "step": 3591 + }, + { + "epoch": 0.3789029535864979, + "grad_norm": 0.9691004753112793, + "learning_rate": 0.0010421943304845093, + "loss": 1.5689, + "step": 3592 + }, + { + "epoch": 0.3790084388185654, + "grad_norm": 0.9747665524482727, + "learning_rate": 0.0010419630913837948, + "loss": 1.6064, + "step": 3593 + }, + { + "epoch": 0.37911392405063293, + "grad_norm": 0.7887089848518372, + "learning_rate": 0.0010417318195671604, + "loss": 1.5718, + "step": 3594 + }, + { + "epoch": 0.3792194092827004, + "grad_norm": 1.190483808517456, + "learning_rate": 0.0010415005150605208, + "loss": 1.5872, + "step": 3595 + }, + { + "epoch": 0.3793248945147679, + "grad_norm": 0.7108887434005737, + "learning_rate": 0.001041269177889795, + "loss": 1.5107, + "step": 3596 + }, + { + "epoch": 0.37943037974683547, + "grad_norm": 1.376679539680481, + "learning_rate": 0.0010410378080809052, + "loss": 1.5568, + "step": 3597 + }, + { + "epoch": 0.37953586497890296, + "grad_norm": 0.6860396265983582, + 
"learning_rate": 0.001040806405659778, + "loss": 1.5673, + "step": 3598 + }, + { + "epoch": 0.37964135021097045, + "grad_norm": 1.5633856058120728, + "learning_rate": 0.0010405749706523428, + "loss": 1.5528, + "step": 3599 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 0.8053755760192871, + "learning_rate": 0.0010403435030845332, + "loss": 1.5539, + "step": 3600 + }, + { + "epoch": 0.3798523206751055, + "grad_norm": 1.9746559858322144, + "learning_rate": 0.0010401120029822864, + "loss": 1.5893, + "step": 3601 + }, + { + "epoch": 0.379957805907173, + "grad_norm": 1.3419959545135498, + "learning_rate": 0.001039880470371543, + "loss": 1.5566, + "step": 3602 + }, + { + "epoch": 0.38006329113924053, + "grad_norm": 1.8367960453033447, + "learning_rate": 0.0010396489052782473, + "loss": 1.5884, + "step": 3603 + }, + { + "epoch": 0.380168776371308, + "grad_norm": 1.8145722150802612, + "learning_rate": 0.0010394173077283477, + "loss": 1.5544, + "step": 3604 + }, + { + "epoch": 0.3802742616033755, + "grad_norm": 0.8353047370910645, + "learning_rate": 0.0010391856777477954, + "loss": 1.5949, + "step": 3605 + }, + { + "epoch": 0.380379746835443, + "grad_norm": 1.2800959348678589, + "learning_rate": 0.001038954015362546, + "loss": 1.5563, + "step": 3606 + }, + { + "epoch": 0.38048523206751056, + "grad_norm": 0.7750309705734253, + "learning_rate": 0.001038722320598558, + "loss": 1.5518, + "step": 3607 + }, + { + "epoch": 0.38059071729957805, + "grad_norm": 1.0425488948822021, + "learning_rate": 0.001038490593481795, + "loss": 1.5891, + "step": 3608 + }, + { + "epoch": 0.38069620253164554, + "grad_norm": 0.8422064185142517, + "learning_rate": 0.0010382588340382218, + "loss": 1.5859, + "step": 3609 + }, + { + "epoch": 0.3808016877637131, + "grad_norm": 1.0309568643569946, + "learning_rate": 0.0010380270422938093, + "loss": 1.5218, + "step": 3610 + }, + { + "epoch": 0.3809071729957806, + "grad_norm": 1.0700976848602295, + "learning_rate": 0.00103779521827453, + "loss": 
1.5774, + "step": 3611 + }, + { + "epoch": 0.3810126582278481, + "grad_norm": 0.6835166811943054, + "learning_rate": 0.0010375633620063618, + "loss": 1.5414, + "step": 3612 + }, + { + "epoch": 0.3811181434599156, + "grad_norm": 0.8056224584579468, + "learning_rate": 0.0010373314735152848, + "loss": 1.5565, + "step": 3613 + }, + { + "epoch": 0.3812236286919831, + "grad_norm": 0.7493970394134521, + "learning_rate": 0.0010370995528272836, + "loss": 1.5308, + "step": 3614 + }, + { + "epoch": 0.3813291139240506, + "grad_norm": 0.8334560990333557, + "learning_rate": 0.0010368675999683455, + "loss": 1.5143, + "step": 3615 + }, + { + "epoch": 0.38143459915611816, + "grad_norm": 0.8160945773124695, + "learning_rate": 0.0010366356149644628, + "loss": 1.5672, + "step": 3616 + }, + { + "epoch": 0.38154008438818565, + "grad_norm": 0.7754315137863159, + "learning_rate": 0.0010364035978416297, + "loss": 1.604, + "step": 3617 + }, + { + "epoch": 0.38164556962025314, + "grad_norm": 0.7455788254737854, + "learning_rate": 0.001036171548625846, + "loss": 1.5932, + "step": 3618 + }, + { + "epoch": 0.3817510548523207, + "grad_norm": 0.7669958472251892, + "learning_rate": 0.0010359394673431126, + "loss": 1.5503, + "step": 3619 + }, + { + "epoch": 0.3818565400843882, + "grad_norm": 0.7243880033493042, + "learning_rate": 0.0010357073540194362, + "loss": 1.523, + "step": 3620 + }, + { + "epoch": 0.3819620253164557, + "grad_norm": 0.8334965705871582, + "learning_rate": 0.0010354752086808264, + "loss": 1.5631, + "step": 3621 + }, + { + "epoch": 0.3820675105485232, + "grad_norm": 0.764119029045105, + "learning_rate": 0.001035243031353296, + "loss": 1.5178, + "step": 3622 + }, + { + "epoch": 0.3821729957805907, + "grad_norm": 0.7308919429779053, + "learning_rate": 0.0010350108220628614, + "loss": 1.5739, + "step": 3623 + }, + { + "epoch": 0.3822784810126582, + "grad_norm": 0.7100711464881897, + "learning_rate": 0.001034778580835543, + "loss": 1.5525, + "step": 3624 + }, + { + "epoch": 
0.38238396624472576, + "grad_norm": 0.7122567296028137, + "learning_rate": 0.0010345463076973645, + "loss": 1.5617, + "step": 3625 + }, + { + "epoch": 0.38248945147679325, + "grad_norm": 0.7060985565185547, + "learning_rate": 0.0010343140026743535, + "loss": 1.5592, + "step": 3626 + }, + { + "epoch": 0.38259493670886074, + "grad_norm": 0.8406647443771362, + "learning_rate": 0.0010340816657925407, + "loss": 1.5581, + "step": 3627 + }, + { + "epoch": 0.3827004219409283, + "grad_norm": 0.8082764148712158, + "learning_rate": 0.0010338492970779606, + "loss": 1.5774, + "step": 3628 + }, + { + "epoch": 0.3828059071729958, + "grad_norm": 0.8360951542854309, + "learning_rate": 0.0010336168965566516, + "loss": 1.5406, + "step": 3629 + }, + { + "epoch": 0.3829113924050633, + "grad_norm": 1.0489424467086792, + "learning_rate": 0.001033384464254655, + "loss": 1.5664, + "step": 3630 + }, + { + "epoch": 0.3830168776371308, + "grad_norm": 0.791122555732727, + "learning_rate": 0.001033152000198016, + "loss": 1.5713, + "step": 3631 + }, + { + "epoch": 0.3831223628691983, + "grad_norm": 0.8153091669082642, + "learning_rate": 0.0010329195044127834, + "loss": 1.5515, + "step": 3632 + }, + { + "epoch": 0.3832278481012658, + "grad_norm": 0.7038738131523132, + "learning_rate": 0.0010326869769250097, + "loss": 1.5883, + "step": 3633 + }, + { + "epoch": 0.38333333333333336, + "grad_norm": 0.8065722584724426, + "learning_rate": 0.0010324544177607508, + "loss": 1.5245, + "step": 3634 + }, + { + "epoch": 0.38343881856540085, + "grad_norm": 0.8093060255050659, + "learning_rate": 0.0010322218269460657, + "loss": 1.5686, + "step": 3635 + }, + { + "epoch": 0.38354430379746834, + "grad_norm": 0.8642284870147705, + "learning_rate": 0.001031989204507018, + "loss": 1.55, + "step": 3636 + }, + { + "epoch": 0.3836497890295359, + "grad_norm": 0.6871453523635864, + "learning_rate": 0.0010317565504696733, + "loss": 1.639, + "step": 3637 + }, + { + "epoch": 0.3837552742616034, + "grad_norm": 
0.8035463690757751, + "learning_rate": 0.0010315238648601025, + "loss": 1.5607, + "step": 3638 + }, + { + "epoch": 0.3838607594936709, + "grad_norm": 0.859209418296814, + "learning_rate": 0.0010312911477043784, + "loss": 1.5629, + "step": 3639 + }, + { + "epoch": 0.38396624472573837, + "grad_norm": 0.7413459420204163, + "learning_rate": 0.001031058399028579, + "loss": 1.5136, + "step": 3640 + }, + { + "epoch": 0.3840717299578059, + "grad_norm": 0.8715896010398865, + "learning_rate": 0.0010308256188587843, + "loss": 1.5519, + "step": 3641 + }, + { + "epoch": 0.3841772151898734, + "grad_norm": 1.0396448373794556, + "learning_rate": 0.0010305928072210787, + "loss": 1.6058, + "step": 3642 + }, + { + "epoch": 0.3842827004219409, + "grad_norm": 0.9034422636032104, + "learning_rate": 0.00103035996414155, + "loss": 1.565, + "step": 3643 + }, + { + "epoch": 0.38438818565400845, + "grad_norm": 0.7555263042449951, + "learning_rate": 0.0010301270896462893, + "loss": 1.5526, + "step": 3644 + }, + { + "epoch": 0.38449367088607594, + "grad_norm": 0.7717058658599854, + "learning_rate": 0.0010298941837613913, + "loss": 1.567, + "step": 3645 + }, + { + "epoch": 0.38459915611814344, + "grad_norm": 0.8087957501411438, + "learning_rate": 0.0010296612465129542, + "loss": 1.5356, + "step": 3646 + }, + { + "epoch": 0.384704641350211, + "grad_norm": 0.8528350591659546, + "learning_rate": 0.0010294282779270802, + "loss": 1.5736, + "step": 3647 + }, + { + "epoch": 0.3848101265822785, + "grad_norm": 0.700581967830658, + "learning_rate": 0.001029195278029874, + "loss": 1.6036, + "step": 3648 + }, + { + "epoch": 0.38491561181434597, + "grad_norm": 0.7694661617279053, + "learning_rate": 0.0010289622468474448, + "loss": 1.577, + "step": 3649 + }, + { + "epoch": 0.3850210970464135, + "grad_norm": 0.8834878206253052, + "learning_rate": 0.001028729184405905, + "loss": 1.5731, + "step": 3650 + }, + { + "epoch": 0.385126582278481, + "grad_norm": 0.6996549367904663, + "learning_rate": 
0.00102849609073137, + "loss": 1.5872, + "step": 3651 + }, + { + "epoch": 0.3852320675105485, + "grad_norm": 0.8705095052719116, + "learning_rate": 0.0010282629658499593, + "loss": 1.567, + "step": 3652 + }, + { + "epoch": 0.38533755274261605, + "grad_norm": 1.0028982162475586, + "learning_rate": 0.001028029809787796, + "loss": 1.5526, + "step": 3653 + }, + { + "epoch": 0.38544303797468354, + "grad_norm": 0.8697168231010437, + "learning_rate": 0.001027796622571006, + "loss": 1.5913, + "step": 3654 + }, + { + "epoch": 0.38554852320675104, + "grad_norm": 0.7187618017196655, + "learning_rate": 0.001027563404225719, + "loss": 1.567, + "step": 3655 + }, + { + "epoch": 0.3856540084388186, + "grad_norm": 0.7079871296882629, + "learning_rate": 0.0010273301547780687, + "loss": 1.5525, + "step": 3656 + }, + { + "epoch": 0.3857594936708861, + "grad_norm": 0.6848574876785278, + "learning_rate": 0.0010270968742541917, + "loss": 1.5299, + "step": 3657 + }, + { + "epoch": 0.38586497890295357, + "grad_norm": 0.8081691861152649, + "learning_rate": 0.0010268635626802282, + "loss": 1.555, + "step": 3658 + }, + { + "epoch": 0.3859704641350211, + "grad_norm": 0.6566412448883057, + "learning_rate": 0.001026630220082322, + "loss": 1.5895, + "step": 3659 + }, + { + "epoch": 0.3860759493670886, + "grad_norm": 0.9524419903755188, + "learning_rate": 0.0010263968464866201, + "loss": 1.5457, + "step": 3660 + }, + { + "epoch": 0.3861814345991561, + "grad_norm": 0.7908095717430115, + "learning_rate": 0.0010261634419192732, + "loss": 1.577, + "step": 3661 + }, + { + "epoch": 0.38628691983122365, + "grad_norm": 0.716161847114563, + "learning_rate": 0.001025930006406436, + "loss": 1.5525, + "step": 3662 + }, + { + "epoch": 0.38639240506329114, + "grad_norm": 0.7541593909263611, + "learning_rate": 0.0010256965399742652, + "loss": 1.5493, + "step": 3663 + }, + { + "epoch": 0.38649789029535864, + "grad_norm": 0.7203877568244934, + "learning_rate": 0.0010254630426489225, + "loss": 1.5384, + "step": 
3664 + }, + { + "epoch": 0.3866033755274262, + "grad_norm": 0.9970970749855042, + "learning_rate": 0.0010252295144565725, + "loss": 1.5415, + "step": 3665 + }, + { + "epoch": 0.3867088607594937, + "grad_norm": 0.7830607891082764, + "learning_rate": 0.0010249959554233827, + "loss": 1.5475, + "step": 3666 + }, + { + "epoch": 0.38681434599156117, + "grad_norm": 0.7594409584999084, + "learning_rate": 0.001024762365575525, + "loss": 1.576, + "step": 3667 + }, + { + "epoch": 0.3869198312236287, + "grad_norm": 0.724692702293396, + "learning_rate": 0.001024528744939174, + "loss": 1.5799, + "step": 3668 + }, + { + "epoch": 0.3870253164556962, + "grad_norm": 0.8121001720428467, + "learning_rate": 0.0010242950935405084, + "loss": 1.5499, + "step": 3669 + }, + { + "epoch": 0.3871308016877637, + "grad_norm": 0.6778778433799744, + "learning_rate": 0.0010240614114057098, + "loss": 1.5398, + "step": 3670 + }, + { + "epoch": 0.3872362869198312, + "grad_norm": 1.0171278715133667, + "learning_rate": 0.0010238276985609631, + "loss": 1.5727, + "step": 3671 + }, + { + "epoch": 0.38734177215189874, + "grad_norm": 1.081398367881775, + "learning_rate": 0.0010235939550324576, + "loss": 1.5556, + "step": 3672 + }, + { + "epoch": 0.38744725738396624, + "grad_norm": 0.8650528788566589, + "learning_rate": 0.0010233601808463852, + "loss": 1.5641, + "step": 3673 + }, + { + "epoch": 0.38755274261603373, + "grad_norm": 0.9611278176307678, + "learning_rate": 0.0010231263760289416, + "loss": 1.5479, + "step": 3674 + }, + { + "epoch": 0.3876582278481013, + "grad_norm": 0.9075037837028503, + "learning_rate": 0.0010228925406063254, + "loss": 1.5296, + "step": 3675 + }, + { + "epoch": 0.38776371308016877, + "grad_norm": 0.7765751481056213, + "learning_rate": 0.0010226586746047393, + "loss": 1.5455, + "step": 3676 + }, + { + "epoch": 0.38786919831223626, + "grad_norm": 0.8091642260551453, + "learning_rate": 0.0010224247780503892, + "loss": 1.5754, + "step": 3677 + }, + { + "epoch": 0.3879746835443038, + 
"grad_norm": 0.8667533993721008, + "learning_rate": 0.0010221908509694842, + "loss": 1.5854, + "step": 3678 + }, + { + "epoch": 0.3880801687763713, + "grad_norm": 0.7747409343719482, + "learning_rate": 0.0010219568933882372, + "loss": 1.5945, + "step": 3679 + }, + { + "epoch": 0.3881856540084388, + "grad_norm": 0.8675957322120667, + "learning_rate": 0.001021722905332864, + "loss": 1.5222, + "step": 3680 + }, + { + "epoch": 0.38829113924050634, + "grad_norm": 0.7583568096160889, + "learning_rate": 0.0010214888868295842, + "loss": 1.5745, + "step": 3681 + }, + { + "epoch": 0.38839662447257384, + "grad_norm": 0.7652279734611511, + "learning_rate": 0.0010212548379046214, + "loss": 1.5595, + "step": 3682 + }, + { + "epoch": 0.38850210970464133, + "grad_norm": 0.6692191362380981, + "learning_rate": 0.001021020758584201, + "loss": 1.5724, + "step": 3683 + }, + { + "epoch": 0.3886075949367089, + "grad_norm": 0.7486600875854492, + "learning_rate": 0.0010207866488945532, + "loss": 1.5796, + "step": 3684 + }, + { + "epoch": 0.38871308016877637, + "grad_norm": 0.7274779677391052, + "learning_rate": 0.0010205525088619112, + "loss": 1.6044, + "step": 3685 + }, + { + "epoch": 0.38881856540084386, + "grad_norm": 0.7047606706619263, + "learning_rate": 0.0010203183385125115, + "loss": 1.5551, + "step": 3686 + }, + { + "epoch": 0.3889240506329114, + "grad_norm": 0.6930751800537109, + "learning_rate": 0.001020084137872594, + "loss": 1.5437, + "step": 3687 + }, + { + "epoch": 0.3890295358649789, + "grad_norm": 0.6608148217201233, + "learning_rate": 0.0010198499069684023, + "loss": 1.5315, + "step": 3688 + }, + { + "epoch": 0.3891350210970464, + "grad_norm": 0.7217460870742798, + "learning_rate": 0.0010196156458261827, + "loss": 1.5975, + "step": 3689 + }, + { + "epoch": 0.38924050632911394, + "grad_norm": 0.6810771226882935, + "learning_rate": 0.0010193813544721855, + "loss": 1.5706, + "step": 3690 + }, + { + "epoch": 0.38934599156118144, + "grad_norm": 0.7999672889709473, + 
"learning_rate": 0.0010191470329326646, + "loss": 1.6352, + "step": 3691 + }, + { + "epoch": 0.38945147679324893, + "grad_norm": 0.8231928944587708, + "learning_rate": 0.0010189126812338765, + "loss": 1.5499, + "step": 3692 + }, + { + "epoch": 0.3895569620253165, + "grad_norm": 0.7677491307258606, + "learning_rate": 0.0010186782994020811, + "loss": 1.5122, + "step": 3693 + }, + { + "epoch": 0.38966244725738397, + "grad_norm": 0.7340046167373657, + "learning_rate": 0.0010184438874635427, + "loss": 1.5498, + "step": 3694 + }, + { + "epoch": 0.38976793248945146, + "grad_norm": 0.7499405741691589, + "learning_rate": 0.0010182094454445282, + "loss": 1.5785, + "step": 3695 + }, + { + "epoch": 0.389873417721519, + "grad_norm": 0.8243284225463867, + "learning_rate": 0.001017974973371308, + "loss": 1.5601, + "step": 3696 + }, + { + "epoch": 0.3899789029535865, + "grad_norm": 0.91752690076828, + "learning_rate": 0.0010177404712701558, + "loss": 1.5796, + "step": 3697 + }, + { + "epoch": 0.390084388185654, + "grad_norm": 0.8358504176139832, + "learning_rate": 0.0010175059391673486, + "loss": 1.6037, + "step": 3698 + }, + { + "epoch": 0.39018987341772154, + "grad_norm": 0.6496939063072205, + "learning_rate": 0.0010172713770891673, + "loss": 1.5278, + "step": 3699 + }, + { + "epoch": 0.39029535864978904, + "grad_norm": 0.7310910224914551, + "learning_rate": 0.001017036785061895, + "loss": 1.6349, + "step": 3700 + }, + { + "epoch": 0.39040084388185653, + "grad_norm": 0.7967926263809204, + "learning_rate": 0.0010168021631118199, + "loss": 1.5764, + "step": 3701 + }, + { + "epoch": 0.3905063291139241, + "grad_norm": 0.7453489899635315, + "learning_rate": 0.0010165675112652314, + "loss": 1.5542, + "step": 3702 + }, + { + "epoch": 0.39061181434599157, + "grad_norm": 0.706334114074707, + "learning_rate": 0.0010163328295484245, + "loss": 1.5327, + "step": 3703 + }, + { + "epoch": 0.39071729957805906, + "grad_norm": 0.7157964706420898, + "learning_rate": 0.001016098117987696, + "loss": 
1.5727, + "step": 3704 + }, + { + "epoch": 0.39082278481012656, + "grad_norm": 0.7093988656997681, + "learning_rate": 0.0010158633766093462, + "loss": 1.5526, + "step": 3705 + }, + { + "epoch": 0.3909282700421941, + "grad_norm": 0.8518031239509583, + "learning_rate": 0.0010156286054396795, + "loss": 1.577, + "step": 3706 + }, + { + "epoch": 0.3910337552742616, + "grad_norm": 0.7734776139259338, + "learning_rate": 0.001015393804505003, + "loss": 1.6094, + "step": 3707 + }, + { + "epoch": 0.3911392405063291, + "grad_norm": 0.6916980147361755, + "learning_rate": 0.0010151589738316275, + "loss": 1.5532, + "step": 3708 + }, + { + "epoch": 0.39124472573839664, + "grad_norm": 0.8141138553619385, + "learning_rate": 0.0010149241134458666, + "loss": 1.5646, + "step": 3709 + }, + { + "epoch": 0.39135021097046413, + "grad_norm": 0.7900592088699341, + "learning_rate": 0.0010146892233740376, + "loss": 1.4961, + "step": 3710 + }, + { + "epoch": 0.3914556962025316, + "grad_norm": 0.7097344994544983, + "learning_rate": 0.0010144543036424616, + "loss": 1.5285, + "step": 3711 + }, + { + "epoch": 0.39156118143459917, + "grad_norm": 0.6890658140182495, + "learning_rate": 0.001014219354277462, + "loss": 1.5153, + "step": 3712 + }, + { + "epoch": 0.39166666666666666, + "grad_norm": 0.7320212721824646, + "learning_rate": 0.0010139843753053663, + "loss": 1.5166, + "step": 3713 + }, + { + "epoch": 0.39177215189873416, + "grad_norm": 0.7036203145980835, + "learning_rate": 0.001013749366752505, + "loss": 1.5574, + "step": 3714 + }, + { + "epoch": 0.3918776371308017, + "grad_norm": 0.706278383731842, + "learning_rate": 0.0010135143286452118, + "loss": 1.5391, + "step": 3715 + }, + { + "epoch": 0.3919831223628692, + "grad_norm": 0.7710501551628113, + "learning_rate": 0.0010132792610098244, + "loss": 1.5402, + "step": 3716 + }, + { + "epoch": 0.3920886075949367, + "grad_norm": 0.7435221076011658, + "learning_rate": 0.0010130441638726828, + "loss": 1.5732, + "step": 3717 + }, + { + "epoch": 
0.39219409282700424, + "grad_norm": 0.6779868602752686, + "learning_rate": 0.001012809037260131, + "loss": 1.5043, + "step": 3718 + }, + { + "epoch": 0.39229957805907173, + "grad_norm": 0.6662476062774658, + "learning_rate": 0.001012573881198516, + "loss": 1.5722, + "step": 3719 + }, + { + "epoch": 0.3924050632911392, + "grad_norm": 0.6320712566375732, + "learning_rate": 0.0010123386957141883, + "loss": 1.5499, + "step": 3720 + }, + { + "epoch": 0.39251054852320677, + "grad_norm": 0.6894907355308533, + "learning_rate": 0.0010121034808335018, + "loss": 1.5402, + "step": 3721 + }, + { + "epoch": 0.39261603375527426, + "grad_norm": 0.6706798672676086, + "learning_rate": 0.0010118682365828132, + "loss": 1.5649, + "step": 3722 + }, + { + "epoch": 0.39272151898734176, + "grad_norm": 0.7610365152359009, + "learning_rate": 0.0010116329629884827, + "loss": 1.5404, + "step": 3723 + }, + { + "epoch": 0.3928270042194093, + "grad_norm": 1.1765838861465454, + "learning_rate": 0.0010113976600768743, + "loss": 1.5802, + "step": 3724 + }, + { + "epoch": 0.3929324894514768, + "grad_norm": 0.6423248648643494, + "learning_rate": 0.0010111623278743547, + "loss": 1.5337, + "step": 3725 + }, + { + "epoch": 0.3930379746835443, + "grad_norm": 1.149620771408081, + "learning_rate": 0.001010926966407294, + "loss": 1.567, + "step": 3726 + }, + { + "epoch": 0.39314345991561184, + "grad_norm": 0.6979532837867737, + "learning_rate": 0.0010106915757020654, + "loss": 1.5621, + "step": 3727 + }, + { + "epoch": 0.39324894514767933, + "grad_norm": 0.9020812511444092, + "learning_rate": 0.0010104561557850457, + "loss": 1.57, + "step": 3728 + }, + { + "epoch": 0.3933544303797468, + "grad_norm": 0.9356682896614075, + "learning_rate": 0.0010102207066826155, + "loss": 1.5731, + "step": 3729 + }, + { + "epoch": 0.39345991561181437, + "grad_norm": 0.6777065992355347, + "learning_rate": 0.0010099852284211573, + "loss": 1.5336, + "step": 3730 + }, + { + "epoch": 0.39356540084388186, + "grad_norm": 
1.041947364807129, + "learning_rate": 0.0010097497210270578, + "loss": 1.5597, + "step": 3731 + }, + { + "epoch": 0.39367088607594936, + "grad_norm": 0.9245103597640991, + "learning_rate": 0.0010095141845267066, + "loss": 1.5965, + "step": 3732 + }, + { + "epoch": 0.3937763713080169, + "grad_norm": 0.6847149133682251, + "learning_rate": 0.0010092786189464975, + "loss": 1.5537, + "step": 3733 + }, + { + "epoch": 0.3938818565400844, + "grad_norm": 0.9959841370582581, + "learning_rate": 0.0010090430243128259, + "loss": 1.5633, + "step": 3734 + }, + { + "epoch": 0.3939873417721519, + "grad_norm": 1.1526083946228027, + "learning_rate": 0.0010088074006520918, + "loss": 1.5754, + "step": 3735 + }, + { + "epoch": 0.39409282700421944, + "grad_norm": 0.715377926826477, + "learning_rate": 0.0010085717479906978, + "loss": 1.5824, + "step": 3736 + }, + { + "epoch": 0.39419831223628693, + "grad_norm": 1.0828629732131958, + "learning_rate": 0.0010083360663550502, + "loss": 1.5294, + "step": 3737 + }, + { + "epoch": 0.3943037974683544, + "grad_norm": 0.7427741885185242, + "learning_rate": 0.0010081003557715583, + "loss": 1.5326, + "step": 3738 + }, + { + "epoch": 0.3944092827004219, + "grad_norm": 0.8258534073829651, + "learning_rate": 0.0010078646162666345, + "loss": 1.5109, + "step": 3739 + }, + { + "epoch": 0.39451476793248946, + "grad_norm": 0.7979774475097656, + "learning_rate": 0.0010076288478666944, + "loss": 1.5863, + "step": 3740 + }, + { + "epoch": 0.39462025316455696, + "grad_norm": 0.7901679277420044, + "learning_rate": 0.0010073930505981573, + "loss": 1.5726, + "step": 3741 + }, + { + "epoch": 0.39472573839662445, + "grad_norm": 0.8784754276275635, + "learning_rate": 0.0010071572244874456, + "loss": 1.5697, + "step": 3742 + }, + { + "epoch": 0.394831223628692, + "grad_norm": 0.6756990551948547, + "learning_rate": 0.0010069213695609845, + "loss": 1.5474, + "step": 3743 + }, + { + "epoch": 0.3949367088607595, + "grad_norm": 0.9445067048072815, + "learning_rate": 
0.0010066854858452028, + "loss": 1.6037, + "step": 3744 + }, + { + "epoch": 0.395042194092827, + "grad_norm": 0.7161686420440674, + "learning_rate": 0.0010064495733665324, + "loss": 1.521, + "step": 3745 + }, + { + "epoch": 0.39514767932489453, + "grad_norm": 1.1721473932266235, + "learning_rate": 0.0010062136321514084, + "loss": 1.5624, + "step": 3746 + }, + { + "epoch": 0.395253164556962, + "grad_norm": 1.0922530889511108, + "learning_rate": 0.0010059776622262698, + "loss": 1.5966, + "step": 3747 + }, + { + "epoch": 0.3953586497890295, + "grad_norm": 0.7422212958335876, + "learning_rate": 0.0010057416636175575, + "loss": 1.5444, + "step": 3748 + }, + { + "epoch": 0.39546413502109706, + "grad_norm": 0.9434147477149963, + "learning_rate": 0.0010055056363517162, + "loss": 1.53, + "step": 3749 + }, + { + "epoch": 0.39556962025316456, + "grad_norm": 0.843855082988739, + "learning_rate": 0.0010052695804551946, + "loss": 1.5594, + "step": 3750 + }, + { + "epoch": 0.39567510548523205, + "grad_norm": 0.7123110890388489, + "learning_rate": 0.0010050334959544438, + "loss": 1.5538, + "step": 3751 + }, + { + "epoch": 0.3957805907172996, + "grad_norm": 0.8802634477615356, + "learning_rate": 0.0010047973828759178, + "loss": 1.583, + "step": 3752 + }, + { + "epoch": 0.3958860759493671, + "grad_norm": 0.6801364421844482, + "learning_rate": 0.0010045612412460747, + "loss": 1.5703, + "step": 3753 + }, + { + "epoch": 0.3959915611814346, + "grad_norm": 0.8391410708427429, + "learning_rate": 0.0010043250710913747, + "loss": 1.5549, + "step": 3754 + }, + { + "epoch": 0.39609704641350213, + "grad_norm": 0.9061526656150818, + "learning_rate": 0.0010040888724382828, + "loss": 1.5405, + "step": 3755 + }, + { + "epoch": 0.3962025316455696, + "grad_norm": 0.8860055208206177, + "learning_rate": 0.0010038526453132655, + "loss": 1.5626, + "step": 3756 + }, + { + "epoch": 0.3963080168776371, + "grad_norm": 0.6549196839332581, + "learning_rate": 0.0010036163897427937, + "loss": 1.5556, + "step": 
3757 + }, + { + "epoch": 0.39641350210970466, + "grad_norm": 0.9763411283493042, + "learning_rate": 0.0010033801057533404, + "loss": 1.5418, + "step": 3758 + }, + { + "epoch": 0.39651898734177216, + "grad_norm": 0.6851014494895935, + "learning_rate": 0.001003143793371383, + "loss": 1.5013, + "step": 3759 + }, + { + "epoch": 0.39662447257383965, + "grad_norm": 1.2030302286148071, + "learning_rate": 0.0010029074526234014, + "loss": 1.5527, + "step": 3760 + }, + { + "epoch": 0.3967299578059072, + "grad_norm": 0.877166748046875, + "learning_rate": 0.0010026710835358786, + "loss": 1.5805, + "step": 3761 + }, + { + "epoch": 0.3968354430379747, + "grad_norm": 0.7607035636901855, + "learning_rate": 0.0010024346861353007, + "loss": 1.5502, + "step": 3762 + }, + { + "epoch": 0.3969409282700422, + "grad_norm": 0.7363113760948181, + "learning_rate": 0.0010021982604481575, + "loss": 1.5563, + "step": 3763 + }, + { + "epoch": 0.39704641350210973, + "grad_norm": 0.8204362988471985, + "learning_rate": 0.001001961806500942, + "loss": 1.5329, + "step": 3764 + }, + { + "epoch": 0.3971518987341772, + "grad_norm": 0.8626847267150879, + "learning_rate": 0.0010017253243201495, + "loss": 1.5619, + "step": 3765 + }, + { + "epoch": 0.3972573839662447, + "grad_norm": 0.7101148366928101, + "learning_rate": 0.0010014888139322792, + "loss": 1.5584, + "step": 3766 + }, + { + "epoch": 0.39736286919831226, + "grad_norm": 0.7921831607818604, + "learning_rate": 0.001001252275363833, + "loss": 1.6118, + "step": 3767 + }, + { + "epoch": 0.39746835443037976, + "grad_norm": 0.8461834192276001, + "learning_rate": 0.0010010157086413167, + "loss": 1.6222, + "step": 3768 + }, + { + "epoch": 0.39757383966244725, + "grad_norm": 0.8769395351409912, + "learning_rate": 0.0010007791137912386, + "loss": 1.5618, + "step": 3769 + }, + { + "epoch": 0.39767932489451474, + "grad_norm": 0.760367214679718, + "learning_rate": 0.0010005424908401104, + "loss": 1.551, + "step": 3770 + }, + { + "epoch": 0.3977848101265823, + 
"grad_norm": 0.8606870174407959, + "learning_rate": 0.0010003058398144464, + "loss": 1.5249, + "step": 3771 + }, + { + "epoch": 0.3978902953586498, + "grad_norm": 0.6911659836769104, + "learning_rate": 0.0010000691607407652, + "loss": 1.5588, + "step": 3772 + }, + { + "epoch": 0.3979957805907173, + "grad_norm": 0.77984619140625, + "learning_rate": 0.0009998324536455877, + "loss": 1.5728, + "step": 3773 + }, + { + "epoch": 0.3981012658227848, + "grad_norm": 0.6606547236442566, + "learning_rate": 0.0009995957185554378, + "loss": 1.5743, + "step": 3774 + }, + { + "epoch": 0.3982067510548523, + "grad_norm": 0.8485710024833679, + "learning_rate": 0.000999358955496843, + "loss": 1.5531, + "step": 3775 + }, + { + "epoch": 0.3983122362869198, + "grad_norm": 0.8939611911773682, + "learning_rate": 0.000999122164496334, + "loss": 1.5412, + "step": 3776 + }, + { + "epoch": 0.39841772151898736, + "grad_norm": 0.715502142906189, + "learning_rate": 0.0009988853455804442, + "loss": 1.5259, + "step": 3777 + }, + { + "epoch": 0.39852320675105485, + "grad_norm": 0.860198438167572, + "learning_rate": 0.0009986484987757102, + "loss": 1.5629, + "step": 3778 + }, + { + "epoch": 0.39862869198312234, + "grad_norm": 0.9376717209815979, + "learning_rate": 0.0009984116241086723, + "loss": 1.5632, + "step": 3779 + }, + { + "epoch": 0.3987341772151899, + "grad_norm": 0.6412420272827148, + "learning_rate": 0.0009981747216058728, + "loss": 1.5325, + "step": 3780 + }, + { + "epoch": 0.3988396624472574, + "grad_norm": 0.9634427428245544, + "learning_rate": 0.0009979377912938587, + "loss": 1.5276, + "step": 3781 + }, + { + "epoch": 0.3989451476793249, + "grad_norm": 0.9802106618881226, + "learning_rate": 0.0009977008331991785, + "loss": 1.5389, + "step": 3782 + }, + { + "epoch": 0.3990506329113924, + "grad_norm": 0.6390166878700256, + "learning_rate": 0.000997463847348385, + "loss": 1.5772, + "step": 3783 + }, + { + "epoch": 0.3991561181434599, + "grad_norm": 1.0834404230117798, + "learning_rate": 
0.000997226833768033, + "loss": 1.528, + "step": 3784 + }, + { + "epoch": 0.3992616033755274, + "grad_norm": 0.8877212405204773, + "learning_rate": 0.0009969897924846818, + "loss": 1.5905, + "step": 3785 + }, + { + "epoch": 0.39936708860759496, + "grad_norm": 0.7237524390220642, + "learning_rate": 0.0009967527235248928, + "loss": 1.5834, + "step": 3786 + }, + { + "epoch": 0.39947257383966245, + "grad_norm": 1.1476733684539795, + "learning_rate": 0.0009965156269152308, + "loss": 1.5279, + "step": 3787 + }, + { + "epoch": 0.39957805907172994, + "grad_norm": 0.6681859493255615, + "learning_rate": 0.0009962785026822632, + "loss": 1.5849, + "step": 3788 + }, + { + "epoch": 0.3996835443037975, + "grad_norm": 0.8808258175849915, + "learning_rate": 0.0009960413508525617, + "loss": 1.5565, + "step": 3789 + }, + { + "epoch": 0.399789029535865, + "grad_norm": 0.6952772736549377, + "learning_rate": 0.0009958041714526998, + "loss": 1.5531, + "step": 3790 + }, + { + "epoch": 0.3998945147679325, + "grad_norm": 1.1171207427978516, + "learning_rate": 0.0009955669645092546, + "loss": 1.5513, + "step": 3791 + }, + { + "epoch": 0.4, + "grad_norm": 0.71282559633255, + "learning_rate": 0.0009953297300488069, + "loss": 1.5348, + "step": 3792 + }, + { + "epoch": 0.4001054852320675, + "grad_norm": 1.4979170560836792, + "learning_rate": 0.0009950924680979393, + "loss": 1.6032, + "step": 3793 + }, + { + "epoch": 0.400210970464135, + "grad_norm": 0.7622915506362915, + "learning_rate": 0.0009948551786832386, + "loss": 1.5787, + "step": 3794 + }, + { + "epoch": 0.40031645569620256, + "grad_norm": 1.366343379020691, + "learning_rate": 0.0009946178618312942, + "loss": 1.5822, + "step": 3795 + }, + { + "epoch": 0.40042194092827005, + "grad_norm": 0.8493646383285522, + "learning_rate": 0.0009943805175686986, + "loss": 1.5874, + "step": 3796 + }, + { + "epoch": 0.40052742616033754, + "grad_norm": 1.2869855165481567, + "learning_rate": 0.0009941431459220475, + "loss": 1.5576, + "step": 3797 + }, + { 
+ "epoch": 0.4006329113924051, + "grad_norm": 0.9457789659500122, + "learning_rate": 0.0009939057469179394, + "loss": 1.5915, + "step": 3798 + }, + { + "epoch": 0.4007383966244726, + "grad_norm": 0.8360527157783508, + "learning_rate": 0.0009936683205829762, + "loss": 1.5188, + "step": 3799 + }, + { + "epoch": 0.4008438818565401, + "grad_norm": 0.9190962314605713, + "learning_rate": 0.0009934308669437627, + "loss": 1.5581, + "step": 3800 + }, + { + "epoch": 0.4009493670886076, + "grad_norm": 0.8016577363014221, + "learning_rate": 0.0009931933860269063, + "loss": 1.5148, + "step": 3801 + }, + { + "epoch": 0.4010548523206751, + "grad_norm": 0.7542376518249512, + "learning_rate": 0.0009929558778590188, + "loss": 1.5247, + "step": 3802 + }, + { + "epoch": 0.4011603375527426, + "grad_norm": 0.7522518038749695, + "learning_rate": 0.0009927183424667135, + "loss": 1.595, + "step": 3803 + }, + { + "epoch": 0.4012658227848101, + "grad_norm": 0.9973788261413574, + "learning_rate": 0.0009924807798766077, + "loss": 1.5751, + "step": 3804 + }, + { + "epoch": 0.40137130801687765, + "grad_norm": 0.8856701254844666, + "learning_rate": 0.0009922431901153213, + "loss": 1.5161, + "step": 3805 + }, + { + "epoch": 0.40147679324894514, + "grad_norm": 0.783345639705658, + "learning_rate": 0.0009920055732094775, + "loss": 1.5034, + "step": 3806 + }, + { + "epoch": 0.40158227848101263, + "grad_norm": 1.063052773475647, + "learning_rate": 0.0009917679291857027, + "loss": 1.5368, + "step": 3807 + }, + { + "epoch": 0.4016877637130802, + "grad_norm": 0.7557124495506287, + "learning_rate": 0.0009915302580706256, + "loss": 1.5528, + "step": 3808 + }, + { + "epoch": 0.4017932489451477, + "grad_norm": 0.7199371457099915, + "learning_rate": 0.0009912925598908788, + "loss": 1.5757, + "step": 3809 + }, + { + "epoch": 0.40189873417721517, + "grad_norm": 0.8625279664993286, + "learning_rate": 0.0009910548346730972, + "loss": 1.5923, + "step": 3810 + }, + { + "epoch": 0.4020042194092827, + "grad_norm": 
0.7824478149414062, + "learning_rate": 0.00099081708244392, + "loss": 1.5429, + "step": 3811 + }, + { + "epoch": 0.4021097046413502, + "grad_norm": 0.714788019657135, + "learning_rate": 0.0009905793032299875, + "loss": 1.5281, + "step": 3812 + }, + { + "epoch": 0.4022151898734177, + "grad_norm": 0.8080301284790039, + "learning_rate": 0.0009903414970579443, + "loss": 1.5262, + "step": 3813 + }, + { + "epoch": 0.40232067510548525, + "grad_norm": 0.7356367707252502, + "learning_rate": 0.000990103663954438, + "loss": 1.5474, + "step": 3814 + }, + { + "epoch": 0.40242616033755274, + "grad_norm": 0.7162673473358154, + "learning_rate": 0.000989865803946119, + "loss": 1.5649, + "step": 3815 + }, + { + "epoch": 0.40253164556962023, + "grad_norm": 0.7211686372756958, + "learning_rate": 0.0009896279170596406, + "loss": 1.5359, + "step": 3816 + }, + { + "epoch": 0.4026371308016878, + "grad_norm": 0.7798879146575928, + "learning_rate": 0.0009893900033216593, + "loss": 1.5165, + "step": 3817 + }, + { + "epoch": 0.4027426160337553, + "grad_norm": 0.7288995981216431, + "learning_rate": 0.0009891520627588342, + "loss": 1.5469, + "step": 3818 + }, + { + "epoch": 0.40284810126582277, + "grad_norm": 0.760934591293335, + "learning_rate": 0.000988914095397828, + "loss": 1.5647, + "step": 3819 + }, + { + "epoch": 0.4029535864978903, + "grad_norm": 0.6942379474639893, + "learning_rate": 0.0009886761012653062, + "loss": 1.5166, + "step": 3820 + }, + { + "epoch": 0.4030590717299578, + "grad_norm": 0.7822149991989136, + "learning_rate": 0.000988438080387937, + "loss": 1.5564, + "step": 3821 + }, + { + "epoch": 0.4031645569620253, + "grad_norm": 0.7543655633926392, + "learning_rate": 0.000988200032792392, + "loss": 1.4992, + "step": 3822 + }, + { + "epoch": 0.40327004219409285, + "grad_norm": 0.7120683193206787, + "learning_rate": 0.0009879619585053455, + "loss": 1.5579, + "step": 3823 + }, + { + "epoch": 0.40337552742616034, + "grad_norm": 0.70358806848526, + "learning_rate": 
0.0009877238575534749, + "loss": 1.5553, + "step": 3824 + }, + { + "epoch": 0.40348101265822783, + "grad_norm": 0.7535173296928406, + "learning_rate": 0.0009874857299634605, + "loss": 1.5525, + "step": 3825 + }, + { + "epoch": 0.4035864978902954, + "grad_norm": 0.6715105772018433, + "learning_rate": 0.0009872475757619862, + "loss": 1.5851, + "step": 3826 + }, + { + "epoch": 0.4036919831223629, + "grad_norm": 0.7529786825180054, + "learning_rate": 0.000987009394975738, + "loss": 1.5559, + "step": 3827 + }, + { + "epoch": 0.40379746835443037, + "grad_norm": 0.6599568128585815, + "learning_rate": 0.0009867711876314052, + "loss": 1.5252, + "step": 3828 + }, + { + "epoch": 0.4039029535864979, + "grad_norm": 0.7733898758888245, + "learning_rate": 0.00098653295375568, + "loss": 1.5364, + "step": 3829 + }, + { + "epoch": 0.4040084388185654, + "grad_norm": 0.6555258631706238, + "learning_rate": 0.000986294693375258, + "loss": 1.5208, + "step": 3830 + }, + { + "epoch": 0.4041139240506329, + "grad_norm": 0.7163701057434082, + "learning_rate": 0.0009860564065168375, + "loss": 1.5633, + "step": 3831 + }, + { + "epoch": 0.40421940928270045, + "grad_norm": 0.7664496302604675, + "learning_rate": 0.0009858180932071192, + "loss": 1.5761, + "step": 3832 + }, + { + "epoch": 0.40432489451476794, + "grad_norm": 0.6965370774269104, + "learning_rate": 0.000985579753472808, + "loss": 1.5528, + "step": 3833 + }, + { + "epoch": 0.40443037974683543, + "grad_norm": 0.8063269853591919, + "learning_rate": 0.0009853413873406104, + "loss": 1.5434, + "step": 3834 + }, + { + "epoch": 0.4045358649789029, + "grad_norm": 0.8085960149765015, + "learning_rate": 0.000985102994837237, + "loss": 1.5461, + "step": 3835 + }, + { + "epoch": 0.4046413502109705, + "grad_norm": 0.9235751628875732, + "learning_rate": 0.0009848645759894005, + "loss": 1.5735, + "step": 3836 + }, + { + "epoch": 0.40474683544303797, + "grad_norm": 0.8624776005744934, + "learning_rate": 0.0009846261308238177, + "loss": 1.5265, + 
"step": 3837 + }, + { + "epoch": 0.40485232067510546, + "grad_norm": 0.7165701985359192, + "learning_rate": 0.0009843876593672064, + "loss": 1.5478, + "step": 3838 + }, + { + "epoch": 0.404957805907173, + "grad_norm": 0.8040773868560791, + "learning_rate": 0.0009841491616462892, + "loss": 1.6064, + "step": 3839 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 0.7989050149917603, + "learning_rate": 0.000983910637687791, + "loss": 1.5933, + "step": 3840 + }, + { + "epoch": 0.405168776371308, + "grad_norm": 0.6649971008300781, + "learning_rate": 0.0009836720875184394, + "loss": 1.5795, + "step": 3841 + }, + { + "epoch": 0.40527426160337554, + "grad_norm": 0.7530891299247742, + "learning_rate": 0.0009834335111649655, + "loss": 1.5323, + "step": 3842 + }, + { + "epoch": 0.40537974683544303, + "grad_norm": 0.7579845190048218, + "learning_rate": 0.0009831949086541024, + "loss": 1.5468, + "step": 3843 + }, + { + "epoch": 0.4054852320675105, + "grad_norm": 0.7081655859947205, + "learning_rate": 0.0009829562800125868, + "loss": 1.5376, + "step": 3844 + }, + { + "epoch": 0.4055907172995781, + "grad_norm": 0.9437825083732605, + "learning_rate": 0.0009827176252671587, + "loss": 1.5717, + "step": 3845 + }, + { + "epoch": 0.40569620253164557, + "grad_norm": 1.2238421440124512, + "learning_rate": 0.0009824789444445603, + "loss": 1.5392, + "step": 3846 + }, + { + "epoch": 0.40580168776371306, + "grad_norm": 0.7500943541526794, + "learning_rate": 0.0009822402375715366, + "loss": 1.5233, + "step": 3847 + }, + { + "epoch": 0.4059071729957806, + "grad_norm": 1.209041714668274, + "learning_rate": 0.0009820015046748366, + "loss": 1.5524, + "step": 3848 + }, + { + "epoch": 0.4060126582278481, + "grad_norm": 0.8548074960708618, + "learning_rate": 0.0009817627457812106, + "loss": 1.5345, + "step": 3849 + }, + { + "epoch": 0.4061181434599156, + "grad_norm": 0.9063013792037964, + "learning_rate": 0.0009815239609174138, + "loss": 1.542, + "step": 3850 + }, + { + "epoch": 
0.40622362869198314, + "grad_norm": 1.1254750490188599, + "learning_rate": 0.0009812851501102024, + "loss": 1.6004, + "step": 3851 + }, + { + "epoch": 0.40632911392405063, + "grad_norm": 0.7672401666641235, + "learning_rate": 0.0009810463133863368, + "loss": 1.5646, + "step": 3852 + }, + { + "epoch": 0.4064345991561181, + "grad_norm": 0.8608214259147644, + "learning_rate": 0.0009808074507725794, + "loss": 1.5726, + "step": 3853 + }, + { + "epoch": 0.4065400843881857, + "grad_norm": 0.9309853911399841, + "learning_rate": 0.0009805685622956966, + "loss": 1.5669, + "step": 3854 + }, + { + "epoch": 0.40664556962025317, + "grad_norm": 0.7094742655754089, + "learning_rate": 0.0009803296479824564, + "loss": 1.5503, + "step": 3855 + }, + { + "epoch": 0.40675105485232066, + "grad_norm": 0.9162649512290955, + "learning_rate": 0.0009800907078596308, + "loss": 1.5715, + "step": 3856 + }, + { + "epoch": 0.4068565400843882, + "grad_norm": 0.6686220169067383, + "learning_rate": 0.000979851741953994, + "loss": 1.5433, + "step": 3857 + }, + { + "epoch": 0.4069620253164557, + "grad_norm": 1.1458511352539062, + "learning_rate": 0.0009796127502923232, + "loss": 1.5578, + "step": 3858 + }, + { + "epoch": 0.4070675105485232, + "grad_norm": 0.6847813725471497, + "learning_rate": 0.000979373732901399, + "loss": 1.5293, + "step": 3859 + }, + { + "epoch": 0.40717299578059074, + "grad_norm": 1.1015087366104126, + "learning_rate": 0.0009791346898080043, + "loss": 1.5375, + "step": 3860 + }, + { + "epoch": 0.40727848101265823, + "grad_norm": 0.9087206721305847, + "learning_rate": 0.000978895621038925, + "loss": 1.5453, + "step": 3861 + }, + { + "epoch": 0.4073839662447257, + "grad_norm": 0.7723161578178406, + "learning_rate": 0.0009786565266209496, + "loss": 1.5513, + "step": 3862 + }, + { + "epoch": 0.4074894514767933, + "grad_norm": 0.879840612411499, + "learning_rate": 0.0009784174065808706, + "loss": 1.5718, + "step": 3863 + }, + { + "epoch": 0.40759493670886077, + "grad_norm": 
0.7211084961891174, + "learning_rate": 0.0009781782609454821, + "loss": 1.517, + "step": 3864 + }, + { + "epoch": 0.40770042194092826, + "grad_norm": 1.2297585010528564, + "learning_rate": 0.000977939089741582, + "loss": 1.5262, + "step": 3865 + }, + { + "epoch": 0.4078059071729958, + "grad_norm": 1.0008622407913208, + "learning_rate": 0.0009776998929959695, + "loss": 1.5768, + "step": 3866 + }, + { + "epoch": 0.4079113924050633, + "grad_norm": 0.7060263156890869, + "learning_rate": 0.0009774606707354493, + "loss": 1.5198, + "step": 3867 + }, + { + "epoch": 0.4080168776371308, + "grad_norm": 0.8464383482933044, + "learning_rate": 0.0009772214229868265, + "loss": 1.527, + "step": 3868 + }, + { + "epoch": 0.4081223628691983, + "grad_norm": 0.8271245956420898, + "learning_rate": 0.0009769821497769102, + "loss": 1.5763, + "step": 3869 + }, + { + "epoch": 0.40822784810126583, + "grad_norm": 0.738031268119812, + "learning_rate": 0.0009767428511325122, + "loss": 1.5526, + "step": 3870 + }, + { + "epoch": 0.4083333333333333, + "grad_norm": 0.8513022065162659, + "learning_rate": 0.000976503527080447, + "loss": 1.5296, + "step": 3871 + }, + { + "epoch": 0.4084388185654008, + "grad_norm": 0.7244751453399658, + "learning_rate": 0.0009762641776475322, + "loss": 1.5843, + "step": 3872 + }, + { + "epoch": 0.40854430379746837, + "grad_norm": 0.8052164316177368, + "learning_rate": 0.0009760248028605882, + "loss": 1.5442, + "step": 3873 + }, + { + "epoch": 0.40864978902953586, + "grad_norm": 0.7332009077072144, + "learning_rate": 0.0009757854027464377, + "loss": 1.5505, + "step": 3874 + }, + { + "epoch": 0.40875527426160335, + "grad_norm": 0.710331916809082, + "learning_rate": 0.000975545977331907, + "loss": 1.5488, + "step": 3875 + }, + { + "epoch": 0.4088607594936709, + "grad_norm": 0.7128881812095642, + "learning_rate": 0.0009753065266438249, + "loss": 1.51, + "step": 3876 + }, + { + "epoch": 0.4089662447257384, + "grad_norm": 0.7109745740890503, + "learning_rate": 
0.0009750670507090233, + "loss": 1.5062, + "step": 3877 + }, + { + "epoch": 0.4090717299578059, + "grad_norm": 0.6654733419418335, + "learning_rate": 0.000974827549554336, + "loss": 1.503, + "step": 3878 + }, + { + "epoch": 0.40917721518987343, + "grad_norm": 0.6557893753051758, + "learning_rate": 0.0009745880232066007, + "loss": 1.554, + "step": 3879 + }, + { + "epoch": 0.4092827004219409, + "grad_norm": 0.7236716747283936, + "learning_rate": 0.0009743484716926576, + "loss": 1.5111, + "step": 3880 + }, + { + "epoch": 0.4093881856540084, + "grad_norm": 0.6860948204994202, + "learning_rate": 0.0009741088950393497, + "loss": 1.5545, + "step": 3881 + }, + { + "epoch": 0.40949367088607597, + "grad_norm": 0.6518135070800781, + "learning_rate": 0.0009738692932735225, + "loss": 1.5371, + "step": 3882 + }, + { + "epoch": 0.40959915611814346, + "grad_norm": 0.9402605295181274, + "learning_rate": 0.0009736296664220247, + "loss": 1.5355, + "step": 3883 + }, + { + "epoch": 0.40970464135021095, + "grad_norm": 0.8298816084861755, + "learning_rate": 0.0009733900145117075, + "loss": 1.519, + "step": 3884 + }, + { + "epoch": 0.4098101265822785, + "grad_norm": 0.7044643759727478, + "learning_rate": 0.0009731503375694253, + "loss": 1.5221, + "step": 3885 + }, + { + "epoch": 0.409915611814346, + "grad_norm": 0.7053385376930237, + "learning_rate": 0.0009729106356220352, + "loss": 1.5512, + "step": 3886 + }, + { + "epoch": 0.4100210970464135, + "grad_norm": 0.6996482610702515, + "learning_rate": 0.0009726709086963967, + "loss": 1.5423, + "step": 3887 + }, + { + "epoch": 0.41012658227848103, + "grad_norm": 0.6817649006843567, + "learning_rate": 0.0009724311568193726, + "loss": 1.5563, + "step": 3888 + }, + { + "epoch": 0.4102320675105485, + "grad_norm": 0.7300124168395996, + "learning_rate": 0.0009721913800178281, + "loss": 1.5193, + "step": 3889 + }, + { + "epoch": 0.410337552742616, + "grad_norm": 0.8259506821632385, + "learning_rate": 0.0009719515783186319, + "loss": 1.5129, + "step": 
3890 + }, + { + "epoch": 0.41044303797468357, + "grad_norm": 0.708491325378418, + "learning_rate": 0.0009717117517486543, + "loss": 1.5365, + "step": 3891 + }, + { + "epoch": 0.41054852320675106, + "grad_norm": 0.6298888921737671, + "learning_rate": 0.0009714719003347693, + "loss": 1.5775, + "step": 3892 + }, + { + "epoch": 0.41065400843881855, + "grad_norm": 0.7180604338645935, + "learning_rate": 0.0009712320241038537, + "loss": 1.5657, + "step": 3893 + }, + { + "epoch": 0.4107594936708861, + "grad_norm": 0.6602415442466736, + "learning_rate": 0.0009709921230827865, + "loss": 1.5357, + "step": 3894 + }, + { + "epoch": 0.4108649789029536, + "grad_norm": 0.7754647731781006, + "learning_rate": 0.00097075219729845, + "loss": 1.5373, + "step": 3895 + }, + { + "epoch": 0.4109704641350211, + "grad_norm": 0.9935294389724731, + "learning_rate": 0.0009705122467777292, + "loss": 1.5073, + "step": 3896 + }, + { + "epoch": 0.41107594936708863, + "grad_norm": 0.8324797749519348, + "learning_rate": 0.0009702722715475113, + "loss": 1.529, + "step": 3897 + }, + { + "epoch": 0.4111814345991561, + "grad_norm": 0.6747130155563354, + "learning_rate": 0.000970032271634687, + "loss": 1.5312, + "step": 3898 + }, + { + "epoch": 0.4112869198312236, + "grad_norm": 1.1660836935043335, + "learning_rate": 0.0009697922470661497, + "loss": 1.5164, + "step": 3899 + }, + { + "epoch": 0.41139240506329117, + "grad_norm": 0.8841404318809509, + "learning_rate": 0.0009695521978687951, + "loss": 1.5608, + "step": 3900 + }, + { + "epoch": 0.41149789029535866, + "grad_norm": 0.7044554948806763, + "learning_rate": 0.0009693121240695216, + "loss": 1.5219, + "step": 3901 + }, + { + "epoch": 0.41160337552742615, + "grad_norm": 0.9391558766365051, + "learning_rate": 0.0009690720256952314, + "loss": 1.593, + "step": 3902 + }, + { + "epoch": 0.41170886075949364, + "grad_norm": 0.7093097567558289, + "learning_rate": 0.0009688319027728282, + "loss": 1.5384, + "step": 3903 + }, + { + "epoch": 0.4118143459915612, + 
"grad_norm": 0.9732421040534973, + "learning_rate": 0.0009685917553292192, + "loss": 1.5265, + "step": 3904 + }, + { + "epoch": 0.4119198312236287, + "grad_norm": 1.0878092050552368, + "learning_rate": 0.0009683515833913137, + "loss": 1.5363, + "step": 3905 + }, + { + "epoch": 0.4120253164556962, + "grad_norm": 0.7049824595451355, + "learning_rate": 0.0009681113869860247, + "loss": 1.5387, + "step": 3906 + }, + { + "epoch": 0.4121308016877637, + "grad_norm": 1.0745465755462646, + "learning_rate": 0.0009678711661402672, + "loss": 1.5145, + "step": 3907 + }, + { + "epoch": 0.4122362869198312, + "grad_norm": 0.7493587732315063, + "learning_rate": 0.0009676309208809592, + "loss": 1.539, + "step": 3908 + }, + { + "epoch": 0.4123417721518987, + "grad_norm": 0.8103012442588806, + "learning_rate": 0.0009673906512350213, + "loss": 1.5352, + "step": 3909 + }, + { + "epoch": 0.41244725738396626, + "grad_norm": 0.8610765933990479, + "learning_rate": 0.0009671503572293767, + "loss": 1.5801, + "step": 3910 + }, + { + "epoch": 0.41255274261603375, + "grad_norm": 0.6370657682418823, + "learning_rate": 0.000966910038890952, + "loss": 1.5443, + "step": 3911 + }, + { + "epoch": 0.41265822784810124, + "grad_norm": 0.7906767725944519, + "learning_rate": 0.0009666696962466757, + "loss": 1.5837, + "step": 3912 + }, + { + "epoch": 0.4127637130801688, + "grad_norm": 0.7457589507102966, + "learning_rate": 0.0009664293293234795, + "loss": 1.5408, + "step": 3913 + }, + { + "epoch": 0.4128691983122363, + "grad_norm": 0.7062943577766418, + "learning_rate": 0.0009661889381482977, + "loss": 1.4658, + "step": 3914 + }, + { + "epoch": 0.4129746835443038, + "grad_norm": 0.7394726276397705, + "learning_rate": 0.0009659485227480676, + "loss": 1.5344, + "step": 3915 + }, + { + "epoch": 0.4130801687763713, + "grad_norm": 0.832467257976532, + "learning_rate": 0.0009657080831497284, + "loss": 1.5454, + "step": 3916 + }, + { + "epoch": 0.4131856540084388, + "grad_norm": 0.7655444145202637, + 
"learning_rate": 0.0009654676193802232, + "loss": 1.5115, + "step": 3917 + }, + { + "epoch": 0.4132911392405063, + "grad_norm": 0.903254508972168, + "learning_rate": 0.0009652271314664966, + "loss": 1.5587, + "step": 3918 + }, + { + "epoch": 0.41339662447257386, + "grad_norm": 1.4208942651748657, + "learning_rate": 0.0009649866194354967, + "loss": 1.5293, + "step": 3919 + }, + { + "epoch": 0.41350210970464135, + "grad_norm": 0.7487862706184387, + "learning_rate": 0.0009647460833141742, + "loss": 1.5548, + "step": 3920 + }, + { + "epoch": 0.41360759493670884, + "grad_norm": 1.207499623298645, + "learning_rate": 0.0009645055231294823, + "loss": 1.5227, + "step": 3921 + }, + { + "epoch": 0.4137130801687764, + "grad_norm": 0.7497016191482544, + "learning_rate": 0.0009642649389083768, + "loss": 1.5819, + "step": 3922 + }, + { + "epoch": 0.4138185654008439, + "grad_norm": 1.1272863149642944, + "learning_rate": 0.0009640243306778162, + "loss": 1.5648, + "step": 3923 + }, + { + "epoch": 0.4139240506329114, + "grad_norm": 0.8584696650505066, + "learning_rate": 0.0009637836984647627, + "loss": 1.536, + "step": 3924 + }, + { + "epoch": 0.4140295358649789, + "grad_norm": 1.2272335290908813, + "learning_rate": 0.0009635430422961794, + "loss": 1.4995, + "step": 3925 + }, + { + "epoch": 0.4141350210970464, + "grad_norm": 0.8763427734375, + "learning_rate": 0.0009633023621990334, + "loss": 1.5524, + "step": 3926 + }, + { + "epoch": 0.4142405063291139, + "grad_norm": 1.0933728218078613, + "learning_rate": 0.000963061658200294, + "loss": 1.5404, + "step": 3927 + }, + { + "epoch": 0.41434599156118146, + "grad_norm": 0.8595321774482727, + "learning_rate": 0.0009628209303269335, + "loss": 1.5524, + "step": 3928 + }, + { + "epoch": 0.41445147679324895, + "grad_norm": 0.8621724247932434, + "learning_rate": 0.0009625801786059267, + "loss": 1.5621, + "step": 3929 + }, + { + "epoch": 0.41455696202531644, + "grad_norm": 0.7917286157608032, + "learning_rate": 0.0009623394030642507, + "loss": 
1.5563, + "step": 3930 + }, + { + "epoch": 0.414662447257384, + "grad_norm": 0.7647761702537537, + "learning_rate": 0.0009620986037288858, + "loss": 1.5132, + "step": 3931 + }, + { + "epoch": 0.4147679324894515, + "grad_norm": 0.8932857513427734, + "learning_rate": 0.0009618577806268147, + "loss": 1.5242, + "step": 3932 + }, + { + "epoch": 0.414873417721519, + "grad_norm": 0.7989896535873413, + "learning_rate": 0.0009616169337850229, + "loss": 1.5341, + "step": 3933 + }, + { + "epoch": 0.41497890295358647, + "grad_norm": 0.7611507773399353, + "learning_rate": 0.0009613760632304985, + "loss": 1.5458, + "step": 3934 + }, + { + "epoch": 0.415084388185654, + "grad_norm": 1.0191787481307983, + "learning_rate": 0.0009611351689902321, + "loss": 1.5662, + "step": 3935 + }, + { + "epoch": 0.4151898734177215, + "grad_norm": 0.8283717036247253, + "learning_rate": 0.000960894251091217, + "loss": 1.5343, + "step": 3936 + }, + { + "epoch": 0.415295358649789, + "grad_norm": 0.8015774488449097, + "learning_rate": 0.0009606533095604499, + "loss": 1.5747, + "step": 3937 + }, + { + "epoch": 0.41540084388185655, + "grad_norm": 0.9598122239112854, + "learning_rate": 0.0009604123444249288, + "loss": 1.494, + "step": 3938 + }, + { + "epoch": 0.41550632911392404, + "grad_norm": 0.6751738786697388, + "learning_rate": 0.0009601713557116554, + "loss": 1.5508, + "step": 3939 + }, + { + "epoch": 0.41561181434599154, + "grad_norm": 1.1150704622268677, + "learning_rate": 0.0009599303434476334, + "loss": 1.5426, + "step": 3940 + }, + { + "epoch": 0.4157172995780591, + "grad_norm": 0.7765944004058838, + "learning_rate": 0.0009596893076598698, + "loss": 1.5424, + "step": 3941 + }, + { + "epoch": 0.4158227848101266, + "grad_norm": 0.8746556639671326, + "learning_rate": 0.0009594482483753736, + "loss": 1.5739, + "step": 3942 + }, + { + "epoch": 0.41592827004219407, + "grad_norm": 0.9627506136894226, + "learning_rate": 0.0009592071656211568, + "loss": 1.5368, + "step": 3943 + }, + { + "epoch": 
0.4160337552742616, + "grad_norm": 0.7076307535171509, + "learning_rate": 0.0009589660594242338, + "loss": 1.5545, + "step": 3944 + }, + { + "epoch": 0.4161392405063291, + "grad_norm": 0.6761050224304199, + "learning_rate": 0.0009587249298116219, + "loss": 1.5136, + "step": 3945 + }, + { + "epoch": 0.4162447257383966, + "grad_norm": 0.7302618026733398, + "learning_rate": 0.0009584837768103408, + "loss": 1.5045, + "step": 3946 + }, + { + "epoch": 0.41635021097046415, + "grad_norm": 0.7844850420951843, + "learning_rate": 0.0009582426004474129, + "loss": 1.5691, + "step": 3947 + }, + { + "epoch": 0.41645569620253164, + "grad_norm": 0.6839796304702759, + "learning_rate": 0.0009580014007498634, + "loss": 1.5528, + "step": 3948 + }, + { + "epoch": 0.41656118143459914, + "grad_norm": 0.836275041103363, + "learning_rate": 0.0009577601777447194, + "loss": 1.522, + "step": 3949 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 1.1061640977859497, + "learning_rate": 0.0009575189314590118, + "loss": 1.5543, + "step": 3950 + }, + { + "epoch": 0.4167721518987342, + "grad_norm": 0.7771759629249573, + "learning_rate": 0.0009572776619197731, + "loss": 1.5622, + "step": 3951 + }, + { + "epoch": 0.41687763713080167, + "grad_norm": 0.8186155557632446, + "learning_rate": 0.0009570363691540387, + "loss": 1.5385, + "step": 3952 + }, + { + "epoch": 0.4169831223628692, + "grad_norm": 0.7742159962654114, + "learning_rate": 0.0009567950531888469, + "loss": 1.5727, + "step": 3953 + }, + { + "epoch": 0.4170886075949367, + "grad_norm": 0.6852231621742249, + "learning_rate": 0.0009565537140512381, + "loss": 1.5387, + "step": 3954 + }, + { + "epoch": 0.4171940928270042, + "grad_norm": 0.8609788417816162, + "learning_rate": 0.0009563123517682559, + "loss": 1.5355, + "step": 3955 + }, + { + "epoch": 0.41729957805907175, + "grad_norm": 0.7205260396003723, + "learning_rate": 0.0009560709663669456, + "loss": 1.512, + "step": 3956 + }, + { + "epoch": 0.41740506329113924, + "grad_norm": 
0.8478295803070068, + "learning_rate": 0.0009558295578743559, + "loss": 1.5406, + "step": 3957 + }, + { + "epoch": 0.41751054852320674, + "grad_norm": 0.898260235786438, + "learning_rate": 0.0009555881263175381, + "loss": 1.571, + "step": 3958 + }, + { + "epoch": 0.4176160337552743, + "grad_norm": 0.6835172176361084, + "learning_rate": 0.0009553466717235456, + "loss": 1.497, + "step": 3959 + }, + { + "epoch": 0.4177215189873418, + "grad_norm": 0.6970818638801575, + "learning_rate": 0.0009551051941194346, + "loss": 1.5486, + "step": 3960 + }, + { + "epoch": 0.41782700421940927, + "grad_norm": 0.8452182412147522, + "learning_rate": 0.0009548636935322639, + "loss": 1.5094, + "step": 3961 + }, + { + "epoch": 0.4179324894514768, + "grad_norm": 0.7500672340393066, + "learning_rate": 0.0009546221699890945, + "loss": 1.5145, + "step": 3962 + }, + { + "epoch": 0.4180379746835443, + "grad_norm": 0.7015186548233032, + "learning_rate": 0.0009543806235169909, + "loss": 1.5146, + "step": 3963 + }, + { + "epoch": 0.4181434599156118, + "grad_norm": 0.8761023879051208, + "learning_rate": 0.0009541390541430192, + "loss": 1.5681, + "step": 3964 + }, + { + "epoch": 0.41824894514767935, + "grad_norm": 0.6663405895233154, + "learning_rate": 0.0009538974618942486, + "loss": 1.5594, + "step": 3965 + }, + { + "epoch": 0.41835443037974684, + "grad_norm": 0.809868574142456, + "learning_rate": 0.0009536558467977505, + "loss": 1.5225, + "step": 3966 + }, + { + "epoch": 0.41845991561181434, + "grad_norm": 0.7227051854133606, + "learning_rate": 0.0009534142088805994, + "loss": 1.5324, + "step": 3967 + }, + { + "epoch": 0.41856540084388183, + "grad_norm": 1.1289951801300049, + "learning_rate": 0.0009531725481698719, + "loss": 1.5451, + "step": 3968 + }, + { + "epoch": 0.4186708860759494, + "grad_norm": 0.8250858187675476, + "learning_rate": 0.0009529308646926473, + "loss": 1.5434, + "step": 3969 + }, + { + "epoch": 0.41877637130801687, + "grad_norm": 0.8826503157615662, + "learning_rate": 
0.0009526891584760071, + "loss": 1.5115, + "step": 3970 + }, + { + "epoch": 0.41888185654008436, + "grad_norm": 1.1021182537078857, + "learning_rate": 0.0009524474295470362, + "loss": 1.5571, + "step": 3971 + }, + { + "epoch": 0.4189873417721519, + "grad_norm": 0.7184768915176392, + "learning_rate": 0.0009522056779328214, + "loss": 1.5302, + "step": 3972 + }, + { + "epoch": 0.4190928270042194, + "grad_norm": 1.3033256530761719, + "learning_rate": 0.0009519639036604522, + "loss": 1.5751, + "step": 3973 + }, + { + "epoch": 0.4191983122362869, + "grad_norm": 0.719514787197113, + "learning_rate": 0.0009517221067570204, + "loss": 1.5309, + "step": 3974 + }, + { + "epoch": 0.41930379746835444, + "grad_norm": 1.0539745092391968, + "learning_rate": 0.0009514802872496205, + "loss": 1.5394, + "step": 3975 + }, + { + "epoch": 0.41940928270042194, + "grad_norm": 0.7126771211624146, + "learning_rate": 0.0009512384451653499, + "loss": 1.539, + "step": 3976 + }, + { + "epoch": 0.41951476793248943, + "grad_norm": 0.758156418800354, + "learning_rate": 0.000950996580531308, + "loss": 1.5369, + "step": 3977 + }, + { + "epoch": 0.419620253164557, + "grad_norm": 0.6687197685241699, + "learning_rate": 0.000950754693374597, + "loss": 1.4969, + "step": 3978 + }, + { + "epoch": 0.41972573839662447, + "grad_norm": 0.8352579474449158, + "learning_rate": 0.0009505127837223215, + "loss": 1.5529, + "step": 3979 + }, + { + "epoch": 0.41983122362869196, + "grad_norm": 0.8847649693489075, + "learning_rate": 0.0009502708516015889, + "loss": 1.5726, + "step": 3980 + }, + { + "epoch": 0.4199367088607595, + "grad_norm": 0.7737507224082947, + "learning_rate": 0.0009500288970395085, + "loss": 1.5243, + "step": 3981 + }, + { + "epoch": 0.420042194092827, + "grad_norm": 0.745621919631958, + "learning_rate": 0.000949786920063193, + "loss": 1.546, + "step": 3982 + }, + { + "epoch": 0.4201476793248945, + "grad_norm": 0.9338630437850952, + "learning_rate": 0.0009495449206997568, + "loss": 1.5368, + "step": 
3983 + }, + { + "epoch": 0.42025316455696204, + "grad_norm": 0.8532330393791199, + "learning_rate": 0.0009493028989763171, + "loss": 1.5397, + "step": 3984 + }, + { + "epoch": 0.42035864978902954, + "grad_norm": 0.743352472782135, + "learning_rate": 0.0009490608549199939, + "loss": 1.5272, + "step": 3985 + }, + { + "epoch": 0.42046413502109703, + "grad_norm": 1.0237423181533813, + "learning_rate": 0.0009488187885579092, + "loss": 1.522, + "step": 3986 + }, + { + "epoch": 0.4205696202531646, + "grad_norm": 0.8517845869064331, + "learning_rate": 0.000948576699917188, + "loss": 1.5077, + "step": 3987 + }, + { + "epoch": 0.42067510548523207, + "grad_norm": 0.716112494468689, + "learning_rate": 0.0009483345890249571, + "loss": 1.5375, + "step": 3988 + }, + { + "epoch": 0.42078059071729956, + "grad_norm": 0.7411909699440002, + "learning_rate": 0.0009480924559083468, + "loss": 1.5459, + "step": 3989 + }, + { + "epoch": 0.4208860759493671, + "grad_norm": 0.7164613604545593, + "learning_rate": 0.0009478503005944888, + "loss": 1.5419, + "step": 3990 + }, + { + "epoch": 0.4209915611814346, + "grad_norm": 0.6778056025505066, + "learning_rate": 0.0009476081231105183, + "loss": 1.5463, + "step": 3991 + }, + { + "epoch": 0.4210970464135021, + "grad_norm": 0.682576596736908, + "learning_rate": 0.0009473659234835722, + "loss": 1.5509, + "step": 3992 + }, + { + "epoch": 0.42120253164556964, + "grad_norm": 0.835035502910614, + "learning_rate": 0.00094712370174079, + "loss": 1.5484, + "step": 3993 + }, + { + "epoch": 0.42130801687763714, + "grad_norm": 0.7932168841362, + "learning_rate": 0.0009468814579093141, + "loss": 1.553, + "step": 3994 + }, + { + "epoch": 0.42141350210970463, + "grad_norm": 0.7312842011451721, + "learning_rate": 0.0009466391920162894, + "loss": 1.5394, + "step": 3995 + }, + { + "epoch": 0.4215189873417722, + "grad_norm": 0.7138736844062805, + "learning_rate": 0.0009463969040888624, + "loss": 1.5535, + "step": 3996 + }, + { + "epoch": 0.42162447257383967, + 
"grad_norm": 0.7114962935447693, + "learning_rate": 0.0009461545941541832, + "loss": 1.5653, + "step": 3997 + }, + { + "epoch": 0.42172995780590716, + "grad_norm": 0.7484792470932007, + "learning_rate": 0.0009459122622394033, + "loss": 1.5302, + "step": 3998 + }, + { + "epoch": 0.4218354430379747, + "grad_norm": 0.8209818005561829, + "learning_rate": 0.0009456699083716777, + "loss": 1.5563, + "step": 3999 + }, + { + "epoch": 0.4219409282700422, + "grad_norm": 0.6589393019676208, + "learning_rate": 0.0009454275325781632, + "loss": 1.5449, + "step": 4000 + }, + { + "epoch": 0.4220464135021097, + "grad_norm": 0.8350273370742798, + "learning_rate": 0.0009451851348860191, + "loss": 1.5265, + "step": 4001 + }, + { + "epoch": 0.4221518987341772, + "grad_norm": 0.7932733297348022, + "learning_rate": 0.0009449427153224076, + "loss": 1.5352, + "step": 4002 + }, + { + "epoch": 0.42225738396624474, + "grad_norm": 0.6880390644073486, + "learning_rate": 0.0009447002739144924, + "loss": 1.5067, + "step": 4003 + }, + { + "epoch": 0.42236286919831223, + "grad_norm": 1.2066669464111328, + "learning_rate": 0.0009444578106894408, + "loss": 1.5606, + "step": 4004 + }, + { + "epoch": 0.4224683544303797, + "grad_norm": 0.6697984337806702, + "learning_rate": 0.000944215325674422, + "loss": 1.5273, + "step": 4005 + }, + { + "epoch": 0.42257383966244727, + "grad_norm": 1.1991424560546875, + "learning_rate": 0.0009439728188966074, + "loss": 1.5608, + "step": 4006 + }, + { + "epoch": 0.42267932489451476, + "grad_norm": 0.9144265055656433, + "learning_rate": 0.0009437302903831712, + "loss": 1.5378, + "step": 4007 + }, + { + "epoch": 0.42278481012658226, + "grad_norm": 0.6536105871200562, + "learning_rate": 0.0009434877401612898, + "loss": 1.517, + "step": 4008 + }, + { + "epoch": 0.4228902953586498, + "grad_norm": 0.7993787527084351, + "learning_rate": 0.0009432451682581424, + "loss": 1.5295, + "step": 4009 + }, + { + "epoch": 0.4229957805907173, + "grad_norm": 0.6943527460098267, + 
"learning_rate": 0.0009430025747009104, + "loss": 1.561, + "step": 4010 + }, + { + "epoch": 0.4231012658227848, + "grad_norm": 0.7646738290786743, + "learning_rate": 0.0009427599595167776, + "loss": 1.5128, + "step": 4011 + }, + { + "epoch": 0.42320675105485234, + "grad_norm": 0.6669653058052063, + "learning_rate": 0.0009425173227329297, + "loss": 1.501, + "step": 4012 + }, + { + "epoch": 0.42331223628691983, + "grad_norm": 0.6767058968544006, + "learning_rate": 0.0009422746643765563, + "loss": 1.5228, + "step": 4013 + }, + { + "epoch": 0.4234177215189873, + "grad_norm": 0.7699718475341797, + "learning_rate": 0.0009420319844748476, + "loss": 1.5351, + "step": 4014 + }, + { + "epoch": 0.42352320675105487, + "grad_norm": 0.7943326830863953, + "learning_rate": 0.0009417892830549978, + "loss": 1.5579, + "step": 4015 + }, + { + "epoch": 0.42362869198312236, + "grad_norm": 0.6749067902565002, + "learning_rate": 0.0009415465601442023, + "loss": 1.5513, + "step": 4016 + }, + { + "epoch": 0.42373417721518986, + "grad_norm": 0.8677799701690674, + "learning_rate": 0.0009413038157696595, + "loss": 1.5214, + "step": 4017 + }, + { + "epoch": 0.4238396624472574, + "grad_norm": 0.6878500580787659, + "learning_rate": 0.0009410610499585705, + "loss": 1.5531, + "step": 4018 + }, + { + "epoch": 0.4239451476793249, + "grad_norm": 0.8919906616210938, + "learning_rate": 0.000940818262738138, + "loss": 1.5148, + "step": 4019 + }, + { + "epoch": 0.4240506329113924, + "grad_norm": 0.9379571676254272, + "learning_rate": 0.0009405754541355677, + "loss": 1.5747, + "step": 4020 + }, + { + "epoch": 0.42415611814345994, + "grad_norm": 0.6831204891204834, + "learning_rate": 0.0009403326241780674, + "loss": 1.5113, + "step": 4021 + }, + { + "epoch": 0.42426160337552743, + "grad_norm": 0.8475743532180786, + "learning_rate": 0.0009400897728928475, + "loss": 1.5587, + "step": 4022 + }, + { + "epoch": 0.4243670886075949, + "grad_norm": 0.7809279561042786, + "learning_rate": 0.0009398469003071207, + 
"loss": 1.5162, + "step": 4023 + }, + { + "epoch": 0.42447257383966247, + "grad_norm": 0.6452289819717407, + "learning_rate": 0.0009396040064481021, + "loss": 1.5371, + "step": 4024 + }, + { + "epoch": 0.42457805907172996, + "grad_norm": 0.793380081653595, + "learning_rate": 0.000939361091343009, + "loss": 1.5604, + "step": 4025 + }, + { + "epoch": 0.42468354430379746, + "grad_norm": 0.7871118187904358, + "learning_rate": 0.0009391181550190615, + "loss": 1.5496, + "step": 4026 + }, + { + "epoch": 0.424789029535865, + "grad_norm": 0.6912455558776855, + "learning_rate": 0.0009388751975034815, + "loss": 1.504, + "step": 4027 + }, + { + "epoch": 0.4248945147679325, + "grad_norm": 0.7093061208724976, + "learning_rate": 0.0009386322188234941, + "loss": 1.5359, + "step": 4028 + }, + { + "epoch": 0.425, + "grad_norm": 0.7101051211357117, + "learning_rate": 0.0009383892190063256, + "loss": 1.5248, + "step": 4029 + }, + { + "epoch": 0.42510548523206754, + "grad_norm": 0.7405291199684143, + "learning_rate": 0.0009381461980792061, + "loss": 1.5349, + "step": 4030 + }, + { + "epoch": 0.42521097046413503, + "grad_norm": 0.6671616435050964, + "learning_rate": 0.0009379031560693665, + "loss": 1.5203, + "step": 4031 + }, + { + "epoch": 0.4253164556962025, + "grad_norm": 0.882743239402771, + "learning_rate": 0.0009376600930040417, + "loss": 1.5705, + "step": 4032 + }, + { + "epoch": 0.42542194092827, + "grad_norm": 0.7401672601699829, + "learning_rate": 0.0009374170089104676, + "loss": 1.5504, + "step": 4033 + }, + { + "epoch": 0.42552742616033756, + "grad_norm": 0.6869709491729736, + "learning_rate": 0.000937173903815883, + "loss": 1.5387, + "step": 4034 + }, + { + "epoch": 0.42563291139240506, + "grad_norm": 0.6964125633239746, + "learning_rate": 0.0009369307777475293, + "loss": 1.5363, + "step": 4035 + }, + { + "epoch": 0.42573839662447255, + "grad_norm": 0.7812589406967163, + "learning_rate": 0.0009366876307326496, + "loss": 1.5293, + "step": 4036 + }, + { + "epoch": 
0.4258438818565401, + "grad_norm": 0.6914496421813965, + "learning_rate": 0.0009364444627984902, + "loss": 1.5658, + "step": 4037 + }, + { + "epoch": 0.4259493670886076, + "grad_norm": 0.7036728858947754, + "learning_rate": 0.000936201273972299, + "loss": 1.5521, + "step": 4038 + }, + { + "epoch": 0.4260548523206751, + "grad_norm": 0.8942505121231079, + "learning_rate": 0.0009359580642813265, + "loss": 1.5143, + "step": 4039 + }, + { + "epoch": 0.42616033755274263, + "grad_norm": 0.7368879318237305, + "learning_rate": 0.0009357148337528256, + "loss": 1.5191, + "step": 4040 + }, + { + "epoch": 0.4262658227848101, + "grad_norm": 0.8612902760505676, + "learning_rate": 0.0009354715824140515, + "loss": 1.5358, + "step": 4041 + }, + { + "epoch": 0.4263713080168776, + "grad_norm": 1.1197867393493652, + "learning_rate": 0.0009352283102922619, + "loss": 1.5642, + "step": 4042 + }, + { + "epoch": 0.42647679324894516, + "grad_norm": 0.6966684460639954, + "learning_rate": 0.0009349850174147165, + "loss": 1.5168, + "step": 4043 + }, + { + "epoch": 0.42658227848101266, + "grad_norm": 0.8184093832969666, + "learning_rate": 0.0009347417038086772, + "loss": 1.53, + "step": 4044 + }, + { + "epoch": 0.42668776371308015, + "grad_norm": 1.0225011110305786, + "learning_rate": 0.000934498369501409, + "loss": 1.5858, + "step": 4045 + }, + { + "epoch": 0.4267932489451477, + "grad_norm": 0.6942309737205505, + "learning_rate": 0.0009342550145201786, + "loss": 1.5236, + "step": 4046 + }, + { + "epoch": 0.4268987341772152, + "grad_norm": 0.8458907604217529, + "learning_rate": 0.0009340116388922551, + "loss": 1.5353, + "step": 4047 + }, + { + "epoch": 0.4270042194092827, + "grad_norm": 0.9149383902549744, + "learning_rate": 0.0009337682426449097, + "loss": 1.5467, + "step": 4048 + }, + { + "epoch": 0.42710970464135023, + "grad_norm": 0.780382513999939, + "learning_rate": 0.0009335248258054162, + "loss": 1.5238, + "step": 4049 + }, + { + "epoch": 0.4272151898734177, + "grad_norm": 
0.6904734373092651, + "learning_rate": 0.0009332813884010511, + "loss": 1.5457, + "step": 4050 + }, + { + "epoch": 0.4273206751054852, + "grad_norm": 1.0575125217437744, + "learning_rate": 0.0009330379304590924, + "loss": 1.5132, + "step": 4051 + }, + { + "epoch": 0.42742616033755276, + "grad_norm": 0.801130473613739, + "learning_rate": 0.000932794452006821, + "loss": 1.57, + "step": 4052 + }, + { + "epoch": 0.42753164556962026, + "grad_norm": 0.7167452573776245, + "learning_rate": 0.0009325509530715196, + "loss": 1.549, + "step": 4053 + }, + { + "epoch": 0.42763713080168775, + "grad_norm": 0.9416911005973816, + "learning_rate": 0.0009323074336804738, + "loss": 1.5415, + "step": 4054 + }, + { + "epoch": 0.4277426160337553, + "grad_norm": 0.79378342628479, + "learning_rate": 0.0009320638938609708, + "loss": 1.556, + "step": 4055 + }, + { + "epoch": 0.4278481012658228, + "grad_norm": 0.6710862517356873, + "learning_rate": 0.0009318203336403008, + "loss": 1.5211, + "step": 4056 + }, + { + "epoch": 0.4279535864978903, + "grad_norm": 0.8243035078048706, + "learning_rate": 0.0009315767530457556, + "loss": 1.524, + "step": 4057 + }, + { + "epoch": 0.42805907172995783, + "grad_norm": 0.6397964358329773, + "learning_rate": 0.0009313331521046299, + "loss": 1.553, + "step": 4058 + }, + { + "epoch": 0.4281645569620253, + "grad_norm": 1.1184732913970947, + "learning_rate": 0.0009310895308442202, + "loss": 1.5782, + "step": 4059 + }, + { + "epoch": 0.4282700421940928, + "grad_norm": 0.8219714164733887, + "learning_rate": 0.0009308458892918259, + "loss": 1.529, + "step": 4060 + }, + { + "epoch": 0.42837552742616036, + "grad_norm": 0.8935809135437012, + "learning_rate": 0.0009306022274747478, + "loss": 1.5578, + "step": 4061 + }, + { + "epoch": 0.42848101265822786, + "grad_norm": 0.9541922807693481, + "learning_rate": 0.0009303585454202892, + "loss": 1.5437, + "step": 4062 + }, + { + "epoch": 0.42858649789029535, + "grad_norm": 0.8427433371543884, + "learning_rate": 
0.0009301148431557565, + "loss": 1.4932, + "step": 4063 + }, + { + "epoch": 0.4286919831223629, + "grad_norm": 1.5217996835708618, + "learning_rate": 0.0009298711207084575, + "loss": 1.5177, + "step": 4064 + }, + { + "epoch": 0.4287974683544304, + "grad_norm": 0.8037291169166565, + "learning_rate": 0.0009296273781057026, + "loss": 1.5791, + "step": 4065 + }, + { + "epoch": 0.4289029535864979, + "grad_norm": 1.6678203344345093, + "learning_rate": 0.0009293836153748039, + "loss": 1.5394, + "step": 4066 + }, + { + "epoch": 0.4290084388185654, + "grad_norm": 1.0613194704055786, + "learning_rate": 0.0009291398325430771, + "loss": 1.5371, + "step": 4067 + }, + { + "epoch": 0.4291139240506329, + "grad_norm": 1.5549988746643066, + "learning_rate": 0.0009288960296378386, + "loss": 1.5267, + "step": 4068 + }, + { + "epoch": 0.4292194092827004, + "grad_norm": 1.419244408607483, + "learning_rate": 0.0009286522066864078, + "loss": 1.5996, + "step": 4069 + }, + { + "epoch": 0.4293248945147679, + "grad_norm": 1.0520957708358765, + "learning_rate": 0.0009284083637161064, + "loss": 1.5514, + "step": 4070 + }, + { + "epoch": 0.42943037974683546, + "grad_norm": 0.9038794040679932, + "learning_rate": 0.0009281645007542584, + "loss": 1.5707, + "step": 4071 + }, + { + "epoch": 0.42953586497890295, + "grad_norm": 1.027587652206421, + "learning_rate": 0.0009279206178281895, + "loss": 1.5255, + "step": 4072 + }, + { + "epoch": 0.42964135021097044, + "grad_norm": 0.7100927233695984, + "learning_rate": 0.0009276767149652284, + "loss": 1.5168, + "step": 4073 + }, + { + "epoch": 0.429746835443038, + "grad_norm": 0.935554563999176, + "learning_rate": 0.0009274327921927054, + "loss": 1.5276, + "step": 4074 + }, + { + "epoch": 0.4298523206751055, + "grad_norm": 0.66031813621521, + "learning_rate": 0.0009271888495379529, + "loss": 1.5767, + "step": 4075 + }, + { + "epoch": 0.429957805907173, + "grad_norm": 0.8396720290184021, + "learning_rate": 0.0009269448870283067, + "loss": 1.5389, + "step": 
4076 + }, + { + "epoch": 0.4300632911392405, + "grad_norm": 0.7592858672142029, + "learning_rate": 0.0009267009046911032, + "loss": 1.548, + "step": 4077 + }, + { + "epoch": 0.430168776371308, + "grad_norm": 0.7062767744064331, + "learning_rate": 0.0009264569025536825, + "loss": 1.5324, + "step": 4078 + }, + { + "epoch": 0.4302742616033755, + "grad_norm": 0.6893914937973022, + "learning_rate": 0.0009262128806433858, + "loss": 1.5516, + "step": 4079 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 0.7342845797538757, + "learning_rate": 0.0009259688389875574, + "loss": 1.519, + "step": 4080 + }, + { + "epoch": 0.43048523206751055, + "grad_norm": 0.6242480278015137, + "learning_rate": 0.000925724777613543, + "loss": 1.5389, + "step": 4081 + }, + { + "epoch": 0.43059071729957804, + "grad_norm": 0.7060614228248596, + "learning_rate": 0.0009254806965486909, + "loss": 1.563, + "step": 4082 + }, + { + "epoch": 0.4306962025316456, + "grad_norm": 0.6950621604919434, + "learning_rate": 0.0009252365958203518, + "loss": 1.5686, + "step": 4083 + }, + { + "epoch": 0.4308016877637131, + "grad_norm": 0.7070620059967041, + "learning_rate": 0.0009249924754558785, + "loss": 1.5017, + "step": 4084 + }, + { + "epoch": 0.4309071729957806, + "grad_norm": 0.7064811587333679, + "learning_rate": 0.0009247483354826255, + "loss": 1.519, + "step": 4085 + }, + { + "epoch": 0.4310126582278481, + "grad_norm": 0.7475478053092957, + "learning_rate": 0.0009245041759279502, + "loss": 1.5488, + "step": 4086 + }, + { + "epoch": 0.4311181434599156, + "grad_norm": 0.7164301872253418, + "learning_rate": 0.0009242599968192119, + "loss": 1.5503, + "step": 4087 + }, + { + "epoch": 0.4312236286919831, + "grad_norm": 0.7785665988922119, + "learning_rate": 0.000924015798183772, + "loss": 1.5257, + "step": 4088 + }, + { + "epoch": 0.43132911392405066, + "grad_norm": 0.6626662611961365, + "learning_rate": 0.0009237715800489942, + "loss": 1.5146, + "step": 4089 + }, + { + "epoch": 0.43143459915611815, + 
"grad_norm": 0.8460595011711121, + "learning_rate": 0.0009235273424422442, + "loss": 1.5136, + "step": 4090 + }, + { + "epoch": 0.43154008438818564, + "grad_norm": 0.6897493004798889, + "learning_rate": 0.0009232830853908904, + "loss": 1.5718, + "step": 4091 + }, + { + "epoch": 0.4316455696202532, + "grad_norm": 0.9650319218635559, + "learning_rate": 0.0009230388089223028, + "loss": 1.5385, + "step": 4092 + }, + { + "epoch": 0.4317510548523207, + "grad_norm": 0.8589827418327332, + "learning_rate": 0.0009227945130638537, + "loss": 1.5364, + "step": 4093 + }, + { + "epoch": 0.4318565400843882, + "grad_norm": 0.7425510883331299, + "learning_rate": 0.0009225501978429177, + "loss": 1.5473, + "step": 4094 + }, + { + "epoch": 0.4319620253164557, + "grad_norm": 0.8931043148040771, + "learning_rate": 0.0009223058632868719, + "loss": 1.534, + "step": 4095 + }, + { + "epoch": 0.4320675105485232, + "grad_norm": 0.7305726408958435, + "learning_rate": 0.0009220615094230946, + "loss": 1.5345, + "step": 4096 + }, + { + "epoch": 0.4321729957805907, + "grad_norm": 0.8190504908561707, + "learning_rate": 0.0009218171362789674, + "loss": 1.5253, + "step": 4097 + }, + { + "epoch": 0.43227848101265826, + "grad_norm": 0.7342987060546875, + "learning_rate": 0.0009215727438818733, + "loss": 1.5456, + "step": 4098 + }, + { + "epoch": 0.43238396624472575, + "grad_norm": 0.7308434844017029, + "learning_rate": 0.0009213283322591977, + "loss": 1.4883, + "step": 4099 + }, + { + "epoch": 0.43248945147679324, + "grad_norm": 0.8232442140579224, + "learning_rate": 0.0009210839014383282, + "loss": 1.5158, + "step": 4100 + }, + { + "epoch": 0.43259493670886073, + "grad_norm": 0.7591631412506104, + "learning_rate": 0.0009208394514466544, + "loss": 1.5238, + "step": 4101 + }, + { + "epoch": 0.4327004219409283, + "grad_norm": 0.6651302576065063, + "learning_rate": 0.0009205949823115681, + "loss": 1.5088, + "step": 4102 + }, + { + "epoch": 0.4328059071729958, + "grad_norm": 0.8679419755935669, + 
"learning_rate": 0.0009203504940604634, + "loss": 1.4663, + "step": 4103 + }, + { + "epoch": 0.43291139240506327, + "grad_norm": 0.9109071493148804, + "learning_rate": 0.0009201059867207366, + "loss": 1.5312, + "step": 4104 + }, + { + "epoch": 0.4330168776371308, + "grad_norm": 0.7218418717384338, + "learning_rate": 0.0009198614603197854, + "loss": 1.5131, + "step": 4105 + }, + { + "epoch": 0.4331223628691983, + "grad_norm": 0.9493518471717834, + "learning_rate": 0.0009196169148850108, + "loss": 1.5497, + "step": 4106 + }, + { + "epoch": 0.4332278481012658, + "grad_norm": 0.7528095841407776, + "learning_rate": 0.000919372350443815, + "loss": 1.5509, + "step": 4107 + }, + { + "epoch": 0.43333333333333335, + "grad_norm": 0.8055837750434875, + "learning_rate": 0.000919127767023603, + "loss": 1.5222, + "step": 4108 + }, + { + "epoch": 0.43343881856540084, + "grad_norm": 0.6562950611114502, + "learning_rate": 0.000918883164651781, + "loss": 1.5522, + "step": 4109 + }, + { + "epoch": 0.43354430379746833, + "grad_norm": 0.962735652923584, + "learning_rate": 0.0009186385433557584, + "loss": 1.5627, + "step": 4110 + }, + { + "epoch": 0.4336497890295359, + "grad_norm": 0.714625358581543, + "learning_rate": 0.0009183939031629462, + "loss": 1.5142, + "step": 4111 + }, + { + "epoch": 0.4337552742616034, + "grad_norm": 0.998435378074646, + "learning_rate": 0.0009181492441007577, + "loss": 1.498, + "step": 4112 + }, + { + "epoch": 0.43386075949367087, + "grad_norm": 0.9519908428192139, + "learning_rate": 0.0009179045661966075, + "loss": 1.5395, + "step": 4113 + }, + { + "epoch": 0.4339662447257384, + "grad_norm": 0.7887759804725647, + "learning_rate": 0.0009176598694779134, + "loss": 1.531, + "step": 4114 + }, + { + "epoch": 0.4340717299578059, + "grad_norm": 1.2048386335372925, + "learning_rate": 0.0009174151539720953, + "loss": 1.5357, + "step": 4115 + }, + { + "epoch": 0.4341772151898734, + "grad_norm": 0.7586068511009216, + "learning_rate": 0.0009171704197065741, + "loss": 
1.5287, + "step": 4116 + }, + { + "epoch": 0.43428270042194095, + "grad_norm": 1.415606141090393, + "learning_rate": 0.0009169256667087738, + "loss": 1.5407, + "step": 4117 + }, + { + "epoch": 0.43438818565400844, + "grad_norm": 0.8212902545928955, + "learning_rate": 0.0009166808950061202, + "loss": 1.5287, + "step": 4118 + }, + { + "epoch": 0.43449367088607593, + "grad_norm": 1.6451188325881958, + "learning_rate": 0.0009164361046260412, + "loss": 1.5625, + "step": 4119 + }, + { + "epoch": 0.4345991561181435, + "grad_norm": 1.017853856086731, + "learning_rate": 0.0009161912955959668, + "loss": 1.5627, + "step": 4120 + }, + { + "epoch": 0.434704641350211, + "grad_norm": 1.4888157844543457, + "learning_rate": 0.0009159464679433289, + "loss": 1.5412, + "step": 4121 + }, + { + "epoch": 0.43481012658227847, + "grad_norm": 1.4266104698181152, + "learning_rate": 0.0009157016216955618, + "loss": 1.5223, + "step": 4122 + }, + { + "epoch": 0.434915611814346, + "grad_norm": 1.2170336246490479, + "learning_rate": 0.0009154567568801019, + "loss": 1.5031, + "step": 4123 + }, + { + "epoch": 0.4350210970464135, + "grad_norm": 1.0767579078674316, + "learning_rate": 0.0009152118735243871, + "loss": 1.5312, + "step": 4124 + }, + { + "epoch": 0.435126582278481, + "grad_norm": 0.9030364155769348, + "learning_rate": 0.0009149669716558582, + "loss": 1.5125, + "step": 4125 + }, + { + "epoch": 0.43523206751054855, + "grad_norm": 0.8334112167358398, + "learning_rate": 0.0009147220513019577, + "loss": 1.5236, + "step": 4126 + }, + { + "epoch": 0.43533755274261604, + "grad_norm": 0.7224156260490417, + "learning_rate": 0.0009144771124901295, + "loss": 1.5095, + "step": 4127 + }, + { + "epoch": 0.43544303797468353, + "grad_norm": 0.7775344848632812, + "learning_rate": 0.000914232155247821, + "loss": 1.5329, + "step": 4128 + }, + { + "epoch": 0.4355485232067511, + "grad_norm": 0.6730276942253113, + "learning_rate": 0.0009139871796024807, + "loss": 1.5375, + "step": 4129 + }, + { + "epoch": 
0.4356540084388186, + "grad_norm": 0.7603509426116943, + "learning_rate": 0.000913742185581559, + "loss": 1.5388, + "step": 4130 + }, + { + "epoch": 0.43575949367088607, + "grad_norm": 0.676482081413269, + "learning_rate": 0.0009134971732125088, + "loss": 1.5353, + "step": 4131 + }, + { + "epoch": 0.43586497890295356, + "grad_norm": 0.7391741871833801, + "learning_rate": 0.0009132521425227852, + "loss": 1.5066, + "step": 4132 + }, + { + "epoch": 0.4359704641350211, + "grad_norm": 0.8075409531593323, + "learning_rate": 0.0009130070935398451, + "loss": 1.5434, + "step": 4133 + }, + { + "epoch": 0.4360759493670886, + "grad_norm": 0.9396023154258728, + "learning_rate": 0.0009127620262911473, + "loss": 1.5517, + "step": 4134 + }, + { + "epoch": 0.4361814345991561, + "grad_norm": 0.6939293742179871, + "learning_rate": 0.0009125169408041526, + "loss": 1.503, + "step": 4135 + }, + { + "epoch": 0.43628691983122364, + "grad_norm": 0.9087029695510864, + "learning_rate": 0.0009122718371063247, + "loss": 1.5111, + "step": 4136 + }, + { + "epoch": 0.43639240506329113, + "grad_norm": 0.8820258378982544, + "learning_rate": 0.0009120267152251281, + "loss": 1.5187, + "step": 4137 + }, + { + "epoch": 0.4364978902953586, + "grad_norm": 0.6564604640007019, + "learning_rate": 0.0009117815751880301, + "loss": 1.5273, + "step": 4138 + }, + { + "epoch": 0.4366033755274262, + "grad_norm": 0.7696422934532166, + "learning_rate": 0.0009115364170225, + "loss": 1.5369, + "step": 4139 + }, + { + "epoch": 0.43670886075949367, + "grad_norm": 0.741646945476532, + "learning_rate": 0.0009112912407560086, + "loss": 1.5186, + "step": 4140 + }, + { + "epoch": 0.43681434599156116, + "grad_norm": 0.6971184611320496, + "learning_rate": 0.0009110460464160295, + "loss": 1.5, + "step": 4141 + }, + { + "epoch": 0.4369198312236287, + "grad_norm": 0.665569007396698, + "learning_rate": 0.000910800834030038, + "loss": 1.5268, + "step": 4142 + }, + { + "epoch": 0.4370253164556962, + "grad_norm": 0.6328557729721069, 
+ "learning_rate": 0.0009105556036255113, + "loss": 1.536, + "step": 4143 + }, + { + "epoch": 0.4371308016877637, + "grad_norm": 0.6817472577095032, + "learning_rate": 0.0009103103552299283, + "loss": 1.5392, + "step": 4144 + }, + { + "epoch": 0.43723628691983124, + "grad_norm": 0.6218203902244568, + "learning_rate": 0.0009100650888707709, + "loss": 1.5136, + "step": 4145 + }, + { + "epoch": 0.43734177215189873, + "grad_norm": 0.6865107417106628, + "learning_rate": 0.000909819804575522, + "loss": 1.5136, + "step": 4146 + }, + { + "epoch": 0.4374472573839662, + "grad_norm": 0.6362533569335938, + "learning_rate": 0.0009095745023716671, + "loss": 1.5424, + "step": 4147 + }, + { + "epoch": 0.4375527426160338, + "grad_norm": 0.8564329147338867, + "learning_rate": 0.0009093291822866933, + "loss": 1.5721, + "step": 4148 + }, + { + "epoch": 0.43765822784810127, + "grad_norm": 0.8392696380615234, + "learning_rate": 0.0009090838443480903, + "loss": 1.5211, + "step": 4149 + }, + { + "epoch": 0.43776371308016876, + "grad_norm": 0.7195979952812195, + "learning_rate": 0.0009088384885833495, + "loss": 1.5243, + "step": 4150 + }, + { + "epoch": 0.4378691983122363, + "grad_norm": 0.6695921421051025, + "learning_rate": 0.0009085931150199638, + "loss": 1.5347, + "step": 4151 + }, + { + "epoch": 0.4379746835443038, + "grad_norm": 0.7176480889320374, + "learning_rate": 0.0009083477236854287, + "loss": 1.5492, + "step": 4152 + }, + { + "epoch": 0.4380801687763713, + "grad_norm": 0.6480945944786072, + "learning_rate": 0.0009081023146072414, + "loss": 1.5412, + "step": 4153 + }, + { + "epoch": 0.43818565400843884, + "grad_norm": 0.790723979473114, + "learning_rate": 0.0009078568878129018, + "loss": 1.4812, + "step": 4154 + }, + { + "epoch": 0.43829113924050633, + "grad_norm": 0.7726647853851318, + "learning_rate": 0.0009076114433299107, + "loss": 1.5181, + "step": 4155 + }, + { + "epoch": 0.4383966244725738, + "grad_norm": 0.719828188419342, + "learning_rate": 0.0009073659811857712, + 
"loss": 1.5427, + "step": 4156 + }, + { + "epoch": 0.4385021097046414, + "grad_norm": 0.8236888647079468, + "learning_rate": 0.0009071205014079888, + "loss": 1.5427, + "step": 4157 + }, + { + "epoch": 0.43860759493670887, + "grad_norm": 0.8881818056106567, + "learning_rate": 0.0009068750040240709, + "loss": 1.5023, + "step": 4158 + }, + { + "epoch": 0.43871308016877636, + "grad_norm": 0.7540772557258606, + "learning_rate": 0.0009066294890615266, + "loss": 1.5204, + "step": 4159 + }, + { + "epoch": 0.4388185654008439, + "grad_norm": 0.7475396990776062, + "learning_rate": 0.000906383956547867, + "loss": 1.5154, + "step": 4160 + }, + { + "epoch": 0.4389240506329114, + "grad_norm": 0.8989144563674927, + "learning_rate": 0.0009061384065106051, + "loss": 1.5263, + "step": 4161 + }, + { + "epoch": 0.4390295358649789, + "grad_norm": 0.8788692355155945, + "learning_rate": 0.0009058928389772564, + "loss": 1.5439, + "step": 4162 + }, + { + "epoch": 0.43913502109704644, + "grad_norm": 0.7350172400474548, + "learning_rate": 0.0009056472539753377, + "loss": 1.5359, + "step": 4163 + }, + { + "epoch": 0.43924050632911393, + "grad_norm": 0.7486791610717773, + "learning_rate": 0.0009054016515323679, + "loss": 1.4862, + "step": 4164 + }, + { + "epoch": 0.4393459915611814, + "grad_norm": 0.7470240592956543, + "learning_rate": 0.0009051560316758684, + "loss": 1.5304, + "step": 4165 + }, + { + "epoch": 0.4394514767932489, + "grad_norm": 0.6985856890678406, + "learning_rate": 0.0009049103944333616, + "loss": 1.5143, + "step": 4166 + }, + { + "epoch": 0.43955696202531647, + "grad_norm": 0.724311888217926, + "learning_rate": 0.0009046647398323728, + "loss": 1.513, + "step": 4167 + }, + { + "epoch": 0.43966244725738396, + "grad_norm": 0.9982399344444275, + "learning_rate": 0.0009044190679004286, + "loss": 1.5309, + "step": 4168 + }, + { + "epoch": 0.43976793248945145, + "grad_norm": 1.0291616916656494, + "learning_rate": 0.0009041733786650578, + "loss": 1.5311, + "step": 4169 + }, + { + 
"epoch": 0.439873417721519, + "grad_norm": 0.6885634064674377, + "learning_rate": 0.0009039276721537915, + "loss": 1.5585, + "step": 4170 + }, + { + "epoch": 0.4399789029535865, + "grad_norm": 0.8191965222358704, + "learning_rate": 0.0009036819483941614, + "loss": 1.5298, + "step": 4171 + }, + { + "epoch": 0.440084388185654, + "grad_norm": 0.701108992099762, + "learning_rate": 0.0009034362074137032, + "loss": 1.5215, + "step": 4172 + }, + { + "epoch": 0.44018987341772153, + "grad_norm": 0.721072793006897, + "learning_rate": 0.0009031904492399526, + "loss": 1.5593, + "step": 4173 + }, + { + "epoch": 0.440295358649789, + "grad_norm": 0.7433125972747803, + "learning_rate": 0.0009029446739004483, + "loss": 1.5267, + "step": 4174 + }, + { + "epoch": 0.4404008438818565, + "grad_norm": 0.7142111659049988, + "learning_rate": 0.0009026988814227308, + "loss": 1.5067, + "step": 4175 + }, + { + "epoch": 0.44050632911392407, + "grad_norm": 0.6537703275680542, + "learning_rate": 0.0009024530718343418, + "loss": 1.5022, + "step": 4176 + }, + { + "epoch": 0.44061181434599156, + "grad_norm": 0.7382895946502686, + "learning_rate": 0.0009022072451628263, + "loss": 1.5374, + "step": 4177 + }, + { + "epoch": 0.44071729957805905, + "grad_norm": 0.6534526348114014, + "learning_rate": 0.0009019614014357298, + "loss": 1.5036, + "step": 4178 + }, + { + "epoch": 0.4408227848101266, + "grad_norm": 0.6759588718414307, + "learning_rate": 0.0009017155406806006, + "loss": 1.5427, + "step": 4179 + }, + { + "epoch": 0.4409282700421941, + "grad_norm": 0.6458433866500854, + "learning_rate": 0.0009014696629249886, + "loss": 1.506, + "step": 4180 + }, + { + "epoch": 0.4410337552742616, + "grad_norm": 0.7481001019477844, + "learning_rate": 0.0009012237681964454, + "loss": 1.5442, + "step": 4181 + }, + { + "epoch": 0.44113924050632913, + "grad_norm": 0.6950808167457581, + "learning_rate": 0.0009009778565225251, + "loss": 1.53, + "step": 4182 + }, + { + "epoch": 0.4412447257383966, + "grad_norm": 
0.902151882648468, + "learning_rate": 0.000900731927930783, + "loss": 1.5124, + "step": 4183 + }, + { + "epoch": 0.4413502109704641, + "grad_norm": 0.7536179423332214, + "learning_rate": 0.0009004859824487769, + "loss": 1.5291, + "step": 4184 + }, + { + "epoch": 0.44145569620253167, + "grad_norm": 0.7349167466163635, + "learning_rate": 0.0009002400201040659, + "loss": 1.5298, + "step": 4185 + }, + { + "epoch": 0.44156118143459916, + "grad_norm": 0.7043653130531311, + "learning_rate": 0.0008999940409242115, + "loss": 1.5146, + "step": 4186 + }, + { + "epoch": 0.44166666666666665, + "grad_norm": 0.7647130489349365, + "learning_rate": 0.0008997480449367771, + "loss": 1.555, + "step": 4187 + }, + { + "epoch": 0.4417721518987342, + "grad_norm": 0.7915642857551575, + "learning_rate": 0.0008995020321693274, + "loss": 1.5474, + "step": 4188 + }, + { + "epoch": 0.4418776371308017, + "grad_norm": 0.8356932997703552, + "learning_rate": 0.0008992560026494294, + "loss": 1.5367, + "step": 4189 + }, + { + "epoch": 0.4419831223628692, + "grad_norm": 0.7224768996238708, + "learning_rate": 0.0008990099564046522, + "loss": 1.5403, + "step": 4190 + }, + { + "epoch": 0.44208860759493673, + "grad_norm": 0.8281840682029724, + "learning_rate": 0.0008987638934625662, + "loss": 1.5212, + "step": 4191 + }, + { + "epoch": 0.4421940928270042, + "grad_norm": 0.851258397102356, + "learning_rate": 0.0008985178138507441, + "loss": 1.5451, + "step": 4192 + }, + { + "epoch": 0.4422995780590717, + "grad_norm": 0.9512465000152588, + "learning_rate": 0.0008982717175967606, + "loss": 1.5146, + "step": 4193 + }, + { + "epoch": 0.44240506329113927, + "grad_norm": 0.7850602269172668, + "learning_rate": 0.0008980256047281919, + "loss": 1.5717, + "step": 4194 + }, + { + "epoch": 0.44251054852320676, + "grad_norm": 0.9189220666885376, + "learning_rate": 0.0008977794752726159, + "loss": 1.5339, + "step": 4195 + }, + { + "epoch": 0.44261603375527425, + "grad_norm": 0.8519622087478638, + "learning_rate": 
0.0008975333292576125, + "loss": 1.499, + "step": 4196 + }, + { + "epoch": 0.44272151898734174, + "grad_norm": 0.8132935762405396, + "learning_rate": 0.0008972871667107643, + "loss": 1.5299, + "step": 4197 + }, + { + "epoch": 0.4428270042194093, + "grad_norm": 0.9802167415618896, + "learning_rate": 0.0008970409876596545, + "loss": 1.5461, + "step": 4198 + }, + { + "epoch": 0.4429324894514768, + "grad_norm": 0.7223727703094482, + "learning_rate": 0.0008967947921318689, + "loss": 1.5042, + "step": 4199 + }, + { + "epoch": 0.4430379746835443, + "grad_norm": 0.7903794050216675, + "learning_rate": 0.0008965485801549946, + "loss": 1.5716, + "step": 4200 + }, + { + "epoch": 0.4431434599156118, + "grad_norm": 0.897591233253479, + "learning_rate": 0.0008963023517566213, + "loss": 1.5304, + "step": 4201 + }, + { + "epoch": 0.4432489451476793, + "grad_norm": 0.7039647102355957, + "learning_rate": 0.0008960561069643402, + "loss": 1.5305, + "step": 4202 + }, + { + "epoch": 0.4433544303797468, + "grad_norm": 0.918906569480896, + "learning_rate": 0.0008958098458057436, + "loss": 1.4999, + "step": 4203 + }, + { + "epoch": 0.44345991561181436, + "grad_norm": 0.6651415228843689, + "learning_rate": 0.000895563568308427, + "loss": 1.5345, + "step": 4204 + }, + { + "epoch": 0.44356540084388185, + "grad_norm": 0.9457806944847107, + "learning_rate": 0.0008953172744999865, + "loss": 1.4937, + "step": 4205 + }, + { + "epoch": 0.44367088607594934, + "grad_norm": 0.841572105884552, + "learning_rate": 0.000895070964408021, + "loss": 1.5652, + "step": 4206 + }, + { + "epoch": 0.4437763713080169, + "grad_norm": 0.7042601108551025, + "learning_rate": 0.0008948246380601303, + "loss": 1.4909, + "step": 4207 + }, + { + "epoch": 0.4438818565400844, + "grad_norm": 0.9004464149475098, + "learning_rate": 0.000894578295483917, + "loss": 1.5207, + "step": 4208 + }, + { + "epoch": 0.4439873417721519, + "grad_norm": 0.7342594265937805, + "learning_rate": 0.0008943319367069844, + "loss": 1.5195, + "step": 
4209 + }, + { + "epoch": 0.4440928270042194, + "grad_norm": 0.7550455331802368, + "learning_rate": 0.000894085561756939, + "loss": 1.5325, + "step": 4210 + }, + { + "epoch": 0.4441983122362869, + "grad_norm": 0.8360064029693604, + "learning_rate": 0.0008938391706613878, + "loss": 1.5542, + "step": 4211 + }, + { + "epoch": 0.4443037974683544, + "grad_norm": 0.8406584858894348, + "learning_rate": 0.0008935927634479403, + "loss": 1.5186, + "step": 4212 + }, + { + "epoch": 0.44440928270042196, + "grad_norm": 0.7527983784675598, + "learning_rate": 0.0008933463401442073, + "loss": 1.5415, + "step": 4213 + }, + { + "epoch": 0.44451476793248945, + "grad_norm": 0.7863828539848328, + "learning_rate": 0.0008930999007778025, + "loss": 1.5138, + "step": 4214 + }, + { + "epoch": 0.44462025316455694, + "grad_norm": 0.9481515288352966, + "learning_rate": 0.0008928534453763402, + "loss": 1.5229, + "step": 4215 + }, + { + "epoch": 0.4447257383966245, + "grad_norm": 1.0041327476501465, + "learning_rate": 0.0008926069739674369, + "loss": 1.5324, + "step": 4216 + }, + { + "epoch": 0.444831223628692, + "grad_norm": 0.6872608065605164, + "learning_rate": 0.000892360486578711, + "loss": 1.522, + "step": 4217 + }, + { + "epoch": 0.4449367088607595, + "grad_norm": 0.9138922095298767, + "learning_rate": 0.0008921139832377829, + "loss": 1.4964, + "step": 4218 + }, + { + "epoch": 0.445042194092827, + "grad_norm": 0.7073841691017151, + "learning_rate": 0.0008918674639722742, + "loss": 1.5435, + "step": 4219 + }, + { + "epoch": 0.4451476793248945, + "grad_norm": 0.7584347724914551, + "learning_rate": 0.0008916209288098088, + "loss": 1.542, + "step": 4220 + }, + { + "epoch": 0.445253164556962, + "grad_norm": 0.8150386214256287, + "learning_rate": 0.0008913743777780122, + "loss": 1.5306, + "step": 4221 + }, + { + "epoch": 0.44535864978902956, + "grad_norm": 0.7023625373840332, + "learning_rate": 0.0008911278109045114, + "loss": 1.5382, + "step": 4222 + }, + { + "epoch": 0.44546413502109705, + 
"grad_norm": 0.7820265293121338, + "learning_rate": 0.0008908812282169359, + "loss": 1.5471, + "step": 4223 + }, + { + "epoch": 0.44556962025316454, + "grad_norm": 0.7114424705505371, + "learning_rate": 0.0008906346297429161, + "loss": 1.5015, + "step": 4224 + }, + { + "epoch": 0.4456751054852321, + "grad_norm": 0.8255308866500854, + "learning_rate": 0.000890388015510085, + "loss": 1.521, + "step": 4225 + }, + { + "epoch": 0.4457805907172996, + "grad_norm": 1.0862019062042236, + "learning_rate": 0.0008901413855460764, + "loss": 1.5099, + "step": 4226 + }, + { + "epoch": 0.4458860759493671, + "grad_norm": 0.6814942955970764, + "learning_rate": 0.0008898947398785271, + "loss": 1.5147, + "step": 4227 + }, + { + "epoch": 0.4459915611814346, + "grad_norm": 1.023859977722168, + "learning_rate": 0.0008896480785350743, + "loss": 1.5481, + "step": 4228 + }, + { + "epoch": 0.4460970464135021, + "grad_norm": 0.696678876876831, + "learning_rate": 0.0008894014015433582, + "loss": 1.5147, + "step": 4229 + }, + { + "epoch": 0.4462025316455696, + "grad_norm": 1.1463165283203125, + "learning_rate": 0.0008891547089310198, + "loss": 1.5204, + "step": 4230 + }, + { + "epoch": 0.4463080168776371, + "grad_norm": 1.0159375667572021, + "learning_rate": 0.0008889080007257024, + "loss": 1.5791, + "step": 4231 + }, + { + "epoch": 0.44641350210970465, + "grad_norm": 0.682907223701477, + "learning_rate": 0.0008886612769550508, + "loss": 1.5263, + "step": 4232 + }, + { + "epoch": 0.44651898734177214, + "grad_norm": 1.2084293365478516, + "learning_rate": 0.0008884145376467119, + "loss": 1.5309, + "step": 4233 + }, + { + "epoch": 0.44662447257383964, + "grad_norm": 0.6993973255157471, + "learning_rate": 0.0008881677828283337, + "loss": 1.5616, + "step": 4234 + }, + { + "epoch": 0.4467299578059072, + "grad_norm": 1.143420934677124, + "learning_rate": 0.0008879210125275664, + "loss": 1.5251, + "step": 4235 + }, + { + "epoch": 0.4468354430379747, + "grad_norm": 0.8119604587554932, + "learning_rate": 
0.000887674226772062, + "loss": 1.5014, + "step": 4236 + }, + { + "epoch": 0.44694092827004217, + "grad_norm": 1.0074689388275146, + "learning_rate": 0.000887427425589474, + "loss": 1.536, + "step": 4237 + }, + { + "epoch": 0.4470464135021097, + "grad_norm": 1.2840533256530762, + "learning_rate": 0.0008871806090074577, + "loss": 1.4777, + "step": 4238 + }, + { + "epoch": 0.4471518987341772, + "grad_norm": 0.7368476986885071, + "learning_rate": 0.0008869337770536699, + "loss": 1.5117, + "step": 4239 + }, + { + "epoch": 0.4472573839662447, + "grad_norm": 1.2458182573318481, + "learning_rate": 0.0008866869297557699, + "loss": 1.5074, + "step": 4240 + }, + { + "epoch": 0.44736286919831225, + "grad_norm": 0.6524723768234253, + "learning_rate": 0.0008864400671414177, + "loss": 1.5112, + "step": 4241 + }, + { + "epoch": 0.44746835443037974, + "grad_norm": 1.1020970344543457, + "learning_rate": 0.0008861931892382756, + "loss": 1.5386, + "step": 4242 + }, + { + "epoch": 0.44757383966244724, + "grad_norm": 0.7302436828613281, + "learning_rate": 0.0008859462960740076, + "loss": 1.5358, + "step": 4243 + }, + { + "epoch": 0.4476793248945148, + "grad_norm": 0.896528959274292, + "learning_rate": 0.000885699387676279, + "loss": 1.5344, + "step": 4244 + }, + { + "epoch": 0.4477848101265823, + "grad_norm": 0.7025715708732605, + "learning_rate": 0.0008854524640727575, + "loss": 1.5611, + "step": 4245 + }, + { + "epoch": 0.44789029535864977, + "grad_norm": 1.24149751663208, + "learning_rate": 0.0008852055252911121, + "loss": 1.5053, + "step": 4246 + }, + { + "epoch": 0.4479957805907173, + "grad_norm": 0.8295166492462158, + "learning_rate": 0.0008849585713590134, + "loss": 1.4751, + "step": 4247 + }, + { + "epoch": 0.4481012658227848, + "grad_norm": 1.3127480745315552, + "learning_rate": 0.0008847116023041336, + "loss": 1.5451, + "step": 4248 + }, + { + "epoch": 0.4482067510548523, + "grad_norm": 0.8113483190536499, + "learning_rate": 0.0008844646181541472, + "loss": 1.5474, + "step": 
4249 + }, + { + "epoch": 0.44831223628691985, + "grad_norm": 0.8494351506233215, + "learning_rate": 0.0008842176189367299, + "loss": 1.5331, + "step": 4250 + }, + { + "epoch": 0.44841772151898734, + "grad_norm": 0.6708007454872131, + "learning_rate": 0.000883970604679559, + "loss": 1.5195, + "step": 4251 + }, + { + "epoch": 0.44852320675105484, + "grad_norm": 1.0595104694366455, + "learning_rate": 0.0008837235754103136, + "loss": 1.5444, + "step": 4252 + }, + { + "epoch": 0.4486286919831224, + "grad_norm": 0.8287375569343567, + "learning_rate": 0.000883476531156675, + "loss": 1.5294, + "step": 4253 + }, + { + "epoch": 0.4487341772151899, + "grad_norm": 0.8216384053230286, + "learning_rate": 0.0008832294719463256, + "loss": 1.5236, + "step": 4254 + }, + { + "epoch": 0.44883966244725737, + "grad_norm": 0.637876570224762, + "learning_rate": 0.0008829823978069494, + "loss": 1.5615, + "step": 4255 + }, + { + "epoch": 0.4489451476793249, + "grad_norm": 0.793632984161377, + "learning_rate": 0.0008827353087662326, + "loss": 1.5611, + "step": 4256 + }, + { + "epoch": 0.4490506329113924, + "grad_norm": 0.7366625070571899, + "learning_rate": 0.0008824882048518622, + "loss": 1.4911, + "step": 4257 + }, + { + "epoch": 0.4491561181434599, + "grad_norm": 0.9358729124069214, + "learning_rate": 0.0008822410860915281, + "loss": 1.5239, + "step": 4258 + }, + { + "epoch": 0.44926160337552745, + "grad_norm": 0.7090138792991638, + "learning_rate": 0.0008819939525129207, + "loss": 1.5544, + "step": 4259 + }, + { + "epoch": 0.44936708860759494, + "grad_norm": 0.993075966835022, + "learning_rate": 0.0008817468041437329, + "loss": 1.5103, + "step": 4260 + }, + { + "epoch": 0.44947257383966244, + "grad_norm": 0.8351815342903137, + "learning_rate": 0.0008814996410116587, + "loss": 1.5417, + "step": 4261 + }, + { + "epoch": 0.44957805907173, + "grad_norm": 0.8179731369018555, + "learning_rate": 0.0008812524631443938, + "loss": 1.5373, + "step": 4262 + }, + { + "epoch": 0.4496835443037975, + 
"grad_norm": 0.9118608236312866, + "learning_rate": 0.0008810052705696363, + "loss": 1.4933, + "step": 4263 + }, + { + "epoch": 0.44978902953586497, + "grad_norm": 0.7403278350830078, + "learning_rate": 0.0008807580633150848, + "loss": 1.5455, + "step": 4264 + }, + { + "epoch": 0.44989451476793246, + "grad_norm": 0.7112175822257996, + "learning_rate": 0.0008805108414084401, + "loss": 1.5138, + "step": 4265 + }, + { + "epoch": 0.45, + "grad_norm": 0.8701838850975037, + "learning_rate": 0.0008802636048774052, + "loss": 1.5259, + "step": 4266 + }, + { + "epoch": 0.4501054852320675, + "grad_norm": 0.7100674510002136, + "learning_rate": 0.0008800163537496837, + "loss": 1.4839, + "step": 4267 + }, + { + "epoch": 0.450210970464135, + "grad_norm": 0.8005573153495789, + "learning_rate": 0.0008797690880529813, + "loss": 1.5097, + "step": 4268 + }, + { + "epoch": 0.45031645569620254, + "grad_norm": 0.754695475101471, + "learning_rate": 0.0008795218078150056, + "loss": 1.5064, + "step": 4269 + }, + { + "epoch": 0.45042194092827004, + "grad_norm": 0.7928927540779114, + "learning_rate": 0.0008792745130634654, + "loss": 1.542, + "step": 4270 + }, + { + "epoch": 0.45052742616033753, + "grad_norm": 0.7881571054458618, + "learning_rate": 0.0008790272038260715, + "loss": 1.4847, + "step": 4271 + }, + { + "epoch": 0.4506329113924051, + "grad_norm": 0.6869910359382629, + "learning_rate": 0.000878779880130536, + "loss": 1.5212, + "step": 4272 + }, + { + "epoch": 0.45073839662447257, + "grad_norm": 0.73291015625, + "learning_rate": 0.0008785325420045727, + "loss": 1.52, + "step": 4273 + }, + { + "epoch": 0.45084388185654006, + "grad_norm": 0.7089412808418274, + "learning_rate": 0.0008782851894758971, + "loss": 1.519, + "step": 4274 + }, + { + "epoch": 0.4509493670886076, + "grad_norm": 0.7512921690940857, + "learning_rate": 0.0008780378225722264, + "loss": 1.5084, + "step": 4275 + }, + { + "epoch": 0.4510548523206751, + "grad_norm": 0.815731942653656, + "learning_rate": 
0.0008777904413212794, + "loss": 1.5057, + "step": 4276 + }, + { + "epoch": 0.4511603375527426, + "grad_norm": 1.0946261882781982, + "learning_rate": 0.0008775430457507759, + "loss": 1.5441, + "step": 4277 + }, + { + "epoch": 0.45126582278481014, + "grad_norm": 0.6919004917144775, + "learning_rate": 0.0008772956358884383, + "loss": 1.5063, + "step": 4278 + }, + { + "epoch": 0.45137130801687764, + "grad_norm": 1.180458664894104, + "learning_rate": 0.0008770482117619901, + "loss": 1.5032, + "step": 4279 + }, + { + "epoch": 0.45147679324894513, + "grad_norm": 0.7580441236495972, + "learning_rate": 0.0008768007733991561, + "loss": 1.5554, + "step": 4280 + }, + { + "epoch": 0.4515822784810127, + "grad_norm": 0.7548078894615173, + "learning_rate": 0.0008765533208276632, + "loss": 1.5113, + "step": 4281 + }, + { + "epoch": 0.45168776371308017, + "grad_norm": 0.6802889108657837, + "learning_rate": 0.0008763058540752396, + "loss": 1.5643, + "step": 4282 + }, + { + "epoch": 0.45179324894514766, + "grad_norm": 0.9072362780570984, + "learning_rate": 0.0008760583731696151, + "loss": 1.531, + "step": 4283 + }, + { + "epoch": 0.4518987341772152, + "grad_norm": 1.0625416040420532, + "learning_rate": 0.0008758108781385216, + "loss": 1.5353, + "step": 4284 + }, + { + "epoch": 0.4520042194092827, + "grad_norm": 0.7355237603187561, + "learning_rate": 0.0008755633690096918, + "loss": 1.519, + "step": 4285 + }, + { + "epoch": 0.4521097046413502, + "grad_norm": 0.753156840801239, + "learning_rate": 0.0008753158458108604, + "loss": 1.497, + "step": 4286 + }, + { + "epoch": 0.45221518987341774, + "grad_norm": 0.8688989877700806, + "learning_rate": 0.0008750683085697632, + "loss": 1.5097, + "step": 4287 + }, + { + "epoch": 0.45232067510548524, + "grad_norm": 0.7377491593360901, + "learning_rate": 0.0008748207573141388, + "loss": 1.558, + "step": 4288 + }, + { + "epoch": 0.45242616033755273, + "grad_norm": 1.1099822521209717, + "learning_rate": 0.000874573192071726, + "loss": 1.5477, + 
"step": 4289 + }, + { + "epoch": 0.4525316455696203, + "grad_norm": 0.8526052832603455, + "learning_rate": 0.0008743256128702658, + "loss": 1.4991, + "step": 4290 + }, + { + "epoch": 0.45263713080168777, + "grad_norm": 0.6804304122924805, + "learning_rate": 0.0008740780197375007, + "loss": 1.5131, + "step": 4291 + }, + { + "epoch": 0.45274261603375526, + "grad_norm": 0.7116856575012207, + "learning_rate": 0.000873830412701175, + "loss": 1.5229, + "step": 4292 + }, + { + "epoch": 0.4528481012658228, + "grad_norm": 0.6992835998535156, + "learning_rate": 0.0008735827917890339, + "loss": 1.5563, + "step": 4293 + }, + { + "epoch": 0.4529535864978903, + "grad_norm": 0.7480630278587341, + "learning_rate": 0.000873335157028825, + "loss": 1.5286, + "step": 4294 + }, + { + "epoch": 0.4530590717299578, + "grad_norm": 0.8540792465209961, + "learning_rate": 0.0008730875084482964, + "loss": 1.5196, + "step": 4295 + }, + { + "epoch": 0.4531645569620253, + "grad_norm": 0.6597231030464172, + "learning_rate": 0.0008728398460751989, + "loss": 1.528, + "step": 4296 + }, + { + "epoch": 0.45327004219409284, + "grad_norm": 1.182295322418213, + "learning_rate": 0.0008725921699372839, + "loss": 1.5131, + "step": 4297 + }, + { + "epoch": 0.45337552742616033, + "grad_norm": 0.8511938452720642, + "learning_rate": 0.0008723444800623053, + "loss": 1.5184, + "step": 4298 + }, + { + "epoch": 0.4534810126582278, + "grad_norm": 0.984559953212738, + "learning_rate": 0.0008720967764780173, + "loss": 1.5194, + "step": 4299 + }, + { + "epoch": 0.45358649789029537, + "grad_norm": 1.2680779695510864, + "learning_rate": 0.0008718490592121768, + "loss": 1.5161, + "step": 4300 + }, + { + "epoch": 0.45369198312236286, + "grad_norm": 0.8368884325027466, + "learning_rate": 0.0008716013282925418, + "loss": 1.5624, + "step": 4301 + }, + { + "epoch": 0.45379746835443036, + "grad_norm": 1.376446008682251, + "learning_rate": 0.0008713535837468714, + "loss": 1.521, + "step": 4302 + }, + { + "epoch": 
0.4539029535864979, + "grad_norm": 0.7802363634109497, + "learning_rate": 0.0008711058256029269, + "loss": 1.5269, + "step": 4303 + }, + { + "epoch": 0.4540084388185654, + "grad_norm": 1.172247290611267, + "learning_rate": 0.0008708580538884707, + "loss": 1.5183, + "step": 4304 + }, + { + "epoch": 0.4541139240506329, + "grad_norm": 0.7741556167602539, + "learning_rate": 0.0008706102686312668, + "loss": 1.4766, + "step": 4305 + }, + { + "epoch": 0.45421940928270044, + "grad_norm": 0.9379894137382507, + "learning_rate": 0.0008703624698590811, + "loss": 1.5525, + "step": 4306 + }, + { + "epoch": 0.45432489451476793, + "grad_norm": 0.7629097104072571, + "learning_rate": 0.0008701146575996804, + "loss": 1.5473, + "step": 4307 + }, + { + "epoch": 0.4544303797468354, + "grad_norm": 0.9950286746025085, + "learning_rate": 0.0008698668318808334, + "loss": 1.5212, + "step": 4308 + }, + { + "epoch": 0.45453586497890297, + "grad_norm": 0.6937878727912903, + "learning_rate": 0.0008696189927303101, + "loss": 1.5148, + "step": 4309 + }, + { + "epoch": 0.45464135021097046, + "grad_norm": 0.8431617021560669, + "learning_rate": 0.0008693711401758822, + "loss": 1.4849, + "step": 4310 + }, + { + "epoch": 0.45474683544303796, + "grad_norm": 0.8533664345741272, + "learning_rate": 0.0008691232742453229, + "loss": 1.5296, + "step": 4311 + }, + { + "epoch": 0.4548523206751055, + "grad_norm": 0.7400055527687073, + "learning_rate": 0.0008688753949664067, + "loss": 1.5041, + "step": 4312 + }, + { + "epoch": 0.454957805907173, + "grad_norm": 0.7588275671005249, + "learning_rate": 0.0008686275023669096, + "loss": 1.5302, + "step": 4313 + }, + { + "epoch": 0.4550632911392405, + "grad_norm": 0.7548259496688843, + "learning_rate": 0.0008683795964746094, + "loss": 1.4885, + "step": 4314 + }, + { + "epoch": 0.45516877637130804, + "grad_norm": 0.8770501613616943, + "learning_rate": 0.0008681316773172852, + "loss": 1.5144, + "step": 4315 + }, + { + "epoch": 0.45527426160337553, + "grad_norm": 
0.6960789561271667, + "learning_rate": 0.0008678837449227174, + "loss": 1.4919, + "step": 4316 + }, + { + "epoch": 0.455379746835443, + "grad_norm": 0.8955193161964417, + "learning_rate": 0.0008676357993186882, + "loss": 1.5241, + "step": 4317 + }, + { + "epoch": 0.45548523206751057, + "grad_norm": 1.0011838674545288, + "learning_rate": 0.000867387840532981, + "loss": 1.5041, + "step": 4318 + }, + { + "epoch": 0.45559071729957806, + "grad_norm": 0.6935359239578247, + "learning_rate": 0.0008671398685933811, + "loss": 1.5018, + "step": 4319 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 0.9434210062026978, + "learning_rate": 0.0008668918835276747, + "loss": 1.5818, + "step": 4320 + }, + { + "epoch": 0.4558016877637131, + "grad_norm": 0.7925006747245789, + "learning_rate": 0.0008666438853636499, + "loss": 1.4954, + "step": 4321 + }, + { + "epoch": 0.4559071729957806, + "grad_norm": 0.7642081379890442, + "learning_rate": 0.0008663958741290961, + "loss": 1.4974, + "step": 4322 + }, + { + "epoch": 0.4560126582278481, + "grad_norm": 0.8728026747703552, + "learning_rate": 0.0008661478498518042, + "loss": 1.5816, + "step": 4323 + }, + { + "epoch": 0.45611814345991564, + "grad_norm": 0.7470298409461975, + "learning_rate": 0.0008658998125595666, + "loss": 1.504, + "step": 4324 + }, + { + "epoch": 0.45622362869198313, + "grad_norm": 0.6729409694671631, + "learning_rate": 0.0008656517622801771, + "loss": 1.5446, + "step": 4325 + }, + { + "epoch": 0.4563291139240506, + "grad_norm": 0.6824792623519897, + "learning_rate": 0.0008654036990414308, + "loss": 1.5006, + "step": 4326 + }, + { + "epoch": 0.45643459915611817, + "grad_norm": 0.8042758703231812, + "learning_rate": 0.0008651556228711247, + "loss": 1.5686, + "step": 4327 + }, + { + "epoch": 0.45654008438818566, + "grad_norm": 0.7164283990859985, + "learning_rate": 0.0008649075337970567, + "loss": 1.5323, + "step": 4328 + }, + { + "epoch": 0.45664556962025316, + "grad_norm": 0.719954788684845, + "learning_rate": 
0.0008646594318470268, + "loss": 1.5296, + "step": 4329 + }, + { + "epoch": 0.45675105485232065, + "grad_norm": 0.8175467848777771, + "learning_rate": 0.0008644113170488355, + "loss": 1.5083, + "step": 4330 + }, + { + "epoch": 0.4568565400843882, + "grad_norm": 0.6839965581893921, + "learning_rate": 0.0008641631894302858, + "loss": 1.5211, + "step": 4331 + }, + { + "epoch": 0.4569620253164557, + "grad_norm": 0.742830753326416, + "learning_rate": 0.0008639150490191814, + "loss": 1.5426, + "step": 4332 + }, + { + "epoch": 0.4570675105485232, + "grad_norm": 0.6796922087669373, + "learning_rate": 0.0008636668958433279, + "loss": 1.487, + "step": 4333 + }, + { + "epoch": 0.45717299578059073, + "grad_norm": 0.7086787223815918, + "learning_rate": 0.0008634187299305318, + "loss": 1.5287, + "step": 4334 + }, + { + "epoch": 0.4572784810126582, + "grad_norm": 0.7301658987998962, + "learning_rate": 0.0008631705513086013, + "loss": 1.5117, + "step": 4335 + }, + { + "epoch": 0.4573839662447257, + "grad_norm": 0.6982380151748657, + "learning_rate": 0.0008629223600053465, + "loss": 1.5447, + "step": 4336 + }, + { + "epoch": 0.45748945147679326, + "grad_norm": 0.9199793338775635, + "learning_rate": 0.000862674156048578, + "loss": 1.5075, + "step": 4337 + }, + { + "epoch": 0.45759493670886076, + "grad_norm": 0.6747375726699829, + "learning_rate": 0.0008624259394661085, + "loss": 1.5096, + "step": 4338 + }, + { + "epoch": 0.45770042194092825, + "grad_norm": 1.0154558420181274, + "learning_rate": 0.000862177710285752, + "loss": 1.5486, + "step": 4339 + }, + { + "epoch": 0.4578059071729958, + "grad_norm": 1.1922436952590942, + "learning_rate": 0.0008619294685353235, + "loss": 1.5051, + "step": 4340 + }, + { + "epoch": 0.4579113924050633, + "grad_norm": 0.6591933369636536, + "learning_rate": 0.00086168121424264, + "loss": 1.5044, + "step": 4341 + }, + { + "epoch": 0.4580168776371308, + "grad_norm": 0.8661260008811951, + "learning_rate": 0.0008614329474355196, + "loss": 1.5025, + "step": 
4342 + }, + { + "epoch": 0.45812236286919833, + "grad_norm": 0.6823516488075256, + "learning_rate": 0.0008611846681417818, + "loss": 1.4807, + "step": 4343 + }, + { + "epoch": 0.4582278481012658, + "grad_norm": 0.7026731371879578, + "learning_rate": 0.0008609363763892474, + "loss": 1.5309, + "step": 4344 + }, + { + "epoch": 0.4583333333333333, + "grad_norm": 0.7246387600898743, + "learning_rate": 0.0008606880722057386, + "loss": 1.5294, + "step": 4345 + }, + { + "epoch": 0.45843881856540086, + "grad_norm": 0.783768355846405, + "learning_rate": 0.0008604397556190797, + "loss": 1.556, + "step": 4346 + }, + { + "epoch": 0.45854430379746836, + "grad_norm": 1.0500558614730835, + "learning_rate": 0.0008601914266570956, + "loss": 1.5, + "step": 4347 + }, + { + "epoch": 0.45864978902953585, + "grad_norm": 0.7514777183532715, + "learning_rate": 0.0008599430853476126, + "loss": 1.5092, + "step": 4348 + }, + { + "epoch": 0.4587552742616034, + "grad_norm": 1.1488851308822632, + "learning_rate": 0.0008596947317184585, + "loss": 1.5336, + "step": 4349 + }, + { + "epoch": 0.4588607594936709, + "grad_norm": 1.0007883310317993, + "learning_rate": 0.0008594463657974627, + "loss": 1.5334, + "step": 4350 + }, + { + "epoch": 0.4589662447257384, + "grad_norm": 0.745847225189209, + "learning_rate": 0.000859197987612456, + "loss": 1.5628, + "step": 4351 + }, + { + "epoch": 0.45907172995780593, + "grad_norm": 0.8768055438995361, + "learning_rate": 0.0008589495971912703, + "loss": 1.5592, + "step": 4352 + }, + { + "epoch": 0.4591772151898734, + "grad_norm": 0.7788673043251038, + "learning_rate": 0.000858701194561739, + "loss": 1.5108, + "step": 4353 + }, + { + "epoch": 0.4592827004219409, + "grad_norm": 0.9683767557144165, + "learning_rate": 0.0008584527797516966, + "loss": 1.5438, + "step": 4354 + }, + { + "epoch": 0.45938818565400846, + "grad_norm": 0.7703229784965515, + "learning_rate": 0.0008582043527889797, + "loss": 1.5482, + "step": 4355 + }, + { + "epoch": 0.45949367088607596, + 
"grad_norm": 0.9344459772109985, + "learning_rate": 0.0008579559137014254, + "loss": 1.5092, + "step": 4356 + }, + { + "epoch": 0.45959915611814345, + "grad_norm": 0.8970688581466675, + "learning_rate": 0.0008577074625168725, + "loss": 1.5356, + "step": 4357 + }, + { + "epoch": 0.459704641350211, + "grad_norm": 0.916961133480072, + "learning_rate": 0.0008574589992631617, + "loss": 1.5449, + "step": 4358 + }, + { + "epoch": 0.4598101265822785, + "grad_norm": 0.7743061184883118, + "learning_rate": 0.0008572105239681338, + "loss": 1.5307, + "step": 4359 + }, + { + "epoch": 0.459915611814346, + "grad_norm": 0.7313348054885864, + "learning_rate": 0.0008569620366596322, + "loss": 1.5101, + "step": 4360 + }, + { + "epoch": 0.46002109704641353, + "grad_norm": 0.8149069547653198, + "learning_rate": 0.0008567135373655012, + "loss": 1.5266, + "step": 4361 + }, + { + "epoch": 0.460126582278481, + "grad_norm": 0.743301510810852, + "learning_rate": 0.0008564650261135862, + "loss": 1.4848, + "step": 4362 + }, + { + "epoch": 0.4602320675105485, + "grad_norm": 0.9778168201446533, + "learning_rate": 0.0008562165029317339, + "loss": 1.4989, + "step": 4363 + }, + { + "epoch": 0.460337552742616, + "grad_norm": 0.7094020247459412, + "learning_rate": 0.0008559679678477929, + "loss": 1.5302, + "step": 4364 + }, + { + "epoch": 0.46044303797468356, + "grad_norm": 0.8559139966964722, + "learning_rate": 0.0008557194208896129, + "loss": 1.5287, + "step": 4365 + }, + { + "epoch": 0.46054852320675105, + "grad_norm": 0.6704040169715881, + "learning_rate": 0.0008554708620850445, + "loss": 1.4801, + "step": 4366 + }, + { + "epoch": 0.46065400843881854, + "grad_norm": 0.7402414083480835, + "learning_rate": 0.0008552222914619401, + "loss": 1.5215, + "step": 4367 + }, + { + "epoch": 0.4607594936708861, + "grad_norm": 0.7920910120010376, + "learning_rate": 0.0008549737090481532, + "loss": 1.5055, + "step": 4368 + }, + { + "epoch": 0.4608649789029536, + "grad_norm": 0.6753297448158264, + 
"learning_rate": 0.0008547251148715386, + "loss": 1.5186, + "step": 4369 + }, + { + "epoch": 0.4609704641350211, + "grad_norm": 0.6996132135391235, + "learning_rate": 0.000854476508959953, + "loss": 1.5138, + "step": 4370 + }, + { + "epoch": 0.4610759493670886, + "grad_norm": 0.712092399597168, + "learning_rate": 0.0008542278913412535, + "loss": 1.4646, + "step": 4371 + }, + { + "epoch": 0.4611814345991561, + "grad_norm": 0.7316024303436279, + "learning_rate": 0.0008539792620432989, + "loss": 1.5345, + "step": 4372 + }, + { + "epoch": 0.4612869198312236, + "grad_norm": 0.7215169072151184, + "learning_rate": 0.0008537306210939497, + "loss": 1.4895, + "step": 4373 + }, + { + "epoch": 0.46139240506329116, + "grad_norm": 0.7420945763587952, + "learning_rate": 0.0008534819685210668, + "loss": 1.5144, + "step": 4374 + }, + { + "epoch": 0.46149789029535865, + "grad_norm": 0.7178360819816589, + "learning_rate": 0.0008532333043525136, + "loss": 1.5319, + "step": 4375 + }, + { + "epoch": 0.46160337552742614, + "grad_norm": 0.6405024528503418, + "learning_rate": 0.0008529846286161539, + "loss": 1.5435, + "step": 4376 + }, + { + "epoch": 0.4617088607594937, + "grad_norm": 0.7033635973930359, + "learning_rate": 0.000852735941339853, + "loss": 1.4933, + "step": 4377 + }, + { + "epoch": 0.4618143459915612, + "grad_norm": 0.7280958294868469, + "learning_rate": 0.0008524872425514775, + "loss": 1.5294, + "step": 4378 + }, + { + "epoch": 0.4619198312236287, + "grad_norm": 0.6820814609527588, + "learning_rate": 0.0008522385322788955, + "loss": 1.5464, + "step": 4379 + }, + { + "epoch": 0.4620253164556962, + "grad_norm": 0.8796482086181641, + "learning_rate": 0.0008519898105499762, + "loss": 1.5285, + "step": 4380 + }, + { + "epoch": 0.4621308016877637, + "grad_norm": 0.8257671594619751, + "learning_rate": 0.00085174107739259, + "loss": 1.5523, + "step": 4381 + }, + { + "epoch": 0.4622362869198312, + "grad_norm": 0.7619385719299316, + "learning_rate": 0.000851492332834609, + "loss": 
1.5066, + "step": 4382 + }, + { + "epoch": 0.46234177215189876, + "grad_norm": 0.8472577333450317, + "learning_rate": 0.0008512435769039055, + "loss": 1.5078, + "step": 4383 + }, + { + "epoch": 0.46244725738396625, + "grad_norm": 0.7397871613502502, + "learning_rate": 0.0008509948096283547, + "loss": 1.4845, + "step": 4384 + }, + { + "epoch": 0.46255274261603374, + "grad_norm": 0.9695469737052917, + "learning_rate": 0.0008507460310358319, + "loss": 1.5252, + "step": 4385 + }, + { + "epoch": 0.4626582278481013, + "grad_norm": 0.6707943677902222, + "learning_rate": 0.0008504972411542138, + "loss": 1.5276, + "step": 4386 + }, + { + "epoch": 0.4627637130801688, + "grad_norm": 0.869189441204071, + "learning_rate": 0.0008502484400113787, + "loss": 1.4931, + "step": 4387 + }, + { + "epoch": 0.4628691983122363, + "grad_norm": 0.7860904932022095, + "learning_rate": 0.0008499996276352061, + "loss": 1.5073, + "step": 4388 + }, + { + "epoch": 0.4629746835443038, + "grad_norm": 0.6828668117523193, + "learning_rate": 0.0008497508040535766, + "loss": 1.5107, + "step": 4389 + }, + { + "epoch": 0.4630801687763713, + "grad_norm": 0.9269625544548035, + "learning_rate": 0.0008495019692943721, + "loss": 1.5154, + "step": 4390 + }, + { + "epoch": 0.4631856540084388, + "grad_norm": 0.7837143540382385, + "learning_rate": 0.0008492531233854757, + "loss": 1.5093, + "step": 4391 + }, + { + "epoch": 0.46329113924050636, + "grad_norm": 0.875279426574707, + "learning_rate": 0.0008490042663547719, + "loss": 1.4915, + "step": 4392 + }, + { + "epoch": 0.46339662447257385, + "grad_norm": 0.7133172154426575, + "learning_rate": 0.0008487553982301465, + "loss": 1.5094, + "step": 4393 + }, + { + "epoch": 0.46350210970464134, + "grad_norm": 0.7553754448890686, + "learning_rate": 0.0008485065190394863, + "loss": 1.5303, + "step": 4394 + }, + { + "epoch": 0.46360759493670883, + "grad_norm": 0.830653965473175, + "learning_rate": 0.0008482576288106794, + "loss": 1.5277, + "step": 4395 + }, + { + "epoch": 
0.4637130801687764, + "grad_norm": 0.8339614272117615, + "learning_rate": 0.000848008727571615, + "loss": 1.5163, + "step": 4396 + }, + { + "epoch": 0.4638185654008439, + "grad_norm": 0.7124367952346802, + "learning_rate": 0.0008477598153501842, + "loss": 1.5211, + "step": 4397 + }, + { + "epoch": 0.46392405063291137, + "grad_norm": 0.8924593925476074, + "learning_rate": 0.0008475108921742787, + "loss": 1.5512, + "step": 4398 + }, + { + "epoch": 0.4640295358649789, + "grad_norm": 0.7287356853485107, + "learning_rate": 0.0008472619580717914, + "loss": 1.4796, + "step": 4399 + }, + { + "epoch": 0.4641350210970464, + "grad_norm": 0.8899200558662415, + "learning_rate": 0.0008470130130706166, + "loss": 1.5474, + "step": 4400 + }, + { + "epoch": 0.4642405063291139, + "grad_norm": 0.8896593451499939, + "learning_rate": 0.00084676405719865, + "loss": 1.5074, + "step": 4401 + }, + { + "epoch": 0.46434599156118145, + "grad_norm": 0.8921487927436829, + "learning_rate": 0.0008465150904837883, + "loss": 1.5447, + "step": 4402 + }, + { + "epoch": 0.46445147679324894, + "grad_norm": 1.505585789680481, + "learning_rate": 0.0008462661129539296, + "loss": 1.5771, + "step": 4403 + }, + { + "epoch": 0.46455696202531643, + "grad_norm": 0.8730548024177551, + "learning_rate": 0.0008460171246369725, + "loss": 1.5505, + "step": 4404 + }, + { + "epoch": 0.464662447257384, + "grad_norm": 1.4126110076904297, + "learning_rate": 0.000845768125560818, + "loss": 1.5275, + "step": 4405 + }, + { + "epoch": 0.4647679324894515, + "grad_norm": 1.0685545206069946, + "learning_rate": 0.0008455191157533677, + "loss": 1.5315, + "step": 4406 + }, + { + "epoch": 0.46487341772151897, + "grad_norm": 1.4369056224822998, + "learning_rate": 0.000845270095242524, + "loss": 1.5188, + "step": 4407 + }, + { + "epoch": 0.4649789029535865, + "grad_norm": 1.1601299047470093, + "learning_rate": 0.0008450210640561912, + "loss": 1.5501, + "step": 4408 + }, + { + "epoch": 0.465084388185654, + "grad_norm": 
1.1588053703308105, + "learning_rate": 0.000844772022222274, + "loss": 1.52, + "step": 4409 + }, + { + "epoch": 0.4651898734177215, + "grad_norm": 1.0504812002182007, + "learning_rate": 0.0008445229697686795, + "loss": 1.5214, + "step": 4410 + }, + { + "epoch": 0.46529535864978905, + "grad_norm": 1.2684154510498047, + "learning_rate": 0.0008442739067233148, + "loss": 1.515, + "step": 4411 + }, + { + "epoch": 0.46540084388185654, + "grad_norm": 1.046157717704773, + "learning_rate": 0.0008440248331140888, + "loss": 1.5621, + "step": 4412 + }, + { + "epoch": 0.46550632911392403, + "grad_norm": 1.6774131059646606, + "learning_rate": 0.0008437757489689113, + "loss": 1.4764, + "step": 4413 + }, + { + "epoch": 0.4656118143459916, + "grad_norm": 1.2559318542480469, + "learning_rate": 0.0008435266543156935, + "loss": 1.5222, + "step": 4414 + }, + { + "epoch": 0.4657172995780591, + "grad_norm": 1.3309247493743896, + "learning_rate": 0.0008432775491823477, + "loss": 1.5307, + "step": 4415 + }, + { + "epoch": 0.46582278481012657, + "grad_norm": 1.1869642734527588, + "learning_rate": 0.0008430284335967876, + "loss": 1.5488, + "step": 4416 + }, + { + "epoch": 0.4659282700421941, + "grad_norm": 1.0642273426055908, + "learning_rate": 0.0008427793075869275, + "loss": 1.4843, + "step": 4417 + }, + { + "epoch": 0.4660337552742616, + "grad_norm": 1.1360278129577637, + "learning_rate": 0.0008425301711806833, + "loss": 1.5201, + "step": 4418 + }, + { + "epoch": 0.4661392405063291, + "grad_norm": 1.0072466135025024, + "learning_rate": 0.0008422810244059721, + "loss": 1.5162, + "step": 4419 + }, + { + "epoch": 0.46624472573839665, + "grad_norm": 1.0671417713165283, + "learning_rate": 0.0008420318672907119, + "loss": 1.5008, + "step": 4420 + }, + { + "epoch": 0.46635021097046414, + "grad_norm": 1.1461483240127563, + "learning_rate": 0.0008417826998628222, + "loss": 1.5131, + "step": 4421 + }, + { + "epoch": 0.46645569620253163, + "grad_norm": 0.9889411330223083, + "learning_rate": 
0.0008415335221502231, + "loss": 1.522, + "step": 4422 + }, + { + "epoch": 0.4665611814345992, + "grad_norm": 1.3296527862548828, + "learning_rate": 0.0008412843341808365, + "loss": 1.5344, + "step": 4423 + }, + { + "epoch": 0.4666666666666667, + "grad_norm": 0.8586921691894531, + "learning_rate": 0.0008410351359825851, + "loss": 1.538, + "step": 4424 + }, + { + "epoch": 0.46677215189873417, + "grad_norm": 1.2206171751022339, + "learning_rate": 0.0008407859275833928, + "loss": 1.4993, + "step": 4425 + }, + { + "epoch": 0.4668776371308017, + "grad_norm": 0.8094790577888489, + "learning_rate": 0.0008405367090111845, + "loss": 1.5051, + "step": 4426 + }, + { + "epoch": 0.4669831223628692, + "grad_norm": 1.0818268060684204, + "learning_rate": 0.0008402874802938866, + "loss": 1.5064, + "step": 4427 + }, + { + "epoch": 0.4670886075949367, + "grad_norm": 0.7715997695922852, + "learning_rate": 0.0008400382414594263, + "loss": 1.5114, + "step": 4428 + }, + { + "epoch": 0.4671940928270042, + "grad_norm": 0.9771600365638733, + "learning_rate": 0.000839788992535732, + "loss": 1.5508, + "step": 4429 + }, + { + "epoch": 0.46729957805907174, + "grad_norm": 0.8266493082046509, + "learning_rate": 0.0008395397335507334, + "loss": 1.4806, + "step": 4430 + }, + { + "epoch": 0.46740506329113923, + "grad_norm": 1.154990315437317, + "learning_rate": 0.0008392904645323612, + "loss": 1.5003, + "step": 4431 + }, + { + "epoch": 0.4675105485232067, + "grad_norm": 0.7615726590156555, + "learning_rate": 0.0008390411855085473, + "loss": 1.5459, + "step": 4432 + }, + { + "epoch": 0.4676160337552743, + "grad_norm": 1.136053204536438, + "learning_rate": 0.0008387918965072244, + "loss": 1.5103, + "step": 4433 + }, + { + "epoch": 0.46772151898734177, + "grad_norm": 0.8262894153594971, + "learning_rate": 0.0008385425975563269, + "loss": 1.5366, + "step": 4434 + }, + { + "epoch": 0.46782700421940926, + "grad_norm": 0.8841549158096313, + "learning_rate": 0.0008382932886837897, + "loss": 1.4814, + 
"step": 4435 + }, + { + "epoch": 0.4679324894514768, + "grad_norm": 0.8940154314041138, + "learning_rate": 0.0008380439699175493, + "loss": 1.4873, + "step": 4436 + }, + { + "epoch": 0.4680379746835443, + "grad_norm": 0.8282504677772522, + "learning_rate": 0.000837794641285543, + "loss": 1.5078, + "step": 4437 + }, + { + "epoch": 0.4681434599156118, + "grad_norm": 0.9040402173995972, + "learning_rate": 0.0008375453028157093, + "loss": 1.5188, + "step": 4438 + }, + { + "epoch": 0.46824894514767934, + "grad_norm": 0.7252212762832642, + "learning_rate": 0.000837295954535988, + "loss": 1.4798, + "step": 4439 + }, + { + "epoch": 0.46835443037974683, + "grad_norm": 0.8349424600601196, + "learning_rate": 0.0008370465964743196, + "loss": 1.5325, + "step": 4440 + }, + { + "epoch": 0.4684599156118143, + "grad_norm": 0.6603984832763672, + "learning_rate": 0.0008367972286586461, + "loss": 1.5025, + "step": 4441 + }, + { + "epoch": 0.4685654008438819, + "grad_norm": 0.7294588685035706, + "learning_rate": 0.0008365478511169103, + "loss": 1.5472, + "step": 4442 + }, + { + "epoch": 0.46867088607594937, + "grad_norm": 0.6615921854972839, + "learning_rate": 0.000836298463877056, + "loss": 1.519, + "step": 4443 + }, + { + "epoch": 0.46877637130801686, + "grad_norm": 0.6902576684951782, + "learning_rate": 0.0008360490669670288, + "loss": 1.499, + "step": 4444 + }, + { + "epoch": 0.4688818565400844, + "grad_norm": 0.7958689332008362, + "learning_rate": 0.0008357996604147744, + "loss": 1.5389, + "step": 4445 + }, + { + "epoch": 0.4689873417721519, + "grad_norm": 0.7041028738021851, + "learning_rate": 0.0008355502442482403, + "loss": 1.4757, + "step": 4446 + }, + { + "epoch": 0.4690928270042194, + "grad_norm": 0.7617419362068176, + "learning_rate": 0.0008353008184953748, + "loss": 1.535, + "step": 4447 + }, + { + "epoch": 0.46919831223628694, + "grad_norm": 0.6959624886512756, + "learning_rate": 0.0008350513831841271, + "loss": 1.5467, + "step": 4448 + }, + { + "epoch": 
0.46930379746835443, + "grad_norm": 0.7816561460494995, + "learning_rate": 0.0008348019383424479, + "loss": 1.5145, + "step": 4449 + }, + { + "epoch": 0.4694092827004219, + "grad_norm": 0.7100427150726318, + "learning_rate": 0.0008345524839982886, + "loss": 1.5004, + "step": 4450 + }, + { + "epoch": 0.4695147679324895, + "grad_norm": 0.8435419201850891, + "learning_rate": 0.000834303020179602, + "loss": 1.5092, + "step": 4451 + }, + { + "epoch": 0.46962025316455697, + "grad_norm": 0.74429851770401, + "learning_rate": 0.0008340535469143414, + "loss": 1.486, + "step": 4452 + }, + { + "epoch": 0.46972573839662446, + "grad_norm": 0.7145223021507263, + "learning_rate": 0.0008338040642304618, + "loss": 1.5258, + "step": 4453 + }, + { + "epoch": 0.469831223628692, + "grad_norm": 0.8815164566040039, + "learning_rate": 0.0008335545721559188, + "loss": 1.4815, + "step": 4454 + }, + { + "epoch": 0.4699367088607595, + "grad_norm": 0.6696032285690308, + "learning_rate": 0.0008333050707186696, + "loss": 1.4927, + "step": 4455 + }, + { + "epoch": 0.470042194092827, + "grad_norm": 0.7392616271972656, + "learning_rate": 0.0008330555599466716, + "loss": 1.5056, + "step": 4456 + }, + { + "epoch": 0.47014767932489454, + "grad_norm": 0.7216477394104004, + "learning_rate": 0.000832806039867884, + "loss": 1.4956, + "step": 4457 + }, + { + "epoch": 0.47025316455696203, + "grad_norm": 0.6759499311447144, + "learning_rate": 0.000832556510510267, + "loss": 1.497, + "step": 4458 + }, + { + "epoch": 0.4703586497890295, + "grad_norm": 0.7354172468185425, + "learning_rate": 0.0008323069719017812, + "loss": 1.521, + "step": 4459 + }, + { + "epoch": 0.4704641350210971, + "grad_norm": 0.652135968208313, + "learning_rate": 0.0008320574240703886, + "loss": 1.5396, + "step": 4460 + }, + { + "epoch": 0.47056962025316457, + "grad_norm": 0.7405062317848206, + "learning_rate": 0.0008318078670440525, + "loss": 1.5159, + "step": 4461 + }, + { + "epoch": 0.47067510548523206, + "grad_norm": 
0.7407522797584534, + "learning_rate": 0.0008315583008507372, + "loss": 1.5193, + "step": 4462 + }, + { + "epoch": 0.47078059071729955, + "grad_norm": 0.782688558101654, + "learning_rate": 0.0008313087255184074, + "loss": 1.5038, + "step": 4463 + }, + { + "epoch": 0.4708860759493671, + "grad_norm": 0.8610092997550964, + "learning_rate": 0.0008310591410750295, + "loss": 1.5069, + "step": 4464 + }, + { + "epoch": 0.4709915611814346, + "grad_norm": 0.7027883529663086, + "learning_rate": 0.0008308095475485706, + "loss": 1.4867, + "step": 4465 + }, + { + "epoch": 0.4710970464135021, + "grad_norm": 0.7646484971046448, + "learning_rate": 0.0008305599449669989, + "loss": 1.5303, + "step": 4466 + }, + { + "epoch": 0.47120253164556963, + "grad_norm": 0.7204686403274536, + "learning_rate": 0.0008303103333582839, + "loss": 1.4968, + "step": 4467 + }, + { + "epoch": 0.4713080168776371, + "grad_norm": 0.8129367232322693, + "learning_rate": 0.0008300607127503952, + "loss": 1.5133, + "step": 4468 + }, + { + "epoch": 0.4714135021097046, + "grad_norm": 0.7356871366500854, + "learning_rate": 0.0008298110831713047, + "loss": 1.5551, + "step": 4469 + }, + { + "epoch": 0.47151898734177217, + "grad_norm": 0.6743361353874207, + "learning_rate": 0.0008295614446489842, + "loss": 1.5457, + "step": 4470 + }, + { + "epoch": 0.47162447257383966, + "grad_norm": 0.7519968152046204, + "learning_rate": 0.0008293117972114074, + "loss": 1.5035, + "step": 4471 + }, + { + "epoch": 0.47172995780590715, + "grad_norm": 0.6616206765174866, + "learning_rate": 0.0008290621408865481, + "loss": 1.495, + "step": 4472 + }, + { + "epoch": 0.4718354430379747, + "grad_norm": 0.756255030632019, + "learning_rate": 0.0008288124757023816, + "loss": 1.5297, + "step": 4473 + }, + { + "epoch": 0.4719409282700422, + "grad_norm": 0.7615984082221985, + "learning_rate": 0.0008285628016868841, + "loss": 1.5556, + "step": 4474 + }, + { + "epoch": 0.4720464135021097, + "grad_norm": 0.7258898019790649, + "learning_rate": 
0.0008283131188680332, + "loss": 1.56, + "step": 4475 + }, + { + "epoch": 0.47215189873417723, + "grad_norm": 0.7196488976478577, + "learning_rate": 0.0008280634272738066, + "loss": 1.4889, + "step": 4476 + }, + { + "epoch": 0.4722573839662447, + "grad_norm": 0.7510094046592712, + "learning_rate": 0.0008278137269321837, + "loss": 1.5255, + "step": 4477 + }, + { + "epoch": 0.4723628691983122, + "grad_norm": 0.7388684153556824, + "learning_rate": 0.0008275640178711447, + "loss": 1.5343, + "step": 4478 + }, + { + "epoch": 0.47246835443037977, + "grad_norm": 0.8226412534713745, + "learning_rate": 0.0008273143001186709, + "loss": 1.5129, + "step": 4479 + }, + { + "epoch": 0.47257383966244726, + "grad_norm": 0.80885249376297, + "learning_rate": 0.0008270645737027441, + "loss": 1.5232, + "step": 4480 + }, + { + "epoch": 0.47267932489451475, + "grad_norm": 0.6849955916404724, + "learning_rate": 0.0008268148386513475, + "loss": 1.4911, + "step": 4481 + }, + { + "epoch": 0.4727848101265823, + "grad_norm": 0.7365936040878296, + "learning_rate": 0.0008265650949924652, + "loss": 1.5061, + "step": 4482 + }, + { + "epoch": 0.4728902953586498, + "grad_norm": 0.7210370898246765, + "learning_rate": 0.0008263153427540825, + "loss": 1.4907, + "step": 4483 + }, + { + "epoch": 0.4729957805907173, + "grad_norm": 0.6627243757247925, + "learning_rate": 0.0008260655819641849, + "loss": 1.4886, + "step": 4484 + }, + { + "epoch": 0.47310126582278483, + "grad_norm": 0.6631752252578735, + "learning_rate": 0.0008258158126507594, + "loss": 1.5135, + "step": 4485 + }, + { + "epoch": 0.4732067510548523, + "grad_norm": 0.7057051062583923, + "learning_rate": 0.0008255660348417944, + "loss": 1.5108, + "step": 4486 + }, + { + "epoch": 0.4733122362869198, + "grad_norm": 0.6767529845237732, + "learning_rate": 0.0008253162485652779, + "loss": 1.5021, + "step": 4487 + }, + { + "epoch": 0.47341772151898737, + "grad_norm": 0.731368899345398, + "learning_rate": 0.0008250664538492006, + "loss": 1.529, + 
"step": 4488 + }, + { + "epoch": 0.47352320675105486, + "grad_norm": 0.8660839796066284, + "learning_rate": 0.0008248166507215526, + "loss": 1.5082, + "step": 4489 + }, + { + "epoch": 0.47362869198312235, + "grad_norm": 0.7249590754508972, + "learning_rate": 0.0008245668392103259, + "loss": 1.5557, + "step": 4490 + }, + { + "epoch": 0.4737341772151899, + "grad_norm": 0.7598720192909241, + "learning_rate": 0.000824317019343513, + "loss": 1.5035, + "step": 4491 + }, + { + "epoch": 0.4738396624472574, + "grad_norm": 0.7716814279556274, + "learning_rate": 0.0008240671911491077, + "loss": 1.5303, + "step": 4492 + }, + { + "epoch": 0.4739451476793249, + "grad_norm": 0.6707994341850281, + "learning_rate": 0.000823817354655104, + "loss": 1.495, + "step": 4493 + }, + { + "epoch": 0.4740506329113924, + "grad_norm": 0.7751393914222717, + "learning_rate": 0.0008235675098894979, + "loss": 1.4909, + "step": 4494 + }, + { + "epoch": 0.4741561181434599, + "grad_norm": 0.6862543821334839, + "learning_rate": 0.0008233176568802851, + "loss": 1.5338, + "step": 4495 + }, + { + "epoch": 0.4742616033755274, + "grad_norm": 0.6793820858001709, + "learning_rate": 0.0008230677956554637, + "loss": 1.5069, + "step": 4496 + }, + { + "epoch": 0.4743670886075949, + "grad_norm": 0.800846517086029, + "learning_rate": 0.0008228179262430313, + "loss": 1.4937, + "step": 4497 + }, + { + "epoch": 0.47447257383966246, + "grad_norm": 0.7176148891448975, + "learning_rate": 0.0008225680486709871, + "loss": 1.5157, + "step": 4498 + }, + { + "epoch": 0.47457805907172995, + "grad_norm": 0.839985728263855, + "learning_rate": 0.0008223181629673312, + "loss": 1.5135, + "step": 4499 + }, + { + "epoch": 0.47468354430379744, + "grad_norm": 0.9285463094711304, + "learning_rate": 0.0008220682691600645, + "loss": 1.5112, + "step": 4500 + }, + { + "epoch": 0.474789029535865, + "grad_norm": 0.6731441617012024, + "learning_rate": 0.0008218183672771889, + "loss": 1.5298, + "step": 4501 + }, + { + "epoch": 
0.4748945147679325, + "grad_norm": 0.8381966948509216, + "learning_rate": 0.0008215684573467071, + "loss": 1.5359, + "step": 4502 + }, + { + "epoch": 0.475, + "grad_norm": 0.6781648993492126, + "learning_rate": 0.0008213185393966229, + "loss": 1.5089, + "step": 4503 + }, + { + "epoch": 0.4751054852320675, + "grad_norm": 0.7133978605270386, + "learning_rate": 0.0008210686134549406, + "loss": 1.5144, + "step": 4504 + }, + { + "epoch": 0.475210970464135, + "grad_norm": 0.6452988386154175, + "learning_rate": 0.0008208186795496657, + "loss": 1.5045, + "step": 4505 + }, + { + "epoch": 0.4753164556962025, + "grad_norm": 0.7995943427085876, + "learning_rate": 0.0008205687377088048, + "loss": 1.5168, + "step": 4506 + }, + { + "epoch": 0.47542194092827006, + "grad_norm": 0.652507483959198, + "learning_rate": 0.000820318787960365, + "loss": 1.5304, + "step": 4507 + }, + { + "epoch": 0.47552742616033755, + "grad_norm": 0.8309377431869507, + "learning_rate": 0.0008200688303323542, + "loss": 1.4971, + "step": 4508 + }, + { + "epoch": 0.47563291139240504, + "grad_norm": 0.7988290786743164, + "learning_rate": 0.0008198188648527818, + "loss": 1.5258, + "step": 4509 + }, + { + "epoch": 0.4757383966244726, + "grad_norm": 0.7094727754592896, + "learning_rate": 0.0008195688915496571, + "loss": 1.4842, + "step": 4510 + }, + { + "epoch": 0.4758438818565401, + "grad_norm": 0.8954235315322876, + "learning_rate": 0.0008193189104509915, + "loss": 1.4946, + "step": 4511 + }, + { + "epoch": 0.4759493670886076, + "grad_norm": 0.7026421427726746, + "learning_rate": 0.0008190689215847963, + "loss": 1.4924, + "step": 4512 + }, + { + "epoch": 0.4760548523206751, + "grad_norm": 0.7667883038520813, + "learning_rate": 0.0008188189249790838, + "loss": 1.5133, + "step": 4513 + }, + { + "epoch": 0.4761603375527426, + "grad_norm": 0.6379601359367371, + "learning_rate": 0.0008185689206618677, + "loss": 1.5198, + "step": 4514 + }, + { + "epoch": 0.4762658227848101, + "grad_norm": 0.699078381061554, + 
"learning_rate": 0.0008183189086611623, + "loss": 1.5403, + "step": 4515 + }, + { + "epoch": 0.47637130801687766, + "grad_norm": 0.6804124712944031, + "learning_rate": 0.0008180688890049823, + "loss": 1.4913, + "step": 4516 + }, + { + "epoch": 0.47647679324894515, + "grad_norm": 0.7580643892288208, + "learning_rate": 0.000817818861721344, + "loss": 1.4861, + "step": 4517 + }, + { + "epoch": 0.47658227848101264, + "grad_norm": 0.7389572262763977, + "learning_rate": 0.0008175688268382639, + "loss": 1.4716, + "step": 4518 + }, + { + "epoch": 0.4766877637130802, + "grad_norm": 0.777718722820282, + "learning_rate": 0.00081731878438376, + "loss": 1.4695, + "step": 4519 + }, + { + "epoch": 0.4767932489451477, + "grad_norm": 0.675842821598053, + "learning_rate": 0.0008170687343858506, + "loss": 1.4852, + "step": 4520 + }, + { + "epoch": 0.4768987341772152, + "grad_norm": 0.7138401865959167, + "learning_rate": 0.000816818676872555, + "loss": 1.5423, + "step": 4521 + }, + { + "epoch": 0.4770042194092827, + "grad_norm": 0.8684545755386353, + "learning_rate": 0.0008165686118718935, + "loss": 1.5448, + "step": 4522 + }, + { + "epoch": 0.4771097046413502, + "grad_norm": 0.9756558537483215, + "learning_rate": 0.000816318539411887, + "loss": 1.5136, + "step": 4523 + }, + { + "epoch": 0.4772151898734177, + "grad_norm": 0.7119261026382446, + "learning_rate": 0.0008160684595205577, + "loss": 1.5157, + "step": 4524 + }, + { + "epoch": 0.47732067510548526, + "grad_norm": 0.9498232007026672, + "learning_rate": 0.000815818372225928, + "loss": 1.4883, + "step": 4525 + }, + { + "epoch": 0.47742616033755275, + "grad_norm": 0.800697922706604, + "learning_rate": 0.0008155682775560215, + "loss": 1.5373, + "step": 4526 + }, + { + "epoch": 0.47753164556962024, + "grad_norm": 0.9370514750480652, + "learning_rate": 0.0008153181755388624, + "loss": 1.4865, + "step": 4527 + }, + { + "epoch": 0.47763713080168774, + "grad_norm": 1.008793830871582, + "learning_rate": 0.0008150680662024761, + "loss": 
1.5148, + "step": 4528 + }, + { + "epoch": 0.4777426160337553, + "grad_norm": 0.6974062919616699, + "learning_rate": 0.0008148179495748885, + "loss": 1.4942, + "step": 4529 + }, + { + "epoch": 0.4778481012658228, + "grad_norm": 0.7276579141616821, + "learning_rate": 0.0008145678256841265, + "loss": 1.5216, + "step": 4530 + }, + { + "epoch": 0.47795358649789027, + "grad_norm": 0.7428579926490784, + "learning_rate": 0.0008143176945582175, + "loss": 1.4876, + "step": 4531 + }, + { + "epoch": 0.4780590717299578, + "grad_norm": 0.733733057975769, + "learning_rate": 0.0008140675562251904, + "loss": 1.507, + "step": 4532 + }, + { + "epoch": 0.4781645569620253, + "grad_norm": 0.7213668823242188, + "learning_rate": 0.0008138174107130739, + "loss": 1.5483, + "step": 4533 + }, + { + "epoch": 0.4782700421940928, + "grad_norm": 0.8609997630119324, + "learning_rate": 0.0008135672580498984, + "loss": 1.5417, + "step": 4534 + }, + { + "epoch": 0.47837552742616035, + "grad_norm": 0.8417364954948425, + "learning_rate": 0.0008133170982636946, + "loss": 1.4913, + "step": 4535 + }, + { + "epoch": 0.47848101265822784, + "grad_norm": 0.8277696967124939, + "learning_rate": 0.0008130669313824944, + "loss": 1.5061, + "step": 4536 + }, + { + "epoch": 0.47858649789029534, + "grad_norm": 0.7129099369049072, + "learning_rate": 0.0008128167574343299, + "loss": 1.5161, + "step": 4537 + }, + { + "epoch": 0.4786919831223629, + "grad_norm": 0.8232488632202148, + "learning_rate": 0.0008125665764472345, + "loss": 1.5224, + "step": 4538 + }, + { + "epoch": 0.4787974683544304, + "grad_norm": 0.6816334128379822, + "learning_rate": 0.0008123163884492422, + "loss": 1.5108, + "step": 4539 + }, + { + "epoch": 0.47890295358649787, + "grad_norm": 0.8234832286834717, + "learning_rate": 0.0008120661934683879, + "loss": 1.5435, + "step": 4540 + }, + { + "epoch": 0.4790084388185654, + "grad_norm": 0.7689533829689026, + "learning_rate": 0.0008118159915327072, + "loss": 1.4879, + "step": 4541 + }, + { + "epoch": 
0.4791139240506329, + "grad_norm": 0.6860280632972717, + "learning_rate": 0.0008115657826702364, + "loss": 1.4961, + "step": 4542 + }, + { + "epoch": 0.4792194092827004, + "grad_norm": 0.9507847428321838, + "learning_rate": 0.0008113155669090124, + "loss": 1.5314, + "step": 4543 + }, + { + "epoch": 0.47932489451476795, + "grad_norm": 0.8759494423866272, + "learning_rate": 0.0008110653442770736, + "loss": 1.5347, + "step": 4544 + }, + { + "epoch": 0.47943037974683544, + "grad_norm": 0.7679489254951477, + "learning_rate": 0.0008108151148024584, + "loss": 1.506, + "step": 4545 + }, + { + "epoch": 0.47953586497890294, + "grad_norm": 0.7906873822212219, + "learning_rate": 0.0008105648785132065, + "loss": 1.5226, + "step": 4546 + }, + { + "epoch": 0.4796413502109705, + "grad_norm": 0.6952474117279053, + "learning_rate": 0.0008103146354373577, + "loss": 1.5175, + "step": 4547 + }, + { + "epoch": 0.479746835443038, + "grad_norm": 0.8400688171386719, + "learning_rate": 0.0008100643856029534, + "loss": 1.5468, + "step": 4548 + }, + { + "epoch": 0.47985232067510547, + "grad_norm": 0.7117214798927307, + "learning_rate": 0.0008098141290380353, + "loss": 1.5311, + "step": 4549 + }, + { + "epoch": 0.479957805907173, + "grad_norm": 0.8827311396598816, + "learning_rate": 0.0008095638657706456, + "loss": 1.4826, + "step": 4550 + }, + { + "epoch": 0.4800632911392405, + "grad_norm": 0.8120549321174622, + "learning_rate": 0.0008093135958288278, + "loss": 1.4988, + "step": 4551 + }, + { + "epoch": 0.480168776371308, + "grad_norm": 0.8651955723762512, + "learning_rate": 0.0008090633192406256, + "loss": 1.4799, + "step": 4552 + }, + { + "epoch": 0.48027426160337555, + "grad_norm": 0.9917541146278381, + "learning_rate": 0.0008088130360340843, + "loss": 1.4839, + "step": 4553 + }, + { + "epoch": 0.48037974683544304, + "grad_norm": 0.67029869556427, + "learning_rate": 0.0008085627462372489, + "loss": 1.5146, + "step": 4554 + }, + { + "epoch": 0.48048523206751054, + "grad_norm": 
0.7690814137458801, + "learning_rate": 0.0008083124498781658, + "loss": 1.4758, + "step": 4555 + }, + { + "epoch": 0.4805907172995781, + "grad_norm": 0.7764253616333008, + "learning_rate": 0.0008080621469848817, + "loss": 1.5308, + "step": 4556 + }, + { + "epoch": 0.4806962025316456, + "grad_norm": 0.8294380307197571, + "learning_rate": 0.0008078118375854449, + "loss": 1.5062, + "step": 4557 + }, + { + "epoch": 0.48080168776371307, + "grad_norm": 1.142548680305481, + "learning_rate": 0.000807561521707903, + "loss": 1.5227, + "step": 4558 + }, + { + "epoch": 0.48090717299578056, + "grad_norm": 0.6520898342132568, + "learning_rate": 0.000807311199380306, + "loss": 1.5196, + "step": 4559 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 0.9651477932929993, + "learning_rate": 0.000807060870630703, + "loss": 1.5241, + "step": 4560 + }, + { + "epoch": 0.4811181434599156, + "grad_norm": 0.8364325165748596, + "learning_rate": 0.0008068105354871449, + "loss": 1.4572, + "step": 4561 + }, + { + "epoch": 0.4812236286919831, + "grad_norm": 0.6833959221839905, + "learning_rate": 0.0008065601939776833, + "loss": 1.4675, + "step": 4562 + }, + { + "epoch": 0.48132911392405064, + "grad_norm": 0.8695752024650574, + "learning_rate": 0.0008063098461303698, + "loss": 1.4864, + "step": 4563 + }, + { + "epoch": 0.48143459915611814, + "grad_norm": 0.6777974963188171, + "learning_rate": 0.0008060594919732572, + "loss": 1.5077, + "step": 4564 + }, + { + "epoch": 0.48154008438818563, + "grad_norm": 0.8448325991630554, + "learning_rate": 0.0008058091315343988, + "loss": 1.4679, + "step": 4565 + }, + { + "epoch": 0.4816455696202532, + "grad_norm": 0.7166736125946045, + "learning_rate": 0.0008055587648418492, + "loss": 1.5204, + "step": 4566 + }, + { + "epoch": 0.48175105485232067, + "grad_norm": 0.7527313828468323, + "learning_rate": 0.000805308391923663, + "loss": 1.4784, + "step": 4567 + }, + { + "epoch": 0.48185654008438816, + "grad_norm": 0.7199643850326538, + "learning_rate": 
0.0008050580128078957, + "loss": 1.5045, + "step": 4568 + }, + { + "epoch": 0.4819620253164557, + "grad_norm": 0.9280028343200684, + "learning_rate": 0.0008048076275226032, + "loss": 1.4669, + "step": 4569 + }, + { + "epoch": 0.4820675105485232, + "grad_norm": 0.8331152200698853, + "learning_rate": 0.000804557236095843, + "loss": 1.4892, + "step": 4570 + }, + { + "epoch": 0.4821729957805907, + "grad_norm": 0.7514859437942505, + "learning_rate": 0.0008043068385556725, + "loss": 1.4955, + "step": 4571 + }, + { + "epoch": 0.48227848101265824, + "grad_norm": 0.8082139492034912, + "learning_rate": 0.0008040564349301498, + "loss": 1.4659, + "step": 4572 + }, + { + "epoch": 0.48238396624472574, + "grad_norm": 0.711844265460968, + "learning_rate": 0.0008038060252473339, + "loss": 1.4719, + "step": 4573 + }, + { + "epoch": 0.48248945147679323, + "grad_norm": 0.9373201727867126, + "learning_rate": 0.0008035556095352847, + "loss": 1.5669, + "step": 4574 + }, + { + "epoch": 0.4825949367088608, + "grad_norm": 0.7223207354545593, + "learning_rate": 0.0008033051878220624, + "loss": 1.4831, + "step": 4575 + }, + { + "epoch": 0.48270042194092827, + "grad_norm": 0.9074340462684631, + "learning_rate": 0.0008030547601357281, + "loss": 1.543, + "step": 4576 + }, + { + "epoch": 0.48280590717299576, + "grad_norm": 0.89827561378479, + "learning_rate": 0.0008028043265043434, + "loss": 1.4871, + "step": 4577 + }, + { + "epoch": 0.4829113924050633, + "grad_norm": 0.7378258109092712, + "learning_rate": 0.0008025538869559703, + "loss": 1.5087, + "step": 4578 + }, + { + "epoch": 0.4830168776371308, + "grad_norm": 0.8768734335899353, + "learning_rate": 0.0008023034415186725, + "loss": 1.5362, + "step": 4579 + }, + { + "epoch": 0.4831223628691983, + "grad_norm": 0.6879096031188965, + "learning_rate": 0.0008020529902205129, + "loss": 1.5293, + "step": 4580 + }, + { + "epoch": 0.48322784810126584, + "grad_norm": 0.9953196048736572, + "learning_rate": 0.0008018025330895566, + "loss": 1.47, + "step": 
4581 + }, + { + "epoch": 0.48333333333333334, + "grad_norm": 0.6753053069114685, + "learning_rate": 0.0008015520701538677, + "loss": 1.4834, + "step": 4582 + }, + { + "epoch": 0.48343881856540083, + "grad_norm": 1.107089877128601, + "learning_rate": 0.0008013016014415126, + "loss": 1.4967, + "step": 4583 + }, + { + "epoch": 0.4835443037974684, + "grad_norm": 0.6763057112693787, + "learning_rate": 0.0008010511269805571, + "loss": 1.5287, + "step": 4584 + }, + { + "epoch": 0.48364978902953587, + "grad_norm": 0.8538289666175842, + "learning_rate": 0.0008008006467990684, + "loss": 1.5154, + "step": 4585 + }, + { + "epoch": 0.48375527426160336, + "grad_norm": 0.8262891173362732, + "learning_rate": 0.0008005501609251136, + "loss": 1.5021, + "step": 4586 + }, + { + "epoch": 0.4838607594936709, + "grad_norm": 0.8880492448806763, + "learning_rate": 0.0008002996693867615, + "loss": 1.5079, + "step": 4587 + }, + { + "epoch": 0.4839662447257384, + "grad_norm": 0.8225281834602356, + "learning_rate": 0.0008000491722120806, + "loss": 1.4607, + "step": 4588 + }, + { + "epoch": 0.4840717299578059, + "grad_norm": 0.7854773998260498, + "learning_rate": 0.0007997986694291404, + "loss": 1.4986, + "step": 4589 + }, + { + "epoch": 0.48417721518987344, + "grad_norm": 0.8810625672340393, + "learning_rate": 0.0007995481610660108, + "loss": 1.4947, + "step": 4590 + }, + { + "epoch": 0.48428270042194094, + "grad_norm": 0.6884089708328247, + "learning_rate": 0.0007992976471507628, + "loss": 1.5112, + "step": 4591 + }, + { + "epoch": 0.48438818565400843, + "grad_norm": 0.8837326765060425, + "learning_rate": 0.0007990471277114676, + "loss": 1.4974, + "step": 4592 + }, + { + "epoch": 0.4844936708860759, + "grad_norm": 0.7037901282310486, + "learning_rate": 0.0007987966027761972, + "loss": 1.5012, + "step": 4593 + }, + { + "epoch": 0.48459915611814347, + "grad_norm": 0.737686276435852, + "learning_rate": 0.0007985460723730242, + "loss": 1.4714, + "step": 4594 + }, + { + "epoch": 
0.48470464135021096, + "grad_norm": 0.7603896260261536, + "learning_rate": 0.0007982955365300214, + "loss": 1.5235, + "step": 4595 + }, + { + "epoch": 0.48481012658227846, + "grad_norm": 0.7268636226654053, + "learning_rate": 0.0007980449952752633, + "loss": 1.5328, + "step": 4596 + }, + { + "epoch": 0.484915611814346, + "grad_norm": 0.7437018156051636, + "learning_rate": 0.0007977944486368237, + "loss": 1.489, + "step": 4597 + }, + { + "epoch": 0.4850210970464135, + "grad_norm": 0.7671444416046143, + "learning_rate": 0.0007975438966427778, + "loss": 1.5148, + "step": 4598 + }, + { + "epoch": 0.485126582278481, + "grad_norm": 0.7568908333778381, + "learning_rate": 0.0007972933393212012, + "loss": 1.5609, + "step": 4599 + }, + { + "epoch": 0.48523206751054854, + "grad_norm": 0.7596182823181152, + "learning_rate": 0.0007970427767001702, + "loss": 1.5295, + "step": 4600 + }, + { + "epoch": 0.48533755274261603, + "grad_norm": 0.8959376811981201, + "learning_rate": 0.0007967922088077615, + "loss": 1.5431, + "step": 4601 + }, + { + "epoch": 0.4854430379746835, + "grad_norm": 0.686716616153717, + "learning_rate": 0.0007965416356720524, + "loss": 1.5144, + "step": 4602 + }, + { + "epoch": 0.48554852320675107, + "grad_norm": 0.7429050803184509, + "learning_rate": 0.000796291057321121, + "loss": 1.461, + "step": 4603 + }, + { + "epoch": 0.48565400843881856, + "grad_norm": 0.7677836418151855, + "learning_rate": 0.0007960404737830457, + "loss": 1.4926, + "step": 4604 + }, + { + "epoch": 0.48575949367088606, + "grad_norm": 0.8359735012054443, + "learning_rate": 0.0007957898850859058, + "loss": 1.5377, + "step": 4605 + }, + { + "epoch": 0.4858649789029536, + "grad_norm": 0.7335144877433777, + "learning_rate": 0.000795539291257781, + "loss": 1.527, + "step": 4606 + }, + { + "epoch": 0.4859704641350211, + "grad_norm": 0.76484215259552, + "learning_rate": 0.0007952886923267516, + "loss": 1.5173, + "step": 4607 + }, + { + "epoch": 0.4860759493670886, + "grad_norm": 
0.8174533843994141, + "learning_rate": 0.0007950380883208981, + "loss": 1.505, + "step": 4608 + }, + { + "epoch": 0.48618143459915614, + "grad_norm": 0.8035338521003723, + "learning_rate": 0.0007947874792683025, + "loss": 1.5207, + "step": 4609 + }, + { + "epoch": 0.48628691983122363, + "grad_norm": 0.9388424754142761, + "learning_rate": 0.0007945368651970464, + "loss": 1.5391, + "step": 4610 + }, + { + "epoch": 0.4863924050632911, + "grad_norm": 0.8170902132987976, + "learning_rate": 0.0007942862461352125, + "loss": 1.4744, + "step": 4611 + }, + { + "epoch": 0.48649789029535867, + "grad_norm": 0.9476525187492371, + "learning_rate": 0.0007940356221108837, + "loss": 1.5053, + "step": 4612 + }, + { + "epoch": 0.48660337552742616, + "grad_norm": 0.8375870585441589, + "learning_rate": 0.0007937849931521441, + "loss": 1.4789, + "step": 4613 + }, + { + "epoch": 0.48670886075949366, + "grad_norm": 0.8920224905014038, + "learning_rate": 0.0007935343592870778, + "loss": 1.4885, + "step": 4614 + }, + { + "epoch": 0.4868143459915612, + "grad_norm": 0.9372267723083496, + "learning_rate": 0.0007932837205437692, + "loss": 1.5369, + "step": 4615 + }, + { + "epoch": 0.4869198312236287, + "grad_norm": 0.7394975423812866, + "learning_rate": 0.000793033076950304, + "loss": 1.5154, + "step": 4616 + }, + { + "epoch": 0.4870253164556962, + "grad_norm": 0.8723307847976685, + "learning_rate": 0.0007927824285347678, + "loss": 1.5315, + "step": 4617 + }, + { + "epoch": 0.48713080168776374, + "grad_norm": 0.6837776899337769, + "learning_rate": 0.0007925317753252473, + "loss": 1.469, + "step": 4618 + }, + { + "epoch": 0.48723628691983123, + "grad_norm": 0.8529983758926392, + "learning_rate": 0.0007922811173498293, + "loss": 1.4328, + "step": 4619 + }, + { + "epoch": 0.4873417721518987, + "grad_norm": 0.7964123487472534, + "learning_rate": 0.0007920304546366013, + "loss": 1.5061, + "step": 4620 + }, + { + "epoch": 0.48744725738396627, + "grad_norm": 0.7711484432220459, + "learning_rate": 
0.0007917797872136511, + "loss": 1.5183, + "step": 4621 + }, + { + "epoch": 0.48755274261603376, + "grad_norm": 0.7221225500106812, + "learning_rate": 0.0007915291151090676, + "loss": 1.4763, + "step": 4622 + }, + { + "epoch": 0.48765822784810126, + "grad_norm": 0.7941020131111145, + "learning_rate": 0.0007912784383509396, + "loss": 1.4929, + "step": 4623 + }, + { + "epoch": 0.4877637130801688, + "grad_norm": 0.7650834918022156, + "learning_rate": 0.0007910277569673568, + "loss": 1.4993, + "step": 4624 + }, + { + "epoch": 0.4878691983122363, + "grad_norm": 0.7210298180580139, + "learning_rate": 0.000790777070986409, + "loss": 1.4935, + "step": 4625 + }, + { + "epoch": 0.4879746835443038, + "grad_norm": 0.7971698641777039, + "learning_rate": 0.0007905263804361873, + "loss": 1.5455, + "step": 4626 + }, + { + "epoch": 0.4880801687763713, + "grad_norm": 0.6992742419242859, + "learning_rate": 0.0007902756853447824, + "loss": 1.5389, + "step": 4627 + }, + { + "epoch": 0.48818565400843883, + "grad_norm": 0.7835689187049866, + "learning_rate": 0.0007900249857402863, + "loss": 1.5003, + "step": 4628 + }, + { + "epoch": 0.4882911392405063, + "grad_norm": 0.6786754727363586, + "learning_rate": 0.000789774281650791, + "loss": 1.4823, + "step": 4629 + }, + { + "epoch": 0.4883966244725738, + "grad_norm": 0.7772781848907471, + "learning_rate": 0.000789523573104389, + "loss": 1.5437, + "step": 4630 + }, + { + "epoch": 0.48850210970464136, + "grad_norm": 0.6721428632736206, + "learning_rate": 0.0007892728601291737, + "loss": 1.5168, + "step": 4631 + }, + { + "epoch": 0.48860759493670886, + "grad_norm": 0.9551981091499329, + "learning_rate": 0.0007890221427532384, + "loss": 1.5009, + "step": 4632 + }, + { + "epoch": 0.48871308016877635, + "grad_norm": 0.7772983312606812, + "learning_rate": 0.0007887714210046775, + "loss": 1.5113, + "step": 4633 + }, + { + "epoch": 0.4888185654008439, + "grad_norm": 0.7115404009819031, + "learning_rate": 0.0007885206949115855, + "loss": 1.492, + 
"step": 4634 + }, + { + "epoch": 0.4889240506329114, + "grad_norm": 0.6985774040222168, + "learning_rate": 0.0007882699645020577, + "loss": 1.4923, + "step": 4635 + }, + { + "epoch": 0.4890295358649789, + "grad_norm": 0.7953736782073975, + "learning_rate": 0.0007880192298041893, + "loss": 1.5205, + "step": 4636 + }, + { + "epoch": 0.48913502109704643, + "grad_norm": 0.6445872783660889, + "learning_rate": 0.0007877684908460768, + "loss": 1.5096, + "step": 4637 + }, + { + "epoch": 0.4892405063291139, + "grad_norm": 0.725935697555542, + "learning_rate": 0.0007875177476558165, + "loss": 1.5136, + "step": 4638 + }, + { + "epoch": 0.4893459915611814, + "grad_norm": 0.7311447262763977, + "learning_rate": 0.0007872670002615056, + "loss": 1.5126, + "step": 4639 + }, + { + "epoch": 0.48945147679324896, + "grad_norm": 0.6417257785797119, + "learning_rate": 0.0007870162486912414, + "loss": 1.5297, + "step": 4640 + }, + { + "epoch": 0.48955696202531646, + "grad_norm": 0.7816077470779419, + "learning_rate": 0.0007867654929731221, + "loss": 1.5193, + "step": 4641 + }, + { + "epoch": 0.48966244725738395, + "grad_norm": 0.8044137954711914, + "learning_rate": 0.0007865147331352457, + "loss": 1.4803, + "step": 4642 + }, + { + "epoch": 0.4897679324894515, + "grad_norm": 0.8092776536941528, + "learning_rate": 0.0007862639692057115, + "loss": 1.5092, + "step": 4643 + }, + { + "epoch": 0.489873417721519, + "grad_norm": 0.6873056888580322, + "learning_rate": 0.0007860132012126187, + "loss": 1.5217, + "step": 4644 + }, + { + "epoch": 0.4899789029535865, + "grad_norm": 0.8340899348258972, + "learning_rate": 0.0007857624291840672, + "loss": 1.5461, + "step": 4645 + }, + { + "epoch": 0.49008438818565403, + "grad_norm": 0.8726725578308105, + "learning_rate": 0.0007855116531481572, + "loss": 1.4987, + "step": 4646 + }, + { + "epoch": 0.4901898734177215, + "grad_norm": 0.6697202324867249, + "learning_rate": 0.0007852608731329893, + "loss": 1.5238, + "step": 4647 + }, + { + "epoch": 
0.490295358649789, + "grad_norm": 0.7109459042549133, + "learning_rate": 0.0007850100891666648, + "loss": 1.5012, + "step": 4648 + }, + { + "epoch": 0.49040084388185656, + "grad_norm": 0.7143064141273499, + "learning_rate": 0.0007847593012772852, + "loss": 1.4812, + "step": 4649 + }, + { + "epoch": 0.49050632911392406, + "grad_norm": 0.7077755928039551, + "learning_rate": 0.0007845085094929527, + "loss": 1.5162, + "step": 4650 + }, + { + "epoch": 0.49061181434599155, + "grad_norm": 0.7943423390388489, + "learning_rate": 0.0007842577138417695, + "loss": 1.5077, + "step": 4651 + }, + { + "epoch": 0.4907172995780591, + "grad_norm": 0.7756275534629822, + "learning_rate": 0.0007840069143518386, + "loss": 1.5024, + "step": 4652 + }, + { + "epoch": 0.4908227848101266, + "grad_norm": 0.6697688698768616, + "learning_rate": 0.0007837561110512635, + "loss": 1.4853, + "step": 4653 + }, + { + "epoch": 0.4909282700421941, + "grad_norm": 0.7155890464782715, + "learning_rate": 0.0007835053039681476, + "loss": 1.5191, + "step": 4654 + }, + { + "epoch": 0.49103375527426163, + "grad_norm": 0.6958602070808411, + "learning_rate": 0.0007832544931305956, + "loss": 1.4872, + "step": 4655 + }, + { + "epoch": 0.4911392405063291, + "grad_norm": 0.7023391127586365, + "learning_rate": 0.0007830036785667116, + "loss": 1.4911, + "step": 4656 + }, + { + "epoch": 0.4912447257383966, + "grad_norm": 0.654276430606842, + "learning_rate": 0.000782752860304601, + "loss": 1.4951, + "step": 4657 + }, + { + "epoch": 0.4913502109704641, + "grad_norm": 0.6913372874259949, + "learning_rate": 0.0007825020383723692, + "loss": 1.5088, + "step": 4658 + }, + { + "epoch": 0.49145569620253166, + "grad_norm": 0.6325933933258057, + "learning_rate": 0.0007822512127981218, + "loss": 1.4915, + "step": 4659 + }, + { + "epoch": 0.49156118143459915, + "grad_norm": 0.7521264553070068, + "learning_rate": 0.0007820003836099649, + "loss": 1.5075, + "step": 4660 + }, + { + "epoch": 0.49166666666666664, + "grad_norm": 
0.660923957824707, + "learning_rate": 0.0007817495508360057, + "loss": 1.5143, + "step": 4661 + }, + { + "epoch": 0.4917721518987342, + "grad_norm": 0.6440953612327576, + "learning_rate": 0.0007814987145043511, + "loss": 1.4718, + "step": 4662 + }, + { + "epoch": 0.4918776371308017, + "grad_norm": 0.7731701731681824, + "learning_rate": 0.0007812478746431085, + "loss": 1.4838, + "step": 4663 + }, + { + "epoch": 0.4919831223628692, + "grad_norm": 0.7576044797897339, + "learning_rate": 0.0007809970312803855, + "loss": 1.4993, + "step": 4664 + }, + { + "epoch": 0.4920886075949367, + "grad_norm": 0.8682838082313538, + "learning_rate": 0.0007807461844442906, + "loss": 1.498, + "step": 4665 + }, + { + "epoch": 0.4921940928270042, + "grad_norm": 1.0930794477462769, + "learning_rate": 0.0007804953341629326, + "loss": 1.4976, + "step": 4666 + }, + { + "epoch": 0.4922995780590717, + "grad_norm": 0.9500361680984497, + "learning_rate": 0.0007802444804644202, + "loss": 1.5145, + "step": 4667 + }, + { + "epoch": 0.49240506329113926, + "grad_norm": 1.599717617034912, + "learning_rate": 0.0007799936233768632, + "loss": 1.5115, + "step": 4668 + }, + { + "epoch": 0.49251054852320675, + "grad_norm": 1.1987297534942627, + "learning_rate": 0.0007797427629283708, + "loss": 1.5147, + "step": 4669 + }, + { + "epoch": 0.49261603375527424, + "grad_norm": 1.2655595541000366, + "learning_rate": 0.0007794918991470537, + "loss": 1.4946, + "step": 4670 + }, + { + "epoch": 0.4927215189873418, + "grad_norm": 1.1201311349868774, + "learning_rate": 0.0007792410320610222, + "loss": 1.516, + "step": 4671 + }, + { + "epoch": 0.4928270042194093, + "grad_norm": 1.1773380041122437, + "learning_rate": 0.0007789901616983872, + "loss": 1.474, + "step": 4672 + }, + { + "epoch": 0.4929324894514768, + "grad_norm": 0.9273606538772583, + "learning_rate": 0.0007787392880872601, + "loss": 1.5172, + "step": 4673 + }, + { + "epoch": 0.4930379746835443, + "grad_norm": 1.2190510034561157, + "learning_rate": 
0.0007784884112557524, + "loss": 1.5471, + "step": 4674 + }, + { + "epoch": 0.4931434599156118, + "grad_norm": 0.8668409585952759, + "learning_rate": 0.0007782375312319761, + "loss": 1.487, + "step": 4675 + }, + { + "epoch": 0.4932489451476793, + "grad_norm": 1.2185826301574707, + "learning_rate": 0.0007779866480440437, + "loss": 1.4831, + "step": 4676 + }, + { + "epoch": 0.49335443037974686, + "grad_norm": 0.997990071773529, + "learning_rate": 0.0007777357617200679, + "loss": 1.4935, + "step": 4677 + }, + { + "epoch": 0.49345991561181435, + "grad_norm": 1.4260051250457764, + "learning_rate": 0.0007774848722881616, + "loss": 1.4887, + "step": 4678 + }, + { + "epoch": 0.49356540084388184, + "grad_norm": 1.2280452251434326, + "learning_rate": 0.0007772339797764385, + "loss": 1.5167, + "step": 4679 + }, + { + "epoch": 0.4936708860759494, + "grad_norm": 1.043880581855774, + "learning_rate": 0.0007769830842130119, + "loss": 1.4729, + "step": 4680 + }, + { + "epoch": 0.4937763713080169, + "grad_norm": 1.1415214538574219, + "learning_rate": 0.0007767321856259963, + "loss": 1.4946, + "step": 4681 + }, + { + "epoch": 0.4938818565400844, + "grad_norm": 1.0038607120513916, + "learning_rate": 0.0007764812840435058, + "loss": 1.4757, + "step": 4682 + }, + { + "epoch": 0.4939873417721519, + "grad_norm": 0.9632622599601746, + "learning_rate": 0.0007762303794936556, + "loss": 1.4918, + "step": 4683 + }, + { + "epoch": 0.4940928270042194, + "grad_norm": 1.0447962284088135, + "learning_rate": 0.0007759794720045606, + "loss": 1.46, + "step": 4684 + }, + { + "epoch": 0.4941983122362869, + "grad_norm": 0.7625881433486938, + "learning_rate": 0.0007757285616043363, + "loss": 1.4469, + "step": 4685 + }, + { + "epoch": 0.49430379746835446, + "grad_norm": 0.9874687194824219, + "learning_rate": 0.0007754776483210981, + "loss": 1.5059, + "step": 4686 + }, + { + "epoch": 0.49440928270042195, + "grad_norm": 0.7969593405723572, + "learning_rate": 0.0007752267321829624, + "loss": 1.5279, + 
"step": 4687 + }, + { + "epoch": 0.49451476793248944, + "grad_norm": 1.1246598958969116, + "learning_rate": 0.0007749758132180459, + "loss": 1.4848, + "step": 4688 + }, + { + "epoch": 0.494620253164557, + "grad_norm": 0.8999158143997192, + "learning_rate": 0.0007747248914544646, + "loss": 1.472, + "step": 4689 + }, + { + "epoch": 0.4947257383966245, + "grad_norm": 1.3431223630905151, + "learning_rate": 0.0007744739669203361, + "loss": 1.5428, + "step": 4690 + }, + { + "epoch": 0.494831223628692, + "grad_norm": 1.0799466371536255, + "learning_rate": 0.0007742230396437775, + "loss": 1.5345, + "step": 4691 + }, + { + "epoch": 0.49493670886075947, + "grad_norm": 1.2211021184921265, + "learning_rate": 0.0007739721096529066, + "loss": 1.5358, + "step": 4692 + }, + { + "epoch": 0.495042194092827, + "grad_norm": 1.2499310970306396, + "learning_rate": 0.0007737211769758412, + "loss": 1.4968, + "step": 4693 + }, + { + "epoch": 0.4951476793248945, + "grad_norm": 1.0297057628631592, + "learning_rate": 0.0007734702416406997, + "loss": 1.5185, + "step": 4694 + }, + { + "epoch": 0.495253164556962, + "grad_norm": 1.0062215328216553, + "learning_rate": 0.0007732193036756006, + "loss": 1.4801, + "step": 4695 + }, + { + "epoch": 0.49535864978902955, + "grad_norm": 0.9743273854255676, + "learning_rate": 0.0007729683631086627, + "loss": 1.5132, + "step": 4696 + }, + { + "epoch": 0.49546413502109704, + "grad_norm": 0.919133186340332, + "learning_rate": 0.0007727174199680051, + "loss": 1.4804, + "step": 4697 + }, + { + "epoch": 0.49556962025316453, + "grad_norm": 0.9413127899169922, + "learning_rate": 0.0007724664742817475, + "loss": 1.4577, + "step": 4698 + }, + { + "epoch": 0.4956751054852321, + "grad_norm": 0.905826210975647, + "learning_rate": 0.0007722155260780093, + "loss": 1.5092, + "step": 4699 + }, + { + "epoch": 0.4957805907172996, + "grad_norm": 1.1949831247329712, + "learning_rate": 0.0007719645753849108, + "loss": 1.479, + "step": 4700 + }, + { + "epoch": 
0.49588607594936707, + "grad_norm": 0.741329550743103, + "learning_rate": 0.0007717136222305718, + "loss": 1.5008, + "step": 4701 + }, + { + "epoch": 0.4959915611814346, + "grad_norm": 0.7704834342002869, + "learning_rate": 0.0007714626666431134, + "loss": 1.5126, + "step": 4702 + }, + { + "epoch": 0.4960970464135021, + "grad_norm": 0.845852792263031, + "learning_rate": 0.000771211708650656, + "loss": 1.473, + "step": 4703 + }, + { + "epoch": 0.4962025316455696, + "grad_norm": 0.9505012035369873, + "learning_rate": 0.000770960748281321, + "loss": 1.499, + "step": 4704 + }, + { + "epoch": 0.49630801687763715, + "grad_norm": 0.7949641346931458, + "learning_rate": 0.0007707097855632297, + "loss": 1.476, + "step": 4705 + }, + { + "epoch": 0.49641350210970464, + "grad_norm": 1.3208800554275513, + "learning_rate": 0.0007704588205245034, + "loss": 1.4915, + "step": 4706 + }, + { + "epoch": 0.49651898734177213, + "grad_norm": 0.6999176144599915, + "learning_rate": 0.0007702078531932645, + "loss": 1.5395, + "step": 4707 + }, + { + "epoch": 0.4966244725738397, + "grad_norm": 0.8960076570510864, + "learning_rate": 0.0007699568835976348, + "loss": 1.5153, + "step": 4708 + }, + { + "epoch": 0.4967299578059072, + "grad_norm": 0.8272984027862549, + "learning_rate": 0.0007697059117657368, + "loss": 1.4853, + "step": 4709 + }, + { + "epoch": 0.49683544303797467, + "grad_norm": 1.084540605545044, + "learning_rate": 0.0007694549377256932, + "loss": 1.5103, + "step": 4710 + }, + { + "epoch": 0.4969409282700422, + "grad_norm": 0.675737202167511, + "learning_rate": 0.0007692039615056264, + "loss": 1.5163, + "step": 4711 + }, + { + "epoch": 0.4970464135021097, + "grad_norm": 0.8819631934165955, + "learning_rate": 0.0007689529831336604, + "loss": 1.4925, + "step": 4712 + }, + { + "epoch": 0.4971518987341772, + "grad_norm": 0.6940995454788208, + "learning_rate": 0.0007687020026379181, + "loss": 1.4684, + "step": 4713 + }, + { + "epoch": 0.49725738396624475, + "grad_norm": 
0.8968209624290466, + "learning_rate": 0.0007684510200465231, + "loss": 1.5147, + "step": 4714 + }, + { + "epoch": 0.49736286919831224, + "grad_norm": 0.6876379251480103, + "learning_rate": 0.0007682000353875992, + "loss": 1.4889, + "step": 4715 + }, + { + "epoch": 0.49746835443037973, + "grad_norm": 1.1098241806030273, + "learning_rate": 0.0007679490486892705, + "loss": 1.4765, + "step": 4716 + }, + { + "epoch": 0.4975738396624473, + "grad_norm": 0.9243025779724121, + "learning_rate": 0.0007676980599796616, + "loss": 1.4872, + "step": 4717 + }, + { + "epoch": 0.4976793248945148, + "grad_norm": 1.1408238410949707, + "learning_rate": 0.0007674470692868967, + "loss": 1.5389, + "step": 4718 + }, + { + "epoch": 0.49778481012658227, + "grad_norm": 1.2074824571609497, + "learning_rate": 0.0007671960766391008, + "loss": 1.5104, + "step": 4719 + }, + { + "epoch": 0.4978902953586498, + "grad_norm": 0.830368161201477, + "learning_rate": 0.0007669450820643987, + "loss": 1.4633, + "step": 4720 + }, + { + "epoch": 0.4979957805907173, + "grad_norm": 1.0565953254699707, + "learning_rate": 0.0007666940855909155, + "loss": 1.5017, + "step": 4721 + }, + { + "epoch": 0.4981012658227848, + "grad_norm": 0.9352927207946777, + "learning_rate": 0.000766443087246777, + "loss": 1.5028, + "step": 4722 + }, + { + "epoch": 0.49820675105485235, + "grad_norm": 1.2655442953109741, + "learning_rate": 0.0007661920870601085, + "loss": 1.504, + "step": 4723 + }, + { + "epoch": 0.49831223628691984, + "grad_norm": 1.051146149635315, + "learning_rate": 0.000765941085059036, + "loss": 1.5117, + "step": 4724 + }, + { + "epoch": 0.49841772151898733, + "grad_norm": 1.084197998046875, + "learning_rate": 0.0007656900812716853, + "loss": 1.5085, + "step": 4725 + }, + { + "epoch": 0.4985232067510548, + "grad_norm": 0.8956390023231506, + "learning_rate": 0.0007654390757261827, + "loss": 1.4903, + "step": 4726 + }, + { + "epoch": 0.4986286919831224, + "grad_norm": 0.7901977300643921, + "learning_rate": 
0.0007651880684506548, + "loss": 1.4556, + "step": 4727 + }, + { + "epoch": 0.49873417721518987, + "grad_norm": 0.9361427426338196, + "learning_rate": 0.0007649370594732282, + "loss": 1.5169, + "step": 4728 + }, + { + "epoch": 0.49883966244725736, + "grad_norm": 1.1492608785629272, + "learning_rate": 0.0007646860488220293, + "loss": 1.4804, + "step": 4729 + }, + { + "epoch": 0.4989451476793249, + "grad_norm": 0.7457550168037415, + "learning_rate": 0.0007644350365251855, + "loss": 1.4794, + "step": 4730 + }, + { + "epoch": 0.4990506329113924, + "grad_norm": 0.9322028160095215, + "learning_rate": 0.0007641840226108241, + "loss": 1.5146, + "step": 4731 + }, + { + "epoch": 0.4991561181434599, + "grad_norm": 0.6696295738220215, + "learning_rate": 0.000763933007107072, + "loss": 1.4751, + "step": 4732 + }, + { + "epoch": 0.49926160337552744, + "grad_norm": 0.8966294527053833, + "learning_rate": 0.0007636819900420572, + "loss": 1.5142, + "step": 4733 + }, + { + "epoch": 0.49936708860759493, + "grad_norm": 0.744299054145813, + "learning_rate": 0.0007634309714439069, + "loss": 1.501, + "step": 4734 + }, + { + "epoch": 0.4994725738396624, + "grad_norm": 0.984420895576477, + "learning_rate": 0.0007631799513407495, + "loss": 1.5182, + "step": 4735 + }, + { + "epoch": 0.49957805907173, + "grad_norm": 0.7614747285842896, + "learning_rate": 0.0007629289297607127, + "loss": 1.4987, + "step": 4736 + }, + { + "epoch": 0.49968354430379747, + "grad_norm": 0.7875403165817261, + "learning_rate": 0.0007626779067319251, + "loss": 1.4646, + "step": 4737 + }, + { + "epoch": 0.49978902953586496, + "grad_norm": 0.7345932722091675, + "learning_rate": 0.0007624268822825145, + "loss": 1.4494, + "step": 4738 + }, + { + "epoch": 0.4998945147679325, + "grad_norm": 0.8045998811721802, + "learning_rate": 0.00076217585644061, + "loss": 1.4902, + "step": 4739 + }, + { + "epoch": 0.5, + "grad_norm": 0.8017346262931824, + "learning_rate": 0.0007619248292343399, + "loss": 1.4972, + "step": 4740 + }, + { + 
"epoch": 0.5001054852320675, + "grad_norm": 0.7564310431480408, + "learning_rate": 0.0007616738006918334, + "loss": 1.5278, + "step": 4741 + }, + { + "epoch": 0.500210970464135, + "grad_norm": 0.7691919803619385, + "learning_rate": 0.0007614227708412191, + "loss": 1.4819, + "step": 4742 + }, + { + "epoch": 0.5003164556962025, + "grad_norm": 0.7489315271377563, + "learning_rate": 0.0007611717397106265, + "loss": 1.4846, + "step": 4743 + }, + { + "epoch": 0.5004219409282701, + "grad_norm": 0.8007112145423889, + "learning_rate": 0.0007609207073281848, + "loss": 1.5416, + "step": 4744 + }, + { + "epoch": 0.5005274261603375, + "grad_norm": 0.6866470575332642, + "learning_rate": 0.0007606696737220233, + "loss": 1.4877, + "step": 4745 + }, + { + "epoch": 0.5006329113924051, + "grad_norm": 0.7067131996154785, + "learning_rate": 0.000760418638920272, + "loss": 1.5131, + "step": 4746 + }, + { + "epoch": 0.5007383966244726, + "grad_norm": 0.7186025381088257, + "learning_rate": 0.0007601676029510597, + "loss": 1.5191, + "step": 4747 + }, + { + "epoch": 0.50084388185654, + "grad_norm": 0.9865480065345764, + "learning_rate": 0.000759916565842517, + "loss": 1.5411, + "step": 4748 + }, + { + "epoch": 0.5009493670886076, + "grad_norm": 0.6854108572006226, + "learning_rate": 0.0007596655276227739, + "loss": 1.5231, + "step": 4749 + }, + { + "epoch": 0.5010548523206751, + "grad_norm": 0.9316319823265076, + "learning_rate": 0.0007594144883199599, + "loss": 1.5109, + "step": 4750 + }, + { + "epoch": 0.5011603375527426, + "grad_norm": 0.7601652145385742, + "learning_rate": 0.0007591634479622056, + "loss": 1.5221, + "step": 4751 + }, + { + "epoch": 0.5012658227848101, + "grad_norm": 1.0131869316101074, + "learning_rate": 0.0007589124065776414, + "loss": 1.5047, + "step": 4752 + }, + { + "epoch": 0.5013713080168777, + "grad_norm": 0.9540952444076538, + "learning_rate": 0.0007586613641943976, + "loss": 1.5278, + "step": 4753 + }, + { + "epoch": 0.5014767932489451, + "grad_norm": 
0.7884735465049744, + "learning_rate": 0.0007584103208406048, + "loss": 1.4951, + "step": 4754 + }, + { + "epoch": 0.5015822784810127, + "grad_norm": 0.9023188948631287, + "learning_rate": 0.0007581592765443933, + "loss": 1.4822, + "step": 4755 + }, + { + "epoch": 0.5016877637130802, + "grad_norm": 0.6776342988014221, + "learning_rate": 0.0007579082313338943, + "loss": 1.4539, + "step": 4756 + }, + { + "epoch": 0.5017932489451477, + "grad_norm": 0.7682253122329712, + "learning_rate": 0.0007576571852372386, + "loss": 1.5156, + "step": 4757 + }, + { + "epoch": 0.5018987341772152, + "grad_norm": 0.7423089146614075, + "learning_rate": 0.0007574061382825572, + "loss": 1.4863, + "step": 4758 + }, + { + "epoch": 0.5020042194092827, + "grad_norm": 0.6992439031600952, + "learning_rate": 0.0007571550904979812, + "loss": 1.4762, + "step": 4759 + }, + { + "epoch": 0.5021097046413502, + "grad_norm": 0.6730438470840454, + "learning_rate": 0.0007569040419116413, + "loss": 1.5035, + "step": 4760 + }, + { + "epoch": 0.5022151898734177, + "grad_norm": 0.6807112097740173, + "learning_rate": 0.0007566529925516692, + "loss": 1.5042, + "step": 4761 + }, + { + "epoch": 0.5023206751054853, + "grad_norm": 0.699651300907135, + "learning_rate": 0.0007564019424461962, + "loss": 1.5222, + "step": 4762 + }, + { + "epoch": 0.5024261603375527, + "grad_norm": 0.7072161436080933, + "learning_rate": 0.0007561508916233535, + "loss": 1.4739, + "step": 4763 + }, + { + "epoch": 0.5025316455696203, + "grad_norm": 0.7545862197875977, + "learning_rate": 0.0007558998401112727, + "loss": 1.5022, + "step": 4764 + }, + { + "epoch": 0.5026371308016878, + "grad_norm": 0.6929441690444946, + "learning_rate": 0.0007556487879380856, + "loss": 1.4787, + "step": 4765 + }, + { + "epoch": 0.5027426160337553, + "grad_norm": 0.865319550037384, + "learning_rate": 0.0007553977351319235, + "loss": 1.4612, + "step": 4766 + }, + { + "epoch": 0.5028481012658228, + "grad_norm": 0.6836400628089905, + "learning_rate": 
0.0007551466817209183, + "loss": 1.5074, + "step": 4767 + }, + { + "epoch": 0.5029535864978903, + "grad_norm": 1.031912088394165, + "learning_rate": 0.0007548956277332016, + "loss": 1.4882, + "step": 4768 + }, + { + "epoch": 0.5030590717299578, + "grad_norm": 0.7372215986251831, + "learning_rate": 0.0007546445731969056, + "loss": 1.5323, + "step": 4769 + }, + { + "epoch": 0.5031645569620253, + "grad_norm": 0.8302517533302307, + "learning_rate": 0.000754393518140162, + "loss": 1.501, + "step": 4770 + }, + { + "epoch": 0.5032700421940929, + "grad_norm": 0.756977915763855, + "learning_rate": 0.0007541424625911026, + "loss": 1.4912, + "step": 4771 + }, + { + "epoch": 0.5033755274261603, + "grad_norm": 0.7118030190467834, + "learning_rate": 0.0007538914065778598, + "loss": 1.5607, + "step": 4772 + }, + { + "epoch": 0.5034810126582279, + "grad_norm": 0.6702001690864563, + "learning_rate": 0.0007536403501285653, + "loss": 1.506, + "step": 4773 + }, + { + "epoch": 0.5035864978902953, + "grad_norm": 0.8181508183479309, + "learning_rate": 0.0007533892932713517, + "loss": 1.4839, + "step": 4774 + }, + { + "epoch": 0.5036919831223629, + "grad_norm": 0.6963851451873779, + "learning_rate": 0.0007531382360343507, + "loss": 1.531, + "step": 4775 + }, + { + "epoch": 0.5037974683544304, + "grad_norm": 0.996468722820282, + "learning_rate": 0.0007528871784456948, + "loss": 1.5175, + "step": 4776 + }, + { + "epoch": 0.5039029535864978, + "grad_norm": 0.9872748255729675, + "learning_rate": 0.0007526361205335159, + "loss": 1.4884, + "step": 4777 + }, + { + "epoch": 0.5040084388185654, + "grad_norm": 0.6626024842262268, + "learning_rate": 0.0007523850623259469, + "loss": 1.5244, + "step": 4778 + }, + { + "epoch": 0.5041139240506329, + "grad_norm": 0.7690697908401489, + "learning_rate": 0.0007521340038511196, + "loss": 1.4839, + "step": 4779 + }, + { + "epoch": 0.5042194092827004, + "grad_norm": 0.7323869466781616, + "learning_rate": 0.0007518829451371665, + "loss": 1.4793, + "step": 4780 
+ }, + { + "epoch": 0.5043248945147679, + "grad_norm": 0.7405219078063965, + "learning_rate": 0.0007516318862122199, + "loss": 1.4694, + "step": 4781 + }, + { + "epoch": 0.5044303797468355, + "grad_norm": 0.6824813485145569, + "learning_rate": 0.0007513808271044125, + "loss": 1.5296, + "step": 4782 + }, + { + "epoch": 0.5045358649789029, + "grad_norm": 0.7523124814033508, + "learning_rate": 0.0007511297678418766, + "loss": 1.5039, + "step": 4783 + }, + { + "epoch": 0.5046413502109705, + "grad_norm": 0.841255784034729, + "learning_rate": 0.0007508787084527445, + "loss": 1.478, + "step": 4784 + }, + { + "epoch": 0.504746835443038, + "grad_norm": 0.7491867542266846, + "learning_rate": 0.0007506276489651489, + "loss": 1.4936, + "step": 4785 + }, + { + "epoch": 0.5048523206751054, + "grad_norm": 0.6614307761192322, + "learning_rate": 0.0007503765894072217, + "loss": 1.4787, + "step": 4786 + }, + { + "epoch": 0.504957805907173, + "grad_norm": 0.6622448563575745, + "learning_rate": 0.000750125529807096, + "loss": 1.4761, + "step": 4787 + }, + { + "epoch": 0.5050632911392405, + "grad_norm": 0.7332028746604919, + "learning_rate": 0.0007498744701929041, + "loss": 1.4848, + "step": 4788 + }, + { + "epoch": 0.505168776371308, + "grad_norm": 0.7538337707519531, + "learning_rate": 0.0007496234105927785, + "loss": 1.4422, + "step": 4789 + }, + { + "epoch": 0.5052742616033755, + "grad_norm": 0.6806461215019226, + "learning_rate": 0.0007493723510348516, + "loss": 1.5417, + "step": 4790 + }, + { + "epoch": 0.5053797468354431, + "grad_norm": 0.7973043322563171, + "learning_rate": 0.0007491212915472557, + "loss": 1.5257, + "step": 4791 + }, + { + "epoch": 0.5054852320675105, + "grad_norm": 0.7458393573760986, + "learning_rate": 0.0007488702321581234, + "loss": 1.4558, + "step": 4792 + }, + { + "epoch": 0.505590717299578, + "grad_norm": 0.796363115310669, + "learning_rate": 0.0007486191728955873, + "loss": 1.5227, + "step": 4793 + }, + { + "epoch": 0.5056962025316456, + "grad_norm": 
0.7010213732719421, + "learning_rate": 0.00074836811378778, + "loss": 1.5638, + "step": 4794 + }, + { + "epoch": 0.505801687763713, + "grad_norm": 0.8043503165245056, + "learning_rate": 0.0007481170548628335, + "loss": 1.49, + "step": 4795 + }, + { + "epoch": 0.5059071729957806, + "grad_norm": 0.7049981951713562, + "learning_rate": 0.0007478659961488805, + "loss": 1.4812, + "step": 4796 + }, + { + "epoch": 0.5060126582278481, + "grad_norm": 0.7814225554466248, + "learning_rate": 0.0007476149376740533, + "loss": 1.4879, + "step": 4797 + }, + { + "epoch": 0.5061181434599156, + "grad_norm": 0.6597009301185608, + "learning_rate": 0.0007473638794664841, + "loss": 1.4459, + "step": 4798 + }, + { + "epoch": 0.5062236286919831, + "grad_norm": 0.8713012337684631, + "learning_rate": 0.0007471128215543056, + "loss": 1.4891, + "step": 4799 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 0.8553509712219238, + "learning_rate": 0.0007468617639656496, + "loss": 1.473, + "step": 4800 + }, + { + "epoch": 0.5064345991561181, + "grad_norm": 0.6868170499801636, + "learning_rate": 0.0007466107067286483, + "loss": 1.4723, + "step": 4801 + }, + { + "epoch": 0.5065400843881857, + "grad_norm": 0.740841269493103, + "learning_rate": 0.0007463596498714346, + "loss": 1.5011, + "step": 4802 + }, + { + "epoch": 0.5066455696202532, + "grad_norm": 0.7044812440872192, + "learning_rate": 0.0007461085934221402, + "loss": 1.4871, + "step": 4803 + }, + { + "epoch": 0.5067510548523206, + "grad_norm": 0.7527638673782349, + "learning_rate": 0.0007458575374088974, + "loss": 1.4663, + "step": 4804 + }, + { + "epoch": 0.5068565400843882, + "grad_norm": 0.6995745897293091, + "learning_rate": 0.0007456064818598382, + "loss": 1.4945, + "step": 4805 + }, + { + "epoch": 0.5069620253164557, + "grad_norm": 0.6919389963150024, + "learning_rate": 0.0007453554268030946, + "loss": 1.492, + "step": 4806 + }, + { + "epoch": 0.5070675105485232, + "grad_norm": 0.7202015519142151, + "learning_rate": 
0.0007451043722667985, + "loss": 1.5293, + "step": 4807 + }, + { + "epoch": 0.5071729957805907, + "grad_norm": 0.7121363282203674, + "learning_rate": 0.000744853318279082, + "loss": 1.5018, + "step": 4808 + }, + { + "epoch": 0.5072784810126583, + "grad_norm": 0.7434036731719971, + "learning_rate": 0.0007446022648680768, + "loss": 1.4991, + "step": 4809 + }, + { + "epoch": 0.5073839662447257, + "grad_norm": 0.6584391593933105, + "learning_rate": 0.0007443512120619144, + "loss": 1.4902, + "step": 4810 + }, + { + "epoch": 0.5074894514767933, + "grad_norm": 0.7020818591117859, + "learning_rate": 0.0007441001598887273, + "loss": 1.4903, + "step": 4811 + }, + { + "epoch": 0.5075949367088608, + "grad_norm": 0.6606990694999695, + "learning_rate": 0.0007438491083766465, + "loss": 1.4864, + "step": 4812 + }, + { + "epoch": 0.5077004219409282, + "grad_norm": 0.7459449172019958, + "learning_rate": 0.000743598057553804, + "loss": 1.5387, + "step": 4813 + }, + { + "epoch": 0.5078059071729958, + "grad_norm": 0.6995876431465149, + "learning_rate": 0.0007433470074483309, + "loss": 1.5303, + "step": 4814 + }, + { + "epoch": 0.5079113924050633, + "grad_norm": 0.6460826396942139, + "learning_rate": 0.0007430959580883589, + "loss": 1.5087, + "step": 4815 + }, + { + "epoch": 0.5080168776371308, + "grad_norm": 0.7399743795394897, + "learning_rate": 0.0007428449095020192, + "loss": 1.4791, + "step": 4816 + }, + { + "epoch": 0.5081223628691983, + "grad_norm": 0.7765082716941833, + "learning_rate": 0.000742593861717443, + "loss": 1.5004, + "step": 4817 + }, + { + "epoch": 0.5082278481012659, + "grad_norm": 0.8056176900863647, + "learning_rate": 0.0007423428147627613, + "loss": 1.564, + "step": 4818 + }, + { + "epoch": 0.5083333333333333, + "grad_norm": 0.7533096075057983, + "learning_rate": 0.0007420917686661055, + "loss": 1.5305, + "step": 4819 + }, + { + "epoch": 0.5084388185654009, + "grad_norm": 0.8577434420585632, + "learning_rate": 0.0007418407234556067, + "loss": 1.4836, + "step": 
4820 + }, + { + "epoch": 0.5085443037974684, + "grad_norm": 0.7374603152275085, + "learning_rate": 0.0007415896791593955, + "loss": 1.5214, + "step": 4821 + }, + { + "epoch": 0.5086497890295358, + "grad_norm": 0.760660707950592, + "learning_rate": 0.0007413386358056025, + "loss": 1.492, + "step": 4822 + }, + { + "epoch": 0.5087552742616034, + "grad_norm": 0.7290055751800537, + "learning_rate": 0.0007410875934223588, + "loss": 1.5091, + "step": 4823 + }, + { + "epoch": 0.5088607594936709, + "grad_norm": 0.7651715278625488, + "learning_rate": 0.0007408365520377945, + "loss": 1.5137, + "step": 4824 + }, + { + "epoch": 0.5089662447257384, + "grad_norm": 0.7544330358505249, + "learning_rate": 0.0007405855116800403, + "loss": 1.5025, + "step": 4825 + }, + { + "epoch": 0.5090717299578059, + "grad_norm": 1.016208291053772, + "learning_rate": 0.0007403344723772265, + "loss": 1.4633, + "step": 4826 + }, + { + "epoch": 0.5091772151898735, + "grad_norm": 0.7526769042015076, + "learning_rate": 0.0007400834341574829, + "loss": 1.4792, + "step": 4827 + }, + { + "epoch": 0.5092827004219409, + "grad_norm": 0.7733822464942932, + "learning_rate": 0.0007398323970489402, + "loss": 1.4998, + "step": 4828 + }, + { + "epoch": 0.5093881856540085, + "grad_norm": 0.7899132370948792, + "learning_rate": 0.0007395813610797283, + "loss": 1.4877, + "step": 4829 + }, + { + "epoch": 0.509493670886076, + "grad_norm": 0.6978710889816284, + "learning_rate": 0.0007393303262779767, + "loss": 1.4877, + "step": 4830 + }, + { + "epoch": 0.5095991561181434, + "grad_norm": 0.7668545842170715, + "learning_rate": 0.0007390792926718153, + "loss": 1.5017, + "step": 4831 + }, + { + "epoch": 0.509704641350211, + "grad_norm": 0.7874740362167358, + "learning_rate": 0.0007388282602893737, + "loss": 1.5058, + "step": 4832 + }, + { + "epoch": 0.5098101265822785, + "grad_norm": 0.8286600708961487, + "learning_rate": 0.000738577229158781, + "loss": 1.4591, + "step": 4833 + }, + { + "epoch": 0.509915611814346, + 
"grad_norm": 0.7737740278244019, + "learning_rate": 0.000738326199308167, + "loss": 1.4903, + "step": 4834 + }, + { + "epoch": 0.5100210970464135, + "grad_norm": 1.072513461112976, + "learning_rate": 0.0007380751707656603, + "loss": 1.491, + "step": 4835 + }, + { + "epoch": 0.5101265822784811, + "grad_norm": 0.7766354084014893, + "learning_rate": 0.0007378241435593901, + "loss": 1.4676, + "step": 4836 + }, + { + "epoch": 0.5102320675105485, + "grad_norm": 0.8651732802391052, + "learning_rate": 0.0007375731177174855, + "loss": 1.4609, + "step": 4837 + }, + { + "epoch": 0.510337552742616, + "grad_norm": 0.7604774236679077, + "learning_rate": 0.0007373220932680751, + "loss": 1.5002, + "step": 4838 + }, + { + "epoch": 0.5104430379746835, + "grad_norm": 0.666291356086731, + "learning_rate": 0.0007370710702392873, + "loss": 1.4848, + "step": 4839 + }, + { + "epoch": 0.510548523206751, + "grad_norm": 0.7917542457580566, + "learning_rate": 0.0007368200486592507, + "loss": 1.5325, + "step": 4840 + }, + { + "epoch": 0.5106540084388186, + "grad_norm": 0.7800229787826538, + "learning_rate": 0.0007365690285560932, + "loss": 1.4788, + "step": 4841 + }, + { + "epoch": 0.510759493670886, + "grad_norm": 0.7564208507537842, + "learning_rate": 0.0007363180099579431, + "loss": 1.5041, + "step": 4842 + }, + { + "epoch": 0.5108649789029536, + "grad_norm": 0.8132686018943787, + "learning_rate": 0.0007360669928929282, + "loss": 1.5053, + "step": 4843 + }, + { + "epoch": 0.5109704641350211, + "grad_norm": 0.7730945348739624, + "learning_rate": 0.000735815977389176, + "loss": 1.5029, + "step": 4844 + }, + { + "epoch": 0.5110759493670886, + "grad_norm": 0.7092160582542419, + "learning_rate": 0.0007355649634748143, + "loss": 1.4888, + "step": 4845 + }, + { + "epoch": 0.5111814345991561, + "grad_norm": 0.6579422950744629, + "learning_rate": 0.0007353139511779707, + "loss": 1.5185, + "step": 4846 + }, + { + "epoch": 0.5112869198312237, + "grad_norm": 0.7174131274223328, + "learning_rate": 
0.000735062940526772, + "loss": 1.4911, + "step": 4847 + }, + { + "epoch": 0.5113924050632911, + "grad_norm": 0.6872413754463196, + "learning_rate": 0.0007348119315493453, + "loss": 1.5165, + "step": 4848 + }, + { + "epoch": 0.5114978902953586, + "grad_norm": 0.7323470115661621, + "learning_rate": 0.0007345609242738173, + "loss": 1.4922, + "step": 4849 + }, + { + "epoch": 0.5116033755274262, + "grad_norm": 0.7344085574150085, + "learning_rate": 0.0007343099187283149, + "loss": 1.5179, + "step": 4850 + }, + { + "epoch": 0.5117088607594936, + "grad_norm": 0.7634298801422119, + "learning_rate": 0.0007340589149409644, + "loss": 1.5396, + "step": 4851 + }, + { + "epoch": 0.5118143459915612, + "grad_norm": 0.7506126761436462, + "learning_rate": 0.0007338079129398917, + "loss": 1.5221, + "step": 4852 + }, + { + "epoch": 0.5119198312236287, + "grad_norm": 0.691744863986969, + "learning_rate": 0.0007335569127532231, + "loss": 1.4643, + "step": 4853 + }, + { + "epoch": 0.5120253164556962, + "grad_norm": 0.7769249677658081, + "learning_rate": 0.0007333059144090845, + "loss": 1.538, + "step": 4854 + }, + { + "epoch": 0.5121308016877637, + "grad_norm": 0.7145251035690308, + "learning_rate": 0.0007330549179356014, + "loss": 1.4759, + "step": 4855 + }, + { + "epoch": 0.5122362869198313, + "grad_norm": 0.7442019581794739, + "learning_rate": 0.0007328039233608993, + "loss": 1.4736, + "step": 4856 + }, + { + "epoch": 0.5123417721518987, + "grad_norm": 0.7983344197273254, + "learning_rate": 0.0007325529307131034, + "loss": 1.4819, + "step": 4857 + }, + { + "epoch": 0.5124472573839662, + "grad_norm": 0.915967583656311, + "learning_rate": 0.0007323019400203386, + "loss": 1.4627, + "step": 4858 + }, + { + "epoch": 0.5125527426160338, + "grad_norm": 0.7363025546073914, + "learning_rate": 0.0007320509513107296, + "loss": 1.4469, + "step": 4859 + }, + { + "epoch": 0.5126582278481012, + "grad_norm": 0.8668085336685181, + "learning_rate": 0.0007317999646124011, + "loss": 1.4851, + "step": 
4860 + }, + { + "epoch": 0.5127637130801688, + "grad_norm": 0.6756255030632019, + "learning_rate": 0.0007315489799534772, + "loss": 1.4792, + "step": 4861 + }, + { + "epoch": 0.5128691983122363, + "grad_norm": 0.9796139001846313, + "learning_rate": 0.000731297997362082, + "loss": 1.4778, + "step": 4862 + }, + { + "epoch": 0.5129746835443038, + "grad_norm": 0.6874305605888367, + "learning_rate": 0.0007310470168663397, + "loss": 1.5066, + "step": 4863 + }, + { + "epoch": 0.5130801687763713, + "grad_norm": 1.1223905086517334, + "learning_rate": 0.0007307960384943736, + "loss": 1.5193, + "step": 4864 + }, + { + "epoch": 0.5131856540084389, + "grad_norm": 0.8445841670036316, + "learning_rate": 0.000730545062274307, + "loss": 1.4596, + "step": 4865 + }, + { + "epoch": 0.5132911392405063, + "grad_norm": 0.7914537787437439, + "learning_rate": 0.0007302940882342634, + "loss": 1.4856, + "step": 4866 + }, + { + "epoch": 0.5133966244725738, + "grad_norm": 0.9014516472816467, + "learning_rate": 0.0007300431164023653, + "loss": 1.4515, + "step": 4867 + }, + { + "epoch": 0.5135021097046414, + "grad_norm": 0.6933799982070923, + "learning_rate": 0.0007297921468067357, + "loss": 1.4867, + "step": 4868 + }, + { + "epoch": 0.5136075949367088, + "grad_norm": 0.8769029974937439, + "learning_rate": 0.0007295411794754967, + "loss": 1.4825, + "step": 4869 + }, + { + "epoch": 0.5137130801687764, + "grad_norm": 0.7786666750907898, + "learning_rate": 0.0007292902144367704, + "loss": 1.4914, + "step": 4870 + }, + { + "epoch": 0.5138185654008439, + "grad_norm": 0.806551456451416, + "learning_rate": 0.0007290392517186791, + "loss": 1.5144, + "step": 4871 + }, + { + "epoch": 0.5139240506329114, + "grad_norm": 0.8595383763313293, + "learning_rate": 0.000728788291349344, + "loss": 1.4809, + "step": 4872 + }, + { + "epoch": 0.5140295358649789, + "grad_norm": 0.9203867316246033, + "learning_rate": 0.0007285373333568868, + "loss": 1.547, + "step": 4873 + }, + { + "epoch": 0.5141350210970465, + 
"grad_norm": 1.1336934566497803, + "learning_rate": 0.0007282863777694283, + "loss": 1.5158, + "step": 4874 + }, + { + "epoch": 0.5142405063291139, + "grad_norm": 0.7907871007919312, + "learning_rate": 0.0007280354246150894, + "loss": 1.4637, + "step": 4875 + }, + { + "epoch": 0.5143459915611814, + "grad_norm": 1.0566433668136597, + "learning_rate": 0.0007277844739219908, + "loss": 1.5144, + "step": 4876 + }, + { + "epoch": 0.514451476793249, + "grad_norm": 0.7849918007850647, + "learning_rate": 0.0007275335257182526, + "loss": 1.4986, + "step": 4877 + }, + { + "epoch": 0.5145569620253164, + "grad_norm": 1.1962617635726929, + "learning_rate": 0.000727282580031995, + "loss": 1.5332, + "step": 4878 + }, + { + "epoch": 0.514662447257384, + "grad_norm": 0.758672297000885, + "learning_rate": 0.0007270316368913374, + "loss": 1.5178, + "step": 4879 + }, + { + "epoch": 0.5147679324894515, + "grad_norm": 0.7719336152076721, + "learning_rate": 0.0007267806963243995, + "loss": 1.4613, + "step": 4880 + }, + { + "epoch": 0.514873417721519, + "grad_norm": 0.6990248560905457, + "learning_rate": 0.0007265297583593003, + "loss": 1.5137, + "step": 4881 + }, + { + "epoch": 0.5149789029535865, + "grad_norm": 0.8070154786109924, + "learning_rate": 0.0007262788230241588, + "loss": 1.498, + "step": 4882 + }, + { + "epoch": 0.515084388185654, + "grad_norm": 0.7678052186965942, + "learning_rate": 0.0007260278903470935, + "loss": 1.5054, + "step": 4883 + }, + { + "epoch": 0.5151898734177215, + "grad_norm": 0.7488037943840027, + "learning_rate": 0.0007257769603562227, + "loss": 1.4938, + "step": 4884 + }, + { + "epoch": 0.515295358649789, + "grad_norm": 0.7219677567481995, + "learning_rate": 0.0007255260330796639, + "loss": 1.4633, + "step": 4885 + }, + { + "epoch": 0.5154008438818566, + "grad_norm": 0.8657189607620239, + "learning_rate": 0.0007252751085455355, + "loss": 1.5104, + "step": 4886 + }, + { + "epoch": 0.515506329113924, + "grad_norm": 0.7878258228302002, + "learning_rate": 
0.0007250241867819544, + "loss": 1.4831, + "step": 4887 + }, + { + "epoch": 0.5156118143459916, + "grad_norm": 0.8059980273246765, + "learning_rate": 0.0007247732678170375, + "loss": 1.5242, + "step": 4888 + }, + { + "epoch": 0.5157172995780591, + "grad_norm": 0.7730981707572937, + "learning_rate": 0.0007245223516789019, + "loss": 1.4723, + "step": 4889 + }, + { + "epoch": 0.5158227848101266, + "grad_norm": 0.8106138110160828, + "learning_rate": 0.0007242714383956639, + "loss": 1.4638, + "step": 4890 + }, + { + "epoch": 0.5159282700421941, + "grad_norm": 1.0114681720733643, + "learning_rate": 0.0007240205279954395, + "loss": 1.4581, + "step": 4891 + }, + { + "epoch": 0.5160337552742617, + "grad_norm": 0.6806797981262207, + "learning_rate": 0.0007237696205063444, + "loss": 1.5214, + "step": 4892 + }, + { + "epoch": 0.5161392405063291, + "grad_norm": 0.9427956938743591, + "learning_rate": 0.0007235187159564942, + "loss": 1.4937, + "step": 4893 + }, + { + "epoch": 0.5162447257383966, + "grad_norm": 0.8982729315757751, + "learning_rate": 0.0007232678143740038, + "loss": 1.5153, + "step": 4894 + }, + { + "epoch": 0.5163502109704642, + "grad_norm": 0.6975923180580139, + "learning_rate": 0.0007230169157869882, + "loss": 1.4667, + "step": 4895 + }, + { + "epoch": 0.5164556962025316, + "grad_norm": 1.034011721611023, + "learning_rate": 0.0007227660202235616, + "loss": 1.5332, + "step": 4896 + }, + { + "epoch": 0.5165611814345992, + "grad_norm": 0.7545966506004333, + "learning_rate": 0.0007225151277118384, + "loss": 1.4917, + "step": 4897 + }, + { + "epoch": 0.5166666666666667, + "grad_norm": 0.727112889289856, + "learning_rate": 0.0007222642382799322, + "loss": 1.5287, + "step": 4898 + }, + { + "epoch": 0.5167721518987342, + "grad_norm": 0.6560326814651489, + "learning_rate": 0.0007220133519559563, + "loss": 1.4886, + "step": 4899 + }, + { + "epoch": 0.5168776371308017, + "grad_norm": 0.8010597229003906, + "learning_rate": 0.000721762468768024, + "loss": 1.5027, + "step": 
4900 + }, + { + "epoch": 0.5169831223628693, + "grad_norm": 0.7559993863105774, + "learning_rate": 0.0007215115887442478, + "loss": 1.5107, + "step": 4901 + }, + { + "epoch": 0.5170886075949367, + "grad_norm": 0.7655650973320007, + "learning_rate": 0.0007212607119127402, + "loss": 1.4705, + "step": 4902 + }, + { + "epoch": 0.5171940928270042, + "grad_norm": 0.8146873712539673, + "learning_rate": 0.000721009838301613, + "loss": 1.4879, + "step": 4903 + }, + { + "epoch": 0.5172995780590718, + "grad_norm": 0.6768746972084045, + "learning_rate": 0.000720758967938978, + "loss": 1.5078, + "step": 4904 + }, + { + "epoch": 0.5174050632911392, + "grad_norm": 0.6858991980552673, + "learning_rate": 0.0007205081008529463, + "loss": 1.5183, + "step": 4905 + }, + { + "epoch": 0.5175105485232068, + "grad_norm": 0.6848714351654053, + "learning_rate": 0.0007202572370716292, + "loss": 1.4794, + "step": 4906 + }, + { + "epoch": 0.5176160337552742, + "grad_norm": 0.6681459546089172, + "learning_rate": 0.000720006376623137, + "loss": 1.4409, + "step": 4907 + }, + { + "epoch": 0.5177215189873418, + "grad_norm": 0.7538333535194397, + "learning_rate": 0.0007197555195355799, + "loss": 1.5144, + "step": 4908 + }, + { + "epoch": 0.5178270042194093, + "grad_norm": 0.7954360246658325, + "learning_rate": 0.0007195046658370675, + "loss": 1.4761, + "step": 4909 + }, + { + "epoch": 0.5179324894514767, + "grad_norm": 0.7613559365272522, + "learning_rate": 0.0007192538155557094, + "loss": 1.537, + "step": 4910 + }, + { + "epoch": 0.5180379746835443, + "grad_norm": 0.9600253105163574, + "learning_rate": 0.0007190029687196148, + "loss": 1.4847, + "step": 4911 + }, + { + "epoch": 0.5181434599156118, + "grad_norm": 0.6812431216239929, + "learning_rate": 0.0007187521253568919, + "loss": 1.46, + "step": 4912 + }, + { + "epoch": 0.5182489451476793, + "grad_norm": 0.9853848218917847, + "learning_rate": 0.0007185012854956491, + "loss": 1.4637, + "step": 4913 + }, + { + "epoch": 0.5183544303797468, + 
"grad_norm": 1.111080527305603, + "learning_rate": 0.0007182504491639942, + "loss": 1.4664, + "step": 4914 + }, + { + "epoch": 0.5184599156118144, + "grad_norm": 0.738676905632019, + "learning_rate": 0.000717999616390035, + "loss": 1.4923, + "step": 4915 + }, + { + "epoch": 0.5185654008438818, + "grad_norm": 1.0577664375305176, + "learning_rate": 0.0007177487872018784, + "loss": 1.5113, + "step": 4916 + }, + { + "epoch": 0.5186708860759494, + "grad_norm": 0.8313449025154114, + "learning_rate": 0.000717497961627631, + "loss": 1.4999, + "step": 4917 + }, + { + "epoch": 0.5187763713080169, + "grad_norm": 0.7978147268295288, + "learning_rate": 0.0007172471396953991, + "loss": 1.4692, + "step": 4918 + }, + { + "epoch": 0.5188818565400843, + "grad_norm": 0.7023072242736816, + "learning_rate": 0.0007169963214332885, + "loss": 1.4752, + "step": 4919 + }, + { + "epoch": 0.5189873417721519, + "grad_norm": 1.0675066709518433, + "learning_rate": 0.0007167455068694046, + "loss": 1.5137, + "step": 4920 + }, + { + "epoch": 0.5190928270042194, + "grad_norm": 0.859831690788269, + "learning_rate": 0.0007164946960318525, + "loss": 1.5096, + "step": 4921 + }, + { + "epoch": 0.5191983122362869, + "grad_norm": 0.7838005423545837, + "learning_rate": 0.0007162438889487365, + "loss": 1.5185, + "step": 4922 + }, + { + "epoch": 0.5193037974683544, + "grad_norm": 0.8191056847572327, + "learning_rate": 0.0007159930856481614, + "loss": 1.5055, + "step": 4923 + }, + { + "epoch": 0.519409282700422, + "grad_norm": 0.7161900997161865, + "learning_rate": 0.0007157422861582306, + "loss": 1.447, + "step": 4924 + }, + { + "epoch": 0.5195147679324894, + "grad_norm": 0.7820819020271301, + "learning_rate": 0.0007154914905070475, + "loss": 1.4894, + "step": 4925 + }, + { + "epoch": 0.519620253164557, + "grad_norm": 0.6715101599693298, + "learning_rate": 0.0007152406987227149, + "loss": 1.5032, + "step": 4926 + }, + { + "epoch": 0.5197257383966245, + "grad_norm": 0.7450423240661621, + "learning_rate": 
0.0007149899108333354, + "loss": 1.4666, + "step": 4927 + }, + { + "epoch": 0.5198312236286919, + "grad_norm": 0.8177722692489624, + "learning_rate": 0.0007147391268670109, + "loss": 1.5057, + "step": 4928 + }, + { + "epoch": 0.5199367088607595, + "grad_norm": 0.7067716121673584, + "learning_rate": 0.000714488346851843, + "loss": 1.5, + "step": 4929 + }, + { + "epoch": 0.520042194092827, + "grad_norm": 1.0593668222427368, + "learning_rate": 0.000714237570815933, + "loss": 1.4813, + "step": 4930 + }, + { + "epoch": 0.5201476793248945, + "grad_norm": 0.7204735279083252, + "learning_rate": 0.0007139867987873812, + "loss": 1.4956, + "step": 4931 + }, + { + "epoch": 0.520253164556962, + "grad_norm": 1.1279354095458984, + "learning_rate": 0.0007137360307942885, + "loss": 1.5077, + "step": 4932 + }, + { + "epoch": 0.5203586497890296, + "grad_norm": 0.7823067307472229, + "learning_rate": 0.0007134852668647543, + "loss": 1.5189, + "step": 4933 + }, + { + "epoch": 0.520464135021097, + "grad_norm": 1.0429478883743286, + "learning_rate": 0.0007132345070268781, + "loss": 1.487, + "step": 4934 + }, + { + "epoch": 0.5205696202531646, + "grad_norm": 0.7986664175987244, + "learning_rate": 0.0007129837513087587, + "loss": 1.4396, + "step": 4935 + }, + { + "epoch": 0.5206751054852321, + "grad_norm": 1.1234275102615356, + "learning_rate": 0.0007127329997384946, + "loss": 1.4852, + "step": 4936 + }, + { + "epoch": 0.5207805907172995, + "grad_norm": 0.9782840609550476, + "learning_rate": 0.0007124822523441837, + "loss": 1.488, + "step": 4937 + }, + { + "epoch": 0.5208860759493671, + "grad_norm": 0.8260686993598938, + "learning_rate": 0.0007122315091539234, + "loss": 1.4975, + "step": 4938 + }, + { + "epoch": 0.5209915611814346, + "grad_norm": 1.0345863103866577, + "learning_rate": 0.000711980770195811, + "loss": 1.5136, + "step": 4939 + }, + { + "epoch": 0.5210970464135021, + "grad_norm": 0.7572880387306213, + "learning_rate": 0.0007117300354979423, + "loss": 1.4773, + "step": 4940 + }, 
+ { + "epoch": 0.5212025316455696, + "grad_norm": 0.8784215450286865, + "learning_rate": 0.0007114793050884145, + "loss": 1.527, + "step": 4941 + }, + { + "epoch": 0.5213080168776372, + "grad_norm": 0.7942994832992554, + "learning_rate": 0.0007112285789953226, + "loss": 1.4946, + "step": 4942 + }, + { + "epoch": 0.5214135021097046, + "grad_norm": 0.7453052997589111, + "learning_rate": 0.0007109778572467616, + "loss": 1.4849, + "step": 4943 + }, + { + "epoch": 0.5215189873417722, + "grad_norm": 0.6672415137290955, + "learning_rate": 0.0007107271398708266, + "loss": 1.5105, + "step": 4944 + }, + { + "epoch": 0.5216244725738397, + "grad_norm": 0.6991457939147949, + "learning_rate": 0.0007104764268956111, + "loss": 1.4735, + "step": 4945 + }, + { + "epoch": 0.5217299578059071, + "grad_norm": 0.8008588552474976, + "learning_rate": 0.0007102257183492092, + "loss": 1.4876, + "step": 4946 + }, + { + "epoch": 0.5218354430379747, + "grad_norm": 0.7578993439674377, + "learning_rate": 0.0007099750142597138, + "loss": 1.5046, + "step": 4947 + }, + { + "epoch": 0.5219409282700422, + "grad_norm": 0.7308734655380249, + "learning_rate": 0.0007097243146552175, + "loss": 1.4969, + "step": 4948 + }, + { + "epoch": 0.5220464135021097, + "grad_norm": 0.7199716567993164, + "learning_rate": 0.0007094736195638128, + "loss": 1.5004, + "step": 4949 + }, + { + "epoch": 0.5221518987341772, + "grad_norm": 0.7032922506332397, + "learning_rate": 0.000709222929013591, + "loss": 1.5009, + "step": 4950 + }, + { + "epoch": 0.5222573839662448, + "grad_norm": 0.7022863626480103, + "learning_rate": 0.0007089722430326434, + "loss": 1.5012, + "step": 4951 + }, + { + "epoch": 0.5223628691983122, + "grad_norm": 0.6868120431900024, + "learning_rate": 0.0007087215616490606, + "loss": 1.4758, + "step": 4952 + }, + { + "epoch": 0.5224683544303798, + "grad_norm": 0.8913400769233704, + "learning_rate": 0.0007084708848909326, + "loss": 1.4287, + "step": 4953 + }, + { + "epoch": 0.5225738396624473, + "grad_norm": 
0.838385283946991, + "learning_rate": 0.000708220212786349, + "loss": 1.4789, + "step": 4954 + }, + { + "epoch": 0.5226793248945147, + "grad_norm": 0.7102712392807007, + "learning_rate": 0.000707969545363399, + "loss": 1.4971, + "step": 4955 + }, + { + "epoch": 0.5227848101265823, + "grad_norm": 0.9963305592536926, + "learning_rate": 0.000707718882650171, + "loss": 1.4786, + "step": 4956 + }, + { + "epoch": 0.5228902953586498, + "grad_norm": 0.73219233751297, + "learning_rate": 0.0007074682246747526, + "loss": 1.4946, + "step": 4957 + }, + { + "epoch": 0.5229957805907173, + "grad_norm": 0.8784971833229065, + "learning_rate": 0.0007072175714652321, + "loss": 1.4806, + "step": 4958 + }, + { + "epoch": 0.5231012658227848, + "grad_norm": 0.9414666295051575, + "learning_rate": 0.0007069669230496961, + "loss": 1.4539, + "step": 4959 + }, + { + "epoch": 0.5232067510548524, + "grad_norm": 0.7330427169799805, + "learning_rate": 0.0007067162794562309, + "loss": 1.4794, + "step": 4960 + }, + { + "epoch": 0.5233122362869198, + "grad_norm": 0.7726001739501953, + "learning_rate": 0.0007064656407129224, + "loss": 1.4672, + "step": 4961 + }, + { + "epoch": 0.5234177215189874, + "grad_norm": 0.6882714033126831, + "learning_rate": 0.000706215006847856, + "loss": 1.4872, + "step": 4962 + }, + { + "epoch": 0.5235232067510549, + "grad_norm": 0.7937681078910828, + "learning_rate": 0.0007059643778891164, + "loss": 1.5042, + "step": 4963 + }, + { + "epoch": 0.5236286919831223, + "grad_norm": 0.6928925514221191, + "learning_rate": 0.0007057137538647878, + "loss": 1.481, + "step": 4964 + }, + { + "epoch": 0.5237341772151899, + "grad_norm": 0.7106788754463196, + "learning_rate": 0.0007054631348029539, + "loss": 1.5549, + "step": 4965 + }, + { + "epoch": 0.5238396624472574, + "grad_norm": 0.6728728413581848, + "learning_rate": 0.0007052125207316975, + "loss": 1.4739, + "step": 4966 + }, + { + "epoch": 0.5239451476793249, + "grad_norm": 0.7235791683197021, + "learning_rate": 
0.0007049619116791019, + "loss": 1.4586, + "step": 4967 + }, + { + "epoch": 0.5240506329113924, + "grad_norm": 0.7457456588745117, + "learning_rate": 0.0007047113076732485, + "loss": 1.4277, + "step": 4968 + }, + { + "epoch": 0.52415611814346, + "grad_norm": 0.6859675645828247, + "learning_rate": 0.0007044607087422191, + "loss": 1.49, + "step": 4969 + }, + { + "epoch": 0.5242616033755274, + "grad_norm": 0.9924870729446411, + "learning_rate": 0.0007042101149140943, + "loss": 1.5298, + "step": 4970 + }, + { + "epoch": 0.524367088607595, + "grad_norm": 0.8485029935836792, + "learning_rate": 0.0007039595262169544, + "loss": 1.5481, + "step": 4971 + }, + { + "epoch": 0.5244725738396624, + "grad_norm": 0.7446405291557312, + "learning_rate": 0.0007037089426788792, + "loss": 1.4309, + "step": 4972 + }, + { + "epoch": 0.5245780590717299, + "grad_norm": 0.7993538975715637, + "learning_rate": 0.0007034583643279479, + "loss": 1.4779, + "step": 4973 + }, + { + "epoch": 0.5246835443037975, + "grad_norm": 0.776822030544281, + "learning_rate": 0.0007032077911922384, + "loss": 1.4801, + "step": 4974 + }, + { + "epoch": 0.5247890295358649, + "grad_norm": 0.7602906823158264, + "learning_rate": 0.0007029572232998298, + "loss": 1.5284, + "step": 4975 + }, + { + "epoch": 0.5248945147679325, + "grad_norm": 0.7203003168106079, + "learning_rate": 0.0007027066606787988, + "loss": 1.4871, + "step": 4976 + }, + { + "epoch": 0.525, + "grad_norm": 0.947700560092926, + "learning_rate": 0.0007024561033572223, + "loss": 1.4972, + "step": 4977 + }, + { + "epoch": 0.5251054852320675, + "grad_norm": 0.7633508443832397, + "learning_rate": 0.0007022055513631764, + "loss": 1.4831, + "step": 4978 + }, + { + "epoch": 0.525210970464135, + "grad_norm": 0.7156700491905212, + "learning_rate": 0.000701955004724737, + "loss": 1.4431, + "step": 4979 + }, + { + "epoch": 0.5253164556962026, + "grad_norm": 0.7486793994903564, + "learning_rate": 0.0007017044634699787, + "loss": 1.4629, + "step": 4980 + }, + { + 
"epoch": 0.52542194092827, + "grad_norm": 0.7768197059631348, + "learning_rate": 0.0007014539276269762, + "loss": 1.5078, + "step": 4981 + }, + { + "epoch": 0.5255274261603375, + "grad_norm": 0.70685875415802, + "learning_rate": 0.0007012033972238031, + "loss": 1.465, + "step": 4982 + }, + { + "epoch": 0.5256329113924051, + "grad_norm": 0.9090269207954407, + "learning_rate": 0.0007009528722885323, + "loss": 1.4792, + "step": 4983 + }, + { + "epoch": 0.5257383966244725, + "grad_norm": 0.8053869605064392, + "learning_rate": 0.0007007023528492372, + "loss": 1.4703, + "step": 4984 + }, + { + "epoch": 0.5258438818565401, + "grad_norm": 0.9673369526863098, + "learning_rate": 0.0007004518389339893, + "loss": 1.4879, + "step": 4985 + }, + { + "epoch": 0.5259493670886076, + "grad_norm": 0.8315147757530212, + "learning_rate": 0.0007002013305708598, + "loss": 1.4606, + "step": 4986 + }, + { + "epoch": 0.5260548523206751, + "grad_norm": 0.986961841583252, + "learning_rate": 0.0006999508277879196, + "loss": 1.4535, + "step": 4987 + }, + { + "epoch": 0.5261603375527426, + "grad_norm": 0.9178986549377441, + "learning_rate": 0.0006997003306132386, + "loss": 1.4936, + "step": 4988 + }, + { + "epoch": 0.5262658227848102, + "grad_norm": 0.783030092716217, + "learning_rate": 0.0006994498390748865, + "loss": 1.52, + "step": 4989 + }, + { + "epoch": 0.5263713080168776, + "grad_norm": 0.7092655301094055, + "learning_rate": 0.0006991993532009319, + "loss": 1.483, + "step": 4990 + }, + { + "epoch": 0.5264767932489451, + "grad_norm": 0.85148024559021, + "learning_rate": 0.0006989488730194432, + "loss": 1.4897, + "step": 4991 + }, + { + "epoch": 0.5265822784810127, + "grad_norm": 0.6566258072853088, + "learning_rate": 0.0006986983985584874, + "loss": 1.481, + "step": 4992 + }, + { + "epoch": 0.5266877637130801, + "grad_norm": 0.7801067233085632, + "learning_rate": 0.0006984479298461323, + "loss": 1.5004, + "step": 4993 + }, + { + "epoch": 0.5267932489451477, + "grad_norm": 
0.7775512933731079, + "learning_rate": 0.0006981974669104436, + "loss": 1.533, + "step": 4994 + }, + { + "epoch": 0.5268987341772152, + "grad_norm": 0.8983667492866516, + "learning_rate": 0.0006979470097794871, + "loss": 1.4984, + "step": 4995 + }, + { + "epoch": 0.5270042194092827, + "grad_norm": 0.7169232368469238, + "learning_rate": 0.0006976965584813277, + "loss": 1.4913, + "step": 4996 + }, + { + "epoch": 0.5271097046413502, + "grad_norm": 0.8992185592651367, + "learning_rate": 0.0006974461130440298, + "loss": 1.4635, + "step": 4997 + }, + { + "epoch": 0.5272151898734178, + "grad_norm": 0.7260494828224182, + "learning_rate": 0.0006971956734956569, + "loss": 1.5083, + "step": 4998 + }, + { + "epoch": 0.5273206751054852, + "grad_norm": 0.9225859642028809, + "learning_rate": 0.0006969452398642721, + "loss": 1.5286, + "step": 4999 + }, + { + "epoch": 0.5274261603375527, + "grad_norm": 0.6995916366577148, + "learning_rate": 0.0006966948121779378, + "loss": 1.4781, + "step": 5000 + }, + { + "epoch": 0.5275316455696203, + "grad_norm": 0.9662778377532959, + "learning_rate": 0.0006964443904647152, + "loss": 1.5175, + "step": 5001 + }, + { + "epoch": 0.5276371308016877, + "grad_norm": 0.7372615337371826, + "learning_rate": 0.0006961939747526661, + "loss": 1.5156, + "step": 5002 + }, + { + "epoch": 0.5277426160337553, + "grad_norm": 0.8507598042488098, + "learning_rate": 0.0006959435650698504, + "loss": 1.4904, + "step": 5003 + }, + { + "epoch": 0.5278481012658228, + "grad_norm": 0.8907076716423035, + "learning_rate": 0.0006956931614443278, + "loss": 1.4823, + "step": 5004 + }, + { + "epoch": 0.5279535864978903, + "grad_norm": 0.7125250697135925, + "learning_rate": 0.0006954427639041572, + "loss": 1.4724, + "step": 5005 + }, + { + "epoch": 0.5280590717299578, + "grad_norm": 1.010608434677124, + "learning_rate": 0.000695192372477397, + "loss": 1.4455, + "step": 5006 + }, + { + "epoch": 0.5281645569620254, + "grad_norm": 0.7226719856262207, + "learning_rate": 
0.0006949419871921047, + "loss": 1.4766, + "step": 5007 + }, + { + "epoch": 0.5282700421940928, + "grad_norm": 0.7989937663078308, + "learning_rate": 0.0006946916080763373, + "loss": 1.5051, + "step": 5008 + }, + { + "epoch": 0.5283755274261603, + "grad_norm": 0.7716633081436157, + "learning_rate": 0.0006944412351581506, + "loss": 1.4874, + "step": 5009 + }, + { + "epoch": 0.5284810126582279, + "grad_norm": 0.9454348683357239, + "learning_rate": 0.000694190868465601, + "loss": 1.492, + "step": 5010 + }, + { + "epoch": 0.5285864978902953, + "grad_norm": 0.6563282608985901, + "learning_rate": 0.0006939405080267428, + "loss": 1.4691, + "step": 5011 + }, + { + "epoch": 0.5286919831223629, + "grad_norm": 0.9610829949378967, + "learning_rate": 0.0006936901538696303, + "loss": 1.5114, + "step": 5012 + }, + { + "epoch": 0.5287974683544304, + "grad_norm": 0.83012455701828, + "learning_rate": 0.0006934398060223168, + "loss": 1.4784, + "step": 5013 + }, + { + "epoch": 0.5289029535864979, + "grad_norm": 0.8809906244277954, + "learning_rate": 0.0006931894645128551, + "loss": 1.4777, + "step": 5014 + }, + { + "epoch": 0.5290084388185654, + "grad_norm": 1.0176689624786377, + "learning_rate": 0.0006929391293692972, + "loss": 1.4275, + "step": 5015 + }, + { + "epoch": 0.529113924050633, + "grad_norm": 0.7875590324401855, + "learning_rate": 0.0006926888006196944, + "loss": 1.516, + "step": 5016 + }, + { + "epoch": 0.5292194092827004, + "grad_norm": 0.9864358901977539, + "learning_rate": 0.0006924384782920971, + "loss": 1.5105, + "step": 5017 + }, + { + "epoch": 0.5293248945147679, + "grad_norm": 0.7002938985824585, + "learning_rate": 0.0006921881624145554, + "loss": 1.4799, + "step": 5018 + }, + { + "epoch": 0.5294303797468355, + "grad_norm": 0.9035311341285706, + "learning_rate": 0.0006919378530151182, + "loss": 1.472, + "step": 5019 + }, + { + "epoch": 0.5295358649789029, + "grad_norm": 0.7270147204399109, + "learning_rate": 0.0006916875501218343, + "loss": 1.5073, + "step": 5020 
+ }, + { + "epoch": 0.5296413502109705, + "grad_norm": 0.9165782928466797, + "learning_rate": 0.0006914372537627512, + "loss": 1.4679, + "step": 5021 + }, + { + "epoch": 0.529746835443038, + "grad_norm": 0.7587229609489441, + "learning_rate": 0.0006911869639659159, + "loss": 1.4784, + "step": 5022 + }, + { + "epoch": 0.5298523206751055, + "grad_norm": 0.8500072956085205, + "learning_rate": 0.0006909366807593744, + "loss": 1.4754, + "step": 5023 + }, + { + "epoch": 0.529957805907173, + "grad_norm": 0.8956575393676758, + "learning_rate": 0.0006906864041711725, + "loss": 1.475, + "step": 5024 + }, + { + "epoch": 0.5300632911392406, + "grad_norm": 0.6899866461753845, + "learning_rate": 0.0006904361342293546, + "loss": 1.457, + "step": 5025 + }, + { + "epoch": 0.530168776371308, + "grad_norm": 0.7745136022567749, + "learning_rate": 0.000690185870961965, + "loss": 1.4713, + "step": 5026 + }, + { + "epoch": 0.5302742616033755, + "grad_norm": 0.6635140180587769, + "learning_rate": 0.0006899356143970467, + "loss": 1.4983, + "step": 5027 + }, + { + "epoch": 0.5303797468354431, + "grad_norm": 0.8142228722572327, + "learning_rate": 0.0006896853645626424, + "loss": 1.4696, + "step": 5028 + }, + { + "epoch": 0.5304852320675105, + "grad_norm": 0.6638959646224976, + "learning_rate": 0.0006894351214867937, + "loss": 1.4385, + "step": 5029 + }, + { + "epoch": 0.5305907172995781, + "grad_norm": 0.8390682339668274, + "learning_rate": 0.0006891848851975416, + "loss": 1.4472, + "step": 5030 + }, + { + "epoch": 0.5306962025316456, + "grad_norm": 0.717221200466156, + "learning_rate": 0.0006889346557229265, + "loss": 1.4955, + "step": 5031 + }, + { + "epoch": 0.5308016877637131, + "grad_norm": 0.732249915599823, + "learning_rate": 0.0006886844330909877, + "loss": 1.5089, + "step": 5032 + }, + { + "epoch": 0.5309071729957806, + "grad_norm": 0.7288565635681152, + "learning_rate": 0.0006884342173297639, + "loss": 1.495, + "step": 5033 + }, + { + "epoch": 0.5310126582278482, + "grad_norm": 
0.7756404280662537, + "learning_rate": 0.000688184008467293, + "loss": 1.4875, + "step": 5034 + }, + { + "epoch": 0.5311181434599156, + "grad_norm": 0.6772254705429077, + "learning_rate": 0.0006879338065316122, + "loss": 1.4961, + "step": 5035 + }, + { + "epoch": 0.5312236286919831, + "grad_norm": 0.6439512968063354, + "learning_rate": 0.0006876836115507579, + "loss": 1.4921, + "step": 5036 + }, + { + "epoch": 0.5313291139240506, + "grad_norm": 0.8260946273803711, + "learning_rate": 0.0006874334235527657, + "loss": 1.4639, + "step": 5037 + }, + { + "epoch": 0.5314345991561181, + "grad_norm": 0.8349964022636414, + "learning_rate": 0.0006871832425656702, + "loss": 1.5292, + "step": 5038 + }, + { + "epoch": 0.5315400843881857, + "grad_norm": 0.7731090784072876, + "learning_rate": 0.0006869330686175058, + "loss": 1.4919, + "step": 5039 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 0.7145270109176636, + "learning_rate": 0.0006866829017363054, + "loss": 1.4666, + "step": 5040 + }, + { + "epoch": 0.5317510548523207, + "grad_norm": 0.8825110197067261, + "learning_rate": 0.0006864327419501017, + "loss": 1.4908, + "step": 5041 + }, + { + "epoch": 0.5318565400843882, + "grad_norm": 0.7279706597328186, + "learning_rate": 0.0006861825892869262, + "loss": 1.4884, + "step": 5042 + }, + { + "epoch": 0.5319620253164556, + "grad_norm": 1.274801254272461, + "learning_rate": 0.0006859324437748099, + "loss": 1.4568, + "step": 5043 + }, + { + "epoch": 0.5320675105485232, + "grad_norm": 0.6786714792251587, + "learning_rate": 0.0006856823054417825, + "loss": 1.4913, + "step": 5044 + }, + { + "epoch": 0.5321729957805907, + "grad_norm": 0.9673475027084351, + "learning_rate": 0.0006854321743158737, + "loss": 1.4633, + "step": 5045 + }, + { + "epoch": 0.5322784810126582, + "grad_norm": 0.6984145045280457, + "learning_rate": 0.0006851820504251117, + "loss": 1.5243, + "step": 5046 + }, + { + "epoch": 0.5323839662447257, + "grad_norm": 0.806885838508606, + "learning_rate": 
0.0006849319337975242, + "loss": 1.4778, + "step": 5047 + }, + { + "epoch": 0.5324894514767933, + "grad_norm": 0.7729060053825378, + "learning_rate": 0.0006846818244611376, + "loss": 1.4422, + "step": 5048 + }, + { + "epoch": 0.5325949367088607, + "grad_norm": 0.7141607999801636, + "learning_rate": 0.0006844317224439788, + "loss": 1.5024, + "step": 5049 + }, + { + "epoch": 0.5327004219409283, + "grad_norm": 0.7889525294303894, + "learning_rate": 0.0006841816277740722, + "loss": 1.475, + "step": 5050 + }, + { + "epoch": 0.5328059071729958, + "grad_norm": 0.7088819742202759, + "learning_rate": 0.0006839315404794424, + "loss": 1.5023, + "step": 5051 + }, + { + "epoch": 0.5329113924050632, + "grad_norm": 0.9359884858131409, + "learning_rate": 0.0006836814605881131, + "loss": 1.4968, + "step": 5052 + }, + { + "epoch": 0.5330168776371308, + "grad_norm": 1.0335006713867188, + "learning_rate": 0.0006834313881281066, + "loss": 1.4966, + "step": 5053 + }, + { + "epoch": 0.5331223628691983, + "grad_norm": 0.6887444853782654, + "learning_rate": 0.0006831813231274451, + "loss": 1.4937, + "step": 5054 + }, + { + "epoch": 0.5332278481012658, + "grad_norm": 0.7712413668632507, + "learning_rate": 0.0006829312656141496, + "loss": 1.4922, + "step": 5055 + }, + { + "epoch": 0.5333333333333333, + "grad_norm": 0.7357195615768433, + "learning_rate": 0.0006826812156162401, + "loss": 1.4882, + "step": 5056 + }, + { + "epoch": 0.5334388185654009, + "grad_norm": 0.7103298306465149, + "learning_rate": 0.0006824311731617363, + "loss": 1.5077, + "step": 5057 + }, + { + "epoch": 0.5335443037974683, + "grad_norm": 0.6788186430931091, + "learning_rate": 0.0006821811382786561, + "loss": 1.5002, + "step": 5058 + }, + { + "epoch": 0.5336497890295359, + "grad_norm": 0.7933992743492126, + "learning_rate": 0.0006819311109950177, + "loss": 1.4755, + "step": 5059 + }, + { + "epoch": 0.5337552742616034, + "grad_norm": 0.7106105089187622, + "learning_rate": 0.0006816810913388379, + "loss": 1.4758, + "step": 
5060 + }, + { + "epoch": 0.5338607594936708, + "grad_norm": 0.7963417172431946, + "learning_rate": 0.0006814310793381322, + "loss": 1.5219, + "step": 5061 + }, + { + "epoch": 0.5339662447257384, + "grad_norm": 0.7078157663345337, + "learning_rate": 0.0006811810750209161, + "loss": 1.4638, + "step": 5062 + }, + { + "epoch": 0.5340717299578059, + "grad_norm": 0.891534686088562, + "learning_rate": 0.0006809310784152039, + "loss": 1.4701, + "step": 5063 + }, + { + "epoch": 0.5341772151898734, + "grad_norm": 0.7644140720367432, + "learning_rate": 0.0006806810895490087, + "loss": 1.527, + "step": 5064 + }, + { + "epoch": 0.5342827004219409, + "grad_norm": 0.7781749963760376, + "learning_rate": 0.000680431108450343, + "loss": 1.4611, + "step": 5065 + }, + { + "epoch": 0.5343881856540085, + "grad_norm": 0.6952613592147827, + "learning_rate": 0.0006801811351472185, + "loss": 1.4896, + "step": 5066 + }, + { + "epoch": 0.5344936708860759, + "grad_norm": 0.7075911164283752, + "learning_rate": 0.000679931169667646, + "loss": 1.4613, + "step": 5067 + }, + { + "epoch": 0.5345991561181435, + "grad_norm": 0.7935552597045898, + "learning_rate": 0.0006796812120396351, + "loss": 1.5118, + "step": 5068 + }, + { + "epoch": 0.534704641350211, + "grad_norm": 0.6896457076072693, + "learning_rate": 0.0006794312622911953, + "loss": 1.463, + "step": 5069 + }, + { + "epoch": 0.5348101265822784, + "grad_norm": 0.8995296359062195, + "learning_rate": 0.0006791813204503342, + "loss": 1.4439, + "step": 5070 + }, + { + "epoch": 0.534915611814346, + "grad_norm": 0.757788360118866, + "learning_rate": 0.0006789313865450594, + "loss": 1.4785, + "step": 5071 + }, + { + "epoch": 0.5350210970464135, + "grad_norm": 0.738272488117218, + "learning_rate": 0.0006786814606033773, + "loss": 1.4991, + "step": 5072 + }, + { + "epoch": 0.535126582278481, + "grad_norm": 0.7208561301231384, + "learning_rate": 0.0006784315426532929, + "loss": 1.4667, + "step": 5073 + }, + { + "epoch": 0.5352320675105485, + "grad_norm": 
0.9136431813240051, + "learning_rate": 0.0006781816327228112, + "loss": 1.5248, + "step": 5074 + }, + { + "epoch": 0.5353375527426161, + "grad_norm": 0.7857372164726257, + "learning_rate": 0.0006779317308399357, + "loss": 1.5192, + "step": 5075 + }, + { + "epoch": 0.5354430379746835, + "grad_norm": 0.9343454837799072, + "learning_rate": 0.000677681837032669, + "loss": 1.4922, + "step": 5076 + }, + { + "epoch": 0.5355485232067511, + "grad_norm": 0.7939678430557251, + "learning_rate": 0.0006774319513290132, + "loss": 1.491, + "step": 5077 + }, + { + "epoch": 0.5356540084388186, + "grad_norm": 1.159944772720337, + "learning_rate": 0.0006771820737569689, + "loss": 1.509, + "step": 5078 + }, + { + "epoch": 0.535759493670886, + "grad_norm": 0.7339890599250793, + "learning_rate": 0.0006769322043445363, + "loss": 1.4749, + "step": 5079 + }, + { + "epoch": 0.5358649789029536, + "grad_norm": 0.947131872177124, + "learning_rate": 0.0006766823431197147, + "loss": 1.4909, + "step": 5080 + }, + { + "epoch": 0.5359704641350211, + "grad_norm": 0.7686026692390442, + "learning_rate": 0.0006764324901105022, + "loss": 1.4692, + "step": 5081 + }, + { + "epoch": 0.5360759493670886, + "grad_norm": 0.9852056503295898, + "learning_rate": 0.000676182645344896, + "loss": 1.4991, + "step": 5082 + }, + { + "epoch": 0.5361814345991561, + "grad_norm": 0.7485371232032776, + "learning_rate": 0.0006759328088508925, + "loss": 1.4634, + "step": 5083 + }, + { + "epoch": 0.5362869198312237, + "grad_norm": 1.212648868560791, + "learning_rate": 0.0006756829806564872, + "loss": 1.5084, + "step": 5084 + }, + { + "epoch": 0.5363924050632911, + "grad_norm": 0.7749150991439819, + "learning_rate": 0.0006754331607896742, + "loss": 1.5024, + "step": 5085 + }, + { + "epoch": 0.5364978902953587, + "grad_norm": 0.8907735347747803, + "learning_rate": 0.0006751833492784476, + "loss": 1.4382, + "step": 5086 + }, + { + "epoch": 0.5366033755274262, + "grad_norm": 0.7190759778022766, + "learning_rate": 
0.0006749335461507995, + "loss": 1.4398, + "step": 5087 + }, + { + "epoch": 0.5367088607594936, + "grad_norm": 0.9892345666885376, + "learning_rate": 0.000674683751434722, + "loss": 1.5224, + "step": 5088 + }, + { + "epoch": 0.5368143459915612, + "grad_norm": 0.8689709305763245, + "learning_rate": 0.0006744339651582059, + "loss": 1.4955, + "step": 5089 + }, + { + "epoch": 0.5369198312236287, + "grad_norm": 0.7691560983657837, + "learning_rate": 0.0006741841873492406, + "loss": 1.5205, + "step": 5090 + }, + { + "epoch": 0.5370253164556962, + "grad_norm": 0.7902110815048218, + "learning_rate": 0.0006739344180358153, + "loss": 1.5104, + "step": 5091 + }, + { + "epoch": 0.5371308016877637, + "grad_norm": 0.8433301448822021, + "learning_rate": 0.0006736846572459178, + "loss": 1.4112, + "step": 5092 + }, + { + "epoch": 0.5372362869198313, + "grad_norm": 0.8191484212875366, + "learning_rate": 0.0006734349050075348, + "loss": 1.459, + "step": 5093 + }, + { + "epoch": 0.5373417721518987, + "grad_norm": 1.0484364032745361, + "learning_rate": 0.0006731851613486526, + "loss": 1.45, + "step": 5094 + }, + { + "epoch": 0.5374472573839663, + "grad_norm": 0.9719486832618713, + "learning_rate": 0.0006729354262972561, + "loss": 1.4601, + "step": 5095 + }, + { + "epoch": 0.5375527426160338, + "grad_norm": 0.9014251828193665, + "learning_rate": 0.0006726856998813291, + "loss": 1.4715, + "step": 5096 + }, + { + "epoch": 0.5376582278481012, + "grad_norm": 1.0964349508285522, + "learning_rate": 0.0006724359821288552, + "loss": 1.5, + "step": 5097 + }, + { + "epoch": 0.5377637130801688, + "grad_norm": 0.8122009038925171, + "learning_rate": 0.0006721862730678164, + "loss": 1.4795, + "step": 5098 + }, + { + "epoch": 0.5378691983122363, + "grad_norm": 1.071975827217102, + "learning_rate": 0.0006719365727261935, + "loss": 1.4938, + "step": 5099 + }, + { + "epoch": 0.5379746835443038, + "grad_norm": 0.7659344673156738, + "learning_rate": 0.0006716868811319671, + "loss": 1.4411, + "step": 5100 + 
}, + { + "epoch": 0.5380801687763713, + "grad_norm": 1.092244029045105, + "learning_rate": 0.000671437198313116, + "loss": 1.4966, + "step": 5101 + }, + { + "epoch": 0.5381856540084389, + "grad_norm": 0.7428788542747498, + "learning_rate": 0.0006711875242976187, + "loss": 1.5061, + "step": 5102 + }, + { + "epoch": 0.5382911392405063, + "grad_norm": 1.1785001754760742, + "learning_rate": 0.0006709378591134523, + "loss": 1.4752, + "step": 5103 + }, + { + "epoch": 0.5383966244725739, + "grad_norm": 0.7516124248504639, + "learning_rate": 0.0006706882027885929, + "loss": 1.4613, + "step": 5104 + }, + { + "epoch": 0.5385021097046413, + "grad_norm": 1.13914155960083, + "learning_rate": 0.0006704385553510156, + "loss": 1.4479, + "step": 5105 + }, + { + "epoch": 0.5386075949367088, + "grad_norm": 0.6972185969352722, + "learning_rate": 0.0006701889168286953, + "loss": 1.4834, + "step": 5106 + }, + { + "epoch": 0.5387130801687764, + "grad_norm": 1.0961488485336304, + "learning_rate": 0.0006699392872496048, + "loss": 1.516, + "step": 5107 + }, + { + "epoch": 0.5388185654008438, + "grad_norm": 0.6824047565460205, + "learning_rate": 0.0006696896666417163, + "loss": 1.4824, + "step": 5108 + }, + { + "epoch": 0.5389240506329114, + "grad_norm": 1.154653787612915, + "learning_rate": 0.0006694400550330013, + "loss": 1.5033, + "step": 5109 + }, + { + "epoch": 0.5390295358649789, + "grad_norm": 0.7211856842041016, + "learning_rate": 0.0006691904524514297, + "loss": 1.4343, + "step": 5110 + }, + { + "epoch": 0.5391350210970464, + "grad_norm": 0.9857209920883179, + "learning_rate": 0.0006689408589249709, + "loss": 1.4977, + "step": 5111 + }, + { + "epoch": 0.5392405063291139, + "grad_norm": 0.660964846611023, + "learning_rate": 0.000668691274481593, + "loss": 1.4742, + "step": 5112 + }, + { + "epoch": 0.5393459915611815, + "grad_norm": 1.0447288751602173, + "learning_rate": 0.0006684416991492629, + "loss": 1.5023, + "step": 5113 + }, + { + "epoch": 0.5394514767932489, + "grad_norm": 
0.7421641945838928, + "learning_rate": 0.0006681921329559475, + "loss": 1.4643, + "step": 5114 + }, + { + "epoch": 0.5395569620253164, + "grad_norm": 0.8495551943778992, + "learning_rate": 0.0006679425759296114, + "loss": 1.465, + "step": 5115 + }, + { + "epoch": 0.539662447257384, + "grad_norm": 0.8149023056030273, + "learning_rate": 0.000667693028098219, + "loss": 1.4856, + "step": 5116 + }, + { + "epoch": 0.5397679324894514, + "grad_norm": 0.7089101672172546, + "learning_rate": 0.0006674434894897332, + "loss": 1.4753, + "step": 5117 + }, + { + "epoch": 0.539873417721519, + "grad_norm": 0.7087212800979614, + "learning_rate": 0.000667193960132116, + "loss": 1.5063, + "step": 5118 + }, + { + "epoch": 0.5399789029535865, + "grad_norm": 0.7299991846084595, + "learning_rate": 0.0006669444400533286, + "loss": 1.4714, + "step": 5119 + }, + { + "epoch": 0.540084388185654, + "grad_norm": 0.7747888565063477, + "learning_rate": 0.0006666949292813306, + "loss": 1.4411, + "step": 5120 + }, + { + "epoch": 0.5401898734177215, + "grad_norm": 0.7998756170272827, + "learning_rate": 0.0006664454278440813, + "loss": 1.4687, + "step": 5121 + }, + { + "epoch": 0.5402953586497891, + "grad_norm": 0.7034708857536316, + "learning_rate": 0.0006661959357695382, + "loss": 1.458, + "step": 5122 + }, + { + "epoch": 0.5404008438818565, + "grad_norm": 0.6924930214881897, + "learning_rate": 0.0006659464530856587, + "loss": 1.4613, + "step": 5123 + }, + { + "epoch": 0.540506329113924, + "grad_norm": 0.7749452590942383, + "learning_rate": 0.0006656969798203982, + "loss": 1.4934, + "step": 5124 + }, + { + "epoch": 0.5406118143459916, + "grad_norm": 0.8213515281677246, + "learning_rate": 0.0006654475160017115, + "loss": 1.4791, + "step": 5125 + }, + { + "epoch": 0.540717299578059, + "grad_norm": 0.7085028290748596, + "learning_rate": 0.0006651980616575522, + "loss": 1.4662, + "step": 5126 + }, + { + "epoch": 0.5408227848101266, + "grad_norm": 0.7494063377380371, + "learning_rate": 
0.0006649486168158731, + "loss": 1.4794, + "step": 5127 + }, + { + "epoch": 0.5409282700421941, + "grad_norm": 0.7235351800918579, + "learning_rate": 0.0006646991815046254, + "loss": 1.4746, + "step": 5128 + }, + { + "epoch": 0.5410337552742616, + "grad_norm": 0.8020064234733582, + "learning_rate": 0.0006644497557517599, + "loss": 1.4836, + "step": 5129 + }, + { + "epoch": 0.5411392405063291, + "grad_norm": 0.6768255233764648, + "learning_rate": 0.0006642003395852258, + "loss": 1.4852, + "step": 5130 + }, + { + "epoch": 0.5412447257383967, + "grad_norm": 0.7495554089546204, + "learning_rate": 0.0006639509330329713, + "loss": 1.5103, + "step": 5131 + }, + { + "epoch": 0.5413502109704641, + "grad_norm": 0.6898956894874573, + "learning_rate": 0.0006637015361229438, + "loss": 1.5254, + "step": 5132 + }, + { + "epoch": 0.5414556962025316, + "grad_norm": 0.7361966967582703, + "learning_rate": 0.0006634521488830898, + "loss": 1.4806, + "step": 5133 + }, + { + "epoch": 0.5415611814345992, + "grad_norm": 0.6952835917472839, + "learning_rate": 0.0006632027713413541, + "loss": 1.4875, + "step": 5134 + }, + { + "epoch": 0.5416666666666666, + "grad_norm": 0.6843955516815186, + "learning_rate": 0.0006629534035256805, + "loss": 1.4809, + "step": 5135 + }, + { + "epoch": 0.5417721518987342, + "grad_norm": 0.70630943775177, + "learning_rate": 0.0006627040454640123, + "loss": 1.4778, + "step": 5136 + }, + { + "epoch": 0.5418776371308017, + "grad_norm": 0.7190687656402588, + "learning_rate": 0.0006624546971842909, + "loss": 1.4914, + "step": 5137 + }, + { + "epoch": 0.5419831223628692, + "grad_norm": 0.6921412348747253, + "learning_rate": 0.0006622053587144572, + "loss": 1.4911, + "step": 5138 + }, + { + "epoch": 0.5420886075949367, + "grad_norm": 0.7236266136169434, + "learning_rate": 0.0006619560300824507, + "loss": 1.4948, + "step": 5139 + }, + { + "epoch": 0.5421940928270043, + "grad_norm": 0.6661948561668396, + "learning_rate": 0.0006617067113162103, + "loss": 1.4917, + "step": 
5140 + }, + { + "epoch": 0.5422995780590717, + "grad_norm": 0.7157939672470093, + "learning_rate": 0.0006614574024436732, + "loss": 1.4653, + "step": 5141 + }, + { + "epoch": 0.5424050632911392, + "grad_norm": 0.810660183429718, + "learning_rate": 0.0006612081034927756, + "loss": 1.5081, + "step": 5142 + }, + { + "epoch": 0.5425105485232068, + "grad_norm": 0.7098639607429504, + "learning_rate": 0.0006609588144914528, + "loss": 1.5021, + "step": 5143 + }, + { + "epoch": 0.5426160337552742, + "grad_norm": 0.7729082107543945, + "learning_rate": 0.0006607095354676389, + "loss": 1.4754, + "step": 5144 + }, + { + "epoch": 0.5427215189873418, + "grad_norm": 0.7509157061576843, + "learning_rate": 0.0006604602664492667, + "loss": 1.4958, + "step": 5145 + }, + { + "epoch": 0.5428270042194093, + "grad_norm": 0.6635807752609253, + "learning_rate": 0.0006602110074642682, + "loss": 1.4731, + "step": 5146 + }, + { + "epoch": 0.5429324894514768, + "grad_norm": 0.7757813930511475, + "learning_rate": 0.000659961758540574, + "loss": 1.5083, + "step": 5147 + }, + { + "epoch": 0.5430379746835443, + "grad_norm": 0.6477483510971069, + "learning_rate": 0.0006597125197061133, + "loss": 1.4518, + "step": 5148 + }, + { + "epoch": 0.5431434599156119, + "grad_norm": 0.7474650740623474, + "learning_rate": 0.0006594632909888154, + "loss": 1.4769, + "step": 5149 + }, + { + "epoch": 0.5432489451476793, + "grad_norm": 0.693250834941864, + "learning_rate": 0.0006592140724166073, + "loss": 1.5099, + "step": 5150 + }, + { + "epoch": 0.5433544303797468, + "grad_norm": 0.738993227481842, + "learning_rate": 0.000658964864017415, + "loss": 1.4564, + "step": 5151 + }, + { + "epoch": 0.5434599156118144, + "grad_norm": 0.740841805934906, + "learning_rate": 0.0006587156658191635, + "loss": 1.509, + "step": 5152 + }, + { + "epoch": 0.5435654008438818, + "grad_norm": 0.6566944122314453, + "learning_rate": 0.0006584664778497771, + "loss": 1.4686, + "step": 5153 + }, + { + "epoch": 0.5436708860759494, + 
"grad_norm": 0.6618391871452332, + "learning_rate": 0.0006582173001371781, + "loss": 1.4944, + "step": 5154 + }, + { + "epoch": 0.5437763713080169, + "grad_norm": 0.7147175669670105, + "learning_rate": 0.0006579681327092883, + "loss": 1.5101, + "step": 5155 + }, + { + "epoch": 0.5438818565400844, + "grad_norm": 0.7144160866737366, + "learning_rate": 0.0006577189755940282, + "loss": 1.4476, + "step": 5156 + }, + { + "epoch": 0.5439873417721519, + "grad_norm": 0.7102494835853577, + "learning_rate": 0.0006574698288193166, + "loss": 1.4813, + "step": 5157 + }, + { + "epoch": 0.5440928270042195, + "grad_norm": 0.8813502192497253, + "learning_rate": 0.0006572206924130725, + "loss": 1.4655, + "step": 5158 + }, + { + "epoch": 0.5441983122362869, + "grad_norm": 0.9776658415794373, + "learning_rate": 0.0006569715664032124, + "loss": 1.5038, + "step": 5159 + }, + { + "epoch": 0.5443037974683544, + "grad_norm": 0.7501352429389954, + "learning_rate": 0.0006567224508176523, + "loss": 1.4781, + "step": 5160 + }, + { + "epoch": 0.544409282700422, + "grad_norm": 1.0027542114257812, + "learning_rate": 0.0006564733456843067, + "loss": 1.527, + "step": 5161 + }, + { + "epoch": 0.5445147679324894, + "grad_norm": 0.7884905934333801, + "learning_rate": 0.000656224251031089, + "loss": 1.5175, + "step": 5162 + }, + { + "epoch": 0.544620253164557, + "grad_norm": 0.8992710709571838, + "learning_rate": 0.0006559751668859115, + "loss": 1.4456, + "step": 5163 + }, + { + "epoch": 0.5447257383966245, + "grad_norm": 0.8819329738616943, + "learning_rate": 0.0006557260932766855, + "loss": 1.4743, + "step": 5164 + }, + { + "epoch": 0.544831223628692, + "grad_norm": 0.6978321671485901, + "learning_rate": 0.0006554770302313205, + "loss": 1.4844, + "step": 5165 + }, + { + "epoch": 0.5449367088607595, + "grad_norm": 1.1699320077896118, + "learning_rate": 0.0006552279777777258, + "loss": 1.4467, + "step": 5166 + }, + { + "epoch": 0.5450421940928271, + "grad_norm": 0.7149600386619568, + "learning_rate": 
0.000654978935943809, + "loss": 1.4868, + "step": 5167 + }, + { + "epoch": 0.5451476793248945, + "grad_norm": 0.8753569722175598, + "learning_rate": 0.0006547299047574761, + "loss": 1.482, + "step": 5168 + }, + { + "epoch": 0.545253164556962, + "grad_norm": 0.6901388764381409, + "learning_rate": 0.0006544808842466324, + "loss": 1.5111, + "step": 5169 + }, + { + "epoch": 0.5453586497890295, + "grad_norm": 0.9631401896476746, + "learning_rate": 0.0006542318744391821, + "loss": 1.4345, + "step": 5170 + }, + { + "epoch": 0.545464135021097, + "grad_norm": 0.7004762887954712, + "learning_rate": 0.0006539828753630276, + "loss": 1.4597, + "step": 5171 + }, + { + "epoch": 0.5455696202531646, + "grad_norm": 1.0558565855026245, + "learning_rate": 0.0006537338870460708, + "loss": 1.4854, + "step": 5172 + }, + { + "epoch": 0.545675105485232, + "grad_norm": 0.7188917398452759, + "learning_rate": 0.000653484909516212, + "loss": 1.4935, + "step": 5173 + }, + { + "epoch": 0.5457805907172996, + "grad_norm": 1.083565354347229, + "learning_rate": 0.00065323594280135, + "loss": 1.5029, + "step": 5174 + }, + { + "epoch": 0.5458860759493671, + "grad_norm": 0.7155992984771729, + "learning_rate": 0.0006529869869293834, + "loss": 1.4859, + "step": 5175 + }, + { + "epoch": 0.5459915611814345, + "grad_norm": 0.9971915483474731, + "learning_rate": 0.0006527380419282088, + "loss": 1.4756, + "step": 5176 + }, + { + "epoch": 0.5460970464135021, + "grad_norm": 0.8197457790374756, + "learning_rate": 0.0006524891078257215, + "loss": 1.5128, + "step": 5177 + }, + { + "epoch": 0.5462025316455696, + "grad_norm": 0.8294790983200073, + "learning_rate": 0.000652240184649816, + "loss": 1.532, + "step": 5178 + }, + { + "epoch": 0.5463080168776371, + "grad_norm": 0.7423095703125, + "learning_rate": 0.0006519912724283851, + "loss": 1.4668, + "step": 5179 + }, + { + "epoch": 0.5464135021097046, + "grad_norm": 1.1634740829467773, + "learning_rate": 0.0006517423711893209, + "loss": 1.4942, + "step": 5180 + }, + 
{ + "epoch": 0.5465189873417722, + "grad_norm": 0.9104419350624084, + "learning_rate": 0.000651493480960514, + "loss": 1.4793, + "step": 5181 + }, + { + "epoch": 0.5466244725738396, + "grad_norm": 0.7017368078231812, + "learning_rate": 0.0006512446017698537, + "loss": 1.4807, + "step": 5182 + }, + { + "epoch": 0.5467299578059072, + "grad_norm": 0.8891331553459167, + "learning_rate": 0.0006509957336452279, + "loss": 1.4593, + "step": 5183 + }, + { + "epoch": 0.5468354430379747, + "grad_norm": 0.6789868474006653, + "learning_rate": 0.0006507468766145242, + "loss": 1.4976, + "step": 5184 + }, + { + "epoch": 0.5469409282700421, + "grad_norm": 1.1358489990234375, + "learning_rate": 0.000650498030705628, + "loss": 1.4584, + "step": 5185 + }, + { + "epoch": 0.5470464135021097, + "grad_norm": 0.7007588148117065, + "learning_rate": 0.0006502491959464235, + "loss": 1.5134, + "step": 5186 + }, + { + "epoch": 0.5471518987341772, + "grad_norm": 0.9255052804946899, + "learning_rate": 0.000650000372364794, + "loss": 1.4604, + "step": 5187 + }, + { + "epoch": 0.5472573839662447, + "grad_norm": 0.7785059809684753, + "learning_rate": 0.0006497515599886214, + "loss": 1.448, + "step": 5188 + }, + { + "epoch": 0.5473628691983122, + "grad_norm": 1.190997838973999, + "learning_rate": 0.0006495027588457864, + "loss": 1.4835, + "step": 5189 + }, + { + "epoch": 0.5474683544303798, + "grad_norm": 0.7564184069633484, + "learning_rate": 0.0006492539689641685, + "loss": 1.4564, + "step": 5190 + }, + { + "epoch": 0.5475738396624472, + "grad_norm": 0.9942145943641663, + "learning_rate": 0.0006490051903716454, + "loss": 1.4844, + "step": 5191 + }, + { + "epoch": 0.5476793248945148, + "grad_norm": 0.8551633358001709, + "learning_rate": 0.0006487564230960944, + "loss": 1.4885, + "step": 5192 + }, + { + "epoch": 0.5477848101265823, + "grad_norm": 1.2636345624923706, + "learning_rate": 0.0006485076671653913, + "loss": 1.4837, + "step": 5193 + }, + { + "epoch": 0.5478902953586497, + "grad_norm": 
1.0092867612838745, + "learning_rate": 0.00064825892260741, + "loss": 1.4953, + "step": 5194 + }, + { + "epoch": 0.5479957805907173, + "grad_norm": 1.3092275857925415, + "learning_rate": 0.0006480101894500239, + "loss": 1.4859, + "step": 5195 + }, + { + "epoch": 0.5481012658227848, + "grad_norm": 1.1395883560180664, + "learning_rate": 0.0006477614677211046, + "loss": 1.5099, + "step": 5196 + }, + { + "epoch": 0.5482067510548523, + "grad_norm": 0.9776188135147095, + "learning_rate": 0.0006475127574485226, + "loss": 1.4878, + "step": 5197 + }, + { + "epoch": 0.5483122362869198, + "grad_norm": 0.9127559065818787, + "learning_rate": 0.0006472640586601472, + "loss": 1.4593, + "step": 5198 + }, + { + "epoch": 0.5484177215189874, + "grad_norm": 0.7713063359260559, + "learning_rate": 0.0006470153713838463, + "loss": 1.4728, + "step": 5199 + }, + { + "epoch": 0.5485232067510548, + "grad_norm": 1.31419038772583, + "learning_rate": 0.0006467666956474865, + "loss": 1.4729, + "step": 5200 + }, + { + "epoch": 0.5486286919831224, + "grad_norm": 0.7310884594917297, + "learning_rate": 0.0006465180314789332, + "loss": 1.429, + "step": 5201 + }, + { + "epoch": 0.5487341772151899, + "grad_norm": 1.00730299949646, + "learning_rate": 0.0006462693789060505, + "loss": 1.4848, + "step": 5202 + }, + { + "epoch": 0.5488396624472573, + "grad_norm": 0.6948159337043762, + "learning_rate": 0.0006460207379567011, + "loss": 1.4635, + "step": 5203 + }, + { + "epoch": 0.5489451476793249, + "grad_norm": 0.9189753532409668, + "learning_rate": 0.0006457721086587468, + "loss": 1.4647, + "step": 5204 + }, + { + "epoch": 0.5490506329113924, + "grad_norm": 0.7569296956062317, + "learning_rate": 0.0006455234910400472, + "loss": 1.4839, + "step": 5205 + }, + { + "epoch": 0.5491561181434599, + "grad_norm": 0.9700836539268494, + "learning_rate": 0.0006452748851284615, + "loss": 1.4625, + "step": 5206 + }, + { + "epoch": 0.5492616033755274, + "grad_norm": 0.7953030467033386, + "learning_rate": 
0.0006450262909518471, + "loss": 1.4476, + "step": 5207 + }, + { + "epoch": 0.549367088607595, + "grad_norm": 1.0490093231201172, + "learning_rate": 0.0006447777085380603, + "loss": 1.4969, + "step": 5208 + }, + { + "epoch": 0.5494725738396624, + "grad_norm": 0.7333338856697083, + "learning_rate": 0.0006445291379149556, + "loss": 1.4701, + "step": 5209 + }, + { + "epoch": 0.54957805907173, + "grad_norm": 1.2612946033477783, + "learning_rate": 0.0006442805791103873, + "loss": 1.4726, + "step": 5210 + }, + { + "epoch": 0.5496835443037975, + "grad_norm": 0.7224448323249817, + "learning_rate": 0.0006440320321522071, + "loss": 1.4969, + "step": 5211 + }, + { + "epoch": 0.549789029535865, + "grad_norm": 0.834145188331604, + "learning_rate": 0.0006437834970682661, + "loss": 1.4325, + "step": 5212 + }, + { + "epoch": 0.5498945147679325, + "grad_norm": 0.9218828082084656, + "learning_rate": 0.000643534973886414, + "loss": 1.5152, + "step": 5213 + }, + { + "epoch": 0.55, + "grad_norm": 0.9875684380531311, + "learning_rate": 0.0006432864626344989, + "loss": 1.4895, + "step": 5214 + }, + { + "epoch": 0.5501054852320675, + "grad_norm": 0.8914507627487183, + "learning_rate": 0.0006430379633403679, + "loss": 1.491, + "step": 5215 + }, + { + "epoch": 0.550210970464135, + "grad_norm": 0.9490864276885986, + "learning_rate": 0.0006427894760318664, + "loss": 1.5077, + "step": 5216 + }, + { + "epoch": 0.5503164556962026, + "grad_norm": 0.9062446355819702, + "learning_rate": 0.0006425410007368385, + "loss": 1.4697, + "step": 5217 + }, + { + "epoch": 0.55042194092827, + "grad_norm": 1.1313064098358154, + "learning_rate": 0.0006422925374831275, + "loss": 1.478, + "step": 5218 + }, + { + "epoch": 0.5505274261603376, + "grad_norm": 0.7486379742622375, + "learning_rate": 0.0006420440862985748, + "loss": 1.5022, + "step": 5219 + }, + { + "epoch": 0.5506329113924051, + "grad_norm": 0.7803442478179932, + "learning_rate": 0.0006417956472110205, + "loss": 1.4404, + "step": 5220 + }, + { + 
"epoch": 0.5507383966244725, + "grad_norm": 0.6858295798301697, + "learning_rate": 0.0006415472202483034, + "loss": 1.4642, + "step": 5221 + }, + { + "epoch": 0.5508438818565401, + "grad_norm": 0.8390136957168579, + "learning_rate": 0.0006412988054382611, + "loss": 1.4494, + "step": 5222 + }, + { + "epoch": 0.5509493670886076, + "grad_norm": 0.6865503191947937, + "learning_rate": 0.0006410504028087297, + "loss": 1.4396, + "step": 5223 + }, + { + "epoch": 0.5510548523206751, + "grad_norm": 0.7971534132957458, + "learning_rate": 0.000640802012387544, + "loss": 1.5044, + "step": 5224 + }, + { + "epoch": 0.5511603375527426, + "grad_norm": 0.6986808776855469, + "learning_rate": 0.0006405536342025374, + "loss": 1.4735, + "step": 5225 + }, + { + "epoch": 0.5512658227848102, + "grad_norm": 0.7863610982894897, + "learning_rate": 0.0006403052682815415, + "loss": 1.4652, + "step": 5226 + }, + { + "epoch": 0.5513713080168776, + "grad_norm": 0.8569674491882324, + "learning_rate": 0.0006400569146523875, + "loss": 1.4428, + "step": 5227 + }, + { + "epoch": 0.5514767932489452, + "grad_norm": 0.9275884032249451, + "learning_rate": 0.0006398085733429045, + "loss": 1.5111, + "step": 5228 + }, + { + "epoch": 0.5515822784810127, + "grad_norm": 0.9435176253318787, + "learning_rate": 0.0006395602443809203, + "loss": 1.4858, + "step": 5229 + }, + { + "epoch": 0.5516877637130801, + "grad_norm": 1.065724492073059, + "learning_rate": 0.0006393119277942614, + "loss": 1.4993, + "step": 5230 + }, + { + "epoch": 0.5517932489451477, + "grad_norm": 0.9469255805015564, + "learning_rate": 0.0006390636236107528, + "loss": 1.5025, + "step": 5231 + }, + { + "epoch": 0.5518987341772152, + "grad_norm": 0.9543864727020264, + "learning_rate": 0.0006388153318582185, + "loss": 1.4704, + "step": 5232 + }, + { + "epoch": 0.5520042194092827, + "grad_norm": 0.7855944633483887, + "learning_rate": 0.0006385670525644806, + "loss": 1.4245, + "step": 5233 + }, + { + "epoch": 0.5521097046413502, + "grad_norm": 
1.021822452545166, + "learning_rate": 0.0006383187857573601, + "loss": 1.4836, + "step": 5234 + }, + { + "epoch": 0.5522151898734177, + "grad_norm": 0.8253738284111023, + "learning_rate": 0.0006380705314646765, + "loss": 1.493, + "step": 5235 + }, + { + "epoch": 0.5523206751054852, + "grad_norm": 0.7123667597770691, + "learning_rate": 0.0006378222897142482, + "loss": 1.4871, + "step": 5236 + }, + { + "epoch": 0.5524261603375528, + "grad_norm": 0.6708317995071411, + "learning_rate": 0.0006375740605338916, + "loss": 1.474, + "step": 5237 + }, + { + "epoch": 0.5525316455696202, + "grad_norm": 0.7591674327850342, + "learning_rate": 0.0006373258439514221, + "loss": 1.4706, + "step": 5238 + }, + { + "epoch": 0.5526371308016877, + "grad_norm": 0.769538938999176, + "learning_rate": 0.0006370776399946536, + "loss": 1.4864, + "step": 5239 + }, + { + "epoch": 0.5527426160337553, + "grad_norm": 0.6967486143112183, + "learning_rate": 0.0006368294486913987, + "loss": 1.5077, + "step": 5240 + }, + { + "epoch": 0.5528481012658227, + "grad_norm": 0.7365374565124512, + "learning_rate": 0.0006365812700694683, + "loss": 1.4835, + "step": 5241 + }, + { + "epoch": 0.5529535864978903, + "grad_norm": 0.6821744441986084, + "learning_rate": 0.0006363331041566723, + "loss": 1.46, + "step": 5242 + }, + { + "epoch": 0.5530590717299578, + "grad_norm": 0.8148869872093201, + "learning_rate": 0.0006360849509808184, + "loss": 1.4506, + "step": 5243 + }, + { + "epoch": 0.5531645569620253, + "grad_norm": 0.7089667916297913, + "learning_rate": 0.0006358368105697142, + "loss": 1.4515, + "step": 5244 + }, + { + "epoch": 0.5532700421940928, + "grad_norm": 0.753223180770874, + "learning_rate": 0.0006355886829511645, + "loss": 1.48, + "step": 5245 + }, + { + "epoch": 0.5533755274261604, + "grad_norm": 0.7921420931816101, + "learning_rate": 0.0006353405681529734, + "loss": 1.4808, + "step": 5246 + }, + { + "epoch": 0.5534810126582278, + "grad_norm": 0.725290834903717, + "learning_rate": 
0.0006350924662029433, + "loss": 1.4783, + "step": 5247 + }, + { + "epoch": 0.5535864978902953, + "grad_norm": 0.8498547673225403, + "learning_rate": 0.0006348443771288755, + "loss": 1.4853, + "step": 5248 + }, + { + "epoch": 0.5536919831223629, + "grad_norm": 0.7660459280014038, + "learning_rate": 0.0006345963009585694, + "loss": 1.4511, + "step": 5249 + }, + { + "epoch": 0.5537974683544303, + "grad_norm": 0.793799102306366, + "learning_rate": 0.0006343482377198232, + "loss": 1.5027, + "step": 5250 + }, + { + "epoch": 0.5539029535864979, + "grad_norm": 0.9155524969100952, + "learning_rate": 0.0006341001874404335, + "loss": 1.5007, + "step": 5251 + }, + { + "epoch": 0.5540084388185654, + "grad_norm": 0.7684077024459839, + "learning_rate": 0.0006338521501481957, + "loss": 1.4649, + "step": 5252 + }, + { + "epoch": 0.5541139240506329, + "grad_norm": 0.8120620846748352, + "learning_rate": 0.0006336041258709039, + "loss": 1.4729, + "step": 5253 + }, + { + "epoch": 0.5542194092827004, + "grad_norm": 0.755851149559021, + "learning_rate": 0.0006333561146363502, + "loss": 1.4981, + "step": 5254 + }, + { + "epoch": 0.554324894514768, + "grad_norm": 0.9055152535438538, + "learning_rate": 0.0006331081164723253, + "loss": 1.4988, + "step": 5255 + }, + { + "epoch": 0.5544303797468354, + "grad_norm": 0.7983907461166382, + "learning_rate": 0.000632860131406619, + "loss": 1.4406, + "step": 5256 + }, + { + "epoch": 0.554535864978903, + "grad_norm": 0.7414277195930481, + "learning_rate": 0.0006326121594670191, + "loss": 1.4877, + "step": 5257 + }, + { + "epoch": 0.5546413502109705, + "grad_norm": 0.6863685250282288, + "learning_rate": 0.000632364200681312, + "loss": 1.4798, + "step": 5258 + }, + { + "epoch": 0.5547468354430379, + "grad_norm": 0.9664469957351685, + "learning_rate": 0.0006321162550772829, + "loss": 1.5025, + "step": 5259 + }, + { + "epoch": 0.5548523206751055, + "grad_norm": 0.6980290412902832, + "learning_rate": 0.0006318683226827151, + "loss": 1.4571, + "step": 5260 
+ }, + { + "epoch": 0.554957805907173, + "grad_norm": 0.9902487397193909, + "learning_rate": 0.0006316204035253906, + "loss": 1.4875, + "step": 5261 + }, + { + "epoch": 0.5550632911392405, + "grad_norm": 0.744840145111084, + "learning_rate": 0.0006313724976330904, + "loss": 1.4863, + "step": 5262 + }, + { + "epoch": 0.555168776371308, + "grad_norm": 1.1101820468902588, + "learning_rate": 0.0006311246050335934, + "loss": 1.4831, + "step": 5263 + }, + { + "epoch": 0.5552742616033756, + "grad_norm": 0.6528719067573547, + "learning_rate": 0.0006308767257546772, + "loss": 1.4546, + "step": 5264 + }, + { + "epoch": 0.555379746835443, + "grad_norm": 1.1360563039779663, + "learning_rate": 0.0006306288598241179, + "loss": 1.4802, + "step": 5265 + }, + { + "epoch": 0.5554852320675105, + "grad_norm": 0.7205590605735779, + "learning_rate": 0.00063038100726969, + "loss": 1.4758, + "step": 5266 + }, + { + "epoch": 0.5555907172995781, + "grad_norm": 1.1234019994735718, + "learning_rate": 0.0006301331681191668, + "loss": 1.4983, + "step": 5267 + }, + { + "epoch": 0.5556962025316455, + "grad_norm": 0.6615250706672668, + "learning_rate": 0.0006298853424003199, + "loss": 1.4959, + "step": 5268 + }, + { + "epoch": 0.5558016877637131, + "grad_norm": 0.9485765695571899, + "learning_rate": 0.0006296375301409187, + "loss": 1.4906, + "step": 5269 + }, + { + "epoch": 0.5559071729957806, + "grad_norm": 0.757503092288971, + "learning_rate": 0.0006293897313687331, + "loss": 1.4876, + "step": 5270 + }, + { + "epoch": 0.5560126582278481, + "grad_norm": 0.809901237487793, + "learning_rate": 0.0006291419461115293, + "loss": 1.4728, + "step": 5271 + }, + { + "epoch": 0.5561181434599156, + "grad_norm": 0.7528597712516785, + "learning_rate": 0.0006288941743970732, + "loss": 1.4988, + "step": 5272 + }, + { + "epoch": 0.5562236286919832, + "grad_norm": 0.9289854764938354, + "learning_rate": 0.0006286464162531287, + "loss": 1.444, + "step": 5273 + }, + { + "epoch": 0.5563291139240506, + "grad_norm": 
0.9187638759613037, + "learning_rate": 0.0006283986717074585, + "loss": 1.4415, + "step": 5274 + }, + { + "epoch": 0.5564345991561181, + "grad_norm": 0.817061185836792, + "learning_rate": 0.0006281509407878232, + "loss": 1.4606, + "step": 5275 + }, + { + "epoch": 0.5565400843881857, + "grad_norm": 0.9118360877037048, + "learning_rate": 0.0006279032235219829, + "loss": 1.4896, + "step": 5276 + }, + { + "epoch": 0.5566455696202531, + "grad_norm": 0.6470775008201599, + "learning_rate": 0.0006276555199376951, + "loss": 1.5004, + "step": 5277 + }, + { + "epoch": 0.5567510548523207, + "grad_norm": 0.7738702297210693, + "learning_rate": 0.000627407830062716, + "loss": 1.4693, + "step": 5278 + }, + { + "epoch": 0.5568565400843882, + "grad_norm": 0.6656195521354675, + "learning_rate": 0.0006271601539248012, + "loss": 1.4833, + "step": 5279 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 0.7167867422103882, + "learning_rate": 0.0006269124915517037, + "loss": 1.4694, + "step": 5280 + }, + { + "epoch": 0.5570675105485232, + "grad_norm": 0.6639468669891357, + "learning_rate": 0.0006266648429711753, + "loss": 1.4861, + "step": 5281 + }, + { + "epoch": 0.5571729957805908, + "grad_norm": 0.7603502869606018, + "learning_rate": 0.0006264172082109661, + "loss": 1.4607, + "step": 5282 + }, + { + "epoch": 0.5572784810126582, + "grad_norm": 0.696189820766449, + "learning_rate": 0.0006261695872988252, + "loss": 1.4511, + "step": 5283 + }, + { + "epoch": 0.5573839662447257, + "grad_norm": 0.9003666043281555, + "learning_rate": 0.0006259219802624994, + "loss": 1.4808, + "step": 5284 + }, + { + "epoch": 0.5574894514767933, + "grad_norm": 0.8116178512573242, + "learning_rate": 0.0006256743871297344, + "loss": 1.4661, + "step": 5285 + }, + { + "epoch": 0.5575949367088607, + "grad_norm": 1.022573709487915, + "learning_rate": 0.0006254268079282743, + "loss": 1.4688, + "step": 5286 + }, + { + "epoch": 0.5577004219409283, + "grad_norm": 0.7954180836677551, + "learning_rate": 
0.0006251792426858612, + "loss": 1.4575, + "step": 5287 + }, + { + "epoch": 0.5578059071729958, + "grad_norm": 1.322927474975586, + "learning_rate": 0.0006249316914302368, + "loss": 1.4828, + "step": 5288 + }, + { + "epoch": 0.5579113924050633, + "grad_norm": 0.7957351207733154, + "learning_rate": 0.0006246841541891399, + "loss": 1.4986, + "step": 5289 + }, + { + "epoch": 0.5580168776371308, + "grad_norm": 0.8903948664665222, + "learning_rate": 0.0006244366309903084, + "loss": 1.5104, + "step": 5290 + }, + { + "epoch": 0.5581223628691984, + "grad_norm": 0.684565007686615, + "learning_rate": 0.0006241891218614786, + "loss": 1.4917, + "step": 5291 + }, + { + "epoch": 0.5582278481012658, + "grad_norm": 0.8277559280395508, + "learning_rate": 0.0006239416268303849, + "loss": 1.4424, + "step": 5292 + }, + { + "epoch": 0.5583333333333333, + "grad_norm": 0.7457320094108582, + "learning_rate": 0.0006236941459247606, + "loss": 1.4745, + "step": 5293 + }, + { + "epoch": 0.5584388185654009, + "grad_norm": 0.7179293632507324, + "learning_rate": 0.0006234466791723371, + "loss": 1.4876, + "step": 5294 + }, + { + "epoch": 0.5585443037974683, + "grad_norm": 0.6766775250434875, + "learning_rate": 0.0006231992266008438, + "loss": 1.4697, + "step": 5295 + }, + { + "epoch": 0.5586497890295359, + "grad_norm": 0.7802527546882629, + "learning_rate": 0.00062295178823801, + "loss": 1.5008, + "step": 5296 + }, + { + "epoch": 0.5587552742616034, + "grad_norm": 0.6481279134750366, + "learning_rate": 0.0006227043641115616, + "loss": 1.4682, + "step": 5297 + }, + { + "epoch": 0.5588607594936709, + "grad_norm": 0.8656459450721741, + "learning_rate": 0.0006224569542492241, + "loss": 1.454, + "step": 5298 + }, + { + "epoch": 0.5589662447257384, + "grad_norm": 0.8448770642280579, + "learning_rate": 0.0006222095586787208, + "loss": 1.5032, + "step": 5299 + }, + { + "epoch": 0.5590717299578059, + "grad_norm": 0.6833620071411133, + "learning_rate": 0.0006219621774277737, + "loss": 1.4998, + "step": 
5300 + }, + { + "epoch": 0.5591772151898734, + "grad_norm": 0.810914933681488, + "learning_rate": 0.000621714810524103, + "loss": 1.4587, + "step": 5301 + }, + { + "epoch": 0.559282700421941, + "grad_norm": 0.6997657418251038, + "learning_rate": 0.0006214674579954276, + "loss": 1.4646, + "step": 5302 + }, + { + "epoch": 0.5593881856540084, + "grad_norm": 0.7127995491027832, + "learning_rate": 0.0006212201198694643, + "loss": 1.4745, + "step": 5303 + }, + { + "epoch": 0.5594936708860759, + "grad_norm": 0.7908846139907837, + "learning_rate": 0.0006209727961739286, + "loss": 1.4618, + "step": 5304 + }, + { + "epoch": 0.5595991561181435, + "grad_norm": 0.663141131401062, + "learning_rate": 0.0006207254869365346, + "loss": 1.483, + "step": 5305 + }, + { + "epoch": 0.5597046413502109, + "grad_norm": 0.9019588232040405, + "learning_rate": 0.0006204781921849945, + "loss": 1.4855, + "step": 5306 + }, + { + "epoch": 0.5598101265822785, + "grad_norm": 0.7591596841812134, + "learning_rate": 0.0006202309119470188, + "loss": 1.4692, + "step": 5307 + }, + { + "epoch": 0.559915611814346, + "grad_norm": 0.7140185236930847, + "learning_rate": 0.0006199836462503166, + "loss": 1.4247, + "step": 5308 + }, + { + "epoch": 0.5600210970464135, + "grad_norm": 0.7816687226295471, + "learning_rate": 0.0006197363951225951, + "loss": 1.4608, + "step": 5309 + }, + { + "epoch": 0.560126582278481, + "grad_norm": 0.8325383067131042, + "learning_rate": 0.00061948915859156, + "loss": 1.4639, + "step": 5310 + }, + { + "epoch": 0.5602320675105485, + "grad_norm": 0.6443870663642883, + "learning_rate": 0.0006192419366849155, + "loss": 1.4775, + "step": 5311 + }, + { + "epoch": 0.560337552742616, + "grad_norm": 0.7442277073860168, + "learning_rate": 0.0006189947294303641, + "loss": 1.4543, + "step": 5312 + }, + { + "epoch": 0.5604430379746835, + "grad_norm": 0.7018092274665833, + "learning_rate": 0.000618747536855606, + "loss": 1.4557, + "step": 5313 + }, + { + "epoch": 0.5605485232067511, + "grad_norm": 
0.6747755408287048, + "learning_rate": 0.0006185003589883413, + "loss": 1.4456, + "step": 5314 + }, + { + "epoch": 0.5606540084388185, + "grad_norm": 0.7278077006340027, + "learning_rate": 0.0006182531958562672, + "loss": 1.4639, + "step": 5315 + }, + { + "epoch": 0.5607594936708861, + "grad_norm": 0.6897360682487488, + "learning_rate": 0.0006180060474870793, + "loss": 1.4899, + "step": 5316 + }, + { + "epoch": 0.5608649789029536, + "grad_norm": 0.6887032985687256, + "learning_rate": 0.0006177589139084721, + "loss": 1.4417, + "step": 5317 + }, + { + "epoch": 0.560970464135021, + "grad_norm": 0.6704647541046143, + "learning_rate": 0.000617511795148138, + "loss": 1.5017, + "step": 5318 + }, + { + "epoch": 0.5610759493670886, + "grad_norm": 0.6225866079330444, + "learning_rate": 0.0006172646912337678, + "loss": 1.4633, + "step": 5319 + }, + { + "epoch": 0.5611814345991561, + "grad_norm": 0.6642029285430908, + "learning_rate": 0.0006170176021930509, + "loss": 1.4667, + "step": 5320 + }, + { + "epoch": 0.5612869198312236, + "grad_norm": 0.6744901537895203, + "learning_rate": 0.0006167705280536745, + "loss": 1.5003, + "step": 5321 + }, + { + "epoch": 0.5613924050632911, + "grad_norm": 0.65027916431427, + "learning_rate": 0.000616523468843325, + "loss": 1.4979, + "step": 5322 + }, + { + "epoch": 0.5614978902953587, + "grad_norm": 0.6700526475906372, + "learning_rate": 0.0006162764245896863, + "loss": 1.4827, + "step": 5323 + }, + { + "epoch": 0.5616033755274261, + "grad_norm": 0.6623328328132629, + "learning_rate": 0.0006160293953204412, + "loss": 1.4825, + "step": 5324 + }, + { + "epoch": 0.5617088607594937, + "grad_norm": 0.7453864812850952, + "learning_rate": 0.0006157823810632704, + "loss": 1.4614, + "step": 5325 + }, + { + "epoch": 0.5618143459915612, + "grad_norm": 0.8167215585708618, + "learning_rate": 0.000615535381845853, + "loss": 1.4789, + "step": 5326 + }, + { + "epoch": 0.5619198312236287, + "grad_norm": 0.7174670696258545, + "learning_rate": 
0.0006152883976958665, + "loss": 1.4409, + "step": 5327 + }, + { + "epoch": 0.5620253164556962, + "grad_norm": 0.679065465927124, + "learning_rate": 0.0006150414286409869, + "loss": 1.4467, + "step": 5328 + }, + { + "epoch": 0.5621308016877637, + "grad_norm": 0.7087657451629639, + "learning_rate": 0.0006147944747088881, + "loss": 1.4782, + "step": 5329 + }, + { + "epoch": 0.5622362869198312, + "grad_norm": 0.7968090772628784, + "learning_rate": 0.0006145475359272424, + "loss": 1.4882, + "step": 5330 + }, + { + "epoch": 0.5623417721518987, + "grad_norm": 0.7000648975372314, + "learning_rate": 0.0006143006123237208, + "loss": 1.4698, + "step": 5331 + }, + { + "epoch": 0.5624472573839663, + "grad_norm": 0.6643130779266357, + "learning_rate": 0.0006140537039259925, + "loss": 1.4697, + "step": 5332 + }, + { + "epoch": 0.5625527426160337, + "grad_norm": 0.6895706653594971, + "learning_rate": 0.0006138068107617244, + "loss": 1.4414, + "step": 5333 + }, + { + "epoch": 0.5626582278481013, + "grad_norm": 0.6775655746459961, + "learning_rate": 0.0006135599328585824, + "loss": 1.4808, + "step": 5334 + }, + { + "epoch": 0.5627637130801688, + "grad_norm": 0.6997766494750977, + "learning_rate": 0.0006133130702442302, + "loss": 1.4792, + "step": 5335 + }, + { + "epoch": 0.5628691983122363, + "grad_norm": 0.7264531850814819, + "learning_rate": 0.0006130662229463301, + "loss": 1.4802, + "step": 5336 + }, + { + "epoch": 0.5629746835443038, + "grad_norm": 0.6755335927009583, + "learning_rate": 0.0006128193909925425, + "loss": 1.4534, + "step": 5337 + }, + { + "epoch": 0.5630801687763713, + "grad_norm": 0.6640120148658752, + "learning_rate": 0.0006125725744105263, + "loss": 1.4425, + "step": 5338 + }, + { + "epoch": 0.5631856540084388, + "grad_norm": 0.7552067637443542, + "learning_rate": 0.000612325773227938, + "loss": 1.4766, + "step": 5339 + }, + { + "epoch": 0.5632911392405063, + "grad_norm": 0.760367214679718, + "learning_rate": 0.0006120789874724336, + "loss": 1.4733, + "step": 
5340 + }, + { + "epoch": 0.5633966244725739, + "grad_norm": 0.6839481592178345, + "learning_rate": 0.0006118322171716665, + "loss": 1.5044, + "step": 5341 + }, + { + "epoch": 0.5635021097046413, + "grad_norm": 0.7115142941474915, + "learning_rate": 0.0006115854623532884, + "loss": 1.4627, + "step": 5342 + }, + { + "epoch": 0.5636075949367089, + "grad_norm": 0.729954719543457, + "learning_rate": 0.0006113387230449493, + "loss": 1.4778, + "step": 5343 + }, + { + "epoch": 0.5637130801687764, + "grad_norm": 0.6684756278991699, + "learning_rate": 0.0006110919992742978, + "loss": 1.4432, + "step": 5344 + }, + { + "epoch": 0.5638185654008439, + "grad_norm": 0.8582062721252441, + "learning_rate": 0.0006108452910689804, + "loss": 1.4886, + "step": 5345 + }, + { + "epoch": 0.5639240506329114, + "grad_norm": 0.7516006231307983, + "learning_rate": 0.0006105985984566421, + "loss": 1.4512, + "step": 5346 + }, + { + "epoch": 0.564029535864979, + "grad_norm": 0.8879760503768921, + "learning_rate": 0.0006103519214649256, + "loss": 1.4195, + "step": 5347 + }, + { + "epoch": 0.5641350210970464, + "grad_norm": 0.7130259275436401, + "learning_rate": 0.000610105260121473, + "loss": 1.4494, + "step": 5348 + }, + { + "epoch": 0.5642405063291139, + "grad_norm": 0.7352584004402161, + "learning_rate": 0.0006098586144539235, + "loss": 1.4705, + "step": 5349 + }, + { + "epoch": 0.5643459915611815, + "grad_norm": 0.6921730041503906, + "learning_rate": 0.0006096119844899151, + "loss": 1.4437, + "step": 5350 + }, + { + "epoch": 0.5644514767932489, + "grad_norm": 0.6637848019599915, + "learning_rate": 0.000609365370257084, + "loss": 1.4501, + "step": 5351 + }, + { + "epoch": 0.5645569620253165, + "grad_norm": 0.808843731880188, + "learning_rate": 0.0006091187717830643, + "loss": 1.4553, + "step": 5352 + }, + { + "epoch": 0.564662447257384, + "grad_norm": 0.7363366484642029, + "learning_rate": 0.0006088721890954887, + "loss": 1.4855, + "step": 5353 + }, + { + "epoch": 0.5647679324894515, + 
"grad_norm": 0.7284736633300781, + "learning_rate": 0.0006086256222219881, + "loss": 1.4686, + "step": 5354 + }, + { + "epoch": 0.564873417721519, + "grad_norm": 0.677895188331604, + "learning_rate": 0.0006083790711901915, + "loss": 1.4664, + "step": 5355 + }, + { + "epoch": 0.5649789029535865, + "grad_norm": 0.7404670119285583, + "learning_rate": 0.0006081325360277257, + "loss": 1.4447, + "step": 5356 + }, + { + "epoch": 0.565084388185654, + "grad_norm": 0.6962767243385315, + "learning_rate": 0.0006078860167622171, + "loss": 1.4609, + "step": 5357 + }, + { + "epoch": 0.5651898734177215, + "grad_norm": 0.722609281539917, + "learning_rate": 0.000607639513421289, + "loss": 1.4757, + "step": 5358 + }, + { + "epoch": 0.5652953586497891, + "grad_norm": 0.8954606652259827, + "learning_rate": 0.0006073930260325632, + "loss": 1.4833, + "step": 5359 + }, + { + "epoch": 0.5654008438818565, + "grad_norm": 0.9582844972610474, + "learning_rate": 0.0006071465546236601, + "loss": 1.5124, + "step": 5360 + }, + { + "epoch": 0.5655063291139241, + "grad_norm": 0.7162325382232666, + "learning_rate": 0.0006069000992221977, + "loss": 1.4826, + "step": 5361 + }, + { + "epoch": 0.5656118143459916, + "grad_norm": 0.7430269122123718, + "learning_rate": 0.0006066536598557927, + "loss": 1.4762, + "step": 5362 + }, + { + "epoch": 0.565717299578059, + "grad_norm": 0.7479982972145081, + "learning_rate": 0.0006064072365520601, + "loss": 1.5275, + "step": 5363 + }, + { + "epoch": 0.5658227848101266, + "grad_norm": 0.7359756827354431, + "learning_rate": 0.0006061608293386126, + "loss": 1.4608, + "step": 5364 + }, + { + "epoch": 0.5659282700421941, + "grad_norm": 0.9285323619842529, + "learning_rate": 0.0006059144382430612, + "loss": 1.4626, + "step": 5365 + }, + { + "epoch": 0.5660337552742616, + "grad_norm": 1.0603667497634888, + "learning_rate": 0.0006056680632930154, + "loss": 1.4826, + "step": 5366 + }, + { + "epoch": 0.5661392405063291, + "grad_norm": 0.8484776020050049, + "learning_rate": 
0.0006054217045160831, + "loss": 1.4595, + "step": 5367 + }, + { + "epoch": 0.5662447257383966, + "grad_norm": 1.289257526397705, + "learning_rate": 0.0006051753619398697, + "loss": 1.4572, + "step": 5368 + }, + { + "epoch": 0.5663502109704641, + "grad_norm": 0.8287988305091858, + "learning_rate": 0.0006049290355919792, + "loss": 1.4253, + "step": 5369 + }, + { + "epoch": 0.5664556962025317, + "grad_norm": 1.1847273111343384, + "learning_rate": 0.0006046827255000135, + "loss": 1.4806, + "step": 5370 + }, + { + "epoch": 0.5665611814345991, + "grad_norm": 0.8085842728614807, + "learning_rate": 0.0006044364316915733, + "loss": 1.4793, + "step": 5371 + }, + { + "epoch": 0.5666666666666667, + "grad_norm": 0.9448413848876953, + "learning_rate": 0.0006041901541942565, + "loss": 1.4706, + "step": 5372 + }, + { + "epoch": 0.5667721518987342, + "grad_norm": 0.8139441013336182, + "learning_rate": 0.0006039438930356601, + "loss": 1.4323, + "step": 5373 + }, + { + "epoch": 0.5668776371308016, + "grad_norm": 1.076927661895752, + "learning_rate": 0.0006036976482433787, + "loss": 1.4768, + "step": 5374 + }, + { + "epoch": 0.5669831223628692, + "grad_norm": 0.7894165515899658, + "learning_rate": 0.0006034514198450053, + "loss": 1.457, + "step": 5375 + }, + { + "epoch": 0.5670886075949367, + "grad_norm": 0.9332984685897827, + "learning_rate": 0.0006032052078681312, + "loss": 1.4309, + "step": 5376 + }, + { + "epoch": 0.5671940928270042, + "grad_norm": 0.8397035598754883, + "learning_rate": 0.0006029590123403456, + "loss": 1.5084, + "step": 5377 + }, + { + "epoch": 0.5672995780590717, + "grad_norm": 0.9232398271560669, + "learning_rate": 0.0006027128332892358, + "loss": 1.4256, + "step": 5378 + }, + { + "epoch": 0.5674050632911393, + "grad_norm": 0.8235622048377991, + "learning_rate": 0.0006024666707423875, + "loss": 1.4804, + "step": 5379 + }, + { + "epoch": 0.5675105485232067, + "grad_norm": 0.780671238899231, + "learning_rate": 0.0006022205247273845, + "loss": 1.4637, + "step": 
5380 + }, + { + "epoch": 0.5676160337552743, + "grad_norm": 0.8274799585342407, + "learning_rate": 0.0006019743952718085, + "loss": 1.4381, + "step": 5381 + }, + { + "epoch": 0.5677215189873418, + "grad_norm": 0.7761316895484924, + "learning_rate": 0.0006017282824032394, + "loss": 1.4238, + "step": 5382 + }, + { + "epoch": 0.5678270042194092, + "grad_norm": 0.7796828746795654, + "learning_rate": 0.0006014821861492559, + "loss": 1.4708, + "step": 5383 + }, + { + "epoch": 0.5679324894514768, + "grad_norm": 0.7782896161079407, + "learning_rate": 0.0006012361065374339, + "loss": 1.4736, + "step": 5384 + }, + { + "epoch": 0.5680379746835443, + "grad_norm": 0.9316810965538025, + "learning_rate": 0.0006009900435953478, + "loss": 1.469, + "step": 5385 + }, + { + "epoch": 0.5681434599156118, + "grad_norm": 0.7442188858985901, + "learning_rate": 0.0006007439973505707, + "loss": 1.4816, + "step": 5386 + }, + { + "epoch": 0.5682489451476793, + "grad_norm": 0.8420392870903015, + "learning_rate": 0.0006004979678306729, + "loss": 1.4776, + "step": 5387 + }, + { + "epoch": 0.5683544303797469, + "grad_norm": 0.7212050557136536, + "learning_rate": 0.0006002519550632232, + "loss": 1.4707, + "step": 5388 + }, + { + "epoch": 0.5684599156118143, + "grad_norm": 0.8154658675193787, + "learning_rate": 0.0006000059590757886, + "loss": 1.4746, + "step": 5389 + }, + { + "epoch": 0.5685654008438819, + "grad_norm": 0.8333590626716614, + "learning_rate": 0.0005997599798959343, + "loss": 1.4793, + "step": 5390 + }, + { + "epoch": 0.5686708860759494, + "grad_norm": 0.7678602933883667, + "learning_rate": 0.0005995140175512233, + "loss": 1.4525, + "step": 5391 + }, + { + "epoch": 0.5687763713080168, + "grad_norm": 1.0695003271102905, + "learning_rate": 0.000599268072069217, + "loss": 1.4664, + "step": 5392 + }, + { + "epoch": 0.5688818565400844, + "grad_norm": 0.7336746454238892, + "learning_rate": 0.0005990221434774751, + "loss": 1.4881, + "step": 5393 + }, + { + "epoch": 0.5689873417721519, + 
"grad_norm": 0.8575325608253479, + "learning_rate": 0.0005987762318035546, + "loss": 1.4774, + "step": 5394 + }, + { + "epoch": 0.5690928270042194, + "grad_norm": 0.7892743349075317, + "learning_rate": 0.0005985303370750115, + "loss": 1.4535, + "step": 5395 + }, + { + "epoch": 0.5691983122362869, + "grad_norm": 0.8055384159088135, + "learning_rate": 0.0005982844593193995, + "loss": 1.4605, + "step": 5396 + }, + { + "epoch": 0.5693037974683545, + "grad_norm": 0.8732380867004395, + "learning_rate": 0.0005980385985642703, + "loss": 1.4909, + "step": 5397 + }, + { + "epoch": 0.5694092827004219, + "grad_norm": 0.6923412084579468, + "learning_rate": 0.000597792754837174, + "loss": 1.4713, + "step": 5398 + }, + { + "epoch": 0.5695147679324895, + "grad_norm": 0.8135784864425659, + "learning_rate": 0.0005975469281656581, + "loss": 1.4508, + "step": 5399 + }, + { + "epoch": 0.569620253164557, + "grad_norm": 0.6803560853004456, + "learning_rate": 0.0005973011185772694, + "loss": 1.4398, + "step": 5400 + }, + { + "epoch": 0.5697257383966244, + "grad_norm": 0.7433398365974426, + "learning_rate": 0.0005970553260995517, + "loss": 1.4726, + "step": 5401 + }, + { + "epoch": 0.569831223628692, + "grad_norm": 0.7203198671340942, + "learning_rate": 0.0005968095507600476, + "loss": 1.4707, + "step": 5402 + }, + { + "epoch": 0.5699367088607595, + "grad_norm": 0.723482072353363, + "learning_rate": 0.000596563792586297, + "loss": 1.4833, + "step": 5403 + }, + { + "epoch": 0.570042194092827, + "grad_norm": 0.6864582300186157, + "learning_rate": 0.0005963180516058386, + "loss": 1.5116, + "step": 5404 + }, + { + "epoch": 0.5701476793248945, + "grad_norm": 0.7224584221839905, + "learning_rate": 0.0005960723278462086, + "loss": 1.4446, + "step": 5405 + }, + { + "epoch": 0.5702531645569621, + "grad_norm": 0.701150119304657, + "learning_rate": 0.0005958266213349422, + "loss": 1.4284, + "step": 5406 + }, + { + "epoch": 0.5703586497890295, + "grad_norm": 0.697723388671875, + "learning_rate": 
0.0005955809320995714, + "loss": 1.4551, + "step": 5407 + }, + { + "epoch": 0.570464135021097, + "grad_norm": 0.7111025452613831, + "learning_rate": 0.0005953352601676272, + "loss": 1.4657, + "step": 5408 + }, + { + "epoch": 0.5705696202531646, + "grad_norm": 0.7635056376457214, + "learning_rate": 0.0005950896055666384, + "loss": 1.4831, + "step": 5409 + }, + { + "epoch": 0.570675105485232, + "grad_norm": 0.738119900226593, + "learning_rate": 0.0005948439683241318, + "loss": 1.4481, + "step": 5410 + }, + { + "epoch": 0.5707805907172996, + "grad_norm": 0.6635184288024902, + "learning_rate": 0.0005945983484676321, + "loss": 1.4668, + "step": 5411 + }, + { + "epoch": 0.5708860759493671, + "grad_norm": 0.6453139185905457, + "learning_rate": 0.0005943527460246625, + "loss": 1.4635, + "step": 5412 + }, + { + "epoch": 0.5709915611814346, + "grad_norm": 0.6825000047683716, + "learning_rate": 0.0005941071610227437, + "loss": 1.4799, + "step": 5413 + }, + { + "epoch": 0.5710970464135021, + "grad_norm": 0.6695380210876465, + "learning_rate": 0.000593861593489395, + "loss": 1.4112, + "step": 5414 + }, + { + "epoch": 0.5712025316455697, + "grad_norm": 0.694889485836029, + "learning_rate": 0.000593616043452133, + "loss": 1.4973, + "step": 5415 + }, + { + "epoch": 0.5713080168776371, + "grad_norm": 0.6540343761444092, + "learning_rate": 0.0005933705109384735, + "loss": 1.4548, + "step": 5416 + }, + { + "epoch": 0.5714135021097047, + "grad_norm": 0.7363656163215637, + "learning_rate": 0.000593124995975929, + "loss": 1.4787, + "step": 5417 + }, + { + "epoch": 0.5715189873417722, + "grad_norm": 0.6689146161079407, + "learning_rate": 0.000592879498592011, + "loss": 1.4627, + "step": 5418 + }, + { + "epoch": 0.5716244725738396, + "grad_norm": 0.8863011002540588, + "learning_rate": 0.0005926340188142289, + "loss": 1.4404, + "step": 5419 + }, + { + "epoch": 0.5717299578059072, + "grad_norm": 0.667672336101532, + "learning_rate": 0.0005923885566700896, + "loss": 1.4524, + "step": 5420 + 
}, + { + "epoch": 0.5718354430379747, + "grad_norm": 0.8222251534461975, + "learning_rate": 0.0005921431121870984, + "loss": 1.4888, + "step": 5421 + }, + { + "epoch": 0.5719409282700422, + "grad_norm": 0.7147220969200134, + "learning_rate": 0.0005918976853927586, + "loss": 1.4797, + "step": 5422 + }, + { + "epoch": 0.5720464135021097, + "grad_norm": 0.71647047996521, + "learning_rate": 0.0005916522763145715, + "loss": 1.4274, + "step": 5423 + }, + { + "epoch": 0.5721518987341773, + "grad_norm": 0.7962694764137268, + "learning_rate": 0.0005914068849800365, + "loss": 1.4393, + "step": 5424 + }, + { + "epoch": 0.5722573839662447, + "grad_norm": 0.7777957320213318, + "learning_rate": 0.0005911615114166508, + "loss": 1.4608, + "step": 5425 + }, + { + "epoch": 0.5723628691983123, + "grad_norm": 0.7720675468444824, + "learning_rate": 0.0005909161556519096, + "loss": 1.4809, + "step": 5426 + }, + { + "epoch": 0.5724683544303798, + "grad_norm": 0.6749531626701355, + "learning_rate": 0.0005906708177133066, + "loss": 1.4782, + "step": 5427 + }, + { + "epoch": 0.5725738396624472, + "grad_norm": 0.7741773724555969, + "learning_rate": 0.0005904254976283331, + "loss": 1.4593, + "step": 5428 + }, + { + "epoch": 0.5726793248945148, + "grad_norm": 0.7248591184616089, + "learning_rate": 0.0005901801954244782, + "loss": 1.459, + "step": 5429 + }, + { + "epoch": 0.5727848101265823, + "grad_norm": 0.7678217887878418, + "learning_rate": 0.0005899349111292293, + "loss": 1.4291, + "step": 5430 + }, + { + "epoch": 0.5728902953586498, + "grad_norm": 0.7746262550354004, + "learning_rate": 0.0005896896447700718, + "loss": 1.4851, + "step": 5431 + }, + { + "epoch": 0.5729957805907173, + "grad_norm": 0.681368350982666, + "learning_rate": 0.0005894443963744891, + "loss": 1.4441, + "step": 5432 + }, + { + "epoch": 0.5731012658227848, + "grad_norm": 0.9048222303390503, + "learning_rate": 0.0005891991659699622, + "loss": 1.4362, + "step": 5433 + }, + { + "epoch": 0.5732067510548523, + "grad_norm": 
0.9211746454238892, + "learning_rate": 0.0005889539535839704, + "loss": 1.5083, + "step": 5434 + }, + { + "epoch": 0.5733122362869199, + "grad_norm": 0.8300454616546631, + "learning_rate": 0.0005887087592439914, + "loss": 1.4137, + "step": 5435 + }, + { + "epoch": 0.5734177215189873, + "grad_norm": 1.1071224212646484, + "learning_rate": 0.0005884635829775002, + "loss": 1.4452, + "step": 5436 + }, + { + "epoch": 0.5735232067510548, + "grad_norm": 0.7839834094047546, + "learning_rate": 0.00058821842481197, + "loss": 1.4313, + "step": 5437 + }, + { + "epoch": 0.5736286919831224, + "grad_norm": 0.8684725761413574, + "learning_rate": 0.0005879732847748721, + "loss": 1.4764, + "step": 5438 + }, + { + "epoch": 0.5737341772151898, + "grad_norm": 0.7273783087730408, + "learning_rate": 0.0005877281628936756, + "loss": 1.468, + "step": 5439 + }, + { + "epoch": 0.5738396624472574, + "grad_norm": 0.8573100566864014, + "learning_rate": 0.0005874830591958474, + "loss": 1.457, + "step": 5440 + }, + { + "epoch": 0.5739451476793249, + "grad_norm": 0.833082377910614, + "learning_rate": 0.000587237973708853, + "loss": 1.4577, + "step": 5441 + }, + { + "epoch": 0.5740506329113924, + "grad_norm": 1.0345486402511597, + "learning_rate": 0.0005869929064601551, + "loss": 1.4716, + "step": 5442 + }, + { + "epoch": 0.5741561181434599, + "grad_norm": 0.7320310473442078, + "learning_rate": 0.0005867478574772147, + "loss": 1.4722, + "step": 5443 + }, + { + "epoch": 0.5742616033755275, + "grad_norm": 0.7132730484008789, + "learning_rate": 0.0005865028267874911, + "loss": 1.4602, + "step": 5444 + }, + { + "epoch": 0.5743670886075949, + "grad_norm": 0.8275551795959473, + "learning_rate": 0.0005862578144184412, + "loss": 1.4457, + "step": 5445 + }, + { + "epoch": 0.5744725738396624, + "grad_norm": 0.794180154800415, + "learning_rate": 0.0005860128203975196, + "loss": 1.4816, + "step": 5446 + }, + { + "epoch": 0.57457805907173, + "grad_norm": 0.8599771857261658, + "learning_rate": 
0.0005857678447521791, + "loss": 1.4853, + "step": 5447 + }, + { + "epoch": 0.5746835443037974, + "grad_norm": 0.9676414132118225, + "learning_rate": 0.0005855228875098706, + "loss": 1.473, + "step": 5448 + }, + { + "epoch": 0.574789029535865, + "grad_norm": 0.7631100416183472, + "learning_rate": 0.0005852779486980427, + "loss": 1.4567, + "step": 5449 + }, + { + "epoch": 0.5748945147679325, + "grad_norm": 0.8604432940483093, + "learning_rate": 0.000585033028344142, + "loss": 1.4485, + "step": 5450 + }, + { + "epoch": 0.575, + "grad_norm": 0.9192318916320801, + "learning_rate": 0.0005847881264756131, + "loss": 1.466, + "step": 5451 + }, + { + "epoch": 0.5751054852320675, + "grad_norm": 1.0592525005340576, + "learning_rate": 0.0005845432431198981, + "loss": 1.4222, + "step": 5452 + }, + { + "epoch": 0.575210970464135, + "grad_norm": 0.8300032019615173, + "learning_rate": 0.0005842983783044381, + "loss": 1.4859, + "step": 5453 + }, + { + "epoch": 0.5753164556962025, + "grad_norm": 1.1077860593795776, + "learning_rate": 0.0005840535320566711, + "loss": 1.4698, + "step": 5454 + }, + { + "epoch": 0.57542194092827, + "grad_norm": 0.8211217522621155, + "learning_rate": 0.0005838087044040334, + "loss": 1.5091, + "step": 5455 + }, + { + "epoch": 0.5755274261603376, + "grad_norm": 0.9483095407485962, + "learning_rate": 0.0005835638953739589, + "loss": 1.4348, + "step": 5456 + }, + { + "epoch": 0.575632911392405, + "grad_norm": 0.7624984383583069, + "learning_rate": 0.00058331910499388, + "loss": 1.4748, + "step": 5457 + }, + { + "epoch": 0.5757383966244726, + "grad_norm": 0.8602396845817566, + "learning_rate": 0.0005830743332912264, + "loss": 1.4558, + "step": 5458 + }, + { + "epoch": 0.5758438818565401, + "grad_norm": 0.7410352230072021, + "learning_rate": 0.0005828295802934263, + "loss": 1.4573, + "step": 5459 + }, + { + "epoch": 0.5759493670886076, + "grad_norm": 0.7896410226821899, + "learning_rate": 0.0005825848460279048, + "loss": 1.4759, + "step": 5460 + }, + { + 
"epoch": 0.5760548523206751, + "grad_norm": 0.7638104557991028, + "learning_rate": 0.0005823401305220865, + "loss": 1.4321, + "step": 5461 + }, + { + "epoch": 0.5761603375527427, + "grad_norm": 0.7436337471008301, + "learning_rate": 0.0005820954338033925, + "loss": 1.42, + "step": 5462 + }, + { + "epoch": 0.5762658227848101, + "grad_norm": 0.8927266001701355, + "learning_rate": 0.0005818507558992426, + "loss": 1.4602, + "step": 5463 + }, + { + "epoch": 0.5763713080168776, + "grad_norm": 0.7238037586212158, + "learning_rate": 0.0005816060968370538, + "loss": 1.4464, + "step": 5464 + }, + { + "epoch": 0.5764767932489452, + "grad_norm": 0.9734136462211609, + "learning_rate": 0.0005813614566442416, + "loss": 1.5074, + "step": 5465 + }, + { + "epoch": 0.5765822784810126, + "grad_norm": 0.7396621108055115, + "learning_rate": 0.0005811168353482191, + "loss": 1.4905, + "step": 5466 + }, + { + "epoch": 0.5766877637130802, + "grad_norm": 1.088020920753479, + "learning_rate": 0.0005808722329763974, + "loss": 1.4656, + "step": 5467 + }, + { + "epoch": 0.5767932489451477, + "grad_norm": 0.7482842206954956, + "learning_rate": 0.0005806276495561852, + "loss": 1.4735, + "step": 5468 + }, + { + "epoch": 0.5768987341772152, + "grad_norm": 0.9478933811187744, + "learning_rate": 0.0005803830851149892, + "loss": 1.4729, + "step": 5469 + }, + { + "epoch": 0.5770042194092827, + "grad_norm": 0.8105738162994385, + "learning_rate": 0.0005801385396802146, + "loss": 1.4643, + "step": 5470 + }, + { + "epoch": 0.5771097046413503, + "grad_norm": 0.8578856587409973, + "learning_rate": 0.0005798940132792636, + "loss": 1.484, + "step": 5471 + }, + { + "epoch": 0.5772151898734177, + "grad_norm": 0.7353159189224243, + "learning_rate": 0.0005796495059395367, + "loss": 1.4606, + "step": 5472 + }, + { + "epoch": 0.5773206751054852, + "grad_norm": 0.7912877202033997, + "learning_rate": 0.0005794050176884321, + "loss": 1.4356, + "step": 5473 + }, + { + "epoch": 0.5774261603375528, + "grad_norm": 
0.7208280563354492, + "learning_rate": 0.0005791605485533459, + "loss": 1.4392, + "step": 5474 + }, + { + "epoch": 0.5775316455696202, + "grad_norm": 0.8098331689834595, + "learning_rate": 0.0005789160985616721, + "loss": 1.4576, + "step": 5475 + }, + { + "epoch": 0.5776371308016878, + "grad_norm": 0.7677286267280579, + "learning_rate": 0.0005786716677408025, + "loss": 1.4464, + "step": 5476 + }, + { + "epoch": 0.5777426160337553, + "grad_norm": 0.7841598391532898, + "learning_rate": 0.0005784272561181269, + "loss": 1.4931, + "step": 5477 + }, + { + "epoch": 0.5778481012658228, + "grad_norm": 0.7169468998908997, + "learning_rate": 0.0005781828637210325, + "loss": 1.4771, + "step": 5478 + }, + { + "epoch": 0.5779535864978903, + "grad_norm": 0.8980294466018677, + "learning_rate": 0.0005779384905769053, + "loss": 1.4674, + "step": 5479 + }, + { + "epoch": 0.5780590717299579, + "grad_norm": 0.8033251762390137, + "learning_rate": 0.0005776941367131282, + "loss": 1.5068, + "step": 5480 + }, + { + "epoch": 0.5781645569620253, + "grad_norm": 0.9754118919372559, + "learning_rate": 0.0005774498021570824, + "loss": 1.4266, + "step": 5481 + }, + { + "epoch": 0.5782700421940928, + "grad_norm": 1.0642590522766113, + "learning_rate": 0.0005772054869361465, + "loss": 1.4826, + "step": 5482 + }, + { + "epoch": 0.5783755274261604, + "grad_norm": 0.9276461601257324, + "learning_rate": 0.0005769611910776975, + "loss": 1.4851, + "step": 5483 + }, + { + "epoch": 0.5784810126582278, + "grad_norm": 1.0063179731369019, + "learning_rate": 0.0005767169146091098, + "loss": 1.4721, + "step": 5484 + }, + { + "epoch": 0.5785864978902954, + "grad_norm": 0.9918343424797058, + "learning_rate": 0.0005764726575577559, + "loss": 1.4573, + "step": 5485 + }, + { + "epoch": 0.5786919831223629, + "grad_norm": 1.0212900638580322, + "learning_rate": 0.0005762284199510059, + "loss": 1.4562, + "step": 5486 + }, + { + "epoch": 0.5787974683544304, + "grad_norm": 1.0277692079544067, + "learning_rate": 
0.000575984201816228, + "loss": 1.4831, + "step": 5487 + }, + { + "epoch": 0.5789029535864979, + "grad_norm": 0.848292350769043, + "learning_rate": 0.0005757400031807881, + "loss": 1.4548, + "step": 5488 + }, + { + "epoch": 0.5790084388185655, + "grad_norm": 1.2620210647583008, + "learning_rate": 0.0005754958240720498, + "loss": 1.5294, + "step": 5489 + }, + { + "epoch": 0.5791139240506329, + "grad_norm": 0.7211219072341919, + "learning_rate": 0.0005752516645173745, + "loss": 1.4541, + "step": 5490 + }, + { + "epoch": 0.5792194092827004, + "grad_norm": 1.1183106899261475, + "learning_rate": 0.0005750075245441218, + "loss": 1.4575, + "step": 5491 + }, + { + "epoch": 0.579324894514768, + "grad_norm": 0.7089693546295166, + "learning_rate": 0.0005747634041796484, + "loss": 1.4594, + "step": 5492 + }, + { + "epoch": 0.5794303797468354, + "grad_norm": 1.185613751411438, + "learning_rate": 0.0005745193034513092, + "loss": 1.4677, + "step": 5493 + }, + { + "epoch": 0.579535864978903, + "grad_norm": 0.6628633141517639, + "learning_rate": 0.0005742752223864573, + "loss": 1.4421, + "step": 5494 + }, + { + "epoch": 0.5796413502109705, + "grad_norm": 0.9484649300575256, + "learning_rate": 0.0005740311610124427, + "loss": 1.4833, + "step": 5495 + }, + { + "epoch": 0.579746835443038, + "grad_norm": 0.7977759838104248, + "learning_rate": 0.0005737871193566141, + "loss": 1.4302, + "step": 5496 + }, + { + "epoch": 0.5798523206751055, + "grad_norm": 0.9533494710922241, + "learning_rate": 0.0005735430974463175, + "loss": 1.4569, + "step": 5497 + }, + { + "epoch": 0.5799578059071729, + "grad_norm": 0.9265702962875366, + "learning_rate": 0.0005732990953088968, + "loss": 1.4545, + "step": 5498 + }, + { + "epoch": 0.5800632911392405, + "grad_norm": 1.0612633228302002, + "learning_rate": 0.0005730551129716936, + "loss": 1.4969, + "step": 5499 + }, + { + "epoch": 0.580168776371308, + "grad_norm": 1.0015668869018555, + "learning_rate": 0.0005728111504620472, + "loss": 1.4386, + "step": 5500 
+ }, + { + "epoch": 0.5802742616033755, + "grad_norm": 1.182348608970642, + "learning_rate": 0.000572567207807295, + "loss": 1.4491, + "step": 5501 + }, + { + "epoch": 0.580379746835443, + "grad_norm": 1.1639717817306519, + "learning_rate": 0.000572323285034772, + "loss": 1.4991, + "step": 5502 + }, + { + "epoch": 0.5804852320675106, + "grad_norm": 0.9507852792739868, + "learning_rate": 0.0005720793821718108, + "loss": 1.4785, + "step": 5503 + }, + { + "epoch": 0.580590717299578, + "grad_norm": 1.2315343618392944, + "learning_rate": 0.0005718354992457417, + "loss": 1.4529, + "step": 5504 + }, + { + "epoch": 0.5806962025316456, + "grad_norm": 0.8835030198097229, + "learning_rate": 0.0005715916362838936, + "loss": 1.4508, + "step": 5505 + }, + { + "epoch": 0.5808016877637131, + "grad_norm": 0.9667177796363831, + "learning_rate": 0.0005713477933135923, + "loss": 1.4914, + "step": 5506 + }, + { + "epoch": 0.5809071729957805, + "grad_norm": 0.8062408566474915, + "learning_rate": 0.0005711039703621616, + "loss": 1.4861, + "step": 5507 + }, + { + "epoch": 0.5810126582278481, + "grad_norm": 1.2319585084915161, + "learning_rate": 0.0005708601674569232, + "loss": 1.448, + "step": 5508 + }, + { + "epoch": 0.5811181434599156, + "grad_norm": 0.82657390832901, + "learning_rate": 0.0005706163846251961, + "loss": 1.4786, + "step": 5509 + }, + { + "epoch": 0.5812236286919831, + "grad_norm": 0.8656774163246155, + "learning_rate": 0.0005703726218942976, + "loss": 1.4586, + "step": 5510 + }, + { + "epoch": 0.5813291139240506, + "grad_norm": 0.8449634313583374, + "learning_rate": 0.0005701288792915427, + "loss": 1.4286, + "step": 5511 + }, + { + "epoch": 0.5814345991561182, + "grad_norm": 0.9832130074501038, + "learning_rate": 0.0005698851568442434, + "loss": 1.4592, + "step": 5512 + }, + { + "epoch": 0.5815400843881856, + "grad_norm": 0.8413611054420471, + "learning_rate": 0.0005696414545797108, + "loss": 1.4734, + "step": 5513 + }, + { + "epoch": 0.5816455696202532, + "grad_norm": 
1.0561116933822632, + "learning_rate": 0.0005693977725252525, + "loss": 1.4898, + "step": 5514 + }, + { + "epoch": 0.5817510548523207, + "grad_norm": 0.7688167691230774, + "learning_rate": 0.0005691541107081743, + "loss": 1.4665, + "step": 5515 + }, + { + "epoch": 0.5818565400843881, + "grad_norm": 0.8981918096542358, + "learning_rate": 0.0005689104691557798, + "loss": 1.4187, + "step": 5516 + }, + { + "epoch": 0.5819620253164557, + "grad_norm": 0.723439633846283, + "learning_rate": 0.0005686668478953702, + "loss": 1.4439, + "step": 5517 + }, + { + "epoch": 0.5820675105485232, + "grad_norm": 0.8671021461486816, + "learning_rate": 0.0005684232469542446, + "loss": 1.4797, + "step": 5518 + }, + { + "epoch": 0.5821729957805907, + "grad_norm": 0.8326738476753235, + "learning_rate": 0.0005681796663596996, + "loss": 1.4564, + "step": 5519 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 0.8873391151428223, + "learning_rate": 0.0005679361061390295, + "loss": 1.4913, + "step": 5520 + }, + { + "epoch": 0.5823839662447258, + "grad_norm": 0.6751779317855835, + "learning_rate": 0.0005676925663195263, + "loss": 1.4379, + "step": 5521 + }, + { + "epoch": 0.5824894514767932, + "grad_norm": 0.8767799139022827, + "learning_rate": 0.0005674490469284805, + "loss": 1.4376, + "step": 5522 + }, + { + "epoch": 0.5825949367088608, + "grad_norm": 0.7133544087409973, + "learning_rate": 0.0005672055479931791, + "loss": 1.4376, + "step": 5523 + }, + { + "epoch": 0.5827004219409283, + "grad_norm": 0.7671077251434326, + "learning_rate": 0.0005669620695409076, + "loss": 1.4594, + "step": 5524 + }, + { + "epoch": 0.5828059071729957, + "grad_norm": 0.7604379057884216, + "learning_rate": 0.000566718611598949, + "loss": 1.4452, + "step": 5525 + }, + { + "epoch": 0.5829113924050633, + "grad_norm": 0.7214747667312622, + "learning_rate": 0.0005664751741945839, + "loss": 1.4545, + "step": 5526 + }, + { + "epoch": 0.5830168776371308, + "grad_norm": 0.8342068791389465, + "learning_rate": 
0.0005662317573550906, + "loss": 1.4479, + "step": 5527 + }, + { + "epoch": 0.5831223628691983, + "grad_norm": 0.7537788152694702, + "learning_rate": 0.0005659883611077453, + "loss": 1.4678, + "step": 5528 + }, + { + "epoch": 0.5832278481012658, + "grad_norm": 0.8000002503395081, + "learning_rate": 0.0005657449854798216, + "loss": 1.4339, + "step": 5529 + }, + { + "epoch": 0.5833333333333334, + "grad_norm": 0.7188135385513306, + "learning_rate": 0.0005655016304985908, + "loss": 1.481, + "step": 5530 + }, + { + "epoch": 0.5834388185654008, + "grad_norm": 0.7926221489906311, + "learning_rate": 0.0005652582961913227, + "loss": 1.472, + "step": 5531 + }, + { + "epoch": 0.5835443037974684, + "grad_norm": 0.8226363658905029, + "learning_rate": 0.0005650149825852836, + "loss": 1.4302, + "step": 5532 + }, + { + "epoch": 0.5836497890295359, + "grad_norm": 0.6887568831443787, + "learning_rate": 0.0005647716897077382, + "loss": 1.4264, + "step": 5533 + }, + { + "epoch": 0.5837552742616033, + "grad_norm": 0.6952775120735168, + "learning_rate": 0.0005645284175859486, + "loss": 1.4313, + "step": 5534 + }, + { + "epoch": 0.5838607594936709, + "grad_norm": 0.7305320501327515, + "learning_rate": 0.0005642851662471745, + "loss": 1.4336, + "step": 5535 + }, + { + "epoch": 0.5839662447257384, + "grad_norm": 0.7421310544013977, + "learning_rate": 0.0005640419357186738, + "loss": 1.4398, + "step": 5536 + }, + { + "epoch": 0.5840717299578059, + "grad_norm": 0.7538427710533142, + "learning_rate": 0.0005637987260277013, + "loss": 1.4481, + "step": 5537 + }, + { + "epoch": 0.5841772151898734, + "grad_norm": 0.7318088412284851, + "learning_rate": 0.0005635555372015099, + "loss": 1.5048, + "step": 5538 + }, + { + "epoch": 0.584282700421941, + "grad_norm": 0.6609330177307129, + "learning_rate": 0.0005633123692673503, + "loss": 1.4435, + "step": 5539 + }, + { + "epoch": 0.5843881856540084, + "grad_norm": 0.8076738119125366, + "learning_rate": 0.0005630692222524709, + "loss": 1.4818, + "step": 
5540 + }, + { + "epoch": 0.584493670886076, + "grad_norm": 0.6880483627319336, + "learning_rate": 0.0005628260961841171, + "loss": 1.4687, + "step": 5541 + }, + { + "epoch": 0.5845991561181435, + "grad_norm": 0.7678987383842468, + "learning_rate": 0.0005625829910895325, + "loss": 1.4625, + "step": 5542 + }, + { + "epoch": 0.5847046413502109, + "grad_norm": 0.6821463108062744, + "learning_rate": 0.0005623399069959585, + "loss": 1.4666, + "step": 5543 + }, + { + "epoch": 0.5848101265822785, + "grad_norm": 0.7845714688301086, + "learning_rate": 0.0005620968439306335, + "loss": 1.4522, + "step": 5544 + }, + { + "epoch": 0.584915611814346, + "grad_norm": 0.7148608565330505, + "learning_rate": 0.0005618538019207943, + "loss": 1.4798, + "step": 5545 + }, + { + "epoch": 0.5850210970464135, + "grad_norm": 0.7896075248718262, + "learning_rate": 0.0005616107809936746, + "loss": 1.4511, + "step": 5546 + }, + { + "epoch": 0.585126582278481, + "grad_norm": 0.7020695209503174, + "learning_rate": 0.0005613677811765062, + "loss": 1.4465, + "step": 5547 + }, + { + "epoch": 0.5852320675105486, + "grad_norm": 0.9077619314193726, + "learning_rate": 0.0005611248024965186, + "loss": 1.4867, + "step": 5548 + }, + { + "epoch": 0.585337552742616, + "grad_norm": 0.7286068797111511, + "learning_rate": 0.0005608818449809387, + "loss": 1.4759, + "step": 5549 + }, + { + "epoch": 0.5854430379746836, + "grad_norm": 1.2198656797409058, + "learning_rate": 0.0005606389086569911, + "loss": 1.5011, + "step": 5550 + }, + { + "epoch": 0.5855485232067511, + "grad_norm": 0.7376657724380493, + "learning_rate": 0.0005603959935518981, + "loss": 1.495, + "step": 5551 + }, + { + "epoch": 0.5856540084388185, + "grad_norm": 0.9138449430465698, + "learning_rate": 0.0005601530996928795, + "loss": 1.5174, + "step": 5552 + }, + { + "epoch": 0.5857594936708861, + "grad_norm": 0.681480884552002, + "learning_rate": 0.0005599102271071527, + "loss": 1.4551, + "step": 5553 + }, + { + "epoch": 0.5858649789029536, + 
"grad_norm": 0.8685181736946106, + "learning_rate": 0.0005596673758219327, + "loss": 1.4762, + "step": 5554 + }, + { + "epoch": 0.5859704641350211, + "grad_norm": 0.6741061210632324, + "learning_rate": 0.0005594245458644325, + "loss": 1.4728, + "step": 5555 + }, + { + "epoch": 0.5860759493670886, + "grad_norm": 0.7517549395561218, + "learning_rate": 0.0005591817372618621, + "loss": 1.4482, + "step": 5556 + }, + { + "epoch": 0.5861814345991562, + "grad_norm": 0.6585934162139893, + "learning_rate": 0.0005589389500414296, + "loss": 1.4547, + "step": 5557 + }, + { + "epoch": 0.5862869198312236, + "grad_norm": 0.9596599340438843, + "learning_rate": 0.0005586961842303405, + "loss": 1.4928, + "step": 5558 + }, + { + "epoch": 0.5863924050632912, + "grad_norm": 0.711069643497467, + "learning_rate": 0.0005584534398557977, + "loss": 1.4504, + "step": 5559 + }, + { + "epoch": 0.5864978902953587, + "grad_norm": 0.6782465577125549, + "learning_rate": 0.0005582107169450023, + "loss": 1.4686, + "step": 5560 + }, + { + "epoch": 0.5866033755274261, + "grad_norm": 0.664269208908081, + "learning_rate": 0.0005579680155251524, + "loss": 1.4547, + "step": 5561 + }, + { + "epoch": 0.5867088607594937, + "grad_norm": 0.7105126976966858, + "learning_rate": 0.0005577253356234439, + "loss": 1.472, + "step": 5562 + }, + { + "epoch": 0.5868143459915611, + "grad_norm": 0.6734883189201355, + "learning_rate": 0.0005574826772670703, + "loss": 1.4434, + "step": 5563 + }, + { + "epoch": 0.5869198312236287, + "grad_norm": 0.7758301496505737, + "learning_rate": 0.0005572400404832226, + "loss": 1.4737, + "step": 5564 + }, + { + "epoch": 0.5870253164556962, + "grad_norm": 0.6696879267692566, + "learning_rate": 0.0005569974252990896, + "loss": 1.4602, + "step": 5565 + }, + { + "epoch": 0.5871308016877637, + "grad_norm": 0.7202883958816528, + "learning_rate": 0.0005567548317418576, + "loss": 1.4571, + "step": 5566 + }, + { + "epoch": 0.5872362869198312, + "grad_norm": 0.6777191162109375, + "learning_rate": 
0.0005565122598387103, + "loss": 1.4914, + "step": 5567 + }, + { + "epoch": 0.5873417721518988, + "grad_norm": 0.7426519393920898, + "learning_rate": 0.0005562697096168289, + "loss": 1.4327, + "step": 5568 + }, + { + "epoch": 0.5874472573839662, + "grad_norm": 0.6886458992958069, + "learning_rate": 0.0005560271811033928, + "loss": 1.4265, + "step": 5569 + }, + { + "epoch": 0.5875527426160337, + "grad_norm": 0.8036988377571106, + "learning_rate": 0.0005557846743255783, + "loss": 1.4405, + "step": 5570 + }, + { + "epoch": 0.5876582278481013, + "grad_norm": 0.6640660166740417, + "learning_rate": 0.0005555421893105593, + "loss": 1.4553, + "step": 5571 + }, + { + "epoch": 0.5877637130801687, + "grad_norm": 0.8341075778007507, + "learning_rate": 0.0005552997260855077, + "loss": 1.4648, + "step": 5572 + }, + { + "epoch": 0.5878691983122363, + "grad_norm": 0.850774347782135, + "learning_rate": 0.0005550572846775927, + "loss": 1.4519, + "step": 5573 + }, + { + "epoch": 0.5879746835443038, + "grad_norm": 0.6796267628669739, + "learning_rate": 0.0005548148651139809, + "loss": 1.4562, + "step": 5574 + }, + { + "epoch": 0.5880801687763713, + "grad_norm": 0.8117522597312927, + "learning_rate": 0.0005545724674218368, + "loss": 1.4714, + "step": 5575 + }, + { + "epoch": 0.5881856540084388, + "grad_norm": 0.6976268291473389, + "learning_rate": 0.0005543300916283223, + "loss": 1.4481, + "step": 5576 + }, + { + "epoch": 0.5882911392405064, + "grad_norm": 0.801470935344696, + "learning_rate": 0.0005540877377605968, + "loss": 1.4615, + "step": 5577 + }, + { + "epoch": 0.5883966244725738, + "grad_norm": 0.7391678094863892, + "learning_rate": 0.0005538454058458171, + "loss": 1.4498, + "step": 5578 + }, + { + "epoch": 0.5885021097046413, + "grad_norm": 0.7492693066596985, + "learning_rate": 0.0005536030959111377, + "loss": 1.4656, + "step": 5579 + }, + { + "epoch": 0.5886075949367089, + "grad_norm": 0.6952809691429138, + "learning_rate": 0.0005533608079837109, + "loss": 1.4175, + "step": 
5580 + }, + { + "epoch": 0.5887130801687763, + "grad_norm": 0.7714180946350098, + "learning_rate": 0.0005531185420906859, + "loss": 1.4497, + "step": 5581 + }, + { + "epoch": 0.5888185654008439, + "grad_norm": 0.8530722856521606, + "learning_rate": 0.0005528762982592101, + "loss": 1.47, + "step": 5582 + }, + { + "epoch": 0.5889240506329114, + "grad_norm": 0.7215616703033447, + "learning_rate": 0.000552634076516428, + "loss": 1.4369, + "step": 5583 + }, + { + "epoch": 0.5890295358649789, + "grad_norm": 0.93043452501297, + "learning_rate": 0.0005523918768894819, + "loss": 1.4629, + "step": 5584 + }, + { + "epoch": 0.5891350210970464, + "grad_norm": 0.7021670341491699, + "learning_rate": 0.0005521496994055112, + "loss": 1.4896, + "step": 5585 + }, + { + "epoch": 0.589240506329114, + "grad_norm": 0.857650637626648, + "learning_rate": 0.0005519075440916534, + "loss": 1.4615, + "step": 5586 + }, + { + "epoch": 0.5893459915611814, + "grad_norm": 0.768218457698822, + "learning_rate": 0.000551665410975043, + "loss": 1.4584, + "step": 5587 + }, + { + "epoch": 0.5894514767932489, + "grad_norm": 0.7617660760879517, + "learning_rate": 0.0005514233000828121, + "loss": 1.4918, + "step": 5588 + }, + { + "epoch": 0.5895569620253165, + "grad_norm": 0.7454844117164612, + "learning_rate": 0.0005511812114420908, + "loss": 1.4442, + "step": 5589 + }, + { + "epoch": 0.5896624472573839, + "grad_norm": 0.785004734992981, + "learning_rate": 0.0005509391450800061, + "loss": 1.4454, + "step": 5590 + }, + { + "epoch": 0.5897679324894515, + "grad_norm": 0.8124148845672607, + "learning_rate": 0.0005506971010236829, + "loss": 1.4344, + "step": 5591 + }, + { + "epoch": 0.589873417721519, + "grad_norm": 0.7622130513191223, + "learning_rate": 0.0005504550793002433, + "loss": 1.484, + "step": 5592 + }, + { + "epoch": 0.5899789029535865, + "grad_norm": 0.7844920754432678, + "learning_rate": 0.000550213079936807, + "loss": 1.4438, + "step": 5593 + }, + { + "epoch": 0.590084388185654, + "grad_norm": 
0.8750115633010864, + "learning_rate": 0.0005499711029604915, + "loss": 1.4477, + "step": 5594 + }, + { + "epoch": 0.5901898734177216, + "grad_norm": 0.7690982818603516, + "learning_rate": 0.0005497291483984113, + "loss": 1.4515, + "step": 5595 + }, + { + "epoch": 0.590295358649789, + "grad_norm": 0.9257769584655762, + "learning_rate": 0.0005494872162776786, + "loss": 1.4254, + "step": 5596 + }, + { + "epoch": 0.5904008438818565, + "grad_norm": 0.8147357702255249, + "learning_rate": 0.0005492453066254032, + "loss": 1.4437, + "step": 5597 + }, + { + "epoch": 0.5905063291139241, + "grad_norm": 0.6921488642692566, + "learning_rate": 0.000549003419468692, + "loss": 1.453, + "step": 5598 + }, + { + "epoch": 0.5906118143459915, + "grad_norm": 0.7009955644607544, + "learning_rate": 0.0005487615548346502, + "loss": 1.4635, + "step": 5599 + }, + { + "epoch": 0.5907172995780591, + "grad_norm": 0.6996541619300842, + "learning_rate": 0.0005485197127503795, + "loss": 1.4698, + "step": 5600 + }, + { + "epoch": 0.5908227848101266, + "grad_norm": 0.7926917672157288, + "learning_rate": 0.0005482778932429798, + "loss": 1.4745, + "step": 5601 + }, + { + "epoch": 0.5909282700421941, + "grad_norm": 0.685895562171936, + "learning_rate": 0.000548036096339548, + "loss": 1.478, + "step": 5602 + }, + { + "epoch": 0.5910337552742616, + "grad_norm": 0.7378528118133545, + "learning_rate": 0.0005477943220671786, + "loss": 1.4694, + "step": 5603 + }, + { + "epoch": 0.5911392405063292, + "grad_norm": 0.73805171251297, + "learning_rate": 0.0005475525704529638, + "loss": 1.4381, + "step": 5604 + }, + { + "epoch": 0.5912447257383966, + "grad_norm": 0.7571444511413574, + "learning_rate": 0.0005473108415239929, + "loss": 1.4245, + "step": 5605 + }, + { + "epoch": 0.5913502109704641, + "grad_norm": 0.7168264985084534, + "learning_rate": 0.0005470691353073531, + "loss": 1.4614, + "step": 5606 + }, + { + "epoch": 0.5914556962025317, + "grad_norm": 0.8035057187080383, + "learning_rate": 
0.0005468274518301284, + "loss": 1.4587, + "step": 5607 + }, + { + "epoch": 0.5915611814345991, + "grad_norm": 0.7025029063224792, + "learning_rate": 0.0005465857911194006, + "loss": 1.4655, + "step": 5608 + }, + { + "epoch": 0.5916666666666667, + "grad_norm": 0.8048072457313538, + "learning_rate": 0.0005463441532022495, + "loss": 1.4477, + "step": 5609 + }, + { + "epoch": 0.5917721518987342, + "grad_norm": 0.7031733989715576, + "learning_rate": 0.0005461025381057516, + "loss": 1.465, + "step": 5610 + }, + { + "epoch": 0.5918776371308017, + "grad_norm": 0.7029501795768738, + "learning_rate": 0.000545860945856981, + "loss": 1.464, + "step": 5611 + }, + { + "epoch": 0.5919831223628692, + "grad_norm": 0.8869108557701111, + "learning_rate": 0.0005456193764830093, + "loss": 1.4811, + "step": 5612 + }, + { + "epoch": 0.5920886075949368, + "grad_norm": 0.7653211951255798, + "learning_rate": 0.0005453778300109056, + "loss": 1.4646, + "step": 5613 + }, + { + "epoch": 0.5921940928270042, + "grad_norm": 0.8969634771347046, + "learning_rate": 0.0005451363064677365, + "loss": 1.471, + "step": 5614 + }, + { + "epoch": 0.5922995780590717, + "grad_norm": 0.6821655035018921, + "learning_rate": 0.0005448948058805657, + "loss": 1.4785, + "step": 5615 + }, + { + "epoch": 0.5924050632911393, + "grad_norm": 1.0120760202407837, + "learning_rate": 0.0005446533282764543, + "loss": 1.4684, + "step": 5616 + }, + { + "epoch": 0.5925105485232067, + "grad_norm": 0.817676842212677, + "learning_rate": 0.0005444118736824617, + "loss": 1.4394, + "step": 5617 + }, + { + "epoch": 0.5926160337552743, + "grad_norm": 0.8356680274009705, + "learning_rate": 0.000544170442125644, + "loss": 1.4721, + "step": 5618 + }, + { + "epoch": 0.5927215189873418, + "grad_norm": 0.7214919328689575, + "learning_rate": 0.0005439290336330545, + "loss": 1.4674, + "step": 5619 + }, + { + "epoch": 0.5928270042194093, + "grad_norm": 1.0777584314346313, + "learning_rate": 0.0005436876482317444, + "loss": 1.4746, + "step": 5620 
+ }, + { + "epoch": 0.5929324894514768, + "grad_norm": 0.8240314722061157, + "learning_rate": 0.000543446285948762, + "loss": 1.4706, + "step": 5621 + }, + { + "epoch": 0.5930379746835444, + "grad_norm": 0.9161328673362732, + "learning_rate": 0.0005432049468111534, + "loss": 1.4878, + "step": 5622 + }, + { + "epoch": 0.5931434599156118, + "grad_norm": 0.6883248686790466, + "learning_rate": 0.0005429636308459614, + "loss": 1.415, + "step": 5623 + }, + { + "epoch": 0.5932489451476793, + "grad_norm": 1.0359797477722168, + "learning_rate": 0.0005427223380802272, + "loss": 1.4793, + "step": 5624 + }, + { + "epoch": 0.5933544303797469, + "grad_norm": 0.7443950772285461, + "learning_rate": 0.0005424810685409881, + "loss": 1.4785, + "step": 5625 + }, + { + "epoch": 0.5934599156118143, + "grad_norm": 1.001520037651062, + "learning_rate": 0.0005422398222552806, + "loss": 1.4653, + "step": 5626 + }, + { + "epoch": 0.5935654008438819, + "grad_norm": 0.7956432700157166, + "learning_rate": 0.0005419985992501367, + "loss": 1.4825, + "step": 5627 + }, + { + "epoch": 0.5936708860759494, + "grad_norm": 1.096645712852478, + "learning_rate": 0.0005417573995525871, + "loss": 1.4384, + "step": 5628 + }, + { + "epoch": 0.5937763713080169, + "grad_norm": 0.6786928176879883, + "learning_rate": 0.0005415162231896593, + "loss": 1.4583, + "step": 5629 + }, + { + "epoch": 0.5938818565400844, + "grad_norm": 0.8925006985664368, + "learning_rate": 0.0005412750701883782, + "loss": 1.448, + "step": 5630 + }, + { + "epoch": 0.5939873417721518, + "grad_norm": 0.6731496453285217, + "learning_rate": 0.0005410339405757665, + "loss": 1.4698, + "step": 5631 + }, + { + "epoch": 0.5940928270042194, + "grad_norm": 0.7251468896865845, + "learning_rate": 0.0005407928343788435, + "loss": 1.4702, + "step": 5632 + }, + { + "epoch": 0.5941983122362869, + "grad_norm": 0.6685231328010559, + "learning_rate": 0.0005405517516246267, + "loss": 1.4484, + "step": 5633 + }, + { + "epoch": 0.5943037974683544, + "grad_norm": 
0.6510521769523621, + "learning_rate": 0.0005403106923401302, + "loss": 1.4695, + "step": 5634 + }, + { + "epoch": 0.5944092827004219, + "grad_norm": 0.7074598073959351, + "learning_rate": 0.0005400696565523666, + "loss": 1.4631, + "step": 5635 + }, + { + "epoch": 0.5945147679324895, + "grad_norm": 0.6422494053840637, + "learning_rate": 0.0005398286442883448, + "loss": 1.4537, + "step": 5636 + }, + { + "epoch": 0.5946202531645569, + "grad_norm": 0.7022764682769775, + "learning_rate": 0.0005395876555750712, + "loss": 1.4551, + "step": 5637 + }, + { + "epoch": 0.5947257383966245, + "grad_norm": 0.7374936938285828, + "learning_rate": 0.0005393466904395503, + "loss": 1.5155, + "step": 5638 + }, + { + "epoch": 0.594831223628692, + "grad_norm": 0.6745099425315857, + "learning_rate": 0.000539105748908783, + "loss": 1.47, + "step": 5639 + }, + { + "epoch": 0.5949367088607594, + "grad_norm": 0.6884918808937073, + "learning_rate": 0.0005388648310097682, + "loss": 1.4814, + "step": 5640 + }, + { + "epoch": 0.595042194092827, + "grad_norm": 0.7182822823524475, + "learning_rate": 0.0005386239367695018, + "loss": 1.4826, + "step": 5641 + }, + { + "epoch": 0.5951476793248945, + "grad_norm": 0.7880580425262451, + "learning_rate": 0.0005383830662149771, + "loss": 1.456, + "step": 5642 + }, + { + "epoch": 0.595253164556962, + "grad_norm": 0.6812344193458557, + "learning_rate": 0.0005381422193731853, + "loss": 1.4579, + "step": 5643 + }, + { + "epoch": 0.5953586497890295, + "grad_norm": 0.7413313388824463, + "learning_rate": 0.0005379013962711143, + "loss": 1.4668, + "step": 5644 + }, + { + "epoch": 0.5954641350210971, + "grad_norm": 0.7072516679763794, + "learning_rate": 0.0005376605969357494, + "loss": 1.4654, + "step": 5645 + }, + { + "epoch": 0.5955696202531645, + "grad_norm": 0.8179404139518738, + "learning_rate": 0.0005374198213940734, + "loss": 1.4434, + "step": 5646 + }, + { + "epoch": 0.5956751054852321, + "grad_norm": 0.6992336511611938, + "learning_rate": 
0.0005371790696730665, + "loss": 1.4633, + "step": 5647 + }, + { + "epoch": 0.5957805907172996, + "grad_norm": 0.7670435309410095, + "learning_rate": 0.000536938341799706, + "loss": 1.4672, + "step": 5648 + }, + { + "epoch": 0.595886075949367, + "grad_norm": 0.6835372447967529, + "learning_rate": 0.0005366976378009668, + "loss": 1.4722, + "step": 5649 + }, + { + "epoch": 0.5959915611814346, + "grad_norm": 0.8429602980613708, + "learning_rate": 0.000536456957703821, + "loss": 1.4569, + "step": 5650 + }, + { + "epoch": 0.5960970464135021, + "grad_norm": 0.7602611184120178, + "learning_rate": 0.0005362163015352374, + "loss": 1.4491, + "step": 5651 + }, + { + "epoch": 0.5962025316455696, + "grad_norm": 0.87361741065979, + "learning_rate": 0.0005359756693221836, + "loss": 1.4484, + "step": 5652 + }, + { + "epoch": 0.5963080168776371, + "grad_norm": 0.7474222183227539, + "learning_rate": 0.0005357350610916233, + "loss": 1.4187, + "step": 5653 + }, + { + "epoch": 0.5964135021097047, + "grad_norm": 0.9951300621032715, + "learning_rate": 0.0005354944768705179, + "loss": 1.4497, + "step": 5654 + }, + { + "epoch": 0.5965189873417721, + "grad_norm": 0.9307352900505066, + "learning_rate": 0.0005352539166858258, + "loss": 1.4718, + "step": 5655 + }, + { + "epoch": 0.5966244725738397, + "grad_norm": 0.701042890548706, + "learning_rate": 0.0005350133805645034, + "loss": 1.4747, + "step": 5656 + }, + { + "epoch": 0.5967299578059072, + "grad_norm": 0.7920516729354858, + "learning_rate": 0.0005347728685335036, + "loss": 1.4775, + "step": 5657 + }, + { + "epoch": 0.5968354430379746, + "grad_norm": 0.6712926030158997, + "learning_rate": 0.0005345323806197771, + "loss": 1.4942, + "step": 5658 + }, + { + "epoch": 0.5969409282700422, + "grad_norm": 0.7095350623130798, + "learning_rate": 0.0005342919168502717, + "loss": 1.4383, + "step": 5659 + }, + { + "epoch": 0.5970464135021097, + "grad_norm": 0.6649372577667236, + "learning_rate": 0.0005340514772519324, + "loss": 1.4456, + "step": 5660 
+ }, + { + "epoch": 0.5971518987341772, + "grad_norm": 0.6761260032653809, + "learning_rate": 0.0005338110618517022, + "loss": 1.4891, + "step": 5661 + }, + { + "epoch": 0.5972573839662447, + "grad_norm": 0.7358752489089966, + "learning_rate": 0.0005335706706765205, + "loss": 1.4296, + "step": 5662 + }, + { + "epoch": 0.5973628691983123, + "grad_norm": 0.7985560297966003, + "learning_rate": 0.0005333303037533244, + "loss": 1.4883, + "step": 5663 + }, + { + "epoch": 0.5974683544303797, + "grad_norm": 0.7016017436981201, + "learning_rate": 0.0005330899611090482, + "loss": 1.4699, + "step": 5664 + }, + { + "epoch": 0.5975738396624473, + "grad_norm": 0.7754395008087158, + "learning_rate": 0.0005328496427706235, + "loss": 1.4635, + "step": 5665 + }, + { + "epoch": 0.5976793248945148, + "grad_norm": 0.6822608113288879, + "learning_rate": 0.000532609348764979, + "loss": 1.4272, + "step": 5666 + }, + { + "epoch": 0.5977848101265822, + "grad_norm": 0.7672669291496277, + "learning_rate": 0.0005323690791190412, + "loss": 1.4901, + "step": 5667 + }, + { + "epoch": 0.5978902953586498, + "grad_norm": 0.7222442030906677, + "learning_rate": 0.0005321288338597327, + "loss": 1.4926, + "step": 5668 + }, + { + "epoch": 0.5979957805907173, + "grad_norm": 0.7322744131088257, + "learning_rate": 0.0005318886130139753, + "loss": 1.4682, + "step": 5669 + }, + { + "epoch": 0.5981012658227848, + "grad_norm": 0.7827789783477783, + "learning_rate": 0.0005316484166086863, + "loss": 1.4301, + "step": 5670 + }, + { + "epoch": 0.5982067510548523, + "grad_norm": 0.791528582572937, + "learning_rate": 0.0005314082446707811, + "loss": 1.4694, + "step": 5671 + }, + { + "epoch": 0.5983122362869199, + "grad_norm": 0.7510574460029602, + "learning_rate": 0.000531168097227172, + "loss": 1.4588, + "step": 5672 + }, + { + "epoch": 0.5984177215189873, + "grad_norm": 0.8035285472869873, + "learning_rate": 0.0005309279743047687, + "loss": 1.4934, + "step": 5673 + }, + { + "epoch": 0.5985232067510549, + 
"grad_norm": 0.8223737478256226, + "learning_rate": 0.0005306878759304785, + "loss": 1.4825, + "step": 5674 + }, + { + "epoch": 0.5986286919831224, + "grad_norm": 0.6975213885307312, + "learning_rate": 0.0005304478021312053, + "loss": 1.4523, + "step": 5675 + }, + { + "epoch": 0.5987341772151898, + "grad_norm": 0.693953812122345, + "learning_rate": 0.0005302077529338507, + "loss": 1.4802, + "step": 5676 + }, + { + "epoch": 0.5988396624472574, + "grad_norm": 0.6741694211959839, + "learning_rate": 0.0005299677283653128, + "loss": 1.4299, + "step": 5677 + }, + { + "epoch": 0.5989451476793249, + "grad_norm": 0.6922138333320618, + "learning_rate": 0.0005297277284524888, + "loss": 1.4958, + "step": 5678 + }, + { + "epoch": 0.5990506329113924, + "grad_norm": 0.7252305150032043, + "learning_rate": 0.0005294877532222709, + "loss": 1.4248, + "step": 5679 + }, + { + "epoch": 0.5991561181434599, + "grad_norm": 0.7800537347793579, + "learning_rate": 0.00052924780270155, + "loss": 1.4792, + "step": 5680 + }, + { + "epoch": 0.5992616033755275, + "grad_norm": 0.7011428475379944, + "learning_rate": 0.0005290078769172135, + "loss": 1.4517, + "step": 5681 + }, + { + "epoch": 0.5993670886075949, + "grad_norm": 0.7566906809806824, + "learning_rate": 0.0005287679758961465, + "loss": 1.4483, + "step": 5682 + }, + { + "epoch": 0.5994725738396625, + "grad_norm": 0.6715098023414612, + "learning_rate": 0.0005285280996652308, + "loss": 1.4426, + "step": 5683 + }, + { + "epoch": 0.59957805907173, + "grad_norm": 0.6874048709869385, + "learning_rate": 0.0005282882482513459, + "loss": 1.4259, + "step": 5684 + }, + { + "epoch": 0.5996835443037974, + "grad_norm": 0.672662079334259, + "learning_rate": 0.0005280484216813686, + "loss": 1.4447, + "step": 5685 + }, + { + "epoch": 0.599789029535865, + "grad_norm": 0.6992858052253723, + "learning_rate": 0.0005278086199821718, + "loss": 1.4713, + "step": 5686 + }, + { + "epoch": 0.5998945147679325, + "grad_norm": 0.7433319687843323, + "learning_rate": 
0.0005275688431806274, + "loss": 1.447, + "step": 5687 + }, + { + "epoch": 0.6, + "grad_norm": 0.6929677128791809, + "learning_rate": 0.0005273290913036033, + "loss": 1.438, + "step": 5688 + }, + { + "epoch": 0.6001054852320675, + "grad_norm": 0.8164818286895752, + "learning_rate": 0.0005270893643779649, + "loss": 1.4213, + "step": 5689 + }, + { + "epoch": 0.6002109704641351, + "grad_norm": 0.9352688193321228, + "learning_rate": 0.0005268496624305747, + "loss": 1.4474, + "step": 5690 + }, + { + "epoch": 0.6003164556962025, + "grad_norm": 0.680438220500946, + "learning_rate": 0.0005266099854882927, + "loss": 1.4674, + "step": 5691 + }, + { + "epoch": 0.6004219409282701, + "grad_norm": 0.8819659948348999, + "learning_rate": 0.0005263703335779755, + "loss": 1.4187, + "step": 5692 + }, + { + "epoch": 0.6005274261603376, + "grad_norm": 0.720032811164856, + "learning_rate": 0.0005261307067264778, + "loss": 1.4581, + "step": 5693 + }, + { + "epoch": 0.600632911392405, + "grad_norm": 0.923029363155365, + "learning_rate": 0.0005258911049606503, + "loss": 1.4358, + "step": 5694 + }, + { + "epoch": 0.6007383966244726, + "grad_norm": 0.7440074682235718, + "learning_rate": 0.0005256515283073422, + "loss": 1.4126, + "step": 5695 + }, + { + "epoch": 0.60084388185654, + "grad_norm": 0.9558119773864746, + "learning_rate": 0.0005254119767933992, + "loss": 1.4641, + "step": 5696 + }, + { + "epoch": 0.6009493670886076, + "grad_norm": 0.8056557178497314, + "learning_rate": 0.0005251724504456641, + "loss": 1.4688, + "step": 5697 + }, + { + "epoch": 0.6010548523206751, + "grad_norm": 0.6899654865264893, + "learning_rate": 0.000524932949290977, + "loss": 1.4575, + "step": 5698 + }, + { + "epoch": 0.6011603375527426, + "grad_norm": 0.7639470100402832, + "learning_rate": 0.0005246934733561751, + "loss": 1.4243, + "step": 5699 + }, + { + "epoch": 0.6012658227848101, + "grad_norm": 0.823087215423584, + "learning_rate": 0.0005244540226680931, + "loss": 1.4791, + "step": 5700 + }, + { + 
"epoch": 0.6013713080168777, + "grad_norm": 0.787338376045227, + "learning_rate": 0.0005242145972535625, + "loss": 1.4432, + "step": 5701 + }, + { + "epoch": 0.6014767932489451, + "grad_norm": 1.0944461822509766, + "learning_rate": 0.0005239751971394122, + "loss": 1.47, + "step": 5702 + }, + { + "epoch": 0.6015822784810126, + "grad_norm": 0.6846142411231995, + "learning_rate": 0.0005237358223524678, + "loss": 1.4809, + "step": 5703 + }, + { + "epoch": 0.6016877637130802, + "grad_norm": 0.7985149025917053, + "learning_rate": 0.000523496472919553, + "loss": 1.4383, + "step": 5704 + }, + { + "epoch": 0.6017932489451476, + "grad_norm": 0.7247748970985413, + "learning_rate": 0.000523257148867488, + "loss": 1.4615, + "step": 5705 + }, + { + "epoch": 0.6018987341772152, + "grad_norm": 0.8483126163482666, + "learning_rate": 0.00052301785022309, + "loss": 1.424, + "step": 5706 + }, + { + "epoch": 0.6020042194092827, + "grad_norm": 0.7509559988975525, + "learning_rate": 0.0005227785770131737, + "loss": 1.4307, + "step": 5707 + }, + { + "epoch": 0.6021097046413502, + "grad_norm": 0.9925490021705627, + "learning_rate": 0.0005225393292645509, + "loss": 1.4462, + "step": 5708 + }, + { + "epoch": 0.6022151898734177, + "grad_norm": 0.6824211478233337, + "learning_rate": 0.0005223001070040305, + "loss": 1.4537, + "step": 5709 + }, + { + "epoch": 0.6023206751054853, + "grad_norm": 0.8674525022506714, + "learning_rate": 0.0005220609102584185, + "loss": 1.4966, + "step": 5710 + }, + { + "epoch": 0.6024261603375527, + "grad_norm": 0.747430145740509, + "learning_rate": 0.0005218217390545181, + "loss": 1.4714, + "step": 5711 + }, + { + "epoch": 0.6025316455696202, + "grad_norm": 0.7703140377998352, + "learning_rate": 0.0005215825934191293, + "loss": 1.4403, + "step": 5712 + }, + { + "epoch": 0.6026371308016878, + "grad_norm": 0.7628768086433411, + "learning_rate": 0.0005213434733790503, + "loss": 1.4551, + "step": 5713 + }, + { + "epoch": 0.6027426160337552, + "grad_norm": 
0.9575045108795166, + "learning_rate": 0.0005211043789610752, + "loss": 1.4447, + "step": 5714 + }, + { + "epoch": 0.6028481012658228, + "grad_norm": 0.6752820014953613, + "learning_rate": 0.0005208653101919959, + "loss": 1.4314, + "step": 5715 + }, + { + "epoch": 0.6029535864978903, + "grad_norm": 0.9288398027420044, + "learning_rate": 0.0005206262670986012, + "loss": 1.4824, + "step": 5716 + }, + { + "epoch": 0.6030590717299578, + "grad_norm": 0.673622727394104, + "learning_rate": 0.0005203872497076768, + "loss": 1.4831, + "step": 5717 + }, + { + "epoch": 0.6031645569620253, + "grad_norm": 0.8425714373588562, + "learning_rate": 0.0005201482580460063, + "loss": 1.4503, + "step": 5718 + }, + { + "epoch": 0.6032700421940929, + "grad_norm": 0.6844587326049805, + "learning_rate": 0.0005199092921403696, + "loss": 1.4547, + "step": 5719 + }, + { + "epoch": 0.6033755274261603, + "grad_norm": 0.7662461400032043, + "learning_rate": 0.0005196703520175437, + "loss": 1.4481, + "step": 5720 + }, + { + "epoch": 0.6034810126582278, + "grad_norm": 0.7158236503601074, + "learning_rate": 0.0005194314377043037, + "loss": 1.4448, + "step": 5721 + }, + { + "epoch": 0.6035864978902954, + "grad_norm": 0.7138902544975281, + "learning_rate": 0.0005191925492274205, + "loss": 1.4603, + "step": 5722 + }, + { + "epoch": 0.6036919831223628, + "grad_norm": 0.8144873976707458, + "learning_rate": 0.0005189536866136634, + "loss": 1.4574, + "step": 5723 + }, + { + "epoch": 0.6037974683544304, + "grad_norm": 0.7912715077400208, + "learning_rate": 0.0005187148498897977, + "loss": 1.474, + "step": 5724 + }, + { + "epoch": 0.6039029535864979, + "grad_norm": 0.7646504044532776, + "learning_rate": 0.0005184760390825865, + "loss": 1.4421, + "step": 5725 + }, + { + "epoch": 0.6040084388185654, + "grad_norm": 0.7226248383522034, + "learning_rate": 0.0005182372542187895, + "loss": 1.4635, + "step": 5726 + }, + { + "epoch": 0.6041139240506329, + "grad_norm": 0.7556485533714294, + "learning_rate": 
0.0005179984953251639, + "loss": 1.4376, + "step": 5727 + }, + { + "epoch": 0.6042194092827005, + "grad_norm": 0.8157675266265869, + "learning_rate": 0.0005177597624284637, + "loss": 1.4502, + "step": 5728 + }, + { + "epoch": 0.6043248945147679, + "grad_norm": 0.8987638354301453, + "learning_rate": 0.00051752105555544, + "loss": 1.4632, + "step": 5729 + }, + { + "epoch": 0.6044303797468354, + "grad_norm": 0.853367269039154, + "learning_rate": 0.0005172823747328415, + "loss": 1.4686, + "step": 5730 + }, + { + "epoch": 0.604535864978903, + "grad_norm": 0.7702990770339966, + "learning_rate": 0.0005170437199874132, + "loss": 1.4537, + "step": 5731 + }, + { + "epoch": 0.6046413502109704, + "grad_norm": 0.8083300590515137, + "learning_rate": 0.0005168050913458977, + "loss": 1.4448, + "step": 5732 + }, + { + "epoch": 0.604746835443038, + "grad_norm": 0.7359805107116699, + "learning_rate": 0.0005165664888350347, + "loss": 1.4585, + "step": 5733 + }, + { + "epoch": 0.6048523206751055, + "grad_norm": 1.0562018156051636, + "learning_rate": 0.0005163279124815605, + "loss": 1.4346, + "step": 5734 + }, + { + "epoch": 0.604957805907173, + "grad_norm": 0.7079649567604065, + "learning_rate": 0.000516089362312209, + "loss": 1.4536, + "step": 5735 + }, + { + "epoch": 0.6050632911392405, + "grad_norm": 0.9224934577941895, + "learning_rate": 0.0005158508383537109, + "loss": 1.4819, + "step": 5736 + }, + { + "epoch": 0.6051687763713081, + "grad_norm": 0.7197334170341492, + "learning_rate": 0.0005156123406327938, + "loss": 1.4442, + "step": 5737 + }, + { + "epoch": 0.6052742616033755, + "grad_norm": 0.7336747646331787, + "learning_rate": 0.0005153738691761826, + "loss": 1.4498, + "step": 5738 + }, + { + "epoch": 0.605379746835443, + "grad_norm": 0.8277914524078369, + "learning_rate": 0.0005151354240105994, + "loss": 1.4935, + "step": 5739 + }, + { + "epoch": 0.6054852320675106, + "grad_norm": 0.716366171836853, + "learning_rate": 0.0005148970051627632, + "loss": 1.43, + "step": 5740 + }, 
+ { + "epoch": 0.605590717299578, + "grad_norm": 0.9043501019477844, + "learning_rate": 0.0005146586126593898, + "loss": 1.4307, + "step": 5741 + }, + { + "epoch": 0.6056962025316456, + "grad_norm": 0.7299345135688782, + "learning_rate": 0.0005144202465271922, + "loss": 1.4622, + "step": 5742 + }, + { + "epoch": 0.6058016877637131, + "grad_norm": 0.9249469041824341, + "learning_rate": 0.000514181906792881, + "loss": 1.417, + "step": 5743 + }, + { + "epoch": 0.6059071729957806, + "grad_norm": 0.7354869246482849, + "learning_rate": 0.0005139435934831628, + "loss": 1.4185, + "step": 5744 + }, + { + "epoch": 0.6060126582278481, + "grad_norm": 0.8529190421104431, + "learning_rate": 0.0005137053066247421, + "loss": 1.4158, + "step": 5745 + }, + { + "epoch": 0.6061181434599157, + "grad_norm": 0.7169058322906494, + "learning_rate": 0.00051346704624432, + "loss": 1.4323, + "step": 5746 + }, + { + "epoch": 0.6062236286919831, + "grad_norm": 1.0638638734817505, + "learning_rate": 0.000513228812368595, + "loss": 1.4176, + "step": 5747 + }, + { + "epoch": 0.6063291139240506, + "grad_norm": 0.7001129388809204, + "learning_rate": 0.0005129906050242622, + "loss": 1.4398, + "step": 5748 + }, + { + "epoch": 0.6064345991561182, + "grad_norm": 0.9542639255523682, + "learning_rate": 0.0005127524242380139, + "loss": 1.4528, + "step": 5749 + }, + { + "epoch": 0.6065400843881856, + "grad_norm": 0.740460216999054, + "learning_rate": 0.0005125142700365394, + "loss": 1.3932, + "step": 5750 + }, + { + "epoch": 0.6066455696202532, + "grad_norm": 0.7761309742927551, + "learning_rate": 0.0005122761424465254, + "loss": 1.4099, + "step": 5751 + }, + { + "epoch": 0.6067510548523207, + "grad_norm": 0.7677785158157349, + "learning_rate": 0.0005120380414946546, + "loss": 1.4183, + "step": 5752 + }, + { + "epoch": 0.6068565400843882, + "grad_norm": 0.7782109379768372, + "learning_rate": 0.0005117999672076081, + "loss": 1.4593, + "step": 5753 + }, + { + "epoch": 0.6069620253164557, + "grad_norm": 
0.7176998853683472, + "learning_rate": 0.0005115619196120632, + "loss": 1.4364, + "step": 5754 + }, + { + "epoch": 0.6070675105485233, + "grad_norm": 0.7189013361930847, + "learning_rate": 0.0005113238987346939, + "loss": 1.4645, + "step": 5755 + }, + { + "epoch": 0.6071729957805907, + "grad_norm": 0.7696628570556641, + "learning_rate": 0.000511085904602172, + "loss": 1.424, + "step": 5756 + }, + { + "epoch": 0.6072784810126582, + "grad_norm": 0.7535209059715271, + "learning_rate": 0.0005108479372411658, + "loss": 1.4117, + "step": 5757 + }, + { + "epoch": 0.6073839662447258, + "grad_norm": 0.7477710247039795, + "learning_rate": 0.0005106099966783409, + "loss": 1.4633, + "step": 5758 + }, + { + "epoch": 0.6074894514767932, + "grad_norm": 0.6637445092201233, + "learning_rate": 0.0005103720829403594, + "loss": 1.449, + "step": 5759 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 0.7909016609191895, + "learning_rate": 0.000510134196053881, + "loss": 1.4501, + "step": 5760 + }, + { + "epoch": 0.6077004219409282, + "grad_norm": 0.7072054147720337, + "learning_rate": 0.000509896336045562, + "loss": 1.4063, + "step": 5761 + }, + { + "epoch": 0.6078059071729958, + "grad_norm": 0.7242763042449951, + "learning_rate": 0.0005096585029420556, + "loss": 1.4557, + "step": 5762 + }, + { + "epoch": 0.6079113924050633, + "grad_norm": 0.7078933715820312, + "learning_rate": 0.0005094206967700127, + "loss": 1.4357, + "step": 5763 + }, + { + "epoch": 0.6080168776371307, + "grad_norm": 0.7545815110206604, + "learning_rate": 0.0005091829175560801, + "loss": 1.4389, + "step": 5764 + }, + { + "epoch": 0.6081223628691983, + "grad_norm": 0.7741457223892212, + "learning_rate": 0.0005089451653269026, + "loss": 1.4281, + "step": 5765 + }, + { + "epoch": 0.6082278481012658, + "grad_norm": 0.87867671251297, + "learning_rate": 0.0005087074401091212, + "loss": 1.464, + "step": 5766 + }, + { + "epoch": 0.6083333333333333, + "grad_norm": 0.7732259035110474, + "learning_rate": 
0.0005084697419293746, + "loss": 1.4465, + "step": 5767 + }, + { + "epoch": 0.6084388185654008, + "grad_norm": 1.004044532775879, + "learning_rate": 0.0005082320708142975, + "loss": 1.4293, + "step": 5768 + }, + { + "epoch": 0.6085443037974684, + "grad_norm": 0.836759090423584, + "learning_rate": 0.0005079944267905226, + "loss": 1.4425, + "step": 5769 + }, + { + "epoch": 0.6086497890295358, + "grad_norm": 0.8956238627433777, + "learning_rate": 0.0005077568098846789, + "loss": 1.4592, + "step": 5770 + }, + { + "epoch": 0.6087552742616034, + "grad_norm": 1.0683262348175049, + "learning_rate": 0.0005075192201233924, + "loss": 1.43, + "step": 5771 + }, + { + "epoch": 0.6088607594936709, + "grad_norm": 0.6656935811042786, + "learning_rate": 0.0005072816575332864, + "loss": 1.4471, + "step": 5772 + }, + { + "epoch": 0.6089662447257383, + "grad_norm": 0.7463771104812622, + "learning_rate": 0.0005070441221409811, + "loss": 1.4572, + "step": 5773 + }, + { + "epoch": 0.6090717299578059, + "grad_norm": 0.7609611749649048, + "learning_rate": 0.0005068066139730936, + "loss": 1.4405, + "step": 5774 + }, + { + "epoch": 0.6091772151898734, + "grad_norm": 0.7029635906219482, + "learning_rate": 0.0005065691330562375, + "loss": 1.4204, + "step": 5775 + }, + { + "epoch": 0.6092827004219409, + "grad_norm": 0.7271345853805542, + "learning_rate": 0.0005063316794170239, + "loss": 1.4708, + "step": 5776 + }, + { + "epoch": 0.6093881856540084, + "grad_norm": 0.6868565082550049, + "learning_rate": 0.0005060942530820607, + "loss": 1.4341, + "step": 5777 + }, + { + "epoch": 0.609493670886076, + "grad_norm": 0.7554129958152771, + "learning_rate": 0.0005058568540779526, + "loss": 1.4603, + "step": 5778 + }, + { + "epoch": 0.6095991561181434, + "grad_norm": 0.9597440958023071, + "learning_rate": 0.0005056194824313015, + "loss": 1.4384, + "step": 5779 + }, + { + "epoch": 0.609704641350211, + "grad_norm": 0.6747674345970154, + "learning_rate": 0.000505382138168706, + "loss": 1.4551, + "step": 5780 
+ }, + { + "epoch": 0.6098101265822785, + "grad_norm": 0.8611102104187012, + "learning_rate": 0.0005051448213167614, + "loss": 1.4418, + "step": 5781 + }, + { + "epoch": 0.609915611814346, + "grad_norm": 0.714123547077179, + "learning_rate": 0.0005049075319020608, + "loss": 1.4532, + "step": 5782 + }, + { + "epoch": 0.6100210970464135, + "grad_norm": 0.896814227104187, + "learning_rate": 0.0005046702699511933, + "loss": 1.482, + "step": 5783 + }, + { + "epoch": 0.610126582278481, + "grad_norm": 0.7129641175270081, + "learning_rate": 0.0005044330354907454, + "loss": 1.4804, + "step": 5784 + }, + { + "epoch": 0.6102320675105485, + "grad_norm": 0.7084757089614868, + "learning_rate": 0.0005041958285473005, + "loss": 1.427, + "step": 5785 + }, + { + "epoch": 0.610337552742616, + "grad_norm": 0.7300400137901306, + "learning_rate": 0.0005039586491474386, + "loss": 1.4157, + "step": 5786 + }, + { + "epoch": 0.6104430379746836, + "grad_norm": 0.8704163432121277, + "learning_rate": 0.000503721497317737, + "loss": 1.4151, + "step": 5787 + }, + { + "epoch": 0.610548523206751, + "grad_norm": 0.7028548717498779, + "learning_rate": 0.0005034843730847696, + "loss": 1.4308, + "step": 5788 + }, + { + "epoch": 0.6106540084388186, + "grad_norm": 0.751186728477478, + "learning_rate": 0.0005032472764751074, + "loss": 1.4625, + "step": 5789 + }, + { + "epoch": 0.6107594936708861, + "grad_norm": 0.6607261896133423, + "learning_rate": 0.0005030102075153181, + "loss": 1.4288, + "step": 5790 + }, + { + "epoch": 0.6108649789029535, + "grad_norm": 0.8192542195320129, + "learning_rate": 0.000502773166231967, + "loss": 1.4807, + "step": 5791 + }, + { + "epoch": 0.6109704641350211, + "grad_norm": 0.6857917904853821, + "learning_rate": 0.0005025361526516151, + "loss": 1.4439, + "step": 5792 + }, + { + "epoch": 0.6110759493670886, + "grad_norm": 0.9444584846496582, + "learning_rate": 0.0005022991668008216, + "loss": 1.4769, + "step": 5793 + }, + { + "epoch": 0.6111814345991561, + "grad_norm": 
0.715400755405426, + "learning_rate": 0.0005020622087061415, + "loss": 1.4492, + "step": 5794 + }, + { + "epoch": 0.6112869198312236, + "grad_norm": 0.807215690612793, + "learning_rate": 0.0005018252783941273, + "loss": 1.4398, + "step": 5795 + }, + { + "epoch": 0.6113924050632912, + "grad_norm": 0.8005495667457581, + "learning_rate": 0.0005015883758913281, + "loss": 1.4392, + "step": 5796 + }, + { + "epoch": 0.6114978902953586, + "grad_norm": 0.9470233917236328, + "learning_rate": 0.0005013515012242901, + "loss": 1.4465, + "step": 5797 + }, + { + "epoch": 0.6116033755274262, + "grad_norm": 0.7367725372314453, + "learning_rate": 0.0005011146544195559, + "loss": 1.4129, + "step": 5798 + }, + { + "epoch": 0.6117088607594937, + "grad_norm": 0.7928143739700317, + "learning_rate": 0.000500877835503666, + "loss": 1.4448, + "step": 5799 + }, + { + "epoch": 0.6118143459915611, + "grad_norm": 0.8216769099235535, + "learning_rate": 0.0005006410445031569, + "loss": 1.4603, + "step": 5800 + }, + { + "epoch": 0.6119198312236287, + "grad_norm": 0.9055120944976807, + "learning_rate": 0.0005004042814445622, + "loss": 1.4535, + "step": 5801 + }, + { + "epoch": 0.6120253164556962, + "grad_norm": 0.7210325598716736, + "learning_rate": 0.0005001675463544125, + "loss": 1.4482, + "step": 5802 + }, + { + "epoch": 0.6121308016877637, + "grad_norm": 0.7564740777015686, + "learning_rate": 0.0004999308392592349, + "loss": 1.4284, + "step": 5803 + }, + { + "epoch": 0.6122362869198312, + "grad_norm": 0.7210363745689392, + "learning_rate": 0.0004996941601855536, + "loss": 1.4538, + "step": 5804 + }, + { + "epoch": 0.6123417721518988, + "grad_norm": 0.7016943693161011, + "learning_rate": 0.0004994575091598898, + "loss": 1.432, + "step": 5805 + }, + { + "epoch": 0.6124472573839662, + "grad_norm": 0.8009229302406311, + "learning_rate": 0.0004992208862087616, + "loss": 1.474, + "step": 5806 + }, + { + "epoch": 0.6125527426160338, + "grad_norm": 0.696427583694458, + "learning_rate": 
0.0004989842913586832, + "loss": 1.4634, + "step": 5807 + }, + { + "epoch": 0.6126582278481013, + "grad_norm": 0.8583039045333862, + "learning_rate": 0.000498747724636167, + "loss": 1.4645, + "step": 5808 + }, + { + "epoch": 0.6127637130801687, + "grad_norm": 0.6971880793571472, + "learning_rate": 0.000498511186067721, + "loss": 1.4705, + "step": 5809 + }, + { + "epoch": 0.6128691983122363, + "grad_norm": 0.8308250904083252, + "learning_rate": 0.0004982746756798507, + "loss": 1.4546, + "step": 5810 + }, + { + "epoch": 0.6129746835443038, + "grad_norm": 0.8257160782814026, + "learning_rate": 0.0004980381934990583, + "loss": 1.4865, + "step": 5811 + }, + { + "epoch": 0.6130801687763713, + "grad_norm": 0.7183710932731628, + "learning_rate": 0.0004978017395518425, + "loss": 1.4542, + "step": 5812 + }, + { + "epoch": 0.6131856540084388, + "grad_norm": 0.8198190331459045, + "learning_rate": 0.0004975653138646994, + "loss": 1.4398, + "step": 5813 + }, + { + "epoch": 0.6132911392405064, + "grad_norm": 0.6994310021400452, + "learning_rate": 0.0004973289164641217, + "loss": 1.4452, + "step": 5814 + }, + { + "epoch": 0.6133966244725738, + "grad_norm": 0.7765213847160339, + "learning_rate": 0.0004970925473765988, + "loss": 1.4906, + "step": 5815 + }, + { + "epoch": 0.6135021097046414, + "grad_norm": 0.6870372891426086, + "learning_rate": 0.0004968562066286168, + "loss": 1.4854, + "step": 5816 + }, + { + "epoch": 0.6136075949367089, + "grad_norm": 0.6860522627830505, + "learning_rate": 0.0004966198942466595, + "loss": 1.459, + "step": 5817 + }, + { + "epoch": 0.6137130801687763, + "grad_norm": 0.7815109491348267, + "learning_rate": 0.0004963836102572065, + "loss": 1.4869, + "step": 5818 + }, + { + "epoch": 0.6138185654008439, + "grad_norm": 0.8074607253074646, + "learning_rate": 0.0004961473546867346, + "loss": 1.4583, + "step": 5819 + }, + { + "epoch": 0.6139240506329114, + "grad_norm": 0.7079684734344482, + "learning_rate": 0.0004959111275617174, + "loss": 1.4665, + "step": 
5820 + }, + { + "epoch": 0.6140295358649789, + "grad_norm": 0.9112611413002014, + "learning_rate": 0.0004956749289086254, + "loss": 1.4791, + "step": 5821 + }, + { + "epoch": 0.6141350210970464, + "grad_norm": 0.7457361817359924, + "learning_rate": 0.0004954387587539257, + "loss": 1.4304, + "step": 5822 + }, + { + "epoch": 0.614240506329114, + "grad_norm": 0.877955436706543, + "learning_rate": 0.0004952026171240826, + "loss": 1.4415, + "step": 5823 + }, + { + "epoch": 0.6143459915611814, + "grad_norm": 0.7390661239624023, + "learning_rate": 0.0004949665040455566, + "loss": 1.4545, + "step": 5824 + }, + { + "epoch": 0.614451476793249, + "grad_norm": 1.3502106666564941, + "learning_rate": 0.0004947304195448052, + "loss": 1.4531, + "step": 5825 + }, + { + "epoch": 0.6145569620253165, + "grad_norm": 0.7698769569396973, + "learning_rate": 0.0004944943636482836, + "loss": 1.4458, + "step": 5826 + }, + { + "epoch": 0.614662447257384, + "grad_norm": 0.9357996582984924, + "learning_rate": 0.0004942583363824428, + "loss": 1.4485, + "step": 5827 + }, + { + "epoch": 0.6147679324894515, + "grad_norm": 0.7407150268554688, + "learning_rate": 0.0004940223377737304, + "loss": 1.4266, + "step": 5828 + }, + { + "epoch": 0.6148734177215189, + "grad_norm": 0.7579896450042725, + "learning_rate": 0.0004937863678485915, + "loss": 1.4628, + "step": 5829 + }, + { + "epoch": 0.6149789029535865, + "grad_norm": 0.7790936827659607, + "learning_rate": 0.0004935504266334677, + "loss": 1.4348, + "step": 5830 + }, + { + "epoch": 0.615084388185654, + "grad_norm": 0.7358512878417969, + "learning_rate": 0.0004933145141547975, + "loss": 1.4565, + "step": 5831 + }, + { + "epoch": 0.6151898734177215, + "grad_norm": 0.8895414471626282, + "learning_rate": 0.0004930786304390158, + "loss": 1.4792, + "step": 5832 + }, + { + "epoch": 0.615295358649789, + "grad_norm": 0.7195414304733276, + "learning_rate": 0.0004928427755125544, + "loss": 1.4265, + "step": 5833 + }, + { + "epoch": 0.6154008438818566, + 
"grad_norm": 0.7619926333427429, + "learning_rate": 0.0004926069494018427, + "loss": 1.4659, + "step": 5834 + }, + { + "epoch": 0.615506329113924, + "grad_norm": 0.6907306909561157, + "learning_rate": 0.0004923711521333056, + "loss": 1.4526, + "step": 5835 + }, + { + "epoch": 0.6156118143459915, + "grad_norm": 0.7498621940612793, + "learning_rate": 0.0004921353837333657, + "loss": 1.4872, + "step": 5836 + }, + { + "epoch": 0.6157172995780591, + "grad_norm": 0.6796973347663879, + "learning_rate": 0.0004918996442284419, + "loss": 1.43, + "step": 5837 + }, + { + "epoch": 0.6158227848101265, + "grad_norm": 0.7214763760566711, + "learning_rate": 0.0004916639336449499, + "loss": 1.476, + "step": 5838 + }, + { + "epoch": 0.6159282700421941, + "grad_norm": 0.6799614429473877, + "learning_rate": 0.0004914282520093023, + "loss": 1.4546, + "step": 5839 + }, + { + "epoch": 0.6160337552742616, + "grad_norm": 0.7076466679573059, + "learning_rate": 0.0004911925993479085, + "loss": 1.471, + "step": 5840 + }, + { + "epoch": 0.6161392405063291, + "grad_norm": 0.7101291418075562, + "learning_rate": 0.0004909569756871745, + "loss": 1.4733, + "step": 5841 + }, + { + "epoch": 0.6162447257383966, + "grad_norm": 0.6768279075622559, + "learning_rate": 0.0004907213810535026, + "loss": 1.4778, + "step": 5842 + }, + { + "epoch": 0.6163502109704642, + "grad_norm": 0.6832438707351685, + "learning_rate": 0.0004904858154732932, + "loss": 1.4147, + "step": 5843 + }, + { + "epoch": 0.6164556962025316, + "grad_norm": 0.7179003953933716, + "learning_rate": 0.0004902502789729424, + "loss": 1.462, + "step": 5844 + }, + { + "epoch": 0.6165611814345991, + "grad_norm": 0.6987724900245667, + "learning_rate": 0.0004900147715788429, + "loss": 1.4805, + "step": 5845 + }, + { + "epoch": 0.6166666666666667, + "grad_norm": 0.7223509550094604, + "learning_rate": 0.0004897792933173847, + "loss": 1.442, + "step": 5846 + }, + { + "epoch": 0.6167721518987341, + "grad_norm": 0.7384411096572876, + "learning_rate": 
0.0004895438442149542, + "loss": 1.4593, + "step": 5847 + }, + { + "epoch": 0.6168776371308017, + "grad_norm": 0.71107017993927, + "learning_rate": 0.0004893084242979348, + "loss": 1.4721, + "step": 5848 + }, + { + "epoch": 0.6169831223628692, + "grad_norm": 0.962515115737915, + "learning_rate": 0.0004890730335927063, + "loss": 1.5097, + "step": 5849 + }, + { + "epoch": 0.6170886075949367, + "grad_norm": 0.7260770797729492, + "learning_rate": 0.0004888376721256456, + "loss": 1.4479, + "step": 5850 + }, + { + "epoch": 0.6171940928270042, + "grad_norm": 0.7297670245170593, + "learning_rate": 0.0004886023399231255, + "loss": 1.4663, + "step": 5851 + }, + { + "epoch": 0.6172995780590718, + "grad_norm": 0.8207067251205444, + "learning_rate": 0.0004883670370115173, + "loss": 1.4395, + "step": 5852 + }, + { + "epoch": 0.6174050632911392, + "grad_norm": 0.8443765640258789, + "learning_rate": 0.00048813176341718693, + "loss": 1.4448, + "step": 5853 + }, + { + "epoch": 0.6175105485232067, + "grad_norm": 0.6838981509208679, + "learning_rate": 0.0004878965191664983, + "loss": 1.4176, + "step": 5854 + }, + { + "epoch": 0.6176160337552743, + "grad_norm": 0.778358519077301, + "learning_rate": 0.0004876613042858118, + "loss": 1.4978, + "step": 5855 + }, + { + "epoch": 0.6177215189873417, + "grad_norm": 0.7685683369636536, + "learning_rate": 0.0004874261188014842, + "loss": 1.4492, + "step": 5856 + }, + { + "epoch": 0.6178270042194093, + "grad_norm": 0.7496544718742371, + "learning_rate": 0.00048719096273986925, + "loss": 1.4254, + "step": 5857 + }, + { + "epoch": 0.6179324894514768, + "grad_norm": 0.7430266737937927, + "learning_rate": 0.0004869558361273175, + "loss": 1.4643, + "step": 5858 + }, + { + "epoch": 0.6180379746835443, + "grad_norm": 0.7665625810623169, + "learning_rate": 0.00048672073899017564, + "loss": 1.4597, + "step": 5859 + }, + { + "epoch": 0.6181434599156118, + "grad_norm": 0.7951707243919373, + "learning_rate": 0.00048648567135478805, + "loss": 1.4592, + 
"step": 5860 + }, + { + "epoch": 0.6182489451476794, + "grad_norm": 0.8918793797492981, + "learning_rate": 0.0004862506332474951, + "loss": 1.4874, + "step": 5861 + }, + { + "epoch": 0.6183544303797468, + "grad_norm": 0.7439255118370056, + "learning_rate": 0.0004860156246946338, + "loss": 1.5131, + "step": 5862 + }, + { + "epoch": 0.6184599156118143, + "grad_norm": 0.6630406975746155, + "learning_rate": 0.0004857806457225381, + "loss": 1.4086, + "step": 5863 + }, + { + "epoch": 0.6185654008438819, + "grad_norm": 0.7946127653121948, + "learning_rate": 0.00048554569635753857, + "loss": 1.415, + "step": 5864 + }, + { + "epoch": 0.6186708860759493, + "grad_norm": 0.7446522116661072, + "learning_rate": 0.00048531077662596246, + "loss": 1.4512, + "step": 5865 + }, + { + "epoch": 0.6187763713080169, + "grad_norm": 0.7687454223632812, + "learning_rate": 0.00048507588655413367, + "loss": 1.4628, + "step": 5866 + }, + { + "epoch": 0.6188818565400844, + "grad_norm": 0.7070428729057312, + "learning_rate": 0.00048484102616837277, + "loss": 1.3904, + "step": 5867 + }, + { + "epoch": 0.6189873417721519, + "grad_norm": 0.713883101940155, + "learning_rate": 0.000484606195494997, + "loss": 1.4227, + "step": 5868 + }, + { + "epoch": 0.6190928270042194, + "grad_norm": 0.6926547288894653, + "learning_rate": 0.0004843713945603205, + "loss": 1.4473, + "step": 5869 + }, + { + "epoch": 0.619198312236287, + "grad_norm": 0.6883769035339355, + "learning_rate": 0.0004841366233906538, + "loss": 1.4349, + "step": 5870 + }, + { + "epoch": 0.6193037974683544, + "grad_norm": 0.7032212018966675, + "learning_rate": 0.0004839018820123042, + "loss": 1.4941, + "step": 5871 + }, + { + "epoch": 0.619409282700422, + "grad_norm": 0.770293653011322, + "learning_rate": 0.0004836671704515756, + "loss": 1.441, + "step": 5872 + }, + { + "epoch": 0.6195147679324895, + "grad_norm": 0.7467885613441467, + "learning_rate": 0.00048343248873476853, + "loss": 1.4312, + "step": 5873 + }, + { + "epoch": 
0.6196202531645569, + "grad_norm": 0.6817745566368103, + "learning_rate": 0.00048319783688818043, + "loss": 1.4319, + "step": 5874 + }, + { + "epoch": 0.6197257383966245, + "grad_norm": 0.7381165623664856, + "learning_rate": 0.00048296321493810507, + "loss": 1.5132, + "step": 5875 + }, + { + "epoch": 0.619831223628692, + "grad_norm": 0.7270400524139404, + "learning_rate": 0.0004827286229108331, + "loss": 1.4139, + "step": 5876 + }, + { + "epoch": 0.6199367088607595, + "grad_norm": 0.7026908993721008, + "learning_rate": 0.00048249406083265123, + "loss": 1.4587, + "step": 5877 + }, + { + "epoch": 0.620042194092827, + "grad_norm": 0.7703906297683716, + "learning_rate": 0.0004822595287298442, + "loss": 1.4587, + "step": 5878 + }, + { + "epoch": 0.6201476793248946, + "grad_norm": 0.705422043800354, + "learning_rate": 0.00048202502662869195, + "loss": 1.4358, + "step": 5879 + }, + { + "epoch": 0.620253164556962, + "grad_norm": 0.6745405197143555, + "learning_rate": 0.0004817905545554717, + "loss": 1.4508, + "step": 5880 + }, + { + "epoch": 0.6203586497890295, + "grad_norm": 0.7638978958129883, + "learning_rate": 0.00048155611253645727, + "loss": 1.4449, + "step": 5881 + }, + { + "epoch": 0.6204641350210971, + "grad_norm": 0.6918387413024902, + "learning_rate": 0.0004813217005979191, + "loss": 1.449, + "step": 5882 + }, + { + "epoch": 0.6205696202531645, + "grad_norm": 0.8720850348472595, + "learning_rate": 0.000481087318766124, + "loss": 1.4196, + "step": 5883 + }, + { + "epoch": 0.6206751054852321, + "grad_norm": 0.6868793368339539, + "learning_rate": 0.0004808529670673358, + "loss": 1.41, + "step": 5884 + }, + { + "epoch": 0.6207805907172996, + "grad_norm": 0.6920719146728516, + "learning_rate": 0.00048061864552781456, + "loss": 1.4228, + "step": 5885 + }, + { + "epoch": 0.6208860759493671, + "grad_norm": 0.7319024205207825, + "learning_rate": 0.0004803843541738173, + "loss": 1.4691, + "step": 5886 + }, + { + "epoch": 0.6209915611814346, + "grad_norm": 
0.7068464159965515, + "learning_rate": 0.0004801500930315978, + "loss": 1.4691, + "step": 5887 + }, + { + "epoch": 0.6210970464135022, + "grad_norm": 0.7751444578170776, + "learning_rate": 0.000479915862127406, + "loss": 1.4197, + "step": 5888 + }, + { + "epoch": 0.6212025316455696, + "grad_norm": 0.728388786315918, + "learning_rate": 0.0004796816614874885, + "loss": 1.4524, + "step": 5889 + }, + { + "epoch": 0.6213080168776371, + "grad_norm": 0.7362114191055298, + "learning_rate": 0.00047944749113808884, + "loss": 1.4711, + "step": 5890 + }, + { + "epoch": 0.6214135021097047, + "grad_norm": 0.7938821911811829, + "learning_rate": 0.0004792133511054469, + "loss": 1.4038, + "step": 5891 + }, + { + "epoch": 0.6215189873417721, + "grad_norm": 0.7924456596374512, + "learning_rate": 0.0004789792414157992, + "loss": 1.4654, + "step": 5892 + }, + { + "epoch": 0.6216244725738397, + "grad_norm": 0.8302398920059204, + "learning_rate": 0.000478745162095379, + "loss": 1.4069, + "step": 5893 + }, + { + "epoch": 0.6217299578059071, + "grad_norm": 0.6825876235961914, + "learning_rate": 0.0004785111131704157, + "loss": 1.4241, + "step": 5894 + }, + { + "epoch": 0.6218354430379747, + "grad_norm": 0.7095518708229065, + "learning_rate": 0.0004782770946671362, + "loss": 1.4558, + "step": 5895 + }, + { + "epoch": 0.6219409282700422, + "grad_norm": 0.823843240737915, + "learning_rate": 0.0004780431066117629, + "loss": 1.451, + "step": 5896 + }, + { + "epoch": 0.6220464135021097, + "grad_norm": 0.9664172530174255, + "learning_rate": 0.0004778091490305159, + "loss": 1.4779, + "step": 5897 + }, + { + "epoch": 0.6221518987341772, + "grad_norm": 0.7809960246086121, + "learning_rate": 0.0004775752219496109, + "loss": 1.4731, + "step": 5898 + }, + { + "epoch": 0.6222573839662447, + "grad_norm": 0.9583179950714111, + "learning_rate": 0.00047734132539526086, + "loss": 1.4267, + "step": 5899 + }, + { + "epoch": 0.6223628691983122, + "grad_norm": 0.7550302743911743, + "learning_rate": 
0.00047710745939367474, + "loss": 1.4583, + "step": 5900 + }, + { + "epoch": 0.6224683544303797, + "grad_norm": 0.9089716672897339, + "learning_rate": 0.00047687362397105863, + "loss": 1.4326, + "step": 5901 + }, + { + "epoch": 0.6225738396624473, + "grad_norm": 0.7417371273040771, + "learning_rate": 0.0004766398191536149, + "loss": 1.4914, + "step": 5902 + }, + { + "epoch": 0.6226793248945147, + "grad_norm": 0.7479303479194641, + "learning_rate": 0.00047640604496754235, + "loss": 1.4102, + "step": 5903 + }, + { + "epoch": 0.6227848101265823, + "grad_norm": 0.7557774782180786, + "learning_rate": 0.000476172301439037, + "loss": 1.4502, + "step": 5904 + }, + { + "epoch": 0.6228902953586498, + "grad_norm": 0.8047304749488831, + "learning_rate": 0.00047593858859429035, + "loss": 1.4193, + "step": 5905 + }, + { + "epoch": 0.6229957805907173, + "grad_norm": 0.7566039562225342, + "learning_rate": 0.00047570490645949175, + "loss": 1.4467, + "step": 5906 + }, + { + "epoch": 0.6231012658227848, + "grad_norm": 0.8464046120643616, + "learning_rate": 0.000475471255060826, + "loss": 1.4174, + "step": 5907 + }, + { + "epoch": 0.6232067510548523, + "grad_norm": 0.7268045544624329, + "learning_rate": 0.0004752376344244752, + "loss": 1.4504, + "step": 5908 + }, + { + "epoch": 0.6233122362869198, + "grad_norm": 0.818561851978302, + "learning_rate": 0.00047500404457661747, + "loss": 1.4449, + "step": 5909 + }, + { + "epoch": 0.6234177215189873, + "grad_norm": 0.760286808013916, + "learning_rate": 0.0004747704855434278, + "loss": 1.4482, + "step": 5910 + }, + { + "epoch": 0.6235232067510549, + "grad_norm": 0.9189671277999878, + "learning_rate": 0.0004745369573510775, + "loss": 1.4076, + "step": 5911 + }, + { + "epoch": 0.6236286919831223, + "grad_norm": 0.6690592169761658, + "learning_rate": 0.0004743034600257348, + "loss": 1.4637, + "step": 5912 + }, + { + "epoch": 0.6237341772151899, + "grad_norm": 0.9564945697784424, + "learning_rate": 0.0004740699935935643, + "loss": 1.4142, + 
"step": 5913 + }, + { + "epoch": 0.6238396624472574, + "grad_norm": 0.7178501486778259, + "learning_rate": 0.0004738365580807268, + "loss": 1.426, + "step": 5914 + }, + { + "epoch": 0.6239451476793249, + "grad_norm": 0.9963337779045105, + "learning_rate": 0.0004736031535133799, + "loss": 1.4222, + "step": 5915 + }, + { + "epoch": 0.6240506329113924, + "grad_norm": 0.6991158127784729, + "learning_rate": 0.0004733697799176781, + "loss": 1.4327, + "step": 5916 + }, + { + "epoch": 0.62415611814346, + "grad_norm": 0.9236934781074524, + "learning_rate": 0.0004731364373197718, + "loss": 1.4616, + "step": 5917 + }, + { + "epoch": 0.6242616033755274, + "grad_norm": 0.7141176462173462, + "learning_rate": 0.00047290312574580835, + "loss": 1.442, + "step": 5918 + }, + { + "epoch": 0.6243670886075949, + "grad_norm": 0.9929444789886475, + "learning_rate": 0.00047266984522193134, + "loss": 1.4373, + "step": 5919 + }, + { + "epoch": 0.6244725738396625, + "grad_norm": 0.8078657388687134, + "learning_rate": 0.0004724365957742809, + "loss": 1.4374, + "step": 5920 + }, + { + "epoch": 0.6245780590717299, + "grad_norm": 0.9824070334434509, + "learning_rate": 0.0004722033774289941, + "loss": 1.4541, + "step": 5921 + }, + { + "epoch": 0.6246835443037975, + "grad_norm": 0.7978588342666626, + "learning_rate": 0.0004719701902122041, + "loss": 1.4288, + "step": 5922 + }, + { + "epoch": 0.624789029535865, + "grad_norm": 0.9472945332527161, + "learning_rate": 0.00047173703415004066, + "loss": 1.4615, + "step": 5923 + }, + { + "epoch": 0.6248945147679325, + "grad_norm": 0.8141405582427979, + "learning_rate": 0.0004715039092686302, + "loss": 1.4666, + "step": 5924 + }, + { + "epoch": 0.625, + "grad_norm": 0.8638810515403748, + "learning_rate": 0.0004712708155940951, + "loss": 1.4602, + "step": 5925 + }, + { + "epoch": 0.6251054852320675, + "grad_norm": 0.8243886232376099, + "learning_rate": 0.0004710377531525552, + "loss": 1.3872, + "step": 5926 + }, + { + "epoch": 0.625210970464135, + 
"grad_norm": 0.6822666525840759, + "learning_rate": 0.000470804721970126, + "loss": 1.4381, + "step": 5927 + }, + { + "epoch": 0.6253164556962025, + "grad_norm": 0.9194545745849609, + "learning_rate": 0.00047057172207292004, + "loss": 1.4296, + "step": 5928 + }, + { + "epoch": 0.6254219409282701, + "grad_norm": 1.0238720178604126, + "learning_rate": 0.00047033875348704576, + "loss": 1.4331, + "step": 5929 + }, + { + "epoch": 0.6255274261603375, + "grad_norm": 1.2969337701797485, + "learning_rate": 0.00047010581623860883, + "loss": 1.4552, + "step": 5930 + }, + { + "epoch": 0.6256329113924051, + "grad_norm": 0.8789483904838562, + "learning_rate": 0.0004698729103537109, + "loss": 1.457, + "step": 5931 + }, + { + "epoch": 0.6257383966244726, + "grad_norm": 0.9618124961853027, + "learning_rate": 0.0004696400358584501, + "loss": 1.4637, + "step": 5932 + }, + { + "epoch": 0.62584388185654, + "grad_norm": 0.8323497176170349, + "learning_rate": 0.00046940719277892143, + "loss": 1.4755, + "step": 5933 + }, + { + "epoch": 0.6259493670886076, + "grad_norm": 0.9262371063232422, + "learning_rate": 0.0004691743811412159, + "loss": 1.4556, + "step": 5934 + }, + { + "epoch": 0.6260548523206751, + "grad_norm": 0.6732933521270752, + "learning_rate": 0.00046894160097142113, + "loss": 1.4501, + "step": 5935 + }, + { + "epoch": 0.6261603375527426, + "grad_norm": 0.8799867630004883, + "learning_rate": 0.00046870885229562153, + "loss": 1.4583, + "step": 5936 + }, + { + "epoch": 0.6262658227848101, + "grad_norm": 0.7189381718635559, + "learning_rate": 0.0004684761351398976, + "loss": 1.4501, + "step": 5937 + }, + { + "epoch": 0.6263713080168777, + "grad_norm": 0.9308168888092041, + "learning_rate": 0.0004682434495303267, + "loss": 1.4083, + "step": 5938 + }, + { + "epoch": 0.6264767932489451, + "grad_norm": 0.719944953918457, + "learning_rate": 0.00046801079549298224, + "loss": 1.3949, + "step": 5939 + }, + { + "epoch": 0.6265822784810127, + "grad_norm": 0.8068941235542297, + 
"learning_rate": 0.0004677781730539342, + "loss": 1.4792, + "step": 5940 + }, + { + "epoch": 0.6266877637130802, + "grad_norm": 0.719455361366272, + "learning_rate": 0.00046754558223924926, + "loss": 1.4673, + "step": 5941 + }, + { + "epoch": 0.6267932489451477, + "grad_norm": 0.9208967089653015, + "learning_rate": 0.00046731302307499023, + "loss": 1.4436, + "step": 5942 + }, + { + "epoch": 0.6268987341772152, + "grad_norm": 0.6969338059425354, + "learning_rate": 0.0004670804955872166, + "loss": 1.4023, + "step": 5943 + }, + { + "epoch": 0.6270042194092827, + "grad_norm": 0.9966640472412109, + "learning_rate": 0.00046684799980198415, + "loss": 1.4604, + "step": 5944 + }, + { + "epoch": 0.6271097046413502, + "grad_norm": 0.7501001954078674, + "learning_rate": 0.0004666155357453451, + "loss": 1.4153, + "step": 5945 + }, + { + "epoch": 0.6272151898734177, + "grad_norm": 0.7981048226356506, + "learning_rate": 0.00046638310344334835, + "loss": 1.459, + "step": 5946 + }, + { + "epoch": 0.6273206751054853, + "grad_norm": 0.8062828183174133, + "learning_rate": 0.0004661507029220393, + "loss": 1.4259, + "step": 5947 + }, + { + "epoch": 0.6274261603375527, + "grad_norm": 0.7797819972038269, + "learning_rate": 0.0004659183342074594, + "loss": 1.4197, + "step": 5948 + }, + { + "epoch": 0.6275316455696203, + "grad_norm": 0.7300933003425598, + "learning_rate": 0.0004656859973256466, + "loss": 1.4378, + "step": 5949 + }, + { + "epoch": 0.6276371308016878, + "grad_norm": 0.7258684039115906, + "learning_rate": 0.0004654536923026356, + "loss": 1.4402, + "step": 5950 + }, + { + "epoch": 0.6277426160337553, + "grad_norm": 0.808009147644043, + "learning_rate": 0.00046522141916445725, + "loss": 1.4834, + "step": 5951 + }, + { + "epoch": 0.6278481012658228, + "grad_norm": 0.7842979431152344, + "learning_rate": 0.0004649891779371389, + "loss": 1.4621, + "step": 5952 + }, + { + "epoch": 0.6279535864978903, + "grad_norm": 0.7294203639030457, + "learning_rate": 0.0004647569686467043, + 
"loss": 1.4393, + "step": 5953 + }, + { + "epoch": 0.6280590717299578, + "grad_norm": 0.7866582870483398, + "learning_rate": 0.00046452479131917383, + "loss": 1.4257, + "step": 5954 + }, + { + "epoch": 0.6281645569620253, + "grad_norm": 0.7871180772781372, + "learning_rate": 0.0004642926459805636, + "loss": 1.4522, + "step": 5955 + }, + { + "epoch": 0.6282700421940929, + "grad_norm": 0.7779834270477295, + "learning_rate": 0.0004640605326568874, + "loss": 1.4601, + "step": 5956 + }, + { + "epoch": 0.6283755274261603, + "grad_norm": 0.7409660816192627, + "learning_rate": 0.00046382845137415437, + "loss": 1.4128, + "step": 5957 + }, + { + "epoch": 0.6284810126582279, + "grad_norm": 0.9247057437896729, + "learning_rate": 0.0004635964021583703, + "loss": 1.4221, + "step": 5958 + }, + { + "epoch": 0.6285864978902953, + "grad_norm": 0.7442986369132996, + "learning_rate": 0.00046336438503553754, + "loss": 1.4278, + "step": 5959 + }, + { + "epoch": 0.6286919831223629, + "grad_norm": 1.023180365562439, + "learning_rate": 0.00046313240003165466, + "loss": 1.4506, + "step": 5960 + }, + { + "epoch": 0.6287974683544304, + "grad_norm": 0.7781404256820679, + "learning_rate": 0.00046290044717271685, + "loss": 1.4477, + "step": 5961 + }, + { + "epoch": 0.6289029535864978, + "grad_norm": 0.838767945766449, + "learning_rate": 0.00046266852648471553, + "loss": 1.4329, + "step": 5962 + }, + { + "epoch": 0.6290084388185654, + "grad_norm": 0.8130188584327698, + "learning_rate": 0.0004624366379936383, + "loss": 1.4533, + "step": 5963 + }, + { + "epoch": 0.6291139240506329, + "grad_norm": 0.8062422275543213, + "learning_rate": 0.00046220478172546997, + "loss": 1.4511, + "step": 5964 + }, + { + "epoch": 0.6292194092827004, + "grad_norm": 0.8232077360153198, + "learning_rate": 0.00046197295770619105, + "loss": 1.4814, + "step": 5965 + }, + { + "epoch": 0.6293248945147679, + "grad_norm": 0.7051408290863037, + "learning_rate": 0.00046174116596177833, + "loss": 1.4461, + "step": 5966 + }, + { + 
"epoch": 0.6294303797468355, + "grad_norm": 0.741523802280426, + "learning_rate": 0.00046150940651820536, + "loss": 1.446, + "step": 5967 + }, + { + "epoch": 0.6295358649789029, + "grad_norm": 0.7414664030075073, + "learning_rate": 0.0004612776794014419, + "loss": 1.4105, + "step": 5968 + }, + { + "epoch": 0.6296413502109705, + "grad_norm": 0.8242177367210388, + "learning_rate": 0.00046104598463745424, + "loss": 1.4115, + "step": 5969 + }, + { + "epoch": 0.629746835443038, + "grad_norm": 0.7262500524520874, + "learning_rate": 0.0004608143222522048, + "loss": 1.4591, + "step": 5970 + }, + { + "epoch": 0.6298523206751054, + "grad_norm": 0.6731962561607361, + "learning_rate": 0.00046058269227165256, + "loss": 1.4673, + "step": 5971 + }, + { + "epoch": 0.629957805907173, + "grad_norm": 0.7272813320159912, + "learning_rate": 0.0004603510947217526, + "loss": 1.4031, + "step": 5972 + }, + { + "epoch": 0.6300632911392405, + "grad_norm": 0.7918566465377808, + "learning_rate": 0.000460119529628457, + "loss": 1.451, + "step": 5973 + }, + { + "epoch": 0.630168776371308, + "grad_norm": 0.828005313873291, + "learning_rate": 0.00045988799701771364, + "loss": 1.453, + "step": 5974 + }, + { + "epoch": 0.6302742616033755, + "grad_norm": 0.8806169033050537, + "learning_rate": 0.0004596564969154668, + "loss": 1.4277, + "step": 5975 + }, + { + "epoch": 0.6303797468354431, + "grad_norm": 0.8193827271461487, + "learning_rate": 0.00045942502934765735, + "loss": 1.4336, + "step": 5976 + }, + { + "epoch": 0.6304852320675105, + "grad_norm": 0.8914004564285278, + "learning_rate": 0.0004591935943402222, + "loss": 1.4036, + "step": 5977 + }, + { + "epoch": 0.630590717299578, + "grad_norm": 0.7505046129226685, + "learning_rate": 0.00045896219191909486, + "loss": 1.4403, + "step": 5978 + }, + { + "epoch": 0.6306962025316456, + "grad_norm": 0.8382296562194824, + "learning_rate": 0.0004587308221102053, + "loss": 1.3923, + "step": 5979 + }, + { + "epoch": 0.630801687763713, + "grad_norm": 
0.9911462068557739, + "learning_rate": 0.0004584994849394795, + "loss": 1.4209, + "step": 5980 + }, + { + "epoch": 0.6309071729957806, + "grad_norm": 0.9995932579040527, + "learning_rate": 0.0004582681804328396, + "loss": 1.4492, + "step": 5981 + }, + { + "epoch": 0.6310126582278481, + "grad_norm": 1.0845234394073486, + "learning_rate": 0.0004580369086162051, + "loss": 1.446, + "step": 5982 + }, + { + "epoch": 0.6311181434599156, + "grad_norm": 0.8087531924247742, + "learning_rate": 0.0004578056695154909, + "loss": 1.4106, + "step": 5983 + }, + { + "epoch": 0.6312236286919831, + "grad_norm": 1.0594855546951294, + "learning_rate": 0.0004575744631566083, + "loss": 1.4399, + "step": 5984 + }, + { + "epoch": 0.6313291139240507, + "grad_norm": 0.7140163779258728, + "learning_rate": 0.0004573432895654654, + "loss": 1.4348, + "step": 5985 + }, + { + "epoch": 0.6314345991561181, + "grad_norm": 0.7709125876426697, + "learning_rate": 0.00045711214876796623, + "loss": 1.4558, + "step": 5986 + }, + { + "epoch": 0.6315400843881857, + "grad_norm": 0.7351974844932556, + "learning_rate": 0.0004568810407900112, + "loss": 1.4289, + "step": 5987 + }, + { + "epoch": 0.6316455696202532, + "grad_norm": 0.7025332450866699, + "learning_rate": 0.00045664996565749716, + "loss": 1.4551, + "step": 5988 + }, + { + "epoch": 0.6317510548523206, + "grad_norm": 0.8435665369033813, + "learning_rate": 0.00045641892339631703, + "loss": 1.4854, + "step": 5989 + }, + { + "epoch": 0.6318565400843882, + "grad_norm": 0.682041347026825, + "learning_rate": 0.0004561879140323607, + "loss": 1.4476, + "step": 5990 + }, + { + "epoch": 0.6319620253164557, + "grad_norm": 0.8568286299705505, + "learning_rate": 0.0004559569375915137, + "loss": 1.448, + "step": 5991 + }, + { + "epoch": 0.6320675105485232, + "grad_norm": 0.686526358127594, + "learning_rate": 0.00045572599409965804, + "loss": 1.4194, + "step": 5992 + }, + { + "epoch": 0.6321729957805907, + "grad_norm": 0.7808722853660583, + "learning_rate": 
0.00045549508358267224, + "loss": 1.5162, + "step": 5993 + }, + { + "epoch": 0.6322784810126583, + "grad_norm": 0.8111094236373901, + "learning_rate": 0.0004552642060664307, + "loss": 1.4662, + "step": 5994 + }, + { + "epoch": 0.6323839662447257, + "grad_norm": 1.287213683128357, + "learning_rate": 0.00045503336157680466, + "loss": 1.4198, + "step": 5995 + }, + { + "epoch": 0.6324894514767933, + "grad_norm": 0.7146262526512146, + "learning_rate": 0.00045480255013966123, + "loss": 1.4515, + "step": 5996 + }, + { + "epoch": 0.6325949367088608, + "grad_norm": 0.839002251625061, + "learning_rate": 0.00045457177178086407, + "loss": 1.4547, + "step": 5997 + }, + { + "epoch": 0.6327004219409282, + "grad_norm": 0.6940028667449951, + "learning_rate": 0.0004543410265262727, + "loss": 1.4303, + "step": 5998 + }, + { + "epoch": 0.6328059071729958, + "grad_norm": 0.7505186796188354, + "learning_rate": 0.000454110314401744, + "loss": 1.4463, + "step": 5999 + }, + { + "epoch": 0.6329113924050633, + "grad_norm": 0.8099414110183716, + "learning_rate": 0.0004538796354331298, + "loss": 1.4673, + "step": 6000 + }, + { + "epoch": 0.6330168776371308, + "grad_norm": 0.7326754927635193, + "learning_rate": 0.0004536489896462792, + "loss": 1.4158, + "step": 6001 + }, + { + "epoch": 0.6331223628691983, + "grad_norm": 0.8523045778274536, + "learning_rate": 0.0004534183770670371, + "loss": 1.4128, + "step": 6002 + }, + { + "epoch": 0.6332278481012659, + "grad_norm": 0.8154420256614685, + "learning_rate": 0.0004531877977212446, + "loss": 1.4514, + "step": 6003 + }, + { + "epoch": 0.6333333333333333, + "grad_norm": 0.8596272468566895, + "learning_rate": 0.00045295725163473945, + "loss": 1.4802, + "step": 6004 + }, + { + "epoch": 0.6334388185654009, + "grad_norm": 0.8362776637077332, + "learning_rate": 0.0004527267388333555, + "loss": 1.4321, + "step": 6005 + }, + { + "epoch": 0.6335443037974684, + "grad_norm": 0.9191732406616211, + "learning_rate": 0.0004524962593429227, + "loss": 1.4412, + 
"step": 6006 + }, + { + "epoch": 0.6336497890295358, + "grad_norm": 0.8347001075744629, + "learning_rate": 0.00045226581318926737, + "loss": 1.4359, + "step": 6007 + }, + { + "epoch": 0.6337552742616034, + "grad_norm": 0.9287124276161194, + "learning_rate": 0.0004520354003982125, + "loss": 1.4485, + "step": 6008 + }, + { + "epoch": 0.6338607594936709, + "grad_norm": 0.6830888390541077, + "learning_rate": 0.00045180502099557686, + "loss": 1.4687, + "step": 6009 + }, + { + "epoch": 0.6339662447257384, + "grad_norm": 0.712864100933075, + "learning_rate": 0.0004515746750071754, + "loss": 1.4144, + "step": 6010 + }, + { + "epoch": 0.6340717299578059, + "grad_norm": 0.8446677327156067, + "learning_rate": 0.00045134436245881986, + "loss": 1.4227, + "step": 6011 + }, + { + "epoch": 0.6341772151898735, + "grad_norm": 0.734440267086029, + "learning_rate": 0.0004511140833763177, + "loss": 1.4574, + "step": 6012 + }, + { + "epoch": 0.6342827004219409, + "grad_norm": 0.9858836531639099, + "learning_rate": 0.00045088383778547284, + "loss": 1.4616, + "step": 6013 + }, + { + "epoch": 0.6343881856540085, + "grad_norm": 0.6982553601264954, + "learning_rate": 0.0004506536257120856, + "loss": 1.458, + "step": 6014 + }, + { + "epoch": 0.634493670886076, + "grad_norm": 0.848870575428009, + "learning_rate": 0.0004504234471819518, + "loss": 1.4534, + "step": 6015 + }, + { + "epoch": 0.6345991561181434, + "grad_norm": 0.925583004951477, + "learning_rate": 0.0004501933022208649, + "loss": 1.4415, + "step": 6016 + }, + { + "epoch": 0.634704641350211, + "grad_norm": 0.98038250207901, + "learning_rate": 0.00044996319085461353, + "loss": 1.4694, + "step": 6017 + }, + { + "epoch": 0.6348101265822785, + "grad_norm": 0.8693779706954956, + "learning_rate": 0.00044973311310898275, + "loss": 1.4149, + "step": 6018 + }, + { + "epoch": 0.634915611814346, + "grad_norm": 1.0354924201965332, + "learning_rate": 0.00044950306900975377, + "loss": 1.4144, + "step": 6019 + }, + { + "epoch": 0.6350210970464135, 
+ "grad_norm": 0.8047609925270081, + "learning_rate": 0.0004492730585827046, + "loss": 1.4452, + "step": 6020 + }, + { + "epoch": 0.6351265822784811, + "grad_norm": 0.7272672057151794, + "learning_rate": 0.0004490430818536085, + "loss": 1.4153, + "step": 6021 + }, + { + "epoch": 0.6352320675105485, + "grad_norm": 0.9304630756378174, + "learning_rate": 0.0004488131388482359, + "loss": 1.4076, + "step": 6022 + }, + { + "epoch": 0.635337552742616, + "grad_norm": 0.7912357449531555, + "learning_rate": 0.000448583229592353, + "loss": 1.4447, + "step": 6023 + }, + { + "epoch": 0.6354430379746835, + "grad_norm": 0.76729816198349, + "learning_rate": 0.0004483533541117218, + "loss": 1.4038, + "step": 6024 + }, + { + "epoch": 0.635548523206751, + "grad_norm": 0.6574088335037231, + "learning_rate": 0.0004481235124321018, + "loss": 1.4315, + "step": 6025 + }, + { + "epoch": 0.6356540084388186, + "grad_norm": 0.8969433307647705, + "learning_rate": 0.0004478937045792474, + "loss": 1.43, + "step": 6026 + }, + { + "epoch": 0.635759493670886, + "grad_norm": 0.7240192890167236, + "learning_rate": 0.00044766393057891, + "loss": 1.4347, + "step": 6027 + }, + { + "epoch": 0.6358649789029536, + "grad_norm": 0.7843334674835205, + "learning_rate": 0.00044743419045683674, + "loss": 1.4505, + "step": 6028 + }, + { + "epoch": 0.6359704641350211, + "grad_norm": 0.686714231967926, + "learning_rate": 0.00044720448423877113, + "loss": 1.4191, + "step": 6029 + }, + { + "epoch": 0.6360759493670886, + "grad_norm": 0.7982355952262878, + "learning_rate": 0.0004469748119504529, + "loss": 1.4294, + "step": 6030 + }, + { + "epoch": 0.6361814345991561, + "grad_norm": 0.8331283926963806, + "learning_rate": 0.000446745173617618, + "loss": 1.4658, + "step": 6031 + }, + { + "epoch": 0.6362869198312237, + "grad_norm": 0.7542023658752441, + "learning_rate": 0.00044651556926599863, + "loss": 1.4418, + "step": 6032 + }, + { + "epoch": 0.6363924050632911, + "grad_norm": 0.7199057340621948, + "learning_rate": 
0.0004462859989213227, + "loss": 1.4362, + "step": 6033 + }, + { + "epoch": 0.6364978902953586, + "grad_norm": 0.8460214138031006, + "learning_rate": 0.0004460564626093154, + "loss": 1.4618, + "step": 6034 + }, + { + "epoch": 0.6366033755274262, + "grad_norm": 0.6650453805923462, + "learning_rate": 0.00044582696035569695, + "loss": 1.4488, + "step": 6035 + }, + { + "epoch": 0.6367088607594936, + "grad_norm": 0.90232253074646, + "learning_rate": 0.00044559749218618444, + "loss": 1.4309, + "step": 6036 + }, + { + "epoch": 0.6368143459915612, + "grad_norm": 0.6777293086051941, + "learning_rate": 0.0004453680581264908, + "loss": 1.4488, + "step": 6037 + }, + { + "epoch": 0.6369198312236287, + "grad_norm": 0.8364424705505371, + "learning_rate": 0.00044513865820232525, + "loss": 1.4364, + "step": 6038 + }, + { + "epoch": 0.6370253164556962, + "grad_norm": 0.6962621212005615, + "learning_rate": 0.0004449092924393933, + "loss": 1.4222, + "step": 6039 + }, + { + "epoch": 0.6371308016877637, + "grad_norm": 0.937920331954956, + "learning_rate": 0.0004446799608633964, + "loss": 1.4577, + "step": 6040 + }, + { + "epoch": 0.6372362869198313, + "grad_norm": 0.6741183400154114, + "learning_rate": 0.00044445066350003203, + "loss": 1.4148, + "step": 6041 + }, + { + "epoch": 0.6373417721518987, + "grad_norm": 0.8303995132446289, + "learning_rate": 0.00044422140037499473, + "loss": 1.4301, + "step": 6042 + }, + { + "epoch": 0.6374472573839662, + "grad_norm": 0.790778636932373, + "learning_rate": 0.0004439921715139743, + "loss": 1.4323, + "step": 6043 + }, + { + "epoch": 0.6375527426160338, + "grad_norm": 0.8936322331428528, + "learning_rate": 0.00044376297694265687, + "loss": 1.415, + "step": 6044 + }, + { + "epoch": 0.6376582278481012, + "grad_norm": 0.8711749911308289, + "learning_rate": 0.000443533816686725, + "loss": 1.4177, + "step": 6045 + }, + { + "epoch": 0.6377637130801688, + "grad_norm": 0.9429438710212708, + "learning_rate": 0.0004433046907718571, + "loss": 1.4174, + 
"step": 6046 + }, + { + "epoch": 0.6378691983122363, + "grad_norm": 0.8306996822357178, + "learning_rate": 0.0004430755992237278, + "loss": 1.4398, + "step": 6047 + }, + { + "epoch": 0.6379746835443038, + "grad_norm": 0.7849342823028564, + "learning_rate": 0.00044284654206800826, + "loss": 1.4485, + "step": 6048 + }, + { + "epoch": 0.6380801687763713, + "grad_norm": 0.8099485635757446, + "learning_rate": 0.00044261751933036525, + "loss": 1.4244, + "step": 6049 + }, + { + "epoch": 0.6381856540084389, + "grad_norm": 0.6847753524780273, + "learning_rate": 0.00044238853103646154, + "loss": 1.4277, + "step": 6050 + }, + { + "epoch": 0.6382911392405063, + "grad_norm": 0.8462076783180237, + "learning_rate": 0.0004421595772119573, + "loss": 1.4432, + "step": 6051 + }, + { + "epoch": 0.6383966244725738, + "grad_norm": 0.7126113772392273, + "learning_rate": 0.0004419306578825073, + "loss": 1.4733, + "step": 6052 + }, + { + "epoch": 0.6385021097046414, + "grad_norm": 0.6599733829498291, + "learning_rate": 0.0004417017730737633, + "loss": 1.4116, + "step": 6053 + }, + { + "epoch": 0.6386075949367088, + "grad_norm": 0.8341288566589355, + "learning_rate": 0.00044147292281137293, + "loss": 1.4597, + "step": 6054 + }, + { + "epoch": 0.6387130801687764, + "grad_norm": 0.6905267834663391, + "learning_rate": 0.00044124410712098014, + "loss": 1.4122, + "step": 6055 + }, + { + "epoch": 0.6388185654008439, + "grad_norm": 0.9611828923225403, + "learning_rate": 0.0004410153260282246, + "loss": 1.4209, + "step": 6056 + }, + { + "epoch": 0.6389240506329114, + "grad_norm": 0.7664650082588196, + "learning_rate": 0.00044078657955874245, + "loss": 1.4553, + "step": 6057 + }, + { + "epoch": 0.6390295358649789, + "grad_norm": 1.0288769006729126, + "learning_rate": 0.0004405578677381661, + "loss": 1.452, + "step": 6058 + }, + { + "epoch": 0.6391350210970465, + "grad_norm": 0.881078839302063, + "learning_rate": 0.0004403291905921233, + "loss": 1.4586, + "step": 6059 + }, + { + "epoch": 
0.6392405063291139, + "grad_norm": 0.7850981950759888, + "learning_rate": 0.00044010054814623925, + "loss": 1.4724, + "step": 6060 + }, + { + "epoch": 0.6393459915611814, + "grad_norm": 1.4152766466140747, + "learning_rate": 0.00043987194042613393, + "loss": 1.4432, + "step": 6061 + }, + { + "epoch": 0.639451476793249, + "grad_norm": 0.7591932415962219, + "learning_rate": 0.0004396433674574242, + "loss": 1.4199, + "step": 6062 + }, + { + "epoch": 0.6395569620253164, + "grad_norm": 1.0152407884597778, + "learning_rate": 0.00043941482926572277, + "loss": 1.4233, + "step": 6063 + }, + { + "epoch": 0.639662447257384, + "grad_norm": 0.6946152448654175, + "learning_rate": 0.0004391863258766384, + "loss": 1.4939, + "step": 6064 + }, + { + "epoch": 0.6397679324894515, + "grad_norm": 0.8376253247261047, + "learning_rate": 0.00043895785731577606, + "loss": 1.4245, + "step": 6065 + }, + { + "epoch": 0.639873417721519, + "grad_norm": 0.7064716219902039, + "learning_rate": 0.0004387294236087368, + "loss": 1.4132, + "step": 6066 + }, + { + "epoch": 0.6399789029535865, + "grad_norm": 0.9098712801933289, + "learning_rate": 0.00043850102478111764, + "loss": 1.4334, + "step": 6067 + }, + { + "epoch": 0.640084388185654, + "grad_norm": 0.6841269731521606, + "learning_rate": 0.00043827266085851203, + "loss": 1.4166, + "step": 6068 + }, + { + "epoch": 0.6401898734177215, + "grad_norm": 0.8374894857406616, + "learning_rate": 0.00043804433186650916, + "loss": 1.4776, + "step": 6069 + }, + { + "epoch": 0.640295358649789, + "grad_norm": 0.7425900101661682, + "learning_rate": 0.0004378160378306944, + "loss": 1.4242, + "step": 6070 + }, + { + "epoch": 0.6404008438818566, + "grad_norm": 0.709538996219635, + "learning_rate": 0.0004375877787766495, + "loss": 1.4308, + "step": 6071 + }, + { + "epoch": 0.640506329113924, + "grad_norm": 0.7231118679046631, + "learning_rate": 0.0004373595547299517, + "loss": 1.4255, + "step": 6072 + }, + { + "epoch": 0.6406118143459916, + "grad_norm": 
0.7738616466522217, + "learning_rate": 0.00043713136571617474, + "loss": 1.469, + "step": 6073 + }, + { + "epoch": 0.6407172995780591, + "grad_norm": 0.7010825276374817, + "learning_rate": 0.00043690321176088843, + "loss": 1.465, + "step": 6074 + }, + { + "epoch": 0.6408227848101266, + "grad_norm": 0.6870641708374023, + "learning_rate": 0.00043667509288965845, + "loss": 1.4089, + "step": 6075 + }, + { + "epoch": 0.6409282700421941, + "grad_norm": 0.8019319176673889, + "learning_rate": 0.0004364470091280463, + "loss": 1.4037, + "step": 6076 + }, + { + "epoch": 0.6410337552742617, + "grad_norm": 0.7478570938110352, + "learning_rate": 0.0004362189605016107, + "loss": 1.4322, + "step": 6077 + }, + { + "epoch": 0.6411392405063291, + "grad_norm": 0.9035025238990784, + "learning_rate": 0.00043599094703590524, + "loss": 1.4353, + "step": 6078 + }, + { + "epoch": 0.6412447257383966, + "grad_norm": 0.7081230282783508, + "learning_rate": 0.00043576296875647984, + "loss": 1.4612, + "step": 6079 + }, + { + "epoch": 0.6413502109704642, + "grad_norm": 0.6801396608352661, + "learning_rate": 0.00043553502568888095, + "loss": 1.4074, + "step": 6080 + }, + { + "epoch": 0.6414556962025316, + "grad_norm": 0.7969607710838318, + "learning_rate": 0.00043530711785865026, + "loss": 1.426, + "step": 6081 + }, + { + "epoch": 0.6415611814345992, + "grad_norm": 0.7141013741493225, + "learning_rate": 0.00043507924529132637, + "loss": 1.3969, + "step": 6082 + }, + { + "epoch": 0.6416666666666667, + "grad_norm": 0.7913172841072083, + "learning_rate": 0.0004348514080124432, + "loss": 1.4669, + "step": 6083 + }, + { + "epoch": 0.6417721518987342, + "grad_norm": 0.7970975637435913, + "learning_rate": 0.0004346236060475314, + "loss": 1.4149, + "step": 6084 + }, + { + "epoch": 0.6418776371308017, + "grad_norm": 0.7446650266647339, + "learning_rate": 0.00043439583942211674, + "loss": 1.4189, + "step": 6085 + }, + { + "epoch": 0.6419831223628693, + "grad_norm": 0.7273004651069641, + "learning_rate": 
0.00043416810816172244, + "loss": 1.4602, + "step": 6086 + }, + { + "epoch": 0.6420886075949367, + "grad_norm": 0.9827155470848083, + "learning_rate": 0.0004339404122918664, + "loss": 1.4558, + "step": 6087 + }, + { + "epoch": 0.6421940928270042, + "grad_norm": 0.7047234177589417, + "learning_rate": 0.0004337127518380632, + "loss": 1.4163, + "step": 6088 + }, + { + "epoch": 0.6422995780590718, + "grad_norm": 0.8416969180107117, + "learning_rate": 0.0004334851268258234, + "loss": 1.4183, + "step": 6089 + }, + { + "epoch": 0.6424050632911392, + "grad_norm": 0.8890203237533569, + "learning_rate": 0.0004332575372806534, + "loss": 1.4417, + "step": 6090 + }, + { + "epoch": 0.6425105485232068, + "grad_norm": 0.8316123485565186, + "learning_rate": 0.00043302998322805564, + "loss": 1.4655, + "step": 6091 + }, + { + "epoch": 0.6426160337552742, + "grad_norm": 0.9710208177566528, + "learning_rate": 0.0004328024646935289, + "loss": 1.4274, + "step": 6092 + }, + { + "epoch": 0.6427215189873418, + "grad_norm": 0.7797888517379761, + "learning_rate": 0.00043257498170256735, + "loss": 1.436, + "step": 6093 + }, + { + "epoch": 0.6428270042194093, + "grad_norm": 0.807009756565094, + "learning_rate": 0.0004323475342806622, + "loss": 1.42, + "step": 6094 + }, + { + "epoch": 0.6429324894514767, + "grad_norm": 0.7490699291229248, + "learning_rate": 0.00043212012245329986, + "loss": 1.3881, + "step": 6095 + }, + { + "epoch": 0.6430379746835443, + "grad_norm": 0.7669945955276489, + "learning_rate": 0.0004318927462459629, + "loss": 1.418, + "step": 6096 + }, + { + "epoch": 0.6431434599156118, + "grad_norm": 0.7610446214675903, + "learning_rate": 0.0004316654056841299, + "loss": 1.414, + "step": 6097 + }, + { + "epoch": 0.6432489451476793, + "grad_norm": 0.7417302131652832, + "learning_rate": 0.0004314381007932756, + "loss": 1.4509, + "step": 6098 + }, + { + "epoch": 0.6433544303797468, + "grad_norm": 0.7135471105575562, + "learning_rate": 0.00043121083159887056, + "loss": 1.4046, + "step": 
6099 + }, + { + "epoch": 0.6434599156118144, + "grad_norm": 0.6729781627655029, + "learning_rate": 0.00043098359812638145, + "loss": 1.4355, + "step": 6100 + }, + { + "epoch": 0.6435654008438818, + "grad_norm": 0.6841176748275757, + "learning_rate": 0.000430756400401271, + "loss": 1.4692, + "step": 6101 + }, + { + "epoch": 0.6436708860759494, + "grad_norm": 0.7283507585525513, + "learning_rate": 0.00043052923844899733, + "loss": 1.4057, + "step": 6102 + }, + { + "epoch": 0.6437763713080169, + "grad_norm": 0.8415379524230957, + "learning_rate": 0.000430302112295016, + "loss": 1.4154, + "step": 6103 + }, + { + "epoch": 0.6438818565400843, + "grad_norm": 0.7089380621910095, + "learning_rate": 0.00043007502196477703, + "loss": 1.4375, + "step": 6104 + }, + { + "epoch": 0.6439873417721519, + "grad_norm": 0.8229581117630005, + "learning_rate": 0.00042984796748372716, + "loss": 1.4625, + "step": 6105 + }, + { + "epoch": 0.6440928270042194, + "grad_norm": 0.7281444668769836, + "learning_rate": 0.000429620948877309, + "loss": 1.4405, + "step": 6106 + }, + { + "epoch": 0.6441983122362869, + "grad_norm": 0.7299991250038147, + "learning_rate": 0.000429393966170961, + "loss": 1.4761, + "step": 6107 + }, + { + "epoch": 0.6443037974683544, + "grad_norm": 0.7181311845779419, + "learning_rate": 0.00042916701939011787, + "loss": 1.4278, + "step": 6108 + }, + { + "epoch": 0.644409282700422, + "grad_norm": 0.7958118319511414, + "learning_rate": 0.00042894010856020997, + "loss": 1.4519, + "step": 6109 + }, + { + "epoch": 0.6445147679324894, + "grad_norm": 0.7192996144294739, + "learning_rate": 0.00042871323370666383, + "loss": 1.4303, + "step": 6110 + }, + { + "epoch": 0.644620253164557, + "grad_norm": 0.9002346396446228, + "learning_rate": 0.00042848639485490165, + "loss": 1.4399, + "step": 6111 + }, + { + "epoch": 0.6447257383966245, + "grad_norm": 0.7096213698387146, + "learning_rate": 0.0004282595920303425, + "loss": 1.405, + "step": 6112 + }, + { + "epoch": 0.6448312236286919, + 
"grad_norm": 0.742565929889679, + "learning_rate": 0.00042803282525840036, + "loss": 1.46, + "step": 6113 + }, + { + "epoch": 0.6449367088607595, + "grad_norm": 0.7045370936393738, + "learning_rate": 0.0004278060945644856, + "loss": 1.4281, + "step": 6114 + }, + { + "epoch": 0.645042194092827, + "grad_norm": 0.6599575281143188, + "learning_rate": 0.0004275793999740046, + "loss": 1.4401, + "step": 6115 + }, + { + "epoch": 0.6451476793248945, + "grad_norm": 0.8850765228271484, + "learning_rate": 0.00042735274151235953, + "loss": 1.4705, + "step": 6116 + }, + { + "epoch": 0.645253164556962, + "grad_norm": 0.6904810070991516, + "learning_rate": 0.00042712611920494865, + "loss": 1.4198, + "step": 6117 + }, + { + "epoch": 0.6453586497890296, + "grad_norm": 0.843975305557251, + "learning_rate": 0.0004268995330771661, + "loss": 1.4574, + "step": 6118 + }, + { + "epoch": 0.645464135021097, + "grad_norm": 0.7549629807472229, + "learning_rate": 0.0004266729831544017, + "loss": 1.4226, + "step": 6119 + }, + { + "epoch": 0.6455696202531646, + "grad_norm": 0.7722054123878479, + "learning_rate": 0.0004264464694620421, + "loss": 1.3973, + "step": 6120 + }, + { + "epoch": 0.6456751054852321, + "grad_norm": 0.6603596806526184, + "learning_rate": 0.00042621999202546897, + "loss": 1.4298, + "step": 6121 + }, + { + "epoch": 0.6457805907172995, + "grad_norm": 0.7577202320098877, + "learning_rate": 0.0004259935508700603, + "loss": 1.4574, + "step": 6122 + }, + { + "epoch": 0.6458860759493671, + "grad_norm": 0.7405611872673035, + "learning_rate": 0.0004257671460211898, + "loss": 1.43, + "step": 6123 + }, + { + "epoch": 0.6459915611814346, + "grad_norm": 0.7040319442749023, + "learning_rate": 0.00042554077750422736, + "loss": 1.4322, + "step": 6124 + }, + { + "epoch": 0.6460970464135021, + "grad_norm": 0.6955128312110901, + "learning_rate": 0.00042531444534453885, + "loss": 1.4313, + "step": 6125 + }, + { + "epoch": 0.6462025316455696, + "grad_norm": 0.6674414277076721, + "learning_rate": 
0.0004250881495674855, + "loss": 1.4263, + "step": 6126 + }, + { + "epoch": 0.6463080168776372, + "grad_norm": 0.7043776512145996, + "learning_rate": 0.00042486189019842535, + "loss": 1.4178, + "step": 6127 + }, + { + "epoch": 0.6464135021097046, + "grad_norm": 0.723990261554718, + "learning_rate": 0.00042463566726271137, + "loss": 1.412, + "step": 6128 + }, + { + "epoch": 0.6465189873417722, + "grad_norm": 0.6700013875961304, + "learning_rate": 0.0004244094807856936, + "loss": 1.4392, + "step": 6129 + }, + { + "epoch": 0.6466244725738397, + "grad_norm": 0.6861025094985962, + "learning_rate": 0.000424183330792717, + "loss": 1.4804, + "step": 6130 + }, + { + "epoch": 0.6467299578059071, + "grad_norm": 0.9036205410957336, + "learning_rate": 0.0004239572173091229, + "loss": 1.4715, + "step": 6131 + }, + { + "epoch": 0.6468354430379747, + "grad_norm": 0.7077530026435852, + "learning_rate": 0.0004237311403602484, + "loss": 1.4554, + "step": 6132 + }, + { + "epoch": 0.6469409282700422, + "grad_norm": 0.9841330647468567, + "learning_rate": 0.0004235050999714265, + "loss": 1.4172, + "step": 6133 + }, + { + "epoch": 0.6470464135021097, + "grad_norm": 0.681279718875885, + "learning_rate": 0.00042327909616798616, + "loss": 1.4455, + "step": 6134 + }, + { + "epoch": 0.6471518987341772, + "grad_norm": 0.9688820838928223, + "learning_rate": 0.0004230531289752523, + "loss": 1.3963, + "step": 6135 + }, + { + "epoch": 0.6472573839662448, + "grad_norm": 0.8289952278137207, + "learning_rate": 0.00042282719841854567, + "loss": 1.4595, + "step": 6136 + }, + { + "epoch": 0.6473628691983122, + "grad_norm": 0.7947161793708801, + "learning_rate": 0.0004226013045231826, + "loss": 1.4442, + "step": 6137 + }, + { + "epoch": 0.6474683544303798, + "grad_norm": 0.8499106764793396, + "learning_rate": 0.00042237544731447616, + "loss": 1.4425, + "step": 6138 + }, + { + "epoch": 0.6475738396624473, + "grad_norm": 0.8894713521003723, + "learning_rate": 0.00042214962681773457, + "loss": 1.4472, + 
"step": 6139 + }, + { + "epoch": 0.6476793248945147, + "grad_norm": 0.68406742811203, + "learning_rate": 0.0004219238430582621, + "loss": 1.3646, + "step": 6140 + }, + { + "epoch": 0.6477848101265823, + "grad_norm": 0.8097927570343018, + "learning_rate": 0.00042169809606135893, + "loss": 1.4492, + "step": 6141 + }, + { + "epoch": 0.6478902953586498, + "grad_norm": 0.7726114392280579, + "learning_rate": 0.0004214723858523212, + "loss": 1.4086, + "step": 6142 + }, + { + "epoch": 0.6479957805907173, + "grad_norm": 0.8480383157730103, + "learning_rate": 0.00042124671245644086, + "loss": 1.412, + "step": 6143 + }, + { + "epoch": 0.6481012658227848, + "grad_norm": 0.7776813507080078, + "learning_rate": 0.0004210210758990056, + "loss": 1.4568, + "step": 6144 + }, + { + "epoch": 0.6482067510548524, + "grad_norm": 0.9163914322853088, + "learning_rate": 0.00042079547620529927, + "loss": 1.4662, + "step": 6145 + }, + { + "epoch": 0.6483122362869198, + "grad_norm": 0.7229087352752686, + "learning_rate": 0.0004205699134006011, + "loss": 1.445, + "step": 6146 + }, + { + "epoch": 0.6484177215189874, + "grad_norm": 0.9654992818832397, + "learning_rate": 0.0004203443875101871, + "loss": 1.4646, + "step": 6147 + }, + { + "epoch": 0.6485232067510549, + "grad_norm": 0.7626873850822449, + "learning_rate": 0.0004201188985593283, + "loss": 1.4265, + "step": 6148 + }, + { + "epoch": 0.6486286919831223, + "grad_norm": 0.8238897323608398, + "learning_rate": 0.00041989344657329187, + "loss": 1.4626, + "step": 6149 + }, + { + "epoch": 0.6487341772151899, + "grad_norm": 0.8212975263595581, + "learning_rate": 0.0004196680315773408, + "loss": 1.4749, + "step": 6150 + }, + { + "epoch": 0.6488396624472574, + "grad_norm": 0.8740161657333374, + "learning_rate": 0.0004194426535967339, + "loss": 1.4289, + "step": 6151 + }, + { + "epoch": 0.6489451476793249, + "grad_norm": 0.7432365417480469, + "learning_rate": 0.00041921731265672613, + "loss": 1.4663, + "step": 6152 + }, + { + "epoch": 
0.6490506329113924, + "grad_norm": 0.8863314390182495, + "learning_rate": 0.0004189920087825678, + "loss": 1.4299, + "step": 6153 + }, + { + "epoch": 0.64915611814346, + "grad_norm": 0.7218431830406189, + "learning_rate": 0.00041876674199950545, + "loss": 1.4262, + "step": 6154 + }, + { + "epoch": 0.6492616033755274, + "grad_norm": 0.8231738805770874, + "learning_rate": 0.0004185415123327813, + "loss": 1.439, + "step": 6155 + }, + { + "epoch": 0.649367088607595, + "grad_norm": 0.697701632976532, + "learning_rate": 0.00041831631980763324, + "loss": 1.4133, + "step": 6156 + }, + { + "epoch": 0.6494725738396624, + "grad_norm": 0.703876793384552, + "learning_rate": 0.00041809116444929586, + "loss": 1.4364, + "step": 6157 + }, + { + "epoch": 0.6495780590717299, + "grad_norm": 0.7463996410369873, + "learning_rate": 0.00041786604628299846, + "loss": 1.4682, + "step": 6158 + }, + { + "epoch": 0.6496835443037975, + "grad_norm": 0.6971954107284546, + "learning_rate": 0.00041764096533396667, + "loss": 1.3999, + "step": 6159 + }, + { + "epoch": 0.6497890295358649, + "grad_norm": 0.7209469079971313, + "learning_rate": 0.00041741592162742214, + "loss": 1.4625, + "step": 6160 + }, + { + "epoch": 0.6498945147679325, + "grad_norm": 0.7392289638519287, + "learning_rate": 0.0004171909151885819, + "loss": 1.4272, + "step": 6161 + }, + { + "epoch": 0.65, + "grad_norm": 0.7655339241027832, + "learning_rate": 0.0004169659460426592, + "loss": 1.4215, + "step": 6162 + }, + { + "epoch": 0.6501054852320675, + "grad_norm": 0.8051302433013916, + "learning_rate": 0.00041674101421486294, + "loss": 1.3624, + "step": 6163 + }, + { + "epoch": 0.650210970464135, + "grad_norm": 0.7644410729408264, + "learning_rate": 0.00041651611973039776, + "loss": 1.3894, + "step": 6164 + }, + { + "epoch": 0.6503164556962026, + "grad_norm": 0.8805162310600281, + "learning_rate": 0.0004162912626144642, + "loss": 1.4256, + "step": 6165 + }, + { + "epoch": 0.65042194092827, + "grad_norm": 0.7010526061058044, + 
"learning_rate": 0.0004160664428922586, + "loss": 1.421, + "step": 6166 + }, + { + "epoch": 0.6505274261603375, + "grad_norm": 0.8657515048980713, + "learning_rate": 0.00041584166058897324, + "loss": 1.4087, + "step": 6167 + }, + { + "epoch": 0.6506329113924051, + "grad_norm": 0.6787301898002625, + "learning_rate": 0.00041561691572979624, + "loss": 1.4017, + "step": 6168 + }, + { + "epoch": 0.6507383966244725, + "grad_norm": 0.9783844351768494, + "learning_rate": 0.00041539220833991124, + "loss": 1.4622, + "step": 6169 + }, + { + "epoch": 0.6508438818565401, + "grad_norm": 0.6997570395469666, + "learning_rate": 0.0004151675384444978, + "loss": 1.4069, + "step": 6170 + }, + { + "epoch": 0.6509493670886076, + "grad_norm": 0.8643743991851807, + "learning_rate": 0.0004149429060687312, + "loss": 1.4353, + "step": 6171 + }, + { + "epoch": 0.6510548523206751, + "grad_norm": 0.6819087862968445, + "learning_rate": 0.00041471831123778284, + "loss": 1.4046, + "step": 6172 + }, + { + "epoch": 0.6511603375527426, + "grad_norm": 0.7372051477432251, + "learning_rate": 0.0004144937539768195, + "loss": 1.4483, + "step": 6173 + }, + { + "epoch": 0.6512658227848102, + "grad_norm": 0.8741850852966309, + "learning_rate": 0.00041426923431100396, + "loss": 1.458, + "step": 6174 + }, + { + "epoch": 0.6513713080168776, + "grad_norm": 0.7157009840011597, + "learning_rate": 0.0004140447522654946, + "loss": 1.4172, + "step": 6175 + }, + { + "epoch": 0.6514767932489451, + "grad_norm": 0.9323055148124695, + "learning_rate": 0.0004138203078654463, + "loss": 1.4443, + "step": 6176 + }, + { + "epoch": 0.6515822784810127, + "grad_norm": 0.740375816822052, + "learning_rate": 0.0004135959011360088, + "loss": 1.4412, + "step": 6177 + }, + { + "epoch": 0.6516877637130801, + "grad_norm": 0.8729636669158936, + "learning_rate": 0.000413371532102328, + "loss": 1.4328, + "step": 6178 + }, + { + "epoch": 0.6517932489451477, + "grad_norm": 0.8895946145057678, + "learning_rate": 0.0004131472007895457, + 
"loss": 1.4529, + "step": 6179 + }, + { + "epoch": 0.6518987341772152, + "grad_norm": 0.9814481139183044, + "learning_rate": 0.00041292290722279914, + "loss": 1.4407, + "step": 6180 + }, + { + "epoch": 0.6520042194092827, + "grad_norm": 1.0146093368530273, + "learning_rate": 0.00041269865142722176, + "loss": 1.4635, + "step": 6181 + }, + { + "epoch": 0.6521097046413502, + "grad_norm": 0.9451570510864258, + "learning_rate": 0.0004124744334279424, + "loss": 1.449, + "step": 6182 + }, + { + "epoch": 0.6522151898734178, + "grad_norm": 1.29966139793396, + "learning_rate": 0.0004122502532500858, + "loss": 1.4395, + "step": 6183 + }, + { + "epoch": 0.6523206751054852, + "grad_norm": 0.7501059770584106, + "learning_rate": 0.0004120261109187724, + "loss": 1.4467, + "step": 6184 + }, + { + "epoch": 0.6524261603375527, + "grad_norm": 0.9504410028457642, + "learning_rate": 0.0004118020064591184, + "loss": 1.4344, + "step": 6185 + }, + { + "epoch": 0.6525316455696203, + "grad_norm": 0.7524120211601257, + "learning_rate": 0.00041157793989623625, + "loss": 1.4539, + "step": 6186 + }, + { + "epoch": 0.6526371308016877, + "grad_norm": 0.9521394968032837, + "learning_rate": 0.0004113539112552334, + "loss": 1.4176, + "step": 6187 + }, + { + "epoch": 0.6527426160337553, + "grad_norm": 0.9936039447784424, + "learning_rate": 0.0004111299205612135, + "loss": 1.4303, + "step": 6188 + }, + { + "epoch": 0.6528481012658228, + "grad_norm": 1.1524056196212769, + "learning_rate": 0.00041090596783927583, + "loss": 1.4194, + "step": 6189 + }, + { + "epoch": 0.6529535864978903, + "grad_norm": 1.0396270751953125, + "learning_rate": 0.00041068205311451517, + "loss": 1.4707, + "step": 6190 + }, + { + "epoch": 0.6530590717299578, + "grad_norm": 0.853754997253418, + "learning_rate": 0.00041045817641202257, + "loss": 1.4602, + "step": 6191 + }, + { + "epoch": 0.6531645569620254, + "grad_norm": 1.0141412019729614, + "learning_rate": 0.00041023433775688435, + "loss": 1.4102, + "step": 6192 + }, + { + 
"epoch": 0.6532700421940928, + "grad_norm": 0.6977814435958862, + "learning_rate": 0.00041001053717418283, + "loss": 1.4381, + "step": 6193 + }, + { + "epoch": 0.6533755274261603, + "grad_norm": 1.2985401153564453, + "learning_rate": 0.000409786774688996, + "loss": 1.4, + "step": 6194 + }, + { + "epoch": 0.6534810126582279, + "grad_norm": 0.8829346895217896, + "learning_rate": 0.00040956305032639723, + "loss": 1.4491, + "step": 6195 + }, + { + "epoch": 0.6535864978902953, + "grad_norm": 0.8770232200622559, + "learning_rate": 0.0004093393641114565, + "loss": 1.4221, + "step": 6196 + }, + { + "epoch": 0.6536919831223629, + "grad_norm": 1.1198911666870117, + "learning_rate": 0.00040911571606923867, + "loss": 1.4236, + "step": 6197 + }, + { + "epoch": 0.6537974683544304, + "grad_norm": 0.7170723676681519, + "learning_rate": 0.00040889210622480467, + "loss": 1.4477, + "step": 6198 + }, + { + "epoch": 0.6539029535864979, + "grad_norm": 1.0214595794677734, + "learning_rate": 0.0004086685346032111, + "loss": 1.4682, + "step": 6199 + }, + { + "epoch": 0.6540084388185654, + "grad_norm": 0.6714975833892822, + "learning_rate": 0.00040844500122951026, + "loss": 1.4403, + "step": 6200 + }, + { + "epoch": 0.654113924050633, + "grad_norm": 0.9285281300544739, + "learning_rate": 0.0004082215061287502, + "loss": 1.4019, + "step": 6201 + }, + { + "epoch": 0.6542194092827004, + "grad_norm": 0.673658549785614, + "learning_rate": 0.00040799804932597464, + "loss": 1.426, + "step": 6202 + }, + { + "epoch": 0.6543248945147679, + "grad_norm": 1.0337517261505127, + "learning_rate": 0.00040777463084622304, + "loss": 1.4178, + "step": 6203 + }, + { + "epoch": 0.6544303797468355, + "grad_norm": 0.7043799161911011, + "learning_rate": 0.00040755125071453055, + "loss": 1.4356, + "step": 6204 + }, + { + "epoch": 0.6545358649789029, + "grad_norm": 0.8810152411460876, + "learning_rate": 0.00040732790895592764, + "loss": 1.4302, + "step": 6205 + }, + { + "epoch": 0.6546413502109705, + "grad_norm": 
0.7444444298744202, + "learning_rate": 0.00040710460559544167, + "loss": 1.4555, + "step": 6206 + }, + { + "epoch": 0.654746835443038, + "grad_norm": 0.7992115616798401, + "learning_rate": 0.0004068813406580944, + "loss": 1.4453, + "step": 6207 + }, + { + "epoch": 0.6548523206751055, + "grad_norm": 0.8316057920455933, + "learning_rate": 0.0004066581141689038, + "loss": 1.4269, + "step": 6208 + }, + { + "epoch": 0.654957805907173, + "grad_norm": 0.737636148929596, + "learning_rate": 0.00040643492615288367, + "loss": 1.4278, + "step": 6209 + }, + { + "epoch": 0.6550632911392406, + "grad_norm": 0.9111663103103638, + "learning_rate": 0.00040621177663504313, + "loss": 1.4188, + "step": 6210 + }, + { + "epoch": 0.655168776371308, + "grad_norm": 0.701818585395813, + "learning_rate": 0.0004059886656403874, + "loss": 1.4464, + "step": 6211 + }, + { + "epoch": 0.6552742616033755, + "grad_norm": 0.960300862789154, + "learning_rate": 0.00040576559319391704, + "loss": 1.4343, + "step": 6212 + }, + { + "epoch": 0.6553797468354431, + "grad_norm": 0.7175774574279785, + "learning_rate": 0.0004055425593206285, + "loss": 1.444, + "step": 6213 + }, + { + "epoch": 0.6554852320675105, + "grad_norm": 0.8583335280418396, + "learning_rate": 0.0004053195640455137, + "loss": 1.4167, + "step": 6214 + }, + { + "epoch": 0.6555907172995781, + "grad_norm": 0.8462952375411987, + "learning_rate": 0.0004050966073935602, + "loss": 1.4462, + "step": 6215 + }, + { + "epoch": 0.6556962025316456, + "grad_norm": 0.7858204245567322, + "learning_rate": 0.00040487368938975214, + "loss": 1.4605, + "step": 6216 + }, + { + "epoch": 0.6558016877637131, + "grad_norm": 0.94489985704422, + "learning_rate": 0.00040465081005906805, + "loss": 1.4348, + "step": 6217 + }, + { + "epoch": 0.6559071729957806, + "grad_norm": 0.7338283061981201, + "learning_rate": 0.00040442796942648273, + "loss": 1.454, + "step": 6218 + }, + { + "epoch": 0.6560126582278482, + "grad_norm": 0.8551020622253418, + "learning_rate": 
0.00040420516751696664, + "loss": 1.4228, + "step": 6219 + }, + { + "epoch": 0.6561181434599156, + "grad_norm": 0.7299726009368896, + "learning_rate": 0.00040398240435548583, + "loss": 1.4106, + "step": 6220 + }, + { + "epoch": 0.6562236286919831, + "grad_norm": 0.784591794013977, + "learning_rate": 0.000403759679967002, + "loss": 1.4323, + "step": 6221 + }, + { + "epoch": 0.6563291139240506, + "grad_norm": 0.764930009841919, + "learning_rate": 0.00040353699437647257, + "loss": 1.4018, + "step": 6222 + }, + { + "epoch": 0.6564345991561181, + "grad_norm": 0.8226380348205566, + "learning_rate": 0.0004033143476088504, + "loss": 1.4263, + "step": 6223 + }, + { + "epoch": 0.6565400843881857, + "grad_norm": 0.8278454542160034, + "learning_rate": 0.00040309173968908413, + "loss": 1.4225, + "step": 6224 + }, + { + "epoch": 0.6566455696202531, + "grad_norm": 0.7070448994636536, + "learning_rate": 0.0004028691706421185, + "loss": 1.4288, + "step": 6225 + }, + { + "epoch": 0.6567510548523207, + "grad_norm": 0.8195032477378845, + "learning_rate": 0.00040264664049289336, + "loss": 1.4507, + "step": 6226 + }, + { + "epoch": 0.6568565400843882, + "grad_norm": 0.7180461287498474, + "learning_rate": 0.00040242414926634415, + "loss": 1.4183, + "step": 6227 + }, + { + "epoch": 0.6569620253164556, + "grad_norm": 0.7825677990913391, + "learning_rate": 0.0004022016969874023, + "loss": 1.425, + "step": 6228 + }, + { + "epoch": 0.6570675105485232, + "grad_norm": 0.9852966666221619, + "learning_rate": 0.00040197928368099445, + "loss": 1.4232, + "step": 6229 + }, + { + "epoch": 0.6571729957805907, + "grad_norm": 0.8433512449264526, + "learning_rate": 0.00040175690937204324, + "loss": 1.4412, + "step": 6230 + }, + { + "epoch": 0.6572784810126582, + "grad_norm": 0.8569364547729492, + "learning_rate": 0.0004015345740854668, + "loss": 1.4206, + "step": 6231 + }, + { + "epoch": 0.6573839662447257, + "grad_norm": 0.7070631384849548, + "learning_rate": 0.00040131227784617876, + "loss": 1.4411, + 
"step": 6232 + }, + { + "epoch": 0.6574894514767933, + "grad_norm": 0.7336933016777039, + "learning_rate": 0.000401090020679089, + "loss": 1.4587, + "step": 6233 + }, + { + "epoch": 0.6575949367088607, + "grad_norm": 0.821647584438324, + "learning_rate": 0.00040086780260910213, + "loss": 1.4578, + "step": 6234 + }, + { + "epoch": 0.6577004219409283, + "grad_norm": 0.9195849299430847, + "learning_rate": 0.000400645623661119, + "loss": 1.4388, + "step": 6235 + }, + { + "epoch": 0.6578059071729958, + "grad_norm": 0.6719793677330017, + "learning_rate": 0.0004004234838600357, + "loss": 1.4271, + "step": 6236 + }, + { + "epoch": 0.6579113924050632, + "grad_norm": 0.8527848720550537, + "learning_rate": 0.00040020138323074427, + "loss": 1.4039, + "step": 6237 + }, + { + "epoch": 0.6580168776371308, + "grad_norm": 0.6981282830238342, + "learning_rate": 0.00039997932179813205, + "loss": 1.4214, + "step": 6238 + }, + { + "epoch": 0.6581223628691983, + "grad_norm": 0.6692190170288086, + "learning_rate": 0.00039975729958708223, + "loss": 1.3976, + "step": 6239 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 0.7339532971382141, + "learning_rate": 0.00039953531662247343, + "loss": 1.4153, + "step": 6240 + }, + { + "epoch": 0.6583333333333333, + "grad_norm": 0.7262539863586426, + "learning_rate": 0.00039931337292917966, + "loss": 1.4163, + "step": 6241 + }, + { + "epoch": 0.6584388185654009, + "grad_norm": 0.7637107372283936, + "learning_rate": 0.0003990914685320714, + "loss": 1.4312, + "step": 6242 + }, + { + "epoch": 0.6585443037974683, + "grad_norm": 0.7298907041549683, + "learning_rate": 0.00039886960345601394, + "loss": 1.4689, + "step": 6243 + }, + { + "epoch": 0.6586497890295359, + "grad_norm": 0.738838791847229, + "learning_rate": 0.00039864777772586826, + "loss": 1.4287, + "step": 6244 + }, + { + "epoch": 0.6587552742616034, + "grad_norm": 0.7987804412841797, + "learning_rate": 0.00039842599136649117, + "loss": 1.4319, + "step": 6245 + }, + { + "epoch": 
0.6588607594936708, + "grad_norm": 0.6693361401557922, + "learning_rate": 0.00039820424440273474, + "loss": 1.4006, + "step": 6246 + }, + { + "epoch": 0.6589662447257384, + "grad_norm": 0.9354111552238464, + "learning_rate": 0.000397982536859447, + "loss": 1.4314, + "step": 6247 + }, + { + "epoch": 0.6590717299578059, + "grad_norm": 0.7166111469268799, + "learning_rate": 0.00039776086876147133, + "loss": 1.4173, + "step": 6248 + }, + { + "epoch": 0.6591772151898734, + "grad_norm": 0.7021239995956421, + "learning_rate": 0.0003975392401336468, + "loss": 1.4723, + "step": 6249 + }, + { + "epoch": 0.6592827004219409, + "grad_norm": 0.8863773345947266, + "learning_rate": 0.0003973176510008075, + "loss": 1.4674, + "step": 6250 + }, + { + "epoch": 0.6593881856540085, + "grad_norm": 0.7821513414382935, + "learning_rate": 0.00039709610138778445, + "loss": 1.4155, + "step": 6251 + }, + { + "epoch": 0.6594936708860759, + "grad_norm": 0.924352765083313, + "learning_rate": 0.0003968745913194029, + "loss": 1.4398, + "step": 6252 + }, + { + "epoch": 0.6595991561181435, + "grad_norm": 0.7663027048110962, + "learning_rate": 0.0003966531208204842, + "loss": 1.4075, + "step": 6253 + }, + { + "epoch": 0.659704641350211, + "grad_norm": 0.831026554107666, + "learning_rate": 0.0003964316899158454, + "loss": 1.4314, + "step": 6254 + }, + { + "epoch": 0.6598101265822784, + "grad_norm": 0.8161025643348694, + "learning_rate": 0.00039621029863029874, + "loss": 1.4532, + "step": 6255 + }, + { + "epoch": 0.659915611814346, + "grad_norm": 0.7740626931190491, + "learning_rate": 0.00039598894698865216, + "loss": 1.4245, + "step": 6256 + }, + { + "epoch": 0.6600210970464135, + "grad_norm": 0.7156111001968384, + "learning_rate": 0.00039576763501570944, + "loss": 1.3925, + "step": 6257 + }, + { + "epoch": 0.660126582278481, + "grad_norm": 0.7208887934684753, + "learning_rate": 0.0003955463627362694, + "loss": 1.4388, + "step": 6258 + }, + { + "epoch": 0.6602320675105485, + "grad_norm": 
0.8523207902908325, + "learning_rate": 0.00039532513017512694, + "loss": 1.4452, + "step": 6259 + }, + { + "epoch": 0.6603375527426161, + "grad_norm": 0.7271676659584045, + "learning_rate": 0.00039510393735707233, + "loss": 1.4379, + "step": 6260 + }, + { + "epoch": 0.6604430379746835, + "grad_norm": 0.8330471515655518, + "learning_rate": 0.00039488278430689123, + "loss": 1.4422, + "step": 6261 + }, + { + "epoch": 0.6605485232067511, + "grad_norm": 0.8175740242004395, + "learning_rate": 0.0003946616710493649, + "loss": 1.4453, + "step": 6262 + }, + { + "epoch": 0.6606540084388186, + "grad_norm": 0.7966387271881104, + "learning_rate": 0.0003944405976092702, + "loss": 1.3941, + "step": 6263 + }, + { + "epoch": 0.660759493670886, + "grad_norm": 0.6867341995239258, + "learning_rate": 0.0003942195640113795, + "loss": 1.4755, + "step": 6264 + }, + { + "epoch": 0.6608649789029536, + "grad_norm": 0.9175127148628235, + "learning_rate": 0.00039399857028046066, + "loss": 1.4333, + "step": 6265 + }, + { + "epoch": 0.6609704641350211, + "grad_norm": 0.6844927072525024, + "learning_rate": 0.0003937776164412773, + "loss": 1.4365, + "step": 6266 + }, + { + "epoch": 0.6610759493670886, + "grad_norm": 0.9014567732810974, + "learning_rate": 0.00039355670251858805, + "loss": 1.4693, + "step": 6267 + }, + { + "epoch": 0.6611814345991561, + "grad_norm": 0.778085470199585, + "learning_rate": 0.00039333582853714793, + "loss": 1.4171, + "step": 6268 + }, + { + "epoch": 0.6612869198312237, + "grad_norm": 0.7779273390769958, + "learning_rate": 0.00039311499452170665, + "loss": 1.4681, + "step": 6269 + }, + { + "epoch": 0.6613924050632911, + "grad_norm": 0.6783804297447205, + "learning_rate": 0.00039289420049700986, + "loss": 1.398, + "step": 6270 + }, + { + "epoch": 0.6614978902953587, + "grad_norm": 0.7966026663780212, + "learning_rate": 0.0003926734464877986, + "loss": 1.4166, + "step": 6271 + }, + { + "epoch": 0.6616033755274262, + "grad_norm": 0.7244253754615784, + "learning_rate": 
0.0003924527325188095, + "loss": 1.4487, + "step": 6272 + }, + { + "epoch": 0.6617088607594936, + "grad_norm": 0.7457542419433594, + "learning_rate": 0.00039223205861477455, + "loss": 1.4875, + "step": 6273 + }, + { + "epoch": 0.6618143459915612, + "grad_norm": 0.674597442150116, + "learning_rate": 0.00039201142480042145, + "loss": 1.4327, + "step": 6274 + }, + { + "epoch": 0.6619198312236287, + "grad_norm": 0.6917687654495239, + "learning_rate": 0.0003917908311004732, + "loss": 1.436, + "step": 6275 + }, + { + "epoch": 0.6620253164556962, + "grad_norm": 0.6822735071182251, + "learning_rate": 0.0003915702775396483, + "loss": 1.4217, + "step": 6276 + }, + { + "epoch": 0.6621308016877637, + "grad_norm": 0.7359282374382019, + "learning_rate": 0.0003913497641426614, + "loss": 1.42, + "step": 6277 + }, + { + "epoch": 0.6622362869198313, + "grad_norm": 0.6995983719825745, + "learning_rate": 0.00039112929093422185, + "loss": 1.4545, + "step": 6278 + }, + { + "epoch": 0.6623417721518987, + "grad_norm": 0.6962149739265442, + "learning_rate": 0.0003909088579390347, + "loss": 1.4294, + "step": 6279 + }, + { + "epoch": 0.6624472573839663, + "grad_norm": 0.7091727256774902, + "learning_rate": 0.0003906884651818006, + "loss": 1.4322, + "step": 6280 + }, + { + "epoch": 0.6625527426160338, + "grad_norm": 0.7090558409690857, + "learning_rate": 0.0003904681126872157, + "loss": 1.3835, + "step": 6281 + }, + { + "epoch": 0.6626582278481012, + "grad_norm": 0.6778234839439392, + "learning_rate": 0.00039024780047997157, + "loss": 1.4186, + "step": 6282 + }, + { + "epoch": 0.6627637130801688, + "grad_norm": 0.712345540523529, + "learning_rate": 0.00039002752858475527, + "loss": 1.4099, + "step": 6283 + }, + { + "epoch": 0.6628691983122363, + "grad_norm": 0.7054233551025391, + "learning_rate": 0.00038980729702624896, + "loss": 1.4738, + "step": 6284 + }, + { + "epoch": 0.6629746835443038, + "grad_norm": 0.7338751554489136, + "learning_rate": 0.00038958710582913153, + "loss": 1.4479, + 
"step": 6285 + }, + { + "epoch": 0.6630801687763713, + "grad_norm": 0.8124212026596069, + "learning_rate": 0.0003893669550180761, + "loss": 1.441, + "step": 6286 + }, + { + "epoch": 0.6631856540084389, + "grad_norm": 0.6751219630241394, + "learning_rate": 0.00038914684461775154, + "loss": 1.4516, + "step": 6287 + }, + { + "epoch": 0.6632911392405063, + "grad_norm": 0.7042079567909241, + "learning_rate": 0.0003889267746528225, + "loss": 1.4447, + "step": 6288 + }, + { + "epoch": 0.6633966244725739, + "grad_norm": 0.736190676689148, + "learning_rate": 0.00038870674514794877, + "loss": 1.4448, + "step": 6289 + }, + { + "epoch": 0.6635021097046413, + "grad_norm": 0.6742278933525085, + "learning_rate": 0.00038848675612778577, + "loss": 1.4077, + "step": 6290 + }, + { + "epoch": 0.6636075949367088, + "grad_norm": 0.7512586712837219, + "learning_rate": 0.0003882668076169846, + "loss": 1.3904, + "step": 6291 + }, + { + "epoch": 0.6637130801687764, + "grad_norm": 0.7198797464370728, + "learning_rate": 0.0003880468996401912, + "loss": 1.4485, + "step": 6292 + }, + { + "epoch": 0.6638185654008438, + "grad_norm": 0.7546441555023193, + "learning_rate": 0.0003878270322220474, + "loss": 1.4209, + "step": 6293 + }, + { + "epoch": 0.6639240506329114, + "grad_norm": 0.672562301158905, + "learning_rate": 0.00038760720538719086, + "loss": 1.4439, + "step": 6294 + }, + { + "epoch": 0.6640295358649789, + "grad_norm": 0.6930740475654602, + "learning_rate": 0.0003873874191602539, + "loss": 1.43, + "step": 6295 + }, + { + "epoch": 0.6641350210970464, + "grad_norm": 0.7328329086303711, + "learning_rate": 0.00038716767356586487, + "loss": 1.4426, + "step": 6296 + }, + { + "epoch": 0.6642405063291139, + "grad_norm": 0.6719995737075806, + "learning_rate": 0.00038694796862864724, + "loss": 1.4311, + "step": 6297 + }, + { + "epoch": 0.6643459915611815, + "grad_norm": 0.8616061210632324, + "learning_rate": 0.00038672830437322007, + "loss": 1.4466, + "step": 6298 + }, + { + "epoch": 
0.6644514767932489, + "grad_norm": 0.6600374579429626, + "learning_rate": 0.0003865086808241979, + "loss": 1.3983, + "step": 6299 + }, + { + "epoch": 0.6645569620253164, + "grad_norm": 0.8086583018302917, + "learning_rate": 0.00038628909800619046, + "loss": 1.3998, + "step": 6300 + }, + { + "epoch": 0.664662447257384, + "grad_norm": 0.7222776412963867, + "learning_rate": 0.00038606955594380326, + "loss": 1.4232, + "step": 6301 + }, + { + "epoch": 0.6647679324894514, + "grad_norm": 0.7395935654640198, + "learning_rate": 0.0003858500546616368, + "loss": 1.4128, + "step": 6302 + }, + { + "epoch": 0.664873417721519, + "grad_norm": 0.8255496025085449, + "learning_rate": 0.0003856305941842878, + "loss": 1.3969, + "step": 6303 + }, + { + "epoch": 0.6649789029535865, + "grad_norm": 0.6964786648750305, + "learning_rate": 0.0003854111745363476, + "loss": 1.4118, + "step": 6304 + }, + { + "epoch": 0.665084388185654, + "grad_norm": 0.9598518013954163, + "learning_rate": 0.00038519179574240324, + "loss": 1.4227, + "step": 6305 + }, + { + "epoch": 0.6651898734177215, + "grad_norm": 0.674996018409729, + "learning_rate": 0.0003849724578270374, + "loss": 1.4089, + "step": 6306 + }, + { + "epoch": 0.6652953586497891, + "grad_norm": 0.734449028968811, + "learning_rate": 0.0003847531608148277, + "loss": 1.4651, + "step": 6307 + }, + { + "epoch": 0.6654008438818565, + "grad_norm": 0.8090888261795044, + "learning_rate": 0.0003845339047303477, + "loss": 1.4341, + "step": 6308 + }, + { + "epoch": 0.665506329113924, + "grad_norm": 0.9205303192138672, + "learning_rate": 0.0003843146895981661, + "loss": 1.4116, + "step": 6309 + }, + { + "epoch": 0.6656118143459916, + "grad_norm": 0.8171675801277161, + "learning_rate": 0.0003840955154428467, + "loss": 1.4462, + "step": 6310 + }, + { + "epoch": 0.665717299578059, + "grad_norm": 1.031058430671692, + "learning_rate": 0.0003838763822889495, + "loss": 1.4418, + "step": 6311 + }, + { + "epoch": 0.6658227848101266, + "grad_norm": 0.9729369878768921, 
+ "learning_rate": 0.0003836572901610295, + "loss": 1.3859, + "step": 6312 + }, + { + "epoch": 0.6659282700421941, + "grad_norm": 1.1447495222091675, + "learning_rate": 0.0003834382390836368, + "loss": 1.4284, + "step": 6313 + }, + { + "epoch": 0.6660337552742616, + "grad_norm": 1.3820630311965942, + "learning_rate": 0.00038321922908131736, + "loss": 1.421, + "step": 6314 + }, + { + "epoch": 0.6661392405063291, + "grad_norm": 0.8157326579093933, + "learning_rate": 0.0003830002601786121, + "loss": 1.3978, + "step": 6315 + }, + { + "epoch": 0.6662447257383967, + "grad_norm": 1.218424916267395, + "learning_rate": 0.0003827813324000578, + "loss": 1.4245, + "step": 6316 + }, + { + "epoch": 0.6663502109704641, + "grad_norm": 0.6975057125091553, + "learning_rate": 0.0003825624457701863, + "loss": 1.4504, + "step": 6317 + }, + { + "epoch": 0.6664556962025316, + "grad_norm": 0.9054881930351257, + "learning_rate": 0.00038234360031352485, + "loss": 1.423, + "step": 6318 + }, + { + "epoch": 0.6665611814345992, + "grad_norm": 0.8867390155792236, + "learning_rate": 0.00038212479605459617, + "loss": 1.4406, + "step": 6319 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.9144896268844604, + "learning_rate": 0.00038190603301791864, + "loss": 1.4054, + "step": 6320 + }, + { + "epoch": 0.6667721518987342, + "grad_norm": 0.8585034608840942, + "learning_rate": 0.0003816873112280056, + "loss": 1.4442, + "step": 6321 + }, + { + "epoch": 0.6668776371308017, + "grad_norm": 0.70639568567276, + "learning_rate": 0.00038146863070936607, + "loss": 1.4292, + "step": 6322 + }, + { + "epoch": 0.6669831223628692, + "grad_norm": 0.9538993239402771, + "learning_rate": 0.0003812499914865039, + "loss": 1.4193, + "step": 6323 + }, + { + "epoch": 0.6670886075949367, + "grad_norm": 0.7127643823623657, + "learning_rate": 0.00038103139358391914, + "loss": 1.4739, + "step": 6324 + }, + { + "epoch": 0.6671940928270043, + "grad_norm": 1.199114441871643, + "learning_rate": 0.0003808128370261065, + 
"loss": 1.4333, + "step": 6325 + }, + { + "epoch": 0.6672995780590717, + "grad_norm": 0.7265287637710571, + "learning_rate": 0.00038059432183755633, + "loss": 1.4096, + "step": 6326 + }, + { + "epoch": 0.6674050632911392, + "grad_norm": 1.0102335214614868, + "learning_rate": 0.0003803758480427544, + "loss": 1.4431, + "step": 6327 + }, + { + "epoch": 0.6675105485232068, + "grad_norm": 0.7252889275550842, + "learning_rate": 0.0003801574156661817, + "loss": 1.3651, + "step": 6328 + }, + { + "epoch": 0.6676160337552742, + "grad_norm": 0.9289504885673523, + "learning_rate": 0.000379939024732315, + "loss": 1.4245, + "step": 6329 + }, + { + "epoch": 0.6677215189873418, + "grad_norm": 0.9160996675491333, + "learning_rate": 0.0003797206752656258, + "loss": 1.4685, + "step": 6330 + }, + { + "epoch": 0.6678270042194093, + "grad_norm": 0.9524758458137512, + "learning_rate": 0.0003795023672905814, + "loss": 1.39, + "step": 6331 + }, + { + "epoch": 0.6679324894514768, + "grad_norm": 0.8722214102745056, + "learning_rate": 0.00037928410083164416, + "loss": 1.4587, + "step": 6332 + }, + { + "epoch": 0.6680379746835443, + "grad_norm": 0.6684061288833618, + "learning_rate": 0.0003790658759132719, + "loss": 1.434, + "step": 6333 + }, + { + "epoch": 0.6681434599156119, + "grad_norm": 0.8290054202079773, + "learning_rate": 0.0003788476925599181, + "loss": 1.4172, + "step": 6334 + }, + { + "epoch": 0.6682489451476793, + "grad_norm": 0.6724562048912048, + "learning_rate": 0.00037862955079603086, + "loss": 1.3983, + "step": 6335 + }, + { + "epoch": 0.6683544303797468, + "grad_norm": 0.6486767530441284, + "learning_rate": 0.00037841145064605416, + "loss": 1.4506, + "step": 6336 + }, + { + "epoch": 0.6684599156118144, + "grad_norm": 0.7457849979400635, + "learning_rate": 0.00037819339213442744, + "loss": 1.4232, + "step": 6337 + }, + { + "epoch": 0.6685654008438818, + "grad_norm": 0.6613075733184814, + "learning_rate": 0.0003779753752855853, + "loss": 1.4069, + "step": 6338 + }, + { + 
"epoch": 0.6686708860759494, + "grad_norm": 0.6527712345123291, + "learning_rate": 0.0003777574001239573, + "loss": 1.4438, + "step": 6339 + }, + { + "epoch": 0.6687763713080169, + "grad_norm": 0.6642407178878784, + "learning_rate": 0.0003775394666739688, + "loss": 1.4117, + "step": 6340 + }, + { + "epoch": 0.6688818565400844, + "grad_norm": 0.7670342326164246, + "learning_rate": 0.0003773215749600404, + "loss": 1.4138, + "step": 6341 + }, + { + "epoch": 0.6689873417721519, + "grad_norm": 0.6867917776107788, + "learning_rate": 0.0003771037250065878, + "loss": 1.4686, + "step": 6342 + }, + { + "epoch": 0.6690928270042195, + "grad_norm": 0.8208602070808411, + "learning_rate": 0.0003768859168380223, + "loss": 1.4424, + "step": 6343 + }, + { + "epoch": 0.6691983122362869, + "grad_norm": 0.7001321315765381, + "learning_rate": 0.0003766681504787503, + "loss": 1.4195, + "step": 6344 + }, + { + "epoch": 0.6693037974683544, + "grad_norm": 0.6630799174308777, + "learning_rate": 0.0003764504259531734, + "loss": 1.4442, + "step": 6345 + }, + { + "epoch": 0.669409282700422, + "grad_norm": 0.7111759185791016, + "learning_rate": 0.0003762327432856892, + "loss": 1.432, + "step": 6346 + }, + { + "epoch": 0.6695147679324894, + "grad_norm": 0.6817337274551392, + "learning_rate": 0.00037601510250068984, + "loss": 1.4093, + "step": 6347 + }, + { + "epoch": 0.669620253164557, + "grad_norm": 0.693038284778595, + "learning_rate": 0.0003757975036225632, + "loss": 1.41, + "step": 6348 + }, + { + "epoch": 0.6697257383966245, + "grad_norm": 0.729289174079895, + "learning_rate": 0.00037557994667569217, + "loss": 1.4129, + "step": 6349 + }, + { + "epoch": 0.669831223628692, + "grad_norm": 0.6992886662483215, + "learning_rate": 0.00037536243168445507, + "loss": 1.4321, + "step": 6350 + }, + { + "epoch": 0.6699367088607595, + "grad_norm": 0.748982310295105, + "learning_rate": 0.0003751449586732257, + "loss": 1.4313, + "step": 6351 + }, + { + "epoch": 0.6700421940928271, + "grad_norm": 
0.8401532173156738, + "learning_rate": 0.0003749275276663729, + "loss": 1.4198, + "step": 6352 + }, + { + "epoch": 0.6701476793248945, + "grad_norm": 0.7941371202468872, + "learning_rate": 0.0003747101386882609, + "loss": 1.4078, + "step": 6353 + }, + { + "epoch": 0.670253164556962, + "grad_norm": 0.7913784980773926, + "learning_rate": 0.0003744927917632489, + "loss": 1.4642, + "step": 6354 + }, + { + "epoch": 0.6703586497890295, + "grad_norm": 0.7748304009437561, + "learning_rate": 0.00037427548691569237, + "loss": 1.386, + "step": 6355 + }, + { + "epoch": 0.670464135021097, + "grad_norm": 0.6860601902008057, + "learning_rate": 0.000374058224169941, + "loss": 1.4232, + "step": 6356 + }, + { + "epoch": 0.6705696202531646, + "grad_norm": 0.724251925945282, + "learning_rate": 0.00037384100355034033, + "loss": 1.4256, + "step": 6357 + }, + { + "epoch": 0.670675105485232, + "grad_norm": 0.8761030435562134, + "learning_rate": 0.0003736238250812308, + "loss": 1.4613, + "step": 6358 + }, + { + "epoch": 0.6707805907172996, + "grad_norm": 0.6972000598907471, + "learning_rate": 0.0003734066887869485, + "loss": 1.4151, + "step": 6359 + }, + { + "epoch": 0.6708860759493671, + "grad_norm": 0.6921382546424866, + "learning_rate": 0.0003731895946918246, + "loss": 1.4431, + "step": 6360 + }, + { + "epoch": 0.6709915611814345, + "grad_norm": 0.6991234421730042, + "learning_rate": 0.0003729725428201856, + "loss": 1.403, + "step": 6361 + }, + { + "epoch": 0.6710970464135021, + "grad_norm": 0.7702720761299133, + "learning_rate": 0.00037275553319635285, + "loss": 1.4319, + "step": 6362 + }, + { + "epoch": 0.6712025316455696, + "grad_norm": 0.703531801700592, + "learning_rate": 0.000372538565844644, + "loss": 1.4355, + "step": 6363 + }, + { + "epoch": 0.6713080168776371, + "grad_norm": 0.7415279746055603, + "learning_rate": 0.00037232164078937106, + "loss": 1.4023, + "step": 6364 + }, + { + "epoch": 0.6714135021097046, + "grad_norm": 0.6823393702507019, + "learning_rate": 
0.00037210475805484156, + "loss": 1.3955, + "step": 6365 + }, + { + "epoch": 0.6715189873417722, + "grad_norm": 0.6622892618179321, + "learning_rate": 0.00037188791766535825, + "loss": 1.3865, + "step": 6366 + }, + { + "epoch": 0.6716244725738396, + "grad_norm": 0.7232235670089722, + "learning_rate": 0.0003716711196452192, + "loss": 1.3602, + "step": 6367 + }, + { + "epoch": 0.6717299578059072, + "grad_norm": 0.7045242786407471, + "learning_rate": 0.0003714543640187177, + "loss": 1.4065, + "step": 6368 + }, + { + "epoch": 0.6718354430379747, + "grad_norm": 0.7484189867973328, + "learning_rate": 0.0003712376508101424, + "loss": 1.4336, + "step": 6369 + }, + { + "epoch": 0.6719409282700421, + "grad_norm": 0.7035188674926758, + "learning_rate": 0.0003710209800437769, + "loss": 1.4211, + "step": 6370 + }, + { + "epoch": 0.6720464135021097, + "grad_norm": 0.8502385020256042, + "learning_rate": 0.00037080435174390014, + "loss": 1.3887, + "step": 6371 + }, + { + "epoch": 0.6721518987341772, + "grad_norm": 0.7363391518592834, + "learning_rate": 0.00037058776593478675, + "loss": 1.4411, + "step": 6372 + }, + { + "epoch": 0.6722573839662447, + "grad_norm": 0.8723475933074951, + "learning_rate": 0.00037037122264070625, + "loss": 1.4349, + "step": 6373 + }, + { + "epoch": 0.6723628691983122, + "grad_norm": 0.7015379667282104, + "learning_rate": 0.0003701547218859232, + "loss": 1.4037, + "step": 6374 + }, + { + "epoch": 0.6724683544303798, + "grad_norm": 0.8374812602996826, + "learning_rate": 0.0003699382636946977, + "loss": 1.3856, + "step": 6375 + }, + { + "epoch": 0.6725738396624472, + "grad_norm": 0.6864029765129089, + "learning_rate": 0.0003697218480912848, + "loss": 1.4043, + "step": 6376 + }, + { + "epoch": 0.6726793248945148, + "grad_norm": 0.8256092071533203, + "learning_rate": 0.0003695054750999352, + "loss": 1.4405, + "step": 6377 + }, + { + "epoch": 0.6727848101265823, + "grad_norm": 0.7043921947479248, + "learning_rate": 0.0003692891447448943, + "loss": 1.4175, + 
"step": 6378 + }, + { + "epoch": 0.6728902953586497, + "grad_norm": 0.7246366143226624, + "learning_rate": 0.0003690728570504032, + "loss": 1.4232, + "step": 6379 + }, + { + "epoch": 0.6729957805907173, + "grad_norm": 0.9472194314002991, + "learning_rate": 0.00036885661204069767, + "loss": 1.404, + "step": 6380 + }, + { + "epoch": 0.6731012658227848, + "grad_norm": 0.7072873711585999, + "learning_rate": 0.00036864040974000955, + "loss": 1.4284, + "step": 6381 + }, + { + "epoch": 0.6732067510548523, + "grad_norm": 1.2528939247131348, + "learning_rate": 0.0003684242501725652, + "loss": 1.4111, + "step": 6382 + }, + { + "epoch": 0.6733122362869198, + "grad_norm": 0.7811903953552246, + "learning_rate": 0.00036820813336258624, + "loss": 1.4814, + "step": 6383 + }, + { + "epoch": 0.6734177215189874, + "grad_norm": 0.9299925565719604, + "learning_rate": 0.0003679920593342898, + "loss": 1.4163, + "step": 6384 + }, + { + "epoch": 0.6735232067510548, + "grad_norm": 0.8904517889022827, + "learning_rate": 0.0003677760281118879, + "loss": 1.4624, + "step": 6385 + }, + { + "epoch": 0.6736286919831224, + "grad_norm": 0.7195829153060913, + "learning_rate": 0.0003675600397195881, + "loss": 1.4582, + "step": 6386 + }, + { + "epoch": 0.6737341772151899, + "grad_norm": 0.9464041590690613, + "learning_rate": 0.0003673440941815928, + "loss": 1.4194, + "step": 6387 + }, + { + "epoch": 0.6738396624472573, + "grad_norm": 0.7809550762176514, + "learning_rate": 0.00036712819152209954, + "loss": 1.4418, + "step": 6388 + }, + { + "epoch": 0.6739451476793249, + "grad_norm": 1.251412034034729, + "learning_rate": 0.00036691233176530197, + "loss": 1.4368, + "step": 6389 + }, + { + "epoch": 0.6740506329113924, + "grad_norm": 0.8380163311958313, + "learning_rate": 0.0003666965149353878, + "loss": 1.369, + "step": 6390 + }, + { + "epoch": 0.6741561181434599, + "grad_norm": 0.7653887271881104, + "learning_rate": 0.00036648074105654043, + "loss": 1.4364, + "step": 6391 + }, + { + "epoch": 
0.6742616033755274, + "grad_norm": 0.7987803220748901, + "learning_rate": 0.0003662650101529385, + "loss": 1.4691, + "step": 6392 + }, + { + "epoch": 0.674367088607595, + "grad_norm": 0.853638768196106, + "learning_rate": 0.00036604932224875564, + "loss": 1.4081, + "step": 6393 + }, + { + "epoch": 0.6744725738396624, + "grad_norm": 0.6703673005104065, + "learning_rate": 0.0003658336773681607, + "loss": 1.4224, + "step": 6394 + }, + { + "epoch": 0.67457805907173, + "grad_norm": 0.8814274668693542, + "learning_rate": 0.0003656180755353179, + "loss": 1.4156, + "step": 6395 + }, + { + "epoch": 0.6746835443037975, + "grad_norm": 0.7872374653816223, + "learning_rate": 0.0003654025167743864, + "loss": 1.4115, + "step": 6396 + }, + { + "epoch": 0.674789029535865, + "grad_norm": 0.711276650428772, + "learning_rate": 0.0003651870011095204, + "loss": 1.4714, + "step": 6397 + }, + { + "epoch": 0.6748945147679325, + "grad_norm": 0.867887020111084, + "learning_rate": 0.0003649715285648701, + "loss": 1.3977, + "step": 6398 + }, + { + "epoch": 0.675, + "grad_norm": 0.763722836971283, + "learning_rate": 0.00036475609916457996, + "loss": 1.4133, + "step": 6399 + }, + { + "epoch": 0.6751054852320675, + "grad_norm": 0.7348065972328186, + "learning_rate": 0.0003645407129327898, + "loss": 1.4508, + "step": 6400 + }, + { + "epoch": 0.675210970464135, + "grad_norm": 0.7430456876754761, + "learning_rate": 0.0003643253698936349, + "loss": 1.3949, + "step": 6401 + }, + { + "epoch": 0.6753164556962026, + "grad_norm": 0.7310472726821899, + "learning_rate": 0.00036411007007124547, + "loss": 1.4296, + "step": 6402 + }, + { + "epoch": 0.67542194092827, + "grad_norm": 0.7564829587936401, + "learning_rate": 0.0003638948134897469, + "loss": 1.4578, + "step": 6403 + }, + { + "epoch": 0.6755274261603376, + "grad_norm": 0.7570948004722595, + "learning_rate": 0.0003636796001732597, + "loss": 1.4644, + "step": 6404 + }, + { + "epoch": 0.6756329113924051, + "grad_norm": 0.7433801293373108, + 
"learning_rate": 0.00036346443014589983, + "loss": 1.4394, + "step": 6405 + }, + { + "epoch": 0.6757383966244725, + "grad_norm": 0.7909365296363831, + "learning_rate": 0.00036324930343177754, + "loss": 1.4345, + "step": 6406 + }, + { + "epoch": 0.6758438818565401, + "grad_norm": 0.7324243783950806, + "learning_rate": 0.0003630342200549997, + "loss": 1.4185, + "step": 6407 + }, + { + "epoch": 0.6759493670886076, + "grad_norm": 0.756072998046875, + "learning_rate": 0.000362819180039667, + "loss": 1.4034, + "step": 6408 + }, + { + "epoch": 0.6760548523206751, + "grad_norm": 0.71491938829422, + "learning_rate": 0.000362604183409876, + "loss": 1.4095, + "step": 6409 + }, + { + "epoch": 0.6761603375527426, + "grad_norm": 0.6998830437660217, + "learning_rate": 0.00036238923018971783, + "loss": 1.3928, + "step": 6410 + }, + { + "epoch": 0.6762658227848102, + "grad_norm": 0.7862604856491089, + "learning_rate": 0.00036217432040327926, + "loss": 1.4372, + "step": 6411 + }, + { + "epoch": 0.6763713080168776, + "grad_norm": 0.7424776554107666, + "learning_rate": 0.000361959454074642, + "loss": 1.3983, + "step": 6412 + }, + { + "epoch": 0.6764767932489452, + "grad_norm": 0.798620343208313, + "learning_rate": 0.00036174463122788273, + "loss": 1.3979, + "step": 6413 + }, + { + "epoch": 0.6765822784810127, + "grad_norm": 0.7863502502441406, + "learning_rate": 0.00036152985188707344, + "loss": 1.3985, + "step": 6414 + }, + { + "epoch": 0.6766877637130801, + "grad_norm": 0.7246456146240234, + "learning_rate": 0.0003613151160762815, + "loss": 1.4354, + "step": 6415 + }, + { + "epoch": 0.6767932489451477, + "grad_norm": 0.7861909866333008, + "learning_rate": 0.00036110042381956895, + "loss": 1.3933, + "step": 6416 + }, + { + "epoch": 0.6768987341772152, + "grad_norm": 0.7120101451873779, + "learning_rate": 0.00036088577514099325, + "loss": 1.4323, + "step": 6417 + }, + { + "epoch": 0.6770042194092827, + "grad_norm": 0.7946633100509644, + "learning_rate": 0.0003606711700646067, + 
"loss": 1.4406, + "step": 6418 + }, + { + "epoch": 0.6771097046413502, + "grad_norm": 0.7549168467521667, + "learning_rate": 0.00036045660861445684, + "loss": 1.3911, + "step": 6419 + }, + { + "epoch": 0.6772151898734177, + "grad_norm": 0.725592851638794, + "learning_rate": 0.0003602420908145865, + "loss": 1.4348, + "step": 6420 + }, + { + "epoch": 0.6773206751054852, + "grad_norm": 0.6948243975639343, + "learning_rate": 0.00036002761668903335, + "loss": 1.4197, + "step": 6421 + }, + { + "epoch": 0.6774261603375528, + "grad_norm": 0.7735552191734314, + "learning_rate": 0.0003598131862618304, + "loss": 1.4067, + "step": 6422 + }, + { + "epoch": 0.6775316455696202, + "grad_norm": 0.6953318119049072, + "learning_rate": 0.0003595987995570052, + "loss": 1.4029, + "step": 6423 + }, + { + "epoch": 0.6776371308016877, + "grad_norm": 0.790732204914093, + "learning_rate": 0.0003593844565985815, + "loss": 1.4516, + "step": 6424 + }, + { + "epoch": 0.6777426160337553, + "grad_norm": 0.7174103856086731, + "learning_rate": 0.00035917015741057727, + "loss": 1.4565, + "step": 6425 + }, + { + "epoch": 0.6778481012658227, + "grad_norm": 0.7688499689102173, + "learning_rate": 0.0003589559020170058, + "loss": 1.4188, + "step": 6426 + }, + { + "epoch": 0.6779535864978903, + "grad_norm": 0.699966311454773, + "learning_rate": 0.00035874169044187537, + "loss": 1.4337, + "step": 6427 + }, + { + "epoch": 0.6780590717299578, + "grad_norm": 0.7263235449790955, + "learning_rate": 0.00035852752270918955, + "loss": 1.4216, + "step": 6428 + }, + { + "epoch": 0.6781645569620253, + "grad_norm": 0.6722891330718994, + "learning_rate": 0.0003583133988429468, + "loss": 1.384, + "step": 6429 + }, + { + "epoch": 0.6782700421940928, + "grad_norm": 0.7406280040740967, + "learning_rate": 0.00035809931886714093, + "loss": 1.4433, + "step": 6430 + }, + { + "epoch": 0.6783755274261604, + "grad_norm": 0.7879452705383301, + "learning_rate": 0.00035788528280576053, + "loss": 1.3968, + "step": 6431 + }, + { + 
"epoch": 0.6784810126582278, + "grad_norm": 0.7134490013122559, + "learning_rate": 0.0003576712906827892, + "loss": 1.4178, + "step": 6432 + }, + { + "epoch": 0.6785864978902953, + "grad_norm": 0.8049954771995544, + "learning_rate": 0.00035745734252220633, + "loss": 1.3884, + "step": 6433 + }, + { + "epoch": 0.6786919831223629, + "grad_norm": 0.7558786273002625, + "learning_rate": 0.00035724343834798566, + "loss": 1.4131, + "step": 6434 + }, + { + "epoch": 0.6787974683544303, + "grad_norm": 0.955927312374115, + "learning_rate": 0.00035702957818409606, + "loss": 1.3887, + "step": 6435 + }, + { + "epoch": 0.6789029535864979, + "grad_norm": 0.7124733924865723, + "learning_rate": 0.0003568157620545019, + "loss": 1.4217, + "step": 6436 + }, + { + "epoch": 0.6790084388185654, + "grad_norm": 0.8267397284507751, + "learning_rate": 0.00035660198998316213, + "loss": 1.4058, + "step": 6437 + }, + { + "epoch": 0.6791139240506329, + "grad_norm": 0.7471349239349365, + "learning_rate": 0.00035638826199403103, + "loss": 1.416, + "step": 6438 + }, + { + "epoch": 0.6792194092827004, + "grad_norm": 0.7042402625083923, + "learning_rate": 0.0003561745781110579, + "loss": 1.4092, + "step": 6439 + }, + { + "epoch": 0.679324894514768, + "grad_norm": 0.8493863344192505, + "learning_rate": 0.00035596093835818683, + "loss": 1.4185, + "step": 6440 + }, + { + "epoch": 0.6794303797468354, + "grad_norm": 0.7049119472503662, + "learning_rate": 0.0003557473427593578, + "loss": 1.4566, + "step": 6441 + }, + { + "epoch": 0.679535864978903, + "grad_norm": 0.6927182078361511, + "learning_rate": 0.0003555337913385048, + "loss": 1.3763, + "step": 6442 + }, + { + "epoch": 0.6796413502109705, + "grad_norm": 0.7183944582939148, + "learning_rate": 0.0003553202841195576, + "loss": 1.4244, + "step": 6443 + }, + { + "epoch": 0.6797468354430379, + "grad_norm": 0.701518177986145, + "learning_rate": 0.00035510682112644055, + "loss": 1.431, + "step": 6444 + }, + { + "epoch": 0.6798523206751055, + "grad_norm": 
0.7628350257873535, + "learning_rate": 0.00035489340238307326, + "loss": 1.4283, + "step": 6445 + }, + { + "epoch": 0.679957805907173, + "grad_norm": 0.6826925277709961, + "learning_rate": 0.00035468002791337047, + "loss": 1.4118, + "step": 6446 + }, + { + "epoch": 0.6800632911392405, + "grad_norm": 0.7782007455825806, + "learning_rate": 0.0003544666977412418, + "loss": 1.413, + "step": 6447 + }, + { + "epoch": 0.680168776371308, + "grad_norm": 0.7354628443717957, + "learning_rate": 0.000354253411890592, + "loss": 1.3945, + "step": 6448 + }, + { + "epoch": 0.6802742616033756, + "grad_norm": 0.6905533671379089, + "learning_rate": 0.00035404017038532045, + "loss": 1.3872, + "step": 6449 + }, + { + "epoch": 0.680379746835443, + "grad_norm": 0.7688775062561035, + "learning_rate": 0.00035382697324932245, + "loss": 1.4313, + "step": 6450 + }, + { + "epoch": 0.6804852320675105, + "grad_norm": 0.7204486131668091, + "learning_rate": 0.0003536138205064877, + "loss": 1.4034, + "step": 6451 + }, + { + "epoch": 0.6805907172995781, + "grad_norm": 0.782409131526947, + "learning_rate": 0.0003534007121807009, + "loss": 1.458, + "step": 6452 + }, + { + "epoch": 0.6806962025316455, + "grad_norm": 0.8545967936515808, + "learning_rate": 0.00035318764829584185, + "loss": 1.4585, + "step": 6453 + }, + { + "epoch": 0.6808016877637131, + "grad_norm": 0.8605248928070068, + "learning_rate": 0.0003529746288757856, + "loss": 1.4003, + "step": 6454 + }, + { + "epoch": 0.6809071729957806, + "grad_norm": 0.8218358755111694, + "learning_rate": 0.0003527616539444019, + "loss": 1.4162, + "step": 6455 + }, + { + "epoch": 0.6810126582278481, + "grad_norm": 0.695048987865448, + "learning_rate": 0.0003525487235255556, + "loss": 1.4083, + "step": 6456 + }, + { + "epoch": 0.6811181434599156, + "grad_norm": 1.0243144035339355, + "learning_rate": 0.0003523358376431068, + "loss": 1.4449, + "step": 6457 + }, + { + "epoch": 0.6812236286919832, + "grad_norm": 0.7074039578437805, + "learning_rate": 
0.00035212299632090996, + "loss": 1.4224, + "step": 6458 + }, + { + "epoch": 0.6813291139240506, + "grad_norm": 0.8806515336036682, + "learning_rate": 0.00035191019958281575, + "loss": 1.4388, + "step": 6459 + }, + { + "epoch": 0.6814345991561181, + "grad_norm": 1.0331926345825195, + "learning_rate": 0.00035169744745266866, + "loss": 1.442, + "step": 6460 + }, + { + "epoch": 0.6815400843881857, + "grad_norm": 0.9323078393936157, + "learning_rate": 0.0003514847399543087, + "loss": 1.4147, + "step": 6461 + }, + { + "epoch": 0.6816455696202531, + "grad_norm": 0.9240701198577881, + "learning_rate": 0.00035127207711157084, + "loss": 1.4203, + "step": 6462 + }, + { + "epoch": 0.6817510548523207, + "grad_norm": 0.7543449401855469, + "learning_rate": 0.00035105945894828495, + "loss": 1.412, + "step": 6463 + }, + { + "epoch": 0.6818565400843882, + "grad_norm": 0.828427791595459, + "learning_rate": 0.000350846885488276, + "loss": 1.426, + "step": 6464 + }, + { + "epoch": 0.6819620253164557, + "grad_norm": 1.0091421604156494, + "learning_rate": 0.00035063435675536386, + "loss": 1.4188, + "step": 6465 + }, + { + "epoch": 0.6820675105485232, + "grad_norm": 1.0981940031051636, + "learning_rate": 0.00035042187277336325, + "loss": 1.395, + "step": 6466 + }, + { + "epoch": 0.6821729957805908, + "grad_norm": 0.8059123158454895, + "learning_rate": 0.00035020943356608444, + "loss": 1.4106, + "step": 6467 + }, + { + "epoch": 0.6822784810126582, + "grad_norm": 0.7679425477981567, + "learning_rate": 0.0003499970391573322, + "loss": 1.4341, + "step": 6468 + }, + { + "epoch": 0.6823839662447257, + "grad_norm": 0.7890285849571228, + "learning_rate": 0.00034978468957090635, + "loss": 1.4149, + "step": 6469 + }, + { + "epoch": 0.6824894514767933, + "grad_norm": 0.7948709726333618, + "learning_rate": 0.0003495723848306017, + "loss": 1.4534, + "step": 6470 + }, + { + "epoch": 0.6825949367088607, + "grad_norm": 0.8090433478355408, + "learning_rate": 0.000349360124960208, + "loss": 1.4027, + 
"step": 6471 + }, + { + "epoch": 0.6827004219409283, + "grad_norm": 0.7191137671470642, + "learning_rate": 0.00034914790998351005, + "loss": 1.4647, + "step": 6472 + }, + { + "epoch": 0.6828059071729958, + "grad_norm": 0.7243310809135437, + "learning_rate": 0.0003489357399242876, + "loss": 1.4293, + "step": 6473 + }, + { + "epoch": 0.6829113924050633, + "grad_norm": 0.7893538475036621, + "learning_rate": 0.0003487236148063154, + "loss": 1.4109, + "step": 6474 + }, + { + "epoch": 0.6830168776371308, + "grad_norm": 0.6912686824798584, + "learning_rate": 0.0003485115346533629, + "loss": 1.4258, + "step": 6475 + }, + { + "epoch": 0.6831223628691984, + "grad_norm": 0.6717785000801086, + "learning_rate": 0.00034829949948919517, + "loss": 1.4484, + "step": 6476 + }, + { + "epoch": 0.6832278481012658, + "grad_norm": 0.6906934976577759, + "learning_rate": 0.00034808750933757154, + "loss": 1.415, + "step": 6477 + }, + { + "epoch": 0.6833333333333333, + "grad_norm": 0.7379029393196106, + "learning_rate": 0.0003478755642222466, + "loss": 1.4364, + "step": 6478 + }, + { + "epoch": 0.6834388185654009, + "grad_norm": 0.6826701760292053, + "learning_rate": 0.0003476636641669699, + "loss": 1.4112, + "step": 6479 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 0.76104736328125, + "learning_rate": 0.0003474518091954859, + "loss": 1.4279, + "step": 6480 + }, + { + "epoch": 0.6836497890295359, + "grad_norm": 0.7117180228233337, + "learning_rate": 0.00034723999933153387, + "loss": 1.4255, + "step": 6481 + }, + { + "epoch": 0.6837552742616034, + "grad_norm": 0.72557532787323, + "learning_rate": 0.00034702823459884836, + "loss": 1.3783, + "step": 6482 + }, + { + "epoch": 0.6838607594936709, + "grad_norm": 0.7547438144683838, + "learning_rate": 0.0003468165150211585, + "loss": 1.3744, + "step": 6483 + }, + { + "epoch": 0.6839662447257384, + "grad_norm": 0.7457993626594543, + "learning_rate": 0.0003466048406221883, + "loss": 1.4529, + "step": 6484 + }, + { + "epoch": 
0.6840717299578059, + "grad_norm": 0.6529878973960876, + "learning_rate": 0.0003463932114256576, + "loss": 1.4329, + "step": 6485 + }, + { + "epoch": 0.6841772151898734, + "grad_norm": 0.8052867650985718, + "learning_rate": 0.00034618162745528, + "loss": 1.424, + "step": 6486 + }, + { + "epoch": 0.684282700421941, + "grad_norm": 0.7389687895774841, + "learning_rate": 0.00034597008873476473, + "loss": 1.4266, + "step": 6487 + }, + { + "epoch": 0.6843881856540084, + "grad_norm": 0.9174879789352417, + "learning_rate": 0.0003457585952878156, + "loss": 1.4597, + "step": 6488 + }, + { + "epoch": 0.6844936708860759, + "grad_norm": 0.760420024394989, + "learning_rate": 0.0003455471471381318, + "loss": 1.4228, + "step": 6489 + }, + { + "epoch": 0.6845991561181435, + "grad_norm": 0.7595261931419373, + "learning_rate": 0.0003453357443094068, + "loss": 1.4285, + "step": 6490 + }, + { + "epoch": 0.6847046413502109, + "grad_norm": 0.7136346101760864, + "learning_rate": 0.0003451243868253294, + "loss": 1.4443, + "step": 6491 + }, + { + "epoch": 0.6848101265822785, + "grad_norm": 0.8304058313369751, + "learning_rate": 0.0003449130747095835, + "loss": 1.4558, + "step": 6492 + }, + { + "epoch": 0.684915611814346, + "grad_norm": 0.6817740797996521, + "learning_rate": 0.0003447018079858472, + "loss": 1.4296, + "step": 6493 + }, + { + "epoch": 0.6850210970464135, + "grad_norm": 0.842635989189148, + "learning_rate": 0.0003444905866777946, + "loss": 1.4119, + "step": 6494 + }, + { + "epoch": 0.685126582278481, + "grad_norm": 0.7986421585083008, + "learning_rate": 0.0003442794108090938, + "loss": 1.4524, + "step": 6495 + }, + { + "epoch": 0.6852320675105485, + "grad_norm": 0.8644797205924988, + "learning_rate": 0.0003440682804034081, + "loss": 1.4276, + "step": 6496 + }, + { + "epoch": 0.685337552742616, + "grad_norm": 0.9629131555557251, + "learning_rate": 0.00034385719548439585, + "loss": 1.4597, + "step": 6497 + }, + { + "epoch": 0.6854430379746835, + "grad_norm": 0.7005763649940491, + 
"learning_rate": 0.00034364615607570994, + "loss": 1.4119, + "step": 6498 + }, + { + "epoch": 0.6855485232067511, + "grad_norm": 1.3959095478057861, + "learning_rate": 0.0003434351622009985, + "loss": 1.4698, + "step": 6499 + }, + { + "epoch": 0.6856540084388185, + "grad_norm": 1.0005528926849365, + "learning_rate": 0.00034322421388390456, + "loss": 1.421, + "step": 6500 + }, + { + "epoch": 0.6857594936708861, + "grad_norm": 1.0040969848632812, + "learning_rate": 0.00034301331114806573, + "loss": 1.4323, + "step": 6501 + }, + { + "epoch": 0.6858649789029536, + "grad_norm": 1.2076705694198608, + "learning_rate": 0.0003428024540171148, + "loss": 1.422, + "step": 6502 + }, + { + "epoch": 0.685970464135021, + "grad_norm": 0.7670792937278748, + "learning_rate": 0.0003425916425146791, + "loss": 1.4638, + "step": 6503 + }, + { + "epoch": 0.6860759493670886, + "grad_norm": 1.1753857135772705, + "learning_rate": 0.0003423808766643817, + "loss": 1.456, + "step": 6504 + }, + { + "epoch": 0.6861814345991561, + "grad_norm": 1.092944860458374, + "learning_rate": 0.00034217015648983957, + "loss": 1.449, + "step": 6505 + }, + { + "epoch": 0.6862869198312236, + "grad_norm": 0.7334634065628052, + "learning_rate": 0.0003419594820146652, + "loss": 1.4436, + "step": 6506 + }, + { + "epoch": 0.6863924050632911, + "grad_norm": 0.9573581218719482, + "learning_rate": 0.0003417488532624653, + "loss": 1.4305, + "step": 6507 + }, + { + "epoch": 0.6864978902953587, + "grad_norm": 1.077333688735962, + "learning_rate": 0.00034153827025684225, + "loss": 1.4089, + "step": 6508 + }, + { + "epoch": 0.6866033755274261, + "grad_norm": 0.8332042098045349, + "learning_rate": 0.0003413277330213928, + "loss": 1.4543, + "step": 6509 + }, + { + "epoch": 0.6867088607594937, + "grad_norm": 1.024483323097229, + "learning_rate": 0.0003411172415797087, + "loss": 1.4149, + "step": 6510 + }, + { + "epoch": 0.6868143459915612, + "grad_norm": 0.7289676070213318, + "learning_rate": 0.00034090679595537646, + "loss": 
1.4268, + "step": 6511 + }, + { + "epoch": 0.6869198312236287, + "grad_norm": 0.8280117511749268, + "learning_rate": 0.0003406963961719778, + "loss": 1.4344, + "step": 6512 + }, + { + "epoch": 0.6870253164556962, + "grad_norm": 0.858031153678894, + "learning_rate": 0.00034048604225308854, + "loss": 1.4231, + "step": 6513 + }, + { + "epoch": 0.6871308016877637, + "grad_norm": 0.7537016272544861, + "learning_rate": 0.00034027573422228054, + "loss": 1.4349, + "step": 6514 + }, + { + "epoch": 0.6872362869198312, + "grad_norm": 0.7572857141494751, + "learning_rate": 0.00034006547210311964, + "loss": 1.408, + "step": 6515 + }, + { + "epoch": 0.6873417721518987, + "grad_norm": 0.7485715746879578, + "learning_rate": 0.0003398552559191667, + "loss": 1.4244, + "step": 6516 + }, + { + "epoch": 0.6874472573839663, + "grad_norm": 0.7317846417427063, + "learning_rate": 0.00033964508569397743, + "loss": 1.4594, + "step": 6517 + }, + { + "epoch": 0.6875527426160337, + "grad_norm": 0.6985745429992676, + "learning_rate": 0.0003394349614511026, + "loss": 1.4003, + "step": 6518 + }, + { + "epoch": 0.6876582278481013, + "grad_norm": 0.7937660813331604, + "learning_rate": 0.0003392248832140876, + "loss": 1.3789, + "step": 6519 + }, + { + "epoch": 0.6877637130801688, + "grad_norm": 0.691696286201477, + "learning_rate": 0.0003390148510064727, + "loss": 1.4191, + "step": 6520 + }, + { + "epoch": 0.6878691983122363, + "grad_norm": 0.6797316670417786, + "learning_rate": 0.00033880486485179305, + "loss": 1.4225, + "step": 6521 + }, + { + "epoch": 0.6879746835443038, + "grad_norm": 0.6940584182739258, + "learning_rate": 0.0003385949247735786, + "loss": 1.3831, + "step": 6522 + }, + { + "epoch": 0.6880801687763713, + "grad_norm": 0.6987354159355164, + "learning_rate": 0.00033838503079535435, + "loss": 1.3807, + "step": 6523 + }, + { + "epoch": 0.6881856540084388, + "grad_norm": 0.7370089292526245, + "learning_rate": 0.00033817518294064003, + "loss": 1.4204, + "step": 6524 + }, + { + "epoch": 
0.6882911392405063, + "grad_norm": 0.6964244842529297, + "learning_rate": 0.00033796538123294996, + "loss": 1.4238, + "step": 6525 + }, + { + "epoch": 0.6883966244725739, + "grad_norm": 0.7661454081535339, + "learning_rate": 0.0003377556256957936, + "loss": 1.4246, + "step": 6526 + }, + { + "epoch": 0.6885021097046413, + "grad_norm": 0.7122755646705627, + "learning_rate": 0.0003375459163526749, + "loss": 1.4185, + "step": 6527 + }, + { + "epoch": 0.6886075949367089, + "grad_norm": 0.7890910506248474, + "learning_rate": 0.000337336253227093, + "loss": 1.4128, + "step": 6528 + }, + { + "epoch": 0.6887130801687764, + "grad_norm": 0.7449623346328735, + "learning_rate": 0.00033712663634254163, + "loss": 1.4083, + "step": 6529 + }, + { + "epoch": 0.6888185654008439, + "grad_norm": 0.7038851380348206, + "learning_rate": 0.0003369170657225094, + "loss": 1.4092, + "step": 6530 + }, + { + "epoch": 0.6889240506329114, + "grad_norm": 0.7882105708122253, + "learning_rate": 0.0003367075413904799, + "loss": 1.3783, + "step": 6531 + }, + { + "epoch": 0.689029535864979, + "grad_norm": 0.6815450191497803, + "learning_rate": 0.00033649806336993085, + "loss": 1.4, + "step": 6532 + }, + { + "epoch": 0.6891350210970464, + "grad_norm": 0.8170140981674194, + "learning_rate": 0.0003362886316843361, + "loss": 1.4017, + "step": 6533 + }, + { + "epoch": 0.6892405063291139, + "grad_norm": 0.7514131665229797, + "learning_rate": 0.000336079246357163, + "loss": 1.416, + "step": 6534 + }, + { + "epoch": 0.6893459915611815, + "grad_norm": 0.8676620125770569, + "learning_rate": 0.00033586990741187446, + "loss": 1.4577, + "step": 6535 + }, + { + "epoch": 0.6894514767932489, + "grad_norm": 0.8961958289146423, + "learning_rate": 0.0003356606148719277, + "loss": 1.4347, + "step": 6536 + }, + { + "epoch": 0.6895569620253165, + "grad_norm": 0.7137209177017212, + "learning_rate": 0.00033545136876077524, + "loss": 1.4444, + "step": 6537 + }, + { + "epoch": 0.689662447257384, + "grad_norm": 
0.9683956503868103, + "learning_rate": 0.00033524216910186394, + "loss": 1.4297, + "step": 6538 + }, + { + "epoch": 0.6897679324894515, + "grad_norm": 0.7490960955619812, + "learning_rate": 0.00033503301591863586, + "loss": 1.4508, + "step": 6539 + }, + { + "epoch": 0.689873417721519, + "grad_norm": 0.8170836567878723, + "learning_rate": 0.0003348239092345275, + "loss": 1.4229, + "step": 6540 + }, + { + "epoch": 0.6899789029535865, + "grad_norm": 0.8667669296264648, + "learning_rate": 0.00033461484907297036, + "loss": 1.412, + "step": 6541 + }, + { + "epoch": 0.690084388185654, + "grad_norm": 0.6921650767326355, + "learning_rate": 0.00033440583545739046, + "loss": 1.4215, + "step": 6542 + }, + { + "epoch": 0.6901898734177215, + "grad_norm": 0.8906436562538147, + "learning_rate": 0.00033419686841120925, + "loss": 1.3974, + "step": 6543 + }, + { + "epoch": 0.6902953586497891, + "grad_norm": 0.7355681657791138, + "learning_rate": 0.00033398794795784227, + "loss": 1.434, + "step": 6544 + }, + { + "epoch": 0.6904008438818565, + "grad_norm": 0.8341505527496338, + "learning_rate": 0.0003337790741207003, + "loss": 1.4167, + "step": 6545 + }, + { + "epoch": 0.6905063291139241, + "grad_norm": 0.7301145792007446, + "learning_rate": 0.0003335702469231884, + "loss": 1.4338, + "step": 6546 + }, + { + "epoch": 0.6906118143459916, + "grad_norm": 0.8838505744934082, + "learning_rate": 0.00033336146638870685, + "loss": 1.4483, + "step": 6547 + }, + { + "epoch": 0.690717299578059, + "grad_norm": 0.7103906869888306, + "learning_rate": 0.0003331527325406506, + "loss": 1.4235, + "step": 6548 + }, + { + "epoch": 0.6908227848101266, + "grad_norm": 0.7722828984260559, + "learning_rate": 0.0003329440454024092, + "loss": 1.412, + "step": 6549 + }, + { + "epoch": 0.6909282700421941, + "grad_norm": 0.8090463280677795, + "learning_rate": 0.0003327354049973672, + "loss": 1.3766, + "step": 6550 + }, + { + "epoch": 0.6910337552742616, + "grad_norm": 0.7772613167762756, + "learning_rate": 
0.00033252681134890373, + "loss": 1.4523, + "step": 6551 + }, + { + "epoch": 0.6911392405063291, + "grad_norm": 0.7976638674736023, + "learning_rate": 0.00033231826448039246, + "loss": 1.3912, + "step": 6552 + }, + { + "epoch": 0.6912447257383966, + "grad_norm": 0.6809467077255249, + "learning_rate": 0.0003321097644152027, + "loss": 1.4303, + "step": 6553 + }, + { + "epoch": 0.6913502109704641, + "grad_norm": 0.9217113852500916, + "learning_rate": 0.00033190131117669753, + "loss": 1.439, + "step": 6554 + }, + { + "epoch": 0.6914556962025317, + "grad_norm": 0.7252447009086609, + "learning_rate": 0.0003316929047882354, + "loss": 1.4044, + "step": 6555 + }, + { + "epoch": 0.6915611814345991, + "grad_norm": 0.821967601776123, + "learning_rate": 0.0003314845452731691, + "loss": 1.4321, + "step": 6556 + }, + { + "epoch": 0.6916666666666667, + "grad_norm": 0.9152286052703857, + "learning_rate": 0.00033127623265484643, + "loss": 1.4158, + "step": 6557 + }, + { + "epoch": 0.6917721518987342, + "grad_norm": 0.7304633855819702, + "learning_rate": 0.00033106796695660983, + "loss": 1.4223, + "step": 6558 + }, + { + "epoch": 0.6918776371308016, + "grad_norm": 0.9246759414672852, + "learning_rate": 0.0003308597482017965, + "loss": 1.3937, + "step": 6559 + }, + { + "epoch": 0.6919831223628692, + "grad_norm": 0.7762858867645264, + "learning_rate": 0.00033065157641373847, + "loss": 1.4155, + "step": 6560 + }, + { + "epoch": 0.6920886075949367, + "grad_norm": 0.7271286845207214, + "learning_rate": 0.00033044345161576224, + "loss": 1.4422, + "step": 6561 + }, + { + "epoch": 0.6921940928270042, + "grad_norm": 0.9089529514312744, + "learning_rate": 0.00033023537383118916, + "loss": 1.3978, + "step": 6562 + }, + { + "epoch": 0.6922995780590717, + "grad_norm": 0.7043750882148743, + "learning_rate": 0.0003300273430833358, + "loss": 1.3738, + "step": 6563 + }, + { + "epoch": 0.6924050632911393, + "grad_norm": 0.9413266777992249, + "learning_rate": 0.00032981935939551294, + "loss": 1.4096, + 
"step": 6564 + }, + { + "epoch": 0.6925105485232067, + "grad_norm": 0.8149700164794922, + "learning_rate": 0.000329611422791026, + "loss": 1.4361, + "step": 6565 + }, + { + "epoch": 0.6926160337552743, + "grad_norm": 0.7374076247215271, + "learning_rate": 0.00032940353329317533, + "loss": 1.4171, + "step": 6566 + }, + { + "epoch": 0.6927215189873418, + "grad_norm": 0.6624267101287842, + "learning_rate": 0.0003291956909252561, + "loss": 1.4415, + "step": 6567 + }, + { + "epoch": 0.6928270042194092, + "grad_norm": 0.7073465585708618, + "learning_rate": 0.00032898789571055796, + "loss": 1.4123, + "step": 6568 + }, + { + "epoch": 0.6929324894514768, + "grad_norm": 0.6693248748779297, + "learning_rate": 0.0003287801476723656, + "loss": 1.4286, + "step": 6569 + }, + { + "epoch": 0.6930379746835443, + "grad_norm": 0.721954882144928, + "learning_rate": 0.0003285724468339576, + "loss": 1.4023, + "step": 6570 + }, + { + "epoch": 0.6931434599156118, + "grad_norm": 0.7215431332588196, + "learning_rate": 0.00032836479321860884, + "loss": 1.4276, + "step": 6571 + }, + { + "epoch": 0.6932489451476793, + "grad_norm": 0.7416446805000305, + "learning_rate": 0.00032815718684958727, + "loss": 1.4185, + "step": 6572 + }, + { + "epoch": 0.6933544303797469, + "grad_norm": 0.7288605570793152, + "learning_rate": 0.00032794962775015656, + "loss": 1.3957, + "step": 6573 + }, + { + "epoch": 0.6934599156118143, + "grad_norm": 0.7062421441078186, + "learning_rate": 0.0003277421159435745, + "loss": 1.4302, + "step": 6574 + }, + { + "epoch": 0.6935654008438819, + "grad_norm": 0.7234452962875366, + "learning_rate": 0.000327534651453094, + "loss": 1.4146, + "step": 6575 + }, + { + "epoch": 0.6936708860759494, + "grad_norm": 0.7198600172996521, + "learning_rate": 0.00032732723430196236, + "loss": 1.3657, + "step": 6576 + }, + { + "epoch": 0.6937763713080168, + "grad_norm": 0.7056398987770081, + "learning_rate": 0.0003271198645134218, + "loss": 1.4062, + "step": 6577 + }, + { + "epoch": 
0.6938818565400844, + "grad_norm": 1.0025262832641602, + "learning_rate": 0.0003269125421107091, + "loss": 1.4159, + "step": 6578 + }, + { + "epoch": 0.6939873417721519, + "grad_norm": 0.8126164674758911, + "learning_rate": 0.00032670526711705536, + "loss": 1.4022, + "step": 6579 + }, + { + "epoch": 0.6940928270042194, + "grad_norm": 0.7955871224403381, + "learning_rate": 0.00032649803955568755, + "loss": 1.452, + "step": 6580 + }, + { + "epoch": 0.6941983122362869, + "grad_norm": 0.7631414532661438, + "learning_rate": 0.0003262908594498262, + "loss": 1.3898, + "step": 6581 + }, + { + "epoch": 0.6943037974683545, + "grad_norm": 0.8470839858055115, + "learning_rate": 0.0003260837268226868, + "loss": 1.415, + "step": 6582 + }, + { + "epoch": 0.6944092827004219, + "grad_norm": 0.830568253993988, + "learning_rate": 0.0003258766416974796, + "loss": 1.4146, + "step": 6583 + }, + { + "epoch": 0.6945147679324895, + "grad_norm": 0.8573485612869263, + "learning_rate": 0.0003256696040974097, + "loss": 1.404, + "step": 6584 + }, + { + "epoch": 0.694620253164557, + "grad_norm": 0.6824976205825806, + "learning_rate": 0.00032546261404567644, + "loss": 1.4263, + "step": 6585 + }, + { + "epoch": 0.6947257383966244, + "grad_norm": 0.7571814656257629, + "learning_rate": 0.0003252556715654743, + "loss": 1.4038, + "step": 6586 + }, + { + "epoch": 0.694831223628692, + "grad_norm": 0.678226888179779, + "learning_rate": 0.00032504877667999206, + "loss": 1.4208, + "step": 6587 + }, + { + "epoch": 0.6949367088607595, + "grad_norm": 0.7175173759460449, + "learning_rate": 0.00032484192941241316, + "loss": 1.418, + "step": 6588 + }, + { + "epoch": 0.695042194092827, + "grad_norm": 0.6958455443382263, + "learning_rate": 0.0003246351297859164, + "loss": 1.4185, + "step": 6589 + }, + { + "epoch": 0.6951476793248945, + "grad_norm": 0.6899562478065491, + "learning_rate": 0.00032442837782367434, + "loss": 1.4741, + "step": 6590 + }, + { + "epoch": 0.6952531645569621, + "grad_norm": 
0.7834329605102539, + "learning_rate": 0.00032422167354885463, + "loss": 1.4137, + "step": 6591 + }, + { + "epoch": 0.6953586497890295, + "grad_norm": 0.719643771648407, + "learning_rate": 0.0003240150169846196, + "loss": 1.4306, + "step": 6592 + }, + { + "epoch": 0.695464135021097, + "grad_norm": 0.8246404528617859, + "learning_rate": 0.00032380840815412603, + "loss": 1.4377, + "step": 6593 + }, + { + "epoch": 0.6955696202531646, + "grad_norm": 0.6710047125816345, + "learning_rate": 0.00032360184708052554, + "loss": 1.4059, + "step": 6594 + }, + { + "epoch": 0.695675105485232, + "grad_norm": 0.7128825783729553, + "learning_rate": 0.00032339533378696424, + "loss": 1.3955, + "step": 6595 + }, + { + "epoch": 0.6957805907172996, + "grad_norm": 0.6577406525611877, + "learning_rate": 0.00032318886829658277, + "loss": 1.4457, + "step": 6596 + }, + { + "epoch": 0.6958860759493671, + "grad_norm": 0.7361050248146057, + "learning_rate": 0.0003229824506325172, + "loss": 1.4359, + "step": 6597 + }, + { + "epoch": 0.6959915611814346, + "grad_norm": 0.6869662404060364, + "learning_rate": 0.0003227760808178973, + "loss": 1.4272, + "step": 6598 + }, + { + "epoch": 0.6960970464135021, + "grad_norm": 0.7761451005935669, + "learning_rate": 0.00032256975887584783, + "loss": 1.4179, + "step": 6599 + }, + { + "epoch": 0.6962025316455697, + "grad_norm": 0.7322080731391907, + "learning_rate": 0.0003223634848294883, + "loss": 1.4365, + "step": 6600 + }, + { + "epoch": 0.6963080168776371, + "grad_norm": 0.7749722003936768, + "learning_rate": 0.0003221572587019327, + "loss": 1.3908, + "step": 6601 + }, + { + "epoch": 0.6964135021097047, + "grad_norm": 0.6692136526107788, + "learning_rate": 0.0003219510805162896, + "loss": 1.41, + "step": 6602 + }, + { + "epoch": 0.6965189873417722, + "grad_norm": 0.905876100063324, + "learning_rate": 0.0003217449502956624, + "loss": 1.4313, + "step": 6603 + }, + { + "epoch": 0.6966244725738396, + "grad_norm": 0.7003208994865417, + "learning_rate": 
0.0003215388680631491, + "loss": 1.4244, + "step": 6604 + }, + { + "epoch": 0.6967299578059072, + "grad_norm": 0.7164966464042664, + "learning_rate": 0.00032133283384184173, + "loss": 1.4252, + "step": 6605 + }, + { + "epoch": 0.6968354430379747, + "grad_norm": 0.896405816078186, + "learning_rate": 0.00032112684765482814, + "loss": 1.384, + "step": 6606 + }, + { + "epoch": 0.6969409282700422, + "grad_norm": 0.6941912770271301, + "learning_rate": 0.00032092090952518996, + "loss": 1.4079, + "step": 6607 + }, + { + "epoch": 0.6970464135021097, + "grad_norm": 0.7128201127052307, + "learning_rate": 0.00032071501947600334, + "loss": 1.3516, + "step": 6608 + }, + { + "epoch": 0.6971518987341773, + "grad_norm": 0.8333280086517334, + "learning_rate": 0.00032050917753033935, + "loss": 1.3994, + "step": 6609 + }, + { + "epoch": 0.6972573839662447, + "grad_norm": 0.759625256061554, + "learning_rate": 0.00032030338371126374, + "loss": 1.4701, + "step": 6610 + }, + { + "epoch": 0.6973628691983123, + "grad_norm": 0.8195058107376099, + "learning_rate": 0.0003200976380418366, + "loss": 1.3756, + "step": 6611 + }, + { + "epoch": 0.6974683544303798, + "grad_norm": 0.7421247959136963, + "learning_rate": 0.00031989194054511276, + "loss": 1.4174, + "step": 6612 + }, + { + "epoch": 0.6975738396624472, + "grad_norm": 0.6966759562492371, + "learning_rate": 0.0003196862912441418, + "loss": 1.4602, + "step": 6613 + }, + { + "epoch": 0.6976793248945148, + "grad_norm": 0.8078312873840332, + "learning_rate": 0.0003194806901619673, + "loss": 1.4149, + "step": 6614 + }, + { + "epoch": 0.6977848101265823, + "grad_norm": 0.7694984674453735, + "learning_rate": 0.00031927513732162856, + "loss": 1.4065, + "step": 6615 + }, + { + "epoch": 0.6978902953586498, + "grad_norm": 0.784797191619873, + "learning_rate": 0.00031906963274615837, + "loss": 1.4478, + "step": 6616 + }, + { + "epoch": 0.6979957805907173, + "grad_norm": 0.8318613171577454, + "learning_rate": 0.00031886417645858475, + "loss": 1.4361, + 
"step": 6617 + }, + { + "epoch": 0.6981012658227848, + "grad_norm": 0.7553563714027405, + "learning_rate": 0.00031865876848192993, + "loss": 1.3969, + "step": 6618 + }, + { + "epoch": 0.6982067510548523, + "grad_norm": 0.8061240315437317, + "learning_rate": 0.000318453408839211, + "loss": 1.4123, + "step": 6619 + }, + { + "epoch": 0.6983122362869199, + "grad_norm": 0.7118523716926575, + "learning_rate": 0.0003182480975534395, + "loss": 1.4076, + "step": 6620 + }, + { + "epoch": 0.6984177215189873, + "grad_norm": 0.7929918169975281, + "learning_rate": 0.0003180428346476215, + "loss": 1.4106, + "step": 6621 + }, + { + "epoch": 0.6985232067510548, + "grad_norm": 0.8425456881523132, + "learning_rate": 0.0003178376201447576, + "loss": 1.4438, + "step": 6622 + }, + { + "epoch": 0.6986286919831224, + "grad_norm": 0.7583506107330322, + "learning_rate": 0.00031763245406784364, + "loss": 1.3968, + "step": 6623 + }, + { + "epoch": 0.6987341772151898, + "grad_norm": 0.8717215061187744, + "learning_rate": 0.0003174273364398691, + "loss": 1.4133, + "step": 6624 + }, + { + "epoch": 0.6988396624472574, + "grad_norm": 0.6971889138221741, + "learning_rate": 0.00031722226728381854, + "loss": 1.4148, + "step": 6625 + }, + { + "epoch": 0.6989451476793249, + "grad_norm": 0.7184058427810669, + "learning_rate": 0.00031701724662267097, + "loss": 1.3867, + "step": 6626 + }, + { + "epoch": 0.6990506329113924, + "grad_norm": 0.7890133261680603, + "learning_rate": 0.00031681227447939996, + "loss": 1.3974, + "step": 6627 + }, + { + "epoch": 0.6991561181434599, + "grad_norm": 0.7727085947990417, + "learning_rate": 0.00031660735087697363, + "loss": 1.4377, + "step": 6628 + }, + { + "epoch": 0.6992616033755275, + "grad_norm": 0.7071055769920349, + "learning_rate": 0.0003164024758383548, + "loss": 1.3951, + "step": 6629 + }, + { + "epoch": 0.6993670886075949, + "grad_norm": 0.8023898005485535, + "learning_rate": 0.00031619764938650057, + "loss": 1.4135, + "step": 6630 + }, + { + "epoch": 
0.6994725738396624, + "grad_norm": 0.9249725937843323, + "learning_rate": 0.00031599287154436263, + "loss": 1.4204, + "step": 6631 + }, + { + "epoch": 0.69957805907173, + "grad_norm": 0.723680853843689, + "learning_rate": 0.0003157881423348879, + "loss": 1.4067, + "step": 6632 + }, + { + "epoch": 0.6996835443037974, + "grad_norm": 0.8342786431312561, + "learning_rate": 0.00031558346178101694, + "loss": 1.4034, + "step": 6633 + }, + { + "epoch": 0.699789029535865, + "grad_norm": 0.7512307167053223, + "learning_rate": 0.00031537882990568535, + "loss": 1.4354, + "step": 6634 + }, + { + "epoch": 0.6998945147679325, + "grad_norm": 0.727185845375061, + "learning_rate": 0.000315174246731823, + "loss": 1.4138, + "step": 6635 + }, + { + "epoch": 0.7, + "grad_norm": 0.6901951432228088, + "learning_rate": 0.00031496971228235464, + "loss": 1.378, + "step": 6636 + }, + { + "epoch": 0.7001054852320675, + "grad_norm": 0.7711026072502136, + "learning_rate": 0.00031476522658019916, + "loss": 1.4548, + "step": 6637 + }, + { + "epoch": 0.700210970464135, + "grad_norm": 0.7066276669502258, + "learning_rate": 0.0003145607896482704, + "loss": 1.4362, + "step": 6638 + }, + { + "epoch": 0.7003164556962025, + "grad_norm": 0.7030028700828552, + "learning_rate": 0.00031435640150947645, + "loss": 1.4034, + "step": 6639 + }, + { + "epoch": 0.70042194092827, + "grad_norm": 0.7883404493331909, + "learning_rate": 0.0003141520621867197, + "loss": 1.3989, + "step": 6640 + }, + { + "epoch": 0.7005274261603376, + "grad_norm": 0.7151665091514587, + "learning_rate": 0.00031394777170289806, + "loss": 1.4202, + "step": 6641 + }, + { + "epoch": 0.700632911392405, + "grad_norm": 0.8017002940177917, + "learning_rate": 0.00031374353008090285, + "loss": 1.409, + "step": 6642 + }, + { + "epoch": 0.7007383966244726, + "grad_norm": 0.715144693851471, + "learning_rate": 0.0003135393373436206, + "loss": 1.4274, + "step": 6643 + }, + { + "epoch": 0.7008438818565401, + "grad_norm": 0.7361568212509155, + 
"learning_rate": 0.0003133351935139319, + "loss": 1.3835, + "step": 6644 + }, + { + "epoch": 0.7009493670886076, + "grad_norm": 0.7680429816246033, + "learning_rate": 0.00031313109861471223, + "loss": 1.3823, + "step": 6645 + }, + { + "epoch": 0.7010548523206751, + "grad_norm": 0.7477811574935913, + "learning_rate": 0.0003129270526688313, + "loss": 1.3994, + "step": 6646 + }, + { + "epoch": 0.7011603375527427, + "grad_norm": 0.7238718867301941, + "learning_rate": 0.0003127230556991536, + "loss": 1.4237, + "step": 6647 + }, + { + "epoch": 0.7012658227848101, + "grad_norm": 0.7201357483863831, + "learning_rate": 0.000312519107728538, + "loss": 1.4533, + "step": 6648 + }, + { + "epoch": 0.7013713080168776, + "grad_norm": 0.7222275137901306, + "learning_rate": 0.0003123152087798376, + "loss": 1.3683, + "step": 6649 + }, + { + "epoch": 0.7014767932489452, + "grad_norm": 0.7332446575164795, + "learning_rate": 0.00031211135887590074, + "loss": 1.4429, + "step": 6650 + }, + { + "epoch": 0.7015822784810126, + "grad_norm": 0.7029076814651489, + "learning_rate": 0.0003119075580395697, + "loss": 1.4002, + "step": 6651 + }, + { + "epoch": 0.7016877637130802, + "grad_norm": 0.686096727848053, + "learning_rate": 0.0003117038062936813, + "loss": 1.4335, + "step": 6652 + }, + { + "epoch": 0.7017932489451477, + "grad_norm": 0.6827272176742554, + "learning_rate": 0.0003115001036610669, + "loss": 1.3971, + "step": 6653 + }, + { + "epoch": 0.7018987341772152, + "grad_norm": 0.756051778793335, + "learning_rate": 0.0003112964501645525, + "loss": 1.4194, + "step": 6654 + }, + { + "epoch": 0.7020042194092827, + "grad_norm": 0.6585301160812378, + "learning_rate": 0.0003110928458269584, + "loss": 1.4196, + "step": 6655 + }, + { + "epoch": 0.7021097046413503, + "grad_norm": 0.7268235087394714, + "learning_rate": 0.00031088929067109945, + "loss": 1.4059, + "step": 6656 + }, + { + "epoch": 0.7022151898734177, + "grad_norm": 0.7178346514701843, + "learning_rate": 0.0003106857847197849, + "loss": 
1.4139, + "step": 6657 + }, + { + "epoch": 0.7023206751054852, + "grad_norm": 0.694493293762207, + "learning_rate": 0.0003104823279958191, + "loss": 1.438, + "step": 6658 + }, + { + "epoch": 0.7024261603375528, + "grad_norm": 0.7704127430915833, + "learning_rate": 0.00031027892052200003, + "loss": 1.4243, + "step": 6659 + }, + { + "epoch": 0.7025316455696202, + "grad_norm": 0.6719833016395569, + "learning_rate": 0.0003100755623211205, + "loss": 1.381, + "step": 6660 + }, + { + "epoch": 0.7026371308016878, + "grad_norm": 0.838003396987915, + "learning_rate": 0.000309872253415968, + "loss": 1.3904, + "step": 6661 + }, + { + "epoch": 0.7027426160337553, + "grad_norm": 0.7091001868247986, + "learning_rate": 0.00030966899382932404, + "loss": 1.4187, + "step": 6662 + }, + { + "epoch": 0.7028481012658228, + "grad_norm": 0.677253246307373, + "learning_rate": 0.0003094657835839651, + "loss": 1.4148, + "step": 6663 + }, + { + "epoch": 0.7029535864978903, + "grad_norm": 0.8410184979438782, + "learning_rate": 0.00030926262270266177, + "loss": 1.3864, + "step": 6664 + }, + { + "epoch": 0.7030590717299579, + "grad_norm": 0.7406084537506104, + "learning_rate": 0.00030905951120817934, + "loss": 1.4074, + "step": 6665 + }, + { + "epoch": 0.7031645569620253, + "grad_norm": 0.7487931251525879, + "learning_rate": 0.00030885644912327713, + "loss": 1.4164, + "step": 6666 + }, + { + "epoch": 0.7032700421940928, + "grad_norm": 0.6957740187644958, + "learning_rate": 0.0003086534364707097, + "loss": 1.4169, + "step": 6667 + }, + { + "epoch": 0.7033755274261604, + "grad_norm": 0.7063069939613342, + "learning_rate": 0.00030845047327322556, + "loss": 1.4033, + "step": 6668 + }, + { + "epoch": 0.7034810126582278, + "grad_norm": 0.7365018725395203, + "learning_rate": 0.0003082475595535677, + "loss": 1.446, + "step": 6669 + }, + { + "epoch": 0.7035864978902954, + "grad_norm": 0.7223359942436218, + "learning_rate": 0.0003080446953344735, + "loss": 1.4287, + "step": 6670 + }, + { + "epoch": 
0.7036919831223629, + "grad_norm": 0.7310599088668823, + "learning_rate": 0.000307841880638675, + "loss": 1.4486, + "step": 6671 + }, + { + "epoch": 0.7037974683544304, + "grad_norm": 0.6811070442199707, + "learning_rate": 0.0003076391154888985, + "loss": 1.4113, + "step": 6672 + }, + { + "epoch": 0.7039029535864979, + "grad_norm": 0.7482221126556396, + "learning_rate": 0.000307436399907865, + "loss": 1.4334, + "step": 6673 + }, + { + "epoch": 0.7040084388185655, + "grad_norm": 0.790470540523529, + "learning_rate": 0.00030723373391828966, + "loss": 1.4513, + "step": 6674 + }, + { + "epoch": 0.7041139240506329, + "grad_norm": 0.6865370869636536, + "learning_rate": 0.00030703111754288204, + "loss": 1.3844, + "step": 6675 + }, + { + "epoch": 0.7042194092827004, + "grad_norm": 0.7086933851242065, + "learning_rate": 0.0003068285508043467, + "loss": 1.4206, + "step": 6676 + }, + { + "epoch": 0.704324894514768, + "grad_norm": 0.710120439529419, + "learning_rate": 0.00030662603372538224, + "loss": 1.4165, + "step": 6677 + }, + { + "epoch": 0.7044303797468354, + "grad_norm": 0.6771200299263, + "learning_rate": 0.0003064235663286815, + "loss": 1.424, + "step": 6678 + }, + { + "epoch": 0.704535864978903, + "grad_norm": 0.7941837310791016, + "learning_rate": 0.00030622114863693205, + "loss": 1.3632, + "step": 6679 + }, + { + "epoch": 0.7046413502109705, + "grad_norm": 0.7402331233024597, + "learning_rate": 0.00030601878067281575, + "loss": 1.3933, + "step": 6680 + }, + { + "epoch": 0.704746835443038, + "grad_norm": 0.9413955211639404, + "learning_rate": 0.00030581646245900895, + "loss": 1.4126, + "step": 6681 + }, + { + "epoch": 0.7048523206751055, + "grad_norm": 0.7214118242263794, + "learning_rate": 0.0003056141940181825, + "loss": 1.417, + "step": 6682 + }, + { + "epoch": 0.7049578059071729, + "grad_norm": 0.9300150871276855, + "learning_rate": 0.0003054119753730012, + "loss": 1.3872, + "step": 6683 + }, + { + "epoch": 0.7050632911392405, + "grad_norm": 0.8379713296890259, 
+ "learning_rate": 0.00030520980654612527, + "loss": 1.3809, + "step": 6684 + }, + { + "epoch": 0.705168776371308, + "grad_norm": 0.7822762131690979, + "learning_rate": 0.0003050076875602084, + "loss": 1.4226, + "step": 6685 + }, + { + "epoch": 0.7052742616033755, + "grad_norm": 0.8065719604492188, + "learning_rate": 0.0003048056184378991, + "loss": 1.4176, + "step": 6686 + }, + { + "epoch": 0.705379746835443, + "grad_norm": 0.7859869003295898, + "learning_rate": 0.0003046035992018402, + "loss": 1.4168, + "step": 6687 + }, + { + "epoch": 0.7054852320675106, + "grad_norm": 0.68796706199646, + "learning_rate": 0.00030440162987466896, + "loss": 1.3929, + "step": 6688 + }, + { + "epoch": 0.705590717299578, + "grad_norm": 0.8030683398246765, + "learning_rate": 0.00030419971047901704, + "loss": 1.4412, + "step": 6689 + }, + { + "epoch": 0.7056962025316456, + "grad_norm": 0.8544976115226746, + "learning_rate": 0.00030399784103751044, + "loss": 1.3948, + "step": 6690 + }, + { + "epoch": 0.7058016877637131, + "grad_norm": 0.7049795985221863, + "learning_rate": 0.0003037960215727699, + "loss": 1.4148, + "step": 6691 + }, + { + "epoch": 0.7059071729957805, + "grad_norm": 0.7032355666160583, + "learning_rate": 0.0003035942521074097, + "loss": 1.4151, + "step": 6692 + }, + { + "epoch": 0.7060126582278481, + "grad_norm": 0.7980014085769653, + "learning_rate": 0.0003033925326640398, + "loss": 1.4262, + "step": 6693 + }, + { + "epoch": 0.7061181434599156, + "grad_norm": 0.7147760987281799, + "learning_rate": 0.00030319086326526364, + "loss": 1.4168, + "step": 6694 + }, + { + "epoch": 0.7062236286919831, + "grad_norm": 0.746067464351654, + "learning_rate": 0.00030298924393367923, + "loss": 1.4059, + "step": 6695 + }, + { + "epoch": 0.7063291139240506, + "grad_norm": 0.6881378889083862, + "learning_rate": 0.0003027876746918791, + "loss": 1.4182, + "step": 6696 + }, + { + "epoch": 0.7064345991561182, + "grad_norm": 0.7269294261932373, + "learning_rate": 0.00030258615556244995, + 
"loss": 1.3969, + "step": 6697 + }, + { + "epoch": 0.7065400843881856, + "grad_norm": 0.6908167004585266, + "learning_rate": 0.0003023846865679731, + "loss": 1.4351, + "step": 6698 + }, + { + "epoch": 0.7066455696202532, + "grad_norm": 0.7264042496681213, + "learning_rate": 0.00030218326773102407, + "loss": 1.3639, + "step": 6699 + }, + { + "epoch": 0.7067510548523207, + "grad_norm": 0.7531015872955322, + "learning_rate": 0.000301981899074173, + "loss": 1.4098, + "step": 6700 + }, + { + "epoch": 0.7068565400843881, + "grad_norm": 0.6546299457550049, + "learning_rate": 0.00030178058061998387, + "loss": 1.4425, + "step": 6701 + }, + { + "epoch": 0.7069620253164557, + "grad_norm": 0.8251559734344482, + "learning_rate": 0.00030157931239101595, + "loss": 1.4316, + "step": 6702 + }, + { + "epoch": 0.7070675105485232, + "grad_norm": 0.7142723202705383, + "learning_rate": 0.00030137809440982207, + "loss": 1.3964, + "step": 6703 + }, + { + "epoch": 0.7071729957805907, + "grad_norm": 0.7353797554969788, + "learning_rate": 0.0003011769266989498, + "loss": 1.3988, + "step": 6704 + }, + { + "epoch": 0.7072784810126582, + "grad_norm": 0.7999634742736816, + "learning_rate": 0.0003009758092809409, + "loss": 1.4327, + "step": 6705 + }, + { + "epoch": 0.7073839662447258, + "grad_norm": 0.7408666610717773, + "learning_rate": 0.00030077474217833167, + "loss": 1.4035, + "step": 6706 + }, + { + "epoch": 0.7074894514767932, + "grad_norm": 0.8345036506652832, + "learning_rate": 0.0003005737254136525, + "loss": 1.4235, + "step": 6707 + }, + { + "epoch": 0.7075949367088608, + "grad_norm": 0.8856201767921448, + "learning_rate": 0.0003003727590094285, + "loss": 1.3913, + "step": 6708 + }, + { + "epoch": 0.7077004219409283, + "grad_norm": 0.8209749460220337, + "learning_rate": 0.00030017184298817873, + "loss": 1.4228, + "step": 6709 + }, + { + "epoch": 0.7078059071729957, + "grad_norm": 0.8115068674087524, + "learning_rate": 0.0002999709773724171, + "loss": 1.3872, + "step": 6710 + }, + { + 
"epoch": 0.7079113924050633, + "grad_norm": 0.7419852614402771, + "learning_rate": 0.00029977016218465154, + "loss": 1.3857, + "step": 6711 + }, + { + "epoch": 0.7080168776371308, + "grad_norm": 0.9966089725494385, + "learning_rate": 0.0002995693974473844, + "loss": 1.4188, + "step": 6712 + }, + { + "epoch": 0.7081223628691983, + "grad_norm": 0.7522166967391968, + "learning_rate": 0.00029936868318311235, + "loss": 1.4038, + "step": 6713 + }, + { + "epoch": 0.7082278481012658, + "grad_norm": 0.7038177251815796, + "learning_rate": 0.00029916801941432637, + "loss": 1.4109, + "step": 6714 + }, + { + "epoch": 0.7083333333333334, + "grad_norm": 0.9413915872573853, + "learning_rate": 0.00029896740616351187, + "loss": 1.357, + "step": 6715 + }, + { + "epoch": 0.7084388185654008, + "grad_norm": 0.7514850497245789, + "learning_rate": 0.00029876684345314853, + "loss": 1.3785, + "step": 6716 + }, + { + "epoch": 0.7085443037974684, + "grad_norm": 1.0000659227371216, + "learning_rate": 0.00029856633130571046, + "loss": 1.4401, + "step": 6717 + }, + { + "epoch": 0.7086497890295359, + "grad_norm": 0.9499351382255554, + "learning_rate": 0.00029836586974366574, + "loss": 1.3891, + "step": 6718 + }, + { + "epoch": 0.7087552742616033, + "grad_norm": 0.8463512659072876, + "learning_rate": 0.00029816545878947763, + "loss": 1.4122, + "step": 6719 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 0.9214828014373779, + "learning_rate": 0.00029796509846560294, + "loss": 1.4286, + "step": 6720 + }, + { + "epoch": 0.7089662447257384, + "grad_norm": 0.7067133188247681, + "learning_rate": 0.00029776478879449305, + "loss": 1.4025, + "step": 6721 + }, + { + "epoch": 0.7090717299578059, + "grad_norm": 0.8234004974365234, + "learning_rate": 0.0002975645297985935, + "loss": 1.4304, + "step": 6722 + }, + { + "epoch": 0.7091772151898734, + "grad_norm": 0.6876112222671509, + "learning_rate": 0.0002973643215003445, + "loss": 1.4079, + "step": 6723 + }, + { + "epoch": 0.709282700421941, + 
"grad_norm": 0.7622956037521362, + "learning_rate": 0.0002971641639221804, + "loss": 1.4032, + "step": 6724 + }, + { + "epoch": 0.7093881856540084, + "grad_norm": 0.893990695476532, + "learning_rate": 0.00029696405708652966, + "loss": 1.4055, + "step": 6725 + }, + { + "epoch": 0.709493670886076, + "grad_norm": 0.7188781499862671, + "learning_rate": 0.00029676400101581545, + "loss": 1.4175, + "step": 6726 + }, + { + "epoch": 0.7095991561181435, + "grad_norm": 0.8215598464012146, + "learning_rate": 0.0002965639957324546, + "loss": 1.4059, + "step": 6727 + }, + { + "epoch": 0.7097046413502109, + "grad_norm": 0.7519844174385071, + "learning_rate": 0.00029636404125885936, + "loss": 1.4407, + "step": 6728 + }, + { + "epoch": 0.7098101265822785, + "grad_norm": 0.7314040064811707, + "learning_rate": 0.00029616413761743537, + "loss": 1.4068, + "step": 6729 + }, + { + "epoch": 0.709915611814346, + "grad_norm": 0.784332811832428, + "learning_rate": 0.0002959642848305828, + "loss": 1.4013, + "step": 6730 + }, + { + "epoch": 0.7100210970464135, + "grad_norm": 0.7244210243225098, + "learning_rate": 0.0002957644829206961, + "loss": 1.4317, + "step": 6731 + }, + { + "epoch": 0.710126582278481, + "grad_norm": 0.7625210285186768, + "learning_rate": 0.0002955647319101641, + "loss": 1.4074, + "step": 6732 + }, + { + "epoch": 0.7102320675105486, + "grad_norm": 0.7611334323883057, + "learning_rate": 0.00029536503182137, + "loss": 1.4147, + "step": 6733 + }, + { + "epoch": 0.710337552742616, + "grad_norm": 0.8231474757194519, + "learning_rate": 0.00029516538267669096, + "loss": 1.3832, + "step": 6734 + }, + { + "epoch": 0.7104430379746836, + "grad_norm": 0.7632891535758972, + "learning_rate": 0.00029496578449849867, + "loss": 1.423, + "step": 6735 + }, + { + "epoch": 0.7105485232067511, + "grad_norm": 0.7791309356689453, + "learning_rate": 0.00029476623730915943, + "loss": 1.4247, + "step": 6736 + }, + { + "epoch": 0.7106540084388185, + "grad_norm": 0.7675945162773132, + "learning_rate": 
0.00029456674113103335, + "loss": 1.3909, + "step": 6737 + }, + { + "epoch": 0.7107594936708861, + "grad_norm": 0.8215928077697754, + "learning_rate": 0.00029436729598647483, + "loss": 1.4258, + "step": 6738 + }, + { + "epoch": 0.7108649789029536, + "grad_norm": 0.8349428176879883, + "learning_rate": 0.00029416790189783286, + "loss": 1.4446, + "step": 6739 + }, + { + "epoch": 0.7109704641350211, + "grad_norm": 0.9235941767692566, + "learning_rate": 0.00029396855888745045, + "loss": 1.4086, + "step": 6740 + }, + { + "epoch": 0.7110759493670886, + "grad_norm": 0.785880982875824, + "learning_rate": 0.00029376926697766495, + "loss": 1.4198, + "step": 6741 + }, + { + "epoch": 0.7111814345991562, + "grad_norm": 0.8610718250274658, + "learning_rate": 0.00029357002619080814, + "loss": 1.4163, + "step": 6742 + }, + { + "epoch": 0.7112869198312236, + "grad_norm": 0.7248905897140503, + "learning_rate": 0.0002933708365492058, + "loss": 1.3948, + "step": 6743 + }, + { + "epoch": 0.7113924050632912, + "grad_norm": 0.7996690273284912, + "learning_rate": 0.00029317169807517785, + "loss": 1.3817, + "step": 6744 + }, + { + "epoch": 0.7114978902953587, + "grad_norm": 0.7058922052383423, + "learning_rate": 0.00029297261079103945, + "loss": 1.4394, + "step": 6745 + }, + { + "epoch": 0.7116033755274261, + "grad_norm": 0.6465176939964294, + "learning_rate": 0.000292773574719099, + "loss": 1.4284, + "step": 6746 + }, + { + "epoch": 0.7117088607594937, + "grad_norm": 0.8563470244407654, + "learning_rate": 0.0002925745898816594, + "loss": 1.4156, + "step": 6747 + }, + { + "epoch": 0.7118143459915611, + "grad_norm": 0.6952831149101257, + "learning_rate": 0.0002923756563010179, + "loss": 1.4071, + "step": 6748 + }, + { + "epoch": 0.7119198312236287, + "grad_norm": 0.8573631048202515, + "learning_rate": 0.000292176773999466, + "loss": 1.4461, + "step": 6749 + }, + { + "epoch": 0.7120253164556962, + "grad_norm": 0.7349979281425476, + "learning_rate": 0.0002919779429992895, + "loss": 1.4037, + 
"step": 6750 + }, + { + "epoch": 0.7121308016877637, + "grad_norm": 0.6582909226417542, + "learning_rate": 0.0002917791633227685, + "loss": 1.3864, + "step": 6751 + }, + { + "epoch": 0.7122362869198312, + "grad_norm": 0.800318717956543, + "learning_rate": 0.000291580434992177, + "loss": 1.4052, + "step": 6752 + }, + { + "epoch": 0.7123417721518988, + "grad_norm": 0.7417111396789551, + "learning_rate": 0.00029138175802978343, + "loss": 1.4062, + "step": 6753 + }, + { + "epoch": 0.7124472573839662, + "grad_norm": 0.733092725276947, + "learning_rate": 0.00029118313245785104, + "loss": 1.4592, + "step": 6754 + }, + { + "epoch": 0.7125527426160337, + "grad_norm": 0.8403455018997192, + "learning_rate": 0.00029098455829863653, + "loss": 1.4083, + "step": 6755 + }, + { + "epoch": 0.7126582278481013, + "grad_norm": 0.8964159488677979, + "learning_rate": 0.0002907860355743911, + "loss": 1.377, + "step": 6756 + }, + { + "epoch": 0.7127637130801687, + "grad_norm": 0.7852303385734558, + "learning_rate": 0.00029058756430736025, + "loss": 1.4238, + "step": 6757 + }, + { + "epoch": 0.7128691983122363, + "grad_norm": 0.995603084564209, + "learning_rate": 0.0002903891445197836, + "loss": 1.4331, + "step": 6758 + }, + { + "epoch": 0.7129746835443038, + "grad_norm": 0.7494598031044006, + "learning_rate": 0.0002901907762338952, + "loss": 1.4231, + "step": 6759 + }, + { + "epoch": 0.7130801687763713, + "grad_norm": 0.9593385457992554, + "learning_rate": 0.0002899924594719231, + "loss": 1.4019, + "step": 6760 + }, + { + "epoch": 0.7131856540084388, + "grad_norm": 0.8846142888069153, + "learning_rate": 0.0002897941942560894, + "loss": 1.4246, + "step": 6761 + }, + { + "epoch": 0.7132911392405064, + "grad_norm": 0.8600628972053528, + "learning_rate": 0.0002895959806086114, + "loss": 1.3965, + "step": 6762 + }, + { + "epoch": 0.7133966244725738, + "grad_norm": 0.9302670955657959, + "learning_rate": 0.0002893978185516995, + "loss": 1.4251, + "step": 6763 + }, + { + "epoch": 
0.7135021097046413, + "grad_norm": 0.7622833251953125, + "learning_rate": 0.00028919970810755883, + "loss": 1.4174, + "step": 6764 + }, + { + "epoch": 0.7136075949367089, + "grad_norm": 0.8271130323410034, + "learning_rate": 0.0002890016492983886, + "loss": 1.4366, + "step": 6765 + }, + { + "epoch": 0.7137130801687763, + "grad_norm": 0.799453616142273, + "learning_rate": 0.0002888036421463823, + "loss": 1.3799, + "step": 6766 + }, + { + "epoch": 0.7138185654008439, + "grad_norm": 0.7412325143814087, + "learning_rate": 0.0002886056866737277, + "loss": 1.3978, + "step": 6767 + }, + { + "epoch": 0.7139240506329114, + "grad_norm": 0.7008804678916931, + "learning_rate": 0.0002884077829026066, + "loss": 1.4147, + "step": 6768 + }, + { + "epoch": 0.7140295358649789, + "grad_norm": 0.7547533512115479, + "learning_rate": 0.0002882099308551951, + "loss": 1.4275, + "step": 6769 + }, + { + "epoch": 0.7141350210970464, + "grad_norm": 0.7562646865844727, + "learning_rate": 0.00028801213055366335, + "loss": 1.4203, + "step": 6770 + }, + { + "epoch": 0.714240506329114, + "grad_norm": 0.7916061282157898, + "learning_rate": 0.00028781438202017613, + "loss": 1.4668, + "step": 6771 + }, + { + "epoch": 0.7143459915611814, + "grad_norm": 0.7594568729400635, + "learning_rate": 0.0002876166852768923, + "loss": 1.4145, + "step": 6772 + }, + { + "epoch": 0.7144514767932489, + "grad_norm": 0.7285019755363464, + "learning_rate": 0.0002874190403459644, + "loss": 1.4314, + "step": 6773 + }, + { + "epoch": 0.7145569620253165, + "grad_norm": 0.6952726244926453, + "learning_rate": 0.0002872214472495397, + "loss": 1.3636, + "step": 6774 + }, + { + "epoch": 0.7146624472573839, + "grad_norm": 0.8192839026451111, + "learning_rate": 0.00028702390600975937, + "loss": 1.4009, + "step": 6775 + }, + { + "epoch": 0.7147679324894515, + "grad_norm": 0.7181259393692017, + "learning_rate": 0.0002868264166487591, + "loss": 1.3818, + "step": 6776 + }, + { + "epoch": 0.714873417721519, + "grad_norm": 
0.7700967192649841, + "learning_rate": 0.0002866289791886684, + "loss": 1.398, + "step": 6777 + }, + { + "epoch": 0.7149789029535865, + "grad_norm": 0.7069383859634399, + "learning_rate": 0.00028643159365161113, + "loss": 1.4232, + "step": 6778 + }, + { + "epoch": 0.715084388185654, + "grad_norm": 0.8885515332221985, + "learning_rate": 0.00028623426005970517, + "loss": 1.4054, + "step": 6779 + }, + { + "epoch": 0.7151898734177216, + "grad_norm": 0.7068488001823425, + "learning_rate": 0.00028603697843506315, + "loss": 1.4185, + "step": 6780 + }, + { + "epoch": 0.715295358649789, + "grad_norm": 0.7542856335639954, + "learning_rate": 0.00028583974879979113, + "loss": 1.4463, + "step": 6781 + }, + { + "epoch": 0.7154008438818565, + "grad_norm": 0.727407693862915, + "learning_rate": 0.00028564257117598993, + "loss": 1.4072, + "step": 6782 + }, + { + "epoch": 0.7155063291139241, + "grad_norm": 0.8147586584091187, + "learning_rate": 0.00028544544558575395, + "loss": 1.394, + "step": 6783 + }, + { + "epoch": 0.7156118143459915, + "grad_norm": 0.9121218323707581, + "learning_rate": 0.0002852483720511724, + "loss": 1.4454, + "step": 6784 + }, + { + "epoch": 0.7157172995780591, + "grad_norm": 0.6873136758804321, + "learning_rate": 0.0002850513505943281, + "loss": 1.4297, + "step": 6785 + }, + { + "epoch": 0.7158227848101266, + "grad_norm": 1.0557284355163574, + "learning_rate": 0.0002848543812372986, + "loss": 1.3813, + "step": 6786 + }, + { + "epoch": 0.7159282700421941, + "grad_norm": 0.851100742816925, + "learning_rate": 0.00028465746400215463, + "loss": 1.3968, + "step": 6787 + }, + { + "epoch": 0.7160337552742616, + "grad_norm": 0.945026695728302, + "learning_rate": 0.00028446059891096265, + "loss": 1.4204, + "step": 6788 + }, + { + "epoch": 0.7161392405063292, + "grad_norm": 0.8884594440460205, + "learning_rate": 0.00028426378598578187, + "loss": 1.4004, + "step": 6789 + }, + { + "epoch": 0.7162447257383966, + "grad_norm": 0.766464114189148, + "learning_rate": 
0.0002840670252486662, + "loss": 1.4569, + "step": 6790 + }, + { + "epoch": 0.7163502109704641, + "grad_norm": 0.9202120304107666, + "learning_rate": 0.00028387031672166385, + "loss": 1.4089, + "step": 6791 + }, + { + "epoch": 0.7164556962025317, + "grad_norm": 0.8498384356498718, + "learning_rate": 0.0002836736604268167, + "loss": 1.4141, + "step": 6792 + }, + { + "epoch": 0.7165611814345991, + "grad_norm": 0.7804853916168213, + "learning_rate": 0.0002834770563861613, + "loss": 1.4278, + "step": 6793 + }, + { + "epoch": 0.7166666666666667, + "grad_norm": 1.237866759300232, + "learning_rate": 0.000283280504621728, + "loss": 1.4001, + "step": 6794 + }, + { + "epoch": 0.7167721518987342, + "grad_norm": 0.7220629453659058, + "learning_rate": 0.0002830840051555414, + "loss": 1.4075, + "step": 6795 + }, + { + "epoch": 0.7168776371308017, + "grad_norm": 0.8004711866378784, + "learning_rate": 0.00028288755800962, + "loss": 1.3616, + "step": 6796 + }, + { + "epoch": 0.7169831223628692, + "grad_norm": 0.7508109211921692, + "learning_rate": 0.00028269116320597733, + "loss": 1.4043, + "step": 6797 + }, + { + "epoch": 0.7170886075949368, + "grad_norm": 0.7490410804748535, + "learning_rate": 0.0002824948207666199, + "loss": 1.4146, + "step": 6798 + }, + { + "epoch": 0.7171940928270042, + "grad_norm": 0.7263420820236206, + "learning_rate": 0.0002822985307135491, + "loss": 1.4372, + "step": 6799 + }, + { + "epoch": 0.7172995780590717, + "grad_norm": 0.745394766330719, + "learning_rate": 0.00028210229306876, + "loss": 1.3774, + "step": 6800 + }, + { + "epoch": 0.7174050632911393, + "grad_norm": 0.7555010318756104, + "learning_rate": 0.0002819061078542422, + "loss": 1.4474, + "step": 6801 + }, + { + "epoch": 0.7175105485232067, + "grad_norm": 0.7743978500366211, + "learning_rate": 0.0002817099750919791, + "loss": 1.3924, + "step": 6802 + }, + { + "epoch": 0.7176160337552743, + "grad_norm": 0.75373375415802, + "learning_rate": 0.0002815138948039485, + "loss": 1.4203, + "step": 6803 
+ }, + { + "epoch": 0.7177215189873418, + "grad_norm": 0.7720244526863098, + "learning_rate": 0.000281317867012122, + "loss": 1.3666, + "step": 6804 + }, + { + "epoch": 0.7178270042194093, + "grad_norm": 0.7460476160049438, + "learning_rate": 0.0002811218917384652, + "loss": 1.4419, + "step": 6805 + }, + { + "epoch": 0.7179324894514768, + "grad_norm": 0.6978552937507629, + "learning_rate": 0.00028092596900493885, + "loss": 1.424, + "step": 6806 + }, + { + "epoch": 0.7180379746835444, + "grad_norm": 0.7015257477760315, + "learning_rate": 0.00028073009883349665, + "loss": 1.3943, + "step": 6807 + }, + { + "epoch": 0.7181434599156118, + "grad_norm": 0.7902041673660278, + "learning_rate": 0.00028053428124608684, + "loss": 1.4286, + "step": 6808 + }, + { + "epoch": 0.7182489451476793, + "grad_norm": 0.7067888975143433, + "learning_rate": 0.0002803385162646518, + "loss": 1.4162, + "step": 6809 + }, + { + "epoch": 0.7183544303797469, + "grad_norm": 0.8761206865310669, + "learning_rate": 0.0002801428039111279, + "loss": 1.3433, + "step": 6810 + }, + { + "epoch": 0.7184599156118143, + "grad_norm": 0.6964515447616577, + "learning_rate": 0.0002799471442074459, + "loss": 1.439, + "step": 6811 + }, + { + "epoch": 0.7185654008438819, + "grad_norm": 0.7692927122116089, + "learning_rate": 0.00027975153717553014, + "loss": 1.4245, + "step": 6812 + }, + { + "epoch": 0.7186708860759494, + "grad_norm": 0.8517347574234009, + "learning_rate": 0.00027955598283729936, + "loss": 1.4164, + "step": 6813 + }, + { + "epoch": 0.7187763713080169, + "grad_norm": 0.7240038514137268, + "learning_rate": 0.00027936048121466673, + "loss": 1.3926, + "step": 6814 + }, + { + "epoch": 0.7188818565400844, + "grad_norm": 0.7979797124862671, + "learning_rate": 0.00027916503232953895, + "loss": 1.4207, + "step": 6815 + }, + { + "epoch": 0.7189873417721518, + "grad_norm": 0.8592051267623901, + "learning_rate": 0.0002789696362038172, + "loss": 1.4136, + "step": 6816 + }, + { + "epoch": 0.7190928270042194, + 
"grad_norm": 0.6888195276260376, + "learning_rate": 0.0002787742928593965, + "loss": 1.3992, + "step": 6817 + }, + { + "epoch": 0.7191983122362869, + "grad_norm": 0.7440544962882996, + "learning_rate": 0.00027857900231816594, + "loss": 1.3988, + "step": 6818 + }, + { + "epoch": 0.7193037974683544, + "grad_norm": 0.8858808279037476, + "learning_rate": 0.0002783837646020089, + "loss": 1.4034, + "step": 6819 + }, + { + "epoch": 0.7194092827004219, + "grad_norm": 0.7406858205795288, + "learning_rate": 0.00027818857973280274, + "loss": 1.4187, + "step": 6820 + }, + { + "epoch": 0.7195147679324895, + "grad_norm": 0.8054628968238831, + "learning_rate": 0.0002779934477324189, + "loss": 1.4092, + "step": 6821 + }, + { + "epoch": 0.7196202531645569, + "grad_norm": 0.8029532432556152, + "learning_rate": 0.0002777983686227226, + "loss": 1.4613, + "step": 6822 + }, + { + "epoch": 0.7197257383966245, + "grad_norm": 0.8609996438026428, + "learning_rate": 0.00027760334242557397, + "loss": 1.3601, + "step": 6823 + }, + { + "epoch": 0.719831223628692, + "grad_norm": 0.7453466653823853, + "learning_rate": 0.00027740836916282643, + "loss": 1.4099, + "step": 6824 + }, + { + "epoch": 0.7199367088607594, + "grad_norm": 0.6938377618789673, + "learning_rate": 0.00027721344885632765, + "loss": 1.4042, + "step": 6825 + }, + { + "epoch": 0.720042194092827, + "grad_norm": 0.8555868864059448, + "learning_rate": 0.0002770185815279195, + "loss": 1.3855, + "step": 6826 + }, + { + "epoch": 0.7201476793248945, + "grad_norm": 0.7587953805923462, + "learning_rate": 0.0002768237671994377, + "loss": 1.437, + "step": 6827 + }, + { + "epoch": 0.720253164556962, + "grad_norm": 0.8141512870788574, + "learning_rate": 0.0002766290058927123, + "loss": 1.4034, + "step": 6828 + }, + { + "epoch": 0.7203586497890295, + "grad_norm": 0.7782889008522034, + "learning_rate": 0.0002764342976295673, + "loss": 1.3925, + "step": 6829 + }, + { + "epoch": 0.7204641350210971, + "grad_norm": 1.0104038715362549, + 
"learning_rate": 0.0002762396424318206, + "loss": 1.3962, + "step": 6830 + }, + { + "epoch": 0.7205696202531645, + "grad_norm": 0.7653900980949402, + "learning_rate": 0.000276045040321284, + "loss": 1.4164, + "step": 6831 + }, + { + "epoch": 0.7206751054852321, + "grad_norm": 0.8451839685440063, + "learning_rate": 0.0002758504913197644, + "loss": 1.3835, + "step": 6832 + }, + { + "epoch": 0.7207805907172996, + "grad_norm": 0.8101283311843872, + "learning_rate": 0.0002756559954490615, + "loss": 1.4, + "step": 6833 + }, + { + "epoch": 0.720886075949367, + "grad_norm": 0.7153660655021667, + "learning_rate": 0.0002754615527309696, + "loss": 1.3906, + "step": 6834 + }, + { + "epoch": 0.7209915611814346, + "grad_norm": 0.7359811663627625, + "learning_rate": 0.000275267163187277, + "loss": 1.3979, + "step": 6835 + }, + { + "epoch": 0.7210970464135021, + "grad_norm": 0.8614129424095154, + "learning_rate": 0.00027507282683976594, + "loss": 1.3946, + "step": 6836 + }, + { + "epoch": 0.7212025316455696, + "grad_norm": 0.7025471925735474, + "learning_rate": 0.0002748785437102129, + "loss": 1.395, + "step": 6837 + }, + { + "epoch": 0.7213080168776371, + "grad_norm": 0.6930109858512878, + "learning_rate": 0.00027468431382038816, + "loss": 1.4208, + "step": 6838 + }, + { + "epoch": 0.7214135021097047, + "grad_norm": 0.6966862082481384, + "learning_rate": 0.00027449013719205623, + "loss": 1.3807, + "step": 6839 + }, + { + "epoch": 0.7215189873417721, + "grad_norm": 0.8546093702316284, + "learning_rate": 0.00027429601384697526, + "loss": 1.3836, + "step": 6840 + }, + { + "epoch": 0.7216244725738397, + "grad_norm": 0.6924305558204651, + "learning_rate": 0.00027410194380689826, + "loss": 1.3885, + "step": 6841 + }, + { + "epoch": 0.7217299578059072, + "grad_norm": 0.956778347492218, + "learning_rate": 0.00027390792709357155, + "loss": 1.4055, + "step": 6842 + }, + { + "epoch": 0.7218354430379746, + "grad_norm": 0.7395838499069214, + "learning_rate": 0.00027371396372873557, + "loss": 
1.4001, + "step": 6843 + }, + { + "epoch": 0.7219409282700422, + "grad_norm": 1.0141737461090088, + "learning_rate": 0.00027352005373412487, + "loss": 1.4213, + "step": 6844 + }, + { + "epoch": 0.7220464135021097, + "grad_norm": 0.853965699672699, + "learning_rate": 0.00027332619713146816, + "loss": 1.4152, + "step": 6845 + }, + { + "epoch": 0.7221518987341772, + "grad_norm": 0.8965448141098022, + "learning_rate": 0.000273132393942488, + "loss": 1.4234, + "step": 6846 + }, + { + "epoch": 0.7222573839662447, + "grad_norm": 0.9213038086891174, + "learning_rate": 0.000272938644188901, + "loss": 1.4183, + "step": 6847 + }, + { + "epoch": 0.7223628691983123, + "grad_norm": 0.7120789885520935, + "learning_rate": 0.00027274494789241766, + "loss": 1.3889, + "step": 6848 + }, + { + "epoch": 0.7224683544303797, + "grad_norm": 0.8239436745643616, + "learning_rate": 0.00027255130507474276, + "loss": 1.4198, + "step": 6849 + }, + { + "epoch": 0.7225738396624473, + "grad_norm": 0.7275452613830566, + "learning_rate": 0.00027235771575757466, + "loss": 1.3785, + "step": 6850 + }, + { + "epoch": 0.7226793248945148, + "grad_norm": 0.7344847917556763, + "learning_rate": 0.00027216417996260654, + "loss": 1.4188, + "step": 6851 + }, + { + "epoch": 0.7227848101265822, + "grad_norm": 0.7208225131034851, + "learning_rate": 0.00027197069771152464, + "loss": 1.4158, + "step": 6852 + }, + { + "epoch": 0.7228902953586498, + "grad_norm": 0.7500246167182922, + "learning_rate": 0.0002717772690260098, + "loss": 1.3926, + "step": 6853 + }, + { + "epoch": 0.7229957805907173, + "grad_norm": 0.8395860195159912, + "learning_rate": 0.0002715838939277366, + "loss": 1.4089, + "step": 6854 + }, + { + "epoch": 0.7231012658227848, + "grad_norm": 0.7029407620429993, + "learning_rate": 0.0002713905724383737, + "loss": 1.3606, + "step": 6855 + }, + { + "epoch": 0.7232067510548523, + "grad_norm": 0.7702172994613647, + "learning_rate": 0.00027119730457958376, + "loss": 1.3961, + "step": 6856 + }, + { + "epoch": 
0.7233122362869199, + "grad_norm": 0.7377851605415344, + "learning_rate": 0.0002710040903730233, + "loss": 1.3641, + "step": 6857 + }, + { + "epoch": 0.7234177215189873, + "grad_norm": 0.7781023979187012, + "learning_rate": 0.00027081092984034303, + "loss": 1.4342, + "step": 6858 + }, + { + "epoch": 0.7235232067510549, + "grad_norm": 0.807590663433075, + "learning_rate": 0.00027061782300318726, + "loss": 1.435, + "step": 6859 + }, + { + "epoch": 0.7236286919831224, + "grad_norm": 0.7132800221443176, + "learning_rate": 0.0002704247698831951, + "loss": 1.3857, + "step": 6860 + }, + { + "epoch": 0.7237341772151898, + "grad_norm": 0.7500719428062439, + "learning_rate": 0.00027023177050199885, + "loss": 1.3682, + "step": 6861 + }, + { + "epoch": 0.7238396624472574, + "grad_norm": 0.735255777835846, + "learning_rate": 0.00027003882488122507, + "loss": 1.4457, + "step": 6862 + }, + { + "epoch": 0.7239451476793249, + "grad_norm": 0.734009325504303, + "learning_rate": 0.0002698459330424942, + "loss": 1.4119, + "step": 6863 + }, + { + "epoch": 0.7240506329113924, + "grad_norm": 0.7622190117835999, + "learning_rate": 0.0002696530950074208, + "loss": 1.4211, + "step": 6864 + }, + { + "epoch": 0.7241561181434599, + "grad_norm": 0.7094702124595642, + "learning_rate": 0.00026946031079761346, + "loss": 1.4007, + "step": 6865 + }, + { + "epoch": 0.7242616033755275, + "grad_norm": 0.6994034647941589, + "learning_rate": 0.00026926758043467435, + "loss": 1.4016, + "step": 6866 + }, + { + "epoch": 0.7243670886075949, + "grad_norm": 0.7286331653594971, + "learning_rate": 0.00026907490394020004, + "loss": 1.3881, + "step": 6867 + }, + { + "epoch": 0.7244725738396625, + "grad_norm": 0.7113932371139526, + "learning_rate": 0.00026888228133578086, + "loss": 1.4006, + "step": 6868 + }, + { + "epoch": 0.72457805907173, + "grad_norm": 0.80919349193573, + "learning_rate": 0.0002686897126430009, + "loss": 1.3863, + "step": 6869 + }, + { + "epoch": 0.7246835443037974, + "grad_norm": 
0.7006799578666687, + "learning_rate": 0.0002684971978834389, + "loss": 1.3801, + "step": 6870 + }, + { + "epoch": 0.724789029535865, + "grad_norm": 0.7504919171333313, + "learning_rate": 0.00026830473707866684, + "loss": 1.4097, + "step": 6871 + }, + { + "epoch": 0.7248945147679325, + "grad_norm": 0.7219193577766418, + "learning_rate": 0.00026811233025025096, + "loss": 1.4103, + "step": 6872 + }, + { + "epoch": 0.725, + "grad_norm": 0.6725665926933289, + "learning_rate": 0.00026791997741975134, + "loss": 1.3972, + "step": 6873 + }, + { + "epoch": 0.7251054852320675, + "grad_norm": 0.7639097571372986, + "learning_rate": 0.00026772767860872216, + "loss": 1.4058, + "step": 6874 + }, + { + "epoch": 0.7252109704641351, + "grad_norm": 0.7337458729743958, + "learning_rate": 0.00026753543383871143, + "loss": 1.3961, + "step": 6875 + }, + { + "epoch": 0.7253164556962025, + "grad_norm": 0.6728100776672363, + "learning_rate": 0.0002673432431312611, + "loss": 1.3995, + "step": 6876 + }, + { + "epoch": 0.7254219409282701, + "grad_norm": 0.691261351108551, + "learning_rate": 0.0002671511065079071, + "loss": 1.3946, + "step": 6877 + }, + { + "epoch": 0.7255274261603376, + "grad_norm": 0.7884491682052612, + "learning_rate": 0.00026695902399017935, + "loss": 1.4032, + "step": 6878 + }, + { + "epoch": 0.725632911392405, + "grad_norm": 0.828609049320221, + "learning_rate": 0.00026676699559960145, + "loss": 1.441, + "step": 6879 + }, + { + "epoch": 0.7257383966244726, + "grad_norm": 0.7186872363090515, + "learning_rate": 0.0002665750213576914, + "loss": 1.403, + "step": 6880 + }, + { + "epoch": 0.72584388185654, + "grad_norm": 0.6776237487792969, + "learning_rate": 0.0002663831012859609, + "loss": 1.3981, + "step": 6881 + }, + { + "epoch": 0.7259493670886076, + "grad_norm": 0.7040878534317017, + "learning_rate": 0.0002661912354059154, + "loss": 1.4032, + "step": 6882 + }, + { + "epoch": 0.7260548523206751, + "grad_norm": 0.8125771880149841, + "learning_rate": 0.0002659994237390545, + 
"loss": 1.4045, + "step": 6883 + }, + { + "epoch": 0.7261603375527426, + "grad_norm": 0.7722179889678955, + "learning_rate": 0.0002658076663068715, + "loss": 1.3911, + "step": 6884 + }, + { + "epoch": 0.7262658227848101, + "grad_norm": 0.8042052984237671, + "learning_rate": 0.00026561596313085396, + "loss": 1.4225, + "step": 6885 + }, + { + "epoch": 0.7263713080168777, + "grad_norm": 0.729915976524353, + "learning_rate": 0.00026542431423248313, + "loss": 1.3771, + "step": 6886 + }, + { + "epoch": 0.7264767932489451, + "grad_norm": 0.9628373384475708, + "learning_rate": 0.00026523271963323414, + "loss": 1.4073, + "step": 6887 + }, + { + "epoch": 0.7265822784810126, + "grad_norm": 0.7433755397796631, + "learning_rate": 0.0002650411793545763, + "loss": 1.4114, + "step": 6888 + }, + { + "epoch": 0.7266877637130802, + "grad_norm": 0.7927623987197876, + "learning_rate": 0.00026484969341797224, + "loss": 1.3747, + "step": 6889 + }, + { + "epoch": 0.7267932489451476, + "grad_norm": 0.7794502377510071, + "learning_rate": 0.0002646582618448794, + "loss": 1.37, + "step": 6890 + }, + { + "epoch": 0.7268987341772152, + "grad_norm": 0.7765015363693237, + "learning_rate": 0.00026446688465674845, + "loss": 1.4173, + "step": 6891 + }, + { + "epoch": 0.7270042194092827, + "grad_norm": 0.7755011320114136, + "learning_rate": 0.0002642755618750242, + "loss": 1.426, + "step": 6892 + }, + { + "epoch": 0.7271097046413502, + "grad_norm": 0.7023531198501587, + "learning_rate": 0.0002640842935211453, + "loss": 1.4077, + "step": 6893 + }, + { + "epoch": 0.7272151898734177, + "grad_norm": 0.7919066548347473, + "learning_rate": 0.0002638930796165443, + "loss": 1.376, + "step": 6894 + }, + { + "epoch": 0.7273206751054853, + "grad_norm": 0.7059270739555359, + "learning_rate": 0.00026370192018264766, + "loss": 1.3713, + "step": 6895 + }, + { + "epoch": 0.7274261603375527, + "grad_norm": 0.7132629156112671, + "learning_rate": 0.00026351081524087573, + "loss": 1.3891, + "step": 6896 + }, + { + 
"epoch": 0.7275316455696202, + "grad_norm": 0.8053942322731018, + "learning_rate": 0.0002633197648126429, + "loss": 1.4105, + "step": 6897 + }, + { + "epoch": 0.7276371308016878, + "grad_norm": 0.7041761875152588, + "learning_rate": 0.0002631287689193571, + "loss": 1.4104, + "step": 6898 + }, + { + "epoch": 0.7277426160337552, + "grad_norm": 0.7620981335639954, + "learning_rate": 0.0002629378275824204, + "loss": 1.3909, + "step": 6899 + }, + { + "epoch": 0.7278481012658228, + "grad_norm": 0.7757359147071838, + "learning_rate": 0.00026274694082322896, + "loss": 1.3898, + "step": 6900 + }, + { + "epoch": 0.7279535864978903, + "grad_norm": 0.7890090942382812, + "learning_rate": 0.00026255610866317253, + "loss": 1.428, + "step": 6901 + }, + { + "epoch": 0.7280590717299578, + "grad_norm": 0.825495719909668, + "learning_rate": 0.0002623653311236347, + "loss": 1.4134, + "step": 6902 + }, + { + "epoch": 0.7281645569620253, + "grad_norm": 0.8954551219940186, + "learning_rate": 0.0002621746082259931, + "loss": 1.4338, + "step": 6903 + }, + { + "epoch": 0.7282700421940929, + "grad_norm": 0.7875639796257019, + "learning_rate": 0.0002619839399916192, + "loss": 1.3998, + "step": 6904 + }, + { + "epoch": 0.7283755274261603, + "grad_norm": 0.9622467756271362, + "learning_rate": 0.0002617933264418782, + "loss": 1.4104, + "step": 6905 + }, + { + "epoch": 0.7284810126582278, + "grad_norm": 0.7130706310272217, + "learning_rate": 0.00026160276759812953, + "loss": 1.4302, + "step": 6906 + }, + { + "epoch": 0.7285864978902954, + "grad_norm": 0.8698712587356567, + "learning_rate": 0.00026141226348172595, + "loss": 1.4255, + "step": 6907 + }, + { + "epoch": 0.7286919831223628, + "grad_norm": 0.9178017377853394, + "learning_rate": 0.00026122181411401444, + "loss": 1.4218, + "step": 6908 + }, + { + "epoch": 0.7287974683544304, + "grad_norm": 0.7979358434677124, + "learning_rate": 0.00026103141951633617, + "loss": 1.4681, + "step": 6909 + }, + { + "epoch": 0.7289029535864979, + "grad_norm": 
0.9311311841011047, + "learning_rate": 0.0002608410797100255, + "loss": 1.4057, + "step": 6910 + }, + { + "epoch": 0.7290084388185654, + "grad_norm": 0.7249614000320435, + "learning_rate": 0.000260650794716411, + "loss": 1.4245, + "step": 6911 + }, + { + "epoch": 0.7291139240506329, + "grad_norm": 0.8988996148109436, + "learning_rate": 0.00026046056455681515, + "loss": 1.3755, + "step": 6912 + }, + { + "epoch": 0.7292194092827005, + "grad_norm": 0.8302013278007507, + "learning_rate": 0.00026027038925255407, + "loss": 1.4119, + "step": 6913 + }, + { + "epoch": 0.7293248945147679, + "grad_norm": 0.6792076826095581, + "learning_rate": 0.00026008026882493783, + "loss": 1.3856, + "step": 6914 + }, + { + "epoch": 0.7294303797468354, + "grad_norm": 0.99598228931427, + "learning_rate": 0.00025989020329527057, + "loss": 1.4006, + "step": 6915 + }, + { + "epoch": 0.729535864978903, + "grad_norm": 0.7677018046379089, + "learning_rate": 0.0002597001926848498, + "loss": 1.3947, + "step": 6916 + }, + { + "epoch": 0.7296413502109704, + "grad_norm": 0.7739041447639465, + "learning_rate": 0.00025951023701496713, + "loss": 1.4212, + "step": 6917 + }, + { + "epoch": 0.729746835443038, + "grad_norm": 0.7265740036964417, + "learning_rate": 0.0002593203363069084, + "loss": 1.4137, + "step": 6918 + }, + { + "epoch": 0.7298523206751055, + "grad_norm": 0.6919379830360413, + "learning_rate": 0.00025913049058195277, + "loss": 1.4018, + "step": 6919 + }, + { + "epoch": 0.729957805907173, + "grad_norm": 0.680683434009552, + "learning_rate": 0.0002589406998613733, + "loss": 1.4007, + "step": 6920 + }, + { + "epoch": 0.7300632911392405, + "grad_norm": 0.6990373730659485, + "learning_rate": 0.0002587509641664372, + "loss": 1.4243, + "step": 6921 + }, + { + "epoch": 0.7301687763713081, + "grad_norm": 0.7280877232551575, + "learning_rate": 0.0002585612835184051, + "loss": 1.3885, + "step": 6922 + }, + { + "epoch": 0.7302742616033755, + "grad_norm": 0.6616265177726746, + "learning_rate": 
0.00025837165793853164, + "loss": 1.4097, + "step": 6923 + }, + { + "epoch": 0.730379746835443, + "grad_norm": 0.6833961009979248, + "learning_rate": 0.0002581820874480654, + "loss": 1.416, + "step": 6924 + }, + { + "epoch": 0.7304852320675106, + "grad_norm": 0.6649190187454224, + "learning_rate": 0.0002579925720682487, + "loss": 1.3956, + "step": 6925 + }, + { + "epoch": 0.730590717299578, + "grad_norm": 0.6958440542221069, + "learning_rate": 0.0002578031118203174, + "loss": 1.423, + "step": 6926 + }, + { + "epoch": 0.7306962025316456, + "grad_norm": 0.7731822729110718, + "learning_rate": 0.00025761370672550203, + "loss": 1.395, + "step": 6927 + }, + { + "epoch": 0.7308016877637131, + "grad_norm": 0.6895481944084167, + "learning_rate": 0.0002574243568050261, + "loss": 1.3938, + "step": 6928 + }, + { + "epoch": 0.7309071729957806, + "grad_norm": 0.7336916923522949, + "learning_rate": 0.0002572350620801072, + "loss": 1.4021, + "step": 6929 + }, + { + "epoch": 0.7310126582278481, + "grad_norm": 0.7810378074645996, + "learning_rate": 0.0002570458225719567, + "loss": 1.4421, + "step": 6930 + }, + { + "epoch": 0.7311181434599157, + "grad_norm": 0.7799192667007446, + "learning_rate": 0.0002568566383017799, + "loss": 1.4189, + "step": 6931 + }, + { + "epoch": 0.7312236286919831, + "grad_norm": 0.7241182327270508, + "learning_rate": 0.0002566675092907757, + "loss": 1.3837, + "step": 6932 + }, + { + "epoch": 0.7313291139240506, + "grad_norm": 0.7879086136817932, + "learning_rate": 0.0002564784355601372, + "loss": 1.4411, + "step": 6933 + }, + { + "epoch": 0.7314345991561182, + "grad_norm": 0.7996188998222351, + "learning_rate": 0.0002562894171310508, + "loss": 1.4022, + "step": 6934 + }, + { + "epoch": 0.7315400843881856, + "grad_norm": 0.7657592296600342, + "learning_rate": 0.00025610045402469695, + "loss": 1.4571, + "step": 6935 + }, + { + "epoch": 0.7316455696202532, + "grad_norm": 0.8429516553878784, + "learning_rate": 0.0002559115462622503, + "loss": 1.3523, + "step": 
6936 + }, + { + "epoch": 0.7317510548523207, + "grad_norm": 0.8321728706359863, + "learning_rate": 0.00025572269386487853, + "loss": 1.3846, + "step": 6937 + }, + { + "epoch": 0.7318565400843882, + "grad_norm": 0.705014169216156, + "learning_rate": 0.0002555338968537436, + "loss": 1.4167, + "step": 6938 + }, + { + "epoch": 0.7319620253164557, + "grad_norm": 0.8158211708068848, + "learning_rate": 0.0002553451552500012, + "loss": 1.422, + "step": 6939 + }, + { + "epoch": 0.7320675105485233, + "grad_norm": 0.7208105325698853, + "learning_rate": 0.00025515646907480074, + "loss": 1.4375, + "step": 6940 + }, + { + "epoch": 0.7321729957805907, + "grad_norm": 0.7029818296432495, + "learning_rate": 0.0002549678383492854, + "loss": 1.4276, + "step": 6941 + }, + { + "epoch": 0.7322784810126582, + "grad_norm": 0.6903982162475586, + "learning_rate": 0.00025477926309459224, + "loss": 1.3807, + "step": 6942 + }, + { + "epoch": 0.7323839662447258, + "grad_norm": 0.7329031825065613, + "learning_rate": 0.00025459074333185176, + "loss": 1.3931, + "step": 6943 + }, + { + "epoch": 0.7324894514767932, + "grad_norm": 0.7130163908004761, + "learning_rate": 0.0002544022790821891, + "loss": 1.3985, + "step": 6944 + }, + { + "epoch": 0.7325949367088608, + "grad_norm": 0.8259326815605164, + "learning_rate": 0.0002542138703667224, + "loss": 1.4064, + "step": 6945 + }, + { + "epoch": 0.7327004219409282, + "grad_norm": 0.7340685129165649, + "learning_rate": 0.00025402551720656366, + "loss": 1.3864, + "step": 6946 + }, + { + "epoch": 0.7328059071729958, + "grad_norm": 0.804579496383667, + "learning_rate": 0.0002538372196228189, + "loss": 1.4024, + "step": 6947 + }, + { + "epoch": 0.7329113924050633, + "grad_norm": 0.7195679545402527, + "learning_rate": 0.00025364897763658777, + "loss": 1.4346, + "step": 6948 + }, + { + "epoch": 0.7330168776371307, + "grad_norm": 0.7034432291984558, + "learning_rate": 0.0002534607912689637, + "loss": 1.4413, + "step": 6949 + }, + { + "epoch": 0.7331223628691983, + 
"grad_norm": 0.6920897364616394, + "learning_rate": 0.00025327266054103395, + "loss": 1.4008, + "step": 6950 + }, + { + "epoch": 0.7332278481012658, + "grad_norm": 0.6977738738059998, + "learning_rate": 0.0002530845854738796, + "loss": 1.4031, + "step": 6951 + }, + { + "epoch": 0.7333333333333333, + "grad_norm": 0.7083777189254761, + "learning_rate": 0.0002528965660885749, + "loss": 1.3944, + "step": 6952 + }, + { + "epoch": 0.7334388185654008, + "grad_norm": 0.7648510932922363, + "learning_rate": 0.00025270860240618904, + "loss": 1.3928, + "step": 6953 + }, + { + "epoch": 0.7335443037974684, + "grad_norm": 0.7591115236282349, + "learning_rate": 0.000252520694447784, + "loss": 1.403, + "step": 6954 + }, + { + "epoch": 0.7336497890295358, + "grad_norm": 0.894423246383667, + "learning_rate": 0.0002523328422344158, + "loss": 1.4, + "step": 6955 + }, + { + "epoch": 0.7337552742616034, + "grad_norm": 0.7113230228424072, + "learning_rate": 0.0002521450457871343, + "loss": 1.3797, + "step": 6956 + }, + { + "epoch": 0.7338607594936709, + "grad_norm": 0.7435194849967957, + "learning_rate": 0.0002519573051269828, + "loss": 1.4572, + "step": 6957 + }, + { + "epoch": 0.7339662447257383, + "grad_norm": 0.7140328884124756, + "learning_rate": 0.0002517696202749988, + "loss": 1.3713, + "step": 6958 + }, + { + "epoch": 0.7340717299578059, + "grad_norm": 0.6732792854309082, + "learning_rate": 0.00025158199125221325, + "loss": 1.3661, + "step": 6959 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 0.7003325819969177, + "learning_rate": 0.0002513944180796509, + "loss": 1.4111, + "step": 6960 + }, + { + "epoch": 0.7342827004219409, + "grad_norm": 0.7483459711074829, + "learning_rate": 0.0002512069007783301, + "loss": 1.4439, + "step": 6961 + }, + { + "epoch": 0.7343881856540084, + "grad_norm": 0.6926790475845337, + "learning_rate": 0.00025101943936926347, + "loss": 1.3771, + "step": 6962 + }, + { + "epoch": 0.734493670886076, + "grad_norm": 0.7507555484771729, + "learning_rate": 
0.0002508320338734568, + "loss": 1.3466, + "step": 6963 + }, + { + "epoch": 0.7345991561181434, + "grad_norm": 0.7595475912094116, + "learning_rate": 0.00025064468431190977, + "loss": 1.4114, + "step": 6964 + }, + { + "epoch": 0.734704641350211, + "grad_norm": 0.7147613763809204, + "learning_rate": 0.0002504573907056159, + "loss": 1.3848, + "step": 6965 + }, + { + "epoch": 0.7348101265822785, + "grad_norm": 0.7396936416625977, + "learning_rate": 0.00025027015307556234, + "loss": 1.4016, + "step": 6966 + }, + { + "epoch": 0.734915611814346, + "grad_norm": 0.6604900360107422, + "learning_rate": 0.00025008297144273, + "loss": 1.371, + "step": 6967 + }, + { + "epoch": 0.7350210970464135, + "grad_norm": 0.7069298028945923, + "learning_rate": 0.0002498958458280936, + "loss": 1.3983, + "step": 6968 + }, + { + "epoch": 0.735126582278481, + "grad_norm": 0.7200443148612976, + "learning_rate": 0.0002497087762526211, + "loss": 1.3936, + "step": 6969 + }, + { + "epoch": 0.7352320675105485, + "grad_norm": 0.7330124378204346, + "learning_rate": 0.0002495217627372752, + "loss": 1.3986, + "step": 6970 + }, + { + "epoch": 0.735337552742616, + "grad_norm": 0.7243316173553467, + "learning_rate": 0.0002493348053030113, + "loss": 1.3831, + "step": 6971 + }, + { + "epoch": 0.7354430379746836, + "grad_norm": 0.6800959706306458, + "learning_rate": 0.0002491479039707791, + "loss": 1.3925, + "step": 6972 + }, + { + "epoch": 0.735548523206751, + "grad_norm": 0.7110341787338257, + "learning_rate": 0.00024896105876152165, + "loss": 1.3857, + "step": 6973 + }, + { + "epoch": 0.7356540084388186, + "grad_norm": 0.6653382182121277, + "learning_rate": 0.0002487742696961761, + "loss": 1.3843, + "step": 6974 + }, + { + "epoch": 0.7357594936708861, + "grad_norm": 0.7916168570518494, + "learning_rate": 0.0002485875367956729, + "loss": 1.4312, + "step": 6975 + }, + { + "epoch": 0.7358649789029535, + "grad_norm": 0.6754764914512634, + "learning_rate": 0.00024840086008093645, + "loss": 1.3729, + "step": 
6976 + }, + { + "epoch": 0.7359704641350211, + "grad_norm": 0.6619775891304016, + "learning_rate": 0.0002482142395728848, + "loss": 1.4295, + "step": 6977 + }, + { + "epoch": 0.7360759493670886, + "grad_norm": 0.7209532856941223, + "learning_rate": 0.0002480276752924295, + "loss": 1.4038, + "step": 6978 + }, + { + "epoch": 0.7361814345991561, + "grad_norm": 0.6828001141548157, + "learning_rate": 0.0002478411672604766, + "loss": 1.404, + "step": 6979 + }, + { + "epoch": 0.7362869198312236, + "grad_norm": 0.67121422290802, + "learning_rate": 0.0002476547154979248, + "loss": 1.3795, + "step": 6980 + }, + { + "epoch": 0.7363924050632912, + "grad_norm": 0.7286157011985779, + "learning_rate": 0.00024746832002566703, + "loss": 1.4092, + "step": 6981 + }, + { + "epoch": 0.7364978902953586, + "grad_norm": 0.7195632457733154, + "learning_rate": 0.0002472819808645899, + "loss": 1.3839, + "step": 6982 + }, + { + "epoch": 0.7366033755274262, + "grad_norm": 0.6920239329338074, + "learning_rate": 0.0002470956980355735, + "loss": 1.3688, + "step": 6983 + }, + { + "epoch": 0.7367088607594937, + "grad_norm": 0.7105042338371277, + "learning_rate": 0.00024690947155949194, + "loss": 1.4219, + "step": 6984 + }, + { + "epoch": 0.7368143459915611, + "grad_norm": 0.7434129118919373, + "learning_rate": 0.0002467233014572127, + "loss": 1.4129, + "step": 6985 + }, + { + "epoch": 0.7369198312236287, + "grad_norm": 0.6627576947212219, + "learning_rate": 0.00024653718774959713, + "loss": 1.3682, + "step": 6986 + }, + { + "epoch": 0.7370253164556962, + "grad_norm": 0.7320904731750488, + "learning_rate": 0.00024635113045749985, + "loss": 1.369, + "step": 6987 + }, + { + "epoch": 0.7371308016877637, + "grad_norm": 0.7134013175964355, + "learning_rate": 0.00024616512960177014, + "loss": 1.4239, + "step": 6988 + }, + { + "epoch": 0.7372362869198312, + "grad_norm": 0.8673959970474243, + "learning_rate": 0.00024597918520324994, + "loss": 1.4475, + "step": 6989 + }, + { + "epoch": 0.7373417721518988, + 
"grad_norm": 0.7171036601066589, + "learning_rate": 0.00024579329728277534, + "loss": 1.3731, + "step": 6990 + }, + { + "epoch": 0.7374472573839662, + "grad_norm": 0.6925102472305298, + "learning_rate": 0.00024560746586117603, + "loss": 1.425, + "step": 6991 + }, + { + "epoch": 0.7375527426160338, + "grad_norm": 0.7774814367294312, + "learning_rate": 0.00024542169095927526, + "loss": 1.4482, + "step": 6992 + }, + { + "epoch": 0.7376582278481013, + "grad_norm": 0.7147067785263062, + "learning_rate": 0.00024523597259789004, + "loss": 1.3815, + "step": 6993 + }, + { + "epoch": 0.7377637130801687, + "grad_norm": 0.6825783252716064, + "learning_rate": 0.0002450503107978311, + "loss": 1.4009, + "step": 6994 + }, + { + "epoch": 0.7378691983122363, + "grad_norm": 0.6790881752967834, + "learning_rate": 0.00024486470557990247, + "loss": 1.4061, + "step": 6995 + }, + { + "epoch": 0.7379746835443038, + "grad_norm": 0.6762011051177979, + "learning_rate": 0.0002446791569649027, + "loss": 1.411, + "step": 6996 + }, + { + "epoch": 0.7380801687763713, + "grad_norm": 0.684247612953186, + "learning_rate": 0.0002444936649736232, + "loss": 1.3932, + "step": 6997 + }, + { + "epoch": 0.7381856540084388, + "grad_norm": 0.6745227575302124, + "learning_rate": 0.00024430822962684905, + "loss": 1.3857, + "step": 6998 + }, + { + "epoch": 0.7382911392405064, + "grad_norm": 0.7669612169265747, + "learning_rate": 0.00024412285094535952, + "loss": 1.4349, + "step": 6999 + }, + { + "epoch": 0.7383966244725738, + "grad_norm": 0.7244818806648254, + "learning_rate": 0.00024393752894992708, + "loss": 1.3949, + "step": 7000 + }, + { + "epoch": 0.7385021097046414, + "grad_norm": 0.8499237298965454, + "learning_rate": 0.00024375226366131787, + "loss": 1.38, + "step": 7001 + }, + { + "epoch": 0.7386075949367089, + "grad_norm": 0.6939482688903809, + "learning_rate": 0.00024356705510029196, + "loss": 1.3989, + "step": 7002 + }, + { + "epoch": 0.7387130801687763, + "grad_norm": 0.7842468619346619, + 
"learning_rate": 0.00024338190328760282, + "loss": 1.3974, + "step": 7003 + }, + { + "epoch": 0.7388185654008439, + "grad_norm": 0.7568947672843933, + "learning_rate": 0.00024319680824399736, + "loss": 1.3675, + "step": 7004 + }, + { + "epoch": 0.7389240506329114, + "grad_norm": 0.7357829213142395, + "learning_rate": 0.00024301176999021702, + "loss": 1.3535, + "step": 7005 + }, + { + "epoch": 0.7390295358649789, + "grad_norm": 0.8926505446434021, + "learning_rate": 0.00024282678854699592, + "loss": 1.3723, + "step": 7006 + }, + { + "epoch": 0.7391350210970464, + "grad_norm": 0.7280212640762329, + "learning_rate": 0.00024264186393506206, + "loss": 1.3902, + "step": 7007 + }, + { + "epoch": 0.739240506329114, + "grad_norm": 0.7843228578567505, + "learning_rate": 0.00024245699617513733, + "loss": 1.3568, + "step": 7008 + }, + { + "epoch": 0.7393459915611814, + "grad_norm": 0.8826673030853271, + "learning_rate": 0.00024227218528793696, + "loss": 1.3938, + "step": 7009 + }, + { + "epoch": 0.739451476793249, + "grad_norm": 0.6721569299697876, + "learning_rate": 0.00024208743129417004, + "loss": 1.3903, + "step": 7010 + }, + { + "epoch": 0.7395569620253165, + "grad_norm": 1.0119202136993408, + "learning_rate": 0.00024190273421453913, + "loss": 1.4142, + "step": 7011 + }, + { + "epoch": 0.739662447257384, + "grad_norm": 0.9260473847389221, + "learning_rate": 0.00024171809406974047, + "loss": 1.3769, + "step": 7012 + }, + { + "epoch": 0.7397679324894515, + "grad_norm": 0.8194217085838318, + "learning_rate": 0.0002415335108804636, + "loss": 1.4026, + "step": 7013 + }, + { + "epoch": 0.7398734177215189, + "grad_norm": 1.137622594833374, + "learning_rate": 0.0002413489846673925, + "loss": 1.3806, + "step": 7014 + }, + { + "epoch": 0.7399789029535865, + "grad_norm": 0.6993823051452637, + "learning_rate": 0.0002411645154512041, + "loss": 1.4116, + "step": 7015 + }, + { + "epoch": 0.740084388185654, + "grad_norm": 0.8618630170822144, + "learning_rate": 0.00024098010325256897, + 
"loss": 1.4135, + "step": 7016 + }, + { + "epoch": 0.7401898734177215, + "grad_norm": 0.8233925104141235, + "learning_rate": 0.00024079574809215149, + "loss": 1.3925, + "step": 7017 + }, + { + "epoch": 0.740295358649789, + "grad_norm": 0.7408778667449951, + "learning_rate": 0.00024061144999060956, + "loss": 1.4177, + "step": 7018 + }, + { + "epoch": 0.7404008438818566, + "grad_norm": 0.7786838412284851, + "learning_rate": 0.00024042720896859471, + "loss": 1.3652, + "step": 7019 + }, + { + "epoch": 0.740506329113924, + "grad_norm": 1.0805575847625732, + "learning_rate": 0.00024024302504675206, + "loss": 1.4088, + "step": 7020 + }, + { + "epoch": 0.7406118143459915, + "grad_norm": 0.711574375629425, + "learning_rate": 0.00024005889824572004, + "loss": 1.4056, + "step": 7021 + }, + { + "epoch": 0.7407172995780591, + "grad_norm": 0.8563660979270935, + "learning_rate": 0.00023987482858613154, + "loss": 1.4036, + "step": 7022 + }, + { + "epoch": 0.7408227848101265, + "grad_norm": 0.8277345299720764, + "learning_rate": 0.0002396908160886123, + "loss": 1.3602, + "step": 7023 + }, + { + "epoch": 0.7409282700421941, + "grad_norm": 0.7055728435516357, + "learning_rate": 0.0002395068607737816, + "loss": 1.3745, + "step": 7024 + }, + { + "epoch": 0.7410337552742616, + "grad_norm": 0.7846738696098328, + "learning_rate": 0.0002393229626622528, + "loss": 1.3739, + "step": 7025 + }, + { + "epoch": 0.7411392405063291, + "grad_norm": 0.7636492252349854, + "learning_rate": 0.00023913912177463248, + "loss": 1.3988, + "step": 7026 + }, + { + "epoch": 0.7412447257383966, + "grad_norm": 0.7643949389457703, + "learning_rate": 0.0002389553381315209, + "loss": 1.4451, + "step": 7027 + }, + { + "epoch": 0.7413502109704642, + "grad_norm": 0.8623961210250854, + "learning_rate": 0.00023877161175351206, + "loss": 1.4359, + "step": 7028 + }, + { + "epoch": 0.7414556962025316, + "grad_norm": 0.7011708617210388, + "learning_rate": 0.00023858794266119323, + "loss": 1.4348, + "step": 7029 + }, + { + 
"epoch": 0.7415611814345991, + "grad_norm": 0.785719633102417, + "learning_rate": 0.0002384043308751454, + "loss": 1.3872, + "step": 7030 + }, + { + "epoch": 0.7416666666666667, + "grad_norm": 0.7953957915306091, + "learning_rate": 0.0002382207764159436, + "loss": 1.3926, + "step": 7031 + }, + { + "epoch": 0.7417721518987341, + "grad_norm": 0.7193179726600647, + "learning_rate": 0.00023803727930415568, + "loss": 1.4329, + "step": 7032 + }, + { + "epoch": 0.7418776371308017, + "grad_norm": 0.838819146156311, + "learning_rate": 0.00023785383956034353, + "loss": 1.4294, + "step": 7033 + }, + { + "epoch": 0.7419831223628692, + "grad_norm": 0.6928951144218445, + "learning_rate": 0.00023767045720506243, + "loss": 1.3971, + "step": 7034 + }, + { + "epoch": 0.7420886075949367, + "grad_norm": 0.8383245468139648, + "learning_rate": 0.00023748713225886137, + "loss": 1.4211, + "step": 7035 + }, + { + "epoch": 0.7421940928270042, + "grad_norm": 0.7500584125518799, + "learning_rate": 0.0002373038647422827, + "loss": 1.3909, + "step": 7036 + }, + { + "epoch": 0.7422995780590718, + "grad_norm": 0.7862129211425781, + "learning_rate": 0.00023712065467586252, + "loss": 1.3999, + "step": 7037 + }, + { + "epoch": 0.7424050632911392, + "grad_norm": 0.7134594321250916, + "learning_rate": 0.00023693750208013045, + "loss": 1.4109, + "step": 7038 + }, + { + "epoch": 0.7425105485232067, + "grad_norm": 0.7368189096450806, + "learning_rate": 0.00023675440697560943, + "loss": 1.4338, + "step": 7039 + }, + { + "epoch": 0.7426160337552743, + "grad_norm": 0.877496063709259, + "learning_rate": 0.00023657136938281653, + "loss": 1.3692, + "step": 7040 + }, + { + "epoch": 0.7427215189873417, + "grad_norm": 0.7442905306816101, + "learning_rate": 0.00023638838932226196, + "loss": 1.3637, + "step": 7041 + }, + { + "epoch": 0.7428270042194093, + "grad_norm": 0.7480473518371582, + "learning_rate": 0.00023620546681444942, + "loss": 1.4584, + "step": 7042 + }, + { + "epoch": 0.7429324894514768, + 
"grad_norm": 0.7130622267723083, + "learning_rate": 0.00023602260187987635, + "loss": 1.4223, + "step": 7043 + }, + { + "epoch": 0.7430379746835443, + "grad_norm": 0.7010446190834045, + "learning_rate": 0.0002358397945390336, + "loss": 1.3769, + "step": 7044 + }, + { + "epoch": 0.7431434599156118, + "grad_norm": 0.6605304479598999, + "learning_rate": 0.0002356570448124058, + "loss": 1.4056, + "step": 7045 + }, + { + "epoch": 0.7432489451476794, + "grad_norm": 0.7263056635856628, + "learning_rate": 0.00023547435272047083, + "loss": 1.3946, + "step": 7046 + }, + { + "epoch": 0.7433544303797468, + "grad_norm": 0.7414980530738831, + "learning_rate": 0.00023529171828370033, + "loss": 1.3688, + "step": 7047 + }, + { + "epoch": 0.7434599156118143, + "grad_norm": 0.7145434617996216, + "learning_rate": 0.0002351091415225591, + "loss": 1.3923, + "step": 7048 + }, + { + "epoch": 0.7435654008438819, + "grad_norm": 0.7331005930900574, + "learning_rate": 0.0002349266224575063, + "loss": 1.3926, + "step": 7049 + }, + { + "epoch": 0.7436708860759493, + "grad_norm": 0.723391592502594, + "learning_rate": 0.00023474416110899377, + "loss": 1.3607, + "step": 7050 + }, + { + "epoch": 0.7437763713080169, + "grad_norm": 0.736882209777832, + "learning_rate": 0.00023456175749746736, + "loss": 1.4038, + "step": 7051 + }, + { + "epoch": 0.7438818565400844, + "grad_norm": 0.7316796779632568, + "learning_rate": 0.0002343794116433662, + "loss": 1.3695, + "step": 7052 + }, + { + "epoch": 0.7439873417721519, + "grad_norm": 0.7058619260787964, + "learning_rate": 0.00023419712356712307, + "loss": 1.3569, + "step": 7053 + }, + { + "epoch": 0.7440928270042194, + "grad_norm": 0.683116614818573, + "learning_rate": 0.00023401489328916432, + "loss": 1.3893, + "step": 7054 + }, + { + "epoch": 0.744198312236287, + "grad_norm": 0.7230297327041626, + "learning_rate": 0.00023383272082990963, + "loss": 1.4294, + "step": 7055 + }, + { + "epoch": 0.7443037974683544, + "grad_norm": 0.8196006417274475, + 
"learning_rate": 0.00023365060620977223, + "loss": 1.4313, + "step": 7056 + }, + { + "epoch": 0.744409282700422, + "grad_norm": 0.6673588156700134, + "learning_rate": 0.00023346854944915937, + "loss": 1.3828, + "step": 7057 + }, + { + "epoch": 0.7445147679324895, + "grad_norm": 0.7547497749328613, + "learning_rate": 0.00023328655056847124, + "loss": 1.4263, + "step": 7058 + }, + { + "epoch": 0.7446202531645569, + "grad_norm": 0.760692834854126, + "learning_rate": 0.0002331046095881017, + "loss": 1.4011, + "step": 7059 + }, + { + "epoch": 0.7447257383966245, + "grad_norm": 0.7282455563545227, + "learning_rate": 0.00023292272652843807, + "loss": 1.4029, + "step": 7060 + }, + { + "epoch": 0.744831223628692, + "grad_norm": 0.6976463794708252, + "learning_rate": 0.00023274090140986138, + "loss": 1.4585, + "step": 7061 + }, + { + "epoch": 0.7449367088607595, + "grad_norm": 0.6874116659164429, + "learning_rate": 0.00023255913425274588, + "loss": 1.382, + "step": 7062 + }, + { + "epoch": 0.745042194092827, + "grad_norm": 0.6893929243087769, + "learning_rate": 0.00023237742507745964, + "loss": 1.3767, + "step": 7063 + }, + { + "epoch": 0.7451476793248946, + "grad_norm": 0.7663530707359314, + "learning_rate": 0.00023219577390436397, + "loss": 1.4043, + "step": 7064 + }, + { + "epoch": 0.745253164556962, + "grad_norm": 0.6927251219749451, + "learning_rate": 0.00023201418075381364, + "loss": 1.39, + "step": 7065 + }, + { + "epoch": 0.7453586497890295, + "grad_norm": 0.6777464747428894, + "learning_rate": 0.00023183264564615756, + "loss": 1.402, + "step": 7066 + }, + { + "epoch": 0.7454641350210971, + "grad_norm": 0.8078547716140747, + "learning_rate": 0.00023165116860173726, + "loss": 1.4085, + "step": 7067 + }, + { + "epoch": 0.7455696202531645, + "grad_norm": 0.6973373293876648, + "learning_rate": 0.00023146974964088825, + "loss": 1.4198, + "step": 7068 + }, + { + "epoch": 0.7456751054852321, + "grad_norm": 0.7484402060508728, + "learning_rate": 0.00023128838878393946, + 
"loss": 1.417, + "step": 7069 + }, + { + "epoch": 0.7457805907172996, + "grad_norm": 0.6591492295265198, + "learning_rate": 0.00023110708605121317, + "loss": 1.3972, + "step": 7070 + }, + { + "epoch": 0.7458860759493671, + "grad_norm": 0.7508923411369324, + "learning_rate": 0.00023092584146302539, + "loss": 1.4213, + "step": 7071 + }, + { + "epoch": 0.7459915611814346, + "grad_norm": 0.6955106854438782, + "learning_rate": 0.0002307446550396854, + "loss": 1.4105, + "step": 7072 + }, + { + "epoch": 0.7460970464135022, + "grad_norm": 0.7054818868637085, + "learning_rate": 0.0002305635268014961, + "loss": 1.397, + "step": 7073 + }, + { + "epoch": 0.7462025316455696, + "grad_norm": 0.6856507062911987, + "learning_rate": 0.0002303824567687534, + "loss": 1.3895, + "step": 7074 + }, + { + "epoch": 0.7463080168776371, + "grad_norm": 0.6917305588722229, + "learning_rate": 0.00023020144496174781, + "loss": 1.3755, + "step": 7075 + }, + { + "epoch": 0.7464135021097047, + "grad_norm": 0.6832747459411621, + "learning_rate": 0.0002300204914007622, + "loss": 1.376, + "step": 7076 + }, + { + "epoch": 0.7465189873417721, + "grad_norm": 0.6498675346374512, + "learning_rate": 0.00022983959610607338, + "loss": 1.3705, + "step": 7077 + }, + { + "epoch": 0.7466244725738397, + "grad_norm": 0.695845365524292, + "learning_rate": 0.00022965875909795164, + "loss": 1.4052, + "step": 7078 + }, + { + "epoch": 0.7467299578059071, + "grad_norm": 0.7200582027435303, + "learning_rate": 0.00022947798039666051, + "loss": 1.4182, + "step": 7079 + }, + { + "epoch": 0.7468354430379747, + "grad_norm": 0.7085361480712891, + "learning_rate": 0.00022929726002245728, + "loss": 1.4198, + "step": 7080 + }, + { + "epoch": 0.7469409282700422, + "grad_norm": 0.7896950840950012, + "learning_rate": 0.00022911659799559254, + "loss": 1.3826, + "step": 7081 + }, + { + "epoch": 0.7470464135021097, + "grad_norm": 0.756096363067627, + "learning_rate": 0.00022893599433631014, + "loss": 1.4249, + "step": 7082 + }, + { + 
"epoch": 0.7471518987341772, + "grad_norm": 0.6804926991462708, + "learning_rate": 0.00022875544906484797, + "loss": 1.4153, + "step": 7083 + }, + { + "epoch": 0.7472573839662447, + "grad_norm": 0.7813074588775635, + "learning_rate": 0.00022857496220143696, + "loss": 1.3793, + "step": 7084 + }, + { + "epoch": 0.7473628691983122, + "grad_norm": 0.7242884635925293, + "learning_rate": 0.00022839453376630149, + "loss": 1.4011, + "step": 7085 + }, + { + "epoch": 0.7474683544303797, + "grad_norm": 0.6837049126625061, + "learning_rate": 0.00022821416377965948, + "loss": 1.385, + "step": 7086 + }, + { + "epoch": 0.7475738396624473, + "grad_norm": 0.6992626190185547, + "learning_rate": 0.00022803385226172226, + "loss": 1.4593, + "step": 7087 + }, + { + "epoch": 0.7476793248945147, + "grad_norm": 0.6837834119796753, + "learning_rate": 0.0002278535992326947, + "loss": 1.4046, + "step": 7088 + }, + { + "epoch": 0.7477848101265823, + "grad_norm": 0.6950384974479675, + "learning_rate": 0.00022767340471277492, + "loss": 1.4209, + "step": 7089 + }, + { + "epoch": 0.7478902953586498, + "grad_norm": 0.720840573310852, + "learning_rate": 0.00022749326872215472, + "loss": 1.3347, + "step": 7090 + }, + { + "epoch": 0.7479957805907173, + "grad_norm": 0.6884075403213501, + "learning_rate": 0.00022731319128101906, + "loss": 1.3731, + "step": 7091 + }, + { + "epoch": 0.7481012658227848, + "grad_norm": 0.7091366648674011, + "learning_rate": 0.0002271331724095468, + "loss": 1.3733, + "step": 7092 + }, + { + "epoch": 0.7482067510548523, + "grad_norm": 0.6947173476219177, + "learning_rate": 0.0002269532121279099, + "loss": 1.3992, + "step": 7093 + }, + { + "epoch": 0.7483122362869198, + "grad_norm": 0.723362922668457, + "learning_rate": 0.00022677331045627366, + "loss": 1.415, + "step": 7094 + }, + { + "epoch": 0.7484177215189873, + "grad_norm": 0.7337915897369385, + "learning_rate": 0.00022659346741479708, + "loss": 1.4211, + "step": 7095 + }, + { + "epoch": 0.7485232067510549, + "grad_norm": 
0.7708825469017029, + "learning_rate": 0.00022641368302363235, + "loss": 1.4158, + "step": 7096 + }, + { + "epoch": 0.7486286919831223, + "grad_norm": 0.6870650053024292, + "learning_rate": 0.00022623395730292538, + "loss": 1.3919, + "step": 7097 + }, + { + "epoch": 0.7487341772151899, + "grad_norm": 0.9443276524543762, + "learning_rate": 0.0002260542902728151, + "loss": 1.4238, + "step": 7098 + }, + { + "epoch": 0.7488396624472574, + "grad_norm": 0.6930131316184998, + "learning_rate": 0.00022587468195343436, + "loss": 1.3853, + "step": 7099 + }, + { + "epoch": 0.7489451476793249, + "grad_norm": 0.7589298486709595, + "learning_rate": 0.0002256951323649087, + "loss": 1.3642, + "step": 7100 + }, + { + "epoch": 0.7490506329113924, + "grad_norm": 0.7224299907684326, + "learning_rate": 0.00022551564152735814, + "loss": 1.4331, + "step": 7101 + }, + { + "epoch": 0.74915611814346, + "grad_norm": 0.7099999189376831, + "learning_rate": 0.00022533620946089524, + "loss": 1.4389, + "step": 7102 + }, + { + "epoch": 0.7492616033755274, + "grad_norm": 0.6701095104217529, + "learning_rate": 0.00022515683618562626, + "loss": 1.4048, + "step": 7103 + }, + { + "epoch": 0.7493670886075949, + "grad_norm": 0.6820799708366394, + "learning_rate": 0.00022497752172165095, + "loss": 1.3684, + "step": 7104 + }, + { + "epoch": 0.7494725738396625, + "grad_norm": 0.6723797917366028, + "learning_rate": 0.0002247982660890623, + "loss": 1.4159, + "step": 7105 + }, + { + "epoch": 0.7495780590717299, + "grad_norm": 0.7040444016456604, + "learning_rate": 0.00022461906930794687, + "loss": 1.4249, + "step": 7106 + }, + { + "epoch": 0.7496835443037975, + "grad_norm": 0.675467312335968, + "learning_rate": 0.00022443993139838447, + "loss": 1.4072, + "step": 7107 + }, + { + "epoch": 0.749789029535865, + "grad_norm": 0.7305972576141357, + "learning_rate": 0.00022426085238044823, + "loss": 1.3796, + "step": 7108 + }, + { + "epoch": 0.7498945147679325, + "grad_norm": 0.7093145251274109, + "learning_rate": 
0.00022408183227420528, + "loss": 1.3743, + "step": 7109 + }, + { + "epoch": 0.75, + "grad_norm": 0.7715895771980286, + "learning_rate": 0.00022390287109971547, + "loss": 1.4088, + "step": 7110 + }, + { + "epoch": 0.7501054852320675, + "grad_norm": 0.7016634345054626, + "learning_rate": 0.00022372396887703234, + "loss": 1.4488, + "step": 7111 + }, + { + "epoch": 0.750210970464135, + "grad_norm": 0.6945618987083435, + "learning_rate": 0.00022354512562620268, + "loss": 1.4003, + "step": 7112 + }, + { + "epoch": 0.7503164556962025, + "grad_norm": 0.6662477850914001, + "learning_rate": 0.0002233663413672669, + "loss": 1.4568, + "step": 7113 + }, + { + "epoch": 0.7504219409282701, + "grad_norm": 0.6795189380645752, + "learning_rate": 0.00022318761612025856, + "loss": 1.3717, + "step": 7114 + }, + { + "epoch": 0.7505274261603375, + "grad_norm": 0.7128087878227234, + "learning_rate": 0.00022300894990520478, + "loss": 1.3947, + "step": 7115 + }, + { + "epoch": 0.7506329113924051, + "grad_norm": 0.7615166902542114, + "learning_rate": 0.000222830342742126, + "loss": 1.3678, + "step": 7116 + }, + { + "epoch": 0.7507383966244726, + "grad_norm": 0.7917550802230835, + "learning_rate": 0.00022265179465103574, + "loss": 1.4058, + "step": 7117 + }, + { + "epoch": 0.75084388185654, + "grad_norm": 0.74428391456604, + "learning_rate": 0.00022247330565194171, + "loss": 1.3669, + "step": 7118 + }, + { + "epoch": 0.7509493670886076, + "grad_norm": 0.7521764039993286, + "learning_rate": 0.0002222948757648443, + "loss": 1.3558, + "step": 7119 + }, + { + "epoch": 0.7510548523206751, + "grad_norm": 0.7438180446624756, + "learning_rate": 0.00022211650500973746, + "loss": 1.4306, + "step": 7120 + }, + { + "epoch": 0.7511603375527426, + "grad_norm": 0.8241280317306519, + "learning_rate": 0.0002219381934066084, + "loss": 1.4533, + "step": 7121 + }, + { + "epoch": 0.7512658227848101, + "grad_norm": 0.8391345143318176, + "learning_rate": 0.00022175994097543806, + "loss": 1.3992, + "step": 7122 + 
}, + { + "epoch": 0.7513713080168777, + "grad_norm": 0.6750499606132507, + "learning_rate": 0.0002215817477362003, + "loss": 1.3928, + "step": 7123 + }, + { + "epoch": 0.7514767932489451, + "grad_norm": 0.7723751664161682, + "learning_rate": 0.00022140361370886265, + "loss": 1.4094, + "step": 7124 + }, + { + "epoch": 0.7515822784810127, + "grad_norm": 0.7986998558044434, + "learning_rate": 0.00022122553891338586, + "loss": 1.4176, + "step": 7125 + }, + { + "epoch": 0.7516877637130802, + "grad_norm": 0.7283111214637756, + "learning_rate": 0.00022104752336972396, + "loss": 1.4199, + "step": 7126 + }, + { + "epoch": 0.7517932489451477, + "grad_norm": 0.6922345757484436, + "learning_rate": 0.00022086956709782495, + "loss": 1.4158, + "step": 7127 + }, + { + "epoch": 0.7518987341772152, + "grad_norm": 0.7124416828155518, + "learning_rate": 0.0002206916701176293, + "loss": 1.4131, + "step": 7128 + }, + { + "epoch": 0.7520042194092827, + "grad_norm": 0.6873795986175537, + "learning_rate": 0.00022051383244907143, + "loss": 1.358, + "step": 7129 + }, + { + "epoch": 0.7521097046413502, + "grad_norm": 0.7152830362319946, + "learning_rate": 0.0002203360541120789, + "loss": 1.4039, + "step": 7130 + }, + { + "epoch": 0.7522151898734177, + "grad_norm": 0.743510365486145, + "learning_rate": 0.00022015833512657268, + "loss": 1.3986, + "step": 7131 + }, + { + "epoch": 0.7523206751054853, + "grad_norm": 0.6819198131561279, + "learning_rate": 0.000219980675512467, + "loss": 1.3704, + "step": 7132 + }, + { + "epoch": 0.7524261603375527, + "grad_norm": 0.8903578519821167, + "learning_rate": 0.00021980307528966962, + "loss": 1.3736, + "step": 7133 + }, + { + "epoch": 0.7525316455696203, + "grad_norm": 0.8152833580970764, + "learning_rate": 0.00021962553447808108, + "loss": 1.3722, + "step": 7134 + }, + { + "epoch": 0.7526371308016878, + "grad_norm": 0.8074355125427246, + "learning_rate": 0.00021944805309759643, + "loss": 1.3603, + "step": 7135 + }, + { + "epoch": 0.7527426160337553, + 
"grad_norm": 0.9462188482284546, + "learning_rate": 0.000219270631168103, + "loss": 1.4026, + "step": 7136 + }, + { + "epoch": 0.7528481012658228, + "grad_norm": 0.8681802749633789, + "learning_rate": 0.0002190932687094818, + "loss": 1.4332, + "step": 7137 + }, + { + "epoch": 0.7529535864978903, + "grad_norm": 0.880003035068512, + "learning_rate": 0.00021891596574160715, + "loss": 1.4022, + "step": 7138 + }, + { + "epoch": 0.7530590717299578, + "grad_norm": 0.7548840641975403, + "learning_rate": 0.0002187387222843467, + "loss": 1.3976, + "step": 7139 + }, + { + "epoch": 0.7531645569620253, + "grad_norm": 0.7809464335441589, + "learning_rate": 0.00021856153835756164, + "loss": 1.4046, + "step": 7140 + }, + { + "epoch": 0.7532700421940929, + "grad_norm": 1.0602686405181885, + "learning_rate": 0.00021838441398110617, + "loss": 1.4015, + "step": 7141 + }, + { + "epoch": 0.7533755274261603, + "grad_norm": 0.7463604211807251, + "learning_rate": 0.000218207349174828, + "loss": 1.3998, + "step": 7142 + }, + { + "epoch": 0.7534810126582279, + "grad_norm": 0.7358394265174866, + "learning_rate": 0.0002180303439585678, + "loss": 1.3721, + "step": 7143 + }, + { + "epoch": 0.7535864978902953, + "grad_norm": 1.0096821784973145, + "learning_rate": 0.0002178533983521605, + "loss": 1.414, + "step": 7144 + }, + { + "epoch": 0.7536919831223629, + "grad_norm": 0.7424828410148621, + "learning_rate": 0.0002176765123754334, + "loss": 1.3813, + "step": 7145 + }, + { + "epoch": 0.7537974683544304, + "grad_norm": 0.7488431930541992, + "learning_rate": 0.00021749968604820754, + "loss": 1.4129, + "step": 7146 + }, + { + "epoch": 0.7539029535864978, + "grad_norm": 0.7857943773269653, + "learning_rate": 0.00021732291939029712, + "loss": 1.4, + "step": 7147 + }, + { + "epoch": 0.7540084388185654, + "grad_norm": 0.7356014251708984, + "learning_rate": 0.00021714621242150973, + "loss": 1.3637, + "step": 7148 + }, + { + "epoch": 0.7541139240506329, + "grad_norm": 0.706786036491394, + "learning_rate": 
0.0002169695651616463, + "loss": 1.352, + "step": 7149 + }, + { + "epoch": 0.7542194092827004, + "grad_norm": 0.9881721138954163, + "learning_rate": 0.00021679297763050104, + "loss": 1.404, + "step": 7150 + }, + { + "epoch": 0.7543248945147679, + "grad_norm": 0.7376975417137146, + "learning_rate": 0.00021661644984786142, + "loss": 1.379, + "step": 7151 + }, + { + "epoch": 0.7544303797468355, + "grad_norm": 0.8358660340309143, + "learning_rate": 0.00021643998183350802, + "loss": 1.4014, + "step": 7152 + }, + { + "epoch": 0.7545358649789029, + "grad_norm": 0.8187090754508972, + "learning_rate": 0.00021626357360721556, + "loss": 1.3717, + "step": 7153 + }, + { + "epoch": 0.7546413502109705, + "grad_norm": 0.7420418858528137, + "learning_rate": 0.0002160872251887511, + "loss": 1.3717, + "step": 7154 + }, + { + "epoch": 0.754746835443038, + "grad_norm": 0.9514976143836975, + "learning_rate": 0.00021591093659787528, + "loss": 1.4461, + "step": 7155 + }, + { + "epoch": 0.7548523206751054, + "grad_norm": 0.7187660336494446, + "learning_rate": 0.00021573470785434237, + "loss": 1.4281, + "step": 7156 + }, + { + "epoch": 0.754957805907173, + "grad_norm": 0.7616822719573975, + "learning_rate": 0.00021555853897789942, + "loss": 1.3591, + "step": 7157 + }, + { + "epoch": 0.7550632911392405, + "grad_norm": 0.9245477318763733, + "learning_rate": 0.0002153824299882872, + "loss": 1.3878, + "step": 7158 + }, + { + "epoch": 0.755168776371308, + "grad_norm": 0.7028918266296387, + "learning_rate": 0.00021520638090523955, + "loss": 1.3973, + "step": 7159 + }, + { + "epoch": 0.7552742616033755, + "grad_norm": 0.9324410557746887, + "learning_rate": 0.0002150303917484834, + "loss": 1.4435, + "step": 7160 + }, + { + "epoch": 0.7553797468354431, + "grad_norm": 0.8496628999710083, + "learning_rate": 0.00021485446253773966, + "loss": 1.4004, + "step": 7161 + }, + { + "epoch": 0.7554852320675105, + "grad_norm": 0.6993350386619568, + "learning_rate": 0.00021467859329272188, + "loss": 1.4053, + 
"step": 7162 + }, + { + "epoch": 0.755590717299578, + "grad_norm": 0.839276134967804, + "learning_rate": 0.00021450278403313707, + "loss": 1.3842, + "step": 7163 + }, + { + "epoch": 0.7556962025316456, + "grad_norm": 0.9699743986129761, + "learning_rate": 0.0002143270347786856, + "loss": 1.4101, + "step": 7164 + }, + { + "epoch": 0.755801687763713, + "grad_norm": 0.6983271837234497, + "learning_rate": 0.0002141513455490609, + "loss": 1.417, + "step": 7165 + }, + { + "epoch": 0.7559071729957806, + "grad_norm": 0.9604791402816772, + "learning_rate": 0.00021397571636394991, + "loss": 1.3619, + "step": 7166 + }, + { + "epoch": 0.7560126582278481, + "grad_norm": 0.908984363079071, + "learning_rate": 0.00021380014724303286, + "loss": 1.3936, + "step": 7167 + }, + { + "epoch": 0.7561181434599156, + "grad_norm": 0.7220180034637451, + "learning_rate": 0.00021362463820598297, + "loss": 1.4088, + "step": 7168 + }, + { + "epoch": 0.7562236286919831, + "grad_norm": 1.0849757194519043, + "learning_rate": 0.00021344918927246678, + "loss": 1.4278, + "step": 7169 + }, + { + "epoch": 0.7563291139240507, + "grad_norm": 0.881550133228302, + "learning_rate": 0.0002132738004621446, + "loss": 1.4292, + "step": 7170 + }, + { + "epoch": 0.7564345991561181, + "grad_norm": 0.8057795166969299, + "learning_rate": 0.0002130984717946695, + "loss": 1.3916, + "step": 7171 + }, + { + "epoch": 0.7565400843881857, + "grad_norm": 0.9613450765609741, + "learning_rate": 0.00021292320328968783, + "loss": 1.3984, + "step": 7172 + }, + { + "epoch": 0.7566455696202532, + "grad_norm": 0.8230540752410889, + "learning_rate": 0.0002127479949668393, + "loss": 1.3982, + "step": 7173 + }, + { + "epoch": 0.7567510548523206, + "grad_norm": 0.9484757781028748, + "learning_rate": 0.000212572846845757, + "loss": 1.3722, + "step": 7174 + }, + { + "epoch": 0.7568565400843882, + "grad_norm": 1.0594958066940308, + "learning_rate": 0.000212397758946067, + "loss": 1.4186, + "step": 7175 + }, + { + "epoch": 
0.7569620253164557, + "grad_norm": 0.6799182891845703, + "learning_rate": 0.0002122227312873889, + "loss": 1.3806, + "step": 7176 + }, + { + "epoch": 0.7570675105485232, + "grad_norm": 0.8403701782226562, + "learning_rate": 0.00021204776388933534, + "loss": 1.4013, + "step": 7177 + }, + { + "epoch": 0.7571729957805907, + "grad_norm": 1.2088487148284912, + "learning_rate": 0.00021187285677151205, + "loss": 1.3459, + "step": 7178 + }, + { + "epoch": 0.7572784810126583, + "grad_norm": 0.756799578666687, + "learning_rate": 0.00021169800995351874, + "loss": 1.3723, + "step": 7179 + }, + { + "epoch": 0.7573839662447257, + "grad_norm": 0.8195507526397705, + "learning_rate": 0.00021152322345494763, + "loss": 1.3986, + "step": 7180 + }, + { + "epoch": 0.7574894514767933, + "grad_norm": 0.8369058966636658, + "learning_rate": 0.00021134849729538438, + "loss": 1.3825, + "step": 7181 + }, + { + "epoch": 0.7575949367088608, + "grad_norm": 0.8008782267570496, + "learning_rate": 0.00021117383149440801, + "loss": 1.4006, + "step": 7182 + }, + { + "epoch": 0.7577004219409282, + "grad_norm": 0.6984096169471741, + "learning_rate": 0.00021099922607159064, + "loss": 1.3553, + "step": 7183 + }, + { + "epoch": 0.7578059071729958, + "grad_norm": 0.9438830614089966, + "learning_rate": 0.00021082468104649773, + "loss": 1.4112, + "step": 7184 + }, + { + "epoch": 0.7579113924050633, + "grad_norm": 0.8840997219085693, + "learning_rate": 0.00021065019643868785, + "loss": 1.4281, + "step": 7185 + }, + { + "epoch": 0.7580168776371308, + "grad_norm": 0.7691326141357422, + "learning_rate": 0.00021047577226771292, + "loss": 1.348, + "step": 7186 + }, + { + "epoch": 0.7581223628691983, + "grad_norm": 1.0350295305252075, + "learning_rate": 0.00021030140855311772, + "loss": 1.4097, + "step": 7187 + }, + { + "epoch": 0.7582278481012659, + "grad_norm": 0.7385426163673401, + "learning_rate": 0.00021012710531444112, + "loss": 1.3657, + "step": 7188 + }, + { + "epoch": 0.7583333333333333, + "grad_norm": 
0.7734797596931458, + "learning_rate": 0.00020995286257121453, + "loss": 1.3897, + "step": 7189 + }, + { + "epoch": 0.7584388185654009, + "grad_norm": 0.8582370281219482, + "learning_rate": 0.00020977868034296253, + "loss": 1.3721, + "step": 7190 + }, + { + "epoch": 0.7585443037974684, + "grad_norm": 0.6955395936965942, + "learning_rate": 0.0002096045586492031, + "loss": 1.3828, + "step": 7191 + }, + { + "epoch": 0.7586497890295358, + "grad_norm": 0.7283288240432739, + "learning_rate": 0.00020943049750944768, + "loss": 1.3871, + "step": 7192 + }, + { + "epoch": 0.7587552742616034, + "grad_norm": 0.8181195855140686, + "learning_rate": 0.00020925649694320046, + "loss": 1.4051, + "step": 7193 + }, + { + "epoch": 0.7588607594936709, + "grad_norm": 0.8558921217918396, + "learning_rate": 0.0002090825569699591, + "loss": 1.3654, + "step": 7194 + }, + { + "epoch": 0.7589662447257384, + "grad_norm": 0.830424964427948, + "learning_rate": 0.0002089086776092146, + "loss": 1.4159, + "step": 7195 + }, + { + "epoch": 0.7590717299578059, + "grad_norm": 0.9542977809906006, + "learning_rate": 0.0002087348588804505, + "loss": 1.4259, + "step": 7196 + }, + { + "epoch": 0.7591772151898735, + "grad_norm": 0.7497240304946899, + "learning_rate": 0.0002085611008031449, + "loss": 1.3687, + "step": 7197 + }, + { + "epoch": 0.7592827004219409, + "grad_norm": 0.7281267046928406, + "learning_rate": 0.00020838740339676763, + "loss": 1.4023, + "step": 7198 + }, + { + "epoch": 0.7593881856540085, + "grad_norm": 0.74925297498703, + "learning_rate": 0.00020821376668078264, + "loss": 1.3894, + "step": 7199 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 0.7080130577087402, + "learning_rate": 0.00020804019067464667, + "loss": 1.3713, + "step": 7200 + }, + { + "epoch": 0.7595991561181434, + "grad_norm": 0.7665355205535889, + "learning_rate": 0.00020786667539780977, + "loss": 1.3643, + "step": 7201 + }, + { + "epoch": 0.759704641350211, + "grad_norm": 0.733002781867981, + "learning_rate": 
0.00020769322086971524, + "loss": 1.3915, + "step": 7202 + }, + { + "epoch": 0.7598101265822785, + "grad_norm": 0.6802226901054382, + "learning_rate": 0.00020751982710979944, + "loss": 1.4089, + "step": 7203 + }, + { + "epoch": 0.759915611814346, + "grad_norm": 0.7102795243263245, + "learning_rate": 0.0002073464941374921, + "loss": 1.3973, + "step": 7204 + }, + { + "epoch": 0.7600210970464135, + "grad_norm": 0.7580370903015137, + "learning_rate": 0.000207173221972216, + "loss": 1.4342, + "step": 7205 + }, + { + "epoch": 0.7601265822784811, + "grad_norm": 0.6849876046180725, + "learning_rate": 0.00020700001063338696, + "loss": 1.4162, + "step": 7206 + }, + { + "epoch": 0.7602320675105485, + "grad_norm": 0.7138596177101135, + "learning_rate": 0.00020682686014041458, + "loss": 1.375, + "step": 7207 + }, + { + "epoch": 0.760337552742616, + "grad_norm": 0.7674537301063538, + "learning_rate": 0.00020665377051270095, + "loss": 1.4015, + "step": 7208 + }, + { + "epoch": 0.7604430379746835, + "grad_norm": 0.7097899317741394, + "learning_rate": 0.00020648074176964182, + "loss": 1.3729, + "step": 7209 + }, + { + "epoch": 0.760548523206751, + "grad_norm": 0.7129647135734558, + "learning_rate": 0.00020630777393062575, + "loss": 1.3933, + "step": 7210 + }, + { + "epoch": 0.7606540084388186, + "grad_norm": 0.708614706993103, + "learning_rate": 0.00020613486701503473, + "loss": 1.4255, + "step": 7211 + }, + { + "epoch": 0.760759493670886, + "grad_norm": 0.6812008619308472, + "learning_rate": 0.00020596202104224376, + "loss": 1.3893, + "step": 7212 + }, + { + "epoch": 0.7608649789029536, + "grad_norm": 0.7349188923835754, + "learning_rate": 0.0002057892360316212, + "loss": 1.4162, + "step": 7213 + }, + { + "epoch": 0.7609704641350211, + "grad_norm": 0.7000165581703186, + "learning_rate": 0.00020561651200252836, + "loss": 1.4187, + "step": 7214 + }, + { + "epoch": 0.7610759493670886, + "grad_norm": 0.7712373733520508, + "learning_rate": 0.00020544384897431997, + "loss": 1.4188, + 
"step": 7215 + }, + { + "epoch": 0.7611814345991561, + "grad_norm": 0.6983794569969177, + "learning_rate": 0.00020527124696634343, + "loss": 1.3955, + "step": 7216 + }, + { + "epoch": 0.7612869198312237, + "grad_norm": 0.7353876829147339, + "learning_rate": 0.00020509870599794022, + "loss": 1.4223, + "step": 7217 + }, + { + "epoch": 0.7613924050632911, + "grad_norm": 0.6745840311050415, + "learning_rate": 0.0002049262260884441, + "loss": 1.4251, + "step": 7218 + }, + { + "epoch": 0.7614978902953586, + "grad_norm": 0.6922546625137329, + "learning_rate": 0.00020475380725718228, + "loss": 1.392, + "step": 7219 + }, + { + "epoch": 0.7616033755274262, + "grad_norm": 0.7533679604530334, + "learning_rate": 0.00020458144952347523, + "loss": 1.3812, + "step": 7220 + }, + { + "epoch": 0.7617088607594936, + "grad_norm": 0.7335350513458252, + "learning_rate": 0.0002044091529066365, + "loss": 1.4181, + "step": 7221 + }, + { + "epoch": 0.7618143459915612, + "grad_norm": 0.7325161695480347, + "learning_rate": 0.00020423691742597273, + "loss": 1.3961, + "step": 7222 + }, + { + "epoch": 0.7619198312236287, + "grad_norm": 0.6720327734947205, + "learning_rate": 0.0002040647431007837, + "loss": 1.3758, + "step": 7223 + }, + { + "epoch": 0.7620253164556962, + "grad_norm": 0.7122063636779785, + "learning_rate": 0.00020389262995036263, + "loss": 1.3935, + "step": 7224 + }, + { + "epoch": 0.7621308016877637, + "grad_norm": 0.7994998097419739, + "learning_rate": 0.00020372057799399534, + "loss": 1.4246, + "step": 7225 + }, + { + "epoch": 0.7622362869198313, + "grad_norm": 0.6553068161010742, + "learning_rate": 0.00020354858725096122, + "loss": 1.4073, + "step": 7226 + }, + { + "epoch": 0.7623417721518987, + "grad_norm": 0.9108197093009949, + "learning_rate": 0.00020337665774053284, + "loss": 1.3861, + "step": 7227 + }, + { + "epoch": 0.7624472573839662, + "grad_norm": 0.6620415449142456, + "learning_rate": 0.0002032047894819758, + "loss": 1.4015, + "step": 7228 + }, + { + "epoch": 
0.7625527426160338, + "grad_norm": 0.6661376357078552, + "learning_rate": 0.00020303298249454857, + "loss": 1.4513, + "step": 7229 + }, + { + "epoch": 0.7626582278481012, + "grad_norm": 0.6859104037284851, + "learning_rate": 0.00020286123679750314, + "loss": 1.4043, + "step": 7230 + }, + { + "epoch": 0.7627637130801688, + "grad_norm": 0.7252294421195984, + "learning_rate": 0.00020268955241008437, + "loss": 1.4289, + "step": 7231 + }, + { + "epoch": 0.7628691983122363, + "grad_norm": 0.6800586581230164, + "learning_rate": 0.00020251792935153037, + "loss": 1.402, + "step": 7232 + }, + { + "epoch": 0.7629746835443038, + "grad_norm": 0.6623731255531311, + "learning_rate": 0.0002023463676410724, + "loss": 1.3822, + "step": 7233 + }, + { + "epoch": 0.7630801687763713, + "grad_norm": 0.6601670384407043, + "learning_rate": 0.0002021748672979348, + "loss": 1.3958, + "step": 7234 + }, + { + "epoch": 0.7631856540084389, + "grad_norm": 0.8390235900878906, + "learning_rate": 0.00020200342834133497, + "loss": 1.4226, + "step": 7235 + }, + { + "epoch": 0.7632911392405063, + "grad_norm": 0.7226210832595825, + "learning_rate": 0.00020183205079048338, + "loss": 1.3838, + "step": 7236 + }, + { + "epoch": 0.7633966244725738, + "grad_norm": 0.6938892006874084, + "learning_rate": 0.0002016607346645841, + "loss": 1.4343, + "step": 7237 + }, + { + "epoch": 0.7635021097046414, + "grad_norm": 0.6869243383407593, + "learning_rate": 0.00020148947998283381, + "loss": 1.4273, + "step": 7238 + }, + { + "epoch": 0.7636075949367088, + "grad_norm": 0.7271302938461304, + "learning_rate": 0.00020131828676442237, + "loss": 1.3799, + "step": 7239 + }, + { + "epoch": 0.7637130801687764, + "grad_norm": 0.6992348432540894, + "learning_rate": 0.00020114715502853292, + "loss": 1.3939, + "step": 7240 + }, + { + "epoch": 0.7638185654008439, + "grad_norm": 0.6764703392982483, + "learning_rate": 0.00020097608479434153, + "loss": 1.3554, + "step": 7241 + }, + { + "epoch": 0.7639240506329114, + "grad_norm": 
0.7242671847343445, + "learning_rate": 0.00020080507608101757, + "loss": 1.4467, + "step": 7242 + }, + { + "epoch": 0.7640295358649789, + "grad_norm": 0.7507384419441223, + "learning_rate": 0.0002006341289077233, + "loss": 1.4072, + "step": 7243 + }, + { + "epoch": 0.7641350210970465, + "grad_norm": 0.6980249881744385, + "learning_rate": 0.00020046324329361432, + "loss": 1.3906, + "step": 7244 + }, + { + "epoch": 0.7642405063291139, + "grad_norm": 0.6777079105377197, + "learning_rate": 0.00020029241925783908, + "loss": 1.3702, + "step": 7245 + }, + { + "epoch": 0.7643459915611814, + "grad_norm": 0.7735211253166199, + "learning_rate": 0.00020012165681953923, + "loss": 1.3674, + "step": 7246 + }, + { + "epoch": 0.764451476793249, + "grad_norm": 0.7026504874229431, + "learning_rate": 0.00019995095599784985, + "loss": 1.3973, + "step": 7247 + }, + { + "epoch": 0.7645569620253164, + "grad_norm": 0.7071264982223511, + "learning_rate": 0.00019978031681189864, + "loss": 1.3713, + "step": 7248 + }, + { + "epoch": 0.764662447257384, + "grad_norm": 0.8230234384536743, + "learning_rate": 0.00019960973928080666, + "loss": 1.4429, + "step": 7249 + }, + { + "epoch": 0.7647679324894515, + "grad_norm": 0.8185003995895386, + "learning_rate": 0.0001994392234236878, + "loss": 1.3941, + "step": 7250 + }, + { + "epoch": 0.764873417721519, + "grad_norm": 0.7781619429588318, + "learning_rate": 0.00019926876925964928, + "loss": 1.4063, + "step": 7251 + }, + { + "epoch": 0.7649789029535865, + "grad_norm": 0.6825125217437744, + "learning_rate": 0.00019909837680779141, + "loss": 1.38, + "step": 7252 + }, + { + "epoch": 0.765084388185654, + "grad_norm": 0.6817828416824341, + "learning_rate": 0.00019892804608720747, + "loss": 1.406, + "step": 7253 + }, + { + "epoch": 0.7651898734177215, + "grad_norm": 0.8922781348228455, + "learning_rate": 0.00019875777711698384, + "loss": 1.4113, + "step": 7254 + }, + { + "epoch": 0.765295358649789, + "grad_norm": 0.7785928249359131, + "learning_rate": 
0.00019858756991619978, + "loss": 1.4002, + "step": 7255 + }, + { + "epoch": 0.7654008438818566, + "grad_norm": 0.8074373602867126, + "learning_rate": 0.00019841742450392837, + "loss": 1.4076, + "step": 7256 + }, + { + "epoch": 0.765506329113924, + "grad_norm": 0.8255418539047241, + "learning_rate": 0.0001982473408992349, + "loss": 1.4108, + "step": 7257 + }, + { + "epoch": 0.7656118143459916, + "grad_norm": 0.7749651670455933, + "learning_rate": 0.00019807731912117828, + "loss": 1.3833, + "step": 7258 + }, + { + "epoch": 0.7657172995780591, + "grad_norm": 0.7826220393180847, + "learning_rate": 0.0001979073591888101, + "loss": 1.4275, + "step": 7259 + }, + { + "epoch": 0.7658227848101266, + "grad_norm": 0.7045955061912537, + "learning_rate": 0.0001977374611211754, + "loss": 1.3412, + "step": 7260 + }, + { + "epoch": 0.7659282700421941, + "grad_norm": 0.7068358659744263, + "learning_rate": 0.00019756762493731192, + "loss": 1.3544, + "step": 7261 + }, + { + "epoch": 0.7660337552742617, + "grad_norm": 0.6391391754150391, + "learning_rate": 0.00019739785065625077, + "loss": 1.3735, + "step": 7262 + }, + { + "epoch": 0.7661392405063291, + "grad_norm": 0.7137014865875244, + "learning_rate": 0.00019722813829701593, + "loss": 1.3869, + "step": 7263 + }, + { + "epoch": 0.7662447257383966, + "grad_norm": 0.7636756896972656, + "learning_rate": 0.0001970584878786244, + "loss": 1.4293, + "step": 7264 + }, + { + "epoch": 0.7663502109704642, + "grad_norm": 0.8016577959060669, + "learning_rate": 0.0001968888994200868, + "loss": 1.3755, + "step": 7265 + }, + { + "epoch": 0.7664556962025316, + "grad_norm": 0.6802381873130798, + "learning_rate": 0.00019671937294040595, + "loss": 1.3578, + "step": 7266 + }, + { + "epoch": 0.7665611814345992, + "grad_norm": 0.6714524626731873, + "learning_rate": 0.00019654990845857832, + "loss": 1.3778, + "step": 7267 + }, + { + "epoch": 0.7666666666666667, + "grad_norm": 0.9294701814651489, + "learning_rate": 0.00019638050599359326, + "loss": 1.423, + 
"step": 7268 + }, + { + "epoch": 0.7667721518987342, + "grad_norm": 0.6746889352798462, + "learning_rate": 0.000196211165564433, + "loss": 1.3975, + "step": 7269 + }, + { + "epoch": 0.7668776371308017, + "grad_norm": 0.7999582886695862, + "learning_rate": 0.00019604188719007313, + "loss": 1.4369, + "step": 7270 + }, + { + "epoch": 0.7669831223628693, + "grad_norm": 0.7725174427032471, + "learning_rate": 0.00019587267088948214, + "loss": 1.3927, + "step": 7271 + }, + { + "epoch": 0.7670886075949367, + "grad_norm": 0.7224389314651489, + "learning_rate": 0.00019570351668162143, + "loss": 1.395, + "step": 7272 + }, + { + "epoch": 0.7671940928270042, + "grad_norm": 0.8818314075469971, + "learning_rate": 0.00019553442458544542, + "loss": 1.402, + "step": 7273 + }, + { + "epoch": 0.7672995780590718, + "grad_norm": 0.716957151889801, + "learning_rate": 0.00019536539461990224, + "loss": 1.3519, + "step": 7274 + }, + { + "epoch": 0.7674050632911392, + "grad_norm": 0.6802651882171631, + "learning_rate": 0.0001951964268039322, + "loss": 1.3905, + "step": 7275 + }, + { + "epoch": 0.7675105485232068, + "grad_norm": 0.7833480834960938, + "learning_rate": 0.00019502752115646901, + "loss": 1.4346, + "step": 7276 + }, + { + "epoch": 0.7676160337552742, + "grad_norm": 0.7639727592468262, + "learning_rate": 0.00019485867769643945, + "loss": 1.3687, + "step": 7277 + }, + { + "epoch": 0.7677215189873418, + "grad_norm": 0.708500862121582, + "learning_rate": 0.0001946898964427633, + "loss": 1.4223, + "step": 7278 + }, + { + "epoch": 0.7678270042194093, + "grad_norm": 0.8370721340179443, + "learning_rate": 0.00019452117741435314, + "loss": 1.3959, + "step": 7279 + }, + { + "epoch": 0.7679324894514767, + "grad_norm": 0.7884529829025269, + "learning_rate": 0.00019435252063011504, + "loss": 1.3693, + "step": 7280 + }, + { + "epoch": 0.7680379746835443, + "grad_norm": 0.7313410043716431, + "learning_rate": 0.00019418392610894768, + "loss": 1.4436, + "step": 7281 + }, + { + "epoch": 
0.7681434599156118, + "grad_norm": 0.7292661666870117, + "learning_rate": 0.0001940153938697427, + "loss": 1.4421, + "step": 7282 + }, + { + "epoch": 0.7682489451476793, + "grad_norm": 0.7637990117073059, + "learning_rate": 0.0001938469239313855, + "loss": 1.3942, + "step": 7283 + }, + { + "epoch": 0.7683544303797468, + "grad_norm": 0.765353262424469, + "learning_rate": 0.00019367851631275362, + "loss": 1.3864, + "step": 7284 + }, + { + "epoch": 0.7684599156118144, + "grad_norm": 0.7480273246765137, + "learning_rate": 0.00019351017103271805, + "loss": 1.4132, + "step": 7285 + }, + { + "epoch": 0.7685654008438818, + "grad_norm": 0.9240022301673889, + "learning_rate": 0.00019334188811014278, + "loss": 1.3893, + "step": 7286 + }, + { + "epoch": 0.7686708860759494, + "grad_norm": 0.8755521774291992, + "learning_rate": 0.00019317366756388477, + "loss": 1.3817, + "step": 7287 + }, + { + "epoch": 0.7687763713080169, + "grad_norm": 0.7461376190185547, + "learning_rate": 0.0001930055094127938, + "loss": 1.3864, + "step": 7288 + }, + { + "epoch": 0.7688818565400843, + "grad_norm": 0.9481735825538635, + "learning_rate": 0.00019283741367571294, + "loss": 1.4211, + "step": 7289 + }, + { + "epoch": 0.7689873417721519, + "grad_norm": 0.7926174402236938, + "learning_rate": 0.0001926693803714779, + "loss": 1.3893, + "step": 7290 + }, + { + "epoch": 0.7690928270042194, + "grad_norm": 0.7261988520622253, + "learning_rate": 0.00019250140951891813, + "loss": 1.4636, + "step": 7291 + }, + { + "epoch": 0.7691983122362869, + "grad_norm": 0.7003751993179321, + "learning_rate": 0.00019233350113685536, + "loss": 1.4209, + "step": 7292 + }, + { + "epoch": 0.7693037974683544, + "grad_norm": 0.786095380783081, + "learning_rate": 0.00019216565524410455, + "loss": 1.3695, + "step": 7293 + }, + { + "epoch": 0.769409282700422, + "grad_norm": 0.8474434614181519, + "learning_rate": 0.0001919978718594738, + "loss": 1.3783, + "step": 7294 + }, + { + "epoch": 0.7695147679324894, + "grad_norm": 
0.7312735319137573, + "learning_rate": 0.0001918301510017638, + "loss": 1.435, + "step": 7295 + }, + { + "epoch": 0.769620253164557, + "grad_norm": 0.7260260581970215, + "learning_rate": 0.0001916624926897687, + "loss": 1.4184, + "step": 7296 + }, + { + "epoch": 0.7697257383966245, + "grad_norm": 0.8217074275016785, + "learning_rate": 0.0001914948969422755, + "loss": 1.4001, + "step": 7297 + }, + { + "epoch": 0.7698312236286919, + "grad_norm": 0.6858365535736084, + "learning_rate": 0.00019132736377806394, + "loss": 1.3738, + "step": 7298 + }, + { + "epoch": 0.7699367088607595, + "grad_norm": 0.7690737843513489, + "learning_rate": 0.00019115989321590694, + "loss": 1.3994, + "step": 7299 + }, + { + "epoch": 0.770042194092827, + "grad_norm": 0.8139335513114929, + "learning_rate": 0.00019099248527457068, + "loss": 1.442, + "step": 7300 + }, + { + "epoch": 0.7701476793248945, + "grad_norm": 0.6718381643295288, + "learning_rate": 0.00019082513997281398, + "loss": 1.4035, + "step": 7301 + }, + { + "epoch": 0.770253164556962, + "grad_norm": 0.6675881743431091, + "learning_rate": 0.0001906578573293886, + "loss": 1.3559, + "step": 7302 + }, + { + "epoch": 0.7703586497890296, + "grad_norm": 0.8879851698875427, + "learning_rate": 0.00019049063736303946, + "loss": 1.3954, + "step": 7303 + }, + { + "epoch": 0.770464135021097, + "grad_norm": 0.6967356204986572, + "learning_rate": 0.00019032348009250433, + "loss": 1.4004, + "step": 7304 + }, + { + "epoch": 0.7705696202531646, + "grad_norm": 0.7446186542510986, + "learning_rate": 0.0001901563855365141, + "loss": 1.383, + "step": 7305 + }, + { + "epoch": 0.7706751054852321, + "grad_norm": 0.743310809135437, + "learning_rate": 0.00018998935371379252, + "loss": 1.3843, + "step": 7306 + }, + { + "epoch": 0.7707805907172995, + "grad_norm": 0.7187754511833191, + "learning_rate": 0.00018982238464305623, + "loss": 1.4017, + "step": 7307 + }, + { + "epoch": 0.7708860759493671, + "grad_norm": 0.7186182737350464, + "learning_rate": 
0.0001896554783430149, + "loss": 1.3953, + "step": 7308 + }, + { + "epoch": 0.7709915611814346, + "grad_norm": 0.7989746332168579, + "learning_rate": 0.00018948863483237154, + "loss": 1.3961, + "step": 7309 + }, + { + "epoch": 0.7710970464135021, + "grad_norm": 0.6763551235198975, + "learning_rate": 0.0001893218541298216, + "loss": 1.3809, + "step": 7310 + }, + { + "epoch": 0.7712025316455696, + "grad_norm": 0.7270032167434692, + "learning_rate": 0.00018915513625405374, + "loss": 1.3895, + "step": 7311 + }, + { + "epoch": 0.7713080168776372, + "grad_norm": 0.6885639429092407, + "learning_rate": 0.00018898848122374942, + "loss": 1.4072, + "step": 7312 + }, + { + "epoch": 0.7714135021097046, + "grad_norm": 0.7653898000717163, + "learning_rate": 0.00018882188905758326, + "loss": 1.3966, + "step": 7313 + }, + { + "epoch": 0.7715189873417722, + "grad_norm": 0.7189638614654541, + "learning_rate": 0.00018865535977422273, + "loss": 1.3771, + "step": 7314 + }, + { + "epoch": 0.7716244725738397, + "grad_norm": 0.6939089298248291, + "learning_rate": 0.00018848889339232833, + "loss": 1.3665, + "step": 7315 + }, + { + "epoch": 0.7717299578059071, + "grad_norm": 0.7014774084091187, + "learning_rate": 0.00018832248993055304, + "loss": 1.4024, + "step": 7316 + }, + { + "epoch": 0.7718354430379747, + "grad_norm": 0.8311484456062317, + "learning_rate": 0.00018815614940754377, + "loss": 1.3594, + "step": 7317 + }, + { + "epoch": 0.7719409282700422, + "grad_norm": 0.6610333919525146, + "learning_rate": 0.00018798987184193963, + "loss": 1.3701, + "step": 7318 + }, + { + "epoch": 0.7720464135021097, + "grad_norm": 0.7613053917884827, + "learning_rate": 0.00018782365725237272, + "loss": 1.3949, + "step": 7319 + }, + { + "epoch": 0.7721518987341772, + "grad_norm": 0.9062397480010986, + "learning_rate": 0.00018765750565746827, + "loss": 1.3823, + "step": 7320 + }, + { + "epoch": 0.7722573839662448, + "grad_norm": 0.7002202272415161, + "learning_rate": 0.00018749141707584443, + "loss": 
1.4308, + "step": 7321 + }, + { + "epoch": 0.7723628691983122, + "grad_norm": 0.6769017577171326, + "learning_rate": 0.0001873253915261123, + "loss": 1.3711, + "step": 7322 + }, + { + "epoch": 0.7724683544303798, + "grad_norm": 0.6817429065704346, + "learning_rate": 0.00018715942902687566, + "loss": 1.3741, + "step": 7323 + }, + { + "epoch": 0.7725738396624473, + "grad_norm": 0.7028344869613647, + "learning_rate": 0.00018699352959673172, + "loss": 1.3888, + "step": 7324 + }, + { + "epoch": 0.7726793248945147, + "grad_norm": 0.6953803300857544, + "learning_rate": 0.00018682769325426986, + "loss": 1.4173, + "step": 7325 + }, + { + "epoch": 0.7727848101265823, + "grad_norm": 0.6958556175231934, + "learning_rate": 0.00018666192001807344, + "loss": 1.3837, + "step": 7326 + }, + { + "epoch": 0.7728902953586498, + "grad_norm": 0.7301607131958008, + "learning_rate": 0.00018649620990671798, + "loss": 1.3584, + "step": 7327 + }, + { + "epoch": 0.7729957805907173, + "grad_norm": 0.6674724221229553, + "learning_rate": 0.00018633056293877203, + "loss": 1.3852, + "step": 7328 + }, + { + "epoch": 0.7731012658227848, + "grad_norm": 0.6665921211242676, + "learning_rate": 0.00018616497913279728, + "loss": 1.3549, + "step": 7329 + }, + { + "epoch": 0.7732067510548524, + "grad_norm": 0.8324102163314819, + "learning_rate": 0.00018599945850734812, + "loss": 1.4487, + "step": 7330 + }, + { + "epoch": 0.7733122362869198, + "grad_norm": 0.6870251893997192, + "learning_rate": 0.00018583400108097194, + "loss": 1.3728, + "step": 7331 + }, + { + "epoch": 0.7734177215189874, + "grad_norm": 0.7231236100196838, + "learning_rate": 0.00018566860687220922, + "loss": 1.3993, + "step": 7332 + }, + { + "epoch": 0.7735232067510549, + "grad_norm": 0.7458232641220093, + "learning_rate": 0.00018550327589959308, + "loss": 1.4041, + "step": 7333 + }, + { + "epoch": 0.7736286919831223, + "grad_norm": 0.7875053882598877, + "learning_rate": 0.00018533800818164943, + "loss": 1.3727, + "step": 7334 + }, + { + 
"epoch": 0.7737341772151899, + "grad_norm": 0.706142246723175, + "learning_rate": 0.00018517280373689789, + "loss": 1.4269, + "step": 7335 + }, + { + "epoch": 0.7738396624472574, + "grad_norm": 0.7687646150588989, + "learning_rate": 0.0001850076625838502, + "loss": 1.368, + "step": 7336 + }, + { + "epoch": 0.7739451476793249, + "grad_norm": 0.7451513409614563, + "learning_rate": 0.0001848425847410112, + "loss": 1.3604, + "step": 7337 + }, + { + "epoch": 0.7740506329113924, + "grad_norm": 0.6905320882797241, + "learning_rate": 0.00018467757022687864, + "loss": 1.3895, + "step": 7338 + }, + { + "epoch": 0.77415611814346, + "grad_norm": 0.7695297598838806, + "learning_rate": 0.0001845126190599434, + "loss": 1.405, + "step": 7339 + }, + { + "epoch": 0.7742616033755274, + "grad_norm": 0.7981787323951721, + "learning_rate": 0.00018434773125868895, + "loss": 1.3755, + "step": 7340 + }, + { + "epoch": 0.774367088607595, + "grad_norm": 0.73011714220047, + "learning_rate": 0.00018418290684159175, + "loss": 1.3761, + "step": 7341 + }, + { + "epoch": 0.7744725738396624, + "grad_norm": 0.8195927143096924, + "learning_rate": 0.00018401814582712103, + "loss": 1.3893, + "step": 7342 + }, + { + "epoch": 0.7745780590717299, + "grad_norm": 0.6723359227180481, + "learning_rate": 0.0001838534482337396, + "loss": 1.3631, + "step": 7343 + }, + { + "epoch": 0.7746835443037975, + "grad_norm": 0.728031575679779, + "learning_rate": 0.0001836888140799023, + "loss": 1.4235, + "step": 7344 + }, + { + "epoch": 0.7747890295358649, + "grad_norm": 0.6834869980812073, + "learning_rate": 0.0001835242433840573, + "loss": 1.3823, + "step": 7345 + }, + { + "epoch": 0.7748945147679325, + "grad_norm": 0.8125569820404053, + "learning_rate": 0.00018335973616464554, + "loss": 1.334, + "step": 7346 + }, + { + "epoch": 0.775, + "grad_norm": 0.7226583361625671, + "learning_rate": 0.00018319529244010082, + "loss": 1.4019, + "step": 7347 + }, + { + "epoch": 0.7751054852320675, + "grad_norm": 0.6936764717102051, + 
"learning_rate": 0.00018303091222884998, + "loss": 1.4153, + "step": 7348 + }, + { + "epoch": 0.775210970464135, + "grad_norm": 0.6905502080917358, + "learning_rate": 0.00018286659554931254, + "loss": 1.3908, + "step": 7349 + }, + { + "epoch": 0.7753164556962026, + "grad_norm": 0.7019782662391663, + "learning_rate": 0.00018270234241990108, + "loss": 1.3642, + "step": 7350 + }, + { + "epoch": 0.77542194092827, + "grad_norm": 0.7054752111434937, + "learning_rate": 0.00018253815285902074, + "loss": 1.3667, + "step": 7351 + }, + { + "epoch": 0.7755274261603375, + "grad_norm": 0.7218157052993774, + "learning_rate": 0.0001823740268850702, + "loss": 1.4124, + "step": 7352 + }, + { + "epoch": 0.7756329113924051, + "grad_norm": 0.7578182220458984, + "learning_rate": 0.0001822099645164404, + "loss": 1.3947, + "step": 7353 + }, + { + "epoch": 0.7757383966244725, + "grad_norm": 0.6781614422798157, + "learning_rate": 0.00018204596577151534, + "loss": 1.3686, + "step": 7354 + }, + { + "epoch": 0.7758438818565401, + "grad_norm": 0.6571463942527771, + "learning_rate": 0.00018188203066867178, + "loss": 1.3932, + "step": 7355 + }, + { + "epoch": 0.7759493670886076, + "grad_norm": 0.6937546133995056, + "learning_rate": 0.00018171815922627974, + "loss": 1.4268, + "step": 7356 + }, + { + "epoch": 0.7760548523206751, + "grad_norm": 0.7561089396476746, + "learning_rate": 0.00018155435146270158, + "loss": 1.3916, + "step": 7357 + }, + { + "epoch": 0.7761603375527426, + "grad_norm": 0.6956106424331665, + "learning_rate": 0.00018139060739629287, + "loss": 1.4121, + "step": 7358 + }, + { + "epoch": 0.7762658227848102, + "grad_norm": 0.7108892202377319, + "learning_rate": 0.00018122692704540194, + "loss": 1.3878, + "step": 7359 + }, + { + "epoch": 0.7763713080168776, + "grad_norm": 0.6968532800674438, + "learning_rate": 0.0001810633104283698, + "loss": 1.3741, + "step": 7360 + }, + { + "epoch": 0.7764767932489451, + "grad_norm": 0.7161688804626465, + "learning_rate": 0.00018089975756353083, + 
"loss": 1.4224, + "step": 7361 + }, + { + "epoch": 0.7765822784810127, + "grad_norm": 0.711043655872345, + "learning_rate": 0.0001807362684692119, + "loss": 1.4204, + "step": 7362 + }, + { + "epoch": 0.7766877637130801, + "grad_norm": 0.7127967476844788, + "learning_rate": 0.00018057284316373267, + "loss": 1.3599, + "step": 7363 + }, + { + "epoch": 0.7767932489451477, + "grad_norm": 0.6904404759407043, + "learning_rate": 0.00018040948166540586, + "loss": 1.3645, + "step": 7364 + }, + { + "epoch": 0.7768987341772152, + "grad_norm": 0.6428953409194946, + "learning_rate": 0.0001802461839925368, + "loss": 1.3799, + "step": 7365 + }, + { + "epoch": 0.7770042194092827, + "grad_norm": 0.6984139680862427, + "learning_rate": 0.00018008295016342383, + "loss": 1.3834, + "step": 7366 + }, + { + "epoch": 0.7771097046413502, + "grad_norm": 0.7689412236213684, + "learning_rate": 0.00017991978019635819, + "loss": 1.3907, + "step": 7367 + }, + { + "epoch": 0.7772151898734178, + "grad_norm": 0.6845264434814453, + "learning_rate": 0.00017975667410962366, + "loss": 1.364, + "step": 7368 + }, + { + "epoch": 0.7773206751054852, + "grad_norm": 0.7484418749809265, + "learning_rate": 0.00017959363192149752, + "loss": 1.4184, + "step": 7369 + }, + { + "epoch": 0.7774261603375527, + "grad_norm": 0.7351182699203491, + "learning_rate": 0.0001794306536502492, + "loss": 1.4304, + "step": 7370 + }, + { + "epoch": 0.7775316455696203, + "grad_norm": 0.6918825507164001, + "learning_rate": 0.0001792677393141412, + "loss": 1.4013, + "step": 7371 + }, + { + "epoch": 0.7776371308016877, + "grad_norm": 0.7601997256278992, + "learning_rate": 0.00017910488893142903, + "loss": 1.423, + "step": 7372 + }, + { + "epoch": 0.7777426160337553, + "grad_norm": 0.6885178089141846, + "learning_rate": 0.00017894210252036069, + "loss": 1.3476, + "step": 7373 + }, + { + "epoch": 0.7778481012658228, + "grad_norm": 0.6826455593109131, + "learning_rate": 0.0001787793800991774, + "loss": 1.3983, + "step": 7374 + }, + { + 
"epoch": 0.7779535864978903, + "grad_norm": 0.6966461539268494, + "learning_rate": 0.00017861672168611293, + "loss": 1.4216, + "step": 7375 + }, + { + "epoch": 0.7780590717299578, + "grad_norm": 0.6722185015678406, + "learning_rate": 0.0001784541272993939, + "loss": 1.3597, + "step": 7376 + }, + { + "epoch": 0.7781645569620254, + "grad_norm": 0.7042160630226135, + "learning_rate": 0.00017829159695723973, + "loss": 1.3756, + "step": 7377 + }, + { + "epoch": 0.7782700421940928, + "grad_norm": 0.7100505828857422, + "learning_rate": 0.00017812913067786313, + "loss": 1.354, + "step": 7378 + }, + { + "epoch": 0.7783755274261603, + "grad_norm": 0.6759604215621948, + "learning_rate": 0.00017796672847946905, + "loss": 1.383, + "step": 7379 + }, + { + "epoch": 0.7784810126582279, + "grad_norm": 0.7293667793273926, + "learning_rate": 0.0001778043903802555, + "loss": 1.3786, + "step": 7380 + }, + { + "epoch": 0.7785864978902953, + "grad_norm": 0.7247200012207031, + "learning_rate": 0.00017764211639841312, + "loss": 1.3906, + "step": 7381 + }, + { + "epoch": 0.7786919831223629, + "grad_norm": 0.6997755169868469, + "learning_rate": 0.0001774799065521257, + "loss": 1.4731, + "step": 7382 + }, + { + "epoch": 0.7787974683544304, + "grad_norm": 0.697669267654419, + "learning_rate": 0.0001773177608595696, + "loss": 1.3851, + "step": 7383 + }, + { + "epoch": 0.7789029535864979, + "grad_norm": 0.7424220442771912, + "learning_rate": 0.00017715567933891405, + "loss": 1.3807, + "step": 7384 + }, + { + "epoch": 0.7790084388185654, + "grad_norm": 0.7008427381515503, + "learning_rate": 0.0001769936620083211, + "loss": 1.3898, + "step": 7385 + }, + { + "epoch": 0.779113924050633, + "grad_norm": 0.7889910340309143, + "learning_rate": 0.0001768317088859453, + "loss": 1.3448, + "step": 7386 + }, + { + "epoch": 0.7792194092827004, + "grad_norm": 0.689349353313446, + "learning_rate": 0.0001766698199899349, + "loss": 1.4043, + "step": 7387 + }, + { + "epoch": 0.7793248945147679, + "grad_norm": 
0.7657639980316162, + "learning_rate": 0.00017650799533842996, + "loss": 1.3811, + "step": 7388 + }, + { + "epoch": 0.7794303797468355, + "grad_norm": 0.7501848936080933, + "learning_rate": 0.0001763462349495639, + "loss": 1.3835, + "step": 7389 + }, + { + "epoch": 0.7795358649789029, + "grad_norm": 0.6892876625061035, + "learning_rate": 0.0001761845388414627, + "loss": 1.3776, + "step": 7390 + }, + { + "epoch": 0.7796413502109705, + "grad_norm": 0.7384872436523438, + "learning_rate": 0.00017602290703224525, + "loss": 1.3913, + "step": 7391 + }, + { + "epoch": 0.779746835443038, + "grad_norm": 0.7070227861404419, + "learning_rate": 0.00017586133954002308, + "loss": 1.4253, + "step": 7392 + }, + { + "epoch": 0.7798523206751055, + "grad_norm": 0.6807616949081421, + "learning_rate": 0.00017569983638290084, + "loss": 1.3937, + "step": 7393 + }, + { + "epoch": 0.779957805907173, + "grad_norm": 0.7298701405525208, + "learning_rate": 0.0001755383975789754, + "loss": 1.3909, + "step": 7394 + }, + { + "epoch": 0.7800632911392406, + "grad_norm": 0.7409301400184631, + "learning_rate": 0.00017537702314633722, + "loss": 1.3832, + "step": 7395 + }, + { + "epoch": 0.780168776371308, + "grad_norm": 0.7248406410217285, + "learning_rate": 0.00017521571310306889, + "loss": 1.4451, + "step": 7396 + }, + { + "epoch": 0.7802742616033755, + "grad_norm": 0.8014376759529114, + "learning_rate": 0.0001750544674672461, + "loss": 1.4053, + "step": 7397 + }, + { + "epoch": 0.7803797468354431, + "grad_norm": 0.727973222732544, + "learning_rate": 0.00017489328625693715, + "loss": 1.3704, + "step": 7398 + }, + { + "epoch": 0.7804852320675105, + "grad_norm": 0.731758177280426, + "learning_rate": 0.00017473216949020326, + "loss": 1.3932, + "step": 7399 + }, + { + "epoch": 0.7805907172995781, + "grad_norm": 0.7980003356933594, + "learning_rate": 0.00017457111718509831, + "loss": 1.4156, + "step": 7400 + }, + { + "epoch": 0.7806962025316456, + "grad_norm": 0.7521733045578003, + "learning_rate": 
0.00017441012935966898, + "loss": 1.3461, + "step": 7401 + }, + { + "epoch": 0.7808016877637131, + "grad_norm": 0.7921962141990662, + "learning_rate": 0.00017424920603195483, + "loss": 1.3983, + "step": 7402 + }, + { + "epoch": 0.7809071729957806, + "grad_norm": 0.7201323509216309, + "learning_rate": 0.0001740883472199879, + "loss": 1.3631, + "step": 7403 + }, + { + "epoch": 0.7810126582278482, + "grad_norm": 0.7279659509658813, + "learning_rate": 0.00017392755294179363, + "loss": 1.3935, + "step": 7404 + }, + { + "epoch": 0.7811181434599156, + "grad_norm": 0.8026231527328491, + "learning_rate": 0.0001737668232153896, + "loss": 1.3753, + "step": 7405 + }, + { + "epoch": 0.7812236286919831, + "grad_norm": 0.6746460795402527, + "learning_rate": 0.00017360615805878636, + "loss": 1.379, + "step": 7406 + }, + { + "epoch": 0.7813291139240506, + "grad_norm": 0.6630092263221741, + "learning_rate": 0.00017344555748998727, + "loss": 1.3755, + "step": 7407 + }, + { + "epoch": 0.7814345991561181, + "grad_norm": 0.7110109925270081, + "learning_rate": 0.0001732850215269885, + "loss": 1.3985, + "step": 7408 + }, + { + "epoch": 0.7815400843881857, + "grad_norm": 0.6747986078262329, + "learning_rate": 0.0001731245501877787, + "loss": 1.3985, + "step": 7409 + }, + { + "epoch": 0.7816455696202531, + "grad_norm": 0.6825429797172546, + "learning_rate": 0.00017296414349033976, + "loss": 1.3672, + "step": 7410 + }, + { + "epoch": 0.7817510548523207, + "grad_norm": 0.6850488781929016, + "learning_rate": 0.0001728038014526458, + "loss": 1.3992, + "step": 7411 + }, + { + "epoch": 0.7818565400843882, + "grad_norm": 0.6688708066940308, + "learning_rate": 0.00017264352409266385, + "loss": 1.3971, + "step": 7412 + }, + { + "epoch": 0.7819620253164556, + "grad_norm": 0.7323389649391174, + "learning_rate": 0.0001724833114283542, + "loss": 1.3909, + "step": 7413 + }, + { + "epoch": 0.7820675105485232, + "grad_norm": 0.7177338004112244, + "learning_rate": 0.0001723231634776693, + "loss": 1.3883, + 
"step": 7414 + }, + { + "epoch": 0.7821729957805907, + "grad_norm": 0.8301907181739807, + "learning_rate": 0.0001721630802585545, + "loss": 1.3579, + "step": 7415 + }, + { + "epoch": 0.7822784810126582, + "grad_norm": 0.6672830581665039, + "learning_rate": 0.00017200306178894785, + "loss": 1.3397, + "step": 7416 + }, + { + "epoch": 0.7823839662447257, + "grad_norm": 0.6714862585067749, + "learning_rate": 0.00017184310808678028, + "loss": 1.3595, + "step": 7417 + }, + { + "epoch": 0.7824894514767933, + "grad_norm": 0.6968562602996826, + "learning_rate": 0.00017168321916997547, + "loss": 1.4225, + "step": 7418 + }, + { + "epoch": 0.7825949367088607, + "grad_norm": 0.6877459287643433, + "learning_rate": 0.00017152339505644963, + "loss": 1.4134, + "step": 7419 + }, + { + "epoch": 0.7827004219409283, + "grad_norm": 0.6652035713195801, + "learning_rate": 0.00017136363576411172, + "loss": 1.3701, + "step": 7420 + }, + { + "epoch": 0.7828059071729958, + "grad_norm": 0.6730415225028992, + "learning_rate": 0.00017120394131086398, + "loss": 1.3791, + "step": 7421 + }, + { + "epoch": 0.7829113924050632, + "grad_norm": 0.7242416739463806, + "learning_rate": 0.00017104431171460077, + "loss": 1.3815, + "step": 7422 + }, + { + "epoch": 0.7830168776371308, + "grad_norm": 0.6455191373825073, + "learning_rate": 0.0001708847469932093, + "loss": 1.3497, + "step": 7423 + }, + { + "epoch": 0.7831223628691983, + "grad_norm": 0.6754509210586548, + "learning_rate": 0.00017072524716456975, + "loss": 1.3841, + "step": 7424 + }, + { + "epoch": 0.7832278481012658, + "grad_norm": 0.6878651976585388, + "learning_rate": 0.00017056581224655473, + "loss": 1.383, + "step": 7425 + }, + { + "epoch": 0.7833333333333333, + "grad_norm": 0.769875705242157, + "learning_rate": 0.0001704064422570298, + "loss": 1.3348, + "step": 7426 + }, + { + "epoch": 0.7834388185654009, + "grad_norm": 0.7831520438194275, + "learning_rate": 0.0001702471372138531, + "loss": 1.3634, + "step": 7427 + }, + { + "epoch": 
0.7835443037974683, + "grad_norm": 0.6769765615463257, + "learning_rate": 0.00017008789713487558, + "loss": 1.3578, + "step": 7428 + }, + { + "epoch": 0.7836497890295359, + "grad_norm": 0.7067611217498779, + "learning_rate": 0.0001699287220379407, + "loss": 1.3801, + "step": 7429 + }, + { + "epoch": 0.7837552742616034, + "grad_norm": 0.6433848738670349, + "learning_rate": 0.00016976961194088526, + "loss": 1.388, + "step": 7430 + }, + { + "epoch": 0.7838607594936708, + "grad_norm": 0.6924077272415161, + "learning_rate": 0.000169610566861538, + "loss": 1.3689, + "step": 7431 + }, + { + "epoch": 0.7839662447257384, + "grad_norm": 0.6957792639732361, + "learning_rate": 0.0001694515868177209, + "loss": 1.368, + "step": 7432 + }, + { + "epoch": 0.7840717299578059, + "grad_norm": 0.7067357897758484, + "learning_rate": 0.0001692926718272483, + "loss": 1.37, + "step": 7433 + }, + { + "epoch": 0.7841772151898734, + "grad_norm": 0.7213476300239563, + "learning_rate": 0.00016913382190792754, + "loss": 1.3956, + "step": 7434 + }, + { + "epoch": 0.7842827004219409, + "grad_norm": 0.6792110204696655, + "learning_rate": 0.0001689750370775584, + "loss": 1.3713, + "step": 7435 + }, + { + "epoch": 0.7843881856540085, + "grad_norm": 0.6819467544555664, + "learning_rate": 0.00016881631735393368, + "loss": 1.343, + "step": 7436 + }, + { + "epoch": 0.7844936708860759, + "grad_norm": 0.7307490110397339, + "learning_rate": 0.00016865766275483865, + "loss": 1.3941, + "step": 7437 + }, + { + "epoch": 0.7845991561181435, + "grad_norm": 0.6972238421440125, + "learning_rate": 0.00016849907329805118, + "loss": 1.3618, + "step": 7438 + }, + { + "epoch": 0.784704641350211, + "grad_norm": 0.6873337626457214, + "learning_rate": 0.00016834054900134228, + "loss": 1.4078, + "step": 7439 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 0.7057613730430603, + "learning_rate": 0.00016818208988247533, + "loss": 1.3771, + "step": 7440 + }, + { + "epoch": 0.784915611814346, + "grad_norm": 
0.6673192381858826, + "learning_rate": 0.00016802369595920647, + "loss": 1.3943, + "step": 7441 + }, + { + "epoch": 0.7850210970464135, + "grad_norm": 0.7398115396499634, + "learning_rate": 0.00016786536724928432, + "loss": 1.3714, + "step": 7442 + }, + { + "epoch": 0.785126582278481, + "grad_norm": 0.6965714693069458, + "learning_rate": 0.00016770710377045074, + "loss": 1.3418, + "step": 7443 + }, + { + "epoch": 0.7852320675105485, + "grad_norm": 0.6536018252372742, + "learning_rate": 0.00016754890554043965, + "loss": 1.3663, + "step": 7444 + }, + { + "epoch": 0.7853375527426161, + "grad_norm": 0.6975189447402954, + "learning_rate": 0.00016739077257697804, + "loss": 1.3853, + "step": 7445 + }, + { + "epoch": 0.7854430379746835, + "grad_norm": 0.6970981359481812, + "learning_rate": 0.0001672327048977856, + "loss": 1.3815, + "step": 7446 + }, + { + "epoch": 0.7855485232067511, + "grad_norm": 0.7008918523788452, + "learning_rate": 0.00016707470252057423, + "loss": 1.4027, + "step": 7447 + }, + { + "epoch": 0.7856540084388186, + "grad_norm": 0.7167918086051941, + "learning_rate": 0.00016691676546304936, + "loss": 1.3654, + "step": 7448 + }, + { + "epoch": 0.785759493670886, + "grad_norm": 0.7730286121368408, + "learning_rate": 0.00016675889374290852, + "loss": 1.3985, + "step": 7449 + }, + { + "epoch": 0.7858649789029536, + "grad_norm": 0.7166760563850403, + "learning_rate": 0.0001666010873778419, + "loss": 1.391, + "step": 7450 + }, + { + "epoch": 0.7859704641350211, + "grad_norm": 0.7125458717346191, + "learning_rate": 0.0001664433463855325, + "loss": 1.4025, + "step": 7451 + }, + { + "epoch": 0.7860759493670886, + "grad_norm": 0.705710232257843, + "learning_rate": 0.00016628567078365612, + "loss": 1.3935, + "step": 7452 + }, + { + "epoch": 0.7861814345991561, + "grad_norm": 0.6949275135993958, + "learning_rate": 0.00016612806058988088, + "loss": 1.3729, + "step": 7453 + }, + { + "epoch": 0.7862869198312237, + "grad_norm": 0.7198405265808105, + "learning_rate": 
0.0001659705158218679, + "loss": 1.3923, + "step": 7454 + }, + { + "epoch": 0.7863924050632911, + "grad_norm": 0.6922445893287659, + "learning_rate": 0.00016581303649727076, + "loss": 1.3928, + "step": 7455 + }, + { + "epoch": 0.7864978902953587, + "grad_norm": 0.726126492023468, + "learning_rate": 0.000165655622633736, + "loss": 1.3896, + "step": 7456 + }, + { + "epoch": 0.7866033755274262, + "grad_norm": 0.7390122413635254, + "learning_rate": 0.00016549827424890257, + "loss": 1.3859, + "step": 7457 + }, + { + "epoch": 0.7867088607594936, + "grad_norm": 0.6998476386070251, + "learning_rate": 0.00016534099136040207, + "loss": 1.3918, + "step": 7458 + }, + { + "epoch": 0.7868143459915612, + "grad_norm": 0.7870787978172302, + "learning_rate": 0.0001651837739858589, + "loss": 1.4069, + "step": 7459 + }, + { + "epoch": 0.7869198312236287, + "grad_norm": 0.6813379526138306, + "learning_rate": 0.00016502662214289, + "loss": 1.3792, + "step": 7460 + }, + { + "epoch": 0.7870253164556962, + "grad_norm": 0.898655354976654, + "learning_rate": 0.000164869535849105, + "loss": 1.3971, + "step": 7461 + }, + { + "epoch": 0.7871308016877637, + "grad_norm": 0.7477975487709045, + "learning_rate": 0.00016471251512210626, + "loss": 1.3746, + "step": 7462 + }, + { + "epoch": 0.7872362869198313, + "grad_norm": 0.7416803240776062, + "learning_rate": 0.00016455555997948868, + "loss": 1.3817, + "step": 7463 + }, + { + "epoch": 0.7873417721518987, + "grad_norm": 0.7443211674690247, + "learning_rate": 0.0001643986704388397, + "loss": 1.356, + "step": 7464 + }, + { + "epoch": 0.7874472573839663, + "grad_norm": 0.7021069526672363, + "learning_rate": 0.00016424184651773997, + "loss": 1.3891, + "step": 7465 + }, + { + "epoch": 0.7875527426160338, + "grad_norm": 0.677893877029419, + "learning_rate": 0.0001640850882337622, + "loss": 1.4004, + "step": 7466 + }, + { + "epoch": 0.7876582278481012, + "grad_norm": 1.0726814270019531, + "learning_rate": 0.00016392839560447196, + "loss": 1.3646, + "step": 
7467 + }, + { + "epoch": 0.7877637130801688, + "grad_norm": 0.7061154246330261, + "learning_rate": 0.00016377176864742734, + "loss": 1.3991, + "step": 7468 + }, + { + "epoch": 0.7878691983122363, + "grad_norm": 0.8995250463485718, + "learning_rate": 0.00016361520738017934, + "loss": 1.364, + "step": 7469 + }, + { + "epoch": 0.7879746835443038, + "grad_norm": 0.8205491304397583, + "learning_rate": 0.00016345871182027124, + "loss": 1.3891, + "step": 7470 + }, + { + "epoch": 0.7880801687763713, + "grad_norm": 0.7223994135856628, + "learning_rate": 0.00016330228198523927, + "loss": 1.39, + "step": 7471 + }, + { + "epoch": 0.7881856540084389, + "grad_norm": 0.7799689769744873, + "learning_rate": 0.00016314591789261216, + "loss": 1.3564, + "step": 7472 + }, + { + "epoch": 0.7882911392405063, + "grad_norm": 0.8581148982048035, + "learning_rate": 0.00016298961955991105, + "loss": 1.4141, + "step": 7473 + }, + { + "epoch": 0.7883966244725739, + "grad_norm": 0.7899479269981384, + "learning_rate": 0.00016283338700465034, + "loss": 1.3701, + "step": 7474 + }, + { + "epoch": 0.7885021097046413, + "grad_norm": 0.7849689722061157, + "learning_rate": 0.00016267722024433654, + "loss": 1.3798, + "step": 7475 + }, + { + "epoch": 0.7886075949367088, + "grad_norm": 0.9586749076843262, + "learning_rate": 0.0001625211192964688, + "loss": 1.396, + "step": 7476 + }, + { + "epoch": 0.7887130801687764, + "grad_norm": 0.6836292147636414, + "learning_rate": 0.00016236508417853917, + "loss": 1.426, + "step": 7477 + }, + { + "epoch": 0.7888185654008438, + "grad_norm": 0.6748135089874268, + "learning_rate": 0.00016220911490803206, + "loss": 1.3844, + "step": 7478 + }, + { + "epoch": 0.7889240506329114, + "grad_norm": 0.8035507798194885, + "learning_rate": 0.00016205321150242454, + "loss": 1.3507, + "step": 7479 + }, + { + "epoch": 0.7890295358649789, + "grad_norm": 0.8345606327056885, + "learning_rate": 0.00016189737397918653, + "loss": 1.3969, + "step": 7480 + }, + { + "epoch": 
0.7891350210970464, + "grad_norm": 0.6970751881599426, + "learning_rate": 0.00016174160235578, + "loss": 1.3894, + "step": 7481 + }, + { + "epoch": 0.7892405063291139, + "grad_norm": 0.7889783382415771, + "learning_rate": 0.00016158589664966053, + "loss": 1.4259, + "step": 7482 + }, + { + "epoch": 0.7893459915611815, + "grad_norm": 0.7183694243431091, + "learning_rate": 0.00016143025687827538, + "loss": 1.3851, + "step": 7483 + }, + { + "epoch": 0.7894514767932489, + "grad_norm": 0.8967047929763794, + "learning_rate": 0.0001612746830590649, + "loss": 1.4108, + "step": 7484 + }, + { + "epoch": 0.7895569620253164, + "grad_norm": 0.6970764398574829, + "learning_rate": 0.00016111917520946175, + "loss": 1.402, + "step": 7485 + }, + { + "epoch": 0.789662447257384, + "grad_norm": 0.6657581329345703, + "learning_rate": 0.00016096373334689154, + "loss": 1.3612, + "step": 7486 + }, + { + "epoch": 0.7897679324894514, + "grad_norm": 0.6841005682945251, + "learning_rate": 0.00016080835748877214, + "loss": 1.3865, + "step": 7487 + }, + { + "epoch": 0.789873417721519, + "grad_norm": 0.7344062328338623, + "learning_rate": 0.00016065304765251423, + "loss": 1.3983, + "step": 7488 + }, + { + "epoch": 0.7899789029535865, + "grad_norm": 0.6937626600265503, + "learning_rate": 0.00016049780385552113, + "loss": 1.3931, + "step": 7489 + }, + { + "epoch": 0.790084388185654, + "grad_norm": 0.6732619404792786, + "learning_rate": 0.0001603426261151884, + "loss": 1.3925, + "step": 7490 + }, + { + "epoch": 0.7901898734177215, + "grad_norm": 0.6704684495925903, + "learning_rate": 0.000160187514448905, + "loss": 1.3633, + "step": 7491 + }, + { + "epoch": 0.7902953586497891, + "grad_norm": 0.7464132308959961, + "learning_rate": 0.0001600324688740516, + "loss": 1.4084, + "step": 7492 + }, + { + "epoch": 0.7904008438818565, + "grad_norm": 0.6715165376663208, + "learning_rate": 0.00015987748940800186, + "loss": 1.395, + "step": 7493 + }, + { + "epoch": 0.790506329113924, + "grad_norm": 
0.701321005821228, + "learning_rate": 0.0001597225760681221, + "loss": 1.3903, + "step": 7494 + }, + { + "epoch": 0.7906118143459916, + "grad_norm": 0.6843594908714294, + "learning_rate": 0.00015956772887177115, + "loss": 1.4272, + "step": 7495 + }, + { + "epoch": 0.790717299578059, + "grad_norm": 0.6485147476196289, + "learning_rate": 0.00015941294783630022, + "loss": 1.3466, + "step": 7496 + }, + { + "epoch": 0.7908227848101266, + "grad_norm": 0.6886973977088928, + "learning_rate": 0.00015925823297905346, + "loss": 1.3763, + "step": 7497 + }, + { + "epoch": 0.7909282700421941, + "grad_norm": 0.7110106945037842, + "learning_rate": 0.00015910358431736745, + "loss": 1.3948, + "step": 7498 + }, + { + "epoch": 0.7910337552742616, + "grad_norm": 0.7311848998069763, + "learning_rate": 0.00015894900186857105, + "loss": 1.3633, + "step": 7499 + }, + { + "epoch": 0.7911392405063291, + "grad_norm": 0.7131513953208923, + "learning_rate": 0.00015879448564998648, + "loss": 1.4299, + "step": 7500 + }, + { + "epoch": 0.7912447257383967, + "grad_norm": 0.7179650068283081, + "learning_rate": 0.00015864003567892776, + "loss": 1.4388, + "step": 7501 + }, + { + "epoch": 0.7913502109704641, + "grad_norm": 0.6735038757324219, + "learning_rate": 0.00015848565197270175, + "loss": 1.397, + "step": 7502 + }, + { + "epoch": 0.7914556962025316, + "grad_norm": 0.7069321274757385, + "learning_rate": 0.00015833133454860814, + "loss": 1.3926, + "step": 7503 + }, + { + "epoch": 0.7915611814345992, + "grad_norm": 0.7596351504325867, + "learning_rate": 0.00015817708342393878, + "loss": 1.4414, + "step": 7504 + }, + { + "epoch": 0.7916666666666666, + "grad_norm": 0.7026251554489136, + "learning_rate": 0.0001580228986159783, + "loss": 1.3717, + "step": 7505 + }, + { + "epoch": 0.7917721518987342, + "grad_norm": 0.711010754108429, + "learning_rate": 0.00015786878014200387, + "loss": 1.407, + "step": 7506 + }, + { + "epoch": 0.7918776371308017, + "grad_norm": 0.8002294301986694, + "learning_rate": 
0.0001577147280192851, + "loss": 1.422, + "step": 7507 + }, + { + "epoch": 0.7919831223628692, + "grad_norm": 0.9395539164543152, + "learning_rate": 0.0001575607422650846, + "loss": 1.3708, + "step": 7508 + }, + { + "epoch": 0.7920886075949367, + "grad_norm": 0.6869181394577026, + "learning_rate": 0.00015740682289665714, + "loss": 1.3429, + "step": 7509 + }, + { + "epoch": 0.7921940928270043, + "grad_norm": 0.7753698825836182, + "learning_rate": 0.0001572529699312501, + "loss": 1.3844, + "step": 7510 + }, + { + "epoch": 0.7922995780590717, + "grad_norm": 0.9703484177589417, + "learning_rate": 0.0001570991833861035, + "loss": 1.3728, + "step": 7511 + }, + { + "epoch": 0.7924050632911392, + "grad_norm": 0.6907810568809509, + "learning_rate": 0.00015694546327844986, + "loss": 1.3774, + "step": 7512 + }, + { + "epoch": 0.7925105485232068, + "grad_norm": 0.7310986518859863, + "learning_rate": 0.00015679180962551435, + "loss": 1.3701, + "step": 7513 + }, + { + "epoch": 0.7926160337552742, + "grad_norm": 0.8856257200241089, + "learning_rate": 0.00015663822244451446, + "loss": 1.3923, + "step": 7514 + }, + { + "epoch": 0.7927215189873418, + "grad_norm": 0.7399455904960632, + "learning_rate": 0.00015648470175266057, + "loss": 1.3568, + "step": 7515 + }, + { + "epoch": 0.7928270042194093, + "grad_norm": 0.7344725728034973, + "learning_rate": 0.00015633124756715523, + "loss": 1.4245, + "step": 7516 + }, + { + "epoch": 0.7929324894514768, + "grad_norm": 0.887397050857544, + "learning_rate": 0.00015617785990519403, + "loss": 1.3611, + "step": 7517 + }, + { + "epoch": 0.7930379746835443, + "grad_norm": 0.7231271266937256, + "learning_rate": 0.00015602453878396479, + "loss": 1.3881, + "step": 7518 + }, + { + "epoch": 0.7931434599156119, + "grad_norm": 0.6819596290588379, + "learning_rate": 0.0001558712842206477, + "loss": 1.388, + "step": 7519 + }, + { + "epoch": 0.7932489451476793, + "grad_norm": 0.7717449069023132, + "learning_rate": 0.0001557180962324158, + "loss": 1.3655, + 
"step": 7520 + }, + { + "epoch": 0.7933544303797468, + "grad_norm": 0.6695538759231567, + "learning_rate": 0.00015556497483643466, + "loss": 1.364, + "step": 7521 + }, + { + "epoch": 0.7934599156118144, + "grad_norm": 0.6666560769081116, + "learning_rate": 0.00015541192004986222, + "loss": 1.3822, + "step": 7522 + }, + { + "epoch": 0.7935654008438818, + "grad_norm": 0.7012074589729309, + "learning_rate": 0.00015525893188984898, + "loss": 1.3676, + "step": 7523 + }, + { + "epoch": 0.7936708860759494, + "grad_norm": 0.7345492839813232, + "learning_rate": 0.00015510601037353804, + "loss": 1.3906, + "step": 7524 + }, + { + "epoch": 0.7937763713080169, + "grad_norm": 0.7293058037757874, + "learning_rate": 0.00015495315551806486, + "loss": 1.4011, + "step": 7525 + }, + { + "epoch": 0.7938818565400844, + "grad_norm": 0.6643821001052856, + "learning_rate": 0.000154800367340558, + "loss": 1.3754, + "step": 7526 + }, + { + "epoch": 0.7939873417721519, + "grad_norm": 0.7211504578590393, + "learning_rate": 0.00015464764585813783, + "loss": 1.3737, + "step": 7527 + }, + { + "epoch": 0.7940928270042195, + "grad_norm": 0.6887285113334656, + "learning_rate": 0.0001544949910879177, + "loss": 1.3748, + "step": 7528 + }, + { + "epoch": 0.7941983122362869, + "grad_norm": 0.7710700631141663, + "learning_rate": 0.00015434240304700332, + "loss": 1.382, + "step": 7529 + }, + { + "epoch": 0.7943037974683544, + "grad_norm": 0.6994560956954956, + "learning_rate": 0.00015418988175249282, + "loss": 1.3914, + "step": 7530 + }, + { + "epoch": 0.794409282700422, + "grad_norm": 0.7011643648147583, + "learning_rate": 0.00015403742722147707, + "loss": 1.4015, + "step": 7531 + }, + { + "epoch": 0.7945147679324894, + "grad_norm": 0.6858746409416199, + "learning_rate": 0.00015388503947103937, + "loss": 1.3706, + "step": 7532 + }, + { + "epoch": 0.794620253164557, + "grad_norm": 0.7416503429412842, + "learning_rate": 0.00015373271851825527, + "loss": 1.3991, + "step": 7533 + }, + { + "epoch": 
0.7947257383966245, + "grad_norm": 0.6881956458091736, + "learning_rate": 0.00015358046438019356, + "loss": 1.3965, + "step": 7534 + }, + { + "epoch": 0.794831223628692, + "grad_norm": 0.7122061848640442, + "learning_rate": 0.00015342827707391475, + "loss": 1.3409, + "step": 7535 + }, + { + "epoch": 0.7949367088607595, + "grad_norm": 0.7257264852523804, + "learning_rate": 0.0001532761566164723, + "loss": 1.3493, + "step": 7536 + }, + { + "epoch": 0.7950421940928271, + "grad_norm": 0.6426222324371338, + "learning_rate": 0.0001531241030249121, + "loss": 1.3888, + "step": 7537 + }, + { + "epoch": 0.7951476793248945, + "grad_norm": 0.7509781122207642, + "learning_rate": 0.00015297211631627234, + "loss": 1.3746, + "step": 7538 + }, + { + "epoch": 0.795253164556962, + "grad_norm": 0.7453066110610962, + "learning_rate": 0.0001528201965075841, + "loss": 1.3646, + "step": 7539 + }, + { + "epoch": 0.7953586497890295, + "grad_norm": 0.8515508770942688, + "learning_rate": 0.00015266834361587063, + "loss": 1.3872, + "step": 7540 + }, + { + "epoch": 0.795464135021097, + "grad_norm": 0.7762847542762756, + "learning_rate": 0.00015251655765814777, + "loss": 1.3368, + "step": 7541 + }, + { + "epoch": 0.7955696202531646, + "grad_norm": 0.702143669128418, + "learning_rate": 0.000152364838651424, + "loss": 1.3731, + "step": 7542 + }, + { + "epoch": 0.795675105485232, + "grad_norm": 0.8602166175842285, + "learning_rate": 0.00015221318661269985, + "loss": 1.3814, + "step": 7543 + }, + { + "epoch": 0.7957805907172996, + "grad_norm": 0.7377411127090454, + "learning_rate": 0.00015206160155896924, + "loss": 1.3689, + "step": 7544 + }, + { + "epoch": 0.7958860759493671, + "grad_norm": 0.7464576363563538, + "learning_rate": 0.00015191008350721772, + "loss": 1.3933, + "step": 7545 + }, + { + "epoch": 0.7959915611814345, + "grad_norm": 0.7360361814498901, + "learning_rate": 0.00015175863247442374, + "loss": 1.3968, + "step": 7546 + }, + { + "epoch": 0.7960970464135021, + "grad_norm": 
0.7676724195480347, + "learning_rate": 0.00015160724847755806, + "loss": 1.3568, + "step": 7547 + }, + { + "epoch": 0.7962025316455696, + "grad_norm": 0.6703827977180481, + "learning_rate": 0.00015145593153358412, + "loss": 1.3579, + "step": 7548 + }, + { + "epoch": 0.7963080168776371, + "grad_norm": 0.7659571766853333, + "learning_rate": 0.0001513046816594575, + "loss": 1.3888, + "step": 7549 + }, + { + "epoch": 0.7964135021097046, + "grad_norm": 0.7318093776702881, + "learning_rate": 0.00015115349887212678, + "loss": 1.3894, + "step": 7550 + }, + { + "epoch": 0.7965189873417722, + "grad_norm": 0.7462074756622314, + "learning_rate": 0.00015100238318853262, + "loss": 1.368, + "step": 7551 + }, + { + "epoch": 0.7966244725738396, + "grad_norm": 0.6993612051010132, + "learning_rate": 0.00015085133462560833, + "loss": 1.4049, + "step": 7552 + }, + { + "epoch": 0.7967299578059072, + "grad_norm": 0.6898479461669922, + "learning_rate": 0.00015070035320027933, + "loss": 1.405, + "step": 7553 + }, + { + "epoch": 0.7968354430379747, + "grad_norm": 0.6769663691520691, + "learning_rate": 0.00015054943892946446, + "loss": 1.357, + "step": 7554 + }, + { + "epoch": 0.7969409282700421, + "grad_norm": 0.7765471935272217, + "learning_rate": 0.000150398591830074, + "loss": 1.3258, + "step": 7555 + }, + { + "epoch": 0.7970464135021097, + "grad_norm": 0.8215624690055847, + "learning_rate": 0.00015024781191901122, + "loss": 1.4225, + "step": 7556 + }, + { + "epoch": 0.7971518987341772, + "grad_norm": 0.6515848636627197, + "learning_rate": 0.00015009709921317172, + "loss": 1.3808, + "step": 7557 + }, + { + "epoch": 0.7972573839662447, + "grad_norm": 0.8926091194152832, + "learning_rate": 0.00014994645372944367, + "loss": 1.353, + "step": 7558 + }, + { + "epoch": 0.7973628691983122, + "grad_norm": 0.738673746585846, + "learning_rate": 0.0001497958754847076, + "loss": 1.3707, + "step": 7559 + }, + { + "epoch": 0.7974683544303798, + "grad_norm": 0.7754366397857666, + "learning_rate": 
0.00014964536449583657, + "loss": 1.389, + "step": 7560 + }, + { + "epoch": 0.7975738396624472, + "grad_norm": 0.8003454208374023, + "learning_rate": 0.0001494949207796961, + "loss": 1.374, + "step": 7561 + }, + { + "epoch": 0.7976793248945148, + "grad_norm": 0.8237720727920532, + "learning_rate": 0.00014934454435314417, + "loss": 1.4107, + "step": 7562 + }, + { + "epoch": 0.7977848101265823, + "grad_norm": 0.8332294225692749, + "learning_rate": 0.00014919423523303095, + "loss": 1.382, + "step": 7563 + }, + { + "epoch": 0.7978902953586497, + "grad_norm": 0.674927830696106, + "learning_rate": 0.00014904399343619972, + "loss": 1.3888, + "step": 7564 + }, + { + "epoch": 0.7979957805907173, + "grad_norm": 0.8832077980041504, + "learning_rate": 0.00014889381897948575, + "loss": 1.3994, + "step": 7565 + }, + { + "epoch": 0.7981012658227848, + "grad_norm": 0.9695097804069519, + "learning_rate": 0.00014874371187971672, + "loss": 1.3618, + "step": 7566 + }, + { + "epoch": 0.7982067510548523, + "grad_norm": 0.7884697318077087, + "learning_rate": 0.00014859367215371293, + "loss": 1.4175, + "step": 7567 + }, + { + "epoch": 0.7983122362869198, + "grad_norm": 1.0408929586410522, + "learning_rate": 0.00014844369981828698, + "loss": 1.3856, + "step": 7568 + }, + { + "epoch": 0.7984177215189874, + "grad_norm": 1.1073236465454102, + "learning_rate": 0.00014829379489024415, + "loss": 1.4175, + "step": 7569 + }, + { + "epoch": 0.7985232067510548, + "grad_norm": 0.7634444236755371, + "learning_rate": 0.00014814395738638195, + "loss": 1.3779, + "step": 7570 + }, + { + "epoch": 0.7986286919831224, + "grad_norm": 0.7541136741638184, + "learning_rate": 0.0001479941873234905, + "loss": 1.3415, + "step": 7571 + }, + { + "epoch": 0.7987341772151899, + "grad_norm": 1.0558245182037354, + "learning_rate": 0.00014784448471835224, + "loss": 1.397, + "step": 7572 + }, + { + "epoch": 0.7988396624472573, + "grad_norm": 0.7092965841293335, + "learning_rate": 0.0001476948495877418, + "loss": 1.3869, + 
"step": 7573 + }, + { + "epoch": 0.7989451476793249, + "grad_norm": 0.6887006759643555, + "learning_rate": 0.00014754528194842707, + "loss": 1.3995, + "step": 7574 + }, + { + "epoch": 0.7990506329113924, + "grad_norm": 1.0013883113861084, + "learning_rate": 0.00014739578181716765, + "loss": 1.3632, + "step": 7575 + }, + { + "epoch": 0.7991561181434599, + "grad_norm": 0.7191919684410095, + "learning_rate": 0.00014724634921071573, + "loss": 1.3848, + "step": 7576 + }, + { + "epoch": 0.7992616033755274, + "grad_norm": 0.741167426109314, + "learning_rate": 0.0001470969841458159, + "loss": 1.3908, + "step": 7577 + }, + { + "epoch": 0.799367088607595, + "grad_norm": 0.8235173225402832, + "learning_rate": 0.00014694768663920537, + "loss": 1.3969, + "step": 7578 + }, + { + "epoch": 0.7994725738396624, + "grad_norm": 0.6937182545661926, + "learning_rate": 0.0001467984567076137, + "loss": 1.4092, + "step": 7579 + }, + { + "epoch": 0.79957805907173, + "grad_norm": 0.7069861888885498, + "learning_rate": 0.00014664929436776278, + "loss": 1.3721, + "step": 7580 + }, + { + "epoch": 0.7996835443037975, + "grad_norm": 0.7528973817825317, + "learning_rate": 0.00014650019963636696, + "loss": 1.3671, + "step": 7581 + }, + { + "epoch": 0.799789029535865, + "grad_norm": 0.738186240196228, + "learning_rate": 0.0001463511725301331, + "loss": 1.4072, + "step": 7582 + }, + { + "epoch": 0.7998945147679325, + "grad_norm": 0.6596523523330688, + "learning_rate": 0.00014620221306576027, + "loss": 1.3996, + "step": 7583 + }, + { + "epoch": 0.8, + "grad_norm": 0.7706806659698486, + "learning_rate": 0.00014605332125994038, + "loss": 1.371, + "step": 7584 + }, + { + "epoch": 0.8001054852320675, + "grad_norm": 0.7219576835632324, + "learning_rate": 0.0001459044971293575, + "loss": 1.3861, + "step": 7585 + }, + { + "epoch": 0.800210970464135, + "grad_norm": 0.6557290554046631, + "learning_rate": 0.000145755740690688, + "loss": 1.376, + "step": 7586 + }, + { + "epoch": 0.8003164556962026, + 
"grad_norm": 0.755409300327301, + "learning_rate": 0.00014560705196060074, + "loss": 1.365, + "step": 7587 + }, + { + "epoch": 0.80042194092827, + "grad_norm": 0.8157063722610474, + "learning_rate": 0.00014545843095575709, + "loss": 1.3784, + "step": 7588 + }, + { + "epoch": 0.8005274261603376, + "grad_norm": 0.7712364196777344, + "learning_rate": 0.00014530987769281075, + "loss": 1.3974, + "step": 7589 + }, + { + "epoch": 0.8006329113924051, + "grad_norm": 0.7578460574150085, + "learning_rate": 0.00014516139218840788, + "loss": 1.3594, + "step": 7590 + }, + { + "epoch": 0.8007383966244725, + "grad_norm": 0.7801294922828674, + "learning_rate": 0.00014501297445918703, + "loss": 1.3868, + "step": 7591 + }, + { + "epoch": 0.8008438818565401, + "grad_norm": 0.8391979336738586, + "learning_rate": 0.00014486462452177896, + "loss": 1.403, + "step": 7592 + }, + { + "epoch": 0.8009493670886076, + "grad_norm": 0.7435699105262756, + "learning_rate": 0.0001447163423928073, + "loss": 1.4123, + "step": 7593 + }, + { + "epoch": 0.8010548523206751, + "grad_norm": 0.8387777805328369, + "learning_rate": 0.00014456812808888775, + "loss": 1.37, + "step": 7594 + }, + { + "epoch": 0.8011603375527426, + "grad_norm": 0.7251166105270386, + "learning_rate": 0.00014441998162662847, + "loss": 1.4369, + "step": 7595 + }, + { + "epoch": 0.8012658227848102, + "grad_norm": 0.6653761267662048, + "learning_rate": 0.00014427190302262989, + "loss": 1.3592, + "step": 7596 + }, + { + "epoch": 0.8013713080168776, + "grad_norm": 0.7325983047485352, + "learning_rate": 0.00014412389229348494, + "loss": 1.3296, + "step": 7597 + }, + { + "epoch": 0.8014767932489452, + "grad_norm": 0.7710458636283875, + "learning_rate": 0.00014397594945577912, + "loss": 1.3612, + "step": 7598 + }, + { + "epoch": 0.8015822784810127, + "grad_norm": 0.7122901678085327, + "learning_rate": 0.00014382807452609003, + "loss": 1.3864, + "step": 7599 + }, + { + "epoch": 0.8016877637130801, + "grad_norm": 0.6541584134101868, + 
"learning_rate": 0.00014368026752098782, + "loss": 1.3603, + "step": 7600 + }, + { + "epoch": 0.8017932489451477, + "grad_norm": 0.6866434812545776, + "learning_rate": 0.00014353252845703506, + "loss": 1.4145, + "step": 7601 + }, + { + "epoch": 0.8018987341772152, + "grad_norm": 0.94842928647995, + "learning_rate": 0.00014338485735078632, + "loss": 1.3765, + "step": 7602 + }, + { + "epoch": 0.8020042194092827, + "grad_norm": 0.7136005163192749, + "learning_rate": 0.0001432372542187895, + "loss": 1.4308, + "step": 7603 + }, + { + "epoch": 0.8021097046413502, + "grad_norm": 0.829378068447113, + "learning_rate": 0.00014308971907758383, + "loss": 1.4083, + "step": 7604 + }, + { + "epoch": 0.8022151898734177, + "grad_norm": 0.7111039161682129, + "learning_rate": 0.00014294225194370154, + "loss": 1.3747, + "step": 7605 + }, + { + "epoch": 0.8023206751054852, + "grad_norm": 0.7395954728126526, + "learning_rate": 0.00014279485283366696, + "loss": 1.3976, + "step": 7606 + }, + { + "epoch": 0.8024261603375528, + "grad_norm": 0.6966091394424438, + "learning_rate": 0.00014264752176399687, + "loss": 1.3669, + "step": 7607 + }, + { + "epoch": 0.8025316455696202, + "grad_norm": 0.7639530897140503, + "learning_rate": 0.0001425002587512005, + "loss": 1.36, + "step": 7608 + }, + { + "epoch": 0.8026371308016877, + "grad_norm": 0.6816637516021729, + "learning_rate": 0.00014235306381177952, + "loss": 1.4017, + "step": 7609 + }, + { + "epoch": 0.8027426160337553, + "grad_norm": 0.7088680863380432, + "learning_rate": 0.00014220593696222768, + "loss": 1.4109, + "step": 7610 + }, + { + "epoch": 0.8028481012658227, + "grad_norm": 0.7258879542350769, + "learning_rate": 0.00014205887821903105, + "loss": 1.3528, + "step": 7611 + }, + { + "epoch": 0.8029535864978903, + "grad_norm": 0.6803544759750366, + "learning_rate": 0.00014191188759866887, + "loss": 1.393, + "step": 7612 + }, + { + "epoch": 0.8030590717299578, + "grad_norm": 0.7126730680465698, + "learning_rate": 0.00014176496511761192, + 
"loss": 1.3591, + "step": 7613 + }, + { + "epoch": 0.8031645569620253, + "grad_norm": 0.6419739127159119, + "learning_rate": 0.0001416181107923235, + "loss": 1.3554, + "step": 7614 + }, + { + "epoch": 0.8032700421940928, + "grad_norm": 0.7463902235031128, + "learning_rate": 0.0001414713246392594, + "loss": 1.3855, + "step": 7615 + }, + { + "epoch": 0.8033755274261604, + "grad_norm": 0.6688806414604187, + "learning_rate": 0.0001413246066748678, + "loss": 1.3847, + "step": 7616 + }, + { + "epoch": 0.8034810126582278, + "grad_norm": 0.7071070671081543, + "learning_rate": 0.00014117795691558915, + "loss": 1.3444, + "step": 7617 + }, + { + "epoch": 0.8035864978902953, + "grad_norm": 0.6626156568527222, + "learning_rate": 0.00014103137537785633, + "loss": 1.3662, + "step": 7618 + }, + { + "epoch": 0.8036919831223629, + "grad_norm": 0.7659680843353271, + "learning_rate": 0.00014088486207809449, + "loss": 1.3814, + "step": 7619 + }, + { + "epoch": 0.8037974683544303, + "grad_norm": 0.6708775758743286, + "learning_rate": 0.00014073841703272092, + "loss": 1.3585, + "step": 7620 + }, + { + "epoch": 0.8039029535864979, + "grad_norm": 0.6846840381622314, + "learning_rate": 0.00014059204025814603, + "loss": 1.4396, + "step": 7621 + }, + { + "epoch": 0.8040084388185654, + "grad_norm": 0.7574080228805542, + "learning_rate": 0.0001404457317707718, + "loss": 1.3618, + "step": 7622 + }, + { + "epoch": 0.8041139240506329, + "grad_norm": 0.7992683053016663, + "learning_rate": 0.00014029949158699285, + "loss": 1.3599, + "step": 7623 + }, + { + "epoch": 0.8042194092827004, + "grad_norm": 0.7022688984870911, + "learning_rate": 0.00014015331972319606, + "loss": 1.3887, + "step": 7624 + }, + { + "epoch": 0.804324894514768, + "grad_norm": 0.7595717310905457, + "learning_rate": 0.00014000721619576077, + "loss": 1.3707, + "step": 7625 + }, + { + "epoch": 0.8044303797468354, + "grad_norm": 0.6779691576957703, + "learning_rate": 0.0001398611810210586, + "loss": 1.368, + "step": 7626 + }, + { + 
"epoch": 0.804535864978903, + "grad_norm": 0.684158444404602, + "learning_rate": 0.0001397152142154536, + "loss": 1.3836, + "step": 7627 + }, + { + "epoch": 0.8046413502109705, + "grad_norm": 0.6511198878288269, + "learning_rate": 0.00013956931579530194, + "loss": 1.3978, + "step": 7628 + }, + { + "epoch": 0.8047468354430379, + "grad_norm": 0.8836888074874878, + "learning_rate": 0.0001394234857769521, + "loss": 1.3985, + "step": 7629 + }, + { + "epoch": 0.8048523206751055, + "grad_norm": 0.6469628214836121, + "learning_rate": 0.00013927772417674558, + "loss": 1.4191, + "step": 7630 + }, + { + "epoch": 0.804957805907173, + "grad_norm": 0.704065203666687, + "learning_rate": 0.00013913203101101532, + "loss": 1.4211, + "step": 7631 + }, + { + "epoch": 0.8050632911392405, + "grad_norm": 0.6952881813049316, + "learning_rate": 0.0001389864062960871, + "loss": 1.3621, + "step": 7632 + }, + { + "epoch": 0.805168776371308, + "grad_norm": 0.6982690095901489, + "learning_rate": 0.00013884085004827883, + "loss": 1.3802, + "step": 7633 + }, + { + "epoch": 0.8052742616033756, + "grad_norm": 0.7467403411865234, + "learning_rate": 0.0001386953622839008, + "loss": 1.4002, + "step": 7634 + }, + { + "epoch": 0.805379746835443, + "grad_norm": 0.7286059260368347, + "learning_rate": 0.0001385499430192557, + "loss": 1.4053, + "step": 7635 + }, + { + "epoch": 0.8054852320675105, + "grad_norm": 0.6697893142700195, + "learning_rate": 0.00013840459227063842, + "loss": 1.3623, + "step": 7636 + }, + { + "epoch": 0.8055907172995781, + "grad_norm": 0.6855668425559998, + "learning_rate": 0.00013825931005433605, + "loss": 1.3625, + "step": 7637 + }, + { + "epoch": 0.8056962025316455, + "grad_norm": 0.7151798605918884, + "learning_rate": 0.00013811409638662858, + "loss": 1.3686, + "step": 7638 + }, + { + "epoch": 0.8058016877637131, + "grad_norm": 0.7032800316810608, + "learning_rate": 0.0001379689512837878, + "loss": 1.388, + "step": 7639 + }, + { + "epoch": 0.8059071729957806, + "grad_norm": 
0.7352445721626282, + "learning_rate": 0.00013782387476207788, + "loss": 1.3556, + "step": 7640 + }, + { + "epoch": 0.8060126582278481, + "grad_norm": 0.6808099150657654, + "learning_rate": 0.0001376788668377554, + "loss": 1.3957, + "step": 7641 + }, + { + "epoch": 0.8061181434599156, + "grad_norm": 0.68494713306427, + "learning_rate": 0.0001375339275270692, + "loss": 1.4246, + "step": 7642 + }, + { + "epoch": 0.8062236286919832, + "grad_norm": 0.6529459953308105, + "learning_rate": 0.00013738905684626044, + "loss": 1.353, + "step": 7643 + }, + { + "epoch": 0.8063291139240506, + "grad_norm": 0.6761276125907898, + "learning_rate": 0.00013724425481156263, + "loss": 1.4084, + "step": 7644 + }, + { + "epoch": 0.8064345991561181, + "grad_norm": 0.6676883101463318, + "learning_rate": 0.00013709952143920148, + "loss": 1.393, + "step": 7645 + }, + { + "epoch": 0.8065400843881857, + "grad_norm": 0.69244784116745, + "learning_rate": 0.000136954856745395, + "loss": 1.4245, + "step": 7646 + }, + { + "epoch": 0.8066455696202531, + "grad_norm": 0.6675228476524353, + "learning_rate": 0.000136810260746354, + "loss": 1.3868, + "step": 7647 + }, + { + "epoch": 0.8067510548523207, + "grad_norm": 0.7291213870048523, + "learning_rate": 0.00013666573345828083, + "loss": 1.3623, + "step": 7648 + }, + { + "epoch": 0.8068565400843882, + "grad_norm": 0.735474705696106, + "learning_rate": 0.00013652127489737067, + "loss": 1.3872, + "step": 7649 + }, + { + "epoch": 0.8069620253164557, + "grad_norm": 0.6559410691261292, + "learning_rate": 0.00013637688507981064, + "loss": 1.4244, + "step": 7650 + }, + { + "epoch": 0.8070675105485232, + "grad_norm": 0.792075514793396, + "learning_rate": 0.0001362325640217805, + "loss": 1.3864, + "step": 7651 + }, + { + "epoch": 0.8071729957805908, + "grad_norm": 0.6613298654556274, + "learning_rate": 0.00013608831173945207, + "loss": 1.3074, + "step": 7652 + }, + { + "epoch": 0.8072784810126582, + "grad_norm": 0.7125478386878967, + "learning_rate": 
0.0001359441282489895, + "loss": 1.383, + "step": 7653 + }, + { + "epoch": 0.8073839662447257, + "grad_norm": 0.7252269387245178, + "learning_rate": 0.0001358000135665494, + "loss": 1.4121, + "step": 7654 + }, + { + "epoch": 0.8074894514767933, + "grad_norm": 0.6913387775421143, + "learning_rate": 0.00013565596770828025, + "loss": 1.3544, + "step": 7655 + }, + { + "epoch": 0.8075949367088607, + "grad_norm": 0.7090015411376953, + "learning_rate": 0.00013551199069032348, + "loss": 1.3507, + "step": 7656 + }, + { + "epoch": 0.8077004219409283, + "grad_norm": 0.6787751317024231, + "learning_rate": 0.0001353680825288123, + "loss": 1.4143, + "step": 7657 + }, + { + "epoch": 0.8078059071729958, + "grad_norm": 0.8142795562744141, + "learning_rate": 0.0001352242432398723, + "loss": 1.4004, + "step": 7658 + }, + { + "epoch": 0.8079113924050633, + "grad_norm": 0.6948515176773071, + "learning_rate": 0.00013508047283962137, + "loss": 1.3823, + "step": 7659 + }, + { + "epoch": 0.8080168776371308, + "grad_norm": 0.7075870633125305, + "learning_rate": 0.0001349367713441697, + "loss": 1.3595, + "step": 7660 + }, + { + "epoch": 0.8081223628691984, + "grad_norm": 0.6646133065223694, + "learning_rate": 0.0001347931387696198, + "loss": 1.3527, + "step": 7661 + }, + { + "epoch": 0.8082278481012658, + "grad_norm": 0.6957335472106934, + "learning_rate": 0.0001346495751320664, + "loss": 1.3971, + "step": 7662 + }, + { + "epoch": 0.8083333333333333, + "grad_norm": 0.7118244171142578, + "learning_rate": 0.00013450608044759634, + "loss": 1.4029, + "step": 7663 + }, + { + "epoch": 0.8084388185654009, + "grad_norm": 0.8076631426811218, + "learning_rate": 0.00013436265473228926, + "loss": 1.3719, + "step": 7664 + }, + { + "epoch": 0.8085443037974683, + "grad_norm": 0.6875929236412048, + "learning_rate": 0.0001342192980022166, + "loss": 1.3928, + "step": 7665 + }, + { + "epoch": 0.8086497890295359, + "grad_norm": 0.7991876602172852, + "learning_rate": 0.00013407601027344213, + "loss": 1.3369, + 
"step": 7666 + }, + { + "epoch": 0.8087552742616034, + "grad_norm": 0.7050565481185913, + "learning_rate": 0.00013393279156202197, + "loss": 1.371, + "step": 7667 + }, + { + "epoch": 0.8088607594936709, + "grad_norm": 0.6782211661338806, + "learning_rate": 0.00013378964188400457, + "loss": 1.4139, + "step": 7668 + }, + { + "epoch": 0.8089662447257384, + "grad_norm": 0.735057532787323, + "learning_rate": 0.00013364656125543044, + "loss": 1.3773, + "step": 7669 + }, + { + "epoch": 0.8090717299578059, + "grad_norm": 0.7154911756515503, + "learning_rate": 0.0001335035496923326, + "loss": 1.3844, + "step": 7670 + }, + { + "epoch": 0.8091772151898734, + "grad_norm": 0.6624658703804016, + "learning_rate": 0.00013336060721073608, + "loss": 1.3993, + "step": 7671 + }, + { + "epoch": 0.809282700421941, + "grad_norm": 0.7785964012145996, + "learning_rate": 0.00013321773382665822, + "loss": 1.4186, + "step": 7672 + }, + { + "epoch": 0.8093881856540084, + "grad_norm": 0.7066243290901184, + "learning_rate": 0.00013307492955610896, + "loss": 1.4212, + "step": 7673 + }, + { + "epoch": 0.8094936708860759, + "grad_norm": 0.688717246055603, + "learning_rate": 0.0001329321944150902, + "loss": 1.4061, + "step": 7674 + }, + { + "epoch": 0.8095991561181435, + "grad_norm": 0.6887671947479248, + "learning_rate": 0.000132789528419596, + "loss": 1.354, + "step": 7675 + }, + { + "epoch": 0.8097046413502109, + "grad_norm": 0.6823023557662964, + "learning_rate": 0.0001326469315856128, + "loss": 1.385, + "step": 7676 + }, + { + "epoch": 0.8098101265822785, + "grad_norm": 0.7239181399345398, + "learning_rate": 0.00013250440392911927, + "loss": 1.3658, + "step": 7677 + }, + { + "epoch": 0.809915611814346, + "grad_norm": 0.6812121272087097, + "learning_rate": 0.00013236194546608645, + "loss": 1.4212, + "step": 7678 + }, + { + "epoch": 0.8100210970464135, + "grad_norm": 0.7080599069595337, + "learning_rate": 0.00013221955621247749, + "loss": 1.3648, + "step": 7679 + }, + { + "epoch": 
0.810126582278481, + "grad_norm": 0.7861510515213013, + "learning_rate": 0.0001320772361842478, + "loss": 1.4273, + "step": 7680 + }, + { + "epoch": 0.8102320675105485, + "grad_norm": 0.6828396916389465, + "learning_rate": 0.00013193498539734478, + "loss": 1.3771, + "step": 7681 + }, + { + "epoch": 0.810337552742616, + "grad_norm": 0.7218540906906128, + "learning_rate": 0.00013179280386770885, + "loss": 1.3778, + "step": 7682 + }, + { + "epoch": 0.8104430379746835, + "grad_norm": 0.698485791683197, + "learning_rate": 0.00013165069161127183, + "loss": 1.3529, + "step": 7683 + }, + { + "epoch": 0.8105485232067511, + "grad_norm": 0.6975534558296204, + "learning_rate": 0.00013150864864395825, + "loss": 1.3623, + "step": 7684 + }, + { + "epoch": 0.8106540084388185, + "grad_norm": 0.6805258393287659, + "learning_rate": 0.00013136667498168464, + "loss": 1.4071, + "step": 7685 + }, + { + "epoch": 0.8107594936708861, + "grad_norm": 0.6919669508934021, + "learning_rate": 0.00013122477064035992, + "loss": 1.3885, + "step": 7686 + }, + { + "epoch": 0.8108649789029536, + "grad_norm": 0.6694682240486145, + "learning_rate": 0.00013108293563588504, + "loss": 1.3806, + "step": 7687 + }, + { + "epoch": 0.810970464135021, + "grad_norm": 0.6668712496757507, + "learning_rate": 0.00013094116998415358, + "loss": 1.3548, + "step": 7688 + }, + { + "epoch": 0.8110759493670886, + "grad_norm": 0.6862849593162537, + "learning_rate": 0.00013079947370105057, + "loss": 1.3652, + "step": 7689 + }, + { + "epoch": 0.8111814345991561, + "grad_norm": 0.675478458404541, + "learning_rate": 0.00013065784680245442, + "loss": 1.3406, + "step": 7690 + }, + { + "epoch": 0.8112869198312236, + "grad_norm": 0.6759390830993652, + "learning_rate": 0.00013051628930423485, + "loss": 1.3847, + "step": 7691 + }, + { + "epoch": 0.8113924050632911, + "grad_norm": 0.6983610391616821, + "learning_rate": 0.00013037480122225412, + "loss": 1.3975, + "step": 7692 + }, + { + "epoch": 0.8114978902953587, + "grad_norm": 
0.6835496425628662, + "learning_rate": 0.00013023338257236655, + "loss": 1.368, + "step": 7693 + }, + { + "epoch": 0.8116033755274261, + "grad_norm": 0.7560632824897766, + "learning_rate": 0.00013009203337041898, + "loss": 1.3609, + "step": 7694 + }, + { + "epoch": 0.8117088607594937, + "grad_norm": 0.66190105676651, + "learning_rate": 0.0001299507536322502, + "loss": 1.3647, + "step": 7695 + }, + { + "epoch": 0.8118143459915612, + "grad_norm": 0.6742803454399109, + "learning_rate": 0.00012980954337369133, + "loss": 1.4072, + "step": 7696 + }, + { + "epoch": 0.8119198312236287, + "grad_norm": 0.9272788166999817, + "learning_rate": 0.00012966840261056562, + "loss": 1.3627, + "step": 7697 + }, + { + "epoch": 0.8120253164556962, + "grad_norm": 0.6864501237869263, + "learning_rate": 0.0001295273313586885, + "loss": 1.3693, + "step": 7698 + }, + { + "epoch": 0.8121308016877637, + "grad_norm": 0.6971797347068787, + "learning_rate": 0.00012938632963386808, + "loss": 1.372, + "step": 7699 + }, + { + "epoch": 0.8122362869198312, + "grad_norm": 0.6921612024307251, + "learning_rate": 0.00012924539745190402, + "loss": 1.3971, + "step": 7700 + }, + { + "epoch": 0.8123417721518987, + "grad_norm": 0.8110692501068115, + "learning_rate": 0.0001291045348285885, + "loss": 1.357, + "step": 7701 + }, + { + "epoch": 0.8124472573839663, + "grad_norm": 0.6747639179229736, + "learning_rate": 0.00012896374177970602, + "loss": 1.3983, + "step": 7702 + }, + { + "epoch": 0.8125527426160337, + "grad_norm": 0.7503506541252136, + "learning_rate": 0.00012882301832103297, + "loss": 1.3408, + "step": 7703 + }, + { + "epoch": 0.8126582278481013, + "grad_norm": 0.7089521884918213, + "learning_rate": 0.0001286823644683382, + "loss": 1.3674, + "step": 7704 + }, + { + "epoch": 0.8127637130801688, + "grad_norm": 0.7093914747238159, + "learning_rate": 0.0001285417802373827, + "loss": 1.3713, + "step": 7705 + }, + { + "epoch": 0.8128691983122363, + "grad_norm": 0.6567253470420837, + "learning_rate": 
0.00012840126564391961, + "loss": 1.3653, + "step": 7706 + }, + { + "epoch": 0.8129746835443038, + "grad_norm": 0.6378287672996521, + "learning_rate": 0.00012826082070369402, + "loss": 1.3964, + "step": 7707 + }, + { + "epoch": 0.8130801687763713, + "grad_norm": 0.7857561111450195, + "learning_rate": 0.00012812044543244395, + "loss": 1.375, + "step": 7708 + }, + { + "epoch": 0.8131856540084388, + "grad_norm": 0.7049862146377563, + "learning_rate": 0.00012798013984589894, + "loss": 1.4185, + "step": 7709 + }, + { + "epoch": 0.8132911392405063, + "grad_norm": 0.7014749646186829, + "learning_rate": 0.0001278399039597809, + "loss": 1.3652, + "step": 7710 + }, + { + "epoch": 0.8133966244725739, + "grad_norm": 0.8105972409248352, + "learning_rate": 0.00012769973778980405, + "loss": 1.4041, + "step": 7711 + }, + { + "epoch": 0.8135021097046413, + "grad_norm": 0.7254236936569214, + "learning_rate": 0.00012755964135167464, + "loss": 1.3706, + "step": 7712 + }, + { + "epoch": 0.8136075949367089, + "grad_norm": 0.663188099861145, + "learning_rate": 0.00012741961466109113, + "loss": 1.3613, + "step": 7713 + }, + { + "epoch": 0.8137130801687764, + "grad_norm": 0.6607755422592163, + "learning_rate": 0.00012727965773374434, + "loss": 1.3948, + "step": 7714 + }, + { + "epoch": 0.8138185654008439, + "grad_norm": 0.7060583233833313, + "learning_rate": 0.00012713977058531685, + "loss": 1.3758, + "step": 7715 + }, + { + "epoch": 0.8139240506329114, + "grad_norm": 0.7386715412139893, + "learning_rate": 0.0001269999532314841, + "loss": 1.3891, + "step": 7716 + }, + { + "epoch": 0.814029535864979, + "grad_norm": 0.7656110525131226, + "learning_rate": 0.00012686020568791311, + "loss": 1.4402, + "step": 7717 + }, + { + "epoch": 0.8141350210970464, + "grad_norm": 0.7132387161254883, + "learning_rate": 0.00012672052797026344, + "loss": 1.3256, + "step": 7718 + }, + { + "epoch": 0.8142405063291139, + "grad_norm": 0.6645321846008301, + "learning_rate": 0.00012658092009418652, + "loss": 1.3498, 
+ "step": 7719 + }, + { + "epoch": 0.8143459915611815, + "grad_norm": 0.8191158175468445, + "learning_rate": 0.0001264413820753261, + "loss": 1.3853, + "step": 7720 + }, + { + "epoch": 0.8144514767932489, + "grad_norm": 0.6789312958717346, + "learning_rate": 0.0001263019139293182, + "loss": 1.4062, + "step": 7721 + }, + { + "epoch": 0.8145569620253165, + "grad_norm": 0.6703646779060364, + "learning_rate": 0.0001261625156717909, + "loss": 1.3706, + "step": 7722 + }, + { + "epoch": 0.814662447257384, + "grad_norm": 0.7896304726600647, + "learning_rate": 0.0001260231873183644, + "loss": 1.3987, + "step": 7723 + }, + { + "epoch": 0.8147679324894515, + "grad_norm": 0.7455702424049377, + "learning_rate": 0.00012588392888465103, + "loss": 1.3599, + "step": 7724 + }, + { + "epoch": 0.814873417721519, + "grad_norm": 0.721904456615448, + "learning_rate": 0.0001257447403862557, + "loss": 1.4313, + "step": 7725 + }, + { + "epoch": 0.8149789029535865, + "grad_norm": 0.6859622597694397, + "learning_rate": 0.00012560562183877507, + "loss": 1.387, + "step": 7726 + }, + { + "epoch": 0.815084388185654, + "grad_norm": 0.7658044099807739, + "learning_rate": 0.00012546657325779805, + "loss": 1.3683, + "step": 7727 + }, + { + "epoch": 0.8151898734177215, + "grad_norm": 0.6979864835739136, + "learning_rate": 0.00012532759465890567, + "loss": 1.3673, + "step": 7728 + }, + { + "epoch": 0.8152953586497891, + "grad_norm": 0.6560010313987732, + "learning_rate": 0.00012518868605767118, + "loss": 1.3775, + "step": 7729 + }, + { + "epoch": 0.8154008438818565, + "grad_norm": 0.7708538174629211, + "learning_rate": 0.00012504984746966003, + "loss": 1.3967, + "step": 7730 + }, + { + "epoch": 0.8155063291139241, + "grad_norm": 0.7122376561164856, + "learning_rate": 0.0001249110789104298, + "loss": 1.3667, + "step": 7731 + }, + { + "epoch": 0.8156118143459916, + "grad_norm": 0.7408884763717651, + "learning_rate": 0.00012477238039553006, + "loss": 1.3978, + "step": 7732 + }, + { + "epoch": 
0.815717299578059, + "grad_norm": 0.7048880457878113, + "learning_rate": 0.00012463375194050267, + "loss": 1.3785, + "step": 7733 + }, + { + "epoch": 0.8158227848101266, + "grad_norm": 0.6516126990318298, + "learning_rate": 0.00012449519356088192, + "loss": 1.3627, + "step": 7734 + }, + { + "epoch": 0.8159282700421941, + "grad_norm": 0.6805256605148315, + "learning_rate": 0.0001243567052721937, + "loss": 1.3773, + "step": 7735 + }, + { + "epoch": 0.8160337552742616, + "grad_norm": 0.6907792091369629, + "learning_rate": 0.00012421828708995649, + "loss": 1.3878, + "step": 7736 + }, + { + "epoch": 0.8161392405063291, + "grad_norm": 0.657566249370575, + "learning_rate": 0.00012407993902968057, + "loss": 1.3546, + "step": 7737 + }, + { + "epoch": 0.8162447257383966, + "grad_norm": 0.6700558066368103, + "learning_rate": 0.00012394166110686857, + "loss": 1.4016, + "step": 7738 + }, + { + "epoch": 0.8163502109704641, + "grad_norm": 0.6927803158760071, + "learning_rate": 0.0001238034533370153, + "loss": 1.4314, + "step": 7739 + }, + { + "epoch": 0.8164556962025317, + "grad_norm": 0.6642847657203674, + "learning_rate": 0.00012366531573560754, + "loss": 1.3931, + "step": 7740 + }, + { + "epoch": 0.8165611814345991, + "grad_norm": 0.679032027721405, + "learning_rate": 0.00012352724831812424, + "loss": 1.3512, + "step": 7741 + }, + { + "epoch": 0.8166666666666667, + "grad_norm": 0.6547514796257019, + "learning_rate": 0.0001233892511000368, + "loss": 1.3496, + "step": 7742 + }, + { + "epoch": 0.8167721518987342, + "grad_norm": 0.6542949080467224, + "learning_rate": 0.00012325132409680829, + "loss": 1.4052, + "step": 7743 + }, + { + "epoch": 0.8168776371308016, + "grad_norm": 0.771107017993927, + "learning_rate": 0.00012311346732389418, + "loss": 1.3957, + "step": 7744 + }, + { + "epoch": 0.8169831223628692, + "grad_norm": 0.6775034666061401, + "learning_rate": 0.000122975680796742, + "loss": 1.3332, + "step": 7745 + }, + { + "epoch": 0.8170886075949367, + "grad_norm": 
0.6403301358222961, + "learning_rate": 0.00012283796453079146, + "loss": 1.3741, + "step": 7746 + }, + { + "epoch": 0.8171940928270042, + "grad_norm": 0.6833693981170654, + "learning_rate": 0.00012270031854147426, + "loss": 1.3558, + "step": 7747 + }, + { + "epoch": 0.8172995780590717, + "grad_norm": 0.9280748963356018, + "learning_rate": 0.0001225627428442143, + "loss": 1.3598, + "step": 7748 + }, + { + "epoch": 0.8174050632911393, + "grad_norm": 0.6926872134208679, + "learning_rate": 0.0001224252374544278, + "loss": 1.4097, + "step": 7749 + }, + { + "epoch": 0.8175105485232067, + "grad_norm": 0.7059286236763, + "learning_rate": 0.00012228780238752264, + "loss": 1.376, + "step": 7750 + }, + { + "epoch": 0.8176160337552743, + "grad_norm": 0.9196080565452576, + "learning_rate": 0.00012215043765889932, + "loss": 1.3854, + "step": 7751 + }, + { + "epoch": 0.8177215189873418, + "grad_norm": 0.6584753394126892, + "learning_rate": 0.00012201314328395032, + "loss": 1.3291, + "step": 7752 + }, + { + "epoch": 0.8178270042194092, + "grad_norm": 0.6565636992454529, + "learning_rate": 0.00012187591927806, + "loss": 1.3656, + "step": 7753 + }, + { + "epoch": 0.8179324894514768, + "grad_norm": 0.8399117588996887, + "learning_rate": 0.0001217387656566051, + "loss": 1.3812, + "step": 7754 + }, + { + "epoch": 0.8180379746835443, + "grad_norm": 0.8912807106971741, + "learning_rate": 0.0001216016824349542, + "loss": 1.3869, + "step": 7755 + }, + { + "epoch": 0.8181434599156118, + "grad_norm": 0.6698450446128845, + "learning_rate": 0.00012146466962846833, + "loss": 1.3701, + "step": 7756 + }, + { + "epoch": 0.8182489451476793, + "grad_norm": 0.7788660526275635, + "learning_rate": 0.00012132772725250038, + "loss": 1.3817, + "step": 7757 + }, + { + "epoch": 0.8183544303797469, + "grad_norm": 0.9312688112258911, + "learning_rate": 0.0001211908553223954, + "loss": 1.3402, + "step": 7758 + }, + { + "epoch": 0.8184599156118143, + "grad_norm": 0.7057907581329346, + "learning_rate": 
0.00012105405385349047, + "loss": 1.4044, + "step": 7759 + }, + { + "epoch": 0.8185654008438819, + "grad_norm": 0.8762518167495728, + "learning_rate": 0.00012091732286111514, + "loss": 1.4009, + "step": 7760 + }, + { + "epoch": 0.8186708860759494, + "grad_norm": 0.7138023376464844, + "learning_rate": 0.00012078066236059068, + "loss": 1.3526, + "step": 7761 + }, + { + "epoch": 0.8187763713080168, + "grad_norm": 0.6422764658927917, + "learning_rate": 0.00012064407236723066, + "loss": 1.3499, + "step": 7762 + }, + { + "epoch": 0.8188818565400844, + "grad_norm": 0.7287861704826355, + "learning_rate": 0.00012050755289634049, + "loss": 1.4165, + "step": 7763 + }, + { + "epoch": 0.8189873417721519, + "grad_norm": 0.7711328268051147, + "learning_rate": 0.00012037110396321796, + "loss": 1.4077, + "step": 7764 + }, + { + "epoch": 0.8190928270042194, + "grad_norm": 0.8940742611885071, + "learning_rate": 0.0001202347255831529, + "loss": 1.3702, + "step": 7765 + }, + { + "epoch": 0.8191983122362869, + "grad_norm": 0.6632680892944336, + "learning_rate": 0.0001200984177714271, + "loss": 1.4122, + "step": 7766 + }, + { + "epoch": 0.8193037974683545, + "grad_norm": 0.9728264808654785, + "learning_rate": 0.00011996218054331434, + "loss": 1.3909, + "step": 7767 + }, + { + "epoch": 0.8194092827004219, + "grad_norm": 0.8716222643852234, + "learning_rate": 0.00011982601391408115, + "loss": 1.3821, + "step": 7768 + }, + { + "epoch": 0.8195147679324895, + "grad_norm": 0.7278730869293213, + "learning_rate": 0.00011968991789898533, + "loss": 1.4131, + "step": 7769 + }, + { + "epoch": 0.819620253164557, + "grad_norm": 0.745907723903656, + "learning_rate": 0.00011955389251327737, + "loss": 1.3273, + "step": 7770 + }, + { + "epoch": 0.8197257383966244, + "grad_norm": 0.8121914863586426, + "learning_rate": 0.00011941793777219937, + "loss": 1.3627, + "step": 7771 + }, + { + "epoch": 0.819831223628692, + "grad_norm": 0.7731775045394897, + "learning_rate": 0.00011928205369098574, + "loss": 1.3583, 
+ "step": 7772 + }, + { + "epoch": 0.8199367088607595, + "grad_norm": 0.7152034640312195, + "learning_rate": 0.00011914624028486315, + "loss": 1.3581, + "step": 7773 + }, + { + "epoch": 0.820042194092827, + "grad_norm": 0.7191460132598877, + "learning_rate": 0.00011901049756905, + "loss": 1.3204, + "step": 7774 + }, + { + "epoch": 0.8201476793248945, + "grad_norm": 0.9290207624435425, + "learning_rate": 0.00011887482555875695, + "loss": 1.3952, + "step": 7775 + }, + { + "epoch": 0.8202531645569621, + "grad_norm": 0.7107332944869995, + "learning_rate": 0.00011873922426918668, + "loss": 1.4039, + "step": 7776 + }, + { + "epoch": 0.8203586497890295, + "grad_norm": 0.6988203525543213, + "learning_rate": 0.0001186036937155342, + "loss": 1.4042, + "step": 7777 + }, + { + "epoch": 0.820464135021097, + "grad_norm": 0.7403839826583862, + "learning_rate": 0.00011846823391298628, + "loss": 1.3702, + "step": 7778 + }, + { + "epoch": 0.8205696202531646, + "grad_norm": 0.7357184886932373, + "learning_rate": 0.00011833284487672185, + "loss": 1.3508, + "step": 7779 + }, + { + "epoch": 0.820675105485232, + "grad_norm": 0.7873018980026245, + "learning_rate": 0.00011819752662191197, + "loss": 1.4154, + "step": 7780 + }, + { + "epoch": 0.8207805907172996, + "grad_norm": 0.696895182132721, + "learning_rate": 0.00011806227916371964, + "loss": 1.4256, + "step": 7781 + }, + { + "epoch": 0.8208860759493671, + "grad_norm": 0.9012776613235474, + "learning_rate": 0.0001179271025173001, + "loss": 1.3874, + "step": 7782 + }, + { + "epoch": 0.8209915611814346, + "grad_norm": 0.7133563756942749, + "learning_rate": 0.00011779199669780046, + "loss": 1.3857, + "step": 7783 + }, + { + "epoch": 0.8210970464135021, + "grad_norm": 0.6629769206047058, + "learning_rate": 0.00011765696172036006, + "loss": 1.4021, + "step": 7784 + }, + { + "epoch": 0.8212025316455697, + "grad_norm": 0.7883576154708862, + "learning_rate": 0.00011752199760011017, + "loss": 1.3663, + "step": 7785 + }, + { + "epoch": 
0.8213080168776371, + "grad_norm": 0.6967271566390991, + "learning_rate": 0.00011738710435217431, + "loss": 1.4185, + "step": 7786 + }, + { + "epoch": 0.8214135021097047, + "grad_norm": 0.6997618079185486, + "learning_rate": 0.00011725228199166805, + "loss": 1.4194, + "step": 7787 + }, + { + "epoch": 0.8215189873417722, + "grad_norm": 0.6483917236328125, + "learning_rate": 0.00011711753053369861, + "loss": 1.3767, + "step": 7788 + }, + { + "epoch": 0.8216244725738396, + "grad_norm": 0.7304111123085022, + "learning_rate": 0.00011698284999336578, + "loss": 1.3637, + "step": 7789 + }, + { + "epoch": 0.8217299578059072, + "grad_norm": 0.69027179479599, + "learning_rate": 0.00011684824038576115, + "loss": 1.3757, + "step": 7790 + }, + { + "epoch": 0.8218354430379747, + "grad_norm": 0.7404921054840088, + "learning_rate": 0.00011671370172596829, + "loss": 1.3823, + "step": 7791 + }, + { + "epoch": 0.8219409282700422, + "grad_norm": 0.6825034022331238, + "learning_rate": 0.00011657923402906309, + "loss": 1.425, + "step": 7792 + }, + { + "epoch": 0.8220464135021097, + "grad_norm": 0.7081214785575867, + "learning_rate": 0.000116444837310113, + "loss": 1.3857, + "step": 7793 + }, + { + "epoch": 0.8221518987341773, + "grad_norm": 0.7229780554771423, + "learning_rate": 0.00011631051158417828, + "loss": 1.3482, + "step": 7794 + }, + { + "epoch": 0.8222573839662447, + "grad_norm": 0.678230881690979, + "learning_rate": 0.00011617625686631056, + "loss": 1.346, + "step": 7795 + }, + { + "epoch": 0.8223628691983123, + "grad_norm": 0.6497051119804382, + "learning_rate": 0.00011604207317155383, + "loss": 1.3825, + "step": 7796 + }, + { + "epoch": 0.8224683544303798, + "grad_norm": 0.7490842342376709, + "learning_rate": 0.00011590796051494395, + "loss": 1.374, + "step": 7797 + }, + { + "epoch": 0.8225738396624472, + "grad_norm": 0.7103738784790039, + "learning_rate": 0.00011577391891150901, + "loss": 1.3638, + "step": 7798 + }, + { + "epoch": 0.8226793248945148, + "grad_norm": 
0.7481379508972168, + "learning_rate": 0.00011563994837626898, + "loss": 1.4057, + "step": 7799 + }, + { + "epoch": 0.8227848101265823, + "grad_norm": 0.6481792330741882, + "learning_rate": 0.00011550604892423593, + "loss": 1.3478, + "step": 7800 + }, + { + "epoch": 0.8228902953586498, + "grad_norm": 0.6871219873428345, + "learning_rate": 0.00011537222057041396, + "loss": 1.3938, + "step": 7801 + }, + { + "epoch": 0.8229957805907173, + "grad_norm": 0.6739243268966675, + "learning_rate": 0.00011523846332979907, + "loss": 1.3678, + "step": 7802 + }, + { + "epoch": 0.8231012658227848, + "grad_norm": 0.8352223634719849, + "learning_rate": 0.00011510477721737974, + "loss": 1.3659, + "step": 7803 + }, + { + "epoch": 0.8232067510548523, + "grad_norm": 0.6840208172798157, + "learning_rate": 0.00011497116224813604, + "loss": 1.3868, + "step": 7804 + }, + { + "epoch": 0.8233122362869199, + "grad_norm": 0.6376951932907104, + "learning_rate": 0.0001148376184370401, + "loss": 1.3572, + "step": 7805 + }, + { + "epoch": 0.8234177215189873, + "grad_norm": 0.7146715521812439, + "learning_rate": 0.00011470414579905617, + "loss": 1.4187, + "step": 7806 + }, + { + "epoch": 0.8235232067510548, + "grad_norm": 0.7798951268196106, + "learning_rate": 0.00011457074434914067, + "loss": 1.382, + "step": 7807 + }, + { + "epoch": 0.8236286919831224, + "grad_norm": 0.7036898732185364, + "learning_rate": 0.00011443741410224173, + "loss": 1.3394, + "step": 7808 + }, + { + "epoch": 0.8237341772151898, + "grad_norm": 0.6668311357498169, + "learning_rate": 0.00011430415507329975, + "loss": 1.3809, + "step": 7809 + }, + { + "epoch": 0.8238396624472574, + "grad_norm": 0.6815202236175537, + "learning_rate": 0.0001141709672772471, + "loss": 1.3669, + "step": 7810 + }, + { + "epoch": 0.8239451476793249, + "grad_norm": 0.7407945394515991, + "learning_rate": 0.00011403785072900793, + "loss": 1.3492, + "step": 7811 + }, + { + "epoch": 0.8240506329113924, + "grad_norm": 0.6788998246192932, + "learning_rate": 
0.00011390480544349891, + "loss": 1.3834, + "step": 7812 + }, + { + "epoch": 0.8241561181434599, + "grad_norm": 0.6902247071266174, + "learning_rate": 0.00011377183143562833, + "loss": 1.3653, + "step": 7813 + }, + { + "epoch": 0.8242616033755275, + "grad_norm": 0.692099928855896, + "learning_rate": 0.00011363892872029655, + "loss": 1.4207, + "step": 7814 + }, + { + "epoch": 0.8243670886075949, + "grad_norm": 0.6986406445503235, + "learning_rate": 0.00011350609731239597, + "loss": 1.3767, + "step": 7815 + }, + { + "epoch": 0.8244725738396624, + "grad_norm": 0.6608006358146667, + "learning_rate": 0.00011337333722681104, + "loss": 1.3486, + "step": 7816 + }, + { + "epoch": 0.82457805907173, + "grad_norm": 0.7942008376121521, + "learning_rate": 0.00011324064847841817, + "loss": 1.3617, + "step": 7817 + }, + { + "epoch": 0.8246835443037974, + "grad_norm": 0.6633591055870056, + "learning_rate": 0.00011310803108208581, + "loss": 1.4167, + "step": 7818 + }, + { + "epoch": 0.824789029535865, + "grad_norm": 0.6623001098632812, + "learning_rate": 0.00011297548505267424, + "loss": 1.3857, + "step": 7819 + }, + { + "epoch": 0.8248945147679325, + "grad_norm": 0.6487354636192322, + "learning_rate": 0.00011284301040503625, + "loss": 1.4163, + "step": 7820 + }, + { + "epoch": 0.825, + "grad_norm": 0.6796507239341736, + "learning_rate": 0.00011271060715401604, + "loss": 1.3996, + "step": 7821 + }, + { + "epoch": 0.8251054852320675, + "grad_norm": 0.6897767782211304, + "learning_rate": 0.00011257827531445017, + "loss": 1.3605, + "step": 7822 + }, + { + "epoch": 0.825210970464135, + "grad_norm": 0.6828884482383728, + "learning_rate": 0.00011244601490116693, + "loss": 1.3784, + "step": 7823 + }, + { + "epoch": 0.8253164556962025, + "grad_norm": 0.681142270565033, + "learning_rate": 0.00011231382592898698, + "loss": 1.3473, + "step": 7824 + }, + { + "epoch": 0.82542194092827, + "grad_norm": 0.772442638874054, + "learning_rate": 0.00011218170841272254, + "loss": 1.3668, + "step": 7825 + 
}, + { + "epoch": 0.8255274261603376, + "grad_norm": 0.7131338119506836, + "learning_rate": 0.00011204966236717811, + "loss": 1.3726, + "step": 7826 + }, + { + "epoch": 0.825632911392405, + "grad_norm": 0.720838725566864, + "learning_rate": 0.0001119176878071502, + "loss": 1.4115, + "step": 7827 + }, + { + "epoch": 0.8257383966244726, + "grad_norm": 0.7054511904716492, + "learning_rate": 0.00011178578474742687, + "loss": 1.3728, + "step": 7828 + }, + { + "epoch": 0.8258438818565401, + "grad_norm": 0.6920532584190369, + "learning_rate": 0.00011165395320278898, + "loss": 1.3933, + "step": 7829 + }, + { + "epoch": 0.8259493670886076, + "grad_norm": 0.7188560962677002, + "learning_rate": 0.0001115221931880088, + "loss": 1.3369, + "step": 7830 + }, + { + "epoch": 0.8260548523206751, + "grad_norm": 0.6989048719406128, + "learning_rate": 0.00011139050471785051, + "loss": 1.3753, + "step": 7831 + }, + { + "epoch": 0.8261603375527427, + "grad_norm": 0.6844520568847656, + "learning_rate": 0.00011125888780707064, + "loss": 1.362, + "step": 7832 + }, + { + "epoch": 0.8262658227848101, + "grad_norm": 0.6695718169212341, + "learning_rate": 0.00011112734247041739, + "loss": 1.3611, + "step": 7833 + }, + { + "epoch": 0.8263713080168776, + "grad_norm": 0.6710222363471985, + "learning_rate": 0.00011099586872263107, + "loss": 1.3657, + "step": 7834 + }, + { + "epoch": 0.8264767932489452, + "grad_norm": 0.6670278310775757, + "learning_rate": 0.00011086446657844412, + "loss": 1.3442, + "step": 7835 + }, + { + "epoch": 0.8265822784810126, + "grad_norm": 0.6832781434059143, + "learning_rate": 0.0001107331360525807, + "loss": 1.3195, + "step": 7836 + }, + { + "epoch": 0.8266877637130802, + "grad_norm": 0.6958902478218079, + "learning_rate": 0.00011060187715975686, + "loss": 1.3794, + "step": 7837 + }, + { + "epoch": 0.8267932489451477, + "grad_norm": 0.7174513936042786, + "learning_rate": 0.00011047068991468118, + "loss": 1.4061, + "step": 7838 + }, + { + "epoch": 0.8268987341772152, + 
"grad_norm": 0.703337550163269, + "learning_rate": 0.00011033957433205364, + "loss": 1.3133, + "step": 7839 + }, + { + "epoch": 0.8270042194092827, + "grad_norm": 0.6724845170974731, + "learning_rate": 0.00011020853042656648, + "loss": 1.3647, + "step": 7840 + }, + { + "epoch": 0.8271097046413503, + "grad_norm": 0.6743252277374268, + "learning_rate": 0.00011007755821290371, + "loss": 1.3916, + "step": 7841 + }, + { + "epoch": 0.8272151898734177, + "grad_norm": 0.6708295345306396, + "learning_rate": 0.00010994665770574162, + "loss": 1.3755, + "step": 7842 + }, + { + "epoch": 0.8273206751054852, + "grad_norm": 0.7029100656509399, + "learning_rate": 0.000109815828919748, + "loss": 1.3857, + "step": 7843 + }, + { + "epoch": 0.8274261603375528, + "grad_norm": 0.7578839659690857, + "learning_rate": 0.00010968507186958302, + "loss": 1.3471, + "step": 7844 + }, + { + "epoch": 0.8275316455696202, + "grad_norm": 0.6978686451911926, + "learning_rate": 0.00010955438656989849, + "loss": 1.3585, + "step": 7845 + }, + { + "epoch": 0.8276371308016878, + "grad_norm": 0.7264713048934937, + "learning_rate": 0.00010942377303533865, + "loss": 1.3806, + "step": 7846 + }, + { + "epoch": 0.8277426160337553, + "grad_norm": 0.7258362770080566, + "learning_rate": 0.00010929323128053927, + "loss": 1.3805, + "step": 7847 + }, + { + "epoch": 0.8278481012658228, + "grad_norm": 0.8203786015510559, + "learning_rate": 0.00010916276132012818, + "loss": 1.3907, + "step": 7848 + }, + { + "epoch": 0.8279535864978903, + "grad_norm": 0.697004497051239, + "learning_rate": 0.00010903236316872514, + "loss": 1.3357, + "step": 7849 + }, + { + "epoch": 0.8280590717299579, + "grad_norm": 0.6439664959907532, + "learning_rate": 0.000108902036840942, + "loss": 1.3719, + "step": 7850 + }, + { + "epoch": 0.8281645569620253, + "grad_norm": 0.6622866988182068, + "learning_rate": 0.00010877178235138239, + "loss": 1.3909, + "step": 7851 + }, + { + "epoch": 0.8282700421940928, + "grad_norm": 0.7545678019523621, + 
"learning_rate": 0.00010864159971464205, + "loss": 1.3422, + "step": 7852 + }, + { + "epoch": 0.8283755274261604, + "grad_norm": 0.7590564489364624, + "learning_rate": 0.00010851148894530858, + "loss": 1.371, + "step": 7853 + }, + { + "epoch": 0.8284810126582278, + "grad_norm": 0.7713074088096619, + "learning_rate": 0.00010838145005796138, + "loss": 1.3826, + "step": 7854 + }, + { + "epoch": 0.8285864978902954, + "grad_norm": 0.7037607431411743, + "learning_rate": 0.00010825148306717222, + "loss": 1.3631, + "step": 7855 + }, + { + "epoch": 0.8286919831223629, + "grad_norm": 0.6673005223274231, + "learning_rate": 0.00010812158798750438, + "loss": 1.3562, + "step": 7856 + }, + { + "epoch": 0.8287974683544304, + "grad_norm": 0.6963204145431519, + "learning_rate": 0.00010799176483351337, + "loss": 1.3429, + "step": 7857 + }, + { + "epoch": 0.8289029535864979, + "grad_norm": 0.6714196801185608, + "learning_rate": 0.00010786201361974646, + "loss": 1.3488, + "step": 7858 + }, + { + "epoch": 0.8290084388185655, + "grad_norm": 0.6459939479827881, + "learning_rate": 0.00010773233436074287, + "loss": 1.3374, + "step": 7859 + }, + { + "epoch": 0.8291139240506329, + "grad_norm": 0.6859797835350037, + "learning_rate": 0.00010760272707103389, + "loss": 1.3438, + "step": 7860 + }, + { + "epoch": 0.8292194092827004, + "grad_norm": 0.7259979248046875, + "learning_rate": 0.00010747319176514264, + "loss": 1.3854, + "step": 7861 + }, + { + "epoch": 0.829324894514768, + "grad_norm": 0.6938033699989319, + "learning_rate": 0.00010734372845758411, + "loss": 1.3853, + "step": 7862 + }, + { + "epoch": 0.8294303797468354, + "grad_norm": 0.6436554193496704, + "learning_rate": 0.00010721433716286527, + "loss": 1.3629, + "step": 7863 + }, + { + "epoch": 0.829535864978903, + "grad_norm": 0.6653328537940979, + "learning_rate": 0.00010708501789548527, + "loss": 1.3837, + "step": 7864 + }, + { + "epoch": 0.8296413502109705, + "grad_norm": 0.7170171141624451, + "learning_rate": 
0.00010695577066993495, + "loss": 1.3366, + "step": 7865 + }, + { + "epoch": 0.829746835443038, + "grad_norm": 0.6969335079193115, + "learning_rate": 0.00010682659550069704, + "loss": 1.4124, + "step": 7866 + }, + { + "epoch": 0.8298523206751055, + "grad_norm": 0.7502753138542175, + "learning_rate": 0.00010669749240224621, + "loss": 1.3411, + "step": 7867 + }, + { + "epoch": 0.8299578059071729, + "grad_norm": 0.6597749590873718, + "learning_rate": 0.00010656846138904916, + "loss": 1.3331, + "step": 7868 + }, + { + "epoch": 0.8300632911392405, + "grad_norm": 0.6922480463981628, + "learning_rate": 0.00010643950247556447, + "loss": 1.4116, + "step": 7869 + }, + { + "epoch": 0.830168776371308, + "grad_norm": 0.7048709988594055, + "learning_rate": 0.00010631061567624259, + "loss": 1.3774, + "step": 7870 + }, + { + "epoch": 0.8302742616033755, + "grad_norm": 0.6854350566864014, + "learning_rate": 0.00010618180100552596, + "loss": 1.4268, + "step": 7871 + }, + { + "epoch": 0.830379746835443, + "grad_norm": 0.6684819459915161, + "learning_rate": 0.00010605305847784871, + "loss": 1.3875, + "step": 7872 + }, + { + "epoch": 0.8304852320675106, + "grad_norm": 0.6591783165931702, + "learning_rate": 0.00010592438810763747, + "loss": 1.3624, + "step": 7873 + }, + { + "epoch": 0.830590717299578, + "grad_norm": 0.7188674211502075, + "learning_rate": 0.00010579578990931019, + "loss": 1.3974, + "step": 7874 + }, + { + "epoch": 0.8306962025316456, + "grad_norm": 0.7301537990570068, + "learning_rate": 0.00010566726389727693, + "loss": 1.3759, + "step": 7875 + }, + { + "epoch": 0.8308016877637131, + "grad_norm": 0.7160071134567261, + "learning_rate": 0.00010553881008593969, + "loss": 1.3986, + "step": 7876 + }, + { + "epoch": 0.8309071729957805, + "grad_norm": 0.6863964796066284, + "learning_rate": 0.00010541042848969235, + "loss": 1.3959, + "step": 7877 + }, + { + "epoch": 0.8310126582278481, + "grad_norm": 0.7068641185760498, + "learning_rate": 0.00010528211912292066, + "loss": 1.396, 
+ "step": 7878 + }, + { + "epoch": 0.8311181434599156, + "grad_norm": 0.6669578552246094, + "learning_rate": 0.00010515388200000245, + "loss": 1.3913, + "step": 7879 + }, + { + "epoch": 0.8312236286919831, + "grad_norm": 0.6967859864234924, + "learning_rate": 0.00010502571713530706, + "loss": 1.3736, + "step": 7880 + }, + { + "epoch": 0.8313291139240506, + "grad_norm": 0.6726458072662354, + "learning_rate": 0.00010489762454319634, + "loss": 1.3735, + "step": 7881 + }, + { + "epoch": 0.8314345991561182, + "grad_norm": 0.6521271467208862, + "learning_rate": 0.00010476960423802356, + "loss": 1.3728, + "step": 7882 + }, + { + "epoch": 0.8315400843881856, + "grad_norm": 0.6962772607803345, + "learning_rate": 0.00010464165623413408, + "loss": 1.3657, + "step": 7883 + }, + { + "epoch": 0.8316455696202532, + "grad_norm": 0.7183647155761719, + "learning_rate": 0.00010451378054586508, + "loss": 1.4197, + "step": 7884 + }, + { + "epoch": 0.8317510548523207, + "grad_norm": 0.6582158803939819, + "learning_rate": 0.00010438597718754561, + "loss": 1.4155, + "step": 7885 + }, + { + "epoch": 0.8318565400843881, + "grad_norm": 0.6759195327758789, + "learning_rate": 0.00010425824617349671, + "loss": 1.4116, + "step": 7886 + }, + { + "epoch": 0.8319620253164557, + "grad_norm": 0.6524875164031982, + "learning_rate": 0.00010413058751803129, + "loss": 1.3966, + "step": 7887 + }, + { + "epoch": 0.8320675105485232, + "grad_norm": 0.6617318987846375, + "learning_rate": 0.0001040030012354542, + "loss": 1.3563, + "step": 7888 + }, + { + "epoch": 0.8321729957805907, + "grad_norm": 0.7053930163383484, + "learning_rate": 0.00010387548734006195, + "loss": 1.3812, + "step": 7889 + }, + { + "epoch": 0.8322784810126582, + "grad_norm": 0.699026882648468, + "learning_rate": 0.00010374804584614308, + "loss": 1.3781, + "step": 7890 + }, + { + "epoch": 0.8323839662447258, + "grad_norm": 0.6581346392631531, + "learning_rate": 0.00010362067676797837, + "loss": 1.3511, + "step": 7891 + }, + { + "epoch": 
0.8324894514767932, + "grad_norm": 0.7140107750892639, + "learning_rate": 0.00010349338011983998, + "loss": 1.3295, + "step": 7892 + }, + { + "epoch": 0.8325949367088608, + "grad_norm": 0.6897494196891785, + "learning_rate": 0.00010336615591599204, + "loss": 1.3698, + "step": 7893 + }, + { + "epoch": 0.8327004219409283, + "grad_norm": 0.6959677934646606, + "learning_rate": 0.00010323900417069079, + "loss": 1.3453, + "step": 7894 + }, + { + "epoch": 0.8328059071729957, + "grad_norm": 0.7070788145065308, + "learning_rate": 0.00010311192489818421, + "loss": 1.4153, + "step": 7895 + }, + { + "epoch": 0.8329113924050633, + "grad_norm": 0.645057737827301, + "learning_rate": 0.0001029849181127121, + "loss": 1.3785, + "step": 7896 + }, + { + "epoch": 0.8330168776371308, + "grad_norm": 0.7018970847129822, + "learning_rate": 0.00010285798382850614, + "loss": 1.3523, + "step": 7897 + }, + { + "epoch": 0.8331223628691983, + "grad_norm": 0.6947779655456543, + "learning_rate": 0.00010273112205979012, + "loss": 1.384, + "step": 7898 + }, + { + "epoch": 0.8332278481012658, + "grad_norm": 0.7051786780357361, + "learning_rate": 0.00010260433282077944, + "loss": 1.3671, + "step": 7899 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.6860271692276001, + "learning_rate": 0.00010247761612568129, + "loss": 1.3869, + "step": 7900 + }, + { + "epoch": 0.8334388185654008, + "grad_norm": 0.7256574630737305, + "learning_rate": 0.00010235097198869525, + "loss": 1.3655, + "step": 7901 + }, + { + "epoch": 0.8335443037974684, + "grad_norm": 0.6566762924194336, + "learning_rate": 0.0001022244004240123, + "loss": 1.3915, + "step": 7902 + }, + { + "epoch": 0.8336497890295359, + "grad_norm": 0.8873130679130554, + "learning_rate": 0.00010209790144581533, + "loss": 1.3403, + "step": 7903 + }, + { + "epoch": 0.8337552742616033, + "grad_norm": 0.7020952105522156, + "learning_rate": 0.00010197147506827925, + "loss": 1.349, + "step": 7904 + }, + { + "epoch": 0.8338607594936709, + "grad_norm": 
0.6703161001205444, + "learning_rate": 0.00010184512130557074, + "loss": 1.3996, + "step": 7905 + }, + { + "epoch": 0.8339662447257384, + "grad_norm": 0.8023024201393127, + "learning_rate": 0.0001017188401718484, + "loss": 1.4029, + "step": 7906 + }, + { + "epoch": 0.8340717299578059, + "grad_norm": 0.7234219312667847, + "learning_rate": 0.00010159263168126265, + "loss": 1.3811, + "step": 7907 + }, + { + "epoch": 0.8341772151898734, + "grad_norm": 0.722984790802002, + "learning_rate": 0.00010146649584795575, + "loss": 1.392, + "step": 7908 + }, + { + "epoch": 0.834282700421941, + "grad_norm": 0.7134652137756348, + "learning_rate": 0.00010134043268606191, + "loss": 1.3757, + "step": 7909 + }, + { + "epoch": 0.8343881856540084, + "grad_norm": 0.8386152982711792, + "learning_rate": 0.0001012144422097069, + "loss": 1.3837, + "step": 7910 + }, + { + "epoch": 0.834493670886076, + "grad_norm": 0.7745761275291443, + "learning_rate": 0.00010108852443300895, + "loss": 1.3747, + "step": 7911 + }, + { + "epoch": 0.8345991561181435, + "grad_norm": 0.7532318830490112, + "learning_rate": 0.00010096267937007758, + "loss": 1.3723, + "step": 7912 + }, + { + "epoch": 0.8347046413502109, + "grad_norm": 0.7406126856803894, + "learning_rate": 0.00010083690703501445, + "loss": 1.3866, + "step": 7913 + }, + { + "epoch": 0.8348101265822785, + "grad_norm": 0.6501064896583557, + "learning_rate": 0.00010071120744191284, + "loss": 1.3394, + "step": 7914 + }, + { + "epoch": 0.834915611814346, + "grad_norm": 0.657516360282898, + "learning_rate": 0.0001005855806048581, + "loss": 1.4021, + "step": 7915 + }, + { + "epoch": 0.8350210970464135, + "grad_norm": 0.6559467911720276, + "learning_rate": 0.00010046002653792726, + "loss": 1.3775, + "step": 7916 + }, + { + "epoch": 0.835126582278481, + "grad_norm": 0.6654199361801147, + "learning_rate": 0.00010033454525518945, + "loss": 1.3711, + "step": 7917 + }, + { + "epoch": 0.8352320675105486, + "grad_norm": 0.6357002854347229, + "learning_rate": 
0.0001002091367707053, + "loss": 1.3786, + "step": 7918 + }, + { + "epoch": 0.835337552742616, + "grad_norm": 0.698040246963501, + "learning_rate": 0.00010008380109852752, + "loss": 1.3631, + "step": 7919 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 0.6760722398757935, + "learning_rate": 9.995853825270052e-05, + "loss": 1.3829, + "step": 7920 + }, + { + "epoch": 0.8355485232067511, + "grad_norm": 0.679737389087677, + "learning_rate": 9.983334824726081e-05, + "loss": 1.3915, + "step": 7921 + }, + { + "epoch": 0.8356540084388185, + "grad_norm": 0.9110767245292664, + "learning_rate": 9.970823109623644e-05, + "loss": 1.3909, + "step": 7922 + }, + { + "epoch": 0.8357594936708861, + "grad_norm": 0.7768306732177734, + "learning_rate": 9.958318681364745e-05, + "loss": 1.4108, + "step": 7923 + }, + { + "epoch": 0.8358649789029536, + "grad_norm": 0.6565279364585876, + "learning_rate": 9.94582154135056e-05, + "loss": 1.3905, + "step": 7924 + }, + { + "epoch": 0.8359704641350211, + "grad_norm": 0.8862765431404114, + "learning_rate": 9.933331690981473e-05, + "loss": 1.3763, + "step": 7925 + }, + { + "epoch": 0.8360759493670886, + "grad_norm": 0.7775053381919861, + "learning_rate": 9.920849131657011e-05, + "loss": 1.4075, + "step": 7926 + }, + { + "epoch": 0.8361814345991562, + "grad_norm": 0.7157067060470581, + "learning_rate": 9.908373864775915e-05, + "loss": 1.3497, + "step": 7927 + }, + { + "epoch": 0.8362869198312236, + "grad_norm": 0.7876885533332825, + "learning_rate": 9.895905891736118e-05, + "loss": 1.3882, + "step": 7928 + }, + { + "epoch": 0.8363924050632912, + "grad_norm": 0.7179737091064453, + "learning_rate": 9.883445213934675e-05, + "loss": 1.3867, + "step": 7929 + }, + { + "epoch": 0.8364978902953587, + "grad_norm": 0.6676092743873596, + "learning_rate": 9.870991832767919e-05, + "loss": 1.3644, + "step": 7930 + }, + { + "epoch": 0.8366033755274261, + "grad_norm": 0.682931661605835, + "learning_rate": 9.858545749631287e-05, + "loss": 1.3777, + "step": 
7931 + }, + { + "epoch": 0.8367088607594937, + "grad_norm": 0.714571475982666, + "learning_rate": 9.846106965919427e-05, + "loss": 1.3712, + "step": 7932 + }, + { + "epoch": 0.8368143459915611, + "grad_norm": 0.8957777619361877, + "learning_rate": 9.833675483026175e-05, + "loss": 1.3901, + "step": 7933 + }, + { + "epoch": 0.8369198312236287, + "grad_norm": 0.7585304975509644, + "learning_rate": 9.821251302344525e-05, + "loss": 1.3849, + "step": 7934 + }, + { + "epoch": 0.8370253164556962, + "grad_norm": 0.6969274878501892, + "learning_rate": 9.80883442526668e-05, + "loss": 1.3635, + "step": 7935 + }, + { + "epoch": 0.8371308016877637, + "grad_norm": 0.8265061378479004, + "learning_rate": 9.79642485318401e-05, + "loss": 1.3795, + "step": 7936 + }, + { + "epoch": 0.8372362869198312, + "grad_norm": 0.757706880569458, + "learning_rate": 9.78402258748708e-05, + "loss": 1.3587, + "step": 7937 + }, + { + "epoch": 0.8373417721518988, + "grad_norm": 0.6585620641708374, + "learning_rate": 9.771627629565599e-05, + "loss": 1.3869, + "step": 7938 + }, + { + "epoch": 0.8374472573839662, + "grad_norm": 0.6809417009353638, + "learning_rate": 9.759239980808494e-05, + "loss": 1.385, + "step": 7939 + }, + { + "epoch": 0.8375527426160337, + "grad_norm": 0.7617320418357849, + "learning_rate": 9.746859642603884e-05, + "loss": 1.396, + "step": 7940 + }, + { + "epoch": 0.8376582278481013, + "grad_norm": 0.745966374874115, + "learning_rate": 9.734486616339027e-05, + "loss": 1.3943, + "step": 7941 + }, + { + "epoch": 0.8377637130801687, + "grad_norm": 0.6994436383247375, + "learning_rate": 9.722120903400392e-05, + "loss": 1.3943, + "step": 7942 + }, + { + "epoch": 0.8378691983122363, + "grad_norm": 0.781633198261261, + "learning_rate": 9.709762505173617e-05, + "loss": 1.3803, + "step": 7943 + }, + { + "epoch": 0.8379746835443038, + "grad_norm": 0.8762484192848206, + "learning_rate": 9.697411423043521e-05, + "loss": 1.3879, + "step": 7944 + }, + { + "epoch": 0.8380801687763713, + 
"grad_norm": 0.6740595102310181, + "learning_rate": 9.685067658394095e-05, + "loss": 1.3631, + "step": 7945 + }, + { + "epoch": 0.8381856540084388, + "grad_norm": 0.7340859770774841, + "learning_rate": 9.672731212608535e-05, + "loss": 1.3831, + "step": 7946 + }, + { + "epoch": 0.8382911392405064, + "grad_norm": 0.6650197505950928, + "learning_rate": 9.660402087069192e-05, + "loss": 1.3875, + "step": 7947 + }, + { + "epoch": 0.8383966244725738, + "grad_norm": 0.7553302645683289, + "learning_rate": 9.648080283157604e-05, + "loss": 1.3535, + "step": 7948 + }, + { + "epoch": 0.8385021097046413, + "grad_norm": 0.6953170895576477, + "learning_rate": 9.635765802254482e-05, + "loss": 1.3834, + "step": 7949 + }, + { + "epoch": 0.8386075949367089, + "grad_norm": 0.7269281148910522, + "learning_rate": 9.623458645739755e-05, + "loss": 1.3896, + "step": 7950 + }, + { + "epoch": 0.8387130801687763, + "grad_norm": 0.6729454398155212, + "learning_rate": 9.611158814992479e-05, + "loss": 1.3858, + "step": 7951 + }, + { + "epoch": 0.8388185654008439, + "grad_norm": 0.6653351187705994, + "learning_rate": 9.598866311390919e-05, + "loss": 1.3339, + "step": 7952 + }, + { + "epoch": 0.8389240506329114, + "grad_norm": 0.6582700610160828, + "learning_rate": 9.586581136312506e-05, + "loss": 1.3836, + "step": 7953 + }, + { + "epoch": 0.8390295358649789, + "grad_norm": 0.6581037640571594, + "learning_rate": 9.574303291133862e-05, + "loss": 1.3278, + "step": 7954 + }, + { + "epoch": 0.8391350210970464, + "grad_norm": 0.7853182554244995, + "learning_rate": 9.562032777230772e-05, + "loss": 1.3389, + "step": 7955 + }, + { + "epoch": 0.839240506329114, + "grad_norm": 0.6677504777908325, + "learning_rate": 9.549769595978211e-05, + "loss": 1.3572, + "step": 7956 + }, + { + "epoch": 0.8393459915611814, + "grad_norm": 0.7113779187202454, + "learning_rate": 9.537513748750337e-05, + "loss": 1.3731, + "step": 7957 + }, + { + "epoch": 0.8394514767932489, + "grad_norm": 0.7146121859550476, + 
"learning_rate": 9.525265236920452e-05, + "loss": 1.3884, + "step": 7958 + }, + { + "epoch": 0.8395569620253165, + "grad_norm": 0.7108674049377441, + "learning_rate": 9.5130240618611e-05, + "loss": 1.3841, + "step": 7959 + }, + { + "epoch": 0.8396624472573839, + "grad_norm": 0.652147114276886, + "learning_rate": 9.50079022494395e-05, + "loss": 1.3671, + "step": 7960 + }, + { + "epoch": 0.8397679324894515, + "grad_norm": 0.6585747599601746, + "learning_rate": 9.488563727539864e-05, + "loss": 1.3429, + "step": 7961 + }, + { + "epoch": 0.839873417721519, + "grad_norm": 0.6880823373794556, + "learning_rate": 9.47634457101888e-05, + "loss": 1.3727, + "step": 7962 + }, + { + "epoch": 0.8399789029535865, + "grad_norm": 0.7763985991477966, + "learning_rate": 9.464132756750218e-05, + "loss": 1.3654, + "step": 7963 + }, + { + "epoch": 0.840084388185654, + "grad_norm": 0.6660386323928833, + "learning_rate": 9.451928286102277e-05, + "loss": 1.3853, + "step": 7964 + }, + { + "epoch": 0.8401898734177216, + "grad_norm": 0.6976905465126038, + "learning_rate": 9.439731160442619e-05, + "loss": 1.3773, + "step": 7965 + }, + { + "epoch": 0.840295358649789, + "grad_norm": 0.7242016792297363, + "learning_rate": 9.427541381138002e-05, + "loss": 1.3779, + "step": 7966 + }, + { + "epoch": 0.8404008438818565, + "grad_norm": 0.6787542104721069, + "learning_rate": 9.415358949554326e-05, + "loss": 1.3704, + "step": 7967 + }, + { + "epoch": 0.8405063291139241, + "grad_norm": 0.7357276082038879, + "learning_rate": 9.40318386705673e-05, + "loss": 1.3865, + "step": 7968 + }, + { + "epoch": 0.8406118143459915, + "grad_norm": 0.6786850690841675, + "learning_rate": 9.391016135009484e-05, + "loss": 1.3374, + "step": 7969 + }, + { + "epoch": 0.8407172995780591, + "grad_norm": 0.7653582096099854, + "learning_rate": 9.378855754776028e-05, + "loss": 1.3583, + "step": 7970 + }, + { + "epoch": 0.8408227848101266, + "grad_norm": 0.9528514742851257, + "learning_rate": 9.366702727719006e-05, + "loss": 1.3932, 
+ "step": 7971 + }, + { + "epoch": 0.8409282700421941, + "grad_norm": 0.705061137676239, + "learning_rate": 9.354557055200214e-05, + "loss": 1.3961, + "step": 7972 + }, + { + "epoch": 0.8410337552742616, + "grad_norm": 0.6704480648040771, + "learning_rate": 9.342418738580652e-05, + "loss": 1.3555, + "step": 7973 + }, + { + "epoch": 0.8411392405063292, + "grad_norm": 0.8061882853507996, + "learning_rate": 9.330287779220459e-05, + "loss": 1.3363, + "step": 7974 + }, + { + "epoch": 0.8412447257383966, + "grad_norm": 1.0296441316604614, + "learning_rate": 9.31816417847898e-05, + "loss": 1.3867, + "step": 7975 + }, + { + "epoch": 0.8413502109704641, + "grad_norm": 0.6741187572479248, + "learning_rate": 9.306047937714713e-05, + "loss": 1.3833, + "step": 7976 + }, + { + "epoch": 0.8414556962025317, + "grad_norm": 0.7362245321273804, + "learning_rate": 9.29393905828537e-05, + "loss": 1.3591, + "step": 7977 + }, + { + "epoch": 0.8415611814345991, + "grad_norm": 0.8283452391624451, + "learning_rate": 9.281837541547791e-05, + "loss": 1.3602, + "step": 7978 + }, + { + "epoch": 0.8416666666666667, + "grad_norm": 0.7031230330467224, + "learning_rate": 9.269743388858019e-05, + "loss": 1.3697, + "step": 7979 + }, + { + "epoch": 0.8417721518987342, + "grad_norm": 0.6644742488861084, + "learning_rate": 9.257656601571266e-05, + "loss": 1.345, + "step": 7980 + }, + { + "epoch": 0.8418776371308017, + "grad_norm": 0.6766179203987122, + "learning_rate": 9.245577181041901e-05, + "loss": 1.3683, + "step": 7981 + }, + { + "epoch": 0.8419831223628692, + "grad_norm": 0.8655458688735962, + "learning_rate": 9.233505128623499e-05, + "loss": 1.3522, + "step": 7982 + }, + { + "epoch": 0.8420886075949368, + "grad_norm": 0.6886115670204163, + "learning_rate": 9.221440445668794e-05, + "loss": 1.4054, + "step": 7983 + }, + { + "epoch": 0.8421940928270042, + "grad_norm": 0.8226326107978821, + "learning_rate": 9.209383133529664e-05, + "loss": 1.3414, + "step": 7984 + }, + { + "epoch": 
0.8422995780590717, + "grad_norm": 0.6563223600387573, + "learning_rate": 9.197333193557237e-05, + "loss": 1.4073, + "step": 7985 + }, + { + "epoch": 0.8424050632911393, + "grad_norm": 0.7477640509605408, + "learning_rate": 9.185290627101747e-05, + "loss": 1.3827, + "step": 7986 + }, + { + "epoch": 0.8425105485232067, + "grad_norm": 0.7421661615371704, + "learning_rate": 9.173255435512617e-05, + "loss": 1.385, + "step": 7987 + }, + { + "epoch": 0.8426160337552743, + "grad_norm": 0.7877265810966492, + "learning_rate": 9.161227620138468e-05, + "loss": 1.4214, + "step": 7988 + }, + { + "epoch": 0.8427215189873418, + "grad_norm": 0.6880192756652832, + "learning_rate": 9.149207182327054e-05, + "loss": 1.35, + "step": 7989 + }, + { + "epoch": 0.8428270042194093, + "grad_norm": 0.6551079750061035, + "learning_rate": 9.137194123425349e-05, + "loss": 1.3749, + "step": 7990 + }, + { + "epoch": 0.8429324894514768, + "grad_norm": 0.6485511064529419, + "learning_rate": 9.125188444779458e-05, + "loss": 1.376, + "step": 7991 + }, + { + "epoch": 0.8430379746835444, + "grad_norm": 0.7177321910858154, + "learning_rate": 9.113190147734682e-05, + "loss": 1.3558, + "step": 7992 + }, + { + "epoch": 0.8431434599156118, + "grad_norm": 0.6536970138549805, + "learning_rate": 9.101199233635477e-05, + "loss": 1.3878, + "step": 7993 + }, + { + "epoch": 0.8432489451476793, + "grad_norm": 0.8422549962997437, + "learning_rate": 9.089215703825519e-05, + "loss": 1.3626, + "step": 7994 + }, + { + "epoch": 0.8433544303797469, + "grad_norm": 0.7394202351570129, + "learning_rate": 9.077239559647591e-05, + "loss": 1.3633, + "step": 7995 + }, + { + "epoch": 0.8434599156118143, + "grad_norm": 0.701370894908905, + "learning_rate": 9.065270802443704e-05, + "loss": 1.3973, + "step": 7996 + }, + { + "epoch": 0.8435654008438819, + "grad_norm": 0.6943351030349731, + "learning_rate": 9.053309433554993e-05, + "loss": 1.3764, + "step": 7997 + }, + { + "epoch": 0.8436708860759494, + "grad_norm": 0.6919082999229431, 
+ "learning_rate": 9.041355454321803e-05, + "loss": 1.3682, + "step": 7998 + }, + { + "epoch": 0.8437763713080169, + "grad_norm": 0.6543880701065063, + "learning_rate": 9.029408866083638e-05, + "loss": 1.3689, + "step": 7999 + }, + { + "epoch": 0.8438818565400844, + "grad_norm": 0.6856273412704468, + "learning_rate": 9.017469670179168e-05, + "loss": 1.3675, + "step": 8000 + }, + { + "epoch": 0.8439873417721518, + "grad_norm": 0.6600971817970276, + "learning_rate": 9.00553786794624e-05, + "loss": 1.3909, + "step": 8001 + }, + { + "epoch": 0.8440928270042194, + "grad_norm": 0.7238931059837341, + "learning_rate": 8.99361346072185e-05, + "loss": 1.4076, + "step": 8002 + }, + { + "epoch": 0.8441983122362869, + "grad_norm": 0.6421932578086853, + "learning_rate": 8.98169644984223e-05, + "loss": 1.3997, + "step": 8003 + }, + { + "epoch": 0.8443037974683544, + "grad_norm": 0.6654088497161865, + "learning_rate": 8.96978683664272e-05, + "loss": 1.3444, + "step": 8004 + }, + { + "epoch": 0.8444092827004219, + "grad_norm": 0.6924827694892883, + "learning_rate": 8.957884622457854e-05, + "loss": 1.365, + "step": 8005 + }, + { + "epoch": 0.8445147679324895, + "grad_norm": 0.7683106064796448, + "learning_rate": 8.945989808621321e-05, + "loss": 1.3798, + "step": 8006 + }, + { + "epoch": 0.8446202531645569, + "grad_norm": 0.660514235496521, + "learning_rate": 8.934102396466016e-05, + "loss": 1.422, + "step": 8007 + }, + { + "epoch": 0.8447257383966245, + "grad_norm": 0.7457454204559326, + "learning_rate": 8.92222238732397e-05, + "loss": 1.36, + "step": 8008 + }, + { + "epoch": 0.844831223628692, + "grad_norm": 0.6408562064170837, + "learning_rate": 8.910349782526394e-05, + "loss": 1.3405, + "step": 8009 + }, + { + "epoch": 0.8449367088607594, + "grad_norm": 0.6586745381355286, + "learning_rate": 8.898484583403668e-05, + "loss": 1.3602, + "step": 8010 + }, + { + "epoch": 0.845042194092827, + "grad_norm": 0.6468051075935364, + "learning_rate": 8.886626791285369e-05, + "loss": 1.3782, + 
"step": 8011 + }, + { + "epoch": 0.8451476793248945, + "grad_norm": 0.6718406677246094, + "learning_rate": 8.874776407500206e-05, + "loss": 1.3659, + "step": 8012 + }, + { + "epoch": 0.845253164556962, + "grad_norm": 0.6611781120300293, + "learning_rate": 8.86293343337608e-05, + "loss": 1.3777, + "step": 8013 + }, + { + "epoch": 0.8453586497890295, + "grad_norm": 0.6478965282440186, + "learning_rate": 8.851097870240051e-05, + "loss": 1.3635, + "step": 8014 + }, + { + "epoch": 0.8454641350210971, + "grad_norm": 0.6574118733406067, + "learning_rate": 8.839269719418361e-05, + "loss": 1.3674, + "step": 8015 + }, + { + "epoch": 0.8455696202531645, + "grad_norm": 0.6660116910934448, + "learning_rate": 8.827448982236397e-05, + "loss": 1.3717, + "step": 8016 + }, + { + "epoch": 0.8456751054852321, + "grad_norm": 0.6681106090545654, + "learning_rate": 8.815635660018742e-05, + "loss": 1.3228, + "step": 8017 + }, + { + "epoch": 0.8457805907172996, + "grad_norm": 0.6994544267654419, + "learning_rate": 8.803829754089138e-05, + "loss": 1.4081, + "step": 8018 + }, + { + "epoch": 0.845886075949367, + "grad_norm": 0.681298553943634, + "learning_rate": 8.792031265770475e-05, + "loss": 1.394, + "step": 8019 + }, + { + "epoch": 0.8459915611814346, + "grad_norm": 0.6781190633773804, + "learning_rate": 8.780240196384873e-05, + "loss": 1.3765, + "step": 8020 + }, + { + "epoch": 0.8460970464135021, + "grad_norm": 0.6769047975540161, + "learning_rate": 8.768456547253556e-05, + "loss": 1.3702, + "step": 8021 + }, + { + "epoch": 0.8462025316455696, + "grad_norm": 0.6447323560714722, + "learning_rate": 8.756680319696945e-05, + "loss": 1.3575, + "step": 8022 + }, + { + "epoch": 0.8463080168776371, + "grad_norm": 0.6410549879074097, + "learning_rate": 8.744911515034623e-05, + "loss": 1.3392, + "step": 8023 + }, + { + "epoch": 0.8464135021097047, + "grad_norm": 0.6806005835533142, + "learning_rate": 8.733150134585338e-05, + "loss": 1.4162, + "step": 8024 + }, + { + "epoch": 0.8465189873417721, + 
"grad_norm": 0.6832634210586548, + "learning_rate": 8.721396179667019e-05, + "loss": 1.3898, + "step": 8025 + }, + { + "epoch": 0.8466244725738397, + "grad_norm": 0.6588675379753113, + "learning_rate": 8.709649651596752e-05, + "loss": 1.3615, + "step": 8026 + }, + { + "epoch": 0.8467299578059072, + "grad_norm": 0.682657778263092, + "learning_rate": 8.697910551690802e-05, + "loss": 1.3544, + "step": 8027 + }, + { + "epoch": 0.8468354430379746, + "grad_norm": 0.7189826965332031, + "learning_rate": 8.686178881264568e-05, + "loss": 1.3481, + "step": 8028 + }, + { + "epoch": 0.8469409282700422, + "grad_norm": 0.70392245054245, + "learning_rate": 8.67445464163267e-05, + "loss": 1.3873, + "step": 8029 + }, + { + "epoch": 0.8470464135021097, + "grad_norm": 0.7137112021446228, + "learning_rate": 8.662737834108861e-05, + "loss": 1.3571, + "step": 8030 + }, + { + "epoch": 0.8471518987341772, + "grad_norm": 0.6699103713035583, + "learning_rate": 8.651028460006072e-05, + "loss": 1.3901, + "step": 8031 + }, + { + "epoch": 0.8472573839662447, + "grad_norm": 0.7501208186149597, + "learning_rate": 8.639326520636387e-05, + "loss": 1.3382, + "step": 8032 + }, + { + "epoch": 0.8473628691983123, + "grad_norm": 0.6861653327941895, + "learning_rate": 8.627632017311065e-05, + "loss": 1.4034, + "step": 8033 + }, + { + "epoch": 0.8474683544303797, + "grad_norm": 0.6985395550727844, + "learning_rate": 8.615944951340543e-05, + "loss": 1.3473, + "step": 8034 + }, + { + "epoch": 0.8475738396624473, + "grad_norm": 0.7587062120437622, + "learning_rate": 8.604265324034405e-05, + "loss": 1.3755, + "step": 8035 + }, + { + "epoch": 0.8476793248945148, + "grad_norm": 0.6964250802993774, + "learning_rate": 8.592593136701404e-05, + "loss": 1.3902, + "step": 8036 + }, + { + "epoch": 0.8477848101265822, + "grad_norm": 0.7255306839942932, + "learning_rate": 8.580928390649496e-05, + "loss": 1.3763, + "step": 8037 + }, + { + "epoch": 0.8478902953586498, + "grad_norm": 0.6903165578842163, + "learning_rate": 
8.569271087185756e-05, + "loss": 1.4226, + "step": 8038 + }, + { + "epoch": 0.8479957805907173, + "grad_norm": 0.7806037664413452, + "learning_rate": 8.557621227616444e-05, + "loss": 1.4161, + "step": 8039 + }, + { + "epoch": 0.8481012658227848, + "grad_norm": 0.6781609654426575, + "learning_rate": 8.545978813246987e-05, + "loss": 1.3815, + "step": 8040 + }, + { + "epoch": 0.8482067510548523, + "grad_norm": 0.6695238947868347, + "learning_rate": 8.53434384538197e-05, + "loss": 1.3698, + "step": 8041 + }, + { + "epoch": 0.8483122362869199, + "grad_norm": 0.6698035597801208, + "learning_rate": 8.522716325325155e-05, + "loss": 1.3258, + "step": 8042 + }, + { + "epoch": 0.8484177215189873, + "grad_norm": 0.6686978936195374, + "learning_rate": 8.51109625437946e-05, + "loss": 1.3998, + "step": 8043 + }, + { + "epoch": 0.8485232067510549, + "grad_norm": 0.7027145624160767, + "learning_rate": 8.499483633846977e-05, + "loss": 1.3546, + "step": 8044 + }, + { + "epoch": 0.8486286919831224, + "grad_norm": 0.658146858215332, + "learning_rate": 8.48787846502893e-05, + "loss": 1.3499, + "step": 8045 + }, + { + "epoch": 0.8487341772151898, + "grad_norm": 0.6741515398025513, + "learning_rate": 8.476280749225782e-05, + "loss": 1.3505, + "step": 8046 + }, + { + "epoch": 0.8488396624472574, + "grad_norm": 0.6844092011451721, + "learning_rate": 8.464690487737098e-05, + "loss": 1.3354, + "step": 8047 + }, + { + "epoch": 0.8489451476793249, + "grad_norm": 0.6520295739173889, + "learning_rate": 8.453107681861616e-05, + "loss": 1.3404, + "step": 8048 + }, + { + "epoch": 0.8490506329113924, + "grad_norm": 0.7095662951469421, + "learning_rate": 8.441532332897248e-05, + "loss": 1.4111, + "step": 8049 + }, + { + "epoch": 0.8491561181434599, + "grad_norm": 0.6973369121551514, + "learning_rate": 8.429964442141072e-05, + "loss": 1.379, + "step": 8050 + }, + { + "epoch": 0.8492616033755275, + "grad_norm": 0.6764839887619019, + "learning_rate": 8.418404010889336e-05, + "loss": 1.3821, + "step": 
8051 + }, + { + "epoch": 0.8493670886075949, + "grad_norm": 0.6653481721878052, + "learning_rate": 8.406851040437426e-05, + "loss": 1.3472, + "step": 8052 + }, + { + "epoch": 0.8494725738396625, + "grad_norm": 0.6495171785354614, + "learning_rate": 8.395305532079928e-05, + "loss": 1.3906, + "step": 8053 + }, + { + "epoch": 0.84957805907173, + "grad_norm": 0.6833106875419617, + "learning_rate": 8.383767487110552e-05, + "loss": 1.3633, + "step": 8054 + }, + { + "epoch": 0.8496835443037974, + "grad_norm": 0.6910944581031799, + "learning_rate": 8.372236906822217e-05, + "loss": 1.3885, + "step": 8055 + }, + { + "epoch": 0.849789029535865, + "grad_norm": 0.692548394203186, + "learning_rate": 8.360713792506971e-05, + "loss": 1.3612, + "step": 8056 + }, + { + "epoch": 0.8498945147679325, + "grad_norm": 0.6557542681694031, + "learning_rate": 8.349198145456049e-05, + "loss": 1.3828, + "step": 8057 + }, + { + "epoch": 0.85, + "grad_norm": 0.6578806638717651, + "learning_rate": 8.337689966959819e-05, + "loss": 1.4076, + "step": 8058 + }, + { + "epoch": 0.8501054852320675, + "grad_norm": 0.6604527831077576, + "learning_rate": 8.326189258307832e-05, + "loss": 1.326, + "step": 8059 + }, + { + "epoch": 0.8502109704641351, + "grad_norm": 0.6571440696716309, + "learning_rate": 8.314696020788806e-05, + "loss": 1.3669, + "step": 8060 + }, + { + "epoch": 0.8503164556962025, + "grad_norm": 0.6734827160835266, + "learning_rate": 8.303210255690622e-05, + "loss": 1.3727, + "step": 8061 + }, + { + "epoch": 0.8504219409282701, + "grad_norm": 0.7046311497688293, + "learning_rate": 8.29173196430029e-05, + "loss": 1.3738, + "step": 8062 + }, + { + "epoch": 0.8505274261603376, + "grad_norm": 0.6916664242744446, + "learning_rate": 8.280261147904039e-05, + "loss": 1.3365, + "step": 8063 + }, + { + "epoch": 0.850632911392405, + "grad_norm": 0.6572110652923584, + "learning_rate": 8.268797807787226e-05, + "loss": 1.3878, + "step": 8064 + }, + { + "epoch": 0.8507383966244726, + "grad_norm": 
0.6650893092155457, + "learning_rate": 8.257341945234365e-05, + "loss": 1.425, + "step": 8065 + }, + { + "epoch": 0.85084388185654, + "grad_norm": 0.7357087731361389, + "learning_rate": 8.245893561529153e-05, + "loss": 1.3689, + "step": 8066 + }, + { + "epoch": 0.8509493670886076, + "grad_norm": 0.6660627722740173, + "learning_rate": 8.23445265795443e-05, + "loss": 1.3593, + "step": 8067 + }, + { + "epoch": 0.8510548523206751, + "grad_norm": 0.6718909740447998, + "learning_rate": 8.223019235792214e-05, + "loss": 1.3846, + "step": 8068 + }, + { + "epoch": 0.8511603375527426, + "grad_norm": 0.7066664695739746, + "learning_rate": 8.211593296323672e-05, + "loss": 1.3907, + "step": 8069 + }, + { + "epoch": 0.8512658227848101, + "grad_norm": 0.7248277068138123, + "learning_rate": 8.200174840829136e-05, + "loss": 1.3779, + "step": 8070 + }, + { + "epoch": 0.8513713080168777, + "grad_norm": 0.6829277276992798, + "learning_rate": 8.188763870588092e-05, + "loss": 1.3438, + "step": 8071 + }, + { + "epoch": 0.8514767932489451, + "grad_norm": 0.6394017338752747, + "learning_rate": 8.177360386879217e-05, + "loss": 1.377, + "step": 8072 + }, + { + "epoch": 0.8515822784810126, + "grad_norm": 0.7568827271461487, + "learning_rate": 8.165964390980316e-05, + "loss": 1.3799, + "step": 8073 + }, + { + "epoch": 0.8516877637130802, + "grad_norm": 0.7078865170478821, + "learning_rate": 8.15457588416838e-05, + "loss": 1.3854, + "step": 8074 + }, + { + "epoch": 0.8517932489451476, + "grad_norm": 0.7248688340187073, + "learning_rate": 8.143194867719534e-05, + "loss": 1.3815, + "step": 8075 + }, + { + "epoch": 0.8518987341772152, + "grad_norm": 0.7220240831375122, + "learning_rate": 8.131821342909071e-05, + "loss": 1.4249, + "step": 8076 + }, + { + "epoch": 0.8520042194092827, + "grad_norm": 0.6763780117034912, + "learning_rate": 8.120455311011473e-05, + "loss": 1.3844, + "step": 8077 + }, + { + "epoch": 0.8521097046413502, + "grad_norm": 0.6305204629898071, + "learning_rate": 
8.109096773300348e-05, + "loss": 1.3355, + "step": 8078 + }, + { + "epoch": 0.8522151898734177, + "grad_norm": 0.6682489514350891, + "learning_rate": 8.097745731048475e-05, + "loss": 1.3688, + "step": 8079 + }, + { + "epoch": 0.8523206751054853, + "grad_norm": 0.6664907336235046, + "learning_rate": 8.08640218552778e-05, + "loss": 1.402, + "step": 8080 + }, + { + "epoch": 0.8524261603375527, + "grad_norm": 0.6456770300865173, + "learning_rate": 8.075066138009396e-05, + "loss": 1.3255, + "step": 8081 + }, + { + "epoch": 0.8525316455696202, + "grad_norm": 0.7518002390861511, + "learning_rate": 8.063737589763573e-05, + "loss": 1.308, + "step": 8082 + }, + { + "epoch": 0.8526371308016878, + "grad_norm": 0.6533992886543274, + "learning_rate": 8.05241654205973e-05, + "loss": 1.3495, + "step": 8083 + }, + { + "epoch": 0.8527426160337552, + "grad_norm": 0.7021051049232483, + "learning_rate": 8.041102996166442e-05, + "loss": 1.37, + "step": 8084 + }, + { + "epoch": 0.8528481012658228, + "grad_norm": 0.6666764616966248, + "learning_rate": 8.029796953351445e-05, + "loss": 1.3799, + "step": 8085 + }, + { + "epoch": 0.8529535864978903, + "grad_norm": 0.6594858169555664, + "learning_rate": 8.018498414881645e-05, + "loss": 1.3793, + "step": 8086 + }, + { + "epoch": 0.8530590717299578, + "grad_norm": 0.6647143959999084, + "learning_rate": 8.007207382023102e-05, + "loss": 1.3646, + "step": 8087 + }, + { + "epoch": 0.8531645569620253, + "grad_norm": 0.7181658744812012, + "learning_rate": 7.995923856041013e-05, + "loss": 1.3918, + "step": 8088 + }, + { + "epoch": 0.8532700421940929, + "grad_norm": 0.6517056822776794, + "learning_rate": 7.984647838199773e-05, + "loss": 1.4148, + "step": 8089 + }, + { + "epoch": 0.8533755274261603, + "grad_norm": 0.6693677306175232, + "learning_rate": 7.973379329762925e-05, + "loss": 1.4026, + "step": 8090 + }, + { + "epoch": 0.8534810126582278, + "grad_norm": 0.706850528717041, + "learning_rate": 7.96211833199314e-05, + "loss": 1.3736, + "step": 8091 + 
}, + { + "epoch": 0.8535864978902954, + "grad_norm": 0.7161184549331665, + "learning_rate": 7.950864846152284e-05, + "loss": 1.3738, + "step": 8092 + }, + { + "epoch": 0.8536919831223628, + "grad_norm": 0.7391166687011719, + "learning_rate": 7.939618873501356e-05, + "loss": 1.3692, + "step": 8093 + }, + { + "epoch": 0.8537974683544304, + "grad_norm": 0.6733768582344055, + "learning_rate": 7.928380415300523e-05, + "loss": 1.3573, + "step": 8094 + }, + { + "epoch": 0.8539029535864979, + "grad_norm": 0.6718480587005615, + "learning_rate": 7.917149472809113e-05, + "loss": 1.3091, + "step": 8095 + }, + { + "epoch": 0.8540084388185654, + "grad_norm": 0.660937488079071, + "learning_rate": 7.905926047285616e-05, + "loss": 1.3395, + "step": 8096 + }, + { + "epoch": 0.8541139240506329, + "grad_norm": 0.6752768158912659, + "learning_rate": 7.894710139987645e-05, + "loss": 1.3785, + "step": 8097 + }, + { + "epoch": 0.8542194092827005, + "grad_norm": 0.7018122673034668, + "learning_rate": 7.883501752172038e-05, + "loss": 1.3506, + "step": 8098 + }, + { + "epoch": 0.8543248945147679, + "grad_norm": 0.6875532269477844, + "learning_rate": 7.872300885094736e-05, + "loss": 1.3687, + "step": 8099 + }, + { + "epoch": 0.8544303797468354, + "grad_norm": 0.7587577700614929, + "learning_rate": 7.861107540010845e-05, + "loss": 1.3732, + "step": 8100 + }, + { + "epoch": 0.854535864978903, + "grad_norm": 0.6949634552001953, + "learning_rate": 7.849921718174638e-05, + "loss": 1.3881, + "step": 8101 + }, + { + "epoch": 0.8546413502109704, + "grad_norm": 0.7019610404968262, + "learning_rate": 7.838743420839544e-05, + "loss": 1.4202, + "step": 8102 + }, + { + "epoch": 0.854746835443038, + "grad_norm": 0.6801626682281494, + "learning_rate": 7.827572649258147e-05, + "loss": 1.3735, + "step": 8103 + }, + { + "epoch": 0.8548523206751055, + "grad_norm": 0.6980706453323364, + "learning_rate": 7.816409404682185e-05, + "loss": 1.4087, + "step": 8104 + }, + { + "epoch": 0.854957805907173, + "grad_norm": 
0.7130919098854065, + "learning_rate": 7.805253688362557e-05, + "loss": 1.3788, + "step": 8105 + }, + { + "epoch": 0.8550632911392405, + "grad_norm": 0.6932260990142822, + "learning_rate": 7.794105501549306e-05, + "loss": 1.367, + "step": 8106 + }, + { + "epoch": 0.8551687763713081, + "grad_norm": 0.6454377770423889, + "learning_rate": 7.782964845491666e-05, + "loss": 1.3448, + "step": 8107 + }, + { + "epoch": 0.8552742616033755, + "grad_norm": 0.6551463603973389, + "learning_rate": 7.771831721437989e-05, + "loss": 1.3354, + "step": 8108 + }, + { + "epoch": 0.855379746835443, + "grad_norm": 0.6784166097640991, + "learning_rate": 7.760706130635792e-05, + "loss": 1.3761, + "step": 8109 + }, + { + "epoch": 0.8554852320675106, + "grad_norm": 0.671840250492096, + "learning_rate": 7.749588074331762e-05, + "loss": 1.4042, + "step": 8110 + }, + { + "epoch": 0.855590717299578, + "grad_norm": 0.6798112988471985, + "learning_rate": 7.738477553771727e-05, + "loss": 1.3748, + "step": 8111 + }, + { + "epoch": 0.8556962025316456, + "grad_norm": 0.6497519612312317, + "learning_rate": 7.727374570200685e-05, + "loss": 1.3974, + "step": 8112 + }, + { + "epoch": 0.8558016877637131, + "grad_norm": 0.7046075463294983, + "learning_rate": 7.716279124862771e-05, + "loss": 1.3278, + "step": 8113 + }, + { + "epoch": 0.8559071729957806, + "grad_norm": 0.6767721772193909, + "learning_rate": 7.705191219001267e-05, + "loss": 1.4054, + "step": 8114 + }, + { + "epoch": 0.8560126582278481, + "grad_norm": 0.6526734828948975, + "learning_rate": 7.694110853858671e-05, + "loss": 1.3709, + "step": 8115 + }, + { + "epoch": 0.8561181434599157, + "grad_norm": 0.6608391404151917, + "learning_rate": 7.683038030676573e-05, + "loss": 1.3711, + "step": 8116 + }, + { + "epoch": 0.8562236286919831, + "grad_norm": 0.8192624449729919, + "learning_rate": 7.67197275069573e-05, + "loss": 1.389, + "step": 8117 + }, + { + "epoch": 0.8563291139240506, + "grad_norm": 0.6837494373321533, + "learning_rate": 
7.660915015156067e-05, + "loss": 1.3478, + "step": 8118 + }, + { + "epoch": 0.8564345991561182, + "grad_norm": 0.6607191562652588, + "learning_rate": 7.649864825296669e-05, + "loss": 1.3568, + "step": 8119 + }, + { + "epoch": 0.8565400843881856, + "grad_norm": 0.6591840386390686, + "learning_rate": 7.63882218235575e-05, + "loss": 1.3411, + "step": 8120 + }, + { + "epoch": 0.8566455696202532, + "grad_norm": 0.7408193349838257, + "learning_rate": 7.627787087570692e-05, + "loss": 1.3642, + "step": 8121 + }, + { + "epoch": 0.8567510548523207, + "grad_norm": 0.698333740234375, + "learning_rate": 7.616759542178045e-05, + "loss": 1.3592, + "step": 8122 + }, + { + "epoch": 0.8568565400843882, + "grad_norm": 0.6651426553726196, + "learning_rate": 7.605739547413487e-05, + "loss": 1.3717, + "step": 8123 + }, + { + "epoch": 0.8569620253164557, + "grad_norm": 0.8032212853431702, + "learning_rate": 7.594727104511873e-05, + "loss": 1.3683, + "step": 8124 + }, + { + "epoch": 0.8570675105485233, + "grad_norm": 0.7046306133270264, + "learning_rate": 7.583722214707206e-05, + "loss": 1.3761, + "step": 8125 + }, + { + "epoch": 0.8571729957805907, + "grad_norm": 0.6490048766136169, + "learning_rate": 7.572724879232634e-05, + "loss": 1.3757, + "step": 8126 + }, + { + "epoch": 0.8572784810126582, + "grad_norm": 0.6639665365219116, + "learning_rate": 7.561735099320463e-05, + "loss": 1.3738, + "step": 8127 + }, + { + "epoch": 0.8573839662447258, + "grad_norm": 0.6730031371116638, + "learning_rate": 7.55075287620215e-05, + "loss": 1.3817, + "step": 8128 + }, + { + "epoch": 0.8574894514767932, + "grad_norm": 0.6993845105171204, + "learning_rate": 7.539778211108309e-05, + "loss": 1.4052, + "step": 8129 + }, + { + "epoch": 0.8575949367088608, + "grad_norm": 0.6962851881980896, + "learning_rate": 7.528811105268699e-05, + "loss": 1.338, + "step": 8130 + }, + { + "epoch": 0.8577004219409282, + "grad_norm": 0.7136635184288025, + "learning_rate": 7.517851559912254e-05, + "loss": 1.3388, + "step": 
8131 + }, + { + "epoch": 0.8578059071729958, + "grad_norm": 0.7033555507659912, + "learning_rate": 7.506899576267023e-05, + "loss": 1.3703, + "step": 8132 + }, + { + "epoch": 0.8579113924050633, + "grad_norm": 0.6943025588989258, + "learning_rate": 7.495955155560261e-05, + "loss": 1.3808, + "step": 8133 + }, + { + "epoch": 0.8580168776371307, + "grad_norm": 0.6654751896858215, + "learning_rate": 7.485018299018326e-05, + "loss": 1.3549, + "step": 8134 + }, + { + "epoch": 0.8581223628691983, + "grad_norm": 0.6803420782089233, + "learning_rate": 7.474089007866756e-05, + "loss": 1.3599, + "step": 8135 + }, + { + "epoch": 0.8582278481012658, + "grad_norm": 0.647494375705719, + "learning_rate": 7.463167283330227e-05, + "loss": 1.3813, + "step": 8136 + }, + { + "epoch": 0.8583333333333333, + "grad_norm": 0.649861216545105, + "learning_rate": 7.452253126632564e-05, + "loss": 1.3608, + "step": 8137 + }, + { + "epoch": 0.8584388185654008, + "grad_norm": 0.6483199596405029, + "learning_rate": 7.441346538996769e-05, + "loss": 1.3758, + "step": 8138 + }, + { + "epoch": 0.8585443037974684, + "grad_norm": 0.65047287940979, + "learning_rate": 7.430447521644973e-05, + "loss": 1.3783, + "step": 8139 + }, + { + "epoch": 0.8586497890295358, + "grad_norm": 0.6967417001724243, + "learning_rate": 7.41955607579845e-05, + "loss": 1.3679, + "step": 8140 + }, + { + "epoch": 0.8587552742616034, + "grad_norm": 0.6406729817390442, + "learning_rate": 7.408672202677666e-05, + "loss": 1.3272, + "step": 8141 + }, + { + "epoch": 0.8588607594936709, + "grad_norm": 0.6572203636169434, + "learning_rate": 7.397795903502202e-05, + "loss": 1.3688, + "step": 8142 + }, + { + "epoch": 0.8589662447257383, + "grad_norm": 0.7296553254127502, + "learning_rate": 7.386927179490801e-05, + "loss": 1.3851, + "step": 8143 + }, + { + "epoch": 0.8590717299578059, + "grad_norm": 0.6691399216651917, + "learning_rate": 7.376066031861364e-05, + "loss": 1.3428, + "step": 8144 + }, + { + "epoch": 0.8591772151898734, + 
"grad_norm": 0.7145047187805176, + "learning_rate": 7.365212461830933e-05, + "loss": 1.3468, + "step": 8145 + }, + { + "epoch": 0.8592827004219409, + "grad_norm": 0.6893951296806335, + "learning_rate": 7.354366470615695e-05, + "loss": 1.4069, + "step": 8146 + }, + { + "epoch": 0.8593881856540084, + "grad_norm": 0.7351024150848389, + "learning_rate": 7.343528059431009e-05, + "loss": 1.3574, + "step": 8147 + }, + { + "epoch": 0.859493670886076, + "grad_norm": 0.9029229283332825, + "learning_rate": 7.332697229491373e-05, + "loss": 1.3607, + "step": 8148 + }, + { + "epoch": 0.8595991561181434, + "grad_norm": 0.6830878257751465, + "learning_rate": 7.321873982010422e-05, + "loss": 1.3369, + "step": 8149 + }, + { + "epoch": 0.859704641350211, + "grad_norm": 0.7123898863792419, + "learning_rate": 7.311058318200969e-05, + "loss": 1.3949, + "step": 8150 + }, + { + "epoch": 0.8598101265822785, + "grad_norm": 0.683655321598053, + "learning_rate": 7.300250239274964e-05, + "loss": 1.3703, + "step": 8151 + }, + { + "epoch": 0.859915611814346, + "grad_norm": 0.7752760648727417, + "learning_rate": 7.289449746443494e-05, + "loss": 1.3505, + "step": 8152 + }, + { + "epoch": 0.8600210970464135, + "grad_norm": 0.7870528697967529, + "learning_rate": 7.278656840916825e-05, + "loss": 1.4083, + "step": 8153 + }, + { + "epoch": 0.860126582278481, + "grad_norm": 0.702735185623169, + "learning_rate": 7.26787152390434e-05, + "loss": 1.3861, + "step": 8154 + }, + { + "epoch": 0.8602320675105485, + "grad_norm": 0.6465806365013123, + "learning_rate": 7.257093796614597e-05, + "loss": 1.3904, + "step": 8155 + }, + { + "epoch": 0.860337552742616, + "grad_norm": 0.8330298066139221, + "learning_rate": 7.246323660255289e-05, + "loss": 1.3966, + "step": 8156 + }, + { + "epoch": 0.8604430379746836, + "grad_norm": 0.734832763671875, + "learning_rate": 7.235561116033265e-05, + "loss": 1.3597, + "step": 8157 + }, + { + "epoch": 0.860548523206751, + "grad_norm": 0.701363742351532, + "learning_rate": 
7.224806165154504e-05, + "loss": 1.3771, + "step": 8158 + }, + { + "epoch": 0.8606540084388186, + "grad_norm": 0.699924886226654, + "learning_rate": 7.214058808824192e-05, + "loss": 1.3398, + "step": 8159 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 0.7305096983909607, + "learning_rate": 7.203319048246599e-05, + "loss": 1.3738, + "step": 8160 + }, + { + "epoch": 0.8608649789029535, + "grad_norm": 0.6981671452522278, + "learning_rate": 7.192586884625169e-05, + "loss": 1.3478, + "step": 8161 + }, + { + "epoch": 0.8609704641350211, + "grad_norm": 0.6915459632873535, + "learning_rate": 7.1818623191625e-05, + "loss": 1.3877, + "step": 8162 + }, + { + "epoch": 0.8610759493670886, + "grad_norm": 0.6660017371177673, + "learning_rate": 7.17114535306033e-05, + "loss": 1.3833, + "step": 8163 + }, + { + "epoch": 0.8611814345991561, + "grad_norm": 0.6219620108604431, + "learning_rate": 7.16043598751954e-05, + "loss": 1.3403, + "step": 8164 + }, + { + "epoch": 0.8612869198312236, + "grad_norm": 0.6955319046974182, + "learning_rate": 7.149734223740187e-05, + "loss": 1.369, + "step": 8165 + }, + { + "epoch": 0.8613924050632912, + "grad_norm": 0.663905918598175, + "learning_rate": 7.139040062921428e-05, + "loss": 1.4099, + "step": 8166 + }, + { + "epoch": 0.8614978902953586, + "grad_norm": 0.6533742547035217, + "learning_rate": 7.128353506261631e-05, + "loss": 1.3662, + "step": 8167 + }, + { + "epoch": 0.8616033755274262, + "grad_norm": 0.6593297719955444, + "learning_rate": 7.117674554958253e-05, + "loss": 1.3774, + "step": 8168 + }, + { + "epoch": 0.8617088607594937, + "grad_norm": 0.6979124546051025, + "learning_rate": 7.107003210207947e-05, + "loss": 1.3413, + "step": 8169 + }, + { + "epoch": 0.8618143459915611, + "grad_norm": 0.672027587890625, + "learning_rate": 7.096339473206471e-05, + "loss": 1.3368, + "step": 8170 + }, + { + "epoch": 0.8619198312236287, + "grad_norm": 0.6486179828643799, + "learning_rate": 7.085683345148753e-05, + "loss": 1.3746, + "step": 8171 + 
}, + { + "epoch": 0.8620253164556962, + "grad_norm": 0.7069767713546753, + "learning_rate": 7.075034827228862e-05, + "loss": 1.3641, + "step": 8172 + }, + { + "epoch": 0.8621308016877637, + "grad_norm": 0.6421130299568176, + "learning_rate": 7.064393920640031e-05, + "loss": 1.4085, + "step": 8173 + }, + { + "epoch": 0.8622362869198312, + "grad_norm": 0.6459810137748718, + "learning_rate": 7.053760626574618e-05, + "loss": 1.3698, + "step": 8174 + }, + { + "epoch": 0.8623417721518988, + "grad_norm": 0.6663148999214172, + "learning_rate": 7.043134946224123e-05, + "loss": 1.3309, + "step": 8175 + }, + { + "epoch": 0.8624472573839662, + "grad_norm": 0.6565984487533569, + "learning_rate": 7.032516880779233e-05, + "loss": 1.3699, + "step": 8176 + }, + { + "epoch": 0.8625527426160338, + "grad_norm": 0.7145168781280518, + "learning_rate": 7.021906431429747e-05, + "loss": 1.3847, + "step": 8177 + }, + { + "epoch": 0.8626582278481013, + "grad_norm": 0.6650967597961426, + "learning_rate": 7.011303599364608e-05, + "loss": 1.3628, + "step": 8178 + }, + { + "epoch": 0.8627637130801687, + "grad_norm": 0.6741932034492493, + "learning_rate": 7.000708385771928e-05, + "loss": 1.3615, + "step": 8179 + }, + { + "epoch": 0.8628691983122363, + "grad_norm": 0.6996565461158752, + "learning_rate": 6.990120791838953e-05, + "loss": 1.3986, + "step": 8180 + }, + { + "epoch": 0.8629746835443038, + "grad_norm": 0.7087993621826172, + "learning_rate": 6.979540818752064e-05, + "loss": 1.3608, + "step": 8181 + }, + { + "epoch": 0.8630801687763713, + "grad_norm": 0.6765323877334595, + "learning_rate": 6.968968467696806e-05, + "loss": 1.3679, + "step": 8182 + }, + { + "epoch": 0.8631856540084388, + "grad_norm": 0.679778516292572, + "learning_rate": 6.958403739857866e-05, + "loss": 1.3597, + "step": 8183 + }, + { + "epoch": 0.8632911392405064, + "grad_norm": 0.6434427499771118, + "learning_rate": 6.947846636419061e-05, + "loss": 1.3768, + "step": 8184 + }, + { + "epoch": 0.8633966244725738, + 
"grad_norm": 0.7165384292602539, + "learning_rate": 6.937297158563389e-05, + "loss": 1.3714, + "step": 8185 + }, + { + "epoch": 0.8635021097046414, + "grad_norm": 0.6388179659843445, + "learning_rate": 6.926755307472968e-05, + "loss": 1.3753, + "step": 8186 + }, + { + "epoch": 0.8636075949367089, + "grad_norm": Infinity, + "learning_rate": 6.926755307472968e-05, + "loss": 1.3593, + "step": 8187 + }, + { + "epoch": 0.8637130801687763, + "grad_norm": 0.8309327363967896, + "learning_rate": 6.916221084329055e-05, + "loss": 1.3505, + "step": 8188 + }, + { + "epoch": 0.8638185654008439, + "grad_norm": 0.7015219926834106, + "learning_rate": 6.905694490312064e-05, + "loss": 1.3768, + "step": 8189 + }, + { + "epoch": 0.8639240506329114, + "grad_norm": 0.7443726062774658, + "learning_rate": 6.89517552660156e-05, + "loss": 1.365, + "step": 8190 + }, + { + "epoch": 0.8640295358649789, + "grad_norm": 0.674812912940979, + "learning_rate": 6.884664194376233e-05, + "loss": 1.3781, + "step": 8191 + }, + { + "epoch": 0.8641350210970464, + "grad_norm": 0.6821144223213196, + "learning_rate": 6.874160494813942e-05, + "loss": 1.3617, + "step": 8192 + }, + { + "epoch": 0.864240506329114, + "grad_norm": 0.6451838612556458, + "learning_rate": 6.86366442909166e-05, + "loss": 1.3617, + "step": 8193 + }, + { + "epoch": 0.8643459915611814, + "grad_norm": 0.695590615272522, + "learning_rate": 6.853175998385547e-05, + "loss": 1.3875, + "step": 8194 + }, + { + "epoch": 0.864451476793249, + "grad_norm": 0.7011633515357971, + "learning_rate": 6.842695203870872e-05, + "loss": 1.3544, + "step": 8195 + }, + { + "epoch": 0.8645569620253165, + "grad_norm": 0.6564785838127136, + "learning_rate": 6.832222046722069e-05, + "loss": 1.4075, + "step": 8196 + }, + { + "epoch": 0.864662447257384, + "grad_norm": 0.6721289157867432, + "learning_rate": 6.821756528112693e-05, + "loss": 1.3816, + "step": 8197 + }, + { + "epoch": 0.8647679324894515, + "grad_norm": 0.700854480266571, + "learning_rate": 
6.811298649215472e-05, + "loss": 1.3693, + "step": 8198 + }, + { + "epoch": 0.8648734177215189, + "grad_norm": 0.6819148063659668, + "learning_rate": 6.80084841120226e-05, + "loss": 1.4016, + "step": 8199 + }, + { + "epoch": 0.8649789029535865, + "grad_norm": 0.6784263849258423, + "learning_rate": 6.790405815244044e-05, + "loss": 1.3607, + "step": 8200 + }, + { + "epoch": 0.865084388185654, + "grad_norm": 0.6569089293479919, + "learning_rate": 6.779970862510989e-05, + "loss": 1.3805, + "step": 8201 + }, + { + "epoch": 0.8651898734177215, + "grad_norm": 0.7151170372962952, + "learning_rate": 6.769543554172361e-05, + "loss": 1.4097, + "step": 8202 + }, + { + "epoch": 0.865295358649789, + "grad_norm": 0.6799268126487732, + "learning_rate": 6.759123891396615e-05, + "loss": 1.3553, + "step": 8203 + }, + { + "epoch": 0.8654008438818566, + "grad_norm": 0.6918942928314209, + "learning_rate": 6.748711875351318e-05, + "loss": 1.3412, + "step": 8204 + }, + { + "epoch": 0.865506329113924, + "grad_norm": 0.7388770580291748, + "learning_rate": 6.738307507203187e-05, + "loss": 1.3683, + "step": 8205 + }, + { + "epoch": 0.8656118143459915, + "grad_norm": 0.699564516544342, + "learning_rate": 6.72791078811808e-05, + "loss": 1.368, + "step": 8206 + }, + { + "epoch": 0.8657172995780591, + "grad_norm": 0.6540932655334473, + "learning_rate": 6.717521719261016e-05, + "loss": 1.3822, + "step": 8207 + }, + { + "epoch": 0.8658227848101265, + "grad_norm": 0.6825164556503296, + "learning_rate": 6.707140301796122e-05, + "loss": 1.3985, + "step": 8208 + }, + { + "epoch": 0.8659282700421941, + "grad_norm": 0.6588616371154785, + "learning_rate": 6.696766536886692e-05, + "loss": 1.3419, + "step": 8209 + }, + { + "epoch": 0.8660337552742616, + "grad_norm": 0.7177931666374207, + "learning_rate": 6.686400425695171e-05, + "loss": 1.3421, + "step": 8210 + }, + { + "epoch": 0.8661392405063291, + "grad_norm": 0.7044302225112915, + "learning_rate": 6.676041969383107e-05, + "loss": 1.3785, + "step": 8211 
+ }, + { + "epoch": 0.8662447257383966, + "grad_norm": 0.6876776218414307, + "learning_rate": 6.665691169111244e-05, + "loss": 1.3588, + "step": 8212 + }, + { + "epoch": 0.8663502109704642, + "grad_norm": 0.6820887327194214, + "learning_rate": 6.655348026039437e-05, + "loss": 1.3712, + "step": 8213 + }, + { + "epoch": 0.8664556962025316, + "grad_norm": 0.6527705192565918, + "learning_rate": 6.645012541326678e-05, + "loss": 1.3777, + "step": 8214 + }, + { + "epoch": 0.8665611814345991, + "grad_norm": 0.6848354935646057, + "learning_rate": 6.634684716131114e-05, + "loss": 1.3954, + "step": 8215 + }, + { + "epoch": 0.8666666666666667, + "grad_norm": 0.6751934289932251, + "learning_rate": 6.62436455161003e-05, + "loss": 1.3637, + "step": 8216 + }, + { + "epoch": 0.8667721518987341, + "grad_norm": 0.751686692237854, + "learning_rate": 6.614052048919847e-05, + "loss": 1.394, + "step": 8217 + }, + { + "epoch": 0.8668776371308017, + "grad_norm": 0.6923282146453857, + "learning_rate": 6.603747209216135e-05, + "loss": 1.3497, + "step": 8218 + }, + { + "epoch": 0.8669831223628692, + "grad_norm": 0.8060182929039001, + "learning_rate": 6.593450033653586e-05, + "loss": 1.4046, + "step": 8219 + }, + { + "epoch": 0.8670886075949367, + "grad_norm": 0.696691632270813, + "learning_rate": 6.583160523386086e-05, + "loss": 1.4062, + "step": 8220 + }, + { + "epoch": 0.8671940928270042, + "grad_norm": 0.6410233378410339, + "learning_rate": 6.572878679566605e-05, + "loss": 1.3349, + "step": 8221 + }, + { + "epoch": 0.8672995780590718, + "grad_norm": 0.7211624979972839, + "learning_rate": 6.562604503347277e-05, + "loss": 1.3589, + "step": 8222 + }, + { + "epoch": 0.8674050632911392, + "grad_norm": 0.6422358751296997, + "learning_rate": 6.552337995879368e-05, + "loss": 1.3823, + "step": 8223 + }, + { + "epoch": 0.8675105485232067, + "grad_norm": 0.656272292137146, + "learning_rate": 6.542079158313305e-05, + "loss": 1.381, + "step": 8224 + }, + { + "epoch": 0.8676160337552743, + "grad_norm": 
0.7605425715446472, + "learning_rate": 6.531827991798628e-05, + "loss": 1.4047, + "step": 8225 + }, + { + "epoch": 0.8677215189873417, + "grad_norm": 0.7044042944908142, + "learning_rate": 6.521584497484043e-05, + "loss": 1.3768, + "step": 8226 + }, + { + "epoch": 0.8678270042194093, + "grad_norm": 0.6674962043762207, + "learning_rate": 6.511348676517373e-05, + "loss": 1.3484, + "step": 8227 + }, + { + "epoch": 0.8679324894514768, + "grad_norm": 0.6376194357872009, + "learning_rate": 6.501120530045593e-05, + "loss": 1.3871, + "step": 8228 + }, + { + "epoch": 0.8680379746835443, + "grad_norm": 0.6937452554702759, + "learning_rate": 6.490900059214836e-05, + "loss": 1.366, + "step": 8229 + }, + { + "epoch": 0.8681434599156118, + "grad_norm": 0.6615468263626099, + "learning_rate": 6.480687265170342e-05, + "loss": 1.3955, + "step": 8230 + }, + { + "epoch": 0.8682489451476794, + "grad_norm": 0.6721321940422058, + "learning_rate": 6.470482149056509e-05, + "loss": 1.3746, + "step": 8231 + }, + { + "epoch": 0.8683544303797468, + "grad_norm": 0.747225821018219, + "learning_rate": 6.460284712016868e-05, + "loss": 1.3456, + "step": 8232 + }, + { + "epoch": 0.8684599156118143, + "grad_norm": 0.6660395264625549, + "learning_rate": 6.450094955194096e-05, + "loss": 1.3838, + "step": 8233 + }, + { + "epoch": 0.8685654008438819, + "grad_norm": 0.6475598216056824, + "learning_rate": 6.439912879730009e-05, + "loss": 1.3875, + "step": 8234 + }, + { + "epoch": 0.8686708860759493, + "grad_norm": 0.6926462054252625, + "learning_rate": 6.429738486765548e-05, + "loss": 1.3609, + "step": 8235 + }, + { + "epoch": 0.8687763713080169, + "grad_norm": 0.7347736358642578, + "learning_rate": 6.419571777440814e-05, + "loss": 1.3741, + "step": 8236 + }, + { + "epoch": 0.8688818565400844, + "grad_norm": 0.6482786536216736, + "learning_rate": 6.409412752895041e-05, + "loss": 1.3949, + "step": 8237 + }, + { + "epoch": 0.8689873417721519, + "grad_norm": 0.7061935663223267, + "learning_rate": 
6.399261414266571e-05, + "loss": 1.3816, + "step": 8238 + }, + { + "epoch": 0.8690928270042194, + "grad_norm": 0.6836811304092407, + "learning_rate": 6.389117762692952e-05, + "loss": 1.3209, + "step": 8239 + }, + { + "epoch": 0.869198312236287, + "grad_norm": 0.6330883502960205, + "learning_rate": 6.37898179931081e-05, + "loss": 1.367, + "step": 8240 + }, + { + "epoch": 0.8693037974683544, + "grad_norm": 0.6547881364822388, + "learning_rate": 6.368853525255942e-05, + "loss": 1.3912, + "step": 8241 + }, + { + "epoch": 0.869409282700422, + "grad_norm": 0.6463180780410767, + "learning_rate": 6.358732941663248e-05, + "loss": 1.3558, + "step": 8242 + }, + { + "epoch": 0.8695147679324895, + "grad_norm": 0.6706278324127197, + "learning_rate": 6.348620049666815e-05, + "loss": 1.4042, + "step": 8243 + }, + { + "epoch": 0.8696202531645569, + "grad_norm": 0.7391893267631531, + "learning_rate": 6.338514850399826e-05, + "loss": 1.3669, + "step": 8244 + }, + { + "epoch": 0.8697257383966245, + "grad_norm": 0.7032675743103027, + "learning_rate": 6.328417344994627e-05, + "loss": 1.3578, + "step": 8245 + }, + { + "epoch": 0.869831223628692, + "grad_norm": 0.6998916864395142, + "learning_rate": 6.318327534582688e-05, + "loss": 1.3859, + "step": 8246 + }, + { + "epoch": 0.8699367088607595, + "grad_norm": 0.7462990880012512, + "learning_rate": 6.308245420294636e-05, + "loss": 1.3921, + "step": 8247 + }, + { + "epoch": 0.870042194092827, + "grad_norm": 0.6749417781829834, + "learning_rate": 6.298171003260194e-05, + "loss": 1.4057, + "step": 8248 + }, + { + "epoch": 0.8701476793248946, + "grad_norm": 0.6462646126747131, + "learning_rate": 6.288104284608284e-05, + "loss": 1.3783, + "step": 8249 + }, + { + "epoch": 0.870253164556962, + "grad_norm": 0.6830899119377136, + "learning_rate": 6.278045265466911e-05, + "loss": 1.4051, + "step": 8250 + }, + { + "epoch": 0.8703586497890295, + "grad_norm": 0.7386086583137512, + "learning_rate": 6.267993946963249e-05, + "loss": 1.402, + "step": 8251 + 
}, + { + "epoch": 0.8704641350210971, + "grad_norm": 0.670418381690979, + "learning_rate": 6.257950330223597e-05, + "loss": 1.3551, + "step": 8252 + }, + { + "epoch": 0.8705696202531645, + "grad_norm": 0.7273027300834656, + "learning_rate": 6.247914416373387e-05, + "loss": 1.3695, + "step": 8253 + }, + { + "epoch": 0.8706751054852321, + "grad_norm": 0.7828029990196228, + "learning_rate": 6.237886206537197e-05, + "loss": 1.37, + "step": 8254 + }, + { + "epoch": 0.8707805907172996, + "grad_norm": 0.6569628119468689, + "learning_rate": 6.227865701838733e-05, + "loss": 1.4256, + "step": 8255 + }, + { + "epoch": 0.8708860759493671, + "grad_norm": 0.6560172438621521, + "learning_rate": 6.217852903400841e-05, + "loss": 1.3829, + "step": 8256 + }, + { + "epoch": 0.8709915611814346, + "grad_norm": 0.7758512496948242, + "learning_rate": 6.207847812345524e-05, + "loss": 1.3852, + "step": 8257 + }, + { + "epoch": 0.8710970464135022, + "grad_norm": 0.6922004222869873, + "learning_rate": 6.197850429793866e-05, + "loss": 1.3574, + "step": 8258 + }, + { + "epoch": 0.8712025316455696, + "grad_norm": 0.7593677043914795, + "learning_rate": 6.187860756866157e-05, + "loss": 1.3773, + "step": 8259 + }, + { + "epoch": 0.8713080168776371, + "grad_norm": 0.6531770825386047, + "learning_rate": 6.177878794681782e-05, + "loss": 1.3486, + "step": 8260 + }, + { + "epoch": 0.8714135021097047, + "grad_norm": 0.6652272939682007, + "learning_rate": 6.167904544359265e-05, + "loss": 1.3638, + "step": 8261 + }, + { + "epoch": 0.8715189873417721, + "grad_norm": 0.7119348049163818, + "learning_rate": 6.157938007016279e-05, + "loss": 1.3633, + "step": 8262 + }, + { + "epoch": 0.8716244725738397, + "grad_norm": 0.674290657043457, + "learning_rate": 6.147979183769602e-05, + "loss": 1.3901, + "step": 8263 + }, + { + "epoch": 0.8717299578059071, + "grad_norm": 0.6699403524398804, + "learning_rate": 6.138028075735196e-05, + "loss": 1.3487, + "step": 8264 + }, + { + "epoch": 0.8718354430379747, + "grad_norm": 
0.7260771989822388, + "learning_rate": 6.128084684028118e-05, + "loss": 1.3453, + "step": 8265 + }, + { + "epoch": 0.8719409282700422, + "grad_norm": 0.69029301404953, + "learning_rate": 6.118149009762574e-05, + "loss": 1.3794, + "step": 8266 + }, + { + "epoch": 0.8720464135021097, + "grad_norm": 0.7095767259597778, + "learning_rate": 6.108221054051902e-05, + "loss": 1.3724, + "step": 8267 + }, + { + "epoch": 0.8721518987341772, + "grad_norm": 0.7554974555969238, + "learning_rate": 6.0983008180086005e-05, + "loss": 1.3552, + "step": 8268 + }, + { + "epoch": 0.8722573839662447, + "grad_norm": 0.6402941942214966, + "learning_rate": 6.088388302744266e-05, + "loss": 1.3938, + "step": 8269 + }, + { + "epoch": 0.8723628691983122, + "grad_norm": 0.6844829320907593, + "learning_rate": 6.078483509369642e-05, + "loss": 1.3932, + "step": 8270 + }, + { + "epoch": 0.8724683544303797, + "grad_norm": 0.662503182888031, + "learning_rate": 6.068586438994617e-05, + "loss": 1.4104, + "step": 8271 + }, + { + "epoch": 0.8725738396624473, + "grad_norm": 0.6683664917945862, + "learning_rate": 6.058697092728202e-05, + "loss": 1.3453, + "step": 8272 + }, + { + "epoch": 0.8726793248945147, + "grad_norm": 0.6422229409217834, + "learning_rate": 6.048815471678554e-05, + "loss": 1.3578, + "step": 8273 + }, + { + "epoch": 0.8727848101265823, + "grad_norm": 0.6819918155670166, + "learning_rate": 6.038941576952952e-05, + "loss": 1.351, + "step": 8274 + }, + { + "epoch": 0.8728902953586498, + "grad_norm": 0.6494011878967285, + "learning_rate": 6.029075409657822e-05, + "loss": 1.3372, + "step": 8275 + }, + { + "epoch": 0.8729957805907173, + "grad_norm": 0.7337321043014526, + "learning_rate": 6.0192169708987026e-05, + "loss": 1.3736, + "step": 8276 + }, + { + "epoch": 0.8731012658227848, + "grad_norm": 0.6793985962867737, + "learning_rate": 6.009366261780286e-05, + "loss": 1.3509, + "step": 8277 + }, + { + "epoch": 0.8732067510548523, + "grad_norm": 0.6506238579750061, + "learning_rate": 
5.999523283406405e-05, + "loss": 1.3342, + "step": 8278 + }, + { + "epoch": 0.8733122362869198, + "grad_norm": 0.7862019538879395, + "learning_rate": 5.9896880368800115e-05, + "loss": 1.3695, + "step": 8279 + }, + { + "epoch": 0.8734177215189873, + "grad_norm": 0.6737431287765503, + "learning_rate": 5.9798605233031904e-05, + "loss": 1.328, + "step": 8280 + }, + { + "epoch": 0.8735232067510549, + "grad_norm": 0.6469071507453918, + "learning_rate": 5.970040743777161e-05, + "loss": 1.3615, + "step": 8281 + }, + { + "epoch": 0.8736286919831223, + "grad_norm": 0.6588966846466064, + "learning_rate": 5.960228699402284e-05, + "loss": 1.3807, + "step": 8282 + }, + { + "epoch": 0.8737341772151899, + "grad_norm": 0.6520185470581055, + "learning_rate": 5.9504243912780474e-05, + "loss": 1.3824, + "step": 8283 + }, + { + "epoch": 0.8738396624472574, + "grad_norm": 0.658193051815033, + "learning_rate": 5.940627820503064e-05, + "loss": 1.3429, + "step": 8284 + }, + { + "epoch": 0.8739451476793249, + "grad_norm": 0.6693471074104309, + "learning_rate": 5.930838988175097e-05, + "loss": 1.3363, + "step": 8285 + }, + { + "epoch": 0.8740506329113924, + "grad_norm": 0.6585366129875183, + "learning_rate": 5.921057895391027e-05, + "loss": 1.3444, + "step": 8286 + }, + { + "epoch": 0.87415611814346, + "grad_norm": 0.6852160692214966, + "learning_rate": 5.91128454324687e-05, + "loss": 1.3853, + "step": 8287 + }, + { + "epoch": 0.8742616033755274, + "grad_norm": 0.6637154817581177, + "learning_rate": 5.901518932837799e-05, + "loss": 1.3726, + "step": 8288 + }, + { + "epoch": 0.8743670886075949, + "grad_norm": 0.689048707485199, + "learning_rate": 5.891761065258089e-05, + "loss": 1.3667, + "step": 8289 + }, + { + "epoch": 0.8744725738396625, + "grad_norm": 0.6590829491615295, + "learning_rate": 5.8820109416011485e-05, + "loss": 1.327, + "step": 8290 + }, + { + "epoch": 0.8745780590717299, + "grad_norm": 0.7028821110725403, + "learning_rate": 5.8722685629595454e-05, + "loss": 1.3818, + "step": 
8291 + }, + { + "epoch": 0.8746835443037975, + "grad_norm": 0.6708802580833435, + "learning_rate": 5.862533930424949e-05, + "loss": 1.4019, + "step": 8292 + }, + { + "epoch": 0.874789029535865, + "grad_norm": 0.7669069766998291, + "learning_rate": 5.852807045088177e-05, + "loss": 1.3468, + "step": 8293 + }, + { + "epoch": 0.8748945147679325, + "grad_norm": 0.8195232152938843, + "learning_rate": 5.843087908039166e-05, + "loss": 1.3864, + "step": 8294 + }, + { + "epoch": 0.875, + "grad_norm": 0.6719500422477722, + "learning_rate": 5.833376520367012e-05, + "loss": 1.405, + "step": 8295 + }, + { + "epoch": 0.8751054852320675, + "grad_norm": 0.622355580329895, + "learning_rate": 5.823672883159911e-05, + "loss": 1.3649, + "step": 8296 + }, + { + "epoch": 0.875210970464135, + "grad_norm": 0.6937520503997803, + "learning_rate": 5.813976997505202e-05, + "loss": 1.3969, + "step": 8297 + }, + { + "epoch": 0.8753164556962025, + "grad_norm": 0.6504334807395935, + "learning_rate": 5.804288864489366e-05, + "loss": 1.3807, + "step": 8298 + }, + { + "epoch": 0.8754219409282701, + "grad_norm": 0.6896719932556152, + "learning_rate": 5.794608485198008e-05, + "loss": 1.3981, + "step": 8299 + }, + { + "epoch": 0.8755274261603375, + "grad_norm": 0.6575997471809387, + "learning_rate": 5.784935860715862e-05, + "loss": 1.3702, + "step": 8300 + }, + { + "epoch": 0.8756329113924051, + "grad_norm": 0.6933614611625671, + "learning_rate": 5.7752709921267855e-05, + "loss": 1.3796, + "step": 8301 + }, + { + "epoch": 0.8757383966244726, + "grad_norm": 0.6828913688659668, + "learning_rate": 5.7656138805137785e-05, + "loss": 1.3627, + "step": 8302 + }, + { + "epoch": 0.87584388185654, + "grad_norm": 0.652489960193634, + "learning_rate": 5.7559645269589764e-05, + "loss": 1.333, + "step": 8303 + }, + { + "epoch": 0.8759493670886076, + "grad_norm": 0.6983485817909241, + "learning_rate": 5.746322932543621e-05, + "loss": 1.3685, + "step": 8304 + }, + { + "epoch": 0.8760548523206751, + "grad_norm": 
0.7215436697006226, + "learning_rate": 5.736689098348125e-05, + "loss": 1.3393, + "step": 8305 + }, + { + "epoch": 0.8761603375527426, + "grad_norm": 0.6378780007362366, + "learning_rate": 5.727063025451973e-05, + "loss": 1.3782, + "step": 8306 + }, + { + "epoch": 0.8762658227848101, + "grad_norm": 0.6930611729621887, + "learning_rate": 5.717444714933845e-05, + "loss": 1.3446, + "step": 8307 + }, + { + "epoch": 0.8763713080168777, + "grad_norm": 0.6429447531700134, + "learning_rate": 5.707834167871512e-05, + "loss": 1.3817, + "step": 8308 + }, + { + "epoch": 0.8764767932489451, + "grad_norm": 0.645343542098999, + "learning_rate": 5.698231385341887e-05, + "loss": 1.3451, + "step": 8309 + }, + { + "epoch": 0.8765822784810127, + "grad_norm": 0.7297846674919128, + "learning_rate": 5.6886363684210016e-05, + "loss": 1.3664, + "step": 8310 + }, + { + "epoch": 0.8766877637130802, + "grad_norm": 0.7009482979774475, + "learning_rate": 5.6790491181840294e-05, + "loss": 1.4177, + "step": 8311 + }, + { + "epoch": 0.8767932489451477, + "grad_norm": 0.6936761736869812, + "learning_rate": 5.6694696357052685e-05, + "loss": 1.3852, + "step": 8312 + }, + { + "epoch": 0.8768987341772152, + "grad_norm": 0.7018977999687195, + "learning_rate": 5.6598979220581434e-05, + "loss": 1.3943, + "step": 8313 + }, + { + "epoch": 0.8770042194092827, + "grad_norm": 0.6838222146034241, + "learning_rate": 5.650333978315223e-05, + "loss": 1.3617, + "step": 8314 + }, + { + "epoch": 0.8771097046413502, + "grad_norm": 0.8477291464805603, + "learning_rate": 5.640777805548181e-05, + "loss": 1.3757, + "step": 8315 + }, + { + "epoch": 0.8772151898734177, + "grad_norm": 0.6578026413917542, + "learning_rate": 5.631229404827845e-05, + "loss": 1.3709, + "step": 8316 + }, + { + "epoch": 0.8773206751054853, + "grad_norm": 0.6692426204681396, + "learning_rate": 5.6216887772241596e-05, + "loss": 1.3551, + "step": 8317 + }, + { + "epoch": 0.8774261603375527, + "grad_norm": 0.6551451683044434, + "learning_rate": 
5.612155923806203e-05, + "loss": 1.3773, + "step": 8318 + }, + { + "epoch": 0.8775316455696203, + "grad_norm": 0.6973987221717834, + "learning_rate": 5.60263084564217e-05, + "loss": 1.3707, + "step": 8319 + }, + { + "epoch": 0.8776371308016878, + "grad_norm": 0.8257626891136169, + "learning_rate": 5.5931135437993994e-05, + "loss": 1.3623, + "step": 8320 + }, + { + "epoch": 0.8777426160337553, + "grad_norm": 0.6592655181884766, + "learning_rate": 5.583604019344354e-05, + "loss": 1.3507, + "step": 8321 + }, + { + "epoch": 0.8778481012658228, + "grad_norm": 0.6445162892341614, + "learning_rate": 5.574102273342616e-05, + "loss": 1.3861, + "step": 8322 + }, + { + "epoch": 0.8779535864978903, + "grad_norm": 0.6654862761497498, + "learning_rate": 5.5646083068589065e-05, + "loss": 1.3843, + "step": 8323 + }, + { + "epoch": 0.8780590717299578, + "grad_norm": 0.7155562043190002, + "learning_rate": 5.5551221209570596e-05, + "loss": 1.3988, + "step": 8324 + }, + { + "epoch": 0.8781645569620253, + "grad_norm": 0.6619378328323364, + "learning_rate": 5.5456437167000746e-05, + "loss": 1.3735, + "step": 8325 + }, + { + "epoch": 0.8782700421940929, + "grad_norm": 0.6799493432044983, + "learning_rate": 5.536173095150043e-05, + "loss": 1.3774, + "step": 8326 + }, + { + "epoch": 0.8783755274261603, + "grad_norm": 0.661311686038971, + "learning_rate": 5.526710257368192e-05, + "loss": 1.3851, + "step": 8327 + }, + { + "epoch": 0.8784810126582279, + "grad_norm": 0.6274387240409851, + "learning_rate": 5.517255204414889e-05, + "loss": 1.3552, + "step": 8328 + }, + { + "epoch": 0.8785864978902953, + "grad_norm": 0.7775511145591736, + "learning_rate": 5.507807937349604e-05, + "loss": 1.3442, + "step": 8329 + }, + { + "epoch": 0.8786919831223629, + "grad_norm": 0.6817455887794495, + "learning_rate": 5.498368457230965e-05, + "loss": 1.3752, + "step": 8330 + }, + { + "epoch": 0.8787974683544304, + "grad_norm": 0.7490395307540894, + "learning_rate": 5.4889367651167007e-05, + "loss": 1.3943, + 
"step": 8331 + }, + { + "epoch": 0.8789029535864978, + "grad_norm": 0.7508944272994995, + "learning_rate": 5.479512862063674e-05, + "loss": 1.3992, + "step": 8332 + }, + { + "epoch": 0.8790084388185654, + "grad_norm": 0.6957663297653198, + "learning_rate": 5.470096749127906e-05, + "loss": 1.3506, + "step": 8333 + }, + { + "epoch": 0.8791139240506329, + "grad_norm": 0.6471489071846008, + "learning_rate": 5.460688427364505e-05, + "loss": 1.3581, + "step": 8334 + }, + { + "epoch": 0.8792194092827004, + "grad_norm": 0.6769201755523682, + "learning_rate": 5.451287897827725e-05, + "loss": 1.383, + "step": 8335 + }, + { + "epoch": 0.8793248945147679, + "grad_norm": 0.6926968097686768, + "learning_rate": 5.441895161570934e-05, + "loss": 1.3548, + "step": 8336 + }, + { + "epoch": 0.8794303797468355, + "grad_norm": 0.6774328351020813, + "learning_rate": 5.43251021964663e-05, + "loss": 1.3638, + "step": 8337 + }, + { + "epoch": 0.8795358649789029, + "grad_norm": 0.7803463339805603, + "learning_rate": 5.423133073106457e-05, + "loss": 1.3678, + "step": 8338 + }, + { + "epoch": 0.8796413502109705, + "grad_norm": 0.6690472364425659, + "learning_rate": 5.413763723001164e-05, + "loss": 1.2833, + "step": 8339 + }, + { + "epoch": 0.879746835443038, + "grad_norm": 0.6627023220062256, + "learning_rate": 5.4044021703806375e-05, + "loss": 1.3343, + "step": 8340 + }, + { + "epoch": 0.8798523206751054, + "grad_norm": 0.7309731841087341, + "learning_rate": 5.3950484162938714e-05, + "loss": 1.3639, + "step": 8341 + }, + { + "epoch": 0.879957805907173, + "grad_norm": 0.6507136225700378, + "learning_rate": 5.385702461789019e-05, + "loss": 1.3409, + "step": 8342 + }, + { + "epoch": 0.8800632911392405, + "grad_norm": 0.6407321095466614, + "learning_rate": 5.376364307913334e-05, + "loss": 1.3265, + "step": 8343 + }, + { + "epoch": 0.880168776371308, + "grad_norm": 0.6464110016822815, + "learning_rate": 5.3670339557132045e-05, + "loss": 1.3425, + "step": 8344 + }, + { + "epoch": 
0.8802742616033755, + "grad_norm": 0.7371940016746521, + "learning_rate": 5.3577114062341446e-05, + "loss": 1.3822, + "step": 8345 + }, + { + "epoch": 0.8803797468354431, + "grad_norm": 0.7462694048881531, + "learning_rate": 5.348396660520785e-05, + "loss": 1.3516, + "step": 8346 + }, + { + "epoch": 0.8804852320675105, + "grad_norm": 0.6884382963180542, + "learning_rate": 5.339089719616891e-05, + "loss": 1.3991, + "step": 8347 + }, + { + "epoch": 0.880590717299578, + "grad_norm": 0.6477233171463013, + "learning_rate": 5.329790584565361e-05, + "loss": 1.3664, + "step": 8348 + }, + { + "epoch": 0.8806962025316456, + "grad_norm": 0.7011104226112366, + "learning_rate": 5.320499256408204e-05, + "loss": 1.3539, + "step": 8349 + }, + { + "epoch": 0.880801687763713, + "grad_norm": 0.672407865524292, + "learning_rate": 5.311215736186536e-05, + "loss": 1.391, + "step": 8350 + }, + { + "epoch": 0.8809071729957806, + "grad_norm": 0.6305171847343445, + "learning_rate": 5.3019400249406686e-05, + "loss": 1.3822, + "step": 8351 + }, + { + "epoch": 0.8810126582278481, + "grad_norm": 0.6959730982780457, + "learning_rate": 5.29267212370996e-05, + "loss": 1.3355, + "step": 8352 + }, + { + "epoch": 0.8811181434599156, + "grad_norm": 0.6653816103935242, + "learning_rate": 5.283412033532939e-05, + "loss": 1.3095, + "step": 8353 + }, + { + "epoch": 0.8812236286919831, + "grad_norm": 0.6783088445663452, + "learning_rate": 5.274159755447233e-05, + "loss": 1.3489, + "step": 8354 + }, + { + "epoch": 0.8813291139240507, + "grad_norm": 0.6785255074501038, + "learning_rate": 5.264915290489614e-05, + "loss": 1.3389, + "step": 8355 + }, + { + "epoch": 0.8814345991561181, + "grad_norm": 0.7142829298973083, + "learning_rate": 5.25567863969596e-05, + "loss": 1.3671, + "step": 8356 + }, + { + "epoch": 0.8815400843881857, + "grad_norm": 0.6451417207717896, + "learning_rate": 5.246449804101294e-05, + "loss": 1.3891, + "step": 8357 + }, + { + "epoch": 0.8816455696202532, + "grad_norm": 
0.7280308604240417, + "learning_rate": 5.237228784739739e-05, + "loss": 1.363, + "step": 8358 + }, + { + "epoch": 0.8817510548523206, + "grad_norm": 0.7152231335639954, + "learning_rate": 5.228015582644585e-05, + "loss": 1.3999, + "step": 8359 + }, + { + "epoch": 0.8818565400843882, + "grad_norm": 0.6613929271697998, + "learning_rate": 5.21881019884819e-05, + "loss": 1.3496, + "step": 8360 + }, + { + "epoch": 0.8819620253164557, + "grad_norm": 0.6943455338478088, + "learning_rate": 5.209612634382077e-05, + "loss": 1.3733, + "step": 8361 + }, + { + "epoch": 0.8820675105485232, + "grad_norm": 0.6725080609321594, + "learning_rate": 5.2004228902768815e-05, + "loss": 1.4123, + "step": 8362 + }, + { + "epoch": 0.8821729957805907, + "grad_norm": 0.6478685140609741, + "learning_rate": 5.191240967562347e-05, + "loss": 1.3696, + "step": 8363 + }, + { + "epoch": 0.8822784810126583, + "grad_norm": 0.679498016834259, + "learning_rate": 5.182066867267357e-05, + "loss": 1.385, + "step": 8364 + }, + { + "epoch": 0.8823839662447257, + "grad_norm": 0.6742092370986938, + "learning_rate": 5.172900590419915e-05, + "loss": 1.3889, + "step": 8365 + }, + { + "epoch": 0.8824894514767933, + "grad_norm": 0.6839421987533569, + "learning_rate": 5.1637421380471586e-05, + "loss": 1.3464, + "step": 8366 + }, + { + "epoch": 0.8825949367088608, + "grad_norm": 0.6160271167755127, + "learning_rate": 5.154591511175316e-05, + "loss": 1.3654, + "step": 8367 + }, + { + "epoch": 0.8827004219409282, + "grad_norm": 0.6556387543678284, + "learning_rate": 5.1454487108297924e-05, + "loss": 1.3728, + "step": 8368 + }, + { + "epoch": 0.8828059071729958, + "grad_norm": 0.6606307625770569, + "learning_rate": 5.136313738035059e-05, + "loss": 1.3508, + "step": 8369 + }, + { + "epoch": 0.8829113924050633, + "grad_norm": 0.7044562101364136, + "learning_rate": 5.127186593814748e-05, + "loss": 1.3253, + "step": 8370 + }, + { + "epoch": 0.8830168776371308, + "grad_norm": 0.6432451605796814, + "learning_rate": 
5.118067279191599e-05, + "loss": 1.3512, + "step": 8371 + }, + { + "epoch": 0.8831223628691983, + "grad_norm": 0.6635569334030151, + "learning_rate": 5.1089557951874696e-05, + "loss": 1.3853, + "step": 8372 + }, + { + "epoch": 0.8832278481012659, + "grad_norm": 0.6621307730674744, + "learning_rate": 5.0998521428233526e-05, + "loss": 1.3534, + "step": 8373 + }, + { + "epoch": 0.8833333333333333, + "grad_norm": 0.6626915335655212, + "learning_rate": 5.0907563231193556e-05, + "loss": 1.3529, + "step": 8374 + }, + { + "epoch": 0.8834388185654009, + "grad_norm": 0.6453858017921448, + "learning_rate": 5.081668337094713e-05, + "loss": 1.3961, + "step": 8375 + }, + { + "epoch": 0.8835443037974684, + "grad_norm": 0.6794809699058533, + "learning_rate": 5.072588185767763e-05, + "loss": 1.4062, + "step": 8376 + }, + { + "epoch": 0.8836497890295358, + "grad_norm": 0.6533005833625793, + "learning_rate": 5.063515870156013e-05, + "loss": 1.3711, + "step": 8377 + }, + { + "epoch": 0.8837552742616034, + "grad_norm": 0.6822280287742615, + "learning_rate": 5.054451391276035e-05, + "loss": 1.3557, + "step": 8378 + }, + { + "epoch": 0.8838607594936709, + "grad_norm": 0.6511343717575073, + "learning_rate": 5.045394750143567e-05, + "loss": 1.3542, + "step": 8379 + }, + { + "epoch": 0.8839662447257384, + "grad_norm": 0.6570602655410767, + "learning_rate": 5.0363459477734464e-05, + "loss": 1.4052, + "step": 8380 + }, + { + "epoch": 0.8840717299578059, + "grad_norm": 0.6777589917182922, + "learning_rate": 5.0273049851796205e-05, + "loss": 1.3478, + "step": 8381 + }, + { + "epoch": 0.8841772151898735, + "grad_norm": 0.680399477481842, + "learning_rate": 5.0182718633751954e-05, + "loss": 1.395, + "step": 8382 + }, + { + "epoch": 0.8842827004219409, + "grad_norm": 0.7329353094100952, + "learning_rate": 5.009246583372362e-05, + "loss": 1.3615, + "step": 8383 + }, + { + "epoch": 0.8843881856540085, + "grad_norm": 0.6926181316375732, + "learning_rate": 5.000229146182453e-05, + "loss": 1.3625, + 
"step": 8384 + }, + { + "epoch": 0.884493670886076, + "grad_norm": 0.6524111032485962, + "learning_rate": 4.9912195528159174e-05, + "loss": 1.3901, + "step": 8385 + }, + { + "epoch": 0.8845991561181434, + "grad_norm": 0.7264524698257446, + "learning_rate": 4.982217804282332e-05, + "loss": 1.3706, + "step": 8386 + }, + { + "epoch": 0.884704641350211, + "grad_norm": 0.6878796219825745, + "learning_rate": 4.973223901590382e-05, + "loss": 1.3675, + "step": 8387 + }, + { + "epoch": 0.8848101265822785, + "grad_norm": 0.6424823999404907, + "learning_rate": 4.9642378457478847e-05, + "loss": 1.3409, + "step": 8388 + }, + { + "epoch": 0.884915611814346, + "grad_norm": 0.6767147779464722, + "learning_rate": 4.955259637761761e-05, + "loss": 1.3458, + "step": 8389 + }, + { + "epoch": 0.8850210970464135, + "grad_norm": 0.6694784760475159, + "learning_rate": 4.946289278638064e-05, + "loss": 1.3724, + "step": 8390 + }, + { + "epoch": 0.8851265822784811, + "grad_norm": 0.6916648149490356, + "learning_rate": 4.9373267693819805e-05, + "loss": 1.3934, + "step": 8391 + }, + { + "epoch": 0.8852320675105485, + "grad_norm": 0.7879810333251953, + "learning_rate": 4.928372110997792e-05, + "loss": 1.3471, + "step": 8392 + }, + { + "epoch": 0.885337552742616, + "grad_norm": 0.7385640740394592, + "learning_rate": 4.9194253044889117e-05, + "loss": 1.365, + "step": 8393 + }, + { + "epoch": 0.8854430379746835, + "grad_norm": 0.70733243227005, + "learning_rate": 4.910486350857887e-05, + "loss": 1.3586, + "step": 8394 + }, + { + "epoch": 0.885548523206751, + "grad_norm": 0.6795676946640015, + "learning_rate": 4.90155525110636e-05, + "loss": 1.3611, + "step": 8395 + }, + { + "epoch": 0.8856540084388186, + "grad_norm": 0.6814989447593689, + "learning_rate": 4.89263200623512e-05, + "loss": 1.3712, + "step": 8396 + }, + { + "epoch": 0.885759493670886, + "grad_norm": 0.7205997109413147, + "learning_rate": 4.883716617244044e-05, + "loss": 1.3683, + "step": 8397 + }, + { + "epoch": 0.8858649789029536, + 
"grad_norm": 0.801544189453125, + "learning_rate": 4.874809085132148e-05, + "loss": 1.3485, + "step": 8398 + }, + { + "epoch": 0.8859704641350211, + "grad_norm": 0.7759989500045776, + "learning_rate": 4.865909410897576e-05, + "loss": 1.3781, + "step": 8399 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 0.6502814888954163, + "learning_rate": 4.8570175955375715e-05, + "loss": 1.3514, + "step": 8400 + }, + { + "epoch": 0.8861814345991561, + "grad_norm": 0.6777534484863281, + "learning_rate": 4.848133640048513e-05, + "loss": 1.378, + "step": 8401 + }, + { + "epoch": 0.8862869198312237, + "grad_norm": 0.6467025876045227, + "learning_rate": 4.839257545425879e-05, + "loss": 1.3613, + "step": 8402 + }, + { + "epoch": 0.8863924050632911, + "grad_norm": 0.7443371415138245, + "learning_rate": 4.830389312664299e-05, + "loss": 1.392, + "step": 8403 + }, + { + "epoch": 0.8864978902953586, + "grad_norm": 0.816708505153656, + "learning_rate": 4.821528942757494e-05, + "loss": 1.3815, + "step": 8404 + }, + { + "epoch": 0.8866033755274262, + "grad_norm": 0.876658022403717, + "learning_rate": 4.8126764366983126e-05, + "loss": 1.3826, + "step": 8405 + }, + { + "epoch": 0.8867088607594936, + "grad_norm": 0.6735246777534485, + "learning_rate": 4.803831795478719e-05, + "loss": 1.3954, + "step": 8406 + }, + { + "epoch": 0.8868143459915612, + "grad_norm": 0.6649228930473328, + "learning_rate": 4.794995020089804e-05, + "loss": 1.402, + "step": 8407 + }, + { + "epoch": 0.8869198312236287, + "grad_norm": 0.8409597873687744, + "learning_rate": 4.7861661115217754e-05, + "loss": 1.3757, + "step": 8408 + }, + { + "epoch": 0.8870253164556962, + "grad_norm": 0.9606679081916809, + "learning_rate": 4.7773450707639414e-05, + "loss": 1.4075, + "step": 8409 + }, + { + "epoch": 0.8871308016877637, + "grad_norm": 0.7923163175582886, + "learning_rate": 4.768531898804754e-05, + "loss": 1.3629, + "step": 8410 + }, + { + "epoch": 0.8872362869198313, + "grad_norm": 0.6860870122909546, + "learning_rate": 
4.75972659663178e-05, + "loss": 1.3912, + "step": 8411 + }, + { + "epoch": 0.8873417721518987, + "grad_norm": 0.7126031517982483, + "learning_rate": 4.75092916523169e-05, + "loss": 1.3537, + "step": 8412 + }, + { + "epoch": 0.8874472573839662, + "grad_norm": 0.7228716015815735, + "learning_rate": 4.742139605590279e-05, + "loss": 1.3926, + "step": 8413 + }, + { + "epoch": 0.8875527426160338, + "grad_norm": 0.7996667623519897, + "learning_rate": 4.733357918692466e-05, + "loss": 1.3802, + "step": 8414 + }, + { + "epoch": 0.8876582278481012, + "grad_norm": 0.816798746585846, + "learning_rate": 4.7245841055222726e-05, + "loss": 1.3764, + "step": 8415 + }, + { + "epoch": 0.8877637130801688, + "grad_norm": 0.6781414747238159, + "learning_rate": 4.715818167062863e-05, + "loss": 1.3412, + "step": 8416 + }, + { + "epoch": 0.8878691983122363, + "grad_norm": 0.6523796916007996, + "learning_rate": 4.7070601042964925e-05, + "loss": 1.3518, + "step": 8417 + }, + { + "epoch": 0.8879746835443038, + "grad_norm": 0.7011504769325256, + "learning_rate": 4.698309918204552e-05, + "loss": 1.3721, + "step": 8418 + }, + { + "epoch": 0.8880801687763713, + "grad_norm": 0.6755853891372681, + "learning_rate": 4.6895676097675225e-05, + "loss": 1.3579, + "step": 8419 + }, + { + "epoch": 0.8881856540084389, + "grad_norm": 0.7639293670654297, + "learning_rate": 4.680833179965063e-05, + "loss": 1.3763, + "step": 8420 + }, + { + "epoch": 0.8882911392405063, + "grad_norm": 0.7546006441116333, + "learning_rate": 4.672106629775882e-05, + "loss": 1.3682, + "step": 8421 + }, + { + "epoch": 0.8883966244725738, + "grad_norm": 0.7093358635902405, + "learning_rate": 4.663387960177848e-05, + "loss": 1.3725, + "step": 8422 + }, + { + "epoch": 0.8885021097046414, + "grad_norm": 0.6743266582489014, + "learning_rate": 4.654677172147912e-05, + "loss": 1.3986, + "step": 8423 + }, + { + "epoch": 0.8886075949367088, + "grad_norm": 0.6706269979476929, + "learning_rate": 4.645974266662176e-05, + "loss": 1.334, + "step": 
8424 + }, + { + "epoch": 0.8887130801687764, + "grad_norm": 0.8040145635604858, + "learning_rate": 4.637279244695844e-05, + "loss": 1.3474, + "step": 8425 + }, + { + "epoch": 0.8888185654008439, + "grad_norm": 0.6602645516395569, + "learning_rate": 4.628592107223229e-05, + "loss": 1.3403, + "step": 8426 + }, + { + "epoch": 0.8889240506329114, + "grad_norm": 0.6506659388542175, + "learning_rate": 4.6199128552177756e-05, + "loss": 1.402, + "step": 8427 + }, + { + "epoch": 0.8890295358649789, + "grad_norm": 0.6928896903991699, + "learning_rate": 4.611241489652016e-05, + "loss": 1.3662, + "step": 8428 + }, + { + "epoch": 0.8891350210970465, + "grad_norm": 0.7744578719139099, + "learning_rate": 4.6025780114976545e-05, + "loss": 1.3782, + "step": 8429 + }, + { + "epoch": 0.8892405063291139, + "grad_norm": 0.6456565856933594, + "learning_rate": 4.5939224217254574e-05, + "loss": 1.3579, + "step": 8430 + }, + { + "epoch": 0.8893459915611814, + "grad_norm": 0.684644877910614, + "learning_rate": 4.585274721305333e-05, + "loss": 1.342, + "step": 8431 + }, + { + "epoch": 0.889451476793249, + "grad_norm": 0.6694583296775818, + "learning_rate": 4.576634911206296e-05, + "loss": 1.3486, + "step": 8432 + }, + { + "epoch": 0.8895569620253164, + "grad_norm": 0.6892793774604797, + "learning_rate": 4.5680029923964724e-05, + "loss": 1.3311, + "step": 8433 + }, + { + "epoch": 0.889662447257384, + "grad_norm": 0.6987037658691406, + "learning_rate": 4.559378965843122e-05, + "loss": 1.3336, + "step": 8434 + }, + { + "epoch": 0.8897679324894515, + "grad_norm": 0.6659891605377197, + "learning_rate": 4.5507628325126144e-05, + "loss": 1.3905, + "step": 8435 + }, + { + "epoch": 0.889873417721519, + "grad_norm": 0.6734582781791687, + "learning_rate": 4.542154593370401e-05, + "loss": 1.3888, + "step": 8436 + }, + { + "epoch": 0.8899789029535865, + "grad_norm": 0.692224383354187, + "learning_rate": 4.533554249381119e-05, + "loss": 1.3466, + "step": 8437 + }, + { + "epoch": 0.890084388185654, + 
"grad_norm": 0.6624848246574402, + "learning_rate": 4.524961801508456e-05, + "loss": 1.3929, + "step": 8438 + }, + { + "epoch": 0.8901898734177215, + "grad_norm": 0.6944906711578369, + "learning_rate": 4.5163772507152425e-05, + "loss": 1.3733, + "step": 8439 + }, + { + "epoch": 0.890295358649789, + "grad_norm": 0.6638945937156677, + "learning_rate": 4.507800597963424e-05, + "loss": 1.3787, + "step": 8440 + }, + { + "epoch": 0.8904008438818566, + "grad_norm": 0.6562658548355103, + "learning_rate": 4.4992318442140575e-05, + "loss": 1.363, + "step": 8441 + }, + { + "epoch": 0.890506329113924, + "grad_norm": 0.6647380590438843, + "learning_rate": 4.490670990427309e-05, + "loss": 1.3497, + "step": 8442 + }, + { + "epoch": 0.8906118143459916, + "grad_norm": 0.6592072248458862, + "learning_rate": 4.4821180375624684e-05, + "loss": 1.3741, + "step": 8443 + }, + { + "epoch": 0.8907172995780591, + "grad_norm": 0.6674745082855225, + "learning_rate": 4.473572986577928e-05, + "loss": 1.355, + "step": 8444 + }, + { + "epoch": 0.8908227848101266, + "grad_norm": 0.666206955909729, + "learning_rate": 4.4650358384312056e-05, + "loss": 1.3434, + "step": 8445 + }, + { + "epoch": 0.8909282700421941, + "grad_norm": 0.674496054649353, + "learning_rate": 4.4565065940789515e-05, + "loss": 1.3763, + "step": 8446 + }, + { + "epoch": 0.8910337552742617, + "grad_norm": 0.7171851396560669, + "learning_rate": 4.447985254476894e-05, + "loss": 1.3955, + "step": 8447 + }, + { + "epoch": 0.8911392405063291, + "grad_norm": 0.672838032245636, + "learning_rate": 4.439471820579885e-05, + "loss": 1.3976, + "step": 8448 + }, + { + "epoch": 0.8912447257383966, + "grad_norm": 0.6757370233535767, + "learning_rate": 4.430966293341912e-05, + "loss": 1.3909, + "step": 8449 + }, + { + "epoch": 0.8913502109704642, + "grad_norm": 0.7449687123298645, + "learning_rate": 4.422468673716054e-05, + "loss": 1.364, + "step": 8450 + }, + { + "epoch": 0.8914556962025316, + "grad_norm": 0.6384792923927307, + "learning_rate": 
4.413978962654508e-05, + "loss": 1.3484, + "step": 8451 + }, + { + "epoch": 0.8915611814345992, + "grad_norm": 0.6511000394821167, + "learning_rate": 4.405497161108596e-05, + "loss": 1.3718, + "step": 8452 + }, + { + "epoch": 0.8916666666666667, + "grad_norm": 0.66456538438797, + "learning_rate": 4.397023270028749e-05, + "loss": 1.3682, + "step": 8453 + }, + { + "epoch": 0.8917721518987342, + "grad_norm": 0.6571612358093262, + "learning_rate": 4.388557290364484e-05, + "loss": 1.3886, + "step": 8454 + }, + { + "epoch": 0.8918776371308017, + "grad_norm": 0.6939250826835632, + "learning_rate": 4.3800992230644904e-05, + "loss": 1.3257, + "step": 8455 + }, + { + "epoch": 0.8919831223628693, + "grad_norm": 0.6803630590438843, + "learning_rate": 4.3716490690765194e-05, + "loss": 1.3793, + "step": 8456 + }, + { + "epoch": 0.8920886075949367, + "grad_norm": 0.6835225820541382, + "learning_rate": 4.3632068293474545e-05, + "loss": 1.3433, + "step": 8457 + }, + { + "epoch": 0.8921940928270042, + "grad_norm": 0.6854135990142822, + "learning_rate": 4.35477250482329e-05, + "loss": 1.3556, + "step": 8458 + }, + { + "epoch": 0.8922995780590718, + "grad_norm": 0.6660527586936951, + "learning_rate": 4.346346096449136e-05, + "loss": 1.38, + "step": 8459 + }, + { + "epoch": 0.8924050632911392, + "grad_norm": 0.6520915627479553, + "learning_rate": 4.337927605169212e-05, + "loss": 1.3815, + "step": 8460 + }, + { + "epoch": 0.8925105485232068, + "grad_norm": 0.6641265153884888, + "learning_rate": 4.3295170319268554e-05, + "loss": 1.3805, + "step": 8461 + }, + { + "epoch": 0.8926160337552742, + "grad_norm": 0.6600520610809326, + "learning_rate": 4.321114377664495e-05, + "loss": 1.3603, + "step": 8462 + }, + { + "epoch": 0.8927215189873418, + "grad_norm": 0.6853899359703064, + "learning_rate": 4.3127196433237205e-05, + "loss": 1.3591, + "step": 8463 + }, + { + "epoch": 0.8928270042194093, + "grad_norm": 0.640744686126709, + "learning_rate": 4.304332829845187e-05, + "loss": 1.3616, + "step": 
8464 + }, + { + "epoch": 0.8929324894514767, + "grad_norm": 0.6828067898750305, + "learning_rate": 4.2959539381686843e-05, + "loss": 1.3697, + "step": 8465 + }, + { + "epoch": 0.8930379746835443, + "grad_norm": 0.6753593683242798, + "learning_rate": 4.287582969233103e-05, + "loss": 1.3556, + "step": 8466 + }, + { + "epoch": 0.8931434599156118, + "grad_norm": 0.6705193519592285, + "learning_rate": 4.279219923976452e-05, + "loss": 1.3701, + "step": 8467 + }, + { + "epoch": 0.8932489451476793, + "grad_norm": 0.6415925025939941, + "learning_rate": 4.2708648033358554e-05, + "loss": 1.3474, + "step": 8468 + }, + { + "epoch": 0.8933544303797468, + "grad_norm": 0.6518171429634094, + "learning_rate": 4.26251760824754e-05, + "loss": 1.3786, + "step": 8469 + }, + { + "epoch": 0.8934599156118144, + "grad_norm": 0.6488243341445923, + "learning_rate": 4.2541783396468584e-05, + "loss": 1.3891, + "step": 8470 + }, + { + "epoch": 0.8935654008438818, + "grad_norm": 0.7452116012573242, + "learning_rate": 4.245846998468261e-05, + "loss": 1.3568, + "step": 8471 + }, + { + "epoch": 0.8936708860759494, + "grad_norm": 0.6736252903938293, + "learning_rate": 4.2375235856453197e-05, + "loss": 1.3579, + "step": 8472 + }, + { + "epoch": 0.8937763713080169, + "grad_norm": 0.6476534605026245, + "learning_rate": 4.229208102110721e-05, + "loss": 1.3874, + "step": 8473 + }, + { + "epoch": 0.8938818565400843, + "grad_norm": 0.6719524264335632, + "learning_rate": 4.220900548796244e-05, + "loss": 1.3456, + "step": 8474 + }, + { + "epoch": 0.8939873417721519, + "grad_norm": 0.6518126726150513, + "learning_rate": 4.212600926632804e-05, + "loss": 1.3758, + "step": 8475 + }, + { + "epoch": 0.8940928270042194, + "grad_norm": 0.6210158467292786, + "learning_rate": 4.204309236550405e-05, + "loss": 1.313, + "step": 8476 + }, + { + "epoch": 0.8941983122362869, + "grad_norm": 0.6632635593414307, + "learning_rate": 4.1960254794781714e-05, + "loss": 1.3777, + "step": 8477 + }, + { + "epoch": 0.8943037974683544, + 
"grad_norm": 0.6424921751022339, + "learning_rate": 4.1877496563443446e-05, + "loss": 1.3655, + "step": 8478 + }, + { + "epoch": 0.894409282700422, + "grad_norm": 0.6874096393585205, + "learning_rate": 4.179481768076274e-05, + "loss": 1.3412, + "step": 8479 + }, + { + "epoch": 0.8945147679324894, + "grad_norm": 0.656163215637207, + "learning_rate": 4.1712218156004014e-05, + "loss": 1.3695, + "step": 8480 + }, + { + "epoch": 0.894620253164557, + "grad_norm": 0.662020742893219, + "learning_rate": 4.16296979984232e-05, + "loss": 1.3802, + "step": 8481 + }, + { + "epoch": 0.8947257383966245, + "grad_norm": 0.6431630849838257, + "learning_rate": 4.154725721726699e-05, + "loss": 1.3916, + "step": 8482 + }, + { + "epoch": 0.8948312236286919, + "grad_norm": 0.6784250140190125, + "learning_rate": 4.1464895821773235e-05, + "loss": 1.3632, + "step": 8483 + }, + { + "epoch": 0.8949367088607595, + "grad_norm": 0.6362035870552063, + "learning_rate": 4.138261382117098e-05, + "loss": 1.358, + "step": 8484 + }, + { + "epoch": 0.895042194092827, + "grad_norm": 0.6668964624404907, + "learning_rate": 4.130041122468042e-05, + "loss": 1.3602, + "step": 8485 + }, + { + "epoch": 0.8951476793248945, + "grad_norm": 0.6434848308563232, + "learning_rate": 4.1218288041512534e-05, + "loss": 1.3757, + "step": 8486 + }, + { + "epoch": 0.895253164556962, + "grad_norm": 0.6678675413131714, + "learning_rate": 4.113624428086987e-05, + "loss": 1.3972, + "step": 8487 + }, + { + "epoch": 0.8953586497890296, + "grad_norm": 0.6011137366294861, + "learning_rate": 4.105427995194566e-05, + "loss": 1.3428, + "step": 8488 + }, + { + "epoch": 0.895464135021097, + "grad_norm": 0.6469545960426331, + "learning_rate": 4.0972395063924554e-05, + "loss": 1.3647, + "step": 8489 + }, + { + "epoch": 0.8955696202531646, + "grad_norm": 0.6805349588394165, + "learning_rate": 4.089058962598213e-05, + "loss": 1.3352, + "step": 8490 + }, + { + "epoch": 0.8956751054852321, + "grad_norm": 0.6398686170578003, + "learning_rate": 
4.080886364728506e-05, + "loss": 1.3516, + "step": 8491 + }, + { + "epoch": 0.8957805907172995, + "grad_norm": 0.7696349024772644, + "learning_rate": 4.072721713699118e-05, + "loss": 1.3372, + "step": 8492 + }, + { + "epoch": 0.8958860759493671, + "grad_norm": 0.7086409330368042, + "learning_rate": 4.064565010424942e-05, + "loss": 1.3237, + "step": 8493 + }, + { + "epoch": 0.8959915611814346, + "grad_norm": 0.7658605575561523, + "learning_rate": 4.056416255819964e-05, + "loss": 1.3285, + "step": 8494 + }, + { + "epoch": 0.8960970464135021, + "grad_norm": 0.6528369784355164, + "learning_rate": 4.048275450797312e-05, + "loss": 1.4262, + "step": 8495 + }, + { + "epoch": 0.8962025316455696, + "grad_norm": 0.6283906698226929, + "learning_rate": 4.0401425962691804e-05, + "loss": 1.3604, + "step": 8496 + }, + { + "epoch": 0.8963080168776372, + "grad_norm": 0.6685169339179993, + "learning_rate": 4.032017693146908e-05, + "loss": 1.359, + "step": 8497 + }, + { + "epoch": 0.8964135021097046, + "grad_norm": 0.6354694962501526, + "learning_rate": 4.023900742340941e-05, + "loss": 1.3464, + "step": 8498 + }, + { + "epoch": 0.8965189873417722, + "grad_norm": 0.66324383020401, + "learning_rate": 4.015791744760811e-05, + "loss": 1.3687, + "step": 8499 + }, + { + "epoch": 0.8966244725738397, + "grad_norm": 0.6539524793624878, + "learning_rate": 4.0076907013151726e-05, + "loss": 1.3659, + "step": 8500 + }, + { + "epoch": 0.8967299578059071, + "grad_norm": 0.6338005065917969, + "learning_rate": 3.999597612911793e-05, + "loss": 1.3722, + "step": 8501 + }, + { + "epoch": 0.8968354430379747, + "grad_norm": 0.6842088103294373, + "learning_rate": 3.991512480457546e-05, + "loss": 1.3906, + "step": 8502 + }, + { + "epoch": 0.8969409282700422, + "grad_norm": 0.6324447989463806, + "learning_rate": 3.9834353048583984e-05, + "loss": 1.3433, + "step": 8503 + }, + { + "epoch": 0.8970464135021097, + "grad_norm": 0.66953444480896, + "learning_rate": 3.9753660870194524e-05, + "loss": 1.3758, + "step": 
8504 + }, + { + "epoch": 0.8971518987341772, + "grad_norm": 0.6808360815048218, + "learning_rate": 3.967304827844892e-05, + "loss": 1.3458, + "step": 8505 + }, + { + "epoch": 0.8972573839662448, + "grad_norm": 0.6360301971435547, + "learning_rate": 3.95925152823802e-05, + "loss": 1.3407, + "step": 8506 + }, + { + "epoch": 0.8973628691983122, + "grad_norm": 0.6404669880867004, + "learning_rate": 3.9512061891012643e-05, + "loss": 1.3731, + "step": 8507 + }, + { + "epoch": 0.8974683544303798, + "grad_norm": 0.6530092358589172, + "learning_rate": 3.943168811336137e-05, + "loss": 1.387, + "step": 8508 + }, + { + "epoch": 0.8975738396624473, + "grad_norm": 0.6628643274307251, + "learning_rate": 3.93513939584326e-05, + "loss": 1.3837, + "step": 8509 + }, + { + "epoch": 0.8976793248945147, + "grad_norm": 0.6543424129486084, + "learning_rate": 3.927117943522379e-05, + "loss": 1.3512, + "step": 8510 + }, + { + "epoch": 0.8977848101265823, + "grad_norm": 0.6416884064674377, + "learning_rate": 3.9191044552723345e-05, + "loss": 1.3454, + "step": 8511 + }, + { + "epoch": 0.8978902953586498, + "grad_norm": 0.6615849137306213, + "learning_rate": 3.911098931991075e-05, + "loss": 1.3632, + "step": 8512 + }, + { + "epoch": 0.8979957805907173, + "grad_norm": 0.6659644842147827, + "learning_rate": 3.9031013745756655e-05, + "loss": 1.3477, + "step": 8513 + }, + { + "epoch": 0.8981012658227848, + "grad_norm": 0.6429924368858337, + "learning_rate": 3.895111783922256e-05, + "loss": 1.3317, + "step": 8514 + }, + { + "epoch": 0.8982067510548524, + "grad_norm": 0.6459934115409851, + "learning_rate": 3.887130160926139e-05, + "loss": 1.3663, + "step": 8515 + }, + { + "epoch": 0.8983122362869198, + "grad_norm": 0.660017728805542, + "learning_rate": 3.879156506481699e-05, + "loss": 1.3848, + "step": 8516 + }, + { + "epoch": 0.8984177215189874, + "grad_norm": 0.6665769219398499, + "learning_rate": 3.8711908214824035e-05, + "loss": 1.3798, + "step": 8517 + }, + { + "epoch": 0.8985232067510549, + 
"grad_norm": 0.7204260230064392, + "learning_rate": 3.863233106820857e-05, + "loss": 1.3757, + "step": 8518 + }, + { + "epoch": 0.8986286919831223, + "grad_norm": 0.6743907928466797, + "learning_rate": 3.855283363388762e-05, + "loss": 1.3406, + "step": 8519 + }, + { + "epoch": 0.8987341772151899, + "grad_norm": 0.7108284831047058, + "learning_rate": 3.8473415920769304e-05, + "loss": 1.3956, + "step": 8520 + }, + { + "epoch": 0.8988396624472574, + "grad_norm": 0.6597795486450195, + "learning_rate": 3.839407793775268e-05, + "loss": 1.3638, + "step": 8521 + }, + { + "epoch": 0.8989451476793249, + "grad_norm": 0.6614108085632324, + "learning_rate": 3.8314819693727966e-05, + "loss": 1.3763, + "step": 8522 + }, + { + "epoch": 0.8990506329113924, + "grad_norm": 0.6981967687606812, + "learning_rate": 3.823564119757647e-05, + "loss": 1.3772, + "step": 8523 + }, + { + "epoch": 0.89915611814346, + "grad_norm": 0.6627926230430603, + "learning_rate": 3.81565424581706e-05, + "loss": 1.35, + "step": 8524 + }, + { + "epoch": 0.8992616033755274, + "grad_norm": 0.6782321333885193, + "learning_rate": 3.8077523484373764e-05, + "loss": 1.4007, + "step": 8525 + }, + { + "epoch": 0.899367088607595, + "grad_norm": 0.6686844825744629, + "learning_rate": 3.79985842850403e-05, + "loss": 1.4123, + "step": 8526 + }, + { + "epoch": 0.8994725738396624, + "grad_norm": 0.6238040924072266, + "learning_rate": 3.791972486901596e-05, + "loss": 1.3687, + "step": 8527 + }, + { + "epoch": 0.8995780590717299, + "grad_norm": 0.6776757836341858, + "learning_rate": 3.784094524513709e-05, + "loss": 1.3858, + "step": 8528 + }, + { + "epoch": 0.8996835443037975, + "grad_norm": 0.6623917818069458, + "learning_rate": 3.7762245422231476e-05, + "loss": 1.3513, + "step": 8529 + }, + { + "epoch": 0.8997890295358649, + "grad_norm": 0.6812811493873596, + "learning_rate": 3.768362540911788e-05, + "loss": 1.3961, + "step": 8530 + }, + { + "epoch": 0.8998945147679325, + "grad_norm": 0.7031418085098267, + "learning_rate": 
3.760508521460584e-05, + "loss": 1.3668, + "step": 8531 + }, + { + "epoch": 0.9, + "grad_norm": 0.6844162940979004, + "learning_rate": 3.7526624847496335e-05, + "loss": 1.3541, + "step": 8532 + }, + { + "epoch": 0.9001054852320675, + "grad_norm": 0.6126548647880554, + "learning_rate": 3.744824431658131e-05, + "loss": 1.3478, + "step": 8533 + }, + { + "epoch": 0.900210970464135, + "grad_norm": 0.6570507287979126, + "learning_rate": 3.736994363064358e-05, + "loss": 1.3977, + "step": 8534 + }, + { + "epoch": 0.9003164556962026, + "grad_norm": 0.6803613305091858, + "learning_rate": 3.7291722798457215e-05, + "loss": 1.3553, + "step": 8535 + }, + { + "epoch": 0.90042194092827, + "grad_norm": 0.6599968075752258, + "learning_rate": 3.72135818287872e-05, + "loss": 1.3889, + "step": 8536 + }, + { + "epoch": 0.9005274261603375, + "grad_norm": 0.7022616863250732, + "learning_rate": 3.713552073038953e-05, + "loss": 1.4094, + "step": 8537 + }, + { + "epoch": 0.9006329113924051, + "grad_norm": 0.6493708491325378, + "learning_rate": 3.705753951201146e-05, + "loss": 1.3644, + "step": 8538 + }, + { + "epoch": 0.9007383966244725, + "grad_norm": 0.6443365216255188, + "learning_rate": 3.697963818239117e-05, + "loss": 1.3454, + "step": 8539 + }, + { + "epoch": 0.9008438818565401, + "grad_norm": 0.633640706539154, + "learning_rate": 3.690181675025775e-05, + "loss": 1.3877, + "step": 8540 + }, + { + "epoch": 0.9009493670886076, + "grad_norm": 0.64163738489151, + "learning_rate": 3.682407522433173e-05, + "loss": 1.3787, + "step": 8541 + }, + { + "epoch": 0.9010548523206751, + "grad_norm": 0.7283867597579956, + "learning_rate": 3.674641361332423e-05, + "loss": 1.3741, + "step": 8542 + }, + { + "epoch": 0.9011603375527426, + "grad_norm": 0.7043198943138123, + "learning_rate": 3.66688319259377e-05, + "loss": 1.3487, + "step": 8543 + }, + { + "epoch": 0.9012658227848102, + "grad_norm": 0.6560997366905212, + "learning_rate": 3.6591330170865524e-05, + "loss": 1.3859, + "step": 8544 + }, + { + 
"epoch": 0.9013713080168776, + "grad_norm": 0.6464356184005737, + "learning_rate": 3.6513908356792244e-05, + "loss": 1.3292, + "step": 8545 + }, + { + "epoch": 0.9014767932489451, + "grad_norm": 0.6836252212524414, + "learning_rate": 3.643656649239327e-05, + "loss": 1.3633, + "step": 8546 + }, + { + "epoch": 0.9015822784810127, + "grad_norm": 0.6832115650177002, + "learning_rate": 3.635930458633516e-05, + "loss": 1.3451, + "step": 8547 + }, + { + "epoch": 0.9016877637130801, + "grad_norm": 0.6674937009811401, + "learning_rate": 3.628212264727548e-05, + "loss": 1.3901, + "step": 8548 + }, + { + "epoch": 0.9017932489451477, + "grad_norm": 0.6533337235450745, + "learning_rate": 3.6205020683862836e-05, + "loss": 1.3369, + "step": 8549 + }, + { + "epoch": 0.9018987341772152, + "grad_norm": 0.6716039180755615, + "learning_rate": 3.612799870473696e-05, + "loss": 1.3905, + "step": 8550 + }, + { + "epoch": 0.9020042194092827, + "grad_norm": 0.66312175989151, + "learning_rate": 3.605105671852854e-05, + "loss": 1.3382, + "step": 8551 + }, + { + "epoch": 0.9021097046413502, + "grad_norm": 0.7062427997589111, + "learning_rate": 3.597419473385935e-05, + "loss": 1.3491, + "step": 8552 + }, + { + "epoch": 0.9022151898734178, + "grad_norm": 0.7678060531616211, + "learning_rate": 3.5897412759342e-05, + "loss": 1.3709, + "step": 8553 + }, + { + "epoch": 0.9023206751054852, + "grad_norm": 0.6693977117538452, + "learning_rate": 3.582071080358043e-05, + "loss": 1.386, + "step": 8554 + }, + { + "epoch": 0.9024261603375527, + "grad_norm": 0.6657706499099731, + "learning_rate": 3.5744088875169446e-05, + "loss": 1.3741, + "step": 8555 + }, + { + "epoch": 0.9025316455696203, + "grad_norm": 0.6695181131362915, + "learning_rate": 3.566754698269492e-05, + "loss": 1.3783, + "step": 8556 + }, + { + "epoch": 0.9026371308016877, + "grad_norm": 0.6476250290870667, + "learning_rate": 3.5591085134733666e-05, + "loss": 1.3033, + "step": 8557 + }, + { + "epoch": 0.9027426160337553, + "grad_norm": 
0.6798392534255981, + "learning_rate": 3.5514703339853656e-05, + "loss": 1.3455, + "step": 8558 + }, + { + "epoch": 0.9028481012658228, + "grad_norm": 0.6755329370498657, + "learning_rate": 3.543840160661396e-05, + "loss": 1.3751, + "step": 8559 + }, + { + "epoch": 0.9029535864978903, + "grad_norm": 0.6404920220375061, + "learning_rate": 3.5362179943564496e-05, + "loss": 1.3404, + "step": 8560 + }, + { + "epoch": 0.9030590717299578, + "grad_norm": 0.6599472761154175, + "learning_rate": 3.528603835924626e-05, + "loss": 1.3773, + "step": 8561 + }, + { + "epoch": 0.9031645569620254, + "grad_norm": 0.6391493678092957, + "learning_rate": 3.520997686219127e-05, + "loss": 1.389, + "step": 8562 + }, + { + "epoch": 0.9032700421940928, + "grad_norm": 0.6653231382369995, + "learning_rate": 3.513399546092269e-05, + "loss": 1.3654, + "step": 8563 + }, + { + "epoch": 0.9033755274261603, + "grad_norm": 0.7725335955619812, + "learning_rate": 3.5058094163954556e-05, + "loss": 1.3474, + "step": 8564 + }, + { + "epoch": 0.9034810126582279, + "grad_norm": 0.6826585531234741, + "learning_rate": 3.498227297979198e-05, + "loss": 1.3662, + "step": 8565 + }, + { + "epoch": 0.9035864978902953, + "grad_norm": 0.653588056564331, + "learning_rate": 3.4906531916931075e-05, + "loss": 1.4009, + "step": 8566 + }, + { + "epoch": 0.9036919831223629, + "grad_norm": 0.645310640335083, + "learning_rate": 3.483087098385906e-05, + "loss": 1.3817, + "step": 8567 + }, + { + "epoch": 0.9037974683544304, + "grad_norm": 0.6206324100494385, + "learning_rate": 3.475529018905416e-05, + "loss": 1.3458, + "step": 8568 + }, + { + "epoch": 0.9039029535864979, + "grad_norm": 0.6512148380279541, + "learning_rate": 3.467978954098549e-05, + "loss": 1.3654, + "step": 8569 + }, + { + "epoch": 0.9040084388185654, + "grad_norm": 0.7451084852218628, + "learning_rate": 3.46043690481134e-05, + "loss": 1.3954, + "step": 8570 + }, + { + "epoch": 0.904113924050633, + "grad_norm": 0.6677902340888977, + "learning_rate": 
3.4529028718888935e-05, + "loss": 1.3668, + "step": 8571 + }, + { + "epoch": 0.9042194092827004, + "grad_norm": 0.650222659111023, + "learning_rate": 3.4453768561754525e-05, + "loss": 1.3451, + "step": 8572 + }, + { + "epoch": 0.9043248945147679, + "grad_norm": 0.6514116525650024, + "learning_rate": 3.437858858514334e-05, + "loss": 1.3666, + "step": 8573 + }, + { + "epoch": 0.9044303797468355, + "grad_norm": 0.7129626870155334, + "learning_rate": 3.43034887974798e-05, + "loss": 1.3871, + "step": 8574 + }, + { + "epoch": 0.9045358649789029, + "grad_norm": 0.7285613417625427, + "learning_rate": 3.422846920717893e-05, + "loss": 1.3691, + "step": 8575 + }, + { + "epoch": 0.9046413502109705, + "grad_norm": 0.7714434862136841, + "learning_rate": 3.4153529822647414e-05, + "loss": 1.3433, + "step": 8576 + }, + { + "epoch": 0.904746835443038, + "grad_norm": 0.6483895778656006, + "learning_rate": 3.4078670652282374e-05, + "loss": 1.3913, + "step": 8577 + }, + { + "epoch": 0.9048523206751055, + "grad_norm": 0.6705356240272522, + "learning_rate": 3.400389170447218e-05, + "loss": 1.3408, + "step": 8578 + }, + { + "epoch": 0.904957805907173, + "grad_norm": 0.6598783135414124, + "learning_rate": 3.392919298759623e-05, + "loss": 1.3962, + "step": 8579 + }, + { + "epoch": 0.9050632911392406, + "grad_norm": 0.7566462755203247, + "learning_rate": 3.38545745100248e-05, + "loss": 1.3727, + "step": 8580 + }, + { + "epoch": 0.905168776371308, + "grad_norm": 0.688312828540802, + "learning_rate": 3.378003628011938e-05, + "loss": 1.3474, + "step": 8581 + }, + { + "epoch": 0.9052742616033755, + "grad_norm": 0.7081122398376465, + "learning_rate": 3.3705578306232224e-05, + "loss": 1.3768, + "step": 8582 + }, + { + "epoch": 0.9053797468354431, + "grad_norm": 0.7544238567352295, + "learning_rate": 3.363120059670688e-05, + "loss": 1.3696, + "step": 8583 + }, + { + "epoch": 0.9054852320675105, + "grad_norm": 0.6689761877059937, + "learning_rate": 3.355690315987761e-05, + "loss": 1.3568, + "step": 
8584 + }, + { + "epoch": 0.9055907172995781, + "grad_norm": 0.653740644454956, + "learning_rate": 3.3482686004069755e-05, + "loss": 1.3676, + "step": 8585 + }, + { + "epoch": 0.9056962025316456, + "grad_norm": 0.6980797648429871, + "learning_rate": 3.340854913759983e-05, + "loss": 1.3542, + "step": 8586 + }, + { + "epoch": 0.9058016877637131, + "grad_norm": 0.8822422623634338, + "learning_rate": 3.3334492568775355e-05, + "loss": 1.3751, + "step": 8587 + }, + { + "epoch": 0.9059071729957806, + "grad_norm": 0.7558891177177429, + "learning_rate": 3.3260516305894526e-05, + "loss": 1.3449, + "step": 8588 + }, + { + "epoch": 0.9060126582278482, + "grad_norm": 0.6681250333786011, + "learning_rate": 3.318662035724679e-05, + "loss": 1.3928, + "step": 8589 + }, + { + "epoch": 0.9061181434599156, + "grad_norm": 0.761403501033783, + "learning_rate": 3.31128047311127e-05, + "loss": 1.4091, + "step": 8590 + }, + { + "epoch": 0.9062236286919831, + "grad_norm": 0.6546169519424438, + "learning_rate": 3.303906943576346e-05, + "loss": 1.362, + "step": 8591 + }, + { + "epoch": 0.9063291139240506, + "grad_norm": 0.6517300009727478, + "learning_rate": 3.296541447946164e-05, + "loss": 1.3702, + "step": 8592 + }, + { + "epoch": 0.9064345991561181, + "grad_norm": 0.6865326762199402, + "learning_rate": 3.2891839870460546e-05, + "loss": 1.3852, + "step": 8593 + }, + { + "epoch": 0.9065400843881857, + "grad_norm": 0.6439382433891296, + "learning_rate": 3.281834561700467e-05, + "loss": 1.3294, + "step": 8594 + }, + { + "epoch": 0.9066455696202531, + "grad_norm": 0.6966611742973328, + "learning_rate": 3.274493172732926e-05, + "loss": 1.4215, + "step": 8595 + }, + { + "epoch": 0.9067510548523207, + "grad_norm": 0.7691476941108704, + "learning_rate": 3.26715982096609e-05, + "loss": 1.3001, + "step": 8596 + }, + { + "epoch": 0.9068565400843882, + "grad_norm": 0.6329371929168701, + "learning_rate": 3.259834507221684e-05, + "loss": 1.3291, + "step": 8597 + }, + { + "epoch": 0.9069620253164556, + 
"grad_norm": 0.6546407341957092, + "learning_rate": 3.2525172323205535e-05, + "loss": 1.3781, + "step": 8598 + }, + { + "epoch": 0.9070675105485232, + "grad_norm": 0.6413564085960388, + "learning_rate": 3.2452079970826335e-05, + "loss": 1.3474, + "step": 8599 + }, + { + "epoch": 0.9071729957805907, + "grad_norm": 0.7076972126960754, + "learning_rate": 3.237906802326951e-05, + "loss": 1.3872, + "step": 8600 + }, + { + "epoch": 0.9072784810126582, + "grad_norm": 0.6502459645271301, + "learning_rate": 3.230613648871661e-05, + "loss": 1.3875, + "step": 8601 + }, + { + "epoch": 0.9073839662447257, + "grad_norm": 0.6839002370834351, + "learning_rate": 3.223328537533976e-05, + "loss": 1.3736, + "step": 8602 + }, + { + "epoch": 0.9074894514767933, + "grad_norm": 0.6426451206207275, + "learning_rate": 3.216051469130243e-05, + "loss": 1.3462, + "step": 8603 + }, + { + "epoch": 0.9075949367088607, + "grad_norm": 0.705525815486908, + "learning_rate": 3.208782444475894e-05, + "loss": 1.3806, + "step": 8604 + }, + { + "epoch": 0.9077004219409283, + "grad_norm": 0.6420931816101074, + "learning_rate": 3.201521464385443e-05, + "loss": 1.3515, + "step": 8605 + }, + { + "epoch": 0.9078059071729958, + "grad_norm": 0.7082102298736572, + "learning_rate": 3.194268529672539e-05, + "loss": 1.3579, + "step": 8606 + }, + { + "epoch": 0.9079113924050632, + "grad_norm": 0.7145301699638367, + "learning_rate": 3.187023641149908e-05, + "loss": 1.3527, + "step": 8607 + }, + { + "epoch": 0.9080168776371308, + "grad_norm": 0.7105602622032166, + "learning_rate": 3.1797867996293663e-05, + "loss": 1.352, + "step": 8608 + }, + { + "epoch": 0.9081223628691983, + "grad_norm": 0.6412433385848999, + "learning_rate": 3.172558005921841e-05, + "loss": 1.3695, + "step": 8609 + }, + { + "epoch": 0.9082278481012658, + "grad_norm": 0.6377705931663513, + "learning_rate": 3.165337260837351e-05, + "loss": 1.357, + "step": 8610 + }, + { + "epoch": 0.9083333333333333, + "grad_norm": 0.6600427031517029, + 
"learning_rate": 3.158124565185022e-05, + "loss": 1.3738, + "step": 8611 + }, + { + "epoch": 0.9084388185654009, + "grad_norm": 0.6816179752349854, + "learning_rate": 3.1509199197730765e-05, + "loss": 1.3728, + "step": 8612 + }, + { + "epoch": 0.9085443037974683, + "grad_norm": 0.6578954458236694, + "learning_rate": 3.143723325408826e-05, + "loss": 1.3417, + "step": 8613 + }, + { + "epoch": 0.9086497890295359, + "grad_norm": 0.6536744236946106, + "learning_rate": 3.136534782898667e-05, + "loss": 1.3831, + "step": 8614 + }, + { + "epoch": 0.9087552742616034, + "grad_norm": 0.7125338315963745, + "learning_rate": 3.129354293048148e-05, + "loss": 1.3175, + "step": 8615 + }, + { + "epoch": 0.9088607594936708, + "grad_norm": 0.6693426966667175, + "learning_rate": 3.122181856661857e-05, + "loss": 1.3949, + "step": 8616 + }, + { + "epoch": 0.9089662447257384, + "grad_norm": 0.6449471116065979, + "learning_rate": 3.1150174745435026e-05, + "loss": 1.3414, + "step": 8617 + }, + { + "epoch": 0.9090717299578059, + "grad_norm": 0.6548881530761719, + "learning_rate": 3.107861147495891e-05, + "loss": 1.3827, + "step": 8618 + }, + { + "epoch": 0.9091772151898734, + "grad_norm": 0.6633872389793396, + "learning_rate": 3.100712876320924e-05, + "loss": 1.3163, + "step": 8619 + }, + { + "epoch": 0.9092827004219409, + "grad_norm": 0.6655197739601135, + "learning_rate": 3.093572661819602e-05, + "loss": 1.3842, + "step": 8620 + }, + { + "epoch": 0.9093881856540085, + "grad_norm": 0.697942852973938, + "learning_rate": 3.086440504792026e-05, + "loss": 1.3698, + "step": 8621 + }, + { + "epoch": 0.9094936708860759, + "grad_norm": 0.6354434490203857, + "learning_rate": 3.079316406037375e-05, + "loss": 1.3187, + "step": 8622 + }, + { + "epoch": 0.9095991561181435, + "grad_norm": 0.7026781439781189, + "learning_rate": 3.072200366353958e-05, + "loss": 1.3715, + "step": 8623 + }, + { + "epoch": 0.909704641350211, + "grad_norm": 0.6462761759757996, + "learning_rate": 3.0650923865391395e-05, + 
"loss": 1.3477, + "step": 8624 + }, + { + "epoch": 0.9098101265822784, + "grad_norm": 0.6307592391967773, + "learning_rate": 3.057992467389431e-05, + "loss": 1.3769, + "step": 8625 + }, + { + "epoch": 0.909915611814346, + "grad_norm": 0.6834284067153931, + "learning_rate": 3.0509006097004048e-05, + "loss": 1.3844, + "step": 8626 + }, + { + "epoch": 0.9100210970464135, + "grad_norm": 0.6566100716590881, + "learning_rate": 3.043816814266734e-05, + "loss": 1.3733, + "step": 8627 + }, + { + "epoch": 0.910126582278481, + "grad_norm": 0.6491342782974243, + "learning_rate": 3.0367410818821913e-05, + "loss": 1.3277, + "step": 8628 + }, + { + "epoch": 0.9102320675105485, + "grad_norm": 0.6741083860397339, + "learning_rate": 3.029673413339651e-05, + "loss": 1.3944, + "step": 8629 + }, + { + "epoch": 0.9103375527426161, + "grad_norm": 0.66437166929245, + "learning_rate": 3.022613809431088e-05, + "loss": 1.416, + "step": 8630 + }, + { + "epoch": 0.9104430379746835, + "grad_norm": 0.6729529500007629, + "learning_rate": 3.015562270947553e-05, + "loss": 1.3765, + "step": 8631 + }, + { + "epoch": 0.9105485232067511, + "grad_norm": 0.67221599817276, + "learning_rate": 3.0085187986792136e-05, + "loss": 1.3286, + "step": 8632 + }, + { + "epoch": 0.9106540084388186, + "grad_norm": 0.6712231636047363, + "learning_rate": 3.00148339341533e-05, + "loss": 1.3276, + "step": 8633 + }, + { + "epoch": 0.910759493670886, + "grad_norm": 0.6728264689445496, + "learning_rate": 2.994456055944231e-05, + "loss": 1.3744, + "step": 8634 + }, + { + "epoch": 0.9108649789029536, + "grad_norm": 0.6509149670600891, + "learning_rate": 2.9874367870534018e-05, + "loss": 1.3644, + "step": 8635 + }, + { + "epoch": 0.9109704641350211, + "grad_norm": 0.6845703721046448, + "learning_rate": 2.9804255875293645e-05, + "loss": 1.3327, + "step": 8636 + }, + { + "epoch": 0.9110759493670886, + "grad_norm": 0.6887806057929993, + "learning_rate": 2.9734224581577568e-05, + "loss": 1.3866, + "step": 8637 + }, + { + "epoch": 
0.9111814345991561, + "grad_norm": 0.6438666582107544, + "learning_rate": 2.966427399723326e-05, + "loss": 1.3429, + "step": 8638 + }, + { + "epoch": 0.9112869198312237, + "grad_norm": 0.6888579726219177, + "learning_rate": 2.959440413009895e-05, + "loss": 1.3389, + "step": 8639 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 0.7039290070533752, + "learning_rate": 2.952461498800388e-05, + "loss": 1.3284, + "step": 8640 + }, + { + "epoch": 0.9114978902953587, + "grad_norm": 0.6571919322013855, + "learning_rate": 2.945490657876837e-05, + "loss": 1.3413, + "step": 8641 + }, + { + "epoch": 0.9116033755274262, + "grad_norm": 0.6426750421524048, + "learning_rate": 2.938527891020351e-05, + "loss": 1.3611, + "step": 8642 + }, + { + "epoch": 0.9117088607594936, + "grad_norm": 0.628846287727356, + "learning_rate": 2.931573199011148e-05, + "loss": 1.347, + "step": 8643 + }, + { + "epoch": 0.9118143459915612, + "grad_norm": 0.6542797088623047, + "learning_rate": 2.92462658262852e-05, + "loss": 1.3489, + "step": 8644 + }, + { + "epoch": 0.9119198312236287, + "grad_norm": 0.707127571105957, + "learning_rate": 2.9176880426508957e-05, + "loss": 1.3036, + "step": 8645 + }, + { + "epoch": 0.9120253164556962, + "grad_norm": 0.7270404696464539, + "learning_rate": 2.9107575798557605e-05, + "loss": 1.3724, + "step": 8646 + }, + { + "epoch": 0.9121308016877637, + "grad_norm": 0.6637927889823914, + "learning_rate": 2.9038351950197107e-05, + "loss": 1.3495, + "step": 8647 + }, + { + "epoch": 0.9122362869198313, + "grad_norm": 0.6684118509292603, + "learning_rate": 2.8969208889184335e-05, + "loss": 1.4209, + "step": 8648 + }, + { + "epoch": 0.9123417721518987, + "grad_norm": 0.6763123869895935, + "learning_rate": 2.890014662326701e-05, + "loss": 1.3594, + "step": 8649 + }, + { + "epoch": 0.9124472573839663, + "grad_norm": 0.6621986031532288, + "learning_rate": 2.8831165160184024e-05, + "loss": 1.3835, + "step": 8650 + }, + { + "epoch": 0.9125527426160338, + "grad_norm": 
0.6310862898826599, + "learning_rate": 2.8762264507665113e-05, + "loss": 1.3733, + "step": 8651 + }, + { + "epoch": 0.9126582278481012, + "grad_norm": 0.7757952213287354, + "learning_rate": 2.869344467343077e-05, + "loss": 1.3823, + "step": 8652 + }, + { + "epoch": 0.9127637130801688, + "grad_norm": 0.6394551992416382, + "learning_rate": 2.862470566519265e-05, + "loss": 1.367, + "step": 8653 + }, + { + "epoch": 0.9128691983122363, + "grad_norm": 0.7147219181060791, + "learning_rate": 2.855604749065352e-05, + "loss": 1.3943, + "step": 8654 + }, + { + "epoch": 0.9129746835443038, + "grad_norm": 0.665749192237854, + "learning_rate": 2.8487470157506633e-05, + "loss": 1.3394, + "step": 8655 + }, + { + "epoch": 0.9130801687763713, + "grad_norm": 0.726699709892273, + "learning_rate": 2.84189736734366e-05, + "loss": 1.3396, + "step": 8656 + }, + { + "epoch": 0.9131856540084389, + "grad_norm": 0.7140108942985535, + "learning_rate": 2.8350558046118607e-05, + "loss": 1.3163, + "step": 8657 + }, + { + "epoch": 0.9132911392405063, + "grad_norm": 0.6672967076301575, + "learning_rate": 2.828222328321911e-05, + "loss": 1.3515, + "step": 8658 + }, + { + "epoch": 0.9133966244725739, + "grad_norm": 0.6612528562545776, + "learning_rate": 2.8213969392395233e-05, + "loss": 1.337, + "step": 8659 + }, + { + "epoch": 0.9135021097046413, + "grad_norm": 0.7299373745918274, + "learning_rate": 2.8145796381295276e-05, + "loss": 1.3723, + "step": 8660 + }, + { + "epoch": 0.9136075949367088, + "grad_norm": 0.6639196872711182, + "learning_rate": 2.807770425755829e-05, + "loss": 1.3353, + "step": 8661 + }, + { + "epoch": 0.9137130801687764, + "grad_norm": 0.690264105796814, + "learning_rate": 2.800969302881434e-05, + "loss": 1.3478, + "step": 8662 + }, + { + "epoch": 0.9138185654008438, + "grad_norm": 0.7738808393478394, + "learning_rate": 2.7941762702684503e-05, + "loss": 1.3804, + "step": 8663 + }, + { + "epoch": 0.9139240506329114, + "grad_norm": 0.6826947927474976, + "learning_rate": 
2.7873913286780683e-05, + "loss": 1.3941, + "step": 8664 + }, + { + "epoch": 0.9140295358649789, + "grad_norm": 0.6676325798034668, + "learning_rate": 2.7806144788705718e-05, + "loss": 1.3778, + "step": 8665 + }, + { + "epoch": 0.9141350210970464, + "grad_norm": 0.6506310701370239, + "learning_rate": 2.7738457216053447e-05, + "loss": 1.341, + "step": 8666 + }, + { + "epoch": 0.9142405063291139, + "grad_norm": 0.6977909803390503, + "learning_rate": 2.7670850576408556e-05, + "loss": 1.3555, + "step": 8667 + }, + { + "epoch": 0.9143459915611815, + "grad_norm": 0.7320038676261902, + "learning_rate": 2.7603324877346653e-05, + "loss": 1.3448, + "step": 8668 + }, + { + "epoch": 0.9144514767932489, + "grad_norm": 0.6299054026603699, + "learning_rate": 2.7535880126434433e-05, + "loss": 1.361, + "step": 8669 + }, + { + "epoch": 0.9145569620253164, + "grad_norm": 0.6373763084411621, + "learning_rate": 2.7468516331229432e-05, + "loss": 1.391, + "step": 8670 + }, + { + "epoch": 0.914662447257384, + "grad_norm": 0.6597040891647339, + "learning_rate": 2.7401233499279866e-05, + "loss": 1.4014, + "step": 8671 + }, + { + "epoch": 0.9147679324894514, + "grad_norm": 0.6540875434875488, + "learning_rate": 2.7334031638125367e-05, + "loss": 1.4077, + "step": 8672 + }, + { + "epoch": 0.914873417721519, + "grad_norm": 0.6303385496139526, + "learning_rate": 2.726691075529625e-05, + "loss": 1.35, + "step": 8673 + }, + { + "epoch": 0.9149789029535865, + "grad_norm": 0.6578795313835144, + "learning_rate": 2.7199870858313574e-05, + "loss": 1.3482, + "step": 8674 + }, + { + "epoch": 0.915084388185654, + "grad_norm": 0.6697378158569336, + "learning_rate": 2.7132911954689672e-05, + "loss": 1.3796, + "step": 8675 + }, + { + "epoch": 0.9151898734177215, + "grad_norm": 0.6317864656448364, + "learning_rate": 2.706603405192745e-05, + "loss": 1.3667, + "step": 8676 + }, + { + "epoch": 0.9152953586497891, + "grad_norm": 0.6916254758834839, + "learning_rate": 2.6999237157521005e-05, + "loss": 1.3482, + 
"step": 8677 + }, + { + "epoch": 0.9154008438818565, + "grad_norm": 0.648242712020874, + "learning_rate": 2.6932521278955262e-05, + "loss": 1.3861, + "step": 8678 + }, + { + "epoch": 0.915506329113924, + "grad_norm": 0.6670178174972534, + "learning_rate": 2.686588642370591e-05, + "loss": 1.3965, + "step": 8679 + }, + { + "epoch": 0.9156118143459916, + "grad_norm": 0.6399694681167603, + "learning_rate": 2.6799332599239974e-05, + "loss": 1.3424, + "step": 8680 + }, + { + "epoch": 0.915717299578059, + "grad_norm": 0.7120272517204285, + "learning_rate": 2.6732859813014987e-05, + "loss": 1.3967, + "step": 8681 + }, + { + "epoch": 0.9158227848101266, + "grad_norm": 0.6361318230628967, + "learning_rate": 2.666646807247966e-05, + "loss": 1.3799, + "step": 8682 + }, + { + "epoch": 0.9159282700421941, + "grad_norm": 0.6508898735046387, + "learning_rate": 2.660015738507346e-05, + "loss": 1.3484, + "step": 8683 + }, + { + "epoch": 0.9160337552742616, + "grad_norm": 0.655553936958313, + "learning_rate": 2.653392775822677e-05, + "loss": 1.3719, + "step": 8684 + }, + { + "epoch": 0.9161392405063291, + "grad_norm": 0.6656926274299622, + "learning_rate": 2.6467779199361e-05, + "loss": 1.3571, + "step": 8685 + }, + { + "epoch": 0.9162447257383967, + "grad_norm": 0.7081670165061951, + "learning_rate": 2.6401711715888454e-05, + "loss": 1.37, + "step": 8686 + }, + { + "epoch": 0.9163502109704641, + "grad_norm": 0.652429461479187, + "learning_rate": 2.6335725315212304e-05, + "loss": 1.3613, + "step": 8687 + }, + { + "epoch": 0.9164556962025316, + "grad_norm": 0.6486814022064209, + "learning_rate": 2.626982000472655e-05, + "loss": 1.3725, + "step": 8688 + }, + { + "epoch": 0.9165611814345992, + "grad_norm": 0.6357866525650024, + "learning_rate": 2.6203995791816372e-05, + "loss": 1.3912, + "step": 8689 + }, + { + "epoch": 0.9166666666666666, + "grad_norm": 0.6772887110710144, + "learning_rate": 2.6138252683857693e-05, + "loss": 1.339, + "step": 8690 + }, + { + "epoch": 0.9167721518987342, 
+ "grad_norm": 0.7111568450927734, + "learning_rate": 2.607259068821721e-05, + "loss": 1.3934, + "step": 8691 + }, + { + "epoch": 0.9168776371308017, + "grad_norm": 0.6271485090255737, + "learning_rate": 2.6007009812252875e-05, + "loss": 1.3579, + "step": 8692 + }, + { + "epoch": 0.9169831223628692, + "grad_norm": 0.6461260914802551, + "learning_rate": 2.594151006331322e-05, + "loss": 1.3661, + "step": 8693 + }, + { + "epoch": 0.9170886075949367, + "grad_norm": 0.6490582823753357, + "learning_rate": 2.5876091448737788e-05, + "loss": 1.3762, + "step": 8694 + }, + { + "epoch": 0.9171940928270043, + "grad_norm": 0.6274640560150146, + "learning_rate": 2.5810753975857136e-05, + "loss": 1.3328, + "step": 8695 + }, + { + "epoch": 0.9172995780590717, + "grad_norm": 0.6669328212738037, + "learning_rate": 2.5745497651992662e-05, + "loss": 1.3693, + "step": 8696 + }, + { + "epoch": 0.9174050632911392, + "grad_norm": 0.6451846957206726, + "learning_rate": 2.568032248445651e-05, + "loss": 1.3539, + "step": 8697 + }, + { + "epoch": 0.9175105485232068, + "grad_norm": 0.6400876641273499, + "learning_rate": 2.561522848055217e-05, + "loss": 1.3524, + "step": 8698 + }, + { + "epoch": 0.9176160337552742, + "grad_norm": 0.6516169905662537, + "learning_rate": 2.5550215647573482e-05, + "loss": 1.3865, + "step": 8699 + }, + { + "epoch": 0.9177215189873418, + "grad_norm": 0.6268681287765503, + "learning_rate": 2.5485283992805615e-05, + "loss": 1.3497, + "step": 8700 + }, + { + "epoch": 0.9178270042194093, + "grad_norm": 0.6585744619369507, + "learning_rate": 2.5420433523524493e-05, + "loss": 1.3589, + "step": 8701 + }, + { + "epoch": 0.9179324894514768, + "grad_norm": 0.6630549430847168, + "learning_rate": 2.5355664246996813e-05, + "loss": 1.3306, + "step": 8702 + }, + { + "epoch": 0.9180379746835443, + "grad_norm": 0.6446616053581238, + "learning_rate": 2.5290976170480346e-05, + "loss": 1.3855, + "step": 8703 + }, + { + "epoch": 0.9181434599156119, + "grad_norm": 0.6453248262405396, + 
"learning_rate": 2.522636930122371e-05, + "loss": 1.3505, + "step": 8704 + }, + { + "epoch": 0.9182489451476793, + "grad_norm": 0.6883383989334106, + "learning_rate": 2.516184364646637e-05, + "loss": 1.3905, + "step": 8705 + }, + { + "epoch": 0.9183544303797468, + "grad_norm": 0.6702013611793518, + "learning_rate": 2.5097399213438955e-05, + "loss": 1.3475, + "step": 8706 + }, + { + "epoch": 0.9184599156118144, + "grad_norm": 0.6602274775505066, + "learning_rate": 2.50330360093626e-05, + "loss": 1.375, + "step": 8707 + }, + { + "epoch": 0.9185654008438818, + "grad_norm": 0.6826262474060059, + "learning_rate": 2.4968754041449633e-05, + "loss": 1.3508, + "step": 8708 + }, + { + "epoch": 0.9186708860759494, + "grad_norm": 0.6843699812889099, + "learning_rate": 2.490455331690303e-05, + "loss": 1.3673, + "step": 8709 + }, + { + "epoch": 0.9187763713080169, + "grad_norm": 0.6695432662963867, + "learning_rate": 2.4840433842916872e-05, + "loss": 1.3874, + "step": 8710 + }, + { + "epoch": 0.9188818565400844, + "grad_norm": 0.6570336818695068, + "learning_rate": 2.4776395626676162e-05, + "loss": 1.3803, + "step": 8711 + }, + { + "epoch": 0.9189873417721519, + "grad_norm": 0.6400668621063232, + "learning_rate": 2.471243867535658e-05, + "loss": 1.352, + "step": 8712 + }, + { + "epoch": 0.9190928270042195, + "grad_norm": 0.6499803066253662, + "learning_rate": 2.4648562996124806e-05, + "loss": 1.3673, + "step": 8713 + }, + { + "epoch": 0.9191983122362869, + "grad_norm": 0.8653640151023865, + "learning_rate": 2.4584768596138452e-05, + "loss": 1.3642, + "step": 8714 + }, + { + "epoch": 0.9193037974683544, + "grad_norm": 0.7347780466079712, + "learning_rate": 2.4521055482546046e-05, + "loss": 1.3645, + "step": 8715 + }, + { + "epoch": 0.919409282700422, + "grad_norm": 0.6375982165336609, + "learning_rate": 2.4457423662486962e-05, + "loss": 1.3398, + "step": 8716 + }, + { + "epoch": 0.9195147679324894, + "grad_norm": 0.646506667137146, + "learning_rate": 2.4393873143091495e-05, + 
"loss": 1.3116, + "step": 8717 + }, + { + "epoch": 0.919620253164557, + "grad_norm": 0.6918492317199707, + "learning_rate": 2.43304039314807e-05, + "loss": 1.3797, + "step": 8718 + }, + { + "epoch": 0.9197257383966245, + "grad_norm": 0.7194741368293762, + "learning_rate": 2.4267016034766637e-05, + "loss": 1.3727, + "step": 8719 + }, + { + "epoch": 0.919831223628692, + "grad_norm": 0.7094241976737976, + "learning_rate": 2.4203709460052292e-05, + "loss": 1.3512, + "step": 8720 + }, + { + "epoch": 0.9199367088607595, + "grad_norm": 0.7419690489768982, + "learning_rate": 2.414048421443141e-05, + "loss": 1.3848, + "step": 8721 + }, + { + "epoch": 0.9200421940928271, + "grad_norm": 0.7133418917655945, + "learning_rate": 2.407734030498873e-05, + "loss": 1.3757, + "step": 8722 + }, + { + "epoch": 0.9201476793248945, + "grad_norm": 0.632131040096283, + "learning_rate": 2.4014277738799774e-05, + "loss": 1.3836, + "step": 8723 + }, + { + "epoch": 0.920253164556962, + "grad_norm": 0.6808504462242126, + "learning_rate": 2.395129652293121e-05, + "loss": 1.419, + "step": 8724 + }, + { + "epoch": 0.9203586497890295, + "grad_norm": 0.6784893870353699, + "learning_rate": 2.3888396664440232e-05, + "loss": 1.3515, + "step": 8725 + }, + { + "epoch": 0.920464135021097, + "grad_norm": 0.6837197542190552, + "learning_rate": 2.38255781703752e-05, + "loss": 1.3219, + "step": 8726 + }, + { + "epoch": 0.9205696202531646, + "grad_norm": 0.6403244733810425, + "learning_rate": 2.3762841047775068e-05, + "loss": 1.3673, + "step": 8727 + }, + { + "epoch": 0.920675105485232, + "grad_norm": 0.7176913022994995, + "learning_rate": 2.3700185303670046e-05, + "loss": 1.3289, + "step": 8728 + }, + { + "epoch": 0.9207805907172996, + "grad_norm": 0.6297885179519653, + "learning_rate": 2.363761094508085e-05, + "loss": 1.424, + "step": 8729 + }, + { + "epoch": 0.9208860759493671, + "grad_norm": 0.6335397958755493, + "learning_rate": 2.357511797901929e-05, + "loss": 1.3567, + "step": 8730 + }, + { + "epoch": 
0.9209915611814345, + "grad_norm": 0.6349387764930725, + "learning_rate": 2.3512706412488012e-05, + "loss": 1.3456, + "step": 8731 + }, + { + "epoch": 0.9210970464135021, + "grad_norm": 0.661637544631958, + "learning_rate": 2.345037625248067e-05, + "loss": 1.3719, + "step": 8732 + }, + { + "epoch": 0.9212025316455696, + "grad_norm": 0.6574177742004395, + "learning_rate": 2.3388127505981515e-05, + "loss": 1.382, + "step": 8733 + }, + { + "epoch": 0.9213080168776371, + "grad_norm": 0.6569992899894714, + "learning_rate": 2.3325960179965967e-05, + "loss": 1.3402, + "step": 8734 + }, + { + "epoch": 0.9214135021097046, + "grad_norm": 0.6466109752655029, + "learning_rate": 2.3263874281400034e-05, + "loss": 1.3455, + "step": 8735 + }, + { + "epoch": 0.9215189873417722, + "grad_norm": 0.7758863568305969, + "learning_rate": 2.3201869817240817e-05, + "loss": 1.3562, + "step": 8736 + }, + { + "epoch": 0.9216244725738396, + "grad_norm": 0.703473687171936, + "learning_rate": 2.313994679443626e-05, + "loss": 1.3474, + "step": 8737 + }, + { + "epoch": 0.9217299578059072, + "grad_norm": 0.7265438437461853, + "learning_rate": 2.307810521992515e-05, + "loss": 1.3844, + "step": 8738 + }, + { + "epoch": 0.9218354430379747, + "grad_norm": 0.6269305348396301, + "learning_rate": 2.301634510063702e-05, + "loss": 1.3679, + "step": 8739 + }, + { + "epoch": 0.9219409282700421, + "grad_norm": 0.6480352282524109, + "learning_rate": 2.2954666443492505e-05, + "loss": 1.3557, + "step": 8740 + }, + { + "epoch": 0.9220464135021097, + "grad_norm": 0.6642633080482483, + "learning_rate": 2.2893069255402993e-05, + "loss": 1.3454, + "step": 8741 + }, + { + "epoch": 0.9221518987341772, + "grad_norm": 0.6254279613494873, + "learning_rate": 2.2831553543270793e-05, + "loss": 1.3408, + "step": 8742 + }, + { + "epoch": 0.9222573839662447, + "grad_norm": 0.6487465500831604, + "learning_rate": 2.277011931398898e-05, + "loss": 1.3001, + "step": 8743 + }, + { + "epoch": 0.9223628691983122, + "grad_norm": 
0.6875107288360596, + "learning_rate": 2.2708766574441626e-05, + "loss": 1.3625, + "step": 8744 + }, + { + "epoch": 0.9224683544303798, + "grad_norm": 0.7042202353477478, + "learning_rate": 2.2647495331503565e-05, + "loss": 1.3973, + "step": 8745 + }, + { + "epoch": 0.9225738396624472, + "grad_norm": 0.7264398336410522, + "learning_rate": 2.2586305592040558e-05, + "loss": 1.3693, + "step": 8746 + }, + { + "epoch": 0.9226793248945148, + "grad_norm": 0.6540358662605286, + "learning_rate": 2.2525197362909282e-05, + "loss": 1.3491, + "step": 8747 + }, + { + "epoch": 0.9227848101265823, + "grad_norm": 0.6484928727149963, + "learning_rate": 2.24641706509571e-05, + "loss": 1.3507, + "step": 8748 + }, + { + "epoch": 0.9228902953586497, + "grad_norm": 0.6463927030563354, + "learning_rate": 2.2403225463022288e-05, + "loss": 1.3761, + "step": 8749 + }, + { + "epoch": 0.9229957805907173, + "grad_norm": 0.6400175094604492, + "learning_rate": 2.2342361805934297e-05, + "loss": 1.3854, + "step": 8750 + }, + { + "epoch": 0.9231012658227848, + "grad_norm": 0.6406482458114624, + "learning_rate": 2.2281579686513176e-05, + "loss": 1.3298, + "step": 8751 + }, + { + "epoch": 0.9232067510548523, + "grad_norm": 0.650988757610321, + "learning_rate": 2.2220879111569725e-05, + "loss": 1.3767, + "step": 8752 + }, + { + "epoch": 0.9233122362869198, + "grad_norm": 0.664650559425354, + "learning_rate": 2.2160260087905753e-05, + "loss": 1.3327, + "step": 8753 + }, + { + "epoch": 0.9234177215189874, + "grad_norm": 0.6429218053817749, + "learning_rate": 2.2099722622314078e-05, + "loss": 1.3497, + "step": 8754 + }, + { + "epoch": 0.9235232067510548, + "grad_norm": 0.6552659869194031, + "learning_rate": 2.203926672157802e-05, + "loss": 1.3629, + "step": 8755 + }, + { + "epoch": 0.9236286919831224, + "grad_norm": 0.6646266579627991, + "learning_rate": 2.1978892392472085e-05, + "loss": 1.356, + "step": 8756 + }, + { + "epoch": 0.9237341772151899, + "grad_norm": 0.6333168745040894, + "learning_rate": 
2.1918599641761517e-05, + "loss": 1.3505, + "step": 8757 + }, + { + "epoch": 0.9238396624472573, + "grad_norm": 0.6558233499526978, + "learning_rate": 2.185838847620242e-05, + "loss": 1.2869, + "step": 8758 + }, + { + "epoch": 0.9239451476793249, + "grad_norm": 0.6601163744926453, + "learning_rate": 2.1798258902541723e-05, + "loss": 1.3682, + "step": 8759 + }, + { + "epoch": 0.9240506329113924, + "grad_norm": 0.7284969091415405, + "learning_rate": 2.173821092751721e-05, + "loss": 1.3344, + "step": 8760 + }, + { + "epoch": 0.9241561181434599, + "grad_norm": 0.7002995610237122, + "learning_rate": 2.1678244557857663e-05, + "loss": 1.3787, + "step": 8761 + }, + { + "epoch": 0.9242616033755274, + "grad_norm": 0.6864391565322876, + "learning_rate": 2.161835980028254e-05, + "loss": 1.3171, + "step": 8762 + }, + { + "epoch": 0.924367088607595, + "grad_norm": 0.7038198113441467, + "learning_rate": 2.1558556661502222e-05, + "loss": 1.4177, + "step": 8763 + }, + { + "epoch": 0.9244725738396624, + "grad_norm": 0.6264368295669556, + "learning_rate": 2.1498835148218017e-05, + "loss": 1.3701, + "step": 8764 + }, + { + "epoch": 0.92457805907173, + "grad_norm": 0.6489025354385376, + "learning_rate": 2.1439195267121902e-05, + "loss": 1.3313, + "step": 8765 + }, + { + "epoch": 0.9246835443037975, + "grad_norm": 0.721035361289978, + "learning_rate": 2.137963702489687e-05, + "loss": 1.3521, + "step": 8766 + }, + { + "epoch": 0.924789029535865, + "grad_norm": 0.7870285511016846, + "learning_rate": 2.132016042821683e-05, + "loss": 1.3652, + "step": 8767 + }, + { + "epoch": 0.9248945147679325, + "grad_norm": 0.6800219416618347, + "learning_rate": 2.1260765483746282e-05, + "loss": 1.3482, + "step": 8768 + }, + { + "epoch": 0.925, + "grad_norm": 0.6709918975830078, + "learning_rate": 2.120145219814082e-05, + "loss": 1.3552, + "step": 8769 + }, + { + "epoch": 0.9251054852320675, + "grad_norm": 0.6536772847175598, + "learning_rate": 2.1142220578046712e-05, + "loss": 1.3384, + "step": 8770 + 
}, + { + "epoch": 0.925210970464135, + "grad_norm": 0.640609860420227, + "learning_rate": 2.1083070630101232e-05, + "loss": 1.4004, + "step": 8771 + }, + { + "epoch": 0.9253164556962026, + "grad_norm": 0.6944411993026733, + "learning_rate": 2.102400236093241e-05, + "loss": 1.3282, + "step": 8772 + }, + { + "epoch": 0.92542194092827, + "grad_norm": 0.6409472823143005, + "learning_rate": 2.096501577715912e-05, + "loss": 1.3721, + "step": 8773 + }, + { + "epoch": 0.9255274261603376, + "grad_norm": 0.6400893330574036, + "learning_rate": 2.0906110885391072e-05, + "loss": 1.3381, + "step": 8774 + }, + { + "epoch": 0.9256329113924051, + "grad_norm": 0.64925217628479, + "learning_rate": 2.0847287692228905e-05, + "loss": 1.3697, + "step": 8775 + }, + { + "epoch": 0.9257383966244725, + "grad_norm": 0.6529539227485657, + "learning_rate": 2.0788546204264013e-05, + "loss": 1.3733, + "step": 8776 + }, + { + "epoch": 0.9258438818565401, + "grad_norm": 0.6467703580856323, + "learning_rate": 2.0729886428078716e-05, + "loss": 1.3616, + "step": 8777 + }, + { + "epoch": 0.9259493670886076, + "grad_norm": 0.6250476837158203, + "learning_rate": 2.0671308370246167e-05, + "loss": 1.3555, + "step": 8778 + }, + { + "epoch": 0.9260548523206751, + "grad_norm": 0.6381667256355286, + "learning_rate": 2.0612812037330202e-05, + "loss": 1.3811, + "step": 8779 + }, + { + "epoch": 0.9261603375527426, + "grad_norm": 0.6266328692436218, + "learning_rate": 2.0554397435885746e-05, + "loss": 1.3787, + "step": 8780 + }, + { + "epoch": 0.9262658227848102, + "grad_norm": 0.6549258232116699, + "learning_rate": 2.0496064572458395e-05, + "loss": 1.3787, + "step": 8781 + }, + { + "epoch": 0.9263713080168776, + "grad_norm": 0.646287739276886, + "learning_rate": 2.043781345358467e-05, + "loss": 1.3949, + "step": 8782 + }, + { + "epoch": 0.9264767932489452, + "grad_norm": 0.694121241569519, + "learning_rate": 2.0379644085791767e-05, + "loss": 1.3211, + "step": 8783 + }, + { + "epoch": 0.9265822784810127, + 
"grad_norm": 0.6740370988845825, + "learning_rate": 2.032155647559805e-05, + "loss": 1.3436, + "step": 8784 + }, + { + "epoch": 0.9266877637130801, + "grad_norm": 0.6957595348358154, + "learning_rate": 2.0263550629512406e-05, + "loss": 1.3676, + "step": 8785 + }, + { + "epoch": 0.9267932489451477, + "grad_norm": 0.6451226472854614, + "learning_rate": 2.0205626554034713e-05, + "loss": 1.3461, + "step": 8786 + }, + { + "epoch": 0.9268987341772152, + "grad_norm": 0.6342389583587646, + "learning_rate": 2.0147784255655692e-05, + "loss": 1.3296, + "step": 8787 + }, + { + "epoch": 0.9270042194092827, + "grad_norm": 0.6770099401473999, + "learning_rate": 2.009002374085675e-05, + "loss": 1.356, + "step": 8788 + }, + { + "epoch": 0.9271097046413502, + "grad_norm": 0.6471418142318726, + "learning_rate": 2.003234501611037e-05, + "loss": 1.3644, + "step": 8789 + }, + { + "epoch": 0.9272151898734177, + "grad_norm": 0.6359807252883911, + "learning_rate": 1.9974748087879636e-05, + "loss": 1.3712, + "step": 8790 + }, + { + "epoch": 0.9273206751054852, + "grad_norm": 0.649536669254303, + "learning_rate": 1.991723296261863e-05, + "loss": 1.3768, + "step": 8791 + }, + { + "epoch": 0.9274261603375528, + "grad_norm": 0.670367419719696, + "learning_rate": 1.985979964677212e-05, + "loss": 1.3392, + "step": 8792 + }, + { + "epoch": 0.9275316455696202, + "grad_norm": 0.6411438584327698, + "learning_rate": 1.9802448146775953e-05, + "loss": 1.3642, + "step": 8793 + }, + { + "epoch": 0.9276371308016877, + "grad_norm": 0.6382707953453064, + "learning_rate": 1.9745178469056575e-05, + "loss": 1.3375, + "step": 8794 + }, + { + "epoch": 0.9277426160337553, + "grad_norm": 0.6949166655540466, + "learning_rate": 1.9687990620031266e-05, + "loss": 1.3507, + "step": 8795 + }, + { + "epoch": 0.9278481012658227, + "grad_norm": 0.6764292120933533, + "learning_rate": 1.963088460610832e-05, + "loss": 1.3835, + "step": 8796 + }, + { + "epoch": 0.9279535864978903, + "grad_norm": 0.6569238305091858, + 
"learning_rate": 1.9573860433686696e-05, + "loss": 1.3637, + "step": 8797 + }, + { + "epoch": 0.9280590717299578, + "grad_norm": 0.6379698514938354, + "learning_rate": 1.9516918109156206e-05, + "loss": 1.3593, + "step": 8798 + }, + { + "epoch": 0.9281645569620253, + "grad_norm": 0.6654945611953735, + "learning_rate": 1.9460057638897578e-05, + "loss": 1.339, + "step": 8799 + }, + { + "epoch": 0.9282700421940928, + "grad_norm": 0.6768714189529419, + "learning_rate": 1.9403279029282376e-05, + "loss": 1.3603, + "step": 8800 + }, + { + "epoch": 0.9283755274261604, + "grad_norm": 0.6795194745063782, + "learning_rate": 1.9346582286672686e-05, + "loss": 1.382, + "step": 8801 + }, + { + "epoch": 0.9284810126582278, + "grad_norm": 0.6983014345169067, + "learning_rate": 1.9289967417421922e-05, + "loss": 1.356, + "step": 8802 + }, + { + "epoch": 0.9285864978902953, + "grad_norm": 0.6760425567626953, + "learning_rate": 1.9233434427873924e-05, + "loss": 1.3607, + "step": 8803 + }, + { + "epoch": 0.9286919831223629, + "grad_norm": 0.6341295838356018, + "learning_rate": 1.9176983324363545e-05, + "loss": 1.3563, + "step": 8804 + }, + { + "epoch": 0.9287974683544303, + "grad_norm": 0.6480253338813782, + "learning_rate": 1.912061411321639e-05, + "loss": 1.3592, + "step": 8805 + }, + { + "epoch": 0.9289029535864979, + "grad_norm": 0.647921621799469, + "learning_rate": 1.9064326800748906e-05, + "loss": 1.367, + "step": 8806 + }, + { + "epoch": 0.9290084388185654, + "grad_norm": 0.6359094381332397, + "learning_rate": 1.9008121393268462e-05, + "loss": 1.3727, + "step": 8807 + }, + { + "epoch": 0.9291139240506329, + "grad_norm": 0.6421307921409607, + "learning_rate": 1.8951997897072943e-05, + "loss": 1.3826, + "step": 8808 + }, + { + "epoch": 0.9292194092827004, + "grad_norm": 0.6795560121536255, + "learning_rate": 1.8895956318451398e-05, + "loss": 1.3875, + "step": 8809 + }, + { + "epoch": 0.929324894514768, + "grad_norm": 0.6410505771636963, + "learning_rate": 1.8839996663683635e-05, + 
"loss": 1.4038, + "step": 8810 + }, + { + "epoch": 0.9294303797468354, + "grad_norm": 0.6771671772003174, + "learning_rate": 1.878411893904014e-05, + "loss": 1.3702, + "step": 8811 + }, + { + "epoch": 0.929535864978903, + "grad_norm": 0.6225691437721252, + "learning_rate": 1.872832315078224e-05, + "loss": 1.3434, + "step": 8812 + }, + { + "epoch": 0.9296413502109705, + "grad_norm": 0.6597674489021301, + "learning_rate": 1.8672609305162263e-05, + "loss": 1.3684, + "step": 8813 + }, + { + "epoch": 0.9297468354430379, + "grad_norm": 0.6304882168769836, + "learning_rate": 1.8616977408423053e-05, + "loss": 1.3792, + "step": 8814 + }, + { + "epoch": 0.9298523206751055, + "grad_norm": 0.637260913848877, + "learning_rate": 1.856142746679862e-05, + "loss": 1.3537, + "step": 8815 + }, + { + "epoch": 0.929957805907173, + "grad_norm": 0.6679820418357849, + "learning_rate": 1.8505959486513485e-05, + "loss": 1.3661, + "step": 8816 + }, + { + "epoch": 0.9300632911392405, + "grad_norm": 0.6840619444847107, + "learning_rate": 1.8450573473783094e-05, + "loss": 1.3756, + "step": 8817 + }, + { + "epoch": 0.930168776371308, + "grad_norm": 0.6759916543960571, + "learning_rate": 1.8395269434813733e-05, + "loss": 1.369, + "step": 8818 + }, + { + "epoch": 0.9302742616033756, + "grad_norm": 0.6237977147102356, + "learning_rate": 1.8340047375802693e-05, + "loss": 1.338, + "step": 8819 + }, + { + "epoch": 0.930379746835443, + "grad_norm": 0.6615469455718994, + "learning_rate": 1.8284907302937608e-05, + "loss": 1.3371, + "step": 8820 + }, + { + "epoch": 0.9304852320675105, + "grad_norm": 0.6519652605056763, + "learning_rate": 1.822984922239737e-05, + "loss": 1.3789, + "step": 8821 + }, + { + "epoch": 0.9305907172995781, + "grad_norm": 0.663100004196167, + "learning_rate": 1.8174873140351544e-05, + "loss": 1.4331, + "step": 8822 + }, + { + "epoch": 0.9306962025316455, + "grad_norm": 0.6713622212409973, + "learning_rate": 1.8119979062960286e-05, + "loss": 1.3533, + "step": 8823 + }, + { + 
"epoch": 0.9308016877637131, + "grad_norm": 0.7320098280906677, + "learning_rate": 1.806516699637492e-05, + "loss": 1.4051, + "step": 8824 + }, + { + "epoch": 0.9309071729957806, + "grad_norm": 0.6257620453834534, + "learning_rate": 1.8010436946737292e-05, + "loss": 1.3718, + "step": 8825 + }, + { + "epoch": 0.9310126582278481, + "grad_norm": 0.6452701687812805, + "learning_rate": 1.7955788920180238e-05, + "loss": 1.3746, + "step": 8826 + }, + { + "epoch": 0.9311181434599156, + "grad_norm": 0.6512988805770874, + "learning_rate": 1.7901222922827282e-05, + "loss": 1.3636, + "step": 8827 + }, + { + "epoch": 0.9312236286919832, + "grad_norm": 0.6512917876243591, + "learning_rate": 1.7846738960792945e-05, + "loss": 1.3952, + "step": 8828 + }, + { + "epoch": 0.9313291139240506, + "grad_norm": 0.7989813685417175, + "learning_rate": 1.7792337040182434e-05, + "loss": 1.3543, + "step": 8829 + }, + { + "epoch": 0.9314345991561181, + "grad_norm": 0.6357534527778625, + "learning_rate": 1.773801716709153e-05, + "loss": 1.3334, + "step": 8830 + }, + { + "epoch": 0.9315400843881857, + "grad_norm": 0.6428249478340149, + "learning_rate": 1.7683779347607286e-05, + "loss": 1.3192, + "step": 8831 + }, + { + "epoch": 0.9316455696202531, + "grad_norm": 0.642569899559021, + "learning_rate": 1.7629623587807175e-05, + "loss": 1.397, + "step": 8832 + }, + { + "epoch": 0.9317510548523207, + "grad_norm": 0.6558317542076111, + "learning_rate": 1.7575549893759756e-05, + "loss": 1.3506, + "step": 8833 + }, + { + "epoch": 0.9318565400843882, + "grad_norm": 0.6340495944023132, + "learning_rate": 1.7521558271524103e-05, + "loss": 1.3429, + "step": 8834 + }, + { + "epoch": 0.9319620253164557, + "grad_norm": 0.645660936832428, + "learning_rate": 1.7467648727150202e-05, + "loss": 1.3623, + "step": 8835 + }, + { + "epoch": 0.9320675105485232, + "grad_norm": 0.7125030755996704, + "learning_rate": 1.741382126667915e-05, + "loss": 1.3842, + "step": 8836 + }, + { + "epoch": 0.9321729957805908, + 
"grad_norm": 0.7066954374313354, + "learning_rate": 1.7360075896142357e-05, + "loss": 1.3871, + "step": 8837 + }, + { + "epoch": 0.9322784810126582, + "grad_norm": 0.6168384552001953, + "learning_rate": 1.7306412621562352e-05, + "loss": 1.3709, + "step": 8838 + }, + { + "epoch": 0.9323839662447257, + "grad_norm": 0.6306811571121216, + "learning_rate": 1.72528314489524e-05, + "loss": 1.3262, + "step": 8839 + }, + { + "epoch": 0.9324894514767933, + "grad_norm": 0.6535758972167969, + "learning_rate": 1.719933238431645e-05, + "loss": 1.3691, + "step": 8840 + }, + { + "epoch": 0.9325949367088607, + "grad_norm": 0.6528608202934265, + "learning_rate": 1.714591543364938e-05, + "loss": 1.3308, + "step": 8841 + }, + { + "epoch": 0.9327004219409283, + "grad_norm": 0.658331573009491, + "learning_rate": 1.7092580602936807e-05, + "loss": 1.4096, + "step": 8842 + }, + { + "epoch": 0.9328059071729958, + "grad_norm": 0.6361926198005676, + "learning_rate": 1.703932789815521e-05, + "loss": 1.3551, + "step": 8843 + }, + { + "epoch": 0.9329113924050633, + "grad_norm": 0.6528674960136414, + "learning_rate": 1.6986157325271727e-05, + "loss": 1.3611, + "step": 8844 + }, + { + "epoch": 0.9330168776371308, + "grad_norm": 0.6426616907119751, + "learning_rate": 1.6933068890244595e-05, + "loss": 1.3494, + "step": 8845 + }, + { + "epoch": 0.9331223628691984, + "grad_norm": 0.6636613607406616, + "learning_rate": 1.688006259902239e-05, + "loss": 1.3647, + "step": 8846 + }, + { + "epoch": 0.9332278481012658, + "grad_norm": 0.6287078261375427, + "learning_rate": 1.6827138457544854e-05, + "loss": 1.3196, + "step": 8847 + }, + { + "epoch": 0.9333333333333333, + "grad_norm": 0.6775009036064148, + "learning_rate": 1.677429647174242e-05, + "loss": 1.3813, + "step": 8848 + }, + { + "epoch": 0.9334388185654009, + "grad_norm": 0.6775396466255188, + "learning_rate": 1.6721536647536255e-05, + "loss": 1.3867, + "step": 8849 + }, + { + "epoch": 0.9335443037974683, + "grad_norm": 0.7188678979873657, + 
"learning_rate": 1.666885899083831e-05, + "loss": 1.3358, + "step": 8850 + }, + { + "epoch": 0.9336497890295359, + "grad_norm": 0.6370828151702881, + "learning_rate": 1.6616263507551437e-05, + "loss": 1.3734, + "step": 8851 + }, + { + "epoch": 0.9337552742616034, + "grad_norm": 0.6499356627464294, + "learning_rate": 1.656375020356926e-05, + "loss": 1.367, + "step": 8852 + }, + { + "epoch": 0.9338607594936709, + "grad_norm": 0.6756464242935181, + "learning_rate": 1.6511319084776073e-05, + "loss": 1.349, + "step": 8853 + }, + { + "epoch": 0.9339662447257384, + "grad_norm": 0.6498131155967712, + "learning_rate": 1.645897015704709e-05, + "loss": 1.3559, + "step": 8854 + }, + { + "epoch": 0.9340717299578059, + "grad_norm": 0.6513285636901855, + "learning_rate": 1.6406703426248366e-05, + "loss": 1.3862, + "step": 8855 + }, + { + "epoch": 0.9341772151898734, + "grad_norm": 0.6271811723709106, + "learning_rate": 1.6354518898236472e-05, + "loss": 1.3511, + "step": 8856 + }, + { + "epoch": 0.934282700421941, + "grad_norm": 0.6374751329421997, + "learning_rate": 1.630241657885906e-05, + "loss": 1.3308, + "step": 8857 + }, + { + "epoch": 0.9343881856540084, + "grad_norm": 0.6331255435943604, + "learning_rate": 1.6250396473954377e-05, + "loss": 1.377, + "step": 8858 + }, + { + "epoch": 0.9344936708860759, + "grad_norm": 0.6475402116775513, + "learning_rate": 1.6198458589351595e-05, + "loss": 1.3662, + "step": 8859 + }, + { + "epoch": 0.9345991561181435, + "grad_norm": 0.7295231223106384, + "learning_rate": 1.614660293087056e-05, + "loss": 1.3421, + "step": 8860 + }, + { + "epoch": 0.9347046413502109, + "grad_norm": 0.6580467820167542, + "learning_rate": 1.609482950432195e-05, + "loss": 1.331, + "step": 8861 + }, + { + "epoch": 0.9348101265822785, + "grad_norm": 0.6237281560897827, + "learning_rate": 1.6043138315507382e-05, + "loss": 1.3932, + "step": 8862 + }, + { + "epoch": 0.934915611814346, + "grad_norm": 0.6423657536506653, + "learning_rate": 1.5991529370218887e-05, + 
"loss": 1.371, + "step": 8863 + }, + { + "epoch": 0.9350210970464135, + "grad_norm": 0.6651529669761658, + "learning_rate": 1.5940002674239756e-05, + "loss": 1.3495, + "step": 8864 + }, + { + "epoch": 0.935126582278481, + "grad_norm": 0.6390499472618103, + "learning_rate": 1.588855823334362e-05, + "loss": 1.3807, + "step": 8865 + }, + { + "epoch": 0.9352320675105485, + "grad_norm": 0.6450724601745605, + "learning_rate": 1.5837196053295117e-05, + "loss": 1.3701, + "step": 8866 + }, + { + "epoch": 0.935337552742616, + "grad_norm": 0.7220010757446289, + "learning_rate": 1.5785916139849725e-05, + "loss": 1.3887, + "step": 8867 + }, + { + "epoch": 0.9354430379746835, + "grad_norm": 0.6537818908691406, + "learning_rate": 1.573471849875352e-05, + "loss": 1.3614, + "step": 8868 + }, + { + "epoch": 0.9355485232067511, + "grad_norm": 0.6644393801689148, + "learning_rate": 1.568360313574349e-05, + "loss": 1.3671, + "step": 8869 + }, + { + "epoch": 0.9356540084388185, + "grad_norm": 0.6300392746925354, + "learning_rate": 1.5632570056547308e-05, + "loss": 1.3281, + "step": 8870 + }, + { + "epoch": 0.9357594936708861, + "grad_norm": 0.6541872024536133, + "learning_rate": 1.5581619266883563e-05, + "loss": 1.3931, + "step": 8871 + }, + { + "epoch": 0.9358649789029536, + "grad_norm": 0.632821798324585, + "learning_rate": 1.5530750772461522e-05, + "loss": 1.3277, + "step": 8872 + }, + { + "epoch": 0.935970464135021, + "grad_norm": 0.7055752873420715, + "learning_rate": 1.5479964578981293e-05, + "loss": 1.3879, + "step": 8873 + }, + { + "epoch": 0.9360759493670886, + "grad_norm": 0.6373599171638489, + "learning_rate": 1.5429260692133656e-05, + "loss": 1.3464, + "step": 8874 + }, + { + "epoch": 0.9361814345991561, + "grad_norm": 0.6970146894454956, + "learning_rate": 1.5378639117600234e-05, + "loss": 1.3457, + "step": 8875 + }, + { + "epoch": 0.9362869198312236, + "grad_norm": 0.7172166109085083, + "learning_rate": 1.532809986105349e-05, + "loss": 1.3846, + "step": 8876 + }, + { + 
"epoch": 0.9363924050632911, + "grad_norm": 0.6272805333137512, + "learning_rate": 1.527764292815656e-05, + "loss": 1.3611, + "step": 8877 + }, + { + "epoch": 0.9364978902953587, + "grad_norm": 0.6444631218910217, + "learning_rate": 1.522726832456342e-05, + "loss": 1.3902, + "step": 8878 + }, + { + "epoch": 0.9366033755274261, + "grad_norm": 0.6374657154083252, + "learning_rate": 1.517697605591864e-05, + "loss": 1.3761, + "step": 8879 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 0.6702525019645691, + "learning_rate": 1.512676612785796e-05, + "loss": 1.3705, + "step": 8880 + }, + { + "epoch": 0.9368143459915612, + "grad_norm": 0.7117735743522644, + "learning_rate": 1.5076638546007548e-05, + "loss": 1.3739, + "step": 8881 + }, + { + "epoch": 0.9369198312236287, + "grad_norm": 0.6686590313911438, + "learning_rate": 1.502659331598441e-05, + "loss": 1.4066, + "step": 8882 + }, + { + "epoch": 0.9370253164556962, + "grad_norm": 0.6284628510475159, + "learning_rate": 1.4976630443396395e-05, + "loss": 1.3382, + "step": 8883 + }, + { + "epoch": 0.9371308016877637, + "grad_norm": 0.6515147686004639, + "learning_rate": 1.4926749933842187e-05, + "loss": 1.3712, + "step": 8884 + }, + { + "epoch": 0.9372362869198312, + "grad_norm": 0.6329537034034729, + "learning_rate": 1.4876951792910987e-05, + "loss": 1.3463, + "step": 8885 + }, + { + "epoch": 0.9373417721518987, + "grad_norm": 0.6564114093780518, + "learning_rate": 1.4827236026182994e-05, + "loss": 1.3818, + "step": 8886 + }, + { + "epoch": 0.9374472573839663, + "grad_norm": 0.6531376242637634, + "learning_rate": 1.4777602639229004e-05, + "loss": 1.3553, + "step": 8887 + }, + { + "epoch": 0.9375527426160337, + "grad_norm": 0.6680524349212646, + "learning_rate": 1.4728051637610902e-05, + "loss": 1.3693, + "step": 8888 + }, + { + "epoch": 0.9376582278481013, + "grad_norm": 0.6442888379096985, + "learning_rate": 1.4678583026880993e-05, + "loss": 1.3697, + "step": 8889 + }, + { + "epoch": 0.9377637130801688, + 
"grad_norm": 0.6514520049095154, + "learning_rate": 1.4629196812582513e-05, + "loss": 1.3381, + "step": 8890 + }, + { + "epoch": 0.9378691983122363, + "grad_norm": 0.7143135070800781, + "learning_rate": 1.457989300024945e-05, + "loss": 1.3617, + "step": 8891 + }, + { + "epoch": 0.9379746835443038, + "grad_norm": 0.6364946961402893, + "learning_rate": 1.4530671595406469e-05, + "loss": 1.366, + "step": 8892 + }, + { + "epoch": 0.9380801687763713, + "grad_norm": 0.6567707061767578, + "learning_rate": 1.4481532603569076e-05, + "loss": 1.3379, + "step": 8893 + }, + { + "epoch": 0.9381856540084388, + "grad_norm": 0.7121529579162598, + "learning_rate": 1.4432476030243696e-05, + "loss": 1.3378, + "step": 8894 + }, + { + "epoch": 0.9382911392405063, + "grad_norm": 0.6547011733055115, + "learning_rate": 1.4383501880927103e-05, + "loss": 1.3686, + "step": 8895 + }, + { + "epoch": 0.9383966244725739, + "grad_norm": 0.709871768951416, + "learning_rate": 1.433461016110732e-05, + "loss": 1.3508, + "step": 8896 + }, + { + "epoch": 0.9385021097046413, + "grad_norm": 0.7701424956321716, + "learning_rate": 1.42858008762628e-05, + "loss": 1.3598, + "step": 8897 + }, + { + "epoch": 0.9386075949367089, + "grad_norm": 0.6766928434371948, + "learning_rate": 1.4237074031862918e-05, + "loss": 1.3272, + "step": 8898 + }, + { + "epoch": 0.9387130801687764, + "grad_norm": 0.7211193442344666, + "learning_rate": 1.4188429633367721e-05, + "loss": 1.3789, + "step": 8899 + }, + { + "epoch": 0.9388185654008439, + "grad_norm": 0.6261487603187561, + "learning_rate": 1.4139867686228102e-05, + "loss": 1.3572, + "step": 8900 + }, + { + "epoch": 0.9389240506329114, + "grad_norm": 0.6539885401725769, + "learning_rate": 1.4091388195885625e-05, + "loss": 1.3654, + "step": 8901 + }, + { + "epoch": 0.939029535864979, + "grad_norm": 0.6338141560554504, + "learning_rate": 1.404299116777269e-05, + "loss": 1.3383, + "step": 8902 + }, + { + "epoch": 0.9391350210970464, + "grad_norm": 0.6575031280517578, + 
"learning_rate": 1.3994676607312379e-05, + "loss": 1.3625, + "step": 8903 + }, + { + "epoch": 0.9392405063291139, + "grad_norm": 0.6545928716659546, + "learning_rate": 1.3946444519918611e-05, + "loss": 1.3652, + "step": 8904 + }, + { + "epoch": 0.9393459915611815, + "grad_norm": 0.6405527591705322, + "learning_rate": 1.3898294910995979e-05, + "loss": 1.3643, + "step": 8905 + }, + { + "epoch": 0.9394514767932489, + "grad_norm": 0.6217878460884094, + "learning_rate": 1.385022778594e-05, + "loss": 1.3983, + "step": 8906 + }, + { + "epoch": 0.9395569620253165, + "grad_norm": 0.637383759021759, + "learning_rate": 1.3802243150136784e-05, + "loss": 1.3444, + "step": 8907 + }, + { + "epoch": 0.939662447257384, + "grad_norm": 0.6174759268760681, + "learning_rate": 1.3754341008963194e-05, + "loss": 1.3411, + "step": 8908 + }, + { + "epoch": 0.9397679324894515, + "grad_norm": 0.6613740921020508, + "learning_rate": 1.370652136778694e-05, + "loss": 1.3462, + "step": 8909 + }, + { + "epoch": 0.939873417721519, + "grad_norm": 0.6706771850585938, + "learning_rate": 1.3658784231966481e-05, + "loss": 1.3313, + "step": 8910 + }, + { + "epoch": 0.9399789029535865, + "grad_norm": 0.6400927901268005, + "learning_rate": 1.3611129606851041e-05, + "loss": 1.3457, + "step": 8911 + }, + { + "epoch": 0.940084388185654, + "grad_norm": 0.6227096319198608, + "learning_rate": 1.3563557497780432e-05, + "loss": 1.383, + "step": 8912 + }, + { + "epoch": 0.9401898734177215, + "grad_norm": 0.6215478181838989, + "learning_rate": 1.3516067910085306e-05, + "loss": 1.3638, + "step": 8913 + }, + { + "epoch": 0.9402953586497891, + "grad_norm": 0.6506956219673157, + "learning_rate": 1.3468660849087322e-05, + "loss": 1.3678, + "step": 8914 + }, + { + "epoch": 0.9404008438818565, + "grad_norm": 0.6362175345420837, + "learning_rate": 1.3421336320098565e-05, + "loss": 1.3507, + "step": 8915 + }, + { + "epoch": 0.9405063291139241, + "grad_norm": 0.6619945764541626, + "learning_rate": 1.3374094328422043e-05, + 
"loss": 1.4097, + "step": 8916 + }, + { + "epoch": 0.9406118143459916, + "grad_norm": 0.6512553691864014, + "learning_rate": 1.3326934879351272e-05, + "loss": 1.3748, + "step": 8917 + }, + { + "epoch": 0.940717299578059, + "grad_norm": 0.7990323901176453, + "learning_rate": 1.327985797817094e-05, + "loss": 1.3314, + "step": 8918 + }, + { + "epoch": 0.9408227848101266, + "grad_norm": 0.6475186347961426, + "learning_rate": 1.3232863630156077e-05, + "loss": 1.3552, + "step": 8919 + }, + { + "epoch": 0.9409282700421941, + "grad_norm": 0.6506619453430176, + "learning_rate": 1.3185951840572723e-05, + "loss": 1.3351, + "step": 8920 + }, + { + "epoch": 0.9410337552742616, + "grad_norm": 0.6749667525291443, + "learning_rate": 1.313912261467759e-05, + "loss": 1.3792, + "step": 8921 + }, + { + "epoch": 0.9411392405063291, + "grad_norm": 0.6277338862419128, + "learning_rate": 1.3092375957717978e-05, + "loss": 1.3622, + "step": 8922 + }, + { + "epoch": 0.9412447257383966, + "grad_norm": 0.65827476978302, + "learning_rate": 1.3045711874932281e-05, + "loss": 1.3607, + "step": 8923 + }, + { + "epoch": 0.9413502109704641, + "grad_norm": 0.6472083330154419, + "learning_rate": 1.2999130371549318e-05, + "loss": 1.3569, + "step": 8924 + }, + { + "epoch": 0.9414556962025317, + "grad_norm": 0.6572861671447754, + "learning_rate": 1.2952631452788826e-05, + "loss": 1.3968, + "step": 8925 + }, + { + "epoch": 0.9415611814345991, + "grad_norm": 0.6337788105010986, + "learning_rate": 1.2906215123861226e-05, + "loss": 1.3282, + "step": 8926 + }, + { + "epoch": 0.9416666666666667, + "grad_norm": 0.6631450653076172, + "learning_rate": 1.2859881389967687e-05, + "loss": 1.3841, + "step": 8927 + }, + { + "epoch": 0.9417721518987342, + "grad_norm": 0.6399299502372742, + "learning_rate": 1.2813630256300224e-05, + "loss": 1.3801, + "step": 8928 + }, + { + "epoch": 0.9418776371308016, + "grad_norm": 0.618498682975769, + "learning_rate": 1.2767461728041357e-05, + "loss": 1.3239, + "step": 8929 + }, + { + 
"epoch": 0.9419831223628692, + "grad_norm": 0.662862241268158, + "learning_rate": 1.2721375810364616e-05, + "loss": 1.3509, + "step": 8930 + }, + { + "epoch": 0.9420886075949367, + "grad_norm": 0.672252357006073, + "learning_rate": 1.267537250843412e-05, + "loss": 1.3403, + "step": 8931 + }, + { + "epoch": 0.9421940928270042, + "grad_norm": 0.6484048962593079, + "learning_rate": 1.2629451827404659e-05, + "loss": 1.3456, + "step": 8932 + }, + { + "epoch": 0.9422995780590717, + "grad_norm": 0.629730224609375, + "learning_rate": 1.258361377242212e-05, + "loss": 1.3672, + "step": 8933 + }, + { + "epoch": 0.9424050632911393, + "grad_norm": 0.6543170809745789, + "learning_rate": 1.2537858348622728e-05, + "loss": 1.3715, + "step": 8934 + }, + { + "epoch": 0.9425105485232067, + "grad_norm": 0.6489289999008179, + "learning_rate": 1.2492185561133545e-05, + "loss": 1.3791, + "step": 8935 + }, + { + "epoch": 0.9426160337552743, + "grad_norm": 0.6474031209945679, + "learning_rate": 1.2446595415072565e-05, + "loss": 1.3468, + "step": 8936 + }, + { + "epoch": 0.9427215189873418, + "grad_norm": 0.7231074571609497, + "learning_rate": 1.2401087915548365e-05, + "loss": 1.3732, + "step": 8937 + }, + { + "epoch": 0.9428270042194092, + "grad_norm": 0.6230294704437256, + "learning_rate": 1.2355663067660283e-05, + "loss": 1.3374, + "step": 8938 + }, + { + "epoch": 0.9429324894514768, + "grad_norm": 0.645980715751648, + "learning_rate": 1.2310320876498333e-05, + "loss": 1.3891, + "step": 8939 + }, + { + "epoch": 0.9430379746835443, + "grad_norm": 0.6134688258171082, + "learning_rate": 1.2265061347143447e-05, + "loss": 1.3639, + "step": 8940 + }, + { + "epoch": 0.9431434599156118, + "grad_norm": 0.6191325783729553, + "learning_rate": 1.2219884484667071e-05, + "loss": 1.3369, + "step": 8941 + }, + { + "epoch": 0.9432489451476793, + "grad_norm": 0.6570987105369568, + "learning_rate": 1.2174790294131405e-05, + "loss": 1.3597, + "step": 8942 + }, + { + "epoch": 0.9433544303797469, + 
"grad_norm": 0.6448532938957214, + "learning_rate": 1.2129778780589823e-05, + "loss": 1.3624, + "step": 8943 + }, + { + "epoch": 0.9434599156118143, + "grad_norm": 0.6665776371955872, + "learning_rate": 1.2084849949085791e-05, + "loss": 1.3945, + "step": 8944 + }, + { + "epoch": 0.9435654008438819, + "grad_norm": 0.6441079378128052, + "learning_rate": 1.2040003804653864e-05, + "loss": 1.3496, + "step": 8945 + }, + { + "epoch": 0.9436708860759494, + "grad_norm": 0.6634828448295593, + "learning_rate": 1.199524035231936e-05, + "loss": 1.3659, + "step": 8946 + }, + { + "epoch": 0.9437763713080168, + "grad_norm": 0.6330483555793762, + "learning_rate": 1.195055959709826e-05, + "loss": 1.3514, + "step": 8947 + }, + { + "epoch": 0.9438818565400844, + "grad_norm": 0.6722714900970459, + "learning_rate": 1.1905961543997147e-05, + "loss": 1.3551, + "step": 8948 + }, + { + "epoch": 0.9439873417721519, + "grad_norm": 0.6544966101646423, + "learning_rate": 1.186144619801352e-05, + "loss": 1.3927, + "step": 8949 + }, + { + "epoch": 0.9440928270042194, + "grad_norm": 0.6228639483451843, + "learning_rate": 1.1817013564135475e-05, + "loss": 1.3562, + "step": 8950 + }, + { + "epoch": 0.9441983122362869, + "grad_norm": 0.6459734439849854, + "learning_rate": 1.1772663647341947e-05, + "loss": 1.3517, + "step": 8951 + }, + { + "epoch": 0.9443037974683545, + "grad_norm": 0.6492713093757629, + "learning_rate": 1.1728396452602708e-05, + "loss": 1.3627, + "step": 8952 + }, + { + "epoch": 0.9444092827004219, + "grad_norm": 0.6865411996841431, + "learning_rate": 1.1684211984877957e-05, + "loss": 1.3816, + "step": 8953 + }, + { + "epoch": 0.9445147679324895, + "grad_norm": 0.627832293510437, + "learning_rate": 1.1640110249118818e-05, + "loss": 1.362, + "step": 8954 + }, + { + "epoch": 0.944620253164557, + "grad_norm": 0.624320924282074, + "learning_rate": 1.1596091250267171e-05, + "loss": 1.3953, + "step": 8955 + }, + { + "epoch": 0.9447257383966244, + "grad_norm": 0.6390889883041382, + 
"learning_rate": 1.1552154993255488e-05, + "loss": 1.3531, + "step": 8956 + }, + { + "epoch": 0.944831223628692, + "grad_norm": 0.6582934856414795, + "learning_rate": 1.1508301483007078e-05, + "loss": 1.3868, + "step": 8957 + }, + { + "epoch": 0.9449367088607595, + "grad_norm": 0.6188734769821167, + "learning_rate": 1.1464530724435928e-05, + "loss": 1.3372, + "step": 8958 + }, + { + "epoch": 0.945042194092827, + "grad_norm": 0.6332690715789795, + "learning_rate": 1.14208427224467e-05, + "loss": 1.3459, + "step": 8959 + }, + { + "epoch": 0.9451476793248945, + "grad_norm": 0.6491597294807434, + "learning_rate": 1.137723748193506e-05, + "loss": 1.3782, + "step": 8960 + }, + { + "epoch": 0.9452531645569621, + "grad_norm": 0.6652551889419556, + "learning_rate": 1.1333715007786932e-05, + "loss": 1.3382, + "step": 8961 + }, + { + "epoch": 0.9453586497890295, + "grad_norm": 0.6738382577896118, + "learning_rate": 1.12902753048795e-05, + "loss": 1.3604, + "step": 8962 + }, + { + "epoch": 0.945464135021097, + "grad_norm": 0.6401417255401611, + "learning_rate": 1.1246918378080202e-05, + "loss": 1.3742, + "step": 8963 + }, + { + "epoch": 0.9455696202531646, + "grad_norm": 0.6878994703292847, + "learning_rate": 1.12036442322474e-05, + "loss": 1.3615, + "step": 8964 + }, + { + "epoch": 0.945675105485232, + "grad_norm": 0.6624740958213806, + "learning_rate": 1.1160452872230303e-05, + "loss": 1.3786, + "step": 8965 + }, + { + "epoch": 0.9457805907172996, + "grad_norm": 0.653689980506897, + "learning_rate": 1.111734430286862e-05, + "loss": 1.4028, + "step": 8966 + }, + { + "epoch": 0.9458860759493671, + "grad_norm": 0.649126410484314, + "learning_rate": 1.1074318528992905e-05, + "loss": 1.3195, + "step": 8967 + }, + { + "epoch": 0.9459915611814346, + "grad_norm": 0.7056084871292114, + "learning_rate": 1.1031375555424466e-05, + "loss": 1.3736, + "step": 8968 + }, + { + "epoch": 0.9460970464135021, + "grad_norm": 0.6730154752731323, + "learning_rate": 1.0988515386975206e-05, + "loss": 
1.3767, + "step": 8969 + }, + { + "epoch": 0.9462025316455697, + "grad_norm": 0.6734298467636108, + "learning_rate": 1.0945738028447783e-05, + "loss": 1.3567, + "step": 8970 + }, + { + "epoch": 0.9463080168776371, + "grad_norm": 0.6503632068634033, + "learning_rate": 1.0903043484635694e-05, + "loss": 1.3692, + "step": 8971 + }, + { + "epoch": 0.9464135021097047, + "grad_norm": 0.662321150302887, + "learning_rate": 1.0860431760323032e-05, + "loss": 1.3432, + "step": 8972 + }, + { + "epoch": 0.9465189873417722, + "grad_norm": 0.7088372111320496, + "learning_rate": 1.0817902860284723e-05, + "loss": 1.3114, + "step": 8973 + }, + { + "epoch": 0.9466244725738396, + "grad_norm": 0.6230581998825073, + "learning_rate": 1.0775456789286291e-05, + "loss": 1.3487, + "step": 8974 + }, + { + "epoch": 0.9467299578059072, + "grad_norm": 0.6515845060348511, + "learning_rate": 1.0733093552084016e-05, + "loss": 1.3803, + "step": 8975 + }, + { + "epoch": 0.9468354430379747, + "grad_norm": 0.6459628939628601, + "learning_rate": 1.0690813153425016e-05, + "loss": 1.3736, + "step": 8976 + }, + { + "epoch": 0.9469409282700422, + "grad_norm": 0.6562303304672241, + "learning_rate": 1.0648615598046834e-05, + "loss": 1.3628, + "step": 8977 + }, + { + "epoch": 0.9470464135021097, + "grad_norm": 0.6392783522605896, + "learning_rate": 1.0606500890678023e-05, + "loss": 1.3726, + "step": 8978 + }, + { + "epoch": 0.9471518987341773, + "grad_norm": 0.7035980224609375, + "learning_rate": 1.0564469036037722e-05, + "loss": 1.3238, + "step": 8979 + }, + { + "epoch": 0.9472573839662447, + "grad_norm": 0.6968185901641846, + "learning_rate": 1.0522520038835831e-05, + "loss": 1.3828, + "step": 8980 + }, + { + "epoch": 0.9473628691983123, + "grad_norm": 0.6580529808998108, + "learning_rate": 1.0480653903772924e-05, + "loss": 1.3683, + "step": 8981 + }, + { + "epoch": 0.9474683544303798, + "grad_norm": 0.6586958169937134, + "learning_rate": 1.0438870635540332e-05, + "loss": 1.3527, + "step": 8982 + }, + { + 
"epoch": 0.9475738396624472, + "grad_norm": 0.6515299677848816, + "learning_rate": 1.0397170238820142e-05, + "loss": 1.3841, + "step": 8983 + }, + { + "epoch": 0.9476793248945148, + "grad_norm": 0.652878999710083, + "learning_rate": 1.0355552718284949e-05, + "loss": 1.3848, + "step": 8984 + }, + { + "epoch": 0.9477848101265823, + "grad_norm": 0.6325267553329468, + "learning_rate": 1.0314018078598275e-05, + "loss": 1.3749, + "step": 8985 + }, + { + "epoch": 0.9478902953586498, + "grad_norm": 0.6316607594490051, + "learning_rate": 1.0272566324414313e-05, + "loss": 1.3668, + "step": 8986 + }, + { + "epoch": 0.9479957805907173, + "grad_norm": 0.651207447052002, + "learning_rate": 1.0231197460377845e-05, + "loss": 1.3525, + "step": 8987 + }, + { + "epoch": 0.9481012658227848, + "grad_norm": 0.6479745507240295, + "learning_rate": 1.0189911491124582e-05, + "loss": 1.3563, + "step": 8988 + }, + { + "epoch": 0.9482067510548523, + "grad_norm": 0.6582478284835815, + "learning_rate": 1.0148708421280822e-05, + "loss": 1.3763, + "step": 8989 + }, + { + "epoch": 0.9483122362869199, + "grad_norm": 0.6154480576515198, + "learning_rate": 1.0107588255463373e-05, + "loss": 1.345, + "step": 8990 + }, + { + "epoch": 0.9484177215189873, + "grad_norm": 0.6305621862411499, + "learning_rate": 1.0066550998280132e-05, + "loss": 1.326, + "step": 8991 + }, + { + "epoch": 0.9485232067510548, + "grad_norm": 0.7337284088134766, + "learning_rate": 1.0025596654329504e-05, + "loss": 1.3475, + "step": 8992 + }, + { + "epoch": 0.9486286919831224, + "grad_norm": 0.6372855305671692, + "learning_rate": 9.984725228200654e-06, + "loss": 1.3425, + "step": 8993 + }, + { + "epoch": 0.9487341772151898, + "grad_norm": 0.6460554599761963, + "learning_rate": 9.943936724473412e-06, + "loss": 1.3604, + "step": 8994 + }, + { + "epoch": 0.9488396624472574, + "grad_norm": 0.6884849071502686, + "learning_rate": 9.903231147718294e-06, + "loss": 1.4158, + "step": 8995 + }, + { + "epoch": 0.9489451476793249, + "grad_norm": 
0.6485365629196167, + "learning_rate": 9.862608502496568e-06, + "loss": 1.347, + "step": 8996 + }, + { + "epoch": 0.9490506329113924, + "grad_norm": 0.6381001472473145, + "learning_rate": 9.822068793360172e-06, + "loss": 1.3672, + "step": 8997 + }, + { + "epoch": 0.9491561181434599, + "grad_norm": 0.6307768821716309, + "learning_rate": 9.781612024851893e-06, + "loss": 1.3803, + "step": 8998 + }, + { + "epoch": 0.9492616033755275, + "grad_norm": 0.7027404308319092, + "learning_rate": 9.74123820150502e-06, + "loss": 1.361, + "step": 8999 + }, + { + "epoch": 0.9493670886075949, + "grad_norm": 0.6306405067443848, + "learning_rate": 9.700947327843685e-06, + "loss": 1.3473, + "step": 9000 + }, + { + "epoch": 0.9494725738396624, + "grad_norm": 0.6394280791282654, + "learning_rate": 9.660739408382608e-06, + "loss": 1.3671, + "step": 9001 + }, + { + "epoch": 0.94957805907173, + "grad_norm": 0.7124308943748474, + "learning_rate": 9.620614447627435e-06, + "loss": 1.366, + "step": 9002 + }, + { + "epoch": 0.9496835443037974, + "grad_norm": 0.6738433241844177, + "learning_rate": 9.580572450074237e-06, + "loss": 1.3768, + "step": 9003 + }, + { + "epoch": 0.949789029535865, + "grad_norm": 0.6467238664627075, + "learning_rate": 9.540613420209927e-06, + "loss": 1.3328, + "step": 9004 + }, + { + "epoch": 0.9498945147679325, + "grad_norm": 0.6576680541038513, + "learning_rate": 9.500737362512168e-06, + "loss": 1.3204, + "step": 9005 + }, + { + "epoch": 0.95, + "grad_norm": 0.6504475474357605, + "learning_rate": 9.460944281449307e-06, + "loss": 1.365, + "step": 9006 + }, + { + "epoch": 0.9501054852320675, + "grad_norm": 0.6393879652023315, + "learning_rate": 9.421234181480275e-06, + "loss": 1.3814, + "step": 9007 + }, + { + "epoch": 0.950210970464135, + "grad_norm": 0.6713904738426208, + "learning_rate": 9.381607067054764e-06, + "loss": 1.3812, + "step": 9008 + }, + { + "epoch": 0.9503164556962025, + "grad_norm": 0.6290982961654663, + "learning_rate": 9.342062942613222e-06, + "loss": 
1.3486, + "step": 9009 + }, + { + "epoch": 0.95042194092827, + "grad_norm": 0.6742907166481018, + "learning_rate": 9.302601812586852e-06, + "loss": 1.352, + "step": 9010 + }, + { + "epoch": 0.9505274261603376, + "grad_norm": 0.6613414287567139, + "learning_rate": 9.26322368139737e-06, + "loss": 1.3313, + "step": 9011 + }, + { + "epoch": 0.950632911392405, + "grad_norm": 0.6508035659790039, + "learning_rate": 9.223928553457328e-06, + "loss": 1.3781, + "step": 9012 + }, + { + "epoch": 0.9507383966244726, + "grad_norm": 0.6547254920005798, + "learning_rate": 9.184716433169955e-06, + "loss": 1.3724, + "step": 9013 + }, + { + "epoch": 0.9508438818565401, + "grad_norm": 0.6518592834472656, + "learning_rate": 9.145587324929066e-06, + "loss": 1.3587, + "step": 9014 + }, + { + "epoch": 0.9509493670886076, + "grad_norm": 0.729763388633728, + "learning_rate": 9.106541233119409e-06, + "loss": 1.3507, + "step": 9015 + }, + { + "epoch": 0.9510548523206751, + "grad_norm": 0.6428207755088806, + "learning_rate": 9.06757816211623e-06, + "loss": 1.3111, + "step": 9016 + }, + { + "epoch": 0.9511603375527427, + "grad_norm": 0.6413525342941284, + "learning_rate": 9.028698116285538e-06, + "loss": 1.3318, + "step": 9017 + }, + { + "epoch": 0.9512658227848101, + "grad_norm": 0.6410609483718872, + "learning_rate": 8.989901099984016e-06, + "loss": 1.3752, + "step": 9018 + }, + { + "epoch": 0.9513713080168776, + "grad_norm": 0.625615119934082, + "learning_rate": 8.9511871175591e-06, + "loss": 1.3549, + "step": 9019 + }, + { + "epoch": 0.9514767932489452, + "grad_norm": 0.6350460052490234, + "learning_rate": 8.912556173348907e-06, + "loss": 1.3926, + "step": 9020 + }, + { + "epoch": 0.9515822784810126, + "grad_norm": 0.638404905796051, + "learning_rate": 8.874008271682222e-06, + "loss": 1.3689, + "step": 9021 + }, + { + "epoch": 0.9516877637130802, + "grad_norm": 0.6620716452598572, + "learning_rate": 8.835543416878422e-06, + "loss": 1.406, + "step": 9022 + }, + { + "epoch": 
0.9517932489451477, + "grad_norm": 0.649927020072937, + "learning_rate": 8.797161613247728e-06, + "loss": 1.3972, + "step": 9023 + }, + { + "epoch": 0.9518987341772152, + "grad_norm": 0.642350435256958, + "learning_rate": 8.758862865091117e-06, + "loss": 1.3365, + "step": 9024 + }, + { + "epoch": 0.9520042194092827, + "grad_norm": 0.6524500250816345, + "learning_rate": 8.72064717670007e-06, + "loss": 1.3883, + "step": 9025 + }, + { + "epoch": 0.9521097046413503, + "grad_norm": 0.649325430393219, + "learning_rate": 8.68251455235683e-06, + "loss": 1.4184, + "step": 9026 + }, + { + "epoch": 0.9522151898734177, + "grad_norm": 0.7103158235549927, + "learning_rate": 8.644464996334395e-06, + "loss": 1.3537, + "step": 9027 + }, + { + "epoch": 0.9523206751054852, + "grad_norm": 0.6245574951171875, + "learning_rate": 8.606498512896438e-06, + "loss": 1.3631, + "step": 9028 + }, + { + "epoch": 0.9524261603375528, + "grad_norm": 0.6665302515029907, + "learning_rate": 8.568615106297223e-06, + "loss": 1.4006, + "step": 9029 + }, + { + "epoch": 0.9525316455696202, + "grad_norm": 0.6324486136436462, + "learning_rate": 8.53081478078177e-06, + "loss": 1.3555, + "step": 9030 + }, + { + "epoch": 0.9526371308016878, + "grad_norm": 0.633036732673645, + "learning_rate": 8.493097540585775e-06, + "loss": 1.327, + "step": 9031 + }, + { + "epoch": 0.9527426160337553, + "grad_norm": 0.6841399073600769, + "learning_rate": 8.455463389935774e-06, + "loss": 1.3877, + "step": 9032 + }, + { + "epoch": 0.9528481012658228, + "grad_norm": 0.6450932025909424, + "learning_rate": 8.417912333048727e-06, + "loss": 1.3727, + "step": 9033 + }, + { + "epoch": 0.9529535864978903, + "grad_norm": 0.6370425224304199, + "learning_rate": 8.380444374132517e-06, + "loss": 1.4019, + "step": 9034 + }, + { + "epoch": 0.9530590717299579, + "grad_norm": 0.6490717530250549, + "learning_rate": 8.343059517385454e-06, + "loss": 1.3349, + "step": 9035 + }, + { + "epoch": 0.9531645569620253, + "grad_norm": 0.6507613658905029, + 
"learning_rate": 8.305757766996935e-06, + "loss": 1.3208, + "step": 9036 + }, + { + "epoch": 0.9532700421940928, + "grad_norm": 0.622519314289093, + "learning_rate": 8.268539127146619e-06, + "loss": 1.3131, + "step": 9037 + }, + { + "epoch": 0.9533755274261604, + "grad_norm": 0.6765690445899963, + "learning_rate": 8.231403602005083e-06, + "loss": 1.3588, + "step": 9038 + }, + { + "epoch": 0.9534810126582278, + "grad_norm": 0.6702097058296204, + "learning_rate": 8.194351195733585e-06, + "loss": 1.3393, + "step": 9039 + }, + { + "epoch": 0.9535864978902954, + "grad_norm": 0.6418893337249756, + "learning_rate": 8.157381912484053e-06, + "loss": 1.363, + "step": 9040 + }, + { + "epoch": 0.9536919831223629, + "grad_norm": 0.6294135451316833, + "learning_rate": 8.120495756399005e-06, + "loss": 1.3743, + "step": 9041 + }, + { + "epoch": 0.9537974683544304, + "grad_norm": 0.6487946510314941, + "learning_rate": 8.08369273161172e-06, + "loss": 1.3692, + "step": 9042 + }, + { + "epoch": 0.9539029535864979, + "grad_norm": 0.6369738578796387, + "learning_rate": 8.046972842246147e-06, + "loss": 1.3358, + "step": 9043 + }, + { + "epoch": 0.9540084388185655, + "grad_norm": 0.6185550093650818, + "learning_rate": 8.01033609241708e-06, + "loss": 1.3567, + "step": 9044 + }, + { + "epoch": 0.9541139240506329, + "grad_norm": 0.6497821807861328, + "learning_rate": 7.973782486229737e-06, + "loss": 1.3753, + "step": 9045 + }, + { + "epoch": 0.9542194092827004, + "grad_norm": 0.6238925457000732, + "learning_rate": 7.937312027780169e-06, + "loss": 1.352, + "step": 9046 + }, + { + "epoch": 0.954324894514768, + "grad_norm": 0.6571165323257446, + "learning_rate": 7.900924721154945e-06, + "loss": 1.4158, + "step": 9047 + }, + { + "epoch": 0.9544303797468354, + "grad_norm": 0.6476583480834961, + "learning_rate": 7.864620570431635e-06, + "loss": 1.3464, + "step": 9048 + }, + { + "epoch": 0.954535864978903, + "grad_norm": 0.6351646184921265, + "learning_rate": 7.828399579678153e-06, + "loss": 
1.3556, + "step": 9049 + }, + { + "epoch": 0.9546413502109705, + "grad_norm": 0.6587585210800171, + "learning_rate": 7.792261752953333e-06, + "loss": 1.3548, + "step": 9050 + }, + { + "epoch": 0.954746835443038, + "grad_norm": 0.6441763043403625, + "learning_rate": 7.756207094306605e-06, + "loss": 1.3819, + "step": 9051 + }, + { + "epoch": 0.9548523206751055, + "grad_norm": 0.6372100710868835, + "learning_rate": 7.720235607777987e-06, + "loss": 1.363, + "step": 9052 + }, + { + "epoch": 0.9549578059071729, + "grad_norm": 0.6483339667320251, + "learning_rate": 7.684347297398254e-06, + "loss": 1.3542, + "step": 9053 + }, + { + "epoch": 0.9550632911392405, + "grad_norm": 0.6316462755203247, + "learning_rate": 7.648542167189021e-06, + "loss": 1.3879, + "step": 9054 + }, + { + "epoch": 0.955168776371308, + "grad_norm": 0.6298888921737671, + "learning_rate": 7.612820221162331e-06, + "loss": 1.3476, + "step": 9055 + }, + { + "epoch": 0.9552742616033755, + "grad_norm": 0.7060683965682983, + "learning_rate": 7.577181463320981e-06, + "loss": 1.3825, + "step": 9056 + }, + { + "epoch": 0.955379746835443, + "grad_norm": 0.620552659034729, + "learning_rate": 7.541625897658444e-06, + "loss": 1.3544, + "step": 9057 + }, + { + "epoch": 0.9554852320675106, + "grad_norm": 0.6542927026748657, + "learning_rate": 7.506153528159032e-06, + "loss": 1.3797, + "step": 9058 + }, + { + "epoch": 0.955590717299578, + "grad_norm": 0.6649830341339111, + "learning_rate": 7.470764358797566e-06, + "loss": 1.352, + "step": 9059 + }, + { + "epoch": 0.9556962025316456, + "grad_norm": 0.6386170983314514, + "learning_rate": 7.435458393539457e-06, + "loss": 1.3523, + "step": 9060 + }, + { + "epoch": 0.9558016877637131, + "grad_norm": 0.6933517456054688, + "learning_rate": 7.400235636340957e-06, + "loss": 1.3882, + "step": 9061 + }, + { + "epoch": 0.9559071729957805, + "grad_norm": 0.6272974014282227, + "learning_rate": 7.3650960911490764e-06, + "loss": 1.3747, + "step": 9062 + }, + { + "epoch": 
0.9560126582278481, + "grad_norm": 0.6267911195755005, + "learning_rate": 7.330039761901247e-06, + "loss": 1.368, + "step": 9063 + }, + { + "epoch": 0.9561181434599156, + "grad_norm": 0.641906201839447, + "learning_rate": 7.295066652525828e-06, + "loss": 1.354, + "step": 9064 + }, + { + "epoch": 0.9562236286919831, + "grad_norm": 0.6295943856239319, + "learning_rate": 7.260176766941601e-06, + "loss": 1.3824, + "step": 9065 + }, + { + "epoch": 0.9563291139240506, + "grad_norm": 0.6190171241760254, + "learning_rate": 7.225370109058188e-06, + "loss": 1.3507, + "step": 9066 + }, + { + "epoch": 0.9564345991561182, + "grad_norm": 0.6455026268959045, + "learning_rate": 7.190646682775886e-06, + "loss": 1.383, + "step": 9067 + }, + { + "epoch": 0.9565400843881856, + "grad_norm": 0.6410238742828369, + "learning_rate": 7.1560064919855835e-06, + "loss": 1.3589, + "step": 9068 + }, + { + "epoch": 0.9566455696202532, + "grad_norm": 0.7084226608276367, + "learning_rate": 7.121449540568842e-06, + "loss": 1.3572, + "step": 9069 + }, + { + "epoch": 0.9567510548523207, + "grad_norm": 0.6286916732788086, + "learning_rate": 7.086975832398146e-06, + "loss": 1.3375, + "step": 9070 + }, + { + "epoch": 0.9568565400843881, + "grad_norm": 0.6153943538665771, + "learning_rate": 7.0525853713362395e-06, + "loss": 1.3946, + "step": 9071 + }, + { + "epoch": 0.9569620253164557, + "grad_norm": 0.6732749342918396, + "learning_rate": 7.018278161236791e-06, + "loss": 1.3428, + "step": 9072 + }, + { + "epoch": 0.9570675105485232, + "grad_norm": 0.6878830790519714, + "learning_rate": 6.984054205944141e-06, + "loss": 1.3834, + "step": 9073 + }, + { + "epoch": 0.9571729957805907, + "grad_norm": 0.6313354969024658, + "learning_rate": 6.949913509293221e-06, + "loss": 1.3866, + "step": 9074 + }, + { + "epoch": 0.9572784810126582, + "grad_norm": 0.6384416818618774, + "learning_rate": 6.915856075109722e-06, + "loss": 1.3463, + "step": 9075 + }, + { + "epoch": 0.9573839662447258, + "grad_norm": 
0.6693029403686523, + "learning_rate": 6.881881907209841e-06, + "loss": 1.3299, + "step": 9076 + }, + { + "epoch": 0.9574894514767932, + "grad_norm": 0.6285623908042908, + "learning_rate": 6.847991009400617e-06, + "loss": 1.3846, + "step": 9077 + }, + { + "epoch": 0.9575949367088608, + "grad_norm": 0.688805878162384, + "learning_rate": 6.814183385479677e-06, + "loss": 1.3891, + "step": 9078 + }, + { + "epoch": 0.9577004219409283, + "grad_norm": 0.6357269883155823, + "learning_rate": 6.780459039235409e-06, + "loss": 1.3228, + "step": 9079 + }, + { + "epoch": 0.9578059071729957, + "grad_norm": 0.6514778733253479, + "learning_rate": 6.746817974446706e-06, + "loss": 1.3885, + "step": 9080 + }, + { + "epoch": 0.9579113924050633, + "grad_norm": 0.6541258096694946, + "learning_rate": 6.71326019488322e-06, + "loss": 1.383, + "step": 9081 + }, + { + "epoch": 0.9580168776371308, + "grad_norm": 0.7572007179260254, + "learning_rate": 6.679785704305358e-06, + "loss": 1.3757, + "step": 9082 + }, + { + "epoch": 0.9581223628691983, + "grad_norm": 0.6175175905227661, + "learning_rate": 6.6463945064639544e-06, + "loss": 1.364, + "step": 9083 + }, + { + "epoch": 0.9582278481012658, + "grad_norm": 0.622224748134613, + "learning_rate": 6.6130866051007654e-06, + "loss": 1.3673, + "step": 9084 + }, + { + "epoch": 0.9583333333333334, + "grad_norm": 0.6572061777114868, + "learning_rate": 6.57986200394814e-06, + "loss": 1.3338, + "step": 9085 + }, + { + "epoch": 0.9584388185654008, + "grad_norm": 0.6614440679550171, + "learning_rate": 6.546720706728931e-06, + "loss": 1.3588, + "step": 9086 + }, + { + "epoch": 0.9585443037974684, + "grad_norm": 0.6915423274040222, + "learning_rate": 6.513662717156838e-06, + "loss": 1.3559, + "step": 9087 + }, + { + "epoch": 0.9586497890295359, + "grad_norm": 0.6298391222953796, + "learning_rate": 6.480688038936311e-06, + "loss": 1.3769, + "step": 9088 + }, + { + "epoch": 0.9587552742616033, + "grad_norm": 0.6498953104019165, + "learning_rate": 
6.447796675762146e-06, + "loss": 1.3702, + "step": 9089 + }, + { + "epoch": 0.9588607594936709, + "grad_norm": 0.6683400273323059, + "learning_rate": 6.414988631320062e-06, + "loss": 1.3952, + "step": 9090 + }, + { + "epoch": 0.9589662447257384, + "grad_norm": 0.6234251260757446, + "learning_rate": 6.3822639092862846e-06, + "loss": 1.3293, + "step": 9091 + }, + { + "epoch": 0.9590717299578059, + "grad_norm": 0.7260818481445312, + "learning_rate": 6.349622513327963e-06, + "loss": 1.347, + "step": 9092 + }, + { + "epoch": 0.9591772151898734, + "grad_norm": 0.636389970779419, + "learning_rate": 6.317064447102505e-06, + "loss": 1.3585, + "step": 9093 + }, + { + "epoch": 0.959282700421941, + "grad_norm": 0.6552330851554871, + "learning_rate": 6.28458971425841e-06, + "loss": 1.3291, + "step": 9094 + }, + { + "epoch": 0.9593881856540084, + "grad_norm": 0.6587387919425964, + "learning_rate": 6.252198318434432e-06, + "loss": 1.34, + "step": 9095 + }, + { + "epoch": 0.959493670886076, + "grad_norm": 0.6629188060760498, + "learning_rate": 6.219890263260336e-06, + "loss": 1.3799, + "step": 9096 + }, + { + "epoch": 0.9595991561181435, + "grad_norm": 0.6684183478355408, + "learning_rate": 6.187665552356392e-06, + "loss": 1.3937, + "step": 9097 + }, + { + "epoch": 0.9597046413502109, + "grad_norm": 0.6366636753082275, + "learning_rate": 6.155524189333461e-06, + "loss": 1.3269, + "step": 9098 + }, + { + "epoch": 0.9598101265822785, + "grad_norm": 0.6916595101356506, + "learning_rate": 6.123466177793247e-06, + "loss": 1.3053, + "step": 9099 + }, + { + "epoch": 0.959915611814346, + "grad_norm": 0.6480914354324341, + "learning_rate": 6.091491521327958e-06, + "loss": 1.3587, + "step": 9100 + }, + { + "epoch": 0.9600210970464135, + "grad_norm": 0.6694323420524597, + "learning_rate": 6.059600223520478e-06, + "loss": 1.3811, + "step": 9101 + }, + { + "epoch": 0.960126582278481, + "grad_norm": 0.6392681002616882, + "learning_rate": 6.027792287944367e-06, + "loss": 1.3427, + "step": 9102 + 
}, + { + "epoch": 0.9602320675105486, + "grad_norm": 0.6419055461883545, + "learning_rate": 5.996067718163939e-06, + "loss": 1.3387, + "step": 9103 + }, + { + "epoch": 0.960337552742616, + "grad_norm": 0.6252624988555908, + "learning_rate": 5.964426517734101e-06, + "loss": 1.3646, + "step": 9104 + }, + { + "epoch": 0.9604430379746836, + "grad_norm": 0.6551651954650879, + "learning_rate": 5.932868690200266e-06, + "loss": 1.3589, + "step": 9105 + }, + { + "epoch": 0.9605485232067511, + "grad_norm": 0.6523978114128113, + "learning_rate": 5.901394239098856e-06, + "loss": 1.3629, + "step": 9106 + }, + { + "epoch": 0.9606540084388185, + "grad_norm": 0.619875431060791, + "learning_rate": 5.870003167956634e-06, + "loss": 1.3486, + "step": 9107 + }, + { + "epoch": 0.9607594936708861, + "grad_norm": 0.6458244323730469, + "learning_rate": 5.838695480291034e-06, + "loss": 1.3337, + "step": 9108 + }, + { + "epoch": 0.9608649789029536, + "grad_norm": 0.6433133482933044, + "learning_rate": 5.807471179610418e-06, + "loss": 1.3865, + "step": 9109 + }, + { + "epoch": 0.9609704641350211, + "grad_norm": 0.6634728312492371, + "learning_rate": 5.776330269413488e-06, + "loss": 1.3437, + "step": 9110 + }, + { + "epoch": 0.9610759493670886, + "grad_norm": 0.631914496421814, + "learning_rate": 5.745272753189784e-06, + "loss": 1.3583, + "step": 9111 + }, + { + "epoch": 0.9611814345991562, + "grad_norm": 0.677562415599823, + "learning_rate": 5.714298634419524e-06, + "loss": 1.3713, + "step": 9112 + }, + { + "epoch": 0.9612869198312236, + "grad_norm": 0.6443235874176025, + "learning_rate": 5.6834079165733464e-06, + "loss": 1.3552, + "step": 9113 + }, + { + "epoch": 0.9613924050632912, + "grad_norm": 0.6223993301391602, + "learning_rate": 5.652600603112818e-06, + "loss": 1.359, + "step": 9114 + }, + { + "epoch": 0.9614978902953587, + "grad_norm": 0.6322022676467896, + "learning_rate": 5.6218766974900915e-06, + "loss": 1.3583, + "step": 9115 + }, + { + "epoch": 0.9616033755274261, + "grad_norm": 
0.6425161957740784, + "learning_rate": 5.591236203147915e-06, + "loss": 1.4066, + "step": 9116 + }, + { + "epoch": 0.9617088607594937, + "grad_norm": 0.6359859704971313, + "learning_rate": 5.560679123519624e-06, + "loss": 1.3664, + "step": 9117 + }, + { + "epoch": 0.9618143459915611, + "grad_norm": 0.6782578825950623, + "learning_rate": 5.530205462029314e-06, + "loss": 1.4092, + "step": 9118 + }, + { + "epoch": 0.9619198312236287, + "grad_norm": 0.6501981616020203, + "learning_rate": 5.499815222091836e-06, + "loss": 1.385, + "step": 9119 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 0.66506427526474, + "learning_rate": 5.469508407112467e-06, + "loss": 1.3096, + "step": 9120 + }, + { + "epoch": 0.9621308016877637, + "grad_norm": 0.6468316912651062, + "learning_rate": 5.439285020487156e-06, + "loss": 1.3679, + "step": 9121 + }, + { + "epoch": 0.9622362869198312, + "grad_norm": 0.6556439995765686, + "learning_rate": 5.409145065602694e-06, + "loss": 1.3306, + "step": 9122 + }, + { + "epoch": 0.9623417721518988, + "grad_norm": 0.6394083499908447, + "learning_rate": 5.379088545836464e-06, + "loss": 1.3642, + "step": 9123 + }, + { + "epoch": 0.9624472573839662, + "grad_norm": 0.6203057169914246, + "learning_rate": 5.349115464556354e-06, + "loss": 1.337, + "step": 9124 + }, + { + "epoch": 0.9625527426160337, + "grad_norm": 0.6322455406188965, + "learning_rate": 5.319225825120927e-06, + "loss": 1.3291, + "step": 9125 + }, + { + "epoch": 0.9626582278481013, + "grad_norm": 0.6404223442077637, + "learning_rate": 5.289419630879672e-06, + "loss": 1.3329, + "step": 9126 + }, + { + "epoch": 0.9627637130801687, + "grad_norm": 0.6391290426254272, + "learning_rate": 5.2596968851724155e-06, + "loss": 1.3686, + "step": 9127 + }, + { + "epoch": 0.9628691983122363, + "grad_norm": 0.6277337074279785, + "learning_rate": 5.230057591329662e-06, + "loss": 1.3257, + "step": 9128 + }, + { + "epoch": 0.9629746835443038, + "grad_norm": 0.6361364722251892, + "learning_rate": 
5.200501752672754e-06, + "loss": 1.3203, + "step": 9129 + }, + { + "epoch": 0.9630801687763713, + "grad_norm": 0.6522839069366455, + "learning_rate": 5.171029372513458e-06, + "loss": 1.4265, + "step": 9130 + }, + { + "epoch": 0.9631856540084388, + "grad_norm": 0.6337528824806213, + "learning_rate": 5.141640454154467e-06, + "loss": 1.3842, + "step": 9131 + }, + { + "epoch": 0.9632911392405064, + "grad_norm": 0.62589430809021, + "learning_rate": 5.112335000888813e-06, + "loss": 1.3961, + "step": 9132 + }, + { + "epoch": 0.9633966244725738, + "grad_norm": 0.6561613082885742, + "learning_rate": 5.083113016000368e-06, + "loss": 1.4119, + "step": 9133 + }, + { + "epoch": 0.9635021097046413, + "grad_norm": 0.6470049619674683, + "learning_rate": 5.053974502763681e-06, + "loss": 1.3396, + "step": 9134 + }, + { + "epoch": 0.9636075949367089, + "grad_norm": 0.6641644239425659, + "learning_rate": 5.024919464443723e-06, + "loss": 1.3429, + "step": 9135 + }, + { + "epoch": 0.9637130801687763, + "grad_norm": 0.6324298977851868, + "learning_rate": 4.995947904296305e-06, + "loss": 1.3769, + "step": 9136 + }, + { + "epoch": 0.9638185654008439, + "grad_norm": 0.6331436634063721, + "learning_rate": 4.967059825567832e-06, + "loss": 1.373, + "step": 9137 + }, + { + "epoch": 0.9639240506329114, + "grad_norm": 0.6213120222091675, + "learning_rate": 4.938255231495464e-06, + "loss": 1.3703, + "step": 9138 + }, + { + "epoch": 0.9640295358649789, + "grad_norm": 0.6258267164230347, + "learning_rate": 4.909534125306702e-06, + "loss": 1.3684, + "step": 9139 + }, + { + "epoch": 0.9641350210970464, + "grad_norm": 0.6857502460479736, + "learning_rate": 4.880896510220056e-06, + "loss": 1.3824, + "step": 9140 + }, + { + "epoch": 0.964240506329114, + "grad_norm": 0.6986061930656433, + "learning_rate": 4.852342389444458e-06, + "loss": 1.3406, + "step": 9141 + }, + { + "epoch": 0.9643459915611814, + "grad_norm": 0.6416981816291809, + "learning_rate": 4.823871766179516e-06, + "loss": 1.3508, + "step": 
9142 + }, + { + "epoch": 0.9644514767932489, + "grad_norm": 0.6474980115890503, + "learning_rate": 4.7954846436155104e-06, + "loss": 1.3629, + "step": 9143 + }, + { + "epoch": 0.9645569620253165, + "grad_norm": 0.6638444066047668, + "learning_rate": 4.767181024933398e-06, + "loss": 1.362, + "step": 9144 + }, + { + "epoch": 0.9646624472573839, + "grad_norm": 0.6313378810882568, + "learning_rate": 4.738960913304724e-06, + "loss": 1.3502, + "step": 9145 + }, + { + "epoch": 0.9647679324894515, + "grad_norm": 0.6447040438652039, + "learning_rate": 4.710824311891709e-06, + "loss": 1.3796, + "step": 9146 + }, + { + "epoch": 0.964873417721519, + "grad_norm": 0.6364414691925049, + "learning_rate": 4.682771223847166e-06, + "loss": 1.383, + "step": 9147 + }, + { + "epoch": 0.9649789029535865, + "grad_norm": 0.628044605255127, + "learning_rate": 4.654801652314577e-06, + "loss": 1.345, + "step": 9148 + }, + { + "epoch": 0.965084388185654, + "grad_norm": 0.6350790858268738, + "learning_rate": 4.626915600428105e-06, + "loss": 1.3659, + "step": 9149 + }, + { + "epoch": 0.9651898734177216, + "grad_norm": 0.6447082161903381, + "learning_rate": 4.5991130713124995e-06, + "loss": 1.348, + "step": 9150 + }, + { + "epoch": 0.965295358649789, + "grad_norm": 0.6540078520774841, + "learning_rate": 4.571394068083185e-06, + "loss": 1.3281, + "step": 9151 + }, + { + "epoch": 0.9654008438818565, + "grad_norm": 0.637992262840271, + "learning_rate": 4.543758593846175e-06, + "loss": 1.3841, + "step": 9152 + }, + { + "epoch": 0.9655063291139241, + "grad_norm": 0.6215776801109314, + "learning_rate": 4.516206651698246e-06, + "loss": 1.3844, + "step": 9153 + }, + { + "epoch": 0.9656118143459915, + "grad_norm": 0.6314142942428589, + "learning_rate": 4.488738244726593e-06, + "loss": 1.3752, + "step": 9154 + }, + { + "epoch": 0.9657172995780591, + "grad_norm": 0.636328935623169, + "learning_rate": 4.4613533760093365e-06, + "loss": 1.3807, + "step": 9155 + }, + { + "epoch": 0.9658227848101266, + 
"grad_norm": 0.6428817510604858, + "learning_rate": 4.434052048615022e-06, + "loss": 1.3588, + "step": 9156 + }, + { + "epoch": 0.9659282700421941, + "grad_norm": 0.6948763132095337, + "learning_rate": 4.4068342656028715e-06, + "loss": 1.3528, + "step": 9157 + }, + { + "epoch": 0.9660337552742616, + "grad_norm": 0.6317366361618042, + "learning_rate": 4.37970003002286e-06, + "loss": 1.3879, + "step": 9158 + }, + { + "epoch": 0.9661392405063292, + "grad_norm": 0.6349298357963562, + "learning_rate": 4.352649344915471e-06, + "loss": 1.3429, + "step": 9159 + }, + { + "epoch": 0.9662447257383966, + "grad_norm": 0.6322272419929504, + "learning_rate": 4.325682213311782e-06, + "loss": 1.3746, + "step": 9160 + }, + { + "epoch": 0.9663502109704641, + "grad_norm": 0.6261911392211914, + "learning_rate": 4.298798638233709e-06, + "loss": 1.3234, + "step": 9161 + }, + { + "epoch": 0.9664556962025317, + "grad_norm": 0.6401295065879822, + "learning_rate": 4.271998622693674e-06, + "loss": 1.3498, + "step": 9162 + }, + { + "epoch": 0.9665611814345991, + "grad_norm": 0.6263173818588257, + "learning_rate": 4.245282169694692e-06, + "loss": 1.3251, + "step": 9163 + }, + { + "epoch": 0.9666666666666667, + "grad_norm": 0.6888350248336792, + "learning_rate": 4.218649282230536e-06, + "loss": 1.4025, + "step": 9164 + }, + { + "epoch": 0.9667721518987342, + "grad_norm": 0.6893624663352966, + "learning_rate": 4.192099963285484e-06, + "loss": 1.405, + "step": 9165 + }, + { + "epoch": 0.9668776371308017, + "grad_norm": 0.7494513392448425, + "learning_rate": 4.165634215834574e-06, + "loss": 1.3812, + "step": 9166 + }, + { + "epoch": 0.9669831223628692, + "grad_norm": 0.6195637583732605, + "learning_rate": 4.139252042843517e-06, + "loss": 1.3695, + "step": 9167 + }, + { + "epoch": 0.9670886075949368, + "grad_norm": 0.6503784656524658, + "learning_rate": 4.112953447268364e-06, + "loss": 1.3569, + "step": 9168 + }, + { + "epoch": 0.9671940928270042, + "grad_norm": 0.6259627938270569, + 
"learning_rate": 4.086738432056092e-06, + "loss": 1.3978, + "step": 9169 + }, + { + "epoch": 0.9672995780590717, + "grad_norm": 0.629814863204956, + "learning_rate": 4.060607000144351e-06, + "loss": 1.3246, + "step": 9170 + }, + { + "epoch": 0.9674050632911393, + "grad_norm": 0.6470849514007568, + "learning_rate": 4.034559154461049e-06, + "loss": 1.3121, + "step": 9171 + }, + { + "epoch": 0.9675105485232067, + "grad_norm": 0.63365238904953, + "learning_rate": 4.008594897925183e-06, + "loss": 1.3974, + "step": 9172 + }, + { + "epoch": 0.9676160337552743, + "grad_norm": 0.6289753317832947, + "learning_rate": 3.982714233446094e-06, + "loss": 1.3754, + "step": 9173 + }, + { + "epoch": 0.9677215189873418, + "grad_norm": 0.7102535367012024, + "learning_rate": 3.956917163923879e-06, + "loss": 1.3638, + "step": 9174 + }, + { + "epoch": 0.9678270042194093, + "grad_norm": 0.6355726718902588, + "learning_rate": 3.931203692249141e-06, + "loss": 1.3421, + "step": 9175 + }, + { + "epoch": 0.9679324894514768, + "grad_norm": 0.6406049728393555, + "learning_rate": 3.905573821303327e-06, + "loss": 1.3434, + "step": 9176 + }, + { + "epoch": 0.9680379746835444, + "grad_norm": 0.6274168491363525, + "learning_rate": 3.880027553958304e-06, + "loss": 1.3632, + "step": 9177 + }, + { + "epoch": 0.9681434599156118, + "grad_norm": 0.6698519587516785, + "learning_rate": 3.8545648930767005e-06, + "loss": 1.3771, + "step": 9178 + }, + { + "epoch": 0.9682489451476793, + "grad_norm": 0.6482182741165161, + "learning_rate": 3.8291858415117344e-06, + "loss": 1.3686, + "step": 9179 + }, + { + "epoch": 0.9683544303797469, + "grad_norm": 0.628389835357666, + "learning_rate": 3.803890402107213e-06, + "loss": 1.3319, + "step": 9180 + }, + { + "epoch": 0.9684599156118143, + "grad_norm": 0.6785874962806702, + "learning_rate": 3.7786785776976198e-06, + "loss": 1.3705, + "step": 9181 + }, + { + "epoch": 0.9685654008438819, + "grad_norm": 0.6599942445755005, + "learning_rate": 3.7535503711080276e-06, + "loss": 
1.3548, + "step": 9182 + }, + { + "epoch": 0.9686708860759494, + "grad_norm": 0.632425844669342, + "learning_rate": 3.7285057851543515e-06, + "loss": 1.2837, + "step": 9183 + }, + { + "epoch": 0.9687763713080169, + "grad_norm": 0.7012710571289062, + "learning_rate": 3.703544822642846e-06, + "loss": 1.3744, + "step": 9184 + }, + { + "epoch": 0.9688818565400844, + "grad_norm": 0.6390120983123779, + "learning_rate": 3.6786674863704406e-06, + "loss": 1.3741, + "step": 9185 + }, + { + "epoch": 0.9689873417721518, + "grad_norm": 0.641294538974762, + "learning_rate": 3.6538737791249053e-06, + "loss": 1.3532, + "step": 9186 + }, + { + "epoch": 0.9690928270042194, + "grad_norm": 0.6857505440711975, + "learning_rate": 3.629163703684352e-06, + "loss": 1.3731, + "step": 9187 + }, + { + "epoch": 0.9691983122362869, + "grad_norm": 0.6290237903594971, + "learning_rate": 3.604537262817814e-06, + "loss": 1.3672, + "step": 9188 + }, + { + "epoch": 0.9693037974683544, + "grad_norm": 0.6762758493423462, + "learning_rate": 3.579994459284752e-06, + "loss": 1.3842, + "step": 9189 + }, + { + "epoch": 0.9694092827004219, + "grad_norm": 0.6260939836502075, + "learning_rate": 3.555535295835216e-06, + "loss": 1.3323, + "step": 9190 + }, + { + "epoch": 0.9695147679324895, + "grad_norm": 0.6988619565963745, + "learning_rate": 3.5311597752100964e-06, + "loss": 1.3968, + "step": 9191 + }, + { + "epoch": 0.9696202531645569, + "grad_norm": 0.63655024766922, + "learning_rate": 3.506867900140792e-06, + "loss": 1.3668, + "step": 9192 + }, + { + "epoch": 0.9697257383966245, + "grad_norm": 0.6550729274749756, + "learning_rate": 3.4826596733492087e-06, + "loss": 1.333, + "step": 9193 + }, + { + "epoch": 0.969831223628692, + "grad_norm": 0.6400544047355652, + "learning_rate": 3.4585350975481766e-06, + "loss": 1.3659, + "step": 9194 + }, + { + "epoch": 0.9699367088607594, + "grad_norm": 0.6576273441314697, + "learning_rate": 3.4344941754408663e-06, + "loss": 1.3522, + "step": 9195 + }, + { + "epoch": 
0.970042194092827, + "grad_norm": 0.6391363143920898, + "learning_rate": 3.4105369097211238e-06, + "loss": 1.3565, + "step": 9196 + }, + { + "epoch": 0.9701476793248945, + "grad_norm": 0.6292679905891418, + "learning_rate": 3.386663303073634e-06, + "loss": 1.3732, + "step": 9197 + }, + { + "epoch": 0.970253164556962, + "grad_norm": 0.6567360758781433, + "learning_rate": 3.362873358173424e-06, + "loss": 1.3306, + "step": 9198 + }, + { + "epoch": 0.9703586497890295, + "grad_norm": 0.636402428150177, + "learning_rate": 3.339167077686278e-06, + "loss": 1.3885, + "step": 9199 + }, + { + "epoch": 0.9704641350210971, + "grad_norm": 0.6358939409255981, + "learning_rate": 3.3155444642687384e-06, + "loss": 1.3289, + "step": 9200 + }, + { + "epoch": 0.9705696202531645, + "grad_norm": 0.6646636724472046, + "learning_rate": 3.2920055205676867e-06, + "loss": 1.3755, + "step": 9201 + }, + { + "epoch": 0.9706751054852321, + "grad_norm": 0.6661490201950073, + "learning_rate": 3.2685502492208475e-06, + "loss": 1.3834, + "step": 9202 + }, + { + "epoch": 0.9707805907172996, + "grad_norm": 0.633707582950592, + "learning_rate": 3.245178652856534e-06, + "loss": 1.3295, + "step": 9203 + }, + { + "epoch": 0.970886075949367, + "grad_norm": 0.6374852061271667, + "learning_rate": 3.221890734093569e-06, + "loss": 1.3731, + "step": 9204 + }, + { + "epoch": 0.9709915611814346, + "grad_norm": 0.634879469871521, + "learning_rate": 3.198686495541531e-06, + "loss": 1.3786, + "step": 9205 + }, + { + "epoch": 0.9710970464135021, + "grad_norm": 0.6653866767883301, + "learning_rate": 3.1755659398005066e-06, + "loss": 1.3265, + "step": 9206 + }, + { + "epoch": 0.9712025316455696, + "grad_norm": 0.6651393175125122, + "learning_rate": 3.152529069461424e-06, + "loss": 1.3792, + "step": 9207 + }, + { + "epoch": 0.9713080168776371, + "grad_norm": 0.6719794869422913, + "learning_rate": 3.129575887105468e-06, + "loss": 1.3522, + "step": 9208 + }, + { + "epoch": 0.9714135021097047, + "grad_norm": 
0.6343987584114075, + "learning_rate": 3.1067063953048313e-06, + "loss": 1.3356, + "step": 9209 + }, + { + "epoch": 0.9715189873417721, + "grad_norm": 0.6742860674858093, + "learning_rate": 3.0839205966220474e-06, + "loss": 1.3516, + "step": 9210 + }, + { + "epoch": 0.9716244725738397, + "grad_norm": 0.7439141273498535, + "learning_rate": 3.06121849361049e-06, + "loss": 1.3721, + "step": 9211 + }, + { + "epoch": 0.9717299578059072, + "grad_norm": 0.6476523876190186, + "learning_rate": 3.0386000888139588e-06, + "loss": 1.3822, + "step": 9212 + }, + { + "epoch": 0.9718354430379746, + "grad_norm": 0.6568520069122314, + "learning_rate": 3.0160653847669252e-06, + "loss": 1.3988, + "step": 9213 + }, + { + "epoch": 0.9719409282700422, + "grad_norm": 0.6366230845451355, + "learning_rate": 2.9936143839946193e-06, + "loss": 1.3449, + "step": 9214 + }, + { + "epoch": 0.9720464135021097, + "grad_norm": 0.6252294778823853, + "learning_rate": 2.9712470890126962e-06, + "loss": 1.3518, + "step": 9215 + }, + { + "epoch": 0.9721518987341772, + "grad_norm": 0.6292774677276611, + "learning_rate": 2.9489635023275676e-06, + "loss": 1.3236, + "step": 9216 + }, + { + "epoch": 0.9722573839662447, + "grad_norm": 0.6490638256072998, + "learning_rate": 2.9267636264361517e-06, + "loss": 1.3597, + "step": 9217 + }, + { + "epoch": 0.9723628691983123, + "grad_norm": 0.6263503432273865, + "learning_rate": 2.90464746382621e-06, + "loss": 1.325, + "step": 9218 + }, + { + "epoch": 0.9724683544303797, + "grad_norm": 0.6582847833633423, + "learning_rate": 2.8826150169758425e-06, + "loss": 1.3617, + "step": 9219 + }, + { + "epoch": 0.9725738396624473, + "grad_norm": 0.6512768864631653, + "learning_rate": 2.8606662883539082e-06, + "loss": 1.3627, + "step": 9220 + }, + { + "epoch": 0.9726793248945148, + "grad_norm": 0.666912853717804, + "learning_rate": 2.838801280419856e-06, + "loss": 1.3765, + "step": 9221 + }, + { + "epoch": 0.9727848101265822, + "grad_norm": 0.6345261931419373, + "learning_rate": 
2.817019995623893e-06, + "loss": 1.3283, + "step": 9222 + }, + { + "epoch": 0.9728902953586498, + "grad_norm": 0.6287469863891602, + "learning_rate": 2.7953224364065667e-06, + "loss": 1.3588, + "step": 9223 + }, + { + "epoch": 0.9729957805907173, + "grad_norm": 0.7152419686317444, + "learning_rate": 2.7737086051992653e-06, + "loss": 1.3426, + "step": 9224 + }, + { + "epoch": 0.9731012658227848, + "grad_norm": 0.6516245603561401, + "learning_rate": 2.752178504423969e-06, + "loss": 1.3411, + "step": 9225 + }, + { + "epoch": 0.9732067510548523, + "grad_norm": 0.6869879364967346, + "learning_rate": 2.7307321364930804e-06, + "loss": 1.365, + "step": 9226 + }, + { + "epoch": 0.9733122362869199, + "grad_norm": 0.6608257293701172, + "learning_rate": 2.7093695038099277e-06, + "loss": 1.3254, + "step": 9227 + }, + { + "epoch": 0.9734177215189873, + "grad_norm": 0.6745597720146179, + "learning_rate": 2.6880906087682622e-06, + "loss": 1.3536, + "step": 9228 + }, + { + "epoch": 0.9735232067510549, + "grad_norm": 0.7104049324989319, + "learning_rate": 2.66689545375251e-06, + "loss": 1.3256, + "step": 9229 + }, + { + "epoch": 0.9736286919831224, + "grad_norm": 0.6613138318061829, + "learning_rate": 2.6457840411376888e-06, + "loss": 1.3758, + "step": 9230 + }, + { + "epoch": 0.9737341772151898, + "grad_norm": 0.6502285599708557, + "learning_rate": 2.624756373289322e-06, + "loss": 1.3704, + "step": 9231 + }, + { + "epoch": 0.9738396624472574, + "grad_norm": 0.6421935558319092, + "learning_rate": 2.603812452563775e-06, + "loss": 1.3082, + "step": 9232 + }, + { + "epoch": 0.9739451476793249, + "grad_norm": 0.6277328729629517, + "learning_rate": 2.5829522813079207e-06, + "loss": 1.347, + "step": 9233 + }, + { + "epoch": 0.9740506329113924, + "grad_norm": 0.6512101888656616, + "learning_rate": 2.5621758618591394e-06, + "loss": 1.3566, + "step": 9234 + }, + { + "epoch": 0.9741561181434599, + "grad_norm": 0.6906675696372986, + "learning_rate": 2.541483196545735e-06, + "loss": 1.3545, + 
"step": 9235 + }, + { + "epoch": 0.9742616033755275, + "grad_norm": 0.6329959034919739, + "learning_rate": 2.52087428768627e-06, + "loss": 1.343, + "step": 9236 + }, + { + "epoch": 0.9743670886075949, + "grad_norm": 0.6583462357521057, + "learning_rate": 2.5003491375900633e-06, + "loss": 1.3317, + "step": 9237 + }, + { + "epoch": 0.9744725738396625, + "grad_norm": 0.6465904116630554, + "learning_rate": 2.4799077485571087e-06, + "loss": 1.3622, + "step": 9238 + }, + { + "epoch": 0.97457805907173, + "grad_norm": 0.619859516620636, + "learning_rate": 2.4595501228779906e-06, + "loss": 1.3148, + "step": 9239 + }, + { + "epoch": 0.9746835443037974, + "grad_norm": 0.6415495872497559, + "learning_rate": 2.4392762628338838e-06, + "loss": 1.3496, + "step": 9240 + }, + { + "epoch": 0.974789029535865, + "grad_norm": 0.6342458128929138, + "learning_rate": 2.419086170696472e-06, + "loss": 1.3263, + "step": 9241 + }, + { + "epoch": 0.9748945147679325, + "grad_norm": 0.6379193663597107, + "learning_rate": 2.3989798487282776e-06, + "loss": 1.3843, + "step": 9242 + }, + { + "epoch": 0.975, + "grad_norm": 0.6334319114685059, + "learning_rate": 2.3789572991822495e-06, + "loss": 1.3336, + "step": 9243 + }, + { + "epoch": 0.9751054852320675, + "grad_norm": 0.7422937154769897, + "learning_rate": 2.3590185243020092e-06, + "loss": 1.3751, + "step": 9244 + }, + { + "epoch": 0.9752109704641351, + "grad_norm": 0.672978401184082, + "learning_rate": 2.3391635263218526e-06, + "loss": 1.3089, + "step": 9245 + }, + { + "epoch": 0.9753164556962025, + "grad_norm": 0.6682551503181458, + "learning_rate": 2.3193923074665834e-06, + "loss": 1.3685, + "step": 9246 + }, + { + "epoch": 0.9754219409282701, + "grad_norm": 0.6441994905471802, + "learning_rate": 2.299704869951763e-06, + "loss": 1.3848, + "step": 9247 + }, + { + "epoch": 0.9755274261603376, + "grad_norm": 0.6848739981651306, + "learning_rate": 2.2801012159832933e-06, + "loss": 1.348, + "step": 9248 + }, + { + "epoch": 0.975632911392405, + 
"grad_norm": 0.6232642531394958, + "learning_rate": 2.2605813477579172e-06, + "loss": 1.3782, + "step": 9249 + }, + { + "epoch": 0.9757383966244726, + "grad_norm": 0.6323142647743225, + "learning_rate": 2.2411452674630517e-06, + "loss": 1.3511, + "step": 9250 + }, + { + "epoch": 0.97584388185654, + "grad_norm": 0.661388635635376, + "learning_rate": 2.2217929772764545e-06, + "loss": 1.3372, + "step": 9251 + }, + { + "epoch": 0.9759493670886076, + "grad_norm": 0.6675936579704285, + "learning_rate": 2.2025244793667242e-06, + "loss": 1.3557, + "step": 9252 + }, + { + "epoch": 0.9760548523206751, + "grad_norm": 0.6349383592605591, + "learning_rate": 2.1833397758929674e-06, + "loss": 1.3681, + "step": 9253 + }, + { + "epoch": 0.9761603375527426, + "grad_norm": 0.6515568494796753, + "learning_rate": 2.1642388690049643e-06, + "loss": 1.3613, + "step": 9254 + }, + { + "epoch": 0.9762658227848101, + "grad_norm": 0.6616409420967102, + "learning_rate": 2.1452217608430857e-06, + "loss": 1.339, + "step": 9255 + }, + { + "epoch": 0.9763713080168777, + "grad_norm": 0.640874445438385, + "learning_rate": 2.126288453538211e-06, + "loss": 1.3427, + "step": 9256 + }, + { + "epoch": 0.9764767932489451, + "grad_norm": 0.6253330707550049, + "learning_rate": 2.107438949211976e-06, + "loss": 1.3963, + "step": 9257 + }, + { + "epoch": 0.9765822784810126, + "grad_norm": 0.6539896726608276, + "learning_rate": 2.0886732499764416e-06, + "loss": 1.3512, + "step": 9258 + }, + { + "epoch": 0.9766877637130802, + "grad_norm": 0.6273428201675415, + "learning_rate": 2.069991357934592e-06, + "loss": 1.3447, + "step": 9259 + }, + { + "epoch": 0.9767932489451476, + "grad_norm": 0.6456930041313171, + "learning_rate": 2.0513932751796695e-06, + "loss": 1.3472, + "step": 9260 + }, + { + "epoch": 0.9768987341772152, + "grad_norm": 0.6501388549804688, + "learning_rate": 2.0328790037957568e-06, + "loss": 1.3614, + "step": 9261 + }, + { + "epoch": 0.9770042194092827, + "grad_norm": 0.7157672047615051, + 
"learning_rate": 2.0144485458574446e-06, + "loss": 1.3626, + "step": 9262 + }, + { + "epoch": 0.9771097046413502, + "grad_norm": 0.6424756050109863, + "learning_rate": 1.9961019034299976e-06, + "loss": 1.3742, + "step": 9263 + }, + { + "epoch": 0.9772151898734177, + "grad_norm": 0.6336686015129089, + "learning_rate": 1.977839078569188e-06, + "loss": 1.3916, + "step": 9264 + }, + { + "epoch": 0.9773206751054853, + "grad_norm": 0.6337883472442627, + "learning_rate": 1.959660073321545e-06, + "loss": 1.3499, + "step": 9265 + }, + { + "epoch": 0.9774261603375527, + "grad_norm": 0.6535773277282715, + "learning_rate": 1.94156488972394e-06, + "loss": 1.3704, + "step": 9266 + }, + { + "epoch": 0.9775316455696202, + "grad_norm": 0.6410531997680664, + "learning_rate": 1.9235535298042506e-06, + "loss": 1.3538, + "step": 9267 + }, + { + "epoch": 0.9776371308016878, + "grad_norm": 0.629086971282959, + "learning_rate": 1.905625995580612e-06, + "loss": 1.3535, + "step": 9268 + }, + { + "epoch": 0.9777426160337552, + "grad_norm": 0.7129918932914734, + "learning_rate": 1.8877822890618346e-06, + "loss": 1.3615, + "step": 9269 + }, + { + "epoch": 0.9778481012658228, + "grad_norm": 0.6386505961418152, + "learning_rate": 1.8700224122475683e-06, + "loss": 1.3679, + "step": 9270 + }, + { + "epoch": 0.9779535864978903, + "grad_norm": 0.6359573006629944, + "learning_rate": 1.8523463671278052e-06, + "loss": 1.3261, + "step": 9271 + }, + { + "epoch": 0.9780590717299578, + "grad_norm": 0.650166928768158, + "learning_rate": 1.8347541556832104e-06, + "loss": 1.3601, + "step": 9272 + }, + { + "epoch": 0.9781645569620253, + "grad_norm": 0.6427909135818481, + "learning_rate": 1.8172457798850407e-06, + "loss": 1.3656, + "step": 9273 + }, + { + "epoch": 0.9782700421940929, + "grad_norm": 0.6251278519630432, + "learning_rate": 1.7998212416953096e-06, + "loss": 1.3759, + "step": 9274 + }, + { + "epoch": 0.9783755274261603, + "grad_norm": 0.6182255148887634, + "learning_rate": 1.782480543066456e-06, + 
"loss": 1.3671, + "step": 9275 + }, + { + "epoch": 0.9784810126582278, + "grad_norm": 0.6677987575531006, + "learning_rate": 1.7652236859416748e-06, + "loss": 1.338, + "step": 9276 + }, + { + "epoch": 0.9785864978902954, + "grad_norm": 0.6628646850585938, + "learning_rate": 1.7480506722545864e-06, + "loss": 1.3377, + "step": 9277 + }, + { + "epoch": 0.9786919831223628, + "grad_norm": 0.6367526650428772, + "learning_rate": 1.7309615039294847e-06, + "loss": 1.3781, + "step": 9278 + }, + { + "epoch": 0.9787974683544304, + "grad_norm": 0.6539214253425598, + "learning_rate": 1.7139561828813377e-06, + "loss": 1.3507, + "step": 9279 + }, + { + "epoch": 0.9789029535864979, + "grad_norm": 0.6405346393585205, + "learning_rate": 1.6970347110157879e-06, + "loss": 1.3111, + "step": 9280 + }, + { + "epoch": 0.9790084388185654, + "grad_norm": 0.6365873217582703, + "learning_rate": 1.6801970902288188e-06, + "loss": 1.3477, + "step": 9281 + }, + { + "epoch": 0.9791139240506329, + "grad_norm": 0.6321855187416077, + "learning_rate": 1.6634433224072543e-06, + "loss": 1.3512, + "step": 9282 + }, + { + "epoch": 0.9792194092827005, + "grad_norm": 0.6240972280502319, + "learning_rate": 1.6467734094283427e-06, + "loss": 1.347, + "step": 9283 + }, + { + "epoch": 0.9793248945147679, + "grad_norm": 0.6240954399108887, + "learning_rate": 1.630187353160173e-06, + "loss": 1.3652, + "step": 9284 + }, + { + "epoch": 0.9794303797468354, + "grad_norm": 0.6387749314308167, + "learning_rate": 1.6136851554611753e-06, + "loss": 1.3821, + "step": 9285 + }, + { + "epoch": 0.979535864978903, + "grad_norm": 0.6244833469390869, + "learning_rate": 1.5972668181805373e-06, + "loss": 1.3437, + "step": 9286 + }, + { + "epoch": 0.9796413502109704, + "grad_norm": 0.6633557677268982, + "learning_rate": 1.580932343158037e-06, + "loss": 1.3638, + "step": 9287 + }, + { + "epoch": 0.979746835443038, + "grad_norm": 0.6280501484870911, + "learning_rate": 1.5646817322240436e-06, + "loss": 1.3591, + "step": 9288 + }, + { + 
"epoch": 0.9798523206751055, + "grad_norm": 0.6619694232940674, + "learning_rate": 1.5485149871995175e-06, + "loss": 1.3487, + "step": 9289 + }, + { + "epoch": 0.979957805907173, + "grad_norm": 0.634674072265625, + "learning_rate": 1.532432109895926e-06, + "loss": 1.3681, + "step": 9290 + }, + { + "epoch": 0.9800632911392405, + "grad_norm": 0.6457034945487976, + "learning_rate": 1.5164331021155774e-06, + "loss": 1.335, + "step": 9291 + }, + { + "epoch": 0.9801687763713081, + "grad_norm": 0.62851482629776, + "learning_rate": 1.5005179656511213e-06, + "loss": 1.3318, + "step": 9292 + }, + { + "epoch": 0.9802742616033755, + "grad_norm": 0.6548060178756714, + "learning_rate": 1.4846867022860477e-06, + "loss": 1.3684, + "step": 9293 + }, + { + "epoch": 0.980379746835443, + "grad_norm": 0.639781653881073, + "learning_rate": 1.4689393137941876e-06, + "loss": 1.3753, + "step": 9294 + }, + { + "epoch": 0.9804852320675106, + "grad_norm": 0.6159843802452087, + "learning_rate": 1.4532758019402958e-06, + "loss": 1.3638, + "step": 9295 + }, + { + "epoch": 0.980590717299578, + "grad_norm": 0.63273024559021, + "learning_rate": 1.4376961684793854e-06, + "loss": 1.3427, + "step": 9296 + }, + { + "epoch": 0.9806962025316456, + "grad_norm": 0.6520577073097229, + "learning_rate": 1.4222004151572265e-06, + "loss": 1.3472, + "step": 9297 + }, + { + "epoch": 0.9808016877637131, + "grad_norm": 0.6738293170928955, + "learning_rate": 1.4067885437103467e-06, + "loss": 1.3732, + "step": 9298 + }, + { + "epoch": 0.9809071729957806, + "grad_norm": 0.6750359535217285, + "learning_rate": 1.3914605558656146e-06, + "loss": 1.3476, + "step": 9299 + }, + { + "epoch": 0.9810126582278481, + "grad_norm": 0.6512549519538879, + "learning_rate": 1.376216453340573e-06, + "loss": 1.3713, + "step": 9300 + }, + { + "epoch": 0.9811181434599157, + "grad_norm": 0.6527745723724365, + "learning_rate": 1.3610562378435221e-06, + "loss": 1.3384, + "step": 9301 + }, + { + "epoch": 0.9812236286919831, + "grad_norm": 
0.6893266439437866, + "learning_rate": 1.345979911073103e-06, + "loss": 1.3546, + "step": 9302 + }, + { + "epoch": 0.9813291139240506, + "grad_norm": 0.6523940563201904, + "learning_rate": 1.3309874747187978e-06, + "loss": 1.3829, + "step": 9303 + }, + { + "epoch": 0.9814345991561182, + "grad_norm": 0.6805461049079895, + "learning_rate": 1.3160789304605958e-06, + "loss": 1.3311, + "step": 9304 + }, + { + "epoch": 0.9815400843881856, + "grad_norm": 0.6325927376747131, + "learning_rate": 1.3012542799689108e-06, + "loss": 1.3653, + "step": 9305 + }, + { + "epoch": 0.9816455696202532, + "grad_norm": 0.6268103718757629, + "learning_rate": 1.286513524905164e-06, + "loss": 1.3695, + "step": 9306 + }, + { + "epoch": 0.9817510548523207, + "grad_norm": 0.6630647778511047, + "learning_rate": 1.2718566669208675e-06, + "loss": 1.3954, + "step": 9307 + }, + { + "epoch": 0.9818565400843882, + "grad_norm": 0.6202094554901123, + "learning_rate": 1.2572837076586241e-06, + "loss": 1.3673, + "step": 9308 + }, + { + "epoch": 0.9819620253164557, + "grad_norm": 0.6339752078056335, + "learning_rate": 1.2427946487512941e-06, + "loss": 1.3116, + "step": 9309 + }, + { + "epoch": 0.9820675105485233, + "grad_norm": 0.6420177817344666, + "learning_rate": 1.2283894918224125e-06, + "loss": 1.3716, + "step": 9310 + }, + { + "epoch": 0.9821729957805907, + "grad_norm": 0.6502456665039062, + "learning_rate": 1.2140682384862712e-06, + "loss": 1.3445, + "step": 9311 + }, + { + "epoch": 0.9822784810126582, + "grad_norm": 0.6461179256439209, + "learning_rate": 1.199830890347503e-06, + "loss": 1.3581, + "step": 9312 + }, + { + "epoch": 0.9823839662447258, + "grad_norm": 0.6408535838127136, + "learning_rate": 1.185677449001582e-06, + "loss": 1.3511, + "step": 9313 + }, + { + "epoch": 0.9824894514767932, + "grad_norm": 0.62728351354599, + "learning_rate": 1.1716079160344061e-06, + "loss": 1.3942, + "step": 9314 + }, + { + "epoch": 0.9825949367088608, + "grad_norm": 0.6552984118461609, + "learning_rate": 
1.1576222930225478e-06, + "loss": 1.3312, + "step": 9315 + }, + { + "epoch": 0.9827004219409282, + "grad_norm": 0.6468480825424194, + "learning_rate": 1.143720581533253e-06, + "loss": 1.3924, + "step": 9316 + }, + { + "epoch": 0.9828059071729958, + "grad_norm": 0.61629718542099, + "learning_rate": 1.1299027831241094e-06, + "loss": 1.335, + "step": 9317 + }, + { + "epoch": 0.9829113924050633, + "grad_norm": 0.6799150109291077, + "learning_rate": 1.1161688993435449e-06, + "loss": 1.3382, + "step": 9318 + }, + { + "epoch": 0.9830168776371307, + "grad_norm": 0.7157482504844666, + "learning_rate": 1.1025189317305784e-06, + "loss": 1.3508, + "step": 9319 + }, + { + "epoch": 0.9831223628691983, + "grad_norm": 0.6329525709152222, + "learning_rate": 1.0889528818147366e-06, + "loss": 1.3359, + "step": 9320 + }, + { + "epoch": 0.9832278481012658, + "grad_norm": 0.640806257724762, + "learning_rate": 1.0754707511161365e-06, + "loss": 1.3762, + "step": 9321 + }, + { + "epoch": 0.9833333333333333, + "grad_norm": 0.6600736975669861, + "learning_rate": 1.0620725411454868e-06, + "loss": 1.3775, + "step": 9322 + }, + { + "epoch": 0.9834388185654008, + "grad_norm": 0.6377180218696594, + "learning_rate": 1.0487582534040863e-06, + "loss": 1.4289, + "step": 9323 + }, + { + "epoch": 0.9835443037974684, + "grad_norm": 0.6611717343330383, + "learning_rate": 1.0355278893839915e-06, + "loss": 1.3706, + "step": 9324 + }, + { + "epoch": 0.9836497890295358, + "grad_norm": 0.6211128234863281, + "learning_rate": 1.0223814505676832e-06, + "loss": 1.3555, + "step": 9325 + }, + { + "epoch": 0.9837552742616034, + "grad_norm": 0.6422106623649597, + "learning_rate": 1.009318938428233e-06, + "loss": 1.3684, + "step": 9326 + }, + { + "epoch": 0.9838607594936709, + "grad_norm": 0.6441232562065125, + "learning_rate": 9.963403544294702e-07, + "loss": 1.3704, + "step": 9327 + }, + { + "epoch": 0.9839662447257383, + "grad_norm": 0.6492293477058411, + "learning_rate": 9.834457000255647e-07, + "loss": 1.3315, + 
"step": 9328 + }, + { + "epoch": 0.9840717299578059, + "grad_norm": 0.7273208498954773, + "learning_rate": 9.706349766615275e-07, + "loss": 1.3498, + "step": 9329 + }, + { + "epoch": 0.9841772151898734, + "grad_norm": 0.6579452157020569, + "learning_rate": 9.579081857728766e-07, + "loss": 1.3319, + "step": 9330 + }, + { + "epoch": 0.9842827004219409, + "grad_norm": 0.6444456577301025, + "learning_rate": 9.452653287856383e-07, + "loss": 1.369, + "step": 9331 + }, + { + "epoch": 0.9843881856540084, + "grad_norm": 0.6272783875465393, + "learning_rate": 9.327064071165126e-07, + "loss": 1.3338, + "step": 9332 + }, + { + "epoch": 0.984493670886076, + "grad_norm": 0.634916365146637, + "learning_rate": 9.202314221728735e-07, + "loss": 1.374, + "step": 9333 + }, + { + "epoch": 0.9845991561181434, + "grad_norm": 0.6506290435791016, + "learning_rate": 9.078403753525199e-07, + "loss": 1.3846, + "step": 9334 + }, + { + "epoch": 0.984704641350211, + "grad_norm": 0.6207000613212585, + "learning_rate": 8.955332680440076e-07, + "loss": 1.3293, + "step": 9335 + }, + { + "epoch": 0.9848101265822785, + "grad_norm": 0.646986722946167, + "learning_rate": 8.833101016263168e-07, + "loss": 1.3607, + "step": 9336 + }, + { + "epoch": 0.984915611814346, + "grad_norm": 0.6601268649101257, + "learning_rate": 8.711708774691851e-07, + "loss": 1.3684, + "step": 9337 + }, + { + "epoch": 0.9850210970464135, + "grad_norm": 0.638270378112793, + "learning_rate": 8.591155969327746e-07, + "loss": 1.3506, + "step": 9338 + }, + { + "epoch": 0.985126582278481, + "grad_norm": 0.632773756980896, + "learning_rate": 8.47144261368088e-07, + "loss": 1.3587, + "step": 9339 + }, + { + "epoch": 0.9852320675105485, + "grad_norm": 0.6211699843406677, + "learning_rate": 8.352568721165521e-07, + "loss": 1.383, + "step": 9340 + }, + { + "epoch": 0.985337552742616, + "grad_norm": 0.6350894570350647, + "learning_rate": 8.234534305101015e-07, + "loss": 1.3831, + "step": 9341 + }, + { + "epoch": 0.9854430379746836, + 
"grad_norm": 0.6795880794525146, + "learning_rate": 8.117339378714283e-07, + "loss": 1.3737, + "step": 9342 + }, + { + "epoch": 0.985548523206751, + "grad_norm": 0.6436785459518433, + "learning_rate": 8.00098395513732e-07, + "loss": 1.4023, + "step": 9343 + }, + { + "epoch": 0.9856540084388186, + "grad_norm": 0.6386290788650513, + "learning_rate": 7.885468047408862e-07, + "loss": 1.3506, + "step": 9344 + }, + { + "epoch": 0.9857594936708861, + "grad_norm": 0.7583345174789429, + "learning_rate": 7.770791668472721e-07, + "loss": 1.3726, + "step": 9345 + }, + { + "epoch": 0.9858649789029535, + "grad_norm": 0.6274396181106567, + "learning_rate": 7.656954831178619e-07, + "loss": 1.3741, + "step": 9346 + }, + { + "epoch": 0.9859704641350211, + "grad_norm": 0.6401108503341675, + "learning_rate": 7.543957548283021e-07, + "loss": 1.3788, + "step": 9347 + }, + { + "epoch": 0.9860759493670886, + "grad_norm": 0.6324493885040283, + "learning_rate": 7.431799832448294e-07, + "loss": 1.4007, + "step": 9348 + }, + { + "epoch": 0.9861814345991561, + "grad_norm": 0.7179774641990662, + "learning_rate": 7.320481696241887e-07, + "loss": 1.3693, + "step": 9349 + }, + { + "epoch": 0.9862869198312236, + "grad_norm": 0.6158773303031921, + "learning_rate": 7.210003152136324e-07, + "loss": 1.3479, + "step": 9350 + }, + { + "epoch": 0.9863924050632912, + "grad_norm": 0.6265453100204468, + "learning_rate": 7.100364212513367e-07, + "loss": 1.3969, + "step": 9351 + }, + { + "epoch": 0.9864978902953586, + "grad_norm": 0.6648955941200256, + "learning_rate": 6.991564889656521e-07, + "loss": 1.3674, + "step": 9352 + }, + { + "epoch": 0.9866033755274262, + "grad_norm": 0.6448813676834106, + "learning_rate": 6.883605195759369e-07, + "loss": 1.3874, + "step": 9353 + }, + { + "epoch": 0.9867088607594937, + "grad_norm": 0.6410589814186096, + "learning_rate": 6.776485142918065e-07, + "loss": 1.3491, + "step": 9354 + }, + { + "epoch": 0.9868143459915611, + "grad_norm": 0.6253377795219421, + "learning_rate": 
6.67020474313551e-07, + "loss": 1.379, + "step": 9355 + }, + { + "epoch": 0.9869198312236287, + "grad_norm": 0.623411238193512, + "learning_rate": 6.564764008322177e-07, + "loss": 1.3596, + "step": 9356 + }, + { + "epoch": 0.9870253164556962, + "grad_norm": 0.6463187336921692, + "learning_rate": 6.460162950292781e-07, + "loss": 1.3495, + "step": 9357 + }, + { + "epoch": 0.9871308016877637, + "grad_norm": 0.6240873336791992, + "learning_rate": 6.356401580767945e-07, + "loss": 1.3345, + "step": 9358 + }, + { + "epoch": 0.9872362869198312, + "grad_norm": 0.631027340888977, + "learning_rate": 6.253479911375037e-07, + "loss": 1.3883, + "step": 9359 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 0.6458188891410828, + "learning_rate": 6.151397953647331e-07, + "loss": 1.3582, + "step": 9360 + }, + { + "epoch": 0.9874472573839662, + "grad_norm": 0.6179155707359314, + "learning_rate": 6.050155719023176e-07, + "loss": 1.3662, + "step": 9361 + }, + { + "epoch": 0.9875527426160338, + "grad_norm": 0.6393219232559204, + "learning_rate": 5.949753218846832e-07, + "loss": 1.369, + "step": 9362 + }, + { + "epoch": 0.9876582278481013, + "grad_norm": 0.647323489189148, + "learning_rate": 5.850190464369298e-07, + "loss": 1.3528, + "step": 9363 + }, + { + "epoch": 0.9877637130801687, + "grad_norm": 0.6388441324234009, + "learning_rate": 5.751467466747484e-07, + "loss": 1.3592, + "step": 9364 + }, + { + "epoch": 0.9878691983122363, + "grad_norm": 0.6389040946960449, + "learning_rate": 5.653584237043374e-07, + "loss": 1.3596, + "step": 9365 + }, + { + "epoch": 0.9879746835443038, + "grad_norm": 0.6651508808135986, + "learning_rate": 5.556540786224862e-07, + "loss": 1.3722, + "step": 9366 + }, + { + "epoch": 0.9880801687763713, + "grad_norm": 0.6396982669830322, + "learning_rate": 5.460337125167414e-07, + "loss": 1.3248, + "step": 9367 + }, + { + "epoch": 0.9881856540084388, + "grad_norm": 0.677069902420044, + "learning_rate": 5.364973264649908e-07, + "loss": 1.3772, + "step": 9368 
+ }, + { + "epoch": 0.9882911392405064, + "grad_norm": 0.634397566318512, + "learning_rate": 5.270449215358797e-07, + "loss": 1.3528, + "step": 9369 + }, + { + "epoch": 0.9883966244725738, + "grad_norm": 0.6511263251304626, + "learning_rate": 5.176764987885607e-07, + "loss": 1.4054, + "step": 9370 + }, + { + "epoch": 0.9885021097046414, + "grad_norm": 0.6213569641113281, + "learning_rate": 5.08392059272944e-07, + "loss": 1.3409, + "step": 9371 + }, + { + "epoch": 0.9886075949367089, + "grad_norm": 0.6297670006752014, + "learning_rate": 4.991916040291977e-07, + "loss": 1.3446, + "step": 9372 + }, + { + "epoch": 0.9887130801687763, + "grad_norm": 0.6254132986068726, + "learning_rate": 4.900751340884135e-07, + "loss": 1.3622, + "step": 9373 + }, + { + "epoch": 0.9888185654008439, + "grad_norm": 0.6419636011123657, + "learning_rate": 4.810426504721077e-07, + "loss": 1.3062, + "step": 9374 + }, + { + "epoch": 0.9889240506329114, + "grad_norm": 0.6290774345397949, + "learning_rate": 4.720941541923873e-07, + "loss": 1.3802, + "step": 9375 + }, + { + "epoch": 0.9890295358649789, + "grad_norm": 0.6252092123031616, + "learning_rate": 4.632296462520336e-07, + "loss": 1.3201, + "step": 9376 + }, + { + "epoch": 0.9891350210970464, + "grad_norm": 0.6515519618988037, + "learning_rate": 4.544491276443352e-07, + "loss": 1.3188, + "step": 9377 + }, + { + "epoch": 0.989240506329114, + "grad_norm": 0.6630059480667114, + "learning_rate": 4.457525993531719e-07, + "loss": 1.3956, + "step": 9378 + }, + { + "epoch": 0.9893459915611814, + "grad_norm": 0.6279820799827576, + "learning_rate": 4.371400623530142e-07, + "loss": 1.3704, + "step": 9379 + }, + { + "epoch": 0.989451476793249, + "grad_norm": 0.671107292175293, + "learning_rate": 4.2861151760900665e-07, + "loss": 1.3529, + "step": 9380 + }, + { + "epoch": 0.9895569620253165, + "grad_norm": 0.619791567325592, + "learning_rate": 4.2016696607680147e-07, + "loss": 1.32, + "step": 9381 + }, + { + "epoch": 0.989662447257384, + "grad_norm": 
0.6565500497817993, + "learning_rate": 4.118064087025586e-07, + "loss": 1.3682, + "step": 9382 + }, + { + "epoch": 0.9897679324894515, + "grad_norm": 0.6168315410614014, + "learning_rate": 4.035298464232784e-07, + "loss": 1.3719, + "step": 9383 + }, + { + "epoch": 0.9898734177215189, + "grad_norm": 0.6243917346000671, + "learning_rate": 3.953372801662192e-07, + "loss": 1.3509, + "step": 9384 + }, + { + "epoch": 0.9899789029535865, + "grad_norm": 0.6148205399513245, + "learning_rate": 3.8722871084956313e-07, + "loss": 1.3505, + "step": 9385 + }, + { + "epoch": 0.990084388185654, + "grad_norm": 0.6575530767440796, + "learning_rate": 3.7920413938175027e-07, + "loss": 1.3448, + "step": 9386 + }, + { + "epoch": 0.9901898734177215, + "grad_norm": 0.6634998917579651, + "learning_rate": 3.7126356666214447e-07, + "loss": 1.379, + "step": 9387 + }, + { + "epoch": 0.990295358649789, + "grad_norm": 0.6244145631790161, + "learning_rate": 3.6340699358036743e-07, + "loss": 1.3347, + "step": 9388 + }, + { + "epoch": 0.9904008438818566, + "grad_norm": 0.6765584945678711, + "learning_rate": 3.5563442101696486e-07, + "loss": 1.3676, + "step": 9389 + }, + { + "epoch": 0.990506329113924, + "grad_norm": 0.6475701332092285, + "learning_rate": 3.479458498426569e-07, + "loss": 1.3635, + "step": 9390 + }, + { + "epoch": 0.9906118143459915, + "grad_norm": 0.6140093803405762, + "learning_rate": 3.4034128091917085e-07, + "loss": 1.3666, + "step": 9391 + }, + { + "epoch": 0.9907172995780591, + "grad_norm": 0.6336082220077515, + "learning_rate": 3.328207150986584e-07, + "loss": 1.3679, + "step": 9392 + }, + { + "epoch": 0.9908227848101265, + "grad_norm": 0.6263810396194458, + "learning_rate": 3.2538415322369563e-07, + "loss": 1.3638, + "step": 9393 + }, + { + "epoch": 0.9909282700421941, + "grad_norm": 0.6588853597640991, + "learning_rate": 3.180315961276159e-07, + "loss": 1.3392, + "step": 9394 + }, + { + "epoch": 0.9910337552742616, + "grad_norm": 0.6692104339599609, + "learning_rate": 
3.107630446344267e-07, + "loss": 1.4096, + "step": 9395 + }, + { + "epoch": 0.9911392405063291, + "grad_norm": 0.6453857421875, + "learning_rate": 3.035784995584767e-07, + "loss": 1.3469, + "step": 9396 + }, + { + "epoch": 0.9912447257383966, + "grad_norm": 0.6509672403335571, + "learning_rate": 2.964779617049551e-07, + "loss": 1.3706, + "step": 9397 + }, + { + "epoch": 0.9913502109704642, + "grad_norm": 0.6679413318634033, + "learning_rate": 2.8946143186930896e-07, + "loss": 1.368, + "step": 9398 + }, + { + "epoch": 0.9914556962025316, + "grad_norm": 0.6390815377235413, + "learning_rate": 2.825289108379925e-07, + "loss": 1.3911, + "step": 9399 + }, + { + "epoch": 0.9915611814345991, + "grad_norm": 0.6348574161529541, + "learning_rate": 2.756803993877177e-07, + "loss": 1.3733, + "step": 9400 + }, + { + "epoch": 0.9916666666666667, + "grad_norm": 0.622804582118988, + "learning_rate": 2.689158982859541e-07, + "loss": 1.32, + "step": 9401 + }, + { + "epoch": 0.9917721518987341, + "grad_norm": 0.6435536742210388, + "learning_rate": 2.622354082905953e-07, + "loss": 1.3632, + "step": 9402 + }, + { + "epoch": 0.9918776371308017, + "grad_norm": 0.65234375, + "learning_rate": 2.556389301502926e-07, + "loss": 1.3126, + "step": 9403 + }, + { + "epoch": 0.9919831223628692, + "grad_norm": 0.6318247318267822, + "learning_rate": 2.491264646042879e-07, + "loss": 1.3424, + "step": 9404 + }, + { + "epoch": 0.9920886075949367, + "grad_norm": 0.7020645141601562, + "learning_rate": 2.426980123821643e-07, + "loss": 1.3395, + "step": 9405 + }, + { + "epoch": 0.9921940928270042, + "grad_norm": 0.6169302463531494, + "learning_rate": 2.3635357420442872e-07, + "loss": 1.3437, + "step": 9406 + }, + { + "epoch": 0.9922995780590718, + "grad_norm": 0.6475908756256104, + "learning_rate": 2.3009315078192926e-07, + "loss": 1.3301, + "step": 9407 + }, + { + "epoch": 0.9924050632911392, + "grad_norm": 0.6303315162658691, + "learning_rate": 2.2391674281610486e-07, + "loss": 1.3618, + "step": 9408 + }, 
+ { + "epoch": 0.9925105485232067, + "grad_norm": 0.65240079164505, + "learning_rate": 2.1782435099923503e-07, + "loss": 1.366, + "step": 9409 + }, + { + "epoch": 0.9926160337552743, + "grad_norm": 0.6768707633018494, + "learning_rate": 2.1181597601385716e-07, + "loss": 1.3526, + "step": 9410 + }, + { + "epoch": 0.9927215189873417, + "grad_norm": 0.6498616933822632, + "learning_rate": 2.05891618533266e-07, + "loss": 1.3987, + "step": 9411 + }, + { + "epoch": 0.9928270042194093, + "grad_norm": 0.6168335676193237, + "learning_rate": 2.0005127922134713e-07, + "loss": 1.3191, + "step": 9412 + }, + { + "epoch": 0.9929324894514768, + "grad_norm": 0.6804923415184021, + "learning_rate": 1.942949587324938e-07, + "loss": 1.3243, + "step": 9413 + }, + { + "epoch": 0.9930379746835443, + "grad_norm": 0.6515710949897766, + "learning_rate": 1.8862265771177333e-07, + "loss": 1.3882, + "step": 9414 + }, + { + "epoch": 0.9931434599156118, + "grad_norm": 0.7137730717658997, + "learning_rate": 1.8303437679476065e-07, + "loss": 1.3758, + "step": 9415 + }, + { + "epoch": 0.9932489451476794, + "grad_norm": 0.6495699286460876, + "learning_rate": 1.775301166077048e-07, + "loss": 1.3536, + "step": 9416 + }, + { + "epoch": 0.9933544303797468, + "grad_norm": 0.6806798577308655, + "learning_rate": 1.7210987776736243e-07, + "loss": 1.3826, + "step": 9417 + }, + { + "epoch": 0.9934599156118143, + "grad_norm": 0.6201052665710449, + "learning_rate": 1.6677366088099777e-07, + "loss": 1.3449, + "step": 9418 + }, + { + "epoch": 0.9935654008438819, + "grad_norm": 0.6351364254951477, + "learning_rate": 1.6152146654671573e-07, + "loss": 1.3883, + "step": 9419 + }, + { + "epoch": 0.9936708860759493, + "grad_norm": 0.649196445941925, + "learning_rate": 1.5635329535304554e-07, + "loss": 1.3227, + "step": 9420 + }, + { + "epoch": 0.9937763713080169, + "grad_norm": 0.645277738571167, + "learning_rate": 1.5126914787894074e-07, + "loss": 1.3994, + "step": 9421 + }, + { + "epoch": 0.9938818565400844, + 
"grad_norm": 0.6419492363929749, + "learning_rate": 1.4626902469427882e-07, + "loss": 1.3793, + "step": 9422 + }, + { + "epoch": 0.9939873417721519, + "grad_norm": 0.6377614736557007, + "learning_rate": 1.4135292635927832e-07, + "loss": 1.3438, + "step": 9423 + }, + { + "epoch": 0.9940928270042194, + "grad_norm": 0.6523425579071045, + "learning_rate": 1.365208534248319e-07, + "loss": 1.3371, + "step": 9424 + }, + { + "epoch": 0.994198312236287, + "grad_norm": 0.6683328747749329, + "learning_rate": 1.3177280643233979e-07, + "loss": 1.3464, + "step": 9425 + }, + { + "epoch": 0.9943037974683544, + "grad_norm": 0.6596412658691406, + "learning_rate": 1.271087859138764e-07, + "loss": 1.3907, + "step": 9426 + }, + { + "epoch": 0.994409282700422, + "grad_norm": 0.6712079048156738, + "learning_rate": 1.2252879239210702e-07, + "loss": 1.3813, + "step": 9427 + }, + { + "epoch": 0.9945147679324895, + "grad_norm": 0.6468806862831116, + "learning_rate": 1.1803282638020441e-07, + "loss": 1.3423, + "step": 9428 + }, + { + "epoch": 0.9946202531645569, + "grad_norm": 0.6442531943321228, + "learning_rate": 1.1362088838193229e-07, + "loss": 1.3803, + "step": 9429 + }, + { + "epoch": 0.9947257383966245, + "grad_norm": 0.6316949725151062, + "learning_rate": 1.0929297889172852e-07, + "loss": 1.3244, + "step": 9430 + }, + { + "epoch": 0.994831223628692, + "grad_norm": 0.6573114991188049, + "learning_rate": 1.0504909839462173e-07, + "loss": 1.3469, + "step": 9431 + }, + { + "epoch": 0.9949367088607595, + "grad_norm": 0.6760499477386475, + "learning_rate": 1.008892473659817e-07, + "loss": 1.385, + "step": 9432 + }, + { + "epoch": 0.995042194092827, + "grad_norm": 0.6219140887260437, + "learning_rate": 9.68134262721021e-08, + "loss": 1.3596, + "step": 9433 + }, + { + "epoch": 0.9951476793248946, + "grad_norm": 0.6374545693397522, + "learning_rate": 9.282163556953437e-08, + "loss": 1.357, + "step": 9434 + }, + { + "epoch": 0.995253164556962, + "grad_norm": 0.6353083252906799, + 
"learning_rate": 8.891387570575393e-08, + "loss": 1.3451, + "step": 9435 + }, + { + "epoch": 0.9953586497890295, + "grad_norm": 0.6620053648948669, + "learning_rate": 8.509014711857721e-08, + "loss": 1.3502, + "step": 9436 + }, + { + "epoch": 0.9954641350210971, + "grad_norm": 0.641240119934082, + "learning_rate": 8.135045023641152e-08, + "loss": 1.3453, + "step": 9437 + }, + { + "epoch": 0.9955696202531645, + "grad_norm": 0.6468110680580139, + "learning_rate": 7.769478547842157e-08, + "loss": 1.35, + "step": 9438 + }, + { + "epoch": 0.9956751054852321, + "grad_norm": 0.6510635614395142, + "learning_rate": 7.412315325411312e-08, + "loss": 1.4, + "step": 9439 + }, + { + "epoch": 0.9957805907172996, + "grad_norm": 0.6338212490081787, + "learning_rate": 7.063555396383259e-08, + "loss": 1.351, + "step": 9440 + }, + { + "epoch": 0.9958860759493671, + "grad_norm": 0.6393499970436096, + "learning_rate": 6.723198799826746e-08, + "loss": 1.3658, + "step": 9441 + }, + { + "epoch": 0.9959915611814346, + "grad_norm": 0.6283863186836243, + "learning_rate": 6.391245573894588e-08, + "loss": 1.3773, + "step": 9442 + }, + { + "epoch": 0.9960970464135022, + "grad_norm": 0.6611254811286926, + "learning_rate": 6.067695755765379e-08, + "loss": 1.3394, + "step": 9443 + }, + { + "epoch": 0.9962025316455696, + "grad_norm": 0.6532069444656372, + "learning_rate": 5.7525493817101035e-08, + "loss": 1.361, + "step": 9444 + }, + { + "epoch": 0.9963080168776371, + "grad_norm": 0.6557204127311707, + "learning_rate": 5.4458064870338553e-08, + "loss": 1.3718, + "step": 9445 + }, + { + "epoch": 0.9964135021097047, + "grad_norm": 0.6314255595207214, + "learning_rate": 5.147467106117465e-08, + "loss": 1.3539, + "step": 9446 + }, + { + "epoch": 0.9965189873417721, + "grad_norm": 0.6771482825279236, + "learning_rate": 4.85753127237587e-08, + "loss": 1.372, + "step": 9447 + }, + { + "epoch": 0.9966244725738397, + "grad_norm": 0.6329267024993896, + "learning_rate": 4.575999018316401e-08, + "loss": 1.3803, 
+ "step": 9448 + }, + { + "epoch": 0.9967299578059071, + "grad_norm": 0.6425904035568237, + "learning_rate": 4.302870375472168e-08, + "loss": 1.3855, + "step": 9449 + }, + { + "epoch": 0.9968354430379747, + "grad_norm": 0.6298028230667114, + "learning_rate": 4.038145374460345e-08, + "loss": 1.3901, + "step": 9450 + }, + { + "epoch": 0.9969409282700422, + "grad_norm": 0.6493426561355591, + "learning_rate": 3.781824044932214e-08, + "loss": 1.359, + "step": 9451 + }, + { + "epoch": 0.9970464135021097, + "grad_norm": 0.7008041143417358, + "learning_rate": 3.533906415614796e-08, + "loss": 1.4078, + "step": 9452 + }, + { + "epoch": 0.9971518987341772, + "grad_norm": 0.6826564073562622, + "learning_rate": 3.294392514285871e-08, + "loss": 1.3477, + "step": 9453 + }, + { + "epoch": 0.9972573839662447, + "grad_norm": 0.6506145596504211, + "learning_rate": 3.0632823677906316e-08, + "loss": 1.3487, + "step": 9454 + }, + { + "epoch": 0.9973628691983122, + "grad_norm": 0.6307803988456726, + "learning_rate": 2.8405760020250304e-08, + "loss": 1.3395, + "step": 9455 + }, + { + "epoch": 0.9974683544303797, + "grad_norm": 0.6181148886680603, + "learning_rate": 2.6262734419441047e-08, + "loss": 1.3453, + "step": 9456 + }, + { + "epoch": 0.9975738396624473, + "grad_norm": 0.6515939235687256, + "learning_rate": 2.420374711561979e-08, + "loss": 1.3348, + "step": 9457 + }, + { + "epoch": 0.9976793248945147, + "grad_norm": 0.6328627467155457, + "learning_rate": 2.2228798339435363e-08, + "loss": 1.3424, + "step": 9458 + }, + { + "epoch": 0.9977848101265823, + "grad_norm": 0.6334707140922546, + "learning_rate": 2.0337888312210727e-08, + "loss": 1.3209, + "step": 9459 + }, + { + "epoch": 0.9978902953586498, + "grad_norm": 0.6467985510826111, + "learning_rate": 1.8531017245942972e-08, + "loss": 1.3731, + "step": 9460 + }, + { + "epoch": 0.9979957805907173, + "grad_norm": 0.639443576335907, + "learning_rate": 1.6808185342970238e-08, + "loss": 1.3509, + "step": 9461 + }, + { + "epoch": 
0.9981012658227848, + "grad_norm": 0.6283660531044006, + "learning_rate": 1.516939279638807e-08, + "loss": 1.3612, + "step": 9462 + }, + { + "epoch": 0.9982067510548523, + "grad_norm": 0.711144745349884, + "learning_rate": 1.3614639789882866e-08, + "loss": 1.3784, + "step": 9463 + }, + { + "epoch": 0.9983122362869198, + "grad_norm": 0.6304100751876831, + "learning_rate": 1.214392649756535e-08, + "loss": 1.3839, + "step": 9464 + }, + { + "epoch": 0.9984177215189873, + "grad_norm": 0.6452110409736633, + "learning_rate": 1.075725308438691e-08, + "loss": 1.4029, + "step": 9465 + }, + { + "epoch": 0.9985232067510549, + "grad_norm": 0.6117958426475525, + "learning_rate": 9.454619705556722e-09, + "loss": 1.3459, + "step": 9466 + }, + { + "epoch": 0.9986286919831223, + "grad_norm": 0.6317223906517029, + "learning_rate": 8.236026507124628e-09, + "loss": 1.3515, + "step": 9467 + }, + { + "epoch": 0.9987341772151899, + "grad_norm": 0.6437963247299194, + "learning_rate": 7.101473625648058e-09, + "loss": 1.36, + "step": 9468 + }, + { + "epoch": 0.9988396624472574, + "grad_norm": 0.6697539687156677, + "learning_rate": 6.050961188358573e-09, + "loss": 1.3549, + "step": 9469 + }, + { + "epoch": 0.9989451476793249, + "grad_norm": 0.6353802680969238, + "learning_rate": 5.084489312745521e-09, + "loss": 1.3682, + "step": 9470 + }, + { + "epoch": 0.9990506329113924, + "grad_norm": 0.6186800599098206, + "learning_rate": 4.202058107305451e-09, + "loss": 1.3691, + "step": 9471 + }, + { + "epoch": 0.99915611814346, + "grad_norm": 0.6431089043617249, + "learning_rate": 3.403667670792698e-09, + "loss": 1.3276, + "step": 9472 + }, + { + "epoch": 0.9992616033755274, + "grad_norm": 0.6369529366493225, + "learning_rate": 2.689318092718995e-09, + "loss": 1.3673, + "step": 9473 + }, + { + "epoch": 0.9993670886075949, + "grad_norm": 0.6215049624443054, + "learning_rate": 2.059009453103666e-09, + "loss": 1.3782, + "step": 9474 + }, + { + "epoch": 0.9994725738396625, + "grad_norm": 
0.6426306366920471, + "learning_rate": 1.5127418226401623e-09, + "loss": 1.3528, + "step": 9475 + }, + { + "epoch": 0.9995780590717299, + "grad_norm": 0.645084798336029, + "learning_rate": 1.0505152625295278e-09, + "loss": 1.3383, + "step": 9476 + }, + { + "epoch": 0.9996835443037975, + "grad_norm": 0.6648299098014832, + "learning_rate": 6.723298245636666e-10, + "loss": 1.346, + "step": 9477 + }, + { + "epoch": 0.999789029535865, + "grad_norm": 0.6170280575752258, + "learning_rate": 3.781855510420762e-10, + "loss": 1.3768, + "step": 9478 + }, + { + "epoch": 0.9998945147679325, + "grad_norm": 0.6664181351661682, + "learning_rate": 1.6808247493838026e-10, + "loss": 1.3629, + "step": 9479 + }, + { + "epoch": 1.0, + "grad_norm": 1.8477612733840942, + "learning_rate": 4.202061990032924e-11, + "loss": 1.331, + "step": 9480 + } + ], + "logging_steps": 1, + "max_steps": 9480, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.832308198648013e+16, + "train_batch_size": 1024, + "trial_name": null, + "trial_params": null +} diff --git a/saves-mpt-cosine/checkpoint-9480/training_args.bin b/saves-mpt-cosine/checkpoint-9480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..9a42ab573e47f60a85d978d59fc3a28a2b7f09eb --- /dev/null +++ b/saves-mpt-cosine/checkpoint-9480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c391845dfcac0605928877bef083a662f6dfdb95d16127f934c3305aee6ac33e +size 5176 diff --git a/saves-mpt-cosine/config.json b/saves-mpt-cosine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a7bae8bd28a02ff350c658f15c2c052270df91b7 --- /dev/null +++ b/saves-mpt-cosine/config.json @@ -0,0 +1,32 @@ +{ 
+ "architectures": [ + "MptForCausalLM" + ], + "attn_config": { + "model_type": "" + }, + "d_model": 256, + "emb_pdrop": 0.0, + "embedding_fraction": 1.0, + "expansion_ratio": 4, + "hidden_act": "gelu", + "init_device": "cpu", + "initializer_range": 0.02, + "intermediate_size": 1024, + "layer_norm_epsilon": 1e-05, + "learned_pos_emb": true, + "logit_scale": null, + "max_seq_len": 2048, + "model_type": "mpt", + "n_heads": 8, + "n_layers": 2, + "no_bias": true, + "norm_type": "low_precision_layernorm", + "num_key_value_heads": 8, + "resid_pdrop": 0.0, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": false, + "verbose": 0, + "vocab_size": 2000 +} diff --git a/saves-mpt-cosine/generation_config.json b/saves-mpt-cosine/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..805bc20f96323ae6bf455904f78f359bf95ce35b --- /dev/null +++ b/saves-mpt-cosine/generation_config.json @@ -0,0 +1,5 @@ +{ + "_from_model_config": true, + "transformers_version": "4.42.4", + "use_cache": false +} diff --git a/saves-mpt-cosine/model.safetensors b/saves-mpt-cosine/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9da5bf88089d4fb214e66f23ab2c5712c957eccd --- /dev/null +++ b/saves-mpt-cosine/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6d732944cce2bcceb1c14eb977d09d5633b7dcc02fb8a2d325207fb87d34197 +size 8346072 diff --git a/saves-mpt-cosine/result.log b/saves-mpt-cosine/result.log new file mode 100644 index 0000000000000000000000000000000000000000..1c2dd5e2bd34daeff326bc7a0bf41267f7c0297d --- /dev/null +++ b/saves-mpt-cosine/result.log @@ -0,0 +1 @@ +{'train_runtime': 2158.9249, 'train_samples_per_second': 4496.036, 'train_steps_per_second': 4.391, 'train_loss': 1.631664564224738, 'epoch': 1.0} \ No newline at end of file diff --git a/saves-mpt-cosine/special_tokens_map.json b/saves-mpt-cosine/special_tokens_map.json new file mode 100644 index 
0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-mpt-cosine/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-mpt-cosine/tokenizer.json b/saves-mpt-cosine/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-mpt-cosine/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + 
"add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + 
"ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, 
+ "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, 
+ "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, 
+ "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, 
+ "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, 
+ "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 
1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 
1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + 
"å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 
1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, 
+ "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, 
+ "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + 
"产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 
1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-mpt-cosine/tokenizer_config.json b/saves-mpt-cosine/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-mpt-cosine/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + 
"model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-mpt/checkpoint-9480/config.json b/saves-mpt/checkpoint-9480/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a7bae8bd28a02ff350c658f15c2c052270df91b7 --- /dev/null +++ b/saves-mpt/checkpoint-9480/config.json @@ -0,0 +1,32 @@ +{ + "architectures": [ + "MptForCausalLM" + ], + "attn_config": { + "model_type": "" + }, + "d_model": 256, + "emb_pdrop": 0.0, + "embedding_fraction": 1.0, + "expansion_ratio": 4, + "hidden_act": "gelu", + "init_device": "cpu", + "initializer_range": 0.02, + "intermediate_size": 1024, + "layer_norm_epsilon": 1e-05, + "learned_pos_emb": true, + "logit_scale": null, + "max_seq_len": 2048, + "model_type": "mpt", + "n_heads": 8, + "n_layers": 2, + "no_bias": true, + "norm_type": "low_precision_layernorm", + "num_key_value_heads": 8, + "resid_pdrop": 0.0, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": false, + "verbose": 0, + "vocab_size": 2000 +} diff --git a/saves-mpt/checkpoint-9480/generation_config.json b/saves-mpt/checkpoint-9480/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..805bc20f96323ae6bf455904f78f359bf95ce35b --- /dev/null +++ b/saves-mpt/checkpoint-9480/generation_config.json @@ -0,0 +1,5 @@ +{ + "_from_model_config": true, + "transformers_version": "4.42.4", + "use_cache": false +} diff --git a/saves-mpt/checkpoint-9480/model.safetensors b/saves-mpt/checkpoint-9480/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..719d368e1b5dcd7fee86f24797b59dcc6db935fa --- /dev/null +++ b/saves-mpt/checkpoint-9480/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fadae899671b76700344d41f6c6f36a91280d445c2bbc5baae5bea1b4584cbc8 +size 8346072 diff --git a/saves-mpt/checkpoint-9480/optimizer.pt 
b/saves-mpt/checkpoint-9480/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a30e9a467ffebd3393591abaa0b28b4b39a5bc15 --- /dev/null +++ b/saves-mpt/checkpoint-9480/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f1f35f095107c6af0a9a27779938b8989ce0828da9387fb9dff74b4a860e943 +size 16701364 diff --git a/saves-mpt/checkpoint-9480/rng_state.pth b/saves-mpt/checkpoint-9480/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f8291cd6ce87668b786a72f3e93d072fbe54902 --- /dev/null +++ b/saves-mpt/checkpoint-9480/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c917636c7a58af68a29056522a757e9f9b99005b776641aa157c536967817d +size 14244 diff --git a/saves-mpt/checkpoint-9480/scheduler.pt b/saves-mpt/checkpoint-9480/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..63473f23a031ab0f869bb406d5cf89839262f03d --- /dev/null +++ b/saves-mpt/checkpoint-9480/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbb2bea2f7536b844ad9bb1bf6c3877fce0b1eb4d96764e140560dbf207ce6aa +size 1064 diff --git a/saves-mpt/checkpoint-9480/special_tokens_map.json b/saves-mpt/checkpoint-9480/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-mpt/checkpoint-9480/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-mpt/checkpoint-9480/tokenizer.json 
b/saves-mpt/checkpoint-9480/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-mpt/checkpoint-9480/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + 
"8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 
197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 
348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 
495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 
641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + 
"æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + 
"now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 
1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, 
+ "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + 
"åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + 
"Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + 
",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 
1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 
1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + 
"Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-mpt/checkpoint-9480/tokenizer_config.json b/saves-mpt/checkpoint-9480/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-mpt/checkpoint-9480/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + 
"errors": "replace", + "model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-mpt/checkpoint-9480/trainer_state.json b/saves-mpt/checkpoint-9480/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4b688fc63b140d01b32939879295b231e24cf007 --- /dev/null +++ b/saves-mpt/checkpoint-9480/trainer_state.json @@ -0,0 +1,6669 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 9480, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0010548523206751054, + "grad_norm": 1.2186888456344604, + "learning_rate": 0.00015822784810126583, + "loss": 7.4756, + "step": 10 + }, + { + "epoch": 0.002109704641350211, + "grad_norm": 1.1075139045715332, + "learning_rate": 0.00031645569620253165, + "loss": 6.7987, + "step": 20 + }, + { + "epoch": 0.0031645569620253164, + "grad_norm": 0.8604756593704224, + "learning_rate": 0.00047468354430379745, + "loss": 6.1951, + "step": 30 + }, + { + "epoch": 0.004219409282700422, + "grad_norm": 0.6423186659812927, + "learning_rate": 0.0006329113924050633, + "loss": 5.7039, + "step": 40 + }, + { + "epoch": 0.005274261603375527, + "grad_norm": 0.6575109362602234, + "learning_rate": 0.0007911392405063291, + "loss": 5.2429, + "step": 50 + }, + { + "epoch": 0.006329113924050633, + "grad_norm": 0.7181506156921387, + "learning_rate": 0.0009493670886075949, + "loss": 4.7705, + "step": 60 + }, + { + "epoch": 0.007383966244725738, + "grad_norm": 0.9496937990188599, + "learning_rate": 0.0011075949367088608, + "loss": 4.3984, + "step": 70 + }, + { + "epoch": 0.008438818565400843, + "grad_norm": 1.0826588869094849, + "learning_rate": 0.0012658227848101266, + "loss": 4.1411, + "step": 80 + }, + { + "epoch": 0.00949367088607595, + "grad_norm": 0.7389441728591919, + "learning_rate": 
0.0014240506329113926, + "loss": 3.9336, + "step": 90 + }, + { + "epoch": 0.010548523206751054, + "grad_norm": 1.0077191591262817, + "learning_rate": 0.0015, + "loss": 3.7884, + "step": 100 + }, + { + "epoch": 0.011603375527426161, + "grad_norm": 0.7541709542274475, + "learning_rate": 0.0015, + "loss": 3.6578, + "step": 110 + }, + { + "epoch": 0.012658227848101266, + "grad_norm": 0.7902190685272217, + "learning_rate": 0.0015, + "loss": 3.5503, + "step": 120 + }, + { + "epoch": 0.013713080168776372, + "grad_norm": 0.7221662998199463, + "learning_rate": 0.0015, + "loss": 3.4618, + "step": 130 + }, + { + "epoch": 0.014767932489451477, + "grad_norm": 1.2441911697387695, + "learning_rate": 0.0015, + "loss": 3.3819, + "step": 140 + }, + { + "epoch": 0.015822784810126583, + "grad_norm": 0.9825108647346497, + "learning_rate": 0.0015, + "loss": 3.3147, + "step": 150 + }, + { + "epoch": 0.016877637130801686, + "grad_norm": 0.9007912874221802, + "learning_rate": 0.0015, + "loss": 3.2649, + "step": 160 + }, + { + "epoch": 0.017932489451476793, + "grad_norm": 0.86688232421875, + "learning_rate": 0.0015, + "loss": 3.1977, + "step": 170 + }, + { + "epoch": 0.0189873417721519, + "grad_norm": 0.8259088397026062, + "learning_rate": 0.0015, + "loss": 3.1623, + "step": 180 + }, + { + "epoch": 0.020042194092827006, + "grad_norm": 1.230103850364685, + "learning_rate": 0.0015, + "loss": 3.1005, + "step": 190 + }, + { + "epoch": 0.02109704641350211, + "grad_norm": 0.9208290576934814, + "learning_rate": 0.0015, + "loss": 3.0592, + "step": 200 + }, + { + "epoch": 0.022151898734177215, + "grad_norm": 0.7172234654426575, + "learning_rate": 0.0015, + "loss": 3.0197, + "step": 210 + }, + { + "epoch": 0.023206751054852322, + "grad_norm": 0.691364049911499, + "learning_rate": 0.0015, + "loss": 2.9838, + "step": 220 + }, + { + "epoch": 0.024261603375527425, + "grad_norm": 0.882768452167511, + "learning_rate": 0.0015, + "loss": 2.9276, + "step": 230 + }, + { + "epoch": 0.02531645569620253, + 
"grad_norm": 1.0899426937103271, + "learning_rate": 0.0015, + "loss": 2.898, + "step": 240 + }, + { + "epoch": 0.026371308016877638, + "grad_norm": 0.835606038570404, + "learning_rate": 0.0015, + "loss": 2.8611, + "step": 250 + }, + { + "epoch": 0.027426160337552744, + "grad_norm": 0.9906381964683533, + "learning_rate": 0.0015, + "loss": 2.8228, + "step": 260 + }, + { + "epoch": 0.028481012658227847, + "grad_norm": 0.837228000164032, + "learning_rate": 0.0015, + "loss": 2.7884, + "step": 270 + }, + { + "epoch": 0.029535864978902954, + "grad_norm": 0.9279177784919739, + "learning_rate": 0.0015, + "loss": 2.7519, + "step": 280 + }, + { + "epoch": 0.03059071729957806, + "grad_norm": 1.240341305732727, + "learning_rate": 0.0015, + "loss": 2.7221, + "step": 290 + }, + { + "epoch": 0.03164556962025317, + "grad_norm": 0.9779770374298096, + "learning_rate": 0.0015, + "loss": 2.6969, + "step": 300 + }, + { + "epoch": 0.03270042194092827, + "grad_norm": 1.3490874767303467, + "learning_rate": 0.0015, + "loss": 2.6669, + "step": 310 + }, + { + "epoch": 0.03375527426160337, + "grad_norm": 1.3380482196807861, + "learning_rate": 0.0015, + "loss": 2.6312, + "step": 320 + }, + { + "epoch": 0.03481012658227848, + "grad_norm": 0.8288021087646484, + "learning_rate": 0.0015, + "loss": 2.6254, + "step": 330 + }, + { + "epoch": 0.035864978902953586, + "grad_norm": 1.363772988319397, + "learning_rate": 0.0015, + "loss": 2.5977, + "step": 340 + }, + { + "epoch": 0.03691983122362869, + "grad_norm": 1.2266892194747925, + "learning_rate": 0.0015, + "loss": 2.5532, + "step": 350 + }, + { + "epoch": 0.0379746835443038, + "grad_norm": 0.9206980466842651, + "learning_rate": 0.0015, + "loss": 2.5316, + "step": 360 + }, + { + "epoch": 0.039029535864978905, + "grad_norm": 1.0549575090408325, + "learning_rate": 0.0015, + "loss": 2.5158, + "step": 370 + }, + { + "epoch": 0.04008438818565401, + "grad_norm": 1.217510461807251, + "learning_rate": 0.0015, + "loss": 2.4978, + "step": 380 + }, + { + 
"epoch": 0.04113924050632911, + "grad_norm": 1.0540152788162231, + "learning_rate": 0.0015, + "loss": 2.4792, + "step": 390 + }, + { + "epoch": 0.04219409282700422, + "grad_norm": 1.0417619943618774, + "learning_rate": 0.0015, + "loss": 2.4573, + "step": 400 + }, + { + "epoch": 0.043248945147679324, + "grad_norm": 0.7437645196914673, + "learning_rate": 0.0015, + "loss": 2.442, + "step": 410 + }, + { + "epoch": 0.04430379746835443, + "grad_norm": 1.1524747610092163, + "learning_rate": 0.0015, + "loss": 2.4248, + "step": 420 + }, + { + "epoch": 0.04535864978902954, + "grad_norm": 1.0705971717834473, + "learning_rate": 0.0015, + "loss": 2.3973, + "step": 430 + }, + { + "epoch": 0.046413502109704644, + "grad_norm": 0.9775382280349731, + "learning_rate": 0.0015, + "loss": 2.3833, + "step": 440 + }, + { + "epoch": 0.04746835443037975, + "grad_norm": 1.025772213935852, + "learning_rate": 0.0015, + "loss": 2.3803, + "step": 450 + }, + { + "epoch": 0.04852320675105485, + "grad_norm": 1.246941328048706, + "learning_rate": 0.0015, + "loss": 2.3622, + "step": 460 + }, + { + "epoch": 0.049578059071729956, + "grad_norm": 1.0031028985977173, + "learning_rate": 0.0015, + "loss": 2.3334, + "step": 470 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 1.5016298294067383, + "learning_rate": 0.0015, + "loss": 2.3327, + "step": 480 + }, + { + "epoch": 0.05168776371308017, + "grad_norm": 0.7244430184364319, + "learning_rate": 0.0015, + "loss": 2.3008, + "step": 490 + }, + { + "epoch": 0.052742616033755275, + "grad_norm": 1.213437795639038, + "learning_rate": 0.0015, + "loss": 2.2994, + "step": 500 + }, + { + "epoch": 0.05379746835443038, + "grad_norm": 1.2152141332626343, + "learning_rate": 0.0015, + "loss": 2.2937, + "step": 510 + }, + { + "epoch": 0.05485232067510549, + "grad_norm": 0.9512096047401428, + "learning_rate": 0.0015, + "loss": 2.2921, + "step": 520 + }, + { + "epoch": 0.05590717299578059, + "grad_norm": 0.7877956628799438, + "learning_rate": 0.0015, + "loss": 2.256, 
+ "step": 530 + }, + { + "epoch": 0.056962025316455694, + "grad_norm": 1.007867693901062, + "learning_rate": 0.0015, + "loss": 2.2389, + "step": 540 + }, + { + "epoch": 0.0580168776371308, + "grad_norm": 1.2216438055038452, + "learning_rate": 0.0015, + "loss": 2.2341, + "step": 550 + }, + { + "epoch": 0.05907172995780591, + "grad_norm": 0.769692599773407, + "learning_rate": 0.0015, + "loss": 2.2007, + "step": 560 + }, + { + "epoch": 0.060126582278481014, + "grad_norm": 1.202372431755066, + "learning_rate": 0.0015, + "loss": 2.2073, + "step": 570 + }, + { + "epoch": 0.06118143459915612, + "grad_norm": 1.0290395021438599, + "learning_rate": 0.0015, + "loss": 2.2108, + "step": 580 + }, + { + "epoch": 0.06223628691983123, + "grad_norm": 1.367793321609497, + "learning_rate": 0.0015, + "loss": 2.1828, + "step": 590 + }, + { + "epoch": 0.06329113924050633, + "grad_norm": 0.8654493093490601, + "learning_rate": 0.0015, + "loss": 2.1673, + "step": 600 + }, + { + "epoch": 0.06434599156118144, + "grad_norm": 0.8177367448806763, + "learning_rate": 0.0015, + "loss": 2.1687, + "step": 610 + }, + { + "epoch": 0.06540084388185655, + "grad_norm": 1.0598950386047363, + "learning_rate": 0.0015, + "loss": 2.1564, + "step": 620 + }, + { + "epoch": 0.06645569620253164, + "grad_norm": 1.063185691833496, + "learning_rate": 0.0015, + "loss": 2.1392, + "step": 630 + }, + { + "epoch": 0.06751054852320675, + "grad_norm": 0.8926445245742798, + "learning_rate": 0.0015, + "loss": 2.1467, + "step": 640 + }, + { + "epoch": 0.06856540084388185, + "grad_norm": 0.7596344947814941, + "learning_rate": 0.0015, + "loss": 2.1377, + "step": 650 + }, + { + "epoch": 0.06962025316455696, + "grad_norm": 0.8228513598442078, + "learning_rate": 0.0015, + "loss": 2.1232, + "step": 660 + }, + { + "epoch": 0.07067510548523206, + "grad_norm": 0.8619157075881958, + "learning_rate": 0.0015, + "loss": 2.108, + "step": 670 + }, + { + "epoch": 0.07172995780590717, + "grad_norm": 2.402742862701416, + "learning_rate": 
0.0015, + "loss": 2.1152, + "step": 680 + }, + { + "epoch": 0.07278481012658228, + "grad_norm": 0.8077136278152466, + "learning_rate": 0.0015, + "loss": 2.135, + "step": 690 + }, + { + "epoch": 0.07383966244725738, + "grad_norm": 0.8595909476280212, + "learning_rate": 0.0015, + "loss": 2.0909, + "step": 700 + }, + { + "epoch": 0.07489451476793249, + "grad_norm": 0.8992576003074646, + "learning_rate": 0.0015, + "loss": 2.0899, + "step": 710 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 1.1452020406723022, + "learning_rate": 0.0015, + "loss": 2.0698, + "step": 720 + }, + { + "epoch": 0.0770042194092827, + "grad_norm": 0.9489206075668335, + "learning_rate": 0.0015, + "loss": 2.0649, + "step": 730 + }, + { + "epoch": 0.07805907172995781, + "grad_norm": 0.6932376027107239, + "learning_rate": 0.0015, + "loss": 2.0725, + "step": 740 + }, + { + "epoch": 0.07911392405063292, + "grad_norm": 0.9433043599128723, + "learning_rate": 0.0015, + "loss": 2.0514, + "step": 750 + }, + { + "epoch": 0.08016877637130802, + "grad_norm": 1.7378337383270264, + "learning_rate": 0.0015, + "loss": 2.0728, + "step": 760 + }, + { + "epoch": 0.08122362869198312, + "grad_norm": 1.3360079526901245, + "learning_rate": 0.0015, + "loss": 2.0565, + "step": 770 + }, + { + "epoch": 0.08227848101265822, + "grad_norm": 0.9339075088500977, + "learning_rate": 0.0015, + "loss": 2.0525, + "step": 780 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 0.6675604581832886, + "learning_rate": 0.0015, + "loss": 2.0403, + "step": 790 + }, + { + "epoch": 0.08438818565400844, + "grad_norm": 0.6963586211204529, + "learning_rate": 0.0015, + "loss": 2.0174, + "step": 800 + }, + { + "epoch": 0.08544303797468354, + "grad_norm": 0.756433367729187, + "learning_rate": 0.0015, + "loss": 2.0048, + "step": 810 + }, + { + "epoch": 0.08649789029535865, + "grad_norm": 0.7605345845222473, + "learning_rate": 0.0015, + "loss": 2.0148, + "step": 820 + }, + { + "epoch": 0.08755274261603375, + "grad_norm": 
0.9260172843933105, + "learning_rate": 0.0015, + "loss": 2.0224, + "step": 830 + }, + { + "epoch": 0.08860759493670886, + "grad_norm": 1.2491339445114136, + "learning_rate": 0.0015, + "loss": 2.0016, + "step": 840 + }, + { + "epoch": 0.08966244725738397, + "grad_norm": 0.9123998880386353, + "learning_rate": 0.0015, + "loss": 1.9979, + "step": 850 + }, + { + "epoch": 0.09071729957805907, + "grad_norm": 1.066710114479065, + "learning_rate": 0.0015, + "loss": 1.9967, + "step": 860 + }, + { + "epoch": 0.09177215189873418, + "grad_norm": 0.6761780381202698, + "learning_rate": 0.0015, + "loss": 1.9925, + "step": 870 + }, + { + "epoch": 0.09282700421940929, + "grad_norm": 0.7574045062065125, + "learning_rate": 0.0015, + "loss": 1.9863, + "step": 880 + }, + { + "epoch": 0.0938818565400844, + "grad_norm": 0.8483901023864746, + "learning_rate": 0.0015, + "loss": 1.9792, + "step": 890 + }, + { + "epoch": 0.0949367088607595, + "grad_norm": 0.9919297099113464, + "learning_rate": 0.0015, + "loss": 1.9766, + "step": 900 + }, + { + "epoch": 0.09599156118143459, + "grad_norm": 0.9863075613975525, + "learning_rate": 0.0015, + "loss": 1.985, + "step": 910 + }, + { + "epoch": 0.0970464135021097, + "grad_norm": 1.0680369138717651, + "learning_rate": 0.0015, + "loss": 1.9688, + "step": 920 + }, + { + "epoch": 0.0981012658227848, + "grad_norm": 0.7299118041992188, + "learning_rate": 0.0015, + "loss": 1.9583, + "step": 930 + }, + { + "epoch": 0.09915611814345991, + "grad_norm": 0.7109554409980774, + "learning_rate": 0.0015, + "loss": 1.9685, + "step": 940 + }, + { + "epoch": 0.10021097046413502, + "grad_norm": 0.6925338506698608, + "learning_rate": 0.0015, + "loss": 1.9396, + "step": 950 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 0.989055335521698, + "learning_rate": 0.0015, + "loss": 1.9474, + "step": 960 + }, + { + "epoch": 0.10232067510548523, + "grad_norm": 0.7260894179344177, + "learning_rate": 0.0015, + "loss": 1.9499, + "step": 970 + }, + { + "epoch": 
0.10337552742616034, + "grad_norm": 1.1013668775558472, + "learning_rate": 0.0015, + "loss": 1.9332, + "step": 980 + }, + { + "epoch": 0.10443037974683544, + "grad_norm": 0.6925954818725586, + "learning_rate": 0.0015, + "loss": 1.9296, + "step": 990 + }, + { + "epoch": 0.10548523206751055, + "grad_norm": 1.757063865661621, + "learning_rate": 0.0015, + "loss": 1.9455, + "step": 1000 + }, + { + "epoch": 0.10654008438818566, + "grad_norm": 1.0348504781723022, + "learning_rate": 0.0015, + "loss": 1.9542, + "step": 1010 + }, + { + "epoch": 0.10759493670886076, + "grad_norm": 0.7209298610687256, + "learning_rate": 0.0015, + "loss": 1.919, + "step": 1020 + }, + { + "epoch": 0.10864978902953587, + "grad_norm": 0.7077058553695679, + "learning_rate": 0.0015, + "loss": 1.9186, + "step": 1030 + }, + { + "epoch": 0.10970464135021098, + "grad_norm": 0.6635832786560059, + "learning_rate": 0.0015, + "loss": 1.9155, + "step": 1040 + }, + { + "epoch": 0.11075949367088607, + "grad_norm": 0.8985444903373718, + "learning_rate": 0.0015, + "loss": 1.918, + "step": 1050 + }, + { + "epoch": 0.11181434599156118, + "grad_norm": 0.7381898164749146, + "learning_rate": 0.0015, + "loss": 1.9115, + "step": 1060 + }, + { + "epoch": 0.11286919831223628, + "grad_norm": 0.7176501750946045, + "learning_rate": 0.0015, + "loss": 1.9009, + "step": 1070 + }, + { + "epoch": 0.11392405063291139, + "grad_norm": 0.6653502583503723, + "learning_rate": 0.0015, + "loss": 1.8975, + "step": 1080 + }, + { + "epoch": 0.1149789029535865, + "grad_norm": 0.6939038634300232, + "learning_rate": 0.0015, + "loss": 1.9065, + "step": 1090 + }, + { + "epoch": 0.1160337552742616, + "grad_norm": 0.819861888885498, + "learning_rate": 0.0015, + "loss": 1.8948, + "step": 1100 + }, + { + "epoch": 0.11708860759493671, + "grad_norm": 0.7226906418800354, + "learning_rate": 0.0015, + "loss": 1.8988, + "step": 1110 + }, + { + "epoch": 0.11814345991561181, + "grad_norm": 0.9487333297729492, + "learning_rate": 0.0015, + "loss": 1.8923, + 
"step": 1120 + }, + { + "epoch": 0.11919831223628692, + "grad_norm": 0.8971943855285645, + "learning_rate": 0.0015, + "loss": 1.8775, + "step": 1130 + }, + { + "epoch": 0.12025316455696203, + "grad_norm": 0.7244747281074524, + "learning_rate": 0.0015, + "loss": 1.8967, + "step": 1140 + }, + { + "epoch": 0.12130801687763713, + "grad_norm": 0.753216028213501, + "learning_rate": 0.0015, + "loss": 1.8789, + "step": 1150 + }, + { + "epoch": 0.12236286919831224, + "grad_norm": 0.7540823221206665, + "learning_rate": 0.0015, + "loss": 1.8828, + "step": 1160 + }, + { + "epoch": 0.12341772151898735, + "grad_norm": 0.7866358757019043, + "learning_rate": 0.0015, + "loss": 1.8744, + "step": 1170 + }, + { + "epoch": 0.12447257383966245, + "grad_norm": 0.752935528755188, + "learning_rate": 0.0015, + "loss": 1.8599, + "step": 1180 + }, + { + "epoch": 0.12552742616033755, + "grad_norm": 0.6611055731773376, + "learning_rate": 0.0015, + "loss": 1.8637, + "step": 1190 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 1.2516565322875977, + "learning_rate": 0.0015, + "loss": 1.8793, + "step": 1200 + }, + { + "epoch": 0.12763713080168776, + "grad_norm": 0.6632084846496582, + "learning_rate": 0.0015, + "loss": 1.8685, + "step": 1210 + }, + { + "epoch": 0.12869198312236288, + "grad_norm": 0.6862037181854248, + "learning_rate": 0.0015, + "loss": 1.8506, + "step": 1220 + }, + { + "epoch": 0.12974683544303797, + "grad_norm": 0.8717275261878967, + "learning_rate": 0.0015, + "loss": 1.8432, + "step": 1230 + }, + { + "epoch": 0.1308016877637131, + "grad_norm": 1.1524758338928223, + "learning_rate": 0.0015, + "loss": 1.866, + "step": 1240 + }, + { + "epoch": 0.13185654008438819, + "grad_norm": 0.9125797748565674, + "learning_rate": 0.0015, + "loss": 1.8598, + "step": 1250 + }, + { + "epoch": 0.13291139240506328, + "grad_norm": 0.8476076722145081, + "learning_rate": 0.0015, + "loss": 1.8439, + "step": 1260 + }, + { + "epoch": 0.1339662447257384, + "grad_norm": 0.9498160481452942, + 
"learning_rate": 0.0015, + "loss": 1.8459, + "step": 1270 + }, + { + "epoch": 0.1350210970464135, + "grad_norm": 0.7211402654647827, + "learning_rate": 0.0015, + "loss": 1.8615, + "step": 1280 + }, + { + "epoch": 0.1360759493670886, + "grad_norm": 0.6844839453697205, + "learning_rate": 0.0015, + "loss": 1.8446, + "step": 1290 + }, + { + "epoch": 0.1371308016877637, + "grad_norm": 0.8232640027999878, + "learning_rate": 0.0015, + "loss": 1.832, + "step": 1300 + }, + { + "epoch": 0.13818565400843882, + "grad_norm": 0.6936826705932617, + "learning_rate": 0.0015, + "loss": 1.8344, + "step": 1310 + }, + { + "epoch": 0.13924050632911392, + "grad_norm": 0.9277529716491699, + "learning_rate": 0.0015, + "loss": 1.8353, + "step": 1320 + }, + { + "epoch": 0.14029535864978904, + "grad_norm": 0.7764731049537659, + "learning_rate": 0.0015, + "loss": 1.831, + "step": 1330 + }, + { + "epoch": 0.14135021097046413, + "grad_norm": 0.9051993489265442, + "learning_rate": 0.0015, + "loss": 1.8301, + "step": 1340 + }, + { + "epoch": 0.14240506329113925, + "grad_norm": 0.6625475883483887, + "learning_rate": 0.0015, + "loss": 1.8343, + "step": 1350 + }, + { + "epoch": 0.14345991561181434, + "grad_norm": 0.6393569111824036, + "learning_rate": 0.0015, + "loss": 1.8297, + "step": 1360 + }, + { + "epoch": 0.14451476793248946, + "grad_norm": 0.6683023571968079, + "learning_rate": 0.0015, + "loss": 1.8235, + "step": 1370 + }, + { + "epoch": 0.14556962025316456, + "grad_norm": 0.7225092649459839, + "learning_rate": 0.0015, + "loss": 1.8168, + "step": 1380 + }, + { + "epoch": 0.14662447257383968, + "grad_norm": 1.3273886442184448, + "learning_rate": 0.0015, + "loss": 1.8285, + "step": 1390 + }, + { + "epoch": 0.14767932489451477, + "grad_norm": 1.3077683448791504, + "learning_rate": 0.0015, + "loss": 1.8139, + "step": 1400 + }, + { + "epoch": 0.14873417721518986, + "grad_norm": 0.6626515984535217, + "learning_rate": 0.0015, + "loss": 1.812, + "step": 1410 + }, + { + "epoch": 0.14978902953586498, + 
"grad_norm": 0.7292690277099609, + "learning_rate": 0.0015, + "loss": 1.8165, + "step": 1420 + }, + { + "epoch": 0.15084388185654007, + "grad_norm": 0.7270740270614624, + "learning_rate": 0.0015, + "loss": 1.8093, + "step": 1430 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 0.9681549072265625, + "learning_rate": 0.0015, + "loss": 1.8034, + "step": 1440 + }, + { + "epoch": 0.1529535864978903, + "grad_norm": 0.6339258551597595, + "learning_rate": 0.0015, + "loss": 1.805, + "step": 1450 + }, + { + "epoch": 0.1540084388185654, + "grad_norm": 0.6470240950584412, + "learning_rate": 0.0015, + "loss": 1.8001, + "step": 1460 + }, + { + "epoch": 0.1550632911392405, + "grad_norm": 0.8807370662689209, + "learning_rate": 0.0015, + "loss": 1.8002, + "step": 1470 + }, + { + "epoch": 0.15611814345991562, + "grad_norm": 0.7718443274497986, + "learning_rate": 0.0015, + "loss": 1.8049, + "step": 1480 + }, + { + "epoch": 0.1571729957805907, + "grad_norm": 0.8173696994781494, + "learning_rate": 0.0015, + "loss": 1.7981, + "step": 1490 + }, + { + "epoch": 0.15822784810126583, + "grad_norm": 0.6859337687492371, + "learning_rate": 0.0015, + "loss": 1.7994, + "step": 1500 + }, + { + "epoch": 0.15928270042194093, + "grad_norm": 0.7179945707321167, + "learning_rate": 0.0015, + "loss": 1.7937, + "step": 1510 + }, + { + "epoch": 0.16033755274261605, + "grad_norm": 0.6422714591026306, + "learning_rate": 0.0015, + "loss": 1.7976, + "step": 1520 + }, + { + "epoch": 0.16139240506329114, + "grad_norm": 0.8165238499641418, + "learning_rate": 0.0015, + "loss": 1.7832, + "step": 1530 + }, + { + "epoch": 0.16244725738396623, + "grad_norm": 0.6381688714027405, + "learning_rate": 0.0015, + "loss": 1.7716, + "step": 1540 + }, + { + "epoch": 0.16350210970464135, + "grad_norm": 0.6977247595787048, + "learning_rate": 0.0015, + "loss": 1.7763, + "step": 1550 + }, + { + "epoch": 0.16455696202531644, + "grad_norm": 0.6377383470535278, + "learning_rate": 0.0015, + "loss": 1.7759, + "step": 1560 + }, + { 
+ "epoch": 0.16561181434599156, + "grad_norm": 0.9726520776748657, + "learning_rate": 0.0015, + "loss": 1.7675, + "step": 1570 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 0.6959405541419983, + "learning_rate": 0.0015, + "loss": 1.7814, + "step": 1580 + }, + { + "epoch": 0.16772151898734178, + "grad_norm": 0.8633522987365723, + "learning_rate": 0.0015, + "loss": 1.7783, + "step": 1590 + }, + { + "epoch": 0.16877637130801687, + "grad_norm": 0.7004960179328918, + "learning_rate": 0.0015, + "loss": 1.7804, + "step": 1600 + }, + { + "epoch": 0.169831223628692, + "grad_norm": 0.7215518951416016, + "learning_rate": 0.0015, + "loss": 1.7773, + "step": 1610 + }, + { + "epoch": 0.17088607594936708, + "grad_norm": 0.6039355397224426, + "learning_rate": 0.0015, + "loss": 1.7504, + "step": 1620 + }, + { + "epoch": 0.1719409282700422, + "grad_norm": 0.9246911406517029, + "learning_rate": 0.0015, + "loss": 1.7648, + "step": 1630 + }, + { + "epoch": 0.1729957805907173, + "grad_norm": 0.8042846918106079, + "learning_rate": 0.0015, + "loss": 1.7651, + "step": 1640 + }, + { + "epoch": 0.17405063291139242, + "grad_norm": 0.6565026044845581, + "learning_rate": 0.0015, + "loss": 1.7564, + "step": 1650 + }, + { + "epoch": 0.1751054852320675, + "grad_norm": 0.7790629267692566, + "learning_rate": 0.0015, + "loss": 1.7574, + "step": 1660 + }, + { + "epoch": 0.17616033755274263, + "grad_norm": 0.6511490345001221, + "learning_rate": 0.0015, + "loss": 1.7633, + "step": 1670 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 0.6874651312828064, + "learning_rate": 0.0015, + "loss": 1.7522, + "step": 1680 + }, + { + "epoch": 0.17827004219409281, + "grad_norm": 0.6518383622169495, + "learning_rate": 0.0015, + "loss": 1.7446, + "step": 1690 + }, + { + "epoch": 0.17932489451476794, + "grad_norm": 0.9337310194969177, + "learning_rate": 0.0015, + "loss": 1.7634, + "step": 1700 + }, + { + "epoch": 0.18037974683544303, + "grad_norm": 1.0004547834396362, + "learning_rate": 0.0015, + 
"loss": 1.7414, + "step": 1710 + }, + { + "epoch": 0.18143459915611815, + "grad_norm": 0.9592145681381226, + "learning_rate": 0.0015, + "loss": 1.7396, + "step": 1720 + }, + { + "epoch": 0.18248945147679324, + "grad_norm": 0.8522003889083862, + "learning_rate": 0.0015, + "loss": 1.7394, + "step": 1730 + }, + { + "epoch": 0.18354430379746836, + "grad_norm": 0.6941075921058655, + "learning_rate": 0.0015, + "loss": 1.7461, + "step": 1740 + }, + { + "epoch": 0.18459915611814345, + "grad_norm": 0.6232513785362244, + "learning_rate": 0.0015, + "loss": 1.7665, + "step": 1750 + }, + { + "epoch": 0.18565400843881857, + "grad_norm": 1.227797269821167, + "learning_rate": 0.0015, + "loss": 1.7449, + "step": 1760 + }, + { + "epoch": 0.18670886075949367, + "grad_norm": 0.689431369304657, + "learning_rate": 0.0015, + "loss": 1.7496, + "step": 1770 + }, + { + "epoch": 0.1877637130801688, + "grad_norm": 0.7479903101921082, + "learning_rate": 0.0015, + "loss": 1.7448, + "step": 1780 + }, + { + "epoch": 0.18881856540084388, + "grad_norm": 0.6916646361351013, + "learning_rate": 0.0015, + "loss": 1.7481, + "step": 1790 + }, + { + "epoch": 0.189873417721519, + "grad_norm": 0.8803281188011169, + "learning_rate": 0.0015, + "loss": 1.7404, + "step": 1800 + }, + { + "epoch": 0.1909282700421941, + "grad_norm": 0.8555834889411926, + "learning_rate": 0.0015, + "loss": 1.7316, + "step": 1810 + }, + { + "epoch": 0.19198312236286919, + "grad_norm": 0.6191942691802979, + "learning_rate": 0.0015, + "loss": 1.7284, + "step": 1820 + }, + { + "epoch": 0.1930379746835443, + "grad_norm": 0.5880500078201294, + "learning_rate": 0.0015, + "loss": 1.7269, + "step": 1830 + }, + { + "epoch": 0.1940928270042194, + "grad_norm": 0.6119520664215088, + "learning_rate": 0.0015, + "loss": 1.7287, + "step": 1840 + }, + { + "epoch": 0.19514767932489452, + "grad_norm": 0.8929871916770935, + "learning_rate": 0.0015, + "loss": 1.731, + "step": 1850 + }, + { + "epoch": 0.1962025316455696, + "grad_norm": 
0.8229356408119202, + "learning_rate": 0.0015, + "loss": 1.7355, + "step": 1860 + }, + { + "epoch": 0.19725738396624473, + "grad_norm": 0.9643657803535461, + "learning_rate": 0.0015, + "loss": 1.7365, + "step": 1870 + }, + { + "epoch": 0.19831223628691982, + "grad_norm": 0.6754857301712036, + "learning_rate": 0.0015, + "loss": 1.7198, + "step": 1880 + }, + { + "epoch": 0.19936708860759494, + "grad_norm": 0.7569988369941711, + "learning_rate": 0.0015, + "loss": 1.7248, + "step": 1890 + }, + { + "epoch": 0.20042194092827004, + "grad_norm": 0.7129053473472595, + "learning_rate": 0.0015, + "loss": 1.7223, + "step": 1900 + }, + { + "epoch": 0.20147679324894516, + "grad_norm": 1.1085187196731567, + "learning_rate": 0.0015, + "loss": 1.726, + "step": 1910 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 0.8156635761260986, + "learning_rate": 0.0015, + "loss": 1.7233, + "step": 1920 + }, + { + "epoch": 0.20358649789029537, + "grad_norm": 0.6475657224655151, + "learning_rate": 0.0015, + "loss": 1.7241, + "step": 1930 + }, + { + "epoch": 0.20464135021097046, + "grad_norm": 0.6763522028923035, + "learning_rate": 0.0015, + "loss": 1.7149, + "step": 1940 + }, + { + "epoch": 0.20569620253164558, + "grad_norm": 0.7413578629493713, + "learning_rate": 0.0015, + "loss": 1.7129, + "step": 1950 + }, + { + "epoch": 0.20675105485232068, + "grad_norm": 0.6201931834220886, + "learning_rate": 0.0015, + "loss": 1.7185, + "step": 1960 + }, + { + "epoch": 0.20780590717299577, + "grad_norm": 1.0813745260238647, + "learning_rate": 0.0015, + "loss": 1.7222, + "step": 1970 + }, + { + "epoch": 0.2088607594936709, + "grad_norm": 0.7272746562957764, + "learning_rate": 0.0015, + "loss": 1.7075, + "step": 1980 + }, + { + "epoch": 0.20991561181434598, + "grad_norm": 0.6317844986915588, + "learning_rate": 0.0015, + "loss": 1.7012, + "step": 1990 + }, + { + "epoch": 0.2109704641350211, + "grad_norm": 0.8212335705757141, + "learning_rate": 0.0015, + "loss": 1.7043, + "step": 2000 + }, + { + 
"epoch": 0.2120253164556962, + "grad_norm": 0.8446721434593201, + "learning_rate": 0.0015, + "loss": 1.7112, + "step": 2010 + }, + { + "epoch": 0.21308016877637131, + "grad_norm": 0.7635065317153931, + "learning_rate": 0.0015, + "loss": 1.7201, + "step": 2020 + }, + { + "epoch": 0.2141350210970464, + "grad_norm": 0.6259655952453613, + "learning_rate": 0.0015, + "loss": 1.707, + "step": 2030 + }, + { + "epoch": 0.21518987341772153, + "grad_norm": 0.6341089606285095, + "learning_rate": 0.0015, + "loss": 1.7077, + "step": 2040 + }, + { + "epoch": 0.21624472573839662, + "grad_norm": 0.6744656562805176, + "learning_rate": 0.0015, + "loss": 1.7092, + "step": 2050 + }, + { + "epoch": 0.21729957805907174, + "grad_norm": 0.7680793404579163, + "learning_rate": 0.0015, + "loss": 1.6997, + "step": 2060 + }, + { + "epoch": 0.21835443037974683, + "grad_norm": 0.855820894241333, + "learning_rate": 0.0015, + "loss": 1.6979, + "step": 2070 + }, + { + "epoch": 0.21940928270042195, + "grad_norm": 0.6359190940856934, + "learning_rate": 0.0015, + "loss": 1.7029, + "step": 2080 + }, + { + "epoch": 0.22046413502109705, + "grad_norm": 0.739136815071106, + "learning_rate": 0.0015, + "loss": 1.6946, + "step": 2090 + }, + { + "epoch": 0.22151898734177214, + "grad_norm": 0.6583918333053589, + "learning_rate": 0.0015, + "loss": 1.7005, + "step": 2100 + }, + { + "epoch": 0.22257383966244726, + "grad_norm": 0.6532536745071411, + "learning_rate": 0.0015, + "loss": 1.6911, + "step": 2110 + }, + { + "epoch": 0.22362869198312235, + "grad_norm": 1.0495470762252808, + "learning_rate": 0.0015, + "loss": 1.6959, + "step": 2120 + }, + { + "epoch": 0.22468354430379747, + "grad_norm": 0.700576901435852, + "learning_rate": 0.0015, + "loss": 1.6932, + "step": 2130 + }, + { + "epoch": 0.22573839662447256, + "grad_norm": 0.6586419939994812, + "learning_rate": 0.0015, + "loss": 1.684, + "step": 2140 + }, + { + "epoch": 0.22679324894514769, + "grad_norm": 0.6587622761726379, + "learning_rate": 0.0015, + "loss": 
1.6935, + "step": 2150 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 0.681542694568634, + "learning_rate": 0.0015, + "loss": 1.7028, + "step": 2160 + }, + { + "epoch": 0.2289029535864979, + "grad_norm": 0.6412506103515625, + "learning_rate": 0.0015, + "loss": 1.6956, + "step": 2170 + }, + { + "epoch": 0.229957805907173, + "grad_norm": 0.7530674934387207, + "learning_rate": 0.0015, + "loss": 1.6761, + "step": 2180 + }, + { + "epoch": 0.2310126582278481, + "grad_norm": 0.6285898685455322, + "learning_rate": 0.0015, + "loss": 1.69, + "step": 2190 + }, + { + "epoch": 0.2320675105485232, + "grad_norm": 1.0234174728393555, + "learning_rate": 0.0015, + "loss": 1.6931, + "step": 2200 + }, + { + "epoch": 0.23312236286919832, + "grad_norm": 0.7023261189460754, + "learning_rate": 0.0015, + "loss": 1.6776, + "step": 2210 + }, + { + "epoch": 0.23417721518987342, + "grad_norm": 0.6849989295005798, + "learning_rate": 0.0015, + "loss": 1.6911, + "step": 2220 + }, + { + "epoch": 0.23523206751054854, + "grad_norm": 0.8580043911933899, + "learning_rate": 0.0015, + "loss": 1.6807, + "step": 2230 + }, + { + "epoch": 0.23628691983122363, + "grad_norm": 0.8025040030479431, + "learning_rate": 0.0015, + "loss": 1.682, + "step": 2240 + }, + { + "epoch": 0.23734177215189872, + "grad_norm": 0.6510624885559082, + "learning_rate": 0.0015, + "loss": 1.6767, + "step": 2250 + }, + { + "epoch": 0.23839662447257384, + "grad_norm": 0.8475005626678467, + "learning_rate": 0.0015, + "loss": 1.6703, + "step": 2260 + }, + { + "epoch": 0.23945147679324894, + "grad_norm": 0.5999197363853455, + "learning_rate": 0.0015, + "loss": 1.7013, + "step": 2270 + }, + { + "epoch": 0.24050632911392406, + "grad_norm": 0.6120131015777588, + "learning_rate": 0.0015, + "loss": 1.687, + "step": 2280 + }, + { + "epoch": 0.24156118143459915, + "grad_norm": 0.8520112633705139, + "learning_rate": 0.0015, + "loss": 1.67, + "step": 2290 + }, + { + "epoch": 0.24261603375527427, + "grad_norm": 0.6505146622657776, + 
"learning_rate": 0.0015, + "loss": 1.6715, + "step": 2300 + }, + { + "epoch": 0.24367088607594936, + "grad_norm": 0.8636083602905273, + "learning_rate": 0.0015, + "loss": 1.6736, + "step": 2310 + }, + { + "epoch": 0.24472573839662448, + "grad_norm": 0.6786522269248962, + "learning_rate": 0.0015, + "loss": 1.6814, + "step": 2320 + }, + { + "epoch": 0.24578059071729957, + "grad_norm": 0.6315372586250305, + "learning_rate": 0.0015, + "loss": 1.6611, + "step": 2330 + }, + { + "epoch": 0.2468354430379747, + "grad_norm": 0.6420019268989563, + "learning_rate": 0.0015, + "loss": 1.6601, + "step": 2340 + }, + { + "epoch": 0.2478902953586498, + "grad_norm": 0.9882775545120239, + "learning_rate": 0.0015, + "loss": 1.6627, + "step": 2350 + }, + { + "epoch": 0.2489451476793249, + "grad_norm": 0.7462952733039856, + "learning_rate": 0.0015, + "loss": 1.6721, + "step": 2360 + }, + { + "epoch": 0.25, + "grad_norm": 0.6469263434410095, + "learning_rate": 0.0015, + "loss": 1.6693, + "step": 2370 + }, + { + "epoch": 0.2510548523206751, + "grad_norm": 0.714177131652832, + "learning_rate": 0.0015, + "loss": 1.6692, + "step": 2380 + }, + { + "epoch": 0.2521097046413502, + "grad_norm": 1.074702262878418, + "learning_rate": 0.0015, + "loss": 1.6695, + "step": 2390 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 0.6026642918586731, + "learning_rate": 0.0015, + "loss": 1.6631, + "step": 2400 + }, + { + "epoch": 0.2542194092827004, + "grad_norm": 0.9359568953514099, + "learning_rate": 0.0015, + "loss": 1.6685, + "step": 2410 + }, + { + "epoch": 0.2552742616033755, + "grad_norm": 0.9025223851203918, + "learning_rate": 0.0015, + "loss": 1.6624, + "step": 2420 + }, + { + "epoch": 0.2563291139240506, + "grad_norm": 0.6915332674980164, + "learning_rate": 0.0015, + "loss": 1.665, + "step": 2430 + }, + { + "epoch": 0.25738396624472576, + "grad_norm": 1.0445406436920166, + "learning_rate": 0.0015, + "loss": 1.6697, + "step": 2440 + }, + { + "epoch": 0.25843881856540085, + "grad_norm": 
0.8682367205619812, + "learning_rate": 0.0015, + "loss": 1.6582, + "step": 2450 + }, + { + "epoch": 0.25949367088607594, + "grad_norm": 0.6498146057128906, + "learning_rate": 0.0015, + "loss": 1.6709, + "step": 2460 + }, + { + "epoch": 0.26054852320675104, + "grad_norm": 0.773652970790863, + "learning_rate": 0.0015, + "loss": 1.6583, + "step": 2470 + }, + { + "epoch": 0.2616033755274262, + "grad_norm": 0.6476708054542542, + "learning_rate": 0.0015, + "loss": 1.6601, + "step": 2480 + }, + { + "epoch": 0.2626582278481013, + "grad_norm": 0.7904232740402222, + "learning_rate": 0.0015, + "loss": 1.6499, + "step": 2490 + }, + { + "epoch": 0.26371308016877637, + "grad_norm": 0.961468517780304, + "learning_rate": 0.0015, + "loss": 1.6616, + "step": 2500 + }, + { + "epoch": 0.26476793248945146, + "grad_norm": 0.6232831478118896, + "learning_rate": 0.0015, + "loss": 1.6544, + "step": 2510 + }, + { + "epoch": 0.26582278481012656, + "grad_norm": 0.6506131291389465, + "learning_rate": 0.0015, + "loss": 1.656, + "step": 2520 + }, + { + "epoch": 0.2668776371308017, + "grad_norm": 0.7402883768081665, + "learning_rate": 0.0015, + "loss": 1.6513, + "step": 2530 + }, + { + "epoch": 0.2679324894514768, + "grad_norm": 0.9401949644088745, + "learning_rate": 0.0015, + "loss": 1.6607, + "step": 2540 + }, + { + "epoch": 0.2689873417721519, + "grad_norm": 0.8450409770011902, + "learning_rate": 0.0015, + "loss": 1.6536, + "step": 2550 + }, + { + "epoch": 0.270042194092827, + "grad_norm": 0.6884193420410156, + "learning_rate": 0.0015, + "loss": 1.6565, + "step": 2560 + }, + { + "epoch": 0.27109704641350213, + "grad_norm": 0.6372545957565308, + "learning_rate": 0.0015, + "loss": 1.6506, + "step": 2570 + }, + { + "epoch": 0.2721518987341772, + "grad_norm": 0.6220951676368713, + "learning_rate": 0.0015, + "loss": 1.6522, + "step": 2580 + }, + { + "epoch": 0.2732067510548523, + "grad_norm": 0.8408181071281433, + "learning_rate": 0.0015, + "loss": 1.6557, + "step": 2590 + }, + { + "epoch": 
0.2742616033755274, + "grad_norm": 0.6427624225616455, + "learning_rate": 0.0015, + "loss": 1.6569, + "step": 2600 + }, + { + "epoch": 0.27531645569620256, + "grad_norm": 0.8172615170478821, + "learning_rate": 0.0015, + "loss": 1.654, + "step": 2610 + }, + { + "epoch": 0.27637130801687765, + "grad_norm": 0.9558082222938538, + "learning_rate": 0.0015, + "loss": 1.6505, + "step": 2620 + }, + { + "epoch": 0.27742616033755274, + "grad_norm": 0.7412358522415161, + "learning_rate": 0.0015, + "loss": 1.6441, + "step": 2630 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 0.6658812165260315, + "learning_rate": 0.0015, + "loss": 1.6528, + "step": 2640 + }, + { + "epoch": 0.2795358649789029, + "grad_norm": 0.9503615498542786, + "learning_rate": 0.0015, + "loss": 1.6444, + "step": 2650 + }, + { + "epoch": 0.2805907172995781, + "grad_norm": 0.5820896625518799, + "learning_rate": 0.0015, + "loss": 1.6394, + "step": 2660 + }, + { + "epoch": 0.28164556962025317, + "grad_norm": 0.737178385257721, + "learning_rate": 0.0015, + "loss": 1.6411, + "step": 2670 + }, + { + "epoch": 0.28270042194092826, + "grad_norm": 0.6155056357383728, + "learning_rate": 0.0015, + "loss": 1.6426, + "step": 2680 + }, + { + "epoch": 0.28375527426160335, + "grad_norm": 0.8358173370361328, + "learning_rate": 0.0015, + "loss": 1.6418, + "step": 2690 + }, + { + "epoch": 0.2848101265822785, + "grad_norm": 0.6908949017524719, + "learning_rate": 0.0015, + "loss": 1.6366, + "step": 2700 + }, + { + "epoch": 0.2858649789029536, + "grad_norm": 0.5848388671875, + "learning_rate": 0.0015, + "loss": 1.6412, + "step": 2710 + }, + { + "epoch": 0.2869198312236287, + "grad_norm": 0.7473011612892151, + "learning_rate": 0.0015, + "loss": 1.6435, + "step": 2720 + }, + { + "epoch": 0.2879746835443038, + "grad_norm": 0.7751439809799194, + "learning_rate": 0.0015, + "loss": 1.625, + "step": 2730 + }, + { + "epoch": 0.2890295358649789, + "grad_norm": 0.9881758689880371, + "learning_rate": 0.0015, + "loss": 1.6345, + 
"step": 2740 + }, + { + "epoch": 0.290084388185654, + "grad_norm": 0.9220196008682251, + "learning_rate": 0.0015, + "loss": 1.641, + "step": 2750 + }, + { + "epoch": 0.2911392405063291, + "grad_norm": 0.6724462509155273, + "learning_rate": 0.0015, + "loss": 1.6508, + "step": 2760 + }, + { + "epoch": 0.2921940928270042, + "grad_norm": 0.6800593733787537, + "learning_rate": 0.0015, + "loss": 1.6453, + "step": 2770 + }, + { + "epoch": 0.29324894514767935, + "grad_norm": 0.6859573125839233, + "learning_rate": 0.0015, + "loss": 1.6378, + "step": 2780 + }, + { + "epoch": 0.29430379746835444, + "grad_norm": 0.785236120223999, + "learning_rate": 0.0015, + "loss": 1.6414, + "step": 2790 + }, + { + "epoch": 0.29535864978902954, + "grad_norm": 0.6508416533470154, + "learning_rate": 0.0015, + "loss": 1.6394, + "step": 2800 + }, + { + "epoch": 0.29641350210970463, + "grad_norm": 0.7265350222587585, + "learning_rate": 0.0015, + "loss": 1.6387, + "step": 2810 + }, + { + "epoch": 0.2974683544303797, + "grad_norm": 0.7346289753913879, + "learning_rate": 0.0015, + "loss": 1.642, + "step": 2820 + }, + { + "epoch": 0.29852320675105487, + "grad_norm": 0.5750663876533508, + "learning_rate": 0.0015, + "loss": 1.6179, + "step": 2830 + }, + { + "epoch": 0.29957805907172996, + "grad_norm": 0.8585373163223267, + "learning_rate": 0.0015, + "loss": 1.6211, + "step": 2840 + }, + { + "epoch": 0.30063291139240506, + "grad_norm": 0.6604123115539551, + "learning_rate": 0.0015, + "loss": 1.6266, + "step": 2850 + }, + { + "epoch": 0.30168776371308015, + "grad_norm": 0.7079337239265442, + "learning_rate": 0.0015, + "loss": 1.6293, + "step": 2860 + }, + { + "epoch": 0.3027426160337553, + "grad_norm": 0.9872538447380066, + "learning_rate": 0.0015, + "loss": 1.6332, + "step": 2870 + }, + { + "epoch": 0.3037974683544304, + "grad_norm": 0.7314916849136353, + "learning_rate": 0.0015, + "loss": 1.6207, + "step": 2880 + }, + { + "epoch": 0.3048523206751055, + "grad_norm": 0.6987850069999695, + 
"learning_rate": 0.0015, + "loss": 1.6224, + "step": 2890 + }, + { + "epoch": 0.3059071729957806, + "grad_norm": 0.658786416053772, + "learning_rate": 0.0015, + "loss": 1.6279, + "step": 2900 + }, + { + "epoch": 0.3069620253164557, + "grad_norm": 1.8567450046539307, + "learning_rate": 0.0015, + "loss": 1.6421, + "step": 2910 + }, + { + "epoch": 0.3080168776371308, + "grad_norm": 0.7476452589035034, + "learning_rate": 0.0015, + "loss": 1.625, + "step": 2920 + }, + { + "epoch": 0.3090717299578059, + "grad_norm": 0.6860174536705017, + "learning_rate": 0.0015, + "loss": 1.6179, + "step": 2930 + }, + { + "epoch": 0.310126582278481, + "grad_norm": 0.8042653203010559, + "learning_rate": 0.0015, + "loss": 1.6247, + "step": 2940 + }, + { + "epoch": 0.3111814345991561, + "grad_norm": 0.8760249018669128, + "learning_rate": 0.0015, + "loss": 1.633, + "step": 2950 + }, + { + "epoch": 0.31223628691983124, + "grad_norm": 1.5029122829437256, + "learning_rate": 0.0015, + "loss": 1.6288, + "step": 2960 + }, + { + "epoch": 0.31329113924050633, + "grad_norm": 0.8444796800613403, + "learning_rate": 0.0015, + "loss": 1.6184, + "step": 2970 + }, + { + "epoch": 0.3143459915611814, + "grad_norm": 0.6066569685935974, + "learning_rate": 0.0015, + "loss": 1.6102, + "step": 2980 + }, + { + "epoch": 0.3154008438818565, + "grad_norm": 0.6329298615455627, + "learning_rate": 0.0015, + "loss": 1.6283, + "step": 2990 + }, + { + "epoch": 0.31645569620253167, + "grad_norm": 0.6336819529533386, + "learning_rate": 0.0015, + "loss": 1.6086, + "step": 3000 + }, + { + "epoch": 0.31751054852320676, + "grad_norm": 0.6879556775093079, + "learning_rate": 0.0015, + "loss": 1.6157, + "step": 3010 + }, + { + "epoch": 0.31856540084388185, + "grad_norm": 0.7391799092292786, + "learning_rate": 0.0015, + "loss": 1.6197, + "step": 3020 + }, + { + "epoch": 0.31962025316455694, + "grad_norm": 0.5708714723587036, + "learning_rate": 0.0015, + "loss": 1.6042, + "step": 3030 + }, + { + "epoch": 0.3206751054852321, + 
"grad_norm": 0.5482274293899536, + "learning_rate": 0.0015, + "loss": 1.6152, + "step": 3040 + }, + { + "epoch": 0.3217299578059072, + "grad_norm": 0.5858886241912842, + "learning_rate": 0.0015, + "loss": 1.6097, + "step": 3050 + }, + { + "epoch": 0.3227848101265823, + "grad_norm": 0.7412643432617188, + "learning_rate": 0.0015, + "loss": 1.6219, + "step": 3060 + }, + { + "epoch": 0.32383966244725737, + "grad_norm": 0.5655277967453003, + "learning_rate": 0.0015, + "loss": 1.6132, + "step": 3070 + }, + { + "epoch": 0.32489451476793246, + "grad_norm": 0.7304298877716064, + "learning_rate": 0.0015, + "loss": 1.6011, + "step": 3080 + }, + { + "epoch": 0.3259493670886076, + "grad_norm": 0.8199241757392883, + "learning_rate": 0.0015, + "loss": 1.6198, + "step": 3090 + }, + { + "epoch": 0.3270042194092827, + "grad_norm": 0.5427016615867615, + "learning_rate": 0.0015, + "loss": 1.6049, + "step": 3100 + }, + { + "epoch": 0.3280590717299578, + "grad_norm": 0.5689124464988708, + "learning_rate": 0.0015, + "loss": 1.6085, + "step": 3110 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 0.6812858581542969, + "learning_rate": 0.0015, + "loss": 1.6118, + "step": 3120 + }, + { + "epoch": 0.33016877637130804, + "grad_norm": 0.5897095203399658, + "learning_rate": 0.0015, + "loss": 1.6239, + "step": 3130 + }, + { + "epoch": 0.33122362869198313, + "grad_norm": 0.9220945835113525, + "learning_rate": 0.0015, + "loss": 1.6001, + "step": 3140 + }, + { + "epoch": 0.3322784810126582, + "grad_norm": 0.9273051619529724, + "learning_rate": 0.0015, + "loss": 1.6101, + "step": 3150 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.6259636282920837, + "learning_rate": 0.0015, + "loss": 1.6116, + "step": 3160 + }, + { + "epoch": 0.33438818565400846, + "grad_norm": 0.6592155694961548, + "learning_rate": 0.0015, + "loss": 1.6147, + "step": 3170 + }, + { + "epoch": 0.33544303797468356, + "grad_norm": 0.6287351846694946, + "learning_rate": 0.0015, + "loss": 1.6056, + "step": 3180 + }, + { + 
"epoch": 0.33649789029535865, + "grad_norm": 0.982848584651947, + "learning_rate": 0.0015, + "loss": 1.6079, + "step": 3190 + }, + { + "epoch": 0.33755274261603374, + "grad_norm": 0.6584777235984802, + "learning_rate": 0.0015, + "loss": 1.5998, + "step": 3200 + }, + { + "epoch": 0.33860759493670883, + "grad_norm": 0.6095257997512817, + "learning_rate": 0.0015, + "loss": 1.6171, + "step": 3210 + }, + { + "epoch": 0.339662447257384, + "grad_norm": 0.6312419772148132, + "learning_rate": 0.0015, + "loss": 1.6054, + "step": 3220 + }, + { + "epoch": 0.3407172995780591, + "grad_norm": 0.5978062152862549, + "learning_rate": 0.0015, + "loss": 1.5997, + "step": 3230 + }, + { + "epoch": 0.34177215189873417, + "grad_norm": 0.5813004970550537, + "learning_rate": 0.0015, + "loss": 1.5785, + "step": 3240 + }, + { + "epoch": 0.34282700421940926, + "grad_norm": 0.6352910995483398, + "learning_rate": 0.0015, + "loss": 1.603, + "step": 3250 + }, + { + "epoch": 0.3438818565400844, + "grad_norm": 0.5998354554176331, + "learning_rate": 0.0015, + "loss": 1.5949, + "step": 3260 + }, + { + "epoch": 0.3449367088607595, + "grad_norm": 0.6111003756523132, + "learning_rate": 0.0015, + "loss": 1.6095, + "step": 3270 + }, + { + "epoch": 0.3459915611814346, + "grad_norm": 0.566878616809845, + "learning_rate": 0.0015, + "loss": 1.6106, + "step": 3280 + }, + { + "epoch": 0.3470464135021097, + "grad_norm": 0.6212012767791748, + "learning_rate": 0.0015, + "loss": 1.5931, + "step": 3290 + }, + { + "epoch": 0.34810126582278483, + "grad_norm": 0.6310331225395203, + "learning_rate": 0.0015, + "loss": 1.6032, + "step": 3300 + }, + { + "epoch": 0.3491561181434599, + "grad_norm": 0.6017879247665405, + "learning_rate": 0.0015, + "loss": 1.6022, + "step": 3310 + }, + { + "epoch": 0.350210970464135, + "grad_norm": 0.6400523781776428, + "learning_rate": 0.0015, + "loss": 1.607, + "step": 3320 + }, + { + "epoch": 0.3512658227848101, + "grad_norm": 0.5609440207481384, + "learning_rate": 0.0015, + "loss": 1.599, + 
"step": 3330 + }, + { + "epoch": 0.35232067510548526, + "grad_norm": 0.6740028262138367, + "learning_rate": 0.0015, + "loss": 1.6011, + "step": 3340 + }, + { + "epoch": 0.35337552742616035, + "grad_norm": 0.7476164102554321, + "learning_rate": 0.0015, + "loss": 1.5933, + "step": 3350 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 0.8268814086914062, + "learning_rate": 0.0015, + "loss": 1.5934, + "step": 3360 + }, + { + "epoch": 0.35548523206751054, + "grad_norm": 0.67869633436203, + "learning_rate": 0.0015, + "loss": 1.6028, + "step": 3370 + }, + { + "epoch": 0.35654008438818563, + "grad_norm": 0.5998952984809875, + "learning_rate": 0.0015, + "loss": 1.5976, + "step": 3380 + }, + { + "epoch": 0.3575949367088608, + "grad_norm": 0.8241248726844788, + "learning_rate": 0.0015, + "loss": 1.5924, + "step": 3390 + }, + { + "epoch": 0.35864978902953587, + "grad_norm": 0.6471379995346069, + "learning_rate": 0.0015, + "loss": 1.5999, + "step": 3400 + }, + { + "epoch": 0.35970464135021096, + "grad_norm": 0.6880522966384888, + "learning_rate": 0.0015, + "loss": 1.5978, + "step": 3410 + }, + { + "epoch": 0.36075949367088606, + "grad_norm": 0.5927819013595581, + "learning_rate": 0.0015, + "loss": 1.5752, + "step": 3420 + }, + { + "epoch": 0.3618143459915612, + "grad_norm": 0.623607337474823, + "learning_rate": 0.0015, + "loss": 1.5933, + "step": 3430 + }, + { + "epoch": 0.3628691983122363, + "grad_norm": 0.7323747277259827, + "learning_rate": 0.0015, + "loss": 1.5918, + "step": 3440 + }, + { + "epoch": 0.3639240506329114, + "grad_norm": 0.6532187461853027, + "learning_rate": 0.0015, + "loss": 1.588, + "step": 3450 + }, + { + "epoch": 0.3649789029535865, + "grad_norm": 0.5976588129997253, + "learning_rate": 0.0015, + "loss": 1.5803, + "step": 3460 + }, + { + "epoch": 0.36603375527426163, + "grad_norm": 0.6030303835868835, + "learning_rate": 0.0015, + "loss": 1.5976, + "step": 3470 + }, + { + "epoch": 0.3670886075949367, + "grad_norm": 0.6205447912216187, + 
"learning_rate": 0.0015, + "loss": 1.6031, + "step": 3480 + }, + { + "epoch": 0.3681434599156118, + "grad_norm": 0.7707657814025879, + "learning_rate": 0.0015, + "loss": 1.5968, + "step": 3490 + }, + { + "epoch": 0.3691983122362869, + "grad_norm": 0.651380717754364, + "learning_rate": 0.0015, + "loss": 1.5739, + "step": 3500 + }, + { + "epoch": 0.370253164556962, + "grad_norm": 0.6881337761878967, + "learning_rate": 0.0015, + "loss": 1.5931, + "step": 3510 + }, + { + "epoch": 0.37130801687763715, + "grad_norm": 0.589093804359436, + "learning_rate": 0.0015, + "loss": 1.5906, + "step": 3520 + }, + { + "epoch": 0.37236286919831224, + "grad_norm": 0.774224579334259, + "learning_rate": 0.0015, + "loss": 1.6007, + "step": 3530 + }, + { + "epoch": 0.37341772151898733, + "grad_norm": 0.579230546951294, + "learning_rate": 0.0015, + "loss": 1.5936, + "step": 3540 + }, + { + "epoch": 0.3744725738396624, + "grad_norm": 0.6935818195343018, + "learning_rate": 0.0015, + "loss": 1.5876, + "step": 3550 + }, + { + "epoch": 0.3755274261603376, + "grad_norm": 0.6207526326179504, + "learning_rate": 0.0015, + "loss": 1.5856, + "step": 3560 + }, + { + "epoch": 0.37658227848101267, + "grad_norm": 0.8157477974891663, + "learning_rate": 0.0015, + "loss": 1.5808, + "step": 3570 + }, + { + "epoch": 0.37763713080168776, + "grad_norm": 0.5693010687828064, + "learning_rate": 0.0015, + "loss": 1.5799, + "step": 3580 + }, + { + "epoch": 0.37869198312236285, + "grad_norm": 0.6131789088249207, + "learning_rate": 0.0015, + "loss": 1.5949, + "step": 3590 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 0.8241103887557983, + "learning_rate": 0.0015, + "loss": 1.5764, + "step": 3600 + }, + { + "epoch": 0.3808016877637131, + "grad_norm": 0.7060911655426025, + "learning_rate": 0.0015, + "loss": 1.5798, + "step": 3610 + }, + { + "epoch": 0.3818565400843882, + "grad_norm": 0.6012842059135437, + "learning_rate": 0.0015, + "loss": 1.5833, + "step": 3620 + }, + { + "epoch": 0.3829113924050633, + 
"grad_norm": 0.7175701260566711, + "learning_rate": 0.0015, + "loss": 1.5823, + "step": 3630 + }, + { + "epoch": 0.38396624472573837, + "grad_norm": 0.6527909636497498, + "learning_rate": 0.0015, + "loss": 1.5838, + "step": 3640 + }, + { + "epoch": 0.3850210970464135, + "grad_norm": 0.5886304974555969, + "learning_rate": 0.0015, + "loss": 1.5878, + "step": 3650 + }, + { + "epoch": 0.3860759493670886, + "grad_norm": 0.5935819149017334, + "learning_rate": 0.0015, + "loss": 1.5827, + "step": 3660 + }, + { + "epoch": 0.3871308016877637, + "grad_norm": 0.5793882012367249, + "learning_rate": 0.0015, + "loss": 1.5743, + "step": 3670 + }, + { + "epoch": 0.3881856540084388, + "grad_norm": 0.6356774568557739, + "learning_rate": 0.0015, + "loss": 1.5783, + "step": 3680 + }, + { + "epoch": 0.38924050632911394, + "grad_norm": 0.6113144159317017, + "learning_rate": 0.0015, + "loss": 1.5893, + "step": 3690 + }, + { + "epoch": 0.39029535864978904, + "grad_norm": 0.6362749934196472, + "learning_rate": 0.0015, + "loss": 1.5917, + "step": 3700 + }, + { + "epoch": 0.39135021097046413, + "grad_norm": 0.8249803781509399, + "learning_rate": 0.0015, + "loss": 1.5781, + "step": 3710 + }, + { + "epoch": 0.3924050632911392, + "grad_norm": 0.8612954616546631, + "learning_rate": 0.0015, + "loss": 1.5608, + "step": 3720 + }, + { + "epoch": 0.39345991561181437, + "grad_norm": 1.7621455192565918, + "learning_rate": 0.0015, + "loss": 1.5877, + "step": 3730 + }, + { + "epoch": 0.39451476793248946, + "grad_norm": 1.3036092519760132, + "learning_rate": 0.0015, + "loss": 1.5916, + "step": 3740 + }, + { + "epoch": 0.39556962025316456, + "grad_norm": 0.9393406510353088, + "learning_rate": 0.0015, + "loss": 1.5864, + "step": 3750 + }, + { + "epoch": 0.39662447257383965, + "grad_norm": 0.9737085103988647, + "learning_rate": 0.0015, + "loss": 1.5715, + "step": 3760 + }, + { + "epoch": 0.39767932489451474, + "grad_norm": 0.8785778880119324, + "learning_rate": 0.0015, + "loss": 1.5872, + "step": 3770 + }, + 
{ + "epoch": 0.3987341772151899, + "grad_norm": 0.5576906800270081, + "learning_rate": 0.0015, + "loss": 1.5743, + "step": 3780 + }, + { + "epoch": 0.399789029535865, + "grad_norm": 0.5176705718040466, + "learning_rate": 0.0015, + "loss": 1.5734, + "step": 3790 + }, + { + "epoch": 0.4008438818565401, + "grad_norm": 0.6921336650848389, + "learning_rate": 0.0015, + "loss": 1.5808, + "step": 3800 + }, + { + "epoch": 0.40189873417721517, + "grad_norm": 0.8691692352294922, + "learning_rate": 0.0015, + "loss": 1.5723, + "step": 3810 + }, + { + "epoch": 0.4029535864978903, + "grad_norm": 0.6363836526870728, + "learning_rate": 0.0015, + "loss": 1.5627, + "step": 3820 + }, + { + "epoch": 0.4040084388185654, + "grad_norm": 0.6024162769317627, + "learning_rate": 0.0015, + "loss": 1.5676, + "step": 3830 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 0.628873884677887, + "learning_rate": 0.0015, + "loss": 1.5875, + "step": 3840 + }, + { + "epoch": 0.4061181434599156, + "grad_norm": 0.5797845721244812, + "learning_rate": 0.0015, + "loss": 1.5663, + "step": 3850 + }, + { + "epoch": 0.40717299578059074, + "grad_norm": 0.7347355484962463, + "learning_rate": 0.0015, + "loss": 1.5797, + "step": 3860 + }, + { + "epoch": 0.40822784810126583, + "grad_norm": 0.8189390897750854, + "learning_rate": 0.0015, + "loss": 1.5715, + "step": 3870 + }, + { + "epoch": 0.4092827004219409, + "grad_norm": 0.5799738764762878, + "learning_rate": 0.0015, + "loss": 1.5603, + "step": 3880 + }, + { + "epoch": 0.410337552742616, + "grad_norm": 0.6734434366226196, + "learning_rate": 0.0015, + "loss": 1.5592, + "step": 3890 + }, + { + "epoch": 0.41139240506329117, + "grad_norm": 1.40089750289917, + "learning_rate": 0.0015, + "loss": 1.563, + "step": 3900 + }, + { + "epoch": 0.41244725738396626, + "grad_norm": 0.7925665974617004, + "learning_rate": 0.0015, + "loss": 1.5689, + "step": 3910 + }, + { + "epoch": 0.41350210970464135, + "grad_norm": 0.7021671533584595, + "learning_rate": 0.0015, + "loss": 
1.5624, + "step": 3920 + }, + { + "epoch": 0.41455696202531644, + "grad_norm": 0.5486526489257812, + "learning_rate": 0.0015, + "loss": 1.5664, + "step": 3930 + }, + { + "epoch": 0.41561181434599154, + "grad_norm": 0.6393633484840393, + "learning_rate": 0.0015, + "loss": 1.5618, + "step": 3940 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 0.5666303634643555, + "learning_rate": 0.0015, + "loss": 1.5677, + "step": 3950 + }, + { + "epoch": 0.4177215189873418, + "grad_norm": 0.6497644782066345, + "learning_rate": 0.0015, + "loss": 1.5697, + "step": 3960 + }, + { + "epoch": 0.41877637130801687, + "grad_norm": 0.5655925273895264, + "learning_rate": 0.0015, + "loss": 1.5562, + "step": 3970 + }, + { + "epoch": 0.41983122362869196, + "grad_norm": 0.8136340975761414, + "learning_rate": 0.0015, + "loss": 1.5709, + "step": 3980 + }, + { + "epoch": 0.4208860759493671, + "grad_norm": 0.874515950679779, + "learning_rate": 0.0015, + "loss": 1.5607, + "step": 3990 + }, + { + "epoch": 0.4219409282700422, + "grad_norm": 0.5788293480873108, + "learning_rate": 0.0015, + "loss": 1.576, + "step": 4000 + }, + { + "epoch": 0.4229957805907173, + "grad_norm": 0.5628449320793152, + "learning_rate": 0.0015, + "loss": 1.5605, + "step": 4010 + }, + { + "epoch": 0.4240506329113924, + "grad_norm": 0.5451663136482239, + "learning_rate": 0.0015, + "loss": 1.5619, + "step": 4020 + }, + { + "epoch": 0.42510548523206754, + "grad_norm": 0.5687470436096191, + "learning_rate": 0.0015, + "loss": 1.5556, + "step": 4030 + }, + { + "epoch": 0.42616033755274263, + "grad_norm": 0.6235162019729614, + "learning_rate": 0.0015, + "loss": 1.5632, + "step": 4040 + }, + { + "epoch": 0.4272151898734177, + "grad_norm": 0.5888081192970276, + "learning_rate": 0.0015, + "loss": 1.5638, + "step": 4050 + }, + { + "epoch": 0.4282700421940928, + "grad_norm": 1.0608152151107788, + "learning_rate": 0.0015, + "loss": 1.5717, + "step": 4060 + }, + { + "epoch": 0.4293248945147679, + "grad_norm": 0.8200925588607788, + 
"learning_rate": 0.0015, + "loss": 1.5724, + "step": 4070 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 0.6000164151191711, + "learning_rate": 0.0015, + "loss": 1.5819, + "step": 4080 + }, + { + "epoch": 0.43143459915611815, + "grad_norm": 0.632230281829834, + "learning_rate": 0.0015, + "loss": 1.5607, + "step": 4090 + }, + { + "epoch": 0.43248945147679324, + "grad_norm": 0.6781566143035889, + "learning_rate": 0.0015, + "loss": 1.5579, + "step": 4100 + }, + { + "epoch": 0.43354430379746833, + "grad_norm": 0.6161074042320251, + "learning_rate": 0.0015, + "loss": 1.5549, + "step": 4110 + }, + { + "epoch": 0.4345991561181435, + "grad_norm": 0.7849648594856262, + "learning_rate": 0.0015, + "loss": 1.5565, + "step": 4120 + }, + { + "epoch": 0.4356540084388186, + "grad_norm": 0.7117999196052551, + "learning_rate": 0.0015, + "loss": 1.5533, + "step": 4130 + }, + { + "epoch": 0.43670886075949367, + "grad_norm": 0.559719443321228, + "learning_rate": 0.0015, + "loss": 1.5558, + "step": 4140 + }, + { + "epoch": 0.43776371308016876, + "grad_norm": 1.0182809829711914, + "learning_rate": 0.0015, + "loss": 1.5608, + "step": 4150 + }, + { + "epoch": 0.4388185654008439, + "grad_norm": 0.5550006628036499, + "learning_rate": 0.0015, + "loss": 1.5536, + "step": 4160 + }, + { + "epoch": 0.439873417721519, + "grad_norm": 0.5445747375488281, + "learning_rate": 0.0015, + "loss": 1.554, + "step": 4170 + }, + { + "epoch": 0.4409282700421941, + "grad_norm": 0.5958618521690369, + "learning_rate": 0.0015, + "loss": 1.5497, + "step": 4180 + }, + { + "epoch": 0.4419831223628692, + "grad_norm": 0.5892060995101929, + "learning_rate": 0.0015, + "loss": 1.559, + "step": 4190 + }, + { + "epoch": 0.4430379746835443, + "grad_norm": 0.5875847339630127, + "learning_rate": 0.0015, + "loss": 1.5576, + "step": 4200 + }, + { + "epoch": 0.4440928270042194, + "grad_norm": 0.7794286608695984, + "learning_rate": 0.0015, + "loss": 1.5495, + "step": 4210 + }, + { + "epoch": 0.4451476793248945, + 
"grad_norm": 0.5659272074699402, + "learning_rate": 0.0015, + "loss": 1.5555, + "step": 4220 + }, + { + "epoch": 0.4462025316455696, + "grad_norm": 0.5580011010169983, + "learning_rate": 0.0015, + "loss": 1.55, + "step": 4230 + }, + { + "epoch": 0.4472573839662447, + "grad_norm": 0.6203771829605103, + "learning_rate": 0.0015, + "loss": 1.5515, + "step": 4240 + }, + { + "epoch": 0.44831223628691985, + "grad_norm": 0.682816743850708, + "learning_rate": 0.0015, + "loss": 1.5575, + "step": 4250 + }, + { + "epoch": 0.44936708860759494, + "grad_norm": 0.6003069281578064, + "learning_rate": 0.0015, + "loss": 1.5646, + "step": 4260 + }, + { + "epoch": 0.45042194092827004, + "grad_norm": 0.5731751918792725, + "learning_rate": 0.0015, + "loss": 1.5498, + "step": 4270 + }, + { + "epoch": 0.45147679324894513, + "grad_norm": 1.3089572191238403, + "learning_rate": 0.0015, + "loss": 1.5434, + "step": 4280 + }, + { + "epoch": 0.4525316455696203, + "grad_norm": 0.8871122598648071, + "learning_rate": 0.0015, + "loss": 1.557, + "step": 4290 + }, + { + "epoch": 0.45358649789029537, + "grad_norm": 0.7614080905914307, + "learning_rate": 0.0015, + "loss": 1.5533, + "step": 4300 + }, + { + "epoch": 0.45464135021097046, + "grad_norm": 0.5850949883460999, + "learning_rate": 0.0015, + "loss": 1.5542, + "step": 4310 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 0.6122797727584839, + "learning_rate": 0.0015, + "loss": 1.5463, + "step": 4320 + }, + { + "epoch": 0.45675105485232065, + "grad_norm": 0.6340963840484619, + "learning_rate": 0.0015, + "loss": 1.5578, + "step": 4330 + }, + { + "epoch": 0.4578059071729958, + "grad_norm": 0.7485887408256531, + "learning_rate": 0.0015, + "loss": 1.5457, + "step": 4340 + }, + { + "epoch": 0.4588607594936709, + "grad_norm": 0.8456482887268066, + "learning_rate": 0.0015, + "loss": 1.5487, + "step": 4350 + }, + { + "epoch": 0.459915611814346, + "grad_norm": 0.5885475277900696, + "learning_rate": 0.0015, + "loss": 1.5621, + "step": 4360 + }, + { + 
"epoch": 0.4609704641350211, + "grad_norm": 0.7908206582069397, + "learning_rate": 0.0015, + "loss": 1.5433, + "step": 4370 + }, + { + "epoch": 0.4620253164556962, + "grad_norm": 0.5468949675559998, + "learning_rate": 0.0015, + "loss": 1.5446, + "step": 4380 + }, + { + "epoch": 0.4630801687763713, + "grad_norm": 0.5600817799568176, + "learning_rate": 0.0015, + "loss": 1.5413, + "step": 4390 + }, + { + "epoch": 0.4641350210970464, + "grad_norm": 0.5512800216674805, + "learning_rate": 0.0015, + "loss": 1.5467, + "step": 4400 + }, + { + "epoch": 0.4651898734177215, + "grad_norm": 0.7056983113288879, + "learning_rate": 0.0015, + "loss": 1.5638, + "step": 4410 + }, + { + "epoch": 0.46624472573839665, + "grad_norm": 0.9344276785850525, + "learning_rate": 0.0015, + "loss": 1.5457, + "step": 4420 + }, + { + "epoch": 0.46729957805907174, + "grad_norm": 0.8951518535614014, + "learning_rate": 0.0015, + "loss": 1.5528, + "step": 4430 + }, + { + "epoch": 0.46835443037974683, + "grad_norm": 0.6155388355255127, + "learning_rate": 0.0015, + "loss": 1.5467, + "step": 4440 + }, + { + "epoch": 0.4694092827004219, + "grad_norm": 0.7321780323982239, + "learning_rate": 0.0015, + "loss": 1.5539, + "step": 4450 + }, + { + "epoch": 0.4704641350210971, + "grad_norm": 0.7223412394523621, + "learning_rate": 0.0015, + "loss": 1.542, + "step": 4460 + }, + { + "epoch": 0.47151898734177217, + "grad_norm": 0.580058753490448, + "learning_rate": 0.0015, + "loss": 1.5512, + "step": 4470 + }, + { + "epoch": 0.47257383966244726, + "grad_norm": 0.626334547996521, + "learning_rate": 0.0015, + "loss": 1.5538, + "step": 4480 + }, + { + "epoch": 0.47362869198312235, + "grad_norm": 0.9320026636123657, + "learning_rate": 0.0015, + "loss": 1.5435, + "step": 4490 + }, + { + "epoch": 0.47468354430379744, + "grad_norm": 0.6046232581138611, + "learning_rate": 0.0015, + "loss": 1.5446, + "step": 4500 + }, + { + "epoch": 0.4757383966244726, + "grad_norm": 0.7325782775878906, + "learning_rate": 0.0015, + "loss": 
1.5463, + "step": 4510 + }, + { + "epoch": 0.4767932489451477, + "grad_norm": 0.571357786655426, + "learning_rate": 0.0015, + "loss": 1.5328, + "step": 4520 + }, + { + "epoch": 0.4778481012658228, + "grad_norm": 0.5945020318031311, + "learning_rate": 0.0015, + "loss": 1.5487, + "step": 4530 + }, + { + "epoch": 0.47890295358649787, + "grad_norm": 0.6285760402679443, + "learning_rate": 0.0015, + "loss": 1.5507, + "step": 4540 + }, + { + "epoch": 0.479957805907173, + "grad_norm": 0.5691685676574707, + "learning_rate": 0.0015, + "loss": 1.5472, + "step": 4550 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 0.5442836880683899, + "learning_rate": 0.0015, + "loss": 1.537, + "step": 4560 + }, + { + "epoch": 0.4820675105485232, + "grad_norm": 0.6255428194999695, + "learning_rate": 0.0015, + "loss": 1.5193, + "step": 4570 + }, + { + "epoch": 0.4831223628691983, + "grad_norm": 0.554628849029541, + "learning_rate": 0.0015, + "loss": 1.5447, + "step": 4580 + }, + { + "epoch": 0.48417721518987344, + "grad_norm": 0.6360525488853455, + "learning_rate": 0.0015, + "loss": 1.5315, + "step": 4590 + }, + { + "epoch": 0.48523206751054854, + "grad_norm": 0.7742726802825928, + "learning_rate": 0.0015, + "loss": 1.5487, + "step": 4600 + }, + { + "epoch": 0.48628691983122363, + "grad_norm": 0.8166155219078064, + "learning_rate": 0.0015, + "loss": 1.5497, + "step": 4610 + }, + { + "epoch": 0.4873417721518987, + "grad_norm": 0.5890775918960571, + "learning_rate": 0.0015, + "loss": 1.527, + "step": 4620 + }, + { + "epoch": 0.4883966244725738, + "grad_norm": 0.6394803524017334, + "learning_rate": 0.0015, + "loss": 1.5464, + "step": 4630 + }, + { + "epoch": 0.48945147679324896, + "grad_norm": 0.6207756996154785, + "learning_rate": 0.0015, + "loss": 1.5459, + "step": 4640 + }, + { + "epoch": 0.49050632911392406, + "grad_norm": 0.5840123891830444, + "learning_rate": 0.0015, + "loss": 1.5507, + "step": 4650 + }, + { + "epoch": 0.49156118143459915, + "grad_norm": 0.5675470232963562, + 
"learning_rate": 0.0015, + "loss": 1.5353, + "step": 4660 + }, + { + "epoch": 0.49261603375527424, + "grad_norm": 0.5648407340049744, + "learning_rate": 0.0015, + "loss": 1.5372, + "step": 4670 + }, + { + "epoch": 0.4936708860759494, + "grad_norm": 0.5731132626533508, + "learning_rate": 0.0015, + "loss": 1.5304, + "step": 4680 + }, + { + "epoch": 0.4947257383966245, + "grad_norm": 0.8239709138870239, + "learning_rate": 0.0015, + "loss": 1.5294, + "step": 4690 + }, + { + "epoch": 0.4957805907172996, + "grad_norm": 0.8988879919052124, + "learning_rate": 0.0015, + "loss": 1.5411, + "step": 4700 + }, + { + "epoch": 0.49683544303797467, + "grad_norm": 0.5612457990646362, + "learning_rate": 0.0015, + "loss": 1.5405, + "step": 4710 + }, + { + "epoch": 0.4978902953586498, + "grad_norm": 0.7755550146102905, + "learning_rate": 0.0015, + "loss": 1.5394, + "step": 4720 + }, + { + "epoch": 0.4989451476793249, + "grad_norm": 0.5818630456924438, + "learning_rate": 0.0015, + "loss": 1.5331, + "step": 4730 + }, + { + "epoch": 0.5, + "grad_norm": 0.5022954344749451, + "learning_rate": 0.0015, + "loss": 1.5316, + "step": 4740 + }, + { + "epoch": 0.5010548523206751, + "grad_norm": 0.9314385056495667, + "learning_rate": 0.0015, + "loss": 1.5524, + "step": 4750 + }, + { + "epoch": 0.5021097046413502, + "grad_norm": 0.7994999885559082, + "learning_rate": 0.0015, + "loss": 1.5383, + "step": 4760 + }, + { + "epoch": 0.5031645569620253, + "grad_norm": 0.6273106932640076, + "learning_rate": 0.0015, + "loss": 1.5322, + "step": 4770 + }, + { + "epoch": 0.5042194092827004, + "grad_norm": 0.600748598575592, + "learning_rate": 0.0015, + "loss": 1.5433, + "step": 4780 + }, + { + "epoch": 0.5052742616033755, + "grad_norm": 0.7291206121444702, + "learning_rate": 0.0015, + "loss": 1.527, + "step": 4790 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 0.7947571873664856, + "learning_rate": 0.0015, + "loss": 1.5326, + "step": 4800 + }, + { + "epoch": 0.5073839662447257, + "grad_norm": 
0.6562824845314026, + "learning_rate": 0.0015, + "loss": 1.5337, + "step": 4810 + }, + { + "epoch": 0.5084388185654009, + "grad_norm": 0.6121402382850647, + "learning_rate": 0.0015, + "loss": 1.554, + "step": 4820 + }, + { + "epoch": 0.509493670886076, + "grad_norm": 0.9882345795631409, + "learning_rate": 0.0015, + "loss": 1.5372, + "step": 4830 + }, + { + "epoch": 0.510548523206751, + "grad_norm": 0.6104786992073059, + "learning_rate": 0.0015, + "loss": 1.5303, + "step": 4840 + }, + { + "epoch": 0.5116033755274262, + "grad_norm": 0.8607736229896545, + "learning_rate": 0.0015, + "loss": 1.5444, + "step": 4850 + }, + { + "epoch": 0.5126582278481012, + "grad_norm": 0.6029435992240906, + "learning_rate": 0.0015, + "loss": 1.5298, + "step": 4860 + }, + { + "epoch": 0.5137130801687764, + "grad_norm": 0.7495979070663452, + "learning_rate": 0.0015, + "loss": 1.5225, + "step": 4870 + }, + { + "epoch": 0.5147679324894515, + "grad_norm": 0.6200342178344727, + "learning_rate": 0.0015, + "loss": 1.5405, + "step": 4880 + }, + { + "epoch": 0.5158227848101266, + "grad_norm": 0.5241333842277527, + "learning_rate": 0.0015, + "loss": 1.5306, + "step": 4890 + }, + { + "epoch": 0.5168776371308017, + "grad_norm": 0.5704604387283325, + "learning_rate": 0.0015, + "loss": 1.5394, + "step": 4900 + }, + { + "epoch": 0.5179324894514767, + "grad_norm": 0.6147574782371521, + "learning_rate": 0.0015, + "loss": 1.5335, + "step": 4910 + }, + { + "epoch": 0.5189873417721519, + "grad_norm": 0.6230430006980896, + "learning_rate": 0.0015, + "loss": 1.5283, + "step": 4920 + }, + { + "epoch": 0.520042194092827, + "grad_norm": 0.678521454334259, + "learning_rate": 0.0015, + "loss": 1.5343, + "step": 4930 + }, + { + "epoch": 0.5210970464135021, + "grad_norm": 0.8999406695365906, + "learning_rate": 0.0015, + "loss": 1.5324, + "step": 4940 + }, + { + "epoch": 0.5221518987341772, + "grad_norm": 0.7650752067565918, + "learning_rate": 0.0015, + "loss": 1.5426, + "step": 4950 + }, + { + "epoch": 
0.5232067510548524, + "grad_norm": 0.7141919732093811, + "learning_rate": 0.0015, + "loss": 1.5218, + "step": 4960 + }, + { + "epoch": 0.5242616033755274, + "grad_norm": 0.5926218628883362, + "learning_rate": 0.0015, + "loss": 1.5294, + "step": 4970 + }, + { + "epoch": 0.5253164556962026, + "grad_norm": 0.6089852452278137, + "learning_rate": 0.0015, + "loss": 1.5233, + "step": 4980 + }, + { + "epoch": 0.5263713080168776, + "grad_norm": 0.7046988010406494, + "learning_rate": 0.0015, + "loss": 1.5261, + "step": 4990 + }, + { + "epoch": 0.5274261603375527, + "grad_norm": 0.6000553369522095, + "learning_rate": 0.0015, + "loss": 1.5373, + "step": 5000 + }, + { + "epoch": 0.5284810126582279, + "grad_norm": 0.5103377103805542, + "learning_rate": 0.0015, + "loss": 1.5296, + "step": 5010 + }, + { + "epoch": 0.5295358649789029, + "grad_norm": 0.7494931221008301, + "learning_rate": 0.0015, + "loss": 1.5253, + "step": 5020 + }, + { + "epoch": 0.5305907172995781, + "grad_norm": 0.5808805823326111, + "learning_rate": 0.0015, + "loss": 1.5073, + "step": 5030 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 0.5486202239990234, + "learning_rate": 0.0015, + "loss": 1.5374, + "step": 5040 + }, + { + "epoch": 0.5327004219409283, + "grad_norm": 0.5495449900627136, + "learning_rate": 0.0015, + "loss": 1.5272, + "step": 5050 + }, + { + "epoch": 0.5337552742616034, + "grad_norm": 0.7107338905334473, + "learning_rate": 0.0015, + "loss": 1.5377, + "step": 5060 + }, + { + "epoch": 0.5348101265822784, + "grad_norm": 0.6358274817466736, + "learning_rate": 0.0015, + "loss": 1.526, + "step": 5070 + }, + { + "epoch": 0.5358649789029536, + "grad_norm": 0.5813426375389099, + "learning_rate": 0.0015, + "loss": 1.5372, + "step": 5080 + }, + { + "epoch": 0.5369198312236287, + "grad_norm": 0.7069512009620667, + "learning_rate": 0.0015, + "loss": 1.529, + "step": 5090 + }, + { + "epoch": 0.5379746835443038, + "grad_norm": 0.5345435738563538, + "learning_rate": 0.0015, + "loss": 1.5103, + "step": 
5100 + }, + { + "epoch": 0.5390295358649789, + "grad_norm": 0.5351413488388062, + "learning_rate": 0.0015, + "loss": 1.5245, + "step": 5110 + }, + { + "epoch": 0.540084388185654, + "grad_norm": 0.672135055065155, + "learning_rate": 0.0015, + "loss": 1.5229, + "step": 5120 + }, + { + "epoch": 0.5411392405063291, + "grad_norm": 0.5577877163887024, + "learning_rate": 0.0015, + "loss": 1.5213, + "step": 5130 + }, + { + "epoch": 0.5421940928270043, + "grad_norm": 0.6954469084739685, + "learning_rate": 0.0015, + "loss": 1.5422, + "step": 5140 + }, + { + "epoch": 0.5432489451476793, + "grad_norm": 0.721547544002533, + "learning_rate": 0.0015, + "loss": 1.5346, + "step": 5150 + }, + { + "epoch": 0.5443037974683544, + "grad_norm": 0.5934624075889587, + "learning_rate": 0.0015, + "loss": 1.5253, + "step": 5160 + }, + { + "epoch": 0.5453586497890295, + "grad_norm": 0.8245863318443298, + "learning_rate": 0.0015, + "loss": 1.5312, + "step": 5170 + }, + { + "epoch": 0.5464135021097046, + "grad_norm": 0.632466733455658, + "learning_rate": 0.0015, + "loss": 1.5377, + "step": 5180 + }, + { + "epoch": 0.5474683544303798, + "grad_norm": 0.7374550104141235, + "learning_rate": 0.0015, + "loss": 1.5187, + "step": 5190 + }, + { + "epoch": 0.5485232067510548, + "grad_norm": 0.8513514399528503, + "learning_rate": 0.0015, + "loss": 1.5297, + "step": 5200 + }, + { + "epoch": 0.54957805907173, + "grad_norm": 0.7695537209510803, + "learning_rate": 0.0015, + "loss": 1.5163, + "step": 5210 + }, + { + "epoch": 0.5506329113924051, + "grad_norm": 0.746254026889801, + "learning_rate": 0.0015, + "loss": 1.5258, + "step": 5220 + }, + { + "epoch": 0.5516877637130801, + "grad_norm": 0.5853145122528076, + "learning_rate": 0.0015, + "loss": 1.5185, + "step": 5230 + }, + { + "epoch": 0.5527426160337553, + "grad_norm": 0.5808828473091125, + "learning_rate": 0.0015, + "loss": 1.5253, + "step": 5240 + }, + { + "epoch": 0.5537974683544303, + "grad_norm": 0.7577324509620667, + "learning_rate": 0.0015, + "loss": 
1.5204, + "step": 5250 + }, + { + "epoch": 0.5548523206751055, + "grad_norm": 0.5829281210899353, + "learning_rate": 0.0015, + "loss": 1.5279, + "step": 5260 + }, + { + "epoch": 0.5559071729957806, + "grad_norm": 0.8018589019775391, + "learning_rate": 0.0015, + "loss": 1.532, + "step": 5270 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 0.6416056752204895, + "learning_rate": 0.0015, + "loss": 1.5254, + "step": 5280 + }, + { + "epoch": 0.5580168776371308, + "grad_norm": 0.7368462681770325, + "learning_rate": 0.0015, + "loss": 1.5221, + "step": 5290 + }, + { + "epoch": 0.5590717299578059, + "grad_norm": 0.541665256023407, + "learning_rate": 0.0015, + "loss": 1.5287, + "step": 5300 + }, + { + "epoch": 0.560126582278481, + "grad_norm": 0.7606929540634155, + "learning_rate": 0.0015, + "loss": 1.518, + "step": 5310 + }, + { + "epoch": 0.5611814345991561, + "grad_norm": 0.647328794002533, + "learning_rate": 0.0015, + "loss": 1.5171, + "step": 5320 + }, + { + "epoch": 0.5622362869198312, + "grad_norm": 0.8251186013221741, + "learning_rate": 0.0015, + "loss": 1.5291, + "step": 5330 + }, + { + "epoch": 0.5632911392405063, + "grad_norm": 0.6676337122917175, + "learning_rate": 0.0015, + "loss": 1.5206, + "step": 5340 + }, + { + "epoch": 0.5643459915611815, + "grad_norm": 0.6196624040603638, + "learning_rate": 0.0015, + "loss": 1.5094, + "step": 5350 + }, + { + "epoch": 0.5654008438818565, + "grad_norm": 0.8831680417060852, + "learning_rate": 0.0015, + "loss": 1.5197, + "step": 5360 + }, + { + "epoch": 0.5664556962025317, + "grad_norm": 0.5513179898262024, + "learning_rate": 0.0015, + "loss": 1.5197, + "step": 5370 + }, + { + "epoch": 0.5675105485232067, + "grad_norm": 0.5404736399650574, + "learning_rate": 0.0015, + "loss": 1.5132, + "step": 5380 + }, + { + "epoch": 0.5685654008438819, + "grad_norm": 0.7697404026985168, + "learning_rate": 0.0015, + "loss": 1.516, + "step": 5390 + }, + { + "epoch": 0.569620253164557, + "grad_norm": 0.5695388317108154, + "learning_rate": 
0.0015, + "loss": 1.5182, + "step": 5400 + }, + { + "epoch": 0.570675105485232, + "grad_norm": 0.636220395565033, + "learning_rate": 0.0015, + "loss": 1.5201, + "step": 5410 + }, + { + "epoch": 0.5717299578059072, + "grad_norm": 0.5687567591667175, + "learning_rate": 0.0015, + "loss": 1.5111, + "step": 5420 + }, + { + "epoch": 0.5727848101265823, + "grad_norm": 0.5303071141242981, + "learning_rate": 0.0015, + "loss": 1.5107, + "step": 5430 + }, + { + "epoch": 0.5738396624472574, + "grad_norm": 0.8346840143203735, + "learning_rate": 0.0015, + "loss": 1.5094, + "step": 5440 + }, + { + "epoch": 0.5748945147679325, + "grad_norm": 0.6687101125717163, + "learning_rate": 0.0015, + "loss": 1.5143, + "step": 5450 + }, + { + "epoch": 0.5759493670886076, + "grad_norm": 0.6760293245315552, + "learning_rate": 0.0015, + "loss": 1.5148, + "step": 5460 + }, + { + "epoch": 0.5770042194092827, + "grad_norm": 0.6732572913169861, + "learning_rate": 0.0015, + "loss": 1.5144, + "step": 5470 + }, + { + "epoch": 0.5780590717299579, + "grad_norm": 0.8290615677833557, + "learning_rate": 0.0015, + "loss": 1.5165, + "step": 5480 + }, + { + "epoch": 0.5791139240506329, + "grad_norm": 0.5455368757247925, + "learning_rate": 0.0015, + "loss": 1.5232, + "step": 5490 + }, + { + "epoch": 0.580168776371308, + "grad_norm": 0.5541859865188599, + "learning_rate": 0.0015, + "loss": 1.5133, + "step": 5500 + }, + { + "epoch": 0.5812236286919831, + "grad_norm": 0.7072927951812744, + "learning_rate": 0.0015, + "loss": 1.518, + "step": 5510 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 0.5367401242256165, + "learning_rate": 0.0015, + "loss": 1.5118, + "step": 5520 + }, + { + "epoch": 0.5833333333333334, + "grad_norm": 0.5651088356971741, + "learning_rate": 0.0015, + "loss": 1.5039, + "step": 5530 + }, + { + "epoch": 0.5843881856540084, + "grad_norm": 0.5084605813026428, + "learning_rate": 0.0015, + "loss": 1.5069, + "step": 5540 + }, + { + "epoch": 0.5854430379746836, + "grad_norm": 
0.6395454406738281, + "learning_rate": 0.0015, + "loss": 1.5226, + "step": 5550 + }, + { + "epoch": 0.5864978902953587, + "grad_norm": 1.0206706523895264, + "learning_rate": 0.0015, + "loss": 1.5269, + "step": 5560 + }, + { + "epoch": 0.5875527426160337, + "grad_norm": 0.5466570854187012, + "learning_rate": 0.0015, + "loss": 1.5075, + "step": 5570 + }, + { + "epoch": 0.5886075949367089, + "grad_norm": 0.8130196928977966, + "learning_rate": 0.0015, + "loss": 1.5113, + "step": 5580 + }, + { + "epoch": 0.5896624472573839, + "grad_norm": 0.7004330158233643, + "learning_rate": 0.0015, + "loss": 1.5151, + "step": 5590 + }, + { + "epoch": 0.5907172995780591, + "grad_norm": 0.5617148876190186, + "learning_rate": 0.0015, + "loss": 1.5074, + "step": 5600 + }, + { + "epoch": 0.5917721518987342, + "grad_norm": 0.6354098916053772, + "learning_rate": 0.0015, + "loss": 1.5117, + "step": 5610 + }, + { + "epoch": 0.5928270042194093, + "grad_norm": 0.6242541670799255, + "learning_rate": 0.0015, + "loss": 1.5189, + "step": 5620 + }, + { + "epoch": 0.5938818565400844, + "grad_norm": 0.6715850234031677, + "learning_rate": 0.0015, + "loss": 1.5157, + "step": 5630 + }, + { + "epoch": 0.5949367088607594, + "grad_norm": 0.5890056490898132, + "learning_rate": 0.0015, + "loss": 1.5265, + "step": 5640 + }, + { + "epoch": 0.5959915611814346, + "grad_norm": 0.6360265612602234, + "learning_rate": 0.0015, + "loss": 1.52, + "step": 5650 + }, + { + "epoch": 0.5970464135021097, + "grad_norm": 0.6696105599403381, + "learning_rate": 0.0015, + "loss": 1.5175, + "step": 5660 + }, + { + "epoch": 0.5981012658227848, + "grad_norm": 0.5383630990982056, + "learning_rate": 0.0015, + "loss": 1.5218, + "step": 5670 + }, + { + "epoch": 0.5991561181434599, + "grad_norm": 0.7002443671226501, + "learning_rate": 0.0015, + "loss": 1.5228, + "step": 5680 + }, + { + "epoch": 0.6002109704641351, + "grad_norm": 0.5610836744308472, + "learning_rate": 0.0015, + "loss": 1.5001, + "step": 5690 + }, + { + "epoch": 
0.6012658227848101, + "grad_norm": 0.5491235852241516, + "learning_rate": 0.0015, + "loss": 1.5048, + "step": 5700 + }, + { + "epoch": 0.6023206751054853, + "grad_norm": 0.5745838284492493, + "learning_rate": 0.0015, + "loss": 1.5103, + "step": 5710 + }, + { + "epoch": 0.6033755274261603, + "grad_norm": 0.5973967909812927, + "learning_rate": 0.0015, + "loss": 1.5142, + "step": 5720 + }, + { + "epoch": 0.6044303797468354, + "grad_norm": 0.7536993622779846, + "learning_rate": 0.0015, + "loss": 1.5092, + "step": 5730 + }, + { + "epoch": 0.6054852320675106, + "grad_norm": 0.5494557619094849, + "learning_rate": 0.0015, + "loss": 1.5117, + "step": 5740 + }, + { + "epoch": 0.6065400843881856, + "grad_norm": 0.8905473947525024, + "learning_rate": 0.0015, + "loss": 1.4849, + "step": 5750 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 0.5961906313896179, + "learning_rate": 0.0015, + "loss": 1.4959, + "step": 5760 + }, + { + "epoch": 0.6086497890295358, + "grad_norm": 0.7787330746650696, + "learning_rate": 0.0015, + "loss": 1.5001, + "step": 5770 + }, + { + "epoch": 0.609704641350211, + "grad_norm": 0.7014888525009155, + "learning_rate": 0.0015, + "loss": 1.517, + "step": 5780 + }, + { + "epoch": 0.6107594936708861, + "grad_norm": 0.7262461185455322, + "learning_rate": 0.0015, + "loss": 1.5071, + "step": 5790 + }, + { + "epoch": 0.6118143459915611, + "grad_norm": 0.550659716129303, + "learning_rate": 0.0015, + "loss": 1.5077, + "step": 5800 + }, + { + "epoch": 0.6128691983122363, + "grad_norm": 0.5086367726325989, + "learning_rate": 0.0015, + "loss": 1.5111, + "step": 5810 + }, + { + "epoch": 0.6139240506329114, + "grad_norm": 0.881079375743866, + "learning_rate": 0.0015, + "loss": 1.5241, + "step": 5820 + }, + { + "epoch": 0.6149789029535865, + "grad_norm": 0.6603014469146729, + "learning_rate": 0.0015, + "loss": 1.5025, + "step": 5830 + }, + { + "epoch": 0.6160337552742616, + "grad_norm": 0.5636839866638184, + "learning_rate": 0.0015, + "loss": 1.5192, + "step": 
5840 + }, + { + "epoch": 0.6170886075949367, + "grad_norm": 0.5623931884765625, + "learning_rate": 0.0015, + "loss": 1.5224, + "step": 5850 + }, + { + "epoch": 0.6181434599156118, + "grad_norm": 0.5545900464057922, + "learning_rate": 0.0015, + "loss": 1.5094, + "step": 5860 + }, + { + "epoch": 0.619198312236287, + "grad_norm": 0.5038663744926453, + "learning_rate": 0.0015, + "loss": 1.5034, + "step": 5870 + }, + { + "epoch": 0.620253164556962, + "grad_norm": 0.634153425693512, + "learning_rate": 0.0015, + "loss": 1.5147, + "step": 5880 + }, + { + "epoch": 0.6213080168776371, + "grad_norm": 0.7164548635482788, + "learning_rate": 0.0015, + "loss": 1.506, + "step": 5890 + }, + { + "epoch": 0.6223628691983122, + "grad_norm": 0.6937264800071716, + "learning_rate": 0.0015, + "loss": 1.5062, + "step": 5900 + }, + { + "epoch": 0.6234177215189873, + "grad_norm": 0.5849927663803101, + "learning_rate": 0.0015, + "loss": 1.5028, + "step": 5910 + }, + { + "epoch": 0.6244725738396625, + "grad_norm": 0.6183120608329773, + "learning_rate": 0.0015, + "loss": 1.4944, + "step": 5920 + }, + { + "epoch": 0.6255274261603375, + "grad_norm": 0.6934881210327148, + "learning_rate": 0.0015, + "loss": 1.4992, + "step": 5930 + }, + { + "epoch": 0.6265822784810127, + "grad_norm": 0.5181916356086731, + "learning_rate": 0.0015, + "loss": 1.5054, + "step": 5940 + }, + { + "epoch": 0.6276371308016878, + "grad_norm": 0.5753829479217529, + "learning_rate": 0.0015, + "loss": 1.4961, + "step": 5950 + }, + { + "epoch": 0.6286919831223629, + "grad_norm": 0.5150362849235535, + "learning_rate": 0.0015, + "loss": 1.5038, + "step": 5960 + }, + { + "epoch": 0.629746835443038, + "grad_norm": 0.5351850390434265, + "learning_rate": 0.0015, + "loss": 1.5054, + "step": 5970 + }, + { + "epoch": 0.630801687763713, + "grad_norm": 0.6032434701919556, + "learning_rate": 0.0015, + "loss": 1.4895, + "step": 5980 + }, + { + "epoch": 0.6318565400843882, + "grad_norm": 0.6674609780311584, + "learning_rate": 0.0015, + 
"loss": 1.5079, + "step": 5990 + }, + { + "epoch": 0.6329113924050633, + "grad_norm": 0.6922264099121094, + "learning_rate": 0.0015, + "loss": 1.5173, + "step": 6000 + }, + { + "epoch": 0.6339662447257384, + "grad_norm": 0.7690093517303467, + "learning_rate": 0.0015, + "loss": 1.5061, + "step": 6010 + }, + { + "epoch": 0.6350210970464135, + "grad_norm": 0.859092652797699, + "learning_rate": 0.0015, + "loss": 1.5093, + "step": 6020 + }, + { + "epoch": 0.6360759493670886, + "grad_norm": 0.5710610151290894, + "learning_rate": 0.0015, + "loss": 1.4942, + "step": 6030 + }, + { + "epoch": 0.6371308016877637, + "grad_norm": 0.8480361104011536, + "learning_rate": 0.0015, + "loss": 1.511, + "step": 6040 + }, + { + "epoch": 0.6381856540084389, + "grad_norm": 0.5757660269737244, + "learning_rate": 0.0015, + "loss": 1.491, + "step": 6050 + }, + { + "epoch": 0.6392405063291139, + "grad_norm": 0.5221954584121704, + "learning_rate": 0.0015, + "loss": 1.5079, + "step": 6060 + }, + { + "epoch": 0.640295358649789, + "grad_norm": 0.5378153920173645, + "learning_rate": 0.0015, + "loss": 1.4992, + "step": 6070 + }, + { + "epoch": 0.6413502109704642, + "grad_norm": 0.5231079459190369, + "learning_rate": 0.0015, + "loss": 1.5003, + "step": 6080 + }, + { + "epoch": 0.6424050632911392, + "grad_norm": 0.510814368724823, + "learning_rate": 0.0015, + "loss": 1.4984, + "step": 6090 + }, + { + "epoch": 0.6434599156118144, + "grad_norm": 0.651321530342102, + "learning_rate": 0.0015, + "loss": 1.4928, + "step": 6100 + }, + { + "epoch": 0.6445147679324894, + "grad_norm": 0.621930718421936, + "learning_rate": 0.0015, + "loss": 1.5054, + "step": 6110 + }, + { + "epoch": 0.6455696202531646, + "grad_norm": 0.9920527935028076, + "learning_rate": 0.0015, + "loss": 1.5025, + "step": 6120 + }, + { + "epoch": 0.6466244725738397, + "grad_norm": 0.4889945387840271, + "learning_rate": 0.0015, + "loss": 1.4995, + "step": 6130 + }, + { + "epoch": 0.6476793248945147, + "grad_norm": 0.5933585166931152, + 
"learning_rate": 0.0015, + "loss": 1.497, + "step": 6140 + }, + { + "epoch": 0.6487341772151899, + "grad_norm": 0.5071729421615601, + "learning_rate": 0.0015, + "loss": 1.5068, + "step": 6150 + }, + { + "epoch": 0.6497890295358649, + "grad_norm": 0.5711946487426758, + "learning_rate": 0.0015, + "loss": 1.5037, + "step": 6160 + }, + { + "epoch": 0.6508438818565401, + "grad_norm": 0.8071870803833008, + "learning_rate": 0.0014834368975312174, + "loss": 1.4822, + "step": 6170 + }, + { + "epoch": 0.6518987341772152, + "grad_norm": 0.8835534453392029, + "learning_rate": 0.0014629899726345957, + "loss": 1.5064, + "step": 6180 + }, + { + "epoch": 0.6529535864978903, + "grad_norm": 0.5751279592514038, + "learning_rate": 0.0014428248775471316, + "loss": 1.5064, + "step": 6190 + }, + { + "epoch": 0.6540084388185654, + "grad_norm": 0.5673179626464844, + "learning_rate": 0.00142293772767289, + "loss": 1.4965, + "step": 6200 + }, + { + "epoch": 0.6550632911392406, + "grad_norm": 0.6210446357727051, + "learning_rate": 0.001403324691959192, + "loss": 1.4913, + "step": 6210 + }, + { + "epoch": 0.6561181434599156, + "grad_norm": 0.6187479496002197, + "learning_rate": 0.0013839819921586025, + "loss": 1.4976, + "step": 6220 + }, + { + "epoch": 0.6571729957805907, + "grad_norm": 0.5539543032646179, + "learning_rate": 0.0013649059021010894, + "loss": 1.486, + "step": 6230 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 0.5763696432113647, + "learning_rate": 0.0013460927469762154, + "loss": 1.4909, + "step": 6240 + }, + { + "epoch": 0.6592827004219409, + "grad_norm": 0.5042856931686401, + "learning_rate": 0.0013275389026252255, + "loss": 1.4913, + "step": 6250 + }, + { + "epoch": 0.6603375527426161, + "grad_norm": 0.6170564293861389, + "learning_rate": 0.0013092407948428887, + "loss": 1.4834, + "step": 6260 + }, + { + "epoch": 0.6613924050632911, + "grad_norm": 0.5373573303222656, + "learning_rate": 0.001291194898688966, + "loss": 1.4911, + "step": 6270 + }, + { + "epoch": 
0.6624472573839663, + "grad_norm": 0.7562010288238525, + "learning_rate": 0.001273397737809166, + "loss": 1.4895, + "step": 6280 + }, + { + "epoch": 0.6635021097046413, + "grad_norm": 0.6941890716552734, + "learning_rate": 0.001255845883765463, + "loss": 1.485, + "step": 6290 + }, + { + "epoch": 0.6645569620253164, + "grad_norm": 0.5045270919799805, + "learning_rate": 0.001238535955375642, + "loss": 1.4783, + "step": 6300 + }, + { + "epoch": 0.6656118143459916, + "grad_norm": 0.4871998727321625, + "learning_rate": 0.0012214646180619506, + "loss": 1.4748, + "step": 6310 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.5172607898712158, + "learning_rate": 0.001204628583208727, + "loss": 1.4706, + "step": 6320 + }, + { + "epoch": 0.6677215189873418, + "grad_norm": 0.5898618102073669, + "learning_rate": 0.0011880246075288827, + "loss": 1.4782, + "step": 6330 + }, + { + "epoch": 0.6687763713080169, + "grad_norm": 0.5182415843009949, + "learning_rate": 0.001171649492439115, + "loss": 1.4702, + "step": 6340 + }, + { + "epoch": 0.669831223628692, + "grad_norm": 0.49977177381515503, + "learning_rate": 0.0011555000834437364, + "loss": 1.4745, + "step": 6350 + }, + { + "epoch": 0.6708860759493671, + "grad_norm": 0.8687050938606262, + "learning_rate": 0.0011395732695269908, + "loss": 1.4714, + "step": 6360 + }, + { + "epoch": 0.6719409282700421, + "grad_norm": 0.5547814965248108, + "learning_rate": 0.0011238659825537505, + "loss": 1.4521, + "step": 6370 + }, + { + "epoch": 0.6729957805907173, + "grad_norm": 0.6606083512306213, + "learning_rate": 0.0011083751966784717, + "loss": 1.4589, + "step": 6380 + }, + { + "epoch": 0.6740506329113924, + "grad_norm": 0.6311693787574768, + "learning_rate": 0.0010930979277622953, + "loss": 1.4735, + "step": 6390 + }, + { + "epoch": 0.6751054852320675, + "grad_norm": 0.5820292234420776, + "learning_rate": 0.0010780312327981854, + "loss": 1.4718, + "step": 6400 + }, + { + "epoch": 0.6761603375527426, + "grad_norm": 0.8019455671310425, 
+ "learning_rate": 0.0010631722093439888, + "loss": 1.4671, + "step": 6410 + }, + { + "epoch": 0.6772151898734177, + "grad_norm": 0.761712908744812, + "learning_rate": 0.00104851799496331, + "loss": 1.4531, + "step": 6420 + }, + { + "epoch": 0.6782700421940928, + "grad_norm": 0.514397144317627, + "learning_rate": 0.0010340657666740914, + "loss": 1.4596, + "step": 6430 + }, + { + "epoch": 0.679324894514768, + "grad_norm": 0.49260208010673523, + "learning_rate": 0.0010198127404047975, + "loss": 1.4427, + "step": 6440 + }, + { + "epoch": 0.680379746835443, + "grad_norm": 0.5700627565383911, + "learning_rate": 0.0010057561704580897, + "loss": 1.4506, + "step": 6450 + }, + { + "epoch": 0.6814345991561181, + "grad_norm": 0.4923916459083557, + "learning_rate": 0.0009918933489818985, + "loss": 1.4626, + "step": 6460 + }, + { + "epoch": 0.6824894514767933, + "grad_norm": 0.6934185028076172, + "learning_rate": 0.0009782216054477827, + "loss": 1.4533, + "step": 6470 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 0.8038368821144104, + "learning_rate": 0.0009647383061364801, + "loss": 1.4629, + "step": 6480 + }, + { + "epoch": 0.6845991561181435, + "grad_norm": 0.5632407665252686, + "learning_rate": 0.0009514408536305495, + "loss": 1.4534, + "step": 6490 + }, + { + "epoch": 0.6856540084388185, + "grad_norm": 0.6466336250305176, + "learning_rate": 0.0009383266863140042, + "loss": 1.4668, + "step": 6500 + }, + { + "epoch": 0.6867088607594937, + "grad_norm": 0.6142919659614563, + "learning_rate": 0.000925393277878844, + "loss": 1.4675, + "step": 6510 + }, + { + "epoch": 0.6877637130801688, + "grad_norm": 0.5444507598876953, + "learning_rate": 0.0009126381368383879, + "loss": 1.4484, + "step": 6520 + }, + { + "epoch": 0.6888185654008439, + "grad_norm": 0.5479179620742798, + "learning_rate": 0.0009000588060473156, + "loss": 1.438, + "step": 6530 + }, + { + "epoch": 0.689873417721519, + "grad_norm": 0.6018847823143005, + "learning_rate": 0.0008876528622283235, + "loss": 
1.4504, + "step": 6540 + }, + { + "epoch": 0.6909282700421941, + "grad_norm": 0.5311291217803955, + "learning_rate": 0.0008754179155053053, + "loss": 1.4477, + "step": 6550 + }, + { + "epoch": 0.6919831223628692, + "grad_norm": 0.5240741968154907, + "learning_rate": 0.0008633516089429683, + "loss": 1.4453, + "step": 6560 + }, + { + "epoch": 0.6930379746835443, + "grad_norm": 0.4696906805038452, + "learning_rate": 0.0008514516180927928, + "loss": 1.4399, + "step": 6570 + }, + { + "epoch": 0.6940928270042194, + "grad_norm": 0.6514448523521423, + "learning_rate": 0.0008397156505452524, + "loss": 1.4389, + "step": 6580 + }, + { + "epoch": 0.6951476793248945, + "grad_norm": 0.49357330799102783, + "learning_rate": 0.0008281414454882051, + "loss": 1.4449, + "step": 6590 + }, + { + "epoch": 0.6962025316455697, + "grad_norm": 0.5959960222244263, + "learning_rate": 0.0008167267732713704, + "loss": 1.4476, + "step": 6600 + }, + { + "epoch": 0.6972573839662447, + "grad_norm": 0.5163862109184265, + "learning_rate": 0.0008054694349768117, + "loss": 1.4315, + "step": 6610 + }, + { + "epoch": 0.6983122362869199, + "grad_norm": 0.5023782849311829, + "learning_rate": 0.0007943672619953359, + "loss": 1.4392, + "step": 6620 + }, + { + "epoch": 0.6993670886075949, + "grad_norm": 0.5141021609306335, + "learning_rate": 0.0007834181156087356, + "loss": 1.4311, + "step": 6630 + }, + { + "epoch": 0.70042194092827, + "grad_norm": 0.5499473214149475, + "learning_rate": 0.0007726198865777852, + "loss": 1.4371, + "step": 6640 + }, + { + "epoch": 0.7014767932489452, + "grad_norm": 0.6825447678565979, + "learning_rate": 0.0007619704947359191, + "loss": 1.4288, + "step": 6650 + }, + { + "epoch": 0.7025316455696202, + "grad_norm": 0.47925615310668945, + "learning_rate": 0.0007514678885885087, + "loss": 1.4312, + "step": 6660 + }, + { + "epoch": 0.7035864978902954, + "grad_norm": 0.48817384243011475, + "learning_rate": 0.0007411100449176633, + "loss": 1.4313, + "step": 6670 + }, + { + "epoch": 
0.7046413502109705, + "grad_norm": 0.5127274990081787, + "learning_rate": 0.0007308949683924791, + "loss": 1.4353, + "step": 6680 + }, + { + "epoch": 0.7056962025316456, + "grad_norm": 0.6060632467269897, + "learning_rate": 0.000720820691184658, + "loss": 1.4261, + "step": 6690 + }, + { + "epoch": 0.7067510548523207, + "grad_norm": 0.553632378578186, + "learning_rate": 0.0007108852725894269, + "loss": 1.4253, + "step": 6700 + }, + { + "epoch": 0.7078059071729957, + "grad_norm": 0.4965393543243408, + "learning_rate": 0.000701086798651681, + "loss": 1.4279, + "step": 6710 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 0.539612889289856, + "learning_rate": 0.0006914233817972798, + "loss": 1.4161, + "step": 6720 + }, + { + "epoch": 0.709915611814346, + "grad_norm": 0.5040695071220398, + "learning_rate": 0.0006818931604694261, + "loss": 1.4232, + "step": 6730 + }, + { + "epoch": 0.7109704641350211, + "grad_norm": 0.4720032215118408, + "learning_rate": 0.0006724942987700563, + "loss": 1.4277, + "step": 6740 + }, + { + "epoch": 0.7120253164556962, + "grad_norm": 0.5587788820266724, + "learning_rate": 0.0006632249861061732, + "loss": 1.4281, + "step": 6750 + }, + { + "epoch": 0.7130801687763713, + "grad_norm": 0.5153480172157288, + "learning_rate": 0.0006540834368410549, + "loss": 1.4242, + "step": 6760 + }, + { + "epoch": 0.7141350210970464, + "grad_norm": 0.484110027551651, + "learning_rate": 0.0006450678899502701, + "loss": 1.4261, + "step": 6770 + }, + { + "epoch": 0.7151898734177216, + "grad_norm": 0.5298638343811035, + "learning_rate": 0.0006361766086824345, + "loss": 1.4218, + "step": 6780 + }, + { + "epoch": 0.7162447257383966, + "grad_norm": 0.6543071269989014, + "learning_rate": 0.000627407880224645, + "loss": 1.4302, + "step": 6790 + }, + { + "epoch": 0.7172995780590717, + "grad_norm": 0.5062084794044495, + "learning_rate": 0.0006187600153725225, + "loss": 1.4139, + "step": 6800 + }, + { + "epoch": 0.7183544303797469, + "grad_norm": 0.6418647766113281, + 
"learning_rate": 0.0006102313482048055, + "loss": 1.4156, + "step": 6810 + }, + { + "epoch": 0.7194092827004219, + "grad_norm": 0.5333866477012634, + "learning_rate": 0.0006018202357624274, + "loss": 1.4207, + "step": 6820 + }, + { + "epoch": 0.7204641350210971, + "grad_norm": 0.6157364249229431, + "learning_rate": 0.0005935250577320168, + "loss": 1.4154, + "step": 6830 + }, + { + "epoch": 0.7215189873417721, + "grad_norm": 0.49300333857536316, + "learning_rate": 0.0005853442161337618, + "loss": 1.4051, + "step": 6840 + }, + { + "epoch": 0.7225738396624473, + "grad_norm": 0.5593222975730896, + "learning_rate": 0.0005772761350135759, + "loss": 1.4136, + "step": 6850 + }, + { + "epoch": 0.7236286919831224, + "grad_norm": 0.536490797996521, + "learning_rate": 0.0005693192601395058, + "loss": 1.4077, + "step": 6860 + }, + { + "epoch": 0.7246835443037974, + "grad_norm": 0.526736855506897, + "learning_rate": 0.000561472058702326, + "loss": 1.4052, + "step": 6870 + }, + { + "epoch": 0.7257383966244726, + "grad_norm": 0.46280238032341003, + "learning_rate": 0.000553733019020258, + "loss": 1.4118, + "step": 6880 + }, + { + "epoch": 0.7267932489451476, + "grad_norm": 0.5084342956542969, + "learning_rate": 0.0005461006502477612, + "loss": 1.4011, + "step": 6890 + }, + { + "epoch": 0.7278481012658228, + "grad_norm": 0.5471343398094177, + "learning_rate": 0.0005385734820883369, + "loss": 1.4024, + "step": 6900 + }, + { + "epoch": 0.7289029535864979, + "grad_norm": 0.512230634689331, + "learning_rate": 0.0005311500645112907, + "loss": 1.4292, + "step": 6910 + }, + { + "epoch": 0.729957805907173, + "grad_norm": 0.5206472277641296, + "learning_rate": 0.0005238289674723993, + "loss": 1.4066, + "step": 6920 + }, + { + "epoch": 0.7310126582278481, + "grad_norm": 0.5572535991668701, + "learning_rate": 0.0005166087806384274, + "loss": 1.4115, + "step": 6930 + }, + { + "epoch": 0.7320675105485233, + "grad_norm": 0.5156189203262329, + "learning_rate": 0.0005094881131154418, + "loss": 
1.4123, + "step": 6940 + }, + { + "epoch": 0.7331223628691983, + "grad_norm": 0.503831684589386, + "learning_rate": 0.0005024655931808696, + "loss": 1.41, + "step": 6950 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 0.47319483757019043, + "learning_rate": 0.0004955398680192508, + "loss": 1.401, + "step": 6960 + }, + { + "epoch": 0.7352320675105485, + "grad_norm": 0.4656444489955902, + "learning_rate": 0.000488709603461632, + "loss": 1.3946, + "step": 6970 + }, + { + "epoch": 0.7362869198312236, + "grad_norm": 0.4967908561229706, + "learning_rate": 0.000481973483728553, + "loss": 1.3971, + "step": 6980 + }, + { + "epoch": 0.7373417721518988, + "grad_norm": 0.5195211172103882, + "learning_rate": 0.0004753302111765748, + "loss": 1.3981, + "step": 6990 + }, + { + "epoch": 0.7383966244725738, + "grad_norm": 0.4845932424068451, + "learning_rate": 0.0004687785060483032, + "loss": 1.4101, + "step": 7000 + }, + { + "epoch": 0.739451476793249, + "grad_norm": 0.4656439423561096, + "learning_rate": 0.0004623171062258558, + "loss": 1.3801, + "step": 7010 + }, + { + "epoch": 0.740506329113924, + "grad_norm": 0.6125307083129883, + "learning_rate": 0.0004559447669877288, + "loss": 1.3971, + "step": 7020 + }, + { + "epoch": 0.7415611814345991, + "grad_norm": 0.5421500205993652, + "learning_rate": 0.00044966026076901413, + "loss": 1.3991, + "step": 7030 + }, + { + "epoch": 0.7426160337552743, + "grad_norm": 0.6151002049446106, + "learning_rate": 0.00044346237692492177, + "loss": 1.4056, + "step": 7040 + }, + { + "epoch": 0.7436708860759493, + "grad_norm": 0.5110399723052979, + "learning_rate": 0.0004373499214975615, + "loss": 1.392, + "step": 7050 + }, + { + "epoch": 0.7447257383966245, + "grad_norm": 0.4953923225402832, + "learning_rate": 0.0004313217169859396, + "loss": 1.3992, + "step": 7060 + }, + { + "epoch": 0.7457805907172996, + "grad_norm": 0.5136517286300659, + "learning_rate": 0.0004253766021191256, + "loss": 1.4037, + "step": 7070 + }, + { + "epoch": 
0.7468354430379747, + "grad_norm": 0.5029652118682861, + "learning_rate": 0.00041951343163254497, + "loss": 1.399, + "step": 7080 + }, + { + "epoch": 0.7478902953586498, + "grad_norm": 0.4731636643409729, + "learning_rate": 0.00041373107604735626, + "loss": 1.3959, + "step": 7090 + }, + { + "epoch": 0.7489451476793249, + "grad_norm": 0.5364992022514343, + "learning_rate": 0.0004080284214528687, + "loss": 1.39, + "step": 7100 + }, + { + "epoch": 0.75, + "grad_norm": 0.5382775664329529, + "learning_rate": 0.0004024043692919589, + "loss": 1.4037, + "step": 7110 + }, + { + "epoch": 0.7510548523206751, + "grad_norm": 0.5207314491271973, + "learning_rate": 0.0003968578361494449, + "loss": 1.3999, + "step": 7120 + }, + { + "epoch": 0.7521097046413502, + "grad_norm": 0.4977900981903076, + "learning_rate": 0.000391387753543378, + "loss": 1.4073, + "step": 7130 + }, + { + "epoch": 0.7531645569620253, + "grad_norm": 0.5196287631988525, + "learning_rate": 0.00038599306771921023, + "loss": 1.3891, + "step": 7140 + }, + { + "epoch": 0.7542194092827004, + "grad_norm": 0.6989203095436096, + "learning_rate": 0.0003806727394468004, + "loss": 1.3864, + "step": 7150 + }, + { + "epoch": 0.7552742616033755, + "grad_norm": 0.5948571562767029, + "learning_rate": 0.0003754257438202162, + "loss": 1.3941, + "step": 7160 + }, + { + "epoch": 0.7563291139240507, + "grad_norm": 0.6483874320983887, + "learning_rate": 0.0003702510700602974, + "loss": 1.3999, + "step": 7170 + }, + { + "epoch": 0.7573839662447257, + "grad_norm": 0.5051309466362, + "learning_rate": 0.0003651477213199393, + "loss": 1.3803, + "step": 7180 + }, + { + "epoch": 0.7584388185654009, + "grad_norm": 0.506965696811676, + "learning_rate": 0.000360114714492061, + "loss": 1.3813, + "step": 7190 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 0.4690167009830475, + "learning_rate": 0.0003551510800202195, + "loss": 1.3839, + "step": 7200 + }, + { + "epoch": 0.760548523206751, + "grad_norm": 0.5723272562026978, + 
"learning_rate": 0.0003502558617118353, + "loss": 1.3915, + "step": 7210 + }, + { + "epoch": 0.7616033755274262, + "grad_norm": 0.5610151290893555, + "learning_rate": 0.0003454281165539914, + "loss": 1.4038, + "step": 7220 + }, + { + "epoch": 0.7626582278481012, + "grad_norm": 0.4634474813938141, + "learning_rate": 0.00034066691453177176, + "loss": 1.4008, + "step": 7230 + }, + { + "epoch": 0.7637130801687764, + "grad_norm": 0.47573161125183105, + "learning_rate": 0.0003359713384491037, + "loss": 1.397, + "step": 7240 + }, + { + "epoch": 0.7647679324894515, + "grad_norm": 0.5212197303771973, + "learning_rate": 0.00033134048375206944, + "loss": 1.389, + "step": 7250 + }, + { + "epoch": 0.7658227848101266, + "grad_norm": 0.49535438418388367, + "learning_rate": 0.0003267734583546536, + "loss": 1.3859, + "step": 7260 + }, + { + "epoch": 0.7668776371308017, + "grad_norm": 0.5158471465110779, + "learning_rate": 0.00032226938246689157, + "loss": 1.3852, + "step": 7270 + }, + { + "epoch": 0.7679324894514767, + "grad_norm": 0.5728384852409363, + "learning_rate": 0.0003178273884253874, + "loss": 1.3856, + "step": 7280 + }, + { + "epoch": 0.7689873417721519, + "grad_norm": 0.5211251378059387, + "learning_rate": 0.0003134466205261674, + "loss": 1.3971, + "step": 7290 + }, + { + "epoch": 0.770042194092827, + "grad_norm": 0.5732220411300659, + "learning_rate": 0.0003091262348598378, + "loss": 1.4041, + "step": 7300 + }, + { + "epoch": 0.7710970464135021, + "grad_norm": 0.47373560070991516, + "learning_rate": 0.0003048653991490141, + "loss": 1.3822, + "step": 7310 + }, + { + "epoch": 0.7721518987341772, + "grad_norm": 0.6221377849578857, + "learning_rate": 0.00030066329258799187, + "loss": 1.3776, + "step": 7320 + }, + { + "epoch": 0.7732067510548524, + "grad_norm": 0.5708879828453064, + "learning_rate": 0.0002965191056846266, + "loss": 1.3808, + "step": 7330 + }, + { + "epoch": 0.7742616033755274, + "grad_norm": 0.603161633014679, + "learning_rate": 0.000292432040104394, + 
"loss": 1.3802, + "step": 7340 + }, + { + "epoch": 0.7753164556962026, + "grad_norm": 0.5277611613273621, + "learning_rate": 0.00028840130851659853, + "loss": 1.375, + "step": 7350 + }, + { + "epoch": 0.7763713080168776, + "grad_norm": 0.5325142741203308, + "learning_rate": 0.0002844261344427028, + "loss": 1.3831, + "step": 7360 + }, + { + "epoch": 0.7774261603375527, + "grad_norm": 0.5006171464920044, + "learning_rate": 0.0002805057521067471, + "loss": 1.3836, + "step": 7370 + }, + { + "epoch": 0.7784810126582279, + "grad_norm": 0.48984476923942566, + "learning_rate": 0.00027663940628783017, + "loss": 1.3707, + "step": 7380 + }, + { + "epoch": 0.7795358649789029, + "grad_norm": 0.4523366093635559, + "learning_rate": 0.00027282635217462393, + "loss": 1.3811, + "step": 7390 + }, + { + "epoch": 0.7805907172995781, + "grad_norm": 0.6498004198074341, + "learning_rate": 0.0002690658552218937, + "loss": 1.389, + "step": 7400 + }, + { + "epoch": 0.7816455696202531, + "grad_norm": 0.5409725308418274, + "learning_rate": 0.00026535719100899516, + "loss": 1.3711, + "step": 7410 + }, + { + "epoch": 0.7827004219409283, + "grad_norm": 0.5616211891174316, + "learning_rate": 0.00026169964510032245, + "loss": 1.3747, + "step": 7420 + }, + { + "epoch": 0.7837552742616034, + "grad_norm": 0.487123042345047, + "learning_rate": 0.00025809251290767984, + "loss": 1.3619, + "step": 7430 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 0.4605257511138916, + "learning_rate": 0.00025453509955454957, + "loss": 1.3643, + "step": 7440 + }, + { + "epoch": 0.7858649789029536, + "grad_norm": 0.5088199973106384, + "learning_rate": 0.00025102671974223175, + "loss": 1.3698, + "step": 7450 + }, + { + "epoch": 0.7869198312236287, + "grad_norm": 0.4879826307296753, + "learning_rate": 0.00024756669761782815, + "loss": 1.3797, + "step": 7460 + }, + { + "epoch": 0.7879746835443038, + "grad_norm": 0.48447489738464355, + "learning_rate": 0.0002441543666440464, + "loss": 1.368, + "step": 7470 + }, + { + 
"epoch": 0.7890295358649789, + "grad_norm": 0.5670198798179626, + "learning_rate": 0.00024078906947079878, + "loss": 1.3742, + "step": 7480 + }, + { + "epoch": 0.790084388185654, + "grad_norm": 0.4819045066833496, + "learning_rate": 0.00023747015780857005, + "loss": 1.383, + "step": 7490 + }, + { + "epoch": 0.7911392405063291, + "grad_norm": 0.49387362599372864, + "learning_rate": 0.00023419699230353144, + "loss": 1.3774, + "step": 7500 + }, + { + "epoch": 0.7921940928270043, + "grad_norm": 0.5370501279830933, + "learning_rate": 0.00023096894241437586, + "loss": 1.3828, + "step": 7510 + }, + { + "epoch": 0.7932489451476793, + "grad_norm": 0.47914108633995056, + "learning_rate": 0.00022778538629085056, + "loss": 1.3672, + "step": 7520 + }, + { + "epoch": 0.7943037974683544, + "grad_norm": 0.5119479298591614, + "learning_rate": 0.00022464571065396427, + "loss": 1.3694, + "step": 7530 + }, + { + "epoch": 0.7953586497890295, + "grad_norm": 0.5128620266914368, + "learning_rate": 0.00022154931067784521, + "loss": 1.3666, + "step": 7540 + }, + { + "epoch": 0.7964135021097046, + "grad_norm": 0.46946507692337036, + "learning_rate": 0.00021849558987322782, + "loss": 1.3642, + "step": 7550 + }, + { + "epoch": 0.7974683544303798, + "grad_norm": 0.5166003704071045, + "learning_rate": 0.0002154839599725452, + "loss": 1.365, + "step": 7560 + }, + { + "epoch": 0.7985232067510548, + "grad_norm": 0.5451986789703369, + "learning_rate": 0.00021251384081660544, + "loss": 1.3778, + "step": 7570 + }, + { + "epoch": 0.79957805907173, + "grad_norm": 0.4987885057926178, + "learning_rate": 0.0002095846602428303, + "loss": 1.3732, + "step": 7580 + }, + { + "epoch": 0.8006329113924051, + "grad_norm": 0.4632894694805145, + "learning_rate": 0.00020669585397503358, + "loss": 1.3643, + "step": 7590 + }, + { + "epoch": 0.8016877637130801, + "grad_norm": 0.4567081928253174, + "learning_rate": 0.0002038468655147195, + "loss": 1.3674, + "step": 7600 + }, + { + "epoch": 0.8027426160337553, + 
"grad_norm": 0.49346765875816345, + "learning_rate": 0.00020103714603387894, + "loss": 1.3815, + "step": 7610 + }, + { + "epoch": 0.8037974683544303, + "grad_norm": 0.45700404047966003, + "learning_rate": 0.00019826615426926338, + "loss": 1.3544, + "step": 7620 + }, + { + "epoch": 0.8048523206751055, + "grad_norm": 0.4595026671886444, + "learning_rate": 0.00019553335641811625, + "loss": 1.3763, + "step": 7630 + }, + { + "epoch": 0.8059071729957806, + "grad_norm": 0.5063329935073853, + "learning_rate": 0.0001928382260353415, + "loss": 1.3673, + "step": 7640 + }, + { + "epoch": 0.8069620253164557, + "grad_norm": 0.47150281071662903, + "learning_rate": 0.00019018024393208902, + "loss": 1.3843, + "step": 7650 + }, + { + "epoch": 0.8080168776371308, + "grad_norm": 0.4635440409183502, + "learning_rate": 0.00018755889807573872, + "loss": 1.3615, + "step": 7660 + }, + { + "epoch": 0.8090717299578059, + "grad_norm": 0.45574668049812317, + "learning_rate": 0.00018497368349126262, + "loss": 1.3672, + "step": 7670 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 0.5359010696411133, + "learning_rate": 0.00018242410216394648, + "loss": 1.383, + "step": 7680 + }, + { + "epoch": 0.8111814345991561, + "grad_norm": 0.5147023797035217, + "learning_rate": 0.0001799096629434529, + "loss": 1.3556, + "step": 7690 + }, + { + "epoch": 0.8122362869198312, + "grad_norm": 0.46002885699272156, + "learning_rate": 0.00017742988144920578, + "loss": 1.3666, + "step": 7700 + }, + { + "epoch": 0.8132911392405063, + "grad_norm": 0.4675118625164032, + "learning_rate": 0.00017498427997707976, + "loss": 1.3633, + "step": 7710 + }, + { + "epoch": 0.8143459915611815, + "grad_norm": 0.5563496351242065, + "learning_rate": 0.00017257238740737548, + "loss": 1.3648, + "step": 7720 + }, + { + "epoch": 0.8154008438818565, + "grad_norm": 0.5589823722839355, + "learning_rate": 0.00017019373911406307, + "loss": 1.3735, + "step": 7730 + }, + { + "epoch": 0.8164556962025317, + "grad_norm": 0.46896493434906006, + 
"learning_rate": 0.000167847876875277, + "loss": 1.3721, + "step": 7740 + }, + { + "epoch": 0.8175105485232067, + "grad_norm": 0.45898881554603577, + "learning_rate": 0.00016553434878504428, + "loss": 1.3565, + "step": 7750 + }, + { + "epoch": 0.8185654008438819, + "grad_norm": 0.5108206868171692, + "learning_rate": 0.00016325270916622947, + "loss": 1.3584, + "step": 7760 + }, + { + "epoch": 0.819620253164557, + "grad_norm": 0.4540421962738037, + "learning_rate": 0.00016100251848467966, + "loss": 1.3679, + "step": 7770 + }, + { + "epoch": 0.820675105485232, + "grad_norm": 0.6645315885543823, + "learning_rate": 0.0001587833432645528, + "loss": 1.3602, + "step": 7780 + }, + { + "epoch": 0.8217299578059072, + "grad_norm": 0.5565598011016846, + "learning_rate": 0.00015659475600481292, + "loss": 1.3754, + "step": 7790 + }, + { + "epoch": 0.8227848101265823, + "grad_norm": 0.4732491075992584, + "learning_rate": 0.00015443633509687688, + "loss": 1.3637, + "step": 7800 + }, + { + "epoch": 0.8238396624472574, + "grad_norm": 0.4488430321216583, + "learning_rate": 0.00015230766474339536, + "loss": 1.363, + "step": 7810 + }, + { + "epoch": 0.8248945147679325, + "grad_norm": 0.4506990313529968, + "learning_rate": 0.00015020833487815416, + "loss": 1.3719, + "step": 7820 + }, + { + "epoch": 0.8259493670886076, + "grad_norm": 0.46290716528892517, + "learning_rate": 0.0001481379410870792, + "loss": 1.3612, + "step": 7830 + }, + { + "epoch": 0.8270042194092827, + "grad_norm": 0.47624319791793823, + "learning_rate": 0.00014609608453033013, + "loss": 1.3453, + "step": 7840 + }, + { + "epoch": 0.8280590717299579, + "grad_norm": 0.45356878638267517, + "learning_rate": 0.00014408237186546807, + "loss": 1.3565, + "step": 7850 + }, + { + "epoch": 0.8291139240506329, + "grad_norm": 0.46717569231987, + "learning_rate": 0.00014209641517168273, + "loss": 1.3433, + "step": 7860 + }, + { + "epoch": 0.830168776371308, + "grad_norm": 0.48989081382751465, + "learning_rate": 0.00014013783187506265, 
+ "loss": 1.3578, + "step": 7870 + }, + { + "epoch": 0.8312236286919831, + "grad_norm": 0.529300332069397, + "learning_rate": 0.00013820624467489697, + "loss": 1.3766, + "step": 7880 + }, + { + "epoch": 0.8322784810126582, + "grad_norm": 0.47537773847579956, + "learning_rate": 0.00013630128147099213, + "loss": 1.3743, + "step": 7890 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.48015323281288147, + "learning_rate": 0.00013442257529199068, + "loss": 1.3561, + "step": 7900 + }, + { + "epoch": 0.8343881856540084, + "grad_norm": 0.5237522125244141, + "learning_rate": 0.00013256976422467803, + "loss": 1.3634, + "step": 7910 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 0.4523259997367859, + "learning_rate": 0.00013074249134426366, + "loss": 1.3622, + "step": 7920 + }, + { + "epoch": 0.8364978902953587, + "grad_norm": 0.5076009035110474, + "learning_rate": 0.0001289404046456233, + "loss": 1.3708, + "step": 7930 + }, + { + "epoch": 0.8375527426160337, + "grad_norm": 0.4758736789226532, + "learning_rate": 0.0001271631569754887, + "loss": 1.3642, + "step": 7940 + }, + { + "epoch": 0.8386075949367089, + "grad_norm": 0.4746805429458618, + "learning_rate": 0.0001254104059655723, + "loss": 1.3666, + "step": 7950 + }, + { + "epoch": 0.8396624472573839, + "grad_norm": 0.45755910873413086, + "learning_rate": 0.00012368181396661337, + "loss": 1.3526, + "step": 7960 + }, + { + "epoch": 0.8407172995780591, + "grad_norm": 0.5004385709762573, + "learning_rate": 0.00012197704798333364, + "loss": 1.3514, + "step": 7970 + }, + { + "epoch": 0.8417721518987342, + "grad_norm": 0.5192686319351196, + "learning_rate": 0.00012029577961028894, + "loss": 1.3512, + "step": 7980 + }, + { + "epoch": 0.8428270042194093, + "grad_norm": 0.48216772079467773, + "learning_rate": 0.00011863768496860542, + "loss": 1.3634, + "step": 7990 + }, + { + "epoch": 0.8438818565400844, + "grad_norm": 0.46114224195480347, + "learning_rate": 0.00011700244464358777, + "loss": 1.3595, + "step": 8000 + 
}, + { + "epoch": 0.8449367088607594, + "grad_norm": 0.45775875449180603, + "learning_rate": 0.00011538974362318715, + "loss": 1.3632, + "step": 8010 + }, + { + "epoch": 0.8459915611814346, + "grad_norm": 0.4667496979236603, + "learning_rate": 0.00011379927123731737, + "loss": 1.3565, + "step": 8020 + }, + { + "epoch": 0.8470464135021097, + "grad_norm": 0.4608895778656006, + "learning_rate": 0.0001122307210980077, + "loss": 1.3568, + "step": 8030 + }, + { + "epoch": 0.8481012658227848, + "grad_norm": 0.47877684235572815, + "learning_rate": 0.00011068379104038026, + "loss": 1.3704, + "step": 8040 + }, + { + "epoch": 0.8491561181434599, + "grad_norm": 0.457285612821579, + "learning_rate": 0.00010915818306444116, + "loss": 1.3466, + "step": 8050 + }, + { + "epoch": 0.8502109704641351, + "grad_norm": 0.5008485913276672, + "learning_rate": 0.00010765360327767384, + "loss": 1.3548, + "step": 8060 + }, + { + "epoch": 0.8512658227848101, + "grad_norm": 0.47175008058547974, + "learning_rate": 0.00010616976183842376, + "loss": 1.3633, + "step": 8070 + }, + { + "epoch": 0.8523206751054853, + "grad_norm": 0.4591042697429657, + "learning_rate": 0.00010470637290006365, + "loss": 1.3648, + "step": 8080 + }, + { + "epoch": 0.8533755274261603, + "grad_norm": 0.4671987295150757, + "learning_rate": 0.00010326315455592764, + "loss": 1.352, + "step": 8090 + }, + { + "epoch": 0.8544303797468354, + "grad_norm": 0.4841940104961395, + "learning_rate": 0.0001018398287850053, + "loss": 1.3435, + "step": 8100 + }, + { + "epoch": 0.8554852320675106, + "grad_norm": 0.4897977113723755, + "learning_rate": 0.00010043612139838357, + "loss": 1.3656, + "step": 8110 + }, + { + "epoch": 0.8565400843881856, + "grad_norm": 0.4758096933364868, + "learning_rate": 9.905176198642719e-05, + "loss": 1.3553, + "step": 8120 + }, + { + "epoch": 0.8575949367088608, + "grad_norm": 0.4628910720348358, + "learning_rate": 9.76864838666871e-05, + "loss": 1.3565, + "step": 8130 + }, + { + "epoch": 0.8586497890295358, + 
"grad_norm": 0.4522228538990021, + "learning_rate": 9.634002403252676e-05, + "loss": 1.3511, + "step": 8140 + }, + { + "epoch": 0.859704641350211, + "grad_norm": 0.47021564841270447, + "learning_rate": 9.501212310245681e-05, + "loss": 1.3488, + "step": 8150 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 0.49830883741378784, + "learning_rate": 9.370252527016777e-05, + "loss": 1.3601, + "step": 8160 + }, + { + "epoch": 0.8618143459915611, + "grad_norm": 0.47212862968444824, + "learning_rate": 9.241097825525163e-05, + "loss": 1.3512, + "step": 8170 + }, + { + "epoch": 0.8628691983122363, + "grad_norm": 0.48565971851348877, + "learning_rate": 9.113723325460276e-05, + "loss": 1.358, + "step": 8180 + }, + { + "epoch": 0.8639240506329114, + "grad_norm": 0.5443127155303955, + "learning_rate": 8.988104489448849e-05, + "loss": 1.3521, + "step": 8190 + }, + { + "epoch": 0.8649789029535865, + "grad_norm": 0.4680684506893158, + "learning_rate": 8.864217118328042e-05, + "loss": 1.3639, + "step": 8200 + }, + { + "epoch": 0.8660337552742616, + "grad_norm": 0.46079784631729126, + "learning_rate": 8.742037346483729e-05, + "loss": 1.3559, + "step": 8210 + }, + { + "epoch": 0.8670886075949367, + "grad_norm": 0.45251044631004333, + "learning_rate": 8.62154163725303e-05, + "loss": 1.3637, + "step": 8220 + }, + { + "epoch": 0.8681434599156118, + "grad_norm": 0.5130413770675659, + "learning_rate": 8.502706778390219e-05, + "loss": 1.3585, + "step": 8230 + }, + { + "epoch": 0.869198312236287, + "grad_norm": 0.4337594509124756, + "learning_rate": 8.38550987759513e-05, + "loss": 1.3566, + "step": 8240 + }, + { + "epoch": 0.870253164556962, + "grad_norm": 0.5292634963989258, + "learning_rate": 8.269928358103191e-05, + "loss": 1.3709, + "step": 8250 + }, + { + "epoch": 0.8713080168776371, + "grad_norm": 0.4459943473339081, + "learning_rate": 8.155939954336243e-05, + "loss": 1.3609, + "step": 8260 + }, + { + "epoch": 0.8723628691983122, + "grad_norm": 0.5037257075309753, + 
"learning_rate": 8.043522707613312e-05, + "loss": 1.3573, + "step": 8270 + }, + { + "epoch": 0.8734177215189873, + "grad_norm": 0.4748310446739197, + "learning_rate": 7.932654961920486e-05, + "loss": 1.3421, + "step": 8280 + }, + { + "epoch": 0.8744725738396625, + "grad_norm": 0.46188482642173767, + "learning_rate": 7.823315359739135e-05, + "loss": 1.3442, + "step": 8290 + }, + { + "epoch": 0.8755274261603375, + "grad_norm": 0.4867098331451416, + "learning_rate": 7.715482837931577e-05, + "loss": 1.3685, + "step": 8300 + }, + { + "epoch": 0.8765822784810127, + "grad_norm": 0.4996362328529358, + "learning_rate": 7.6091366236835e-05, + "loss": 1.3469, + "step": 8310 + }, + { + "epoch": 0.8776371308016878, + "grad_norm": 0.4916406273841858, + "learning_rate": 7.504256230502289e-05, + "loss": 1.3649, + "step": 8320 + }, + { + "epoch": 0.8786919831223629, + "grad_norm": 0.4626372754573822, + "learning_rate": 7.400821454270524e-05, + "loss": 1.3576, + "step": 8330 + }, + { + "epoch": 0.879746835443038, + "grad_norm": 0.45033547282218933, + "learning_rate": 7.29881236935386e-05, + "loss": 1.3472, + "step": 8340 + }, + { + "epoch": 0.880801687763713, + "grad_norm": 0.44288432598114014, + "learning_rate": 7.198209324762562e-05, + "loss": 1.3494, + "step": 8350 + }, + { + "epoch": 0.8818565400843882, + "grad_norm": 0.49059954285621643, + "learning_rate": 7.098992940365946e-05, + "loss": 1.3443, + "step": 8360 + }, + { + "epoch": 0.8829113924050633, + "grad_norm": 0.45661666989326477, + "learning_rate": 7.001144103159e-05, + "loss": 1.3537, + "step": 8370 + }, + { + "epoch": 0.8839662447257384, + "grad_norm": 0.4595849812030792, + "learning_rate": 6.904643963580461e-05, + "loss": 1.3609, + "step": 8380 + }, + { + "epoch": 0.8850210970464135, + "grad_norm": 0.44966819882392883, + "learning_rate": 6.809473931881644e-05, + "loss": 1.349, + "step": 8390 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 0.4483475983142853, + "learning_rate": 6.71561567454532e-05, + "loss": 
1.3506, + "step": 8400 + }, + { + "epoch": 0.8871308016877637, + "grad_norm": 0.5414402484893799, + "learning_rate": 6.623051110753948e-05, + "loss": 1.3686, + "step": 8410 + }, + { + "epoch": 0.8881856540084389, + "grad_norm": 0.4634023606777191, + "learning_rate": 6.531762408906607e-05, + "loss": 1.3559, + "step": 8420 + }, + { + "epoch": 0.8892405063291139, + "grad_norm": 0.4461284875869751, + "learning_rate": 6.441731983183912e-05, + "loss": 1.3544, + "step": 8430 + }, + { + "epoch": 0.890295358649789, + "grad_norm": 0.45708733797073364, + "learning_rate": 6.352942490160292e-05, + "loss": 1.3451, + "step": 8440 + }, + { + "epoch": 0.8913502109704642, + "grad_norm": 0.4943016469478607, + "learning_rate": 6.265376825462966e-05, + "loss": 1.3544, + "step": 8450 + }, + { + "epoch": 0.8924050632911392, + "grad_norm": 0.470504492521286, + "learning_rate": 6.179018120476945e-05, + "loss": 1.3537, + "step": 8460 + }, + { + "epoch": 0.8934599156118144, + "grad_norm": 0.4554164409637451, + "learning_rate": 6.0938497390954946e-05, + "loss": 1.3508, + "step": 8470 + }, + { + "epoch": 0.8945147679324894, + "grad_norm": 0.453253835439682, + "learning_rate": 6.009855274515339e-05, + "loss": 1.3449, + "step": 8480 + }, + { + "epoch": 0.8955696202531646, + "grad_norm": 0.48623064160346985, + "learning_rate": 5.9270185460760735e-05, + "loss": 1.354, + "step": 8490 + }, + { + "epoch": 0.8966244725738397, + "grad_norm": 0.45357802510261536, + "learning_rate": 5.8453235961431225e-05, + "loss": 1.3415, + "step": 8500 + }, + { + "epoch": 0.8976793248945147, + "grad_norm": 0.4535787105560303, + "learning_rate": 5.764754687033678e-05, + "loss": 1.3523, + "step": 8510 + }, + { + "epoch": 0.8987341772151899, + "grad_norm": 0.4594683349132538, + "learning_rate": 5.6852962979849836e-05, + "loss": 1.3488, + "step": 8520 + }, + { + "epoch": 0.8997890295358649, + "grad_norm": 0.49994322657585144, + "learning_rate": 5.6069331221644284e-05, + "loss": 1.3642, + "step": 8530 + }, + { + "epoch": 
0.9008438818565401, + "grad_norm": 0.4636819660663605, + "learning_rate": 5.529650063720842e-05, + "loss": 1.3577, + "step": 8540 + }, + { + "epoch": 0.9018987341772152, + "grad_norm": 0.4792076647281647, + "learning_rate": 5.453432234876445e-05, + "loss": 1.3498, + "step": 8550 + }, + { + "epoch": 0.9029535864978903, + "grad_norm": 0.4741894602775574, + "learning_rate": 5.37826495305886e-05, + "loss": 1.3411, + "step": 8560 + }, + { + "epoch": 0.9040084388185654, + "grad_norm": 0.5298281311988831, + "learning_rate": 5.304133738072674e-05, + "loss": 1.3623, + "step": 8570 + }, + { + "epoch": 0.9050632911392406, + "grad_norm": 0.5063627362251282, + "learning_rate": 5.2310243093099814e-05, + "loss": 1.3535, + "step": 8580 + }, + { + "epoch": 0.9061181434599156, + "grad_norm": 0.5597848892211914, + "learning_rate": 5.158922582999368e-05, + "loss": 1.3568, + "step": 8590 + }, + { + "epoch": 0.9071729957805907, + "grad_norm": 0.48414871096611023, + "learning_rate": 5.087814669492819e-05, + "loss": 1.3481, + "step": 8600 + }, + { + "epoch": 0.9082278481012658, + "grad_norm": 0.5020228028297424, + "learning_rate": 5.017686870590028e-05, + "loss": 1.3479, + "step": 8610 + }, + { + "epoch": 0.9092827004219409, + "grad_norm": 0.4592040777206421, + "learning_rate": 4.948525676899577e-05, + "loss": 1.3463, + "step": 8620 + }, + { + "epoch": 0.9103375527426161, + "grad_norm": 0.47056108713150024, + "learning_rate": 4.880317765236493e-05, + "loss": 1.3561, + "step": 8630 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 0.47921305894851685, + "learning_rate": 4.8130499960556755e-05, + "loss": 1.3355, + "step": 8640 + }, + { + "epoch": 0.9124472573839663, + "grad_norm": 0.4555310010910034, + "learning_rate": 4.746709410920699e-05, + "loss": 1.3422, + "step": 8650 + }, + { + "epoch": 0.9135021097046413, + "grad_norm": 0.5480925440788269, + "learning_rate": 4.681283230007507e-05, + "loss": 1.3452, + "step": 8660 + }, + { + "epoch": 0.9145569620253164, + "grad_norm": 
0.4556386470794678, + "learning_rate": 4.616758849642509e-05, + "loss": 1.3525, + "step": 8670 + }, + { + "epoch": 0.9156118143459916, + "grad_norm": 0.45376330614089966, + "learning_rate": 4.553123839874615e-05, + "loss": 1.3588, + "step": 8680 + }, + { + "epoch": 0.9166666666666666, + "grad_norm": 0.48756328225135803, + "learning_rate": 4.490365942080736e-05, + "loss": 1.3523, + "step": 8690 + }, + { + "epoch": 0.9177215189873418, + "grad_norm": 0.4586666524410248, + "learning_rate": 4.428473066604285e-05, + "loss": 1.3498, + "step": 8700 + }, + { + "epoch": 0.9187763713080169, + "grad_norm": 0.4539191722869873, + "learning_rate": 4.367433290426233e-05, + "loss": 1.3505, + "step": 8710 + }, + { + "epoch": 0.919831223628692, + "grad_norm": 0.4817667603492737, + "learning_rate": 4.3072348548682595e-05, + "loss": 1.3431, + "step": 8720 + }, + { + "epoch": 0.9208860759493671, + "grad_norm": 0.46067652106285095, + "learning_rate": 4.247866163327575e-05, + "loss": 1.3548, + "step": 8730 + }, + { + "epoch": 0.9219409282700421, + "grad_norm": 0.4616350829601288, + "learning_rate": 4.1893157790429404e-05, + "loss": 1.3469, + "step": 8740 + }, + { + "epoch": 0.9229957805907173, + "grad_norm": 0.4541439712047577, + "learning_rate": 4.1315724228915066e-05, + "loss": 1.3421, + "step": 8750 + }, + { + "epoch": 0.9240506329113924, + "grad_norm": 0.5221729874610901, + "learning_rate": 4.074624971216005e-05, + "loss": 1.3293, + "step": 8760 + }, + { + "epoch": 0.9251054852320675, + "grad_norm": 0.4680933356285095, + "learning_rate": 4.018462453681889e-05, + "loss": 1.342, + "step": 8770 + }, + { + "epoch": 0.9261603375527426, + "grad_norm": 0.4517577886581421, + "learning_rate": 3.963074051164014e-05, + "loss": 1.351, + "step": 8780 + }, + { + "epoch": 0.9272151898734177, + "grad_norm": 0.4618259072303772, + "learning_rate": 3.908449093662446e-05, + "loss": 1.3435, + "step": 8790 + }, + { + "epoch": 0.9282700421940928, + "grad_norm": 0.45023104548454285, + "learning_rate": 
3.854577058246998e-05, + "loss": 1.3425, + "step": 8800 + }, + { + "epoch": 0.929324894514768, + "grad_norm": 0.47846972942352295, + "learning_rate": 3.801447567030094e-05, + "loss": 1.3586, + "step": 8810 + }, + { + "epoch": 0.930379746835443, + "grad_norm": 0.45514824986457825, + "learning_rate": 3.7490503851675777e-05, + "loss": 1.3471, + "step": 8820 + }, + { + "epoch": 0.9314345991561181, + "grad_norm": 0.44830751419067383, + "learning_rate": 3.6973754188870806e-05, + "loss": 1.3653, + "step": 8830 + }, + { + "epoch": 0.9324894514767933, + "grad_norm": 0.465257465839386, + "learning_rate": 3.6464127135435536e-05, + "loss": 1.3491, + "step": 8840 + }, + { + "epoch": 0.9335443037974683, + "grad_norm": 0.5014391541481018, + "learning_rate": 3.596152451701616e-05, + "loss": 1.3464, + "step": 8850 + }, + { + "epoch": 0.9345991561181435, + "grad_norm": 0.48519137501716614, + "learning_rate": 3.5465849512443226e-05, + "loss": 1.3463, + "step": 8860 + }, + { + "epoch": 0.9356540084388185, + "grad_norm": 0.45895054936408997, + "learning_rate": 3.4977006635080086e-05, + "loss": 1.3504, + "step": 8870 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 0.4757814407348633, + "learning_rate": 3.449490171442838e-05, + "loss": 1.3539, + "step": 8880 + }, + { + "epoch": 0.9377637130801688, + "grad_norm": 0.46523842215538025, + "learning_rate": 3.401944187798702e-05, + "loss": 1.3522, + "step": 8890 + }, + { + "epoch": 0.9388185654008439, + "grad_norm": 0.447443425655365, + "learning_rate": 3.355053553336137e-05, + "loss": 1.3438, + "step": 8900 + }, + { + "epoch": 0.939873417721519, + "grad_norm": 0.47570958733558655, + "learning_rate": 3.308809235061882e-05, + "loss": 1.3425, + "step": 8910 + }, + { + "epoch": 0.9409282700421941, + "grad_norm": 0.45039355754852295, + "learning_rate": 3.263202324488772e-05, + "loss": 1.3494, + "step": 8920 + }, + { + "epoch": 0.9419831223628692, + "grad_norm": 0.45538634061813354, + "learning_rate": 3.218224035919609e-05, + "loss": 1.3456, 
+ "step": 8930 + }, + { + "epoch": 0.9430379746835443, + "grad_norm": 0.47007620334625244, + "learning_rate": 3.173865704754688e-05, + "loss": 1.3475, + "step": 8940 + }, + { + "epoch": 0.9440928270042194, + "grad_norm": 0.4494016170501709, + "learning_rate": 3.130118785822657e-05, + "loss": 1.3498, + "step": 8950 + }, + { + "epoch": 0.9451476793248945, + "grad_norm": 0.46623754501342773, + "learning_rate": 3.08697485173437e-05, + "loss": 1.3525, + "step": 8960 + }, + { + "epoch": 0.9462025316455697, + "grad_norm": 0.5000026226043701, + "learning_rate": 3.0444255912594442e-05, + "loss": 1.3486, + "step": 8970 + }, + { + "epoch": 0.9472573839662447, + "grad_norm": 0.47525277733802795, + "learning_rate": 3.002462807725185e-05, + "loss": 1.3442, + "step": 8980 + }, + { + "epoch": 0.9483122362869199, + "grad_norm": 0.4763094186782837, + "learning_rate": 2.9610784174375868e-05, + "loss": 1.3515, + "step": 8990 + }, + { + "epoch": 0.9493670886075949, + "grad_norm": 0.45728081464767456, + "learning_rate": 2.920264448124087e-05, + "loss": 1.3449, + "step": 9000 + }, + { + "epoch": 0.95042194092827, + "grad_norm": 0.5230275988578796, + "learning_rate": 2.8800130373977936e-05, + "loss": 1.3412, + "step": 9010 + }, + { + "epoch": 0.9514767932489452, + "grad_norm": 0.45506682991981506, + "learning_rate": 2.84031643124288e-05, + "loss": 1.3435, + "step": 9020 + }, + { + "epoch": 0.9525316455696202, + "grad_norm": 0.46359187364578247, + "learning_rate": 2.8011669825208517e-05, + "loss": 1.3625, + "step": 9030 + }, + { + "epoch": 0.9535864978902954, + "grad_norm": 0.4452093541622162, + "learning_rate": 2.762557149497405e-05, + "loss": 1.3399, + "step": 9040 + }, + { + "epoch": 0.9546413502109705, + "grad_norm": 0.45948877930641174, + "learning_rate": 2.724479494389592e-05, + "loss": 1.3497, + "step": 9050 + }, + { + "epoch": 0.9556962025316456, + "grad_norm": 0.44907087087631226, + "learning_rate": 2.6869266819330058e-05, + "loss": 1.3511, + "step": 9060 + }, + { + "epoch": 
0.9567510548523207, + "grad_norm": 0.45196837186813354, + "learning_rate": 2.6498914779687228e-05, + "loss": 1.3487, + "step": 9070 + }, + { + "epoch": 0.9578059071729957, + "grad_norm": 0.4547522962093353, + "learning_rate": 2.6133667480497115e-05, + "loss": 1.3515, + "step": 9080 + }, + { + "epoch": 0.9588607594936709, + "grad_norm": 0.46276381611824036, + "learning_rate": 2.5773454560664597e-05, + "loss": 1.3532, + "step": 9090 + }, + { + "epoch": 0.959915611814346, + "grad_norm": 0.4469272792339325, + "learning_rate": 2.541820662891541e-05, + "loss": 1.3316, + "step": 9100 + }, + { + "epoch": 0.9609704641350211, + "grad_norm": 0.45500412583351135, + "learning_rate": 2.5067855250428616e-05, + "loss": 1.3432, + "step": 9110 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 0.4534028470516205, + "learning_rate": 2.472233293365335e-05, + "loss": 1.3511, + "step": 9120 + }, + { + "epoch": 0.9630801687763713, + "grad_norm": 0.47841382026672363, + "learning_rate": 2.4381573117307307e-05, + "loss": 1.337, + "step": 9130 + }, + { + "epoch": 0.9641350210970464, + "grad_norm": 0.463321328163147, + "learning_rate": 2.4045510157554362e-05, + "loss": 1.3586, + "step": 9140 + }, + { + "epoch": 0.9651898734177216, + "grad_norm": 0.45994776487350464, + "learning_rate": 2.3714079315358985e-05, + "loss": 1.3446, + "step": 9150 + }, + { + "epoch": 0.9662447257383966, + "grad_norm": 0.46441754698753357, + "learning_rate": 2.338721674401494e-05, + "loss": 1.3521, + "step": 9160 + }, + { + "epoch": 0.9672995780590717, + "grad_norm": 0.4650691747665405, + "learning_rate": 2.30648594768459e-05, + "loss": 1.3502, + "step": 9170 + }, + { + "epoch": 0.9683544303797469, + "grad_norm": 0.44704753160476685, + "learning_rate": 2.2746945415075523e-05, + "loss": 1.3463, + "step": 9180 + }, + { + "epoch": 0.9694092827004219, + "grad_norm": 0.4620136320590973, + "learning_rate": 2.2433413315864803e-05, + "loss": 1.3418, + "step": 9190 + }, + { + "epoch": 0.9704641350210971, + "grad_norm": 
0.4611254930496216, + "learning_rate": 2.2124202780514277e-05, + "loss": 1.3463, + "step": 9200 + }, + { + "epoch": 0.9715189873417721, + "grad_norm": 0.4542345702648163, + "learning_rate": 2.1819254242828815e-05, + "loss": 1.3471, + "step": 9210 + }, + { + "epoch": 0.9725738396624473, + "grad_norm": 0.4623969495296478, + "learning_rate": 2.151850895764285e-05, + "loss": 1.3437, + "step": 9220 + }, + { + "epoch": 0.9736286919831224, + "grad_norm": 0.4567524790763855, + "learning_rate": 2.12219089895037e-05, + "loss": 1.3354, + "step": 9230 + }, + { + "epoch": 0.9746835443037974, + "grad_norm": 0.45435529947280884, + "learning_rate": 2.092939720151092e-05, + "loss": 1.329, + "step": 9240 + }, + { + "epoch": 0.9757383966244726, + "grad_norm": 0.46444326639175415, + "learning_rate": 2.064091724430947e-05, + "loss": 1.3422, + "step": 9250 + }, + { + "epoch": 0.9767932489451476, + "grad_norm": 0.46743497252464294, + "learning_rate": 2.0356413545234603e-05, + "loss": 1.3389, + "step": 9260 + }, + { + "epoch": 0.9778481012658228, + "grad_norm": 0.4665004014968872, + "learning_rate": 2.0075831297606357e-05, + "loss": 1.3482, + "step": 9270 + }, + { + "epoch": 0.9789029535864979, + "grad_norm": 0.4589768946170807, + "learning_rate": 1.9799116450171627e-05, + "loss": 1.337, + "step": 9280 + }, + { + "epoch": 0.979957805907173, + "grad_norm": 0.4532953202724457, + "learning_rate": 1.952621569669175e-05, + "loss": 1.3465, + "step": 9290 + }, + { + "epoch": 0.9810126582278481, + "grad_norm": 0.4485061764717102, + "learning_rate": 1.9257076465673605e-05, + "loss": 1.3432, + "step": 9300 + }, + { + "epoch": 0.9820675105485233, + "grad_norm": 0.46737146377563477, + "learning_rate": 1.899164691024229e-05, + "loss": 1.3444, + "step": 9310 + }, + { + "epoch": 0.9831223628691983, + "grad_norm": 0.47384166717529297, + "learning_rate": 1.872987589815331e-05, + "loss": 1.3399, + "step": 9320 + }, + { + "epoch": 0.9841772151898734, + "grad_norm": 0.4618329107761383, + "learning_rate": 
1.8471713001942538e-05, + "loss": 1.3527, + "step": 9330 + }, + { + "epoch": 0.9852320675105485, + "grad_norm": 0.44944530725479126, + "learning_rate": 1.8217108489211845e-05, + "loss": 1.35, + "step": 9340 + }, + { + "epoch": 0.9862869198312236, + "grad_norm": 0.444137841463089, + "learning_rate": 1.7966013313048696e-05, + "loss": 1.3594, + "step": 9350 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 0.457152783870697, + "learning_rate": 1.7718379102577752e-05, + "loss": 1.3519, + "step": 9360 + }, + { + "epoch": 0.9883966244725738, + "grad_norm": 0.5073128938674927, + "learning_rate": 1.7474158153642745e-05, + "loss": 1.3498, + "step": 9370 + }, + { + "epoch": 0.989451476793249, + "grad_norm": 0.49374303221702576, + "learning_rate": 1.7233303419616745e-05, + "loss": 1.3331, + "step": 9380 + }, + { + "epoch": 0.990506329113924, + "grad_norm": 0.4625350534915924, + "learning_rate": 1.699576850233916e-05, + "loss": 1.3397, + "step": 9390 + }, + { + "epoch": 0.9915611814345991, + "grad_norm": 0.4780026972293854, + "learning_rate": 1.6761507643177553e-05, + "loss": 1.3526, + "step": 9400 + }, + { + "epoch": 0.9926160337552743, + "grad_norm": 0.47076642513275146, + "learning_rate": 1.6530475714212752e-05, + "loss": 1.3287, + "step": 9410 + }, + { + "epoch": 0.9936708860759493, + "grad_norm": 0.444694459438324, + "learning_rate": 1.6302628209545423e-05, + "loss": 1.349, + "step": 9420 + }, + { + "epoch": 0.9947257383966245, + "grad_norm": 0.45865774154663086, + "learning_rate": 1.6077921236722464e-05, + "loss": 1.3475, + "step": 9430 + }, + { + "epoch": 0.9957805907172996, + "grad_norm": 0.4531383812427521, + "learning_rate": 1.5856311508281594e-05, + "loss": 1.3449, + "step": 9440 + }, + { + "epoch": 0.9968354430379747, + "grad_norm": 0.46567508578300476, + "learning_rate": 1.5637756333412454e-05, + "loss": 1.3555, + "step": 9450 + }, + { + "epoch": 0.9978902953586498, + "grad_norm": 0.4613323509693146, + "learning_rate": 1.542221360973268e-05, + "loss": 1.3376, 
+ "step": 9460 + }, + { + "epoch": 0.9989451476793249, + "grad_norm": 0.46998530626296997, + "learning_rate": 1.5209641815177312e-05, + "loss": 1.3494, + "step": 9470 + }, + { + "epoch": 1.0, + "grad_norm": 1.2927305698394775, + "learning_rate": 1.5e-05, + "loss": 1.3386, + "step": 9480 + } + ], + "logging_steps": 10, + "max_steps": 9480, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.832308198648013e+16, + "train_batch_size": 1024, + "trial_name": null, + "trial_params": null +} diff --git a/saves-mpt/checkpoint-9480/training_args.bin b/saves-mpt/checkpoint-9480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..0477e8e0c28cfa2d8e6a1a92198f611605b41792 --- /dev/null +++ b/saves-mpt/checkpoint-9480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f931707c8e0fc32c7b15be6f4c0aff4961b26e19e13a37075f59c71137db336 +size 5112 diff --git a/saves-mpt/config.json b/saves-mpt/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a7bae8bd28a02ff350c658f15c2c052270df91b7 --- /dev/null +++ b/saves-mpt/config.json @@ -0,0 +1,32 @@ +{ + "architectures": [ + "MptForCausalLM" + ], + "attn_config": { + "model_type": "" + }, + "d_model": 256, + "emb_pdrop": 0.0, + "embedding_fraction": 1.0, + "expansion_ratio": 4, + "hidden_act": "gelu", + "init_device": "cpu", + "initializer_range": 0.02, + "intermediate_size": 1024, + "layer_norm_epsilon": 1e-05, + "learned_pos_emb": true, + "logit_scale": null, + "max_seq_len": 2048, + "model_type": "mpt", + "n_heads": 8, + "n_layers": 2, + "no_bias": true, + "norm_type": "low_precision_layernorm", + "num_key_value_heads": 8, + "resid_pdrop": 0.0, + "torch_dtype": 
"float32", + "transformers_version": "4.42.4", + "use_cache": false, + "verbose": 0, + "vocab_size": 2000 +} diff --git a/saves-mpt/generation_config.json b/saves-mpt/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..805bc20f96323ae6bf455904f78f359bf95ce35b --- /dev/null +++ b/saves-mpt/generation_config.json @@ -0,0 +1,5 @@ +{ + "_from_model_config": true, + "transformers_version": "4.42.4", + "use_cache": false +} diff --git a/saves-mpt/model.safetensors b/saves-mpt/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..719d368e1b5dcd7fee86f24797b59dcc6db935fa --- /dev/null +++ b/saves-mpt/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fadae899671b76700344d41f6c6f36a91280d445c2bbc5baae5bea1b4584cbc8 +size 8346072 diff --git a/saves-mpt/result.log b/saves-mpt/result.log new file mode 100644 index 0000000000000000000000000000000000000000..711c09f7278f22896ffc93498345b876c1e24446 --- /dev/null +++ b/saves-mpt/result.log @@ -0,0 +1 @@ +{'train_runtime': 2159.8924, 'train_samples_per_second': 4494.022, 'train_steps_per_second': 4.389, 'train_loss': 1.6466181319474167, 'epoch': 1.0} \ No newline at end of file diff --git a/saves-mpt/special_tokens_map.json b/saves-mpt/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-mpt/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-mpt/tokenizer.json b/saves-mpt/tokenizer.json new file mode 
100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-mpt/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, 
+ "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + 
"Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + 
"è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + 
"ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + 
"Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 
791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 
932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, 
+ "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 
1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 
1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 
1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, 
+ "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + 
"éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 
1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + 
"æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-mpt/tokenizer_config.json b/saves-mpt/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-mpt/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "model_max_length": 4096, + 
"pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-olmo-bf16/checkpoint-9480/config.json b/saves-olmo-bf16/checkpoint-9480/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8761f5fa7cedb85129adca4a6b09528ef2dc08ac --- /dev/null +++ b/saves-olmo-bf16/checkpoint-9480/config.json @@ -0,0 +1,26 @@ +{ + "architectures": [ + "OlmoForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "clip_qkv": null, + "eos_token_id": 50279, + "hidden_act": "silu", + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 768, + "max_position_embeddings": 2048, + "model_type": "olmo", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pad_token_id": 1, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.42.0", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-olmo-bf16/checkpoint-9480/generation_config.json b/saves-olmo-bf16/checkpoint-9480/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..96f2a31550174be2bb95ece9acb999f7b7aa76d0 --- /dev/null +++ b/saves-olmo-bf16/checkpoint-9480/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "eos_token_id": 50279, + "pad_token_id": 1, + "transformers_version": "4.42.0" +} diff --git a/saves-olmo-bf16/checkpoint-9480/model.safetensors b/saves-olmo-bf16/checkpoint-9480/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d12ad0e38ea49c3efdb89add4f5a37d13f61d029 --- /dev/null +++ b/saves-olmo-bf16/checkpoint-9480/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1de65855971771c12d295e1b66b6a5c7415993152f0e2f91db269ef2a7120b18 +size 8341080 diff --git a/saves-olmo-bf16/checkpoint-9480/optimizer.pt 
b/saves-olmo-bf16/checkpoint-9480/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..43edb27118fb4cb0f1c670b1df4c63a5a6b84f9d --- /dev/null +++ b/saves-olmo-bf16/checkpoint-9480/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dd58d2c286ee36ce77dfbc2d85809bc6d3a4c65de8c422efffe3b7c8a1e0f1d +size 16692017 diff --git a/saves-olmo-bf16/checkpoint-9480/rng_state.pth b/saves-olmo-bf16/checkpoint-9480/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f8291cd6ce87668b786a72f3e93d072fbe54902 --- /dev/null +++ b/saves-olmo-bf16/checkpoint-9480/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c917636c7a58af68a29056522a757e9f9b99005b776641aa157c536967817d +size 14244 diff --git a/saves-olmo-bf16/checkpoint-9480/scheduler.pt b/saves-olmo-bf16/checkpoint-9480/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..63473f23a031ab0f869bb406d5cf89839262f03d --- /dev/null +++ b/saves-olmo-bf16/checkpoint-9480/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbb2bea2f7536b844ad9bb1bf6c3877fce0b1eb4d96764e140560dbf207ce6aa +size 1064 diff --git a/saves-olmo-bf16/checkpoint-9480/special_tokens_map.json b/saves-olmo-bf16/checkpoint-9480/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-olmo-bf16/checkpoint-9480/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git 
a/saves-olmo-bf16/checkpoint-9480/tokenizer.json b/saves-olmo-bf16/checkpoint-9480/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-olmo-bf16/checkpoint-9480/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 
20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + 
"in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + 
"å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 
491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 
637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + 
"'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, 
+ "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + 
"ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 
1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + 
"çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + 
"Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + 
"Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 
1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 
1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 
1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-olmo-bf16/checkpoint-9480/tokenizer_config.json b/saves-olmo-bf16/checkpoint-9480/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-olmo-bf16/checkpoint-9480/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": 
"<|im_end|>", + "errors": "replace", + "model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-olmo-bf16/checkpoint-9480/trainer_state.json b/saves-olmo-bf16/checkpoint-9480/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..93636c062b4df3194767a5fde997e5035a5f1b4d --- /dev/null +++ b/saves-olmo-bf16/checkpoint-9480/trainer_state.json @@ -0,0 +1,6669 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 9480, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0010548523206751054, + "grad_norm": 1.280547022819519, + "learning_rate": 0.00015822784810126583, + "loss": 7.4867, + "step": 10 + }, + { + "epoch": 0.002109704641350211, + "grad_norm": 1.1321444511413574, + "learning_rate": 0.00031645569620253165, + "loss": 6.8694, + "step": 20 + }, + { + "epoch": 0.0031645569620253164, + "grad_norm": 0.8241393566131592, + "learning_rate": 0.00047468354430379745, + "loss": 6.231, + "step": 30 + }, + { + "epoch": 0.004219409282700422, + "grad_norm": 0.9078624844551086, + "learning_rate": 0.0006329113924050633, + "loss": 5.7805, + "step": 40 + }, + { + "epoch": 0.005274261603375527, + "grad_norm": 1.1798850297927856, + "learning_rate": 0.0007911392405063291, + "loss": 5.3628, + "step": 50 + }, + { + "epoch": 0.006329113924050633, + "grad_norm": 1.0740281343460083, + "learning_rate": 0.0009493670886075949, + "loss": 4.8761, + "step": 60 + }, + { + "epoch": 0.007383966244725738, + "grad_norm": 1.462295413017273, + "learning_rate": 0.0011075949367088608, + "loss": 4.4597, + "step": 70 + }, + { + "epoch": 0.008438818565400843, + "grad_norm": 1.8464468717575073, + "learning_rate": 0.0012658227848101266, + "loss": 4.1877, + "step": 80 + }, + { + "epoch": 0.00949367088607595, + "grad_norm": 
0.8256818056106567, + "learning_rate": 0.0014240506329113926, + "loss": 3.9818, + "step": 90 + }, + { + "epoch": 0.010548523206751054, + "grad_norm": 1.319704294204712, + "learning_rate": 0.0015, + "loss": 3.8255, + "step": 100 + }, + { + "epoch": 0.011603375527426161, + "grad_norm": 0.8619171977043152, + "learning_rate": 0.0015, + "loss": 3.6598, + "step": 110 + }, + { + "epoch": 0.012658227848101266, + "grad_norm": 0.8669776916503906, + "learning_rate": 0.0015, + "loss": 3.5442, + "step": 120 + }, + { + "epoch": 0.013713080168776372, + "grad_norm": 0.7310807704925537, + "learning_rate": 0.0015, + "loss": 3.4417, + "step": 130 + }, + { + "epoch": 0.014767932489451477, + "grad_norm": 0.6836230754852295, + "learning_rate": 0.0015, + "loss": 3.3388, + "step": 140 + }, + { + "epoch": 0.015822784810126583, + "grad_norm": 0.7951012849807739, + "learning_rate": 0.0015, + "loss": 3.2581, + "step": 150 + }, + { + "epoch": 0.016877637130801686, + "grad_norm": 0.7394816875457764, + "learning_rate": 0.0015, + "loss": 3.199, + "step": 160 + }, + { + "epoch": 0.017932489451476793, + "grad_norm": 0.8423566818237305, + "learning_rate": 0.0015, + "loss": 3.1291, + "step": 170 + }, + { + "epoch": 0.0189873417721519, + "grad_norm": 0.8750805854797363, + "learning_rate": 0.0015, + "loss": 3.0828, + "step": 180 + }, + { + "epoch": 0.020042194092827006, + "grad_norm": 0.7946017384529114, + "learning_rate": 0.0015, + "loss": 3.0311, + "step": 190 + }, + { + "epoch": 0.02109704641350211, + "grad_norm": 0.9320774674415588, + "learning_rate": 0.0015, + "loss": 2.985, + "step": 200 + }, + { + "epoch": 0.022151898734177215, + "grad_norm": 1.1086764335632324, + "learning_rate": 0.0015, + "loss": 2.95, + "step": 210 + }, + { + "epoch": 0.023206751054852322, + "grad_norm": 0.9890562295913696, + "learning_rate": 0.0015, + "loss": 2.9203, + "step": 220 + }, + { + "epoch": 0.024261603375527425, + "grad_norm": 0.867972195148468, + "learning_rate": 0.0015, + "loss": 2.8746, + "step": 230 + }, + { + 
"epoch": 0.02531645569620253, + "grad_norm": 0.767699658870697, + "learning_rate": 0.0015, + "loss": 2.8331, + "step": 240 + }, + { + "epoch": 0.026371308016877638, + "grad_norm": 0.9333881735801697, + "learning_rate": 0.0015, + "loss": 2.8101, + "step": 250 + }, + { + "epoch": 0.027426160337552744, + "grad_norm": 0.7089716792106628, + "learning_rate": 0.0015, + "loss": 2.7732, + "step": 260 + }, + { + "epoch": 0.028481012658227847, + "grad_norm": 0.9305698275566101, + "learning_rate": 0.0015, + "loss": 2.746, + "step": 270 + }, + { + "epoch": 0.029535864978902954, + "grad_norm": 1.1325851678848267, + "learning_rate": 0.0015, + "loss": 2.7114, + "step": 280 + }, + { + "epoch": 0.03059071729957806, + "grad_norm": 0.9999624490737915, + "learning_rate": 0.0015, + "loss": 2.6858, + "step": 290 + }, + { + "epoch": 0.03164556962025317, + "grad_norm": 0.8429637551307678, + "learning_rate": 0.0015, + "loss": 2.6612, + "step": 300 + }, + { + "epoch": 0.03270042194092827, + "grad_norm": 0.8646096587181091, + "learning_rate": 0.0015, + "loss": 2.6428, + "step": 310 + }, + { + "epoch": 0.03375527426160337, + "grad_norm": 0.886806845664978, + "learning_rate": 0.0015, + "loss": 2.6039, + "step": 320 + }, + { + "epoch": 0.03481012658227848, + "grad_norm": 0.9155639410018921, + "learning_rate": 0.0015, + "loss": 2.5994, + "step": 330 + }, + { + "epoch": 0.035864978902953586, + "grad_norm": 0.8203383088111877, + "learning_rate": 0.0015, + "loss": 2.5743, + "step": 340 + }, + { + "epoch": 0.03691983122362869, + "grad_norm": 0.917801558971405, + "learning_rate": 0.0015, + "loss": 2.5364, + "step": 350 + }, + { + "epoch": 0.0379746835443038, + "grad_norm": 0.8194217681884766, + "learning_rate": 0.0015, + "loss": 2.5165, + "step": 360 + }, + { + "epoch": 0.039029535864978905, + "grad_norm": 0.7929495573043823, + "learning_rate": 0.0015, + "loss": 2.5034, + "step": 370 + }, + { + "epoch": 0.04008438818565401, + "grad_norm": 1.0203853845596313, + "learning_rate": 0.0015, + "loss": 
2.4966, + "step": 380 + }, + { + "epoch": 0.04113924050632911, + "grad_norm": 0.9013593196868896, + "learning_rate": 0.0015, + "loss": 2.4702, + "step": 390 + }, + { + "epoch": 0.04219409282700422, + "grad_norm": 0.8710969090461731, + "learning_rate": 0.0015, + "loss": 2.443, + "step": 400 + }, + { + "epoch": 0.043248945147679324, + "grad_norm": 0.9548656344413757, + "learning_rate": 0.0015, + "loss": 2.4312, + "step": 410 + }, + { + "epoch": 0.04430379746835443, + "grad_norm": 0.9535632133483887, + "learning_rate": 0.0015, + "loss": 2.4156, + "step": 420 + }, + { + "epoch": 0.04535864978902954, + "grad_norm": 0.8102132678031921, + "learning_rate": 0.0015, + "loss": 2.3876, + "step": 430 + }, + { + "epoch": 0.046413502109704644, + "grad_norm": 1.1112456321716309, + "learning_rate": 0.0015, + "loss": 2.3744, + "step": 440 + }, + { + "epoch": 0.04746835443037975, + "grad_norm": 0.8402998447418213, + "learning_rate": 0.0015, + "loss": 2.3703, + "step": 450 + }, + { + "epoch": 0.04852320675105485, + "grad_norm": 0.9239383339881897, + "learning_rate": 0.0015, + "loss": 2.3478, + "step": 460 + }, + { + "epoch": 0.049578059071729956, + "grad_norm": 0.8364289999008179, + "learning_rate": 0.0015, + "loss": 2.3259, + "step": 470 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 0.9491376876831055, + "learning_rate": 0.0015, + "loss": 2.3213, + "step": 480 + }, + { + "epoch": 0.05168776371308017, + "grad_norm": 1.240187406539917, + "learning_rate": 0.0015, + "loss": 2.298, + "step": 490 + }, + { + "epoch": 0.052742616033755275, + "grad_norm": 0.6892614960670471, + "learning_rate": 0.0015, + "loss": 2.2853, + "step": 500 + }, + { + "epoch": 0.05379746835443038, + "grad_norm": 1.1335899829864502, + "learning_rate": 0.0015, + "loss": 2.2754, + "step": 510 + }, + { + "epoch": 0.05485232067510549, + "grad_norm": 0.9805135726928711, + "learning_rate": 0.0015, + "loss": 2.2667, + "step": 520 + }, + { + "epoch": 0.05590717299578059, + "grad_norm": 0.6936400532722473, + 
"learning_rate": 0.0015, + "loss": 2.2429, + "step": 530 + }, + { + "epoch": 0.056962025316455694, + "grad_norm": 1.0821820497512817, + "learning_rate": 0.0015, + "loss": 2.2292, + "step": 540 + }, + { + "epoch": 0.0580168776371308, + "grad_norm": 0.8878759741783142, + "learning_rate": 0.0015, + "loss": 2.231, + "step": 550 + }, + { + "epoch": 0.05907172995780591, + "grad_norm": 0.7503810524940491, + "learning_rate": 0.0015, + "loss": 2.19, + "step": 560 + }, + { + "epoch": 0.060126582278481014, + "grad_norm": 0.7545302510261536, + "learning_rate": 0.0015, + "loss": 2.1975, + "step": 570 + }, + { + "epoch": 0.06118143459915612, + "grad_norm": 1.1007965803146362, + "learning_rate": 0.0015, + "loss": 2.1953, + "step": 580 + }, + { + "epoch": 0.06223628691983123, + "grad_norm": 1.1005948781967163, + "learning_rate": 0.0015, + "loss": 2.1736, + "step": 590 + }, + { + "epoch": 0.06329113924050633, + "grad_norm": 0.741956353187561, + "learning_rate": 0.0015, + "loss": 2.1526, + "step": 600 + }, + { + "epoch": 0.06434599156118144, + "grad_norm": 0.8366942405700684, + "learning_rate": 0.0015, + "loss": 2.1546, + "step": 610 + }, + { + "epoch": 0.06540084388185655, + "grad_norm": 0.7844794988632202, + "learning_rate": 0.0015, + "loss": 2.1418, + "step": 620 + }, + { + "epoch": 0.06645569620253164, + "grad_norm": 0.7197091579437256, + "learning_rate": 0.0015, + "loss": 2.1263, + "step": 630 + }, + { + "epoch": 0.06751054852320675, + "grad_norm": 0.8558847308158875, + "learning_rate": 0.0015, + "loss": 2.1419, + "step": 640 + }, + { + "epoch": 0.06856540084388185, + "grad_norm": 1.0648032426834106, + "learning_rate": 0.0015, + "loss": 2.1266, + "step": 650 + }, + { + "epoch": 0.06962025316455696, + "grad_norm": 0.780692458152771, + "learning_rate": 0.0015, + "loss": 2.1117, + "step": 660 + }, + { + "epoch": 0.07067510548523206, + "grad_norm": 0.914266049861908, + "learning_rate": 0.0015, + "loss": 2.0944, + "step": 670 + }, + { + "epoch": 0.07172995780590717, + "grad_norm": 
0.741974413394928, + "learning_rate": 0.0015, + "loss": 2.0881, + "step": 680 + }, + { + "epoch": 0.07278481012658228, + "grad_norm": 0.6926323771476746, + "learning_rate": 0.0015, + "loss": 2.0963, + "step": 690 + }, + { + "epoch": 0.07383966244725738, + "grad_norm": 0.8212527632713318, + "learning_rate": 0.0015, + "loss": 2.0821, + "step": 700 + }, + { + "epoch": 0.07489451476793249, + "grad_norm": 0.8038210868835449, + "learning_rate": 0.0015, + "loss": 2.0711, + "step": 710 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 1.00883150100708, + "learning_rate": 0.0015, + "loss": 2.0521, + "step": 720 + }, + { + "epoch": 0.0770042194092827, + "grad_norm": 0.8826992511749268, + "learning_rate": 0.0015, + "loss": 2.0454, + "step": 730 + }, + { + "epoch": 0.07805907172995781, + "grad_norm": 0.8499228358268738, + "learning_rate": 0.0015, + "loss": 2.0566, + "step": 740 + }, + { + "epoch": 0.07911392405063292, + "grad_norm": 0.8320230841636658, + "learning_rate": 0.0015, + "loss": 2.0352, + "step": 750 + }, + { + "epoch": 0.08016877637130802, + "grad_norm": 1.1891824007034302, + "learning_rate": 0.0015, + "loss": 2.0354, + "step": 760 + }, + { + "epoch": 0.08122362869198312, + "grad_norm": 0.8269404768943787, + "learning_rate": 0.0015, + "loss": 2.0256, + "step": 770 + }, + { + "epoch": 0.08227848101265822, + "grad_norm": 0.7528815269470215, + "learning_rate": 0.0015, + "loss": 2.0229, + "step": 780 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 1.019327998161316, + "learning_rate": 0.0015, + "loss": 2.0285, + "step": 790 + }, + { + "epoch": 0.08438818565400844, + "grad_norm": 0.771692156791687, + "learning_rate": 0.0015, + "loss": 2.0007, + "step": 800 + }, + { + "epoch": 0.08544303797468354, + "grad_norm": 0.7045907378196716, + "learning_rate": 0.0015, + "loss": 1.9873, + "step": 810 + }, + { + "epoch": 0.08649789029535865, + "grad_norm": 0.7437472939491272, + "learning_rate": 0.0015, + "loss": 1.9954, + "step": 820 + }, + { + "epoch": 
0.08755274261603375, + "grad_norm": 0.8867875337600708, + "learning_rate": 0.0015, + "loss": 2.0016, + "step": 830 + }, + { + "epoch": 0.08860759493670886, + "grad_norm": 0.9023960828781128, + "learning_rate": 0.0015, + "loss": 1.9787, + "step": 840 + }, + { + "epoch": 0.08966244725738397, + "grad_norm": 0.653753399848938, + "learning_rate": 0.0015, + "loss": 1.9754, + "step": 850 + }, + { + "epoch": 0.09071729957805907, + "grad_norm": 0.7463269233703613, + "learning_rate": 0.0015, + "loss": 1.9773, + "step": 860 + }, + { + "epoch": 0.09177215189873418, + "grad_norm": 0.6657361388206482, + "learning_rate": 0.0015, + "loss": 1.9741, + "step": 870 + }, + { + "epoch": 0.09282700421940929, + "grad_norm": 0.7547237277030945, + "learning_rate": 0.0015, + "loss": 1.9635, + "step": 880 + }, + { + "epoch": 0.0938818565400844, + "grad_norm": 1.0085852146148682, + "learning_rate": 0.0015, + "loss": 1.9595, + "step": 890 + }, + { + "epoch": 0.0949367088607595, + "grad_norm": 1.19351065158844, + "learning_rate": 0.0015, + "loss": 1.9593, + "step": 900 + }, + { + "epoch": 0.09599156118143459, + "grad_norm": 0.6961801052093506, + "learning_rate": 0.0015, + "loss": 1.9572, + "step": 910 + }, + { + "epoch": 0.0970464135021097, + "grad_norm": 0.6787910461425781, + "learning_rate": 0.0015, + "loss": 1.9456, + "step": 920 + }, + { + "epoch": 0.0981012658227848, + "grad_norm": 1.2137645483016968, + "learning_rate": 0.0015, + "loss": 1.9315, + "step": 930 + }, + { + "epoch": 0.09915611814345991, + "grad_norm": 0.7932629585266113, + "learning_rate": 0.0015, + "loss": 1.9427, + "step": 940 + }, + { + "epoch": 0.10021097046413502, + "grad_norm": 0.7934066653251648, + "learning_rate": 0.0015, + "loss": 1.9193, + "step": 950 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 0.7235351204872131, + "learning_rate": 0.0015, + "loss": 1.9257, + "step": 960 + }, + { + "epoch": 0.10232067510548523, + "grad_norm": 0.6998668909072876, + "learning_rate": 0.0015, + "loss": 1.927, + "step": 970 + 
}, + { + "epoch": 0.10337552742616034, + "grad_norm": 0.6853834390640259, + "learning_rate": 0.0015, + "loss": 1.906, + "step": 980 + }, + { + "epoch": 0.10443037974683544, + "grad_norm": 0.6908657550811768, + "learning_rate": 0.0015, + "loss": 1.9065, + "step": 990 + }, + { + "epoch": 0.10548523206751055, + "grad_norm": 0.8567667603492737, + "learning_rate": 0.0015, + "loss": 1.9182, + "step": 1000 + }, + { + "epoch": 0.10654008438818566, + "grad_norm": 0.7900475263595581, + "learning_rate": 0.0015, + "loss": 1.9111, + "step": 1010 + }, + { + "epoch": 0.10759493670886076, + "grad_norm": 0.8305337429046631, + "learning_rate": 0.0015, + "loss": 1.8972, + "step": 1020 + }, + { + "epoch": 0.10864978902953587, + "grad_norm": 0.9728142023086548, + "learning_rate": 0.0015, + "loss": 1.9, + "step": 1030 + }, + { + "epoch": 0.10970464135021098, + "grad_norm": 0.760550856590271, + "learning_rate": 0.0015, + "loss": 1.8876, + "step": 1040 + }, + { + "epoch": 0.11075949367088607, + "grad_norm": 1.077075719833374, + "learning_rate": 0.0015, + "loss": 1.8919, + "step": 1050 + }, + { + "epoch": 0.11181434599156118, + "grad_norm": 0.8105736374855042, + "learning_rate": 0.0015, + "loss": 1.8845, + "step": 1060 + }, + { + "epoch": 0.11286919831223628, + "grad_norm": 0.7220541834831238, + "learning_rate": 0.0015, + "loss": 1.8714, + "step": 1070 + }, + { + "epoch": 0.11392405063291139, + "grad_norm": 0.732894241809845, + "learning_rate": 0.0015, + "loss": 1.8705, + "step": 1080 + }, + { + "epoch": 0.1149789029535865, + "grad_norm": 0.844344437122345, + "learning_rate": 0.0015, + "loss": 1.8781, + "step": 1090 + }, + { + "epoch": 0.1160337552742616, + "grad_norm": 0.7333281636238098, + "learning_rate": 0.0015, + "loss": 1.8657, + "step": 1100 + }, + { + "epoch": 0.11708860759493671, + "grad_norm": 0.7032455205917358, + "learning_rate": 0.0015, + "loss": 1.8698, + "step": 1110 + }, + { + "epoch": 0.11814345991561181, + "grad_norm": 0.7837533950805664, + "learning_rate": 0.0015, + 
"loss": 1.8639, + "step": 1120 + }, + { + "epoch": 0.11919831223628692, + "grad_norm": 0.8764195442199707, + "learning_rate": 0.0015, + "loss": 1.8506, + "step": 1130 + }, + { + "epoch": 0.12025316455696203, + "grad_norm": 0.842060923576355, + "learning_rate": 0.0015, + "loss": 1.8638, + "step": 1140 + }, + { + "epoch": 0.12130801687763713, + "grad_norm": 1.2955107688903809, + "learning_rate": 0.0015, + "loss": 1.8501, + "step": 1150 + }, + { + "epoch": 0.12236286919831224, + "grad_norm": 0.6817561388015747, + "learning_rate": 0.0015, + "loss": 1.8583, + "step": 1160 + }, + { + "epoch": 0.12341772151898735, + "grad_norm": 1.2760831117630005, + "learning_rate": 0.0015, + "loss": 1.8442, + "step": 1170 + }, + { + "epoch": 0.12447257383966245, + "grad_norm": 0.6628448367118835, + "learning_rate": 0.0015, + "loss": 1.8341, + "step": 1180 + }, + { + "epoch": 0.12552742616033755, + "grad_norm": 0.7575486302375793, + "learning_rate": 0.0015, + "loss": 1.8346, + "step": 1190 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 0.8239588141441345, + "learning_rate": 0.0015, + "loss": 1.8455, + "step": 1200 + }, + { + "epoch": 0.12763713080168776, + "grad_norm": 0.7578184604644775, + "learning_rate": 0.0015, + "loss": 1.8411, + "step": 1210 + }, + { + "epoch": 0.12869198312236288, + "grad_norm": 0.6956812143325806, + "learning_rate": 0.0015, + "loss": 1.8244, + "step": 1220 + }, + { + "epoch": 0.12974683544303797, + "grad_norm": 0.7095561623573303, + "learning_rate": 0.0015, + "loss": 1.8124, + "step": 1230 + }, + { + "epoch": 0.1308016877637131, + "grad_norm": 0.8960379958152771, + "learning_rate": 0.0015, + "loss": 1.8277, + "step": 1240 + }, + { + "epoch": 0.13185654008438819, + "grad_norm": 0.8036966919898987, + "learning_rate": 0.0015, + "loss": 1.8297, + "step": 1250 + }, + { + "epoch": 0.13291139240506328, + "grad_norm": 0.6625062823295593, + "learning_rate": 0.0015, + "loss": 1.8146, + "step": 1260 + }, + { + "epoch": 0.1339662447257384, + "grad_norm": 
0.6436560750007629, + "learning_rate": 0.0015, + "loss": 1.8091, + "step": 1270 + }, + { + "epoch": 0.1350210970464135, + "grad_norm": 0.6492863893508911, + "learning_rate": 0.0015, + "loss": 1.8247, + "step": 1280 + }, + { + "epoch": 0.1360759493670886, + "grad_norm": 0.8988174200057983, + "learning_rate": 0.0015, + "loss": 1.8122, + "step": 1290 + }, + { + "epoch": 0.1371308016877637, + "grad_norm": 0.6358052492141724, + "learning_rate": 0.0015, + "loss": 1.8013, + "step": 1300 + }, + { + "epoch": 0.13818565400843882, + "grad_norm": 0.7899615168571472, + "learning_rate": 0.0015, + "loss": 1.8045, + "step": 1310 + }, + { + "epoch": 0.13924050632911392, + "grad_norm": 0.8667846322059631, + "learning_rate": 0.0015, + "loss": 1.8069, + "step": 1320 + }, + { + "epoch": 0.14029535864978904, + "grad_norm": 1.1689475774765015, + "learning_rate": 0.0015, + "loss": 1.8009, + "step": 1330 + }, + { + "epoch": 0.14135021097046413, + "grad_norm": 0.6985293626785278, + "learning_rate": 0.0015, + "loss": 1.8, + "step": 1340 + }, + { + "epoch": 0.14240506329113925, + "grad_norm": 0.6985313296318054, + "learning_rate": 0.0015, + "loss": 1.8014, + "step": 1350 + }, + { + "epoch": 0.14345991561181434, + "grad_norm": 0.6706464886665344, + "learning_rate": 0.0015, + "loss": 1.7933, + "step": 1360 + }, + { + "epoch": 0.14451476793248946, + "grad_norm": 0.7382428050041199, + "learning_rate": 0.0015, + "loss": 1.7895, + "step": 1370 + }, + { + "epoch": 0.14556962025316456, + "grad_norm": 0.8225905895233154, + "learning_rate": 0.0015, + "loss": 1.7835, + "step": 1380 + }, + { + "epoch": 0.14662447257383968, + "grad_norm": 1.0040773153305054, + "learning_rate": 0.0015, + "loss": 1.7946, + "step": 1390 + }, + { + "epoch": 0.14767932489451477, + "grad_norm": 0.7514321208000183, + "learning_rate": 0.0015, + "loss": 1.7761, + "step": 1400 + }, + { + "epoch": 0.14873417721518986, + "grad_norm": 0.6756529808044434, + "learning_rate": 0.0015, + "loss": 1.7768, + "step": 1410 + }, + { + "epoch": 
0.14978902953586498, + "grad_norm": 0.8687539100646973, + "learning_rate": 0.0015, + "loss": 1.7832, + "step": 1420 + }, + { + "epoch": 0.15084388185654007, + "grad_norm": 0.7463226914405823, + "learning_rate": 0.0015, + "loss": 1.7787, + "step": 1430 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 0.7013041973114014, + "learning_rate": 0.0015, + "loss": 1.7739, + "step": 1440 + }, + { + "epoch": 0.1529535864978903, + "grad_norm": 0.7494015097618103, + "learning_rate": 0.0015, + "loss": 1.7702, + "step": 1450 + }, + { + "epoch": 0.1540084388185654, + "grad_norm": 0.6857183575630188, + "learning_rate": 0.0015, + "loss": 1.7661, + "step": 1460 + }, + { + "epoch": 0.1550632911392405, + "grad_norm": 0.8611487746238708, + "learning_rate": 0.0015, + "loss": 1.7675, + "step": 1470 + }, + { + "epoch": 0.15611814345991562, + "grad_norm": 0.8028945922851562, + "learning_rate": 0.0015, + "loss": 1.7677, + "step": 1480 + }, + { + "epoch": 0.1571729957805907, + "grad_norm": 0.6641376614570618, + "learning_rate": 0.0015, + "loss": 1.7614, + "step": 1490 + }, + { + "epoch": 0.15822784810126583, + "grad_norm": 0.6738835573196411, + "learning_rate": 0.0015, + "loss": 1.7644, + "step": 1500 + }, + { + "epoch": 0.15928270042194093, + "grad_norm": 0.7139059901237488, + "learning_rate": 0.0015, + "loss": 1.7635, + "step": 1510 + }, + { + "epoch": 0.16033755274261605, + "grad_norm": 0.7500717043876648, + "learning_rate": 0.0015, + "loss": 1.7628, + "step": 1520 + }, + { + "epoch": 0.16139240506329114, + "grad_norm": 0.7021812796592712, + "learning_rate": 0.0015, + "loss": 1.7467, + "step": 1530 + }, + { + "epoch": 0.16244725738396623, + "grad_norm": 0.6480691432952881, + "learning_rate": 0.0015, + "loss": 1.738, + "step": 1540 + }, + { + "epoch": 0.16350210970464135, + "grad_norm": 0.6613367795944214, + "learning_rate": 0.0015, + "loss": 1.7399, + "step": 1550 + }, + { + "epoch": 0.16455696202531644, + "grad_norm": 0.8135258555412292, + "learning_rate": 0.0015, + "loss": 1.7417, 
+ "step": 1560 + }, + { + "epoch": 0.16561181434599156, + "grad_norm": 0.7066948413848877, + "learning_rate": 0.0015, + "loss": 1.7333, + "step": 1570 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 0.9046475887298584, + "learning_rate": 0.0015, + "loss": 1.7429, + "step": 1580 + }, + { + "epoch": 0.16772151898734178, + "grad_norm": 1.2522505521774292, + "learning_rate": 0.0015, + "loss": 1.7414, + "step": 1590 + }, + { + "epoch": 0.16877637130801687, + "grad_norm": 0.8212918639183044, + "learning_rate": 0.0015, + "loss": 1.7471, + "step": 1600 + }, + { + "epoch": 0.169831223628692, + "grad_norm": 0.7190598845481873, + "learning_rate": 0.0015, + "loss": 1.7416, + "step": 1610 + }, + { + "epoch": 0.17088607594936708, + "grad_norm": 0.6582958698272705, + "learning_rate": 0.0015, + "loss": 1.7134, + "step": 1620 + }, + { + "epoch": 0.1719409282700422, + "grad_norm": 0.7701506614685059, + "learning_rate": 0.0015, + "loss": 1.7261, + "step": 1630 + }, + { + "epoch": 0.1729957805907173, + "grad_norm": 0.75456702709198, + "learning_rate": 0.0015, + "loss": 1.7287, + "step": 1640 + }, + { + "epoch": 0.17405063291139242, + "grad_norm": 1.2882826328277588, + "learning_rate": 0.0015, + "loss": 1.7209, + "step": 1650 + }, + { + "epoch": 0.1751054852320675, + "grad_norm": 0.6976048946380615, + "learning_rate": 0.0015, + "loss": 1.7209, + "step": 1660 + }, + { + "epoch": 0.17616033755274263, + "grad_norm": 0.9592496156692505, + "learning_rate": 0.0015, + "loss": 1.7292, + "step": 1670 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 0.9925044775009155, + "learning_rate": 0.0015, + "loss": 1.714, + "step": 1680 + }, + { + "epoch": 0.17827004219409281, + "grad_norm": 0.7743837833404541, + "learning_rate": 0.0015, + "loss": 1.709, + "step": 1690 + }, + { + "epoch": 0.17932489451476794, + "grad_norm": 0.8759896159172058, + "learning_rate": 0.0015, + "loss": 1.7242, + "step": 1700 + }, + { + "epoch": 0.18037974683544303, + "grad_norm": 0.8945364952087402, + 
"learning_rate": 0.0015, + "loss": 1.7072, + "step": 1710 + }, + { + "epoch": 0.18143459915611815, + "grad_norm": 0.6546477675437927, + "learning_rate": 0.0015, + "loss": 1.703, + "step": 1720 + }, + { + "epoch": 0.18248945147679324, + "grad_norm": 0.6503028869628906, + "learning_rate": 0.0015, + "loss": 1.7064, + "step": 1730 + }, + { + "epoch": 0.18354430379746836, + "grad_norm": 0.809508204460144, + "learning_rate": 0.0015, + "loss": 1.7107, + "step": 1740 + }, + { + "epoch": 0.18459915611814345, + "grad_norm": 0.6839598417282104, + "learning_rate": 0.0015, + "loss": 1.7276, + "step": 1750 + }, + { + "epoch": 0.18565400843881857, + "grad_norm": 0.8577884435653687, + "learning_rate": 0.0015, + "loss": 1.7095, + "step": 1760 + }, + { + "epoch": 0.18670886075949367, + "grad_norm": 0.7543063759803772, + "learning_rate": 0.0015, + "loss": 1.7125, + "step": 1770 + }, + { + "epoch": 0.1877637130801688, + "grad_norm": 0.6626102924346924, + "learning_rate": 0.0015, + "loss": 1.7143, + "step": 1780 + }, + { + "epoch": 0.18881856540084388, + "grad_norm": 0.9239822030067444, + "learning_rate": 0.0015, + "loss": 1.7127, + "step": 1790 + }, + { + "epoch": 0.189873417721519, + "grad_norm": 0.8079544901847839, + "learning_rate": 0.0015, + "loss": 1.7087, + "step": 1800 + }, + { + "epoch": 0.1909282700421941, + "grad_norm": 0.6424635648727417, + "learning_rate": 0.0015, + "loss": 1.6945, + "step": 1810 + }, + { + "epoch": 0.19198312236286919, + "grad_norm": 0.7259810566902161, + "learning_rate": 0.0015, + "loss": 1.6933, + "step": 1820 + }, + { + "epoch": 0.1930379746835443, + "grad_norm": 0.680972158908844, + "learning_rate": 0.0015, + "loss": 1.695, + "step": 1830 + }, + { + "epoch": 0.1940928270042194, + "grad_norm": 0.6927972435951233, + "learning_rate": 0.0015, + "loss": 1.689, + "step": 1840 + }, + { + "epoch": 0.19514767932489452, + "grad_norm": 0.8696661591529846, + "learning_rate": 0.0015, + "loss": 1.6924, + "step": 1850 + }, + { + "epoch": 0.1962025316455696, + 
"grad_norm": 0.6716064810752869, + "learning_rate": 0.0015, + "loss": 1.6973, + "step": 1860 + }, + { + "epoch": 0.19725738396624473, + "grad_norm": 0.64600670337677, + "learning_rate": 0.0015, + "loss": 1.6966, + "step": 1870 + }, + { + "epoch": 0.19831223628691982, + "grad_norm": 0.7006885409355164, + "learning_rate": 0.0015, + "loss": 1.6848, + "step": 1880 + }, + { + "epoch": 0.19936708860759494, + "grad_norm": 0.6711748242378235, + "learning_rate": 0.0015, + "loss": 1.6906, + "step": 1890 + }, + { + "epoch": 0.20042194092827004, + "grad_norm": 1.0483088493347168, + "learning_rate": 0.0015, + "loss": 1.6886, + "step": 1900 + }, + { + "epoch": 0.20147679324894516, + "grad_norm": 1.0006247758865356, + "learning_rate": 0.0015, + "loss": 1.7006, + "step": 1910 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 0.9228950142860413, + "learning_rate": 0.0015, + "loss": 1.6837, + "step": 1920 + }, + { + "epoch": 0.20358649789029537, + "grad_norm": 0.7335243821144104, + "learning_rate": 0.0015, + "loss": 1.6816, + "step": 1930 + }, + { + "epoch": 0.20464135021097046, + "grad_norm": 0.6890536546707153, + "learning_rate": 0.0015, + "loss": 1.6737, + "step": 1940 + }, + { + "epoch": 0.20569620253164558, + "grad_norm": 0.6723124384880066, + "learning_rate": 0.0015, + "loss": 1.6711, + "step": 1950 + }, + { + "epoch": 0.20675105485232068, + "grad_norm": 0.6646891832351685, + "learning_rate": 0.0015, + "loss": 1.6789, + "step": 1960 + }, + { + "epoch": 0.20780590717299577, + "grad_norm": 0.6417124271392822, + "learning_rate": 0.0015, + "loss": 1.6877, + "step": 1970 + }, + { + "epoch": 0.2088607594936709, + "grad_norm": 0.7362343668937683, + "learning_rate": 0.0015, + "loss": 1.6666, + "step": 1980 + }, + { + "epoch": 0.20991561181434598, + "grad_norm": 0.7579134106636047, + "learning_rate": 0.0015, + "loss": 1.6678, + "step": 1990 + }, + { + "epoch": 0.2109704641350211, + "grad_norm": 0.7324331998825073, + "learning_rate": 0.0015, + "loss": 1.668, + "step": 2000 + }, + 
{ + "epoch": 0.2120253164556962, + "grad_norm": 0.6789275407791138, + "learning_rate": 0.0015, + "loss": 1.6734, + "step": 2010 + }, + { + "epoch": 0.21308016877637131, + "grad_norm": 0.7488144040107727, + "learning_rate": 0.0015, + "loss": 1.6819, + "step": 2020 + }, + { + "epoch": 0.2141350210970464, + "grad_norm": 0.7778571844100952, + "learning_rate": 0.0015, + "loss": 1.6716, + "step": 2030 + }, + { + "epoch": 0.21518987341772153, + "grad_norm": 0.7369665503501892, + "learning_rate": 0.0015, + "loss": 1.6687, + "step": 2040 + }, + { + "epoch": 0.21624472573839662, + "grad_norm": 0.7494090795516968, + "learning_rate": 0.0015, + "loss": 1.6703, + "step": 2050 + }, + { + "epoch": 0.21729957805907174, + "grad_norm": 0.6868656277656555, + "learning_rate": 0.0015, + "loss": 1.6593, + "step": 2060 + }, + { + "epoch": 0.21835443037974683, + "grad_norm": 0.8503167033195496, + "learning_rate": 0.0015, + "loss": 1.6627, + "step": 2070 + }, + { + "epoch": 0.21940928270042195, + "grad_norm": 0.7101228833198547, + "learning_rate": 0.0015, + "loss": 1.6672, + "step": 2080 + }, + { + "epoch": 0.22046413502109705, + "grad_norm": 0.7787283062934875, + "learning_rate": 0.0015, + "loss": 1.6558, + "step": 2090 + }, + { + "epoch": 0.22151898734177214, + "grad_norm": 0.9143298864364624, + "learning_rate": 0.0015, + "loss": 1.6643, + "step": 2100 + }, + { + "epoch": 0.22257383966244726, + "grad_norm": 0.6820140480995178, + "learning_rate": 0.0015, + "loss": 1.6565, + "step": 2110 + }, + { + "epoch": 0.22362869198312235, + "grad_norm": 0.6765663027763367, + "learning_rate": 0.0015, + "loss": 1.6516, + "step": 2120 + }, + { + "epoch": 0.22468354430379747, + "grad_norm": 0.680889368057251, + "learning_rate": 0.0015, + "loss": 1.6564, + "step": 2130 + }, + { + "epoch": 0.22573839662447256, + "grad_norm": 0.6101934909820557, + "learning_rate": 0.0015, + "loss": 1.648, + "step": 2140 + }, + { + "epoch": 0.22679324894514769, + "grad_norm": 0.7987344861030579, + "learning_rate": 0.0015, + 
"loss": 1.6541, + "step": 2150 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 0.8717620968818665, + "learning_rate": 0.0015, + "loss": 1.6654, + "step": 2160 + }, + { + "epoch": 0.2289029535864979, + "grad_norm": 0.6999959945678711, + "learning_rate": 0.0015, + "loss": 1.6533, + "step": 2170 + }, + { + "epoch": 0.229957805907173, + "grad_norm": 0.6539660692214966, + "learning_rate": 0.0015, + "loss": 1.6389, + "step": 2180 + }, + { + "epoch": 0.2310126582278481, + "grad_norm": 0.7787438035011292, + "learning_rate": 0.0015, + "loss": 1.6521, + "step": 2190 + }, + { + "epoch": 0.2320675105485232, + "grad_norm": 0.6465375423431396, + "learning_rate": 0.0015, + "loss": 1.6567, + "step": 2200 + }, + { + "epoch": 0.23312236286919832, + "grad_norm": 0.8682584166526794, + "learning_rate": 0.0015, + "loss": 1.6374, + "step": 2210 + }, + { + "epoch": 0.23417721518987342, + "grad_norm": 0.6933326125144958, + "learning_rate": 0.0015, + "loss": 1.6519, + "step": 2220 + }, + { + "epoch": 0.23523206751054854, + "grad_norm": 0.840474009513855, + "learning_rate": 0.0015, + "loss": 1.6452, + "step": 2230 + }, + { + "epoch": 0.23628691983122363, + "grad_norm": 0.7039955258369446, + "learning_rate": 0.0015, + "loss": 1.6443, + "step": 2240 + }, + { + "epoch": 0.23734177215189872, + "grad_norm": 0.7520264387130737, + "learning_rate": 0.0015, + "loss": 1.6422, + "step": 2250 + }, + { + "epoch": 0.23839662447257384, + "grad_norm": 0.6830967664718628, + "learning_rate": 0.0015, + "loss": 1.6318, + "step": 2260 + }, + { + "epoch": 0.23945147679324894, + "grad_norm": 0.8535075187683105, + "learning_rate": 0.0015, + "loss": 1.6638, + "step": 2270 + }, + { + "epoch": 0.24050632911392406, + "grad_norm": 1.1583036184310913, + "learning_rate": 0.0015, + "loss": 1.6488, + "step": 2280 + }, + { + "epoch": 0.24156118143459915, + "grad_norm": 0.6714127659797668, + "learning_rate": 0.0015, + "loss": 1.6421, + "step": 2290 + }, + { + "epoch": 0.24261603375527427, + "grad_norm": 
0.6702568531036377, + "learning_rate": 0.0015, + "loss": 1.6325, + "step": 2300 + }, + { + "epoch": 0.24367088607594936, + "grad_norm": 0.680148720741272, + "learning_rate": 0.0015, + "loss": 1.6349, + "step": 2310 + }, + { + "epoch": 0.24472573839662448, + "grad_norm": 0.6562274098396301, + "learning_rate": 0.0015, + "loss": 1.6424, + "step": 2320 + }, + { + "epoch": 0.24578059071729957, + "grad_norm": 0.7356050610542297, + "learning_rate": 0.0015, + "loss": 1.6254, + "step": 2330 + }, + { + "epoch": 0.2468354430379747, + "grad_norm": 1.0072473287582397, + "learning_rate": 0.0015, + "loss": 1.6232, + "step": 2340 + }, + { + "epoch": 0.2478902953586498, + "grad_norm": 0.6852766275405884, + "learning_rate": 0.0015, + "loss": 1.6242, + "step": 2350 + }, + { + "epoch": 0.2489451476793249, + "grad_norm": 0.717919111251831, + "learning_rate": 0.0015, + "loss": 1.6359, + "step": 2360 + }, + { + "epoch": 0.25, + "grad_norm": 0.6081141829490662, + "learning_rate": 0.0015, + "loss": 1.632, + "step": 2370 + }, + { + "epoch": 0.2510548523206751, + "grad_norm": 0.647636353969574, + "learning_rate": 0.0015, + "loss": 1.6277, + "step": 2380 + }, + { + "epoch": 0.2521097046413502, + "grad_norm": 0.77552330493927, + "learning_rate": 0.0015, + "loss": 1.6286, + "step": 2390 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 0.6071036458015442, + "learning_rate": 0.0015, + "loss": 1.6257, + "step": 2400 + }, + { + "epoch": 0.2542194092827004, + "grad_norm": 0.7372478246688843, + "learning_rate": 0.0015, + "loss": 1.6307, + "step": 2410 + }, + { + "epoch": 0.2552742616033755, + "grad_norm": 0.7451688647270203, + "learning_rate": 0.0015, + "loss": 1.6241, + "step": 2420 + }, + { + "epoch": 0.2563291139240506, + "grad_norm": 0.7447968125343323, + "learning_rate": 0.0015, + "loss": 1.6204, + "step": 2430 + }, + { + "epoch": 0.25738396624472576, + "grad_norm": 0.9765622019767761, + "learning_rate": 0.0015, + "loss": 1.6354, + "step": 2440 + }, + { + "epoch": 0.25843881856540085, + 
"grad_norm": 0.6349117755889893, + "learning_rate": 0.0015, + "loss": 1.6193, + "step": 2450 + }, + { + "epoch": 0.25949367088607594, + "grad_norm": 0.7435948848724365, + "learning_rate": 0.0015, + "loss": 1.6299, + "step": 2460 + }, + { + "epoch": 0.26054852320675104, + "grad_norm": 1.0427266359329224, + "learning_rate": 0.0015, + "loss": 1.6213, + "step": 2470 + }, + { + "epoch": 0.2616033755274262, + "grad_norm": 0.8208745718002319, + "learning_rate": 0.0015, + "loss": 1.628, + "step": 2480 + }, + { + "epoch": 0.2626582278481013, + "grad_norm": 0.6625232100486755, + "learning_rate": 0.0015, + "loss": 1.6152, + "step": 2490 + }, + { + "epoch": 0.26371308016877637, + "grad_norm": 0.6491722464561462, + "learning_rate": 0.0015, + "loss": 1.6159, + "step": 2500 + }, + { + "epoch": 0.26476793248945146, + "grad_norm": 0.6551082134246826, + "learning_rate": 0.0015, + "loss": 1.6133, + "step": 2510 + }, + { + "epoch": 0.26582278481012656, + "grad_norm": 0.9150200486183167, + "learning_rate": 0.0015, + "loss": 1.6192, + "step": 2520 + }, + { + "epoch": 0.2668776371308017, + "grad_norm": 0.7414054274559021, + "learning_rate": 0.0015, + "loss": 1.6143, + "step": 2530 + }, + { + "epoch": 0.2679324894514768, + "grad_norm": 0.720600426197052, + "learning_rate": 0.0015, + "loss": 1.6172, + "step": 2540 + }, + { + "epoch": 0.2689873417721519, + "grad_norm": 0.6197947859764099, + "learning_rate": 0.0015, + "loss": 1.6111, + "step": 2550 + }, + { + "epoch": 0.270042194092827, + "grad_norm": 0.6577795147895813, + "learning_rate": 0.0015, + "loss": 1.6199, + "step": 2560 + }, + { + "epoch": 0.27109704641350213, + "grad_norm": 0.6775305271148682, + "learning_rate": 0.0015, + "loss": 1.6136, + "step": 2570 + }, + { + "epoch": 0.2721518987341772, + "grad_norm": 0.6268432140350342, + "learning_rate": 0.0015, + "loss": 1.6138, + "step": 2580 + }, + { + "epoch": 0.2732067510548523, + "grad_norm": 0.6115721464157104, + "learning_rate": 0.0015, + "loss": 1.6189, + "step": 2590 + }, + { + 
"epoch": 0.2742616033755274, + "grad_norm": 0.6424464583396912, + "learning_rate": 0.0015, + "loss": 1.6179, + "step": 2600 + }, + { + "epoch": 0.27531645569620256, + "grad_norm": 0.7381553053855896, + "learning_rate": 0.0015, + "loss": 1.6141, + "step": 2610 + }, + { + "epoch": 0.27637130801687765, + "grad_norm": 0.6508358120918274, + "learning_rate": 0.0015, + "loss": 1.6151, + "step": 2620 + }, + { + "epoch": 0.27742616033755274, + "grad_norm": 0.704152524471283, + "learning_rate": 0.0015, + "loss": 1.6049, + "step": 2630 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 0.7092164754867554, + "learning_rate": 0.0015, + "loss": 1.616, + "step": 2640 + }, + { + "epoch": 0.2795358649789029, + "grad_norm": 0.8363828063011169, + "learning_rate": 0.0015, + "loss": 1.6049, + "step": 2650 + }, + { + "epoch": 0.2805907172995781, + "grad_norm": 0.6648263931274414, + "learning_rate": 0.0015, + "loss": 1.6033, + "step": 2660 + }, + { + "epoch": 0.28164556962025317, + "grad_norm": 0.7064094543457031, + "learning_rate": 0.0015, + "loss": 1.6029, + "step": 2670 + }, + { + "epoch": 0.28270042194092826, + "grad_norm": 0.6821885108947754, + "learning_rate": 0.0015, + "loss": 1.5995, + "step": 2680 + }, + { + "epoch": 0.28375527426160335, + "grad_norm": 0.9203848838806152, + "learning_rate": 0.0015, + "loss": 1.6022, + "step": 2690 + }, + { + "epoch": 0.2848101265822785, + "grad_norm": 0.6642154455184937, + "learning_rate": 0.0015, + "loss": 1.5972, + "step": 2700 + }, + { + "epoch": 0.2858649789029536, + "grad_norm": 0.667013943195343, + "learning_rate": 0.0015, + "loss": 1.602, + "step": 2710 + }, + { + "epoch": 0.2869198312236287, + "grad_norm": 0.6883009076118469, + "learning_rate": 0.0015, + "loss": 1.6024, + "step": 2720 + }, + { + "epoch": 0.2879746835443038, + "grad_norm": 0.684313178062439, + "learning_rate": 0.0015, + "loss": 1.5836, + "step": 2730 + }, + { + "epoch": 0.2890295358649789, + "grad_norm": 1.3332993984222412, + "learning_rate": 0.0015, + "loss": 
1.6059, + "step": 2740 + }, + { + "epoch": 0.290084388185654, + "grad_norm": 0.7802549600601196, + "learning_rate": 0.0015, + "loss": 1.6066, + "step": 2750 + }, + { + "epoch": 0.2911392405063291, + "grad_norm": 0.7167192101478577, + "learning_rate": 0.0015, + "loss": 1.6089, + "step": 2760 + }, + { + "epoch": 0.2921940928270042, + "grad_norm": 0.6920537948608398, + "learning_rate": 0.0015, + "loss": 1.6055, + "step": 2770 + }, + { + "epoch": 0.29324894514767935, + "grad_norm": 0.654425323009491, + "learning_rate": 0.0015, + "loss": 1.5991, + "step": 2780 + }, + { + "epoch": 0.29430379746835444, + "grad_norm": 0.7083066701889038, + "learning_rate": 0.0015, + "loss": 1.5976, + "step": 2790 + }, + { + "epoch": 0.29535864978902954, + "grad_norm": 0.6729137301445007, + "learning_rate": 0.0015, + "loss": 1.5955, + "step": 2800 + }, + { + "epoch": 0.29641350210970463, + "grad_norm": 0.7214769124984741, + "learning_rate": 0.0015, + "loss": 1.5975, + "step": 2810 + }, + { + "epoch": 0.2974683544303797, + "grad_norm": 0.8139691352844238, + "learning_rate": 0.0015, + "loss": 1.6038, + "step": 2820 + }, + { + "epoch": 0.29852320675105487, + "grad_norm": 0.8491970896720886, + "learning_rate": 0.0015, + "loss": 1.5773, + "step": 2830 + }, + { + "epoch": 0.29957805907172996, + "grad_norm": 1.2716965675354004, + "learning_rate": 0.0015, + "loss": 1.5833, + "step": 2840 + }, + { + "epoch": 0.30063291139240506, + "grad_norm": 1.1176294088363647, + "learning_rate": 0.0015, + "loss": 1.5928, + "step": 2850 + }, + { + "epoch": 0.30168776371308015, + "grad_norm": 0.7806745171546936, + "learning_rate": 0.0015, + "loss": 1.5912, + "step": 2860 + }, + { + "epoch": 0.3027426160337553, + "grad_norm": 0.8258131742477417, + "learning_rate": 0.0015, + "loss": 1.5863, + "step": 2870 + }, + { + "epoch": 0.3037974683544304, + "grad_norm": 0.7513039112091064, + "learning_rate": 0.0015, + "loss": 1.5773, + "step": 2880 + }, + { + "epoch": 0.3048523206751055, + "grad_norm": 0.6474192142486572, + 
"learning_rate": 0.0015, + "loss": 1.5845, + "step": 2890 + }, + { + "epoch": 0.3059071729957806, + "grad_norm": 0.7698449492454529, + "learning_rate": 0.0015, + "loss": 1.5892, + "step": 2900 + }, + { + "epoch": 0.3069620253164557, + "grad_norm": 0.833858847618103, + "learning_rate": 0.0015, + "loss": 1.5882, + "step": 2910 + }, + { + "epoch": 0.3080168776371308, + "grad_norm": 0.6728206872940063, + "learning_rate": 0.0015, + "loss": 1.5846, + "step": 2920 + }, + { + "epoch": 0.3090717299578059, + "grad_norm": 0.6305862665176392, + "learning_rate": 0.0015, + "loss": 1.5809, + "step": 2930 + }, + { + "epoch": 0.310126582278481, + "grad_norm": 0.872765839099884, + "learning_rate": 0.0015, + "loss": 1.5872, + "step": 2940 + }, + { + "epoch": 0.3111814345991561, + "grad_norm": 0.8703192472457886, + "learning_rate": 0.0015, + "loss": 1.5957, + "step": 2950 + }, + { + "epoch": 0.31223628691983124, + "grad_norm": 0.8153250217437744, + "learning_rate": 0.0015, + "loss": 1.5882, + "step": 2960 + }, + { + "epoch": 0.31329113924050633, + "grad_norm": 0.6846070885658264, + "learning_rate": 0.0015, + "loss": 1.5697, + "step": 2970 + }, + { + "epoch": 0.3143459915611814, + "grad_norm": 0.66553795337677, + "learning_rate": 0.0015, + "loss": 1.5721, + "step": 2980 + }, + { + "epoch": 0.3154008438818565, + "grad_norm": 0.6434470415115356, + "learning_rate": 0.0015, + "loss": 1.5888, + "step": 2990 + }, + { + "epoch": 0.31645569620253167, + "grad_norm": 0.7582831978797913, + "learning_rate": 0.0015, + "loss": 1.5727, + "step": 3000 + }, + { + "epoch": 0.31751054852320676, + "grad_norm": 0.6478833556175232, + "learning_rate": 0.0015, + "loss": 1.58, + "step": 3010 + }, + { + "epoch": 0.31856540084388185, + "grad_norm": 0.70221346616745, + "learning_rate": 0.0015, + "loss": 1.5801, + "step": 3020 + }, + { + "epoch": 0.31962025316455694, + "grad_norm": 0.6181135177612305, + "learning_rate": 0.0015, + "loss": 1.5665, + "step": 3030 + }, + { + "epoch": 0.3206751054852321, + "grad_norm": 
0.7900903820991516, + "learning_rate": 0.0015, + "loss": 1.5749, + "step": 3040 + }, + { + "epoch": 0.3217299578059072, + "grad_norm": 0.6413560509681702, + "learning_rate": 0.0015, + "loss": 1.5715, + "step": 3050 + }, + { + "epoch": 0.3227848101265823, + "grad_norm": 0.655437171459198, + "learning_rate": 0.0015, + "loss": 1.5831, + "step": 3060 + }, + { + "epoch": 0.32383966244725737, + "grad_norm": 0.6200083494186401, + "learning_rate": 0.0015, + "loss": 1.5726, + "step": 3070 + }, + { + "epoch": 0.32489451476793246, + "grad_norm": 0.7289552688598633, + "learning_rate": 0.0015, + "loss": 1.5633, + "step": 3080 + }, + { + "epoch": 0.3259493670886076, + "grad_norm": 0.6110949516296387, + "learning_rate": 0.0015, + "loss": 1.5794, + "step": 3090 + }, + { + "epoch": 0.3270042194092827, + "grad_norm": 0.6898250579833984, + "learning_rate": 0.0015, + "loss": 1.5658, + "step": 3100 + }, + { + "epoch": 0.3280590717299578, + "grad_norm": 0.6093639731407166, + "learning_rate": 0.0015, + "loss": 1.5732, + "step": 3110 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 0.7576356530189514, + "learning_rate": 0.0015, + "loss": 1.5735, + "step": 3120 + }, + { + "epoch": 0.33016877637130804, + "grad_norm": 0.6974408626556396, + "learning_rate": 0.0015, + "loss": 1.5858, + "step": 3130 + }, + { + "epoch": 0.33122362869198313, + "grad_norm": 0.8061007261276245, + "learning_rate": 0.0015, + "loss": 1.5647, + "step": 3140 + }, + { + "epoch": 0.3322784810126582, + "grad_norm": 0.7183794975280762, + "learning_rate": 0.0015, + "loss": 1.5722, + "step": 3150 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.6634585857391357, + "learning_rate": 0.0015, + "loss": 1.5712, + "step": 3160 + }, + { + "epoch": 0.33438818565400846, + "grad_norm": 0.6746152639389038, + "learning_rate": 0.0015, + "loss": 1.5752, + "step": 3170 + }, + { + "epoch": 0.33544303797468356, + "grad_norm": 0.6733529567718506, + "learning_rate": 0.0015, + "loss": 1.5679, + "step": 3180 + }, + { + "epoch": 
0.33649789029535865, + "grad_norm": 0.7930343747138977, + "learning_rate": 0.0015, + "loss": 1.5666, + "step": 3190 + }, + { + "epoch": 0.33755274261603374, + "grad_norm": 0.7915785312652588, + "learning_rate": 0.0015, + "loss": 1.5651, + "step": 3200 + }, + { + "epoch": 0.33860759493670883, + "grad_norm": 0.8983940482139587, + "learning_rate": 0.0015, + "loss": 1.5763, + "step": 3210 + }, + { + "epoch": 0.339662447257384, + "grad_norm": 0.7969074845314026, + "learning_rate": 0.0015, + "loss": 1.566, + "step": 3220 + }, + { + "epoch": 0.3407172995780591, + "grad_norm": 0.5963845252990723, + "learning_rate": 0.0015, + "loss": 1.5603, + "step": 3230 + }, + { + "epoch": 0.34177215189873417, + "grad_norm": 0.7686908841133118, + "learning_rate": 0.0015, + "loss": 1.539, + "step": 3240 + }, + { + "epoch": 0.34282700421940926, + "grad_norm": 0.7030147910118103, + "learning_rate": 0.0015, + "loss": 1.5676, + "step": 3250 + }, + { + "epoch": 0.3438818565400844, + "grad_norm": 0.712670624256134, + "learning_rate": 0.0015, + "loss": 1.5562, + "step": 3260 + }, + { + "epoch": 0.3449367088607595, + "grad_norm": 0.7921881675720215, + "learning_rate": 0.0015, + "loss": 1.5762, + "step": 3270 + }, + { + "epoch": 0.3459915611814346, + "grad_norm": 0.8681122064590454, + "learning_rate": 0.0015, + "loss": 1.5757, + "step": 3280 + }, + { + "epoch": 0.3470464135021097, + "grad_norm": 1.1272096633911133, + "learning_rate": 0.0015, + "loss": 1.5582, + "step": 3290 + }, + { + "epoch": 0.34810126582278483, + "grad_norm": 0.7130247354507446, + "learning_rate": 0.0015, + "loss": 1.5635, + "step": 3300 + }, + { + "epoch": 0.3491561181434599, + "grad_norm": 0.683824360370636, + "learning_rate": 0.0015, + "loss": 1.5628, + "step": 3310 + }, + { + "epoch": 0.350210970464135, + "grad_norm": 0.6681883335113525, + "learning_rate": 0.0015, + "loss": 1.5695, + "step": 3320 + }, + { + "epoch": 0.3512658227848101, + "grad_norm": 0.6264692544937134, + "learning_rate": 0.0015, + "loss": 1.5562, + "step": 
3330 + }, + { + "epoch": 0.35232067510548526, + "grad_norm": 0.774014949798584, + "learning_rate": 0.0015, + "loss": 1.5611, + "step": 3340 + }, + { + "epoch": 0.35337552742616035, + "grad_norm": 0.6604976654052734, + "learning_rate": 0.0015, + "loss": 1.5547, + "step": 3350 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 0.7396052479743958, + "learning_rate": 0.0015, + "loss": 1.5525, + "step": 3360 + }, + { + "epoch": 0.35548523206751054, + "grad_norm": 0.6354207992553711, + "learning_rate": 0.0015, + "loss": 1.5617, + "step": 3370 + }, + { + "epoch": 0.35654008438818563, + "grad_norm": 0.8779283165931702, + "learning_rate": 0.0015, + "loss": 1.5548, + "step": 3380 + }, + { + "epoch": 0.3575949367088608, + "grad_norm": 0.644379198551178, + "learning_rate": 0.0015, + "loss": 1.549, + "step": 3390 + }, + { + "epoch": 0.35864978902953587, + "grad_norm": 0.706469714641571, + "learning_rate": 0.0015, + "loss": 1.5596, + "step": 3400 + }, + { + "epoch": 0.35970464135021096, + "grad_norm": 0.6598718762397766, + "learning_rate": 0.0015, + "loss": 1.5564, + "step": 3410 + }, + { + "epoch": 0.36075949367088606, + "grad_norm": 0.6580817699432373, + "learning_rate": 0.0015, + "loss": 1.5375, + "step": 3420 + }, + { + "epoch": 0.3618143459915612, + "grad_norm": 0.602471113204956, + "learning_rate": 0.0015, + "loss": 1.5506, + "step": 3430 + }, + { + "epoch": 0.3628691983122363, + "grad_norm": 0.6580116152763367, + "learning_rate": 0.0015, + "loss": 1.5503, + "step": 3440 + }, + { + "epoch": 0.3639240506329114, + "grad_norm": 0.6038399338722229, + "learning_rate": 0.0015, + "loss": 1.5463, + "step": 3450 + }, + { + "epoch": 0.3649789029535865, + "grad_norm": 0.6137148141860962, + "learning_rate": 0.0015, + "loss": 1.5429, + "step": 3460 + }, + { + "epoch": 0.36603375527426163, + "grad_norm": 0.82498699426651, + "learning_rate": 0.0015, + "loss": 1.5581, + "step": 3470 + }, + { + "epoch": 0.3670886075949367, + "grad_norm": 0.6414018869400024, + "learning_rate": 0.0015, 
+ "loss": 1.562, + "step": 3480 + }, + { + "epoch": 0.3681434599156118, + "grad_norm": 0.803173840045929, + "learning_rate": 0.0015, + "loss": 1.5538, + "step": 3490 + }, + { + "epoch": 0.3691983122362869, + "grad_norm": 0.7096922993659973, + "learning_rate": 0.0015, + "loss": 1.5374, + "step": 3500 + }, + { + "epoch": 0.370253164556962, + "grad_norm": 0.6633762121200562, + "learning_rate": 0.0015, + "loss": 1.5583, + "step": 3510 + }, + { + "epoch": 0.37130801687763715, + "grad_norm": 0.7181137204170227, + "learning_rate": 0.0015, + "loss": 1.5496, + "step": 3520 + }, + { + "epoch": 0.37236286919831224, + "grad_norm": 0.6555500626564026, + "learning_rate": 0.0015, + "loss": 1.5617, + "step": 3530 + }, + { + "epoch": 0.37341772151898733, + "grad_norm": 0.9277512431144714, + "learning_rate": 0.0015, + "loss": 1.5584, + "step": 3540 + }, + { + "epoch": 0.3744725738396624, + "grad_norm": 0.6497780084609985, + "learning_rate": 0.0015, + "loss": 1.5458, + "step": 3550 + }, + { + "epoch": 0.3755274261603376, + "grad_norm": 0.6723967790603638, + "learning_rate": 0.0015, + "loss": 1.5446, + "step": 3560 + }, + { + "epoch": 0.37658227848101267, + "grad_norm": 1.0287106037139893, + "learning_rate": 0.0015, + "loss": 1.5413, + "step": 3570 + }, + { + "epoch": 0.37763713080168776, + "grad_norm": 0.7365649342536926, + "learning_rate": 0.0015, + "loss": 1.5454, + "step": 3580 + }, + { + "epoch": 0.37869198312236285, + "grad_norm": 0.6937849521636963, + "learning_rate": 0.0015, + "loss": 1.5543, + "step": 3590 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 0.9443396925926208, + "learning_rate": 0.0015, + "loss": 1.5367, + "step": 3600 + }, + { + "epoch": 0.3808016877637131, + "grad_norm": 0.7585970163345337, + "learning_rate": 0.0015, + "loss": 1.5364, + "step": 3610 + }, + { + "epoch": 0.3818565400843882, + "grad_norm": 0.7182621955871582, + "learning_rate": 0.0015, + "loss": 1.5449, + "step": 3620 + }, + { + "epoch": 0.3829113924050633, + "grad_norm": 0.8222300410270691, 
+ "learning_rate": 0.0015, + "loss": 1.5454, + "step": 3630 + }, + { + "epoch": 0.38396624472573837, + "grad_norm": 0.7129064202308655, + "learning_rate": 0.0015, + "loss": 1.5398, + "step": 3640 + }, + { + "epoch": 0.3850210970464135, + "grad_norm": 0.7171529531478882, + "learning_rate": 0.0015, + "loss": 1.5528, + "step": 3650 + }, + { + "epoch": 0.3860759493670886, + "grad_norm": 0.7988422513008118, + "learning_rate": 0.0015, + "loss": 1.547, + "step": 3660 + }, + { + "epoch": 0.3871308016877637, + "grad_norm": 0.7298112511634827, + "learning_rate": 0.0015, + "loss": 1.5392, + "step": 3670 + }, + { + "epoch": 0.3881856540084388, + "grad_norm": 0.9609044194221497, + "learning_rate": 0.0015, + "loss": 1.5395, + "step": 3680 + }, + { + "epoch": 0.38924050632911394, + "grad_norm": 0.7461931109428406, + "learning_rate": 0.0015, + "loss": 1.551, + "step": 3690 + }, + { + "epoch": 0.39029535864978904, + "grad_norm": 0.660954475402832, + "learning_rate": 0.0015, + "loss": 1.5534, + "step": 3700 + }, + { + "epoch": 0.39135021097046413, + "grad_norm": 0.699897289276123, + "learning_rate": 0.0015, + "loss": 1.5422, + "step": 3710 + }, + { + "epoch": 0.3924050632911392, + "grad_norm": 0.6323885917663574, + "learning_rate": 0.0015, + "loss": 1.5221, + "step": 3720 + }, + { + "epoch": 0.39345991561181437, + "grad_norm": 0.6666523218154907, + "learning_rate": 0.0015, + "loss": 1.5391, + "step": 3730 + }, + { + "epoch": 0.39451476793248946, + "grad_norm": 0.6488693356513977, + "learning_rate": 0.0015, + "loss": 1.5396, + "step": 3740 + }, + { + "epoch": 0.39556962025316456, + "grad_norm": 0.861187756061554, + "learning_rate": 0.0015, + "loss": 1.545, + "step": 3750 + }, + { + "epoch": 0.39662447257383965, + "grad_norm": 1.2243887186050415, + "learning_rate": 0.0015, + "loss": 1.5339, + "step": 3760 + }, + { + "epoch": 0.39767932489451474, + "grad_norm": 0.61638343334198, + "learning_rate": 0.0015, + "loss": 1.5463, + "step": 3770 + }, + { + "epoch": 0.3987341772151899, + 
"grad_norm": 0.6018365621566772, + "learning_rate": 0.0015, + "loss": 1.5313, + "step": 3780 + }, + { + "epoch": 0.399789029535865, + "grad_norm": 0.7864364981651306, + "learning_rate": 0.0015, + "loss": 1.5392, + "step": 3790 + }, + { + "epoch": 0.4008438818565401, + "grad_norm": 0.6807109713554382, + "learning_rate": 0.0015, + "loss": 1.541, + "step": 3800 + }, + { + "epoch": 0.40189873417721517, + "grad_norm": 0.6566856503486633, + "learning_rate": 0.0015, + "loss": 1.532, + "step": 3810 + }, + { + "epoch": 0.4029535864978903, + "grad_norm": 0.646217942237854, + "learning_rate": 0.0015, + "loss": 1.524, + "step": 3820 + }, + { + "epoch": 0.4040084388185654, + "grad_norm": 0.63829505443573, + "learning_rate": 0.0015, + "loss": 1.5279, + "step": 3830 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 0.6141451597213745, + "learning_rate": 0.0015, + "loss": 1.544, + "step": 3840 + }, + { + "epoch": 0.4061181434599156, + "grad_norm": 0.6665148735046387, + "learning_rate": 0.0015, + "loss": 1.5252, + "step": 3850 + }, + { + "epoch": 0.40717299578059074, + "grad_norm": 0.7954282760620117, + "learning_rate": 0.0015, + "loss": 1.54, + "step": 3860 + }, + { + "epoch": 0.40822784810126583, + "grad_norm": 0.7158104181289673, + "learning_rate": 0.0015, + "loss": 1.5289, + "step": 3870 + }, + { + "epoch": 0.4092827004219409, + "grad_norm": 0.6264477968215942, + "learning_rate": 0.0015, + "loss": 1.5219, + "step": 3880 + }, + { + "epoch": 0.410337552742616, + "grad_norm": 0.7052874565124512, + "learning_rate": 0.0015, + "loss": 1.5188, + "step": 3890 + }, + { + "epoch": 0.41139240506329117, + "grad_norm": 1.2495715618133545, + "learning_rate": 0.0015, + "loss": 1.5222, + "step": 3900 + }, + { + "epoch": 0.41244725738396626, + "grad_norm": 0.6966308951377869, + "learning_rate": 0.0015, + "loss": 1.5248, + "step": 3910 + }, + { + "epoch": 0.41350210970464135, + "grad_norm": 0.6995412111282349, + "learning_rate": 0.0015, + "loss": 1.5186, + "step": 3920 + }, + { + "epoch": 
0.41455696202531644, + "grad_norm": 0.6294351816177368, + "learning_rate": 0.0015, + "loss": 1.5255, + "step": 3930 + }, + { + "epoch": 0.41561181434599154, + "grad_norm": 0.9574099779129028, + "learning_rate": 0.0015, + "loss": 1.5256, + "step": 3940 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 0.8341027498245239, + "learning_rate": 0.0015, + "loss": 1.5304, + "step": 3950 + }, + { + "epoch": 0.4177215189873418, + "grad_norm": 0.6560214161872864, + "learning_rate": 0.0015, + "loss": 1.5287, + "step": 3960 + }, + { + "epoch": 0.41877637130801687, + "grad_norm": 0.6953286528587341, + "learning_rate": 0.0015, + "loss": 1.5185, + "step": 3970 + }, + { + "epoch": 0.41983122362869196, + "grad_norm": 0.7181388139724731, + "learning_rate": 0.0015, + "loss": 1.5301, + "step": 3980 + }, + { + "epoch": 0.4208860759493671, + "grad_norm": 0.7076848745346069, + "learning_rate": 0.0015, + "loss": 1.5234, + "step": 3990 + }, + { + "epoch": 0.4219409282700422, + "grad_norm": 0.5951666235923767, + "learning_rate": 0.0015, + "loss": 1.5321, + "step": 4000 + }, + { + "epoch": 0.4229957805907173, + "grad_norm": 0.6291056275367737, + "learning_rate": 0.0015, + "loss": 1.5209, + "step": 4010 + }, + { + "epoch": 0.4240506329113924, + "grad_norm": 0.701616644859314, + "learning_rate": 0.0015, + "loss": 1.5241, + "step": 4020 + }, + { + "epoch": 0.42510548523206754, + "grad_norm": 0.7535003423690796, + "learning_rate": 0.0015, + "loss": 1.5152, + "step": 4030 + }, + { + "epoch": 0.42616033755274263, + "grad_norm": 0.6437270045280457, + "learning_rate": 0.0015, + "loss": 1.5248, + "step": 4040 + }, + { + "epoch": 0.4272151898734177, + "grad_norm": 0.8609044551849365, + "learning_rate": 0.0015, + "loss": 1.5274, + "step": 4050 + }, + { + "epoch": 0.4282700421940928, + "grad_norm": 0.635687530040741, + "learning_rate": 0.0015, + "loss": 1.532, + "step": 4060 + }, + { + "epoch": 0.4293248945147679, + "grad_norm": 0.796039342880249, + "learning_rate": 0.0015, + "loss": 1.5262, + 
"step": 4070 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 0.8434703946113586, + "learning_rate": 0.0015, + "loss": 1.5338, + "step": 4080 + }, + { + "epoch": 0.43143459915611815, + "grad_norm": 0.771105945110321, + "learning_rate": 0.0015, + "loss": 1.5292, + "step": 4090 + }, + { + "epoch": 0.43248945147679324, + "grad_norm": 0.6671942472457886, + "learning_rate": 0.0015, + "loss": 1.5177, + "step": 4100 + }, + { + "epoch": 0.43354430379746833, + "grad_norm": 0.7578005194664001, + "learning_rate": 0.0015, + "loss": 1.5126, + "step": 4110 + }, + { + "epoch": 0.4345991561181435, + "grad_norm": 0.6778215169906616, + "learning_rate": 0.0015, + "loss": 1.5185, + "step": 4120 + }, + { + "epoch": 0.4356540084388186, + "grad_norm": 0.82611483335495, + "learning_rate": 0.0015, + "loss": 1.5107, + "step": 4130 + }, + { + "epoch": 0.43670886075949367, + "grad_norm": 0.797430157661438, + "learning_rate": 0.0015, + "loss": 1.5179, + "step": 4140 + }, + { + "epoch": 0.43776371308016876, + "grad_norm": 0.7697995901107788, + "learning_rate": 0.0015, + "loss": 1.5201, + "step": 4150 + }, + { + "epoch": 0.4388185654008439, + "grad_norm": 0.6459429264068604, + "learning_rate": 0.0015, + "loss": 1.5144, + "step": 4160 + }, + { + "epoch": 0.439873417721519, + "grad_norm": 0.5715150237083435, + "learning_rate": 0.0015, + "loss": 1.5142, + "step": 4170 + }, + { + "epoch": 0.4409282700421941, + "grad_norm": 0.6213168501853943, + "learning_rate": 0.0015, + "loss": 1.5134, + "step": 4180 + }, + { + "epoch": 0.4419831223628692, + "grad_norm": 0.7337896823883057, + "learning_rate": 0.0015, + "loss": 1.5175, + "step": 4190 + }, + { + "epoch": 0.4430379746835443, + "grad_norm": 0.646183967590332, + "learning_rate": 0.0015, + "loss": 1.5194, + "step": 4200 + }, + { + "epoch": 0.4440928270042194, + "grad_norm": 0.6797531247138977, + "learning_rate": 0.0015, + "loss": 1.5077, + "step": 4210 + }, + { + "epoch": 0.4451476793248945, + "grad_norm": 0.7032386064529419, + "learning_rate": 
0.0015, + "loss": 1.5177, + "step": 4220 + }, + { + "epoch": 0.4462025316455696, + "grad_norm": 0.830560564994812, + "learning_rate": 0.0015, + "loss": 1.5139, + "step": 4230 + }, + { + "epoch": 0.4472573839662447, + "grad_norm": 0.6150537133216858, + "learning_rate": 0.0015, + "loss": 1.5128, + "step": 4240 + }, + { + "epoch": 0.44831223628691985, + "grad_norm": 0.6464046835899353, + "learning_rate": 0.0015, + "loss": 1.5159, + "step": 4250 + }, + { + "epoch": 0.44936708860759494, + "grad_norm": 0.7626861333847046, + "learning_rate": 0.0015, + "loss": 1.5246, + "step": 4260 + }, + { + "epoch": 0.45042194092827004, + "grad_norm": 0.6594361066818237, + "learning_rate": 0.0015, + "loss": 1.5127, + "step": 4270 + }, + { + "epoch": 0.45147679324894513, + "grad_norm": 0.9918456673622131, + "learning_rate": 0.0015, + "loss": 1.5057, + "step": 4280 + }, + { + "epoch": 0.4525316455696203, + "grad_norm": 1.1276347637176514, + "learning_rate": 0.0015, + "loss": 1.5172, + "step": 4290 + }, + { + "epoch": 0.45358649789029537, + "grad_norm": 0.7557980418205261, + "learning_rate": 0.0015, + "loss": 1.5139, + "step": 4300 + }, + { + "epoch": 0.45464135021097046, + "grad_norm": 0.5922372937202454, + "learning_rate": 0.0015, + "loss": 1.5115, + "step": 4310 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 0.8998174667358398, + "learning_rate": 0.0015, + "loss": 1.507, + "step": 4320 + }, + { + "epoch": 0.45675105485232065, + "grad_norm": 0.6543339490890503, + "learning_rate": 0.0015, + "loss": 1.5173, + "step": 4330 + }, + { + "epoch": 0.4578059071729958, + "grad_norm": 0.7405723929405212, + "learning_rate": 0.0015, + "loss": 1.5079, + "step": 4340 + }, + { + "epoch": 0.4588607594936709, + "grad_norm": 0.7839069962501526, + "learning_rate": 0.0015, + "loss": 1.51, + "step": 4350 + }, + { + "epoch": 0.459915611814346, + "grad_norm": 0.6344037055969238, + "learning_rate": 0.0015, + "loss": 1.5244, + "step": 4360 + }, + { + "epoch": 0.4609704641350211, + "grad_norm": 
0.6046910881996155, + "learning_rate": 0.0015, + "loss": 1.502, + "step": 4370 + }, + { + "epoch": 0.4620253164556962, + "grad_norm": 0.6111357808113098, + "learning_rate": 0.0015, + "loss": 1.5058, + "step": 4380 + }, + { + "epoch": 0.4630801687763713, + "grad_norm": 0.5927023887634277, + "learning_rate": 0.0015, + "loss": 1.5021, + "step": 4390 + }, + { + "epoch": 0.4641350210970464, + "grad_norm": 0.6044620871543884, + "learning_rate": 0.0015, + "loss": 1.5102, + "step": 4400 + }, + { + "epoch": 0.4651898734177215, + "grad_norm": 0.7710412740707397, + "learning_rate": 0.0015, + "loss": 1.5263, + "step": 4410 + }, + { + "epoch": 0.46624472573839665, + "grad_norm": 0.744567334651947, + "learning_rate": 0.0015, + "loss": 1.507, + "step": 4420 + }, + { + "epoch": 0.46729957805907174, + "grad_norm": 0.7604517936706543, + "learning_rate": 0.0015, + "loss": 1.5117, + "step": 4430 + }, + { + "epoch": 0.46835443037974683, + "grad_norm": 0.8007243275642395, + "learning_rate": 0.0015, + "loss": 1.5068, + "step": 4440 + }, + { + "epoch": 0.4694092827004219, + "grad_norm": 0.6494132280349731, + "learning_rate": 0.0015, + "loss": 1.5116, + "step": 4450 + }, + { + "epoch": 0.4704641350210971, + "grad_norm": 0.6437615156173706, + "learning_rate": 0.0015, + "loss": 1.5, + "step": 4460 + }, + { + "epoch": 0.47151898734177217, + "grad_norm": 0.602859616279602, + "learning_rate": 0.0015, + "loss": 1.5107, + "step": 4470 + }, + { + "epoch": 0.47257383966244726, + "grad_norm": 0.7249857187271118, + "learning_rate": 0.0015, + "loss": 1.5157, + "step": 4480 + }, + { + "epoch": 0.47362869198312235, + "grad_norm": 0.8063498139381409, + "learning_rate": 0.0015, + "loss": 1.5013, + "step": 4490 + }, + { + "epoch": 0.47468354430379744, + "grad_norm": 0.6102367639541626, + "learning_rate": 0.0015, + "loss": 1.5003, + "step": 4500 + }, + { + "epoch": 0.4757383966244726, + "grad_norm": 0.5735389590263367, + "learning_rate": 0.0015, + "loss": 1.5069, + "step": 4510 + }, + { + "epoch": 
0.4767932489451477, + "grad_norm": 0.7185008525848389, + "learning_rate": 0.0015, + "loss": 1.4896, + "step": 4520 + }, + { + "epoch": 0.4778481012658228, + "grad_norm": 0.7369451522827148, + "learning_rate": 0.0015, + "loss": 1.5107, + "step": 4530 + }, + { + "epoch": 0.47890295358649787, + "grad_norm": 0.7103257775306702, + "learning_rate": 0.0015, + "loss": 1.511, + "step": 4540 + }, + { + "epoch": 0.479957805907173, + "grad_norm": 0.6686055660247803, + "learning_rate": 0.0015, + "loss": 1.5105, + "step": 4550 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 0.8303413391113281, + "learning_rate": 0.0015, + "loss": 1.4972, + "step": 4560 + }, + { + "epoch": 0.4820675105485232, + "grad_norm": 0.6716217398643494, + "learning_rate": 0.0015, + "loss": 1.4825, + "step": 4570 + }, + { + "epoch": 0.4831223628691983, + "grad_norm": 0.7727724313735962, + "learning_rate": 0.0015, + "loss": 1.5016, + "step": 4580 + }, + { + "epoch": 0.48417721518987344, + "grad_norm": 0.7711950540542603, + "learning_rate": 0.0015, + "loss": 1.4905, + "step": 4590 + }, + { + "epoch": 0.48523206751054854, + "grad_norm": 0.6626254916191101, + "learning_rate": 0.0015, + "loss": 1.5064, + "step": 4600 + }, + { + "epoch": 0.48628691983122363, + "grad_norm": 0.858144998550415, + "learning_rate": 0.0015, + "loss": 1.5117, + "step": 4610 + }, + { + "epoch": 0.4873417721518987, + "grad_norm": 0.7701011300086975, + "learning_rate": 0.0015, + "loss": 1.487, + "step": 4620 + }, + { + "epoch": 0.4883966244725738, + "grad_norm": 0.6523405313491821, + "learning_rate": 0.0015, + "loss": 1.5066, + "step": 4630 + }, + { + "epoch": 0.48945147679324896, + "grad_norm": 0.609957218170166, + "learning_rate": 0.0015, + "loss": 1.5064, + "step": 4640 + }, + { + "epoch": 0.49050632911392406, + "grad_norm": 0.6151617765426636, + "learning_rate": 0.0015, + "loss": 1.5094, + "step": 4650 + }, + { + "epoch": 0.49156118143459915, + "grad_norm": 0.6941571235656738, + "learning_rate": 0.0015, + "loss": 1.4985, + 
"step": 4660 + }, + { + "epoch": 0.49261603375527424, + "grad_norm": 0.7581057548522949, + "learning_rate": 0.0015, + "loss": 1.4957, + "step": 4670 + }, + { + "epoch": 0.4936708860759494, + "grad_norm": 0.7099641561508179, + "learning_rate": 0.0015, + "loss": 1.4939, + "step": 4680 + }, + { + "epoch": 0.4947257383966245, + "grad_norm": 0.8094627857208252, + "learning_rate": 0.0015, + "loss": 1.488, + "step": 4690 + }, + { + "epoch": 0.4957805907172996, + "grad_norm": 0.5867845416069031, + "learning_rate": 0.0015, + "loss": 1.4967, + "step": 4700 + }, + { + "epoch": 0.49683544303797467, + "grad_norm": 0.6739965081214905, + "learning_rate": 0.0015, + "loss": 1.5007, + "step": 4710 + }, + { + "epoch": 0.4978902953586498, + "grad_norm": 0.7364274263381958, + "learning_rate": 0.0015, + "loss": 1.4962, + "step": 4720 + }, + { + "epoch": 0.4989451476793249, + "grad_norm": 0.7700591087341309, + "learning_rate": 0.0015, + "loss": 1.4936, + "step": 4730 + }, + { + "epoch": 0.5, + "grad_norm": 0.8104601502418518, + "learning_rate": 0.0015, + "loss": 1.4928, + "step": 4740 + }, + { + "epoch": 0.5010548523206751, + "grad_norm": 0.6066454648971558, + "learning_rate": 0.0015, + "loss": 1.5123, + "step": 4750 + }, + { + "epoch": 0.5021097046413502, + "grad_norm": 0.6883394122123718, + "learning_rate": 0.0015, + "loss": 1.497, + "step": 4760 + }, + { + "epoch": 0.5031645569620253, + "grad_norm": 0.74123215675354, + "learning_rate": 0.0015, + "loss": 1.4957, + "step": 4770 + }, + { + "epoch": 0.5042194092827004, + "grad_norm": 0.6447526812553406, + "learning_rate": 0.0015, + "loss": 1.5034, + "step": 4780 + }, + { + "epoch": 0.5052742616033755, + "grad_norm": 0.7144091725349426, + "learning_rate": 0.0015, + "loss": 1.4864, + "step": 4790 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 0.6667797565460205, + "learning_rate": 0.0015, + "loss": 1.4919, + "step": 4800 + }, + { + "epoch": 0.5073839662447257, + "grad_norm": 0.6014418005943298, + "learning_rate": 0.0015, + "loss": 
1.4923, + "step": 4810 + }, + { + "epoch": 0.5084388185654009, + "grad_norm": 0.598326563835144, + "learning_rate": 0.0015, + "loss": 1.5112, + "step": 4820 + }, + { + "epoch": 0.509493670886076, + "grad_norm": 0.6572471261024475, + "learning_rate": 0.0015, + "loss": 1.4959, + "step": 4830 + }, + { + "epoch": 0.510548523206751, + "grad_norm": 0.6467229127883911, + "learning_rate": 0.0015, + "loss": 1.488, + "step": 4840 + }, + { + "epoch": 0.5116033755274262, + "grad_norm": 0.6614055037498474, + "learning_rate": 0.0015, + "loss": 1.5008, + "step": 4850 + }, + { + "epoch": 0.5126582278481012, + "grad_norm": 0.655278205871582, + "learning_rate": 0.0015, + "loss": 1.4903, + "step": 4860 + }, + { + "epoch": 0.5137130801687764, + "grad_norm": 0.7447291016578674, + "learning_rate": 0.0015, + "loss": 1.4837, + "step": 4870 + }, + { + "epoch": 0.5147679324894515, + "grad_norm": 0.8840464949607849, + "learning_rate": 0.0015, + "loss": 1.504, + "step": 4880 + }, + { + "epoch": 0.5158227848101266, + "grad_norm": 0.7120494246482849, + "learning_rate": 0.0015, + "loss": 1.4928, + "step": 4890 + }, + { + "epoch": 0.5168776371308017, + "grad_norm": 0.6415331363677979, + "learning_rate": 0.0015, + "loss": 1.5019, + "step": 4900 + }, + { + "epoch": 0.5179324894514767, + "grad_norm": 0.6202507615089417, + "learning_rate": 0.0015, + "loss": 1.4947, + "step": 4910 + }, + { + "epoch": 0.5189873417721519, + "grad_norm": 0.7231060862541199, + "learning_rate": 0.0015, + "loss": 1.4857, + "step": 4920 + }, + { + "epoch": 0.520042194092827, + "grad_norm": 0.5879958868026733, + "learning_rate": 0.0015, + "loss": 1.4948, + "step": 4930 + }, + { + "epoch": 0.5210970464135021, + "grad_norm": 0.6680917739868164, + "learning_rate": 0.0015, + "loss": 1.4906, + "step": 4940 + }, + { + "epoch": 0.5221518987341772, + "grad_norm": 0.6083183884620667, + "learning_rate": 0.0015, + "loss": 1.4991, + "step": 4950 + }, + { + "epoch": 0.5232067510548524, + "grad_norm": 0.7683422565460205, + "learning_rate": 
0.0015, + "loss": 1.4814, + "step": 4960 + }, + { + "epoch": 0.5242616033755274, + "grad_norm": 0.6253842115402222, + "learning_rate": 0.0015, + "loss": 1.4857, + "step": 4970 + }, + { + "epoch": 0.5253164556962026, + "grad_norm": 0.6490429639816284, + "learning_rate": 0.0015, + "loss": 1.483, + "step": 4980 + }, + { + "epoch": 0.5263713080168776, + "grad_norm": 0.6184181571006775, + "learning_rate": 0.0015, + "loss": 1.4846, + "step": 4990 + }, + { + "epoch": 0.5274261603375527, + "grad_norm": 0.7185336351394653, + "learning_rate": 0.0015, + "loss": 1.4969, + "step": 5000 + }, + { + "epoch": 0.5284810126582279, + "grad_norm": 0.5995338559150696, + "learning_rate": 0.0015, + "loss": 1.4888, + "step": 5010 + }, + { + "epoch": 0.5295358649789029, + "grad_norm": 0.6141905784606934, + "learning_rate": 0.0015, + "loss": 1.4823, + "step": 5020 + }, + { + "epoch": 0.5305907172995781, + "grad_norm": 0.6401776075363159, + "learning_rate": 0.0015, + "loss": 1.4689, + "step": 5030 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 0.7476955652236938, + "learning_rate": 0.0015, + "loss": 1.4977, + "step": 5040 + }, + { + "epoch": 0.5327004219409283, + "grad_norm": 0.7000148296356201, + "learning_rate": 0.0015, + "loss": 1.4859, + "step": 5050 + }, + { + "epoch": 0.5337552742616034, + "grad_norm": 0.8738598823547363, + "learning_rate": 0.0015, + "loss": 1.4974, + "step": 5060 + }, + { + "epoch": 0.5348101265822784, + "grad_norm": 0.8763918280601501, + "learning_rate": 0.0015, + "loss": 1.4866, + "step": 5070 + }, + { + "epoch": 0.5358649789029536, + "grad_norm": 0.6915040016174316, + "learning_rate": 0.0015, + "loss": 1.5009, + "step": 5080 + }, + { + "epoch": 0.5369198312236287, + "grad_norm": 0.7343860268592834, + "learning_rate": 0.0015, + "loss": 1.4909, + "step": 5090 + }, + { + "epoch": 0.5379746835443038, + "grad_norm": 0.6705483198165894, + "learning_rate": 0.0015, + "loss": 1.4717, + "step": 5100 + }, + { + "epoch": 0.5390295358649789, + "grad_norm": 
0.9428117871284485, + "learning_rate": 0.0015, + "loss": 1.4899, + "step": 5110 + }, + { + "epoch": 0.540084388185654, + "grad_norm": 0.6759603023529053, + "learning_rate": 0.0015, + "loss": 1.4875, + "step": 5120 + }, + { + "epoch": 0.5411392405063291, + "grad_norm": 0.7669435739517212, + "learning_rate": 0.0015, + "loss": 1.4822, + "step": 5130 + }, + { + "epoch": 0.5421940928270043, + "grad_norm": 0.9636547565460205, + "learning_rate": 0.0015, + "loss": 1.4965, + "step": 5140 + }, + { + "epoch": 0.5432489451476793, + "grad_norm": 0.7753234505653381, + "learning_rate": 0.0015, + "loss": 1.4946, + "step": 5150 + }, + { + "epoch": 0.5443037974683544, + "grad_norm": 0.6989697217941284, + "learning_rate": 0.0015, + "loss": 1.4887, + "step": 5160 + }, + { + "epoch": 0.5453586497890295, + "grad_norm": 0.7359611392021179, + "learning_rate": 0.0015, + "loss": 1.4873, + "step": 5170 + }, + { + "epoch": 0.5464135021097046, + "grad_norm": 0.9293256998062134, + "learning_rate": 0.0015, + "loss": 1.4951, + "step": 5180 + }, + { + "epoch": 0.5474683544303798, + "grad_norm": 0.7741364240646362, + "learning_rate": 0.0015, + "loss": 1.478, + "step": 5190 + }, + { + "epoch": 0.5485232067510548, + "grad_norm": 0.9756428003311157, + "learning_rate": 0.0015, + "loss": 1.4903, + "step": 5200 + }, + { + "epoch": 0.54957805907173, + "grad_norm": 0.6212122440338135, + "learning_rate": 0.0015, + "loss": 1.4756, + "step": 5210 + }, + { + "epoch": 0.5506329113924051, + "grad_norm": 0.7047474980354309, + "learning_rate": 0.0015, + "loss": 1.4844, + "step": 5220 + }, + { + "epoch": 0.5516877637130801, + "grad_norm": 0.7011207342147827, + "learning_rate": 0.0015, + "loss": 1.48, + "step": 5230 + }, + { + "epoch": 0.5527426160337553, + "grad_norm": 0.6198409199714661, + "learning_rate": 0.0015, + "loss": 1.4856, + "step": 5240 + }, + { + "epoch": 0.5537974683544303, + "grad_norm": 0.8558717370033264, + "learning_rate": 0.0015, + "loss": 1.478, + "step": 5250 + }, + { + "epoch": 
0.5548523206751055, + "grad_norm": 0.6592795848846436, + "learning_rate": 0.0015, + "loss": 1.4836, + "step": 5260 + }, + { + "epoch": 0.5559071729957806, + "grad_norm": 0.7591399550437927, + "learning_rate": 0.0015, + "loss": 1.4893, + "step": 5270 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 0.8932752013206482, + "learning_rate": 0.0015, + "loss": 1.4815, + "step": 5280 + }, + { + "epoch": 0.5580168776371308, + "grad_norm": 0.9459204077720642, + "learning_rate": 0.0015, + "loss": 1.4824, + "step": 5290 + }, + { + "epoch": 0.5590717299578059, + "grad_norm": 0.7757249474525452, + "learning_rate": 0.0015, + "loss": 1.4865, + "step": 5300 + }, + { + "epoch": 0.560126582278481, + "grad_norm": 1.1059825420379639, + "learning_rate": 0.0015, + "loss": 1.4737, + "step": 5310 + }, + { + "epoch": 0.5611814345991561, + "grad_norm": 0.6449358463287354, + "learning_rate": 0.0015, + "loss": 1.4745, + "step": 5320 + }, + { + "epoch": 0.5622362869198312, + "grad_norm": 0.6185063123703003, + "learning_rate": 0.0015, + "loss": 1.4834, + "step": 5330 + }, + { + "epoch": 0.5632911392405063, + "grad_norm": 0.5984042882919312, + "learning_rate": 0.0015, + "loss": 1.478, + "step": 5340 + }, + { + "epoch": 0.5643459915611815, + "grad_norm": 0.6496654748916626, + "learning_rate": 0.0015, + "loss": 1.4705, + "step": 5350 + }, + { + "epoch": 0.5654008438818565, + "grad_norm": 0.7661080360412598, + "learning_rate": 0.0015, + "loss": 1.4802, + "step": 5360 + }, + { + "epoch": 0.5664556962025317, + "grad_norm": 0.6617264747619629, + "learning_rate": 0.0015, + "loss": 1.4805, + "step": 5370 + }, + { + "epoch": 0.5675105485232067, + "grad_norm": 0.6429807543754578, + "learning_rate": 0.0015, + "loss": 1.4746, + "step": 5380 + }, + { + "epoch": 0.5685654008438819, + "grad_norm": 0.70951247215271, + "learning_rate": 0.0015, + "loss": 1.4748, + "step": 5390 + }, + { + "epoch": 0.569620253164557, + "grad_norm": 0.6636227965354919, + "learning_rate": 0.0015, + "loss": 1.4747, + "step": 5400 
+ }, + { + "epoch": 0.570675105485232, + "grad_norm": 0.5959547758102417, + "learning_rate": 0.0015, + "loss": 1.4772, + "step": 5410 + }, + { + "epoch": 0.5717299578059072, + "grad_norm": 0.7216319441795349, + "learning_rate": 0.0015, + "loss": 1.472, + "step": 5420 + }, + { + "epoch": 0.5727848101265823, + "grad_norm": 0.8427746891975403, + "learning_rate": 0.0015, + "loss": 1.4698, + "step": 5430 + }, + { + "epoch": 0.5738396624472574, + "grad_norm": 0.6237267851829529, + "learning_rate": 0.0015, + "loss": 1.4681, + "step": 5440 + }, + { + "epoch": 0.5748945147679325, + "grad_norm": 0.7382597923278809, + "learning_rate": 0.0015, + "loss": 1.4777, + "step": 5450 + }, + { + "epoch": 0.5759493670886076, + "grad_norm": 0.6296837329864502, + "learning_rate": 0.0015, + "loss": 1.4784, + "step": 5460 + }, + { + "epoch": 0.5770042194092827, + "grad_norm": 0.6827468276023865, + "learning_rate": 0.0015, + "loss": 1.4768, + "step": 5470 + }, + { + "epoch": 0.5780590717299579, + "grad_norm": 0.7274801731109619, + "learning_rate": 0.0015, + "loss": 1.4812, + "step": 5480 + }, + { + "epoch": 0.5791139240506329, + "grad_norm": 0.6991948485374451, + "learning_rate": 0.0015, + "loss": 1.4828, + "step": 5490 + }, + { + "epoch": 0.580168776371308, + "grad_norm": 0.6994268298149109, + "learning_rate": 0.0015, + "loss": 1.4748, + "step": 5500 + }, + { + "epoch": 0.5812236286919831, + "grad_norm": 0.7800191640853882, + "learning_rate": 0.0015, + "loss": 1.4798, + "step": 5510 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 0.6182882189750671, + "learning_rate": 0.0015, + "loss": 1.4708, + "step": 5520 + }, + { + "epoch": 0.5833333333333334, + "grad_norm": 0.703421413898468, + "learning_rate": 0.0015, + "loss": 1.4681, + "step": 5530 + }, + { + "epoch": 0.5843881856540084, + "grad_norm": 0.6056244373321533, + "learning_rate": 0.0015, + "loss": 1.4651, + "step": 5540 + }, + { + "epoch": 0.5854430379746836, + "grad_norm": 0.6793457865715027, + "learning_rate": 0.0015, + "loss": 
1.4802, + "step": 5550 + }, + { + "epoch": 0.5864978902953587, + "grad_norm": 0.6434624791145325, + "learning_rate": 0.0015, + "loss": 1.4843, + "step": 5560 + }, + { + "epoch": 0.5875527426160337, + "grad_norm": 0.6200845837593079, + "learning_rate": 0.0015, + "loss": 1.4688, + "step": 5570 + }, + { + "epoch": 0.5886075949367089, + "grad_norm": 0.6324935555458069, + "learning_rate": 0.0015, + "loss": 1.4683, + "step": 5580 + }, + { + "epoch": 0.5896624472573839, + "grad_norm": 0.6324947476387024, + "learning_rate": 0.0015, + "loss": 1.4754, + "step": 5590 + }, + { + "epoch": 0.5907172995780591, + "grad_norm": 0.9353732466697693, + "learning_rate": 0.0015, + "loss": 1.4669, + "step": 5600 + }, + { + "epoch": 0.5917721518987342, + "grad_norm": 0.7026352286338806, + "learning_rate": 0.0015, + "loss": 1.4725, + "step": 5610 + }, + { + "epoch": 0.5928270042194093, + "grad_norm": 0.6273441314697266, + "learning_rate": 0.0015, + "loss": 1.4812, + "step": 5620 + }, + { + "epoch": 0.5938818565400844, + "grad_norm": 0.7759383916854858, + "learning_rate": 0.0015, + "loss": 1.4763, + "step": 5630 + }, + { + "epoch": 0.5949367088607594, + "grad_norm": 0.6315538287162781, + "learning_rate": 0.0015, + "loss": 1.4843, + "step": 5640 + }, + { + "epoch": 0.5959915611814346, + "grad_norm": 0.7065660953521729, + "learning_rate": 0.0015, + "loss": 1.4776, + "step": 5650 + }, + { + "epoch": 0.5970464135021097, + "grad_norm": 0.6241559386253357, + "learning_rate": 0.0015, + "loss": 1.4743, + "step": 5660 + }, + { + "epoch": 0.5981012658227848, + "grad_norm": 0.6300352215766907, + "learning_rate": 0.0015, + "loss": 1.4815, + "step": 5670 + }, + { + "epoch": 0.5991561181434599, + "grad_norm": 0.7059628963470459, + "learning_rate": 0.0015, + "loss": 1.4812, + "step": 5680 + }, + { + "epoch": 0.6002109704641351, + "grad_norm": 1.0336222648620605, + "learning_rate": 0.0015, + "loss": 1.462, + "step": 5690 + }, + { + "epoch": 0.6012658227848101, + "grad_norm": 0.792210578918457, + 
"learning_rate": 0.0015, + "loss": 1.4646, + "step": 5700 + }, + { + "epoch": 0.6023206751054853, + "grad_norm": 0.5760052800178528, + "learning_rate": 0.0015, + "loss": 1.4682, + "step": 5710 + }, + { + "epoch": 0.6033755274261603, + "grad_norm": 0.5979909896850586, + "learning_rate": 0.0015, + "loss": 1.4717, + "step": 5720 + }, + { + "epoch": 0.6044303797468354, + "grad_norm": 0.6502906084060669, + "learning_rate": 0.0015, + "loss": 1.4716, + "step": 5730 + }, + { + "epoch": 0.6054852320675106, + "grad_norm": 0.666630208492279, + "learning_rate": 0.0015, + "loss": 1.4698, + "step": 5740 + }, + { + "epoch": 0.6065400843881856, + "grad_norm": 0.671164333820343, + "learning_rate": 0.0015, + "loss": 1.4487, + "step": 5750 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 0.6904129385948181, + "learning_rate": 0.0015, + "loss": 1.4565, + "step": 5760 + }, + { + "epoch": 0.6086497890295358, + "grad_norm": 0.7492054104804993, + "learning_rate": 0.0015, + "loss": 1.458, + "step": 5770 + }, + { + "epoch": 0.609704641350211, + "grad_norm": 0.6643478274345398, + "learning_rate": 0.0015, + "loss": 1.4638, + "step": 5780 + }, + { + "epoch": 0.6107594936708861, + "grad_norm": 0.637754499912262, + "learning_rate": 0.0015, + "loss": 1.4608, + "step": 5790 + }, + { + "epoch": 0.6118143459915611, + "grad_norm": 0.6098731756210327, + "learning_rate": 0.0015, + "loss": 1.4677, + "step": 5800 + }, + { + "epoch": 0.6128691983122363, + "grad_norm": 0.6036657691001892, + "learning_rate": 0.0015, + "loss": 1.4694, + "step": 5810 + }, + { + "epoch": 0.6139240506329114, + "grad_norm": 0.7596926093101501, + "learning_rate": 0.0015, + "loss": 1.4841, + "step": 5820 + }, + { + "epoch": 0.6149789029535865, + "grad_norm": 0.7015154361724854, + "learning_rate": 0.0015, + "loss": 1.4677, + "step": 5830 + }, + { + "epoch": 0.6160337552742616, + "grad_norm": 0.6820868849754333, + "learning_rate": 0.0015, + "loss": 1.4772, + "step": 5840 + }, + { + "epoch": 0.6170886075949367, + "grad_norm": 
0.8433887958526611, + "learning_rate": 0.0015, + "loss": 1.4819, + "step": 5850 + }, + { + "epoch": 0.6181434599156118, + "grad_norm": 0.603254497051239, + "learning_rate": 0.0015, + "loss": 1.4695, + "step": 5860 + }, + { + "epoch": 0.619198312236287, + "grad_norm": 0.5750964879989624, + "learning_rate": 0.0015, + "loss": 1.4625, + "step": 5870 + }, + { + "epoch": 0.620253164556962, + "grad_norm": 0.9527162909507751, + "learning_rate": 0.0015, + "loss": 1.4716, + "step": 5880 + }, + { + "epoch": 0.6213080168776371, + "grad_norm": 0.6797378659248352, + "learning_rate": 0.0015, + "loss": 1.4654, + "step": 5890 + }, + { + "epoch": 0.6223628691983122, + "grad_norm": 0.6489676833152771, + "learning_rate": 0.0015, + "loss": 1.4648, + "step": 5900 + }, + { + "epoch": 0.6234177215189873, + "grad_norm": 0.60887211561203, + "learning_rate": 0.0015, + "loss": 1.4585, + "step": 5910 + }, + { + "epoch": 0.6244725738396625, + "grad_norm": 0.5964041948318481, + "learning_rate": 0.0015, + "loss": 1.4556, + "step": 5920 + }, + { + "epoch": 0.6255274261603375, + "grad_norm": 1.0236953496932983, + "learning_rate": 0.0015, + "loss": 1.4653, + "step": 5930 + }, + { + "epoch": 0.6265822784810127, + "grad_norm": 0.7320529222488403, + "learning_rate": 0.0015, + "loss": 1.4691, + "step": 5940 + }, + { + "epoch": 0.6276371308016878, + "grad_norm": 0.7991471290588379, + "learning_rate": 0.0015, + "loss": 1.4582, + "step": 5950 + }, + { + "epoch": 0.6286919831223629, + "grad_norm": 0.6449185013771057, + "learning_rate": 0.0015, + "loss": 1.4642, + "step": 5960 + }, + { + "epoch": 0.629746835443038, + "grad_norm": 0.6430728435516357, + "learning_rate": 0.0015, + "loss": 1.4652, + "step": 5970 + }, + { + "epoch": 0.630801687763713, + "grad_norm": 0.6137993335723877, + "learning_rate": 0.0015, + "loss": 1.4492, + "step": 5980 + }, + { + "epoch": 0.6318565400843882, + "grad_norm": 0.7657532691955566, + "learning_rate": 0.0015, + "loss": 1.471, + "step": 5990 + }, + { + "epoch": 
0.6329113924050633, + "grad_norm": 1.0037484169006348, + "learning_rate": 0.0015, + "loss": 1.4843, + "step": 6000 + }, + { + "epoch": 0.6339662447257384, + "grad_norm": 0.7444342374801636, + "learning_rate": 0.0015, + "loss": 1.463, + "step": 6010 + }, + { + "epoch": 0.6350210970464135, + "grad_norm": 0.7103517055511475, + "learning_rate": 0.0015, + "loss": 1.4622, + "step": 6020 + }, + { + "epoch": 0.6360759493670886, + "grad_norm": 0.6482492685317993, + "learning_rate": 0.0015, + "loss": 1.4453, + "step": 6030 + }, + { + "epoch": 0.6371308016877637, + "grad_norm": 0.8009716272354126, + "learning_rate": 0.0015, + "loss": 1.4645, + "step": 6040 + }, + { + "epoch": 0.6381856540084389, + "grad_norm": 0.6729794144630432, + "learning_rate": 0.0015, + "loss": 1.4489, + "step": 6050 + }, + { + "epoch": 0.6392405063291139, + "grad_norm": 0.7312999367713928, + "learning_rate": 0.0015, + "loss": 1.4692, + "step": 6060 + }, + { + "epoch": 0.640295358649789, + "grad_norm": 0.7489555478096008, + "learning_rate": 0.0015, + "loss": 1.4614, + "step": 6070 + }, + { + "epoch": 0.6413502109704642, + "grad_norm": 0.5873612761497498, + "learning_rate": 0.0015, + "loss": 1.4573, + "step": 6080 + }, + { + "epoch": 0.6424050632911392, + "grad_norm": 0.5916730165481567, + "learning_rate": 0.0015, + "loss": 1.4597, + "step": 6090 + }, + { + "epoch": 0.6434599156118144, + "grad_norm": 0.5824461579322815, + "learning_rate": 0.0015, + "loss": 1.4555, + "step": 6100 + }, + { + "epoch": 0.6445147679324894, + "grad_norm": 0.5909306406974792, + "learning_rate": 0.0015, + "loss": 1.4635, + "step": 6110 + }, + { + "epoch": 0.6455696202531646, + "grad_norm": 0.6104640960693359, + "learning_rate": 0.0015, + "loss": 1.4591, + "step": 6120 + }, + { + "epoch": 0.6466244725738397, + "grad_norm": 0.82509845495224, + "learning_rate": 0.0015, + "loss": 1.4577, + "step": 6130 + }, + { + "epoch": 0.6476793248945147, + "grad_norm": 0.9862090945243835, + "learning_rate": 0.0015, + "loss": 1.4577, + "step": 
6140 + }, + { + "epoch": 0.6487341772151899, + "grad_norm": 0.6922302842140198, + "learning_rate": 0.0015, + "loss": 1.4694, + "step": 6150 + }, + { + "epoch": 0.6497890295358649, + "grad_norm": 0.7016127109527588, + "learning_rate": 0.0015, + "loss": 1.4627, + "step": 6160 + }, + { + "epoch": 0.6508438818565401, + "grad_norm": 0.6524290442466736, + "learning_rate": 0.0014834368975312174, + "loss": 1.4412, + "step": 6170 + }, + { + "epoch": 0.6518987341772152, + "grad_norm": 0.9576454758644104, + "learning_rate": 0.0014629899726345957, + "loss": 1.4631, + "step": 6180 + }, + { + "epoch": 0.6529535864978903, + "grad_norm": 0.5998448729515076, + "learning_rate": 0.0014428248775471316, + "loss": 1.4665, + "step": 6190 + }, + { + "epoch": 0.6540084388185654, + "grad_norm": 0.7185853123664856, + "learning_rate": 0.00142293772767289, + "loss": 1.4535, + "step": 6200 + }, + { + "epoch": 0.6550632911392406, + "grad_norm": 0.6232958436012268, + "learning_rate": 0.001403324691959192, + "loss": 1.4485, + "step": 6210 + }, + { + "epoch": 0.6561181434599156, + "grad_norm": 0.7536224722862244, + "learning_rate": 0.0013839819921586025, + "loss": 1.4571, + "step": 6220 + }, + { + "epoch": 0.6571729957805907, + "grad_norm": 0.6535559296607971, + "learning_rate": 0.0013649059021010894, + "loss": 1.444, + "step": 6230 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 0.7590029239654541, + "learning_rate": 0.0013460927469762154, + "loss": 1.4459, + "step": 6240 + }, + { + "epoch": 0.6592827004219409, + "grad_norm": 0.6376825571060181, + "learning_rate": 0.0013275389026252255, + "loss": 1.4501, + "step": 6250 + }, + { + "epoch": 0.6603375527426161, + "grad_norm": 0.6270737648010254, + "learning_rate": 0.0013092407948428887, + "loss": 1.4419, + "step": 6260 + }, + { + "epoch": 0.6613924050632911, + "grad_norm": 0.6457526683807373, + "learning_rate": 0.001291194898688966, + "loss": 1.448, + "step": 6270 + }, + { + "epoch": 0.6624472573839663, + "grad_norm": 0.6523187160491943, + 
"learning_rate": 0.001273397737809166, + "loss": 1.4459, + "step": 6280 + }, + { + "epoch": 0.6635021097046413, + "grad_norm": 0.5821955800056458, + "learning_rate": 0.001255845883765463, + "loss": 1.4405, + "step": 6290 + }, + { + "epoch": 0.6645569620253164, + "grad_norm": 0.8359414339065552, + "learning_rate": 0.001238535955375642, + "loss": 1.4337, + "step": 6300 + }, + { + "epoch": 0.6656118143459916, + "grad_norm": 0.7809079885482788, + "learning_rate": 0.0012214646180619506, + "loss": 1.4325, + "step": 6310 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.5825558304786682, + "learning_rate": 0.001204628583208727, + "loss": 1.426, + "step": 6320 + }, + { + "epoch": 0.6677215189873418, + "grad_norm": 0.6133561134338379, + "learning_rate": 0.0011880246075288827, + "loss": 1.4335, + "step": 6330 + }, + { + "epoch": 0.6687763713080169, + "grad_norm": 0.7389736771583557, + "learning_rate": 0.001171649492439115, + "loss": 1.4274, + "step": 6340 + }, + { + "epoch": 0.669831223628692, + "grad_norm": 0.6356820464134216, + "learning_rate": 0.0011555000834437364, + "loss": 1.4312, + "step": 6350 + }, + { + "epoch": 0.6708860759493671, + "grad_norm": 0.682184100151062, + "learning_rate": 0.0011395732695269908, + "loss": 1.4269, + "step": 6360 + }, + { + "epoch": 0.6719409282700421, + "grad_norm": 0.6300919055938721, + "learning_rate": 0.0011238659825537505, + "loss": 1.4094, + "step": 6370 + }, + { + "epoch": 0.6729957805907173, + "grad_norm": 0.5556436777114868, + "learning_rate": 0.0011083751966784717, + "loss": 1.4122, + "step": 6380 + }, + { + "epoch": 0.6740506329113924, + "grad_norm": 0.886567234992981, + "learning_rate": 0.0010930979277622953, + "loss": 1.4254, + "step": 6390 + }, + { + "epoch": 0.6751054852320675, + "grad_norm": 0.633384644985199, + "learning_rate": 0.0010780312327981854, + "loss": 1.4222, + "step": 6400 + }, + { + "epoch": 0.6761603375527426, + "grad_norm": 0.5969919562339783, + "learning_rate": 0.0010631722093439888, + "loss": 1.4189, + 
"step": 6410 + }, + { + "epoch": 0.6772151898734177, + "grad_norm": 0.5789670944213867, + "learning_rate": 0.00104851799496331, + "loss": 1.405, + "step": 6420 + }, + { + "epoch": 0.6782700421940928, + "grad_norm": 0.5954998731613159, + "learning_rate": 0.0010340657666740914, + "loss": 1.4159, + "step": 6430 + }, + { + "epoch": 0.679324894514768, + "grad_norm": 0.793982982635498, + "learning_rate": 0.0010198127404047975, + "loss": 1.4013, + "step": 6440 + }, + { + "epoch": 0.680379746835443, + "grad_norm": 0.6856889128684998, + "learning_rate": 0.0010057561704580897, + "loss": 1.4035, + "step": 6450 + }, + { + "epoch": 0.6814345991561181, + "grad_norm": 0.564650297164917, + "learning_rate": 0.0009918933489818985, + "loss": 1.4136, + "step": 6460 + }, + { + "epoch": 0.6824894514767933, + "grad_norm": 0.6375980973243713, + "learning_rate": 0.0009782216054477827, + "loss": 1.407, + "step": 6470 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 0.6755557656288147, + "learning_rate": 0.0009647383061364801, + "loss": 1.4131, + "step": 6480 + }, + { + "epoch": 0.6845991561181435, + "grad_norm": 0.5792467594146729, + "learning_rate": 0.0009514408536305495, + "loss": 1.4077, + "step": 6490 + }, + { + "epoch": 0.6856540084388185, + "grad_norm": 0.7037333846092224, + "learning_rate": 0.0009383266863140042, + "loss": 1.4202, + "step": 6500 + }, + { + "epoch": 0.6867088607594937, + "grad_norm": 0.5915057063102722, + "learning_rate": 0.000925393277878844, + "loss": 1.4179, + "step": 6510 + }, + { + "epoch": 0.6877637130801688, + "grad_norm": 0.6086336970329285, + "learning_rate": 0.0009126381368383879, + "loss": 1.4028, + "step": 6520 + }, + { + "epoch": 0.6888185654008439, + "grad_norm": 0.7631401419639587, + "learning_rate": 0.0009000588060473156, + "loss": 1.3939, + "step": 6530 + }, + { + "epoch": 0.689873417721519, + "grad_norm": 0.6577200293540955, + "learning_rate": 0.0008876528622283235, + "loss": 1.4035, + "step": 6540 + }, + { + "epoch": 0.6909282700421941, + 
"grad_norm": 0.613714337348938, + "learning_rate": 0.0008754179155053053, + "loss": 1.3969, + "step": 6550 + }, + { + "epoch": 0.6919831223628692, + "grad_norm": 0.7677353024482727, + "learning_rate": 0.0008633516089429683, + "loss": 1.3969, + "step": 6560 + }, + { + "epoch": 0.6930379746835443, + "grad_norm": 0.5944827795028687, + "learning_rate": 0.0008514516180927928, + "loss": 1.3956, + "step": 6570 + }, + { + "epoch": 0.6940928270042194, + "grad_norm": 0.6416760683059692, + "learning_rate": 0.0008397156505452524, + "loss": 1.3903, + "step": 6580 + }, + { + "epoch": 0.6951476793248945, + "grad_norm": 0.6125935912132263, + "learning_rate": 0.0008281414454882051, + "loss": 1.3925, + "step": 6590 + }, + { + "epoch": 0.6962025316455697, + "grad_norm": 0.5811773538589478, + "learning_rate": 0.0008167267732713704, + "loss": 1.3986, + "step": 6600 + }, + { + "epoch": 0.6972573839662447, + "grad_norm": 0.5692859888076782, + "learning_rate": 0.0008054694349768117, + "loss": 1.3826, + "step": 6610 + }, + { + "epoch": 0.6983122362869199, + "grad_norm": 0.5544273853302002, + "learning_rate": 0.0007943672619953359, + "loss": 1.3935, + "step": 6620 + }, + { + "epoch": 0.6993670886075949, + "grad_norm": 0.6564979553222656, + "learning_rate": 0.0007834181156087356, + "loss": 1.3869, + "step": 6630 + }, + { + "epoch": 0.70042194092827, + "grad_norm": 0.6044861674308777, + "learning_rate": 0.0007726198865777852, + "loss": 1.3905, + "step": 6640 + }, + { + "epoch": 0.7014767932489452, + "grad_norm": 0.5948479771614075, + "learning_rate": 0.0007619704947359191, + "loss": 1.3821, + "step": 6650 + }, + { + "epoch": 0.7025316455696202, + "grad_norm": 0.6185819506645203, + "learning_rate": 0.0007514678885885087, + "loss": 1.3819, + "step": 6660 + }, + { + "epoch": 0.7035864978902954, + "grad_norm": 0.570355236530304, + "learning_rate": 0.0007411100449176633, + "loss": 1.384, + "step": 6670 + }, + { + "epoch": 0.7046413502109705, + "grad_norm": 0.5519322752952576, + "learning_rate": 
0.0007308949683924791, + "loss": 1.3836, + "step": 6680 + }, + { + "epoch": 0.7056962025316456, + "grad_norm": 0.6107560992240906, + "learning_rate": 0.000720820691184658, + "loss": 1.3753, + "step": 6690 + }, + { + "epoch": 0.7067510548523207, + "grad_norm": 0.5432149171829224, + "learning_rate": 0.0007108852725894269, + "loss": 1.3777, + "step": 6700 + }, + { + "epoch": 0.7078059071729957, + "grad_norm": 0.619623601436615, + "learning_rate": 0.000701086798651681, + "loss": 1.3792, + "step": 6710 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 0.7213531136512756, + "learning_rate": 0.0006914233817972798, + "loss": 1.3673, + "step": 6720 + }, + { + "epoch": 0.709915611814346, + "grad_norm": 0.5632906556129456, + "learning_rate": 0.0006818931604694261, + "loss": 1.3788, + "step": 6730 + }, + { + "epoch": 0.7109704641350211, + "grad_norm": 0.5627778768539429, + "learning_rate": 0.0006724942987700563, + "loss": 1.378, + "step": 6740 + }, + { + "epoch": 0.7120253164556962, + "grad_norm": 0.5579544901847839, + "learning_rate": 0.0006632249861061732, + "loss": 1.3803, + "step": 6750 + }, + { + "epoch": 0.7130801687763713, + "grad_norm": 0.691521406173706, + "learning_rate": 0.0006540834368410549, + "loss": 1.3759, + "step": 6760 + }, + { + "epoch": 0.7141350210970464, + "grad_norm": 0.6185905933380127, + "learning_rate": 0.0006450678899502701, + "loss": 1.3767, + "step": 6770 + }, + { + "epoch": 0.7151898734177216, + "grad_norm": 0.5896825194358826, + "learning_rate": 0.0006361766086824345, + "loss": 1.3727, + "step": 6780 + }, + { + "epoch": 0.7162447257383966, + "grad_norm": 0.5611611604690552, + "learning_rate": 0.000627407880224645, + "loss": 1.3792, + "step": 6790 + }, + { + "epoch": 0.7172995780590717, + "grad_norm": 0.5953310132026672, + "learning_rate": 0.0006187600153725225, + "loss": 1.3644, + "step": 6800 + }, + { + "epoch": 0.7183544303797469, + "grad_norm": 0.8019931316375732, + "learning_rate": 0.0006102313482048055, + "loss": 1.3696, + "step": 6810 
+ }, + { + "epoch": 0.7194092827004219, + "grad_norm": 0.6050307154655457, + "learning_rate": 0.0006018202357624274, + "loss": 1.3695, + "step": 6820 + }, + { + "epoch": 0.7204641350210971, + "grad_norm": 0.6819031834602356, + "learning_rate": 0.0005935250577320168, + "loss": 1.364, + "step": 6830 + }, + { + "epoch": 0.7215189873417721, + "grad_norm": 0.7755671143531799, + "learning_rate": 0.0005853442161337618, + "loss": 1.3564, + "step": 6840 + }, + { + "epoch": 0.7225738396624473, + "grad_norm": 0.524270236492157, + "learning_rate": 0.0005772761350135759, + "loss": 1.3637, + "step": 6850 + }, + { + "epoch": 0.7236286919831224, + "grad_norm": 0.6057386994361877, + "learning_rate": 0.0005693192601395058, + "loss": 1.3575, + "step": 6860 + }, + { + "epoch": 0.7246835443037974, + "grad_norm": 0.6446287035942078, + "learning_rate": 0.000561472058702326, + "loss": 1.3561, + "step": 6870 + }, + { + "epoch": 0.7257383966244726, + "grad_norm": 0.6866294741630554, + "learning_rate": 0.000553733019020258, + "loss": 1.3639, + "step": 6880 + }, + { + "epoch": 0.7267932489451476, + "grad_norm": 0.631495475769043, + "learning_rate": 0.0005461006502477612, + "loss": 1.3514, + "step": 6890 + }, + { + "epoch": 0.7278481012658228, + "grad_norm": 0.648935079574585, + "learning_rate": 0.0005385734820883369, + "loss": 1.3537, + "step": 6900 + }, + { + "epoch": 0.7289029535864979, + "grad_norm": 0.5714157819747925, + "learning_rate": 0.0005311500645112907, + "loss": 1.376, + "step": 6910 + }, + { + "epoch": 0.729957805907173, + "grad_norm": 0.5869693756103516, + "learning_rate": 0.0005238289674723993, + "loss": 1.356, + "step": 6920 + }, + { + "epoch": 0.7310126582278481, + "grad_norm": 0.5632079243659973, + "learning_rate": 0.0005166087806384274, + "loss": 1.3636, + "step": 6930 + }, + { + "epoch": 0.7320675105485233, + "grad_norm": 0.6805329322814941, + "learning_rate": 0.0005094881131154418, + "loss": 1.3642, + "step": 6940 + }, + { + "epoch": 0.7331223628691983, + "grad_norm": 
0.6069631576538086, + "learning_rate": 0.0005024655931808696, + "loss": 1.361, + "step": 6950 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 0.5363380312919617, + "learning_rate": 0.0004955398680192508, + "loss": 1.3518, + "step": 6960 + }, + { + "epoch": 0.7352320675105485, + "grad_norm": 0.6071041226387024, + "learning_rate": 0.000488709603461632, + "loss": 1.3472, + "step": 6970 + }, + { + "epoch": 0.7362869198312236, + "grad_norm": 0.549613893032074, + "learning_rate": 0.000481973483728553, + "loss": 1.3497, + "step": 6980 + }, + { + "epoch": 0.7373417721518988, + "grad_norm": 0.6225315928459167, + "learning_rate": 0.0004753302111765748, + "loss": 1.3501, + "step": 6990 + }, + { + "epoch": 0.7383966244725738, + "grad_norm": 0.580544114112854, + "learning_rate": 0.0004687785060483032, + "loss": 1.3617, + "step": 7000 + }, + { + "epoch": 0.739451476793249, + "grad_norm": 0.545915424823761, + "learning_rate": 0.0004623171062258558, + "loss": 1.3305, + "step": 7010 + }, + { + "epoch": 0.740506329113924, + "grad_norm": 0.6700500249862671, + "learning_rate": 0.0004559447669877288, + "loss": 1.3462, + "step": 7020 + }, + { + "epoch": 0.7415611814345991, + "grad_norm": 0.5874764919281006, + "learning_rate": 0.00044966026076901413, + "loss": 1.3505, + "step": 7030 + }, + { + "epoch": 0.7426160337552743, + "grad_norm": 0.700940728187561, + "learning_rate": 0.00044346237692492177, + "loss": 1.3536, + "step": 7040 + }, + { + "epoch": 0.7436708860759493, + "grad_norm": 0.6029543876647949, + "learning_rate": 0.0004373499214975615, + "loss": 1.3411, + "step": 7050 + }, + { + "epoch": 0.7447257383966245, + "grad_norm": 0.6259494423866272, + "learning_rate": 0.0004313217169859396, + "loss": 1.3459, + "step": 7060 + }, + { + "epoch": 0.7457805907172996, + "grad_norm": 0.6493311524391174, + "learning_rate": 0.0004253766021191256, + "loss": 1.3512, + "step": 7070 + }, + { + "epoch": 0.7468354430379747, + "grad_norm": 0.5584121346473694, + "learning_rate": 
0.00041951343163254497, + "loss": 1.3514, + "step": 7080 + }, + { + "epoch": 0.7478902953586498, + "grad_norm": 0.5865875482559204, + "learning_rate": 0.00041373107604735626, + "loss": 1.3461, + "step": 7090 + }, + { + "epoch": 0.7489451476793249, + "grad_norm": 0.6342407464981079, + "learning_rate": 0.0004080284214528687, + "loss": 1.3405, + "step": 7100 + }, + { + "epoch": 0.75, + "grad_norm": 0.6013846397399902, + "learning_rate": 0.0004024043692919589, + "loss": 1.3517, + "step": 7110 + }, + { + "epoch": 0.7510548523206751, + "grad_norm": 0.6558407545089722, + "learning_rate": 0.0003968578361494449, + "loss": 1.3458, + "step": 7120 + }, + { + "epoch": 0.7521097046413502, + "grad_norm": 0.5630905032157898, + "learning_rate": 0.000391387753543378, + "loss": 1.3521, + "step": 7130 + }, + { + "epoch": 0.7531645569620253, + "grad_norm": 0.5589712262153625, + "learning_rate": 0.00038599306771921023, + "loss": 1.3365, + "step": 7140 + }, + { + "epoch": 0.7542194092827004, + "grad_norm": 0.6231862902641296, + "learning_rate": 0.0003806727394468004, + "loss": 1.3337, + "step": 7150 + }, + { + "epoch": 0.7552742616033755, + "grad_norm": 0.5534417033195496, + "learning_rate": 0.0003754257438202162, + "loss": 1.3403, + "step": 7160 + }, + { + "epoch": 0.7563291139240507, + "grad_norm": 0.7053126692771912, + "learning_rate": 0.0003702510700602974, + "loss": 1.3495, + "step": 7170 + }, + { + "epoch": 0.7573839662447257, + "grad_norm": 0.6563172340393066, + "learning_rate": 0.0003651477213199393, + "loss": 1.3276, + "step": 7180 + }, + { + "epoch": 0.7584388185654009, + "grad_norm": 0.5651391744613647, + "learning_rate": 0.000360114714492061, + "loss": 1.3329, + "step": 7190 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 0.5689417123794556, + "learning_rate": 0.0003551510800202195, + "loss": 1.3362, + "step": 7200 + }, + { + "epoch": 0.760548523206751, + "grad_norm": 0.6080328226089478, + "learning_rate": 0.0003502558617118353, + "loss": 1.3389, + "step": 7210 + }, + { 
+ "epoch": 0.7616033755274262, + "grad_norm": 0.6188055872917175, + "learning_rate": 0.0003454281165539914, + "loss": 1.3517, + "step": 7220 + }, + { + "epoch": 0.7626582278481012, + "grad_norm": 0.5675854682922363, + "learning_rate": 0.00034066691453177176, + "loss": 1.3467, + "step": 7230 + }, + { + "epoch": 0.7637130801687764, + "grad_norm": 0.5765607953071594, + "learning_rate": 0.0003359713384491037, + "loss": 1.3428, + "step": 7240 + }, + { + "epoch": 0.7647679324894515, + "grad_norm": 0.5571680068969727, + "learning_rate": 0.00033134048375206944, + "loss": 1.3373, + "step": 7250 + }, + { + "epoch": 0.7658227848101266, + "grad_norm": 0.5907789468765259, + "learning_rate": 0.0003267734583546536, + "loss": 1.3349, + "step": 7260 + }, + { + "epoch": 0.7668776371308017, + "grad_norm": 0.5772618651390076, + "learning_rate": 0.00032226938246689157, + "loss": 1.3342, + "step": 7270 + }, + { + "epoch": 0.7679324894514767, + "grad_norm": 0.577710747718811, + "learning_rate": 0.0003178273884253874, + "loss": 1.3358, + "step": 7280 + }, + { + "epoch": 0.7689873417721519, + "grad_norm": 0.591688871383667, + "learning_rate": 0.0003134466205261674, + "loss": 1.3449, + "step": 7290 + }, + { + "epoch": 0.770042194092827, + "grad_norm": 0.6315087676048279, + "learning_rate": 0.0003091262348598378, + "loss": 1.3473, + "step": 7300 + }, + { + "epoch": 0.7710970464135021, + "grad_norm": 0.5724259614944458, + "learning_rate": 0.0003048653991490141, + "loss": 1.3295, + "step": 7310 + }, + { + "epoch": 0.7721518987341772, + "grad_norm": 0.582590639591217, + "learning_rate": 0.00030066329258799187, + "loss": 1.327, + "step": 7320 + }, + { + "epoch": 0.7732067510548524, + "grad_norm": 0.561863362789154, + "learning_rate": 0.0002965191056846266, + "loss": 1.3303, + "step": 7330 + }, + { + "epoch": 0.7742616033755274, + "grad_norm": 0.6267061829566956, + "learning_rate": 0.000292432040104394, + "loss": 1.3274, + "step": 7340 + }, + { + "epoch": 0.7753164556962026, + "grad_norm": 
0.5555689334869385, + "learning_rate": 0.00028840130851659853, + "loss": 1.323, + "step": 7350 + }, + { + "epoch": 0.7763713080168776, + "grad_norm": 0.5299456715583801, + "learning_rate": 0.0002844261344427028, + "loss": 1.3313, + "step": 7360 + }, + { + "epoch": 0.7774261603375527, + "grad_norm": 0.5769301056861877, + "learning_rate": 0.0002805057521067471, + "loss": 1.3332, + "step": 7370 + }, + { + "epoch": 0.7784810126582279, + "grad_norm": 0.5532737970352173, + "learning_rate": 0.00027663940628783017, + "loss": 1.3186, + "step": 7380 + }, + { + "epoch": 0.7795358649789029, + "grad_norm": 0.5504904985427856, + "learning_rate": 0.00027282635217462393, + "loss": 1.3278, + "step": 7390 + }, + { + "epoch": 0.7805907172995781, + "grad_norm": 0.6307162046432495, + "learning_rate": 0.0002690658552218937, + "loss": 1.3364, + "step": 7400 + }, + { + "epoch": 0.7816455696202531, + "grad_norm": 0.5829129219055176, + "learning_rate": 0.00026535719100899516, + "loss": 1.3178, + "step": 7410 + }, + { + "epoch": 0.7827004219409283, + "grad_norm": 0.5658915042877197, + "learning_rate": 0.00026169964510032245, + "loss": 1.3242, + "step": 7420 + }, + { + "epoch": 0.7837552742616034, + "grad_norm": 0.551172137260437, + "learning_rate": 0.00025809251290767984, + "loss": 1.3092, + "step": 7430 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 0.534442126750946, + "learning_rate": 0.00025453509955454957, + "loss": 1.3132, + "step": 7440 + }, + { + "epoch": 0.7858649789029536, + "grad_norm": 0.5543773770332336, + "learning_rate": 0.00025102671974223175, + "loss": 1.3153, + "step": 7450 + }, + { + "epoch": 0.7869198312236287, + "grad_norm": 0.5598480105400085, + "learning_rate": 0.00024756669761782815, + "loss": 1.3267, + "step": 7460 + }, + { + "epoch": 0.7879746835443038, + "grad_norm": 0.5898130536079407, + "learning_rate": 0.0002441543666440464, + "loss": 1.3156, + "step": 7470 + }, + { + "epoch": 0.7890295358649789, + "grad_norm": 0.5876103639602661, + "learning_rate": 
0.00024078906947079878, + "loss": 1.3241, + "step": 7480 + }, + { + "epoch": 0.790084388185654, + "grad_norm": 0.6022315621376038, + "learning_rate": 0.00023747015780857005, + "loss": 1.3294, + "step": 7490 + }, + { + "epoch": 0.7911392405063291, + "grad_norm": 0.6138072609901428, + "learning_rate": 0.00023419699230353144, + "loss": 1.3237, + "step": 7500 + }, + { + "epoch": 0.7921940928270043, + "grad_norm": 0.5699710845947266, + "learning_rate": 0.00023096894241437586, + "loss": 1.3315, + "step": 7510 + }, + { + "epoch": 0.7932489451476793, + "grad_norm": 0.5461618900299072, + "learning_rate": 0.00022778538629085056, + "loss": 1.3128, + "step": 7520 + }, + { + "epoch": 0.7943037974683544, + "grad_norm": 0.5813279151916504, + "learning_rate": 0.00022464571065396427, + "loss": 1.3153, + "step": 7530 + }, + { + "epoch": 0.7953586497890295, + "grad_norm": 0.5995029211044312, + "learning_rate": 0.00022154931067784521, + "loss": 1.3117, + "step": 7540 + }, + { + "epoch": 0.7964135021097046, + "grad_norm": 0.5772252678871155, + "learning_rate": 0.00021849558987322782, + "loss": 1.3116, + "step": 7550 + }, + { + "epoch": 0.7974683544303798, + "grad_norm": 0.5892878770828247, + "learning_rate": 0.0002154839599725452, + "loss": 1.3145, + "step": 7560 + }, + { + "epoch": 0.7985232067510548, + "grad_norm": 0.5628073215484619, + "learning_rate": 0.00021251384081660544, + "loss": 1.3215, + "step": 7570 + }, + { + "epoch": 0.79957805907173, + "grad_norm": 0.5467565059661865, + "learning_rate": 0.0002095846602428303, + "loss": 1.32, + "step": 7580 + }, + { + "epoch": 0.8006329113924051, + "grad_norm": 0.5484524965286255, + "learning_rate": 0.00020669585397503358, + "loss": 1.3125, + "step": 7590 + }, + { + "epoch": 0.8016877637130801, + "grad_norm": 0.5602053999900818, + "learning_rate": 0.0002038468655147195, + "loss": 1.3139, + "step": 7600 + }, + { + "epoch": 0.8027426160337553, + "grad_norm": 0.5578546524047852, + "learning_rate": 0.00020103714603387894, + "loss": 1.3272, + 
"step": 7610 + }, + { + "epoch": 0.8037974683544303, + "grad_norm": 0.5700865387916565, + "learning_rate": 0.00019826615426926338, + "loss": 1.3008, + "step": 7620 + }, + { + "epoch": 0.8048523206751055, + "grad_norm": 0.5464246273040771, + "learning_rate": 0.00019553335641811625, + "loss": 1.3248, + "step": 7630 + }, + { + "epoch": 0.8059071729957806, + "grad_norm": 0.5654239058494568, + "learning_rate": 0.0001928382260353415, + "loss": 1.3138, + "step": 7640 + }, + { + "epoch": 0.8069620253164557, + "grad_norm": 0.5416972041130066, + "learning_rate": 0.00019018024393208902, + "loss": 1.3282, + "step": 7650 + }, + { + "epoch": 0.8080168776371308, + "grad_norm": 0.5687530636787415, + "learning_rate": 0.00018755889807573872, + "loss": 1.3105, + "step": 7660 + }, + { + "epoch": 0.8090717299578059, + "grad_norm": 0.5472725629806519, + "learning_rate": 0.00018497368349126262, + "loss": 1.3159, + "step": 7670 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 0.5564774870872498, + "learning_rate": 0.00018242410216394648, + "loss": 1.3286, + "step": 7680 + }, + { + "epoch": 0.8111814345991561, + "grad_norm": 0.5713562965393066, + "learning_rate": 0.0001799096629434529, + "loss": 1.3028, + "step": 7690 + }, + { + "epoch": 0.8122362869198312, + "grad_norm": 0.5620700716972351, + "learning_rate": 0.00017742988144920578, + "loss": 1.3111, + "step": 7700 + }, + { + "epoch": 0.8132911392405063, + "grad_norm": 0.6595193147659302, + "learning_rate": 0.00017498427997707976, + "loss": 1.3096, + "step": 7710 + }, + { + "epoch": 0.8143459915611815, + "grad_norm": 0.557573139667511, + "learning_rate": 0.00017257238740737548, + "loss": 1.3149, + "step": 7720 + }, + { + "epoch": 0.8154008438818565, + "grad_norm": 0.5701920986175537, + "learning_rate": 0.00017019373911406307, + "loss": 1.3203, + "step": 7730 + }, + { + "epoch": 0.8164556962025317, + "grad_norm": 0.5435668230056763, + "learning_rate": 0.000167847876875277, + "loss": 1.3205, + "step": 7740 + }, + { + "epoch": 
0.8175105485232067, + "grad_norm": 0.6069523096084595, + "learning_rate": 0.00016553434878504428, + "loss": 1.3039, + "step": 7750 + }, + { + "epoch": 0.8185654008438819, + "grad_norm": 0.6343440413475037, + "learning_rate": 0.00016325270916622947, + "loss": 1.3063, + "step": 7760 + }, + { + "epoch": 0.819620253164557, + "grad_norm": 0.5472316145896912, + "learning_rate": 0.00016100251848467966, + "loss": 1.3135, + "step": 7770 + }, + { + "epoch": 0.820675105485232, + "grad_norm": 0.6462746262550354, + "learning_rate": 0.0001587833432645528, + "loss": 1.3065, + "step": 7780 + }, + { + "epoch": 0.8217299578059072, + "grad_norm": 0.6324774026870728, + "learning_rate": 0.00015659475600481292, + "loss": 1.3231, + "step": 7790 + }, + { + "epoch": 0.8227848101265823, + "grad_norm": 0.5284608006477356, + "learning_rate": 0.00015443633509687688, + "loss": 1.3101, + "step": 7800 + }, + { + "epoch": 0.8238396624472574, + "grad_norm": 0.5500490665435791, + "learning_rate": 0.00015230766474339536, + "loss": 1.3083, + "step": 7810 + }, + { + "epoch": 0.8248945147679325, + "grad_norm": 0.5517863631248474, + "learning_rate": 0.00015020833487815416, + "loss": 1.3167, + "step": 7820 + }, + { + "epoch": 0.8259493670886076, + "grad_norm": 0.549925684928894, + "learning_rate": 0.0001481379410870792, + "loss": 1.3093, + "step": 7830 + }, + { + "epoch": 0.8270042194092827, + "grad_norm": 0.5453600287437439, + "learning_rate": 0.00014609608453033013, + "loss": 1.2934, + "step": 7840 + }, + { + "epoch": 0.8280590717299579, + "grad_norm": 0.5260262489318848, + "learning_rate": 0.00014408237186546807, + "loss": 1.3029, + "step": 7850 + }, + { + "epoch": 0.8291139240506329, + "grad_norm": 0.5468515753746033, + "learning_rate": 0.00014209641517168273, + "loss": 1.2923, + "step": 7860 + }, + { + "epoch": 0.830168776371308, + "grad_norm": 0.5875610113143921, + "learning_rate": 0.00014013783187506265, + "loss": 1.3056, + "step": 7870 + }, + { + "epoch": 0.8312236286919831, + "grad_norm": 
0.5722810626029968, + "learning_rate": 0.00013820624467489697, + "loss": 1.3235, + "step": 7880 + }, + { + "epoch": 0.8322784810126582, + "grad_norm": 0.5563709735870361, + "learning_rate": 0.00013630128147099213, + "loss": 1.3207, + "step": 7890 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.537101686000824, + "learning_rate": 0.00013442257529199068, + "loss": 1.2995, + "step": 7900 + }, + { + "epoch": 0.8343881856540084, + "grad_norm": 0.6059907674789429, + "learning_rate": 0.00013256976422467803, + "loss": 1.3092, + "step": 7910 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 0.6264088749885559, + "learning_rate": 0.00013074249134426366, + "loss": 1.3066, + "step": 7920 + }, + { + "epoch": 0.8364978902953587, + "grad_norm": 0.5340638160705566, + "learning_rate": 0.0001289404046456233, + "loss": 1.3162, + "step": 7930 + }, + { + "epoch": 0.8375527426160337, + "grad_norm": 0.5328018665313721, + "learning_rate": 0.0001271631569754887, + "loss": 1.3096, + "step": 7940 + }, + { + "epoch": 0.8386075949367089, + "grad_norm": 0.545251727104187, + "learning_rate": 0.0001254104059655723, + "loss": 1.3143, + "step": 7950 + }, + { + "epoch": 0.8396624472573839, + "grad_norm": 0.535719096660614, + "learning_rate": 0.00012368181396661337, + "loss": 1.2989, + "step": 7960 + }, + { + "epoch": 0.8407172995780591, + "grad_norm": 0.5601621270179749, + "learning_rate": 0.00012197704798333364, + "loss": 1.2998, + "step": 7970 + }, + { + "epoch": 0.8417721518987342, + "grad_norm": 0.5374870300292969, + "learning_rate": 0.00012029577961028894, + "loss": 1.2979, + "step": 7980 + }, + { + "epoch": 0.8428270042194093, + "grad_norm": 0.5823902487754822, + "learning_rate": 0.00011863768496860542, + "loss": 1.3097, + "step": 7990 + }, + { + "epoch": 0.8438818565400844, + "grad_norm": 0.5670998096466064, + "learning_rate": 0.00011700244464358777, + "loss": 1.3055, + "step": 8000 + }, + { + "epoch": 0.8449367088607594, + "grad_norm": 0.55030357837677, + "learning_rate": 
0.00011538974362318715, + "loss": 1.3105, + "step": 8010 + }, + { + "epoch": 0.8459915611814346, + "grad_norm": 0.5547555685043335, + "learning_rate": 0.00011379927123731737, + "loss": 1.3026, + "step": 8020 + }, + { + "epoch": 0.8470464135021097, + "grad_norm": 0.5531983971595764, + "learning_rate": 0.0001122307210980077, + "loss": 1.3027, + "step": 8030 + }, + { + "epoch": 0.8481012658227848, + "grad_norm": 0.533770740032196, + "learning_rate": 0.00011068379104038026, + "loss": 1.3148, + "step": 8040 + }, + { + "epoch": 0.8491561181434599, + "grad_norm": 0.5580774545669556, + "learning_rate": 0.00010915818306444116, + "loss": 1.2976, + "step": 8050 + }, + { + "epoch": 0.8502109704641351, + "grad_norm": 0.5440179705619812, + "learning_rate": 0.00010765360327767384, + "loss": 1.3031, + "step": 8060 + }, + { + "epoch": 0.8512658227848101, + "grad_norm": 0.5207547545433044, + "learning_rate": 0.00010616976183842376, + "loss": 1.3077, + "step": 8070 + }, + { + "epoch": 0.8523206751054853, + "grad_norm": 0.5587083101272583, + "learning_rate": 0.00010470637290006365, + "loss": 1.3097, + "step": 8080 + }, + { + "epoch": 0.8533755274261603, + "grad_norm": 0.5476788282394409, + "learning_rate": 0.00010326315455592764, + "loss": 1.2969, + "step": 8090 + }, + { + "epoch": 0.8544303797468354, + "grad_norm": 0.5930982828140259, + "learning_rate": 0.0001018398287850053, + "loss": 1.2932, + "step": 8100 + }, + { + "epoch": 0.8554852320675106, + "grad_norm": 0.5767351388931274, + "learning_rate": 0.00010043612139838357, + "loss": 1.312, + "step": 8110 + }, + { + "epoch": 0.8565400843881856, + "grad_norm": 0.5543381571769714, + "learning_rate": 9.905176198642719e-05, + "loss": 1.3014, + "step": 8120 + }, + { + "epoch": 0.8575949367088608, + "grad_norm": 0.5643795132637024, + "learning_rate": 9.76864838666871e-05, + "loss": 1.3002, + "step": 8130 + }, + { + "epoch": 0.8586497890295358, + "grad_norm": 0.5286699533462524, + "learning_rate": 9.634002403252676e-05, + "loss": 1.2998, + 
"step": 8140 + }, + { + "epoch": 0.859704641350211, + "grad_norm": 0.5420418381690979, + "learning_rate": 9.501212310245681e-05, + "loss": 1.2967, + "step": 8150 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 0.5427573323249817, + "learning_rate": 9.370252527016777e-05, + "loss": 1.3073, + "step": 8160 + }, + { + "epoch": 0.8618143459915611, + "grad_norm": 0.5684431195259094, + "learning_rate": 9.241097825525163e-05, + "loss": 1.2948, + "step": 8170 + }, + { + "epoch": 0.8628691983122363, + "grad_norm": 0.5473798513412476, + "learning_rate": 9.113723325460276e-05, + "loss": 1.3048, + "step": 8180 + }, + { + "epoch": 0.8639240506329114, + "grad_norm": 0.6615726947784424, + "learning_rate": 8.988104489448849e-05, + "loss": 1.298, + "step": 8190 + }, + { + "epoch": 0.8649789029535865, + "grad_norm": 0.5358026027679443, + "learning_rate": 8.864217118328042e-05, + "loss": 1.3096, + "step": 8200 + }, + { + "epoch": 0.8660337552742616, + "grad_norm": 0.5515100359916687, + "learning_rate": 8.742037346483729e-05, + "loss": 1.3025, + "step": 8210 + }, + { + "epoch": 0.8670886075949367, + "grad_norm": 0.5427886247634888, + "learning_rate": 8.62154163725303e-05, + "loss": 1.3102, + "step": 8220 + }, + { + "epoch": 0.8681434599156118, + "grad_norm": 0.550643265247345, + "learning_rate": 8.502706778390219e-05, + "loss": 1.306, + "step": 8230 + }, + { + "epoch": 0.869198312236287, + "grad_norm": 0.532523512840271, + "learning_rate": 8.38550987759513e-05, + "loss": 1.3016, + "step": 8240 + }, + { + "epoch": 0.870253164556962, + "grad_norm": 0.5340996384620667, + "learning_rate": 8.269928358103191e-05, + "loss": 1.3161, + "step": 8250 + }, + { + "epoch": 0.8713080168776371, + "grad_norm": 0.556290328502655, + "learning_rate": 8.155939954336243e-05, + "loss": 1.305, + "step": 8260 + }, + { + "epoch": 0.8723628691983122, + "grad_norm": 0.5603011250495911, + "learning_rate": 8.043522707613312e-05, + "loss": 1.3035, + "step": 8270 + }, + { + "epoch": 0.8734177215189873, + 
"grad_norm": 0.5537724494934082, + "learning_rate": 7.932654961920486e-05, + "loss": 1.2875, + "step": 8280 + }, + { + "epoch": 0.8744725738396625, + "grad_norm": 0.5351696610450745, + "learning_rate": 7.823315359739135e-05, + "loss": 1.2873, + "step": 8290 + }, + { + "epoch": 0.8755274261603375, + "grad_norm": 0.5564333200454712, + "learning_rate": 7.715482837931577e-05, + "loss": 1.3145, + "step": 8300 + }, + { + "epoch": 0.8765822784810127, + "grad_norm": 0.5572025775909424, + "learning_rate": 7.6091366236835e-05, + "loss": 1.2908, + "step": 8310 + }, + { + "epoch": 0.8776371308016878, + "grad_norm": 0.5360335111618042, + "learning_rate": 7.504256230502289e-05, + "loss": 1.3104, + "step": 8320 + }, + { + "epoch": 0.8786919831223629, + "grad_norm": 0.5379513502120972, + "learning_rate": 7.400821454270524e-05, + "loss": 1.3035, + "step": 8330 + }, + { + "epoch": 0.879746835443038, + "grad_norm": 0.5394819378852844, + "learning_rate": 7.29881236935386e-05, + "loss": 1.2902, + "step": 8340 + }, + { + "epoch": 0.880801687763713, + "grad_norm": 0.5339441895484924, + "learning_rate": 7.198209324762562e-05, + "loss": 1.2941, + "step": 8350 + }, + { + "epoch": 0.8818565400843882, + "grad_norm": 0.5443100333213806, + "learning_rate": 7.098992940365946e-05, + "loss": 1.2924, + "step": 8360 + }, + { + "epoch": 0.8829113924050633, + "grad_norm": 0.5352849364280701, + "learning_rate": 7.001144103159e-05, + "loss": 1.2983, + "step": 8370 + }, + { + "epoch": 0.8839662447257384, + "grad_norm": 0.5510713458061218, + "learning_rate": 6.904643963580461e-05, + "loss": 1.3077, + "step": 8380 + }, + { + "epoch": 0.8850210970464135, + "grad_norm": 0.5565567016601562, + "learning_rate": 6.809473931881644e-05, + "loss": 1.2974, + "step": 8390 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 0.5877848267555237, + "learning_rate": 6.71561567454532e-05, + "loss": 1.298, + "step": 8400 + }, + { + "epoch": 0.8871308016877637, + "grad_norm": 0.5745589733123779, + "learning_rate": 
6.623051110753948e-05, + "loss": 1.3138, + "step": 8410 + }, + { + "epoch": 0.8881856540084389, + "grad_norm": 0.5260949730873108, + "learning_rate": 6.531762408906607e-05, + "loss": 1.3016, + "step": 8420 + }, + { + "epoch": 0.8892405063291139, + "grad_norm": 0.5599448680877686, + "learning_rate": 6.441731983183912e-05, + "loss": 1.2998, + "step": 8430 + }, + { + "epoch": 0.890295358649789, + "grad_norm": 0.5515381693840027, + "learning_rate": 6.352942490160292e-05, + "loss": 1.2913, + "step": 8440 + }, + { + "epoch": 0.8913502109704642, + "grad_norm": 0.5943716168403625, + "learning_rate": 6.265376825462966e-05, + "loss": 1.301, + "step": 8450 + }, + { + "epoch": 0.8924050632911392, + "grad_norm": 0.6185287237167358, + "learning_rate": 6.179018120476945e-05, + "loss": 1.2969, + "step": 8460 + }, + { + "epoch": 0.8934599156118144, + "grad_norm": 0.5664206743240356, + "learning_rate": 6.0938497390954946e-05, + "loss": 1.2982, + "step": 8470 + }, + { + "epoch": 0.8945147679324894, + "grad_norm": 0.5414692759513855, + "learning_rate": 6.009855274515339e-05, + "loss": 1.2924, + "step": 8480 + }, + { + "epoch": 0.8955696202531646, + "grad_norm": 0.5285016298294067, + "learning_rate": 5.9270185460760735e-05, + "loss": 1.2998, + "step": 8490 + }, + { + "epoch": 0.8966244725738397, + "grad_norm": 0.5437647104263306, + "learning_rate": 5.8453235961431225e-05, + "loss": 1.2875, + "step": 8500 + }, + { + "epoch": 0.8976793248945147, + "grad_norm": 0.5499237775802612, + "learning_rate": 5.764754687033678e-05, + "loss": 1.2998, + "step": 8510 + }, + { + "epoch": 0.8987341772151899, + "grad_norm": 0.5571590662002563, + "learning_rate": 5.6852962979849836e-05, + "loss": 1.2967, + "step": 8520 + }, + { + "epoch": 0.8997890295358649, + "grad_norm": 0.535373330116272, + "learning_rate": 5.6069331221644284e-05, + "loss": 1.3101, + "step": 8530 + }, + { + "epoch": 0.9008438818565401, + "grad_norm": 0.542596161365509, + "learning_rate": 5.529650063720842e-05, + "loss": 1.305, + 
"step": 8540 + }, + { + "epoch": 0.9018987341772152, + "grad_norm": 0.5584049820899963, + "learning_rate": 5.453432234876445e-05, + "loss": 1.2963, + "step": 8550 + }, + { + "epoch": 0.9029535864978903, + "grad_norm": 0.5617037415504456, + "learning_rate": 5.37826495305886e-05, + "loss": 1.2881, + "step": 8560 + }, + { + "epoch": 0.9040084388185654, + "grad_norm": 0.6335888504981995, + "learning_rate": 5.304133738072674e-05, + "loss": 1.3064, + "step": 8570 + }, + { + "epoch": 0.9050632911392406, + "grad_norm": 0.5452306866645813, + "learning_rate": 5.2310243093099814e-05, + "loss": 1.2977, + "step": 8580 + }, + { + "epoch": 0.9061181434599156, + "grad_norm": 0.6304827332496643, + "learning_rate": 5.158922582999368e-05, + "loss": 1.3034, + "step": 8590 + }, + { + "epoch": 0.9071729957805907, + "grad_norm": 0.5368176102638245, + "learning_rate": 5.087814669492819e-05, + "loss": 1.2946, + "step": 8600 + }, + { + "epoch": 0.9082278481012658, + "grad_norm": 0.6178833246231079, + "learning_rate": 5.017686870590028e-05, + "loss": 1.296, + "step": 8610 + }, + { + "epoch": 0.9092827004219409, + "grad_norm": 0.5466554164886475, + "learning_rate": 4.948525676899577e-05, + "loss": 1.2914, + "step": 8620 + }, + { + "epoch": 0.9103375527426161, + "grad_norm": 0.5708282589912415, + "learning_rate": 4.880317765236493e-05, + "loss": 1.3049, + "step": 8630 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 0.6130954623222351, + "learning_rate": 4.8130499960556755e-05, + "loss": 1.2842, + "step": 8640 + }, + { + "epoch": 0.9124472573839663, + "grad_norm": 0.5375681519508362, + "learning_rate": 4.746709410920699e-05, + "loss": 1.29, + "step": 8650 + }, + { + "epoch": 0.9135021097046413, + "grad_norm": 0.596631646156311, + "learning_rate": 4.681283230007507e-05, + "loss": 1.2895, + "step": 8660 + }, + { + "epoch": 0.9145569620253164, + "grad_norm": 0.5348437428474426, + "learning_rate": 4.616758849642509e-05, + "loss": 1.2967, + "step": 8670 + }, + { + "epoch": 0.9156118143459916, 
+ "grad_norm": 0.5292144417762756, + "learning_rate": 4.553123839874615e-05, + "loss": 1.3049, + "step": 8680 + }, + { + "epoch": 0.9166666666666666, + "grad_norm": 0.5334331393241882, + "learning_rate": 4.490365942080736e-05, + "loss": 1.2992, + "step": 8690 + }, + { + "epoch": 0.9177215189873418, + "grad_norm": 0.5265600681304932, + "learning_rate": 4.428473066604285e-05, + "loss": 1.2957, + "step": 8700 + }, + { + "epoch": 0.9187763713080169, + "grad_norm": 0.547091007232666, + "learning_rate": 4.367433290426233e-05, + "loss": 1.2964, + "step": 8710 + }, + { + "epoch": 0.919831223628692, + "grad_norm": 0.5164364576339722, + "learning_rate": 4.3072348548682595e-05, + "loss": 1.2887, + "step": 8720 + }, + { + "epoch": 0.9208860759493671, + "grad_norm": 0.5336405038833618, + "learning_rate": 4.247866163327575e-05, + "loss": 1.3014, + "step": 8730 + }, + { + "epoch": 0.9219409282700421, + "grad_norm": 0.5502304434776306, + "learning_rate": 4.1893157790429404e-05, + "loss": 1.291, + "step": 8740 + }, + { + "epoch": 0.9229957805907173, + "grad_norm": 0.5449358224868774, + "learning_rate": 4.1315724228915066e-05, + "loss": 1.2899, + "step": 8750 + }, + { + "epoch": 0.9240506329113924, + "grad_norm": 0.5880643129348755, + "learning_rate": 4.074624971216005e-05, + "loss": 1.277, + "step": 8760 + }, + { + "epoch": 0.9251054852320675, + "grad_norm": 0.5264186263084412, + "learning_rate": 4.018462453681889e-05, + "loss": 1.288, + "step": 8770 + }, + { + "epoch": 0.9261603375527426, + "grad_norm": 0.5335397124290466, + "learning_rate": 3.963074051164014e-05, + "loss": 1.2945, + "step": 8780 + }, + { + "epoch": 0.9272151898734177, + "grad_norm": 0.549601137638092, + "learning_rate": 3.908449093662446e-05, + "loss": 1.2912, + "step": 8790 + }, + { + "epoch": 0.9282700421940928, + "grad_norm": 0.5489893555641174, + "learning_rate": 3.854577058246998e-05, + "loss": 1.2893, + "step": 8800 + }, + { + "epoch": 0.929324894514768, + "grad_norm": 0.5332473516464233, + "learning_rate": 
3.801447567030094e-05, + "loss": 1.3066, + "step": 8810 + }, + { + "epoch": 0.930379746835443, + "grad_norm": 0.5241206884384155, + "learning_rate": 3.7490503851675777e-05, + "loss": 1.2913, + "step": 8820 + }, + { + "epoch": 0.9314345991561181, + "grad_norm": 0.5487094521522522, + "learning_rate": 3.6973754188870806e-05, + "loss": 1.3057, + "step": 8830 + }, + { + "epoch": 0.9324894514767933, + "grad_norm": 0.5234995484352112, + "learning_rate": 3.6464127135435536e-05, + "loss": 1.2949, + "step": 8840 + }, + { + "epoch": 0.9335443037974683, + "grad_norm": 0.5629460215568542, + "learning_rate": 3.596152451701616e-05, + "loss": 1.2946, + "step": 8850 + }, + { + "epoch": 0.9345991561181435, + "grad_norm": 0.573924720287323, + "learning_rate": 3.5465849512443226e-05, + "loss": 1.2941, + "step": 8860 + }, + { + "epoch": 0.9356540084388185, + "grad_norm": 0.550319492816925, + "learning_rate": 3.4977006635080086e-05, + "loss": 1.2951, + "step": 8870 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 0.5447481274604797, + "learning_rate": 3.449490171442838e-05, + "loss": 1.2984, + "step": 8880 + }, + { + "epoch": 0.9377637130801688, + "grad_norm": 0.5491507053375244, + "learning_rate": 3.401944187798702e-05, + "loss": 1.2994, + "step": 8890 + }, + { + "epoch": 0.9388185654008439, + "grad_norm": 0.5341458916664124, + "learning_rate": 3.355053553336137e-05, + "loss": 1.2876, + "step": 8900 + }, + { + "epoch": 0.939873417721519, + "grad_norm": 0.577480673789978, + "learning_rate": 3.308809235061882e-05, + "loss": 1.289, + "step": 8910 + }, + { + "epoch": 0.9409282700421941, + "grad_norm": 0.5139463543891907, + "learning_rate": 3.263202324488772e-05, + "loss": 1.2974, + "step": 8920 + }, + { + "epoch": 0.9419831223628692, + "grad_norm": 0.5780912041664124, + "learning_rate": 3.218224035919609e-05, + "loss": 1.2894, + "step": 8930 + }, + { + "epoch": 0.9430379746835443, + "grad_norm": 0.5245312452316284, + "learning_rate": 3.173865704754688e-05, + "loss": 1.2921, + "step": 
8940 + }, + { + "epoch": 0.9440928270042194, + "grad_norm": 0.5300474166870117, + "learning_rate": 3.130118785822657e-05, + "loss": 1.2927, + "step": 8950 + }, + { + "epoch": 0.9451476793248945, + "grad_norm": 0.536361813545227, + "learning_rate": 3.08697485173437e-05, + "loss": 1.2965, + "step": 8960 + }, + { + "epoch": 0.9462025316455697, + "grad_norm": 0.5535325407981873, + "learning_rate": 3.0444255912594442e-05, + "loss": 1.2928, + "step": 8970 + }, + { + "epoch": 0.9472573839662447, + "grad_norm": 0.5927109718322754, + "learning_rate": 3.002462807725185e-05, + "loss": 1.2898, + "step": 8980 + }, + { + "epoch": 0.9483122362869199, + "grad_norm": 0.5309644341468811, + "learning_rate": 2.9610784174375868e-05, + "loss": 1.2963, + "step": 8990 + }, + { + "epoch": 0.9493670886075949, + "grad_norm": 0.5136746168136597, + "learning_rate": 2.920264448124087e-05, + "loss": 1.2909, + "step": 9000 + }, + { + "epoch": 0.95042194092827, + "grad_norm": 0.5523134469985962, + "learning_rate": 2.8800130373977936e-05, + "loss": 1.2888, + "step": 9010 + }, + { + "epoch": 0.9514767932489452, + "grad_norm": 0.5428795218467712, + "learning_rate": 2.84031643124288e-05, + "loss": 1.2892, + "step": 9020 + }, + { + "epoch": 0.9525316455696202, + "grad_norm": 0.5764241814613342, + "learning_rate": 2.8011669825208517e-05, + "loss": 1.3103, + "step": 9030 + }, + { + "epoch": 0.9535864978902954, + "grad_norm": 0.5402174592018127, + "learning_rate": 2.762557149497405e-05, + "loss": 1.2852, + "step": 9040 + }, + { + "epoch": 0.9546413502109705, + "grad_norm": 0.5388835072517395, + "learning_rate": 2.724479494389592e-05, + "loss": 1.2947, + "step": 9050 + }, + { + "epoch": 0.9556962025316456, + "grad_norm": 0.5216668248176575, + "learning_rate": 2.6869266819330058e-05, + "loss": 1.2958, + "step": 9060 + }, + { + "epoch": 0.9567510548523207, + "grad_norm": 0.5394839644432068, + "learning_rate": 2.6498914779687228e-05, + "loss": 1.2972, + "step": 9070 + }, + { + "epoch": 0.9578059071729957, + 
"grad_norm": 0.5227253437042236, + "learning_rate": 2.6133667480497115e-05, + "loss": 1.2982, + "step": 9080 + }, + { + "epoch": 0.9588607594936709, + "grad_norm": 0.5423758029937744, + "learning_rate": 2.5773454560664597e-05, + "loss": 1.2985, + "step": 9090 + }, + { + "epoch": 0.959915611814346, + "grad_norm": 0.5196573138237, + "learning_rate": 2.541820662891541e-05, + "loss": 1.2783, + "step": 9100 + }, + { + "epoch": 0.9609704641350211, + "grad_norm": 0.5322595834732056, + "learning_rate": 2.5067855250428616e-05, + "loss": 1.2883, + "step": 9110 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 0.545225203037262, + "learning_rate": 2.472233293365335e-05, + "loss": 1.2987, + "step": 9120 + }, + { + "epoch": 0.9630801687763713, + "grad_norm": 0.5407276153564453, + "learning_rate": 2.4381573117307307e-05, + "loss": 1.2854, + "step": 9130 + }, + { + "epoch": 0.9641350210970464, + "grad_norm": 0.572319507598877, + "learning_rate": 2.4045510157554362e-05, + "loss": 1.3023, + "step": 9140 + }, + { + "epoch": 0.9651898734177216, + "grad_norm": 0.5634906888008118, + "learning_rate": 2.3714079315358985e-05, + "loss": 1.2916, + "step": 9150 + }, + { + "epoch": 0.9662447257383966, + "grad_norm": 0.5206300020217896, + "learning_rate": 2.338721674401494e-05, + "loss": 1.2981, + "step": 9160 + }, + { + "epoch": 0.9672995780590717, + "grad_norm": 0.537507951259613, + "learning_rate": 2.30648594768459e-05, + "loss": 1.2957, + "step": 9170 + }, + { + "epoch": 0.9683544303797469, + "grad_norm": 0.543291449546814, + "learning_rate": 2.2746945415075523e-05, + "loss": 1.2916, + "step": 9180 + }, + { + "epoch": 0.9694092827004219, + "grad_norm": 0.5341705679893494, + "learning_rate": 2.2433413315864803e-05, + "loss": 1.2874, + "step": 9190 + }, + { + "epoch": 0.9704641350210971, + "grad_norm": 0.5411273241043091, + "learning_rate": 2.2124202780514277e-05, + "loss": 1.2921, + "step": 9200 + }, + { + "epoch": 0.9715189873417721, + "grad_norm": 0.5340097546577454, + 
"learning_rate": 2.1819254242828815e-05, + "loss": 1.2915, + "step": 9210 + }, + { + "epoch": 0.9725738396624473, + "grad_norm": 0.5345265865325928, + "learning_rate": 2.151850895764285e-05, + "loss": 1.2894, + "step": 9220 + }, + { + "epoch": 0.9736286919831224, + "grad_norm": 0.5276795029640198, + "learning_rate": 2.12219089895037e-05, + "loss": 1.2815, + "step": 9230 + }, + { + "epoch": 0.9746835443037974, + "grad_norm": 0.5508191585540771, + "learning_rate": 2.092939720151092e-05, + "loss": 1.2747, + "step": 9240 + }, + { + "epoch": 0.9757383966244726, + "grad_norm": 0.5273526906967163, + "learning_rate": 2.064091724430947e-05, + "loss": 1.2886, + "step": 9250 + }, + { + "epoch": 0.9767932489451476, + "grad_norm": 0.5308668613433838, + "learning_rate": 2.0356413545234603e-05, + "loss": 1.2825, + "step": 9260 + }, + { + "epoch": 0.9778481012658228, + "grad_norm": 0.5425849556922913, + "learning_rate": 2.0075831297606357e-05, + "loss": 1.2949, + "step": 9270 + }, + { + "epoch": 0.9789029535864979, + "grad_norm": 0.5168497562408447, + "learning_rate": 1.9799116450171627e-05, + "loss": 1.2841, + "step": 9280 + }, + { + "epoch": 0.979957805907173, + "grad_norm": 0.5246695280075073, + "learning_rate": 1.952621569669175e-05, + "loss": 1.2902, + "step": 9290 + }, + { + "epoch": 0.9810126582278481, + "grad_norm": 0.5243405103683472, + "learning_rate": 1.9257076465673605e-05, + "loss": 1.2893, + "step": 9300 + }, + { + "epoch": 0.9820675105485233, + "grad_norm": 0.5647236108779907, + "learning_rate": 1.899164691024229e-05, + "loss": 1.2903, + "step": 9310 + }, + { + "epoch": 0.9831223628691983, + "grad_norm": 0.5375344157218933, + "learning_rate": 1.872987589815331e-05, + "loss": 1.2858, + "step": 9320 + }, + { + "epoch": 0.9841772151898734, + "grad_norm": 0.5346893668174744, + "learning_rate": 1.8471713001942538e-05, + "loss": 1.2971, + "step": 9330 + }, + { + "epoch": 0.9852320675105485, + "grad_norm": 0.5454371571540833, + "learning_rate": 1.8217108489211845e-05, + 
"loss": 1.2953, + "step": 9340 + }, + { + "epoch": 0.9862869198312236, + "grad_norm": 0.5363225340843201, + "learning_rate": 1.7966013313048696e-05, + "loss": 1.3063, + "step": 9350 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 0.523496687412262, + "learning_rate": 1.7718379102577752e-05, + "loss": 1.2973, + "step": 9360 + }, + { + "epoch": 0.9883966244725738, + "grad_norm": 0.5324601531028748, + "learning_rate": 1.7474158153642745e-05, + "loss": 1.2962, + "step": 9370 + }, + { + "epoch": 0.989451476793249, + "grad_norm": 0.5254653096199036, + "learning_rate": 1.7233303419616745e-05, + "loss": 1.2831, + "step": 9380 + }, + { + "epoch": 0.990506329113924, + "grad_norm": 0.5524951219558716, + "learning_rate": 1.699576850233916e-05, + "loss": 1.2871, + "step": 9390 + }, + { + "epoch": 0.9915611814345991, + "grad_norm": 0.5279617309570312, + "learning_rate": 1.6761507643177553e-05, + "loss": 1.3009, + "step": 9400 + }, + { + "epoch": 0.9926160337552743, + "grad_norm": 0.5764878988265991, + "learning_rate": 1.6530475714212752e-05, + "loss": 1.274, + "step": 9410 + }, + { + "epoch": 0.9936708860759493, + "grad_norm": 0.5379413366317749, + "learning_rate": 1.6302628209545423e-05, + "loss": 1.2947, + "step": 9420 + }, + { + "epoch": 0.9947257383966245, + "grad_norm": 0.5225703716278076, + "learning_rate": 1.6077921236722464e-05, + "loss": 1.2945, + "step": 9430 + }, + { + "epoch": 0.9957805907172996, + "grad_norm": 0.5388068556785583, + "learning_rate": 1.5856311508281594e-05, + "loss": 1.2897, + "step": 9440 + }, + { + "epoch": 0.9968354430379747, + "grad_norm": 0.5279055237770081, + "learning_rate": 1.5637756333412454e-05, + "loss": 1.2978, + "step": 9450 + }, + { + "epoch": 0.9978902953586498, + "grad_norm": 0.5495163202285767, + "learning_rate": 1.542221360973268e-05, + "loss": 1.2824, + "step": 9460 + }, + { + "epoch": 0.9989451476793249, + "grad_norm": 0.5348067879676819, + "learning_rate": 1.5209641815177312e-05, + "loss": 1.2954, + "step": 9470 + }, + { + 
"epoch": 1.0, + "grad_norm": 1.5235129594802856, + "learning_rate": 1.5e-05, + "loss": 1.2846, + "step": 9480 + } + ], + "logging_steps": 10, + "max_steps": 9480, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.830818274921677e+16, + "train_batch_size": 1024, + "trial_name": null, + "trial_params": null +} diff --git a/saves-olmo-bf16/checkpoint-9480/training_args.bin b/saves-olmo-bf16/checkpoint-9480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..958fd46f72a6485907fcb43205a779a15ac200db --- /dev/null +++ b/saves-olmo-bf16/checkpoint-9480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbf22f113253a0d17bc47c1a1decdd136a228605592e5673e487d00710db1b42 +size 5112 diff --git a/saves-olmo-bf16/config.json b/saves-olmo-bf16/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8761f5fa7cedb85129adca4a6b09528ef2dc08ac --- /dev/null +++ b/saves-olmo-bf16/config.json @@ -0,0 +1,26 @@ +{ + "architectures": [ + "OlmoForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "clip_qkv": null, + "eos_token_id": 50279, + "hidden_act": "silu", + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 768, + "max_position_embeddings": 2048, + "model_type": "olmo", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pad_token_id": 1, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.42.0", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-olmo-bf16/generation_config.json b/saves-olmo-bf16/generation_config.json new file mode 100644 index 
0000000000000000000000000000000000000000..96f2a31550174be2bb95ece9acb999f7b7aa76d0 --- /dev/null +++ b/saves-olmo-bf16/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "eos_token_id": 50279, + "pad_token_id": 1, + "transformers_version": "4.42.0" +} diff --git a/saves-olmo-bf16/model.safetensors b/saves-olmo-bf16/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d12ad0e38ea49c3efdb89add4f5a37d13f61d029 --- /dev/null +++ b/saves-olmo-bf16/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1de65855971771c12d295e1b66b6a5c7415993152f0e2f91db269ef2a7120b18 +size 8341080 diff --git a/saves-olmo-bf16/result.log b/saves-olmo-bf16/result.log new file mode 100644 index 0000000000000000000000000000000000000000..2458b2e6d32fd3e3d3aecd8200fca3924f8e6c92 --- /dev/null +++ b/saves-olmo-bf16/result.log @@ -0,0 +1 @@ +{'train_runtime': 5185.8759, 'train_samples_per_second': 1871.739, 'train_steps_per_second': 1.828, 'train_loss': 1.605855600542157, 'epoch': 1.0} \ No newline at end of file diff --git a/saves-olmo-bf16/special_tokens_map.json b/saves-olmo-bf16/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-olmo-bf16/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-olmo-bf16/tokenizer.json b/saves-olmo-bf16/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ 
b/saves-olmo-bf16/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 
42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 
211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + 
"ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 
509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 
654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + 
"Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + 
"cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 
1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + 
"åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, 
+ "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 
1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 
1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + 
"é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + 
"ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, 
+ "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-olmo-bf16/tokenizer_config.json b/saves-olmo-bf16/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-olmo-bf16/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + 
"model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-olmo-cosine/checkpoint-9480/config.json b/saves-olmo-cosine/checkpoint-9480/config.json new file mode 100644 index 0000000000000000000000000000000000000000..69830f45579d99650d832c24fb725715de4848e2 --- /dev/null +++ b/saves-olmo-cosine/checkpoint-9480/config.json @@ -0,0 +1,26 @@ +{ + "architectures": [ + "OlmoForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "clip_qkv": null, + "eos_token_id": 50279, + "hidden_act": "silu", + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 768, + "max_position_embeddings": 2048, + "model_type": "olmo", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pad_token_id": 1, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-olmo-cosine/checkpoint-9480/generation_config.json b/saves-olmo-cosine/checkpoint-9480/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..424d0e318171a19c3fe3f1423f5d8dc090cc22d6 --- /dev/null +++ b/saves-olmo-cosine/checkpoint-9480/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "eos_token_id": 50279, + "pad_token_id": 1, + "transformers_version": "4.42.4" +} diff --git a/saves-olmo-cosine/checkpoint-9480/model.safetensors b/saves-olmo-cosine/checkpoint-9480/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..04dfa651c1205817aafcd99008672c4062401a2e --- /dev/null +++ b/saves-olmo-cosine/checkpoint-9480/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93bc2de53c0289ef5f825de79e66562914ed7b7457360e1598fc60afa979f112 +size 8341080 diff --git 
a/saves-olmo-cosine/checkpoint-9480/optimizer.pt b/saves-olmo-cosine/checkpoint-9480/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..78c572390b2250898fef887c598207706a8e3580 --- /dev/null +++ b/saves-olmo-cosine/checkpoint-9480/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b26ba121c8d092b38f5ef7564a8b020eebc20d2202ac1acfdc73aefc2a3529e0 +size 16692145 diff --git a/saves-olmo-cosine/checkpoint-9480/rng_state.pth b/saves-olmo-cosine/checkpoint-9480/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f8291cd6ce87668b786a72f3e93d072fbe54902 --- /dev/null +++ b/saves-olmo-cosine/checkpoint-9480/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c917636c7a58af68a29056522a757e9f9b99005b776641aa157c536967817d +size 14244 diff --git a/saves-olmo-cosine/checkpoint-9480/scheduler.pt b/saves-olmo-cosine/checkpoint-9480/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..03c145297021546d40e130546440641e02059bcb --- /dev/null +++ b/saves-olmo-cosine/checkpoint-9480/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35fd617624c087e1a286ed7cf3fa38baa4a8815e49f107c3186b4c7c58e1adbb +size 1064 diff --git a/saves-olmo-cosine/checkpoint-9480/special_tokens_map.json b/saves-olmo-cosine/checkpoint-9480/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-olmo-cosine/checkpoint-9480/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false + } +} diff --git a/saves-olmo-cosine/checkpoint-9480/tokenizer.json b/saves-olmo-cosine/checkpoint-9480/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-olmo-cosine/checkpoint-9480/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, 
+ "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + 
"Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + 
"ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 
486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 
632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + 
"次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, 
+ "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 
1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + 
"æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 
1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + 
"Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, 
+ "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + 
"å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 
1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 
1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-olmo-cosine/checkpoint-9480/tokenizer_config.json b/saves-olmo-cosine/checkpoint-9480/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-olmo-cosine/checkpoint-9480/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + 
"eos_token": "<|im_end|>", + "errors": "replace", + "model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-olmo-cosine/checkpoint-9480/trainer_state.json b/saves-olmo-cosine/checkpoint-9480/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c1dc0bc114c38e9b41356f3188f8ffec786d8b9d --- /dev/null +++ b/saves-olmo-cosine/checkpoint-9480/trainer_state.json @@ -0,0 +1,66393 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 9480, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00010548523206751055, + "grad_norm": 1.33402419090271, + "learning_rate": 1.5789473684210526e-05, + "loss": 7.6615, + "step": 1 + }, + { + "epoch": 0.0002109704641350211, + "grad_norm": 1.3331120014190674, + "learning_rate": 3.157894736842105e-05, + "loss": 7.6573, + "step": 2 + }, + { + "epoch": 0.00031645569620253165, + "grad_norm": 1.3108360767364502, + "learning_rate": 4.736842105263158e-05, + "loss": 7.6439, + "step": 3 + }, + { + "epoch": 0.0004219409282700422, + "grad_norm": 1.3312245607376099, + "learning_rate": 6.31578947368421e-05, + "loss": 7.6185, + "step": 4 + }, + { + "epoch": 0.0005274261603375527, + "grad_norm": 1.347765564918518, + "learning_rate": 7.894736842105263e-05, + "loss": 7.5801, + "step": 5 + }, + { + "epoch": 0.0006329113924050633, + "grad_norm": 1.288072943687439, + "learning_rate": 9.473684210526316e-05, + "loss": 7.5333, + "step": 6 + }, + { + "epoch": 0.0007383966244725738, + "grad_norm": 1.2597615718841553, + "learning_rate": 0.00011052631578947368, + "loss": 7.4796, + "step": 7 + }, + { + "epoch": 0.0008438818565400844, + "grad_norm": 1.2422316074371338, + "learning_rate": 0.0001263157894736842, + "loss": 7.4201, + "step": 8 + }, + { + "epoch": 0.0009493670886075949, + 
"grad_norm": 1.2551628351211548, + "learning_rate": 0.00014210526315789474, + "loss": 7.3543, + "step": 9 + }, + { + "epoch": 0.0010548523206751054, + "grad_norm": 1.2514461278915405, + "learning_rate": 0.00015789473684210527, + "loss": 7.2909, + "step": 10 + }, + { + "epoch": 0.001160337552742616, + "grad_norm": 1.277658224105835, + "learning_rate": 0.0001736842105263158, + "loss": 7.2155, + "step": 11 + }, + { + "epoch": 0.0012658227848101266, + "grad_norm": 1.2522021532058716, + "learning_rate": 0.00018947368421052632, + "loss": 7.1611, + "step": 12 + }, + { + "epoch": 0.0013713080168776372, + "grad_norm": 1.2492729425430298, + "learning_rate": 0.00020526315789473685, + "loss": 7.0982, + "step": 13 + }, + { + "epoch": 0.0014767932489451476, + "grad_norm": 1.2570220232009888, + "learning_rate": 0.00022105263157894735, + "loss": 7.0343, + "step": 14 + }, + { + "epoch": 0.0015822784810126582, + "grad_norm": 1.261733055114746, + "learning_rate": 0.00023684210526315788, + "loss": 6.9814, + "step": 15 + }, + { + "epoch": 0.0016877637130801688, + "grad_norm": 1.2463140487670898, + "learning_rate": 0.0002526315789473684, + "loss": 6.9246, + "step": 16 + }, + { + "epoch": 0.0017932489451476794, + "grad_norm": 1.2544599771499634, + "learning_rate": 0.00026842105263157897, + "loss": 6.8463, + "step": 17 + }, + { + "epoch": 0.0018987341772151898, + "grad_norm": 1.213974118232727, + "learning_rate": 0.00028421052631578947, + "loss": 6.774, + "step": 18 + }, + { + "epoch": 0.0020042194092827004, + "grad_norm": 1.204857349395752, + "learning_rate": 0.00030000000000000003, + "loss": 6.6948, + "step": 19 + }, + { + "epoch": 0.002109704641350211, + "grad_norm": 1.189758539199829, + "learning_rate": 0.00031578947368421053, + "loss": 6.6275, + "step": 20 + }, + { + "epoch": 0.0022151898734177216, + "grad_norm": 1.1528675556182861, + "learning_rate": 0.00033157894736842103, + "loss": 6.5613, + "step": 21 + }, + { + "epoch": 0.002320675105485232, + "grad_norm": 1.1263946294784546, + 
"learning_rate": 0.0003473684210526316, + "loss": 6.4916, + "step": 22 + }, + { + "epoch": 0.002426160337552743, + "grad_norm": 1.0972181558609009, + "learning_rate": 0.0003631578947368421, + "loss": 6.416, + "step": 23 + }, + { + "epoch": 0.002531645569620253, + "grad_norm": 1.0661813020706177, + "learning_rate": 0.00037894736842105265, + "loss": 6.3517, + "step": 24 + }, + { + "epoch": 0.0026371308016877636, + "grad_norm": 1.0231572389602661, + "learning_rate": 0.00039473684210526315, + "loss": 6.303, + "step": 25 + }, + { + "epoch": 0.0027426160337552744, + "grad_norm": 1.0130726099014282, + "learning_rate": 0.0004105263157894737, + "loss": 6.2361, + "step": 26 + }, + { + "epoch": 0.002848101265822785, + "grad_norm": 0.9633982181549072, + "learning_rate": 0.0004263157894736842, + "loss": 6.2044, + "step": 27 + }, + { + "epoch": 0.002953586497890295, + "grad_norm": 0.9549344778060913, + "learning_rate": 0.0004421052631578947, + "loss": 6.1361, + "step": 28 + }, + { + "epoch": 0.003059071729957806, + "grad_norm": 0.9332831501960754, + "learning_rate": 0.00045789473684210527, + "loss": 6.0958, + "step": 29 + }, + { + "epoch": 0.0031645569620253164, + "grad_norm": 0.8689128160476685, + "learning_rate": 0.00047368421052631577, + "loss": 6.0613, + "step": 30 + }, + { + "epoch": 0.003270042194092827, + "grad_norm": 0.854705274105072, + "learning_rate": 0.0004894736842105264, + "loss": 6.0137, + "step": 31 + }, + { + "epoch": 0.0033755274261603376, + "grad_norm": 0.840885579586029, + "learning_rate": 0.0005052631578947368, + "loss": 5.9571, + "step": 32 + }, + { + "epoch": 0.003481012658227848, + "grad_norm": 0.8322707414627075, + "learning_rate": 0.0005210526315789474, + "loss": 5.9025, + "step": 33 + }, + { + "epoch": 0.003586497890295359, + "grad_norm": 0.7833331227302551, + "learning_rate": 0.0005368421052631579, + "loss": 5.8765, + "step": 34 + }, + { + "epoch": 0.003691983122362869, + "grad_norm": 0.8091939687728882, + "learning_rate": 0.0005526315789473684, + 
"loss": 5.7998, + "step": 35 + }, + { + "epoch": 0.0037974683544303796, + "grad_norm": 0.788054347038269, + "learning_rate": 0.0005684210526315789, + "loss": 5.7813, + "step": 36 + }, + { + "epoch": 0.0039029535864978904, + "grad_norm": 0.8752147555351257, + "learning_rate": 0.0005842105263157895, + "loss": 5.7206, + "step": 37 + }, + { + "epoch": 0.004008438818565401, + "grad_norm": 2.109766721725464, + "learning_rate": 0.0006000000000000001, + "loss": 5.7118, + "step": 38 + }, + { + "epoch": 0.004113924050632912, + "grad_norm": 1.1336824893951416, + "learning_rate": 0.0006157894736842105, + "loss": 5.6292, + "step": 39 + }, + { + "epoch": 0.004219409282700422, + "grad_norm": 1.00751793384552, + "learning_rate": 0.0006315789473684211, + "loss": 5.6005, + "step": 40 + }, + { + "epoch": 0.004324894514767932, + "grad_norm": 1.852959394454956, + "learning_rate": 0.0006473684210526316, + "loss": 5.5855, + "step": 41 + }, + { + "epoch": 0.004430379746835443, + "grad_norm": 1.0087597370147705, + "learning_rate": 0.0006631578947368421, + "loss": 5.5248, + "step": 42 + }, + { + "epoch": 0.004535864978902953, + "grad_norm": 1.4094399213790894, + "learning_rate": 0.0006789473684210526, + "loss": 5.4631, + "step": 43 + }, + { + "epoch": 0.004641350210970464, + "grad_norm": 1.0254197120666504, + "learning_rate": 0.0006947368421052632, + "loss": 5.4655, + "step": 44 + }, + { + "epoch": 0.004746835443037975, + "grad_norm": 1.4345813989639282, + "learning_rate": 0.0007105263157894736, + "loss": 5.382, + "step": 45 + }, + { + "epoch": 0.004852320675105486, + "grad_norm": 0.9950718879699707, + "learning_rate": 0.0007263157894736842, + "loss": 5.3299, + "step": 46 + }, + { + "epoch": 0.004957805907172996, + "grad_norm": 1.07522451877594, + "learning_rate": 0.0007421052631578947, + "loss": 5.2959, + "step": 47 + }, + { + "epoch": 0.005063291139240506, + "grad_norm": 0.9250797629356384, + "learning_rate": 0.0007578947368421053, + "loss": 5.2546, + "step": 48 + }, + { + "epoch": 
0.005168776371308017, + "grad_norm": 0.7360596656799316, + "learning_rate": 0.0007736842105263159, + "loss": 5.1739, + "step": 49 + }, + { + "epoch": 0.005274261603375527, + "grad_norm": 0.676420271396637, + "learning_rate": 0.0007894736842105263, + "loss": 5.1476, + "step": 50 + }, + { + "epoch": 0.005379746835443038, + "grad_norm": 0.5840337872505188, + "learning_rate": 0.0008052631578947369, + "loss": 5.0893, + "step": 51 + }, + { + "epoch": 0.005485232067510549, + "grad_norm": 0.5672550797462463, + "learning_rate": 0.0008210526315789474, + "loss": 5.0274, + "step": 52 + }, + { + "epoch": 0.005590717299578059, + "grad_norm": 0.5732640624046326, + "learning_rate": 0.0008368421052631579, + "loss": 4.987, + "step": 53 + }, + { + "epoch": 0.00569620253164557, + "grad_norm": 0.7617015242576599, + "learning_rate": 0.0008526315789473684, + "loss": 4.91, + "step": 54 + }, + { + "epoch": 0.0058016877637130804, + "grad_norm": 1.0320638418197632, + "learning_rate": 0.000868421052631579, + "loss": 4.8784, + "step": 55 + }, + { + "epoch": 0.00590717299578059, + "grad_norm": 1.1912070512771606, + "learning_rate": 0.0008842105263157894, + "loss": 4.8135, + "step": 56 + }, + { + "epoch": 0.006012658227848101, + "grad_norm": 0.8080780506134033, + "learning_rate": 0.0009, + "loss": 4.7929, + "step": 57 + }, + { + "epoch": 0.006118143459915612, + "grad_norm": 0.7912352085113525, + "learning_rate": 0.0009157894736842105, + "loss": 4.7574, + "step": 58 + }, + { + "epoch": 0.006223628691983122, + "grad_norm": 0.9463189840316772, + "learning_rate": 0.0009315789473684211, + "loss": 4.6938, + "step": 59 + }, + { + "epoch": 0.006329113924050633, + "grad_norm": 1.1940027475357056, + "learning_rate": 0.0009473684210526315, + "loss": 4.6424, + "step": 60 + }, + { + "epoch": 0.006434599156118144, + "grad_norm": 0.6672406196594238, + "learning_rate": 0.0009631578947368421, + "loss": 4.6191, + "step": 61 + }, + { + "epoch": 0.006540084388185654, + "grad_norm": 0.6844768524169922, + 
"learning_rate": 0.0009789473684210528, + "loss": 4.5563, + "step": 62 + }, + { + "epoch": 0.006645569620253164, + "grad_norm": 0.49863189458847046, + "learning_rate": 0.000994736842105263, + "loss": 4.5223, + "step": 63 + }, + { + "epoch": 0.006751054852320675, + "grad_norm": 0.493787556886673, + "learning_rate": 0.0010105263157894737, + "loss": 4.4698, + "step": 64 + }, + { + "epoch": 0.006856540084388186, + "grad_norm": 0.698871374130249, + "learning_rate": 0.0010263157894736842, + "loss": 4.4549, + "step": 65 + }, + { + "epoch": 0.006962025316455696, + "grad_norm": 1.2139664888381958, + "learning_rate": 0.0010421052631578948, + "loss": 4.4049, + "step": 66 + }, + { + "epoch": 0.007067510548523207, + "grad_norm": 1.2939026355743408, + "learning_rate": 0.0010578947368421053, + "loss": 4.4055, + "step": 67 + }, + { + "epoch": 0.007172995780590718, + "grad_norm": 1.3509469032287598, + "learning_rate": 0.0010736842105263159, + "loss": 4.3687, + "step": 68 + }, + { + "epoch": 0.007278481012658228, + "grad_norm": 1.4538017511367798, + "learning_rate": 0.0010894736842105264, + "loss": 4.3093, + "step": 69 + }, + { + "epoch": 0.007383966244725738, + "grad_norm": 2.5992090702056885, + "learning_rate": 0.0011052631578947368, + "loss": 4.3075, + "step": 70 + }, + { + "epoch": 0.007489451476793249, + "grad_norm": 1.0675625801086426, + "learning_rate": 0.0011210526315789473, + "loss": 4.3064, + "step": 71 + }, + { + "epoch": 0.007594936708860759, + "grad_norm": 1.0624696016311646, + "learning_rate": 0.0011368421052631579, + "loss": 4.2606, + "step": 72 + }, + { + "epoch": 0.00770042194092827, + "grad_norm": 0.9986380338668823, + "learning_rate": 0.0011526315789473684, + "loss": 4.2238, + "step": 73 + }, + { + "epoch": 0.007805907172995781, + "grad_norm": 1.317137598991394, + "learning_rate": 0.001168421052631579, + "loss": 4.2041, + "step": 74 + }, + { + "epoch": 0.007911392405063292, + "grad_norm": 1.2356232404708862, + "learning_rate": 0.0011842105263157896, + "loss": 
4.1782, + "step": 75 + }, + { + "epoch": 0.008016877637130802, + "grad_norm": 1.3196362257003784, + "learning_rate": 0.0012000000000000001, + "loss": 4.1752, + "step": 76 + }, + { + "epoch": 0.008122362869198312, + "grad_norm": 0.9703906178474426, + "learning_rate": 0.0012157894736842105, + "loss": 4.149, + "step": 77 + }, + { + "epoch": 0.008227848101265823, + "grad_norm": 1.5880800485610962, + "learning_rate": 0.001231578947368421, + "loss": 4.1429, + "step": 78 + }, + { + "epoch": 0.008333333333333333, + "grad_norm": 0.9209275841712952, + "learning_rate": 0.0012473684210526316, + "loss": 4.0979, + "step": 79 + }, + { + "epoch": 0.008438818565400843, + "grad_norm": 1.0608091354370117, + "learning_rate": 0.0012631578947368421, + "loss": 4.0957, + "step": 80 + }, + { + "epoch": 0.008544303797468355, + "grad_norm": 0.9260897636413574, + "learning_rate": 0.0012789473684210527, + "loss": 4.0605, + "step": 81 + }, + { + "epoch": 0.008649789029535865, + "grad_norm": 0.8770465850830078, + "learning_rate": 0.0012947368421052632, + "loss": 4.0377, + "step": 82 + }, + { + "epoch": 0.008755274261603375, + "grad_norm": 0.7508010864257812, + "learning_rate": 0.0013105263157894738, + "loss": 4.009, + "step": 83 + }, + { + "epoch": 0.008860759493670886, + "grad_norm": 0.9307889342308044, + "learning_rate": 0.0013263157894736841, + "loss": 3.985, + "step": 84 + }, + { + "epoch": 0.008966244725738396, + "grad_norm": 1.1121947765350342, + "learning_rate": 0.0013421052631578947, + "loss": 3.9624, + "step": 85 + }, + { + "epoch": 0.009071729957805906, + "grad_norm": 1.2771798372268677, + "learning_rate": 0.0013578947368421052, + "loss": 3.9971, + "step": 86 + }, + { + "epoch": 0.009177215189873418, + "grad_norm": 0.9104655385017395, + "learning_rate": 0.0013736842105263158, + "loss": 3.9679, + "step": 87 + }, + { + "epoch": 0.009282700421940928, + "grad_norm": 1.3350539207458496, + "learning_rate": 0.0013894736842105264, + "loss": 3.8982, + "step": 88 + }, + { + "epoch": 
0.009388185654008438, + "grad_norm": 0.7080676555633545, + "learning_rate": 0.001405263157894737, + "loss": 3.8979, + "step": 89 + }, + { + "epoch": 0.00949367088607595, + "grad_norm": 0.7188036441802979, + "learning_rate": 0.0014210526315789472, + "loss": 3.883, + "step": 90 + }, + { + "epoch": 0.00959915611814346, + "grad_norm": 1.1549890041351318, + "learning_rate": 0.0014368421052631578, + "loss": 3.8789, + "step": 91 + }, + { + "epoch": 0.009704641350210971, + "grad_norm": 1.2730180025100708, + "learning_rate": 0.0014526315789473684, + "loss": 3.8682, + "step": 92 + }, + { + "epoch": 0.009810126582278481, + "grad_norm": 1.2028182744979858, + "learning_rate": 0.0014684210526315791, + "loss": 3.8448, + "step": 93 + }, + { + "epoch": 0.009915611814345991, + "grad_norm": 1.2370046377182007, + "learning_rate": 0.0014842105263157895, + "loss": 3.8675, + "step": 94 + }, + { + "epoch": 0.010021097046413503, + "grad_norm": 1.3064701557159424, + "learning_rate": 0.0015, + "loss": 3.8274, + "step": 95 + }, + { + "epoch": 0.010126582278481013, + "grad_norm": 0.9450060725212097, + "learning_rate": 0.00149999995797938, + "loss": 3.8184, + "step": 96 + }, + { + "epoch": 0.010232067510548523, + "grad_norm": 0.8917908072471619, + "learning_rate": 0.001499999831917525, + "loss": 3.778, + "step": 97 + }, + { + "epoch": 0.010337552742616034, + "grad_norm": 1.0605502128601074, + "learning_rate": 0.001499999621814449, + "loss": 3.7617, + "step": 98 + }, + { + "epoch": 0.010443037974683544, + "grad_norm": 0.9702006578445435, + "learning_rate": 0.0014999993276701756, + "loss": 3.792, + "step": 99 + }, + { + "epoch": 0.010548523206751054, + "grad_norm": 1.4191598892211914, + "learning_rate": 0.0014999989494847376, + "loss": 3.7459, + "step": 100 + }, + { + "epoch": 0.010654008438818566, + "grad_norm": 1.050790548324585, + "learning_rate": 0.0014999984872581774, + "loss": 3.7382, + "step": 101 + }, + { + "epoch": 0.010759493670886076, + "grad_norm": 0.8809148669242859, + 
"learning_rate": 0.0014999979409905469, + "loss": 3.6842, + "step": 102 + }, + { + "epoch": 0.010864978902953586, + "grad_norm": 0.9521017670631409, + "learning_rate": 0.0014999973106819074, + "loss": 3.6857, + "step": 103 + }, + { + "epoch": 0.010970464135021098, + "grad_norm": 0.9192695021629333, + "learning_rate": 0.0014999965963323294, + "loss": 3.6549, + "step": 104 + }, + { + "epoch": 0.011075949367088608, + "grad_norm": 0.8294497728347778, + "learning_rate": 0.0014999957979418927, + "loss": 3.6442, + "step": 105 + }, + { + "epoch": 0.011181434599156118, + "grad_norm": 0.9014871716499329, + "learning_rate": 0.0014999949155106874, + "loss": 3.6401, + "step": 106 + }, + { + "epoch": 0.01128691983122363, + "grad_norm": 1.0901930332183838, + "learning_rate": 0.0014999939490388115, + "loss": 3.653, + "step": 107 + }, + { + "epoch": 0.01139240506329114, + "grad_norm": 0.9290949106216431, + "learning_rate": 0.0014999928985263743, + "loss": 3.6207, + "step": 108 + }, + { + "epoch": 0.01149789029535865, + "grad_norm": 0.8087457418441772, + "learning_rate": 0.001499991763973493, + "loss": 3.619, + "step": 109 + }, + { + "epoch": 0.011603375527426161, + "grad_norm": 0.9243283867835999, + "learning_rate": 0.0014999905453802946, + "loss": 3.5797, + "step": 110 + }, + { + "epoch": 0.01170886075949367, + "grad_norm": 1.1109795570373535, + "learning_rate": 0.0014999892427469156, + "loss": 3.584, + "step": 111 + }, + { + "epoch": 0.01181434599156118, + "grad_norm": 0.891713559627533, + "learning_rate": 0.0014999878560735024, + "loss": 3.577, + "step": 112 + }, + { + "epoch": 0.011919831223628692, + "grad_norm": 0.7354522943496704, + "learning_rate": 0.0014999863853602101, + "loss": 3.5559, + "step": 113 + }, + { + "epoch": 0.012025316455696202, + "grad_norm": 0.623985230922699, + "learning_rate": 0.0014999848306072037, + "loss": 3.5598, + "step": 114 + }, + { + "epoch": 0.012130801687763712, + "grad_norm": 0.7063365578651428, + "learning_rate": 0.0014999831918146571, + 
"loss": 3.5441, + "step": 115 + }, + { + "epoch": 0.012236286919831224, + "grad_norm": 0.8937822580337524, + "learning_rate": 0.001499981468982754, + "loss": 3.5214, + "step": 116 + }, + { + "epoch": 0.012341772151898734, + "grad_norm": 1.0383225679397583, + "learning_rate": 0.001499979662111688, + "loss": 3.537, + "step": 117 + }, + { + "epoch": 0.012447257383966244, + "grad_norm": 0.8857123851776123, + "learning_rate": 0.0014999777712016607, + "loss": 3.5144, + "step": 118 + }, + { + "epoch": 0.012552742616033756, + "grad_norm": 0.804810643196106, + "learning_rate": 0.0014999757962528846, + "loss": 3.4582, + "step": 119 + }, + { + "epoch": 0.012658227848101266, + "grad_norm": 0.9934338331222534, + "learning_rate": 0.0014999737372655805, + "loss": 3.4877, + "step": 120 + }, + { + "epoch": 0.012763713080168776, + "grad_norm": 0.9769100546836853, + "learning_rate": 0.0014999715942399798, + "loss": 3.4822, + "step": 121 + }, + { + "epoch": 0.012869198312236287, + "grad_norm": 1.1319202184677124, + "learning_rate": 0.001499969367176322, + "loss": 3.4782, + "step": 122 + }, + { + "epoch": 0.012974683544303797, + "grad_norm": 0.8316890597343445, + "learning_rate": 0.0014999670560748573, + "loss": 3.4296, + "step": 123 + }, + { + "epoch": 0.013080168776371307, + "grad_norm": 0.7747625708580017, + "learning_rate": 0.001499964660935844, + "loss": 3.4203, + "step": 124 + }, + { + "epoch": 0.013185654008438819, + "grad_norm": 0.7343692183494568, + "learning_rate": 0.0014999621817595509, + "loss": 3.4399, + "step": 125 + }, + { + "epoch": 0.013291139240506329, + "grad_norm": 0.9695560932159424, + "learning_rate": 0.0014999596185462556, + "loss": 3.4172, + "step": 126 + }, + { + "epoch": 0.01339662447257384, + "grad_norm": 1.008923888206482, + "learning_rate": 0.0014999569712962452, + "loss": 3.433, + "step": 127 + }, + { + "epoch": 0.01350210970464135, + "grad_norm": 0.7971740365028381, + "learning_rate": 0.0014999542400098169, + "loss": 3.3952, + "step": 128 + }, + { + 
"epoch": 0.01360759493670886, + "grad_norm": 1.0129265785217285, + "learning_rate": 0.0014999514246872762, + "loss": 3.3885, + "step": 129 + }, + { + "epoch": 0.013713080168776372, + "grad_norm": 0.9079877138137817, + "learning_rate": 0.0014999485253289388, + "loss": 3.4074, + "step": 130 + }, + { + "epoch": 0.013818565400843882, + "grad_norm": 1.1965516805648804, + "learning_rate": 0.0014999455419351297, + "loss": 3.3568, + "step": 131 + }, + { + "epoch": 0.013924050632911392, + "grad_norm": 1.2806912660598755, + "learning_rate": 0.001499942474506183, + "loss": 3.418, + "step": 132 + }, + { + "epoch": 0.014029535864978904, + "grad_norm": 0.7652689814567566, + "learning_rate": 0.0014999393230424422, + "loss": 3.386, + "step": 133 + }, + { + "epoch": 0.014135021097046414, + "grad_norm": 0.9126921892166138, + "learning_rate": 0.001499936087544261, + "loss": 3.3331, + "step": 134 + }, + { + "epoch": 0.014240506329113924, + "grad_norm": 0.8921104669570923, + "learning_rate": 0.001499932768012002, + "loss": 3.3364, + "step": 135 + }, + { + "epoch": 0.014345991561181435, + "grad_norm": 0.7362779378890991, + "learning_rate": 0.0014999293644460362, + "loss": 3.3279, + "step": 136 + }, + { + "epoch": 0.014451476793248945, + "grad_norm": 1.015025019645691, + "learning_rate": 0.0014999258768467459, + "loss": 3.3375, + "step": 137 + }, + { + "epoch": 0.014556962025316455, + "grad_norm": 1.7015050649642944, + "learning_rate": 0.0014999223052145215, + "loss": 3.3165, + "step": 138 + }, + { + "epoch": 0.014662447257383967, + "grad_norm": 0.8104221224784851, + "learning_rate": 0.0014999186495497636, + "loss": 3.2945, + "step": 139 + }, + { + "epoch": 0.014767932489451477, + "grad_norm": 1.166978120803833, + "learning_rate": 0.0014999149098528814, + "loss": 3.2988, + "step": 140 + }, + { + "epoch": 0.014873417721518987, + "grad_norm": 1.274994134902954, + "learning_rate": 0.0014999110861242944, + "loss": 3.3145, + "step": 141 + }, + { + "epoch": 0.014978902953586498, + "grad_norm": 
0.8267152309417725, + "learning_rate": 0.0014999071783644306, + "loss": 3.2831, + "step": 142 + }, + { + "epoch": 0.015084388185654008, + "grad_norm": 1.1326689720153809, + "learning_rate": 0.001499903186573728, + "loss": 3.2893, + "step": 143 + }, + { + "epoch": 0.015189873417721518, + "grad_norm": 0.8955747485160828, + "learning_rate": 0.001499899110752634, + "loss": 3.2786, + "step": 144 + }, + { + "epoch": 0.01529535864978903, + "grad_norm": 1.1378633975982666, + "learning_rate": 0.0014998949509016054, + "loss": 3.2932, + "step": 145 + }, + { + "epoch": 0.01540084388185654, + "grad_norm": 0.7744877338409424, + "learning_rate": 0.0014998907070211084, + "loss": 3.2449, + "step": 146 + }, + { + "epoch": 0.01550632911392405, + "grad_norm": 0.695989727973938, + "learning_rate": 0.0014998863791116182, + "loss": 3.2721, + "step": 147 + }, + { + "epoch": 0.015611814345991562, + "grad_norm": 0.7581738829612732, + "learning_rate": 0.0014998819671736198, + "loss": 3.2118, + "step": 148 + }, + { + "epoch": 0.015717299578059073, + "grad_norm": 0.7258220911026001, + "learning_rate": 0.001499877471207608, + "loss": 3.2321, + "step": 149 + }, + { + "epoch": 0.015822784810126583, + "grad_norm": 0.6659122705459595, + "learning_rate": 0.0014998728912140862, + "loss": 3.2321, + "step": 150 + }, + { + "epoch": 0.015928270042194093, + "grad_norm": 0.7179707288742065, + "learning_rate": 0.0014998682271935677, + "loss": 3.2592, + "step": 151 + }, + { + "epoch": 0.016033755274261603, + "grad_norm": 0.562185525894165, + "learning_rate": 0.0014998634791465752, + "loss": 3.1797, + "step": 152 + }, + { + "epoch": 0.016139240506329113, + "grad_norm": 0.5972047448158264, + "learning_rate": 0.001499858647073641, + "loss": 3.2391, + "step": 153 + }, + { + "epoch": 0.016244725738396623, + "grad_norm": 0.5690639615058899, + "learning_rate": 0.0014998537309753057, + "loss": 3.1775, + "step": 154 + }, + { + "epoch": 0.016350210970464137, + "grad_norm": 0.69142746925354, + "learning_rate": 
0.001499848730852121, + "loss": 3.1939, + "step": 155 + }, + { + "epoch": 0.016455696202531647, + "grad_norm": 0.8223106265068054, + "learning_rate": 0.001499843646704647, + "loss": 3.1754, + "step": 156 + }, + { + "epoch": 0.016561181434599156, + "grad_norm": 0.7990493178367615, + "learning_rate": 0.0014998384785334532, + "loss": 3.1959, + "step": 157 + }, + { + "epoch": 0.016666666666666666, + "grad_norm": 0.7512163519859314, + "learning_rate": 0.0014998332263391192, + "loss": 3.1608, + "step": 158 + }, + { + "epoch": 0.016772151898734176, + "grad_norm": 0.7335628867149353, + "learning_rate": 0.0014998278901222327, + "loss": 3.1729, + "step": 159 + }, + { + "epoch": 0.016877637130801686, + "grad_norm": 0.9442381262779236, + "learning_rate": 0.0014998224698833922, + "loss": 3.1721, + "step": 160 + }, + { + "epoch": 0.0169831223628692, + "grad_norm": 1.3774473667144775, + "learning_rate": 0.0014998169656232053, + "loss": 3.1406, + "step": 161 + }, + { + "epoch": 0.01708860759493671, + "grad_norm": 1.1984927654266357, + "learning_rate": 0.0014998113773422883, + "loss": 3.1952, + "step": 162 + }, + { + "epoch": 0.01719409282700422, + "grad_norm": 1.0597014427185059, + "learning_rate": 0.0014998057050412674, + "loss": 3.1641, + "step": 163 + }, + { + "epoch": 0.01729957805907173, + "grad_norm": 1.1084028482437134, + "learning_rate": 0.0014997999487207786, + "loss": 3.1454, + "step": 164 + }, + { + "epoch": 0.01740506329113924, + "grad_norm": 0.9740651845932007, + "learning_rate": 0.0014997941083814666, + "loss": 3.1609, + "step": 165 + }, + { + "epoch": 0.01751054852320675, + "grad_norm": 0.9938405156135559, + "learning_rate": 0.001499788184023986, + "loss": 3.1434, + "step": 166 + }, + { + "epoch": 0.017616033755274263, + "grad_norm": 0.6897172927856445, + "learning_rate": 0.0014997821756490008, + "loss": 3.0974, + "step": 167 + }, + { + "epoch": 0.017721518987341773, + "grad_norm": 0.9283431768417358, + "learning_rate": 0.0014997760832571839, + "loss": 3.1064, + 
"step": 168 + }, + { + "epoch": 0.017827004219409283, + "grad_norm": 0.9346088767051697, + "learning_rate": 0.001499769906849218, + "loss": 3.0897, + "step": 169 + }, + { + "epoch": 0.017932489451476793, + "grad_norm": 0.9544816017150879, + "learning_rate": 0.0014997636464257956, + "loss": 3.0973, + "step": 170 + }, + { + "epoch": 0.018037974683544303, + "grad_norm": 1.1229948997497559, + "learning_rate": 0.0014997573019876179, + "loss": 3.0794, + "step": 171 + }, + { + "epoch": 0.018143459915611813, + "grad_norm": 0.7029290795326233, + "learning_rate": 0.0014997508735353957, + "loss": 3.1224, + "step": 172 + }, + { + "epoch": 0.018248945147679326, + "grad_norm": 0.8906141519546509, + "learning_rate": 0.0014997443610698497, + "loss": 3.1042, + "step": 173 + }, + { + "epoch": 0.018354430379746836, + "grad_norm": 0.751708447933197, + "learning_rate": 0.0014997377645917095, + "loss": 3.0597, + "step": 174 + }, + { + "epoch": 0.018459915611814346, + "grad_norm": 0.731255829334259, + "learning_rate": 0.001499731084101714, + "loss": 3.1051, + "step": 175 + }, + { + "epoch": 0.018565400843881856, + "grad_norm": 0.9861486554145813, + "learning_rate": 0.0014997243196006125, + "loss": 3.09, + "step": 176 + }, + { + "epoch": 0.018670886075949366, + "grad_norm": 0.9757897257804871, + "learning_rate": 0.001499717471089162, + "loss": 3.0979, + "step": 177 + }, + { + "epoch": 0.018776371308016876, + "grad_norm": 1.5157796144485474, + "learning_rate": 0.0014997105385681306, + "loss": 3.1033, + "step": 178 + }, + { + "epoch": 0.01888185654008439, + "grad_norm": 0.7201052904129028, + "learning_rate": 0.001499703522038295, + "loss": 3.1003, + "step": 179 + }, + { + "epoch": 0.0189873417721519, + "grad_norm": 1.1709928512573242, + "learning_rate": 0.0014996964215004416, + "loss": 3.0757, + "step": 180 + }, + { + "epoch": 0.01909282700421941, + "grad_norm": 0.8737855553627014, + "learning_rate": 0.0014996892369553655, + "loss": 3.0828, + "step": 181 + }, + { + "epoch": 
0.01919831223628692, + "grad_norm": 0.9526352286338806, + "learning_rate": 0.0014996819684038726, + "loss": 3.067, + "step": 182 + }, + { + "epoch": 0.01930379746835443, + "grad_norm": 1.029895544052124, + "learning_rate": 0.0014996746158467762, + "loss": 3.0436, + "step": 183 + }, + { + "epoch": 0.019409282700421943, + "grad_norm": 0.9069969058036804, + "learning_rate": 0.0014996671792849015, + "loss": 3.067, + "step": 184 + }, + { + "epoch": 0.019514767932489453, + "grad_norm": 0.8476433753967285, + "learning_rate": 0.001499659658719081, + "loss": 3.0194, + "step": 185 + }, + { + "epoch": 0.019620253164556962, + "grad_norm": 0.6034741997718811, + "learning_rate": 0.0014996520541501574, + "loss": 3.0067, + "step": 186 + }, + { + "epoch": 0.019725738396624472, + "grad_norm": 0.8293297290802002, + "learning_rate": 0.0014996443655789832, + "loss": 2.9997, + "step": 187 + }, + { + "epoch": 0.019831223628691982, + "grad_norm": 0.6320614814758301, + "learning_rate": 0.0014996365930064197, + "loss": 2.993, + "step": 188 + }, + { + "epoch": 0.019936708860759492, + "grad_norm": 0.6564265489578247, + "learning_rate": 0.001499628736433338, + "loss": 2.9998, + "step": 189 + }, + { + "epoch": 0.020042194092827006, + "grad_norm": 0.6689332127571106, + "learning_rate": 0.0014996207958606182, + "loss": 3.0289, + "step": 190 + }, + { + "epoch": 0.020147679324894516, + "grad_norm": 0.7024092078208923, + "learning_rate": 0.0014996127712891504, + "loss": 2.9838, + "step": 191 + }, + { + "epoch": 0.020253164556962026, + "grad_norm": 0.6698716878890991, + "learning_rate": 0.0014996046627198337, + "loss": 3.033, + "step": 192 + }, + { + "epoch": 0.020358649789029536, + "grad_norm": 0.6361587643623352, + "learning_rate": 0.0014995964701535768, + "loss": 2.9615, + "step": 193 + }, + { + "epoch": 0.020464135021097046, + "grad_norm": 0.8629898428916931, + "learning_rate": 0.0014995881935912973, + "loss": 3.0059, + "step": 194 + }, + { + "epoch": 0.020569620253164556, + "grad_norm": 
0.8762413263320923, + "learning_rate": 0.0014995798330339233, + "loss": 2.9874, + "step": 195 + }, + { + "epoch": 0.02067510548523207, + "grad_norm": 0.9430570006370544, + "learning_rate": 0.001499571388482391, + "loss": 2.9798, + "step": 196 + }, + { + "epoch": 0.02078059071729958, + "grad_norm": 0.8733846545219421, + "learning_rate": 0.001499562859937647, + "loss": 2.9768, + "step": 197 + }, + { + "epoch": 0.02088607594936709, + "grad_norm": 0.740599513053894, + "learning_rate": 0.001499554247400647, + "loss": 2.9978, + "step": 198 + }, + { + "epoch": 0.0209915611814346, + "grad_norm": 0.992948591709137, + "learning_rate": 0.0014995455508723557, + "loss": 2.9991, + "step": 199 + }, + { + "epoch": 0.02109704641350211, + "grad_norm": 1.0023967027664185, + "learning_rate": 0.001499536770353748, + "loss": 2.9399, + "step": 200 + }, + { + "epoch": 0.02120253164556962, + "grad_norm": 0.5940843820571899, + "learning_rate": 0.0014995279058458075, + "loss": 2.9838, + "step": 201 + }, + { + "epoch": 0.021308016877637132, + "grad_norm": 0.7505399584770203, + "learning_rate": 0.001499518957349528, + "loss": 2.9665, + "step": 202 + }, + { + "epoch": 0.021413502109704642, + "grad_norm": 0.7123975157737732, + "learning_rate": 0.0014995099248659115, + "loss": 2.9664, + "step": 203 + }, + { + "epoch": 0.021518987341772152, + "grad_norm": 0.6684901714324951, + "learning_rate": 0.001499500808395971, + "loss": 2.9441, + "step": 204 + }, + { + "epoch": 0.021624472573839662, + "grad_norm": 0.8758014440536499, + "learning_rate": 0.0014994916079407272, + "loss": 2.9289, + "step": 205 + }, + { + "epoch": 0.021729957805907172, + "grad_norm": 0.9805823564529419, + "learning_rate": 0.0014994823235012114, + "loss": 2.9218, + "step": 206 + }, + { + "epoch": 0.021835443037974682, + "grad_norm": 1.2285289764404297, + "learning_rate": 0.0014994729550784642, + "loss": 2.9553, + "step": 207 + }, + { + "epoch": 0.021940928270042195, + "grad_norm": 1.0861914157867432, + "learning_rate": 
0.001499463502673535, + "loss": 2.9321, + "step": 208 + }, + { + "epoch": 0.022046413502109705, + "grad_norm": 0.977634608745575, + "learning_rate": 0.0014994539662874832, + "loss": 2.9325, + "step": 209 + }, + { + "epoch": 0.022151898734177215, + "grad_norm": 0.9268695116043091, + "learning_rate": 0.0014994443459213774, + "loss": 2.9749, + "step": 210 + }, + { + "epoch": 0.022257383966244725, + "grad_norm": 0.8971960544586182, + "learning_rate": 0.0014994346415762956, + "loss": 2.9361, + "step": 211 + }, + { + "epoch": 0.022362869198312235, + "grad_norm": 0.7222105264663696, + "learning_rate": 0.0014994248532533253, + "loss": 2.9208, + "step": 212 + }, + { + "epoch": 0.022468354430379745, + "grad_norm": 0.9264825582504272, + "learning_rate": 0.001499414980953563, + "loss": 2.9499, + "step": 213 + }, + { + "epoch": 0.02257383966244726, + "grad_norm": 0.9348888397216797, + "learning_rate": 0.0014994050246781153, + "loss": 2.9298, + "step": 214 + }, + { + "epoch": 0.02267932489451477, + "grad_norm": 1.0583429336547852, + "learning_rate": 0.0014993949844280977, + "loss": 2.8933, + "step": 215 + }, + { + "epoch": 0.02278481012658228, + "grad_norm": 1.1376277208328247, + "learning_rate": 0.0014993848602046355, + "loss": 2.9295, + "step": 216 + }, + { + "epoch": 0.02289029535864979, + "grad_norm": 0.8223038911819458, + "learning_rate": 0.0014993746520088626, + "loss": 2.8993, + "step": 217 + }, + { + "epoch": 0.0229957805907173, + "grad_norm": 0.9218916296958923, + "learning_rate": 0.0014993643598419234, + "loss": 2.928, + "step": 218 + }, + { + "epoch": 0.023101265822784812, + "grad_norm": 0.8760400414466858, + "learning_rate": 0.0014993539837049707, + "loss": 2.9283, + "step": 219 + }, + { + "epoch": 0.023206751054852322, + "grad_norm": 0.7574028968811035, + "learning_rate": 0.001499343523599168, + "loss": 2.8847, + "step": 220 + }, + { + "epoch": 0.02331223628691983, + "grad_norm": 0.7475148439407349, + "learning_rate": 0.0014993329795256864, + "loss": 2.885, + 
"step": 221 + }, + { + "epoch": 0.02341772151898734, + "grad_norm": 0.8537375926971436, + "learning_rate": 0.0014993223514857081, + "loss": 2.8637, + "step": 222 + }, + { + "epoch": 0.02352320675105485, + "grad_norm": 0.6777610182762146, + "learning_rate": 0.001499311639480424, + "loss": 2.8692, + "step": 223 + }, + { + "epoch": 0.02362869198312236, + "grad_norm": 0.9427462220191956, + "learning_rate": 0.0014993008435110345, + "loss": 2.8689, + "step": 224 + }, + { + "epoch": 0.023734177215189875, + "grad_norm": 0.8493639826774597, + "learning_rate": 0.0014992899635787487, + "loss": 2.856, + "step": 225 + }, + { + "epoch": 0.023839662447257385, + "grad_norm": 0.787822425365448, + "learning_rate": 0.0014992789996847863, + "loss": 2.9037, + "step": 226 + }, + { + "epoch": 0.023945147679324895, + "grad_norm": 1.0846736431121826, + "learning_rate": 0.0014992679518303761, + "loss": 2.8617, + "step": 227 + }, + { + "epoch": 0.024050632911392405, + "grad_norm": 1.1679960489273071, + "learning_rate": 0.001499256820016755, + "loss": 2.8629, + "step": 228 + }, + { + "epoch": 0.024156118143459915, + "grad_norm": 0.9752690196037292, + "learning_rate": 0.0014992456042451717, + "loss": 2.8414, + "step": 229 + }, + { + "epoch": 0.024261603375527425, + "grad_norm": 1.0203039646148682, + "learning_rate": 0.0014992343045168823, + "loss": 2.8711, + "step": 230 + }, + { + "epoch": 0.024367088607594938, + "grad_norm": 0.8119566440582275, + "learning_rate": 0.0014992229208331527, + "loss": 2.8508, + "step": 231 + }, + { + "epoch": 0.024472573839662448, + "grad_norm": 0.8373361229896545, + "learning_rate": 0.0014992114531952592, + "loss": 2.8681, + "step": 232 + }, + { + "epoch": 0.024578059071729958, + "grad_norm": 0.8981396555900574, + "learning_rate": 0.0014991999016044865, + "loss": 2.8136, + "step": 233 + }, + { + "epoch": 0.024683544303797468, + "grad_norm": 0.6686893701553345, + "learning_rate": 0.0014991882660621285, + "loss": 2.8611, + "step": 234 + }, + { + "epoch": 
0.024789029535864978, + "grad_norm": 0.7313281893730164, + "learning_rate": 0.0014991765465694898, + "loss": 2.7982, + "step": 235 + }, + { + "epoch": 0.024894514767932488, + "grad_norm": 0.7293067574501038, + "learning_rate": 0.0014991647431278835, + "loss": 2.8257, + "step": 236 + }, + { + "epoch": 0.025, + "grad_norm": 0.6298500895500183, + "learning_rate": 0.001499152855738632, + "loss": 2.8041, + "step": 237 + }, + { + "epoch": 0.02510548523206751, + "grad_norm": 0.6921938061714172, + "learning_rate": 0.0014991408844030672, + "loss": 2.8072, + "step": 238 + }, + { + "epoch": 0.02521097046413502, + "grad_norm": 0.7551835179328918, + "learning_rate": 0.0014991288291225308, + "loss": 2.8239, + "step": 239 + }, + { + "epoch": 0.02531645569620253, + "grad_norm": 0.7630535364151001, + "learning_rate": 0.0014991166898983739, + "loss": 2.838, + "step": 240 + }, + { + "epoch": 0.02542194092827004, + "grad_norm": 0.6612765789031982, + "learning_rate": 0.001499104466731956, + "loss": 2.7747, + "step": 241 + }, + { + "epoch": 0.02552742616033755, + "grad_norm": 0.8147323727607727, + "learning_rate": 0.0014990921596246475, + "loss": 2.8173, + "step": 242 + }, + { + "epoch": 0.025632911392405065, + "grad_norm": 0.8418398499488831, + "learning_rate": 0.0014990797685778272, + "loss": 2.8282, + "step": 243 + }, + { + "epoch": 0.025738396624472575, + "grad_norm": 0.9046397805213928, + "learning_rate": 0.0014990672935928835, + "loss": 2.8264, + "step": 244 + }, + { + "epoch": 0.025843881856540084, + "grad_norm": 0.9595801830291748, + "learning_rate": 0.0014990547346712144, + "loss": 2.803, + "step": 245 + }, + { + "epoch": 0.025949367088607594, + "grad_norm": 0.9767016172409058, + "learning_rate": 0.0014990420918142271, + "loss": 2.809, + "step": 246 + }, + { + "epoch": 0.026054852320675104, + "grad_norm": 1.0082334280014038, + "learning_rate": 0.0014990293650233384, + "loss": 2.7734, + "step": 247 + }, + { + "epoch": 0.026160337552742614, + "grad_norm": 1.387833595275879, + 
"learning_rate": 0.0014990165542999746, + "loss": 2.8309, + "step": 248 + }, + { + "epoch": 0.026265822784810128, + "grad_norm": 1.0656508207321167, + "learning_rate": 0.0014990036596455706, + "loss": 2.787, + "step": 249 + }, + { + "epoch": 0.026371308016877638, + "grad_norm": 1.0711945295333862, + "learning_rate": 0.001498990681061572, + "loss": 2.7685, + "step": 250 + }, + { + "epoch": 0.026476793248945148, + "grad_norm": 0.8909558057785034, + "learning_rate": 0.0014989776185494322, + "loss": 2.8124, + "step": 251 + }, + { + "epoch": 0.026582278481012658, + "grad_norm": 1.0691665410995483, + "learning_rate": 0.001498964472110616, + "loss": 2.7846, + "step": 252 + }, + { + "epoch": 0.026687763713080168, + "grad_norm": 0.8788166642189026, + "learning_rate": 0.001498951241746596, + "loss": 2.7822, + "step": 253 + }, + { + "epoch": 0.02679324894514768, + "grad_norm": 0.8955141305923462, + "learning_rate": 0.0014989379274588546, + "loss": 2.7607, + "step": 254 + }, + { + "epoch": 0.02689873417721519, + "grad_norm": 0.865074634552002, + "learning_rate": 0.0014989245292488839, + "loss": 2.7647, + "step": 255 + }, + { + "epoch": 0.0270042194092827, + "grad_norm": 1.0438852310180664, + "learning_rate": 0.0014989110471181853, + "loss": 2.7357, + "step": 256 + }, + { + "epoch": 0.02710970464135021, + "grad_norm": 0.894645094871521, + "learning_rate": 0.0014988974810682695, + "loss": 2.7725, + "step": 257 + }, + { + "epoch": 0.02721518987341772, + "grad_norm": 0.9917587637901306, + "learning_rate": 0.0014988838311006565, + "loss": 2.7877, + "step": 258 + }, + { + "epoch": 0.02732067510548523, + "grad_norm": 0.9313732981681824, + "learning_rate": 0.0014988700972168758, + "loss": 2.7893, + "step": 259 + }, + { + "epoch": 0.027426160337552744, + "grad_norm": 0.723261833190918, + "learning_rate": 0.001498856279418467, + "loss": 2.7226, + "step": 260 + }, + { + "epoch": 0.027531645569620254, + "grad_norm": 0.7557529807090759, + "learning_rate": 0.0014988423777069775, + "loss": 
2.7643, + "step": 261 + }, + { + "epoch": 0.027637130801687764, + "grad_norm": 0.797459602355957, + "learning_rate": 0.0014988283920839658, + "loss": 2.7456, + "step": 262 + }, + { + "epoch": 0.027742616033755274, + "grad_norm": 0.9148644208908081, + "learning_rate": 0.0014988143225509983, + "loss": 2.7438, + "step": 263 + }, + { + "epoch": 0.027848101265822784, + "grad_norm": 0.9071038961410522, + "learning_rate": 0.0014988001691096525, + "loss": 2.7499, + "step": 264 + }, + { + "epoch": 0.027953586497890294, + "grad_norm": 0.7967275381088257, + "learning_rate": 0.0014987859317615137, + "loss": 2.7428, + "step": 265 + }, + { + "epoch": 0.028059071729957807, + "grad_norm": 0.7570477724075317, + "learning_rate": 0.0014987716105081775, + "loss": 2.7378, + "step": 266 + }, + { + "epoch": 0.028164556962025317, + "grad_norm": 0.8421326875686646, + "learning_rate": 0.001498757205351249, + "loss": 2.7276, + "step": 267 + }, + { + "epoch": 0.028270042194092827, + "grad_norm": 0.7753202319145203, + "learning_rate": 0.0014987427162923416, + "loss": 2.7047, + "step": 268 + }, + { + "epoch": 0.028375527426160337, + "grad_norm": 0.9220251441001892, + "learning_rate": 0.001498728143333079, + "loss": 2.7372, + "step": 269 + }, + { + "epoch": 0.028481012658227847, + "grad_norm": 1.0194884538650513, + "learning_rate": 0.0014987134864750948, + "loss": 2.7428, + "step": 270 + }, + { + "epoch": 0.028586497890295357, + "grad_norm": 0.9373172521591187, + "learning_rate": 0.0014986987457200312, + "loss": 2.7349, + "step": 271 + }, + { + "epoch": 0.02869198312236287, + "grad_norm": 0.739021360874176, + "learning_rate": 0.0014986839210695394, + "loss": 2.7158, + "step": 272 + }, + { + "epoch": 0.02879746835443038, + "grad_norm": 0.6601149439811707, + "learning_rate": 0.0014986690125252814, + "loss": 2.6933, + "step": 273 + }, + { + "epoch": 0.02890295358649789, + "grad_norm": 0.793128490447998, + "learning_rate": 0.001498654020088927, + "loss": 2.7138, + "step": 274 + }, + { + "epoch": 
0.0290084388185654, + "grad_norm": 0.8573440909385681, + "learning_rate": 0.0014986389437621566, + "loss": 2.7324, + "step": 275 + }, + { + "epoch": 0.02911392405063291, + "grad_norm": 0.9217886328697205, + "learning_rate": 0.0014986237835466596, + "loss": 2.6928, + "step": 276 + }, + { + "epoch": 0.02921940928270042, + "grad_norm": 0.8632397055625916, + "learning_rate": 0.0014986085394441343, + "loss": 2.7209, + "step": 277 + }, + { + "epoch": 0.029324894514767934, + "grad_norm": 0.6197367906570435, + "learning_rate": 0.0014985932114562896, + "loss": 2.6373, + "step": 278 + }, + { + "epoch": 0.029430379746835444, + "grad_norm": 0.8438882231712341, + "learning_rate": 0.0014985777995848428, + "loss": 2.7058, + "step": 279 + }, + { + "epoch": 0.029535864978902954, + "grad_norm": 0.8704207539558411, + "learning_rate": 0.0014985623038315206, + "loss": 2.6826, + "step": 280 + }, + { + "epoch": 0.029641350210970464, + "grad_norm": 0.9130333662033081, + "learning_rate": 0.0014985467241980597, + "loss": 2.6717, + "step": 281 + }, + { + "epoch": 0.029746835443037974, + "grad_norm": 1.0030908584594727, + "learning_rate": 0.0014985310606862058, + "loss": 2.7337, + "step": 282 + }, + { + "epoch": 0.029852320675105484, + "grad_norm": 0.9902961850166321, + "learning_rate": 0.0014985153132977141, + "loss": 2.658, + "step": 283 + }, + { + "epoch": 0.029957805907172997, + "grad_norm": 0.9128199815750122, + "learning_rate": 0.0014984994820343488, + "loss": 2.666, + "step": 284 + }, + { + "epoch": 0.030063291139240507, + "grad_norm": 0.7756869792938232, + "learning_rate": 0.0014984835668978844, + "loss": 2.713, + "step": 285 + }, + { + "epoch": 0.030168776371308017, + "grad_norm": 0.7424759864807129, + "learning_rate": 0.0014984675678901042, + "loss": 2.6898, + "step": 286 + }, + { + "epoch": 0.030274261603375527, + "grad_norm": 0.8693467378616333, + "learning_rate": 0.0014984514850128006, + "loss": 2.6919, + "step": 287 + }, + { + "epoch": 0.030379746835443037, + "grad_norm": 
0.8453320860862732, + "learning_rate": 0.0014984353182677759, + "loss": 2.6727, + "step": 288 + }, + { + "epoch": 0.03048523206751055, + "grad_norm": 0.7967785000801086, + "learning_rate": 0.001498419067656842, + "loss": 2.6719, + "step": 289 + }, + { + "epoch": 0.03059071729957806, + "grad_norm": 0.7858949303627014, + "learning_rate": 0.0014984027331818193, + "loss": 2.6628, + "step": 290 + }, + { + "epoch": 0.03069620253164557, + "grad_norm": 0.8938252925872803, + "learning_rate": 0.0014983863148445389, + "loss": 2.671, + "step": 291 + }, + { + "epoch": 0.03080168776371308, + "grad_norm": 0.9217085242271423, + "learning_rate": 0.0014983698126468398, + "loss": 2.6957, + "step": 292 + }, + { + "epoch": 0.03090717299578059, + "grad_norm": 1.0677217245101929, + "learning_rate": 0.0014983532265905716, + "loss": 2.6694, + "step": 293 + }, + { + "epoch": 0.0310126582278481, + "grad_norm": 0.9188022613525391, + "learning_rate": 0.0014983365566775928, + "loss": 2.6296, + "step": 294 + }, + { + "epoch": 0.031118143459915613, + "grad_norm": 1.015804409980774, + "learning_rate": 0.0014983198029097711, + "loss": 2.6456, + "step": 295 + }, + { + "epoch": 0.031223628691983123, + "grad_norm": 1.1120597124099731, + "learning_rate": 0.0014983029652889843, + "loss": 2.683, + "step": 296 + }, + { + "epoch": 0.03132911392405063, + "grad_norm": 0.7365554571151733, + "learning_rate": 0.0014982860438171187, + "loss": 2.6989, + "step": 297 + }, + { + "epoch": 0.03143459915611815, + "grad_norm": 0.8252580165863037, + "learning_rate": 0.0014982690384960705, + "loss": 2.6587, + "step": 298 + }, + { + "epoch": 0.03154008438818565, + "grad_norm": 0.799670934677124, + "learning_rate": 0.0014982519493277455, + "loss": 2.6254, + "step": 299 + }, + { + "epoch": 0.03164556962025317, + "grad_norm": 0.7530995011329651, + "learning_rate": 0.0014982347763140584, + "loss": 2.6433, + "step": 300 + }, + { + "epoch": 0.03175105485232067, + "grad_norm": 0.7862807512283325, + "learning_rate": 
0.0014982175194569337, + "loss": 2.6254, + "step": 301 + }, + { + "epoch": 0.03185654008438819, + "grad_norm": 0.9244537949562073, + "learning_rate": 0.0014982001787583047, + "loss": 2.6273, + "step": 302 + }, + { + "epoch": 0.03196202531645569, + "grad_norm": 0.8339300751686096, + "learning_rate": 0.001498182754220115, + "loss": 2.632, + "step": 303 + }, + { + "epoch": 0.032067510548523206, + "grad_norm": 0.7597547173500061, + "learning_rate": 0.001498165245844317, + "loss": 2.6359, + "step": 304 + }, + { + "epoch": 0.03217299578059072, + "grad_norm": 1.030236005783081, + "learning_rate": 0.0014981476536328722, + "loss": 2.6559, + "step": 305 + }, + { + "epoch": 0.032278481012658226, + "grad_norm": 1.0363157987594604, + "learning_rate": 0.0014981299775877525, + "loss": 2.632, + "step": 306 + }, + { + "epoch": 0.03238396624472574, + "grad_norm": 0.8513719439506531, + "learning_rate": 0.0014981122177109383, + "loss": 2.6394, + "step": 307 + }, + { + "epoch": 0.032489451476793246, + "grad_norm": 0.8769726157188416, + "learning_rate": 0.0014980943740044196, + "loss": 2.6159, + "step": 308 + }, + { + "epoch": 0.03259493670886076, + "grad_norm": 0.9229185581207275, + "learning_rate": 0.0014980764464701958, + "loss": 2.6458, + "step": 309 + }, + { + "epoch": 0.03270042194092827, + "grad_norm": 0.6037107706069946, + "learning_rate": 0.0014980584351102762, + "loss": 2.6298, + "step": 310 + }, + { + "epoch": 0.03280590717299578, + "grad_norm": 0.8517947793006897, + "learning_rate": 0.0014980403399266786, + "loss": 2.6055, + "step": 311 + }, + { + "epoch": 0.03291139240506329, + "grad_norm": 0.7070207595825195, + "learning_rate": 0.0014980221609214308, + "loss": 2.6218, + "step": 312 + }, + { + "epoch": 0.0330168776371308, + "grad_norm": 0.794857382774353, + "learning_rate": 0.0014980038980965701, + "loss": 2.5953, + "step": 313 + }, + { + "epoch": 0.03312236286919831, + "grad_norm": 0.8935182094573975, + "learning_rate": 0.0014979855514541424, + "loss": 2.5997, + "step": 
314 + }, + { + "epoch": 0.03322784810126582, + "grad_norm": 1.0899525880813599, + "learning_rate": 0.0014979671209962044, + "loss": 2.6223, + "step": 315 + }, + { + "epoch": 0.03333333333333333, + "grad_norm": 1.0075792074203491, + "learning_rate": 0.0014979486067248204, + "loss": 2.5894, + "step": 316 + }, + { + "epoch": 0.033438818565400846, + "grad_norm": 0.8398371934890747, + "learning_rate": 0.0014979300086420655, + "loss": 2.6188, + "step": 317 + }, + { + "epoch": 0.03354430379746835, + "grad_norm": 0.7860711216926575, + "learning_rate": 0.0014979113267500235, + "loss": 2.6027, + "step": 318 + }, + { + "epoch": 0.033649789029535866, + "grad_norm": 0.726205587387085, + "learning_rate": 0.0014978925610507879, + "loss": 2.5875, + "step": 319 + }, + { + "epoch": 0.03375527426160337, + "grad_norm": 0.9346764087677002, + "learning_rate": 0.001497873711546462, + "loss": 2.62, + "step": 320 + }, + { + "epoch": 0.033860759493670886, + "grad_norm": 0.9493430256843567, + "learning_rate": 0.001497854778239157, + "loss": 2.6092, + "step": 321 + }, + { + "epoch": 0.0339662447257384, + "grad_norm": 0.8064025044441223, + "learning_rate": 0.0014978357611309951, + "loss": 2.6063, + "step": 322 + }, + { + "epoch": 0.034071729957805906, + "grad_norm": 0.8978192806243896, + "learning_rate": 0.0014978166602241068, + "loss": 2.6229, + "step": 323 + }, + { + "epoch": 0.03417721518987342, + "grad_norm": 0.7818055152893066, + "learning_rate": 0.0014977974755206334, + "loss": 2.6416, + "step": 324 + }, + { + "epoch": 0.034282700421940926, + "grad_norm": 0.9453415274620056, + "learning_rate": 0.0014977782070227236, + "loss": 2.579, + "step": 325 + }, + { + "epoch": 0.03438818565400844, + "grad_norm": 0.9227696061134338, + "learning_rate": 0.001497758854732537, + "loss": 2.5993, + "step": 326 + }, + { + "epoch": 0.03449367088607595, + "grad_norm": 1.0595301389694214, + "learning_rate": 0.001497739418652242, + "loss": 2.6211, + "step": 327 + }, + { + "epoch": 0.03459915611814346, + 
"grad_norm": 0.9919867515563965, + "learning_rate": 0.0014977198987840168, + "loss": 2.5706, + "step": 328 + }, + { + "epoch": 0.03470464135021097, + "grad_norm": 1.0165826082229614, + "learning_rate": 0.0014977002951300483, + "loss": 2.5615, + "step": 329 + }, + { + "epoch": 0.03481012658227848, + "grad_norm": 0.9390838146209717, + "learning_rate": 0.0014976806076925334, + "loss": 2.5513, + "step": 330 + }, + { + "epoch": 0.03491561181434599, + "grad_norm": 1.226837158203125, + "learning_rate": 0.0014976608364736781, + "loss": 2.5616, + "step": 331 + }, + { + "epoch": 0.0350210970464135, + "grad_norm": 0.9883494973182678, + "learning_rate": 0.001497640981475698, + "loss": 2.5722, + "step": 332 + }, + { + "epoch": 0.03512658227848101, + "grad_norm": 0.8422044515609741, + "learning_rate": 0.0014976210427008177, + "loss": 2.5997, + "step": 333 + }, + { + "epoch": 0.035232067510548526, + "grad_norm": 0.9751186966896057, + "learning_rate": 0.0014976010201512718, + "loss": 2.5717, + "step": 334 + }, + { + "epoch": 0.03533755274261603, + "grad_norm": 1.158748984336853, + "learning_rate": 0.0014975809138293036, + "loss": 2.5627, + "step": 335 + }, + { + "epoch": 0.035443037974683546, + "grad_norm": 0.7320473194122314, + "learning_rate": 0.0014975607237371663, + "loss": 2.5635, + "step": 336 + }, + { + "epoch": 0.03554852320675105, + "grad_norm": 1.004415512084961, + "learning_rate": 0.0014975404498771222, + "loss": 2.563, + "step": 337 + }, + { + "epoch": 0.035654008438818566, + "grad_norm": 1.237905740737915, + "learning_rate": 0.0014975200922514428, + "loss": 2.5957, + "step": 338 + }, + { + "epoch": 0.03575949367088608, + "grad_norm": 0.7636582255363464, + "learning_rate": 0.00149749965086241, + "loss": 2.5913, + "step": 339 + }, + { + "epoch": 0.035864978902953586, + "grad_norm": 1.1618270874023438, + "learning_rate": 0.0014974791257123137, + "loss": 2.5786, + "step": 340 + }, + { + "epoch": 0.0359704641350211, + "grad_norm": 0.9126408100128174, + "learning_rate": 
0.0014974585168034543, + "loss": 2.5375, + "step": 341 + }, + { + "epoch": 0.036075949367088606, + "grad_norm": 0.9528697729110718, + "learning_rate": 0.0014974378241381409, + "loss": 2.5138, + "step": 342 + }, + { + "epoch": 0.03618143459915612, + "grad_norm": 0.8458671569824219, + "learning_rate": 0.001497417047718692, + "loss": 2.56, + "step": 343 + }, + { + "epoch": 0.036286919831223625, + "grad_norm": 0.8602905869483948, + "learning_rate": 0.0014973961875474364, + "loss": 2.5397, + "step": 344 + }, + { + "epoch": 0.03639240506329114, + "grad_norm": 0.9775346517562866, + "learning_rate": 0.0014973752436267106, + "loss": 2.5426, + "step": 345 + }, + { + "epoch": 0.03649789029535865, + "grad_norm": 0.9600573182106018, + "learning_rate": 0.0014973542159588623, + "loss": 2.5287, + "step": 346 + }, + { + "epoch": 0.03660337552742616, + "grad_norm": 0.8364992141723633, + "learning_rate": 0.0014973331045462475, + "loss": 2.5555, + "step": 347 + }, + { + "epoch": 0.03670886075949367, + "grad_norm": 0.8127474188804626, + "learning_rate": 0.0014973119093912317, + "loss": 2.5414, + "step": 348 + }, + { + "epoch": 0.03681434599156118, + "grad_norm": 0.8451001644134521, + "learning_rate": 0.00149729063049619, + "loss": 2.4996, + "step": 349 + }, + { + "epoch": 0.03691983122362869, + "grad_norm": 0.8449087738990784, + "learning_rate": 0.001497269267863507, + "loss": 2.5126, + "step": 350 + }, + { + "epoch": 0.037025316455696206, + "grad_norm": 0.9673872590065002, + "learning_rate": 0.0014972478214955762, + "loss": 2.5118, + "step": 351 + }, + { + "epoch": 0.03713080168776371, + "grad_norm": 0.7918200492858887, + "learning_rate": 0.0014972262913948008, + "loss": 2.5292, + "step": 352 + }, + { + "epoch": 0.037236286919831225, + "grad_norm": 0.7892960906028748, + "learning_rate": 0.0014972046775635934, + "loss": 2.5455, + "step": 353 + }, + { + "epoch": 0.03734177215189873, + "grad_norm": 0.7750479578971863, + "learning_rate": 0.0014971829800043762, + "loss": 2.4826, + "step": 
354 + }, + { + "epoch": 0.037447257383966245, + "grad_norm": 0.7374945282936096, + "learning_rate": 0.0014971611987195802, + "loss": 2.5115, + "step": 355 + }, + { + "epoch": 0.03755274261603375, + "grad_norm": 0.7625460028648376, + "learning_rate": 0.0014971393337116462, + "loss": 2.5347, + "step": 356 + }, + { + "epoch": 0.037658227848101265, + "grad_norm": 0.7377792596817017, + "learning_rate": 0.0014971173849830243, + "loss": 2.4891, + "step": 357 + }, + { + "epoch": 0.03776371308016878, + "grad_norm": 0.7255069613456726, + "learning_rate": 0.0014970953525361738, + "loss": 2.5139, + "step": 358 + }, + { + "epoch": 0.037869198312236285, + "grad_norm": 0.7754729986190796, + "learning_rate": 0.001497073236373564, + "loss": 2.4916, + "step": 359 + }, + { + "epoch": 0.0379746835443038, + "grad_norm": 0.7840685248374939, + "learning_rate": 0.0014970510364976724, + "loss": 2.5344, + "step": 360 + }, + { + "epoch": 0.038080168776371305, + "grad_norm": 0.9067919850349426, + "learning_rate": 0.0014970287529109873, + "loss": 2.5039, + "step": 361 + }, + { + "epoch": 0.03818565400843882, + "grad_norm": 0.9635066986083984, + "learning_rate": 0.0014970063856160054, + "loss": 2.5403, + "step": 362 + }, + { + "epoch": 0.03829113924050633, + "grad_norm": 0.9669232368469238, + "learning_rate": 0.0014969839346152332, + "loss": 2.4882, + "step": 363 + }, + { + "epoch": 0.03839662447257384, + "grad_norm": 1.0239323377609253, + "learning_rate": 0.001496961399911186, + "loss": 2.494, + "step": 364 + }, + { + "epoch": 0.03850210970464135, + "grad_norm": 1.0260052680969238, + "learning_rate": 0.0014969387815063897, + "loss": 2.5289, + "step": 365 + }, + { + "epoch": 0.03860759493670886, + "grad_norm": 0.9636494517326355, + "learning_rate": 0.0014969160794033778, + "loss": 2.491, + "step": 366 + }, + { + "epoch": 0.03871308016877637, + "grad_norm": 0.7968422770500183, + "learning_rate": 0.0014968932936046953, + "loss": 2.4928, + "step": 367 + }, + { + "epoch": 0.038818565400843885, + 
"grad_norm": 0.8845379948616028, + "learning_rate": 0.0014968704241128947, + "loss": 2.5182, + "step": 368 + }, + { + "epoch": 0.03892405063291139, + "grad_norm": 0.8241498470306396, + "learning_rate": 0.0014968474709305384, + "loss": 2.5043, + "step": 369 + }, + { + "epoch": 0.039029535864978905, + "grad_norm": 0.7662643194198608, + "learning_rate": 0.0014968244340601996, + "loss": 2.4657, + "step": 370 + }, + { + "epoch": 0.03913502109704641, + "grad_norm": 0.8335525989532471, + "learning_rate": 0.0014968013135044586, + "loss": 2.4922, + "step": 371 + }, + { + "epoch": 0.039240506329113925, + "grad_norm": 0.8275237083435059, + "learning_rate": 0.0014967781092659065, + "loss": 2.482, + "step": 372 + }, + { + "epoch": 0.03934599156118143, + "grad_norm": 0.9658628702163696, + "learning_rate": 0.0014967548213471436, + "loss": 2.5144, + "step": 373 + }, + { + "epoch": 0.039451476793248945, + "grad_norm": 1.0264416933059692, + "learning_rate": 0.0014967314497507792, + "loss": 2.5314, + "step": 374 + }, + { + "epoch": 0.03955696202531646, + "grad_norm": 1.0277036428451538, + "learning_rate": 0.0014967079944794323, + "loss": 2.4803, + "step": 375 + }, + { + "epoch": 0.039662447257383965, + "grad_norm": 0.8801045417785645, + "learning_rate": 0.0014966844555357314, + "loss": 2.4951, + "step": 376 + }, + { + "epoch": 0.03976793248945148, + "grad_norm": 0.9375991225242615, + "learning_rate": 0.0014966608329223137, + "loss": 2.4992, + "step": 377 + }, + { + "epoch": 0.039873417721518985, + "grad_norm": 0.8814296126365662, + "learning_rate": 0.0014966371266418267, + "loss": 2.4754, + "step": 378 + }, + { + "epoch": 0.0399789029535865, + "grad_norm": 0.7958258986473083, + "learning_rate": 0.0014966133366969264, + "loss": 2.4936, + "step": 379 + }, + { + "epoch": 0.04008438818565401, + "grad_norm": 0.7726293802261353, + "learning_rate": 0.001496589463090279, + "loss": 2.479, + "step": 380 + }, + { + "epoch": 0.04018987341772152, + "grad_norm": 0.7703227996826172, + 
"learning_rate": 0.0014965655058245592, + "loss": 2.4841, + "step": 381 + }, + { + "epoch": 0.04029535864978903, + "grad_norm": 0.9839662313461304, + "learning_rate": 0.001496541464902452, + "loss": 2.4935, + "step": 382 + }, + { + "epoch": 0.04040084388185654, + "grad_norm": 1.0334705114364624, + "learning_rate": 0.001496517340326651, + "loss": 2.4816, + "step": 383 + }, + { + "epoch": 0.04050632911392405, + "grad_norm": 0.8349124789237976, + "learning_rate": 0.0014964931320998593, + "loss": 2.4703, + "step": 384 + }, + { + "epoch": 0.04061181434599156, + "grad_norm": 0.8747735619544983, + "learning_rate": 0.00149646884022479, + "loss": 2.4546, + "step": 385 + }, + { + "epoch": 0.04071729957805907, + "grad_norm": 0.9137724041938782, + "learning_rate": 0.0014964444647041647, + "loss": 2.4474, + "step": 386 + }, + { + "epoch": 0.040822784810126585, + "grad_norm": 1.0790902376174927, + "learning_rate": 0.0014964200055407153, + "loss": 2.4844, + "step": 387 + }, + { + "epoch": 0.04092827004219409, + "grad_norm": 0.8298695683479309, + "learning_rate": 0.0014963954627371823, + "loss": 2.4682, + "step": 388 + }, + { + "epoch": 0.041033755274261605, + "grad_norm": 0.7845171689987183, + "learning_rate": 0.0014963708362963157, + "loss": 2.4558, + "step": 389 + }, + { + "epoch": 0.04113924050632911, + "grad_norm": 0.7653631567955017, + "learning_rate": 0.001496346126220875, + "loss": 2.4559, + "step": 390 + }, + { + "epoch": 0.041244725738396625, + "grad_norm": 0.7387005090713501, + "learning_rate": 0.0014963213325136296, + "loss": 2.4504, + "step": 391 + }, + { + "epoch": 0.04135021097046414, + "grad_norm": 0.7735226154327393, + "learning_rate": 0.0014962964551773572, + "loss": 2.4446, + "step": 392 + }, + { + "epoch": 0.041455696202531644, + "grad_norm": 0.7246749401092529, + "learning_rate": 0.0014962714942148457, + "loss": 2.432, + "step": 393 + }, + { + "epoch": 0.04156118143459916, + "grad_norm": 1.0097548961639404, + "learning_rate": 0.001496246449628892, + "loss": 
2.4344, + "step": 394 + }, + { + "epoch": 0.041666666666666664, + "grad_norm": 1.2118507623672485, + "learning_rate": 0.0014962213214223025, + "loss": 2.4357, + "step": 395 + }, + { + "epoch": 0.04177215189873418, + "grad_norm": 0.9682818651199341, + "learning_rate": 0.001496196109597893, + "loss": 2.4523, + "step": 396 + }, + { + "epoch": 0.04187763713080169, + "grad_norm": 0.9052855968475342, + "learning_rate": 0.0014961708141584885, + "loss": 2.427, + "step": 397 + }, + { + "epoch": 0.0419831223628692, + "grad_norm": 1.2072954177856445, + "learning_rate": 0.0014961454351069233, + "loss": 2.4445, + "step": 398 + }, + { + "epoch": 0.04208860759493671, + "grad_norm": 0.957667887210846, + "learning_rate": 0.0014961199724460418, + "loss": 2.4525, + "step": 399 + }, + { + "epoch": 0.04219409282700422, + "grad_norm": 0.9537596106529236, + "learning_rate": 0.0014960944261786966, + "loss": 2.4425, + "step": 400 + }, + { + "epoch": 0.04229957805907173, + "grad_norm": 0.8746856451034546, + "learning_rate": 0.001496068796307751, + "loss": 2.4338, + "step": 401 + }, + { + "epoch": 0.04240506329113924, + "grad_norm": 0.8735122084617615, + "learning_rate": 0.0014960430828360762, + "loss": 2.4054, + "step": 402 + }, + { + "epoch": 0.04251054852320675, + "grad_norm": 0.8234822154045105, + "learning_rate": 0.001496017285766554, + "loss": 2.4498, + "step": 403 + }, + { + "epoch": 0.042616033755274264, + "grad_norm": 0.8238188624382019, + "learning_rate": 0.0014959914051020748, + "loss": 2.4508, + "step": 404 + }, + { + "epoch": 0.04272151898734177, + "grad_norm": 0.9125956892967224, + "learning_rate": 0.001495965440845539, + "loss": 2.4032, + "step": 405 + }, + { + "epoch": 0.042827004219409284, + "grad_norm": 1.2202295064926147, + "learning_rate": 0.0014959393929998557, + "loss": 2.4569, + "step": 406 + }, + { + "epoch": 0.04293248945147679, + "grad_norm": 0.7834360599517822, + "learning_rate": 0.001495913261567944, + "loss": 2.4667, + "step": 407 + }, + { + "epoch": 
0.043037974683544304, + "grad_norm": 0.9273266196250916, + "learning_rate": 0.0014958870465527317, + "loss": 2.4328, + "step": 408 + }, + { + "epoch": 0.04314345991561182, + "grad_norm": 1.0169610977172852, + "learning_rate": 0.0014958607479571564, + "loss": 2.4264, + "step": 409 + }, + { + "epoch": 0.043248945147679324, + "grad_norm": 0.8333950638771057, + "learning_rate": 0.0014958343657841655, + "loss": 2.4128, + "step": 410 + }, + { + "epoch": 0.04335443037974684, + "grad_norm": 0.8079005479812622, + "learning_rate": 0.0014958079000367147, + "loss": 2.3982, + "step": 411 + }, + { + "epoch": 0.043459915611814344, + "grad_norm": 0.7574958801269531, + "learning_rate": 0.0014957813507177696, + "loss": 2.4213, + "step": 412 + }, + { + "epoch": 0.04356540084388186, + "grad_norm": 0.7480236291885376, + "learning_rate": 0.0014957547178303054, + "loss": 2.4141, + "step": 413 + }, + { + "epoch": 0.043670886075949364, + "grad_norm": 0.7692379355430603, + "learning_rate": 0.0014957280013773065, + "loss": 2.3942, + "step": 414 + }, + { + "epoch": 0.04377637130801688, + "grad_norm": 0.8151578903198242, + "learning_rate": 0.0014957012013617663, + "loss": 2.435, + "step": 415 + }, + { + "epoch": 0.04388185654008439, + "grad_norm": 0.9230300188064575, + "learning_rate": 0.0014956743177866882, + "loss": 2.4099, + "step": 416 + }, + { + "epoch": 0.0439873417721519, + "grad_norm": 0.8614334464073181, + "learning_rate": 0.0014956473506550845, + "loss": 2.4147, + "step": 417 + }, + { + "epoch": 0.04409282700421941, + "grad_norm": 0.7801306247711182, + "learning_rate": 0.0014956202999699773, + "loss": 2.427, + "step": 418 + }, + { + "epoch": 0.04419831223628692, + "grad_norm": 0.7336130738258362, + "learning_rate": 0.001495593165734397, + "loss": 2.4175, + "step": 419 + }, + { + "epoch": 0.04430379746835443, + "grad_norm": 0.8876241445541382, + "learning_rate": 0.001495565947951385, + "loss": 2.39, + "step": 420 + }, + { + "epoch": 0.044409282700421944, + "grad_norm": 
0.8445358276367188, + "learning_rate": 0.0014955386466239907, + "loss": 2.4107, + "step": 421 + }, + { + "epoch": 0.04451476793248945, + "grad_norm": 0.8620857000350952, + "learning_rate": 0.0014955112617552734, + "loss": 2.3936, + "step": 422 + }, + { + "epoch": 0.044620253164556964, + "grad_norm": 0.9904260039329529, + "learning_rate": 0.001495483793348302, + "loss": 2.3954, + "step": 423 + }, + { + "epoch": 0.04472573839662447, + "grad_norm": 0.8937655091285706, + "learning_rate": 0.0014954562414061538, + "loss": 2.3922, + "step": 424 + }, + { + "epoch": 0.044831223628691984, + "grad_norm": 0.957983136177063, + "learning_rate": 0.0014954286059319167, + "loss": 2.3582, + "step": 425 + }, + { + "epoch": 0.04493670886075949, + "grad_norm": 1.2095526456832886, + "learning_rate": 0.0014954008869286876, + "loss": 2.4167, + "step": 426 + }, + { + "epoch": 0.045042194092827004, + "grad_norm": 0.8740714192390442, + "learning_rate": 0.001495373084399572, + "loss": 2.3948, + "step": 427 + }, + { + "epoch": 0.04514767932489452, + "grad_norm": 0.7053276300430298, + "learning_rate": 0.0014953451983476854, + "loss": 2.3864, + "step": 428 + }, + { + "epoch": 0.045253164556962024, + "grad_norm": 0.7990120649337769, + "learning_rate": 0.0014953172287761529, + "loss": 2.3705, + "step": 429 + }, + { + "epoch": 0.04535864978902954, + "grad_norm": 0.7471515536308289, + "learning_rate": 0.0014952891756881085, + "loss": 2.3521, + "step": 430 + }, + { + "epoch": 0.045464135021097044, + "grad_norm": 0.6981036067008972, + "learning_rate": 0.0014952610390866954, + "loss": 2.3362, + "step": 431 + }, + { + "epoch": 0.04556962025316456, + "grad_norm": 0.7554906010627747, + "learning_rate": 0.0014952328189750666, + "loss": 2.368, + "step": 432 + }, + { + "epoch": 0.04567510548523207, + "grad_norm": 0.8559644222259521, + "learning_rate": 0.0014952045153563845, + "loss": 2.3642, + "step": 433 + }, + { + "epoch": 0.04578059071729958, + "grad_norm": 0.9878112077713013, + "learning_rate": 
0.0014951761282338205, + "loss": 2.3855, + "step": 434 + }, + { + "epoch": 0.04588607594936709, + "grad_norm": 1.132291316986084, + "learning_rate": 0.0014951476576105555, + "loss": 2.3757, + "step": 435 + }, + { + "epoch": 0.0459915611814346, + "grad_norm": 1.1446528434753418, + "learning_rate": 0.00149511910348978, + "loss": 2.3821, + "step": 436 + }, + { + "epoch": 0.04609704641350211, + "grad_norm": 0.6861117482185364, + "learning_rate": 0.0014950904658746933, + "loss": 2.3729, + "step": 437 + }, + { + "epoch": 0.046202531645569624, + "grad_norm": 0.8990034461021423, + "learning_rate": 0.0014950617447685047, + "loss": 2.3994, + "step": 438 + }, + { + "epoch": 0.04630801687763713, + "grad_norm": 1.244089961051941, + "learning_rate": 0.001495032940174432, + "loss": 2.3622, + "step": 439 + }, + { + "epoch": 0.046413502109704644, + "grad_norm": 0.7785615921020508, + "learning_rate": 0.0014950040520957037, + "loss": 2.3802, + "step": 440 + }, + { + "epoch": 0.04651898734177215, + "grad_norm": 0.8374646306037903, + "learning_rate": 0.0014949750805355563, + "loss": 2.3738, + "step": 441 + }, + { + "epoch": 0.04662447257383966, + "grad_norm": 0.9361991882324219, + "learning_rate": 0.0014949460254972363, + "loss": 2.3861, + "step": 442 + }, + { + "epoch": 0.04672995780590717, + "grad_norm": 0.951636791229248, + "learning_rate": 0.0014949168869839997, + "loss": 2.3538, + "step": 443 + }, + { + "epoch": 0.04683544303797468, + "grad_norm": 0.8879424929618835, + "learning_rate": 0.0014948876649991112, + "loss": 2.3694, + "step": 444 + }, + { + "epoch": 0.0469409282700422, + "grad_norm": 0.6983885765075684, + "learning_rate": 0.0014948583595458455, + "loss": 2.3512, + "step": 445 + }, + { + "epoch": 0.0470464135021097, + "grad_norm": 0.7794575691223145, + "learning_rate": 0.0014948289706274865, + "loss": 2.3856, + "step": 446 + }, + { + "epoch": 0.04715189873417722, + "grad_norm": 0.8022186160087585, + "learning_rate": 0.0014947994982473273, + "loss": 2.3723, + "step": 447 + 
}, + { + "epoch": 0.04725738396624472, + "grad_norm": 0.7987444400787354, + "learning_rate": 0.0014947699424086704, + "loss": 2.3534, + "step": 448 + }, + { + "epoch": 0.04736286919831224, + "grad_norm": 0.8128446936607361, + "learning_rate": 0.0014947403031148278, + "loss": 2.3907, + "step": 449 + }, + { + "epoch": 0.04746835443037975, + "grad_norm": 0.821545422077179, + "learning_rate": 0.0014947105803691204, + "loss": 2.3452, + "step": 450 + }, + { + "epoch": 0.047573839662447256, + "grad_norm": 0.7549449801445007, + "learning_rate": 0.0014946807741748791, + "loss": 2.375, + "step": 451 + }, + { + "epoch": 0.04767932489451477, + "grad_norm": 0.846917450428009, + "learning_rate": 0.001494650884535444, + "loss": 2.333, + "step": 452 + }, + { + "epoch": 0.047784810126582276, + "grad_norm": 0.9832485318183899, + "learning_rate": 0.0014946209114541636, + "loss": 2.4019, + "step": 453 + }, + { + "epoch": 0.04789029535864979, + "grad_norm": 0.7231648564338684, + "learning_rate": 0.0014945908549343974, + "loss": 2.3674, + "step": 454 + }, + { + "epoch": 0.047995780590717296, + "grad_norm": 0.8465123772621155, + "learning_rate": 0.001494560714979513, + "loss": 2.3422, + "step": 455 + }, + { + "epoch": 0.04810126582278481, + "grad_norm": 0.824917197227478, + "learning_rate": 0.0014945304915928875, + "loss": 2.3227, + "step": 456 + }, + { + "epoch": 0.04820675105485232, + "grad_norm": 0.9581198692321777, + "learning_rate": 0.0014945001847779082, + "loss": 2.3263, + "step": 457 + }, + { + "epoch": 0.04831223628691983, + "grad_norm": 0.8521071076393127, + "learning_rate": 0.0014944697945379708, + "loss": 2.3221, + "step": 458 + }, + { + "epoch": 0.04841772151898734, + "grad_norm": 0.7724025249481201, + "learning_rate": 0.0014944393208764805, + "loss": 2.3178, + "step": 459 + }, + { + "epoch": 0.04852320675105485, + "grad_norm": 0.8051818609237671, + "learning_rate": 0.0014944087637968522, + "loss": 2.3653, + "step": 460 + }, + { + "epoch": 0.04862869198312236, + "grad_norm": 
0.8616336584091187, + "learning_rate": 0.00149437812330251, + "loss": 2.3378, + "step": 461 + }, + { + "epoch": 0.048734177215189876, + "grad_norm": 0.8494210839271545, + "learning_rate": 0.0014943473993968871, + "loss": 2.3536, + "step": 462 + }, + { + "epoch": 0.04883966244725738, + "grad_norm": 0.7882614731788635, + "learning_rate": 0.0014943165920834266, + "loss": 2.3316, + "step": 463 + }, + { + "epoch": 0.048945147679324896, + "grad_norm": 1.0061867237091064, + "learning_rate": 0.0014942857013655806, + "loss": 2.3374, + "step": 464 + }, + { + "epoch": 0.0490506329113924, + "grad_norm": 0.7612751126289368, + "learning_rate": 0.0014942547272468103, + "loss": 2.3179, + "step": 465 + }, + { + "epoch": 0.049156118143459916, + "grad_norm": 0.9776901602745056, + "learning_rate": 0.0014942236697305866, + "loss": 2.3076, + "step": 466 + }, + { + "epoch": 0.04926160337552743, + "grad_norm": 1.0113615989685059, + "learning_rate": 0.0014941925288203897, + "loss": 2.3346, + "step": 467 + }, + { + "epoch": 0.049367088607594936, + "grad_norm": 1.068109154701233, + "learning_rate": 0.001494161304519709, + "loss": 2.3379, + "step": 468 + }, + { + "epoch": 0.04947257383966245, + "grad_norm": 0.90532386302948, + "learning_rate": 0.0014941299968320434, + "loss": 2.307, + "step": 469 + }, + { + "epoch": 0.049578059071729956, + "grad_norm": 0.7197695970535278, + "learning_rate": 0.0014940986057609012, + "loss": 2.3084, + "step": 470 + }, + { + "epoch": 0.04968354430379747, + "grad_norm": 0.8406993746757507, + "learning_rate": 0.0014940671313097998, + "loss": 2.3151, + "step": 471 + }, + { + "epoch": 0.049789029535864976, + "grad_norm": 0.9119200706481934, + "learning_rate": 0.001494035573482266, + "loss": 2.3193, + "step": 472 + }, + { + "epoch": 0.04989451476793249, + "grad_norm": 0.9890115261077881, + "learning_rate": 0.0014940039322818362, + "loss": 2.3418, + "step": 473 + }, + { + "epoch": 0.05, + "grad_norm": 0.860100507736206, + "learning_rate": 0.0014939722077120558, + 
"loss": 2.3438, + "step": 474 + }, + { + "epoch": 0.05010548523206751, + "grad_norm": 0.7290588617324829, + "learning_rate": 0.0014939403997764795, + "loss": 2.3336, + "step": 475 + }, + { + "epoch": 0.05021097046413502, + "grad_norm": 1.0448921918869019, + "learning_rate": 0.001493908508478672, + "loss": 2.3199, + "step": 476 + }, + { + "epoch": 0.05031645569620253, + "grad_norm": 1.1612119674682617, + "learning_rate": 0.0014938765338222068, + "loss": 2.2892, + "step": 477 + }, + { + "epoch": 0.05042194092827004, + "grad_norm": 0.7984516024589539, + "learning_rate": 0.0014938444758106665, + "loss": 2.364, + "step": 478 + }, + { + "epoch": 0.050527426160337556, + "grad_norm": 0.9780804514884949, + "learning_rate": 0.0014938123344476436, + "loss": 2.3087, + "step": 479 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 1.3520673513412476, + "learning_rate": 0.0014937801097367396, + "loss": 2.3276, + "step": 480 + }, + { + "epoch": 0.050738396624472576, + "grad_norm": 0.9890926480293274, + "learning_rate": 0.0014937478016815657, + "loss": 2.2847, + "step": 481 + }, + { + "epoch": 0.05084388185654008, + "grad_norm": 0.892302930355072, + "learning_rate": 0.0014937154102857416, + "loss": 2.3192, + "step": 482 + }, + { + "epoch": 0.050949367088607596, + "grad_norm": 0.858730137348175, + "learning_rate": 0.0014936829355528976, + "loss": 2.3437, + "step": 483 + }, + { + "epoch": 0.0510548523206751, + "grad_norm": 0.9059467911720276, + "learning_rate": 0.0014936503774866721, + "loss": 2.3094, + "step": 484 + }, + { + "epoch": 0.051160337552742616, + "grad_norm": 0.8372077345848083, + "learning_rate": 0.0014936177360907138, + "loss": 2.2602, + "step": 485 + }, + { + "epoch": 0.05126582278481013, + "grad_norm": 0.7935492396354675, + "learning_rate": 0.00149358501136868, + "loss": 2.2627, + "step": 486 + }, + { + "epoch": 0.051371308016877636, + "grad_norm": 0.715069591999054, + "learning_rate": 0.0014935522033242379, + "loss": 2.3143, + "step": 487 + }, + { + "epoch": 
0.05147679324894515, + "grad_norm": 0.7232562899589539, + "learning_rate": 0.0014935193119610638, + "loss": 2.2812, + "step": 488 + }, + { + "epoch": 0.051582278481012656, + "grad_norm": 0.7653799057006836, + "learning_rate": 0.0014934863372828432, + "loss": 2.2732, + "step": 489 + }, + { + "epoch": 0.05168776371308017, + "grad_norm": 0.7756609916687012, + "learning_rate": 0.001493453279293271, + "loss": 2.2995, + "step": 490 + }, + { + "epoch": 0.05179324894514768, + "grad_norm": 0.8010226488113403, + "learning_rate": 0.001493420137996052, + "loss": 2.3017, + "step": 491 + }, + { + "epoch": 0.05189873417721519, + "grad_norm": 0.883476972579956, + "learning_rate": 0.0014933869133948992, + "loss": 2.2984, + "step": 492 + }, + { + "epoch": 0.0520042194092827, + "grad_norm": 1.0204201936721802, + "learning_rate": 0.0014933536054935362, + "loss": 2.2977, + "step": 493 + }, + { + "epoch": 0.05210970464135021, + "grad_norm": 0.991952657699585, + "learning_rate": 0.0014933202142956947, + "loss": 2.2878, + "step": 494 + }, + { + "epoch": 0.05221518987341772, + "grad_norm": 0.735356330871582, + "learning_rate": 0.0014932867398051168, + "loss": 2.2803, + "step": 495 + }, + { + "epoch": 0.05232067510548523, + "grad_norm": 0.8957225680351257, + "learning_rate": 0.0014932531820255534, + "loss": 2.2707, + "step": 496 + }, + { + "epoch": 0.05242616033755274, + "grad_norm": 1.0202970504760742, + "learning_rate": 0.0014932195409607645, + "loss": 2.3017, + "step": 497 + }, + { + "epoch": 0.052531645569620256, + "grad_norm": 0.9867851734161377, + "learning_rate": 0.0014931858166145203, + "loss": 2.2693, + "step": 498 + }, + { + "epoch": 0.05263713080168776, + "grad_norm": 0.8117709755897522, + "learning_rate": 0.0014931520089905993, + "loss": 2.2889, + "step": 499 + }, + { + "epoch": 0.052742616033755275, + "grad_norm": 0.6830359101295471, + "learning_rate": 0.0014931181180927902, + "loss": 2.2783, + "step": 500 + }, + { + "epoch": 0.05284810126582278, + "grad_norm": 
0.9011273384094238, + "learning_rate": 0.0014930841439248904, + "loss": 2.2871, + "step": 501 + }, + { + "epoch": 0.052953586497890295, + "grad_norm": 1.0205243825912476, + "learning_rate": 0.0014930500864907066, + "loss": 2.3228, + "step": 502 + }, + { + "epoch": 0.05305907172995781, + "grad_norm": 1.137772798538208, + "learning_rate": 0.001493015945794056, + "loss": 2.2503, + "step": 503 + }, + { + "epoch": 0.053164556962025315, + "grad_norm": 0.799720048904419, + "learning_rate": 0.0014929817218387632, + "loss": 2.2654, + "step": 504 + }, + { + "epoch": 0.05327004219409283, + "grad_norm": 0.8709356784820557, + "learning_rate": 0.0014929474146286638, + "loss": 2.2704, + "step": 505 + }, + { + "epoch": 0.053375527426160335, + "grad_norm": 1.3274089097976685, + "learning_rate": 0.001492913024167602, + "loss": 2.3288, + "step": 506 + }, + { + "epoch": 0.05348101265822785, + "grad_norm": 0.7469685077667236, + "learning_rate": 0.001492878550459431, + "loss": 2.3005, + "step": 507 + }, + { + "epoch": 0.05358649789029536, + "grad_norm": 1.187210202217102, + "learning_rate": 0.0014928439935080143, + "loss": 2.2722, + "step": 508 + }, + { + "epoch": 0.05369198312236287, + "grad_norm": 1.3224529027938843, + "learning_rate": 0.0014928093533172243, + "loss": 2.2533, + "step": 509 + }, + { + "epoch": 0.05379746835443038, + "grad_norm": 0.7929174304008484, + "learning_rate": 0.001492774629890942, + "loss": 2.2917, + "step": 510 + }, + { + "epoch": 0.05390295358649789, + "grad_norm": 1.4420719146728516, + "learning_rate": 0.0014927398232330584, + "loss": 2.2796, + "step": 511 + }, + { + "epoch": 0.0540084388185654, + "grad_norm": 0.8455414175987244, + "learning_rate": 0.0014927049333474743, + "loss": 2.2824, + "step": 512 + }, + { + "epoch": 0.05411392405063291, + "grad_norm": 1.3444427251815796, + "learning_rate": 0.001492669960238099, + "loss": 2.284, + "step": 513 + }, + { + "epoch": 0.05421940928270042, + "grad_norm": 0.8570277690887451, + "learning_rate": 
0.001492634903908851, + "loss": 2.2784, + "step": 514 + }, + { + "epoch": 0.054324894514767935, + "grad_norm": 1.1462693214416504, + "learning_rate": 0.001492599764363659, + "loss": 2.2553, + "step": 515 + }, + { + "epoch": 0.05443037974683544, + "grad_norm": 0.9010613560676575, + "learning_rate": 0.0014925645416064605, + "loss": 2.2689, + "step": 516 + }, + { + "epoch": 0.054535864978902955, + "grad_norm": 0.9810975193977356, + "learning_rate": 0.0014925292356412025, + "loss": 2.2661, + "step": 517 + }, + { + "epoch": 0.05464135021097046, + "grad_norm": 0.9586588740348816, + "learning_rate": 0.001492493846471841, + "loss": 2.2155, + "step": 518 + }, + { + "epoch": 0.054746835443037975, + "grad_norm": 0.854088306427002, + "learning_rate": 0.0014924583741023417, + "loss": 2.2687, + "step": 519 + }, + { + "epoch": 0.05485232067510549, + "grad_norm": 0.7592766284942627, + "learning_rate": 0.001492422818536679, + "loss": 2.2671, + "step": 520 + }, + { + "epoch": 0.054957805907172995, + "grad_norm": 0.9777128100395203, + "learning_rate": 0.0014923871797788378, + "loss": 2.2725, + "step": 521 + }, + { + "epoch": 0.05506329113924051, + "grad_norm": 0.8035388588905334, + "learning_rate": 0.001492351457832811, + "loss": 2.2299, + "step": 522 + }, + { + "epoch": 0.055168776371308015, + "grad_norm": 1.0524787902832031, + "learning_rate": 0.0014923156527026017, + "loss": 2.2634, + "step": 523 + }, + { + "epoch": 0.05527426160337553, + "grad_norm": 1.1118743419647217, + "learning_rate": 0.001492279764392222, + "loss": 2.2764, + "step": 524 + }, + { + "epoch": 0.055379746835443035, + "grad_norm": 0.7706770896911621, + "learning_rate": 0.0014922437929056934, + "loss": 2.2527, + "step": 525 + }, + { + "epoch": 0.05548523206751055, + "grad_norm": 1.1368579864501953, + "learning_rate": 0.0014922077382470468, + "loss": 2.2404, + "step": 526 + }, + { + "epoch": 0.05559071729957806, + "grad_norm": 0.8062471747398376, + "learning_rate": 0.001492171600420322, + "loss": 2.222, + "step": 
527 + }, + { + "epoch": 0.05569620253164557, + "grad_norm": 0.7496657967567444, + "learning_rate": 0.0014921353794295684, + "loss": 2.2502, + "step": 528 + }, + { + "epoch": 0.05580168776371308, + "grad_norm": 0.8133280277252197, + "learning_rate": 0.001492099075278845, + "loss": 2.2583, + "step": 529 + }, + { + "epoch": 0.05590717299578059, + "grad_norm": 0.7581446766853333, + "learning_rate": 0.00149206268797222, + "loss": 2.2342, + "step": 530 + }, + { + "epoch": 0.0560126582278481, + "grad_norm": 0.8025121688842773, + "learning_rate": 0.0014920262175137703, + "loss": 2.2428, + "step": 531 + }, + { + "epoch": 0.056118143459915615, + "grad_norm": 0.8092765212059021, + "learning_rate": 0.001491989663907583, + "loss": 2.2104, + "step": 532 + }, + { + "epoch": 0.05622362869198312, + "grad_norm": 0.9102513790130615, + "learning_rate": 0.001491953027157754, + "loss": 2.2319, + "step": 533 + }, + { + "epoch": 0.056329113924050635, + "grad_norm": 0.8683795928955078, + "learning_rate": 0.0014919163072683883, + "loss": 2.2276, + "step": 534 + }, + { + "epoch": 0.05643459915611814, + "grad_norm": 0.7774583101272583, + "learning_rate": 0.0014918795042436013, + "loss": 2.2379, + "step": 535 + }, + { + "epoch": 0.056540084388185655, + "grad_norm": 0.7291626930236816, + "learning_rate": 0.001491842618087516, + "loss": 2.2255, + "step": 536 + }, + { + "epoch": 0.05664556962025316, + "grad_norm": 0.7228419184684753, + "learning_rate": 0.0014918056488042665, + "loss": 2.203, + "step": 537 + }, + { + "epoch": 0.056751054852320675, + "grad_norm": 0.7227886915206909, + "learning_rate": 0.0014917685963979949, + "loss": 2.2289, + "step": 538 + }, + { + "epoch": 0.05685654008438819, + "grad_norm": 0.7046857476234436, + "learning_rate": 0.0014917314608728536, + "loss": 2.2393, + "step": 539 + }, + { + "epoch": 0.056962025316455694, + "grad_norm": 0.8276526927947998, + "learning_rate": 0.0014916942422330032, + "loss": 2.2376, + "step": 540 + }, + { + "epoch": 0.05706751054852321, + 
"grad_norm": 0.853658139705658, + "learning_rate": 0.0014916569404826146, + "loss": 2.2599, + "step": 541 + }, + { + "epoch": 0.057172995780590714, + "grad_norm": 0.7738600969314575, + "learning_rate": 0.0014916195556258676, + "loss": 2.2413, + "step": 542 + }, + { + "epoch": 0.05727848101265823, + "grad_norm": 0.7568380832672119, + "learning_rate": 0.0014915820876669514, + "loss": 2.2034, + "step": 543 + }, + { + "epoch": 0.05738396624472574, + "grad_norm": 0.7528659701347351, + "learning_rate": 0.0014915445366100641, + "loss": 2.2337, + "step": 544 + }, + { + "epoch": 0.05748945147679325, + "grad_norm": 0.8218098282814026, + "learning_rate": 0.0014915069024594144, + "loss": 2.2405, + "step": 545 + }, + { + "epoch": 0.05759493670886076, + "grad_norm": 0.8951519727706909, + "learning_rate": 0.0014914691852192183, + "loss": 2.2276, + "step": 546 + }, + { + "epoch": 0.05770042194092827, + "grad_norm": 0.8911035060882568, + "learning_rate": 0.001491431384893703, + "loss": 2.2282, + "step": 547 + }, + { + "epoch": 0.05780590717299578, + "grad_norm": 1.0021995306015015, + "learning_rate": 0.0014913935014871035, + "loss": 2.2433, + "step": 548 + }, + { + "epoch": 0.057911392405063294, + "grad_norm": 0.8929442763328552, + "learning_rate": 0.0014913555350036657, + "loss": 2.2381, + "step": 549 + }, + { + "epoch": 0.0580168776371308, + "grad_norm": 0.7290270328521729, + "learning_rate": 0.001491317485447643, + "loss": 2.2048, + "step": 550 + }, + { + "epoch": 0.058122362869198314, + "grad_norm": 0.8756986856460571, + "learning_rate": 0.0014912793528233, + "loss": 2.1775, + "step": 551 + }, + { + "epoch": 0.05822784810126582, + "grad_norm": 1.1025902032852173, + "learning_rate": 0.0014912411371349088, + "loss": 2.2249, + "step": 552 + }, + { + "epoch": 0.058333333333333334, + "grad_norm": 0.8786170482635498, + "learning_rate": 0.0014912028383867522, + "loss": 2.2242, + "step": 553 + }, + { + "epoch": 0.05843881856540084, + "grad_norm": 0.7357375621795654, + "learning_rate": 
0.0014911644565831217, + "loss": 2.1681, + "step": 554 + }, + { + "epoch": 0.058544303797468354, + "grad_norm": 0.9858644604682922, + "learning_rate": 0.001491125991728318, + "loss": 2.1697, + "step": 555 + }, + { + "epoch": 0.05864978902953587, + "grad_norm": 1.1306626796722412, + "learning_rate": 0.001491087443826651, + "loss": 2.2143, + "step": 556 + }, + { + "epoch": 0.058755274261603374, + "grad_norm": 0.8595799803733826, + "learning_rate": 0.0014910488128824409, + "loss": 2.2363, + "step": 557 + }, + { + "epoch": 0.05886075949367089, + "grad_norm": 0.7569752335548401, + "learning_rate": 0.0014910100989000159, + "loss": 2.1875, + "step": 558 + }, + { + "epoch": 0.058966244725738394, + "grad_norm": 0.7176315784454346, + "learning_rate": 0.0014909713018837144, + "loss": 2.1608, + "step": 559 + }, + { + "epoch": 0.05907172995780591, + "grad_norm": 0.7648336291313171, + "learning_rate": 0.0014909324218378838, + "loss": 2.1943, + "step": 560 + }, + { + "epoch": 0.05917721518987342, + "grad_norm": 0.832628071308136, + "learning_rate": 0.0014908934587668805, + "loss": 2.1978, + "step": 561 + }, + { + "epoch": 0.05928270042194093, + "grad_norm": 0.7534613013267517, + "learning_rate": 0.001490854412675071, + "loss": 2.1832, + "step": 562 + }, + { + "epoch": 0.05938818565400844, + "grad_norm": 0.7494786381721497, + "learning_rate": 0.0014908152835668301, + "loss": 2.2214, + "step": 563 + }, + { + "epoch": 0.05949367088607595, + "grad_norm": 0.7126140594482422, + "learning_rate": 0.0014907760714465428, + "loss": 2.195, + "step": 564 + }, + { + "epoch": 0.05959915611814346, + "grad_norm": 0.6769441962242126, + "learning_rate": 0.0014907367763186026, + "loss": 2.2047, + "step": 565 + }, + { + "epoch": 0.05970464135021097, + "grad_norm": 0.7484626770019531, + "learning_rate": 0.0014906973981874132, + "loss": 2.2104, + "step": 566 + }, + { + "epoch": 0.05981012658227848, + "grad_norm": 0.7877711057662964, + "learning_rate": 0.0014906579370573868, + "loss": 2.2161, + "step": 
567 + }, + { + "epoch": 0.059915611814345994, + "grad_norm": 0.8475490212440491, + "learning_rate": 0.0014906183929329455, + "loss": 2.159, + "step": 568 + }, + { + "epoch": 0.0600210970464135, + "grad_norm": 0.7579385638237, + "learning_rate": 0.00149057876581852, + "loss": 2.137, + "step": 569 + }, + { + "epoch": 0.060126582278481014, + "grad_norm": 0.7557674050331116, + "learning_rate": 0.0014905390557185508, + "loss": 2.2361, + "step": 570 + }, + { + "epoch": 0.06023206751054852, + "grad_norm": 0.6999828219413757, + "learning_rate": 0.0014904992626374879, + "loss": 2.2199, + "step": 571 + }, + { + "epoch": 0.060337552742616034, + "grad_norm": 0.7201787829399109, + "learning_rate": 0.0014904593865797903, + "loss": 2.2048, + "step": 572 + }, + { + "epoch": 0.06044303797468355, + "grad_norm": 0.749936044216156, + "learning_rate": 0.0014904194275499258, + "loss": 2.2083, + "step": 573 + }, + { + "epoch": 0.060548523206751054, + "grad_norm": 0.7761785387992859, + "learning_rate": 0.0014903793855523726, + "loss": 2.1645, + "step": 574 + }, + { + "epoch": 0.06065400843881857, + "grad_norm": 0.9228598475456238, + "learning_rate": 0.0014903392605916175, + "loss": 2.2281, + "step": 575 + }, + { + "epoch": 0.060759493670886074, + "grad_norm": 1.0719666481018066, + "learning_rate": 0.0014902990526721564, + "loss": 2.2027, + "step": 576 + }, + { + "epoch": 0.06086497890295359, + "grad_norm": 1.2087337970733643, + "learning_rate": 0.0014902587617984951, + "loss": 2.1928, + "step": 577 + }, + { + "epoch": 0.0609704641350211, + "grad_norm": 0.6675288677215576, + "learning_rate": 0.0014902183879751483, + "loss": 2.1891, + "step": 578 + }, + { + "epoch": 0.06107594936708861, + "grad_norm": 1.0098165273666382, + "learning_rate": 0.0014901779312066399, + "loss": 2.1973, + "step": 579 + }, + { + "epoch": 0.06118143459915612, + "grad_norm": 1.4001551866531372, + "learning_rate": 0.0014901373914975036, + "loss": 2.2004, + "step": 580 + }, + { + "epoch": 0.06128691983122363, + 
"grad_norm": 0.8232523202896118, + "learning_rate": 0.0014900967688522818, + "loss": 2.1872, + "step": 581 + }, + { + "epoch": 0.06139240506329114, + "grad_norm": 1.0149213075637817, + "learning_rate": 0.0014900560632755265, + "loss": 2.1562, + "step": 582 + }, + { + "epoch": 0.06149789029535865, + "grad_norm": 1.1258769035339355, + "learning_rate": 0.0014900152747717994, + "loss": 2.2035, + "step": 583 + }, + { + "epoch": 0.06160337552742616, + "grad_norm": 0.8336349725723267, + "learning_rate": 0.0014899744033456705, + "loss": 2.1707, + "step": 584 + }, + { + "epoch": 0.061708860759493674, + "grad_norm": 1.0030862092971802, + "learning_rate": 0.0014899334490017198, + "loss": 2.2122, + "step": 585 + }, + { + "epoch": 0.06181434599156118, + "grad_norm": 0.9166681170463562, + "learning_rate": 0.0014898924117445367, + "loss": 2.1808, + "step": 586 + }, + { + "epoch": 0.061919831223628694, + "grad_norm": 0.7517438530921936, + "learning_rate": 0.0014898512915787192, + "loss": 2.1464, + "step": 587 + }, + { + "epoch": 0.0620253164556962, + "grad_norm": 0.8905476331710815, + "learning_rate": 0.0014898100885088754, + "loss": 2.1557, + "step": 588 + }, + { + "epoch": 0.06213080168776371, + "grad_norm": 0.8131250143051147, + "learning_rate": 0.001489768802539622, + "loss": 2.1851, + "step": 589 + }, + { + "epoch": 0.06223628691983123, + "grad_norm": 0.7173609137535095, + "learning_rate": 0.0014897274336755856, + "loss": 2.1201, + "step": 590 + }, + { + "epoch": 0.06234177215189873, + "grad_norm": 0.7634925246238708, + "learning_rate": 0.0014896859819214018, + "loss": 2.1524, + "step": 591 + }, + { + "epoch": 0.06244725738396625, + "grad_norm": 0.8387352824211121, + "learning_rate": 0.001489644447281715, + "loss": 2.1451, + "step": 592 + }, + { + "epoch": 0.06255274261603376, + "grad_norm": 0.7606543302536011, + "learning_rate": 0.00148960282976118, + "loss": 2.1778, + "step": 593 + }, + { + "epoch": 0.06265822784810127, + "grad_norm": 0.7614926695823669, + "learning_rate": 
0.0014895611293644596, + "loss": 2.1364, + "step": 594 + }, + { + "epoch": 0.06276371308016877, + "grad_norm": 0.7809138298034668, + "learning_rate": 0.0014895193460962271, + "loss": 2.1637, + "step": 595 + }, + { + "epoch": 0.0628691983122363, + "grad_norm": 0.7303262948989868, + "learning_rate": 0.001489477479961164, + "loss": 2.1153, + "step": 596 + }, + { + "epoch": 0.0629746835443038, + "grad_norm": 0.7297417521476746, + "learning_rate": 0.0014894355309639621, + "loss": 2.1594, + "step": 597 + }, + { + "epoch": 0.0630801687763713, + "grad_norm": 0.8164485692977905, + "learning_rate": 0.0014893934991093221, + "loss": 2.1981, + "step": 598 + }, + { + "epoch": 0.06318565400843881, + "grad_norm": 0.9370778203010559, + "learning_rate": 0.0014893513844019533, + "loss": 2.1398, + "step": 599 + }, + { + "epoch": 0.06329113924050633, + "grad_norm": 1.0865521430969238, + "learning_rate": 0.001489309186846575, + "loss": 2.1582, + "step": 600 + }, + { + "epoch": 0.06339662447257384, + "grad_norm": 1.005141258239746, + "learning_rate": 0.001489266906447916, + "loss": 2.177, + "step": 601 + }, + { + "epoch": 0.06350210970464135, + "grad_norm": 0.784963071346283, + "learning_rate": 0.0014892245432107138, + "loss": 2.1542, + "step": 602 + }, + { + "epoch": 0.06360759493670887, + "grad_norm": 0.6826410889625549, + "learning_rate": 0.0014891820971397152, + "loss": 2.1808, + "step": 603 + }, + { + "epoch": 0.06371308016877637, + "grad_norm": 0.7510600686073303, + "learning_rate": 0.001489139568239677, + "loss": 2.1208, + "step": 604 + }, + { + "epoch": 0.06381856540084388, + "grad_norm": 0.6897299289703369, + "learning_rate": 0.0014890969565153642, + "loss": 2.1517, + "step": 605 + }, + { + "epoch": 0.06392405063291139, + "grad_norm": 0.6920267939567566, + "learning_rate": 0.0014890542619715522, + "loss": 2.1303, + "step": 606 + }, + { + "epoch": 0.0640295358649789, + "grad_norm": 0.6962433457374573, + "learning_rate": 0.0014890114846130248, + "loss": 2.1773, + "step": 607 + }, 
+ { + "epoch": 0.06413502109704641, + "grad_norm": 0.7881858348846436, + "learning_rate": 0.0014889686244445755, + "loss": 2.1969, + "step": 608 + }, + { + "epoch": 0.06424050632911392, + "grad_norm": 0.839191198348999, + "learning_rate": 0.0014889256814710071, + "loss": 2.1531, + "step": 609 + }, + { + "epoch": 0.06434599156118144, + "grad_norm": 0.8057103753089905, + "learning_rate": 0.0014888826556971313, + "loss": 2.1436, + "step": 610 + }, + { + "epoch": 0.06445147679324895, + "grad_norm": 0.7134159803390503, + "learning_rate": 0.0014888395471277698, + "loss": 2.161, + "step": 611 + }, + { + "epoch": 0.06455696202531645, + "grad_norm": 0.8251769542694092, + "learning_rate": 0.0014887963557677526, + "loss": 2.139, + "step": 612 + }, + { + "epoch": 0.06466244725738397, + "grad_norm": 0.8937616348266602, + "learning_rate": 0.00148875308162192, + "loss": 2.126, + "step": 613 + }, + { + "epoch": 0.06476793248945148, + "grad_norm": 0.9106521606445312, + "learning_rate": 0.0014887097246951205, + "loss": 2.1037, + "step": 614 + }, + { + "epoch": 0.06487341772151899, + "grad_norm": 0.9349458813667297, + "learning_rate": 0.001488666284992213, + "loss": 2.1595, + "step": 615 + }, + { + "epoch": 0.06497890295358649, + "grad_norm": 0.838240385055542, + "learning_rate": 0.001488622762518065, + "loss": 2.1478, + "step": 616 + }, + { + "epoch": 0.06508438818565401, + "grad_norm": 0.7340818047523499, + "learning_rate": 0.0014885791572775533, + "loss": 2.1391, + "step": 617 + }, + { + "epoch": 0.06518987341772152, + "grad_norm": 0.6401148438453674, + "learning_rate": 0.0014885354692755642, + "loss": 2.1561, + "step": 618 + }, + { + "epoch": 0.06529535864978903, + "grad_norm": 0.7366956472396851, + "learning_rate": 0.001488491698516993, + "loss": 2.1729, + "step": 619 + }, + { + "epoch": 0.06540084388185655, + "grad_norm": 0.7732042670249939, + "learning_rate": 0.0014884478450067444, + "loss": 2.1491, + "step": 620 + }, + { + "epoch": 0.06550632911392405, + "grad_norm": 
0.8364936113357544, + "learning_rate": 0.001488403908749733, + "loss": 2.1232, + "step": 621 + }, + { + "epoch": 0.06561181434599156, + "grad_norm": 0.823547899723053, + "learning_rate": 0.0014883598897508811, + "loss": 2.1629, + "step": 622 + }, + { + "epoch": 0.06571729957805907, + "grad_norm": 0.7127820253372192, + "learning_rate": 0.0014883157880151222, + "loss": 2.1267, + "step": 623 + }, + { + "epoch": 0.06582278481012659, + "grad_norm": 0.7255997657775879, + "learning_rate": 0.0014882716035473974, + "loss": 2.1067, + "step": 624 + }, + { + "epoch": 0.06592827004219409, + "grad_norm": 0.7278724312782288, + "learning_rate": 0.001488227336352658, + "loss": 2.1324, + "step": 625 + }, + { + "epoch": 0.0660337552742616, + "grad_norm": 0.7428761720657349, + "learning_rate": 0.0014881829864358644, + "loss": 2.1168, + "step": 626 + }, + { + "epoch": 0.06613924050632912, + "grad_norm": 0.8635377883911133, + "learning_rate": 0.0014881385538019867, + "loss": 2.1527, + "step": 627 + }, + { + "epoch": 0.06624472573839663, + "grad_norm": 0.8804657459259033, + "learning_rate": 0.0014880940384560028, + "loss": 2.1306, + "step": 628 + }, + { + "epoch": 0.06635021097046413, + "grad_norm": 1.0086274147033691, + "learning_rate": 0.0014880494404029016, + "loss": 2.1469, + "step": 629 + }, + { + "epoch": 0.06645569620253164, + "grad_norm": 1.1296788454055786, + "learning_rate": 0.0014880047596476807, + "loss": 2.1056, + "step": 630 + }, + { + "epoch": 0.06656118143459916, + "grad_norm": 0.9491087198257446, + "learning_rate": 0.0014879599961953461, + "loss": 2.1198, + "step": 631 + }, + { + "epoch": 0.06666666666666667, + "grad_norm": 0.9360904693603516, + "learning_rate": 0.0014879151500509142, + "loss": 2.1492, + "step": 632 + }, + { + "epoch": 0.06677215189873417, + "grad_norm": 1.079441785812378, + "learning_rate": 0.0014878702212194103, + "loss": 2.1418, + "step": 633 + }, + { + "epoch": 0.06687763713080169, + "grad_norm": 1.048951268196106, + "learning_rate": 
0.0014878252097058685, + "loss": 2.1027, + "step": 634 + }, + { + "epoch": 0.0669831223628692, + "grad_norm": 0.7208820581436157, + "learning_rate": 0.001487780115515333, + "loss": 2.1685, + "step": 635 + }, + { + "epoch": 0.0670886075949367, + "grad_norm": 0.7811060547828674, + "learning_rate": 0.0014877349386528565, + "loss": 2.1256, + "step": 636 + }, + { + "epoch": 0.06719409282700423, + "grad_norm": 0.885992705821991, + "learning_rate": 0.0014876896791235015, + "loss": 2.178, + "step": 637 + }, + { + "epoch": 0.06729957805907173, + "grad_norm": 0.8561968207359314, + "learning_rate": 0.0014876443369323397, + "loss": 2.1445, + "step": 638 + }, + { + "epoch": 0.06740506329113924, + "grad_norm": 0.7810215353965759, + "learning_rate": 0.0014875989120844517, + "loss": 2.1357, + "step": 639 + }, + { + "epoch": 0.06751054852320675, + "grad_norm": 0.7314838767051697, + "learning_rate": 0.0014875534045849274, + "loss": 2.1371, + "step": 640 + }, + { + "epoch": 0.06761603375527427, + "grad_norm": 0.9736763834953308, + "learning_rate": 0.0014875078144388665, + "loss": 2.1284, + "step": 641 + }, + { + "epoch": 0.06772151898734177, + "grad_norm": 1.1363468170166016, + "learning_rate": 0.0014874621416513774, + "loss": 2.1329, + "step": 642 + }, + { + "epoch": 0.06782700421940928, + "grad_norm": 0.9408426880836487, + "learning_rate": 0.001487416386227578, + "loss": 2.1501, + "step": 643 + }, + { + "epoch": 0.0679324894514768, + "grad_norm": 0.660031795501709, + "learning_rate": 0.0014873705481725952, + "loss": 2.0851, + "step": 644 + }, + { + "epoch": 0.0680379746835443, + "grad_norm": 0.7558926343917847, + "learning_rate": 0.0014873246274915658, + "loss": 2.104, + "step": 645 + }, + { + "epoch": 0.06814345991561181, + "grad_norm": 0.7763336896896362, + "learning_rate": 0.0014872786241896354, + "loss": 2.0897, + "step": 646 + }, + { + "epoch": 0.06824894514767932, + "grad_norm": 0.933853268623352, + "learning_rate": 0.0014872325382719587, + "loss": 2.1626, + "step": 647 + }, 
+ { + "epoch": 0.06835443037974684, + "grad_norm": 0.9009565711021423, + "learning_rate": 0.0014871863697436998, + "loss": 2.1716, + "step": 648 + }, + { + "epoch": 0.06845991561181435, + "grad_norm": 0.8859764337539673, + "learning_rate": 0.0014871401186100322, + "loss": 2.1243, + "step": 649 + }, + { + "epoch": 0.06856540084388185, + "grad_norm": 0.7940826416015625, + "learning_rate": 0.0014870937848761388, + "loss": 2.1484, + "step": 650 + }, + { + "epoch": 0.06867088607594937, + "grad_norm": 0.7556443214416504, + "learning_rate": 0.0014870473685472112, + "loss": 2.0909, + "step": 651 + }, + { + "epoch": 0.06877637130801688, + "grad_norm": 0.912250280380249, + "learning_rate": 0.0014870008696284507, + "loss": 2.1215, + "step": 652 + }, + { + "epoch": 0.06888185654008439, + "grad_norm": 0.9495176076889038, + "learning_rate": 0.0014869542881250678, + "loss": 2.1328, + "step": 653 + }, + { + "epoch": 0.0689873417721519, + "grad_norm": 0.7421438694000244, + "learning_rate": 0.001486907624042282, + "loss": 2.142, + "step": 654 + }, + { + "epoch": 0.06909282700421941, + "grad_norm": 0.9031786918640137, + "learning_rate": 0.0014868608773853226, + "loss": 2.1206, + "step": 655 + }, + { + "epoch": 0.06919831223628692, + "grad_norm": 0.9039426445960999, + "learning_rate": 0.0014868140481594273, + "loss": 2.1287, + "step": 656 + }, + { + "epoch": 0.06930379746835443, + "grad_norm": 0.8175007700920105, + "learning_rate": 0.001486767136369844, + "loss": 2.0931, + "step": 657 + }, + { + "epoch": 0.06940928270042195, + "grad_norm": 0.830953061580658, + "learning_rate": 0.0014867201420218292, + "loss": 2.0867, + "step": 658 + }, + { + "epoch": 0.06951476793248945, + "grad_norm": 0.8680970072746277, + "learning_rate": 0.0014866730651206487, + "loss": 2.1236, + "step": 659 + }, + { + "epoch": 0.06962025316455696, + "grad_norm": 0.8000528812408447, + "learning_rate": 0.001486625905671578, + "loss": 2.1157, + "step": 660 + }, + { + "epoch": 0.06972573839662448, + "grad_norm": 
0.8050628304481506, + "learning_rate": 0.0014865786636799015, + "loss": 2.0961, + "step": 661 + }, + { + "epoch": 0.06983122362869199, + "grad_norm": 0.7464137673377991, + "learning_rate": 0.0014865313391509126, + "loss": 2.087, + "step": 662 + }, + { + "epoch": 0.06993670886075949, + "grad_norm": 0.8879184126853943, + "learning_rate": 0.0014864839320899148, + "loss": 2.0442, + "step": 663 + }, + { + "epoch": 0.070042194092827, + "grad_norm": 0.737315833568573, + "learning_rate": 0.0014864364425022198, + "loss": 2.1189, + "step": 664 + }, + { + "epoch": 0.07014767932489452, + "grad_norm": 0.8622320294380188, + "learning_rate": 0.001486388870393149, + "loss": 2.1021, + "step": 665 + }, + { + "epoch": 0.07025316455696203, + "grad_norm": 0.7799750566482544, + "learning_rate": 0.0014863412157680336, + "loss": 2.0743, + "step": 666 + }, + { + "epoch": 0.07035864978902953, + "grad_norm": 0.7476623058319092, + "learning_rate": 0.0014862934786322131, + "loss": 2.1228, + "step": 667 + }, + { + "epoch": 0.07046413502109705, + "grad_norm": 0.9567201733589172, + "learning_rate": 0.0014862456589910368, + "loss": 2.0793, + "step": 668 + }, + { + "epoch": 0.07056962025316456, + "grad_norm": 0.7988725900650024, + "learning_rate": 0.0014861977568498632, + "loss": 2.1404, + "step": 669 + }, + { + "epoch": 0.07067510548523206, + "grad_norm": 0.8471091985702515, + "learning_rate": 0.00148614977221406, + "loss": 2.1273, + "step": 670 + }, + { + "epoch": 0.07078059071729957, + "grad_norm": 0.8345546722412109, + "learning_rate": 0.001486101705089004, + "loss": 2.118, + "step": 671 + }, + { + "epoch": 0.07088607594936709, + "grad_norm": 0.8938785791397095, + "learning_rate": 0.0014860535554800814, + "loss": 2.0944, + "step": 672 + }, + { + "epoch": 0.0709915611814346, + "grad_norm": 1.018122911453247, + "learning_rate": 0.0014860053233926875, + "loss": 2.1098, + "step": 673 + }, + { + "epoch": 0.0710970464135021, + "grad_norm": 0.8986945152282715, + "learning_rate": 0.0014859570088322273, 
+ "loss": 2.0439, + "step": 674 + }, + { + "epoch": 0.07120253164556962, + "grad_norm": 0.7479719519615173, + "learning_rate": 0.0014859086118041145, + "loss": 2.0992, + "step": 675 + }, + { + "epoch": 0.07130801687763713, + "grad_norm": 0.761841356754303, + "learning_rate": 0.001485860132313772, + "loss": 2.0925, + "step": 676 + }, + { + "epoch": 0.07141350210970464, + "grad_norm": 0.8992645740509033, + "learning_rate": 0.0014858115703666325, + "loss": 2.0836, + "step": 677 + }, + { + "epoch": 0.07151898734177216, + "grad_norm": 1.2079108953475952, + "learning_rate": 0.001485762925968137, + "loss": 2.0976, + "step": 678 + }, + { + "epoch": 0.07162447257383966, + "grad_norm": 0.892239511013031, + "learning_rate": 0.0014857141991237372, + "loss": 2.0792, + "step": 679 + }, + { + "epoch": 0.07172995780590717, + "grad_norm": 0.7609373331069946, + "learning_rate": 0.0014856653898388927, + "loss": 2.1066, + "step": 680 + }, + { + "epoch": 0.07183544303797468, + "grad_norm": 0.9173004031181335, + "learning_rate": 0.0014856164981190728, + "loss": 2.1, + "step": 681 + }, + { + "epoch": 0.0719409282700422, + "grad_norm": 1.125168800354004, + "learning_rate": 0.0014855675239697564, + "loss": 2.1194, + "step": 682 + }, + { + "epoch": 0.0720464135021097, + "grad_norm": 0.9497778415679932, + "learning_rate": 0.0014855184673964311, + "loss": 2.1036, + "step": 683 + }, + { + "epoch": 0.07215189873417721, + "grad_norm": 0.8035500049591064, + "learning_rate": 0.0014854693284045936, + "loss": 2.102, + "step": 684 + }, + { + "epoch": 0.07225738396624473, + "grad_norm": 0.6808021068572998, + "learning_rate": 0.0014854201069997505, + "loss": 2.0843, + "step": 685 + }, + { + "epoch": 0.07236286919831224, + "grad_norm": 0.9097883105278015, + "learning_rate": 0.0014853708031874176, + "loss": 2.1243, + "step": 686 + }, + { + "epoch": 0.07246835443037974, + "grad_norm": 0.928828775882721, + "learning_rate": 0.001485321416973119, + "loss": 2.0899, + "step": 687 + }, + { + "epoch": 
0.07257383966244725, + "grad_norm": 0.8487655520439148, + "learning_rate": 0.0014852719483623893, + "loss": 2.1364, + "step": 688 + }, + { + "epoch": 0.07267932489451477, + "grad_norm": 0.7588637471199036, + "learning_rate": 0.001485222397360771, + "loss": 2.0479, + "step": 689 + }, + { + "epoch": 0.07278481012658228, + "grad_norm": 0.6523399949073792, + "learning_rate": 0.001485172763973817, + "loss": 2.109, + "step": 690 + }, + { + "epoch": 0.07289029535864978, + "grad_norm": 0.6865407824516296, + "learning_rate": 0.0014851230482070892, + "loss": 2.0939, + "step": 691 + }, + { + "epoch": 0.0729957805907173, + "grad_norm": 0.7429556846618652, + "learning_rate": 0.001485073250066158, + "loss": 2.0716, + "step": 692 + }, + { + "epoch": 0.07310126582278481, + "grad_norm": 0.9739676713943481, + "learning_rate": 0.0014850233695566034, + "loss": 2.0947, + "step": 693 + }, + { + "epoch": 0.07320675105485232, + "grad_norm": 1.2154453992843628, + "learning_rate": 0.0014849734066840158, + "loss": 2.144, + "step": 694 + }, + { + "epoch": 0.07331223628691984, + "grad_norm": 0.7952699661254883, + "learning_rate": 0.0014849233614539926, + "loss": 2.0936, + "step": 695 + }, + { + "epoch": 0.07341772151898734, + "grad_norm": 1.0296688079833984, + "learning_rate": 0.001484873233872142, + "loss": 2.0718, + "step": 696 + }, + { + "epoch": 0.07352320675105485, + "grad_norm": 1.2016985416412354, + "learning_rate": 0.0014848230239440812, + "loss": 2.0988, + "step": 697 + }, + { + "epoch": 0.07362869198312236, + "grad_norm": 0.8011865019798279, + "learning_rate": 0.0014847727316754367, + "loss": 2.07, + "step": 698 + }, + { + "epoch": 0.07373417721518988, + "grad_norm": 0.9635696411132812, + "learning_rate": 0.0014847223570718436, + "loss": 2.0546, + "step": 699 + }, + { + "epoch": 0.07383966244725738, + "grad_norm": 0.9056360125541687, + "learning_rate": 0.0014846719001389466, + "loss": 2.0569, + "step": 700 + }, + { + "epoch": 0.07394514767932489, + "grad_norm": 0.8601135015487671, + 
"learning_rate": 0.0014846213608823997, + "loss": 2.0486, + "step": 701 + }, + { + "epoch": 0.07405063291139241, + "grad_norm": 0.9005991816520691, + "learning_rate": 0.0014845707393078664, + "loss": 2.0573, + "step": 702 + }, + { + "epoch": 0.07415611814345992, + "grad_norm": 0.8128471970558167, + "learning_rate": 0.0014845200354210186, + "loss": 2.0995, + "step": 703 + }, + { + "epoch": 0.07426160337552742, + "grad_norm": 0.8340721130371094, + "learning_rate": 0.0014844692492275385, + "loss": 2.0699, + "step": 704 + }, + { + "epoch": 0.07436708860759493, + "grad_norm": 0.8185584545135498, + "learning_rate": 0.0014844183807331164, + "loss": 2.0602, + "step": 705 + }, + { + "epoch": 0.07447257383966245, + "grad_norm": 0.9111099243164062, + "learning_rate": 0.0014843674299434527, + "loss": 2.0878, + "step": 706 + }, + { + "epoch": 0.07457805907172996, + "grad_norm": 0.9174075722694397, + "learning_rate": 0.0014843163968642566, + "loss": 2.0607, + "step": 707 + }, + { + "epoch": 0.07468354430379746, + "grad_norm": 0.9793227314949036, + "learning_rate": 0.0014842652815012466, + "loss": 2.0903, + "step": 708 + }, + { + "epoch": 0.07478902953586498, + "grad_norm": 0.8130234479904175, + "learning_rate": 0.0014842140838601501, + "loss": 2.0773, + "step": 709 + }, + { + "epoch": 0.07489451476793249, + "grad_norm": 0.7601045966148376, + "learning_rate": 0.001484162803946705, + "loss": 2.0729, + "step": 710 + }, + { + "epoch": 0.075, + "grad_norm": 0.7053144574165344, + "learning_rate": 0.0014841114417666564, + "loss": 2.0651, + "step": 711 + }, + { + "epoch": 0.0751054852320675, + "grad_norm": 0.7494787573814392, + "learning_rate": 0.0014840599973257604, + "loss": 2.0784, + "step": 712 + }, + { + "epoch": 0.07521097046413502, + "grad_norm": 0.7846508622169495, + "learning_rate": 0.001484008470629781, + "loss": 2.0629, + "step": 713 + }, + { + "epoch": 0.07531645569620253, + "grad_norm": 0.8855875730514526, + "learning_rate": 0.0014839568616844927, + "loss": 2.0376, + 
"step": 714 + }, + { + "epoch": 0.07542194092827004, + "grad_norm": 0.6914259195327759, + "learning_rate": 0.0014839051704956781, + "loss": 2.0722, + "step": 715 + }, + { + "epoch": 0.07552742616033756, + "grad_norm": 1.0472743511199951, + "learning_rate": 0.0014838533970691296, + "loss": 2.0528, + "step": 716 + }, + { + "epoch": 0.07563291139240506, + "grad_norm": 1.087383508682251, + "learning_rate": 0.0014838015414106486, + "loss": 2.0461, + "step": 717 + }, + { + "epoch": 0.07573839662447257, + "grad_norm": 0.8406429886817932, + "learning_rate": 0.0014837496035260457, + "loss": 2.0141, + "step": 718 + }, + { + "epoch": 0.07584388185654009, + "grad_norm": 0.8413430452346802, + "learning_rate": 0.0014836975834211412, + "loss": 2.053, + "step": 719 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 0.9252010583877563, + "learning_rate": 0.0014836454811017635, + "loss": 2.0308, + "step": 720 + }, + { + "epoch": 0.0760548523206751, + "grad_norm": 0.9471124410629272, + "learning_rate": 0.0014835932965737517, + "loss": 2.0699, + "step": 721 + }, + { + "epoch": 0.07616033755274261, + "grad_norm": 0.9818708300590515, + "learning_rate": 0.0014835410298429529, + "loss": 2.0587, + "step": 722 + }, + { + "epoch": 0.07626582278481013, + "grad_norm": 0.8826227784156799, + "learning_rate": 0.001483488680915224, + "loss": 2.0533, + "step": 723 + }, + { + "epoch": 0.07637130801687764, + "grad_norm": 0.7445155382156372, + "learning_rate": 0.0014834362497964308, + "loss": 2.031, + "step": 724 + }, + { + "epoch": 0.07647679324894514, + "grad_norm": 0.7622427940368652, + "learning_rate": 0.0014833837364924484, + "loss": 2.0502, + "step": 725 + }, + { + "epoch": 0.07658227848101266, + "grad_norm": 0.7823601961135864, + "learning_rate": 0.0014833311410091617, + "loss": 2.0388, + "step": 726 + }, + { + "epoch": 0.07668776371308017, + "grad_norm": 0.8535965085029602, + "learning_rate": 0.0014832784633524638, + "loss": 2.0421, + "step": 727 + }, + { + "epoch": 0.07679324894514768, + 
"grad_norm": 1.002013087272644, + "learning_rate": 0.0014832257035282577, + "loss": 2.0591, + "step": 728 + }, + { + "epoch": 0.07689873417721518, + "grad_norm": 1.1152223348617554, + "learning_rate": 0.0014831728615424553, + "loss": 2.0643, + "step": 729 + }, + { + "epoch": 0.0770042194092827, + "grad_norm": 0.9078091979026794, + "learning_rate": 0.0014831199374009778, + "loss": 2.026, + "step": 730 + }, + { + "epoch": 0.07710970464135021, + "grad_norm": 0.8184106349945068, + "learning_rate": 0.0014830669311097554, + "loss": 2.0489, + "step": 731 + }, + { + "epoch": 0.07721518987341772, + "grad_norm": 0.9347290992736816, + "learning_rate": 0.0014830138426747282, + "loss": 2.0637, + "step": 732 + }, + { + "epoch": 0.07732067510548524, + "grad_norm": 0.7959004640579224, + "learning_rate": 0.0014829606721018448, + "loss": 2.072, + "step": 733 + }, + { + "epoch": 0.07742616033755274, + "grad_norm": 0.7183822393417358, + "learning_rate": 0.0014829074193970634, + "loss": 2.0926, + "step": 734 + }, + { + "epoch": 0.07753164556962025, + "grad_norm": 0.7533881664276123, + "learning_rate": 0.0014828540845663507, + "loss": 2.0623, + "step": 735 + }, + { + "epoch": 0.07763713080168777, + "grad_norm": 0.818114697933197, + "learning_rate": 0.0014828006676156837, + "loss": 2.0296, + "step": 736 + }, + { + "epoch": 0.07774261603375528, + "grad_norm": 0.8128653764724731, + "learning_rate": 0.0014827471685510477, + "loss": 2.0753, + "step": 737 + }, + { + "epoch": 0.07784810126582278, + "grad_norm": 0.7084885835647583, + "learning_rate": 0.0014826935873784378, + "loss": 2.0403, + "step": 738 + }, + { + "epoch": 0.07795358649789029, + "grad_norm": 0.7632172703742981, + "learning_rate": 0.0014826399241038577, + "loss": 2.0744, + "step": 739 + }, + { + "epoch": 0.07805907172995781, + "grad_norm": 1.0373293161392212, + "learning_rate": 0.0014825861787333208, + "loss": 2.0299, + "step": 740 + }, + { + "epoch": 0.07816455696202532, + "grad_norm": 0.9600429534912109, + "learning_rate": 
0.00148253235127285, + "loss": 2.0408, + "step": 741 + }, + { + "epoch": 0.07827004219409282, + "grad_norm": 1.0876994132995605, + "learning_rate": 0.001482478441728476, + "loss": 2.0837, + "step": 742 + }, + { + "epoch": 0.07837552742616034, + "grad_norm": 0.8083521127700806, + "learning_rate": 0.0014824244501062402, + "loss": 2.039, + "step": 743 + }, + { + "epoch": 0.07848101265822785, + "grad_norm": 0.7359253168106079, + "learning_rate": 0.0014823703764121929, + "loss": 2.041, + "step": 744 + }, + { + "epoch": 0.07858649789029536, + "grad_norm": 0.8825395703315735, + "learning_rate": 0.0014823162206523926, + "loss": 2.0515, + "step": 745 + }, + { + "epoch": 0.07869198312236286, + "grad_norm": 0.8302421569824219, + "learning_rate": 0.0014822619828329085, + "loss": 2.0479, + "step": 746 + }, + { + "epoch": 0.07879746835443038, + "grad_norm": 0.7472004890441895, + "learning_rate": 0.0014822076629598176, + "loss": 2.0258, + "step": 747 + }, + { + "epoch": 0.07890295358649789, + "grad_norm": 1.0081514120101929, + "learning_rate": 0.001482153261039207, + "loss": 2.0002, + "step": 748 + }, + { + "epoch": 0.0790084388185654, + "grad_norm": 1.3184149265289307, + "learning_rate": 0.0014820987770771726, + "loss": 2.0544, + "step": 749 + }, + { + "epoch": 0.07911392405063292, + "grad_norm": 0.8041273951530457, + "learning_rate": 0.0014820442110798197, + "loss": 2.0182, + "step": 750 + }, + { + "epoch": 0.07921940928270042, + "grad_norm": 0.9374203085899353, + "learning_rate": 0.0014819895630532628, + "loss": 2.0297, + "step": 751 + }, + { + "epoch": 0.07932489451476793, + "grad_norm": 1.1146823167800903, + "learning_rate": 0.0014819348330036251, + "loss": 2.0723, + "step": 752 + }, + { + "epoch": 0.07943037974683544, + "grad_norm": 0.7888982892036438, + "learning_rate": 0.0014818800209370397, + "loss": 2.0276, + "step": 753 + }, + { + "epoch": 0.07953586497890296, + "grad_norm": 0.7556480765342712, + "learning_rate": 0.0014818251268596486, + "loss": 2.0379, + "step": 754 + 
}, + { + "epoch": 0.07964135021097046, + "grad_norm": 0.886920690536499, + "learning_rate": 0.0014817701507776025, + "loss": 2.0517, + "step": 755 + }, + { + "epoch": 0.07974683544303797, + "grad_norm": 0.7745406627655029, + "learning_rate": 0.0014817150926970625, + "loss": 2.0252, + "step": 756 + }, + { + "epoch": 0.07985232067510549, + "grad_norm": 0.9084661602973938, + "learning_rate": 0.0014816599526241974, + "loss": 2.0177, + "step": 757 + }, + { + "epoch": 0.079957805907173, + "grad_norm": 0.9429565072059631, + "learning_rate": 0.0014816047305651863, + "loss": 2.0647, + "step": 758 + }, + { + "epoch": 0.0800632911392405, + "grad_norm": 0.7428175210952759, + "learning_rate": 0.0014815494265262169, + "loss": 2.0027, + "step": 759 + }, + { + "epoch": 0.08016877637130802, + "grad_norm": 0.7939383387565613, + "learning_rate": 0.0014814940405134865, + "loss": 2.015, + "step": 760 + }, + { + "epoch": 0.08027426160337553, + "grad_norm": 0.862276554107666, + "learning_rate": 0.0014814385725332015, + "loss": 2.0612, + "step": 761 + }, + { + "epoch": 0.08037974683544304, + "grad_norm": 0.7810617685317993, + "learning_rate": 0.001481383022591577, + "loss": 2.0208, + "step": 762 + }, + { + "epoch": 0.08048523206751054, + "grad_norm": 0.71695476770401, + "learning_rate": 0.0014813273906948378, + "loss": 2.0033, + "step": 763 + }, + { + "epoch": 0.08059071729957806, + "grad_norm": 0.8320707082748413, + "learning_rate": 0.0014812716768492177, + "loss": 2.039, + "step": 764 + }, + { + "epoch": 0.08069620253164557, + "grad_norm": 1.055572509765625, + "learning_rate": 0.0014812158810609598, + "loss": 2.0183, + "step": 765 + }, + { + "epoch": 0.08080168776371308, + "grad_norm": 0.9686812162399292, + "learning_rate": 0.0014811600033363165, + "loss": 2.0375, + "step": 766 + }, + { + "epoch": 0.0809071729957806, + "grad_norm": 0.8108116388320923, + "learning_rate": 0.0014811040436815486, + "loss": 2.0154, + "step": 767 + }, + { + "epoch": 0.0810126582278481, + "grad_norm": 
0.924351155757904, + "learning_rate": 0.001481048002102927, + "loss": 2.0269, + "step": 768 + }, + { + "epoch": 0.08111814345991561, + "grad_norm": 1.0301975011825562, + "learning_rate": 0.0014809918786067315, + "loss": 2.0635, + "step": 769 + }, + { + "epoch": 0.08122362869198312, + "grad_norm": 0.7987834811210632, + "learning_rate": 0.001480935673199251, + "loss": 1.9973, + "step": 770 + }, + { + "epoch": 0.08132911392405064, + "grad_norm": 0.7898582816123962, + "learning_rate": 0.0014808793858867837, + "loss": 2.0028, + "step": 771 + }, + { + "epoch": 0.08143459915611814, + "grad_norm": 0.9361711144447327, + "learning_rate": 0.0014808230166756366, + "loss": 2.0828, + "step": 772 + }, + { + "epoch": 0.08154008438818565, + "grad_norm": 0.9031997323036194, + "learning_rate": 0.0014807665655721261, + "loss": 2.0137, + "step": 773 + }, + { + "epoch": 0.08164556962025317, + "grad_norm": 0.7575645446777344, + "learning_rate": 0.0014807100325825782, + "loss": 2.0183, + "step": 774 + }, + { + "epoch": 0.08175105485232068, + "grad_norm": 0.9003918170928955, + "learning_rate": 0.0014806534177133274, + "loss": 1.9921, + "step": 775 + }, + { + "epoch": 0.08185654008438818, + "grad_norm": 1.1839836835861206, + "learning_rate": 0.0014805967209707178, + "loss": 1.9842, + "step": 776 + }, + { + "epoch": 0.0819620253164557, + "grad_norm": 0.6721642017364502, + "learning_rate": 0.0014805399423611025, + "loss": 2.017, + "step": 777 + }, + { + "epoch": 0.08206751054852321, + "grad_norm": 1.1332080364227295, + "learning_rate": 0.0014804830818908438, + "loss": 2.0348, + "step": 778 + }, + { + "epoch": 0.08217299578059072, + "grad_norm": 1.3852750062942505, + "learning_rate": 0.0014804261395663133, + "loss": 2.0689, + "step": 779 + }, + { + "epoch": 0.08227848101265822, + "grad_norm": 0.6995261907577515, + "learning_rate": 0.0014803691153938915, + "loss": 2.0465, + "step": 780 + }, + { + "epoch": 0.08238396624472574, + "grad_norm": 1.1727993488311768, + "learning_rate": 
0.0014803120093799687, + "loss": 2.0045, + "step": 781 + }, + { + "epoch": 0.08248945147679325, + "grad_norm": 0.9326077103614807, + "learning_rate": 0.0014802548215309434, + "loss": 2.008, + "step": 782 + }, + { + "epoch": 0.08259493670886076, + "grad_norm": 0.6740857362747192, + "learning_rate": 0.001480197551853224, + "loss": 2.0028, + "step": 783 + }, + { + "epoch": 0.08270042194092828, + "grad_norm": 1.0548357963562012, + "learning_rate": 0.0014801402003532277, + "loss": 2.0345, + "step": 784 + }, + { + "epoch": 0.08280590717299578, + "grad_norm": 0.899648129940033, + "learning_rate": 0.0014800827670373815, + "loss": 2.0371, + "step": 785 + }, + { + "epoch": 0.08291139240506329, + "grad_norm": 0.7559587955474854, + "learning_rate": 0.0014800252519121203, + "loss": 2.0007, + "step": 786 + }, + { + "epoch": 0.0830168776371308, + "grad_norm": 1.0439252853393555, + "learning_rate": 0.0014799676549838898, + "loss": 2.0475, + "step": 787 + }, + { + "epoch": 0.08312236286919832, + "grad_norm": 0.8973297476768494, + "learning_rate": 0.0014799099762591434, + "loss": 2.0439, + "step": 788 + }, + { + "epoch": 0.08322784810126582, + "grad_norm": 0.7104743719100952, + "learning_rate": 0.0014798522157443443, + "loss": 2.0041, + "step": 789 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 0.8140530586242676, + "learning_rate": 0.0014797943734459653, + "loss": 2.0649, + "step": 790 + }, + { + "epoch": 0.08343881856540085, + "grad_norm": 0.756718635559082, + "learning_rate": 0.0014797364493704876, + "loss": 2.032, + "step": 791 + }, + { + "epoch": 0.08354430379746836, + "grad_norm": 0.85821932554245, + "learning_rate": 0.001479678443524402, + "loss": 2.0092, + "step": 792 + }, + { + "epoch": 0.08364978902953586, + "grad_norm": 1.2496285438537598, + "learning_rate": 0.0014796203559142081, + "loss": 1.997, + "step": 793 + }, + { + "epoch": 0.08375527426160338, + "grad_norm": 0.8274452686309814, + "learning_rate": 0.0014795621865464155, + "loss": 2.0188, + "step": 794 + }, 
+ { + "epoch": 0.08386075949367089, + "grad_norm": 0.7965332269668579, + "learning_rate": 0.0014795039354275417, + "loss": 1.9852, + "step": 795 + }, + { + "epoch": 0.0839662447257384, + "grad_norm": 1.1568830013275146, + "learning_rate": 0.0014794456025641143, + "loss": 2.0043, + "step": 796 + }, + { + "epoch": 0.0840717299578059, + "grad_norm": 0.7922971248626709, + "learning_rate": 0.00147938718796267, + "loss": 2.0263, + "step": 797 + }, + { + "epoch": 0.08417721518987342, + "grad_norm": 0.7085273861885071, + "learning_rate": 0.001479328691629754, + "loss": 1.9797, + "step": 798 + }, + { + "epoch": 0.08428270042194093, + "grad_norm": 0.7261282801628113, + "learning_rate": 0.0014792701135719214, + "loss": 1.9825, + "step": 799 + }, + { + "epoch": 0.08438818565400844, + "grad_norm": 0.7652263641357422, + "learning_rate": 0.001479211453795736, + "loss": 1.9604, + "step": 800 + }, + { + "epoch": 0.08449367088607596, + "grad_norm": 0.7499869465827942, + "learning_rate": 0.001479152712307771, + "loss": 1.9733, + "step": 801 + }, + { + "epoch": 0.08459915611814346, + "grad_norm": 0.7441799640655518, + "learning_rate": 0.0014790938891146089, + "loss": 1.9977, + "step": 802 + }, + { + "epoch": 0.08470464135021097, + "grad_norm": 0.8202113509178162, + "learning_rate": 0.001479034984222841, + "loss": 1.9645, + "step": 803 + }, + { + "epoch": 0.08481012658227848, + "grad_norm": 0.8074707388877869, + "learning_rate": 0.0014789759976390675, + "loss": 1.9485, + "step": 804 + }, + { + "epoch": 0.084915611814346, + "grad_norm": 0.6840352416038513, + "learning_rate": 0.0014789169293698988, + "loss": 2.0039, + "step": 805 + }, + { + "epoch": 0.0850210970464135, + "grad_norm": 0.8436501026153564, + "learning_rate": 0.0014788577794219533, + "loss": 2.025, + "step": 806 + }, + { + "epoch": 0.08512658227848101, + "grad_norm": 0.839043140411377, + "learning_rate": 0.0014787985478018593, + "loss": 1.9976, + "step": 807 + }, + { + "epoch": 0.08523206751054853, + "grad_norm": 
0.7740750312805176, + "learning_rate": 0.0014787392345162538, + "loss": 2.0092, + "step": 808 + }, + { + "epoch": 0.08533755274261604, + "grad_norm": 0.7002189755439758, + "learning_rate": 0.0014786798395717833, + "loss": 1.9483, + "step": 809 + }, + { + "epoch": 0.08544303797468354, + "grad_norm": 0.8761747479438782, + "learning_rate": 0.0014786203629751033, + "loss": 1.9765, + "step": 810 + }, + { + "epoch": 0.08554852320675105, + "grad_norm": 0.9131048321723938, + "learning_rate": 0.001478560804732878, + "loss": 2.0084, + "step": 811 + }, + { + "epoch": 0.08565400843881857, + "grad_norm": 0.697911262512207, + "learning_rate": 0.001478501164851782, + "loss": 1.9716, + "step": 812 + }, + { + "epoch": 0.08575949367088608, + "grad_norm": 0.7107416987419128, + "learning_rate": 0.0014784414433384977, + "loss": 2.0402, + "step": 813 + }, + { + "epoch": 0.08586497890295358, + "grad_norm": 0.8925285935401917, + "learning_rate": 0.0014783816401997174, + "loss": 2.0175, + "step": 814 + }, + { + "epoch": 0.0859704641350211, + "grad_norm": 0.6919785141944885, + "learning_rate": 0.0014783217554421423, + "loss": 1.9837, + "step": 815 + }, + { + "epoch": 0.08607594936708861, + "grad_norm": 0.6890232563018799, + "learning_rate": 0.0014782617890724827, + "loss": 1.9708, + "step": 816 + }, + { + "epoch": 0.08618143459915611, + "grad_norm": 0.8114655613899231, + "learning_rate": 0.0014782017410974583, + "loss": 2.0104, + "step": 817 + }, + { + "epoch": 0.08628691983122364, + "grad_norm": 0.8649437427520752, + "learning_rate": 0.0014781416115237976, + "loss": 1.9963, + "step": 818 + }, + { + "epoch": 0.08639240506329114, + "grad_norm": 0.8874574899673462, + "learning_rate": 0.0014780814003582385, + "loss": 1.9785, + "step": 819 + }, + { + "epoch": 0.08649789029535865, + "grad_norm": 0.7124201059341431, + "learning_rate": 0.0014780211076075279, + "loss": 1.9731, + "step": 820 + }, + { + "epoch": 0.08660337552742615, + "grad_norm": 0.7281554341316223, + "learning_rate": 
0.001477960733278422, + "loss": 1.9941, + "step": 821 + }, + { + "epoch": 0.08670886075949367, + "grad_norm": 0.6597190499305725, + "learning_rate": 0.001477900277377686, + "loss": 1.9989, + "step": 822 + }, + { + "epoch": 0.08681434599156118, + "grad_norm": 0.708371102809906, + "learning_rate": 0.0014778397399120942, + "loss": 1.9973, + "step": 823 + }, + { + "epoch": 0.08691983122362869, + "grad_norm": 0.7170627117156982, + "learning_rate": 0.0014777791208884304, + "loss": 2.0346, + "step": 824 + }, + { + "epoch": 0.08702531645569621, + "grad_norm": 0.7222812175750732, + "learning_rate": 0.0014777184203134867, + "loss": 1.9762, + "step": 825 + }, + { + "epoch": 0.08713080168776371, + "grad_norm": 0.8331120610237122, + "learning_rate": 0.0014776576381940658, + "loss": 2.019, + "step": 826 + }, + { + "epoch": 0.08723628691983122, + "grad_norm": 0.7527528405189514, + "learning_rate": 0.0014775967745369778, + "loss": 2.0045, + "step": 827 + }, + { + "epoch": 0.08734177215189873, + "grad_norm": 0.7837775945663452, + "learning_rate": 0.001477535829349043, + "loss": 1.9794, + "step": 828 + }, + { + "epoch": 0.08744725738396625, + "grad_norm": 0.7725791335105896, + "learning_rate": 0.0014774748026370908, + "loss": 1.9834, + "step": 829 + }, + { + "epoch": 0.08755274261603375, + "grad_norm": 0.7880226373672485, + "learning_rate": 0.0014774136944079594, + "loss": 2.0054, + "step": 830 + }, + { + "epoch": 0.08765822784810126, + "grad_norm": 0.6642130017280579, + "learning_rate": 0.0014773525046684964, + "loss": 1.9561, + "step": 831 + }, + { + "epoch": 0.08776371308016878, + "grad_norm": 0.77483731508255, + "learning_rate": 0.0014772912334255585, + "loss": 1.9516, + "step": 832 + }, + { + "epoch": 0.08786919831223629, + "grad_norm": 0.7198542356491089, + "learning_rate": 0.0014772298806860111, + "loss": 1.9708, + "step": 833 + }, + { + "epoch": 0.0879746835443038, + "grad_norm": 0.6992120742797852, + "learning_rate": 0.0014771684464567293, + "loss": 1.9374, + "step": 834 + 
}, + { + "epoch": 0.08808016877637131, + "grad_norm": 0.8210814595222473, + "learning_rate": 0.0014771069307445972, + "loss": 2.0028, + "step": 835 + }, + { + "epoch": 0.08818565400843882, + "grad_norm": 0.8923372030258179, + "learning_rate": 0.0014770453335565077, + "loss": 1.979, + "step": 836 + }, + { + "epoch": 0.08829113924050633, + "grad_norm": 0.9127616882324219, + "learning_rate": 0.0014769836548993631, + "loss": 1.9931, + "step": 837 + }, + { + "epoch": 0.08839662447257383, + "grad_norm": 0.8255398273468018, + "learning_rate": 0.0014769218947800749, + "loss": 2.0064, + "step": 838 + }, + { + "epoch": 0.08850210970464135, + "grad_norm": 0.7085419297218323, + "learning_rate": 0.0014768600532055638, + "loss": 1.965, + "step": 839 + }, + { + "epoch": 0.08860759493670886, + "grad_norm": 0.7873749732971191, + "learning_rate": 0.0014767981301827592, + "loss": 1.9858, + "step": 840 + }, + { + "epoch": 0.08871308016877637, + "grad_norm": 0.7555823922157288, + "learning_rate": 0.0014767361257186, + "loss": 1.9922, + "step": 841 + }, + { + "epoch": 0.08881856540084389, + "grad_norm": 0.8675287365913391, + "learning_rate": 0.0014766740398200343, + "loss": 1.9448, + "step": 842 + }, + { + "epoch": 0.0889240506329114, + "grad_norm": 0.9186450242996216, + "learning_rate": 0.0014766118724940185, + "loss": 1.9949, + "step": 843 + }, + { + "epoch": 0.0890295358649789, + "grad_norm": 0.7396310567855835, + "learning_rate": 0.0014765496237475195, + "loss": 1.9671, + "step": 844 + }, + { + "epoch": 0.08913502109704641, + "grad_norm": 0.8003945350646973, + "learning_rate": 0.001476487293587512, + "loss": 1.9855, + "step": 845 + }, + { + "epoch": 0.08924050632911393, + "grad_norm": 0.8001066446304321, + "learning_rate": 0.0014764248820209808, + "loss": 2.0035, + "step": 846 + }, + { + "epoch": 0.08934599156118143, + "grad_norm": 0.7978143095970154, + "learning_rate": 0.0014763623890549193, + "loss": 1.9929, + "step": 847 + }, + { + "epoch": 0.08945147679324894, + "grad_norm": 
0.7468557953834534, + "learning_rate": 0.00147629981469633, + "loss": 1.9869, + "step": 848 + }, + { + "epoch": 0.08955696202531646, + "grad_norm": 0.8194332122802734, + "learning_rate": 0.001476237158952225, + "loss": 1.9519, + "step": 849 + }, + { + "epoch": 0.08966244725738397, + "grad_norm": 0.8178837299346924, + "learning_rate": 0.0014761744218296249, + "loss": 1.9517, + "step": 850 + }, + { + "epoch": 0.08976793248945147, + "grad_norm": 0.7478840351104736, + "learning_rate": 0.0014761116033355597, + "loss": 1.9697, + "step": 851 + }, + { + "epoch": 0.08987341772151898, + "grad_norm": 0.7429735660552979, + "learning_rate": 0.001476048703477069, + "loss": 2.0024, + "step": 852 + }, + { + "epoch": 0.0899789029535865, + "grad_norm": 0.8701443076133728, + "learning_rate": 0.0014759857222612003, + "loss": 1.9637, + "step": 853 + }, + { + "epoch": 0.09008438818565401, + "grad_norm": 1.0290130376815796, + "learning_rate": 0.0014759226596950115, + "loss": 2.0231, + "step": 854 + }, + { + "epoch": 0.09018987341772151, + "grad_norm": 0.9827783703804016, + "learning_rate": 0.0014758595157855687, + "loss": 1.9799, + "step": 855 + }, + { + "epoch": 0.09029535864978903, + "grad_norm": 0.7142657041549683, + "learning_rate": 0.001475796290539948, + "loss": 1.9829, + "step": 856 + }, + { + "epoch": 0.09040084388185654, + "grad_norm": 0.7950439453125, + "learning_rate": 0.0014757329839652335, + "loss": 1.9501, + "step": 857 + }, + { + "epoch": 0.09050632911392405, + "grad_norm": 1.1081926822662354, + "learning_rate": 0.0014756695960685194, + "loss": 1.9924, + "step": 858 + }, + { + "epoch": 0.09061181434599157, + "grad_norm": 1.0232712030410767, + "learning_rate": 0.0014756061268569086, + "loss": 1.9707, + "step": 859 + }, + { + "epoch": 0.09071729957805907, + "grad_norm": 0.8919060230255127, + "learning_rate": 0.001475542576337513, + "loss": 1.9407, + "step": 860 + }, + { + "epoch": 0.09082278481012658, + "grad_norm": 0.8040094375610352, + "learning_rate": 
0.001475478944517454, + "loss": 1.9865, + "step": 861 + }, + { + "epoch": 0.09092827004219409, + "grad_norm": 0.8021308183670044, + "learning_rate": 0.0014754152314038617, + "loss": 1.9583, + "step": 862 + }, + { + "epoch": 0.09103375527426161, + "grad_norm": 0.7467990517616272, + "learning_rate": 0.0014753514370038753, + "loss": 1.9751, + "step": 863 + }, + { + "epoch": 0.09113924050632911, + "grad_norm": 0.9529770016670227, + "learning_rate": 0.0014752875613246435, + "loss": 1.9864, + "step": 864 + }, + { + "epoch": 0.09124472573839662, + "grad_norm": 0.711624264717102, + "learning_rate": 0.001475223604373324, + "loss": 1.9889, + "step": 865 + }, + { + "epoch": 0.09135021097046414, + "grad_norm": 0.8056161403656006, + "learning_rate": 0.0014751595661570832, + "loss": 1.9446, + "step": 866 + }, + { + "epoch": 0.09145569620253165, + "grad_norm": 0.8755531907081604, + "learning_rate": 0.001475095446683097, + "loss": 1.9852, + "step": 867 + }, + { + "epoch": 0.09156118143459915, + "grad_norm": 0.7735251188278198, + "learning_rate": 0.0014750312459585505, + "loss": 2.001, + "step": 868 + }, + { + "epoch": 0.09166666666666666, + "grad_norm": 0.7478463053703308, + "learning_rate": 0.0014749669639906374, + "loss": 1.9411, + "step": 869 + }, + { + "epoch": 0.09177215189873418, + "grad_norm": 0.7848837971687317, + "learning_rate": 0.001474902600786561, + "loss": 1.9852, + "step": 870 + }, + { + "epoch": 0.09187763713080169, + "grad_norm": 0.7906065583229065, + "learning_rate": 0.0014748381563535337, + "loss": 1.9746, + "step": 871 + }, + { + "epoch": 0.0919831223628692, + "grad_norm": 0.7443709969520569, + "learning_rate": 0.0014747736306987764, + "loss": 1.9514, + "step": 872 + }, + { + "epoch": 0.09208860759493671, + "grad_norm": 0.7821484804153442, + "learning_rate": 0.0014747090238295198, + "loss": 1.964, + "step": 873 + }, + { + "epoch": 0.09219409282700422, + "grad_norm": 0.9665396213531494, + "learning_rate": 0.0014746443357530033, + "loss": 1.9755, + "step": 874 + 
}, + { + "epoch": 0.09229957805907173, + "grad_norm": 1.2201642990112305, + "learning_rate": 0.0014745795664764757, + "loss": 1.9598, + "step": 875 + }, + { + "epoch": 0.09240506329113925, + "grad_norm": 0.7799399495124817, + "learning_rate": 0.0014745147160071944, + "loss": 1.9829, + "step": 876 + }, + { + "epoch": 0.09251054852320675, + "grad_norm": 0.8286166191101074, + "learning_rate": 0.0014744497843524266, + "loss": 1.9569, + "step": 877 + }, + { + "epoch": 0.09261603375527426, + "grad_norm": 0.9090811014175415, + "learning_rate": 0.001474384771519448, + "loss": 1.9356, + "step": 878 + }, + { + "epoch": 0.09272151898734177, + "grad_norm": 0.9315096735954285, + "learning_rate": 0.0014743196775155434, + "loss": 1.9993, + "step": 879 + }, + { + "epoch": 0.09282700421940929, + "grad_norm": 0.76627117395401, + "learning_rate": 0.0014742545023480075, + "loss": 1.9714, + "step": 880 + }, + { + "epoch": 0.0929324894514768, + "grad_norm": 0.7704630494117737, + "learning_rate": 0.001474189246024143, + "loss": 1.9993, + "step": 881 + }, + { + "epoch": 0.0930379746835443, + "grad_norm": 0.8186467885971069, + "learning_rate": 0.0014741239085512624, + "loss": 1.9445, + "step": 882 + }, + { + "epoch": 0.09314345991561182, + "grad_norm": 1.0847194194793701, + "learning_rate": 0.0014740584899366868, + "loss": 1.9005, + "step": 883 + }, + { + "epoch": 0.09324894514767933, + "grad_norm": 0.7882110476493835, + "learning_rate": 0.0014739929901877473, + "loss": 1.9451, + "step": 884 + }, + { + "epoch": 0.09335443037974683, + "grad_norm": 0.7622191309928894, + "learning_rate": 0.001473927409311783, + "loss": 2.0086, + "step": 885 + }, + { + "epoch": 0.09345991561181434, + "grad_norm": 1.118061900138855, + "learning_rate": 0.0014738617473161425, + "loss": 1.9231, + "step": 886 + }, + { + "epoch": 0.09356540084388186, + "grad_norm": 1.2210592031478882, + "learning_rate": 0.0014737960042081836, + "loss": 1.9476, + "step": 887 + }, + { + "epoch": 0.09367088607594937, + "grad_norm": 
0.9136677980422974, + "learning_rate": 0.0014737301799952734, + "loss": 1.9847, + "step": 888 + }, + { + "epoch": 0.09377637130801687, + "grad_norm": 0.8415705561637878, + "learning_rate": 0.001473664274684788, + "loss": 1.9535, + "step": 889 + }, + { + "epoch": 0.0938818565400844, + "grad_norm": 1.0081201791763306, + "learning_rate": 0.0014735982882841117, + "loss": 1.973, + "step": 890 + }, + { + "epoch": 0.0939873417721519, + "grad_norm": 1.0097463130950928, + "learning_rate": 0.0014735322208006391, + "loss": 2.0087, + "step": 891 + }, + { + "epoch": 0.0940928270042194, + "grad_norm": 0.8645328879356384, + "learning_rate": 0.0014734660722417734, + "loss": 1.9699, + "step": 892 + }, + { + "epoch": 0.09419831223628691, + "grad_norm": 0.7445876002311707, + "learning_rate": 0.0014733998426149266, + "loss": 1.9337, + "step": 893 + }, + { + "epoch": 0.09430379746835443, + "grad_norm": 0.8839159607887268, + "learning_rate": 0.0014733335319275203, + "loss": 1.9845, + "step": 894 + }, + { + "epoch": 0.09440928270042194, + "grad_norm": 0.8625115752220154, + "learning_rate": 0.001473267140186985, + "loss": 1.9386, + "step": 895 + }, + { + "epoch": 0.09451476793248945, + "grad_norm": 0.6866080164909363, + "learning_rate": 0.00147320066740076, + "loss": 1.9382, + "step": 896 + }, + { + "epoch": 0.09462025316455697, + "grad_norm": 1.027549147605896, + "learning_rate": 0.001473134113576294, + "loss": 1.9473, + "step": 897 + }, + { + "epoch": 0.09472573839662447, + "grad_norm": 1.323302149772644, + "learning_rate": 0.0014730674787210448, + "loss": 1.9607, + "step": 898 + }, + { + "epoch": 0.09483122362869198, + "grad_norm": 0.7032807469367981, + "learning_rate": 0.0014730007628424792, + "loss": 1.9214, + "step": 899 + }, + { + "epoch": 0.0949367088607595, + "grad_norm": 1.2839338779449463, + "learning_rate": 0.0014729339659480727, + "loss": 1.9539, + "step": 900 + }, + { + "epoch": 0.095042194092827, + "grad_norm": 0.8145845532417297, + "learning_rate": 0.0014728670880453105, + 
"loss": 1.9693, + "step": 901 + }, + { + "epoch": 0.09514767932489451, + "grad_norm": 0.7844377756118774, + "learning_rate": 0.0014728001291416863, + "loss": 1.952, + "step": 902 + }, + { + "epoch": 0.09525316455696202, + "grad_norm": 0.9820501804351807, + "learning_rate": 0.001472733089244704, + "loss": 1.946, + "step": 903 + }, + { + "epoch": 0.09535864978902954, + "grad_norm": 0.8713457584381104, + "learning_rate": 0.0014726659683618746, + "loss": 1.9441, + "step": 904 + }, + { + "epoch": 0.09546413502109705, + "grad_norm": 0.6758168339729309, + "learning_rate": 0.0014725987665007202, + "loss": 1.9337, + "step": 905 + }, + { + "epoch": 0.09556962025316455, + "grad_norm": 0.7543720006942749, + "learning_rate": 0.0014725314836687708, + "loss": 1.9687, + "step": 906 + }, + { + "epoch": 0.09567510548523207, + "grad_norm": 0.6593600511550903, + "learning_rate": 0.0014724641198735659, + "loss": 1.9681, + "step": 907 + }, + { + "epoch": 0.09578059071729958, + "grad_norm": 0.6954530477523804, + "learning_rate": 0.0014723966751226535, + "loss": 1.9354, + "step": 908 + }, + { + "epoch": 0.09588607594936709, + "grad_norm": 0.8631108403205872, + "learning_rate": 0.0014723291494235916, + "loss": 1.95, + "step": 909 + }, + { + "epoch": 0.09599156118143459, + "grad_norm": 0.9900848865509033, + "learning_rate": 0.0014722615427839468, + "loss": 1.9934, + "step": 910 + }, + { + "epoch": 0.09609704641350211, + "grad_norm": 0.8290793299674988, + "learning_rate": 0.0014721938552112943, + "loss": 1.948, + "step": 911 + }, + { + "epoch": 0.09620253164556962, + "grad_norm": 0.6939495205879211, + "learning_rate": 0.0014721260867132193, + "loss": 1.9389, + "step": 912 + }, + { + "epoch": 0.09630801687763713, + "grad_norm": 0.7518351078033447, + "learning_rate": 0.0014720582372973155, + "loss": 1.9442, + "step": 913 + }, + { + "epoch": 0.09641350210970465, + "grad_norm": 0.9594983458518982, + "learning_rate": 0.0014719903069711857, + "loss": 1.9547, + "step": 914 + }, + { + "epoch": 
0.09651898734177215, + "grad_norm": 0.9708054661750793, + "learning_rate": 0.0014719222957424417, + "loss": 1.9773, + "step": 915 + }, + { + "epoch": 0.09662447257383966, + "grad_norm": 0.8276053071022034, + "learning_rate": 0.0014718542036187049, + "loss": 1.9668, + "step": 916 + }, + { + "epoch": 0.09672995780590718, + "grad_norm": 0.7011005282402039, + "learning_rate": 0.0014717860306076049, + "loss": 1.9233, + "step": 917 + }, + { + "epoch": 0.09683544303797469, + "grad_norm": 0.7392288446426392, + "learning_rate": 0.0014717177767167812, + "loss": 1.9259, + "step": 918 + }, + { + "epoch": 0.09694092827004219, + "grad_norm": 0.7314950227737427, + "learning_rate": 0.0014716494419538815, + "loss": 1.9648, + "step": 919 + }, + { + "epoch": 0.0970464135021097, + "grad_norm": 0.6561670899391174, + "learning_rate": 0.0014715810263265633, + "loss": 1.8984, + "step": 920 + }, + { + "epoch": 0.09715189873417722, + "grad_norm": 0.7130864858627319, + "learning_rate": 0.0014715125298424934, + "loss": 1.958, + "step": 921 + }, + { + "epoch": 0.09725738396624473, + "grad_norm": 0.6721841096878052, + "learning_rate": 0.0014714439525093466, + "loss": 1.9261, + "step": 922 + }, + { + "epoch": 0.09736286919831223, + "grad_norm": 0.6888039708137512, + "learning_rate": 0.0014713752943348074, + "loss": 1.9092, + "step": 923 + }, + { + "epoch": 0.09746835443037975, + "grad_norm": 0.7490319013595581, + "learning_rate": 0.0014713065553265694, + "loss": 1.9439, + "step": 924 + }, + { + "epoch": 0.09757383966244726, + "grad_norm": 0.6794431805610657, + "learning_rate": 0.001471237735492335, + "loss": 1.9123, + "step": 925 + }, + { + "epoch": 0.09767932489451477, + "grad_norm": 0.7151462435722351, + "learning_rate": 0.0014711688348398161, + "loss": 1.9267, + "step": 926 + }, + { + "epoch": 0.09778481012658227, + "grad_norm": 0.7843958735466003, + "learning_rate": 0.001471099853376733, + "loss": 1.943, + "step": 927 + }, + { + "epoch": 0.09789029535864979, + "grad_norm": 0.736131489276886, 
+ "learning_rate": 0.0014710307911108159, + "loss": 1.8998, + "step": 928 + }, + { + "epoch": 0.0979957805907173, + "grad_norm": 0.7866749167442322, + "learning_rate": 0.0014709616480498029, + "loss": 1.935, + "step": 929 + }, + { + "epoch": 0.0981012658227848, + "grad_norm": 0.7242915034294128, + "learning_rate": 0.0014708924242014423, + "loss": 1.9328, + "step": 930 + }, + { + "epoch": 0.09820675105485233, + "grad_norm": 0.7860813736915588, + "learning_rate": 0.001470823119573491, + "loss": 1.9133, + "step": 931 + }, + { + "epoch": 0.09831223628691983, + "grad_norm": 1.0148043632507324, + "learning_rate": 0.0014707537341737149, + "loss": 1.9788, + "step": 932 + }, + { + "epoch": 0.09841772151898734, + "grad_norm": 1.213636875152588, + "learning_rate": 0.0014706842680098887, + "loss": 1.9599, + "step": 933 + }, + { + "epoch": 0.09852320675105486, + "grad_norm": 0.721025288105011, + "learning_rate": 0.0014706147210897967, + "loss": 1.9462, + "step": 934 + }, + { + "epoch": 0.09862869198312237, + "grad_norm": 0.9639481902122498, + "learning_rate": 0.0014705450934212317, + "loss": 1.9491, + "step": 935 + }, + { + "epoch": 0.09873417721518987, + "grad_norm": 1.1914700269699097, + "learning_rate": 0.0014704753850119962, + "loss": 1.9264, + "step": 936 + }, + { + "epoch": 0.09883966244725738, + "grad_norm": 0.765422523021698, + "learning_rate": 0.001470405595869901, + "loss": 1.9489, + "step": 937 + }, + { + "epoch": 0.0989451476793249, + "grad_norm": 0.7902228832244873, + "learning_rate": 0.0014703357260027667, + "loss": 1.9332, + "step": 938 + }, + { + "epoch": 0.0990506329113924, + "grad_norm": 1.0714261531829834, + "learning_rate": 0.0014702657754184225, + "loss": 1.9364, + "step": 939 + }, + { + "epoch": 0.09915611814345991, + "grad_norm": 0.8271100521087646, + "learning_rate": 0.0014701957441247064, + "loss": 1.9273, + "step": 940 + }, + { + "epoch": 0.09926160337552743, + "grad_norm": 0.7538468837738037, + "learning_rate": 0.001470125632129466, + "loss": 1.9118, 
+ "step": 941 + }, + { + "epoch": 0.09936708860759494, + "grad_norm": 1.141025424003601, + "learning_rate": 0.0014700554394405576, + "loss": 1.9396, + "step": 942 + }, + { + "epoch": 0.09947257383966245, + "grad_norm": 1.1188781261444092, + "learning_rate": 0.0014699851660658469, + "loss": 1.9162, + "step": 943 + }, + { + "epoch": 0.09957805907172995, + "grad_norm": 0.6984257102012634, + "learning_rate": 0.0014699148120132079, + "loss": 1.8848, + "step": 944 + }, + { + "epoch": 0.09968354430379747, + "grad_norm": 1.258985996246338, + "learning_rate": 0.0014698443772905247, + "loss": 1.9337, + "step": 945 + }, + { + "epoch": 0.09978902953586498, + "grad_norm": 0.9010317325592041, + "learning_rate": 0.0014697738619056891, + "loss": 1.9325, + "step": 946 + }, + { + "epoch": 0.09989451476793249, + "grad_norm": 0.8764107823371887, + "learning_rate": 0.0014697032658666036, + "loss": 1.881, + "step": 947 + }, + { + "epoch": 0.1, + "grad_norm": 1.47842276096344, + "learning_rate": 0.001469632589181178, + "loss": 1.9646, + "step": 948 + }, + { + "epoch": 0.10010548523206751, + "grad_norm": 0.7586141228675842, + "learning_rate": 0.0014695618318573327, + "loss": 1.9144, + "step": 949 + }, + { + "epoch": 0.10021097046413502, + "grad_norm": 1.4563822746276855, + "learning_rate": 0.0014694909939029959, + "loss": 1.9445, + "step": 950 + }, + { + "epoch": 0.10031645569620253, + "grad_norm": 0.8836003541946411, + "learning_rate": 0.0014694200753261057, + "loss": 1.956, + "step": 951 + }, + { + "epoch": 0.10042194092827005, + "grad_norm": 1.238711953163147, + "learning_rate": 0.0014693490761346086, + "loss": 1.9283, + "step": 952 + }, + { + "epoch": 0.10052742616033755, + "grad_norm": 0.8301349878311157, + "learning_rate": 0.0014692779963364606, + "loss": 1.9549, + "step": 953 + }, + { + "epoch": 0.10063291139240506, + "grad_norm": 0.9762710928916931, + "learning_rate": 0.0014692068359396264, + "loss": 1.9475, + "step": 954 + }, + { + "epoch": 0.10073839662447258, + "grad_norm": 
0.9054292440414429, + "learning_rate": 0.00146913559495208, + "loss": 1.9326, + "step": 955 + }, + { + "epoch": 0.10084388185654009, + "grad_norm": 0.7704567313194275, + "learning_rate": 0.001469064273381804, + "loss": 1.9216, + "step": 956 + }, + { + "epoch": 0.10094936708860759, + "grad_norm": 0.9342378377914429, + "learning_rate": 0.0014689928712367907, + "loss": 1.9081, + "step": 957 + }, + { + "epoch": 0.10105485232067511, + "grad_norm": 0.7875632047653198, + "learning_rate": 0.0014689213885250411, + "loss": 1.9435, + "step": 958 + }, + { + "epoch": 0.10116033755274262, + "grad_norm": 0.8386056423187256, + "learning_rate": 0.001468849825254565, + "loss": 1.8861, + "step": 959 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 0.7715898752212524, + "learning_rate": 0.0014687781814333814, + "loss": 1.9019, + "step": 960 + }, + { + "epoch": 0.10137130801687763, + "grad_norm": 0.7895503044128418, + "learning_rate": 0.0014687064570695185, + "loss": 1.9249, + "step": 961 + }, + { + "epoch": 0.10147679324894515, + "grad_norm": 0.803543746471405, + "learning_rate": 0.0014686346521710133, + "loss": 1.8995, + "step": 962 + }, + { + "epoch": 0.10158227848101266, + "grad_norm": 0.7247439026832581, + "learning_rate": 0.0014685627667459118, + "loss": 1.9541, + "step": 963 + }, + { + "epoch": 0.10168776371308016, + "grad_norm": 0.7915191650390625, + "learning_rate": 0.0014684908008022694, + "loss": 1.9081, + "step": 964 + }, + { + "epoch": 0.10179324894514769, + "grad_norm": 0.8008511662483215, + "learning_rate": 0.00146841875434815, + "loss": 1.9714, + "step": 965 + }, + { + "epoch": 0.10189873417721519, + "grad_norm": 1.1038283109664917, + "learning_rate": 0.0014683466273916266, + "loss": 1.8954, + "step": 966 + }, + { + "epoch": 0.1020042194092827, + "grad_norm": 0.8921539783477783, + "learning_rate": 0.0014682744199407817, + "loss": 1.913, + "step": 967 + }, + { + "epoch": 0.1021097046413502, + "grad_norm": 0.6839750409126282, + "learning_rate": 
0.0014682021320037064, + "loss": 1.9587, + "step": 968 + }, + { + "epoch": 0.10221518987341772, + "grad_norm": 1.1624159812927246, + "learning_rate": 0.0014681297635885011, + "loss": 1.909, + "step": 969 + }, + { + "epoch": 0.10232067510548523, + "grad_norm": 0.8827787637710571, + "learning_rate": 0.0014680573147032746, + "loss": 1.9342, + "step": 970 + }, + { + "epoch": 0.10242616033755274, + "grad_norm": 0.7449427247047424, + "learning_rate": 0.0014679847853561457, + "loss": 1.888, + "step": 971 + }, + { + "epoch": 0.10253164556962026, + "grad_norm": 0.8993370532989502, + "learning_rate": 0.0014679121755552412, + "loss": 1.8995, + "step": 972 + }, + { + "epoch": 0.10263713080168776, + "grad_norm": 0.696052610874176, + "learning_rate": 0.0014678394853086976, + "loss": 1.9011, + "step": 973 + }, + { + "epoch": 0.10274261603375527, + "grad_norm": 0.8271912336349487, + "learning_rate": 0.0014677667146246604, + "loss": 1.9122, + "step": 974 + }, + { + "epoch": 0.10284810126582279, + "grad_norm": 0.8416345715522766, + "learning_rate": 0.0014676938635112835, + "loss": 1.8965, + "step": 975 + }, + { + "epoch": 0.1029535864978903, + "grad_norm": 0.792177140712738, + "learning_rate": 0.0014676209319767306, + "loss": 1.9029, + "step": 976 + }, + { + "epoch": 0.1030590717299578, + "grad_norm": 0.7115939259529114, + "learning_rate": 0.0014675479200291738, + "loss": 1.9079, + "step": 977 + }, + { + "epoch": 0.10316455696202531, + "grad_norm": 0.6145343780517578, + "learning_rate": 0.0014674748276767944, + "loss": 1.8942, + "step": 978 + }, + { + "epoch": 0.10327004219409283, + "grad_norm": 0.6973080635070801, + "learning_rate": 0.0014674016549277831, + "loss": 1.8838, + "step": 979 + }, + { + "epoch": 0.10337552742616034, + "grad_norm": 0.6773556470870972, + "learning_rate": 0.0014673284017903392, + "loss": 1.9253, + "step": 980 + }, + { + "epoch": 0.10348101265822784, + "grad_norm": 0.6679510474205017, + "learning_rate": 0.001467255068272671, + "loss": 1.9054, + "step": 981 + 
}, + { + "epoch": 0.10358649789029536, + "grad_norm": 0.8198443055152893, + "learning_rate": 0.0014671816543829954, + "loss": 1.9398, + "step": 982 + }, + { + "epoch": 0.10369198312236287, + "grad_norm": 0.7803311347961426, + "learning_rate": 0.0014671081601295394, + "loss": 1.8895, + "step": 983 + }, + { + "epoch": 0.10379746835443038, + "grad_norm": 0.7099723815917969, + "learning_rate": 0.0014670345855205384, + "loss": 1.89, + "step": 984 + }, + { + "epoch": 0.10390295358649788, + "grad_norm": 1.0225074291229248, + "learning_rate": 0.0014669609305642366, + "loss": 1.9102, + "step": 985 + }, + { + "epoch": 0.1040084388185654, + "grad_norm": 0.9743515253067017, + "learning_rate": 0.0014668871952688873, + "loss": 1.907, + "step": 986 + }, + { + "epoch": 0.10411392405063291, + "grad_norm": 0.7221881151199341, + "learning_rate": 0.0014668133796427532, + "loss": 1.9342, + "step": 987 + }, + { + "epoch": 0.10421940928270042, + "grad_norm": 0.7963093519210815, + "learning_rate": 0.0014667394836941055, + "loss": 1.9007, + "step": 988 + }, + { + "epoch": 0.10432489451476794, + "grad_norm": 0.9664093852043152, + "learning_rate": 0.0014666655074312247, + "loss": 1.897, + "step": 989 + }, + { + "epoch": 0.10443037974683544, + "grad_norm": 0.8327805399894714, + "learning_rate": 0.0014665914508624, + "loss": 1.901, + "step": 990 + }, + { + "epoch": 0.10453586497890295, + "grad_norm": 0.677662193775177, + "learning_rate": 0.0014665173139959305, + "loss": 1.9636, + "step": 991 + }, + { + "epoch": 0.10464135021097046, + "grad_norm": 0.9592975974082947, + "learning_rate": 0.0014664430968401225, + "loss": 1.9076, + "step": 992 + }, + { + "epoch": 0.10474683544303798, + "grad_norm": 1.007272481918335, + "learning_rate": 0.0014663687994032931, + "loss": 1.8841, + "step": 993 + }, + { + "epoch": 0.10485232067510548, + "grad_norm": 0.8201162219047546, + "learning_rate": 0.0014662944216937677, + "loss": 1.9333, + "step": 994 + }, + { + "epoch": 0.10495780590717299, + "grad_norm": 
0.7789984345436096, + "learning_rate": 0.0014662199637198807, + "loss": 1.8827, + "step": 995 + }, + { + "epoch": 0.10506329113924051, + "grad_norm": 0.885975182056427, + "learning_rate": 0.0014661454254899754, + "loss": 1.9154, + "step": 996 + }, + { + "epoch": 0.10516877637130802, + "grad_norm": 1.0825153589248657, + "learning_rate": 0.0014660708070124038, + "loss": 1.8926, + "step": 997 + }, + { + "epoch": 0.10527426160337552, + "grad_norm": 0.6798545122146606, + "learning_rate": 0.0014659961082955277, + "loss": 1.927, + "step": 998 + }, + { + "epoch": 0.10537974683544304, + "grad_norm": 0.9920344948768616, + "learning_rate": 0.0014659213293477177, + "loss": 1.9259, + "step": 999 + }, + { + "epoch": 0.10548523206751055, + "grad_norm": 1.1546260118484497, + "learning_rate": 0.0014658464701773526, + "loss": 1.9504, + "step": 1000 + }, + { + "epoch": 0.10559071729957806, + "grad_norm": 0.6979643106460571, + "learning_rate": 0.0014657715307928212, + "loss": 1.8937, + "step": 1001 + }, + { + "epoch": 0.10569620253164556, + "grad_norm": 1.1849744319915771, + "learning_rate": 0.0014656965112025203, + "loss": 1.9082, + "step": 1002 + }, + { + "epoch": 0.10580168776371308, + "grad_norm": 0.7852225303649902, + "learning_rate": 0.0014656214114148567, + "loss": 1.9486, + "step": 1003 + }, + { + "epoch": 0.10590717299578059, + "grad_norm": 1.0831913948059082, + "learning_rate": 0.0014655462314382456, + "loss": 1.8754, + "step": 1004 + }, + { + "epoch": 0.1060126582278481, + "grad_norm": 1.0383925437927246, + "learning_rate": 0.0014654709712811113, + "loss": 1.9104, + "step": 1005 + }, + { + "epoch": 0.10611814345991562, + "grad_norm": 0.9360597729682922, + "learning_rate": 0.0014653956309518866, + "loss": 1.9152, + "step": 1006 + }, + { + "epoch": 0.10622362869198312, + "grad_norm": 0.7593399882316589, + "learning_rate": 0.0014653202104590146, + "loss": 1.9371, + "step": 1007 + }, + { + "epoch": 0.10632911392405063, + "grad_norm": 0.849263608455658, + "learning_rate": 
0.0014652447098109458, + "loss": 1.9008, + "step": 1008 + }, + { + "epoch": 0.10643459915611814, + "grad_norm": 0.78011554479599, + "learning_rate": 0.001465169129016141, + "loss": 1.9257, + "step": 1009 + }, + { + "epoch": 0.10654008438818566, + "grad_norm": 0.7586991190910339, + "learning_rate": 0.0014650934680830688, + "loss": 1.9114, + "step": 1010 + }, + { + "epoch": 0.10664556962025316, + "grad_norm": 0.8545921444892883, + "learning_rate": 0.001465017727020208, + "loss": 1.887, + "step": 1011 + }, + { + "epoch": 0.10675105485232067, + "grad_norm": 0.7554069757461548, + "learning_rate": 0.0014649419058360455, + "loss": 1.883, + "step": 1012 + }, + { + "epoch": 0.10685654008438819, + "grad_norm": 0.7838428020477295, + "learning_rate": 0.0014648660045390772, + "loss": 1.8538, + "step": 1013 + }, + { + "epoch": 0.1069620253164557, + "grad_norm": 0.8562689423561096, + "learning_rate": 0.0014647900231378086, + "loss": 1.9052, + "step": 1014 + }, + { + "epoch": 0.1070675105485232, + "grad_norm": 0.7297021150588989, + "learning_rate": 0.0014647139616407539, + "loss": 1.89, + "step": 1015 + }, + { + "epoch": 0.10717299578059072, + "grad_norm": 0.7992454171180725, + "learning_rate": 0.0014646378200564355, + "loss": 1.8555, + "step": 1016 + }, + { + "epoch": 0.10727848101265823, + "grad_norm": 0.7591902017593384, + "learning_rate": 0.001464561598393386, + "loss": 1.923, + "step": 1017 + }, + { + "epoch": 0.10738396624472574, + "grad_norm": 0.7708244323730469, + "learning_rate": 0.0014644852966601463, + "loss": 1.9226, + "step": 1018 + }, + { + "epoch": 0.10748945147679324, + "grad_norm": 1.2132785320281982, + "learning_rate": 0.0014644089148652664, + "loss": 1.9718, + "step": 1019 + }, + { + "epoch": 0.10759493670886076, + "grad_norm": 1.022781252861023, + "learning_rate": 0.0014643324530173051, + "loss": 1.8827, + "step": 1020 + }, + { + "epoch": 0.10770042194092827, + "grad_norm": 0.8327024579048157, + "learning_rate": 0.0014642559111248306, + "loss": 1.8827, + 
"step": 1021 + }, + { + "epoch": 0.10780590717299578, + "grad_norm": 1.1982691287994385, + "learning_rate": 0.0014641792891964195, + "loss": 1.8975, + "step": 1022 + }, + { + "epoch": 0.1079113924050633, + "grad_norm": 0.80882328748703, + "learning_rate": 0.0014641025872406581, + "loss": 1.8811, + "step": 1023 + }, + { + "epoch": 0.1080168776371308, + "grad_norm": 1.0127649307250977, + "learning_rate": 0.0014640258052661405, + "loss": 1.9053, + "step": 1024 + }, + { + "epoch": 0.10812236286919831, + "grad_norm": 1.0082775354385376, + "learning_rate": 0.0014639489432814712, + "loss": 1.9457, + "step": 1025 + }, + { + "epoch": 0.10822784810126582, + "grad_norm": 0.7998855710029602, + "learning_rate": 0.001463872001295263, + "loss": 1.8622, + "step": 1026 + }, + { + "epoch": 0.10833333333333334, + "grad_norm": 1.2898708581924438, + "learning_rate": 0.0014637949793161371, + "loss": 1.9258, + "step": 1027 + }, + { + "epoch": 0.10843881856540084, + "grad_norm": 0.6974481344223022, + "learning_rate": 0.0014637178773527246, + "loss": 1.8576, + "step": 1028 + }, + { + "epoch": 0.10854430379746835, + "grad_norm": 1.245059609413147, + "learning_rate": 0.001463640695413665, + "loss": 1.9357, + "step": 1029 + }, + { + "epoch": 0.10864978902953587, + "grad_norm": 0.7987862825393677, + "learning_rate": 0.0014635634335076067, + "loss": 1.905, + "step": 1030 + }, + { + "epoch": 0.10875527426160338, + "grad_norm": 0.8628827929496765, + "learning_rate": 0.0014634860916432077, + "loss": 1.8642, + "step": 1031 + }, + { + "epoch": 0.10886075949367088, + "grad_norm": 0.9531648755073547, + "learning_rate": 0.0014634086698291345, + "loss": 1.8527, + "step": 1032 + }, + { + "epoch": 0.10896624472573839, + "grad_norm": 0.7528793215751648, + "learning_rate": 0.0014633311680740625, + "loss": 1.9014, + "step": 1033 + }, + { + "epoch": 0.10907172995780591, + "grad_norm": 0.9317465424537659, + "learning_rate": 0.0014632535863866756, + "loss": 1.9323, + "step": 1034 + }, + { + "epoch": 
0.10917721518987342, + "grad_norm": 0.794512152671814, + "learning_rate": 0.0014631759247756683, + "loss": 1.9155, + "step": 1035 + }, + { + "epoch": 0.10928270042194092, + "grad_norm": 0.801597535610199, + "learning_rate": 0.0014630981832497421, + "loss": 1.8619, + "step": 1036 + }, + { + "epoch": 0.10938818565400844, + "grad_norm": 0.911699116230011, + "learning_rate": 0.0014630203618176088, + "loss": 1.8865, + "step": 1037 + }, + { + "epoch": 0.10949367088607595, + "grad_norm": 0.8099789023399353, + "learning_rate": 0.0014629424604879885, + "loss": 1.9135, + "step": 1038 + }, + { + "epoch": 0.10959915611814346, + "grad_norm": 0.747134268283844, + "learning_rate": 0.0014628644792696105, + "loss": 1.8751, + "step": 1039 + }, + { + "epoch": 0.10970464135021098, + "grad_norm": 0.929955780506134, + "learning_rate": 0.001462786418171213, + "loss": 1.8916, + "step": 1040 + }, + { + "epoch": 0.10981012658227848, + "grad_norm": 0.9559035301208496, + "learning_rate": 0.0014627082772015428, + "loss": 1.9074, + "step": 1041 + }, + { + "epoch": 0.10991561181434599, + "grad_norm": 0.7645518779754639, + "learning_rate": 0.0014626300563693566, + "loss": 1.8964, + "step": 1042 + }, + { + "epoch": 0.1100210970464135, + "grad_norm": 0.8639316558837891, + "learning_rate": 0.0014625517556834187, + "loss": 1.8918, + "step": 1043 + }, + { + "epoch": 0.11012658227848102, + "grad_norm": 0.7517715692520142, + "learning_rate": 0.0014624733751525036, + "loss": 1.8753, + "step": 1044 + }, + { + "epoch": 0.11023206751054852, + "grad_norm": 0.7472096681594849, + "learning_rate": 0.001462394914785394, + "loss": 1.9031, + "step": 1045 + }, + { + "epoch": 0.11033755274261603, + "grad_norm": 0.7880653142929077, + "learning_rate": 0.0014623163745908821, + "loss": 1.9057, + "step": 1046 + }, + { + "epoch": 0.11044303797468355, + "grad_norm": 1.0170305967330933, + "learning_rate": 0.0014622377545777687, + "loss": 1.8954, + "step": 1047 + }, + { + "epoch": 0.11054852320675106, + "grad_norm": 
0.732809841632843, + "learning_rate": 0.001462159054754863, + "loss": 1.8901, + "step": 1048 + }, + { + "epoch": 0.11065400843881856, + "grad_norm": 0.9893458485603333, + "learning_rate": 0.0014620802751309841, + "loss": 1.8824, + "step": 1049 + }, + { + "epoch": 0.11075949367088607, + "grad_norm": 1.2814258337020874, + "learning_rate": 0.0014620014157149597, + "loss": 1.9134, + "step": 1050 + }, + { + "epoch": 0.11086497890295359, + "grad_norm": 0.7590961456298828, + "learning_rate": 0.0014619224765156263, + "loss": 1.8918, + "step": 1051 + }, + { + "epoch": 0.1109704641350211, + "grad_norm": 1.3801941871643066, + "learning_rate": 0.0014618434575418293, + "loss": 1.8991, + "step": 1052 + }, + { + "epoch": 0.1110759493670886, + "grad_norm": 0.7779123187065125, + "learning_rate": 0.0014617643588024237, + "loss": 1.9071, + "step": 1053 + }, + { + "epoch": 0.11118143459915612, + "grad_norm": 1.2338576316833496, + "learning_rate": 0.001461685180306272, + "loss": 1.9026, + "step": 1054 + }, + { + "epoch": 0.11128691983122363, + "grad_norm": 0.8361676931381226, + "learning_rate": 0.0014616059220622475, + "loss": 1.8717, + "step": 1055 + }, + { + "epoch": 0.11139240506329114, + "grad_norm": 0.9507809281349182, + "learning_rate": 0.0014615265840792308, + "loss": 1.863, + "step": 1056 + }, + { + "epoch": 0.11149789029535866, + "grad_norm": 0.7300770878791809, + "learning_rate": 0.0014614471663661123, + "loss": 1.8865, + "step": 1057 + }, + { + "epoch": 0.11160337552742616, + "grad_norm": 0.8910831212997437, + "learning_rate": 0.0014613676689317916, + "loss": 1.8735, + "step": 1058 + }, + { + "epoch": 0.11170886075949367, + "grad_norm": 0.8112926483154297, + "learning_rate": 0.001461288091785176, + "loss": 1.8501, + "step": 1059 + }, + { + "epoch": 0.11181434599156118, + "grad_norm": 0.7803376913070679, + "learning_rate": 0.001461208434935183, + "loss": 1.8675, + "step": 1060 + }, + { + "epoch": 0.1119198312236287, + "grad_norm": 0.9136650562286377, + "learning_rate": 
0.0014611286983907384, + "loss": 1.9109, + "step": 1061 + }, + { + "epoch": 0.1120253164556962, + "grad_norm": 0.7617851495742798, + "learning_rate": 0.0014610488821607775, + "loss": 1.8466, + "step": 1062 + }, + { + "epoch": 0.11213080168776371, + "grad_norm": 0.9087613224983215, + "learning_rate": 0.0014609689862542434, + "loss": 1.8879, + "step": 1063 + }, + { + "epoch": 0.11223628691983123, + "grad_norm": 0.7564509510993958, + "learning_rate": 0.0014608890106800893, + "loss": 1.8488, + "step": 1064 + }, + { + "epoch": 0.11234177215189874, + "grad_norm": 0.7629596590995789, + "learning_rate": 0.0014608089554472767, + "loss": 1.8605, + "step": 1065 + }, + { + "epoch": 0.11244725738396624, + "grad_norm": 0.6972772479057312, + "learning_rate": 0.0014607288205647762, + "loss": 1.8395, + "step": 1066 + }, + { + "epoch": 0.11255274261603375, + "grad_norm": 0.7764860987663269, + "learning_rate": 0.0014606486060415673, + "loss": 1.8857, + "step": 1067 + }, + { + "epoch": 0.11265822784810127, + "grad_norm": 0.7426968216896057, + "learning_rate": 0.0014605683118866387, + "loss": 1.8494, + "step": 1068 + }, + { + "epoch": 0.11276371308016878, + "grad_norm": 0.6473240852355957, + "learning_rate": 0.0014604879381089873, + "loss": 1.8879, + "step": 1069 + }, + { + "epoch": 0.11286919831223628, + "grad_norm": 0.6940961480140686, + "learning_rate": 0.0014604074847176197, + "loss": 1.8876, + "step": 1070 + }, + { + "epoch": 0.1129746835443038, + "grad_norm": 0.7396560311317444, + "learning_rate": 0.0014603269517215512, + "loss": 1.8426, + "step": 1071 + }, + { + "epoch": 0.11308016877637131, + "grad_norm": 0.7561905980110168, + "learning_rate": 0.0014602463391298055, + "loss": 1.9071, + "step": 1072 + }, + { + "epoch": 0.11318565400843882, + "grad_norm": 0.7491395473480225, + "learning_rate": 0.0014601656469514159, + "loss": 1.8565, + "step": 1073 + }, + { + "epoch": 0.11329113924050632, + "grad_norm": 0.7414562702178955, + "learning_rate": 0.0014600848751954248, + "loss": 
1.848, + "step": 1074 + }, + { + "epoch": 0.11339662447257384, + "grad_norm": 0.695555567741394, + "learning_rate": 0.001460004023870882, + "loss": 1.8888, + "step": 1075 + }, + { + "epoch": 0.11350210970464135, + "grad_norm": 0.9530956149101257, + "learning_rate": 0.0014599230929868482, + "loss": 1.9049, + "step": 1076 + }, + { + "epoch": 0.11360759493670886, + "grad_norm": 1.026656150817871, + "learning_rate": 0.0014598420825523918, + "loss": 1.8914, + "step": 1077 + }, + { + "epoch": 0.11371308016877638, + "grad_norm": 0.7768730521202087, + "learning_rate": 0.0014597609925765906, + "loss": 1.8387, + "step": 1078 + }, + { + "epoch": 0.11381856540084388, + "grad_norm": 1.1126939058303833, + "learning_rate": 0.0014596798230685308, + "loss": 1.8847, + "step": 1079 + }, + { + "epoch": 0.11392405063291139, + "grad_norm": 1.0613808631896973, + "learning_rate": 0.0014595985740373082, + "loss": 1.8708, + "step": 1080 + }, + { + "epoch": 0.11402953586497891, + "grad_norm": 0.815146267414093, + "learning_rate": 0.001459517245492027, + "loss": 1.8733, + "step": 1081 + }, + { + "epoch": 0.11413502109704642, + "grad_norm": 0.9056951999664307, + "learning_rate": 0.0014594358374418004, + "loss": 1.8939, + "step": 1082 + }, + { + "epoch": 0.11424050632911392, + "grad_norm": 0.8486618399620056, + "learning_rate": 0.0014593543498957506, + "loss": 1.8611, + "step": 1083 + }, + { + "epoch": 0.11434599156118143, + "grad_norm": 0.8695193529129028, + "learning_rate": 0.0014592727828630088, + "loss": 1.884, + "step": 1084 + }, + { + "epoch": 0.11445147679324895, + "grad_norm": 0.7958950400352478, + "learning_rate": 0.001459191136352715, + "loss": 1.8309, + "step": 1085 + }, + { + "epoch": 0.11455696202531646, + "grad_norm": 0.8478586077690125, + "learning_rate": 0.0014591094103740179, + "loss": 1.8442, + "step": 1086 + }, + { + "epoch": 0.11466244725738396, + "grad_norm": 0.7614657282829285, + "learning_rate": 0.0014590276049360755, + "loss": 1.9166, + "step": 1087 + }, + { + "epoch": 
0.11476793248945148, + "grad_norm": 0.890357255935669, + "learning_rate": 0.0014589457200480543, + "loss": 1.9046, + "step": 1088 + }, + { + "epoch": 0.11487341772151899, + "grad_norm": 0.7359107732772827, + "learning_rate": 0.0014588637557191302, + "loss": 1.9007, + "step": 1089 + }, + { + "epoch": 0.1149789029535865, + "grad_norm": 0.8927121758460999, + "learning_rate": 0.0014587817119584873, + "loss": 1.8747, + "step": 1090 + }, + { + "epoch": 0.115084388185654, + "grad_norm": 0.7666681408882141, + "learning_rate": 0.0014586995887753197, + "loss": 1.8287, + "step": 1091 + }, + { + "epoch": 0.11518987341772152, + "grad_norm": 0.7137439250946045, + "learning_rate": 0.001458617386178829, + "loss": 1.8294, + "step": 1092 + }, + { + "epoch": 0.11529535864978903, + "grad_norm": 0.8428443670272827, + "learning_rate": 0.001458535104178227, + "loss": 1.8573, + "step": 1093 + }, + { + "epoch": 0.11540084388185654, + "grad_norm": 0.7317948341369629, + "learning_rate": 0.001458452742782733, + "loss": 1.8834, + "step": 1094 + }, + { + "epoch": 0.11550632911392406, + "grad_norm": 0.7039938569068909, + "learning_rate": 0.0014583703020015768, + "loss": 1.8748, + "step": 1095 + }, + { + "epoch": 0.11561181434599156, + "grad_norm": 0.8052597045898438, + "learning_rate": 0.001458287781843996, + "loss": 1.8868, + "step": 1096 + }, + { + "epoch": 0.11571729957805907, + "grad_norm": 0.7247453927993774, + "learning_rate": 0.0014582051823192374, + "loss": 1.8637, + "step": 1097 + }, + { + "epoch": 0.11582278481012659, + "grad_norm": 0.7838426828384399, + "learning_rate": 0.0014581225034365564, + "loss": 1.9018, + "step": 1098 + }, + { + "epoch": 0.1159282700421941, + "grad_norm": 0.7044827342033386, + "learning_rate": 0.0014580397452052182, + "loss": 1.8515, + "step": 1099 + }, + { + "epoch": 0.1160337552742616, + "grad_norm": 0.7636606097221375, + "learning_rate": 0.001457956907634496, + "loss": 1.8632, + "step": 1100 + }, + { + "epoch": 0.11613924050632911, + "grad_norm": 
0.7147975564002991, + "learning_rate": 0.001457873990733672, + "loss": 1.8668, + "step": 1101 + }, + { + "epoch": 0.11624472573839663, + "grad_norm": 0.7568527460098267, + "learning_rate": 0.0014577909945120376, + "loss": 1.8558, + "step": 1102 + }, + { + "epoch": 0.11635021097046414, + "grad_norm": 0.7343026399612427, + "learning_rate": 0.001457707918978893, + "loss": 1.8799, + "step": 1103 + }, + { + "epoch": 0.11645569620253164, + "grad_norm": 0.7024850249290466, + "learning_rate": 0.0014576247641435469, + "loss": 1.8839, + "step": 1104 + }, + { + "epoch": 0.11656118143459916, + "grad_norm": 0.6938062906265259, + "learning_rate": 0.0014575415300153174, + "loss": 1.8932, + "step": 1105 + }, + { + "epoch": 0.11666666666666667, + "grad_norm": 0.7227219939231873, + "learning_rate": 0.0014574582166035314, + "loss": 1.8673, + "step": 1106 + }, + { + "epoch": 0.11677215189873418, + "grad_norm": 0.7388744950294495, + "learning_rate": 0.0014573748239175247, + "loss": 1.859, + "step": 1107 + }, + { + "epoch": 0.11687763713080168, + "grad_norm": 0.9176732301712036, + "learning_rate": 0.0014572913519666417, + "loss": 1.856, + "step": 1108 + }, + { + "epoch": 0.1169831223628692, + "grad_norm": 0.8050602078437805, + "learning_rate": 0.0014572078007602355, + "loss": 1.8823, + "step": 1109 + }, + { + "epoch": 0.11708860759493671, + "grad_norm": 0.6389517784118652, + "learning_rate": 0.0014571241703076692, + "loss": 1.8476, + "step": 1110 + }, + { + "epoch": 0.11719409282700421, + "grad_norm": 0.6852282881736755, + "learning_rate": 0.0014570404606183132, + "loss": 1.8942, + "step": 1111 + }, + { + "epoch": 0.11729957805907174, + "grad_norm": 0.6750139594078064, + "learning_rate": 0.0014569566717015483, + "loss": 1.8387, + "step": 1112 + }, + { + "epoch": 0.11740506329113924, + "grad_norm": 0.7424336075782776, + "learning_rate": 0.0014568728035667627, + "loss": 1.8839, + "step": 1113 + }, + { + "epoch": 0.11751054852320675, + "grad_norm": 0.745366096496582, + "learning_rate": 
0.001456788856223355, + "loss": 1.8405, + "step": 1114 + }, + { + "epoch": 0.11761603375527427, + "grad_norm": 0.7880332469940186, + "learning_rate": 0.0014567048296807315, + "loss": 1.8938, + "step": 1115 + }, + { + "epoch": 0.11772151898734177, + "grad_norm": 0.7267454266548157, + "learning_rate": 0.0014566207239483078, + "loss": 1.855, + "step": 1116 + }, + { + "epoch": 0.11782700421940928, + "grad_norm": 0.6588610410690308, + "learning_rate": 0.0014565365390355087, + "loss": 1.8535, + "step": 1117 + }, + { + "epoch": 0.11793248945147679, + "grad_norm": 0.6804662346839905, + "learning_rate": 0.001456452274951767, + "loss": 1.8891, + "step": 1118 + }, + { + "epoch": 0.11803797468354431, + "grad_norm": 0.6700633764266968, + "learning_rate": 0.0014563679317065254, + "loss": 1.8535, + "step": 1119 + }, + { + "epoch": 0.11814345991561181, + "grad_norm": 0.6378878355026245, + "learning_rate": 0.0014562835093092348, + "loss": 1.8322, + "step": 1120 + }, + { + "epoch": 0.11824894514767932, + "grad_norm": 0.6520585417747498, + "learning_rate": 0.0014561990077693553, + "loss": 1.8085, + "step": 1121 + }, + { + "epoch": 0.11835443037974684, + "grad_norm": 0.7228946685791016, + "learning_rate": 0.0014561144270963551, + "loss": 1.8678, + "step": 1122 + }, + { + "epoch": 0.11845991561181435, + "grad_norm": 0.819640040397644, + "learning_rate": 0.0014560297672997127, + "loss": 1.8505, + "step": 1123 + }, + { + "epoch": 0.11856540084388185, + "grad_norm": 0.8168421387672424, + "learning_rate": 0.001455945028388914, + "loss": 1.831, + "step": 1124 + }, + { + "epoch": 0.11867088607594936, + "grad_norm": 0.9419547319412231, + "learning_rate": 0.001455860210373455, + "loss": 1.8648, + "step": 1125 + }, + { + "epoch": 0.11877637130801688, + "grad_norm": 1.2119942903518677, + "learning_rate": 0.0014557753132628396, + "loss": 1.8376, + "step": 1126 + }, + { + "epoch": 0.11888185654008439, + "grad_norm": 1.0030168294906616, + "learning_rate": 0.0014556903370665807, + "loss": 1.8569, + 
"step": 1127 + }, + { + "epoch": 0.1189873417721519, + "grad_norm": 0.6473171710968018, + "learning_rate": 0.0014556052817942013, + "loss": 1.8516, + "step": 1128 + }, + { + "epoch": 0.11909282700421941, + "grad_norm": 0.9784462451934814, + "learning_rate": 0.001455520147455231, + "loss": 1.8702, + "step": 1129 + }, + { + "epoch": 0.11919831223628692, + "grad_norm": 1.0950433015823364, + "learning_rate": 0.0014554349340592104, + "loss": 1.844, + "step": 1130 + }, + { + "epoch": 0.11930379746835443, + "grad_norm": 0.9117785096168518, + "learning_rate": 0.001455349641615688, + "loss": 1.8973, + "step": 1131 + }, + { + "epoch": 0.11940928270042193, + "grad_norm": 0.8083338141441345, + "learning_rate": 0.001455264270134221, + "loss": 1.8332, + "step": 1132 + }, + { + "epoch": 0.11951476793248945, + "grad_norm": 0.7288335561752319, + "learning_rate": 0.0014551788196243754, + "loss": 1.862, + "step": 1133 + }, + { + "epoch": 0.11962025316455696, + "grad_norm": 0.673681914806366, + "learning_rate": 0.0014550932900957271, + "loss": 1.8727, + "step": 1134 + }, + { + "epoch": 0.11972573839662447, + "grad_norm": 0.7379921078681946, + "learning_rate": 0.0014550076815578595, + "loss": 1.8467, + "step": 1135 + }, + { + "epoch": 0.11983122362869199, + "grad_norm": 1.0027583837509155, + "learning_rate": 0.0014549219940203659, + "loss": 1.817, + "step": 1136 + }, + { + "epoch": 0.1199367088607595, + "grad_norm": 0.9040977358818054, + "learning_rate": 0.0014548362274928476, + "loss": 1.8816, + "step": 1137 + }, + { + "epoch": 0.120042194092827, + "grad_norm": 0.6784898638725281, + "learning_rate": 0.0014547503819849154, + "loss": 1.8647, + "step": 1138 + }, + { + "epoch": 0.12014767932489452, + "grad_norm": 0.7737665176391602, + "learning_rate": 0.001454664457506189, + "loss": 1.8485, + "step": 1139 + }, + { + "epoch": 0.12025316455696203, + "grad_norm": 0.885465145111084, + "learning_rate": 0.001454578454066296, + "loss": 1.8813, + "step": 1140 + }, + { + "epoch": 
0.12035864978902953, + "grad_norm": 0.9551668763160706, + "learning_rate": 0.001454492371674874, + "loss": 1.7915, + "step": 1141 + }, + { + "epoch": 0.12046413502109704, + "grad_norm": 1.1440515518188477, + "learning_rate": 0.0014544062103415687, + "loss": 1.8478, + "step": 1142 + }, + { + "epoch": 0.12056962025316456, + "grad_norm": 0.7924839854240417, + "learning_rate": 0.0014543199700760353, + "loss": 1.901, + "step": 1143 + }, + { + "epoch": 0.12067510548523207, + "grad_norm": 0.7156087160110474, + "learning_rate": 0.0014542336508879372, + "loss": 1.8145, + "step": 1144 + }, + { + "epoch": 0.12078059071729957, + "grad_norm": 0.8546972870826721, + "learning_rate": 0.0014541472527869468, + "loss": 1.8434, + "step": 1145 + }, + { + "epoch": 0.1208860759493671, + "grad_norm": 0.7220061421394348, + "learning_rate": 0.0014540607757827456, + "loss": 1.8438, + "step": 1146 + }, + { + "epoch": 0.1209915611814346, + "grad_norm": 0.9475833773612976, + "learning_rate": 0.0014539742198850234, + "loss": 1.8327, + "step": 1147 + }, + { + "epoch": 0.12109704641350211, + "grad_norm": 1.009262204170227, + "learning_rate": 0.0014538875851034798, + "loss": 1.8887, + "step": 1148 + }, + { + "epoch": 0.12120253164556961, + "grad_norm": 0.7780335545539856, + "learning_rate": 0.0014538008714478224, + "loss": 1.8521, + "step": 1149 + }, + { + "epoch": 0.12130801687763713, + "grad_norm": 0.981829822063446, + "learning_rate": 0.0014537140789277678, + "loss": 1.8491, + "step": 1150 + }, + { + "epoch": 0.12141350210970464, + "grad_norm": 0.9665805101394653, + "learning_rate": 0.0014536272075530417, + "loss": 1.854, + "step": 1151 + }, + { + "epoch": 0.12151898734177215, + "grad_norm": 0.7972895503044128, + "learning_rate": 0.0014535402573333783, + "loss": 1.8946, + "step": 1152 + }, + { + "epoch": 0.12162447257383967, + "grad_norm": 1.0549323558807373, + "learning_rate": 0.001453453228278521, + "loss": 1.8529, + "step": 1153 + }, + { + "epoch": 0.12172995780590717, + "grad_norm": 
0.7972485423088074, + "learning_rate": 0.0014533661203982215, + "loss": 1.8489, + "step": 1154 + }, + { + "epoch": 0.12183544303797468, + "grad_norm": 1.0588091611862183, + "learning_rate": 0.0014532789337022413, + "loss": 1.8625, + "step": 1155 + }, + { + "epoch": 0.1219409282700422, + "grad_norm": 0.9616856575012207, + "learning_rate": 0.0014531916682003494, + "loss": 1.8568, + "step": 1156 + }, + { + "epoch": 0.12204641350210971, + "grad_norm": 0.8743852376937866, + "learning_rate": 0.0014531043239023247, + "loss": 1.8484, + "step": 1157 + }, + { + "epoch": 0.12215189873417721, + "grad_norm": 0.7445603013038635, + "learning_rate": 0.0014530169008179546, + "loss": 1.8425, + "step": 1158 + }, + { + "epoch": 0.12225738396624472, + "grad_norm": 0.8883669972419739, + "learning_rate": 0.001452929398957035, + "loss": 1.861, + "step": 1159 + }, + { + "epoch": 0.12236286919831224, + "grad_norm": 0.7757483124732971, + "learning_rate": 0.0014528418183293716, + "loss": 1.8277, + "step": 1160 + }, + { + "epoch": 0.12246835443037975, + "grad_norm": 0.712805449962616, + "learning_rate": 0.0014527541589447774, + "loss": 1.8625, + "step": 1161 + }, + { + "epoch": 0.12257383966244725, + "grad_norm": 0.8384401202201843, + "learning_rate": 0.0014526664208130756, + "loss": 1.8763, + "step": 1162 + }, + { + "epoch": 0.12267932489451477, + "grad_norm": 0.7890302538871765, + "learning_rate": 0.0014525786039440971, + "loss": 1.8379, + "step": 1163 + }, + { + "epoch": 0.12278481012658228, + "grad_norm": 0.7029653787612915, + "learning_rate": 0.001452490708347683, + "loss": 1.8333, + "step": 1164 + }, + { + "epoch": 0.12289029535864979, + "grad_norm": 0.7023031711578369, + "learning_rate": 0.0014524027340336821, + "loss": 1.8407, + "step": 1165 + }, + { + "epoch": 0.1229957805907173, + "grad_norm": 0.6411996483802795, + "learning_rate": 0.0014523146810119525, + "loss": 1.815, + "step": 1166 + }, + { + "epoch": 0.12310126582278481, + "grad_norm": 0.6939812898635864, + "learning_rate": 
0.0014522265492923608, + "loss": 1.8726, + "step": 1167 + }, + { + "epoch": 0.12320675105485232, + "grad_norm": 0.685781717300415, + "learning_rate": 0.0014521383388847824, + "loss": 1.8117, + "step": 1168 + }, + { + "epoch": 0.12331223628691983, + "grad_norm": 0.6645174026489258, + "learning_rate": 0.0014520500497991022, + "loss": 1.8468, + "step": 1169 + }, + { + "epoch": 0.12341772151898735, + "grad_norm": 0.7869470715522766, + "learning_rate": 0.001451961682045213, + "loss": 1.8172, + "step": 1170 + }, + { + "epoch": 0.12352320675105485, + "grad_norm": 0.6968040466308594, + "learning_rate": 0.001451873235633017, + "loss": 1.8567, + "step": 1171 + }, + { + "epoch": 0.12362869198312236, + "grad_norm": 0.7278109788894653, + "learning_rate": 0.0014517847105724251, + "loss": 1.8297, + "step": 1172 + }, + { + "epoch": 0.12373417721518987, + "grad_norm": 0.8673883676528931, + "learning_rate": 0.0014516961068733569, + "loss": 1.7994, + "step": 1173 + }, + { + "epoch": 0.12383966244725739, + "grad_norm": 0.7885510325431824, + "learning_rate": 0.0014516074245457412, + "loss": 1.8409, + "step": 1174 + }, + { + "epoch": 0.1239451476793249, + "grad_norm": 0.9063760042190552, + "learning_rate": 0.001451518663599515, + "loss": 1.8177, + "step": 1175 + }, + { + "epoch": 0.1240506329113924, + "grad_norm": 0.9025067090988159, + "learning_rate": 0.0014514298240446244, + "loss": 1.7757, + "step": 1176 + }, + { + "epoch": 0.12415611814345992, + "grad_norm": 0.797467827796936, + "learning_rate": 0.0014513409058910243, + "loss": 1.8026, + "step": 1177 + }, + { + "epoch": 0.12426160337552743, + "grad_norm": 0.7283565402030945, + "learning_rate": 0.0014512519091486786, + "loss": 1.8549, + "step": 1178 + }, + { + "epoch": 0.12436708860759493, + "grad_norm": 0.8877585530281067, + "learning_rate": 0.0014511628338275597, + "loss": 1.8668, + "step": 1179 + }, + { + "epoch": 0.12447257383966245, + "grad_norm": 0.8357267379760742, + "learning_rate": 0.001451073679937649, + "loss": 1.8431, + 
"step": 1180 + }, + { + "epoch": 0.12457805907172996, + "grad_norm": 0.6661786437034607, + "learning_rate": 0.0014509844474889365, + "loss": 1.8235, + "step": 1181 + }, + { + "epoch": 0.12468354430379747, + "grad_norm": 0.9371480941772461, + "learning_rate": 0.0014508951364914213, + "loss": 1.8189, + "step": 1182 + }, + { + "epoch": 0.12478902953586497, + "grad_norm": 0.8830227255821228, + "learning_rate": 0.001450805746955111, + "loss": 1.8426, + "step": 1183 + }, + { + "epoch": 0.1248945147679325, + "grad_norm": 0.6775006651878357, + "learning_rate": 0.001450716278890022, + "loss": 1.8492, + "step": 1184 + }, + { + "epoch": 0.125, + "grad_norm": 0.722724199295044, + "learning_rate": 0.0014506267323061803, + "loss": 1.85, + "step": 1185 + }, + { + "epoch": 0.12510548523206752, + "grad_norm": 0.6884028911590576, + "learning_rate": 0.0014505371072136195, + "loss": 1.8387, + "step": 1186 + }, + { + "epoch": 0.125210970464135, + "grad_norm": 0.7559560537338257, + "learning_rate": 0.0014504474036223826, + "loss": 1.852, + "step": 1187 + }, + { + "epoch": 0.12531645569620253, + "grad_norm": 0.7603992223739624, + "learning_rate": 0.0014503576215425212, + "loss": 1.8437, + "step": 1188 + }, + { + "epoch": 0.12542194092827005, + "grad_norm": 0.7782211303710938, + "learning_rate": 0.0014502677609840964, + "loss": 1.8121, + "step": 1189 + }, + { + "epoch": 0.12552742616033755, + "grad_norm": 0.9399582147598267, + "learning_rate": 0.0014501778219571766, + "loss": 1.8011, + "step": 1190 + }, + { + "epoch": 0.12563291139240507, + "grad_norm": 0.8218911290168762, + "learning_rate": 0.0014500878044718408, + "loss": 1.8293, + "step": 1191 + }, + { + "epoch": 0.1257383966244726, + "grad_norm": 0.7567782998085022, + "learning_rate": 0.0014499977085381756, + "loss": 1.8086, + "step": 1192 + }, + { + "epoch": 0.12584388185654008, + "grad_norm": 0.631169319152832, + "learning_rate": 0.0014499075341662764, + "loss": 1.8592, + "step": 1193 + }, + { + "epoch": 0.1259493670886076, + 
"grad_norm": 0.7428240180015564, + "learning_rate": 0.0014498172813662482, + "loss": 1.8786, + "step": 1194 + }, + { + "epoch": 0.1260548523206751, + "grad_norm": 0.978882908821106, + "learning_rate": 0.0014497269501482037, + "loss": 1.8336, + "step": 1195 + }, + { + "epoch": 0.1261603375527426, + "grad_norm": 1.2190033197402954, + "learning_rate": 0.0014496365405222656, + "loss": 1.8209, + "step": 1196 + }, + { + "epoch": 0.12626582278481013, + "grad_norm": 0.7180554866790771, + "learning_rate": 0.0014495460524985644, + "loss": 1.8328, + "step": 1197 + }, + { + "epoch": 0.12637130801687763, + "grad_norm": 1.050147533416748, + "learning_rate": 0.0014494554860872398, + "loss": 1.8604, + "step": 1198 + }, + { + "epoch": 0.12647679324894515, + "grad_norm": 1.008325457572937, + "learning_rate": 0.00144936484129844, + "loss": 1.864, + "step": 1199 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 0.7349124550819397, + "learning_rate": 0.0014492741181423225, + "loss": 1.8413, + "step": 1200 + }, + { + "epoch": 0.12668776371308016, + "grad_norm": 0.825681746006012, + "learning_rate": 0.001449183316629053, + "loss": 1.8373, + "step": 1201 + }, + { + "epoch": 0.12679324894514768, + "grad_norm": 0.889874279499054, + "learning_rate": 0.0014490924367688066, + "loss": 1.8409, + "step": 1202 + }, + { + "epoch": 0.1268987341772152, + "grad_norm": 0.7285179495811462, + "learning_rate": 0.0014490014785717667, + "loss": 1.8092, + "step": 1203 + }, + { + "epoch": 0.1270042194092827, + "grad_norm": 0.7320941090583801, + "learning_rate": 0.0014489104420481254, + "loss": 1.8272, + "step": 1204 + }, + { + "epoch": 0.1271097046413502, + "grad_norm": 0.7456066012382507, + "learning_rate": 0.001448819327208084, + "loss": 1.8672, + "step": 1205 + }, + { + "epoch": 0.12721518987341773, + "grad_norm": 0.6581923365592957, + "learning_rate": 0.0014487281340618526, + "loss": 1.8205, + "step": 1206 + }, + { + "epoch": 0.12732067510548523, + "grad_norm": 0.70477294921875, + "learning_rate": 
0.0014486368626196494, + "loss": 1.8229, + "step": 1207 + }, + { + "epoch": 0.12742616033755275, + "grad_norm": 0.7114017009735107, + "learning_rate": 0.001448545512891702, + "loss": 1.828, + "step": 1208 + }, + { + "epoch": 0.12753164556962027, + "grad_norm": 0.7092335820198059, + "learning_rate": 0.0014484540848882469, + "loss": 1.8296, + "step": 1209 + }, + { + "epoch": 0.12763713080168776, + "grad_norm": 0.6658474206924438, + "learning_rate": 0.0014483625786195285, + "loss": 1.8599, + "step": 1210 + }, + { + "epoch": 0.12774261603375528, + "grad_norm": 0.666368842124939, + "learning_rate": 0.0014482709940958009, + "loss": 1.8103, + "step": 1211 + }, + { + "epoch": 0.12784810126582277, + "grad_norm": 0.6433525681495667, + "learning_rate": 0.0014481793313273266, + "loss": 1.7916, + "step": 1212 + }, + { + "epoch": 0.1279535864978903, + "grad_norm": 0.7827813029289246, + "learning_rate": 0.0014480875903243766, + "loss": 1.8232, + "step": 1213 + }, + { + "epoch": 0.1280590717299578, + "grad_norm": 0.9380127191543579, + "learning_rate": 0.0014479957710972313, + "loss": 1.8161, + "step": 1214 + }, + { + "epoch": 0.1281645569620253, + "grad_norm": 0.9930049777030945, + "learning_rate": 0.0014479038736561793, + "loss": 1.8056, + "step": 1215 + }, + { + "epoch": 0.12827004219409283, + "grad_norm": 0.8962766528129578, + "learning_rate": 0.001447811898011518, + "loss": 1.8356, + "step": 1216 + }, + { + "epoch": 0.12837552742616035, + "grad_norm": 0.7079574465751648, + "learning_rate": 0.0014477198441735543, + "loss": 1.8225, + "step": 1217 + }, + { + "epoch": 0.12848101265822784, + "grad_norm": 0.6912738084793091, + "learning_rate": 0.0014476277121526027, + "loss": 1.871, + "step": 1218 + }, + { + "epoch": 0.12858649789029536, + "grad_norm": 0.769539475440979, + "learning_rate": 0.0014475355019589872, + "loss": 1.8052, + "step": 1219 + }, + { + "epoch": 0.12869198312236288, + "grad_norm": 0.6624451279640198, + "learning_rate": 0.0014474432136030405, + "loss": 1.7951, + 
"step": 1220 + }, + { + "epoch": 0.12879746835443037, + "grad_norm": 0.6770875453948975, + "learning_rate": 0.001447350847095104, + "loss": 1.8288, + "step": 1221 + }, + { + "epoch": 0.1289029535864979, + "grad_norm": 0.6737990975379944, + "learning_rate": 0.001447258402445528, + "loss": 1.8426, + "step": 1222 + }, + { + "epoch": 0.1290084388185654, + "grad_norm": 0.6515424251556396, + "learning_rate": 0.0014471658796646708, + "loss": 1.7931, + "step": 1223 + }, + { + "epoch": 0.1291139240506329, + "grad_norm": 0.7301947474479675, + "learning_rate": 0.0014470732787629005, + "loss": 1.8458, + "step": 1224 + }, + { + "epoch": 0.12921940928270043, + "grad_norm": 0.7043319344520569, + "learning_rate": 0.0014469805997505932, + "loss": 1.7761, + "step": 1225 + }, + { + "epoch": 0.12932489451476795, + "grad_norm": 0.6863788366317749, + "learning_rate": 0.0014468878426381346, + "loss": 1.7853, + "step": 1226 + }, + { + "epoch": 0.12943037974683544, + "grad_norm": 0.7219026684761047, + "learning_rate": 0.001446795007435918, + "loss": 1.7883, + "step": 1227 + }, + { + "epoch": 0.12953586497890296, + "grad_norm": 0.8034070134162903, + "learning_rate": 0.0014467020941543464, + "loss": 1.7996, + "step": 1228 + }, + { + "epoch": 0.12964135021097045, + "grad_norm": 0.9910058379173279, + "learning_rate": 0.0014466091028038314, + "loss": 1.837, + "step": 1229 + }, + { + "epoch": 0.12974683544303797, + "grad_norm": 0.8622016906738281, + "learning_rate": 0.0014465160333947923, + "loss": 1.7793, + "step": 1230 + }, + { + "epoch": 0.1298523206751055, + "grad_norm": 0.6330486536026001, + "learning_rate": 0.0014464228859376587, + "loss": 1.8279, + "step": 1231 + }, + { + "epoch": 0.12995780590717299, + "grad_norm": 0.7237390875816345, + "learning_rate": 0.001446329660442868, + "loss": 1.8597, + "step": 1232 + }, + { + "epoch": 0.1300632911392405, + "grad_norm": 0.8396974205970764, + "learning_rate": 0.0014462363569208666, + "loss": 1.8003, + "step": 1233 + }, + { + "epoch": 
0.13016877637130803, + "grad_norm": 0.8439801335334778, + "learning_rate": 0.00144614297538211, + "loss": 1.8456, + "step": 1234 + }, + { + "epoch": 0.13027426160337552, + "grad_norm": 0.7538939118385315, + "learning_rate": 0.0014460495158370615, + "loss": 1.8449, + "step": 1235 + }, + { + "epoch": 0.13037974683544304, + "grad_norm": 0.7263071537017822, + "learning_rate": 0.0014459559782961937, + "loss": 1.8174, + "step": 1236 + }, + { + "epoch": 0.13048523206751056, + "grad_norm": 0.6297847628593445, + "learning_rate": 0.0014458623627699883, + "loss": 1.7992, + "step": 1237 + }, + { + "epoch": 0.13059071729957805, + "grad_norm": 0.7037213444709778, + "learning_rate": 0.0014457686692689355, + "loss": 1.814, + "step": 1238 + }, + { + "epoch": 0.13069620253164557, + "grad_norm": 0.692755937576294, + "learning_rate": 0.0014456748978035339, + "loss": 1.8294, + "step": 1239 + }, + { + "epoch": 0.1308016877637131, + "grad_norm": 0.7339959740638733, + "learning_rate": 0.0014455810483842908, + "loss": 1.8024, + "step": 1240 + }, + { + "epoch": 0.13090717299578059, + "grad_norm": 0.7207701206207275, + "learning_rate": 0.0014454871210217229, + "loss": 1.7935, + "step": 1241 + }, + { + "epoch": 0.1310126582278481, + "grad_norm": 0.7826941609382629, + "learning_rate": 0.0014453931157263548, + "loss": 1.8385, + "step": 1242 + }, + { + "epoch": 0.1311181434599156, + "grad_norm": 0.6282021999359131, + "learning_rate": 0.001445299032508721, + "loss": 1.8221, + "step": 1243 + }, + { + "epoch": 0.13122362869198312, + "grad_norm": 0.8492001295089722, + "learning_rate": 0.0014452048713793633, + "loss": 1.8222, + "step": 1244 + }, + { + "epoch": 0.13132911392405064, + "grad_norm": 0.9081045389175415, + "learning_rate": 0.0014451106323488331, + "loss": 1.8549, + "step": 1245 + }, + { + "epoch": 0.13143459915611813, + "grad_norm": 0.8355534672737122, + "learning_rate": 0.0014450163154276906, + "loss": 1.862, + "step": 1246 + }, + { + "epoch": 0.13154008438818565, + "grad_norm": 
0.8220559358596802, + "learning_rate": 0.0014449219206265041, + "loss": 1.8258, + "step": 1247 + }, + { + "epoch": 0.13164556962025317, + "grad_norm": 0.6435973048210144, + "learning_rate": 0.0014448274479558513, + "loss": 1.8323, + "step": 1248 + }, + { + "epoch": 0.13175105485232066, + "grad_norm": 0.7602776288986206, + "learning_rate": 0.0014447328974263182, + "loss": 1.8168, + "step": 1249 + }, + { + "epoch": 0.13185654008438819, + "grad_norm": 0.8574325442314148, + "learning_rate": 0.0014446382690484997, + "loss": 1.8188, + "step": 1250 + }, + { + "epoch": 0.1319620253164557, + "grad_norm": 0.8587391972541809, + "learning_rate": 0.0014445435628329993, + "loss": 1.8501, + "step": 1251 + }, + { + "epoch": 0.1320675105485232, + "grad_norm": 0.7200959920883179, + "learning_rate": 0.0014444487787904294, + "loss": 1.8063, + "step": 1252 + }, + { + "epoch": 0.13217299578059072, + "grad_norm": 0.6755486726760864, + "learning_rate": 0.001444353916931411, + "loss": 1.8006, + "step": 1253 + }, + { + "epoch": 0.13227848101265824, + "grad_norm": 0.7063353657722473, + "learning_rate": 0.001444258977266574, + "loss": 1.792, + "step": 1254 + }, + { + "epoch": 0.13238396624472573, + "grad_norm": 0.7428087592124939, + "learning_rate": 0.0014441639598065565, + "loss": 1.7912, + "step": 1255 + }, + { + "epoch": 0.13248945147679325, + "grad_norm": 0.7653167843818665, + "learning_rate": 0.001444068864562006, + "loss": 1.8062, + "step": 1256 + }, + { + "epoch": 0.13259493670886077, + "grad_norm": 0.7311589121818542, + "learning_rate": 0.0014439736915435786, + "loss": 1.8301, + "step": 1257 + }, + { + "epoch": 0.13270042194092826, + "grad_norm": 0.6515324711799622, + "learning_rate": 0.001443878440761938, + "loss": 1.8083, + "step": 1258 + }, + { + "epoch": 0.13280590717299579, + "grad_norm": 0.7293159365653992, + "learning_rate": 0.0014437831122277585, + "loss": 1.8148, + "step": 1259 + }, + { + "epoch": 0.13291139240506328, + "grad_norm": 0.8701435327529907, + "learning_rate": 
0.0014436877059517215, + "loss": 1.8075, + "step": 1260 + }, + { + "epoch": 0.1330168776371308, + "grad_norm": 0.7447200417518616, + "learning_rate": 0.0014435922219445182, + "loss": 1.8207, + "step": 1261 + }, + { + "epoch": 0.13312236286919832, + "grad_norm": 0.6748594641685486, + "learning_rate": 0.0014434966602168478, + "loss": 1.7927, + "step": 1262 + }, + { + "epoch": 0.1332278481012658, + "grad_norm": 0.8613166809082031, + "learning_rate": 0.0014434010207794185, + "loss": 1.7995, + "step": 1263 + }, + { + "epoch": 0.13333333333333333, + "grad_norm": 1.2235286235809326, + "learning_rate": 0.0014433053036429474, + "loss": 1.7901, + "step": 1264 + }, + { + "epoch": 0.13343881856540085, + "grad_norm": 0.7899220585823059, + "learning_rate": 0.00144320950881816, + "loss": 1.8062, + "step": 1265 + }, + { + "epoch": 0.13354430379746834, + "grad_norm": 0.8237556219100952, + "learning_rate": 0.0014431136363157902, + "loss": 1.8072, + "step": 1266 + }, + { + "epoch": 0.13364978902953586, + "grad_norm": 0.9973738193511963, + "learning_rate": 0.0014430176861465812, + "loss": 1.8374, + "step": 1267 + }, + { + "epoch": 0.13375527426160339, + "grad_norm": 0.788825273513794, + "learning_rate": 0.001442921658321285, + "loss": 1.7831, + "step": 1268 + }, + { + "epoch": 0.13386075949367088, + "grad_norm": 0.8803501129150391, + "learning_rate": 0.0014428255528506617, + "loss": 1.8491, + "step": 1269 + }, + { + "epoch": 0.1339662447257384, + "grad_norm": 1.0171993970870972, + "learning_rate": 0.0014427293697454803, + "loss": 1.8033, + "step": 1270 + }, + { + "epoch": 0.13407172995780592, + "grad_norm": 0.6911283135414124, + "learning_rate": 0.001442633109016519, + "loss": 1.8373, + "step": 1271 + }, + { + "epoch": 0.1341772151898734, + "grad_norm": 0.8601331114768982, + "learning_rate": 0.001442536770674564, + "loss": 1.8787, + "step": 1272 + }, + { + "epoch": 0.13428270042194093, + "grad_norm": 0.8647957444190979, + "learning_rate": 0.0014424403547304103, + "loss": 1.7947, + 
"step": 1273 + }, + { + "epoch": 0.13438818565400845, + "grad_norm": 0.8467908501625061, + "learning_rate": 0.0014423438611948624, + "loss": 1.8228, + "step": 1274 + }, + { + "epoch": 0.13449367088607594, + "grad_norm": 0.6522955894470215, + "learning_rate": 0.0014422472900787323, + "loss": 1.8188, + "step": 1275 + }, + { + "epoch": 0.13459915611814346, + "grad_norm": 0.8427030444145203, + "learning_rate": 0.0014421506413928415, + "loss": 1.7986, + "step": 1276 + }, + { + "epoch": 0.13470464135021096, + "grad_norm": 0.8978816270828247, + "learning_rate": 0.0014420539151480199, + "loss": 1.8386, + "step": 1277 + }, + { + "epoch": 0.13481012658227848, + "grad_norm": 0.8022451996803284, + "learning_rate": 0.0014419571113551063, + "loss": 1.8108, + "step": 1278 + }, + { + "epoch": 0.134915611814346, + "grad_norm": 0.6966913938522339, + "learning_rate": 0.0014418602300249482, + "loss": 1.8378, + "step": 1279 + }, + { + "epoch": 0.1350210970464135, + "grad_norm": 0.7757465839385986, + "learning_rate": 0.001441763271168401, + "loss": 1.7785, + "step": 1280 + }, + { + "epoch": 0.135126582278481, + "grad_norm": 0.7825314998626709, + "learning_rate": 0.00144166623479633, + "loss": 1.8117, + "step": 1281 + }, + { + "epoch": 0.13523206751054853, + "grad_norm": 0.7162776589393616, + "learning_rate": 0.0014415691209196085, + "loss": 1.7647, + "step": 1282 + }, + { + "epoch": 0.13533755274261602, + "grad_norm": 0.6996254920959473, + "learning_rate": 0.0014414719295491184, + "loss": 1.8227, + "step": 1283 + }, + { + "epoch": 0.13544303797468354, + "grad_norm": 0.6781145930290222, + "learning_rate": 0.0014413746606957505, + "loss": 1.7854, + "step": 1284 + }, + { + "epoch": 0.13554852320675106, + "grad_norm": 0.7741174697875977, + "learning_rate": 0.0014412773143704046, + "loss": 1.7862, + "step": 1285 + }, + { + "epoch": 0.13565400843881856, + "grad_norm": 0.636627197265625, + "learning_rate": 0.0014411798905839884, + "loss": 1.7931, + "step": 1286 + }, + { + "epoch": 
0.13575949367088608, + "grad_norm": 0.7170829176902771, + "learning_rate": 0.0014410823893474193, + "loss": 1.7962, + "step": 1287 + }, + { + "epoch": 0.1358649789029536, + "grad_norm": 0.7141652703285217, + "learning_rate": 0.001440984810671622, + "loss": 1.8198, + "step": 1288 + }, + { + "epoch": 0.1359704641350211, + "grad_norm": 1.0177857875823975, + "learning_rate": 0.0014408871545675314, + "loss": 1.8646, + "step": 1289 + }, + { + "epoch": 0.1360759493670886, + "grad_norm": 1.0258063077926636, + "learning_rate": 0.00144078942104609, + "loss": 1.8279, + "step": 1290 + }, + { + "epoch": 0.13618143459915613, + "grad_norm": 0.7523351907730103, + "learning_rate": 0.0014406916101182491, + "loss": 1.8274, + "step": 1291 + }, + { + "epoch": 0.13628691983122362, + "grad_norm": 0.7810572385787964, + "learning_rate": 0.0014405937217949695, + "loss": 1.7904, + "step": 1292 + }, + { + "epoch": 0.13639240506329114, + "grad_norm": 1.147817611694336, + "learning_rate": 0.0014404957560872197, + "loss": 1.8011, + "step": 1293 + }, + { + "epoch": 0.13649789029535864, + "grad_norm": 0.804366946220398, + "learning_rate": 0.0014403977130059773, + "loss": 1.8278, + "step": 1294 + }, + { + "epoch": 0.13660337552742616, + "grad_norm": 0.6693724393844604, + "learning_rate": 0.0014402995925622284, + "loss": 1.7422, + "step": 1295 + }, + { + "epoch": 0.13670886075949368, + "grad_norm": 0.8254324793815613, + "learning_rate": 0.0014402013947669681, + "loss": 1.8061, + "step": 1296 + }, + { + "epoch": 0.13681434599156117, + "grad_norm": 0.8546948432922363, + "learning_rate": 0.0014401031196312, + "loss": 1.8155, + "step": 1297 + }, + { + "epoch": 0.1369198312236287, + "grad_norm": 0.7697573900222778, + "learning_rate": 0.001440004767165936, + "loss": 1.7486, + "step": 1298 + }, + { + "epoch": 0.1370253164556962, + "grad_norm": 0.9042449593544006, + "learning_rate": 0.0014399063373821972, + "loss": 1.7874, + "step": 1299 + }, + { + "epoch": 0.1371308016877637, + "grad_norm": 
0.7202324867248535, + "learning_rate": 0.001439807830291013, + "loss": 1.8021, + "step": 1300 + }, + { + "epoch": 0.13723628691983122, + "grad_norm": 0.6722977757453918, + "learning_rate": 0.001439709245903422, + "loss": 1.7925, + "step": 1301 + }, + { + "epoch": 0.13734177215189874, + "grad_norm": 0.741966962814331, + "learning_rate": 0.0014396105842304707, + "loss": 1.8173, + "step": 1302 + }, + { + "epoch": 0.13744725738396624, + "grad_norm": 0.6990131735801697, + "learning_rate": 0.0014395118452832146, + "loss": 1.8103, + "step": 1303 + }, + { + "epoch": 0.13755274261603376, + "grad_norm": 0.6179027557373047, + "learning_rate": 0.001439413029072718, + "loss": 1.7858, + "step": 1304 + }, + { + "epoch": 0.13765822784810128, + "grad_norm": 0.6794390678405762, + "learning_rate": 0.001439314135610054, + "loss": 1.8098, + "step": 1305 + }, + { + "epoch": 0.13776371308016877, + "grad_norm": 0.6400492787361145, + "learning_rate": 0.0014392151649063039, + "loss": 1.7951, + "step": 1306 + }, + { + "epoch": 0.1378691983122363, + "grad_norm": 0.7198679447174072, + "learning_rate": 0.0014391161169725573, + "loss": 1.7783, + "step": 1307 + }, + { + "epoch": 0.1379746835443038, + "grad_norm": 0.7680641412734985, + "learning_rate": 0.001439016991819914, + "loss": 1.814, + "step": 1308 + }, + { + "epoch": 0.1380801687763713, + "grad_norm": 0.8257659673690796, + "learning_rate": 0.001438917789459481, + "loss": 1.7793, + "step": 1309 + }, + { + "epoch": 0.13818565400843882, + "grad_norm": 0.7430399656295776, + "learning_rate": 0.0014388185099023744, + "loss": 1.7852, + "step": 1310 + }, + { + "epoch": 0.13829113924050632, + "grad_norm": 0.674595296382904, + "learning_rate": 0.001438719153159719, + "loss": 1.7892, + "step": 1311 + }, + { + "epoch": 0.13839662447257384, + "grad_norm": 0.7471333146095276, + "learning_rate": 0.0014386197192426482, + "loss": 1.8001, + "step": 1312 + }, + { + "epoch": 0.13850210970464136, + "grad_norm": 0.6662381887435913, + "learning_rate": 
0.001438520208162304, + "loss": 1.8244, + "step": 1313 + }, + { + "epoch": 0.13860759493670885, + "grad_norm": 0.6611544489860535, + "learning_rate": 0.0014384206199298374, + "loss": 1.8267, + "step": 1314 + }, + { + "epoch": 0.13871308016877637, + "grad_norm": 0.6684417724609375, + "learning_rate": 0.0014383209545564073, + "loss": 1.8268, + "step": 1315 + }, + { + "epoch": 0.1388185654008439, + "grad_norm": 0.6537047028541565, + "learning_rate": 0.001438221212053182, + "loss": 1.76, + "step": 1316 + }, + { + "epoch": 0.13892405063291138, + "grad_norm": 0.850995659828186, + "learning_rate": 0.0014381213924313386, + "loss": 1.7856, + "step": 1317 + }, + { + "epoch": 0.1390295358649789, + "grad_norm": 1.1960067749023438, + "learning_rate": 0.0014380214957020613, + "loss": 1.7975, + "step": 1318 + }, + { + "epoch": 0.13913502109704642, + "grad_norm": 0.7146938443183899, + "learning_rate": 0.001437921521876545, + "loss": 1.8111, + "step": 1319 + }, + { + "epoch": 0.13924050632911392, + "grad_norm": 0.8614456653594971, + "learning_rate": 0.0014378214709659916, + "loss": 1.7954, + "step": 1320 + }, + { + "epoch": 0.13934599156118144, + "grad_norm": 1.2474298477172852, + "learning_rate": 0.0014377213429816128, + "loss": 1.7814, + "step": 1321 + }, + { + "epoch": 0.13945147679324896, + "grad_norm": 0.8480991125106812, + "learning_rate": 0.0014376211379346282, + "loss": 1.7885, + "step": 1322 + }, + { + "epoch": 0.13955696202531645, + "grad_norm": 0.6891940236091614, + "learning_rate": 0.0014375208558362663, + "loss": 1.8012, + "step": 1323 + }, + { + "epoch": 0.13966244725738397, + "grad_norm": 0.849804162979126, + "learning_rate": 0.0014374204966977639, + "loss": 1.8035, + "step": 1324 + }, + { + "epoch": 0.13976793248945146, + "grad_norm": 0.82364821434021, + "learning_rate": 0.0014373200605303674, + "loss": 1.7762, + "step": 1325 + }, + { + "epoch": 0.13987341772151898, + "grad_norm": 0.7433011531829834, + "learning_rate": 0.001437219547345331, + "loss": 1.8085, + 
"step": 1326 + }, + { + "epoch": 0.1399789029535865, + "grad_norm": 0.7200556993484497, + "learning_rate": 0.0014371189571539174, + "loss": 1.7895, + "step": 1327 + }, + { + "epoch": 0.140084388185654, + "grad_norm": 0.7827677726745605, + "learning_rate": 0.0014370182899673982, + "loss": 1.7757, + "step": 1328 + }, + { + "epoch": 0.14018987341772152, + "grad_norm": 0.9818326830863953, + "learning_rate": 0.0014369175457970538, + "loss": 1.7953, + "step": 1329 + }, + { + "epoch": 0.14029535864978904, + "grad_norm": 1.0174500942230225, + "learning_rate": 0.0014368167246541733, + "loss": 1.8227, + "step": 1330 + }, + { + "epoch": 0.14040084388185653, + "grad_norm": 0.7383694052696228, + "learning_rate": 0.0014367158265500537, + "loss": 1.8069, + "step": 1331 + }, + { + "epoch": 0.14050632911392405, + "grad_norm": 0.87797611951828, + "learning_rate": 0.0014366148514960016, + "loss": 1.7951, + "step": 1332 + }, + { + "epoch": 0.14061181434599157, + "grad_norm": 0.9642936587333679, + "learning_rate": 0.001436513799503332, + "loss": 1.7639, + "step": 1333 + }, + { + "epoch": 0.14071729957805906, + "grad_norm": 0.8172906637191772, + "learning_rate": 0.0014364126705833675, + "loss": 1.782, + "step": 1334 + }, + { + "epoch": 0.14082278481012658, + "grad_norm": 0.8514280319213867, + "learning_rate": 0.0014363114647474406, + "loss": 1.7704, + "step": 1335 + }, + { + "epoch": 0.1409282700421941, + "grad_norm": 1.102067470550537, + "learning_rate": 0.0014362101820068918, + "loss": 1.7956, + "step": 1336 + }, + { + "epoch": 0.1410337552742616, + "grad_norm": 0.7029712200164795, + "learning_rate": 0.0014361088223730704, + "loss": 1.8143, + "step": 1337 + }, + { + "epoch": 0.14113924050632912, + "grad_norm": 1.2550036907196045, + "learning_rate": 0.0014360073858573341, + "loss": 1.7976, + "step": 1338 + }, + { + "epoch": 0.14124472573839664, + "grad_norm": 0.8031389117240906, + "learning_rate": 0.0014359058724710497, + "loss": 1.842, + "step": 1339 + }, + { + "epoch": 
0.14135021097046413, + "grad_norm": 0.8022482395172119, + "learning_rate": 0.0014358042822255918, + "loss": 1.7737, + "step": 1340 + }, + { + "epoch": 0.14145569620253165, + "grad_norm": 0.9170877933502197, + "learning_rate": 0.0014357026151323444, + "loss": 1.7993, + "step": 1341 + }, + { + "epoch": 0.14156118143459914, + "grad_norm": 0.9575889110565186, + "learning_rate": 0.0014356008712027, + "loss": 1.8283, + "step": 1342 + }, + { + "epoch": 0.14166666666666666, + "grad_norm": 0.8463864326477051, + "learning_rate": 0.0014354990504480592, + "loss": 1.7778, + "step": 1343 + }, + { + "epoch": 0.14177215189873418, + "grad_norm": 0.6938042044639587, + "learning_rate": 0.0014353971528798313, + "loss": 1.804, + "step": 1344 + }, + { + "epoch": 0.14187763713080168, + "grad_norm": 0.800628125667572, + "learning_rate": 0.001435295178509435, + "loss": 1.7838, + "step": 1345 + }, + { + "epoch": 0.1419831223628692, + "grad_norm": 0.875594437122345, + "learning_rate": 0.0014351931273482966, + "loss": 1.7855, + "step": 1346 + }, + { + "epoch": 0.14208860759493672, + "grad_norm": 0.7352370619773865, + "learning_rate": 0.0014350909994078516, + "loss": 1.794, + "step": 1347 + }, + { + "epoch": 0.1421940928270042, + "grad_norm": 0.6576163172721863, + "learning_rate": 0.0014349887946995441, + "loss": 1.7783, + "step": 1348 + }, + { + "epoch": 0.14229957805907173, + "grad_norm": 0.7258592844009399, + "learning_rate": 0.0014348865132348262, + "loss": 1.8018, + "step": 1349 + }, + { + "epoch": 0.14240506329113925, + "grad_norm": 0.7895303964614868, + "learning_rate": 0.0014347841550251597, + "loss": 1.8021, + "step": 1350 + }, + { + "epoch": 0.14251054852320674, + "grad_norm": 0.6640409231185913, + "learning_rate": 0.0014346817200820137, + "loss": 1.7695, + "step": 1351 + }, + { + "epoch": 0.14261603375527426, + "grad_norm": 0.7494959235191345, + "learning_rate": 0.0014345792084168672, + "loss": 1.7675, + "step": 1352 + }, + { + "epoch": 0.14272151898734178, + "grad_norm": 
0.6856107711791992, + "learning_rate": 0.0014344766200412062, + "loss": 1.8083, + "step": 1353 + }, + { + "epoch": 0.14282700421940928, + "grad_norm": 0.7249695062637329, + "learning_rate": 0.0014343739549665274, + "loss": 1.81, + "step": 1354 + }, + { + "epoch": 0.1429324894514768, + "grad_norm": 0.6952835321426392, + "learning_rate": 0.0014342712132043342, + "loss": 1.77, + "step": 1355 + }, + { + "epoch": 0.14303797468354432, + "grad_norm": 0.7198187708854675, + "learning_rate": 0.001434168394766139, + "loss": 1.7745, + "step": 1356 + }, + { + "epoch": 0.1431434599156118, + "grad_norm": 0.6582894325256348, + "learning_rate": 0.001434065499663464, + "loss": 1.7921, + "step": 1357 + }, + { + "epoch": 0.14324894514767933, + "grad_norm": 0.7357485294342041, + "learning_rate": 0.0014339625279078388, + "loss": 1.7837, + "step": 1358 + }, + { + "epoch": 0.14335443037974682, + "grad_norm": 0.8382334113121033, + "learning_rate": 0.0014338594795108017, + "loss": 1.7637, + "step": 1359 + }, + { + "epoch": 0.14345991561181434, + "grad_norm": 0.6411068439483643, + "learning_rate": 0.0014337563544838997, + "loss": 1.8027, + "step": 1360 + }, + { + "epoch": 0.14356540084388186, + "grad_norm": 0.7448423504829407, + "learning_rate": 0.0014336531528386888, + "loss": 1.7698, + "step": 1361 + }, + { + "epoch": 0.14367088607594936, + "grad_norm": 0.7575255036354065, + "learning_rate": 0.0014335498745867332, + "loss": 1.7896, + "step": 1362 + }, + { + "epoch": 0.14377637130801688, + "grad_norm": 0.7063677906990051, + "learning_rate": 0.0014334465197396054, + "loss": 1.7961, + "step": 1363 + }, + { + "epoch": 0.1438818565400844, + "grad_norm": 0.6385489702224731, + "learning_rate": 0.0014333430883088877, + "loss": 1.7842, + "step": 1364 + }, + { + "epoch": 0.1439873417721519, + "grad_norm": 0.7255067825317383, + "learning_rate": 0.001433239580306169, + "loss": 1.774, + "step": 1365 + }, + { + "epoch": 0.1440928270042194, + "grad_norm": 0.7404801845550537, + "learning_rate": 
0.0014331359957430482, + "loss": 1.7806, + "step": 1366 + }, + { + "epoch": 0.14419831223628693, + "grad_norm": 0.8373108506202698, + "learning_rate": 0.001433032334631133, + "loss": 1.8015, + "step": 1367 + }, + { + "epoch": 0.14430379746835442, + "grad_norm": 1.211124062538147, + "learning_rate": 0.0014329285969820389, + "loss": 1.7897, + "step": 1368 + }, + { + "epoch": 0.14440928270042194, + "grad_norm": 0.9793059229850769, + "learning_rate": 0.00143282478280739, + "loss": 1.8064, + "step": 1369 + }, + { + "epoch": 0.14451476793248946, + "grad_norm": 0.7771275043487549, + "learning_rate": 0.001432720892118819, + "loss": 1.7736, + "step": 1370 + }, + { + "epoch": 0.14462025316455696, + "grad_norm": 0.7294965386390686, + "learning_rate": 0.0014326169249279683, + "loss": 1.7702, + "step": 1371 + }, + { + "epoch": 0.14472573839662448, + "grad_norm": 0.9268815517425537, + "learning_rate": 0.001432512881246487, + "loss": 1.7674, + "step": 1372 + }, + { + "epoch": 0.144831223628692, + "grad_norm": 1.1329964399337769, + "learning_rate": 0.0014324087610860339, + "loss": 1.7984, + "step": 1373 + }, + { + "epoch": 0.1449367088607595, + "grad_norm": 0.8092204928398132, + "learning_rate": 0.0014323045644582765, + "loss": 1.7728, + "step": 1374 + }, + { + "epoch": 0.145042194092827, + "grad_norm": 0.90907883644104, + "learning_rate": 0.0014322002913748902, + "loss": 1.7963, + "step": 1375 + }, + { + "epoch": 0.1451476793248945, + "grad_norm": 1.312678337097168, + "learning_rate": 0.0014320959418475596, + "loss": 1.7712, + "step": 1376 + }, + { + "epoch": 0.14525316455696202, + "grad_norm": 0.7446073293685913, + "learning_rate": 0.0014319915158879776, + "loss": 1.7437, + "step": 1377 + }, + { + "epoch": 0.14535864978902954, + "grad_norm": 1.0868433713912964, + "learning_rate": 0.0014318870135078452, + "loss": 1.7621, + "step": 1378 + }, + { + "epoch": 0.14546413502109704, + "grad_norm": 1.153080701828003, + "learning_rate": 0.001431782434718873, + "loss": 1.817, + "step": 
1379 + }, + { + "epoch": 0.14556962025316456, + "grad_norm": 0.6610114574432373, + "learning_rate": 0.0014316777795327794, + "loss": 1.7594, + "step": 1380 + }, + { + "epoch": 0.14567510548523208, + "grad_norm": 0.9080953598022461, + "learning_rate": 0.0014315730479612914, + "loss": 1.8075, + "step": 1381 + }, + { + "epoch": 0.14578059071729957, + "grad_norm": 0.9568781852722168, + "learning_rate": 0.0014314682400161445, + "loss": 1.7922, + "step": 1382 + }, + { + "epoch": 0.1458860759493671, + "grad_norm": 0.6741198301315308, + "learning_rate": 0.0014313633557090834, + "loss": 1.766, + "step": 1383 + }, + { + "epoch": 0.1459915611814346, + "grad_norm": 0.8613115549087524, + "learning_rate": 0.0014312583950518607, + "loss": 1.7591, + "step": 1384 + }, + { + "epoch": 0.1460970464135021, + "grad_norm": 1.022559404373169, + "learning_rate": 0.0014311533580562378, + "loss": 1.802, + "step": 1385 + }, + { + "epoch": 0.14620253164556962, + "grad_norm": 0.782627522945404, + "learning_rate": 0.0014310482447339845, + "loss": 1.7789, + "step": 1386 + }, + { + "epoch": 0.14630801687763714, + "grad_norm": 0.6540765166282654, + "learning_rate": 0.0014309430550968794, + "loss": 1.7778, + "step": 1387 + }, + { + "epoch": 0.14641350210970464, + "grad_norm": 0.7302731871604919, + "learning_rate": 0.0014308377891567095, + "loss": 1.7734, + "step": 1388 + }, + { + "epoch": 0.14651898734177216, + "grad_norm": 0.7347846031188965, + "learning_rate": 0.0014307324469252703, + "loss": 1.7851, + "step": 1389 + }, + { + "epoch": 0.14662447257383968, + "grad_norm": 0.7086843848228455, + "learning_rate": 0.001430627028414366, + "loss": 1.7923, + "step": 1390 + }, + { + "epoch": 0.14672995780590717, + "grad_norm": 0.6883392930030823, + "learning_rate": 0.0014305215336358093, + "loss": 1.8047, + "step": 1391 + }, + { + "epoch": 0.1468354430379747, + "grad_norm": 0.738565981388092, + "learning_rate": 0.0014304159626014213, + "loss": 1.7867, + "step": 1392 + }, + { + "epoch": 0.14694092827004218, 
+ "grad_norm": 0.6826196908950806, + "learning_rate": 0.0014303103153230322, + "loss": 1.7677, + "step": 1393 + }, + { + "epoch": 0.1470464135021097, + "grad_norm": 0.7159891128540039, + "learning_rate": 0.0014302045918124795, + "loss": 1.773, + "step": 1394 + }, + { + "epoch": 0.14715189873417722, + "grad_norm": 0.7110466957092285, + "learning_rate": 0.0014300987920816107, + "loss": 1.7855, + "step": 1395 + }, + { + "epoch": 0.14725738396624471, + "grad_norm": 0.6676225066184998, + "learning_rate": 0.0014299929161422807, + "loss": 1.7762, + "step": 1396 + }, + { + "epoch": 0.14736286919831224, + "grad_norm": 0.8910120725631714, + "learning_rate": 0.001429886964006354, + "loss": 1.7341, + "step": 1397 + }, + { + "epoch": 0.14746835443037976, + "grad_norm": 0.9982560276985168, + "learning_rate": 0.0014297809356857026, + "loss": 1.7252, + "step": 1398 + }, + { + "epoch": 0.14757383966244725, + "grad_norm": 0.7043653726577759, + "learning_rate": 0.0014296748311922074, + "loss": 1.7692, + "step": 1399 + }, + { + "epoch": 0.14767932489451477, + "grad_norm": 0.9799237251281738, + "learning_rate": 0.0014295686505377586, + "loss": 1.7366, + "step": 1400 + }, + { + "epoch": 0.1477848101265823, + "grad_norm": 0.9148879647254944, + "learning_rate": 0.001429462393734254, + "loss": 1.7878, + "step": 1401 + }, + { + "epoch": 0.14789029535864978, + "grad_norm": 0.7397372126579285, + "learning_rate": 0.0014293560607935999, + "loss": 1.7606, + "step": 1402 + }, + { + "epoch": 0.1479957805907173, + "grad_norm": 0.678921103477478, + "learning_rate": 0.0014292496517277116, + "loss": 1.7964, + "step": 1403 + }, + { + "epoch": 0.14810126582278482, + "grad_norm": 0.8000695109367371, + "learning_rate": 0.0014291431665485125, + "loss": 1.7774, + "step": 1404 + }, + { + "epoch": 0.14820675105485231, + "grad_norm": 0.9287055730819702, + "learning_rate": 0.0014290366052679352, + "loss": 1.7507, + "step": 1405 + }, + { + "epoch": 0.14831223628691984, + "grad_norm": 0.9039536714553833, + 
"learning_rate": 0.0014289299678979207, + "loss": 1.754, + "step": 1406 + }, + { + "epoch": 0.14841772151898736, + "grad_norm": 0.8006783127784729, + "learning_rate": 0.0014288232544504174, + "loss": 1.7035, + "step": 1407 + }, + { + "epoch": 0.14852320675105485, + "grad_norm": 0.6466794610023499, + "learning_rate": 0.0014287164649373837, + "loss": 1.7716, + "step": 1408 + }, + { + "epoch": 0.14862869198312237, + "grad_norm": 0.7240825295448303, + "learning_rate": 0.0014286095993707856, + "loss": 1.7976, + "step": 1409 + }, + { + "epoch": 0.14873417721518986, + "grad_norm": 0.7115584015846252, + "learning_rate": 0.0014285026577625982, + "loss": 1.7863, + "step": 1410 + }, + { + "epoch": 0.14883966244725738, + "grad_norm": 0.7453529834747314, + "learning_rate": 0.0014283956401248048, + "loss": 1.7947, + "step": 1411 + }, + { + "epoch": 0.1489451476793249, + "grad_norm": 0.6806166172027588, + "learning_rate": 0.0014282885464693969, + "loss": 1.755, + "step": 1412 + }, + { + "epoch": 0.1490506329113924, + "grad_norm": 0.8458051085472107, + "learning_rate": 0.001428181376808375, + "loss": 1.7686, + "step": 1413 + }, + { + "epoch": 0.14915611814345991, + "grad_norm": 0.8270869255065918, + "learning_rate": 0.0014280741311537483, + "loss": 1.7895, + "step": 1414 + }, + { + "epoch": 0.14926160337552744, + "grad_norm": 0.7619837522506714, + "learning_rate": 0.001427966809517534, + "loss": 1.7397, + "step": 1415 + }, + { + "epoch": 0.14936708860759493, + "grad_norm": 0.7852846384048462, + "learning_rate": 0.001427859411911758, + "loss": 1.8092, + "step": 1416 + }, + { + "epoch": 0.14947257383966245, + "grad_norm": 0.8424565196037292, + "learning_rate": 0.0014277519383484548, + "loss": 1.7463, + "step": 1417 + }, + { + "epoch": 0.14957805907172997, + "grad_norm": 0.6884106397628784, + "learning_rate": 0.0014276443888396675, + "loss": 1.7788, + "step": 1418 + }, + { + "epoch": 0.14968354430379746, + "grad_norm": 0.9193809032440186, + "learning_rate": 0.0014275367633974473, + 
"loss": 1.7831, + "step": 1419 + }, + { + "epoch": 0.14978902953586498, + "grad_norm": 1.1024489402770996, + "learning_rate": 0.0014274290620338542, + "loss": 1.7975, + "step": 1420 + }, + { + "epoch": 0.1498945147679325, + "grad_norm": 0.7746059894561768, + "learning_rate": 0.0014273212847609566, + "loss": 1.782, + "step": 1421 + }, + { + "epoch": 0.15, + "grad_norm": 0.7813287377357483, + "learning_rate": 0.0014272134315908317, + "loss": 1.7537, + "step": 1422 + }, + { + "epoch": 0.15010548523206751, + "grad_norm": 0.844834566116333, + "learning_rate": 0.0014271055025355652, + "loss": 1.7752, + "step": 1423 + }, + { + "epoch": 0.150210970464135, + "grad_norm": 0.7563886046409607, + "learning_rate": 0.0014269974976072505, + "loss": 1.771, + "step": 1424 + }, + { + "epoch": 0.15031645569620253, + "grad_norm": 0.8102545142173767, + "learning_rate": 0.0014268894168179903, + "loss": 1.7531, + "step": 1425 + }, + { + "epoch": 0.15042194092827005, + "grad_norm": 0.891567587852478, + "learning_rate": 0.0014267812601798957, + "loss": 1.7885, + "step": 1426 + }, + { + "epoch": 0.15052742616033754, + "grad_norm": 0.7021755576133728, + "learning_rate": 0.0014266730277050863, + "loss": 1.8119, + "step": 1427 + }, + { + "epoch": 0.15063291139240506, + "grad_norm": 1.1834309101104736, + "learning_rate": 0.00142656471940569, + "loss": 1.7755, + "step": 1428 + }, + { + "epoch": 0.15073839662447258, + "grad_norm": 0.9410405158996582, + "learning_rate": 0.001426456335293843, + "loss": 1.767, + "step": 1429 + }, + { + "epoch": 0.15084388185654007, + "grad_norm": 0.7471405863761902, + "learning_rate": 0.0014263478753816906, + "loss": 1.7472, + "step": 1430 + }, + { + "epoch": 0.1509493670886076, + "grad_norm": 0.9613838195800781, + "learning_rate": 0.0014262393396813863, + "loss": 1.7833, + "step": 1431 + }, + { + "epoch": 0.15105485232067511, + "grad_norm": 0.8503358364105225, + "learning_rate": 0.001426130728205092, + "loss": 1.7877, + "step": 1432 + }, + { + "epoch": 
0.1511603375527426, + "grad_norm": 0.7672445774078369, + "learning_rate": 0.001426022040964978, + "loss": 1.7173, + "step": 1433 + }, + { + "epoch": 0.15126582278481013, + "grad_norm": 1.107957363128662, + "learning_rate": 0.0014259132779732234, + "loss": 1.7582, + "step": 1434 + }, + { + "epoch": 0.15137130801687765, + "grad_norm": 0.9808899760246277, + "learning_rate": 0.0014258044392420155, + "loss": 1.8041, + "step": 1435 + }, + { + "epoch": 0.15147679324894514, + "grad_norm": 0.6533488035202026, + "learning_rate": 0.0014256955247835504, + "loss": 1.7502, + "step": 1436 + }, + { + "epoch": 0.15158227848101266, + "grad_norm": 1.0215730667114258, + "learning_rate": 0.0014255865346100324, + "loss": 1.79, + "step": 1437 + }, + { + "epoch": 0.15168776371308018, + "grad_norm": 1.0291826725006104, + "learning_rate": 0.0014254774687336744, + "loss": 1.741, + "step": 1438 + }, + { + "epoch": 0.15179324894514767, + "grad_norm": 0.7064449787139893, + "learning_rate": 0.0014253683271666978, + "loss": 1.7794, + "step": 1439 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 0.7478636503219604, + "learning_rate": 0.0014252591099213326, + "loss": 1.7662, + "step": 1440 + }, + { + "epoch": 0.1520042194092827, + "grad_norm": 0.8979621529579163, + "learning_rate": 0.0014251498170098167, + "loss": 1.7372, + "step": 1441 + }, + { + "epoch": 0.1521097046413502, + "grad_norm": 0.7638247013092041, + "learning_rate": 0.0014250404484443975, + "loss": 1.772, + "step": 1442 + }, + { + "epoch": 0.15221518987341773, + "grad_norm": 0.7159293293952942, + "learning_rate": 0.0014249310042373298, + "loss": 1.7607, + "step": 1443 + }, + { + "epoch": 0.15232067510548522, + "grad_norm": 0.786171019077301, + "learning_rate": 0.0014248214844008776, + "loss": 1.7737, + "step": 1444 + }, + { + "epoch": 0.15242616033755274, + "grad_norm": 0.7254084944725037, + "learning_rate": 0.001424711888947313, + "loss": 1.7538, + "step": 1445 + }, + { + "epoch": 0.15253164556962026, + "grad_norm": 
0.8086534738540649, + "learning_rate": 0.001424602217888917, + "loss": 1.7645, + "step": 1446 + }, + { + "epoch": 0.15263713080168775, + "grad_norm": 0.7746403813362122, + "learning_rate": 0.0014244924712379786, + "loss": 1.7722, + "step": 1447 + }, + { + "epoch": 0.15274261603375527, + "grad_norm": 0.6314970254898071, + "learning_rate": 0.0014243826490067954, + "loss": 1.762, + "step": 1448 + }, + { + "epoch": 0.1528481012658228, + "grad_norm": 0.7693986296653748, + "learning_rate": 0.0014242727512076736, + "loss": 1.7736, + "step": 1449 + }, + { + "epoch": 0.1529535864978903, + "grad_norm": 0.7572335600852966, + "learning_rate": 0.001424162777852928, + "loss": 1.778, + "step": 1450 + }, + { + "epoch": 0.1530590717299578, + "grad_norm": 0.6300528049468994, + "learning_rate": 0.0014240527289548814, + "loss": 1.7686, + "step": 1451 + }, + { + "epoch": 0.15316455696202533, + "grad_norm": 0.7100444436073303, + "learning_rate": 0.0014239426045258652, + "loss": 1.7738, + "step": 1452 + }, + { + "epoch": 0.15327004219409282, + "grad_norm": 0.6947463154792786, + "learning_rate": 0.0014238324045782198, + "loss": 1.7433, + "step": 1453 + }, + { + "epoch": 0.15337552742616034, + "grad_norm": 0.6779311895370483, + "learning_rate": 0.0014237221291242932, + "loss": 1.7155, + "step": 1454 + }, + { + "epoch": 0.15348101265822786, + "grad_norm": 0.6807200312614441, + "learning_rate": 0.0014236117781764425, + "loss": 1.7398, + "step": 1455 + }, + { + "epoch": 0.15358649789029535, + "grad_norm": 0.6592812538146973, + "learning_rate": 0.0014235013517470334, + "loss": 1.7946, + "step": 1456 + }, + { + "epoch": 0.15369198312236287, + "grad_norm": 0.7391563653945923, + "learning_rate": 0.0014233908498484393, + "loss": 1.7361, + "step": 1457 + }, + { + "epoch": 0.15379746835443037, + "grad_norm": 0.7643076181411743, + "learning_rate": 0.0014232802724930427, + "loss": 1.7488, + "step": 1458 + }, + { + "epoch": 0.1539029535864979, + "grad_norm": 0.7218945026397705, + "learning_rate": 
0.0014231696196932342, + "loss": 1.7608, + "step": 1459 + }, + { + "epoch": 0.1540084388185654, + "grad_norm": 0.6669723391532898, + "learning_rate": 0.0014230588914614134, + "loss": 1.8008, + "step": 1460 + }, + { + "epoch": 0.1541139240506329, + "grad_norm": 0.6524029970169067, + "learning_rate": 0.0014229480878099872, + "loss": 1.7598, + "step": 1461 + }, + { + "epoch": 0.15421940928270042, + "grad_norm": 0.6557595133781433, + "learning_rate": 0.0014228372087513725, + "loss": 1.7179, + "step": 1462 + }, + { + "epoch": 0.15432489451476794, + "grad_norm": 0.617344319820404, + "learning_rate": 0.0014227262542979933, + "loss": 1.7241, + "step": 1463 + }, + { + "epoch": 0.15443037974683543, + "grad_norm": 0.6687549352645874, + "learning_rate": 0.0014226152244622826, + "loss": 1.7608, + "step": 1464 + }, + { + "epoch": 0.15453586497890295, + "grad_norm": 0.7616507411003113, + "learning_rate": 0.0014225041192566822, + "loss": 1.7331, + "step": 1465 + }, + { + "epoch": 0.15464135021097047, + "grad_norm": 0.7781322002410889, + "learning_rate": 0.001422392938693642, + "loss": 1.7453, + "step": 1466 + }, + { + "epoch": 0.15474683544303797, + "grad_norm": 0.7159684300422668, + "learning_rate": 0.0014222816827856202, + "loss": 1.7982, + "step": 1467 + }, + { + "epoch": 0.1548523206751055, + "grad_norm": 0.7287147045135498, + "learning_rate": 0.0014221703515450834, + "loss": 1.7724, + "step": 1468 + }, + { + "epoch": 0.154957805907173, + "grad_norm": 0.9431865811347961, + "learning_rate": 0.001422058944984507, + "loss": 1.7871, + "step": 1469 + }, + { + "epoch": 0.1550632911392405, + "grad_norm": 1.1362557411193848, + "learning_rate": 0.0014219474631163745, + "loss": 1.7678, + "step": 1470 + }, + { + "epoch": 0.15516877637130802, + "grad_norm": 0.7980315685272217, + "learning_rate": 0.0014218359059531783, + "loss": 1.7822, + "step": 1471 + }, + { + "epoch": 0.15527426160337554, + "grad_norm": 0.8259161710739136, + "learning_rate": 0.0014217242735074188, + "loss": 1.7841, + 
"step": 1472 + }, + { + "epoch": 0.15537974683544303, + "grad_norm": 0.9672106504440308, + "learning_rate": 0.0014216125657916046, + "loss": 1.7551, + "step": 1473 + }, + { + "epoch": 0.15548523206751055, + "grad_norm": 0.8757764101028442, + "learning_rate": 0.0014215007828182536, + "loss": 1.7539, + "step": 1474 + }, + { + "epoch": 0.15559071729957805, + "grad_norm": 0.6953483819961548, + "learning_rate": 0.0014213889245998917, + "loss": 1.7637, + "step": 1475 + }, + { + "epoch": 0.15569620253164557, + "grad_norm": 0.9721072912216187, + "learning_rate": 0.0014212769911490528, + "loss": 1.7642, + "step": 1476 + }, + { + "epoch": 0.1558016877637131, + "grad_norm": 1.040550708770752, + "learning_rate": 0.0014211649824782797, + "loss": 1.7609, + "step": 1477 + }, + { + "epoch": 0.15590717299578058, + "grad_norm": 0.7249879240989685, + "learning_rate": 0.0014210528986001237, + "loss": 1.7547, + "step": 1478 + }, + { + "epoch": 0.1560126582278481, + "grad_norm": 0.7620607614517212, + "learning_rate": 0.001420940739527144, + "loss": 1.7936, + "step": 1479 + }, + { + "epoch": 0.15611814345991562, + "grad_norm": 0.7344058156013489, + "learning_rate": 0.001420828505271909, + "loss": 1.7223, + "step": 1480 + }, + { + "epoch": 0.1562236286919831, + "grad_norm": 0.69898921251297, + "learning_rate": 0.001420716195846995, + "loss": 1.7759, + "step": 1481 + }, + { + "epoch": 0.15632911392405063, + "grad_norm": 0.8350276947021484, + "learning_rate": 0.0014206038112649865, + "loss": 1.7608, + "step": 1482 + }, + { + "epoch": 0.15643459915611815, + "grad_norm": 0.7527628540992737, + "learning_rate": 0.0014204913515384772, + "loss": 1.7604, + "step": 1483 + }, + { + "epoch": 0.15654008438818565, + "grad_norm": 0.723558783531189, + "learning_rate": 0.0014203788166800685, + "loss": 1.7805, + "step": 1484 + }, + { + "epoch": 0.15664556962025317, + "grad_norm": 0.8324093222618103, + "learning_rate": 0.0014202662067023708, + "loss": 1.7266, + "step": 1485 + }, + { + "epoch": 
0.1567510548523207, + "grad_norm": 0.9037590026855469, + "learning_rate": 0.0014201535216180024, + "loss": 1.7999, + "step": 1486 + }, + { + "epoch": 0.15685654008438818, + "grad_norm": 0.6715801954269409, + "learning_rate": 0.0014200407614395898, + "loss": 1.7083, + "step": 1487 + }, + { + "epoch": 0.1569620253164557, + "grad_norm": 0.7303248047828674, + "learning_rate": 0.0014199279261797692, + "loss": 1.7806, + "step": 1488 + }, + { + "epoch": 0.15706751054852322, + "grad_norm": 0.782063364982605, + "learning_rate": 0.0014198150158511837, + "loss": 1.7448, + "step": 1489 + }, + { + "epoch": 0.1571729957805907, + "grad_norm": 0.6711951494216919, + "learning_rate": 0.0014197020304664856, + "loss": 1.7558, + "step": 1490 + }, + { + "epoch": 0.15727848101265823, + "grad_norm": 0.697072446346283, + "learning_rate": 0.0014195889700383357, + "loss": 1.7294, + "step": 1491 + }, + { + "epoch": 0.15738396624472573, + "grad_norm": 0.7112602591514587, + "learning_rate": 0.0014194758345794029, + "loss": 1.7664, + "step": 1492 + }, + { + "epoch": 0.15748945147679325, + "grad_norm": 0.6496112942695618, + "learning_rate": 0.0014193626241023644, + "loss": 1.7466, + "step": 1493 + }, + { + "epoch": 0.15759493670886077, + "grad_norm": 0.7491363883018494, + "learning_rate": 0.001419249338619906, + "loss": 1.7463, + "step": 1494 + }, + { + "epoch": 0.15770042194092826, + "grad_norm": 0.7176553606987, + "learning_rate": 0.0014191359781447223, + "loss": 1.7291, + "step": 1495 + }, + { + "epoch": 0.15780590717299578, + "grad_norm": 0.6533645987510681, + "learning_rate": 0.0014190225426895153, + "loss": 1.7982, + "step": 1496 + }, + { + "epoch": 0.1579113924050633, + "grad_norm": 0.7389034628868103, + "learning_rate": 0.0014189090322669967, + "loss": 1.7799, + "step": 1497 + }, + { + "epoch": 0.1580168776371308, + "grad_norm": 0.7734598517417908, + "learning_rate": 0.0014187954468898854, + "loss": 1.7484, + "step": 1498 + }, + { + "epoch": 0.1581223628691983, + "grad_norm": 
0.766602635383606, + "learning_rate": 0.0014186817865709095, + "loss": 1.7915, + "step": 1499 + }, + { + "epoch": 0.15822784810126583, + "grad_norm": 0.6534448266029358, + "learning_rate": 0.0014185680513228048, + "loss": 1.7538, + "step": 1500 + }, + { + "epoch": 0.15833333333333333, + "grad_norm": 0.7695654630661011, + "learning_rate": 0.0014184542411583162, + "loss": 1.7555, + "step": 1501 + }, + { + "epoch": 0.15843881856540085, + "grad_norm": 0.6920500993728638, + "learning_rate": 0.001418340356090197, + "loss": 1.7419, + "step": 1502 + }, + { + "epoch": 0.15854430379746837, + "grad_norm": 0.6796661019325256, + "learning_rate": 0.0014182263961312078, + "loss": 1.7554, + "step": 1503 + }, + { + "epoch": 0.15864978902953586, + "grad_norm": 0.8960886001586914, + "learning_rate": 0.001418112361294119, + "loss": 1.7936, + "step": 1504 + }, + { + "epoch": 0.15875527426160338, + "grad_norm": 0.8714098334312439, + "learning_rate": 0.0014179982515917088, + "loss": 1.7452, + "step": 1505 + }, + { + "epoch": 0.15886075949367087, + "grad_norm": 0.6603907942771912, + "learning_rate": 0.0014178840670367634, + "loss": 1.7344, + "step": 1506 + }, + { + "epoch": 0.1589662447257384, + "grad_norm": 0.9513084888458252, + "learning_rate": 0.001417769807642078, + "loss": 1.7894, + "step": 1507 + }, + { + "epoch": 0.1590717299578059, + "grad_norm": 1.1352601051330566, + "learning_rate": 0.0014176554734204557, + "loss": 1.7382, + "step": 1508 + }, + { + "epoch": 0.1591772151898734, + "grad_norm": 0.6830622553825378, + "learning_rate": 0.0014175410643847085, + "loss": 1.751, + "step": 1509 + }, + { + "epoch": 0.15928270042194093, + "grad_norm": 0.9146121144294739, + "learning_rate": 0.0014174265805476564, + "loss": 1.7555, + "step": 1510 + }, + { + "epoch": 0.15938818565400845, + "grad_norm": 0.9542486071586609, + "learning_rate": 0.001417312021922128, + "loss": 1.7437, + "step": 1511 + }, + { + "epoch": 0.15949367088607594, + "grad_norm": 0.7421692609786987, + "learning_rate": 
0.0014171973885209596, + "loss": 1.7531, + "step": 1512 + }, + { + "epoch": 0.15959915611814346, + "grad_norm": 0.8244821429252625, + "learning_rate": 0.0014170826803569971, + "loss": 1.7748, + "step": 1513 + }, + { + "epoch": 0.15970464135021098, + "grad_norm": 1.066766381263733, + "learning_rate": 0.0014169678974430941, + "loss": 1.7482, + "step": 1514 + }, + { + "epoch": 0.15981012658227847, + "grad_norm": 0.8177826404571533, + "learning_rate": 0.0014168530397921121, + "loss": 1.7678, + "step": 1515 + }, + { + "epoch": 0.159915611814346, + "grad_norm": 0.6989778280258179, + "learning_rate": 0.0014167381074169218, + "loss": 1.7538, + "step": 1516 + }, + { + "epoch": 0.1600210970464135, + "grad_norm": 0.8144663572311401, + "learning_rate": 0.0014166231003304019, + "loss": 1.7528, + "step": 1517 + }, + { + "epoch": 0.160126582278481, + "grad_norm": 0.8809829354286194, + "learning_rate": 0.0014165080185454396, + "loss": 1.7522, + "step": 1518 + }, + { + "epoch": 0.16023206751054853, + "grad_norm": 0.6922276020050049, + "learning_rate": 0.0014163928620749301, + "loss": 1.7603, + "step": 1519 + }, + { + "epoch": 0.16033755274261605, + "grad_norm": 0.7601610422134399, + "learning_rate": 0.0014162776309317778, + "loss": 1.7496, + "step": 1520 + }, + { + "epoch": 0.16044303797468354, + "grad_norm": 0.9923710823059082, + "learning_rate": 0.0014161623251288944, + "loss": 1.7612, + "step": 1521 + }, + { + "epoch": 0.16054852320675106, + "grad_norm": 0.7279878258705139, + "learning_rate": 0.001416046944679201, + "loss": 1.7294, + "step": 1522 + }, + { + "epoch": 0.16065400843881855, + "grad_norm": 0.838126003742218, + "learning_rate": 0.0014159314895956258, + "loss": 1.7194, + "step": 1523 + }, + { + "epoch": 0.16075949367088607, + "grad_norm": 0.9998916983604431, + "learning_rate": 0.0014158159598911067, + "loss": 1.7406, + "step": 1524 + }, + { + "epoch": 0.1608649789029536, + "grad_norm": 0.7215443849563599, + "learning_rate": 0.0014157003555785893, + "loss": 1.7575, + 
"step": 1525 + }, + { + "epoch": 0.16097046413502109, + "grad_norm": 0.6509567499160767, + "learning_rate": 0.0014155846766710277, + "loss": 1.6759, + "step": 1526 + }, + { + "epoch": 0.1610759493670886, + "grad_norm": 0.6678096055984497, + "learning_rate": 0.0014154689231813838, + "loss": 1.7834, + "step": 1527 + }, + { + "epoch": 0.16118143459915613, + "grad_norm": 0.7462943196296692, + "learning_rate": 0.001415353095122629, + "loss": 1.7117, + "step": 1528 + }, + { + "epoch": 0.16128691983122362, + "grad_norm": 0.8078972697257996, + "learning_rate": 0.0014152371925077423, + "loss": 1.7807, + "step": 1529 + }, + { + "epoch": 0.16139240506329114, + "grad_norm": 0.6949778199195862, + "learning_rate": 0.0014151212153497108, + "loss": 1.7214, + "step": 1530 + }, + { + "epoch": 0.16149789029535866, + "grad_norm": 0.8025431036949158, + "learning_rate": 0.0014150051636615305, + "loss": 1.7061, + "step": 1531 + }, + { + "epoch": 0.16160337552742615, + "grad_norm": 1.2264758348464966, + "learning_rate": 0.0014148890374562056, + "loss": 1.7147, + "step": 1532 + }, + { + "epoch": 0.16170886075949367, + "grad_norm": 0.7884214520454407, + "learning_rate": 0.0014147728367467486, + "loss": 1.7495, + "step": 1533 + }, + { + "epoch": 0.1618143459915612, + "grad_norm": 0.7150706648826599, + "learning_rate": 0.0014146565615461805, + "loss": 1.7394, + "step": 1534 + }, + { + "epoch": 0.16191983122362869, + "grad_norm": 0.7238087058067322, + "learning_rate": 0.0014145402118675302, + "loss": 1.7664, + "step": 1535 + }, + { + "epoch": 0.1620253164556962, + "grad_norm": 0.6513721942901611, + "learning_rate": 0.0014144237877238355, + "loss": 1.6947, + "step": 1536 + }, + { + "epoch": 0.16213080168776373, + "grad_norm": 0.6514416337013245, + "learning_rate": 0.0014143072891281425, + "loss": 1.7067, + "step": 1537 + }, + { + "epoch": 0.16223628691983122, + "grad_norm": 0.6931917667388916, + "learning_rate": 0.001414190716093505, + "loss": 1.7317, + "step": 1538 + }, + { + "epoch": 
0.16234177215189874, + "grad_norm": 0.7279885411262512, + "learning_rate": 0.001414074068632986, + "loss": 1.732, + "step": 1539 + }, + { + "epoch": 0.16244725738396623, + "grad_norm": 0.6257980465888977, + "learning_rate": 0.0014139573467596561, + "loss": 1.7495, + "step": 1540 + }, + { + "epoch": 0.16255274261603375, + "grad_norm": 0.6991544961929321, + "learning_rate": 0.0014138405504865949, + "loss": 1.7363, + "step": 1541 + }, + { + "epoch": 0.16265822784810127, + "grad_norm": 0.6867298483848572, + "learning_rate": 0.0014137236798268896, + "loss": 1.6906, + "step": 1542 + }, + { + "epoch": 0.16276371308016876, + "grad_norm": 0.6968268156051636, + "learning_rate": 0.0014136067347936363, + "loss": 1.727, + "step": 1543 + }, + { + "epoch": 0.16286919831223629, + "grad_norm": 0.6810078024864197, + "learning_rate": 0.0014134897153999394, + "loss": 1.741, + "step": 1544 + }, + { + "epoch": 0.1629746835443038, + "grad_norm": 0.704839289188385, + "learning_rate": 0.0014133726216589114, + "loss": 1.7031, + "step": 1545 + }, + { + "epoch": 0.1630801687763713, + "grad_norm": 0.6864981651306152, + "learning_rate": 0.0014132554535836732, + "loss": 1.7035, + "step": 1546 + }, + { + "epoch": 0.16318565400843882, + "grad_norm": 0.678835391998291, + "learning_rate": 0.0014131382111873543, + "loss": 1.7321, + "step": 1547 + }, + { + "epoch": 0.16329113924050634, + "grad_norm": 0.7642484307289124, + "learning_rate": 0.0014130208944830923, + "loss": 1.7648, + "step": 1548 + }, + { + "epoch": 0.16339662447257383, + "grad_norm": 0.6760846376419067, + "learning_rate": 0.0014129035034840325, + "loss": 1.7637, + "step": 1549 + }, + { + "epoch": 0.16350210970464135, + "grad_norm": 0.7757239937782288, + "learning_rate": 0.00141278603820333, + "loss": 1.7244, + "step": 1550 + }, + { + "epoch": 0.16360759493670887, + "grad_norm": 0.6993934512138367, + "learning_rate": 0.0014126684986541468, + "loss": 1.7401, + "step": 1551 + }, + { + "epoch": 0.16371308016877636, + "grad_norm": 
0.895210325717926, + "learning_rate": 0.0014125508848496539, + "loss": 1.7453, + "step": 1552 + }, + { + "epoch": 0.16381856540084389, + "grad_norm": 1.089657187461853, + "learning_rate": 0.0014124331968030307, + "loss": 1.7947, + "step": 1553 + }, + { + "epoch": 0.1639240506329114, + "grad_norm": 0.7330487370491028, + "learning_rate": 0.0014123154345274645, + "loss": 1.7076, + "step": 1554 + }, + { + "epoch": 0.1640295358649789, + "grad_norm": 0.7441146969795227, + "learning_rate": 0.0014121975980361512, + "loss": 1.7203, + "step": 1555 + }, + { + "epoch": 0.16413502109704642, + "grad_norm": 0.8837770819664001, + "learning_rate": 0.0014120796873422952, + "loss": 1.7375, + "step": 1556 + }, + { + "epoch": 0.1642405063291139, + "grad_norm": 0.7043883800506592, + "learning_rate": 0.0014119617024591089, + "loss": 1.7704, + "step": 1557 + }, + { + "epoch": 0.16434599156118143, + "grad_norm": 0.7625821232795715, + "learning_rate": 0.0014118436433998127, + "loss": 1.7081, + "step": 1558 + }, + { + "epoch": 0.16445147679324895, + "grad_norm": 1.147924780845642, + "learning_rate": 0.0014117255101776362, + "loss": 1.6988, + "step": 1559 + }, + { + "epoch": 0.16455696202531644, + "grad_norm": 0.8512340188026428, + "learning_rate": 0.0014116073028058165, + "loss": 1.7287, + "step": 1560 + }, + { + "epoch": 0.16466244725738396, + "grad_norm": 0.7688288688659668, + "learning_rate": 0.0014114890212975997, + "loss": 1.6837, + "step": 1561 + }, + { + "epoch": 0.16476793248945149, + "grad_norm": 1.1420832872390747, + "learning_rate": 0.0014113706656662393, + "loss": 1.7714, + "step": 1562 + }, + { + "epoch": 0.16487341772151898, + "grad_norm": 0.7125862240791321, + "learning_rate": 0.001411252235924998, + "loss": 1.7151, + "step": 1563 + }, + { + "epoch": 0.1649789029535865, + "grad_norm": 1.082254409790039, + "learning_rate": 0.0014111337320871463, + "loss": 1.7273, + "step": 1564 + }, + { + "epoch": 0.16508438818565402, + "grad_norm": 0.969023585319519, + "learning_rate": 
0.0014110151541659633, + "loss": 1.6878, + "step": 1565 + }, + { + "epoch": 0.1651898734177215, + "grad_norm": 0.7214703559875488, + "learning_rate": 0.0014108965021747363, + "loss": 1.7409, + "step": 1566 + }, + { + "epoch": 0.16529535864978903, + "grad_norm": 1.1360691785812378, + "learning_rate": 0.0014107777761267605, + "loss": 1.7385, + "step": 1567 + }, + { + "epoch": 0.16540084388185655, + "grad_norm": 0.8894435167312622, + "learning_rate": 0.00141065897603534, + "loss": 1.7684, + "step": 1568 + }, + { + "epoch": 0.16550632911392404, + "grad_norm": 0.8851601481437683, + "learning_rate": 0.001410540101913787, + "loss": 1.7295, + "step": 1569 + }, + { + "epoch": 0.16561181434599156, + "grad_norm": 0.7784064412117004, + "learning_rate": 0.0014104211537754217, + "loss": 1.7334, + "step": 1570 + }, + { + "epoch": 0.16571729957805909, + "grad_norm": 0.7855173945426941, + "learning_rate": 0.001410302131633573, + "loss": 1.7074, + "step": 1571 + }, + { + "epoch": 0.16582278481012658, + "grad_norm": 0.7457441091537476, + "learning_rate": 0.0014101830355015778, + "loss": 1.7603, + "step": 1572 + }, + { + "epoch": 0.1659282700421941, + "grad_norm": 0.756546676158905, + "learning_rate": 0.0014100638653927816, + "loss": 1.7488, + "step": 1573 + }, + { + "epoch": 0.1660337552742616, + "grad_norm": 0.8971083164215088, + "learning_rate": 0.0014099446213205378, + "loss": 1.7182, + "step": 1574 + }, + { + "epoch": 0.1661392405063291, + "grad_norm": 0.8132305145263672, + "learning_rate": 0.0014098253032982086, + "loss": 1.7156, + "step": 1575 + }, + { + "epoch": 0.16624472573839663, + "grad_norm": 0.7129759192466736, + "learning_rate": 0.0014097059113391639, + "loss": 1.7608, + "step": 1576 + }, + { + "epoch": 0.16635021097046412, + "grad_norm": 0.8287227749824524, + "learning_rate": 0.0014095864454567821, + "loss": 1.7459, + "step": 1577 + }, + { + "epoch": 0.16645569620253164, + "grad_norm": 0.780190646648407, + "learning_rate": 0.0014094669056644502, + "loss": 1.717, + 
"step": 1578 + }, + { + "epoch": 0.16656118143459916, + "grad_norm": 0.7331069111824036, + "learning_rate": 0.001409347291975563, + "loss": 1.7772, + "step": 1579 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 0.6746746897697449, + "learning_rate": 0.001409227604403524, + "loss": 1.7382, + "step": 1580 + }, + { + "epoch": 0.16677215189873418, + "grad_norm": 0.7287484407424927, + "learning_rate": 0.0014091078429617448, + "loss": 1.7481, + "step": 1581 + }, + { + "epoch": 0.1668776371308017, + "grad_norm": 0.7540724873542786, + "learning_rate": 0.0014089880076636452, + "loss": 1.7395, + "step": 1582 + }, + { + "epoch": 0.1669831223628692, + "grad_norm": 0.6463468670845032, + "learning_rate": 0.0014088680985226533, + "loss": 1.711, + "step": 1583 + }, + { + "epoch": 0.1670886075949367, + "grad_norm": 0.7291228771209717, + "learning_rate": 0.0014087481155522056, + "loss": 1.7134, + "step": 1584 + }, + { + "epoch": 0.16719409282700423, + "grad_norm": 0.8149453401565552, + "learning_rate": 0.0014086280587657467, + "loss": 1.7711, + "step": 1585 + }, + { + "epoch": 0.16729957805907172, + "grad_norm": 0.7526465654373169, + "learning_rate": 0.0014085079281767295, + "loss": 1.7495, + "step": 1586 + }, + { + "epoch": 0.16740506329113924, + "grad_norm": 0.8416154384613037, + "learning_rate": 0.0014083877237986153, + "loss": 1.7029, + "step": 1587 + }, + { + "epoch": 0.16751054852320676, + "grad_norm": 0.7129558324813843, + "learning_rate": 0.0014082674456448738, + "loss": 1.7214, + "step": 1588 + }, + { + "epoch": 0.16761603375527426, + "grad_norm": 0.728987455368042, + "learning_rate": 0.0014081470937289827, + "loss": 1.7405, + "step": 1589 + }, + { + "epoch": 0.16772151898734178, + "grad_norm": 1.027596116065979, + "learning_rate": 0.0014080266680644277, + "loss": 1.7102, + "step": 1590 + }, + { + "epoch": 0.16782700421940927, + "grad_norm": 0.8770405054092407, + "learning_rate": 0.0014079061686647033, + "loss": 1.7343, + "step": 1591 + }, + { + "epoch": 
0.1679324894514768, + "grad_norm": 0.6579709649085999, + "learning_rate": 0.0014077855955433123, + "loss": 1.7051, + "step": 1592 + }, + { + "epoch": 0.1680379746835443, + "grad_norm": 0.7748857140541077, + "learning_rate": 0.001407664948713765, + "loss": 1.7379, + "step": 1593 + }, + { + "epoch": 0.1681434599156118, + "grad_norm": 0.7721389532089233, + "learning_rate": 0.001407544228189581, + "loss": 1.7233, + "step": 1594 + }, + { + "epoch": 0.16824894514767932, + "grad_norm": 0.640519380569458, + "learning_rate": 0.0014074234339842874, + "loss": 1.769, + "step": 1595 + }, + { + "epoch": 0.16835443037974684, + "grad_norm": 0.7201564311981201, + "learning_rate": 0.00140730256611142, + "loss": 1.7494, + "step": 1596 + }, + { + "epoch": 0.16845991561181434, + "grad_norm": 0.6429464817047119, + "learning_rate": 0.001407181624584522, + "loss": 1.7272, + "step": 1597 + }, + { + "epoch": 0.16856540084388186, + "grad_norm": 0.8682605624198914, + "learning_rate": 0.0014070606094171464, + "loss": 1.7374, + "step": 1598 + }, + { + "epoch": 0.16867088607594938, + "grad_norm": 1.0000993013381958, + "learning_rate": 0.0014069395206228528, + "loss": 1.7336, + "step": 1599 + }, + { + "epoch": 0.16877637130801687, + "grad_norm": 0.8999611139297485, + "learning_rate": 0.0014068183582152103, + "loss": 1.7633, + "step": 1600 + }, + { + "epoch": 0.1688818565400844, + "grad_norm": 0.7559765577316284, + "learning_rate": 0.0014066971222077955, + "loss": 1.7035, + "step": 1601 + }, + { + "epoch": 0.1689873417721519, + "grad_norm": 0.9178354740142822, + "learning_rate": 0.0014065758126141938, + "loss": 1.7645, + "step": 1602 + }, + { + "epoch": 0.1690928270042194, + "grad_norm": 1.0445647239685059, + "learning_rate": 0.0014064544294479981, + "loss": 1.7549, + "step": 1603 + }, + { + "epoch": 0.16919831223628692, + "grad_norm": 0.7955030798912048, + "learning_rate": 0.0014063329727228102, + "loss": 1.7199, + "step": 1604 + }, + { + "epoch": 0.16930379746835442, + "grad_norm": 
0.7314643263816833, + "learning_rate": 0.0014062114424522397, + "loss": 1.7069, + "step": 1605 + }, + { + "epoch": 0.16940928270042194, + "grad_norm": 0.9773171544075012, + "learning_rate": 0.0014060898386499053, + "loss": 1.7598, + "step": 1606 + }, + { + "epoch": 0.16951476793248946, + "grad_norm": 0.9049487709999084, + "learning_rate": 0.0014059681613294327, + "loss": 1.7637, + "step": 1607 + }, + { + "epoch": 0.16962025316455695, + "grad_norm": 0.7583763599395752, + "learning_rate": 0.0014058464105044567, + "loss": 1.7119, + "step": 1608 + }, + { + "epoch": 0.16972573839662447, + "grad_norm": 0.7015200853347778, + "learning_rate": 0.0014057245861886201, + "loss": 1.7258, + "step": 1609 + }, + { + "epoch": 0.169831223628692, + "grad_norm": 0.734734296798706, + "learning_rate": 0.001405602688395574, + "loss": 1.7504, + "step": 1610 + }, + { + "epoch": 0.16993670886075948, + "grad_norm": 0.7667622566223145, + "learning_rate": 0.0014054807171389773, + "loss": 1.6744, + "step": 1611 + }, + { + "epoch": 0.170042194092827, + "grad_norm": 1.0997323989868164, + "learning_rate": 0.001405358672432498, + "loss": 1.6709, + "step": 1612 + }, + { + "epoch": 0.17014767932489452, + "grad_norm": 1.0382399559020996, + "learning_rate": 0.0014052365542898111, + "loss": 1.7072, + "step": 1613 + }, + { + "epoch": 0.17025316455696202, + "grad_norm": 0.7218450307846069, + "learning_rate": 0.0014051143627246015, + "loss": 1.7294, + "step": 1614 + }, + { + "epoch": 0.17035864978902954, + "grad_norm": 0.7050555944442749, + "learning_rate": 0.0014049920977505608, + "loss": 1.7072, + "step": 1615 + }, + { + "epoch": 0.17046413502109706, + "grad_norm": 0.7651844024658203, + "learning_rate": 0.0014048697593813891, + "loss": 1.7273, + "step": 1616 + }, + { + "epoch": 0.17056962025316455, + "grad_norm": 0.6873340606689453, + "learning_rate": 0.0014047473476307955, + "loss": 1.708, + "step": 1617 + }, + { + "epoch": 0.17067510548523207, + "grad_norm": 0.6738780736923218, + "learning_rate": 
0.001404624862512497, + "loss": 1.6988, + "step": 1618 + }, + { + "epoch": 0.1707805907172996, + "grad_norm": 0.6922058463096619, + "learning_rate": 0.001404502304040218, + "loss": 1.7181, + "step": 1619 + }, + { + "epoch": 0.17088607594936708, + "grad_norm": 0.6398324966430664, + "learning_rate": 0.0014043796722276924, + "loss": 1.7216, + "step": 1620 + }, + { + "epoch": 0.1709915611814346, + "grad_norm": 0.6724056601524353, + "learning_rate": 0.0014042569670886615, + "loss": 1.7122, + "step": 1621 + }, + { + "epoch": 0.1710970464135021, + "grad_norm": 0.6527624726295471, + "learning_rate": 0.0014041341886368752, + "loss": 1.7691, + "step": 1622 + }, + { + "epoch": 0.17120253164556962, + "grad_norm": 0.7249466776847839, + "learning_rate": 0.0014040113368860908, + "loss": 1.7268, + "step": 1623 + }, + { + "epoch": 0.17130801687763714, + "grad_norm": 0.9305387139320374, + "learning_rate": 0.0014038884118500754, + "loss": 1.7002, + "step": 1624 + }, + { + "epoch": 0.17141350210970463, + "grad_norm": 0.9855088591575623, + "learning_rate": 0.0014037654135426025, + "loss": 1.7581, + "step": 1625 + }, + { + "epoch": 0.17151898734177215, + "grad_norm": 0.6376675963401794, + "learning_rate": 0.0014036423419774551, + "loss": 1.6803, + "step": 1626 + }, + { + "epoch": 0.17162447257383967, + "grad_norm": 0.7154656052589417, + "learning_rate": 0.0014035191971684242, + "loss": 1.7049, + "step": 1627 + }, + { + "epoch": 0.17172995780590716, + "grad_norm": 0.7580732107162476, + "learning_rate": 0.0014033959791293082, + "loss": 1.7049, + "step": 1628 + }, + { + "epoch": 0.17183544303797468, + "grad_norm": 0.8652842044830322, + "learning_rate": 0.0014032726878739148, + "loss": 1.7342, + "step": 1629 + }, + { + "epoch": 0.1719409282700422, + "grad_norm": 0.7950310707092285, + "learning_rate": 0.0014031493234160591, + "loss": 1.6897, + "step": 1630 + }, + { + "epoch": 0.1720464135021097, + "grad_norm": 0.7327529788017273, + "learning_rate": 0.001403025885769565, + "loss": 1.734, + 
"step": 1631 + }, + { + "epoch": 0.17215189873417722, + "grad_norm": 0.7051717638969421, + "learning_rate": 0.001402902374948264, + "loss": 1.7315, + "step": 1632 + }, + { + "epoch": 0.17225738396624474, + "grad_norm": 0.7136391997337341, + "learning_rate": 0.0014027787909659962, + "loss": 1.7248, + "step": 1633 + }, + { + "epoch": 0.17236286919831223, + "grad_norm": 0.6212252378463745, + "learning_rate": 0.0014026551338366098, + "loss": 1.7294, + "step": 1634 + }, + { + "epoch": 0.17246835443037975, + "grad_norm": 0.7478551268577576, + "learning_rate": 0.0014025314035739614, + "loss": 1.6945, + "step": 1635 + }, + { + "epoch": 0.17257383966244727, + "grad_norm": 0.7400761246681213, + "learning_rate": 0.001402407600191915, + "loss": 1.7119, + "step": 1636 + }, + { + "epoch": 0.17267932489451476, + "grad_norm": 0.6851034760475159, + "learning_rate": 0.0014022837237043441, + "loss": 1.7254, + "step": 1637 + }, + { + "epoch": 0.17278481012658228, + "grad_norm": 0.6348342299461365, + "learning_rate": 0.0014021597741251295, + "loss": 1.725, + "step": 1638 + }, + { + "epoch": 0.17289029535864978, + "grad_norm": 0.6864407062530518, + "learning_rate": 0.00140203575146816, + "loss": 1.718, + "step": 1639 + }, + { + "epoch": 0.1729957805907173, + "grad_norm": 0.6530349850654602, + "learning_rate": 0.0014019116557473332, + "loss": 1.683, + "step": 1640 + }, + { + "epoch": 0.17310126582278482, + "grad_norm": 0.6916310787200928, + "learning_rate": 0.0014017874869765548, + "loss": 1.6973, + "step": 1641 + }, + { + "epoch": 0.1732067510548523, + "grad_norm": 0.653239905834198, + "learning_rate": 0.0014016632451697383, + "loss": 1.6816, + "step": 1642 + }, + { + "epoch": 0.17331223628691983, + "grad_norm": 0.8363341093063354, + "learning_rate": 0.0014015389303408058, + "loss": 1.699, + "step": 1643 + }, + { + "epoch": 0.17341772151898735, + "grad_norm": 0.7652314305305481, + "learning_rate": 0.001401414542503687, + "loss": 1.7062, + "step": 1644 + }, + { + "epoch": 
0.17352320675105484, + "grad_norm": 0.7015217542648315, + "learning_rate": 0.001401290081672321, + "loss": 1.7221, + "step": 1645 + }, + { + "epoch": 0.17362869198312236, + "grad_norm": 0.7171454429626465, + "learning_rate": 0.0014011655478606531, + "loss": 1.7144, + "step": 1646 + }, + { + "epoch": 0.17373417721518988, + "grad_norm": 0.7435928583145142, + "learning_rate": 0.001401040941082639, + "loss": 1.7185, + "step": 1647 + }, + { + "epoch": 0.17383966244725738, + "grad_norm": 0.8406805992126465, + "learning_rate": 0.001400916261352241, + "loss": 1.6969, + "step": 1648 + }, + { + "epoch": 0.1739451476793249, + "grad_norm": 0.7176163196563721, + "learning_rate": 0.00140079150868343, + "loss": 1.7322, + "step": 1649 + }, + { + "epoch": 0.17405063291139242, + "grad_norm": 0.6617015600204468, + "learning_rate": 0.0014006666830901854, + "loss": 1.7163, + "step": 1650 + }, + { + "epoch": 0.1741561181434599, + "grad_norm": 0.8235006332397461, + "learning_rate": 0.0014005417845864945, + "loss": 1.7287, + "step": 1651 + }, + { + "epoch": 0.17426160337552743, + "grad_norm": 0.6268213391304016, + "learning_rate": 0.0014004168131863525, + "loss": 1.6853, + "step": 1652 + }, + { + "epoch": 0.17436708860759495, + "grad_norm": 0.8124989867210388, + "learning_rate": 0.0014002917689037637, + "loss": 1.6825, + "step": 1653 + }, + { + "epoch": 0.17447257383966244, + "grad_norm": 0.890523374080658, + "learning_rate": 0.0014001666517527392, + "loss": 1.7345, + "step": 1654 + }, + { + "epoch": 0.17457805907172996, + "grad_norm": 0.8036944270133972, + "learning_rate": 0.0014000414617472996, + "loss": 1.7181, + "step": 1655 + }, + { + "epoch": 0.17468354430379746, + "grad_norm": 0.9182102680206299, + "learning_rate": 0.0013999161989014725, + "loss": 1.7509, + "step": 1656 + }, + { + "epoch": 0.17478902953586498, + "grad_norm": 0.941299557685852, + "learning_rate": 0.0013997908632292948, + "loss": 1.6954, + "step": 1657 + }, + { + "epoch": 0.1748945147679325, + "grad_norm": 
0.7413016557693481, + "learning_rate": 0.0013996654547448106, + "loss": 1.6987, + "step": 1658 + }, + { + "epoch": 0.175, + "grad_norm": 0.721928060054779, + "learning_rate": 0.0013995399734620729, + "loss": 1.7128, + "step": 1659 + }, + { + "epoch": 0.1751054852320675, + "grad_norm": 0.7272056341171265, + "learning_rate": 0.001399414419395142, + "loss": 1.7002, + "step": 1660 + }, + { + "epoch": 0.17521097046413503, + "grad_norm": 0.6871423721313477, + "learning_rate": 0.0013992887925580874, + "loss": 1.7574, + "step": 1661 + }, + { + "epoch": 0.17531645569620252, + "grad_norm": 0.7444457411766052, + "learning_rate": 0.0013991630929649857, + "loss": 1.709, + "step": 1662 + }, + { + "epoch": 0.17542194092827004, + "grad_norm": 0.9920992851257324, + "learning_rate": 0.0013990373206299225, + "loss": 1.7087, + "step": 1663 + }, + { + "epoch": 0.17552742616033756, + "grad_norm": 0.9573445916175842, + "learning_rate": 0.0013989114755669912, + "loss": 1.7078, + "step": 1664 + }, + { + "epoch": 0.17563291139240506, + "grad_norm": 0.7558340430259705, + "learning_rate": 0.001398785557790293, + "loss": 1.7208, + "step": 1665 + }, + { + "epoch": 0.17573839662447258, + "grad_norm": 0.8277615904808044, + "learning_rate": 0.0013986595673139382, + "loss": 1.7468, + "step": 1666 + }, + { + "epoch": 0.1758438818565401, + "grad_norm": 0.9961391687393188, + "learning_rate": 0.0013985335041520443, + "loss": 1.7314, + "step": 1667 + }, + { + "epoch": 0.1759493670886076, + "grad_norm": 0.8598665595054626, + "learning_rate": 0.0013984073683187374, + "loss": 1.7352, + "step": 1668 + }, + { + "epoch": 0.1760548523206751, + "grad_norm": 0.6536204814910889, + "learning_rate": 0.0013982811598281517, + "loss": 1.6949, + "step": 1669 + }, + { + "epoch": 0.17616033755274263, + "grad_norm": 0.8012102842330933, + "learning_rate": 0.0013981548786944293, + "loss": 1.6687, + "step": 1670 + }, + { + "epoch": 0.17626582278481012, + "grad_norm": 0.873194694519043, + "learning_rate": 
0.0013980285249317209, + "loss": 1.672, + "step": 1671 + }, + { + "epoch": 0.17637130801687764, + "grad_norm": 0.7318752408027649, + "learning_rate": 0.0013979020985541847, + "loss": 1.7076, + "step": 1672 + }, + { + "epoch": 0.17647679324894514, + "grad_norm": 0.6977208256721497, + "learning_rate": 0.0013977755995759876, + "loss": 1.7268, + "step": 1673 + }, + { + "epoch": 0.17658227848101266, + "grad_norm": 1.0352181196212769, + "learning_rate": 0.0013976490280113048, + "loss": 1.7197, + "step": 1674 + }, + { + "epoch": 0.17668776371308018, + "grad_norm": 1.2723833322525024, + "learning_rate": 0.0013975223838743188, + "loss": 1.7154, + "step": 1675 + }, + { + "epoch": 0.17679324894514767, + "grad_norm": 0.7293285131454468, + "learning_rate": 0.0013973956671792206, + "loss": 1.7227, + "step": 1676 + }, + { + "epoch": 0.1768987341772152, + "grad_norm": 1.4712457656860352, + "learning_rate": 0.00139726887794021, + "loss": 1.6839, + "step": 1677 + }, + { + "epoch": 0.1770042194092827, + "grad_norm": 0.8868967890739441, + "learning_rate": 0.001397142016171494, + "loss": 1.7476, + "step": 1678 + }, + { + "epoch": 0.1771097046413502, + "grad_norm": 1.14591383934021, + "learning_rate": 0.0013970150818872881, + "loss": 1.6893, + "step": 1679 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 1.5050595998764038, + "learning_rate": 0.0013968880751018158, + "loss": 1.7323, + "step": 1680 + }, + { + "epoch": 0.17732067510548524, + "grad_norm": 0.9538464546203613, + "learning_rate": 0.0013967609958293091, + "loss": 1.7101, + "step": 1681 + }, + { + "epoch": 0.17742616033755274, + "grad_norm": 2.0436813831329346, + "learning_rate": 0.001396633844084008, + "loss": 1.7207, + "step": 1682 + }, + { + "epoch": 0.17753164556962026, + "grad_norm": 1.3827439546585083, + "learning_rate": 0.00139650661988016, + "loss": 1.7602, + "step": 1683 + }, + { + "epoch": 0.17763713080168778, + "grad_norm": 1.5124088525772095, + "learning_rate": 0.0013963793232320216, + "loss": 1.7523, + 
"step": 1684 + }, + { + "epoch": 0.17774261603375527, + "grad_norm": 1.479117751121521, + "learning_rate": 0.0013962519541538569, + "loss": 1.684, + "step": 1685 + }, + { + "epoch": 0.1778481012658228, + "grad_norm": 1.2532753944396973, + "learning_rate": 0.001396124512659938, + "loss": 1.6816, + "step": 1686 + }, + { + "epoch": 0.17795358649789028, + "grad_norm": 0.9880537986755371, + "learning_rate": 0.001395996998764546, + "loss": 1.7275, + "step": 1687 + }, + { + "epoch": 0.1780590717299578, + "grad_norm": 1.284392237663269, + "learning_rate": 0.0013958694124819688, + "loss": 1.6568, + "step": 1688 + }, + { + "epoch": 0.17816455696202532, + "grad_norm": 0.9685285091400146, + "learning_rate": 0.0013957417538265032, + "loss": 1.7231, + "step": 1689 + }, + { + "epoch": 0.17827004219409281, + "grad_norm": 1.7471750974655151, + "learning_rate": 0.0013956140228124545, + "loss": 1.7106, + "step": 1690 + }, + { + "epoch": 0.17837552742616034, + "grad_norm": 1.2315418720245361, + "learning_rate": 0.001395486219454135, + "loss": 1.7608, + "step": 1691 + }, + { + "epoch": 0.17848101265822786, + "grad_norm": 1.2554739713668823, + "learning_rate": 0.0013953583437658658, + "loss": 1.7231, + "step": 1692 + }, + { + "epoch": 0.17858649789029535, + "grad_norm": 1.2685275077819824, + "learning_rate": 0.0013952303957619763, + "loss": 1.6927, + "step": 1693 + }, + { + "epoch": 0.17869198312236287, + "grad_norm": 0.9122018814086914, + "learning_rate": 0.0013951023754568035, + "loss": 1.6999, + "step": 1694 + }, + { + "epoch": 0.1787974683544304, + "grad_norm": 1.1895605325698853, + "learning_rate": 0.001394974282864693, + "loss": 1.6859, + "step": 1695 + }, + { + "epoch": 0.17890295358649788, + "grad_norm": 0.8928968906402588, + "learning_rate": 0.0013948461179999977, + "loss": 1.6883, + "step": 1696 + }, + { + "epoch": 0.1790084388185654, + "grad_norm": 1.0569838285446167, + "learning_rate": 0.0013947178808770794, + "loss": 1.7004, + "step": 1697 + }, + { + "epoch": 
0.17911392405063292, + "grad_norm": 0.9755202531814575, + "learning_rate": 0.0013945895715103077, + "loss": 1.7109, + "step": 1698 + }, + { + "epoch": 0.17921940928270041, + "grad_norm": 0.7519292831420898, + "learning_rate": 0.0013944611899140604, + "loss": 1.7506, + "step": 1699 + }, + { + "epoch": 0.17932489451476794, + "grad_norm": 0.9803712964057922, + "learning_rate": 0.0013943327361027231, + "loss": 1.7389, + "step": 1700 + }, + { + "epoch": 0.17943037974683546, + "grad_norm": 0.703090488910675, + "learning_rate": 0.0013942042100906899, + "loss": 1.6941, + "step": 1701 + }, + { + "epoch": 0.17953586497890295, + "grad_norm": 0.7735727429389954, + "learning_rate": 0.0013940756118923626, + "loss": 1.6941, + "step": 1702 + }, + { + "epoch": 0.17964135021097047, + "grad_norm": 0.6396759748458862, + "learning_rate": 0.0013939469415221513, + "loss": 1.6463, + "step": 1703 + }, + { + "epoch": 0.17974683544303796, + "grad_norm": 0.7368679642677307, + "learning_rate": 0.0013938181989944741, + "loss": 1.6854, + "step": 1704 + }, + { + "epoch": 0.17985232067510548, + "grad_norm": 0.6383791565895081, + "learning_rate": 0.0013936893843237573, + "loss": 1.6947, + "step": 1705 + }, + { + "epoch": 0.179957805907173, + "grad_norm": 0.7948450446128845, + "learning_rate": 0.0013935604975244356, + "loss": 1.7174, + "step": 1706 + }, + { + "epoch": 0.1800632911392405, + "grad_norm": 0.6171702742576599, + "learning_rate": 0.0013934315386109509, + "loss": 1.6684, + "step": 1707 + }, + { + "epoch": 0.18016877637130801, + "grad_norm": 0.7914339303970337, + "learning_rate": 0.0013933025075977539, + "loss": 1.6763, + "step": 1708 + }, + { + "epoch": 0.18027426160337554, + "grad_norm": 0.6577966213226318, + "learning_rate": 0.0013931734044993031, + "loss": 1.6748, + "step": 1709 + }, + { + "epoch": 0.18037974683544303, + "grad_norm": 0.8123793005943298, + "learning_rate": 0.0013930442293300649, + "loss": 1.7295, + "step": 1710 + }, + { + "epoch": 0.18048523206751055, + "grad_norm": 
0.7951372861862183, + "learning_rate": 0.0013929149821045148, + "loss": 1.7148, + "step": 1711 + }, + { + "epoch": 0.18059071729957807, + "grad_norm": 0.6552478075027466, + "learning_rate": 0.0013927856628371347, + "loss": 1.7359, + "step": 1712 + }, + { + "epoch": 0.18069620253164556, + "grad_norm": 0.7494841814041138, + "learning_rate": 0.0013926562715424159, + "loss": 1.6726, + "step": 1713 + }, + { + "epoch": 0.18080168776371308, + "grad_norm": 0.6931064128875732, + "learning_rate": 0.0013925268082348576, + "loss": 1.6715, + "step": 1714 + }, + { + "epoch": 0.1809071729957806, + "grad_norm": 0.6691313982009888, + "learning_rate": 0.0013923972729289662, + "loss": 1.6622, + "step": 1715 + }, + { + "epoch": 0.1810126582278481, + "grad_norm": 0.776023268699646, + "learning_rate": 0.0013922676656392572, + "loss": 1.6858, + "step": 1716 + }, + { + "epoch": 0.18111814345991561, + "grad_norm": 0.7195562124252319, + "learning_rate": 0.0013921379863802536, + "loss": 1.6642, + "step": 1717 + }, + { + "epoch": 0.18122362869198314, + "grad_norm": 0.698626697063446, + "learning_rate": 0.0013920082351664867, + "loss": 1.6852, + "step": 1718 + }, + { + "epoch": 0.18132911392405063, + "grad_norm": 0.7129390835762024, + "learning_rate": 0.0013918784120124956, + "loss": 1.6948, + "step": 1719 + }, + { + "epoch": 0.18143459915611815, + "grad_norm": 0.7528694272041321, + "learning_rate": 0.0013917485169328279, + "loss": 1.7202, + "step": 1720 + }, + { + "epoch": 0.18154008438818564, + "grad_norm": 0.986432671546936, + "learning_rate": 0.0013916185499420386, + "loss": 1.698, + "step": 1721 + }, + { + "epoch": 0.18164556962025316, + "grad_norm": 0.7935528755187988, + "learning_rate": 0.0013914885110546916, + "loss": 1.6954, + "step": 1722 + }, + { + "epoch": 0.18175105485232068, + "grad_norm": 0.7465547323226929, + "learning_rate": 0.001391358400285358, + "loss": 1.6859, + "step": 1723 + }, + { + "epoch": 0.18185654008438817, + "grad_norm": 1.0887444019317627, + "learning_rate": 
0.0013912282176486177, + "loss": 1.6849, + "step": 1724 + }, + { + "epoch": 0.1819620253164557, + "grad_norm": 0.7200260162353516, + "learning_rate": 0.0013910979631590581, + "loss": 1.6818, + "step": 1725 + }, + { + "epoch": 0.18206751054852321, + "grad_norm": 0.836004376411438, + "learning_rate": 0.001390967636831275, + "loss": 1.7237, + "step": 1726 + }, + { + "epoch": 0.1821729957805907, + "grad_norm": 0.7803277969360352, + "learning_rate": 0.0013908372386798717, + "loss": 1.6914, + "step": 1727 + }, + { + "epoch": 0.18227848101265823, + "grad_norm": 0.8146350979804993, + "learning_rate": 0.0013907067687194607, + "loss": 1.6872, + "step": 1728 + }, + { + "epoch": 0.18238396624472575, + "grad_norm": 0.7667115926742554, + "learning_rate": 0.0013905762269646614, + "loss": 1.7234, + "step": 1729 + }, + { + "epoch": 0.18248945147679324, + "grad_norm": 0.7415938377380371, + "learning_rate": 0.0013904456134301016, + "loss": 1.7229, + "step": 1730 + }, + { + "epoch": 0.18259493670886076, + "grad_norm": 0.7994092106819153, + "learning_rate": 0.001390314928130417, + "loss": 1.6839, + "step": 1731 + }, + { + "epoch": 0.18270042194092828, + "grad_norm": 0.7282410264015198, + "learning_rate": 0.0013901841710802522, + "loss": 1.6859, + "step": 1732 + }, + { + "epoch": 0.18280590717299577, + "grad_norm": 0.973089873790741, + "learning_rate": 0.0013900533422942585, + "loss": 1.685, + "step": 1733 + }, + { + "epoch": 0.1829113924050633, + "grad_norm": 0.6773604154586792, + "learning_rate": 0.0013899224417870963, + "loss": 1.704, + "step": 1734 + }, + { + "epoch": 0.18301687763713081, + "grad_norm": 0.7988470792770386, + "learning_rate": 0.0013897914695734336, + "loss": 1.6869, + "step": 1735 + }, + { + "epoch": 0.1831223628691983, + "grad_norm": 0.8645642995834351, + "learning_rate": 0.0013896604256679462, + "loss": 1.7263, + "step": 1736 + }, + { + "epoch": 0.18322784810126583, + "grad_norm": 0.761053204536438, + "learning_rate": 0.0013895293100853188, + "loss": 1.6845, + 
"step": 1737 + }, + { + "epoch": 0.18333333333333332, + "grad_norm": 0.6886294484138489, + "learning_rate": 0.001389398122840243, + "loss": 1.7042, + "step": 1738 + }, + { + "epoch": 0.18343881856540084, + "grad_norm": 0.7711166143417358, + "learning_rate": 0.0013892668639474194, + "loss": 1.7311, + "step": 1739 + }, + { + "epoch": 0.18354430379746836, + "grad_norm": 0.65291827917099, + "learning_rate": 0.0013891355334215562, + "loss": 1.7232, + "step": 1740 + }, + { + "epoch": 0.18364978902953585, + "grad_norm": 0.7976797819137573, + "learning_rate": 0.001389004131277369, + "loss": 1.7126, + "step": 1741 + }, + { + "epoch": 0.18375527426160337, + "grad_norm": 0.7469828128814697, + "learning_rate": 0.0013888726575295826, + "loss": 1.7087, + "step": 1742 + }, + { + "epoch": 0.1838607594936709, + "grad_norm": 0.7104125618934631, + "learning_rate": 0.0013887411121929294, + "loss": 1.7111, + "step": 1743 + }, + { + "epoch": 0.1839662447257384, + "grad_norm": 0.8906893134117126, + "learning_rate": 0.0013886094952821496, + "loss": 1.7208, + "step": 1744 + }, + { + "epoch": 0.1840717299578059, + "grad_norm": 0.7429161667823792, + "learning_rate": 0.0013884778068119913, + "loss": 1.7148, + "step": 1745 + }, + { + "epoch": 0.18417721518987343, + "grad_norm": 0.682334840297699, + "learning_rate": 0.0013883460467972108, + "loss": 1.729, + "step": 1746 + }, + { + "epoch": 0.18428270042194092, + "grad_norm": 0.6743359565734863, + "learning_rate": 0.0013882142152525732, + "loss": 1.7383, + "step": 1747 + }, + { + "epoch": 0.18438818565400844, + "grad_norm": 0.6865478754043579, + "learning_rate": 0.0013880823121928498, + "loss": 1.6927, + "step": 1748 + }, + { + "epoch": 0.18449367088607596, + "grad_norm": 0.6272658705711365, + "learning_rate": 0.0013879503376328219, + "loss": 1.7327, + "step": 1749 + }, + { + "epoch": 0.18459915611814345, + "grad_norm": 0.6995193958282471, + "learning_rate": 0.0013878182915872776, + "loss": 1.7317, + "step": 1750 + }, + { + "epoch": 
0.18470464135021097, + "grad_norm": 0.7226294279098511, + "learning_rate": 0.001387686174071013, + "loss": 1.6946, + "step": 1751 + }, + { + "epoch": 0.1848101265822785, + "grad_norm": 0.6890009045600891, + "learning_rate": 0.001387553985098833, + "loss": 1.713, + "step": 1752 + }, + { + "epoch": 0.184915611814346, + "grad_norm": 0.744717538356781, + "learning_rate": 0.0013874217246855499, + "loss": 1.7164, + "step": 1753 + }, + { + "epoch": 0.1850210970464135, + "grad_norm": 0.6496021747589111, + "learning_rate": 0.001387289392845984, + "loss": 1.7048, + "step": 1754 + }, + { + "epoch": 0.185126582278481, + "grad_norm": 0.6639562249183655, + "learning_rate": 0.0013871569895949635, + "loss": 1.7088, + "step": 1755 + }, + { + "epoch": 0.18523206751054852, + "grad_norm": 0.6346448659896851, + "learning_rate": 0.0013870245149473256, + "loss": 1.6742, + "step": 1756 + }, + { + "epoch": 0.18533755274261604, + "grad_norm": 0.71006840467453, + "learning_rate": 0.0013868919689179143, + "loss": 1.6951, + "step": 1757 + }, + { + "epoch": 0.18544303797468353, + "grad_norm": 0.7527556419372559, + "learning_rate": 0.001386759351521582, + "loss": 1.717, + "step": 1758 + }, + { + "epoch": 0.18554852320675105, + "grad_norm": 0.73298180103302, + "learning_rate": 0.0013866266627731892, + "loss": 1.7115, + "step": 1759 + }, + { + "epoch": 0.18565400843881857, + "grad_norm": 0.6870157122612, + "learning_rate": 0.001386493902687604, + "loss": 1.6503, + "step": 1760 + }, + { + "epoch": 0.18575949367088607, + "grad_norm": 0.7702459096908569, + "learning_rate": 0.0013863610712797035, + "loss": 1.7068, + "step": 1761 + }, + { + "epoch": 0.1858649789029536, + "grad_norm": 0.7881658673286438, + "learning_rate": 0.0013862281685643716, + "loss": 1.7207, + "step": 1762 + }, + { + "epoch": 0.1859704641350211, + "grad_norm": 0.7210410833358765, + "learning_rate": 0.001386095194556501, + "loss": 1.6945, + "step": 1763 + }, + { + "epoch": 0.1860759493670886, + "grad_norm": 0.6694091558456421, + 
"learning_rate": 0.001385962149270992, + "loss": 1.6816, + "step": 1764 + }, + { + "epoch": 0.18618143459915612, + "grad_norm": 0.721910834312439, + "learning_rate": 0.001385829032722753, + "loss": 1.6991, + "step": 1765 + }, + { + "epoch": 0.18628691983122364, + "grad_norm": 0.6925278902053833, + "learning_rate": 0.0013856958449267002, + "loss": 1.7072, + "step": 1766 + }, + { + "epoch": 0.18639240506329113, + "grad_norm": 0.8329174518585205, + "learning_rate": 0.0013855625858977584, + "loss": 1.7235, + "step": 1767 + }, + { + "epoch": 0.18649789029535865, + "grad_norm": 0.8352997899055481, + "learning_rate": 0.0013854292556508593, + "loss": 1.6763, + "step": 1768 + }, + { + "epoch": 0.18660337552742617, + "grad_norm": 0.7257702350616455, + "learning_rate": 0.0013852958542009438, + "loss": 1.7059, + "step": 1769 + }, + { + "epoch": 0.18670886075949367, + "grad_norm": 0.6556980013847351, + "learning_rate": 0.00138516238156296, + "loss": 1.7118, + "step": 1770 + }, + { + "epoch": 0.1868143459915612, + "grad_norm": 0.7361748814582825, + "learning_rate": 0.001385028837751864, + "loss": 1.7117, + "step": 1771 + }, + { + "epoch": 0.18691983122362868, + "grad_norm": 0.6581652164459229, + "learning_rate": 0.0013848952227826202, + "loss": 1.7002, + "step": 1772 + }, + { + "epoch": 0.1870253164556962, + "grad_norm": 0.6873348951339722, + "learning_rate": 0.0013847615366702009, + "loss": 1.6914, + "step": 1773 + }, + { + "epoch": 0.18713080168776372, + "grad_norm": 0.7154548764228821, + "learning_rate": 0.001384627779429586, + "loss": 1.7355, + "step": 1774 + }, + { + "epoch": 0.1872362869198312, + "grad_norm": 0.7016015648841858, + "learning_rate": 0.0013844939510757642, + "loss": 1.7499, + "step": 1775 + }, + { + "epoch": 0.18734177215189873, + "grad_norm": 0.7238917946815491, + "learning_rate": 0.0013843600516237312, + "loss": 1.7046, + "step": 1776 + }, + { + "epoch": 0.18744725738396625, + "grad_norm": 0.7054125666618347, + "learning_rate": 0.001384226081088491, + 
"loss": 1.685, + "step": 1777 + }, + { + "epoch": 0.18755274261603375, + "grad_norm": 0.7176743745803833, + "learning_rate": 0.001384092039485056, + "loss": 1.639, + "step": 1778 + }, + { + "epoch": 0.18765822784810127, + "grad_norm": 0.6956509947776794, + "learning_rate": 0.0013839579268284461, + "loss": 1.6991, + "step": 1779 + }, + { + "epoch": 0.1877637130801688, + "grad_norm": 0.7435916066169739, + "learning_rate": 0.0013838237431336895, + "loss": 1.6938, + "step": 1780 + }, + { + "epoch": 0.18786919831223628, + "grad_norm": 0.7595970034599304, + "learning_rate": 0.0013836894884158217, + "loss": 1.6869, + "step": 1781 + }, + { + "epoch": 0.1879746835443038, + "grad_norm": 0.6325842142105103, + "learning_rate": 0.001383555162689887, + "loss": 1.7467, + "step": 1782 + }, + { + "epoch": 0.18808016877637132, + "grad_norm": 0.7889545559883118, + "learning_rate": 0.001383420765970937, + "loss": 1.7027, + "step": 1783 + }, + { + "epoch": 0.1881856540084388, + "grad_norm": 0.7368943095207214, + "learning_rate": 0.0013832862982740318, + "loss": 1.682, + "step": 1784 + }, + { + "epoch": 0.18829113924050633, + "grad_norm": 0.7094132304191589, + "learning_rate": 0.001383151759614239, + "loss": 1.705, + "step": 1785 + }, + { + "epoch": 0.18839662447257383, + "grad_norm": 1.104589581489563, + "learning_rate": 0.0013830171500066343, + "loss": 1.6733, + "step": 1786 + }, + { + "epoch": 0.18850210970464135, + "grad_norm": 0.9088705778121948, + "learning_rate": 0.0013828824694663013, + "loss": 1.6972, + "step": 1787 + }, + { + "epoch": 0.18860759493670887, + "grad_norm": 0.6456066966056824, + "learning_rate": 0.001382747718008332, + "loss": 1.7122, + "step": 1788 + }, + { + "epoch": 0.18871308016877636, + "grad_norm": 0.6314347982406616, + "learning_rate": 0.0013826128956478255, + "loss": 1.7003, + "step": 1789 + }, + { + "epoch": 0.18881856540084388, + "grad_norm": 0.7091779708862305, + "learning_rate": 0.0013824780023998899, + "loss": 1.6884, + "step": 1790 + }, + { + 
"epoch": 0.1889240506329114, + "grad_norm": 0.7089309692382812, + "learning_rate": 0.0013823430382796402, + "loss": 1.6691, + "step": 1791 + }, + { + "epoch": 0.1890295358649789, + "grad_norm": 0.7444326877593994, + "learning_rate": 0.0013822080033021997, + "loss": 1.6971, + "step": 1792 + }, + { + "epoch": 0.1891350210970464, + "grad_norm": 0.6449862122535706, + "learning_rate": 0.0013820728974827, + "loss": 1.7127, + "step": 1793 + }, + { + "epoch": 0.18924050632911393, + "grad_norm": 0.7517580389976501, + "learning_rate": 0.0013819377208362806, + "loss": 1.7552, + "step": 1794 + }, + { + "epoch": 0.18934599156118143, + "grad_norm": 0.7423803806304932, + "learning_rate": 0.0013818024733780881, + "loss": 1.6863, + "step": 1795 + }, + { + "epoch": 0.18945147679324895, + "grad_norm": 0.7031176686286926, + "learning_rate": 0.0013816671551232782, + "loss": 1.7173, + "step": 1796 + }, + { + "epoch": 0.18955696202531647, + "grad_norm": 0.6964685916900635, + "learning_rate": 0.0013815317660870138, + "loss": 1.6775, + "step": 1797 + }, + { + "epoch": 0.18966244725738396, + "grad_norm": 0.707944393157959, + "learning_rate": 0.001381396306284466, + "loss": 1.6885, + "step": 1798 + }, + { + "epoch": 0.18976793248945148, + "grad_norm": 0.6523306965827942, + "learning_rate": 0.0013812607757308134, + "loss": 1.687, + "step": 1799 + }, + { + "epoch": 0.189873417721519, + "grad_norm": 0.7795444130897522, + "learning_rate": 0.0013811251744412431, + "loss": 1.673, + "step": 1800 + }, + { + "epoch": 0.1899789029535865, + "grad_norm": 0.644817054271698, + "learning_rate": 0.0013809895024309501, + "loss": 1.6456, + "step": 1801 + }, + { + "epoch": 0.190084388185654, + "grad_norm": 0.7063037753105164, + "learning_rate": 0.001380853759715137, + "loss": 1.6714, + "step": 1802 + }, + { + "epoch": 0.1901898734177215, + "grad_norm": 0.8066319227218628, + "learning_rate": 0.0013807179463090143, + "loss": 1.6978, + "step": 1803 + }, + { + "epoch": 0.19029535864978903, + "grad_norm": 
0.8916489481925964, + "learning_rate": 0.0013805820622278008, + "loss": 1.696, + "step": 1804 + }, + { + "epoch": 0.19040084388185655, + "grad_norm": 0.8808045983314514, + "learning_rate": 0.0013804461074867227, + "loss": 1.6985, + "step": 1805 + }, + { + "epoch": 0.19050632911392404, + "grad_norm": 0.7076989412307739, + "learning_rate": 0.0013803100821010146, + "loss": 1.6773, + "step": 1806 + }, + { + "epoch": 0.19061181434599156, + "grad_norm": 0.7380297780036926, + "learning_rate": 0.0013801739860859188, + "loss": 1.7046, + "step": 1807 + }, + { + "epoch": 0.19071729957805908, + "grad_norm": 0.7595328688621521, + "learning_rate": 0.0013800378194566856, + "loss": 1.686, + "step": 1808 + }, + { + "epoch": 0.19082278481012657, + "grad_norm": 0.8206736445426941, + "learning_rate": 0.001379901582228573, + "loss": 1.7288, + "step": 1809 + }, + { + "epoch": 0.1909282700421941, + "grad_norm": 0.7144821286201477, + "learning_rate": 0.0013797652744168473, + "loss": 1.6582, + "step": 1810 + }, + { + "epoch": 0.1910337552742616, + "grad_norm": 0.8983260989189148, + "learning_rate": 0.0013796288960367822, + "loss": 1.6503, + "step": 1811 + }, + { + "epoch": 0.1911392405063291, + "grad_norm": 0.9978070855140686, + "learning_rate": 0.0013794924471036596, + "loss": 1.694, + "step": 1812 + }, + { + "epoch": 0.19124472573839663, + "grad_norm": 0.7475795149803162, + "learning_rate": 0.0013793559276327695, + "loss": 1.689, + "step": 1813 + }, + { + "epoch": 0.19135021097046415, + "grad_norm": 0.9383198618888855, + "learning_rate": 0.0013792193376394094, + "loss": 1.6561, + "step": 1814 + }, + { + "epoch": 0.19145569620253164, + "grad_norm": 1.0755488872528076, + "learning_rate": 0.001379082677138885, + "loss": 1.6849, + "step": 1815 + }, + { + "epoch": 0.19156118143459916, + "grad_norm": 0.8501977920532227, + "learning_rate": 0.0013789459461465096, + "loss": 1.6941, + "step": 1816 + }, + { + "epoch": 0.19166666666666668, + "grad_norm": 0.7187299132347107, + "learning_rate": 
0.001378809144677605, + "loss": 1.6617, + "step": 1817 + }, + { + "epoch": 0.19177215189873417, + "grad_norm": 0.7107388973236084, + "learning_rate": 0.0013786722727474998, + "loss": 1.6727, + "step": 1818 + }, + { + "epoch": 0.1918776371308017, + "grad_norm": 0.6533400416374207, + "learning_rate": 0.0013785353303715317, + "loss": 1.703, + "step": 1819 + }, + { + "epoch": 0.19198312236286919, + "grad_norm": 0.6511378288269043, + "learning_rate": 0.0013783983175650457, + "loss": 1.7347, + "step": 1820 + }, + { + "epoch": 0.1920886075949367, + "grad_norm": 0.7379651665687561, + "learning_rate": 0.001378261234343395, + "loss": 1.6862, + "step": 1821 + }, + { + "epoch": 0.19219409282700423, + "grad_norm": 0.6613348126411438, + "learning_rate": 0.0013781240807219399, + "loss": 1.7055, + "step": 1822 + }, + { + "epoch": 0.19229957805907172, + "grad_norm": 0.686639130115509, + "learning_rate": 0.0013779868567160495, + "loss": 1.6967, + "step": 1823 + }, + { + "epoch": 0.19240506329113924, + "grad_norm": 0.7239839434623718, + "learning_rate": 0.0013778495623411008, + "loss": 1.6974, + "step": 1824 + }, + { + "epoch": 0.19251054852320676, + "grad_norm": 0.6869918704032898, + "learning_rate": 0.0013777121976124775, + "loss": 1.7332, + "step": 1825 + }, + { + "epoch": 0.19261603375527425, + "grad_norm": 0.6603453159332275, + "learning_rate": 0.0013775747625455724, + "loss": 1.6704, + "step": 1826 + }, + { + "epoch": 0.19272151898734177, + "grad_norm": 0.673184335231781, + "learning_rate": 0.0013774372571557856, + "loss": 1.6662, + "step": 1827 + }, + { + "epoch": 0.1928270042194093, + "grad_norm": 0.7450258135795593, + "learning_rate": 0.0013772996814585261, + "loss": 1.6258, + "step": 1828 + }, + { + "epoch": 0.19293248945147679, + "grad_norm": 0.9328619837760925, + "learning_rate": 0.0013771620354692087, + "loss": 1.6653, + "step": 1829 + }, + { + "epoch": 0.1930379746835443, + "grad_norm": 0.7625787258148193, + "learning_rate": 0.0013770243192032581, + "loss": 1.653, + 
"step": 1830 + }, + { + "epoch": 0.19314345991561183, + "grad_norm": 0.6622673273086548, + "learning_rate": 0.0013768865326761058, + "loss": 1.6538, + "step": 1831 + }, + { + "epoch": 0.19324894514767932, + "grad_norm": 0.7678430080413818, + "learning_rate": 0.0013767486759031918, + "loss": 1.6782, + "step": 1832 + }, + { + "epoch": 0.19335443037974684, + "grad_norm": 0.7393906116485596, + "learning_rate": 0.0013766107488999632, + "loss": 1.7139, + "step": 1833 + }, + { + "epoch": 0.19345991561181436, + "grad_norm": 0.6756302118301392, + "learning_rate": 0.0013764727516818757, + "loss": 1.7127, + "step": 1834 + }, + { + "epoch": 0.19356540084388185, + "grad_norm": 0.6851455569267273, + "learning_rate": 0.0013763346842643927, + "loss": 1.6708, + "step": 1835 + }, + { + "epoch": 0.19367088607594937, + "grad_norm": 0.7180147767066956, + "learning_rate": 0.0013761965466629847, + "loss": 1.6975, + "step": 1836 + }, + { + "epoch": 0.19377637130801686, + "grad_norm": 0.7055030465126038, + "learning_rate": 0.0013760583388931315, + "loss": 1.6544, + "step": 1837 + }, + { + "epoch": 0.19388185654008439, + "grad_norm": 0.6531878113746643, + "learning_rate": 0.0013759200609703196, + "loss": 1.6604, + "step": 1838 + }, + { + "epoch": 0.1939873417721519, + "grad_norm": 0.6808741092681885, + "learning_rate": 0.0013757817129100437, + "loss": 1.6742, + "step": 1839 + }, + { + "epoch": 0.1940928270042194, + "grad_norm": 0.6608408093452454, + "learning_rate": 0.0013756432947278064, + "loss": 1.6732, + "step": 1840 + }, + { + "epoch": 0.19419831223628692, + "grad_norm": 0.681273877620697, + "learning_rate": 0.0013755048064391182, + "loss": 1.7015, + "step": 1841 + }, + { + "epoch": 0.19430379746835444, + "grad_norm": 0.734163761138916, + "learning_rate": 0.0013753662480594973, + "loss": 1.6768, + "step": 1842 + }, + { + "epoch": 0.19440928270042193, + "grad_norm": 0.898627758026123, + "learning_rate": 0.0013752276196044699, + "loss": 1.6581, + "step": 1843 + }, + { + "epoch": 
0.19451476793248945, + "grad_norm": 0.8512447476387024, + "learning_rate": 0.0013750889210895705, + "loss": 1.7123, + "step": 1844 + }, + { + "epoch": 0.19462025316455697, + "grad_norm": 0.6716808676719666, + "learning_rate": 0.0013749501525303401, + "loss": 1.6797, + "step": 1845 + }, + { + "epoch": 0.19472573839662446, + "grad_norm": 0.6908264756202698, + "learning_rate": 0.0013748113139423288, + "loss": 1.7053, + "step": 1846 + }, + { + "epoch": 0.19483122362869199, + "grad_norm": 0.8399966359138489, + "learning_rate": 0.0013746724053410944, + "loss": 1.6921, + "step": 1847 + }, + { + "epoch": 0.1949367088607595, + "grad_norm": 0.7023179531097412, + "learning_rate": 0.001374533426742202, + "loss": 1.649, + "step": 1848 + }, + { + "epoch": 0.195042194092827, + "grad_norm": 0.7922264933586121, + "learning_rate": 0.0013743943781612251, + "loss": 1.6309, + "step": 1849 + }, + { + "epoch": 0.19514767932489452, + "grad_norm": 1.011397361755371, + "learning_rate": 0.0013742552596137444, + "loss": 1.7115, + "step": 1850 + }, + { + "epoch": 0.19525316455696204, + "grad_norm": 0.7658153176307678, + "learning_rate": 0.0013741160711153492, + "loss": 1.6634, + "step": 1851 + }, + { + "epoch": 0.19535864978902953, + "grad_norm": 0.6916139721870422, + "learning_rate": 0.0013739768126816358, + "loss": 1.695, + "step": 1852 + }, + { + "epoch": 0.19546413502109705, + "grad_norm": 1.0186495780944824, + "learning_rate": 0.0013738374843282094, + "loss": 1.6865, + "step": 1853 + }, + { + "epoch": 0.19556962025316454, + "grad_norm": 1.1396199464797974, + "learning_rate": 0.0013736980860706819, + "loss": 1.7139, + "step": 1854 + }, + { + "epoch": 0.19567510548523206, + "grad_norm": 0.6198959350585938, + "learning_rate": 0.001373558617924674, + "loss": 1.6908, + "step": 1855 + }, + { + "epoch": 0.19578059071729959, + "grad_norm": 1.2155847549438477, + "learning_rate": 0.0013734190799058136, + "loss": 1.6957, + "step": 1856 + }, + { + "epoch": 0.19588607594936708, + "grad_norm": 
0.8000158071517944, + "learning_rate": 0.0013732794720297367, + "loss": 1.6727, + "step": 1857 + }, + { + "epoch": 0.1959915611814346, + "grad_norm": 0.8424479961395264, + "learning_rate": 0.0013731397943120868, + "loss": 1.6807, + "step": 1858 + }, + { + "epoch": 0.19609704641350212, + "grad_norm": 1.081796646118164, + "learning_rate": 0.001373000046768516, + "loss": 1.686, + "step": 1859 + }, + { + "epoch": 0.1962025316455696, + "grad_norm": 0.7006481885910034, + "learning_rate": 0.0013728602294146833, + "loss": 1.7152, + "step": 1860 + }, + { + "epoch": 0.19630801687763713, + "grad_norm": 1.0607731342315674, + "learning_rate": 0.001372720342266256, + "loss": 1.7017, + "step": 1861 + }, + { + "epoch": 0.19641350210970465, + "grad_norm": 0.9053279161453247, + "learning_rate": 0.001372580385338909, + "loss": 1.6825, + "step": 1862 + }, + { + "epoch": 0.19651898734177214, + "grad_norm": 0.6618473529815674, + "learning_rate": 0.0013724403586483254, + "loss": 1.7472, + "step": 1863 + }, + { + "epoch": 0.19662447257383966, + "grad_norm": 1.138869285583496, + "learning_rate": 0.001372300262210196, + "loss": 1.6637, + "step": 1864 + }, + { + "epoch": 0.19672995780590719, + "grad_norm": 0.976681649684906, + "learning_rate": 0.001372160096040219, + "loss": 1.6698, + "step": 1865 + }, + { + "epoch": 0.19683544303797468, + "grad_norm": 0.6726765632629395, + "learning_rate": 0.001372019860154101, + "loss": 1.6485, + "step": 1866 + }, + { + "epoch": 0.1969409282700422, + "grad_norm": 0.7649640440940857, + "learning_rate": 0.001371879554567556, + "loss": 1.6956, + "step": 1867 + }, + { + "epoch": 0.19704641350210972, + "grad_norm": 0.7875130772590637, + "learning_rate": 0.0013717391792963062, + "loss": 1.6586, + "step": 1868 + }, + { + "epoch": 0.1971518987341772, + "grad_norm": 0.7195281982421875, + "learning_rate": 0.0013715987343560804, + "loss": 1.6921, + "step": 1869 + }, + { + "epoch": 0.19725738396624473, + "grad_norm": 0.647891104221344, + "learning_rate": 
0.0013714582197626175, + "loss": 1.7035, + "step": 1870 + }, + { + "epoch": 0.19736286919831222, + "grad_norm": 0.646063506603241, + "learning_rate": 0.001371317635531662, + "loss": 1.6428, + "step": 1871 + }, + { + "epoch": 0.19746835443037974, + "grad_norm": 0.7042902112007141, + "learning_rate": 0.001371176981678967, + "loss": 1.6619, + "step": 1872 + }, + { + "epoch": 0.19757383966244726, + "grad_norm": 0.7078396677970886, + "learning_rate": 0.001371036258220294, + "loss": 1.6431, + "step": 1873 + }, + { + "epoch": 0.19767932489451476, + "grad_norm": 0.7125684022903442, + "learning_rate": 0.0013708954651714116, + "loss": 1.6638, + "step": 1874 + }, + { + "epoch": 0.19778481012658228, + "grad_norm": 0.6792777180671692, + "learning_rate": 0.0013707546025480961, + "loss": 1.6775, + "step": 1875 + }, + { + "epoch": 0.1978902953586498, + "grad_norm": 0.627964973449707, + "learning_rate": 0.001370613670366132, + "loss": 1.708, + "step": 1876 + }, + { + "epoch": 0.1979957805907173, + "grad_norm": 0.7148562073707581, + "learning_rate": 0.0013704726686413116, + "loss": 1.7291, + "step": 1877 + }, + { + "epoch": 0.1981012658227848, + "grad_norm": 0.8530842661857605, + "learning_rate": 0.0013703315973894346, + "loss": 1.647, + "step": 1878 + }, + { + "epoch": 0.19820675105485233, + "grad_norm": 0.7189075350761414, + "learning_rate": 0.001370190456626309, + "loss": 1.6312, + "step": 1879 + }, + { + "epoch": 0.19831223628691982, + "grad_norm": 0.7277823090553284, + "learning_rate": 0.0013700492463677501, + "loss": 1.692, + "step": 1880 + }, + { + "epoch": 0.19841772151898734, + "grad_norm": 0.850841760635376, + "learning_rate": 0.0013699079666295811, + "loss": 1.682, + "step": 1881 + }, + { + "epoch": 0.19852320675105486, + "grad_norm": 0.8184512257575989, + "learning_rate": 0.0013697666174276337, + "loss": 1.6599, + "step": 1882 + }, + { + "epoch": 0.19862869198312236, + "grad_norm": 0.6471538543701172, + "learning_rate": 0.001369625198777746, + "loss": 1.6718, + "step": 
1883 + }, + { + "epoch": 0.19873417721518988, + "grad_norm": 0.6347360014915466, + "learning_rate": 0.0013694837106957654, + "loss": 1.6829, + "step": 1884 + }, + { + "epoch": 0.19883966244725737, + "grad_norm": 0.7201194167137146, + "learning_rate": 0.0013693421531975455, + "loss": 1.6618, + "step": 1885 + }, + { + "epoch": 0.1989451476793249, + "grad_norm": 0.9844521880149841, + "learning_rate": 0.0013692005262989496, + "loss": 1.6767, + "step": 1886 + }, + { + "epoch": 0.1990506329113924, + "grad_norm": 0.7485830783843994, + "learning_rate": 0.0013690588300158467, + "loss": 1.6776, + "step": 1887 + }, + { + "epoch": 0.1991561181434599, + "grad_norm": 0.7279402017593384, + "learning_rate": 0.001368917064364115, + "loss": 1.7098, + "step": 1888 + }, + { + "epoch": 0.19926160337552742, + "grad_norm": 1.0291376113891602, + "learning_rate": 0.0013687752293596402, + "loss": 1.6795, + "step": 1889 + }, + { + "epoch": 0.19936708860759494, + "grad_norm": 0.8357351422309875, + "learning_rate": 0.0013686333250183154, + "loss": 1.6645, + "step": 1890 + }, + { + "epoch": 0.19947257383966244, + "grad_norm": 0.6486485004425049, + "learning_rate": 0.0013684913513560418, + "loss": 1.6848, + "step": 1891 + }, + { + "epoch": 0.19957805907172996, + "grad_norm": 0.8514304161071777, + "learning_rate": 0.0013683493083887282, + "loss": 1.65, + "step": 1892 + }, + { + "epoch": 0.19968354430379748, + "grad_norm": 0.9217506051063538, + "learning_rate": 0.0013682071961322914, + "loss": 1.6302, + "step": 1893 + }, + { + "epoch": 0.19978902953586497, + "grad_norm": 0.6754322648048401, + "learning_rate": 0.0013680650146026554, + "loss": 1.6626, + "step": 1894 + }, + { + "epoch": 0.1998945147679325, + "grad_norm": 0.8454970121383667, + "learning_rate": 0.0013679227638157523, + "loss": 1.7029, + "step": 1895 + }, + { + "epoch": 0.2, + "grad_norm": 0.7119497656822205, + "learning_rate": 0.0013677804437875227, + "loss": 1.6731, + "step": 1896 + }, + { + "epoch": 0.2001054852320675, + "grad_norm": 
0.9047150015830994, + "learning_rate": 0.0013676380545339136, + "loss": 1.691, + "step": 1897 + }, + { + "epoch": 0.20021097046413502, + "grad_norm": 1.0610361099243164, + "learning_rate": 0.0013674955960708808, + "loss": 1.6844, + "step": 1898 + }, + { + "epoch": 0.20031645569620254, + "grad_norm": 0.6998708248138428, + "learning_rate": 0.0013673530684143874, + "loss": 1.6707, + "step": 1899 + }, + { + "epoch": 0.20042194092827004, + "grad_norm": 0.9508792757987976, + "learning_rate": 0.001367210471580404, + "loss": 1.6912, + "step": 1900 + }, + { + "epoch": 0.20052742616033756, + "grad_norm": 0.8429707884788513, + "learning_rate": 0.0013670678055849098, + "loss": 1.6699, + "step": 1901 + }, + { + "epoch": 0.20063291139240505, + "grad_norm": 0.9381064772605896, + "learning_rate": 0.0013669250704438911, + "loss": 1.6372, + "step": 1902 + }, + { + "epoch": 0.20073839662447257, + "grad_norm": 1.250076413154602, + "learning_rate": 0.0013667822661733418, + "loss": 1.6857, + "step": 1903 + }, + { + "epoch": 0.2008438818565401, + "grad_norm": 0.8109840750694275, + "learning_rate": 0.0013666393927892642, + "loss": 1.6667, + "step": 1904 + }, + { + "epoch": 0.20094936708860758, + "grad_norm": 1.0230591297149658, + "learning_rate": 0.0013664964503076677, + "loss": 1.6228, + "step": 1905 + }, + { + "epoch": 0.2010548523206751, + "grad_norm": 0.9444345831871033, + "learning_rate": 0.0013663534387445696, + "loss": 1.7158, + "step": 1906 + }, + { + "epoch": 0.20116033755274262, + "grad_norm": 0.8344323039054871, + "learning_rate": 0.0013662103581159955, + "loss": 1.6672, + "step": 1907 + }, + { + "epoch": 0.20126582278481012, + "grad_norm": 0.7074981927871704, + "learning_rate": 0.0013660672084379781, + "loss": 1.6838, + "step": 1908 + }, + { + "epoch": 0.20137130801687764, + "grad_norm": 0.7589022517204285, + "learning_rate": 0.001365923989726558, + "loss": 1.724, + "step": 1909 + }, + { + "epoch": 0.20147679324894516, + "grad_norm": 0.7411625981330872, + "learning_rate": 
0.0013657807019977835, + "loss": 1.6813, + "step": 1910 + }, + { + "epoch": 0.20158227848101265, + "grad_norm": 0.8135130405426025, + "learning_rate": 0.0013656373452677107, + "loss": 1.6924, + "step": 1911 + }, + { + "epoch": 0.20168776371308017, + "grad_norm": 0.7771637439727783, + "learning_rate": 0.0013654939195524038, + "loss": 1.6672, + "step": 1912 + }, + { + "epoch": 0.2017932489451477, + "grad_norm": 0.6863083243370056, + "learning_rate": 0.0013653504248679338, + "loss": 1.6559, + "step": 1913 + }, + { + "epoch": 0.20189873417721518, + "grad_norm": 1.01815664768219, + "learning_rate": 0.0013652068612303803, + "loss": 1.6768, + "step": 1914 + }, + { + "epoch": 0.2020042194092827, + "grad_norm": 1.0649935007095337, + "learning_rate": 0.0013650632286558305, + "loss": 1.6551, + "step": 1915 + }, + { + "epoch": 0.20210970464135022, + "grad_norm": 0.7289904356002808, + "learning_rate": 0.001364919527160379, + "loss": 1.6796, + "step": 1916 + }, + { + "epoch": 0.20221518987341772, + "grad_norm": 0.86855149269104, + "learning_rate": 0.001364775756760128, + "loss": 1.6634, + "step": 1917 + }, + { + "epoch": 0.20232067510548524, + "grad_norm": 1.1180757284164429, + "learning_rate": 0.0013646319174711878, + "loss": 1.6705, + "step": 1918 + }, + { + "epoch": 0.20242616033755273, + "grad_norm": 0.7323005795478821, + "learning_rate": 0.0013644880093096766, + "loss": 1.7238, + "step": 1919 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 0.8364667296409607, + "learning_rate": 0.0013643440322917198, + "loss": 1.6531, + "step": 1920 + }, + { + "epoch": 0.20263713080168777, + "grad_norm": 0.9197579026222229, + "learning_rate": 0.0013641999864334507, + "loss": 1.6893, + "step": 1921 + }, + { + "epoch": 0.20274261603375526, + "grad_norm": 0.6666012406349182, + "learning_rate": 0.0013640558717510107, + "loss": 1.6398, + "step": 1922 + }, + { + "epoch": 0.20284810126582278, + "grad_norm": 1.0221725702285767, + "learning_rate": 0.0013639116882605481, + "loss": 1.6885, + 
"step": 1923 + }, + { + "epoch": 0.2029535864978903, + "grad_norm": 1.2815030813217163, + "learning_rate": 0.0013637674359782196, + "loss": 1.6538, + "step": 1924 + }, + { + "epoch": 0.2030590717299578, + "grad_norm": 0.7257810235023499, + "learning_rate": 0.0013636231149201895, + "loss": 1.7138, + "step": 1925 + }, + { + "epoch": 0.20316455696202532, + "grad_norm": 1.4669620990753174, + "learning_rate": 0.0013634787251026296, + "loss": 1.6638, + "step": 1926 + }, + { + "epoch": 0.20327004219409284, + "grad_norm": 0.7212225794792175, + "learning_rate": 0.0013633342665417192, + "loss": 1.6709, + "step": 1927 + }, + { + "epoch": 0.20337552742616033, + "grad_norm": 1.2812660932540894, + "learning_rate": 0.0013631897392536463, + "loss": 1.7116, + "step": 1928 + }, + { + "epoch": 0.20348101265822785, + "grad_norm": 0.7539347410202026, + "learning_rate": 0.001363045143254605, + "loss": 1.6457, + "step": 1929 + }, + { + "epoch": 0.20358649789029537, + "grad_norm": 0.9084736108779907, + "learning_rate": 0.0013629004785607989, + "loss": 1.6907, + "step": 1930 + }, + { + "epoch": 0.20369198312236286, + "grad_norm": 0.980384349822998, + "learning_rate": 0.0013627557451884374, + "loss": 1.6155, + "step": 1931 + }, + { + "epoch": 0.20379746835443038, + "grad_norm": 0.6436879634857178, + "learning_rate": 0.0013626109431537398, + "loss": 1.6759, + "step": 1932 + }, + { + "epoch": 0.2039029535864979, + "grad_norm": 1.178465485572815, + "learning_rate": 0.001362466072472931, + "loss": 1.6719, + "step": 1933 + }, + { + "epoch": 0.2040084388185654, + "grad_norm": 0.7046089172363281, + "learning_rate": 0.0013623211331622448, + "loss": 1.6855, + "step": 1934 + }, + { + "epoch": 0.20411392405063292, + "grad_norm": 1.2459933757781982, + "learning_rate": 0.0013621761252379221, + "loss": 1.6476, + "step": 1935 + }, + { + "epoch": 0.2042194092827004, + "grad_norm": 0.7688311338424683, + "learning_rate": 0.0013620310487162124, + "loss": 1.6607, + "step": 1936 + }, + { + "epoch": 
0.20432489451476793, + "grad_norm": 1.141371488571167, + "learning_rate": 0.0013618859036133714, + "loss": 1.6843, + "step": 1937 + }, + { + "epoch": 0.20443037974683545, + "grad_norm": 0.7691494822502136, + "learning_rate": 0.001361740689945664, + "loss": 1.6857, + "step": 1938 + }, + { + "epoch": 0.20453586497890294, + "grad_norm": 0.8632043600082397, + "learning_rate": 0.001361595407729362, + "loss": 1.6734, + "step": 1939 + }, + { + "epoch": 0.20464135021097046, + "grad_norm": 0.8190142512321472, + "learning_rate": 0.0013614500569807445, + "loss": 1.6343, + "step": 1940 + }, + { + "epoch": 0.20474683544303798, + "grad_norm": 0.8059411644935608, + "learning_rate": 0.0013613046377160996, + "loss": 1.669, + "step": 1941 + }, + { + "epoch": 0.20485232067510548, + "grad_norm": 0.9596593379974365, + "learning_rate": 0.0013611591499517212, + "loss": 1.6852, + "step": 1942 + }, + { + "epoch": 0.204957805907173, + "grad_norm": 0.7857062220573425, + "learning_rate": 0.001361013593703913, + "loss": 1.6454, + "step": 1943 + }, + { + "epoch": 0.20506329113924052, + "grad_norm": 1.0190155506134033, + "learning_rate": 0.0013608679689889847, + "loss": 1.6591, + "step": 1944 + }, + { + "epoch": 0.205168776371308, + "grad_norm": 1.0270004272460938, + "learning_rate": 0.0013607222758232546, + "loss": 1.6967, + "step": 1945 + }, + { + "epoch": 0.20527426160337553, + "grad_norm": 0.8125897645950317, + "learning_rate": 0.0013605765142230479, + "loss": 1.624, + "step": 1946 + }, + { + "epoch": 0.20537974683544305, + "grad_norm": 0.9370504021644592, + "learning_rate": 0.0013604306842046983, + "loss": 1.6973, + "step": 1947 + }, + { + "epoch": 0.20548523206751054, + "grad_norm": 0.7817099690437317, + "learning_rate": 0.0013602847857845466, + "loss": 1.6448, + "step": 1948 + }, + { + "epoch": 0.20559071729957806, + "grad_norm": 0.7688807845115662, + "learning_rate": 0.0013601388189789414, + "loss": 1.6358, + "step": 1949 + }, + { + "epoch": 0.20569620253164558, + "grad_norm": 
0.771403968334198, + "learning_rate": 0.0013599927838042394, + "loss": 1.6502, + "step": 1950 + }, + { + "epoch": 0.20580168776371308, + "grad_norm": 0.8258805871009827, + "learning_rate": 0.0013598466802768041, + "loss": 1.7126, + "step": 1951 + }, + { + "epoch": 0.2059071729957806, + "grad_norm": 1.0377191305160522, + "learning_rate": 0.0013597005084130072, + "loss": 1.6688, + "step": 1952 + }, + { + "epoch": 0.2060126582278481, + "grad_norm": 0.9055862426757812, + "learning_rate": 0.0013595542682292281, + "loss": 1.7053, + "step": 1953 + }, + { + "epoch": 0.2061181434599156, + "grad_norm": 0.8350077271461487, + "learning_rate": 0.0013594079597418541, + "loss": 1.6251, + "step": 1954 + }, + { + "epoch": 0.20622362869198313, + "grad_norm": 1.010309100151062, + "learning_rate": 0.0013592615829672791, + "loss": 1.6696, + "step": 1955 + }, + { + "epoch": 0.20632911392405062, + "grad_norm": 0.6699627041816711, + "learning_rate": 0.0013591151379219058, + "loss": 1.6219, + "step": 1956 + }, + { + "epoch": 0.20643459915611814, + "grad_norm": 0.8707762360572815, + "learning_rate": 0.0013589686246221438, + "loss": 1.6578, + "step": 1957 + }, + { + "epoch": 0.20654008438818566, + "grad_norm": 0.7891092896461487, + "learning_rate": 0.001358822043084411, + "loss": 1.6669, + "step": 1958 + }, + { + "epoch": 0.20664556962025316, + "grad_norm": 0.7248659729957581, + "learning_rate": 0.0013586753933251322, + "loss": 1.6893, + "step": 1959 + }, + { + "epoch": 0.20675105485232068, + "grad_norm": 0.7765432000160217, + "learning_rate": 0.0013585286753607408, + "loss": 1.6342, + "step": 1960 + }, + { + "epoch": 0.2068565400843882, + "grad_norm": 0.6674951314926147, + "learning_rate": 0.0013583818892076765, + "loss": 1.6952, + "step": 1961 + }, + { + "epoch": 0.2069620253164557, + "grad_norm": 0.8420159816741943, + "learning_rate": 0.0013582350348823882, + "loss": 1.7052, + "step": 1962 + }, + { + "epoch": 0.2070675105485232, + "grad_norm": 0.7408103942871094, + "learning_rate": 
0.0013580881124013312, + "loss": 1.6834, + "step": 1963 + }, + { + "epoch": 0.20717299578059073, + "grad_norm": 0.649383544921875, + "learning_rate": 0.001357941121780969, + "loss": 1.6392, + "step": 1964 + }, + { + "epoch": 0.20727848101265822, + "grad_norm": 0.7968661189079285, + "learning_rate": 0.0013577940630377725, + "loss": 1.6756, + "step": 1965 + }, + { + "epoch": 0.20738396624472574, + "grad_norm": 0.6262935400009155, + "learning_rate": 0.0013576469361882208, + "loss": 1.6608, + "step": 1966 + }, + { + "epoch": 0.20748945147679324, + "grad_norm": 0.9384946227073669, + "learning_rate": 0.0013574997412487996, + "loss": 1.6994, + "step": 1967 + }, + { + "epoch": 0.20759493670886076, + "grad_norm": 0.8977136015892029, + "learning_rate": 0.0013573524782360034, + "loss": 1.6712, + "step": 1968 + }, + { + "epoch": 0.20770042194092828, + "grad_norm": 0.7456044554710388, + "learning_rate": 0.0013572051471663332, + "loss": 1.6686, + "step": 1969 + }, + { + "epoch": 0.20780590717299577, + "grad_norm": 0.7437183856964111, + "learning_rate": 0.0013570577480562986, + "loss": 1.6647, + "step": 1970 + }, + { + "epoch": 0.2079113924050633, + "grad_norm": 0.9589160680770874, + "learning_rate": 0.0013569102809224162, + "loss": 1.665, + "step": 1971 + }, + { + "epoch": 0.2080168776371308, + "grad_norm": 0.8503575921058655, + "learning_rate": 0.0013567627457812105, + "loss": 1.6294, + "step": 1972 + }, + { + "epoch": 0.2081223628691983, + "grad_norm": 0.7167878150939941, + "learning_rate": 0.0013566151426492137, + "loss": 1.6636, + "step": 1973 + }, + { + "epoch": 0.20822784810126582, + "grad_norm": 1.175811767578125, + "learning_rate": 0.0013564674715429651, + "loss": 1.6481, + "step": 1974 + }, + { + "epoch": 0.20833333333333334, + "grad_norm": 0.8015633821487427, + "learning_rate": 0.0013563197324790123, + "loss": 1.6806, + "step": 1975 + }, + { + "epoch": 0.20843881856540084, + "grad_norm": 0.6290338039398193, + "learning_rate": 0.0013561719254739104, + "loss": 1.6464, + 
"step": 1976 + }, + { + "epoch": 0.20854430379746836, + "grad_norm": 0.6720382571220398, + "learning_rate": 0.001356024050544221, + "loss": 1.6712, + "step": 1977 + }, + { + "epoch": 0.20864978902953588, + "grad_norm": 0.6930551528930664, + "learning_rate": 0.0013558761077065154, + "loss": 1.6598, + "step": 1978 + }, + { + "epoch": 0.20875527426160337, + "grad_norm": 0.6618515253067017, + "learning_rate": 0.0013557280969773704, + "loss": 1.6375, + "step": 1979 + }, + { + "epoch": 0.2088607594936709, + "grad_norm": 0.7057193517684937, + "learning_rate": 0.0013555800183733717, + "loss": 1.6659, + "step": 1980 + }, + { + "epoch": 0.2089662447257384, + "grad_norm": 0.7072168588638306, + "learning_rate": 0.0013554318719111124, + "loss": 1.6474, + "step": 1981 + }, + { + "epoch": 0.2090717299578059, + "grad_norm": 0.6277148723602295, + "learning_rate": 0.0013552836576071925, + "loss": 1.6091, + "step": 1982 + }, + { + "epoch": 0.20917721518987342, + "grad_norm": 0.7299100756645203, + "learning_rate": 0.0013551353754782211, + "loss": 1.6604, + "step": 1983 + }, + { + "epoch": 0.20928270042194091, + "grad_norm": 0.6482892632484436, + "learning_rate": 0.0013549870255408132, + "loss": 1.696, + "step": 1984 + }, + { + "epoch": 0.20938818565400844, + "grad_norm": 0.6596720814704895, + "learning_rate": 0.0013548386078115924, + "loss": 1.6526, + "step": 1985 + }, + { + "epoch": 0.20949367088607596, + "grad_norm": 0.7831690907478333, + "learning_rate": 0.0013546901223071893, + "loss": 1.641, + "step": 1986 + }, + { + "epoch": 0.20959915611814345, + "grad_norm": 0.8322663307189941, + "learning_rate": 0.001354541569044243, + "loss": 1.6241, + "step": 1987 + }, + { + "epoch": 0.20970464135021097, + "grad_norm": 0.7873285412788391, + "learning_rate": 0.0013543929480393994, + "loss": 1.6709, + "step": 1988 + }, + { + "epoch": 0.2098101265822785, + "grad_norm": 0.7312831878662109, + "learning_rate": 0.0013542442593093122, + "loss": 1.6851, + "step": 1989 + }, + { + "epoch": 
0.20991561181434598, + "grad_norm": 0.6941807866096497, + "learning_rate": 0.0013540955028706425, + "loss": 1.6706, + "step": 1990 + }, + { + "epoch": 0.2100210970464135, + "grad_norm": 0.6501792669296265, + "learning_rate": 0.0013539466787400598, + "loss": 1.6645, + "step": 1991 + }, + { + "epoch": 0.21012658227848102, + "grad_norm": 0.7157619595527649, + "learning_rate": 0.00135379778693424, + "loss": 1.6509, + "step": 1992 + }, + { + "epoch": 0.21023206751054851, + "grad_norm": 0.7346425652503967, + "learning_rate": 0.0013536488274698672, + "loss": 1.6759, + "step": 1993 + }, + { + "epoch": 0.21033755274261604, + "grad_norm": 0.8682294487953186, + "learning_rate": 0.0013534998003636332, + "loss": 1.6301, + "step": 1994 + }, + { + "epoch": 0.21044303797468356, + "grad_norm": 0.8090446591377258, + "learning_rate": 0.0013533507056322374, + "loss": 1.6901, + "step": 1995 + }, + { + "epoch": 0.21054852320675105, + "grad_norm": 0.8089991211891174, + "learning_rate": 0.0013532015432923864, + "loss": 1.6461, + "step": 1996 + }, + { + "epoch": 0.21065400843881857, + "grad_norm": 1.0435841083526611, + "learning_rate": 0.0013530523133607948, + "loss": 1.6533, + "step": 1997 + }, + { + "epoch": 0.2107594936708861, + "grad_norm": 0.930782675743103, + "learning_rate": 0.0013529030158541842, + "loss": 1.643, + "step": 1998 + }, + { + "epoch": 0.21086497890295358, + "grad_norm": 0.6363478899002075, + "learning_rate": 0.0013527536507892844, + "loss": 1.6401, + "step": 1999 + }, + { + "epoch": 0.2109704641350211, + "grad_norm": 0.9364420771598816, + "learning_rate": 0.0013526042181828324, + "loss": 1.672, + "step": 2000 + }, + { + "epoch": 0.2110759493670886, + "grad_norm": 0.7343520522117615, + "learning_rate": 0.001352454718051573, + "loss": 1.7049, + "step": 2001 + }, + { + "epoch": 0.21118143459915611, + "grad_norm": 0.8742684721946716, + "learning_rate": 0.0013523051504122584, + "loss": 1.6558, + "step": 2002 + }, + { + "epoch": 0.21128691983122364, + "grad_norm": 
1.3313316106796265, + "learning_rate": 0.0013521555152816481, + "loss": 1.6196, + "step": 2003 + }, + { + "epoch": 0.21139240506329113, + "grad_norm": 0.6908631324768066, + "learning_rate": 0.0013520058126765097, + "loss": 1.656, + "step": 2004 + }, + { + "epoch": 0.21149789029535865, + "grad_norm": 0.940873384475708, + "learning_rate": 0.0013518560426136182, + "loss": 1.6867, + "step": 2005 + }, + { + "epoch": 0.21160337552742617, + "grad_norm": 1.009210467338562, + "learning_rate": 0.001351706205109756, + "loss": 1.6733, + "step": 2006 + }, + { + "epoch": 0.21170886075949366, + "grad_norm": 0.6503928303718567, + "learning_rate": 0.001351556300181713, + "loss": 1.6564, + "step": 2007 + }, + { + "epoch": 0.21181434599156118, + "grad_norm": 0.9938562512397766, + "learning_rate": 0.001351406327846287, + "loss": 1.696, + "step": 2008 + }, + { + "epoch": 0.2119198312236287, + "grad_norm": 0.6758134365081787, + "learning_rate": 0.0013512562881202832, + "loss": 1.6345, + "step": 2009 + }, + { + "epoch": 0.2120253164556962, + "grad_norm": 0.8230181336402893, + "learning_rate": 0.0013511061810205143, + "loss": 1.636, + "step": 2010 + }, + { + "epoch": 0.21213080168776371, + "grad_norm": 0.8435307741165161, + "learning_rate": 0.0013509560065638002, + "loss": 1.6731, + "step": 2011 + }, + { + "epoch": 0.21223628691983124, + "grad_norm": 0.6924878358840942, + "learning_rate": 0.001350805764766969, + "loss": 1.6646, + "step": 2012 + }, + { + "epoch": 0.21234177215189873, + "grad_norm": 1.008833408355713, + "learning_rate": 0.0013506554556468558, + "loss": 1.6845, + "step": 2013 + }, + { + "epoch": 0.21244725738396625, + "grad_norm": 0.7575940489768982, + "learning_rate": 0.001350505079220304, + "loss": 1.6658, + "step": 2014 + }, + { + "epoch": 0.21255274261603377, + "grad_norm": 0.7611131072044373, + "learning_rate": 0.0013503546355041636, + "loss": 1.6686, + "step": 2015 + }, + { + "epoch": 0.21265822784810126, + "grad_norm": 0.9520182609558105, + "learning_rate": 
0.0013502041245152924, + "loss": 1.7028, + "step": 2016 + }, + { + "epoch": 0.21276371308016878, + "grad_norm": 0.6350567936897278, + "learning_rate": 0.0013500535462705565, + "loss": 1.6679, + "step": 2017 + }, + { + "epoch": 0.21286919831223627, + "grad_norm": 1.207460880279541, + "learning_rate": 0.0013499029007868284, + "loss": 1.6599, + "step": 2018 + }, + { + "epoch": 0.2129746835443038, + "grad_norm": 0.7338306903839111, + "learning_rate": 0.0013497521880809888, + "loss": 1.6783, + "step": 2019 + }, + { + "epoch": 0.21308016877637131, + "grad_norm": 1.096411108970642, + "learning_rate": 0.001349601408169926, + "loss": 1.6533, + "step": 2020 + }, + { + "epoch": 0.2131856540084388, + "grad_norm": 1.0697574615478516, + "learning_rate": 0.0013494505610705356, + "loss": 1.6547, + "step": 2021 + }, + { + "epoch": 0.21329113924050633, + "grad_norm": 0.7134337425231934, + "learning_rate": 0.0013492996467997205, + "loss": 1.6382, + "step": 2022 + }, + { + "epoch": 0.21339662447257385, + "grad_norm": 0.879396915435791, + "learning_rate": 0.0013491486653743918, + "loss": 1.6999, + "step": 2023 + }, + { + "epoch": 0.21350210970464134, + "grad_norm": 0.7339773774147034, + "learning_rate": 0.0013489976168114676, + "loss": 1.6802, + "step": 2024 + }, + { + "epoch": 0.21360759493670886, + "grad_norm": 0.9337875843048096, + "learning_rate": 0.0013488465011278733, + "loss": 1.6298, + "step": 2025 + }, + { + "epoch": 0.21371308016877638, + "grad_norm": 0.6278442144393921, + "learning_rate": 0.0013486953183405425, + "loss": 1.6342, + "step": 2026 + }, + { + "epoch": 0.21381856540084387, + "grad_norm": 0.8392414450645447, + "learning_rate": 0.001348544068466416, + "loss": 1.6398, + "step": 2027 + }, + { + "epoch": 0.2139240506329114, + "grad_norm": 0.6311750411987305, + "learning_rate": 0.0013483927515224418, + "loss": 1.6764, + "step": 2028 + }, + { + "epoch": 0.21402953586497891, + "grad_norm": 0.8917051553726196, + "learning_rate": 0.0013482413675255762, + "loss": 1.6842, + 
"step": 2029 + }, + { + "epoch": 0.2141350210970464, + "grad_norm": 0.6573785543441772, + "learning_rate": 0.0013480899164927823, + "loss": 1.6353, + "step": 2030 + }, + { + "epoch": 0.21424050632911393, + "grad_norm": 0.7962350249290466, + "learning_rate": 0.0013479383984410305, + "loss": 1.6738, + "step": 2031 + }, + { + "epoch": 0.21434599156118145, + "grad_norm": 0.811136782169342, + "learning_rate": 0.0013477868133873001, + "loss": 1.6498, + "step": 2032 + }, + { + "epoch": 0.21445147679324894, + "grad_norm": 0.7314328551292419, + "learning_rate": 0.0013476351613485762, + "loss": 1.657, + "step": 2033 + }, + { + "epoch": 0.21455696202531646, + "grad_norm": 0.6560602188110352, + "learning_rate": 0.0013474834423418522, + "loss": 1.6477, + "step": 2034 + }, + { + "epoch": 0.21466244725738395, + "grad_norm": 0.72807377576828, + "learning_rate": 0.0013473316563841296, + "loss": 1.6459, + "step": 2035 + }, + { + "epoch": 0.21476793248945147, + "grad_norm": 0.6761951446533203, + "learning_rate": 0.0013471798034924158, + "loss": 1.6699, + "step": 2036 + }, + { + "epoch": 0.214873417721519, + "grad_norm": 0.6889795660972595, + "learning_rate": 0.0013470278836837275, + "loss": 1.6357, + "step": 2037 + }, + { + "epoch": 0.2149789029535865, + "grad_norm": 0.640275239944458, + "learning_rate": 0.001346875896975088, + "loss": 1.649, + "step": 2038 + }, + { + "epoch": 0.215084388185654, + "grad_norm": 0.6634708046913147, + "learning_rate": 0.0013467238433835277, + "loss": 1.6632, + "step": 2039 + }, + { + "epoch": 0.21518987341772153, + "grad_norm": 0.656131386756897, + "learning_rate": 0.0013465717229260853, + "loss": 1.6722, + "step": 2040 + }, + { + "epoch": 0.21529535864978902, + "grad_norm": 0.803827166557312, + "learning_rate": 0.0013464195356198065, + "loss": 1.6504, + "step": 2041 + }, + { + "epoch": 0.21540084388185654, + "grad_norm": 0.8771393895149231, + "learning_rate": 0.0013462672814817445, + "loss": 1.6376, + "step": 2042 + }, + { + "epoch": 
0.21550632911392406, + "grad_norm": 0.6586048603057861, + "learning_rate": 0.0013461149605289607, + "loss": 1.6575, + "step": 2043 + }, + { + "epoch": 0.21561181434599155, + "grad_norm": 0.8163931369781494, + "learning_rate": 0.001345962572778523, + "loss": 1.7062, + "step": 2044 + }, + { + "epoch": 0.21571729957805907, + "grad_norm": 0.7906981110572815, + "learning_rate": 0.0013458101182475073, + "loss": 1.6323, + "step": 2045 + }, + { + "epoch": 0.2158227848101266, + "grad_norm": 0.7564477920532227, + "learning_rate": 0.0013456575969529967, + "loss": 1.6725, + "step": 2046 + }, + { + "epoch": 0.2159282700421941, + "grad_norm": 0.717090368270874, + "learning_rate": 0.001345505008912082, + "loss": 1.6298, + "step": 2047 + }, + { + "epoch": 0.2160337552742616, + "grad_norm": 0.7604233026504517, + "learning_rate": 0.0013453523541418623, + "loss": 1.6554, + "step": 2048 + }, + { + "epoch": 0.21613924050632913, + "grad_norm": 0.7164632081985474, + "learning_rate": 0.001345199632659442, + "loss": 1.6561, + "step": 2049 + }, + { + "epoch": 0.21624472573839662, + "grad_norm": 0.8912903070449829, + "learning_rate": 0.001345046844481935, + "loss": 1.6546, + "step": 2050 + }, + { + "epoch": 0.21635021097046414, + "grad_norm": 0.7573561072349548, + "learning_rate": 0.0013448939896264622, + "loss": 1.6684, + "step": 2051 + }, + { + "epoch": 0.21645569620253163, + "grad_norm": 0.7860146164894104, + "learning_rate": 0.001344741068110151, + "loss": 1.6858, + "step": 2052 + }, + { + "epoch": 0.21656118143459915, + "grad_norm": 0.8973066806793213, + "learning_rate": 0.001344588079950138, + "loss": 1.6367, + "step": 2053 + }, + { + "epoch": 0.21666666666666667, + "grad_norm": 0.7833678126335144, + "learning_rate": 0.0013444350251635654, + "loss": 1.6523, + "step": 2054 + }, + { + "epoch": 0.21677215189873417, + "grad_norm": 0.7220452427864075, + "learning_rate": 0.0013442819037675843, + "loss": 1.6504, + "step": 2055 + }, + { + "epoch": 0.2168776371308017, + "grad_norm": 
0.7585017085075378, + "learning_rate": 0.0013441287157793522, + "loss": 1.6559, + "step": 2056 + }, + { + "epoch": 0.2169831223628692, + "grad_norm": 0.8215897083282471, + "learning_rate": 0.0013439754612160353, + "loss": 1.6457, + "step": 2057 + }, + { + "epoch": 0.2170886075949367, + "grad_norm": 0.7141791582107544, + "learning_rate": 0.001343822140094806, + "loss": 1.6112, + "step": 2058 + }, + { + "epoch": 0.21719409282700422, + "grad_norm": 0.871239185333252, + "learning_rate": 0.0013436687524328449, + "loss": 1.6235, + "step": 2059 + }, + { + "epoch": 0.21729957805907174, + "grad_norm": 0.747713029384613, + "learning_rate": 0.0013435152982473396, + "loss": 1.6203, + "step": 2060 + }, + { + "epoch": 0.21740506329113923, + "grad_norm": 0.8036581873893738, + "learning_rate": 0.0013433617775554854, + "loss": 1.6503, + "step": 2061 + }, + { + "epoch": 0.21751054852320675, + "grad_norm": 0.8717371225357056, + "learning_rate": 0.0013432081903744857, + "loss": 1.6806, + "step": 2062 + }, + { + "epoch": 0.21761603375527427, + "grad_norm": 0.6586039066314697, + "learning_rate": 0.00134305453672155, + "loss": 1.6366, + "step": 2063 + }, + { + "epoch": 0.21772151898734177, + "grad_norm": 0.7464496493339539, + "learning_rate": 0.0013429008166138965, + "loss": 1.629, + "step": 2064 + }, + { + "epoch": 0.2178270042194093, + "grad_norm": 0.74095618724823, + "learning_rate": 0.0013427470300687498, + "loss": 1.6219, + "step": 2065 + }, + { + "epoch": 0.21793248945147678, + "grad_norm": 0.6378330588340759, + "learning_rate": 0.0013425931771033426, + "loss": 1.6704, + "step": 2066 + }, + { + "epoch": 0.2180379746835443, + "grad_norm": 0.6455326676368713, + "learning_rate": 0.0013424392577349152, + "loss": 1.6271, + "step": 2067 + }, + { + "epoch": 0.21814345991561182, + "grad_norm": 0.6286801695823669, + "learning_rate": 0.001342285271980715, + "loss": 1.6592, + "step": 2068 + }, + { + "epoch": 0.2182489451476793, + "grad_norm": 0.7825955152511597, + "learning_rate": 
0.0013421312198579963, + "loss": 1.6641, + "step": 2069 + }, + { + "epoch": 0.21835443037974683, + "grad_norm": 0.7941883206367493, + "learning_rate": 0.0013419771013840217, + "loss": 1.6623, + "step": 2070 + }, + { + "epoch": 0.21845991561181435, + "grad_norm": 0.7672519683837891, + "learning_rate": 0.0013418229165760613, + "loss": 1.6855, + "step": 2071 + }, + { + "epoch": 0.21856540084388185, + "grad_norm": 1.3636953830718994, + "learning_rate": 0.001341668665451392, + "loss": 1.6606, + "step": 2072 + }, + { + "epoch": 0.21867088607594937, + "grad_norm": 0.6631350517272949, + "learning_rate": 0.0013415143480272982, + "loss": 1.6666, + "step": 2073 + }, + { + "epoch": 0.2187763713080169, + "grad_norm": 0.9986900091171265, + "learning_rate": 0.0013413599643210723, + "loss": 1.6355, + "step": 2074 + }, + { + "epoch": 0.21888185654008438, + "grad_norm": 0.9162805080413818, + "learning_rate": 0.0013412055143500136, + "loss": 1.6373, + "step": 2075 + }, + { + "epoch": 0.2189873417721519, + "grad_norm": 0.6727549433708191, + "learning_rate": 0.001341050998131429, + "loss": 1.6084, + "step": 2076 + }, + { + "epoch": 0.21909282700421942, + "grad_norm": 0.9712381958961487, + "learning_rate": 0.0013408964156826327, + "loss": 1.6644, + "step": 2077 + }, + { + "epoch": 0.2191983122362869, + "grad_norm": 0.7860013842582703, + "learning_rate": 0.0013407417670209467, + "loss": 1.6373, + "step": 2078 + }, + { + "epoch": 0.21930379746835443, + "grad_norm": 0.7063056826591492, + "learning_rate": 0.0013405870521636999, + "loss": 1.6685, + "step": 2079 + }, + { + "epoch": 0.21940928270042195, + "grad_norm": 0.7837082743644714, + "learning_rate": 0.001340432271128229, + "loss": 1.6788, + "step": 2080 + }, + { + "epoch": 0.21951476793248945, + "grad_norm": 0.7374839782714844, + "learning_rate": 0.001340277423931878, + "loss": 1.6485, + "step": 2081 + }, + { + "epoch": 0.21962025316455697, + "grad_norm": 0.9884734749794006, + "learning_rate": 0.0013401225105919982, + "loss": 1.6342, + 
"step": 2082 + }, + { + "epoch": 0.21972573839662446, + "grad_norm": 1.0136078596115112, + "learning_rate": 0.0013399675311259484, + "loss": 1.6293, + "step": 2083 + }, + { + "epoch": 0.21983122362869198, + "grad_norm": 0.6891409754753113, + "learning_rate": 0.0013398124855510951, + "loss": 1.653, + "step": 2084 + }, + { + "epoch": 0.2199367088607595, + "grad_norm": 0.9868010878562927, + "learning_rate": 0.0013396573738848115, + "loss": 1.6492, + "step": 2085 + }, + { + "epoch": 0.220042194092827, + "grad_norm": 1.028260588645935, + "learning_rate": 0.001339502196144479, + "loss": 1.6081, + "step": 2086 + }, + { + "epoch": 0.2201476793248945, + "grad_norm": 0.7166376709938049, + "learning_rate": 0.0013393469523474858, + "loss": 1.6889, + "step": 2087 + }, + { + "epoch": 0.22025316455696203, + "grad_norm": 0.9587036967277527, + "learning_rate": 0.001339191642511228, + "loss": 1.6183, + "step": 2088 + }, + { + "epoch": 0.22035864978902953, + "grad_norm": 0.6701199412345886, + "learning_rate": 0.0013390362666531085, + "loss": 1.6537, + "step": 2089 + }, + { + "epoch": 0.22046413502109705, + "grad_norm": 0.867583155632019, + "learning_rate": 0.0013388808247905381, + "loss": 1.6659, + "step": 2090 + }, + { + "epoch": 0.22056962025316457, + "grad_norm": 0.8384730219841003, + "learning_rate": 0.0013387253169409351, + "loss": 1.6529, + "step": 2091 + }, + { + "epoch": 0.22067510548523206, + "grad_norm": 0.671696662902832, + "learning_rate": 0.0013385697431217247, + "loss": 1.652, + "step": 2092 + }, + { + "epoch": 0.22078059071729958, + "grad_norm": 1.0826666355133057, + "learning_rate": 0.0013384141033503394, + "loss": 1.6464, + "step": 2093 + }, + { + "epoch": 0.2208860759493671, + "grad_norm": 0.6691400408744812, + "learning_rate": 0.0013382583976442198, + "loss": 1.6376, + "step": 2094 + }, + { + "epoch": 0.2209915611814346, + "grad_norm": 1.1422587633132935, + "learning_rate": 0.0013381026260208136, + "loss": 1.6054, + "step": 2095 + }, + { + "epoch": 
0.2210970464135021, + "grad_norm": 0.920050859451294, + "learning_rate": 0.0013379467884975756, + "loss": 1.6409, + "step": 2096 + }, + { + "epoch": 0.22120253164556963, + "grad_norm": 0.7940022945404053, + "learning_rate": 0.001337790885091968, + "loss": 1.6562, + "step": 2097 + }, + { + "epoch": 0.22130801687763713, + "grad_norm": 1.2598811388015747, + "learning_rate": 0.0013376349158214609, + "loss": 1.685, + "step": 2098 + }, + { + "epoch": 0.22141350210970465, + "grad_norm": 0.7744095921516418, + "learning_rate": 0.0013374788807035314, + "loss": 1.6947, + "step": 2099 + }, + { + "epoch": 0.22151898734177214, + "grad_norm": 1.4878554344177246, + "learning_rate": 0.0013373227797556634, + "loss": 1.6576, + "step": 2100 + }, + { + "epoch": 0.22162447257383966, + "grad_norm": 0.9757323861122131, + "learning_rate": 0.0013371666129953497, + "loss": 1.6443, + "step": 2101 + }, + { + "epoch": 0.22172995780590718, + "grad_norm": 1.8404477834701538, + "learning_rate": 0.0013370103804400887, + "loss": 1.6946, + "step": 2102 + }, + { + "epoch": 0.22183544303797467, + "grad_norm": 1.7360197305679321, + "learning_rate": 0.001336854082107388, + "loss": 1.6722, + "step": 2103 + }, + { + "epoch": 0.2219409282700422, + "grad_norm": 0.9834901094436646, + "learning_rate": 0.001336697718014761, + "loss": 1.656, + "step": 2104 + }, + { + "epoch": 0.2220464135021097, + "grad_norm": 1.1897523403167725, + "learning_rate": 0.001336541288179729, + "loss": 1.6744, + "step": 2105 + }, + { + "epoch": 0.2221518987341772, + "grad_norm": 0.7259954214096069, + "learning_rate": 0.0013363847926198208, + "loss": 1.6268, + "step": 2106 + }, + { + "epoch": 0.22225738396624473, + "grad_norm": 1.4330683946609497, + "learning_rate": 0.0013362282313525728, + "loss": 1.6464, + "step": 2107 + }, + { + "epoch": 0.22236286919831225, + "grad_norm": 0.7712953686714172, + "learning_rate": 0.001336071604395528, + "loss": 1.6264, + "step": 2108 + }, + { + "epoch": 0.22246835443037974, + "grad_norm": 
1.1396092176437378, + "learning_rate": 0.0013359149117662377, + "loss": 1.6319, + "step": 2109 + }, + { + "epoch": 0.22257383966244726, + "grad_norm": 0.7327630519866943, + "learning_rate": 0.00133575815348226, + "loss": 1.6416, + "step": 2110 + }, + { + "epoch": 0.22267932489451478, + "grad_norm": 0.8011661171913147, + "learning_rate": 0.0013356013295611603, + "loss": 1.6, + "step": 2111 + }, + { + "epoch": 0.22278481012658227, + "grad_norm": 0.669901430606842, + "learning_rate": 0.0013354444400205114, + "loss": 1.633, + "step": 2112 + }, + { + "epoch": 0.2228902953586498, + "grad_norm": 0.8956749439239502, + "learning_rate": 0.0013352874848778938, + "loss": 1.6725, + "step": 2113 + }, + { + "epoch": 0.2229957805907173, + "grad_norm": 0.799102783203125, + "learning_rate": 0.0013351304641508951, + "loss": 1.6343, + "step": 2114 + }, + { + "epoch": 0.2231012658227848, + "grad_norm": 0.7731862664222717, + "learning_rate": 0.0013349733778571101, + "loss": 1.6379, + "step": 2115 + }, + { + "epoch": 0.22320675105485233, + "grad_norm": 0.7068437337875366, + "learning_rate": 0.0013348162260141412, + "loss": 1.634, + "step": 2116 + }, + { + "epoch": 0.22331223628691982, + "grad_norm": 0.7761829495429993, + "learning_rate": 0.001334659008639598, + "loss": 1.6464, + "step": 2117 + }, + { + "epoch": 0.22341772151898734, + "grad_norm": 0.8911853432655334, + "learning_rate": 0.0013345017257510975, + "loss": 1.6243, + "step": 2118 + }, + { + "epoch": 0.22352320675105486, + "grad_norm": 0.6755967140197754, + "learning_rate": 0.001334344377366264, + "loss": 1.6449, + "step": 2119 + }, + { + "epoch": 0.22362869198312235, + "grad_norm": 0.7729552984237671, + "learning_rate": 0.0013341869635027292, + "loss": 1.624, + "step": 2120 + }, + { + "epoch": 0.22373417721518987, + "grad_norm": 0.7046098113059998, + "learning_rate": 0.0013340294841781323, + "loss": 1.6439, + "step": 2121 + }, + { + "epoch": 0.2238396624472574, + "grad_norm": 0.9357505440711975, + "learning_rate": 
0.0013338719394101193, + "loss": 1.6087, + "step": 2122 + }, + { + "epoch": 0.22394514767932489, + "grad_norm": 0.694945752620697, + "learning_rate": 0.001333714329216344, + "loss": 1.6649, + "step": 2123 + }, + { + "epoch": 0.2240506329113924, + "grad_norm": 0.8282263278961182, + "learning_rate": 0.0013335566536144675, + "loss": 1.6306, + "step": 2124 + }, + { + "epoch": 0.22415611814345993, + "grad_norm": 0.7954811453819275, + "learning_rate": 0.0013333989126221581, + "loss": 1.61, + "step": 2125 + }, + { + "epoch": 0.22426160337552742, + "grad_norm": 0.7598928809165955, + "learning_rate": 0.0013332411062570914, + "loss": 1.6362, + "step": 2126 + }, + { + "epoch": 0.22436708860759494, + "grad_norm": 1.0397330522537231, + "learning_rate": 0.0013330832345369505, + "loss": 1.6807, + "step": 2127 + }, + { + "epoch": 0.22447257383966246, + "grad_norm": 0.6703567504882812, + "learning_rate": 0.0013329252974794256, + "loss": 1.6205, + "step": 2128 + }, + { + "epoch": 0.22457805907172995, + "grad_norm": 0.7821029424667358, + "learning_rate": 0.0013327672951022145, + "loss": 1.6585, + "step": 2129 + }, + { + "epoch": 0.22468354430379747, + "grad_norm": 0.703951895236969, + "learning_rate": 0.001332609227423022, + "loss": 1.6506, + "step": 2130 + }, + { + "epoch": 0.224789029535865, + "grad_norm": 0.6977583169937134, + "learning_rate": 0.0013324510944595605, + "loss": 1.6158, + "step": 2131 + }, + { + "epoch": 0.22489451476793249, + "grad_norm": 0.6419832110404968, + "learning_rate": 0.0013322928962295492, + "loss": 1.5823, + "step": 2132 + }, + { + "epoch": 0.225, + "grad_norm": 0.7435799837112427, + "learning_rate": 0.0013321346327507158, + "loss": 1.609, + "step": 2133 + }, + { + "epoch": 0.2251054852320675, + "grad_norm": 0.744787871837616, + "learning_rate": 0.0013319763040407938, + "loss": 1.6655, + "step": 2134 + }, + { + "epoch": 0.22521097046413502, + "grad_norm": 0.6535546779632568, + "learning_rate": 0.0013318179101175246, + "loss": 1.6376, + "step": 2135 + }, + 
{ + "epoch": 0.22531645569620254, + "grad_norm": 0.7002606987953186, + "learning_rate": 0.0013316594509986577, + "loss": 1.6652, + "step": 2136 + }, + { + "epoch": 0.22542194092827003, + "grad_norm": 0.6666337251663208, + "learning_rate": 0.0013315009267019487, + "loss": 1.6188, + "step": 2137 + }, + { + "epoch": 0.22552742616033755, + "grad_norm": 0.6483476758003235, + "learning_rate": 0.0013313423372451614, + "loss": 1.634, + "step": 2138 + }, + { + "epoch": 0.22563291139240507, + "grad_norm": 0.6626163721084595, + "learning_rate": 0.0013311836826460665, + "loss": 1.6564, + "step": 2139 + }, + { + "epoch": 0.22573839662447256, + "grad_norm": 0.6373234391212463, + "learning_rate": 0.0013310249629224417, + "loss": 1.6518, + "step": 2140 + }, + { + "epoch": 0.22584388185654009, + "grad_norm": 0.6656845211982727, + "learning_rate": 0.0013308661780920728, + "loss": 1.632, + "step": 2141 + }, + { + "epoch": 0.2259493670886076, + "grad_norm": 0.6881461143493652, + "learning_rate": 0.0013307073281727518, + "loss": 1.6019, + "step": 2142 + }, + { + "epoch": 0.2260548523206751, + "grad_norm": 0.6309990882873535, + "learning_rate": 0.0013305484131822792, + "loss": 1.6363, + "step": 2143 + }, + { + "epoch": 0.22616033755274262, + "grad_norm": 0.7494433522224426, + "learning_rate": 0.001330389433138462, + "loss": 1.692, + "step": 2144 + }, + { + "epoch": 0.22626582278481014, + "grad_norm": 0.7444030046463013, + "learning_rate": 0.0013302303880591147, + "loss": 1.6547, + "step": 2145 + }, + { + "epoch": 0.22637130801687763, + "grad_norm": 0.6405129432678223, + "learning_rate": 0.0013300712779620593, + "loss": 1.6318, + "step": 2146 + }, + { + "epoch": 0.22647679324894515, + "grad_norm": 0.742385983467102, + "learning_rate": 0.0013299121028651246, + "loss": 1.6708, + "step": 2147 + }, + { + "epoch": 0.22658227848101264, + "grad_norm": 0.7328295707702637, + "learning_rate": 0.001329752862786147, + "loss": 1.6715, + "step": 2148 + }, + { + "epoch": 0.22668776371308016, + 
"grad_norm": 0.7287135124206543, + "learning_rate": 0.0013295935577429703, + "loss": 1.624, + "step": 2149 + }, + { + "epoch": 0.22679324894514769, + "grad_norm": 1.024100661277771, + "learning_rate": 0.0013294341877534454, + "loss": 1.6137, + "step": 2150 + }, + { + "epoch": 0.22689873417721518, + "grad_norm": 1.0822447538375854, + "learning_rate": 0.0013292747528354304, + "loss": 1.7001, + "step": 2151 + }, + { + "epoch": 0.2270042194092827, + "grad_norm": 0.6519595980644226, + "learning_rate": 0.0013291152530067907, + "loss": 1.6979, + "step": 2152 + }, + { + "epoch": 0.22710970464135022, + "grad_norm": 0.9971275329589844, + "learning_rate": 0.0013289556882853993, + "loss": 1.6268, + "step": 2153 + }, + { + "epoch": 0.2272151898734177, + "grad_norm": 0.6243704557418823, + "learning_rate": 0.0013287960586891362, + "loss": 1.6487, + "step": 2154 + }, + { + "epoch": 0.22732067510548523, + "grad_norm": 1.1583865880966187, + "learning_rate": 0.0013286363642358884, + "loss": 1.6635, + "step": 2155 + }, + { + "epoch": 0.22742616033755275, + "grad_norm": 0.650800883769989, + "learning_rate": 0.0013284766049435504, + "loss": 1.6188, + "step": 2156 + }, + { + "epoch": 0.22753164556962024, + "grad_norm": 1.292920470237732, + "learning_rate": 0.0013283167808300247, + "loss": 1.6621, + "step": 2157 + }, + { + "epoch": 0.22763713080168776, + "grad_norm": 0.7177443504333496, + "learning_rate": 0.0013281568919132198, + "loss": 1.6128, + "step": 2158 + }, + { + "epoch": 0.22774261603375529, + "grad_norm": 0.9926222562789917, + "learning_rate": 0.0013279969382110524, + "loss": 1.6846, + "step": 2159 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 1.0132611989974976, + "learning_rate": 0.0013278369197414458, + "loss": 1.6483, + "step": 2160 + }, + { + "epoch": 0.2279535864978903, + "grad_norm": 0.6676522493362427, + "learning_rate": 0.0013276768365223306, + "loss": 1.6364, + "step": 2161 + }, + { + "epoch": 0.22805907172995782, + "grad_norm": 1.0042393207550049, + 
"learning_rate": 0.0013275166885716458, + "loss": 1.6194, + "step": 2162 + }, + { + "epoch": 0.2281645569620253, + "grad_norm": 0.6752215623855591, + "learning_rate": 0.0013273564759073361, + "loss": 1.6363, + "step": 2163 + }, + { + "epoch": 0.22827004219409283, + "grad_norm": 0.8779213428497314, + "learning_rate": 0.0013271961985473544, + "loss": 1.6183, + "step": 2164 + }, + { + "epoch": 0.22837552742616032, + "grad_norm": 0.7686419486999512, + "learning_rate": 0.0013270358565096606, + "loss": 1.5969, + "step": 2165 + }, + { + "epoch": 0.22848101265822784, + "grad_norm": 0.7798773646354675, + "learning_rate": 0.0013268754498122215, + "loss": 1.6658, + "step": 2166 + }, + { + "epoch": 0.22858649789029536, + "grad_norm": 0.8392724394798279, + "learning_rate": 0.0013267149784730117, + "loss": 1.6741, + "step": 2167 + }, + { + "epoch": 0.22869198312236286, + "grad_norm": 0.6801307797431946, + "learning_rate": 0.0013265544425100128, + "loss": 1.6602, + "step": 2168 + }, + { + "epoch": 0.22879746835443038, + "grad_norm": 0.8722564578056335, + "learning_rate": 0.0013263938419412137, + "loss": 1.6247, + "step": 2169 + }, + { + "epoch": 0.2289029535864979, + "grad_norm": 0.7344948649406433, + "learning_rate": 0.0013262331767846104, + "loss": 1.6363, + "step": 2170 + }, + { + "epoch": 0.2290084388185654, + "grad_norm": 0.6390421986579895, + "learning_rate": 0.0013260724470582064, + "loss": 1.6137, + "step": 2171 + }, + { + "epoch": 0.2291139240506329, + "grad_norm": 0.6864814162254333, + "learning_rate": 0.001325911652780012, + "loss": 1.6193, + "step": 2172 + }, + { + "epoch": 0.22921940928270043, + "grad_norm": 0.7660906314849854, + "learning_rate": 0.0013257507939680453, + "loss": 1.5994, + "step": 2173 + }, + { + "epoch": 0.22932489451476792, + "grad_norm": 0.5941644310951233, + "learning_rate": 0.0013255898706403312, + "loss": 1.6005, + "step": 2174 + }, + { + "epoch": 0.22943037974683544, + "grad_norm": 0.761006236076355, + "learning_rate": 0.001325428882814902, + 
"loss": 1.6036, + "step": 2175 + }, + { + "epoch": 0.22953586497890296, + "grad_norm": 0.7893893718719482, + "learning_rate": 0.001325267830509797, + "loss": 1.6369, + "step": 2176 + }, + { + "epoch": 0.22964135021097046, + "grad_norm": 0.7004324793815613, + "learning_rate": 0.0013251067137430629, + "loss": 1.6056, + "step": 2177 + }, + { + "epoch": 0.22974683544303798, + "grad_norm": 0.7607642412185669, + "learning_rate": 0.001324945532532754, + "loss": 1.6382, + "step": 2178 + }, + { + "epoch": 0.2298523206751055, + "grad_norm": 0.6365671753883362, + "learning_rate": 0.0013247842868969312, + "loss": 1.6362, + "step": 2179 + }, + { + "epoch": 0.229957805907173, + "grad_norm": 0.7162729501724243, + "learning_rate": 0.0013246229768536628, + "loss": 1.642, + "step": 2180 + }, + { + "epoch": 0.2300632911392405, + "grad_norm": 0.7343415021896362, + "learning_rate": 0.0013244616024210246, + "loss": 1.6599, + "step": 2181 + }, + { + "epoch": 0.230168776371308, + "grad_norm": 0.7058369517326355, + "learning_rate": 0.0013243001636170993, + "loss": 1.6426, + "step": 2182 + }, + { + "epoch": 0.23027426160337552, + "grad_norm": 0.7890293598175049, + "learning_rate": 0.0013241386604599772, + "loss": 1.6586, + "step": 2183 + }, + { + "epoch": 0.23037974683544304, + "grad_norm": 0.7925172448158264, + "learning_rate": 0.001323977092967755, + "loss": 1.6422, + "step": 2184 + }, + { + "epoch": 0.23048523206751054, + "grad_norm": 0.7007559537887573, + "learning_rate": 0.0013238154611585375, + "loss": 1.6221, + "step": 2185 + }, + { + "epoch": 0.23059071729957806, + "grad_norm": 0.8684946298599243, + "learning_rate": 0.0013236537650504361, + "loss": 1.6547, + "step": 2186 + }, + { + "epoch": 0.23069620253164558, + "grad_norm": 0.6654014587402344, + "learning_rate": 0.00132349200466157, + "loss": 1.6216, + "step": 2187 + }, + { + "epoch": 0.23080168776371307, + "grad_norm": 0.770599901676178, + "learning_rate": 0.0013233301800100652, + "loss": 1.638, + "step": 2188 + }, + { + "epoch": 
0.2309071729957806, + "grad_norm": 0.648103654384613, + "learning_rate": 0.0013231682911140545, + "loss": 1.6216, + "step": 2189 + }, + { + "epoch": 0.2310126582278481, + "grad_norm": 0.8366687893867493, + "learning_rate": 0.001323006337991679, + "loss": 1.6406, + "step": 2190 + }, + { + "epoch": 0.2311181434599156, + "grad_norm": 0.9552540183067322, + "learning_rate": 0.0013228443206610861, + "loss": 1.6484, + "step": 2191 + }, + { + "epoch": 0.23122362869198312, + "grad_norm": 0.6957361102104187, + "learning_rate": 0.0013226822391404305, + "loss": 1.6342, + "step": 2192 + }, + { + "epoch": 0.23132911392405064, + "grad_norm": 0.8989678025245667, + "learning_rate": 0.0013225200934478744, + "loss": 1.6835, + "step": 2193 + }, + { + "epoch": 0.23143459915611814, + "grad_norm": 0.7515004277229309, + "learning_rate": 0.0013223578836015868, + "loss": 1.6721, + "step": 2194 + }, + { + "epoch": 0.23154008438818566, + "grad_norm": 0.7023490071296692, + "learning_rate": 0.0013221956096197446, + "loss": 1.6166, + "step": 2195 + }, + { + "epoch": 0.23164556962025318, + "grad_norm": 0.7981106042861938, + "learning_rate": 0.001322033271520531, + "loss": 1.6053, + "step": 2196 + }, + { + "epoch": 0.23175105485232067, + "grad_norm": 0.6519787907600403, + "learning_rate": 0.001321870869322137, + "loss": 1.6306, + "step": 2197 + }, + { + "epoch": 0.2318565400843882, + "grad_norm": 0.7979682683944702, + "learning_rate": 0.0013217084030427604, + "loss": 1.613, + "step": 2198 + }, + { + "epoch": 0.23196202531645568, + "grad_norm": 0.6875908374786377, + "learning_rate": 0.0013215458727006064, + "loss": 1.6132, + "step": 2199 + }, + { + "epoch": 0.2320675105485232, + "grad_norm": 0.661091685295105, + "learning_rate": 0.0013213832783138873, + "loss": 1.6724, + "step": 2200 + }, + { + "epoch": 0.23217299578059072, + "grad_norm": 0.7696614265441895, + "learning_rate": 0.0013212206199008226, + "loss": 1.6374, + "step": 2201 + }, + { + "epoch": 0.23227848101265822, + "grad_norm": 
0.6276510953903198, + "learning_rate": 0.0013210578974796393, + "loss": 1.6385, + "step": 2202 + }, + { + "epoch": 0.23238396624472574, + "grad_norm": 0.6538985967636108, + "learning_rate": 0.001320895111068571, + "loss": 1.6245, + "step": 2203 + }, + { + "epoch": 0.23248945147679326, + "grad_norm": 0.6367506980895996, + "learning_rate": 0.0013207322606858588, + "loss": 1.6032, + "step": 2204 + }, + { + "epoch": 0.23259493670886075, + "grad_norm": 0.6060202717781067, + "learning_rate": 0.001320569346349751, + "loss": 1.5727, + "step": 2205 + }, + { + "epoch": 0.23270042194092827, + "grad_norm": 0.6762512922286987, + "learning_rate": 0.0013204063680785025, + "loss": 1.6736, + "step": 2206 + }, + { + "epoch": 0.2328059071729958, + "grad_norm": 0.6349103450775146, + "learning_rate": 0.0013202433258903761, + "loss": 1.6186, + "step": 2207 + }, + { + "epoch": 0.23291139240506328, + "grad_norm": 0.6589415669441223, + "learning_rate": 0.001320080219803642, + "loss": 1.6349, + "step": 2208 + }, + { + "epoch": 0.2330168776371308, + "grad_norm": 0.6370582580566406, + "learning_rate": 0.0013199170498365764, + "loss": 1.61, + "step": 2209 + }, + { + "epoch": 0.23312236286919832, + "grad_norm": 0.6770766377449036, + "learning_rate": 0.0013197538160074633, + "loss": 1.6376, + "step": 2210 + }, + { + "epoch": 0.23322784810126582, + "grad_norm": 0.6642658114433289, + "learning_rate": 0.0013195905183345943, + "loss": 1.6387, + "step": 2211 + }, + { + "epoch": 0.23333333333333334, + "grad_norm": 0.6486012935638428, + "learning_rate": 0.0013194271568362673, + "loss": 1.6273, + "step": 2212 + }, + { + "epoch": 0.23343881856540086, + "grad_norm": 0.7766047120094299, + "learning_rate": 0.001319263731530788, + "loss": 1.6238, + "step": 2213 + }, + { + "epoch": 0.23354430379746835, + "grad_norm": 0.6953220963478088, + "learning_rate": 0.0013191002424364693, + "loss": 1.635, + "step": 2214 + }, + { + "epoch": 0.23364978902953587, + "grad_norm": 0.6777811646461487, + "learning_rate": 
0.0013189366895716302, + "loss": 1.6729, + "step": 2215 + }, + { + "epoch": 0.23375527426160336, + "grad_norm": 0.7122456431388855, + "learning_rate": 0.0013187730729545982, + "loss": 1.6325, + "step": 2216 + }, + { + "epoch": 0.23386075949367088, + "grad_norm": 0.647774875164032, + "learning_rate": 0.0013186093926037072, + "loss": 1.6292, + "step": 2217 + }, + { + "epoch": 0.2339662447257384, + "grad_norm": 0.6868700981140137, + "learning_rate": 0.0013184456485372986, + "loss": 1.6481, + "step": 2218 + }, + { + "epoch": 0.2340717299578059, + "grad_norm": 0.6839486360549927, + "learning_rate": 0.0013182818407737203, + "loss": 1.6017, + "step": 2219 + }, + { + "epoch": 0.23417721518987342, + "grad_norm": 0.6608450412750244, + "learning_rate": 0.0013181179693313283, + "loss": 1.6462, + "step": 2220 + }, + { + "epoch": 0.23428270042194094, + "grad_norm": 0.6503073573112488, + "learning_rate": 0.0013179540342284847, + "loss": 1.624, + "step": 2221 + }, + { + "epoch": 0.23438818565400843, + "grad_norm": 0.7027050852775574, + "learning_rate": 0.0013177900354835598, + "loss": 1.596, + "step": 2222 + }, + { + "epoch": 0.23449367088607595, + "grad_norm": 0.7278969287872314, + "learning_rate": 0.00131762597311493, + "loss": 1.6212, + "step": 2223 + }, + { + "epoch": 0.23459915611814347, + "grad_norm": 0.7454676032066345, + "learning_rate": 0.0013174618471409793, + "loss": 1.6593, + "step": 2224 + }, + { + "epoch": 0.23470464135021096, + "grad_norm": 0.6895445585250854, + "learning_rate": 0.0013172976575800991, + "loss": 1.6294, + "step": 2225 + }, + { + "epoch": 0.23481012658227848, + "grad_norm": 0.7567785382270813, + "learning_rate": 0.0013171334044506878, + "loss": 1.6486, + "step": 2226 + }, + { + "epoch": 0.234915611814346, + "grad_norm": 0.883367657661438, + "learning_rate": 0.0013169690877711502, + "loss": 1.6152, + "step": 2227 + }, + { + "epoch": 0.2350210970464135, + "grad_norm": 0.7811601758003235, + "learning_rate": 0.0013168047075598993, + "loss": 1.6357, + 
"step": 2228 + }, + { + "epoch": 0.23512658227848102, + "grad_norm": 0.7307655811309814, + "learning_rate": 0.0013166402638353548, + "loss": 1.6472, + "step": 2229 + }, + { + "epoch": 0.23523206751054854, + "grad_norm": 0.915115475654602, + "learning_rate": 0.0013164757566159428, + "loss": 1.6266, + "step": 2230 + }, + { + "epoch": 0.23533755274261603, + "grad_norm": 0.6747360825538635, + "learning_rate": 0.0013163111859200978, + "loss": 1.6241, + "step": 2231 + }, + { + "epoch": 0.23544303797468355, + "grad_norm": 1.1280558109283447, + "learning_rate": 0.0013161465517662603, + "loss": 1.6331, + "step": 2232 + }, + { + "epoch": 0.23554852320675104, + "grad_norm": 0.9586478471755981, + "learning_rate": 0.001315981854172879, + "loss": 1.6546, + "step": 2233 + }, + { + "epoch": 0.23565400843881856, + "grad_norm": 0.7307789325714111, + "learning_rate": 0.0013158170931584084, + "loss": 1.6225, + "step": 2234 + }, + { + "epoch": 0.23575949367088608, + "grad_norm": 1.1000906229019165, + "learning_rate": 0.0013156522687413114, + "loss": 1.6139, + "step": 2235 + }, + { + "epoch": 0.23586497890295358, + "grad_norm": 0.7087038159370422, + "learning_rate": 0.0013154873809400568, + "loss": 1.6217, + "step": 2236 + }, + { + "epoch": 0.2359704641350211, + "grad_norm": 0.8287164568901062, + "learning_rate": 0.0013153224297731215, + "loss": 1.5996, + "step": 2237 + }, + { + "epoch": 0.23607594936708862, + "grad_norm": 0.7696189880371094, + "learning_rate": 0.0013151574152589888, + "loss": 1.6557, + "step": 2238 + }, + { + "epoch": 0.2361814345991561, + "grad_norm": 0.648868203163147, + "learning_rate": 0.00131499233741615, + "loss": 1.6169, + "step": 2239 + }, + { + "epoch": 0.23628691983122363, + "grad_norm": 0.6858984231948853, + "learning_rate": 0.001314827196263102, + "loss": 1.6197, + "step": 2240 + }, + { + "epoch": 0.23639240506329115, + "grad_norm": 0.7303621172904968, + "learning_rate": 0.0013146619918183507, + "loss": 1.6078, + "step": 2241 + }, + { + "epoch": 
0.23649789029535864, + "grad_norm": 0.6413590312004089, + "learning_rate": 0.0013144967241004073, + "loss": 1.596, + "step": 2242 + }, + { + "epoch": 0.23660337552742616, + "grad_norm": 0.710537850856781, + "learning_rate": 0.001314331393127791, + "loss": 1.602, + "step": 2243 + }, + { + "epoch": 0.23670886075949368, + "grad_norm": 0.6591286063194275, + "learning_rate": 0.0013141659989190282, + "loss": 1.6161, + "step": 2244 + }, + { + "epoch": 0.23681434599156118, + "grad_norm": 0.7115836143493652, + "learning_rate": 0.001314000541492652, + "loss": 1.6225, + "step": 2245 + }, + { + "epoch": 0.2369198312236287, + "grad_norm": 0.6893541216850281, + "learning_rate": 0.0013138350208672029, + "loss": 1.6714, + "step": 2246 + }, + { + "epoch": 0.2370253164556962, + "grad_norm": 0.6853646636009216, + "learning_rate": 0.001313669437061228, + "loss": 1.592, + "step": 2247 + }, + { + "epoch": 0.2371308016877637, + "grad_norm": 0.8179225921630859, + "learning_rate": 0.0013135037900932822, + "loss": 1.6501, + "step": 2248 + }, + { + "epoch": 0.23723628691983123, + "grad_norm": 0.6734607219696045, + "learning_rate": 0.0013133380799819267, + "loss": 1.6533, + "step": 2249 + }, + { + "epoch": 0.23734177215189872, + "grad_norm": 0.9079087376594543, + "learning_rate": 0.0013131723067457302, + "loss": 1.6151, + "step": 2250 + }, + { + "epoch": 0.23744725738396624, + "grad_norm": 0.8300459384918213, + "learning_rate": 0.0013130064704032684, + "loss": 1.6006, + "step": 2251 + }, + { + "epoch": 0.23755274261603376, + "grad_norm": 0.6849030256271362, + "learning_rate": 0.0013128405709731245, + "loss": 1.6259, + "step": 2252 + }, + { + "epoch": 0.23765822784810126, + "grad_norm": 0.7447664141654968, + "learning_rate": 0.001312674608473888, + "loss": 1.6176, + "step": 2253 + }, + { + "epoch": 0.23776371308016878, + "grad_norm": 0.6800106763839722, + "learning_rate": 0.0013125085829241558, + "loss": 1.6141, + "step": 2254 + }, + { + "epoch": 0.2378691983122363, + "grad_norm": 
0.7114765048027039, + "learning_rate": 0.0013123424943425317, + "loss": 1.6131, + "step": 2255 + }, + { + "epoch": 0.2379746835443038, + "grad_norm": 0.7112559080123901, + "learning_rate": 0.0013121763427476273, + "loss": 1.6071, + "step": 2256 + }, + { + "epoch": 0.2380801687763713, + "grad_norm": 0.7121305465698242, + "learning_rate": 0.0013120101281580605, + "loss": 1.5789, + "step": 2257 + }, + { + "epoch": 0.23818565400843883, + "grad_norm": 1.0447227954864502, + "learning_rate": 0.0013118438505924563, + "loss": 1.6257, + "step": 2258 + }, + { + "epoch": 0.23829113924050632, + "grad_norm": 0.747133195400238, + "learning_rate": 0.001311677510069447, + "loss": 1.6474, + "step": 2259 + }, + { + "epoch": 0.23839662447257384, + "grad_norm": 0.7665300369262695, + "learning_rate": 0.0013115111066076721, + "loss": 1.6354, + "step": 2260 + }, + { + "epoch": 0.23850210970464136, + "grad_norm": 0.6681908369064331, + "learning_rate": 0.0013113446402257774, + "loss": 1.6201, + "step": 2261 + }, + { + "epoch": 0.23860759493670886, + "grad_norm": 0.8080542683601379, + "learning_rate": 0.001311178110942417, + "loss": 1.6249, + "step": 2262 + }, + { + "epoch": 0.23871308016877638, + "grad_norm": 0.999213457107544, + "learning_rate": 0.0013110115187762506, + "loss": 1.6446, + "step": 2263 + }, + { + "epoch": 0.23881856540084387, + "grad_norm": 0.6643561720848083, + "learning_rate": 0.0013108448637459465, + "loss": 1.6713, + "step": 2264 + }, + { + "epoch": 0.2389240506329114, + "grad_norm": 1.0089516639709473, + "learning_rate": 0.0013106781458701784, + "loss": 1.6743, + "step": 2265 + }, + { + "epoch": 0.2390295358649789, + "grad_norm": 1.029296875, + "learning_rate": 0.0013105113651676287, + "loss": 1.6401, + "step": 2266 + }, + { + "epoch": 0.2391350210970464, + "grad_norm": 0.6417676210403442, + "learning_rate": 0.001310344521656985, + "loss": 1.6752, + "step": 2267 + }, + { + "epoch": 0.23924050632911392, + "grad_norm": 0.8475544452667236, + "learning_rate": 
0.001310177615356944, + "loss": 1.643, + "step": 2268 + }, + { + "epoch": 0.23934599156118144, + "grad_norm": 0.7058041095733643, + "learning_rate": 0.0013100106462862076, + "loss": 1.6318, + "step": 2269 + }, + { + "epoch": 0.23945147679324894, + "grad_norm": 0.8280340433120728, + "learning_rate": 0.0013098436144634862, + "loss": 1.6482, + "step": 2270 + }, + { + "epoch": 0.23955696202531646, + "grad_norm": 0.7836694717407227, + "learning_rate": 0.0013096765199074958, + "loss": 1.6354, + "step": 2271 + }, + { + "epoch": 0.23966244725738398, + "grad_norm": 0.6791670918464661, + "learning_rate": 0.0013095093626369608, + "loss": 1.6007, + "step": 2272 + }, + { + "epoch": 0.23976793248945147, + "grad_norm": 0.7007776498794556, + "learning_rate": 0.0013093421426706117, + "loss": 1.6035, + "step": 2273 + }, + { + "epoch": 0.239873417721519, + "grad_norm": 0.7466392517089844, + "learning_rate": 0.0013091748600271862, + "loss": 1.6436, + "step": 2274 + }, + { + "epoch": 0.2399789029535865, + "grad_norm": 0.9531726837158203, + "learning_rate": 0.0013090075147254294, + "loss": 1.6208, + "step": 2275 + }, + { + "epoch": 0.240084388185654, + "grad_norm": 0.6762357354164124, + "learning_rate": 0.0013088401067840932, + "loss": 1.6472, + "step": 2276 + }, + { + "epoch": 0.24018987341772152, + "grad_norm": 0.8370494842529297, + "learning_rate": 0.0013086726362219363, + "loss": 1.6418, + "step": 2277 + }, + { + "epoch": 0.24029535864978904, + "grad_norm": 0.7289895415306091, + "learning_rate": 0.0013085051030577246, + "loss": 1.6392, + "step": 2278 + }, + { + "epoch": 0.24040084388185654, + "grad_norm": 0.8554510474205017, + "learning_rate": 0.0013083375073102315, + "loss": 1.6393, + "step": 2279 + }, + { + "epoch": 0.24050632911392406, + "grad_norm": 0.9288795590400696, + "learning_rate": 0.0013081698489982364, + "loss": 1.6424, + "step": 2280 + }, + { + "epoch": 0.24061181434599155, + "grad_norm": 0.8155109882354736, + "learning_rate": 0.0013080021281405264, + "loss": 1.6201, + 
"step": 2281 + }, + { + "epoch": 0.24071729957805907, + "grad_norm": 1.5058915615081787, + "learning_rate": 0.0013078343447558954, + "loss": 1.5957, + "step": 2282 + }, + { + "epoch": 0.2408227848101266, + "grad_norm": 0.7682785987854004, + "learning_rate": 0.0013076664988631447, + "loss": 1.6218, + "step": 2283 + }, + { + "epoch": 0.24092827004219408, + "grad_norm": 1.2144111394882202, + "learning_rate": 0.001307498590481082, + "loss": 1.5947, + "step": 2284 + }, + { + "epoch": 0.2410337552742616, + "grad_norm": 0.7958602905273438, + "learning_rate": 0.001307330619628522, + "loss": 1.617, + "step": 2285 + }, + { + "epoch": 0.24113924050632912, + "grad_norm": 1.0588595867156982, + "learning_rate": 0.0013071625863242875, + "loss": 1.6595, + "step": 2286 + }, + { + "epoch": 0.24124472573839661, + "grad_norm": 0.7922402620315552, + "learning_rate": 0.0013069944905872064, + "loss": 1.6335, + "step": 2287 + }, + { + "epoch": 0.24135021097046414, + "grad_norm": 0.737405002117157, + "learning_rate": 0.0013068263324361156, + "loss": 1.6317, + "step": 2288 + }, + { + "epoch": 0.24145569620253166, + "grad_norm": 0.7237796783447266, + "learning_rate": 0.0013066581118898574, + "loss": 1.6075, + "step": 2289 + }, + { + "epoch": 0.24156118143459915, + "grad_norm": 0.7564858198165894, + "learning_rate": 0.001306489828967282, + "loss": 1.6281, + "step": 2290 + }, + { + "epoch": 0.24166666666666667, + "grad_norm": 0.6389861106872559, + "learning_rate": 0.0013063214836872465, + "loss": 1.63, + "step": 2291 + }, + { + "epoch": 0.2417721518987342, + "grad_norm": 0.7265346050262451, + "learning_rate": 0.0013061530760686145, + "loss": 1.5782, + "step": 2292 + }, + { + "epoch": 0.24187763713080168, + "grad_norm": 0.6509338021278381, + "learning_rate": 0.0013059846061302574, + "loss": 1.6286, + "step": 2293 + }, + { + "epoch": 0.2419831223628692, + "grad_norm": 0.7815049886703491, + "learning_rate": 0.0013058160738910526, + "loss": 1.6266, + "step": 2294 + }, + { + "epoch": 
0.24208860759493672, + "grad_norm": 0.7711499929428101, + "learning_rate": 0.0013056474793698852, + "loss": 1.6202, + "step": 2295 + }, + { + "epoch": 0.24219409282700421, + "grad_norm": 0.6887122392654419, + "learning_rate": 0.001305478822585647, + "loss": 1.6239, + "step": 2296 + }, + { + "epoch": 0.24229957805907174, + "grad_norm": 0.7890573143959045, + "learning_rate": 0.001305310103557237, + "loss": 1.67, + "step": 2297 + }, + { + "epoch": 0.24240506329113923, + "grad_norm": 0.7009997963905334, + "learning_rate": 0.0013051413223035607, + "loss": 1.6096, + "step": 2298 + }, + { + "epoch": 0.24251054852320675, + "grad_norm": 0.7845983505249023, + "learning_rate": 0.0013049724788435312, + "loss": 1.6126, + "step": 2299 + }, + { + "epoch": 0.24261603375527427, + "grad_norm": 0.7661177515983582, + "learning_rate": 0.0013048035731960679, + "loss": 1.5797, + "step": 2300 + }, + { + "epoch": 0.24272151898734176, + "grad_norm": 0.717477560043335, + "learning_rate": 0.0013046346053800979, + "loss": 1.6337, + "step": 2301 + }, + { + "epoch": 0.24282700421940928, + "grad_norm": 0.7024415135383606, + "learning_rate": 0.0013044655754145546, + "loss": 1.5975, + "step": 2302 + }, + { + "epoch": 0.2429324894514768, + "grad_norm": 0.8985474705696106, + "learning_rate": 0.001304296483318379, + "loss": 1.6316, + "step": 2303 + }, + { + "epoch": 0.2430379746835443, + "grad_norm": 0.6880091428756714, + "learning_rate": 0.0013041273291105181, + "loss": 1.6243, + "step": 2304 + }, + { + "epoch": 0.24314345991561181, + "grad_norm": 0.7499157786369324, + "learning_rate": 0.0013039581128099272, + "loss": 1.6029, + "step": 2305 + }, + { + "epoch": 0.24324894514767934, + "grad_norm": 0.9673587083816528, + "learning_rate": 0.0013037888344355673, + "loss": 1.6463, + "step": 2306 + }, + { + "epoch": 0.24335443037974683, + "grad_norm": 0.7303619384765625, + "learning_rate": 0.001303619494006407, + "loss": 1.5984, + "step": 2307 + }, + { + "epoch": 0.24345991561181435, + "grad_norm": 
0.6641243696212769, + "learning_rate": 0.0013034500915414218, + "loss": 1.5847, + "step": 2308 + }, + { + "epoch": 0.24356540084388187, + "grad_norm": 0.7034048438072205, + "learning_rate": 0.0013032806270595941, + "loss": 1.6309, + "step": 2309 + }, + { + "epoch": 0.24367088607594936, + "grad_norm": 0.7001508474349976, + "learning_rate": 0.0013031111005799133, + "loss": 1.6394, + "step": 2310 + }, + { + "epoch": 0.24377637130801688, + "grad_norm": 0.6243919134140015, + "learning_rate": 0.0013029415121213756, + "loss": 1.6172, + "step": 2311 + }, + { + "epoch": 0.2438818565400844, + "grad_norm": 0.7071080207824707, + "learning_rate": 0.0013027718617029842, + "loss": 1.655, + "step": 2312 + }, + { + "epoch": 0.2439873417721519, + "grad_norm": 0.6209005117416382, + "learning_rate": 0.0013026021493437495, + "loss": 1.6671, + "step": 2313 + }, + { + "epoch": 0.24409282700421941, + "grad_norm": 0.6509441137313843, + "learning_rate": 0.0013024323750626882, + "loss": 1.6287, + "step": 2314 + }, + { + "epoch": 0.2441983122362869, + "grad_norm": 0.6515529751777649, + "learning_rate": 0.0013022625388788248, + "loss": 1.6176, + "step": 2315 + }, + { + "epoch": 0.24430379746835443, + "grad_norm": 0.6392093896865845, + "learning_rate": 0.0013020926408111903, + "loss": 1.6627, + "step": 2316 + }, + { + "epoch": 0.24440928270042195, + "grad_norm": 0.6097989678382874, + "learning_rate": 0.001301922680878822, + "loss": 1.5782, + "step": 2317 + }, + { + "epoch": 0.24451476793248944, + "grad_norm": 0.634003758430481, + "learning_rate": 0.001301752659100765, + "loss": 1.6202, + "step": 2318 + }, + { + "epoch": 0.24462025316455696, + "grad_norm": 0.6230834126472473, + "learning_rate": 0.001301582575496072, + "loss": 1.6402, + "step": 2319 + }, + { + "epoch": 0.24472573839662448, + "grad_norm": 0.62800133228302, + "learning_rate": 0.0013014124300838004, + "loss": 1.5786, + "step": 2320 + }, + { + "epoch": 0.24483122362869197, + "grad_norm": 0.5994121432304382, + "learning_rate": 
0.0013012422228830165, + "loss": 1.5941, + "step": 2321 + }, + { + "epoch": 0.2449367088607595, + "grad_norm": 0.6458960771560669, + "learning_rate": 0.0013010719539127927, + "loss": 1.6351, + "step": 2322 + }, + { + "epoch": 0.24504219409282701, + "grad_norm": 0.7452684044837952, + "learning_rate": 0.001300901623192209, + "loss": 1.6234, + "step": 2323 + }, + { + "epoch": 0.2451476793248945, + "grad_norm": 0.7749155163764954, + "learning_rate": 0.0013007312307403507, + "loss": 1.6179, + "step": 2324 + }, + { + "epoch": 0.24525316455696203, + "grad_norm": 0.6231822371482849, + "learning_rate": 0.0013005607765763122, + "loss": 1.5922, + "step": 2325 + }, + { + "epoch": 0.24535864978902955, + "grad_norm": 0.7358991503715515, + "learning_rate": 0.0013003902607191934, + "loss": 1.6252, + "step": 2326 + }, + { + "epoch": 0.24546413502109704, + "grad_norm": 0.761898398399353, + "learning_rate": 0.0013002196831881014, + "loss": 1.6022, + "step": 2327 + }, + { + "epoch": 0.24556962025316456, + "grad_norm": 0.6681113839149475, + "learning_rate": 0.0013000490440021502, + "loss": 1.6069, + "step": 2328 + }, + { + "epoch": 0.24567510548523205, + "grad_norm": 0.612098217010498, + "learning_rate": 0.0012998783431804608, + "loss": 1.5994, + "step": 2329 + }, + { + "epoch": 0.24578059071729957, + "grad_norm": 0.7419360876083374, + "learning_rate": 0.0012997075807421612, + "loss": 1.5949, + "step": 2330 + }, + { + "epoch": 0.2458860759493671, + "grad_norm": 0.68133145570755, + "learning_rate": 0.0012995367567063861, + "loss": 1.5687, + "step": 2331 + }, + { + "epoch": 0.2459915611814346, + "grad_norm": 0.6289219260215759, + "learning_rate": 0.001299365871092277, + "loss": 1.6383, + "step": 2332 + }, + { + "epoch": 0.2460970464135021, + "grad_norm": 0.6213741302490234, + "learning_rate": 0.0012991949239189826, + "loss": 1.6174, + "step": 2333 + }, + { + "epoch": 0.24620253164556963, + "grad_norm": 0.6702001690864563, + "learning_rate": 0.0012990239152056587, + "loss": 1.6407, + 
"step": 2334 + }, + { + "epoch": 0.24630801687763712, + "grad_norm": 0.8354270458221436, + "learning_rate": 0.0012988528449714672, + "loss": 1.6215, + "step": 2335 + }, + { + "epoch": 0.24641350210970464, + "grad_norm": 0.8061347007751465, + "learning_rate": 0.001298681713235578, + "loss": 1.6245, + "step": 2336 + }, + { + "epoch": 0.24651898734177216, + "grad_norm": 0.6491813659667969, + "learning_rate": 0.0012985105200171664, + "loss": 1.5455, + "step": 2337 + }, + { + "epoch": 0.24662447257383965, + "grad_norm": 0.9518164396286011, + "learning_rate": 0.001298339265335416, + "loss": 1.5957, + "step": 2338 + }, + { + "epoch": 0.24672995780590717, + "grad_norm": 0.9073820114135742, + "learning_rate": 0.0012981679492095166, + "loss": 1.609, + "step": 2339 + }, + { + "epoch": 0.2468354430379747, + "grad_norm": 0.75528484582901, + "learning_rate": 0.0012979965716586653, + "loss": 1.6138, + "step": 2340 + }, + { + "epoch": 0.2469409282700422, + "grad_norm": 1.0873064994812012, + "learning_rate": 0.0012978251327020655, + "loss": 1.6386, + "step": 2341 + }, + { + "epoch": 0.2470464135021097, + "grad_norm": 0.7684652805328369, + "learning_rate": 0.0012976536323589278, + "loss": 1.6118, + "step": 2342 + }, + { + "epoch": 0.24715189873417723, + "grad_norm": 0.906119167804718, + "learning_rate": 0.0012974820706484697, + "loss": 1.6044, + "step": 2343 + }, + { + "epoch": 0.24725738396624472, + "grad_norm": 1.0479506254196167, + "learning_rate": 0.001297310447589916, + "loss": 1.6336, + "step": 2344 + }, + { + "epoch": 0.24736286919831224, + "grad_norm": 0.742403507232666, + "learning_rate": 0.0012971387632024968, + "loss": 1.5974, + "step": 2345 + }, + { + "epoch": 0.24746835443037973, + "grad_norm": 1.0362122058868408, + "learning_rate": 0.0012969670175054515, + "loss": 1.6249, + "step": 2346 + }, + { + "epoch": 0.24757383966244725, + "grad_norm": 0.9444226622581482, + "learning_rate": 0.0012967952105180243, + "loss": 1.5884, + "step": 2347 + }, + { + "epoch": 
0.24767932489451477, + "grad_norm": 0.7453088164329529, + "learning_rate": 0.001296623342259467, + "loss": 1.5752, + "step": 2348 + }, + { + "epoch": 0.24778481012658227, + "grad_norm": 1.4388419389724731, + "learning_rate": 0.0012964514127490388, + "loss": 1.5946, + "step": 2349 + }, + { + "epoch": 0.2478902953586498, + "grad_norm": 0.754718542098999, + "learning_rate": 0.0012962794220060048, + "loss": 1.6214, + "step": 2350 + }, + { + "epoch": 0.2479957805907173, + "grad_norm": 1.2642415761947632, + "learning_rate": 0.0012961073700496378, + "loss": 1.6163, + "step": 2351 + }, + { + "epoch": 0.2481012658227848, + "grad_norm": 0.745709240436554, + "learning_rate": 0.0012959352568992163, + "loss": 1.632, + "step": 2352 + }, + { + "epoch": 0.24820675105485232, + "grad_norm": 1.2534725666046143, + "learning_rate": 0.0012957630825740274, + "loss": 1.598, + "step": 2353 + }, + { + "epoch": 0.24831223628691984, + "grad_norm": 0.7479328513145447, + "learning_rate": 0.0012955908470933637, + "loss": 1.6244, + "step": 2354 + }, + { + "epoch": 0.24841772151898733, + "grad_norm": 1.4068530797958374, + "learning_rate": 0.0012954185504765248, + "loss": 1.6457, + "step": 2355 + }, + { + "epoch": 0.24852320675105485, + "grad_norm": 0.7554658055305481, + "learning_rate": 0.0012952461927428177, + "loss": 1.6145, + "step": 2356 + }, + { + "epoch": 0.24862869198312237, + "grad_norm": 1.3733830451965332, + "learning_rate": 0.001295073773911556, + "loss": 1.6307, + "step": 2357 + }, + { + "epoch": 0.24873417721518987, + "grad_norm": 0.7466481924057007, + "learning_rate": 0.0012949012940020599, + "loss": 1.6418, + "step": 2358 + }, + { + "epoch": 0.2488396624472574, + "grad_norm": 1.3056995868682861, + "learning_rate": 0.0012947287530336565, + "loss": 1.6018, + "step": 2359 + }, + { + "epoch": 0.2489451476793249, + "grad_norm": 0.8418545722961426, + "learning_rate": 0.0012945561510256801, + "loss": 1.6019, + "step": 2360 + }, + { + "epoch": 0.2490506329113924, + "grad_norm": 
1.503031611442566, + "learning_rate": 0.0012943834879974717, + "loss": 1.6078, + "step": 2361 + }, + { + "epoch": 0.24915611814345992, + "grad_norm": 0.9673888683319092, + "learning_rate": 0.001294210763968379, + "loss": 1.6284, + "step": 2362 + }, + { + "epoch": 0.2492616033755274, + "grad_norm": 1.186733603477478, + "learning_rate": 0.0012940379789577565, + "loss": 1.6316, + "step": 2363 + }, + { + "epoch": 0.24936708860759493, + "grad_norm": 0.9701619744300842, + "learning_rate": 0.0012938651329849654, + "loss": 1.6108, + "step": 2364 + }, + { + "epoch": 0.24947257383966245, + "grad_norm": 0.9917061924934387, + "learning_rate": 0.0012936922260693743, + "loss": 1.5903, + "step": 2365 + }, + { + "epoch": 0.24957805907172995, + "grad_norm": 0.901165246963501, + "learning_rate": 0.0012935192582303582, + "loss": 1.6064, + "step": 2366 + }, + { + "epoch": 0.24968354430379747, + "grad_norm": 0.9531580805778503, + "learning_rate": 0.001293346229487299, + "loss": 1.6375, + "step": 2367 + }, + { + "epoch": 0.249789029535865, + "grad_norm": 0.9567273259162903, + "learning_rate": 0.0012931731398595854, + "loss": 1.6205, + "step": 2368 + }, + { + "epoch": 0.24989451476793248, + "grad_norm": 1.0050784349441528, + "learning_rate": 0.001292999989366613, + "loss": 1.6236, + "step": 2369 + }, + { + "epoch": 0.25, + "grad_norm": 0.853762149810791, + "learning_rate": 0.001292826778027784, + "loss": 1.6288, + "step": 2370 + }, + { + "epoch": 0.2501054852320675, + "grad_norm": 0.8997113704681396, + "learning_rate": 0.001292653505862508, + "loss": 1.6101, + "step": 2371 + }, + { + "epoch": 0.25021097046413504, + "grad_norm": 0.7880038619041443, + "learning_rate": 0.0012924801728902006, + "loss": 1.6047, + "step": 2372 + }, + { + "epoch": 0.25031645569620253, + "grad_norm": 0.9362532496452332, + "learning_rate": 0.0012923067791302848, + "loss": 1.6008, + "step": 2373 + }, + { + "epoch": 0.25042194092827, + "grad_norm": 0.7260082960128784, + "learning_rate": 0.0012921333246021904, + 
"loss": 1.6089, + "step": 2374 + }, + { + "epoch": 0.2505274261603376, + "grad_norm": 1.0089881420135498, + "learning_rate": 0.0012919598093253533, + "loss": 1.5951, + "step": 2375 + }, + { + "epoch": 0.25063291139240507, + "grad_norm": 0.7225635647773743, + "learning_rate": 0.0012917862333192173, + "loss": 1.6403, + "step": 2376 + }, + { + "epoch": 0.25073839662447256, + "grad_norm": 0.9410362839698792, + "learning_rate": 0.0012916125966032322, + "loss": 1.5773, + "step": 2377 + }, + { + "epoch": 0.2508438818565401, + "grad_norm": 0.7071385979652405, + "learning_rate": 0.001291438899196855, + "loss": 1.6, + "step": 2378 + }, + { + "epoch": 0.2509493670886076, + "grad_norm": 0.7417123913764954, + "learning_rate": 0.0012912651411195494, + "loss": 1.6368, + "step": 2379 + }, + { + "epoch": 0.2510548523206751, + "grad_norm": 0.6606633067131042, + "learning_rate": 0.0012910913223907856, + "loss": 1.5932, + "step": 2380 + }, + { + "epoch": 0.25116033755274264, + "grad_norm": 0.7492092251777649, + "learning_rate": 0.0012909174430300412, + "loss": 1.6296, + "step": 2381 + }, + { + "epoch": 0.25126582278481013, + "grad_norm": 0.6898736953735352, + "learning_rate": 0.0012907435030567996, + "loss": 1.5992, + "step": 2382 + }, + { + "epoch": 0.2513713080168776, + "grad_norm": 0.7324532270431519, + "learning_rate": 0.0012905695024905525, + "loss": 1.6206, + "step": 2383 + }, + { + "epoch": 0.2514767932489452, + "grad_norm": 0.6070034503936768, + "learning_rate": 0.0012903954413507968, + "loss": 1.5768, + "step": 2384 + }, + { + "epoch": 0.25158227848101267, + "grad_norm": 0.7376143932342529, + "learning_rate": 0.0012902213196570376, + "loss": 1.6134, + "step": 2385 + }, + { + "epoch": 0.25168776371308016, + "grad_norm": 0.6459804773330688, + "learning_rate": 0.0012900471374287855, + "loss": 1.609, + "step": 2386 + }, + { + "epoch": 0.25179324894514765, + "grad_norm": 0.6214013695716858, + "learning_rate": 0.0012898728946855588, + "loss": 1.6726, + "step": 2387 + }, + { + 
"epoch": 0.2518987341772152, + "grad_norm": 0.670207142829895, + "learning_rate": 0.001289698591446882, + "loss": 1.5701, + "step": 2388 + }, + { + "epoch": 0.2520042194092827, + "grad_norm": 0.6050670146942139, + "learning_rate": 0.0012895242277322872, + "loss": 1.5726, + "step": 2389 + }, + { + "epoch": 0.2521097046413502, + "grad_norm": 0.8343501687049866, + "learning_rate": 0.0012893498035613123, + "loss": 1.6053, + "step": 2390 + }, + { + "epoch": 0.25221518987341773, + "grad_norm": 0.9546781778335571, + "learning_rate": 0.0012891753189535023, + "loss": 1.591, + "step": 2391 + }, + { + "epoch": 0.2523206751054852, + "grad_norm": 0.8709582686424255, + "learning_rate": 0.0012890007739284092, + "loss": 1.5871, + "step": 2392 + }, + { + "epoch": 0.2524261603375527, + "grad_norm": 0.626197338104248, + "learning_rate": 0.001288826168505592, + "loss": 1.5822, + "step": 2393 + }, + { + "epoch": 0.25253164556962027, + "grad_norm": 0.7867996096611023, + "learning_rate": 0.0012886515027046156, + "loss": 1.5996, + "step": 2394 + }, + { + "epoch": 0.25263713080168776, + "grad_norm": 0.7302479147911072, + "learning_rate": 0.0012884767765450524, + "loss": 1.5602, + "step": 2395 + }, + { + "epoch": 0.25274261603375525, + "grad_norm": 0.656366765499115, + "learning_rate": 0.0012883019900464814, + "loss": 1.6274, + "step": 2396 + }, + { + "epoch": 0.2528481012658228, + "grad_norm": 0.685299813747406, + "learning_rate": 0.001288127143228488, + "loss": 1.6627, + "step": 2397 + }, + { + "epoch": 0.2529535864978903, + "grad_norm": 0.6727539300918579, + "learning_rate": 0.0012879522361106646, + "loss": 1.6407, + "step": 2398 + }, + { + "epoch": 0.2530590717299578, + "grad_norm": 0.627257227897644, + "learning_rate": 0.0012877772687126111, + "loss": 1.6021, + "step": 2399 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 0.6158517599105835, + "learning_rate": 0.001287602241053933, + "loss": 1.6189, + "step": 2400 + }, + { + "epoch": 0.2532700421940928, + "grad_norm": 
0.6271724104881287, + "learning_rate": 0.001287427153154243, + "loss": 1.6006, + "step": 2401 + }, + { + "epoch": 0.2533755274261603, + "grad_norm": 0.6518517136573792, + "learning_rate": 0.0012872520050331608, + "loss": 1.6111, + "step": 2402 + }, + { + "epoch": 0.25348101265822787, + "grad_norm": 0.654318630695343, + "learning_rate": 0.0012870767967103122, + "loss": 1.6307, + "step": 2403 + }, + { + "epoch": 0.25358649789029536, + "grad_norm": 0.6474930644035339, + "learning_rate": 0.0012869015282053304, + "loss": 1.5772, + "step": 2404 + }, + { + "epoch": 0.25369198312236285, + "grad_norm": 0.6311898827552795, + "learning_rate": 0.0012867261995378554, + "loss": 1.6399, + "step": 2405 + }, + { + "epoch": 0.2537974683544304, + "grad_norm": 0.6252034306526184, + "learning_rate": 0.001286550810727533, + "loss": 1.6043, + "step": 2406 + }, + { + "epoch": 0.2539029535864979, + "grad_norm": 0.6448248028755188, + "learning_rate": 0.0012863753617940172, + "loss": 1.6309, + "step": 2407 + }, + { + "epoch": 0.2540084388185654, + "grad_norm": 0.6405127048492432, + "learning_rate": 0.001286199852756967, + "loss": 1.5973, + "step": 2408 + }, + { + "epoch": 0.25411392405063293, + "grad_norm": 0.7722159624099731, + "learning_rate": 0.0012860242836360502, + "loss": 1.5823, + "step": 2409 + }, + { + "epoch": 0.2542194092827004, + "grad_norm": 0.807421088218689, + "learning_rate": 0.0012858486544509392, + "loss": 1.6389, + "step": 2410 + }, + { + "epoch": 0.2543248945147679, + "grad_norm": 0.6602416634559631, + "learning_rate": 0.0012856729652213144, + "loss": 1.6471, + "step": 2411 + }, + { + "epoch": 0.25443037974683547, + "grad_norm": 0.8903405070304871, + "learning_rate": 0.001285497215966863, + "loss": 1.61, + "step": 2412 + }, + { + "epoch": 0.25453586497890296, + "grad_norm": 0.6999219059944153, + "learning_rate": 0.0012853214067072782, + "loss": 1.6202, + "step": 2413 + }, + { + "epoch": 0.25464135021097045, + "grad_norm": 0.6918017864227295, + "learning_rate": 
0.0012851455374622604, + "loss": 1.5835, + "step": 2414 + }, + { + "epoch": 0.254746835443038, + "grad_norm": 0.6231915354728699, + "learning_rate": 0.0012849696082515166, + "loss": 1.5971, + "step": 2415 + }, + { + "epoch": 0.2548523206751055, + "grad_norm": 0.8238919377326965, + "learning_rate": 0.0012847936190947605, + "loss": 1.6125, + "step": 2416 + }, + { + "epoch": 0.254957805907173, + "grad_norm": 0.8190762400627136, + "learning_rate": 0.001284617570011713, + "loss": 1.5766, + "step": 2417 + }, + { + "epoch": 0.25506329113924053, + "grad_norm": 0.7174434661865234, + "learning_rate": 0.0012844414610221006, + "loss": 1.6139, + "step": 2418 + }, + { + "epoch": 0.255168776371308, + "grad_norm": 0.6499961614608765, + "learning_rate": 0.0012842652921456576, + "loss": 1.5825, + "step": 2419 + }, + { + "epoch": 0.2552742616033755, + "grad_norm": 0.6668521165847778, + "learning_rate": 0.0012840890634021249, + "loss": 1.6236, + "step": 2420 + }, + { + "epoch": 0.255379746835443, + "grad_norm": 0.6749675273895264, + "learning_rate": 0.001283912774811249, + "loss": 1.5719, + "step": 2421 + }, + { + "epoch": 0.25548523206751056, + "grad_norm": 0.6641529202461243, + "learning_rate": 0.0012837364263927843, + "loss": 1.6732, + "step": 2422 + }, + { + "epoch": 0.25559071729957805, + "grad_norm": 0.8084420561790466, + "learning_rate": 0.001283560018166492, + "loss": 1.6495, + "step": 2423 + }, + { + "epoch": 0.25569620253164554, + "grad_norm": 0.6386871337890625, + "learning_rate": 0.0012833835501521386, + "loss": 1.5832, + "step": 2424 + }, + { + "epoch": 0.2558016877637131, + "grad_norm": 0.8968576192855835, + "learning_rate": 0.0012832070223694992, + "loss": 1.5765, + "step": 2425 + }, + { + "epoch": 0.2559071729957806, + "grad_norm": 0.9290039539337158, + "learning_rate": 0.0012830304348383538, + "loss": 1.6024, + "step": 2426 + }, + { + "epoch": 0.2560126582278481, + "grad_norm": 0.7102710604667664, + "learning_rate": 0.0012828537875784905, + "loss": 1.6377, + "step": 
2427 + }, + { + "epoch": 0.2561181434599156, + "grad_norm": 0.7491070628166199, + "learning_rate": 0.001282677080609703, + "loss": 1.5764, + "step": 2428 + }, + { + "epoch": 0.2562236286919831, + "grad_norm": 0.7167311906814575, + "learning_rate": 0.0012825003139517925, + "loss": 1.5781, + "step": 2429 + }, + { + "epoch": 0.2563291139240506, + "grad_norm": 0.7185764908790588, + "learning_rate": 0.0012823234876245667, + "loss": 1.5849, + "step": 2430 + }, + { + "epoch": 0.25643459915611816, + "grad_norm": 0.7563662528991699, + "learning_rate": 0.0012821466016478395, + "loss": 1.6246, + "step": 2431 + }, + { + "epoch": 0.25654008438818565, + "grad_norm": 0.6655240058898926, + "learning_rate": 0.0012819696560414323, + "loss": 1.6376, + "step": 2432 + }, + { + "epoch": 0.25664556962025314, + "grad_norm": 0.6637367606163025, + "learning_rate": 0.0012817926508251723, + "loss": 1.6533, + "step": 2433 + }, + { + "epoch": 0.2567510548523207, + "grad_norm": 0.7028847336769104, + "learning_rate": 0.0012816155860188938, + "loss": 1.5657, + "step": 2434 + }, + { + "epoch": 0.2568565400843882, + "grad_norm": 0.7322604656219482, + "learning_rate": 0.0012814384616424384, + "loss": 1.5844, + "step": 2435 + }, + { + "epoch": 0.2569620253164557, + "grad_norm": 0.6845324039459229, + "learning_rate": 0.0012812612777156533, + "loss": 1.5862, + "step": 2436 + }, + { + "epoch": 0.2570675105485232, + "grad_norm": 0.6347278356552124, + "learning_rate": 0.001281084034258393, + "loss": 1.6167, + "step": 2437 + }, + { + "epoch": 0.2571729957805907, + "grad_norm": 0.691115140914917, + "learning_rate": 0.0012809067312905182, + "loss": 1.6214, + "step": 2438 + }, + { + "epoch": 0.2572784810126582, + "grad_norm": 0.6737146973609924, + "learning_rate": 0.0012807293688318969, + "loss": 1.6075, + "step": 2439 + }, + { + "epoch": 0.25738396624472576, + "grad_norm": 0.8097888231277466, + "learning_rate": 0.0012805519469024035, + "loss": 1.6355, + "step": 2440 + }, + { + "epoch": 0.25748945147679325, + 
"grad_norm": 0.6681293845176697, + "learning_rate": 0.0012803744655219187, + "loss": 1.6162, + "step": 2441 + }, + { + "epoch": 0.25759493670886074, + "grad_norm": 0.7263829112052917, + "learning_rate": 0.0012801969247103306, + "loss": 1.5944, + "step": 2442 + }, + { + "epoch": 0.2577004219409283, + "grad_norm": 0.7230623960494995, + "learning_rate": 0.001280019324487533, + "loss": 1.6089, + "step": 2443 + }, + { + "epoch": 0.2578059071729958, + "grad_norm": 0.6658034920692444, + "learning_rate": 0.0012798416648734272, + "loss": 1.6027, + "step": 2444 + }, + { + "epoch": 0.2579113924050633, + "grad_norm": 0.6969464421272278, + "learning_rate": 0.001279663945887921, + "loss": 1.6406, + "step": 2445 + }, + { + "epoch": 0.2580168776371308, + "grad_norm": 0.645287036895752, + "learning_rate": 0.0012794861675509285, + "loss": 1.567, + "step": 2446 + }, + { + "epoch": 0.2581223628691983, + "grad_norm": 0.6887852549552917, + "learning_rate": 0.0012793083298823708, + "loss": 1.5907, + "step": 2447 + }, + { + "epoch": 0.2582278481012658, + "grad_norm": 0.7565521001815796, + "learning_rate": 0.0012791304329021751, + "loss": 1.5916, + "step": 2448 + }, + { + "epoch": 0.25833333333333336, + "grad_norm": 0.6562458276748657, + "learning_rate": 0.001278952476630276, + "loss": 1.5817, + "step": 2449 + }, + { + "epoch": 0.25843881856540085, + "grad_norm": 0.6546562314033508, + "learning_rate": 0.0012787744610866143, + "loss": 1.5854, + "step": 2450 + }, + { + "epoch": 0.25854430379746834, + "grad_norm": 0.6904072761535645, + "learning_rate": 0.0012785963862911376, + "loss": 1.5978, + "step": 2451 + }, + { + "epoch": 0.2586497890295359, + "grad_norm": 0.6693453192710876, + "learning_rate": 0.0012784182522637998, + "loss": 1.6353, + "step": 2452 + }, + { + "epoch": 0.2587552742616034, + "grad_norm": 0.8107669949531555, + "learning_rate": 0.001278240059024562, + "loss": 1.5942, + "step": 2453 + }, + { + "epoch": 0.2588607594936709, + "grad_norm": 0.8398098945617676, + "learning_rate": 
0.0012780618065933915, + "loss": 1.6458, + "step": 2454 + }, + { + "epoch": 0.25896624472573837, + "grad_norm": 0.6518373489379883, + "learning_rate": 0.0012778834949902626, + "loss": 1.5754, + "step": 2455 + }, + { + "epoch": 0.2590717299578059, + "grad_norm": 0.9784347414970398, + "learning_rate": 0.0012777051242351557, + "loss": 1.6107, + "step": 2456 + }, + { + "epoch": 0.2591772151898734, + "grad_norm": 0.9915900230407715, + "learning_rate": 0.0012775266943480582, + "loss": 1.6068, + "step": 2457 + }, + { + "epoch": 0.2592827004219409, + "grad_norm": 0.6662665009498596, + "learning_rate": 0.0012773482053489642, + "loss": 1.6243, + "step": 2458 + }, + { + "epoch": 0.25938818565400845, + "grad_norm": 1.1311523914337158, + "learning_rate": 0.0012771696572578743, + "loss": 1.6352, + "step": 2459 + }, + { + "epoch": 0.25949367088607594, + "grad_norm": 0.706511914730072, + "learning_rate": 0.0012769910500947954, + "loss": 1.6002, + "step": 2460 + }, + { + "epoch": 0.25959915611814344, + "grad_norm": 0.7669246196746826, + "learning_rate": 0.0012768123838797414, + "loss": 1.5607, + "step": 2461 + }, + { + "epoch": 0.259704641350211, + "grad_norm": 0.9726108312606812, + "learning_rate": 0.0012766336586327333, + "loss": 1.6175, + "step": 2462 + }, + { + "epoch": 0.2598101265822785, + "grad_norm": 0.6308902502059937, + "learning_rate": 0.0012764548743737973, + "loss": 1.6027, + "step": 2463 + }, + { + "epoch": 0.25991561181434597, + "grad_norm": 0.9313945174217224, + "learning_rate": 0.001276276031122968, + "loss": 1.5754, + "step": 2464 + }, + { + "epoch": 0.2600210970464135, + "grad_norm": 0.9530911445617676, + "learning_rate": 0.0012760971289002847, + "loss": 1.6427, + "step": 2465 + }, + { + "epoch": 0.260126582278481, + "grad_norm": 0.6343769431114197, + "learning_rate": 0.0012759181677257946, + "loss": 1.5837, + "step": 2466 + }, + { + "epoch": 0.2602320675105485, + "grad_norm": 0.9837098717689514, + "learning_rate": 0.0012757391476195517, + "loss": 1.5995, + 
"step": 2467 + }, + { + "epoch": 0.26033755274261605, + "grad_norm": 0.7247481942176819, + "learning_rate": 0.0012755600686016155, + "loss": 1.6096, + "step": 2468 + }, + { + "epoch": 0.26044303797468354, + "grad_norm": 0.8859376311302185, + "learning_rate": 0.0012753809306920532, + "loss": 1.619, + "step": 2469 + }, + { + "epoch": 0.26054852320675104, + "grad_norm": 1.1505688428878784, + "learning_rate": 0.0012752017339109376, + "loss": 1.6299, + "step": 2470 + }, + { + "epoch": 0.2606540084388186, + "grad_norm": 0.6449504494667053, + "learning_rate": 0.0012750224782783492, + "loss": 1.6434, + "step": 2471 + }, + { + "epoch": 0.2607594936708861, + "grad_norm": 1.1098556518554688, + "learning_rate": 0.0012748431638143739, + "loss": 1.596, + "step": 2472 + }, + { + "epoch": 0.26086497890295357, + "grad_norm": 0.6120943427085876, + "learning_rate": 0.0012746637905391048, + "loss": 1.6064, + "step": 2473 + }, + { + "epoch": 0.2609704641350211, + "grad_norm": 1.035417079925537, + "learning_rate": 0.001274484358472642, + "loss": 1.6072, + "step": 2474 + }, + { + "epoch": 0.2610759493670886, + "grad_norm": 0.7808588743209839, + "learning_rate": 0.0012743048676350911, + "loss": 1.6138, + "step": 2475 + }, + { + "epoch": 0.2611814345991561, + "grad_norm": 1.0803155899047852, + "learning_rate": 0.001274125318046566, + "loss": 1.5869, + "step": 2476 + }, + { + "epoch": 0.26128691983122365, + "grad_norm": 1.2389209270477295, + "learning_rate": 0.0012739457097271849, + "loss": 1.6186, + "step": 2477 + }, + { + "epoch": 0.26139240506329114, + "grad_norm": 0.7155228853225708, + "learning_rate": 0.0012737660426970748, + "loss": 1.6, + "step": 2478 + }, + { + "epoch": 0.26149789029535864, + "grad_norm": 0.9026675820350647, + "learning_rate": 0.0012735863169763678, + "loss": 1.6154, + "step": 2479 + }, + { + "epoch": 0.2616033755274262, + "grad_norm": 0.7264849543571472, + "learning_rate": 0.0012734065325852029, + "loss": 1.5997, + "step": 2480 + }, + { + "epoch": 
0.2617088607594937, + "grad_norm": 1.0435082912445068, + "learning_rate": 0.0012732266895437265, + "loss": 1.5962, + "step": 2481 + }, + { + "epoch": 0.26181434599156117, + "grad_norm": 0.6769748330116272, + "learning_rate": 0.00127304678787209, + "loss": 1.5727, + "step": 2482 + }, + { + "epoch": 0.2619198312236287, + "grad_norm": 1.0666223764419556, + "learning_rate": 0.001272866827590453, + "loss": 1.6045, + "step": 2483 + }, + { + "epoch": 0.2620253164556962, + "grad_norm": 0.7084951400756836, + "learning_rate": 0.001272686808718981, + "loss": 1.5631, + "step": 2484 + }, + { + "epoch": 0.2621308016877637, + "grad_norm": 0.8766289949417114, + "learning_rate": 0.0012725067312778454, + "loss": 1.5994, + "step": 2485 + }, + { + "epoch": 0.2622362869198312, + "grad_norm": 0.8218935132026672, + "learning_rate": 0.0012723265952872252, + "loss": 1.5865, + "step": 2486 + }, + { + "epoch": 0.26234177215189874, + "grad_norm": 0.6540502309799194, + "learning_rate": 0.0012721464007673055, + "loss": 1.5915, + "step": 2487 + }, + { + "epoch": 0.26244725738396624, + "grad_norm": 0.7096043825149536, + "learning_rate": 0.0012719661477382778, + "loss": 1.6266, + "step": 2488 + }, + { + "epoch": 0.26255274261603373, + "grad_norm": 0.6923620104789734, + "learning_rate": 0.0012717858362203407, + "loss": 1.5647, + "step": 2489 + }, + { + "epoch": 0.2626582278481013, + "grad_norm": 0.649835467338562, + "learning_rate": 0.0012716054662336987, + "loss": 1.6183, + "step": 2490 + }, + { + "epoch": 0.26276371308016877, + "grad_norm": 0.6563825607299805, + "learning_rate": 0.001271425037798563, + "loss": 1.584, + "step": 2491 + }, + { + "epoch": 0.26286919831223626, + "grad_norm": 0.6818656325340271, + "learning_rate": 0.0012712445509351518, + "loss": 1.5946, + "step": 2492 + }, + { + "epoch": 0.2629746835443038, + "grad_norm": 0.6652683019638062, + "learning_rate": 0.00127106400566369, + "loss": 1.6393, + "step": 2493 + }, + { + "epoch": 0.2630801687763713, + "grad_norm": 
0.7117735743522644, + "learning_rate": 0.0012708834020044076, + "loss": 1.5755, + "step": 2494 + }, + { + "epoch": 0.2631856540084388, + "grad_norm": 0.636810302734375, + "learning_rate": 0.0012707027399775429, + "loss": 1.5912, + "step": 2495 + }, + { + "epoch": 0.26329113924050634, + "grad_norm": 0.6879735589027405, + "learning_rate": 0.0012705220196033396, + "loss": 1.5849, + "step": 2496 + }, + { + "epoch": 0.26339662447257384, + "grad_norm": 0.700139045715332, + "learning_rate": 0.0012703412409020484, + "loss": 1.6344, + "step": 2497 + }, + { + "epoch": 0.26350210970464133, + "grad_norm": 0.6805004477500916, + "learning_rate": 0.0012701604038939268, + "loss": 1.5879, + "step": 2498 + }, + { + "epoch": 0.2636075949367089, + "grad_norm": 0.6612558960914612, + "learning_rate": 0.0012699795085992379, + "loss": 1.6007, + "step": 2499 + }, + { + "epoch": 0.26371308016877637, + "grad_norm": 0.6808351278305054, + "learning_rate": 0.001269798555038252, + "loss": 1.5747, + "step": 2500 + }, + { + "epoch": 0.26381856540084386, + "grad_norm": 0.7251008749008179, + "learning_rate": 0.0012696175432312465, + "loss": 1.5808, + "step": 2501 + }, + { + "epoch": 0.2639240506329114, + "grad_norm": 0.62335205078125, + "learning_rate": 0.0012694364731985041, + "loss": 1.6249, + "step": 2502 + }, + { + "epoch": 0.2640295358649789, + "grad_norm": 0.6794546842575073, + "learning_rate": 0.0012692553449603148, + "loss": 1.6007, + "step": 2503 + }, + { + "epoch": 0.2641350210970464, + "grad_norm": 0.6382340788841248, + "learning_rate": 0.0012690741585369748, + "loss": 1.5727, + "step": 2504 + }, + { + "epoch": 0.26424050632911394, + "grad_norm": 0.6546385884284973, + "learning_rate": 0.0012688929139487869, + "loss": 1.6285, + "step": 2505 + }, + { + "epoch": 0.26434599156118144, + "grad_norm": 0.6967650651931763, + "learning_rate": 0.0012687116112160607, + "loss": 1.5767, + "step": 2506 + }, + { + "epoch": 0.26445147679324893, + "grad_norm": 0.6830193400382996, + "learning_rate": 
0.0012685302503591118, + "loss": 1.608, + "step": 2507 + }, + { + "epoch": 0.2645569620253165, + "grad_norm": 0.631605863571167, + "learning_rate": 0.0012683488313982628, + "loss": 1.5873, + "step": 2508 + }, + { + "epoch": 0.26466244725738397, + "grad_norm": 0.7233527898788452, + "learning_rate": 0.0012681673543538427, + "loss": 1.5725, + "step": 2509 + }, + { + "epoch": 0.26476793248945146, + "grad_norm": 0.6249690055847168, + "learning_rate": 0.0012679858192461864, + "loss": 1.6177, + "step": 2510 + }, + { + "epoch": 0.264873417721519, + "grad_norm": 0.7583497762680054, + "learning_rate": 0.0012678042260956363, + "loss": 1.6363, + "step": 2511 + }, + { + "epoch": 0.2649789029535865, + "grad_norm": 0.6618452668190002, + "learning_rate": 0.0012676225749225407, + "loss": 1.6034, + "step": 2512 + }, + { + "epoch": 0.265084388185654, + "grad_norm": 0.6372886896133423, + "learning_rate": 0.0012674408657472542, + "loss": 1.5662, + "step": 2513 + }, + { + "epoch": 0.26518987341772154, + "grad_norm": 0.6847763061523438, + "learning_rate": 0.0012672590985901386, + "loss": 1.5845, + "step": 2514 + }, + { + "epoch": 0.26529535864978904, + "grad_norm": 0.6712055206298828, + "learning_rate": 0.001267077273471562, + "loss": 1.5891, + "step": 2515 + }, + { + "epoch": 0.26540084388185653, + "grad_norm": 0.6789618730545044, + "learning_rate": 0.0012668953904118984, + "loss": 1.5981, + "step": 2516 + }, + { + "epoch": 0.2655063291139241, + "grad_norm": 0.6712265014648438, + "learning_rate": 0.001266713449431529, + "loss": 1.5918, + "step": 2517 + }, + { + "epoch": 0.26561181434599157, + "grad_norm": 0.7069161534309387, + "learning_rate": 0.0012665314505508406, + "loss": 1.6232, + "step": 2518 + }, + { + "epoch": 0.26571729957805906, + "grad_norm": 0.6958885192871094, + "learning_rate": 0.0012663493937902278, + "loss": 1.6107, + "step": 2519 + }, + { + "epoch": 0.26582278481012656, + "grad_norm": 0.7471234202384949, + "learning_rate": 0.0012661672791700906, + "loss": 1.6056, + 
"step": 2520 + }, + { + "epoch": 0.2659282700421941, + "grad_norm": 0.6525676250457764, + "learning_rate": 0.001265985106710836, + "loss": 1.5958, + "step": 2521 + }, + { + "epoch": 0.2660337552742616, + "grad_norm": 0.7141806483268738, + "learning_rate": 0.0012658028764328771, + "loss": 1.5875, + "step": 2522 + }, + { + "epoch": 0.2661392405063291, + "grad_norm": 0.6706332564353943, + "learning_rate": 0.0012656205883566339, + "loss": 1.6117, + "step": 2523 + }, + { + "epoch": 0.26624472573839664, + "grad_norm": 0.6981054544448853, + "learning_rate": 0.0012654382425025328, + "loss": 1.555, + "step": 2524 + }, + { + "epoch": 0.26635021097046413, + "grad_norm": 0.6392682194709778, + "learning_rate": 0.0012652558388910062, + "loss": 1.5605, + "step": 2525 + }, + { + "epoch": 0.2664556962025316, + "grad_norm": 0.6985072493553162, + "learning_rate": 0.0012650733775424938, + "loss": 1.5868, + "step": 2526 + }, + { + "epoch": 0.26656118143459917, + "grad_norm": 0.8029537200927734, + "learning_rate": 0.001264890858477441, + "loss": 1.5892, + "step": 2527 + }, + { + "epoch": 0.26666666666666666, + "grad_norm": 0.7548570036888123, + "learning_rate": 0.0012647082817162998, + "loss": 1.6008, + "step": 2528 + }, + { + "epoch": 0.26677215189873416, + "grad_norm": 0.7059966325759888, + "learning_rate": 0.0012645256472795295, + "loss": 1.6765, + "step": 2529 + }, + { + "epoch": 0.2668776371308017, + "grad_norm": 0.6413760185241699, + "learning_rate": 0.0012643429551875945, + "loss": 1.579, + "step": 2530 + }, + { + "epoch": 0.2669831223628692, + "grad_norm": 0.7054968476295471, + "learning_rate": 0.0012641602054609662, + "loss": 1.5948, + "step": 2531 + }, + { + "epoch": 0.2670886075949367, + "grad_norm": 0.7019748687744141, + "learning_rate": 0.0012639773981201238, + "loss": 1.5964, + "step": 2532 + }, + { + "epoch": 0.26719409282700424, + "grad_norm": 0.7178134322166443, + "learning_rate": 0.0012637945331855506, + "loss": 1.5331, + "step": 2533 + }, + { + "epoch": 
0.26729957805907173, + "grad_norm": 0.6585642695426941, + "learning_rate": 0.0012636116106777382, + "loss": 1.5799, + "step": 2534 + }, + { + "epoch": 0.2674050632911392, + "grad_norm": 0.7989511489868164, + "learning_rate": 0.0012634286306171835, + "loss": 1.6379, + "step": 2535 + }, + { + "epoch": 0.26751054852320677, + "grad_norm": 0.8688583970069885, + "learning_rate": 0.0012632455930243907, + "loss": 1.5988, + "step": 2536 + }, + { + "epoch": 0.26761603375527426, + "grad_norm": 0.6523927450180054, + "learning_rate": 0.0012630624979198697, + "loss": 1.5933, + "step": 2537 + }, + { + "epoch": 0.26772151898734176, + "grad_norm": 1.0005346536636353, + "learning_rate": 0.0012628793453241377, + "loss": 1.6402, + "step": 2538 + }, + { + "epoch": 0.2678270042194093, + "grad_norm": 0.8628237247467041, + "learning_rate": 0.0012626961352577174, + "loss": 1.6087, + "step": 2539 + }, + { + "epoch": 0.2679324894514768, + "grad_norm": 0.8435291051864624, + "learning_rate": 0.0012625128677411388, + "loss": 1.5884, + "step": 2540 + }, + { + "epoch": 0.2680379746835443, + "grad_norm": 1.1887766122817993, + "learning_rate": 0.0012623295427949377, + "loss": 1.5871, + "step": 2541 + }, + { + "epoch": 0.26814345991561184, + "grad_norm": 0.6686839461326599, + "learning_rate": 0.0012621461604396566, + "loss": 1.6195, + "step": 2542 + }, + { + "epoch": 0.26824894514767933, + "grad_norm": 1.1130791902542114, + "learning_rate": 0.0012619627206958445, + "loss": 1.5918, + "step": 2543 + }, + { + "epoch": 0.2683544303797468, + "grad_norm": 0.6795485615730286, + "learning_rate": 0.0012617792235840564, + "loss": 1.5736, + "step": 2544 + }, + { + "epoch": 0.26845991561181437, + "grad_norm": 0.7439214587211609, + "learning_rate": 0.0012615956691248544, + "loss": 1.608, + "step": 2545 + }, + { + "epoch": 0.26856540084388186, + "grad_norm": 0.7565585970878601, + "learning_rate": 0.001261412057338807, + "loss": 1.5968, + "step": 2546 + }, + { + "epoch": 0.26867088607594936, + "grad_norm": 
0.6557243466377258, + "learning_rate": 0.0012612283882464882, + "loss": 1.5701, + "step": 2547 + }, + { + "epoch": 0.2687763713080169, + "grad_norm": 0.8073958158493042, + "learning_rate": 0.0012610446618684793, + "loss": 1.5996, + "step": 2548 + }, + { + "epoch": 0.2688818565400844, + "grad_norm": 0.6448305249214172, + "learning_rate": 0.0012608608782253676, + "loss": 1.5976, + "step": 2549 + }, + { + "epoch": 0.2689873417721519, + "grad_norm": 0.9030637145042419, + "learning_rate": 0.0012606770373377475, + "loss": 1.5909, + "step": 2550 + }, + { + "epoch": 0.26909282700421944, + "grad_norm": 0.9487709999084473, + "learning_rate": 0.0012604931392262186, + "loss": 1.6124, + "step": 2551 + }, + { + "epoch": 0.26919831223628693, + "grad_norm": 0.6904355883598328, + "learning_rate": 0.001260309183911388, + "loss": 1.617, + "step": 2552 + }, + { + "epoch": 0.2693037974683544, + "grad_norm": 0.8695191144943237, + "learning_rate": 0.0012601251714138683, + "loss": 1.6298, + "step": 2553 + }, + { + "epoch": 0.2694092827004219, + "grad_norm": 0.7535239458084106, + "learning_rate": 0.0012599411017542798, + "loss": 1.5899, + "step": 2554 + }, + { + "epoch": 0.26951476793248946, + "grad_norm": 0.7076644897460938, + "learning_rate": 0.0012597569749532482, + "loss": 1.5967, + "step": 2555 + }, + { + "epoch": 0.26962025316455696, + "grad_norm": 0.7934760451316833, + "learning_rate": 0.0012595727910314056, + "loss": 1.6075, + "step": 2556 + }, + { + "epoch": 0.26972573839662445, + "grad_norm": 0.7578938007354736, + "learning_rate": 0.0012593885500093906, + "loss": 1.5866, + "step": 2557 + }, + { + "epoch": 0.269831223628692, + "grad_norm": 0.6619359850883484, + "learning_rate": 0.0012592042519078486, + "loss": 1.6057, + "step": 2558 + }, + { + "epoch": 0.2699367088607595, + "grad_norm": 0.6980561017990112, + "learning_rate": 0.0012590198967474312, + "loss": 1.5939, + "step": 2559 + }, + { + "epoch": 0.270042194092827, + "grad_norm": 0.7317259311676025, + "learning_rate": 
0.0012588354845487959, + "loss": 1.5608, + "step": 2560 + }, + { + "epoch": 0.27014767932489453, + "grad_norm": 0.6176422238349915, + "learning_rate": 0.0012586510153326075, + "loss": 1.5793, + "step": 2561 + }, + { + "epoch": 0.270253164556962, + "grad_norm": 0.6278406381607056, + "learning_rate": 0.0012584664891195365, + "loss": 1.6119, + "step": 2562 + }, + { + "epoch": 0.2703586497890295, + "grad_norm": 0.6345965266227722, + "learning_rate": 0.0012582819059302598, + "loss": 1.6362, + "step": 2563 + }, + { + "epoch": 0.27046413502109706, + "grad_norm": 0.6322948336601257, + "learning_rate": 0.001258097265785461, + "loss": 1.5847, + "step": 2564 + }, + { + "epoch": 0.27056962025316456, + "grad_norm": 0.6659449934959412, + "learning_rate": 0.0012579125687058302, + "loss": 1.5665, + "step": 2565 + }, + { + "epoch": 0.27067510548523205, + "grad_norm": 0.6449089646339417, + "learning_rate": 0.0012577278147120632, + "loss": 1.572, + "step": 2566 + }, + { + "epoch": 0.2707805907172996, + "grad_norm": 0.7212923169136047, + "learning_rate": 0.0012575430038248628, + "loss": 1.6218, + "step": 2567 + }, + { + "epoch": 0.2708860759493671, + "grad_norm": 0.7594980001449585, + "learning_rate": 0.001257358136064938, + "loss": 1.5909, + "step": 2568 + }, + { + "epoch": 0.2709915611814346, + "grad_norm": 0.6181400418281555, + "learning_rate": 0.001257173211453004, + "loss": 1.5512, + "step": 2569 + }, + { + "epoch": 0.27109704641350213, + "grad_norm": 0.6636635065078735, + "learning_rate": 0.001256988230009783, + "loss": 1.6206, + "step": 2570 + }, + { + "epoch": 0.2712025316455696, + "grad_norm": 0.6406926512718201, + "learning_rate": 0.0012568031917560027, + "loss": 1.556, + "step": 2571 + }, + { + "epoch": 0.2713080168776371, + "grad_norm": 0.6742372512817383, + "learning_rate": 0.0012566180967123976, + "loss": 1.5966, + "step": 2572 + }, + { + "epoch": 0.27141350210970466, + "grad_norm": 0.7183977365493774, + "learning_rate": 0.0012564329448997082, + "loss": 1.594, + "step": 
2573 + }, + { + "epoch": 0.27151898734177216, + "grad_norm": 0.6740306615829468, + "learning_rate": 0.0012562477363386821, + "loss": 1.5879, + "step": 2574 + }, + { + "epoch": 0.27162447257383965, + "grad_norm": 0.6972319483757019, + "learning_rate": 0.0012560624710500731, + "loss": 1.5877, + "step": 2575 + }, + { + "epoch": 0.2717299578059072, + "grad_norm": 0.6379469633102417, + "learning_rate": 0.0012558771490546407, + "loss": 1.5614, + "step": 2576 + }, + { + "epoch": 0.2718354430379747, + "grad_norm": 0.652392566204071, + "learning_rate": 0.0012556917703731509, + "loss": 1.6028, + "step": 2577 + }, + { + "epoch": 0.2719409282700422, + "grad_norm": 0.6479291319847107, + "learning_rate": 0.0012555063350263768, + "loss": 1.6479, + "step": 2578 + }, + { + "epoch": 0.27204641350210973, + "grad_norm": 0.626467227935791, + "learning_rate": 0.0012553208430350973, + "loss": 1.6012, + "step": 2579 + }, + { + "epoch": 0.2721518987341772, + "grad_norm": 0.655225396156311, + "learning_rate": 0.0012551352944200976, + "loss": 1.5943, + "step": 2580 + }, + { + "epoch": 0.2722573839662447, + "grad_norm": 0.7408117055892944, + "learning_rate": 0.0012549496892021693, + "loss": 1.5716, + "step": 2581 + }, + { + "epoch": 0.27236286919831226, + "grad_norm": 0.6982287168502808, + "learning_rate": 0.0012547640274021103, + "loss": 1.6037, + "step": 2582 + }, + { + "epoch": 0.27246835443037976, + "grad_norm": 0.6608831286430359, + "learning_rate": 0.001254578309040725, + "loss": 1.6031, + "step": 2583 + }, + { + "epoch": 0.27257383966244725, + "grad_norm": 0.7716354727745056, + "learning_rate": 0.001254392534138824, + "loss": 1.5954, + "step": 2584 + }, + { + "epoch": 0.27267932489451474, + "grad_norm": 0.8653605580329895, + "learning_rate": 0.0012542067027172248, + "loss": 1.6189, + "step": 2585 + }, + { + "epoch": 0.2727848101265823, + "grad_norm": 0.9374101758003235, + "learning_rate": 0.0012540208147967503, + "loss": 1.5786, + "step": 2586 + }, + { + "epoch": 0.2728902953586498, + 
"grad_norm": 0.849249005317688, + "learning_rate": 0.00125383487039823, + "loss": 1.6199, + "step": 2587 + }, + { + "epoch": 0.2729957805907173, + "grad_norm": 0.6283584237098694, + "learning_rate": 0.0012536488695425003, + "loss": 1.5982, + "step": 2588 + }, + { + "epoch": 0.2731012658227848, + "grad_norm": 0.662526547908783, + "learning_rate": 0.0012534628122504031, + "loss": 1.5928, + "step": 2589 + }, + { + "epoch": 0.2732067510548523, + "grad_norm": 0.6906788945198059, + "learning_rate": 0.0012532766985427874, + "loss": 1.5974, + "step": 2590 + }, + { + "epoch": 0.2733122362869198, + "grad_norm": 0.6614969968795776, + "learning_rate": 0.0012530905284405083, + "loss": 1.5965, + "step": 2591 + }, + { + "epoch": 0.27341772151898736, + "grad_norm": 0.6849450469017029, + "learning_rate": 0.0012529043019644266, + "loss": 1.6159, + "step": 2592 + }, + { + "epoch": 0.27352320675105485, + "grad_norm": 0.6600929498672485, + "learning_rate": 0.0012527180191354104, + "loss": 1.5975, + "step": 2593 + }, + { + "epoch": 0.27362869198312234, + "grad_norm": 0.7061555981636047, + "learning_rate": 0.0012525316799743332, + "loss": 1.5971, + "step": 2594 + }, + { + "epoch": 0.2737341772151899, + "grad_norm": 0.7292434573173523, + "learning_rate": 0.0012523452845020755, + "loss": 1.633, + "step": 2595 + }, + { + "epoch": 0.2738396624472574, + "grad_norm": 0.7927997708320618, + "learning_rate": 0.0012521588327395236, + "loss": 1.6061, + "step": 2596 + }, + { + "epoch": 0.2739451476793249, + "grad_norm": 0.6817910671234131, + "learning_rate": 0.0012519723247075706, + "loss": 1.5801, + "step": 2597 + }, + { + "epoch": 0.2740506329113924, + "grad_norm": 0.6793884038925171, + "learning_rate": 0.0012517857604271156, + "loss": 1.5621, + "step": 2598 + }, + { + "epoch": 0.2741561181434599, + "grad_norm": 0.7826316356658936, + "learning_rate": 0.001251599139919064, + "loss": 1.5775, + "step": 2599 + }, + { + "epoch": 0.2742616033755274, + "grad_norm": 0.6521453857421875, + "learning_rate": 
0.0012514124632043272, + "loss": 1.6138, + "step": 2600 + }, + { + "epoch": 0.27436708860759496, + "grad_norm": 0.8734750151634216, + "learning_rate": 0.001251225730303824, + "loss": 1.5743, + "step": 2601 + }, + { + "epoch": 0.27447257383966245, + "grad_norm": 1.077608346939087, + "learning_rate": 0.0012510389412384785, + "loss": 1.5872, + "step": 2602 + }, + { + "epoch": 0.27457805907172994, + "grad_norm": 0.6625407338142395, + "learning_rate": 0.001250852096029221, + "loss": 1.5264, + "step": 2603 + }, + { + "epoch": 0.2746835443037975, + "grad_norm": 0.7120847702026367, + "learning_rate": 0.0012506651946969888, + "loss": 1.5999, + "step": 2604 + }, + { + "epoch": 0.274789029535865, + "grad_norm": 0.7233405113220215, + "learning_rate": 0.0012504782372627248, + "loss": 1.6279, + "step": 2605 + }, + { + "epoch": 0.2748945147679325, + "grad_norm": 0.6337597966194153, + "learning_rate": 0.0012502912237473789, + "loss": 1.6081, + "step": 2606 + }, + { + "epoch": 0.275, + "grad_norm": 0.722263514995575, + "learning_rate": 0.0012501041541719067, + "loss": 1.6177, + "step": 2607 + }, + { + "epoch": 0.2751054852320675, + "grad_norm": 0.7228683829307556, + "learning_rate": 0.0012499170285572702, + "loss": 1.5907, + "step": 2608 + }, + { + "epoch": 0.275210970464135, + "grad_norm": 0.670417845249176, + "learning_rate": 0.0012497298469244377, + "loss": 1.6104, + "step": 2609 + }, + { + "epoch": 0.27531645569620256, + "grad_norm": 0.7114824056625366, + "learning_rate": 0.0012495426092943842, + "loss": 1.601, + "step": 2610 + }, + { + "epoch": 0.27542194092827005, + "grad_norm": 0.7813767790794373, + "learning_rate": 0.0012493553156880904, + "loss": 1.6132, + "step": 2611 + }, + { + "epoch": 0.27552742616033754, + "grad_norm": 0.7304219603538513, + "learning_rate": 0.0012491679661265434, + "loss": 1.6041, + "step": 2612 + }, + { + "epoch": 0.2756329113924051, + "grad_norm": 0.6724848747253418, + "learning_rate": 0.0012489805606307367, + "loss": 1.6243, + "step": 2613 + }, + { 
+ "epoch": 0.2757383966244726, + "grad_norm": 0.8603217601776123, + "learning_rate": 0.00124879309922167, + "loss": 1.5933, + "step": 2614 + }, + { + "epoch": 0.2758438818565401, + "grad_norm": 0.6664122939109802, + "learning_rate": 0.0012486055819203494, + "loss": 1.6043, + "step": 2615 + }, + { + "epoch": 0.2759493670886076, + "grad_norm": 0.7228140234947205, + "learning_rate": 0.001248418008747787, + "loss": 1.5739, + "step": 2616 + }, + { + "epoch": 0.2760548523206751, + "grad_norm": 0.6407665014266968, + "learning_rate": 0.0012482303797250014, + "loss": 1.573, + "step": 2617 + }, + { + "epoch": 0.2761603375527426, + "grad_norm": 0.8932082056999207, + "learning_rate": 0.0012480426948730174, + "loss": 1.5707, + "step": 2618 + }, + { + "epoch": 0.2762658227848101, + "grad_norm": 1.0087761878967285, + "learning_rate": 0.001247854954212866, + "loss": 1.6014, + "step": 2619 + }, + { + "epoch": 0.27637130801687765, + "grad_norm": 0.6362578868865967, + "learning_rate": 0.0012476671577655845, + "loss": 1.5756, + "step": 2620 + }, + { + "epoch": 0.27647679324894514, + "grad_norm": 1.0407477617263794, + "learning_rate": 0.001247479305552216, + "loss": 1.5661, + "step": 2621 + }, + { + "epoch": 0.27658227848101263, + "grad_norm": 1.0492899417877197, + "learning_rate": 0.001247291397593811, + "loss": 1.5602, + "step": 2622 + }, + { + "epoch": 0.2766877637130802, + "grad_norm": 0.6626974940299988, + "learning_rate": 0.001247103433911425, + "loss": 1.5724, + "step": 2623 + }, + { + "epoch": 0.2767932489451477, + "grad_norm": 0.9325923323631287, + "learning_rate": 0.0012469154145261208, + "loss": 1.6164, + "step": 2624 + }, + { + "epoch": 0.27689873417721517, + "grad_norm": 0.7811810374259949, + "learning_rate": 0.0012467273394589664, + "loss": 1.5719, + "step": 2625 + }, + { + "epoch": 0.2770042194092827, + "grad_norm": 0.6572378277778625, + "learning_rate": 0.0012465392087310366, + "loss": 1.5652, + "step": 2626 + }, + { + "epoch": 0.2771097046413502, + "grad_norm": 
0.6781197190284729, + "learning_rate": 0.0012463510223634125, + "loss": 1.5651, + "step": 2627 + }, + { + "epoch": 0.2772151898734177, + "grad_norm": 0.771209180355072, + "learning_rate": 0.0012461627803771812, + "loss": 1.6089, + "step": 2628 + }, + { + "epoch": 0.27732067510548525, + "grad_norm": 0.7861365079879761, + "learning_rate": 0.0012459744827934367, + "loss": 1.5751, + "step": 2629 + }, + { + "epoch": 0.27742616033755274, + "grad_norm": 0.7903379797935486, + "learning_rate": 0.0012457861296332774, + "loss": 1.6303, + "step": 2630 + }, + { + "epoch": 0.27753164556962023, + "grad_norm": 0.7201457619667053, + "learning_rate": 0.0012455977209178109, + "loss": 1.5978, + "step": 2631 + }, + { + "epoch": 0.2776371308016878, + "grad_norm": 0.8262034058570862, + "learning_rate": 0.0012454092566681482, + "loss": 1.6038, + "step": 2632 + }, + { + "epoch": 0.2777426160337553, + "grad_norm": 0.7330015897750854, + "learning_rate": 0.001245220736905408, + "loss": 1.5613, + "step": 2633 + }, + { + "epoch": 0.27784810126582277, + "grad_norm": 0.982558012008667, + "learning_rate": 0.0012450321616507148, + "loss": 1.6054, + "step": 2634 + }, + { + "epoch": 0.2779535864978903, + "grad_norm": 0.8295224905014038, + "learning_rate": 0.0012448435309251995, + "loss": 1.6362, + "step": 2635 + }, + { + "epoch": 0.2780590717299578, + "grad_norm": 0.8439561724662781, + "learning_rate": 0.001244654844749999, + "loss": 1.5806, + "step": 2636 + }, + { + "epoch": 0.2781645569620253, + "grad_norm": 0.8577383160591125, + "learning_rate": 0.0012444661031462566, + "loss": 1.6088, + "step": 2637 + }, + { + "epoch": 0.27827004219409285, + "grad_norm": 0.7167330384254456, + "learning_rate": 0.0012442773061351216, + "loss": 1.5709, + "step": 2638 + }, + { + "epoch": 0.27837552742616034, + "grad_norm": 0.952094316482544, + "learning_rate": 0.0012440884537377498, + "loss": 1.5829, + "step": 2639 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 0.7170625329017639, + "learning_rate": 
0.001243899545975303, + "loss": 1.5751, + "step": 2640 + }, + { + "epoch": 0.2785864978902954, + "grad_norm": 0.8390031456947327, + "learning_rate": 0.0012437105828689494, + "loss": 1.6048, + "step": 2641 + }, + { + "epoch": 0.2786919831223629, + "grad_norm": 0.8755555748939514, + "learning_rate": 0.0012435215644398632, + "loss": 1.5645, + "step": 2642 + }, + { + "epoch": 0.27879746835443037, + "grad_norm": 0.7442552447319031, + "learning_rate": 0.0012433324907092243, + "loss": 1.5716, + "step": 2643 + }, + { + "epoch": 0.2789029535864979, + "grad_norm": 0.940468966960907, + "learning_rate": 0.0012431433616982204, + "loss": 1.5524, + "step": 2644 + }, + { + "epoch": 0.2790084388185654, + "grad_norm": 0.7563626170158386, + "learning_rate": 0.0012429541774280435, + "loss": 1.624, + "step": 2645 + }, + { + "epoch": 0.2791139240506329, + "grad_norm": 0.7277624011039734, + "learning_rate": 0.0012427649379198932, + "loss": 1.5919, + "step": 2646 + }, + { + "epoch": 0.27921940928270045, + "grad_norm": 0.8325411677360535, + "learning_rate": 0.0012425756431949742, + "loss": 1.5841, + "step": 2647 + }, + { + "epoch": 0.27932489451476794, + "grad_norm": 0.7722815275192261, + "learning_rate": 0.001242386293274498, + "loss": 1.5988, + "step": 2648 + }, + { + "epoch": 0.27943037974683543, + "grad_norm": 0.6647870540618896, + "learning_rate": 0.0012421968881796827, + "loss": 1.5814, + "step": 2649 + }, + { + "epoch": 0.2795358649789029, + "grad_norm": 0.8028674125671387, + "learning_rate": 0.0012420074279317515, + "loss": 1.5856, + "step": 2650 + }, + { + "epoch": 0.2796413502109705, + "grad_norm": 0.6888619065284729, + "learning_rate": 0.001241817912551935, + "loss": 1.5616, + "step": 2651 + }, + { + "epoch": 0.27974683544303797, + "grad_norm": 0.7538149952888489, + "learning_rate": 0.0012416283420614686, + "loss": 1.5904, + "step": 2652 + }, + { + "epoch": 0.27985232067510546, + "grad_norm": 0.653291642665863, + "learning_rate": 0.0012414387164815953, + "loss": 1.6027, + 
"step": 2653 + }, + { + "epoch": 0.279957805907173, + "grad_norm": 0.6451777815818787, + "learning_rate": 0.001241249035833563, + "loss": 1.6108, + "step": 2654 + }, + { + "epoch": 0.2800632911392405, + "grad_norm": 0.6614588499069214, + "learning_rate": 0.0012410593001386267, + "loss": 1.5745, + "step": 2655 + }, + { + "epoch": 0.280168776371308, + "grad_norm": 0.6664301753044128, + "learning_rate": 0.0012408695094180474, + "loss": 1.5785, + "step": 2656 + }, + { + "epoch": 0.28027426160337554, + "grad_norm": 0.7233731746673584, + "learning_rate": 0.0012406796636930918, + "loss": 1.5564, + "step": 2657 + }, + { + "epoch": 0.28037974683544303, + "grad_norm": 0.6469811201095581, + "learning_rate": 0.001240489762985033, + "loss": 1.5303, + "step": 2658 + }, + { + "epoch": 0.2804852320675105, + "grad_norm": 0.6477214694023132, + "learning_rate": 0.0012402998073151505, + "loss": 1.5941, + "step": 2659 + }, + { + "epoch": 0.2805907172995781, + "grad_norm": 0.7461822628974915, + "learning_rate": 0.0012401097967047298, + "loss": 1.5856, + "step": 2660 + }, + { + "epoch": 0.28069620253164557, + "grad_norm": 0.6843252182006836, + "learning_rate": 0.0012399197311750623, + "loss": 1.6091, + "step": 2661 + }, + { + "epoch": 0.28080168776371306, + "grad_norm": 0.7647790908813477, + "learning_rate": 0.001239729610747446, + "loss": 1.5859, + "step": 2662 + }, + { + "epoch": 0.2809071729957806, + "grad_norm": 0.8406313061714172, + "learning_rate": 0.001239539435443185, + "loss": 1.5722, + "step": 2663 + }, + { + "epoch": 0.2810126582278481, + "grad_norm": 0.659518837928772, + "learning_rate": 0.001239349205283589, + "loss": 1.5796, + "step": 2664 + }, + { + "epoch": 0.2811181434599156, + "grad_norm": 0.7679307460784912, + "learning_rate": 0.0012391589202899746, + "loss": 1.5816, + "step": 2665 + }, + { + "epoch": 0.28122362869198314, + "grad_norm": 0.666311502456665, + "learning_rate": 0.001238968580483664, + "loss": 1.5837, + "step": 2666 + }, + { + "epoch": 0.28132911392405063, 
+ "grad_norm": 0.7306806445121765, + "learning_rate": 0.0012387781858859857, + "loss": 1.5746, + "step": 2667 + }, + { + "epoch": 0.2814345991561181, + "grad_norm": 0.6302721500396729, + "learning_rate": 0.0012385877365182743, + "loss": 1.5688, + "step": 2668 + }, + { + "epoch": 0.2815400843881857, + "grad_norm": 0.7993252873420715, + "learning_rate": 0.0012383972324018708, + "loss": 1.5882, + "step": 2669 + }, + { + "epoch": 0.28164556962025317, + "grad_norm": 0.6337637901306152, + "learning_rate": 0.001238206673558122, + "loss": 1.5766, + "step": 2670 + }, + { + "epoch": 0.28175105485232066, + "grad_norm": 0.7897636294364929, + "learning_rate": 0.001238016060008381, + "loss": 1.61, + "step": 2671 + }, + { + "epoch": 0.2818565400843882, + "grad_norm": 0.8890852928161621, + "learning_rate": 0.0012378253917740072, + "loss": 1.5883, + "step": 2672 + }, + { + "epoch": 0.2819620253164557, + "grad_norm": 0.7111097574234009, + "learning_rate": 0.0012376346688763656, + "loss": 1.6259, + "step": 2673 + }, + { + "epoch": 0.2820675105485232, + "grad_norm": 0.6971083283424377, + "learning_rate": 0.0012374438913368277, + "loss": 1.5465, + "step": 2674 + }, + { + "epoch": 0.28217299578059074, + "grad_norm": 1.0294734239578247, + "learning_rate": 0.0012372530591767711, + "loss": 1.5775, + "step": 2675 + }, + { + "epoch": 0.28227848101265823, + "grad_norm": 0.764484167098999, + "learning_rate": 0.0012370621724175797, + "loss": 1.593, + "step": 2676 + }, + { + "epoch": 0.2823839662447257, + "grad_norm": 0.6508345603942871, + "learning_rate": 0.0012368712310806432, + "loss": 1.5526, + "step": 2677 + }, + { + "epoch": 0.2824894514767933, + "grad_norm": 0.6481335163116455, + "learning_rate": 0.0012366802351873574, + "loss": 1.5561, + "step": 2678 + }, + { + "epoch": 0.28259493670886077, + "grad_norm": 0.6504300236701965, + "learning_rate": 0.0012364891847591246, + "loss": 1.5816, + "step": 2679 + }, + { + "epoch": 0.28270042194092826, + "grad_norm": 0.6606478095054626, + 
"learning_rate": 0.0012362980798173526, + "loss": 1.5627, + "step": 2680 + }, + { + "epoch": 0.2828059071729958, + "grad_norm": 0.6483486294746399, + "learning_rate": 0.0012361069203834561, + "loss": 1.5514, + "step": 2681 + }, + { + "epoch": 0.2829113924050633, + "grad_norm": 0.6868378520011902, + "learning_rate": 0.0012359157064788548, + "loss": 1.5416, + "step": 2682 + }, + { + "epoch": 0.2830168776371308, + "grad_norm": 0.9117574691772461, + "learning_rate": 0.0012357244381249759, + "loss": 1.6, + "step": 2683 + }, + { + "epoch": 0.2831223628691983, + "grad_norm": 0.649205207824707, + "learning_rate": 0.0012355331153432517, + "loss": 1.5939, + "step": 2684 + }, + { + "epoch": 0.28322784810126583, + "grad_norm": 0.7302395105361938, + "learning_rate": 0.0012353417381551206, + "loss": 1.5585, + "step": 2685 + }, + { + "epoch": 0.2833333333333333, + "grad_norm": 0.6800633668899536, + "learning_rate": 0.001235150306582028, + "loss": 1.5677, + "step": 2686 + }, + { + "epoch": 0.2834388185654008, + "grad_norm": 0.7222373485565186, + "learning_rate": 0.001234958820645424, + "loss": 1.5868, + "step": 2687 + }, + { + "epoch": 0.28354430379746837, + "grad_norm": 0.6976562738418579, + "learning_rate": 0.0012347672803667662, + "loss": 1.5756, + "step": 2688 + }, + { + "epoch": 0.28364978902953586, + "grad_norm": 0.6510570645332336, + "learning_rate": 0.0012345756857675171, + "loss": 1.5892, + "step": 2689 + }, + { + "epoch": 0.28375527426160335, + "grad_norm": 0.7460522055625916, + "learning_rate": 0.0012343840368691462, + "loss": 1.6229, + "step": 2690 + }, + { + "epoch": 0.2838607594936709, + "grad_norm": 0.6573381423950195, + "learning_rate": 0.0012341923336931287, + "loss": 1.5935, + "step": 2691 + }, + { + "epoch": 0.2839662447257384, + "grad_norm": 0.6616348624229431, + "learning_rate": 0.0012340005762609457, + "loss": 1.5678, + "step": 2692 + }, + { + "epoch": 0.2840717299578059, + "grad_norm": 0.6805869936943054, + "learning_rate": 0.0012338087645940847, + "loss": 
1.6035, + "step": 2693 + }, + { + "epoch": 0.28417721518987343, + "grad_norm": 0.803144633769989, + "learning_rate": 0.001233616898714039, + "loss": 1.5525, + "step": 2694 + }, + { + "epoch": 0.2842827004219409, + "grad_norm": 0.842366635799408, + "learning_rate": 0.0012334249786423086, + "loss": 1.585, + "step": 2695 + }, + { + "epoch": 0.2843881856540084, + "grad_norm": 0.7075057029724121, + "learning_rate": 0.0012332330044003987, + "loss": 1.6137, + "step": 2696 + }, + { + "epoch": 0.28449367088607597, + "grad_norm": 0.7255532145500183, + "learning_rate": 0.0012330409760098208, + "loss": 1.5569, + "step": 2697 + }, + { + "epoch": 0.28459915611814346, + "grad_norm": 0.8227859735488892, + "learning_rate": 0.0012328488934920932, + "loss": 1.5793, + "step": 2698 + }, + { + "epoch": 0.28470464135021095, + "grad_norm": 0.6264166235923767, + "learning_rate": 0.001232656756868739, + "loss": 1.5624, + "step": 2699 + }, + { + "epoch": 0.2848101265822785, + "grad_norm": 0.8216399550437927, + "learning_rate": 0.0012324645661612886, + "loss": 1.5353, + "step": 2700 + }, + { + "epoch": 0.284915611814346, + "grad_norm": 0.8798578381538391, + "learning_rate": 0.001232272321391278, + "loss": 1.6181, + "step": 2701 + }, + { + "epoch": 0.2850210970464135, + "grad_norm": 0.7686014771461487, + "learning_rate": 0.0012320800225802488, + "loss": 1.6059, + "step": 2702 + }, + { + "epoch": 0.28512658227848103, + "grad_norm": 0.7188294529914856, + "learning_rate": 0.001231887669749749, + "loss": 1.5974, + "step": 2703 + }, + { + "epoch": 0.2852320675105485, + "grad_norm": 0.7068557143211365, + "learning_rate": 0.0012316952629213332, + "loss": 1.553, + "step": 2704 + }, + { + "epoch": 0.285337552742616, + "grad_norm": 0.7089752554893494, + "learning_rate": 0.001231502802116561, + "loss": 1.5884, + "step": 2705 + }, + { + "epoch": 0.28544303797468357, + "grad_norm": 0.8042547702789307, + "learning_rate": 0.0012313102873569993, + "loss": 1.578, + "step": 2706 + }, + { + "epoch": 
0.28554852320675106, + "grad_norm": 0.7827745676040649, + "learning_rate": 0.0012311177186642194, + "loss": 1.5829, + "step": 2707 + }, + { + "epoch": 0.28565400843881855, + "grad_norm": 0.8208004832267761, + "learning_rate": 0.0012309250960598, + "loss": 1.5655, + "step": 2708 + }, + { + "epoch": 0.2857594936708861, + "grad_norm": 0.7568551301956177, + "learning_rate": 0.0012307324195653256, + "loss": 1.5628, + "step": 2709 + }, + { + "epoch": 0.2858649789029536, + "grad_norm": 0.7396619915962219, + "learning_rate": 0.0012305396892023867, + "loss": 1.5814, + "step": 2710 + }, + { + "epoch": 0.2859704641350211, + "grad_norm": 0.7955666184425354, + "learning_rate": 0.0012303469049925791, + "loss": 1.5842, + "step": 2711 + }, + { + "epoch": 0.28607594936708863, + "grad_norm": 0.7524197697639465, + "learning_rate": 0.001230154066957506, + "loss": 1.6188, + "step": 2712 + }, + { + "epoch": 0.2861814345991561, + "grad_norm": 0.8447214961051941, + "learning_rate": 0.001229961175118775, + "loss": 1.5711, + "step": 2713 + }, + { + "epoch": 0.2862869198312236, + "grad_norm": 0.727674663066864, + "learning_rate": 0.0012297682294980013, + "loss": 1.6114, + "step": 2714 + }, + { + "epoch": 0.28639240506329117, + "grad_norm": 0.7847912311553955, + "learning_rate": 0.0012295752301168048, + "loss": 1.5959, + "step": 2715 + }, + { + "epoch": 0.28649789029535866, + "grad_norm": 0.9518777132034302, + "learning_rate": 0.0012293821769968126, + "loss": 1.5839, + "step": 2716 + }, + { + "epoch": 0.28660337552742615, + "grad_norm": 0.6774694919586182, + "learning_rate": 0.001229189070159657, + "loss": 1.6082, + "step": 2717 + }, + { + "epoch": 0.28670886075949364, + "grad_norm": 0.7864325642585754, + "learning_rate": 0.0012289959096269767, + "loss": 1.5501, + "step": 2718 + }, + { + "epoch": 0.2868143459915612, + "grad_norm": 0.7177214026451111, + "learning_rate": 0.0012288026954204165, + "loss": 1.5477, + "step": 2719 + }, + { + "epoch": 0.2869198312236287, + "grad_norm": 
0.653083086013794, + "learning_rate": 0.0012286094275616264, + "loss": 1.5626, + "step": 2720 + }, + { + "epoch": 0.2870253164556962, + "grad_norm": 0.6182084679603577, + "learning_rate": 0.0012284161060722634, + "loss": 1.5545, + "step": 2721 + }, + { + "epoch": 0.2871308016877637, + "grad_norm": 0.6609595417976379, + "learning_rate": 0.00122822273097399, + "loss": 1.5549, + "step": 2722 + }, + { + "epoch": 0.2872362869198312, + "grad_norm": 0.6966889500617981, + "learning_rate": 0.0012280293022884753, + "loss": 1.5784, + "step": 2723 + }, + { + "epoch": 0.2873417721518987, + "grad_norm": 0.7356797456741333, + "learning_rate": 0.0012278358200373935, + "loss": 1.5377, + "step": 2724 + }, + { + "epoch": 0.28744725738396626, + "grad_norm": 0.636345624923706, + "learning_rate": 0.001227642284242425, + "loss": 1.5673, + "step": 2725 + }, + { + "epoch": 0.28755274261603375, + "grad_norm": 0.7239104509353638, + "learning_rate": 0.0012274486949252572, + "loss": 1.5922, + "step": 2726 + }, + { + "epoch": 0.28765822784810124, + "grad_norm": 0.7007545828819275, + "learning_rate": 0.0012272550521075824, + "loss": 1.5628, + "step": 2727 + }, + { + "epoch": 0.2877637130801688, + "grad_norm": 0.7903498411178589, + "learning_rate": 0.0012270613558110993, + "loss": 1.532, + "step": 2728 + }, + { + "epoch": 0.2878691983122363, + "grad_norm": 0.6375094056129456, + "learning_rate": 0.001226867606057512, + "loss": 1.5857, + "step": 2729 + }, + { + "epoch": 0.2879746835443038, + "grad_norm": 0.7619816660881042, + "learning_rate": 0.0012266738028685318, + "loss": 1.5715, + "step": 2730 + }, + { + "epoch": 0.2880801687763713, + "grad_norm": 0.8068419098854065, + "learning_rate": 0.001226479946265875, + "loss": 1.5821, + "step": 2731 + }, + { + "epoch": 0.2881856540084388, + "grad_norm": 0.7945586442947388, + "learning_rate": 0.0012262860362712645, + "loss": 1.5799, + "step": 2732 + }, + { + "epoch": 0.2882911392405063, + "grad_norm": 0.6820304989814758, + "learning_rate": 
0.0012260920729064285, + "loss": 1.5854, + "step": 2733 + }, + { + "epoch": 0.28839662447257386, + "grad_norm": 0.7099434733390808, + "learning_rate": 0.0012258980561931016, + "loss": 1.554, + "step": 2734 + }, + { + "epoch": 0.28850210970464135, + "grad_norm": 0.729736328125, + "learning_rate": 0.0012257039861530246, + "loss": 1.5504, + "step": 2735 + }, + { + "epoch": 0.28860759493670884, + "grad_norm": 0.7139186859130859, + "learning_rate": 0.0012255098628079439, + "loss": 1.5458, + "step": 2736 + }, + { + "epoch": 0.2887130801687764, + "grad_norm": 0.7697678208351135, + "learning_rate": 0.0012253156861796119, + "loss": 1.5554, + "step": 2737 + }, + { + "epoch": 0.2888185654008439, + "grad_norm": 0.7280908226966858, + "learning_rate": 0.0012251214562897872, + "loss": 1.5802, + "step": 2738 + }, + { + "epoch": 0.2889240506329114, + "grad_norm": 1.1033036708831787, + "learning_rate": 0.0012249271731602342, + "loss": 1.6103, + "step": 2739 + }, + { + "epoch": 0.2890295358649789, + "grad_norm": 0.8983162641525269, + "learning_rate": 0.001224732836812723, + "loss": 1.6187, + "step": 2740 + }, + { + "epoch": 0.2891350210970464, + "grad_norm": 0.6643196940422058, + "learning_rate": 0.0012245384472690302, + "loss": 1.568, + "step": 2741 + }, + { + "epoch": 0.2892405063291139, + "grad_norm": 0.9021961092948914, + "learning_rate": 0.0012243440045509384, + "loss": 1.598, + "step": 2742 + }, + { + "epoch": 0.28934599156118146, + "grad_norm": 0.8054541349411011, + "learning_rate": 0.0012241495086802356, + "loss": 1.5446, + "step": 2743 + }, + { + "epoch": 0.28945147679324895, + "grad_norm": 0.6901267170906067, + "learning_rate": 0.0012239549596787158, + "loss": 1.5793, + "step": 2744 + }, + { + "epoch": 0.28955696202531644, + "grad_norm": 0.6356019973754883, + "learning_rate": 0.0012237603575681797, + "loss": 1.5867, + "step": 2745 + }, + { + "epoch": 0.289662447257384, + "grad_norm": 0.6802087426185608, + "learning_rate": 0.0012235657023704327, + "loss": 1.5853, + "step": 
2746 + }, + { + "epoch": 0.2897679324894515, + "grad_norm": 0.666998028755188, + "learning_rate": 0.001223370994107288, + "loss": 1.5631, + "step": 2747 + }, + { + "epoch": 0.289873417721519, + "grad_norm": 0.7216251492500305, + "learning_rate": 0.0012231762328005623, + "loss": 1.5856, + "step": 2748 + }, + { + "epoch": 0.28997890295358647, + "grad_norm": 0.6993558406829834, + "learning_rate": 0.0012229814184720805, + "loss": 1.6016, + "step": 2749 + }, + { + "epoch": 0.290084388185654, + "grad_norm": 0.696179211139679, + "learning_rate": 0.0012227865511436724, + "loss": 1.5733, + "step": 2750 + }, + { + "epoch": 0.2901898734177215, + "grad_norm": 0.8150785565376282, + "learning_rate": 0.0012225916308371736, + "loss": 1.5527, + "step": 2751 + }, + { + "epoch": 0.290295358649789, + "grad_norm": 0.6936688423156738, + "learning_rate": 0.001222396657574426, + "loss": 1.5892, + "step": 2752 + }, + { + "epoch": 0.29040084388185655, + "grad_norm": 0.687739372253418, + "learning_rate": 0.0012222016313772773, + "loss": 1.5541, + "step": 2753 + }, + { + "epoch": 0.29050632911392404, + "grad_norm": 0.7123782634735107, + "learning_rate": 0.0012220065522675811, + "loss": 1.5966, + "step": 2754 + }, + { + "epoch": 0.29061181434599154, + "grad_norm": 0.6610796451568604, + "learning_rate": 0.0012218114202671973, + "loss": 1.6194, + "step": 2755 + }, + { + "epoch": 0.2907172995780591, + "grad_norm": 0.8449666500091553, + "learning_rate": 0.001221616235397991, + "loss": 1.5896, + "step": 2756 + }, + { + "epoch": 0.2908227848101266, + "grad_norm": 0.675738513469696, + "learning_rate": 0.001221420997681834, + "loss": 1.5614, + "step": 2757 + }, + { + "epoch": 0.29092827004219407, + "grad_norm": 0.6395388841629028, + "learning_rate": 0.0012212257071406037, + "loss": 1.5664, + "step": 2758 + }, + { + "epoch": 0.2910337552742616, + "grad_norm": 0.6468666195869446, + "learning_rate": 0.0012210303637961828, + "loss": 1.6403, + "step": 2759 + }, + { + "epoch": 0.2911392405063291, + 
"grad_norm": 0.657462477684021, + "learning_rate": 0.001220834967670461, + "loss": 1.5739, + "step": 2760 + }, + { + "epoch": 0.2912447257383966, + "grad_norm": 0.802310585975647, + "learning_rate": 0.0012206395187853334, + "loss": 1.593, + "step": 2761 + }, + { + "epoch": 0.29135021097046415, + "grad_norm": 0.7342401742935181, + "learning_rate": 0.0012204440171627005, + "loss": 1.6019, + "step": 2762 + }, + { + "epoch": 0.29145569620253164, + "grad_norm": 0.6353623867034912, + "learning_rate": 0.00122024846282447, + "loss": 1.6147, + "step": 2763 + }, + { + "epoch": 0.29156118143459914, + "grad_norm": 0.6852613091468811, + "learning_rate": 0.0012200528557925543, + "loss": 1.5353, + "step": 2764 + }, + { + "epoch": 0.2916666666666667, + "grad_norm": 0.8058980107307434, + "learning_rate": 0.0012198571960888721, + "loss": 1.5968, + "step": 2765 + }, + { + "epoch": 0.2917721518987342, + "grad_norm": 0.713118851184845, + "learning_rate": 0.0012196614837353481, + "loss": 1.5694, + "step": 2766 + }, + { + "epoch": 0.29187763713080167, + "grad_norm": 0.6539017558097839, + "learning_rate": 0.001219465718753913, + "loss": 1.5716, + "step": 2767 + }, + { + "epoch": 0.2919831223628692, + "grad_norm": 0.90084308385849, + "learning_rate": 0.0012192699011665034, + "loss": 1.5514, + "step": 2768 + }, + { + "epoch": 0.2920886075949367, + "grad_norm": 0.7787711024284363, + "learning_rate": 0.0012190740309950612, + "loss": 1.5898, + "step": 2769 + }, + { + "epoch": 0.2921940928270042, + "grad_norm": 0.6617241501808167, + "learning_rate": 0.0012188781082615346, + "loss": 1.5985, + "step": 2770 + }, + { + "epoch": 0.29229957805907175, + "grad_norm": 0.6990347504615784, + "learning_rate": 0.0012186821329878783, + "loss": 1.5806, + "step": 2771 + }, + { + "epoch": 0.29240506329113924, + "grad_norm": 0.6698197722434998, + "learning_rate": 0.0012184861051960517, + "loss": 1.5895, + "step": 2772 + }, + { + "epoch": 0.29251054852320674, + "grad_norm": 0.6594641208648682, + "learning_rate": 
0.001218290024908021, + "loss": 1.5766, + "step": 2773 + }, + { + "epoch": 0.2926160337552743, + "grad_norm": 0.6539227366447449, + "learning_rate": 0.0012180938921457576, + "loss": 1.57, + "step": 2774 + }, + { + "epoch": 0.2927215189873418, + "grad_norm": 0.6391293406486511, + "learning_rate": 0.00121789770693124, + "loss": 1.5741, + "step": 2775 + }, + { + "epoch": 0.29282700421940927, + "grad_norm": 0.6989942789077759, + "learning_rate": 0.001217701469286451, + "loss": 1.5754, + "step": 2776 + }, + { + "epoch": 0.2929324894514768, + "grad_norm": 0.7478144764900208, + "learning_rate": 0.00121750517923338, + "loss": 1.5945, + "step": 2777 + }, + { + "epoch": 0.2930379746835443, + "grad_norm": 0.6569371819496155, + "learning_rate": 0.0012173088367940228, + "loss": 1.5669, + "step": 2778 + }, + { + "epoch": 0.2931434599156118, + "grad_norm": 0.7090775370597839, + "learning_rate": 0.0012171124419903799, + "loss": 1.5875, + "step": 2779 + }, + { + "epoch": 0.29324894514767935, + "grad_norm": 0.7061943411827087, + "learning_rate": 0.0012169159948444588, + "loss": 1.5628, + "step": 2780 + }, + { + "epoch": 0.29335443037974684, + "grad_norm": 0.7989711761474609, + "learning_rate": 0.001216719495378272, + "loss": 1.5821, + "step": 2781 + }, + { + "epoch": 0.29345991561181434, + "grad_norm": 0.7229551076889038, + "learning_rate": 0.0012165229436138388, + "loss": 1.5771, + "step": 2782 + }, + { + "epoch": 0.29356540084388183, + "grad_norm": 0.6605081558227539, + "learning_rate": 0.0012163263395731834, + "loss": 1.5524, + "step": 2783 + }, + { + "epoch": 0.2936708860759494, + "grad_norm": 0.7446861267089844, + "learning_rate": 0.0012161296832783363, + "loss": 1.5774, + "step": 2784 + }, + { + "epoch": 0.29377637130801687, + "grad_norm": 0.8056871891021729, + "learning_rate": 0.0012159329747513338, + "loss": 1.5763, + "step": 2785 + }, + { + "epoch": 0.29388185654008436, + "grad_norm": 0.7094091176986694, + "learning_rate": 0.001215736214014218, + "loss": 1.5442, + "step": 
2786 + }, + { + "epoch": 0.2939873417721519, + "grad_norm": 0.6531260013580322, + "learning_rate": 0.001215539401089037, + "loss": 1.5713, + "step": 2787 + }, + { + "epoch": 0.2940928270042194, + "grad_norm": 0.7402955889701843, + "learning_rate": 0.0012153425359978452, + "loss": 1.5938, + "step": 2788 + }, + { + "epoch": 0.2941983122362869, + "grad_norm": 0.7041838765144348, + "learning_rate": 0.0012151456187627016, + "loss": 1.6088, + "step": 2789 + }, + { + "epoch": 0.29430379746835444, + "grad_norm": 0.6048427224159241, + "learning_rate": 0.001214948649405672, + "loss": 1.5729, + "step": 2790 + }, + { + "epoch": 0.29440928270042194, + "grad_norm": 0.6409943699836731, + "learning_rate": 0.0012147516279488275, + "loss": 1.5817, + "step": 2791 + }, + { + "epoch": 0.29451476793248943, + "grad_norm": 0.6541368365287781, + "learning_rate": 0.0012145545544142461, + "loss": 1.5382, + "step": 2792 + }, + { + "epoch": 0.294620253164557, + "grad_norm": 0.6974977254867554, + "learning_rate": 0.00121435742882401, + "loss": 1.5766, + "step": 2793 + }, + { + "epoch": 0.29472573839662447, + "grad_norm": 0.6655219197273254, + "learning_rate": 0.001214160251200209, + "loss": 1.5785, + "step": 2794 + }, + { + "epoch": 0.29483122362869196, + "grad_norm": 0.6975685358047485, + "learning_rate": 0.0012139630215649369, + "loss": 1.5545, + "step": 2795 + }, + { + "epoch": 0.2949367088607595, + "grad_norm": 0.639297604560852, + "learning_rate": 0.0012137657399402947, + "loss": 1.6022, + "step": 2796 + }, + { + "epoch": 0.295042194092827, + "grad_norm": 0.7718762755393982, + "learning_rate": 0.0012135684063483891, + "loss": 1.5729, + "step": 2797 + }, + { + "epoch": 0.2951476793248945, + "grad_norm": 0.7346585988998413, + "learning_rate": 0.0012133710208113318, + "loss": 1.5897, + "step": 2798 + }, + { + "epoch": 0.29525316455696204, + "grad_norm": 0.8581695556640625, + "learning_rate": 0.0012131735833512411, + "loss": 1.5856, + "step": 2799 + }, + { + "epoch": 0.29535864978902954, + 
"grad_norm": 1.0832926034927368, + "learning_rate": 0.0012129760939902407, + "loss": 1.5409, + "step": 2800 + }, + { + "epoch": 0.29546413502109703, + "grad_norm": 0.7086189389228821, + "learning_rate": 0.0012127785527504603, + "loss": 1.5861, + "step": 2801 + }, + { + "epoch": 0.2955696202531646, + "grad_norm": 0.6864866018295288, + "learning_rate": 0.0012125809596540357, + "loss": 1.5543, + "step": 2802 + }, + { + "epoch": 0.29567510548523207, + "grad_norm": 0.6633280515670776, + "learning_rate": 0.0012123833147231079, + "loss": 1.5843, + "step": 2803 + }, + { + "epoch": 0.29578059071729956, + "grad_norm": 0.6960739493370056, + "learning_rate": 0.0012121856179798237, + "loss": 1.5868, + "step": 2804 + }, + { + "epoch": 0.2958860759493671, + "grad_norm": 0.6957220435142517, + "learning_rate": 0.0012119878694463366, + "loss": 1.5558, + "step": 2805 + }, + { + "epoch": 0.2959915611814346, + "grad_norm": 0.6652408838272095, + "learning_rate": 0.001211790069144805, + "loss": 1.5733, + "step": 2806 + }, + { + "epoch": 0.2960970464135021, + "grad_norm": 0.7453683018684387, + "learning_rate": 0.0012115922170973935, + "loss": 1.5812, + "step": 2807 + }, + { + "epoch": 0.29620253164556964, + "grad_norm": 0.7249065637588501, + "learning_rate": 0.0012113943133262722, + "loss": 1.5723, + "step": 2808 + }, + { + "epoch": 0.29630801687763714, + "grad_norm": 0.8133265972137451, + "learning_rate": 0.0012111963578536177, + "loss": 1.6007, + "step": 2809 + }, + { + "epoch": 0.29641350210970463, + "grad_norm": 0.6680949330329895, + "learning_rate": 0.0012109983507016114, + "loss": 1.5703, + "step": 2810 + }, + { + "epoch": 0.2965189873417722, + "grad_norm": 0.8091139197349548, + "learning_rate": 0.0012108002918924411, + "loss": 1.576, + "step": 2811 + }, + { + "epoch": 0.29662447257383967, + "grad_norm": 0.8685750961303711, + "learning_rate": 0.0012106021814483007, + "loss": 1.5583, + "step": 2812 + }, + { + "epoch": 0.29672995780590716, + "grad_norm": 0.7442207336425781, + 
"learning_rate": 0.0012104040193913884, + "loss": 1.5668, + "step": 2813 + }, + { + "epoch": 0.2968354430379747, + "grad_norm": 0.7005501389503479, + "learning_rate": 0.0012102058057439104, + "loss": 1.6057, + "step": 2814 + }, + { + "epoch": 0.2969409282700422, + "grad_norm": 0.7700272798538208, + "learning_rate": 0.001210007540528077, + "loss": 1.5817, + "step": 2815 + }, + { + "epoch": 0.2970464135021097, + "grad_norm": 0.7174177765846252, + "learning_rate": 0.0012098092237661049, + "loss": 1.5351, + "step": 2816 + }, + { + "epoch": 0.2971518987341772, + "grad_norm": 0.7803359031677246, + "learning_rate": 0.0012096108554802165, + "loss": 1.5739, + "step": 2817 + }, + { + "epoch": 0.29725738396624474, + "grad_norm": 0.7084279656410217, + "learning_rate": 0.0012094124356926397, + "loss": 1.6144, + "step": 2818 + }, + { + "epoch": 0.29736286919831223, + "grad_norm": 0.8454287052154541, + "learning_rate": 0.001209213964425609, + "loss": 1.5862, + "step": 2819 + }, + { + "epoch": 0.2974683544303797, + "grad_norm": 0.8337502479553223, + "learning_rate": 0.0012090154417013636, + "loss": 1.6038, + "step": 2820 + }, + { + "epoch": 0.29757383966244727, + "grad_norm": 0.7652432918548584, + "learning_rate": 0.0012088168675421487, + "loss": 1.5534, + "step": 2821 + }, + { + "epoch": 0.29767932489451476, + "grad_norm": 0.8917245864868164, + "learning_rate": 0.0012086182419702165, + "loss": 1.542, + "step": 2822 + }, + { + "epoch": 0.29778481012658226, + "grad_norm": 0.7015735507011414, + "learning_rate": 0.0012084195650078232, + "loss": 1.5811, + "step": 2823 + }, + { + "epoch": 0.2978902953586498, + "grad_norm": 1.1248955726623535, + "learning_rate": 0.001208220836677232, + "loss": 1.5469, + "step": 2824 + }, + { + "epoch": 0.2979957805907173, + "grad_norm": 0.8867091536521912, + "learning_rate": 0.0012080220570007108, + "loss": 1.5547, + "step": 2825 + }, + { + "epoch": 0.2981012658227848, + "grad_norm": 0.7846081256866455, + "learning_rate": 0.001207823226000534, + "loss": 
1.5676, + "step": 2826 + }, + { + "epoch": 0.29820675105485234, + "grad_norm": 1.4423198699951172, + "learning_rate": 0.0012076243436989823, + "loss": 1.5351, + "step": 2827 + }, + { + "epoch": 0.29831223628691983, + "grad_norm": 0.7295491099357605, + "learning_rate": 0.0012074254101183408, + "loss": 1.5577, + "step": 2828 + }, + { + "epoch": 0.2984177215189873, + "grad_norm": 1.093159794807434, + "learning_rate": 0.001207226425280901, + "loss": 1.5629, + "step": 2829 + }, + { + "epoch": 0.29852320675105487, + "grad_norm": 0.9027212262153625, + "learning_rate": 0.0012070273892089605, + "loss": 1.5591, + "step": 2830 + }, + { + "epoch": 0.29862869198312236, + "grad_norm": 0.7385981678962708, + "learning_rate": 0.001206828301924822, + "loss": 1.5354, + "step": 2831 + }, + { + "epoch": 0.29873417721518986, + "grad_norm": 0.9690129160881042, + "learning_rate": 0.0012066291634507944, + "loss": 1.5789, + "step": 2832 + }, + { + "epoch": 0.2988396624472574, + "grad_norm": 0.5991365909576416, + "learning_rate": 0.001206429973809192, + "loss": 1.5456, + "step": 2833 + }, + { + "epoch": 0.2989451476793249, + "grad_norm": 0.8869980573654175, + "learning_rate": 0.001206230733022335, + "loss": 1.6022, + "step": 2834 + }, + { + "epoch": 0.2990506329113924, + "grad_norm": 0.9491106271743774, + "learning_rate": 0.0012060314411125497, + "loss": 1.5401, + "step": 2835 + }, + { + "epoch": 0.29915611814345994, + "grad_norm": 0.7023110389709473, + "learning_rate": 0.0012058320981021672, + "loss": 1.5342, + "step": 2836 + }, + { + "epoch": 0.29926160337552743, + "grad_norm": 0.7689185738563538, + "learning_rate": 0.001205632704013525, + "loss": 1.5556, + "step": 2837 + }, + { + "epoch": 0.2993670886075949, + "grad_norm": 0.7901353240013123, + "learning_rate": 0.0012054332588689667, + "loss": 1.537, + "step": 2838 + }, + { + "epoch": 0.29947257383966247, + "grad_norm": 0.6543164253234863, + "learning_rate": 0.0012052337626908406, + "loss": 1.5409, + "step": 2839 + }, + { + "epoch": 
0.29957805907172996, + "grad_norm": 0.8590355515480042, + "learning_rate": 0.0012050342155015012, + "loss": 1.5824, + "step": 2840 + }, + { + "epoch": 0.29968354430379746, + "grad_norm": 0.6871265172958374, + "learning_rate": 0.0012048346173233091, + "loss": 1.5492, + "step": 2841 + }, + { + "epoch": 0.299789029535865, + "grad_norm": 0.8635113835334778, + "learning_rate": 0.0012046349681786304, + "loss": 1.5891, + "step": 2842 + }, + { + "epoch": 0.2998945147679325, + "grad_norm": 0.9312455058097839, + "learning_rate": 0.001204435268089836, + "loss": 1.5452, + "step": 2843 + }, + { + "epoch": 0.3, + "grad_norm": 0.6170958876609802, + "learning_rate": 0.001204235517079304, + "loss": 1.5742, + "step": 2844 + }, + { + "epoch": 0.30010548523206754, + "grad_norm": 0.9936872720718384, + "learning_rate": 0.0012040357151694172, + "loss": 1.5678, + "step": 2845 + }, + { + "epoch": 0.30021097046413503, + "grad_norm": 0.844024658203125, + "learning_rate": 0.0012038358623825646, + "loss": 1.5712, + "step": 2846 + }, + { + "epoch": 0.3003164556962025, + "grad_norm": 0.6691447496414185, + "learning_rate": 0.0012036359587411405, + "loss": 1.5614, + "step": 2847 + }, + { + "epoch": 0.30042194092827, + "grad_norm": 0.88441401720047, + "learning_rate": 0.0012034360042675453, + "loss": 1.5418, + "step": 2848 + }, + { + "epoch": 0.30052742616033756, + "grad_norm": 0.7982460856437683, + "learning_rate": 0.0012032359989841849, + "loss": 1.5554, + "step": 2849 + }, + { + "epoch": 0.30063291139240506, + "grad_norm": 0.6783889532089233, + "learning_rate": 0.0012030359429134707, + "loss": 1.5563, + "step": 2850 + }, + { + "epoch": 0.30073839662447255, + "grad_norm": 1.269956111907959, + "learning_rate": 0.00120283583607782, + "loss": 1.527, + "step": 2851 + }, + { + "epoch": 0.3008438818565401, + "grad_norm": 0.8000176548957825, + "learning_rate": 0.0012026356784996554, + "loss": 1.5741, + "step": 2852 + }, + { + "epoch": 0.3009493670886076, + "grad_norm": 0.742045521736145, + 
"learning_rate": 0.0012024354702014066, + "loss": 1.56, + "step": 2853 + }, + { + "epoch": 0.3010548523206751, + "grad_norm": 0.7874894142150879, + "learning_rate": 0.0012022352112055071, + "loss": 1.5643, + "step": 2854 + }, + { + "epoch": 0.30116033755274263, + "grad_norm": 0.667751133441925, + "learning_rate": 0.001202034901534397, + "loss": 1.6147, + "step": 2855 + }, + { + "epoch": 0.3012658227848101, + "grad_norm": 0.9558847546577454, + "learning_rate": 0.0012018345412105223, + "loss": 1.5838, + "step": 2856 + }, + { + "epoch": 0.3013713080168776, + "grad_norm": 0.6582426428794861, + "learning_rate": 0.0012016341302563342, + "loss": 1.5623, + "step": 2857 + }, + { + "epoch": 0.30147679324894516, + "grad_norm": 0.8859541416168213, + "learning_rate": 0.0012014336686942898, + "loss": 1.5613, + "step": 2858 + }, + { + "epoch": 0.30158227848101266, + "grad_norm": 0.8912439942359924, + "learning_rate": 0.0012012331565468518, + "loss": 1.5537, + "step": 2859 + }, + { + "epoch": 0.30168776371308015, + "grad_norm": 0.6370379328727722, + "learning_rate": 0.0012010325938364883, + "loss": 1.5666, + "step": 2860 + }, + { + "epoch": 0.3017932489451477, + "grad_norm": 0.9538683295249939, + "learning_rate": 0.0012008319805856737, + "loss": 1.5795, + "step": 2861 + }, + { + "epoch": 0.3018987341772152, + "grad_norm": 0.752892792224884, + "learning_rate": 0.0012006313168168878, + "loss": 1.5602, + "step": 2862 + }, + { + "epoch": 0.3020042194092827, + "grad_norm": 0.6643028855323792, + "learning_rate": 0.0012004306025526158, + "loss": 1.5541, + "step": 2863 + }, + { + "epoch": 0.30210970464135023, + "grad_norm": 0.7409567832946777, + "learning_rate": 0.0012002298378153485, + "loss": 1.5407, + "step": 2864 + }, + { + "epoch": 0.3022151898734177, + "grad_norm": 0.6642387509346008, + "learning_rate": 0.001200029022627583, + "loss": 1.5753, + "step": 2865 + }, + { + "epoch": 0.3023206751054852, + "grad_norm": 0.6990419030189514, + "learning_rate": 0.0011998281570118213, + "loss": 
1.6283, + "step": 2866 + }, + { + "epoch": 0.30242616033755276, + "grad_norm": 0.7036248445510864, + "learning_rate": 0.0011996272409905717, + "loss": 1.5708, + "step": 2867 + }, + { + "epoch": 0.30253164556962026, + "grad_norm": 0.6569391489028931, + "learning_rate": 0.0011994262745863478, + "loss": 1.5626, + "step": 2868 + }, + { + "epoch": 0.30263713080168775, + "grad_norm": 0.6643542051315308, + "learning_rate": 0.0011992252578216683, + "loss": 1.5142, + "step": 2869 + }, + { + "epoch": 0.3027426160337553, + "grad_norm": 0.7247553467750549, + "learning_rate": 0.0011990241907190592, + "loss": 1.5978, + "step": 2870 + }, + { + "epoch": 0.3028481012658228, + "grad_norm": 0.6568275094032288, + "learning_rate": 0.0011988230733010502, + "loss": 1.5546, + "step": 2871 + }, + { + "epoch": 0.3029535864978903, + "grad_norm": 0.6638161540031433, + "learning_rate": 0.0011986219055901781, + "loss": 1.5657, + "step": 2872 + }, + { + "epoch": 0.30305907172995783, + "grad_norm": 0.6861039400100708, + "learning_rate": 0.0011984206876089842, + "loss": 1.5805, + "step": 2873 + }, + { + "epoch": 0.3031645569620253, + "grad_norm": 0.6942607164382935, + "learning_rate": 0.001198219419380016, + "loss": 1.5122, + "step": 2874 + }, + { + "epoch": 0.3032700421940928, + "grad_norm": 0.7365118265151978, + "learning_rate": 0.0011980181009258273, + "loss": 1.5525, + "step": 2875 + }, + { + "epoch": 0.30337552742616036, + "grad_norm": 0.6636347770690918, + "learning_rate": 0.0011978167322689761, + "loss": 1.5468, + "step": 2876 + }, + { + "epoch": 0.30348101265822786, + "grad_norm": 0.7141450643539429, + "learning_rate": 0.001197615313432027, + "loss": 1.5368, + "step": 2877 + }, + { + "epoch": 0.30358649789029535, + "grad_norm": 0.7123973965644836, + "learning_rate": 0.00119741384443755, + "loss": 1.5744, + "step": 2878 + }, + { + "epoch": 0.3036919831223629, + "grad_norm": 0.729347825050354, + "learning_rate": 0.001197212325308121, + "loss": 1.5553, + "step": 2879 + }, + { + "epoch": 
0.3037974683544304, + "grad_norm": 0.8062238097190857, + "learning_rate": 0.001197010756066321, + "loss": 1.5601, + "step": 2880 + }, + { + "epoch": 0.3039029535864979, + "grad_norm": 0.656792163848877, + "learning_rate": 0.0011968091367347367, + "loss": 1.56, + "step": 2881 + }, + { + "epoch": 0.3040084388185654, + "grad_norm": 0.8536285758018494, + "learning_rate": 0.0011966074673359602, + "loss": 1.5608, + "step": 2882 + }, + { + "epoch": 0.3041139240506329, + "grad_norm": 0.6844868659973145, + "learning_rate": 0.0011964057478925903, + "loss": 1.5242, + "step": 2883 + }, + { + "epoch": 0.3042194092827004, + "grad_norm": 0.692068338394165, + "learning_rate": 0.0011962039784272306, + "loss": 1.5644, + "step": 2884 + }, + { + "epoch": 0.3043248945147679, + "grad_norm": 0.7965957522392273, + "learning_rate": 0.0011960021589624897, + "loss": 1.567, + "step": 2885 + }, + { + "epoch": 0.30443037974683546, + "grad_norm": 0.7035136818885803, + "learning_rate": 0.001195800289520983, + "loss": 1.5736, + "step": 2886 + }, + { + "epoch": 0.30453586497890295, + "grad_norm": 0.6302428245544434, + "learning_rate": 0.0011955983701253312, + "loss": 1.5771, + "step": 2887 + }, + { + "epoch": 0.30464135021097044, + "grad_norm": 0.6358972191810608, + "learning_rate": 0.0011953964007981601, + "loss": 1.523, + "step": 2888 + }, + { + "epoch": 0.304746835443038, + "grad_norm": 0.6950017213821411, + "learning_rate": 0.001195194381562101, + "loss": 1.5938, + "step": 2889 + }, + { + "epoch": 0.3048523206751055, + "grad_norm": 0.7092896699905396, + "learning_rate": 0.0011949923124397917, + "loss": 1.5491, + "step": 2890 + }, + { + "epoch": 0.304957805907173, + "grad_norm": 0.6942798495292664, + "learning_rate": 0.0011947901934538747, + "loss": 1.5775, + "step": 2891 + }, + { + "epoch": 0.3050632911392405, + "grad_norm": 0.6566995978355408, + "learning_rate": 0.0011945880246269987, + "loss": 1.5679, + "step": 2892 + }, + { + "epoch": 0.305168776371308, + "grad_norm": 0.7124459147453308, + 
"learning_rate": 0.0011943858059818178, + "loss": 1.5461, + "step": 2893 + }, + { + "epoch": 0.3052742616033755, + "grad_norm": 0.6663682460784912, + "learning_rate": 0.0011941835375409912, + "loss": 1.571, + "step": 2894 + }, + { + "epoch": 0.30537974683544306, + "grad_norm": 0.6464027762413025, + "learning_rate": 0.0011939812193271844, + "loss": 1.5561, + "step": 2895 + }, + { + "epoch": 0.30548523206751055, + "grad_norm": 0.6659049391746521, + "learning_rate": 0.001193778851363068, + "loss": 1.5589, + "step": 2896 + }, + { + "epoch": 0.30559071729957804, + "grad_norm": 0.6549782752990723, + "learning_rate": 0.0011935764336713187, + "loss": 1.5885, + "step": 2897 + }, + { + "epoch": 0.3056962025316456, + "grad_norm": 0.6699451208114624, + "learning_rate": 0.0011933739662746178, + "loss": 1.5747, + "step": 2898 + }, + { + "epoch": 0.3058016877637131, + "grad_norm": 0.6853469014167786, + "learning_rate": 0.0011931714491956531, + "loss": 1.5615, + "step": 2899 + }, + { + "epoch": 0.3059071729957806, + "grad_norm": 0.6707512140274048, + "learning_rate": 0.001192968882457118, + "loss": 1.5389, + "step": 2900 + }, + { + "epoch": 0.3060126582278481, + "grad_norm": 0.8721629977226257, + "learning_rate": 0.0011927662660817105, + "loss": 1.5626, + "step": 2901 + }, + { + "epoch": 0.3061181434599156, + "grad_norm": 0.7252481579780579, + "learning_rate": 0.0011925636000921355, + "loss": 1.5859, + "step": 2902 + }, + { + "epoch": 0.3062236286919831, + "grad_norm": 0.7614458203315735, + "learning_rate": 0.0011923608845111017, + "loss": 1.5632, + "step": 2903 + }, + { + "epoch": 0.30632911392405066, + "grad_norm": 1.3244301080703735, + "learning_rate": 0.0011921581193613253, + "loss": 1.5798, + "step": 2904 + }, + { + "epoch": 0.30643459915611815, + "grad_norm": 0.7367245554924011, + "learning_rate": 0.0011919553046655267, + "loss": 1.5854, + "step": 2905 + }, + { + "epoch": 0.30654008438818564, + "grad_norm": 0.9199919700622559, + "learning_rate": 0.0011917524404464325, + 
"loss": 1.6015, + "step": 2906 + }, + { + "epoch": 0.3066455696202532, + "grad_norm": 1.1363157033920288, + "learning_rate": 0.0011915495267267745, + "loss": 1.5376, + "step": 2907 + }, + { + "epoch": 0.3067510548523207, + "grad_norm": 0.6499098539352417, + "learning_rate": 0.0011913465635292903, + "loss": 1.5618, + "step": 2908 + }, + { + "epoch": 0.3068565400843882, + "grad_norm": 0.9670907258987427, + "learning_rate": 0.001191143550876723, + "loss": 1.5485, + "step": 2909 + }, + { + "epoch": 0.3069620253164557, + "grad_norm": 0.786942183971405, + "learning_rate": 0.001190940488791821, + "loss": 1.5437, + "step": 2910 + }, + { + "epoch": 0.3070675105485232, + "grad_norm": 0.877210795879364, + "learning_rate": 0.0011907373772973384, + "loss": 1.5207, + "step": 2911 + }, + { + "epoch": 0.3071729957805907, + "grad_norm": 1.3099721670150757, + "learning_rate": 0.001190534216416035, + "loss": 1.5713, + "step": 2912 + }, + { + "epoch": 0.30727848101265826, + "grad_norm": 0.8676727414131165, + "learning_rate": 0.0011903310061706762, + "loss": 1.5391, + "step": 2913 + }, + { + "epoch": 0.30738396624472575, + "grad_norm": 1.5783754587173462, + "learning_rate": 0.0011901277465840323, + "loss": 1.5905, + "step": 2914 + }, + { + "epoch": 0.30748945147679324, + "grad_norm": 1.157865285873413, + "learning_rate": 0.0011899244376788797, + "loss": 1.5969, + "step": 2915 + }, + { + "epoch": 0.30759493670886073, + "grad_norm": 1.5883450508117676, + "learning_rate": 0.001189721079478, + "loss": 1.5809, + "step": 2916 + }, + { + "epoch": 0.3077004219409283, + "grad_norm": 1.3484629392623901, + "learning_rate": 0.001189517672004181, + "loss": 1.5404, + "step": 2917 + }, + { + "epoch": 0.3078059071729958, + "grad_norm": 0.9645856618881226, + "learning_rate": 0.0011893142152802152, + "loss": 1.5495, + "step": 2918 + }, + { + "epoch": 0.30791139240506327, + "grad_norm": 1.1860847473144531, + "learning_rate": 0.0011891107093289007, + "loss": 1.5977, + "step": 2919 + }, + { + "epoch": 
0.3080168776371308, + "grad_norm": 0.9120749235153198, + "learning_rate": 0.0011889071541730419, + "loss": 1.5487, + "step": 2920 + }, + { + "epoch": 0.3081223628691983, + "grad_norm": 0.9377906918525696, + "learning_rate": 0.0011887035498354475, + "loss": 1.5151, + "step": 2921 + }, + { + "epoch": 0.3082278481012658, + "grad_norm": 1.1667531728744507, + "learning_rate": 0.0011884998963389334, + "loss": 1.5684, + "step": 2922 + }, + { + "epoch": 0.30833333333333335, + "grad_norm": 0.6725348830223083, + "learning_rate": 0.0011882961937063187, + "loss": 1.564, + "step": 2923 + }, + { + "epoch": 0.30843881856540084, + "grad_norm": 0.7134724259376526, + "learning_rate": 0.0011880924419604305, + "loss": 1.5349, + "step": 2924 + }, + { + "epoch": 0.30854430379746833, + "grad_norm": 0.7325636148452759, + "learning_rate": 0.0011878886411240991, + "loss": 1.5386, + "step": 2925 + }, + { + "epoch": 0.3086497890295359, + "grad_norm": 0.9527124762535095, + "learning_rate": 0.0011876847912201624, + "loss": 1.5516, + "step": 2926 + }, + { + "epoch": 0.3087552742616034, + "grad_norm": 0.8622420430183411, + "learning_rate": 0.0011874808922714623, + "loss": 1.5725, + "step": 2927 + }, + { + "epoch": 0.30886075949367087, + "grad_norm": 0.6697618961334229, + "learning_rate": 0.0011872769443008466, + "loss": 1.5835, + "step": 2928 + }, + { + "epoch": 0.3089662447257384, + "grad_norm": 0.8264425992965698, + "learning_rate": 0.001187072947331169, + "loss": 1.5272, + "step": 2929 + }, + { + "epoch": 0.3090717299578059, + "grad_norm": 0.6446837782859802, + "learning_rate": 0.001186868901385288, + "loss": 1.5396, + "step": 2930 + }, + { + "epoch": 0.3091772151898734, + "grad_norm": 0.6855509877204895, + "learning_rate": 0.0011866648064860683, + "loss": 1.5789, + "step": 2931 + }, + { + "epoch": 0.30928270042194095, + "grad_norm": 0.7092143297195435, + "learning_rate": 0.0011864606626563795, + "loss": 1.5919, + "step": 2932 + }, + { + "epoch": 0.30938818565400844, + "grad_norm": 
0.6249159574508667, + "learning_rate": 0.0011862564699190972, + "loss": 1.55, + "step": 2933 + }, + { + "epoch": 0.30949367088607593, + "grad_norm": 0.6889662742614746, + "learning_rate": 0.0011860522282971019, + "loss": 1.5686, + "step": 2934 + }, + { + "epoch": 0.3095991561181435, + "grad_norm": 0.6542078852653503, + "learning_rate": 0.0011858479378132802, + "loss": 1.5647, + "step": 2935 + }, + { + "epoch": 0.309704641350211, + "grad_norm": 0.7131572365760803, + "learning_rate": 0.0011856435984905237, + "loss": 1.6024, + "step": 2936 + }, + { + "epoch": 0.30981012658227847, + "grad_norm": 0.645293116569519, + "learning_rate": 0.00118543921035173, + "loss": 1.5233, + "step": 2937 + }, + { + "epoch": 0.309915611814346, + "grad_norm": 0.7261372804641724, + "learning_rate": 0.001185234773419801, + "loss": 1.531, + "step": 2938 + }, + { + "epoch": 0.3100210970464135, + "grad_norm": 0.6822670698165894, + "learning_rate": 0.0011850302877176456, + "loss": 1.5072, + "step": 2939 + }, + { + "epoch": 0.310126582278481, + "grad_norm": 0.7404113411903381, + "learning_rate": 0.001184825753268177, + "loss": 1.5447, + "step": 2940 + }, + { + "epoch": 0.31023206751054855, + "grad_norm": 0.7395941019058228, + "learning_rate": 0.0011846211700943148, + "loss": 1.5762, + "step": 2941 + }, + { + "epoch": 0.31033755274261604, + "grad_norm": 0.6627570986747742, + "learning_rate": 0.001184416538218983, + "loss": 1.5505, + "step": 2942 + }, + { + "epoch": 0.31044303797468353, + "grad_norm": 0.7095774412155151, + "learning_rate": 0.0011842118576651122, + "loss": 1.5767, + "step": 2943 + }, + { + "epoch": 0.3105485232067511, + "grad_norm": 0.7646583318710327, + "learning_rate": 0.0011840071284556373, + "loss": 1.5908, + "step": 2944 + }, + { + "epoch": 0.3106540084388186, + "grad_norm": 0.6962594389915466, + "learning_rate": 0.0011838023506134997, + "loss": 1.577, + "step": 2945 + }, + { + "epoch": 0.31075949367088607, + "grad_norm": 0.7284361720085144, + "learning_rate": 
0.0011835975241616455, + "loss": 1.5457, + "step": 2946 + }, + { + "epoch": 0.31086497890295356, + "grad_norm": 0.7899792790412903, + "learning_rate": 0.0011833926491230265, + "loss": 1.5772, + "step": 2947 + }, + { + "epoch": 0.3109704641350211, + "grad_norm": 0.7181764841079712, + "learning_rate": 0.0011831877255206002, + "loss": 1.5741, + "step": 2948 + }, + { + "epoch": 0.3110759493670886, + "grad_norm": 1.0018302202224731, + "learning_rate": 0.0011829827533773292, + "loss": 1.5753, + "step": 2949 + }, + { + "epoch": 0.3111814345991561, + "grad_norm": 0.9424835443496704, + "learning_rate": 0.0011827777327161814, + "loss": 1.5337, + "step": 2950 + }, + { + "epoch": 0.31128691983122364, + "grad_norm": 0.7502703070640564, + "learning_rate": 0.001182572663560131, + "loss": 1.5251, + "step": 2951 + }, + { + "epoch": 0.31139240506329113, + "grad_norm": 1.4392424821853638, + "learning_rate": 0.0011823675459321564, + "loss": 1.5644, + "step": 2952 + }, + { + "epoch": 0.3114978902953586, + "grad_norm": 0.7532937526702881, + "learning_rate": 0.0011821623798552424, + "loss": 1.5705, + "step": 2953 + }, + { + "epoch": 0.3116033755274262, + "grad_norm": 1.7455614805221558, + "learning_rate": 0.001181957165352379, + "loss": 1.5512, + "step": 2954 + }, + { + "epoch": 0.31170886075949367, + "grad_norm": 1.098635196685791, + "learning_rate": 0.0011817519024465608, + "loss": 1.5494, + "step": 2955 + }, + { + "epoch": 0.31181434599156116, + "grad_norm": 1.9003639221191406, + "learning_rate": 0.0011815465911607893, + "loss": 1.5819, + "step": 2956 + }, + { + "epoch": 0.3119198312236287, + "grad_norm": 1.7784790992736816, + "learning_rate": 0.0011813412315180704, + "loss": 1.5879, + "step": 2957 + }, + { + "epoch": 0.3120253164556962, + "grad_norm": 0.8847769498825073, + "learning_rate": 0.0011811358235414154, + "loss": 1.5337, + "step": 2958 + }, + { + "epoch": 0.3121308016877637, + "grad_norm": 1.3712064027786255, + "learning_rate": 0.0011809303672538417, + "loss": 1.5704, + 
"step": 2959 + }, + { + "epoch": 0.31223628691983124, + "grad_norm": 0.8050950765609741, + "learning_rate": 0.0011807248626783714, + "loss": 1.559, + "step": 2960 + }, + { + "epoch": 0.31234177215189873, + "grad_norm": 1.1332749128341675, + "learning_rate": 0.0011805193098380327, + "loss": 1.532, + "step": 2961 + }, + { + "epoch": 0.3124472573839662, + "grad_norm": 0.7690959572792053, + "learning_rate": 0.0011803137087558584, + "loss": 1.5497, + "step": 2962 + }, + { + "epoch": 0.3125527426160338, + "grad_norm": 1.0779491662979126, + "learning_rate": 0.0011801080594548874, + "loss": 1.5444, + "step": 2963 + }, + { + "epoch": 0.31265822784810127, + "grad_norm": 0.7092822790145874, + "learning_rate": 0.0011799023619581638, + "loss": 1.5171, + "step": 2964 + }, + { + "epoch": 0.31276371308016876, + "grad_norm": 1.0651562213897705, + "learning_rate": 0.0011796966162887364, + "loss": 1.5421, + "step": 2965 + }, + { + "epoch": 0.3128691983122363, + "grad_norm": 0.7203776240348816, + "learning_rate": 0.0011794908224696608, + "loss": 1.5458, + "step": 2966 + }, + { + "epoch": 0.3129746835443038, + "grad_norm": 1.1490747928619385, + "learning_rate": 0.0011792849805239967, + "loss": 1.5654, + "step": 2967 + }, + { + "epoch": 0.3130801687763713, + "grad_norm": 0.8125477433204651, + "learning_rate": 0.0011790790904748103, + "loss": 1.5743, + "step": 2968 + }, + { + "epoch": 0.31318565400843884, + "grad_norm": 1.1433111429214478, + "learning_rate": 0.0011788731523451718, + "loss": 1.5245, + "step": 2969 + }, + { + "epoch": 0.31329113924050633, + "grad_norm": 0.7912827134132385, + "learning_rate": 0.0011786671661581584, + "loss": 1.549, + "step": 2970 + }, + { + "epoch": 0.3133966244725738, + "grad_norm": 0.921352744102478, + "learning_rate": 0.0011784611319368512, + "loss": 1.5199, + "step": 2971 + }, + { + "epoch": 0.3135021097046414, + "grad_norm": 0.7249887585639954, + "learning_rate": 0.0011782550497043379, + "loss": 1.5265, + "step": 2972 + }, + { + "epoch": 
0.31360759493670887, + "grad_norm": 0.9698372483253479, + "learning_rate": 0.0011780489194837106, + "loss": 1.5598, + "step": 2973 + }, + { + "epoch": 0.31371308016877636, + "grad_norm": 0.7132208943367004, + "learning_rate": 0.0011778427412980675, + "loss": 1.563, + "step": 2974 + }, + { + "epoch": 0.3138185654008439, + "grad_norm": 0.7356469631195068, + "learning_rate": 0.0011776365151705119, + "loss": 1.5245, + "step": 2975 + }, + { + "epoch": 0.3139240506329114, + "grad_norm": 0.696430504322052, + "learning_rate": 0.0011774302411241525, + "loss": 1.5487, + "step": 2976 + }, + { + "epoch": 0.3140295358649789, + "grad_norm": 0.754277765750885, + "learning_rate": 0.0011772239191821029, + "loss": 1.5554, + "step": 2977 + }, + { + "epoch": 0.31413502109704644, + "grad_norm": 0.6964945793151855, + "learning_rate": 0.0011770175493674827, + "loss": 1.5479, + "step": 2978 + }, + { + "epoch": 0.31424050632911393, + "grad_norm": 0.6854779124259949, + "learning_rate": 0.0011768111317034173, + "loss": 1.5619, + "step": 2979 + }, + { + "epoch": 0.3143459915611814, + "grad_norm": 0.7111576795578003, + "learning_rate": 0.001176604666213036, + "loss": 1.5763, + "step": 2980 + }, + { + "epoch": 0.3144514767932489, + "grad_norm": 0.645805299282074, + "learning_rate": 0.0011763981529194748, + "loss": 1.5812, + "step": 2981 + }, + { + "epoch": 0.31455696202531647, + "grad_norm": 0.7842586040496826, + "learning_rate": 0.001176191591845874, + "loss": 1.5224, + "step": 2982 + }, + { + "epoch": 0.31466244725738396, + "grad_norm": 0.76298987865448, + "learning_rate": 0.0011759849830153806, + "loss": 1.5491, + "step": 2983 + }, + { + "epoch": 0.31476793248945145, + "grad_norm": 0.6907167434692383, + "learning_rate": 0.0011757783264511456, + "loss": 1.5552, + "step": 2984 + }, + { + "epoch": 0.314873417721519, + "grad_norm": 0.6837208271026611, + "learning_rate": 0.001175571622176326, + "loss": 1.5349, + "step": 2985 + }, + { + "epoch": 0.3149789029535865, + "grad_norm": 
0.7133066058158875, + "learning_rate": 0.0011753648702140837, + "loss": 1.5742, + "step": 2986 + }, + { + "epoch": 0.315084388185654, + "grad_norm": 0.6725098490715027, + "learning_rate": 0.001175158070587587, + "loss": 1.5786, + "step": 2987 + }, + { + "epoch": 0.31518987341772153, + "grad_norm": 0.6914415955543518, + "learning_rate": 0.0011749512233200081, + "loss": 1.5701, + "step": 2988 + }, + { + "epoch": 0.315295358649789, + "grad_norm": 0.650434672832489, + "learning_rate": 0.001174744328434526, + "loss": 1.5846, + "step": 2989 + }, + { + "epoch": 0.3154008438818565, + "grad_norm": 0.6493827700614929, + "learning_rate": 0.0011745373859543236, + "loss": 1.5711, + "step": 2990 + }, + { + "epoch": 0.31550632911392407, + "grad_norm": 0.630990743637085, + "learning_rate": 0.0011743303959025906, + "loss": 1.5226, + "step": 2991 + }, + { + "epoch": 0.31561181434599156, + "grad_norm": 0.6703910827636719, + "learning_rate": 0.0011741233583025205, + "loss": 1.5302, + "step": 2992 + }, + { + "epoch": 0.31571729957805905, + "grad_norm": 0.6450629830360413, + "learning_rate": 0.0011739162731773133, + "loss": 1.4984, + "step": 2993 + }, + { + "epoch": 0.3158227848101266, + "grad_norm": 0.6728131175041199, + "learning_rate": 0.0011737091405501741, + "loss": 1.584, + "step": 2994 + }, + { + "epoch": 0.3159282700421941, + "grad_norm": 0.6742730736732483, + "learning_rate": 0.0011735019604443126, + "loss": 1.5896, + "step": 2995 + }, + { + "epoch": 0.3160337552742616, + "grad_norm": 0.6630778312683105, + "learning_rate": 0.0011732947328829447, + "loss": 1.5259, + "step": 2996 + }, + { + "epoch": 0.31613924050632913, + "grad_norm": 0.6852271556854248, + "learning_rate": 0.0011730874578892913, + "loss": 1.5331, + "step": 2997 + }, + { + "epoch": 0.3162447257383966, + "grad_norm": 0.6661717295646667, + "learning_rate": 0.0011728801354865786, + "loss": 1.536, + "step": 2998 + }, + { + "epoch": 0.3163502109704641, + "grad_norm": 0.6479251980781555, + "learning_rate": 
0.0011726727656980378, + "loss": 1.5685, + "step": 2999 + }, + { + "epoch": 0.31645569620253167, + "grad_norm": 0.6800879836082458, + "learning_rate": 0.0011724653485469063, + "loss": 1.5464, + "step": 3000 + }, + { + "epoch": 0.31656118143459916, + "grad_norm": 0.6527814865112305, + "learning_rate": 0.0011722578840564256, + "loss": 1.5433, + "step": 3001 + }, + { + "epoch": 0.31666666666666665, + "grad_norm": 0.7413210868835449, + "learning_rate": 0.0011720503722498436, + "loss": 1.5302, + "step": 3002 + }, + { + "epoch": 0.3167721518987342, + "grad_norm": 0.7192767262458801, + "learning_rate": 0.0011718428131504127, + "loss": 1.5244, + "step": 3003 + }, + { + "epoch": 0.3168776371308017, + "grad_norm": 0.6866099238395691, + "learning_rate": 0.0011716352067813914, + "loss": 1.6015, + "step": 3004 + }, + { + "epoch": 0.3169831223628692, + "grad_norm": 0.6392327547073364, + "learning_rate": 0.0011714275531660423, + "loss": 1.5642, + "step": 3005 + }, + { + "epoch": 0.31708860759493673, + "grad_norm": 0.6711474061012268, + "learning_rate": 0.0011712198523276347, + "loss": 1.5664, + "step": 3006 + }, + { + "epoch": 0.3171940928270042, + "grad_norm": 0.6507800817489624, + "learning_rate": 0.0011710121042894425, + "loss": 1.5446, + "step": 3007 + }, + { + "epoch": 0.3172995780590717, + "grad_norm": 0.7296463847160339, + "learning_rate": 0.0011708043090747442, + "loss": 1.5608, + "step": 3008 + }, + { + "epoch": 0.31740506329113927, + "grad_norm": 0.9537039995193481, + "learning_rate": 0.001170596466706825, + "loss": 1.5485, + "step": 3009 + }, + { + "epoch": 0.31751054852320676, + "grad_norm": 0.6208271980285645, + "learning_rate": 0.0011703885772089743, + "loss": 1.5473, + "step": 3010 + }, + { + "epoch": 0.31761603375527425, + "grad_norm": 0.9859296679496765, + "learning_rate": 0.0011701806406044875, + "loss": 1.5363, + "step": 3011 + }, + { + "epoch": 0.31772151898734174, + "grad_norm": 0.8375872373580933, + "learning_rate": 0.0011699726569166643, + "loss": 1.5431, + 
"step": 3012 + }, + { + "epoch": 0.3178270042194093, + "grad_norm": 0.8967879414558411, + "learning_rate": 0.0011697646261688108, + "loss": 1.5228, + "step": 3013 + }, + { + "epoch": 0.3179324894514768, + "grad_norm": 1.1311426162719727, + "learning_rate": 0.0011695565483842382, + "loss": 1.5602, + "step": 3014 + }, + { + "epoch": 0.3180379746835443, + "grad_norm": 0.716764509677887, + "learning_rate": 0.001169348423586262, + "loss": 1.5784, + "step": 3015 + }, + { + "epoch": 0.3181434599156118, + "grad_norm": 1.14674711227417, + "learning_rate": 0.0011691402517982038, + "loss": 1.5577, + "step": 3016 + }, + { + "epoch": 0.3182489451476793, + "grad_norm": 0.8075500726699829, + "learning_rate": 0.0011689320330433904, + "loss": 1.5902, + "step": 3017 + }, + { + "epoch": 0.3183544303797468, + "grad_norm": 0.9053233861923218, + "learning_rate": 0.0011687237673451538, + "loss": 1.5783, + "step": 3018 + }, + { + "epoch": 0.31845991561181436, + "grad_norm": 1.240077018737793, + "learning_rate": 0.0011685154547268312, + "loss": 1.5557, + "step": 3019 + }, + { + "epoch": 0.31856540084388185, + "grad_norm": 0.7529457807540894, + "learning_rate": 0.0011683070952117646, + "loss": 1.5376, + "step": 3020 + }, + { + "epoch": 0.31867088607594934, + "grad_norm": 1.1924629211425781, + "learning_rate": 0.0011680986888233024, + "loss": 1.5292, + "step": 3021 + }, + { + "epoch": 0.3187763713080169, + "grad_norm": 0.7960757613182068, + "learning_rate": 0.0011678902355847973, + "loss": 1.552, + "step": 3022 + }, + { + "epoch": 0.3188818565400844, + "grad_norm": 1.3265329599380493, + "learning_rate": 0.0011676817355196075, + "loss": 1.52, + "step": 3023 + }, + { + "epoch": 0.3189873417721519, + "grad_norm": 0.7840424180030823, + "learning_rate": 0.0011674731886510967, + "loss": 1.5401, + "step": 3024 + }, + { + "epoch": 0.3190928270042194, + "grad_norm": 1.274269938468933, + "learning_rate": 0.0011672645950026332, + "loss": 1.5187, + "step": 3025 + }, + { + "epoch": 0.3191983122362869, + 
"grad_norm": 0.8193167448043823, + "learning_rate": 0.001167055954597591, + "loss": 1.556, + "step": 3026 + }, + { + "epoch": 0.3193037974683544, + "grad_norm": 1.4534908533096313, + "learning_rate": 0.0011668472674593497, + "loss": 1.5582, + "step": 3027 + }, + { + "epoch": 0.31940928270042196, + "grad_norm": 0.8571312427520752, + "learning_rate": 0.0011666385336112934, + "loss": 1.5739, + "step": 3028 + }, + { + "epoch": 0.31951476793248945, + "grad_norm": 1.3183809518814087, + "learning_rate": 0.0011664297530768117, + "loss": 1.5401, + "step": 3029 + }, + { + "epoch": 0.31962025316455694, + "grad_norm": 0.8928508162498474, + "learning_rate": 0.0011662209258792998, + "loss": 1.5989, + "step": 3030 + }, + { + "epoch": 0.3197257383966245, + "grad_norm": 1.1843100786209106, + "learning_rate": 0.0011660120520421578, + "loss": 1.5658, + "step": 3031 + }, + { + "epoch": 0.319831223628692, + "grad_norm": 0.896878719329834, + "learning_rate": 0.0011658031315887908, + "loss": 1.545, + "step": 3032 + }, + { + "epoch": 0.3199367088607595, + "grad_norm": 1.24296236038208, + "learning_rate": 0.0011655941645426096, + "loss": 1.5307, + "step": 3033 + }, + { + "epoch": 0.320042194092827, + "grad_norm": 0.8947078585624695, + "learning_rate": 0.00116538515092703, + "loss": 1.5934, + "step": 3034 + }, + { + "epoch": 0.3201476793248945, + "grad_norm": 1.2677428722381592, + "learning_rate": 0.0011651760907654728, + "loss": 1.5448, + "step": 3035 + }, + { + "epoch": 0.320253164556962, + "grad_norm": 0.8463830351829529, + "learning_rate": 0.0011649669840813645, + "loss": 1.6002, + "step": 3036 + }, + { + "epoch": 0.32035864978902956, + "grad_norm": 1.2092636823654175, + "learning_rate": 0.0011647578308981363, + "loss": 1.5803, + "step": 3037 + }, + { + "epoch": 0.32046413502109705, + "grad_norm": 0.8496508002281189, + "learning_rate": 0.001164548631239225, + "loss": 1.5128, + "step": 3038 + }, + { + "epoch": 0.32056962025316454, + "grad_norm": 1.0744808912277222, + "learning_rate": 
0.0011643393851280724, + "loss": 1.5398, + "step": 3039 + }, + { + "epoch": 0.3206751054852321, + "grad_norm": 0.7893195748329163, + "learning_rate": 0.0011641300925881257, + "loss": 1.5184, + "step": 3040 + }, + { + "epoch": 0.3207805907172996, + "grad_norm": 0.865130603313446, + "learning_rate": 0.001163920753642837, + "loss": 1.5447, + "step": 3041 + }, + { + "epoch": 0.3208860759493671, + "grad_norm": 0.7137648463249207, + "learning_rate": 0.001163711368315664, + "loss": 1.5301, + "step": 3042 + }, + { + "epoch": 0.3209915611814346, + "grad_norm": 0.7271702885627747, + "learning_rate": 0.001163501936630069, + "loss": 1.5619, + "step": 3043 + }, + { + "epoch": 0.3210970464135021, + "grad_norm": 0.7242078185081482, + "learning_rate": 0.0011632924586095204, + "loss": 1.5449, + "step": 3044 + }, + { + "epoch": 0.3212025316455696, + "grad_norm": 0.6890426278114319, + "learning_rate": 0.0011630829342774906, + "loss": 1.5399, + "step": 3045 + }, + { + "epoch": 0.3213080168776371, + "grad_norm": 0.705839991569519, + "learning_rate": 0.0011628733636574586, + "loss": 1.5842, + "step": 3046 + }, + { + "epoch": 0.32141350210970465, + "grad_norm": 0.693666398525238, + "learning_rate": 0.0011626637467729072, + "loss": 1.5015, + "step": 3047 + }, + { + "epoch": 0.32151898734177214, + "grad_norm": 0.6142423748970032, + "learning_rate": 0.0011624540836473252, + "loss": 1.5211, + "step": 3048 + }, + { + "epoch": 0.32162447257383964, + "grad_norm": 0.6300686597824097, + "learning_rate": 0.0011622443743042065, + "loss": 1.5185, + "step": 3049 + }, + { + "epoch": 0.3217299578059072, + "grad_norm": 0.6566272974014282, + "learning_rate": 0.0011620346187670501, + "loss": 1.5508, + "step": 3050 + }, + { + "epoch": 0.3218354430379747, + "grad_norm": 0.6733558177947998, + "learning_rate": 0.0011618248170593597, + "loss": 1.5279, + "step": 3051 + }, + { + "epoch": 0.32194092827004217, + "grad_norm": 0.6609898805618286, + "learning_rate": 0.0011616149692046454, + "loss": 1.5211, + "step": 
3052 + }, + { + "epoch": 0.3220464135021097, + "grad_norm": 0.6482734084129333, + "learning_rate": 0.0011614050752264216, + "loss": 1.5619, + "step": 3053 + }, + { + "epoch": 0.3221518987341772, + "grad_norm": 0.6196478009223938, + "learning_rate": 0.0011611951351482071, + "loss": 1.5436, + "step": 3054 + }, + { + "epoch": 0.3222573839662447, + "grad_norm": 0.6331177353858948, + "learning_rate": 0.0011609851489935274, + "loss": 1.571, + "step": 3055 + }, + { + "epoch": 0.32236286919831225, + "grad_norm": 0.6569607257843018, + "learning_rate": 0.0011607751167859125, + "loss": 1.5621, + "step": 3056 + }, + { + "epoch": 0.32246835443037974, + "grad_norm": 0.6120293140411377, + "learning_rate": 0.0011605650385488977, + "loss": 1.5494, + "step": 3057 + }, + { + "epoch": 0.32257383966244724, + "grad_norm": 0.6356054544448853, + "learning_rate": 0.0011603549143060225, + "loss": 1.5401, + "step": 3058 + }, + { + "epoch": 0.3226793248945148, + "grad_norm": 0.6445911526679993, + "learning_rate": 0.0011601447440808335, + "loss": 1.5565, + "step": 3059 + }, + { + "epoch": 0.3227848101265823, + "grad_norm": 0.6419097781181335, + "learning_rate": 0.0011599345278968806, + "loss": 1.5622, + "step": 3060 + }, + { + "epoch": 0.32289029535864977, + "grad_norm": 0.6644342541694641, + "learning_rate": 0.0011597242657777195, + "loss": 1.5369, + "step": 3061 + }, + { + "epoch": 0.3229957805907173, + "grad_norm": 0.6110647320747375, + "learning_rate": 0.0011595139577469115, + "loss": 1.5273, + "step": 3062 + }, + { + "epoch": 0.3231012658227848, + "grad_norm": 0.7249144315719604, + "learning_rate": 0.0011593036038280225, + "loss": 1.5461, + "step": 3063 + }, + { + "epoch": 0.3232067510548523, + "grad_norm": 0.6726880073547363, + "learning_rate": 0.0011590932040446236, + "loss": 1.5525, + "step": 3064 + }, + { + "epoch": 0.32331223628691985, + "grad_norm": 0.6310129165649414, + "learning_rate": 0.0011588827584202914, + "loss": 1.5209, + "step": 3065 + }, + { + "epoch": 0.32341772151898734, 
+ "grad_norm": 0.6159003973007202, + "learning_rate": 0.0011586722669786073, + "loss": 1.5472, + "step": 3066 + }, + { + "epoch": 0.32352320675105484, + "grad_norm": 0.6553379893302917, + "learning_rate": 0.0011584617297431578, + "loss": 1.5598, + "step": 3067 + }, + { + "epoch": 0.3236286919831224, + "grad_norm": 0.6134664416313171, + "learning_rate": 0.0011582511467375346, + "loss": 1.5387, + "step": 3068 + }, + { + "epoch": 0.3237341772151899, + "grad_norm": 0.6684057712554932, + "learning_rate": 0.001158040517985335, + "loss": 1.5472, + "step": 3069 + }, + { + "epoch": 0.32383966244725737, + "grad_norm": 0.6215099692344666, + "learning_rate": 0.0011578298435101604, + "loss": 1.5721, + "step": 3070 + }, + { + "epoch": 0.3239451476793249, + "grad_norm": 0.6944586634635925, + "learning_rate": 0.0011576191233356181, + "loss": 1.5783, + "step": 3071 + }, + { + "epoch": 0.3240506329113924, + "grad_norm": 0.6357558369636536, + "learning_rate": 0.0011574083574853208, + "loss": 1.5049, + "step": 3072 + }, + { + "epoch": 0.3241561181434599, + "grad_norm": 0.6565810441970825, + "learning_rate": 0.0011571975459828852, + "loss": 1.5448, + "step": 3073 + }, + { + "epoch": 0.32426160337552745, + "grad_norm": 0.6935651898384094, + "learning_rate": 0.0011569866888519343, + "loss": 1.5549, + "step": 3074 + }, + { + "epoch": 0.32436708860759494, + "grad_norm": 0.6692018508911133, + "learning_rate": 0.0011567757861160955, + "loss": 1.5122, + "step": 3075 + }, + { + "epoch": 0.32447257383966244, + "grad_norm": 0.6873802542686462, + "learning_rate": 0.0011565648377990017, + "loss": 1.5116, + "step": 3076 + }, + { + "epoch": 0.32457805907173, + "grad_norm": 0.7646661996841431, + "learning_rate": 0.0011563538439242902, + "loss": 1.5162, + "step": 3077 + }, + { + "epoch": 0.3246835443037975, + "grad_norm": 0.6794949769973755, + "learning_rate": 0.0011561428045156043, + "loss": 1.5223, + "step": 3078 + }, + { + "epoch": 0.32478902953586497, + "grad_norm": 0.8908830881118774, + 
"learning_rate": 0.001155931719596592, + "loss": 1.5415, + "step": 3079 + }, + { + "epoch": 0.32489451476793246, + "grad_norm": 0.807523787021637, + "learning_rate": 0.0011557205891909062, + "loss": 1.5816, + "step": 3080 + }, + { + "epoch": 0.325, + "grad_norm": 0.670113742351532, + "learning_rate": 0.0011555094133222053, + "loss": 1.489, + "step": 3081 + }, + { + "epoch": 0.3251054852320675, + "grad_norm": 0.704095721244812, + "learning_rate": 0.0011552981920141528, + "loss": 1.5577, + "step": 3082 + }, + { + "epoch": 0.325210970464135, + "grad_norm": 0.6430875658988953, + "learning_rate": 0.0011550869252904166, + "loss": 1.5534, + "step": 3083 + }, + { + "epoch": 0.32531645569620254, + "grad_norm": 0.7441636323928833, + "learning_rate": 0.0011548756131746706, + "loss": 1.555, + "step": 3084 + }, + { + "epoch": 0.32542194092827004, + "grad_norm": 0.7256618142127991, + "learning_rate": 0.0011546642556905934, + "loss": 1.5727, + "step": 3085 + }, + { + "epoch": 0.32552742616033753, + "grad_norm": 0.6221132278442383, + "learning_rate": 0.0011544528528618682, + "loss": 1.5464, + "step": 3086 + }, + { + "epoch": 0.3256329113924051, + "grad_norm": 0.6948984861373901, + "learning_rate": 0.0011542414047121842, + "loss": 1.6018, + "step": 3087 + }, + { + "epoch": 0.32573839662447257, + "grad_norm": 0.604604959487915, + "learning_rate": 0.0011540299112652351, + "loss": 1.5286, + "step": 3088 + }, + { + "epoch": 0.32584388185654006, + "grad_norm": 0.80336594581604, + "learning_rate": 0.00115381837254472, + "loss": 1.5492, + "step": 3089 + }, + { + "epoch": 0.3259493670886076, + "grad_norm": 0.6513827443122864, + "learning_rate": 0.0011536067885743423, + "loss": 1.5672, + "step": 3090 + }, + { + "epoch": 0.3260548523206751, + "grad_norm": 0.7401115894317627, + "learning_rate": 0.0011533951593778115, + "loss": 1.5612, + "step": 3091 + }, + { + "epoch": 0.3261603375527426, + "grad_norm": 0.7328891754150391, + "learning_rate": 0.0011531834849788417, + "loss": 1.5289, + "step": 
3092 + }, + { + "epoch": 0.32626582278481014, + "grad_norm": 0.6867386698722839, + "learning_rate": 0.0011529717654011518, + "loss": 1.5163, + "step": 3093 + }, + { + "epoch": 0.32637130801687764, + "grad_norm": 0.8483149409294128, + "learning_rate": 0.001152760000668466, + "loss": 1.5373, + "step": 3094 + }, + { + "epoch": 0.32647679324894513, + "grad_norm": 0.6409226655960083, + "learning_rate": 0.001152548190804514, + "loss": 1.5351, + "step": 3095 + }, + { + "epoch": 0.3265822784810127, + "grad_norm": 0.8768031001091003, + "learning_rate": 0.0011523363358330301, + "loss": 1.5454, + "step": 3096 + }, + { + "epoch": 0.32668776371308017, + "grad_norm": 0.8384304642677307, + "learning_rate": 0.0011521244357777533, + "loss": 1.5224, + "step": 3097 + }, + { + "epoch": 0.32679324894514766, + "grad_norm": 0.7239202857017517, + "learning_rate": 0.0011519124906624284, + "loss": 1.56, + "step": 3098 + }, + { + "epoch": 0.3268987341772152, + "grad_norm": 1.0539027452468872, + "learning_rate": 0.0011517005005108048, + "loss": 1.5359, + "step": 3099 + }, + { + "epoch": 0.3270042194092827, + "grad_norm": 0.653710663318634, + "learning_rate": 0.001151488465346637, + "loss": 1.5586, + "step": 3100 + }, + { + "epoch": 0.3271097046413502, + "grad_norm": 0.8788576126098633, + "learning_rate": 0.0011512763851936848, + "loss": 1.5522, + "step": 3101 + }, + { + "epoch": 0.32721518987341774, + "grad_norm": 0.6568320393562317, + "learning_rate": 0.0011510642600757123, + "loss": 1.5559, + "step": 3102 + }, + { + "epoch": 0.32732067510548524, + "grad_norm": 1.0043244361877441, + "learning_rate": 0.00115085209001649, + "loss": 1.5576, + "step": 3103 + }, + { + "epoch": 0.32742616033755273, + "grad_norm": 0.6301849484443665, + "learning_rate": 0.0011506398750397919, + "loss": 1.523, + "step": 3104 + }, + { + "epoch": 0.3275316455696203, + "grad_norm": 1.0228606462478638, + "learning_rate": 0.0011504276151693984, + "loss": 1.5423, + "step": 3105 + }, + { + "epoch": 0.32763713080168777, + 
"grad_norm": 0.7095860838890076, + "learning_rate": 0.0011502153104290937, + "loss": 1.5505, + "step": 3106 + }, + { + "epoch": 0.32774261603375526, + "grad_norm": 0.7019138336181641, + "learning_rate": 0.0011500029608426676, + "loss": 1.5296, + "step": 3107 + }, + { + "epoch": 0.3278481012658228, + "grad_norm": 0.6186330914497375, + "learning_rate": 0.0011497905664339153, + "loss": 1.5216, + "step": 3108 + }, + { + "epoch": 0.3279535864978903, + "grad_norm": 0.6640079021453857, + "learning_rate": 0.0011495781272266366, + "loss": 1.5503, + "step": 3109 + }, + { + "epoch": 0.3280590717299578, + "grad_norm": 0.6735005974769592, + "learning_rate": 0.0011493656432446362, + "loss": 1.5468, + "step": 3110 + }, + { + "epoch": 0.3281645569620253, + "grad_norm": 0.6656011939048767, + "learning_rate": 0.0011491531145117243, + "loss": 1.5604, + "step": 3111 + }, + { + "epoch": 0.32827004219409284, + "grad_norm": 0.6525481939315796, + "learning_rate": 0.0011489405410517151, + "loss": 1.5484, + "step": 3112 + }, + { + "epoch": 0.32837552742616033, + "grad_norm": 0.812107503414154, + "learning_rate": 0.0011487279228884293, + "loss": 1.5412, + "step": 3113 + }, + { + "epoch": 0.3284810126582278, + "grad_norm": 0.7858553528785706, + "learning_rate": 0.0011485152600456913, + "loss": 1.555, + "step": 3114 + }, + { + "epoch": 0.32858649789029537, + "grad_norm": 0.6354445815086365, + "learning_rate": 0.0011483025525473314, + "loss": 1.5345, + "step": 3115 + }, + { + "epoch": 0.32869198312236286, + "grad_norm": 0.6949388980865479, + "learning_rate": 0.001148089800417184, + "loss": 1.5204, + "step": 3116 + }, + { + "epoch": 0.32879746835443036, + "grad_norm": 0.6744472980499268, + "learning_rate": 0.00114787700367909, + "loss": 1.5404, + "step": 3117 + }, + { + "epoch": 0.3289029535864979, + "grad_norm": 0.6644640564918518, + "learning_rate": 0.0011476641623568934, + "loss": 1.5159, + "step": 3118 + }, + { + "epoch": 0.3290084388185654, + "grad_norm": 0.7097163200378418, + 
"learning_rate": 0.0011474512764744445, + "loss": 1.5496, + "step": 3119 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 0.6485841274261475, + "learning_rate": 0.0011472383460555983, + "loss": 1.5922, + "step": 3120 + }, + { + "epoch": 0.32921940928270044, + "grad_norm": 0.6594301462173462, + "learning_rate": 0.0011470253711242146, + "loss": 1.5289, + "step": 3121 + }, + { + "epoch": 0.32932489451476793, + "grad_norm": 0.6251650452613831, + "learning_rate": 0.001146812351704158, + "loss": 1.5241, + "step": 3122 + }, + { + "epoch": 0.3294303797468354, + "grad_norm": 0.622168779373169, + "learning_rate": 0.001146599287819299, + "loss": 1.5954, + "step": 3123 + }, + { + "epoch": 0.32953586497890297, + "grad_norm": 0.6470073461532593, + "learning_rate": 0.0011463861794935122, + "loss": 1.5268, + "step": 3124 + }, + { + "epoch": 0.32964135021097046, + "grad_norm": 0.6808436512947083, + "learning_rate": 0.0011461730267506775, + "loss": 1.5753, + "step": 3125 + }, + { + "epoch": 0.32974683544303796, + "grad_norm": 0.6106361150741577, + "learning_rate": 0.0011459598296146795, + "loss": 1.5555, + "step": 3126 + }, + { + "epoch": 0.3298523206751055, + "grad_norm": 0.8087892532348633, + "learning_rate": 0.001145746588109408, + "loss": 1.5565, + "step": 3127 + }, + { + "epoch": 0.329957805907173, + "grad_norm": 0.7026957869529724, + "learning_rate": 0.0011455333022587582, + "loss": 1.5466, + "step": 3128 + }, + { + "epoch": 0.3300632911392405, + "grad_norm": 0.68568354845047, + "learning_rate": 0.0011453199720866296, + "loss": 1.5741, + "step": 3129 + }, + { + "epoch": 0.33016877637130804, + "grad_norm": 0.6818012595176697, + "learning_rate": 0.001145106597616927, + "loss": 1.5626, + "step": 3130 + }, + { + "epoch": 0.33027426160337553, + "grad_norm": 0.6869688034057617, + "learning_rate": 0.0011448931788735595, + "loss": 1.5249, + "step": 3131 + }, + { + "epoch": 0.330379746835443, + "grad_norm": 0.7850370407104492, + "learning_rate": 0.0011446797158804426, + "loss": 
1.5534, + "step": 3132 + }, + { + "epoch": 0.33048523206751057, + "grad_norm": 0.6992339491844177, + "learning_rate": 0.0011444662086614952, + "loss": 1.4888, + "step": 3133 + }, + { + "epoch": 0.33059071729957806, + "grad_norm": 0.6764273643493652, + "learning_rate": 0.0011442526572406422, + "loss": 1.572, + "step": 3134 + }, + { + "epoch": 0.33069620253164556, + "grad_norm": 0.627564013004303, + "learning_rate": 0.001144039061641813, + "loss": 1.4958, + "step": 3135 + }, + { + "epoch": 0.3308016877637131, + "grad_norm": 0.6691213846206665, + "learning_rate": 0.0011438254218889422, + "loss": 1.5185, + "step": 3136 + }, + { + "epoch": 0.3309071729957806, + "grad_norm": 0.6257817149162292, + "learning_rate": 0.0011436117380059692, + "loss": 1.502, + "step": 3137 + }, + { + "epoch": 0.3310126582278481, + "grad_norm": 0.6458118557929993, + "learning_rate": 0.0011433980100168382, + "loss": 1.5679, + "step": 3138 + }, + { + "epoch": 0.33111814345991564, + "grad_norm": 0.7295764088630676, + "learning_rate": 0.0011431842379454982, + "loss": 1.546, + "step": 3139 + }, + { + "epoch": 0.33122362869198313, + "grad_norm": 0.6601176857948303, + "learning_rate": 0.001142970421815904, + "loss": 1.5309, + "step": 3140 + }, + { + "epoch": 0.3313291139240506, + "grad_norm": 0.6338415741920471, + "learning_rate": 0.0011427565616520144, + "loss": 1.5706, + "step": 3141 + }, + { + "epoch": 0.33143459915611817, + "grad_norm": 0.6341627836227417, + "learning_rate": 0.0011425426574777936, + "loss": 1.5172, + "step": 3142 + }, + { + "epoch": 0.33154008438818566, + "grad_norm": 0.6421421766281128, + "learning_rate": 0.0011423287093172106, + "loss": 1.5586, + "step": 3143 + }, + { + "epoch": 0.33164556962025316, + "grad_norm": 0.6345044374465942, + "learning_rate": 0.0011421147171942398, + "loss": 1.5588, + "step": 3144 + }, + { + "epoch": 0.33175105485232065, + "grad_norm": 0.6224480271339417, + "learning_rate": 0.0011419006811328593, + "loss": 1.5679, + "step": 3145 + }, + { + "epoch": 
0.3318565400843882, + "grad_norm": 0.6528698801994324, + "learning_rate": 0.0011416866011570534, + "loss": 1.5344, + "step": 3146 + }, + { + "epoch": 0.3319620253164557, + "grad_norm": 0.6867148876190186, + "learning_rate": 0.0011414724772908105, + "loss": 1.5258, + "step": 3147 + }, + { + "epoch": 0.3320675105485232, + "grad_norm": 0.6822651624679565, + "learning_rate": 0.0011412583095581248, + "loss": 1.5338, + "step": 3148 + }, + { + "epoch": 0.33217299578059073, + "grad_norm": 0.7565330862998962, + "learning_rate": 0.0011410440979829942, + "loss": 1.5236, + "step": 3149 + }, + { + "epoch": 0.3322784810126582, + "grad_norm": 0.6442954540252686, + "learning_rate": 0.0011408298425894226, + "loss": 1.531, + "step": 3150 + }, + { + "epoch": 0.3323839662447257, + "grad_norm": 0.9162479043006897, + "learning_rate": 0.0011406155434014185, + "loss": 1.5484, + "step": 3151 + }, + { + "epoch": 0.33248945147679326, + "grad_norm": 0.7768605947494507, + "learning_rate": 0.0011404012004429948, + "loss": 1.5426, + "step": 3152 + }, + { + "epoch": 0.33259493670886076, + "grad_norm": 0.6673774123191833, + "learning_rate": 0.00114018681373817, + "loss": 1.5567, + "step": 3153 + }, + { + "epoch": 0.33270042194092825, + "grad_norm": 0.7110280990600586, + "learning_rate": 0.001139972383310967, + "loss": 1.5453, + "step": 3154 + }, + { + "epoch": 0.3328059071729958, + "grad_norm": 0.7153448462486267, + "learning_rate": 0.0011397579091854137, + "loss": 1.548, + "step": 3155 + }, + { + "epoch": 0.3329113924050633, + "grad_norm": 0.76359623670578, + "learning_rate": 0.0011395433913855434, + "loss": 1.5325, + "step": 3156 + }, + { + "epoch": 0.3330168776371308, + "grad_norm": 0.6350100040435791, + "learning_rate": 0.0011393288299353934, + "loss": 1.5371, + "step": 3157 + }, + { + "epoch": 0.33312236286919833, + "grad_norm": 0.7038780450820923, + "learning_rate": 0.001139114224859007, + "loss": 1.5139, + "step": 3158 + }, + { + "epoch": 0.3332278481012658, + "grad_norm": 
0.6652238965034485, + "learning_rate": 0.0011388995761804311, + "loss": 1.5608, + "step": 3159 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.6397727727890015, + "learning_rate": 0.0011386848839237186, + "loss": 1.5562, + "step": 3160 + }, + { + "epoch": 0.33343881856540086, + "grad_norm": 0.7744811773300171, + "learning_rate": 0.0011384701481129266, + "loss": 1.5487, + "step": 3161 + }, + { + "epoch": 0.33354430379746836, + "grad_norm": 1.0400980710983276, + "learning_rate": 0.0011382553687721174, + "loss": 1.5508, + "step": 3162 + }, + { + "epoch": 0.33364978902953585, + "grad_norm": 0.6877576112747192, + "learning_rate": 0.0011380405459253582, + "loss": 1.543, + "step": 3163 + }, + { + "epoch": 0.3337552742616034, + "grad_norm": 0.990234911441803, + "learning_rate": 0.0011378256795967208, + "loss": 1.5535, + "step": 3164 + }, + { + "epoch": 0.3338607594936709, + "grad_norm": 0.9190544486045837, + "learning_rate": 0.0011376107698102822, + "loss": 1.5367, + "step": 3165 + }, + { + "epoch": 0.3339662447257384, + "grad_norm": 0.7344247102737427, + "learning_rate": 0.001137395816590124, + "loss": 1.5915, + "step": 3166 + }, + { + "epoch": 0.33407172995780593, + "grad_norm": 0.8766541481018066, + "learning_rate": 0.001137180819960333, + "loss": 1.5142, + "step": 3167 + }, + { + "epoch": 0.3341772151898734, + "grad_norm": 0.736033022403717, + "learning_rate": 0.0011369657799450005, + "loss": 1.5529, + "step": 3168 + }, + { + "epoch": 0.3342827004219409, + "grad_norm": 0.7549515962600708, + "learning_rate": 0.0011367506965682225, + "loss": 1.5675, + "step": 3169 + }, + { + "epoch": 0.33438818565400846, + "grad_norm": 0.7309844493865967, + "learning_rate": 0.0011365355698541005, + "loss": 1.499, + "step": 3170 + }, + { + "epoch": 0.33449367088607596, + "grad_norm": 0.7090237736701965, + "learning_rate": 0.0011363203998267406, + "loss": 1.5365, + "step": 3171 + }, + { + "epoch": 0.33459915611814345, + "grad_norm": 0.9035194516181946, + "learning_rate": 
0.0011361051865102533, + "loss": 1.5319, + "step": 3172 + }, + { + "epoch": 0.334704641350211, + "grad_norm": 0.7579817175865173, + "learning_rate": 0.0011358899299287546, + "loss": 1.5388, + "step": 3173 + }, + { + "epoch": 0.3348101265822785, + "grad_norm": 0.7272403240203857, + "learning_rate": 0.0011356746301063652, + "loss": 1.5596, + "step": 3174 + }, + { + "epoch": 0.334915611814346, + "grad_norm": 0.6853928565979004, + "learning_rate": 0.0011354592870672104, + "loss": 1.5404, + "step": 3175 + }, + { + "epoch": 0.33502109704641353, + "grad_norm": 0.6366732716560364, + "learning_rate": 0.0011352439008354201, + "loss": 1.5304, + "step": 3176 + }, + { + "epoch": 0.335126582278481, + "grad_norm": 0.6683284044265747, + "learning_rate": 0.0011350284714351298, + "loss": 1.5466, + "step": 3177 + }, + { + "epoch": 0.3352320675105485, + "grad_norm": 0.6882032155990601, + "learning_rate": 0.0011348129988904797, + "loss": 1.5374, + "step": 3178 + }, + { + "epoch": 0.335337552742616, + "grad_norm": 0.672110378742218, + "learning_rate": 0.0011345974832256138, + "loss": 1.5052, + "step": 3179 + }, + { + "epoch": 0.33544303797468356, + "grad_norm": 0.7408323884010315, + "learning_rate": 0.0011343819244646824, + "loss": 1.5768, + "step": 3180 + }, + { + "epoch": 0.33554852320675105, + "grad_norm": 0.9268167614936829, + "learning_rate": 0.0011341663226318395, + "loss": 1.5123, + "step": 3181 + }, + { + "epoch": 0.33565400843881854, + "grad_norm": 0.7960177063941956, + "learning_rate": 0.0011339506777512446, + "loss": 1.5179, + "step": 3182 + }, + { + "epoch": 0.3357594936708861, + "grad_norm": 0.6560107469558716, + "learning_rate": 0.0011337349898470617, + "loss": 1.5425, + "step": 3183 + }, + { + "epoch": 0.3358649789029536, + "grad_norm": 0.7000514268875122, + "learning_rate": 0.0011335192589434597, + "loss": 1.5406, + "step": 3184 + }, + { + "epoch": 0.3359704641350211, + "grad_norm": 0.6407938599586487, + "learning_rate": 0.0011333034850646124, + "loss": 1.5427, + "step": 
3185 + }, + { + "epoch": 0.3360759493670886, + "grad_norm": 0.6517983675003052, + "learning_rate": 0.0011330876682346981, + "loss": 1.5335, + "step": 3186 + }, + { + "epoch": 0.3361814345991561, + "grad_norm": 0.6285250782966614, + "learning_rate": 0.0011328718084779004, + "loss": 1.5573, + "step": 3187 + }, + { + "epoch": 0.3362869198312236, + "grad_norm": 0.7677373886108398, + "learning_rate": 0.0011326559058184075, + "loss": 1.5472, + "step": 3188 + }, + { + "epoch": 0.33639240506329116, + "grad_norm": 0.6366485953330994, + "learning_rate": 0.001132439960280412, + "loss": 1.5164, + "step": 3189 + }, + { + "epoch": 0.33649789029535865, + "grad_norm": 0.9016680717468262, + "learning_rate": 0.001132223971888112, + "loss": 1.5747, + "step": 3190 + }, + { + "epoch": 0.33660337552742614, + "grad_norm": 0.600764274597168, + "learning_rate": 0.0011320079406657102, + "loss": 1.5304, + "step": 3191 + }, + { + "epoch": 0.3367088607594937, + "grad_norm": 0.8468356132507324, + "learning_rate": 0.0011317918666374138, + "loss": 1.5165, + "step": 3192 + }, + { + "epoch": 0.3368143459915612, + "grad_norm": 0.6966841220855713, + "learning_rate": 0.0011315757498274349, + "loss": 1.579, + "step": 3193 + }, + { + "epoch": 0.3369198312236287, + "grad_norm": 0.7967231273651123, + "learning_rate": 0.0011313595902599904, + "loss": 1.5648, + "step": 3194 + }, + { + "epoch": 0.3370253164556962, + "grad_norm": 0.7213720679283142, + "learning_rate": 0.0011311433879593023, + "loss": 1.5065, + "step": 3195 + }, + { + "epoch": 0.3371308016877637, + "grad_norm": 0.7420600652694702, + "learning_rate": 0.001130927142949597, + "loss": 1.5723, + "step": 3196 + }, + { + "epoch": 0.3372362869198312, + "grad_norm": 0.7363607287406921, + "learning_rate": 0.001130710855255106, + "loss": 1.4777, + "step": 3197 + }, + { + "epoch": 0.33734177215189876, + "grad_norm": 0.7270006537437439, + "learning_rate": 0.001130494524900065, + "loss": 1.5375, + "step": 3198 + }, + { + "epoch": 0.33744725738396625, + 
"grad_norm": 0.7564767599105835, + "learning_rate": 0.0011302781519087154, + "loss": 1.5002, + "step": 3199 + }, + { + "epoch": 0.33755274261603374, + "grad_norm": 0.7602567672729492, + "learning_rate": 0.0011300617363053024, + "loss": 1.5482, + "step": 3200 + }, + { + "epoch": 0.3376582278481013, + "grad_norm": 0.663077175617218, + "learning_rate": 0.0011298452781140769, + "loss": 1.5404, + "step": 3201 + }, + { + "epoch": 0.3377637130801688, + "grad_norm": 0.7205761075019836, + "learning_rate": 0.0011296287773592938, + "loss": 1.5628, + "step": 3202 + }, + { + "epoch": 0.3378691983122363, + "grad_norm": 0.7866820096969604, + "learning_rate": 0.0011294122340652132, + "loss": 1.5651, + "step": 3203 + }, + { + "epoch": 0.3379746835443038, + "grad_norm": 0.7273674607276917, + "learning_rate": 0.0011291956482561, + "loss": 1.5516, + "step": 3204 + }, + { + "epoch": 0.3380801687763713, + "grad_norm": 0.6266056895256042, + "learning_rate": 0.0011289790199562233, + "loss": 1.5565, + "step": 3205 + }, + { + "epoch": 0.3381856540084388, + "grad_norm": 0.6211975812911987, + "learning_rate": 0.001128762349189858, + "loss": 1.5509, + "step": 3206 + }, + { + "epoch": 0.33829113924050636, + "grad_norm": 0.6069369912147522, + "learning_rate": 0.0011285456359812825, + "loss": 1.553, + "step": 3207 + }, + { + "epoch": 0.33839662447257385, + "grad_norm": 0.6260133981704712, + "learning_rate": 0.0011283288803547809, + "loss": 1.5116, + "step": 3208 + }, + { + "epoch": 0.33850210970464134, + "grad_norm": 0.6873946785926819, + "learning_rate": 0.0011281120823346418, + "loss": 1.5194, + "step": 3209 + }, + { + "epoch": 0.33860759493670883, + "grad_norm": 0.6233102083206177, + "learning_rate": 0.0011278952419451586, + "loss": 1.5383, + "step": 3210 + }, + { + "epoch": 0.3387130801687764, + "grad_norm": 0.7062705755233765, + "learning_rate": 0.0011276783592106291, + "loss": 1.5279, + "step": 3211 + }, + { + "epoch": 0.3388185654008439, + "grad_norm": 0.6377112865447998, + 
"learning_rate": 0.001127461434155356, + "loss": 1.5236, + "step": 3212 + }, + { + "epoch": 0.33892405063291137, + "grad_norm": 0.7974831461906433, + "learning_rate": 0.001127244466803647, + "loss": 1.5192, + "step": 3213 + }, + { + "epoch": 0.3390295358649789, + "grad_norm": 0.6615817546844482, + "learning_rate": 0.0011270274571798147, + "loss": 1.5668, + "step": 3214 + }, + { + "epoch": 0.3391350210970464, + "grad_norm": 0.708756685256958, + "learning_rate": 0.0011268104053081755, + "loss": 1.5426, + "step": 3215 + }, + { + "epoch": 0.3392405063291139, + "grad_norm": 0.6801034808158875, + "learning_rate": 0.0011265933112130516, + "loss": 1.5469, + "step": 3216 + }, + { + "epoch": 0.33934599156118145, + "grad_norm": 0.6725704073905945, + "learning_rate": 0.0011263761749187693, + "loss": 1.5301, + "step": 3217 + }, + { + "epoch": 0.33945147679324894, + "grad_norm": 0.9994094967842102, + "learning_rate": 0.0011261589964496597, + "loss": 1.5426, + "step": 3218 + }, + { + "epoch": 0.33955696202531643, + "grad_norm": 0.8153029680252075, + "learning_rate": 0.001125941775830059, + "loss": 1.5169, + "step": 3219 + }, + { + "epoch": 0.339662447257384, + "grad_norm": 0.746634304523468, + "learning_rate": 0.0011257245130843077, + "loss": 1.5466, + "step": 3220 + }, + { + "epoch": 0.3397679324894515, + "grad_norm": 0.7798997759819031, + "learning_rate": 0.0011255072082367512, + "loss": 1.5359, + "step": 3221 + }, + { + "epoch": 0.33987341772151897, + "grad_norm": 0.73301100730896, + "learning_rate": 0.0011252898613117394, + "loss": 1.5469, + "step": 3222 + }, + { + "epoch": 0.3399789029535865, + "grad_norm": 0.6318146586418152, + "learning_rate": 0.0011250724723336273, + "loss": 1.5477, + "step": 3223 + }, + { + "epoch": 0.340084388185654, + "grad_norm": 0.6826576590538025, + "learning_rate": 0.0011248550413267746, + "loss": 1.5096, + "step": 3224 + }, + { + "epoch": 0.3401898734177215, + "grad_norm": 0.6247354745864868, + "learning_rate": 0.001124637568315545, + "loss": 
1.5373, + "step": 3225 + }, + { + "epoch": 0.34029535864978905, + "grad_norm": 0.7526631355285645, + "learning_rate": 0.001124420053324308, + "loss": 1.515, + "step": 3226 + }, + { + "epoch": 0.34040084388185654, + "grad_norm": 0.7460580468177795, + "learning_rate": 0.001124202496377437, + "loss": 1.5035, + "step": 3227 + }, + { + "epoch": 0.34050632911392403, + "grad_norm": 0.6749570369720459, + "learning_rate": 0.0011239848974993103, + "loss": 1.5162, + "step": 3228 + }, + { + "epoch": 0.3406118143459916, + "grad_norm": 0.6948545575141907, + "learning_rate": 0.0011237672567143107, + "loss": 1.5492, + "step": 3229 + }, + { + "epoch": 0.3407172995780591, + "grad_norm": 0.6657067537307739, + "learning_rate": 0.0011235495740468265, + "loss": 1.5474, + "step": 3230 + }, + { + "epoch": 0.34082278481012657, + "grad_norm": 0.7041597962379456, + "learning_rate": 0.00112333184952125, + "loss": 1.515, + "step": 3231 + }, + { + "epoch": 0.3409282700421941, + "grad_norm": 0.6783881187438965, + "learning_rate": 0.001123114083161978, + "loss": 1.5113, + "step": 3232 + }, + { + "epoch": 0.3410337552742616, + "grad_norm": 0.7136296033859253, + "learning_rate": 0.0011228962749934123, + "loss": 1.5198, + "step": 3233 + }, + { + "epoch": 0.3411392405063291, + "grad_norm": 0.6331042647361755, + "learning_rate": 0.0011226784250399598, + "loss": 1.5041, + "step": 3234 + }, + { + "epoch": 0.34124472573839665, + "grad_norm": 0.7761248350143433, + "learning_rate": 0.0011224605333260312, + "loss": 1.4988, + "step": 3235 + }, + { + "epoch": 0.34135021097046414, + "grad_norm": 0.6873414516448975, + "learning_rate": 0.0011222425998760428, + "loss": 1.543, + "step": 3236 + }, + { + "epoch": 0.34145569620253163, + "grad_norm": 0.7618035078048706, + "learning_rate": 0.0011220246247144149, + "loss": 1.5092, + "step": 3237 + }, + { + "epoch": 0.3415611814345992, + "grad_norm": 0.6012889742851257, + "learning_rate": 0.0011218066078655725, + "loss": 1.5063, + "step": 3238 + }, + { + "epoch": 
0.3416666666666667, + "grad_norm": 0.7973775863647461, + "learning_rate": 0.001121588549353946, + "loss": 1.4979, + "step": 3239 + }, + { + "epoch": 0.34177215189873417, + "grad_norm": 0.7034768462181091, + "learning_rate": 0.0011213704492039694, + "loss": 1.5395, + "step": 3240 + }, + { + "epoch": 0.3418776371308017, + "grad_norm": 0.8386994004249573, + "learning_rate": 0.0011211523074400823, + "loss": 1.5268, + "step": 3241 + }, + { + "epoch": 0.3419831223628692, + "grad_norm": 0.8637974858283997, + "learning_rate": 0.0011209341240867282, + "loss": 1.5533, + "step": 3242 + }, + { + "epoch": 0.3420886075949367, + "grad_norm": 0.7657248973846436, + "learning_rate": 0.001120715899168356, + "loss": 1.5081, + "step": 3243 + }, + { + "epoch": 0.3421940928270042, + "grad_norm": 0.8047167062759399, + "learning_rate": 0.0011204976327094187, + "loss": 1.5583, + "step": 3244 + }, + { + "epoch": 0.34229957805907174, + "grad_norm": 0.7319309711456299, + "learning_rate": 0.0011202793247343742, + "loss": 1.5081, + "step": 3245 + }, + { + "epoch": 0.34240506329113923, + "grad_norm": 0.6422861814498901, + "learning_rate": 0.001120060975267685, + "loss": 1.532, + "step": 3246 + }, + { + "epoch": 0.3425105485232067, + "grad_norm": 0.6863867044448853, + "learning_rate": 0.0011198425843338183, + "loss": 1.5479, + "step": 3247 + }, + { + "epoch": 0.3426160337552743, + "grad_norm": 0.7164280414581299, + "learning_rate": 0.0011196241519572457, + "loss": 1.5573, + "step": 3248 + }, + { + "epoch": 0.34272151898734177, + "grad_norm": 0.6919922828674316, + "learning_rate": 0.001119405678162444, + "loss": 1.5362, + "step": 3249 + }, + { + "epoch": 0.34282700421940926, + "grad_norm": 0.6990582346916199, + "learning_rate": 0.001119187162973894, + "loss": 1.5606, + "step": 3250 + }, + { + "epoch": 0.3429324894514768, + "grad_norm": 0.9245643019676208, + "learning_rate": 0.0011189686064160811, + "loss": 1.5177, + "step": 3251 + }, + { + "epoch": 0.3430379746835443, + "grad_norm": 
0.8079369068145752, + "learning_rate": 0.001118750008513496, + "loss": 1.5534, + "step": 3252 + }, + { + "epoch": 0.3431434599156118, + "grad_norm": 0.6693912744522095, + "learning_rate": 0.0011185313692906342, + "loss": 1.5588, + "step": 3253 + }, + { + "epoch": 0.34324894514767934, + "grad_norm": 0.8453589677810669, + "learning_rate": 0.0011183126887719945, + "loss": 1.5137, + "step": 3254 + }, + { + "epoch": 0.34335443037974683, + "grad_norm": 0.7063284516334534, + "learning_rate": 0.0011180939669820813, + "loss": 1.5032, + "step": 3255 + }, + { + "epoch": 0.3434599156118143, + "grad_norm": 0.6556695103645325, + "learning_rate": 0.001117875203945404, + "loss": 1.4936, + "step": 3256 + }, + { + "epoch": 0.3435654008438819, + "grad_norm": 0.6408578157424927, + "learning_rate": 0.0011176563996864754, + "loss": 1.5589, + "step": 3257 + }, + { + "epoch": 0.34367088607594937, + "grad_norm": 0.8495721817016602, + "learning_rate": 0.0011174375542298142, + "loss": 1.5181, + "step": 3258 + }, + { + "epoch": 0.34377637130801686, + "grad_norm": 0.8217025399208069, + "learning_rate": 0.0011172186675999425, + "loss": 1.5224, + "step": 3259 + }, + { + "epoch": 0.3438818565400844, + "grad_norm": 0.6558786630630493, + "learning_rate": 0.001116999739821388, + "loss": 1.5099, + "step": 3260 + }, + { + "epoch": 0.3439873417721519, + "grad_norm": 0.9160758256912231, + "learning_rate": 0.0011167807709186828, + "loss": 1.5563, + "step": 3261 + }, + { + "epoch": 0.3440928270042194, + "grad_norm": 0.7271402478218079, + "learning_rate": 0.0011165617609163632, + "loss": 1.5505, + "step": 3262 + }, + { + "epoch": 0.34419831223628694, + "grad_norm": 0.684756875038147, + "learning_rate": 0.0011163427098389706, + "loss": 1.5551, + "step": 3263 + }, + { + "epoch": 0.34430379746835443, + "grad_norm": 0.7176041007041931, + "learning_rate": 0.0011161236177110504, + "loss": 1.4781, + "step": 3264 + }, + { + "epoch": 0.3444092827004219, + "grad_norm": 0.6597453355789185, + "learning_rate": 
0.0011159044845571533, + "loss": 1.5187, + "step": 3265 + }, + { + "epoch": 0.3445147679324895, + "grad_norm": 0.6376858949661255, + "learning_rate": 0.0011156853104018342, + "loss": 1.5653, + "step": 3266 + }, + { + "epoch": 0.34462025316455697, + "grad_norm": 0.6828319430351257, + "learning_rate": 0.0011154660952696525, + "loss": 1.567, + "step": 3267 + }, + { + "epoch": 0.34472573839662446, + "grad_norm": 0.6827604174613953, + "learning_rate": 0.0011152468391851724, + "loss": 1.5515, + "step": 3268 + }, + { + "epoch": 0.344831223628692, + "grad_norm": 0.6512935161590576, + "learning_rate": 0.0011150275421729628, + "loss": 1.5456, + "step": 3269 + }, + { + "epoch": 0.3449367088607595, + "grad_norm": 0.7029855251312256, + "learning_rate": 0.0011148082042575968, + "loss": 1.5539, + "step": 3270 + }, + { + "epoch": 0.345042194092827, + "grad_norm": 0.6320862174034119, + "learning_rate": 0.0011145888254636526, + "loss": 1.5159, + "step": 3271 + }, + { + "epoch": 0.34514767932489454, + "grad_norm": 0.7575724124908447, + "learning_rate": 0.0011143694058157122, + "loss": 1.5791, + "step": 3272 + }, + { + "epoch": 0.34525316455696203, + "grad_norm": 0.6283470392227173, + "learning_rate": 0.0011141499453383632, + "loss": 1.5491, + "step": 3273 + }, + { + "epoch": 0.3453586497890295, + "grad_norm": 0.7109419703483582, + "learning_rate": 0.001113930444056197, + "loss": 1.5436, + "step": 3274 + }, + { + "epoch": 0.3454641350210971, + "grad_norm": 0.6573193073272705, + "learning_rate": 0.00111371090199381, + "loss": 1.5585, + "step": 3275 + }, + { + "epoch": 0.34556962025316457, + "grad_norm": 0.6755927205085754, + "learning_rate": 0.0011134913191758024, + "loss": 1.5599, + "step": 3276 + }, + { + "epoch": 0.34567510548523206, + "grad_norm": 0.6504314541816711, + "learning_rate": 0.00111327169562678, + "loss": 1.5472, + "step": 3277 + }, + { + "epoch": 0.34578059071729955, + "grad_norm": 0.655506432056427, + "learning_rate": 0.0011130520313713528, + "loss": 1.4962, + "step": 
3278 + }, + { + "epoch": 0.3458860759493671, + "grad_norm": 0.705448567867279, + "learning_rate": 0.0011128323264341352, + "loss": 1.555, + "step": 3279 + }, + { + "epoch": 0.3459915611814346, + "grad_norm": 0.673541247844696, + "learning_rate": 0.0011126125808397461, + "loss": 1.5457, + "step": 3280 + }, + { + "epoch": 0.3460970464135021, + "grad_norm": 0.6764629483222961, + "learning_rate": 0.0011123927946128092, + "loss": 1.5247, + "step": 3281 + }, + { + "epoch": 0.34620253164556963, + "grad_norm": 0.8312038779258728, + "learning_rate": 0.0011121729677779526, + "loss": 1.5362, + "step": 3282 + }, + { + "epoch": 0.3463080168776371, + "grad_norm": 0.9025267958641052, + "learning_rate": 0.001111953100359809, + "loss": 1.5033, + "step": 3283 + }, + { + "epoch": 0.3464135021097046, + "grad_norm": 0.7250370383262634, + "learning_rate": 0.0011117331923830157, + "loss": 1.5503, + "step": 3284 + }, + { + "epoch": 0.34651898734177217, + "grad_norm": 0.6310147047042847, + "learning_rate": 0.0011115132438722143, + "loss": 1.5377, + "step": 3285 + }, + { + "epoch": 0.34662447257383966, + "grad_norm": 0.7076604962348938, + "learning_rate": 0.0011112932548520513, + "loss": 1.5504, + "step": 3286 + }, + { + "epoch": 0.34672995780590715, + "grad_norm": 0.6759362816810608, + "learning_rate": 0.0011110732253471777, + "loss": 1.5916, + "step": 3287 + }, + { + "epoch": 0.3468354430379747, + "grad_norm": 0.6713531613349915, + "learning_rate": 0.0011108531553822485, + "loss": 1.4928, + "step": 3288 + }, + { + "epoch": 0.3469409282700422, + "grad_norm": 0.7555444240570068, + "learning_rate": 0.001110633044981924, + "loss": 1.501, + "step": 3289 + }, + { + "epoch": 0.3470464135021097, + "grad_norm": 0.983552873134613, + "learning_rate": 0.0011104128941708683, + "loss": 1.4684, + "step": 3290 + }, + { + "epoch": 0.34715189873417723, + "grad_norm": 0.7102998495101929, + "learning_rate": 0.001110192702973751, + "loss": 1.507, + "step": 3291 + }, + { + "epoch": 0.3472573839662447, + 
"grad_norm": 0.6412573456764221, + "learning_rate": 0.001109972471415245, + "loss": 1.5239, + "step": 3292 + }, + { + "epoch": 0.3473628691983122, + "grad_norm": 0.6274042129516602, + "learning_rate": 0.0011097521995200288, + "loss": 1.5277, + "step": 3293 + }, + { + "epoch": 0.34746835443037977, + "grad_norm": 0.6449210047721863, + "learning_rate": 0.0011095318873127844, + "loss": 1.5312, + "step": 3294 + }, + { + "epoch": 0.34757383966244726, + "grad_norm": 0.6346849203109741, + "learning_rate": 0.0011093115348181995, + "loss": 1.515, + "step": 3295 + }, + { + "epoch": 0.34767932489451475, + "grad_norm": 0.663745105266571, + "learning_rate": 0.0011090911420609654, + "loss": 1.5752, + "step": 3296 + }, + { + "epoch": 0.3477848101265823, + "grad_norm": 0.6536394357681274, + "learning_rate": 0.0011088707090657784, + "loss": 1.5351, + "step": 3297 + }, + { + "epoch": 0.3478902953586498, + "grad_norm": 0.6516134142875671, + "learning_rate": 0.0011086502358573387, + "loss": 1.5185, + "step": 3298 + }, + { + "epoch": 0.3479957805907173, + "grad_norm": 0.7745306491851807, + "learning_rate": 0.0011084297224603517, + "loss": 1.5488, + "step": 3299 + }, + { + "epoch": 0.34810126582278483, + "grad_norm": 0.6990113258361816, + "learning_rate": 0.001108209168899527, + "loss": 1.5484, + "step": 3300 + }, + { + "epoch": 0.3482067510548523, + "grad_norm": 0.6694206595420837, + "learning_rate": 0.0011079885751995788, + "loss": 1.5384, + "step": 3301 + }, + { + "epoch": 0.3483122362869198, + "grad_norm": 0.9219785928726196, + "learning_rate": 0.0011077679413852258, + "loss": 1.5176, + "step": 3302 + }, + { + "epoch": 0.34841772151898737, + "grad_norm": 0.9780604839324951, + "learning_rate": 0.0011075472674811908, + "loss": 1.5672, + "step": 3303 + }, + { + "epoch": 0.34852320675105486, + "grad_norm": 0.7416021227836609, + "learning_rate": 0.0011073265535122016, + "loss": 1.5319, + "step": 3304 + }, + { + "epoch": 0.34862869198312235, + "grad_norm": 0.764132022857666, + 
"learning_rate": 0.0011071057995029902, + "loss": 1.5587, + "step": 3305 + }, + { + "epoch": 0.3487341772151899, + "grad_norm": 0.7703163027763367, + "learning_rate": 0.0011068850054782933, + "loss": 1.5101, + "step": 3306 + }, + { + "epoch": 0.3488396624472574, + "grad_norm": 0.682451069355011, + "learning_rate": 0.0011066641714628522, + "loss": 1.537, + "step": 3307 + }, + { + "epoch": 0.3489451476793249, + "grad_norm": 0.6538662314414978, + "learning_rate": 0.001106443297481412, + "loss": 1.5454, + "step": 3308 + }, + { + "epoch": 0.3490506329113924, + "grad_norm": 0.6680108904838562, + "learning_rate": 0.001106222383558723, + "loss": 1.5371, + "step": 3309 + }, + { + "epoch": 0.3491561181434599, + "grad_norm": 0.6336672306060791, + "learning_rate": 0.0011060014297195396, + "loss": 1.5006, + "step": 3310 + }, + { + "epoch": 0.3492616033755274, + "grad_norm": 0.6724323630332947, + "learning_rate": 0.0011057804359886209, + "loss": 1.5236, + "step": 3311 + }, + { + "epoch": 0.3493670886075949, + "grad_norm": 0.6452396512031555, + "learning_rate": 0.0011055594023907302, + "loss": 1.5576, + "step": 3312 + }, + { + "epoch": 0.34947257383966246, + "grad_norm": 0.751010537147522, + "learning_rate": 0.0011053383289506354, + "loss": 1.4856, + "step": 3313 + }, + { + "epoch": 0.34957805907172995, + "grad_norm": 0.6177991628646851, + "learning_rate": 0.001105117215693109, + "loss": 1.56, + "step": 3314 + }, + { + "epoch": 0.34968354430379744, + "grad_norm": 0.701092541217804, + "learning_rate": 0.001104896062642928, + "loss": 1.5236, + "step": 3315 + }, + { + "epoch": 0.349789029535865, + "grad_norm": 0.6706386804580688, + "learning_rate": 0.001104674869824873, + "loss": 1.506, + "step": 3316 + }, + { + "epoch": 0.3498945147679325, + "grad_norm": 0.6558583378791809, + "learning_rate": 0.0011044536372637307, + "loss": 1.5673, + "step": 3317 + }, + { + "epoch": 0.35, + "grad_norm": 0.763805627822876, + "learning_rate": 0.001104232364984291, + "loss": 1.5638, + "step": 3318 + 
}, + { + "epoch": 0.3501054852320675, + "grad_norm": 0.6526801586151123, + "learning_rate": 0.001104011053011348, + "loss": 1.5258, + "step": 3319 + }, + { + "epoch": 0.350210970464135, + "grad_norm": 0.697990357875824, + "learning_rate": 0.0011037897013697015, + "loss": 1.5434, + "step": 3320 + }, + { + "epoch": 0.3503164556962025, + "grad_norm": 0.6919389963150024, + "learning_rate": 0.0011035683100841548, + "loss": 1.4845, + "step": 3321 + }, + { + "epoch": 0.35042194092827006, + "grad_norm": 0.7124044895172119, + "learning_rate": 0.0011033468791795161, + "loss": 1.5235, + "step": 3322 + }, + { + "epoch": 0.35052742616033755, + "grad_norm": 0.7511786818504333, + "learning_rate": 0.0011031254086805973, + "loss": 1.5718, + "step": 3323 + }, + { + "epoch": 0.35063291139240504, + "grad_norm": 0.7304188013076782, + "learning_rate": 0.0011029038986122156, + "loss": 1.5049, + "step": 3324 + }, + { + "epoch": 0.3507383966244726, + "grad_norm": 0.7133219838142395, + "learning_rate": 0.0011026823489991924, + "loss": 1.5594, + "step": 3325 + }, + { + "epoch": 0.3508438818565401, + "grad_norm": 0.7483652234077454, + "learning_rate": 0.0011024607598663539, + "loss": 1.5094, + "step": 3326 + }, + { + "epoch": 0.3509493670886076, + "grad_norm": 0.6557940244674683, + "learning_rate": 0.001102239131238529, + "loss": 1.5369, + "step": 3327 + }, + { + "epoch": 0.3510548523206751, + "grad_norm": 0.7371218204498291, + "learning_rate": 0.0011020174631405533, + "loss": 1.5474, + "step": 3328 + }, + { + "epoch": 0.3511603375527426, + "grad_norm": 0.7100135087966919, + "learning_rate": 0.0011017957555972656, + "loss": 1.5414, + "step": 3329 + }, + { + "epoch": 0.3512658227848101, + "grad_norm": 0.6983302235603333, + "learning_rate": 0.0011015740086335092, + "loss": 1.4929, + "step": 3330 + }, + { + "epoch": 0.35137130801687766, + "grad_norm": 0.7368974089622498, + "learning_rate": 0.001101352222274132, + "loss": 1.5309, + "step": 3331 + }, + { + "epoch": 0.35147679324894515, + 
"grad_norm": 0.672901451587677, + "learning_rate": 0.0011011303965439863, + "loss": 1.5149, + "step": 3332 + }, + { + "epoch": 0.35158227848101264, + "grad_norm": 0.9977272748947144, + "learning_rate": 0.0011009085314679287, + "loss": 1.5842, + "step": 3333 + }, + { + "epoch": 0.3516877637130802, + "grad_norm": 1.1005685329437256, + "learning_rate": 0.0011006866270708204, + "loss": 1.5147, + "step": 3334 + }, + { + "epoch": 0.3517932489451477, + "grad_norm": 0.6778295040130615, + "learning_rate": 0.0011004646833775269, + "loss": 1.5043, + "step": 3335 + }, + { + "epoch": 0.3518987341772152, + "grad_norm": 0.8466988205909729, + "learning_rate": 0.0011002427004129184, + "loss": 1.526, + "step": 3336 + }, + { + "epoch": 0.3520042194092827, + "grad_norm": 0.9745672345161438, + "learning_rate": 0.0011000206782018683, + "loss": 1.5257, + "step": 3337 + }, + { + "epoch": 0.3521097046413502, + "grad_norm": 0.6416025161743164, + "learning_rate": 0.001099798616769256, + "loss": 1.523, + "step": 3338 + }, + { + "epoch": 0.3522151898734177, + "grad_norm": 0.7752589583396912, + "learning_rate": 0.0010995765161399646, + "loss": 1.5454, + "step": 3339 + }, + { + "epoch": 0.35232067510548526, + "grad_norm": 0.643466591835022, + "learning_rate": 0.0010993543763388814, + "loss": 1.5072, + "step": 3340 + }, + { + "epoch": 0.35242616033755275, + "grad_norm": 0.752571165561676, + "learning_rate": 0.0010991321973908982, + "loss": 1.4952, + "step": 3341 + }, + { + "epoch": 0.35253164556962024, + "grad_norm": 0.7045395374298096, + "learning_rate": 0.0010989099793209112, + "loss": 1.5472, + "step": 3342 + }, + { + "epoch": 0.35263713080168774, + "grad_norm": 0.625184953212738, + "learning_rate": 0.0010986877221538214, + "loss": 1.5047, + "step": 3343 + }, + { + "epoch": 0.3527426160337553, + "grad_norm": 0.7250201106071472, + "learning_rate": 0.0010984654259145335, + "loss": 1.5446, + "step": 3344 + }, + { + "epoch": 0.3528481012658228, + "grad_norm": 0.6288173198699951, + "learning_rate": 
0.0010982430906279572, + "loss": 1.5491, + "step": 3345 + }, + { + "epoch": 0.35295358649789027, + "grad_norm": 0.6804786324501038, + "learning_rate": 0.001098020716319006, + "loss": 1.5159, + "step": 3346 + }, + { + "epoch": 0.3530590717299578, + "grad_norm": 0.6978651881217957, + "learning_rate": 0.0010977983030125982, + "loss": 1.5416, + "step": 3347 + }, + { + "epoch": 0.3531645569620253, + "grad_norm": 0.7997490167617798, + "learning_rate": 0.001097575850733656, + "loss": 1.5048, + "step": 3348 + }, + { + "epoch": 0.3532700421940928, + "grad_norm": 0.6823920607566833, + "learning_rate": 0.001097353359507107, + "loss": 1.4834, + "step": 3349 + }, + { + "epoch": 0.35337552742616035, + "grad_norm": 0.8280330300331116, + "learning_rate": 0.0010971308293578814, + "loss": 1.5202, + "step": 3350 + }, + { + "epoch": 0.35348101265822784, + "grad_norm": 0.7302618622779846, + "learning_rate": 0.0010969082603109158, + "loss": 1.5186, + "step": 3351 + }, + { + "epoch": 0.35358649789029534, + "grad_norm": 0.7440497875213623, + "learning_rate": 0.00109668565239115, + "loss": 1.557, + "step": 3352 + }, + { + "epoch": 0.3536919831223629, + "grad_norm": 0.7136513590812683, + "learning_rate": 0.001096463005623528, + "loss": 1.5198, + "step": 3353 + }, + { + "epoch": 0.3537974683544304, + "grad_norm": 0.7021432518959045, + "learning_rate": 0.0010962403200329984, + "loss": 1.4786, + "step": 3354 + }, + { + "epoch": 0.35390295358649787, + "grad_norm": 0.7371468544006348, + "learning_rate": 0.0010960175956445145, + "loss": 1.5411, + "step": 3355 + }, + { + "epoch": 0.3540084388185654, + "grad_norm": 0.623916506767273, + "learning_rate": 0.0010957948324830337, + "loss": 1.4997, + "step": 3356 + }, + { + "epoch": 0.3541139240506329, + "grad_norm": 0.7190854549407959, + "learning_rate": 0.0010955720305735176, + "loss": 1.5271, + "step": 3357 + }, + { + "epoch": 0.3542194092827004, + "grad_norm": 0.8046538233757019, + "learning_rate": 0.0010953491899409321, + "loss": 1.4982, + "step": 
3358 + }, + { + "epoch": 0.35432489451476795, + "grad_norm": 0.6852995753288269, + "learning_rate": 0.001095126310610248, + "loss": 1.5237, + "step": 3359 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 0.6491893529891968, + "learning_rate": 0.0010949033926064397, + "loss": 1.542, + "step": 3360 + }, + { + "epoch": 0.35453586497890294, + "grad_norm": 0.6758633255958557, + "learning_rate": 0.0010946804359544867, + "loss": 1.5366, + "step": 3361 + }, + { + "epoch": 0.3546413502109705, + "grad_norm": 0.6821934580802917, + "learning_rate": 0.001094457440679372, + "loss": 1.5482, + "step": 3362 + }, + { + "epoch": 0.354746835443038, + "grad_norm": 0.7401184439659119, + "learning_rate": 0.0010942344068060833, + "loss": 1.5299, + "step": 3363 + }, + { + "epoch": 0.35485232067510547, + "grad_norm": 0.6130308508872986, + "learning_rate": 0.001094011334359613, + "loss": 1.5262, + "step": 3364 + }, + { + "epoch": 0.354957805907173, + "grad_norm": 0.6476510167121887, + "learning_rate": 0.0010937882233649572, + "loss": 1.4737, + "step": 3365 + }, + { + "epoch": 0.3550632911392405, + "grad_norm": 0.6920468211174011, + "learning_rate": 0.0010935650738471167, + "loss": 1.5269, + "step": 3366 + }, + { + "epoch": 0.355168776371308, + "grad_norm": 0.7365525364875793, + "learning_rate": 0.0010933418858310965, + "loss": 1.5406, + "step": 3367 + }, + { + "epoch": 0.35527426160337555, + "grad_norm": 0.716108500957489, + "learning_rate": 0.0010931186593419059, + "loss": 1.5278, + "step": 3368 + }, + { + "epoch": 0.35537974683544304, + "grad_norm": 0.6766468286514282, + "learning_rate": 0.0010928953944045585, + "loss": 1.5608, + "step": 3369 + }, + { + "epoch": 0.35548523206751054, + "grad_norm": 0.9181839227676392, + "learning_rate": 0.0010926720910440725, + "loss": 1.5337, + "step": 3370 + }, + { + "epoch": 0.3555907172995781, + "grad_norm": 0.8210980892181396, + "learning_rate": 0.00109244874928547, + "loss": 1.5307, + "step": 3371 + }, + { + "epoch": 0.3556962025316456, + 
"grad_norm": 0.6544942259788513, + "learning_rate": 0.0010922253691537773, + "loss": 1.5067, + "step": 3372 + }, + { + "epoch": 0.35580168776371307, + "grad_norm": 0.8772371411323547, + "learning_rate": 0.0010920019506740256, + "loss": 1.5069, + "step": 3373 + }, + { + "epoch": 0.35590717299578056, + "grad_norm": 0.6883706450462341, + "learning_rate": 0.00109177849387125, + "loss": 1.535, + "step": 3374 + }, + { + "epoch": 0.3560126582278481, + "grad_norm": 0.7723336219787598, + "learning_rate": 0.00109155499877049, + "loss": 1.5423, + "step": 3375 + }, + { + "epoch": 0.3561181434599156, + "grad_norm": 0.727180540561676, + "learning_rate": 0.001091331465396789, + "loss": 1.5484, + "step": 3376 + }, + { + "epoch": 0.3562236286919831, + "grad_norm": 0.7328897714614868, + "learning_rate": 0.0010911078937751954, + "loss": 1.5139, + "step": 3377 + }, + { + "epoch": 0.35632911392405064, + "grad_norm": 0.8364177346229553, + "learning_rate": 0.0010908842839307614, + "loss": 1.5274, + "step": 3378 + }, + { + "epoch": 0.35643459915611814, + "grad_norm": 0.6042446494102478, + "learning_rate": 0.0010906606358885437, + "loss": 1.5266, + "step": 3379 + }, + { + "epoch": 0.35654008438818563, + "grad_norm": 0.8128595352172852, + "learning_rate": 0.001090436949673603, + "loss": 1.5276, + "step": 3380 + }, + { + "epoch": 0.3566455696202532, + "grad_norm": 0.8433518409729004, + "learning_rate": 0.0010902132253110043, + "loss": 1.5137, + "step": 3381 + }, + { + "epoch": 0.35675105485232067, + "grad_norm": 0.6708933711051941, + "learning_rate": 0.0010899894628258174, + "loss": 1.4675, + "step": 3382 + }, + { + "epoch": 0.35685654008438816, + "grad_norm": 0.6566847562789917, + "learning_rate": 0.001089765662243116, + "loss": 1.5536, + "step": 3383 + }, + { + "epoch": 0.3569620253164557, + "grad_norm": 0.7874399423599243, + "learning_rate": 0.0010895418235879776, + "loss": 1.5398, + "step": 3384 + }, + { + "epoch": 0.3570675105485232, + "grad_norm": 0.6679413318634033, + "learning_rate": 
0.0010893179468854848, + "loss": 1.4738, + "step": 3385 + }, + { + "epoch": 0.3571729957805907, + "grad_norm": 0.6399274468421936, + "learning_rate": 0.0010890940321607245, + "loss": 1.4909, + "step": 3386 + }, + { + "epoch": 0.35727848101265824, + "grad_norm": 0.6853614449501038, + "learning_rate": 0.0010888700794387867, + "loss": 1.5504, + "step": 3387 + }, + { + "epoch": 0.35738396624472574, + "grad_norm": 0.7162436842918396, + "learning_rate": 0.0010886460887447667, + "loss": 1.5396, + "step": 3388 + }, + { + "epoch": 0.35748945147679323, + "grad_norm": 0.7305517792701721, + "learning_rate": 0.0010884220601037637, + "loss": 1.5536, + "step": 3389 + }, + { + "epoch": 0.3575949367088608, + "grad_norm": 0.651797890663147, + "learning_rate": 0.0010881979935408815, + "loss": 1.5016, + "step": 3390 + }, + { + "epoch": 0.35770042194092827, + "grad_norm": 0.8080682754516602, + "learning_rate": 0.0010879738890812278, + "loss": 1.5188, + "step": 3391 + }, + { + "epoch": 0.35780590717299576, + "grad_norm": 1.0294581651687622, + "learning_rate": 0.0010877497467499146, + "loss": 1.5053, + "step": 3392 + }, + { + "epoch": 0.3579113924050633, + "grad_norm": 0.6540051698684692, + "learning_rate": 0.001087525566572058, + "loss": 1.4998, + "step": 3393 + }, + { + "epoch": 0.3580168776371308, + "grad_norm": 1.141606092453003, + "learning_rate": 0.0010873013485727782, + "loss": 1.4942, + "step": 3394 + }, + { + "epoch": 0.3581223628691983, + "grad_norm": 0.6952925324440002, + "learning_rate": 0.001087077092777201, + "loss": 1.5426, + "step": 3395 + }, + { + "epoch": 0.35822784810126584, + "grad_norm": 0.9322203993797302, + "learning_rate": 0.0010868527992104545, + "loss": 1.5369, + "step": 3396 + }, + { + "epoch": 0.35833333333333334, + "grad_norm": 0.9081634283065796, + "learning_rate": 0.001086628467897672, + "loss": 1.5158, + "step": 3397 + }, + { + "epoch": 0.35843881856540083, + "grad_norm": 0.6247507333755493, + "learning_rate": 0.0010864040988639912, + "loss": 1.5221, + 
"step": 3398 + }, + { + "epoch": 0.3585443037974684, + "grad_norm": 0.7763916850090027, + "learning_rate": 0.0010861796921345537, + "loss": 1.5667, + "step": 3399 + }, + { + "epoch": 0.35864978902953587, + "grad_norm": 0.6466494798660278, + "learning_rate": 0.0010859552477345052, + "loss": 1.5677, + "step": 3400 + }, + { + "epoch": 0.35875527426160336, + "grad_norm": 0.7377271056175232, + "learning_rate": 0.0010857307656889962, + "loss": 1.5511, + "step": 3401 + }, + { + "epoch": 0.3588607594936709, + "grad_norm": 0.6864738464355469, + "learning_rate": 0.0010855062460231807, + "loss": 1.5195, + "step": 3402 + }, + { + "epoch": 0.3589662447257384, + "grad_norm": 0.7517012357711792, + "learning_rate": 0.0010852816887622174, + "loss": 1.5655, + "step": 3403 + }, + { + "epoch": 0.3590717299578059, + "grad_norm": 0.7198408842086792, + "learning_rate": 0.0010850570939312687, + "loss": 1.5218, + "step": 3404 + }, + { + "epoch": 0.35917721518987344, + "grad_norm": 0.6464345455169678, + "learning_rate": 0.0010848324615555024, + "loss": 1.5113, + "step": 3405 + }, + { + "epoch": 0.35928270042194094, + "grad_norm": 0.6760526299476624, + "learning_rate": 0.0010846077916600888, + "loss": 1.5348, + "step": 3406 + }, + { + "epoch": 0.35938818565400843, + "grad_norm": 0.6929033994674683, + "learning_rate": 0.0010843830842702036, + "loss": 1.5107, + "step": 3407 + }, + { + "epoch": 0.3594936708860759, + "grad_norm": 0.7473846077919006, + "learning_rate": 0.0010841583394110266, + "loss": 1.5299, + "step": 3408 + }, + { + "epoch": 0.35959915611814347, + "grad_norm": 0.6189807057380676, + "learning_rate": 0.0010839335571077415, + "loss": 1.5299, + "step": 3409 + }, + { + "epoch": 0.35970464135021096, + "grad_norm": 0.6577171087265015, + "learning_rate": 0.001083708737385536, + "loss": 1.5038, + "step": 3410 + }, + { + "epoch": 0.35981012658227846, + "grad_norm": 0.690686821937561, + "learning_rate": 0.0010834838802696023, + "loss": 1.5231, + "step": 3411 + }, + { + "epoch": 
0.359915611814346, + "grad_norm": 0.6644322276115417, + "learning_rate": 0.0010832589857851373, + "loss": 1.5097, + "step": 3412 + }, + { + "epoch": 0.3600210970464135, + "grad_norm": 0.7337167859077454, + "learning_rate": 0.001083034053957341, + "loss": 1.5203, + "step": 3413 + }, + { + "epoch": 0.360126582278481, + "grad_norm": 0.6817553639411926, + "learning_rate": 0.0010828090848114182, + "loss": 1.483, + "step": 3414 + }, + { + "epoch": 0.36023206751054854, + "grad_norm": 0.6499770283699036, + "learning_rate": 0.001082584078372578, + "loss": 1.4804, + "step": 3415 + }, + { + "epoch": 0.36033755274261603, + "grad_norm": 0.6839283108711243, + "learning_rate": 0.0010823590346660335, + "loss": 1.5283, + "step": 3416 + }, + { + "epoch": 0.3604430379746835, + "grad_norm": 0.6406496167182922, + "learning_rate": 0.0010821339537170015, + "loss": 1.5189, + "step": 3417 + }, + { + "epoch": 0.36054852320675107, + "grad_norm": 0.700951337814331, + "learning_rate": 0.0010819088355507043, + "loss": 1.4861, + "step": 3418 + }, + { + "epoch": 0.36065400843881856, + "grad_norm": 0.8293127417564392, + "learning_rate": 0.0010816836801923666, + "loss": 1.4956, + "step": 3419 + }, + { + "epoch": 0.36075949367088606, + "grad_norm": 0.7274577617645264, + "learning_rate": 0.0010814584876672187, + "loss": 1.5086, + "step": 3420 + }, + { + "epoch": 0.3608649789029536, + "grad_norm": 0.6385715007781982, + "learning_rate": 0.0010812332580004947, + "loss": 1.5426, + "step": 3421 + }, + { + "epoch": 0.3609704641350211, + "grad_norm": 0.7679779529571533, + "learning_rate": 0.0010810079912174323, + "loss": 1.5604, + "step": 3422 + }, + { + "epoch": 0.3610759493670886, + "grad_norm": 0.7054489254951477, + "learning_rate": 0.001080782687343274, + "loss": 1.5249, + "step": 3423 + }, + { + "epoch": 0.36118143459915614, + "grad_norm": 0.683502733707428, + "learning_rate": 0.0010805573464032659, + "loss": 1.5377, + "step": 3424 + }, + { + "epoch": 0.36128691983122363, + "grad_norm": 
0.6924149394035339, + "learning_rate": 0.0010803319684226593, + "loss": 1.5387, + "step": 3425 + }, + { + "epoch": 0.3613924050632911, + "grad_norm": 0.6616590023040771, + "learning_rate": 0.001080106553426708, + "loss": 1.4947, + "step": 3426 + }, + { + "epoch": 0.36149789029535867, + "grad_norm": 0.7304818034172058, + "learning_rate": 0.0010798811014406716, + "loss": 1.5338, + "step": 3427 + }, + { + "epoch": 0.36160337552742616, + "grad_norm": 0.6888169050216675, + "learning_rate": 0.0010796556124898127, + "loss": 1.5135, + "step": 3428 + }, + { + "epoch": 0.36170886075949366, + "grad_norm": 0.68011873960495, + "learning_rate": 0.0010794300865993988, + "loss": 1.4897, + "step": 3429 + }, + { + "epoch": 0.3618143459915612, + "grad_norm": 0.698401689529419, + "learning_rate": 0.0010792045237947008, + "loss": 1.4879, + "step": 3430 + }, + { + "epoch": 0.3619198312236287, + "grad_norm": 0.6592811942100525, + "learning_rate": 0.0010789789241009945, + "loss": 1.527, + "step": 3431 + }, + { + "epoch": 0.3620253164556962, + "grad_norm": 0.7214067578315735, + "learning_rate": 0.0010787532875435593, + "loss": 1.5093, + "step": 3432 + }, + { + "epoch": 0.36213080168776374, + "grad_norm": 0.6976393461227417, + "learning_rate": 0.0010785276141476786, + "loss": 1.5245, + "step": 3433 + }, + { + "epoch": 0.36223628691983123, + "grad_norm": 0.5946715474128723, + "learning_rate": 0.001078301903938641, + "loss": 1.4898, + "step": 3434 + }, + { + "epoch": 0.3623417721518987, + "grad_norm": 0.6151496171951294, + "learning_rate": 0.0010780761569417377, + "loss": 1.5302, + "step": 3435 + }, + { + "epoch": 0.36244725738396627, + "grad_norm": 0.6273733377456665, + "learning_rate": 0.0010778503731822652, + "loss": 1.4942, + "step": 3436 + }, + { + "epoch": 0.36255274261603376, + "grad_norm": 0.6806928515434265, + "learning_rate": 0.0010776245526855235, + "loss": 1.5671, + "step": 3437 + }, + { + "epoch": 0.36265822784810126, + "grad_norm": 0.6126009821891785, + "learning_rate": 
0.0010773986954768172, + "loss": 1.539, + "step": 3438 + }, + { + "epoch": 0.3627637130801688, + "grad_norm": 0.6925680637359619, + "learning_rate": 0.0010771728015814544, + "loss": 1.5032, + "step": 3439 + }, + { + "epoch": 0.3628691983122363, + "grad_norm": 0.6448641419410706, + "learning_rate": 0.0010769468710247478, + "loss": 1.5058, + "step": 3440 + }, + { + "epoch": 0.3629746835443038, + "grad_norm": 0.6526455283164978, + "learning_rate": 0.0010767209038320138, + "loss": 1.4915, + "step": 3441 + }, + { + "epoch": 0.3630801687763713, + "grad_norm": 0.7221381664276123, + "learning_rate": 0.0010764949000285735, + "loss": 1.5361, + "step": 3442 + }, + { + "epoch": 0.36318565400843883, + "grad_norm": 0.7131943702697754, + "learning_rate": 0.0010762688596397515, + "loss": 1.5346, + "step": 3443 + }, + { + "epoch": 0.3632911392405063, + "grad_norm": 0.6611559987068176, + "learning_rate": 0.001076042782690877, + "loss": 1.4804, + "step": 3444 + }, + { + "epoch": 0.3633966244725738, + "grad_norm": 0.7685478925704956, + "learning_rate": 0.001075816669207283, + "loss": 1.5446, + "step": 3445 + }, + { + "epoch": 0.36350210970464136, + "grad_norm": 0.6364534497261047, + "learning_rate": 0.0010755905192143063, + "loss": 1.5497, + "step": 3446 + }, + { + "epoch": 0.36360759493670886, + "grad_norm": 0.7346304059028625, + "learning_rate": 0.0010753643327372886, + "loss": 1.4969, + "step": 3447 + }, + { + "epoch": 0.36371308016877635, + "grad_norm": 0.6407297849655151, + "learning_rate": 0.0010751381098015747, + "loss": 1.523, + "step": 3448 + }, + { + "epoch": 0.3638185654008439, + "grad_norm": 0.7490684986114502, + "learning_rate": 0.0010749118504325146, + "loss": 1.5085, + "step": 3449 + }, + { + "epoch": 0.3639240506329114, + "grad_norm": 0.7945681810379028, + "learning_rate": 0.0010746855546554612, + "loss": 1.4878, + "step": 3450 + }, + { + "epoch": 0.3640295358649789, + "grad_norm": 0.6371755003929138, + "learning_rate": 0.0010744592224957727, + "loss": 1.5114, + 
"step": 3451 + }, + { + "epoch": 0.36413502109704643, + "grad_norm": 0.6942017674446106, + "learning_rate": 0.00107423285397881, + "loss": 1.4694, + "step": 3452 + }, + { + "epoch": 0.3642405063291139, + "grad_norm": 0.6967131495475769, + "learning_rate": 0.0010740064491299398, + "loss": 1.5137, + "step": 3453 + }, + { + "epoch": 0.3643459915611814, + "grad_norm": 0.6639828681945801, + "learning_rate": 0.0010737800079745308, + "loss": 1.4948, + "step": 3454 + }, + { + "epoch": 0.36445147679324896, + "grad_norm": 0.7881840467453003, + "learning_rate": 0.0010735535305379576, + "loss": 1.5553, + "step": 3455 + }, + { + "epoch": 0.36455696202531646, + "grad_norm": 0.6007492542266846, + "learning_rate": 0.001073327016845598, + "loss": 1.4998, + "step": 3456 + }, + { + "epoch": 0.36466244725738395, + "grad_norm": 0.8629200458526611, + "learning_rate": 0.001073100466922834, + "loss": 1.5172, + "step": 3457 + }, + { + "epoch": 0.3647679324894515, + "grad_norm": 0.935185968875885, + "learning_rate": 0.0010728738807950515, + "loss": 1.524, + "step": 3458 + }, + { + "epoch": 0.364873417721519, + "grad_norm": 0.6102510690689087, + "learning_rate": 0.0010726472584876403, + "loss": 1.4792, + "step": 3459 + }, + { + "epoch": 0.3649789029535865, + "grad_norm": 0.8567251563072205, + "learning_rate": 0.0010724206000259954, + "loss": 1.5208, + "step": 3460 + }, + { + "epoch": 0.36508438818565403, + "grad_norm": 0.680968165397644, + "learning_rate": 0.0010721939054355145, + "loss": 1.5348, + "step": 3461 + }, + { + "epoch": 0.3651898734177215, + "grad_norm": 0.6846998333930969, + "learning_rate": 0.0010719671747415995, + "loss": 1.5169, + "step": 3462 + }, + { + "epoch": 0.365295358649789, + "grad_norm": 0.7790937423706055, + "learning_rate": 0.0010717404079696575, + "loss": 1.5339, + "step": 3463 + }, + { + "epoch": 0.36540084388185656, + "grad_norm": 0.6904132962226868, + "learning_rate": 0.0010715136051450982, + "loss": 1.5381, + "step": 3464 + }, + { + "epoch": 
0.36550632911392406, + "grad_norm": 0.6347854137420654, + "learning_rate": 0.0010712867662933364, + "loss": 1.5103, + "step": 3465 + }, + { + "epoch": 0.36561181434599155, + "grad_norm": 0.6910585761070251, + "learning_rate": 0.0010710598914397901, + "loss": 1.4941, + "step": 3466 + }, + { + "epoch": 0.3657172995780591, + "grad_norm": 0.7208836674690247, + "learning_rate": 0.0010708329806098822, + "loss": 1.5467, + "step": 3467 + }, + { + "epoch": 0.3658227848101266, + "grad_norm": 0.6680470108985901, + "learning_rate": 0.001070606033829039, + "loss": 1.4773, + "step": 3468 + }, + { + "epoch": 0.3659282700421941, + "grad_norm": 0.6848205924034119, + "learning_rate": 0.001070379051122691, + "loss": 1.5619, + "step": 3469 + }, + { + "epoch": 0.36603375527426163, + "grad_norm": 0.703566312789917, + "learning_rate": 0.0010701520325162727, + "loss": 1.5335, + "step": 3470 + }, + { + "epoch": 0.3661392405063291, + "grad_norm": 0.6441104412078857, + "learning_rate": 0.001069924978035223, + "loss": 1.5021, + "step": 3471 + }, + { + "epoch": 0.3662447257383966, + "grad_norm": 0.6863146424293518, + "learning_rate": 0.0010696978877049838, + "loss": 1.558, + "step": 3472 + }, + { + "epoch": 0.3663502109704641, + "grad_norm": 0.6641765236854553, + "learning_rate": 0.0010694707615510023, + "loss": 1.5712, + "step": 3473 + }, + { + "epoch": 0.36645569620253166, + "grad_norm": 0.7806729078292847, + "learning_rate": 0.0010692435995987293, + "loss": 1.5445, + "step": 3474 + }, + { + "epoch": 0.36656118143459915, + "grad_norm": 0.7385364770889282, + "learning_rate": 0.0010690164018736187, + "loss": 1.5653, + "step": 3475 + }, + { + "epoch": 0.36666666666666664, + "grad_norm": 0.6473694443702698, + "learning_rate": 0.0010687891684011295, + "loss": 1.5245, + "step": 3476 + }, + { + "epoch": 0.3667721518987342, + "grad_norm": 0.6830251216888428, + "learning_rate": 0.0010685618992067243, + "loss": 1.5263, + "step": 3477 + }, + { + "epoch": 0.3668776371308017, + "grad_norm": 
0.7753580808639526, + "learning_rate": 0.00106833459431587, + "loss": 1.4755, + "step": 3478 + }, + { + "epoch": 0.3669831223628692, + "grad_norm": 0.7015286684036255, + "learning_rate": 0.001068107253754037, + "loss": 1.5249, + "step": 3479 + }, + { + "epoch": 0.3670886075949367, + "grad_norm": 0.6497856974601746, + "learning_rate": 0.0010678798775467001, + "loss": 1.4961, + "step": 3480 + }, + { + "epoch": 0.3671940928270042, + "grad_norm": 0.6680678725242615, + "learning_rate": 0.0010676524657193378, + "loss": 1.5241, + "step": 3481 + }, + { + "epoch": 0.3672995780590717, + "grad_norm": 0.7331162691116333, + "learning_rate": 0.0010674250182974325, + "loss": 1.536, + "step": 3482 + }, + { + "epoch": 0.36740506329113926, + "grad_norm": 0.6271988153457642, + "learning_rate": 0.0010671975353064712, + "loss": 1.4937, + "step": 3483 + }, + { + "epoch": 0.36751054852320675, + "grad_norm": 0.8079098463058472, + "learning_rate": 0.0010669700167719443, + "loss": 1.5521, + "step": 3484 + }, + { + "epoch": 0.36761603375527424, + "grad_norm": 0.6601078510284424, + "learning_rate": 0.0010667424627193469, + "loss": 1.5258, + "step": 3485 + }, + { + "epoch": 0.3677215189873418, + "grad_norm": 0.7156246304512024, + "learning_rate": 0.0010665148731741768, + "loss": 1.5151, + "step": 3486 + }, + { + "epoch": 0.3678270042194093, + "grad_norm": 0.6044527888298035, + "learning_rate": 0.0010662872481619367, + "loss": 1.501, + "step": 3487 + }, + { + "epoch": 0.3679324894514768, + "grad_norm": 0.7932851910591125, + "learning_rate": 0.0010660595877081335, + "loss": 1.5202, + "step": 3488 + }, + { + "epoch": 0.3680379746835443, + "grad_norm": 0.846194863319397, + "learning_rate": 0.0010658318918382774, + "loss": 1.5086, + "step": 3489 + }, + { + "epoch": 0.3681434599156118, + "grad_norm": 0.6486513614654541, + "learning_rate": 0.0010656041605778832, + "loss": 1.5314, + "step": 3490 + }, + { + "epoch": 0.3682489451476793, + "grad_norm": 0.7040706872940063, + "learning_rate": 
0.0010653763939524688, + "loss": 1.518, + "step": 3491 + }, + { + "epoch": 0.36835443037974686, + "grad_norm": 0.6043910980224609, + "learning_rate": 0.0010651485919875568, + "loss": 1.5037, + "step": 3492 + }, + { + "epoch": 0.36845991561181435, + "grad_norm": 0.686448872089386, + "learning_rate": 0.0010649207547086738, + "loss": 1.498, + "step": 3493 + }, + { + "epoch": 0.36856540084388184, + "grad_norm": 0.6302900314331055, + "learning_rate": 0.0010646928821413499, + "loss": 1.54, + "step": 3494 + }, + { + "epoch": 0.3686708860759494, + "grad_norm": 0.6441648602485657, + "learning_rate": 0.0010644649743111192, + "loss": 1.5148, + "step": 3495 + }, + { + "epoch": 0.3687763713080169, + "grad_norm": 0.6459918022155762, + "learning_rate": 0.0010642370312435201, + "loss": 1.4945, + "step": 3496 + }, + { + "epoch": 0.3688818565400844, + "grad_norm": 0.6671119928359985, + "learning_rate": 0.0010640090529640948, + "loss": 1.5276, + "step": 3497 + }, + { + "epoch": 0.3689873417721519, + "grad_norm": 0.7589203715324402, + "learning_rate": 0.0010637810394983893, + "loss": 1.4789, + "step": 3498 + }, + { + "epoch": 0.3690928270042194, + "grad_norm": 0.655016303062439, + "learning_rate": 0.0010635529908719537, + "loss": 1.5023, + "step": 3499 + }, + { + "epoch": 0.3691983122362869, + "grad_norm": 0.7914090752601624, + "learning_rate": 0.001063324907110342, + "loss": 1.4646, + "step": 3500 + }, + { + "epoch": 0.36930379746835446, + "grad_norm": 0.6327452659606934, + "learning_rate": 0.001063096788239112, + "loss": 1.4919, + "step": 3501 + }, + { + "epoch": 0.36940928270042195, + "grad_norm": 1.0041711330413818, + "learning_rate": 0.0010628686342838253, + "loss": 1.5276, + "step": 3502 + }, + { + "epoch": 0.36951476793248944, + "grad_norm": 0.7068522572517395, + "learning_rate": 0.0010626404452700486, + "loss": 1.5534, + "step": 3503 + }, + { + "epoch": 0.369620253164557, + "grad_norm": 1.042805790901184, + "learning_rate": 0.0010624122212233506, + "loss": 1.5541, + "step": 
3504 + }, + { + "epoch": 0.3697257383966245, + "grad_norm": 0.8168570399284363, + "learning_rate": 0.0010621839621693056, + "loss": 1.4946, + "step": 3505 + }, + { + "epoch": 0.369831223628692, + "grad_norm": 0.8859397172927856, + "learning_rate": 0.0010619556681334909, + "loss": 1.5286, + "step": 3506 + }, + { + "epoch": 0.36993670886075947, + "grad_norm": 1.1297074556350708, + "learning_rate": 0.001061727339141488, + "loss": 1.5443, + "step": 3507 + }, + { + "epoch": 0.370042194092827, + "grad_norm": 0.6303486824035645, + "learning_rate": 0.0010614989752188823, + "loss": 1.5003, + "step": 3508 + }, + { + "epoch": 0.3701476793248945, + "grad_norm": 0.8318356871604919, + "learning_rate": 0.0010612705763912635, + "loss": 1.521, + "step": 3509 + }, + { + "epoch": 0.370253164556962, + "grad_norm": 0.6779481768608093, + "learning_rate": 0.0010610421426842241, + "loss": 1.5168, + "step": 3510 + }, + { + "epoch": 0.37035864978902955, + "grad_norm": 0.9910770654678345, + "learning_rate": 0.0010608136741233618, + "loss": 1.5211, + "step": 3511 + }, + { + "epoch": 0.37046413502109704, + "grad_norm": 0.7412863373756409, + "learning_rate": 0.0010605851707342774, + "loss": 1.504, + "step": 3512 + }, + { + "epoch": 0.37056962025316453, + "grad_norm": 0.8701664209365845, + "learning_rate": 0.0010603566325425758, + "loss": 1.5017, + "step": 3513 + }, + { + "epoch": 0.3706751054852321, + "grad_norm": 0.9217091798782349, + "learning_rate": 0.001060128059573866, + "loss": 1.5172, + "step": 3514 + }, + { + "epoch": 0.3707805907172996, + "grad_norm": 0.6301645040512085, + "learning_rate": 0.0010598994518537608, + "loss": 1.5127, + "step": 3515 + }, + { + "epoch": 0.37088607594936707, + "grad_norm": 0.8264808058738708, + "learning_rate": 0.0010596708094078766, + "loss": 1.54, + "step": 3516 + }, + { + "epoch": 0.3709915611814346, + "grad_norm": 0.6397705674171448, + "learning_rate": 0.0010594421322618341, + "loss": 1.5271, + "step": 3517 + }, + { + "epoch": 0.3710970464135021, + 
"grad_norm": 0.8276912569999695, + "learning_rate": 0.0010592134204412578, + "loss": 1.5107, + "step": 3518 + }, + { + "epoch": 0.3712025316455696, + "grad_norm": 0.7364963293075562, + "learning_rate": 0.0010589846739717755, + "loss": 1.5304, + "step": 3519 + }, + { + "epoch": 0.37130801687763715, + "grad_norm": 0.7053285837173462, + "learning_rate": 0.00105875589287902, + "loss": 1.5357, + "step": 3520 + }, + { + "epoch": 0.37141350210970464, + "grad_norm": 0.7397165298461914, + "learning_rate": 0.001058527077188627, + "loss": 1.5326, + "step": 3521 + }, + { + "epoch": 0.37151898734177213, + "grad_norm": 0.7687540650367737, + "learning_rate": 0.001058298226926237, + "loss": 1.5148, + "step": 3522 + }, + { + "epoch": 0.3716244725738397, + "grad_norm": 0.8135148286819458, + "learning_rate": 0.0010580693421174928, + "loss": 1.514, + "step": 3523 + }, + { + "epoch": 0.3717299578059072, + "grad_norm": 0.7192037105560303, + "learning_rate": 0.0010578404227880429, + "loss": 1.4833, + "step": 3524 + }, + { + "epoch": 0.37183544303797467, + "grad_norm": 0.8281124830245972, + "learning_rate": 0.0010576114689635383, + "loss": 1.5573, + "step": 3525 + }, + { + "epoch": 0.3719409282700422, + "grad_norm": 0.6880154609680176, + "learning_rate": 0.0010573824806696351, + "loss": 1.5397, + "step": 3526 + }, + { + "epoch": 0.3720464135021097, + "grad_norm": 0.8050298690795898, + "learning_rate": 0.001057153457931992, + "loss": 1.5181, + "step": 3527 + }, + { + "epoch": 0.3721518987341772, + "grad_norm": 0.7602077126502991, + "learning_rate": 0.0010569244007762723, + "loss": 1.5629, + "step": 3528 + }, + { + "epoch": 0.37225738396624475, + "grad_norm": 0.6537395119667053, + "learning_rate": 0.0010566953092281432, + "loss": 1.4935, + "step": 3529 + }, + { + "epoch": 0.37236286919831224, + "grad_norm": 0.6897159814834595, + "learning_rate": 0.0010564661833132752, + "loss": 1.576, + "step": 3530 + }, + { + "epoch": 0.37246835443037973, + "grad_norm": 0.6756069660186768, + 
"learning_rate": 0.0010562370230573432, + "loss": 1.5315, + "step": 3531 + }, + { + "epoch": 0.3725738396624473, + "grad_norm": 0.7129528522491455, + "learning_rate": 0.0010560078284860257, + "loss": 1.5639, + "step": 3532 + }, + { + "epoch": 0.3726793248945148, + "grad_norm": 0.6971719264984131, + "learning_rate": 0.0010557785996250053, + "loss": 1.5008, + "step": 3533 + }, + { + "epoch": 0.37278481012658227, + "grad_norm": 0.7871233820915222, + "learning_rate": 0.0010555493364999679, + "loss": 1.531, + "step": 3534 + }, + { + "epoch": 0.3728902953586498, + "grad_norm": 0.8303617238998413, + "learning_rate": 0.001055320039136604, + "loss": 1.543, + "step": 3535 + }, + { + "epoch": 0.3729957805907173, + "grad_norm": 0.6426006555557251, + "learning_rate": 0.001055090707560607, + "loss": 1.4965, + "step": 3536 + }, + { + "epoch": 0.3731012658227848, + "grad_norm": 0.8702876567840576, + "learning_rate": 0.0010548613417976748, + "loss": 1.4842, + "step": 3537 + }, + { + "epoch": 0.37320675105485235, + "grad_norm": 0.6897974014282227, + "learning_rate": 0.0010546319418735094, + "loss": 1.5339, + "step": 3538 + }, + { + "epoch": 0.37331223628691984, + "grad_norm": 0.770946204662323, + "learning_rate": 0.0010544025078138156, + "loss": 1.5121, + "step": 3539 + }, + { + "epoch": 0.37341772151898733, + "grad_norm": 0.9016287326812744, + "learning_rate": 0.001054173039644303, + "loss": 1.5043, + "step": 3540 + }, + { + "epoch": 0.3735232067510548, + "grad_norm": 0.77115797996521, + "learning_rate": 0.0010539435373906846, + "loss": 1.5174, + "step": 3541 + }, + { + "epoch": 0.3736286919831224, + "grad_norm": 0.6827710866928101, + "learning_rate": 0.0010537140010786774, + "loss": 1.5528, + "step": 3542 + }, + { + "epoch": 0.37373417721518987, + "grad_norm": 0.6357390284538269, + "learning_rate": 0.0010534844307340016, + "loss": 1.498, + "step": 3543 + }, + { + "epoch": 0.37383966244725736, + "grad_norm": 0.7743476629257202, + "learning_rate": 0.0010532548263823822, + "loss": 
1.4922, + "step": 3544 + }, + { + "epoch": 0.3739451476793249, + "grad_norm": 0.6841594576835632, + "learning_rate": 0.0010530251880495473, + "loss": 1.543, + "step": 3545 + }, + { + "epoch": 0.3740506329113924, + "grad_norm": 0.7305267453193665, + "learning_rate": 0.0010527955157612291, + "loss": 1.5017, + "step": 3546 + }, + { + "epoch": 0.3741561181434599, + "grad_norm": 0.7084537148475647, + "learning_rate": 0.0010525658095431635, + "loss": 1.4748, + "step": 3547 + }, + { + "epoch": 0.37426160337552744, + "grad_norm": 0.6853293776512146, + "learning_rate": 0.00105233606942109, + "loss": 1.5001, + "step": 3548 + }, + { + "epoch": 0.37436708860759493, + "grad_norm": 0.7173791527748108, + "learning_rate": 0.0010521062954207527, + "loss": 1.5031, + "step": 3549 + }, + { + "epoch": 0.3744725738396624, + "grad_norm": 0.6474900245666504, + "learning_rate": 0.0010518764875678981, + "loss": 1.5198, + "step": 3550 + }, + { + "epoch": 0.37457805907173, + "grad_norm": 0.7522045373916626, + "learning_rate": 0.001051646645888278, + "loss": 1.5016, + "step": 3551 + }, + { + "epoch": 0.37468354430379747, + "grad_norm": 0.6646532416343689, + "learning_rate": 0.0010514167704076473, + "loss": 1.5006, + "step": 3552 + }, + { + "epoch": 0.37478902953586496, + "grad_norm": 0.811113715171814, + "learning_rate": 0.0010511868611517644, + "loss": 1.5238, + "step": 3553 + }, + { + "epoch": 0.3748945147679325, + "grad_norm": 0.8456649780273438, + "learning_rate": 0.0010509569181463916, + "loss": 1.5283, + "step": 3554 + }, + { + "epoch": 0.375, + "grad_norm": 0.6766811013221741, + "learning_rate": 0.0010507269414172956, + "loss": 1.5147, + "step": 3555 + }, + { + "epoch": 0.3751054852320675, + "grad_norm": 0.9405896067619324, + "learning_rate": 0.0010504969309902462, + "loss": 1.5171, + "step": 3556 + }, + { + "epoch": 0.37521097046413504, + "grad_norm": 0.7086286544799805, + "learning_rate": 0.0010502668868910174, + "loss": 1.518, + "step": 3557 + }, + { + "epoch": 0.37531645569620253, + 
"grad_norm": 0.6818018555641174, + "learning_rate": 0.0010500368091453864, + "loss": 1.4881, + "step": 3558 + }, + { + "epoch": 0.37542194092827, + "grad_norm": 0.6478855013847351, + "learning_rate": 0.001049806697779135, + "loss": 1.511, + "step": 3559 + }, + { + "epoch": 0.3755274261603376, + "grad_norm": 0.6837027072906494, + "learning_rate": 0.001049576552818048, + "loss": 1.5003, + "step": 3560 + }, + { + "epoch": 0.37563291139240507, + "grad_norm": 0.6534584164619446, + "learning_rate": 0.0010493463742879147, + "loss": 1.5079, + "step": 3561 + }, + { + "epoch": 0.37573839662447256, + "grad_norm": 0.6866925358772278, + "learning_rate": 0.0010491161622145275, + "loss": 1.488, + "step": 3562 + }, + { + "epoch": 0.3758438818565401, + "grad_norm": 0.6568836569786072, + "learning_rate": 0.0010488859166236824, + "loss": 1.4981, + "step": 3563 + }, + { + "epoch": 0.3759493670886076, + "grad_norm": 0.7670334577560425, + "learning_rate": 0.0010486556375411803, + "loss": 1.5102, + "step": 3564 + }, + { + "epoch": 0.3760548523206751, + "grad_norm": 0.6979433298110962, + "learning_rate": 0.0010484253249928247, + "loss": 1.5061, + "step": 3565 + }, + { + "epoch": 0.37616033755274264, + "grad_norm": 0.7790496349334717, + "learning_rate": 0.0010481949790044234, + "loss": 1.4757, + "step": 3566 + }, + { + "epoch": 0.37626582278481013, + "grad_norm": 0.8464193344116211, + "learning_rate": 0.0010479645996017875, + "loss": 1.5014, + "step": 3567 + }, + { + "epoch": 0.3763713080168776, + "grad_norm": 0.7239170670509338, + "learning_rate": 0.0010477341868107327, + "loss": 1.5327, + "step": 3568 + }, + { + "epoch": 0.3764767932489452, + "grad_norm": 1.181746006011963, + "learning_rate": 0.0010475037406570775, + "loss": 1.5, + "step": 3569 + }, + { + "epoch": 0.37658227848101267, + "grad_norm": 0.700780987739563, + "learning_rate": 0.0010472732611666448, + "loss": 1.5257, + "step": 3570 + }, + { + "epoch": 0.37668776371308016, + "grad_norm": 0.9270737171173096, + "learning_rate": 
0.0010470427483652608, + "loss": 1.4808, + "step": 3571 + }, + { + "epoch": 0.37679324894514765, + "grad_norm": 0.929875910282135, + "learning_rate": 0.0010468122022787554, + "loss": 1.5066, + "step": 3572 + }, + { + "epoch": 0.3768987341772152, + "grad_norm": 0.7058815956115723, + "learning_rate": 0.001046581622932963, + "loss": 1.5142, + "step": 3573 + }, + { + "epoch": 0.3770042194092827, + "grad_norm": 0.9010534286499023, + "learning_rate": 0.001046351010353721, + "loss": 1.5323, + "step": 3574 + }, + { + "epoch": 0.3771097046413502, + "grad_norm": 0.7475859522819519, + "learning_rate": 0.0010461203645668702, + "loss": 1.5146, + "step": 3575 + }, + { + "epoch": 0.37721518987341773, + "grad_norm": 1.2727599143981934, + "learning_rate": 0.001045889685598256, + "loss": 1.5235, + "step": 3576 + }, + { + "epoch": 0.3773206751054852, + "grad_norm": 0.643476128578186, + "learning_rate": 0.0010456589734737273, + "loss": 1.5009, + "step": 3577 + }, + { + "epoch": 0.3774261603375527, + "grad_norm": 0.9109044671058655, + "learning_rate": 0.0010454282282191362, + "loss": 1.5008, + "step": 3578 + }, + { + "epoch": 0.37753164556962027, + "grad_norm": 0.7443335652351379, + "learning_rate": 0.001045197449860339, + "loss": 1.5002, + "step": 3579 + }, + { + "epoch": 0.37763713080168776, + "grad_norm": 1.247331976890564, + "learning_rate": 0.0010449666384231954, + "loss": 1.5355, + "step": 3580 + }, + { + "epoch": 0.37774261603375525, + "grad_norm": 0.7827394604682922, + "learning_rate": 0.0010447357939335693, + "loss": 1.5175, + "step": 3581 + }, + { + "epoch": 0.3778481012658228, + "grad_norm": 1.1788352727890015, + "learning_rate": 0.001044504916417328, + "loss": 1.5079, + "step": 3582 + }, + { + "epoch": 0.3779535864978903, + "grad_norm": 0.7270678281784058, + "learning_rate": 0.001044274005900342, + "loss": 1.5149, + "step": 3583 + }, + { + "epoch": 0.3780590717299578, + "grad_norm": 0.8659098148345947, + "learning_rate": 0.0010440430624084863, + "loss": 1.4875, + "step": 
3584 + }, + { + "epoch": 0.37816455696202533, + "grad_norm": 0.7217019200325012, + "learning_rate": 0.0010438120859676393, + "loss": 1.5487, + "step": 3585 + }, + { + "epoch": 0.3782700421940928, + "grad_norm": 0.7035667300224304, + "learning_rate": 0.0010435810766036828, + "loss": 1.5306, + "step": 3586 + }, + { + "epoch": 0.3783755274261603, + "grad_norm": 0.7147712111473083, + "learning_rate": 0.001043350034342503, + "loss": 1.4965, + "step": 3587 + }, + { + "epoch": 0.37848101265822787, + "grad_norm": 0.6547770500183105, + "learning_rate": 0.001043118959209989, + "loss": 1.5082, + "step": 3588 + }, + { + "epoch": 0.37858649789029536, + "grad_norm": 0.6943700313568115, + "learning_rate": 0.001042887851232034, + "loss": 1.5471, + "step": 3589 + }, + { + "epoch": 0.37869198312236285, + "grad_norm": 0.6537338495254517, + "learning_rate": 0.0010426567104345346, + "loss": 1.5182, + "step": 3590 + }, + { + "epoch": 0.3787974683544304, + "grad_norm": 0.6832038164138794, + "learning_rate": 0.0010424255368433916, + "loss": 1.4819, + "step": 3591 + }, + { + "epoch": 0.3789029535864979, + "grad_norm": 0.6478418707847595, + "learning_rate": 0.0010421943304845093, + "loss": 1.4985, + "step": 3592 + }, + { + "epoch": 0.3790084388185654, + "grad_norm": 0.7138353586196899, + "learning_rate": 0.0010419630913837948, + "loss": 1.5414, + "step": 3593 + }, + { + "epoch": 0.37911392405063293, + "grad_norm": 0.6526336669921875, + "learning_rate": 0.0010417318195671604, + "loss": 1.5111, + "step": 3594 + }, + { + "epoch": 0.3792194092827004, + "grad_norm": 0.6416317224502563, + "learning_rate": 0.0010415005150605208, + "loss": 1.5202, + "step": 3595 + }, + { + "epoch": 0.3793248945147679, + "grad_norm": 0.6428040266036987, + "learning_rate": 0.001041269177889795, + "loss": 1.4554, + "step": 3596 + }, + { + "epoch": 0.37943037974683547, + "grad_norm": 0.6475996971130371, + "learning_rate": 0.0010410378080809052, + "loss": 1.4885, + "step": 3597 + }, + { + "epoch": 0.37953586497890296, + 
"grad_norm": 0.6383029818534851, + "learning_rate": 0.001040806405659778, + "loss": 1.5076, + "step": 3598 + }, + { + "epoch": 0.37964135021097045, + "grad_norm": 0.8838738203048706, + "learning_rate": 0.0010405749706523428, + "loss": 1.486, + "step": 3599 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 0.7364051342010498, + "learning_rate": 0.0010403435030845332, + "loss": 1.4865, + "step": 3600 + }, + { + "epoch": 0.3798523206751055, + "grad_norm": 0.7562887668609619, + "learning_rate": 0.0010401120029822864, + "loss": 1.5169, + "step": 3601 + }, + { + "epoch": 0.379957805907173, + "grad_norm": 0.8790984153747559, + "learning_rate": 0.001039880470371543, + "loss": 1.4906, + "step": 3602 + }, + { + "epoch": 0.38006329113924053, + "grad_norm": 0.7391127943992615, + "learning_rate": 0.0010396489052782473, + "loss": 1.5077, + "step": 3603 + }, + { + "epoch": 0.380168776371308, + "grad_norm": 1.1051479578018188, + "learning_rate": 0.0010394173077283477, + "loss": 1.4832, + "step": 3604 + }, + { + "epoch": 0.3802742616033755, + "grad_norm": 0.6797409653663635, + "learning_rate": 0.0010391856777477954, + "loss": 1.5281, + "step": 3605 + }, + { + "epoch": 0.380379746835443, + "grad_norm": 0.8913492560386658, + "learning_rate": 0.001038954015362546, + "loss": 1.4993, + "step": 3606 + }, + { + "epoch": 0.38048523206751056, + "grad_norm": 0.6634365916252136, + "learning_rate": 0.001038722320598558, + "loss": 1.4877, + "step": 3607 + }, + { + "epoch": 0.38059071729957805, + "grad_norm": 0.696757435798645, + "learning_rate": 0.001038490593481795, + "loss": 1.5325, + "step": 3608 + }, + { + "epoch": 0.38069620253164554, + "grad_norm": 0.6760287284851074, + "learning_rate": 0.0010382588340382218, + "loss": 1.5268, + "step": 3609 + }, + { + "epoch": 0.3808016877637131, + "grad_norm": 0.6677202582359314, + "learning_rate": 0.0010380270422938093, + "loss": 1.4703, + "step": 3610 + }, + { + "epoch": 0.3809071729957806, + "grad_norm": 0.7142106890678406, + "learning_rate": 
0.00103779521827453, + "loss": 1.5256, + "step": 3611 + }, + { + "epoch": 0.3810126582278481, + "grad_norm": 0.8478877544403076, + "learning_rate": 0.0010375633620063618, + "loss": 1.4996, + "step": 3612 + }, + { + "epoch": 0.3811181434599156, + "grad_norm": 0.7180824279785156, + "learning_rate": 0.0010373314735152848, + "loss": 1.4985, + "step": 3613 + }, + { + "epoch": 0.3812236286919831, + "grad_norm": 0.7010191679000854, + "learning_rate": 0.0010370995528272836, + "loss": 1.49, + "step": 3614 + }, + { + "epoch": 0.3813291139240506, + "grad_norm": 0.7682008743286133, + "learning_rate": 0.0010368675999683455, + "loss": 1.4698, + "step": 3615 + }, + { + "epoch": 0.38143459915611816, + "grad_norm": 0.6131367683410645, + "learning_rate": 0.0010366356149644628, + "loss": 1.5142, + "step": 3616 + }, + { + "epoch": 0.38154008438818565, + "grad_norm": 0.7608954906463623, + "learning_rate": 0.0010364035978416297, + "loss": 1.5606, + "step": 3617 + }, + { + "epoch": 0.38164556962025314, + "grad_norm": 0.6827284693717957, + "learning_rate": 0.001036171548625846, + "loss": 1.541, + "step": 3618 + }, + { + "epoch": 0.3817510548523207, + "grad_norm": 0.7526616454124451, + "learning_rate": 0.0010359394673431126, + "loss": 1.5027, + "step": 3619 + }, + { + "epoch": 0.3818565400843882, + "grad_norm": 0.6659370064735413, + "learning_rate": 0.0010357073540194362, + "loss": 1.4696, + "step": 3620 + }, + { + "epoch": 0.3819620253164557, + "grad_norm": 0.7687931060791016, + "learning_rate": 0.0010354752086808264, + "loss": 1.5158, + "step": 3621 + }, + { + "epoch": 0.3820675105485232, + "grad_norm": 0.7575954794883728, + "learning_rate": 0.001035243031353296, + "loss": 1.4731, + "step": 3622 + }, + { + "epoch": 0.3821729957805907, + "grad_norm": 0.7337265610694885, + "learning_rate": 0.0010350108220628614, + "loss": 1.5216, + "step": 3623 + }, + { + "epoch": 0.3822784810126582, + "grad_norm": 0.6774712204933167, + "learning_rate": 0.001034778580835543, + "loss": 1.4956, + "step": 
3624 + }, + { + "epoch": 0.38238396624472576, + "grad_norm": 0.7208748459815979, + "learning_rate": 0.0010345463076973645, + "loss": 1.5196, + "step": 3625 + }, + { + "epoch": 0.38248945147679325, + "grad_norm": 0.654003918170929, + "learning_rate": 0.0010343140026743535, + "loss": 1.5044, + "step": 3626 + }, + { + "epoch": 0.38259493670886074, + "grad_norm": 0.6878885626792908, + "learning_rate": 0.0010340816657925407, + "loss": 1.5158, + "step": 3627 + }, + { + "epoch": 0.3827004219409283, + "grad_norm": 0.7443561553955078, + "learning_rate": 0.0010338492970779606, + "loss": 1.5257, + "step": 3628 + }, + { + "epoch": 0.3828059071729958, + "grad_norm": 0.6742457747459412, + "learning_rate": 0.0010336168965566516, + "loss": 1.4815, + "step": 3629 + }, + { + "epoch": 0.3829113924050633, + "grad_norm": 0.7539153695106506, + "learning_rate": 0.001033384464254655, + "loss": 1.5127, + "step": 3630 + }, + { + "epoch": 0.3830168776371308, + "grad_norm": 0.8271434903144836, + "learning_rate": 0.001033152000198016, + "loss": 1.5119, + "step": 3631 + }, + { + "epoch": 0.3831223628691983, + "grad_norm": 0.6976423859596252, + "learning_rate": 0.0010329195044127834, + "loss": 1.4928, + "step": 3632 + }, + { + "epoch": 0.3832278481012658, + "grad_norm": 0.7267560958862305, + "learning_rate": 0.0010326869769250097, + "loss": 1.5272, + "step": 3633 + }, + { + "epoch": 0.38333333333333336, + "grad_norm": 0.657936692237854, + "learning_rate": 0.0010324544177607508, + "loss": 1.4654, + "step": 3634 + }, + { + "epoch": 0.38343881856540085, + "grad_norm": 0.7039468884468079, + "learning_rate": 0.0010322218269460657, + "loss": 1.5083, + "step": 3635 + }, + { + "epoch": 0.38354430379746834, + "grad_norm": 0.674793004989624, + "learning_rate": 0.001031989204507018, + "loss": 1.4922, + "step": 3636 + }, + { + "epoch": 0.3836497890295359, + "grad_norm": 0.7070874571800232, + "learning_rate": 0.0010317565504696733, + "loss": 1.5798, + "step": 3637 + }, + { + "epoch": 0.3837552742616034, + 
"grad_norm": 0.7116631269454956, + "learning_rate": 0.0010315238648601025, + "loss": 1.501, + "step": 3638 + }, + { + "epoch": 0.3838607594936709, + "grad_norm": 0.8598850965499878, + "learning_rate": 0.0010312911477043784, + "loss": 1.509, + "step": 3639 + }, + { + "epoch": 0.38396624472573837, + "grad_norm": 0.8656608462333679, + "learning_rate": 0.001031058399028579, + "loss": 1.4594, + "step": 3640 + }, + { + "epoch": 0.3840717299578059, + "grad_norm": 0.6849205493927002, + "learning_rate": 0.0010308256188587843, + "loss": 1.4979, + "step": 3641 + }, + { + "epoch": 0.3841772151898734, + "grad_norm": 1.2974886894226074, + "learning_rate": 0.0010305928072210787, + "loss": 1.5541, + "step": 3642 + }, + { + "epoch": 0.3842827004219409, + "grad_norm": 0.7836635112762451, + "learning_rate": 0.00103035996414155, + "loss": 1.5172, + "step": 3643 + }, + { + "epoch": 0.38438818565400845, + "grad_norm": 0.8292443752288818, + "learning_rate": 0.0010301270896462893, + "loss": 1.5019, + "step": 3644 + }, + { + "epoch": 0.38449367088607594, + "grad_norm": 0.6978610157966614, + "learning_rate": 0.0010298941837613913, + "loss": 1.5082, + "step": 3645 + }, + { + "epoch": 0.38459915611814344, + "grad_norm": 0.8825560212135315, + "learning_rate": 0.0010296612465129542, + "loss": 1.4791, + "step": 3646 + }, + { + "epoch": 0.384704641350211, + "grad_norm": 0.821929395198822, + "learning_rate": 0.0010294282779270802, + "loss": 1.5118, + "step": 3647 + }, + { + "epoch": 0.3848101265822785, + "grad_norm": 0.7204012870788574, + "learning_rate": 0.001029195278029874, + "loss": 1.5449, + "step": 3648 + }, + { + "epoch": 0.38491561181434597, + "grad_norm": 0.887276828289032, + "learning_rate": 0.0010289622468474448, + "loss": 1.5287, + "step": 3649 + }, + { + "epoch": 0.3850210970464135, + "grad_norm": 0.6479186415672302, + "learning_rate": 0.001028729184405905, + "loss": 1.5102, + "step": 3650 + }, + { + "epoch": 0.385126582278481, + "grad_norm": 0.7667229175567627, + "learning_rate": 
0.00102849609073137, + "loss": 1.5295, + "step": 3651 + }, + { + "epoch": 0.3852320675105485, + "grad_norm": 0.7094600200653076, + "learning_rate": 0.0010282629658499593, + "loss": 1.5056, + "step": 3652 + }, + { + "epoch": 0.38533755274261605, + "grad_norm": 0.6787276864051819, + "learning_rate": 0.001028029809787796, + "loss": 1.4988, + "step": 3653 + }, + { + "epoch": 0.38544303797468354, + "grad_norm": 0.6490829586982727, + "learning_rate": 0.001027796622571006, + "loss": 1.5275, + "step": 3654 + }, + { + "epoch": 0.38554852320675104, + "grad_norm": 0.7865918278694153, + "learning_rate": 0.001027563404225719, + "loss": 1.5138, + "step": 3655 + }, + { + "epoch": 0.3856540084388186, + "grad_norm": 0.7611443400382996, + "learning_rate": 0.0010273301547780687, + "loss": 1.5019, + "step": 3656 + }, + { + "epoch": 0.3857594936708861, + "grad_norm": 0.6468568444252014, + "learning_rate": 0.0010270968742541917, + "loss": 1.4807, + "step": 3657 + }, + { + "epoch": 0.38586497890295357, + "grad_norm": 0.6714504361152649, + "learning_rate": 0.0010268635626802282, + "loss": 1.5114, + "step": 3658 + }, + { + "epoch": 0.3859704641350211, + "grad_norm": 0.6428451538085938, + "learning_rate": 0.001026630220082322, + "loss": 1.5263, + "step": 3659 + }, + { + "epoch": 0.3860759493670886, + "grad_norm": 0.6660507321357727, + "learning_rate": 0.0010263968464866201, + "loss": 1.4903, + "step": 3660 + }, + { + "epoch": 0.3861814345991561, + "grad_norm": 0.708412230014801, + "learning_rate": 0.0010261634419192732, + "loss": 1.5195, + "step": 3661 + }, + { + "epoch": 0.38628691983122365, + "grad_norm": 0.7256360054016113, + "learning_rate": 0.001025930006406436, + "loss": 1.4925, + "step": 3662 + }, + { + "epoch": 0.38639240506329114, + "grad_norm": 0.6356685757637024, + "learning_rate": 0.0010256965399742652, + "loss": 1.5011, + "step": 3663 + }, + { + "epoch": 0.38649789029535864, + "grad_norm": 0.9358772039413452, + "learning_rate": 0.0010254630426489225, + "loss": 1.4926, + "step": 
3664 + }, + { + "epoch": 0.3866033755274262, + "grad_norm": 0.7997296452522278, + "learning_rate": 0.0010252295144565725, + "loss": 1.4906, + "step": 3665 + }, + { + "epoch": 0.3867088607594937, + "grad_norm": 0.7562416791915894, + "learning_rate": 0.0010249959554233827, + "loss": 1.493, + "step": 3666 + }, + { + "epoch": 0.38681434599156117, + "grad_norm": 0.8710748553276062, + "learning_rate": 0.001024762365575525, + "loss": 1.5232, + "step": 3667 + }, + { + "epoch": 0.3869198312236287, + "grad_norm": 0.724600613117218, + "learning_rate": 0.001024528744939174, + "loss": 1.5228, + "step": 3668 + }, + { + "epoch": 0.3870253164556962, + "grad_norm": 1.0101571083068848, + "learning_rate": 0.0010242950935405084, + "loss": 1.4889, + "step": 3669 + }, + { + "epoch": 0.3871308016877637, + "grad_norm": 0.6530289649963379, + "learning_rate": 0.0010240614114057098, + "loss": 1.4855, + "step": 3670 + }, + { + "epoch": 0.3872362869198312, + "grad_norm": 1.205763578414917, + "learning_rate": 0.0010238276985609631, + "loss": 1.5175, + "step": 3671 + }, + { + "epoch": 0.38734177215189874, + "grad_norm": 0.7263280153274536, + "learning_rate": 0.0010235939550324576, + "loss": 1.5049, + "step": 3672 + }, + { + "epoch": 0.38744725738396624, + "grad_norm": 1.049930214881897, + "learning_rate": 0.0010233601808463852, + "loss": 1.5016, + "step": 3673 + }, + { + "epoch": 0.38755274261603373, + "grad_norm": 0.6770297884941101, + "learning_rate": 0.0010231263760289416, + "loss": 1.4966, + "step": 3674 + }, + { + "epoch": 0.3876582278481013, + "grad_norm": 0.7704777717590332, + "learning_rate": 0.0010228925406063254, + "loss": 1.4813, + "step": 3675 + }, + { + "epoch": 0.38776371308016877, + "grad_norm": 0.6351442933082581, + "learning_rate": 0.0010226586746047393, + "loss": 1.4816, + "step": 3676 + }, + { + "epoch": 0.38786919831223626, + "grad_norm": 0.7810680866241455, + "learning_rate": 0.0010224247780503892, + "loss": 1.5151, + "step": 3677 + }, + { + "epoch": 0.3879746835443038, + 
"grad_norm": 0.7243603467941284, + "learning_rate": 0.0010221908509694842, + "loss": 1.5226, + "step": 3678 + }, + { + "epoch": 0.3880801687763713, + "grad_norm": 0.673824667930603, + "learning_rate": 0.0010219568933882372, + "loss": 1.5331, + "step": 3679 + }, + { + "epoch": 0.3881856540084388, + "grad_norm": 1.1384466886520386, + "learning_rate": 0.001021722905332864, + "loss": 1.4731, + "step": 3680 + }, + { + "epoch": 0.38829113924050634, + "grad_norm": 0.7290116548538208, + "learning_rate": 0.0010214888868295842, + "loss": 1.5122, + "step": 3681 + }, + { + "epoch": 0.38839662447257384, + "grad_norm": 0.8006223440170288, + "learning_rate": 0.0010212548379046214, + "loss": 1.5003, + "step": 3682 + }, + { + "epoch": 0.38850210970464133, + "grad_norm": 0.7445079684257507, + "learning_rate": 0.001021020758584201, + "loss": 1.5133, + "step": 3683 + }, + { + "epoch": 0.3886075949367089, + "grad_norm": 0.7240571975708008, + "learning_rate": 0.0010207866488945532, + "loss": 1.5386, + "step": 3684 + }, + { + "epoch": 0.38871308016877637, + "grad_norm": 0.6983770132064819, + "learning_rate": 0.0010205525088619112, + "loss": 1.5437, + "step": 3685 + }, + { + "epoch": 0.38881856540084386, + "grad_norm": 0.9157277941703796, + "learning_rate": 0.0010203183385125115, + "loss": 1.4982, + "step": 3686 + }, + { + "epoch": 0.3889240506329114, + "grad_norm": 0.9406967759132385, + "learning_rate": 0.001020084137872594, + "loss": 1.4978, + "step": 3687 + }, + { + "epoch": 0.3890295358649789, + "grad_norm": 0.6339371204376221, + "learning_rate": 0.0010198499069684023, + "loss": 1.4762, + "step": 3688 + }, + { + "epoch": 0.3891350210970464, + "grad_norm": 0.6965810656547546, + "learning_rate": 0.0010196156458261827, + "loss": 1.5335, + "step": 3689 + }, + { + "epoch": 0.38924050632911394, + "grad_norm": 0.6765130162239075, + "learning_rate": 0.0010193813544721855, + "loss": 1.5123, + "step": 3690 + }, + { + "epoch": 0.38934599156118144, + "grad_norm": 0.6725966930389404, + 
"learning_rate": 0.0010191470329326646, + "loss": 1.5696, + "step": 3691 + }, + { + "epoch": 0.38945147679324893, + "grad_norm": 0.7125000357627869, + "learning_rate": 0.0010189126812338765, + "loss": 1.4952, + "step": 3692 + }, + { + "epoch": 0.3895569620253165, + "grad_norm": 0.6146867275238037, + "learning_rate": 0.0010186782994020811, + "loss": 1.4607, + "step": 3693 + }, + { + "epoch": 0.38966244725738397, + "grad_norm": 0.6462666988372803, + "learning_rate": 0.0010184438874635427, + "loss": 1.4795, + "step": 3694 + }, + { + "epoch": 0.38976793248945146, + "grad_norm": 0.7254906296730042, + "learning_rate": 0.0010182094454445282, + "loss": 1.5202, + "step": 3695 + }, + { + "epoch": 0.389873417721519, + "grad_norm": 0.6589577794075012, + "learning_rate": 0.001017974973371308, + "loss": 1.5037, + "step": 3696 + }, + { + "epoch": 0.3899789029535865, + "grad_norm": 0.6721804141998291, + "learning_rate": 0.0010177404712701558, + "loss": 1.5204, + "step": 3697 + }, + { + "epoch": 0.390084388185654, + "grad_norm": 0.6521695852279663, + "learning_rate": 0.0010175059391673486, + "loss": 1.5412, + "step": 3698 + }, + { + "epoch": 0.39018987341772154, + "grad_norm": 0.6382278203964233, + "learning_rate": 0.0010172713770891673, + "loss": 1.4667, + "step": 3699 + }, + { + "epoch": 0.39029535864978904, + "grad_norm": 0.6719930171966553, + "learning_rate": 0.001017036785061895, + "loss": 1.5759, + "step": 3700 + }, + { + "epoch": 0.39040084388185653, + "grad_norm": 0.6704282164573669, + "learning_rate": 0.0010168021631118199, + "loss": 1.514, + "step": 3701 + }, + { + "epoch": 0.3905063291139241, + "grad_norm": 0.6219956278800964, + "learning_rate": 0.0010165675112652314, + "loss": 1.504, + "step": 3702 + }, + { + "epoch": 0.39061181434599157, + "grad_norm": 0.5911837220191956, + "learning_rate": 0.0010163328295484245, + "loss": 1.471, + "step": 3703 + }, + { + "epoch": 0.39071729957805906, + "grad_norm": 0.7973676919937134, + "learning_rate": 0.001016098117987696, + "loss": 
1.5178, + "step": 3704 + }, + { + "epoch": 0.39082278481012656, + "grad_norm": 0.8358592987060547, + "learning_rate": 0.0010158633766093462, + "loss": 1.4933, + "step": 3705 + }, + { + "epoch": 0.3909282700421941, + "grad_norm": 0.6415418386459351, + "learning_rate": 0.0010156286054396795, + "loss": 1.5183, + "step": 3706 + }, + { + "epoch": 0.3910337552742616, + "grad_norm": 0.7117193341255188, + "learning_rate": 0.001015393804505003, + "loss": 1.5545, + "step": 3707 + }, + { + "epoch": 0.3911392405063291, + "grad_norm": 0.6616970300674438, + "learning_rate": 0.0010151589738316275, + "loss": 1.4976, + "step": 3708 + }, + { + "epoch": 0.39124472573839664, + "grad_norm": 0.628964364528656, + "learning_rate": 0.0010149241134458666, + "loss": 1.5147, + "step": 3709 + }, + { + "epoch": 0.39135021097046413, + "grad_norm": 0.6471352577209473, + "learning_rate": 0.0010146892233740376, + "loss": 1.4519, + "step": 3710 + }, + { + "epoch": 0.3914556962025316, + "grad_norm": 0.7323688268661499, + "learning_rate": 0.0010144543036424616, + "loss": 1.4635, + "step": 3711 + }, + { + "epoch": 0.39156118143459917, + "grad_norm": 0.9233625531196594, + "learning_rate": 0.001014219354277462, + "loss": 1.456, + "step": 3712 + }, + { + "epoch": 0.39166666666666666, + "grad_norm": 0.6399043202400208, + "learning_rate": 0.0010139843753053663, + "loss": 1.4666, + "step": 3713 + }, + { + "epoch": 0.39177215189873416, + "grad_norm": 0.8939625024795532, + "learning_rate": 0.001013749366752505, + "loss": 1.5034, + "step": 3714 + }, + { + "epoch": 0.3918776371308017, + "grad_norm": 0.6657359004020691, + "learning_rate": 0.0010135143286452118, + "loss": 1.4844, + "step": 3715 + }, + { + "epoch": 0.3919831223628692, + "grad_norm": 0.9872020483016968, + "learning_rate": 0.0010132792610098244, + "loss": 1.4805, + "step": 3716 + }, + { + "epoch": 0.3920886075949367, + "grad_norm": 0.9369077682495117, + "learning_rate": 0.0010130441638726828, + "loss": 1.516, + "step": 3717 + }, + { + "epoch": 
0.39219409282700424, + "grad_norm": 0.6953102946281433, + "learning_rate": 0.001012809037260131, + "loss": 1.4494, + "step": 3718 + }, + { + "epoch": 0.39229957805907173, + "grad_norm": 0.8499578833580017, + "learning_rate": 0.001012573881198516, + "loss": 1.5229, + "step": 3719 + }, + { + "epoch": 0.3924050632911392, + "grad_norm": 0.7366220951080322, + "learning_rate": 0.0010123386957141883, + "loss": 1.4863, + "step": 3720 + }, + { + "epoch": 0.39251054852320677, + "grad_norm": 0.7278056144714355, + "learning_rate": 0.0010121034808335018, + "loss": 1.4894, + "step": 3721 + }, + { + "epoch": 0.39261603375527426, + "grad_norm": 0.7842705249786377, + "learning_rate": 0.0010118682365828132, + "loss": 1.5134, + "step": 3722 + }, + { + "epoch": 0.39272151898734176, + "grad_norm": 0.7786293625831604, + "learning_rate": 0.0010116329629884827, + "loss": 1.4818, + "step": 3723 + }, + { + "epoch": 0.3928270042194093, + "grad_norm": 0.6407403945922852, + "learning_rate": 0.0010113976600768743, + "loss": 1.519, + "step": 3724 + }, + { + "epoch": 0.3929324894514768, + "grad_norm": 0.9187286496162415, + "learning_rate": 0.0010111623278743547, + "loss": 1.4819, + "step": 3725 + }, + { + "epoch": 0.3930379746835443, + "grad_norm": 0.6533879041671753, + "learning_rate": 0.001010926966407294, + "loss": 1.5153, + "step": 3726 + }, + { + "epoch": 0.39314345991561184, + "grad_norm": 0.8336098194122314, + "learning_rate": 0.0010106915757020654, + "loss": 1.4999, + "step": 3727 + }, + { + "epoch": 0.39324894514767933, + "grad_norm": 0.7066090106964111, + "learning_rate": 0.0010104561557850457, + "loss": 1.5183, + "step": 3728 + }, + { + "epoch": 0.3933544303797468, + "grad_norm": 0.7536984086036682, + "learning_rate": 0.0010102207066826155, + "loss": 1.5174, + "step": 3729 + }, + { + "epoch": 0.39345991561181437, + "grad_norm": 0.6637776494026184, + "learning_rate": 0.0010099852284211573, + "loss": 1.4728, + "step": 3730 + }, + { + "epoch": 0.39356540084388186, + "grad_norm": 
0.7399925589561462, + "learning_rate": 0.0010097497210270578, + "loss": 1.4989, + "step": 3731 + }, + { + "epoch": 0.39367088607594936, + "grad_norm": 0.774714469909668, + "learning_rate": 0.0010095141845267066, + "loss": 1.5356, + "step": 3732 + }, + { + "epoch": 0.3937763713080169, + "grad_norm": 0.7326427698135376, + "learning_rate": 0.0010092786189464975, + "loss": 1.4925, + "step": 3733 + }, + { + "epoch": 0.3938818565400844, + "grad_norm": 0.7408696413040161, + "learning_rate": 0.0010090430243128259, + "loss": 1.5063, + "step": 3734 + }, + { + "epoch": 0.3939873417721519, + "grad_norm": 0.8101245760917664, + "learning_rate": 0.0010088074006520918, + "loss": 1.5164, + "step": 3735 + }, + { + "epoch": 0.39409282700421944, + "grad_norm": 0.8237268328666687, + "learning_rate": 0.0010085717479906978, + "loss": 1.5186, + "step": 3736 + }, + { + "epoch": 0.39419831223628693, + "grad_norm": 0.7061514258384705, + "learning_rate": 0.0010083360663550502, + "loss": 1.4723, + "step": 3737 + }, + { + "epoch": 0.3943037974683544, + "grad_norm": 0.9923291802406311, + "learning_rate": 0.0010081003557715583, + "loss": 1.4796, + "step": 3738 + }, + { + "epoch": 0.3944092827004219, + "grad_norm": 0.7638182640075684, + "learning_rate": 0.0010078646162666345, + "loss": 1.4576, + "step": 3739 + }, + { + "epoch": 0.39451476793248946, + "grad_norm": 0.7516796588897705, + "learning_rate": 0.0010076288478666944, + "loss": 1.538, + "step": 3740 + }, + { + "epoch": 0.39462025316455696, + "grad_norm": 0.7385545372962952, + "learning_rate": 0.0010073930505981573, + "loss": 1.5191, + "step": 3741 + }, + { + "epoch": 0.39472573839662445, + "grad_norm": 0.700629711151123, + "learning_rate": 0.0010071572244874456, + "loss": 1.5146, + "step": 3742 + }, + { + "epoch": 0.394831223628692, + "grad_norm": 0.7014999985694885, + "learning_rate": 0.0010069213695609845, + "loss": 1.4904, + "step": 3743 + }, + { + "epoch": 0.3949367088607595, + "grad_norm": 0.6832416653633118, + "learning_rate": 
0.0010066854858452028, + "loss": 1.5493, + "step": 3744 + }, + { + "epoch": 0.395042194092827, + "grad_norm": 0.6975542902946472, + "learning_rate": 0.0010064495733665324, + "loss": 1.4641, + "step": 3745 + }, + { + "epoch": 0.39514767932489453, + "grad_norm": 0.8983390927314758, + "learning_rate": 0.0010062136321514084, + "loss": 1.5112, + "step": 3746 + }, + { + "epoch": 0.395253164556962, + "grad_norm": 0.9974279403686523, + "learning_rate": 0.0010059776622262698, + "loss": 1.5463, + "step": 3747 + }, + { + "epoch": 0.3953586497890295, + "grad_norm": 0.6589381694793701, + "learning_rate": 0.0010057416636175575, + "loss": 1.4917, + "step": 3748 + }, + { + "epoch": 0.39546413502109706, + "grad_norm": 0.9776214361190796, + "learning_rate": 0.0010055056363517162, + "loss": 1.4741, + "step": 3749 + }, + { + "epoch": 0.39556962025316456, + "grad_norm": 0.8347980976104736, + "learning_rate": 0.0010052695804551946, + "loss": 1.5037, + "step": 3750 + }, + { + "epoch": 0.39567510548523205, + "grad_norm": 0.7048782110214233, + "learning_rate": 0.0010050334959544438, + "loss": 1.5048, + "step": 3751 + }, + { + "epoch": 0.3957805907172996, + "grad_norm": 0.8417404294013977, + "learning_rate": 0.0010047973828759178, + "loss": 1.5161, + "step": 3752 + }, + { + "epoch": 0.3958860759493671, + "grad_norm": 0.6371940970420837, + "learning_rate": 0.0010045612412460747, + "loss": 1.5126, + "step": 3753 + }, + { + "epoch": 0.3959915611814346, + "grad_norm": 0.8002906441688538, + "learning_rate": 0.0010043250710913747, + "loss": 1.4951, + "step": 3754 + }, + { + "epoch": 0.39609704641350213, + "grad_norm": 0.656670093536377, + "learning_rate": 0.0010040888724382828, + "loss": 1.4901, + "step": 3755 + }, + { + "epoch": 0.3962025316455696, + "grad_norm": 0.7574936747550964, + "learning_rate": 0.0010038526453132655, + "loss": 1.5097, + "step": 3756 + }, + { + "epoch": 0.3963080168776371, + "grad_norm": 0.6625149846076965, + "learning_rate": 0.0010036163897427937, + "loss": 1.4971, + 
"step": 3757 + }, + { + "epoch": 0.39641350210970466, + "grad_norm": 0.7396482825279236, + "learning_rate": 0.0010033801057533404, + "loss": 1.4879, + "step": 3758 + }, + { + "epoch": 0.39651898734177216, + "grad_norm": 0.7783925533294678, + "learning_rate": 0.001003143793371383, + "loss": 1.4608, + "step": 3759 + }, + { + "epoch": 0.39662447257383965, + "grad_norm": 0.7088233232498169, + "learning_rate": 0.0010029074526234014, + "loss": 1.4738, + "step": 3760 + }, + { + "epoch": 0.3967299578059072, + "grad_norm": 0.7218544483184814, + "learning_rate": 0.0010026710835358786, + "loss": 1.5171, + "step": 3761 + }, + { + "epoch": 0.3968354430379747, + "grad_norm": 0.7047997117042542, + "learning_rate": 0.0010024346861353007, + "loss": 1.4969, + "step": 3762 + }, + { + "epoch": 0.3969409282700422, + "grad_norm": 0.7876434922218323, + "learning_rate": 0.0010021982604481575, + "loss": 1.4949, + "step": 3763 + }, + { + "epoch": 0.39704641350210973, + "grad_norm": 0.6774751543998718, + "learning_rate": 0.001001961806500942, + "loss": 1.4778, + "step": 3764 + }, + { + "epoch": 0.3971518987341772, + "grad_norm": 0.6666693091392517, + "learning_rate": 0.0010017253243201495, + "loss": 1.4934, + "step": 3765 + }, + { + "epoch": 0.3972573839662447, + "grad_norm": 0.6642735600471497, + "learning_rate": 0.0010014888139322792, + "loss": 1.5009, + "step": 3766 + }, + { + "epoch": 0.39736286919831226, + "grad_norm": 0.7475911378860474, + "learning_rate": 0.001001252275363833, + "loss": 1.54, + "step": 3767 + }, + { + "epoch": 0.39746835443037976, + "grad_norm": 0.6491537690162659, + "learning_rate": 0.0010010157086413167, + "loss": 1.5532, + "step": 3768 + }, + { + "epoch": 0.39757383966244725, + "grad_norm": 0.7151340246200562, + "learning_rate": 0.0010007791137912386, + "loss": 1.5021, + "step": 3769 + }, + { + "epoch": 0.39767932489451474, + "grad_norm": 0.6669960618019104, + "learning_rate": 0.0010005424908401104, + "loss": 1.5021, + "step": 3770 + }, + { + "epoch": 
0.3977848101265823, + "grad_norm": 0.6901030540466309, + "learning_rate": 0.0010003058398144464, + "loss": 1.4677, + "step": 3771 + }, + { + "epoch": 0.3978902953586498, + "grad_norm": 0.7277828454971313, + "learning_rate": 0.0010000691607407652, + "loss": 1.5048, + "step": 3772 + }, + { + "epoch": 0.3979957805907173, + "grad_norm": 0.6544051170349121, + "learning_rate": 0.0009998324536455877, + "loss": 1.5143, + "step": 3773 + }, + { + "epoch": 0.3981012658227848, + "grad_norm": 0.6890218257904053, + "learning_rate": 0.0009995957185554378, + "loss": 1.5219, + "step": 3774 + }, + { + "epoch": 0.3982067510548523, + "grad_norm": 0.6999531984329224, + "learning_rate": 0.000999358955496843, + "loss": 1.4923, + "step": 3775 + }, + { + "epoch": 0.3983122362869198, + "grad_norm": 0.6260866522789001, + "learning_rate": 0.000999122164496334, + "loss": 1.4839, + "step": 3776 + }, + { + "epoch": 0.39841772151898736, + "grad_norm": 0.7245714664459229, + "learning_rate": 0.0009988853455804442, + "loss": 1.4738, + "step": 3777 + }, + { + "epoch": 0.39852320675105485, + "grad_norm": 0.6955580711364746, + "learning_rate": 0.0009986484987757102, + "loss": 1.4997, + "step": 3778 + }, + { + "epoch": 0.39862869198312234, + "grad_norm": 0.6214317679405212, + "learning_rate": 0.0009984116241086723, + "loss": 1.5073, + "step": 3779 + }, + { + "epoch": 0.3987341772151899, + "grad_norm": 0.6899923086166382, + "learning_rate": 0.0009981747216058728, + "loss": 1.4855, + "step": 3780 + }, + { + "epoch": 0.3988396624472574, + "grad_norm": 0.6600906252861023, + "learning_rate": 0.0009979377912938587, + "loss": 1.4738, + "step": 3781 + }, + { + "epoch": 0.3989451476793249, + "grad_norm": 0.6364150047302246, + "learning_rate": 0.0009977008331991785, + "loss": 1.4874, + "step": 3782 + }, + { + "epoch": 0.3990506329113924, + "grad_norm": 0.6594933867454529, + "learning_rate": 0.000997463847348385, + "loss": 1.525, + "step": 3783 + }, + { + "epoch": 0.3991561181434599, + "grad_norm": 
0.6314538717269897, + "learning_rate": 0.000997226833768033, + "loss": 1.4686, + "step": 3784 + }, + { + "epoch": 0.3992616033755274, + "grad_norm": 0.639167308807373, + "learning_rate": 0.0009969897924846818, + "loss": 1.5319, + "step": 3785 + }, + { + "epoch": 0.39936708860759496, + "grad_norm": 0.6788768172264099, + "learning_rate": 0.0009967527235248928, + "loss": 1.5238, + "step": 3786 + }, + { + "epoch": 0.39947257383966245, + "grad_norm": 0.7103390097618103, + "learning_rate": 0.0009965156269152308, + "loss": 1.4698, + "step": 3787 + }, + { + "epoch": 0.39957805907172994, + "grad_norm": 0.7113536596298218, + "learning_rate": 0.0009962785026822632, + "loss": 1.5268, + "step": 3788 + }, + { + "epoch": 0.3996835443037975, + "grad_norm": 0.6767099499702454, + "learning_rate": 0.0009960413508525617, + "loss": 1.4975, + "step": 3789 + }, + { + "epoch": 0.399789029535865, + "grad_norm": 0.7818514108657837, + "learning_rate": 0.0009958041714526998, + "loss": 1.5048, + "step": 3790 + }, + { + "epoch": 0.3998945147679325, + "grad_norm": 0.7509220242500305, + "learning_rate": 0.0009955669645092546, + "loss": 1.4967, + "step": 3791 + }, + { + "epoch": 0.4, + "grad_norm": 0.8161002397537231, + "learning_rate": 0.0009953297300488069, + "loss": 1.475, + "step": 3792 + }, + { + "epoch": 0.4001054852320675, + "grad_norm": 1.0982900857925415, + "learning_rate": 0.0009950924680979393, + "loss": 1.5423, + "step": 3793 + }, + { + "epoch": 0.400210970464135, + "grad_norm": 0.6720527410507202, + "learning_rate": 0.0009948551786832386, + "loss": 1.5111, + "step": 3794 + }, + { + "epoch": 0.40031645569620256, + "grad_norm": 0.9639621376991272, + "learning_rate": 0.0009946178618312942, + "loss": 1.5185, + "step": 3795 + }, + { + "epoch": 0.40042194092827005, + "grad_norm": 0.6998013257980347, + "learning_rate": 0.0009943805175686986, + "loss": 1.5341, + "step": 3796 + }, + { + "epoch": 0.40052742616033754, + "grad_norm": 0.8928141593933105, + "learning_rate": 0.0009941431459220475, + 
"loss": 1.4901, + "step": 3797 + }, + { + "epoch": 0.4006329113924051, + "grad_norm": 0.7210012674331665, + "learning_rate": 0.0009939057469179394, + "loss": 1.5337, + "step": 3798 + }, + { + "epoch": 0.4007383966244726, + "grad_norm": 0.6916449069976807, + "learning_rate": 0.0009936683205829762, + "loss": 1.465, + "step": 3799 + }, + { + "epoch": 0.4008438818565401, + "grad_norm": 0.6904787421226501, + "learning_rate": 0.0009934308669437627, + "loss": 1.4923, + "step": 3800 + }, + { + "epoch": 0.4009493670886076, + "grad_norm": 0.6418073177337646, + "learning_rate": 0.0009931933860269063, + "loss": 1.4583, + "step": 3801 + }, + { + "epoch": 0.4010548523206751, + "grad_norm": 0.675815761089325, + "learning_rate": 0.0009929558778590188, + "loss": 1.4748, + "step": 3802 + }, + { + "epoch": 0.4011603375527426, + "grad_norm": 0.7131190896034241, + "learning_rate": 0.0009927183424667135, + "loss": 1.5432, + "step": 3803 + }, + { + "epoch": 0.4012658227848101, + "grad_norm": 0.630483865737915, + "learning_rate": 0.0009924807798766077, + "loss": 1.5073, + "step": 3804 + }, + { + "epoch": 0.40137130801687765, + "grad_norm": 0.7845519781112671, + "learning_rate": 0.0009922431901153213, + "loss": 1.4659, + "step": 3805 + }, + { + "epoch": 0.40147679324894514, + "grad_norm": 0.7019419074058533, + "learning_rate": 0.0009920055732094775, + "loss": 1.4497, + "step": 3806 + }, + { + "epoch": 0.40158227848101263, + "grad_norm": 0.664476752281189, + "learning_rate": 0.0009917679291857027, + "loss": 1.4787, + "step": 3807 + }, + { + "epoch": 0.4016877637130802, + "grad_norm": 0.616468071937561, + "learning_rate": 0.0009915302580706256, + "loss": 1.4907, + "step": 3808 + }, + { + "epoch": 0.4017932489451477, + "grad_norm": 0.6713626384735107, + "learning_rate": 0.0009912925598908788, + "loss": 1.5202, + "step": 3809 + }, + { + "epoch": 0.40189873417721517, + "grad_norm": 0.6570860743522644, + "learning_rate": 0.0009910548346730972, + "loss": 1.535, + "step": 3810 + }, + { + "epoch": 
0.4020042194092827, + "grad_norm": 0.6332383751869202, + "learning_rate": 0.00099081708244392, + "loss": 1.497, + "step": 3811 + }, + { + "epoch": 0.4021097046413502, + "grad_norm": 0.686872124671936, + "learning_rate": 0.0009905793032299875, + "loss": 1.4696, + "step": 3812 + }, + { + "epoch": 0.4022151898734177, + "grad_norm": 0.6415073871612549, + "learning_rate": 0.0009903414970579443, + "loss": 1.475, + "step": 3813 + }, + { + "epoch": 0.40232067510548525, + "grad_norm": 0.646264374256134, + "learning_rate": 0.000990103663954438, + "loss": 1.4862, + "step": 3814 + }, + { + "epoch": 0.40242616033755274, + "grad_norm": 0.6569414138793945, + "learning_rate": 0.000989865803946119, + "loss": 1.5123, + "step": 3815 + }, + { + "epoch": 0.40253164556962023, + "grad_norm": 0.7092475295066833, + "learning_rate": 0.0009896279170596406, + "loss": 1.4801, + "step": 3816 + }, + { + "epoch": 0.4026371308016878, + "grad_norm": 0.6175495386123657, + "learning_rate": 0.0009893900033216593, + "loss": 1.4635, + "step": 3817 + }, + { + "epoch": 0.4027426160337553, + "grad_norm": 0.6811845898628235, + "learning_rate": 0.0009891520627588342, + "loss": 1.486, + "step": 3818 + }, + { + "epoch": 0.40284810126582277, + "grad_norm": 0.6914072632789612, + "learning_rate": 0.000988914095397828, + "loss": 1.5052, + "step": 3819 + }, + { + "epoch": 0.4029535864978903, + "grad_norm": 0.6516214609146118, + "learning_rate": 0.0009886761012653062, + "loss": 1.466, + "step": 3820 + }, + { + "epoch": 0.4030590717299578, + "grad_norm": 0.6187295913696289, + "learning_rate": 0.000988438080387937, + "loss": 1.4861, + "step": 3821 + }, + { + "epoch": 0.4031645569620253, + "grad_norm": 0.6605997085571289, + "learning_rate": 0.000988200032792392, + "loss": 1.4502, + "step": 3822 + }, + { + "epoch": 0.40327004219409285, + "grad_norm": 0.713379979133606, + "learning_rate": 0.0009879619585053455, + "loss": 1.4973, + "step": 3823 + }, + { + "epoch": 0.40337552742616034, + "grad_norm": 0.6073020696640015, + 
"learning_rate": 0.0009877238575534749, + "loss": 1.49, + "step": 3824 + }, + { + "epoch": 0.40348101265822783, + "grad_norm": 0.7224045395851135, + "learning_rate": 0.0009874857299634605, + "loss": 1.496, + "step": 3825 + }, + { + "epoch": 0.4035864978902954, + "grad_norm": 0.6494831442832947, + "learning_rate": 0.0009872475757619862, + "loss": 1.5138, + "step": 3826 + }, + { + "epoch": 0.4036919831223629, + "grad_norm": 0.6609768271446228, + "learning_rate": 0.000987009394975738, + "loss": 1.5097, + "step": 3827 + }, + { + "epoch": 0.40379746835443037, + "grad_norm": 0.6965026259422302, + "learning_rate": 0.0009867711876314052, + "loss": 1.4778, + "step": 3828 + }, + { + "epoch": 0.4039029535864979, + "grad_norm": 0.7466182112693787, + "learning_rate": 0.00098653295375568, + "loss": 1.48, + "step": 3829 + }, + { + "epoch": 0.4040084388185654, + "grad_norm": 0.7008274793624878, + "learning_rate": 0.000986294693375258, + "loss": 1.4665, + "step": 3830 + }, + { + "epoch": 0.4041139240506329, + "grad_norm": 0.7171580195426941, + "learning_rate": 0.0009860564065168375, + "loss": 1.5062, + "step": 3831 + }, + { + "epoch": 0.40421940928270045, + "grad_norm": 0.6864311695098877, + "learning_rate": 0.0009858180932071192, + "loss": 1.5147, + "step": 3832 + }, + { + "epoch": 0.40432489451476794, + "grad_norm": 0.8126797676086426, + "learning_rate": 0.000985579753472808, + "loss": 1.4963, + "step": 3833 + }, + { + "epoch": 0.40443037974683543, + "grad_norm": 0.9178141951560974, + "learning_rate": 0.0009853413873406104, + "loss": 1.4913, + "step": 3834 + }, + { + "epoch": 0.4045358649789029, + "grad_norm": 0.634393572807312, + "learning_rate": 0.000985102994837237, + "loss": 1.4881, + "step": 3835 + }, + { + "epoch": 0.4046413502109705, + "grad_norm": 0.7300078272819519, + "learning_rate": 0.0009848645759894005, + "loss": 1.5123, + "step": 3836 + }, + { + "epoch": 0.40474683544303797, + "grad_norm": 0.6550975441932678, + "learning_rate": 0.0009846261308238177, + "loss": 
1.4694, + "step": 3837 + }, + { + "epoch": 0.40485232067510546, + "grad_norm": 0.7340750098228455, + "learning_rate": 0.0009843876593672064, + "loss": 1.4966, + "step": 3838 + }, + { + "epoch": 0.404957805907173, + "grad_norm": 0.6887595057487488, + "learning_rate": 0.0009841491616462892, + "loss": 1.5486, + "step": 3839 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 0.6742536425590515, + "learning_rate": 0.000983910637687791, + "loss": 1.5334, + "step": 3840 + }, + { + "epoch": 0.405168776371308, + "grad_norm": 0.6747508645057678, + "learning_rate": 0.0009836720875184394, + "loss": 1.5149, + "step": 3841 + }, + { + "epoch": 0.40527426160337554, + "grad_norm": 0.7518059015274048, + "learning_rate": 0.0009834335111649655, + "loss": 1.4836, + "step": 3842 + }, + { + "epoch": 0.40537974683544303, + "grad_norm": 0.6428248286247253, + "learning_rate": 0.0009831949086541024, + "loss": 1.4857, + "step": 3843 + }, + { + "epoch": 0.4054852320675105, + "grad_norm": 0.7333281636238098, + "learning_rate": 0.0009829562800125868, + "loss": 1.4792, + "step": 3844 + }, + { + "epoch": 0.4055907172995781, + "grad_norm": 0.7595979571342468, + "learning_rate": 0.0009827176252671587, + "loss": 1.51, + "step": 3845 + }, + { + "epoch": 0.40569620253164557, + "grad_norm": 0.734634280204773, + "learning_rate": 0.0009824789444445603, + "loss": 1.4844, + "step": 3846 + }, + { + "epoch": 0.40580168776371306, + "grad_norm": 0.6580454707145691, + "learning_rate": 0.0009822402375715366, + "loss": 1.4619, + "step": 3847 + }, + { + "epoch": 0.4059071729957806, + "grad_norm": 0.6857550144195557, + "learning_rate": 0.0009820015046748366, + "loss": 1.4856, + "step": 3848 + }, + { + "epoch": 0.4060126582278481, + "grad_norm": 0.628856897354126, + "learning_rate": 0.0009817627457812106, + "loss": 1.4747, + "step": 3849 + }, + { + "epoch": 0.4061181434599156, + "grad_norm": 0.7217966318130493, + "learning_rate": 0.0009815239609174138, + "loss": 1.4796, + "step": 3850 + }, + { + "epoch": 
0.40622362869198314, + "grad_norm": 0.6829034090042114, + "learning_rate": 0.0009812851501102024, + "loss": 1.5415, + "step": 3851 + }, + { + "epoch": 0.40632911392405063, + "grad_norm": 0.7084047794342041, + "learning_rate": 0.0009810463133863368, + "loss": 1.5068, + "step": 3852 + }, + { + "epoch": 0.4064345991561181, + "grad_norm": 0.6632786989212036, + "learning_rate": 0.0009808074507725794, + "loss": 1.5123, + "step": 3853 + }, + { + "epoch": 0.4065400843881857, + "grad_norm": 0.7035054564476013, + "learning_rate": 0.0009805685622956966, + "loss": 1.5057, + "step": 3854 + }, + { + "epoch": 0.40664556962025317, + "grad_norm": 0.6561732888221741, + "learning_rate": 0.0009803296479824564, + "loss": 1.4973, + "step": 3855 + }, + { + "epoch": 0.40675105485232066, + "grad_norm": 0.6469947695732117, + "learning_rate": 0.0009800907078596308, + "loss": 1.5155, + "step": 3856 + }, + { + "epoch": 0.4068565400843882, + "grad_norm": 0.6417824625968933, + "learning_rate": 0.000979851741953994, + "loss": 1.4852, + "step": 3857 + }, + { + "epoch": 0.4069620253164557, + "grad_norm": 0.6628650426864624, + "learning_rate": 0.0009796127502923232, + "loss": 1.4915, + "step": 3858 + }, + { + "epoch": 0.4070675105485232, + "grad_norm": 0.6576152443885803, + "learning_rate": 0.000979373732901399, + "loss": 1.4769, + "step": 3859 + }, + { + "epoch": 0.40717299578059074, + "grad_norm": 0.9099237322807312, + "learning_rate": 0.0009791346898080043, + "loss": 1.482, + "step": 3860 + }, + { + "epoch": 0.40727848101265823, + "grad_norm": 0.7125291228294373, + "learning_rate": 0.000978895621038925, + "loss": 1.4845, + "step": 3861 + }, + { + "epoch": 0.4073839662447257, + "grad_norm": 0.6084980368614197, + "learning_rate": 0.0009786565266209496, + "loss": 1.4929, + "step": 3862 + }, + { + "epoch": 0.4074894514767933, + "grad_norm": 0.6241394281387329, + "learning_rate": 0.0009784174065808706, + "loss": 1.5119, + "step": 3863 + }, + { + "epoch": 0.40759493670886077, + "grad_norm": 
0.6917505264282227, + "learning_rate": 0.0009781782609454821, + "loss": 1.4568, + "step": 3864 + }, + { + "epoch": 0.40770042194092826, + "grad_norm": 0.8556948304176331, + "learning_rate": 0.000977939089741582, + "loss": 1.471, + "step": 3865 + }, + { + "epoch": 0.4078059071729958, + "grad_norm": 0.688615620136261, + "learning_rate": 0.0009776998929959695, + "loss": 1.5215, + "step": 3866 + }, + { + "epoch": 0.4079113924050633, + "grad_norm": 0.6056479215621948, + "learning_rate": 0.0009774606707354493, + "loss": 1.4714, + "step": 3867 + }, + { + "epoch": 0.4080168776371308, + "grad_norm": 0.6936322450637817, + "learning_rate": 0.0009772214229868265, + "loss": 1.4783, + "step": 3868 + }, + { + "epoch": 0.4081223628691983, + "grad_norm": 0.6176720261573792, + "learning_rate": 0.0009769821497769102, + "loss": 1.5223, + "step": 3869 + }, + { + "epoch": 0.40822784810126583, + "grad_norm": 0.7380179166793823, + "learning_rate": 0.0009767428511325122, + "loss": 1.5092, + "step": 3870 + }, + { + "epoch": 0.4083333333333333, + "grad_norm": 0.70152348279953, + "learning_rate": 0.000976503527080447, + "loss": 1.4764, + "step": 3871 + }, + { + "epoch": 0.4084388185654008, + "grad_norm": 0.6866744756698608, + "learning_rate": 0.0009762641776475322, + "loss": 1.5299, + "step": 3872 + }, + { + "epoch": 0.40854430379746837, + "grad_norm": 0.7704325914382935, + "learning_rate": 0.0009760248028605882, + "loss": 1.4852, + "step": 3873 + }, + { + "epoch": 0.40864978902953586, + "grad_norm": 0.7037012577056885, + "learning_rate": 0.0009757854027464377, + "loss": 1.4992, + "step": 3874 + }, + { + "epoch": 0.40875527426160335, + "grad_norm": 0.711470365524292, + "learning_rate": 0.000975545977331907, + "loss": 1.4952, + "step": 3875 + }, + { + "epoch": 0.4088607594936709, + "grad_norm": 0.7011661529541016, + "learning_rate": 0.0009753065266438249, + "loss": 1.4596, + "step": 3876 + }, + { + "epoch": 0.4089662447257384, + "grad_norm": 0.6293154954910278, + "learning_rate": 
0.0009750670507090233, + "loss": 1.4479, + "step": 3877 + }, + { + "epoch": 0.4090717299578059, + "grad_norm": 0.6428210139274597, + "learning_rate": 0.000974827549554336, + "loss": 1.453, + "step": 3878 + }, + { + "epoch": 0.40917721518987343, + "grad_norm": 0.7229017615318298, + "learning_rate": 0.0009745880232066007, + "loss": 1.499, + "step": 3879 + }, + { + "epoch": 0.4092827004219409, + "grad_norm": 0.6591872572898865, + "learning_rate": 0.0009743484716926576, + "loss": 1.4556, + "step": 3880 + }, + { + "epoch": 0.4093881856540084, + "grad_norm": 0.8316524624824524, + "learning_rate": 0.0009741088950393497, + "loss": 1.5051, + "step": 3881 + }, + { + "epoch": 0.40949367088607597, + "grad_norm": 0.6798115372657776, + "learning_rate": 0.0009738692932735225, + "loss": 1.4772, + "step": 3882 + }, + { + "epoch": 0.40959915611814346, + "grad_norm": 0.7981741428375244, + "learning_rate": 0.0009736296664220247, + "loss": 1.4755, + "step": 3883 + }, + { + "epoch": 0.40970464135021095, + "grad_norm": 0.961022675037384, + "learning_rate": 0.0009733900145117075, + "loss": 1.4693, + "step": 3884 + }, + { + "epoch": 0.4098101265822785, + "grad_norm": 0.6423112154006958, + "learning_rate": 0.0009731503375694253, + "loss": 1.47, + "step": 3885 + }, + { + "epoch": 0.409915611814346, + "grad_norm": 0.8805918097496033, + "learning_rate": 0.0009729106356220352, + "loss": 1.4974, + "step": 3886 + }, + { + "epoch": 0.4100210970464135, + "grad_norm": 0.7434242963790894, + "learning_rate": 0.0009726709086963967, + "loss": 1.4866, + "step": 3887 + }, + { + "epoch": 0.41012658227848103, + "grad_norm": 0.7950697541236877, + "learning_rate": 0.0009724311568193726, + "loss": 1.5066, + "step": 3888 + }, + { + "epoch": 0.4102320675105485, + "grad_norm": 0.9378272294998169, + "learning_rate": 0.0009721913800178281, + "loss": 1.4697, + "step": 3889 + }, + { + "epoch": 0.410337552742616, + "grad_norm": 0.6593608856201172, + "learning_rate": 0.0009719515783186319, + "loss": 1.4465, + "step": 
3890 + }, + { + "epoch": 0.41044303797468357, + "grad_norm": 0.7284212708473206, + "learning_rate": 0.0009717117517486543, + "loss": 1.4797, + "step": 3891 + }, + { + "epoch": 0.41054852320675106, + "grad_norm": 0.7050490975379944, + "learning_rate": 0.0009714719003347693, + "loss": 1.5199, + "step": 3892 + }, + { + "epoch": 0.41065400843881855, + "grad_norm": 0.6478070020675659, + "learning_rate": 0.0009712320241038537, + "loss": 1.5019, + "step": 3893 + }, + { + "epoch": 0.4107594936708861, + "grad_norm": 0.688693106174469, + "learning_rate": 0.0009709921230827865, + "loss": 1.4805, + "step": 3894 + }, + { + "epoch": 0.4108649789029536, + "grad_norm": 0.6141259670257568, + "learning_rate": 0.00097075219729845, + "loss": 1.4897, + "step": 3895 + }, + { + "epoch": 0.4109704641350211, + "grad_norm": 0.6613912582397461, + "learning_rate": 0.0009705122467777292, + "loss": 1.4393, + "step": 3896 + }, + { + "epoch": 0.41107594936708863, + "grad_norm": 0.681929886341095, + "learning_rate": 0.0009702722715475113, + "loss": 1.4714, + "step": 3897 + }, + { + "epoch": 0.4111814345991561, + "grad_norm": 0.6334523558616638, + "learning_rate": 0.000970032271634687, + "loss": 1.4834, + "step": 3898 + }, + { + "epoch": 0.4112869198312236, + "grad_norm": 0.7424808740615845, + "learning_rate": 0.0009697922470661497, + "loss": 1.4602, + "step": 3899 + }, + { + "epoch": 0.41139240506329117, + "grad_norm": 1.0221788883209229, + "learning_rate": 0.0009695521978687951, + "loss": 1.497, + "step": 3900 + }, + { + "epoch": 0.41149789029535866, + "grad_norm": 0.649186372756958, + "learning_rate": 0.0009693121240695216, + "loss": 1.4609, + "step": 3901 + }, + { + "epoch": 0.41160337552742615, + "grad_norm": 1.0143144130706787, + "learning_rate": 0.0009690720256952314, + "loss": 1.5294, + "step": 3902 + }, + { + "epoch": 0.41170886075949364, + "grad_norm": 0.8133302330970764, + "learning_rate": 0.0009688319027728282, + "loss": 1.4831, + "step": 3903 + }, + { + "epoch": 0.4118143459915612, + 
"grad_norm": 0.8264775276184082, + "learning_rate": 0.0009685917553292192, + "loss": 1.4615, + "step": 3904 + }, + { + "epoch": 0.4119198312236287, + "grad_norm": 0.9364267587661743, + "learning_rate": 0.0009683515833913137, + "loss": 1.4774, + "step": 3905 + }, + { + "epoch": 0.4120253164556962, + "grad_norm": 0.6471055746078491, + "learning_rate": 0.0009681113869860247, + "loss": 1.4874, + "step": 3906 + }, + { + "epoch": 0.4121308016877637, + "grad_norm": 0.8584084510803223, + "learning_rate": 0.0009678711661402672, + "loss": 1.4492, + "step": 3907 + }, + { + "epoch": 0.4122362869198312, + "grad_norm": 0.7101282477378845, + "learning_rate": 0.0009676309208809592, + "loss": 1.4859, + "step": 3908 + }, + { + "epoch": 0.4123417721518987, + "grad_norm": 0.8547651767730713, + "learning_rate": 0.0009673906512350213, + "loss": 1.4849, + "step": 3909 + }, + { + "epoch": 0.41244725738396626, + "grad_norm": 0.7027769684791565, + "learning_rate": 0.0009671503572293767, + "loss": 1.523, + "step": 3910 + }, + { + "epoch": 0.41255274261603375, + "grad_norm": 0.8805189728736877, + "learning_rate": 0.000966910038890952, + "loss": 1.486, + "step": 3911 + }, + { + "epoch": 0.41265822784810124, + "grad_norm": 0.7105304598808289, + "learning_rate": 0.0009666696962466757, + "loss": 1.535, + "step": 3912 + }, + { + "epoch": 0.4127637130801688, + "grad_norm": 0.7189561128616333, + "learning_rate": 0.0009664293293234795, + "loss": 1.4867, + "step": 3913 + }, + { + "epoch": 0.4128691983122363, + "grad_norm": 0.7111444473266602, + "learning_rate": 0.0009661889381482977, + "loss": 1.4236, + "step": 3914 + }, + { + "epoch": 0.4129746835443038, + "grad_norm": 0.6232699751853943, + "learning_rate": 0.0009659485227480676, + "loss": 1.4726, + "step": 3915 + }, + { + "epoch": 0.4130801687763713, + "grad_norm": 0.6935182809829712, + "learning_rate": 0.0009657080831497284, + "loss": 1.4794, + "step": 3916 + }, + { + "epoch": 0.4131856540084388, + "grad_norm": 0.7188730835914612, + 
"learning_rate": 0.0009654676193802232, + "loss": 1.4521, + "step": 3917 + }, + { + "epoch": 0.4132911392405063, + "grad_norm": 0.6810736060142517, + "learning_rate": 0.0009652271314664966, + "loss": 1.5029, + "step": 3918 + }, + { + "epoch": 0.41339662447257386, + "grad_norm": 0.821225106716156, + "learning_rate": 0.0009649866194354967, + "loss": 1.4757, + "step": 3919 + }, + { + "epoch": 0.41350210970464135, + "grad_norm": 0.6229616403579712, + "learning_rate": 0.0009647460833141742, + "loss": 1.5001, + "step": 3920 + }, + { + "epoch": 0.41360759493670884, + "grad_norm": 0.7813031077384949, + "learning_rate": 0.0009645055231294823, + "loss": 1.4633, + "step": 3921 + }, + { + "epoch": 0.4137130801687764, + "grad_norm": 0.7044227123260498, + "learning_rate": 0.0009642649389083768, + "loss": 1.5292, + "step": 3922 + }, + { + "epoch": 0.4138185654008439, + "grad_norm": 0.7708126902580261, + "learning_rate": 0.0009640243306778162, + "loss": 1.5066, + "step": 3923 + }, + { + "epoch": 0.4139240506329114, + "grad_norm": 0.6936272382736206, + "learning_rate": 0.0009637836984647627, + "loss": 1.4738, + "step": 3924 + }, + { + "epoch": 0.4140295358649789, + "grad_norm": 0.8405532836914062, + "learning_rate": 0.0009635430422961794, + "loss": 1.4481, + "step": 3925 + }, + { + "epoch": 0.4141350210970464, + "grad_norm": 0.6962898969650269, + "learning_rate": 0.0009633023621990334, + "loss": 1.4897, + "step": 3926 + }, + { + "epoch": 0.4142405063291139, + "grad_norm": 0.7667049765586853, + "learning_rate": 0.000963061658200294, + "loss": 1.477, + "step": 3927 + }, + { + "epoch": 0.41434599156118146, + "grad_norm": 0.7324779629707336, + "learning_rate": 0.0009628209303269335, + "loss": 1.4887, + "step": 3928 + }, + { + "epoch": 0.41445147679324895, + "grad_norm": 0.6930740475654602, + "learning_rate": 0.0009625801786059267, + "loss": 1.498, + "step": 3929 + }, + { + "epoch": 0.41455696202531644, + "grad_norm": 0.7282207608222961, + "learning_rate": 0.0009623394030642507, + 
"loss": 1.4913, + "step": 3930 + }, + { + "epoch": 0.414662447257384, + "grad_norm": 0.6427899599075317, + "learning_rate": 0.0009620986037288858, + "loss": 1.4623, + "step": 3931 + }, + { + "epoch": 0.4147679324894515, + "grad_norm": 0.6451544165611267, + "learning_rate": 0.0009618577806268147, + "loss": 1.4684, + "step": 3932 + }, + { + "epoch": 0.414873417721519, + "grad_norm": 0.6878176927566528, + "learning_rate": 0.0009616169337850229, + "loss": 1.476, + "step": 3933 + }, + { + "epoch": 0.41497890295358647, + "grad_norm": 0.6928260326385498, + "learning_rate": 0.0009613760632304985, + "loss": 1.4919, + "step": 3934 + }, + { + "epoch": 0.415084388185654, + "grad_norm": 0.7727343440055847, + "learning_rate": 0.0009611351689902321, + "loss": 1.5077, + "step": 3935 + }, + { + "epoch": 0.4151898734177215, + "grad_norm": 0.6554715633392334, + "learning_rate": 0.000960894251091217, + "loss": 1.4768, + "step": 3936 + }, + { + "epoch": 0.415295358649789, + "grad_norm": 0.6783080101013184, + "learning_rate": 0.0009606533095604499, + "loss": 1.5151, + "step": 3937 + }, + { + "epoch": 0.41540084388185655, + "grad_norm": 0.7093545794487, + "learning_rate": 0.0009604123444249288, + "loss": 1.4409, + "step": 3938 + }, + { + "epoch": 0.41550632911392404, + "grad_norm": 0.6879766583442688, + "learning_rate": 0.0009601713557116554, + "loss": 1.4973, + "step": 3939 + }, + { + "epoch": 0.41561181434599154, + "grad_norm": 0.8561065196990967, + "learning_rate": 0.0009599303434476334, + "loss": 1.4861, + "step": 3940 + }, + { + "epoch": 0.4157172995780591, + "grad_norm": 0.7083315849304199, + "learning_rate": 0.0009596893076598698, + "loss": 1.4859, + "step": 3941 + }, + { + "epoch": 0.4158227848101266, + "grad_norm": 0.8839803338050842, + "learning_rate": 0.0009594482483753736, + "loss": 1.5161, + "step": 3942 + }, + { + "epoch": 0.41592827004219407, + "grad_norm": 0.7343817949295044, + "learning_rate": 0.0009592071656211568, + "loss": 1.4873, + "step": 3943 + }, + { + "epoch": 
0.4160337552742616, + "grad_norm": 0.8533557653427124, + "learning_rate": 0.0009589660594242338, + "loss": 1.5074, + "step": 3944 + }, + { + "epoch": 0.4161392405063291, + "grad_norm": 0.7040666341781616, + "learning_rate": 0.0009587249298116219, + "loss": 1.4666, + "step": 3945 + }, + { + "epoch": 0.4162447257383966, + "grad_norm": 0.9286083579063416, + "learning_rate": 0.0009584837768103408, + "loss": 1.4497, + "step": 3946 + }, + { + "epoch": 0.41635021097046415, + "grad_norm": 0.7845255136489868, + "learning_rate": 0.0009582426004474129, + "loss": 1.5169, + "step": 3947 + }, + { + "epoch": 0.41645569620253164, + "grad_norm": 0.7906997203826904, + "learning_rate": 0.0009580014007498634, + "loss": 1.5032, + "step": 3948 + }, + { + "epoch": 0.41656118143459914, + "grad_norm": 1.0064966678619385, + "learning_rate": 0.0009577601777447194, + "loss": 1.474, + "step": 3949 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 0.6164678931236267, + "learning_rate": 0.0009575189314590118, + "loss": 1.4948, + "step": 3950 + }, + { + "epoch": 0.4167721518987342, + "grad_norm": 0.9484306573867798, + "learning_rate": 0.0009572776619197731, + "loss": 1.5023, + "step": 3951 + }, + { + "epoch": 0.41687763713080167, + "grad_norm": 0.6745089292526245, + "learning_rate": 0.0009570363691540387, + "loss": 1.4852, + "step": 3952 + }, + { + "epoch": 0.4169831223628692, + "grad_norm": 0.7895423769950867, + "learning_rate": 0.0009567950531888469, + "loss": 1.5257, + "step": 3953 + }, + { + "epoch": 0.4170886075949367, + "grad_norm": 0.7679651975631714, + "learning_rate": 0.0009565537140512381, + "loss": 1.4934, + "step": 3954 + }, + { + "epoch": 0.4171940928270042, + "grad_norm": 0.6514753103256226, + "learning_rate": 0.0009563123517682559, + "loss": 1.478, + "step": 3955 + }, + { + "epoch": 0.41729957805907175, + "grad_norm": 0.7044059634208679, + "learning_rate": 0.0009560709663669456, + "loss": 1.4601, + "step": 3956 + }, + { + "epoch": 0.41740506329113924, + "grad_norm": 
0.6250731348991394, + "learning_rate": 0.0009558295578743559, + "loss": 1.4733, + "step": 3957 + }, + { + "epoch": 0.41751054852320674, + "grad_norm": 0.7139929533004761, + "learning_rate": 0.0009555881263175381, + "loss": 1.5114, + "step": 3958 + }, + { + "epoch": 0.4176160337552743, + "grad_norm": 0.6867349743843079, + "learning_rate": 0.0009553466717235456, + "loss": 1.4514, + "step": 3959 + }, + { + "epoch": 0.4177215189873418, + "grad_norm": 0.7019721269607544, + "learning_rate": 0.0009551051941194346, + "loss": 1.496, + "step": 3960 + }, + { + "epoch": 0.41782700421940927, + "grad_norm": 0.6934066414833069, + "learning_rate": 0.0009548636935322639, + "loss": 1.4588, + "step": 3961 + }, + { + "epoch": 0.4179324894514768, + "grad_norm": 0.7383832931518555, + "learning_rate": 0.0009546221699890945, + "loss": 1.4613, + "step": 3962 + }, + { + "epoch": 0.4180379746835443, + "grad_norm": 0.7192319631576538, + "learning_rate": 0.0009543806235169909, + "loss": 1.4685, + "step": 3963 + }, + { + "epoch": 0.4181434599156118, + "grad_norm": 0.9973915219306946, + "learning_rate": 0.0009541390541430192, + "loss": 1.5109, + "step": 3964 + }, + { + "epoch": 0.41824894514767935, + "grad_norm": 0.6533644199371338, + "learning_rate": 0.0009538974618942486, + "loss": 1.5052, + "step": 3965 + }, + { + "epoch": 0.41835443037974684, + "grad_norm": 0.7452842593193054, + "learning_rate": 0.0009536558467977505, + "loss": 1.4659, + "step": 3966 + }, + { + "epoch": 0.41845991561181434, + "grad_norm": 0.6469925045967102, + "learning_rate": 0.0009534142088805994, + "loss": 1.4803, + "step": 3967 + }, + { + "epoch": 0.41856540084388183, + "grad_norm": 1.0319989919662476, + "learning_rate": 0.0009531725481698719, + "loss": 1.4865, + "step": 3968 + }, + { + "epoch": 0.4186708860759494, + "grad_norm": 0.6818112730979919, + "learning_rate": 0.0009529308646926473, + "loss": 1.4972, + "step": 3969 + }, + { + "epoch": 0.41877637130801687, + "grad_norm": 0.8988183736801147, + "learning_rate": 
0.0009526891584760071, + "loss": 1.4665, + "step": 3970 + }, + { + "epoch": 0.41888185654008436, + "grad_norm": 0.7875445485115051, + "learning_rate": 0.0009524474295470362, + "loss": 1.5002, + "step": 3971 + }, + { + "epoch": 0.4189873417721519, + "grad_norm": 0.7798680663108826, + "learning_rate": 0.0009522056779328214, + "loss": 1.4744, + "step": 3972 + }, + { + "epoch": 0.4190928270042194, + "grad_norm": 0.826404869556427, + "learning_rate": 0.0009519639036604522, + "loss": 1.5132, + "step": 3973 + }, + { + "epoch": 0.4191983122362869, + "grad_norm": 0.6698220372200012, + "learning_rate": 0.0009517221067570204, + "loss": 1.4686, + "step": 3974 + }, + { + "epoch": 0.41930379746835444, + "grad_norm": 0.7390868663787842, + "learning_rate": 0.0009514802872496205, + "loss": 1.4897, + "step": 3975 + }, + { + "epoch": 0.41940928270042194, + "grad_norm": 0.6597594022750854, + "learning_rate": 0.0009512384451653499, + "loss": 1.4854, + "step": 3976 + }, + { + "epoch": 0.41951476793248943, + "grad_norm": 0.8080558776855469, + "learning_rate": 0.000950996580531308, + "loss": 1.4896, + "step": 3977 + }, + { + "epoch": 0.419620253164557, + "grad_norm": 0.6377053260803223, + "learning_rate": 0.000950754693374597, + "loss": 1.4395, + "step": 3978 + }, + { + "epoch": 0.41972573839662447, + "grad_norm": 0.700760543346405, + "learning_rate": 0.0009505127837223215, + "loss": 1.4999, + "step": 3979 + }, + { + "epoch": 0.41983122362869196, + "grad_norm": 0.6684749722480774, + "learning_rate": 0.0009502708516015889, + "loss": 1.522, + "step": 3980 + }, + { + "epoch": 0.4199367088607595, + "grad_norm": 0.6310623288154602, + "learning_rate": 0.0009500288970395085, + "loss": 1.4655, + "step": 3981 + }, + { + "epoch": 0.420042194092827, + "grad_norm": 0.6448304057121277, + "learning_rate": 0.000949786920063193, + "loss": 1.4878, + "step": 3982 + }, + { + "epoch": 0.4201476793248945, + "grad_norm": 0.6693242192268372, + "learning_rate": 0.0009495449206997568, + "loss": 1.4711, + "step": 
3983 + }, + { + "epoch": 0.42025316455696204, + "grad_norm": 0.5880758166313171, + "learning_rate": 0.0009493028989763171, + "loss": 1.4935, + "step": 3984 + }, + { + "epoch": 0.42035864978902954, + "grad_norm": 0.6842343807220459, + "learning_rate": 0.0009490608549199939, + "loss": 1.4707, + "step": 3985 + }, + { + "epoch": 0.42046413502109703, + "grad_norm": 0.6299881339073181, + "learning_rate": 0.0009488187885579092, + "loss": 1.4651, + "step": 3986 + }, + { + "epoch": 0.4205696202531646, + "grad_norm": 0.6222948431968689, + "learning_rate": 0.000948576699917188, + "loss": 1.4623, + "step": 3987 + }, + { + "epoch": 0.42067510548523207, + "grad_norm": 0.7853259444236755, + "learning_rate": 0.0009483345890249571, + "loss": 1.479, + "step": 3988 + }, + { + "epoch": 0.42078059071729956, + "grad_norm": 0.8164576888084412, + "learning_rate": 0.0009480924559083468, + "loss": 1.4876, + "step": 3989 + }, + { + "epoch": 0.4208860759493671, + "grad_norm": 0.6621295213699341, + "learning_rate": 0.0009478503005944888, + "loss": 1.4918, + "step": 3990 + }, + { + "epoch": 0.4209915611814346, + "grad_norm": 0.8829833269119263, + "learning_rate": 0.0009476081231105183, + "loss": 1.4939, + "step": 3991 + }, + { + "epoch": 0.4210970464135021, + "grad_norm": 0.6643090844154358, + "learning_rate": 0.0009473659234835722, + "loss": 1.4995, + "step": 3992 + }, + { + "epoch": 0.42120253164556964, + "grad_norm": 0.6611058115959167, + "learning_rate": 0.00094712370174079, + "loss": 1.4848, + "step": 3993 + }, + { + "epoch": 0.42130801687763714, + "grad_norm": 0.6129915118217468, + "learning_rate": 0.0009468814579093141, + "loss": 1.4948, + "step": 3994 + }, + { + "epoch": 0.42141350210970463, + "grad_norm": 0.6303871273994446, + "learning_rate": 0.0009466391920162894, + "loss": 1.4782, + "step": 3995 + }, + { + "epoch": 0.4215189873417722, + "grad_norm": 0.7567851543426514, + "learning_rate": 0.0009463969040888624, + "loss": 1.4938, + "step": 3996 + }, + { + "epoch": 0.42162447257383967, 
+ "grad_norm": 0.8065080642700195, + "learning_rate": 0.0009461545941541832, + "loss": 1.4992, + "step": 3997 + }, + { + "epoch": 0.42172995780590716, + "grad_norm": 0.7160806655883789, + "learning_rate": 0.0009459122622394033, + "loss": 1.4802, + "step": 3998 + }, + { + "epoch": 0.4218354430379747, + "grad_norm": 0.9280639886856079, + "learning_rate": 0.0009456699083716777, + "loss": 1.5096, + "step": 3999 + }, + { + "epoch": 0.4219409282700422, + "grad_norm": 0.6307188272476196, + "learning_rate": 0.0009454275325781632, + "loss": 1.4872, + "step": 4000 + }, + { + "epoch": 0.4220464135021097, + "grad_norm": 0.914405107498169, + "learning_rate": 0.0009451851348860191, + "loss": 1.4645, + "step": 4001 + }, + { + "epoch": 0.4221518987341772, + "grad_norm": 0.6350969076156616, + "learning_rate": 0.0009449427153224076, + "loss": 1.4766, + "step": 4002 + }, + { + "epoch": 0.42225738396624474, + "grad_norm": 0.9729679226875305, + "learning_rate": 0.0009447002739144924, + "loss": 1.4568, + "step": 4003 + }, + { + "epoch": 0.42236286919831223, + "grad_norm": 0.901556134223938, + "learning_rate": 0.0009444578106894408, + "loss": 1.5046, + "step": 4004 + }, + { + "epoch": 0.4224683544303797, + "grad_norm": 0.6979547142982483, + "learning_rate": 0.000944215325674422, + "loss": 1.4691, + "step": 4005 + }, + { + "epoch": 0.42257383966244727, + "grad_norm": 0.8688889741897583, + "learning_rate": 0.0009439728188966074, + "loss": 1.4929, + "step": 4006 + }, + { + "epoch": 0.42267932489451476, + "grad_norm": 0.676706075668335, + "learning_rate": 0.0009437302903831712, + "loss": 1.4804, + "step": 4007 + }, + { + "epoch": 0.42278481012658226, + "grad_norm": 0.7038896083831787, + "learning_rate": 0.0009434877401612898, + "loss": 1.4635, + "step": 4008 + }, + { + "epoch": 0.4228902953586498, + "grad_norm": 0.756024181842804, + "learning_rate": 0.0009432451682581424, + "loss": 1.476, + "step": 4009 + }, + { + "epoch": 0.4229957805907173, + "grad_norm": 0.777808666229248, + 
"learning_rate": 0.0009430025747009104, + "loss": 1.5072, + "step": 4010 + }, + { + "epoch": 0.4231012658227848, + "grad_norm": 0.7031209468841553, + "learning_rate": 0.0009427599595167776, + "loss": 1.4588, + "step": 4011 + }, + { + "epoch": 0.42320675105485234, + "grad_norm": 0.8909032940864563, + "learning_rate": 0.0009425173227329297, + "loss": 1.4542, + "step": 4012 + }, + { + "epoch": 0.42331223628691983, + "grad_norm": 0.7222570180892944, + "learning_rate": 0.0009422746643765563, + "loss": 1.4826, + "step": 4013 + }, + { + "epoch": 0.4234177215189873, + "grad_norm": 0.7156255841255188, + "learning_rate": 0.0009420319844748476, + "loss": 1.4889, + "step": 4014 + }, + { + "epoch": 0.42352320675105487, + "grad_norm": 0.6402177214622498, + "learning_rate": 0.0009417892830549978, + "loss": 1.505, + "step": 4015 + }, + { + "epoch": 0.42362869198312236, + "grad_norm": 0.722588062286377, + "learning_rate": 0.0009415465601442023, + "loss": 1.5024, + "step": 4016 + }, + { + "epoch": 0.42373417721518986, + "grad_norm": 0.646680474281311, + "learning_rate": 0.0009413038157696595, + "loss": 1.4687, + "step": 4017 + }, + { + "epoch": 0.4238396624472574, + "grad_norm": 0.6535049676895142, + "learning_rate": 0.0009410610499585705, + "loss": 1.5041, + "step": 4018 + }, + { + "epoch": 0.4239451476793249, + "grad_norm": 0.6407057046890259, + "learning_rate": 0.000940818262738138, + "loss": 1.4652, + "step": 4019 + }, + { + "epoch": 0.4240506329113924, + "grad_norm": 0.6358295679092407, + "learning_rate": 0.0009405754541355677, + "loss": 1.5129, + "step": 4020 + }, + { + "epoch": 0.42415611814345994, + "grad_norm": 0.6886041760444641, + "learning_rate": 0.0009403326241780674, + "loss": 1.4534, + "step": 4021 + }, + { + "epoch": 0.42426160337552743, + "grad_norm": 0.6215149760246277, + "learning_rate": 0.0009400897728928475, + "loss": 1.4997, + "step": 4022 + }, + { + "epoch": 0.4243670886075949, + "grad_norm": 0.706059992313385, + "learning_rate": 0.0009398469003071207, + 
"loss": 1.4563, + "step": 4023 + }, + { + "epoch": 0.42447257383966247, + "grad_norm": 0.6802956461906433, + "learning_rate": 0.0009396040064481021, + "loss": 1.4683, + "step": 4024 + }, + { + "epoch": 0.42457805907172996, + "grad_norm": 0.6891345381736755, + "learning_rate": 0.000939361091343009, + "loss": 1.5015, + "step": 4025 + }, + { + "epoch": 0.42468354430379746, + "grad_norm": 0.7080348134040833, + "learning_rate": 0.0009391181550190615, + "loss": 1.4924, + "step": 4026 + }, + { + "epoch": 0.424789029535865, + "grad_norm": 0.7712253332138062, + "learning_rate": 0.0009388751975034815, + "loss": 1.4538, + "step": 4027 + }, + { + "epoch": 0.4248945147679325, + "grad_norm": 0.7299039363861084, + "learning_rate": 0.0009386322188234941, + "loss": 1.4853, + "step": 4028 + }, + { + "epoch": 0.425, + "grad_norm": 0.744536280632019, + "learning_rate": 0.0009383892190063256, + "loss": 1.4614, + "step": 4029 + }, + { + "epoch": 0.42510548523206754, + "grad_norm": 0.8261997103691101, + "learning_rate": 0.0009381461980792061, + "loss": 1.48, + "step": 4030 + }, + { + "epoch": 0.42521097046413503, + "grad_norm": 0.860332727432251, + "learning_rate": 0.0009379031560693665, + "loss": 1.4621, + "step": 4031 + }, + { + "epoch": 0.4253164556962025, + "grad_norm": 1.298978328704834, + "learning_rate": 0.0009376600930040417, + "loss": 1.5152, + "step": 4032 + }, + { + "epoch": 0.42542194092827, + "grad_norm": 0.7340439558029175, + "learning_rate": 0.0009374170089104676, + "loss": 1.4986, + "step": 4033 + }, + { + "epoch": 0.42552742616033756, + "grad_norm": 1.0135655403137207, + "learning_rate": 0.000937173903815883, + "loss": 1.4911, + "step": 4034 + }, + { + "epoch": 0.42563291139240506, + "grad_norm": 0.6844437718391418, + "learning_rate": 0.0009369307777475293, + "loss": 1.4895, + "step": 4035 + }, + { + "epoch": 0.42573839662447255, + "grad_norm": 0.8351281881332397, + "learning_rate": 0.0009366876307326496, + "loss": 1.4798, + "step": 4036 + }, + { + "epoch": 
0.4258438818565401, + "grad_norm": 0.6840303540229797, + "learning_rate": 0.0009364444627984902, + "loss": 1.5026, + "step": 4037 + }, + { + "epoch": 0.4259493670886076, + "grad_norm": 0.8701189756393433, + "learning_rate": 0.000936201273972299, + "loss": 1.4987, + "step": 4038 + }, + { + "epoch": 0.4260548523206751, + "grad_norm": 0.6629571318626404, + "learning_rate": 0.0009359580642813265, + "loss": 1.4509, + "step": 4039 + }, + { + "epoch": 0.42616033755274263, + "grad_norm": 0.8337842226028442, + "learning_rate": 0.0009357148337528256, + "loss": 1.4684, + "step": 4040 + }, + { + "epoch": 0.4262658227848101, + "grad_norm": 0.6680313944816589, + "learning_rate": 0.0009354715824140515, + "loss": 1.4818, + "step": 4041 + }, + { + "epoch": 0.4263713080168776, + "grad_norm": 0.8099154829978943, + "learning_rate": 0.0009352283102922619, + "loss": 1.5037, + "step": 4042 + }, + { + "epoch": 0.42647679324894516, + "grad_norm": 0.6280646324157715, + "learning_rate": 0.0009349850174147165, + "loss": 1.4599, + "step": 4043 + }, + { + "epoch": 0.42658227848101266, + "grad_norm": 0.7002527117729187, + "learning_rate": 0.0009347417038086772, + "loss": 1.4784, + "step": 4044 + }, + { + "epoch": 0.42668776371308015, + "grad_norm": 0.6667949557304382, + "learning_rate": 0.000934498369501409, + "loss": 1.531, + "step": 4045 + }, + { + "epoch": 0.4267932489451477, + "grad_norm": 0.7133070230484009, + "learning_rate": 0.0009342550145201786, + "loss": 1.459, + "step": 4046 + }, + { + "epoch": 0.4268987341772152, + "grad_norm": 0.6801965832710266, + "learning_rate": 0.0009340116388922551, + "loss": 1.4829, + "step": 4047 + }, + { + "epoch": 0.4270042194092827, + "grad_norm": 0.6063666343688965, + "learning_rate": 0.0009337682426449097, + "loss": 1.4874, + "step": 4048 + }, + { + "epoch": 0.42710970464135023, + "grad_norm": 0.5996713042259216, + "learning_rate": 0.0009335248258054162, + "loss": 1.4608, + "step": 4049 + }, + { + "epoch": 0.4272151898734177, + "grad_norm": 
0.6141687035560608, + "learning_rate": 0.0009332813884010511, + "loss": 1.4875, + "step": 4050 + }, + { + "epoch": 0.4273206751054852, + "grad_norm": 0.7174805998802185, + "learning_rate": 0.0009330379304590924, + "loss": 1.4599, + "step": 4051 + }, + { + "epoch": 0.42742616033755276, + "grad_norm": 0.771619439125061, + "learning_rate": 0.000932794452006821, + "loss": 1.5039, + "step": 4052 + }, + { + "epoch": 0.42753164556962026, + "grad_norm": 0.6682385802268982, + "learning_rate": 0.0009325509530715196, + "loss": 1.4862, + "step": 4053 + }, + { + "epoch": 0.42763713080168775, + "grad_norm": 0.6426783800125122, + "learning_rate": 0.0009323074336804738, + "loss": 1.488, + "step": 4054 + }, + { + "epoch": 0.4277426160337553, + "grad_norm": 0.720699667930603, + "learning_rate": 0.0009320638938609708, + "loss": 1.4994, + "step": 4055 + }, + { + "epoch": 0.4278481012658228, + "grad_norm": 0.6290863752365112, + "learning_rate": 0.0009318203336403008, + "loss": 1.4671, + "step": 4056 + }, + { + "epoch": 0.4279535864978903, + "grad_norm": 0.7400429248809814, + "learning_rate": 0.0009315767530457556, + "loss": 1.4752, + "step": 4057 + }, + { + "epoch": 0.42805907172995783, + "grad_norm": 0.6361969709396362, + "learning_rate": 0.0009313331521046299, + "loss": 1.4991, + "step": 4058 + }, + { + "epoch": 0.4281645569620253, + "grad_norm": 0.7858473658561707, + "learning_rate": 0.0009310895308442202, + "loss": 1.52, + "step": 4059 + }, + { + "epoch": 0.4282700421940928, + "grad_norm": 0.7241291403770447, + "learning_rate": 0.0009308458892918259, + "loss": 1.4645, + "step": 4060 + }, + { + "epoch": 0.42837552742616036, + "grad_norm": 0.6859668493270874, + "learning_rate": 0.0009306022274747478, + "loss": 1.5014, + "step": 4061 + }, + { + "epoch": 0.42848101265822786, + "grad_norm": 0.648787796497345, + "learning_rate": 0.0009303585454202892, + "loss": 1.4795, + "step": 4062 + }, + { + "epoch": 0.42858649789029535, + "grad_norm": 0.9028385281562805, + "learning_rate": 
0.0009301148431557565, + "loss": 1.4375, + "step": 4063 + }, + { + "epoch": 0.4286919831223629, + "grad_norm": 1.1662969589233398, + "learning_rate": 0.0009298711207084575, + "loss": 1.4686, + "step": 4064 + }, + { + "epoch": 0.4287974683544304, + "grad_norm": 0.6638323664665222, + "learning_rate": 0.0009296273781057026, + "loss": 1.5145, + "step": 4065 + }, + { + "epoch": 0.4289029535864979, + "grad_norm": 1.071696400642395, + "learning_rate": 0.0009293836153748039, + "loss": 1.473, + "step": 4066 + }, + { + "epoch": 0.4290084388185654, + "grad_norm": 0.6634398102760315, + "learning_rate": 0.0009291398325430771, + "loss": 1.4715, + "step": 4067 + }, + { + "epoch": 0.4291139240506329, + "grad_norm": 1.0548962354660034, + "learning_rate": 0.0009288960296378386, + "loss": 1.4608, + "step": 4068 + }, + { + "epoch": 0.4292194092827004, + "grad_norm": 0.7371865510940552, + "learning_rate": 0.0009286522066864078, + "loss": 1.5307, + "step": 4069 + }, + { + "epoch": 0.4293248945147679, + "grad_norm": 0.7909426689147949, + "learning_rate": 0.0009284083637161064, + "loss": 1.4833, + "step": 4070 + }, + { + "epoch": 0.42943037974683546, + "grad_norm": 0.8225089907646179, + "learning_rate": 0.0009281645007542584, + "loss": 1.5105, + "step": 4071 + }, + { + "epoch": 0.42953586497890295, + "grad_norm": 0.6851316690444946, + "learning_rate": 0.0009279206178281895, + "loss": 1.4588, + "step": 4072 + }, + { + "epoch": 0.42964135021097044, + "grad_norm": 0.840024471282959, + "learning_rate": 0.0009276767149652284, + "loss": 1.4684, + "step": 4073 + }, + { + "epoch": 0.429746835443038, + "grad_norm": 0.8103892207145691, + "learning_rate": 0.0009274327921927054, + "loss": 1.4705, + "step": 4074 + }, + { + "epoch": 0.4298523206751055, + "grad_norm": 0.7299506664276123, + "learning_rate": 0.0009271888495379529, + "loss": 1.5286, + "step": 4075 + }, + { + "epoch": 0.429957805907173, + "grad_norm": 0.9962589740753174, + "learning_rate": 0.0009269448870283067, + "loss": 1.4874, + "step": 
4076 + }, + { + "epoch": 0.4300632911392405, + "grad_norm": 0.6304444670677185, + "learning_rate": 0.0009267009046911032, + "loss": 1.4934, + "step": 4077 + }, + { + "epoch": 0.430168776371308, + "grad_norm": 1.029279351234436, + "learning_rate": 0.0009264569025536825, + "loss": 1.4864, + "step": 4078 + }, + { + "epoch": 0.4302742616033755, + "grad_norm": 0.8000733256340027, + "learning_rate": 0.0009262128806433858, + "loss": 1.4975, + "step": 4079 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 0.9482493996620178, + "learning_rate": 0.0009259688389875574, + "loss": 1.4663, + "step": 4080 + }, + { + "epoch": 0.43048523206751055, + "grad_norm": 1.0128380060195923, + "learning_rate": 0.000925724777613543, + "loss": 1.4883, + "step": 4081 + }, + { + "epoch": 0.43059071729957804, + "grad_norm": 0.8918288946151733, + "learning_rate": 0.0009254806965486909, + "loss": 1.5079, + "step": 4082 + }, + { + "epoch": 0.4306962025316456, + "grad_norm": 0.9216470718383789, + "learning_rate": 0.0009252365958203518, + "loss": 1.502, + "step": 4083 + }, + { + "epoch": 0.4308016877637131, + "grad_norm": 1.051779866218567, + "learning_rate": 0.0009249924754558785, + "loss": 1.4517, + "step": 4084 + }, + { + "epoch": 0.4309071729957806, + "grad_norm": 0.9967994689941406, + "learning_rate": 0.0009247483354826255, + "loss": 1.4741, + "step": 4085 + }, + { + "epoch": 0.4310126582278481, + "grad_norm": 0.9455146789550781, + "learning_rate": 0.0009245041759279502, + "loss": 1.497, + "step": 4086 + }, + { + "epoch": 0.4311181434599156, + "grad_norm": 1.0139806270599365, + "learning_rate": 0.0009242599968192119, + "loss": 1.5085, + "step": 4087 + }, + { + "epoch": 0.4312236286919831, + "grad_norm": 0.9365952014923096, + "learning_rate": 0.000924015798183772, + "loss": 1.4663, + "step": 4088 + }, + { + "epoch": 0.43132911392405066, + "grad_norm": 0.8802905082702637, + "learning_rate": 0.0009237715800489942, + "loss": 1.4527, + "step": 4089 + }, + { + "epoch": 0.43143459915611815, + 
"grad_norm": 0.9183076024055481, + "learning_rate": 0.0009235273424422442, + "loss": 1.4567, + "step": 4090 + }, + { + "epoch": 0.43154008438818564, + "grad_norm": 0.9376329183578491, + "learning_rate": 0.0009232830853908904, + "loss": 1.5116, + "step": 4091 + }, + { + "epoch": 0.4316455696202532, + "grad_norm": 0.9354287981987, + "learning_rate": 0.0009230388089223028, + "loss": 1.4734, + "step": 4092 + }, + { + "epoch": 0.4317510548523207, + "grad_norm": 0.9513534307479858, + "learning_rate": 0.0009227945130638537, + "loss": 1.4847, + "step": 4093 + }, + { + "epoch": 0.4318565400843882, + "grad_norm": 0.8666113615036011, + "learning_rate": 0.0009225501978429177, + "loss": 1.4905, + "step": 4094 + }, + { + "epoch": 0.4319620253164557, + "grad_norm": 0.9021376967430115, + "learning_rate": 0.0009223058632868719, + "loss": 1.4803, + "step": 4095 + }, + { + "epoch": 0.4320675105485232, + "grad_norm": 0.9100314974784851, + "learning_rate": 0.0009220615094230946, + "loss": 1.484, + "step": 4096 + }, + { + "epoch": 0.4321729957805907, + "grad_norm": 0.9334969520568848, + "learning_rate": 0.0009218171362789674, + "loss": 1.4678, + "step": 4097 + }, + { + "epoch": 0.43227848101265826, + "grad_norm": 0.7252868413925171, + "learning_rate": 0.0009215727438818733, + "loss": 1.4939, + "step": 4098 + }, + { + "epoch": 0.43238396624472575, + "grad_norm": 0.6806955337524414, + "learning_rate": 0.0009213283322591977, + "loss": 1.43, + "step": 4099 + }, + { + "epoch": 0.43248945147679324, + "grad_norm": 0.7167962789535522, + "learning_rate": 0.0009210839014383282, + "loss": 1.4683, + "step": 4100 + }, + { + "epoch": 0.43259493670886073, + "grad_norm": 0.7223259806632996, + "learning_rate": 0.0009208394514466544, + "loss": 1.4701, + "step": 4101 + }, + { + "epoch": 0.4327004219409283, + "grad_norm": 0.6451733708381653, + "learning_rate": 0.0009205949823115681, + "loss": 1.4497, + "step": 4102 + }, + { + "epoch": 0.4328059071729958, + "grad_norm": 0.7707226276397705, + 
"learning_rate": 0.0009203504940604634, + "loss": 1.4154, + "step": 4103 + }, + { + "epoch": 0.43291139240506327, + "grad_norm": 0.6412010192871094, + "learning_rate": 0.0009201059867207366, + "loss": 1.4652, + "step": 4104 + }, + { + "epoch": 0.4330168776371308, + "grad_norm": 0.7306724190711975, + "learning_rate": 0.0009198614603197854, + "loss": 1.465, + "step": 4105 + }, + { + "epoch": 0.4331223628691983, + "grad_norm": 0.6387706995010376, + "learning_rate": 0.0009196169148850108, + "loss": 1.4959, + "step": 4106 + }, + { + "epoch": 0.4332278481012658, + "grad_norm": 0.7922548651695251, + "learning_rate": 0.000919372350443815, + "loss": 1.4907, + "step": 4107 + }, + { + "epoch": 0.43333333333333335, + "grad_norm": 0.6912131309509277, + "learning_rate": 0.000919127767023603, + "loss": 1.465, + "step": 4108 + }, + { + "epoch": 0.43343881856540084, + "grad_norm": 0.836556613445282, + "learning_rate": 0.000918883164651781, + "loss": 1.5031, + "step": 4109 + }, + { + "epoch": 0.43354430379746833, + "grad_norm": 0.6957297921180725, + "learning_rate": 0.0009186385433557584, + "loss": 1.5035, + "step": 4110 + }, + { + "epoch": 0.4336497890295359, + "grad_norm": 0.9267609715461731, + "learning_rate": 0.0009183939031629462, + "loss": 1.4635, + "step": 4111 + }, + { + "epoch": 0.4337552742616034, + "grad_norm": 0.7699547410011292, + "learning_rate": 0.0009181492441007577, + "loss": 1.4386, + "step": 4112 + }, + { + "epoch": 0.43386075949367087, + "grad_norm": 0.8423239588737488, + "learning_rate": 0.0009179045661966075, + "loss": 1.4934, + "step": 4113 + }, + { + "epoch": 0.4339662447257384, + "grad_norm": 0.6716137528419495, + "learning_rate": 0.0009176598694779134, + "loss": 1.4728, + "step": 4114 + }, + { + "epoch": 0.4340717299578059, + "grad_norm": 0.9365404844284058, + "learning_rate": 0.0009174151539720953, + "loss": 1.4817, + "step": 4115 + }, + { + "epoch": 0.4341772151898734, + "grad_norm": 0.792913019657135, + "learning_rate": 0.0009171704197065741, + "loss": 
1.4636, + "step": 4116 + }, + { + "epoch": 0.43428270042194095, + "grad_norm": 0.6860702633857727, + "learning_rate": 0.0009169256667087738, + "loss": 1.4814, + "step": 4117 + }, + { + "epoch": 0.43438818565400844, + "grad_norm": 0.6806082725524902, + "learning_rate": 0.0009166808950061202, + "loss": 1.4749, + "step": 4118 + }, + { + "epoch": 0.43449367088607593, + "grad_norm": 0.7679480910301208, + "learning_rate": 0.0009164361046260412, + "loss": 1.5004, + "step": 4119 + }, + { + "epoch": 0.4345991561181435, + "grad_norm": 0.6137024164199829, + "learning_rate": 0.0009161912955959668, + "loss": 1.4921, + "step": 4120 + }, + { + "epoch": 0.434704641350211, + "grad_norm": 0.768952488899231, + "learning_rate": 0.0009159464679433289, + "loss": 1.481, + "step": 4121 + }, + { + "epoch": 0.43481012658227847, + "grad_norm": 0.6090400815010071, + "learning_rate": 0.0009157016216955618, + "loss": 1.4563, + "step": 4122 + }, + { + "epoch": 0.434915611814346, + "grad_norm": 0.80779629945755, + "learning_rate": 0.0009154567568801019, + "loss": 1.444, + "step": 4123 + }, + { + "epoch": 0.4350210970464135, + "grad_norm": 0.6078137159347534, + "learning_rate": 0.0009152118735243871, + "loss": 1.4749, + "step": 4124 + }, + { + "epoch": 0.435126582278481, + "grad_norm": 0.7254389524459839, + "learning_rate": 0.0009149669716558582, + "loss": 1.4547, + "step": 4125 + }, + { + "epoch": 0.43523206751054855, + "grad_norm": 0.6606658101081848, + "learning_rate": 0.0009147220513019577, + "loss": 1.4635, + "step": 4126 + }, + { + "epoch": 0.43533755274261604, + "grad_norm": 0.6181554794311523, + "learning_rate": 0.0009144771124901295, + "loss": 1.4564, + "step": 4127 + }, + { + "epoch": 0.43544303797468353, + "grad_norm": 0.5972049832344055, + "learning_rate": 0.000914232155247821, + "loss": 1.4768, + "step": 4128 + }, + { + "epoch": 0.4355485232067511, + "grad_norm": 0.6449487805366516, + "learning_rate": 0.0009139871796024807, + "loss": 1.4776, + "step": 4129 + }, + { + "epoch": 
0.4356540084388186, + "grad_norm": 0.6253957748413086, + "learning_rate": 0.000913742185581559, + "loss": 1.4835, + "step": 4130 + }, + { + "epoch": 0.43575949367088607, + "grad_norm": 0.7481979131698608, + "learning_rate": 0.0009134971732125088, + "loss": 1.4846, + "step": 4131 + }, + { + "epoch": 0.43586497890295356, + "grad_norm": 0.6468957662582397, + "learning_rate": 0.0009132521425227852, + "loss": 1.4499, + "step": 4132 + }, + { + "epoch": 0.4359704641350211, + "grad_norm": 0.777606725692749, + "learning_rate": 0.0009130070935398451, + "loss": 1.4906, + "step": 4133 + }, + { + "epoch": 0.4360759493670886, + "grad_norm": 0.7491641044616699, + "learning_rate": 0.0009127620262911473, + "loss": 1.4871, + "step": 4134 + }, + { + "epoch": 0.4361814345991561, + "grad_norm": 0.6606632471084595, + "learning_rate": 0.0009125169408041526, + "loss": 1.4407, + "step": 4135 + }, + { + "epoch": 0.43628691983122364, + "grad_norm": 0.7916749119758606, + "learning_rate": 0.0009122718371063247, + "loss": 1.4557, + "step": 4136 + }, + { + "epoch": 0.43639240506329113, + "grad_norm": 0.6303460597991943, + "learning_rate": 0.0009120267152251281, + "loss": 1.4624, + "step": 4137 + }, + { + "epoch": 0.4364978902953586, + "grad_norm": 0.8832973837852478, + "learning_rate": 0.0009117815751880301, + "loss": 1.4833, + "step": 4138 + }, + { + "epoch": 0.4366033755274262, + "grad_norm": 0.6490811109542847, + "learning_rate": 0.0009115364170225, + "loss": 1.4779, + "step": 4139 + }, + { + "epoch": 0.43670886075949367, + "grad_norm": 0.7134056687355042, + "learning_rate": 0.0009112912407560086, + "loss": 1.4626, + "step": 4140 + }, + { + "epoch": 0.43681434599156116, + "grad_norm": 0.6472575068473816, + "learning_rate": 0.0009110460464160295, + "loss": 1.4569, + "step": 4141 + }, + { + "epoch": 0.4369198312236287, + "grad_norm": 0.930506706237793, + "learning_rate": 0.000910800834030038, + "loss": 1.4744, + "step": 4142 + }, + { + "epoch": 0.4370253164556962, + "grad_norm": 
0.7658478021621704, + "learning_rate": 0.0009105556036255113, + "loss": 1.4872, + "step": 4143 + }, + { + "epoch": 0.4371308016877637, + "grad_norm": 0.9491616487503052, + "learning_rate": 0.0009103103552299283, + "loss": 1.4945, + "step": 4144 + }, + { + "epoch": 0.43723628691983124, + "grad_norm": 0.848891019821167, + "learning_rate": 0.0009100650888707709, + "loss": 1.4589, + "step": 4145 + }, + { + "epoch": 0.43734177215189873, + "grad_norm": 0.6954265236854553, + "learning_rate": 0.000909819804575522, + "loss": 1.4547, + "step": 4146 + }, + { + "epoch": 0.4374472573839662, + "grad_norm": 0.7484803795814514, + "learning_rate": 0.0009095745023716671, + "loss": 1.4843, + "step": 4147 + }, + { + "epoch": 0.4375527426160338, + "grad_norm": 0.774314284324646, + "learning_rate": 0.0009093291822866933, + "loss": 1.5132, + "step": 4148 + }, + { + "epoch": 0.43765822784810127, + "grad_norm": 0.8037533164024353, + "learning_rate": 0.0009090838443480903, + "loss": 1.4715, + "step": 4149 + }, + { + "epoch": 0.43776371308016876, + "grad_norm": 0.6901044249534607, + "learning_rate": 0.0009088384885833495, + "loss": 1.4784, + "step": 4150 + }, + { + "epoch": 0.4378691983122363, + "grad_norm": 0.7413507699966431, + "learning_rate": 0.0009085931150199638, + "loss": 1.4822, + "step": 4151 + }, + { + "epoch": 0.4379746835443038, + "grad_norm": 0.6689403653144836, + "learning_rate": 0.0009083477236854287, + "loss": 1.4973, + "step": 4152 + }, + { + "epoch": 0.4380801687763713, + "grad_norm": 0.6671567559242249, + "learning_rate": 0.0009081023146072414, + "loss": 1.4887, + "step": 4153 + }, + { + "epoch": 0.43818565400843884, + "grad_norm": 0.6568112969398499, + "learning_rate": 0.0009078568878129018, + "loss": 1.434, + "step": 4154 + }, + { + "epoch": 0.43829113924050633, + "grad_norm": 0.8317128419876099, + "learning_rate": 0.0009076114433299107, + "loss": 1.4656, + "step": 4155 + }, + { + "epoch": 0.4383966244725738, + "grad_norm": 0.6284992098808289, + "learning_rate": 
0.0009073659811857712, + "loss": 1.4869, + "step": 4156 + }, + { + "epoch": 0.4385021097046414, + "grad_norm": 0.8880622386932373, + "learning_rate": 0.0009071205014079888, + "loss": 1.488, + "step": 4157 + }, + { + "epoch": 0.43860759493670887, + "grad_norm": 0.6728898882865906, + "learning_rate": 0.0009068750040240709, + "loss": 1.445, + "step": 4158 + }, + { + "epoch": 0.43871308016877636, + "grad_norm": 0.7226570844650269, + "learning_rate": 0.0009066294890615266, + "loss": 1.4649, + "step": 4159 + }, + { + "epoch": 0.4388185654008439, + "grad_norm": 0.7199554443359375, + "learning_rate": 0.000906383956547867, + "loss": 1.4509, + "step": 4160 + }, + { + "epoch": 0.4389240506329114, + "grad_norm": 1.1355444192886353, + "learning_rate": 0.0009061384065106051, + "loss": 1.4663, + "step": 4161 + }, + { + "epoch": 0.4390295358649789, + "grad_norm": 0.7002373337745667, + "learning_rate": 0.0009058928389772564, + "loss": 1.4829, + "step": 4162 + }, + { + "epoch": 0.43913502109704644, + "grad_norm": 0.6452335119247437, + "learning_rate": 0.0009056472539753377, + "loss": 1.4822, + "step": 4163 + }, + { + "epoch": 0.43924050632911393, + "grad_norm": 0.6615727543830872, + "learning_rate": 0.0009054016515323679, + "loss": 1.439, + "step": 4164 + }, + { + "epoch": 0.4393459915611814, + "grad_norm": 0.7324166893959045, + "learning_rate": 0.0009051560316758684, + "loss": 1.4723, + "step": 4165 + }, + { + "epoch": 0.4394514767932489, + "grad_norm": 0.63801509141922, + "learning_rate": 0.0009049103944333616, + "loss": 1.4595, + "step": 4166 + }, + { + "epoch": 0.43955696202531647, + "grad_norm": 0.7933727502822876, + "learning_rate": 0.0009046647398323728, + "loss": 1.462, + "step": 4167 + }, + { + "epoch": 0.43966244725738396, + "grad_norm": 0.6720865368843079, + "learning_rate": 0.0009044190679004286, + "loss": 1.4691, + "step": 4168 + }, + { + "epoch": 0.43976793248945145, + "grad_norm": 0.8333881497383118, + "learning_rate": 0.0009041733786650578, + "loss": 1.4769, + 
"step": 4169 + }, + { + "epoch": 0.439873417721519, + "grad_norm": 0.6872052550315857, + "learning_rate": 0.0009039276721537915, + "loss": 1.5037, + "step": 4170 + }, + { + "epoch": 0.4399789029535865, + "grad_norm": 0.817371129989624, + "learning_rate": 0.0009036819483941614, + "loss": 1.4697, + "step": 4171 + }, + { + "epoch": 0.440084388185654, + "grad_norm": 0.7283031940460205, + "learning_rate": 0.0009034362074137032, + "loss": 1.4757, + "step": 4172 + }, + { + "epoch": 0.44018987341772153, + "grad_norm": 0.6813532114028931, + "learning_rate": 0.0009031904492399526, + "loss": 1.5068, + "step": 4173 + }, + { + "epoch": 0.440295358649789, + "grad_norm": 0.6589047312736511, + "learning_rate": 0.0009029446739004483, + "loss": 1.4835, + "step": 4174 + }, + { + "epoch": 0.4404008438818565, + "grad_norm": 0.6380748152732849, + "learning_rate": 0.0009026988814227308, + "loss": 1.4474, + "step": 4175 + }, + { + "epoch": 0.44050632911392407, + "grad_norm": 0.6388514041900635, + "learning_rate": 0.0009024530718343418, + "loss": 1.449, + "step": 4176 + }, + { + "epoch": 0.44061181434599156, + "grad_norm": 0.6240747570991516, + "learning_rate": 0.0009022072451628263, + "loss": 1.4844, + "step": 4177 + }, + { + "epoch": 0.44071729957805905, + "grad_norm": 0.6848040819168091, + "learning_rate": 0.0009019614014357298, + "loss": 1.4531, + "step": 4178 + }, + { + "epoch": 0.4408227848101266, + "grad_norm": 0.6400675177574158, + "learning_rate": 0.0009017155406806006, + "loss": 1.4771, + "step": 4179 + }, + { + "epoch": 0.4409282700421941, + "grad_norm": 0.6314380168914795, + "learning_rate": 0.0009014696629249886, + "loss": 1.4487, + "step": 4180 + }, + { + "epoch": 0.4410337552742616, + "grad_norm": 0.7245312929153442, + "learning_rate": 0.0009012237681964454, + "loss": 1.4781, + "step": 4181 + }, + { + "epoch": 0.44113924050632913, + "grad_norm": 0.6389877796173096, + "learning_rate": 0.0009009778565225251, + "loss": 1.4738, + "step": 4182 + }, + { + "epoch": 
0.4412447257383966, + "grad_norm": 0.6918312907218933, + "learning_rate": 0.000900731927930783, + "loss": 1.454, + "step": 4183 + }, + { + "epoch": 0.4413502109704641, + "grad_norm": 0.6622769236564636, + "learning_rate": 0.0009004859824487769, + "loss": 1.4663, + "step": 4184 + }, + { + "epoch": 0.44145569620253167, + "grad_norm": 0.7507911324501038, + "learning_rate": 0.0009002400201040659, + "loss": 1.466, + "step": 4185 + }, + { + "epoch": 0.44156118143459916, + "grad_norm": 0.5786638259887695, + "learning_rate": 0.0008999940409242115, + "loss": 1.4552, + "step": 4186 + }, + { + "epoch": 0.44166666666666665, + "grad_norm": 0.7588576674461365, + "learning_rate": 0.0008997480449367771, + "loss": 1.492, + "step": 4187 + }, + { + "epoch": 0.4417721518987342, + "grad_norm": 0.6472598910331726, + "learning_rate": 0.0008995020321693274, + "loss": 1.4849, + "step": 4188 + }, + { + "epoch": 0.4418776371308017, + "grad_norm": 0.7033989429473877, + "learning_rate": 0.0008992560026494294, + "loss": 1.4726, + "step": 4189 + }, + { + "epoch": 0.4419831223628692, + "grad_norm": 0.8336395621299744, + "learning_rate": 0.0008990099564046522, + "loss": 1.4887, + "step": 4190 + }, + { + "epoch": 0.44208860759493673, + "grad_norm": 0.5752753615379333, + "learning_rate": 0.0008987638934625662, + "loss": 1.4565, + "step": 4191 + }, + { + "epoch": 0.4421940928270042, + "grad_norm": 0.9687056541442871, + "learning_rate": 0.0008985178138507441, + "loss": 1.49, + "step": 4192 + }, + { + "epoch": 0.4422995780590717, + "grad_norm": 0.7723601460456848, + "learning_rate": 0.0008982717175967606, + "loss": 1.4495, + "step": 4193 + }, + { + "epoch": 0.44240506329113927, + "grad_norm": 0.7854141592979431, + "learning_rate": 0.0008980256047281919, + "loss": 1.5101, + "step": 4194 + }, + { + "epoch": 0.44251054852320676, + "grad_norm": 0.7668823003768921, + "learning_rate": 0.0008977794752726159, + "loss": 1.4678, + "step": 4195 + }, + { + "epoch": 0.44261603375527425, + "grad_norm": 
0.788591206073761, + "learning_rate": 0.0008975333292576125, + "loss": 1.4483, + "step": 4196 + }, + { + "epoch": 0.44272151898734174, + "grad_norm": 0.8016175627708435, + "learning_rate": 0.0008972871667107643, + "loss": 1.4724, + "step": 4197 + }, + { + "epoch": 0.4428270042194093, + "grad_norm": 0.7546568512916565, + "learning_rate": 0.0008970409876596545, + "loss": 1.4791, + "step": 4198 + }, + { + "epoch": 0.4429324894514768, + "grad_norm": 0.810792863368988, + "learning_rate": 0.0008967947921318689, + "loss": 1.4406, + "step": 4199 + }, + { + "epoch": 0.4430379746835443, + "grad_norm": 0.8158887028694153, + "learning_rate": 0.0008965485801549946, + "loss": 1.5188, + "step": 4200 + }, + { + "epoch": 0.4431434599156118, + "grad_norm": 0.7730334997177124, + "learning_rate": 0.0008963023517566213, + "loss": 1.463, + "step": 4201 + }, + { + "epoch": 0.4432489451476793, + "grad_norm": 0.7778162956237793, + "learning_rate": 0.0008960561069643402, + "loss": 1.4704, + "step": 4202 + }, + { + "epoch": 0.4433544303797468, + "grad_norm": 0.7866867184638977, + "learning_rate": 0.0008958098458057436, + "loss": 1.4429, + "step": 4203 + }, + { + "epoch": 0.44345991561181436, + "grad_norm": 0.6691808104515076, + "learning_rate": 0.000895563568308427, + "loss": 1.4791, + "step": 4204 + }, + { + "epoch": 0.44356540084388185, + "grad_norm": 0.6086167693138123, + "learning_rate": 0.0008953172744999865, + "loss": 1.4355, + "step": 4205 + }, + { + "epoch": 0.44367088607594934, + "grad_norm": 0.6459118723869324, + "learning_rate": 0.000895070964408021, + "loss": 1.4991, + "step": 4206 + }, + { + "epoch": 0.4437763713080169, + "grad_norm": 0.6717722415924072, + "learning_rate": 0.0008948246380601303, + "loss": 1.4414, + "step": 4207 + }, + { + "epoch": 0.4438818565400844, + "grad_norm": 0.620092511177063, + "learning_rate": 0.000894578295483917, + "loss": 1.456, + "step": 4208 + }, + { + "epoch": 0.4439873417721519, + "grad_norm": 0.6130921840667725, + "learning_rate": 
0.0008943319367069844, + "loss": 1.4644, + "step": 4209 + }, + { + "epoch": 0.4440928270042194, + "grad_norm": 0.6297443509101868, + "learning_rate": 0.000894085561756939, + "loss": 1.4683, + "step": 4210 + }, + { + "epoch": 0.4441983122362869, + "grad_norm": 0.64920973777771, + "learning_rate": 0.0008938391706613878, + "loss": 1.4938, + "step": 4211 + }, + { + "epoch": 0.4443037974683544, + "grad_norm": 0.5924228429794312, + "learning_rate": 0.0008935927634479403, + "loss": 1.4537, + "step": 4212 + }, + { + "epoch": 0.44440928270042196, + "grad_norm": 0.6358844637870789, + "learning_rate": 0.0008933463401442073, + "loss": 1.4875, + "step": 4213 + }, + { + "epoch": 0.44451476793248945, + "grad_norm": 0.7085954546928406, + "learning_rate": 0.0008930999007778025, + "loss": 1.4484, + "step": 4214 + }, + { + "epoch": 0.44462025316455694, + "grad_norm": 0.6600693464279175, + "learning_rate": 0.0008928534453763402, + "loss": 1.4559, + "step": 4215 + }, + { + "epoch": 0.4447257383966245, + "grad_norm": 0.6320281028747559, + "learning_rate": 0.0008926069739674369, + "loss": 1.4765, + "step": 4216 + }, + { + "epoch": 0.444831223628692, + "grad_norm": 0.654343843460083, + "learning_rate": 0.000892360486578711, + "loss": 1.4606, + "step": 4217 + }, + { + "epoch": 0.4449367088607595, + "grad_norm": 0.6258384585380554, + "learning_rate": 0.0008921139832377829, + "loss": 1.4383, + "step": 4218 + }, + { + "epoch": 0.445042194092827, + "grad_norm": 0.6443143486976624, + "learning_rate": 0.0008918674639722742, + "loss": 1.4868, + "step": 4219 + }, + { + "epoch": 0.4451476793248945, + "grad_norm": 0.6235471963882446, + "learning_rate": 0.0008916209288098088, + "loss": 1.4847, + "step": 4220 + }, + { + "epoch": 0.445253164556962, + "grad_norm": 0.6832448244094849, + "learning_rate": 0.0008913743777780122, + "loss": 1.4746, + "step": 4221 + }, + { + "epoch": 0.44535864978902956, + "grad_norm": 0.6219081282615662, + "learning_rate": 0.0008911278109045114, + "loss": 1.4729, + "step": 
4222 + }, + { + "epoch": 0.44546413502109705, + "grad_norm": 0.6521899104118347, + "learning_rate": 0.0008908812282169359, + "loss": 1.4897, + "step": 4223 + }, + { + "epoch": 0.44556962025316454, + "grad_norm": 0.63724285364151, + "learning_rate": 0.0008906346297429161, + "loss": 1.4589, + "step": 4224 + }, + { + "epoch": 0.4456751054852321, + "grad_norm": 0.6655176877975464, + "learning_rate": 0.000890388015510085, + "loss": 1.4674, + "step": 4225 + }, + { + "epoch": 0.4457805907172996, + "grad_norm": 0.6883600950241089, + "learning_rate": 0.0008901413855460764, + "loss": 1.4609, + "step": 4226 + }, + { + "epoch": 0.4458860759493671, + "grad_norm": 0.6392378211021423, + "learning_rate": 0.0008898947398785271, + "loss": 1.4508, + "step": 4227 + }, + { + "epoch": 0.4459915611814346, + "grad_norm": 0.6346593499183655, + "learning_rate": 0.0008896480785350743, + "loss": 1.4871, + "step": 4228 + }, + { + "epoch": 0.4460970464135021, + "grad_norm": 0.6547693610191345, + "learning_rate": 0.0008894014015433582, + "loss": 1.4587, + "step": 4229 + }, + { + "epoch": 0.4462025316455696, + "grad_norm": 0.6933489441871643, + "learning_rate": 0.0008891547089310198, + "loss": 1.466, + "step": 4230 + }, + { + "epoch": 0.4463080168776371, + "grad_norm": 0.6280316114425659, + "learning_rate": 0.0008889080007257024, + "loss": 1.5133, + "step": 4231 + }, + { + "epoch": 0.44641350210970465, + "grad_norm": 0.7375469207763672, + "learning_rate": 0.0008886612769550508, + "loss": 1.4691, + "step": 4232 + }, + { + "epoch": 0.44651898734177214, + "grad_norm": 0.7300186157226562, + "learning_rate": 0.0008884145376467119, + "loss": 1.4687, + "step": 4233 + }, + { + "epoch": 0.44662447257383964, + "grad_norm": 0.7944932579994202, + "learning_rate": 0.0008881677828283337, + "loss": 1.4964, + "step": 4234 + }, + { + "epoch": 0.4467299578059072, + "grad_norm": 0.8766049742698669, + "learning_rate": 0.0008879210125275664, + "loss": 1.4713, + "step": 4235 + }, + { + "epoch": 0.4468354430379747, + 
"grad_norm": 0.6789595484733582, + "learning_rate": 0.000887674226772062, + "loss": 1.4391, + "step": 4236 + }, + { + "epoch": 0.44694092827004217, + "grad_norm": 0.882912814617157, + "learning_rate": 0.000887427425589474, + "loss": 1.4884, + "step": 4237 + }, + { + "epoch": 0.4470464135021097, + "grad_norm": 0.8640223741531372, + "learning_rate": 0.0008871806090074577, + "loss": 1.4173, + "step": 4238 + }, + { + "epoch": 0.4471518987341772, + "grad_norm": 0.6455326676368713, + "learning_rate": 0.0008869337770536699, + "loss": 1.4531, + "step": 4239 + }, + { + "epoch": 0.4472573839662447, + "grad_norm": 0.8011762499809265, + "learning_rate": 0.0008866869297557699, + "loss": 1.4508, + "step": 4240 + }, + { + "epoch": 0.44736286919831225, + "grad_norm": 0.6592903733253479, + "learning_rate": 0.0008864400671414177, + "loss": 1.4531, + "step": 4241 + }, + { + "epoch": 0.44746835443037974, + "grad_norm": 0.7275804281234741, + "learning_rate": 0.0008861931892382756, + "loss": 1.4761, + "step": 4242 + }, + { + "epoch": 0.44757383966244724, + "grad_norm": 0.6425269842147827, + "learning_rate": 0.0008859462960740076, + "loss": 1.4812, + "step": 4243 + }, + { + "epoch": 0.4476793248945148, + "grad_norm": 0.6955150961875916, + "learning_rate": 0.000885699387676279, + "loss": 1.4726, + "step": 4244 + }, + { + "epoch": 0.4477848101265823, + "grad_norm": 0.6477301120758057, + "learning_rate": 0.0008854524640727575, + "loss": 1.5082, + "step": 4245 + }, + { + "epoch": 0.44789029535864977, + "grad_norm": 0.8660004734992981, + "learning_rate": 0.0008852055252911121, + "loss": 1.4438, + "step": 4246 + }, + { + "epoch": 0.4479957805907173, + "grad_norm": 0.6423718333244324, + "learning_rate": 0.0008849585713590134, + "loss": 1.4241, + "step": 4247 + }, + { + "epoch": 0.4481012658227848, + "grad_norm": 1.0160528421401978, + "learning_rate": 0.0008847116023041336, + "loss": 1.4823, + "step": 4248 + }, + { + "epoch": 0.4482067510548523, + "grad_norm": 0.6868560314178467, + 
"learning_rate": 0.0008844646181541472, + "loss": 1.4921, + "step": 4249 + }, + { + "epoch": 0.44831223628691985, + "grad_norm": 0.9006314277648926, + "learning_rate": 0.0008842176189367299, + "loss": 1.4805, + "step": 4250 + }, + { + "epoch": 0.44841772151898734, + "grad_norm": 0.7283936142921448, + "learning_rate": 0.000883970604679559, + "loss": 1.4738, + "step": 4251 + }, + { + "epoch": 0.44852320675105484, + "grad_norm": 0.7502012848854065, + "learning_rate": 0.0008837235754103136, + "loss": 1.491, + "step": 4252 + }, + { + "epoch": 0.4486286919831224, + "grad_norm": 0.6538241505622864, + "learning_rate": 0.000883476531156675, + "loss": 1.4684, + "step": 4253 + }, + { + "epoch": 0.4487341772151899, + "grad_norm": 0.6714367866516113, + "learning_rate": 0.0008832294719463256, + "loss": 1.4698, + "step": 4254 + }, + { + "epoch": 0.44883966244725737, + "grad_norm": 0.6803892254829407, + "learning_rate": 0.0008829823978069494, + "loss": 1.5122, + "step": 4255 + }, + { + "epoch": 0.4489451476793249, + "grad_norm": 0.6734070181846619, + "learning_rate": 0.0008827353087662326, + "loss": 1.5079, + "step": 4256 + }, + { + "epoch": 0.4490506329113924, + "grad_norm": 0.6586962342262268, + "learning_rate": 0.0008824882048518622, + "loss": 1.4264, + "step": 4257 + }, + { + "epoch": 0.4491561181434599, + "grad_norm": 0.6510891318321228, + "learning_rate": 0.0008822410860915281, + "loss": 1.4644, + "step": 4258 + }, + { + "epoch": 0.44926160337552745, + "grad_norm": 0.6392174363136292, + "learning_rate": 0.0008819939525129207, + "loss": 1.4902, + "step": 4259 + }, + { + "epoch": 0.44936708860759494, + "grad_norm": 0.6900786757469177, + "learning_rate": 0.0008817468041437329, + "loss": 1.4516, + "step": 4260 + }, + { + "epoch": 0.44947257383966244, + "grad_norm": 0.642826497554779, + "learning_rate": 0.0008814996410116587, + "loss": 1.4839, + "step": 4261 + }, + { + "epoch": 0.44957805907173, + "grad_norm": 0.6358623504638672, + "learning_rate": 0.0008812524631443938, + 
"loss": 1.4895, + "step": 4262 + }, + { + "epoch": 0.4496835443037975, + "grad_norm": 0.6125897169113159, + "learning_rate": 0.0008810052705696363, + "loss": 1.4254, + "step": 4263 + }, + { + "epoch": 0.44978902953586497, + "grad_norm": 0.5988279581069946, + "learning_rate": 0.0008807580633150848, + "loss": 1.4882, + "step": 4264 + }, + { + "epoch": 0.44989451476793246, + "grad_norm": 0.6184555292129517, + "learning_rate": 0.0008805108414084401, + "loss": 1.4661, + "step": 4265 + }, + { + "epoch": 0.45, + "grad_norm": 0.7269158363342285, + "learning_rate": 0.0008802636048774052, + "loss": 1.4679, + "step": 4266 + }, + { + "epoch": 0.4501054852320675, + "grad_norm": 0.6563864946365356, + "learning_rate": 0.0008800163537496837, + "loss": 1.4247, + "step": 4267 + }, + { + "epoch": 0.450210970464135, + "grad_norm": 0.6297106742858887, + "learning_rate": 0.0008797690880529813, + "loss": 1.4489, + "step": 4268 + }, + { + "epoch": 0.45031645569620254, + "grad_norm": 0.6485888957977295, + "learning_rate": 0.0008795218078150056, + "loss": 1.453, + "step": 4269 + }, + { + "epoch": 0.45042194092827004, + "grad_norm": 0.6887204051017761, + "learning_rate": 0.0008792745130634654, + "loss": 1.4854, + "step": 4270 + }, + { + "epoch": 0.45052742616033753, + "grad_norm": 0.6416027545928955, + "learning_rate": 0.0008790272038260715, + "loss": 1.4387, + "step": 4271 + }, + { + "epoch": 0.4506329113924051, + "grad_norm": 0.7751789093017578, + "learning_rate": 0.000878779880130536, + "loss": 1.4678, + "step": 4272 + }, + { + "epoch": 0.45073839662447257, + "grad_norm": 0.6278899908065796, + "learning_rate": 0.0008785325420045727, + "loss": 1.4557, + "step": 4273 + }, + { + "epoch": 0.45084388185654006, + "grad_norm": 0.6930785179138184, + "learning_rate": 0.0008782851894758971, + "loss": 1.4604, + "step": 4274 + }, + { + "epoch": 0.4509493670886076, + "grad_norm": 0.6527055501937866, + "learning_rate": 0.0008780378225722264, + "loss": 1.4455, + "step": 4275 + }, + { + "epoch": 
0.4510548523206751, + "grad_norm": 0.8414716124534607, + "learning_rate": 0.0008777904413212794, + "loss": 1.4449, + "step": 4276 + }, + { + "epoch": 0.4511603375527426, + "grad_norm": 0.707170307636261, + "learning_rate": 0.0008775430457507759, + "loss": 1.4809, + "step": 4277 + }, + { + "epoch": 0.45126582278481014, + "grad_norm": 0.8676979541778564, + "learning_rate": 0.0008772956358884383, + "loss": 1.4563, + "step": 4278 + }, + { + "epoch": 0.45137130801687764, + "grad_norm": 1.1468303203582764, + "learning_rate": 0.0008770482117619901, + "loss": 1.4524, + "step": 4279 + }, + { + "epoch": 0.45147679324894513, + "grad_norm": 0.6669288277626038, + "learning_rate": 0.0008768007733991561, + "loss": 1.4967, + "step": 4280 + }, + { + "epoch": 0.4515822784810127, + "grad_norm": 0.9909290671348572, + "learning_rate": 0.0008765533208276632, + "loss": 1.4559, + "step": 4281 + }, + { + "epoch": 0.45168776371308017, + "grad_norm": 0.7037336230278015, + "learning_rate": 0.0008763058540752396, + "loss": 1.5105, + "step": 4282 + }, + { + "epoch": 0.45179324894514766, + "grad_norm": 1.214548110961914, + "learning_rate": 0.0008760583731696151, + "loss": 1.4894, + "step": 4283 + }, + { + "epoch": 0.4518987341772152, + "grad_norm": 0.6370702385902405, + "learning_rate": 0.0008758108781385216, + "loss": 1.4845, + "step": 4284 + }, + { + "epoch": 0.4520042194092827, + "grad_norm": 1.0121476650238037, + "learning_rate": 0.0008755633690096918, + "loss": 1.4587, + "step": 4285 + }, + { + "epoch": 0.4521097046413502, + "grad_norm": 0.6107696890830994, + "learning_rate": 0.0008753158458108604, + "loss": 1.4419, + "step": 4286 + }, + { + "epoch": 0.45221518987341774, + "grad_norm": 0.9268682599067688, + "learning_rate": 0.0008750683085697632, + "loss": 1.454, + "step": 4287 + }, + { + "epoch": 0.45232067510548524, + "grad_norm": 0.7556064128875732, + "learning_rate": 0.0008748207573141388, + "loss": 1.4939, + "step": 4288 + }, + { + "epoch": 0.45242616033755273, + "grad_norm": 
0.7471961379051208, + "learning_rate": 0.000874573192071726, + "loss": 1.492, + "step": 4289 + }, + { + "epoch": 0.4525316455696203, + "grad_norm": 0.6149806380271912, + "learning_rate": 0.0008743256128702658, + "loss": 1.4414, + "step": 4290 + }, + { + "epoch": 0.45263713080168777, + "grad_norm": 0.7218098044395447, + "learning_rate": 0.0008740780197375007, + "loss": 1.4654, + "step": 4291 + }, + { + "epoch": 0.45274261603375526, + "grad_norm": 0.7639976739883423, + "learning_rate": 0.000873830412701175, + "loss": 1.47, + "step": 4292 + }, + { + "epoch": 0.4528481012658228, + "grad_norm": 0.6883829832077026, + "learning_rate": 0.0008735827917890339, + "loss": 1.5113, + "step": 4293 + }, + { + "epoch": 0.4529535864978903, + "grad_norm": 1.0229825973510742, + "learning_rate": 0.000873335157028825, + "loss": 1.4707, + "step": 4294 + }, + { + "epoch": 0.4530590717299578, + "grad_norm": 0.6560525894165039, + "learning_rate": 0.0008730875084482964, + "loss": 1.4613, + "step": 4295 + }, + { + "epoch": 0.4531645569620253, + "grad_norm": 1.0681480169296265, + "learning_rate": 0.0008728398460751989, + "loss": 1.4758, + "step": 4296 + }, + { + "epoch": 0.45327004219409284, + "grad_norm": 0.7177939414978027, + "learning_rate": 0.0008725921699372839, + "loss": 1.4484, + "step": 4297 + }, + { + "epoch": 0.45337552742616033, + "grad_norm": 0.7885600328445435, + "learning_rate": 0.0008723444800623053, + "loss": 1.4709, + "step": 4298 + }, + { + "epoch": 0.4534810126582278, + "grad_norm": 0.6811338663101196, + "learning_rate": 0.0008720967764780173, + "loss": 1.4618, + "step": 4299 + }, + { + "epoch": 0.45358649789029537, + "grad_norm": 0.6582097411155701, + "learning_rate": 0.0008718490592121768, + "loss": 1.4638, + "step": 4300 + }, + { + "epoch": 0.45369198312236286, + "grad_norm": 0.6484494209289551, + "learning_rate": 0.0008716013282925418, + "loss": 1.5038, + "step": 4301 + }, + { + "epoch": 0.45379746835443036, + "grad_norm": 0.7123653888702393, + "learning_rate": 
0.0008713535837468714, + "loss": 1.4526, + "step": 4302 + }, + { + "epoch": 0.4539029535864979, + "grad_norm": 0.6621275544166565, + "learning_rate": 0.0008711058256029269, + "loss": 1.4724, + "step": 4303 + }, + { + "epoch": 0.4540084388185654, + "grad_norm": 0.7250204682350159, + "learning_rate": 0.0008708580538884707, + "loss": 1.4624, + "step": 4304 + }, + { + "epoch": 0.4541139240506329, + "grad_norm": 0.6309456825256348, + "learning_rate": 0.0008706102686312668, + "loss": 1.4252, + "step": 4305 + }, + { + "epoch": 0.45421940928270044, + "grad_norm": 0.794440746307373, + "learning_rate": 0.0008703624698590811, + "loss": 1.4914, + "step": 4306 + }, + { + "epoch": 0.45432489451476793, + "grad_norm": 0.6671372056007385, + "learning_rate": 0.0008701146575996804, + "loss": 1.4865, + "step": 4307 + }, + { + "epoch": 0.4544303797468354, + "grad_norm": 0.7956022024154663, + "learning_rate": 0.0008698668318808334, + "loss": 1.4651, + "step": 4308 + }, + { + "epoch": 0.45453586497890297, + "grad_norm": 0.7181084752082825, + "learning_rate": 0.0008696189927303101, + "loss": 1.4586, + "step": 4309 + }, + { + "epoch": 0.45464135021097046, + "grad_norm": 0.817436695098877, + "learning_rate": 0.0008693711401758822, + "loss": 1.4317, + "step": 4310 + }, + { + "epoch": 0.45474683544303796, + "grad_norm": 0.7245365381240845, + "learning_rate": 0.0008691232742453229, + "loss": 1.4734, + "step": 4311 + }, + { + "epoch": 0.4548523206751055, + "grad_norm": 1.0058214664459229, + "learning_rate": 0.0008688753949664067, + "loss": 1.4576, + "step": 4312 + }, + { + "epoch": 0.454957805907173, + "grad_norm": 0.678666353225708, + "learning_rate": 0.0008686275023669096, + "loss": 1.4753, + "step": 4313 + }, + { + "epoch": 0.4550632911392405, + "grad_norm": 0.9052042365074158, + "learning_rate": 0.0008683795964746094, + "loss": 1.4233, + "step": 4314 + }, + { + "epoch": 0.45516877637130804, + "grad_norm": 0.7015458941459656, + "learning_rate": 0.0008681316773172852, + "loss": 1.4579, + 
"step": 4315 + }, + { + "epoch": 0.45527426160337553, + "grad_norm": 0.8134968876838684, + "learning_rate": 0.0008678837449227174, + "loss": 1.4412, + "step": 4316 + }, + { + "epoch": 0.455379746835443, + "grad_norm": 0.7415682673454285, + "learning_rate": 0.0008676357993186882, + "loss": 1.4766, + "step": 4317 + }, + { + "epoch": 0.45548523206751057, + "grad_norm": 0.6326403021812439, + "learning_rate": 0.000867387840532981, + "loss": 1.4376, + "step": 4318 + }, + { + "epoch": 0.45559071729957806, + "grad_norm": 0.8298172950744629, + "learning_rate": 0.0008671398685933811, + "loss": 1.4466, + "step": 4319 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 0.6709630489349365, + "learning_rate": 0.0008668918835276747, + "loss": 1.5216, + "step": 4320 + }, + { + "epoch": 0.4558016877637131, + "grad_norm": 0.7248371243476868, + "learning_rate": 0.0008666438853636499, + "loss": 1.4331, + "step": 4321 + }, + { + "epoch": 0.4559071729957806, + "grad_norm": 0.6209091544151306, + "learning_rate": 0.0008663958741290961, + "loss": 1.4402, + "step": 4322 + }, + { + "epoch": 0.4560126582278481, + "grad_norm": 0.6312342882156372, + "learning_rate": 0.0008661478498518042, + "loss": 1.5202, + "step": 4323 + }, + { + "epoch": 0.45611814345991564, + "grad_norm": 0.7673289775848389, + "learning_rate": 0.0008658998125595666, + "loss": 1.454, + "step": 4324 + }, + { + "epoch": 0.45622362869198313, + "grad_norm": 0.8395981788635254, + "learning_rate": 0.0008656517622801771, + "loss": 1.4785, + "step": 4325 + }, + { + "epoch": 0.4563291139240506, + "grad_norm": 0.6158424615859985, + "learning_rate": 0.0008654036990414308, + "loss": 1.4451, + "step": 4326 + }, + { + "epoch": 0.45643459915611817, + "grad_norm": 0.8245590925216675, + "learning_rate": 0.0008651556228711247, + "loss": 1.5123, + "step": 4327 + }, + { + "epoch": 0.45654008438818566, + "grad_norm": 0.6233504414558411, + "learning_rate": 0.0008649075337970567, + "loss": 1.471, + "step": 4328 + }, + { + "epoch": 
0.45664556962025316, + "grad_norm": 0.8520079851150513, + "learning_rate": 0.0008646594318470268, + "loss": 1.4806, + "step": 4329 + }, + { + "epoch": 0.45675105485232065, + "grad_norm": 0.6138942837715149, + "learning_rate": 0.0008644113170488355, + "loss": 1.4442, + "step": 4330 + }, + { + "epoch": 0.4568565400843882, + "grad_norm": 0.7060030102729797, + "learning_rate": 0.0008641631894302858, + "loss": 1.4511, + "step": 4331 + }, + { + "epoch": 0.4569620253164557, + "grad_norm": 0.641670286655426, + "learning_rate": 0.0008639150490191814, + "loss": 1.4842, + "step": 4332 + }, + { + "epoch": 0.4570675105485232, + "grad_norm": 0.599383533000946, + "learning_rate": 0.0008636668958433279, + "loss": 1.429, + "step": 4333 + }, + { + "epoch": 0.45717299578059073, + "grad_norm": 0.6771667003631592, + "learning_rate": 0.0008634187299305318, + "loss": 1.4706, + "step": 4334 + }, + { + "epoch": 0.4572784810126582, + "grad_norm": 0.6146423816680908, + "learning_rate": 0.0008631705513086013, + "loss": 1.4507, + "step": 4335 + }, + { + "epoch": 0.4573839662447257, + "grad_norm": 0.765783965587616, + "learning_rate": 0.0008629223600053465, + "loss": 1.485, + "step": 4336 + }, + { + "epoch": 0.45748945147679326, + "grad_norm": 0.5901432037353516, + "learning_rate": 0.000862674156048578, + "loss": 1.4434, + "step": 4337 + }, + { + "epoch": 0.45759493670886076, + "grad_norm": 0.8576634526252747, + "learning_rate": 0.0008624259394661085, + "loss": 1.4509, + "step": 4338 + }, + { + "epoch": 0.45770042194092825, + "grad_norm": 0.8005823493003845, + "learning_rate": 0.000862177710285752, + "loss": 1.4921, + "step": 4339 + }, + { + "epoch": 0.4578059071729958, + "grad_norm": 0.6354132294654846, + "learning_rate": 0.0008619294685353235, + "loss": 1.443, + "step": 4340 + }, + { + "epoch": 0.4579113924050633, + "grad_norm": 0.7344257831573486, + "learning_rate": 0.00086168121424264, + "loss": 1.4496, + "step": 4341 + }, + { + "epoch": 0.4580168776371308, + "grad_norm": 
0.6332956552505493, + "learning_rate": 0.0008614329474355196, + "loss": 1.4353, + "step": 4342 + }, + { + "epoch": 0.45812236286919833, + "grad_norm": 0.5858188271522522, + "learning_rate": 0.0008611846681417818, + "loss": 1.4211, + "step": 4343 + }, + { + "epoch": 0.4582278481012658, + "grad_norm": 0.6415807604789734, + "learning_rate": 0.0008609363763892474, + "loss": 1.4799, + "step": 4344 + }, + { + "epoch": 0.4583333333333333, + "grad_norm": 0.6020827889442444, + "learning_rate": 0.0008606880722057386, + "loss": 1.4724, + "step": 4345 + }, + { + "epoch": 0.45843881856540086, + "grad_norm": 0.649462103843689, + "learning_rate": 0.0008604397556190797, + "loss": 1.5051, + "step": 4346 + }, + { + "epoch": 0.45854430379746836, + "grad_norm": 0.6560340523719788, + "learning_rate": 0.0008601914266570956, + "loss": 1.439, + "step": 4347 + }, + { + "epoch": 0.45864978902953585, + "grad_norm": 0.5925257802009583, + "learning_rate": 0.0008599430853476126, + "loss": 1.4558, + "step": 4348 + }, + { + "epoch": 0.4587552742616034, + "grad_norm": 0.8312147259712219, + "learning_rate": 0.0008596947317184585, + "loss": 1.4772, + "step": 4349 + }, + { + "epoch": 0.4588607594936709, + "grad_norm": 0.7565881609916687, + "learning_rate": 0.0008594463657974627, + "loss": 1.4666, + "step": 4350 + }, + { + "epoch": 0.4589662447257384, + "grad_norm": 0.6293784976005554, + "learning_rate": 0.000859197987612456, + "loss": 1.5016, + "step": 4351 + }, + { + "epoch": 0.45907172995780593, + "grad_norm": 0.7229341864585876, + "learning_rate": 0.0008589495971912703, + "loss": 1.4932, + "step": 4352 + }, + { + "epoch": 0.4591772151898734, + "grad_norm": 0.6150755882263184, + "learning_rate": 0.000858701194561739, + "loss": 1.46, + "step": 4353 + }, + { + "epoch": 0.4592827004219409, + "grad_norm": 0.6999344825744629, + "learning_rate": 0.0008584527797516966, + "loss": 1.4856, + "step": 4354 + }, + { + "epoch": 0.45938818565400846, + "grad_norm": 0.6555134654045105, + "learning_rate": 
0.0008582043527889797, + "loss": 1.49, + "step": 4355 + }, + { + "epoch": 0.45949367088607596, + "grad_norm": 0.6778053641319275, + "learning_rate": 0.0008579559137014254, + "loss": 1.4477, + "step": 4356 + }, + { + "epoch": 0.45959915611814345, + "grad_norm": 0.6994977593421936, + "learning_rate": 0.0008577074625168725, + "loss": 1.475, + "step": 4357 + }, + { + "epoch": 0.459704641350211, + "grad_norm": 0.6580261588096619, + "learning_rate": 0.0008574589992631617, + "loss": 1.4858, + "step": 4358 + }, + { + "epoch": 0.4598101265822785, + "grad_norm": 0.8714620471000671, + "learning_rate": 0.0008572105239681338, + "loss": 1.4784, + "step": 4359 + }, + { + "epoch": 0.459915611814346, + "grad_norm": 0.8142638206481934, + "learning_rate": 0.0008569620366596322, + "loss": 1.4554, + "step": 4360 + }, + { + "epoch": 0.46002109704641353, + "grad_norm": 0.6901503205299377, + "learning_rate": 0.0008567135373655012, + "loss": 1.463, + "step": 4361 + }, + { + "epoch": 0.460126582278481, + "grad_norm": 0.7709205746650696, + "learning_rate": 0.0008564650261135862, + "loss": 1.4226, + "step": 4362 + }, + { + "epoch": 0.4602320675105485, + "grad_norm": 0.7403943538665771, + "learning_rate": 0.0008562165029317339, + "loss": 1.4415, + "step": 4363 + }, + { + "epoch": 0.460337552742616, + "grad_norm": 0.8300871253013611, + "learning_rate": 0.0008559679678477929, + "loss": 1.4697, + "step": 4364 + }, + { + "epoch": 0.46044303797468356, + "grad_norm": 0.7023184895515442, + "learning_rate": 0.0008557194208896129, + "loss": 1.474, + "step": 4365 + }, + { + "epoch": 0.46054852320675105, + "grad_norm": 0.9220607876777649, + "learning_rate": 0.0008554708620850445, + "loss": 1.4233, + "step": 4366 + }, + { + "epoch": 0.46065400843881854, + "grad_norm": 0.666252076625824, + "learning_rate": 0.0008552222914619401, + "loss": 1.4625, + "step": 4367 + }, + { + "epoch": 0.4607594936708861, + "grad_norm": 0.8815648555755615, + "learning_rate": 0.0008549737090481532, + "loss": 1.453, + "step": 
4368 + }, + { + "epoch": 0.4608649789029536, + "grad_norm": 0.7860606908798218, + "learning_rate": 0.0008547251148715386, + "loss": 1.4617, + "step": 4369 + }, + { + "epoch": 0.4609704641350211, + "grad_norm": 0.6701197028160095, + "learning_rate": 0.000854476508959953, + "loss": 1.4628, + "step": 4370 + }, + { + "epoch": 0.4610759493670886, + "grad_norm": 0.7395409941673279, + "learning_rate": 0.0008542278913412535, + "loss": 1.4223, + "step": 4371 + }, + { + "epoch": 0.4611814345991561, + "grad_norm": 0.6713054180145264, + "learning_rate": 0.0008539792620432989, + "loss": 1.4764, + "step": 4372 + }, + { + "epoch": 0.4612869198312236, + "grad_norm": 0.7552626132965088, + "learning_rate": 0.0008537306210939497, + "loss": 1.4334, + "step": 4373 + }, + { + "epoch": 0.46139240506329116, + "grad_norm": 0.6624558568000793, + "learning_rate": 0.0008534819685210668, + "loss": 1.4627, + "step": 4374 + }, + { + "epoch": 0.46149789029535865, + "grad_norm": 0.6501563787460327, + "learning_rate": 0.0008532333043525136, + "loss": 1.4691, + "step": 4375 + }, + { + "epoch": 0.46160337552742614, + "grad_norm": 0.6501289010047913, + "learning_rate": 0.0008529846286161539, + "loss": 1.4754, + "step": 4376 + }, + { + "epoch": 0.4617088607594937, + "grad_norm": 0.6261796355247498, + "learning_rate": 0.000852735941339853, + "loss": 1.4364, + "step": 4377 + }, + { + "epoch": 0.4618143459915612, + "grad_norm": 0.6302979588508606, + "learning_rate": 0.0008524872425514775, + "loss": 1.4717, + "step": 4378 + }, + { + "epoch": 0.4619198312236287, + "grad_norm": 0.6968245506286621, + "learning_rate": 0.0008522385322788955, + "loss": 1.485, + "step": 4379 + }, + { + "epoch": 0.4620253164556962, + "grad_norm": 0.7741068005561829, + "learning_rate": 0.0008519898105499762, + "loss": 1.4666, + "step": 4380 + }, + { + "epoch": 0.4621308016877637, + "grad_norm": 0.6113210916519165, + "learning_rate": 0.00085174107739259, + "loss": 1.4884, + "step": 4381 + }, + { + "epoch": 0.4622362869198312, + 
"grad_norm": 0.7000107765197754, + "learning_rate": 0.000851492332834609, + "loss": 1.4484, + "step": 4382 + }, + { + "epoch": 0.46234177215189876, + "grad_norm": 0.6874989867210388, + "learning_rate": 0.0008512435769039055, + "loss": 1.4447, + "step": 4383 + }, + { + "epoch": 0.46244725738396625, + "grad_norm": 0.9326478242874146, + "learning_rate": 0.0008509948096283547, + "loss": 1.4286, + "step": 4384 + }, + { + "epoch": 0.46255274261603374, + "grad_norm": 0.64959317445755, + "learning_rate": 0.0008507460310358319, + "loss": 1.4704, + "step": 4385 + }, + { + "epoch": 0.4626582278481013, + "grad_norm": 0.8432052135467529, + "learning_rate": 0.0008504972411542138, + "loss": 1.465, + "step": 4386 + }, + { + "epoch": 0.4627637130801688, + "grad_norm": 0.6713340282440186, + "learning_rate": 0.0008502484400113787, + "loss": 1.4377, + "step": 4387 + }, + { + "epoch": 0.4628691983122363, + "grad_norm": 0.7313181757926941, + "learning_rate": 0.0008499996276352061, + "loss": 1.4466, + "step": 4388 + }, + { + "epoch": 0.4629746835443038, + "grad_norm": 0.6727550029754639, + "learning_rate": 0.0008497508040535766, + "loss": 1.4544, + "step": 4389 + }, + { + "epoch": 0.4630801687763713, + "grad_norm": 0.7987620830535889, + "learning_rate": 0.0008495019692943721, + "loss": 1.4509, + "step": 4390 + }, + { + "epoch": 0.4631856540084388, + "grad_norm": 0.8209741115570068, + "learning_rate": 0.0008492531233854757, + "loss": 1.4542, + "step": 4391 + }, + { + "epoch": 0.46329113924050636, + "grad_norm": 0.6543163061141968, + "learning_rate": 0.0008490042663547719, + "loss": 1.4341, + "step": 4392 + }, + { + "epoch": 0.46339662447257385, + "grad_norm": 0.6855558753013611, + "learning_rate": 0.0008487553982301465, + "loss": 1.4403, + "step": 4393 + }, + { + "epoch": 0.46350210970464134, + "grad_norm": 0.6530078053474426, + "learning_rate": 0.0008485065190394863, + "loss": 1.4742, + "step": 4394 + }, + { + "epoch": 0.46360759493670883, + "grad_norm": 0.6835710406303406, + 
"learning_rate": 0.0008482576288106794, + "loss": 1.475, + "step": 4395 + }, + { + "epoch": 0.4637130801687764, + "grad_norm": 0.6257930397987366, + "learning_rate": 0.000848008727571615, + "loss": 1.4539, + "step": 4396 + }, + { + "epoch": 0.4638185654008439, + "grad_norm": 0.7049918174743652, + "learning_rate": 0.0008477598153501842, + "loss": 1.4721, + "step": 4397 + }, + { + "epoch": 0.46392405063291137, + "grad_norm": 0.6199067234992981, + "learning_rate": 0.0008475108921742787, + "loss": 1.4875, + "step": 4398 + }, + { + "epoch": 0.4640295358649789, + "grad_norm": 0.6489985585212708, + "learning_rate": 0.0008472619580717914, + "loss": 1.428, + "step": 4399 + }, + { + "epoch": 0.4641350210970464, + "grad_norm": 0.654898464679718, + "learning_rate": 0.0008470130130706166, + "loss": 1.4988, + "step": 4400 + }, + { + "epoch": 0.4642405063291139, + "grad_norm": 0.7087031006813049, + "learning_rate": 0.00084676405719865, + "loss": 1.4422, + "step": 4401 + }, + { + "epoch": 0.46434599156118145, + "grad_norm": 0.6771162748336792, + "learning_rate": 0.0008465150904837883, + "loss": 1.4837, + "step": 4402 + }, + { + "epoch": 0.46445147679324894, + "grad_norm": 1.0273841619491577, + "learning_rate": 0.0008462661129539296, + "loss": 1.5214, + "step": 4403 + }, + { + "epoch": 0.46455696202531643, + "grad_norm": 0.6500129103660583, + "learning_rate": 0.0008460171246369725, + "loss": 1.4937, + "step": 4404 + }, + { + "epoch": 0.464662447257384, + "grad_norm": 1.0508060455322266, + "learning_rate": 0.000845768125560818, + "loss": 1.4651, + "step": 4405 + }, + { + "epoch": 0.4647679324894515, + "grad_norm": 0.6780763864517212, + "learning_rate": 0.0008455191157533677, + "loss": 1.4734, + "step": 4406 + }, + { + "epoch": 0.46487341772151897, + "grad_norm": 1.0090034008026123, + "learning_rate": 0.000845270095242524, + "loss": 1.4633, + "step": 4407 + }, + { + "epoch": 0.4649789029535865, + "grad_norm": 0.6944848895072937, + "learning_rate": 0.0008450210640561912, + "loss": 
1.4843, + "step": 4408 + }, + { + "epoch": 0.465084388185654, + "grad_norm": 0.821017861366272, + "learning_rate": 0.000844772022222274, + "loss": 1.4681, + "step": 4409 + }, + { + "epoch": 0.4651898734177215, + "grad_norm": 0.6846144795417786, + "learning_rate": 0.0008445229697686795, + "loss": 1.4665, + "step": 4410 + }, + { + "epoch": 0.46529535864978905, + "grad_norm": 0.7140013575553894, + "learning_rate": 0.0008442739067233148, + "loss": 1.4476, + "step": 4411 + }, + { + "epoch": 0.46540084388185654, + "grad_norm": 0.7823429107666016, + "learning_rate": 0.0008440248331140888, + "loss": 1.4945, + "step": 4412 + }, + { + "epoch": 0.46550632911392403, + "grad_norm": 0.7332726716995239, + "learning_rate": 0.0008437757489689113, + "loss": 1.411, + "step": 4413 + }, + { + "epoch": 0.4656118143459916, + "grad_norm": 0.8648149967193604, + "learning_rate": 0.0008435266543156935, + "loss": 1.4537, + "step": 4414 + }, + { + "epoch": 0.4657172995780591, + "grad_norm": 0.7146331071853638, + "learning_rate": 0.0008432775491823477, + "loss": 1.4713, + "step": 4415 + }, + { + "epoch": 0.46582278481012657, + "grad_norm": 0.9062390327453613, + "learning_rate": 0.0008430284335967876, + "loss": 1.4919, + "step": 4416 + }, + { + "epoch": 0.4659282700421941, + "grad_norm": 0.6611160039901733, + "learning_rate": 0.0008427793075869275, + "loss": 1.4369, + "step": 4417 + }, + { + "epoch": 0.4660337552742616, + "grad_norm": 0.7065680027008057, + "learning_rate": 0.0008425301711806833, + "loss": 1.4678, + "step": 4418 + }, + { + "epoch": 0.4661392405063291, + "grad_norm": 0.6088472604751587, + "learning_rate": 0.0008422810244059721, + "loss": 1.4664, + "step": 4419 + }, + { + "epoch": 0.46624472573839665, + "grad_norm": 0.6969806551933289, + "learning_rate": 0.0008420318672907119, + "loss": 1.4392, + "step": 4420 + }, + { + "epoch": 0.46635021097046414, + "grad_norm": 0.6613895297050476, + "learning_rate": 0.0008417826998628222, + "loss": 1.46, + "step": 4421 + }, + { + "epoch": 
0.46645569620253163, + "grad_norm": 0.6324992775917053, + "learning_rate": 0.0008415335221502231, + "loss": 1.4592, + "step": 4422 + }, + { + "epoch": 0.4665611814345992, + "grad_norm": 0.7502767443656921, + "learning_rate": 0.0008412843341808365, + "loss": 1.4754, + "step": 4423 + }, + { + "epoch": 0.4666666666666667, + "grad_norm": 0.7831994891166687, + "learning_rate": 0.0008410351359825851, + "loss": 1.485, + "step": 4424 + }, + { + "epoch": 0.46677215189873417, + "grad_norm": 0.596474826335907, + "learning_rate": 0.0008407859275833928, + "loss": 1.4456, + "step": 4425 + }, + { + "epoch": 0.4668776371308017, + "grad_norm": 0.7247762680053711, + "learning_rate": 0.0008405367090111845, + "loss": 1.4524, + "step": 4426 + }, + { + "epoch": 0.4669831223628692, + "grad_norm": 0.6272780299186707, + "learning_rate": 0.0008402874802938866, + "loss": 1.4533, + "step": 4427 + }, + { + "epoch": 0.4670886075949367, + "grad_norm": 0.676921010017395, + "learning_rate": 0.0008400382414594263, + "loss": 1.4659, + "step": 4428 + }, + { + "epoch": 0.4671940928270042, + "grad_norm": 0.6428462862968445, + "learning_rate": 0.000839788992535732, + "loss": 1.4964, + "step": 4429 + }, + { + "epoch": 0.46729957805907174, + "grad_norm": 0.607729971408844, + "learning_rate": 0.0008395397335507334, + "loss": 1.4281, + "step": 4430 + }, + { + "epoch": 0.46740506329113923, + "grad_norm": 0.598680317401886, + "learning_rate": 0.0008392904645323612, + "loss": 1.4399, + "step": 4431 + }, + { + "epoch": 0.4675105485232067, + "grad_norm": 0.6350219249725342, + "learning_rate": 0.0008390411855085473, + "loss": 1.489, + "step": 4432 + }, + { + "epoch": 0.4676160337552743, + "grad_norm": 0.6551492214202881, + "learning_rate": 0.0008387918965072244, + "loss": 1.4552, + "step": 4433 + }, + { + "epoch": 0.46772151898734177, + "grad_norm": 1.0336309671401978, + "learning_rate": 0.0008385425975563269, + "loss": 1.4855, + "step": 4434 + }, + { + "epoch": 0.46782700421940926, + "grad_norm": 
0.7219722270965576, + "learning_rate": 0.0008382932886837897, + "loss": 1.4216, + "step": 4435 + }, + { + "epoch": 0.4679324894514768, + "grad_norm": 0.6893084049224854, + "learning_rate": 0.0008380439699175493, + "loss": 1.4252, + "step": 4436 + }, + { + "epoch": 0.4680379746835443, + "grad_norm": 0.6550696492195129, + "learning_rate": 0.000837794641285543, + "loss": 1.4513, + "step": 4437 + }, + { + "epoch": 0.4681434599156118, + "grad_norm": 0.6420310139656067, + "learning_rate": 0.0008375453028157093, + "loss": 1.4635, + "step": 4438 + }, + { + "epoch": 0.46824894514767934, + "grad_norm": 0.6512349843978882, + "learning_rate": 0.000837295954535988, + "loss": 1.4333, + "step": 4439 + }, + { + "epoch": 0.46835443037974683, + "grad_norm": 0.6286420822143555, + "learning_rate": 0.0008370465964743196, + "loss": 1.4811, + "step": 4440 + }, + { + "epoch": 0.4684599156118143, + "grad_norm": 0.6722313165664673, + "learning_rate": 0.0008367972286586461, + "loss": 1.4495, + "step": 4441 + }, + { + "epoch": 0.4685654008438819, + "grad_norm": 0.6972185373306274, + "learning_rate": 0.0008365478511169103, + "loss": 1.4803, + "step": 4442 + }, + { + "epoch": 0.46867088607594937, + "grad_norm": 0.6739183068275452, + "learning_rate": 0.000836298463877056, + "loss": 1.4606, + "step": 4443 + }, + { + "epoch": 0.46877637130801686, + "grad_norm": 0.8931304216384888, + "learning_rate": 0.0008360490669670288, + "loss": 1.4486, + "step": 4444 + }, + { + "epoch": 0.4688818565400844, + "grad_norm": 0.6796161532402039, + "learning_rate": 0.0008357996604147744, + "loss": 1.4783, + "step": 4445 + }, + { + "epoch": 0.4689873417721519, + "grad_norm": 0.7698934078216553, + "learning_rate": 0.0008355502442482403, + "loss": 1.4362, + "step": 4446 + }, + { + "epoch": 0.4690928270042194, + "grad_norm": 0.6608709096908569, + "learning_rate": 0.0008353008184953748, + "loss": 1.4759, + "step": 4447 + }, + { + "epoch": 0.46919831223628694, + "grad_norm": 0.8142292499542236, + "learning_rate": 
0.0008350513831841271, + "loss": 1.4956, + "step": 4448 + }, + { + "epoch": 0.46930379746835443, + "grad_norm": 0.7340608835220337, + "learning_rate": 0.0008348019383424479, + "loss": 1.4699, + "step": 4449 + }, + { + "epoch": 0.4694092827004219, + "grad_norm": 0.6966768503189087, + "learning_rate": 0.0008345524839982886, + "loss": 1.4451, + "step": 4450 + }, + { + "epoch": 0.4695147679324895, + "grad_norm": 0.701932430267334, + "learning_rate": 0.000834303020179602, + "loss": 1.4543, + "step": 4451 + }, + { + "epoch": 0.46962025316455697, + "grad_norm": 0.6938335299491882, + "learning_rate": 0.0008340535469143414, + "loss": 1.4314, + "step": 4452 + }, + { + "epoch": 0.46972573839662446, + "grad_norm": 0.6349616646766663, + "learning_rate": 0.0008338040642304618, + "loss": 1.4707, + "step": 4453 + }, + { + "epoch": 0.469831223628692, + "grad_norm": 0.8454926609992981, + "learning_rate": 0.0008335545721559188, + "loss": 1.4195, + "step": 4454 + }, + { + "epoch": 0.4699367088607595, + "grad_norm": 0.6719247698783875, + "learning_rate": 0.0008333050707186696, + "loss": 1.4391, + "step": 4455 + }, + { + "epoch": 0.470042194092827, + "grad_norm": 0.8682289719581604, + "learning_rate": 0.0008330555599466716, + "loss": 1.4543, + "step": 4456 + }, + { + "epoch": 0.47014767932489454, + "grad_norm": 0.8247880935668945, + "learning_rate": 0.000832806039867884, + "loss": 1.4343, + "step": 4457 + }, + { + "epoch": 0.47025316455696203, + "grad_norm": 0.6892538070678711, + "learning_rate": 0.000832556510510267, + "loss": 1.4373, + "step": 4458 + }, + { + "epoch": 0.4703586497890295, + "grad_norm": 0.8419330716133118, + "learning_rate": 0.0008323069719017812, + "loss": 1.4761, + "step": 4459 + }, + { + "epoch": 0.4704641350210971, + "grad_norm": 0.6357392072677612, + "learning_rate": 0.0008320574240703886, + "loss": 1.4866, + "step": 4460 + }, + { + "epoch": 0.47056962025316457, + "grad_norm": 0.9263246655464172, + "learning_rate": 0.0008318078670440525, + "loss": 1.4646, + 
"step": 4461 + }, + { + "epoch": 0.47067510548523206, + "grad_norm": 0.6434469819068909, + "learning_rate": 0.0008315583008507372, + "loss": 1.4552, + "step": 4462 + }, + { + "epoch": 0.47078059071729955, + "grad_norm": 0.9632019996643066, + "learning_rate": 0.0008313087255184074, + "loss": 1.4435, + "step": 4463 + }, + { + "epoch": 0.4708860759493671, + "grad_norm": 0.6163313984870911, + "learning_rate": 0.0008310591410750295, + "loss": 1.4513, + "step": 4464 + }, + { + "epoch": 0.4709915611814346, + "grad_norm": 0.8464746475219727, + "learning_rate": 0.0008308095475485706, + "loss": 1.4388, + "step": 4465 + }, + { + "epoch": 0.4710970464135021, + "grad_norm": 0.6726953983306885, + "learning_rate": 0.0008305599449669989, + "loss": 1.4669, + "step": 4466 + }, + { + "epoch": 0.47120253164556963, + "grad_norm": 0.9519683122634888, + "learning_rate": 0.0008303103333582839, + "loss": 1.4432, + "step": 4467 + }, + { + "epoch": 0.4713080168776371, + "grad_norm": 0.6683551073074341, + "learning_rate": 0.0008300607127503952, + "loss": 1.4513, + "step": 4468 + }, + { + "epoch": 0.4714135021097046, + "grad_norm": 0.778900146484375, + "learning_rate": 0.0008298110831713047, + "loss": 1.4854, + "step": 4469 + }, + { + "epoch": 0.47151898734177217, + "grad_norm": 0.6600104570388794, + "learning_rate": 0.0008295614446489842, + "loss": 1.494, + "step": 4470 + }, + { + "epoch": 0.47162447257383966, + "grad_norm": 0.7512836456298828, + "learning_rate": 0.0008293117972114074, + "loss": 1.4442, + "step": 4471 + }, + { + "epoch": 0.47172995780590715, + "grad_norm": 0.7885863780975342, + "learning_rate": 0.0008290621408865481, + "loss": 1.4434, + "step": 4472 + }, + { + "epoch": 0.4718354430379747, + "grad_norm": 0.6271355152130127, + "learning_rate": 0.0008288124757023816, + "loss": 1.4689, + "step": 4473 + }, + { + "epoch": 0.4719409282700422, + "grad_norm": 0.6917973160743713, + "learning_rate": 0.0008285628016868841, + "loss": 1.4941, + "step": 4474 + }, + { + "epoch": 
0.4720464135021097, + "grad_norm": 0.6472973227500916, + "learning_rate": 0.0008283131188680332, + "loss": 1.5021, + "step": 4475 + }, + { + "epoch": 0.47215189873417723, + "grad_norm": 0.6873749494552612, + "learning_rate": 0.0008280634272738066, + "loss": 1.4279, + "step": 4476 + }, + { + "epoch": 0.4722573839662447, + "grad_norm": 0.6662966012954712, + "learning_rate": 0.0008278137269321837, + "loss": 1.4662, + "step": 4477 + }, + { + "epoch": 0.4723628691983122, + "grad_norm": 0.6334120631217957, + "learning_rate": 0.0008275640178711447, + "loss": 1.4825, + "step": 4478 + }, + { + "epoch": 0.47246835443037977, + "grad_norm": 0.7356961369514465, + "learning_rate": 0.0008273143001186709, + "loss": 1.463, + "step": 4479 + }, + { + "epoch": 0.47257383966244726, + "grad_norm": 0.7006562948226929, + "learning_rate": 0.0008270645737027441, + "loss": 1.4641, + "step": 4480 + }, + { + "epoch": 0.47267932489451475, + "grad_norm": 0.6811562776565552, + "learning_rate": 0.0008268148386513475, + "loss": 1.4352, + "step": 4481 + }, + { + "epoch": 0.4727848101265823, + "grad_norm": 0.7454225420951843, + "learning_rate": 0.0008265650949924652, + "loss": 1.4442, + "step": 4482 + }, + { + "epoch": 0.4728902953586498, + "grad_norm": 0.6769773960113525, + "learning_rate": 0.0008263153427540825, + "loss": 1.436, + "step": 4483 + }, + { + "epoch": 0.4729957805907173, + "grad_norm": 0.6783870458602905, + "learning_rate": 0.0008260655819641849, + "loss": 1.4368, + "step": 4484 + }, + { + "epoch": 0.47310126582278483, + "grad_norm": 0.6500431299209595, + "learning_rate": 0.0008258158126507594, + "loss": 1.4523, + "step": 4485 + }, + { + "epoch": 0.4732067510548523, + "grad_norm": 0.7473293542861938, + "learning_rate": 0.0008255660348417944, + "loss": 1.4527, + "step": 4486 + }, + { + "epoch": 0.4733122362869198, + "grad_norm": 0.5906692743301392, + "learning_rate": 0.0008253162485652779, + "loss": 1.4411, + "step": 4487 + }, + { + "epoch": 0.47341772151898737, + "grad_norm": 
0.6788524389266968, + "learning_rate": 0.0008250664538492006, + "loss": 1.4811, + "step": 4488 + }, + { + "epoch": 0.47352320675105486, + "grad_norm": 0.7023140788078308, + "learning_rate": 0.0008248166507215526, + "loss": 1.4582, + "step": 4489 + }, + { + "epoch": 0.47362869198312235, + "grad_norm": 0.6894218325614929, + "learning_rate": 0.0008245668392103259, + "loss": 1.497, + "step": 4490 + }, + { + "epoch": 0.4737341772151899, + "grad_norm": 0.7889652848243713, + "learning_rate": 0.000824317019343513, + "loss": 1.4467, + "step": 4491 + }, + { + "epoch": 0.4738396624472574, + "grad_norm": 0.7424919605255127, + "learning_rate": 0.0008240671911491077, + "loss": 1.4674, + "step": 4492 + }, + { + "epoch": 0.4739451476793249, + "grad_norm": 0.6529828310012817, + "learning_rate": 0.000823817354655104, + "loss": 1.4378, + "step": 4493 + }, + { + "epoch": 0.4740506329113924, + "grad_norm": 0.7517825961112976, + "learning_rate": 0.0008235675098894979, + "loss": 1.4328, + "step": 4494 + }, + { + "epoch": 0.4741561181434599, + "grad_norm": 0.6293338537216187, + "learning_rate": 0.0008233176568802851, + "loss": 1.4769, + "step": 4495 + }, + { + "epoch": 0.4742616033755274, + "grad_norm": 0.6403204202651978, + "learning_rate": 0.0008230677956554637, + "loss": 1.4477, + "step": 4496 + }, + { + "epoch": 0.4743670886075949, + "grad_norm": 0.6662796139717102, + "learning_rate": 0.0008228179262430313, + "loss": 1.425, + "step": 4497 + }, + { + "epoch": 0.47447257383966246, + "grad_norm": 0.6397725343704224, + "learning_rate": 0.0008225680486709871, + "loss": 1.4611, + "step": 4498 + }, + { + "epoch": 0.47457805907172995, + "grad_norm": 0.800053596496582, + "learning_rate": 0.0008223181629673312, + "loss": 1.4513, + "step": 4499 + }, + { + "epoch": 0.47468354430379744, + "grad_norm": 0.7202082872390747, + "learning_rate": 0.0008220682691600645, + "loss": 1.4579, + "step": 4500 + }, + { + "epoch": 0.474789029535865, + "grad_norm": 0.8298641443252563, + "learning_rate": 
0.0008218183672771889, + "loss": 1.4691, + "step": 4501 + }, + { + "epoch": 0.4748945147679325, + "grad_norm": 0.6988716125488281, + "learning_rate": 0.0008215684573467071, + "loss": 1.4786, + "step": 4502 + }, + { + "epoch": 0.475, + "grad_norm": 0.8492128849029541, + "learning_rate": 0.0008213185393966229, + "loss": 1.4592, + "step": 4503 + }, + { + "epoch": 0.4751054852320675, + "grad_norm": 0.6279498338699341, + "learning_rate": 0.0008210686134549406, + "loss": 1.4637, + "step": 4504 + }, + { + "epoch": 0.475210970464135, + "grad_norm": 0.9500442147254944, + "learning_rate": 0.0008208186795496657, + "loss": 1.4553, + "step": 4505 + }, + { + "epoch": 0.4753164556962025, + "grad_norm": 0.7774185538291931, + "learning_rate": 0.0008205687377088048, + "loss": 1.4463, + "step": 4506 + }, + { + "epoch": 0.47542194092827006, + "grad_norm": 0.7126122117042542, + "learning_rate": 0.000820318787960365, + "loss": 1.4821, + "step": 4507 + }, + { + "epoch": 0.47552742616033755, + "grad_norm": 0.7104023694992065, + "learning_rate": 0.0008200688303323542, + "loss": 1.4381, + "step": 4508 + }, + { + "epoch": 0.47563291139240504, + "grad_norm": 0.6486998796463013, + "learning_rate": 0.0008198188648527818, + "loss": 1.4667, + "step": 4509 + }, + { + "epoch": 0.4757383966244726, + "grad_norm": 0.7140430212020874, + "learning_rate": 0.0008195688915496571, + "loss": 1.4197, + "step": 4510 + }, + { + "epoch": 0.4758438818565401, + "grad_norm": 0.6489710807800293, + "learning_rate": 0.0008193189104509915, + "loss": 1.4325, + "step": 4511 + }, + { + "epoch": 0.4759493670886076, + "grad_norm": 0.6386315822601318, + "learning_rate": 0.0008190689215847963, + "loss": 1.4384, + "step": 4512 + }, + { + "epoch": 0.4760548523206751, + "grad_norm": 0.6743360757827759, + "learning_rate": 0.0008188189249790838, + "loss": 1.46, + "step": 4513 + }, + { + "epoch": 0.4761603375527426, + "grad_norm": 0.6580824851989746, + "learning_rate": 0.0008185689206618677, + "loss": 1.4604, + "step": 4514 + }, + 
{ + "epoch": 0.4762658227848101, + "grad_norm": 0.728803813457489, + "learning_rate": 0.0008183189086611623, + "loss": 1.4715, + "step": 4515 + }, + { + "epoch": 0.47637130801687766, + "grad_norm": 0.6104710102081299, + "learning_rate": 0.0008180688890049823, + "loss": 1.4409, + "step": 4516 + }, + { + "epoch": 0.47647679324894515, + "grad_norm": 0.7332503199577332, + "learning_rate": 0.000817818861721344, + "loss": 1.4297, + "step": 4517 + }, + { + "epoch": 0.47658227848101264, + "grad_norm": 0.6623108983039856, + "learning_rate": 0.0008175688268382639, + "loss": 1.4011, + "step": 4518 + }, + { + "epoch": 0.4766877637130802, + "grad_norm": 0.8311757445335388, + "learning_rate": 0.00081731878438376, + "loss": 1.4115, + "step": 4519 + }, + { + "epoch": 0.4767932489451477, + "grad_norm": 0.6830923557281494, + "learning_rate": 0.0008170687343858506, + "loss": 1.4289, + "step": 4520 + }, + { + "epoch": 0.4768987341772152, + "grad_norm": 0.7062208652496338, + "learning_rate": 0.000816818676872555, + "loss": 1.4847, + "step": 4521 + }, + { + "epoch": 0.4770042194092827, + "grad_norm": 0.6767730712890625, + "learning_rate": 0.0008165686118718935, + "loss": 1.4787, + "step": 4522 + }, + { + "epoch": 0.4771097046413502, + "grad_norm": 0.9111669659614563, + "learning_rate": 0.000816318539411887, + "loss": 1.4557, + "step": 4523 + }, + { + "epoch": 0.4772151898734177, + "grad_norm": 0.655879020690918, + "learning_rate": 0.0008160684595205577, + "loss": 1.4535, + "step": 4524 + }, + { + "epoch": 0.47732067510548526, + "grad_norm": 1.0127874612808228, + "learning_rate": 0.000815818372225928, + "loss": 1.4293, + "step": 4525 + }, + { + "epoch": 0.47742616033755275, + "grad_norm": 0.6643230319023132, + "learning_rate": 0.0008155682775560215, + "loss": 1.475, + "step": 4526 + }, + { + "epoch": 0.47753164556962024, + "grad_norm": 1.076544165611267, + "learning_rate": 0.0008153181755388624, + "loss": 1.4333, + "step": 4527 + }, + { + "epoch": 0.47763713080168774, + "grad_norm": 
0.6127076148986816, + "learning_rate": 0.0008150680662024761, + "loss": 1.4572, + "step": 4528 + }, + { + "epoch": 0.4777426160337553, + "grad_norm": 0.7076026201248169, + "learning_rate": 0.0008148179495748885, + "loss": 1.4477, + "step": 4529 + }, + { + "epoch": 0.4778481012658228, + "grad_norm": 0.7192926406860352, + "learning_rate": 0.0008145678256841265, + "loss": 1.467, + "step": 4530 + }, + { + "epoch": 0.47795358649789027, + "grad_norm": 0.7544342875480652, + "learning_rate": 0.0008143176945582175, + "loss": 1.4357, + "step": 4531 + }, + { + "epoch": 0.4780590717299578, + "grad_norm": 0.6497654914855957, + "learning_rate": 0.0008140675562251904, + "loss": 1.45, + "step": 4532 + }, + { + "epoch": 0.4781645569620253, + "grad_norm": 0.6596652865409851, + "learning_rate": 0.0008138174107130739, + "loss": 1.4803, + "step": 4533 + }, + { + "epoch": 0.4782700421940928, + "grad_norm": 0.693556010723114, + "learning_rate": 0.0008135672580498984, + "loss": 1.4781, + "step": 4534 + }, + { + "epoch": 0.47837552742616035, + "grad_norm": 0.7155341506004333, + "learning_rate": 0.0008133170982636946, + "loss": 1.4319, + "step": 4535 + }, + { + "epoch": 0.47848101265822784, + "grad_norm": 0.6453586220741272, + "learning_rate": 0.0008130669313824944, + "loss": 1.4472, + "step": 4536 + }, + { + "epoch": 0.47858649789029534, + "grad_norm": 0.7791593670845032, + "learning_rate": 0.0008128167574343299, + "loss": 1.4445, + "step": 4537 + }, + { + "epoch": 0.4786919831223629, + "grad_norm": 0.6662279367446899, + "learning_rate": 0.0008125665764472345, + "loss": 1.4612, + "step": 4538 + }, + { + "epoch": 0.4787974683544304, + "grad_norm": 0.6628378033638, + "learning_rate": 0.0008123163884492422, + "loss": 1.4579, + "step": 4539 + }, + { + "epoch": 0.47890295358649787, + "grad_norm": 0.6552931666374207, + "learning_rate": 0.0008120661934683879, + "loss": 1.4878, + "step": 4540 + }, + { + "epoch": 0.4790084388185654, + "grad_norm": 1.0105565786361694, + "learning_rate": 
0.0008118159915327072, + "loss": 1.4289, + "step": 4541 + }, + { + "epoch": 0.4791139240506329, + "grad_norm": 0.7146521806716919, + "learning_rate": 0.0008115657826702364, + "loss": 1.4319, + "step": 4542 + }, + { + "epoch": 0.4792194092827004, + "grad_norm": 0.8931132555007935, + "learning_rate": 0.0008113155669090124, + "loss": 1.4691, + "step": 4543 + }, + { + "epoch": 0.47932489451476795, + "grad_norm": 0.7432214021682739, + "learning_rate": 0.0008110653442770736, + "loss": 1.4714, + "step": 4544 + }, + { + "epoch": 0.47943037974683544, + "grad_norm": 0.8560032844543457, + "learning_rate": 0.0008108151148024584, + "loss": 1.4594, + "step": 4545 + }, + { + "epoch": 0.47953586497890294, + "grad_norm": 0.665522575378418, + "learning_rate": 0.0008105648785132065, + "loss": 1.4713, + "step": 4546 + }, + { + "epoch": 0.4796413502109705, + "grad_norm": 0.8626821637153625, + "learning_rate": 0.0008103146354373577, + "loss": 1.4672, + "step": 4547 + }, + { + "epoch": 0.479746835443038, + "grad_norm": 0.6813458800315857, + "learning_rate": 0.0008100643856029534, + "loss": 1.4852, + "step": 4548 + }, + { + "epoch": 0.47985232067510547, + "grad_norm": 0.916761577129364, + "learning_rate": 0.0008098141290380353, + "loss": 1.4774, + "step": 4549 + }, + { + "epoch": 0.479957805907173, + "grad_norm": 0.8220388293266296, + "learning_rate": 0.0008095638657706456, + "loss": 1.4235, + "step": 4550 + }, + { + "epoch": 0.4800632911392405, + "grad_norm": 0.7935847043991089, + "learning_rate": 0.0008093135958288278, + "loss": 1.4298, + "step": 4551 + }, + { + "epoch": 0.480168776371308, + "grad_norm": 0.7839449644088745, + "learning_rate": 0.0008090633192406256, + "loss": 1.429, + "step": 4552 + }, + { + "epoch": 0.48027426160337555, + "grad_norm": 0.7193993926048279, + "learning_rate": 0.0008088130360340843, + "loss": 1.4207, + "step": 4553 + }, + { + "epoch": 0.48037974683544304, + "grad_norm": 0.6729044914245605, + "learning_rate": 0.0008085627462372489, + "loss": 1.4546, + 
"step": 4554 + }, + { + "epoch": 0.48048523206751054, + "grad_norm": 0.6485658884048462, + "learning_rate": 0.0008083124498781658, + "loss": 1.4223, + "step": 4555 + }, + { + "epoch": 0.4805907172995781, + "grad_norm": 0.7371686100959778, + "learning_rate": 0.0008080621469848817, + "loss": 1.4699, + "step": 4556 + }, + { + "epoch": 0.4806962025316456, + "grad_norm": 0.6444589495658875, + "learning_rate": 0.0008078118375854449, + "loss": 1.4435, + "step": 4557 + }, + { + "epoch": 0.48080168776371307, + "grad_norm": 0.6392772793769836, + "learning_rate": 0.000807561521707903, + "loss": 1.4527, + "step": 4558 + }, + { + "epoch": 0.48090717299578056, + "grad_norm": 0.7454724907875061, + "learning_rate": 0.000807311199380306, + "loss": 1.4606, + "step": 4559 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 0.6880771517753601, + "learning_rate": 0.000807060870630703, + "loss": 1.4632, + "step": 4560 + }, + { + "epoch": 0.4811181434599156, + "grad_norm": 0.8842725157737732, + "learning_rate": 0.0008068105354871449, + "loss": 1.4068, + "step": 4561 + }, + { + "epoch": 0.4812236286919831, + "grad_norm": 1.053625464439392, + "learning_rate": 0.0008065601939776833, + "loss": 1.417, + "step": 4562 + }, + { + "epoch": 0.48132911392405064, + "grad_norm": 0.6119556427001953, + "learning_rate": 0.0008063098461303698, + "loss": 1.4248, + "step": 4563 + }, + { + "epoch": 0.48143459915611814, + "grad_norm": 0.7674850821495056, + "learning_rate": 0.0008060594919732572, + "loss": 1.4492, + "step": 4564 + }, + { + "epoch": 0.48154008438818563, + "grad_norm": 0.619978666305542, + "learning_rate": 0.0008058091315343988, + "loss": 1.4227, + "step": 4565 + }, + { + "epoch": 0.4816455696202532, + "grad_norm": 0.7732418179512024, + "learning_rate": 0.0008055587648418492, + "loss": 1.4584, + "step": 4566 + }, + { + "epoch": 0.48175105485232067, + "grad_norm": 0.6169267892837524, + "learning_rate": 0.000805308391923663, + "loss": 1.4263, + "step": 4567 + }, + { + "epoch": 
0.48185654008438816, + "grad_norm": 0.6429836750030518, + "learning_rate": 0.0008050580128078957, + "loss": 1.4402, + "step": 4568 + }, + { + "epoch": 0.4819620253164557, + "grad_norm": 0.6345804333686829, + "learning_rate": 0.0008048076275226032, + "loss": 1.3953, + "step": 4569 + }, + { + "epoch": 0.4820675105485232, + "grad_norm": 0.7525330185890198, + "learning_rate": 0.000804557236095843, + "loss": 1.4366, + "step": 4570 + }, + { + "epoch": 0.4821729957805907, + "grad_norm": 0.62031090259552, + "learning_rate": 0.0008043068385556725, + "loss": 1.4321, + "step": 4571 + }, + { + "epoch": 0.48227848101265824, + "grad_norm": 0.7575478553771973, + "learning_rate": 0.0008040564349301498, + "loss": 1.4232, + "step": 4572 + }, + { + "epoch": 0.48238396624472574, + "grad_norm": 0.665537416934967, + "learning_rate": 0.0008038060252473339, + "loss": 1.4194, + "step": 4573 + }, + { + "epoch": 0.48248945147679323, + "grad_norm": 0.8612372875213623, + "learning_rate": 0.0008035556095352847, + "loss": 1.4967, + "step": 4574 + }, + { + "epoch": 0.4825949367088608, + "grad_norm": 0.6589898467063904, + "learning_rate": 0.0008033051878220624, + "loss": 1.4216, + "step": 4575 + }, + { + "epoch": 0.48270042194092827, + "grad_norm": 0.8056100606918335, + "learning_rate": 0.0008030547601357281, + "loss": 1.4813, + "step": 4576 + }, + { + "epoch": 0.48280590717299576, + "grad_norm": 0.6532495021820068, + "learning_rate": 0.0008028043265043434, + "loss": 1.4226, + "step": 4577 + }, + { + "epoch": 0.4829113924050633, + "grad_norm": 1.0013173818588257, + "learning_rate": 0.0008025538869559703, + "loss": 1.446, + "step": 4578 + }, + { + "epoch": 0.4830168776371308, + "grad_norm": 0.6435961127281189, + "learning_rate": 0.0008023034415186725, + "loss": 1.4866, + "step": 4579 + }, + { + "epoch": 0.4831223628691983, + "grad_norm": 1.0257899761199951, + "learning_rate": 0.0008020529902205129, + "loss": 1.4763, + "step": 4580 + }, + { + "epoch": 0.48322784810126584, + "grad_norm": 
0.7063087224960327, + "learning_rate": 0.0008018025330895566, + "loss": 1.4025, + "step": 4581 + }, + { + "epoch": 0.48333333333333334, + "grad_norm": 0.9399585723876953, + "learning_rate": 0.0008015520701538677, + "loss": 1.4249, + "step": 4582 + }, + { + "epoch": 0.48343881856540083, + "grad_norm": 0.6675339341163635, + "learning_rate": 0.0008013016014415126, + "loss": 1.4352, + "step": 4583 + }, + { + "epoch": 0.4835443037974684, + "grad_norm": 1.042778730392456, + "learning_rate": 0.0008010511269805571, + "loss": 1.4683, + "step": 4584 + }, + { + "epoch": 0.48364978902953587, + "grad_norm": 0.7095553874969482, + "learning_rate": 0.0008008006467990684, + "loss": 1.4501, + "step": 4585 + }, + { + "epoch": 0.48375527426160336, + "grad_norm": 1.1906782388687134, + "learning_rate": 0.0008005501609251136, + "loss": 1.4466, + "step": 4586 + }, + { + "epoch": 0.4838607594936709, + "grad_norm": 0.7155794501304626, + "learning_rate": 0.0008002996693867615, + "loss": 1.46, + "step": 4587 + }, + { + "epoch": 0.4839662447257384, + "grad_norm": 1.0909664630889893, + "learning_rate": 0.0008000491722120806, + "loss": 1.4053, + "step": 4588 + }, + { + "epoch": 0.4840717299578059, + "grad_norm": 0.6821454763412476, + "learning_rate": 0.0007997986694291404, + "loss": 1.4383, + "step": 4589 + }, + { + "epoch": 0.48417721518987344, + "grad_norm": 0.9663658738136292, + "learning_rate": 0.0007995481610660108, + "loss": 1.4461, + "step": 4590 + }, + { + "epoch": 0.48428270042194094, + "grad_norm": 0.6958091259002686, + "learning_rate": 0.0007992976471507628, + "loss": 1.4433, + "step": 4591 + }, + { + "epoch": 0.48438818565400843, + "grad_norm": 0.7317093014717102, + "learning_rate": 0.0007990471277114676, + "loss": 1.4356, + "step": 4592 + }, + { + "epoch": 0.4844936708860759, + "grad_norm": 0.6735441088676453, + "learning_rate": 0.0007987966027761972, + "loss": 1.4412, + "step": 4593 + }, + { + "epoch": 0.48459915611814347, + "grad_norm": 0.6238623261451721, + "learning_rate": 
0.0007985460723730242, + "loss": 1.4108, + "step": 4594 + }, + { + "epoch": 0.48470464135021096, + "grad_norm": 0.630606472492218, + "learning_rate": 0.0007982955365300214, + "loss": 1.4661, + "step": 4595 + }, + { + "epoch": 0.48481012658227846, + "grad_norm": 0.7405925393104553, + "learning_rate": 0.0007980449952752633, + "loss": 1.4654, + "step": 4596 + }, + { + "epoch": 0.484915611814346, + "grad_norm": 0.8469197750091553, + "learning_rate": 0.0007977944486368237, + "loss": 1.4327, + "step": 4597 + }, + { + "epoch": 0.4850210970464135, + "grad_norm": 0.6617453694343567, + "learning_rate": 0.0007975438966427778, + "loss": 1.4609, + "step": 4598 + }, + { + "epoch": 0.485126582278481, + "grad_norm": 0.884668231010437, + "learning_rate": 0.0007972933393212012, + "loss": 1.4931, + "step": 4599 + }, + { + "epoch": 0.48523206751054854, + "grad_norm": 0.712999165058136, + "learning_rate": 0.0007970427767001702, + "loss": 1.4698, + "step": 4600 + }, + { + "epoch": 0.48533755274261603, + "grad_norm": 1.207410454750061, + "learning_rate": 0.0007967922088077615, + "loss": 1.4907, + "step": 4601 + }, + { + "epoch": 0.4854430379746835, + "grad_norm": 0.751575767993927, + "learning_rate": 0.0007965416356720524, + "loss": 1.4617, + "step": 4602 + }, + { + "epoch": 0.48554852320675107, + "grad_norm": 1.0569156408309937, + "learning_rate": 0.000796291057321121, + "loss": 1.4064, + "step": 4603 + }, + { + "epoch": 0.48565400843881856, + "grad_norm": 0.830456018447876, + "learning_rate": 0.0007960404737830457, + "loss": 1.4393, + "step": 4604 + }, + { + "epoch": 0.48575949367088606, + "grad_norm": 1.0289990901947021, + "learning_rate": 0.0007957898850859058, + "loss": 1.4746, + "step": 4605 + }, + { + "epoch": 0.4858649789029536, + "grad_norm": 0.8027821183204651, + "learning_rate": 0.000795539291257781, + "loss": 1.4703, + "step": 4606 + }, + { + "epoch": 0.4859704641350211, + "grad_norm": 0.7156981229782104, + "learning_rate": 0.0007952886923267516, + "loss": 1.4521, + "step": 
4607 + }, + { + "epoch": 0.4860759493670886, + "grad_norm": 0.884117603302002, + "learning_rate": 0.0007950380883208981, + "loss": 1.4463, + "step": 4608 + }, + { + "epoch": 0.48618143459915614, + "grad_norm": 0.6038430333137512, + "learning_rate": 0.0007947874792683025, + "loss": 1.4446, + "step": 4609 + }, + { + "epoch": 0.48628691983122363, + "grad_norm": 1.07694411277771, + "learning_rate": 0.0007945368651970464, + "loss": 1.4829, + "step": 4610 + }, + { + "epoch": 0.4863924050632911, + "grad_norm": 0.6557701230049133, + "learning_rate": 0.0007942862461352125, + "loss": 1.4151, + "step": 4611 + }, + { + "epoch": 0.48649789029535867, + "grad_norm": 0.9459784030914307, + "learning_rate": 0.0007940356221108837, + "loss": 1.4457, + "step": 4612 + }, + { + "epoch": 0.48660337552742616, + "grad_norm": 0.6461942791938782, + "learning_rate": 0.0007937849931521441, + "loss": 1.4157, + "step": 4613 + }, + { + "epoch": 0.48670886075949366, + "grad_norm": 0.9614735245704651, + "learning_rate": 0.0007935343592870778, + "loss": 1.4291, + "step": 4614 + }, + { + "epoch": 0.4868143459915612, + "grad_norm": 0.6107286810874939, + "learning_rate": 0.0007932837205437692, + "loss": 1.4714, + "step": 4615 + }, + { + "epoch": 0.4869198312236287, + "grad_norm": 0.7832928895950317, + "learning_rate": 0.000793033076950304, + "loss": 1.4574, + "step": 4616 + }, + { + "epoch": 0.4870253164556962, + "grad_norm": 0.6845430135726929, + "learning_rate": 0.0007927824285347678, + "loss": 1.4768, + "step": 4617 + }, + { + "epoch": 0.48713080168776374, + "grad_norm": 0.7401233911514282, + "learning_rate": 0.0007925317753252473, + "loss": 1.392, + "step": 4618 + }, + { + "epoch": 0.48723628691983123, + "grad_norm": 0.6253181099891663, + "learning_rate": 0.0007922811173498293, + "loss": 1.3792, + "step": 4619 + }, + { + "epoch": 0.4873417721518987, + "grad_norm": 0.6385374665260315, + "learning_rate": 0.0007920304546366013, + "loss": 1.4419, + "step": 4620 + }, + { + "epoch": 0.48744725738396627, + 
"grad_norm": 0.7050794959068298, + "learning_rate": 0.0007917797872136511, + "loss": 1.4629, + "step": 4621 + }, + { + "epoch": 0.48755274261603376, + "grad_norm": 0.6608115434646606, + "learning_rate": 0.0007915291151090676, + "loss": 1.4207, + "step": 4622 + }, + { + "epoch": 0.48765822784810126, + "grad_norm": 0.756930410861969, + "learning_rate": 0.0007912784383509396, + "loss": 1.4352, + "step": 4623 + }, + { + "epoch": 0.4877637130801688, + "grad_norm": 0.7026593685150146, + "learning_rate": 0.0007910277569673568, + "loss": 1.4415, + "step": 4624 + }, + { + "epoch": 0.4878691983122363, + "grad_norm": 0.7115082144737244, + "learning_rate": 0.000790777070986409, + "loss": 1.4472, + "step": 4625 + }, + { + "epoch": 0.4879746835443038, + "grad_norm": 0.6779862642288208, + "learning_rate": 0.0007905263804361873, + "loss": 1.4827, + "step": 4626 + }, + { + "epoch": 0.4880801687763713, + "grad_norm": 0.731431782245636, + "learning_rate": 0.0007902756853447824, + "loss": 1.4774, + "step": 4627 + }, + { + "epoch": 0.48818565400843883, + "grad_norm": 0.6436474323272705, + "learning_rate": 0.0007900249857402863, + "loss": 1.4492, + "step": 4628 + }, + { + "epoch": 0.4882911392405063, + "grad_norm": 0.768192708492279, + "learning_rate": 0.000789774281650791, + "loss": 1.4387, + "step": 4629 + }, + { + "epoch": 0.4883966244725738, + "grad_norm": 0.6364819407463074, + "learning_rate": 0.000789523573104389, + "loss": 1.4776, + "step": 4630 + }, + { + "epoch": 0.48850210970464136, + "grad_norm": 0.7262672781944275, + "learning_rate": 0.0007892728601291737, + "loss": 1.4583, + "step": 4631 + }, + { + "epoch": 0.48860759493670886, + "grad_norm": 0.7092540264129639, + "learning_rate": 0.0007890221427532384, + "loss": 1.4463, + "step": 4632 + }, + { + "epoch": 0.48871308016877635, + "grad_norm": 0.6217727065086365, + "learning_rate": 0.0007887714210046775, + "loss": 1.451, + "step": 4633 + }, + { + "epoch": 0.4888185654008439, + "grad_norm": 0.7586886882781982, + 
"learning_rate": 0.0007885206949115855, + "loss": 1.4379, + "step": 4634 + }, + { + "epoch": 0.4889240506329114, + "grad_norm": 0.7153956890106201, + "learning_rate": 0.0007882699645020577, + "loss": 1.4379, + "step": 4635 + }, + { + "epoch": 0.4890295358649789, + "grad_norm": 0.7015476822853088, + "learning_rate": 0.0007880192298041893, + "loss": 1.4668, + "step": 4636 + }, + { + "epoch": 0.48913502109704643, + "grad_norm": 0.6755387187004089, + "learning_rate": 0.0007877684908460768, + "loss": 1.4498, + "step": 4637 + }, + { + "epoch": 0.4892405063291139, + "grad_norm": 0.6314448118209839, + "learning_rate": 0.0007875177476558165, + "loss": 1.4478, + "step": 4638 + }, + { + "epoch": 0.4893459915611814, + "grad_norm": 0.6837891340255737, + "learning_rate": 0.0007872670002615056, + "loss": 1.4556, + "step": 4639 + }, + { + "epoch": 0.48945147679324896, + "grad_norm": 0.6267989873886108, + "learning_rate": 0.0007870162486912414, + "loss": 1.4777, + "step": 4640 + }, + { + "epoch": 0.48955696202531646, + "grad_norm": 0.6558220386505127, + "learning_rate": 0.0007867654929731221, + "loss": 1.4691, + "step": 4641 + }, + { + "epoch": 0.48966244725738395, + "grad_norm": 0.695475161075592, + "learning_rate": 0.0007865147331352457, + "loss": 1.4281, + "step": 4642 + }, + { + "epoch": 0.4897679324894515, + "grad_norm": 0.7044928073883057, + "learning_rate": 0.0007862639692057115, + "loss": 1.4526, + "step": 4643 + }, + { + "epoch": 0.489873417721519, + "grad_norm": 0.6320067048072815, + "learning_rate": 0.0007860132012126187, + "loss": 1.4651, + "step": 4644 + }, + { + "epoch": 0.4899789029535865, + "grad_norm": 0.7100255489349365, + "learning_rate": 0.0007857624291840672, + "loss": 1.4835, + "step": 4645 + }, + { + "epoch": 0.49008438818565403, + "grad_norm": 0.6567082405090332, + "learning_rate": 0.0007855116531481572, + "loss": 1.4476, + "step": 4646 + }, + { + "epoch": 0.4901898734177215, + "grad_norm": 0.6729353070259094, + "learning_rate": 0.0007852608731329893, + 
"loss": 1.4694, + "step": 4647 + }, + { + "epoch": 0.490295358649789, + "grad_norm": 0.8764384984970093, + "learning_rate": 0.0007850100891666648, + "loss": 1.4485, + "step": 4648 + }, + { + "epoch": 0.49040084388185656, + "grad_norm": 0.6909223198890686, + "learning_rate": 0.0007847593012772852, + "loss": 1.4277, + "step": 4649 + }, + { + "epoch": 0.49050632911392406, + "grad_norm": 0.7230454087257385, + "learning_rate": 0.0007845085094929527, + "loss": 1.4637, + "step": 4650 + }, + { + "epoch": 0.49061181434599155, + "grad_norm": 0.6933151483535767, + "learning_rate": 0.0007842577138417695, + "loss": 1.4529, + "step": 4651 + }, + { + "epoch": 0.4907172995780591, + "grad_norm": 0.7048479914665222, + "learning_rate": 0.0007840069143518386, + "loss": 1.4475, + "step": 4652 + }, + { + "epoch": 0.4908227848101266, + "grad_norm": 0.7143802046775818, + "learning_rate": 0.0007837561110512635, + "loss": 1.4213, + "step": 4653 + }, + { + "epoch": 0.4909282700421941, + "grad_norm": 0.7232340574264526, + "learning_rate": 0.0007835053039681476, + "loss": 1.4753, + "step": 4654 + }, + { + "epoch": 0.49103375527426163, + "grad_norm": 0.7377016544342041, + "learning_rate": 0.0007832544931305956, + "loss": 1.4288, + "step": 4655 + }, + { + "epoch": 0.4911392405063291, + "grad_norm": 0.8068201541900635, + "learning_rate": 0.0007830036785667116, + "loss": 1.4327, + "step": 4656 + }, + { + "epoch": 0.4912447257383966, + "grad_norm": 0.8966072797775269, + "learning_rate": 0.000782752860304601, + "loss": 1.436, + "step": 4657 + }, + { + "epoch": 0.4913502109704641, + "grad_norm": 0.6923723816871643, + "learning_rate": 0.0007825020383723692, + "loss": 1.454, + "step": 4658 + }, + { + "epoch": 0.49145569620253166, + "grad_norm": 0.867676854133606, + "learning_rate": 0.0007822512127981218, + "loss": 1.4267, + "step": 4659 + }, + { + "epoch": 0.49156118143459915, + "grad_norm": 0.6748430132865906, + "learning_rate": 0.0007820003836099649, + "loss": 1.451, + "step": 4660 + }, + { + 
"epoch": 0.49166666666666664, + "grad_norm": 0.7557504177093506, + "learning_rate": 0.0007817495508360057, + "loss": 1.4626, + "step": 4661 + }, + { + "epoch": 0.4917721518987342, + "grad_norm": 0.6797974109649658, + "learning_rate": 0.0007814987145043511, + "loss": 1.4277, + "step": 4662 + }, + { + "epoch": 0.4918776371308017, + "grad_norm": 0.6868244409561157, + "learning_rate": 0.0007812478746431085, + "loss": 1.4293, + "step": 4663 + }, + { + "epoch": 0.4919831223628692, + "grad_norm": 0.714677095413208, + "learning_rate": 0.0007809970312803855, + "loss": 1.435, + "step": 4664 + }, + { + "epoch": 0.4920886075949367, + "grad_norm": 0.6511794328689575, + "learning_rate": 0.0007807461844442906, + "loss": 1.4488, + "step": 4665 + }, + { + "epoch": 0.4921940928270042, + "grad_norm": 0.6453446745872498, + "learning_rate": 0.0007804953341629326, + "loss": 1.4312, + "step": 4666 + }, + { + "epoch": 0.4922995780590717, + "grad_norm": 0.8098984956741333, + "learning_rate": 0.0007802444804644202, + "loss": 1.4583, + "step": 4667 + }, + { + "epoch": 0.49240506329113926, + "grad_norm": 0.7868382930755615, + "learning_rate": 0.0007799936233768632, + "loss": 1.4475, + "step": 4668 + }, + { + "epoch": 0.49251054852320675, + "grad_norm": 0.7625592350959778, + "learning_rate": 0.0007797427629283708, + "loss": 1.4559, + "step": 4669 + }, + { + "epoch": 0.49261603375527424, + "grad_norm": 0.627423107624054, + "learning_rate": 0.0007794918991470537, + "loss": 1.4335, + "step": 4670 + }, + { + "epoch": 0.4927215189873418, + "grad_norm": 0.8609071373939514, + "learning_rate": 0.0007792410320610222, + "loss": 1.4447, + "step": 4671 + }, + { + "epoch": 0.4928270042194093, + "grad_norm": 0.6574953198432922, + "learning_rate": 0.0007789901616983872, + "loss": 1.4141, + "step": 4672 + }, + { + "epoch": 0.4929324894514768, + "grad_norm": 0.7310822606086731, + "learning_rate": 0.0007787392880872601, + "loss": 1.4517, + "step": 4673 + }, + { + "epoch": 0.4930379746835443, + "grad_norm": 
0.666316032409668, + "learning_rate": 0.0007784884112557524, + "loss": 1.4812, + "step": 4674 + }, + { + "epoch": 0.4931434599156118, + "grad_norm": 0.6148443818092346, + "learning_rate": 0.0007782375312319761, + "loss": 1.4196, + "step": 4675 + }, + { + "epoch": 0.4932489451476793, + "grad_norm": 0.6681275963783264, + "learning_rate": 0.0007779866480440437, + "loss": 1.4186, + "step": 4676 + }, + { + "epoch": 0.49335443037974686, + "grad_norm": 0.6918082237243652, + "learning_rate": 0.0007777357617200679, + "loss": 1.4421, + "step": 4677 + }, + { + "epoch": 0.49345991561181435, + "grad_norm": 0.6521772146224976, + "learning_rate": 0.0007774848722881616, + "loss": 1.4258, + "step": 4678 + }, + { + "epoch": 0.49356540084388184, + "grad_norm": 0.6262601017951965, + "learning_rate": 0.0007772339797764385, + "loss": 1.4594, + "step": 4679 + }, + { + "epoch": 0.4936708860759494, + "grad_norm": 0.7211189866065979, + "learning_rate": 0.0007769830842130119, + "loss": 1.4122, + "step": 4680 + }, + { + "epoch": 0.4937763713080169, + "grad_norm": 0.6540771722793579, + "learning_rate": 0.0007767321856259963, + "loss": 1.4333, + "step": 4681 + }, + { + "epoch": 0.4938818565400844, + "grad_norm": 0.7322876453399658, + "learning_rate": 0.0007764812840435058, + "loss": 1.4218, + "step": 4682 + }, + { + "epoch": 0.4939873417721519, + "grad_norm": 0.6696255803108215, + "learning_rate": 0.0007762303794936556, + "loss": 1.4383, + "step": 4683 + }, + { + "epoch": 0.4940928270042194, + "grad_norm": 0.6667640805244446, + "learning_rate": 0.0007759794720045606, + "loss": 1.408, + "step": 4684 + }, + { + "epoch": 0.4941983122362869, + "grad_norm": 0.7128403782844543, + "learning_rate": 0.0007757285616043363, + "loss": 1.3935, + "step": 4685 + }, + { + "epoch": 0.49430379746835446, + "grad_norm": 0.7262982726097107, + "learning_rate": 0.0007754776483210981, + "loss": 1.4512, + "step": 4686 + }, + { + "epoch": 0.49440928270042195, + "grad_norm": 0.7090149521827698, + "learning_rate": 
0.0007752267321829624, + "loss": 1.4785, + "step": 4687 + }, + { + "epoch": 0.49451476793248944, + "grad_norm": 0.6898735761642456, + "learning_rate": 0.0007749758132180459, + "loss": 1.429, + "step": 4688 + }, + { + "epoch": 0.494620253164557, + "grad_norm": 0.788206160068512, + "learning_rate": 0.0007747248914544646, + "loss": 1.4234, + "step": 4689 + }, + { + "epoch": 0.4947257383966245, + "grad_norm": 0.7881309986114502, + "learning_rate": 0.0007744739669203361, + "loss": 1.4806, + "step": 4690 + }, + { + "epoch": 0.494831223628692, + "grad_norm": 0.789246141910553, + "learning_rate": 0.0007742230396437775, + "loss": 1.4819, + "step": 4691 + }, + { + "epoch": 0.49493670886075947, + "grad_norm": 0.6452215909957886, + "learning_rate": 0.0007739721096529066, + "loss": 1.4726, + "step": 4692 + }, + { + "epoch": 0.495042194092827, + "grad_norm": 0.7670867443084717, + "learning_rate": 0.0007737211769758412, + "loss": 1.4461, + "step": 4693 + }, + { + "epoch": 0.4951476793248945, + "grad_norm": 0.8283689618110657, + "learning_rate": 0.0007734702416406997, + "loss": 1.4626, + "step": 4694 + }, + { + "epoch": 0.495253164556962, + "grad_norm": 0.6275841593742371, + "learning_rate": 0.0007732193036756006, + "loss": 1.4282, + "step": 4695 + }, + { + "epoch": 0.49535864978902955, + "grad_norm": 0.784328818321228, + "learning_rate": 0.0007729683631086627, + "loss": 1.4625, + "step": 4696 + }, + { + "epoch": 0.49546413502109704, + "grad_norm": 0.6514481902122498, + "learning_rate": 0.0007727174199680051, + "loss": 1.4257, + "step": 4697 + }, + { + "epoch": 0.49556962025316453, + "grad_norm": 0.6364865899085999, + "learning_rate": 0.0007724664742817475, + "loss": 1.407, + "step": 4698 + }, + { + "epoch": 0.4956751054852321, + "grad_norm": 0.739554226398468, + "learning_rate": 0.0007722155260780093, + "loss": 1.4536, + "step": 4699 + }, + { + "epoch": 0.4957805907172996, + "grad_norm": 0.6823841333389282, + "learning_rate": 0.0007719645753849108, + "loss": 1.4278, + "step": 
4700 + }, + { + "epoch": 0.49588607594936707, + "grad_norm": 0.6666823625564575, + "learning_rate": 0.0007717136222305718, + "loss": 1.4419, + "step": 4701 + }, + { + "epoch": 0.4959915611814346, + "grad_norm": 0.6863530874252319, + "learning_rate": 0.0007714626666431134, + "loss": 1.466, + "step": 4702 + }, + { + "epoch": 0.4960970464135021, + "grad_norm": 0.7807742953300476, + "learning_rate": 0.000771211708650656, + "loss": 1.4139, + "step": 4703 + }, + { + "epoch": 0.4962025316455696, + "grad_norm": 0.7301348447799683, + "learning_rate": 0.000770960748281321, + "loss": 1.4476, + "step": 4704 + }, + { + "epoch": 0.49630801687763715, + "grad_norm": 0.6518204212188721, + "learning_rate": 0.0007707097855632297, + "loss": 1.4301, + "step": 4705 + }, + { + "epoch": 0.49641350210970464, + "grad_norm": 0.7033689022064209, + "learning_rate": 0.0007704588205245034, + "loss": 1.4359, + "step": 4706 + }, + { + "epoch": 0.49651898734177213, + "grad_norm": 0.6954427361488342, + "learning_rate": 0.0007702078531932645, + "loss": 1.4855, + "step": 4707 + }, + { + "epoch": 0.4966244725738397, + "grad_norm": 0.6124064922332764, + "learning_rate": 0.0007699568835976348, + "loss": 1.4659, + "step": 4708 + }, + { + "epoch": 0.4967299578059072, + "grad_norm": 0.6793505549430847, + "learning_rate": 0.0007697059117657368, + "loss": 1.4278, + "step": 4709 + }, + { + "epoch": 0.49683544303797467, + "grad_norm": 0.6587400436401367, + "learning_rate": 0.0007694549377256932, + "loss": 1.4445, + "step": 4710 + }, + { + "epoch": 0.4969409282700422, + "grad_norm": 0.6360170841217041, + "learning_rate": 0.0007692039615056264, + "loss": 1.4528, + "step": 4711 + }, + { + "epoch": 0.4970464135021097, + "grad_norm": 0.6639877557754517, + "learning_rate": 0.0007689529831336604, + "loss": 1.4296, + "step": 4712 + }, + { + "epoch": 0.4971518987341772, + "grad_norm": 0.7192813158035278, + "learning_rate": 0.0007687020026379181, + "loss": 1.4126, + "step": 4713 + }, + { + "epoch": 0.49725738396624475, + 
"grad_norm": 0.6507261395454407, + "learning_rate": 0.0007684510200465231, + "loss": 1.4634, + "step": 4714 + }, + { + "epoch": 0.49736286919831224, + "grad_norm": 0.8030256628990173, + "learning_rate": 0.0007682000353875992, + "loss": 1.4365, + "step": 4715 + }, + { + "epoch": 0.49746835443037973, + "grad_norm": 0.6486564874649048, + "learning_rate": 0.0007679490486892705, + "loss": 1.4224, + "step": 4716 + }, + { + "epoch": 0.4975738396624473, + "grad_norm": 0.9625636339187622, + "learning_rate": 0.0007676980599796616, + "loss": 1.4296, + "step": 4717 + }, + { + "epoch": 0.4976793248945148, + "grad_norm": 0.6274600625038147, + "learning_rate": 0.0007674470692868967, + "loss": 1.4912, + "step": 4718 + }, + { + "epoch": 0.49778481012658227, + "grad_norm": 1.0536361932754517, + "learning_rate": 0.0007671960766391008, + "loss": 1.4583, + "step": 4719 + }, + { + "epoch": 0.4978902953586498, + "grad_norm": 0.68775475025177, + "learning_rate": 0.0007669450820643987, + "loss": 1.4209, + "step": 4720 + }, + { + "epoch": 0.4979957805907173, + "grad_norm": 0.9676712155342102, + "learning_rate": 0.0007666940855909155, + "loss": 1.4391, + "step": 4721 + }, + { + "epoch": 0.4981012658227848, + "grad_norm": 0.7012069821357727, + "learning_rate": 0.000766443087246777, + "loss": 1.4508, + "step": 4722 + }, + { + "epoch": 0.49820675105485235, + "grad_norm": 0.9534497857093811, + "learning_rate": 0.0007661920870601085, + "loss": 1.4549, + "step": 4723 + }, + { + "epoch": 0.49831223628691984, + "grad_norm": 0.7120623588562012, + "learning_rate": 0.000765941085059036, + "loss": 1.4541, + "step": 4724 + }, + { + "epoch": 0.49841772151898733, + "grad_norm": 0.8793537616729736, + "learning_rate": 0.0007656900812716853, + "loss": 1.444, + "step": 4725 + }, + { + "epoch": 0.4985232067510548, + "grad_norm": 0.7507222294807434, + "learning_rate": 0.0007654390757261827, + "loss": 1.4369, + "step": 4726 + }, + { + "epoch": 0.4986286919831224, + "grad_norm": 0.6623038649559021, + 
"learning_rate": 0.0007651880684506548, + "loss": 1.4058, + "step": 4727 + }, + { + "epoch": 0.49873417721518987, + "grad_norm": 0.70856773853302, + "learning_rate": 0.0007649370594732282, + "loss": 1.4577, + "step": 4728 + }, + { + "epoch": 0.49883966244725736, + "grad_norm": 0.785868763923645, + "learning_rate": 0.0007646860488220293, + "loss": 1.4202, + "step": 4729 + }, + { + "epoch": 0.4989451476793249, + "grad_norm": 0.6242792010307312, + "learning_rate": 0.0007644350365251855, + "loss": 1.4236, + "step": 4730 + }, + { + "epoch": 0.4990506329113924, + "grad_norm": 0.7455309629440308, + "learning_rate": 0.0007641840226108241, + "loss": 1.4567, + "step": 4731 + }, + { + "epoch": 0.4991561181434599, + "grad_norm": 0.6475867033004761, + "learning_rate": 0.000763933007107072, + "loss": 1.422, + "step": 4732 + }, + { + "epoch": 0.49926160337552744, + "grad_norm": 0.717263400554657, + "learning_rate": 0.0007636819900420572, + "loss": 1.4507, + "step": 4733 + }, + { + "epoch": 0.49936708860759493, + "grad_norm": 0.6025341153144836, + "learning_rate": 0.0007634309714439069, + "loss": 1.4439, + "step": 4734 + }, + { + "epoch": 0.4994725738396624, + "grad_norm": 0.7837526798248291, + "learning_rate": 0.0007631799513407495, + "loss": 1.4609, + "step": 4735 + }, + { + "epoch": 0.49957805907173, + "grad_norm": 0.6544118523597717, + "learning_rate": 0.0007629289297607127, + "loss": 1.444, + "step": 4736 + }, + { + "epoch": 0.49968354430379747, + "grad_norm": 0.6952338814735413, + "learning_rate": 0.0007626779067319251, + "loss": 1.4137, + "step": 4737 + }, + { + "epoch": 0.49978902953586496, + "grad_norm": 0.6348744034767151, + "learning_rate": 0.0007624268822825145, + "loss": 1.3963, + "step": 4738 + }, + { + "epoch": 0.4998945147679325, + "grad_norm": 0.6868152618408203, + "learning_rate": 0.00076217585644061, + "loss": 1.4428, + "step": 4739 + }, + { + "epoch": 0.5, + "grad_norm": 0.7266190648078918, + "learning_rate": 0.0007619248292343399, + "loss": 1.4426, + "step": 
4740 + }, + { + "epoch": 0.5001054852320675, + "grad_norm": 0.7141066789627075, + "learning_rate": 0.0007616738006918334, + "loss": 1.4652, + "step": 4741 + }, + { + "epoch": 0.500210970464135, + "grad_norm": 0.6656322479248047, + "learning_rate": 0.0007614227708412191, + "loss": 1.4336, + "step": 4742 + }, + { + "epoch": 0.5003164556962025, + "grad_norm": 0.6831923127174377, + "learning_rate": 0.0007611717397106265, + "loss": 1.4245, + "step": 4743 + }, + { + "epoch": 0.5004219409282701, + "grad_norm": 0.6578983664512634, + "learning_rate": 0.0007609207073281848, + "loss": 1.4927, + "step": 4744 + }, + { + "epoch": 0.5005274261603375, + "grad_norm": 0.6143165826797485, + "learning_rate": 0.0007606696737220233, + "loss": 1.4286, + "step": 4745 + }, + { + "epoch": 0.5006329113924051, + "grad_norm": 0.6407020092010498, + "learning_rate": 0.000760418638920272, + "loss": 1.4589, + "step": 4746 + }, + { + "epoch": 0.5007383966244726, + "grad_norm": 0.7006335854530334, + "learning_rate": 0.0007601676029510597, + "loss": 1.4636, + "step": 4747 + }, + { + "epoch": 0.50084388185654, + "grad_norm": 0.6836245656013489, + "learning_rate": 0.000759916565842517, + "loss": 1.4702, + "step": 4748 + }, + { + "epoch": 0.5009493670886076, + "grad_norm": 0.7238335013389587, + "learning_rate": 0.0007596655276227739, + "loss": 1.4664, + "step": 4749 + }, + { + "epoch": 0.5010548523206751, + "grad_norm": 0.7280544638633728, + "learning_rate": 0.0007594144883199599, + "loss": 1.441, + "step": 4750 + }, + { + "epoch": 0.5011603375527426, + "grad_norm": 0.6753602623939514, + "learning_rate": 0.0007591634479622056, + "loss": 1.4649, + "step": 4751 + }, + { + "epoch": 0.5012658227848101, + "grad_norm": 0.6621561646461487, + "learning_rate": 0.0007589124065776414, + "loss": 1.4493, + "step": 4752 + }, + { + "epoch": 0.5013713080168777, + "grad_norm": 0.6835507750511169, + "learning_rate": 0.0007586613641943976, + "loss": 1.4598, + "step": 4753 + }, + { + "epoch": 0.5014767932489451, + 
"grad_norm": 0.6896875500679016, + "learning_rate": 0.0007584103208406048, + "loss": 1.4481, + "step": 4754 + }, + { + "epoch": 0.5015822784810127, + "grad_norm": 0.724409282207489, + "learning_rate": 0.0007581592765443933, + "loss": 1.4287, + "step": 4755 + }, + { + "epoch": 0.5016877637130802, + "grad_norm": 0.6492192149162292, + "learning_rate": 0.0007579082313338943, + "loss": 1.3993, + "step": 4756 + }, + { + "epoch": 0.5017932489451477, + "grad_norm": 0.7036164402961731, + "learning_rate": 0.0007576571852372386, + "loss": 1.45, + "step": 4757 + }, + { + "epoch": 0.5018987341772152, + "grad_norm": 0.6914783716201782, + "learning_rate": 0.0007574061382825572, + "loss": 1.425, + "step": 4758 + }, + { + "epoch": 0.5020042194092827, + "grad_norm": 0.7085438966751099, + "learning_rate": 0.0007571550904979812, + "loss": 1.4247, + "step": 4759 + }, + { + "epoch": 0.5021097046413502, + "grad_norm": 0.6534363031387329, + "learning_rate": 0.0007569040419116413, + "loss": 1.458, + "step": 4760 + }, + { + "epoch": 0.5022151898734177, + "grad_norm": 0.7585707306861877, + "learning_rate": 0.0007566529925516692, + "loss": 1.4461, + "step": 4761 + }, + { + "epoch": 0.5023206751054853, + "grad_norm": 0.7627110481262207, + "learning_rate": 0.0007564019424461962, + "loss": 1.4685, + "step": 4762 + }, + { + "epoch": 0.5024261603375527, + "grad_norm": 0.6471465826034546, + "learning_rate": 0.0007561508916233535, + "loss": 1.4239, + "step": 4763 + }, + { + "epoch": 0.5025316455696203, + "grad_norm": 0.7201604843139648, + "learning_rate": 0.0007558998401112727, + "loss": 1.4514, + "step": 4764 + }, + { + "epoch": 0.5026371308016878, + "grad_norm": 0.645136833190918, + "learning_rate": 0.0007556487879380856, + "loss": 1.4239, + "step": 4765 + }, + { + "epoch": 0.5027426160337553, + "grad_norm": 0.6952351927757263, + "learning_rate": 0.0007553977351319235, + "loss": 1.4003, + "step": 4766 + }, + { + "epoch": 0.5028481012658228, + "grad_norm": 0.6923434138298035, + "learning_rate": 
0.0007551466817209183, + "loss": 1.446, + "step": 4767 + }, + { + "epoch": 0.5029535864978903, + "grad_norm": 0.790995717048645, + "learning_rate": 0.0007548956277332016, + "loss": 1.4242, + "step": 4768 + }, + { + "epoch": 0.5030590717299578, + "grad_norm": 0.6233625411987305, + "learning_rate": 0.0007546445731969056, + "loss": 1.4659, + "step": 4769 + }, + { + "epoch": 0.5031645569620253, + "grad_norm": 0.6332467794418335, + "learning_rate": 0.000754393518140162, + "loss": 1.438, + "step": 4770 + }, + { + "epoch": 0.5032700421940929, + "grad_norm": 0.7133588790893555, + "learning_rate": 0.0007541424625911026, + "loss": 1.4323, + "step": 4771 + }, + { + "epoch": 0.5033755274261603, + "grad_norm": 0.6501117944717407, + "learning_rate": 0.0007538914065778598, + "loss": 1.5048, + "step": 4772 + }, + { + "epoch": 0.5034810126582279, + "grad_norm": 0.7915835976600647, + "learning_rate": 0.0007536403501285653, + "loss": 1.4421, + "step": 4773 + }, + { + "epoch": 0.5035864978902953, + "grad_norm": 0.9016662836074829, + "learning_rate": 0.0007533892932713517, + "loss": 1.4219, + "step": 4774 + }, + { + "epoch": 0.5036919831223629, + "grad_norm": 0.6331968903541565, + "learning_rate": 0.0007531382360343507, + "loss": 1.4719, + "step": 4775 + }, + { + "epoch": 0.5037974683544304, + "grad_norm": 1.0076897144317627, + "learning_rate": 0.0007528871784456948, + "loss": 1.4604, + "step": 4776 + }, + { + "epoch": 0.5039029535864978, + "grad_norm": 0.6445798873901367, + "learning_rate": 0.0007526361205335159, + "loss": 1.4277, + "step": 4777 + }, + { + "epoch": 0.5040084388185654, + "grad_norm": 0.7114953398704529, + "learning_rate": 0.0007523850623259469, + "loss": 1.472, + "step": 4778 + }, + { + "epoch": 0.5041139240506329, + "grad_norm": 0.6865072846412659, + "learning_rate": 0.0007521340038511196, + "loss": 1.4288, + "step": 4779 + }, + { + "epoch": 0.5042194092827004, + "grad_norm": 0.7732363939285278, + "learning_rate": 0.0007518829451371665, + "loss": 1.4285, + "step": 
4780 + }, + { + "epoch": 0.5043248945147679, + "grad_norm": 0.6842886805534363, + "learning_rate": 0.0007516318862122199, + "loss": 1.4048, + "step": 4781 + }, + { + "epoch": 0.5044303797468355, + "grad_norm": 0.7175118923187256, + "learning_rate": 0.0007513808271044125, + "loss": 1.4617, + "step": 4782 + }, + { + "epoch": 0.5045358649789029, + "grad_norm": 0.6305547952651978, + "learning_rate": 0.0007511297678418766, + "loss": 1.4435, + "step": 4783 + }, + { + "epoch": 0.5046413502109705, + "grad_norm": 0.6324455142021179, + "learning_rate": 0.0007508787084527445, + "loss": 1.4142, + "step": 4784 + }, + { + "epoch": 0.504746835443038, + "grad_norm": 0.9423410892486572, + "learning_rate": 0.0007506276489651489, + "loss": 1.4413, + "step": 4785 + }, + { + "epoch": 0.5048523206751054, + "grad_norm": 0.6533330082893372, + "learning_rate": 0.0007503765894072217, + "loss": 1.4108, + "step": 4786 + }, + { + "epoch": 0.504957805907173, + "grad_norm": 0.8322232961654663, + "learning_rate": 0.000750125529807096, + "loss": 1.4258, + "step": 4787 + }, + { + "epoch": 0.5050632911392405, + "grad_norm": 0.6117909550666809, + "learning_rate": 0.0007498744701929041, + "loss": 1.4313, + "step": 4788 + }, + { + "epoch": 0.505168776371308, + "grad_norm": 1.0550847053527832, + "learning_rate": 0.0007496234105927785, + "loss": 1.3912, + "step": 4789 + }, + { + "epoch": 0.5052742616033755, + "grad_norm": 0.7511658072471619, + "learning_rate": 0.0007493723510348516, + "loss": 1.477, + "step": 4790 + }, + { + "epoch": 0.5053797468354431, + "grad_norm": 0.776477038860321, + "learning_rate": 0.0007491212915472557, + "loss": 1.4686, + "step": 4791 + }, + { + "epoch": 0.5054852320675105, + "grad_norm": 0.7452101111412048, + "learning_rate": 0.0007488702321581234, + "loss": 1.3946, + "step": 4792 + }, + { + "epoch": 0.505590717299578, + "grad_norm": 0.693227231502533, + "learning_rate": 0.0007486191728955873, + "loss": 1.4608, + "step": 4793 + }, + { + "epoch": 0.5056962025316456, + 
"grad_norm": 0.7088339328765869, + "learning_rate": 0.00074836811378778, + "loss": 1.5032, + "step": 4794 + }, + { + "epoch": 0.505801687763713, + "grad_norm": 0.6999925971031189, + "learning_rate": 0.0007481170548628335, + "loss": 1.4321, + "step": 4795 + }, + { + "epoch": 0.5059071729957806, + "grad_norm": 0.7201201319694519, + "learning_rate": 0.0007478659961488805, + "loss": 1.432, + "step": 4796 + }, + { + "epoch": 0.5060126582278481, + "grad_norm": 0.6293515563011169, + "learning_rate": 0.0007476149376740533, + "loss": 1.423, + "step": 4797 + }, + { + "epoch": 0.5061181434599156, + "grad_norm": 0.6772581338882446, + "learning_rate": 0.0007473638794664841, + "loss": 1.3917, + "step": 4798 + }, + { + "epoch": 0.5062236286919831, + "grad_norm": 0.7293186187744141, + "learning_rate": 0.0007471128215543056, + "loss": 1.4349, + "step": 4799 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 0.685484766960144, + "learning_rate": 0.0007468617639656496, + "loss": 1.4178, + "step": 4800 + }, + { + "epoch": 0.5064345991561181, + "grad_norm": 0.6250488758087158, + "learning_rate": 0.0007466107067286483, + "loss": 1.4132, + "step": 4801 + }, + { + "epoch": 0.5065400843881857, + "grad_norm": 0.6665006279945374, + "learning_rate": 0.0007463596498714346, + "loss": 1.4478, + "step": 4802 + }, + { + "epoch": 0.5066455696202532, + "grad_norm": 0.6086276173591614, + "learning_rate": 0.0007461085934221402, + "loss": 1.4339, + "step": 4803 + }, + { + "epoch": 0.5067510548523206, + "grad_norm": 0.6238981485366821, + "learning_rate": 0.0007458575374088974, + "loss": 1.4159, + "step": 4804 + }, + { + "epoch": 0.5068565400843882, + "grad_norm": 0.6258466839790344, + "learning_rate": 0.0007456064818598382, + "loss": 1.4318, + "step": 4805 + }, + { + "epoch": 0.5069620253164557, + "grad_norm": 0.628930389881134, + "learning_rate": 0.0007453554268030946, + "loss": 1.4324, + "step": 4806 + }, + { + "epoch": 0.5070675105485232, + "grad_norm": 0.6555208563804626, + "learning_rate": 
0.0007451043722667985, + "loss": 1.4671, + "step": 4807 + }, + { + "epoch": 0.5071729957805907, + "grad_norm": 0.6173492670059204, + "learning_rate": 0.000744853318279082, + "loss": 1.4451, + "step": 4808 + }, + { + "epoch": 0.5072784810126583, + "grad_norm": 0.6434482336044312, + "learning_rate": 0.0007446022648680768, + "loss": 1.4379, + "step": 4809 + }, + { + "epoch": 0.5073839662447257, + "grad_norm": 0.5982303619384766, + "learning_rate": 0.0007443512120619144, + "loss": 1.4381, + "step": 4810 + }, + { + "epoch": 0.5074894514767933, + "grad_norm": 0.6776828765869141, + "learning_rate": 0.0007441001598887273, + "loss": 1.4365, + "step": 4811 + }, + { + "epoch": 0.5075949367088608, + "grad_norm": 0.6348402500152588, + "learning_rate": 0.0007438491083766465, + "loss": 1.443, + "step": 4812 + }, + { + "epoch": 0.5077004219409282, + "grad_norm": 0.7040905356407166, + "learning_rate": 0.000743598057553804, + "loss": 1.4913, + "step": 4813 + }, + { + "epoch": 0.5078059071729958, + "grad_norm": 0.6953840255737305, + "learning_rate": 0.0007433470074483309, + "loss": 1.4711, + "step": 4814 + }, + { + "epoch": 0.5079113924050633, + "grad_norm": 0.7494120001792908, + "learning_rate": 0.0007430959580883589, + "loss": 1.4542, + "step": 4815 + }, + { + "epoch": 0.5080168776371308, + "grad_norm": 0.6837529540061951, + "learning_rate": 0.0007428449095020192, + "loss": 1.4195, + "step": 4816 + }, + { + "epoch": 0.5081223628691983, + "grad_norm": 0.6790048480033875, + "learning_rate": 0.000742593861717443, + "loss": 1.4374, + "step": 4817 + }, + { + "epoch": 0.5082278481012659, + "grad_norm": 0.7283616065979004, + "learning_rate": 0.0007423428147627613, + "loss": 1.5055, + "step": 4818 + }, + { + "epoch": 0.5083333333333333, + "grad_norm": 0.7159276604652405, + "learning_rate": 0.0007420917686661055, + "loss": 1.476, + "step": 4819 + }, + { + "epoch": 0.5084388185654009, + "grad_norm": 0.7504062652587891, + "learning_rate": 0.0007418407234556067, + "loss": 1.4369, + "step": 
4820 + }, + { + "epoch": 0.5085443037974684, + "grad_norm": 0.7060930728912354, + "learning_rate": 0.0007415896791593955, + "loss": 1.4638, + "step": 4821 + }, + { + "epoch": 0.5086497890295358, + "grad_norm": 0.6270065307617188, + "learning_rate": 0.0007413386358056025, + "loss": 1.4252, + "step": 4822 + }, + { + "epoch": 0.5087552742616034, + "grad_norm": 0.7106403708457947, + "learning_rate": 0.0007410875934223588, + "loss": 1.4497, + "step": 4823 + }, + { + "epoch": 0.5088607594936709, + "grad_norm": 0.7497876286506653, + "learning_rate": 0.0007408365520377945, + "loss": 1.4532, + "step": 4824 + }, + { + "epoch": 0.5089662447257384, + "grad_norm": 0.6682280898094177, + "learning_rate": 0.0007405855116800403, + "loss": 1.4461, + "step": 4825 + }, + { + "epoch": 0.5090717299578059, + "grad_norm": 0.8362265229225159, + "learning_rate": 0.0007403344723772265, + "loss": 1.4051, + "step": 4826 + }, + { + "epoch": 0.5091772151898735, + "grad_norm": 0.6589981913566589, + "learning_rate": 0.0007400834341574829, + "loss": 1.4144, + "step": 4827 + }, + { + "epoch": 0.5092827004219409, + "grad_norm": 0.7003146409988403, + "learning_rate": 0.0007398323970489402, + "loss": 1.4422, + "step": 4828 + }, + { + "epoch": 0.5093881856540085, + "grad_norm": 0.680439293384552, + "learning_rate": 0.0007395813610797283, + "loss": 1.4281, + "step": 4829 + }, + { + "epoch": 0.509493670886076, + "grad_norm": 0.6454216241836548, + "learning_rate": 0.0007393303262779767, + "loss": 1.4259, + "step": 4830 + }, + { + "epoch": 0.5095991561181434, + "grad_norm": 0.662148654460907, + "learning_rate": 0.0007390792926718153, + "loss": 1.4384, + "step": 4831 + }, + { + "epoch": 0.509704641350211, + "grad_norm": 0.8614054918289185, + "learning_rate": 0.0007388282602893737, + "loss": 1.4463, + "step": 4832 + }, + { + "epoch": 0.5098101265822785, + "grad_norm": 0.6622906923294067, + "learning_rate": 0.000738577229158781, + "loss": 1.3898, + "step": 4833 + }, + { + "epoch": 0.509915611814346, + 
"grad_norm": 1.0503284931182861, + "learning_rate": 0.000738326199308167, + "loss": 1.4325, + "step": 4834 + }, + { + "epoch": 0.5100210970464135, + "grad_norm": 0.6655111312866211, + "learning_rate": 0.0007380751707656603, + "loss": 1.4323, + "step": 4835 + }, + { + "epoch": 0.5101265822784811, + "grad_norm": 0.9565269351005554, + "learning_rate": 0.0007378241435593901, + "loss": 1.407, + "step": 4836 + }, + { + "epoch": 0.5102320675105485, + "grad_norm": 0.6473543047904968, + "learning_rate": 0.0007375731177174855, + "loss": 1.4026, + "step": 4837 + }, + { + "epoch": 0.510337552742616, + "grad_norm": 0.9070817828178406, + "learning_rate": 0.0007373220932680751, + "loss": 1.4519, + "step": 4838 + }, + { + "epoch": 0.5104430379746835, + "grad_norm": 0.6369042992591858, + "learning_rate": 0.0007370710702392873, + "loss": 1.4291, + "step": 4839 + }, + { + "epoch": 0.510548523206751, + "grad_norm": 1.0190834999084473, + "learning_rate": 0.0007368200486592507, + "loss": 1.4743, + "step": 4840 + }, + { + "epoch": 0.5106540084388186, + "grad_norm": 0.6181876063346863, + "learning_rate": 0.0007365690285560932, + "loss": 1.4243, + "step": 4841 + }, + { + "epoch": 0.510759493670886, + "grad_norm": 0.9358966946601868, + "learning_rate": 0.0007363180099579431, + "loss": 1.4427, + "step": 4842 + }, + { + "epoch": 0.5108649789029536, + "grad_norm": 0.6570923328399658, + "learning_rate": 0.0007360669928929282, + "loss": 1.4379, + "step": 4843 + }, + { + "epoch": 0.5109704641350211, + "grad_norm": 0.8322780728340149, + "learning_rate": 0.000735815977389176, + "loss": 1.4478, + "step": 4844 + }, + { + "epoch": 0.5110759493670886, + "grad_norm": 0.7465978264808655, + "learning_rate": 0.0007355649634748143, + "loss": 1.433, + "step": 4845 + }, + { + "epoch": 0.5111814345991561, + "grad_norm": 0.8653232455253601, + "learning_rate": 0.0007353139511779707, + "loss": 1.4597, + "step": 4846 + }, + { + "epoch": 0.5112869198312237, + "grad_norm": 0.6477535367012024, + "learning_rate": 
0.000735062940526772, + "loss": 1.4272, + "step": 4847 + }, + { + "epoch": 0.5113924050632911, + "grad_norm": 0.8637309074401855, + "learning_rate": 0.0007348119315493453, + "loss": 1.4589, + "step": 4848 + }, + { + "epoch": 0.5114978902953586, + "grad_norm": 0.6135000586509705, + "learning_rate": 0.0007345609242738173, + "loss": 1.4341, + "step": 4849 + }, + { + "epoch": 0.5116033755274262, + "grad_norm": 0.7851626873016357, + "learning_rate": 0.0007343099187283149, + "loss": 1.4536, + "step": 4850 + }, + { + "epoch": 0.5117088607594936, + "grad_norm": 0.6907130479812622, + "learning_rate": 0.0007340589149409644, + "loss": 1.4795, + "step": 4851 + }, + { + "epoch": 0.5118143459915612, + "grad_norm": 0.7220522165298462, + "learning_rate": 0.0007338079129398917, + "loss": 1.4674, + "step": 4852 + }, + { + "epoch": 0.5119198312236287, + "grad_norm": 0.6295617818832397, + "learning_rate": 0.0007335569127532231, + "loss": 1.4005, + "step": 4853 + }, + { + "epoch": 0.5120253164556962, + "grad_norm": 0.6740602850914001, + "learning_rate": 0.0007333059144090845, + "loss": 1.4737, + "step": 4854 + }, + { + "epoch": 0.5121308016877637, + "grad_norm": 0.6527765989303589, + "learning_rate": 0.0007330549179356014, + "loss": 1.4239, + "step": 4855 + }, + { + "epoch": 0.5122362869198313, + "grad_norm": 0.6640922427177429, + "learning_rate": 0.0007328039233608993, + "loss": 1.4126, + "step": 4856 + }, + { + "epoch": 0.5123417721518987, + "grad_norm": 0.6783446073532104, + "learning_rate": 0.0007325529307131034, + "loss": 1.4141, + "step": 4857 + }, + { + "epoch": 0.5124472573839662, + "grad_norm": 0.7039244174957275, + "learning_rate": 0.0007323019400203386, + "loss": 1.4053, + "step": 4858 + }, + { + "epoch": 0.5125527426160338, + "grad_norm": 0.7553814053535461, + "learning_rate": 0.0007320509513107296, + "loss": 1.3962, + "step": 4859 + }, + { + "epoch": 0.5126582278481012, + "grad_norm": 0.6746437549591064, + "learning_rate": 0.0007317999646124011, + "loss": 1.43, + "step": 
4860 + }, + { + "epoch": 0.5127637130801688, + "grad_norm": 0.7457387447357178, + "learning_rate": 0.0007315489799534772, + "loss": 1.4244, + "step": 4861 + }, + { + "epoch": 0.5128691983122363, + "grad_norm": 0.6175376772880554, + "learning_rate": 0.000731297997362082, + "loss": 1.4086, + "step": 4862 + }, + { + "epoch": 0.5129746835443038, + "grad_norm": 0.8848392367362976, + "learning_rate": 0.0007310470168663397, + "loss": 1.4495, + "step": 4863 + }, + { + "epoch": 0.5130801687763713, + "grad_norm": 0.6452799439430237, + "learning_rate": 0.0007307960384943736, + "loss": 1.4501, + "step": 4864 + }, + { + "epoch": 0.5131856540084389, + "grad_norm": 0.8093212246894836, + "learning_rate": 0.000730545062274307, + "loss": 1.4131, + "step": 4865 + }, + { + "epoch": 0.5132911392405063, + "grad_norm": 0.6632729172706604, + "learning_rate": 0.0007302940882342634, + "loss": 1.4315, + "step": 4866 + }, + { + "epoch": 0.5133966244725738, + "grad_norm": 0.6545631885528564, + "learning_rate": 0.0007300431164023653, + "loss": 1.3909, + "step": 4867 + }, + { + "epoch": 0.5135021097046414, + "grad_norm": 0.6564692258834839, + "learning_rate": 0.0007297921468067357, + "loss": 1.429, + "step": 4868 + }, + { + "epoch": 0.5136075949367088, + "grad_norm": 0.6511498689651489, + "learning_rate": 0.0007295411794754967, + "loss": 1.4186, + "step": 4869 + }, + { + "epoch": 0.5137130801687764, + "grad_norm": 0.7058131694793701, + "learning_rate": 0.0007292902144367704, + "loss": 1.43, + "step": 4870 + }, + { + "epoch": 0.5138185654008439, + "grad_norm": 0.6120452284812927, + "learning_rate": 0.0007290392517186791, + "loss": 1.4616, + "step": 4871 + }, + { + "epoch": 0.5139240506329114, + "grad_norm": 0.7322299480438232, + "learning_rate": 0.000728788291349344, + "loss": 1.414, + "step": 4872 + }, + { + "epoch": 0.5140295358649789, + "grad_norm": 0.7945569753646851, + "learning_rate": 0.0007285373333568868, + "loss": 1.4869, + "step": 4873 + }, + { + "epoch": 0.5141350210970465, + 
"grad_norm": 1.1236909627914429, + "learning_rate": 0.0007282863777694283, + "loss": 1.4494, + "step": 4874 + }, + { + "epoch": 0.5142405063291139, + "grad_norm": 0.7848008275032043, + "learning_rate": 0.0007280354246150894, + "loss": 1.4064, + "step": 4875 + }, + { + "epoch": 0.5143459915611814, + "grad_norm": 0.9836548566818237, + "learning_rate": 0.0007277844739219908, + "loss": 1.4598, + "step": 4876 + }, + { + "epoch": 0.514451476793249, + "grad_norm": 0.850024402141571, + "learning_rate": 0.0007275335257182526, + "loss": 1.4408, + "step": 4877 + }, + { + "epoch": 0.5145569620253164, + "grad_norm": 1.1619549989700317, + "learning_rate": 0.000727282580031995, + "loss": 1.4753, + "step": 4878 + }, + { + "epoch": 0.514662447257384, + "grad_norm": 0.7512984871864319, + "learning_rate": 0.0007270316368913374, + "loss": 1.4587, + "step": 4879 + }, + { + "epoch": 0.5147679324894515, + "grad_norm": 0.8054240345954895, + "learning_rate": 0.0007267806963243995, + "loss": 1.4115, + "step": 4880 + }, + { + "epoch": 0.514873417721519, + "grad_norm": 0.782609224319458, + "learning_rate": 0.0007265297583593003, + "loss": 1.4558, + "step": 4881 + }, + { + "epoch": 0.5149789029535865, + "grad_norm": 0.8109288215637207, + "learning_rate": 0.0007262788230241588, + "loss": 1.4473, + "step": 4882 + }, + { + "epoch": 0.515084388185654, + "grad_norm": 0.7142608165740967, + "learning_rate": 0.0007260278903470935, + "loss": 1.4507, + "step": 4883 + }, + { + "epoch": 0.5151898734177215, + "grad_norm": 0.754327118396759, + "learning_rate": 0.0007257769603562227, + "loss": 1.4378, + "step": 4884 + }, + { + "epoch": 0.515295358649789, + "grad_norm": 0.6960189342498779, + "learning_rate": 0.0007255260330796639, + "loss": 1.4111, + "step": 4885 + }, + { + "epoch": 0.5154008438818566, + "grad_norm": 0.7513467669487, + "learning_rate": 0.0007252751085455355, + "loss": 1.4503, + "step": 4886 + }, + { + "epoch": 0.515506329113924, + "grad_norm": 0.6477515697479248, + "learning_rate": 
0.0007250241867819544, + "loss": 1.4287, + "step": 4887 + }, + { + "epoch": 0.5156118143459916, + "grad_norm": 0.7504149079322815, + "learning_rate": 0.0007247732678170375, + "loss": 1.4642, + "step": 4888 + }, + { + "epoch": 0.5157172995780591, + "grad_norm": 0.7198253273963928, + "learning_rate": 0.0007245223516789019, + "loss": 1.4089, + "step": 4889 + }, + { + "epoch": 0.5158227848101266, + "grad_norm": 0.8024303317070007, + "learning_rate": 0.0007242714383956639, + "loss": 1.4065, + "step": 4890 + }, + { + "epoch": 0.5159282700421941, + "grad_norm": 0.6576610207557678, + "learning_rate": 0.0007240205279954395, + "loss": 1.3885, + "step": 4891 + }, + { + "epoch": 0.5160337552742617, + "grad_norm": 0.7639855146408081, + "learning_rate": 0.0007237696205063444, + "loss": 1.4656, + "step": 4892 + }, + { + "epoch": 0.5161392405063291, + "grad_norm": 0.6555715203285217, + "learning_rate": 0.0007235187159564942, + "loss": 1.4386, + "step": 4893 + }, + { + "epoch": 0.5162447257383966, + "grad_norm": 0.6569919586181641, + "learning_rate": 0.0007232678143740038, + "loss": 1.461, + "step": 4894 + }, + { + "epoch": 0.5163502109704642, + "grad_norm": 0.7481251955032349, + "learning_rate": 0.0007230169157869882, + "loss": 1.4098, + "step": 4895 + }, + { + "epoch": 0.5164556962025316, + "grad_norm": 0.6537695527076721, + "learning_rate": 0.0007227660202235616, + "loss": 1.4763, + "step": 4896 + }, + { + "epoch": 0.5165611814345992, + "grad_norm": 0.818164587020874, + "learning_rate": 0.0007225151277118384, + "loss": 1.4352, + "step": 4897 + }, + { + "epoch": 0.5166666666666667, + "grad_norm": 0.6493583917617798, + "learning_rate": 0.0007222642382799322, + "loss": 1.4717, + "step": 4898 + }, + { + "epoch": 0.5167721518987342, + "grad_norm": 0.7432751655578613, + "learning_rate": 0.0007220133519559563, + "loss": 1.4281, + "step": 4899 + }, + { + "epoch": 0.5168776371308017, + "grad_norm": 0.6702024340629578, + "learning_rate": 0.000721762468768024, + "loss": 1.4458, + "step": 
4900 + }, + { + "epoch": 0.5169831223628693, + "grad_norm": 0.8400494456291199, + "learning_rate": 0.0007215115887442478, + "loss": 1.4608, + "step": 4901 + }, + { + "epoch": 0.5170886075949367, + "grad_norm": 0.6512711644172668, + "learning_rate": 0.0007212607119127402, + "loss": 1.407, + "step": 4902 + }, + { + "epoch": 0.5171940928270042, + "grad_norm": 0.8363732099533081, + "learning_rate": 0.000721009838301613, + "loss": 1.4254, + "step": 4903 + }, + { + "epoch": 0.5172995780590718, + "grad_norm": 0.6471530199050903, + "learning_rate": 0.000720758967938978, + "loss": 1.446, + "step": 4904 + }, + { + "epoch": 0.5174050632911392, + "grad_norm": 0.6735434532165527, + "learning_rate": 0.0007205081008529463, + "loss": 1.4553, + "step": 4905 + }, + { + "epoch": 0.5175105485232068, + "grad_norm": 0.8591793775558472, + "learning_rate": 0.0007202572370716292, + "loss": 1.4253, + "step": 4906 + }, + { + "epoch": 0.5176160337552742, + "grad_norm": 0.7253069281578064, + "learning_rate": 0.000720006376623137, + "loss": 1.3749, + "step": 4907 + }, + { + "epoch": 0.5177215189873418, + "grad_norm": 0.8946760296821594, + "learning_rate": 0.0007197555195355799, + "loss": 1.4577, + "step": 4908 + }, + { + "epoch": 0.5178270042194093, + "grad_norm": 0.7854043841362, + "learning_rate": 0.0007195046658370675, + "loss": 1.4132, + "step": 4909 + }, + { + "epoch": 0.5179324894514767, + "grad_norm": 0.8230656385421753, + "learning_rate": 0.0007192538155557094, + "loss": 1.4885, + "step": 4910 + }, + { + "epoch": 0.5180379746835443, + "grad_norm": 0.731343150138855, + "learning_rate": 0.0007190029687196148, + "loss": 1.4269, + "step": 4911 + }, + { + "epoch": 0.5181434599156118, + "grad_norm": 0.7260914444923401, + "learning_rate": 0.0007187521253568919, + "loss": 1.4138, + "step": 4912 + }, + { + "epoch": 0.5182489451476793, + "grad_norm": 0.7797759175300598, + "learning_rate": 0.0007185012854956491, + "loss": 1.4069, + "step": 4913 + }, + { + "epoch": 0.5183544303797468, + 
"grad_norm": 0.626055896282196, + "learning_rate": 0.0007182504491639942, + "loss": 1.3981, + "step": 4914 + }, + { + "epoch": 0.5184599156118144, + "grad_norm": 0.6731904149055481, + "learning_rate": 0.000717999616390035, + "loss": 1.4229, + "step": 4915 + }, + { + "epoch": 0.5185654008438818, + "grad_norm": 0.6211963295936584, + "learning_rate": 0.0007177487872018784, + "loss": 1.4454, + "step": 4916 + }, + { + "epoch": 0.5186708860759494, + "grad_norm": 0.6207939386367798, + "learning_rate": 0.000717497961627631, + "loss": 1.4454, + "step": 4917 + }, + { + "epoch": 0.5187763713080169, + "grad_norm": 0.6692730188369751, + "learning_rate": 0.0007172471396953991, + "loss": 1.412, + "step": 4918 + }, + { + "epoch": 0.5188818565400843, + "grad_norm": 0.6903280019760132, + "learning_rate": 0.0007169963214332885, + "loss": 1.4146, + "step": 4919 + }, + { + "epoch": 0.5189873417721519, + "grad_norm": 0.7652410268783569, + "learning_rate": 0.0007167455068694046, + "loss": 1.4542, + "step": 4920 + }, + { + "epoch": 0.5190928270042194, + "grad_norm": 0.6682150959968567, + "learning_rate": 0.0007164946960318525, + "loss": 1.4575, + "step": 4921 + }, + { + "epoch": 0.5191983122362869, + "grad_norm": 0.6555982828140259, + "learning_rate": 0.0007162438889487365, + "loss": 1.4741, + "step": 4922 + }, + { + "epoch": 0.5193037974683544, + "grad_norm": 0.5860636830329895, + "learning_rate": 0.0007159930856481614, + "loss": 1.4442, + "step": 4923 + }, + { + "epoch": 0.519409282700422, + "grad_norm": 0.6561984419822693, + "learning_rate": 0.0007157422861582306, + "loss": 1.3846, + "step": 4924 + }, + { + "epoch": 0.5195147679324894, + "grad_norm": 0.6198650598526001, + "learning_rate": 0.0007154914905070475, + "loss": 1.4278, + "step": 4925 + }, + { + "epoch": 0.519620253164557, + "grad_norm": 0.6873897910118103, + "learning_rate": 0.0007152406987227149, + "loss": 1.4457, + "step": 4926 + }, + { + "epoch": 0.5197257383966245, + "grad_norm": 0.6162039041519165, + "learning_rate": 
0.0007149899108333354, + "loss": 1.4148, + "step": 4927 + }, + { + "epoch": 0.5198312236286919, + "grad_norm": 0.8099277019500732, + "learning_rate": 0.0007147391268670109, + "loss": 1.4415, + "step": 4928 + }, + { + "epoch": 0.5199367088607595, + "grad_norm": 0.6422502994537354, + "learning_rate": 0.000714488346851843, + "loss": 1.4427, + "step": 4929 + }, + { + "epoch": 0.520042194092827, + "grad_norm": 0.9509245753288269, + "learning_rate": 0.000714237570815933, + "loss": 1.4265, + "step": 4930 + }, + { + "epoch": 0.5201476793248945, + "grad_norm": 0.6881899833679199, + "learning_rate": 0.0007139867987873812, + "loss": 1.439, + "step": 4931 + }, + { + "epoch": 0.520253164556962, + "grad_norm": 1.093164324760437, + "learning_rate": 0.0007137360307942885, + "loss": 1.4527, + "step": 4932 + }, + { + "epoch": 0.5203586497890296, + "grad_norm": 0.7378911972045898, + "learning_rate": 0.0007134852668647543, + "loss": 1.4609, + "step": 4933 + }, + { + "epoch": 0.520464135021097, + "grad_norm": 0.912367582321167, + "learning_rate": 0.0007132345070268781, + "loss": 1.4257, + "step": 4934 + }, + { + "epoch": 0.5205696202531646, + "grad_norm": 0.6572122573852539, + "learning_rate": 0.0007129837513087587, + "loss": 1.3897, + "step": 4935 + }, + { + "epoch": 0.5206751054852321, + "grad_norm": 1.0367331504821777, + "learning_rate": 0.0007127329997384946, + "loss": 1.4259, + "step": 4936 + }, + { + "epoch": 0.5207805907172995, + "grad_norm": 0.6892417669296265, + "learning_rate": 0.0007124822523441837, + "loss": 1.4205, + "step": 4937 + }, + { + "epoch": 0.5208860759493671, + "grad_norm": 0.9196187257766724, + "learning_rate": 0.0007122315091539234, + "loss": 1.4389, + "step": 4938 + }, + { + "epoch": 0.5209915611814346, + "grad_norm": 0.6874894499778748, + "learning_rate": 0.000711980770195811, + "loss": 1.4563, + "step": 4939 + }, + { + "epoch": 0.5210970464135021, + "grad_norm": 0.7999655604362488, + "learning_rate": 0.0007117300354979423, + "loss": 1.4259, + "step": 4940 + 
}, + { + "epoch": 0.5212025316455696, + "grad_norm": 0.6493549942970276, + "learning_rate": 0.0007114793050884145, + "loss": 1.4718, + "step": 4941 + }, + { + "epoch": 0.5213080168776372, + "grad_norm": 0.6881893873214722, + "learning_rate": 0.0007112285789953226, + "loss": 1.4499, + "step": 4942 + }, + { + "epoch": 0.5214135021097046, + "grad_norm": 0.7743894457817078, + "learning_rate": 0.0007109778572467616, + "loss": 1.4281, + "step": 4943 + }, + { + "epoch": 0.5215189873417722, + "grad_norm": 0.7601466774940491, + "learning_rate": 0.0007107271398708266, + "loss": 1.4403, + "step": 4944 + }, + { + "epoch": 0.5216244725738397, + "grad_norm": 0.6537675857543945, + "learning_rate": 0.0007104764268956111, + "loss": 1.4187, + "step": 4945 + }, + { + "epoch": 0.5217299578059071, + "grad_norm": 0.6865509152412415, + "learning_rate": 0.0007102257183492092, + "loss": 1.4273, + "step": 4946 + }, + { + "epoch": 0.5218354430379747, + "grad_norm": 0.6670790910720825, + "learning_rate": 0.0007099750142597138, + "loss": 1.4434, + "step": 4947 + }, + { + "epoch": 0.5219409282700422, + "grad_norm": 0.6724891662597656, + "learning_rate": 0.0007097243146552175, + "loss": 1.4252, + "step": 4948 + }, + { + "epoch": 0.5220464135021097, + "grad_norm": 0.7992578148841858, + "learning_rate": 0.0007094736195638128, + "loss": 1.4404, + "step": 4949 + }, + { + "epoch": 0.5221518987341772, + "grad_norm": 0.7111032009124756, + "learning_rate": 0.000709222929013591, + "loss": 1.4486, + "step": 4950 + }, + { + "epoch": 0.5222573839662448, + "grad_norm": 0.7504560947418213, + "learning_rate": 0.0007089722430326434, + "loss": 1.4456, + "step": 4951 + }, + { + "epoch": 0.5223628691983122, + "grad_norm": 0.6152696013450623, + "learning_rate": 0.0007087215616490606, + "loss": 1.4243, + "step": 4952 + }, + { + "epoch": 0.5224683544303798, + "grad_norm": 0.8118830323219299, + "learning_rate": 0.0007084708848909326, + "loss": 1.3757, + "step": 4953 + }, + { + "epoch": 0.5225738396624473, + 
"grad_norm": 0.6893605589866638, + "learning_rate": 0.000708220212786349, + "loss": 1.4312, + "step": 4954 + }, + { + "epoch": 0.5226793248945147, + "grad_norm": 0.8078876733779907, + "learning_rate": 0.000707969545363399, + "loss": 1.4367, + "step": 4955 + }, + { + "epoch": 0.5227848101265823, + "grad_norm": 0.9141381978988647, + "learning_rate": 0.000707718882650171, + "loss": 1.4239, + "step": 4956 + }, + { + "epoch": 0.5228902953586498, + "grad_norm": 0.6343516111373901, + "learning_rate": 0.0007074682246747526, + "loss": 1.4546, + "step": 4957 + }, + { + "epoch": 0.5229957805907173, + "grad_norm": 0.6926632523536682, + "learning_rate": 0.0007072175714652321, + "loss": 1.4258, + "step": 4958 + }, + { + "epoch": 0.5231012658227848, + "grad_norm": 0.6563453674316406, + "learning_rate": 0.0007069669230496961, + "loss": 1.3892, + "step": 4959 + }, + { + "epoch": 0.5232067510548524, + "grad_norm": 0.6416963338851929, + "learning_rate": 0.0007067162794562309, + "loss": 1.428, + "step": 4960 + }, + { + "epoch": 0.5233122362869198, + "grad_norm": 0.725906252861023, + "learning_rate": 0.0007064656407129224, + "loss": 1.4154, + "step": 4961 + }, + { + "epoch": 0.5234177215189874, + "grad_norm": 0.6463931798934937, + "learning_rate": 0.000706215006847856, + "loss": 1.43, + "step": 4962 + }, + { + "epoch": 0.5235232067510549, + "grad_norm": 0.6780803799629211, + "learning_rate": 0.0007059643778891164, + "loss": 1.4497, + "step": 4963 + }, + { + "epoch": 0.5236286919831223, + "grad_norm": 0.6620429158210754, + "learning_rate": 0.0007057137538647878, + "loss": 1.4214, + "step": 4964 + }, + { + "epoch": 0.5237341772151899, + "grad_norm": 0.7157360315322876, + "learning_rate": 0.0007054631348029539, + "loss": 1.5014, + "step": 4965 + }, + { + "epoch": 0.5238396624472574, + "grad_norm": 0.7018815875053406, + "learning_rate": 0.0007052125207316975, + "loss": 1.4136, + "step": 4966 + }, + { + "epoch": 0.5239451476793249, + "grad_norm": 0.6706404685974121, + "learning_rate": 
0.0007049619116791019, + "loss": 1.3957, + "step": 4967 + }, + { + "epoch": 0.5240506329113924, + "grad_norm": 0.8842794895172119, + "learning_rate": 0.0007047113076732485, + "loss": 1.376, + "step": 4968 + }, + { + "epoch": 0.52415611814346, + "grad_norm": 0.7144839763641357, + "learning_rate": 0.0007044607087422191, + "loss": 1.4288, + "step": 4969 + }, + { + "epoch": 0.5242616033755274, + "grad_norm": 1.0083494186401367, + "learning_rate": 0.0007042101149140943, + "loss": 1.4633, + "step": 4970 + }, + { + "epoch": 0.524367088607595, + "grad_norm": 0.7925151586532593, + "learning_rate": 0.0007039595262169544, + "loss": 1.4937, + "step": 4971 + }, + { + "epoch": 0.5244725738396624, + "grad_norm": 0.8158186674118042, + "learning_rate": 0.0007037089426788792, + "loss": 1.3774, + "step": 4972 + }, + { + "epoch": 0.5245780590717299, + "grad_norm": 0.708567202091217, + "learning_rate": 0.0007034583643279479, + "loss": 1.4168, + "step": 4973 + }, + { + "epoch": 0.5246835443037975, + "grad_norm": 0.661780834197998, + "learning_rate": 0.0007032077911922384, + "loss": 1.4113, + "step": 4974 + }, + { + "epoch": 0.5247890295358649, + "grad_norm": 0.688489556312561, + "learning_rate": 0.0007029572232998298, + "loss": 1.4567, + "step": 4975 + }, + { + "epoch": 0.5248945147679325, + "grad_norm": 0.7320455312728882, + "learning_rate": 0.0007027066606787988, + "loss": 1.4281, + "step": 4976 + }, + { + "epoch": 0.525, + "grad_norm": 0.6878122687339783, + "learning_rate": 0.0007024561033572223, + "loss": 1.4323, + "step": 4977 + }, + { + "epoch": 0.5251054852320675, + "grad_norm": 0.6816239953041077, + "learning_rate": 0.0007022055513631764, + "loss": 1.4269, + "step": 4978 + }, + { + "epoch": 0.525210970464135, + "grad_norm": 0.6617605090141296, + "learning_rate": 0.000701955004724737, + "loss": 1.3841, + "step": 4979 + }, + { + "epoch": 0.5253164556962026, + "grad_norm": 0.8244339227676392, + "learning_rate": 0.0007017044634699787, + "loss": 1.4039, + "step": 4980 + }, + { + 
"epoch": 0.52542194092827, + "grad_norm": 0.693558394908905, + "learning_rate": 0.0007014539276269762, + "loss": 1.45, + "step": 4981 + }, + { + "epoch": 0.5255274261603375, + "grad_norm": 0.6722998023033142, + "learning_rate": 0.0007012033972238031, + "loss": 1.4161, + "step": 4982 + }, + { + "epoch": 0.5256329113924051, + "grad_norm": 0.765590250492096, + "learning_rate": 0.0007009528722885323, + "loss": 1.4128, + "step": 4983 + }, + { + "epoch": 0.5257383966244725, + "grad_norm": 0.63621586561203, + "learning_rate": 0.0007007023528492372, + "loss": 1.42, + "step": 4984 + }, + { + "epoch": 0.5258438818565401, + "grad_norm": 0.6538840532302856, + "learning_rate": 0.0007004518389339893, + "loss": 1.423, + "step": 4985 + }, + { + "epoch": 0.5259493670886076, + "grad_norm": 0.6595034599304199, + "learning_rate": 0.0007002013305708598, + "loss": 1.4089, + "step": 4986 + }, + { + "epoch": 0.5260548523206751, + "grad_norm": 0.719004213809967, + "learning_rate": 0.0006999508277879196, + "loss": 1.3953, + "step": 4987 + }, + { + "epoch": 0.5261603375527426, + "grad_norm": 0.6744510531425476, + "learning_rate": 0.0006997003306132386, + "loss": 1.4348, + "step": 4988 + }, + { + "epoch": 0.5262658227848102, + "grad_norm": 0.7018648982048035, + "learning_rate": 0.0006994498390748865, + "loss": 1.4689, + "step": 4989 + }, + { + "epoch": 0.5263713080168776, + "grad_norm": 0.6224896907806396, + "learning_rate": 0.0006991993532009319, + "loss": 1.4324, + "step": 4990 + }, + { + "epoch": 0.5264767932489451, + "grad_norm": 0.6727445721626282, + "learning_rate": 0.0006989488730194432, + "loss": 1.4313, + "step": 4991 + }, + { + "epoch": 0.5265822784810127, + "grad_norm": 0.6345086693763733, + "learning_rate": 0.0006986983985584874, + "loss": 1.424, + "step": 4992 + }, + { + "epoch": 0.5266877637130801, + "grad_norm": 0.6771995425224304, + "learning_rate": 0.0006984479298461323, + "loss": 1.433, + "step": 4993 + }, + { + "epoch": 0.5267932489451477, + "grad_norm": 0.7119765877723694, 
+ "learning_rate": 0.0006981974669104436, + "loss": 1.4676, + "step": 4994 + }, + { + "epoch": 0.5268987341772152, + "grad_norm": 0.6626976132392883, + "learning_rate": 0.0006979470097794871, + "loss": 1.4344, + "step": 4995 + }, + { + "epoch": 0.5270042194092827, + "grad_norm": 0.7812970876693726, + "learning_rate": 0.0006976965584813277, + "loss": 1.425, + "step": 4996 + }, + { + "epoch": 0.5271097046413502, + "grad_norm": 0.661405086517334, + "learning_rate": 0.0006974461130440298, + "loss": 1.408, + "step": 4997 + }, + { + "epoch": 0.5272151898734178, + "grad_norm": 0.7611746191978455, + "learning_rate": 0.0006971956734956569, + "loss": 1.4528, + "step": 4998 + }, + { + "epoch": 0.5273206751054852, + "grad_norm": 0.6389434933662415, + "learning_rate": 0.0006969452398642721, + "loss": 1.4652, + "step": 4999 + }, + { + "epoch": 0.5274261603375527, + "grad_norm": 0.6526530385017395, + "learning_rate": 0.0006966948121779378, + "loss": 1.4178, + "step": 5000 + }, + { + "epoch": 0.5275316455696203, + "grad_norm": 0.6118747591972351, + "learning_rate": 0.0006964443904647152, + "loss": 1.4527, + "step": 5001 + }, + { + "epoch": 0.5276371308016877, + "grad_norm": 0.6621379256248474, + "learning_rate": 0.0006961939747526661, + "loss": 1.4567, + "step": 5002 + }, + { + "epoch": 0.5277426160337553, + "grad_norm": 0.6658535003662109, + "learning_rate": 0.0006959435650698504, + "loss": 1.4354, + "step": 5003 + }, + { + "epoch": 0.5278481012658228, + "grad_norm": 0.6841160655021667, + "learning_rate": 0.0006956931614443278, + "loss": 1.4209, + "step": 5004 + }, + { + "epoch": 0.5279535864978903, + "grad_norm": 0.690818190574646, + "learning_rate": 0.0006954427639041572, + "loss": 1.4186, + "step": 5005 + }, + { + "epoch": 0.5280590717299578, + "grad_norm": 0.6804431080818176, + "learning_rate": 0.000695192372477397, + "loss": 1.3895, + "step": 5006 + }, + { + "epoch": 0.5281645569620254, + "grad_norm": 0.595018208026886, + "learning_rate": 0.0006949419871921047, + "loss": 
1.4127, + "step": 5007 + }, + { + "epoch": 0.5282700421940928, + "grad_norm": 0.6609659194946289, + "learning_rate": 0.0006946916080763373, + "loss": 1.4448, + "step": 5008 + }, + { + "epoch": 0.5283755274261603, + "grad_norm": 0.7985518574714661, + "learning_rate": 0.0006944412351581506, + "loss": 1.433, + "step": 5009 + }, + { + "epoch": 0.5284810126582279, + "grad_norm": 0.6233674883842468, + "learning_rate": 0.000694190868465601, + "loss": 1.4266, + "step": 5010 + }, + { + "epoch": 0.5285864978902953, + "grad_norm": 0.8489800691604614, + "learning_rate": 0.0006939405080267428, + "loss": 1.3997, + "step": 5011 + }, + { + "epoch": 0.5286919831223629, + "grad_norm": 0.6624812483787537, + "learning_rate": 0.0006936901538696303, + "loss": 1.4457, + "step": 5012 + }, + { + "epoch": 0.5287974683544304, + "grad_norm": 0.8004644513130188, + "learning_rate": 0.0006934398060223168, + "loss": 1.4295, + "step": 5013 + }, + { + "epoch": 0.5289029535864979, + "grad_norm": 0.6487240195274353, + "learning_rate": 0.0006931894645128551, + "loss": 1.4181, + "step": 5014 + }, + { + "epoch": 0.5290084388185654, + "grad_norm": 0.8620221614837646, + "learning_rate": 0.0006929391293692972, + "loss": 1.3739, + "step": 5015 + }, + { + "epoch": 0.529113924050633, + "grad_norm": 0.6478255987167358, + "learning_rate": 0.0006926888006196944, + "loss": 1.4486, + "step": 5016 + }, + { + "epoch": 0.5292194092827004, + "grad_norm": 0.9081147313117981, + "learning_rate": 0.0006924384782920971, + "loss": 1.4596, + "step": 5017 + }, + { + "epoch": 0.5293248945147679, + "grad_norm": 0.6567144393920898, + "learning_rate": 0.0006921881624145554, + "loss": 1.4207, + "step": 5018 + }, + { + "epoch": 0.5294303797468355, + "grad_norm": 0.8823665976524353, + "learning_rate": 0.0006919378530151182, + "loss": 1.405, + "step": 5019 + }, + { + "epoch": 0.5295358649789029, + "grad_norm": 0.7172009348869324, + "learning_rate": 0.0006916875501218343, + "loss": 1.4403, + "step": 5020 + }, + { + "epoch": 
0.5296413502109705, + "grad_norm": 0.8068716526031494, + "learning_rate": 0.0006914372537627512, + "loss": 1.4083, + "step": 5021 + }, + { + "epoch": 0.529746835443038, + "grad_norm": 0.7093639969825745, + "learning_rate": 0.0006911869639659159, + "loss": 1.4212, + "step": 5022 + }, + { + "epoch": 0.5298523206751055, + "grad_norm": 0.663793683052063, + "learning_rate": 0.0006909366807593744, + "loss": 1.4132, + "step": 5023 + }, + { + "epoch": 0.529957805907173, + "grad_norm": 0.724035382270813, + "learning_rate": 0.0006906864041711725, + "loss": 1.4131, + "step": 5024 + }, + { + "epoch": 0.5300632911392406, + "grad_norm": 0.6983525156974792, + "learning_rate": 0.0006904361342293546, + "loss": 1.3975, + "step": 5025 + }, + { + "epoch": 0.530168776371308, + "grad_norm": 0.6584721207618713, + "learning_rate": 0.000690185870961965, + "loss": 1.4135, + "step": 5026 + }, + { + "epoch": 0.5302742616033755, + "grad_norm": 0.7021160125732422, + "learning_rate": 0.0006899356143970467, + "loss": 1.44, + "step": 5027 + }, + { + "epoch": 0.5303797468354431, + "grad_norm": 0.6054639220237732, + "learning_rate": 0.0006896853645626424, + "loss": 1.4098, + "step": 5028 + }, + { + "epoch": 0.5304852320675105, + "grad_norm": 0.724105954170227, + "learning_rate": 0.0006894351214867937, + "loss": 1.3835, + "step": 5029 + }, + { + "epoch": 0.5305907172995781, + "grad_norm": 0.6863784193992615, + "learning_rate": 0.0006891848851975416, + "loss": 1.3945, + "step": 5030 + }, + { + "epoch": 0.5306962025316456, + "grad_norm": 0.6986758708953857, + "learning_rate": 0.0006889346557229265, + "loss": 1.4304, + "step": 5031 + }, + { + "epoch": 0.5308016877637131, + "grad_norm": 0.6366258263587952, + "learning_rate": 0.0006886844330909877, + "loss": 1.4539, + "step": 5032 + }, + { + "epoch": 0.5309071729957806, + "grad_norm": 0.6969568729400635, + "learning_rate": 0.0006884342173297639, + "loss": 1.4377, + "step": 5033 + }, + { + "epoch": 0.5310126582278482, + "grad_norm": 0.6698652505874634, + 
"learning_rate": 0.000688184008467293, + "loss": 1.4359, + "step": 5034 + }, + { + "epoch": 0.5311181434599156, + "grad_norm": 0.635710597038269, + "learning_rate": 0.0006879338065316122, + "loss": 1.4347, + "step": 5035 + }, + { + "epoch": 0.5312236286919831, + "grad_norm": 0.6365092396736145, + "learning_rate": 0.0006876836115507579, + "loss": 1.4454, + "step": 5036 + }, + { + "epoch": 0.5313291139240506, + "grad_norm": 0.7064875960350037, + "learning_rate": 0.0006874334235527657, + "loss": 1.4023, + "step": 5037 + }, + { + "epoch": 0.5314345991561181, + "grad_norm": 0.6689114570617676, + "learning_rate": 0.0006871832425656702, + "loss": 1.4744, + "step": 5038 + }, + { + "epoch": 0.5315400843881857, + "grad_norm": 0.6792779564857483, + "learning_rate": 0.0006869330686175058, + "loss": 1.4394, + "step": 5039 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 0.6289998292922974, + "learning_rate": 0.0006866829017363054, + "loss": 1.4073, + "step": 5040 + }, + { + "epoch": 0.5317510548523207, + "grad_norm": 0.6523230671882629, + "learning_rate": 0.0006864327419501017, + "loss": 1.4318, + "step": 5041 + }, + { + "epoch": 0.5318565400843882, + "grad_norm": 0.6351549625396729, + "learning_rate": 0.0006861825892869262, + "loss": 1.4333, + "step": 5042 + }, + { + "epoch": 0.5319620253164556, + "grad_norm": 0.813884973526001, + "learning_rate": 0.0006859324437748099, + "loss": 1.3948, + "step": 5043 + }, + { + "epoch": 0.5320675105485232, + "grad_norm": 0.6283247470855713, + "learning_rate": 0.0006856823054417825, + "loss": 1.4379, + "step": 5044 + }, + { + "epoch": 0.5321729957805907, + "grad_norm": 0.6982409358024597, + "learning_rate": 0.0006854321743158737, + "loss": 1.4085, + "step": 5045 + }, + { + "epoch": 0.5322784810126582, + "grad_norm": 0.6308366656303406, + "learning_rate": 0.0006851820504251117, + "loss": 1.4679, + "step": 5046 + }, + { + "epoch": 0.5323839662447257, + "grad_norm": 0.6710362434387207, + "learning_rate": 0.0006849319337975242, + "loss": 
1.4166, + "step": 5047 + }, + { + "epoch": 0.5324894514767933, + "grad_norm": 0.6435951590538025, + "learning_rate": 0.0006846818244611376, + "loss": 1.3942, + "step": 5048 + }, + { + "epoch": 0.5325949367088607, + "grad_norm": 0.6598569750785828, + "learning_rate": 0.0006844317224439788, + "loss": 1.4503, + "step": 5049 + }, + { + "epoch": 0.5327004219409283, + "grad_norm": 0.6104368567466736, + "learning_rate": 0.0006841816277740722, + "loss": 1.417, + "step": 5050 + }, + { + "epoch": 0.5328059071729958, + "grad_norm": 0.6624777317047119, + "learning_rate": 0.0006839315404794424, + "loss": 1.4479, + "step": 5051 + }, + { + "epoch": 0.5329113924050632, + "grad_norm": 0.6225583553314209, + "learning_rate": 0.0006836814605881131, + "loss": 1.4377, + "step": 5052 + }, + { + "epoch": 0.5330168776371308, + "grad_norm": 0.7344709634780884, + "learning_rate": 0.0006834313881281066, + "loss": 1.4418, + "step": 5053 + }, + { + "epoch": 0.5331223628691983, + "grad_norm": 0.8399834036827087, + "learning_rate": 0.0006831813231274451, + "loss": 1.4348, + "step": 5054 + }, + { + "epoch": 0.5332278481012658, + "grad_norm": 0.6278380751609802, + "learning_rate": 0.0006829312656141496, + "loss": 1.4388, + "step": 5055 + }, + { + "epoch": 0.5333333333333333, + "grad_norm": 0.8235114812850952, + "learning_rate": 0.0006826812156162401, + "loss": 1.4331, + "step": 5056 + }, + { + "epoch": 0.5334388185654009, + "grad_norm": 0.6944894790649414, + "learning_rate": 0.0006824311731617363, + "loss": 1.4556, + "step": 5057 + }, + { + "epoch": 0.5335443037974683, + "grad_norm": 0.7914268374443054, + "learning_rate": 0.0006821811382786561, + "loss": 1.4447, + "step": 5058 + }, + { + "epoch": 0.5336497890295359, + "grad_norm": 0.842955470085144, + "learning_rate": 0.0006819311109950177, + "loss": 1.4175, + "step": 5059 + }, + { + "epoch": 0.5337552742616034, + "grad_norm": 0.7831372618675232, + "learning_rate": 0.0006816810913388379, + "loss": 1.4138, + "step": 5060 + }, + { + "epoch": 
0.5338607594936708, + "grad_norm": 0.7918896079063416, + "learning_rate": 0.0006814310793381322, + "loss": 1.4584, + "step": 5061 + }, + { + "epoch": 0.5339662447257384, + "grad_norm": 0.9209260940551758, + "learning_rate": 0.0006811810750209161, + "loss": 1.4014, + "step": 5062 + }, + { + "epoch": 0.5340717299578059, + "grad_norm": 0.9344859719276428, + "learning_rate": 0.0006809310784152039, + "loss": 1.4114, + "step": 5063 + }, + { + "epoch": 0.5341772151898734, + "grad_norm": 0.7664598822593689, + "learning_rate": 0.0006806810895490087, + "loss": 1.4676, + "step": 5064 + }, + { + "epoch": 0.5342827004219409, + "grad_norm": 0.6537558436393738, + "learning_rate": 0.000680431108450343, + "loss": 1.4024, + "step": 5065 + }, + { + "epoch": 0.5343881856540085, + "grad_norm": 1.006965160369873, + "learning_rate": 0.0006801811351472185, + "loss": 1.4238, + "step": 5066 + }, + { + "epoch": 0.5344936708860759, + "grad_norm": 0.6817166805267334, + "learning_rate": 0.000679931169667646, + "loss": 1.4072, + "step": 5067 + }, + { + "epoch": 0.5345991561181435, + "grad_norm": 1.068442702293396, + "learning_rate": 0.0006796812120396351, + "loss": 1.4576, + "step": 5068 + }, + { + "epoch": 0.534704641350211, + "grad_norm": 0.8143030405044556, + "learning_rate": 0.0006794312622911953, + "loss": 1.4077, + "step": 5069 + }, + { + "epoch": 0.5348101265822784, + "grad_norm": 1.066748857498169, + "learning_rate": 0.0006791813204503342, + "loss": 1.391, + "step": 5070 + }, + { + "epoch": 0.534915611814346, + "grad_norm": 0.850085973739624, + "learning_rate": 0.0006789313865450594, + "loss": 1.4366, + "step": 5071 + }, + { + "epoch": 0.5350210970464135, + "grad_norm": 0.7638226747512817, + "learning_rate": 0.0006786814606033773, + "loss": 1.4508, + "step": 5072 + }, + { + "epoch": 0.535126582278481, + "grad_norm": 1.1237457990646362, + "learning_rate": 0.0006784315426532929, + "loss": 1.412, + "step": 5073 + }, + { + "epoch": 0.5352320675105485, + "grad_norm": 0.7822681069374084, + 
"learning_rate": 0.0006781816327228112, + "loss": 1.4717, + "step": 5074 + }, + { + "epoch": 0.5353375527426161, + "grad_norm": 1.0586146116256714, + "learning_rate": 0.0006779317308399357, + "loss": 1.4566, + "step": 5075 + }, + { + "epoch": 0.5354430379746835, + "grad_norm": 0.7913360595703125, + "learning_rate": 0.000677681837032669, + "loss": 1.4327, + "step": 5076 + }, + { + "epoch": 0.5355485232067511, + "grad_norm": 0.9770914912223816, + "learning_rate": 0.0006774319513290132, + "loss": 1.4301, + "step": 5077 + }, + { + "epoch": 0.5356540084388186, + "grad_norm": 0.7627110481262207, + "learning_rate": 0.0006771820737569689, + "loss": 1.4522, + "step": 5078 + }, + { + "epoch": 0.535759493670886, + "grad_norm": 0.814415454864502, + "learning_rate": 0.0006769322043445363, + "loss": 1.4194, + "step": 5079 + }, + { + "epoch": 0.5358649789029536, + "grad_norm": 0.7042812705039978, + "learning_rate": 0.0006766823431197147, + "loss": 1.4332, + "step": 5080 + }, + { + "epoch": 0.5359704641350211, + "grad_norm": 0.7496633529663086, + "learning_rate": 0.0006764324901105022, + "loss": 1.42, + "step": 5081 + }, + { + "epoch": 0.5360759493670886, + "grad_norm": 0.631197452545166, + "learning_rate": 0.000676182645344896, + "loss": 1.4349, + "step": 5082 + }, + { + "epoch": 0.5361814345991561, + "grad_norm": 0.7671929597854614, + "learning_rate": 0.0006759328088508925, + "loss": 1.404, + "step": 5083 + }, + { + "epoch": 0.5362869198312237, + "grad_norm": 0.6736258268356323, + "learning_rate": 0.0006756829806564872, + "loss": 1.4484, + "step": 5084 + }, + { + "epoch": 0.5363924050632911, + "grad_norm": 0.6817651987075806, + "learning_rate": 0.0006754331607896742, + "loss": 1.4409, + "step": 5085 + }, + { + "epoch": 0.5364978902953587, + "grad_norm": 0.7224265336990356, + "learning_rate": 0.0006751833492784476, + "loss": 1.3809, + "step": 5086 + }, + { + "epoch": 0.5366033755274262, + "grad_norm": 0.6749712824821472, + "learning_rate": 0.0006749335461507995, + "loss": 1.3894, 
+ "step": 5087 + }, + { + "epoch": 0.5367088607594936, + "grad_norm": 0.8225829601287842, + "learning_rate": 0.000674683751434722, + "loss": 1.4676, + "step": 5088 + }, + { + "epoch": 0.5368143459915612, + "grad_norm": 0.6506562829017639, + "learning_rate": 0.0006744339651582059, + "loss": 1.4263, + "step": 5089 + }, + { + "epoch": 0.5369198312236287, + "grad_norm": 0.7478351593017578, + "learning_rate": 0.0006741841873492406, + "loss": 1.4645, + "step": 5090 + }, + { + "epoch": 0.5370253164556962, + "grad_norm": 0.6340488791465759, + "learning_rate": 0.0006739344180358153, + "loss": 1.4495, + "step": 5091 + }, + { + "epoch": 0.5371308016877637, + "grad_norm": 0.8121150135993958, + "learning_rate": 0.0006736846572459178, + "loss": 1.3558, + "step": 5092 + }, + { + "epoch": 0.5372362869198313, + "grad_norm": 0.6600175499916077, + "learning_rate": 0.0006734349050075348, + "loss": 1.4059, + "step": 5093 + }, + { + "epoch": 0.5373417721518987, + "grad_norm": 0.8685680031776428, + "learning_rate": 0.0006731851613486526, + "loss": 1.4, + "step": 5094 + }, + { + "epoch": 0.5374472573839663, + "grad_norm": 0.6652439832687378, + "learning_rate": 0.0006729354262972561, + "loss": 1.4071, + "step": 5095 + }, + { + "epoch": 0.5375527426160338, + "grad_norm": 0.7640619277954102, + "learning_rate": 0.0006726856998813291, + "loss": 1.4124, + "step": 5096 + }, + { + "epoch": 0.5376582278481012, + "grad_norm": 0.6585860848426819, + "learning_rate": 0.0006724359821288552, + "loss": 1.4331, + "step": 5097 + }, + { + "epoch": 0.5377637130801688, + "grad_norm": 0.6869553923606873, + "learning_rate": 0.0006721862730678164, + "loss": 1.4171, + "step": 5098 + }, + { + "epoch": 0.5378691983122363, + "grad_norm": 0.6429990530014038, + "learning_rate": 0.0006719365727261935, + "loss": 1.4309, + "step": 5099 + }, + { + "epoch": 0.5379746835443038, + "grad_norm": 0.642961323261261, + "learning_rate": 0.0006716868811319671, + "loss": 1.3794, + "step": 5100 + }, + { + "epoch": 0.5380801687763713, 
+ "grad_norm": 0.6431115865707397, + "learning_rate": 0.000671437198313116, + "loss": 1.4391, + "step": 5101 + }, + { + "epoch": 0.5381856540084389, + "grad_norm": 0.6610175371170044, + "learning_rate": 0.0006711875242976187, + "loss": 1.439, + "step": 5102 + }, + { + "epoch": 0.5382911392405063, + "grad_norm": 0.7076855301856995, + "learning_rate": 0.0006709378591134523, + "loss": 1.4284, + "step": 5103 + }, + { + "epoch": 0.5383966244725739, + "grad_norm": 0.7503326535224915, + "learning_rate": 0.0006706882027885929, + "loss": 1.4127, + "step": 5104 + }, + { + "epoch": 0.5385021097046413, + "grad_norm": 0.6653889417648315, + "learning_rate": 0.0006704385553510156, + "loss": 1.3916, + "step": 5105 + }, + { + "epoch": 0.5386075949367088, + "grad_norm": 0.6478928327560425, + "learning_rate": 0.0006701889168286953, + "loss": 1.4375, + "step": 5106 + }, + { + "epoch": 0.5387130801687764, + "grad_norm": 0.7578722238540649, + "learning_rate": 0.0006699392872496048, + "loss": 1.4647, + "step": 5107 + }, + { + "epoch": 0.5388185654008438, + "grad_norm": 0.66720050573349, + "learning_rate": 0.0006696896666417163, + "loss": 1.4327, + "step": 5108 + }, + { + "epoch": 0.5389240506329114, + "grad_norm": 0.8480970859527588, + "learning_rate": 0.0006694400550330013, + "loss": 1.4402, + "step": 5109 + }, + { + "epoch": 0.5390295358649789, + "grad_norm": 0.638985276222229, + "learning_rate": 0.0006691904524514297, + "loss": 1.378, + "step": 5110 + }, + { + "epoch": 0.5391350210970464, + "grad_norm": 0.6955103874206543, + "learning_rate": 0.0006689408589249709, + "loss": 1.4377, + "step": 5111 + }, + { + "epoch": 0.5392405063291139, + "grad_norm": 0.7664821147918701, + "learning_rate": 0.000668691274481593, + "loss": 1.4246, + "step": 5112 + }, + { + "epoch": 0.5393459915611815, + "grad_norm": 0.81597501039505, + "learning_rate": 0.0006684416991492629, + "loss": 1.445, + "step": 5113 + }, + { + "epoch": 0.5394514767932489, + "grad_norm": 0.6714767217636108, + "learning_rate": 
0.0006681921329559475, + "loss": 1.4082, + "step": 5114 + }, + { + "epoch": 0.5395569620253164, + "grad_norm": 0.6407918930053711, + "learning_rate": 0.0006679425759296114, + "loss": 1.4036, + "step": 5115 + }, + { + "epoch": 0.539662447257384, + "grad_norm": 0.689376175403595, + "learning_rate": 0.000667693028098219, + "loss": 1.4233, + "step": 5116 + }, + { + "epoch": 0.5397679324894514, + "grad_norm": 0.6727247834205627, + "learning_rate": 0.0006674434894897332, + "loss": 1.4161, + "step": 5117 + }, + { + "epoch": 0.539873417721519, + "grad_norm": 0.661383330821991, + "learning_rate": 0.000667193960132116, + "loss": 1.4439, + "step": 5118 + }, + { + "epoch": 0.5399789029535865, + "grad_norm": 0.7371211647987366, + "learning_rate": 0.0006669444400533286, + "loss": 1.4158, + "step": 5119 + }, + { + "epoch": 0.540084388185654, + "grad_norm": 0.6911489367485046, + "learning_rate": 0.0006666949292813306, + "loss": 1.3973, + "step": 5120 + }, + { + "epoch": 0.5401898734177215, + "grad_norm": 0.9108500480651855, + "learning_rate": 0.0006664454278440813, + "loss": 1.4211, + "step": 5121 + }, + { + "epoch": 0.5402953586497891, + "grad_norm": 0.7204761505126953, + "learning_rate": 0.0006661959357695382, + "loss": 1.407, + "step": 5122 + }, + { + "epoch": 0.5404008438818565, + "grad_norm": 0.7280859351158142, + "learning_rate": 0.0006659464530856587, + "loss": 1.4115, + "step": 5123 + }, + { + "epoch": 0.540506329113924, + "grad_norm": 0.6769346594810486, + "learning_rate": 0.0006656969798203982, + "loss": 1.4288, + "step": 5124 + }, + { + "epoch": 0.5406118143459916, + "grad_norm": 0.7581325769424438, + "learning_rate": 0.0006654475160017115, + "loss": 1.429, + "step": 5125 + }, + { + "epoch": 0.540717299578059, + "grad_norm": 0.6920643448829651, + "learning_rate": 0.0006651980616575522, + "loss": 1.4075, + "step": 5126 + }, + { + "epoch": 0.5408227848101266, + "grad_norm": 0.6456102728843689, + "learning_rate": 0.0006649486168158731, + "loss": 1.4205, + "step": 5127 + }, 
+ { + "epoch": 0.5409282700421941, + "grad_norm": 0.6998980641365051, + "learning_rate": 0.0006646991815046254, + "loss": 1.4286, + "step": 5128 + }, + { + "epoch": 0.5410337552742616, + "grad_norm": 0.670896589756012, + "learning_rate": 0.0006644497557517599, + "loss": 1.4323, + "step": 5129 + }, + { + "epoch": 0.5411392405063291, + "grad_norm": 0.6642457842826843, + "learning_rate": 0.0006642003395852258, + "loss": 1.434, + "step": 5130 + }, + { + "epoch": 0.5412447257383967, + "grad_norm": 0.8084887266159058, + "learning_rate": 0.0006639509330329713, + "loss": 1.4552, + "step": 5131 + }, + { + "epoch": 0.5413502109704641, + "grad_norm": 0.6366820335388184, + "learning_rate": 0.0006637015361229438, + "loss": 1.4703, + "step": 5132 + }, + { + "epoch": 0.5414556962025316, + "grad_norm": 0.7520135045051575, + "learning_rate": 0.0006634521488830898, + "loss": 1.4224, + "step": 5133 + }, + { + "epoch": 0.5415611814345992, + "grad_norm": 0.8467905521392822, + "learning_rate": 0.0006632027713413541, + "loss": 1.4347, + "step": 5134 + }, + { + "epoch": 0.5416666666666666, + "grad_norm": 0.9018921852111816, + "learning_rate": 0.0006629534035256805, + "loss": 1.4161, + "step": 5135 + }, + { + "epoch": 0.5417721518987342, + "grad_norm": 0.7743434309959412, + "learning_rate": 0.0006627040454640123, + "loss": 1.4271, + "step": 5136 + }, + { + "epoch": 0.5418776371308017, + "grad_norm": 0.7215885519981384, + "learning_rate": 0.0006624546971842909, + "loss": 1.4385, + "step": 5137 + }, + { + "epoch": 0.5419831223628692, + "grad_norm": 0.8924516439437866, + "learning_rate": 0.0006622053587144572, + "loss": 1.426, + "step": 5138 + }, + { + "epoch": 0.5420886075949367, + "grad_norm": 0.7452061176300049, + "learning_rate": 0.0006619560300824507, + "loss": 1.4321, + "step": 5139 + }, + { + "epoch": 0.5421940928270043, + "grad_norm": 0.7609549164772034, + "learning_rate": 0.0006617067113162103, + "loss": 1.4247, + "step": 5140 + }, + { + "epoch": 0.5422995780590717, + "grad_norm": 
0.7350574135780334, + "learning_rate": 0.0006614574024436732, + "loss": 1.4125, + "step": 5141 + }, + { + "epoch": 0.5424050632911392, + "grad_norm": 0.8586781024932861, + "learning_rate": 0.0006612081034927756, + "loss": 1.455, + "step": 5142 + }, + { + "epoch": 0.5425105485232068, + "grad_norm": 0.711178719997406, + "learning_rate": 0.0006609588144914528, + "loss": 1.4464, + "step": 5143 + }, + { + "epoch": 0.5426160337552742, + "grad_norm": 0.7788072824478149, + "learning_rate": 0.0006607095354676389, + "loss": 1.4133, + "step": 5144 + }, + { + "epoch": 0.5427215189873418, + "grad_norm": 0.6820935010910034, + "learning_rate": 0.0006604602664492667, + "loss": 1.4384, + "step": 5145 + }, + { + "epoch": 0.5428270042194093, + "grad_norm": 0.6881158351898193, + "learning_rate": 0.0006602110074642682, + "loss": 1.4165, + "step": 5146 + }, + { + "epoch": 0.5429324894514768, + "grad_norm": 0.6635683178901672, + "learning_rate": 0.000659961758540574, + "loss": 1.4526, + "step": 5147 + }, + { + "epoch": 0.5430379746835443, + "grad_norm": 0.6168239712715149, + "learning_rate": 0.0006597125197061133, + "loss": 1.3983, + "step": 5148 + }, + { + "epoch": 0.5431434599156119, + "grad_norm": 0.6093109846115112, + "learning_rate": 0.0006594632909888154, + "loss": 1.4223, + "step": 5149 + }, + { + "epoch": 0.5432489451476793, + "grad_norm": 0.7184295654296875, + "learning_rate": 0.0006592140724166073, + "loss": 1.4487, + "step": 5150 + }, + { + "epoch": 0.5433544303797468, + "grad_norm": 0.6425719857215881, + "learning_rate": 0.000658964864017415, + "loss": 1.3969, + "step": 5151 + }, + { + "epoch": 0.5434599156118144, + "grad_norm": 0.6997710466384888, + "learning_rate": 0.0006587156658191635, + "loss": 1.455, + "step": 5152 + }, + { + "epoch": 0.5435654008438818, + "grad_norm": 0.6807251572608948, + "learning_rate": 0.0006584664778497771, + "loss": 1.4143, + "step": 5153 + }, + { + "epoch": 0.5436708860759494, + "grad_norm": 0.6726844310760498, + "learning_rate": 
0.0006582173001371781, + "loss": 1.4376, + "step": 5154 + }, + { + "epoch": 0.5437763713080169, + "grad_norm": 0.6606460213661194, + "learning_rate": 0.0006579681327092883, + "loss": 1.4493, + "step": 5155 + }, + { + "epoch": 0.5438818565400844, + "grad_norm": 0.610992431640625, + "learning_rate": 0.0006577189755940282, + "loss": 1.396, + "step": 5156 + }, + { + "epoch": 0.5439873417721519, + "grad_norm": 0.6911028623580933, + "learning_rate": 0.0006574698288193166, + "loss": 1.4219, + "step": 5157 + }, + { + "epoch": 0.5440928270042195, + "grad_norm": 0.775286853313446, + "learning_rate": 0.0006572206924130725, + "loss": 1.4138, + "step": 5158 + }, + { + "epoch": 0.5441983122362869, + "grad_norm": 0.6956114172935486, + "learning_rate": 0.0006569715664032124, + "loss": 1.4401, + "step": 5159 + }, + { + "epoch": 0.5443037974683544, + "grad_norm": 0.9364457130432129, + "learning_rate": 0.0006567224508176523, + "loss": 1.4161, + "step": 5160 + }, + { + "epoch": 0.544409282700422, + "grad_norm": 0.7590969800949097, + "learning_rate": 0.0006564733456843067, + "loss": 1.4595, + "step": 5161 + }, + { + "epoch": 0.5445147679324894, + "grad_norm": 0.8516257405281067, + "learning_rate": 0.000656224251031089, + "loss": 1.4559, + "step": 5162 + }, + { + "epoch": 0.544620253164557, + "grad_norm": 0.6596251726150513, + "learning_rate": 0.0006559751668859115, + "loss": 1.3885, + "step": 5163 + }, + { + "epoch": 0.5447257383966245, + "grad_norm": 0.8473746180534363, + "learning_rate": 0.0006557260932766855, + "loss": 1.4239, + "step": 5164 + }, + { + "epoch": 0.544831223628692, + "grad_norm": 0.6934423446655273, + "learning_rate": 0.0006554770302313205, + "loss": 1.4435, + "step": 5165 + }, + { + "epoch": 0.5449367088607595, + "grad_norm": 0.6675741076469421, + "learning_rate": 0.0006552279777777258, + "loss": 1.3832, + "step": 5166 + }, + { + "epoch": 0.5450421940928271, + "grad_norm": 0.7392460107803345, + "learning_rate": 0.000654978935943809, + "loss": 1.4342, + "step": 5167 + 
}, + { + "epoch": 0.5451476793248945, + "grad_norm": 0.739086389541626, + "learning_rate": 0.0006547299047574761, + "loss": 1.4218, + "step": 5168 + }, + { + "epoch": 0.545253164556962, + "grad_norm": 0.6580951809883118, + "learning_rate": 0.0006544808842466324, + "loss": 1.4524, + "step": 5169 + }, + { + "epoch": 0.5453586497890295, + "grad_norm": 0.689740002155304, + "learning_rate": 0.0006542318744391821, + "loss": 1.3767, + "step": 5170 + }, + { + "epoch": 0.545464135021097, + "grad_norm": 0.6287000179290771, + "learning_rate": 0.0006539828753630276, + "loss": 1.3999, + "step": 5171 + }, + { + "epoch": 0.5455696202531646, + "grad_norm": 0.667447566986084, + "learning_rate": 0.0006537338870460708, + "loss": 1.4241, + "step": 5172 + }, + { + "epoch": 0.545675105485232, + "grad_norm": 0.6463138461112976, + "learning_rate": 0.000653484909516212, + "loss": 1.4386, + "step": 5173 + }, + { + "epoch": 0.5457805907172996, + "grad_norm": 0.7256763577461243, + "learning_rate": 0.00065323594280135, + "loss": 1.4549, + "step": 5174 + }, + { + "epoch": 0.5458860759493671, + "grad_norm": 0.6653445959091187, + "learning_rate": 0.0006529869869293834, + "loss": 1.4316, + "step": 5175 + }, + { + "epoch": 0.5459915611814345, + "grad_norm": 0.6218041777610779, + "learning_rate": 0.0006527380419282088, + "loss": 1.4158, + "step": 5176 + }, + { + "epoch": 0.5460970464135021, + "grad_norm": 0.7120771408081055, + "learning_rate": 0.0006524891078257215, + "loss": 1.4595, + "step": 5177 + }, + { + "epoch": 0.5462025316455696, + "grad_norm": 0.734629213809967, + "learning_rate": 0.000652240184649816, + "loss": 1.4735, + "step": 5178 + }, + { + "epoch": 0.5463080168776371, + "grad_norm": 0.7342908382415771, + "learning_rate": 0.0006519912724283851, + "loss": 1.4152, + "step": 5179 + }, + { + "epoch": 0.5464135021097046, + "grad_norm": 0.6805370450019836, + "learning_rate": 0.0006517423711893209, + "loss": 1.4397, + "step": 5180 + }, + { + "epoch": 0.5465189873417722, + "grad_norm": 
0.7490816116333008, + "learning_rate": 0.000651493480960514, + "loss": 1.4202, + "step": 5181 + }, + { + "epoch": 0.5466244725738396, + "grad_norm": 0.75783371925354, + "learning_rate": 0.0006512446017698537, + "loss": 1.4197, + "step": 5182 + }, + { + "epoch": 0.5467299578059072, + "grad_norm": 0.7208453416824341, + "learning_rate": 0.0006509957336452279, + "loss": 1.4005, + "step": 5183 + }, + { + "epoch": 0.5468354430379747, + "grad_norm": 0.6961776614189148, + "learning_rate": 0.0006507468766145242, + "loss": 1.4443, + "step": 5184 + }, + { + "epoch": 0.5469409282700421, + "grad_norm": 0.9170063734054565, + "learning_rate": 0.000650498030705628, + "loss": 1.4018, + "step": 5185 + }, + { + "epoch": 0.5470464135021097, + "grad_norm": 0.721295177936554, + "learning_rate": 0.0006502491959464235, + "loss": 1.4589, + "step": 5186 + }, + { + "epoch": 0.5471518987341772, + "grad_norm": 0.8379567265510559, + "learning_rate": 0.000650000372364794, + "loss": 1.4066, + "step": 5187 + }, + { + "epoch": 0.5472573839662447, + "grad_norm": 0.7329894304275513, + "learning_rate": 0.0006497515599886214, + "loss": 1.3867, + "step": 5188 + }, + { + "epoch": 0.5473628691983122, + "grad_norm": 0.9596930146217346, + "learning_rate": 0.0006495027588457864, + "loss": 1.4199, + "step": 5189 + }, + { + "epoch": 0.5474683544303798, + "grad_norm": 0.7121420502662659, + "learning_rate": 0.0006492539689641685, + "loss": 1.3985, + "step": 5190 + }, + { + "epoch": 0.5475738396624472, + "grad_norm": 0.824802577495575, + "learning_rate": 0.0006490051903716454, + "loss": 1.4308, + "step": 5191 + }, + { + "epoch": 0.5476793248945148, + "grad_norm": 0.9200422167778015, + "learning_rate": 0.0006487564230960944, + "loss": 1.433, + "step": 5192 + }, + { + "epoch": 0.5477848101265823, + "grad_norm": 0.7892900109291077, + "learning_rate": 0.0006485076671653913, + "loss": 1.4165, + "step": 5193 + }, + { + "epoch": 0.5478902953586497, + "grad_norm": 0.8509161472320557, + "learning_rate": 
0.00064825892260741, + "loss": 1.4384, + "step": 5194 + }, + { + "epoch": 0.5479957805907173, + "grad_norm": 0.6447765231132507, + "learning_rate": 0.0006480101894500239, + "loss": 1.4246, + "step": 5195 + }, + { + "epoch": 0.5481012658227848, + "grad_norm": 0.8144302368164062, + "learning_rate": 0.0006477614677211046, + "loss": 1.4471, + "step": 5196 + }, + { + "epoch": 0.5482067510548523, + "grad_norm": 0.6767162084579468, + "learning_rate": 0.0006475127574485226, + "loss": 1.4346, + "step": 5197 + }, + { + "epoch": 0.5483122362869198, + "grad_norm": 0.6565280556678772, + "learning_rate": 0.0006472640586601472, + "loss": 1.3983, + "step": 5198 + }, + { + "epoch": 0.5484177215189874, + "grad_norm": 0.8320557475090027, + "learning_rate": 0.0006470153713838463, + "loss": 1.4196, + "step": 5199 + }, + { + "epoch": 0.5485232067510548, + "grad_norm": 0.8340521454811096, + "learning_rate": 0.0006467666956474865, + "loss": 1.4176, + "step": 5200 + }, + { + "epoch": 0.5486286919831224, + "grad_norm": 0.6341564059257507, + "learning_rate": 0.0006465180314789332, + "loss": 1.3759, + "step": 5201 + }, + { + "epoch": 0.5487341772151899, + "grad_norm": 0.6668591499328613, + "learning_rate": 0.0006462693789060505, + "loss": 1.4333, + "step": 5202 + }, + { + "epoch": 0.5488396624472573, + "grad_norm": 0.7215061783790588, + "learning_rate": 0.0006460207379567011, + "loss": 1.4136, + "step": 5203 + }, + { + "epoch": 0.5489451476793249, + "grad_norm": 0.6880277395248413, + "learning_rate": 0.0006457721086587468, + "loss": 1.4208, + "step": 5204 + }, + { + "epoch": 0.5490506329113924, + "grad_norm": 0.6790303587913513, + "learning_rate": 0.0006455234910400472, + "loss": 1.4297, + "step": 5205 + }, + { + "epoch": 0.5491561181434599, + "grad_norm": 0.6949279308319092, + "learning_rate": 0.0006452748851284615, + "loss": 1.407, + "step": 5206 + }, + { + "epoch": 0.5492616033755274, + "grad_norm": 0.8121570348739624, + "learning_rate": 0.0006450262909518471, + "loss": 1.39, + "step": 
5207 + }, + { + "epoch": 0.549367088607595, + "grad_norm": 0.6469621062278748, + "learning_rate": 0.0006447777085380603, + "loss": 1.4338, + "step": 5208 + }, + { + "epoch": 0.5494725738396624, + "grad_norm": 0.7645172476768494, + "learning_rate": 0.0006445291379149556, + "loss": 1.4198, + "step": 5209 + }, + { + "epoch": 0.54957805907173, + "grad_norm": 0.6261657476425171, + "learning_rate": 0.0006442805791103873, + "loss": 1.4071, + "step": 5210 + }, + { + "epoch": 0.5496835443037975, + "grad_norm": 0.6709871292114258, + "learning_rate": 0.0006440320321522071, + "loss": 1.4423, + "step": 5211 + }, + { + "epoch": 0.549789029535865, + "grad_norm": 0.7455157041549683, + "learning_rate": 0.0006437834970682661, + "loss": 1.3733, + "step": 5212 + }, + { + "epoch": 0.5498945147679325, + "grad_norm": 0.7970384359359741, + "learning_rate": 0.000643534973886414, + "loss": 1.4512, + "step": 5213 + }, + { + "epoch": 0.55, + "grad_norm": 0.6744860410690308, + "learning_rate": 0.0006432864626344989, + "loss": 1.4322, + "step": 5214 + }, + { + "epoch": 0.5501054852320675, + "grad_norm": 0.7334769368171692, + "learning_rate": 0.0006430379633403679, + "loss": 1.4339, + "step": 5215 + }, + { + "epoch": 0.550210970464135, + "grad_norm": 0.7979395389556885, + "learning_rate": 0.0006427894760318664, + "loss": 1.4447, + "step": 5216 + }, + { + "epoch": 0.5503164556962026, + "grad_norm": 0.7503217458724976, + "learning_rate": 0.0006425410007368385, + "loss": 1.4165, + "step": 5217 + }, + { + "epoch": 0.55042194092827, + "grad_norm": 0.7524239420890808, + "learning_rate": 0.0006422925374831275, + "loss": 1.4144, + "step": 5218 + }, + { + "epoch": 0.5505274261603376, + "grad_norm": 0.6487559080123901, + "learning_rate": 0.0006420440862985748, + "loss": 1.4419, + "step": 5219 + }, + { + "epoch": 0.5506329113924051, + "grad_norm": 0.7059316635131836, + "learning_rate": 0.0006417956472110205, + "loss": 1.378, + "step": 5220 + }, + { + "epoch": 0.5507383966244725, + "grad_norm": 
0.6378591656684875, + "learning_rate": 0.0006415472202483034, + "loss": 1.4075, + "step": 5221 + }, + { + "epoch": 0.5508438818565401, + "grad_norm": 0.665618360042572, + "learning_rate": 0.0006412988054382611, + "loss": 1.3927, + "step": 5222 + }, + { + "epoch": 0.5509493670886076, + "grad_norm": 0.6447780728340149, + "learning_rate": 0.0006410504028087297, + "loss": 1.3866, + "step": 5223 + }, + { + "epoch": 0.5510548523206751, + "grad_norm": 0.6560017466545105, + "learning_rate": 0.000640802012387544, + "loss": 1.4395, + "step": 5224 + }, + { + "epoch": 0.5511603375527426, + "grad_norm": 0.6654611229896545, + "learning_rate": 0.0006405536342025374, + "loss": 1.4221, + "step": 5225 + }, + { + "epoch": 0.5512658227848102, + "grad_norm": 0.6373922824859619, + "learning_rate": 0.0006403052682815415, + "loss": 1.4084, + "step": 5226 + }, + { + "epoch": 0.5513713080168776, + "grad_norm": 0.7072852849960327, + "learning_rate": 0.0006400569146523875, + "loss": 1.3909, + "step": 5227 + }, + { + "epoch": 0.5514767932489452, + "grad_norm": 0.7916091084480286, + "learning_rate": 0.0006398085733429045, + "loss": 1.447, + "step": 5228 + }, + { + "epoch": 0.5515822784810127, + "grad_norm": 0.704236626625061, + "learning_rate": 0.0006395602443809203, + "loss": 1.4205, + "step": 5229 + }, + { + "epoch": 0.5516877637130801, + "grad_norm": 0.8662128448486328, + "learning_rate": 0.0006393119277942614, + "loss": 1.4402, + "step": 5230 + }, + { + "epoch": 0.5517932489451477, + "grad_norm": 0.685816764831543, + "learning_rate": 0.0006390636236107528, + "loss": 1.441, + "step": 5231 + }, + { + "epoch": 0.5518987341772152, + "grad_norm": 0.7716725468635559, + "learning_rate": 0.0006388153318582185, + "loss": 1.4157, + "step": 5232 + }, + { + "epoch": 0.5520042194092827, + "grad_norm": 0.663215696811676, + "learning_rate": 0.0006385670525644806, + "loss": 1.3635, + "step": 5233 + }, + { + "epoch": 0.5521097046413502, + "grad_norm": 0.8380842804908752, + "learning_rate": 
0.0006383187857573601, + "loss": 1.4255, + "step": 5234 + }, + { + "epoch": 0.5522151898734177, + "grad_norm": 0.7084742188453674, + "learning_rate": 0.0006380705314646765, + "loss": 1.4237, + "step": 5235 + }, + { + "epoch": 0.5523206751054852, + "grad_norm": 0.7436993718147278, + "learning_rate": 0.0006378222897142482, + "loss": 1.4322, + "step": 5236 + }, + { + "epoch": 0.5524261603375528, + "grad_norm": 0.7857199311256409, + "learning_rate": 0.0006375740605338916, + "loss": 1.4146, + "step": 5237 + }, + { + "epoch": 0.5525316455696202, + "grad_norm": 0.7543919682502747, + "learning_rate": 0.0006373258439514221, + "loss": 1.415, + "step": 5238 + }, + { + "epoch": 0.5526371308016877, + "grad_norm": 0.7503506541252136, + "learning_rate": 0.0006370776399946536, + "loss": 1.4312, + "step": 5239 + }, + { + "epoch": 0.5527426160337553, + "grad_norm": 0.651021420955658, + "learning_rate": 0.0006368294486913987, + "loss": 1.4531, + "step": 5240 + }, + { + "epoch": 0.5528481012658227, + "grad_norm": 0.8351427912712097, + "learning_rate": 0.0006365812700694683, + "loss": 1.4278, + "step": 5241 + }, + { + "epoch": 0.5529535864978903, + "grad_norm": 0.6146057844161987, + "learning_rate": 0.0006363331041566723, + "loss": 1.4042, + "step": 5242 + }, + { + "epoch": 0.5530590717299578, + "grad_norm": 0.8980385661125183, + "learning_rate": 0.0006360849509808184, + "loss": 1.407, + "step": 5243 + }, + { + "epoch": 0.5531645569620253, + "grad_norm": 0.6092596650123596, + "learning_rate": 0.0006358368105697142, + "loss": 1.392, + "step": 5244 + }, + { + "epoch": 0.5532700421940928, + "grad_norm": 0.6804438829421997, + "learning_rate": 0.0006355886829511645, + "loss": 1.4254, + "step": 5245 + }, + { + "epoch": 0.5533755274261604, + "grad_norm": 0.8261692523956299, + "learning_rate": 0.0006353405681529734, + "loss": 1.4178, + "step": 5246 + }, + { + "epoch": 0.5534810126582278, + "grad_norm": 0.8355104327201843, + "learning_rate": 0.0006350924662029433, + "loss": 1.4103, + "step": 
5247 + }, + { + "epoch": 0.5535864978902953, + "grad_norm": 0.8482396602630615, + "learning_rate": 0.0006348443771288755, + "loss": 1.4226, + "step": 5248 + }, + { + "epoch": 0.5536919831223629, + "grad_norm": 1.1638633012771606, + "learning_rate": 0.0006345963009585694, + "loss": 1.3973, + "step": 5249 + }, + { + "epoch": 0.5537974683544303, + "grad_norm": 0.783706545829773, + "learning_rate": 0.0006343482377198232, + "loss": 1.4423, + "step": 5250 + }, + { + "epoch": 0.5539029535864979, + "grad_norm": 0.9413915276527405, + "learning_rate": 0.0006341001874404335, + "loss": 1.4333, + "step": 5251 + }, + { + "epoch": 0.5540084388185654, + "grad_norm": 0.8330814242362976, + "learning_rate": 0.0006338521501481957, + "loss": 1.4086, + "step": 5252 + }, + { + "epoch": 0.5541139240506329, + "grad_norm": 0.9070832133293152, + "learning_rate": 0.0006336041258709039, + "loss": 1.406, + "step": 5253 + }, + { + "epoch": 0.5542194092827004, + "grad_norm": 0.833094596862793, + "learning_rate": 0.0006333561146363502, + "loss": 1.436, + "step": 5254 + }, + { + "epoch": 0.554324894514768, + "grad_norm": 0.9349180459976196, + "learning_rate": 0.0006331081164723253, + "loss": 1.4272, + "step": 5255 + }, + { + "epoch": 0.5544303797468354, + "grad_norm": 0.6848312020301819, + "learning_rate": 0.000632860131406619, + "loss": 1.3845, + "step": 5256 + }, + { + "epoch": 0.554535864978903, + "grad_norm": 0.6418766379356384, + "learning_rate": 0.0006326121594670191, + "loss": 1.4294, + "step": 5257 + }, + { + "epoch": 0.5546413502109705, + "grad_norm": 1.0399726629257202, + "learning_rate": 0.000632364200681312, + "loss": 1.4318, + "step": 5258 + }, + { + "epoch": 0.5547468354430379, + "grad_norm": 0.7537274956703186, + "learning_rate": 0.0006321162550772829, + "loss": 1.4338, + "step": 5259 + }, + { + "epoch": 0.5548523206751055, + "grad_norm": 0.9189210534095764, + "learning_rate": 0.0006318683226827151, + "loss": 1.3991, + "step": 5260 + }, + { + "epoch": 0.554957805907173, + 
"grad_norm": 0.7541347742080688, + "learning_rate": 0.0006316204035253906, + "loss": 1.4179, + "step": 5261 + }, + { + "epoch": 0.5550632911392405, + "grad_norm": 0.8797961473464966, + "learning_rate": 0.0006313724976330904, + "loss": 1.4295, + "step": 5262 + }, + { + "epoch": 0.555168776371308, + "grad_norm": 0.6541542410850525, + "learning_rate": 0.0006311246050335934, + "loss": 1.4212, + "step": 5263 + }, + { + "epoch": 0.5552742616033756, + "grad_norm": 1.0947368144989014, + "learning_rate": 0.0006308767257546772, + "loss": 1.4033, + "step": 5264 + }, + { + "epoch": 0.555379746835443, + "grad_norm": 0.7224650979042053, + "learning_rate": 0.0006306288598241179, + "loss": 1.4166, + "step": 5265 + }, + { + "epoch": 0.5554852320675105, + "grad_norm": 0.9198884963989258, + "learning_rate": 0.00063038100726969, + "loss": 1.419, + "step": 5266 + }, + { + "epoch": 0.5555907172995781, + "grad_norm": 0.6395454406738281, + "learning_rate": 0.0006301331681191668, + "loss": 1.4343, + "step": 5267 + }, + { + "epoch": 0.5556962025316455, + "grad_norm": 0.7111367583274841, + "learning_rate": 0.0006298853424003199, + "loss": 1.4401, + "step": 5268 + }, + { + "epoch": 0.5558016877637131, + "grad_norm": 0.726942241191864, + "learning_rate": 0.0006296375301409187, + "loss": 1.4334, + "step": 5269 + }, + { + "epoch": 0.5559071729957806, + "grad_norm": 0.6876237392425537, + "learning_rate": 0.0006293897313687331, + "loss": 1.4361, + "step": 5270 + }, + { + "epoch": 0.5560126582278481, + "grad_norm": 0.8686115145683289, + "learning_rate": 0.0006291419461115293, + "loss": 1.4145, + "step": 5271 + }, + { + "epoch": 0.5561181434599156, + "grad_norm": 0.6581480503082275, + "learning_rate": 0.0006288941743970732, + "loss": 1.4406, + "step": 5272 + }, + { + "epoch": 0.5562236286919832, + "grad_norm": 0.7131281495094299, + "learning_rate": 0.0006286464162531287, + "loss": 1.3867, + "step": 5273 + }, + { + "epoch": 0.5563291139240506, + "grad_norm": 0.6608428955078125, + "learning_rate": 
0.0006283986717074585, + "loss": 1.3889, + "step": 5274 + }, + { + "epoch": 0.5564345991561181, + "grad_norm": 0.6438680291175842, + "learning_rate": 0.0006281509407878232, + "loss": 1.3987, + "step": 5275 + }, + { + "epoch": 0.5565400843881857, + "grad_norm": 0.6651315689086914, + "learning_rate": 0.0006279032235219829, + "loss": 1.4315, + "step": 5276 + }, + { + "epoch": 0.5566455696202531, + "grad_norm": 0.6222532391548157, + "learning_rate": 0.0006276555199376951, + "loss": 1.4386, + "step": 5277 + }, + { + "epoch": 0.5567510548523207, + "grad_norm": 0.626147449016571, + "learning_rate": 0.000627407830062716, + "loss": 1.4213, + "step": 5278 + }, + { + "epoch": 0.5568565400843882, + "grad_norm": 0.6806865334510803, + "learning_rate": 0.0006271601539248012, + "loss": 1.4295, + "step": 5279 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 0.685267448425293, + "learning_rate": 0.0006269124915517037, + "loss": 1.4062, + "step": 5280 + }, + { + "epoch": 0.5570675105485232, + "grad_norm": 0.6257442831993103, + "learning_rate": 0.0006266648429711753, + "loss": 1.4245, + "step": 5281 + }, + { + "epoch": 0.5571729957805908, + "grad_norm": 0.6524856090545654, + "learning_rate": 0.0006264172082109661, + "loss": 1.4027, + "step": 5282 + }, + { + "epoch": 0.5572784810126582, + "grad_norm": 0.6629437804222107, + "learning_rate": 0.0006261695872988252, + "loss": 1.3889, + "step": 5283 + }, + { + "epoch": 0.5573839662447257, + "grad_norm": 0.7996131777763367, + "learning_rate": 0.0006259219802624994, + "loss": 1.4278, + "step": 5284 + }, + { + "epoch": 0.5574894514767933, + "grad_norm": 0.7144418358802795, + "learning_rate": 0.0006256743871297344, + "loss": 1.4059, + "step": 5285 + }, + { + "epoch": 0.5575949367088607, + "grad_norm": 0.7848897576332092, + "learning_rate": 0.0006254268079282743, + "loss": 1.404, + "step": 5286 + }, + { + "epoch": 0.5577004219409283, + "grad_norm": 0.6455575227737427, + "learning_rate": 0.0006251792426858612, + "loss": 1.3981, + "step": 
5287 + }, + { + "epoch": 0.5578059071729958, + "grad_norm": 1.1821708679199219, + "learning_rate": 0.0006249316914302368, + "loss": 1.4286, + "step": 5288 + }, + { + "epoch": 0.5579113924050633, + "grad_norm": 0.6407886743545532, + "learning_rate": 0.0006246841541891399, + "loss": 1.4312, + "step": 5289 + }, + { + "epoch": 0.5580168776371308, + "grad_norm": 1.0016671419143677, + "learning_rate": 0.0006244366309903084, + "loss": 1.4537, + "step": 5290 + }, + { + "epoch": 0.5581223628691984, + "grad_norm": 0.6383686661720276, + "learning_rate": 0.0006241891218614786, + "loss": 1.4262, + "step": 5291 + }, + { + "epoch": 0.5582278481012658, + "grad_norm": 0.8714097142219543, + "learning_rate": 0.0006239416268303849, + "loss": 1.3883, + "step": 5292 + }, + { + "epoch": 0.5583333333333333, + "grad_norm": 0.604945957660675, + "learning_rate": 0.0006236941459247606, + "loss": 1.4127, + "step": 5293 + }, + { + "epoch": 0.5584388185654009, + "grad_norm": 0.6473423838615417, + "learning_rate": 0.0006234466791723371, + "loss": 1.4366, + "step": 5294 + }, + { + "epoch": 0.5585443037974683, + "grad_norm": 0.6947076916694641, + "learning_rate": 0.0006231992266008438, + "loss": 1.4181, + "step": 5295 + }, + { + "epoch": 0.5586497890295359, + "grad_norm": 0.6356166005134583, + "learning_rate": 0.00062295178823801, + "loss": 1.4356, + "step": 5296 + }, + { + "epoch": 0.5587552742616034, + "grad_norm": 0.7325384616851807, + "learning_rate": 0.0006227043641115616, + "loss": 1.4063, + "step": 5297 + }, + { + "epoch": 0.5588607594936709, + "grad_norm": 0.6502054333686829, + "learning_rate": 0.0006224569542492241, + "loss": 1.4002, + "step": 5298 + }, + { + "epoch": 0.5589662447257384, + "grad_norm": 0.6305287480354309, + "learning_rate": 0.0006222095586787208, + "loss": 1.4425, + "step": 5299 + }, + { + "epoch": 0.5590717299578059, + "grad_norm": 0.8023825883865356, + "learning_rate": 0.0006219621774277737, + "loss": 1.4342, + "step": 5300 + }, + { + "epoch": 0.5591772151898734, + 
"grad_norm": 0.639616847038269, + "learning_rate": 0.000621714810524103, + "loss": 1.3934, + "step": 5301 + }, + { + "epoch": 0.559282700421941, + "grad_norm": 0.7358201742172241, + "learning_rate": 0.0006214674579954276, + "loss": 1.4059, + "step": 5302 + }, + { + "epoch": 0.5593881856540084, + "grad_norm": 0.6770002841949463, + "learning_rate": 0.0006212201198694643, + "loss": 1.4239, + "step": 5303 + }, + { + "epoch": 0.5594936708860759, + "grad_norm": 0.6588394045829773, + "learning_rate": 0.0006209727961739286, + "loss": 1.3991, + "step": 5304 + }, + { + "epoch": 0.5595991561181435, + "grad_norm": 0.6903225183486938, + "learning_rate": 0.0006207254869365346, + "loss": 1.4326, + "step": 5305 + }, + { + "epoch": 0.5597046413502109, + "grad_norm": 0.5988549590110779, + "learning_rate": 0.0006204781921849945, + "loss": 1.4253, + "step": 5306 + }, + { + "epoch": 0.5598101265822785, + "grad_norm": 0.6736268997192383, + "learning_rate": 0.0006202309119470188, + "loss": 1.4134, + "step": 5307 + }, + { + "epoch": 0.559915611814346, + "grad_norm": 0.6442360877990723, + "learning_rate": 0.0006199836462503166, + "loss": 1.3728, + "step": 5308 + }, + { + "epoch": 0.5600210970464135, + "grad_norm": 0.6980152726173401, + "learning_rate": 0.0006197363951225951, + "loss": 1.4039, + "step": 5309 + }, + { + "epoch": 0.560126582278481, + "grad_norm": 0.7039757370948792, + "learning_rate": 0.00061948915859156, + "loss": 1.4057, + "step": 5310 + }, + { + "epoch": 0.5602320675105485, + "grad_norm": 0.6596351861953735, + "learning_rate": 0.0006192419366849155, + "loss": 1.4126, + "step": 5311 + }, + { + "epoch": 0.560337552742616, + "grad_norm": 0.6723021268844604, + "learning_rate": 0.0006189947294303641, + "loss": 1.4004, + "step": 5312 + }, + { + "epoch": 0.5604430379746835, + "grad_norm": 0.7192777991294861, + "learning_rate": 0.000618747536855606, + "loss": 1.3961, + "step": 5313 + }, + { + "epoch": 0.5605485232067511, + "grad_norm": 0.6926875114440918, + "learning_rate": 
0.0006185003589883413, + "loss": 1.3996, + "step": 5314 + }, + { + "epoch": 0.5606540084388185, + "grad_norm": 0.6709699630737305, + "learning_rate": 0.0006182531958562672, + "loss": 1.4097, + "step": 5315 + }, + { + "epoch": 0.5607594936708861, + "grad_norm": 0.6637781858444214, + "learning_rate": 0.0006180060474870793, + "loss": 1.4262, + "step": 5316 + }, + { + "epoch": 0.5608649789029536, + "grad_norm": 0.7472043633460999, + "learning_rate": 0.0006177589139084721, + "loss": 1.3853, + "step": 5317 + }, + { + "epoch": 0.560970464135021, + "grad_norm": 0.6293750405311584, + "learning_rate": 0.000617511795148138, + "loss": 1.4353, + "step": 5318 + }, + { + "epoch": 0.5610759493670886, + "grad_norm": 0.6498353481292725, + "learning_rate": 0.0006172646912337678, + "loss": 1.4169, + "step": 5319 + }, + { + "epoch": 0.5611814345991561, + "grad_norm": 0.6892804503440857, + "learning_rate": 0.0006170176021930509, + "loss": 1.4104, + "step": 5320 + }, + { + "epoch": 0.5612869198312236, + "grad_norm": 0.6222749352455139, + "learning_rate": 0.0006167705280536745, + "loss": 1.4436, + "step": 5321 + }, + { + "epoch": 0.5613924050632911, + "grad_norm": 0.6838598251342773, + "learning_rate": 0.000616523468843325, + "loss": 1.4453, + "step": 5322 + }, + { + "epoch": 0.5614978902953587, + "grad_norm": 0.6675688624382019, + "learning_rate": 0.0006162764245896863, + "loss": 1.4264, + "step": 5323 + }, + { + "epoch": 0.5616033755274261, + "grad_norm": 0.6745519638061523, + "learning_rate": 0.0006160293953204412, + "loss": 1.4195, + "step": 5324 + }, + { + "epoch": 0.5617088607594937, + "grad_norm": 0.6697179079055786, + "learning_rate": 0.0006157823810632704, + "loss": 1.3992, + "step": 5325 + }, + { + "epoch": 0.5618143459915612, + "grad_norm": 0.7088612914085388, + "learning_rate": 0.000615535381845853, + "loss": 1.425, + "step": 5326 + }, + { + "epoch": 0.5619198312236287, + "grad_norm": 0.6371913552284241, + "learning_rate": 0.0006152883976958665, + "loss": 1.3951, + "step": 
5327 + }, + { + "epoch": 0.5620253164556962, + "grad_norm": 0.6323721408843994, + "learning_rate": 0.0006150414286409869, + "loss": 1.3974, + "step": 5328 + }, + { + "epoch": 0.5621308016877637, + "grad_norm": 0.8994733095169067, + "learning_rate": 0.0006147944747088881, + "loss": 1.4304, + "step": 5329 + }, + { + "epoch": 0.5622362869198312, + "grad_norm": 0.6904186606407166, + "learning_rate": 0.0006145475359272424, + "loss": 1.4211, + "step": 5330 + }, + { + "epoch": 0.5623417721518987, + "grad_norm": 0.8769069910049438, + "learning_rate": 0.0006143006123237208, + "loss": 1.4153, + "step": 5331 + }, + { + "epoch": 0.5624472573839663, + "grad_norm": 0.6081880331039429, + "learning_rate": 0.0006140537039259925, + "loss": 1.4196, + "step": 5332 + }, + { + "epoch": 0.5625527426160337, + "grad_norm": 0.8548357486724854, + "learning_rate": 0.0006138068107617244, + "loss": 1.3786, + "step": 5333 + }, + { + "epoch": 0.5626582278481013, + "grad_norm": 0.6397265195846558, + "learning_rate": 0.0006135599328585824, + "loss": 1.4279, + "step": 5334 + }, + { + "epoch": 0.5627637130801688, + "grad_norm": 0.6537976264953613, + "learning_rate": 0.0006133130702442302, + "loss": 1.4247, + "step": 5335 + }, + { + "epoch": 0.5628691983122363, + "grad_norm": 0.7430461645126343, + "learning_rate": 0.0006130662229463301, + "loss": 1.4224, + "step": 5336 + }, + { + "epoch": 0.5629746835443038, + "grad_norm": 0.7077770829200745, + "learning_rate": 0.0006128193909925425, + "loss": 1.3969, + "step": 5337 + }, + { + "epoch": 0.5630801687763713, + "grad_norm": 0.6411765813827515, + "learning_rate": 0.0006125725744105263, + "loss": 1.4002, + "step": 5338 + }, + { + "epoch": 0.5631856540084388, + "grad_norm": 0.634719729423523, + "learning_rate": 0.000612325773227938, + "loss": 1.4196, + "step": 5339 + }, + { + "epoch": 0.5632911392405063, + "grad_norm": 0.7190914750099182, + "learning_rate": 0.0006120789874724336, + "loss": 1.4229, + "step": 5340 + }, + { + "epoch": 0.5633966244725739, + 
"grad_norm": 0.606334388256073, + "learning_rate": 0.0006118322171716665, + "loss": 1.4561, + "step": 5341 + }, + { + "epoch": 0.5635021097046413, + "grad_norm": 0.6730697154998779, + "learning_rate": 0.0006115854623532884, + "loss": 1.4016, + "step": 5342 + }, + { + "epoch": 0.5636075949367089, + "grad_norm": 0.6350687742233276, + "learning_rate": 0.0006113387230449493, + "loss": 1.4313, + "step": 5343 + }, + { + "epoch": 0.5637130801687764, + "grad_norm": 0.6392146944999695, + "learning_rate": 0.0006110919992742978, + "loss": 1.3959, + "step": 5344 + }, + { + "epoch": 0.5638185654008439, + "grad_norm": 0.6948207020759583, + "learning_rate": 0.0006108452910689804, + "loss": 1.4253, + "step": 5345 + }, + { + "epoch": 0.5639240506329114, + "grad_norm": 0.6832464337348938, + "learning_rate": 0.0006105985984566421, + "loss": 1.3956, + "step": 5346 + }, + { + "epoch": 0.564029535864979, + "grad_norm": 0.659032940864563, + "learning_rate": 0.0006103519214649256, + "loss": 1.3582, + "step": 5347 + }, + { + "epoch": 0.5641350210970464, + "grad_norm": 0.7055992484092712, + "learning_rate": 0.000610105260121473, + "loss": 1.388, + "step": 5348 + }, + { + "epoch": 0.5642405063291139, + "grad_norm": 0.6739064455032349, + "learning_rate": 0.0006098586144539235, + "loss": 1.4129, + "step": 5349 + }, + { + "epoch": 0.5643459915611815, + "grad_norm": 0.6526969075202942, + "learning_rate": 0.0006096119844899151, + "loss": 1.3823, + "step": 5350 + }, + { + "epoch": 0.5644514767932489, + "grad_norm": 0.8135843276977539, + "learning_rate": 0.000609365370257084, + "loss": 1.396, + "step": 5351 + }, + { + "epoch": 0.5645569620253165, + "grad_norm": 0.6672442555427551, + "learning_rate": 0.0006091187717830643, + "loss": 1.4032, + "step": 5352 + }, + { + "epoch": 0.564662447257384, + "grad_norm": 0.6862614750862122, + "learning_rate": 0.0006088721890954887, + "loss": 1.4198, + "step": 5353 + }, + { + "epoch": 0.5647679324894515, + "grad_norm": 0.6585639715194702, + "learning_rate": 
0.0006086256222219881, + "loss": 1.4126, + "step": 5354 + }, + { + "epoch": 0.564873417721519, + "grad_norm": 0.6510211229324341, + "learning_rate": 0.0006083790711901915, + "loss": 1.4158, + "step": 5355 + }, + { + "epoch": 0.5649789029535865, + "grad_norm": 0.6628508567810059, + "learning_rate": 0.0006081325360277257, + "loss": 1.3884, + "step": 5356 + }, + { + "epoch": 0.565084388185654, + "grad_norm": 0.6454604268074036, + "learning_rate": 0.0006078860167622171, + "loss": 1.4032, + "step": 5357 + }, + { + "epoch": 0.5651898734177215, + "grad_norm": 0.6146312952041626, + "learning_rate": 0.000607639513421289, + "loss": 1.4157, + "step": 5358 + }, + { + "epoch": 0.5652953586497891, + "grad_norm": 0.6862131357192993, + "learning_rate": 0.0006073930260325632, + "loss": 1.4352, + "step": 5359 + }, + { + "epoch": 0.5654008438818565, + "grad_norm": 0.6346619129180908, + "learning_rate": 0.0006071465546236601, + "loss": 1.4508, + "step": 5360 + }, + { + "epoch": 0.5655063291139241, + "grad_norm": 0.6870558261871338, + "learning_rate": 0.0006069000992221977, + "loss": 1.4224, + "step": 5361 + }, + { + "epoch": 0.5656118143459916, + "grad_norm": 0.6486373543739319, + "learning_rate": 0.0006066536598557927, + "loss": 1.4147, + "step": 5362 + }, + { + "epoch": 0.565717299578059, + "grad_norm": 0.6622165441513062, + "learning_rate": 0.0006064072365520601, + "loss": 1.4744, + "step": 5363 + }, + { + "epoch": 0.5658227848101266, + "grad_norm": 0.6726401448249817, + "learning_rate": 0.0006061608293386126, + "loss": 1.4066, + "step": 5364 + }, + { + "epoch": 0.5659282700421941, + "grad_norm": 0.622787356376648, + "learning_rate": 0.0006059144382430612, + "loss": 1.3997, + "step": 5365 + }, + { + "epoch": 0.5660337552742616, + "grad_norm": 0.7085393071174622, + "learning_rate": 0.0006056680632930154, + "loss": 1.4208, + "step": 5366 + }, + { + "epoch": 0.5661392405063291, + "grad_norm": 0.6673603653907776, + "learning_rate": 0.0006054217045160831, + "loss": 1.3984, + "step": 
5367 + }, + { + "epoch": 0.5662447257383966, + "grad_norm": 0.6449306607246399, + "learning_rate": 0.0006051753619398697, + "loss": 1.4086, + "step": 5368 + }, + { + "epoch": 0.5663502109704641, + "grad_norm": 0.7437227964401245, + "learning_rate": 0.0006049290355919792, + "loss": 1.377, + "step": 5369 + }, + { + "epoch": 0.5664556962025317, + "grad_norm": 0.7169831395149231, + "learning_rate": 0.0006046827255000135, + "loss": 1.4134, + "step": 5370 + }, + { + "epoch": 0.5665611814345991, + "grad_norm": 0.7358134984970093, + "learning_rate": 0.0006044364316915733, + "loss": 1.4237, + "step": 5371 + }, + { + "epoch": 0.5666666666666667, + "grad_norm": 0.6850075721740723, + "learning_rate": 0.0006041901541942565, + "loss": 1.4127, + "step": 5372 + }, + { + "epoch": 0.5667721518987342, + "grad_norm": 0.6785259246826172, + "learning_rate": 0.0006039438930356601, + "loss": 1.3821, + "step": 5373 + }, + { + "epoch": 0.5668776371308016, + "grad_norm": 0.8340238928794861, + "learning_rate": 0.0006036976482433787, + "loss": 1.4152, + "step": 5374 + }, + { + "epoch": 0.5669831223628692, + "grad_norm": 0.666500985622406, + "learning_rate": 0.0006034514198450053, + "loss": 1.4012, + "step": 5375 + }, + { + "epoch": 0.5670886075949367, + "grad_norm": 0.9493124485015869, + "learning_rate": 0.0006032052078681312, + "loss": 1.3719, + "step": 5376 + }, + { + "epoch": 0.5671940928270042, + "grad_norm": 0.6121443510055542, + "learning_rate": 0.0006029590123403456, + "loss": 1.4524, + "step": 5377 + }, + { + "epoch": 0.5672995780590717, + "grad_norm": 0.9803436398506165, + "learning_rate": 0.0006027128332892358, + "loss": 1.3662, + "step": 5378 + }, + { + "epoch": 0.5674050632911393, + "grad_norm": 0.6815166473388672, + "learning_rate": 0.0006024666707423875, + "loss": 1.4292, + "step": 5379 + }, + { + "epoch": 0.5675105485232067, + "grad_norm": 0.7561679482460022, + "learning_rate": 0.0006022205247273845, + "loss": 1.4063, + "step": 5380 + }, + { + "epoch": 0.5676160337552743, + 
"grad_norm": 0.7498292922973633, + "learning_rate": 0.0006019743952718085, + "loss": 1.3774, + "step": 5381 + }, + { + "epoch": 0.5677215189873418, + "grad_norm": 0.6515268087387085, + "learning_rate": 0.0006017282824032394, + "loss": 1.3692, + "step": 5382 + }, + { + "epoch": 0.5678270042194092, + "grad_norm": 0.7326071262359619, + "learning_rate": 0.0006014821861492559, + "loss": 1.414, + "step": 5383 + }, + { + "epoch": 0.5679324894514768, + "grad_norm": 0.7095131278038025, + "learning_rate": 0.0006012361065374339, + "loss": 1.4169, + "step": 5384 + }, + { + "epoch": 0.5680379746835443, + "grad_norm": 0.8705723881721497, + "learning_rate": 0.0006009900435953478, + "loss": 1.4128, + "step": 5385 + }, + { + "epoch": 0.5681434599156118, + "grad_norm": 0.6370663642883301, + "learning_rate": 0.0006007439973505707, + "loss": 1.4328, + "step": 5386 + }, + { + "epoch": 0.5682489451476793, + "grad_norm": 0.8837241530418396, + "learning_rate": 0.0006004979678306729, + "loss": 1.4262, + "step": 5387 + }, + { + "epoch": 0.5683544303797469, + "grad_norm": 0.6611368656158447, + "learning_rate": 0.0006002519550632232, + "loss": 1.4155, + "step": 5388 + }, + { + "epoch": 0.5684599156118143, + "grad_norm": 0.7695158123970032, + "learning_rate": 0.0006000059590757886, + "loss": 1.4134, + "step": 5389 + }, + { + "epoch": 0.5685654008438819, + "grad_norm": 0.6888339519500732, + "learning_rate": 0.0005997599798959343, + "loss": 1.4112, + "step": 5390 + }, + { + "epoch": 0.5686708860759494, + "grad_norm": 0.7656731009483337, + "learning_rate": 0.0005995140175512233, + "loss": 1.3983, + "step": 5391 + }, + { + "epoch": 0.5687763713080168, + "grad_norm": 0.6325889229774475, + "learning_rate": 0.000599268072069217, + "loss": 1.4021, + "step": 5392 + }, + { + "epoch": 0.5688818565400844, + "grad_norm": 0.6996787786483765, + "learning_rate": 0.0005990221434774751, + "loss": 1.4268, + "step": 5393 + }, + { + "epoch": 0.5689873417721519, + "grad_norm": 0.6612675786018372, + "learning_rate": 
0.0005987762318035546, + "loss": 1.4091, + "step": 5394 + }, + { + "epoch": 0.5690928270042194, + "grad_norm": 0.6534438133239746, + "learning_rate": 0.0005985303370750115, + "loss": 1.4019, + "step": 5395 + }, + { + "epoch": 0.5691983122362869, + "grad_norm": 0.6578928232192993, + "learning_rate": 0.0005982844593193995, + "loss": 1.4062, + "step": 5396 + }, + { + "epoch": 0.5693037974683545, + "grad_norm": 0.6423414945602417, + "learning_rate": 0.0005980385985642703, + "loss": 1.433, + "step": 5397 + }, + { + "epoch": 0.5694092827004219, + "grad_norm": 0.6147738695144653, + "learning_rate": 0.000597792754837174, + "loss": 1.4091, + "step": 5398 + }, + { + "epoch": 0.5695147679324895, + "grad_norm": 0.7217223048210144, + "learning_rate": 0.0005975469281656581, + "loss": 1.3929, + "step": 5399 + }, + { + "epoch": 0.569620253164557, + "grad_norm": 0.6192857623100281, + "learning_rate": 0.0005973011185772694, + "loss": 1.3848, + "step": 5400 + }, + { + "epoch": 0.5697257383966244, + "grad_norm": 0.6820330619812012, + "learning_rate": 0.0005970553260995517, + "loss": 1.4155, + "step": 5401 + }, + { + "epoch": 0.569831223628692, + "grad_norm": 0.6819541454315186, + "learning_rate": 0.0005968095507600476, + "loss": 1.408, + "step": 5402 + }, + { + "epoch": 0.5699367088607595, + "grad_norm": 0.7147498726844788, + "learning_rate": 0.000596563792586297, + "loss": 1.419, + "step": 5403 + }, + { + "epoch": 0.570042194092827, + "grad_norm": 0.6410568952560425, + "learning_rate": 0.0005963180516058386, + "loss": 1.4563, + "step": 5404 + }, + { + "epoch": 0.5701476793248945, + "grad_norm": 0.6773163676261902, + "learning_rate": 0.0005960723278462086, + "loss": 1.3948, + "step": 5405 + }, + { + "epoch": 0.5702531645569621, + "grad_norm": 0.649254560470581, + "learning_rate": 0.0005958266213349422, + "loss": 1.3693, + "step": 5406 + }, + { + "epoch": 0.5703586497890295, + "grad_norm": 0.6139540672302246, + "learning_rate": 0.0005955809320995714, + "loss": 1.3932, + "step": 5407 + 
}, + { + "epoch": 0.570464135021097, + "grad_norm": 0.6710228323936462, + "learning_rate": 0.0005953352601676272, + "loss": 1.4124, + "step": 5408 + }, + { + "epoch": 0.5705696202531646, + "grad_norm": 0.6418141722679138, + "learning_rate": 0.0005950896055666384, + "loss": 1.4282, + "step": 5409 + }, + { + "epoch": 0.570675105485232, + "grad_norm": 0.6394287347793579, + "learning_rate": 0.0005948439683241318, + "loss": 1.3877, + "step": 5410 + }, + { + "epoch": 0.5707805907172996, + "grad_norm": 0.6690812110900879, + "learning_rate": 0.0005945983484676321, + "loss": 1.4022, + "step": 5411 + }, + { + "epoch": 0.5708860759493671, + "grad_norm": 0.6466752886772156, + "learning_rate": 0.0005943527460246625, + "loss": 1.4084, + "step": 5412 + }, + { + "epoch": 0.5709915611814346, + "grad_norm": 0.6500775814056396, + "learning_rate": 0.0005941071610227437, + "loss": 1.4272, + "step": 5413 + }, + { + "epoch": 0.5710970464135021, + "grad_norm": 0.7073416113853455, + "learning_rate": 0.000593861593489395, + "loss": 1.3651, + "step": 5414 + }, + { + "epoch": 0.5712025316455697, + "grad_norm": 0.6489139795303345, + "learning_rate": 0.000593616043452133, + "loss": 1.44, + "step": 5415 + }, + { + "epoch": 0.5713080168776371, + "grad_norm": 0.6206156015396118, + "learning_rate": 0.0005933705109384735, + "loss": 1.3957, + "step": 5416 + }, + { + "epoch": 0.5714135021097047, + "grad_norm": 0.6445473432540894, + "learning_rate": 0.000593124995975929, + "loss": 1.413, + "step": 5417 + }, + { + "epoch": 0.5715189873417722, + "grad_norm": 0.6173402070999146, + "learning_rate": 0.000592879498592011, + "loss": 1.4084, + "step": 5418 + }, + { + "epoch": 0.5716244725738396, + "grad_norm": 0.6462146639823914, + "learning_rate": 0.0005926340188142289, + "loss": 1.3806, + "step": 5419 + }, + { + "epoch": 0.5717299578059072, + "grad_norm": 0.6155890822410583, + "learning_rate": 0.0005923885566700896, + "loss": 1.3936, + "step": 5420 + }, + { + "epoch": 0.5718354430379747, + "grad_norm": 
0.6224215626716614, + "learning_rate": 0.0005921431121870984, + "loss": 1.4327, + "step": 5421 + }, + { + "epoch": 0.5719409282700422, + "grad_norm": 0.6403200030326843, + "learning_rate": 0.0005918976853927586, + "loss": 1.4023, + "step": 5422 + }, + { + "epoch": 0.5720464135021097, + "grad_norm": 0.6395169496536255, + "learning_rate": 0.0005916522763145715, + "loss": 1.3604, + "step": 5423 + }, + { + "epoch": 0.5721518987341773, + "grad_norm": 0.664604902267456, + "learning_rate": 0.0005914068849800365, + "loss": 1.3738, + "step": 5424 + }, + { + "epoch": 0.5722573839662447, + "grad_norm": 0.7171845436096191, + "learning_rate": 0.0005911615114166508, + "loss": 1.4072, + "step": 5425 + }, + { + "epoch": 0.5723628691983123, + "grad_norm": 0.6371894478797913, + "learning_rate": 0.0005909161556519096, + "loss": 1.4154, + "step": 5426 + }, + { + "epoch": 0.5724683544303798, + "grad_norm": 0.6299468874931335, + "learning_rate": 0.0005906708177133066, + "loss": 1.428, + "step": 5427 + }, + { + "epoch": 0.5725738396624472, + "grad_norm": 0.7427091598510742, + "learning_rate": 0.0005904254976283331, + "loss": 1.4019, + "step": 5428 + }, + { + "epoch": 0.5726793248945148, + "grad_norm": 0.6261228322982788, + "learning_rate": 0.0005901801954244782, + "loss": 1.398, + "step": 5429 + }, + { + "epoch": 0.5727848101265823, + "grad_norm": 0.7157846689224243, + "learning_rate": 0.0005899349111292293, + "loss": 1.3775, + "step": 5430 + }, + { + "epoch": 0.5728902953586498, + "grad_norm": 0.63222736120224, + "learning_rate": 0.0005896896447700718, + "loss": 1.422, + "step": 5431 + }, + { + "epoch": 0.5729957805907173, + "grad_norm": 0.699333131313324, + "learning_rate": 0.0005894443963744891, + "loss": 1.3816, + "step": 5432 + }, + { + "epoch": 0.5731012658227848, + "grad_norm": 0.6859641671180725, + "learning_rate": 0.0005891991659699622, + "loss": 1.3735, + "step": 5433 + }, + { + "epoch": 0.5732067510548523, + "grad_norm": 0.6763629913330078, + "learning_rate": 
0.0005889539535839704, + "loss": 1.4471, + "step": 5434 + }, + { + "epoch": 0.5733122362869199, + "grad_norm": 0.6604684591293335, + "learning_rate": 0.0005887087592439914, + "loss": 1.3659, + "step": 5435 + }, + { + "epoch": 0.5734177215189873, + "grad_norm": 0.6296084523200989, + "learning_rate": 0.0005884635829775002, + "loss": 1.383, + "step": 5436 + }, + { + "epoch": 0.5735232067510548, + "grad_norm": 0.7377481460571289, + "learning_rate": 0.00058821842481197, + "loss": 1.3774, + "step": 5437 + }, + { + "epoch": 0.5736286919831224, + "grad_norm": 0.6678453087806702, + "learning_rate": 0.0005879732847748721, + "loss": 1.4228, + "step": 5438 + }, + { + "epoch": 0.5737341772151898, + "grad_norm": 0.6781397461891174, + "learning_rate": 0.0005877281628936756, + "loss": 1.4109, + "step": 5439 + }, + { + "epoch": 0.5738396624472574, + "grad_norm": 0.6540431976318359, + "learning_rate": 0.0005874830591958474, + "loss": 1.3974, + "step": 5440 + }, + { + "epoch": 0.5739451476793249, + "grad_norm": 0.6359700560569763, + "learning_rate": 0.000587237973708853, + "loss": 1.4129, + "step": 5441 + }, + { + "epoch": 0.5740506329113924, + "grad_norm": 0.6446060538291931, + "learning_rate": 0.0005869929064601551, + "loss": 1.4087, + "step": 5442 + }, + { + "epoch": 0.5741561181434599, + "grad_norm": 0.6775708198547363, + "learning_rate": 0.0005867478574772147, + "loss": 1.4081, + "step": 5443 + }, + { + "epoch": 0.5742616033755275, + "grad_norm": 0.7083680629730225, + "learning_rate": 0.0005865028267874911, + "loss": 1.4085, + "step": 5444 + }, + { + "epoch": 0.5743670886075949, + "grad_norm": 0.6007539629936218, + "learning_rate": 0.0005862578144184412, + "loss": 1.3784, + "step": 5445 + }, + { + "epoch": 0.5744725738396624, + "grad_norm": 0.6667211055755615, + "learning_rate": 0.0005860128203975196, + "loss": 1.4191, + "step": 5446 + }, + { + "epoch": 0.57457805907173, + "grad_norm": 0.6527398824691772, + "learning_rate": 0.0005857678447521791, + "loss": 1.4233, + "step": 5447 
+ }, + { + "epoch": 0.5746835443037974, + "grad_norm": 0.6417278051376343, + "learning_rate": 0.0005855228875098706, + "loss": 1.4119, + "step": 5448 + }, + { + "epoch": 0.574789029535865, + "grad_norm": 0.6442406177520752, + "learning_rate": 0.0005852779486980427, + "loss": 1.404, + "step": 5449 + }, + { + "epoch": 0.5748945147679325, + "grad_norm": 0.6723670363426208, + "learning_rate": 0.000585033028344142, + "loss": 1.3835, + "step": 5450 + }, + { + "epoch": 0.575, + "grad_norm": 0.6802369952201843, + "learning_rate": 0.0005847881264756131, + "loss": 1.4072, + "step": 5451 + }, + { + "epoch": 0.5751054852320675, + "grad_norm": 0.6730881929397583, + "learning_rate": 0.0005845432431198981, + "loss": 1.369, + "step": 5452 + }, + { + "epoch": 0.575210970464135, + "grad_norm": 0.6882953643798828, + "learning_rate": 0.0005842983783044381, + "loss": 1.43, + "step": 5453 + }, + { + "epoch": 0.5753164556962025, + "grad_norm": 0.6325315833091736, + "learning_rate": 0.0005840535320566711, + "loss": 1.4095, + "step": 5454 + }, + { + "epoch": 0.57542194092827, + "grad_norm": 0.6741186380386353, + "learning_rate": 0.0005838087044040334, + "loss": 1.4508, + "step": 5455 + }, + { + "epoch": 0.5755274261603376, + "grad_norm": 0.6510480046272278, + "learning_rate": 0.0005835638953739589, + "loss": 1.3848, + "step": 5456 + }, + { + "epoch": 0.575632911392405, + "grad_norm": 0.6398091316223145, + "learning_rate": 0.00058331910499388, + "loss": 1.4216, + "step": 5457 + }, + { + "epoch": 0.5757383966244726, + "grad_norm": 0.6015611886978149, + "learning_rate": 0.0005830743332912264, + "loss": 1.3879, + "step": 5458 + }, + { + "epoch": 0.5758438818565401, + "grad_norm": 0.6537144184112549, + "learning_rate": 0.0005828295802934263, + "loss": 1.3948, + "step": 5459 + }, + { + "epoch": 0.5759493670886076, + "grad_norm": 0.6434143781661987, + "learning_rate": 0.0005825848460279048, + "loss": 1.4149, + "step": 5460 + }, + { + "epoch": 0.5760548523206751, + "grad_norm": 0.7113327980041504, 
+ "learning_rate": 0.0005823401305220865, + "loss": 1.3728, + "step": 5461 + }, + { + "epoch": 0.5761603375527427, + "grad_norm": 0.6774086952209473, + "learning_rate": 0.0005820954338033925, + "loss": 1.3756, + "step": 5462 + }, + { + "epoch": 0.5762658227848101, + "grad_norm": 0.8920131325721741, + "learning_rate": 0.0005818507558992426, + "loss": 1.4072, + "step": 5463 + }, + { + "epoch": 0.5763713080168776, + "grad_norm": 0.6473973393440247, + "learning_rate": 0.0005816060968370538, + "loss": 1.3934, + "step": 5464 + }, + { + "epoch": 0.5764767932489452, + "grad_norm": 0.864878237247467, + "learning_rate": 0.0005813614566442416, + "loss": 1.445, + "step": 5465 + }, + { + "epoch": 0.5765822784810126, + "grad_norm": 0.6897873282432556, + "learning_rate": 0.0005811168353482191, + "loss": 1.4358, + "step": 5466 + }, + { + "epoch": 0.5766877637130802, + "grad_norm": 0.9114664793014526, + "learning_rate": 0.0005808722329763974, + "loss": 1.4135, + "step": 5467 + }, + { + "epoch": 0.5767932489451477, + "grad_norm": 0.6971914172172546, + "learning_rate": 0.0005806276495561852, + "loss": 1.4111, + "step": 5468 + }, + { + "epoch": 0.5768987341772152, + "grad_norm": 0.780961275100708, + "learning_rate": 0.0005803830851149892, + "loss": 1.4138, + "step": 5469 + }, + { + "epoch": 0.5770042194092827, + "grad_norm": 0.7530432939529419, + "learning_rate": 0.0005801385396802146, + "loss": 1.4092, + "step": 5470 + }, + { + "epoch": 0.5771097046413503, + "grad_norm": 0.6954978704452515, + "learning_rate": 0.0005798940132792636, + "loss": 1.4152, + "step": 5471 + }, + { + "epoch": 0.5772151898734177, + "grad_norm": 0.7223736047744751, + "learning_rate": 0.0005796495059395367, + "loss": 1.4023, + "step": 5472 + }, + { + "epoch": 0.5773206751054852, + "grad_norm": 0.6742898225784302, + "learning_rate": 0.0005794050176884321, + "loss": 1.3767, + "step": 5473 + }, + { + "epoch": 0.5774261603375528, + "grad_norm": 0.7327281832695007, + "learning_rate": 0.0005791605485533459, + "loss": 
1.3859, + "step": 5474 + }, + { + "epoch": 0.5775316455696202, + "grad_norm": 0.6749832630157471, + "learning_rate": 0.0005789160985616721, + "loss": 1.3924, + "step": 5475 + }, + { + "epoch": 0.5776371308016878, + "grad_norm": 0.6998118162155151, + "learning_rate": 0.0005786716677408025, + "loss": 1.3872, + "step": 5476 + }, + { + "epoch": 0.5777426160337553, + "grad_norm": 0.6848920583724976, + "learning_rate": 0.0005784272561181269, + "loss": 1.4364, + "step": 5477 + }, + { + "epoch": 0.5778481012658228, + "grad_norm": 0.7084426283836365, + "learning_rate": 0.0005781828637210325, + "loss": 1.417, + "step": 5478 + }, + { + "epoch": 0.5779535864978903, + "grad_norm": 0.8107335567474365, + "learning_rate": 0.0005779384905769053, + "loss": 1.4213, + "step": 5479 + }, + { + "epoch": 0.5780590717299579, + "grad_norm": 0.6368215084075928, + "learning_rate": 0.0005776941367131282, + "loss": 1.4473, + "step": 5480 + }, + { + "epoch": 0.5781645569620253, + "grad_norm": 0.8551036715507507, + "learning_rate": 0.0005774498021570824, + "loss": 1.3702, + "step": 5481 + }, + { + "epoch": 0.5782700421940928, + "grad_norm": 0.7041740417480469, + "learning_rate": 0.0005772054869361465, + "loss": 1.4208, + "step": 5482 + }, + { + "epoch": 0.5783755274261604, + "grad_norm": 0.7164611220359802, + "learning_rate": 0.0005769611910776975, + "loss": 1.4188, + "step": 5483 + }, + { + "epoch": 0.5784810126582278, + "grad_norm": 0.7055935263633728, + "learning_rate": 0.0005767169146091098, + "loss": 1.4104, + "step": 5484 + }, + { + "epoch": 0.5785864978902954, + "grad_norm": 0.7039307951927185, + "learning_rate": 0.0005764726575577559, + "loss": 1.4096, + "step": 5485 + }, + { + "epoch": 0.5786919831223629, + "grad_norm": 0.6550725698471069, + "learning_rate": 0.0005762284199510059, + "loss": 1.3891, + "step": 5486 + }, + { + "epoch": 0.5787974683544304, + "grad_norm": 0.716495156288147, + "learning_rate": 0.000575984201816228, + "loss": 1.4233, + "step": 5487 + }, + { + "epoch": 
0.5789029535864979, + "grad_norm": 0.7433140873908997, + "learning_rate": 0.0005757400031807881, + "loss": 1.4041, + "step": 5488 + }, + { + "epoch": 0.5790084388185655, + "grad_norm": 0.9735528826713562, + "learning_rate": 0.0005754958240720498, + "loss": 1.4635, + "step": 5489 + }, + { + "epoch": 0.5791139240506329, + "grad_norm": 0.6236480474472046, + "learning_rate": 0.0005752516645173745, + "loss": 1.3905, + "step": 5490 + }, + { + "epoch": 0.5792194092827004, + "grad_norm": 0.8303609490394592, + "learning_rate": 0.0005750075245441218, + "loss": 1.3946, + "step": 5491 + }, + { + "epoch": 0.579324894514768, + "grad_norm": 0.6118735074996948, + "learning_rate": 0.0005747634041796484, + "loss": 1.4049, + "step": 5492 + }, + { + "epoch": 0.5794303797468354, + "grad_norm": 0.6670376062393188, + "learning_rate": 0.0005745193034513092, + "loss": 1.4121, + "step": 5493 + }, + { + "epoch": 0.579535864978903, + "grad_norm": 0.6523693799972534, + "learning_rate": 0.0005742752223864573, + "loss": 1.3898, + "step": 5494 + }, + { + "epoch": 0.5796413502109705, + "grad_norm": 0.6205941438674927, + "learning_rate": 0.0005740311610124427, + "loss": 1.4301, + "step": 5495 + }, + { + "epoch": 0.579746835443038, + "grad_norm": 0.7032996416091919, + "learning_rate": 0.0005737871193566141, + "loss": 1.3766, + "step": 5496 + }, + { + "epoch": 0.5798523206751055, + "grad_norm": 0.6429977416992188, + "learning_rate": 0.0005735430974463175, + "loss": 1.3985, + "step": 5497 + }, + { + "epoch": 0.5799578059071729, + "grad_norm": 0.8221873641014099, + "learning_rate": 0.0005732990953088968, + "loss": 1.3954, + "step": 5498 + }, + { + "epoch": 0.5800632911392405, + "grad_norm": 0.7584055662155151, + "learning_rate": 0.0005730551129716936, + "loss": 1.4462, + "step": 5499 + }, + { + "epoch": 0.580168776371308, + "grad_norm": 0.7783839106559753, + "learning_rate": 0.0005728111504620472, + "loss": 1.3872, + "step": 5500 + }, + { + "epoch": 0.5802742616033755, + "grad_norm": 
0.7871996760368347, + "learning_rate": 0.000572567207807295, + "loss": 1.3956, + "step": 5501 + }, + { + "epoch": 0.580379746835443, + "grad_norm": 0.7614894509315491, + "learning_rate": 0.000572323285034772, + "loss": 1.4356, + "step": 5502 + }, + { + "epoch": 0.5804852320675106, + "grad_norm": 0.6881887316703796, + "learning_rate": 0.0005720793821718108, + "loss": 1.4212, + "step": 5503 + }, + { + "epoch": 0.580590717299578, + "grad_norm": 1.018066644668579, + "learning_rate": 0.0005718354992457417, + "loss": 1.3988, + "step": 5504 + }, + { + "epoch": 0.5806962025316456, + "grad_norm": 0.6227500438690186, + "learning_rate": 0.0005715916362838936, + "loss": 1.3926, + "step": 5505 + }, + { + "epoch": 0.5808016877637131, + "grad_norm": 1.066332459449768, + "learning_rate": 0.0005713477933135923, + "loss": 1.4355, + "step": 5506 + }, + { + "epoch": 0.5809071729957805, + "grad_norm": 0.6808030605316162, + "learning_rate": 0.0005711039703621616, + "loss": 1.424, + "step": 5507 + }, + { + "epoch": 0.5810126582278481, + "grad_norm": 1.30239999294281, + "learning_rate": 0.0005708601674569232, + "loss": 1.3937, + "step": 5508 + }, + { + "epoch": 0.5811181434599156, + "grad_norm": 0.7194136381149292, + "learning_rate": 0.0005706163846251961, + "loss": 1.4222, + "step": 5509 + }, + { + "epoch": 0.5812236286919831, + "grad_norm": 0.9675793051719666, + "learning_rate": 0.0005703726218942976, + "loss": 1.3975, + "step": 5510 + }, + { + "epoch": 0.5813291139240506, + "grad_norm": 0.7636874914169312, + "learning_rate": 0.0005701288792915427, + "loss": 1.3684, + "step": 5511 + }, + { + "epoch": 0.5814345991561182, + "grad_norm": 0.8688571453094482, + "learning_rate": 0.0005698851568442434, + "loss": 1.4065, + "step": 5512 + }, + { + "epoch": 0.5815400843881856, + "grad_norm": 0.9504701495170593, + "learning_rate": 0.0005696414545797108, + "loss": 1.4072, + "step": 5513 + }, + { + "epoch": 0.5816455696202532, + "grad_norm": 0.8840763568878174, + "learning_rate": 
0.0005693977725252525, + "loss": 1.4244, + "step": 5514 + }, + { + "epoch": 0.5817510548523207, + "grad_norm": 0.9205045700073242, + "learning_rate": 0.0005691541107081743, + "loss": 1.4048, + "step": 5515 + }, + { + "epoch": 0.5818565400843881, + "grad_norm": 0.7449555993080139, + "learning_rate": 0.0005689104691557798, + "loss": 1.3646, + "step": 5516 + }, + { + "epoch": 0.5819620253164557, + "grad_norm": 0.8067421913146973, + "learning_rate": 0.0005686668478953702, + "loss": 1.3825, + "step": 5517 + }, + { + "epoch": 0.5820675105485232, + "grad_norm": 0.687185525894165, + "learning_rate": 0.0005684232469542446, + "loss": 1.4219, + "step": 5518 + }, + { + "epoch": 0.5821729957805907, + "grad_norm": 0.7876834273338318, + "learning_rate": 0.0005681796663596996, + "loss": 1.4008, + "step": 5519 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 0.6383615136146545, + "learning_rate": 0.0005679361061390295, + "loss": 1.4359, + "step": 5520 + }, + { + "epoch": 0.5823839662447258, + "grad_norm": 0.6534522175788879, + "learning_rate": 0.0005676925663195263, + "loss": 1.3785, + "step": 5521 + }, + { + "epoch": 0.5824894514767932, + "grad_norm": 0.7213976383209229, + "learning_rate": 0.0005674490469284805, + "loss": 1.3859, + "step": 5522 + }, + { + "epoch": 0.5825949367088608, + "grad_norm": 0.6141411066055298, + "learning_rate": 0.0005672055479931791, + "loss": 1.3838, + "step": 5523 + }, + { + "epoch": 0.5827004219409283, + "grad_norm": 0.6953026652336121, + "learning_rate": 0.0005669620695409076, + "loss": 1.4095, + "step": 5524 + }, + { + "epoch": 0.5828059071729957, + "grad_norm": 0.703486442565918, + "learning_rate": 0.000566718611598949, + "loss": 1.3874, + "step": 5525 + }, + { + "epoch": 0.5829113924050633, + "grad_norm": 0.6748758554458618, + "learning_rate": 0.0005664751741945839, + "loss": 1.3965, + "step": 5526 + }, + { + "epoch": 0.5830168776371308, + "grad_norm": 0.7350960373878479, + "learning_rate": 0.0005662317573550906, + "loss": 1.3871, + "step": 
5527 + }, + { + "epoch": 0.5831223628691983, + "grad_norm": 0.6679146885871887, + "learning_rate": 0.0005659883611077453, + "loss": 1.4139, + "step": 5528 + }, + { + "epoch": 0.5832278481012658, + "grad_norm": 0.9144203066825867, + "learning_rate": 0.0005657449854798216, + "loss": 1.3841, + "step": 5529 + }, + { + "epoch": 0.5833333333333334, + "grad_norm": 0.7237421274185181, + "learning_rate": 0.0005655016304985908, + "loss": 1.4349, + "step": 5530 + }, + { + "epoch": 0.5834388185654008, + "grad_norm": 0.888310432434082, + "learning_rate": 0.0005652582961913227, + "loss": 1.4165, + "step": 5531 + }, + { + "epoch": 0.5835443037974684, + "grad_norm": 0.795478880405426, + "learning_rate": 0.0005650149825852836, + "loss": 1.3722, + "step": 5532 + }, + { + "epoch": 0.5836497890295359, + "grad_norm": 0.7989038825035095, + "learning_rate": 0.0005647716897077382, + "loss": 1.3685, + "step": 5533 + }, + { + "epoch": 0.5837552742616033, + "grad_norm": 0.6499375104904175, + "learning_rate": 0.0005645284175859486, + "loss": 1.3685, + "step": 5534 + }, + { + "epoch": 0.5838607594936709, + "grad_norm": 0.9774299263954163, + "learning_rate": 0.0005642851662471745, + "loss": 1.3917, + "step": 5535 + }, + { + "epoch": 0.5839662447257384, + "grad_norm": 0.6426073908805847, + "learning_rate": 0.0005640419357186738, + "loss": 1.3885, + "step": 5536 + }, + { + "epoch": 0.5840717299578059, + "grad_norm": 0.9414013624191284, + "learning_rate": 0.0005637987260277013, + "loss": 1.3828, + "step": 5537 + }, + { + "epoch": 0.5841772151898734, + "grad_norm": 0.6376734375953674, + "learning_rate": 0.0005635555372015099, + "loss": 1.4447, + "step": 5538 + }, + { + "epoch": 0.584282700421941, + "grad_norm": 0.7275689840316772, + "learning_rate": 0.0005633123692673503, + "loss": 1.3853, + "step": 5539 + }, + { + "epoch": 0.5843881856540084, + "grad_norm": 0.708579957485199, + "learning_rate": 0.0005630692222524709, + "loss": 1.4213, + "step": 5540 + }, + { + "epoch": 0.584493670886076, + 
"grad_norm": 0.6620163917541504, + "learning_rate": 0.0005628260961841171, + "loss": 1.4165, + "step": 5541 + }, + { + "epoch": 0.5845991561181435, + "grad_norm": 0.6731498837471008, + "learning_rate": 0.0005625829910895325, + "loss": 1.4074, + "step": 5542 + }, + { + "epoch": 0.5847046413502109, + "grad_norm": 0.6504212617874146, + "learning_rate": 0.0005623399069959585, + "loss": 1.4118, + "step": 5543 + }, + { + "epoch": 0.5848101265822785, + "grad_norm": 0.6403469443321228, + "learning_rate": 0.0005620968439306335, + "loss": 1.3961, + "step": 5544 + }, + { + "epoch": 0.584915611814346, + "grad_norm": 0.6475850939750671, + "learning_rate": 0.0005618538019207943, + "loss": 1.4153, + "step": 5545 + }, + { + "epoch": 0.5850210970464135, + "grad_norm": 0.6307898759841919, + "learning_rate": 0.0005616107809936746, + "loss": 1.3874, + "step": 5546 + }, + { + "epoch": 0.585126582278481, + "grad_norm": 0.7003928422927856, + "learning_rate": 0.0005613677811765062, + "loss": 1.3912, + "step": 5547 + }, + { + "epoch": 0.5852320675105486, + "grad_norm": 0.6520985960960388, + "learning_rate": 0.0005611248024965186, + "loss": 1.4271, + "step": 5548 + }, + { + "epoch": 0.585337552742616, + "grad_norm": 0.6776325702667236, + "learning_rate": 0.0005608818449809387, + "loss": 1.4195, + "step": 5549 + }, + { + "epoch": 0.5854430379746836, + "grad_norm": 0.6639997959136963, + "learning_rate": 0.0005606389086569911, + "loss": 1.4494, + "step": 5550 + }, + { + "epoch": 0.5855485232067511, + "grad_norm": 0.6522771120071411, + "learning_rate": 0.0005603959935518981, + "loss": 1.4421, + "step": 5551 + }, + { + "epoch": 0.5856540084388185, + "grad_norm": 0.6917178630828857, + "learning_rate": 0.0005601530996928795, + "loss": 1.4641, + "step": 5552 + }, + { + "epoch": 0.5857594936708861, + "grad_norm": 0.6337918639183044, + "learning_rate": 0.0005599102271071527, + "loss": 1.3934, + "step": 5553 + }, + { + "epoch": 0.5858649789029536, + "grad_norm": 0.7700181007385254, + "learning_rate": 
0.0005596673758219327, + "loss": 1.4155, + "step": 5554 + }, + { + "epoch": 0.5859704641350211, + "grad_norm": 0.6523522734642029, + "learning_rate": 0.0005594245458644325, + "loss": 1.4191, + "step": 5555 + }, + { + "epoch": 0.5860759493670886, + "grad_norm": 0.6966722011566162, + "learning_rate": 0.0005591817372618621, + "loss": 1.3805, + "step": 5556 + }, + { + "epoch": 0.5861814345991562, + "grad_norm": 0.6304505467414856, + "learning_rate": 0.0005589389500414296, + "loss": 1.395, + "step": 5557 + }, + { + "epoch": 0.5862869198312236, + "grad_norm": 0.6307600736618042, + "learning_rate": 0.0005586961842303405, + "loss": 1.4331, + "step": 5558 + }, + { + "epoch": 0.5863924050632912, + "grad_norm": 0.8038603067398071, + "learning_rate": 0.0005584534398557977, + "loss": 1.3943, + "step": 5559 + }, + { + "epoch": 0.5864978902953587, + "grad_norm": 0.6917838454246521, + "learning_rate": 0.0005582107169450023, + "loss": 1.4075, + "step": 5560 + }, + { + "epoch": 0.5866033755274261, + "grad_norm": 0.6576371788978577, + "learning_rate": 0.0005579680155251524, + "loss": 1.4059, + "step": 5561 + }, + { + "epoch": 0.5867088607594937, + "grad_norm": 0.6328065991401672, + "learning_rate": 0.0005577253356234439, + "loss": 1.4153, + "step": 5562 + }, + { + "epoch": 0.5868143459915611, + "grad_norm": 0.6484145522117615, + "learning_rate": 0.0005574826772670703, + "loss": 1.3847, + "step": 5563 + }, + { + "epoch": 0.5869198312236287, + "grad_norm": 0.6495646834373474, + "learning_rate": 0.0005572400404832226, + "loss": 1.41, + "step": 5564 + }, + { + "epoch": 0.5870253164556962, + "grad_norm": 0.6660284399986267, + "learning_rate": 0.0005569974252990896, + "loss": 1.4033, + "step": 5565 + }, + { + "epoch": 0.5871308016877637, + "grad_norm": 0.6173551082611084, + "learning_rate": 0.0005567548317418576, + "loss": 1.4022, + "step": 5566 + }, + { + "epoch": 0.5872362869198312, + "grad_norm": 0.6322763562202454, + "learning_rate": 0.0005565122598387103, + "loss": 1.4314, + "step": 
5567 + }, + { + "epoch": 0.5873417721518988, + "grad_norm": 0.6429388523101807, + "learning_rate": 0.0005562697096168289, + "loss": 1.3705, + "step": 5568 + }, + { + "epoch": 0.5874472573839662, + "grad_norm": 0.6114735007286072, + "learning_rate": 0.0005560271811033928, + "loss": 1.3658, + "step": 5569 + }, + { + "epoch": 0.5875527426160337, + "grad_norm": 0.7185675501823425, + "learning_rate": 0.0005557846743255783, + "loss": 1.3858, + "step": 5570 + }, + { + "epoch": 0.5876582278481013, + "grad_norm": 0.6354777812957764, + "learning_rate": 0.0005555421893105593, + "loss": 1.3994, + "step": 5571 + }, + { + "epoch": 0.5877637130801687, + "grad_norm": 0.7756854891777039, + "learning_rate": 0.0005552997260855077, + "loss": 1.401, + "step": 5572 + }, + { + "epoch": 0.5878691983122363, + "grad_norm": 0.7363024353981018, + "learning_rate": 0.0005550572846775927, + "loss": 1.4026, + "step": 5573 + }, + { + "epoch": 0.5879746835443038, + "grad_norm": 0.6656511425971985, + "learning_rate": 0.0005548148651139809, + "loss": 1.4003, + "step": 5574 + }, + { + "epoch": 0.5880801687763713, + "grad_norm": 0.6593628525733948, + "learning_rate": 0.0005545724674218368, + "loss": 1.4127, + "step": 5575 + }, + { + "epoch": 0.5881856540084388, + "grad_norm": 0.6742112636566162, + "learning_rate": 0.0005543300916283223, + "loss": 1.3962, + "step": 5576 + }, + { + "epoch": 0.5882911392405064, + "grad_norm": 0.6371155977249146, + "learning_rate": 0.0005540877377605968, + "loss": 1.4106, + "step": 5577 + }, + { + "epoch": 0.5883966244725738, + "grad_norm": 0.6736936569213867, + "learning_rate": 0.0005538454058458171, + "loss": 1.3906, + "step": 5578 + }, + { + "epoch": 0.5885021097046413, + "grad_norm": 0.7019063234329224, + "learning_rate": 0.0005536030959111377, + "loss": 1.4156, + "step": 5579 + }, + { + "epoch": 0.5886075949367089, + "grad_norm": 0.7978177070617676, + "learning_rate": 0.0005533608079837109, + "loss": 1.3622, + "step": 5580 + }, + { + "epoch": 0.5887130801687763, + 
"grad_norm": 0.7112578749656677, + "learning_rate": 0.0005531185420906859, + "loss": 1.3958, + "step": 5581 + }, + { + "epoch": 0.5888185654008439, + "grad_norm": 0.7679712772369385, + "learning_rate": 0.0005528762982592101, + "loss": 1.4083, + "step": 5582 + }, + { + "epoch": 0.5889240506329114, + "grad_norm": 0.798006534576416, + "learning_rate": 0.000552634076516428, + "loss": 1.3777, + "step": 5583 + }, + { + "epoch": 0.5890295358649789, + "grad_norm": 0.7655883431434631, + "learning_rate": 0.0005523918768894819, + "loss": 1.4087, + "step": 5584 + }, + { + "epoch": 0.5891350210970464, + "grad_norm": 0.7747820615768433, + "learning_rate": 0.0005521496994055112, + "loss": 1.4196, + "step": 5585 + }, + { + "epoch": 0.589240506329114, + "grad_norm": 0.7091466784477234, + "learning_rate": 0.0005519075440916534, + "loss": 1.3995, + "step": 5586 + }, + { + "epoch": 0.5893459915611814, + "grad_norm": 0.791864275932312, + "learning_rate": 0.000551665410975043, + "loss": 1.395, + "step": 5587 + }, + { + "epoch": 0.5894514767932489, + "grad_norm": 0.6646474599838257, + "learning_rate": 0.0005514233000828121, + "loss": 1.4309, + "step": 5588 + }, + { + "epoch": 0.5895569620253165, + "grad_norm": 0.7073732018470764, + "learning_rate": 0.0005511812114420908, + "loss": 1.3904, + "step": 5589 + }, + { + "epoch": 0.5896624472573839, + "grad_norm": 0.6582846641540527, + "learning_rate": 0.0005509391450800061, + "loss": 1.3938, + "step": 5590 + }, + { + "epoch": 0.5897679324894515, + "grad_norm": 0.7392422556877136, + "learning_rate": 0.0005506971010236829, + "loss": 1.3851, + "step": 5591 + }, + { + "epoch": 0.589873417721519, + "grad_norm": 0.730871319770813, + "learning_rate": 0.0005504550793002433, + "loss": 1.4191, + "step": 5592 + }, + { + "epoch": 0.5899789029535865, + "grad_norm": 0.6581422686576843, + "learning_rate": 0.000550213079936807, + "loss": 1.385, + "step": 5593 + }, + { + "epoch": 0.590084388185654, + "grad_norm": 0.6692684292793274, + "learning_rate": 
0.0005499711029604915, + "loss": 1.4003, + "step": 5594 + }, + { + "epoch": 0.5901898734177216, + "grad_norm": 0.6784625053405762, + "learning_rate": 0.0005497291483984113, + "loss": 1.3961, + "step": 5595 + }, + { + "epoch": 0.590295358649789, + "grad_norm": 0.6458569765090942, + "learning_rate": 0.0005494872162776786, + "loss": 1.3758, + "step": 5596 + }, + { + "epoch": 0.5904008438818565, + "grad_norm": 0.6415591239929199, + "learning_rate": 0.0005492453066254032, + "loss": 1.3831, + "step": 5597 + }, + { + "epoch": 0.5905063291139241, + "grad_norm": 0.661425769329071, + "learning_rate": 0.000549003419468692, + "loss": 1.4061, + "step": 5598 + }, + { + "epoch": 0.5906118143459915, + "grad_norm": 0.6510631442070007, + "learning_rate": 0.0005487615548346502, + "loss": 1.4077, + "step": 5599 + }, + { + "epoch": 0.5907172995780591, + "grad_norm": 0.6516284942626953, + "learning_rate": 0.0005485197127503795, + "loss": 1.4148, + "step": 5600 + }, + { + "epoch": 0.5908227848101266, + "grad_norm": 0.7107377052307129, + "learning_rate": 0.0005482778932429798, + "loss": 1.4235, + "step": 5601 + }, + { + "epoch": 0.5909282700421941, + "grad_norm": 0.6294478178024292, + "learning_rate": 0.000548036096339548, + "loss": 1.4212, + "step": 5602 + }, + { + "epoch": 0.5910337552742616, + "grad_norm": 0.6780471801757812, + "learning_rate": 0.0005477943220671786, + "loss": 1.4112, + "step": 5603 + }, + { + "epoch": 0.5911392405063292, + "grad_norm": 0.6594567894935608, + "learning_rate": 0.0005475525704529638, + "loss": 1.3825, + "step": 5604 + }, + { + "epoch": 0.5912447257383966, + "grad_norm": 0.6820365190505981, + "learning_rate": 0.0005473108415239929, + "loss": 1.3784, + "step": 5605 + }, + { + "epoch": 0.5913502109704641, + "grad_norm": 0.6300690770149231, + "learning_rate": 0.0005470691353073531, + "loss": 1.406, + "step": 5606 + }, + { + "epoch": 0.5914556962025317, + "grad_norm": 0.6158050298690796, + "learning_rate": 0.0005468274518301284, + "loss": 1.4, + "step": 5607 + 
}, + { + "epoch": 0.5915611814345991, + "grad_norm": 0.6878008842468262, + "learning_rate": 0.0005465857911194006, + "loss": 1.4096, + "step": 5608 + }, + { + "epoch": 0.5916666666666667, + "grad_norm": 0.6346327662467957, + "learning_rate": 0.0005463441532022495, + "loss": 1.3836, + "step": 5609 + }, + { + "epoch": 0.5917721518987342, + "grad_norm": 0.6290355324745178, + "learning_rate": 0.0005461025381057516, + "loss": 1.4036, + "step": 5610 + }, + { + "epoch": 0.5918776371308017, + "grad_norm": 0.8167923092842102, + "learning_rate": 0.000545860945856981, + "loss": 1.4019, + "step": 5611 + }, + { + "epoch": 0.5919831223628692, + "grad_norm": 0.8318681716918945, + "learning_rate": 0.0005456193764830093, + "loss": 1.422, + "step": 5612 + }, + { + "epoch": 0.5920886075949368, + "grad_norm": 0.6777157783508301, + "learning_rate": 0.0005453778300109056, + "loss": 1.4058, + "step": 5613 + }, + { + "epoch": 0.5921940928270042, + "grad_norm": 0.8349748849868774, + "learning_rate": 0.0005451363064677365, + "loss": 1.413, + "step": 5614 + }, + { + "epoch": 0.5922995780590717, + "grad_norm": 0.7016420960426331, + "learning_rate": 0.0005448948058805657, + "loss": 1.4252, + "step": 5615 + }, + { + "epoch": 0.5924050632911393, + "grad_norm": 0.7748419642448425, + "learning_rate": 0.0005446533282764543, + "loss": 1.4067, + "step": 5616 + }, + { + "epoch": 0.5925105485232067, + "grad_norm": 0.7026746869087219, + "learning_rate": 0.0005444118736824617, + "loss": 1.3795, + "step": 5617 + }, + { + "epoch": 0.5926160337552743, + "grad_norm": 0.6768701672554016, + "learning_rate": 0.000544170442125644, + "loss": 1.4141, + "step": 5618 + }, + { + "epoch": 0.5927215189873418, + "grad_norm": 0.861757218837738, + "learning_rate": 0.0005439290336330545, + "loss": 1.4154, + "step": 5619 + }, + { + "epoch": 0.5928270042194093, + "grad_norm": 0.7025079131126404, + "learning_rate": 0.0005436876482317444, + "loss": 1.4217, + "step": 5620 + }, + { + "epoch": 0.5929324894514768, + "grad_norm": 
0.7302190065383911, + "learning_rate": 0.000543446285948762, + "loss": 1.4127, + "step": 5621 + }, + { + "epoch": 0.5930379746835444, + "grad_norm": 0.6443730592727661, + "learning_rate": 0.0005432049468111534, + "loss": 1.4299, + "step": 5622 + }, + { + "epoch": 0.5931434599156118, + "grad_norm": 0.6706241369247437, + "learning_rate": 0.0005429636308459614, + "loss": 1.357, + "step": 5623 + }, + { + "epoch": 0.5932489451476793, + "grad_norm": 0.6501530408859253, + "learning_rate": 0.0005427223380802272, + "loss": 1.4191, + "step": 5624 + }, + { + "epoch": 0.5933544303797469, + "grad_norm": 0.6859035491943359, + "learning_rate": 0.0005424810685409881, + "loss": 1.4217, + "step": 5625 + }, + { + "epoch": 0.5934599156118143, + "grad_norm": 0.623621940612793, + "learning_rate": 0.0005422398222552806, + "loss": 1.4013, + "step": 5626 + }, + { + "epoch": 0.5935654008438819, + "grad_norm": 0.6405898928642273, + "learning_rate": 0.0005419985992501367, + "loss": 1.4295, + "step": 5627 + }, + { + "epoch": 0.5936708860759494, + "grad_norm": 0.6665427684783936, + "learning_rate": 0.0005417573995525871, + "loss": 1.3741, + "step": 5628 + }, + { + "epoch": 0.5937763713080169, + "grad_norm": 0.7395927309989929, + "learning_rate": 0.0005415162231896593, + "loss": 1.398, + "step": 5629 + }, + { + "epoch": 0.5938818565400844, + "grad_norm": 0.6489254832267761, + "learning_rate": 0.0005412750701883782, + "loss": 1.3891, + "step": 5630 + }, + { + "epoch": 0.5939873417721518, + "grad_norm": 0.7065306901931763, + "learning_rate": 0.0005410339405757665, + "loss": 1.4103, + "step": 5631 + }, + { + "epoch": 0.5940928270042194, + "grad_norm": 0.6560654044151306, + "learning_rate": 0.0005407928343788435, + "loss": 1.409, + "step": 5632 + }, + { + "epoch": 0.5941983122362869, + "grad_norm": 0.7342032194137573, + "learning_rate": 0.0005405517516246267, + "loss": 1.388, + "step": 5633 + }, + { + "epoch": 0.5943037974683544, + "grad_norm": 0.6567786931991577, + "learning_rate": 
0.0005403106923401302, + "loss": 1.4122, + "step": 5634 + }, + { + "epoch": 0.5944092827004219, + "grad_norm": 0.7787832021713257, + "learning_rate": 0.0005400696565523666, + "loss": 1.3994, + "step": 5635 + }, + { + "epoch": 0.5945147679324895, + "grad_norm": 0.6588979363441467, + "learning_rate": 0.0005398286442883448, + "loss": 1.4075, + "step": 5636 + }, + { + "epoch": 0.5946202531645569, + "grad_norm": 1.0193345546722412, + "learning_rate": 0.0005395876555750712, + "loss": 1.3985, + "step": 5637 + }, + { + "epoch": 0.5947257383966245, + "grad_norm": 0.6482759714126587, + "learning_rate": 0.0005393466904395503, + "loss": 1.4492, + "step": 5638 + }, + { + "epoch": 0.594831223628692, + "grad_norm": 1.0942460298538208, + "learning_rate": 0.000539105748908783, + "loss": 1.4055, + "step": 5639 + }, + { + "epoch": 0.5949367088607594, + "grad_norm": 0.828848659992218, + "learning_rate": 0.0005388648310097682, + "loss": 1.4276, + "step": 5640 + }, + { + "epoch": 0.595042194092827, + "grad_norm": 0.9601547718048096, + "learning_rate": 0.0005386239367695018, + "loss": 1.4249, + "step": 5641 + }, + { + "epoch": 0.5951476793248945, + "grad_norm": 0.9062175750732422, + "learning_rate": 0.0005383830662149771, + "loss": 1.4041, + "step": 5642 + }, + { + "epoch": 0.595253164556962, + "grad_norm": 0.7310169339179993, + "learning_rate": 0.0005381422193731853, + "loss": 1.3962, + "step": 5643 + }, + { + "epoch": 0.5953586497890295, + "grad_norm": 0.8818082809448242, + "learning_rate": 0.0005379013962711143, + "loss": 1.4081, + "step": 5644 + }, + { + "epoch": 0.5954641350210971, + "grad_norm": 0.7030655741691589, + "learning_rate": 0.0005376605969357494, + "loss": 1.408, + "step": 5645 + }, + { + "epoch": 0.5955696202531645, + "grad_norm": 0.8201655745506287, + "learning_rate": 0.0005374198213940734, + "loss": 1.3904, + "step": 5646 + }, + { + "epoch": 0.5956751054852321, + "grad_norm": 0.7229016423225403, + "learning_rate": 0.0005371790696730665, + "loss": 1.4008, + "step": 5647 
+ }, + { + "epoch": 0.5957805907172996, + "grad_norm": 0.7129480838775635, + "learning_rate": 0.000536938341799706, + "loss": 1.4049, + "step": 5648 + }, + { + "epoch": 0.595886075949367, + "grad_norm": 0.6506049633026123, + "learning_rate": 0.0005366976378009668, + "loss": 1.4062, + "step": 5649 + }, + { + "epoch": 0.5959915611814346, + "grad_norm": 0.7116509675979614, + "learning_rate": 0.000536456957703821, + "loss": 1.3925, + "step": 5650 + }, + { + "epoch": 0.5960970464135021, + "grad_norm": 0.6359286308288574, + "learning_rate": 0.0005362163015352374, + "loss": 1.3915, + "step": 5651 + }, + { + "epoch": 0.5962025316455696, + "grad_norm": 0.7476492524147034, + "learning_rate": 0.0005359756693221836, + "loss": 1.3914, + "step": 5652 + }, + { + "epoch": 0.5963080168776371, + "grad_norm": 0.6707102656364441, + "learning_rate": 0.0005357350610916233, + "loss": 1.359, + "step": 5653 + }, + { + "epoch": 0.5964135021097047, + "grad_norm": 0.6901325583457947, + "learning_rate": 0.0005354944768705179, + "loss": 1.3871, + "step": 5654 + }, + { + "epoch": 0.5965189873417721, + "grad_norm": 0.84684157371521, + "learning_rate": 0.0005352539166858258, + "loss": 1.4179, + "step": 5655 + }, + { + "epoch": 0.5966244725738397, + "grad_norm": 0.6219273209571838, + "learning_rate": 0.0005350133805645034, + "loss": 1.4181, + "step": 5656 + }, + { + "epoch": 0.5967299578059072, + "grad_norm": 0.8365927338600159, + "learning_rate": 0.0005347728685335036, + "loss": 1.4225, + "step": 5657 + }, + { + "epoch": 0.5968354430379746, + "grad_norm": 0.6942576766014099, + "learning_rate": 0.0005345323806197771, + "loss": 1.4425, + "step": 5658 + }, + { + "epoch": 0.5969409282700422, + "grad_norm": 0.8097490072250366, + "learning_rate": 0.0005342919168502717, + "loss": 1.3877, + "step": 5659 + }, + { + "epoch": 0.5970464135021097, + "grad_norm": 0.6080962419509888, + "learning_rate": 0.0005340514772519324, + "loss": 1.3907, + "step": 5660 + }, + { + "epoch": 0.5971518987341772, + "grad_norm": 
0.845220148563385, + "learning_rate": 0.0005338110618517022, + "loss": 1.4351, + "step": 5661 + }, + { + "epoch": 0.5972573839662447, + "grad_norm": 0.7442367076873779, + "learning_rate": 0.0005335706706765205, + "loss": 1.3763, + "step": 5662 + }, + { + "epoch": 0.5973628691983123, + "grad_norm": 0.826662540435791, + "learning_rate": 0.0005333303037533244, + "loss": 1.431, + "step": 5663 + }, + { + "epoch": 0.5974683544303797, + "grad_norm": 0.9244993925094604, + "learning_rate": 0.0005330899611090482, + "loss": 1.4079, + "step": 5664 + }, + { + "epoch": 0.5975738396624473, + "grad_norm": 0.6779008507728577, + "learning_rate": 0.0005328496427706235, + "loss": 1.4046, + "step": 5665 + }, + { + "epoch": 0.5976793248945148, + "grad_norm": 1.0463205575942993, + "learning_rate": 0.000532609348764979, + "loss": 1.3744, + "step": 5666 + }, + { + "epoch": 0.5977848101265822, + "grad_norm": 0.6638580560684204, + "learning_rate": 0.0005323690791190412, + "loss": 1.429, + "step": 5667 + }, + { + "epoch": 0.5978902953586498, + "grad_norm": 0.8359646797180176, + "learning_rate": 0.0005321288338597327, + "loss": 1.4386, + "step": 5668 + }, + { + "epoch": 0.5979957805907173, + "grad_norm": 0.6452757716178894, + "learning_rate": 0.0005318886130139753, + "loss": 1.4091, + "step": 5669 + }, + { + "epoch": 0.5981012658227848, + "grad_norm": 0.8084726929664612, + "learning_rate": 0.0005316484166086863, + "loss": 1.3804, + "step": 5670 + }, + { + "epoch": 0.5982067510548523, + "grad_norm": 0.6868160963058472, + "learning_rate": 0.0005314082446707811, + "loss": 1.4164, + "step": 5671 + }, + { + "epoch": 0.5983122362869199, + "grad_norm": 0.7141164541244507, + "learning_rate": 0.000531168097227172, + "loss": 1.3945, + "step": 5672 + }, + { + "epoch": 0.5984177215189873, + "grad_norm": 0.6862460374832153, + "learning_rate": 0.0005309279743047687, + "loss": 1.4395, + "step": 5673 + }, + { + "epoch": 0.5985232067510549, + "grad_norm": 0.6516944766044617, + "learning_rate": 
0.0005306878759304785, + "loss": 1.4245, + "step": 5674 + }, + { + "epoch": 0.5986286919831224, + "grad_norm": 0.8340885639190674, + "learning_rate": 0.0005304478021312053, + "loss": 1.3857, + "step": 5675 + }, + { + "epoch": 0.5987341772151898, + "grad_norm": 0.7101006507873535, + "learning_rate": 0.0005302077529338507, + "loss": 1.43, + "step": 5676 + }, + { + "epoch": 0.5988396624472574, + "grad_norm": 0.7908767461776733, + "learning_rate": 0.0005299677283653128, + "loss": 1.3777, + "step": 5677 + }, + { + "epoch": 0.5989451476793249, + "grad_norm": 0.6430172920227051, + "learning_rate": 0.0005297277284524888, + "loss": 1.4296, + "step": 5678 + }, + { + "epoch": 0.5990506329113924, + "grad_norm": 0.7242131233215332, + "learning_rate": 0.0005294877532222709, + "loss": 1.3694, + "step": 5679 + }, + { + "epoch": 0.5991561181434599, + "grad_norm": 0.700796365737915, + "learning_rate": 0.00052924780270155, + "loss": 1.4246, + "step": 5680 + }, + { + "epoch": 0.5992616033755275, + "grad_norm": 0.6670539379119873, + "learning_rate": 0.0005290078769172135, + "loss": 1.3977, + "step": 5681 + }, + { + "epoch": 0.5993670886075949, + "grad_norm": 0.7540626525878906, + "learning_rate": 0.0005287679758961465, + "loss": 1.3878, + "step": 5682 + }, + { + "epoch": 0.5994725738396625, + "grad_norm": 0.6238296031951904, + "learning_rate": 0.0005285280996652308, + "loss": 1.387, + "step": 5683 + }, + { + "epoch": 0.59957805907173, + "grad_norm": 0.6538301706314087, + "learning_rate": 0.0005282882482513459, + "loss": 1.369, + "step": 5684 + }, + { + "epoch": 0.5996835443037974, + "grad_norm": 0.6471708416938782, + "learning_rate": 0.0005280484216813686, + "loss": 1.3883, + "step": 5685 + }, + { + "epoch": 0.599789029535865, + "grad_norm": 0.6773375272750854, + "learning_rate": 0.0005278086199821718, + "loss": 1.4138, + "step": 5686 + }, + { + "epoch": 0.5998945147679325, + "grad_norm": 0.7484108209609985, + "learning_rate": 0.0005275688431806274, + "loss": 1.3893, + "step": 5687 + 
}, + { + "epoch": 0.6, + "grad_norm": 0.6613248586654663, + "learning_rate": 0.0005273290913036033, + "loss": 1.3767, + "step": 5688 + }, + { + "epoch": 0.6001054852320675, + "grad_norm": 0.6638107299804688, + "learning_rate": 0.0005270893643779649, + "loss": 1.3674, + "step": 5689 + }, + { + "epoch": 0.6002109704641351, + "grad_norm": 0.6958801746368408, + "learning_rate": 0.0005268496624305747, + "loss": 1.3936, + "step": 5690 + }, + { + "epoch": 0.6003164556962025, + "grad_norm": 0.6363433003425598, + "learning_rate": 0.0005266099854882927, + "loss": 1.4181, + "step": 5691 + }, + { + "epoch": 0.6004219409282701, + "grad_norm": 0.6441810131072998, + "learning_rate": 0.0005263703335779755, + "loss": 1.3597, + "step": 5692 + }, + { + "epoch": 0.6005274261603376, + "grad_norm": 0.6455473303794861, + "learning_rate": 0.0005261307067264778, + "loss": 1.4035, + "step": 5693 + }, + { + "epoch": 0.600632911392405, + "grad_norm": 0.6394314765930176, + "learning_rate": 0.0005258911049606503, + "loss": 1.3795, + "step": 5694 + }, + { + "epoch": 0.6007383966244726, + "grad_norm": 0.6229532361030579, + "learning_rate": 0.0005256515283073422, + "loss": 1.3523, + "step": 5695 + }, + { + "epoch": 0.60084388185654, + "grad_norm": 0.6635313034057617, + "learning_rate": 0.0005254119767933992, + "loss": 1.4113, + "step": 5696 + }, + { + "epoch": 0.6009493670886076, + "grad_norm": 0.7354600429534912, + "learning_rate": 0.0005251724504456641, + "loss": 1.4045, + "step": 5697 + }, + { + "epoch": 0.6010548523206751, + "grad_norm": 0.6408798098564148, + "learning_rate": 0.000524932949290977, + "loss": 1.4034, + "step": 5698 + }, + { + "epoch": 0.6011603375527426, + "grad_norm": 0.6640610098838806, + "learning_rate": 0.0005246934733561751, + "loss": 1.3692, + "step": 5699 + }, + { + "epoch": 0.6012658227848101, + "grad_norm": 0.741080641746521, + "learning_rate": 0.0005244540226680931, + "loss": 1.4201, + "step": 5700 + }, + { + "epoch": 0.6013713080168777, + "grad_norm": 
0.6378716230392456, + "learning_rate": 0.0005242145972535625, + "loss": 1.3805, + "step": 5701 + }, + { + "epoch": 0.6014767932489451, + "grad_norm": 0.7041357755661011, + "learning_rate": 0.0005239751971394122, + "loss": 1.4041, + "step": 5702 + }, + { + "epoch": 0.6015822784810126, + "grad_norm": 0.6734389066696167, + "learning_rate": 0.0005237358223524678, + "loss": 1.4203, + "step": 5703 + }, + { + "epoch": 0.6016877637130802, + "grad_norm": 0.6539387106895447, + "learning_rate": 0.000523496472919553, + "loss": 1.3856, + "step": 5704 + }, + { + "epoch": 0.6017932489451476, + "grad_norm": 0.6523916125297546, + "learning_rate": 0.000523257148867488, + "loss": 1.4056, + "step": 5705 + }, + { + "epoch": 0.6018987341772152, + "grad_norm": 0.6837280988693237, + "learning_rate": 0.00052301785022309, + "loss": 1.3709, + "step": 5706 + }, + { + "epoch": 0.6020042194092827, + "grad_norm": 0.6713520288467407, + "learning_rate": 0.0005227785770131737, + "loss": 1.3761, + "step": 5707 + }, + { + "epoch": 0.6021097046413502, + "grad_norm": 0.7121042013168335, + "learning_rate": 0.0005225393292645509, + "loss": 1.3889, + "step": 5708 + }, + { + "epoch": 0.6022151898734177, + "grad_norm": 0.648698627948761, + "learning_rate": 0.0005223001070040305, + "loss": 1.3914, + "step": 5709 + }, + { + "epoch": 0.6023206751054853, + "grad_norm": 0.7197858691215515, + "learning_rate": 0.0005220609102584185, + "loss": 1.4422, + "step": 5710 + }, + { + "epoch": 0.6024261603375527, + "grad_norm": 0.6782214045524597, + "learning_rate": 0.0005218217390545181, + "loss": 1.4151, + "step": 5711 + }, + { + "epoch": 0.6025316455696202, + "grad_norm": 0.6070482730865479, + "learning_rate": 0.0005215825934191293, + "loss": 1.3938, + "step": 5712 + }, + { + "epoch": 0.6026371308016878, + "grad_norm": 0.714853823184967, + "learning_rate": 0.0005213434733790503, + "loss": 1.4035, + "step": 5713 + }, + { + "epoch": 0.6027426160337552, + "grad_norm": 0.6143189668655396, + "learning_rate": 
0.0005211043789610752, + "loss": 1.3793, + "step": 5714 + }, + { + "epoch": 0.6028481012658228, + "grad_norm": 0.6311516165733337, + "learning_rate": 0.0005208653101919959, + "loss": 1.3789, + "step": 5715 + }, + { + "epoch": 0.6029535864978903, + "grad_norm": 0.6627320051193237, + "learning_rate": 0.0005206262670986012, + "loss": 1.424, + "step": 5716 + }, + { + "epoch": 0.6030590717299578, + "grad_norm": 0.6285845041275024, + "learning_rate": 0.0005203872497076768, + "loss": 1.4307, + "step": 5717 + }, + { + "epoch": 0.6031645569620253, + "grad_norm": 0.6529867053031921, + "learning_rate": 0.0005201482580460063, + "loss": 1.3903, + "step": 5718 + }, + { + "epoch": 0.6032700421940929, + "grad_norm": 0.6103250980377197, + "learning_rate": 0.0005199092921403696, + "loss": 1.3889, + "step": 5719 + }, + { + "epoch": 0.6033755274261603, + "grad_norm": 0.652704656124115, + "learning_rate": 0.0005196703520175437, + "loss": 1.3928, + "step": 5720 + }, + { + "epoch": 0.6034810126582278, + "grad_norm": 0.6387331485748291, + "learning_rate": 0.0005194314377043037, + "loss": 1.3907, + "step": 5721 + }, + { + "epoch": 0.6035864978902954, + "grad_norm": 0.6464886665344238, + "learning_rate": 0.0005191925492274205, + "loss": 1.4017, + "step": 5722 + }, + { + "epoch": 0.6036919831223628, + "grad_norm": 0.6801614761352539, + "learning_rate": 0.0005189536866136634, + "loss": 1.3948, + "step": 5723 + }, + { + "epoch": 0.6037974683544304, + "grad_norm": 0.7700284123420715, + "learning_rate": 0.0005187148498897977, + "loss": 1.4151, + "step": 5724 + }, + { + "epoch": 0.6039029535864979, + "grad_norm": 0.6490762829780579, + "learning_rate": 0.0005184760390825865, + "loss": 1.3894, + "step": 5725 + }, + { + "epoch": 0.6040084388185654, + "grad_norm": 0.7173976898193359, + "learning_rate": 0.0005182372542187895, + "loss": 1.4107, + "step": 5726 + }, + { + "epoch": 0.6041139240506329, + "grad_norm": 0.6698790192604065, + "learning_rate": 0.0005179984953251639, + "loss": 1.3782, + "step": 
5727 + }, + { + "epoch": 0.6042194092827005, + "grad_norm": 0.8211134672164917, + "learning_rate": 0.0005177597624284637, + "loss": 1.3916, + "step": 5728 + }, + { + "epoch": 0.6043248945147679, + "grad_norm": 0.6143693923950195, + "learning_rate": 0.00051752105555544, + "loss": 1.3965, + "step": 5729 + }, + { + "epoch": 0.6044303797468354, + "grad_norm": 0.6561517715454102, + "learning_rate": 0.0005172823747328415, + "loss": 1.4114, + "step": 5730 + }, + { + "epoch": 0.604535864978903, + "grad_norm": 0.7434552907943726, + "learning_rate": 0.0005170437199874132, + "loss": 1.3949, + "step": 5731 + }, + { + "epoch": 0.6046413502109704, + "grad_norm": 0.6578709483146667, + "learning_rate": 0.0005168050913458977, + "loss": 1.3977, + "step": 5732 + }, + { + "epoch": 0.604746835443038, + "grad_norm": 0.6564269661903381, + "learning_rate": 0.0005165664888350347, + "loss": 1.4034, + "step": 5733 + }, + { + "epoch": 0.6048523206751055, + "grad_norm": 0.9355392456054688, + "learning_rate": 0.0005163279124815605, + "loss": 1.3704, + "step": 5734 + }, + { + "epoch": 0.604957805907173, + "grad_norm": 0.6640325784683228, + "learning_rate": 0.000516089362312209, + "loss": 1.3891, + "step": 5735 + }, + { + "epoch": 0.6050632911392405, + "grad_norm": 0.7309795618057251, + "learning_rate": 0.0005158508383537109, + "loss": 1.4158, + "step": 5736 + }, + { + "epoch": 0.6051687763713081, + "grad_norm": 0.6601307988166809, + "learning_rate": 0.0005156123406327938, + "loss": 1.3862, + "step": 5737 + }, + { + "epoch": 0.6052742616033755, + "grad_norm": 0.6769749522209167, + "learning_rate": 0.0005153738691761826, + "loss": 1.3852, + "step": 5738 + }, + { + "epoch": 0.605379746835443, + "grad_norm": 0.6912696361541748, + "learning_rate": 0.0005151354240105994, + "loss": 1.4356, + "step": 5739 + }, + { + "epoch": 0.6054852320675106, + "grad_norm": 0.650248110294342, + "learning_rate": 0.0005148970051627632, + "loss": 1.3711, + "step": 5740 + }, + { + "epoch": 0.605590717299578, + 
"grad_norm": 0.730765163898468, + "learning_rate": 0.0005146586126593898, + "loss": 1.3699, + "step": 5741 + }, + { + "epoch": 0.6056962025316456, + "grad_norm": 0.7084028124809265, + "learning_rate": 0.0005144202465271922, + "loss": 1.4012, + "step": 5742 + }, + { + "epoch": 0.6058016877637131, + "grad_norm": 0.6682066321372986, + "learning_rate": 0.000514181906792881, + "loss": 1.3592, + "step": 5743 + }, + { + "epoch": 0.6059071729957806, + "grad_norm": 0.6572228670120239, + "learning_rate": 0.0005139435934831628, + "loss": 1.361, + "step": 5744 + }, + { + "epoch": 0.6060126582278481, + "grad_norm": 0.6097766160964966, + "learning_rate": 0.0005137053066247421, + "loss": 1.3599, + "step": 5745 + }, + { + "epoch": 0.6061181434599157, + "grad_norm": 0.6087627410888672, + "learning_rate": 0.00051346704624432, + "loss": 1.3704, + "step": 5746 + }, + { + "epoch": 0.6062236286919831, + "grad_norm": 0.6655430793762207, + "learning_rate": 0.000513228812368595, + "loss": 1.3697, + "step": 5747 + }, + { + "epoch": 0.6063291139240506, + "grad_norm": 0.6350154876708984, + "learning_rate": 0.0005129906050242622, + "loss": 1.3849, + "step": 5748 + }, + { + "epoch": 0.6064345991561182, + "grad_norm": 0.6980540156364441, + "learning_rate": 0.0005127524242380139, + "loss": 1.3951, + "step": 5749 + }, + { + "epoch": 0.6065400843881856, + "grad_norm": 0.6589457392692566, + "learning_rate": 0.0005125142700365394, + "loss": 1.3305, + "step": 5750 + }, + { + "epoch": 0.6066455696202532, + "grad_norm": 0.6107281446456909, + "learning_rate": 0.0005122761424465254, + "loss": 1.347, + "step": 5751 + }, + { + "epoch": 0.6067510548523207, + "grad_norm": 0.6679951548576355, + "learning_rate": 0.0005120380414946546, + "loss": 1.3589, + "step": 5752 + }, + { + "epoch": 0.6068565400843882, + "grad_norm": 0.6270852088928223, + "learning_rate": 0.0005117999672076081, + "loss": 1.4045, + "step": 5753 + }, + { + "epoch": 0.6069620253164557, + "grad_norm": 0.6593340635299683, + "learning_rate": 
0.0005115619196120632, + "loss": 1.3717, + "step": 5754 + }, + { + "epoch": 0.6070675105485233, + "grad_norm": 0.7065010666847229, + "learning_rate": 0.0005113238987346939, + "loss": 1.3985, + "step": 5755 + }, + { + "epoch": 0.6071729957805907, + "grad_norm": 0.7142382860183716, + "learning_rate": 0.000511085904602172, + "loss": 1.3653, + "step": 5756 + }, + { + "epoch": 0.6072784810126582, + "grad_norm": 0.6407605409622192, + "learning_rate": 0.0005108479372411658, + "loss": 1.3552, + "step": 5757 + }, + { + "epoch": 0.6073839662447258, + "grad_norm": 0.6300130486488342, + "learning_rate": 0.0005106099966783409, + "loss": 1.4019, + "step": 5758 + }, + { + "epoch": 0.6074894514767932, + "grad_norm": 0.6927473545074463, + "learning_rate": 0.0005103720829403594, + "loss": 1.3939, + "step": 5759 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 0.6372698545455933, + "learning_rate": 0.000510134196053881, + "loss": 1.3923, + "step": 5760 + }, + { + "epoch": 0.6077004219409282, + "grad_norm": 0.6252440810203552, + "learning_rate": 0.000509896336045562, + "loss": 1.3525, + "step": 5761 + }, + { + "epoch": 0.6078059071729958, + "grad_norm": 0.6262531280517578, + "learning_rate": 0.0005096585029420556, + "loss": 1.395, + "step": 5762 + }, + { + "epoch": 0.6079113924050633, + "grad_norm": 0.6415292024612427, + "learning_rate": 0.0005094206967700127, + "loss": 1.3748, + "step": 5763 + }, + { + "epoch": 0.6080168776371307, + "grad_norm": 0.6434293985366821, + "learning_rate": 0.0005091829175560801, + "loss": 1.3766, + "step": 5764 + }, + { + "epoch": 0.6081223628691983, + "grad_norm": 0.6616995930671692, + "learning_rate": 0.0005089451653269026, + "loss": 1.3702, + "step": 5765 + }, + { + "epoch": 0.6082278481012658, + "grad_norm": 0.6462805271148682, + "learning_rate": 0.0005087074401091212, + "loss": 1.4035, + "step": 5766 + }, + { + "epoch": 0.6083333333333333, + "grad_norm": 0.6304334998130798, + "learning_rate": 0.0005084697419293746, + "loss": 1.3887, + "step": 
5767 + }, + { + "epoch": 0.6084388185654008, + "grad_norm": 0.783901572227478, + "learning_rate": 0.0005082320708142975, + "loss": 1.37, + "step": 5768 + }, + { + "epoch": 0.6085443037974684, + "grad_norm": 0.725447416305542, + "learning_rate": 0.0005079944267905226, + "loss": 1.3838, + "step": 5769 + }, + { + "epoch": 0.6086497890295358, + "grad_norm": 0.7859946489334106, + "learning_rate": 0.0005077568098846789, + "loss": 1.401, + "step": 5770 + }, + { + "epoch": 0.6087552742616034, + "grad_norm": 0.748180091381073, + "learning_rate": 0.0005075192201233924, + "loss": 1.3703, + "step": 5771 + }, + { + "epoch": 0.6088607594936709, + "grad_norm": 0.6217761635780334, + "learning_rate": 0.0005072816575332864, + "loss": 1.3911, + "step": 5772 + }, + { + "epoch": 0.6089662447257383, + "grad_norm": 0.6283357739448547, + "learning_rate": 0.0005070441221409811, + "loss": 1.4001, + "step": 5773 + }, + { + "epoch": 0.6090717299578059, + "grad_norm": 0.6415743231773376, + "learning_rate": 0.0005068066139730936, + "loss": 1.3822, + "step": 5774 + }, + { + "epoch": 0.6091772151898734, + "grad_norm": 0.6401770114898682, + "learning_rate": 0.0005065691330562375, + "loss": 1.3684, + "step": 5775 + }, + { + "epoch": 0.6092827004219409, + "grad_norm": 0.6380758881568909, + "learning_rate": 0.0005063316794170239, + "loss": 1.4092, + "step": 5776 + }, + { + "epoch": 0.6093881856540084, + "grad_norm": 0.6779094338417053, + "learning_rate": 0.0005060942530820607, + "loss": 1.3836, + "step": 5777 + }, + { + "epoch": 0.609493670886076, + "grad_norm": 0.65826815366745, + "learning_rate": 0.0005058568540779526, + "loss": 1.408, + "step": 5778 + }, + { + "epoch": 0.6095991561181434, + "grad_norm": 0.6661471724510193, + "learning_rate": 0.0005056194824313015, + "loss": 1.387, + "step": 5779 + }, + { + "epoch": 0.609704641350211, + "grad_norm": 0.6362476944923401, + "learning_rate": 0.000505382138168706, + "loss": 1.3982, + "step": 5780 + }, + { + "epoch": 0.6098101265822785, + "grad_norm": 
0.6875445246696472, + "learning_rate": 0.0005051448213167614, + "loss": 1.3913, + "step": 5781 + }, + { + "epoch": 0.609915611814346, + "grad_norm": 0.6469948887825012, + "learning_rate": 0.0005049075319020608, + "loss": 1.3941, + "step": 5782 + }, + { + "epoch": 0.6100210970464135, + "grad_norm": 0.633079469203949, + "learning_rate": 0.0005046702699511933, + "loss": 1.4129, + "step": 5783 + }, + { + "epoch": 0.610126582278481, + "grad_norm": 0.6183626651763916, + "learning_rate": 0.0005044330354907454, + "loss": 1.4242, + "step": 5784 + }, + { + "epoch": 0.6102320675105485, + "grad_norm": 0.6331286430358887, + "learning_rate": 0.0005041958285473005, + "loss": 1.3704, + "step": 5785 + }, + { + "epoch": 0.610337552742616, + "grad_norm": 0.620213508605957, + "learning_rate": 0.0005039586491474386, + "loss": 1.356, + "step": 5786 + }, + { + "epoch": 0.6104430379746836, + "grad_norm": 0.6254869699478149, + "learning_rate": 0.000503721497317737, + "loss": 1.3628, + "step": 5787 + }, + { + "epoch": 0.610548523206751, + "grad_norm": 0.6527975797653198, + "learning_rate": 0.0005034843730847696, + "loss": 1.3733, + "step": 5788 + }, + { + "epoch": 0.6106540084388186, + "grad_norm": 0.6177598237991333, + "learning_rate": 0.0005032472764751074, + "loss": 1.3953, + "step": 5789 + }, + { + "epoch": 0.6107594936708861, + "grad_norm": 0.6363442540168762, + "learning_rate": 0.0005030102075153181, + "loss": 1.3757, + "step": 5790 + }, + { + "epoch": 0.6108649789029535, + "grad_norm": 0.6334032416343689, + "learning_rate": 0.000502773166231967, + "loss": 1.4077, + "step": 5791 + }, + { + "epoch": 0.6109704641350211, + "grad_norm": 0.6371563076972961, + "learning_rate": 0.0005025361526516151, + "loss": 1.3894, + "step": 5792 + }, + { + "epoch": 0.6110759493670886, + "grad_norm": 0.7970656156539917, + "learning_rate": 0.0005022991668008216, + "loss": 1.4106, + "step": 5793 + }, + { + "epoch": 0.6111814345991561, + "grad_norm": 0.6988834738731384, + "learning_rate": 
0.0005020622087061415, + "loss": 1.3913, + "step": 5794 + }, + { + "epoch": 0.6112869198312236, + "grad_norm": 0.6757332682609558, + "learning_rate": 0.0005018252783941273, + "loss": 1.3809, + "step": 5795 + }, + { + "epoch": 0.6113924050632912, + "grad_norm": 0.6521857976913452, + "learning_rate": 0.0005015883758913281, + "loss": 1.3762, + "step": 5796 + }, + { + "epoch": 0.6114978902953586, + "grad_norm": 0.8958799839019775, + "learning_rate": 0.0005013515012242901, + "loss": 1.391, + "step": 5797 + }, + { + "epoch": 0.6116033755274262, + "grad_norm": 0.6376051306724548, + "learning_rate": 0.0005011146544195559, + "loss": 1.354, + "step": 5798 + }, + { + "epoch": 0.6117088607594937, + "grad_norm": 0.6331198215484619, + "learning_rate": 0.000500877835503666, + "loss": 1.3807, + "step": 5799 + }, + { + "epoch": 0.6118143459915611, + "grad_norm": 0.7620409727096558, + "learning_rate": 0.0005006410445031569, + "loss": 1.4023, + "step": 5800 + }, + { + "epoch": 0.6119198312236287, + "grad_norm": 0.6472519040107727, + "learning_rate": 0.0005004042814445622, + "loss": 1.4046, + "step": 5801 + }, + { + "epoch": 0.6120253164556962, + "grad_norm": 0.7388457655906677, + "learning_rate": 0.0005001675463544125, + "loss": 1.3845, + "step": 5802 + }, + { + "epoch": 0.6121308016877637, + "grad_norm": 0.6713482141494751, + "learning_rate": 0.0004999308392592349, + "loss": 1.3723, + "step": 5803 + }, + { + "epoch": 0.6122362869198312, + "grad_norm": 0.7273929715156555, + "learning_rate": 0.0004996941601855536, + "loss": 1.3847, + "step": 5804 + }, + { + "epoch": 0.6123417721518988, + "grad_norm": 0.6978940367698669, + "learning_rate": 0.0004994575091598898, + "loss": 1.3735, + "step": 5805 + }, + { + "epoch": 0.6124472573839662, + "grad_norm": 0.7103510499000549, + "learning_rate": 0.0004992208862087616, + "loss": 1.4056, + "step": 5806 + }, + { + "epoch": 0.6125527426160338, + "grad_norm": 0.6573925614356995, + "learning_rate": 0.0004989842913586832, + "loss": 1.4068, + "step": 
5807 + }, + { + "epoch": 0.6126582278481013, + "grad_norm": 0.6208477020263672, + "learning_rate": 0.000498747724636167, + "loss": 1.4107, + "step": 5808 + }, + { + "epoch": 0.6127637130801687, + "grad_norm": 0.7213187217712402, + "learning_rate": 0.000498511186067721, + "loss": 1.4083, + "step": 5809 + }, + { + "epoch": 0.6128691983122363, + "grad_norm": 0.6208919882774353, + "learning_rate": 0.0004982746756798507, + "loss": 1.3965, + "step": 5810 + }, + { + "epoch": 0.6129746835443038, + "grad_norm": 0.6178995966911316, + "learning_rate": 0.0004980381934990583, + "loss": 1.4267, + "step": 5811 + }, + { + "epoch": 0.6130801687763713, + "grad_norm": 0.6872401237487793, + "learning_rate": 0.0004978017395518425, + "loss": 1.3935, + "step": 5812 + }, + { + "epoch": 0.6131856540084388, + "grad_norm": 0.6358594298362732, + "learning_rate": 0.0004975653138646994, + "loss": 1.3872, + "step": 5813 + }, + { + "epoch": 0.6132911392405064, + "grad_norm": 0.6215168237686157, + "learning_rate": 0.0004973289164641217, + "loss": 1.384, + "step": 5814 + }, + { + "epoch": 0.6133966244725738, + "grad_norm": 0.6389588713645935, + "learning_rate": 0.0004970925473765988, + "loss": 1.4289, + "step": 5815 + }, + { + "epoch": 0.6135021097046414, + "grad_norm": 0.6363147497177124, + "learning_rate": 0.0004968562066286168, + "loss": 1.4268, + "step": 5816 + }, + { + "epoch": 0.6136075949367089, + "grad_norm": 0.6429824233055115, + "learning_rate": 0.0004966198942466595, + "loss": 1.4035, + "step": 5817 + }, + { + "epoch": 0.6137130801687763, + "grad_norm": 0.6491959691047668, + "learning_rate": 0.0004963836102572065, + "loss": 1.4222, + "step": 5818 + }, + { + "epoch": 0.6138185654008439, + "grad_norm": 0.6417509317398071, + "learning_rate": 0.0004961473546867346, + "loss": 1.3915, + "step": 5819 + }, + { + "epoch": 0.6139240506329114, + "grad_norm": 0.6349524259567261, + "learning_rate": 0.0004959111275617174, + "loss": 1.4093, + "step": 5820 + }, + { + "epoch": 0.6140295358649789, + 
"grad_norm": 0.7015865445137024, + "learning_rate": 0.0004956749289086254, + "loss": 1.4284, + "step": 5821 + }, + { + "epoch": 0.6141350210970464, + "grad_norm": 0.6313273906707764, + "learning_rate": 0.0004954387587539257, + "loss": 1.3737, + "step": 5822 + }, + { + "epoch": 0.614240506329114, + "grad_norm": 0.7472721934318542, + "learning_rate": 0.0004952026171240826, + "loss": 1.3778, + "step": 5823 + }, + { + "epoch": 0.6143459915611814, + "grad_norm": 0.6587797999382019, + "learning_rate": 0.0004949665040455566, + "loss": 1.4014, + "step": 5824 + }, + { + "epoch": 0.614451476793249, + "grad_norm": 0.7997559309005737, + "learning_rate": 0.0004947304195448052, + "loss": 1.4028, + "step": 5825 + }, + { + "epoch": 0.6145569620253165, + "grad_norm": 0.6508151888847351, + "learning_rate": 0.0004944943636482836, + "loss": 1.3806, + "step": 5826 + }, + { + "epoch": 0.614662447257384, + "grad_norm": 0.6292436718940735, + "learning_rate": 0.0004942583363824428, + "loss": 1.3926, + "step": 5827 + }, + { + "epoch": 0.6147679324894515, + "grad_norm": 0.6678574085235596, + "learning_rate": 0.0004940223377737304, + "loss": 1.3653, + "step": 5828 + }, + { + "epoch": 0.6148734177215189, + "grad_norm": 0.6592486500740051, + "learning_rate": 0.0004937863678485915, + "loss": 1.3993, + "step": 5829 + }, + { + "epoch": 0.6149789029535865, + "grad_norm": 0.7095804214477539, + "learning_rate": 0.0004935504266334677, + "loss": 1.3711, + "step": 5830 + }, + { + "epoch": 0.615084388185654, + "grad_norm": 0.6847142577171326, + "learning_rate": 0.0004933145141547975, + "loss": 1.3974, + "step": 5831 + }, + { + "epoch": 0.6151898734177215, + "grad_norm": 0.6959579586982727, + "learning_rate": 0.0004930786304390158, + "loss": 1.4211, + "step": 5832 + }, + { + "epoch": 0.615295358649789, + "grad_norm": 0.7216222286224365, + "learning_rate": 0.0004928427755125544, + "loss": 1.3776, + "step": 5833 + }, + { + "epoch": 0.6154008438818566, + "grad_norm": 0.6650270223617554, + "learning_rate": 
0.0004926069494018427, + "loss": 1.4092, + "step": 5834 + }, + { + "epoch": 0.615506329113924, + "grad_norm": 0.6811755299568176, + "learning_rate": 0.0004923711521333056, + "loss": 1.3771, + "step": 5835 + }, + { + "epoch": 0.6156118143459915, + "grad_norm": 0.6107615232467651, + "learning_rate": 0.0004921353837333657, + "loss": 1.4363, + "step": 5836 + }, + { + "epoch": 0.6157172995780591, + "grad_norm": 0.6934714913368225, + "learning_rate": 0.0004918996442284419, + "loss": 1.3702, + "step": 5837 + }, + { + "epoch": 0.6158227848101265, + "grad_norm": 0.6363345384597778, + "learning_rate": 0.0004916639336449499, + "loss": 1.411, + "step": 5838 + }, + { + "epoch": 0.6159282700421941, + "grad_norm": 0.6818115711212158, + "learning_rate": 0.0004914282520093023, + "loss": 1.4033, + "step": 5839 + }, + { + "epoch": 0.6160337552742616, + "grad_norm": 0.6691164970397949, + "learning_rate": 0.0004911925993479085, + "loss": 1.417, + "step": 5840 + }, + { + "epoch": 0.6161392405063291, + "grad_norm": 0.6707046031951904, + "learning_rate": 0.0004909569756871745, + "loss": 1.4136, + "step": 5841 + }, + { + "epoch": 0.6162447257383966, + "grad_norm": 0.6573523879051208, + "learning_rate": 0.0004907213810535026, + "loss": 1.417, + "step": 5842 + }, + { + "epoch": 0.6163502109704642, + "grad_norm": 0.6251287460327148, + "learning_rate": 0.0004904858154732932, + "loss": 1.3543, + "step": 5843 + }, + { + "epoch": 0.6164556962025316, + "grad_norm": 0.6749710440635681, + "learning_rate": 0.0004902502789729424, + "loss": 1.4124, + "step": 5844 + }, + { + "epoch": 0.6165611814345991, + "grad_norm": 0.6330034136772156, + "learning_rate": 0.0004900147715788429, + "loss": 1.4118, + "step": 5845 + }, + { + "epoch": 0.6166666666666667, + "grad_norm": 0.6713356971740723, + "learning_rate": 0.0004897792933173847, + "loss": 1.3773, + "step": 5846 + }, + { + "epoch": 0.6167721518987341, + "grad_norm": 0.6667405366897583, + "learning_rate": 0.0004895438442149542, + "loss": 1.4057, + "step": 
5847 + }, + { + "epoch": 0.6168776371308017, + "grad_norm": 0.7302161455154419, + "learning_rate": 0.0004893084242979348, + "loss": 1.4271, + "step": 5848 + }, + { + "epoch": 0.6169831223628692, + "grad_norm": 0.7789134383201599, + "learning_rate": 0.0004890730335927063, + "loss": 1.4487, + "step": 5849 + }, + { + "epoch": 0.6170886075949367, + "grad_norm": 0.6784597039222717, + "learning_rate": 0.0004888376721256456, + "loss": 1.3875, + "step": 5850 + }, + { + "epoch": 0.6171940928270042, + "grad_norm": 0.643807053565979, + "learning_rate": 0.0004886023399231255, + "loss": 1.402, + "step": 5851 + }, + { + "epoch": 0.6172995780590718, + "grad_norm": 0.7823060154914856, + "learning_rate": 0.0004883670370115173, + "loss": 1.3846, + "step": 5852 + }, + { + "epoch": 0.6174050632911392, + "grad_norm": 0.7167797088623047, + "learning_rate": 0.00048813176341718693, + "loss": 1.3801, + "step": 5853 + }, + { + "epoch": 0.6175105485232067, + "grad_norm": 0.7187353372573853, + "learning_rate": 0.0004878965191664983, + "loss": 1.3628, + "step": 5854 + }, + { + "epoch": 0.6176160337552743, + "grad_norm": 0.6635399460792542, + "learning_rate": 0.0004876613042858118, + "loss": 1.4285, + "step": 5855 + }, + { + "epoch": 0.6177215189873417, + "grad_norm": 0.7642872929573059, + "learning_rate": 0.0004874261188014842, + "loss": 1.3937, + "step": 5856 + }, + { + "epoch": 0.6178270042194093, + "grad_norm": 0.6285889744758606, + "learning_rate": 0.00048719096273986925, + "loss": 1.3686, + "step": 5857 + }, + { + "epoch": 0.6179324894514768, + "grad_norm": 0.7004891037940979, + "learning_rate": 0.0004869558361273175, + "loss": 1.4051, + "step": 5858 + }, + { + "epoch": 0.6180379746835443, + "grad_norm": 0.6546143293380737, + "learning_rate": 0.00048672073899017564, + "loss": 1.4066, + "step": 5859 + }, + { + "epoch": 0.6181434599156118, + "grad_norm": 0.7656980752944946, + "learning_rate": 0.00048648567135478805, + "loss": 1.3967, + "step": 5860 + }, + { + "epoch": 0.6182489451476794, + 
"grad_norm": 0.7470952272415161, + "learning_rate": 0.0004862506332474951, + "loss": 1.4082, + "step": 5861 + }, + { + "epoch": 0.6183544303797468, + "grad_norm": 0.6851308345794678, + "learning_rate": 0.0004860156246946338, + "loss": 1.4543, + "step": 5862 + }, + { + "epoch": 0.6184599156118143, + "grad_norm": 0.6943263411521912, + "learning_rate": 0.0004857806457225381, + "loss": 1.3563, + "step": 5863 + }, + { + "epoch": 0.6185654008438819, + "grad_norm": 0.7480677366256714, + "learning_rate": 0.00048554569635753857, + "loss": 1.3561, + "step": 5864 + }, + { + "epoch": 0.6186708860759493, + "grad_norm": 0.6545330286026001, + "learning_rate": 0.00048531077662596246, + "loss": 1.3901, + "step": 5865 + }, + { + "epoch": 0.6187763713080169, + "grad_norm": 0.6257426738739014, + "learning_rate": 0.00048507588655413367, + "loss": 1.403, + "step": 5866 + }, + { + "epoch": 0.6188818565400844, + "grad_norm": 0.6438637375831604, + "learning_rate": 0.00048484102616837277, + "loss": 1.3352, + "step": 5867 + }, + { + "epoch": 0.6189873417721519, + "grad_norm": 0.6716253757476807, + "learning_rate": 0.000484606195494997, + "loss": 1.3645, + "step": 5868 + }, + { + "epoch": 0.6190928270042194, + "grad_norm": 0.6626439094543457, + "learning_rate": 0.0004843713945603205, + "loss": 1.3946, + "step": 5869 + }, + { + "epoch": 0.619198312236287, + "grad_norm": 0.6447592377662659, + "learning_rate": 0.0004841366233906538, + "loss": 1.381, + "step": 5870 + }, + { + "epoch": 0.6193037974683544, + "grad_norm": 0.6861157417297363, + "learning_rate": 0.0004839018820123042, + "loss": 1.4264, + "step": 5871 + }, + { + "epoch": 0.619409282700422, + "grad_norm": 0.8029265403747559, + "learning_rate": 0.0004836671704515756, + "loss": 1.3859, + "step": 5872 + }, + { + "epoch": 0.6195147679324895, + "grad_norm": 0.6516905426979065, + "learning_rate": 0.00048343248873476853, + "loss": 1.3791, + "step": 5873 + }, + { + "epoch": 0.6196202531645569, + "grad_norm": 0.6968327164649963, + 
"learning_rate": 0.00048319783688818043, + "loss": 1.3816, + "step": 5874 + }, + { + "epoch": 0.6197257383966245, + "grad_norm": 0.6666541695594788, + "learning_rate": 0.00048296321493810507, + "loss": 1.455, + "step": 5875 + }, + { + "epoch": 0.619831223628692, + "grad_norm": 0.7218255996704102, + "learning_rate": 0.0004827286229108331, + "loss": 1.3558, + "step": 5876 + }, + { + "epoch": 0.6199367088607595, + "grad_norm": 0.6885784864425659, + "learning_rate": 0.00048249406083265123, + "loss": 1.4083, + "step": 5877 + }, + { + "epoch": 0.620042194092827, + "grad_norm": 0.651799201965332, + "learning_rate": 0.0004822595287298442, + "loss": 1.3978, + "step": 5878 + }, + { + "epoch": 0.6201476793248946, + "grad_norm": 0.7019875645637512, + "learning_rate": 0.00048202502662869195, + "loss": 1.3824, + "step": 5879 + }, + { + "epoch": 0.620253164556962, + "grad_norm": 0.6550706028938293, + "learning_rate": 0.0004817905545554717, + "loss": 1.3986, + "step": 5880 + }, + { + "epoch": 0.6203586497890295, + "grad_norm": 0.647579550743103, + "learning_rate": 0.00048155611253645727, + "loss": 1.3783, + "step": 5881 + }, + { + "epoch": 0.6204641350210971, + "grad_norm": 0.7845414280891418, + "learning_rate": 0.0004813217005979191, + "loss": 1.3946, + "step": 5882 + }, + { + "epoch": 0.6205696202531645, + "grad_norm": 0.6535588502883911, + "learning_rate": 0.000481087318766124, + "loss": 1.37, + "step": 5883 + }, + { + "epoch": 0.6206751054852321, + "grad_norm": 0.6860992312431335, + "learning_rate": 0.0004808529670673358, + "loss": 1.3568, + "step": 5884 + }, + { + "epoch": 0.6207805907172996, + "grad_norm": 0.6604651212692261, + "learning_rate": 0.00048061864552781456, + "loss": 1.3675, + "step": 5885 + }, + { + "epoch": 0.6208860759493671, + "grad_norm": 0.7568514347076416, + "learning_rate": 0.0004803843541738173, + "loss": 1.4113, + "step": 5886 + }, + { + "epoch": 0.6209915611814346, + "grad_norm": 0.7243593335151672, + "learning_rate": 0.0004801500930315978, + "loss": 
1.4077, + "step": 5887 + }, + { + "epoch": 0.6210970464135022, + "grad_norm": 0.7083378434181213, + "learning_rate": 0.000479915862127406, + "loss": 1.3641, + "step": 5888 + }, + { + "epoch": 0.6212025316455696, + "grad_norm": 0.7387163043022156, + "learning_rate": 0.0004796816614874885, + "loss": 1.3998, + "step": 5889 + }, + { + "epoch": 0.6213080168776371, + "grad_norm": 0.686150848865509, + "learning_rate": 0.00047944749113808884, + "loss": 1.4217, + "step": 5890 + }, + { + "epoch": 0.6214135021097047, + "grad_norm": 0.6897521018981934, + "learning_rate": 0.0004792133511054469, + "loss": 1.3628, + "step": 5891 + }, + { + "epoch": 0.6215189873417721, + "grad_norm": 0.6748346090316772, + "learning_rate": 0.0004789792414157992, + "loss": 1.4132, + "step": 5892 + }, + { + "epoch": 0.6216244725738397, + "grad_norm": 0.8539541959762573, + "learning_rate": 0.000478745162095379, + "loss": 1.3457, + "step": 5893 + }, + { + "epoch": 0.6217299578059071, + "grad_norm": 0.5883303284645081, + "learning_rate": 0.0004785111131704157, + "loss": 1.3618, + "step": 5894 + }, + { + "epoch": 0.6218354430379747, + "grad_norm": 0.7862802147865295, + "learning_rate": 0.0004782770946671362, + "loss": 1.3985, + "step": 5895 + }, + { + "epoch": 0.6219409282700422, + "grad_norm": 0.7222262024879456, + "learning_rate": 0.0004780431066117629, + "loss": 1.3935, + "step": 5896 + }, + { + "epoch": 0.6220464135021097, + "grad_norm": 0.8704521656036377, + "learning_rate": 0.0004778091490305159, + "loss": 1.4234, + "step": 5897 + }, + { + "epoch": 0.6221518987341772, + "grad_norm": 0.6576271057128906, + "learning_rate": 0.0004775752219496109, + "loss": 1.4086, + "step": 5898 + }, + { + "epoch": 0.6222573839662447, + "grad_norm": 0.6833055019378662, + "learning_rate": 0.00047734132539526086, + "loss": 1.3611, + "step": 5899 + }, + { + "epoch": 0.6223628691983122, + "grad_norm": 0.7842674851417542, + "learning_rate": 0.00047710745939367474, + "loss": 1.4059, + "step": 5900 + }, + { + "epoch": 
0.6224683544303797, + "grad_norm": 0.6334193348884583, + "learning_rate": 0.00047687362397105863, + "loss": 1.3775, + "step": 5901 + }, + { + "epoch": 0.6225738396624473, + "grad_norm": 0.7167404294013977, + "learning_rate": 0.0004766398191536149, + "loss": 1.4326, + "step": 5902 + }, + { + "epoch": 0.6226793248945147, + "grad_norm": 0.6841334104537964, + "learning_rate": 0.00047640604496754235, + "loss": 1.3568, + "step": 5903 + }, + { + "epoch": 0.6227848101265823, + "grad_norm": 0.6409292221069336, + "learning_rate": 0.000476172301439037, + "loss": 1.391, + "step": 5904 + }, + { + "epoch": 0.6228902953586498, + "grad_norm": 0.7638100385665894, + "learning_rate": 0.00047593858859429035, + "loss": 1.3565, + "step": 5905 + }, + { + "epoch": 0.6229957805907173, + "grad_norm": 0.6471102237701416, + "learning_rate": 0.00047570490645949175, + "loss": 1.3834, + "step": 5906 + }, + { + "epoch": 0.6231012658227848, + "grad_norm": 0.6474233269691467, + "learning_rate": 0.000475471255060826, + "loss": 1.3535, + "step": 5907 + }, + { + "epoch": 0.6232067510548523, + "grad_norm": 0.7709793448448181, + "learning_rate": 0.0004752376344244752, + "loss": 1.3885, + "step": 5908 + }, + { + "epoch": 0.6233122362869198, + "grad_norm": 0.7289367318153381, + "learning_rate": 0.00047500404457661747, + "loss": 1.3905, + "step": 5909 + }, + { + "epoch": 0.6234177215189873, + "grad_norm": 0.9476595520973206, + "learning_rate": 0.0004747704855434278, + "loss": 1.3946, + "step": 5910 + }, + { + "epoch": 0.6235232067510549, + "grad_norm": 0.7507038116455078, + "learning_rate": 0.0004745369573510775, + "loss": 1.3496, + "step": 5911 + }, + { + "epoch": 0.6236286919831223, + "grad_norm": 0.8150012493133545, + "learning_rate": 0.0004743034600257348, + "loss": 1.4062, + "step": 5912 + }, + { + "epoch": 0.6237341772151899, + "grad_norm": 0.7317979335784912, + "learning_rate": 0.0004740699935935643, + "loss": 1.3675, + "step": 5913 + }, + { + "epoch": 0.6238396624472574, + "grad_norm": 
0.6811866760253906, + "learning_rate": 0.0004738365580807268, + "loss": 1.3684, + "step": 5914 + }, + { + "epoch": 0.6239451476793249, + "grad_norm": 0.703648567199707, + "learning_rate": 0.0004736031535133799, + "loss": 1.3639, + "step": 5915 + }, + { + "epoch": 0.6240506329113924, + "grad_norm": 0.6829782128334045, + "learning_rate": 0.0004733697799176781, + "loss": 1.3715, + "step": 5916 + }, + { + "epoch": 0.62415611814346, + "grad_norm": 0.6395797729492188, + "learning_rate": 0.0004731364373197718, + "loss": 1.4057, + "step": 5917 + }, + { + "epoch": 0.6242616033755274, + "grad_norm": 0.8012301325798035, + "learning_rate": 0.00047290312574580835, + "loss": 1.39, + "step": 5918 + }, + { + "epoch": 0.6243670886075949, + "grad_norm": 0.6602399349212646, + "learning_rate": 0.00047266984522193134, + "loss": 1.3859, + "step": 5919 + }, + { + "epoch": 0.6244725738396625, + "grad_norm": 0.8936657905578613, + "learning_rate": 0.0004724365957742809, + "loss": 1.3826, + "step": 5920 + }, + { + "epoch": 0.6245780590717299, + "grad_norm": 0.7349934577941895, + "learning_rate": 0.0004722033774289941, + "loss": 1.3991, + "step": 5921 + }, + { + "epoch": 0.6246835443037975, + "grad_norm": 0.737815260887146, + "learning_rate": 0.0004719701902122041, + "loss": 1.364, + "step": 5922 + }, + { + "epoch": 0.624789029535865, + "grad_norm": 0.7308686971664429, + "learning_rate": 0.00047173703415004066, + "loss": 1.4068, + "step": 5923 + }, + { + "epoch": 0.6248945147679325, + "grad_norm": 0.7050616145133972, + "learning_rate": 0.0004715039092686302, + "loss": 1.4034, + "step": 5924 + }, + { + "epoch": 0.625, + "grad_norm": 0.8560411334037781, + "learning_rate": 0.0004712708155940951, + "loss": 1.3986, + "step": 5925 + }, + { + "epoch": 0.6251054852320675, + "grad_norm": 0.7334998250007629, + "learning_rate": 0.0004710377531525552, + "loss": 1.3298, + "step": 5926 + }, + { + "epoch": 0.625210970464135, + "grad_norm": 1.0835082530975342, + "learning_rate": 0.000470804721970126, + 
"loss": 1.4008, + "step": 5927 + }, + { + "epoch": 0.6253164556962025, + "grad_norm": 0.7189914584159851, + "learning_rate": 0.00047057172207292004, + "loss": 1.3723, + "step": 5928 + }, + { + "epoch": 0.6254219409282701, + "grad_norm": 1.0296231508255005, + "learning_rate": 0.00047033875348704576, + "loss": 1.3774, + "step": 5929 + }, + { + "epoch": 0.6255274261603375, + "grad_norm": 0.6832123398780823, + "learning_rate": 0.00047010581623860883, + "loss": 1.3982, + "step": 5930 + }, + { + "epoch": 0.6256329113924051, + "grad_norm": 0.7679517269134521, + "learning_rate": 0.0004698729103537109, + "loss": 1.3921, + "step": 5931 + }, + { + "epoch": 0.6257383966244726, + "grad_norm": 0.6391340494155884, + "learning_rate": 0.0004696400358584501, + "loss": 1.4147, + "step": 5932 + }, + { + "epoch": 0.62584388185654, + "grad_norm": 0.6850710511207581, + "learning_rate": 0.00046940719277892143, + "loss": 1.4082, + "step": 5933 + }, + { + "epoch": 0.6259493670886076, + "grad_norm": 0.6004148125648499, + "learning_rate": 0.0004691743811412159, + "loss": 1.4004, + "step": 5934 + }, + { + "epoch": 0.6260548523206751, + "grad_norm": 0.6743803024291992, + "learning_rate": 0.00046894160097142113, + "loss": 1.3891, + "step": 5935 + }, + { + "epoch": 0.6261603375527426, + "grad_norm": 0.6314758062362671, + "learning_rate": 0.00046870885229562153, + "loss": 1.3924, + "step": 5936 + }, + { + "epoch": 0.6262658227848101, + "grad_norm": 0.7020946145057678, + "learning_rate": 0.0004684761351398976, + "loss": 1.3942, + "step": 5937 + }, + { + "epoch": 0.6263713080168777, + "grad_norm": 0.6802118420600891, + "learning_rate": 0.0004682434495303267, + "loss": 1.3504, + "step": 5938 + }, + { + "epoch": 0.6264767932489451, + "grad_norm": 0.6255691051483154, + "learning_rate": 0.00046801079549298224, + "loss": 1.3343, + "step": 5939 + }, + { + "epoch": 0.6265822784810127, + "grad_norm": 0.6350163817405701, + "learning_rate": 0.0004677781730539342, + "loss": 1.4252, + "step": 5940 + }, + { + 
"epoch": 0.6266877637130802, + "grad_norm": 0.6666052341461182, + "learning_rate": 0.00046754558223924926, + "loss": 1.4157, + "step": 5941 + }, + { + "epoch": 0.6267932489451477, + "grad_norm": 0.6693050265312195, + "learning_rate": 0.00046731302307499023, + "loss": 1.3884, + "step": 5942 + }, + { + "epoch": 0.6268987341772152, + "grad_norm": 0.6405529379844666, + "learning_rate": 0.0004670804955872166, + "loss": 1.3439, + "step": 5943 + }, + { + "epoch": 0.6270042194092827, + "grad_norm": 0.7076544761657715, + "learning_rate": 0.00046684799980198415, + "loss": 1.3937, + "step": 5944 + }, + { + "epoch": 0.6271097046413502, + "grad_norm": 0.651466429233551, + "learning_rate": 0.0004666155357453451, + "loss": 1.3559, + "step": 5945 + }, + { + "epoch": 0.6272151898734177, + "grad_norm": 0.6127836108207703, + "learning_rate": 0.00046638310344334835, + "loss": 1.3997, + "step": 5946 + }, + { + "epoch": 0.6273206751054853, + "grad_norm": 0.667510449886322, + "learning_rate": 0.0004661507029220393, + "loss": 1.3713, + "step": 5947 + }, + { + "epoch": 0.6274261603375527, + "grad_norm": 0.6766555309295654, + "learning_rate": 0.0004659183342074594, + "loss": 1.3686, + "step": 5948 + }, + { + "epoch": 0.6275316455696203, + "grad_norm": 0.6490470170974731, + "learning_rate": 0.0004656859973256466, + "loss": 1.3824, + "step": 5949 + }, + { + "epoch": 0.6276371308016878, + "grad_norm": 0.6488242149353027, + "learning_rate": 0.0004654536923026356, + "loss": 1.3805, + "step": 5950 + }, + { + "epoch": 0.6277426160337553, + "grad_norm": 0.6987815499305725, + "learning_rate": 0.00046522141916445725, + "loss": 1.4315, + "step": 5951 + }, + { + "epoch": 0.6278481012658228, + "grad_norm": 0.6855977177619934, + "learning_rate": 0.0004649891779371389, + "loss": 1.4047, + "step": 5952 + }, + { + "epoch": 0.6279535864978903, + "grad_norm": 0.6248047947883606, + "learning_rate": 0.0004647569686467043, + "loss": 1.3789, + "step": 5953 + }, + { + "epoch": 0.6280590717299578, + "grad_norm": 
0.6997603178024292, + "learning_rate": 0.00046452479131917383, + "loss": 1.3663, + "step": 5954 + }, + { + "epoch": 0.6281645569620253, + "grad_norm": 0.6317358613014221, + "learning_rate": 0.0004642926459805636, + "loss": 1.3925, + "step": 5955 + }, + { + "epoch": 0.6282700421940929, + "grad_norm": 0.7754763960838318, + "learning_rate": 0.0004640605326568874, + "loss": 1.4006, + "step": 5956 + }, + { + "epoch": 0.6283755274261603, + "grad_norm": 0.6476829648017883, + "learning_rate": 0.00046382845137415437, + "loss": 1.3581, + "step": 5957 + }, + { + "epoch": 0.6284810126582279, + "grad_norm": 0.8019651174545288, + "learning_rate": 0.0004635964021583703, + "loss": 1.3695, + "step": 5958 + }, + { + "epoch": 0.6285864978902953, + "grad_norm": 0.6464157700538635, + "learning_rate": 0.00046336438503553754, + "loss": 1.3804, + "step": 5959 + }, + { + "epoch": 0.6286919831223629, + "grad_norm": 0.7243942618370056, + "learning_rate": 0.00046313240003165466, + "loss": 1.3956, + "step": 5960 + }, + { + "epoch": 0.6287974683544304, + "grad_norm": 0.6574905514717102, + "learning_rate": 0.00046290044717271685, + "loss": 1.3978, + "step": 5961 + }, + { + "epoch": 0.6289029535864978, + "grad_norm": 0.6464875936508179, + "learning_rate": 0.00046266852648471553, + "loss": 1.3747, + "step": 5962 + }, + { + "epoch": 0.6290084388185654, + "grad_norm": 0.6214715242385864, + "learning_rate": 0.0004624366379936383, + "loss": 1.3981, + "step": 5963 + }, + { + "epoch": 0.6291139240506329, + "grad_norm": 0.6686469316482544, + "learning_rate": 0.00046220478172546997, + "loss": 1.3922, + "step": 5964 + }, + { + "epoch": 0.6292194092827004, + "grad_norm": 0.6695406436920166, + "learning_rate": 0.00046197295770619105, + "loss": 1.4221, + "step": 5965 + }, + { + "epoch": 0.6293248945147679, + "grad_norm": 0.6914464235305786, + "learning_rate": 0.00046174116596177833, + "loss": 1.3873, + "step": 5966 + }, + { + "epoch": 0.6294303797468355, + "grad_norm": 0.6729449033737183, + "learning_rate": 
0.00046150940651820536, + "loss": 1.3897, + "step": 5967 + }, + { + "epoch": 0.6295358649789029, + "grad_norm": 0.615609884262085, + "learning_rate": 0.0004612776794014419, + "loss": 1.3469, + "step": 5968 + }, + { + "epoch": 0.6296413502109705, + "grad_norm": 0.6784124970436096, + "learning_rate": 0.00046104598463745424, + "loss": 1.3561, + "step": 5969 + }, + { + "epoch": 0.629746835443038, + "grad_norm": 0.6433786153793335, + "learning_rate": 0.0004608143222522048, + "loss": 1.408, + "step": 5970 + }, + { + "epoch": 0.6298523206751054, + "grad_norm": 0.6167643666267395, + "learning_rate": 0.00046058269227165256, + "loss": 1.4048, + "step": 5971 + }, + { + "epoch": 0.629957805907173, + "grad_norm": 0.6233484745025635, + "learning_rate": 0.0004603510947217526, + "loss": 1.3447, + "step": 5972 + }, + { + "epoch": 0.6300632911392405, + "grad_norm": 0.701504111289978, + "learning_rate": 0.000460119529628457, + "loss": 1.3985, + "step": 5973 + }, + { + "epoch": 0.630168776371308, + "grad_norm": 0.638132631778717, + "learning_rate": 0.00045988799701771364, + "loss": 1.3846, + "step": 5974 + }, + { + "epoch": 0.6302742616033755, + "grad_norm": 0.6890543103218079, + "learning_rate": 0.0004596564969154668, + "loss": 1.3647, + "step": 5975 + }, + { + "epoch": 0.6303797468354431, + "grad_norm": 0.6721588373184204, + "learning_rate": 0.00045942502934765735, + "loss": 1.3754, + "step": 5976 + }, + { + "epoch": 0.6304852320675105, + "grad_norm": 0.643223226070404, + "learning_rate": 0.0004591935943402222, + "loss": 1.3448, + "step": 5977 + }, + { + "epoch": 0.630590717299578, + "grad_norm": 0.6935306191444397, + "learning_rate": 0.00045896219191909486, + "loss": 1.3834, + "step": 5978 + }, + { + "epoch": 0.6306962025316456, + "grad_norm": 0.6322671175003052, + "learning_rate": 0.0004587308221102053, + "loss": 1.338, + "step": 5979 + }, + { + "epoch": 0.630801687763713, + "grad_norm": 0.6970802545547485, + "learning_rate": 0.0004584994849394795, + "loss": 1.3616, + "step": 5980 
+ }, + { + "epoch": 0.6309071729957806, + "grad_norm": 0.6390208601951599, + "learning_rate": 0.0004582681804328396, + "loss": 1.387, + "step": 5981 + }, + { + "epoch": 0.6310126582278481, + "grad_norm": 0.6638549566268921, + "learning_rate": 0.0004580369086162051, + "loss": 1.3927, + "step": 5982 + }, + { + "epoch": 0.6311181434599156, + "grad_norm": 0.6401740312576294, + "learning_rate": 0.0004578056695154909, + "loss": 1.3535, + "step": 5983 + }, + { + "epoch": 0.6312236286919831, + "grad_norm": 0.6418701410293579, + "learning_rate": 0.0004575744631566083, + "loss": 1.3927, + "step": 5984 + }, + { + "epoch": 0.6313291139240507, + "grad_norm": 0.6621978878974915, + "learning_rate": 0.0004573432895654654, + "loss": 1.384, + "step": 5985 + }, + { + "epoch": 0.6314345991561181, + "grad_norm": 0.6989119052886963, + "learning_rate": 0.00045711214876796623, + "loss": 1.3942, + "step": 5986 + }, + { + "epoch": 0.6315400843881857, + "grad_norm": 0.6277797222137451, + "learning_rate": 0.0004568810407900112, + "loss": 1.381, + "step": 5987 + }, + { + "epoch": 0.6316455696202532, + "grad_norm": 0.8661463856697083, + "learning_rate": 0.00045664996565749716, + "loss": 1.4074, + "step": 5988 + }, + { + "epoch": 0.6317510548523206, + "grad_norm": 0.707148551940918, + "learning_rate": 0.00045641892339631703, + "loss": 1.4261, + "step": 5989 + }, + { + "epoch": 0.6318565400843882, + "grad_norm": 0.8038681149482727, + "learning_rate": 0.0004561879140323607, + "loss": 1.3905, + "step": 5990 + }, + { + "epoch": 0.6319620253164557, + "grad_norm": 0.6334635615348816, + "learning_rate": 0.0004559569375915137, + "loss": 1.3845, + "step": 5991 + }, + { + "epoch": 0.6320675105485232, + "grad_norm": 0.7100968360900879, + "learning_rate": 0.00045572599409965804, + "loss": 1.3706, + "step": 5992 + }, + { + "epoch": 0.6321729957805907, + "grad_norm": 0.7910692095756531, + "learning_rate": 0.00045549508358267224, + "loss": 1.4545, + "step": 5993 + }, + { + "epoch": 0.6322784810126583, + 
"grad_norm": 0.836745023727417, + "learning_rate": 0.0004552642060664307, + "loss": 1.4006, + "step": 5994 + }, + { + "epoch": 0.6323839662447257, + "grad_norm": 0.6202686429023743, + "learning_rate": 0.00045503336157680466, + "loss": 1.3692, + "step": 5995 + }, + { + "epoch": 0.6324894514767933, + "grad_norm": 0.7196480631828308, + "learning_rate": 0.00045480255013966123, + "loss": 1.3933, + "step": 5996 + }, + { + "epoch": 0.6325949367088608, + "grad_norm": 0.687606930732727, + "learning_rate": 0.00045457177178086407, + "loss": 1.3956, + "step": 5997 + }, + { + "epoch": 0.6327004219409282, + "grad_norm": 0.7083059549331665, + "learning_rate": 0.0004543410265262727, + "loss": 1.3841, + "step": 5998 + }, + { + "epoch": 0.6328059071729958, + "grad_norm": 0.7436902523040771, + "learning_rate": 0.000454110314401744, + "loss": 1.3976, + "step": 5999 + }, + { + "epoch": 0.6329113924050633, + "grad_norm": 0.6456258296966553, + "learning_rate": 0.0004538796354331298, + "loss": 1.4153, + "step": 6000 + }, + { + "epoch": 0.6330168776371308, + "grad_norm": 0.7113340497016907, + "learning_rate": 0.0004536489896462792, + "loss": 1.3677, + "step": 6001 + }, + { + "epoch": 0.6331223628691983, + "grad_norm": 0.6644331812858582, + "learning_rate": 0.0004534183770670371, + "loss": 1.3522, + "step": 6002 + }, + { + "epoch": 0.6332278481012659, + "grad_norm": 0.6833400726318359, + "learning_rate": 0.0004531877977212446, + "loss": 1.3924, + "step": 6003 + }, + { + "epoch": 0.6333333333333333, + "grad_norm": 0.6213039755821228, + "learning_rate": 0.00045295725163473945, + "loss": 1.4212, + "step": 6004 + }, + { + "epoch": 0.6334388185654009, + "grad_norm": 0.7496004700660706, + "learning_rate": 0.0004527267388333555, + "loss": 1.3743, + "step": 6005 + }, + { + "epoch": 0.6335443037974684, + "grad_norm": 0.6415988206863403, + "learning_rate": 0.0004524962593429227, + "loss": 1.3739, + "step": 6006 + }, + { + "epoch": 0.6336497890295358, + "grad_norm": 0.6495562791824341, + 
"learning_rate": 0.00045226581318926737, + "loss": 1.3808, + "step": 6007 + }, + { + "epoch": 0.6337552742616034, + "grad_norm": 0.6501138210296631, + "learning_rate": 0.0004520354003982125, + "loss": 1.3886, + "step": 6008 + }, + { + "epoch": 0.6338607594936709, + "grad_norm": 0.6642265915870667, + "learning_rate": 0.00045180502099557686, + "loss": 1.4139, + "step": 6009 + }, + { + "epoch": 0.6339662447257384, + "grad_norm": 0.6522647142410278, + "learning_rate": 0.0004515746750071754, + "loss": 1.3621, + "step": 6010 + }, + { + "epoch": 0.6340717299578059, + "grad_norm": 0.7152671217918396, + "learning_rate": 0.00045134436245881986, + "loss": 1.3616, + "step": 6011 + }, + { + "epoch": 0.6341772151898735, + "grad_norm": 0.7639477849006653, + "learning_rate": 0.0004511140833763177, + "loss": 1.3979, + "step": 6012 + }, + { + "epoch": 0.6342827004219409, + "grad_norm": 0.6836904883384705, + "learning_rate": 0.00045088383778547284, + "loss": 1.4089, + "step": 6013 + }, + { + "epoch": 0.6343881856540085, + "grad_norm": 0.6465287208557129, + "learning_rate": 0.0004506536257120856, + "loss": 1.4025, + "step": 6014 + }, + { + "epoch": 0.634493670886076, + "grad_norm": 0.6455654501914978, + "learning_rate": 0.0004504234471819518, + "loss": 1.4022, + "step": 6015 + }, + { + "epoch": 0.6345991561181434, + "grad_norm": 0.6482788324356079, + "learning_rate": 0.0004501933022208649, + "loss": 1.386, + "step": 6016 + }, + { + "epoch": 0.634704641350211, + "grad_norm": 0.651366114616394, + "learning_rate": 0.00044996319085461353, + "loss": 1.4092, + "step": 6017 + }, + { + "epoch": 0.6348101265822785, + "grad_norm": 0.6468707919120789, + "learning_rate": 0.00044973311310898275, + "loss": 1.3536, + "step": 6018 + }, + { + "epoch": 0.634915611814346, + "grad_norm": 0.6799265742301941, + "learning_rate": 0.00044950306900975377, + "loss": 1.3579, + "step": 6019 + }, + { + "epoch": 0.6350210970464135, + "grad_norm": 0.6376535892486572, + "learning_rate": 0.0004492730585827046, + 
"loss": 1.3906, + "step": 6020 + }, + { + "epoch": 0.6351265822784811, + "grad_norm": 0.6237635612487793, + "learning_rate": 0.0004490430818536085, + "loss": 1.3572, + "step": 6021 + }, + { + "epoch": 0.6352320675105485, + "grad_norm": 0.6220120787620544, + "learning_rate": 0.0004488131388482359, + "loss": 1.3422, + "step": 6022 + }, + { + "epoch": 0.635337552742616, + "grad_norm": 0.661410391330719, + "learning_rate": 0.000448583229592353, + "loss": 1.3835, + "step": 6023 + }, + { + "epoch": 0.6354430379746835, + "grad_norm": 0.6881818771362305, + "learning_rate": 0.0004483533541117218, + "loss": 1.3463, + "step": 6024 + }, + { + "epoch": 0.635548523206751, + "grad_norm": 0.6825907826423645, + "learning_rate": 0.0004481235124321018, + "loss": 1.3771, + "step": 6025 + }, + { + "epoch": 0.6356540084388186, + "grad_norm": 0.6705332398414612, + "learning_rate": 0.0004478937045792474, + "loss": 1.3695, + "step": 6026 + }, + { + "epoch": 0.635759493670886, + "grad_norm": 0.6816844344139099, + "learning_rate": 0.00044766393057891, + "loss": 1.3839, + "step": 6027 + }, + { + "epoch": 0.6358649789029536, + "grad_norm": 0.6824348568916321, + "learning_rate": 0.00044743419045683674, + "loss": 1.3944, + "step": 6028 + }, + { + "epoch": 0.6359704641350211, + "grad_norm": 0.7132402658462524, + "learning_rate": 0.00044720448423877113, + "loss": 1.3585, + "step": 6029 + }, + { + "epoch": 0.6360759493670886, + "grad_norm": 0.6782516837120056, + "learning_rate": 0.0004469748119504529, + "loss": 1.3633, + "step": 6030 + }, + { + "epoch": 0.6361814345991561, + "grad_norm": 0.8958988785743713, + "learning_rate": 0.000446745173617618, + "loss": 1.407, + "step": 6031 + }, + { + "epoch": 0.6362869198312237, + "grad_norm": 0.6356326341629028, + "learning_rate": 0.00044651556926599863, + "loss": 1.3797, + "step": 6032 + }, + { + "epoch": 0.6363924050632911, + "grad_norm": 0.7613527774810791, + "learning_rate": 0.0004462859989213227, + "loss": 1.3851, + "step": 6033 + }, + { + "epoch": 
0.6364978902953586, + "grad_norm": 0.8475673198699951, + "learning_rate": 0.0004460564626093154, + "loss": 1.4032, + "step": 6034 + }, + { + "epoch": 0.6366033755274262, + "grad_norm": 0.6659074425697327, + "learning_rate": 0.00044582696035569695, + "loss": 1.3874, + "step": 6035 + }, + { + "epoch": 0.6367088607594936, + "grad_norm": 0.8509556651115417, + "learning_rate": 0.00044559749218618444, + "loss": 1.3731, + "step": 6036 + }, + { + "epoch": 0.6368143459915612, + "grad_norm": 0.6570104360580444, + "learning_rate": 0.0004453680581264908, + "loss": 1.3972, + "step": 6037 + }, + { + "epoch": 0.6369198312236287, + "grad_norm": 0.686397135257721, + "learning_rate": 0.00044513865820232525, + "loss": 1.3863, + "step": 6038 + }, + { + "epoch": 0.6370253164556962, + "grad_norm": 0.7846634984016418, + "learning_rate": 0.0004449092924393933, + "loss": 1.365, + "step": 6039 + }, + { + "epoch": 0.6371308016877637, + "grad_norm": 0.728268027305603, + "learning_rate": 0.0004446799608633964, + "loss": 1.4051, + "step": 6040 + }, + { + "epoch": 0.6372362869198313, + "grad_norm": 0.6040709018707275, + "learning_rate": 0.00044445066350003203, + "loss": 1.3542, + "step": 6041 + }, + { + "epoch": 0.6373417721518987, + "grad_norm": 0.6862781047821045, + "learning_rate": 0.00044422140037499473, + "loss": 1.3757, + "step": 6042 + }, + { + "epoch": 0.6374472573839662, + "grad_norm": 0.6560941934585571, + "learning_rate": 0.0004439921715139743, + "loss": 1.3665, + "step": 6043 + }, + { + "epoch": 0.6375527426160338, + "grad_norm": 0.6966434121131897, + "learning_rate": 0.00044376297694265687, + "loss": 1.3608, + "step": 6044 + }, + { + "epoch": 0.6376582278481012, + "grad_norm": 0.6143025159835815, + "learning_rate": 0.000443533816686725, + "loss": 1.3629, + "step": 6045 + }, + { + "epoch": 0.6377637130801688, + "grad_norm": 0.6815187335014343, + "learning_rate": 0.0004433046907718571, + "loss": 1.3559, + "step": 6046 + }, + { + "epoch": 0.6378691983122363, + "grad_norm": 
0.6153613924980164, + "learning_rate": 0.0004430755992237278, + "loss": 1.3752, + "step": 6047 + }, + { + "epoch": 0.6379746835443038, + "grad_norm": 0.6182730793952942, + "learning_rate": 0.00044284654206800826, + "loss": 1.3867, + "step": 6048 + }, + { + "epoch": 0.6380801687763713, + "grad_norm": 0.6744678020477295, + "learning_rate": 0.00044261751933036525, + "loss": 1.3642, + "step": 6049 + }, + { + "epoch": 0.6381856540084389, + "grad_norm": 0.7004839777946472, + "learning_rate": 0.00044238853103646154, + "loss": 1.3747, + "step": 6050 + }, + { + "epoch": 0.6382911392405063, + "grad_norm": 0.7888221144676208, + "learning_rate": 0.0004421595772119573, + "loss": 1.3806, + "step": 6051 + }, + { + "epoch": 0.6383966244725738, + "grad_norm": 0.6565806865692139, + "learning_rate": 0.0004419306578825073, + "loss": 1.4135, + "step": 6052 + }, + { + "epoch": 0.6385021097046414, + "grad_norm": 1.0115458965301514, + "learning_rate": 0.0004417017730737633, + "loss": 1.3621, + "step": 6053 + }, + { + "epoch": 0.6386075949367088, + "grad_norm": 0.6854075193405151, + "learning_rate": 0.00044147292281137293, + "loss": 1.4044, + "step": 6054 + }, + { + "epoch": 0.6387130801687764, + "grad_norm": 1.02035391330719, + "learning_rate": 0.00044124410712098014, + "loss": 1.3539, + "step": 6055 + }, + { + "epoch": 0.6388185654008439, + "grad_norm": 0.656923234462738, + "learning_rate": 0.0004410153260282246, + "loss": 1.3624, + "step": 6056 + }, + { + "epoch": 0.6389240506329114, + "grad_norm": 1.0290520191192627, + "learning_rate": 0.00044078657955874245, + "loss": 1.3966, + "step": 6057 + }, + { + "epoch": 0.6390295358649789, + "grad_norm": 0.7222535014152527, + "learning_rate": 0.0004405578677381661, + "loss": 1.3996, + "step": 6058 + }, + { + "epoch": 0.6391350210970465, + "grad_norm": 0.860132098197937, + "learning_rate": 0.0004403291905921233, + "loss": 1.3957, + "step": 6059 + }, + { + "epoch": 0.6392405063291139, + "grad_norm": 0.8851647973060608, + "learning_rate": 
0.00044010054814623925, + "loss": 1.4105, + "step": 6060 + }, + { + "epoch": 0.6393459915611814, + "grad_norm": 0.8069792985916138, + "learning_rate": 0.00043987194042613393, + "loss": 1.3771, + "step": 6061 + }, + { + "epoch": 0.639451476793249, + "grad_norm": 0.7150846123695374, + "learning_rate": 0.0004396433674574242, + "loss": 1.3651, + "step": 6062 + }, + { + "epoch": 0.6395569620253164, + "grad_norm": 0.6286020278930664, + "learning_rate": 0.00043941482926572277, + "loss": 1.3612, + "step": 6063 + }, + { + "epoch": 0.639662447257384, + "grad_norm": 0.7228160500526428, + "learning_rate": 0.0004391863258766384, + "loss": 1.4363, + "step": 6064 + }, + { + "epoch": 0.6397679324894515, + "grad_norm": 0.6779153943061829, + "learning_rate": 0.00043895785731577606, + "loss": 1.3743, + "step": 6065 + }, + { + "epoch": 0.639873417721519, + "grad_norm": 0.7160859704017639, + "learning_rate": 0.0004387294236087368, + "loss": 1.3545, + "step": 6066 + }, + { + "epoch": 0.6399789029535865, + "grad_norm": 0.6583572626113892, + "learning_rate": 0.00043850102478111764, + "loss": 1.3736, + "step": 6067 + }, + { + "epoch": 0.640084388185654, + "grad_norm": 0.65757817029953, + "learning_rate": 0.00043827266085851203, + "loss": 1.3521, + "step": 6068 + }, + { + "epoch": 0.6401898734177215, + "grad_norm": 0.6774805188179016, + "learning_rate": 0.00043804433186650916, + "loss": 1.4212, + "step": 6069 + }, + { + "epoch": 0.640295358649789, + "grad_norm": 0.6388922929763794, + "learning_rate": 0.0004378160378306944, + "loss": 1.3723, + "step": 6070 + }, + { + "epoch": 0.6404008438818566, + "grad_norm": 0.7161704301834106, + "learning_rate": 0.0004375877787766495, + "loss": 1.3839, + "step": 6071 + }, + { + "epoch": 0.640506329113924, + "grad_norm": 0.6486334800720215, + "learning_rate": 0.0004373595547299517, + "loss": 1.3799, + "step": 6072 + }, + { + "epoch": 0.6406118143459916, + "grad_norm": 0.646582305431366, + "learning_rate": 0.00043713136571617474, + "loss": 1.408, + "step": 
6073 + }, + { + "epoch": 0.6407172995780591, + "grad_norm": 0.6302438974380493, + "learning_rate": 0.00043690321176088843, + "loss": 1.4085, + "step": 6074 + }, + { + "epoch": 0.6408227848101266, + "grad_norm": 0.8179890513420105, + "learning_rate": 0.00043667509288965845, + "loss": 1.3555, + "step": 6075 + }, + { + "epoch": 0.6409282700421941, + "grad_norm": 0.6440534591674805, + "learning_rate": 0.0004364470091280463, + "loss": 1.3511, + "step": 6076 + }, + { + "epoch": 0.6410337552742617, + "grad_norm": 0.8348278403282166, + "learning_rate": 0.0004362189605016107, + "loss": 1.3834, + "step": 6077 + }, + { + "epoch": 0.6411392405063291, + "grad_norm": 0.6382031440734863, + "learning_rate": 0.00043599094703590524, + "loss": 1.3757, + "step": 6078 + }, + { + "epoch": 0.6412447257383966, + "grad_norm": 0.6992142796516418, + "learning_rate": 0.00043576296875647984, + "loss": 1.3999, + "step": 6079 + }, + { + "epoch": 0.6413502109704642, + "grad_norm": 0.7312188744544983, + "learning_rate": 0.00043553502568888095, + "loss": 1.351, + "step": 6080 + }, + { + "epoch": 0.6414556962025316, + "grad_norm": 0.6627947092056274, + "learning_rate": 0.00043530711785865026, + "loss": 1.3674, + "step": 6081 + }, + { + "epoch": 0.6415611814345992, + "grad_norm": 0.6914411783218384, + "learning_rate": 0.00043507924529132637, + "loss": 1.3549, + "step": 6082 + }, + { + "epoch": 0.6416666666666667, + "grad_norm": 0.6569751501083374, + "learning_rate": 0.0004348514080124432, + "loss": 1.4173, + "step": 6083 + }, + { + "epoch": 0.6417721518987342, + "grad_norm": 0.685971200466156, + "learning_rate": 0.0004346236060475314, + "loss": 1.3648, + "step": 6084 + }, + { + "epoch": 0.6418776371308017, + "grad_norm": 0.7084618806838989, + "learning_rate": 0.00043439583942211674, + "loss": 1.3669, + "step": 6085 + }, + { + "epoch": 0.6419831223628693, + "grad_norm": 0.7478229999542236, + "learning_rate": 0.00043416810816172244, + "loss": 1.4024, + "step": 6086 + }, + { + "epoch": 
0.6420886075949367, + "grad_norm": 0.6437858939170837, + "learning_rate": 0.0004339404122918664, + "loss": 1.405, + "step": 6087 + }, + { + "epoch": 0.6421940928270042, + "grad_norm": 0.6433672308921814, + "learning_rate": 0.0004337127518380632, + "loss": 1.3703, + "step": 6088 + }, + { + "epoch": 0.6422995780590718, + "grad_norm": 0.650339663028717, + "learning_rate": 0.0004334851268258234, + "loss": 1.3656, + "step": 6089 + }, + { + "epoch": 0.6424050632911392, + "grad_norm": 0.6231374144554138, + "learning_rate": 0.0004332575372806534, + "loss": 1.3845, + "step": 6090 + }, + { + "epoch": 0.6425105485232068, + "grad_norm": 0.6438796520233154, + "learning_rate": 0.00043302998322805564, + "loss": 1.4102, + "step": 6091 + }, + { + "epoch": 0.6426160337552742, + "grad_norm": 0.6253215074539185, + "learning_rate": 0.0004328024646935289, + "loss": 1.3807, + "step": 6092 + }, + { + "epoch": 0.6427215189873418, + "grad_norm": 0.6447206735610962, + "learning_rate": 0.00043257498170256735, + "loss": 1.3851, + "step": 6093 + }, + { + "epoch": 0.6428270042194093, + "grad_norm": 0.7824521660804749, + "learning_rate": 0.0004323475342806622, + "loss": 1.3748, + "step": 6094 + }, + { + "epoch": 0.6429324894514767, + "grad_norm": 0.7375182509422302, + "learning_rate": 0.00043212012245329986, + "loss": 1.3318, + "step": 6095 + }, + { + "epoch": 0.6430379746835443, + "grad_norm": 0.7514808773994446, + "learning_rate": 0.0004318927462459629, + "loss": 1.3672, + "step": 6096 + }, + { + "epoch": 0.6431434599156118, + "grad_norm": 0.6362578272819519, + "learning_rate": 0.0004316654056841299, + "loss": 1.3556, + "step": 6097 + }, + { + "epoch": 0.6432489451476793, + "grad_norm": 0.9394885301589966, + "learning_rate": 0.0004314381007932756, + "loss": 1.4076, + "step": 6098 + }, + { + "epoch": 0.6433544303797468, + "grad_norm": 0.6185207366943359, + "learning_rate": 0.00043121083159887056, + "loss": 1.3523, + "step": 6099 + }, + { + "epoch": 0.6434599156118144, + "grad_norm": 
0.6784505844116211, + "learning_rate": 0.00043098359812638145, + "loss": 1.3763, + "step": 6100 + }, + { + "epoch": 0.6435654008438818, + "grad_norm": 0.7182687520980835, + "learning_rate": 0.000430756400401271, + "loss": 1.4121, + "step": 6101 + }, + { + "epoch": 0.6436708860759494, + "grad_norm": 0.6742717623710632, + "learning_rate": 0.00043052923844899733, + "loss": 1.3513, + "step": 6102 + }, + { + "epoch": 0.6437763713080169, + "grad_norm": 0.9676007032394409, + "learning_rate": 0.000430302112295016, + "loss": 1.3614, + "step": 6103 + }, + { + "epoch": 0.6438818565400843, + "grad_norm": 0.718325674533844, + "learning_rate": 0.00043007502196477703, + "loss": 1.3847, + "step": 6104 + }, + { + "epoch": 0.6439873417721519, + "grad_norm": 0.7356693744659424, + "learning_rate": 0.00042984796748372716, + "loss": 1.4016, + "step": 6105 + }, + { + "epoch": 0.6440928270042194, + "grad_norm": 0.7665829658508301, + "learning_rate": 0.000429620948877309, + "loss": 1.3833, + "step": 6106 + }, + { + "epoch": 0.6441983122362869, + "grad_norm": 0.6289927363395691, + "learning_rate": 0.000429393966170961, + "loss": 1.418, + "step": 6107 + }, + { + "epoch": 0.6443037974683544, + "grad_norm": 0.6711648106575012, + "learning_rate": 0.00042916701939011787, + "loss": 1.3672, + "step": 6108 + }, + { + "epoch": 0.644409282700422, + "grad_norm": 0.8060999512672424, + "learning_rate": 0.00042894010856020997, + "loss": 1.3945, + "step": 6109 + }, + { + "epoch": 0.6445147679324894, + "grad_norm": 0.6263580918312073, + "learning_rate": 0.00042871323370666383, + "loss": 1.3693, + "step": 6110 + }, + { + "epoch": 0.644620253164557, + "grad_norm": 0.9731548428535461, + "learning_rate": 0.00042848639485490165, + "loss": 1.3811, + "step": 6111 + }, + { + "epoch": 0.6447257383966245, + "grad_norm": 0.6446728706359863, + "learning_rate": 0.0004282595920303425, + "loss": 1.3573, + "step": 6112 + }, + { + "epoch": 0.6448312236286919, + "grad_norm": 0.6634634137153625, + "learning_rate": 
0.00042803282525840036, + "loss": 1.4029, + "step": 6113 + }, + { + "epoch": 0.6449367088607595, + "grad_norm": 0.7505616545677185, + "learning_rate": 0.0004278060945644856, + "loss": 1.3716, + "step": 6114 + }, + { + "epoch": 0.645042194092827, + "grad_norm": 0.6567237973213196, + "learning_rate": 0.0004275793999740046, + "loss": 1.3882, + "step": 6115 + }, + { + "epoch": 0.6451476793248945, + "grad_norm": 0.8984032273292542, + "learning_rate": 0.00042735274151235953, + "loss": 1.4103, + "step": 6116 + }, + { + "epoch": 0.645253164556962, + "grad_norm": 0.7199463248252869, + "learning_rate": 0.00042712611920494865, + "loss": 1.3665, + "step": 6117 + }, + { + "epoch": 0.6453586497890296, + "grad_norm": 0.8018209338188171, + "learning_rate": 0.0004268995330771661, + "loss": 1.3986, + "step": 6118 + }, + { + "epoch": 0.645464135021097, + "grad_norm": 0.7210120558738708, + "learning_rate": 0.0004266729831544017, + "loss": 1.3625, + "step": 6119 + }, + { + "epoch": 0.6455696202531646, + "grad_norm": 0.6750608086585999, + "learning_rate": 0.0004264464694620421, + "loss": 1.3398, + "step": 6120 + }, + { + "epoch": 0.6456751054852321, + "grad_norm": 0.7124044895172119, + "learning_rate": 0.00042621999202546897, + "loss": 1.3669, + "step": 6121 + }, + { + "epoch": 0.6457805907172995, + "grad_norm": 0.6174709796905518, + "learning_rate": 0.0004259935508700603, + "loss": 1.3905, + "step": 6122 + }, + { + "epoch": 0.6458860759493671, + "grad_norm": 0.6541422605514526, + "learning_rate": 0.0004257671460211898, + "loss": 1.3727, + "step": 6123 + }, + { + "epoch": 0.6459915611814346, + "grad_norm": 0.7208021283149719, + "learning_rate": 0.00042554077750422736, + "loss": 1.3832, + "step": 6124 + }, + { + "epoch": 0.6460970464135021, + "grad_norm": 0.705852746963501, + "learning_rate": 0.00042531444534453885, + "loss": 1.3776, + "step": 6125 + }, + { + "epoch": 0.6462025316455696, + "grad_norm": 0.6818405985832214, + "learning_rate": 0.0004250881495674855, + "loss": 1.3633, + 
"step": 6126 + }, + { + "epoch": 0.6463080168776372, + "grad_norm": 0.7112937569618225, + "learning_rate": 0.00042486189019842535, + "loss": 1.3612, + "step": 6127 + }, + { + "epoch": 0.6464135021097046, + "grad_norm": 0.6700394749641418, + "learning_rate": 0.00042463566726271137, + "loss": 1.3559, + "step": 6128 + }, + { + "epoch": 0.6465189873417722, + "grad_norm": 0.6487092971801758, + "learning_rate": 0.0004244094807856936, + "loss": 1.3733, + "step": 6129 + }, + { + "epoch": 0.6466244725738397, + "grad_norm": 0.6668149828910828, + "learning_rate": 0.000424183330792717, + "loss": 1.4183, + "step": 6130 + }, + { + "epoch": 0.6467299578059071, + "grad_norm": 0.6976310014724731, + "learning_rate": 0.0004239572173091229, + "loss": 1.4128, + "step": 6131 + }, + { + "epoch": 0.6468354430379747, + "grad_norm": 0.7063514590263367, + "learning_rate": 0.0004237311403602484, + "loss": 1.3932, + "step": 6132 + }, + { + "epoch": 0.6469409282700422, + "grad_norm": 0.764626681804657, + "learning_rate": 0.0004235050999714265, + "loss": 1.3587, + "step": 6133 + }, + { + "epoch": 0.6470464135021097, + "grad_norm": 0.6789032220840454, + "learning_rate": 0.00042327909616798616, + "loss": 1.3914, + "step": 6134 + }, + { + "epoch": 0.6471518987341772, + "grad_norm": 0.7000819444656372, + "learning_rate": 0.0004230531289752523, + "loss": 1.3459, + "step": 6135 + }, + { + "epoch": 0.6472573839662448, + "grad_norm": 0.7160186767578125, + "learning_rate": 0.00042282719841854567, + "loss": 1.4026, + "step": 6136 + }, + { + "epoch": 0.6473628691983122, + "grad_norm": 0.661849856376648, + "learning_rate": 0.0004226013045231826, + "loss": 1.3839, + "step": 6137 + }, + { + "epoch": 0.6474683544303798, + "grad_norm": 0.7426649332046509, + "learning_rate": 0.00042237544731447616, + "loss": 1.3813, + "step": 6138 + }, + { + "epoch": 0.6475738396624473, + "grad_norm": 0.6482226252555847, + "learning_rate": 0.00042214962681773457, + "loss": 1.3857, + "step": 6139 + }, + { + "epoch": 
0.6476793248945147, + "grad_norm": 0.6958522200584412, + "learning_rate": 0.0004219238430582621, + "loss": 1.315, + "step": 6140 + }, + { + "epoch": 0.6477848101265823, + "grad_norm": 0.7619611620903015, + "learning_rate": 0.00042169809606135893, + "loss": 1.3907, + "step": 6141 + }, + { + "epoch": 0.6478902953586498, + "grad_norm": 0.7259811162948608, + "learning_rate": 0.0004214723858523212, + "loss": 1.356, + "step": 6142 + }, + { + "epoch": 0.6479957805907173, + "grad_norm": 0.8001013398170471, + "learning_rate": 0.00042124671245644086, + "loss": 1.362, + "step": 6143 + }, + { + "epoch": 0.6481012658227848, + "grad_norm": 0.7075250148773193, + "learning_rate": 0.0004210210758990056, + "loss": 1.3964, + "step": 6144 + }, + { + "epoch": 0.6482067510548524, + "grad_norm": 0.7188335061073303, + "learning_rate": 0.00042079547620529927, + "loss": 1.4137, + "step": 6145 + }, + { + "epoch": 0.6483122362869198, + "grad_norm": 0.7905784249305725, + "learning_rate": 0.0004205699134006011, + "loss": 1.3793, + "step": 6146 + }, + { + "epoch": 0.6484177215189874, + "grad_norm": 0.6275314688682556, + "learning_rate": 0.0004203443875101871, + "loss": 1.3968, + "step": 6147 + }, + { + "epoch": 0.6485232067510549, + "grad_norm": 0.6549257040023804, + "learning_rate": 0.0004201188985593283, + "loss": 1.3644, + "step": 6148 + }, + { + "epoch": 0.6486286919831223, + "grad_norm": 0.8088507056236267, + "learning_rate": 0.00041989344657329187, + "loss": 1.3975, + "step": 6149 + }, + { + "epoch": 0.6487341772151899, + "grad_norm": 0.6702736020088196, + "learning_rate": 0.0004196680315773408, + "loss": 1.4197, + "step": 6150 + }, + { + "epoch": 0.6488396624472574, + "grad_norm": 0.6489444971084595, + "learning_rate": 0.0004194426535967339, + "loss": 1.3783, + "step": 6151 + }, + { + "epoch": 0.6489451476793249, + "grad_norm": 0.7030426263809204, + "learning_rate": 0.00041921731265672613, + "loss": 1.4073, + "step": 6152 + }, + { + "epoch": 0.6490506329113924, + "grad_norm": 
0.6570305228233337, + "learning_rate": 0.0004189920087825678, + "loss": 1.3668, + "step": 6153 + }, + { + "epoch": 0.64915611814346, + "grad_norm": 0.7049002647399902, + "learning_rate": 0.00041876674199950545, + "loss": 1.3669, + "step": 6154 + }, + { + "epoch": 0.6492616033755274, + "grad_norm": 0.6884148716926575, + "learning_rate": 0.0004185415123327813, + "loss": 1.3849, + "step": 6155 + }, + { + "epoch": 0.649367088607595, + "grad_norm": 0.715002179145813, + "learning_rate": 0.00041831631980763324, + "loss": 1.3543, + "step": 6156 + }, + { + "epoch": 0.6494725738396624, + "grad_norm": 0.6863949298858643, + "learning_rate": 0.00041809116444929586, + "loss": 1.3885, + "step": 6157 + }, + { + "epoch": 0.6495780590717299, + "grad_norm": 0.676051139831543, + "learning_rate": 0.00041786604628299846, + "loss": 1.4088, + "step": 6158 + }, + { + "epoch": 0.6496835443037975, + "grad_norm": 0.687071681022644, + "learning_rate": 0.00041764096533396667, + "loss": 1.3563, + "step": 6159 + }, + { + "epoch": 0.6497890295358649, + "grad_norm": 0.6779961585998535, + "learning_rate": 0.00041741592162742214, + "loss": 1.4079, + "step": 6160 + }, + { + "epoch": 0.6498945147679325, + "grad_norm": 0.723853588104248, + "learning_rate": 0.0004171909151885819, + "loss": 1.3673, + "step": 6161 + }, + { + "epoch": 0.65, + "grad_norm": 0.6242474317550659, + "learning_rate": 0.0004169659460426592, + "loss": 1.3624, + "step": 6162 + }, + { + "epoch": 0.6501054852320675, + "grad_norm": 0.7656875252723694, + "learning_rate": 0.00041674101421486294, + "loss": 1.3141, + "step": 6163 + }, + { + "epoch": 0.650210970464135, + "grad_norm": 0.6583379507064819, + "learning_rate": 0.00041651611973039776, + "loss": 1.3337, + "step": 6164 + }, + { + "epoch": 0.6503164556962026, + "grad_norm": 0.7263861894607544, + "learning_rate": 0.0004162912626144642, + "loss": 1.3694, + "step": 6165 + }, + { + "epoch": 0.65042194092827, + "grad_norm": 0.6410985589027405, + "learning_rate": 0.0004160664428922586, + 
"loss": 1.3664, + "step": 6166 + }, + { + "epoch": 0.6505274261603375, + "grad_norm": 0.6570214033126831, + "learning_rate": 0.00041584166058897324, + "loss": 1.3503, + "step": 6167 + }, + { + "epoch": 0.6506329113924051, + "grad_norm": 0.6637453436851501, + "learning_rate": 0.00041561691572979624, + "loss": 1.3475, + "step": 6168 + }, + { + "epoch": 0.6507383966244725, + "grad_norm": 0.663945198059082, + "learning_rate": 0.00041539220833991124, + "loss": 1.4104, + "step": 6169 + }, + { + "epoch": 0.6508438818565401, + "grad_norm": 0.6713603734970093, + "learning_rate": 0.0004151675384444978, + "loss": 1.3561, + "step": 6170 + }, + { + "epoch": 0.6509493670886076, + "grad_norm": 0.6367960572242737, + "learning_rate": 0.0004149429060687312, + "loss": 1.3772, + "step": 6171 + }, + { + "epoch": 0.6510548523206751, + "grad_norm": 0.6761802434921265, + "learning_rate": 0.00041471831123778284, + "loss": 1.3485, + "step": 6172 + }, + { + "epoch": 0.6511603375527426, + "grad_norm": 0.6908921003341675, + "learning_rate": 0.0004144937539768195, + "loss": 1.396, + "step": 6173 + }, + { + "epoch": 0.6512658227848102, + "grad_norm": 0.6944785118103027, + "learning_rate": 0.00041426923431100396, + "loss": 1.4103, + "step": 6174 + }, + { + "epoch": 0.6513713080168776, + "grad_norm": 0.6492688059806824, + "learning_rate": 0.0004140447522654946, + "loss": 1.3601, + "step": 6175 + }, + { + "epoch": 0.6514767932489451, + "grad_norm": 0.6361261606216431, + "learning_rate": 0.0004138203078654463, + "loss": 1.3933, + "step": 6176 + }, + { + "epoch": 0.6515822784810127, + "grad_norm": 0.7320945858955383, + "learning_rate": 0.0004135959011360088, + "loss": 1.3842, + "step": 6177 + }, + { + "epoch": 0.6516877637130801, + "grad_norm": 0.6440481543540955, + "learning_rate": 0.000413371532102328, + "loss": 1.3787, + "step": 6178 + }, + { + "epoch": 0.6517932489451477, + "grad_norm": 0.8473495244979858, + "learning_rate": 0.0004131472007895457, + "loss": 1.3981, + "step": 6179 + }, + { + 
"epoch": 0.6518987341772152, + "grad_norm": 0.6437522768974304, + "learning_rate": 0.00041292290722279914, + "loss": 1.3835, + "step": 6180 + }, + { + "epoch": 0.6520042194092827, + "grad_norm": 0.8495707511901855, + "learning_rate": 0.00041269865142722176, + "loss": 1.4044, + "step": 6181 + }, + { + "epoch": 0.6521097046413502, + "grad_norm": 0.640312671661377, + "learning_rate": 0.0004124744334279424, + "loss": 1.3975, + "step": 6182 + }, + { + "epoch": 0.6522151898734178, + "grad_norm": 0.8702165484428406, + "learning_rate": 0.0004122502532500858, + "loss": 1.3801, + "step": 6183 + }, + { + "epoch": 0.6523206751054852, + "grad_norm": 0.6844167113304138, + "learning_rate": 0.0004120261109187724, + "loss": 1.3843, + "step": 6184 + }, + { + "epoch": 0.6524261603375527, + "grad_norm": 0.6323238611221313, + "learning_rate": 0.0004118020064591184, + "loss": 1.3799, + "step": 6185 + }, + { + "epoch": 0.6525316455696203, + "grad_norm": 0.7876150012016296, + "learning_rate": 0.00041157793989623625, + "loss": 1.394, + "step": 6186 + }, + { + "epoch": 0.6526371308016877, + "grad_norm": 0.6766921281814575, + "learning_rate": 0.0004113539112552334, + "loss": 1.3539, + "step": 6187 + }, + { + "epoch": 0.6527426160337553, + "grad_norm": 0.8823161125183105, + "learning_rate": 0.0004111299205612135, + "loss": 1.3767, + "step": 6188 + }, + { + "epoch": 0.6528481012658228, + "grad_norm": 0.6983814239501953, + "learning_rate": 0.00041090596783927583, + "loss": 1.3567, + "step": 6189 + }, + { + "epoch": 0.6529535864978903, + "grad_norm": 0.6929763555526733, + "learning_rate": 0.00041068205311451517, + "loss": 1.4023, + "step": 6190 + }, + { + "epoch": 0.6530590717299578, + "grad_norm": 0.8126384019851685, + "learning_rate": 0.00041045817641202257, + "loss": 1.4011, + "step": 6191 + }, + { + "epoch": 0.6531645569620254, + "grad_norm": 0.6372519731521606, + "learning_rate": 0.00041023433775688435, + "loss": 1.3537, + "step": 6192 + }, + { + "epoch": 0.6532700421940928, + "grad_norm": 
0.8366484642028809, + "learning_rate": 0.00041001053717418283, + "loss": 1.3819, + "step": 6193 + }, + { + "epoch": 0.6533755274261603, + "grad_norm": 0.7078725695610046, + "learning_rate": 0.000409786774688996, + "loss": 1.3397, + "step": 6194 + }, + { + "epoch": 0.6534810126582279, + "grad_norm": 0.7128650546073914, + "learning_rate": 0.00040956305032639723, + "loss": 1.3871, + "step": 6195 + }, + { + "epoch": 0.6535864978902953, + "grad_norm": 0.6897216439247131, + "learning_rate": 0.0004093393641114565, + "loss": 1.3612, + "step": 6196 + }, + { + "epoch": 0.6536919831223629, + "grad_norm": 0.6829767823219299, + "learning_rate": 0.00040911571606923867, + "loss": 1.3677, + "step": 6197 + }, + { + "epoch": 0.6537974683544304, + "grad_norm": 0.6278948187828064, + "learning_rate": 0.00040889210622480467, + "loss": 1.3917, + "step": 6198 + }, + { + "epoch": 0.6539029535864979, + "grad_norm": 0.637232780456543, + "learning_rate": 0.0004086685346032111, + "loss": 1.4147, + "step": 6199 + }, + { + "epoch": 0.6540084388185654, + "grad_norm": 0.6186953783035278, + "learning_rate": 0.00040844500122951026, + "loss": 1.3839, + "step": 6200 + }, + { + "epoch": 0.654113924050633, + "grad_norm": 0.6572609543800354, + "learning_rate": 0.0004082215061287502, + "loss": 1.3534, + "step": 6201 + }, + { + "epoch": 0.6542194092827004, + "grad_norm": 0.6311798095703125, + "learning_rate": 0.00040799804932597464, + "loss": 1.3703, + "step": 6202 + }, + { + "epoch": 0.6543248945147679, + "grad_norm": 0.71345454454422, + "learning_rate": 0.00040777463084622304, + "loss": 1.3598, + "step": 6203 + }, + { + "epoch": 0.6544303797468355, + "grad_norm": 0.6520512700080872, + "learning_rate": 0.00040755125071453055, + "loss": 1.3744, + "step": 6204 + }, + { + "epoch": 0.6545358649789029, + "grad_norm": 0.6200043559074402, + "learning_rate": 0.00040732790895592764, + "loss": 1.3774, + "step": 6205 + }, + { + "epoch": 0.6546413502109705, + "grad_norm": 0.668379545211792, + "learning_rate": 
0.00040710460559544167, + "loss": 1.3938, + "step": 6206 + }, + { + "epoch": 0.654746835443038, + "grad_norm": 0.6645801067352295, + "learning_rate": 0.0004068813406580944, + "loss": 1.3975, + "step": 6207 + }, + { + "epoch": 0.6548523206751055, + "grad_norm": 0.7614997625350952, + "learning_rate": 0.0004066581141689038, + "loss": 1.3723, + "step": 6208 + }, + { + "epoch": 0.654957805907173, + "grad_norm": 0.6388342380523682, + "learning_rate": 0.00040643492615288367, + "loss": 1.3705, + "step": 6209 + }, + { + "epoch": 0.6550632911392406, + "grad_norm": 0.6374304890632629, + "learning_rate": 0.00040621177663504313, + "loss": 1.3669, + "step": 6210 + }, + { + "epoch": 0.655168776371308, + "grad_norm": 0.8165882229804993, + "learning_rate": 0.0004059886656403874, + "loss": 1.3941, + "step": 6211 + }, + { + "epoch": 0.6552742616033755, + "grad_norm": 0.7163357138633728, + "learning_rate": 0.00040576559319391704, + "loss": 1.3765, + "step": 6212 + }, + { + "epoch": 0.6553797468354431, + "grad_norm": 0.7417230606079102, + "learning_rate": 0.0004055425593206285, + "loss": 1.3897, + "step": 6213 + }, + { + "epoch": 0.6554852320675105, + "grad_norm": 0.6182762384414673, + "learning_rate": 0.0004053195640455137, + "loss": 1.3589, + "step": 6214 + }, + { + "epoch": 0.6555907172995781, + "grad_norm": 0.7008188366889954, + "learning_rate": 0.0004050966073935602, + "loss": 1.3933, + "step": 6215 + }, + { + "epoch": 0.6556962025316456, + "grad_norm": 0.7293989658355713, + "learning_rate": 0.00040487368938975214, + "loss": 1.4083, + "step": 6216 + }, + { + "epoch": 0.6558016877637131, + "grad_norm": 0.6125454306602478, + "learning_rate": 0.00040465081005906805, + "loss": 1.3847, + "step": 6217 + }, + { + "epoch": 0.6559071729957806, + "grad_norm": 0.6899685859680176, + "learning_rate": 0.00040442796942648273, + "loss": 1.4006, + "step": 6218 + }, + { + "epoch": 0.6560126582278482, + "grad_norm": 0.7435320019721985, + "learning_rate": 0.00040420516751696664, + "loss": 1.3575, + 
"step": 6219 + }, + { + "epoch": 0.6561181434599156, + "grad_norm": 0.6280732750892639, + "learning_rate": 0.00040398240435548583, + "loss": 1.3541, + "step": 6220 + }, + { + "epoch": 0.6562236286919831, + "grad_norm": 0.8678712248802185, + "learning_rate": 0.000403759679967002, + "loss": 1.3799, + "step": 6221 + }, + { + "epoch": 0.6563291139240506, + "grad_norm": 0.6560789346694946, + "learning_rate": 0.00040353699437647257, + "loss": 1.3406, + "step": 6222 + }, + { + "epoch": 0.6564345991561181, + "grad_norm": 0.716238796710968, + "learning_rate": 0.0004033143476088504, + "loss": 1.3701, + "step": 6223 + }, + { + "epoch": 0.6565400843881857, + "grad_norm": 0.6677959561347961, + "learning_rate": 0.00040309173968908413, + "loss": 1.3609, + "step": 6224 + }, + { + "epoch": 0.6566455696202531, + "grad_norm": 0.6734921932220459, + "learning_rate": 0.0004028691706421185, + "loss": 1.3673, + "step": 6225 + }, + { + "epoch": 0.6567510548523207, + "grad_norm": 0.686072826385498, + "learning_rate": 0.00040264664049289336, + "loss": 1.3918, + "step": 6226 + }, + { + "epoch": 0.6568565400843882, + "grad_norm": 0.6374868750572205, + "learning_rate": 0.00040242414926634415, + "loss": 1.3689, + "step": 6227 + }, + { + "epoch": 0.6569620253164556, + "grad_norm": 0.6553919315338135, + "learning_rate": 0.0004022016969874023, + "loss": 1.3607, + "step": 6228 + }, + { + "epoch": 0.6570675105485232, + "grad_norm": 0.6570683717727661, + "learning_rate": 0.00040197928368099445, + "loss": 1.3709, + "step": 6229 + }, + { + "epoch": 0.6571729957805907, + "grad_norm": 0.6091029644012451, + "learning_rate": 0.00040175690937204324, + "loss": 1.3774, + "step": 6230 + }, + { + "epoch": 0.6572784810126582, + "grad_norm": 0.6707137823104858, + "learning_rate": 0.0004015345740854668, + "loss": 1.369, + "step": 6231 + }, + { + "epoch": 0.6573839662447257, + "grad_norm": 0.6593406200408936, + "learning_rate": 0.00040131227784617876, + "loss": 1.3927, + "step": 6232 + }, + { + "epoch": 
0.6574894514767933, + "grad_norm": 0.6328575015068054, + "learning_rate": 0.000401090020679089, + "loss": 1.4014, + "step": 6233 + }, + { + "epoch": 0.6575949367088607, + "grad_norm": 0.6295099854469299, + "learning_rate": 0.00040086780260910213, + "loss": 1.4018, + "step": 6234 + }, + { + "epoch": 0.6577004219409283, + "grad_norm": 0.597187876701355, + "learning_rate": 0.000400645623661119, + "loss": 1.3789, + "step": 6235 + }, + { + "epoch": 0.6578059071729958, + "grad_norm": 0.7407859563827515, + "learning_rate": 0.0004004234838600357, + "loss": 1.3568, + "step": 6236 + }, + { + "epoch": 0.6579113924050632, + "grad_norm": 0.6610725522041321, + "learning_rate": 0.00040020138323074427, + "loss": 1.3521, + "step": 6237 + }, + { + "epoch": 0.6580168776371308, + "grad_norm": 0.7315172553062439, + "learning_rate": 0.00039997932179813205, + "loss": 1.3714, + "step": 6238 + }, + { + "epoch": 0.6581223628691983, + "grad_norm": 0.6557596921920776, + "learning_rate": 0.00039975729958708223, + "loss": 1.3517, + "step": 6239 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 0.6535132527351379, + "learning_rate": 0.00039953531662247343, + "loss": 1.3649, + "step": 6240 + }, + { + "epoch": 0.6583333333333333, + "grad_norm": 0.7001998424530029, + "learning_rate": 0.00039931337292917966, + "loss": 1.3587, + "step": 6241 + }, + { + "epoch": 0.6584388185654009, + "grad_norm": 0.6328591704368591, + "learning_rate": 0.0003990914685320714, + "loss": 1.3772, + "step": 6242 + }, + { + "epoch": 0.6585443037974683, + "grad_norm": 0.7485453486442566, + "learning_rate": 0.00039886960345601394, + "loss": 1.4194, + "step": 6243 + }, + { + "epoch": 0.6586497890295359, + "grad_norm": 0.6277223229408264, + "learning_rate": 0.00039864777772586826, + "loss": 1.3783, + "step": 6244 + }, + { + "epoch": 0.6587552742616034, + "grad_norm": 0.8066046833992004, + "learning_rate": 0.00039842599136649117, + "loss": 1.3714, + "step": 6245 + }, + { + "epoch": 0.6588607594936708, + "grad_norm": 
0.6705287098884583, + "learning_rate": 0.00039820424440273474, + "loss": 1.3502, + "step": 6246 + }, + { + "epoch": 0.6589662447257384, + "grad_norm": 0.7306930422782898, + "learning_rate": 0.000397982536859447, + "loss": 1.378, + "step": 6247 + }, + { + "epoch": 0.6590717299578059, + "grad_norm": 0.6948797106742859, + "learning_rate": 0.00039776086876147133, + "loss": 1.3627, + "step": 6248 + }, + { + "epoch": 0.6591772151898734, + "grad_norm": 0.6827927231788635, + "learning_rate": 0.0003975392401336468, + "loss": 1.4076, + "step": 6249 + }, + { + "epoch": 0.6592827004219409, + "grad_norm": 0.889208197593689, + "learning_rate": 0.0003973176510008075, + "loss": 1.4142, + "step": 6250 + }, + { + "epoch": 0.6593881856540085, + "grad_norm": 0.9208540916442871, + "learning_rate": 0.00039709610138778445, + "loss": 1.361, + "step": 6251 + }, + { + "epoch": 0.6594936708860759, + "grad_norm": 0.8250530958175659, + "learning_rate": 0.0003968745913194029, + "loss": 1.3848, + "step": 6252 + }, + { + "epoch": 0.6595991561181435, + "grad_norm": 0.9226529598236084, + "learning_rate": 0.0003966531208204842, + "loss": 1.3565, + "step": 6253 + }, + { + "epoch": 0.659704641350211, + "grad_norm": 0.7057079076766968, + "learning_rate": 0.0003964316899158454, + "loss": 1.3746, + "step": 6254 + }, + { + "epoch": 0.6598101265822784, + "grad_norm": 0.8530165553092957, + "learning_rate": 0.00039621029863029874, + "loss": 1.3916, + "step": 6255 + }, + { + "epoch": 0.659915611814346, + "grad_norm": 0.8027042746543884, + "learning_rate": 0.00039598894698865216, + "loss": 1.3698, + "step": 6256 + }, + { + "epoch": 0.6600210970464135, + "grad_norm": 0.7328317165374756, + "learning_rate": 0.00039576763501570944, + "loss": 1.3384, + "step": 6257 + }, + { + "epoch": 0.660126582278481, + "grad_norm": 0.7407640814781189, + "learning_rate": 0.0003955463627362694, + "loss": 1.3958, + "step": 6258 + }, + { + "epoch": 0.6602320675105485, + "grad_norm": 0.6994622349739075, + "learning_rate": 
0.00039532513017512694, + "loss": 1.3794, + "step": 6259 + }, + { + "epoch": 0.6603375527426161, + "grad_norm": 0.733176052570343, + "learning_rate": 0.00039510393735707233, + "loss": 1.3896, + "step": 6260 + }, + { + "epoch": 0.6604430379746835, + "grad_norm": 0.6625093817710876, + "learning_rate": 0.00039488278430689123, + "loss": 1.3892, + "step": 6261 + }, + { + "epoch": 0.6605485232067511, + "grad_norm": 0.6963505148887634, + "learning_rate": 0.0003946616710493649, + "loss": 1.3937, + "step": 6262 + }, + { + "epoch": 0.6606540084388186, + "grad_norm": 0.6615694761276245, + "learning_rate": 0.0003944405976092702, + "loss": 1.3375, + "step": 6263 + }, + { + "epoch": 0.660759493670886, + "grad_norm": 0.6429949402809143, + "learning_rate": 0.0003942195640113795, + "loss": 1.4274, + "step": 6264 + }, + { + "epoch": 0.6608649789029536, + "grad_norm": 0.680691659450531, + "learning_rate": 0.00039399857028046066, + "loss": 1.3798, + "step": 6265 + }, + { + "epoch": 0.6609704641350211, + "grad_norm": 0.6370673179626465, + "learning_rate": 0.0003937776164412773, + "loss": 1.3787, + "step": 6266 + }, + { + "epoch": 0.6610759493670886, + "grad_norm": 0.6728444695472717, + "learning_rate": 0.00039355670251858805, + "loss": 1.4068, + "step": 6267 + }, + { + "epoch": 0.6611814345991561, + "grad_norm": 0.683211624622345, + "learning_rate": 0.00039333582853714793, + "loss": 1.3643, + "step": 6268 + }, + { + "epoch": 0.6612869198312237, + "grad_norm": 0.6322157382965088, + "learning_rate": 0.00039311499452170665, + "loss": 1.4057, + "step": 6269 + }, + { + "epoch": 0.6613924050632911, + "grad_norm": 0.7500082850456238, + "learning_rate": 0.00039289420049700986, + "loss": 1.3515, + "step": 6270 + }, + { + "epoch": 0.6614978902953587, + "grad_norm": 0.6250439286231995, + "learning_rate": 0.0003926734464877986, + "loss": 1.3631, + "step": 6271 + }, + { + "epoch": 0.6616033755274262, + "grad_norm": 0.6359075903892517, + "learning_rate": 0.0003924527325188095, + "loss": 1.3845, + 
"step": 6272 + }, + { + "epoch": 0.6617088607594936, + "grad_norm": 0.7675477862358093, + "learning_rate": 0.00039223205861477455, + "loss": 1.4222, + "step": 6273 + }, + { + "epoch": 0.6618143459915612, + "grad_norm": 0.6743262410163879, + "learning_rate": 0.00039201142480042145, + "loss": 1.3689, + "step": 6274 + }, + { + "epoch": 0.6619198312236287, + "grad_norm": 0.6186402440071106, + "learning_rate": 0.0003917908311004732, + "loss": 1.3775, + "step": 6275 + }, + { + "epoch": 0.6620253164556962, + "grad_norm": 0.6492633819580078, + "learning_rate": 0.0003915702775396483, + "loss": 1.3777, + "step": 6276 + }, + { + "epoch": 0.6621308016877637, + "grad_norm": 0.6557908654212952, + "learning_rate": 0.0003913497641426614, + "loss": 1.3562, + "step": 6277 + }, + { + "epoch": 0.6622362869198313, + "grad_norm": 0.6828864812850952, + "learning_rate": 0.00039112929093422185, + "loss": 1.3903, + "step": 6278 + }, + { + "epoch": 0.6623417721518987, + "grad_norm": 0.6950526833534241, + "learning_rate": 0.0003909088579390347, + "loss": 1.3739, + "step": 6279 + }, + { + "epoch": 0.6624472573839663, + "grad_norm": 0.6634933948516846, + "learning_rate": 0.0003906884651818006, + "loss": 1.3764, + "step": 6280 + }, + { + "epoch": 0.6625527426160338, + "grad_norm": 0.652722179889679, + "learning_rate": 0.0003904681126872157, + "loss": 1.3249, + "step": 6281 + }, + { + "epoch": 0.6626582278481012, + "grad_norm": 0.6674196124076843, + "learning_rate": 0.00039024780047997157, + "loss": 1.3639, + "step": 6282 + }, + { + "epoch": 0.6627637130801688, + "grad_norm": 0.646295964717865, + "learning_rate": 0.00039002752858475527, + "loss": 1.3491, + "step": 6283 + }, + { + "epoch": 0.6628691983122363, + "grad_norm": 0.6464141011238098, + "learning_rate": 0.00038980729702624896, + "loss": 1.4111, + "step": 6284 + }, + { + "epoch": 0.6629746835443038, + "grad_norm": 0.6707195043563843, + "learning_rate": 0.00038958710582913153, + "loss": 1.3871, + "step": 6285 + }, + { + "epoch": 
0.6630801687763713, + "grad_norm": 0.6551934480667114, + "learning_rate": 0.0003893669550180761, + "loss": 1.377, + "step": 6286 + }, + { + "epoch": 0.6631856540084389, + "grad_norm": 0.7021576762199402, + "learning_rate": 0.00038914684461775154, + "loss": 1.3929, + "step": 6287 + }, + { + "epoch": 0.6632911392405063, + "grad_norm": 0.6325610280036926, + "learning_rate": 0.0003889267746528225, + "loss": 1.3945, + "step": 6288 + }, + { + "epoch": 0.6633966244725739, + "grad_norm": 0.7573114037513733, + "learning_rate": 0.00038870674514794877, + "loss": 1.3872, + "step": 6289 + }, + { + "epoch": 0.6635021097046413, + "grad_norm": 0.7835315465927124, + "learning_rate": 0.00038848675612778577, + "loss": 1.355, + "step": 6290 + }, + { + "epoch": 0.6636075949367088, + "grad_norm": 0.7401492595672607, + "learning_rate": 0.0003882668076169846, + "loss": 1.3368, + "step": 6291 + }, + { + "epoch": 0.6637130801687764, + "grad_norm": 0.8045824766159058, + "learning_rate": 0.0003880468996401912, + "loss": 1.3915, + "step": 6292 + }, + { + "epoch": 0.6638185654008438, + "grad_norm": 0.6892832517623901, + "learning_rate": 0.0003878270322220474, + "loss": 1.3767, + "step": 6293 + }, + { + "epoch": 0.6639240506329114, + "grad_norm": 0.653550386428833, + "learning_rate": 0.00038760720538719086, + "loss": 1.3843, + "step": 6294 + }, + { + "epoch": 0.6640295358649789, + "grad_norm": 0.7977676391601562, + "learning_rate": 0.0003873874191602539, + "loss": 1.3665, + "step": 6295 + }, + { + "epoch": 0.6641350210970464, + "grad_norm": 0.6556555032730103, + "learning_rate": 0.00038716767356586487, + "loss": 1.3699, + "step": 6296 + }, + { + "epoch": 0.6642405063291139, + "grad_norm": 0.8091468811035156, + "learning_rate": 0.00038694796862864724, + "loss": 1.3681, + "step": 6297 + }, + { + "epoch": 0.6643459915611815, + "grad_norm": 0.6711048483848572, + "learning_rate": 0.00038672830437322007, + "loss": 1.3874, + "step": 6298 + }, + { + "epoch": 0.6644514767932489, + "grad_norm": 
0.7361080646514893, + "learning_rate": 0.0003865086808241979, + "loss": 1.3474, + "step": 6299 + }, + { + "epoch": 0.6645569620253164, + "grad_norm": 0.6325992941856384, + "learning_rate": 0.00038628909800619046, + "loss": 1.3453, + "step": 6300 + }, + { + "epoch": 0.664662447257384, + "grad_norm": 0.6878641843795776, + "learning_rate": 0.00038606955594380326, + "loss": 1.3755, + "step": 6301 + }, + { + "epoch": 0.6647679324894514, + "grad_norm": 0.7197004556655884, + "learning_rate": 0.0003858500546616368, + "loss": 1.3549, + "step": 6302 + }, + { + "epoch": 0.664873417721519, + "grad_norm": 0.6622357368469238, + "learning_rate": 0.0003856305941842878, + "loss": 1.3463, + "step": 6303 + }, + { + "epoch": 0.6649789029535865, + "grad_norm": 0.7895619869232178, + "learning_rate": 0.0003854111745363476, + "loss": 1.3649, + "step": 6304 + }, + { + "epoch": 0.665084388185654, + "grad_norm": 0.6623203158378601, + "learning_rate": 0.00038519179574240324, + "loss": 1.3685, + "step": 6305 + }, + { + "epoch": 0.6651898734177215, + "grad_norm": 0.7637482285499573, + "learning_rate": 0.0003849724578270374, + "loss": 1.358, + "step": 6306 + }, + { + "epoch": 0.6652953586497891, + "grad_norm": 0.6737117171287537, + "learning_rate": 0.0003847531608148277, + "loss": 1.4052, + "step": 6307 + }, + { + "epoch": 0.6654008438818565, + "grad_norm": 0.7349868416786194, + "learning_rate": 0.0003845339047303477, + "loss": 1.376, + "step": 6308 + }, + { + "epoch": 0.665506329113924, + "grad_norm": 0.6721859574317932, + "learning_rate": 0.0003843146895981661, + "loss": 1.3639, + "step": 6309 + }, + { + "epoch": 0.6656118143459916, + "grad_norm": 0.6478467583656311, + "learning_rate": 0.0003840955154428467, + "loss": 1.3951, + "step": 6310 + }, + { + "epoch": 0.665717299578059, + "grad_norm": 0.6767896413803101, + "learning_rate": 0.0003838763822889495, + "loss": 1.3877, + "step": 6311 + }, + { + "epoch": 0.6658227848101266, + "grad_norm": 0.6300890445709229, + "learning_rate": 
0.0003836572901610295, + "loss": 1.3294, + "step": 6312 + }, + { + "epoch": 0.6659282700421941, + "grad_norm": 0.6925090551376343, + "learning_rate": 0.0003834382390836368, + "loss": 1.3749, + "step": 6313 + }, + { + "epoch": 0.6660337552742616, + "grad_norm": 0.7033786177635193, + "learning_rate": 0.00038321922908131736, + "loss": 1.3496, + "step": 6314 + }, + { + "epoch": 0.6661392405063291, + "grad_norm": 0.7409677505493164, + "learning_rate": 0.0003830002601786121, + "loss": 1.3532, + "step": 6315 + }, + { + "epoch": 0.6662447257383967, + "grad_norm": 0.6794464588165283, + "learning_rate": 0.0003827813324000578, + "loss": 1.3618, + "step": 6316 + }, + { + "epoch": 0.6663502109704641, + "grad_norm": 0.6716349720954895, + "learning_rate": 0.0003825624457701863, + "loss": 1.3954, + "step": 6317 + }, + { + "epoch": 0.6664556962025316, + "grad_norm": 0.7351154088973999, + "learning_rate": 0.00038234360031352485, + "loss": 1.3731, + "step": 6318 + }, + { + "epoch": 0.6665611814345992, + "grad_norm": 0.7268515229225159, + "learning_rate": 0.00038212479605459617, + "loss": 1.3814, + "step": 6319 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.7064332962036133, + "learning_rate": 0.00038190603301791864, + "loss": 1.3426, + "step": 6320 + }, + { + "epoch": 0.6667721518987342, + "grad_norm": 0.7358123660087585, + "learning_rate": 0.0003816873112280056, + "loss": 1.3887, + "step": 6321 + }, + { + "epoch": 0.6668776371308017, + "grad_norm": 0.642743706703186, + "learning_rate": 0.00038146863070936607, + "loss": 1.3735, + "step": 6322 + }, + { + "epoch": 0.6669831223628692, + "grad_norm": 0.7372875213623047, + "learning_rate": 0.0003812499914865039, + "loss": 1.3563, + "step": 6323 + }, + { + "epoch": 0.6670886075949367, + "grad_norm": 0.6810083985328674, + "learning_rate": 0.00038103139358391914, + "loss": 1.4085, + "step": 6324 + }, + { + "epoch": 0.6671940928270043, + "grad_norm": 0.8273791670799255, + "learning_rate": 0.0003808128370261065, + "loss": 1.3779, + 
"step": 6325 + }, + { + "epoch": 0.6672995780590717, + "grad_norm": 0.6908831000328064, + "learning_rate": 0.00038059432183755633, + "loss": 1.3527, + "step": 6326 + }, + { + "epoch": 0.6674050632911392, + "grad_norm": 0.7247012257575989, + "learning_rate": 0.0003803758480427544, + "loss": 1.3847, + "step": 6327 + }, + { + "epoch": 0.6675105485232068, + "grad_norm": 0.6181751489639282, + "learning_rate": 0.0003801574156661817, + "loss": 1.3198, + "step": 6328 + }, + { + "epoch": 0.6676160337552742, + "grad_norm": 0.7422406673431396, + "learning_rate": 0.000379939024732315, + "loss": 1.3761, + "step": 6329 + }, + { + "epoch": 0.6677215189873418, + "grad_norm": 0.8012036085128784, + "learning_rate": 0.0003797206752656258, + "loss": 1.4033, + "step": 6330 + }, + { + "epoch": 0.6678270042194093, + "grad_norm": 0.7548378109931946, + "learning_rate": 0.0003795023672905814, + "loss": 1.3271, + "step": 6331 + }, + { + "epoch": 0.6679324894514768, + "grad_norm": 0.6976670622825623, + "learning_rate": 0.00037928410083164416, + "loss": 1.397, + "step": 6332 + }, + { + "epoch": 0.6680379746835443, + "grad_norm": 0.6686038374900818, + "learning_rate": 0.0003790658759132719, + "loss": 1.3818, + "step": 6333 + }, + { + "epoch": 0.6681434599156119, + "grad_norm": 0.6731106638908386, + "learning_rate": 0.0003788476925599181, + "loss": 1.3606, + "step": 6334 + }, + { + "epoch": 0.6682489451476793, + "grad_norm": 0.6705660223960876, + "learning_rate": 0.00037862955079603086, + "loss": 1.344, + "step": 6335 + }, + { + "epoch": 0.6683544303797468, + "grad_norm": 0.6463746428489685, + "learning_rate": 0.00037841145064605416, + "loss": 1.3944, + "step": 6336 + }, + { + "epoch": 0.6684599156118144, + "grad_norm": 0.6467850804328918, + "learning_rate": 0.00037819339213442744, + "loss": 1.3612, + "step": 6337 + }, + { + "epoch": 0.6685654008438818, + "grad_norm": 0.792236864566803, + "learning_rate": 0.0003779753752855853, + "loss": 1.3583, + "step": 6338 + }, + { + "epoch": 
0.6686708860759494, + "grad_norm": 0.6003962159156799, + "learning_rate": 0.0003777574001239573, + "loss": 1.3862, + "step": 6339 + }, + { + "epoch": 0.6687763713080169, + "grad_norm": 0.6559120416641235, + "learning_rate": 0.0003775394666739688, + "loss": 1.3524, + "step": 6340 + }, + { + "epoch": 0.6688818565400844, + "grad_norm": 0.8439293503761292, + "learning_rate": 0.0003773215749600404, + "loss": 1.3704, + "step": 6341 + }, + { + "epoch": 0.6689873417721519, + "grad_norm": 0.7031146883964539, + "learning_rate": 0.0003771037250065878, + "loss": 1.4115, + "step": 6342 + }, + { + "epoch": 0.6690928270042195, + "grad_norm": 0.7777398824691772, + "learning_rate": 0.0003768859168380223, + "loss": 1.3888, + "step": 6343 + }, + { + "epoch": 0.6691983122362869, + "grad_norm": 0.6707682013511658, + "learning_rate": 0.0003766681504787503, + "loss": 1.3636, + "step": 6344 + }, + { + "epoch": 0.6693037974683544, + "grad_norm": 0.7439103722572327, + "learning_rate": 0.0003764504259531734, + "loss": 1.3873, + "step": 6345 + }, + { + "epoch": 0.669409282700422, + "grad_norm": 0.6794284582138062, + "learning_rate": 0.0003762327432856892, + "loss": 1.3768, + "step": 6346 + }, + { + "epoch": 0.6695147679324894, + "grad_norm": 0.6824334263801575, + "learning_rate": 0.00037601510250068984, + "loss": 1.3565, + "step": 6347 + }, + { + "epoch": 0.669620253164557, + "grad_norm": 0.7299103140830994, + "learning_rate": 0.0003757975036225632, + "loss": 1.3565, + "step": 6348 + }, + { + "epoch": 0.6697257383966245, + "grad_norm": 0.6863168478012085, + "learning_rate": 0.00037557994667569217, + "loss": 1.3548, + "step": 6349 + }, + { + "epoch": 0.669831223628692, + "grad_norm": 0.7446659803390503, + "learning_rate": 0.00037536243168445507, + "loss": 1.3801, + "step": 6350 + }, + { + "epoch": 0.6699367088607595, + "grad_norm": 0.6595044732093811, + "learning_rate": 0.0003751449586732257, + "loss": 1.3761, + "step": 6351 + }, + { + "epoch": 0.6700421940928271, + "grad_norm": 
0.7181967496871948, + "learning_rate": 0.0003749275276663729, + "loss": 1.3675, + "step": 6352 + }, + { + "epoch": 0.6701476793248945, + "grad_norm": 0.6681446433067322, + "learning_rate": 0.0003747101386882609, + "loss": 1.3582, + "step": 6353 + }, + { + "epoch": 0.670253164556962, + "grad_norm": 0.7158746123313904, + "learning_rate": 0.0003744927917632489, + "loss": 1.4046, + "step": 6354 + }, + { + "epoch": 0.6703586497890295, + "grad_norm": 0.629170835018158, + "learning_rate": 0.00037427548691569237, + "loss": 1.3252, + "step": 6355 + }, + { + "epoch": 0.670464135021097, + "grad_norm": 0.6395947933197021, + "learning_rate": 0.000374058224169941, + "loss": 1.3568, + "step": 6356 + }, + { + "epoch": 0.6705696202531646, + "grad_norm": 0.6460703611373901, + "learning_rate": 0.00037384100355034033, + "loss": 1.3757, + "step": 6357 + }, + { + "epoch": 0.670675105485232, + "grad_norm": 0.7308785915374756, + "learning_rate": 0.0003736238250812308, + "loss": 1.4043, + "step": 6358 + }, + { + "epoch": 0.6707805907172996, + "grad_norm": 0.6490916609764099, + "learning_rate": 0.0003734066887869485, + "loss": 1.3661, + "step": 6359 + }, + { + "epoch": 0.6708860759493671, + "grad_norm": 0.8356994390487671, + "learning_rate": 0.0003731895946918246, + "loss": 1.3859, + "step": 6360 + }, + { + "epoch": 0.6709915611814345, + "grad_norm": 0.7046273946762085, + "learning_rate": 0.0003729725428201856, + "loss": 1.3546, + "step": 6361 + }, + { + "epoch": 0.6710970464135021, + "grad_norm": 0.6329782009124756, + "learning_rate": 0.00037275553319635285, + "loss": 1.3673, + "step": 6362 + }, + { + "epoch": 0.6712025316455696, + "grad_norm": 0.7192239165306091, + "learning_rate": 0.000372538565844644, + "loss": 1.385, + "step": 6363 + }, + { + "epoch": 0.6713080168776371, + "grad_norm": 0.6834728717803955, + "learning_rate": 0.00037232164078937106, + "loss": 1.3493, + "step": 6364 + }, + { + "epoch": 0.6714135021097046, + "grad_norm": 0.7075076103210449, + "learning_rate": 
0.00037210475805484156, + "loss": 1.3335, + "step": 6365 + }, + { + "epoch": 0.6715189873417722, + "grad_norm": 0.6519190669059753, + "learning_rate": 0.00037188791766535825, + "loss": 1.3378, + "step": 6366 + }, + { + "epoch": 0.6716244725738396, + "grad_norm": 0.6305875182151794, + "learning_rate": 0.0003716711196452192, + "loss": 1.3012, + "step": 6367 + }, + { + "epoch": 0.6717299578059072, + "grad_norm": 0.6548433899879456, + "learning_rate": 0.0003714543640187177, + "loss": 1.3553, + "step": 6368 + }, + { + "epoch": 0.6718354430379747, + "grad_norm": 0.6785622239112854, + "learning_rate": 0.0003712376508101424, + "loss": 1.3798, + "step": 6369 + }, + { + "epoch": 0.6719409282700421, + "grad_norm": 0.6263224482536316, + "learning_rate": 0.0003710209800437769, + "loss": 1.3697, + "step": 6370 + }, + { + "epoch": 0.6720464135021097, + "grad_norm": 0.7240992188453674, + "learning_rate": 0.00037080435174390014, + "loss": 1.3336, + "step": 6371 + }, + { + "epoch": 0.6721518987341772, + "grad_norm": 0.6602389812469482, + "learning_rate": 0.00037058776593478675, + "loss": 1.3839, + "step": 6372 + }, + { + "epoch": 0.6722573839662447, + "grad_norm": 0.7683859467506409, + "learning_rate": 0.00037037122264070625, + "loss": 1.3827, + "step": 6373 + }, + { + "epoch": 0.6723628691983122, + "grad_norm": 0.6888343095779419, + "learning_rate": 0.0003701547218859232, + "loss": 1.3441, + "step": 6374 + }, + { + "epoch": 0.6724683544303798, + "grad_norm": 0.8153131604194641, + "learning_rate": 0.0003699382636946977, + "loss": 1.3323, + "step": 6375 + }, + { + "epoch": 0.6725738396624472, + "grad_norm": 0.7495926022529602, + "learning_rate": 0.0003697218480912848, + "loss": 1.343, + "step": 6376 + }, + { + "epoch": 0.6726793248945148, + "grad_norm": 0.6651413440704346, + "learning_rate": 0.0003695054750999352, + "loss": 1.3771, + "step": 6377 + }, + { + "epoch": 0.6727848101265823, + "grad_norm": 0.7435786128044128, + "learning_rate": 0.0003692891447448943, + "loss": 1.3619, + 
"step": 6378 + }, + { + "epoch": 0.6728902953586497, + "grad_norm": 0.6578619480133057, + "learning_rate": 0.0003690728570504032, + "loss": 1.3713, + "step": 6379 + }, + { + "epoch": 0.6729957805907173, + "grad_norm": 0.6474781036376953, + "learning_rate": 0.00036885661204069767, + "loss": 1.3452, + "step": 6380 + }, + { + "epoch": 0.6731012658227848, + "grad_norm": 1.0644625425338745, + "learning_rate": 0.00036864040974000955, + "loss": 1.3642, + "step": 6381 + }, + { + "epoch": 0.6732067510548523, + "grad_norm": 0.6456520557403564, + "learning_rate": 0.0003684242501725652, + "loss": 1.3494, + "step": 6382 + }, + { + "epoch": 0.6733122362869198, + "grad_norm": 0.876003623008728, + "learning_rate": 0.00036820813336258624, + "loss": 1.4256, + "step": 6383 + }, + { + "epoch": 0.6734177215189874, + "grad_norm": 0.6876536011695862, + "learning_rate": 0.0003679920593342898, + "loss": 1.3679, + "step": 6384 + }, + { + "epoch": 0.6735232067510548, + "grad_norm": 0.7266507148742676, + "learning_rate": 0.0003677760281118879, + "loss": 1.4058, + "step": 6385 + }, + { + "epoch": 0.6736286919831224, + "grad_norm": 0.7882944941520691, + "learning_rate": 0.0003675600397195881, + "loss": 1.4009, + "step": 6386 + }, + { + "epoch": 0.6737341772151899, + "grad_norm": 0.6393299698829651, + "learning_rate": 0.0003673440941815928, + "loss": 1.356, + "step": 6387 + }, + { + "epoch": 0.6738396624472573, + "grad_norm": 0.8576722741127014, + "learning_rate": 0.00036712819152209954, + "loss": 1.3718, + "step": 6388 + }, + { + "epoch": 0.6739451476793249, + "grad_norm": 0.6842714548110962, + "learning_rate": 0.00036691233176530197, + "loss": 1.3767, + "step": 6389 + }, + { + "epoch": 0.6740506329113924, + "grad_norm": 0.6201503276824951, + "learning_rate": 0.0003666965149353878, + "loss": 1.3145, + "step": 6390 + }, + { + "epoch": 0.6741561181434599, + "grad_norm": 0.8862195014953613, + "learning_rate": 0.00036648074105654043, + "loss": 1.3791, + "step": 6391 + }, + { + "epoch": 
0.6742616033755274, + "grad_norm": 0.6194934248924255, + "learning_rate": 0.0003662650101529385, + "loss": 1.4031, + "step": 6392 + }, + { + "epoch": 0.674367088607595, + "grad_norm": 0.8653135299682617, + "learning_rate": 0.00036604932224875564, + "loss": 1.3612, + "step": 6393 + }, + { + "epoch": 0.6744725738396624, + "grad_norm": 0.6505605578422546, + "learning_rate": 0.0003658336773681607, + "loss": 1.3666, + "step": 6394 + }, + { + "epoch": 0.67457805907173, + "grad_norm": 0.9453532099723816, + "learning_rate": 0.0003656180755353179, + "loss": 1.3585, + "step": 6395 + }, + { + "epoch": 0.6746835443037975, + "grad_norm": 0.6833515167236328, + "learning_rate": 0.0003654025167743864, + "loss": 1.3587, + "step": 6396 + }, + { + "epoch": 0.674789029535865, + "grad_norm": 0.6792298555374146, + "learning_rate": 0.0003651870011095204, + "loss": 1.4141, + "step": 6397 + }, + { + "epoch": 0.6748945147679325, + "grad_norm": 0.8583233952522278, + "learning_rate": 0.0003649715285648701, + "loss": 1.3472, + "step": 6398 + }, + { + "epoch": 0.675, + "grad_norm": 0.6415086388587952, + "learning_rate": 0.00036475609916457996, + "loss": 1.3662, + "step": 6399 + }, + { + "epoch": 0.6751054852320675, + "grad_norm": 0.9130015969276428, + "learning_rate": 0.0003645407129327898, + "loss": 1.3888, + "step": 6400 + }, + { + "epoch": 0.675210970464135, + "grad_norm": 0.6882805228233337, + "learning_rate": 0.0003643253698936349, + "loss": 1.342, + "step": 6401 + }, + { + "epoch": 0.6753164556962026, + "grad_norm": 0.6749577522277832, + "learning_rate": 0.00036411007007124547, + "loss": 1.367, + "step": 6402 + }, + { + "epoch": 0.67542194092827, + "grad_norm": 0.674385130405426, + "learning_rate": 0.0003638948134897469, + "loss": 1.4037, + "step": 6403 + }, + { + "epoch": 0.6755274261603376, + "grad_norm": 0.6591693758964539, + "learning_rate": 0.0003636796001732597, + "loss": 1.4014, + "step": 6404 + }, + { + "epoch": 0.6756329113924051, + "grad_norm": 0.7031749486923218, + 
"learning_rate": 0.00036346443014589983, + "loss": 1.3739, + "step": 6405 + }, + { + "epoch": 0.6757383966244725, + "grad_norm": 0.6325563192367554, + "learning_rate": 0.00036324930343177754, + "loss": 1.379, + "step": 6406 + }, + { + "epoch": 0.6758438818565401, + "grad_norm": 0.6518779397010803, + "learning_rate": 0.0003630342200549997, + "loss": 1.3593, + "step": 6407 + }, + { + "epoch": 0.6759493670886076, + "grad_norm": 0.6736438870429993, + "learning_rate": 0.000362819180039667, + "loss": 1.354, + "step": 6408 + }, + { + "epoch": 0.6760548523206751, + "grad_norm": 0.6409212350845337, + "learning_rate": 0.000362604183409876, + "loss": 1.3564, + "step": 6409 + }, + { + "epoch": 0.6761603375527426, + "grad_norm": 0.7912089824676514, + "learning_rate": 0.00036238923018971783, + "loss": 1.3468, + "step": 6410 + }, + { + "epoch": 0.6762658227848102, + "grad_norm": 0.64628005027771, + "learning_rate": 0.00036217432040327926, + "loss": 1.3741, + "step": 6411 + }, + { + "epoch": 0.6763713080168776, + "grad_norm": 0.7816831469535828, + "learning_rate": 0.000361959454074642, + "loss": 1.3434, + "step": 6412 + }, + { + "epoch": 0.6764767932489452, + "grad_norm": 0.639437198638916, + "learning_rate": 0.00036174463122788273, + "loss": 1.3411, + "step": 6413 + }, + { + "epoch": 0.6765822784810127, + "grad_norm": 0.6391339302062988, + "learning_rate": 0.00036152985188707344, + "loss": 1.3428, + "step": 6414 + }, + { + "epoch": 0.6766877637130801, + "grad_norm": 0.9263442158699036, + "learning_rate": 0.0003613151160762815, + "loss": 1.3754, + "step": 6415 + }, + { + "epoch": 0.6767932489451477, + "grad_norm": 0.6309173107147217, + "learning_rate": 0.00036110042381956895, + "loss": 1.3365, + "step": 6416 + }, + { + "epoch": 0.6768987341772152, + "grad_norm": 0.9958198070526123, + "learning_rate": 0.00036088577514099325, + "loss": 1.3779, + "step": 6417 + }, + { + "epoch": 0.6770042194092827, + "grad_norm": 0.6600291132926941, + "learning_rate": 0.0003606711700646067, + "loss": 
1.3777, + "step": 6418 + }, + { + "epoch": 0.6771097046413502, + "grad_norm": 0.7236067056655884, + "learning_rate": 0.00036045660861445684, + "loss": 1.3305, + "step": 6419 + }, + { + "epoch": 0.6772151898734177, + "grad_norm": 0.6598340272903442, + "learning_rate": 0.0003602420908145865, + "loss": 1.3809, + "step": 6420 + }, + { + "epoch": 0.6773206751054852, + "grad_norm": 0.644555926322937, + "learning_rate": 0.00036002761668903335, + "loss": 1.3736, + "step": 6421 + }, + { + "epoch": 0.6774261603375528, + "grad_norm": 0.6664373874664307, + "learning_rate": 0.0003598131862618304, + "loss": 1.3501, + "step": 6422 + }, + { + "epoch": 0.6775316455696202, + "grad_norm": 0.722557544708252, + "learning_rate": 0.0003595987995570052, + "loss": 1.3452, + "step": 6423 + }, + { + "epoch": 0.6776371308016877, + "grad_norm": 0.6495956182479858, + "learning_rate": 0.0003593844565985815, + "loss": 1.3972, + "step": 6424 + }, + { + "epoch": 0.6777426160337553, + "grad_norm": 0.7362712025642395, + "learning_rate": 0.00035917015741057727, + "loss": 1.3947, + "step": 6425 + }, + { + "epoch": 0.6778481012658227, + "grad_norm": 0.6606482863426208, + "learning_rate": 0.0003589559020170058, + "loss": 1.3595, + "step": 6426 + }, + { + "epoch": 0.6779535864978903, + "grad_norm": 0.6321194767951965, + "learning_rate": 0.00035874169044187537, + "loss": 1.3814, + "step": 6427 + }, + { + "epoch": 0.6780590717299578, + "grad_norm": 0.6772470474243164, + "learning_rate": 0.00035852752270918955, + "loss": 1.365, + "step": 6428 + }, + { + "epoch": 0.6781645569620253, + "grad_norm": 0.6616038680076599, + "learning_rate": 0.0003583133988429468, + "loss": 1.3293, + "step": 6429 + }, + { + "epoch": 0.6782700421940928, + "grad_norm": 0.6488134860992432, + "learning_rate": 0.00035809931886714093, + "loss": 1.3829, + "step": 6430 + }, + { + "epoch": 0.6783755274261604, + "grad_norm": 0.8074064254760742, + "learning_rate": 0.00035788528280576053, + "loss": 1.3479, + "step": 6431 + }, + { + "epoch": 
0.6784810126582278, + "grad_norm": 0.7082169055938721, + "learning_rate": 0.0003576712906827892, + "loss": 1.3599, + "step": 6432 + }, + { + "epoch": 0.6785864978902953, + "grad_norm": 0.7387906908988953, + "learning_rate": 0.00035745734252220633, + "loss": 1.3392, + "step": 6433 + }, + { + "epoch": 0.6786919831223629, + "grad_norm": 0.8156517744064331, + "learning_rate": 0.00035724343834798566, + "loss": 1.3558, + "step": 6434 + }, + { + "epoch": 0.6787974683544303, + "grad_norm": 0.7574257254600525, + "learning_rate": 0.00035702957818409606, + "loss": 1.3339, + "step": 6435 + }, + { + "epoch": 0.6789029535864979, + "grad_norm": 0.6980578899383545, + "learning_rate": 0.0003568157620545019, + "loss": 1.3641, + "step": 6436 + }, + { + "epoch": 0.6790084388185654, + "grad_norm": 0.6657881736755371, + "learning_rate": 0.00035660198998316213, + "loss": 1.3451, + "step": 6437 + }, + { + "epoch": 0.6791139240506329, + "grad_norm": 0.7237455248832703, + "learning_rate": 0.00035638826199403103, + "loss": 1.3685, + "step": 6438 + }, + { + "epoch": 0.6792194092827004, + "grad_norm": 0.7067838907241821, + "learning_rate": 0.0003561745781110579, + "loss": 1.3575, + "step": 6439 + }, + { + "epoch": 0.679324894514768, + "grad_norm": 0.6537795066833496, + "learning_rate": 0.00035596093835818683, + "loss": 1.3596, + "step": 6440 + }, + { + "epoch": 0.6794303797468354, + "grad_norm": 0.672290563583374, + "learning_rate": 0.0003557473427593578, + "loss": 1.3912, + "step": 6441 + }, + { + "epoch": 0.679535864978903, + "grad_norm": 0.6121392846107483, + "learning_rate": 0.0003555337913385048, + "loss": 1.3177, + "step": 6442 + }, + { + "epoch": 0.6796413502109705, + "grad_norm": 0.7181984186172485, + "learning_rate": 0.0003553202841195576, + "loss": 1.3724, + "step": 6443 + }, + { + "epoch": 0.6797468354430379, + "grad_norm": 0.7066918015480042, + "learning_rate": 0.00035510682112644055, + "loss": 1.3723, + "step": 6444 + }, + { + "epoch": 0.6798523206751055, + "grad_norm": 
0.6313982009887695, + "learning_rate": 0.00035489340238307326, + "loss": 1.3673, + "step": 6445 + }, + { + "epoch": 0.679957805907173, + "grad_norm": 0.6561729907989502, + "learning_rate": 0.00035468002791337047, + "loss": 1.352, + "step": 6446 + }, + { + "epoch": 0.6800632911392405, + "grad_norm": 0.7245544791221619, + "learning_rate": 0.0003544666977412418, + "loss": 1.3579, + "step": 6447 + }, + { + "epoch": 0.680168776371308, + "grad_norm": 0.677981972694397, + "learning_rate": 0.000354253411890592, + "loss": 1.3409, + "step": 6448 + }, + { + "epoch": 0.6802742616033756, + "grad_norm": 0.6452703475952148, + "learning_rate": 0.00035404017038532045, + "loss": 1.3261, + "step": 6449 + }, + { + "epoch": 0.680379746835443, + "grad_norm": 0.6491719484329224, + "learning_rate": 0.00035382697324932245, + "loss": 1.3698, + "step": 6450 + }, + { + "epoch": 0.6804852320675105, + "grad_norm": 0.6395772099494934, + "learning_rate": 0.0003536138205064877, + "loss": 1.3498, + "step": 6451 + }, + { + "epoch": 0.6805907172995781, + "grad_norm": 0.6881549954414368, + "learning_rate": 0.0003534007121807009, + "loss": 1.3995, + "step": 6452 + }, + { + "epoch": 0.6806962025316455, + "grad_norm": 0.6475149989128113, + "learning_rate": 0.00035318764829584185, + "loss": 1.3862, + "step": 6453 + }, + { + "epoch": 0.6808016877637131, + "grad_norm": 0.668266236782074, + "learning_rate": 0.0003529746288757856, + "loss": 1.3376, + "step": 6454 + }, + { + "epoch": 0.6809071729957806, + "grad_norm": 0.6605696678161621, + "learning_rate": 0.0003527616539444019, + "loss": 1.3615, + "step": 6455 + }, + { + "epoch": 0.6810126582278481, + "grad_norm": 0.6507703065872192, + "learning_rate": 0.0003525487235255556, + "loss": 1.3449, + "step": 6456 + }, + { + "epoch": 0.6811181434599156, + "grad_norm": 0.7475992441177368, + "learning_rate": 0.0003523358376431068, + "loss": 1.3804, + "step": 6457 + }, + { + "epoch": 0.6812236286919832, + "grad_norm": 0.62778639793396, + "learning_rate": 
0.00035212299632090996, + "loss": 1.3606, + "step": 6458 + }, + { + "epoch": 0.6813291139240506, + "grad_norm": 0.7325910329818726, + "learning_rate": 0.00035191019958281575, + "loss": 1.3835, + "step": 6459 + }, + { + "epoch": 0.6814345991561181, + "grad_norm": 0.7692862153053284, + "learning_rate": 0.00035169744745266866, + "loss": 1.381, + "step": 6460 + }, + { + "epoch": 0.6815400843881857, + "grad_norm": 0.6444610357284546, + "learning_rate": 0.0003514847399543087, + "loss": 1.3587, + "step": 6461 + }, + { + "epoch": 0.6816455696202531, + "grad_norm": 0.7274123430252075, + "learning_rate": 0.00035127207711157084, + "loss": 1.3692, + "step": 6462 + }, + { + "epoch": 0.6817510548523207, + "grad_norm": 0.7236949801445007, + "learning_rate": 0.00035105945894828495, + "loss": 1.3523, + "step": 6463 + }, + { + "epoch": 0.6818565400843882, + "grad_norm": 0.631001889705658, + "learning_rate": 0.000350846885488276, + "loss": 1.3749, + "step": 6464 + }, + { + "epoch": 0.6819620253164557, + "grad_norm": 0.9076833128929138, + "learning_rate": 0.00035063435675536386, + "loss": 1.3643, + "step": 6465 + }, + { + "epoch": 0.6820675105485232, + "grad_norm": 0.6136942505836487, + "learning_rate": 0.00035042187277336325, + "loss": 1.3387, + "step": 6466 + }, + { + "epoch": 0.6821729957805908, + "grad_norm": 0.659299373626709, + "learning_rate": 0.00035020943356608444, + "loss": 1.3606, + "step": 6467 + }, + { + "epoch": 0.6822784810126582, + "grad_norm": 0.7326719164848328, + "learning_rate": 0.0003499970391573322, + "loss": 1.3845, + "step": 6468 + }, + { + "epoch": 0.6823839662447257, + "grad_norm": 0.7594329714775085, + "learning_rate": 0.00034978468957090635, + "loss": 1.3481, + "step": 6469 + }, + { + "epoch": 0.6824894514767933, + "grad_norm": 0.6863324642181396, + "learning_rate": 0.0003495723848306017, + "loss": 1.3975, + "step": 6470 + }, + { + "epoch": 0.6825949367088607, + "grad_norm": 0.8045861124992371, + "learning_rate": 0.000349360124960208, + "loss": 1.3457, + 
"step": 6471 + }, + { + "epoch": 0.6827004219409283, + "grad_norm": 0.7293587327003479, + "learning_rate": 0.00034914790998351005, + "loss": 1.404, + "step": 6472 + }, + { + "epoch": 0.6828059071729958, + "grad_norm": 0.7723101377487183, + "learning_rate": 0.0003489357399242876, + "loss": 1.374, + "step": 6473 + }, + { + "epoch": 0.6829113924050633, + "grad_norm": 0.7013517618179321, + "learning_rate": 0.0003487236148063154, + "loss": 1.3564, + "step": 6474 + }, + { + "epoch": 0.6830168776371308, + "grad_norm": 0.7063018083572388, + "learning_rate": 0.0003485115346533629, + "loss": 1.3778, + "step": 6475 + }, + { + "epoch": 0.6831223628691984, + "grad_norm": 0.7031915783882141, + "learning_rate": 0.00034829949948919517, + "loss": 1.3933, + "step": 6476 + }, + { + "epoch": 0.6832278481012658, + "grad_norm": 0.8689092993736267, + "learning_rate": 0.00034808750933757154, + "loss": 1.3585, + "step": 6477 + }, + { + "epoch": 0.6833333333333333, + "grad_norm": 0.743976891040802, + "learning_rate": 0.0003478755642222466, + "loss": 1.3771, + "step": 6478 + }, + { + "epoch": 0.6834388185654009, + "grad_norm": 0.6418873071670532, + "learning_rate": 0.0003476636641669699, + "loss": 1.3537, + "step": 6479 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 1.086315631866455, + "learning_rate": 0.0003474518091954859, + "loss": 1.3741, + "step": 6480 + }, + { + "epoch": 0.6836497890295359, + "grad_norm": 0.7048218846321106, + "learning_rate": 0.00034723999933153387, + "loss": 1.3743, + "step": 6481 + }, + { + "epoch": 0.6837552742616034, + "grad_norm": 0.7208907604217529, + "learning_rate": 0.00034702823459884836, + "loss": 1.3262, + "step": 6482 + }, + { + "epoch": 0.6838607594936709, + "grad_norm": 0.8254152536392212, + "learning_rate": 0.0003468165150211585, + "loss": 1.3302, + "step": 6483 + }, + { + "epoch": 0.6839662447257384, + "grad_norm": 0.6982827186584473, + "learning_rate": 0.0003466048406221883, + "loss": 1.394, + "step": 6484 + }, + { + "epoch": 
0.6840717299578059, + "grad_norm": 0.6664254665374756, + "learning_rate": 0.0003463932114256576, + "loss": 1.3672, + "step": 6485 + }, + { + "epoch": 0.6841772151898734, + "grad_norm": 1.005035400390625, + "learning_rate": 0.00034618162745528, + "loss": 1.3726, + "step": 6486 + }, + { + "epoch": 0.684282700421941, + "grad_norm": 0.6432906985282898, + "learning_rate": 0.00034597008873476473, + "loss": 1.3718, + "step": 6487 + }, + { + "epoch": 0.6843881856540084, + "grad_norm": 0.7923038601875305, + "learning_rate": 0.0003457585952878156, + "loss": 1.3882, + "step": 6488 + }, + { + "epoch": 0.6844936708860759, + "grad_norm": 0.7412003874778748, + "learning_rate": 0.0003455471471381318, + "loss": 1.3687, + "step": 6489 + }, + { + "epoch": 0.6845991561181435, + "grad_norm": 0.7124162316322327, + "learning_rate": 0.0003453357443094068, + "loss": 1.3706, + "step": 6490 + }, + { + "epoch": 0.6847046413502109, + "grad_norm": 0.6887215375900269, + "learning_rate": 0.0003451243868253294, + "loss": 1.3909, + "step": 6491 + }, + { + "epoch": 0.6848101265822785, + "grad_norm": 0.6649815440177917, + "learning_rate": 0.0003449130747095835, + "loss": 1.3938, + "step": 6492 + }, + { + "epoch": 0.684915611814346, + "grad_norm": 0.668793261051178, + "learning_rate": 0.0003447018079858472, + "loss": 1.3681, + "step": 6493 + }, + { + "epoch": 0.6850210970464135, + "grad_norm": 0.6863491535186768, + "learning_rate": 0.0003444905866777946, + "loss": 1.3499, + "step": 6494 + }, + { + "epoch": 0.685126582278481, + "grad_norm": 0.6389793157577515, + "learning_rate": 0.0003442794108090938, + "loss": 1.3977, + "step": 6495 + }, + { + "epoch": 0.6852320675105485, + "grad_norm": 0.6672884821891785, + "learning_rate": 0.0003440682804034081, + "loss": 1.3697, + "step": 6496 + }, + { + "epoch": 0.685337552742616, + "grad_norm": 0.6766560673713684, + "learning_rate": 0.00034385719548439585, + "loss": 1.4007, + "step": 6497 + }, + { + "epoch": 0.6854430379746835, + "grad_norm": 0.6564518809318542, 
+ "learning_rate": 0.00034364615607570994, + "loss": 1.3581, + "step": 6498 + }, + { + "epoch": 0.6855485232067511, + "grad_norm": 0.7235392332077026, + "learning_rate": 0.0003434351622009985, + "loss": 1.4017, + "step": 6499 + }, + { + "epoch": 0.6856540084388185, + "grad_norm": 0.602874755859375, + "learning_rate": 0.00034322421388390456, + "loss": 1.3605, + "step": 6500 + }, + { + "epoch": 0.6857594936708861, + "grad_norm": 0.6703754663467407, + "learning_rate": 0.00034301331114806573, + "loss": 1.3723, + "step": 6501 + }, + { + "epoch": 0.6858649789029536, + "grad_norm": 0.7148078680038452, + "learning_rate": 0.0003428024540171148, + "loss": 1.3621, + "step": 6502 + }, + { + "epoch": 0.685970464135021, + "grad_norm": 0.6501685976982117, + "learning_rate": 0.0003425916425146791, + "loss": 1.4069, + "step": 6503 + }, + { + "epoch": 0.6860759493670886, + "grad_norm": 0.6792184114456177, + "learning_rate": 0.0003423808766643817, + "loss": 1.392, + "step": 6504 + }, + { + "epoch": 0.6861814345991561, + "grad_norm": 0.7295405268669128, + "learning_rate": 0.00034217015648983957, + "loss": 1.3797, + "step": 6505 + }, + { + "epoch": 0.6862869198312236, + "grad_norm": 0.7370994687080383, + "learning_rate": 0.0003419594820146652, + "loss": 1.387, + "step": 6506 + }, + { + "epoch": 0.6863924050632911, + "grad_norm": 0.696997344493866, + "learning_rate": 0.0003417488532624653, + "loss": 1.3875, + "step": 6507 + }, + { + "epoch": 0.6864978902953587, + "grad_norm": 0.8958292603492737, + "learning_rate": 0.00034153827025684225, + "loss": 1.3533, + "step": 6508 + }, + { + "epoch": 0.6866033755274261, + "grad_norm": 0.6502413153648376, + "learning_rate": 0.0003413277330213928, + "loss": 1.3946, + "step": 6509 + }, + { + "epoch": 0.6867088607594937, + "grad_norm": 0.7115593552589417, + "learning_rate": 0.0003411172415797087, + "loss": 1.3732, + "step": 6510 + }, + { + "epoch": 0.6868143459915612, + "grad_norm": 0.7820438742637634, + "learning_rate": 0.00034090679595537646, + 
"loss": 1.3677, + "step": 6511 + }, + { + "epoch": 0.6869198312236287, + "grad_norm": 0.6857089400291443, + "learning_rate": 0.0003406963961719778, + "loss": 1.3699, + "step": 6512 + }, + { + "epoch": 0.6870253164556962, + "grad_norm": 0.8405474424362183, + "learning_rate": 0.00034048604225308854, + "loss": 1.3652, + "step": 6513 + }, + { + "epoch": 0.6871308016877637, + "grad_norm": 0.7333900332450867, + "learning_rate": 0.00034027573422228054, + "loss": 1.3772, + "step": 6514 + }, + { + "epoch": 0.6872362869198312, + "grad_norm": 0.6375696659088135, + "learning_rate": 0.00034006547210311964, + "loss": 1.3521, + "step": 6515 + }, + { + "epoch": 0.6873417721518987, + "grad_norm": 0.6841490268707275, + "learning_rate": 0.0003398552559191667, + "loss": 1.3685, + "step": 6516 + }, + { + "epoch": 0.6874472573839663, + "grad_norm": 0.80068439245224, + "learning_rate": 0.00033964508569397743, + "loss": 1.4033, + "step": 6517 + }, + { + "epoch": 0.6875527426160337, + "grad_norm": 0.6645880341529846, + "learning_rate": 0.0003394349614511026, + "loss": 1.3456, + "step": 6518 + }, + { + "epoch": 0.6876582278481013, + "grad_norm": 0.7396351099014282, + "learning_rate": 0.0003392248832140876, + "loss": 1.3157, + "step": 6519 + }, + { + "epoch": 0.6877637130801688, + "grad_norm": 0.7220817804336548, + "learning_rate": 0.0003390148510064727, + "loss": 1.3637, + "step": 6520 + }, + { + "epoch": 0.6878691983122363, + "grad_norm": 0.7014243006706238, + "learning_rate": 0.00033880486485179305, + "loss": 1.3687, + "step": 6521 + }, + { + "epoch": 0.6879746835443038, + "grad_norm": 0.7225997447967529, + "learning_rate": 0.0003385949247735786, + "loss": 1.3424, + "step": 6522 + }, + { + "epoch": 0.6880801687763713, + "grad_norm": 0.6632193326950073, + "learning_rate": 0.00033838503079535435, + "loss": 1.3338, + "step": 6523 + }, + { + "epoch": 0.6881856540084388, + "grad_norm": 0.749900758266449, + "learning_rate": 0.00033817518294064003, + "loss": 1.36, + "step": 6524 + }, + { + 
"epoch": 0.6882911392405063, + "grad_norm": 0.663262665271759, + "learning_rate": 0.00033796538123294996, + "loss": 1.3743, + "step": 6525 + }, + { + "epoch": 0.6883966244725739, + "grad_norm": 0.7005791068077087, + "learning_rate": 0.0003377556256957936, + "loss": 1.3636, + "step": 6526 + }, + { + "epoch": 0.6885021097046413, + "grad_norm": 0.7629561424255371, + "learning_rate": 0.0003375459163526749, + "loss": 1.3649, + "step": 6527 + }, + { + "epoch": 0.6886075949367089, + "grad_norm": 0.6529185771942139, + "learning_rate": 0.000337336253227093, + "loss": 1.3593, + "step": 6528 + }, + { + "epoch": 0.6887130801687764, + "grad_norm": 0.7870706915855408, + "learning_rate": 0.00033712663634254163, + "loss": 1.3619, + "step": 6529 + }, + { + "epoch": 0.6888185654008439, + "grad_norm": 0.6871551871299744, + "learning_rate": 0.0003369170657225094, + "loss": 1.3542, + "step": 6530 + }, + { + "epoch": 0.6889240506329114, + "grad_norm": 0.6266865134239197, + "learning_rate": 0.0003367075413904799, + "loss": 1.3176, + "step": 6531 + }, + { + "epoch": 0.689029535864979, + "grad_norm": 0.6400817632675171, + "learning_rate": 0.00033649806336993085, + "loss": 1.351, + "step": 6532 + }, + { + "epoch": 0.6891350210970464, + "grad_norm": 0.6923902034759521, + "learning_rate": 0.0003362886316843361, + "loss": 1.3453, + "step": 6533 + }, + { + "epoch": 0.6892405063291139, + "grad_norm": 0.6635845899581909, + "learning_rate": 0.000336079246357163, + "loss": 1.3611, + "step": 6534 + }, + { + "epoch": 0.6893459915611815, + "grad_norm": 0.6855123043060303, + "learning_rate": 0.00033586990741187446, + "loss": 1.3946, + "step": 6535 + }, + { + "epoch": 0.6894514767932489, + "grad_norm": 0.6778467893600464, + "learning_rate": 0.0003356606148719277, + "loss": 1.3776, + "step": 6536 + }, + { + "epoch": 0.6895569620253165, + "grad_norm": 0.6434230208396912, + "learning_rate": 0.00033545136876077524, + "loss": 1.3828, + "step": 6537 + }, + { + "epoch": 0.689662447257384, + "grad_norm": 
0.6762382388114929, + "learning_rate": 0.00033524216910186394, + "loss": 1.3751, + "step": 6538 + }, + { + "epoch": 0.6897679324894515, + "grad_norm": 0.7163007259368896, + "learning_rate": 0.00033503301591863586, + "loss": 1.394, + "step": 6539 + }, + { + "epoch": 0.689873417721519, + "grad_norm": 0.6432074308395386, + "learning_rate": 0.0003348239092345275, + "loss": 1.3707, + "step": 6540 + }, + { + "epoch": 0.6899789029535865, + "grad_norm": 0.6181939244270325, + "learning_rate": 0.00033461484907297036, + "loss": 1.3495, + "step": 6541 + }, + { + "epoch": 0.690084388185654, + "grad_norm": 0.6717314124107361, + "learning_rate": 0.00033440583545739046, + "loss": 1.3658, + "step": 6542 + }, + { + "epoch": 0.6901898734177215, + "grad_norm": 0.6189976930618286, + "learning_rate": 0.00033419686841120925, + "loss": 1.3363, + "step": 6543 + }, + { + "epoch": 0.6902953586497891, + "grad_norm": 0.6347763538360596, + "learning_rate": 0.00033398794795784227, + "loss": 1.3905, + "step": 6544 + }, + { + "epoch": 0.6904008438818565, + "grad_norm": 0.6466079354286194, + "learning_rate": 0.0003337790741207003, + "loss": 1.3661, + "step": 6545 + }, + { + "epoch": 0.6905063291139241, + "grad_norm": 0.7116128206253052, + "learning_rate": 0.0003335702469231884, + "loss": 1.3759, + "step": 6546 + }, + { + "epoch": 0.6906118143459916, + "grad_norm": 0.7129396200180054, + "learning_rate": 0.00033336146638870685, + "loss": 1.3958, + "step": 6547 + }, + { + "epoch": 0.690717299578059, + "grad_norm": 0.6899725198745728, + "learning_rate": 0.0003331527325406506, + "loss": 1.3693, + "step": 6548 + }, + { + "epoch": 0.6908227848101266, + "grad_norm": 0.6995415687561035, + "learning_rate": 0.0003329440454024092, + "loss": 1.3581, + "step": 6549 + }, + { + "epoch": 0.6909282700421941, + "grad_norm": 0.6786855459213257, + "learning_rate": 0.0003327354049973672, + "loss": 1.3296, + "step": 6550 + }, + { + "epoch": 0.6910337552742616, + "grad_norm": 0.7757246494293213, + "learning_rate": 
0.00033252681134890373, + "loss": 1.3958, + "step": 6551 + }, + { + "epoch": 0.6911392405063291, + "grad_norm": 0.6287280321121216, + "learning_rate": 0.00033231826448039246, + "loss": 1.336, + "step": 6552 + }, + { + "epoch": 0.6912447257383966, + "grad_norm": 0.6524717807769775, + "learning_rate": 0.0003321097644152027, + "loss": 1.3627, + "step": 6553 + }, + { + "epoch": 0.6913502109704641, + "grad_norm": 0.7443824410438538, + "learning_rate": 0.00033190131117669753, + "loss": 1.3754, + "step": 6554 + }, + { + "epoch": 0.6914556962025317, + "grad_norm": 0.6931418776512146, + "learning_rate": 0.0003316929047882354, + "loss": 1.3458, + "step": 6555 + }, + { + "epoch": 0.6915611814345991, + "grad_norm": 0.6612353324890137, + "learning_rate": 0.0003314845452731691, + "loss": 1.3715, + "step": 6556 + }, + { + "epoch": 0.6916666666666667, + "grad_norm": 0.7152261734008789, + "learning_rate": 0.00033127623265484643, + "loss": 1.3663, + "step": 6557 + }, + { + "epoch": 0.6917721518987342, + "grad_norm": 0.8107393980026245, + "learning_rate": 0.00033106796695660983, + "loss": 1.3675, + "step": 6558 + }, + { + "epoch": 0.6918776371308016, + "grad_norm": 0.6656736135482788, + "learning_rate": 0.0003308597482017965, + "loss": 1.3427, + "step": 6559 + }, + { + "epoch": 0.6919831223628692, + "grad_norm": 1.0100549459457397, + "learning_rate": 0.00033065157641373847, + "loss": 1.3631, + "step": 6560 + }, + { + "epoch": 0.6920886075949367, + "grad_norm": 0.7365729808807373, + "learning_rate": 0.00033044345161576224, + "loss": 1.3811, + "step": 6561 + }, + { + "epoch": 0.6921940928270042, + "grad_norm": 0.6871423125267029, + "learning_rate": 0.00033023537383118916, + "loss": 1.3485, + "step": 6562 + }, + { + "epoch": 0.6922995780590717, + "grad_norm": 1.0142347812652588, + "learning_rate": 0.0003300273430833358, + "loss": 1.3155, + "step": 6563 + }, + { + "epoch": 0.6924050632911393, + "grad_norm": 0.7566784024238586, + "learning_rate": 0.00032981935939551294, + "loss": 1.3474, 
+ "step": 6564 + }, + { + "epoch": 0.6925105485232067, + "grad_norm": 0.8299745917320251, + "learning_rate": 0.000329611422791026, + "loss": 1.3739, + "step": 6565 + }, + { + "epoch": 0.6926160337552743, + "grad_norm": 0.9218248128890991, + "learning_rate": 0.00032940353329317533, + "loss": 1.3646, + "step": 6566 + }, + { + "epoch": 0.6927215189873418, + "grad_norm": 0.6530163288116455, + "learning_rate": 0.0003291956909252561, + "loss": 1.3825, + "step": 6567 + }, + { + "epoch": 0.6928270042194092, + "grad_norm": 0.8555260300636292, + "learning_rate": 0.00032898789571055796, + "loss": 1.366, + "step": 6568 + }, + { + "epoch": 0.6929324894514768, + "grad_norm": 0.8742140531539917, + "learning_rate": 0.0003287801476723656, + "loss": 1.3771, + "step": 6569 + }, + { + "epoch": 0.6930379746835443, + "grad_norm": 0.6430342197418213, + "learning_rate": 0.0003285724468339576, + "loss": 1.3485, + "step": 6570 + }, + { + "epoch": 0.6931434599156118, + "grad_norm": 0.8483876585960388, + "learning_rate": 0.00032836479321860884, + "loss": 1.3646, + "step": 6571 + }, + { + "epoch": 0.6932489451476793, + "grad_norm": 0.8544785976409912, + "learning_rate": 0.00032815718684958727, + "loss": 1.3641, + "step": 6572 + }, + { + "epoch": 0.6933544303797469, + "grad_norm": 0.6983761191368103, + "learning_rate": 0.00032794962775015656, + "loss": 1.3439, + "step": 6573 + }, + { + "epoch": 0.6934599156118143, + "grad_norm": 0.9185715913772583, + "learning_rate": 0.0003277421159435745, + "loss": 1.3759, + "step": 6574 + }, + { + "epoch": 0.6935654008438819, + "grad_norm": 0.8234259486198425, + "learning_rate": 0.000327534651453094, + "loss": 1.3655, + "step": 6575 + }, + { + "epoch": 0.6936708860759494, + "grad_norm": 0.7203028798103333, + "learning_rate": 0.00032732723430196236, + "loss": 1.3127, + "step": 6576 + }, + { + "epoch": 0.6937763713080168, + "grad_norm": 1.0892773866653442, + "learning_rate": 0.0003271198645134218, + "loss": 1.3497, + "step": 6577 + }, + { + "epoch": 
0.6938818565400844, + "grad_norm": 0.6550107002258301, + "learning_rate": 0.0003269125421107091, + "loss": 1.3563, + "step": 6578 + }, + { + "epoch": 0.6939873417721519, + "grad_norm": 0.6595633029937744, + "learning_rate": 0.00032670526711705536, + "loss": 1.3477, + "step": 6579 + }, + { + "epoch": 0.6940928270042194, + "grad_norm": 0.8272036910057068, + "learning_rate": 0.00032649803955568755, + "loss": 1.3959, + "step": 6580 + }, + { + "epoch": 0.6941983122362869, + "grad_norm": 0.7891648411750793, + "learning_rate": 0.0003262908594498262, + "loss": 1.3401, + "step": 6581 + }, + { + "epoch": 0.6943037974683545, + "grad_norm": 0.7020430564880371, + "learning_rate": 0.0003260837268226868, + "loss": 1.3555, + "step": 6582 + }, + { + "epoch": 0.6944092827004219, + "grad_norm": 0.8590823411941528, + "learning_rate": 0.0003258766416974796, + "loss": 1.3661, + "step": 6583 + }, + { + "epoch": 0.6945147679324895, + "grad_norm": 0.7027124166488647, + "learning_rate": 0.0003256696040974097, + "loss": 1.3576, + "step": 6584 + }, + { + "epoch": 0.694620253164557, + "grad_norm": 0.7420308589935303, + "learning_rate": 0.00032546261404567644, + "loss": 1.3703, + "step": 6585 + }, + { + "epoch": 0.6947257383966244, + "grad_norm": 0.6613793969154358, + "learning_rate": 0.0003252556715654743, + "loss": 1.3393, + "step": 6586 + }, + { + "epoch": 0.694831223628692, + "grad_norm": 0.6898732781410217, + "learning_rate": 0.00032504877667999206, + "loss": 1.359, + "step": 6587 + }, + { + "epoch": 0.6949367088607595, + "grad_norm": 0.6181709170341492, + "learning_rate": 0.00032484192941241316, + "loss": 1.3626, + "step": 6588 + }, + { + "epoch": 0.695042194092827, + "grad_norm": 0.6995390057563782, + "learning_rate": 0.0003246351297859164, + "loss": 1.3637, + "step": 6589 + }, + { + "epoch": 0.6951476793248945, + "grad_norm": 0.7285886406898499, + "learning_rate": 0.00032442837782367434, + "loss": 1.4182, + "step": 6590 + }, + { + "epoch": 0.6952531645569621, + "grad_norm": 
0.7185877561569214, + "learning_rate": 0.00032422167354885463, + "loss": 1.3594, + "step": 6591 + }, + { + "epoch": 0.6953586497890295, + "grad_norm": 0.7455267310142517, + "learning_rate": 0.0003240150169846196, + "loss": 1.3711, + "step": 6592 + }, + { + "epoch": 0.695464135021097, + "grad_norm": 0.6905942559242249, + "learning_rate": 0.00032380840815412603, + "loss": 1.3776, + "step": 6593 + }, + { + "epoch": 0.6955696202531646, + "grad_norm": 0.6364731788635254, + "learning_rate": 0.00032360184708052554, + "loss": 1.3481, + "step": 6594 + }, + { + "epoch": 0.695675105485232, + "grad_norm": 0.6709735989570618, + "learning_rate": 0.00032339533378696424, + "loss": 1.3349, + "step": 6595 + }, + { + "epoch": 0.6957805907172996, + "grad_norm": 0.6599434018135071, + "learning_rate": 0.00032318886829658277, + "loss": 1.3904, + "step": 6596 + }, + { + "epoch": 0.6958860759493671, + "grad_norm": 0.6361075043678284, + "learning_rate": 0.0003229824506325172, + "loss": 1.3731, + "step": 6597 + }, + { + "epoch": 0.6959915611814346, + "grad_norm": 0.7082082033157349, + "learning_rate": 0.0003227760808178973, + "loss": 1.3697, + "step": 6598 + }, + { + "epoch": 0.6960970464135021, + "grad_norm": 0.6512983441352844, + "learning_rate": 0.00032256975887584783, + "loss": 1.3672, + "step": 6599 + }, + { + "epoch": 0.6962025316455697, + "grad_norm": 0.6676541566848755, + "learning_rate": 0.0003223634848294883, + "loss": 1.3824, + "step": 6600 + }, + { + "epoch": 0.6963080168776371, + "grad_norm": 0.7345523834228516, + "learning_rate": 0.0003221572587019327, + "loss": 1.3398, + "step": 6601 + }, + { + "epoch": 0.6964135021097047, + "grad_norm": 0.6386638283729553, + "learning_rate": 0.0003219510805162896, + "loss": 1.357, + "step": 6602 + }, + { + "epoch": 0.6965189873417722, + "grad_norm": 0.8818236589431763, + "learning_rate": 0.0003217449502956624, + "loss": 1.3739, + "step": 6603 + }, + { + "epoch": 0.6966244725738396, + "grad_norm": 0.7535274028778076, + "learning_rate": 
0.0003215388680631491, + "loss": 1.3722, + "step": 6604 + }, + { + "epoch": 0.6967299578059072, + "grad_norm": 0.699188232421875, + "learning_rate": 0.00032133283384184173, + "loss": 1.3632, + "step": 6605 + }, + { + "epoch": 0.6968354430379747, + "grad_norm": 0.9167850017547607, + "learning_rate": 0.00032112684765482814, + "loss": 1.3199, + "step": 6606 + }, + { + "epoch": 0.6969409282700422, + "grad_norm": 0.6661888360977173, + "learning_rate": 0.00032092090952518996, + "loss": 1.3537, + "step": 6607 + }, + { + "epoch": 0.6970464135021097, + "grad_norm": 0.6647951602935791, + "learning_rate": 0.00032071501947600334, + "loss": 1.3086, + "step": 6608 + }, + { + "epoch": 0.6971518987341773, + "grad_norm": 1.0980054140090942, + "learning_rate": 0.00032050917753033935, + "loss": 1.3469, + "step": 6609 + }, + { + "epoch": 0.6972573839662447, + "grad_norm": 0.7668020129203796, + "learning_rate": 0.00032030338371126374, + "loss": 1.4054, + "step": 6610 + }, + { + "epoch": 0.6973628691983123, + "grad_norm": 0.69990473985672, + "learning_rate": 0.0003200976380418366, + "loss": 1.3307, + "step": 6611 + }, + { + "epoch": 0.6974683544303798, + "grad_norm": 0.8522589802742004, + "learning_rate": 0.00031989194054511276, + "loss": 1.374, + "step": 6612 + }, + { + "epoch": 0.6975738396624472, + "grad_norm": 0.7320780158042908, + "learning_rate": 0.0003196862912441418, + "loss": 1.4098, + "step": 6613 + }, + { + "epoch": 0.6976793248945148, + "grad_norm": 0.7786458134651184, + "learning_rate": 0.0003194806901619673, + "loss": 1.3642, + "step": 6614 + }, + { + "epoch": 0.6977848101265823, + "grad_norm": 0.8379298448562622, + "learning_rate": 0.00031927513732162856, + "loss": 1.3477, + "step": 6615 + }, + { + "epoch": 0.6978902953586498, + "grad_norm": 0.7051492929458618, + "learning_rate": 0.00031906963274615837, + "loss": 1.3826, + "step": 6616 + }, + { + "epoch": 0.6979957805907173, + "grad_norm": 0.7739113569259644, + "learning_rate": 0.00031886417645858475, + "loss": 1.3931, + 
"step": 6617 + }, + { + "epoch": 0.6981012658227848, + "grad_norm": 0.7427653074264526, + "learning_rate": 0.00031865876848192993, + "loss": 1.3434, + "step": 6618 + }, + { + "epoch": 0.6982067510548523, + "grad_norm": 0.6691606044769287, + "learning_rate": 0.000318453408839211, + "loss": 1.3562, + "step": 6619 + }, + { + "epoch": 0.6983122362869199, + "grad_norm": 0.736502468585968, + "learning_rate": 0.0003182480975534395, + "loss": 1.3611, + "step": 6620 + }, + { + "epoch": 0.6984177215189873, + "grad_norm": 0.6741708517074585, + "learning_rate": 0.0003180428346476215, + "loss": 1.3577, + "step": 6621 + }, + { + "epoch": 0.6985232067510548, + "grad_norm": 0.7737998366355896, + "learning_rate": 0.0003178376201447576, + "loss": 1.3935, + "step": 6622 + }, + { + "epoch": 0.6986286919831224, + "grad_norm": 0.7966204285621643, + "learning_rate": 0.00031763245406784364, + "loss": 1.341, + "step": 6623 + }, + { + "epoch": 0.6987341772151898, + "grad_norm": 0.7111354470252991, + "learning_rate": 0.0003174273364398691, + "loss": 1.3494, + "step": 6624 + }, + { + "epoch": 0.6988396624472574, + "grad_norm": 0.8533746600151062, + "learning_rate": 0.00031722226728381854, + "loss": 1.3737, + "step": 6625 + }, + { + "epoch": 0.6989451476793249, + "grad_norm": 0.628574788570404, + "learning_rate": 0.00031701724662267097, + "loss": 1.3259, + "step": 6626 + }, + { + "epoch": 0.6990506329113924, + "grad_norm": 0.681244432926178, + "learning_rate": 0.00031681227447939996, + "loss": 1.3565, + "step": 6627 + }, + { + "epoch": 0.6991561181434599, + "grad_norm": 0.7738984227180481, + "learning_rate": 0.00031660735087697363, + "loss": 1.37, + "step": 6628 + }, + { + "epoch": 0.6992616033755275, + "grad_norm": 0.7155830264091492, + "learning_rate": 0.0003164024758383548, + "loss": 1.3466, + "step": 6629 + }, + { + "epoch": 0.6993670886075949, + "grad_norm": 0.6477936506271362, + "learning_rate": 0.00031619764938650057, + "loss": 1.3583, + "step": 6630 + }, + { + "epoch": 
0.6994725738396624, + "grad_norm": 0.9624441862106323, + "learning_rate": 0.00031599287154436263, + "loss": 1.3677, + "step": 6631 + }, + { + "epoch": 0.69957805907173, + "grad_norm": 0.7629706859588623, + "learning_rate": 0.0003157881423348879, + "loss": 1.3649, + "step": 6632 + }, + { + "epoch": 0.6996835443037974, + "grad_norm": 0.8344842791557312, + "learning_rate": 0.00031558346178101694, + "loss": 1.3583, + "step": 6633 + }, + { + "epoch": 0.699789029535865, + "grad_norm": 0.7136138677597046, + "learning_rate": 0.00031537882990568535, + "loss": 1.385, + "step": 6634 + }, + { + "epoch": 0.6998945147679325, + "grad_norm": 0.6610859036445618, + "learning_rate": 0.000315174246731823, + "loss": 1.3583, + "step": 6635 + }, + { + "epoch": 0.7, + "grad_norm": 0.6980324983596802, + "learning_rate": 0.00031496971228235464, + "loss": 1.3254, + "step": 6636 + }, + { + "epoch": 0.7001054852320675, + "grad_norm": 0.6703041195869446, + "learning_rate": 0.00031476522658019916, + "loss": 1.3973, + "step": 6637 + }, + { + "epoch": 0.700210970464135, + "grad_norm": 0.6861977577209473, + "learning_rate": 0.0003145607896482704, + "loss": 1.3668, + "step": 6638 + }, + { + "epoch": 0.7003164556962025, + "grad_norm": 0.7672819495201111, + "learning_rate": 0.00031435640150947645, + "loss": 1.3538, + "step": 6639 + }, + { + "epoch": 0.70042194092827, + "grad_norm": 0.8119977712631226, + "learning_rate": 0.0003141520621867197, + "loss": 1.3453, + "step": 6640 + }, + { + "epoch": 0.7005274261603376, + "grad_norm": 0.6498298645019531, + "learning_rate": 0.00031394777170289806, + "loss": 1.3622, + "step": 6641 + }, + { + "epoch": 0.700632911392405, + "grad_norm": 0.8108528256416321, + "learning_rate": 0.00031374353008090285, + "loss": 1.3519, + "step": 6642 + }, + { + "epoch": 0.7007383966244726, + "grad_norm": 0.7371866703033447, + "learning_rate": 0.0003135393373436206, + "loss": 1.3766, + "step": 6643 + }, + { + "epoch": 0.7008438818565401, + "grad_norm": 0.667154848575592, + 
"learning_rate": 0.0003133351935139319, + "loss": 1.3216, + "step": 6644 + }, + { + "epoch": 0.7009493670886076, + "grad_norm": 0.7992228269577026, + "learning_rate": 0.00031313109861471223, + "loss": 1.3248, + "step": 6645 + }, + { + "epoch": 0.7010548523206751, + "grad_norm": 0.7186533808708191, + "learning_rate": 0.0003129270526688313, + "loss": 1.3379, + "step": 6646 + }, + { + "epoch": 0.7011603375527427, + "grad_norm": 0.6395300030708313, + "learning_rate": 0.0003127230556991536, + "loss": 1.3745, + "step": 6647 + }, + { + "epoch": 0.7012658227848101, + "grad_norm": 0.8284322619438171, + "learning_rate": 0.000312519107728538, + "loss": 1.3934, + "step": 6648 + }, + { + "epoch": 0.7013713080168776, + "grad_norm": 0.6959242224693298, + "learning_rate": 0.0003123152087798376, + "loss": 1.3078, + "step": 6649 + }, + { + "epoch": 0.7014767932489452, + "grad_norm": 0.6633476614952087, + "learning_rate": 0.00031211135887590074, + "loss": 1.3804, + "step": 6650 + }, + { + "epoch": 0.7015822784810126, + "grad_norm": 0.7356727123260498, + "learning_rate": 0.0003119075580395697, + "loss": 1.3458, + "step": 6651 + }, + { + "epoch": 0.7016877637130802, + "grad_norm": 0.8252260684967041, + "learning_rate": 0.0003117038062936813, + "loss": 1.384, + "step": 6652 + }, + { + "epoch": 0.7017932489451477, + "grad_norm": 0.6604204773902893, + "learning_rate": 0.0003115001036610669, + "loss": 1.3378, + "step": 6653 + }, + { + "epoch": 0.7018987341772152, + "grad_norm": 0.7535848021507263, + "learning_rate": 0.0003112964501645525, + "loss": 1.3663, + "step": 6654 + }, + { + "epoch": 0.7020042194092827, + "grad_norm": 0.7983235716819763, + "learning_rate": 0.0003110928458269584, + "loss": 1.3575, + "step": 6655 + }, + { + "epoch": 0.7021097046413503, + "grad_norm": 0.6932598948478699, + "learning_rate": 0.00031088929067109945, + "loss": 1.3424, + "step": 6656 + }, + { + "epoch": 0.7022151898734177, + "grad_norm": 0.6789823174476624, + "learning_rate": 0.0003106857847197849, + 
"loss": 1.3494, + "step": 6657 + }, + { + "epoch": 0.7023206751054852, + "grad_norm": 0.7125970125198364, + "learning_rate": 0.0003104823279958191, + "loss": 1.3807, + "step": 6658 + }, + { + "epoch": 0.7024261603375528, + "grad_norm": 0.6470047831535339, + "learning_rate": 0.00031027892052200003, + "loss": 1.3521, + "step": 6659 + }, + { + "epoch": 0.7025316455696202, + "grad_norm": 0.6412912011146545, + "learning_rate": 0.0003100755623211205, + "loss": 1.3288, + "step": 6660 + }, + { + "epoch": 0.7026371308016878, + "grad_norm": 0.6468191146850586, + "learning_rate": 0.000309872253415968, + "loss": 1.3445, + "step": 6661 + }, + { + "epoch": 0.7027426160337553, + "grad_norm": 0.6552268266677856, + "learning_rate": 0.00030966899382932404, + "loss": 1.3668, + "step": 6662 + }, + { + "epoch": 0.7028481012658228, + "grad_norm": 0.6729779839515686, + "learning_rate": 0.0003094657835839651, + "loss": 1.3619, + "step": 6663 + }, + { + "epoch": 0.7029535864978903, + "grad_norm": 0.6130385994911194, + "learning_rate": 0.00030926262270266177, + "loss": 1.3366, + "step": 6664 + }, + { + "epoch": 0.7030590717299579, + "grad_norm": 0.6463119387626648, + "learning_rate": 0.00030905951120817934, + "loss": 1.3526, + "step": 6665 + }, + { + "epoch": 0.7031645569620253, + "grad_norm": 0.6294759511947632, + "learning_rate": 0.00030885644912327713, + "loss": 1.3656, + "step": 6666 + }, + { + "epoch": 0.7032700421940928, + "grad_norm": 0.6737573146820068, + "learning_rate": 0.0003086534364707097, + "loss": 1.359, + "step": 6667 + }, + { + "epoch": 0.7033755274261604, + "grad_norm": 0.6301988363265991, + "learning_rate": 0.00030845047327322556, + "loss": 1.3533, + "step": 6668 + }, + { + "epoch": 0.7034810126582278, + "grad_norm": 0.6453431248664856, + "learning_rate": 0.0003082475595535677, + "loss": 1.3857, + "step": 6669 + }, + { + "epoch": 0.7035864978902954, + "grad_norm": 0.636196494102478, + "learning_rate": 0.0003080446953344735, + "loss": 1.3733, + "step": 6670 + }, + { + 
"epoch": 0.7036919831223629, + "grad_norm": 0.662396252155304, + "learning_rate": 0.000307841880638675, + "loss": 1.3876, + "step": 6671 + }, + { + "epoch": 0.7037974683544304, + "grad_norm": 0.6230870485305786, + "learning_rate": 0.0003076391154888985, + "loss": 1.3573, + "step": 6672 + }, + { + "epoch": 0.7039029535864979, + "grad_norm": 0.6791706681251526, + "learning_rate": 0.000307436399907865, + "loss": 1.3778, + "step": 6673 + }, + { + "epoch": 0.7040084388185655, + "grad_norm": 0.6189324259757996, + "learning_rate": 0.00030723373391828966, + "loss": 1.3948, + "step": 6674 + }, + { + "epoch": 0.7041139240506329, + "grad_norm": 0.6304290294647217, + "learning_rate": 0.00030703111754288204, + "loss": 1.3334, + "step": 6675 + }, + { + "epoch": 0.7042194092827004, + "grad_norm": 0.6396507024765015, + "learning_rate": 0.0003068285508043467, + "loss": 1.3684, + "step": 6676 + }, + { + "epoch": 0.704324894514768, + "grad_norm": 0.6966017484664917, + "learning_rate": 0.00030662603372538224, + "loss": 1.3644, + "step": 6677 + }, + { + "epoch": 0.7044303797468354, + "grad_norm": 0.6306371688842773, + "learning_rate": 0.0003064235663286815, + "loss": 1.3696, + "step": 6678 + }, + { + "epoch": 0.704535864978903, + "grad_norm": 0.6880505084991455, + "learning_rate": 0.00030622114863693205, + "loss": 1.3144, + "step": 6679 + }, + { + "epoch": 0.7046413502109705, + "grad_norm": 0.6581810712814331, + "learning_rate": 0.00030601878067281575, + "loss": 1.3407, + "step": 6680 + }, + { + "epoch": 0.704746835443038, + "grad_norm": 0.6619985103607178, + "learning_rate": 0.00030581646245900895, + "loss": 1.3535, + "step": 6681 + }, + { + "epoch": 0.7048523206751055, + "grad_norm": 0.6448777914047241, + "learning_rate": 0.0003056141940181825, + "loss": 1.3597, + "step": 6682 + }, + { + "epoch": 0.7049578059071729, + "grad_norm": 0.6931617856025696, + "learning_rate": 0.0003054119753730012, + "loss": 1.3327, + "step": 6683 + }, + { + "epoch": 0.7050632911392405, + "grad_norm": 
0.6570295095443726, + "learning_rate": 0.00030520980654612527, + "loss": 1.3183, + "step": 6684 + }, + { + "epoch": 0.705168776371308, + "grad_norm": 0.6585750579833984, + "learning_rate": 0.0003050076875602084, + "loss": 1.3656, + "step": 6685 + }, + { + "epoch": 0.7052742616033755, + "grad_norm": 0.6408479809761047, + "learning_rate": 0.0003048056184378991, + "loss": 1.363, + "step": 6686 + }, + { + "epoch": 0.705379746835443, + "grad_norm": 0.6484508514404297, + "learning_rate": 0.0003046035992018402, + "loss": 1.3559, + "step": 6687 + }, + { + "epoch": 0.7054852320675106, + "grad_norm": 0.7452313899993896, + "learning_rate": 0.00030440162987466896, + "loss": 1.3327, + "step": 6688 + }, + { + "epoch": 0.705590717299578, + "grad_norm": 0.6210340261459351, + "learning_rate": 0.00030419971047901704, + "loss": 1.3874, + "step": 6689 + }, + { + "epoch": 0.7056962025316456, + "grad_norm": 0.6846380829811096, + "learning_rate": 0.00030399784103751044, + "loss": 1.3507, + "step": 6690 + }, + { + "epoch": 0.7058016877637131, + "grad_norm": 0.6167494654655457, + "learning_rate": 0.0003037960215727699, + "loss": 1.3544, + "step": 6691 + }, + { + "epoch": 0.7059071729957805, + "grad_norm": 0.6531645059585571, + "learning_rate": 0.0003035942521074097, + "loss": 1.3628, + "step": 6692 + }, + { + "epoch": 0.7060126582278481, + "grad_norm": 0.7111881971359253, + "learning_rate": 0.0003033925326640398, + "loss": 1.3699, + "step": 6693 + }, + { + "epoch": 0.7061181434599156, + "grad_norm": 0.6346677541732788, + "learning_rate": 0.00030319086326526364, + "loss": 1.3643, + "step": 6694 + }, + { + "epoch": 0.7062236286919831, + "grad_norm": 0.6759331226348877, + "learning_rate": 0.00030298924393367923, + "loss": 1.348, + "step": 6695 + }, + { + "epoch": 0.7063291139240506, + "grad_norm": 0.6387539505958557, + "learning_rate": 0.0003027876746918791, + "loss": 1.3623, + "step": 6696 + }, + { + "epoch": 0.7064345991561182, + "grad_norm": 0.666917622089386, + "learning_rate": 
0.00030258615556244995, + "loss": 1.3361, + "step": 6697 + }, + { + "epoch": 0.7065400843881856, + "grad_norm": 0.6834567189216614, + "learning_rate": 0.0003023846865679731, + "loss": 1.3802, + "step": 6698 + }, + { + "epoch": 0.7066455696202532, + "grad_norm": 0.6539472341537476, + "learning_rate": 0.00030218326773102407, + "loss": 1.3199, + "step": 6699 + }, + { + "epoch": 0.7067510548523207, + "grad_norm": 0.6424941420555115, + "learning_rate": 0.000301981899074173, + "loss": 1.3541, + "step": 6700 + }, + { + "epoch": 0.7068565400843881, + "grad_norm": 0.635585367679596, + "learning_rate": 0.00030178058061998387, + "loss": 1.3825, + "step": 6701 + }, + { + "epoch": 0.7069620253164557, + "grad_norm": 0.6580876111984253, + "learning_rate": 0.00030157931239101595, + "loss": 1.3711, + "step": 6702 + }, + { + "epoch": 0.7070675105485232, + "grad_norm": 0.6351555585861206, + "learning_rate": 0.00030137809440982207, + "loss": 1.3451, + "step": 6703 + }, + { + "epoch": 0.7071729957805907, + "grad_norm": 0.6407109498977661, + "learning_rate": 0.0003011769266989498, + "loss": 1.3393, + "step": 6704 + }, + { + "epoch": 0.7072784810126582, + "grad_norm": 0.6914539337158203, + "learning_rate": 0.0003009758092809409, + "loss": 1.3781, + "step": 6705 + }, + { + "epoch": 0.7073839662447258, + "grad_norm": 0.6901195049285889, + "learning_rate": 0.00030077474217833167, + "loss": 1.3448, + "step": 6706 + }, + { + "epoch": 0.7074894514767932, + "grad_norm": 0.6390812397003174, + "learning_rate": 0.0003005737254136525, + "loss": 1.3689, + "step": 6707 + }, + { + "epoch": 0.7075949367088608, + "grad_norm": 0.7289256453514099, + "learning_rate": 0.0003003727590094285, + "loss": 1.3368, + "step": 6708 + }, + { + "epoch": 0.7077004219409283, + "grad_norm": 0.617263913154602, + "learning_rate": 0.00030017184298817873, + "loss": 1.3618, + "step": 6709 + }, + { + "epoch": 0.7078059071729957, + "grad_norm": 0.666428804397583, + "learning_rate": 0.0002999709773724171, + "loss": 1.337, + 
"step": 6710 + }, + { + "epoch": 0.7079113924050633, + "grad_norm": 0.6744697093963623, + "learning_rate": 0.00029977016218465154, + "loss": 1.3366, + "step": 6711 + }, + { + "epoch": 0.7080168776371308, + "grad_norm": 0.6522385478019714, + "learning_rate": 0.0002995693974473844, + "loss": 1.3511, + "step": 6712 + }, + { + "epoch": 0.7081223628691983, + "grad_norm": 0.6354044079780579, + "learning_rate": 0.00029936868318311235, + "loss": 1.3512, + "step": 6713 + }, + { + "epoch": 0.7082278481012658, + "grad_norm": 0.7391380071640015, + "learning_rate": 0.00029916801941432637, + "loss": 1.3572, + "step": 6714 + }, + { + "epoch": 0.7083333333333334, + "grad_norm": 0.6682695746421814, + "learning_rate": 0.00029896740616351187, + "loss": 1.3009, + "step": 6715 + }, + { + "epoch": 0.7084388185654008, + "grad_norm": 0.8055132627487183, + "learning_rate": 0.00029876684345314853, + "loss": 1.3228, + "step": 6716 + }, + { + "epoch": 0.7085443037974684, + "grad_norm": 0.6943092942237854, + "learning_rate": 0.00029856633130571046, + "loss": 1.3895, + "step": 6717 + }, + { + "epoch": 0.7086497890295359, + "grad_norm": 0.8157179355621338, + "learning_rate": 0.00029836586974366574, + "loss": 1.3309, + "step": 6718 + }, + { + "epoch": 0.7087552742616033, + "grad_norm": 0.6398443579673767, + "learning_rate": 0.00029816545878947763, + "loss": 1.3587, + "step": 6719 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 0.6811503767967224, + "learning_rate": 0.00029796509846560294, + "loss": 1.3778, + "step": 6720 + }, + { + "epoch": 0.7089662447257384, + "grad_norm": 0.7275667786598206, + "learning_rate": 0.00029776478879449305, + "loss": 1.3495, + "step": 6721 + }, + { + "epoch": 0.7090717299578059, + "grad_norm": 0.6382771134376526, + "learning_rate": 0.0002975645297985935, + "loss": 1.3717, + "step": 6722 + }, + { + "epoch": 0.7091772151898734, + "grad_norm": 0.6265467405319214, + "learning_rate": 0.0002973643215003445, + "loss": 1.3523, + "step": 6723 + }, + { + "epoch": 
0.709282700421941, + "grad_norm": 0.7521436810493469, + "learning_rate": 0.0002971641639221804, + "loss": 1.3579, + "step": 6724 + }, + { + "epoch": 0.7093881856540084, + "grad_norm": 0.6512177586555481, + "learning_rate": 0.00029696405708652966, + "loss": 1.3461, + "step": 6725 + }, + { + "epoch": 0.709493670886076, + "grad_norm": 0.6695861220359802, + "learning_rate": 0.00029676400101581545, + "loss": 1.3597, + "step": 6726 + }, + { + "epoch": 0.7095991561181435, + "grad_norm": 0.6228731870651245, + "learning_rate": 0.0002965639957324546, + "loss": 1.3525, + "step": 6727 + }, + { + "epoch": 0.7097046413502109, + "grad_norm": 0.6284753680229187, + "learning_rate": 0.00029636404125885936, + "loss": 1.3651, + "step": 6728 + }, + { + "epoch": 0.7098101265822785, + "grad_norm": 0.6469168663024902, + "learning_rate": 0.00029616413761743537, + "loss": 1.3488, + "step": 6729 + }, + { + "epoch": 0.709915611814346, + "grad_norm": 0.6850295066833496, + "learning_rate": 0.0002959642848305828, + "loss": 1.3472, + "step": 6730 + }, + { + "epoch": 0.7100210970464135, + "grad_norm": 0.6939191818237305, + "learning_rate": 0.0002957644829206961, + "loss": 1.3712, + "step": 6731 + }, + { + "epoch": 0.710126582278481, + "grad_norm": 0.6453575491905212, + "learning_rate": 0.0002955647319101641, + "loss": 1.353, + "step": 6732 + }, + { + "epoch": 0.7102320675105486, + "grad_norm": 0.722602128982544, + "learning_rate": 0.00029536503182137, + "loss": 1.3616, + "step": 6733 + }, + { + "epoch": 0.710337552742616, + "grad_norm": 0.6518615484237671, + "learning_rate": 0.00029516538267669096, + "loss": 1.3354, + "step": 6734 + }, + { + "epoch": 0.7104430379746836, + "grad_norm": 0.6360836625099182, + "learning_rate": 0.00029496578449849867, + "loss": 1.3611, + "step": 6735 + }, + { + "epoch": 0.7105485232067511, + "grad_norm": 0.6652728319168091, + "learning_rate": 0.00029476623730915943, + "loss": 1.3668, + "step": 6736 + }, + { + "epoch": 0.7106540084388185, + "grad_norm": 
0.688271164894104, + "learning_rate": 0.00029456674113103335, + "loss": 1.3352, + "step": 6737 + }, + { + "epoch": 0.7107594936708861, + "grad_norm": 0.6374965906143188, + "learning_rate": 0.00029436729598647483, + "loss": 1.3577, + "step": 6738 + }, + { + "epoch": 0.7108649789029536, + "grad_norm": 0.6860894560813904, + "learning_rate": 0.00029416790189783286, + "loss": 1.3917, + "step": 6739 + }, + { + "epoch": 0.7109704641350211, + "grad_norm": 0.7152873873710632, + "learning_rate": 0.00029396855888745045, + "loss": 1.3463, + "step": 6740 + }, + { + "epoch": 0.7110759493670886, + "grad_norm": 0.6862225532531738, + "learning_rate": 0.00029376926697766495, + "loss": 1.3562, + "step": 6741 + }, + { + "epoch": 0.7111814345991562, + "grad_norm": 0.6254626512527466, + "learning_rate": 0.00029357002619080814, + "loss": 1.3615, + "step": 6742 + }, + { + "epoch": 0.7112869198312236, + "grad_norm": 0.6284996867179871, + "learning_rate": 0.0002933708365492058, + "loss": 1.3453, + "step": 6743 + }, + { + "epoch": 0.7113924050632912, + "grad_norm": 0.6214726567268372, + "learning_rate": 0.00029317169807517785, + "loss": 1.3278, + "step": 6744 + }, + { + "epoch": 0.7114978902953587, + "grad_norm": 0.6538009643554688, + "learning_rate": 0.00029297261079103945, + "loss": 1.3847, + "step": 6745 + }, + { + "epoch": 0.7116033755274261, + "grad_norm": 0.6267781853675842, + "learning_rate": 0.000292773574719099, + "loss": 1.378, + "step": 6746 + }, + { + "epoch": 0.7117088607594937, + "grad_norm": 0.653826892375946, + "learning_rate": 0.0002925745898816594, + "loss": 1.3605, + "step": 6747 + }, + { + "epoch": 0.7118143459915611, + "grad_norm": 0.7101896405220032, + "learning_rate": 0.0002923756563010179, + "loss": 1.3545, + "step": 6748 + }, + { + "epoch": 0.7119198312236287, + "grad_norm": 0.6728231906890869, + "learning_rate": 0.000292176773999466, + "loss": 1.3939, + "step": 6749 + }, + { + "epoch": 0.7120253164556962, + "grad_norm": 0.6509131789207458, + "learning_rate": 
0.0002919779429992895, + "loss": 1.3485, + "step": 6750 + }, + { + "epoch": 0.7121308016877637, + "grad_norm": 0.6838768124580383, + "learning_rate": 0.0002917791633227685, + "loss": 1.3323, + "step": 6751 + }, + { + "epoch": 0.7122362869198312, + "grad_norm": 0.6512710452079773, + "learning_rate": 0.000291580434992177, + "loss": 1.3391, + "step": 6752 + }, + { + "epoch": 0.7123417721518988, + "grad_norm": 0.7476974129676819, + "learning_rate": 0.00029138175802978343, + "loss": 1.3422, + "step": 6753 + }, + { + "epoch": 0.7124472573839662, + "grad_norm": 0.7033519744873047, + "learning_rate": 0.00029118313245785104, + "loss": 1.3994, + "step": 6754 + }, + { + "epoch": 0.7125527426160337, + "grad_norm": 0.7206516861915588, + "learning_rate": 0.00029098455829863653, + "loss": 1.3571, + "step": 6755 + }, + { + "epoch": 0.7126582278481013, + "grad_norm": 0.8204362988471985, + "learning_rate": 0.0002907860355743911, + "loss": 1.322, + "step": 6756 + }, + { + "epoch": 0.7127637130801687, + "grad_norm": 0.6759037971496582, + "learning_rate": 0.00029058756430736025, + "loss": 1.37, + "step": 6757 + }, + { + "epoch": 0.7128691983122363, + "grad_norm": 0.8928658366203308, + "learning_rate": 0.0002903891445197836, + "loss": 1.3824, + "step": 6758 + }, + { + "epoch": 0.7129746835443038, + "grad_norm": 0.6460596919059753, + "learning_rate": 0.0002901907762338952, + "loss": 1.3683, + "step": 6759 + }, + { + "epoch": 0.7130801687763713, + "grad_norm": 0.7578840255737305, + "learning_rate": 0.0002899924594719231, + "loss": 1.3458, + "step": 6760 + }, + { + "epoch": 0.7131856540084388, + "grad_norm": 0.7982420325279236, + "learning_rate": 0.0002897941942560894, + "loss": 1.368, + "step": 6761 + }, + { + "epoch": 0.7132911392405064, + "grad_norm": 0.6385823488235474, + "learning_rate": 0.0002895959806086114, + "loss": 1.3376, + "step": 6762 + }, + { + "epoch": 0.7133966244725738, + "grad_norm": 0.7562954425811768, + "learning_rate": 0.0002893978185516995, + "loss": 1.3708, + "step": 
6763 + }, + { + "epoch": 0.7135021097046413, + "grad_norm": 0.8512316942214966, + "learning_rate": 0.00028919970810755883, + "loss": 1.3598, + "step": 6764 + }, + { + "epoch": 0.7136075949367089, + "grad_norm": 0.6344859004020691, + "learning_rate": 0.0002890016492983886, + "loss": 1.3681, + "step": 6765 + }, + { + "epoch": 0.7137130801687763, + "grad_norm": 0.9811937808990479, + "learning_rate": 0.0002888036421463823, + "loss": 1.3174, + "step": 6766 + }, + { + "epoch": 0.7138185654008439, + "grad_norm": 0.7108927965164185, + "learning_rate": 0.0002886056866737277, + "loss": 1.3486, + "step": 6767 + }, + { + "epoch": 0.7139240506329114, + "grad_norm": 0.6273996829986572, + "learning_rate": 0.0002884077829026066, + "loss": 1.3579, + "step": 6768 + }, + { + "epoch": 0.7140295358649789, + "grad_norm": 0.8696315884590149, + "learning_rate": 0.0002882099308551951, + "loss": 1.3786, + "step": 6769 + }, + { + "epoch": 0.7141350210970464, + "grad_norm": 0.639689028263092, + "learning_rate": 0.00028801213055366335, + "loss": 1.3593, + "step": 6770 + }, + { + "epoch": 0.714240506329114, + "grad_norm": 0.6853806376457214, + "learning_rate": 0.00028781438202017613, + "loss": 1.4165, + "step": 6771 + }, + { + "epoch": 0.7143459915611814, + "grad_norm": 0.7221973538398743, + "learning_rate": 0.0002876166852768923, + "loss": 1.3535, + "step": 6772 + }, + { + "epoch": 0.7144514767932489, + "grad_norm": 0.6484153270721436, + "learning_rate": 0.0002874190403459644, + "loss": 1.374, + "step": 6773 + }, + { + "epoch": 0.7145569620253165, + "grad_norm": 0.6336521506309509, + "learning_rate": 0.0002872214472495397, + "loss": 1.3153, + "step": 6774 + }, + { + "epoch": 0.7146624472573839, + "grad_norm": 0.6681212782859802, + "learning_rate": 0.00028702390600975937, + "loss": 1.3388, + "step": 6775 + }, + { + "epoch": 0.7147679324894515, + "grad_norm": 0.7144957184791565, + "learning_rate": 0.0002868264166487591, + "loss": 1.3207, + "step": 6776 + }, + { + "epoch": 0.714873417721519, + 
"grad_norm": 0.6344905495643616, + "learning_rate": 0.0002866289791886684, + "loss": 1.3505, + "step": 6777 + }, + { + "epoch": 0.7149789029535865, + "grad_norm": 0.7175314426422119, + "learning_rate": 0.00028643159365161113, + "loss": 1.3678, + "step": 6778 + }, + { + "epoch": 0.715084388185654, + "grad_norm": 0.7537404894828796, + "learning_rate": 0.00028623426005970517, + "loss": 1.3548, + "step": 6779 + }, + { + "epoch": 0.7151898734177216, + "grad_norm": 0.7492557764053345, + "learning_rate": 0.00028603697843506315, + "loss": 1.3668, + "step": 6780 + }, + { + "epoch": 0.715295358649789, + "grad_norm": 0.7243245244026184, + "learning_rate": 0.00028583974879979113, + "loss": 1.3886, + "step": 6781 + }, + { + "epoch": 0.7154008438818565, + "grad_norm": 0.6771417260169983, + "learning_rate": 0.00028564257117598993, + "loss": 1.3505, + "step": 6782 + }, + { + "epoch": 0.7155063291139241, + "grad_norm": 0.6759231090545654, + "learning_rate": 0.00028544544558575395, + "loss": 1.3393, + "step": 6783 + }, + { + "epoch": 0.7156118143459915, + "grad_norm": 0.8450697064399719, + "learning_rate": 0.0002852483720511724, + "loss": 1.3875, + "step": 6784 + }, + { + "epoch": 0.7157172995780591, + "grad_norm": 0.7441373467445374, + "learning_rate": 0.0002850513505943281, + "loss": 1.3756, + "step": 6785 + }, + { + "epoch": 0.7158227848101266, + "grad_norm": 0.8237536549568176, + "learning_rate": 0.0002848543812372986, + "loss": 1.3254, + "step": 6786 + }, + { + "epoch": 0.7159282700421941, + "grad_norm": 0.9329118132591248, + "learning_rate": 0.00028465746400215463, + "loss": 1.3463, + "step": 6787 + }, + { + "epoch": 0.7160337552742616, + "grad_norm": 0.6549582481384277, + "learning_rate": 0.00028446059891096265, + "loss": 1.3618, + "step": 6788 + }, + { + "epoch": 0.7161392405063292, + "grad_norm": 0.7430165410041809, + "learning_rate": 0.00028426378598578187, + "loss": 1.3472, + "step": 6789 + }, + { + "epoch": 0.7162447257383966, + "grad_norm": 0.6696643233299255, + 
"learning_rate": 0.0002840670252486662, + "loss": 1.3985, + "step": 6790 + }, + { + "epoch": 0.7163502109704641, + "grad_norm": 0.6299256682395935, + "learning_rate": 0.00028387031672166385, + "loss": 1.351, + "step": 6791 + }, + { + "epoch": 0.7164556962025317, + "grad_norm": 0.7166950702667236, + "learning_rate": 0.0002836736604268167, + "loss": 1.3623, + "step": 6792 + }, + { + "epoch": 0.7165611814345991, + "grad_norm": 0.7144677042961121, + "learning_rate": 0.0002834770563861613, + "loss": 1.3692, + "step": 6793 + }, + { + "epoch": 0.7166666666666667, + "grad_norm": 0.6579853892326355, + "learning_rate": 0.000283280504621728, + "loss": 1.3393, + "step": 6794 + }, + { + "epoch": 0.7167721518987342, + "grad_norm": 0.6279065012931824, + "learning_rate": 0.0002830840051555414, + "loss": 1.3482, + "step": 6795 + }, + { + "epoch": 0.7168776371308017, + "grad_norm": 0.6723464727401733, + "learning_rate": 0.00028288755800962, + "loss": 1.315, + "step": 6796 + }, + { + "epoch": 0.7169831223628692, + "grad_norm": 0.6771274209022522, + "learning_rate": 0.00028269116320597733, + "loss": 1.3449, + "step": 6797 + }, + { + "epoch": 0.7170886075949368, + "grad_norm": 0.6349275708198547, + "learning_rate": 0.0002824948207666199, + "loss": 1.3586, + "step": 6798 + }, + { + "epoch": 0.7171940928270042, + "grad_norm": 0.7303996086120605, + "learning_rate": 0.0002822985307135491, + "loss": 1.3763, + "step": 6799 + }, + { + "epoch": 0.7172995780590717, + "grad_norm": 0.7091466784477234, + "learning_rate": 0.00028210229306876, + "loss": 1.327, + "step": 6800 + }, + { + "epoch": 0.7174050632911393, + "grad_norm": 0.7379381060600281, + "learning_rate": 0.0002819061078542422, + "loss": 1.3915, + "step": 6801 + }, + { + "epoch": 0.7175105485232067, + "grad_norm": 0.7640540599822998, + "learning_rate": 0.0002817099750919791, + "loss": 1.3354, + "step": 6802 + }, + { + "epoch": 0.7176160337552743, + "grad_norm": 0.6377902030944824, + "learning_rate": 0.0002815138948039485, + "loss": 
1.3631, + "step": 6803 + }, + { + "epoch": 0.7177215189873418, + "grad_norm": 0.6453162431716919, + "learning_rate": 0.000281317867012122, + "loss": 1.3082, + "step": 6804 + }, + { + "epoch": 0.7178270042194093, + "grad_norm": 0.7710520029067993, + "learning_rate": 0.0002811218917384652, + "loss": 1.3838, + "step": 6805 + }, + { + "epoch": 0.7179324894514768, + "grad_norm": 0.7146469950675964, + "learning_rate": 0.00028092596900493885, + "loss": 1.3718, + "step": 6806 + }, + { + "epoch": 0.7180379746835444, + "grad_norm": 0.6699192523956299, + "learning_rate": 0.00028073009883349665, + "loss": 1.3395, + "step": 6807 + }, + { + "epoch": 0.7181434599156118, + "grad_norm": 0.9962306618690491, + "learning_rate": 0.00028053428124608684, + "loss": 1.379, + "step": 6808 + }, + { + "epoch": 0.7182489451476793, + "grad_norm": 0.8225874900817871, + "learning_rate": 0.0002803385162646518, + "loss": 1.3661, + "step": 6809 + }, + { + "epoch": 0.7183544303797469, + "grad_norm": 0.8280652165412903, + "learning_rate": 0.0002801428039111279, + "loss": 1.2867, + "step": 6810 + }, + { + "epoch": 0.7184599156118143, + "grad_norm": 0.7129773497581482, + "learning_rate": 0.0002799471442074459, + "loss": 1.3742, + "step": 6811 + }, + { + "epoch": 0.7185654008438819, + "grad_norm": 0.720249354839325, + "learning_rate": 0.00027975153717553014, + "loss": 1.3706, + "step": 6812 + }, + { + "epoch": 0.7186708860759494, + "grad_norm": 0.7150330543518066, + "learning_rate": 0.00027955598283729936, + "loss": 1.3632, + "step": 6813 + }, + { + "epoch": 0.7187763713080169, + "grad_norm": 0.7304258942604065, + "learning_rate": 0.00027936048121466673, + "loss": 1.3264, + "step": 6814 + }, + { + "epoch": 0.7188818565400844, + "grad_norm": 0.6265997290611267, + "learning_rate": 0.00027916503232953895, + "loss": 1.3605, + "step": 6815 + }, + { + "epoch": 0.7189873417721518, + "grad_norm": 0.6682577729225159, + "learning_rate": 0.0002789696362038172, + "loss": 1.3487, + "step": 6816 + }, + { + "epoch": 
0.7190928270042194, + "grad_norm": 0.7387956976890564, + "learning_rate": 0.0002787742928593965, + "loss": 1.3478, + "step": 6817 + }, + { + "epoch": 0.7191983122362869, + "grad_norm": 0.6326912641525269, + "learning_rate": 0.00027857900231816594, + "loss": 1.3418, + "step": 6818 + }, + { + "epoch": 0.7193037974683544, + "grad_norm": 0.7273572683334351, + "learning_rate": 0.0002783837646020089, + "loss": 1.342, + "step": 6819 + }, + { + "epoch": 0.7194092827004219, + "grad_norm": 0.6956986784934998, + "learning_rate": 0.00027818857973280274, + "loss": 1.3592, + "step": 6820 + }, + { + "epoch": 0.7195147679324895, + "grad_norm": 0.6316491365432739, + "learning_rate": 0.0002779934477324189, + "loss": 1.3597, + "step": 6821 + }, + { + "epoch": 0.7196202531645569, + "grad_norm": 0.7144437432289124, + "learning_rate": 0.0002777983686227226, + "loss": 1.4005, + "step": 6822 + }, + { + "epoch": 0.7197257383966245, + "grad_norm": 0.6545629501342773, + "learning_rate": 0.00027760334242557397, + "loss": 1.3103, + "step": 6823 + }, + { + "epoch": 0.719831223628692, + "grad_norm": 0.63544100522995, + "learning_rate": 0.00027740836916282643, + "loss": 1.3559, + "step": 6824 + }, + { + "epoch": 0.7199367088607594, + "grad_norm": 0.6883193254470825, + "learning_rate": 0.00027721344885632765, + "loss": 1.3469, + "step": 6825 + }, + { + "epoch": 0.720042194092827, + "grad_norm": 0.6942600607872009, + "learning_rate": 0.0002770185815279195, + "loss": 1.326, + "step": 6826 + }, + { + "epoch": 0.7201476793248945, + "grad_norm": 0.6352781653404236, + "learning_rate": 0.0002768237671994377, + "loss": 1.389, + "step": 6827 + }, + { + "epoch": 0.720253164556962, + "grad_norm": 0.6294938921928406, + "learning_rate": 0.0002766290058927123, + "loss": 1.3537, + "step": 6828 + }, + { + "epoch": 0.7203586497890295, + "grad_norm": 0.637579083442688, + "learning_rate": 0.0002764342976295673, + "loss": 1.3344, + "step": 6829 + }, + { + "epoch": 0.7204641350210971, + "grad_norm": 
0.6684272289276123, + "learning_rate": 0.0002762396424318206, + "loss": 1.3436, + "step": 6830 + }, + { + "epoch": 0.7205696202531645, + "grad_norm": 0.6684103608131409, + "learning_rate": 0.000276045040321284, + "loss": 1.3561, + "step": 6831 + }, + { + "epoch": 0.7206751054852321, + "grad_norm": 0.7204806804656982, + "learning_rate": 0.0002758504913197644, + "loss": 1.3288, + "step": 6832 + }, + { + "epoch": 0.7207805907172996, + "grad_norm": 0.6499807238578796, + "learning_rate": 0.0002756559954490615, + "loss": 1.3392, + "step": 6833 + }, + { + "epoch": 0.720886075949367, + "grad_norm": 0.6380324363708496, + "learning_rate": 0.0002754615527309696, + "loss": 1.3396, + "step": 6834 + }, + { + "epoch": 0.7209915611814346, + "grad_norm": 0.7329873442649841, + "learning_rate": 0.000275267163187277, + "loss": 1.3536, + "step": 6835 + }, + { + "epoch": 0.7210970464135021, + "grad_norm": 0.7091097831726074, + "learning_rate": 0.00027507282683976594, + "loss": 1.3374, + "step": 6836 + }, + { + "epoch": 0.7212025316455696, + "grad_norm": 0.669453501701355, + "learning_rate": 0.0002748785437102129, + "loss": 1.3363, + "step": 6837 + }, + { + "epoch": 0.7213080168776371, + "grad_norm": 0.6369678974151611, + "learning_rate": 0.00027468431382038816, + "loss": 1.3588, + "step": 6838 + }, + { + "epoch": 0.7214135021097047, + "grad_norm": 0.6433664560317993, + "learning_rate": 0.00027449013719205623, + "loss": 1.3302, + "step": 6839 + }, + { + "epoch": 0.7215189873417721, + "grad_norm": 0.8259466290473938, + "learning_rate": 0.00027429601384697526, + "loss": 1.3263, + "step": 6840 + }, + { + "epoch": 0.7216244725738397, + "grad_norm": 0.6552324891090393, + "learning_rate": 0.00027410194380689826, + "loss": 1.3328, + "step": 6841 + }, + { + "epoch": 0.7217299578059072, + "grad_norm": 0.7466565370559692, + "learning_rate": 0.00027390792709357155, + "loss": 1.3437, + "step": 6842 + }, + { + "epoch": 0.7218354430379746, + "grad_norm": 0.723828911781311, + "learning_rate": 
0.00027371396372873557, + "loss": 1.3468, + "step": 6843 + }, + { + "epoch": 0.7219409282700422, + "grad_norm": 0.6846514940261841, + "learning_rate": 0.00027352005373412487, + "loss": 1.37, + "step": 6844 + }, + { + "epoch": 0.7220464135021097, + "grad_norm": 0.7123448252677917, + "learning_rate": 0.00027332619713146816, + "loss": 1.3473, + "step": 6845 + }, + { + "epoch": 0.7221518987341772, + "grad_norm": 0.6639880537986755, + "learning_rate": 0.000273132393942488, + "loss": 1.3683, + "step": 6846 + }, + { + "epoch": 0.7222573839662447, + "grad_norm": 0.6466277241706848, + "learning_rate": 0.000272938644188901, + "loss": 1.3502, + "step": 6847 + }, + { + "epoch": 0.7223628691983123, + "grad_norm": 0.6758052110671997, + "learning_rate": 0.00027274494789241766, + "loss": 1.3316, + "step": 6848 + }, + { + "epoch": 0.7224683544303797, + "grad_norm": 0.6614194512367249, + "learning_rate": 0.00027255130507474276, + "loss": 1.3656, + "step": 6849 + }, + { + "epoch": 0.7225738396624473, + "grad_norm": 0.6484601497650146, + "learning_rate": 0.00027235771575757466, + "loss": 1.3257, + "step": 6850 + }, + { + "epoch": 0.7226793248945148, + "grad_norm": 0.6575573682785034, + "learning_rate": 0.00027216417996260654, + "loss": 1.3584, + "step": 6851 + }, + { + "epoch": 0.7227848101265822, + "grad_norm": 0.6782445311546326, + "learning_rate": 0.00027197069771152464, + "loss": 1.3639, + "step": 6852 + }, + { + "epoch": 0.7228902953586498, + "grad_norm": 0.6869509220123291, + "learning_rate": 0.0002717772690260098, + "loss": 1.3348, + "step": 6853 + }, + { + "epoch": 0.7229957805907173, + "grad_norm": 0.655297040939331, + "learning_rate": 0.0002715838939277366, + "loss": 1.3518, + "step": 6854 + }, + { + "epoch": 0.7231012658227848, + "grad_norm": 0.7010279893875122, + "learning_rate": 0.0002713905724383737, + "loss": 1.3071, + "step": 6855 + }, + { + "epoch": 0.7232067510548523, + "grad_norm": 0.6112976670265198, + "learning_rate": 0.00027119730457958376, + "loss": 1.3363, + 
"step": 6856 + }, + { + "epoch": 0.7233122362869199, + "grad_norm": 0.6725868582725525, + "learning_rate": 0.0002710040903730233, + "loss": 1.3068, + "step": 6857 + }, + { + "epoch": 0.7234177215189873, + "grad_norm": 0.6891632676124573, + "learning_rate": 0.00027081092984034303, + "loss": 1.3859, + "step": 6858 + }, + { + "epoch": 0.7235232067510549, + "grad_norm": 0.6547183990478516, + "learning_rate": 0.00027061782300318726, + "loss": 1.3873, + "step": 6859 + }, + { + "epoch": 0.7236286919831224, + "grad_norm": 0.6393678188323975, + "learning_rate": 0.0002704247698831951, + "loss": 1.3414, + "step": 6860 + }, + { + "epoch": 0.7237341772151898, + "grad_norm": 0.6680663228034973, + "learning_rate": 0.00027023177050199885, + "loss": 1.3125, + "step": 6861 + }, + { + "epoch": 0.7238396624472574, + "grad_norm": 0.6896370053291321, + "learning_rate": 0.00027003882488122507, + "loss": 1.384, + "step": 6862 + }, + { + "epoch": 0.7239451476793249, + "grad_norm": 0.7125095725059509, + "learning_rate": 0.0002698459330424942, + "loss": 1.358, + "step": 6863 + }, + { + "epoch": 0.7240506329113924, + "grad_norm": 0.6531227827072144, + "learning_rate": 0.0002696530950074208, + "loss": 1.3703, + "step": 6864 + }, + { + "epoch": 0.7241561181434599, + "grad_norm": 0.7293266654014587, + "learning_rate": 0.00026946031079761346, + "loss": 1.3488, + "step": 6865 + }, + { + "epoch": 0.7242616033755275, + "grad_norm": 0.683098554611206, + "learning_rate": 0.00026926758043467435, + "loss": 1.3333, + "step": 6866 + }, + { + "epoch": 0.7243670886075949, + "grad_norm": 0.632899820804596, + "learning_rate": 0.00026907490394020004, + "loss": 1.3352, + "step": 6867 + }, + { + "epoch": 0.7244725738396625, + "grad_norm": 0.6632471084594727, + "learning_rate": 0.00026888228133578086, + "loss": 1.3448, + "step": 6868 + }, + { + "epoch": 0.72457805907173, + "grad_norm": 0.6828827857971191, + "learning_rate": 0.0002686897126430009, + "loss": 1.3274, + "step": 6869 + }, + { + "epoch": 
0.7246835443037974, + "grad_norm": 0.6216486692428589, + "learning_rate": 0.0002684971978834389, + "loss": 1.3236, + "step": 6870 + }, + { + "epoch": 0.724789029535865, + "grad_norm": 0.6426810026168823, + "learning_rate": 0.00026830473707866684, + "loss": 1.3575, + "step": 6871 + }, + { + "epoch": 0.7248945147679325, + "grad_norm": 0.6485047340393066, + "learning_rate": 0.00026811233025025096, + "loss": 1.3583, + "step": 6872 + }, + { + "epoch": 0.725, + "grad_norm": 0.6814596652984619, + "learning_rate": 0.00026791997741975134, + "loss": 1.3452, + "step": 6873 + }, + { + "epoch": 0.7251054852320675, + "grad_norm": 0.6386829018592834, + "learning_rate": 0.00026772767860872216, + "loss": 1.3512, + "step": 6874 + }, + { + "epoch": 0.7252109704641351, + "grad_norm": 0.7693555355072021, + "learning_rate": 0.00026753543383871143, + "loss": 1.3455, + "step": 6875 + }, + { + "epoch": 0.7253164556962025, + "grad_norm": 0.6306860446929932, + "learning_rate": 0.0002673432431312611, + "loss": 1.3419, + "step": 6876 + }, + { + "epoch": 0.7254219409282701, + "grad_norm": 0.6167746782302856, + "learning_rate": 0.0002671511065079071, + "loss": 1.3391, + "step": 6877 + }, + { + "epoch": 0.7255274261603376, + "grad_norm": 0.8262376189231873, + "learning_rate": 0.00026695902399017935, + "loss": 1.3536, + "step": 6878 + }, + { + "epoch": 0.725632911392405, + "grad_norm": 0.6305549144744873, + "learning_rate": 0.00026676699559960145, + "loss": 1.3807, + "step": 6879 + }, + { + "epoch": 0.7257383966244726, + "grad_norm": 0.629276692867279, + "learning_rate": 0.0002665750213576914, + "loss": 1.3558, + "step": 6880 + }, + { + "epoch": 0.72584388185654, + "grad_norm": 0.6326093673706055, + "learning_rate": 0.0002663831012859609, + "loss": 1.3473, + "step": 6881 + }, + { + "epoch": 0.7259493670886076, + "grad_norm": 0.6587929725646973, + "learning_rate": 0.0002661912354059154, + "loss": 1.3512, + "step": 6882 + }, + { + "epoch": 0.7260548523206751, + "grad_norm": 0.7072675824165344, + 
"learning_rate": 0.0002659994237390545, + "loss": 1.3497, + "step": 6883 + }, + { + "epoch": 0.7261603375527426, + "grad_norm": 0.6919571757316589, + "learning_rate": 0.0002658076663068715, + "loss": 1.3344, + "step": 6884 + }, + { + "epoch": 0.7262658227848101, + "grad_norm": 0.6632158756256104, + "learning_rate": 0.00026561596313085396, + "loss": 1.3756, + "step": 6885 + }, + { + "epoch": 0.7263713080168777, + "grad_norm": 0.8005536198616028, + "learning_rate": 0.00026542431423248313, + "loss": 1.3154, + "step": 6886 + }, + { + "epoch": 0.7264767932489451, + "grad_norm": 0.6563952565193176, + "learning_rate": 0.00026523271963323414, + "loss": 1.3489, + "step": 6887 + }, + { + "epoch": 0.7265822784810126, + "grad_norm": 0.662540853023529, + "learning_rate": 0.0002650411793545763, + "loss": 1.3521, + "step": 6888 + }, + { + "epoch": 0.7266877637130802, + "grad_norm": 0.6398127675056458, + "learning_rate": 0.00026484969341797224, + "loss": 1.3315, + "step": 6889 + }, + { + "epoch": 0.7267932489451476, + "grad_norm": 0.6392669081687927, + "learning_rate": 0.0002646582618448794, + "loss": 1.3156, + "step": 6890 + }, + { + "epoch": 0.7268987341772152, + "grad_norm": 0.6321114301681519, + "learning_rate": 0.00026446688465674845, + "loss": 1.3609, + "step": 6891 + }, + { + "epoch": 0.7270042194092827, + "grad_norm": 0.7762393951416016, + "learning_rate": 0.0002642755618750242, + "loss": 1.3671, + "step": 6892 + }, + { + "epoch": 0.7271097046413502, + "grad_norm": 0.6609459519386292, + "learning_rate": 0.0002640842935211453, + "loss": 1.3551, + "step": 6893 + }, + { + "epoch": 0.7272151898734177, + "grad_norm": 0.7541725635528564, + "learning_rate": 0.0002638930796165443, + "loss": 1.3242, + "step": 6894 + }, + { + "epoch": 0.7273206751054853, + "grad_norm": 0.8602156639099121, + "learning_rate": 0.00026370192018264766, + "loss": 1.3189, + "step": 6895 + }, + { + "epoch": 0.7274261603375527, + "grad_norm": 0.6768507361412048, + "learning_rate": 0.00026351081524087573, + 
"loss": 1.3395, + "step": 6896 + }, + { + "epoch": 0.7275316455696202, + "grad_norm": 0.6647036671638489, + "learning_rate": 0.0002633197648126429, + "loss": 1.351, + "step": 6897 + }, + { + "epoch": 0.7276371308016878, + "grad_norm": 0.7599960565567017, + "learning_rate": 0.0002631287689193571, + "loss": 1.354, + "step": 6898 + }, + { + "epoch": 0.7277426160337552, + "grad_norm": 0.6188682913780212, + "learning_rate": 0.0002629378275824204, + "loss": 1.3361, + "step": 6899 + }, + { + "epoch": 0.7278481012658228, + "grad_norm": 0.6831880807876587, + "learning_rate": 0.00026274694082322896, + "loss": 1.3309, + "step": 6900 + }, + { + "epoch": 0.7279535864978903, + "grad_norm": 0.6614338159561157, + "learning_rate": 0.00026255610866317253, + "loss": 1.3739, + "step": 6901 + }, + { + "epoch": 0.7280590717299578, + "grad_norm": 0.6356891393661499, + "learning_rate": 0.0002623653311236347, + "loss": 1.3559, + "step": 6902 + }, + { + "epoch": 0.7281645569620253, + "grad_norm": 0.6477776765823364, + "learning_rate": 0.0002621746082259931, + "loss": 1.375, + "step": 6903 + }, + { + "epoch": 0.7282700421940929, + "grad_norm": 0.645251452922821, + "learning_rate": 0.0002619839399916192, + "loss": 1.347, + "step": 6904 + }, + { + "epoch": 0.7283755274261603, + "grad_norm": 0.6318035125732422, + "learning_rate": 0.0002617933264418782, + "loss": 1.3528, + "step": 6905 + }, + { + "epoch": 0.7284810126582278, + "grad_norm": 0.6818878650665283, + "learning_rate": 0.00026160276759812953, + "loss": 1.3652, + "step": 6906 + }, + { + "epoch": 0.7285864978902954, + "grad_norm": 0.662124514579773, + "learning_rate": 0.00026141226348172595, + "loss": 1.3662, + "step": 6907 + }, + { + "epoch": 0.7286919831223628, + "grad_norm": 0.7511484026908875, + "learning_rate": 0.00026122181411401444, + "loss": 1.3658, + "step": 6908 + }, + { + "epoch": 0.7287974683544304, + "grad_norm": 0.6618821024894714, + "learning_rate": 0.00026103141951633617, + "loss": 1.4094, + "step": 6909 + }, + { + 
"epoch": 0.7289029535864979, + "grad_norm": 0.6221582889556885, + "learning_rate": 0.0002608410797100255, + "loss": 1.3547, + "step": 6910 + }, + { + "epoch": 0.7290084388185654, + "grad_norm": 0.6725780963897705, + "learning_rate": 0.000260650794716411, + "loss": 1.3646, + "step": 6911 + }, + { + "epoch": 0.7291139240506329, + "grad_norm": 0.703460156917572, + "learning_rate": 0.00026046056455681515, + "loss": 1.315, + "step": 6912 + }, + { + "epoch": 0.7292194092827005, + "grad_norm": 0.6335538625717163, + "learning_rate": 0.00026027038925255407, + "loss": 1.3573, + "step": 6913 + }, + { + "epoch": 0.7293248945147679, + "grad_norm": 0.725967526435852, + "learning_rate": 0.00026008026882493783, + "loss": 1.335, + "step": 6914 + }, + { + "epoch": 0.7294303797468354, + "grad_norm": 0.6355571746826172, + "learning_rate": 0.00025989020329527057, + "loss": 1.3532, + "step": 6915 + }, + { + "epoch": 0.729535864978903, + "grad_norm": 0.6376579999923706, + "learning_rate": 0.0002597001926848498, + "loss": 1.3367, + "step": 6916 + }, + { + "epoch": 0.7296413502109704, + "grad_norm": 0.6395505666732788, + "learning_rate": 0.00025951023701496713, + "loss": 1.3579, + "step": 6917 + }, + { + "epoch": 0.729746835443038, + "grad_norm": 0.6863495111465454, + "learning_rate": 0.0002593203363069084, + "loss": 1.3591, + "step": 6918 + }, + { + "epoch": 0.7298523206751055, + "grad_norm": 0.6452447175979614, + "learning_rate": 0.00025913049058195277, + "loss": 1.3499, + "step": 6919 + }, + { + "epoch": 0.729957805907173, + "grad_norm": 0.76589035987854, + "learning_rate": 0.0002589406998613733, + "loss": 1.3466, + "step": 6920 + }, + { + "epoch": 0.7300632911392405, + "grad_norm": 0.6752560138702393, + "learning_rate": 0.0002587509641664372, + "loss": 1.3602, + "step": 6921 + }, + { + "epoch": 0.7301687763713081, + "grad_norm": 0.6572577357292175, + "learning_rate": 0.0002585612835184051, + "loss": 1.329, + "step": 6922 + }, + { + "epoch": 0.7302742616033755, + "grad_norm": 
0.7549194693565369, + "learning_rate": 0.00025837165793853164, + "loss": 1.3557, + "step": 6923 + }, + { + "epoch": 0.730379746835443, + "grad_norm": 0.7181456685066223, + "learning_rate": 0.0002581820874480654, + "loss": 1.3561, + "step": 6924 + }, + { + "epoch": 0.7304852320675106, + "grad_norm": 0.694379985332489, + "learning_rate": 0.0002579925720682487, + "loss": 1.3531, + "step": 6925 + }, + { + "epoch": 0.730590717299578, + "grad_norm": 0.6788429021835327, + "learning_rate": 0.0002578031118203174, + "loss": 1.3683, + "step": 6926 + }, + { + "epoch": 0.7306962025316456, + "grad_norm": 0.6442683935165405, + "learning_rate": 0.00025761370672550203, + "loss": 1.336, + "step": 6927 + }, + { + "epoch": 0.7308016877637131, + "grad_norm": 0.6318072080612183, + "learning_rate": 0.0002574243568050261, + "loss": 1.3417, + "step": 6928 + }, + { + "epoch": 0.7309071729957806, + "grad_norm": 0.637641191482544, + "learning_rate": 0.0002572350620801072, + "loss": 1.3545, + "step": 6929 + }, + { + "epoch": 0.7310126582278481, + "grad_norm": 0.6805310845375061, + "learning_rate": 0.0002570458225719567, + "loss": 1.3847, + "step": 6930 + }, + { + "epoch": 0.7311181434599157, + "grad_norm": 0.6200008392333984, + "learning_rate": 0.0002568566383017799, + "loss": 1.364, + "step": 6931 + }, + { + "epoch": 0.7312236286919831, + "grad_norm": 0.6415852904319763, + "learning_rate": 0.0002566675092907757, + "loss": 1.3303, + "step": 6932 + }, + { + "epoch": 0.7313291139240506, + "grad_norm": 0.6750587224960327, + "learning_rate": 0.0002564784355601372, + "loss": 1.3792, + "step": 6933 + }, + { + "epoch": 0.7314345991561182, + "grad_norm": 0.6946531534194946, + "learning_rate": 0.0002562894171310508, + "loss": 1.345, + "step": 6934 + }, + { + "epoch": 0.7315400843881856, + "grad_norm": 0.6534045934677124, + "learning_rate": 0.00025610045402469695, + "loss": 1.3968, + "step": 6935 + }, + { + "epoch": 0.7316455696202532, + "grad_norm": 0.7575798630714417, + "learning_rate": 
0.0002559115462622503, + "loss": 1.304, + "step": 6936 + }, + { + "epoch": 0.7317510548523207, + "grad_norm": 0.7206801176071167, + "learning_rate": 0.00025572269386487853, + "loss": 1.3406, + "step": 6937 + }, + { + "epoch": 0.7318565400843882, + "grad_norm": 0.7369086146354675, + "learning_rate": 0.0002555338968537436, + "loss": 1.3592, + "step": 6938 + }, + { + "epoch": 0.7319620253164557, + "grad_norm": 0.6965824961662292, + "learning_rate": 0.0002553451552500012, + "loss": 1.3692, + "step": 6939 + }, + { + "epoch": 0.7320675105485233, + "grad_norm": 0.714180052280426, + "learning_rate": 0.00025515646907480074, + "loss": 1.372, + "step": 6940 + }, + { + "epoch": 0.7321729957805907, + "grad_norm": 0.7549611330032349, + "learning_rate": 0.0002549678383492854, + "loss": 1.3712, + "step": 6941 + }, + { + "epoch": 0.7322784810126582, + "grad_norm": 0.6592861413955688, + "learning_rate": 0.00025477926309459224, + "loss": 1.3157, + "step": 6942 + }, + { + "epoch": 0.7323839662447258, + "grad_norm": 0.6650375127792358, + "learning_rate": 0.00025459074333185176, + "loss": 1.3312, + "step": 6943 + }, + { + "epoch": 0.7324894514767932, + "grad_norm": 0.6760206818580627, + "learning_rate": 0.0002544022790821891, + "loss": 1.3374, + "step": 6944 + }, + { + "epoch": 0.7325949367088608, + "grad_norm": 0.7290925979614258, + "learning_rate": 0.0002542138703667224, + "loss": 1.3559, + "step": 6945 + }, + { + "epoch": 0.7327004219409282, + "grad_norm": 0.6179974675178528, + "learning_rate": 0.00025402551720656366, + "loss": 1.3306, + "step": 6946 + }, + { + "epoch": 0.7328059071729958, + "grad_norm": 0.6412636637687683, + "learning_rate": 0.0002538372196228189, + "loss": 1.3526, + "step": 6947 + }, + { + "epoch": 0.7329113924050633, + "grad_norm": 0.6453697681427002, + "learning_rate": 0.00025364897763658777, + "loss": 1.3789, + "step": 6948 + }, + { + "epoch": 0.7330168776371307, + "grad_norm": 0.6558068990707397, + "learning_rate": 0.0002534607912689637, + "loss": 1.3854, + 
"step": 6949 + }, + { + "epoch": 0.7331223628691983, + "grad_norm": 0.6559797525405884, + "learning_rate": 0.00025327266054103395, + "loss": 1.346, + "step": 6950 + }, + { + "epoch": 0.7332278481012658, + "grad_norm": 0.626621663570404, + "learning_rate": 0.0002530845854738796, + "loss": 1.3516, + "step": 6951 + }, + { + "epoch": 0.7333333333333333, + "grad_norm": 0.6348671913146973, + "learning_rate": 0.0002528965660885749, + "loss": 1.3422, + "step": 6952 + }, + { + "epoch": 0.7334388185654008, + "grad_norm": 0.6363858580589294, + "learning_rate": 0.00025270860240618904, + "loss": 1.3396, + "step": 6953 + }, + { + "epoch": 0.7335443037974684, + "grad_norm": 0.6448925137519836, + "learning_rate": 0.000252520694447784, + "loss": 1.3476, + "step": 6954 + }, + { + "epoch": 0.7336497890295358, + "grad_norm": 0.6664122343063354, + "learning_rate": 0.0002523328422344158, + "loss": 1.3512, + "step": 6955 + }, + { + "epoch": 0.7337552742616034, + "grad_norm": 0.6794251203536987, + "learning_rate": 0.0002521450457871343, + "loss": 1.331, + "step": 6956 + }, + { + "epoch": 0.7338607594936709, + "grad_norm": 0.6523866653442383, + "learning_rate": 0.0002519573051269828, + "loss": 1.403, + "step": 6957 + }, + { + "epoch": 0.7339662447257383, + "grad_norm": 0.712145209312439, + "learning_rate": 0.0002517696202749988, + "loss": 1.3212, + "step": 6958 + }, + { + "epoch": 0.7340717299578059, + "grad_norm": 0.705480694770813, + "learning_rate": 0.00025158199125221325, + "loss": 1.3195, + "step": 6959 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 0.632573127746582, + "learning_rate": 0.0002513944180796509, + "loss": 1.3527, + "step": 6960 + }, + { + "epoch": 0.7342827004219409, + "grad_norm": 0.6922473907470703, + "learning_rate": 0.0002512069007783301, + "loss": 1.3889, + "step": 6961 + }, + { + "epoch": 0.7343881856540084, + "grad_norm": 0.6185136437416077, + "learning_rate": 0.00025101943936926347, + "loss": 1.3302, + "step": 6962 + }, + { + "epoch": 0.734493670886076, + 
"grad_norm": 0.6165253520011902, + "learning_rate": 0.0002508320338734568, + "loss": 1.3014, + "step": 6963 + }, + { + "epoch": 0.7345991561181434, + "grad_norm": 0.6674731969833374, + "learning_rate": 0.00025064468431190977, + "loss": 1.3546, + "step": 6964 + }, + { + "epoch": 0.734704641350211, + "grad_norm": 0.6529384255409241, + "learning_rate": 0.0002504573907056159, + "loss": 1.3237, + "step": 6965 + }, + { + "epoch": 0.7348101265822785, + "grad_norm": 0.6436036825180054, + "learning_rate": 0.00025027015307556234, + "loss": 1.3465, + "step": 6966 + }, + { + "epoch": 0.734915611814346, + "grad_norm": 0.625852644443512, + "learning_rate": 0.00025008297144273, + "loss": 1.3171, + "step": 6967 + }, + { + "epoch": 0.7350210970464135, + "grad_norm": 0.6787301898002625, + "learning_rate": 0.0002498958458280936, + "loss": 1.346, + "step": 6968 + }, + { + "epoch": 0.735126582278481, + "grad_norm": 0.6265354752540588, + "learning_rate": 0.0002497087762526211, + "loss": 1.3412, + "step": 6969 + }, + { + "epoch": 0.7352320675105485, + "grad_norm": 0.6066453456878662, + "learning_rate": 0.0002495217627372752, + "loss": 1.3413, + "step": 6970 + }, + { + "epoch": 0.735337552742616, + "grad_norm": 0.6713769435882568, + "learning_rate": 0.0002493348053030113, + "loss": 1.3273, + "step": 6971 + }, + { + "epoch": 0.7354430379746836, + "grad_norm": 0.6768689155578613, + "learning_rate": 0.0002491479039707791, + "loss": 1.3404, + "step": 6972 + }, + { + "epoch": 0.735548523206751, + "grad_norm": 0.6243783235549927, + "learning_rate": 0.00024896105876152165, + "loss": 1.3299, + "step": 6973 + }, + { + "epoch": 0.7356540084388186, + "grad_norm": 0.6395500302314758, + "learning_rate": 0.0002487742696961761, + "loss": 1.3355, + "step": 6974 + }, + { + "epoch": 0.7357594936708861, + "grad_norm": 0.7122139930725098, + "learning_rate": 0.0002485875367956729, + "loss": 1.3734, + "step": 6975 + }, + { + "epoch": 0.7358649789029535, + "grad_norm": 0.6197736263275146, + "learning_rate": 
0.00024840086008093645, + "loss": 1.3112, + "step": 6976 + }, + { + "epoch": 0.7359704641350211, + "grad_norm": 0.6282251477241516, + "learning_rate": 0.0002482142395728848, + "loss": 1.3738, + "step": 6977 + }, + { + "epoch": 0.7360759493670886, + "grad_norm": 0.6605056524276733, + "learning_rate": 0.0002480276752924295, + "loss": 1.3532, + "step": 6978 + }, + { + "epoch": 0.7361814345991561, + "grad_norm": 0.6938081979751587, + "learning_rate": 0.0002478411672604766, + "loss": 1.353, + "step": 6979 + }, + { + "epoch": 0.7362869198312236, + "grad_norm": 0.6317717432975769, + "learning_rate": 0.0002476547154979248, + "loss": 1.3307, + "step": 6980 + }, + { + "epoch": 0.7363924050632912, + "grad_norm": 0.6577715873718262, + "learning_rate": 0.00024746832002566703, + "loss": 1.3591, + "step": 6981 + }, + { + "epoch": 0.7364978902953586, + "grad_norm": 0.7865398526191711, + "learning_rate": 0.0002472819808645899, + "loss": 1.3277, + "step": 6982 + }, + { + "epoch": 0.7366033755274262, + "grad_norm": 0.7388427257537842, + "learning_rate": 0.0002470956980355735, + "loss": 1.3216, + "step": 6983 + }, + { + "epoch": 0.7367088607594937, + "grad_norm": 0.6384670734405518, + "learning_rate": 0.00024690947155949194, + "loss": 1.3643, + "step": 6984 + }, + { + "epoch": 0.7368143459915611, + "grad_norm": 0.8021848797798157, + "learning_rate": 0.0002467233014572127, + "loss": 1.3636, + "step": 6985 + }, + { + "epoch": 0.7369198312236287, + "grad_norm": 0.700200080871582, + "learning_rate": 0.00024653718774959713, + "loss": 1.3149, + "step": 6986 + }, + { + "epoch": 0.7370253164556962, + "grad_norm": 0.6642305850982666, + "learning_rate": 0.00024635113045749985, + "loss": 1.3162, + "step": 6987 + }, + { + "epoch": 0.7371308016877637, + "grad_norm": 0.6795907020568848, + "learning_rate": 0.00024616512960177014, + "loss": 1.3657, + "step": 6988 + }, + { + "epoch": 0.7372362869198312, + "grad_norm": 0.8230381608009338, + "learning_rate": 0.00024597918520324994, + "loss": 1.3856, + 
"step": 6989 + }, + { + "epoch": 0.7373417721518988, + "grad_norm": 0.6725034713745117, + "learning_rate": 0.00024579329728277534, + "loss": 1.3138, + "step": 6990 + }, + { + "epoch": 0.7374472573839662, + "grad_norm": 0.7139604687690735, + "learning_rate": 0.00024560746586117603, + "loss": 1.3787, + "step": 6991 + }, + { + "epoch": 0.7375527426160338, + "grad_norm": 0.7637723088264465, + "learning_rate": 0.00024542169095927526, + "loss": 1.3848, + "step": 6992 + }, + { + "epoch": 0.7376582278481013, + "grad_norm": 0.6359249353408813, + "learning_rate": 0.00024523597259789004, + "loss": 1.3272, + "step": 6993 + }, + { + "epoch": 0.7377637130801687, + "grad_norm": 0.6689314842224121, + "learning_rate": 0.0002450503107978311, + "loss": 1.3478, + "step": 6994 + }, + { + "epoch": 0.7378691983122363, + "grad_norm": 0.6660280227661133, + "learning_rate": 0.00024486470557990247, + "loss": 1.3546, + "step": 6995 + }, + { + "epoch": 0.7379746835443038, + "grad_norm": 0.7001955509185791, + "learning_rate": 0.0002446791569649027, + "loss": 1.361, + "step": 6996 + }, + { + "epoch": 0.7380801687763713, + "grad_norm": 0.6828968524932861, + "learning_rate": 0.0002444936649736232, + "loss": 1.3425, + "step": 6997 + }, + { + "epoch": 0.7381856540084388, + "grad_norm": 0.7822443246841431, + "learning_rate": 0.00024430822962684905, + "loss": 1.3335, + "step": 6998 + }, + { + "epoch": 0.7382911392405064, + "grad_norm": 0.7402991652488708, + "learning_rate": 0.00024412285094535952, + "loss": 1.3852, + "step": 6999 + }, + { + "epoch": 0.7383966244725738, + "grad_norm": 0.6606146693229675, + "learning_rate": 0.00024393752894992708, + "loss": 1.3452, + "step": 7000 + }, + { + "epoch": 0.7385021097046414, + "grad_norm": 0.7405679225921631, + "learning_rate": 0.00024375226366131787, + "loss": 1.3263, + "step": 7001 + }, + { + "epoch": 0.7386075949367089, + "grad_norm": 0.682252824306488, + "learning_rate": 0.00024356705510029196, + "loss": 1.3458, + "step": 7002 + }, + { + "epoch": 
0.7387130801687763, + "grad_norm": 0.6578314900398254, + "learning_rate": 0.00024338190328760282, + "loss": 1.3373, + "step": 7003 + }, + { + "epoch": 0.7388185654008439, + "grad_norm": 0.7700976133346558, + "learning_rate": 0.00024319680824399736, + "loss": 1.3231, + "step": 7004 + }, + { + "epoch": 0.7389240506329114, + "grad_norm": 0.6963123083114624, + "learning_rate": 0.00024301176999021702, + "loss": 1.3022, + "step": 7005 + }, + { + "epoch": 0.7390295358649789, + "grad_norm": 0.6545720100402832, + "learning_rate": 0.00024282678854699592, + "loss": 1.3198, + "step": 7006 + }, + { + "epoch": 0.7391350210970464, + "grad_norm": 0.6405913829803467, + "learning_rate": 0.00024264186393506206, + "loss": 1.3362, + "step": 7007 + }, + { + "epoch": 0.739240506329114, + "grad_norm": 0.6632754802703857, + "learning_rate": 0.00024245699617513733, + "loss": 1.3085, + "step": 7008 + }, + { + "epoch": 0.7393459915611814, + "grad_norm": 0.6623950600624084, + "learning_rate": 0.00024227218528793696, + "loss": 1.3339, + "step": 7009 + }, + { + "epoch": 0.739451476793249, + "grad_norm": 0.6492516398429871, + "learning_rate": 0.00024208743129417004, + "loss": 1.3329, + "step": 7010 + }, + { + "epoch": 0.7395569620253165, + "grad_norm": 0.6390478014945984, + "learning_rate": 0.00024190273421453913, + "loss": 1.3549, + "step": 7011 + }, + { + "epoch": 0.739662447257384, + "grad_norm": 0.6316817998886108, + "learning_rate": 0.00024171809406974047, + "loss": 1.325, + "step": 7012 + }, + { + "epoch": 0.7397679324894515, + "grad_norm": 0.597277820110321, + "learning_rate": 0.0002415335108804636, + "loss": 1.3482, + "step": 7013 + }, + { + "epoch": 0.7398734177215189, + "grad_norm": 0.6445124745368958, + "learning_rate": 0.0002413489846673925, + "loss": 1.3205, + "step": 7014 + }, + { + "epoch": 0.7399789029535865, + "grad_norm": 0.6393406391143799, + "learning_rate": 0.0002411645154512041, + "loss": 1.3485, + "step": 7015 + }, + { + "epoch": 0.740084388185654, + "grad_norm": 
0.6373549699783325, + "learning_rate": 0.00024098010325256897, + "loss": 1.3612, + "step": 7016 + }, + { + "epoch": 0.7401898734177215, + "grad_norm": 0.6477957367897034, + "learning_rate": 0.00024079574809215149, + "loss": 1.3424, + "step": 7017 + }, + { + "epoch": 0.740295358649789, + "grad_norm": 0.6863784193992615, + "learning_rate": 0.00024061144999060956, + "loss": 1.3625, + "step": 7018 + }, + { + "epoch": 0.7404008438818566, + "grad_norm": 0.8485403060913086, + "learning_rate": 0.00024042720896859471, + "loss": 1.3019, + "step": 7019 + }, + { + "epoch": 0.740506329113924, + "grad_norm": 0.6994125843048096, + "learning_rate": 0.00024024302504675206, + "loss": 1.3536, + "step": 7020 + }, + { + "epoch": 0.7406118143459915, + "grad_norm": 0.8383228778839111, + "learning_rate": 0.00024005889824572004, + "loss": 1.3497, + "step": 7021 + }, + { + "epoch": 0.7407172995780591, + "grad_norm": 0.6975986957550049, + "learning_rate": 0.00023987482858613154, + "loss": 1.3469, + "step": 7022 + }, + { + "epoch": 0.7408227848101265, + "grad_norm": 0.6258389353752136, + "learning_rate": 0.0002396908160886123, + "loss": 1.3017, + "step": 7023 + }, + { + "epoch": 0.7409282700421941, + "grad_norm": 0.9113970994949341, + "learning_rate": 0.0002395068607737816, + "loss": 1.3247, + "step": 7024 + }, + { + "epoch": 0.7410337552742616, + "grad_norm": 0.6842756271362305, + "learning_rate": 0.0002393229626622528, + "loss": 1.3294, + "step": 7025 + }, + { + "epoch": 0.7411392405063291, + "grad_norm": 0.6388802528381348, + "learning_rate": 0.00023913912177463248, + "loss": 1.3553, + "step": 7026 + }, + { + "epoch": 0.7412447257383966, + "grad_norm": 0.8460474610328674, + "learning_rate": 0.0002389553381315209, + "loss": 1.3907, + "step": 7027 + }, + { + "epoch": 0.7413502109704642, + "grad_norm": 0.6872774362564087, + "learning_rate": 0.00023877161175351206, + "loss": 1.3793, + "step": 7028 + }, + { + "epoch": 0.7414556962025316, + "grad_norm": 0.6547296047210693, + "learning_rate": 
0.00023858794266119323, + "loss": 1.3732, + "step": 7029 + }, + { + "epoch": 0.7415611814345991, + "grad_norm": 0.6853612661361694, + "learning_rate": 0.0002384043308751454, + "loss": 1.3245, + "step": 7030 + }, + { + "epoch": 0.7416666666666667, + "grad_norm": 0.7452306747436523, + "learning_rate": 0.0002382207764159436, + "loss": 1.3348, + "step": 7031 + }, + { + "epoch": 0.7417721518987341, + "grad_norm": 0.6363875865936279, + "learning_rate": 0.00023803727930415568, + "loss": 1.3743, + "step": 7032 + }, + { + "epoch": 0.7418776371308017, + "grad_norm": 0.7196474671363831, + "learning_rate": 0.00023785383956034353, + "loss": 1.3697, + "step": 7033 + }, + { + "epoch": 0.7419831223628692, + "grad_norm": 0.8138051629066467, + "learning_rate": 0.00023767045720506243, + "loss": 1.3304, + "step": 7034 + }, + { + "epoch": 0.7420886075949367, + "grad_norm": 0.7079886794090271, + "learning_rate": 0.00023748713225886137, + "loss": 1.3595, + "step": 7035 + }, + { + "epoch": 0.7421940928270042, + "grad_norm": 0.777258574962616, + "learning_rate": 0.0002373038647422827, + "loss": 1.3368, + "step": 7036 + }, + { + "epoch": 0.7422995780590718, + "grad_norm": 0.9008048176765442, + "learning_rate": 0.00023712065467586252, + "loss": 1.3489, + "step": 7037 + }, + { + "epoch": 0.7424050632911392, + "grad_norm": 0.6866219639778137, + "learning_rate": 0.00023693750208013045, + "loss": 1.3524, + "step": 7038 + }, + { + "epoch": 0.7425105485232067, + "grad_norm": 0.6997107863426208, + "learning_rate": 0.00023675440697560943, + "loss": 1.3822, + "step": 7039 + }, + { + "epoch": 0.7426160337552743, + "grad_norm": 0.9434595108032227, + "learning_rate": 0.00023657136938281653, + "loss": 1.322, + "step": 7040 + }, + { + "epoch": 0.7427215189873417, + "grad_norm": 0.653296172618866, + "learning_rate": 0.00023638838932226196, + "loss": 1.3027, + "step": 7041 + }, + { + "epoch": 0.7428270042194093, + "grad_norm": 0.7237306833267212, + "learning_rate": 0.00023620546681444942, + "loss": 1.4033, 
+ "step": 7042 + }, + { + "epoch": 0.7429324894514768, + "grad_norm": 0.747134268283844, + "learning_rate": 0.00023602260187987635, + "loss": 1.3645, + "step": 7043 + }, + { + "epoch": 0.7430379746835443, + "grad_norm": 0.6315656304359436, + "learning_rate": 0.0002358397945390336, + "loss": 1.3235, + "step": 7044 + }, + { + "epoch": 0.7431434599156118, + "grad_norm": 0.6943070292472839, + "learning_rate": 0.0002356570448124058, + "loss": 1.3558, + "step": 7045 + }, + { + "epoch": 0.7432489451476794, + "grad_norm": 0.7836173176765442, + "learning_rate": 0.00023547435272047083, + "loss": 1.3468, + "step": 7046 + }, + { + "epoch": 0.7433544303797468, + "grad_norm": 0.649925172328949, + "learning_rate": 0.00023529171828370033, + "loss": 1.3171, + "step": 7047 + }, + { + "epoch": 0.7434599156118143, + "grad_norm": 0.6854608654975891, + "learning_rate": 0.0002351091415225591, + "loss": 1.3427, + "step": 7048 + }, + { + "epoch": 0.7435654008438819, + "grad_norm": 0.7096987962722778, + "learning_rate": 0.0002349266224575063, + "loss": 1.3427, + "step": 7049 + }, + { + "epoch": 0.7436708860759493, + "grad_norm": 0.7070349454879761, + "learning_rate": 0.00023474416110899377, + "loss": 1.3074, + "step": 7050 + }, + { + "epoch": 0.7437763713080169, + "grad_norm": 0.667643129825592, + "learning_rate": 0.00023456175749746736, + "loss": 1.3468, + "step": 7051 + }, + { + "epoch": 0.7438818565400844, + "grad_norm": 0.6222382187843323, + "learning_rate": 0.0002343794116433662, + "loss": 1.3091, + "step": 7052 + }, + { + "epoch": 0.7439873417721519, + "grad_norm": 0.627901017665863, + "learning_rate": 0.00023419712356712307, + "loss": 1.3042, + "step": 7053 + }, + { + "epoch": 0.7440928270042194, + "grad_norm": 0.7847193479537964, + "learning_rate": 0.00023401489328916432, + "loss": 1.3359, + "step": 7054 + }, + { + "epoch": 0.744198312236287, + "grad_norm": 0.663381040096283, + "learning_rate": 0.00023383272082990963, + "loss": 1.3728, + "step": 7055 + }, + { + "epoch": 
0.7443037974683544, + "grad_norm": 0.7548450827598572, + "learning_rate": 0.00023365060620977223, + "loss": 1.3764, + "step": 7056 + }, + { + "epoch": 0.744409282700422, + "grad_norm": 0.7660924196243286, + "learning_rate": 0.00023346854944915937, + "loss": 1.3268, + "step": 7057 + }, + { + "epoch": 0.7445147679324895, + "grad_norm": 0.6552284359931946, + "learning_rate": 0.00023328655056847124, + "loss": 1.3704, + "step": 7058 + }, + { + "epoch": 0.7446202531645569, + "grad_norm": 0.6857419610023499, + "learning_rate": 0.0002331046095881017, + "loss": 1.3426, + "step": 7059 + }, + { + "epoch": 0.7447257383966245, + "grad_norm": 0.7423369288444519, + "learning_rate": 0.00023292272652843807, + "loss": 1.3518, + "step": 7060 + }, + { + "epoch": 0.744831223628692, + "grad_norm": 0.6973329186439514, + "learning_rate": 0.00023274090140986138, + "loss": 1.403, + "step": 7061 + }, + { + "epoch": 0.7449367088607595, + "grad_norm": 0.7067463994026184, + "learning_rate": 0.00023255913425274588, + "loss": 1.3262, + "step": 7062 + }, + { + "epoch": 0.745042194092827, + "grad_norm": 0.702724277973175, + "learning_rate": 0.00023237742507745964, + "loss": 1.324, + "step": 7063 + }, + { + "epoch": 0.7451476793248946, + "grad_norm": 0.6779473423957825, + "learning_rate": 0.00023219577390436397, + "loss": 1.3493, + "step": 7064 + }, + { + "epoch": 0.745253164556962, + "grad_norm": 0.6914492249488831, + "learning_rate": 0.00023201418075381364, + "loss": 1.3383, + "step": 7065 + }, + { + "epoch": 0.7453586497890295, + "grad_norm": 0.741312563419342, + "learning_rate": 0.00023183264564615756, + "loss": 1.3415, + "step": 7066 + }, + { + "epoch": 0.7454641350210971, + "grad_norm": 0.6416207551956177, + "learning_rate": 0.00023165116860173726, + "loss": 1.3508, + "step": 7067 + }, + { + "epoch": 0.7455696202531645, + "grad_norm": 0.6523120999336243, + "learning_rate": 0.00023146974964088825, + "loss": 1.366, + "step": 7068 + }, + { + "epoch": 0.7456751054852321, + "grad_norm": 
0.6488505601882935, + "learning_rate": 0.00023128838878393946, + "loss": 1.3597, + "step": 7069 + }, + { + "epoch": 0.7457805907172996, + "grad_norm": 0.6963202357292175, + "learning_rate": 0.00023110708605121317, + "loss": 1.3451, + "step": 7070 + }, + { + "epoch": 0.7458860759493671, + "grad_norm": 0.6882756352424622, + "learning_rate": 0.00023092584146302539, + "loss": 1.3738, + "step": 7071 + }, + { + "epoch": 0.7459915611814346, + "grad_norm": 0.6557942032814026, + "learning_rate": 0.0002307446550396854, + "loss": 1.3633, + "step": 7072 + }, + { + "epoch": 0.7460970464135022, + "grad_norm": 0.7200218439102173, + "learning_rate": 0.0002305635268014961, + "loss": 1.3428, + "step": 7073 + }, + { + "epoch": 0.7462025316455696, + "grad_norm": 0.6840187907218933, + "learning_rate": 0.0002303824567687534, + "loss": 1.3361, + "step": 7074 + }, + { + "epoch": 0.7463080168776371, + "grad_norm": 0.7109639644622803, + "learning_rate": 0.00023020144496174781, + "loss": 1.3259, + "step": 7075 + }, + { + "epoch": 0.7464135021097047, + "grad_norm": 0.6135163307189941, + "learning_rate": 0.0002300204914007622, + "loss": 1.3303, + "step": 7076 + }, + { + "epoch": 0.7465189873417721, + "grad_norm": 0.6210311055183411, + "learning_rate": 0.00022983959610607338, + "loss": 1.3217, + "step": 7077 + }, + { + "epoch": 0.7466244725738397, + "grad_norm": 0.7532829642295837, + "learning_rate": 0.00022965875909795164, + "loss": 1.3613, + "step": 7078 + }, + { + "epoch": 0.7467299578059071, + "grad_norm": 0.634232223033905, + "learning_rate": 0.00022947798039666051, + "loss": 1.3583, + "step": 7079 + }, + { + "epoch": 0.7468354430379747, + "grad_norm": 0.6139917373657227, + "learning_rate": 0.00022929726002245728, + "loss": 1.3676, + "step": 7080 + }, + { + "epoch": 0.7469409282700422, + "grad_norm": 0.6533403992652893, + "learning_rate": 0.00022911659799559254, + "loss": 1.3212, + "step": 7081 + }, + { + "epoch": 0.7470464135021097, + "grad_norm": 0.6752707958221436, + "learning_rate": 
0.00022893599433631014, + "loss": 1.3655, + "step": 7082 + }, + { + "epoch": 0.7471518987341772, + "grad_norm": 0.6676328182220459, + "learning_rate": 0.00022875544906484797, + "loss": 1.3541, + "step": 7083 + }, + { + "epoch": 0.7472573839662447, + "grad_norm": 0.6438317894935608, + "learning_rate": 0.00022857496220143696, + "loss": 1.3277, + "step": 7084 + }, + { + "epoch": 0.7473628691983122, + "grad_norm": 0.6287667155265808, + "learning_rate": 0.00022839453376630149, + "loss": 1.3477, + "step": 7085 + }, + { + "epoch": 0.7474683544303797, + "grad_norm": 0.6243011355400085, + "learning_rate": 0.00022821416377965948, + "loss": 1.3255, + "step": 7086 + }, + { + "epoch": 0.7475738396624473, + "grad_norm": 0.6260328888893127, + "learning_rate": 0.00022803385226172226, + "loss": 1.4057, + "step": 7087 + }, + { + "epoch": 0.7476793248945147, + "grad_norm": 0.6373695135116577, + "learning_rate": 0.0002278535992326947, + "loss": 1.3417, + "step": 7088 + }, + { + "epoch": 0.7477848101265823, + "grad_norm": 0.6390485167503357, + "learning_rate": 0.00022767340471277492, + "loss": 1.3674, + "step": 7089 + }, + { + "epoch": 0.7478902953586498, + "grad_norm": 0.7188734412193298, + "learning_rate": 0.00022749326872215472, + "loss": 1.283, + "step": 7090 + }, + { + "epoch": 0.7479957805907173, + "grad_norm": 0.6716201901435852, + "learning_rate": 0.00022731319128101906, + "loss": 1.3074, + "step": 7091 + }, + { + "epoch": 0.7481012658227848, + "grad_norm": 0.6402701139450073, + "learning_rate": 0.0002271331724095468, + "loss": 1.3225, + "step": 7092 + }, + { + "epoch": 0.7482067510548523, + "grad_norm": 0.7215625047683716, + "learning_rate": 0.0002269532121279099, + "loss": 1.3466, + "step": 7093 + }, + { + "epoch": 0.7483122362869198, + "grad_norm": 0.7056303024291992, + "learning_rate": 0.00022677331045627366, + "loss": 1.356, + "step": 7094 + }, + { + "epoch": 0.7484177215189873, + "grad_norm": 0.6266681551933289, + "learning_rate": 0.00022659346741479708, + "loss": 1.3614, 
+ "step": 7095 + }, + { + "epoch": 0.7485232067510549, + "grad_norm": 0.7299904227256775, + "learning_rate": 0.00022641368302363235, + "loss": 1.3599, + "step": 7096 + }, + { + "epoch": 0.7486286919831223, + "grad_norm": 0.6243714094161987, + "learning_rate": 0.00022623395730292538, + "loss": 1.3337, + "step": 7097 + }, + { + "epoch": 0.7487341772151899, + "grad_norm": 0.6240910887718201, + "learning_rate": 0.0002260542902728151, + "loss": 1.3648, + "step": 7098 + }, + { + "epoch": 0.7488396624472574, + "grad_norm": 0.6342421770095825, + "learning_rate": 0.00022587468195343436, + "loss": 1.3359, + "step": 7099 + }, + { + "epoch": 0.7489451476793249, + "grad_norm": 0.6754763126373291, + "learning_rate": 0.0002256951323649087, + "loss": 1.3187, + "step": 7100 + }, + { + "epoch": 0.7490506329113924, + "grad_norm": 0.761650562286377, + "learning_rate": 0.00022551564152735814, + "loss": 1.3881, + "step": 7101 + }, + { + "epoch": 0.74915611814346, + "grad_norm": 0.64478600025177, + "learning_rate": 0.00022533620946089524, + "loss": 1.3748, + "step": 7102 + }, + { + "epoch": 0.7492616033755274, + "grad_norm": 0.6729192733764648, + "learning_rate": 0.00022515683618562626, + "loss": 1.3483, + "step": 7103 + }, + { + "epoch": 0.7493670886075949, + "grad_norm": 0.6572458744049072, + "learning_rate": 0.00022497752172165095, + "loss": 1.3026, + "step": 7104 + }, + { + "epoch": 0.7494725738396625, + "grad_norm": 0.6871916055679321, + "learning_rate": 0.0002247982660890623, + "loss": 1.3603, + "step": 7105 + }, + { + "epoch": 0.7495780590717299, + "grad_norm": 0.6224493384361267, + "learning_rate": 0.00022461906930794687, + "loss": 1.3732, + "step": 7106 + }, + { + "epoch": 0.7496835443037975, + "grad_norm": 0.6882333755493164, + "learning_rate": 0.00022443993139838447, + "loss": 1.3525, + "step": 7107 + }, + { + "epoch": 0.749789029535865, + "grad_norm": 0.7118625640869141, + "learning_rate": 0.00022426085238044823, + "loss": 1.3293, + "step": 7108 + }, + { + "epoch": 
0.7498945147679325, + "grad_norm": 0.6703407168388367, + "learning_rate": 0.00022408183227420528, + "loss": 1.335, + "step": 7109 + }, + { + "epoch": 0.75, + "grad_norm": 0.7350265979766846, + "learning_rate": 0.00022390287109971547, + "loss": 1.3604, + "step": 7110 + }, + { + "epoch": 0.7501054852320675, + "grad_norm": 0.6946247220039368, + "learning_rate": 0.00022372396887703234, + "loss": 1.3979, + "step": 7111 + }, + { + "epoch": 0.750210970464135, + "grad_norm": 0.6933057904243469, + "learning_rate": 0.00022354512562620268, + "loss": 1.3433, + "step": 7112 + }, + { + "epoch": 0.7503164556962025, + "grad_norm": 0.747501015663147, + "learning_rate": 0.0002233663413672669, + "loss": 1.4049, + "step": 7113 + }, + { + "epoch": 0.7504219409282701, + "grad_norm": 0.7398675680160522, + "learning_rate": 0.00022318761612025856, + "loss": 1.3271, + "step": 7114 + }, + { + "epoch": 0.7505274261603375, + "grad_norm": 0.6787902116775513, + "learning_rate": 0.00022300894990520478, + "loss": 1.3339, + "step": 7115 + }, + { + "epoch": 0.7506329113924051, + "grad_norm": 0.7761039733886719, + "learning_rate": 0.000222830342742126, + "loss": 1.3182, + "step": 7116 + }, + { + "epoch": 0.7507383966244726, + "grad_norm": 0.8202971816062927, + "learning_rate": 0.00022265179465103574, + "loss": 1.351, + "step": 7117 + }, + { + "epoch": 0.75084388185654, + "grad_norm": 0.6646804809570312, + "learning_rate": 0.00022247330565194171, + "loss": 1.3106, + "step": 7118 + }, + { + "epoch": 0.7509493670886076, + "grad_norm": 0.8318293690681458, + "learning_rate": 0.0002222948757648443, + "loss": 1.2993, + "step": 7119 + }, + { + "epoch": 0.7510548523206751, + "grad_norm": 0.8234075903892517, + "learning_rate": 0.00022211650500973746, + "loss": 1.3832, + "step": 7120 + }, + { + "epoch": 0.7511603375527426, + "grad_norm": 0.6473687291145325, + "learning_rate": 0.0002219381934066084, + "loss": 1.3944, + "step": 7121 + }, + { + "epoch": 0.7512658227848101, + "grad_norm": 0.789442241191864, + 
"learning_rate": 0.00022175994097543806, + "loss": 1.3519, + "step": 7122 + }, + { + "epoch": 0.7513713080168777, + "grad_norm": 0.7605631351470947, + "learning_rate": 0.0002215817477362003, + "loss": 1.3394, + "step": 7123 + }, + { + "epoch": 0.7514767932489451, + "grad_norm": 0.6542356610298157, + "learning_rate": 0.00022140361370886265, + "loss": 1.3613, + "step": 7124 + }, + { + "epoch": 0.7515822784810127, + "grad_norm": 0.7709165215492249, + "learning_rate": 0.00022122553891338586, + "loss": 1.3722, + "step": 7125 + }, + { + "epoch": 0.7516877637130802, + "grad_norm": 0.7110629081726074, + "learning_rate": 0.00022104752336972396, + "loss": 1.3705, + "step": 7126 + }, + { + "epoch": 0.7517932489451477, + "grad_norm": 0.6453756093978882, + "learning_rate": 0.00022086956709782495, + "loss": 1.3581, + "step": 7127 + }, + { + "epoch": 0.7518987341772152, + "grad_norm": 0.7164291739463806, + "learning_rate": 0.0002206916701176293, + "loss": 1.3468, + "step": 7128 + }, + { + "epoch": 0.7520042194092827, + "grad_norm": 0.6825174689292908, + "learning_rate": 0.00022051383244907143, + "loss": 1.3009, + "step": 7129 + }, + { + "epoch": 0.7521097046413502, + "grad_norm": 0.6686068177223206, + "learning_rate": 0.0002203360541120789, + "loss": 1.3592, + "step": 7130 + }, + { + "epoch": 0.7522151898734177, + "grad_norm": 0.7867873311042786, + "learning_rate": 0.00022015833512657268, + "loss": 1.3358, + "step": 7131 + }, + { + "epoch": 0.7523206751054853, + "grad_norm": 0.6923576593399048, + "learning_rate": 0.000219980675512467, + "loss": 1.3256, + "step": 7132 + }, + { + "epoch": 0.7524261603375527, + "grad_norm": 0.6758718490600586, + "learning_rate": 0.00021980307528966962, + "loss": 1.3195, + "step": 7133 + }, + { + "epoch": 0.7525316455696203, + "grad_norm": 0.6716752648353577, + "learning_rate": 0.00021962553447808108, + "loss": 1.3128, + "step": 7134 + }, + { + "epoch": 0.7526371308016878, + "grad_norm": 0.6730222702026367, + "learning_rate": 0.00021944805309759643, 
+ "loss": 1.3068, + "step": 7135 + }, + { + "epoch": 0.7527426160337553, + "grad_norm": 0.66834956407547, + "learning_rate": 0.000219270631168103, + "loss": 1.3538, + "step": 7136 + }, + { + "epoch": 0.7528481012658228, + "grad_norm": 0.6843867897987366, + "learning_rate": 0.0002190932687094818, + "loss": 1.3768, + "step": 7137 + }, + { + "epoch": 0.7529535864978903, + "grad_norm": 0.6833679676055908, + "learning_rate": 0.00021891596574160715, + "loss": 1.3491, + "step": 7138 + }, + { + "epoch": 0.7530590717299578, + "grad_norm": 0.7153579592704773, + "learning_rate": 0.0002187387222843467, + "loss": 1.3472, + "step": 7139 + }, + { + "epoch": 0.7531645569620253, + "grad_norm": 0.6607233285903931, + "learning_rate": 0.00021856153835756164, + "loss": 1.3404, + "step": 7140 + }, + { + "epoch": 0.7532700421940929, + "grad_norm": 0.6356309056282043, + "learning_rate": 0.00021838441398110617, + "loss": 1.3474, + "step": 7141 + }, + { + "epoch": 0.7533755274261603, + "grad_norm": 0.6580913662910461, + "learning_rate": 0.000218207349174828, + "loss": 1.3342, + "step": 7142 + }, + { + "epoch": 0.7534810126582279, + "grad_norm": 0.6630666851997375, + "learning_rate": 0.0002180303439585678, + "loss": 1.3178, + "step": 7143 + }, + { + "epoch": 0.7535864978902953, + "grad_norm": 0.6369837522506714, + "learning_rate": 0.0002178533983521605, + "loss": 1.3534, + "step": 7144 + }, + { + "epoch": 0.7536919831223629, + "grad_norm": 0.6626689434051514, + "learning_rate": 0.0002176765123754334, + "loss": 1.3284, + "step": 7145 + }, + { + "epoch": 0.7537974683544304, + "grad_norm": 0.6712307333946228, + "learning_rate": 0.00021749968604820754, + "loss": 1.349, + "step": 7146 + }, + { + "epoch": 0.7539029535864978, + "grad_norm": 0.662495493888855, + "learning_rate": 0.00021732291939029712, + "loss": 1.3524, + "step": 7147 + }, + { + "epoch": 0.7540084388185654, + "grad_norm": 0.6476808786392212, + "learning_rate": 0.00021714621242150973, + "loss": 1.3125, + "step": 7148 + }, + { + 
"epoch": 0.7541139240506329, + "grad_norm": 0.6871123909950256, + "learning_rate": 0.0002169695651616463, + "loss": 1.2962, + "step": 7149 + }, + { + "epoch": 0.7542194092827004, + "grad_norm": 0.654325544834137, + "learning_rate": 0.00021679297763050104, + "loss": 1.3514, + "step": 7150 + }, + { + "epoch": 0.7543248945147679, + "grad_norm": 0.6793445944786072, + "learning_rate": 0.00021661644984786142, + "loss": 1.331, + "step": 7151 + }, + { + "epoch": 0.7544303797468355, + "grad_norm": 0.6941792368888855, + "learning_rate": 0.00021643998183350802, + "loss": 1.3381, + "step": 7152 + }, + { + "epoch": 0.7545358649789029, + "grad_norm": 0.7361471652984619, + "learning_rate": 0.00021626357360721556, + "loss": 1.3215, + "step": 7153 + }, + { + "epoch": 0.7546413502109705, + "grad_norm": 0.627657413482666, + "learning_rate": 0.0002160872251887511, + "loss": 1.3063, + "step": 7154 + }, + { + "epoch": 0.754746835443038, + "grad_norm": 0.7994999289512634, + "learning_rate": 0.00021591093659787528, + "loss": 1.3932, + "step": 7155 + }, + { + "epoch": 0.7548523206751054, + "grad_norm": 0.7333296537399292, + "learning_rate": 0.00021573470785434237, + "loss": 1.3793, + "step": 7156 + }, + { + "epoch": 0.754957805907173, + "grad_norm": 0.6475338339805603, + "learning_rate": 0.00021555853897789942, + "loss": 1.2952, + "step": 7157 + }, + { + "epoch": 0.7550632911392405, + "grad_norm": 0.7020292282104492, + "learning_rate": 0.0002153824299882872, + "loss": 1.3459, + "step": 7158 + }, + { + "epoch": 0.755168776371308, + "grad_norm": 0.8773568272590637, + "learning_rate": 0.00021520638090523955, + "loss": 1.3447, + "step": 7159 + }, + { + "epoch": 0.7552742616033755, + "grad_norm": 0.6437010169029236, + "learning_rate": 0.0002150303917484834, + "loss": 1.3818, + "step": 7160 + }, + { + "epoch": 0.7553797468354431, + "grad_norm": 0.8838223218917847, + "learning_rate": 0.00021485446253773966, + "loss": 1.3561, + "step": 7161 + }, + { + "epoch": 0.7554852320675105, + "grad_norm": 
0.8520406484603882, + "learning_rate": 0.00021467859329272188, + "loss": 1.3523, + "step": 7162 + }, + { + "epoch": 0.755590717299578, + "grad_norm": 0.6131041646003723, + "learning_rate": 0.00021450278403313707, + "loss": 1.3278, + "step": 7163 + }, + { + "epoch": 0.7556962025316456, + "grad_norm": 0.8297718167304993, + "learning_rate": 0.0002143270347786856, + "loss": 1.3553, + "step": 7164 + }, + { + "epoch": 0.755801687763713, + "grad_norm": 0.7993512153625488, + "learning_rate": 0.0002141513455490609, + "loss": 1.3619, + "step": 7165 + }, + { + "epoch": 0.7559071729957806, + "grad_norm": 0.6719152927398682, + "learning_rate": 0.00021397571636394991, + "loss": 1.3138, + "step": 7166 + }, + { + "epoch": 0.7560126582278481, + "grad_norm": 0.7449400424957275, + "learning_rate": 0.00021380014724303286, + "loss": 1.3388, + "step": 7167 + }, + { + "epoch": 0.7561181434599156, + "grad_norm": 0.8228098154067993, + "learning_rate": 0.00021362463820598297, + "loss": 1.3547, + "step": 7168 + }, + { + "epoch": 0.7562236286919831, + "grad_norm": 0.6511428356170654, + "learning_rate": 0.00021344918927246678, + "loss": 1.3702, + "step": 7169 + }, + { + "epoch": 0.7563291139240507, + "grad_norm": 0.7217248678207397, + "learning_rate": 0.0002132738004621446, + "loss": 1.3746, + "step": 7170 + }, + { + "epoch": 0.7564345991561181, + "grad_norm": 0.810326874256134, + "learning_rate": 0.0002130984717946695, + "loss": 1.3384, + "step": 7171 + }, + { + "epoch": 0.7565400843881857, + "grad_norm": 0.6445448398590088, + "learning_rate": 0.00021292320328968783, + "loss": 1.3371, + "step": 7172 + }, + { + "epoch": 0.7566455696202532, + "grad_norm": 0.8284294009208679, + "learning_rate": 0.0002127479949668393, + "loss": 1.3413, + "step": 7173 + }, + { + "epoch": 0.7567510548523206, + "grad_norm": 0.6542397141456604, + "learning_rate": 0.000212572846845757, + "loss": 1.314, + "step": 7174 + }, + { + "epoch": 0.7568565400843882, + "grad_norm": 0.6461221575737, + "learning_rate": 
0.000212397758946067, + "loss": 1.3527, + "step": 7175 + }, + { + "epoch": 0.7569620253164557, + "grad_norm": 0.6866056323051453, + "learning_rate": 0.0002122227312873889, + "loss": 1.3229, + "step": 7176 + }, + { + "epoch": 0.7570675105485232, + "grad_norm": 0.7483984231948853, + "learning_rate": 0.00021204776388933534, + "loss": 1.3446, + "step": 7177 + }, + { + "epoch": 0.7571729957805907, + "grad_norm": 0.6281055808067322, + "learning_rate": 0.00021187285677151205, + "loss": 1.2895, + "step": 7178 + }, + { + "epoch": 0.7572784810126583, + "grad_norm": 0.6866974234580994, + "learning_rate": 0.00021169800995351874, + "loss": 1.3078, + "step": 7179 + }, + { + "epoch": 0.7573839662447257, + "grad_norm": 0.7735309600830078, + "learning_rate": 0.00021152322345494763, + "loss": 1.3343, + "step": 7180 + }, + { + "epoch": 0.7574894514767933, + "grad_norm": 0.7357403635978699, + "learning_rate": 0.00021134849729538438, + "loss": 1.3271, + "step": 7181 + }, + { + "epoch": 0.7575949367088608, + "grad_norm": 0.7225286960601807, + "learning_rate": 0.00021117383149440801, + "loss": 1.3499, + "step": 7182 + }, + { + "epoch": 0.7577004219409282, + "grad_norm": 0.8494777679443359, + "learning_rate": 0.00021099922607159064, + "loss": 1.3102, + "step": 7183 + }, + { + "epoch": 0.7578059071729958, + "grad_norm": 0.6572169661521912, + "learning_rate": 0.00021082468104649773, + "loss": 1.3528, + "step": 7184 + }, + { + "epoch": 0.7579113924050633, + "grad_norm": 0.7215381860733032, + "learning_rate": 0.00021065019643868785, + "loss": 1.3786, + "step": 7185 + }, + { + "epoch": 0.7580168776371308, + "grad_norm": 0.7113456130027771, + "learning_rate": 0.00021047577226771292, + "loss": 1.2994, + "step": 7186 + }, + { + "epoch": 0.7581223628691983, + "grad_norm": 0.6679551601409912, + "learning_rate": 0.00021030140855311772, + "loss": 1.3641, + "step": 7187 + }, + { + "epoch": 0.7582278481012659, + "grad_norm": 0.6448835730552673, + "learning_rate": 0.00021012710531444112, + "loss": 
1.3107, + "step": 7188 + }, + { + "epoch": 0.7583333333333333, + "grad_norm": 0.6778890490531921, + "learning_rate": 0.00020995286257121453, + "loss": 1.3339, + "step": 7189 + }, + { + "epoch": 0.7584388185654009, + "grad_norm": 0.6668332815170288, + "learning_rate": 0.00020977868034296253, + "loss": 1.3222, + "step": 7190 + }, + { + "epoch": 0.7585443037974684, + "grad_norm": 0.660868227481842, + "learning_rate": 0.0002096045586492031, + "loss": 1.3278, + "step": 7191 + }, + { + "epoch": 0.7586497890295358, + "grad_norm": 0.6745880246162415, + "learning_rate": 0.00020943049750944768, + "loss": 1.3304, + "step": 7192 + }, + { + "epoch": 0.7587552742616034, + "grad_norm": 0.6646778583526611, + "learning_rate": 0.00020925649694320046, + "loss": 1.3423, + "step": 7193 + }, + { + "epoch": 0.7588607594936709, + "grad_norm": 0.6640921831130981, + "learning_rate": 0.0002090825569699591, + "loss": 1.309, + "step": 7194 + }, + { + "epoch": 0.7589662447257384, + "grad_norm": 0.660426139831543, + "learning_rate": 0.0002089086776092146, + "loss": 1.3656, + "step": 7195 + }, + { + "epoch": 0.7590717299578059, + "grad_norm": 0.6680814027786255, + "learning_rate": 0.0002087348588804505, + "loss": 1.3813, + "step": 7196 + }, + { + "epoch": 0.7591772151898735, + "grad_norm": 0.6635714769363403, + "learning_rate": 0.0002085611008031449, + "loss": 1.3176, + "step": 7197 + }, + { + "epoch": 0.7592827004219409, + "grad_norm": 0.6447348594665527, + "learning_rate": 0.00020838740339676763, + "loss": 1.3494, + "step": 7198 + }, + { + "epoch": 0.7593881856540085, + "grad_norm": 0.7188922166824341, + "learning_rate": 0.00020821376668078264, + "loss": 1.3338, + "step": 7199 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 0.6949685215950012, + "learning_rate": 0.00020804019067464667, + "loss": 1.3219, + "step": 7200 + }, + { + "epoch": 0.7595991561181434, + "grad_norm": 0.664646327495575, + "learning_rate": 0.00020786667539780977, + "loss": 1.3158, + "step": 7201 + }, + { + "epoch": 
0.759704641350211, + "grad_norm": 0.7130693793296814, + "learning_rate": 0.00020769322086971524, + "loss": 1.3358, + "step": 7202 + }, + { + "epoch": 0.7598101265822785, + "grad_norm": 0.7509399056434631, + "learning_rate": 0.00020751982710979944, + "loss": 1.349, + "step": 7203 + }, + { + "epoch": 0.759915611814346, + "grad_norm": 0.6295326948165894, + "learning_rate": 0.0002073464941374921, + "loss": 1.3305, + "step": 7204 + }, + { + "epoch": 0.7600210970464135, + "grad_norm": 0.6614977121353149, + "learning_rate": 0.000207173221972216, + "loss": 1.3655, + "step": 7205 + }, + { + "epoch": 0.7601265822784811, + "grad_norm": 0.8001264929771423, + "learning_rate": 0.00020700001063338696, + "loss": 1.362, + "step": 7206 + }, + { + "epoch": 0.7602320675105485, + "grad_norm": 0.7802428007125854, + "learning_rate": 0.00020682686014041458, + "loss": 1.3303, + "step": 7207 + }, + { + "epoch": 0.760337552742616, + "grad_norm": 0.795302152633667, + "learning_rate": 0.00020665377051270095, + "loss": 1.3556, + "step": 7208 + }, + { + "epoch": 0.7604430379746835, + "grad_norm": 0.832693338394165, + "learning_rate": 0.00020648074176964182, + "loss": 1.3299, + "step": 7209 + }, + { + "epoch": 0.760548523206751, + "grad_norm": 0.7705296874046326, + "learning_rate": 0.00020630777393062575, + "loss": 1.3438, + "step": 7210 + }, + { + "epoch": 0.7606540084388186, + "grad_norm": 0.6641943454742432, + "learning_rate": 0.00020613486701503473, + "loss": 1.367, + "step": 7211 + }, + { + "epoch": 0.760759493670886, + "grad_norm": 0.8375522494316101, + "learning_rate": 0.00020596202104224376, + "loss": 1.3421, + "step": 7212 + }, + { + "epoch": 0.7608649789029536, + "grad_norm": 0.7994818091392517, + "learning_rate": 0.0002057892360316212, + "loss": 1.3613, + "step": 7213 + }, + { + "epoch": 0.7609704641350211, + "grad_norm": 0.7208085060119629, + "learning_rate": 0.00020561651200252836, + "loss": 1.3556, + "step": 7214 + }, + { + "epoch": 0.7610759493670886, + "grad_norm": 
0.8432884216308594, + "learning_rate": 0.00020544384897431997, + "loss": 1.3569, + "step": 7215 + }, + { + "epoch": 0.7611814345991561, + "grad_norm": 0.6298556327819824, + "learning_rate": 0.00020527124696634343, + "loss": 1.3452, + "step": 7216 + }, + { + "epoch": 0.7612869198312237, + "grad_norm": 0.6544987559318542, + "learning_rate": 0.00020509870599794022, + "loss": 1.3696, + "step": 7217 + }, + { + "epoch": 0.7613924050632911, + "grad_norm": 0.710395336151123, + "learning_rate": 0.0002049262260884441, + "loss": 1.3718, + "step": 7218 + }, + { + "epoch": 0.7614978902953586, + "grad_norm": 0.6888543963432312, + "learning_rate": 0.00020475380725718228, + "loss": 1.3405, + "step": 7219 + }, + { + "epoch": 0.7616033755274262, + "grad_norm": 0.6450878381729126, + "learning_rate": 0.00020458144952347523, + "loss": 1.3363, + "step": 7220 + }, + { + "epoch": 0.7617088607594936, + "grad_norm": 0.6837509274482727, + "learning_rate": 0.0002044091529066365, + "loss": 1.3578, + "step": 7221 + }, + { + "epoch": 0.7618143459915612, + "grad_norm": 0.6434285044670105, + "learning_rate": 0.00020423691742597273, + "loss": 1.3445, + "step": 7222 + }, + { + "epoch": 0.7619198312236287, + "grad_norm": 0.6259520649909973, + "learning_rate": 0.0002040647431007837, + "loss": 1.3202, + "step": 7223 + }, + { + "epoch": 0.7620253164556962, + "grad_norm": 0.6367894411087036, + "learning_rate": 0.00020389262995036263, + "loss": 1.3321, + "step": 7224 + }, + { + "epoch": 0.7621308016877637, + "grad_norm": 0.6562678217887878, + "learning_rate": 0.00020372057799399534, + "loss": 1.3673, + "step": 7225 + }, + { + "epoch": 0.7622362869198313, + "grad_norm": 0.6341389417648315, + "learning_rate": 0.00020354858725096122, + "loss": 1.3516, + "step": 7226 + }, + { + "epoch": 0.7623417721518987, + "grad_norm": 0.692915678024292, + "learning_rate": 0.00020337665774053284, + "loss": 1.3277, + "step": 7227 + }, + { + "epoch": 0.7624472573839662, + "grad_norm": 0.630954921245575, + "learning_rate": 
0.0002032047894819758, + "loss": 1.349, + "step": 7228 + }, + { + "epoch": 0.7625527426160338, + "grad_norm": 0.6602571606636047, + "learning_rate": 0.00020303298249454857, + "loss": 1.3936, + "step": 7229 + }, + { + "epoch": 0.7626582278481012, + "grad_norm": 0.6565194725990295, + "learning_rate": 0.00020286123679750314, + "loss": 1.3598, + "step": 7230 + }, + { + "epoch": 0.7627637130801688, + "grad_norm": 0.6141059994697571, + "learning_rate": 0.00020268955241008437, + "loss": 1.3696, + "step": 7231 + }, + { + "epoch": 0.7628691983122363, + "grad_norm": 0.6250377893447876, + "learning_rate": 0.00020251792935153037, + "loss": 1.3407, + "step": 7232 + }, + { + "epoch": 0.7629746835443038, + "grad_norm": 0.7410667538642883, + "learning_rate": 0.0002023463676410724, + "loss": 1.3296, + "step": 7233 + }, + { + "epoch": 0.7630801687763713, + "grad_norm": 0.633512020111084, + "learning_rate": 0.0002021748672979348, + "loss": 1.3417, + "step": 7234 + }, + { + "epoch": 0.7631856540084389, + "grad_norm": 0.8292410969734192, + "learning_rate": 0.00020200342834133497, + "loss": 1.3611, + "step": 7235 + }, + { + "epoch": 0.7632911392405063, + "grad_norm": 0.652230441570282, + "learning_rate": 0.00020183205079048338, + "loss": 1.3267, + "step": 7236 + }, + { + "epoch": 0.7633966244725738, + "grad_norm": 0.6604596972465515, + "learning_rate": 0.0002016607346645841, + "loss": 1.3661, + "step": 7237 + }, + { + "epoch": 0.7635021097046414, + "grad_norm": 0.7050010561943054, + "learning_rate": 0.00020148947998283381, + "loss": 1.3675, + "step": 7238 + }, + { + "epoch": 0.7636075949367088, + "grad_norm": 0.6180127859115601, + "learning_rate": 0.00020131828676442237, + "loss": 1.3221, + "step": 7239 + }, + { + "epoch": 0.7637130801687764, + "grad_norm": 0.633593738079071, + "learning_rate": 0.00020114715502853292, + "loss": 1.3327, + "step": 7240 + }, + { + "epoch": 0.7638185654008439, + "grad_norm": 0.6477851867675781, + "learning_rate": 0.00020097608479434153, + "loss": 1.3013, + 
"step": 7241 + }, + { + "epoch": 0.7639240506329114, + "grad_norm": 0.633468747138977, + "learning_rate": 0.00020080507608101757, + "loss": 1.3891, + "step": 7242 + }, + { + "epoch": 0.7640295358649789, + "grad_norm": 0.6394745111465454, + "learning_rate": 0.0002006341289077233, + "loss": 1.3546, + "step": 7243 + }, + { + "epoch": 0.7641350210970465, + "grad_norm": 0.7081860303878784, + "learning_rate": 0.00020046324329361432, + "loss": 1.3406, + "step": 7244 + }, + { + "epoch": 0.7642405063291139, + "grad_norm": 0.6289195418357849, + "learning_rate": 0.00020029241925783908, + "loss": 1.3167, + "step": 7245 + }, + { + "epoch": 0.7643459915611814, + "grad_norm": 0.6320629119873047, + "learning_rate": 0.00020012165681953923, + "loss": 1.32, + "step": 7246 + }, + { + "epoch": 0.764451476793249, + "grad_norm": 0.6901712417602539, + "learning_rate": 0.00019995095599784985, + "loss": 1.3476, + "step": 7247 + }, + { + "epoch": 0.7645569620253164, + "grad_norm": 0.6135250329971313, + "learning_rate": 0.00019978031681189864, + "loss": 1.3083, + "step": 7248 + }, + { + "epoch": 0.764662447257384, + "grad_norm": 0.7346128225326538, + "learning_rate": 0.00019960973928080666, + "loss": 1.3894, + "step": 7249 + }, + { + "epoch": 0.7647679324894515, + "grad_norm": 0.6736385226249695, + "learning_rate": 0.0001994392234236878, + "loss": 1.3386, + "step": 7250 + }, + { + "epoch": 0.764873417721519, + "grad_norm": 0.6381040215492249, + "learning_rate": 0.00019926876925964928, + "loss": 1.3462, + "step": 7251 + }, + { + "epoch": 0.7649789029535865, + "grad_norm": 0.6571758389472961, + "learning_rate": 0.00019909837680779141, + "loss": 1.3125, + "step": 7252 + }, + { + "epoch": 0.765084388185654, + "grad_norm": 0.6231520175933838, + "learning_rate": 0.00019892804608720747, + "loss": 1.3503, + "step": 7253 + }, + { + "epoch": 0.7651898734177215, + "grad_norm": 0.7363301515579224, + "learning_rate": 0.00019875777711698384, + "loss": 1.3531, + "step": 7254 + }, + { + "epoch": 
0.765295358649789, + "grad_norm": 0.6549159288406372, + "learning_rate": 0.00019858756991619978, + "loss": 1.3524, + "step": 7255 + }, + { + "epoch": 0.7654008438818566, + "grad_norm": 0.6346895694732666, + "learning_rate": 0.00019841742450392837, + "loss": 1.3411, + "step": 7256 + }, + { + "epoch": 0.765506329113924, + "grad_norm": 0.7328893542289734, + "learning_rate": 0.0001982473408992349, + "loss": 1.3512, + "step": 7257 + }, + { + "epoch": 0.7656118143459916, + "grad_norm": 0.6311695575714111, + "learning_rate": 0.00019807731912117828, + "loss": 1.3214, + "step": 7258 + }, + { + "epoch": 0.7657172995780591, + "grad_norm": 0.6366530656814575, + "learning_rate": 0.0001979073591888101, + "loss": 1.3681, + "step": 7259 + }, + { + "epoch": 0.7658227848101266, + "grad_norm": 0.667829155921936, + "learning_rate": 0.0001977374611211754, + "loss": 1.2876, + "step": 7260 + }, + { + "epoch": 0.7659282700421941, + "grad_norm": 0.6724556088447571, + "learning_rate": 0.00019756762493731192, + "loss": 1.3023, + "step": 7261 + }, + { + "epoch": 0.7660337552742617, + "grad_norm": 0.6878661513328552, + "learning_rate": 0.00019739785065625077, + "loss": 1.3202, + "step": 7262 + }, + { + "epoch": 0.7661392405063291, + "grad_norm": 0.6254784464836121, + "learning_rate": 0.00019722813829701593, + "loss": 1.34, + "step": 7263 + }, + { + "epoch": 0.7662447257383966, + "grad_norm": 0.6719215512275696, + "learning_rate": 0.0001970584878786244, + "loss": 1.3748, + "step": 7264 + }, + { + "epoch": 0.7663502109704642, + "grad_norm": 0.7687093019485474, + "learning_rate": 0.0001968888994200868, + "loss": 1.3167, + "step": 7265 + }, + { + "epoch": 0.7664556962025316, + "grad_norm": 0.6771566271781921, + "learning_rate": 0.00019671937294040595, + "loss": 1.3061, + "step": 7266 + }, + { + "epoch": 0.7665611814345992, + "grad_norm": 0.64701247215271, + "learning_rate": 0.00019654990845857832, + "loss": 1.3244, + "step": 7267 + }, + { + "epoch": 0.7666666666666667, + "grad_norm": 
0.7753062844276428, + "learning_rate": 0.00019638050599359326, + "loss": 1.3593, + "step": 7268 + }, + { + "epoch": 0.7667721518987342, + "grad_norm": 0.6725627183914185, + "learning_rate": 0.000196211165564433, + "loss": 1.3466, + "step": 7269 + }, + { + "epoch": 0.7668776371308017, + "grad_norm": 0.6381708979606628, + "learning_rate": 0.00019604188719007313, + "loss": 1.3857, + "step": 7270 + }, + { + "epoch": 0.7669831223628693, + "grad_norm": 0.6642109751701355, + "learning_rate": 0.00019587267088948214, + "loss": 1.3494, + "step": 7271 + }, + { + "epoch": 0.7670886075949367, + "grad_norm": 0.7411757707595825, + "learning_rate": 0.00019570351668162143, + "loss": 1.348, + "step": 7272 + }, + { + "epoch": 0.7671940928270042, + "grad_norm": 0.6707155704498291, + "learning_rate": 0.00019553442458544542, + "loss": 1.3431, + "step": 7273 + }, + { + "epoch": 0.7672995780590718, + "grad_norm": 0.6752209663391113, + "learning_rate": 0.00019536539461990224, + "loss": 1.2977, + "step": 7274 + }, + { + "epoch": 0.7674050632911392, + "grad_norm": 0.6975091099739075, + "learning_rate": 0.0001951964268039322, + "loss": 1.3372, + "step": 7275 + }, + { + "epoch": 0.7675105485232068, + "grad_norm": 0.6427093148231506, + "learning_rate": 0.00019502752115646901, + "loss": 1.3743, + "step": 7276 + }, + { + "epoch": 0.7676160337552742, + "grad_norm": 0.6744654178619385, + "learning_rate": 0.00019485867769643945, + "loss": 1.3199, + "step": 7277 + }, + { + "epoch": 0.7677215189873418, + "grad_norm": 0.6861532926559448, + "learning_rate": 0.0001946898964427633, + "loss": 1.3678, + "step": 7278 + }, + { + "epoch": 0.7678270042194093, + "grad_norm": 0.6590916514396667, + "learning_rate": 0.00019452117741435314, + "loss": 1.3396, + "step": 7279 + }, + { + "epoch": 0.7679324894514767, + "grad_norm": 0.7136180400848389, + "learning_rate": 0.00019435252063011504, + "loss": 1.3167, + "step": 7280 + }, + { + "epoch": 0.7680379746835443, + "grad_norm": 0.6836820840835571, + "learning_rate": 
0.00019418392610894768, + "loss": 1.3833, + "step": 7281 + }, + { + "epoch": 0.7681434599156118, + "grad_norm": 0.5959730744361877, + "learning_rate": 0.0001940153938697427, + "loss": 1.3901, + "step": 7282 + }, + { + "epoch": 0.7682489451476793, + "grad_norm": 0.6448880434036255, + "learning_rate": 0.0001938469239313855, + "loss": 1.3341, + "step": 7283 + }, + { + "epoch": 0.7683544303797468, + "grad_norm": 0.6817277073860168, + "learning_rate": 0.00019367851631275362, + "loss": 1.3281, + "step": 7284 + }, + { + "epoch": 0.7684599156118144, + "grad_norm": 0.8179141879081726, + "learning_rate": 0.00019351017103271805, + "loss": 1.3618, + "step": 7285 + }, + { + "epoch": 0.7685654008438818, + "grad_norm": 0.6437481641769409, + "learning_rate": 0.00019334188811014278, + "loss": 1.3334, + "step": 7286 + }, + { + "epoch": 0.7686708860759494, + "grad_norm": 0.828032374382019, + "learning_rate": 0.00019317366756388477, + "loss": 1.3309, + "step": 7287 + }, + { + "epoch": 0.7687763713080169, + "grad_norm": 0.6426421403884888, + "learning_rate": 0.0001930055094127938, + "loss": 1.328, + "step": 7288 + }, + { + "epoch": 0.7688818565400843, + "grad_norm": 0.7022888660430908, + "learning_rate": 0.00019283741367571294, + "loss": 1.3687, + "step": 7289 + }, + { + "epoch": 0.7689873417721519, + "grad_norm": 0.6204342246055603, + "learning_rate": 0.0001926693803714779, + "loss": 1.3412, + "step": 7290 + }, + { + "epoch": 0.7690928270042194, + "grad_norm": 0.6229074001312256, + "learning_rate": 0.00019250140951891813, + "loss": 1.4038, + "step": 7291 + }, + { + "epoch": 0.7691983122362869, + "grad_norm": 0.6462032794952393, + "learning_rate": 0.00019233350113685536, + "loss": 1.3609, + "step": 7292 + }, + { + "epoch": 0.7693037974683544, + "grad_norm": 0.6568781733512878, + "learning_rate": 0.00019216565524410455, + "loss": 1.3125, + "step": 7293 + }, + { + "epoch": 0.769409282700422, + "grad_norm": 0.667131245136261, + "learning_rate": 0.0001919978718594738, + "loss": 1.319, + 
"step": 7294 + }, + { + "epoch": 0.7695147679324894, + "grad_norm": 0.6662399768829346, + "learning_rate": 0.0001918301510017638, + "loss": 1.3803, + "step": 7295 + }, + { + "epoch": 0.769620253164557, + "grad_norm": 0.6063140034675598, + "learning_rate": 0.0001916624926897687, + "loss": 1.3607, + "step": 7296 + }, + { + "epoch": 0.7697257383966245, + "grad_norm": 0.6587943434715271, + "learning_rate": 0.0001914948969422755, + "loss": 1.3423, + "step": 7297 + }, + { + "epoch": 0.7698312236286919, + "grad_norm": 0.6663327217102051, + "learning_rate": 0.00019132736377806394, + "loss": 1.3282, + "step": 7298 + }, + { + "epoch": 0.7699367088607595, + "grad_norm": 0.6437806487083435, + "learning_rate": 0.00019115989321590694, + "loss": 1.3463, + "step": 7299 + }, + { + "epoch": 0.770042194092827, + "grad_norm": 0.6904481649398804, + "learning_rate": 0.00019099248527457068, + "loss": 1.3823, + "step": 7300 + }, + { + "epoch": 0.7701476793248945, + "grad_norm": 0.6103796362876892, + "learning_rate": 0.00019082513997281398, + "loss": 1.35, + "step": 7301 + }, + { + "epoch": 0.770253164556962, + "grad_norm": 0.6509994864463806, + "learning_rate": 0.0001906578573293886, + "loss": 1.3028, + "step": 7302 + }, + { + "epoch": 0.7703586497890296, + "grad_norm": 0.6863605380058289, + "learning_rate": 0.00019049063736303946, + "loss": 1.3485, + "step": 7303 + }, + { + "epoch": 0.770464135021097, + "grad_norm": 0.6233800053596497, + "learning_rate": 0.00019032348009250433, + "loss": 1.3419, + "step": 7304 + }, + { + "epoch": 0.7705696202531646, + "grad_norm": 0.620561957359314, + "learning_rate": 0.0001901563855365141, + "loss": 1.3166, + "step": 7305 + }, + { + "epoch": 0.7706751054852321, + "grad_norm": 0.6317064166069031, + "learning_rate": 0.00018998935371379252, + "loss": 1.3376, + "step": 7306 + }, + { + "epoch": 0.7707805907172995, + "grad_norm": 0.6559199094772339, + "learning_rate": 0.00018982238464305623, + "loss": 1.3563, + "step": 7307 + }, + { + "epoch": 
0.7708860759493671, + "grad_norm": 0.645896315574646, + "learning_rate": 0.0001896554783430149, + "loss": 1.346, + "step": 7308 + }, + { + "epoch": 0.7709915611814346, + "grad_norm": 0.6621363759040833, + "learning_rate": 0.00018948863483237154, + "loss": 1.3455, + "step": 7309 + }, + { + "epoch": 0.7710970464135021, + "grad_norm": 0.6092583537101746, + "learning_rate": 0.0001893218541298216, + "loss": 1.3189, + "step": 7310 + }, + { + "epoch": 0.7712025316455696, + "grad_norm": 0.6364088654518127, + "learning_rate": 0.00018915513625405374, + "loss": 1.3345, + "step": 7311 + }, + { + "epoch": 0.7713080168776372, + "grad_norm": 0.6609549522399902, + "learning_rate": 0.00018898848122374942, + "loss": 1.3545, + "step": 7312 + }, + { + "epoch": 0.7714135021097046, + "grad_norm": 0.7072636485099792, + "learning_rate": 0.00018882188905758326, + "loss": 1.3413, + "step": 7313 + }, + { + "epoch": 0.7715189873417722, + "grad_norm": 0.6348657608032227, + "learning_rate": 0.00018865535977422273, + "loss": 1.3193, + "step": 7314 + }, + { + "epoch": 0.7716244725738397, + "grad_norm": 0.6347276568412781, + "learning_rate": 0.00018848889339232833, + "loss": 1.3226, + "step": 7315 + }, + { + "epoch": 0.7717299578059071, + "grad_norm": 0.6529163718223572, + "learning_rate": 0.00018832248993055304, + "loss": 1.3503, + "step": 7316 + }, + { + "epoch": 0.7718354430379747, + "grad_norm": 0.7528876662254333, + "learning_rate": 0.00018815614940754377, + "loss": 1.3102, + "step": 7317 + }, + { + "epoch": 0.7719409282700422, + "grad_norm": 0.6383519172668457, + "learning_rate": 0.00018798987184193963, + "loss": 1.3121, + "step": 7318 + }, + { + "epoch": 0.7720464135021097, + "grad_norm": 0.6315410137176514, + "learning_rate": 0.00018782365725237272, + "loss": 1.3426, + "step": 7319 + }, + { + "epoch": 0.7721518987341772, + "grad_norm": 0.6637698411941528, + "learning_rate": 0.00018765750565746827, + "loss": 1.3307, + "step": 7320 + }, + { + "epoch": 0.7722573839662448, + "grad_norm": 
0.6233860850334167, + "learning_rate": 0.00018749141707584443, + "loss": 1.3764, + "step": 7321 + }, + { + "epoch": 0.7723628691983122, + "grad_norm": 0.627495527267456, + "learning_rate": 0.0001873253915261123, + "loss": 1.3186, + "step": 7322 + }, + { + "epoch": 0.7724683544303798, + "grad_norm": 0.6227855682373047, + "learning_rate": 0.00018715942902687566, + "loss": 1.3133, + "step": 7323 + }, + { + "epoch": 0.7725738396624473, + "grad_norm": 0.6411498188972473, + "learning_rate": 0.00018699352959673172, + "loss": 1.3283, + "step": 7324 + }, + { + "epoch": 0.7726793248945147, + "grad_norm": 0.6560648083686829, + "learning_rate": 0.00018682769325426986, + "loss": 1.3574, + "step": 7325 + }, + { + "epoch": 0.7727848101265823, + "grad_norm": 0.6535948514938354, + "learning_rate": 0.00018666192001807344, + "loss": 1.3256, + "step": 7326 + }, + { + "epoch": 0.7728902953586498, + "grad_norm": 0.6484748721122742, + "learning_rate": 0.00018649620990671798, + "loss": 1.3062, + "step": 7327 + }, + { + "epoch": 0.7729957805907173, + "grad_norm": 0.6250801682472229, + "learning_rate": 0.00018633056293877203, + "loss": 1.3279, + "step": 7328 + }, + { + "epoch": 0.7731012658227848, + "grad_norm": 0.6586583256721497, + "learning_rate": 0.00018616497913279728, + "loss": 1.3092, + "step": 7329 + }, + { + "epoch": 0.7732067510548524, + "grad_norm": 0.7053005695343018, + "learning_rate": 0.00018599945850734812, + "loss": 1.3897, + "step": 7330 + }, + { + "epoch": 0.7733122362869198, + "grad_norm": 0.7243990898132324, + "learning_rate": 0.00018583400108097194, + "loss": 1.3209, + "step": 7331 + }, + { + "epoch": 0.7734177215189874, + "grad_norm": 0.6707352995872498, + "learning_rate": 0.00018566860687220922, + "loss": 1.3484, + "step": 7332 + }, + { + "epoch": 0.7735232067510549, + "grad_norm": 0.638607919216156, + "learning_rate": 0.00018550327589959308, + "loss": 1.3519, + "step": 7333 + }, + { + "epoch": 0.7736286919831223, + "grad_norm": 0.6978217959403992, + "learning_rate": 
0.00018533800818164943, + "loss": 1.3195, + "step": 7334 + }, + { + "epoch": 0.7737341772151899, + "grad_norm": 0.6906748414039612, + "learning_rate": 0.00018517280373689789, + "loss": 1.3707, + "step": 7335 + }, + { + "epoch": 0.7738396624472574, + "grad_norm": 0.645717978477478, + "learning_rate": 0.0001850076625838502, + "loss": 1.311, + "step": 7336 + }, + { + "epoch": 0.7739451476793249, + "grad_norm": 0.654775857925415, + "learning_rate": 0.0001848425847410112, + "loss": 1.3009, + "step": 7337 + }, + { + "epoch": 0.7740506329113924, + "grad_norm": 0.6729512810707092, + "learning_rate": 0.00018467757022687864, + "loss": 1.3353, + "step": 7338 + }, + { + "epoch": 0.77415611814346, + "grad_norm": 0.6276350021362305, + "learning_rate": 0.0001845126190599434, + "loss": 1.3491, + "step": 7339 + }, + { + "epoch": 0.7742616033755274, + "grad_norm": 0.6799956560134888, + "learning_rate": 0.00018434773125868895, + "loss": 1.3249, + "step": 7340 + }, + { + "epoch": 0.774367088607595, + "grad_norm": 0.6738048195838928, + "learning_rate": 0.00018418290684159175, + "loss": 1.3258, + "step": 7341 + }, + { + "epoch": 0.7744725738396624, + "grad_norm": 0.654555082321167, + "learning_rate": 0.00018401814582712103, + "loss": 1.3356, + "step": 7342 + }, + { + "epoch": 0.7745780590717299, + "grad_norm": 0.6535277962684631, + "learning_rate": 0.0001838534482337396, + "loss": 1.3099, + "step": 7343 + }, + { + "epoch": 0.7746835443037975, + "grad_norm": 0.6541687250137329, + "learning_rate": 0.0001836888140799023, + "loss": 1.3671, + "step": 7344 + }, + { + "epoch": 0.7747890295358649, + "grad_norm": 0.689044713973999, + "learning_rate": 0.0001835242433840573, + "loss": 1.332, + "step": 7345 + }, + { + "epoch": 0.7748945147679325, + "grad_norm": 0.6438462138175964, + "learning_rate": 0.00018335973616464554, + "loss": 1.2779, + "step": 7346 + }, + { + "epoch": 0.775, + "grad_norm": 0.6783568859100342, + "learning_rate": 0.00018319529244010082, + "loss": 1.3561, + "step": 7347 + }, + 
{ + "epoch": 0.7751054852320675, + "grad_norm": 0.7010441422462463, + "learning_rate": 0.00018303091222884998, + "loss": 1.3626, + "step": 7348 + }, + { + "epoch": 0.775210970464135, + "grad_norm": 0.6975547075271606, + "learning_rate": 0.00018286659554931254, + "loss": 1.334, + "step": 7349 + }, + { + "epoch": 0.7753164556962026, + "grad_norm": 0.6234581470489502, + "learning_rate": 0.00018270234241990108, + "loss": 1.3095, + "step": 7350 + }, + { + "epoch": 0.77542194092827, + "grad_norm": 0.6518776416778564, + "learning_rate": 0.00018253815285902074, + "loss": 1.3049, + "step": 7351 + }, + { + "epoch": 0.7755274261603375, + "grad_norm": 0.7093745470046997, + "learning_rate": 0.0001823740268850702, + "loss": 1.3611, + "step": 7352 + }, + { + "epoch": 0.7756329113924051, + "grad_norm": 0.6531721949577332, + "learning_rate": 0.0001822099645164404, + "loss": 1.3361, + "step": 7353 + }, + { + "epoch": 0.7757383966244725, + "grad_norm": 0.6878824830055237, + "learning_rate": 0.00018204596577151534, + "loss": 1.3132, + "step": 7354 + }, + { + "epoch": 0.7758438818565401, + "grad_norm": 0.9209045767784119, + "learning_rate": 0.00018188203066867178, + "loss": 1.339, + "step": 7355 + }, + { + "epoch": 0.7759493670886076, + "grad_norm": 0.6433756947517395, + "learning_rate": 0.00018171815922627974, + "loss": 1.3664, + "step": 7356 + }, + { + "epoch": 0.7760548523206751, + "grad_norm": 0.7742159366607666, + "learning_rate": 0.00018155435146270158, + "loss": 1.3417, + "step": 7357 + }, + { + "epoch": 0.7761603375527426, + "grad_norm": 0.792822003364563, + "learning_rate": 0.00018139060739629287, + "loss": 1.3582, + "step": 7358 + }, + { + "epoch": 0.7762658227848102, + "grad_norm": 0.651817262172699, + "learning_rate": 0.00018122692704540194, + "loss": 1.3431, + "step": 7359 + }, + { + "epoch": 0.7763713080168776, + "grad_norm": 0.6419605016708374, + "learning_rate": 0.0001810633104283698, + "loss": 1.3308, + "step": 7360 + }, + { + "epoch": 0.7764767932489451, + 
"grad_norm": 0.763775646686554, + "learning_rate": 0.00018089975756353083, + "loss": 1.3703, + "step": 7361 + }, + { + "epoch": 0.7765822784810127, + "grad_norm": 0.6750669479370117, + "learning_rate": 0.0001807362684692119, + "loss": 1.3618, + "step": 7362 + }, + { + "epoch": 0.7766877637130801, + "grad_norm": 0.6749182939529419, + "learning_rate": 0.00018057284316373267, + "loss": 1.3087, + "step": 7363 + }, + { + "epoch": 0.7767932489451477, + "grad_norm": 0.6305349469184875, + "learning_rate": 0.00018040948166540586, + "loss": 1.3116, + "step": 7364 + }, + { + "epoch": 0.7768987341772152, + "grad_norm": 0.6404727697372437, + "learning_rate": 0.0001802461839925368, + "loss": 1.3239, + "step": 7365 + }, + { + "epoch": 0.7770042194092827, + "grad_norm": 0.728496253490448, + "learning_rate": 0.00018008295016342383, + "loss": 1.3225, + "step": 7366 + }, + { + "epoch": 0.7771097046413502, + "grad_norm": 0.6488460302352905, + "learning_rate": 0.00017991978019635819, + "loss": 1.3315, + "step": 7367 + }, + { + "epoch": 0.7772151898734178, + "grad_norm": 0.6585664749145508, + "learning_rate": 0.00017975667410962366, + "loss": 1.3145, + "step": 7368 + }, + { + "epoch": 0.7773206751054852, + "grad_norm": 0.6383051872253418, + "learning_rate": 0.00017959363192149752, + "loss": 1.3627, + "step": 7369 + }, + { + "epoch": 0.7774261603375527, + "grad_norm": 0.6658352613449097, + "learning_rate": 0.0001794306536502492, + "loss": 1.3806, + "step": 7370 + }, + { + "epoch": 0.7775316455696203, + "grad_norm": 0.6338128447532654, + "learning_rate": 0.0001792677393141412, + "loss": 1.3434, + "step": 7371 + }, + { + "epoch": 0.7776371308016877, + "grad_norm": 0.6836155652999878, + "learning_rate": 0.00017910488893142903, + "loss": 1.3582, + "step": 7372 + }, + { + "epoch": 0.7777426160337553, + "grad_norm": 0.6199460029602051, + "learning_rate": 0.00017894210252036069, + "loss": 1.2968, + "step": 7373 + }, + { + "epoch": 0.7778481012658228, + "grad_norm": 0.64155113697052, + 
"learning_rate": 0.0001787793800991774, + "loss": 1.3367, + "step": 7374 + }, + { + "epoch": 0.7779535864978903, + "grad_norm": 0.6611424684524536, + "learning_rate": 0.00017861672168611293, + "loss": 1.3649, + "step": 7375 + }, + { + "epoch": 0.7780590717299578, + "grad_norm": 0.6239455938339233, + "learning_rate": 0.0001784541272993939, + "loss": 1.3027, + "step": 7376 + }, + { + "epoch": 0.7781645569620254, + "grad_norm": 0.6224864721298218, + "learning_rate": 0.00017829159695723973, + "loss": 1.311, + "step": 7377 + }, + { + "epoch": 0.7782700421940928, + "grad_norm": 0.665077805519104, + "learning_rate": 0.00017812913067786313, + "loss": 1.2974, + "step": 7378 + }, + { + "epoch": 0.7783755274261603, + "grad_norm": 0.6536722779273987, + "learning_rate": 0.00017796672847946905, + "loss": 1.3305, + "step": 7379 + }, + { + "epoch": 0.7784810126582279, + "grad_norm": 0.6740949749946594, + "learning_rate": 0.0001778043903802555, + "loss": 1.3285, + "step": 7380 + }, + { + "epoch": 0.7785864978902953, + "grad_norm": 0.6335997581481934, + "learning_rate": 0.00017764211639841312, + "loss": 1.3372, + "step": 7381 + }, + { + "epoch": 0.7786919831223629, + "grad_norm": 0.6557823419570923, + "learning_rate": 0.0001774799065521257, + "loss": 1.4141, + "step": 7382 + }, + { + "epoch": 0.7787974683544304, + "grad_norm": 0.6446201801300049, + "learning_rate": 0.0001773177608595696, + "loss": 1.3374, + "step": 7383 + }, + { + "epoch": 0.7789029535864979, + "grad_norm": 0.6967164874076843, + "learning_rate": 0.00017715567933891405, + "loss": 1.3288, + "step": 7384 + }, + { + "epoch": 0.7790084388185654, + "grad_norm": 0.6331047415733337, + "learning_rate": 0.0001769936620083211, + "loss": 1.3366, + "step": 7385 + }, + { + "epoch": 0.779113924050633, + "grad_norm": 0.680073618888855, + "learning_rate": 0.0001768317088859453, + "loss": 1.2859, + "step": 7386 + }, + { + "epoch": 0.7792194092827004, + "grad_norm": 0.6344285607337952, + "learning_rate": 0.0001766698199899349, + 
"loss": 1.3508, + "step": 7387 + }, + { + "epoch": 0.7793248945147679, + "grad_norm": 0.6635192632675171, + "learning_rate": 0.00017650799533842996, + "loss": 1.3205, + "step": 7388 + }, + { + "epoch": 0.7794303797468355, + "grad_norm": 0.6476348638534546, + "learning_rate": 0.0001763462349495639, + "loss": 1.3325, + "step": 7389 + }, + { + "epoch": 0.7795358649789029, + "grad_norm": 0.6436398029327393, + "learning_rate": 0.0001761845388414627, + "loss": 1.3314, + "step": 7390 + }, + { + "epoch": 0.7796413502109705, + "grad_norm": 0.6240007877349854, + "learning_rate": 0.00017602290703224525, + "loss": 1.3304, + "step": 7391 + }, + { + "epoch": 0.779746835443038, + "grad_norm": 0.6341984272003174, + "learning_rate": 0.00017586133954002308, + "loss": 1.3657, + "step": 7392 + }, + { + "epoch": 0.7798523206751055, + "grad_norm": 0.6674740314483643, + "learning_rate": 0.00017569983638290084, + "loss": 1.3362, + "step": 7393 + }, + { + "epoch": 0.779957805907173, + "grad_norm": 0.6341395974159241, + "learning_rate": 0.0001755383975789754, + "loss": 1.3315, + "step": 7394 + }, + { + "epoch": 0.7800632911392406, + "grad_norm": 0.6495786309242249, + "learning_rate": 0.00017537702314633722, + "loss": 1.3326, + "step": 7395 + }, + { + "epoch": 0.780168776371308, + "grad_norm": 0.6383332014083862, + "learning_rate": 0.00017521571310306889, + "loss": 1.379, + "step": 7396 + }, + { + "epoch": 0.7802742616033755, + "grad_norm": 0.6940298676490784, + "learning_rate": 0.0001750544674672461, + "loss": 1.3496, + "step": 7397 + }, + { + "epoch": 0.7803797468354431, + "grad_norm": 0.6334470510482788, + "learning_rate": 0.00017489328625693715, + "loss": 1.316, + "step": 7398 + }, + { + "epoch": 0.7804852320675105, + "grad_norm": 0.6209577918052673, + "learning_rate": 0.00017473216949020326, + "loss": 1.3354, + "step": 7399 + }, + { + "epoch": 0.7805907172995781, + "grad_norm": 0.7063941359519958, + "learning_rate": 0.00017457111718509831, + "loss": 1.3592, + "step": 7400 + }, + { + 
"epoch": 0.7806962025316456, + "grad_norm": 0.6970615386962891, + "learning_rate": 0.00017441012935966898, + "loss": 1.299, + "step": 7401 + }, + { + "epoch": 0.7808016877637131, + "grad_norm": 0.673265814781189, + "learning_rate": 0.00017424920603195483, + "loss": 1.342, + "step": 7402 + }, + { + "epoch": 0.7809071729957806, + "grad_norm": 0.6164256930351257, + "learning_rate": 0.0001740883472199879, + "loss": 1.3088, + "step": 7403 + }, + { + "epoch": 0.7810126582278482, + "grad_norm": 0.6177656650543213, + "learning_rate": 0.00017392755294179363, + "loss": 1.3325, + "step": 7404 + }, + { + "epoch": 0.7811181434599156, + "grad_norm": 0.6369768977165222, + "learning_rate": 0.0001737668232153896, + "loss": 1.324, + "step": 7405 + }, + { + "epoch": 0.7812236286919831, + "grad_norm": 0.6065545082092285, + "learning_rate": 0.00017360615805878636, + "loss": 1.3229, + "step": 7406 + }, + { + "epoch": 0.7813291139240506, + "grad_norm": 0.657355785369873, + "learning_rate": 0.00017344555748998727, + "loss": 1.3248, + "step": 7407 + }, + { + "epoch": 0.7814345991561181, + "grad_norm": 0.6417668461799622, + "learning_rate": 0.0001732850215269885, + "loss": 1.3357, + "step": 7408 + }, + { + "epoch": 0.7815400843881857, + "grad_norm": 0.6157714128494263, + "learning_rate": 0.0001731245501877787, + "loss": 1.3511, + "step": 7409 + }, + { + "epoch": 0.7816455696202531, + "grad_norm": 0.7028563022613525, + "learning_rate": 0.00017296414349033976, + "loss": 1.3159, + "step": 7410 + }, + { + "epoch": 0.7817510548523207, + "grad_norm": 0.7133738398551941, + "learning_rate": 0.0001728038014526458, + "loss": 1.3474, + "step": 7411 + }, + { + "epoch": 0.7818565400843882, + "grad_norm": 0.6337506771087646, + "learning_rate": 0.00017264352409266385, + "loss": 1.349, + "step": 7412 + }, + { + "epoch": 0.7819620253164556, + "grad_norm": 0.7075797915458679, + "learning_rate": 0.0001724833114283542, + "loss": 1.3345, + "step": 7413 + }, + { + "epoch": 0.7820675105485232, + "grad_norm": 
0.7735417485237122, + "learning_rate": 0.0001723231634776693, + "loss": 1.3417, + "step": 7414 + }, + { + "epoch": 0.7821729957805907, + "grad_norm": 0.6069194674491882, + "learning_rate": 0.0001721630802585545, + "loss": 1.3013, + "step": 7415 + }, + { + "epoch": 0.7822784810126582, + "grad_norm": 0.6634778380393982, + "learning_rate": 0.00017200306178894785, + "loss": 1.2885, + "step": 7416 + }, + { + "epoch": 0.7823839662447257, + "grad_norm": 0.6435988545417786, + "learning_rate": 0.00017184310808678028, + "loss": 1.3072, + "step": 7417 + }, + { + "epoch": 0.7824894514767933, + "grad_norm": 0.6569295525550842, + "learning_rate": 0.00017168321916997547, + "loss": 1.3678, + "step": 7418 + }, + { + "epoch": 0.7825949367088607, + "grad_norm": 0.6736591458320618, + "learning_rate": 0.00017152339505644963, + "loss": 1.35, + "step": 7419 + }, + { + "epoch": 0.7827004219409283, + "grad_norm": 0.661893904209137, + "learning_rate": 0.00017136363576411172, + "loss": 1.3133, + "step": 7420 + }, + { + "epoch": 0.7828059071729958, + "grad_norm": 0.6342737078666687, + "learning_rate": 0.00017120394131086398, + "loss": 1.3237, + "step": 7421 + }, + { + "epoch": 0.7829113924050632, + "grad_norm": 0.6421570181846619, + "learning_rate": 0.00017104431171460077, + "loss": 1.3215, + "step": 7422 + }, + { + "epoch": 0.7830168776371308, + "grad_norm": 0.5995962619781494, + "learning_rate": 0.0001708847469932093, + "loss": 1.2976, + "step": 7423 + }, + { + "epoch": 0.7831223628691983, + "grad_norm": 0.6551750302314758, + "learning_rate": 0.00017072524716456975, + "loss": 1.3421, + "step": 7424 + }, + { + "epoch": 0.7832278481012658, + "grad_norm": 0.676114559173584, + "learning_rate": 0.00017056581224655473, + "loss": 1.3309, + "step": 7425 + }, + { + "epoch": 0.7833333333333333, + "grad_norm": 0.678108811378479, + "learning_rate": 0.0001704064422570298, + "loss": 1.2839, + "step": 7426 + }, + { + "epoch": 0.7834388185654009, + "grad_norm": 0.6560104489326477, + "learning_rate": 
0.0001702471372138531, + "loss": 1.3117, + "step": 7427 + }, + { + "epoch": 0.7835443037974683, + "grad_norm": 0.6337754130363464, + "learning_rate": 0.00017008789713487558, + "loss": 1.3035, + "step": 7428 + }, + { + "epoch": 0.7836497890295359, + "grad_norm": 0.6080102324485779, + "learning_rate": 0.0001699287220379407, + "loss": 1.3287, + "step": 7429 + }, + { + "epoch": 0.7837552742616034, + "grad_norm": 0.6180487275123596, + "learning_rate": 0.00016976961194088526, + "loss": 1.3325, + "step": 7430 + }, + { + "epoch": 0.7838607594936708, + "grad_norm": 0.6022199988365173, + "learning_rate": 0.000169610566861538, + "loss": 1.3159, + "step": 7431 + }, + { + "epoch": 0.7839662447257384, + "grad_norm": 0.6679702401161194, + "learning_rate": 0.0001694515868177209, + "loss": 1.3208, + "step": 7432 + }, + { + "epoch": 0.7840717299578059, + "grad_norm": 0.6138828992843628, + "learning_rate": 0.0001692926718272483, + "loss": 1.3189, + "step": 7433 + }, + { + "epoch": 0.7841772151898734, + "grad_norm": 0.6415406465530396, + "learning_rate": 0.00016913382190792754, + "loss": 1.3382, + "step": 7434 + }, + { + "epoch": 0.7842827004219409, + "grad_norm": 0.6261516213417053, + "learning_rate": 0.0001689750370775584, + "loss": 1.3086, + "step": 7435 + }, + { + "epoch": 0.7843881856540085, + "grad_norm": 0.6389262676239014, + "learning_rate": 0.00016881631735393368, + "loss": 1.2883, + "step": 7436 + }, + { + "epoch": 0.7844936708860759, + "grad_norm": 0.6321372985839844, + "learning_rate": 0.00016865766275483865, + "loss": 1.3454, + "step": 7437 + }, + { + "epoch": 0.7845991561181435, + "grad_norm": 0.6256938576698303, + "learning_rate": 0.00016849907329805118, + "loss": 1.3102, + "step": 7438 + }, + { + "epoch": 0.784704641350211, + "grad_norm": 0.6474670171737671, + "learning_rate": 0.00016834054900134228, + "loss": 1.3483, + "step": 7439 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 0.623376190662384, + "learning_rate": 0.00016818208988247533, + "loss": 1.324, + 
"step": 7440 + }, + { + "epoch": 0.784915611814346, + "grad_norm": 0.682396411895752, + "learning_rate": 0.00016802369595920647, + "loss": 1.3492, + "step": 7441 + }, + { + "epoch": 0.7850210970464135, + "grad_norm": 0.6355257630348206, + "learning_rate": 0.00016786536724928432, + "loss": 1.3171, + "step": 7442 + }, + { + "epoch": 0.785126582278481, + "grad_norm": 0.6667599081993103, + "learning_rate": 0.00016770710377045074, + "loss": 1.2952, + "step": 7443 + }, + { + "epoch": 0.7852320675105485, + "grad_norm": 0.6423972845077515, + "learning_rate": 0.00016754890554043965, + "loss": 1.3115, + "step": 7444 + }, + { + "epoch": 0.7853375527426161, + "grad_norm": 0.689125657081604, + "learning_rate": 0.00016739077257697804, + "loss": 1.3254, + "step": 7445 + }, + { + "epoch": 0.7854430379746835, + "grad_norm": 0.6451188325881958, + "learning_rate": 0.0001672327048977856, + "loss": 1.3234, + "step": 7446 + }, + { + "epoch": 0.7855485232067511, + "grad_norm": 0.6684764623641968, + "learning_rate": 0.00016707470252057423, + "loss": 1.3519, + "step": 7447 + }, + { + "epoch": 0.7856540084388186, + "grad_norm": 0.6703110337257385, + "learning_rate": 0.00016691676546304936, + "loss": 1.3114, + "step": 7448 + }, + { + "epoch": 0.785759493670886, + "grad_norm": 0.6327549815177917, + "learning_rate": 0.00016675889374290852, + "loss": 1.3345, + "step": 7449 + }, + { + "epoch": 0.7858649789029536, + "grad_norm": 0.6358935832977295, + "learning_rate": 0.0001666010873778419, + "loss": 1.3399, + "step": 7450 + }, + { + "epoch": 0.7859704641350211, + "grad_norm": 0.6513803005218506, + "learning_rate": 0.0001664433463855325, + "loss": 1.3429, + "step": 7451 + }, + { + "epoch": 0.7860759493670886, + "grad_norm": 0.6270938515663147, + "learning_rate": 0.00016628567078365612, + "loss": 1.3429, + "step": 7452 + }, + { + "epoch": 0.7861814345991561, + "grad_norm": 0.6165331602096558, + "learning_rate": 0.00016612806058988088, + "loss": 1.3138, + "step": 7453 + }, + { + "epoch": 
0.7862869198312237, + "grad_norm": 0.6674871444702148, + "learning_rate": 0.0001659705158218679, + "loss": 1.3426, + "step": 7454 + }, + { + "epoch": 0.7863924050632911, + "grad_norm": 0.7623143792152405, + "learning_rate": 0.00016581303649727076, + "loss": 1.3396, + "step": 7455 + }, + { + "epoch": 0.7864978902953587, + "grad_norm": 0.6310415267944336, + "learning_rate": 0.000165655622633736, + "loss": 1.3416, + "step": 7456 + }, + { + "epoch": 0.7866033755274262, + "grad_norm": 0.6339573860168457, + "learning_rate": 0.00016549827424890257, + "loss": 1.3337, + "step": 7457 + }, + { + "epoch": 0.7867088607594936, + "grad_norm": 0.6690483093261719, + "learning_rate": 0.00016534099136040207, + "loss": 1.3313, + "step": 7458 + }, + { + "epoch": 0.7868143459915612, + "grad_norm": 0.8541235327720642, + "learning_rate": 0.0001651837739858589, + "loss": 1.3492, + "step": 7459 + }, + { + "epoch": 0.7869198312236287, + "grad_norm": 0.6357992887496948, + "learning_rate": 0.00016502662214289, + "loss": 1.3287, + "step": 7460 + }, + { + "epoch": 0.7870253164556962, + "grad_norm": 0.8004589676856995, + "learning_rate": 0.000164869535849105, + "loss": 1.3362, + "step": 7461 + }, + { + "epoch": 0.7871308016877637, + "grad_norm": 0.6611217856407166, + "learning_rate": 0.00016471251512210626, + "loss": 1.3249, + "step": 7462 + }, + { + "epoch": 0.7872362869198313, + "grad_norm": 0.6323682069778442, + "learning_rate": 0.00016455555997948868, + "loss": 1.3231, + "step": 7463 + }, + { + "epoch": 0.7873417721518987, + "grad_norm": 0.644282877445221, + "learning_rate": 0.0001643986704388397, + "loss": 1.3037, + "step": 7464 + }, + { + "epoch": 0.7874472573839663, + "grad_norm": 0.6469168066978455, + "learning_rate": 0.00016424184651773997, + "loss": 1.3357, + "step": 7465 + }, + { + "epoch": 0.7875527426160338, + "grad_norm": 0.604770839214325, + "learning_rate": 0.0001640850882337622, + "loss": 1.3448, + "step": 7466 + }, + { + "epoch": 0.7876582278481012, + "grad_norm": 
0.7723925113677979, + "learning_rate": 0.00016392839560447196, + "loss": 1.321, + "step": 7467 + }, + { + "epoch": 0.7877637130801688, + "grad_norm": 0.6438093781471252, + "learning_rate": 0.00016377176864742734, + "loss": 1.3385, + "step": 7468 + }, + { + "epoch": 0.7878691983122363, + "grad_norm": 0.6745232343673706, + "learning_rate": 0.00016361520738017934, + "loss": 1.3119, + "step": 7469 + }, + { + "epoch": 0.7879746835443038, + "grad_norm": 0.6559710502624512, + "learning_rate": 0.00016345871182027124, + "loss": 1.3281, + "step": 7470 + }, + { + "epoch": 0.7880801687763713, + "grad_norm": 0.7636536955833435, + "learning_rate": 0.00016330228198523927, + "loss": 1.3341, + "step": 7471 + }, + { + "epoch": 0.7881856540084389, + "grad_norm": 0.6507315635681152, + "learning_rate": 0.00016314591789261216, + "loss": 1.3035, + "step": 7472 + }, + { + "epoch": 0.7882911392405063, + "grad_norm": 0.6693921685218811, + "learning_rate": 0.00016298961955991105, + "loss": 1.3562, + "step": 7473 + }, + { + "epoch": 0.7883966244725739, + "grad_norm": 0.8080589771270752, + "learning_rate": 0.00016283338700465034, + "loss": 1.332, + "step": 7474 + }, + { + "epoch": 0.7885021097046413, + "grad_norm": 0.7235192656517029, + "learning_rate": 0.00016267722024433654, + "loss": 1.3276, + "step": 7475 + }, + { + "epoch": 0.7886075949367088, + "grad_norm": 0.6699822545051575, + "learning_rate": 0.0001625211192964688, + "loss": 1.342, + "step": 7476 + }, + { + "epoch": 0.7887130801687764, + "grad_norm": 0.6450712084770203, + "learning_rate": 0.00016236508417853917, + "loss": 1.3597, + "step": 7477 + }, + { + "epoch": 0.7888185654008438, + "grad_norm": 0.7956985235214233, + "learning_rate": 0.00016220911490803206, + "loss": 1.3373, + "step": 7478 + }, + { + "epoch": 0.7889240506329114, + "grad_norm": 0.633845865726471, + "learning_rate": 0.00016205321150242454, + "loss": 1.3022, + "step": 7479 + }, + { + "epoch": 0.7890295358649789, + "grad_norm": 0.6645289063453674, + "learning_rate": 
0.00016189737397918653, + "loss": 1.344, + "step": 7480 + }, + { + "epoch": 0.7891350210970464, + "grad_norm": 0.6590778231620789, + "learning_rate": 0.00016174160235578, + "loss": 1.3443, + "step": 7481 + }, + { + "epoch": 0.7892405063291139, + "grad_norm": 0.6380268931388855, + "learning_rate": 0.00016158589664966053, + "loss": 1.3731, + "step": 7482 + }, + { + "epoch": 0.7893459915611815, + "grad_norm": 0.6725619435310364, + "learning_rate": 0.00016143025687827538, + "loss": 1.3253, + "step": 7483 + }, + { + "epoch": 0.7894514767932489, + "grad_norm": 0.7352547645568848, + "learning_rate": 0.0001612746830590649, + "loss": 1.3587, + "step": 7484 + }, + { + "epoch": 0.7895569620253164, + "grad_norm": 0.6649945378303528, + "learning_rate": 0.00016111917520946175, + "loss": 1.344, + "step": 7485 + }, + { + "epoch": 0.789662447257384, + "grad_norm": 0.6578184366226196, + "learning_rate": 0.00016096373334689154, + "loss": 1.3094, + "step": 7486 + }, + { + "epoch": 0.7897679324894514, + "grad_norm": 0.6868696212768555, + "learning_rate": 0.00016080835748877214, + "loss": 1.3276, + "step": 7487 + }, + { + "epoch": 0.789873417721519, + "grad_norm": 0.668549656867981, + "learning_rate": 0.00016065304765251423, + "loss": 1.339, + "step": 7488 + }, + { + "epoch": 0.7899789029535865, + "grad_norm": 0.6561934351921082, + "learning_rate": 0.00016049780385552113, + "loss": 1.3386, + "step": 7489 + }, + { + "epoch": 0.790084388185654, + "grad_norm": 0.6932647824287415, + "learning_rate": 0.0001603426261151884, + "loss": 1.3409, + "step": 7490 + }, + { + "epoch": 0.7901898734177215, + "grad_norm": 0.6378629207611084, + "learning_rate": 0.000160187514448905, + "loss": 1.3092, + "step": 7491 + }, + { + "epoch": 0.7902953586497891, + "grad_norm": 0.6267630457878113, + "learning_rate": 0.0001600324688740516, + "loss": 1.3617, + "step": 7492 + }, + { + "epoch": 0.7904008438818565, + "grad_norm": 0.6153097152709961, + "learning_rate": 0.00015987748940800186, + "loss": 1.3395, + "step": 
7493 + }, + { + "epoch": 0.790506329113924, + "grad_norm": 0.6333873867988586, + "learning_rate": 0.0001597225760681221, + "loss": 1.3325, + "step": 7494 + }, + { + "epoch": 0.7906118143459916, + "grad_norm": 0.6328630447387695, + "learning_rate": 0.00015956772887177115, + "loss": 1.3735, + "step": 7495 + }, + { + "epoch": 0.790717299578059, + "grad_norm": 0.6401443481445312, + "learning_rate": 0.00015941294783630022, + "loss": 1.2977, + "step": 7496 + }, + { + "epoch": 0.7908227848101266, + "grad_norm": 0.6682215332984924, + "learning_rate": 0.00015925823297905346, + "loss": 1.3166, + "step": 7497 + }, + { + "epoch": 0.7909282700421941, + "grad_norm": 0.6446288824081421, + "learning_rate": 0.00015910358431736745, + "loss": 1.3319, + "step": 7498 + }, + { + "epoch": 0.7910337552742616, + "grad_norm": 0.6807233691215515, + "learning_rate": 0.00015894900186857105, + "loss": 1.3049, + "step": 7499 + }, + { + "epoch": 0.7911392405063291, + "grad_norm": 0.6758900880813599, + "learning_rate": 0.00015879448564998648, + "loss": 1.3689, + "step": 7500 + }, + { + "epoch": 0.7912447257383967, + "grad_norm": 0.7795410752296448, + "learning_rate": 0.00015864003567892776, + "loss": 1.3868, + "step": 7501 + }, + { + "epoch": 0.7913502109704641, + "grad_norm": 0.6865178346633911, + "learning_rate": 0.00015848565197270175, + "loss": 1.337, + "step": 7502 + }, + { + "epoch": 0.7914556962025316, + "grad_norm": 0.6325127482414246, + "learning_rate": 0.00015833133454860814, + "loss": 1.3371, + "step": 7503 + }, + { + "epoch": 0.7915611814345992, + "grad_norm": 0.6519604325294495, + "learning_rate": 0.00015817708342393878, + "loss": 1.3875, + "step": 7504 + }, + { + "epoch": 0.7916666666666666, + "grad_norm": 0.7229796648025513, + "learning_rate": 0.0001580228986159783, + "loss": 1.3197, + "step": 7505 + }, + { + "epoch": 0.7917721518987342, + "grad_norm": 0.7911202311515808, + "learning_rate": 0.00015786878014200387, + "loss": 1.3443, + "step": 7506 + }, + { + "epoch": 
0.7918776371308017, + "grad_norm": 0.6088433861732483, + "learning_rate": 0.0001577147280192851, + "loss": 1.3692, + "step": 7507 + }, + { + "epoch": 0.7919831223628692, + "grad_norm": 0.7390503883361816, + "learning_rate": 0.0001575607422650846, + "loss": 1.3152, + "step": 7508 + }, + { + "epoch": 0.7920886075949367, + "grad_norm": 0.7021722197532654, + "learning_rate": 0.00015740682289665714, + "loss": 1.2807, + "step": 7509 + }, + { + "epoch": 0.7921940928270043, + "grad_norm": 0.6683815717697144, + "learning_rate": 0.0001572529699312501, + "loss": 1.3291, + "step": 7510 + }, + { + "epoch": 0.7922995780590717, + "grad_norm": 0.667841374874115, + "learning_rate": 0.0001570991833861035, + "loss": 1.3204, + "step": 7511 + }, + { + "epoch": 0.7924050632911392, + "grad_norm": 0.6778929829597473, + "learning_rate": 0.00015694546327844986, + "loss": 1.3224, + "step": 7512 + }, + { + "epoch": 0.7925105485232068, + "grad_norm": 0.7886664867401123, + "learning_rate": 0.00015679180962551435, + "loss": 1.315, + "step": 7513 + }, + { + "epoch": 0.7926160337552742, + "grad_norm": 0.6819181442260742, + "learning_rate": 0.00015663822244451446, + "loss": 1.3384, + "step": 7514 + }, + { + "epoch": 0.7927215189873418, + "grad_norm": 0.6790860295295715, + "learning_rate": 0.00015648470175266057, + "loss": 1.2945, + "step": 7515 + }, + { + "epoch": 0.7928270042194093, + "grad_norm": 0.7041786909103394, + "learning_rate": 0.00015633124756715523, + "loss": 1.3645, + "step": 7516 + }, + { + "epoch": 0.7929324894514768, + "grad_norm": 0.6183968186378479, + "learning_rate": 0.00015617785990519403, + "loss": 1.3143, + "step": 7517 + }, + { + "epoch": 0.7930379746835443, + "grad_norm": 0.6641639471054077, + "learning_rate": 0.00015602453878396479, + "loss": 1.3297, + "step": 7518 + }, + { + "epoch": 0.7931434599156119, + "grad_norm": 0.7148905992507935, + "learning_rate": 0.0001558712842206477, + "loss": 1.3305, + "step": 7519 + }, + { + "epoch": 0.7932489451476793, + "grad_norm": 
0.6193791627883911, + "learning_rate": 0.0001557180962324158, + "loss": 1.3153, + "step": 7520 + }, + { + "epoch": 0.7933544303797468, + "grad_norm": 0.6288337111473083, + "learning_rate": 0.00015556497483643466, + "loss": 1.3026, + "step": 7521 + }, + { + "epoch": 0.7934599156118144, + "grad_norm": 0.6784204840660095, + "learning_rate": 0.00015541192004986222, + "loss": 1.337, + "step": 7522 + }, + { + "epoch": 0.7935654008438818, + "grad_norm": 0.6254304647445679, + "learning_rate": 0.00015525893188984898, + "loss": 1.3151, + "step": 7523 + }, + { + "epoch": 0.7936708860759494, + "grad_norm": 0.6635228991508484, + "learning_rate": 0.00015510601037353804, + "loss": 1.3455, + "step": 7524 + }, + { + "epoch": 0.7937763713080169, + "grad_norm": 0.6527110934257507, + "learning_rate": 0.00015495315551806486, + "loss": 1.349, + "step": 7525 + }, + { + "epoch": 0.7938818565400844, + "grad_norm": 0.6390836834907532, + "learning_rate": 0.000154800367340558, + "loss": 1.3204, + "step": 7526 + }, + { + "epoch": 0.7939873417721519, + "grad_norm": 0.6402260661125183, + "learning_rate": 0.00015464764585813783, + "loss": 1.3216, + "step": 7527 + }, + { + "epoch": 0.7940928270042195, + "grad_norm": 0.6625127792358398, + "learning_rate": 0.0001544949910879177, + "loss": 1.3191, + "step": 7528 + }, + { + "epoch": 0.7941983122362869, + "grad_norm": 0.6299254298210144, + "learning_rate": 0.00015434240304700332, + "loss": 1.3301, + "step": 7529 + }, + { + "epoch": 0.7943037974683544, + "grad_norm": 0.6279729008674622, + "learning_rate": 0.00015418988175249282, + "loss": 1.3316, + "step": 7530 + }, + { + "epoch": 0.794409282700422, + "grad_norm": 0.6933956742286682, + "learning_rate": 0.00015403742722147707, + "loss": 1.3341, + "step": 7531 + }, + { + "epoch": 0.7945147679324894, + "grad_norm": 0.6519168019294739, + "learning_rate": 0.00015388503947103937, + "loss": 1.3268, + "step": 7532 + }, + { + "epoch": 0.794620253164557, + "grad_norm": 0.6516886353492737, + "learning_rate": 
0.00015373271851825527, + "loss": 1.3428, + "step": 7533 + }, + { + "epoch": 0.7947257383966245, + "grad_norm": 0.6328943967819214, + "learning_rate": 0.00015358046438019356, + "loss": 1.3487, + "step": 7534 + }, + { + "epoch": 0.794831223628692, + "grad_norm": 0.6772682070732117, + "learning_rate": 0.00015342827707391475, + "loss": 1.2844, + "step": 7535 + }, + { + "epoch": 0.7949367088607595, + "grad_norm": 0.7048916816711426, + "learning_rate": 0.0001532761566164723, + "loss": 1.2981, + "step": 7536 + }, + { + "epoch": 0.7950421940928271, + "grad_norm": 0.6438653469085693, + "learning_rate": 0.0001531241030249121, + "loss": 1.3356, + "step": 7537 + }, + { + "epoch": 0.7951476793248945, + "grad_norm": 0.6190479397773743, + "learning_rate": 0.00015297211631627234, + "loss": 1.3162, + "step": 7538 + }, + { + "epoch": 0.795253164556962, + "grad_norm": 0.6448428630828857, + "learning_rate": 0.0001528201965075841, + "loss": 1.3099, + "step": 7539 + }, + { + "epoch": 0.7953586497890295, + "grad_norm": 0.6848095059394836, + "learning_rate": 0.00015266834361587063, + "loss": 1.333, + "step": 7540 + }, + { + "epoch": 0.795464135021097, + "grad_norm": 0.6729928255081177, + "learning_rate": 0.00015251655765814777, + "loss": 1.2886, + "step": 7541 + }, + { + "epoch": 0.7955696202531646, + "grad_norm": 0.6777734160423279, + "learning_rate": 0.000152364838651424, + "loss": 1.3186, + "step": 7542 + }, + { + "epoch": 0.795675105485232, + "grad_norm": 0.7484861612319946, + "learning_rate": 0.00015221318661269985, + "loss": 1.3224, + "step": 7543 + }, + { + "epoch": 0.7957805907172996, + "grad_norm": 0.6283369064331055, + "learning_rate": 0.00015206160155896924, + "loss": 1.3224, + "step": 7544 + }, + { + "epoch": 0.7958860759493671, + "grad_norm": 0.6468734741210938, + "learning_rate": 0.00015191008350721772, + "loss": 1.3397, + "step": 7545 + }, + { + "epoch": 0.7959915611814345, + "grad_norm": 0.6444494724273682, + "learning_rate": 0.00015175863247442374, + "loss": 1.3421, + 
"step": 7546 + }, + { + "epoch": 0.7960970464135021, + "grad_norm": 0.6425506472587585, + "learning_rate": 0.00015160724847755806, + "loss": 1.3079, + "step": 7547 + }, + { + "epoch": 0.7962025316455696, + "grad_norm": 0.649818480014801, + "learning_rate": 0.00015145593153358412, + "loss": 1.3082, + "step": 7548 + }, + { + "epoch": 0.7963080168776371, + "grad_norm": 0.6724527478218079, + "learning_rate": 0.0001513046816594575, + "loss": 1.3303, + "step": 7549 + }, + { + "epoch": 0.7964135021097046, + "grad_norm": 0.623993992805481, + "learning_rate": 0.00015115349887212678, + "loss": 1.3357, + "step": 7550 + }, + { + "epoch": 0.7965189873417722, + "grad_norm": 0.6901990175247192, + "learning_rate": 0.00015100238318853262, + "loss": 1.3238, + "step": 7551 + }, + { + "epoch": 0.7966244725738396, + "grad_norm": 0.6540313363075256, + "learning_rate": 0.00015085133462560833, + "loss": 1.3524, + "step": 7552 + }, + { + "epoch": 0.7967299578059072, + "grad_norm": 0.6334957480430603, + "learning_rate": 0.00015070035320027933, + "loss": 1.3604, + "step": 7553 + }, + { + "epoch": 0.7968354430379747, + "grad_norm": 0.6389131546020508, + "learning_rate": 0.00015054943892946446, + "loss": 1.3086, + "step": 7554 + }, + { + "epoch": 0.7969409282700421, + "grad_norm": 0.6329675316810608, + "learning_rate": 0.000150398591830074, + "loss": 1.2747, + "step": 7555 + }, + { + "epoch": 0.7970464135021097, + "grad_norm": 0.6879178285598755, + "learning_rate": 0.00015024781191901122, + "loss": 1.3652, + "step": 7556 + }, + { + "epoch": 0.7971518987341772, + "grad_norm": 0.6979854702949524, + "learning_rate": 0.00015009709921317172, + "loss": 1.3181, + "step": 7557 + }, + { + "epoch": 0.7972573839662447, + "grad_norm": 0.6486397385597229, + "learning_rate": 0.00014994645372944367, + "loss": 1.2955, + "step": 7558 + }, + { + "epoch": 0.7973628691983122, + "grad_norm": 0.6198636889457703, + "learning_rate": 0.0001497958754847076, + "loss": 1.3132, + "step": 7559 + }, + { + "epoch": 
0.7974683544303798, + "grad_norm": 0.6759404540061951, + "learning_rate": 0.00014964536449583657, + "loss": 1.3285, + "step": 7560 + }, + { + "epoch": 0.7975738396624472, + "grad_norm": 0.6372925639152527, + "learning_rate": 0.0001494949207796961, + "loss": 1.3142, + "step": 7561 + }, + { + "epoch": 0.7976793248945148, + "grad_norm": 0.6584621667861938, + "learning_rate": 0.00014934454435314417, + "loss": 1.3559, + "step": 7562 + }, + { + "epoch": 0.7977848101265823, + "grad_norm": 0.852054238319397, + "learning_rate": 0.00014919423523303095, + "loss": 1.3288, + "step": 7563 + }, + { + "epoch": 0.7978902953586497, + "grad_norm": 0.628902018070221, + "learning_rate": 0.00014904399343619972, + "loss": 1.332, + "step": 7564 + }, + { + "epoch": 0.7979957805907173, + "grad_norm": 0.71123206615448, + "learning_rate": 0.00014889381897948575, + "loss": 1.3464, + "step": 7565 + }, + { + "epoch": 0.7981012658227848, + "grad_norm": 0.6863314509391785, + "learning_rate": 0.00014874371187971672, + "loss": 1.3028, + "step": 7566 + }, + { + "epoch": 0.7982067510548523, + "grad_norm": 0.6492874622344971, + "learning_rate": 0.00014859367215371293, + "loss": 1.3613, + "step": 7567 + }, + { + "epoch": 0.7983122362869198, + "grad_norm": 0.696597158908844, + "learning_rate": 0.00014844369981828698, + "loss": 1.3277, + "step": 7568 + }, + { + "epoch": 0.7984177215189874, + "grad_norm": 0.7579601407051086, + "learning_rate": 0.00014829379489024415, + "loss": 1.3571, + "step": 7569 + }, + { + "epoch": 0.7985232067510548, + "grad_norm": 0.6547719836235046, + "learning_rate": 0.00014814395738638195, + "loss": 1.3175, + "step": 7570 + }, + { + "epoch": 0.7986286919831224, + "grad_norm": 0.6191365718841553, + "learning_rate": 0.0001479941873234905, + "loss": 1.292, + "step": 7571 + }, + { + "epoch": 0.7987341772151899, + "grad_norm": 0.6747643351554871, + "learning_rate": 0.00014784448471835224, + "loss": 1.3441, + "step": 7572 + }, + { + "epoch": 0.7988396624472573, + "grad_norm": 
0.6440410614013672, + "learning_rate": 0.0001476948495877418, + "loss": 1.3236, + "step": 7573 + }, + { + "epoch": 0.7989451476793249, + "grad_norm": 0.6646935343742371, + "learning_rate": 0.00014754528194842707, + "loss": 1.3449, + "step": 7574 + }, + { + "epoch": 0.7990506329113924, + "grad_norm": 0.6982189416885376, + "learning_rate": 0.00014739578181716765, + "loss": 1.3177, + "step": 7575 + }, + { + "epoch": 0.7991561181434599, + "grad_norm": 0.6395609974861145, + "learning_rate": 0.00014724634921071573, + "loss": 1.3295, + "step": 7576 + }, + { + "epoch": 0.7992616033755274, + "grad_norm": 0.6149303317070007, + "learning_rate": 0.0001470969841458159, + "loss": 1.3426, + "step": 7577 + }, + { + "epoch": 0.799367088607595, + "grad_norm": 0.6283429861068726, + "learning_rate": 0.00014694768663920537, + "loss": 1.3449, + "step": 7578 + }, + { + "epoch": 0.7994725738396624, + "grad_norm": 0.6248589158058167, + "learning_rate": 0.0001467984567076137, + "loss": 1.3548, + "step": 7579 + }, + { + "epoch": 0.79957805907173, + "grad_norm": 0.6304753422737122, + "learning_rate": 0.00014664929436776278, + "loss": 1.3256, + "step": 7580 + }, + { + "epoch": 0.7996835443037975, + "grad_norm": 0.6362913250923157, + "learning_rate": 0.00014650019963636696, + "loss": 1.3088, + "step": 7581 + }, + { + "epoch": 0.799789029535865, + "grad_norm": 0.6569571495056152, + "learning_rate": 0.0001463511725301331, + "loss": 1.3517, + "step": 7582 + }, + { + "epoch": 0.7998945147679325, + "grad_norm": 0.6143475770950317, + "learning_rate": 0.00014620221306576027, + "loss": 1.3319, + "step": 7583 + }, + { + "epoch": 0.8, + "grad_norm": 0.6404335498809814, + "learning_rate": 0.00014605332125994038, + "loss": 1.3183, + "step": 7584 + }, + { + "epoch": 0.8001054852320675, + "grad_norm": 0.6610718369483948, + "learning_rate": 0.0001459044971293575, + "loss": 1.3385, + "step": 7585 + }, + { + "epoch": 0.800210970464135, + "grad_norm": 0.6302513480186462, + "learning_rate": 0.000145755740690688, 
+ "loss": 1.311, + "step": 7586 + }, + { + "epoch": 0.8003164556962026, + "grad_norm": 0.6237671375274658, + "learning_rate": 0.00014560705196060074, + "loss": 1.3172, + "step": 7587 + }, + { + "epoch": 0.80042194092827, + "grad_norm": 0.6224428415298462, + "learning_rate": 0.00014545843095575709, + "loss": 1.3242, + "step": 7588 + }, + { + "epoch": 0.8005274261603376, + "grad_norm": 0.6155045032501221, + "learning_rate": 0.00014530987769281075, + "loss": 1.3392, + "step": 7589 + }, + { + "epoch": 0.8006329113924051, + "grad_norm": 0.6416838765144348, + "learning_rate": 0.00014516139218840788, + "loss": 1.3024, + "step": 7590 + }, + { + "epoch": 0.8007383966244725, + "grad_norm": 0.6199944615364075, + "learning_rate": 0.00014501297445918703, + "loss": 1.3274, + "step": 7591 + }, + { + "epoch": 0.8008438818565401, + "grad_norm": 0.6587530970573425, + "learning_rate": 0.00014486462452177896, + "loss": 1.3427, + "step": 7592 + }, + { + "epoch": 0.8009493670886076, + "grad_norm": 0.6168960928916931, + "learning_rate": 0.0001447163423928073, + "loss": 1.3637, + "step": 7593 + }, + { + "epoch": 0.8010548523206751, + "grad_norm": 0.6322283148765564, + "learning_rate": 0.00014456812808888775, + "loss": 1.3178, + "step": 7594 + }, + { + "epoch": 0.8011603375527426, + "grad_norm": 0.6657394170761108, + "learning_rate": 0.00014441998162662847, + "loss": 1.3853, + "step": 7595 + }, + { + "epoch": 0.8012658227848102, + "grad_norm": 0.6208088994026184, + "learning_rate": 0.00014427190302262989, + "loss": 1.3064, + "step": 7596 + }, + { + "epoch": 0.8013713080168776, + "grad_norm": 0.6599035263061523, + "learning_rate": 0.00014412389229348494, + "loss": 1.2799, + "step": 7597 + }, + { + "epoch": 0.8014767932489452, + "grad_norm": 0.6194117069244385, + "learning_rate": 0.00014397594945577912, + "loss": 1.3099, + "step": 7598 + }, + { + "epoch": 0.8015822784810127, + "grad_norm": 0.6128129959106445, + "learning_rate": 0.00014382807452609003, + "loss": 1.3358, + "step": 7599 + }, + 
{ + "epoch": 0.8016877637130801, + "grad_norm": 0.6333854794502258, + "learning_rate": 0.00014368026752098782, + "loss": 1.309, + "step": 7600 + }, + { + "epoch": 0.8017932489451477, + "grad_norm": 0.6879093050956726, + "learning_rate": 0.00014353252845703506, + "loss": 1.3559, + "step": 7601 + }, + { + "epoch": 0.8018987341772152, + "grad_norm": 0.6729905009269714, + "learning_rate": 0.00014338485735078632, + "loss": 1.3139, + "step": 7602 + }, + { + "epoch": 0.8020042194092827, + "grad_norm": 0.6569309830665588, + "learning_rate": 0.0001432372542187895, + "loss": 1.3759, + "step": 7603 + }, + { + "epoch": 0.8021097046413502, + "grad_norm": 0.6348772644996643, + "learning_rate": 0.00014308971907758383, + "loss": 1.3418, + "step": 7604 + }, + { + "epoch": 0.8022151898734177, + "grad_norm": 0.6180599331855774, + "learning_rate": 0.00014294225194370154, + "loss": 1.3229, + "step": 7605 + }, + { + "epoch": 0.8023206751054852, + "grad_norm": 0.6525046229362488, + "learning_rate": 0.00014279485283366696, + "loss": 1.3413, + "step": 7606 + }, + { + "epoch": 0.8024261603375528, + "grad_norm": 0.6395208835601807, + "learning_rate": 0.00014264752176399687, + "loss": 1.315, + "step": 7607 + }, + { + "epoch": 0.8025316455696202, + "grad_norm": 0.63116854429245, + "learning_rate": 0.0001425002587512005, + "loss": 1.3089, + "step": 7608 + }, + { + "epoch": 0.8026371308016877, + "grad_norm": 0.6545761823654175, + "learning_rate": 0.00014235306381177952, + "loss": 1.3514, + "step": 7609 + }, + { + "epoch": 0.8027426160337553, + "grad_norm": 0.6323558688163757, + "learning_rate": 0.00014220593696222768, + "loss": 1.3548, + "step": 7610 + }, + { + "epoch": 0.8028481012658227, + "grad_norm": 0.6164981722831726, + "learning_rate": 0.00014205887821903105, + "loss": 1.2908, + "step": 7611 + }, + { + "epoch": 0.8029535864978903, + "grad_norm": 0.6442283391952515, + "learning_rate": 0.00014191188759866887, + "loss": 1.3413, + "step": 7612 + }, + { + "epoch": 0.8030590717299578, + 
"grad_norm": 0.6025527715682983, + "learning_rate": 0.00014176496511761192, + "loss": 1.3084, + "step": 7613 + }, + { + "epoch": 0.8031645569620253, + "grad_norm": 0.6393532156944275, + "learning_rate": 0.0001416181107923235, + "loss": 1.2941, + "step": 7614 + }, + { + "epoch": 0.8032700421940928, + "grad_norm": 0.6575323343276978, + "learning_rate": 0.0001414713246392594, + "loss": 1.3386, + "step": 7615 + }, + { + "epoch": 0.8033755274261604, + "grad_norm": 0.6558986902236938, + "learning_rate": 0.0001413246066748678, + "loss": 1.3304, + "step": 7616 + }, + { + "epoch": 0.8034810126582278, + "grad_norm": 0.649820864200592, + "learning_rate": 0.00014117795691558915, + "loss": 1.2903, + "step": 7617 + }, + { + "epoch": 0.8035864978902953, + "grad_norm": 0.6392351984977722, + "learning_rate": 0.00014103137537785633, + "loss": 1.3055, + "step": 7618 + }, + { + "epoch": 0.8036919831223629, + "grad_norm": 0.6710490584373474, + "learning_rate": 0.00014088486207809449, + "loss": 1.3248, + "step": 7619 + }, + { + "epoch": 0.8037974683544303, + "grad_norm": 0.6310325264930725, + "learning_rate": 0.00014073841703272092, + "loss": 1.3, + "step": 7620 + }, + { + "epoch": 0.8039029535864979, + "grad_norm": 0.686655580997467, + "learning_rate": 0.00014059204025814603, + "loss": 1.3821, + "step": 7621 + }, + { + "epoch": 0.8040084388185654, + "grad_norm": 0.6349233388900757, + "learning_rate": 0.0001404457317707718, + "loss": 1.3023, + "step": 7622 + }, + { + "epoch": 0.8041139240506329, + "grad_norm": 0.7405983805656433, + "learning_rate": 0.00014029949158699285, + "loss": 1.3197, + "step": 7623 + }, + { + "epoch": 0.8042194092827004, + "grad_norm": 0.6540986895561218, + "learning_rate": 0.00014015331972319606, + "loss": 1.3404, + "step": 7624 + }, + { + "epoch": 0.804324894514768, + "grad_norm": 0.6268332600593567, + "learning_rate": 0.00014000721619576077, + "loss": 1.3115, + "step": 7625 + }, + { + "epoch": 0.8044303797468354, + "grad_norm": 0.6220006346702576, + 
"learning_rate": 0.0001398611810210586, + "loss": 1.3165, + "step": 7626 + }, + { + "epoch": 0.804535864978903, + "grad_norm": 0.5970884561538696, + "learning_rate": 0.0001397152142154536, + "loss": 1.3261, + "step": 7627 + }, + { + "epoch": 0.8046413502109705, + "grad_norm": 0.6182096004486084, + "learning_rate": 0.00013956931579530194, + "loss": 1.3464, + "step": 7628 + }, + { + "epoch": 0.8047468354430379, + "grad_norm": 0.761511504650116, + "learning_rate": 0.0001394234857769521, + "loss": 1.3465, + "step": 7629 + }, + { + "epoch": 0.8048523206751055, + "grad_norm": 0.6572866439819336, + "learning_rate": 0.00013927772417674558, + "loss": 1.3628, + "step": 7630 + }, + { + "epoch": 0.804957805907173, + "grad_norm": 0.6324630975723267, + "learning_rate": 0.00013913203101101532, + "loss": 1.3666, + "step": 7631 + }, + { + "epoch": 0.8050632911392405, + "grad_norm": 0.6667770147323608, + "learning_rate": 0.0001389864062960871, + "loss": 1.3099, + "step": 7632 + }, + { + "epoch": 0.805168776371308, + "grad_norm": 0.641200602054596, + "learning_rate": 0.00013884085004827883, + "loss": 1.3288, + "step": 7633 + }, + { + "epoch": 0.8052742616033756, + "grad_norm": 0.6603847742080688, + "learning_rate": 0.0001386953622839008, + "loss": 1.3412, + "step": 7634 + }, + { + "epoch": 0.805379746835443, + "grad_norm": 0.634460985660553, + "learning_rate": 0.0001385499430192557, + "loss": 1.3518, + "step": 7635 + }, + { + "epoch": 0.8054852320675105, + "grad_norm": 0.6174526810646057, + "learning_rate": 0.00013840459227063842, + "loss": 1.3059, + "step": 7636 + }, + { + "epoch": 0.8055907172995781, + "grad_norm": 0.6401734352111816, + "learning_rate": 0.00013825931005433605, + "loss": 1.3015, + "step": 7637 + }, + { + "epoch": 0.8056962025316455, + "grad_norm": 0.6073313355445862, + "learning_rate": 0.00013811409638662858, + "loss": 1.3184, + "step": 7638 + }, + { + "epoch": 0.8058016877637131, + "grad_norm": 0.626855731010437, + "learning_rate": 0.0001379689512837878, + "loss": 
1.34, + "step": 7639 + }, + { + "epoch": 0.8059071729957806, + "grad_norm": 0.6725308299064636, + "learning_rate": 0.00013782387476207788, + "loss": 1.2984, + "step": 7640 + }, + { + "epoch": 0.8060126582278481, + "grad_norm": 0.648047924041748, + "learning_rate": 0.0001376788668377554, + "loss": 1.3483, + "step": 7641 + }, + { + "epoch": 0.8061181434599156, + "grad_norm": 0.6561909317970276, + "learning_rate": 0.0001375339275270692, + "loss": 1.3668, + "step": 7642 + }, + { + "epoch": 0.8062236286919832, + "grad_norm": 0.6663406491279602, + "learning_rate": 0.00013738905684626044, + "loss": 1.3093, + "step": 7643 + }, + { + "epoch": 0.8063291139240506, + "grad_norm": 0.6084521412849426, + "learning_rate": 0.00013724425481156263, + "loss": 1.356, + "step": 7644 + }, + { + "epoch": 0.8064345991561181, + "grad_norm": 0.630859375, + "learning_rate": 0.00013709952143920148, + "loss": 1.3374, + "step": 7645 + }, + { + "epoch": 0.8065400843881857, + "grad_norm": 0.6474381685256958, + "learning_rate": 0.000136954856745395, + "loss": 1.3759, + "step": 7646 + }, + { + "epoch": 0.8066455696202531, + "grad_norm": 0.6637719869613647, + "learning_rate": 0.000136810260746354, + "loss": 1.3334, + "step": 7647 + }, + { + "epoch": 0.8067510548523207, + "grad_norm": 0.6479669809341431, + "learning_rate": 0.00013666573345828083, + "loss": 1.3101, + "step": 7648 + }, + { + "epoch": 0.8068565400843882, + "grad_norm": 0.6964028477668762, + "learning_rate": 0.00013652127489737067, + "loss": 1.3345, + "step": 7649 + }, + { + "epoch": 0.8069620253164557, + "grad_norm": 0.6361033916473389, + "learning_rate": 0.00013637688507981064, + "loss": 1.3659, + "step": 7650 + }, + { + "epoch": 0.8070675105485232, + "grad_norm": 0.7325851917266846, + "learning_rate": 0.0001362325640217805, + "loss": 1.3297, + "step": 7651 + }, + { + "epoch": 0.8071729957805908, + "grad_norm": 0.6447484493255615, + "learning_rate": 0.00013608831173945207, + "loss": 1.2569, + "step": 7652 + }, + { + "epoch": 
0.8072784810126582, + "grad_norm": 0.6445253491401672, + "learning_rate": 0.0001359441282489895, + "loss": 1.3325, + "step": 7653 + }, + { + "epoch": 0.8073839662447257, + "grad_norm": 0.6932387351989746, + "learning_rate": 0.0001358000135665494, + "loss": 1.3618, + "step": 7654 + }, + { + "epoch": 0.8074894514767933, + "grad_norm": 0.6325111389160156, + "learning_rate": 0.00013565596770828025, + "loss": 1.2971, + "step": 7655 + }, + { + "epoch": 0.8075949367088607, + "grad_norm": 0.7399255037307739, + "learning_rate": 0.00013551199069032348, + "loss": 1.2955, + "step": 7656 + }, + { + "epoch": 0.8077004219409283, + "grad_norm": 0.6666789650917053, + "learning_rate": 0.0001353680825288123, + "loss": 1.3562, + "step": 7657 + }, + { + "epoch": 0.8078059071729958, + "grad_norm": 0.7326605916023254, + "learning_rate": 0.0001352242432398723, + "loss": 1.3486, + "step": 7658 + }, + { + "epoch": 0.8079113924050633, + "grad_norm": 0.6276304125785828, + "learning_rate": 0.00013508047283962137, + "loss": 1.3284, + "step": 7659 + }, + { + "epoch": 0.8080168776371308, + "grad_norm": 0.6400325894355774, + "learning_rate": 0.0001349367713441697, + "loss": 1.3077, + "step": 7660 + }, + { + "epoch": 0.8081223628691984, + "grad_norm": 0.6208072900772095, + "learning_rate": 0.0001347931387696198, + "loss": 1.2996, + "step": 7661 + }, + { + "epoch": 0.8082278481012658, + "grad_norm": 0.7278029322624207, + "learning_rate": 0.0001346495751320664, + "loss": 1.3348, + "step": 7662 + }, + { + "epoch": 0.8083333333333333, + "grad_norm": 0.6786307692527771, + "learning_rate": 0.00013450608044759634, + "loss": 1.3472, + "step": 7663 + }, + { + "epoch": 0.8084388185654009, + "grad_norm": 0.6637428998947144, + "learning_rate": 0.00013436265473228926, + "loss": 1.3203, + "step": 7664 + }, + { + "epoch": 0.8085443037974683, + "grad_norm": 0.6943090558052063, + "learning_rate": 0.0001342192980022166, + "loss": 1.3393, + "step": 7665 + }, + { + "epoch": 0.8086497890295359, + "grad_norm": 
0.6238465905189514, + "learning_rate": 0.00013407601027344213, + "loss": 1.2862, + "step": 7666 + }, + { + "epoch": 0.8087552742616034, + "grad_norm": 0.6222296357154846, + "learning_rate": 0.00013393279156202197, + "loss": 1.3194, + "step": 7667 + }, + { + "epoch": 0.8088607594936709, + "grad_norm": 0.6575150489807129, + "learning_rate": 0.00013378964188400457, + "loss": 1.3713, + "step": 7668 + }, + { + "epoch": 0.8089662447257384, + "grad_norm": 0.6321315765380859, + "learning_rate": 0.00013364656125543044, + "loss": 1.3291, + "step": 7669 + }, + { + "epoch": 0.8090717299578059, + "grad_norm": 0.6280080080032349, + "learning_rate": 0.0001335035496923326, + "loss": 1.325, + "step": 7670 + }, + { + "epoch": 0.8091772151898734, + "grad_norm": 0.6713939905166626, + "learning_rate": 0.00013336060721073608, + "loss": 1.3461, + "step": 7671 + }, + { + "epoch": 0.809282700421941, + "grad_norm": 0.64470374584198, + "learning_rate": 0.00013321773382665822, + "loss": 1.3712, + "step": 7672 + }, + { + "epoch": 0.8093881856540084, + "grad_norm": 0.6112431883811951, + "learning_rate": 0.00013307492955610896, + "loss": 1.3631, + "step": 7673 + }, + { + "epoch": 0.8094936708860759, + "grad_norm": 0.6193538308143616, + "learning_rate": 0.0001329321944150902, + "loss": 1.3474, + "step": 7674 + }, + { + "epoch": 0.8095991561181435, + "grad_norm": 0.6278026700019836, + "learning_rate": 0.000132789528419596, + "loss": 1.2983, + "step": 7675 + }, + { + "epoch": 0.8097046413502109, + "grad_norm": 0.6551947593688965, + "learning_rate": 0.0001326469315856128, + "loss": 1.3293, + "step": 7676 + }, + { + "epoch": 0.8098101265822785, + "grad_norm": 0.6392771601676941, + "learning_rate": 0.00013250440392911927, + "loss": 1.3147, + "step": 7677 + }, + { + "epoch": 0.809915611814346, + "grad_norm": 0.6239494681358337, + "learning_rate": 0.00013236194546608645, + "loss": 1.3628, + "step": 7678 + }, + { + "epoch": 0.8100210970464135, + "grad_norm": 0.6386668086051941, + "learning_rate": 
0.00013221955621247749, + "loss": 1.3025, + "step": 7679 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 0.7123549580574036, + "learning_rate": 0.0001320772361842478, + "loss": 1.3686, + "step": 7680 + }, + { + "epoch": 0.8102320675105485, + "grad_norm": 0.6310403347015381, + "learning_rate": 0.00013193498539734478, + "loss": 1.3283, + "step": 7681 + }, + { + "epoch": 0.810337552742616, + "grad_norm": 0.6915234923362732, + "learning_rate": 0.00013179280386770885, + "loss": 1.3121, + "step": 7682 + }, + { + "epoch": 0.8104430379746835, + "grad_norm": 0.63234943151474, + "learning_rate": 0.00013165069161127183, + "loss": 1.3088, + "step": 7683 + }, + { + "epoch": 0.8105485232067511, + "grad_norm": 0.6714625954627991, + "learning_rate": 0.00013150864864395825, + "loss": 1.3125, + "step": 7684 + }, + { + "epoch": 0.8106540084388185, + "grad_norm": 0.6469218730926514, + "learning_rate": 0.00013136667498168464, + "loss": 1.3488, + "step": 7685 + }, + { + "epoch": 0.8107594936708861, + "grad_norm": 0.6747052669525146, + "learning_rate": 0.00013122477064035992, + "loss": 1.33, + "step": 7686 + }, + { + "epoch": 0.8108649789029536, + "grad_norm": 0.6512048840522766, + "learning_rate": 0.00013108293563588504, + "loss": 1.3283, + "step": 7687 + }, + { + "epoch": 0.810970464135021, + "grad_norm": 0.6440286040306091, + "learning_rate": 0.00013094116998415358, + "loss": 1.294, + "step": 7688 + }, + { + "epoch": 0.8110759493670886, + "grad_norm": 0.6310642957687378, + "learning_rate": 0.00013079947370105057, + "loss": 1.313, + "step": 7689 + }, + { + "epoch": 0.8111814345991561, + "grad_norm": 0.636311411857605, + "learning_rate": 0.00013065784680245442, + "loss": 1.2817, + "step": 7690 + }, + { + "epoch": 0.8112869198312236, + "grad_norm": 0.6406234502792358, + "learning_rate": 0.00013051628930423485, + "loss": 1.3359, + "step": 7691 + }, + { + "epoch": 0.8113924050632911, + "grad_norm": 0.6185030937194824, + "learning_rate": 0.00013037480122225412, + "loss": 1.3429, + 
"step": 7692 + }, + { + "epoch": 0.8114978902953587, + "grad_norm": 0.608440101146698, + "learning_rate": 0.00013023338257236655, + "loss": 1.3187, + "step": 7693 + }, + { + "epoch": 0.8116033755274261, + "grad_norm": 0.6722939014434814, + "learning_rate": 0.00013009203337041898, + "loss": 1.3128, + "step": 7694 + }, + { + "epoch": 0.8117088607594937, + "grad_norm": 0.6350464224815369, + "learning_rate": 0.0001299507536322502, + "loss": 1.3089, + "step": 7695 + }, + { + "epoch": 0.8118143459915612, + "grad_norm": 0.6193252205848694, + "learning_rate": 0.00012980954337369133, + "loss": 1.3496, + "step": 7696 + }, + { + "epoch": 0.8119198312236287, + "grad_norm": 0.6898901462554932, + "learning_rate": 0.00012966840261056562, + "loss": 1.305, + "step": 7697 + }, + { + "epoch": 0.8120253164556962, + "grad_norm": 0.6710726022720337, + "learning_rate": 0.0001295273313586885, + "loss": 1.3157, + "step": 7698 + }, + { + "epoch": 0.8121308016877637, + "grad_norm": 0.6386677026748657, + "learning_rate": 0.00012938632963386808, + "loss": 1.3166, + "step": 7699 + }, + { + "epoch": 0.8122362869198312, + "grad_norm": 0.6174101829528809, + "learning_rate": 0.00012924539745190402, + "loss": 1.3427, + "step": 7700 + }, + { + "epoch": 0.8123417721518987, + "grad_norm": 0.7077039480209351, + "learning_rate": 0.0001291045348285885, + "loss": 1.3126, + "step": 7701 + }, + { + "epoch": 0.8124472573839663, + "grad_norm": 0.7061591744422913, + "learning_rate": 0.00012896374177970602, + "loss": 1.344, + "step": 7702 + }, + { + "epoch": 0.8125527426160337, + "grad_norm": 0.607650637626648, + "learning_rate": 0.00012882301832103297, + "loss": 1.2945, + "step": 7703 + }, + { + "epoch": 0.8126582278481013, + "grad_norm": 0.6966468095779419, + "learning_rate": 0.0001286823644683382, + "loss": 1.3156, + "step": 7704 + }, + { + "epoch": 0.8127637130801688, + "grad_norm": 0.6565161347389221, + "learning_rate": 0.0001285417802373827, + "loss": 1.3108, + "step": 7705 + }, + { + "epoch": 
0.8128691983122363, + "grad_norm": 0.6241719722747803, + "learning_rate": 0.00012840126564391961, + "loss": 1.301, + "step": 7706 + }, + { + "epoch": 0.8129746835443038, + "grad_norm": 0.6668683290481567, + "learning_rate": 0.00012826082070369402, + "loss": 1.3456, + "step": 7707 + }, + { + "epoch": 0.8130801687763713, + "grad_norm": 0.6661271452903748, + "learning_rate": 0.00012812044543244395, + "loss": 1.3202, + "step": 7708 + }, + { + "epoch": 0.8131856540084388, + "grad_norm": 0.6420945525169373, + "learning_rate": 0.00012798013984589894, + "loss": 1.3711, + "step": 7709 + }, + { + "epoch": 0.8132911392405063, + "grad_norm": 0.6890540719032288, + "learning_rate": 0.0001278399039597809, + "loss": 1.3146, + "step": 7710 + }, + { + "epoch": 0.8133966244725739, + "grad_norm": 0.6314601898193359, + "learning_rate": 0.00012769973778980405, + "loss": 1.3545, + "step": 7711 + }, + { + "epoch": 0.8135021097046413, + "grad_norm": 0.596890926361084, + "learning_rate": 0.00012755964135167464, + "loss": 1.3198, + "step": 7712 + }, + { + "epoch": 0.8136075949367089, + "grad_norm": 0.6266177296638489, + "learning_rate": 0.00012741961466109113, + "loss": 1.3106, + "step": 7713 + }, + { + "epoch": 0.8137130801687764, + "grad_norm": 0.6790742874145508, + "learning_rate": 0.00012727965773374434, + "loss": 1.3404, + "step": 7714 + }, + { + "epoch": 0.8138185654008439, + "grad_norm": 0.6495212316513062, + "learning_rate": 0.00012713977058531685, + "loss": 1.322, + "step": 7715 + }, + { + "epoch": 0.8139240506329114, + "grad_norm": 0.6706995964050293, + "learning_rate": 0.0001269999532314841, + "loss": 1.3384, + "step": 7716 + }, + { + "epoch": 0.814029535864979, + "grad_norm": 0.652985692024231, + "learning_rate": 0.00012686020568791311, + "loss": 1.3709, + "step": 7717 + }, + { + "epoch": 0.8141350210970464, + "grad_norm": 0.6215941905975342, + "learning_rate": 0.00012672052797026344, + "loss": 1.2843, + "step": 7718 + }, + { + "epoch": 0.8142405063291139, + "grad_norm": 
0.650672435760498, + "learning_rate": 0.00012658092009418652, + "loss": 1.2952, + "step": 7719 + }, + { + "epoch": 0.8143459915611815, + "grad_norm": 0.6388464570045471, + "learning_rate": 0.0001264413820753261, + "loss": 1.3275, + "step": 7720 + }, + { + "epoch": 0.8144514767932489, + "grad_norm": 0.6465962529182434, + "learning_rate": 0.0001263019139293182, + "loss": 1.3517, + "step": 7721 + }, + { + "epoch": 0.8145569620253165, + "grad_norm": 0.6749861240386963, + "learning_rate": 0.0001261625156717909, + "loss": 1.3171, + "step": 7722 + }, + { + "epoch": 0.814662447257384, + "grad_norm": 0.6383531093597412, + "learning_rate": 0.0001260231873183644, + "loss": 1.3542, + "step": 7723 + }, + { + "epoch": 0.8147679324894515, + "grad_norm": 0.6463008522987366, + "learning_rate": 0.00012588392888465103, + "loss": 1.3076, + "step": 7724 + }, + { + "epoch": 0.814873417721519, + "grad_norm": 0.6617644429206848, + "learning_rate": 0.0001257447403862557, + "loss": 1.3708, + "step": 7725 + }, + { + "epoch": 0.8149789029535865, + "grad_norm": 0.6227750182151794, + "learning_rate": 0.00012560562183877507, + "loss": 1.3328, + "step": 7726 + }, + { + "epoch": 0.815084388185654, + "grad_norm": 0.644473135471344, + "learning_rate": 0.00012546657325779805, + "loss": 1.322, + "step": 7727 + }, + { + "epoch": 0.8151898734177215, + "grad_norm": 0.6980053186416626, + "learning_rate": 0.00012532759465890567, + "loss": 1.3133, + "step": 7728 + }, + { + "epoch": 0.8152953586497891, + "grad_norm": 0.6003960371017456, + "learning_rate": 0.00012518868605767118, + "loss": 1.322, + "step": 7729 + }, + { + "epoch": 0.8154008438818565, + "grad_norm": 0.6413686275482178, + "learning_rate": 0.00012504984746966003, + "loss": 1.3411, + "step": 7730 + }, + { + "epoch": 0.8155063291139241, + "grad_norm": 0.628251850605011, + "learning_rate": 0.0001249110789104298, + "loss": 1.314, + "step": 7731 + }, + { + "epoch": 0.8156118143459916, + "grad_norm": 0.6505742073059082, + "learning_rate": 
0.00012477238039553006, + "loss": 1.3499, + "step": 7732 + }, + { + "epoch": 0.815717299578059, + "grad_norm": 0.6519788503646851, + "learning_rate": 0.00012463375194050267, + "loss": 1.3326, + "step": 7733 + }, + { + "epoch": 0.8158227848101266, + "grad_norm": 0.6159809231758118, + "learning_rate": 0.00012449519356088192, + "loss": 1.3108, + "step": 7734 + }, + { + "epoch": 0.8159282700421941, + "grad_norm": 0.6059594750404358, + "learning_rate": 0.0001243567052721937, + "loss": 1.3156, + "step": 7735 + }, + { + "epoch": 0.8160337552742616, + "grad_norm": 0.6113654375076294, + "learning_rate": 0.00012421828708995649, + "loss": 1.3358, + "step": 7736 + }, + { + "epoch": 0.8161392405063291, + "grad_norm": 0.6278781890869141, + "learning_rate": 0.00012407993902968057, + "loss": 1.2965, + "step": 7737 + }, + { + "epoch": 0.8162447257383966, + "grad_norm": 0.643337070941925, + "learning_rate": 0.00012394166110686857, + "loss": 1.3498, + "step": 7738 + }, + { + "epoch": 0.8163502109704641, + "grad_norm": 0.6193232536315918, + "learning_rate": 0.0001238034533370153, + "loss": 1.3709, + "step": 7739 + }, + { + "epoch": 0.8164556962025317, + "grad_norm": 0.6323803663253784, + "learning_rate": 0.00012366531573560754, + "loss": 1.3371, + "step": 7740 + }, + { + "epoch": 0.8165611814345991, + "grad_norm": 0.5969319343566895, + "learning_rate": 0.00012352724831812424, + "loss": 1.2933, + "step": 7741 + }, + { + "epoch": 0.8166666666666667, + "grad_norm": 0.6084335446357727, + "learning_rate": 0.0001233892511000368, + "loss": 1.285, + "step": 7742 + }, + { + "epoch": 0.8167721518987342, + "grad_norm": 0.6431891322135925, + "learning_rate": 0.00012325132409680829, + "loss": 1.3566, + "step": 7743 + }, + { + "epoch": 0.8168776371308016, + "grad_norm": 0.7514292001724243, + "learning_rate": 0.00012311346732389418, + "loss": 1.3442, + "step": 7744 + }, + { + "epoch": 0.8169831223628692, + "grad_norm": 0.6569433808326721, + "learning_rate": 0.000122975680796742, + "loss": 1.272, + 
"step": 7745 + }, + { + "epoch": 0.8170886075949367, + "grad_norm": 0.6178393959999084, + "learning_rate": 0.00012283796453079146, + "loss": 1.3166, + "step": 7746 + }, + { + "epoch": 0.8171940928270042, + "grad_norm": 0.6062189340591431, + "learning_rate": 0.00012270031854147426, + "loss": 1.3017, + "step": 7747 + }, + { + "epoch": 0.8172995780590717, + "grad_norm": 0.7739629149436951, + "learning_rate": 0.0001225627428442143, + "loss": 1.3099, + "step": 7748 + }, + { + "epoch": 0.8174050632911393, + "grad_norm": 0.6616411805152893, + "learning_rate": 0.0001224252374544278, + "loss": 1.3554, + "step": 7749 + }, + { + "epoch": 0.8175105485232067, + "grad_norm": 0.6607761383056641, + "learning_rate": 0.00012228780238752264, + "loss": 1.3376, + "step": 7750 + }, + { + "epoch": 0.8176160337552743, + "grad_norm": 0.7608487606048584, + "learning_rate": 0.00012215043765889932, + "loss": 1.3353, + "step": 7751 + }, + { + "epoch": 0.8177215189873418, + "grad_norm": 0.6446580290794373, + "learning_rate": 0.00012201314328395032, + "loss": 1.2719, + "step": 7752 + }, + { + "epoch": 0.8178270042194092, + "grad_norm": 0.689401388168335, + "learning_rate": 0.00012187591927806, + "loss": 1.3061, + "step": 7753 + }, + { + "epoch": 0.8179324894514768, + "grad_norm": 0.6373984217643738, + "learning_rate": 0.0001217387656566051, + "loss": 1.3191, + "step": 7754 + }, + { + "epoch": 0.8180379746835443, + "grad_norm": 0.7481015920639038, + "learning_rate": 0.0001216016824349542, + "loss": 1.3312, + "step": 7755 + }, + { + "epoch": 0.8181434599156118, + "grad_norm": 0.6354801058769226, + "learning_rate": 0.00012146466962846833, + "loss": 1.3181, + "step": 7756 + }, + { + "epoch": 0.8182489451476793, + "grad_norm": 0.6229519844055176, + "learning_rate": 0.00012132772725250038, + "loss": 1.3215, + "step": 7757 + }, + { + "epoch": 0.8183544303797469, + "grad_norm": 0.728160560131073, + "learning_rate": 0.0001211908553223954, + "loss": 1.2863, + "step": 7758 + }, + { + "epoch": 
0.8184599156118143, + "grad_norm": 0.6542092561721802, + "learning_rate": 0.00012105405385349047, + "loss": 1.349, + "step": 7759 + }, + { + "epoch": 0.8185654008438819, + "grad_norm": 0.6669650077819824, + "learning_rate": 0.00012091732286111514, + "loss": 1.3421, + "step": 7760 + }, + { + "epoch": 0.8186708860759494, + "grad_norm": 0.6247043013572693, + "learning_rate": 0.00012078066236059068, + "loss": 1.2938, + "step": 7761 + }, + { + "epoch": 0.8187763713080168, + "grad_norm": 0.6401679515838623, + "learning_rate": 0.00012064407236723066, + "loss": 1.2897, + "step": 7762 + }, + { + "epoch": 0.8188818565400844, + "grad_norm": 0.6464100480079651, + "learning_rate": 0.00012050755289634049, + "loss": 1.3598, + "step": 7763 + }, + { + "epoch": 0.8189873417721519, + "grad_norm": 0.651766836643219, + "learning_rate": 0.00012037110396321796, + "loss": 1.345, + "step": 7764 + }, + { + "epoch": 0.8190928270042194, + "grad_norm": 0.7050904035568237, + "learning_rate": 0.0001202347255831529, + "loss": 1.32, + "step": 7765 + }, + { + "epoch": 0.8191983122362869, + "grad_norm": 0.649869441986084, + "learning_rate": 0.0001200984177714271, + "loss": 1.3574, + "step": 7766 + }, + { + "epoch": 0.8193037974683545, + "grad_norm": 0.7937542796134949, + "learning_rate": 0.00011996218054331434, + "loss": 1.3299, + "step": 7767 + }, + { + "epoch": 0.8194092827004219, + "grad_norm": 0.7150658369064331, + "learning_rate": 0.00011982601391408115, + "loss": 1.33, + "step": 7768 + }, + { + "epoch": 0.8195147679324895, + "grad_norm": 0.6403562426567078, + "learning_rate": 0.00011968991789898533, + "loss": 1.3668, + "step": 7769 + }, + { + "epoch": 0.819620253164557, + "grad_norm": 0.6269649863243103, + "learning_rate": 0.00011955389251327737, + "loss": 1.2747, + "step": 7770 + }, + { + "epoch": 0.8197257383966244, + "grad_norm": 0.6635616421699524, + "learning_rate": 0.00011941793777219937, + "loss": 1.3045, + "step": 7771 + }, + { + "epoch": 0.819831223628692, + "grad_norm": 
0.6683438420295715, + "learning_rate": 0.00011928205369098574, + "loss": 1.2983, + "step": 7772 + }, + { + "epoch": 0.8199367088607595, + "grad_norm": 0.6565869450569153, + "learning_rate": 0.00011914624028486315, + "loss": 1.2989, + "step": 7773 + }, + { + "epoch": 0.820042194092827, + "grad_norm": 0.6161803007125854, + "learning_rate": 0.00011901049756905, + "loss": 1.2715, + "step": 7774 + }, + { + "epoch": 0.8201476793248945, + "grad_norm": 0.6643919944763184, + "learning_rate": 0.00011887482555875695, + "loss": 1.3373, + "step": 7775 + }, + { + "epoch": 0.8202531645569621, + "grad_norm": 0.7055792808532715, + "learning_rate": 0.00011873922426918668, + "loss": 1.3523, + "step": 7776 + }, + { + "epoch": 0.8203586497890295, + "grad_norm": 0.6614772081375122, + "learning_rate": 0.0001186036937155342, + "loss": 1.347, + "step": 7777 + }, + { + "epoch": 0.820464135021097, + "grad_norm": 0.6364094614982605, + "learning_rate": 0.00011846823391298628, + "loss": 1.3216, + "step": 7778 + }, + { + "epoch": 0.8205696202531646, + "grad_norm": 0.636979341506958, + "learning_rate": 0.00011833284487672185, + "loss": 1.3009, + "step": 7779 + }, + { + "epoch": 0.820675105485232, + "grad_norm": 0.7619785070419312, + "learning_rate": 0.00011819752662191197, + "loss": 1.3441, + "step": 7780 + }, + { + "epoch": 0.8207805907172996, + "grad_norm": 0.6511399745941162, + "learning_rate": 0.00011806227916371964, + "loss": 1.365, + "step": 7781 + }, + { + "epoch": 0.8208860759493671, + "grad_norm": 0.5963508486747742, + "learning_rate": 0.0001179271025173001, + "loss": 1.3299, + "step": 7782 + }, + { + "epoch": 0.8209915611814346, + "grad_norm": 0.630648672580719, + "learning_rate": 0.00011779199669780046, + "loss": 1.3265, + "step": 7783 + }, + { + "epoch": 0.8210970464135021, + "grad_norm": 0.604856550693512, + "learning_rate": 0.00011765696172036006, + "loss": 1.3457, + "step": 7784 + }, + { + "epoch": 0.8212025316455697, + "grad_norm": 0.6244136095046997, + "learning_rate": 
0.00011752199760011017, + "loss": 1.3192, + "step": 7785 + }, + { + "epoch": 0.8213080168776371, + "grad_norm": 0.7260312438011169, + "learning_rate": 0.00011738710435217431, + "loss": 1.36, + "step": 7786 + }, + { + "epoch": 0.8214135021097047, + "grad_norm": 0.642740786075592, + "learning_rate": 0.00011725228199166805, + "loss": 1.3556, + "step": 7787 + }, + { + "epoch": 0.8215189873417722, + "grad_norm": 0.6778746247291565, + "learning_rate": 0.00011711753053369861, + "loss": 1.323, + "step": 7788 + }, + { + "epoch": 0.8216244725738396, + "grad_norm": 0.6531384587287903, + "learning_rate": 0.00011698284999336578, + "loss": 1.3088, + "step": 7789 + }, + { + "epoch": 0.8217299578059072, + "grad_norm": 0.7571552395820618, + "learning_rate": 0.00011684824038576115, + "loss": 1.3242, + "step": 7790 + }, + { + "epoch": 0.8218354430379747, + "grad_norm": 0.6447141170501709, + "learning_rate": 0.00011671370172596829, + "loss": 1.3301, + "step": 7791 + }, + { + "epoch": 0.8219409282700422, + "grad_norm": 0.7119896411895752, + "learning_rate": 0.00011657923402906309, + "loss": 1.3737, + "step": 7792 + }, + { + "epoch": 0.8220464135021097, + "grad_norm": 0.7264859676361084, + "learning_rate": 0.000116444837310113, + "loss": 1.3481, + "step": 7793 + }, + { + "epoch": 0.8221518987341773, + "grad_norm": 0.6368402242660522, + "learning_rate": 0.00011631051158417828, + "loss": 1.2933, + "step": 7794 + }, + { + "epoch": 0.8222573839662447, + "grad_norm": 0.6393585801124573, + "learning_rate": 0.00011617625686631056, + "loss": 1.2948, + "step": 7795 + }, + { + "epoch": 0.8223628691983123, + "grad_norm": 0.6145182847976685, + "learning_rate": 0.00011604207317155383, + "loss": 1.327, + "step": 7796 + }, + { + "epoch": 0.8224683544303798, + "grad_norm": 0.6450685858726501, + "learning_rate": 0.00011590796051494395, + "loss": 1.3236, + "step": 7797 + }, + { + "epoch": 0.8225738396624472, + "grad_norm": 0.7416584491729736, + "learning_rate": 0.00011577391891150901, + "loss": 1.3135, + 
"step": 7798 + }, + { + "epoch": 0.8226793248945148, + "grad_norm": 0.6803736090660095, + "learning_rate": 0.00011563994837626898, + "loss": 1.3575, + "step": 7799 + }, + { + "epoch": 0.8227848101265823, + "grad_norm": 0.6094335913658142, + "learning_rate": 0.00011550604892423593, + "loss": 1.2932, + "step": 7800 + }, + { + "epoch": 0.8228902953586498, + "grad_norm": 0.885927140712738, + "learning_rate": 0.00011537222057041396, + "loss": 1.3341, + "step": 7801 + }, + { + "epoch": 0.8229957805907173, + "grad_norm": 0.6398571133613586, + "learning_rate": 0.00011523846332979907, + "loss": 1.3166, + "step": 7802 + }, + { + "epoch": 0.8231012658227848, + "grad_norm": 0.7678213119506836, + "learning_rate": 0.00011510477721737974, + "loss": 1.3109, + "step": 7803 + }, + { + "epoch": 0.8232067510548523, + "grad_norm": 0.6954907178878784, + "learning_rate": 0.00011497116224813604, + "loss": 1.3306, + "step": 7804 + }, + { + "epoch": 0.8233122362869199, + "grad_norm": 0.6940751075744629, + "learning_rate": 0.0001148376184370401, + "loss": 1.2967, + "step": 7805 + }, + { + "epoch": 0.8234177215189873, + "grad_norm": 0.6524605751037598, + "learning_rate": 0.00011470414579905617, + "loss": 1.3604, + "step": 7806 + }, + { + "epoch": 0.8235232067510548, + "grad_norm": 0.7012604475021362, + "learning_rate": 0.00011457074434914067, + "loss": 1.3343, + "step": 7807 + }, + { + "epoch": 0.8236286919831224, + "grad_norm": 0.7301698923110962, + "learning_rate": 0.00011443741410224173, + "loss": 1.2949, + "step": 7808 + }, + { + "epoch": 0.8237341772151898, + "grad_norm": 0.7267259955406189, + "learning_rate": 0.00011430415507329975, + "loss": 1.332, + "step": 7809 + }, + { + "epoch": 0.8238396624472574, + "grad_norm": 0.6040930151939392, + "learning_rate": 0.0001141709672772471, + "loss": 1.3092, + "step": 7810 + }, + { + "epoch": 0.8239451476793249, + "grad_norm": 0.6865214109420776, + "learning_rate": 0.00011403785072900793, + "loss": 1.2996, + "step": 7811 + }, + { + "epoch": 
0.8240506329113924, + "grad_norm": 0.6670054793357849, + "learning_rate": 0.00011390480544349891, + "loss": 1.3303, + "step": 7812 + }, + { + "epoch": 0.8241561181434599, + "grad_norm": 0.7069182991981506, + "learning_rate": 0.00011377183143562833, + "loss": 1.3104, + "step": 7813 + }, + { + "epoch": 0.8242616033755275, + "grad_norm": 0.6266477704048157, + "learning_rate": 0.00011363892872029655, + "loss": 1.3751, + "step": 7814 + }, + { + "epoch": 0.8243670886075949, + "grad_norm": 0.6417503356933594, + "learning_rate": 0.00011350609731239597, + "loss": 1.3184, + "step": 7815 + }, + { + "epoch": 0.8244725738396624, + "grad_norm": 0.6138485074043274, + "learning_rate": 0.00011337333722681104, + "loss": 1.2968, + "step": 7816 + }, + { + "epoch": 0.82457805907173, + "grad_norm": 0.6645317077636719, + "learning_rate": 0.00011324064847841817, + "loss": 1.3149, + "step": 7817 + }, + { + "epoch": 0.8246835443037974, + "grad_norm": 0.6451671719551086, + "learning_rate": 0.00011310803108208581, + "loss": 1.3651, + "step": 7818 + }, + { + "epoch": 0.824789029535865, + "grad_norm": 0.6201272010803223, + "learning_rate": 0.00011297548505267424, + "loss": 1.3385, + "step": 7819 + }, + { + "epoch": 0.8248945147679325, + "grad_norm": 0.6228077411651611, + "learning_rate": 0.00011284301040503625, + "loss": 1.3642, + "step": 7820 + }, + { + "epoch": 0.825, + "grad_norm": 0.6902534365653992, + "learning_rate": 0.00011271060715401604, + "loss": 1.3388, + "step": 7821 + }, + { + "epoch": 0.8251054852320675, + "grad_norm": 0.6140959858894348, + "learning_rate": 0.00011257827531445017, + "loss": 1.3102, + "step": 7822 + }, + { + "epoch": 0.825210970464135, + "grad_norm": 0.6167933344841003, + "learning_rate": 0.00011244601490116693, + "loss": 1.3221, + "step": 7823 + }, + { + "epoch": 0.8253164556962025, + "grad_norm": 0.6492935419082642, + "learning_rate": 0.00011231382592898698, + "loss": 1.297, + "step": 7824 + }, + { + "epoch": 0.82542194092827, + "grad_norm": 0.6550188660621643, + 
"learning_rate": 0.00011218170841272254, + "loss": 1.3107, + "step": 7825 + }, + { + "epoch": 0.8255274261603376, + "grad_norm": 0.647946298122406, + "learning_rate": 0.00011204966236717811, + "loss": 1.3224, + "step": 7826 + }, + { + "epoch": 0.825632911392405, + "grad_norm": 0.6544481515884399, + "learning_rate": 0.0001119176878071502, + "loss": 1.356, + "step": 7827 + }, + { + "epoch": 0.8257383966244726, + "grad_norm": 0.6976693272590637, + "learning_rate": 0.00011178578474742687, + "loss": 1.3308, + "step": 7828 + }, + { + "epoch": 0.8258438818565401, + "grad_norm": 0.6685861945152283, + "learning_rate": 0.00011165395320278898, + "loss": 1.3468, + "step": 7829 + }, + { + "epoch": 0.8259493670886076, + "grad_norm": 0.6902564167976379, + "learning_rate": 0.0001115221931880088, + "loss": 1.2809, + "step": 7830 + }, + { + "epoch": 0.8260548523206751, + "grad_norm": 0.6304628252983093, + "learning_rate": 0.00011139050471785051, + "loss": 1.3202, + "step": 7831 + }, + { + "epoch": 0.8261603375527427, + "grad_norm": 0.625949501991272, + "learning_rate": 0.00011125888780707064, + "loss": 1.3027, + "step": 7832 + }, + { + "epoch": 0.8262658227848101, + "grad_norm": 0.6376245021820068, + "learning_rate": 0.00011112734247041739, + "loss": 1.3087, + "step": 7833 + }, + { + "epoch": 0.8263713080168776, + "grad_norm": 0.6726844310760498, + "learning_rate": 0.00011099586872263107, + "loss": 1.3112, + "step": 7834 + }, + { + "epoch": 0.8264767932489452, + "grad_norm": 0.6408639550209045, + "learning_rate": 0.00011086446657844412, + "loss": 1.2964, + "step": 7835 + }, + { + "epoch": 0.8265822784810126, + "grad_norm": 0.6562597155570984, + "learning_rate": 0.0001107331360525807, + "loss": 1.2692, + "step": 7836 + }, + { + "epoch": 0.8266877637130802, + "grad_norm": 0.6689485907554626, + "learning_rate": 0.00011060187715975686, + "loss": 1.339, + "step": 7837 + }, + { + "epoch": 0.8267932489451477, + "grad_norm": 0.7324913144111633, + "learning_rate": 0.00011047068991468118, + 
"loss": 1.3478, + "step": 7838 + }, + { + "epoch": 0.8268987341772152, + "grad_norm": 0.6108587384223938, + "learning_rate": 0.00011033957433205364, + "loss": 1.262, + "step": 7839 + }, + { + "epoch": 0.8270042194092827, + "grad_norm": 0.6319637298583984, + "learning_rate": 0.00011020853042656648, + "loss": 1.3141, + "step": 7840 + }, + { + "epoch": 0.8271097046413503, + "grad_norm": 0.6435394883155823, + "learning_rate": 0.00011007755821290371, + "loss": 1.3272, + "step": 7841 + }, + { + "epoch": 0.8272151898734177, + "grad_norm": 0.7159570455551147, + "learning_rate": 0.00010994665770574162, + "loss": 1.3237, + "step": 7842 + }, + { + "epoch": 0.8273206751054852, + "grad_norm": 0.6092506647109985, + "learning_rate": 0.000109815828919748, + "loss": 1.3328, + "step": 7843 + }, + { + "epoch": 0.8274261603375528, + "grad_norm": 0.6373780369758606, + "learning_rate": 0.00010968507186958302, + "loss": 1.2823, + "step": 7844 + }, + { + "epoch": 0.8275316455696202, + "grad_norm": 0.7015225887298584, + "learning_rate": 0.00010955438656989849, + "loss": 1.304, + "step": 7845 + }, + { + "epoch": 0.8276371308016878, + "grad_norm": 0.8436068892478943, + "learning_rate": 0.00010942377303533865, + "loss": 1.321, + "step": 7846 + }, + { + "epoch": 0.8277426160337553, + "grad_norm": 0.6392973065376282, + "learning_rate": 0.00010929323128053927, + "loss": 1.326, + "step": 7847 + }, + { + "epoch": 0.8278481012658228, + "grad_norm": 0.6644658446311951, + "learning_rate": 0.00010916276132012818, + "loss": 1.3362, + "step": 7848 + }, + { + "epoch": 0.8279535864978903, + "grad_norm": 0.6533156037330627, + "learning_rate": 0.00010903236316872514, + "loss": 1.2889, + "step": 7849 + }, + { + "epoch": 0.8280590717299579, + "grad_norm": 0.6457601189613342, + "learning_rate": 0.000108902036840942, + "loss": 1.3106, + "step": 7850 + }, + { + "epoch": 0.8281645569620253, + "grad_norm": 0.6881588101387024, + "learning_rate": 0.00010877178235138239, + "loss": 1.3357, + "step": 7851 + }, + { + 
"epoch": 0.8282700421940928, + "grad_norm": 0.6616324782371521, + "learning_rate": 0.00010864159971464205, + "loss": 1.288, + "step": 7852 + }, + { + "epoch": 0.8283755274261604, + "grad_norm": 0.8198772668838501, + "learning_rate": 0.00010851148894530858, + "loss": 1.3178, + "step": 7853 + }, + { + "epoch": 0.8284810126582278, + "grad_norm": 0.6676099300384521, + "learning_rate": 0.00010838145005796138, + "loss": 1.3327, + "step": 7854 + }, + { + "epoch": 0.8285864978902954, + "grad_norm": 0.6334667801856995, + "learning_rate": 0.00010825148306717222, + "loss": 1.3054, + "step": 7855 + }, + { + "epoch": 0.8286919831223629, + "grad_norm": 0.6204498410224915, + "learning_rate": 0.00010812158798750438, + "loss": 1.3017, + "step": 7856 + }, + { + "epoch": 0.8287974683544304, + "grad_norm": 0.6469302177429199, + "learning_rate": 0.00010799176483351337, + "loss": 1.2973, + "step": 7857 + }, + { + "epoch": 0.8289029535864979, + "grad_norm": 0.8056052923202515, + "learning_rate": 0.00010786201361974646, + "loss": 1.288, + "step": 7858 + }, + { + "epoch": 0.8290084388185655, + "grad_norm": 0.6437684297561646, + "learning_rate": 0.00010773233436074287, + "loss": 1.2857, + "step": 7859 + }, + { + "epoch": 0.8291139240506329, + "grad_norm": 0.6234626173973083, + "learning_rate": 0.00010760272707103389, + "loss": 1.2933, + "step": 7860 + }, + { + "epoch": 0.8292194092827004, + "grad_norm": 0.6609329581260681, + "learning_rate": 0.00010747319176514264, + "loss": 1.3292, + "step": 7861 + }, + { + "epoch": 0.829324894514768, + "grad_norm": 0.6779692769050598, + "learning_rate": 0.00010734372845758411, + "loss": 1.3405, + "step": 7862 + }, + { + "epoch": 0.8294303797468354, + "grad_norm": 0.6500648856163025, + "learning_rate": 0.00010721433716286527, + "loss": 1.3099, + "step": 7863 + }, + { + "epoch": 0.829535864978903, + "grad_norm": 0.6396706104278564, + "learning_rate": 0.00010708501789548527, + "loss": 1.3223, + "step": 7864 + }, + { + "epoch": 0.8296413502109705, + 
"grad_norm": 0.6736265420913696, + "learning_rate": 0.00010695577066993495, + "loss": 1.2736, + "step": 7865 + }, + { + "epoch": 0.829746835443038, + "grad_norm": 0.6415369510650635, + "learning_rate": 0.00010682659550069704, + "loss": 1.3476, + "step": 7866 + }, + { + "epoch": 0.8298523206751055, + "grad_norm": 0.5986819863319397, + "learning_rate": 0.00010669749240224621, + "loss": 1.2833, + "step": 7867 + }, + { + "epoch": 0.8299578059071729, + "grad_norm": 0.6482294797897339, + "learning_rate": 0.00010656846138904916, + "loss": 1.2879, + "step": 7868 + }, + { + "epoch": 0.8300632911392405, + "grad_norm": 0.6545059084892273, + "learning_rate": 0.00010643950247556447, + "loss": 1.3477, + "step": 7869 + }, + { + "epoch": 0.830168776371308, + "grad_norm": 0.6517961025238037, + "learning_rate": 0.00010631061567624259, + "loss": 1.3209, + "step": 7870 + }, + { + "epoch": 0.8302742616033755, + "grad_norm": 0.6935025453567505, + "learning_rate": 0.00010618180100552596, + "loss": 1.3783, + "step": 7871 + }, + { + "epoch": 0.830379746835443, + "grad_norm": 0.6875302791595459, + "learning_rate": 0.00010605305847784871, + "loss": 1.334, + "step": 7872 + }, + { + "epoch": 0.8304852320675106, + "grad_norm": 0.6229235529899597, + "learning_rate": 0.00010592438810763747, + "loss": 1.2977, + "step": 7873 + }, + { + "epoch": 0.830590717299578, + "grad_norm": 0.6481280326843262, + "learning_rate": 0.00010579578990931019, + "loss": 1.3438, + "step": 7874 + }, + { + "epoch": 0.8306962025316456, + "grad_norm": 0.6266452670097351, + "learning_rate": 0.00010566726389727693, + "loss": 1.3281, + "step": 7875 + }, + { + "epoch": 0.8308016877637131, + "grad_norm": 0.6446281671524048, + "learning_rate": 0.00010553881008593969, + "loss": 1.3454, + "step": 7876 + }, + { + "epoch": 0.8309071729957805, + "grad_norm": 0.6630066633224487, + "learning_rate": 0.00010541042848969235, + "loss": 1.3484, + "step": 7877 + }, + { + "epoch": 0.8310126582278481, + "grad_norm": 0.7293335199356079, + 
"learning_rate": 0.00010528211912292066, + "loss": 1.3471, + "step": 7878 + }, + { + "epoch": 0.8311181434599156, + "grad_norm": 0.6108213663101196, + "learning_rate": 0.00010515388200000245, + "loss": 1.3368, + "step": 7879 + }, + { + "epoch": 0.8312236286919831, + "grad_norm": 0.6602609753608704, + "learning_rate": 0.00010502571713530706, + "loss": 1.312, + "step": 7880 + }, + { + "epoch": 0.8313291139240506, + "grad_norm": 0.6387341022491455, + "learning_rate": 0.00010489762454319634, + "loss": 1.3286, + "step": 7881 + }, + { + "epoch": 0.8314345991561182, + "grad_norm": 0.6274503469467163, + "learning_rate": 0.00010476960423802356, + "loss": 1.3215, + "step": 7882 + }, + { + "epoch": 0.8315400843881856, + "grad_norm": 0.6332539319992065, + "learning_rate": 0.00010464165623413408, + "loss": 1.3143, + "step": 7883 + }, + { + "epoch": 0.8316455696202532, + "grad_norm": 0.7096864581108093, + "learning_rate": 0.00010451378054586508, + "loss": 1.3668, + "step": 7884 + }, + { + "epoch": 0.8317510548523207, + "grad_norm": 0.6246122121810913, + "learning_rate": 0.00010438597718754561, + "loss": 1.3569, + "step": 7885 + }, + { + "epoch": 0.8318565400843881, + "grad_norm": 0.6963883638381958, + "learning_rate": 0.00010425824617349671, + "loss": 1.353, + "step": 7886 + }, + { + "epoch": 0.8319620253164557, + "grad_norm": 0.6530760526657104, + "learning_rate": 0.00010413058751803129, + "loss": 1.3444, + "step": 7887 + }, + { + "epoch": 0.8320675105485232, + "grad_norm": 0.6550817489624023, + "learning_rate": 0.0001040030012354542, + "loss": 1.308, + "step": 7888 + }, + { + "epoch": 0.8321729957805907, + "grad_norm": 0.6187398433685303, + "learning_rate": 0.00010387548734006195, + "loss": 1.3254, + "step": 7889 + }, + { + "epoch": 0.8322784810126582, + "grad_norm": 0.6349020004272461, + "learning_rate": 0.00010374804584614308, + "loss": 1.3343, + "step": 7890 + }, + { + "epoch": 0.8323839662447258, + "grad_norm": 0.6020061373710632, + "learning_rate": 0.00010362067676797837, 
+ "loss": 1.2919, + "step": 7891 + }, + { + "epoch": 0.8324894514767932, + "grad_norm": 0.631738543510437, + "learning_rate": 0.00010349338011983998, + "loss": 1.283, + "step": 7892 + }, + { + "epoch": 0.8325949367088608, + "grad_norm": 0.6119332313537598, + "learning_rate": 0.00010336615591599204, + "loss": 1.318, + "step": 7893 + }, + { + "epoch": 0.8327004219409283, + "grad_norm": 0.636372447013855, + "learning_rate": 0.00010323900417069079, + "loss": 1.2928, + "step": 7894 + }, + { + "epoch": 0.8328059071729957, + "grad_norm": 0.6404855847358704, + "learning_rate": 0.00010311192489818421, + "loss": 1.359, + "step": 7895 + }, + { + "epoch": 0.8329113924050633, + "grad_norm": 0.6175559163093567, + "learning_rate": 0.0001029849181127121, + "loss": 1.3191, + "step": 7896 + }, + { + "epoch": 0.8330168776371308, + "grad_norm": 0.6238675117492676, + "learning_rate": 0.00010285798382850614, + "loss": 1.3073, + "step": 7897 + }, + { + "epoch": 0.8331223628691983, + "grad_norm": 0.6552457809448242, + "learning_rate": 0.00010273112205979012, + "loss": 1.3248, + "step": 7898 + }, + { + "epoch": 0.8332278481012658, + "grad_norm": 0.7266510725021362, + "learning_rate": 0.00010260433282077944, + "loss": 1.3082, + "step": 7899 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.623049795627594, + "learning_rate": 0.00010247761612568129, + "loss": 1.34, + "step": 7900 + }, + { + "epoch": 0.8334388185654008, + "grad_norm": 0.6293635368347168, + "learning_rate": 0.00010235097198869525, + "loss": 1.3198, + "step": 7901 + }, + { + "epoch": 0.8335443037974684, + "grad_norm": 0.6762161254882812, + "learning_rate": 0.0001022244004240123, + "loss": 1.336, + "step": 7902 + }, + { + "epoch": 0.8336497890295359, + "grad_norm": 0.7965022921562195, + "learning_rate": 0.00010209790144581533, + "loss": 1.2836, + "step": 7903 + }, + { + "epoch": 0.8337552742616033, + "grad_norm": 0.6252793073654175, + "learning_rate": 0.00010197147506827925, + "loss": 1.2982, + "step": 7904 + }, + { + 
"epoch": 0.8338607594936709, + "grad_norm": 0.6369488835334778, + "learning_rate": 0.00010184512130557074, + "loss": 1.3363, + "step": 7905 + }, + { + "epoch": 0.8339662447257384, + "grad_norm": 0.7478175759315491, + "learning_rate": 0.0001017188401718484, + "loss": 1.3418, + "step": 7906 + }, + { + "epoch": 0.8340717299578059, + "grad_norm": 0.7162218689918518, + "learning_rate": 0.00010159263168126265, + "loss": 1.3239, + "step": 7907 + }, + { + "epoch": 0.8341772151898734, + "grad_norm": 0.8216306567192078, + "learning_rate": 0.00010146649584795575, + "loss": 1.3355, + "step": 7908 + }, + { + "epoch": 0.834282700421941, + "grad_norm": 0.6707173585891724, + "learning_rate": 0.00010134043268606191, + "loss": 1.3283, + "step": 7909 + }, + { + "epoch": 0.8343881856540084, + "grad_norm": 0.8487287759780884, + "learning_rate": 0.0001012144422097069, + "loss": 1.3394, + "step": 7910 + }, + { + "epoch": 0.834493670886076, + "grad_norm": 0.7475934028625488, + "learning_rate": 0.00010108852443300895, + "loss": 1.3193, + "step": 7911 + }, + { + "epoch": 0.8345991561181435, + "grad_norm": 0.6330104470252991, + "learning_rate": 0.00010096267937007758, + "loss": 1.3165, + "step": 7912 + }, + { + "epoch": 0.8347046413502109, + "grad_norm": 0.628167450428009, + "learning_rate": 0.00010083690703501445, + "loss": 1.3276, + "step": 7913 + }, + { + "epoch": 0.8348101265822785, + "grad_norm": 0.6158461570739746, + "learning_rate": 0.00010071120744191284, + "loss": 1.2826, + "step": 7914 + }, + { + "epoch": 0.834915611814346, + "grad_norm": 0.6936129331588745, + "learning_rate": 0.0001005855806048581, + "loss": 1.3523, + "step": 7915 + }, + { + "epoch": 0.8350210970464135, + "grad_norm": 0.6728137135505676, + "learning_rate": 0.00010046002653792726, + "loss": 1.3214, + "step": 7916 + }, + { + "epoch": 0.835126582278481, + "grad_norm": 0.6616618633270264, + "learning_rate": 0.00010033454525518945, + "loss": 1.3177, + "step": 7917 + }, + { + "epoch": 0.8352320675105486, + "grad_norm": 
0.6379271149635315, + "learning_rate": 0.0001002091367707053, + "loss": 1.3299, + "step": 7918 + }, + { + "epoch": 0.835337552742616, + "grad_norm": 0.6631330847740173, + "learning_rate": 0.00010008380109852752, + "loss": 1.3102, + "step": 7919 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 0.8227267265319824, + "learning_rate": 9.995853825270052e-05, + "loss": 1.3266, + "step": 7920 + }, + { + "epoch": 0.8355485232067511, + "grad_norm": 0.7087368965148926, + "learning_rate": 9.983334824726081e-05, + "loss": 1.3407, + "step": 7921 + }, + { + "epoch": 0.8356540084388185, + "grad_norm": 0.7116377353668213, + "learning_rate": 9.970823109623644e-05, + "loss": 1.3328, + "step": 7922 + }, + { + "epoch": 0.8357594936708861, + "grad_norm": 0.6622390747070312, + "learning_rate": 9.958318681364745e-05, + "loss": 1.3503, + "step": 7923 + }, + { + "epoch": 0.8358649789029536, + "grad_norm": 0.6780175566673279, + "learning_rate": 9.94582154135056e-05, + "loss": 1.3359, + "step": 7924 + }, + { + "epoch": 0.8359704641350211, + "grad_norm": 0.6212304830551147, + "learning_rate": 9.933331690981473e-05, + "loss": 1.3243, + "step": 7925 + }, + { + "epoch": 0.8360759493670886, + "grad_norm": 0.635110080242157, + "learning_rate": 9.920849131657011e-05, + "loss": 1.3539, + "step": 7926 + }, + { + "epoch": 0.8361814345991562, + "grad_norm": 0.6574867963790894, + "learning_rate": 9.908373864775915e-05, + "loss": 1.298, + "step": 7927 + }, + { + "epoch": 0.8362869198312236, + "grad_norm": 0.6383175849914551, + "learning_rate": 9.895905891736118e-05, + "loss": 1.3365, + "step": 7928 + }, + { + "epoch": 0.8363924050632912, + "grad_norm": 0.6270267367362976, + "learning_rate": 9.883445213934675e-05, + "loss": 1.33, + "step": 7929 + }, + { + "epoch": 0.8364978902953587, + "grad_norm": 0.6037085056304932, + "learning_rate": 9.870991832767919e-05, + "loss": 1.311, + "step": 7930 + }, + { + "epoch": 0.8366033755274261, + "grad_norm": 0.6283883452415466, + "learning_rate": 
9.858545749631287e-05, + "loss": 1.3264, + "step": 7931 + }, + { + "epoch": 0.8367088607594937, + "grad_norm": 0.6789548397064209, + "learning_rate": 9.846106965919427e-05, + "loss": 1.3215, + "step": 7932 + }, + { + "epoch": 0.8368143459915611, + "grad_norm": 0.6566210389137268, + "learning_rate": 9.833675483026175e-05, + "loss": 1.3345, + "step": 7933 + }, + { + "epoch": 0.8369198312236287, + "grad_norm": 0.6183567643165588, + "learning_rate": 9.821251302344525e-05, + "loss": 1.3267, + "step": 7934 + }, + { + "epoch": 0.8370253164556962, + "grad_norm": 0.6329095363616943, + "learning_rate": 9.80883442526668e-05, + "loss": 1.3098, + "step": 7935 + }, + { + "epoch": 0.8371308016877637, + "grad_norm": 0.6308715343475342, + "learning_rate": 9.79642485318401e-05, + "loss": 1.3241, + "step": 7936 + }, + { + "epoch": 0.8372362869198312, + "grad_norm": 0.6548170447349548, + "learning_rate": 9.78402258748708e-05, + "loss": 1.3068, + "step": 7937 + }, + { + "epoch": 0.8373417721518988, + "grad_norm": 0.6278256177902222, + "learning_rate": 9.771627629565599e-05, + "loss": 1.3233, + "step": 7938 + }, + { + "epoch": 0.8374472573839662, + "grad_norm": 0.6385871171951294, + "learning_rate": 9.759239980808494e-05, + "loss": 1.3305, + "step": 7939 + }, + { + "epoch": 0.8375527426160337, + "grad_norm": 0.6007499694824219, + "learning_rate": 9.746859642603884e-05, + "loss": 1.3409, + "step": 7940 + }, + { + "epoch": 0.8376582278481013, + "grad_norm": 0.6410555243492126, + "learning_rate": 9.734486616339027e-05, + "loss": 1.3358, + "step": 7941 + }, + { + "epoch": 0.8377637130801687, + "grad_norm": 0.6649725437164307, + "learning_rate": 9.722120903400392e-05, + "loss": 1.3454, + "step": 7942 + }, + { + "epoch": 0.8378691983122363, + "grad_norm": 0.6316155195236206, + "learning_rate": 9.709762505173617e-05, + "loss": 1.3289, + "step": 7943 + }, + { + "epoch": 0.8379746835443038, + "grad_norm": 0.6458197236061096, + "learning_rate": 9.697411423043521e-05, + "loss": 1.3314, + "step": 
7944 + }, + { + "epoch": 0.8380801687763713, + "grad_norm": 0.6376007199287415, + "learning_rate": 9.685067658394095e-05, + "loss": 1.3041, + "step": 7945 + }, + { + "epoch": 0.8381856540084388, + "grad_norm": 0.614666759967804, + "learning_rate": 9.672731212608535e-05, + "loss": 1.3339, + "step": 7946 + }, + { + "epoch": 0.8382911392405064, + "grad_norm": 0.6385905146598816, + "learning_rate": 9.660402087069192e-05, + "loss": 1.3355, + "step": 7947 + }, + { + "epoch": 0.8383966244725738, + "grad_norm": 0.6118661165237427, + "learning_rate": 9.648080283157604e-05, + "loss": 1.2952, + "step": 7948 + }, + { + "epoch": 0.8385021097046413, + "grad_norm": 0.6316489577293396, + "learning_rate": 9.635765802254482e-05, + "loss": 1.3312, + "step": 7949 + }, + { + "epoch": 0.8386075949367089, + "grad_norm": 0.637419581413269, + "learning_rate": 9.623458645739755e-05, + "loss": 1.3306, + "step": 7950 + }, + { + "epoch": 0.8387130801687763, + "grad_norm": 0.6748693585395813, + "learning_rate": 9.611158814992479e-05, + "loss": 1.337, + "step": 7951 + }, + { + "epoch": 0.8388185654008439, + "grad_norm": 0.6608666181564331, + "learning_rate": 9.598866311390919e-05, + "loss": 1.2771, + "step": 7952 + }, + { + "epoch": 0.8389240506329114, + "grad_norm": 0.6456429958343506, + "learning_rate": 9.586581136312506e-05, + "loss": 1.33, + "step": 7953 + }, + { + "epoch": 0.8390295358649789, + "grad_norm": 0.6079965829849243, + "learning_rate": 9.574303291133862e-05, + "loss": 1.2795, + "step": 7954 + }, + { + "epoch": 0.8391350210970464, + "grad_norm": 0.7215849757194519, + "learning_rate": 9.562032777230772e-05, + "loss": 1.2958, + "step": 7955 + }, + { + "epoch": 0.839240506329114, + "grad_norm": 0.6959405541419983, + "learning_rate": 9.549769595978211e-05, + "loss": 1.3087, + "step": 7956 + }, + { + "epoch": 0.8393459915611814, + "grad_norm": 0.6355606317520142, + "learning_rate": 9.537513748750337e-05, + "loss": 1.3242, + "step": 7957 + }, + { + "epoch": 0.8394514767932489, + 
"grad_norm": 0.6523191928863525, + "learning_rate": 9.525265236920452e-05, + "loss": 1.3335, + "step": 7958 + }, + { + "epoch": 0.8395569620253165, + "grad_norm": 0.6785088777542114, + "learning_rate": 9.5130240618611e-05, + "loss": 1.3393, + "step": 7959 + }, + { + "epoch": 0.8396624472573839, + "grad_norm": 0.6159928441047668, + "learning_rate": 9.50079022494395e-05, + "loss": 1.3159, + "step": 7960 + }, + { + "epoch": 0.8397679324894515, + "grad_norm": 0.6413474678993225, + "learning_rate": 9.488563727539864e-05, + "loss": 1.2797, + "step": 7961 + }, + { + "epoch": 0.839873417721519, + "grad_norm": 0.6608918309211731, + "learning_rate": 9.47634457101888e-05, + "loss": 1.3118, + "step": 7962 + }, + { + "epoch": 0.8399789029535865, + "grad_norm": 0.65125972032547, + "learning_rate": 9.464132756750218e-05, + "loss": 1.3129, + "step": 7963 + }, + { + "epoch": 0.840084388185654, + "grad_norm": 0.6289554834365845, + "learning_rate": 9.451928286102277e-05, + "loss": 1.3318, + "step": 7964 + }, + { + "epoch": 0.8401898734177216, + "grad_norm": 0.6333049535751343, + "learning_rate": 9.439731160442619e-05, + "loss": 1.3174, + "step": 7965 + }, + { + "epoch": 0.840295358649789, + "grad_norm": 0.6640065312385559, + "learning_rate": 9.427541381138002e-05, + "loss": 1.3241, + "step": 7966 + }, + { + "epoch": 0.8404008438818565, + "grad_norm": 0.6264691948890686, + "learning_rate": 9.415358949554326e-05, + "loss": 1.3239, + "step": 7967 + }, + { + "epoch": 0.8405063291139241, + "grad_norm": 0.7183279395103455, + "learning_rate": 9.40318386705673e-05, + "loss": 1.3257, + "step": 7968 + }, + { + "epoch": 0.8406118143459915, + "grad_norm": 0.6375385522842407, + "learning_rate": 9.391016135009484e-05, + "loss": 1.2857, + "step": 7969 + }, + { + "epoch": 0.8407172995780591, + "grad_norm": 0.6852168440818787, + "learning_rate": 9.378855754776028e-05, + "loss": 1.3086, + "step": 7970 + }, + { + "epoch": 0.8408227848101266, + "grad_norm": 0.8503065705299377, + "learning_rate": 
9.366702727719006e-05, + "loss": 1.3368, + "step": 7971 + }, + { + "epoch": 0.8409282700421941, + "grad_norm": 0.7678740620613098, + "learning_rate": 9.354557055200214e-05, + "loss": 1.336, + "step": 7972 + }, + { + "epoch": 0.8410337552742616, + "grad_norm": 0.6252720952033997, + "learning_rate": 9.342418738580652e-05, + "loss": 1.2986, + "step": 7973 + }, + { + "epoch": 0.8411392405063292, + "grad_norm": 0.6351887583732605, + "learning_rate": 9.330287779220459e-05, + "loss": 1.2765, + "step": 7974 + }, + { + "epoch": 0.8412447257383966, + "grad_norm": 0.859125018119812, + "learning_rate": 9.31816417847898e-05, + "loss": 1.3302, + "step": 7975 + }, + { + "epoch": 0.8413502109704641, + "grad_norm": 0.6613227725028992, + "learning_rate": 9.306047937714713e-05, + "loss": 1.331, + "step": 7976 + }, + { + "epoch": 0.8414556962025317, + "grad_norm": 0.6619693040847778, + "learning_rate": 9.29393905828537e-05, + "loss": 1.2951, + "step": 7977 + }, + { + "epoch": 0.8415611814345991, + "grad_norm": 0.6117024421691895, + "learning_rate": 9.281837541547791e-05, + "loss": 1.3196, + "step": 7978 + }, + { + "epoch": 0.8416666666666667, + "grad_norm": 0.6391222476959229, + "learning_rate": 9.269743388858019e-05, + "loss": 1.3145, + "step": 7979 + }, + { + "epoch": 0.8417721518987342, + "grad_norm": 0.6247634291648865, + "learning_rate": 9.257656601571266e-05, + "loss": 1.2829, + "step": 7980 + }, + { + "epoch": 0.8418776371308017, + "grad_norm": 0.6410990357398987, + "learning_rate": 9.245577181041901e-05, + "loss": 1.3142, + "step": 7981 + }, + { + "epoch": 0.8419831223628692, + "grad_norm": 0.6711198091506958, + "learning_rate": 9.233505128623499e-05, + "loss": 1.2942, + "step": 7982 + }, + { + "epoch": 0.8420886075949368, + "grad_norm": 0.6429935693740845, + "learning_rate": 9.221440445668794e-05, + "loss": 1.3429, + "step": 7983 + }, + { + "epoch": 0.8421940928270042, + "grad_norm": 0.7622426748275757, + "learning_rate": 9.209383133529664e-05, + "loss": 1.2895, + "step": 
7984 + }, + { + "epoch": 0.8422995780590717, + "grad_norm": 0.6590160727500916, + "learning_rate": 9.197333193557237e-05, + "loss": 1.353, + "step": 7985 + }, + { + "epoch": 0.8424050632911393, + "grad_norm": 0.6121687889099121, + "learning_rate": 9.185290627101747e-05, + "loss": 1.3323, + "step": 7986 + }, + { + "epoch": 0.8425105485232067, + "grad_norm": 0.8075000047683716, + "learning_rate": 9.173255435512617e-05, + "loss": 1.3466, + "step": 7987 + }, + { + "epoch": 0.8426160337552743, + "grad_norm": 0.7449835538864136, + "learning_rate": 9.161227620138468e-05, + "loss": 1.3606, + "step": 7988 + }, + { + "epoch": 0.8427215189873418, + "grad_norm": 0.6414071321487427, + "learning_rate": 9.149207182327054e-05, + "loss": 1.3025, + "step": 7989 + }, + { + "epoch": 0.8428270042194093, + "grad_norm": 0.6928707957267761, + "learning_rate": 9.137194123425349e-05, + "loss": 1.3179, + "step": 7990 + }, + { + "epoch": 0.8429324894514768, + "grad_norm": 0.6178991198539734, + "learning_rate": 9.125188444779458e-05, + "loss": 1.3206, + "step": 7991 + }, + { + "epoch": 0.8430379746835444, + "grad_norm": 0.6170142292976379, + "learning_rate": 9.113190147734682e-05, + "loss": 1.2984, + "step": 7992 + }, + { + "epoch": 0.8431434599156118, + "grad_norm": 0.6606640815734863, + "learning_rate": 9.101199233635477e-05, + "loss": 1.3432, + "step": 7993 + }, + { + "epoch": 0.8432489451476793, + "grad_norm": 0.8581976890563965, + "learning_rate": 9.089215703825519e-05, + "loss": 1.3063, + "step": 7994 + }, + { + "epoch": 0.8433544303797469, + "grad_norm": 0.6447834372520447, + "learning_rate": 9.077239559647591e-05, + "loss": 1.3013, + "step": 7995 + }, + { + "epoch": 0.8434599156118143, + "grad_norm": 0.6138639450073242, + "learning_rate": 9.065270802443704e-05, + "loss": 1.347, + "step": 7996 + }, + { + "epoch": 0.8435654008438819, + "grad_norm": 0.5997762084007263, + "learning_rate": 9.053309433554993e-05, + "loss": 1.3265, + "step": 7997 + }, + { + "epoch": 0.8436708860759494, + 
"grad_norm": 0.6305940747261047, + "learning_rate": 9.041355454321803e-05, + "loss": 1.321, + "step": 7998 + }, + { + "epoch": 0.8437763713080169, + "grad_norm": 0.661737322807312, + "learning_rate": 9.029408866083638e-05, + "loss": 1.3227, + "step": 7999 + }, + { + "epoch": 0.8438818565400844, + "grad_norm": 0.649282693862915, + "learning_rate": 9.017469670179168e-05, + "loss": 1.3091, + "step": 8000 + }, + { + "epoch": 0.8439873417721518, + "grad_norm": 0.6479360461235046, + "learning_rate": 9.00553786794624e-05, + "loss": 1.33, + "step": 8001 + }, + { + "epoch": 0.8440928270042194, + "grad_norm": 0.6483389139175415, + "learning_rate": 8.99361346072185e-05, + "loss": 1.3553, + "step": 8002 + }, + { + "epoch": 0.8441983122362869, + "grad_norm": 0.6067954301834106, + "learning_rate": 8.98169644984223e-05, + "loss": 1.3397, + "step": 8003 + }, + { + "epoch": 0.8443037974683544, + "grad_norm": 0.626388669013977, + "learning_rate": 8.96978683664272e-05, + "loss": 1.2986, + "step": 8004 + }, + { + "epoch": 0.8444092827004219, + "grad_norm": 0.6658235788345337, + "learning_rate": 8.957884622457854e-05, + "loss": 1.3136, + "step": 8005 + }, + { + "epoch": 0.8445147679324895, + "grad_norm": 0.7044573426246643, + "learning_rate": 8.945989808621321e-05, + "loss": 1.3229, + "step": 8006 + }, + { + "epoch": 0.8446202531645569, + "grad_norm": 0.6155751347541809, + "learning_rate": 8.934102396466016e-05, + "loss": 1.367, + "step": 8007 + }, + { + "epoch": 0.8447257383966245, + "grad_norm": 0.7085239887237549, + "learning_rate": 8.92222238732397e-05, + "loss": 1.307, + "step": 8008 + }, + { + "epoch": 0.844831223628692, + "grad_norm": 0.623604953289032, + "learning_rate": 8.910349782526394e-05, + "loss": 1.2905, + "step": 8009 + }, + { + "epoch": 0.8449367088607594, + "grad_norm": 0.6328223943710327, + "learning_rate": 8.898484583403668e-05, + "loss": 1.3109, + "step": 8010 + }, + { + "epoch": 0.845042194092827, + "grad_norm": 0.7159547805786133, + "learning_rate": 
8.886626791285369e-05, + "loss": 1.3243, + "step": 8011 + }, + { + "epoch": 0.8451476793248945, + "grad_norm": 0.6276882886886597, + "learning_rate": 8.874776407500206e-05, + "loss": 1.3208, + "step": 8012 + }, + { + "epoch": 0.845253164556962, + "grad_norm": 0.6330004930496216, + "learning_rate": 8.86293343337608e-05, + "loss": 1.3153, + "step": 8013 + }, + { + "epoch": 0.8453586497890295, + "grad_norm": 0.6467596888542175, + "learning_rate": 8.851097870240051e-05, + "loss": 1.3197, + "step": 8014 + }, + { + "epoch": 0.8454641350210971, + "grad_norm": 0.6776340007781982, + "learning_rate": 8.839269719418361e-05, + "loss": 1.3072, + "step": 8015 + }, + { + "epoch": 0.8455696202531645, + "grad_norm": 0.6675223708152771, + "learning_rate": 8.827448982236397e-05, + "loss": 1.3024, + "step": 8016 + }, + { + "epoch": 0.8456751054852321, + "grad_norm": 0.6158689856529236, + "learning_rate": 8.815635660018742e-05, + "loss": 1.2708, + "step": 8017 + }, + { + "epoch": 0.8457805907172996, + "grad_norm": 0.6191571354866028, + "learning_rate": 8.803829754089138e-05, + "loss": 1.3513, + "step": 8018 + }, + { + "epoch": 0.845886075949367, + "grad_norm": 0.6706314086914062, + "learning_rate": 8.792031265770475e-05, + "loss": 1.3348, + "step": 8019 + }, + { + "epoch": 0.8459915611814346, + "grad_norm": 0.6976965665817261, + "learning_rate": 8.780240196384873e-05, + "loss": 1.3247, + "step": 8020 + }, + { + "epoch": 0.8460970464135021, + "grad_norm": 0.6795482635498047, + "learning_rate": 8.768456547253556e-05, + "loss": 1.3196, + "step": 8021 + }, + { + "epoch": 0.8462025316455696, + "grad_norm": 0.6600016355514526, + "learning_rate": 8.756680319696945e-05, + "loss": 1.3011, + "step": 8022 + }, + { + "epoch": 0.8463080168776371, + "grad_norm": 0.6321180462837219, + "learning_rate": 8.744911515034623e-05, + "loss": 1.2887, + "step": 8023 + }, + { + "epoch": 0.8464135021097047, + "grad_norm": 0.6778333783149719, + "learning_rate": 8.733150134585338e-05, + "loss": 1.3579, + "step": 
8024 + }, + { + "epoch": 0.8465189873417721, + "grad_norm": 0.6908684968948364, + "learning_rate": 8.721396179667019e-05, + "loss": 1.344, + "step": 8025 + }, + { + "epoch": 0.8466244725738397, + "grad_norm": 0.6608791947364807, + "learning_rate": 8.709649651596752e-05, + "loss": 1.312, + "step": 8026 + }, + { + "epoch": 0.8467299578059072, + "grad_norm": 0.6592254042625427, + "learning_rate": 8.697910551690802e-05, + "loss": 1.2983, + "step": 8027 + }, + { + "epoch": 0.8468354430379746, + "grad_norm": 0.7024664282798767, + "learning_rate": 8.686178881264568e-05, + "loss": 1.2946, + "step": 8028 + }, + { + "epoch": 0.8469409282700422, + "grad_norm": 0.6782184839248657, + "learning_rate": 8.67445464163267e-05, + "loss": 1.3408, + "step": 8029 + }, + { + "epoch": 0.8470464135021097, + "grad_norm": 0.6342724561691284, + "learning_rate": 8.662737834108861e-05, + "loss": 1.3121, + "step": 8030 + }, + { + "epoch": 0.8471518987341772, + "grad_norm": 0.6395836472511292, + "learning_rate": 8.651028460006072e-05, + "loss": 1.3378, + "step": 8031 + }, + { + "epoch": 0.8472573839662447, + "grad_norm": 0.6339927315711975, + "learning_rate": 8.639326520636387e-05, + "loss": 1.2815, + "step": 8032 + }, + { + "epoch": 0.8473628691983123, + "grad_norm": 0.6986925005912781, + "learning_rate": 8.627632017311065e-05, + "loss": 1.3421, + "step": 8033 + }, + { + "epoch": 0.8474683544303797, + "grad_norm": 0.6244841814041138, + "learning_rate": 8.615944951340543e-05, + "loss": 1.2889, + "step": 8034 + }, + { + "epoch": 0.8475738396624473, + "grad_norm": 0.6443933248519897, + "learning_rate": 8.604265324034405e-05, + "loss": 1.3257, + "step": 8035 + }, + { + "epoch": 0.8476793248945148, + "grad_norm": 0.6198188662528992, + "learning_rate": 8.592593136701404e-05, + "loss": 1.331, + "step": 8036 + }, + { + "epoch": 0.8477848101265822, + "grad_norm": 0.6760772466659546, + "learning_rate": 8.580928390649496e-05, + "loss": 1.3216, + "step": 8037 + }, + { + "epoch": 0.8478902953586498, + 
"grad_norm": 0.6330710649490356, + "learning_rate": 8.569271087185756e-05, + "loss": 1.3594, + "step": 8038 + }, + { + "epoch": 0.8479957805907173, + "grad_norm": 0.6384992599487305, + "learning_rate": 8.557621227616444e-05, + "loss": 1.3599, + "step": 8039 + }, + { + "epoch": 0.8481012658227848, + "grad_norm": 0.6463088393211365, + "learning_rate": 8.545978813246987e-05, + "loss": 1.3306, + "step": 8040 + }, + { + "epoch": 0.8482067510548523, + "grad_norm": 0.7008126378059387, + "learning_rate": 8.53434384538197e-05, + "loss": 1.3073, + "step": 8041 + }, + { + "epoch": 0.8483122362869199, + "grad_norm": 0.6348085403442383, + "learning_rate": 8.522716325325155e-05, + "loss": 1.2748, + "step": 8042 + }, + { + "epoch": 0.8484177215189873, + "grad_norm": 0.6138489842414856, + "learning_rate": 8.51109625437946e-05, + "loss": 1.3534, + "step": 8043 + }, + { + "epoch": 0.8485232067510549, + "grad_norm": 0.6611854434013367, + "learning_rate": 8.499483633846977e-05, + "loss": 1.3084, + "step": 8044 + }, + { + "epoch": 0.8486286919831224, + "grad_norm": 0.6463767886161804, + "learning_rate": 8.48787846502893e-05, + "loss": 1.2954, + "step": 8045 + }, + { + "epoch": 0.8487341772151898, + "grad_norm": 0.6280196309089661, + "learning_rate": 8.476280749225782e-05, + "loss": 1.304, + "step": 8046 + }, + { + "epoch": 0.8488396624472574, + "grad_norm": 0.6288952827453613, + "learning_rate": 8.464690487737098e-05, + "loss": 1.2839, + "step": 8047 + }, + { + "epoch": 0.8489451476793249, + "grad_norm": 0.6442685723304749, + "learning_rate": 8.453107681861616e-05, + "loss": 1.287, + "step": 8048 + }, + { + "epoch": 0.8490506329113924, + "grad_norm": 0.6382757425308228, + "learning_rate": 8.441532332897248e-05, + "loss": 1.3576, + "step": 8049 + }, + { + "epoch": 0.8491561181434599, + "grad_norm": 0.6194390058517456, + "learning_rate": 8.429964442141072e-05, + "loss": 1.328, + "step": 8050 + }, + { + "epoch": 0.8492616033755275, + "grad_norm": 0.6377845406532288, + "learning_rate": 
8.418404010889336e-05, + "loss": 1.3301, + "step": 8051 + }, + { + "epoch": 0.8493670886075949, + "grad_norm": 0.6198787093162537, + "learning_rate": 8.406851040437426e-05, + "loss": 1.3024, + "step": 8052 + }, + { + "epoch": 0.8494725738396625, + "grad_norm": 0.6291009783744812, + "learning_rate": 8.395305532079928e-05, + "loss": 1.3323, + "step": 8053 + }, + { + "epoch": 0.84957805907173, + "grad_norm": 0.6414312124252319, + "learning_rate": 8.383767487110552e-05, + "loss": 1.3103, + "step": 8054 + }, + { + "epoch": 0.8496835443037974, + "grad_norm": 0.6311107277870178, + "learning_rate": 8.372236906822217e-05, + "loss": 1.3344, + "step": 8055 + }, + { + "epoch": 0.849789029535865, + "grad_norm": 0.6981040239334106, + "learning_rate": 8.360713792506971e-05, + "loss": 1.3091, + "step": 8056 + }, + { + "epoch": 0.8498945147679325, + "grad_norm": 0.6277936100959778, + "learning_rate": 8.349198145456049e-05, + "loss": 1.3375, + "step": 8057 + }, + { + "epoch": 0.85, + "grad_norm": 0.622273862361908, + "learning_rate": 8.337689966959819e-05, + "loss": 1.3444, + "step": 8058 + }, + { + "epoch": 0.8501054852320675, + "grad_norm": 0.6310386657714844, + "learning_rate": 8.326189258307832e-05, + "loss": 1.28, + "step": 8059 + }, + { + "epoch": 0.8502109704641351, + "grad_norm": 0.6208080053329468, + "learning_rate": 8.314696020788806e-05, + "loss": 1.3088, + "step": 8060 + }, + { + "epoch": 0.8503164556962025, + "grad_norm": 0.7071943283081055, + "learning_rate": 8.303210255690622e-05, + "loss": 1.324, + "step": 8061 + }, + { + "epoch": 0.8504219409282701, + "grad_norm": 0.641411542892456, + "learning_rate": 8.29173196430029e-05, + "loss": 1.3174, + "step": 8062 + }, + { + "epoch": 0.8505274261603376, + "grad_norm": 0.6371302604675293, + "learning_rate": 8.280261147904039e-05, + "loss": 1.2863, + "step": 8063 + }, + { + "epoch": 0.850632911392405, + "grad_norm": 0.6651799082756042, + "learning_rate": 8.268797807787226e-05, + "loss": 1.3367, + "step": 8064 + }, + { + 
"epoch": 0.8507383966244726, + "grad_norm": 0.6959487199783325, + "learning_rate": 8.257341945234365e-05, + "loss": 1.3658, + "step": 8065 + }, + { + "epoch": 0.85084388185654, + "grad_norm": 0.6459840536117554, + "learning_rate": 8.245893561529153e-05, + "loss": 1.3126, + "step": 8066 + }, + { + "epoch": 0.8509493670886076, + "grad_norm": 0.620054304599762, + "learning_rate": 8.23445265795443e-05, + "loss": 1.3044, + "step": 8067 + }, + { + "epoch": 0.8510548523206751, + "grad_norm": 0.6221997737884521, + "learning_rate": 8.223019235792214e-05, + "loss": 1.3334, + "step": 8068 + }, + { + "epoch": 0.8511603375527426, + "grad_norm": 0.6248469352722168, + "learning_rate": 8.211593296323672e-05, + "loss": 1.3327, + "step": 8069 + }, + { + "epoch": 0.8512658227848101, + "grad_norm": 0.6505485773086548, + "learning_rate": 8.200174840829136e-05, + "loss": 1.3169, + "step": 8070 + }, + { + "epoch": 0.8513713080168777, + "grad_norm": 0.6841166019439697, + "learning_rate": 8.188763870588092e-05, + "loss": 1.2928, + "step": 8071 + }, + { + "epoch": 0.8514767932489451, + "grad_norm": 0.6299134492874146, + "learning_rate": 8.177360386879217e-05, + "loss": 1.3241, + "step": 8072 + }, + { + "epoch": 0.8515822784810126, + "grad_norm": 0.6325430870056152, + "learning_rate": 8.165964390980316e-05, + "loss": 1.3272, + "step": 8073 + }, + { + "epoch": 0.8516877637130802, + "grad_norm": 0.6545441150665283, + "learning_rate": 8.15457588416838e-05, + "loss": 1.3396, + "step": 8074 + }, + { + "epoch": 0.8517932489451476, + "grad_norm": 0.6753107309341431, + "learning_rate": 8.143194867719534e-05, + "loss": 1.3263, + "step": 8075 + }, + { + "epoch": 0.8518987341772152, + "grad_norm": 0.6807610988616943, + "learning_rate": 8.131821342909071e-05, + "loss": 1.3723, + "step": 8076 + }, + { + "epoch": 0.8520042194092827, + "grad_norm": 0.6032021045684814, + "learning_rate": 8.120455311011473e-05, + "loss": 1.3297, + "step": 8077 + }, + { + "epoch": 0.8521097046413502, + "grad_norm": 
0.6370107531547546, + "learning_rate": 8.109096773300348e-05, + "loss": 1.2832, + "step": 8078 + }, + { + "epoch": 0.8522151898734177, + "grad_norm": 0.631119430065155, + "learning_rate": 8.097745731048475e-05, + "loss": 1.316, + "step": 8079 + }, + { + "epoch": 0.8523206751054853, + "grad_norm": 0.6169055700302124, + "learning_rate": 8.08640218552778e-05, + "loss": 1.3466, + "step": 8080 + }, + { + "epoch": 0.8524261603375527, + "grad_norm": 0.6156861782073975, + "learning_rate": 8.075066138009396e-05, + "loss": 1.2708, + "step": 8081 + }, + { + "epoch": 0.8525316455696202, + "grad_norm": 0.7353832125663757, + "learning_rate": 8.063737589763573e-05, + "loss": 1.2587, + "step": 8082 + }, + { + "epoch": 0.8526371308016878, + "grad_norm": 0.6154970526695251, + "learning_rate": 8.05241654205973e-05, + "loss": 1.2888, + "step": 8083 + }, + { + "epoch": 0.8527426160337552, + "grad_norm": 0.6361059546470642, + "learning_rate": 8.041102996166442e-05, + "loss": 1.3088, + "step": 8084 + }, + { + "epoch": 0.8528481012658228, + "grad_norm": 0.6241645216941833, + "learning_rate": 8.029796953351445e-05, + "loss": 1.3288, + "step": 8085 + }, + { + "epoch": 0.8529535864978903, + "grad_norm": 0.6342790126800537, + "learning_rate": 8.018498414881645e-05, + "loss": 1.3247, + "step": 8086 + }, + { + "epoch": 0.8530590717299578, + "grad_norm": 0.6238862872123718, + "learning_rate": 8.007207382023102e-05, + "loss": 1.3075, + "step": 8087 + }, + { + "epoch": 0.8531645569620253, + "grad_norm": 0.6863685250282288, + "learning_rate": 7.995923856041013e-05, + "loss": 1.337, + "step": 8088 + }, + { + "epoch": 0.8532700421940929, + "grad_norm": 0.6385225057601929, + "learning_rate": 7.984647838199773e-05, + "loss": 1.3624, + "step": 8089 + }, + { + "epoch": 0.8533755274261603, + "grad_norm": 0.6518372893333435, + "learning_rate": 7.973379329762925e-05, + "loss": 1.3386, + "step": 8090 + }, + { + "epoch": 0.8534810126582278, + "grad_norm": 0.6569281220436096, + "learning_rate": 
7.96211833199314e-05, + "loss": 1.3255, + "step": 8091 + }, + { + "epoch": 0.8535864978902954, + "grad_norm": 0.6831235885620117, + "learning_rate": 7.950864846152284e-05, + "loss": 1.3195, + "step": 8092 + }, + { + "epoch": 0.8536919831223628, + "grad_norm": 0.6457845568656921, + "learning_rate": 7.939618873501356e-05, + "loss": 1.3123, + "step": 8093 + }, + { + "epoch": 0.8537974683544304, + "grad_norm": 0.6354213953018188, + "learning_rate": 7.928380415300523e-05, + "loss": 1.3014, + "step": 8094 + }, + { + "epoch": 0.8539029535864979, + "grad_norm": 0.6064987778663635, + "learning_rate": 7.917149472809113e-05, + "loss": 1.2639, + "step": 8095 + }, + { + "epoch": 0.8540084388185654, + "grad_norm": 0.6522544026374817, + "learning_rate": 7.905926047285616e-05, + "loss": 1.2911, + "step": 8096 + }, + { + "epoch": 0.8541139240506329, + "grad_norm": 0.6522780060768127, + "learning_rate": 7.894710139987645e-05, + "loss": 1.3338, + "step": 8097 + }, + { + "epoch": 0.8542194092827005, + "grad_norm": 0.6825006008148193, + "learning_rate": 7.883501752172038e-05, + "loss": 1.2897, + "step": 8098 + }, + { + "epoch": 0.8543248945147679, + "grad_norm": 0.6524546146392822, + "learning_rate": 7.872300885094736e-05, + "loss": 1.3198, + "step": 8099 + }, + { + "epoch": 0.8544303797468354, + "grad_norm": 0.7048925757408142, + "learning_rate": 7.861107540010845e-05, + "loss": 1.3175, + "step": 8100 + }, + { + "epoch": 0.854535864978903, + "grad_norm": 0.6419926285743713, + "learning_rate": 7.849921718174638e-05, + "loss": 1.3329, + "step": 8101 + }, + { + "epoch": 0.8546413502109704, + "grad_norm": 0.6459499001502991, + "learning_rate": 7.838743420839544e-05, + "loss": 1.3614, + "step": 8102 + }, + { + "epoch": 0.854746835443038, + "grad_norm": 0.6261332035064697, + "learning_rate": 7.827572649258147e-05, + "loss": 1.3203, + "step": 8103 + }, + { + "epoch": 0.8548523206751055, + "grad_norm": 0.6424562931060791, + "learning_rate": 7.816409404682185e-05, + "loss": 1.3481, + "step": 
8104 + }, + { + "epoch": 0.854957805907173, + "grad_norm": 0.6002475023269653, + "learning_rate": 7.805253688362557e-05, + "loss": 1.3247, + "step": 8105 + }, + { + "epoch": 0.8550632911392405, + "grad_norm": 0.624364972114563, + "learning_rate": 7.794105501549306e-05, + "loss": 1.3085, + "step": 8106 + }, + { + "epoch": 0.8551687763713081, + "grad_norm": 0.6015122532844543, + "learning_rate": 7.782964845491666e-05, + "loss": 1.2956, + "step": 8107 + }, + { + "epoch": 0.8552742616033755, + "grad_norm": 0.7064802646636963, + "learning_rate": 7.771831721437989e-05, + "loss": 1.2904, + "step": 8108 + }, + { + "epoch": 0.855379746835443, + "grad_norm": 0.6677772998809814, + "learning_rate": 7.760706130635792e-05, + "loss": 1.322, + "step": 8109 + }, + { + "epoch": 0.8554852320675106, + "grad_norm": 0.6325047016143799, + "learning_rate": 7.749588074331762e-05, + "loss": 1.3578, + "step": 8110 + }, + { + "epoch": 0.855590717299578, + "grad_norm": 0.6311922669410706, + "learning_rate": 7.738477553771727e-05, + "loss": 1.3176, + "step": 8111 + }, + { + "epoch": 0.8556962025316456, + "grad_norm": 0.6274179220199585, + "learning_rate": 7.727374570200685e-05, + "loss": 1.3435, + "step": 8112 + }, + { + "epoch": 0.8558016877637131, + "grad_norm": 0.6970860362052917, + "learning_rate": 7.716279124862771e-05, + "loss": 1.2743, + "step": 8113 + }, + { + "epoch": 0.8559071729957806, + "grad_norm": 0.6348453760147095, + "learning_rate": 7.705191219001267e-05, + "loss": 1.3507, + "step": 8114 + }, + { + "epoch": 0.8560126582278481, + "grad_norm": 0.598423182964325, + "learning_rate": 7.694110853858671e-05, + "loss": 1.3182, + "step": 8115 + }, + { + "epoch": 0.8561181434599157, + "grad_norm": 0.629758894443512, + "learning_rate": 7.683038030676573e-05, + "loss": 1.3289, + "step": 8116 + }, + { + "epoch": 0.8562236286919831, + "grad_norm": 0.6879315376281738, + "learning_rate": 7.67197275069573e-05, + "loss": 1.3385, + "step": 8117 + }, + { + "epoch": 0.8563291139240506, + 
"grad_norm": 0.6230217814445496, + "learning_rate": 7.660915015156067e-05, + "loss": 1.2912, + "step": 8118 + }, + { + "epoch": 0.8564345991561182, + "grad_norm": 0.6476528644561768, + "learning_rate": 7.649864825296669e-05, + "loss": 1.3084, + "step": 8119 + }, + { + "epoch": 0.8565400843881856, + "grad_norm": 0.6145318150520325, + "learning_rate": 7.63882218235575e-05, + "loss": 1.2891, + "step": 8120 + }, + { + "epoch": 0.8566455696202532, + "grad_norm": 0.6479477286338806, + "learning_rate": 7.627787087570692e-05, + "loss": 1.3031, + "step": 8121 + }, + { + "epoch": 0.8567510548523207, + "grad_norm": 0.6511478424072266, + "learning_rate": 7.616759542178045e-05, + "loss": 1.3187, + "step": 8122 + }, + { + "epoch": 0.8568565400843882, + "grad_norm": 0.6303105354309082, + "learning_rate": 7.605739547413487e-05, + "loss": 1.314, + "step": 8123 + }, + { + "epoch": 0.8569620253164557, + "grad_norm": 0.6641851663589478, + "learning_rate": 7.594727104511873e-05, + "loss": 1.3091, + "step": 8124 + }, + { + "epoch": 0.8570675105485233, + "grad_norm": 0.6405497789382935, + "learning_rate": 7.583722214707206e-05, + "loss": 1.3202, + "step": 8125 + }, + { + "epoch": 0.8571729957805907, + "grad_norm": 0.6060464382171631, + "learning_rate": 7.572724879232634e-05, + "loss": 1.3194, + "step": 8126 + }, + { + "epoch": 0.8572784810126582, + "grad_norm": 0.6224243640899658, + "learning_rate": 7.561735099320463e-05, + "loss": 1.3169, + "step": 8127 + }, + { + "epoch": 0.8573839662447258, + "grad_norm": 0.6184360384941101, + "learning_rate": 7.55075287620215e-05, + "loss": 1.3292, + "step": 8128 + }, + { + "epoch": 0.8574894514767932, + "grad_norm": 0.6241578459739685, + "learning_rate": 7.539778211108309e-05, + "loss": 1.3512, + "step": 8129 + }, + { + "epoch": 0.8575949367088608, + "grad_norm": 0.6663792729377747, + "learning_rate": 7.528811105268699e-05, + "loss": 1.2821, + "step": 8130 + }, + { + "epoch": 0.8577004219409282, + "grad_norm": 0.633920431137085, + "learning_rate": 
7.517851559912254e-05, + "loss": 1.2809, + "step": 8131 + }, + { + "epoch": 0.8578059071729958, + "grad_norm": 0.6426957845687866, + "learning_rate": 7.506899576267023e-05, + "loss": 1.3228, + "step": 8132 + }, + { + "epoch": 0.8579113924050633, + "grad_norm": 0.6518644690513611, + "learning_rate": 7.495955155560261e-05, + "loss": 1.3218, + "step": 8133 + }, + { + "epoch": 0.8580168776371307, + "grad_norm": 0.6617181897163391, + "learning_rate": 7.485018299018326e-05, + "loss": 1.2987, + "step": 8134 + }, + { + "epoch": 0.8581223628691983, + "grad_norm": 0.6272319555282593, + "learning_rate": 7.474089007866756e-05, + "loss": 1.3177, + "step": 8135 + }, + { + "epoch": 0.8582278481012658, + "grad_norm": 0.6235801577568054, + "learning_rate": 7.463167283330227e-05, + "loss": 1.3286, + "step": 8136 + }, + { + "epoch": 0.8583333333333333, + "grad_norm": 0.6182185411453247, + "learning_rate": 7.452253126632564e-05, + "loss": 1.3081, + "step": 8137 + }, + { + "epoch": 0.8584388185654008, + "grad_norm": 0.6216442584991455, + "learning_rate": 7.441346538996769e-05, + "loss": 1.3241, + "step": 8138 + }, + { + "epoch": 0.8585443037974684, + "grad_norm": 0.6307787895202637, + "learning_rate": 7.430447521644973e-05, + "loss": 1.3228, + "step": 8139 + }, + { + "epoch": 0.8586497890295358, + "grad_norm": 0.6501413583755493, + "learning_rate": 7.41955607579845e-05, + "loss": 1.3139, + "step": 8140 + }, + { + "epoch": 0.8587552742616034, + "grad_norm": 0.6136254668235779, + "learning_rate": 7.408672202677666e-05, + "loss": 1.2807, + "step": 8141 + }, + { + "epoch": 0.8588607594936709, + "grad_norm": 0.6310875415802002, + "learning_rate": 7.397795903502202e-05, + "loss": 1.3169, + "step": 8142 + }, + { + "epoch": 0.8589662447257383, + "grad_norm": 0.6252594590187073, + "learning_rate": 7.386927179490801e-05, + "loss": 1.3273, + "step": 8143 + }, + { + "epoch": 0.8590717299578059, + "grad_norm": 0.624838650226593, + "learning_rate": 7.376066031861364e-05, + "loss": 1.2891, + "step": 
8144 + }, + { + "epoch": 0.8591772151898734, + "grad_norm": 0.6638364791870117, + "learning_rate": 7.365212461830933e-05, + "loss": 1.2917, + "step": 8145 + }, + { + "epoch": 0.8592827004219409, + "grad_norm": 0.6197853684425354, + "learning_rate": 7.354366470615695e-05, + "loss": 1.3514, + "step": 8146 + }, + { + "epoch": 0.8593881856540084, + "grad_norm": 0.6241962313652039, + "learning_rate": 7.343528059431009e-05, + "loss": 1.3087, + "step": 8147 + }, + { + "epoch": 0.859493670886076, + "grad_norm": 0.7837164998054504, + "learning_rate": 7.332697229491373e-05, + "loss": 1.3066, + "step": 8148 + }, + { + "epoch": 0.8595991561181434, + "grad_norm": 0.6136602759361267, + "learning_rate": 7.321873982010422e-05, + "loss": 1.281, + "step": 8149 + }, + { + "epoch": 0.859704641350211, + "grad_norm": 0.6519450545310974, + "learning_rate": 7.311058318200969e-05, + "loss": 1.3486, + "step": 8150 + }, + { + "epoch": 0.8598101265822785, + "grad_norm": 0.6134848594665527, + "learning_rate": 7.300250239274964e-05, + "loss": 1.3195, + "step": 8151 + }, + { + "epoch": 0.859915611814346, + "grad_norm": 0.6486167311668396, + "learning_rate": 7.289449746443494e-05, + "loss": 1.2975, + "step": 8152 + }, + { + "epoch": 0.8600210970464135, + "grad_norm": 0.6714701652526855, + "learning_rate": 7.278656840916825e-05, + "loss": 1.3554, + "step": 8153 + }, + { + "epoch": 0.860126582278481, + "grad_norm": 0.6464918851852417, + "learning_rate": 7.26787152390434e-05, + "loss": 1.3408, + "step": 8154 + }, + { + "epoch": 0.8602320675105485, + "grad_norm": 0.6358845829963684, + "learning_rate": 7.257093796614597e-05, + "loss": 1.3317, + "step": 8155 + }, + { + "epoch": 0.860337552742616, + "grad_norm": 0.690703809261322, + "learning_rate": 7.246323660255289e-05, + "loss": 1.339, + "step": 8156 + }, + { + "epoch": 0.8604430379746836, + "grad_norm": 0.6740551590919495, + "learning_rate": 7.235561116033265e-05, + "loss": 1.3057, + "step": 8157 + }, + { + "epoch": 0.860548523206751, + "grad_norm": 
0.6711967587471008, + "learning_rate": 7.224806165154504e-05, + "loss": 1.3185, + "step": 8158 + }, + { + "epoch": 0.8606540084388186, + "grad_norm": 0.6203529834747314, + "learning_rate": 7.214058808824192e-05, + "loss": 1.2882, + "step": 8159 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 0.6207805275917053, + "learning_rate": 7.203319048246599e-05, + "loss": 1.3113, + "step": 8160 + }, + { + "epoch": 0.8608649789029535, + "grad_norm": 0.6258909702301025, + "learning_rate": 7.192586884625169e-05, + "loss": 1.2985, + "step": 8161 + }, + { + "epoch": 0.8609704641350211, + "grad_norm": 0.6896752715110779, + "learning_rate": 7.1818623191625e-05, + "loss": 1.3369, + "step": 8162 + }, + { + "epoch": 0.8610759493670886, + "grad_norm": 0.6488739848136902, + "learning_rate": 7.17114535306033e-05, + "loss": 1.3327, + "step": 8163 + }, + { + "epoch": 0.8611814345991561, + "grad_norm": 0.622647225856781, + "learning_rate": 7.16043598751954e-05, + "loss": 1.2801, + "step": 8164 + }, + { + "epoch": 0.8612869198312236, + "grad_norm": 0.6754330992698669, + "learning_rate": 7.149734223740187e-05, + "loss": 1.3083, + "step": 8165 + }, + { + "epoch": 0.8613924050632912, + "grad_norm": 0.6399553418159485, + "learning_rate": 7.139040062921428e-05, + "loss": 1.3551, + "step": 8166 + }, + { + "epoch": 0.8614978902953586, + "grad_norm": 0.6313018798828125, + "learning_rate": 7.128353506261631e-05, + "loss": 1.3091, + "step": 8167 + }, + { + "epoch": 0.8616033755274262, + "grad_norm": 0.6250386238098145, + "learning_rate": 7.117674554958253e-05, + "loss": 1.3247, + "step": 8168 + }, + { + "epoch": 0.8617088607594937, + "grad_norm": 0.6689448952674866, + "learning_rate": 7.107003210207947e-05, + "loss": 1.2938, + "step": 8169 + }, + { + "epoch": 0.8618143459915611, + "grad_norm": 0.6405583620071411, + "learning_rate": 7.096339473206471e-05, + "loss": 1.2899, + "step": 8170 + }, + { + "epoch": 0.8619198312236287, + "grad_norm": 0.6436160802841187, + "learning_rate": 
7.085683345148753e-05, + "loss": 1.3164, + "step": 8171 + }, + { + "epoch": 0.8620253164556962, + "grad_norm": 0.6424108147621155, + "learning_rate": 7.075034827228862e-05, + "loss": 1.3153, + "step": 8172 + }, + { + "epoch": 0.8621308016877637, + "grad_norm": 0.6338745951652527, + "learning_rate": 7.064393920640031e-05, + "loss": 1.3537, + "step": 8173 + }, + { + "epoch": 0.8622362869198312, + "grad_norm": 0.656744658946991, + "learning_rate": 7.053760626574618e-05, + "loss": 1.3147, + "step": 8174 + }, + { + "epoch": 0.8623417721518988, + "grad_norm": 0.666649580001831, + "learning_rate": 7.043134946224123e-05, + "loss": 1.2786, + "step": 8175 + }, + { + "epoch": 0.8624472573839662, + "grad_norm": 0.6251575350761414, + "learning_rate": 7.032516880779233e-05, + "loss": 1.3196, + "step": 8176 + }, + { + "epoch": 0.8625527426160338, + "grad_norm": 0.6833670139312744, + "learning_rate": 7.021906431429747e-05, + "loss": 1.3385, + "step": 8177 + }, + { + "epoch": 0.8626582278481013, + "grad_norm": 0.661506712436676, + "learning_rate": 7.011303599364608e-05, + "loss": 1.3031, + "step": 8178 + }, + { + "epoch": 0.8627637130801687, + "grad_norm": 0.6145763397216797, + "learning_rate": 7.000708385771928e-05, + "loss": 1.3034, + "step": 8179 + }, + { + "epoch": 0.8628691983122363, + "grad_norm": 0.6532588601112366, + "learning_rate": 6.990120791838953e-05, + "loss": 1.3509, + "step": 8180 + }, + { + "epoch": 0.8629746835443038, + "grad_norm": 0.6802567839622498, + "learning_rate": 6.979540818752064e-05, + "loss": 1.312, + "step": 8181 + }, + { + "epoch": 0.8630801687763713, + "grad_norm": 0.6252881288528442, + "learning_rate": 6.968968467696806e-05, + "loss": 1.3157, + "step": 8182 + }, + { + "epoch": 0.8631856540084388, + "grad_norm": 0.7100160717964172, + "learning_rate": 6.958403739857866e-05, + "loss": 1.2953, + "step": 8183 + }, + { + "epoch": 0.8632911392405064, + "grad_norm": 0.6186259388923645, + "learning_rate": 6.947846636419061e-05, + "loss": 1.3236, + "step": 
8184 + }, + { + "epoch": 0.8633966244725738, + "grad_norm": 0.6382226347923279, + "learning_rate": 6.937297158563389e-05, + "loss": 1.3054, + "step": 8185 + }, + { + "epoch": 0.8635021097046414, + "grad_norm": 0.6140475273132324, + "learning_rate": 6.926755307472968e-05, + "loss": 1.3203, + "step": 8186 + }, + { + "epoch": 0.8636075949367089, + "grad_norm": 0.728777289390564, + "learning_rate": 6.916221084329055e-05, + "loss": 1.3053, + "step": 8187 + }, + { + "epoch": 0.8637130801687763, + "grad_norm": 0.7003549337387085, + "learning_rate": 6.905694490312064e-05, + "loss": 1.3007, + "step": 8188 + }, + { + "epoch": 0.8638185654008439, + "grad_norm": 0.6007750630378723, + "learning_rate": 6.89517552660156e-05, + "loss": 1.3211, + "step": 8189 + }, + { + "epoch": 0.8639240506329114, + "grad_norm": 0.6516117453575134, + "learning_rate": 6.884664194376233e-05, + "loss": 1.3064, + "step": 8190 + }, + { + "epoch": 0.8640295358649789, + "grad_norm": 0.680569589138031, + "learning_rate": 6.874160494813942e-05, + "loss": 1.3299, + "step": 8191 + }, + { + "epoch": 0.8641350210970464, + "grad_norm": 0.6583842039108276, + "learning_rate": 6.86366442909166e-05, + "loss": 1.3025, + "step": 8192 + }, + { + "epoch": 0.864240506329114, + "grad_norm": 0.6302453279495239, + "learning_rate": 6.853175998385547e-05, + "loss": 1.3036, + "step": 8193 + }, + { + "epoch": 0.8643459915611814, + "grad_norm": 0.6438713073730469, + "learning_rate": 6.842695203870872e-05, + "loss": 1.3429, + "step": 8194 + }, + { + "epoch": 0.864451476793249, + "grad_norm": 0.7239395976066589, + "learning_rate": 6.832222046722069e-05, + "loss": 1.3056, + "step": 8195 + }, + { + "epoch": 0.8645569620253165, + "grad_norm": 0.6425397992134094, + "learning_rate": 6.821756528112693e-05, + "loss": 1.3579, + "step": 8196 + }, + { + "epoch": 0.864662447257384, + "grad_norm": 0.6353945732116699, + "learning_rate": 6.811298649215472e-05, + "loss": 1.33, + "step": 8197 + }, + { + "epoch": 0.8647679324894515, + 
"grad_norm": 0.6135097742080688, + "learning_rate": 6.80084841120226e-05, + "loss": 1.3191, + "step": 8198 + }, + { + "epoch": 0.8648734177215189, + "grad_norm": 0.6269235014915466, + "learning_rate": 6.790405815244044e-05, + "loss": 1.3562, + "step": 8199 + }, + { + "epoch": 0.8649789029535865, + "grad_norm": 0.6038451194763184, + "learning_rate": 6.779970862510989e-05, + "loss": 1.2998, + "step": 8200 + }, + { + "epoch": 0.865084388185654, + "grad_norm": 0.6080598831176758, + "learning_rate": 6.769543554172361e-05, + "loss": 1.3221, + "step": 8201 + }, + { + "epoch": 0.8651898734177215, + "grad_norm": 0.6270411014556885, + "learning_rate": 6.759123891396615e-05, + "loss": 1.3637, + "step": 8202 + }, + { + "epoch": 0.865295358649789, + "grad_norm": 0.6465851068496704, + "learning_rate": 6.748711875351318e-05, + "loss": 1.3049, + "step": 8203 + }, + { + "epoch": 0.8654008438818566, + "grad_norm": 0.6107375621795654, + "learning_rate": 6.738307507203187e-05, + "loss": 1.2928, + "step": 8204 + }, + { + "epoch": 0.865506329113924, + "grad_norm": 0.6278404593467712, + "learning_rate": 6.72791078811808e-05, + "loss": 1.3221, + "step": 8205 + }, + { + "epoch": 0.8656118143459915, + "grad_norm": 0.6173403859138489, + "learning_rate": 6.717521719261016e-05, + "loss": 1.3143, + "step": 8206 + }, + { + "epoch": 0.8657172995780591, + "grad_norm": 0.6242444515228271, + "learning_rate": 6.707140301796122e-05, + "loss": 1.3407, + "step": 8207 + }, + { + "epoch": 0.8658227848101265, + "grad_norm": 0.621290922164917, + "learning_rate": 6.696766536886692e-05, + "loss": 1.3337, + "step": 8208 + }, + { + "epoch": 0.8659282700421941, + "grad_norm": 0.6086281538009644, + "learning_rate": 6.686400425695171e-05, + "loss": 1.2856, + "step": 8209 + }, + { + "epoch": 0.8660337552742616, + "grad_norm": 0.6305419206619263, + "learning_rate": 6.676041969383107e-05, + "loss": 1.2986, + "step": 8210 + }, + { + "epoch": 0.8661392405063291, + "grad_norm": 0.6488115191459656, + "learning_rate": 
6.665691169111244e-05, + "loss": 1.3219, + "step": 8211 + }, + { + "epoch": 0.8662447257383966, + "grad_norm": 0.6708512902259827, + "learning_rate": 6.655348026039437e-05, + "loss": 1.3083, + "step": 8212 + }, + { + "epoch": 0.8663502109704642, + "grad_norm": 0.6970303058624268, + "learning_rate": 6.645012541326678e-05, + "loss": 1.3105, + "step": 8213 + }, + { + "epoch": 0.8664556962025316, + "grad_norm": 0.5948620438575745, + "learning_rate": 6.634684716131114e-05, + "loss": 1.3215, + "step": 8214 + }, + { + "epoch": 0.8665611814345991, + "grad_norm": 0.6517964005470276, + "learning_rate": 6.62436455161003e-05, + "loss": 1.3382, + "step": 8215 + }, + { + "epoch": 0.8666666666666667, + "grad_norm": 0.637371838092804, + "learning_rate": 6.614052048919847e-05, + "loss": 1.3126, + "step": 8216 + }, + { + "epoch": 0.8667721518987341, + "grad_norm": 0.7334874272346497, + "learning_rate": 6.603747209216135e-05, + "loss": 1.3357, + "step": 8217 + }, + { + "epoch": 0.8668776371308017, + "grad_norm": 0.6994044184684753, + "learning_rate": 6.593450033653586e-05, + "loss": 1.298, + "step": 8218 + }, + { + "epoch": 0.8669831223628692, + "grad_norm": 0.6335144639015198, + "learning_rate": 6.583160523386086e-05, + "loss": 1.3476, + "step": 8219 + }, + { + "epoch": 0.8670886075949367, + "grad_norm": 0.6577361226081848, + "learning_rate": 6.572878679566605e-05, + "loss": 1.3555, + "step": 8220 + }, + { + "epoch": 0.8671940928270042, + "grad_norm": 0.6633812189102173, + "learning_rate": 6.562604503347277e-05, + "loss": 1.2821, + "step": 8221 + }, + { + "epoch": 0.8672995780590718, + "grad_norm": 0.7250613570213318, + "learning_rate": 6.552337995879368e-05, + "loss": 1.3117, + "step": 8222 + }, + { + "epoch": 0.8674050632911392, + "grad_norm": 0.6312881708145142, + "learning_rate": 6.542079158313305e-05, + "loss": 1.3428, + "step": 8223 + }, + { + "epoch": 0.8675105485232067, + "grad_norm": 0.6341035962104797, + "learning_rate": 6.531827991798628e-05, + "loss": 1.3253, + "step": 
8224 + }, + { + "epoch": 0.8676160337552743, + "grad_norm": 0.6334906816482544, + "learning_rate": 6.521584497484043e-05, + "loss": 1.3457, + "step": 8225 + }, + { + "epoch": 0.8677215189873417, + "grad_norm": 0.6831953525543213, + "learning_rate": 6.511348676517373e-05, + "loss": 1.3246, + "step": 8226 + }, + { + "epoch": 0.8678270042194093, + "grad_norm": 0.6515284180641174, + "learning_rate": 6.501120530045593e-05, + "loss": 1.2881, + "step": 8227 + }, + { + "epoch": 0.8679324894514768, + "grad_norm": 0.6276914477348328, + "learning_rate": 6.490900059214836e-05, + "loss": 1.3359, + "step": 8228 + }, + { + "epoch": 0.8680379746835443, + "grad_norm": 0.6145918369293213, + "learning_rate": 6.480687265170342e-05, + "loss": 1.3126, + "step": 8229 + }, + { + "epoch": 0.8681434599156118, + "grad_norm": 0.6589179039001465, + "learning_rate": 6.470482149056509e-05, + "loss": 1.3436, + "step": 8230 + }, + { + "epoch": 0.8682489451476794, + "grad_norm": 0.6165370941162109, + "learning_rate": 6.460284712016868e-05, + "loss": 1.315, + "step": 8231 + }, + { + "epoch": 0.8683544303797468, + "grad_norm": 0.633790135383606, + "learning_rate": 6.450094955194096e-05, + "loss": 1.2923, + "step": 8232 + }, + { + "epoch": 0.8684599156118143, + "grad_norm": 0.6176965236663818, + "learning_rate": 6.439912879730009e-05, + "loss": 1.3332, + "step": 8233 + }, + { + "epoch": 0.8685654008438819, + "grad_norm": 0.6298043727874756, + "learning_rate": 6.429738486765548e-05, + "loss": 1.3333, + "step": 8234 + }, + { + "epoch": 0.8686708860759493, + "grad_norm": 0.6474494934082031, + "learning_rate": 6.419571777440814e-05, + "loss": 1.3128, + "step": 8235 + }, + { + "epoch": 0.8687763713080169, + "grad_norm": 0.6123428344726562, + "learning_rate": 6.409412752895041e-05, + "loss": 1.3275, + "step": 8236 + }, + { + "epoch": 0.8688818565400844, + "grad_norm": 0.641872227191925, + "learning_rate": 6.399261414266571e-05, + "loss": 1.3421, + "step": 8237 + }, + { + "epoch": 0.8689873417721519, + 
"grad_norm": 0.6905437707901001, + "learning_rate": 6.389117762692952e-05, + "loss": 1.3332, + "step": 8238 + }, + { + "epoch": 0.8690928270042194, + "grad_norm": 0.6531820297241211, + "learning_rate": 6.37898179931081e-05, + "loss": 1.2713, + "step": 8239 + }, + { + "epoch": 0.869198312236287, + "grad_norm": 0.6492443084716797, + "learning_rate": 6.368853525255942e-05, + "loss": 1.3101, + "step": 8240 + }, + { + "epoch": 0.8693037974683544, + "grad_norm": 0.6195869445800781, + "learning_rate": 6.358732941663248e-05, + "loss": 1.3344, + "step": 8241 + }, + { + "epoch": 0.869409282700422, + "grad_norm": 0.6591104865074158, + "learning_rate": 6.348620049666815e-05, + "loss": 1.3144, + "step": 8242 + }, + { + "epoch": 0.8695147679324895, + "grad_norm": 0.6285952925682068, + "learning_rate": 6.338514850399826e-05, + "loss": 1.3365, + "step": 8243 + }, + { + "epoch": 0.8696202531645569, + "grad_norm": 0.6894201040267944, + "learning_rate": 6.328417344994627e-05, + "loss": 1.3219, + "step": 8244 + }, + { + "epoch": 0.8697257383966245, + "grad_norm": 0.674487292766571, + "learning_rate": 6.318327534582688e-05, + "loss": 1.3005, + "step": 8245 + }, + { + "epoch": 0.869831223628692, + "grad_norm": 0.6114456653594971, + "learning_rate": 6.308245420294636e-05, + "loss": 1.3325, + "step": 8246 + }, + { + "epoch": 0.8699367088607595, + "grad_norm": 0.6438221335411072, + "learning_rate": 6.298171003260194e-05, + "loss": 1.3469, + "step": 8247 + }, + { + "epoch": 0.870042194092827, + "grad_norm": 0.6389046311378479, + "learning_rate": 6.288104284608284e-05, + "loss": 1.3519, + "step": 8248 + }, + { + "epoch": 0.8701476793248946, + "grad_norm": 0.668220579624176, + "learning_rate": 6.278045265466911e-05, + "loss": 1.3286, + "step": 8249 + }, + { + "epoch": 0.870253164556962, + "grad_norm": 0.6358574032783508, + "learning_rate": 6.267993946963249e-05, + "loss": 1.3465, + "step": 8250 + }, + { + "epoch": 0.8703586497890295, + "grad_norm": 0.6690512895584106, + "learning_rate": 
6.257950330223597e-05, + "loss": 1.3434, + "step": 8251 + }, + { + "epoch": 0.8704641350210971, + "grad_norm": 0.6368394494056702, + "learning_rate": 6.247914416373387e-05, + "loss": 1.2977, + "step": 8252 + }, + { + "epoch": 0.8705696202531645, + "grad_norm": 0.6376136541366577, + "learning_rate": 6.237886206537197e-05, + "loss": 1.3127, + "step": 8253 + }, + { + "epoch": 0.8706751054852321, + "grad_norm": 0.7281667590141296, + "learning_rate": 6.227865701838733e-05, + "loss": 1.3024, + "step": 8254 + }, + { + "epoch": 0.8707805907172996, + "grad_norm": 0.6888619661331177, + "learning_rate": 6.217852903400841e-05, + "loss": 1.3735, + "step": 8255 + }, + { + "epoch": 0.8708860759493671, + "grad_norm": 0.7349372506141663, + "learning_rate": 6.207847812345524e-05, + "loss": 1.331, + "step": 8256 + }, + { + "epoch": 0.8709915611814346, + "grad_norm": 0.6228294372558594, + "learning_rate": 6.197850429793866e-05, + "loss": 1.3236, + "step": 8257 + }, + { + "epoch": 0.8710970464135022, + "grad_norm": 0.6388886570930481, + "learning_rate": 6.187860756866157e-05, + "loss": 1.305, + "step": 8258 + }, + { + "epoch": 0.8712025316455696, + "grad_norm": 0.6580924391746521, + "learning_rate": 6.177878794681782e-05, + "loss": 1.3199, + "step": 8259 + }, + { + "epoch": 0.8713080168776371, + "grad_norm": 0.7155678272247314, + "learning_rate": 6.167904544359265e-05, + "loss": 1.2931, + "step": 8260 + }, + { + "epoch": 0.8714135021097047, + "grad_norm": 0.6860477924346924, + "learning_rate": 6.157938007016279e-05, + "loss": 1.3156, + "step": 8261 + }, + { + "epoch": 0.8715189873417721, + "grad_norm": 0.6097769141197205, + "learning_rate": 6.147979183769602e-05, + "loss": 1.3161, + "step": 8262 + }, + { + "epoch": 0.8716244725738397, + "grad_norm": 0.6145731210708618, + "learning_rate": 6.138028075735196e-05, + "loss": 1.3408, + "step": 8263 + }, + { + "epoch": 0.8717299578059071, + "grad_norm": 0.6026891469955444, + "learning_rate": 6.128084684028118e-05, + "loss": 1.2908, + "step": 
8264 + }, + { + "epoch": 0.8718354430379747, + "grad_norm": 0.71782386302948, + "learning_rate": 6.118149009762574e-05, + "loss": 1.3, + "step": 8265 + }, + { + "epoch": 0.8719409282700422, + "grad_norm": 0.7830792665481567, + "learning_rate": 6.108221054051902e-05, + "loss": 1.3221, + "step": 8266 + }, + { + "epoch": 0.8720464135021097, + "grad_norm": 0.6211056113243103, + "learning_rate": 6.0983008180086005e-05, + "loss": 1.3249, + "step": 8267 + }, + { + "epoch": 0.8721518987341772, + "grad_norm": 0.626950740814209, + "learning_rate": 6.088388302744266e-05, + "loss": 1.3024, + "step": 8268 + }, + { + "epoch": 0.8722573839662447, + "grad_norm": 0.6292208433151245, + "learning_rate": 6.078483509369642e-05, + "loss": 1.3371, + "step": 8269 + }, + { + "epoch": 0.8723628691983122, + "grad_norm": 0.6863295435905457, + "learning_rate": 6.068586438994617e-05, + "loss": 1.3416, + "step": 8270 + }, + { + "epoch": 0.8724683544303797, + "grad_norm": 0.7981477379798889, + "learning_rate": 6.058697092728202e-05, + "loss": 1.3596, + "step": 8271 + }, + { + "epoch": 0.8725738396624473, + "grad_norm": 0.6436115503311157, + "learning_rate": 6.048815471678554e-05, + "loss": 1.2849, + "step": 8272 + }, + { + "epoch": 0.8726793248945147, + "grad_norm": 0.6555737853050232, + "learning_rate": 6.038941576952952e-05, + "loss": 1.2968, + "step": 8273 + }, + { + "epoch": 0.8727848101265823, + "grad_norm": 0.6187567710876465, + "learning_rate": 6.029075409657822e-05, + "loss": 1.3044, + "step": 8274 + }, + { + "epoch": 0.8728902953586498, + "grad_norm": 0.6557720303535461, + "learning_rate": 6.0192169708987026e-05, + "loss": 1.2807, + "step": 8275 + }, + { + "epoch": 0.8729957805907173, + "grad_norm": 0.7644457817077637, + "learning_rate": 6.009366261780286e-05, + "loss": 1.3296, + "step": 8276 + }, + { + "epoch": 0.8731012658227848, + "grad_norm": 0.6805019378662109, + "learning_rate": 5.999523283406405e-05, + "loss": 1.2942, + "step": 8277 + }, + { + "epoch": 0.8732067510548523, + 
"grad_norm": 0.6370545625686646, + "learning_rate": 5.9896880368800115e-05, + "loss": 1.2866, + "step": 8278 + }, + { + "epoch": 0.8733122362869198, + "grad_norm": 0.6253505945205688, + "learning_rate": 5.9798605233031904e-05, + "loss": 1.316, + "step": 8279 + }, + { + "epoch": 0.8734177215189873, + "grad_norm": 0.6405370831489563, + "learning_rate": 5.970040743777161e-05, + "loss": 1.2788, + "step": 8280 + }, + { + "epoch": 0.8735232067510549, + "grad_norm": 0.6187415719032288, + "learning_rate": 5.960228699402284e-05, + "loss": 1.3045, + "step": 8281 + }, + { + "epoch": 0.8736286919831223, + "grad_norm": 0.6913607716560364, + "learning_rate": 5.9504243912780474e-05, + "loss": 1.3289, + "step": 8282 + }, + { + "epoch": 0.8737341772151899, + "grad_norm": 0.628208577632904, + "learning_rate": 5.940627820503064e-05, + "loss": 1.3212, + "step": 8283 + }, + { + "epoch": 0.8738396624472574, + "grad_norm": 0.6285027265548706, + "learning_rate": 5.930838988175097e-05, + "loss": 1.2841, + "step": 8284 + }, + { + "epoch": 0.8739451476793249, + "grad_norm": 0.6073300242424011, + "learning_rate": 5.921057895391027e-05, + "loss": 1.2813, + "step": 8285 + }, + { + "epoch": 0.8740506329113924, + "grad_norm": 0.6355143785476685, + "learning_rate": 5.91128454324687e-05, + "loss": 1.2846, + "step": 8286 + }, + { + "epoch": 0.87415611814346, + "grad_norm": 0.642495334148407, + "learning_rate": 5.901518932837799e-05, + "loss": 1.3322, + "step": 8287 + }, + { + "epoch": 0.8742616033755274, + "grad_norm": 0.7123928666114807, + "learning_rate": 5.891761065258089e-05, + "loss": 1.3176, + "step": 8288 + }, + { + "epoch": 0.8743670886075949, + "grad_norm": 0.6290260553359985, + "learning_rate": 5.8820109416011485e-05, + "loss": 1.3111, + "step": 8289 + }, + { + "epoch": 0.8744725738396625, + "grad_norm": 0.6499876976013184, + "learning_rate": 5.8722685629595454e-05, + "loss": 1.2724, + "step": 8290 + }, + { + "epoch": 0.8745780590717299, + "grad_norm": 0.7193732261657715, + 
"learning_rate": 5.862533930424949e-05, + "loss": 1.3251, + "step": 8291 + }, + { + "epoch": 0.8746835443037975, + "grad_norm": 0.6413506865501404, + "learning_rate": 5.852807045088177e-05, + "loss": 1.3448, + "step": 8292 + }, + { + "epoch": 0.874789029535865, + "grad_norm": 0.6471371650695801, + "learning_rate": 5.843087908039166e-05, + "loss": 1.2936, + "step": 8293 + }, + { + "epoch": 0.8748945147679325, + "grad_norm": 0.6951218843460083, + "learning_rate": 5.833376520367012e-05, + "loss": 1.3328, + "step": 8294 + }, + { + "epoch": 0.875, + "grad_norm": 0.648192286491394, + "learning_rate": 5.823672883159911e-05, + "loss": 1.3566, + "step": 8295 + }, + { + "epoch": 0.8751054852320675, + "grad_norm": 0.6736572980880737, + "learning_rate": 5.813976997505202e-05, + "loss": 1.3075, + "step": 8296 + }, + { + "epoch": 0.875210970464135, + "grad_norm": 0.6242811679840088, + "learning_rate": 5.804288864489366e-05, + "loss": 1.3305, + "step": 8297 + }, + { + "epoch": 0.8753164556962025, + "grad_norm": 0.6109203696250916, + "learning_rate": 5.794608485198008e-05, + "loss": 1.3239, + "step": 8298 + }, + { + "epoch": 0.8754219409282701, + "grad_norm": 0.6226614117622375, + "learning_rate": 5.784935860715862e-05, + "loss": 1.3399, + "step": 8299 + }, + { + "epoch": 0.8755274261603375, + "grad_norm": 0.6390045285224915, + "learning_rate": 5.7752709921267855e-05, + "loss": 1.3188, + "step": 8300 + }, + { + "epoch": 0.8756329113924051, + "grad_norm": 0.6167485117912292, + "learning_rate": 5.7656138805137785e-05, + "loss": 1.3248, + "step": 8301 + }, + { + "epoch": 0.8757383966244726, + "grad_norm": 0.6906527280807495, + "learning_rate": 5.7559645269589764e-05, + "loss": 1.3026, + "step": 8302 + }, + { + "epoch": 0.87584388185654, + "grad_norm": 0.6235339641571045, + "learning_rate": 5.746322932543621e-05, + "loss": 1.2815, + "step": 8303 + }, + { + "epoch": 0.8759493670886076, + "grad_norm": 0.6252560615539551, + "learning_rate": 5.736689098348125e-05, + "loss": 1.321, + 
"step": 8304 + }, + { + "epoch": 0.8760548523206751, + "grad_norm": 0.6897796988487244, + "learning_rate": 5.727063025451973e-05, + "loss": 1.2879, + "step": 8305 + }, + { + "epoch": 0.8761603375527426, + "grad_norm": 0.6566361784934998, + "learning_rate": 5.717444714933845e-05, + "loss": 1.3248, + "step": 8306 + }, + { + "epoch": 0.8762658227848101, + "grad_norm": 0.6109856963157654, + "learning_rate": 5.707834167871512e-05, + "loss": 1.2861, + "step": 8307 + }, + { + "epoch": 0.8763713080168777, + "grad_norm": 0.6261648535728455, + "learning_rate": 5.698231385341887e-05, + "loss": 1.3238, + "step": 8308 + }, + { + "epoch": 0.8764767932489451, + "grad_norm": 0.6063156127929688, + "learning_rate": 5.6886363684210016e-05, + "loss": 1.2918, + "step": 8309 + }, + { + "epoch": 0.8765822784810127, + "grad_norm": 0.6310629844665527, + "learning_rate": 5.6790491181840294e-05, + "loss": 1.3219, + "step": 8310 + }, + { + "epoch": 0.8766877637130802, + "grad_norm": 0.6397956013679504, + "learning_rate": 5.6694696357052685e-05, + "loss": 1.3608, + "step": 8311 + }, + { + "epoch": 0.8767932489451477, + "grad_norm": 0.7234463095664978, + "learning_rate": 5.6598979220581434e-05, + "loss": 1.3325, + "step": 8312 + }, + { + "epoch": 0.8768987341772152, + "grad_norm": 0.6356079578399658, + "learning_rate": 5.650333978315223e-05, + "loss": 1.3457, + "step": 8313 + }, + { + "epoch": 0.8770042194092827, + "grad_norm": 0.6079540848731995, + "learning_rate": 5.640777805548181e-05, + "loss": 1.3112, + "step": 8314 + }, + { + "epoch": 0.8771097046413502, + "grad_norm": 0.7041782736778259, + "learning_rate": 5.631229404827845e-05, + "loss": 1.3241, + "step": 8315 + }, + { + "epoch": 0.8772151898734177, + "grad_norm": 0.6259835362434387, + "learning_rate": 5.6216887772241596e-05, + "loss": 1.3211, + "step": 8316 + }, + { + "epoch": 0.8773206751054853, + "grad_norm": 0.6678051352500916, + "learning_rate": 5.612155923806203e-05, + "loss": 1.3083, + "step": 8317 + }, + { + "epoch": 
0.8774261603375527, + "grad_norm": 0.6748981475830078, + "learning_rate": 5.60263084564217e-05, + "loss": 1.328, + "step": 8318 + }, + { + "epoch": 0.8775316455696203, + "grad_norm": 0.6261250376701355, + "learning_rate": 5.5931135437993994e-05, + "loss": 1.3258, + "step": 8319 + }, + { + "epoch": 0.8776371308016878, + "grad_norm": 0.6249533295631409, + "learning_rate": 5.583604019344354e-05, + "loss": 1.3055, + "step": 8320 + }, + { + "epoch": 0.8777426160337553, + "grad_norm": 0.6229753494262695, + "learning_rate": 5.574102273342616e-05, + "loss": 1.294, + "step": 8321 + }, + { + "epoch": 0.8778481012658228, + "grad_norm": 0.6541988253593445, + "learning_rate": 5.5646083068589065e-05, + "loss": 1.3366, + "step": 8322 + }, + { + "epoch": 0.8779535864978903, + "grad_norm": 0.6695040464401245, + "learning_rate": 5.5551221209570596e-05, + "loss": 1.3274, + "step": 8323 + }, + { + "epoch": 0.8780590717299578, + "grad_norm": 0.6364715099334717, + "learning_rate": 5.5456437167000746e-05, + "loss": 1.3378, + "step": 8324 + }, + { + "epoch": 0.8781645569620253, + "grad_norm": 0.6140038371086121, + "learning_rate": 5.536173095150043e-05, + "loss": 1.3116, + "step": 8325 + }, + { + "epoch": 0.8782700421940929, + "grad_norm": 0.6288489699363708, + "learning_rate": 5.526710257368192e-05, + "loss": 1.3243, + "step": 8326 + }, + { + "epoch": 0.8783755274261603, + "grad_norm": 0.6191476583480835, + "learning_rate": 5.517255204414889e-05, + "loss": 1.3372, + "step": 8327 + }, + { + "epoch": 0.8784810126582279, + "grad_norm": 0.6660230755805969, + "learning_rate": 5.507807937349604e-05, + "loss": 1.3076, + "step": 8328 + }, + { + "epoch": 0.8785864978902953, + "grad_norm": 0.6168859601020813, + "learning_rate": 5.498368457230965e-05, + "loss": 1.2859, + "step": 8329 + }, + { + "epoch": 0.8786919831223629, + "grad_norm": 0.64420485496521, + "learning_rate": 5.4889367651167007e-05, + "loss": 1.321, + "step": 8330 + }, + { + "epoch": 0.8787974683544304, + "grad_norm": 
0.6127023100852966, + "learning_rate": 5.479512862063674e-05, + "loss": 1.3369, + "step": 8331 + }, + { + "epoch": 0.8789029535864978, + "grad_norm": 0.633466899394989, + "learning_rate": 5.470096749127906e-05, + "loss": 1.3473, + "step": 8332 + }, + { + "epoch": 0.8790084388185654, + "grad_norm": 0.6086171865463257, + "learning_rate": 5.460688427364505e-05, + "loss": 1.3037, + "step": 8333 + }, + { + "epoch": 0.8791139240506329, + "grad_norm": 0.6576785445213318, + "learning_rate": 5.451287897827725e-05, + "loss": 1.2999, + "step": 8334 + }, + { + "epoch": 0.8792194092827004, + "grad_norm": 0.6854830384254456, + "learning_rate": 5.441895161570934e-05, + "loss": 1.3324, + "step": 8335 + }, + { + "epoch": 0.8793248945147679, + "grad_norm": 0.6170980930328369, + "learning_rate": 5.43251021964663e-05, + "loss": 1.2947, + "step": 8336 + }, + { + "epoch": 0.8794303797468355, + "grad_norm": 0.6450515389442444, + "learning_rate": 5.423133073106457e-05, + "loss": 1.3113, + "step": 8337 + }, + { + "epoch": 0.8795358649789029, + "grad_norm": 0.6647613644599915, + "learning_rate": 5.413763723001164e-05, + "loss": 1.3022, + "step": 8338 + }, + { + "epoch": 0.8796413502109705, + "grad_norm": 0.6350463628768921, + "learning_rate": 5.4044021703806375e-05, + "loss": 1.2376, + "step": 8339 + }, + { + "epoch": 0.879746835443038, + "grad_norm": 0.6423353552818298, + "learning_rate": 5.3950484162938714e-05, + "loss": 1.2857, + "step": 8340 + }, + { + "epoch": 0.8798523206751054, + "grad_norm": 0.6289640665054321, + "learning_rate": 5.385702461789019e-05, + "loss": 1.3021, + "step": 8341 + }, + { + "epoch": 0.879957805907173, + "grad_norm": 0.6060901880264282, + "learning_rate": 5.376364307913334e-05, + "loss": 1.2992, + "step": 8342 + }, + { + "epoch": 0.8800632911392405, + "grad_norm": 0.601291835308075, + "learning_rate": 5.3670339557132045e-05, + "loss": 1.2687, + "step": 8343 + }, + { + "epoch": 0.880168776371308, + "grad_norm": 0.6167020797729492, + "learning_rate": 
5.3577114062341446e-05, + "loss": 1.2889, + "step": 8344 + }, + { + "epoch": 0.8802742616033755, + "grad_norm": 0.6177312135696411, + "learning_rate": 5.348396660520785e-05, + "loss": 1.3292, + "step": 8345 + }, + { + "epoch": 0.8803797468354431, + "grad_norm": 0.6566522717475891, + "learning_rate": 5.339089719616891e-05, + "loss": 1.305, + "step": 8346 + }, + { + "epoch": 0.8804852320675105, + "grad_norm": 0.6285632252693176, + "learning_rate": 5.329790584565361e-05, + "loss": 1.3524, + "step": 8347 + }, + { + "epoch": 0.880590717299578, + "grad_norm": 0.6332507133483887, + "learning_rate": 5.320499256408204e-05, + "loss": 1.3161, + "step": 8348 + }, + { + "epoch": 0.8806962025316456, + "grad_norm": 0.6155449151992798, + "learning_rate": 5.311215736186536e-05, + "loss": 1.2969, + "step": 8349 + }, + { + "epoch": 0.880801687763713, + "grad_norm": 0.6402378082275391, + "learning_rate": 5.3019400249406686e-05, + "loss": 1.3375, + "step": 8350 + }, + { + "epoch": 0.8809071729957806, + "grad_norm": 0.6094779372215271, + "learning_rate": 5.29267212370996e-05, + "loss": 1.3279, + "step": 8351 + }, + { + "epoch": 0.8810126582278481, + "grad_norm": 0.6298046112060547, + "learning_rate": 5.283412033532939e-05, + "loss": 1.2803, + "step": 8352 + }, + { + "epoch": 0.8811181434599156, + "grad_norm": 0.6555442214012146, + "learning_rate": 5.274159755447233e-05, + "loss": 1.2607, + "step": 8353 + }, + { + "epoch": 0.8812236286919831, + "grad_norm": 0.6513099074363708, + "learning_rate": 5.264915290489614e-05, + "loss": 1.2985, + "step": 8354 + }, + { + "epoch": 0.8813291139240507, + "grad_norm": 0.6758123636245728, + "learning_rate": 5.25567863969596e-05, + "loss": 1.2908, + "step": 8355 + }, + { + "epoch": 0.8814345991561181, + "grad_norm": 0.6286755204200745, + "learning_rate": 5.246449804101294e-05, + "loss": 1.3166, + "step": 8356 + }, + { + "epoch": 0.8815400843881857, + "grad_norm": 0.6281440258026123, + "learning_rate": 5.237228784739739e-05, + "loss": 1.3369, + "step": 
8357 + }, + { + "epoch": 0.8816455696202532, + "grad_norm": 0.622675359249115, + "learning_rate": 5.228015582644585e-05, + "loss": 1.3097, + "step": 8358 + }, + { + "epoch": 0.8817510548523206, + "grad_norm": 0.6438984274864197, + "learning_rate": 5.21881019884819e-05, + "loss": 1.3518, + "step": 8359 + }, + { + "epoch": 0.8818565400843882, + "grad_norm": 0.6202324032783508, + "learning_rate": 5.209612634382077e-05, + "loss": 1.2931, + "step": 8360 + }, + { + "epoch": 0.8819620253164557, + "grad_norm": 0.6163958311080933, + "learning_rate": 5.2004228902768815e-05, + "loss": 1.326, + "step": 8361 + }, + { + "epoch": 0.8820675105485232, + "grad_norm": 0.6152327060699463, + "learning_rate": 5.191240967562347e-05, + "loss": 1.3549, + "step": 8362 + }, + { + "epoch": 0.8821729957805907, + "grad_norm": 0.6073678731918335, + "learning_rate": 5.182066867267357e-05, + "loss": 1.3152, + "step": 8363 + }, + { + "epoch": 0.8822784810126583, + "grad_norm": 0.6131789684295654, + "learning_rate": 5.172900590419915e-05, + "loss": 1.3269, + "step": 8364 + }, + { + "epoch": 0.8823839662447257, + "grad_norm": 0.6234472990036011, + "learning_rate": 5.1637421380471586e-05, + "loss": 1.3392, + "step": 8365 + }, + { + "epoch": 0.8824894514767933, + "grad_norm": 0.6679685711860657, + "learning_rate": 5.154591511175316e-05, + "loss": 1.2943, + "step": 8366 + }, + { + "epoch": 0.8825949367088608, + "grad_norm": 0.6084043383598328, + "learning_rate": 5.1454487108297924e-05, + "loss": 1.311, + "step": 8367 + }, + { + "epoch": 0.8827004219409282, + "grad_norm": 0.6221097111701965, + "learning_rate": 5.136313738035059e-05, + "loss": 1.3207, + "step": 8368 + }, + { + "epoch": 0.8828059071729958, + "grad_norm": 0.6083880662918091, + "learning_rate": 5.127186593814748e-05, + "loss": 1.2961, + "step": 8369 + }, + { + "epoch": 0.8829113924050633, + "grad_norm": 0.6080441474914551, + "learning_rate": 5.118067279191599e-05, + "loss": 1.2757, + "step": 8370 + }, + { + "epoch": 0.8830168776371308, + 
"grad_norm": 0.6156049966812134, + "learning_rate": 5.1089557951874696e-05, + "loss": 1.3025, + "step": 8371 + }, + { + "epoch": 0.8831223628691983, + "grad_norm": 0.6244804859161377, + "learning_rate": 5.0998521428233526e-05, + "loss": 1.3391, + "step": 8372 + }, + { + "epoch": 0.8832278481012659, + "grad_norm": 0.6531791687011719, + "learning_rate": 5.0907563231193556e-05, + "loss": 1.2982, + "step": 8373 + }, + { + "epoch": 0.8833333333333333, + "grad_norm": 0.6222841143608093, + "learning_rate": 5.081668337094713e-05, + "loss": 1.2991, + "step": 8374 + }, + { + "epoch": 0.8834388185654009, + "grad_norm": 0.6414284706115723, + "learning_rate": 5.072588185767763e-05, + "loss": 1.3448, + "step": 8375 + }, + { + "epoch": 0.8835443037974684, + "grad_norm": 0.6555367708206177, + "learning_rate": 5.063515870156013e-05, + "loss": 1.3606, + "step": 8376 + }, + { + "epoch": 0.8836497890295358, + "grad_norm": 0.6300396919250488, + "learning_rate": 5.054451391276035e-05, + "loss": 1.32, + "step": 8377 + }, + { + "epoch": 0.8837552742616034, + "grad_norm": 0.6171274185180664, + "learning_rate": 5.045394750143567e-05, + "loss": 1.3038, + "step": 8378 + }, + { + "epoch": 0.8838607594936709, + "grad_norm": 0.6138423085212708, + "learning_rate": 5.0363459477734464e-05, + "loss": 1.3, + "step": 8379 + }, + { + "epoch": 0.8839662447257384, + "grad_norm": 0.6640437841415405, + "learning_rate": 5.0273049851796205e-05, + "loss": 1.3428, + "step": 8380 + }, + { + "epoch": 0.8840717299578059, + "grad_norm": 0.6035593152046204, + "learning_rate": 5.0182718633751954e-05, + "loss": 1.2949, + "step": 8381 + }, + { + "epoch": 0.8841772151898735, + "grad_norm": 0.6118256449699402, + "learning_rate": 5.009246583372362e-05, + "loss": 1.3361, + "step": 8382 + }, + { + "epoch": 0.8842827004219409, + "grad_norm": 0.6810294985771179, + "learning_rate": 5.000229146182453e-05, + "loss": 1.3041, + "step": 8383 + }, + { + "epoch": 0.8843881856540085, + "grad_norm": 0.6302932500839233, + 
"learning_rate": 4.9912195528159174e-05, + "loss": 1.304, + "step": 8384 + }, + { + "epoch": 0.884493670886076, + "grad_norm": 0.6194151043891907, + "learning_rate": 4.982217804282332e-05, + "loss": 1.3342, + "step": 8385 + }, + { + "epoch": 0.8845991561181434, + "grad_norm": 0.6530686020851135, + "learning_rate": 4.973223901590382e-05, + "loss": 1.3182, + "step": 8386 + }, + { + "epoch": 0.884704641350211, + "grad_norm": 0.6796833276748657, + "learning_rate": 4.9642378457478847e-05, + "loss": 1.3082, + "step": 8387 + }, + { + "epoch": 0.8848101265822785, + "grad_norm": 0.6354828476905823, + "learning_rate": 4.955259637761761e-05, + "loss": 1.2785, + "step": 8388 + }, + { + "epoch": 0.884915611814346, + "grad_norm": 0.600785493850708, + "learning_rate": 4.946289278638064e-05, + "loss": 1.2894, + "step": 8389 + }, + { + "epoch": 0.8850210970464135, + "grad_norm": 0.6073634624481201, + "learning_rate": 4.9373267693819805e-05, + "loss": 1.3239, + "step": 8390 + }, + { + "epoch": 0.8851265822784811, + "grad_norm": 0.6308258175849915, + "learning_rate": 4.928372110997792e-05, + "loss": 1.3387, + "step": 8391 + }, + { + "epoch": 0.8852320675105485, + "grad_norm": 0.6572723984718323, + "learning_rate": 4.9194253044889117e-05, + "loss": 1.2952, + "step": 8392 + }, + { + "epoch": 0.885337552742616, + "grad_norm": 0.6877081394195557, + "learning_rate": 4.910486350857887e-05, + "loss": 1.3142, + "step": 8393 + }, + { + "epoch": 0.8854430379746835, + "grad_norm": 0.6710333824157715, + "learning_rate": 4.90155525110636e-05, + "loss": 1.3021, + "step": 8394 + }, + { + "epoch": 0.885548523206751, + "grad_norm": 0.6361010074615479, + "learning_rate": 4.89263200623512e-05, + "loss": 1.3, + "step": 8395 + }, + { + "epoch": 0.8856540084388186, + "grad_norm": 0.625547468662262, + "learning_rate": 4.883716617244044e-05, + "loss": 1.32, + "step": 8396 + }, + { + "epoch": 0.885759493670886, + "grad_norm": 0.6225746273994446, + "learning_rate": 4.874809085132148e-05, + "loss": 1.3163, + 
"step": 8397 + }, + { + "epoch": 0.8858649789029536, + "grad_norm": 0.622203528881073, + "learning_rate": 4.865909410897576e-05, + "loss": 1.2911, + "step": 8398 + }, + { + "epoch": 0.8859704641350211, + "grad_norm": 0.6891542077064514, + "learning_rate": 4.8570175955375715e-05, + "loss": 1.3292, + "step": 8399 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 0.6484179496765137, + "learning_rate": 4.848133640048513e-05, + "loss": 1.3088, + "step": 8400 + }, + { + "epoch": 0.8861814345991561, + "grad_norm": 0.6203246712684631, + "learning_rate": 4.839257545425879e-05, + "loss": 1.3268, + "step": 8401 + }, + { + "epoch": 0.8862869198312237, + "grad_norm": 0.6495834589004517, + "learning_rate": 4.830389312664299e-05, + "loss": 1.3089, + "step": 8402 + }, + { + "epoch": 0.8863924050632911, + "grad_norm": 0.6263790130615234, + "learning_rate": 4.821528942757494e-05, + "loss": 1.339, + "step": 8403 + }, + { + "epoch": 0.8864978902953586, + "grad_norm": 0.6108188629150391, + "learning_rate": 4.8126764366983126e-05, + "loss": 1.3303, + "step": 8404 + }, + { + "epoch": 0.8866033755274262, + "grad_norm": 0.7610468864440918, + "learning_rate": 4.803831795478719e-05, + "loss": 1.3284, + "step": 8405 + }, + { + "epoch": 0.8867088607594936, + "grad_norm": 0.6519230604171753, + "learning_rate": 4.794995020089804e-05, + "loss": 1.3423, + "step": 8406 + }, + { + "epoch": 0.8868143459915612, + "grad_norm": 0.656310498714447, + "learning_rate": 4.7861661115217754e-05, + "loss": 1.349, + "step": 8407 + }, + { + "epoch": 0.8869198312236287, + "grad_norm": 0.6084714531898499, + "learning_rate": 4.7773450707639414e-05, + "loss": 1.3211, + "step": 8408 + }, + { + "epoch": 0.8870253164556962, + "grad_norm": 0.6502481698989868, + "learning_rate": 4.768531898804754e-05, + "loss": 1.351, + "step": 8409 + }, + { + "epoch": 0.8871308016877637, + "grad_norm": 0.6394721865653992, + "learning_rate": 4.75972659663178e-05, + "loss": 1.3017, + "step": 8410 + }, + { + "epoch": 
0.8872362869198313, + "grad_norm": 0.6539206504821777, + "learning_rate": 4.75092916523169e-05, + "loss": 1.3479, + "step": 8411 + }, + { + "epoch": 0.8873417721518987, + "grad_norm": 0.6104992628097534, + "learning_rate": 4.742139605590279e-05, + "loss": 1.3032, + "step": 8412 + }, + { + "epoch": 0.8874472573839662, + "grad_norm": 0.6258765459060669, + "learning_rate": 4.733357918692466e-05, + "loss": 1.3447, + "step": 8413 + }, + { + "epoch": 0.8875527426160338, + "grad_norm": 0.6255026459693909, + "learning_rate": 4.7245841055222726e-05, + "loss": 1.323, + "step": 8414 + }, + { + "epoch": 0.8876582278481012, + "grad_norm": 0.6356685161590576, + "learning_rate": 4.715818167062863e-05, + "loss": 1.3267, + "step": 8415 + }, + { + "epoch": 0.8877637130801688, + "grad_norm": 0.6175923347473145, + "learning_rate": 4.7070601042964925e-05, + "loss": 1.2842, + "step": 8416 + }, + { + "epoch": 0.8878691983122363, + "grad_norm": 0.613161563873291, + "learning_rate": 4.698309918204552e-05, + "loss": 1.3079, + "step": 8417 + }, + { + "epoch": 0.8879746835443038, + "grad_norm": 0.6324964165687561, + "learning_rate": 4.6895676097675225e-05, + "loss": 1.3165, + "step": 8418 + }, + { + "epoch": 0.8880801687763713, + "grad_norm": 0.628699779510498, + "learning_rate": 4.680833179965063e-05, + "loss": 1.3061, + "step": 8419 + }, + { + "epoch": 0.8881856540084389, + "grad_norm": 0.614382266998291, + "learning_rate": 4.672106629775882e-05, + "loss": 1.331, + "step": 8420 + }, + { + "epoch": 0.8882911392405063, + "grad_norm": 0.593723475933075, + "learning_rate": 4.663387960177848e-05, + "loss": 1.3172, + "step": 8421 + }, + { + "epoch": 0.8883966244725738, + "grad_norm": 0.6195310950279236, + "learning_rate": 4.654677172147912e-05, + "loss": 1.3154, + "step": 8422 + }, + { + "epoch": 0.8885021097046414, + "grad_norm": 0.6290426254272461, + "learning_rate": 4.645974266662176e-05, + "loss": 1.3404, + "step": 8423 + }, + { + "epoch": 0.8886075949367088, + "grad_norm": 
0.6101723313331604, + "learning_rate": 4.637279244695844e-05, + "loss": 1.2798, + "step": 8424 + }, + { + "epoch": 0.8887130801687764, + "grad_norm": 0.6157339811325073, + "learning_rate": 4.628592107223229e-05, + "loss": 1.2957, + "step": 8425 + }, + { + "epoch": 0.8888185654008439, + "grad_norm": 0.6378819942474365, + "learning_rate": 4.6199128552177756e-05, + "loss": 1.2789, + "step": 8426 + }, + { + "epoch": 0.8889240506329114, + "grad_norm": 0.626876950263977, + "learning_rate": 4.611241489652016e-05, + "loss": 1.3585, + "step": 8427 + }, + { + "epoch": 0.8890295358649789, + "grad_norm": 0.673676609992981, + "learning_rate": 4.6025780114976545e-05, + "loss": 1.3107, + "step": 8428 + }, + { + "epoch": 0.8891350210970465, + "grad_norm": 0.6174055337905884, + "learning_rate": 4.5939224217254574e-05, + "loss": 1.3277, + "step": 8429 + }, + { + "epoch": 0.8892405063291139, + "grad_norm": 0.6288341879844666, + "learning_rate": 4.585274721305333e-05, + "loss": 1.3024, + "step": 8430 + }, + { + "epoch": 0.8893459915611814, + "grad_norm": 0.6168195605278015, + "learning_rate": 4.576634911206296e-05, + "loss": 1.2877, + "step": 8431 + }, + { + "epoch": 0.889451476793249, + "grad_norm": 0.6164981722831726, + "learning_rate": 4.5680029923964724e-05, + "loss": 1.2883, + "step": 8432 + }, + { + "epoch": 0.8895569620253164, + "grad_norm": 0.6298699378967285, + "learning_rate": 4.559378965843122e-05, + "loss": 1.2763, + "step": 8433 + }, + { + "epoch": 0.889662447257384, + "grad_norm": 0.6122069954872131, + "learning_rate": 4.5507628325126144e-05, + "loss": 1.2862, + "step": 8434 + }, + { + "epoch": 0.8897679324894515, + "grad_norm": 0.6448633074760437, + "learning_rate": 4.542154593370401e-05, + "loss": 1.3361, + "step": 8435 + }, + { + "epoch": 0.889873417721519, + "grad_norm": 0.6212340593338013, + "learning_rate": 4.533554249381119e-05, + "loss": 1.3289, + "step": 8436 + }, + { + "epoch": 0.8899789029535865, + "grad_norm": 0.6761865615844727, + "learning_rate": 
4.524961801508456e-05, + "loss": 1.2914, + "step": 8437 + }, + { + "epoch": 0.890084388185654, + "grad_norm": 0.628122091293335, + "learning_rate": 4.5163772507152425e-05, + "loss": 1.3292, + "step": 8438 + }, + { + "epoch": 0.8901898734177215, + "grad_norm": 0.6437263488769531, + "learning_rate": 4.507800597963424e-05, + "loss": 1.3202, + "step": 8439 + }, + { + "epoch": 0.890295358649789, + "grad_norm": 0.6263060569763184, + "learning_rate": 4.4992318442140575e-05, + "loss": 1.3187, + "step": 8440 + }, + { + "epoch": 0.8904008438818566, + "grad_norm": 0.6034370064735413, + "learning_rate": 4.490670990427309e-05, + "loss": 1.309, + "step": 8441 + }, + { + "epoch": 0.890506329113924, + "grad_norm": 0.6195954084396362, + "learning_rate": 4.4821180375624684e-05, + "loss": 1.295, + "step": 8442 + }, + { + "epoch": 0.8906118143459916, + "grad_norm": 0.6239821910858154, + "learning_rate": 4.473572986577928e-05, + "loss": 1.3246, + "step": 8443 + }, + { + "epoch": 0.8907172995780591, + "grad_norm": 0.6252005100250244, + "learning_rate": 4.4650358384312056e-05, + "loss": 1.3048, + "step": 8444 + }, + { + "epoch": 0.8908227848101266, + "grad_norm": 0.6101325154304504, + "learning_rate": 4.4565065940789515e-05, + "loss": 1.2848, + "step": 8445 + }, + { + "epoch": 0.8909282700421941, + "grad_norm": 0.6100523471832275, + "learning_rate": 4.447985254476894e-05, + "loss": 1.3151, + "step": 8446 + }, + { + "epoch": 0.8910337552742617, + "grad_norm": 0.6403098106384277, + "learning_rate": 4.439471820579885e-05, + "loss": 1.3392, + "step": 8447 + }, + { + "epoch": 0.8911392405063291, + "grad_norm": 0.6153309345245361, + "learning_rate": 4.430966293341912e-05, + "loss": 1.3448, + "step": 8448 + }, + { + "epoch": 0.8912447257383966, + "grad_norm": 0.6698305010795593, + "learning_rate": 4.422468673716054e-05, + "loss": 1.3372, + "step": 8449 + }, + { + "epoch": 0.8913502109704642, + "grad_norm": 0.6779007315635681, + "learning_rate": 4.413978962654508e-05, + "loss": 1.3077, + "step": 
8450 + }, + { + "epoch": 0.8914556962025316, + "grad_norm": 0.6371434330940247, + "learning_rate": 4.405497161108596e-05, + "loss": 1.2982, + "step": 8451 + }, + { + "epoch": 0.8915611814345992, + "grad_norm": 0.622286319732666, + "learning_rate": 4.397023270028749e-05, + "loss": 1.319, + "step": 8452 + }, + { + "epoch": 0.8916666666666667, + "grad_norm": 0.6339314579963684, + "learning_rate": 4.388557290364484e-05, + "loss": 1.307, + "step": 8453 + }, + { + "epoch": 0.8917721518987342, + "grad_norm": 0.6145379543304443, + "learning_rate": 4.3800992230644904e-05, + "loss": 1.3366, + "step": 8454 + }, + { + "epoch": 0.8918776371308017, + "grad_norm": 0.6522381901741028, + "learning_rate": 4.3716490690765194e-05, + "loss": 1.272, + "step": 8455 + }, + { + "epoch": 0.8919831223628693, + "grad_norm": 0.6200320720672607, + "learning_rate": 4.3632068293474545e-05, + "loss": 1.3253, + "step": 8456 + }, + { + "epoch": 0.8920886075949367, + "grad_norm": 0.5967192053794861, + "learning_rate": 4.35477250482329e-05, + "loss": 1.2895, + "step": 8457 + }, + { + "epoch": 0.8921940928270042, + "grad_norm": 0.6039353013038635, + "learning_rate": 4.346346096449136e-05, + "loss": 1.3082, + "step": 8458 + }, + { + "epoch": 0.8922995780590718, + "grad_norm": 0.6168708205223083, + "learning_rate": 4.337927605169212e-05, + "loss": 1.3197, + "step": 8459 + }, + { + "epoch": 0.8924050632911392, + "grad_norm": 0.6572287678718567, + "learning_rate": 4.3295170319268554e-05, + "loss": 1.3298, + "step": 8460 + }, + { + "epoch": 0.8925105485232068, + "grad_norm": 0.6600645780563354, + "learning_rate": 4.321114377664495e-05, + "loss": 1.3289, + "step": 8461 + }, + { + "epoch": 0.8926160337552742, + "grad_norm": 0.6161590814590454, + "learning_rate": 4.3127196433237205e-05, + "loss": 1.3125, + "step": 8462 + }, + { + "epoch": 0.8927215189873418, + "grad_norm": 0.6262239217758179, + "learning_rate": 4.304332829845187e-05, + "loss": 1.3044, + "step": 8463 + }, + { + "epoch": 0.8928270042194093, + 
"grad_norm": 0.6224894523620605, + "learning_rate": 4.2959539381686843e-05, + "loss": 1.3034, + "step": 8464 + }, + { + "epoch": 0.8929324894514767, + "grad_norm": 0.6110442876815796, + "learning_rate": 4.287582969233103e-05, + "loss": 1.3197, + "step": 8465 + }, + { + "epoch": 0.8930379746835443, + "grad_norm": 0.6420882344245911, + "learning_rate": 4.279219923976452e-05, + "loss": 1.2979, + "step": 8466 + }, + { + "epoch": 0.8931434599156118, + "grad_norm": 0.638904333114624, + "learning_rate": 4.2708648033358554e-05, + "loss": 1.3146, + "step": 8467 + }, + { + "epoch": 0.8932489451476793, + "grad_norm": 0.640733540058136, + "learning_rate": 4.26251760824754e-05, + "loss": 1.2975, + "step": 8468 + }, + { + "epoch": 0.8933544303797468, + "grad_norm": 0.6280511617660522, + "learning_rate": 4.2541783396468584e-05, + "loss": 1.3266, + "step": 8469 + }, + { + "epoch": 0.8934599156118144, + "grad_norm": 0.6072341799736023, + "learning_rate": 4.245846998468261e-05, + "loss": 1.3301, + "step": 8470 + }, + { + "epoch": 0.8935654008438818, + "grad_norm": 0.6681113243103027, + "learning_rate": 4.2375235856453197e-05, + "loss": 1.3058, + "step": 8471 + }, + { + "epoch": 0.8936708860759494, + "grad_norm": 0.6125616431236267, + "learning_rate": 4.229208102110721e-05, + "loss": 1.3027, + "step": 8472 + }, + { + "epoch": 0.8937763713080169, + "grad_norm": 0.6074202060699463, + "learning_rate": 4.220900548796244e-05, + "loss": 1.3298, + "step": 8473 + }, + { + "epoch": 0.8938818565400843, + "grad_norm": 0.5956602692604065, + "learning_rate": 4.212600926632804e-05, + "loss": 1.294, + "step": 8474 + }, + { + "epoch": 0.8939873417721519, + "grad_norm": 0.6384235620498657, + "learning_rate": 4.204309236550405e-05, + "loss": 1.3275, + "step": 8475 + }, + { + "epoch": 0.8940928270042194, + "grad_norm": 0.5920202136039734, + "learning_rate": 4.1960254794781714e-05, + "loss": 1.2669, + "step": 8476 + }, + { + "epoch": 0.8941983122362869, + "grad_norm": 0.6285606026649475, + 
"learning_rate": 4.1877496563443446e-05, + "loss": 1.3225, + "step": 8477 + }, + { + "epoch": 0.8943037974683544, + "grad_norm": 0.6076601147651672, + "learning_rate": 4.179481768076274e-05, + "loss": 1.3126, + "step": 8478 + }, + { + "epoch": 0.894409282700422, + "grad_norm": 0.6732480525970459, + "learning_rate": 4.1712218156004014e-05, + "loss": 1.2875, + "step": 8479 + }, + { + "epoch": 0.8945147679324894, + "grad_norm": 0.6128367185592651, + "learning_rate": 4.16296979984232e-05, + "loss": 1.3116, + "step": 8480 + }, + { + "epoch": 0.894620253164557, + "grad_norm": 0.5952823162078857, + "learning_rate": 4.154725721726699e-05, + "loss": 1.3277, + "step": 8481 + }, + { + "epoch": 0.8947257383966245, + "grad_norm": 0.6203493475914001, + "learning_rate": 4.1464895821773235e-05, + "loss": 1.3286, + "step": 8482 + }, + { + "epoch": 0.8948312236286919, + "grad_norm": 0.6441717743873596, + "learning_rate": 4.138261382117098e-05, + "loss": 1.3155, + "step": 8483 + }, + { + "epoch": 0.8949367088607595, + "grad_norm": 0.6192573308944702, + "learning_rate": 4.130041122468042e-05, + "loss": 1.2994, + "step": 8484 + }, + { + "epoch": 0.895042194092827, + "grad_norm": 0.6288093328475952, + "learning_rate": 4.1218288041512534e-05, + "loss": 1.3026, + "step": 8485 + }, + { + "epoch": 0.8951476793248945, + "grad_norm": 0.6012380123138428, + "learning_rate": 4.113624428086987e-05, + "loss": 1.3212, + "step": 8486 + }, + { + "epoch": 0.895253164556962, + "grad_norm": 0.6179066896438599, + "learning_rate": 4.105427995194566e-05, + "loss": 1.3444, + "step": 8487 + }, + { + "epoch": 0.8953586497890296, + "grad_norm": 0.5829065442085266, + "learning_rate": 4.0972395063924554e-05, + "loss": 1.2916, + "step": 8488 + }, + { + "epoch": 0.895464135021097, + "grad_norm": 0.6412169337272644, + "learning_rate": 4.089058962598213e-05, + "loss": 1.307, + "step": 8489 + }, + { + "epoch": 0.8955696202531646, + "grad_norm": 0.5985771417617798, + "learning_rate": 4.080886364728506e-05, + "loss": 
1.2884, + "step": 8490 + }, + { + "epoch": 0.8956751054852321, + "grad_norm": 0.6250937581062317, + "learning_rate": 4.072721713699118e-05, + "loss": 1.2958, + "step": 8491 + }, + { + "epoch": 0.8957805907172995, + "grad_norm": 0.7018920183181763, + "learning_rate": 4.064565010424942e-05, + "loss": 1.283, + "step": 8492 + }, + { + "epoch": 0.8958860759493671, + "grad_norm": 0.6626452803611755, + "learning_rate": 4.056416255819964e-05, + "loss": 1.2704, + "step": 8493 + }, + { + "epoch": 0.8959915611814346, + "grad_norm": 0.6776371002197266, + "learning_rate": 4.048275450797312e-05, + "loss": 1.2832, + "step": 8494 + }, + { + "epoch": 0.8960970464135021, + "grad_norm": 0.6630907654762268, + "learning_rate": 4.0401425962691804e-05, + "loss": 1.3649, + "step": 8495 + }, + { + "epoch": 0.8962025316455696, + "grad_norm": 0.6497223377227783, + "learning_rate": 4.032017693146908e-05, + "loss": 1.3109, + "step": 8496 + }, + { + "epoch": 0.8963080168776372, + "grad_norm": 0.6432156562805176, + "learning_rate": 4.023900742340941e-05, + "loss": 1.3113, + "step": 8497 + }, + { + "epoch": 0.8964135021097046, + "grad_norm": 0.6026905179023743, + "learning_rate": 4.015791744760811e-05, + "loss": 1.2938, + "step": 8498 + }, + { + "epoch": 0.8965189873417722, + "grad_norm": 0.6233720183372498, + "learning_rate": 4.0076907013151726e-05, + "loss": 1.316, + "step": 8499 + }, + { + "epoch": 0.8966244725738397, + "grad_norm": 0.6116101145744324, + "learning_rate": 3.999597612911793e-05, + "loss": 1.3119, + "step": 8500 + }, + { + "epoch": 0.8967299578059071, + "grad_norm": 0.6542644500732422, + "learning_rate": 3.991512480457546e-05, + "loss": 1.318, + "step": 8501 + }, + { + "epoch": 0.8968354430379747, + "grad_norm": 0.7131710648536682, + "learning_rate": 3.9834353048583984e-05, + "loss": 1.3334, + "step": 8502 + }, + { + "epoch": 0.8969409282700422, + "grad_norm": 0.6662569046020508, + "learning_rate": 3.9753660870194524e-05, + "loss": 1.2861, + "step": 8503 + }, + { + "epoch": 
0.8970464135021097, + "grad_norm": 0.6009713411331177, + "learning_rate": 3.967304827844892e-05, + "loss": 1.3194, + "step": 8504 + }, + { + "epoch": 0.8971518987341772, + "grad_norm": 0.6611383557319641, + "learning_rate": 3.95925152823802e-05, + "loss": 1.2993, + "step": 8505 + }, + { + "epoch": 0.8972573839662448, + "grad_norm": 0.6146872043609619, + "learning_rate": 3.9512061891012643e-05, + "loss": 1.2884, + "step": 8506 + }, + { + "epoch": 0.8973628691983122, + "grad_norm": 0.6127288937568665, + "learning_rate": 3.943168811336137e-05, + "loss": 1.3185, + "step": 8507 + }, + { + "epoch": 0.8974683544303798, + "grad_norm": 0.6289463639259338, + "learning_rate": 3.93513939584326e-05, + "loss": 1.3414, + "step": 8508 + }, + { + "epoch": 0.8975738396624473, + "grad_norm": 0.6471593379974365, + "learning_rate": 3.927117943522379e-05, + "loss": 1.3349, + "step": 8509 + }, + { + "epoch": 0.8976793248945147, + "grad_norm": 0.621597170829773, + "learning_rate": 3.9191044552723345e-05, + "loss": 1.2916, + "step": 8510 + }, + { + "epoch": 0.8977848101265823, + "grad_norm": 0.6214932203292847, + "learning_rate": 3.911098931991075e-05, + "loss": 1.2935, + "step": 8511 + }, + { + "epoch": 0.8978902953586498, + "grad_norm": 0.67154860496521, + "learning_rate": 3.9031013745756655e-05, + "loss": 1.304, + "step": 8512 + }, + { + "epoch": 0.8979957805907173, + "grad_norm": 0.6181343793869019, + "learning_rate": 3.895111783922256e-05, + "loss": 1.2972, + "step": 8513 + }, + { + "epoch": 0.8981012658227848, + "grad_norm": 0.6238806247711182, + "learning_rate": 3.887130160926139e-05, + "loss": 1.2786, + "step": 8514 + }, + { + "epoch": 0.8982067510548524, + "grad_norm": 0.6033586859703064, + "learning_rate": 3.879156506481699e-05, + "loss": 1.3111, + "step": 8515 + }, + { + "epoch": 0.8983122362869198, + "grad_norm": 0.603122889995575, + "learning_rate": 3.8711908214824035e-05, + "loss": 1.3278, + "step": 8516 + }, + { + "epoch": 0.8984177215189874, + "grad_norm": 
0.6248158812522888, + "learning_rate": 3.863233106820857e-05, + "loss": 1.322, + "step": 8517 + }, + { + "epoch": 0.8985232067510549, + "grad_norm": 0.6175407767295837, + "learning_rate": 3.855283363388762e-05, + "loss": 1.3123, + "step": 8518 + }, + { + "epoch": 0.8986286919831223, + "grad_norm": 0.6014524102210999, + "learning_rate": 3.8473415920769304e-05, + "loss": 1.2886, + "step": 8519 + }, + { + "epoch": 0.8987341772151899, + "grad_norm": 0.6376110315322876, + "learning_rate": 3.839407793775268e-05, + "loss": 1.3343, + "step": 8520 + }, + { + "epoch": 0.8988396624472574, + "grad_norm": 0.6327964663505554, + "learning_rate": 3.8314819693727966e-05, + "loss": 1.3174, + "step": 8521 + }, + { + "epoch": 0.8989451476793249, + "grad_norm": 0.6029228568077087, + "learning_rate": 3.823564119757647e-05, + "loss": 1.3292, + "step": 8522 + }, + { + "epoch": 0.8990506329113924, + "grad_norm": 0.6049855947494507, + "learning_rate": 3.81565424581706e-05, + "loss": 1.3219, + "step": 8523 + }, + { + "epoch": 0.89915611814346, + "grad_norm": 0.6239874362945557, + "learning_rate": 3.8077523484373764e-05, + "loss": 1.2938, + "step": 8524 + }, + { + "epoch": 0.8992616033755274, + "grad_norm": 0.6360035538673401, + "learning_rate": 3.79985842850403e-05, + "loss": 1.3421, + "step": 8525 + }, + { + "epoch": 0.899367088607595, + "grad_norm": 0.675399899482727, + "learning_rate": 3.791972486901596e-05, + "loss": 1.3575, + "step": 8526 + }, + { + "epoch": 0.8994725738396624, + "grad_norm": 0.6265740990638733, + "learning_rate": 3.784094524513709e-05, + "loss": 1.3123, + "step": 8527 + }, + { + "epoch": 0.8995780590717299, + "grad_norm": 0.6286106109619141, + "learning_rate": 3.7762245422231476e-05, + "loss": 1.332, + "step": 8528 + }, + { + "epoch": 0.8996835443037975, + "grad_norm": 0.6393797397613525, + "learning_rate": 3.768362540911788e-05, + "loss": 1.2922, + "step": 8529 + }, + { + "epoch": 0.8997890295358649, + "grad_norm": 0.6130082011222839, + "learning_rate": 
3.760508521460584e-05, + "loss": 1.342, + "step": 8530 + }, + { + "epoch": 0.8998945147679325, + "grad_norm": 0.6028724908828735, + "learning_rate": 3.7526624847496335e-05, + "loss": 1.3096, + "step": 8531 + }, + { + "epoch": 0.9, + "grad_norm": 0.6308325529098511, + "learning_rate": 3.744824431658131e-05, + "loss": 1.3075, + "step": 8532 + }, + { + "epoch": 0.9001054852320675, + "grad_norm": 0.6385723352432251, + "learning_rate": 3.736994363064358e-05, + "loss": 1.2924, + "step": 8533 + }, + { + "epoch": 0.900210970464135, + "grad_norm": 0.6355554461479187, + "learning_rate": 3.7291722798457215e-05, + "loss": 1.342, + "step": 8534 + }, + { + "epoch": 0.9003164556962026, + "grad_norm": 0.6139175295829773, + "learning_rate": 3.72135818287872e-05, + "loss": 1.3045, + "step": 8535 + }, + { + "epoch": 0.90042194092827, + "grad_norm": 0.6158131957054138, + "learning_rate": 3.713552073038953e-05, + "loss": 1.3373, + "step": 8536 + }, + { + "epoch": 0.9005274261603375, + "grad_norm": 0.613325834274292, + "learning_rate": 3.705753951201146e-05, + "loss": 1.3501, + "step": 8537 + }, + { + "epoch": 0.9006329113924051, + "grad_norm": 0.6205823421478271, + "learning_rate": 3.697963818239117e-05, + "loss": 1.3162, + "step": 8538 + }, + { + "epoch": 0.9007383966244725, + "grad_norm": 0.6277465224266052, + "learning_rate": 3.690181675025775e-05, + "loss": 1.2949, + "step": 8539 + }, + { + "epoch": 0.9008438818565401, + "grad_norm": 0.6077624559402466, + "learning_rate": 3.682407522433173e-05, + "loss": 1.3339, + "step": 8540 + }, + { + "epoch": 0.9009493670886076, + "grad_norm": 0.6083143949508667, + "learning_rate": 3.674641361332423e-05, + "loss": 1.3196, + "step": 8541 + }, + { + "epoch": 0.9010548523206751, + "grad_norm": 0.6211116909980774, + "learning_rate": 3.66688319259377e-05, + "loss": 1.3182, + "step": 8542 + }, + { + "epoch": 0.9011603375527426, + "grad_norm": 0.6186180710792542, + "learning_rate": 3.6591330170865524e-05, + "loss": 1.2985, + "step": 8543 + }, + { + 
"epoch": 0.9012658227848102, + "grad_norm": 0.6254827976226807, + "learning_rate": 3.6513908356792244e-05, + "loss": 1.3403, + "step": 8544 + }, + { + "epoch": 0.9013713080168776, + "grad_norm": 0.619455099105835, + "learning_rate": 3.643656649239327e-05, + "loss": 1.2758, + "step": 8545 + }, + { + "epoch": 0.9014767932489451, + "grad_norm": 0.6480780243873596, + "learning_rate": 3.635930458633516e-05, + "loss": 1.3084, + "step": 8546 + }, + { + "epoch": 0.9015822784810127, + "grad_norm": 0.6163184642791748, + "learning_rate": 3.628212264727548e-05, + "loss": 1.2966, + "step": 8547 + }, + { + "epoch": 0.9016877637130801, + "grad_norm": 0.5907608866691589, + "learning_rate": 3.6205020683862836e-05, + "loss": 1.345, + "step": 8548 + }, + { + "epoch": 0.9017932489451477, + "grad_norm": 0.6175271272659302, + "learning_rate": 3.612799870473696e-05, + "loss": 1.2817, + "step": 8549 + }, + { + "epoch": 0.9018987341772152, + "grad_norm": 0.6300888061523438, + "learning_rate": 3.605105671852854e-05, + "loss": 1.3365, + "step": 8550 + }, + { + "epoch": 0.9020042194092827, + "grad_norm": 0.6673404574394226, + "learning_rate": 3.597419473385935e-05, + "loss": 1.2993, + "step": 8551 + }, + { + "epoch": 0.9021097046413502, + "grad_norm": 0.6135039925575256, + "learning_rate": 3.5897412759342e-05, + "loss": 1.2928, + "step": 8552 + }, + { + "epoch": 0.9022151898734178, + "grad_norm": 0.6664649248123169, + "learning_rate": 3.582071080358043e-05, + "loss": 1.3162, + "step": 8553 + }, + { + "epoch": 0.9023206751054852, + "grad_norm": 0.6027608513832092, + "learning_rate": 3.5744088875169446e-05, + "loss": 1.3266, + "step": 8554 + }, + { + "epoch": 0.9024261603375527, + "grad_norm": 0.6478785872459412, + "learning_rate": 3.566754698269492e-05, + "loss": 1.3161, + "step": 8555 + }, + { + "epoch": 0.9025316455696203, + "grad_norm": 0.6349108815193176, + "learning_rate": 3.5591085134733666e-05, + "loss": 1.3227, + "step": 8556 + }, + { + "epoch": 0.9026371308016877, + "grad_norm": 
0.6515316963195801, + "learning_rate": 3.5514703339853656e-05, + "loss": 1.2557, + "step": 8557 + }, + { + "epoch": 0.9027426160337553, + "grad_norm": 0.6215649843215942, + "learning_rate": 3.543840160661396e-05, + "loss": 1.2859, + "step": 8558 + }, + { + "epoch": 0.9028481012658228, + "grad_norm": 0.6566869020462036, + "learning_rate": 3.5362179943564496e-05, + "loss": 1.3266, + "step": 8559 + }, + { + "epoch": 0.9029535864978903, + "grad_norm": 0.6172232031822205, + "learning_rate": 3.528603835924626e-05, + "loss": 1.2946, + "step": 8560 + }, + { + "epoch": 0.9030590717299578, + "grad_norm": 0.6009508371353149, + "learning_rate": 3.520997686219127e-05, + "loss": 1.3212, + "step": 8561 + }, + { + "epoch": 0.9031645569620254, + "grad_norm": 0.6050061583518982, + "learning_rate": 3.513399546092269e-05, + "loss": 1.343, + "step": 8562 + }, + { + "epoch": 0.9032700421940928, + "grad_norm": 0.6203808784484863, + "learning_rate": 3.5058094163954556e-05, + "loss": 1.3064, + "step": 8563 + }, + { + "epoch": 0.9033755274261603, + "grad_norm": 0.7397180795669556, + "learning_rate": 3.498227297979198e-05, + "loss": 1.2987, + "step": 8564 + }, + { + "epoch": 0.9034810126582279, + "grad_norm": 0.6808995008468628, + "learning_rate": 3.4906531916931075e-05, + "loss": 1.3192, + "step": 8565 + }, + { + "epoch": 0.9035864978902953, + "grad_norm": 0.6258946061134338, + "learning_rate": 3.483087098385906e-05, + "loss": 1.3469, + "step": 8566 + }, + { + "epoch": 0.9036919831223629, + "grad_norm": 0.6063165068626404, + "learning_rate": 3.475529018905416e-05, + "loss": 1.3296, + "step": 8567 + }, + { + "epoch": 0.9037974683544304, + "grad_norm": 0.6128347516059875, + "learning_rate": 3.467978954098549e-05, + "loss": 1.2946, + "step": 8568 + }, + { + "epoch": 0.9039029535864979, + "grad_norm": 0.616701066493988, + "learning_rate": 3.46043690481134e-05, + "loss": 1.3222, + "step": 8569 + }, + { + "epoch": 0.9040084388185654, + "grad_norm": 0.7000483870506287, + "learning_rate": 
3.4529028718888935e-05, + "loss": 1.3369, + "step": 8570 + }, + { + "epoch": 0.904113924050633, + "grad_norm": 0.640991747379303, + "learning_rate": 3.4453768561754525e-05, + "loss": 1.3101, + "step": 8571 + }, + { + "epoch": 0.9042194092827004, + "grad_norm": 0.5996700525283813, + "learning_rate": 3.437858858514334e-05, + "loss": 1.2873, + "step": 8572 + }, + { + "epoch": 0.9043248945147679, + "grad_norm": 0.6215742826461792, + "learning_rate": 3.43034887974798e-05, + "loss": 1.3093, + "step": 8573 + }, + { + "epoch": 0.9044303797468355, + "grad_norm": 0.6451358795166016, + "learning_rate": 3.422846920717893e-05, + "loss": 1.3298, + "step": 8574 + }, + { + "epoch": 0.9045358649789029, + "grad_norm": 0.6245017647743225, + "learning_rate": 3.4153529822647414e-05, + "loss": 1.3173, + "step": 8575 + }, + { + "epoch": 0.9046413502109705, + "grad_norm": 0.7295987010002136, + "learning_rate": 3.4078670652282374e-05, + "loss": 1.2897, + "step": 8576 + }, + { + "epoch": 0.904746835443038, + "grad_norm": 0.6208138465881348, + "learning_rate": 3.400389170447218e-05, + "loss": 1.3368, + "step": 8577 + }, + { + "epoch": 0.9048523206751055, + "grad_norm": 0.5988561511039734, + "learning_rate": 3.392919298759623e-05, + "loss": 1.2833, + "step": 8578 + }, + { + "epoch": 0.904957805907173, + "grad_norm": 0.6180208325386047, + "learning_rate": 3.38545745100248e-05, + "loss": 1.3386, + "step": 8579 + }, + { + "epoch": 0.9050632911392406, + "grad_norm": 0.6130207180976868, + "learning_rate": 3.378003628011938e-05, + "loss": 1.3181, + "step": 8580 + }, + { + "epoch": 0.905168776371308, + "grad_norm": 0.6374391913414001, + "learning_rate": 3.3705578306232224e-05, + "loss": 1.3023, + "step": 8581 + }, + { + "epoch": 0.9052742616033755, + "grad_norm": 0.684476375579834, + "learning_rate": 3.363120059670688e-05, + "loss": 1.3336, + "step": 8582 + }, + { + "epoch": 0.9053797468354431, + "grad_norm": 0.730800211429596, + "learning_rate": 3.355690315987761e-05, + "loss": 1.3211, + "step": 
8583 + }, + { + "epoch": 0.9054852320675105, + "grad_norm": 0.6542825698852539, + "learning_rate": 3.3482686004069755e-05, + "loss": 1.2978, + "step": 8584 + }, + { + "epoch": 0.9055907172995781, + "grad_norm": 0.6370463371276855, + "learning_rate": 3.340854913759983e-05, + "loss": 1.3071, + "step": 8585 + }, + { + "epoch": 0.9056962025316456, + "grad_norm": 0.5969825983047485, + "learning_rate": 3.3334492568775355e-05, + "loss": 1.3008, + "step": 8586 + }, + { + "epoch": 0.9058016877637131, + "grad_norm": 0.6379567384719849, + "learning_rate": 3.3260516305894526e-05, + "loss": 1.3215, + "step": 8587 + }, + { + "epoch": 0.9059071729957806, + "grad_norm": 0.6420677304267883, + "learning_rate": 3.318662035724679e-05, + "loss": 1.3003, + "step": 8588 + }, + { + "epoch": 0.9060126582278482, + "grad_norm": 0.7101428508758545, + "learning_rate": 3.31128047311127e-05, + "loss": 1.3401, + "step": 8589 + }, + { + "epoch": 0.9061181434599156, + "grad_norm": 0.6826112270355225, + "learning_rate": 3.303906943576346e-05, + "loss": 1.3535, + "step": 8590 + }, + { + "epoch": 0.9062236286919831, + "grad_norm": 0.6476688981056213, + "learning_rate": 3.296541447946164e-05, + "loss": 1.3139, + "step": 8591 + }, + { + "epoch": 0.9063291139240506, + "grad_norm": 0.6569284796714783, + "learning_rate": 3.2891839870460546e-05, + "loss": 1.3154, + "step": 8592 + }, + { + "epoch": 0.9064345991561181, + "grad_norm": 0.6255733966827393, + "learning_rate": 3.281834561700467e-05, + "loss": 1.334, + "step": 8593 + }, + { + "epoch": 0.9065400843881857, + "grad_norm": 0.634602427482605, + "learning_rate": 3.274493172732926e-05, + "loss": 1.2779, + "step": 8594 + }, + { + "epoch": 0.9066455696202531, + "grad_norm": 0.7078840136528015, + "learning_rate": 3.26715982096609e-05, + "loss": 1.3706, + "step": 8595 + }, + { + "epoch": 0.9067510548523207, + "grad_norm": 0.808857798576355, + "learning_rate": 3.259834507221684e-05, + "loss": 1.2526, + "step": 8596 + }, + { + "epoch": 0.9068565400843882, + 
"grad_norm": 0.6538435816764832, + "learning_rate": 3.2525172323205535e-05, + "loss": 1.2748, + "step": 8597 + }, + { + "epoch": 0.9069620253164556, + "grad_norm": 0.5963319540023804, + "learning_rate": 3.2452079970826335e-05, + "loss": 1.3361, + "step": 8598 + }, + { + "epoch": 0.9070675105485232, + "grad_norm": 0.6266533136367798, + "learning_rate": 3.237906802326951e-05, + "loss": 1.2987, + "step": 8599 + }, + { + "epoch": 0.9071729957805907, + "grad_norm": 0.6339909434318542, + "learning_rate": 3.230613648871661e-05, + "loss": 1.3217, + "step": 8600 + }, + { + "epoch": 0.9072784810126582, + "grad_norm": 0.6296473741531372, + "learning_rate": 3.223328537533976e-05, + "loss": 1.3394, + "step": 8601 + }, + { + "epoch": 0.9073839662447257, + "grad_norm": 0.6358899474143982, + "learning_rate": 3.216051469130243e-05, + "loss": 1.3121, + "step": 8602 + }, + { + "epoch": 0.9074894514767933, + "grad_norm": 0.6649077534675598, + "learning_rate": 3.208782444475894e-05, + "loss": 1.301, + "step": 8603 + }, + { + "epoch": 0.9075949367088607, + "grad_norm": 0.7492178678512573, + "learning_rate": 3.201521464385443e-05, + "loss": 1.3275, + "step": 8604 + }, + { + "epoch": 0.9077004219409283, + "grad_norm": 0.6680116057395935, + "learning_rate": 3.194268529672539e-05, + "loss": 1.2931, + "step": 8605 + }, + { + "epoch": 0.9078059071729958, + "grad_norm": 0.61942458152771, + "learning_rate": 3.187023641149908e-05, + "loss": 1.3054, + "step": 8606 + }, + { + "epoch": 0.9079113924050632, + "grad_norm": 0.6331883668899536, + "learning_rate": 3.1797867996293663e-05, + "loss": 1.3034, + "step": 8607 + }, + { + "epoch": 0.9080168776371308, + "grad_norm": 0.6584749221801758, + "learning_rate": 3.172558005921841e-05, + "loss": 1.2979, + "step": 8608 + }, + { + "epoch": 0.9081223628691983, + "grad_norm": 0.6997871994972229, + "learning_rate": 3.165337260837351e-05, + "loss": 1.3241, + "step": 8609 + }, + { + "epoch": 0.9082278481012658, + "grad_norm": 0.7057874202728271, + 
"learning_rate": 3.158124565185022e-05, + "loss": 1.3066, + "step": 8610 + }, + { + "epoch": 0.9083333333333333, + "grad_norm": 0.6208213567733765, + "learning_rate": 3.1509199197730765e-05, + "loss": 1.3142, + "step": 8611 + }, + { + "epoch": 0.9084388185654009, + "grad_norm": 0.7587871551513672, + "learning_rate": 3.143723325408826e-05, + "loss": 1.32, + "step": 8612 + }, + { + "epoch": 0.9085443037974683, + "grad_norm": 0.593963086605072, + "learning_rate": 3.136534782898667e-05, + "loss": 1.2882, + "step": 8613 + }, + { + "epoch": 0.9086497890295359, + "grad_norm": 0.6224262118339539, + "learning_rate": 3.129354293048148e-05, + "loss": 1.329, + "step": 8614 + }, + { + "epoch": 0.9087552742616034, + "grad_norm": 0.6199320554733276, + "learning_rate": 3.122181856661857e-05, + "loss": 1.2627, + "step": 8615 + }, + { + "epoch": 0.9088607594936708, + "grad_norm": 0.6775808334350586, + "learning_rate": 3.1150174745435026e-05, + "loss": 1.3441, + "step": 8616 + }, + { + "epoch": 0.9089662447257384, + "grad_norm": 0.6156737208366394, + "learning_rate": 3.107861147495891e-05, + "loss": 1.2892, + "step": 8617 + }, + { + "epoch": 0.9090717299578059, + "grad_norm": 0.7143170833587646, + "learning_rate": 3.100712876320924e-05, + "loss": 1.322, + "step": 8618 + }, + { + "epoch": 0.9091772151898734, + "grad_norm": 0.7092456221580505, + "learning_rate": 3.093572661819602e-05, + "loss": 1.2736, + "step": 8619 + }, + { + "epoch": 0.9092827004219409, + "grad_norm": 0.6751619577407837, + "learning_rate": 3.086440504792026e-05, + "loss": 1.3237, + "step": 8620 + }, + { + "epoch": 0.9093881856540085, + "grad_norm": 0.6052183508872986, + "learning_rate": 3.079316406037375e-05, + "loss": 1.3159, + "step": 8621 + }, + { + "epoch": 0.9094936708860759, + "grad_norm": 0.6068726778030396, + "learning_rate": 3.072200366353958e-05, + "loss": 1.2756, + "step": 8622 + }, + { + "epoch": 0.9095991561181435, + "grad_norm": 0.6886983513832092, + "learning_rate": 3.0650923865391395e-05, + "loss": 
1.3166, + "step": 8623 + }, + { + "epoch": 0.909704641350211, + "grad_norm": 0.608624279499054, + "learning_rate": 3.057992467389431e-05, + "loss": 1.2908, + "step": 8624 + }, + { + "epoch": 0.9098101265822784, + "grad_norm": 0.6645041108131409, + "learning_rate": 3.0509006097004048e-05, + "loss": 1.3259, + "step": 8625 + }, + { + "epoch": 0.909915611814346, + "grad_norm": 0.6272978186607361, + "learning_rate": 3.043816814266734e-05, + "loss": 1.3237, + "step": 8626 + }, + { + "epoch": 0.9100210970464135, + "grad_norm": 0.6624297499656677, + "learning_rate": 3.0367410818821913e-05, + "loss": 1.3278, + "step": 8627 + }, + { + "epoch": 0.910126582278481, + "grad_norm": 0.6112922430038452, + "learning_rate": 3.029673413339651e-05, + "loss": 1.2771, + "step": 8628 + }, + { + "epoch": 0.9102320675105485, + "grad_norm": 0.6188524961471558, + "learning_rate": 3.022613809431088e-05, + "loss": 1.333, + "step": 8629 + }, + { + "epoch": 0.9103375527426161, + "grad_norm": 0.6398681998252869, + "learning_rate": 3.015562270947553e-05, + "loss": 1.3648, + "step": 8630 + }, + { + "epoch": 0.9104430379746835, + "grad_norm": 0.6299701929092407, + "learning_rate": 3.0085187986792136e-05, + "loss": 1.3198, + "step": 8631 + }, + { + "epoch": 0.9105485232067511, + "grad_norm": 0.6360155344009399, + "learning_rate": 3.00148339341533e-05, + "loss": 1.285, + "step": 8632 + }, + { + "epoch": 0.9106540084388186, + "grad_norm": 0.659880518913269, + "learning_rate": 2.994456055944231e-05, + "loss": 1.2799, + "step": 8633 + }, + { + "epoch": 0.910759493670886, + "grad_norm": 0.6466507911682129, + "learning_rate": 2.9874367870534018e-05, + "loss": 1.3261, + "step": 8634 + }, + { + "epoch": 0.9108649789029536, + "grad_norm": 0.6297245025634766, + "learning_rate": 2.9804255875293645e-05, + "loss": 1.3129, + "step": 8635 + }, + { + "epoch": 0.9109704641350211, + "grad_norm": 0.6200518012046814, + "learning_rate": 2.9734224581577568e-05, + "loss": 1.2765, + "step": 8636 + }, + { + "epoch": 
0.9110759493670886, + "grad_norm": 0.6671374440193176, + "learning_rate": 2.966427399723326e-05, + "loss": 1.333, + "step": 8637 + }, + { + "epoch": 0.9111814345991561, + "grad_norm": 0.6166918277740479, + "learning_rate": 2.959440413009895e-05, + "loss": 1.2892, + "step": 8638 + }, + { + "epoch": 0.9112869198312237, + "grad_norm": 0.6277954578399658, + "learning_rate": 2.952461498800388e-05, + "loss": 1.289, + "step": 8639 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 0.673747181892395, + "learning_rate": 2.945490657876837e-05, + "loss": 1.2811, + "step": 8640 + }, + { + "epoch": 0.9114978902953587, + "grad_norm": 0.6279959678649902, + "learning_rate": 2.938527891020351e-05, + "loss": 1.2878, + "step": 8641 + }, + { + "epoch": 0.9116033755274262, + "grad_norm": 0.6276732683181763, + "learning_rate": 2.931573199011148e-05, + "loss": 1.3126, + "step": 8642 + }, + { + "epoch": 0.9117088607594936, + "grad_norm": 0.6213153600692749, + "learning_rate": 2.92462658262852e-05, + "loss": 1.2952, + "step": 8643 + }, + { + "epoch": 0.9118143459915612, + "grad_norm": 0.6437169313430786, + "learning_rate": 2.9176880426508957e-05, + "loss": 1.2865, + "step": 8644 + }, + { + "epoch": 0.9119198312236287, + "grad_norm": 0.622913122177124, + "learning_rate": 2.9107575798557605e-05, + "loss": 1.2448, + "step": 8645 + }, + { + "epoch": 0.9120253164556962, + "grad_norm": 0.6293203234672546, + "learning_rate": 2.9038351950197107e-05, + "loss": 1.3194, + "step": 8646 + }, + { + "epoch": 0.9121308016877637, + "grad_norm": 0.6193752288818359, + "learning_rate": 2.8969208889184335e-05, + "loss": 1.2883, + "step": 8647 + }, + { + "epoch": 0.9122362869198313, + "grad_norm": 0.6419821977615356, + "learning_rate": 2.890014662326701e-05, + "loss": 1.3604, + "step": 8648 + }, + { + "epoch": 0.9123417721518987, + "grad_norm": 0.6808030009269714, + "learning_rate": 2.8831165160184024e-05, + "loss": 1.3136, + "step": 8649 + }, + { + "epoch": 0.9124472573839663, + "grad_norm": 
0.6214121580123901, + "learning_rate": 2.8762264507665113e-05, + "loss": 1.3233, + "step": 8650 + }, + { + "epoch": 0.9125527426160338, + "grad_norm": 0.6638588905334473, + "learning_rate": 2.869344467343077e-05, + "loss": 1.3106, + "step": 8651 + }, + { + "epoch": 0.9126582278481012, + "grad_norm": 0.7298478484153748, + "learning_rate": 2.862470566519265e-05, + "loss": 1.3409, + "step": 8652 + }, + { + "epoch": 0.9127637130801688, + "grad_norm": 0.6165025234222412, + "learning_rate": 2.855604749065352e-05, + "loss": 1.3108, + "step": 8653 + }, + { + "epoch": 0.9128691983122363, + "grad_norm": 0.6408652663230896, + "learning_rate": 2.8487470157506633e-05, + "loss": 1.3386, + "step": 8654 + }, + { + "epoch": 0.9129746835443038, + "grad_norm": 0.6106289029121399, + "learning_rate": 2.84189736734366e-05, + "loss": 1.2888, + "step": 8655 + }, + { + "epoch": 0.9130801687763713, + "grad_norm": 0.6779942512512207, + "learning_rate": 2.8350558046118607e-05, + "loss": 1.2884, + "step": 8656 + }, + { + "epoch": 0.9131856540084389, + "grad_norm": 0.6989264488220215, + "learning_rate": 2.828222328321911e-05, + "loss": 1.2675, + "step": 8657 + }, + { + "epoch": 0.9132911392405063, + "grad_norm": 0.6114566922187805, + "learning_rate": 2.8213969392395233e-05, + "loss": 1.299, + "step": 8658 + }, + { + "epoch": 0.9133966244725739, + "grad_norm": 0.6293156147003174, + "learning_rate": 2.8145796381295276e-05, + "loss": 1.2806, + "step": 8659 + }, + { + "epoch": 0.9135021097046413, + "grad_norm": 0.6591480374336243, + "learning_rate": 2.807770425755829e-05, + "loss": 1.3245, + "step": 8660 + }, + { + "epoch": 0.9136075949367088, + "grad_norm": 0.6011443138122559, + "learning_rate": 2.800969302881434e-05, + "loss": 1.2799, + "step": 8661 + }, + { + "epoch": 0.9137130801687764, + "grad_norm": 0.6123974323272705, + "learning_rate": 2.7941762702684503e-05, + "loss": 1.2997, + "step": 8662 + }, + { + "epoch": 0.9138185654008438, + "grad_norm": 0.6351272463798523, + "learning_rate": 
2.7873913286780683e-05, + "loss": 1.3323, + "step": 8663 + }, + { + "epoch": 0.9139240506329114, + "grad_norm": 0.6304594874382019, + "learning_rate": 2.7806144788705718e-05, + "loss": 1.3314, + "step": 8664 + }, + { + "epoch": 0.9140295358649789, + "grad_norm": 0.6154175996780396, + "learning_rate": 2.7738457216053447e-05, + "loss": 1.3235, + "step": 8665 + }, + { + "epoch": 0.9141350210970464, + "grad_norm": 0.6467313766479492, + "learning_rate": 2.7670850576408556e-05, + "loss": 1.2984, + "step": 8666 + }, + { + "epoch": 0.9142405063291139, + "grad_norm": 0.6202054619789124, + "learning_rate": 2.7603324877346653e-05, + "loss": 1.3076, + "step": 8667 + }, + { + "epoch": 0.9143459915611815, + "grad_norm": 0.5994174480438232, + "learning_rate": 2.7535880126434433e-05, + "loss": 1.2911, + "step": 8668 + }, + { + "epoch": 0.9144514767932489, + "grad_norm": 0.6243770718574524, + "learning_rate": 2.7468516331229432e-05, + "loss": 1.3191, + "step": 8669 + }, + { + "epoch": 0.9145569620253164, + "grad_norm": 0.6149520874023438, + "learning_rate": 2.7401233499279866e-05, + "loss": 1.3341, + "step": 8670 + }, + { + "epoch": 0.914662447257384, + "grad_norm": 0.6762635111808777, + "learning_rate": 2.7334031638125367e-05, + "loss": 1.3538, + "step": 8671 + }, + { + "epoch": 0.9147679324894514, + "grad_norm": 0.6091582775115967, + "learning_rate": 2.726691075529625e-05, + "loss": 1.3496, + "step": 8672 + }, + { + "epoch": 0.914873417721519, + "grad_norm": 0.6453538537025452, + "learning_rate": 2.7199870858313574e-05, + "loss": 1.2991, + "step": 8673 + }, + { + "epoch": 0.9149789029535865, + "grad_norm": 0.6225104331970215, + "learning_rate": 2.7132911954689672e-05, + "loss": 1.2924, + "step": 8674 + }, + { + "epoch": 0.915084388185654, + "grad_norm": 0.7139731049537659, + "learning_rate": 2.706603405192745e-05, + "loss": 1.3274, + "step": 8675 + }, + { + "epoch": 0.9151898734177215, + "grad_norm": 0.6159448623657227, + "learning_rate": 2.6999237157521005e-05, + "loss": 1.3231, 
+ "step": 8676 + }, + { + "epoch": 0.9152953586497891, + "grad_norm": 0.6424137949943542, + "learning_rate": 2.6932521278955262e-05, + "loss": 1.3023, + "step": 8677 + }, + { + "epoch": 0.9154008438818565, + "grad_norm": 0.6020214557647705, + "learning_rate": 2.686588642370591e-05, + "loss": 1.3349, + "step": 8678 + }, + { + "epoch": 0.915506329113924, + "grad_norm": 0.6170939207077026, + "learning_rate": 2.6799332599239974e-05, + "loss": 1.3461, + "step": 8679 + }, + { + "epoch": 0.9156118143459916, + "grad_norm": 0.6010814309120178, + "learning_rate": 2.6732859813014987e-05, + "loss": 1.2782, + "step": 8680 + }, + { + "epoch": 0.915717299578059, + "grad_norm": 0.6735923290252686, + "learning_rate": 2.666646807247966e-05, + "loss": 1.3449, + "step": 8681 + }, + { + "epoch": 0.9158227848101266, + "grad_norm": 0.6202865242958069, + "learning_rate": 2.660015738507346e-05, + "loss": 1.3171, + "step": 8682 + }, + { + "epoch": 0.9159282700421941, + "grad_norm": 0.6546674966812134, + "learning_rate": 2.653392775822677e-05, + "loss": 1.2878, + "step": 8683 + }, + { + "epoch": 0.9160337552742616, + "grad_norm": 0.6492567658424377, + "learning_rate": 2.6467779199361e-05, + "loss": 1.3152, + "step": 8684 + }, + { + "epoch": 0.9161392405063291, + "grad_norm": 0.6280779838562012, + "learning_rate": 2.6401711715888454e-05, + "loss": 1.2988, + "step": 8685 + }, + { + "epoch": 0.9162447257383967, + "grad_norm": 0.6227898597717285, + "learning_rate": 2.6335725315212304e-05, + "loss": 1.3152, + "step": 8686 + }, + { + "epoch": 0.9163502109704641, + "grad_norm": 0.644524872303009, + "learning_rate": 2.626982000472655e-05, + "loss": 1.3129, + "step": 8687 + }, + { + "epoch": 0.9164556962025316, + "grad_norm": 0.6270284652709961, + "learning_rate": 2.6203995791816372e-05, + "loss": 1.313, + "step": 8688 + }, + { + "epoch": 0.9165611814345992, + "grad_norm": 0.6180202960968018, + "learning_rate": 2.6138252683857693e-05, + "loss": 1.3382, + "step": 8689 + }, + { + "epoch": 
0.9166666666666666, + "grad_norm": 0.6226276755332947, + "learning_rate": 2.607259068821721e-05, + "loss": 1.282, + "step": 8690 + }, + { + "epoch": 0.9167721518987342, + "grad_norm": 0.6415652632713318, + "learning_rate": 2.6007009812252875e-05, + "loss": 1.3505, + "step": 8691 + }, + { + "epoch": 0.9168776371308017, + "grad_norm": 0.6069870591163635, + "learning_rate": 2.594151006331322e-05, + "loss": 1.3051, + "step": 8692 + }, + { + "epoch": 0.9169831223628692, + "grad_norm": 0.6154708862304688, + "learning_rate": 2.5876091448737788e-05, + "loss": 1.3089, + "step": 8693 + }, + { + "epoch": 0.9170886075949367, + "grad_norm": 0.618749737739563, + "learning_rate": 2.5810753975857136e-05, + "loss": 1.3195, + "step": 8694 + }, + { + "epoch": 0.9171940928270043, + "grad_norm": 0.6060662269592285, + "learning_rate": 2.5745497651992662e-05, + "loss": 1.2851, + "step": 8695 + }, + { + "epoch": 0.9172995780590717, + "grad_norm": 0.6284834146499634, + "learning_rate": 2.568032248445651e-05, + "loss": 1.3214, + "step": 8696 + }, + { + "epoch": 0.9174050632911392, + "grad_norm": 0.619611382484436, + "learning_rate": 2.561522848055217e-05, + "loss": 1.3011, + "step": 8697 + }, + { + "epoch": 0.9175105485232068, + "grad_norm": 0.6129112243652344, + "learning_rate": 2.5550215647573482e-05, + "loss": 1.2993, + "step": 8698 + }, + { + "epoch": 0.9176160337552742, + "grad_norm": 0.6112714409828186, + "learning_rate": 2.5485283992805615e-05, + "loss": 1.3312, + "step": 8699 + }, + { + "epoch": 0.9177215189873418, + "grad_norm": 0.618956446647644, + "learning_rate": 2.5420433523524493e-05, + "loss": 1.3043, + "step": 8700 + }, + { + "epoch": 0.9178270042194093, + "grad_norm": 0.6026365160942078, + "learning_rate": 2.5355664246996813e-05, + "loss": 1.307, + "step": 8701 + }, + { + "epoch": 0.9179324894514768, + "grad_norm": 0.6426939368247986, + "learning_rate": 2.5290976170480346e-05, + "loss": 1.2807, + "step": 8702 + }, + { + "epoch": 0.9180379746835443, + "grad_norm": 
0.6209653615951538, + "learning_rate": 2.522636930122371e-05, + "loss": 1.3267, + "step": 8703 + }, + { + "epoch": 0.9181434599156119, + "grad_norm": 0.5906042456626892, + "learning_rate": 2.516184364646637e-05, + "loss": 1.3003, + "step": 8704 + }, + { + "epoch": 0.9182489451476793, + "grad_norm": 0.6313439011573792, + "learning_rate": 2.5097399213438955e-05, + "loss": 1.3318, + "step": 8705 + }, + { + "epoch": 0.9183544303797468, + "grad_norm": 0.6353586912155151, + "learning_rate": 2.50330360093626e-05, + "loss": 1.2918, + "step": 8706 + }, + { + "epoch": 0.9184599156118144, + "grad_norm": 0.6132913827896118, + "learning_rate": 2.4968754041449633e-05, + "loss": 1.3251, + "step": 8707 + }, + { + "epoch": 0.9185654008438818, + "grad_norm": 0.6044731140136719, + "learning_rate": 2.490455331690303e-05, + "loss": 1.2956, + "step": 8708 + }, + { + "epoch": 0.9186708860759494, + "grad_norm": 0.5929815769195557, + "learning_rate": 2.4840433842916872e-05, + "loss": 1.3163, + "step": 8709 + }, + { + "epoch": 0.9187763713080169, + "grad_norm": 0.6310583353042603, + "learning_rate": 2.4776395626676162e-05, + "loss": 1.3381, + "step": 8710 + }, + { + "epoch": 0.9188818565400844, + "grad_norm": 0.6495723724365234, + "learning_rate": 2.471243867535658e-05, + "loss": 1.3248, + "step": 8711 + }, + { + "epoch": 0.9189873417721519, + "grad_norm": 0.6151522397994995, + "learning_rate": 2.4648562996124806e-05, + "loss": 1.3021, + "step": 8712 + }, + { + "epoch": 0.9190928270042195, + "grad_norm": 0.5925959348678589, + "learning_rate": 2.4584768596138452e-05, + "loss": 1.3212, + "step": 8713 + }, + { + "epoch": 0.9191983122362869, + "grad_norm": 0.6691802740097046, + "learning_rate": 2.4521055482546046e-05, + "loss": 1.3034, + "step": 8714 + }, + { + "epoch": 0.9193037974683544, + "grad_norm": 0.6411849856376648, + "learning_rate": 2.4457423662486962e-05, + "loss": 1.3135, + "step": 8715 + }, + { + "epoch": 0.919409282700422, + "grad_norm": 0.6013067960739136, + "learning_rate": 
2.4393873143091495e-05, + "loss": 1.2872, + "step": 8716 + }, + { + "epoch": 0.9195147679324894, + "grad_norm": 0.619716465473175, + "learning_rate": 2.43304039314807e-05, + "loss": 1.2625, + "step": 8717 + }, + { + "epoch": 0.919620253164557, + "grad_norm": 0.6147828698158264, + "learning_rate": 2.4267016034766637e-05, + "loss": 1.3233, + "step": 8718 + }, + { + "epoch": 0.9197257383966245, + "grad_norm": 0.6300684213638306, + "learning_rate": 2.4203709460052292e-05, + "loss": 1.308, + "step": 8719 + }, + { + "epoch": 0.919831223628692, + "grad_norm": 0.5987573862075806, + "learning_rate": 2.414048421443141e-05, + "loss": 1.3017, + "step": 8720 + }, + { + "epoch": 0.9199367088607595, + "grad_norm": 0.6202919483184814, + "learning_rate": 2.407734030498873e-05, + "loss": 1.3307, + "step": 8721 + }, + { + "epoch": 0.9200421940928271, + "grad_norm": 0.6705584526062012, + "learning_rate": 2.4014277738799774e-05, + "loss": 1.3207, + "step": 8722 + }, + { + "epoch": 0.9201476793248945, + "grad_norm": 0.6186177730560303, + "learning_rate": 2.395129652293121e-05, + "loss": 1.3344, + "step": 8723 + }, + { + "epoch": 0.920253164556962, + "grad_norm": 0.6258422136306763, + "learning_rate": 2.3888396664440232e-05, + "loss": 1.3677, + "step": 8724 + }, + { + "epoch": 0.9203586497890295, + "grad_norm": 0.6089040040969849, + "learning_rate": 2.38255781703752e-05, + "loss": 1.3003, + "step": 8725 + }, + { + "epoch": 0.920464135021097, + "grad_norm": 0.6222913265228271, + "learning_rate": 2.3762841047775068e-05, + "loss": 1.2736, + "step": 8726 + }, + { + "epoch": 0.9205696202531646, + "grad_norm": 0.6157301068305969, + "learning_rate": 2.3700185303670046e-05, + "loss": 1.3088, + "step": 8727 + }, + { + "epoch": 0.920675105485232, + "grad_norm": 0.621414840221405, + "learning_rate": 2.363761094508085e-05, + "loss": 1.2792, + "step": 8728 + }, + { + "epoch": 0.9207805907172996, + "grad_norm": 0.5965144038200378, + "learning_rate": 2.357511797901929e-05, + "loss": 1.3688, + "step": 
8729 + }, + { + "epoch": 0.9208860759493671, + "grad_norm": 0.6237696409225464, + "learning_rate": 2.3512706412488012e-05, + "loss": 1.3025, + "step": 8730 + }, + { + "epoch": 0.9209915611814345, + "grad_norm": 0.6451387405395508, + "learning_rate": 2.345037625248067e-05, + "loss": 1.297, + "step": 8731 + }, + { + "epoch": 0.9210970464135021, + "grad_norm": 0.7153080105781555, + "learning_rate": 2.3388127505981515e-05, + "loss": 1.3115, + "step": 8732 + }, + { + "epoch": 0.9212025316455696, + "grad_norm": 0.6349402070045471, + "learning_rate": 2.3325960179965967e-05, + "loss": 1.3402, + "step": 8733 + }, + { + "epoch": 0.9213080168776371, + "grad_norm": 0.6391677260398865, + "learning_rate": 2.3263874281400034e-05, + "loss": 1.2798, + "step": 8734 + }, + { + "epoch": 0.9214135021097046, + "grad_norm": 0.652016818523407, + "learning_rate": 2.3201869817240817e-05, + "loss": 1.2923, + "step": 8735 + }, + { + "epoch": 0.9215189873417722, + "grad_norm": 0.6869933009147644, + "learning_rate": 2.313994679443626e-05, + "loss": 1.2941, + "step": 8736 + }, + { + "epoch": 0.9216244725738396, + "grad_norm": 0.6335510611534119, + "learning_rate": 2.307810521992515e-05, + "loss": 1.2936, + "step": 8737 + }, + { + "epoch": 0.9217299578059072, + "grad_norm": 0.6630359292030334, + "learning_rate": 2.301634510063702e-05, + "loss": 1.3288, + "step": 8738 + }, + { + "epoch": 0.9218354430379747, + "grad_norm": 0.6182637810707092, + "learning_rate": 2.2954666443492505e-05, + "loss": 1.3195, + "step": 8739 + }, + { + "epoch": 0.9219409282700421, + "grad_norm": 0.6500406861305237, + "learning_rate": 2.2893069255402993e-05, + "loss": 1.3102, + "step": 8740 + }, + { + "epoch": 0.9220464135021097, + "grad_norm": 0.6128456592559814, + "learning_rate": 2.2831553543270793e-05, + "loss": 1.2985, + "step": 8741 + }, + { + "epoch": 0.9221518987341772, + "grad_norm": 0.5876219868659973, + "learning_rate": 2.277011931398898e-05, + "loss": 1.2882, + "step": 8742 + }, + { + "epoch": 
0.9222573839662447, + "grad_norm": 0.599266767501831, + "learning_rate": 2.2708766574441626e-05, + "loss": 1.2472, + "step": 8743 + }, + { + "epoch": 0.9223628691983122, + "grad_norm": 0.5916695594787598, + "learning_rate": 2.2647495331503565e-05, + "loss": 1.3055, + "step": 8744 + }, + { + "epoch": 0.9224683544303798, + "grad_norm": 0.6325435042381287, + "learning_rate": 2.2586305592040558e-05, + "loss": 1.3411, + "step": 8745 + }, + { + "epoch": 0.9225738396624472, + "grad_norm": 0.638283908367157, + "learning_rate": 2.2525197362909282e-05, + "loss": 1.3189, + "step": 8746 + }, + { + "epoch": 0.9226793248945148, + "grad_norm": 0.6130338907241821, + "learning_rate": 2.24641706509571e-05, + "loss": 1.3032, + "step": 8747 + }, + { + "epoch": 0.9227848101265823, + "grad_norm": 0.6051870584487915, + "learning_rate": 2.2403225463022288e-05, + "loss": 1.2938, + "step": 8748 + }, + { + "epoch": 0.9228902953586497, + "grad_norm": 0.6150918006896973, + "learning_rate": 2.2342361805934297e-05, + "loss": 1.3053, + "step": 8749 + }, + { + "epoch": 0.9229957805907173, + "grad_norm": 0.6260238289833069, + "learning_rate": 2.2281579686513176e-05, + "loss": 1.3309, + "step": 8750 + }, + { + "epoch": 0.9231012658227848, + "grad_norm": 0.5899978876113892, + "learning_rate": 2.2220879111569725e-05, + "loss": 1.2802, + "step": 8751 + }, + { + "epoch": 0.9232067510548523, + "grad_norm": 0.6120224595069885, + "learning_rate": 2.2160260087905753e-05, + "loss": 1.3284, + "step": 8752 + }, + { + "epoch": 0.9233122362869198, + "grad_norm": 0.6202399730682373, + "learning_rate": 2.2099722622314078e-05, + "loss": 1.2837, + "step": 8753 + }, + { + "epoch": 0.9234177215189874, + "grad_norm": 0.5972847938537598, + "learning_rate": 2.203926672157802e-05, + "loss": 1.2925, + "step": 8754 + }, + { + "epoch": 0.9235232067510548, + "grad_norm": 0.6365810036659241, + "learning_rate": 2.1978892392472085e-05, + "loss": 1.3043, + "step": 8755 + }, + { + "epoch": 0.9236286919831224, + "grad_norm": 
0.580487847328186, + "learning_rate": 2.1918599641761517e-05, + "loss": 1.2965, + "step": 8756 + }, + { + "epoch": 0.9237341772151899, + "grad_norm": 0.6495883464813232, + "learning_rate": 2.185838847620242e-05, + "loss": 1.2999, + "step": 8757 + }, + { + "epoch": 0.9238396624472573, + "grad_norm": 0.6398794054985046, + "learning_rate": 2.1798258902541723e-05, + "loss": 1.2371, + "step": 8758 + }, + { + "epoch": 0.9239451476793249, + "grad_norm": 0.6134063005447388, + "learning_rate": 2.173821092751721e-05, + "loss": 1.3176, + "step": 8759 + }, + { + "epoch": 0.9240506329113924, + "grad_norm": 0.6393828988075256, + "learning_rate": 2.1678244557857663e-05, + "loss": 1.2844, + "step": 8760 + }, + { + "epoch": 0.9241561181434599, + "grad_norm": 0.6316152215003967, + "learning_rate": 2.161835980028254e-05, + "loss": 1.3224, + "step": 8761 + }, + { + "epoch": 0.9242616033755274, + "grad_norm": 0.6071500778198242, + "learning_rate": 2.1558556661502222e-05, + "loss": 1.2781, + "step": 8762 + }, + { + "epoch": 0.924367088607595, + "grad_norm": 0.6557965874671936, + "learning_rate": 2.1498835148218017e-05, + "loss": 1.359, + "step": 8763 + }, + { + "epoch": 0.9244725738396624, + "grad_norm": 0.6032228469848633, + "learning_rate": 2.1439195267121902e-05, + "loss": 1.319, + "step": 8764 + }, + { + "epoch": 0.92457805907173, + "grad_norm": 0.6434414982795715, + "learning_rate": 2.137963702489687e-05, + "loss": 1.2753, + "step": 8765 + }, + { + "epoch": 0.9246835443037975, + "grad_norm": 0.6594979166984558, + "learning_rate": 2.132016042821683e-05, + "loss": 1.2947, + "step": 8766 + }, + { + "epoch": 0.924789029535865, + "grad_norm": 0.707653284072876, + "learning_rate": 2.1260765483746282e-05, + "loss": 1.3107, + "step": 8767 + }, + { + "epoch": 0.9248945147679325, + "grad_norm": 0.607109010219574, + "learning_rate": 2.120145219814082e-05, + "loss": 1.2941, + "step": 8768 + }, + { + "epoch": 0.925, + "grad_norm": 0.622745931148529, + "learning_rate": 2.1142220578046712e-05, + 
"loss": 1.2973, + "step": 8769 + }, + { + "epoch": 0.9251054852320675, + "grad_norm": 0.5930353403091431, + "learning_rate": 2.1083070630101232e-05, + "loss": 1.279, + "step": 8770 + }, + { + "epoch": 0.925210970464135, + "grad_norm": 0.6103518605232239, + "learning_rate": 2.102400236093241e-05, + "loss": 1.3415, + "step": 8771 + }, + { + "epoch": 0.9253164556962026, + "grad_norm": 0.6081943511962891, + "learning_rate": 2.096501577715912e-05, + "loss": 1.2787, + "step": 8772 + }, + { + "epoch": 0.92542194092827, + "grad_norm": 0.6295158267021179, + "learning_rate": 2.0906110885391072e-05, + "loss": 1.3256, + "step": 8773 + }, + { + "epoch": 0.9255274261603376, + "grad_norm": 0.6014444828033447, + "learning_rate": 2.0847287692228905e-05, + "loss": 1.2775, + "step": 8774 + }, + { + "epoch": 0.9256329113924051, + "grad_norm": 0.6129344701766968, + "learning_rate": 2.0788546204264013e-05, + "loss": 1.3183, + "step": 8775 + }, + { + "epoch": 0.9257383966244725, + "grad_norm": 0.5895368456840515, + "learning_rate": 2.0729886428078716e-05, + "loss": 1.3027, + "step": 8776 + }, + { + "epoch": 0.9258438818565401, + "grad_norm": 0.6090274453163147, + "learning_rate": 2.0671308370246167e-05, + "loss": 1.3034, + "step": 8777 + }, + { + "epoch": 0.9259493670886076, + "grad_norm": 0.5950596928596497, + "learning_rate": 2.0612812037330202e-05, + "loss": 1.2983, + "step": 8778 + }, + { + "epoch": 0.9260548523206751, + "grad_norm": 0.6160978674888611, + "learning_rate": 2.0554397435885746e-05, + "loss": 1.3283, + "step": 8779 + }, + { + "epoch": 0.9261603375527426, + "grad_norm": 0.6375200152397156, + "learning_rate": 2.0496064572458395e-05, + "loss": 1.319, + "step": 8780 + }, + { + "epoch": 0.9262658227848102, + "grad_norm": 0.624634861946106, + "learning_rate": 2.043781345358467e-05, + "loss": 1.3225, + "step": 8781 + }, + { + "epoch": 0.9263713080168776, + "grad_norm": 0.6328988075256348, + "learning_rate": 2.0379644085791767e-05, + "loss": 1.3414, + "step": 8782 + }, + { + 
"epoch": 0.9264767932489452, + "grad_norm": 0.5903260707855225, + "learning_rate": 2.032155647559805e-05, + "loss": 1.2706, + "step": 8783 + }, + { + "epoch": 0.9265822784810127, + "grad_norm": 0.62354576587677, + "learning_rate": 2.0263550629512406e-05, + "loss": 1.2955, + "step": 8784 + }, + { + "epoch": 0.9266877637130801, + "grad_norm": 0.6155762076377869, + "learning_rate": 2.0205626554034713e-05, + "loss": 1.3192, + "step": 8785 + }, + { + "epoch": 0.9267932489451477, + "grad_norm": 0.608679473400116, + "learning_rate": 2.0147784255655692e-05, + "loss": 1.2861, + "step": 8786 + }, + { + "epoch": 0.9268987341772152, + "grad_norm": 0.609981119632721, + "learning_rate": 2.009002374085675e-05, + "loss": 1.2789, + "step": 8787 + }, + { + "epoch": 0.9270042194092827, + "grad_norm": 0.6255799531936646, + "learning_rate": 2.003234501611037e-05, + "loss": 1.3056, + "step": 8788 + }, + { + "epoch": 0.9271097046413502, + "grad_norm": 0.6017916798591614, + "learning_rate": 1.9974748087879636e-05, + "loss": 1.3067, + "step": 8789 + }, + { + "epoch": 0.9272151898734177, + "grad_norm": 0.6123143434524536, + "learning_rate": 1.991723296261863e-05, + "loss": 1.3187, + "step": 8790 + }, + { + "epoch": 0.9273206751054852, + "grad_norm": 0.6104992628097534, + "learning_rate": 1.985979964677212e-05, + "loss": 1.3282, + "step": 8791 + }, + { + "epoch": 0.9274261603375528, + "grad_norm": 0.6147783994674683, + "learning_rate": 1.9802448146775953e-05, + "loss": 1.2916, + "step": 8792 + }, + { + "epoch": 0.9275316455696202, + "grad_norm": 0.6401031017303467, + "learning_rate": 1.9745178469056575e-05, + "loss": 1.3057, + "step": 8793 + }, + { + "epoch": 0.9276371308016877, + "grad_norm": 0.6021702289581299, + "learning_rate": 1.9687990620031266e-05, + "loss": 1.2865, + "step": 8794 + }, + { + "epoch": 0.9277426160337553, + "grad_norm": 0.6015633940696716, + "learning_rate": 1.963088460610832e-05, + "loss": 1.2962, + "step": 8795 + }, + { + "epoch": 0.9278481012658227, + "grad_norm": 
0.6396520137786865, + "learning_rate": 1.9573860433686696e-05, + "loss": 1.3169, + "step": 8796 + }, + { + "epoch": 0.9279535864978903, + "grad_norm": 0.6575623750686646, + "learning_rate": 1.9516918109156206e-05, + "loss": 1.3093, + "step": 8797 + }, + { + "epoch": 0.9280590717299578, + "grad_norm": 0.6162880659103394, + "learning_rate": 1.9460057638897578e-05, + "loss": 1.3047, + "step": 8798 + }, + { + "epoch": 0.9281645569620253, + "grad_norm": 0.6053991317749023, + "learning_rate": 1.9403279029282376e-05, + "loss": 1.2878, + "step": 8799 + }, + { + "epoch": 0.9282700421940928, + "grad_norm": 0.6268516182899475, + "learning_rate": 1.9346582286672686e-05, + "loss": 1.3038, + "step": 8800 + }, + { + "epoch": 0.9283755274261604, + "grad_norm": 0.6094595789909363, + "learning_rate": 1.9289967417421922e-05, + "loss": 1.3241, + "step": 8801 + }, + { + "epoch": 0.9284810126582278, + "grad_norm": 0.6816033124923706, + "learning_rate": 1.9233434427873924e-05, + "loss": 1.2983, + "step": 8802 + }, + { + "epoch": 0.9285864978902953, + "grad_norm": 0.6092706918716431, + "learning_rate": 1.9176983324363545e-05, + "loss": 1.3106, + "step": 8803 + }, + { + "epoch": 0.9286919831223629, + "grad_norm": 0.619045615196228, + "learning_rate": 1.912061411321639e-05, + "loss": 1.3051, + "step": 8804 + }, + { + "epoch": 0.9287974683544303, + "grad_norm": 0.6117709875106812, + "learning_rate": 1.9064326800748906e-05, + "loss": 1.3072, + "step": 8805 + }, + { + "epoch": 0.9289029535864979, + "grad_norm": 0.6634877324104309, + "learning_rate": 1.9008121393268462e-05, + "loss": 1.3193, + "step": 8806 + }, + { + "epoch": 0.9290084388185654, + "grad_norm": 0.5983534455299377, + "learning_rate": 1.8951997897072943e-05, + "loss": 1.3237, + "step": 8807 + }, + { + "epoch": 0.9291139240506329, + "grad_norm": 0.6280766129493713, + "learning_rate": 1.8895956318451398e-05, + "loss": 1.3274, + "step": 8808 + }, + { + "epoch": 0.9292194092827004, + "grad_norm": 0.614349901676178, + "learning_rate": 
1.8839996663683635e-05, + "loss": 1.3399, + "step": 8809 + }, + { + "epoch": 0.929324894514768, + "grad_norm": 0.6002707481384277, + "learning_rate": 1.878411893904014e-05, + "loss": 1.3555, + "step": 8810 + }, + { + "epoch": 0.9294303797468354, + "grad_norm": 0.6639551520347595, + "learning_rate": 1.872832315078224e-05, + "loss": 1.3127, + "step": 8811 + }, + { + "epoch": 0.929535864978903, + "grad_norm": 0.6267639398574829, + "learning_rate": 1.8672609305162263e-05, + "loss": 1.3036, + "step": 8812 + }, + { + "epoch": 0.9296413502109705, + "grad_norm": 0.6034362316131592, + "learning_rate": 1.8616977408423053e-05, + "loss": 1.3155, + "step": 8813 + }, + { + "epoch": 0.9297468354430379, + "grad_norm": 0.6067094802856445, + "learning_rate": 1.856142746679862e-05, + "loss": 1.3281, + "step": 8814 + }, + { + "epoch": 0.9298523206751055, + "grad_norm": 0.5944705009460449, + "learning_rate": 1.8505959486513485e-05, + "loss": 1.2924, + "step": 8815 + }, + { + "epoch": 0.929957805907173, + "grad_norm": 0.642798125743866, + "learning_rate": 1.8450573473783094e-05, + "loss": 1.3145, + "step": 8816 + }, + { + "epoch": 0.9300632911392405, + "grad_norm": 0.6161662340164185, + "learning_rate": 1.8395269434813733e-05, + "loss": 1.3207, + "step": 8817 + }, + { + "epoch": 0.930168776371308, + "grad_norm": 0.6014290452003479, + "learning_rate": 1.8340047375802693e-05, + "loss": 1.3049, + "step": 8818 + }, + { + "epoch": 0.9302742616033756, + "grad_norm": 0.6017434597015381, + "learning_rate": 1.8284907302937608e-05, + "loss": 1.2862, + "step": 8819 + }, + { + "epoch": 0.930379746835443, + "grad_norm": 0.5906518697738647, + "learning_rate": 1.822984922239737e-05, + "loss": 1.2952, + "step": 8820 + }, + { + "epoch": 0.9304852320675105, + "grad_norm": 0.6068689823150635, + "learning_rate": 1.8174873140351544e-05, + "loss": 1.3222, + "step": 8821 + }, + { + "epoch": 0.9305907172995781, + "grad_norm": 0.6276975870132446, + "learning_rate": 1.8119979062960286e-05, + "loss": 1.3836, + 
"step": 8822 + }, + { + "epoch": 0.9306962025316455, + "grad_norm": 0.6130346059799194, + "learning_rate": 1.806516699637492e-05, + "loss": 1.3076, + "step": 8823 + }, + { + "epoch": 0.9308016877637131, + "grad_norm": 0.693379819393158, + "learning_rate": 1.8010436946737292e-05, + "loss": 1.3448, + "step": 8824 + }, + { + "epoch": 0.9309071729957806, + "grad_norm": 0.626475989818573, + "learning_rate": 1.7955788920180238e-05, + "loss": 1.323, + "step": 8825 + }, + { + "epoch": 0.9310126582278481, + "grad_norm": 0.6221593022346497, + "learning_rate": 1.7901222922827282e-05, + "loss": 1.3241, + "step": 8826 + }, + { + "epoch": 0.9311181434599156, + "grad_norm": 0.6316399574279785, + "learning_rate": 1.7846738960792945e-05, + "loss": 1.3178, + "step": 8827 + }, + { + "epoch": 0.9312236286919832, + "grad_norm": 0.6325588226318359, + "learning_rate": 1.7792337040182434e-05, + "loss": 1.3404, + "step": 8828 + }, + { + "epoch": 0.9313291139240506, + "grad_norm": 0.6528874039649963, + "learning_rate": 1.773801716709153e-05, + "loss": 1.3011, + "step": 8829 + }, + { + "epoch": 0.9314345991561181, + "grad_norm": 0.6197903752326965, + "learning_rate": 1.7683779347607286e-05, + "loss": 1.2808, + "step": 8830 + }, + { + "epoch": 0.9315400843881857, + "grad_norm": 0.5964412689208984, + "learning_rate": 1.7629623587807175e-05, + "loss": 1.2618, + "step": 8831 + }, + { + "epoch": 0.9316455696202531, + "grad_norm": 0.5872742533683777, + "learning_rate": 1.7575549893759756e-05, + "loss": 1.3387, + "step": 8832 + }, + { + "epoch": 0.9317510548523207, + "grad_norm": 0.6122995615005493, + "learning_rate": 1.7521558271524103e-05, + "loss": 1.2952, + "step": 8833 + }, + { + "epoch": 0.9318565400843882, + "grad_norm": 0.6120162010192871, + "learning_rate": 1.7467648727150202e-05, + "loss": 1.2995, + "step": 8834 + }, + { + "epoch": 0.9319620253164557, + "grad_norm": 0.6274875998497009, + "learning_rate": 1.741382126667915e-05, + "loss": 1.3106, + "step": 8835 + }, + { + "epoch": 
0.9320675105485232, + "grad_norm": 0.6682108044624329, + "learning_rate": 1.7360075896142357e-05, + "loss": 1.3284, + "step": 8836 + }, + { + "epoch": 0.9321729957805908, + "grad_norm": 0.6282286643981934, + "learning_rate": 1.7306412621562352e-05, + "loss": 1.3336, + "step": 8837 + }, + { + "epoch": 0.9322784810126582, + "grad_norm": 0.6050817966461182, + "learning_rate": 1.72528314489524e-05, + "loss": 1.3216, + "step": 8838 + }, + { + "epoch": 0.9323839662447257, + "grad_norm": 0.6109735369682312, + "learning_rate": 1.719933238431645e-05, + "loss": 1.2761, + "step": 8839 + }, + { + "epoch": 0.9324894514767933, + "grad_norm": 0.6139078140258789, + "learning_rate": 1.714591543364938e-05, + "loss": 1.3205, + "step": 8840 + }, + { + "epoch": 0.9325949367088607, + "grad_norm": 0.6185466647148132, + "learning_rate": 1.7092580602936807e-05, + "loss": 1.2883, + "step": 8841 + }, + { + "epoch": 0.9327004219409283, + "grad_norm": 0.6335403323173523, + "learning_rate": 1.703932789815521e-05, + "loss": 1.3495, + "step": 8842 + }, + { + "epoch": 0.9328059071729958, + "grad_norm": 0.6090266108512878, + "learning_rate": 1.6986157325271727e-05, + "loss": 1.3077, + "step": 8843 + }, + { + "epoch": 0.9329113924050633, + "grad_norm": 0.6176438331604004, + "learning_rate": 1.6933068890244595e-05, + "loss": 1.3071, + "step": 8844 + }, + { + "epoch": 0.9330168776371308, + "grad_norm": 0.5931311249732971, + "learning_rate": 1.688006259902239e-05, + "loss": 1.2983, + "step": 8845 + }, + { + "epoch": 0.9331223628691984, + "grad_norm": 0.6766129732131958, + "learning_rate": 1.6827138457544854e-05, + "loss": 1.3083, + "step": 8846 + }, + { + "epoch": 0.9332278481012658, + "grad_norm": 0.5865991115570068, + "learning_rate": 1.677429647174242e-05, + "loss": 1.2689, + "step": 8847 + }, + { + "epoch": 0.9333333333333333, + "grad_norm": 0.6498442888259888, + "learning_rate": 1.6721536647536255e-05, + "loss": 1.3335, + "step": 8848 + }, + { + "epoch": 0.9334388185654009, + "grad_norm": 
0.6358641386032104, + "learning_rate": 1.666885899083831e-05, + "loss": 1.3337, + "step": 8849 + }, + { + "epoch": 0.9335443037974683, + "grad_norm": 0.6420943140983582, + "learning_rate": 1.6616263507551437e-05, + "loss": 1.2825, + "step": 8850 + }, + { + "epoch": 0.9336497890295359, + "grad_norm": 0.607376754283905, + "learning_rate": 1.656375020356926e-05, + "loss": 1.3236, + "step": 8851 + }, + { + "epoch": 0.9337552742616034, + "grad_norm": 0.5957670211791992, + "learning_rate": 1.6511319084776073e-05, + "loss": 1.3121, + "step": 8852 + }, + { + "epoch": 0.9338607594936709, + "grad_norm": 0.6529752016067505, + "learning_rate": 1.645897015704709e-05, + "loss": 1.2922, + "step": 8853 + }, + { + "epoch": 0.9339662447257384, + "grad_norm": 0.679978609085083, + "learning_rate": 1.6406703426248366e-05, + "loss": 1.3016, + "step": 8854 + }, + { + "epoch": 0.9340717299578059, + "grad_norm": 0.6350160241127014, + "learning_rate": 1.6354518898236472e-05, + "loss": 1.3332, + "step": 8855 + }, + { + "epoch": 0.9341772151898734, + "grad_norm": 0.6046868562698364, + "learning_rate": 1.630241657885906e-05, + "loss": 1.3017, + "step": 8856 + }, + { + "epoch": 0.934282700421941, + "grad_norm": 0.6016430258750916, + "learning_rate": 1.6250396473954377e-05, + "loss": 1.2765, + "step": 8857 + }, + { + "epoch": 0.9343881856540084, + "grad_norm": 0.5798501968383789, + "learning_rate": 1.6198458589351595e-05, + "loss": 1.3264, + "step": 8858 + }, + { + "epoch": 0.9344936708860759, + "grad_norm": 0.6349705457687378, + "learning_rate": 1.614660293087056e-05, + "loss": 1.3255, + "step": 8859 + }, + { + "epoch": 0.9345991561181435, + "grad_norm": 0.6479359269142151, + "learning_rate": 1.609482950432195e-05, + "loss": 1.2814, + "step": 8860 + }, + { + "epoch": 0.9347046413502109, + "grad_norm": 0.5944907665252686, + "learning_rate": 1.6043138315507382e-05, + "loss": 1.2787, + "step": 8861 + }, + { + "epoch": 0.9348101265822785, + "grad_norm": 0.5879233479499817, + "learning_rate": 
1.5991529370218887e-05, + "loss": 1.3334, + "step": 8862 + }, + { + "epoch": 0.934915611814346, + "grad_norm": 0.6310455799102783, + "learning_rate": 1.5940002674239756e-05, + "loss": 1.3184, + "step": 8863 + }, + { + "epoch": 0.9350210970464135, + "grad_norm": 0.6293732523918152, + "learning_rate": 1.588855823334362e-05, + "loss": 1.2902, + "step": 8864 + }, + { + "epoch": 0.935126582278481, + "grad_norm": 0.6198005676269531, + "learning_rate": 1.5837196053295117e-05, + "loss": 1.3271, + "step": 8865 + }, + { + "epoch": 0.9352320675105485, + "grad_norm": 0.6215552091598511, + "learning_rate": 1.5785916139849725e-05, + "loss": 1.3234, + "step": 8866 + }, + { + "epoch": 0.935337552742616, + "grad_norm": 0.6185514330863953, + "learning_rate": 1.573471849875352e-05, + "loss": 1.3343, + "step": 8867 + }, + { + "epoch": 0.9354430379746835, + "grad_norm": 0.6298299431800842, + "learning_rate": 1.568360313574349e-05, + "loss": 1.315, + "step": 8868 + }, + { + "epoch": 0.9355485232067511, + "grad_norm": 0.6043986678123474, + "learning_rate": 1.5632570056547308e-05, + "loss": 1.3085, + "step": 8869 + }, + { + "epoch": 0.9356540084388185, + "grad_norm": 0.6356971263885498, + "learning_rate": 1.5581619266883563e-05, + "loss": 1.2869, + "step": 8870 + }, + { + "epoch": 0.9357594936708861, + "grad_norm": 0.6025364995002747, + "learning_rate": 1.5530750772461522e-05, + "loss": 1.3423, + "step": 8871 + }, + { + "epoch": 0.9358649789029536, + "grad_norm": 0.596657395362854, + "learning_rate": 1.5479964578981293e-05, + "loss": 1.2713, + "step": 8872 + }, + { + "epoch": 0.935970464135021, + "grad_norm": 0.6167136430740356, + "learning_rate": 1.5429260692133656e-05, + "loss": 1.3286, + "step": 8873 + }, + { + "epoch": 0.9360759493670886, + "grad_norm": 0.5893048048019409, + "learning_rate": 1.5378639117600234e-05, + "loss": 1.2959, + "step": 8874 + }, + { + "epoch": 0.9361814345991561, + "grad_norm": 0.7057628035545349, + "learning_rate": 1.532809986105349e-05, + "loss": 1.2937, + 
"step": 8875 + }, + { + "epoch": 0.9362869198312236, + "grad_norm": 0.5958252549171448, + "learning_rate": 1.527764292815656e-05, + "loss": 1.3198, + "step": 8876 + }, + { + "epoch": 0.9363924050632911, + "grad_norm": 0.6178620457649231, + "learning_rate": 1.522726832456342e-05, + "loss": 1.3067, + "step": 8877 + }, + { + "epoch": 0.9364978902953587, + "grad_norm": 0.6109412908554077, + "learning_rate": 1.517697605591864e-05, + "loss": 1.3263, + "step": 8878 + }, + { + "epoch": 0.9366033755274261, + "grad_norm": 0.6005860567092896, + "learning_rate": 1.512676612785796e-05, + "loss": 1.3292, + "step": 8879 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 0.6301207542419434, + "learning_rate": 1.5076638546007548e-05, + "loss": 1.3161, + "step": 8880 + }, + { + "epoch": 0.9368143459915612, + "grad_norm": 0.6481268405914307, + "learning_rate": 1.502659331598441e-05, + "loss": 1.3162, + "step": 8881 + }, + { + "epoch": 0.9369198312236287, + "grad_norm": 0.6221020817756653, + "learning_rate": 1.4976630443396395e-05, + "loss": 1.3496, + "step": 8882 + }, + { + "epoch": 0.9370253164556962, + "grad_norm": 0.6435521245002747, + "learning_rate": 1.4926749933842187e-05, + "loss": 1.2926, + "step": 8883 + }, + { + "epoch": 0.9371308016877637, + "grad_norm": 0.6652200222015381, + "learning_rate": 1.4876951792910987e-05, + "loss": 1.3285, + "step": 8884 + }, + { + "epoch": 0.9372362869198312, + "grad_norm": 0.6415147185325623, + "learning_rate": 1.4827236026182994e-05, + "loss": 1.2967, + "step": 8885 + }, + { + "epoch": 0.9373417721518987, + "grad_norm": 0.6007397174835205, + "learning_rate": 1.4777602639229004e-05, + "loss": 1.3227, + "step": 8886 + }, + { + "epoch": 0.9374472573839663, + "grad_norm": 0.6088355779647827, + "learning_rate": 1.4728051637610902e-05, + "loss": 1.3063, + "step": 8887 + }, + { + "epoch": 0.9375527426160337, + "grad_norm": 0.6295164227485657, + "learning_rate": 1.4678583026880993e-05, + "loss": 1.3078, + "step": 8888 + }, + { + "epoch": 
0.9376582278481013, + "grad_norm": 0.6166828870773315, + "learning_rate": 1.4629196812582513e-05, + "loss": 1.3233, + "step": 8889 + }, + { + "epoch": 0.9377637130801688, + "grad_norm": 0.6048364639282227, + "learning_rate": 1.457989300024945e-05, + "loss": 1.2817, + "step": 8890 + }, + { + "epoch": 0.9378691983122363, + "grad_norm": 0.655109703540802, + "learning_rate": 1.4530671595406469e-05, + "loss": 1.3037, + "step": 8891 + }, + { + "epoch": 0.9379746835443038, + "grad_norm": 0.6076653599739075, + "learning_rate": 1.4481532603569076e-05, + "loss": 1.3119, + "step": 8892 + }, + { + "epoch": 0.9380801687763713, + "grad_norm": 0.6161845326423645, + "learning_rate": 1.4432476030243696e-05, + "loss": 1.2873, + "step": 8893 + }, + { + "epoch": 0.9381856540084388, + "grad_norm": 0.6443436741828918, + "learning_rate": 1.4383501880927103e-05, + "loss": 1.2836, + "step": 8894 + }, + { + "epoch": 0.9382911392405063, + "grad_norm": 0.6113595366477966, + "learning_rate": 1.433461016110732e-05, + "loss": 1.3226, + "step": 8895 + }, + { + "epoch": 0.9383966244725739, + "grad_norm": 0.681540846824646, + "learning_rate": 1.42858008762628e-05, + "loss": 1.2972, + "step": 8896 + }, + { + "epoch": 0.9385021097046413, + "grad_norm": 0.6244838237762451, + "learning_rate": 1.4237074031862918e-05, + "loss": 1.3135, + "step": 8897 + }, + { + "epoch": 0.9386075949367089, + "grad_norm": 0.6351054906845093, + "learning_rate": 1.4188429633367721e-05, + "loss": 1.2788, + "step": 8898 + }, + { + "epoch": 0.9387130801687764, + "grad_norm": 0.7123880982398987, + "learning_rate": 1.4139867686228102e-05, + "loss": 1.3274, + "step": 8899 + }, + { + "epoch": 0.9388185654008439, + "grad_norm": 0.6102770566940308, + "learning_rate": 1.4091388195885625e-05, + "loss": 1.3057, + "step": 8900 + }, + { + "epoch": 0.9389240506329114, + "grad_norm": 0.6040680408477783, + "learning_rate": 1.404299116777269e-05, + "loss": 1.3199, + "step": 8901 + }, + { + "epoch": 0.939029535864979, + "grad_norm": 
0.5873343348503113, + "learning_rate": 1.3994676607312379e-05, + "loss": 1.2785, + "step": 8902 + }, + { + "epoch": 0.9391350210970464, + "grad_norm": 0.6037443280220032, + "learning_rate": 1.3946444519918611e-05, + "loss": 1.318, + "step": 8903 + }, + { + "epoch": 0.9392405063291139, + "grad_norm": 0.6218230128288269, + "learning_rate": 1.3898294910995979e-05, + "loss": 1.3163, + "step": 8904 + }, + { + "epoch": 0.9393459915611815, + "grad_norm": 0.6110193729400635, + "learning_rate": 1.385022778594e-05, + "loss": 1.3162, + "step": 8905 + }, + { + "epoch": 0.9394514767932489, + "grad_norm": 0.5986295938491821, + "learning_rate": 1.3802243150136784e-05, + "loss": 1.3376, + "step": 8906 + }, + { + "epoch": 0.9395569620253165, + "grad_norm": 0.6218068599700928, + "learning_rate": 1.3754341008963194e-05, + "loss": 1.2839, + "step": 8907 + }, + { + "epoch": 0.939662447257384, + "grad_norm": 0.5986127257347107, + "learning_rate": 1.370652136778694e-05, + "loss": 1.298, + "step": 8908 + }, + { + "epoch": 0.9397679324894515, + "grad_norm": 0.6504202485084534, + "learning_rate": 1.3658784231966481e-05, + "loss": 1.292, + "step": 8909 + }, + { + "epoch": 0.939873417721519, + "grad_norm": 0.695958137512207, + "learning_rate": 1.3611129606851041e-05, + "loss": 1.2741, + "step": 8910 + }, + { + "epoch": 0.9399789029535865, + "grad_norm": 0.6030798554420471, + "learning_rate": 1.3563557497780432e-05, + "loss": 1.2957, + "step": 8911 + }, + { + "epoch": 0.940084388185654, + "grad_norm": 0.6137349605560303, + "learning_rate": 1.3516067910085306e-05, + "loss": 1.3377, + "step": 8912 + }, + { + "epoch": 0.9401898734177215, + "grad_norm": 0.6249287724494934, + "learning_rate": 1.3468660849087322e-05, + "loss": 1.3202, + "step": 8913 + }, + { + "epoch": 0.9402953586497891, + "grad_norm": 0.6050190329551697, + "learning_rate": 1.3421336320098565e-05, + "loss": 1.3176, + "step": 8914 + }, + { + "epoch": 0.9404008438818565, + "grad_norm": 0.6103069186210632, + "learning_rate": 
1.3374094328422043e-05, + "loss": 1.2998, + "step": 8915 + }, + { + "epoch": 0.9405063291139241, + "grad_norm": 0.6144002079963684, + "learning_rate": 1.3326934879351272e-05, + "loss": 1.3605, + "step": 8916 + }, + { + "epoch": 0.9406118143459916, + "grad_norm": 0.6455116868019104, + "learning_rate": 1.327985797817094e-05, + "loss": 1.3218, + "step": 8917 + }, + { + "epoch": 0.940717299578059, + "grad_norm": 0.6738781332969666, + "learning_rate": 1.3232863630156077e-05, + "loss": 1.2746, + "step": 8918 + }, + { + "epoch": 0.9408227848101266, + "grad_norm": 0.6203892827033997, + "learning_rate": 1.3185951840572723e-05, + "loss": 1.2984, + "step": 8919 + }, + { + "epoch": 0.9409282700421941, + "grad_norm": 0.5994904041290283, + "learning_rate": 1.313912261467759e-05, + "loss": 1.2823, + "step": 8920 + }, + { + "epoch": 0.9410337552742616, + "grad_norm": 0.6691356897354126, + "learning_rate": 1.3092375957717978e-05, + "loss": 1.311, + "step": 8921 + }, + { + "epoch": 0.9411392405063291, + "grad_norm": 0.6104572415351868, + "learning_rate": 1.3045711874932281e-05, + "loss": 1.3, + "step": 8922 + }, + { + "epoch": 0.9412447257383966, + "grad_norm": 0.6290695071220398, + "learning_rate": 1.2999130371549318e-05, + "loss": 1.308, + "step": 8923 + }, + { + "epoch": 0.9413502109704641, + "grad_norm": 0.588324785232544, + "learning_rate": 1.2952631452788826e-05, + "loss": 1.2999, + "step": 8924 + }, + { + "epoch": 0.9414556962025317, + "grad_norm": 0.6112254858016968, + "learning_rate": 1.2906215123861226e-05, + "loss": 1.3356, + "step": 8925 + }, + { + "epoch": 0.9415611814345991, + "grad_norm": 0.6003336310386658, + "learning_rate": 1.2859881389967687e-05, + "loss": 1.2819, + "step": 8926 + }, + { + "epoch": 0.9416666666666667, + "grad_norm": 0.6160174012184143, + "learning_rate": 1.2813630256300224e-05, + "loss": 1.3238, + "step": 8927 + }, + { + "epoch": 0.9417721518987342, + "grad_norm": 0.6224658489227295, + "learning_rate": 1.2767461728041357e-05, + "loss": 1.3292, + 
"step": 8928 + }, + { + "epoch": 0.9418776371308016, + "grad_norm": 0.6057482361793518, + "learning_rate": 1.2721375810364616e-05, + "loss": 1.2829, + "step": 8929 + }, + { + "epoch": 0.9419831223628692, + "grad_norm": 0.6292101740837097, + "learning_rate": 1.267537250843412e-05, + "loss": 1.288, + "step": 8930 + }, + { + "epoch": 0.9420886075949367, + "grad_norm": 0.6263541579246521, + "learning_rate": 1.2629451827404659e-05, + "loss": 1.2892, + "step": 8931 + }, + { + "epoch": 0.9421940928270042, + "grad_norm": 0.6244831681251526, + "learning_rate": 1.258361377242212e-05, + "loss": 1.3059, + "step": 8932 + }, + { + "epoch": 0.9422995780590717, + "grad_norm": 0.5943052172660828, + "learning_rate": 1.2537858348622728e-05, + "loss": 1.3118, + "step": 8933 + }, + { + "epoch": 0.9424050632911393, + "grad_norm": 0.6447764039039612, + "learning_rate": 1.2492185561133545e-05, + "loss": 1.3245, + "step": 8934 + }, + { + "epoch": 0.9425105485232067, + "grad_norm": 0.6074174046516418, + "learning_rate": 1.2446595415072565e-05, + "loss": 1.3181, + "step": 8935 + }, + { + "epoch": 0.9426160337552743, + "grad_norm": 0.5945388674736023, + "learning_rate": 1.2401087915548365e-05, + "loss": 1.2879, + "step": 8936 + }, + { + "epoch": 0.9427215189873418, + "grad_norm": 0.6170278787612915, + "learning_rate": 1.2355663067660283e-05, + "loss": 1.3199, + "step": 8937 + }, + { + "epoch": 0.9428270042194092, + "grad_norm": 0.6113978624343872, + "learning_rate": 1.2310320876498333e-05, + "loss": 1.2888, + "step": 8938 + }, + { + "epoch": 0.9429324894514768, + "grad_norm": 0.605751097202301, + "learning_rate": 1.2265061347143447e-05, + "loss": 1.324, + "step": 8939 + }, + { + "epoch": 0.9430379746835443, + "grad_norm": 0.6160303354263306, + "learning_rate": 1.2219884484667071e-05, + "loss": 1.3112, + "step": 8940 + }, + { + "epoch": 0.9431434599156118, + "grad_norm": 0.6060193181037903, + "learning_rate": 1.2174790294131405e-05, + "loss": 1.2796, + "step": 8941 + }, + { + "epoch": 
0.9432489451476793, + "grad_norm": 0.588793933391571, + "learning_rate": 1.2129778780589823e-05, + "loss": 1.2999, + "step": 8942 + }, + { + "epoch": 0.9433544303797469, + "grad_norm": 0.6192491054534912, + "learning_rate": 1.2084849949085791e-05, + "loss": 1.3064, + "step": 8943 + }, + { + "epoch": 0.9434599156118143, + "grad_norm": 0.6212785840034485, + "learning_rate": 1.2040003804653864e-05, + "loss": 1.3443, + "step": 8944 + }, + { + "epoch": 0.9435654008438819, + "grad_norm": 0.6134397983551025, + "learning_rate": 1.199524035231936e-05, + "loss": 1.2951, + "step": 8945 + }, + { + "epoch": 0.9436708860759494, + "grad_norm": 0.6075255870819092, + "learning_rate": 1.195055959709826e-05, + "loss": 1.3108, + "step": 8946 + }, + { + "epoch": 0.9437763713080168, + "grad_norm": 0.601874053478241, + "learning_rate": 1.1905961543997147e-05, + "loss": 1.2956, + "step": 8947 + }, + { + "epoch": 0.9438818565400844, + "grad_norm": 0.605904221534729, + "learning_rate": 1.186144619801352e-05, + "loss": 1.3065, + "step": 8948 + }, + { + "epoch": 0.9439873417721519, + "grad_norm": 0.6678119897842407, + "learning_rate": 1.1817013564135475e-05, + "loss": 1.3424, + "step": 8949 + }, + { + "epoch": 0.9440928270042194, + "grad_norm": 0.6156478524208069, + "learning_rate": 1.1772663647341947e-05, + "loss": 1.3122, + "step": 8950 + }, + { + "epoch": 0.9441983122362869, + "grad_norm": 0.6021909713745117, + "learning_rate": 1.1728396452602708e-05, + "loss": 1.2982, + "step": 8951 + }, + { + "epoch": 0.9443037974683545, + "grad_norm": 0.6126022338867188, + "learning_rate": 1.1684211984877957e-05, + "loss": 1.3117, + "step": 8952 + }, + { + "epoch": 0.9444092827004219, + "grad_norm": 0.6191853880882263, + "learning_rate": 1.1640110249118818e-05, + "loss": 1.3277, + "step": 8953 + }, + { + "epoch": 0.9445147679324895, + "grad_norm": 0.609967827796936, + "learning_rate": 1.1596091250267171e-05, + "loss": 1.3114, + "step": 8954 + }, + { + "epoch": 0.944620253164557, + "grad_norm": 
0.6243636608123779, + "learning_rate": 1.1552154993255488e-05, + "loss": 1.351, + "step": 8955 + }, + { + "epoch": 0.9447257383966244, + "grad_norm": 0.603793740272522, + "learning_rate": 1.1508301483007078e-05, + "loss": 1.2984, + "step": 8956 + }, + { + "epoch": 0.944831223628692, + "grad_norm": 0.6182936429977417, + "learning_rate": 1.1464530724435928e-05, + "loss": 1.3283, + "step": 8957 + }, + { + "epoch": 0.9449367088607595, + "grad_norm": 0.6033504009246826, + "learning_rate": 1.14208427224467e-05, + "loss": 1.2836, + "step": 8958 + }, + { + "epoch": 0.945042194092827, + "grad_norm": 0.5893195867538452, + "learning_rate": 1.137723748193506e-05, + "loss": 1.2937, + "step": 8959 + }, + { + "epoch": 0.9451476793248945, + "grad_norm": 0.6169829368591309, + "learning_rate": 1.1333715007786932e-05, + "loss": 1.3258, + "step": 8960 + }, + { + "epoch": 0.9452531645569621, + "grad_norm": 0.6240575313568115, + "learning_rate": 1.12902753048795e-05, + "loss": 1.2902, + "step": 8961 + }, + { + "epoch": 0.9453586497890295, + "grad_norm": 0.6235640048980713, + "learning_rate": 1.1246918378080202e-05, + "loss": 1.3029, + "step": 8962 + }, + { + "epoch": 0.945464135021097, + "grad_norm": 0.5921235084533691, + "learning_rate": 1.12036442322474e-05, + "loss": 1.3154, + "step": 8963 + }, + { + "epoch": 0.9455696202531646, + "grad_norm": 0.6724811792373657, + "learning_rate": 1.1160452872230303e-05, + "loss": 1.3078, + "step": 8964 + }, + { + "epoch": 0.945675105485232, + "grad_norm": 0.6412731409072876, + "learning_rate": 1.111734430286862e-05, + "loss": 1.3238, + "step": 8965 + }, + { + "epoch": 0.9457805907172996, + "grad_norm": 0.6037928462028503, + "learning_rate": 1.1074318528992905e-05, + "loss": 1.3474, + "step": 8966 + }, + { + "epoch": 0.9458860759493671, + "grad_norm": 0.6009837985038757, + "learning_rate": 1.1031375555424466e-05, + "loss": 1.2595, + "step": 8967 + }, + { + "epoch": 0.9459915611814346, + "grad_norm": 0.6652116775512695, + "learning_rate": 
1.0988515386975206e-05, + "loss": 1.3186, + "step": 8968 + }, + { + "epoch": 0.9460970464135021, + "grad_norm": 0.5968925356864929, + "learning_rate": 1.0945738028447783e-05, + "loss": 1.3229, + "step": 8969 + }, + { + "epoch": 0.9462025316455697, + "grad_norm": 0.7101601362228394, + "learning_rate": 1.0903043484635694e-05, + "loss": 1.3097, + "step": 8970 + }, + { + "epoch": 0.9463080168776371, + "grad_norm": 0.628033459186554, + "learning_rate": 1.0860431760323032e-05, + "loss": 1.3215, + "step": 8971 + }, + { + "epoch": 0.9464135021097047, + "grad_norm": 0.5867465138435364, + "learning_rate": 1.0817902860284723e-05, + "loss": 1.2935, + "step": 8972 + }, + { + "epoch": 0.9465189873417722, + "grad_norm": 0.623084306716919, + "learning_rate": 1.0775456789286291e-05, + "loss": 1.2569, + "step": 8973 + }, + { + "epoch": 0.9466244725738396, + "grad_norm": 0.591526448726654, + "learning_rate": 1.0733093552084016e-05, + "loss": 1.3005, + "step": 8974 + }, + { + "epoch": 0.9467299578059072, + "grad_norm": 0.6186650395393372, + "learning_rate": 1.0690813153425016e-05, + "loss": 1.329, + "step": 8975 + }, + { + "epoch": 0.9468354430379747, + "grad_norm": 0.626879870891571, + "learning_rate": 1.0648615598046834e-05, + "loss": 1.3192, + "step": 8976 + }, + { + "epoch": 0.9469409282700422, + "grad_norm": 0.6435131430625916, + "learning_rate": 1.0606500890678023e-05, + "loss": 1.3104, + "step": 8977 + }, + { + "epoch": 0.9470464135021097, + "grad_norm": 0.6157100200653076, + "learning_rate": 1.0564469036037722e-05, + "loss": 1.3278, + "step": 8978 + }, + { + "epoch": 0.9471518987341773, + "grad_norm": 0.6264392137527466, + "learning_rate": 1.0522520038835831e-05, + "loss": 1.2774, + "step": 8979 + }, + { + "epoch": 0.9472573839662447, + "grad_norm": 0.678875207901001, + "learning_rate": 1.0480653903772924e-05, + "loss": 1.3317, + "step": 8980 + }, + { + "epoch": 0.9473628691983123, + "grad_norm": 0.6293619275093079, + "learning_rate": 1.0438870635540332e-05, + "loss": 1.3103, 
+ "step": 8981 + }, + { + "epoch": 0.9474683544303798, + "grad_norm": 0.6236032843589783, + "learning_rate": 1.0397170238820142e-05, + "loss": 1.294, + "step": 8982 + }, + { + "epoch": 0.9475738396624472, + "grad_norm": 0.6298745274543762, + "learning_rate": 1.0355552718284949e-05, + "loss": 1.3277, + "step": 8983 + }, + { + "epoch": 0.9476793248945148, + "grad_norm": 0.6498255133628845, + "learning_rate": 1.0314018078598275e-05, + "loss": 1.3246, + "step": 8984 + }, + { + "epoch": 0.9477848101265823, + "grad_norm": 0.5982118844985962, + "learning_rate": 1.0272566324414313e-05, + "loss": 1.3258, + "step": 8985 + }, + { + "epoch": 0.9478902953586498, + "grad_norm": 0.5970702767372131, + "learning_rate": 1.0231197460377845e-05, + "loss": 1.3054, + "step": 8986 + }, + { + "epoch": 0.9479957805907173, + "grad_norm": 0.6223452687263489, + "learning_rate": 1.0189911491124582e-05, + "loss": 1.3092, + "step": 8987 + }, + { + "epoch": 0.9481012658227848, + "grad_norm": 0.6172961592674255, + "learning_rate": 1.0148708421280822e-05, + "loss": 1.3006, + "step": 8988 + }, + { + "epoch": 0.9482067510548523, + "grad_norm": 0.6278883218765259, + "learning_rate": 1.0107588255463373e-05, + "loss": 1.3192, + "step": 8989 + }, + { + "epoch": 0.9483122362869199, + "grad_norm": 0.6133033037185669, + "learning_rate": 1.0066550998280132e-05, + "loss": 1.2915, + "step": 8990 + }, + { + "epoch": 0.9484177215189873, + "grad_norm": 0.6007097959518433, + "learning_rate": 1.0025596654329504e-05, + "loss": 1.2737, + "step": 8991 + }, + { + "epoch": 0.9485232067510548, + "grad_norm": 0.6925298571586609, + "learning_rate": 9.984725228200654e-06, + "loss": 1.2882, + "step": 8992 + }, + { + "epoch": 0.9486286919831224, + "grad_norm": 0.6124686598777771, + "learning_rate": 9.943936724473412e-06, + "loss": 1.2959, + "step": 8993 + }, + { + "epoch": 0.9487341772151898, + "grad_norm": 0.6290698051452637, + "learning_rate": 9.903231147718294e-06, + "loss": 1.3152, + "step": 8994 + }, + { + "epoch": 
0.9488396624472574, + "grad_norm": 0.5920825004577637, + "learning_rate": 9.862608502496568e-06, + "loss": 1.3565, + "step": 8995 + }, + { + "epoch": 0.9489451476793249, + "grad_norm": 0.5937385559082031, + "learning_rate": 9.822068793360172e-06, + "loss": 1.2914, + "step": 8996 + }, + { + "epoch": 0.9490506329113924, + "grad_norm": 0.6077348589897156, + "learning_rate": 9.781612024851893e-06, + "loss": 1.3092, + "step": 8997 + }, + { + "epoch": 0.9491561181434599, + "grad_norm": 0.5994146466255188, + "learning_rate": 9.74123820150502e-06, + "loss": 1.3273, + "step": 8998 + }, + { + "epoch": 0.9492616033755275, + "grad_norm": 0.6348695158958435, + "learning_rate": 9.700947327843685e-06, + "loss": 1.3098, + "step": 8999 + }, + { + "epoch": 0.9493670886075949, + "grad_norm": 0.6127103567123413, + "learning_rate": 9.660739408382608e-06, + "loss": 1.2952, + "step": 9000 + }, + { + "epoch": 0.9494725738396624, + "grad_norm": 0.6223414540290833, + "learning_rate": 9.620614447627435e-06, + "loss": 1.3014, + "step": 9001 + }, + { + "epoch": 0.94957805907173, + "grad_norm": 0.700711727142334, + "learning_rate": 9.580572450074237e-06, + "loss": 1.3086, + "step": 9002 + }, + { + "epoch": 0.9496835443037974, + "grad_norm": 0.6033319234848022, + "learning_rate": 9.540613420209927e-06, + "loss": 1.3235, + "step": 9003 + }, + { + "epoch": 0.949789029535865, + "grad_norm": 0.6228060722351074, + "learning_rate": 9.500737362512168e-06, + "loss": 1.2764, + "step": 9004 + }, + { + "epoch": 0.9498945147679325, + "grad_norm": 0.6113051772117615, + "learning_rate": 9.460944281449307e-06, + "loss": 1.2654, + "step": 9005 + }, + { + "epoch": 0.95, + "grad_norm": 0.6277506351470947, + "learning_rate": 9.421234181480275e-06, + "loss": 1.3018, + "step": 9006 + }, + { + "epoch": 0.9501054852320675, + "grad_norm": 0.6141433715820312, + "learning_rate": 9.381607067054764e-06, + "loss": 1.3287, + "step": 9007 + }, + { + "epoch": 0.950210970464135, + "grad_norm": 0.6722803711891174, + 
"learning_rate": 9.342062942613222e-06, + "loss": 1.3246, + "step": 9008 + }, + { + "epoch": 0.9503164556962025, + "grad_norm": 0.6084029674530029, + "learning_rate": 9.302601812586852e-06, + "loss": 1.2897, + "step": 9009 + }, + { + "epoch": 0.95042194092827, + "grad_norm": 0.6457234621047974, + "learning_rate": 9.26322368139737e-06, + "loss": 1.2971, + "step": 9010 + }, + { + "epoch": 0.9505274261603376, + "grad_norm": 0.6146203279495239, + "learning_rate": 9.223928553457328e-06, + "loss": 1.2823, + "step": 9011 + }, + { + "epoch": 0.950632911392405, + "grad_norm": 0.5923100113868713, + "learning_rate": 9.184716433169955e-06, + "loss": 1.3244, + "step": 9012 + }, + { + "epoch": 0.9507383966244726, + "grad_norm": 0.6457542777061462, + "learning_rate": 9.145587324929066e-06, + "loss": 1.3232, + "step": 9013 + }, + { + "epoch": 0.9508438818565401, + "grad_norm": 0.6418234705924988, + "learning_rate": 9.106541233119409e-06, + "loss": 1.3152, + "step": 9014 + }, + { + "epoch": 0.9509493670886076, + "grad_norm": 0.7000805139541626, + "learning_rate": 9.06757816211623e-06, + "loss": 1.2988, + "step": 9015 + }, + { + "epoch": 0.9510548523206751, + "grad_norm": 0.6030864119529724, + "learning_rate": 9.028698116285538e-06, + "loss": 1.2526, + "step": 9016 + }, + { + "epoch": 0.9511603375527427, + "grad_norm": 0.588128387928009, + "learning_rate": 8.989901099984016e-06, + "loss": 1.2802, + "step": 9017 + }, + { + "epoch": 0.9512658227848101, + "grad_norm": 0.6096045970916748, + "learning_rate": 8.9511871175591e-06, + "loss": 1.3183, + "step": 9018 + }, + { + "epoch": 0.9513713080168776, + "grad_norm": 0.6224704384803772, + "learning_rate": 8.912556173348907e-06, + "loss": 1.3014, + "step": 9019 + }, + { + "epoch": 0.9514767932489452, + "grad_norm": 0.6199014186859131, + "learning_rate": 8.874008271682222e-06, + "loss": 1.3455, + "step": 9020 + }, + { + "epoch": 0.9515822784810126, + "grad_norm": 0.6084346175193787, + "learning_rate": 8.835543416878422e-06, + "loss": 1.3148, 
+ "step": 9021 + }, + { + "epoch": 0.9516877637130802, + "grad_norm": 0.6149401068687439, + "learning_rate": 8.797161613247728e-06, + "loss": 1.353, + "step": 9022 + }, + { + "epoch": 0.9517932489451477, + "grad_norm": 0.6286180019378662, + "learning_rate": 8.758862865091117e-06, + "loss": 1.3474, + "step": 9023 + }, + { + "epoch": 0.9518987341772152, + "grad_norm": 0.614165723323822, + "learning_rate": 8.72064717670007e-06, + "loss": 1.2841, + "step": 9024 + }, + { + "epoch": 0.9520042194092827, + "grad_norm": 0.6163848042488098, + "learning_rate": 8.68251455235683e-06, + "loss": 1.3311, + "step": 9025 + }, + { + "epoch": 0.9521097046413503, + "grad_norm": 0.6150457262992859, + "learning_rate": 8.644464996334395e-06, + "loss": 1.365, + "step": 9026 + }, + { + "epoch": 0.9522151898734177, + "grad_norm": 0.6420808434486389, + "learning_rate": 8.606498512896438e-06, + "loss": 1.3054, + "step": 9027 + }, + { + "epoch": 0.9523206751054852, + "grad_norm": 0.637153148651123, + "learning_rate": 8.568615106297223e-06, + "loss": 1.3079, + "step": 9028 + }, + { + "epoch": 0.9524261603375528, + "grad_norm": 0.6194609999656677, + "learning_rate": 8.53081478078177e-06, + "loss": 1.3405, + "step": 9029 + }, + { + "epoch": 0.9525316455696202, + "grad_norm": 0.6234562397003174, + "learning_rate": 8.493097540585775e-06, + "loss": 1.3003, + "step": 9030 + }, + { + "epoch": 0.9526371308016878, + "grad_norm": 0.6097232103347778, + "learning_rate": 8.455463389935774e-06, + "loss": 1.2704, + "step": 9031 + }, + { + "epoch": 0.9527426160337553, + "grad_norm": 0.6433289647102356, + "learning_rate": 8.417912333048727e-06, + "loss": 1.3398, + "step": 9032 + }, + { + "epoch": 0.9528481012658228, + "grad_norm": 0.638526976108551, + "learning_rate": 8.380444374132517e-06, + "loss": 1.3169, + "step": 9033 + }, + { + "epoch": 0.9529535864978903, + "grad_norm": 0.6146140098571777, + "learning_rate": 8.343059517385454e-06, + "loss": 1.3484, + "step": 9034 + }, + { + "epoch": 0.9530590717299579, + 
"grad_norm": 0.6253801584243774, + "learning_rate": 8.305757766996935e-06, + "loss": 1.2847, + "step": 9035 + }, + { + "epoch": 0.9531645569620253, + "grad_norm": 0.6227324604988098, + "learning_rate": 8.268539127146619e-06, + "loss": 1.2696, + "step": 9036 + }, + { + "epoch": 0.9532700421940928, + "grad_norm": 0.6142639517784119, + "learning_rate": 8.231403602005083e-06, + "loss": 1.2583, + "step": 9037 + }, + { + "epoch": 0.9533755274261604, + "grad_norm": 0.609444797039032, + "learning_rate": 8.194351195733585e-06, + "loss": 1.3037, + "step": 9038 + }, + { + "epoch": 0.9534810126582278, + "grad_norm": 0.6619121432304382, + "learning_rate": 8.157381912484053e-06, + "loss": 1.2962, + "step": 9039 + }, + { + "epoch": 0.9535864978902954, + "grad_norm": 0.618071436882019, + "learning_rate": 8.120495756399005e-06, + "loss": 1.3189, + "step": 9040 + }, + { + "epoch": 0.9536919831223629, + "grad_norm": 0.6329520344734192, + "learning_rate": 8.08369273161172e-06, + "loss": 1.3307, + "step": 9041 + }, + { + "epoch": 0.9537974683544304, + "grad_norm": 0.6140097975730896, + "learning_rate": 8.046972842246147e-06, + "loss": 1.3197, + "step": 9042 + }, + { + "epoch": 0.9539029535864979, + "grad_norm": 0.6007914543151855, + "learning_rate": 8.01033609241708e-06, + "loss": 1.2809, + "step": 9043 + }, + { + "epoch": 0.9540084388185655, + "grad_norm": 0.6065101623535156, + "learning_rate": 7.973782486229737e-06, + "loss": 1.3094, + "step": 9044 + }, + { + "epoch": 0.9541139240506329, + "grad_norm": 0.6016790866851807, + "learning_rate": 7.937312027780169e-06, + "loss": 1.327, + "step": 9045 + }, + { + "epoch": 0.9542194092827004, + "grad_norm": 0.5924515724182129, + "learning_rate": 7.900924721154945e-06, + "loss": 1.2959, + "step": 9046 + }, + { + "epoch": 0.954324894514768, + "grad_norm": 0.6219801902770996, + "learning_rate": 7.864620570431635e-06, + "loss": 1.3571, + "step": 9047 + }, + { + "epoch": 0.9544303797468354, + "grad_norm": 0.6143323183059692, + "learning_rate": 
7.828399579678153e-06, + "loss": 1.2898, + "step": 9048 + }, + { + "epoch": 0.954535864978903, + "grad_norm": 0.5952413082122803, + "learning_rate": 7.792261752953333e-06, + "loss": 1.3034, + "step": 9049 + }, + { + "epoch": 0.9546413502109705, + "grad_norm": 0.6210739612579346, + "learning_rate": 7.756207094306605e-06, + "loss": 1.3002, + "step": 9050 + }, + { + "epoch": 0.954746835443038, + "grad_norm": 0.6145798563957214, + "learning_rate": 7.720235607777987e-06, + "loss": 1.3275, + "step": 9051 + }, + { + "epoch": 0.9548523206751055, + "grad_norm": 0.6134696006774902, + "learning_rate": 7.684347297398254e-06, + "loss": 1.3108, + "step": 9052 + }, + { + "epoch": 0.9549578059071729, + "grad_norm": 0.6119007468223572, + "learning_rate": 7.648542167189021e-06, + "loss": 1.3077, + "step": 9053 + }, + { + "epoch": 0.9550632911392405, + "grad_norm": 0.5918996930122375, + "learning_rate": 7.612820221162331e-06, + "loss": 1.3315, + "step": 9054 + }, + { + "epoch": 0.955168776371308, + "grad_norm": 0.5962055325508118, + "learning_rate": 7.577181463320981e-06, + "loss": 1.296, + "step": 9055 + }, + { + "epoch": 0.9552742616033755, + "grad_norm": 0.6168593764305115, + "learning_rate": 7.541625897658444e-06, + "loss": 1.3336, + "step": 9056 + }, + { + "epoch": 0.955379746835443, + "grad_norm": 0.6053595542907715, + "learning_rate": 7.506153528159032e-06, + "loss": 1.3, + "step": 9057 + }, + { + "epoch": 0.9554852320675106, + "grad_norm": 0.6158223748207092, + "learning_rate": 7.470764358797566e-06, + "loss": 1.3231, + "step": 9058 + }, + { + "epoch": 0.955590717299578, + "grad_norm": 0.6304230690002441, + "learning_rate": 7.435458393539457e-06, + "loss": 1.2973, + "step": 9059 + }, + { + "epoch": 0.9556962025316456, + "grad_norm": 0.5844538807868958, + "learning_rate": 7.400235636340957e-06, + "loss": 1.2946, + "step": 9060 + }, + { + "epoch": 0.9558016877637131, + "grad_norm": 0.6077580451965332, + "learning_rate": 7.3650960911490764e-06, + "loss": 1.3263, + "step": 9061 + 
}, + { + "epoch": 0.9559071729957805, + "grad_norm": 0.621425449848175, + "learning_rate": 7.330039761901247e-06, + "loss": 1.3258, + "step": 9062 + }, + { + "epoch": 0.9560126582278481, + "grad_norm": 0.590002179145813, + "learning_rate": 7.295066652525828e-06, + "loss": 1.3157, + "step": 9063 + }, + { + "epoch": 0.9561181434599156, + "grad_norm": 0.5998336672782898, + "learning_rate": 7.260176766941601e-06, + "loss": 1.297, + "step": 9064 + }, + { + "epoch": 0.9562236286919831, + "grad_norm": 0.6314411163330078, + "learning_rate": 7.225370109058188e-06, + "loss": 1.3325, + "step": 9065 + }, + { + "epoch": 0.9563291139240506, + "grad_norm": 0.6049730777740479, + "learning_rate": 7.190646682775886e-06, + "loss": 1.2911, + "step": 9066 + }, + { + "epoch": 0.9564345991561182, + "grad_norm": 0.6107548475265503, + "learning_rate": 7.1560064919855835e-06, + "loss": 1.3339, + "step": 9067 + }, + { + "epoch": 0.9565400843881856, + "grad_norm": 0.5841323733329773, + "learning_rate": 7.121449540568842e-06, + "loss": 1.3023, + "step": 9068 + }, + { + "epoch": 0.9566455696202532, + "grad_norm": 0.6621530652046204, + "learning_rate": 7.086975832398146e-06, + "loss": 1.3063, + "step": 9069 + }, + { + "epoch": 0.9567510548523207, + "grad_norm": 0.6185205578804016, + "learning_rate": 7.0525853713362395e-06, + "loss": 1.29, + "step": 9070 + }, + { + "epoch": 0.9568565400843881, + "grad_norm": 0.6044824719429016, + "learning_rate": 7.018278161236791e-06, + "loss": 1.3404, + "step": 9071 + }, + { + "epoch": 0.9569620253164557, + "grad_norm": 0.6589377522468567, + "learning_rate": 6.984054205944141e-06, + "loss": 1.2902, + "step": 9072 + }, + { + "epoch": 0.9570675105485232, + "grad_norm": 0.6207507252693176, + "learning_rate": 6.949913509293221e-06, + "loss": 1.3331, + "step": 9073 + }, + { + "epoch": 0.9571729957805907, + "grad_norm": 0.6071805357933044, + "learning_rate": 6.915856075109722e-06, + "loss": 1.3297, + "step": 9074 + }, + { + "epoch": 0.9572784810126582, + "grad_norm": 
0.5995541214942932, + "learning_rate": 6.881881907209841e-06, + "loss": 1.2901, + "step": 9075 + }, + { + "epoch": 0.9573839662447258, + "grad_norm": 0.5987622141838074, + "learning_rate": 6.847991009400617e-06, + "loss": 1.2757, + "step": 9076 + }, + { + "epoch": 0.9574894514767932, + "grad_norm": 0.6067409515380859, + "learning_rate": 6.814183385479677e-06, + "loss": 1.3326, + "step": 9077 + }, + { + "epoch": 0.9575949367088608, + "grad_norm": 0.5975245237350464, + "learning_rate": 6.780459039235409e-06, + "loss": 1.3281, + "step": 9078 + }, + { + "epoch": 0.9577004219409283, + "grad_norm": 0.595703125, + "learning_rate": 6.746817974446706e-06, + "loss": 1.2685, + "step": 9079 + }, + { + "epoch": 0.9578059071729957, + "grad_norm": 0.6109248399734497, + "learning_rate": 6.71326019488322e-06, + "loss": 1.3311, + "step": 9080 + }, + { + "epoch": 0.9579113924050633, + "grad_norm": 0.6452860832214355, + "learning_rate": 6.679785704305358e-06, + "loss": 1.3302, + "step": 9081 + }, + { + "epoch": 0.9580168776371308, + "grad_norm": 0.6723264455795288, + "learning_rate": 6.6463945064639544e-06, + "loss": 1.3172, + "step": 9082 + }, + { + "epoch": 0.9581223628691983, + "grad_norm": 0.6118870377540588, + "learning_rate": 6.6130866051007654e-06, + "loss": 1.3186, + "step": 9083 + }, + { + "epoch": 0.9582278481012658, + "grad_norm": 0.6012059450149536, + "learning_rate": 6.57986200394814e-06, + "loss": 1.3054, + "step": 9084 + }, + { + "epoch": 0.9583333333333334, + "grad_norm": 0.6078397631645203, + "learning_rate": 6.546720706728931e-06, + "loss": 1.2742, + "step": 9085 + }, + { + "epoch": 0.9584388185654008, + "grad_norm": 0.6143258810043335, + "learning_rate": 6.513662717156838e-06, + "loss": 1.3105, + "step": 9086 + }, + { + "epoch": 0.9585443037974684, + "grad_norm": 0.6319589614868164, + "learning_rate": 6.480688038936311e-06, + "loss": 1.292, + "step": 9087 + }, + { + "epoch": 0.9586497890295359, + "grad_norm": 0.6022614240646362, + "learning_rate": 
6.447796675762146e-06, + "loss": 1.3274, + "step": 9088 + }, + { + "epoch": 0.9587552742616033, + "grad_norm": 0.6115493774414062, + "learning_rate": 6.414988631320062e-06, + "loss": 1.3052, + "step": 9089 + }, + { + "epoch": 0.9588607594936709, + "grad_norm": 0.6205894351005554, + "learning_rate": 6.3822639092862846e-06, + "loss": 1.3393, + "step": 9090 + }, + { + "epoch": 0.9589662447257384, + "grad_norm": 0.5862708687782288, + "learning_rate": 6.349622513327963e-06, + "loss": 1.2788, + "step": 9091 + }, + { + "epoch": 0.9590717299578059, + "grad_norm": 0.6584394574165344, + "learning_rate": 6.317064447102505e-06, + "loss": 1.2947, + "step": 9092 + }, + { + "epoch": 0.9591772151898734, + "grad_norm": 0.6073262095451355, + "learning_rate": 6.28458971425841e-06, + "loss": 1.3051, + "step": 9093 + }, + { + "epoch": 0.959282700421941, + "grad_norm": 0.6275874376296997, + "learning_rate": 6.252198318434432e-06, + "loss": 1.276, + "step": 9094 + }, + { + "epoch": 0.9593881856540084, + "grad_norm": 0.5994271636009216, + "learning_rate": 6.219890263260336e-06, + "loss": 1.2837, + "step": 9095 + }, + { + "epoch": 0.959493670886076, + "grad_norm": 0.6029690504074097, + "learning_rate": 6.187665552356392e-06, + "loss": 1.3251, + "step": 9096 + }, + { + "epoch": 0.9595991561181435, + "grad_norm": 0.6316705346107483, + "learning_rate": 6.155524189333461e-06, + "loss": 1.3395, + "step": 9097 + }, + { + "epoch": 0.9597046413502109, + "grad_norm": 0.6190541982650757, + "learning_rate": 6.123466177793247e-06, + "loss": 1.276, + "step": 9098 + }, + { + "epoch": 0.9598101265822785, + "grad_norm": 0.6116499900817871, + "learning_rate": 6.091491521327958e-06, + "loss": 1.2548, + "step": 9099 + }, + { + "epoch": 0.959915611814346, + "grad_norm": 0.6070482134819031, + "learning_rate": 6.059600223520478e-06, + "loss": 1.3007, + "step": 9100 + }, + { + "epoch": 0.9600210970464135, + "grad_norm": 0.5823255181312561, + "learning_rate": 6.027792287944367e-06, + "loss": 1.326, + "step": 9101 
+ }, + { + "epoch": 0.960126582278481, + "grad_norm": 0.603337287902832, + "learning_rate": 5.996067718163939e-06, + "loss": 1.2952, + "step": 9102 + }, + { + "epoch": 0.9602320675105486, + "grad_norm": 0.6126505732536316, + "learning_rate": 5.964426517734101e-06, + "loss": 1.2901, + "step": 9103 + }, + { + "epoch": 0.960337552742616, + "grad_norm": 0.6071150302886963, + "learning_rate": 5.932868690200266e-06, + "loss": 1.3205, + "step": 9104 + }, + { + "epoch": 0.9604430379746836, + "grad_norm": 0.6275913119316101, + "learning_rate": 5.901394239098856e-06, + "loss": 1.3083, + "step": 9105 + }, + { + "epoch": 0.9605485232067511, + "grad_norm": 0.6112920641899109, + "learning_rate": 5.870003167956634e-06, + "loss": 1.3179, + "step": 9106 + }, + { + "epoch": 0.9606540084388185, + "grad_norm": 0.6044462323188782, + "learning_rate": 5.838695480291034e-06, + "loss": 1.2969, + "step": 9107 + }, + { + "epoch": 0.9607594936708861, + "grad_norm": 0.6052528619766235, + "learning_rate": 5.807471179610418e-06, + "loss": 1.2788, + "step": 9108 + }, + { + "epoch": 0.9608649789029536, + "grad_norm": 0.6109786033630371, + "learning_rate": 5.776330269413488e-06, + "loss": 1.3298, + "step": 9109 + }, + { + "epoch": 0.9609704641350211, + "grad_norm": 0.6078574061393738, + "learning_rate": 5.745272753189784e-06, + "loss": 1.2841, + "step": 9110 + }, + { + "epoch": 0.9610759493670886, + "grad_norm": 0.5969375371932983, + "learning_rate": 5.714298634419524e-06, + "loss": 1.3092, + "step": 9111 + }, + { + "epoch": 0.9611814345991562, + "grad_norm": 0.6014569401741028, + "learning_rate": 5.6834079165733464e-06, + "loss": 1.3074, + "step": 9112 + }, + { + "epoch": 0.9612869198312236, + "grad_norm": 0.5850406885147095, + "learning_rate": 5.652600603112818e-06, + "loss": 1.2977, + "step": 9113 + }, + { + "epoch": 0.9613924050632912, + "grad_norm": 0.6029552221298218, + "learning_rate": 5.6218766974900915e-06, + "loss": 1.3038, + "step": 9114 + }, + { + "epoch": 0.9614978902953587, + 
"grad_norm": 0.597920835018158, + "learning_rate": 5.591236203147915e-06, + "loss": 1.3058, + "step": 9115 + }, + { + "epoch": 0.9616033755274261, + "grad_norm": 0.6070893406867981, + "learning_rate": 5.560679123519624e-06, + "loss": 1.353, + "step": 9116 + }, + { + "epoch": 0.9617088607594937, + "grad_norm": 0.6050568222999573, + "learning_rate": 5.530205462029314e-06, + "loss": 1.3167, + "step": 9117 + }, + { + "epoch": 0.9618143459915611, + "grad_norm": 0.6023545861244202, + "learning_rate": 5.499815222091836e-06, + "loss": 1.3522, + "step": 9118 + }, + { + "epoch": 0.9619198312236287, + "grad_norm": 0.6251962780952454, + "learning_rate": 5.469508407112467e-06, + "loss": 1.3343, + "step": 9119 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 0.5925888419151306, + "learning_rate": 5.439285020487156e-06, + "loss": 1.2674, + "step": 9120 + }, + { + "epoch": 0.9621308016877637, + "grad_norm": 0.6372910141944885, + "learning_rate": 5.409145065602694e-06, + "loss": 1.3219, + "step": 9121 + }, + { + "epoch": 0.9622362869198312, + "grad_norm": 0.6155081987380981, + "learning_rate": 5.379088545836464e-06, + "loss": 1.2855, + "step": 9122 + }, + { + "epoch": 0.9623417721518988, + "grad_norm": 0.6080366373062134, + "learning_rate": 5.349115464556354e-06, + "loss": 1.3068, + "step": 9123 + }, + { + "epoch": 0.9624472573839662, + "grad_norm": 0.5975385308265686, + "learning_rate": 5.319225825120927e-06, + "loss": 1.287, + "step": 9124 + }, + { + "epoch": 0.9625527426160337, + "grad_norm": 0.5907671451568604, + "learning_rate": 5.289419630879672e-06, + "loss": 1.2899, + "step": 9125 + }, + { + "epoch": 0.9626582278481013, + "grad_norm": 0.6060198545455933, + "learning_rate": 5.2596968851724155e-06, + "loss": 1.279, + "step": 9126 + }, + { + "epoch": 0.9627637130801687, + "grad_norm": 0.6004933714866638, + "learning_rate": 5.230057591329662e-06, + "loss": 1.3103, + "step": 9127 + }, + { + "epoch": 0.9628691983122363, + "grad_norm": 0.6025012731552124, + "learning_rate": 
5.200501752672754e-06, + "loss": 1.2799, + "step": 9128 + }, + { + "epoch": 0.9629746835443038, + "grad_norm": 0.6052520275115967, + "learning_rate": 5.171029372513458e-06, + "loss": 1.2716, + "step": 9129 + }, + { + "epoch": 0.9630801687763713, + "grad_norm": 0.6192530393600464, + "learning_rate": 5.141640454154467e-06, + "loss": 1.3711, + "step": 9130 + }, + { + "epoch": 0.9631856540084388, + "grad_norm": 0.6085618734359741, + "learning_rate": 5.112335000888813e-06, + "loss": 1.3267, + "step": 9131 + }, + { + "epoch": 0.9632911392405064, + "grad_norm": 0.5921602845191956, + "learning_rate": 5.083113016000368e-06, + "loss": 1.3476, + "step": 9132 + }, + { + "epoch": 0.9633966244725738, + "grad_norm": 0.6457863450050354, + "learning_rate": 5.053974502763681e-06, + "loss": 1.3587, + "step": 9133 + }, + { + "epoch": 0.9635021097046413, + "grad_norm": 0.6006467938423157, + "learning_rate": 5.024919464443723e-06, + "loss": 1.2825, + "step": 9134 + }, + { + "epoch": 0.9636075949367089, + "grad_norm": 0.6227046847343445, + "learning_rate": 4.995947904296305e-06, + "loss": 1.3014, + "step": 9135 + }, + { + "epoch": 0.9637130801687763, + "grad_norm": 0.5924714803695679, + "learning_rate": 4.967059825567832e-06, + "loss": 1.3183, + "step": 9136 + }, + { + "epoch": 0.9638185654008439, + "grad_norm": 0.6070966720581055, + "learning_rate": 4.938255231495464e-06, + "loss": 1.3169, + "step": 9137 + }, + { + "epoch": 0.9639240506329114, + "grad_norm": 0.6102980971336365, + "learning_rate": 4.909534125306702e-06, + "loss": 1.3115, + "step": 9138 + }, + { + "epoch": 0.9640295358649789, + "grad_norm": 0.6149189472198486, + "learning_rate": 4.880896510220056e-06, + "loss": 1.3083, + "step": 9139 + }, + { + "epoch": 0.9641350210970464, + "grad_norm": 0.6589926481246948, + "learning_rate": 4.852342389444458e-06, + "loss": 1.3119, + "step": 9140 + }, + { + "epoch": 0.964240506329114, + "grad_norm": 0.6563957333564758, + "learning_rate": 4.823871766179516e-06, + "loss": 1.2933, + "step": 
9141 + }, + { + "epoch": 0.9643459915611814, + "grad_norm": 0.605222225189209, + "learning_rate": 4.7954846436155104e-06, + "loss": 1.2979, + "step": 9142 + }, + { + "epoch": 0.9644514767932489, + "grad_norm": 0.6014295220375061, + "learning_rate": 4.767181024933398e-06, + "loss": 1.3059, + "step": 9143 + }, + { + "epoch": 0.9645569620253165, + "grad_norm": 0.6125067472457886, + "learning_rate": 4.738960913304724e-06, + "loss": 1.3169, + "step": 9144 + }, + { + "epoch": 0.9646624472573839, + "grad_norm": 0.600789487361908, + "learning_rate": 4.710824311891709e-06, + "loss": 1.3015, + "step": 9145 + }, + { + "epoch": 0.9647679324894515, + "grad_norm": 0.6350312232971191, + "learning_rate": 4.682771223847166e-06, + "loss": 1.3255, + "step": 9146 + }, + { + "epoch": 0.964873417721519, + "grad_norm": 0.6209599375724792, + "learning_rate": 4.654801652314577e-06, + "loss": 1.327, + "step": 9147 + }, + { + "epoch": 0.9649789029535865, + "grad_norm": 0.6209757924079895, + "learning_rate": 4.626915600428105e-06, + "loss": 1.2983, + "step": 9148 + }, + { + "epoch": 0.965084388185654, + "grad_norm": 0.584194004535675, + "learning_rate": 4.5991130713124995e-06, + "loss": 1.3149, + "step": 9149 + }, + { + "epoch": 0.9651898734177216, + "grad_norm": 0.6205872893333435, + "learning_rate": 4.571394068083185e-06, + "loss": 1.2917, + "step": 9150 + }, + { + "epoch": 0.965295358649789, + "grad_norm": 0.6304208636283875, + "learning_rate": 4.543758593846175e-06, + "loss": 1.2747, + "step": 9151 + }, + { + "epoch": 0.9654008438818565, + "grad_norm": 0.6094302535057068, + "learning_rate": 4.516206651698246e-06, + "loss": 1.3326, + "step": 9152 + }, + { + "epoch": 0.9655063291139241, + "grad_norm": 0.6225808262825012, + "learning_rate": 4.488738244726593e-06, + "loss": 1.3256, + "step": 9153 + }, + { + "epoch": 0.9656118143459915, + "grad_norm": 0.6066922545433044, + "learning_rate": 4.4613533760093365e-06, + "loss": 1.3221, + "step": 9154 + }, + { + "epoch": 0.9657172995780591, + 
"grad_norm": 0.6474112868309021, + "learning_rate": 4.434052048615022e-06, + "loss": 1.3242, + "step": 9155 + }, + { + "epoch": 0.9658227848101266, + "grad_norm": 0.5948792099952698, + "learning_rate": 4.4068342656028715e-06, + "loss": 1.3037, + "step": 9156 + }, + { + "epoch": 0.9659282700421941, + "grad_norm": 0.6202396154403687, + "learning_rate": 4.37970003002286e-06, + "loss": 1.2961, + "step": 9157 + }, + { + "epoch": 0.9660337552742616, + "grad_norm": 0.6154382824897766, + "learning_rate": 4.352649344915471e-06, + "loss": 1.3421, + "step": 9158 + }, + { + "epoch": 0.9661392405063292, + "grad_norm": 0.6569380760192871, + "learning_rate": 4.325682213311782e-06, + "loss": 1.293, + "step": 9159 + }, + { + "epoch": 0.9662447257383966, + "grad_norm": 0.6025830507278442, + "learning_rate": 4.298798638233709e-06, + "loss": 1.3172, + "step": 9160 + }, + { + "epoch": 0.9663502109704641, + "grad_norm": 0.6088576316833496, + "learning_rate": 4.271998622693674e-06, + "loss": 1.2687, + "step": 9161 + }, + { + "epoch": 0.9664556962025317, + "grad_norm": 0.6082010865211487, + "learning_rate": 4.245282169694692e-06, + "loss": 1.2926, + "step": 9162 + }, + { + "epoch": 0.9665611814345991, + "grad_norm": 0.6357792019844055, + "learning_rate": 4.218649282230536e-06, + "loss": 1.2817, + "step": 9163 + }, + { + "epoch": 0.9666666666666667, + "grad_norm": 0.6447420716285706, + "learning_rate": 4.192099963285484e-06, + "loss": 1.3536, + "step": 9164 + }, + { + "epoch": 0.9667721518987342, + "grad_norm": 0.6374671459197998, + "learning_rate": 4.165634215834574e-06, + "loss": 1.3564, + "step": 9165 + }, + { + "epoch": 0.9668776371308017, + "grad_norm": 0.6627777218818665, + "learning_rate": 4.139252042843517e-06, + "loss": 1.3297, + "step": 9166 + }, + { + "epoch": 0.9669831223628692, + "grad_norm": 0.6141882538795471, + "learning_rate": 4.112953447268364e-06, + "loss": 1.3156, + "step": 9167 + }, + { + "epoch": 0.9670886075949368, + "grad_norm": 0.6399015188217163, + 
"learning_rate": 4.086738432056092e-06, + "loss": 1.3059, + "step": 9168 + }, + { + "epoch": 0.9671940928270042, + "grad_norm": 0.6230472326278687, + "learning_rate": 4.060607000144351e-06, + "loss": 1.3434, + "step": 9169 + }, + { + "epoch": 0.9672995780590717, + "grad_norm": 0.611732542514801, + "learning_rate": 4.034559154461049e-06, + "loss": 1.2751, + "step": 9170 + }, + { + "epoch": 0.9674050632911393, + "grad_norm": 0.6172710061073303, + "learning_rate": 4.008594897925183e-06, + "loss": 1.2636, + "step": 9171 + }, + { + "epoch": 0.9675105485232067, + "grad_norm": 0.624755322933197, + "learning_rate": 3.982714233446094e-06, + "loss": 1.3505, + "step": 9172 + }, + { + "epoch": 0.9676160337552743, + "grad_norm": 0.6099667549133301, + "learning_rate": 3.956917163923879e-06, + "loss": 1.3298, + "step": 9173 + }, + { + "epoch": 0.9677215189873418, + "grad_norm": 0.6309146881103516, + "learning_rate": 3.931203692249141e-06, + "loss": 1.3162, + "step": 9174 + }, + { + "epoch": 0.9678270042194093, + "grad_norm": 0.6095395684242249, + "learning_rate": 3.905573821303327e-06, + "loss": 1.2985, + "step": 9175 + }, + { + "epoch": 0.9679324894514768, + "grad_norm": 0.6198866963386536, + "learning_rate": 3.880027553958304e-06, + "loss": 1.289, + "step": 9176 + }, + { + "epoch": 0.9680379746835444, + "grad_norm": 0.6115739941596985, + "learning_rate": 3.8545648930767005e-06, + "loss": 1.3136, + "step": 9177 + }, + { + "epoch": 0.9681434599156118, + "grad_norm": 0.6176424622535706, + "learning_rate": 3.8291858415117344e-06, + "loss": 1.3332, + "step": 9178 + }, + { + "epoch": 0.9682489451476793, + "grad_norm": 0.5967671275138855, + "learning_rate": 3.803890402107213e-06, + "loss": 1.3261, + "step": 9179 + }, + { + "epoch": 0.9683544303797469, + "grad_norm": 0.6086543798446655, + "learning_rate": 3.7786785776976198e-06, + "loss": 1.286, + "step": 9180 + }, + { + "epoch": 0.9684599156118143, + "grad_norm": 0.6450983285903931, + "learning_rate": 3.7535503711080276e-06, + "loss": 
1.318, + "step": 9181 + }, + { + "epoch": 0.9685654008438819, + "grad_norm": 0.6077331304550171, + "learning_rate": 3.7285057851543515e-06, + "loss": 1.2985, + "step": 9182 + }, + { + "epoch": 0.9686708860759494, + "grad_norm": 0.5920737981796265, + "learning_rate": 3.703544822642846e-06, + "loss": 1.2341, + "step": 9183 + }, + { + "epoch": 0.9687763713080169, + "grad_norm": 0.6512954235076904, + "learning_rate": 3.6786674863704406e-06, + "loss": 1.3222, + "step": 9184 + }, + { + "epoch": 0.9688818565400844, + "grad_norm": 0.6606256365776062, + "learning_rate": 3.6538737791249053e-06, + "loss": 1.3175, + "step": 9185 + }, + { + "epoch": 0.9689873417721518, + "grad_norm": 0.5998828411102295, + "learning_rate": 3.629163703684352e-06, + "loss": 1.2943, + "step": 9186 + }, + { + "epoch": 0.9690928270042194, + "grad_norm": 0.609951376914978, + "learning_rate": 3.604537262817814e-06, + "loss": 1.321, + "step": 9187 + }, + { + "epoch": 0.9691983122362869, + "grad_norm": 0.6112288236618042, + "learning_rate": 3.579994459284752e-06, + "loss": 1.3212, + "step": 9188 + }, + { + "epoch": 0.9693037974683544, + "grad_norm": 0.6649456024169922, + "learning_rate": 3.555535295835216e-06, + "loss": 1.3292, + "step": 9189 + }, + { + "epoch": 0.9694092827004219, + "grad_norm": 0.5992990136146545, + "learning_rate": 3.5311597752100964e-06, + "loss": 1.2822, + "step": 9190 + }, + { + "epoch": 0.9695147679324895, + "grad_norm": 0.6234458088874817, + "learning_rate": 3.506867900140792e-06, + "loss": 1.3397, + "step": 9191 + }, + { + "epoch": 0.9696202531645569, + "grad_norm": 0.588823676109314, + "learning_rate": 3.4826596733492087e-06, + "loss": 1.312, + "step": 9192 + }, + { + "epoch": 0.9697257383966245, + "grad_norm": 0.6101092100143433, + "learning_rate": 3.4585350975481766e-06, + "loss": 1.277, + "step": 9193 + }, + { + "epoch": 0.969831223628692, + "grad_norm": 0.6014223098754883, + "learning_rate": 3.4344941754408663e-06, + "loss": 1.3147, + "step": 9194 + }, + { + "epoch": 
0.9699367088607594, + "grad_norm": 0.6698524355888367, + "learning_rate": 3.4105369097211238e-06, + "loss": 1.3002, + "step": 9195 + }, + { + "epoch": 0.970042194092827, + "grad_norm": 0.5974695086479187, + "learning_rate": 3.386663303073634e-06, + "loss": 1.3081, + "step": 9196 + }, + { + "epoch": 0.9701476793248945, + "grad_norm": 0.624389111995697, + "learning_rate": 3.362873358173424e-06, + "loss": 1.3148, + "step": 9197 + }, + { + "epoch": 0.970253164556962, + "grad_norm": 0.5950706601142883, + "learning_rate": 3.339167077686278e-06, + "loss": 1.2839, + "step": 9198 + }, + { + "epoch": 0.9703586497890295, + "grad_norm": 0.6142072081565857, + "learning_rate": 3.3155444642687384e-06, + "loss": 1.3379, + "step": 9199 + }, + { + "epoch": 0.9704641350210971, + "grad_norm": 0.6384198069572449, + "learning_rate": 3.2920055205676867e-06, + "loss": 1.2835, + "step": 9200 + }, + { + "epoch": 0.9705696202531645, + "grad_norm": 0.6497485637664795, + "learning_rate": 3.2685502492208475e-06, + "loss": 1.3211, + "step": 9201 + }, + { + "epoch": 0.9706751054852321, + "grad_norm": 0.5993157625198364, + "learning_rate": 3.245178652856534e-06, + "loss": 1.3317, + "step": 9202 + }, + { + "epoch": 0.9707805907172996, + "grad_norm": 0.5974697470664978, + "learning_rate": 3.221890734093569e-06, + "loss": 1.2823, + "step": 9203 + }, + { + "epoch": 0.970886075949367, + "grad_norm": 0.609333872795105, + "learning_rate": 3.198686495541531e-06, + "loss": 1.3259, + "step": 9204 + }, + { + "epoch": 0.9709915611814346, + "grad_norm": 0.6013405919075012, + "learning_rate": 3.1755659398005066e-06, + "loss": 1.3287, + "step": 9205 + }, + { + "epoch": 0.9710970464135021, + "grad_norm": 0.6109451055526733, + "learning_rate": 3.152529069461424e-06, + "loss": 1.273, + "step": 9206 + }, + { + "epoch": 0.9712025316455696, + "grad_norm": 0.6293373703956604, + "learning_rate": 3.129575887105468e-06, + "loss": 1.3239, + "step": 9207 + }, + { + "epoch": 0.9713080168776371, + "grad_norm": 
0.6113551259040833, + "learning_rate": 3.1067063953048313e-06, + "loss": 1.2979, + "step": 9208 + }, + { + "epoch": 0.9714135021097047, + "grad_norm": 0.6004241704940796, + "learning_rate": 3.0839205966220474e-06, + "loss": 1.2885, + "step": 9209 + }, + { + "epoch": 0.9715189873417721, + "grad_norm": 0.610974907875061, + "learning_rate": 3.06121849361049e-06, + "loss": 1.303, + "step": 9210 + }, + { + "epoch": 0.9716244725738397, + "grad_norm": 0.6475294232368469, + "learning_rate": 3.0386000888139588e-06, + "loss": 1.3165, + "step": 9211 + }, + { + "epoch": 0.9717299578059072, + "grad_norm": 0.6089231967926025, + "learning_rate": 3.0160653847669252e-06, + "loss": 1.3276, + "step": 9212 + }, + { + "epoch": 0.9718354430379746, + "grad_norm": 0.5976354479789734, + "learning_rate": 2.9936143839946193e-06, + "loss": 1.3413, + "step": 9213 + }, + { + "epoch": 0.9719409282700422, + "grad_norm": 0.6177767515182495, + "learning_rate": 2.9712470890126962e-06, + "loss": 1.2898, + "step": 9214 + }, + { + "epoch": 0.9720464135021097, + "grad_norm": 0.594921350479126, + "learning_rate": 2.9489635023275676e-06, + "loss": 1.3014, + "step": 9215 + }, + { + "epoch": 0.9721518987341772, + "grad_norm": 0.6232306957244873, + "learning_rate": 2.9267636264361517e-06, + "loss": 1.2751, + "step": 9216 + }, + { + "epoch": 0.9722573839662447, + "grad_norm": 0.6209958791732788, + "learning_rate": 2.90464746382621e-06, + "loss": 1.3086, + "step": 9217 + }, + { + "epoch": 0.9723628691983123, + "grad_norm": 0.6050532460212708, + "learning_rate": 2.8826150169758425e-06, + "loss": 1.2684, + "step": 9218 + }, + { + "epoch": 0.9724683544303797, + "grad_norm": 0.6399935483932495, + "learning_rate": 2.8606662883539082e-06, + "loss": 1.314, + "step": 9219 + }, + { + "epoch": 0.9725738396624473, + "grad_norm": 0.6222728490829468, + "learning_rate": 2.838801280419856e-06, + "loss": 1.3045, + "step": 9220 + }, + { + "epoch": 0.9726793248945148, + "grad_norm": 0.6035630702972412, + "learning_rate": 
2.817019995623893e-06, + "loss": 1.319, + "step": 9221 + }, + { + "epoch": 0.9727848101265822, + "grad_norm": 0.5845690369606018, + "learning_rate": 2.7953224364065667e-06, + "loss": 1.2716, + "step": 9222 + }, + { + "epoch": 0.9728902953586498, + "grad_norm": 0.6042241454124451, + "learning_rate": 2.7737086051992653e-06, + "loss": 1.3092, + "step": 9223 + }, + { + "epoch": 0.9729957805907173, + "grad_norm": 0.6444039940834045, + "learning_rate": 2.752178504423969e-06, + "loss": 1.285, + "step": 9224 + }, + { + "epoch": 0.9731012658227848, + "grad_norm": 0.6288872361183167, + "learning_rate": 2.7307321364930804e-06, + "loss": 1.2939, + "step": 9225 + }, + { + "epoch": 0.9732067510548523, + "grad_norm": 0.6180295348167419, + "learning_rate": 2.7093695038099277e-06, + "loss": 1.3104, + "step": 9226 + }, + { + "epoch": 0.9733122362869199, + "grad_norm": 0.6004260182380676, + "learning_rate": 2.6880906087682622e-06, + "loss": 1.281, + "step": 9227 + }, + { + "epoch": 0.9734177215189873, + "grad_norm": 0.6085917949676514, + "learning_rate": 2.66689545375251e-06, + "loss": 1.2992, + "step": 9228 + }, + { + "epoch": 0.9735232067510549, + "grad_norm": 0.6242707967758179, + "learning_rate": 2.6457840411376888e-06, + "loss": 1.2713, + "step": 9229 + }, + { + "epoch": 0.9736286919831224, + "grad_norm": 0.6047459244728088, + "learning_rate": 2.624756373289322e-06, + "loss": 1.3264, + "step": 9230 + }, + { + "epoch": 0.9737341772151898, + "grad_norm": 0.6175975203514099, + "learning_rate": 2.603812452563775e-06, + "loss": 1.3186, + "step": 9231 + }, + { + "epoch": 0.9738396624472574, + "grad_norm": 0.6348004937171936, + "learning_rate": 2.5829522813079207e-06, + "loss": 1.259, + "step": 9232 + }, + { + "epoch": 0.9739451476793249, + "grad_norm": 0.6095836162567139, + "learning_rate": 2.5621758618591394e-06, + "loss": 1.2889, + "step": 9233 + }, + { + "epoch": 0.9740506329113924, + "grad_norm": 0.6042273640632629, + "learning_rate": 2.541483196545735e-06, + "loss": 1.3056, + 
"step": 9234 + }, + { + "epoch": 0.9741561181434599, + "grad_norm": 0.60386061668396, + "learning_rate": 2.52087428768627e-06, + "loss": 1.2983, + "step": 9235 + }, + { + "epoch": 0.9742616033755275, + "grad_norm": 0.6062323451042175, + "learning_rate": 2.5003491375900633e-06, + "loss": 1.2861, + "step": 9236 + }, + { + "epoch": 0.9743670886075949, + "grad_norm": 0.5868048071861267, + "learning_rate": 2.4799077485571087e-06, + "loss": 1.286, + "step": 9237 + }, + { + "epoch": 0.9744725738396625, + "grad_norm": 0.6041567921638489, + "learning_rate": 2.4595501228779906e-06, + "loss": 1.303, + "step": 9238 + }, + { + "epoch": 0.97457805907173, + "grad_norm": 0.5886288285255432, + "learning_rate": 2.4392762628338838e-06, + "loss": 1.2635, + "step": 9239 + }, + { + "epoch": 0.9746835443037974, + "grad_norm": 0.6172890067100525, + "learning_rate": 2.419086170696472e-06, + "loss": 1.2975, + "step": 9240 + }, + { + "epoch": 0.974789029535865, + "grad_norm": 0.5831529498100281, + "learning_rate": 2.3989798487282776e-06, + "loss": 1.2725, + "step": 9241 + }, + { + "epoch": 0.9748945147679325, + "grad_norm": 0.6037331819534302, + "learning_rate": 2.3789572991822495e-06, + "loss": 1.3394, + "step": 9242 + }, + { + "epoch": 0.975, + "grad_norm": 0.6032410264015198, + "learning_rate": 2.3590185243020092e-06, + "loss": 1.2851, + "step": 9243 + }, + { + "epoch": 0.9751054852320675, + "grad_norm": 0.6458880305290222, + "learning_rate": 2.3391635263218526e-06, + "loss": 1.318, + "step": 9244 + }, + { + "epoch": 0.9752109704641351, + "grad_norm": 0.6126679182052612, + "learning_rate": 2.3193923074665834e-06, + "loss": 1.2584, + "step": 9245 + }, + { + "epoch": 0.9753164556962025, + "grad_norm": 0.6133818030357361, + "learning_rate": 2.299704869951763e-06, + "loss": 1.3103, + "step": 9246 + }, + { + "epoch": 0.9754219409282701, + "grad_norm": 0.6019688248634338, + "learning_rate": 2.2801012159832933e-06, + "loss": 1.3248, + "step": 9247 + }, + { + "epoch": 0.9755274261603376, + 
"grad_norm": 0.6828256249427795, + "learning_rate": 2.2605813477579172e-06, + "loss": 1.2944, + "step": 9248 + }, + { + "epoch": 0.975632911392405, + "grad_norm": 0.6047570705413818, + "learning_rate": 2.2411452674630517e-06, + "loss": 1.3249, + "step": 9249 + }, + { + "epoch": 0.9757383966244726, + "grad_norm": 0.5952421426773071, + "learning_rate": 2.2217929772764545e-06, + "loss": 1.3006, + "step": 9250 + }, + { + "epoch": 0.97584388185654, + "grad_norm": 0.6243395209312439, + "learning_rate": 2.2025244793667242e-06, + "loss": 1.2915, + "step": 9251 + }, + { + "epoch": 0.9759493670886076, + "grad_norm": 0.6460301876068115, + "learning_rate": 2.1833397758929674e-06, + "loss": 1.3037, + "step": 9252 + }, + { + "epoch": 0.9760548523206751, + "grad_norm": 0.6404301524162292, + "learning_rate": 2.1642388690049643e-06, + "loss": 1.3132, + "step": 9253 + }, + { + "epoch": 0.9761603375527426, + "grad_norm": 0.6055933833122253, + "learning_rate": 2.1452217608430857e-06, + "loss": 1.3087, + "step": 9254 + }, + { + "epoch": 0.9762658227848101, + "grad_norm": 0.5999761819839478, + "learning_rate": 2.126288453538211e-06, + "loss": 1.2833, + "step": 9255 + }, + { + "epoch": 0.9763713080168777, + "grad_norm": 0.6088129878044128, + "learning_rate": 2.107438949211976e-06, + "loss": 1.2898, + "step": 9256 + }, + { + "epoch": 0.9764767932489451, + "grad_norm": 0.6179481148719788, + "learning_rate": 2.0886732499764416e-06, + "loss": 1.3402, + "step": 9257 + }, + { + "epoch": 0.9765822784810126, + "grad_norm": 0.6142610907554626, + "learning_rate": 2.069991357934592e-06, + "loss": 1.2909, + "step": 9258 + }, + { + "epoch": 0.9766877637130802, + "grad_norm": 0.5985448360443115, + "learning_rate": 2.0513932751796695e-06, + "loss": 1.2917, + "step": 9259 + }, + { + "epoch": 0.9767932489451476, + "grad_norm": 0.6058366894721985, + "learning_rate": 2.0328790037957568e-06, + "loss": 1.2917, + "step": 9260 + }, + { + "epoch": 0.9768987341772152, + "grad_norm": 0.62293940782547, + 
"learning_rate": 2.0144485458574446e-06, + "loss": 1.3142, + "step": 9261 + }, + { + "epoch": 0.9770042194092827, + "grad_norm": 0.6448228359222412, + "learning_rate": 1.9961019034299976e-06, + "loss": 1.3059, + "step": 9262 + }, + { + "epoch": 0.9771097046413502, + "grad_norm": 0.5911363959312439, + "learning_rate": 1.977839078569188e-06, + "loss": 1.3081, + "step": 9263 + }, + { + "epoch": 0.9772151898734177, + "grad_norm": 0.6696296334266663, + "learning_rate": 1.959660073321545e-06, + "loss": 1.333, + "step": 9264 + }, + { + "epoch": 0.9773206751054853, + "grad_norm": 0.6252906322479248, + "learning_rate": 1.94156488972394e-06, + "loss": 1.2926, + "step": 9265 + }, + { + "epoch": 0.9774261603375527, + "grad_norm": 0.6207981705665588, + "learning_rate": 1.9235535298042506e-06, + "loss": 1.3253, + "step": 9266 + }, + { + "epoch": 0.9775316455696202, + "grad_norm": 0.5966672301292419, + "learning_rate": 1.905625995580612e-06, + "loss": 1.3073, + "step": 9267 + }, + { + "epoch": 0.9776371308016878, + "grad_norm": 0.6089624762535095, + "learning_rate": 1.8877822890618346e-06, + "loss": 1.307, + "step": 9268 + }, + { + "epoch": 0.9777426160337552, + "grad_norm": 0.6317061185836792, + "learning_rate": 1.8700224122475683e-06, + "loss": 1.3033, + "step": 9269 + }, + { + "epoch": 0.9778481012658228, + "grad_norm": 0.6066428422927856, + "learning_rate": 1.8523463671278052e-06, + "loss": 1.3142, + "step": 9270 + }, + { + "epoch": 0.9779535864978903, + "grad_norm": 0.6177568435668945, + "learning_rate": 1.8347541556832104e-06, + "loss": 1.2744, + "step": 9271 + }, + { + "epoch": 0.9780590717299578, + "grad_norm": 0.5938504934310913, + "learning_rate": 1.8172457798850407e-06, + "loss": 1.3095, + "step": 9272 + }, + { + "epoch": 0.9781645569620253, + "grad_norm": 0.5982556343078613, + "learning_rate": 1.7998212416953096e-06, + "loss": 1.3156, + "step": 9273 + }, + { + "epoch": 0.9782700421940929, + "grad_norm": 0.6170636415481567, + "learning_rate": 1.782480543066456e-06, + 
"loss": 1.3257, + "step": 9274 + }, + { + "epoch": 0.9783755274261603, + "grad_norm": 0.5992506742477417, + "learning_rate": 1.7652236859416748e-06, + "loss": 1.3165, + "step": 9275 + }, + { + "epoch": 0.9784810126582278, + "grad_norm": 0.6153761148452759, + "learning_rate": 1.7480506722545864e-06, + "loss": 1.2744, + "step": 9276 + }, + { + "epoch": 0.9785864978902954, + "grad_norm": 0.6146154403686523, + "learning_rate": 1.7309615039294847e-06, + "loss": 1.2884, + "step": 9277 + }, + { + "epoch": 0.9786919831223628, + "grad_norm": 0.6279167532920837, + "learning_rate": 1.7139561828813377e-06, + "loss": 1.3287, + "step": 9278 + }, + { + "epoch": 0.9787974683544304, + "grad_norm": 0.6214297413825989, + "learning_rate": 1.6970347110157879e-06, + "loss": 1.3025, + "step": 9279 + }, + { + "epoch": 0.9789029535864979, + "grad_norm": 0.5985597968101501, + "learning_rate": 1.6801970902288188e-06, + "loss": 1.2666, + "step": 9280 + }, + { + "epoch": 0.9790084388185654, + "grad_norm": 0.667352557182312, + "learning_rate": 1.6634433224072543e-06, + "loss": 1.297, + "step": 9281 + }, + { + "epoch": 0.9791139240506329, + "grad_norm": 0.5893337726593018, + "learning_rate": 1.6467734094283427e-06, + "loss": 1.2941, + "step": 9282 + }, + { + "epoch": 0.9792194092827005, + "grad_norm": 0.6011906862258911, + "learning_rate": 1.630187353160173e-06, + "loss": 1.2985, + "step": 9283 + }, + { + "epoch": 0.9793248945147679, + "grad_norm": 0.593126118183136, + "learning_rate": 1.6136851554611753e-06, + "loss": 1.3173, + "step": 9284 + }, + { + "epoch": 0.9794303797468354, + "grad_norm": 0.6221655607223511, + "learning_rate": 1.5972668181805373e-06, + "loss": 1.3307, + "step": 9285 + }, + { + "epoch": 0.979535864978903, + "grad_norm": 0.6179322600364685, + "learning_rate": 1.580932343158037e-06, + "loss": 1.2818, + "step": 9286 + }, + { + "epoch": 0.9796413502109704, + "grad_norm": 0.6034267544746399, + "learning_rate": 1.5646817322240436e-06, + "loss": 1.3147, + "step": 9287 + }, + { + 
"epoch": 0.979746835443038, + "grad_norm": 0.607647716999054, + "learning_rate": 1.5485149871995175e-06, + "loss": 1.309, + "step": 9288 + }, + { + "epoch": 0.9798523206751055, + "grad_norm": 0.5976998209953308, + "learning_rate": 1.532432109895926e-06, + "loss": 1.2979, + "step": 9289 + }, + { + "epoch": 0.979957805907173, + "grad_norm": 0.6141584515571594, + "learning_rate": 1.5164331021155774e-06, + "loss": 1.3113, + "step": 9290 + }, + { + "epoch": 0.9800632911392405, + "grad_norm": 0.6058874130249023, + "learning_rate": 1.5005179656511213e-06, + "loss": 1.277, + "step": 9291 + }, + { + "epoch": 0.9801687763713081, + "grad_norm": 0.602071225643158, + "learning_rate": 1.4846867022860477e-06, + "loss": 1.2835, + "step": 9292 + }, + { + "epoch": 0.9802742616033755, + "grad_norm": 0.5947173833847046, + "learning_rate": 1.4689393137941876e-06, + "loss": 1.3102, + "step": 9293 + }, + { + "epoch": 0.980379746835443, + "grad_norm": 0.5995951890945435, + "learning_rate": 1.4532758019402958e-06, + "loss": 1.3194, + "step": 9294 + }, + { + "epoch": 0.9804852320675106, + "grad_norm": 0.5983691811561584, + "learning_rate": 1.4376961684793854e-06, + "loss": 1.3132, + "step": 9295 + }, + { + "epoch": 0.980590717299578, + "grad_norm": 0.6045620441436768, + "learning_rate": 1.4222004151572265e-06, + "loss": 1.292, + "step": 9296 + }, + { + "epoch": 0.9806962025316456, + "grad_norm": 0.5959830284118652, + "learning_rate": 1.4067885437103467e-06, + "loss": 1.2995, + "step": 9297 + }, + { + "epoch": 0.9808016877637131, + "grad_norm": 0.6018973588943481, + "learning_rate": 1.3914605558656146e-06, + "loss": 1.3217, + "step": 9298 + }, + { + "epoch": 0.9809071729957806, + "grad_norm": 0.6080197095870972, + "learning_rate": 1.376216453340573e-06, + "loss": 1.299, + "step": 9299 + }, + { + "epoch": 0.9810126582278481, + "grad_norm": 0.6183789372444153, + "learning_rate": 1.3610562378435221e-06, + "loss": 1.3197, + "step": 9300 + }, + { + "epoch": 0.9811181434599157, + "grad_norm": 
0.6072063446044922, + "learning_rate": 1.345979911073103e-06, + "loss": 1.2815, + "step": 9301 + }, + { + "epoch": 0.9812236286919831, + "grad_norm": 0.6131986975669861, + "learning_rate": 1.3309874747187978e-06, + "loss": 1.304, + "step": 9302 + }, + { + "epoch": 0.9813291139240506, + "grad_norm": 0.6271777153015137, + "learning_rate": 1.3160789304605958e-06, + "loss": 1.3265, + "step": 9303 + }, + { + "epoch": 0.9814345991561182, + "grad_norm": 0.6115278005599976, + "learning_rate": 1.3012542799689108e-06, + "loss": 1.2773, + "step": 9304 + }, + { + "epoch": 0.9815400843881856, + "grad_norm": 0.6108042001724243, + "learning_rate": 1.286513524905164e-06, + "loss": 1.3226, + "step": 9305 + }, + { + "epoch": 0.9816455696202532, + "grad_norm": 0.6020373106002808, + "learning_rate": 1.2718566669208675e-06, + "loss": 1.3209, + "step": 9306 + }, + { + "epoch": 0.9817510548523207, + "grad_norm": 0.5998278856277466, + "learning_rate": 1.2572837076586241e-06, + "loss": 1.3418, + "step": 9307 + }, + { + "epoch": 0.9818565400843882, + "grad_norm": 0.6000780463218689, + "learning_rate": 1.2427946487512941e-06, + "loss": 1.3121, + "step": 9308 + }, + { + "epoch": 0.9819620253164557, + "grad_norm": 0.588657021522522, + "learning_rate": 1.2283894918224125e-06, + "loss": 1.2599, + "step": 9309 + }, + { + "epoch": 0.9820675105485233, + "grad_norm": 0.6277613043785095, + "learning_rate": 1.2140682384862712e-06, + "loss": 1.3133, + "step": 9310 + }, + { + "epoch": 0.9821729957805907, + "grad_norm": 0.6165779829025269, + "learning_rate": 1.199830890347503e-06, + "loss": 1.2924, + "step": 9311 + }, + { + "epoch": 0.9822784810126582, + "grad_norm": 0.6046853065490723, + "learning_rate": 1.185677449001582e-06, + "loss": 1.3013, + "step": 9312 + }, + { + "epoch": 0.9823839662447258, + "grad_norm": 0.6001490354537964, + "learning_rate": 1.1716079160344061e-06, + "loss": 1.2991, + "step": 9313 + }, + { + "epoch": 0.9824894514767932, + "grad_norm": 0.6089380383491516, + "learning_rate": 
1.1576222930225478e-06, + "loss": 1.3406, + "step": 9314 + }, + { + "epoch": 0.9825949367088608, + "grad_norm": 0.617946207523346, + "learning_rate": 1.143720581533253e-06, + "loss": 1.2842, + "step": 9315 + }, + { + "epoch": 0.9827004219409282, + "grad_norm": 0.5914191603660583, + "learning_rate": 1.1299027831241094e-06, + "loss": 1.3305, + "step": 9316 + }, + { + "epoch": 0.9828059071729958, + "grad_norm": 0.6111389398574829, + "learning_rate": 1.1161688993435449e-06, + "loss": 1.2875, + "step": 9317 + }, + { + "epoch": 0.9829113924050633, + "grad_norm": 0.62137371301651, + "learning_rate": 1.1025189317305784e-06, + "loss": 1.2826, + "step": 9318 + }, + { + "epoch": 0.9830168776371307, + "grad_norm": 0.6676011085510254, + "learning_rate": 1.0889528818147366e-06, + "loss": 1.2989, + "step": 9319 + }, + { + "epoch": 0.9831223628691983, + "grad_norm": 0.623155951499939, + "learning_rate": 1.0754707511161365e-06, + "loss": 1.2918, + "step": 9320 + }, + { + "epoch": 0.9832278481012658, + "grad_norm": 0.6123469471931458, + "learning_rate": 1.0620725411454868e-06, + "loss": 1.326, + "step": 9321 + }, + { + "epoch": 0.9833333333333333, + "grad_norm": 0.6152700185775757, + "learning_rate": 1.0487582534040863e-06, + "loss": 1.3212, + "step": 9322 + }, + { + "epoch": 0.9834388185654008, + "grad_norm": 0.6088012456893921, + "learning_rate": 1.0355278893839915e-06, + "loss": 1.3758, + "step": 9323 + }, + { + "epoch": 0.9835443037974684, + "grad_norm": 0.6014592051506042, + "learning_rate": 1.0223814505676832e-06, + "loss": 1.3161, + "step": 9324 + }, + { + "epoch": 0.9836497890295358, + "grad_norm": 0.6260278820991516, + "learning_rate": 1.009318938428233e-06, + "loss": 1.3045, + "step": 9325 + }, + { + "epoch": 0.9837552742616034, + "grad_norm": 0.6326442360877991, + "learning_rate": 9.963403544294702e-07, + "loss": 1.3303, + "step": 9326 + }, + { + "epoch": 0.9838607594936709, + "grad_norm": 0.6159253120422363, + "learning_rate": 9.834457000255647e-07, + "loss": 1.3177, + 
"step": 9327 + }, + { + "epoch": 0.9839662447257383, + "grad_norm": 0.601283609867096, + "learning_rate": 9.706349766615275e-07, + "loss": 1.2818, + "step": 9328 + }, + { + "epoch": 0.9840717299578059, + "grad_norm": 0.7162450551986694, + "learning_rate": 9.579081857728766e-07, + "loss": 1.2923, + "step": 9329 + }, + { + "epoch": 0.9841772151898734, + "grad_norm": 0.6021755337715149, + "learning_rate": 9.452653287856383e-07, + "loss": 1.286, + "step": 9330 + }, + { + "epoch": 0.9842827004219409, + "grad_norm": 0.6090964078903198, + "learning_rate": 9.327064071165126e-07, + "loss": 1.3161, + "step": 9331 + }, + { + "epoch": 0.9843881856540084, + "grad_norm": 0.5904340744018555, + "learning_rate": 9.202314221728735e-07, + "loss": 1.2929, + "step": 9332 + }, + { + "epoch": 0.984493670886076, + "grad_norm": 0.6202679872512817, + "learning_rate": 9.078403753525199e-07, + "loss": 1.3297, + "step": 9333 + }, + { + "epoch": 0.9845991561181434, + "grad_norm": 0.6294239163398743, + "learning_rate": 8.955332680440076e-07, + "loss": 1.3296, + "step": 9334 + }, + { + "epoch": 0.984704641350211, + "grad_norm": 0.5888603329658508, + "learning_rate": 8.833101016263168e-07, + "loss": 1.283, + "step": 9335 + }, + { + "epoch": 0.9848101265822785, + "grad_norm": 0.6042793989181519, + "learning_rate": 8.711708774691851e-07, + "loss": 1.304, + "step": 9336 + }, + { + "epoch": 0.984915611814346, + "grad_norm": 0.6071955561637878, + "learning_rate": 8.591155969327746e-07, + "loss": 1.3061, + "step": 9337 + }, + { + "epoch": 0.9850210970464135, + "grad_norm": 0.5953038334846497, + "learning_rate": 8.47144261368088e-07, + "loss": 1.3012, + "step": 9338 + }, + { + "epoch": 0.985126582278481, + "grad_norm": 0.6096950173377991, + "learning_rate": 8.352568721165521e-07, + "loss": 1.3029, + "step": 9339 + }, + { + "epoch": 0.9852320675105485, + "grad_norm": 0.604365885257721, + "learning_rate": 8.234534305101015e-07, + "loss": 1.3348, + "step": 9340 + }, + { + "epoch": 0.985337552742616, + 
"grad_norm": 0.6079406142234802, + "learning_rate": 8.117339378714283e-07, + "loss": 1.3305, + "step": 9341 + }, + { + "epoch": 0.9854430379746836, + "grad_norm": 0.6146288514137268, + "learning_rate": 8.00098395513732e-07, + "loss": 1.3187, + "step": 9342 + }, + { + "epoch": 0.985548523206751, + "grad_norm": 0.6074128150939941, + "learning_rate": 7.885468047408862e-07, + "loss": 1.3444, + "step": 9343 + }, + { + "epoch": 0.9856540084388186, + "grad_norm": 0.6148248314857483, + "learning_rate": 7.770791668472721e-07, + "loss": 1.3028, + "step": 9344 + }, + { + "epoch": 0.9857594936708861, + "grad_norm": 0.6271820664405823, + "learning_rate": 7.656954831178619e-07, + "loss": 1.3064, + "step": 9345 + }, + { + "epoch": 0.9858649789029535, + "grad_norm": 0.6073163151741028, + "learning_rate": 7.543957548283021e-07, + "loss": 1.3191, + "step": 9346 + }, + { + "epoch": 0.9859704641350211, + "grad_norm": 0.6723214387893677, + "learning_rate": 7.431799832448294e-07, + "loss": 1.3263, + "step": 9347 + }, + { + "epoch": 0.9860759493670886, + "grad_norm": 0.6275677680969238, + "learning_rate": 7.320481696241887e-07, + "loss": 1.3473, + "step": 9348 + }, + { + "epoch": 0.9861814345991561, + "grad_norm": 0.6011973023414612, + "learning_rate": 7.210003152136324e-07, + "loss": 1.3196, + "step": 9349 + }, + { + "epoch": 0.9862869198312236, + "grad_norm": 0.6081833839416504, + "learning_rate": 7.100364212513367e-07, + "loss": 1.2942, + "step": 9350 + }, + { + "epoch": 0.9863924050632912, + "grad_norm": 0.6267574429512024, + "learning_rate": 6.991564889656521e-07, + "loss": 1.3397, + "step": 9351 + }, + { + "epoch": 0.9864978902953586, + "grad_norm": 0.6163247227668762, + "learning_rate": 6.883605195759369e-07, + "loss": 1.3073, + "step": 9352 + }, + { + "epoch": 0.9866033755274262, + "grad_norm": 0.6382803916931152, + "learning_rate": 6.776485142918065e-07, + "loss": 1.3271, + "step": 9353 + }, + { + "epoch": 0.9867088607594937, + "grad_norm": 0.6361485123634338, + "learning_rate": 
6.67020474313551e-07, + "loss": 1.2963, + "step": 9354 + }, + { + "epoch": 0.9868143459915611, + "grad_norm": 0.5939188003540039, + "learning_rate": 6.564764008322177e-07, + "loss": 1.3237, + "step": 9355 + }, + { + "epoch": 0.9869198312236287, + "grad_norm": 0.591655433177948, + "learning_rate": 6.460162950292781e-07, + "loss": 1.3068, + "step": 9356 + }, + { + "epoch": 0.9870253164556962, + "grad_norm": 0.6390970945358276, + "learning_rate": 6.356401580767945e-07, + "loss": 1.2936, + "step": 9357 + }, + { + "epoch": 0.9871308016877637, + "grad_norm": 0.6024369597434998, + "learning_rate": 6.253479911375037e-07, + "loss": 1.2946, + "step": 9358 + }, + { + "epoch": 0.9872362869198312, + "grad_norm": 0.6213995218276978, + "learning_rate": 6.151397953647331e-07, + "loss": 1.3415, + "step": 9359 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 0.6226295232772827, + "learning_rate": 6.050155719023176e-07, + "loss": 1.3037, + "step": 9360 + }, + { + "epoch": 0.9874472573839662, + "grad_norm": 0.6085191965103149, + "learning_rate": 5.949753218846832e-07, + "loss": 1.3117, + "step": 9361 + }, + { + "epoch": 0.9875527426160338, + "grad_norm": 0.6303831934928894, + "learning_rate": 5.850190464369298e-07, + "loss": 1.3159, + "step": 9362 + }, + { + "epoch": 0.9876582278481013, + "grad_norm": 0.5972294807434082, + "learning_rate": 5.751467466747484e-07, + "loss": 1.309, + "step": 9363 + }, + { + "epoch": 0.9877637130801687, + "grad_norm": 0.6101588606834412, + "learning_rate": 5.653584237043374e-07, + "loss": 1.3164, + "step": 9364 + }, + { + "epoch": 0.9878691983122363, + "grad_norm": 0.6097609996795654, + "learning_rate": 5.556540786224862e-07, + "loss": 1.3134, + "step": 9365 + }, + { + "epoch": 0.9879746835443038, + "grad_norm": 0.6128721833229065, + "learning_rate": 5.460337125167414e-07, + "loss": 1.3291, + "step": 9366 + }, + { + "epoch": 0.9880801687763713, + "grad_norm": 0.6062542200088501, + "learning_rate": 5.364973264649908e-07, + "loss": 1.2742, + "step": 
9367 + }, + { + "epoch": 0.9881856540084388, + "grad_norm": 0.6301002502441406, + "learning_rate": 5.270449215358797e-07, + "loss": 1.3221, + "step": 9368 + }, + { + "epoch": 0.9882911392405064, + "grad_norm": 0.6064989566802979, + "learning_rate": 5.176764987885607e-07, + "loss": 1.2912, + "step": 9369 + }, + { + "epoch": 0.9883966244725738, + "grad_norm": 0.6501342058181763, + "learning_rate": 5.08392059272944e-07, + "loss": 1.3452, + "step": 9370 + }, + { + "epoch": 0.9885021097046414, + "grad_norm": 0.5956379175186157, + "learning_rate": 4.991916040291977e-07, + "loss": 1.2973, + "step": 9371 + }, + { + "epoch": 0.9886075949367089, + "grad_norm": 0.5975900292396545, + "learning_rate": 4.900751340884135e-07, + "loss": 1.2913, + "step": 9372 + }, + { + "epoch": 0.9887130801687763, + "grad_norm": 0.5929540991783142, + "learning_rate": 4.810426504721077e-07, + "loss": 1.3141, + "step": 9373 + }, + { + "epoch": 0.9888185654008439, + "grad_norm": 0.59124356508255, + "learning_rate": 4.720941541923873e-07, + "loss": 1.2515, + "step": 9374 + }, + { + "epoch": 0.9889240506329114, + "grad_norm": 0.5935989618301392, + "learning_rate": 4.632296462520336e-07, + "loss": 1.3224, + "step": 9375 + }, + { + "epoch": 0.9890295358649789, + "grad_norm": 0.592210054397583, + "learning_rate": 4.544491276443352e-07, + "loss": 1.271, + "step": 9376 + }, + { + "epoch": 0.9891350210970464, + "grad_norm": 0.6157575249671936, + "learning_rate": 4.457525993531719e-07, + "loss": 1.2668, + "step": 9377 + }, + { + "epoch": 0.989240506329114, + "grad_norm": 0.6286448836326599, + "learning_rate": 4.371400623530142e-07, + "loss": 1.3446, + "step": 9378 + }, + { + "epoch": 0.9893459915611814, + "grad_norm": 0.6044376492500305, + "learning_rate": 4.2861151760900665e-07, + "loss": 1.3202, + "step": 9379 + }, + { + "epoch": 0.989451476793249, + "grad_norm": 0.6223316192626953, + "learning_rate": 4.2016696607680147e-07, + "loss": 1.3017, + "step": 9380 + }, + { + "epoch": 0.9895569620253165, + 
"grad_norm": 0.6092166900634766, + "learning_rate": 4.118064087025586e-07, + "loss": 1.2684, + "step": 9381 + }, + { + "epoch": 0.989662447257384, + "grad_norm": 0.6263086199760437, + "learning_rate": 4.035298464232784e-07, + "loss": 1.3143, + "step": 9382 + }, + { + "epoch": 0.9897679324894515, + "grad_norm": 0.5924258232116699, + "learning_rate": 3.953372801662192e-07, + "loss": 1.3129, + "step": 9383 + }, + { + "epoch": 0.9898734177215189, + "grad_norm": 0.6101329922676086, + "learning_rate": 3.8722871084956313e-07, + "loss": 1.2991, + "step": 9384 + }, + { + "epoch": 0.9899789029535865, + "grad_norm": 0.6123459935188293, + "learning_rate": 3.7920413938175027e-07, + "loss": 1.2936, + "step": 9385 + }, + { + "epoch": 0.990084388185654, + "grad_norm": 0.588537871837616, + "learning_rate": 3.7126356666214447e-07, + "loss": 1.2879, + "step": 9386 + }, + { + "epoch": 0.9901898734177215, + "grad_norm": 0.6140715479850769, + "learning_rate": 3.6340699358036743e-07, + "loss": 1.3289, + "step": 9387 + }, + { + "epoch": 0.990295358649789, + "grad_norm": 0.606935441493988, + "learning_rate": 3.5563442101696486e-07, + "loss": 1.2876, + "step": 9388 + }, + { + "epoch": 0.9904008438818566, + "grad_norm": 0.6659934520721436, + "learning_rate": 3.479458498426569e-07, + "loss": 1.3149, + "step": 9389 + }, + { + "epoch": 0.990506329113924, + "grad_norm": 0.6224636435508728, + "learning_rate": 3.4034128091917085e-07, + "loss": 1.3099, + "step": 9390 + }, + { + "epoch": 0.9906118143459915, + "grad_norm": 0.5991073846817017, + "learning_rate": 3.328207150986584e-07, + "loss": 1.3208, + "step": 9391 + }, + { + "epoch": 0.9907172995780591, + "grad_norm": 0.6396909952163696, + "learning_rate": 3.2538415322369563e-07, + "loss": 1.3146, + "step": 9392 + }, + { + "epoch": 0.9908227848101265, + "grad_norm": 0.6292794346809387, + "learning_rate": 3.180315961276159e-07, + "loss": 1.3114, + "step": 9393 + }, + { + "epoch": 0.9909282700421941, + "grad_norm": 0.6473312377929688, + 
"learning_rate": 3.107630446344267e-07, + "loss": 1.2894, + "step": 9394 + }, + { + "epoch": 0.9910337552742616, + "grad_norm": 0.6245794296264648, + "learning_rate": 3.035784995584767e-07, + "loss": 1.3598, + "step": 9395 + }, + { + "epoch": 0.9911392405063291, + "grad_norm": 0.6471752524375916, + "learning_rate": 2.964779617049551e-07, + "loss": 1.2893, + "step": 9396 + }, + { + "epoch": 0.9912447257383966, + "grad_norm": 0.6082220673561096, + "learning_rate": 2.8946143186930896e-07, + "loss": 1.3204, + "step": 9397 + }, + { + "epoch": 0.9913502109704642, + "grad_norm": 0.6453876495361328, + "learning_rate": 2.825289108379925e-07, + "loss": 1.3174, + "step": 9398 + }, + { + "epoch": 0.9914556962025316, + "grad_norm": 0.6269974112510681, + "learning_rate": 2.756803993877177e-07, + "loss": 1.3357, + "step": 9399 + }, + { + "epoch": 0.9915611814345991, + "grad_norm": 0.6065733432769775, + "learning_rate": 2.689158982859541e-07, + "loss": 1.3171, + "step": 9400 + }, + { + "epoch": 0.9916666666666667, + "grad_norm": 0.5957198143005371, + "learning_rate": 2.622354082905953e-07, + "loss": 1.2734, + "step": 9401 + }, + { + "epoch": 0.9917721518987341, + "grad_norm": 0.5928565859794617, + "learning_rate": 2.556389301502926e-07, + "loss": 1.3094, + "step": 9402 + }, + { + "epoch": 0.9918776371308017, + "grad_norm": 0.6467849612236023, + "learning_rate": 2.491264646042879e-07, + "loss": 1.2595, + "step": 9403 + }, + { + "epoch": 0.9919831223628692, + "grad_norm": 0.6164884567260742, + "learning_rate": 2.426980123821643e-07, + "loss": 1.2916, + "step": 9404 + }, + { + "epoch": 0.9920886075949367, + "grad_norm": 0.6127191185951233, + "learning_rate": 2.3635357420442872e-07, + "loss": 1.2767, + "step": 9405 + }, + { + "epoch": 0.9921940928270042, + "grad_norm": 0.5978965163230896, + "learning_rate": 2.3009315078192926e-07, + "loss": 1.2813, + "step": 9406 + }, + { + "epoch": 0.9922995780590718, + "grad_norm": 0.613462507724762, + "learning_rate": 2.2391674281610486e-07, + 
"loss": 1.2812, + "step": 9407 + }, + { + "epoch": 0.9924050632911392, + "grad_norm": 0.620446503162384, + "learning_rate": 2.1782435099923503e-07, + "loss": 1.3106, + "step": 9408 + }, + { + "epoch": 0.9925105485232067, + "grad_norm": 0.626304030418396, + "learning_rate": 2.1181597601385716e-07, + "loss": 1.3166, + "step": 9409 + }, + { + "epoch": 0.9926160337552743, + "grad_norm": 0.6309654712677002, + "learning_rate": 2.05891618533266e-07, + "loss": 1.3012, + "step": 9410 + }, + { + "epoch": 0.9927215189873417, + "grad_norm": 0.6113905310630798, + "learning_rate": 2.0005127922134713e-07, + "loss": 1.3466, + "step": 9411 + }, + { + "epoch": 0.9928270042194093, + "grad_norm": 0.5875343680381775, + "learning_rate": 1.942949587324938e-07, + "loss": 1.2636, + "step": 9412 + }, + { + "epoch": 0.9929324894514768, + "grad_norm": 0.6245113015174866, + "learning_rate": 1.8862265771177333e-07, + "loss": 1.2795, + "step": 9413 + }, + { + "epoch": 0.9930379746835443, + "grad_norm": 0.605435311794281, + "learning_rate": 1.8303437679476065e-07, + "loss": 1.3399, + "step": 9414 + }, + { + "epoch": 0.9931434599156118, + "grad_norm": 0.6442747116088867, + "learning_rate": 1.775301166077048e-07, + "loss": 1.3247, + "step": 9415 + }, + { + "epoch": 0.9932489451476794, + "grad_norm": 0.6125501394271851, + "learning_rate": 1.7210987776736243e-07, + "loss": 1.2987, + "step": 9416 + }, + { + "epoch": 0.9933544303797468, + "grad_norm": 0.6046074032783508, + "learning_rate": 1.6677366088099777e-07, + "loss": 1.3356, + "step": 9417 + }, + { + "epoch": 0.9934599156118143, + "grad_norm": 0.6203588247299194, + "learning_rate": 1.6152146654671573e-07, + "loss": 1.2978, + "step": 9418 + }, + { + "epoch": 0.9935654008438819, + "grad_norm": 0.6209622621536255, + "learning_rate": 1.5635329535304554e-07, + "loss": 1.3351, + "step": 9419 + }, + { + "epoch": 0.9936708860759493, + "grad_norm": 0.607370138168335, + "learning_rate": 1.5126914787894074e-07, + "loss": 1.2812, + "step": 9420 + }, + { + 
"epoch": 0.9937763713080169, + "grad_norm": 0.6032707095146179, + "learning_rate": 1.4626902469427882e-07, + "loss": 1.3474, + "step": 9421 + }, + { + "epoch": 0.9938818565400844, + "grad_norm": 0.6023720502853394, + "learning_rate": 1.4135292635927832e-07, + "loss": 1.328, + "step": 9422 + }, + { + "epoch": 0.9939873417721519, + "grad_norm": 0.5975549817085266, + "learning_rate": 1.365208534248319e-07, + "loss": 1.2958, + "step": 9423 + }, + { + "epoch": 0.9940928270042194, + "grad_norm": 0.6356531977653503, + "learning_rate": 1.3177280643233979e-07, + "loss": 1.2873, + "step": 9424 + }, + { + "epoch": 0.994198312236287, + "grad_norm": 0.5900936722755432, + "learning_rate": 1.271087859138764e-07, + "loss": 1.2948, + "step": 9425 + }, + { + "epoch": 0.9943037974683544, + "grad_norm": 0.6192235350608826, + "learning_rate": 1.2252879239210702e-07, + "loss": 1.3408, + "step": 9426 + }, + { + "epoch": 0.994409282700422, + "grad_norm": 0.6272086501121521, + "learning_rate": 1.1803282638020441e-07, + "loss": 1.3307, + "step": 9427 + }, + { + "epoch": 0.9945147679324895, + "grad_norm": 0.6027222871780396, + "learning_rate": 1.1362088838193229e-07, + "loss": 1.2984, + "step": 9428 + }, + { + "epoch": 0.9946202531645569, + "grad_norm": 0.626856803894043, + "learning_rate": 1.0929297889172852e-07, + "loss": 1.3317, + "step": 9429 + }, + { + "epoch": 0.9947257383966245, + "grad_norm": 0.6005033850669861, + "learning_rate": 1.0504909839462173e-07, + "loss": 1.2652, + "step": 9430 + }, + { + "epoch": 0.994831223628692, + "grad_norm": 0.6191896200180054, + "learning_rate": 1.008892473659817e-07, + "loss": 1.296, + "step": 9431 + }, + { + "epoch": 0.9949367088607595, + "grad_norm": 0.6031783223152161, + "learning_rate": 9.68134262721021e-08, + "loss": 1.3264, + "step": 9432 + }, + { + "epoch": 0.995042194092827, + "grad_norm": 0.5993649959564209, + "learning_rate": 9.282163556953437e-08, + "loss": 1.3122, + "step": 9433 + }, + { + "epoch": 0.9951476793248946, + "grad_norm": 
0.6045063138008118, + "learning_rate": 8.891387570575393e-08, + "loss": 1.3043, + "step": 9434 + }, + { + "epoch": 0.995253164556962, + "grad_norm": 0.5928676128387451, + "learning_rate": 8.509014711857721e-08, + "loss": 1.3035, + "step": 9435 + }, + { + "epoch": 0.9953586497890295, + "grad_norm": 0.6146067976951599, + "learning_rate": 8.135045023641152e-08, + "loss": 1.2917, + "step": 9436 + }, + { + "epoch": 0.9954641350210971, + "grad_norm": 0.6224998831748962, + "learning_rate": 7.769478547842157e-08, + "loss": 1.2883, + "step": 9437 + }, + { + "epoch": 0.9955696202531645, + "grad_norm": 0.6128562688827515, + "learning_rate": 7.412315325411312e-08, + "loss": 1.3085, + "step": 9438 + }, + { + "epoch": 0.9956751054852321, + "grad_norm": 0.6349616050720215, + "learning_rate": 7.063555396383259e-08, + "loss": 1.3437, + "step": 9439 + }, + { + "epoch": 0.9957805907172996, + "grad_norm": 0.599149227142334, + "learning_rate": 6.723198799826746e-08, + "loss": 1.2981, + "step": 9440 + }, + { + "epoch": 0.9958860759493671, + "grad_norm": 0.6078205108642578, + "learning_rate": 6.391245573894588e-08, + "loss": 1.3172, + "step": 9441 + }, + { + "epoch": 0.9959915611814346, + "grad_norm": 0.6060683131217957, + "learning_rate": 6.067695755765379e-08, + "loss": 1.3247, + "step": 9442 + }, + { + "epoch": 0.9960970464135022, + "grad_norm": 0.6190298795700073, + "learning_rate": 5.7525493817101035e-08, + "loss": 1.2791, + "step": 9443 + }, + { + "epoch": 0.9962025316455696, + "grad_norm": 0.6100336313247681, + "learning_rate": 5.4458064870338553e-08, + "loss": 1.3119, + "step": 9444 + }, + { + "epoch": 0.9963080168776371, + "grad_norm": 0.6159368753433228, + "learning_rate": 5.147467106117465e-08, + "loss": 1.3105, + "step": 9445 + }, + { + "epoch": 0.9964135021097047, + "grad_norm": 0.6049315929412842, + "learning_rate": 4.85753127237587e-08, + "loss": 1.3031, + "step": 9446 + }, + { + "epoch": 0.9965189873417721, + "grad_norm": 0.6262489557266235, + "learning_rate": 
4.575999018316401e-08, + "loss": 1.3171, + "step": 9447 + }, + { + "epoch": 0.9966244725738397, + "grad_norm": 0.5998793244361877, + "learning_rate": 4.302870375472168e-08, + "loss": 1.3205, + "step": 9448 + }, + { + "epoch": 0.9967299578059071, + "grad_norm": 0.6182252168655396, + "learning_rate": 4.038145374460345e-08, + "loss": 1.3397, + "step": 9449 + }, + { + "epoch": 0.9968354430379747, + "grad_norm": 0.601340651512146, + "learning_rate": 3.781824044932214e-08, + "loss": 1.3303, + "step": 9450 + }, + { + "epoch": 0.9969409282700422, + "grad_norm": 0.6106287837028503, + "learning_rate": 3.533906415614796e-08, + "loss": 1.309, + "step": 9451 + }, + { + "epoch": 0.9970464135021097, + "grad_norm": 0.6184196472167969, + "learning_rate": 3.294392514285871e-08, + "loss": 1.354, + "step": 9452 + }, + { + "epoch": 0.9971518987341772, + "grad_norm": 0.6318332552909851, + "learning_rate": 3.0632823677906316e-08, + "loss": 1.293, + "step": 9453 + }, + { + "epoch": 0.9972573839662447, + "grad_norm": 0.6243615746498108, + "learning_rate": 2.8405760020250304e-08, + "loss": 1.2967, + "step": 9454 + }, + { + "epoch": 0.9973628691983122, + "grad_norm": 0.5997670888900757, + "learning_rate": 2.6262734419441047e-08, + "loss": 1.2909, + "step": 9455 + }, + { + "epoch": 0.9974683544303797, + "grad_norm": 0.6335648894309998, + "learning_rate": 2.420374711561979e-08, + "loss": 1.2928, + "step": 9456 + }, + { + "epoch": 0.9975738396624473, + "grad_norm": 0.6185999512672424, + "learning_rate": 2.2228798339435363e-08, + "loss": 1.2843, + "step": 9457 + }, + { + "epoch": 0.9976793248945147, + "grad_norm": 0.6129922866821289, + "learning_rate": 2.0337888312210727e-08, + "loss": 1.2909, + "step": 9458 + }, + { + "epoch": 0.9977848101265823, + "grad_norm": 0.6071612238883972, + "learning_rate": 1.8531017245942972e-08, + "loss": 1.2704, + "step": 9459 + }, + { + "epoch": 0.9978902953586498, + "grad_norm": 0.60748690366745, + "learning_rate": 1.6808185342970238e-08, + "loss": 1.3179, + 
"step": 9460 + }, + { + "epoch": 0.9979957805907173, + "grad_norm": 0.6072308421134949, + "learning_rate": 1.516939279638807e-08, + "loss": 1.3065, + "step": 9461 + }, + { + "epoch": 0.9981012658227848, + "grad_norm": 0.602415144443512, + "learning_rate": 1.3614639789882866e-08, + "loss": 1.3052, + "step": 9462 + }, + { + "epoch": 0.9982067510548523, + "grad_norm": 0.7612923979759216, + "learning_rate": 1.214392649756535e-08, + "loss": 1.324, + "step": 9463 + }, + { + "epoch": 0.9983122362869198, + "grad_norm": 0.6066783666610718, + "learning_rate": 1.075725308438691e-08, + "loss": 1.3262, + "step": 9464 + }, + { + "epoch": 0.9984177215189873, + "grad_norm": 0.6061766147613525, + "learning_rate": 9.454619705556722e-09, + "loss": 1.3379, + "step": 9465 + }, + { + "epoch": 0.9985232067510549, + "grad_norm": 0.6080813407897949, + "learning_rate": 8.236026507124628e-09, + "loss": 1.2995, + "step": 9466 + }, + { + "epoch": 0.9986286919831223, + "grad_norm": 0.6031904816627502, + "learning_rate": 7.101473625648058e-09, + "loss": 1.2938, + "step": 9467 + }, + { + "epoch": 0.9987341772151899, + "grad_norm": 0.6177304983139038, + "learning_rate": 6.050961188358573e-09, + "loss": 1.3071, + "step": 9468 + }, + { + "epoch": 0.9988396624472574, + "grad_norm": 0.6170554161071777, + "learning_rate": 5.084489312745521e-09, + "loss": 1.3039, + "step": 9469 + }, + { + "epoch": 0.9989451476793249, + "grad_norm": 0.5937225222587585, + "learning_rate": 4.202058107305451e-09, + "loss": 1.3143, + "step": 9470 + }, + { + "epoch": 0.9990506329113924, + "grad_norm": 0.6288758516311646, + "learning_rate": 3.403667670792698e-09, + "loss": 1.316, + "step": 9471 + }, + { + "epoch": 0.99915611814346, + "grad_norm": 0.5901196599006653, + "learning_rate": 2.689318092718995e-09, + "loss": 1.2718, + "step": 9472 + }, + { + "epoch": 0.9992616033755274, + "grad_norm": 0.6003479957580566, + "learning_rate": 2.059009453103666e-09, + "loss": 1.3186, + "step": 9473 + }, + { + "epoch": 0.9993670886075949, 
+ "grad_norm": 0.5946284532546997, + "learning_rate": 1.5127418226401623e-09, + "loss": 1.3214, + "step": 9474 + }, + { + "epoch": 0.9994725738396625, + "grad_norm": 0.6112335920333862, + "learning_rate": 1.0505152625295278e-09, + "loss": 1.3037, + "step": 9475 + }, + { + "epoch": 0.9995780590717299, + "grad_norm": 0.6161929965019226, + "learning_rate": 6.723298245636666e-10, + "loss": 1.297, + "step": 9476 + }, + { + "epoch": 0.9996835443037975, + "grad_norm": 0.6129576563835144, + "learning_rate": 3.781855510420762e-10, + "loss": 1.3017, + "step": 9477 + }, + { + "epoch": 0.999789029535865, + "grad_norm": 0.629106879234314, + "learning_rate": 1.6808247493838026e-10, + "loss": 1.3269, + "step": 9478 + }, + { + "epoch": 0.9998945147679325, + "grad_norm": 0.6134333610534668, + "learning_rate": 4.202061990032924e-11, + "loss": 1.307, + "step": 9479 + }, + { + "epoch": 1.0, + "grad_norm": 1.814998745918274, + "learning_rate": 0.0, + "loss": 1.2751, + "step": 9480 + } + ], + "logging_steps": 1, + "max_steps": 9480, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.830818274921677e+16, + "train_batch_size": 1024, + "trial_name": null, + "trial_params": null +} diff --git a/saves-olmo-cosine/checkpoint-9480/training_args.bin b/saves-olmo-cosine/checkpoint-9480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..fd243f1a0ab2f3ca35ad126a9e9677e3e09bc631 --- /dev/null +++ b/saves-olmo-cosine/checkpoint-9480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7b369366139134aff3e88950268f688dae028d80e16765d61ecd8279684a9c3 +size 5176 diff --git a/saves-olmo-cosine/config.json b/saves-olmo-cosine/config.json new file mode 100644 index 
0000000000000000000000000000000000000000..69830f45579d99650d832c24fb725715de4848e2 --- /dev/null +++ b/saves-olmo-cosine/config.json @@ -0,0 +1,26 @@ +{ + "architectures": [ + "OlmoForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "clip_qkv": null, + "eos_token_id": 50279, + "hidden_act": "silu", + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 768, + "max_position_embeddings": 2048, + "model_type": "olmo", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pad_token_id": 1, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-olmo-cosine/generation_config.json b/saves-olmo-cosine/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..424d0e318171a19c3fe3f1423f5d8dc090cc22d6 --- /dev/null +++ b/saves-olmo-cosine/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "eos_token_id": 50279, + "pad_token_id": 1, + "transformers_version": "4.42.4" +} diff --git a/saves-olmo-cosine/model.safetensors b/saves-olmo-cosine/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..04dfa651c1205817aafcd99008672c4062401a2e --- /dev/null +++ b/saves-olmo-cosine/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93bc2de53c0289ef5f825de79e66562914ed7b7457360e1598fc60afa979f112 +size 8341080 diff --git a/saves-olmo-cosine/result.log b/saves-olmo-cosine/result.log new file mode 100644 index 0000000000000000000000000000000000000000..5c39acf9fb947383c4407d8f4c2e8777bfd30e49 --- /dev/null +++ b/saves-olmo-cosine/result.log @@ -0,0 +1 @@ +{'train_runtime': 1709.065, 'train_samples_per_second': 5679.482, 'train_steps_per_second': 5.547, 'train_loss': 1.5855384756618411, 'epoch': 1.0} \ No newline at end of file diff --git 
a/saves-olmo-cosine/special_tokens_map.json b/saves-olmo-cosine/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-olmo-cosine/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-olmo-cosine/tokenizer.json b/saves-olmo-cosine/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-olmo-cosine/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + 
"add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + 
"Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 
293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 
442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 
588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, 
+ "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 
876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 
1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 
1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + 
"Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 
1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + 
"åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + 
"com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + 
"åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 
1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-olmo-cosine/tokenizer_config.json b/saves-olmo-cosine/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-olmo-cosine/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + 
"model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-olmo/checkpoint-9480/config.json b/saves-olmo/checkpoint-9480/config.json new file mode 100644 index 0000000000000000000000000000000000000000..69830f45579d99650d832c24fb725715de4848e2 --- /dev/null +++ b/saves-olmo/checkpoint-9480/config.json @@ -0,0 +1,26 @@ +{ + "architectures": [ + "OlmoForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "clip_qkv": null, + "eos_token_id": 50279, + "hidden_act": "silu", + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 768, + "max_position_embeddings": 2048, + "model_type": "olmo", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pad_token_id": 1, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-olmo/checkpoint-9480/generation_config.json b/saves-olmo/checkpoint-9480/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..424d0e318171a19c3fe3f1423f5d8dc090cc22d6 --- /dev/null +++ b/saves-olmo/checkpoint-9480/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "eos_token_id": 50279, + "pad_token_id": 1, + "transformers_version": "4.42.4" +} diff --git a/saves-olmo/checkpoint-9480/model.safetensors b/saves-olmo/checkpoint-9480/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..82acd7b8caa05b1dfc6ff296a3dcb1c0b7fc0d6b --- /dev/null +++ b/saves-olmo/checkpoint-9480/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c512be80642c2a5196fb348b6666fd3845edb8e787d2d6824ef5316d47255f84 +size 8341080 diff --git a/saves-olmo/checkpoint-9480/optimizer.pt b/saves-olmo/checkpoint-9480/optimizer.pt new file mode 100644 
index 0000000000000000000000000000000000000000..2eb33f936c69ce88c39ffad523a9759e7f068fc6 --- /dev/null +++ b/saves-olmo/checkpoint-9480/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1876506588423b3634cb890881b52746ac0090bbd8f97ec608cd78ebeac93370 +size 16692017 diff --git a/saves-olmo/checkpoint-9480/rng_state.pth b/saves-olmo/checkpoint-9480/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f8291cd6ce87668b786a72f3e93d072fbe54902 --- /dev/null +++ b/saves-olmo/checkpoint-9480/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c917636c7a58af68a29056522a757e9f9b99005b776641aa157c536967817d +size 14244 diff --git a/saves-olmo/checkpoint-9480/scheduler.pt b/saves-olmo/checkpoint-9480/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d4e146fb9369424bca1e920276a86162b00d56fd --- /dev/null +++ b/saves-olmo/checkpoint-9480/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c33e6451a8a4598628b3479890d40774857cdcb0d8604c19f1bee5bdefe1e2f9 +size 1064 diff --git a/saves-olmo/checkpoint-9480/special_tokens_map.json b/saves-olmo/checkpoint-9480/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-olmo/checkpoint-9480/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-olmo/checkpoint-9480/tokenizer.json b/saves-olmo/checkpoint-9480/tokenizer.json new file mode 100644 index 
0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-olmo/checkpoint-9480/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 
32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + 
"Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + 
"è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + 
"ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + 
"Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 
791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 
932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, 
+ "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 
1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 
1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 
1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, 
+ "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + 
"éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 
1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + 
"æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-olmo/checkpoint-9480/tokenizer_config.json b/saves-olmo/checkpoint-9480/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-olmo/checkpoint-9480/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + 
"errors": "replace", + "model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-olmo/checkpoint-9480/trainer_state.json b/saves-olmo/checkpoint-9480/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..852f7a3f4640cf369b34c55f9279b0ed50122a8c --- /dev/null +++ b/saves-olmo/checkpoint-9480/trainer_state.json @@ -0,0 +1,6669 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 9480, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0010548523206751054, + "grad_norm": 1.2895790338516235, + "learning_rate": 0.00015822784810126583, + "loss": 7.5103, + "step": 10 + }, + { + "epoch": 0.002109704641350211, + "grad_norm": 1.1878166198730469, + "learning_rate": 0.00031645569620253165, + "loss": 6.9208, + "step": 20 + }, + { + "epoch": 0.0031645569620253164, + "grad_norm": 0.8770753741264343, + "learning_rate": 0.00047468354430379745, + "loss": 6.2784, + "step": 30 + }, + { + "epoch": 0.004219409282700422, + "grad_norm": 1.3088784217834473, + "learning_rate": 0.0006329113924050633, + "loss": 5.8111, + "step": 40 + }, + { + "epoch": 0.005274261603375527, + "grad_norm": 0.7764528393745422, + "learning_rate": 0.0007911392405063291, + "loss": 5.3655, + "step": 50 + }, + { + "epoch": 0.006329113924050633, + "grad_norm": 1.6863492727279663, + "learning_rate": 0.0009493670886075949, + "loss": 4.861, + "step": 60 + }, + { + "epoch": 0.007383966244725738, + "grad_norm": 1.8368579149246216, + "learning_rate": 0.0011075949367088608, + "loss": 4.4565, + "step": 70 + }, + { + "epoch": 0.008438818565400843, + "grad_norm": 2.4368135929107666, + "learning_rate": 0.0012658227848101266, + "loss": 4.1835, + "step": 80 + }, + { + "epoch": 0.00949367088607595, + "grad_norm": 1.6571651697158813, + "learning_rate": 
0.0014240506329113926, + "loss": 3.9871, + "step": 90 + }, + { + "epoch": 0.010548523206751054, + "grad_norm": 1.0983068943023682, + "learning_rate": 0.0015, + "loss": 3.8367, + "step": 100 + }, + { + "epoch": 0.011603375527426161, + "grad_norm": 1.461104393005371, + "learning_rate": 0.0015, + "loss": 3.6704, + "step": 110 + }, + { + "epoch": 0.012658227848101266, + "grad_norm": 1.453973412513733, + "learning_rate": 0.0015, + "loss": 3.5587, + "step": 120 + }, + { + "epoch": 0.013713080168776372, + "grad_norm": 1.1175814867019653, + "learning_rate": 0.0015, + "loss": 3.4568, + "step": 130 + }, + { + "epoch": 0.014767932489451477, + "grad_norm": 0.8704009652137756, + "learning_rate": 0.0015, + "loss": 3.3603, + "step": 140 + }, + { + "epoch": 0.015822784810126583, + "grad_norm": 0.598082959651947, + "learning_rate": 0.0015, + "loss": 3.2714, + "step": 150 + }, + { + "epoch": 0.016877637130801686, + "grad_norm": 0.7218077778816223, + "learning_rate": 0.0015, + "loss": 3.2116, + "step": 160 + }, + { + "epoch": 0.017932489451476793, + "grad_norm": 1.3789252042770386, + "learning_rate": 0.0015, + "loss": 3.142, + "step": 170 + }, + { + "epoch": 0.0189873417721519, + "grad_norm": 0.8487445712089539, + "learning_rate": 0.0015, + "loss": 3.1004, + "step": 180 + }, + { + "epoch": 0.020042194092827006, + "grad_norm": 0.6962001919746399, + "learning_rate": 0.0015, + "loss": 3.0415, + "step": 190 + }, + { + "epoch": 0.02109704641350211, + "grad_norm": 0.762483537197113, + "learning_rate": 0.0015, + "loss": 2.9904, + "step": 200 + }, + { + "epoch": 0.022151898734177215, + "grad_norm": 0.6761586666107178, + "learning_rate": 0.0015, + "loss": 2.9563, + "step": 210 + }, + { + "epoch": 0.023206751054852322, + "grad_norm": 0.8960474729537964, + "learning_rate": 0.0015, + "loss": 2.9239, + "step": 220 + }, + { + "epoch": 0.024261603375527425, + "grad_norm": 0.791134774684906, + "learning_rate": 0.0015, + "loss": 2.8776, + "step": 230 + }, + { + "epoch": 0.02531645569620253, + 
"grad_norm": 0.7861804962158203, + "learning_rate": 0.0015, + "loss": 2.8376, + "step": 240 + }, + { + "epoch": 0.026371308016877638, + "grad_norm": 1.0708763599395752, + "learning_rate": 0.0015, + "loss": 2.8129, + "step": 250 + }, + { + "epoch": 0.027426160337552744, + "grad_norm": 0.6192085146903992, + "learning_rate": 0.0015, + "loss": 2.7762, + "step": 260 + }, + { + "epoch": 0.028481012658227847, + "grad_norm": 0.777834951877594, + "learning_rate": 0.0015, + "loss": 2.7517, + "step": 270 + }, + { + "epoch": 0.029535864978902954, + "grad_norm": 0.7877839803695679, + "learning_rate": 0.0015, + "loss": 2.7137, + "step": 280 + }, + { + "epoch": 0.03059071729957806, + "grad_norm": 0.8932504653930664, + "learning_rate": 0.0015, + "loss": 2.6929, + "step": 290 + }, + { + "epoch": 0.03164556962025317, + "grad_norm": 0.9444891810417175, + "learning_rate": 0.0015, + "loss": 2.6676, + "step": 300 + }, + { + "epoch": 0.03270042194092827, + "grad_norm": 0.7419419288635254, + "learning_rate": 0.0015, + "loss": 2.6395, + "step": 310 + }, + { + "epoch": 0.03375527426160337, + "grad_norm": 0.8228128552436829, + "learning_rate": 0.0015, + "loss": 2.6062, + "step": 320 + }, + { + "epoch": 0.03481012658227848, + "grad_norm": 1.1001321077346802, + "learning_rate": 0.0015, + "loss": 2.6027, + "step": 330 + }, + { + "epoch": 0.035864978902953586, + "grad_norm": 1.0025521516799927, + "learning_rate": 0.0015, + "loss": 2.5757, + "step": 340 + }, + { + "epoch": 0.03691983122362869, + "grad_norm": 0.8906620740890503, + "learning_rate": 0.0015, + "loss": 2.5369, + "step": 350 + }, + { + "epoch": 0.0379746835443038, + "grad_norm": 0.8724020719528198, + "learning_rate": 0.0015, + "loss": 2.5171, + "step": 360 + }, + { + "epoch": 0.039029535864978905, + "grad_norm": 0.6954470872879028, + "learning_rate": 0.0015, + "loss": 2.4998, + "step": 370 + }, + { + "epoch": 0.04008438818565401, + "grad_norm": 0.9800807237625122, + "learning_rate": 0.0015, + "loss": 2.4925, + "step": 380 + }, + { + 
"epoch": 0.04113924050632911, + "grad_norm": 1.3928110599517822, + "learning_rate": 0.0015, + "loss": 2.4731, + "step": 390 + }, + { + "epoch": 0.04219409282700422, + "grad_norm": 0.8699875473976135, + "learning_rate": 0.0015, + "loss": 2.4462, + "step": 400 + }, + { + "epoch": 0.043248945147679324, + "grad_norm": 0.8118987679481506, + "learning_rate": 0.0015, + "loss": 2.4301, + "step": 410 + }, + { + "epoch": 0.04430379746835443, + "grad_norm": 1.0215463638305664, + "learning_rate": 0.0015, + "loss": 2.416, + "step": 420 + }, + { + "epoch": 0.04535864978902954, + "grad_norm": 0.8042965531349182, + "learning_rate": 0.0015, + "loss": 2.3852, + "step": 430 + }, + { + "epoch": 0.046413502109704644, + "grad_norm": 1.4131215810775757, + "learning_rate": 0.0015, + "loss": 2.3721, + "step": 440 + }, + { + "epoch": 0.04746835443037975, + "grad_norm": 0.8721475601196289, + "learning_rate": 0.0015, + "loss": 2.3729, + "step": 450 + }, + { + "epoch": 0.04852320675105485, + "grad_norm": 1.006864309310913, + "learning_rate": 0.0015, + "loss": 2.3496, + "step": 460 + }, + { + "epoch": 0.049578059071729956, + "grad_norm": 0.8279053568840027, + "learning_rate": 0.0015, + "loss": 2.3319, + "step": 470 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 1.362330436706543, + "learning_rate": 0.0015, + "loss": 2.3239, + "step": 480 + }, + { + "epoch": 0.05168776371308017, + "grad_norm": 0.7392188310623169, + "learning_rate": 0.0015, + "loss": 2.2935, + "step": 490 + }, + { + "epoch": 0.052742616033755275, + "grad_norm": 0.9090680480003357, + "learning_rate": 0.0015, + "loss": 2.2894, + "step": 500 + }, + { + "epoch": 0.05379746835443038, + "grad_norm": 0.738229513168335, + "learning_rate": 0.0015, + "loss": 2.2751, + "step": 510 + }, + { + "epoch": 0.05485232067510549, + "grad_norm": 0.8596929907798767, + "learning_rate": 0.0015, + "loss": 2.2646, + "step": 520 + }, + { + "epoch": 0.05590717299578059, + "grad_norm": 0.7523012161254883, + "learning_rate": 0.0015, + "loss": 2.2469, 
+ "step": 530 + }, + { + "epoch": 0.056962025316455694, + "grad_norm": 0.7785469889640808, + "learning_rate": 0.0015, + "loss": 2.2286, + "step": 540 + }, + { + "epoch": 0.0580168776371308, + "grad_norm": 0.9491163492202759, + "learning_rate": 0.0015, + "loss": 2.2281, + "step": 550 + }, + { + "epoch": 0.05907172995780591, + "grad_norm": 1.0146554708480835, + "learning_rate": 0.0015, + "loss": 2.1926, + "step": 560 + }, + { + "epoch": 0.060126582278481014, + "grad_norm": 1.1120109558105469, + "learning_rate": 0.0015, + "loss": 2.1957, + "step": 570 + }, + { + "epoch": 0.06118143459915612, + "grad_norm": 1.3082340955734253, + "learning_rate": 0.0015, + "loss": 2.1998, + "step": 580 + }, + { + "epoch": 0.06223628691983123, + "grad_norm": 0.9811375141143799, + "learning_rate": 0.0015, + "loss": 2.1704, + "step": 590 + }, + { + "epoch": 0.06329113924050633, + "grad_norm": 0.9287076592445374, + "learning_rate": 0.0015, + "loss": 2.1512, + "step": 600 + }, + { + "epoch": 0.06434599156118144, + "grad_norm": 0.8090695142745972, + "learning_rate": 0.0015, + "loss": 2.1527, + "step": 610 + }, + { + "epoch": 0.06540084388185655, + "grad_norm": 0.8670853972434998, + "learning_rate": 0.0015, + "loss": 2.1402, + "step": 620 + }, + { + "epoch": 0.06645569620253164, + "grad_norm": 0.6998545527458191, + "learning_rate": 0.0015, + "loss": 2.125, + "step": 630 + }, + { + "epoch": 0.06751054852320675, + "grad_norm": 0.7511937618255615, + "learning_rate": 0.0015, + "loss": 2.139, + "step": 640 + }, + { + "epoch": 0.06856540084388185, + "grad_norm": 0.8139570951461792, + "learning_rate": 0.0015, + "loss": 2.1264, + "step": 650 + }, + { + "epoch": 0.06962025316455696, + "grad_norm": 0.9404725432395935, + "learning_rate": 0.0015, + "loss": 2.1133, + "step": 660 + }, + { + "epoch": 0.07067510548523206, + "grad_norm": 1.1467516422271729, + "learning_rate": 0.0015, + "loss": 2.0985, + "step": 670 + }, + { + "epoch": 0.07172995780590717, + "grad_norm": 0.8005135655403137, + "learning_rate": 
0.0015, + "loss": 2.0878, + "step": 680 + }, + { + "epoch": 0.07278481012658228, + "grad_norm": 0.7078540921211243, + "learning_rate": 0.0015, + "loss": 2.0945, + "step": 690 + }, + { + "epoch": 0.07383966244725738, + "grad_norm": 0.8982123136520386, + "learning_rate": 0.0015, + "loss": 2.0773, + "step": 700 + }, + { + "epoch": 0.07489451476793249, + "grad_norm": 0.802879810333252, + "learning_rate": 0.0015, + "loss": 2.0692, + "step": 710 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 0.8815244436264038, + "learning_rate": 0.0015, + "loss": 2.0487, + "step": 720 + }, + { + "epoch": 0.0770042194092827, + "grad_norm": 1.1558257341384888, + "learning_rate": 0.0015, + "loss": 2.0416, + "step": 730 + }, + { + "epoch": 0.07805907172995781, + "grad_norm": 1.1008294820785522, + "learning_rate": 0.0015, + "loss": 2.0556, + "step": 740 + }, + { + "epoch": 0.07911392405063292, + "grad_norm": 0.8698832988739014, + "learning_rate": 0.0015, + "loss": 2.0339, + "step": 750 + }, + { + "epoch": 0.08016877637130802, + "grad_norm": 0.8390915989875793, + "learning_rate": 0.0015, + "loss": 2.0299, + "step": 760 + }, + { + "epoch": 0.08122362869198312, + "grad_norm": 0.9007291793823242, + "learning_rate": 0.0015, + "loss": 2.0233, + "step": 770 + }, + { + "epoch": 0.08227848101265822, + "grad_norm": 1.1652406454086304, + "learning_rate": 0.0015, + "loss": 2.018, + "step": 780 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 0.6624262928962708, + "learning_rate": 0.0015, + "loss": 2.0212, + "step": 790 + }, + { + "epoch": 0.08438818565400844, + "grad_norm": 0.7017977237701416, + "learning_rate": 0.0015, + "loss": 1.9995, + "step": 800 + }, + { + "epoch": 0.08544303797468354, + "grad_norm": 0.7474579215049744, + "learning_rate": 0.0015, + "loss": 1.9812, + "step": 810 + }, + { + "epoch": 0.08649789029535865, + "grad_norm": 0.7321891784667969, + "learning_rate": 0.0015, + "loss": 1.9918, + "step": 820 + }, + { + "epoch": 0.08755274261603375, + "grad_norm": 
1.0261579751968384, + "learning_rate": 0.0015, + "loss": 1.9981, + "step": 830 + }, + { + "epoch": 0.08860759493670886, + "grad_norm": 0.7820348739624023, + "learning_rate": 0.0015, + "loss": 1.9761, + "step": 840 + }, + { + "epoch": 0.08966244725738397, + "grad_norm": 0.7755185961723328, + "learning_rate": 0.0015, + "loss": 1.9722, + "step": 850 + }, + { + "epoch": 0.09071729957805907, + "grad_norm": 1.0036184787750244, + "learning_rate": 0.0015, + "loss": 1.9764, + "step": 860 + }, + { + "epoch": 0.09177215189873418, + "grad_norm": 0.8055900931358337, + "learning_rate": 0.0015, + "loss": 1.9718, + "step": 870 + }, + { + "epoch": 0.09282700421940929, + "grad_norm": 0.6808052659034729, + "learning_rate": 0.0015, + "loss": 1.9602, + "step": 880 + }, + { + "epoch": 0.0938818565400844, + "grad_norm": 0.7907923460006714, + "learning_rate": 0.0015, + "loss": 1.9552, + "step": 890 + }, + { + "epoch": 0.0949367088607595, + "grad_norm": 0.9402481913566589, + "learning_rate": 0.0015, + "loss": 1.9489, + "step": 900 + }, + { + "epoch": 0.09599156118143459, + "grad_norm": 0.7503549456596375, + "learning_rate": 0.0015, + "loss": 1.9493, + "step": 910 + }, + { + "epoch": 0.0970464135021097, + "grad_norm": 0.6939796805381775, + "learning_rate": 0.0015, + "loss": 1.9427, + "step": 920 + }, + { + "epoch": 0.0981012658227848, + "grad_norm": 0.89671790599823, + "learning_rate": 0.0015, + "loss": 1.9265, + "step": 930 + }, + { + "epoch": 0.09915611814345991, + "grad_norm": 0.8847548365592957, + "learning_rate": 0.0015, + "loss": 1.9333, + "step": 940 + }, + { + "epoch": 0.10021097046413502, + "grad_norm": 1.1295051574707031, + "learning_rate": 0.0015, + "loss": 1.9172, + "step": 950 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 0.7542343735694885, + "learning_rate": 0.0015, + "loss": 1.9204, + "step": 960 + }, + { + "epoch": 0.10232067510548523, + "grad_norm": 1.184242844581604, + "learning_rate": 0.0015, + "loss": 1.921, + "step": 970 + }, + { + "epoch": 
0.10337552742616034, + "grad_norm": 0.7282382845878601, + "learning_rate": 0.0015, + "loss": 1.899, + "step": 980 + }, + { + "epoch": 0.10443037974683544, + "grad_norm": 0.7066535353660583, + "learning_rate": 0.0015, + "loss": 1.9004, + "step": 990 + }, + { + "epoch": 0.10548523206751055, + "grad_norm": 1.0305756330490112, + "learning_rate": 0.0015, + "loss": 1.912, + "step": 1000 + }, + { + "epoch": 0.10654008438818566, + "grad_norm": 0.9266646504402161, + "learning_rate": 0.0015, + "loss": 1.9061, + "step": 1010 + }, + { + "epoch": 0.10759493670886076, + "grad_norm": 0.752181887626648, + "learning_rate": 0.0015, + "loss": 1.8927, + "step": 1020 + }, + { + "epoch": 0.10864978902953587, + "grad_norm": 0.7992343902587891, + "learning_rate": 0.0015, + "loss": 1.8921, + "step": 1030 + }, + { + "epoch": 0.10970464135021098, + "grad_norm": 0.7804614305496216, + "learning_rate": 0.0015, + "loss": 1.8815, + "step": 1040 + }, + { + "epoch": 0.11075949367088607, + "grad_norm": 1.2560460567474365, + "learning_rate": 0.0015, + "loss": 1.8867, + "step": 1050 + }, + { + "epoch": 0.11181434599156118, + "grad_norm": 0.7629755735397339, + "learning_rate": 0.0015, + "loss": 1.8774, + "step": 1060 + }, + { + "epoch": 0.11286919831223628, + "grad_norm": 0.7276808023452759, + "learning_rate": 0.0015, + "loss": 1.865, + "step": 1070 + }, + { + "epoch": 0.11392405063291139, + "grad_norm": 0.812589704990387, + "learning_rate": 0.0015, + "loss": 1.8646, + "step": 1080 + }, + { + "epoch": 0.1149789029535865, + "grad_norm": 0.9490426182746887, + "learning_rate": 0.0015, + "loss": 1.872, + "step": 1090 + }, + { + "epoch": 0.1160337552742616, + "grad_norm": 0.7158961892127991, + "learning_rate": 0.0015, + "loss": 1.859, + "step": 1100 + }, + { + "epoch": 0.11708860759493671, + "grad_norm": 0.7843102812767029, + "learning_rate": 0.0015, + "loss": 1.8659, + "step": 1110 + }, + { + "epoch": 0.11814345991561181, + "grad_norm": 0.9048969745635986, + "learning_rate": 0.0015, + "loss": 1.8592, + 
"step": 1120 + }, + { + "epoch": 0.11919831223628692, + "grad_norm": 1.0914297103881836, + "learning_rate": 0.0015, + "loss": 1.8482, + "step": 1130 + }, + { + "epoch": 0.12025316455696203, + "grad_norm": 0.9063001275062561, + "learning_rate": 0.0015, + "loss": 1.8597, + "step": 1140 + }, + { + "epoch": 0.12130801687763713, + "grad_norm": 1.0726813077926636, + "learning_rate": 0.0015, + "loss": 1.8441, + "step": 1150 + }, + { + "epoch": 0.12236286919831224, + "grad_norm": 1.0722492933273315, + "learning_rate": 0.0015, + "loss": 1.8489, + "step": 1160 + }, + { + "epoch": 0.12341772151898735, + "grad_norm": 0.8034614324569702, + "learning_rate": 0.0015, + "loss": 1.8309, + "step": 1170 + }, + { + "epoch": 0.12447257383966245, + "grad_norm": 0.6905298829078674, + "learning_rate": 0.0015, + "loss": 1.8207, + "step": 1180 + }, + { + "epoch": 0.12552742616033755, + "grad_norm": 0.6911357045173645, + "learning_rate": 0.0015, + "loss": 1.8265, + "step": 1190 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 1.7193440198898315, + "learning_rate": 0.0015, + "loss": 1.8433, + "step": 1200 + }, + { + "epoch": 0.12763713080168776, + "grad_norm": 1.1396887302398682, + "learning_rate": 0.0015, + "loss": 1.8445, + "step": 1210 + }, + { + "epoch": 0.12869198312236288, + "grad_norm": 0.7247800230979919, + "learning_rate": 0.0015, + "loss": 1.8053, + "step": 1220 + }, + { + "epoch": 0.12974683544303797, + "grad_norm": 0.7404005527496338, + "learning_rate": 0.0015, + "loss": 1.8012, + "step": 1230 + }, + { + "epoch": 0.1308016877637131, + "grad_norm": 0.8438906669616699, + "learning_rate": 0.0015, + "loss": 1.8203, + "step": 1240 + }, + { + "epoch": 0.13185654008438819, + "grad_norm": 0.7119379043579102, + "learning_rate": 0.0015, + "loss": 1.8257, + "step": 1250 + }, + { + "epoch": 0.13291139240506328, + "grad_norm": 0.8522639274597168, + "learning_rate": 0.0015, + "loss": 1.8127, + "step": 1260 + }, + { + "epoch": 0.1339662447257384, + "grad_norm": 0.8279857635498047, + 
"learning_rate": 0.0015, + "loss": 1.8052, + "step": 1270 + }, + { + "epoch": 0.1350210970464135, + "grad_norm": 0.6656583547592163, + "learning_rate": 0.0015, + "loss": 1.817, + "step": 1280 + }, + { + "epoch": 0.1360759493670886, + "grad_norm": 0.9513770937919617, + "learning_rate": 0.0015, + "loss": 1.8042, + "step": 1290 + }, + { + "epoch": 0.1371308016877637, + "grad_norm": 0.7857588529586792, + "learning_rate": 0.0015, + "loss": 1.7933, + "step": 1300 + }, + { + "epoch": 0.13818565400843882, + "grad_norm": 0.6761679649353027, + "learning_rate": 0.0015, + "loss": 1.7963, + "step": 1310 + }, + { + "epoch": 0.13924050632911392, + "grad_norm": 0.7986840009689331, + "learning_rate": 0.0015, + "loss": 1.7991, + "step": 1320 + }, + { + "epoch": 0.14029535864978904, + "grad_norm": 1.5067880153656006, + "learning_rate": 0.0015, + "loss": 1.7961, + "step": 1330 + }, + { + "epoch": 0.14135021097046413, + "grad_norm": 1.2412381172180176, + "learning_rate": 0.0015, + "loss": 1.807, + "step": 1340 + }, + { + "epoch": 0.14240506329113925, + "grad_norm": 0.7900198101997375, + "learning_rate": 0.0015, + "loss": 1.7903, + "step": 1350 + }, + { + "epoch": 0.14345991561181434, + "grad_norm": 0.6337395906448364, + "learning_rate": 0.0015, + "loss": 1.7834, + "step": 1360 + }, + { + "epoch": 0.14451476793248946, + "grad_norm": 0.7404577136039734, + "learning_rate": 0.0015, + "loss": 1.7794, + "step": 1370 + }, + { + "epoch": 0.14556962025316456, + "grad_norm": 0.7440230250358582, + "learning_rate": 0.0015, + "loss": 1.7739, + "step": 1380 + }, + { + "epoch": 0.14662447257383968, + "grad_norm": 0.7305943369865417, + "learning_rate": 0.0015, + "loss": 1.7829, + "step": 1390 + }, + { + "epoch": 0.14767932489451477, + "grad_norm": 0.9844694137573242, + "learning_rate": 0.0015, + "loss": 1.7675, + "step": 1400 + }, + { + "epoch": 0.14873417721518986, + "grad_norm": 0.7381137609481812, + "learning_rate": 0.0015, + "loss": 1.7707, + "step": 1410 + }, + { + "epoch": 0.14978902953586498, + 
"grad_norm": 0.9263442158699036, + "learning_rate": 0.0015, + "loss": 1.7756, + "step": 1420 + }, + { + "epoch": 0.15084388185654007, + "grad_norm": 0.7978230118751526, + "learning_rate": 0.0015, + "loss": 1.7694, + "step": 1430 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 0.7997435331344604, + "learning_rate": 0.0015, + "loss": 1.7679, + "step": 1440 + }, + { + "epoch": 0.1529535864978903, + "grad_norm": 0.9145954847335815, + "learning_rate": 0.0015, + "loss": 1.7668, + "step": 1450 + }, + { + "epoch": 0.1540084388185654, + "grad_norm": 0.7129148244857788, + "learning_rate": 0.0015, + "loss": 1.758, + "step": 1460 + }, + { + "epoch": 0.1550632911392405, + "grad_norm": 0.9551963210105896, + "learning_rate": 0.0015, + "loss": 1.7556, + "step": 1470 + }, + { + "epoch": 0.15611814345991562, + "grad_norm": 0.7375782132148743, + "learning_rate": 0.0015, + "loss": 1.7607, + "step": 1480 + }, + { + "epoch": 0.1571729957805907, + "grad_norm": 0.7058078646659851, + "learning_rate": 0.0015, + "loss": 1.7572, + "step": 1490 + }, + { + "epoch": 0.15822784810126583, + "grad_norm": 0.8284546732902527, + "learning_rate": 0.0015, + "loss": 1.7581, + "step": 1500 + }, + { + "epoch": 0.15928270042194093, + "grad_norm": 0.8752997517585754, + "learning_rate": 0.0015, + "loss": 1.7596, + "step": 1510 + }, + { + "epoch": 0.16033755274261605, + "grad_norm": 0.9044471979141235, + "learning_rate": 0.0015, + "loss": 1.7594, + "step": 1520 + }, + { + "epoch": 0.16139240506329114, + "grad_norm": 0.888731837272644, + "learning_rate": 0.0015, + "loss": 1.7393, + "step": 1530 + }, + { + "epoch": 0.16244725738396623, + "grad_norm": 0.776642382144928, + "learning_rate": 0.0015, + "loss": 1.728, + "step": 1540 + }, + { + "epoch": 0.16350210970464135, + "grad_norm": 0.6723219156265259, + "learning_rate": 0.0015, + "loss": 1.7318, + "step": 1550 + }, + { + "epoch": 0.16455696202531644, + "grad_norm": 0.9292169213294983, + "learning_rate": 0.0015, + "loss": 1.7363, + "step": 1560 + }, + { + 
"epoch": 0.16561181434599156, + "grad_norm": 0.6994373202323914, + "learning_rate": 0.0015, + "loss": 1.7292, + "step": 1570 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 0.7503302693367004, + "learning_rate": 0.0015, + "loss": 1.7384, + "step": 1580 + }, + { + "epoch": 0.16772151898734178, + "grad_norm": 0.8646478056907654, + "learning_rate": 0.0015, + "loss": 1.7347, + "step": 1590 + }, + { + "epoch": 0.16877637130801687, + "grad_norm": 0.6981494426727295, + "learning_rate": 0.0015, + "loss": 1.7378, + "step": 1600 + }, + { + "epoch": 0.169831223628692, + "grad_norm": 0.6490494012832642, + "learning_rate": 0.0015, + "loss": 1.7343, + "step": 1610 + }, + { + "epoch": 0.17088607594936708, + "grad_norm": 0.6875865459442139, + "learning_rate": 0.0015, + "loss": 1.7089, + "step": 1620 + }, + { + "epoch": 0.1719409282700422, + "grad_norm": 0.692916989326477, + "learning_rate": 0.0015, + "loss": 1.7152, + "step": 1630 + }, + { + "epoch": 0.1729957805907173, + "grad_norm": 0.622187077999115, + "learning_rate": 0.0015, + "loss": 1.7193, + "step": 1640 + }, + { + "epoch": 0.17405063291139242, + "grad_norm": 1.1938786506652832, + "learning_rate": 0.0015, + "loss": 1.7119, + "step": 1650 + }, + { + "epoch": 0.1751054852320675, + "grad_norm": 0.8419353365898132, + "learning_rate": 0.0015, + "loss": 1.7153, + "step": 1660 + }, + { + "epoch": 0.17616033755274263, + "grad_norm": 0.9157094359397888, + "learning_rate": 0.0015, + "loss": 1.7273, + "step": 1670 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 1.2703893184661865, + "learning_rate": 0.0015, + "loss": 1.7089, + "step": 1680 + }, + { + "epoch": 0.17827004219409281, + "grad_norm": 0.6720706820487976, + "learning_rate": 0.0015, + "loss": 1.7035, + "step": 1690 + }, + { + "epoch": 0.17932489451476794, + "grad_norm": 0.8026383519172668, + "learning_rate": 0.0015, + "loss": 1.7183, + "step": 1700 + }, + { + "epoch": 0.18037974683544303, + "grad_norm": 0.8251804113388062, + "learning_rate": 0.0015, + "loss": 
1.6985, + "step": 1710 + }, + { + "epoch": 0.18143459915611815, + "grad_norm": 0.8267444372177124, + "learning_rate": 0.0015, + "loss": 1.7003, + "step": 1720 + }, + { + "epoch": 0.18248945147679324, + "grad_norm": 0.7615325450897217, + "learning_rate": 0.0015, + "loss": 1.7, + "step": 1730 + }, + { + "epoch": 0.18354430379746836, + "grad_norm": 0.6987568736076355, + "learning_rate": 0.0015, + "loss": 1.7055, + "step": 1740 + }, + { + "epoch": 0.18459915611814345, + "grad_norm": 0.76456218957901, + "learning_rate": 0.0015, + "loss": 1.7191, + "step": 1750 + }, + { + "epoch": 0.18565400843881857, + "grad_norm": 0.7193688154220581, + "learning_rate": 0.0015, + "loss": 1.6984, + "step": 1760 + }, + { + "epoch": 0.18670886075949367, + "grad_norm": 0.9049598574638367, + "learning_rate": 0.0015, + "loss": 1.7057, + "step": 1770 + }, + { + "epoch": 0.1877637130801688, + "grad_norm": 1.1458799839019775, + "learning_rate": 0.0015, + "loss": 1.7055, + "step": 1780 + }, + { + "epoch": 0.18881856540084388, + "grad_norm": 1.5561580657958984, + "learning_rate": 0.0015, + "loss": 1.7069, + "step": 1790 + }, + { + "epoch": 0.189873417721519, + "grad_norm": 1.1573898792266846, + "learning_rate": 0.0015, + "loss": 1.7107, + "step": 1800 + }, + { + "epoch": 0.1909282700421941, + "grad_norm": 1.0237706899642944, + "learning_rate": 0.0015, + "loss": 1.6853, + "step": 1810 + }, + { + "epoch": 0.19198312236286919, + "grad_norm": 0.7053158283233643, + "learning_rate": 0.0015, + "loss": 1.6802, + "step": 1820 + }, + { + "epoch": 0.1930379746835443, + "grad_norm": 0.768510639667511, + "learning_rate": 0.0015, + "loss": 1.6814, + "step": 1830 + }, + { + "epoch": 0.1940928270042194, + "grad_norm": 0.6780675649642944, + "learning_rate": 0.0015, + "loss": 1.6814, + "step": 1840 + }, + { + "epoch": 0.19514767932489452, + "grad_norm": 0.8621104955673218, + "learning_rate": 0.0015, + "loss": 1.6854, + "step": 1850 + }, + { + "epoch": 0.1962025316455696, + "grad_norm": 0.6677209138870239, + 
"learning_rate": 0.0015, + "loss": 1.6914, + "step": 1860 + }, + { + "epoch": 0.19725738396624473, + "grad_norm": 0.6893065571784973, + "learning_rate": 0.0015, + "loss": 1.6879, + "step": 1870 + }, + { + "epoch": 0.19831223628691982, + "grad_norm": 0.6619040966033936, + "learning_rate": 0.0015, + "loss": 1.6783, + "step": 1880 + }, + { + "epoch": 0.19936708860759494, + "grad_norm": 0.7361657619476318, + "learning_rate": 0.0015, + "loss": 1.683, + "step": 1890 + }, + { + "epoch": 0.20042194092827004, + "grad_norm": 0.7643239498138428, + "learning_rate": 0.0015, + "loss": 1.6812, + "step": 1900 + }, + { + "epoch": 0.20147679324894516, + "grad_norm": 0.7050077319145203, + "learning_rate": 0.0015, + "loss": 1.6804, + "step": 1910 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 0.8184192180633545, + "learning_rate": 0.0015, + "loss": 1.6807, + "step": 1920 + }, + { + "epoch": 0.20358649789029537, + "grad_norm": 0.6644271612167358, + "learning_rate": 0.0015, + "loss": 1.6803, + "step": 1930 + }, + { + "epoch": 0.20464135021097046, + "grad_norm": 0.6723122000694275, + "learning_rate": 0.0015, + "loss": 1.6674, + "step": 1940 + }, + { + "epoch": 0.20569620253164558, + "grad_norm": 0.607860267162323, + "learning_rate": 0.0015, + "loss": 1.6625, + "step": 1950 + }, + { + "epoch": 0.20675105485232068, + "grad_norm": 0.7268304228782654, + "learning_rate": 0.0015, + "loss": 1.6719, + "step": 1960 + }, + { + "epoch": 0.20780590717299577, + "grad_norm": 0.7484779953956604, + "learning_rate": 0.0015, + "loss": 1.6836, + "step": 1970 + }, + { + "epoch": 0.2088607594936709, + "grad_norm": 0.8863279223442078, + "learning_rate": 0.0015, + "loss": 1.6643, + "step": 1980 + }, + { + "epoch": 0.20991561181434598, + "grad_norm": 0.9187822341918945, + "learning_rate": 0.0015, + "loss": 1.6605, + "step": 1990 + }, + { + "epoch": 0.2109704641350211, + "grad_norm": 1.0011094808578491, + "learning_rate": 0.0015, + "loss": 1.6623, + "step": 2000 + }, + { + "epoch": 0.2120253164556962, + 
"grad_norm": 0.6539487242698669, + "learning_rate": 0.0015, + "loss": 1.6705, + "step": 2010 + }, + { + "epoch": 0.21308016877637131, + "grad_norm": 1.120922565460205, + "learning_rate": 0.0015, + "loss": 1.6813, + "step": 2020 + }, + { + "epoch": 0.2141350210970464, + "grad_norm": 0.6275647878646851, + "learning_rate": 0.0015, + "loss": 1.6632, + "step": 2030 + }, + { + "epoch": 0.21518987341772153, + "grad_norm": 0.7504536509513855, + "learning_rate": 0.0015, + "loss": 1.6614, + "step": 2040 + }, + { + "epoch": 0.21624472573839662, + "grad_norm": 0.8328996300697327, + "learning_rate": 0.0015, + "loss": 1.6633, + "step": 2050 + }, + { + "epoch": 0.21729957805907174, + "grad_norm": 0.7278444170951843, + "learning_rate": 0.0015, + "loss": 1.6552, + "step": 2060 + }, + { + "epoch": 0.21835443037974683, + "grad_norm": 0.9023100733757019, + "learning_rate": 0.0015, + "loss": 1.6549, + "step": 2070 + }, + { + "epoch": 0.21940928270042195, + "grad_norm": 0.7968533635139465, + "learning_rate": 0.0015, + "loss": 1.6606, + "step": 2080 + }, + { + "epoch": 0.22046413502109705, + "grad_norm": 0.9829485416412354, + "learning_rate": 0.0015, + "loss": 1.6524, + "step": 2090 + }, + { + "epoch": 0.22151898734177214, + "grad_norm": 0.9531031250953674, + "learning_rate": 0.0015, + "loss": 1.6552, + "step": 2100 + }, + { + "epoch": 0.22257383966244726, + "grad_norm": 0.6997367739677429, + "learning_rate": 0.0015, + "loss": 1.6491, + "step": 2110 + }, + { + "epoch": 0.22362869198312235, + "grad_norm": 0.6546147465705872, + "learning_rate": 0.0015, + "loss": 1.6434, + "step": 2120 + }, + { + "epoch": 0.22468354430379747, + "grad_norm": 0.6572033166885376, + "learning_rate": 0.0015, + "loss": 1.6482, + "step": 2130 + }, + { + "epoch": 0.22573839662447256, + "grad_norm": 0.6487157344818115, + "learning_rate": 0.0015, + "loss": 1.642, + "step": 2140 + }, + { + "epoch": 0.22679324894514769, + "grad_norm": 0.9983278512954712, + "learning_rate": 0.0015, + "loss": 1.6496, + "step": 2150 + }, 
+ { + "epoch": 0.22784810126582278, + "grad_norm": 1.181892991065979, + "learning_rate": 0.0015, + "loss": 1.6617, + "step": 2160 + }, + { + "epoch": 0.2289029535864979, + "grad_norm": 0.8899744153022766, + "learning_rate": 0.0015, + "loss": 1.6497, + "step": 2170 + }, + { + "epoch": 0.229957805907173, + "grad_norm": 0.7175527215003967, + "learning_rate": 0.0015, + "loss": 1.6289, + "step": 2180 + }, + { + "epoch": 0.2310126582278481, + "grad_norm": 0.6469169855117798, + "learning_rate": 0.0015, + "loss": 1.6458, + "step": 2190 + }, + { + "epoch": 0.2320675105485232, + "grad_norm": 0.6511648893356323, + "learning_rate": 0.0015, + "loss": 1.646, + "step": 2200 + }, + { + "epoch": 0.23312236286919832, + "grad_norm": 0.7095751762390137, + "learning_rate": 0.0015, + "loss": 1.6292, + "step": 2210 + }, + { + "epoch": 0.23417721518987342, + "grad_norm": 0.6859976053237915, + "learning_rate": 0.0015, + "loss": 1.6449, + "step": 2220 + }, + { + "epoch": 0.23523206751054854, + "grad_norm": 0.8252400755882263, + "learning_rate": 0.0015, + "loss": 1.6385, + "step": 2230 + }, + { + "epoch": 0.23628691983122363, + "grad_norm": 0.6271975040435791, + "learning_rate": 0.0015, + "loss": 1.634, + "step": 2240 + }, + { + "epoch": 0.23734177215189872, + "grad_norm": 0.6858907341957092, + "learning_rate": 0.0015, + "loss": 1.6317, + "step": 2250 + }, + { + "epoch": 0.23839662447257384, + "grad_norm": 0.6653826236724854, + "learning_rate": 0.0015, + "loss": 1.6227, + "step": 2260 + }, + { + "epoch": 0.23945147679324894, + "grad_norm": 0.6133192777633667, + "learning_rate": 0.0015, + "loss": 1.654, + "step": 2270 + }, + { + "epoch": 0.24050632911392406, + "grad_norm": 0.7424896359443665, + "learning_rate": 0.0015, + "loss": 1.6404, + "step": 2280 + }, + { + "epoch": 0.24156118143459915, + "grad_norm": 0.7170125246047974, + "learning_rate": 0.0015, + "loss": 1.6295, + "step": 2290 + }, + { + "epoch": 0.24261603375527427, + "grad_norm": 0.7986323833465576, + "learning_rate": 0.0015, + 
"loss": 1.6246, + "step": 2300 + }, + { + "epoch": 0.24367088607594936, + "grad_norm": 0.7498612999916077, + "learning_rate": 0.0015, + "loss": 1.6275, + "step": 2310 + }, + { + "epoch": 0.24472573839662448, + "grad_norm": 0.6841030120849609, + "learning_rate": 0.0015, + "loss": 1.6354, + "step": 2320 + }, + { + "epoch": 0.24578059071729957, + "grad_norm": 0.9954431653022766, + "learning_rate": 0.0015, + "loss": 1.6219, + "step": 2330 + }, + { + "epoch": 0.2468354430379747, + "grad_norm": 0.7600809335708618, + "learning_rate": 0.0015, + "loss": 1.6181, + "step": 2340 + }, + { + "epoch": 0.2478902953586498, + "grad_norm": 1.1110663414001465, + "learning_rate": 0.0015, + "loss": 1.617, + "step": 2350 + }, + { + "epoch": 0.2489451476793249, + "grad_norm": 0.7403277158737183, + "learning_rate": 0.0015, + "loss": 1.6251, + "step": 2360 + }, + { + "epoch": 0.25, + "grad_norm": 0.7344746589660645, + "learning_rate": 0.0015, + "loss": 1.6239, + "step": 2370 + }, + { + "epoch": 0.2510548523206751, + "grad_norm": 0.6385779976844788, + "learning_rate": 0.0015, + "loss": 1.6199, + "step": 2380 + }, + { + "epoch": 0.2521097046413502, + "grad_norm": 1.110111951828003, + "learning_rate": 0.0015, + "loss": 1.6198, + "step": 2390 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 0.7013465762138367, + "learning_rate": 0.0015, + "loss": 1.6203, + "step": 2400 + }, + { + "epoch": 0.2542194092827004, + "grad_norm": 0.6319063305854797, + "learning_rate": 0.0015, + "loss": 1.6232, + "step": 2410 + }, + { + "epoch": 0.2552742616033755, + "grad_norm": 0.6667777895927429, + "learning_rate": 0.0015, + "loss": 1.6182, + "step": 2420 + }, + { + "epoch": 0.2563291139240506, + "grad_norm": 0.7346725463867188, + "learning_rate": 0.0015, + "loss": 1.6132, + "step": 2430 + }, + { + "epoch": 0.25738396624472576, + "grad_norm": 0.6918299794197083, + "learning_rate": 0.0015, + "loss": 1.6254, + "step": 2440 + }, + { + "epoch": 0.25843881856540085, + "grad_norm": 0.6942642331123352, + 
"learning_rate": 0.0015, + "loss": 1.612, + "step": 2450 + }, + { + "epoch": 0.25949367088607594, + "grad_norm": 0.6046151518821716, + "learning_rate": 0.0015, + "loss": 1.6253, + "step": 2460 + }, + { + "epoch": 0.26054852320675104, + "grad_norm": 0.9161781668663025, + "learning_rate": 0.0015, + "loss": 1.6145, + "step": 2470 + }, + { + "epoch": 0.2616033755274262, + "grad_norm": 0.7872688174247742, + "learning_rate": 0.0015, + "loss": 1.6219, + "step": 2480 + }, + { + "epoch": 0.2626582278481013, + "grad_norm": 0.6300092935562134, + "learning_rate": 0.0015, + "loss": 1.6071, + "step": 2490 + }, + { + "epoch": 0.26371308016877637, + "grad_norm": 0.6599094867706299, + "learning_rate": 0.0015, + "loss": 1.6119, + "step": 2500 + }, + { + "epoch": 0.26476793248945146, + "grad_norm": 0.8129178881645203, + "learning_rate": 0.0015, + "loss": 1.6074, + "step": 2510 + }, + { + "epoch": 0.26582278481012656, + "grad_norm": 0.9972307682037354, + "learning_rate": 0.0015, + "loss": 1.6118, + "step": 2520 + }, + { + "epoch": 0.2668776371308017, + "grad_norm": 0.7394557595252991, + "learning_rate": 0.0015, + "loss": 1.6067, + "step": 2530 + }, + { + "epoch": 0.2679324894514768, + "grad_norm": 0.6417461633682251, + "learning_rate": 0.0015, + "loss": 1.6106, + "step": 2540 + }, + { + "epoch": 0.2689873417721519, + "grad_norm": 0.9213743805885315, + "learning_rate": 0.0015, + "loss": 1.6076, + "step": 2550 + }, + { + "epoch": 0.270042194092827, + "grad_norm": 0.6463521718978882, + "learning_rate": 0.0015, + "loss": 1.6121, + "step": 2560 + }, + { + "epoch": 0.27109704641350213, + "grad_norm": 0.7235851883888245, + "learning_rate": 0.0015, + "loss": 1.6058, + "step": 2570 + }, + { + "epoch": 0.2721518987341772, + "grad_norm": 0.673057496547699, + "learning_rate": 0.0015, + "loss": 1.603, + "step": 2580 + }, + { + "epoch": 0.2732067510548523, + "grad_norm": 0.7875622510910034, + "learning_rate": 0.0015, + "loss": 1.6106, + "step": 2590 + }, + { + "epoch": 0.2742616033755274, + 
"grad_norm": 0.8929771780967712, + "learning_rate": 0.0015, + "loss": 1.611, + "step": 2600 + }, + { + "epoch": 0.27531645569620256, + "grad_norm": 0.7222256660461426, + "learning_rate": 0.0015, + "loss": 1.6073, + "step": 2610 + }, + { + "epoch": 0.27637130801687765, + "grad_norm": 1.76078462600708, + "learning_rate": 0.0015, + "loss": 1.6106, + "step": 2620 + }, + { + "epoch": 0.27742616033755274, + "grad_norm": 0.8626707792282104, + "learning_rate": 0.0015, + "loss": 1.5968, + "step": 2630 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 0.6267821192741394, + "learning_rate": 0.0015, + "loss": 1.5993, + "step": 2640 + }, + { + "epoch": 0.2795358649789029, + "grad_norm": 0.7465523481369019, + "learning_rate": 0.0015, + "loss": 1.5965, + "step": 2650 + }, + { + "epoch": 0.2805907172995781, + "grad_norm": 0.7053288817405701, + "learning_rate": 0.0015, + "loss": 1.5943, + "step": 2660 + }, + { + "epoch": 0.28164556962025317, + "grad_norm": 0.6308602690696716, + "learning_rate": 0.0015, + "loss": 1.5921, + "step": 2670 + }, + { + "epoch": 0.28270042194092826, + "grad_norm": 0.6905506253242493, + "learning_rate": 0.0015, + "loss": 1.5935, + "step": 2680 + }, + { + "epoch": 0.28375527426160335, + "grad_norm": 0.8152787685394287, + "learning_rate": 0.0015, + "loss": 1.593, + "step": 2690 + }, + { + "epoch": 0.2848101265822785, + "grad_norm": 0.646096408367157, + "learning_rate": 0.0015, + "loss": 1.5893, + "step": 2700 + }, + { + "epoch": 0.2858649789029536, + "grad_norm": 0.7161581516265869, + "learning_rate": 0.0015, + "loss": 1.5941, + "step": 2710 + }, + { + "epoch": 0.2869198312236287, + "grad_norm": 0.6538783311843872, + "learning_rate": 0.0015, + "loss": 1.5987, + "step": 2720 + }, + { + "epoch": 0.2879746835443038, + "grad_norm": 0.6364704370498657, + "learning_rate": 0.0015, + "loss": 1.5753, + "step": 2730 + }, + { + "epoch": 0.2890295358649789, + "grad_norm": 0.7659748196601868, + "learning_rate": 0.0015, + "loss": 1.5887, + "step": 2740 + }, + { + 
"epoch": 0.290084388185654, + "grad_norm": 0.6636857986450195, + "learning_rate": 0.0015, + "loss": 1.5942, + "step": 2750 + }, + { + "epoch": 0.2911392405063291, + "grad_norm": 0.7140323519706726, + "learning_rate": 0.0015, + "loss": 1.6033, + "step": 2760 + }, + { + "epoch": 0.2921940928270042, + "grad_norm": 0.693516731262207, + "learning_rate": 0.0015, + "loss": 1.598, + "step": 2770 + }, + { + "epoch": 0.29324894514767935, + "grad_norm": 0.7008290886878967, + "learning_rate": 0.0015, + "loss": 1.5983, + "step": 2780 + }, + { + "epoch": 0.29430379746835444, + "grad_norm": 0.6975066661834717, + "learning_rate": 0.0015, + "loss": 1.5939, + "step": 2790 + }, + { + "epoch": 0.29535864978902954, + "grad_norm": 0.629927933216095, + "learning_rate": 0.0015, + "loss": 1.5928, + "step": 2800 + }, + { + "epoch": 0.29641350210970463, + "grad_norm": 0.7001619338989258, + "learning_rate": 0.0015, + "loss": 1.5905, + "step": 2810 + }, + { + "epoch": 0.2974683544303797, + "grad_norm": 0.6938055157661438, + "learning_rate": 0.0015, + "loss": 1.5981, + "step": 2820 + }, + { + "epoch": 0.29852320675105487, + "grad_norm": 0.8264667391777039, + "learning_rate": 0.0015, + "loss": 1.5713, + "step": 2830 + }, + { + "epoch": 0.29957805907172996, + "grad_norm": 1.8283389806747437, + "learning_rate": 0.0015, + "loss": 1.5784, + "step": 2840 + }, + { + "epoch": 0.30063291139240506, + "grad_norm": 0.940721333026886, + "learning_rate": 0.0015, + "loss": 1.5872, + "step": 2850 + }, + { + "epoch": 0.30168776371308015, + "grad_norm": 0.7044954895973206, + "learning_rate": 0.0015, + "loss": 1.5793, + "step": 2860 + }, + { + "epoch": 0.3027426160337553, + "grad_norm": 0.9808927178382874, + "learning_rate": 0.0015, + "loss": 1.5802, + "step": 2870 + }, + { + "epoch": 0.3037974683544304, + "grad_norm": 0.6823117136955261, + "learning_rate": 0.0015, + "loss": 1.5702, + "step": 2880 + }, + { + "epoch": 0.3048523206751055, + "grad_norm": 0.6439627408981323, + "learning_rate": 0.0015, + "loss": 
1.577, + "step": 2890 + }, + { + "epoch": 0.3059071729957806, + "grad_norm": 0.7213109731674194, + "learning_rate": 0.0015, + "loss": 1.5824, + "step": 2900 + }, + { + "epoch": 0.3069620253164557, + "grad_norm": 1.0450434684753418, + "learning_rate": 0.0015, + "loss": 1.5851, + "step": 2910 + }, + { + "epoch": 0.3080168776371308, + "grad_norm": 0.782852053642273, + "learning_rate": 0.0015, + "loss": 1.5794, + "step": 2920 + }, + { + "epoch": 0.3090717299578059, + "grad_norm": 0.658868134021759, + "learning_rate": 0.0015, + "loss": 1.5704, + "step": 2930 + }, + { + "epoch": 0.310126582278481, + "grad_norm": 0.6660469174385071, + "learning_rate": 0.0015, + "loss": 1.5785, + "step": 2940 + }, + { + "epoch": 0.3111814345991561, + "grad_norm": 1.2032667398452759, + "learning_rate": 0.0015, + "loss": 1.5886, + "step": 2950 + }, + { + "epoch": 0.31223628691983124, + "grad_norm": 1.0701326131820679, + "learning_rate": 0.0015, + "loss": 1.5759, + "step": 2960 + }, + { + "epoch": 0.31329113924050633, + "grad_norm": 1.1308834552764893, + "learning_rate": 0.0015, + "loss": 1.5579, + "step": 2970 + }, + { + "epoch": 0.3143459915611814, + "grad_norm": 0.7300458550453186, + "learning_rate": 0.0015, + "loss": 1.5659, + "step": 2980 + }, + { + "epoch": 0.3154008438818565, + "grad_norm": 0.6701142191886902, + "learning_rate": 0.0015, + "loss": 1.5809, + "step": 2990 + }, + { + "epoch": 0.31645569620253167, + "grad_norm": 0.6711065769195557, + "learning_rate": 0.0015, + "loss": 1.5621, + "step": 3000 + }, + { + "epoch": 0.31751054852320676, + "grad_norm": 0.7424994111061096, + "learning_rate": 0.0015, + "loss": 1.5741, + "step": 3010 + }, + { + "epoch": 0.31856540084388185, + "grad_norm": 0.8336400985717773, + "learning_rate": 0.0015, + "loss": 1.5766, + "step": 3020 + }, + { + "epoch": 0.31962025316455694, + "grad_norm": 0.7403140664100647, + "learning_rate": 0.0015, + "loss": 1.5639, + "step": 3030 + }, + { + "epoch": 0.3206751054852321, + "grad_norm": 0.7756516337394714, + 
"learning_rate": 0.0015, + "loss": 1.5693, + "step": 3040 + }, + { + "epoch": 0.3217299578059072, + "grad_norm": 0.6076432466506958, + "learning_rate": 0.0015, + "loss": 1.5652, + "step": 3050 + }, + { + "epoch": 0.3227848101265823, + "grad_norm": 0.8079836964607239, + "learning_rate": 0.0015, + "loss": 1.5775, + "step": 3060 + }, + { + "epoch": 0.32383966244725737, + "grad_norm": 0.6974412202835083, + "learning_rate": 0.0015, + "loss": 1.5671, + "step": 3070 + }, + { + "epoch": 0.32489451476793246, + "grad_norm": 0.7588804960250854, + "learning_rate": 0.0015, + "loss": 1.5565, + "step": 3080 + }, + { + "epoch": 0.3259493670886076, + "grad_norm": 0.7476724982261658, + "learning_rate": 0.0015, + "loss": 1.5746, + "step": 3090 + }, + { + "epoch": 0.3270042194092827, + "grad_norm": 0.6752328276634216, + "learning_rate": 0.0015, + "loss": 1.5576, + "step": 3100 + }, + { + "epoch": 0.3280590717299578, + "grad_norm": 0.5766344666481018, + "learning_rate": 0.0015, + "loss": 1.5638, + "step": 3110 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 0.7599741220474243, + "learning_rate": 0.0015, + "loss": 1.5673, + "step": 3120 + }, + { + "epoch": 0.33016877637130804, + "grad_norm": 0.634009599685669, + "learning_rate": 0.0015, + "loss": 1.5822, + "step": 3130 + }, + { + "epoch": 0.33122362869198313, + "grad_norm": 0.8446086645126343, + "learning_rate": 0.0015, + "loss": 1.5556, + "step": 3140 + }, + { + "epoch": 0.3322784810126582, + "grad_norm": 0.6843515634536743, + "learning_rate": 0.0015, + "loss": 1.564, + "step": 3150 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.6914908289909363, + "learning_rate": 0.0015, + "loss": 1.5691, + "step": 3160 + }, + { + "epoch": 0.33438818565400846, + "grad_norm": 0.770805835723877, + "learning_rate": 0.0015, + "loss": 1.5702, + "step": 3170 + }, + { + "epoch": 0.33544303797468356, + "grad_norm": 0.9032822847366333, + "learning_rate": 0.0015, + "loss": 1.5605, + "step": 3180 + }, + { + "epoch": 0.33649789029535865, + 
"grad_norm": 0.791721522808075, + "learning_rate": 0.0015, + "loss": 1.5567, + "step": 3190 + }, + { + "epoch": 0.33755274261603374, + "grad_norm": 0.9578579068183899, + "learning_rate": 0.0015, + "loss": 1.5574, + "step": 3200 + }, + { + "epoch": 0.33860759493670883, + "grad_norm": 0.960804283618927, + "learning_rate": 0.0015, + "loss": 1.5702, + "step": 3210 + }, + { + "epoch": 0.339662447257384, + "grad_norm": 0.5963431596755981, + "learning_rate": 0.0015, + "loss": 1.5576, + "step": 3220 + }, + { + "epoch": 0.3407172995780591, + "grad_norm": 0.5783581733703613, + "learning_rate": 0.0015, + "loss": 1.5531, + "step": 3230 + }, + { + "epoch": 0.34177215189873417, + "grad_norm": 0.7449812889099121, + "learning_rate": 0.0015, + "loss": 1.5348, + "step": 3240 + }, + { + "epoch": 0.34282700421940926, + "grad_norm": 0.6669825315475464, + "learning_rate": 0.0015, + "loss": 1.5595, + "step": 3250 + }, + { + "epoch": 0.3438818565400844, + "grad_norm": 0.6748496294021606, + "learning_rate": 0.0015, + "loss": 1.5503, + "step": 3260 + }, + { + "epoch": 0.3449367088607595, + "grad_norm": 0.6606012582778931, + "learning_rate": 0.0015, + "loss": 1.5695, + "step": 3270 + }, + { + "epoch": 0.3459915611814346, + "grad_norm": 0.8657916784286499, + "learning_rate": 0.0015, + "loss": 1.5702, + "step": 3280 + }, + { + "epoch": 0.3470464135021097, + "grad_norm": 0.9551817178726196, + "learning_rate": 0.0015, + "loss": 1.5507, + "step": 3290 + }, + { + "epoch": 0.34810126582278483, + "grad_norm": 0.6041373610496521, + "learning_rate": 0.0015, + "loss": 1.5582, + "step": 3300 + }, + { + "epoch": 0.3491561181434599, + "grad_norm": 0.6209775805473328, + "learning_rate": 0.0015, + "loss": 1.5571, + "step": 3310 + }, + { + "epoch": 0.350210970464135, + "grad_norm": 0.5859197378158569, + "learning_rate": 0.0015, + "loss": 1.5628, + "step": 3320 + }, + { + "epoch": 0.3512658227848101, + "grad_norm": 0.7068500518798828, + "learning_rate": 0.0015, + "loss": 1.5524, + "step": 3330 + }, + { + 
"epoch": 0.35232067510548526, + "grad_norm": 0.7338625192642212, + "learning_rate": 0.0015, + "loss": 1.5546, + "step": 3340 + }, + { + "epoch": 0.35337552742616035, + "grad_norm": 0.6412256360054016, + "learning_rate": 0.0015, + "loss": 1.5458, + "step": 3350 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 0.6639289855957031, + "learning_rate": 0.0015, + "loss": 1.5481, + "step": 3360 + }, + { + "epoch": 0.35548523206751054, + "grad_norm": 1.0747135877609253, + "learning_rate": 0.0015, + "loss": 1.5531, + "step": 3370 + }, + { + "epoch": 0.35654008438818563, + "grad_norm": 1.0010422468185425, + "learning_rate": 0.0015, + "loss": 1.5511, + "step": 3380 + }, + { + "epoch": 0.3575949367088608, + "grad_norm": 0.7092239260673523, + "learning_rate": 0.0015, + "loss": 1.5435, + "step": 3390 + }, + { + "epoch": 0.35864978902953587, + "grad_norm": 0.8664953708648682, + "learning_rate": 0.0015, + "loss": 1.5524, + "step": 3400 + }, + { + "epoch": 0.35970464135021096, + "grad_norm": 0.7421767115592957, + "learning_rate": 0.0015, + "loss": 1.547, + "step": 3410 + }, + { + "epoch": 0.36075949367088606, + "grad_norm": 0.8898679614067078, + "learning_rate": 0.0015, + "loss": 1.5305, + "step": 3420 + }, + { + "epoch": 0.3618143459915612, + "grad_norm": 0.6894645094871521, + "learning_rate": 0.0015, + "loss": 1.5478, + "step": 3430 + }, + { + "epoch": 0.3628691983122363, + "grad_norm": 0.6658676266670227, + "learning_rate": 0.0015, + "loss": 1.5427, + "step": 3440 + }, + { + "epoch": 0.3639240506329114, + "grad_norm": 0.6107981204986572, + "learning_rate": 0.0015, + "loss": 1.5429, + "step": 3450 + }, + { + "epoch": 0.3649789029535865, + "grad_norm": 0.6870825290679932, + "learning_rate": 0.0015, + "loss": 1.5372, + "step": 3460 + }, + { + "epoch": 0.36603375527426163, + "grad_norm": 0.7180758714675903, + "learning_rate": 0.0015, + "loss": 1.554, + "step": 3470 + }, + { + "epoch": 0.3670886075949367, + "grad_norm": 0.8512082099914551, + "learning_rate": 0.0015, + "loss": 
1.5565, + "step": 3480 + }, + { + "epoch": 0.3681434599156118, + "grad_norm": 0.6531484127044678, + "learning_rate": 0.0015, + "loss": 1.5472, + "step": 3490 + }, + { + "epoch": 0.3691983122362869, + "grad_norm": 0.65434730052948, + "learning_rate": 0.0015, + "loss": 1.5294, + "step": 3500 + }, + { + "epoch": 0.370253164556962, + "grad_norm": 0.6975955367088318, + "learning_rate": 0.0015, + "loss": 1.5471, + "step": 3510 + }, + { + "epoch": 0.37130801687763715, + "grad_norm": 0.6707955598831177, + "learning_rate": 0.0015, + "loss": 1.5433, + "step": 3520 + }, + { + "epoch": 0.37236286919831224, + "grad_norm": 0.7876633405685425, + "learning_rate": 0.0015, + "loss": 1.5559, + "step": 3530 + }, + { + "epoch": 0.37341772151898733, + "grad_norm": 1.0932773351669312, + "learning_rate": 0.0015, + "loss": 1.5522, + "step": 3540 + }, + { + "epoch": 0.3744725738396624, + "grad_norm": 0.6533617377281189, + "learning_rate": 0.0015, + "loss": 1.5398, + "step": 3550 + }, + { + "epoch": 0.3755274261603376, + "grad_norm": 0.6416671276092529, + "learning_rate": 0.0015, + "loss": 1.5347, + "step": 3560 + }, + { + "epoch": 0.37658227848101267, + "grad_norm": 1.1128736734390259, + "learning_rate": 0.0015, + "loss": 1.5354, + "step": 3570 + }, + { + "epoch": 0.37763713080168776, + "grad_norm": 0.6157364845275879, + "learning_rate": 0.0015, + "loss": 1.5382, + "step": 3580 + }, + { + "epoch": 0.37869198312236285, + "grad_norm": 0.8025332689285278, + "learning_rate": 0.0015, + "loss": 1.5474, + "step": 3590 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 1.169956922531128, + "learning_rate": 0.0015, + "loss": 1.5299, + "step": 3600 + }, + { + "epoch": 0.3808016877637131, + "grad_norm": 0.7284847497940063, + "learning_rate": 0.0015, + "loss": 1.5404, + "step": 3610 + }, + { + "epoch": 0.3818565400843882, + "grad_norm": 0.6278667449951172, + "learning_rate": 0.0015, + "loss": 1.5362, + "step": 3620 + }, + { + "epoch": 0.3829113924050633, + "grad_norm": 0.8233250975608826, + 
"learning_rate": 0.0015, + "loss": 1.5336, + "step": 3630 + }, + { + "epoch": 0.38396624472573837, + "grad_norm": 0.8548305034637451, + "learning_rate": 0.0015, + "loss": 1.5369, + "step": 3640 + }, + { + "epoch": 0.3850210970464135, + "grad_norm": 0.7548200488090515, + "learning_rate": 0.0015, + "loss": 1.543, + "step": 3650 + }, + { + "epoch": 0.3860759493670886, + "grad_norm": 0.8986817598342896, + "learning_rate": 0.0015, + "loss": 1.5371, + "step": 3660 + }, + { + "epoch": 0.3871308016877637, + "grad_norm": 0.6866987347602844, + "learning_rate": 0.0015, + "loss": 1.533, + "step": 3670 + }, + { + "epoch": 0.3881856540084388, + "grad_norm": 0.9056679010391235, + "learning_rate": 0.0015, + "loss": 1.5342, + "step": 3680 + }, + { + "epoch": 0.38924050632911394, + "grad_norm": 0.6829701066017151, + "learning_rate": 0.0015, + "loss": 1.5447, + "step": 3690 + }, + { + "epoch": 0.39029535864978904, + "grad_norm": 0.6000765562057495, + "learning_rate": 0.0015, + "loss": 1.5477, + "step": 3700 + }, + { + "epoch": 0.39135021097046413, + "grad_norm": 0.6513208150863647, + "learning_rate": 0.0015, + "loss": 1.5347, + "step": 3710 + }, + { + "epoch": 0.3924050632911392, + "grad_norm": 0.6463078260421753, + "learning_rate": 0.0015, + "loss": 1.513, + "step": 3720 + }, + { + "epoch": 0.39345991561181437, + "grad_norm": 0.674860417842865, + "learning_rate": 0.0015, + "loss": 1.5319, + "step": 3730 + }, + { + "epoch": 0.39451476793248946, + "grad_norm": 0.6177114844322205, + "learning_rate": 0.0015, + "loss": 1.5316, + "step": 3740 + }, + { + "epoch": 0.39556962025316456, + "grad_norm": 0.951332688331604, + "learning_rate": 0.0015, + "loss": 1.5382, + "step": 3750 + }, + { + "epoch": 0.39662447257383965, + "grad_norm": 0.8385935425758362, + "learning_rate": 0.0015, + "loss": 1.5269, + "step": 3760 + }, + { + "epoch": 0.39767932489451474, + "grad_norm": 0.7090339660644531, + "learning_rate": 0.0015, + "loss": 1.5456, + "step": 3770 + }, + { + "epoch": 0.3987341772151899, + 
"grad_norm": 0.5720601081848145, + "learning_rate": 0.0015, + "loss": 1.5259, + "step": 3780 + }, + { + "epoch": 0.399789029535865, + "grad_norm": 0.7624120712280273, + "learning_rate": 0.0015, + "loss": 1.5337, + "step": 3790 + }, + { + "epoch": 0.4008438818565401, + "grad_norm": 0.7237735390663147, + "learning_rate": 0.0015, + "loss": 1.538, + "step": 3800 + }, + { + "epoch": 0.40189873417721517, + "grad_norm": 0.6539837718009949, + "learning_rate": 0.0015, + "loss": 1.5195, + "step": 3810 + }, + { + "epoch": 0.4029535864978903, + "grad_norm": 0.7327600717544556, + "learning_rate": 0.0015, + "loss": 1.5166, + "step": 3820 + }, + { + "epoch": 0.4040084388185654, + "grad_norm": 0.6164027452468872, + "learning_rate": 0.0015, + "loss": 1.5191, + "step": 3830 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 0.6477630138397217, + "learning_rate": 0.0015, + "loss": 1.5406, + "step": 3840 + }, + { + "epoch": 0.4061181434599156, + "grad_norm": 0.6862159371376038, + "learning_rate": 0.0015, + "loss": 1.5211, + "step": 3850 + }, + { + "epoch": 0.40717299578059074, + "grad_norm": 0.7061493396759033, + "learning_rate": 0.0015, + "loss": 1.5344, + "step": 3860 + }, + { + "epoch": 0.40822784810126583, + "grad_norm": 0.795935332775116, + "learning_rate": 0.0015, + "loss": 1.5201, + "step": 3870 + }, + { + "epoch": 0.4092827004219409, + "grad_norm": 0.6554853320121765, + "learning_rate": 0.0015, + "loss": 1.5153, + "step": 3880 + }, + { + "epoch": 0.410337552742616, + "grad_norm": 0.6315854787826538, + "learning_rate": 0.0015, + "loss": 1.5179, + "step": 3890 + }, + { + "epoch": 0.41139240506329117, + "grad_norm": 0.88664710521698, + "learning_rate": 0.0015, + "loss": 1.5151, + "step": 3900 + }, + { + "epoch": 0.41244725738396626, + "grad_norm": 0.6432058215141296, + "learning_rate": 0.0015, + "loss": 1.5157, + "step": 3910 + }, + { + "epoch": 0.41350210970464135, + "grad_norm": 0.6559253931045532, + "learning_rate": 0.0015, + "loss": 1.5154, + "step": 3920 + }, + { + 
"epoch": 0.41455696202531644, + "grad_norm": 0.6490030884742737, + "learning_rate": 0.0015, + "loss": 1.5194, + "step": 3930 + }, + { + "epoch": 0.41561181434599154, + "grad_norm": 0.5851402878761292, + "learning_rate": 0.0015, + "loss": 1.5142, + "step": 3940 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 0.8605092763900757, + "learning_rate": 0.0015, + "loss": 1.5212, + "step": 3950 + }, + { + "epoch": 0.4177215189873418, + "grad_norm": 0.6109099984169006, + "learning_rate": 0.0015, + "loss": 1.5219, + "step": 3960 + }, + { + "epoch": 0.41877637130801687, + "grad_norm": 1.2105964422225952, + "learning_rate": 0.0015, + "loss": 1.5173, + "step": 3970 + }, + { + "epoch": 0.41983122362869196, + "grad_norm": 0.729722261428833, + "learning_rate": 0.0015, + "loss": 1.5215, + "step": 3980 + }, + { + "epoch": 0.4208860759493671, + "grad_norm": 1.0118728876113892, + "learning_rate": 0.0015, + "loss": 1.5128, + "step": 3990 + }, + { + "epoch": 0.4219409282700422, + "grad_norm": 0.6278619170188904, + "learning_rate": 0.0015, + "loss": 1.5282, + "step": 4000 + }, + { + "epoch": 0.4229957805907173, + "grad_norm": 0.6154831051826477, + "learning_rate": 0.0015, + "loss": 1.5129, + "step": 4010 + }, + { + "epoch": 0.4240506329113924, + "grad_norm": 0.6882016062736511, + "learning_rate": 0.0015, + "loss": 1.5189, + "step": 4020 + }, + { + "epoch": 0.42510548523206754, + "grad_norm": 0.7941892743110657, + "learning_rate": 0.0015, + "loss": 1.5091, + "step": 4030 + }, + { + "epoch": 0.42616033755274263, + "grad_norm": 0.5746110081672668, + "learning_rate": 0.0015, + "loss": 1.5202, + "step": 4040 + }, + { + "epoch": 0.4272151898734177, + "grad_norm": 0.5997068285942078, + "learning_rate": 0.0015, + "loss": 1.5168, + "step": 4050 + }, + { + "epoch": 0.4282700421940928, + "grad_norm": 0.7239584922790527, + "learning_rate": 0.0015, + "loss": 1.5232, + "step": 4060 + }, + { + "epoch": 0.4293248945147679, + "grad_norm": 0.6514604687690735, + "learning_rate": 0.0015, + "loss": 
1.517, + "step": 4070 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 0.5965775847434998, + "learning_rate": 0.0015, + "loss": 1.5216, + "step": 4080 + }, + { + "epoch": 0.43143459915611815, + "grad_norm": 0.6456117630004883, + "learning_rate": 0.0015, + "loss": 1.5149, + "step": 4090 + }, + { + "epoch": 0.43248945147679324, + "grad_norm": 0.6053041219711304, + "learning_rate": 0.0015, + "loss": 1.5124, + "step": 4100 + }, + { + "epoch": 0.43354430379746833, + "grad_norm": 0.662718653678894, + "learning_rate": 0.0015, + "loss": 1.5095, + "step": 4110 + }, + { + "epoch": 0.4345991561181435, + "grad_norm": 1.0736215114593506, + "learning_rate": 0.0015, + "loss": 1.5143, + "step": 4120 + }, + { + "epoch": 0.4356540084388186, + "grad_norm": 0.927436888217926, + "learning_rate": 0.0015, + "loss": 1.5058, + "step": 4130 + }, + { + "epoch": 0.43670886075949367, + "grad_norm": 0.8844011425971985, + "learning_rate": 0.0015, + "loss": 1.5113, + "step": 4140 + }, + { + "epoch": 0.43776371308016876, + "grad_norm": 0.8308040499687195, + "learning_rate": 0.0015, + "loss": 1.5165, + "step": 4150 + }, + { + "epoch": 0.4388185654008439, + "grad_norm": 0.6523913145065308, + "learning_rate": 0.0015, + "loss": 1.5106, + "step": 4160 + }, + { + "epoch": 0.439873417721519, + "grad_norm": 0.58203125, + "learning_rate": 0.0015, + "loss": 1.5083, + "step": 4170 + }, + { + "epoch": 0.4409282700421941, + "grad_norm": 0.5963124632835388, + "learning_rate": 0.0015, + "loss": 1.5049, + "step": 4180 + }, + { + "epoch": 0.4419831223628692, + "grad_norm": 0.6012136340141296, + "learning_rate": 0.0015, + "loss": 1.5134, + "step": 4190 + }, + { + "epoch": 0.4430379746835443, + "grad_norm": 0.5969071388244629, + "learning_rate": 0.0015, + "loss": 1.5148, + "step": 4200 + }, + { + "epoch": 0.4440928270042194, + "grad_norm": 0.8676182627677917, + "learning_rate": 0.0015, + "loss": 1.5046, + "step": 4210 + }, + { + "epoch": 0.4451476793248945, + "grad_norm": 0.6771762371063232, + 
"learning_rate": 0.0015, + "loss": 1.5103, + "step": 4220 + }, + { + "epoch": 0.4462025316455696, + "grad_norm": 1.351891279220581, + "learning_rate": 0.0015, + "loss": 1.51, + "step": 4230 + }, + { + "epoch": 0.4472573839662447, + "grad_norm": 0.7372578978538513, + "learning_rate": 0.0015, + "loss": 1.5092, + "step": 4240 + }, + { + "epoch": 0.44831223628691985, + "grad_norm": 0.6076937913894653, + "learning_rate": 0.0015, + "loss": 1.5077, + "step": 4250 + }, + { + "epoch": 0.44936708860759494, + "grad_norm": 1.1676081418991089, + "learning_rate": 0.0015, + "loss": 1.5157, + "step": 4260 + }, + { + "epoch": 0.45042194092827004, + "grad_norm": 0.6855350136756897, + "learning_rate": 0.0015, + "loss": 1.5062, + "step": 4270 + }, + { + "epoch": 0.45147679324894513, + "grad_norm": 1.267324686050415, + "learning_rate": 0.0015, + "loss": 1.4996, + "step": 4280 + }, + { + "epoch": 0.4525316455696203, + "grad_norm": 0.600249171257019, + "learning_rate": 0.0015, + "loss": 1.5149, + "step": 4290 + }, + { + "epoch": 0.45358649789029537, + "grad_norm": 0.9072557687759399, + "learning_rate": 0.0015, + "loss": 1.5072, + "step": 4300 + }, + { + "epoch": 0.45464135021097046, + "grad_norm": 0.607441246509552, + "learning_rate": 0.0015, + "loss": 1.5019, + "step": 4310 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 0.8913252949714661, + "learning_rate": 0.0015, + "loss": 1.5031, + "step": 4320 + }, + { + "epoch": 0.45675105485232065, + "grad_norm": 0.6213563680648804, + "learning_rate": 0.0015, + "loss": 1.5095, + "step": 4330 + }, + { + "epoch": 0.4578059071729958, + "grad_norm": 0.7751739621162415, + "learning_rate": 0.0015, + "loss": 1.5023, + "step": 4340 + }, + { + "epoch": 0.4588607594936709, + "grad_norm": 0.7563364505767822, + "learning_rate": 0.0015, + "loss": 1.5045, + "step": 4350 + }, + { + "epoch": 0.459915611814346, + "grad_norm": 0.7094775438308716, + "learning_rate": 0.0015, + "loss": 1.5176, + "step": 4360 + }, + { + "epoch": 0.4609704641350211, + 
"grad_norm": 0.9485508799552917, + "learning_rate": 0.0015, + "loss": 1.496, + "step": 4370 + }, + { + "epoch": 0.4620253164556962, + "grad_norm": 0.6145891547203064, + "learning_rate": 0.0015, + "loss": 1.5032, + "step": 4380 + }, + { + "epoch": 0.4630801687763713, + "grad_norm": 0.5889794826507568, + "learning_rate": 0.0015, + "loss": 1.4997, + "step": 4390 + }, + { + "epoch": 0.4641350210970464, + "grad_norm": 0.7561887502670288, + "learning_rate": 0.0015, + "loss": 1.5046, + "step": 4400 + }, + { + "epoch": 0.4651898734177215, + "grad_norm": 0.8948625922203064, + "learning_rate": 0.0015, + "loss": 1.5162, + "step": 4410 + }, + { + "epoch": 0.46624472573839665, + "grad_norm": 0.7258087396621704, + "learning_rate": 0.0015, + "loss": 1.5022, + "step": 4420 + }, + { + "epoch": 0.46729957805907174, + "grad_norm": 0.7893818616867065, + "learning_rate": 0.0015, + "loss": 1.5051, + "step": 4430 + }, + { + "epoch": 0.46835443037974683, + "grad_norm": 0.7624740600585938, + "learning_rate": 0.0015, + "loss": 1.5012, + "step": 4440 + }, + { + "epoch": 0.4694092827004219, + "grad_norm": 0.6293278336524963, + "learning_rate": 0.0015, + "loss": 1.5092, + "step": 4450 + }, + { + "epoch": 0.4704641350210971, + "grad_norm": 0.6187537312507629, + "learning_rate": 0.0015, + "loss": 1.4943, + "step": 4460 + }, + { + "epoch": 0.47151898734177217, + "grad_norm": 0.6581799983978271, + "learning_rate": 0.0015, + "loss": 1.5031, + "step": 4470 + }, + { + "epoch": 0.47257383966244726, + "grad_norm": 0.7659748792648315, + "learning_rate": 0.0015, + "loss": 1.5086, + "step": 4480 + }, + { + "epoch": 0.47362869198312235, + "grad_norm": 0.6567431688308716, + "learning_rate": 0.0015, + "loss": 1.4979, + "step": 4490 + }, + { + "epoch": 0.47468354430379744, + "grad_norm": 0.8296955823898315, + "learning_rate": 0.0015, + "loss": 1.4971, + "step": 4500 + }, + { + "epoch": 0.4757383966244726, + "grad_norm": 0.5771436095237732, + "learning_rate": 0.0015, + "loss": 1.4999, + "step": 4510 + }, + { + 
"epoch": 0.4767932489451477, + "grad_norm": 0.7694754600524902, + "learning_rate": 0.0015, + "loss": 1.4837, + "step": 4520 + }, + { + "epoch": 0.4778481012658228, + "grad_norm": 0.665338933467865, + "learning_rate": 0.0015, + "loss": 1.5053, + "step": 4530 + }, + { + "epoch": 0.47890295358649787, + "grad_norm": 0.7107120156288147, + "learning_rate": 0.0015, + "loss": 1.5063, + "step": 4540 + }, + { + "epoch": 0.479957805907173, + "grad_norm": 0.7967615723609924, + "learning_rate": 0.0015, + "loss": 1.5058, + "step": 4550 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 0.708405077457428, + "learning_rate": 0.0015, + "loss": 1.4915, + "step": 4560 + }, + { + "epoch": 0.4820675105485232, + "grad_norm": 0.6078059077262878, + "learning_rate": 0.0015, + "loss": 1.4763, + "step": 4570 + }, + { + "epoch": 0.4831223628691983, + "grad_norm": 0.5851005911827087, + "learning_rate": 0.0015, + "loss": 1.4967, + "step": 4580 + }, + { + "epoch": 0.48417721518987344, + "grad_norm": 0.6043736934661865, + "learning_rate": 0.0015, + "loss": 1.4824, + "step": 4590 + }, + { + "epoch": 0.48523206751054854, + "grad_norm": 0.611513078212738, + "learning_rate": 0.0015, + "loss": 1.5017, + "step": 4600 + }, + { + "epoch": 0.48628691983122363, + "grad_norm": 1.0746793746948242, + "learning_rate": 0.0015, + "loss": 1.505, + "step": 4610 + }, + { + "epoch": 0.4873417721518987, + "grad_norm": 1.0146729946136475, + "learning_rate": 0.0015, + "loss": 1.4842, + "step": 4620 + }, + { + "epoch": 0.4883966244725738, + "grad_norm": 0.6626529097557068, + "learning_rate": 0.0015, + "loss": 1.4992, + "step": 4630 + }, + { + "epoch": 0.48945147679324896, + "grad_norm": 0.6300058364868164, + "learning_rate": 0.0015, + "loss": 1.501, + "step": 4640 + }, + { + "epoch": 0.49050632911392406, + "grad_norm": 0.6722200512886047, + "learning_rate": 0.0015, + "loss": 1.5007, + "step": 4650 + }, + { + "epoch": 0.49156118143459915, + "grad_norm": 0.714202344417572, + "learning_rate": 0.0015, + "loss": 1.4899, 
+ "step": 4660 + }, + { + "epoch": 0.49261603375527424, + "grad_norm": 0.7716938853263855, + "learning_rate": 0.0015, + "loss": 1.4914, + "step": 4670 + }, + { + "epoch": 0.4936708860759494, + "grad_norm": 0.6024720668792725, + "learning_rate": 0.0015, + "loss": 1.4883, + "step": 4680 + }, + { + "epoch": 0.4947257383966245, + "grad_norm": 0.5815703272819519, + "learning_rate": 0.0015, + "loss": 1.483, + "step": 4690 + }, + { + "epoch": 0.4957805907172996, + "grad_norm": 0.6057709455490112, + "learning_rate": 0.0015, + "loss": 1.4934, + "step": 4700 + }, + { + "epoch": 0.49683544303797467, + "grad_norm": 0.921125054359436, + "learning_rate": 0.0015, + "loss": 1.4956, + "step": 4710 + }, + { + "epoch": 0.4978902953586498, + "grad_norm": 0.5955872535705566, + "learning_rate": 0.0015, + "loss": 1.4908, + "step": 4720 + }, + { + "epoch": 0.4989451476793249, + "grad_norm": 0.6432823538780212, + "learning_rate": 0.0015, + "loss": 1.4877, + "step": 4730 + }, + { + "epoch": 0.5, + "grad_norm": 0.8056228756904602, + "learning_rate": 0.0015, + "loss": 1.4861, + "step": 4740 + }, + { + "epoch": 0.5010548523206751, + "grad_norm": 0.6135498285293579, + "learning_rate": 0.0015, + "loss": 1.5075, + "step": 4750 + }, + { + "epoch": 0.5021097046413502, + "grad_norm": 0.5867377519607544, + "learning_rate": 0.0015, + "loss": 1.4889, + "step": 4760 + }, + { + "epoch": 0.5031645569620253, + "grad_norm": 0.8009827733039856, + "learning_rate": 0.0015, + "loss": 1.4884, + "step": 4770 + }, + { + "epoch": 0.5042194092827004, + "grad_norm": 0.6317930221557617, + "learning_rate": 0.0015, + "loss": 1.5011, + "step": 4780 + }, + { + "epoch": 0.5052742616033755, + "grad_norm": 0.9503878951072693, + "learning_rate": 0.0015, + "loss": 1.4814, + "step": 4790 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 0.6696228981018066, + "learning_rate": 0.0015, + "loss": 1.4894, + "step": 4800 + }, + { + "epoch": 0.5073839662447257, + "grad_norm": 0.61104816198349, + "learning_rate": 0.0015, + "loss": 
1.4854, + "step": 4810 + }, + { + "epoch": 0.5084388185654009, + "grad_norm": 0.6080767512321472, + "learning_rate": 0.0015, + "loss": 1.5032, + "step": 4820 + }, + { + "epoch": 0.509493670886076, + "grad_norm": 0.965461254119873, + "learning_rate": 0.0015, + "loss": 1.4912, + "step": 4830 + }, + { + "epoch": 0.510548523206751, + "grad_norm": 0.655433714389801, + "learning_rate": 0.0015, + "loss": 1.481, + "step": 4840 + }, + { + "epoch": 0.5116033755274262, + "grad_norm": 0.6455399394035339, + "learning_rate": 0.0015, + "loss": 1.4935, + "step": 4850 + }, + { + "epoch": 0.5126582278481012, + "grad_norm": 0.5902749300003052, + "learning_rate": 0.0015, + "loss": 1.482, + "step": 4860 + }, + { + "epoch": 0.5137130801687764, + "grad_norm": 0.783729612827301, + "learning_rate": 0.0015, + "loss": 1.4777, + "step": 4870 + }, + { + "epoch": 0.5147679324894515, + "grad_norm": 0.7097678780555725, + "learning_rate": 0.0015, + "loss": 1.4977, + "step": 4880 + }, + { + "epoch": 0.5158227848101266, + "grad_norm": 0.7562985420227051, + "learning_rate": 0.0015, + "loss": 1.4862, + "step": 4890 + }, + { + "epoch": 0.5168776371308017, + "grad_norm": 0.59348464012146, + "learning_rate": 0.0015, + "loss": 1.4929, + "step": 4900 + }, + { + "epoch": 0.5179324894514767, + "grad_norm": 0.6034818887710571, + "learning_rate": 0.0015, + "loss": 1.4875, + "step": 4910 + }, + { + "epoch": 0.5189873417721519, + "grad_norm": 0.6982986330986023, + "learning_rate": 0.0015, + "loss": 1.4811, + "step": 4920 + }, + { + "epoch": 0.520042194092827, + "grad_norm": 0.6169223189353943, + "learning_rate": 0.0015, + "loss": 1.4871, + "step": 4930 + }, + { + "epoch": 0.5210970464135021, + "grad_norm": 0.614870011806488, + "learning_rate": 0.0015, + "loss": 1.4882, + "step": 4940 + }, + { + "epoch": 0.5221518987341772, + "grad_norm": 0.6056461930274963, + "learning_rate": 0.0015, + "loss": 1.4949, + "step": 4950 + }, + { + "epoch": 0.5232067510548524, + "grad_norm": 0.7622484564781189, + "learning_rate": 
0.0015, + "loss": 1.4742, + "step": 4960 + }, + { + "epoch": 0.5242616033755274, + "grad_norm": 0.6300858855247498, + "learning_rate": 0.0015, + "loss": 1.483, + "step": 4970 + }, + { + "epoch": 0.5253164556962026, + "grad_norm": 0.6003949046134949, + "learning_rate": 0.0015, + "loss": 1.4794, + "step": 4980 + }, + { + "epoch": 0.5263713080168776, + "grad_norm": 0.9810915589332581, + "learning_rate": 0.0015, + "loss": 1.4847, + "step": 4990 + }, + { + "epoch": 0.5274261603375527, + "grad_norm": 0.8623272776603699, + "learning_rate": 0.0015, + "loss": 1.493, + "step": 5000 + }, + { + "epoch": 0.5284810126582279, + "grad_norm": 0.6241163015365601, + "learning_rate": 0.0015, + "loss": 1.4867, + "step": 5010 + }, + { + "epoch": 0.5295358649789029, + "grad_norm": 0.5742214918136597, + "learning_rate": 0.0015, + "loss": 1.4778, + "step": 5020 + }, + { + "epoch": 0.5305907172995781, + "grad_norm": 0.6740446090698242, + "learning_rate": 0.0015, + "loss": 1.4635, + "step": 5030 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 0.5944175720214844, + "learning_rate": 0.0015, + "loss": 1.492, + "step": 5040 + }, + { + "epoch": 0.5327004219409283, + "grad_norm": 0.7261972427368164, + "learning_rate": 0.0015, + "loss": 1.4787, + "step": 5050 + }, + { + "epoch": 0.5337552742616034, + "grad_norm": 1.0926592350006104, + "learning_rate": 0.0015, + "loss": 1.492, + "step": 5060 + }, + { + "epoch": 0.5348101265822784, + "grad_norm": 1.0481741428375244, + "learning_rate": 0.0015, + "loss": 1.4814, + "step": 5070 + }, + { + "epoch": 0.5358649789029536, + "grad_norm": 0.6393164396286011, + "learning_rate": 0.0015, + "loss": 1.4945, + "step": 5080 + }, + { + "epoch": 0.5369198312236287, + "grad_norm": 0.6638101935386658, + "learning_rate": 0.0015, + "loss": 1.4821, + "step": 5090 + }, + { + "epoch": 0.5379746835443038, + "grad_norm": 0.6531327366828918, + "learning_rate": 0.0015, + "loss": 1.4646, + "step": 5100 + }, + { + "epoch": 0.5390295358649789, + "grad_norm": 
0.7382935881614685, + "learning_rate": 0.0015, + "loss": 1.4818, + "step": 5110 + }, + { + "epoch": 0.540084388185654, + "grad_norm": 0.6771251559257507, + "learning_rate": 0.0015, + "loss": 1.4795, + "step": 5120 + }, + { + "epoch": 0.5411392405063291, + "grad_norm": 0.6889464855194092, + "learning_rate": 0.0015, + "loss": 1.4759, + "step": 5130 + }, + { + "epoch": 0.5421940928270043, + "grad_norm": 1.102895975112915, + "learning_rate": 0.0015, + "loss": 1.4918, + "step": 5140 + }, + { + "epoch": 0.5432489451476793, + "grad_norm": 0.7885115742683411, + "learning_rate": 0.0015, + "loss": 1.4881, + "step": 5150 + }, + { + "epoch": 0.5443037974683544, + "grad_norm": 0.5954360961914062, + "learning_rate": 0.0015, + "loss": 1.4818, + "step": 5160 + }, + { + "epoch": 0.5453586497890295, + "grad_norm": 1.172462821006775, + "learning_rate": 0.0015, + "loss": 1.4833, + "step": 5170 + }, + { + "epoch": 0.5464135021097046, + "grad_norm": 0.7841358780860901, + "learning_rate": 0.0015, + "loss": 1.4891, + "step": 5180 + }, + { + "epoch": 0.5474683544303798, + "grad_norm": 0.6792880296707153, + "learning_rate": 0.0015, + "loss": 1.4708, + "step": 5190 + }, + { + "epoch": 0.5485232067510548, + "grad_norm": 0.7228267192840576, + "learning_rate": 0.0015, + "loss": 1.4813, + "step": 5200 + }, + { + "epoch": 0.54957805907173, + "grad_norm": 0.6226990818977356, + "learning_rate": 0.0015, + "loss": 1.4691, + "step": 5210 + }, + { + "epoch": 0.5506329113924051, + "grad_norm": 0.6215463280677795, + "learning_rate": 0.0015, + "loss": 1.4819, + "step": 5220 + }, + { + "epoch": 0.5516877637130801, + "grad_norm": 0.6222267150878906, + "learning_rate": 0.0015, + "loss": 1.4729, + "step": 5230 + }, + { + "epoch": 0.5527426160337553, + "grad_norm": 0.6465316414833069, + "learning_rate": 0.0015, + "loss": 1.4818, + "step": 5240 + }, + { + "epoch": 0.5537974683544303, + "grad_norm": 0.6831579804420471, + "learning_rate": 0.0015, + "loss": 1.4743, + "step": 5250 + }, + { + "epoch": 
0.5548523206751055, + "grad_norm": 1.2003217935562134, + "learning_rate": 0.0015, + "loss": 1.4896, + "step": 5260 + }, + { + "epoch": 0.5559071729957806, + "grad_norm": 0.7084863185882568, + "learning_rate": 0.0015, + "loss": 1.4874, + "step": 5270 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 0.7810060381889343, + "learning_rate": 0.0015, + "loss": 1.4721, + "step": 5280 + }, + { + "epoch": 0.5580168776371308, + "grad_norm": 0.6424432992935181, + "learning_rate": 0.0015, + "loss": 1.4755, + "step": 5290 + }, + { + "epoch": 0.5590717299578059, + "grad_norm": 0.6694437861442566, + "learning_rate": 0.0015, + "loss": 1.4806, + "step": 5300 + }, + { + "epoch": 0.560126582278481, + "grad_norm": 1.1037667989730835, + "learning_rate": 0.0015, + "loss": 1.471, + "step": 5310 + }, + { + "epoch": 0.5611814345991561, + "grad_norm": 0.5845842361450195, + "learning_rate": 0.0015, + "loss": 1.4698, + "step": 5320 + }, + { + "epoch": 0.5622362869198312, + "grad_norm": 0.7136526703834534, + "learning_rate": 0.0015, + "loss": 1.477, + "step": 5330 + }, + { + "epoch": 0.5632911392405063, + "grad_norm": 0.6119893789291382, + "learning_rate": 0.0015, + "loss": 1.4716, + "step": 5340 + }, + { + "epoch": 0.5643459915611815, + "grad_norm": 0.6221395134925842, + "learning_rate": 0.0015, + "loss": 1.4645, + "step": 5350 + }, + { + "epoch": 0.5654008438818565, + "grad_norm": 0.9985007047653198, + "learning_rate": 0.0015, + "loss": 1.4743, + "step": 5360 + }, + { + "epoch": 0.5664556962025317, + "grad_norm": 0.8363048434257507, + "learning_rate": 0.0015, + "loss": 1.4772, + "step": 5370 + }, + { + "epoch": 0.5675105485232067, + "grad_norm": 0.6103449463844299, + "learning_rate": 0.0015, + "loss": 1.4681, + "step": 5380 + }, + { + "epoch": 0.5685654008438819, + "grad_norm": 0.6348961591720581, + "learning_rate": 0.0015, + "loss": 1.4702, + "step": 5390 + }, + { + "epoch": 0.569620253164557, + "grad_norm": 0.6951332688331604, + "learning_rate": 0.0015, + "loss": 1.4698, + "step": 
5400 + }, + { + "epoch": 0.570675105485232, + "grad_norm": 0.6135388016700745, + "learning_rate": 0.0015, + "loss": 1.4762, + "step": 5410 + }, + { + "epoch": 0.5717299578059072, + "grad_norm": 0.6115954518318176, + "learning_rate": 0.0015, + "loss": 1.4656, + "step": 5420 + }, + { + "epoch": 0.5727848101265823, + "grad_norm": 0.8886831998825073, + "learning_rate": 0.0015, + "loss": 1.4628, + "step": 5430 + }, + { + "epoch": 0.5738396624472574, + "grad_norm": 0.5508164763450623, + "learning_rate": 0.0015, + "loss": 1.4624, + "step": 5440 + }, + { + "epoch": 0.5748945147679325, + "grad_norm": 0.6739417314529419, + "learning_rate": 0.0015, + "loss": 1.4698, + "step": 5450 + }, + { + "epoch": 0.5759493670886076, + "grad_norm": 0.7444676756858826, + "learning_rate": 0.0015, + "loss": 1.4707, + "step": 5460 + }, + { + "epoch": 0.5770042194092827, + "grad_norm": 0.8379223942756653, + "learning_rate": 0.0015, + "loss": 1.4717, + "step": 5470 + }, + { + "epoch": 0.5780590717299579, + "grad_norm": 0.6977660059928894, + "learning_rate": 0.0015, + "loss": 1.4724, + "step": 5480 + }, + { + "epoch": 0.5791139240506329, + "grad_norm": 0.6287328600883484, + "learning_rate": 0.0015, + "loss": 1.4766, + "step": 5490 + }, + { + "epoch": 0.580168776371308, + "grad_norm": 0.6387433409690857, + "learning_rate": 0.0015, + "loss": 1.4663, + "step": 5500 + }, + { + "epoch": 0.5812236286919831, + "grad_norm": 1.0466444492340088, + "learning_rate": 0.0015, + "loss": 1.4778, + "step": 5510 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 0.6020264625549316, + "learning_rate": 0.0015, + "loss": 1.4682, + "step": 5520 + }, + { + "epoch": 0.5833333333333334, + "grad_norm": 0.6632500886917114, + "learning_rate": 0.0015, + "loss": 1.4594, + "step": 5530 + }, + { + "epoch": 0.5843881856540084, + "grad_norm": 0.6150721907615662, + "learning_rate": 0.0015, + "loss": 1.4576, + "step": 5540 + }, + { + "epoch": 0.5854430379746836, + "grad_norm": 0.5892557501792908, + "learning_rate": 0.0015, + 
"loss": 1.4776, + "step": 5550 + }, + { + "epoch": 0.5864978902953587, + "grad_norm": 0.71415776014328, + "learning_rate": 0.0015, + "loss": 1.4795, + "step": 5560 + }, + { + "epoch": 0.5875527426160337, + "grad_norm": 0.5766378045082092, + "learning_rate": 0.0015, + "loss": 1.4647, + "step": 5570 + }, + { + "epoch": 0.5886075949367089, + "grad_norm": 0.726824164390564, + "learning_rate": 0.0015, + "loss": 1.4643, + "step": 5580 + }, + { + "epoch": 0.5896624472573839, + "grad_norm": 0.7908912301063538, + "learning_rate": 0.0015, + "loss": 1.4687, + "step": 5590 + }, + { + "epoch": 0.5907172995780591, + "grad_norm": 0.8290234208106995, + "learning_rate": 0.0015, + "loss": 1.4654, + "step": 5600 + }, + { + "epoch": 0.5917721518987342, + "grad_norm": 0.6372126936912537, + "learning_rate": 0.0015, + "loss": 1.467, + "step": 5610 + }, + { + "epoch": 0.5928270042194093, + "grad_norm": 0.6666516661643982, + "learning_rate": 0.0015, + "loss": 1.4785, + "step": 5620 + }, + { + "epoch": 0.5938818565400844, + "grad_norm": 0.7377310991287231, + "learning_rate": 0.0015, + "loss": 1.4727, + "step": 5630 + }, + { + "epoch": 0.5949367088607594, + "grad_norm": 0.6241707801818848, + "learning_rate": 0.0015, + "loss": 1.4811, + "step": 5640 + }, + { + "epoch": 0.5959915611814346, + "grad_norm": 0.6799861192703247, + "learning_rate": 0.0015, + "loss": 1.475, + "step": 5650 + }, + { + "epoch": 0.5970464135021097, + "grad_norm": 0.6277510523796082, + "learning_rate": 0.0015, + "loss": 1.4679, + "step": 5660 + }, + { + "epoch": 0.5981012658227848, + "grad_norm": 0.5897135138511658, + "learning_rate": 0.0015, + "loss": 1.4784, + "step": 5670 + }, + { + "epoch": 0.5991561181434599, + "grad_norm": 0.597100555896759, + "learning_rate": 0.0015, + "loss": 1.4759, + "step": 5680 + }, + { + "epoch": 0.6002109704641351, + "grad_norm": 1.3132964372634888, + "learning_rate": 0.0015, + "loss": 1.4559, + "step": 5690 + }, + { + "epoch": 0.6012658227848101, + "grad_norm": 0.6332428455352783, + 
"learning_rate": 0.0015, + "loss": 1.4631, + "step": 5700 + }, + { + "epoch": 0.6023206751054853, + "grad_norm": 0.630925714969635, + "learning_rate": 0.0015, + "loss": 1.4659, + "step": 5710 + }, + { + "epoch": 0.6033755274261603, + "grad_norm": 0.621139645576477, + "learning_rate": 0.0015, + "loss": 1.4696, + "step": 5720 + }, + { + "epoch": 0.6044303797468354, + "grad_norm": 0.6006580591201782, + "learning_rate": 0.0015, + "loss": 1.4674, + "step": 5730 + }, + { + "epoch": 0.6054852320675106, + "grad_norm": 0.657659649848938, + "learning_rate": 0.0015, + "loss": 1.4643, + "step": 5740 + }, + { + "epoch": 0.6065400843881856, + "grad_norm": 0.6169291138648987, + "learning_rate": 0.0015, + "loss": 1.4404, + "step": 5750 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 0.6228291988372803, + "learning_rate": 0.0015, + "loss": 1.448, + "step": 5760 + }, + { + "epoch": 0.6086497890295358, + "grad_norm": 0.7357169985771179, + "learning_rate": 0.0015, + "loss": 1.4526, + "step": 5770 + }, + { + "epoch": 0.609704641350211, + "grad_norm": 0.5935964584350586, + "learning_rate": 0.0015, + "loss": 1.4576, + "step": 5780 + }, + { + "epoch": 0.6107594936708861, + "grad_norm": 0.6356377005577087, + "learning_rate": 0.0015, + "loss": 1.4562, + "step": 5790 + }, + { + "epoch": 0.6118143459915611, + "grad_norm": 0.6030194759368896, + "learning_rate": 0.0015, + "loss": 1.4605, + "step": 5800 + }, + { + "epoch": 0.6128691983122363, + "grad_norm": 0.6205433011054993, + "learning_rate": 0.0015, + "loss": 1.4695, + "step": 5810 + }, + { + "epoch": 0.6139240506329114, + "grad_norm": 0.8475396633148193, + "learning_rate": 0.0015, + "loss": 1.4787, + "step": 5820 + }, + { + "epoch": 0.6149789029535865, + "grad_norm": 0.9684879779815674, + "learning_rate": 0.0015, + "loss": 1.4634, + "step": 5830 + }, + { + "epoch": 0.6160337552742616, + "grad_norm": 0.9152795076370239, + "learning_rate": 0.0015, + "loss": 1.4744, + "step": 5840 + }, + { + "epoch": 0.6170886075949367, + "grad_norm": 
0.8805249333381653, + "learning_rate": 0.0015, + "loss": 1.4773, + "step": 5850 + }, + { + "epoch": 0.6181434599156118, + "grad_norm": 0.6770639419555664, + "learning_rate": 0.0015, + "loss": 1.463, + "step": 5860 + }, + { + "epoch": 0.619198312236287, + "grad_norm": 0.8071243166923523, + "learning_rate": 0.0015, + "loss": 1.4558, + "step": 5870 + }, + { + "epoch": 0.620253164556962, + "grad_norm": 0.8213844299316406, + "learning_rate": 0.0015, + "loss": 1.4675, + "step": 5880 + }, + { + "epoch": 0.6213080168776371, + "grad_norm": 0.7583251595497131, + "learning_rate": 0.0015, + "loss": 1.4572, + "step": 5890 + }, + { + "epoch": 0.6223628691983122, + "grad_norm": 0.6644003987312317, + "learning_rate": 0.0015, + "loss": 1.4606, + "step": 5900 + }, + { + "epoch": 0.6234177215189873, + "grad_norm": 0.6264317035675049, + "learning_rate": 0.0015, + "loss": 1.4555, + "step": 5910 + }, + { + "epoch": 0.6244725738396625, + "grad_norm": 0.6607256531715393, + "learning_rate": 0.0015, + "loss": 1.4497, + "step": 5920 + }, + { + "epoch": 0.6255274261603375, + "grad_norm": 1.1336363554000854, + "learning_rate": 0.0015, + "loss": 1.4592, + "step": 5930 + }, + { + "epoch": 0.6265822784810127, + "grad_norm": 0.7510808110237122, + "learning_rate": 0.0015, + "loss": 1.4652, + "step": 5940 + }, + { + "epoch": 0.6276371308016878, + "grad_norm": 0.5830952525138855, + "learning_rate": 0.0015, + "loss": 1.4521, + "step": 5950 + }, + { + "epoch": 0.6286919831223629, + "grad_norm": 0.6531512141227722, + "learning_rate": 0.0015, + "loss": 1.4607, + "step": 5960 + }, + { + "epoch": 0.629746835443038, + "grad_norm": 0.5881785750389099, + "learning_rate": 0.0015, + "loss": 1.4625, + "step": 5970 + }, + { + "epoch": 0.630801687763713, + "grad_norm": 0.6385230422019958, + "learning_rate": 0.0015, + "loss": 1.4423, + "step": 5980 + }, + { + "epoch": 0.6318565400843882, + "grad_norm": 0.7686502933502197, + "learning_rate": 0.0015, + "loss": 1.4657, + "step": 5990 + }, + { + "epoch": 
0.6329113924050633, + "grad_norm": 0.6703057885169983, + "learning_rate": 0.0015, + "loss": 1.473, + "step": 6000 + }, + { + "epoch": 0.6339662447257384, + "grad_norm": 0.6484505534172058, + "learning_rate": 0.0015, + "loss": 1.457, + "step": 6010 + }, + { + "epoch": 0.6350210970464135, + "grad_norm": 1.021883487701416, + "learning_rate": 0.0015, + "loss": 1.4601, + "step": 6020 + }, + { + "epoch": 0.6360759493670886, + "grad_norm": 0.6086958646774292, + "learning_rate": 0.0015, + "loss": 1.4434, + "step": 6030 + }, + { + "epoch": 0.6371308016877637, + "grad_norm": 0.6141099333763123, + "learning_rate": 0.0015, + "loss": 1.4626, + "step": 6040 + }, + { + "epoch": 0.6381856540084389, + "grad_norm": 0.664806604385376, + "learning_rate": 0.0015, + "loss": 1.4457, + "step": 6050 + }, + { + "epoch": 0.6392405063291139, + "grad_norm": 0.6827659010887146, + "learning_rate": 0.0015, + "loss": 1.4634, + "step": 6060 + }, + { + "epoch": 0.640295358649789, + "grad_norm": 0.7445857524871826, + "learning_rate": 0.0015, + "loss": 1.4554, + "step": 6070 + }, + { + "epoch": 0.6413502109704642, + "grad_norm": 0.7499704957008362, + "learning_rate": 0.0015, + "loss": 1.4549, + "step": 6080 + }, + { + "epoch": 0.6424050632911392, + "grad_norm": 0.8100832104682922, + "learning_rate": 0.0015, + "loss": 1.4549, + "step": 6090 + }, + { + "epoch": 0.6434599156118144, + "grad_norm": 0.9377439618110657, + "learning_rate": 0.0015, + "loss": 1.4504, + "step": 6100 + }, + { + "epoch": 0.6445147679324894, + "grad_norm": 1.3876328468322754, + "learning_rate": 0.0015, + "loss": 1.4637, + "step": 6110 + }, + { + "epoch": 0.6455696202531646, + "grad_norm": 1.1637641191482544, + "learning_rate": 0.0015, + "loss": 1.4567, + "step": 6120 + }, + { + "epoch": 0.6466244725738397, + "grad_norm": 0.753265917301178, + "learning_rate": 0.0015, + "loss": 1.4467, + "step": 6130 + }, + { + "epoch": 0.6476793248945147, + "grad_norm": 1.0227552652359009, + "learning_rate": 0.0015, + "loss": 1.4496, + "step": 6140 
+ }, + { + "epoch": 0.6487341772151899, + "grad_norm": 0.647739052772522, + "learning_rate": 0.0015, + "loss": 1.4634, + "step": 6150 + }, + { + "epoch": 0.6497890295358649, + "grad_norm": 0.614063024520874, + "learning_rate": 0.0015, + "loss": 1.4573, + "step": 6160 + }, + { + "epoch": 0.6508438818565401, + "grad_norm": 0.5970761179924011, + "learning_rate": 0.0014854972418331944, + "loss": 1.4365, + "step": 6170 + }, + { + "epoch": 0.6518987341772152, + "grad_norm": 0.5832245349884033, + "learning_rate": 0.0014650219182191931, + "loss": 1.4565, + "step": 6180 + }, + { + "epoch": 0.6529535864978903, + "grad_norm": 0.63349449634552, + "learning_rate": 0.001444828815847542, + "loss": 1.4577, + "step": 6190 + }, + { + "epoch": 0.6540084388185654, + "grad_norm": 0.6463556885719299, + "learning_rate": 0.0014249140447269945, + "loss": 1.449, + "step": 6200 + }, + { + "epoch": 0.6550632911392406, + "grad_norm": 0.6163970828056335, + "learning_rate": 0.0014052737684839257, + "loss": 1.4451, + "step": 6210 + }, + { + "epoch": 0.6561181434599156, + "grad_norm": 0.7840620279312134, + "learning_rate": 0.0013859042036232954, + "loss": 1.4549, + "step": 6220 + }, + { + "epoch": 0.6571729957805907, + "grad_norm": 0.6174883246421814, + "learning_rate": 0.001366801618799797, + "loss": 1.4422, + "step": 6230 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 0.7292013168334961, + "learning_rate": 0.001347962334099052, + "loss": 1.4435, + "step": 6240 + }, + { + "epoch": 0.6592827004219409, + "grad_norm": 0.6027617454528809, + "learning_rate": 0.0013293827203287143, + "loss": 1.4495, + "step": 6250 + }, + { + "epoch": 0.6603375527426161, + "grad_norm": 0.6866016387939453, + "learning_rate": 0.0013110591983193423, + "loss": 1.4384, + "step": 6260 + }, + { + "epoch": 0.6613924050632911, + "grad_norm": 0.6381696462631226, + "learning_rate": 0.0012929882382349102, + "loss": 1.447, + "step": 6270 + }, + { + "epoch": 0.6624472573839663, + "grad_norm": 0.6010462641716003, + 
"learning_rate": 0.0012751663588928214, + "loss": 1.4407, + "step": 6280 + }, + { + "epoch": 0.6635021097046413, + "grad_norm": 0.6927262544631958, + "learning_rate": 0.0012575901270932943, + "loss": 1.4371, + "step": 6290 + }, + { + "epoch": 0.6645569620253164, + "grad_norm": 0.6917093992233276, + "learning_rate": 0.0012402561569579936, + "loss": 1.4324, + "step": 6300 + }, + { + "epoch": 0.6656118143459916, + "grad_norm": 0.5623947978019714, + "learning_rate": 0.0012231611092777745, + "loss": 1.4274, + "step": 6310 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.5369976162910461, + "learning_rate": 0.0012063016908694193, + "loss": 1.4194, + "step": 6320 + }, + { + "epoch": 0.6677215189873418, + "grad_norm": 0.6493591070175171, + "learning_rate": 0.0011896746539412405, + "loss": 1.4319, + "step": 6330 + }, + { + "epoch": 0.6687763713080169, + "grad_norm": 0.7017821669578552, + "learning_rate": 0.0011732767954674265, + "loss": 1.4217, + "step": 6340 + }, + { + "epoch": 0.669831223628692, + "grad_norm": 0.5725095868110657, + "learning_rate": 0.0011571049565710122, + "loss": 1.427, + "step": 6350 + }, + { + "epoch": 0.6708860759493671, + "grad_norm": 0.6379651427268982, + "learning_rate": 0.001141156021915355, + "loss": 1.4233, + "step": 6360 + }, + { + "epoch": 0.6719409282700421, + "grad_norm": 0.6038700342178345, + "learning_rate": 0.001125426919103997, + "loss": 1.4049, + "step": 6370 + }, + { + "epoch": 0.6729957805907173, + "grad_norm": 0.8193974494934082, + "learning_rate": 0.001109914618088799, + "loss": 1.4099, + "step": 6380 + }, + { + "epoch": 0.6740506329113924, + "grad_norm": 0.8549070358276367, + "learning_rate": 0.0010946161305862348, + "loss": 1.4216, + "step": 6390 + }, + { + "epoch": 0.6751054852320675, + "grad_norm": 0.670832097530365, + "learning_rate": 0.001079528509501728, + "loss": 1.4188, + "step": 6400 + }, + { + "epoch": 0.6761603375527426, + "grad_norm": 0.5933755040168762, + "learning_rate": 0.0010646488483619261, + "loss": 
1.4134, + "step": 6410 + }, + { + "epoch": 0.6772151898734177, + "grad_norm": 0.6660593152046204, + "learning_rate": 0.0010499742807547976, + "loss": 1.4026, + "step": 6420 + }, + { + "epoch": 0.6782700421940928, + "grad_norm": 0.6021583676338196, + "learning_rate": 0.0010355019797774478, + "loss": 1.412, + "step": 6430 + }, + { + "epoch": 0.679324894514768, + "grad_norm": 0.5979887247085571, + "learning_rate": 0.001021229157491546, + "loss": 1.3942, + "step": 6440 + }, + { + "epoch": 0.680379746835443, + "grad_norm": 0.6080266237258911, + "learning_rate": 0.0010071530643862578, + "loss": 1.3989, + "step": 6450 + }, + { + "epoch": 0.6814345991561181, + "grad_norm": 0.5783116817474365, + "learning_rate": 0.000993270988848579, + "loss": 1.4105, + "step": 6460 + }, + { + "epoch": 0.6824894514767933, + "grad_norm": 0.5605062246322632, + "learning_rate": 0.0009795802566409742, + "loss": 1.4034, + "step": 6470 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 0.8012086153030396, + "learning_rate": 0.0009660782303862109, + "loss": 1.4078, + "step": 6480 + }, + { + "epoch": 0.6845991561181435, + "grad_norm": 0.5455026030540466, + "learning_rate": 0.0009527623090592963, + "loss": 1.4043, + "step": 6490 + }, + { + "epoch": 0.6856540084388185, + "grad_norm": 0.691439688205719, + "learning_rate": 0.0009396299274864177, + "loss": 1.4184, + "step": 6500 + }, + { + "epoch": 0.6867088607594937, + "grad_norm": 0.5907977223396301, + "learning_rate": 0.0009266785558507877, + "loss": 1.4157, + "step": 6510 + }, + { + "epoch": 0.6877637130801688, + "grad_norm": 0.5983433723449707, + "learning_rate": 0.0009139056992053016, + "loss": 1.4003, + "step": 6520 + }, + { + "epoch": 0.6888185654008439, + "grad_norm": 0.6499401926994324, + "learning_rate": 0.000901308896991912, + "loss": 1.3904, + "step": 6530 + }, + { + "epoch": 0.689873417721519, + "grad_norm": 0.727854311466217, + "learning_rate": 0.000888885722567627, + "loss": 1.398, + "step": 6540 + }, + { + "epoch": 
0.6909282700421941, + "grad_norm": 0.5444949269294739, + "learning_rate": 0.0008766337827370438, + "loss": 1.3922, + "step": 6550 + }, + { + "epoch": 0.6919831223628692, + "grad_norm": 0.7156972885131836, + "learning_rate": 0.000864550717291324, + "loss": 1.3921, + "step": 6560 + }, + { + "epoch": 0.6930379746835443, + "grad_norm": 0.5901584029197693, + "learning_rate": 0.0008526341985535229, + "loss": 1.3902, + "step": 6570 + }, + { + "epoch": 0.6940928270042194, + "grad_norm": 0.53646320104599, + "learning_rate": 0.0008408819309301891, + "loss": 1.3848, + "step": 6580 + }, + { + "epoch": 0.6951476793248945, + "grad_norm": 0.6122167110443115, + "learning_rate": 0.0008292916504691397, + "loss": 1.3883, + "step": 6590 + }, + { + "epoch": 0.6962025316455697, + "grad_norm": 0.7208245992660522, + "learning_rate": 0.0008178611244233354, + "loss": 1.3929, + "step": 6600 + }, + { + "epoch": 0.6972573839662447, + "grad_norm": 0.5553930401802063, + "learning_rate": 0.0008065881508207637, + "loss": 1.3794, + "step": 6610 + }, + { + "epoch": 0.6983122362869199, + "grad_norm": 0.573006808757782, + "learning_rate": 0.0007954705580402523, + "loss": 1.3889, + "step": 6620 + }, + { + "epoch": 0.6993670886075949, + "grad_norm": 0.6661033034324646, + "learning_rate": 0.0007845062043931298, + "loss": 1.3801, + "step": 6630 + }, + { + "epoch": 0.70042194092827, + "grad_norm": 0.647193193435669, + "learning_rate": 0.0007736929777106497, + "loss": 1.3848, + "step": 6640 + }, + { + "epoch": 0.7014767932489452, + "grad_norm": 0.6375086307525635, + "learning_rate": 0.000763028794937105, + "loss": 1.3791, + "step": 6650 + }, + { + "epoch": 0.7025316455696202, + "grad_norm": 0.5338489413261414, + "learning_rate": 0.0007525116017285476, + "loss": 1.3769, + "step": 6660 + }, + { + "epoch": 0.7035864978902954, + "grad_norm": 0.6267464756965637, + "learning_rate": 0.0007421393720570417, + "loss": 1.379, + "step": 6670 + }, + { + "epoch": 0.7046413502109705, + "grad_norm": 0.5349339842796326, + 
"learning_rate": 0.0007319101078203694, + "loss": 1.378, + "step": 6680 + }, + { + "epoch": 0.7056962025316456, + "grad_norm": 0.5406131148338318, + "learning_rate": 0.0007218218384571178, + "loss": 1.3721, + "step": 6690 + }, + { + "epoch": 0.7067510548523207, + "grad_norm": 0.5232717394828796, + "learning_rate": 0.0007118726205670703, + "loss": 1.3749, + "step": 6700 + }, + { + "epoch": 0.7078059071729957, + "grad_norm": 0.5295721292495728, + "learning_rate": 0.0007020605375368316, + "loss": 1.3735, + "step": 6710 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 0.6002175807952881, + "learning_rate": 0.000692383699170611, + "loss": 1.3638, + "step": 6720 + }, + { + "epoch": 0.709915611814346, + "grad_norm": 0.5596437454223633, + "learning_rate": 0.0006828402413260966, + "loss": 1.3716, + "step": 6730 + }, + { + "epoch": 0.7109704641350211, + "grad_norm": 0.523356020450592, + "learning_rate": 0.0006734283255553471, + "loss": 1.3745, + "step": 6740 + }, + { + "epoch": 0.7120253164556962, + "grad_norm": 0.6856476664543152, + "learning_rate": 0.0006641461387506347, + "loss": 1.3746, + "step": 6750 + }, + { + "epoch": 0.7130801687763713, + "grad_norm": 0.621408998966217, + "learning_rate": 0.0006549918927951678, + "loss": 1.3681, + "step": 6760 + }, + { + "epoch": 0.7141350210970464, + "grad_norm": 0.662599503993988, + "learning_rate": 0.0006459638242186297, + "loss": 1.3702, + "step": 6770 + }, + { + "epoch": 0.7151898734177216, + "grad_norm": 0.5775492191314697, + "learning_rate": 0.0006370601938574639, + "loss": 1.3702, + "step": 6780 + }, + { + "epoch": 0.7162447257383966, + "grad_norm": 0.5994099974632263, + "learning_rate": 0.0006282792865198421, + "loss": 1.3737, + "step": 6790 + }, + { + "epoch": 0.7172995780590717, + "grad_norm": 0.5773037672042847, + "learning_rate": 0.0006196194106552512, + "loss": 1.3598, + "step": 6800 + }, + { + "epoch": 0.7183544303797469, + "grad_norm": 0.8939270973205566, + "learning_rate": 0.0006110788980286328, + "loss": 
1.3626, + "step": 6810 + }, + { + "epoch": 0.7194092827004219, + "grad_norm": 0.6143664717674255, + "learning_rate": 0.0006026561033990158, + "loss": 1.3652, + "step": 6820 + }, + { + "epoch": 0.7204641350210971, + "grad_norm": 0.639657199382782, + "learning_rate": 0.000594349404202577, + "loss": 1.3609, + "step": 6830 + }, + { + "epoch": 0.7215189873417721, + "grad_norm": 0.6304871439933777, + "learning_rate": 0.0005861572002400716, + "loss": 1.3489, + "step": 6840 + }, + { + "epoch": 0.7225738396624473, + "grad_norm": 0.5501492023468018, + "learning_rate": 0.0005780779133685717, + "loss": 1.3589, + "step": 6850 + }, + { + "epoch": 0.7236286919831224, + "grad_norm": 0.5761770009994507, + "learning_rate": 0.0005701099871974524, + "loss": 1.3538, + "step": 6860 + }, + { + "epoch": 0.7246835443037974, + "grad_norm": 0.5242398381233215, + "learning_rate": 0.0005622518867885708, + "loss": 1.352, + "step": 6870 + }, + { + "epoch": 0.7257383966244726, + "grad_norm": 0.5525650382041931, + "learning_rate": 0.0005545020983605748, + "loss": 1.357, + "step": 6880 + }, + { + "epoch": 0.7267932489451476, + "grad_norm": 0.5790526270866394, + "learning_rate": 0.0005468591289972898, + "loss": 1.348, + "step": 6890 + }, + { + "epoch": 0.7278481012658228, + "grad_norm": 0.5800452828407288, + "learning_rate": 0.0005393215063601232, + "loss": 1.3495, + "step": 6900 + }, + { + "epoch": 0.7289029535864979, + "grad_norm": 0.5815948247909546, + "learning_rate": 0.0005318877784044343, + "loss": 1.3726, + "step": 6910 + }, + { + "epoch": 0.729957805907173, + "grad_norm": 0.5454148650169373, + "learning_rate": 0.0005245565130998126, + "loss": 1.3524, + "step": 6920 + }, + { + "epoch": 0.7310126582278481, + "grad_norm": 0.5337786674499512, + "learning_rate": 0.000517326298154212, + "loss": 1.3564, + "step": 6930 + }, + { + "epoch": 0.7320675105485233, + "grad_norm": 0.6180921792984009, + "learning_rate": 0.0005101957407418877, + "loss": 1.3579, + "step": 6940 + }, + { + "epoch": 
0.7331223628691983, + "grad_norm": 0.5910723209381104, + "learning_rate": 0.0005031634672350829, + "loss": 1.3574, + "step": 6950 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 0.5380088090896606, + "learning_rate": 0.0004962281229394129, + "loss": 1.3485, + "step": 6960 + }, + { + "epoch": 0.7352320675105485, + "grad_norm": 0.5460664629936218, + "learning_rate": 0.0004893883718328983, + "loss": 1.3418, + "step": 6970 + }, + { + "epoch": 0.7362869198312236, + "grad_norm": 0.51988685131073, + "learning_rate": 0.0004826428963085938, + "loss": 1.3452, + "step": 6980 + }, + { + "epoch": 0.7373417721518988, + "grad_norm": 0.5688920617103577, + "learning_rate": 0.00047599039692076457, + "loss": 1.3452, + "step": 6990 + }, + { + "epoch": 0.7383966244725738, + "grad_norm": 0.5576639771461487, + "learning_rate": 0.0004694295921345622, + "loss": 1.3539, + "step": 7000 + }, + { + "epoch": 0.739451476793249, + "grad_norm": 0.5200167894363403, + "learning_rate": 0.00046295921807915015, + "loss": 1.3262, + "step": 7010 + }, + { + "epoch": 0.740506329113924, + "grad_norm": 0.7411196827888489, + "learning_rate": 0.00045657802830423164, + "loss": 1.3403, + "step": 7020 + }, + { + "epoch": 0.7415611814345991, + "grad_norm": 0.5510851144790649, + "learning_rate": 0.00045028479353993473, + "loss": 1.3463, + "step": 7030 + }, + { + "epoch": 0.7426160337552743, + "grad_norm": 0.685629665851593, + "learning_rate": 0.00044407830146000587, + "loss": 1.352, + "step": 7040 + }, + { + "epoch": 0.7436708860759493, + "grad_norm": 0.5442322492599487, + "learning_rate": 0.0004379573564482676, + "loss": 1.3397, + "step": 7050 + }, + { + "epoch": 0.7447257383966245, + "grad_norm": 0.5900353193283081, + "learning_rate": 0.0004319207793682963, + "loss": 1.3431, + "step": 7060 + }, + { + "epoch": 0.7457805907172996, + "grad_norm": 0.5816121697425842, + "learning_rate": 0.0004259674073362731, + "loss": 1.3489, + "step": 7070 + }, + { + "epoch": 0.7468354430379747, + "grad_norm": 
0.5132590532302856, + "learning_rate": 0.00042009609349696626, + "loss": 1.3435, + "step": 7080 + }, + { + "epoch": 0.7478902953586498, + "grad_norm": 0.6614291667938232, + "learning_rate": 0.00041430570680280233, + "loss": 1.3414, + "step": 7090 + }, + { + "epoch": 0.7489451476793249, + "grad_norm": 0.5441742539405823, + "learning_rate": 0.0004085951317959809, + "loss": 1.3373, + "step": 7100 + }, + { + "epoch": 0.75, + "grad_norm": 0.5576218962669373, + "learning_rate": 0.00040296326839359315, + "loss": 1.3472, + "step": 7110 + }, + { + "epoch": 0.7510548523206751, + "grad_norm": 0.5546467900276184, + "learning_rate": 0.000397409031675703, + "loss": 1.3415, + "step": 7120 + }, + { + "epoch": 0.7521097046413502, + "grad_norm": 0.5801625847816467, + "learning_rate": 0.00039193135167634786, + "loss": 1.3495, + "step": 7130 + }, + { + "epoch": 0.7531645569620253, + "grad_norm": 0.5182439088821411, + "learning_rate": 0.00038652917317742123, + "loss": 1.3314, + "step": 7140 + }, + { + "epoch": 0.7542194092827004, + "grad_norm": 0.5661932826042175, + "learning_rate": 0.0003812014555053956, + "loss": 1.329, + "step": 7150 + }, + { + "epoch": 0.7552742616033755, + "grad_norm": 0.5124690532684326, + "learning_rate": 0.00037594717233084774, + "loss": 1.3379, + "step": 7160 + }, + { + "epoch": 0.7563291139240507, + "grad_norm": 0.6871455311775208, + "learning_rate": 0.0003707653114707471, + "loss": 1.3459, + "step": 7170 + }, + { + "epoch": 0.7573839662447257, + "grad_norm": 0.5211157202720642, + "learning_rate": 0.00036565487469346906, + "loss": 1.3222, + "step": 7180 + }, + { + "epoch": 0.7584388185654009, + "grad_norm": 0.524029016494751, + "learning_rate": 0.0003606148775264958, + "loss": 1.3258, + "step": 7190 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 0.554638147354126, + "learning_rate": 0.0003556443490667684, + "loss": 1.3309, + "step": 7200 + }, + { + "epoch": 0.760548523206751, + "grad_norm": 0.5793325304985046, + "learning_rate": 0.0003507423317936521, 
+ "loss": 1.3341, + "step": 7210 + }, + { + "epoch": 0.7616033755274262, + "grad_norm": 0.567593514919281, + "learning_rate": 0.00034590788138448006, + "loss": 1.3464, + "step": 7220 + }, + { + "epoch": 0.7626582278481012, + "grad_norm": 0.5207052230834961, + "learning_rate": 0.0003411400665326393, + "loss": 1.3436, + "step": 7230 + }, + { + "epoch": 0.7637130801687764, + "grad_norm": 0.554315447807312, + "learning_rate": 0.00033643796876816424, + "loss": 1.3396, + "step": 7240 + }, + { + "epoch": 0.7647679324894515, + "grad_norm": 0.5942793488502502, + "learning_rate": 0.000331800682280803, + "loss": 1.3345, + "step": 7250 + }, + { + "epoch": 0.7658227848101266, + "grad_norm": 0.5175394415855408, + "learning_rate": 0.0003272273137455226, + "loss": 1.3289, + "step": 7260 + }, + { + "epoch": 0.7668776371308017, + "grad_norm": 0.5587158203125, + "learning_rate": 0.00032271698215041863, + "loss": 1.3281, + "step": 7270 + }, + { + "epoch": 0.7679324894514767, + "grad_norm": 0.6837599277496338, + "learning_rate": 0.0003182688186269984, + "loss": 1.3313, + "step": 7280 + }, + { + "epoch": 0.7689873417721519, + "grad_norm": 0.5992154479026794, + "learning_rate": 0.0003138819662828017, + "loss": 1.3404, + "step": 7290 + }, + { + "epoch": 0.770042194092827, + "grad_norm": 0.649645209312439, + "learning_rate": 0.00030955558003632966, + "loss": 1.3422, + "step": 7300 + }, + { + "epoch": 0.7710970464135021, + "grad_norm": 0.5224177837371826, + "learning_rate": 0.0003052888264542483, + "loss": 1.3258, + "step": 7310 + }, + { + "epoch": 0.7721518987341772, + "grad_norm": 0.5287717580795288, + "learning_rate": 0.0003010808835908368, + "loss": 1.3231, + "step": 7320 + }, + { + "epoch": 0.7732067510548524, + "grad_norm": 0.5435400009155273, + "learning_rate": 0.00029693094082964785, + "loss": 1.3261, + "step": 7330 + }, + { + "epoch": 0.7742616033755274, + "grad_norm": 0.6325677633285522, + "learning_rate": 0.0002928381987273508, + "loss": 1.3209, + "step": 7340 + }, + { + "epoch": 
0.7753164556962026, + "grad_norm": 0.562950074672699, + "learning_rate": 0.0002888018688597272, + "loss": 1.3193, + "step": 7350 + }, + { + "epoch": 0.7763713080168776, + "grad_norm": 0.6350160837173462, + "learning_rate": 0.0002848211736697894, + "loss": 1.3268, + "step": 7360 + }, + { + "epoch": 0.7774261603375527, + "grad_norm": 0.5619743466377258, + "learning_rate": 0.00028089534631799183, + "loss": 1.3277, + "step": 7370 + }, + { + "epoch": 0.7784810126582279, + "grad_norm": 0.5412030220031738, + "learning_rate": 0.0002770236305345076, + "loss": 1.3157, + "step": 7380 + }, + { + "epoch": 0.7795358649789029, + "grad_norm": 0.544607400894165, + "learning_rate": 0.00027320528047354093, + "loss": 1.3251, + "step": 7390 + }, + { + "epoch": 0.7805907172995781, + "grad_norm": 0.5767512917518616, + "learning_rate": 0.00026943956056964773, + "loss": 1.3307, + "step": 7400 + }, + { + "epoch": 0.7816455696202531, + "grad_norm": 0.5896579623222351, + "learning_rate": 0.0002657257453960364, + "loss": 1.3124, + "step": 7410 + }, + { + "epoch": 0.7827004219409283, + "grad_norm": 0.5270397663116455, + "learning_rate": 0.0002620631195248222, + "loss": 1.3176, + "step": 7420 + }, + { + "epoch": 0.7837552742616034, + "grad_norm": 0.5265063047409058, + "learning_rate": 0.00025845097738920735, + "loss": 1.3041, + "step": 7430 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 0.5315859317779541, + "learning_rate": 0.0002548886231475606, + "loss": 1.3085, + "step": 7440 + }, + { + "epoch": 0.7858649789029536, + "grad_norm": 0.5290600061416626, + "learning_rate": 0.0002513753705493713, + "loss": 1.3106, + "step": 7450 + }, + { + "epoch": 0.7869198312236287, + "grad_norm": 0.5224332213401794, + "learning_rate": 0.0002479105428030497, + "loss": 1.3221, + "step": 7460 + }, + { + "epoch": 0.7879746835443038, + "grad_norm": 0.5644242167472839, + "learning_rate": 0.00024449347244555043, + "loss": 1.3129, + "step": 7470 + }, + { + "epoch": 0.7890295358649789, + "grad_norm": 
0.6498456001281738, + "learning_rate": 0.00024112350121379254, + "loss": 1.3185, + "step": 7480 + }, + { + "epoch": 0.790084388185654, + "grad_norm": 0.5531762838363647, + "learning_rate": 0.000237799979917852, + "loss": 1.3247, + "step": 7490 + }, + { + "epoch": 0.7911392405063291, + "grad_norm": 0.551281750202179, + "learning_rate": 0.00023452226831590227, + "loss": 1.3205, + "step": 7500 + }, + { + "epoch": 0.7921940928270043, + "grad_norm": 0.5409637689590454, + "learning_rate": 0.00023128973499087779, + "loss": 1.3286, + "step": 7510 + }, + { + "epoch": 0.7932489451476793, + "grad_norm": 0.5300262570381165, + "learning_rate": 0.00022810175722883858, + "loss": 1.3094, + "step": 7520 + }, + { + "epoch": 0.7943037974683544, + "grad_norm": 0.5107564330101013, + "learning_rate": 0.0002249577208990106, + "loss": 1.3124, + "step": 7530 + }, + { + "epoch": 0.7953586497890295, + "grad_norm": 0.5383130311965942, + "learning_rate": 0.00022185702033547996, + "loss": 1.3088, + "step": 7540 + }, + { + "epoch": 0.7964135021097046, + "grad_norm": 0.5378766059875488, + "learning_rate": 0.00021879905822051756, + "loss": 1.3064, + "step": 7550 + }, + { + "epoch": 0.7974683544303798, + "grad_norm": 0.5944802165031433, + "learning_rate": 0.00021578324546951222, + "loss": 1.3106, + "step": 7560 + }, + { + "epoch": 0.7985232067510548, + "grad_norm": 0.5424947142601013, + "learning_rate": 0.00021280900111748948, + "loss": 1.3191, + "step": 7570 + }, + { + "epoch": 0.79957805907173, + "grad_norm": 0.527175784111023, + "learning_rate": 0.00020987575220719483, + "loss": 1.3168, + "step": 7580 + }, + { + "epoch": 0.8006329113924051, + "grad_norm": 0.5503515601158142, + "learning_rate": 0.00020698293367871933, + "loss": 1.3073, + "step": 7590 + }, + { + "epoch": 0.8016877637130801, + "grad_norm": 0.5347628593444824, + "learning_rate": 0.00020412998826064692, + "loss": 1.3115, + "step": 7600 + }, + { + "epoch": 0.8027426160337553, + "grad_norm": 0.5652713775634766, + "learning_rate": 
0.00020131636636270178, + "loss": 1.3235, + "step": 7610 + }, + { + "epoch": 0.8037974683544303, + "grad_norm": 0.5310170650482178, + "learning_rate": 0.00019854152596987523, + "loss": 1.2963, + "step": 7620 + }, + { + "epoch": 0.8048523206751055, + "grad_norm": 0.5455449819564819, + "learning_rate": 0.00019580493253801255, + "loss": 1.3193, + "step": 7630 + }, + { + "epoch": 0.8059071729957806, + "grad_norm": 0.5479976534843445, + "learning_rate": 0.00019310605889083838, + "loss": 1.3101, + "step": 7640 + }, + { + "epoch": 0.8069620253164557, + "grad_norm": 0.5144944787025452, + "learning_rate": 0.0001904443851184018, + "loss": 1.3254, + "step": 7650 + }, + { + "epoch": 0.8080168776371308, + "grad_norm": 0.5066655874252319, + "learning_rate": 0.00018781939847692096, + "loss": 1.3058, + "step": 7660 + }, + { + "epoch": 0.8090717299578059, + "grad_norm": 0.5283026099205017, + "learning_rate": 0.00018523059329000844, + "loss": 1.3112, + "step": 7670 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 0.6069780588150024, + "learning_rate": 0.0001826774708512579, + "loss": 1.3232, + "step": 7680 + }, + { + "epoch": 0.8111814345991561, + "grad_norm": 0.6388716697692871, + "learning_rate": 0.00018015953932817348, + "loss": 1.2984, + "step": 7690 + }, + { + "epoch": 0.8122362869198312, + "grad_norm": 0.5345794558525085, + "learning_rate": 0.00017767631366742332, + "loss": 1.3057, + "step": 7700 + }, + { + "epoch": 0.8132911392405063, + "grad_norm": 0.7205454111099243, + "learning_rate": 0.00017522731550139922, + "loss": 1.3052, + "step": 7710 + }, + { + "epoch": 0.8143459915611815, + "grad_norm": 0.5462461709976196, + "learning_rate": 0.00017281207305606407, + "loss": 1.3121, + "step": 7720 + }, + { + "epoch": 0.8154008438818565, + "grad_norm": 0.5599740743637085, + "learning_rate": 0.00017043012106006926, + "loss": 1.3146, + "step": 7730 + }, + { + "epoch": 0.8164556962025317, + "grad_norm": 0.5159615874290466, + "learning_rate": 0.00016808100065512528, + "loss": 
1.3147, + "step": 7740 + }, + { + "epoch": 0.8175105485232067, + "grad_norm": 0.5742266774177551, + "learning_rate": 0.00016576425930760734, + "loss": 1.301, + "step": 7750 + }, + { + "epoch": 0.8185654008438819, + "grad_norm": 0.6085094213485718, + "learning_rate": 0.00016347945072137934, + "loss": 1.2991, + "step": 7760 + }, + { + "epoch": 0.819620253164557, + "grad_norm": 0.5180689096450806, + "learning_rate": 0.00016122613475181977, + "loss": 1.3102, + "step": 7770 + }, + { + "epoch": 0.820675105485232, + "grad_norm": 0.7322177290916443, + "learning_rate": 0.00015900387732103232, + "loss": 1.3023, + "step": 7780 + }, + { + "epoch": 0.8217299578059072, + "grad_norm": 0.6276547908782959, + "learning_rate": 0.00015681225033422526, + "loss": 1.3167, + "step": 7790 + }, + { + "epoch": 0.8227848101265823, + "grad_norm": 0.507826566696167, + "learning_rate": 0.00015465083159724345, + "loss": 1.3062, + "step": 7800 + }, + { + "epoch": 0.8238396624472574, + "grad_norm": 0.5147706270217896, + "learning_rate": 0.0001525192047352371, + "loss": 1.3051, + "step": 7810 + }, + { + "epoch": 0.8248945147679325, + "grad_norm": 0.5236575603485107, + "learning_rate": 0.00015041695911245136, + "loss": 1.3127, + "step": 7820 + }, + { + "epoch": 0.8259493670886076, + "grad_norm": 0.5811189413070679, + "learning_rate": 0.00014834368975312172, + "loss": 1.3027, + "step": 7830 + }, + { + "epoch": 0.8270042194092827, + "grad_norm": 0.5144124031066895, + "learning_rate": 0.00014629899726345958, + "loss": 1.2905, + "step": 7840 + }, + { + "epoch": 0.8280590717299579, + "grad_norm": 0.5891360640525818, + "learning_rate": 0.00014428248775471316, + "loss": 1.2968, + "step": 7850 + }, + { + "epoch": 0.8291139240506329, + "grad_norm": 0.5310967564582825, + "learning_rate": 0.000142293772767289, + "loss": 1.2885, + "step": 7860 + }, + { + "epoch": 0.830168776371308, + "grad_norm": 0.5702033638954163, + "learning_rate": 0.00014033246919591922, + "loss": 1.2997, + "step": 7870 + }, + { + "epoch": 
0.8312236286919831, + "grad_norm": 0.5758183002471924, + "learning_rate": 0.00013839819921586025, + "loss": 1.3202, + "step": 7880 + }, + { + "epoch": 0.8322784810126582, + "grad_norm": 0.5175114274024963, + "learning_rate": 0.00013649059021010894, + "loss": 1.3159, + "step": 7890 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.5361286997795105, + "learning_rate": 0.00013460927469762155, + "loss": 1.2951, + "step": 7900 + }, + { + "epoch": 0.8343881856540084, + "grad_norm": 0.6139363050460815, + "learning_rate": 0.00013275389026252255, + "loss": 1.3049, + "step": 7910 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 0.6472334265708923, + "learning_rate": 0.0001309240794842889, + "loss": 1.3038, + "step": 7920 + }, + { + "epoch": 0.8364978902953587, + "grad_norm": 0.5167619585990906, + "learning_rate": 0.00012911948986889664, + "loss": 1.3128, + "step": 7930 + }, + { + "epoch": 0.8375527426160337, + "grad_norm": 0.5123831629753113, + "learning_rate": 0.00012733977378091664, + "loss": 1.307, + "step": 7940 + }, + { + "epoch": 0.8386075949367089, + "grad_norm": 0.5331373810768127, + "learning_rate": 0.00012558458837654633, + "loss": 1.3101, + "step": 7950 + }, + { + "epoch": 0.8396624472573839, + "grad_norm": 0.5281723141670227, + "learning_rate": 0.00012385359553756422, + "loss": 1.2935, + "step": 7960 + }, + { + "epoch": 0.8407172995780591, + "grad_norm": 0.545865535736084, + "learning_rate": 0.0001221464618061951, + "loss": 1.2951, + "step": 7970 + }, + { + "epoch": 0.8417721518987342, + "grad_norm": 0.5309757590293884, + "learning_rate": 0.0001204628583208727, + "loss": 1.2935, + "step": 7980 + }, + { + "epoch": 0.8428270042194093, + "grad_norm": 0.5760915279388428, + "learning_rate": 0.00011880246075288824, + "loss": 1.3055, + "step": 7990 + }, + { + "epoch": 0.8438818565400844, + "grad_norm": 0.54163658618927, + "learning_rate": 0.00011716494924391148, + "loss": 1.3012, + "step": 8000 + }, + { + "epoch": 0.8449367088607594, + "grad_norm": 
0.5665397047996521, + "learning_rate": 0.00011555000834437363, + "loss": 1.3061, + "step": 8010 + }, + { + "epoch": 0.8459915611814346, + "grad_norm": 0.5961025953292847, + "learning_rate": 0.00011395732695269907, + "loss": 1.3013, + "step": 8020 + }, + { + "epoch": 0.8470464135021097, + "grad_norm": 0.5584152936935425, + "learning_rate": 0.00011238659825537507, + "loss": 1.2973, + "step": 8030 + }, + { + "epoch": 0.8481012658227848, + "grad_norm": 0.5412735342979431, + "learning_rate": 0.00011083751966784716, + "loss": 1.3102, + "step": 8040 + }, + { + "epoch": 0.8491561181434599, + "grad_norm": 0.5326846241950989, + "learning_rate": 0.00010930979277622952, + "loss": 1.2901, + "step": 8050 + }, + { + "epoch": 0.8502109704641351, + "grad_norm": 0.5362796783447266, + "learning_rate": 0.00010780312327981853, + "loss": 1.2993, + "step": 8060 + }, + { + "epoch": 0.8512658227848101, + "grad_norm": 0.5208104252815247, + "learning_rate": 0.0001063172209343989, + "loss": 1.3025, + "step": 8070 + }, + { + "epoch": 0.8523206751054853, + "grad_norm": 0.5004886984825134, + "learning_rate": 0.000104851799496331, + "loss": 1.3065, + "step": 8080 + }, + { + "epoch": 0.8533755274261603, + "grad_norm": 0.5164020657539368, + "learning_rate": 0.00010340657666740917, + "loss": 1.2934, + "step": 8090 + }, + { + "epoch": 0.8544303797468354, + "grad_norm": 0.5336525440216064, + "learning_rate": 0.00010198127404047976, + "loss": 1.2896, + "step": 8100 + }, + { + "epoch": 0.8554852320675106, + "grad_norm": 0.5941040515899658, + "learning_rate": 0.00010057561704580898, + "loss": 1.3078, + "step": 8110 + }, + { + "epoch": 0.8565400843881856, + "grad_norm": 0.5325416922569275, + "learning_rate": 9.918933489818986e-05, + "loss": 1.2964, + "step": 8120 + }, + { + "epoch": 0.8575949367088608, + "grad_norm": 0.5825205445289612, + "learning_rate": 9.782216054477828e-05, + "loss": 1.2963, + "step": 8130 + }, + { + "epoch": 0.8586497890295358, + "grad_norm": 0.5118545889854431, + "learning_rate": 
9.647383061364803e-05, + "loss": 1.2957, + "step": 8140 + }, + { + "epoch": 0.859704641350211, + "grad_norm": 0.5081261992454529, + "learning_rate": 9.514408536305497e-05, + "loss": 1.291, + "step": 8150 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 0.5143427848815918, + "learning_rate": 9.383266863140043e-05, + "loss": 1.3036, + "step": 8160 + }, + { + "epoch": 0.8618143459915611, + "grad_norm": 0.5509889125823975, + "learning_rate": 9.25393277878844e-05, + "loss": 1.292, + "step": 8170 + }, + { + "epoch": 0.8628691983122363, + "grad_norm": 0.546241819858551, + "learning_rate": 9.126381368383881e-05, + "loss": 1.3004, + "step": 8180 + }, + { + "epoch": 0.8639240506329114, + "grad_norm": 0.5705376267433167, + "learning_rate": 9.000588060473158e-05, + "loss": 1.2945, + "step": 8190 + }, + { + "epoch": 0.8649789029535865, + "grad_norm": 0.5209640860557556, + "learning_rate": 8.876528622283232e-05, + "loss": 1.3052, + "step": 8200 + }, + { + "epoch": 0.8660337552742616, + "grad_norm": 0.511217474937439, + "learning_rate": 8.754179155053052e-05, + "loss": 1.2977, + "step": 8210 + }, + { + "epoch": 0.8670886075949367, + "grad_norm": 0.5343218445777893, + "learning_rate": 8.63351608942968e-05, + "loss": 1.3053, + "step": 8220 + }, + { + "epoch": 0.8681434599156118, + "grad_norm": 0.529174268245697, + "learning_rate": 8.514516180927926e-05, + "loss": 1.3003, + "step": 8230 + }, + { + "epoch": 0.869198312236287, + "grad_norm": 0.5331752300262451, + "learning_rate": 8.397156505452524e-05, + "loss": 1.2962, + "step": 8240 + }, + { + "epoch": 0.870253164556962, + "grad_norm": 0.5284889936447144, + "learning_rate": 8.28141445488205e-05, + "loss": 1.3121, + "step": 8250 + }, + { + "epoch": 0.8713080168776371, + "grad_norm": 0.5290368795394897, + "learning_rate": 8.167267732713705e-05, + "loss": 1.3031, + "step": 8260 + }, + { + "epoch": 0.8723628691983122, + "grad_norm": 0.5936283469200134, + "learning_rate": 8.054694349768114e-05, + "loss": 1.2998, + "step": 8270 + }, 
+ { + "epoch": 0.8734177215189873, + "grad_norm": 0.5169528126716614, + "learning_rate": 7.943672619953359e-05, + "loss": 1.2816, + "step": 8280 + }, + { + "epoch": 0.8744725738396625, + "grad_norm": 0.5384418964385986, + "learning_rate": 7.834181156087357e-05, + "loss": 1.2834, + "step": 8290 + }, + { + "epoch": 0.8755274261603375, + "grad_norm": 0.5332403182983398, + "learning_rate": 7.726198865777852e-05, + "loss": 1.3077, + "step": 8300 + }, + { + "epoch": 0.8765822784810127, + "grad_norm": 0.5618483424186707, + "learning_rate": 7.61970494735919e-05, + "loss": 1.2872, + "step": 8310 + }, + { + "epoch": 0.8776371308016878, + "grad_norm": 0.5266836881637573, + "learning_rate": 7.514678885885086e-05, + "loss": 1.307, + "step": 8320 + }, + { + "epoch": 0.8786919831223629, + "grad_norm": 0.5128325819969177, + "learning_rate": 7.411100449176634e-05, + "loss": 1.2997, + "step": 8330 + }, + { + "epoch": 0.879746835443038, + "grad_norm": 0.5142980217933655, + "learning_rate": 7.308949683924792e-05, + "loss": 1.289, + "step": 8340 + }, + { + "epoch": 0.880801687763713, + "grad_norm": 0.536188006401062, + "learning_rate": 7.208206911846581e-05, + "loss": 1.2893, + "step": 8350 + }, + { + "epoch": 0.8818565400843882, + "grad_norm": 0.5220873355865479, + "learning_rate": 7.10885272589427e-05, + "loss": 1.2869, + "step": 8360 + }, + { + "epoch": 0.8829113924050633, + "grad_norm": 0.5021454095840454, + "learning_rate": 7.010867986516811e-05, + "loss": 1.2949, + "step": 8370 + }, + { + "epoch": 0.8839662447257384, + "grad_norm": 0.5656719207763672, + "learning_rate": 6.914233817972799e-05, + "loss": 1.301, + "step": 8380 + }, + { + "epoch": 0.8850210970464135, + "grad_norm": 0.5156108140945435, + "learning_rate": 6.818931604694264e-05, + "loss": 1.2922, + "step": 8390 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 0.5471301078796387, + "learning_rate": 6.724942987700563e-05, + "loss": 1.2934, + "step": 8400 + }, + { + "epoch": 0.8871308016877637, + "grad_norm": 
0.6051763296127319, + "learning_rate": 6.632249861061733e-05, + "loss": 1.3108, + "step": 8410 + }, + { + "epoch": 0.8881856540084389, + "grad_norm": 0.5115141272544861, + "learning_rate": 6.540834368410549e-05, + "loss": 1.2973, + "step": 8420 + }, + { + "epoch": 0.8892405063291139, + "grad_norm": 0.5198580622673035, + "learning_rate": 6.4506788995027e-05, + "loss": 1.2961, + "step": 8430 + }, + { + "epoch": 0.890295358649789, + "grad_norm": 0.5270715355873108, + "learning_rate": 6.361766086824344e-05, + "loss": 1.2875, + "step": 8440 + }, + { + "epoch": 0.8913502109704642, + "grad_norm": 0.593762218952179, + "learning_rate": 6.274078802246449e-05, + "loss": 1.2978, + "step": 8450 + }, + { + "epoch": 0.8924050632911392, + "grad_norm": 0.5731693506240845, + "learning_rate": 6.187600153725223e-05, + "loss": 1.2922, + "step": 8460 + }, + { + "epoch": 0.8934599156118144, + "grad_norm": 0.5129960775375366, + "learning_rate": 6.1023134820480546e-05, + "loss": 1.2946, + "step": 8470 + }, + { + "epoch": 0.8945147679324894, + "grad_norm": 0.5267176628112793, + "learning_rate": 6.0182023576242725e-05, + "loss": 1.288, + "step": 8480 + }, + { + "epoch": 0.8955696202531646, + "grad_norm": 0.5282775163650513, + "learning_rate": 5.9352505773201664e-05, + "loss": 1.2926, + "step": 8490 + }, + { + "epoch": 0.8966244725738397, + "grad_norm": 0.5173938870429993, + "learning_rate": 5.8534421613376175e-05, + "loss": 1.2851, + "step": 8500 + }, + { + "epoch": 0.8976793248945147, + "grad_norm": 0.5107463598251343, + "learning_rate": 5.772761350135759e-05, + "loss": 1.2932, + "step": 8510 + }, + { + "epoch": 0.8987341772151899, + "grad_norm": 0.5102089047431946, + "learning_rate": 5.6931926013950586e-05, + "loss": 1.2919, + "step": 8520 + }, + { + "epoch": 0.8997890295358649, + "grad_norm": 0.5097337961196899, + "learning_rate": 5.61472058702326e-05, + "loss": 1.3049, + "step": 8530 + }, + { + "epoch": 0.9008438818565401, + "grad_norm": 0.5096694827079773, + "learning_rate": 
5.53733019020258e-05, + "loss": 1.3014, + "step": 8540 + }, + { + "epoch": 0.9018987341772152, + "grad_norm": 0.5256516933441162, + "learning_rate": 5.4610065024776125e-05, + "loss": 1.2919, + "step": 8550 + }, + { + "epoch": 0.9029535864978903, + "grad_norm": 0.5312962532043457, + "learning_rate": 5.38573482088337e-05, + "loss": 1.2837, + "step": 8560 + }, + { + "epoch": 0.9040084388185654, + "grad_norm": 0.6747413277626038, + "learning_rate": 5.3115006451129075e-05, + "loss": 1.3012, + "step": 8570 + }, + { + "epoch": 0.9050632911392406, + "grad_norm": 0.5266739726066589, + "learning_rate": 5.2382896747239935e-05, + "loss": 1.2939, + "step": 8580 + }, + { + "epoch": 0.9061181434599156, + "grad_norm": 0.6045685410499573, + "learning_rate": 5.166087806384275e-05, + "loss": 1.3004, + "step": 8590 + }, + { + "epoch": 0.9071729957805907, + "grad_norm": 0.5957581996917725, + "learning_rate": 5.0948811311544186e-05, + "loss": 1.2904, + "step": 8600 + }, + { + "epoch": 0.9082278481012658, + "grad_norm": 0.5947316884994507, + "learning_rate": 5.024655931808697e-05, + "loss": 1.2935, + "step": 8610 + }, + { + "epoch": 0.9092827004219409, + "grad_norm": 0.5241140723228455, + "learning_rate": 4.955398680192509e-05, + "loss": 1.2881, + "step": 8620 + }, + { + "epoch": 0.9103375527426161, + "grad_norm": 0.5557602643966675, + "learning_rate": 4.887096034616319e-05, + "loss": 1.2977, + "step": 8630 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 0.5545855164527893, + "learning_rate": 4.819734837285529e-05, + "loss": 1.2805, + "step": 8640 + }, + { + "epoch": 0.9124472573839663, + "grad_norm": 0.5288124680519104, + "learning_rate": 4.7533021117657475e-05, + "loss": 1.2837, + "step": 8650 + }, + { + "epoch": 0.9135021097046413, + "grad_norm": 0.5827479362487793, + "learning_rate": 4.687785060483031e-05, + "loss": 1.2857, + "step": 8660 + }, + { + "epoch": 0.9145569620253164, + "grad_norm": 0.5763390064239502, + "learning_rate": 4.623171062258557e-05, + "loss": 1.2921, + 
"step": 8670 + }, + { + "epoch": 0.9156118143459916, + "grad_norm": 0.4946485757827759, + "learning_rate": 4.559447669877288e-05, + "loss": 1.3028, + "step": 8680 + }, + { + "epoch": 0.9166666666666666, + "grad_norm": 0.5472092032432556, + "learning_rate": 4.496602607690141e-05, + "loss": 1.2951, + "step": 8690 + }, + { + "epoch": 0.9177215189873418, + "grad_norm": 0.519067645072937, + "learning_rate": 4.434623769249217e-05, + "loss": 1.2938, + "step": 8700 + }, + { + "epoch": 0.9187763713080169, + "grad_norm": 0.5109884738922119, + "learning_rate": 4.373499214975615e-05, + "loss": 1.2912, + "step": 8710 + }, + { + "epoch": 0.919831223628692, + "grad_norm": 0.5203489661216736, + "learning_rate": 4.313217169859397e-05, + "loss": 1.2856, + "step": 8720 + }, + { + "epoch": 0.9208860759493671, + "grad_norm": 0.5210446119308472, + "learning_rate": 4.253766021191256e-05, + "loss": 1.2988, + "step": 8730 + }, + { + "epoch": 0.9219409282700421, + "grad_norm": 0.5181707143783569, + "learning_rate": 4.19513431632545e-05, + "loss": 1.2856, + "step": 8740 + }, + { + "epoch": 0.9229957805907173, + "grad_norm": 0.49860599637031555, + "learning_rate": 4.1373107604735626e-05, + "loss": 1.2862, + "step": 8750 + }, + { + "epoch": 0.9240506329113924, + "grad_norm": 0.5813160538673401, + "learning_rate": 4.0802842145286876e-05, + "loss": 1.2723, + "step": 8760 + }, + { + "epoch": 0.9251054852320675, + "grad_norm": 0.5179439783096313, + "learning_rate": 4.024043692919589e-05, + "loss": 1.2825, + "step": 8770 + }, + { + "epoch": 0.9261603375527426, + "grad_norm": 0.532271146774292, + "learning_rate": 3.968578361494449e-05, + "loss": 1.2903, + "step": 8780 + }, + { + "epoch": 0.9272151898734177, + "grad_norm": 0.5049121379852295, + "learning_rate": 3.91387753543378e-05, + "loss": 1.286, + "step": 8790 + }, + { + "epoch": 0.9282700421940928, + "grad_norm": 0.5227738618850708, + "learning_rate": 3.859930677192103e-05, + "loss": 1.2822, + "step": 8800 + }, + { + "epoch": 0.929324894514768, 
+ "grad_norm": 0.5182602405548096, + "learning_rate": 3.806727394468005e-05, + "loss": 1.3021, + "step": 8810 + }, + { + "epoch": 0.930379746835443, + "grad_norm": 0.5031376481056213, + "learning_rate": 3.7542574382021635e-05, + "loss": 1.2857, + "step": 8820 + }, + { + "epoch": 0.9314345991561181, + "grad_norm": 0.5353463292121887, + "learning_rate": 3.702510700602975e-05, + "loss": 1.3062, + "step": 8830 + }, + { + "epoch": 0.9324894514767933, + "grad_norm": 0.5061064958572388, + "learning_rate": 3.651477213199394e-05, + "loss": 1.2892, + "step": 8840 + }, + { + "epoch": 0.9335443037974683, + "grad_norm": 0.5634616017341614, + "learning_rate": 3.601147144920609e-05, + "loss": 1.2884, + "step": 8850 + }, + { + "epoch": 0.9345991561181435, + "grad_norm": 0.5346449017524719, + "learning_rate": 3.5515108002021946e-05, + "loss": 1.2876, + "step": 8860 + }, + { + "epoch": 0.9356540084388185, + "grad_norm": 0.5153203010559082, + "learning_rate": 3.502558617118352e-05, + "loss": 1.2903, + "step": 8870 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 0.5325221419334412, + "learning_rate": 3.454281165539913e-05, + "loss": 1.2953, + "step": 8880 + }, + { + "epoch": 0.9377637130801688, + "grad_norm": 0.5171404480934143, + "learning_rate": 3.406669145317717e-05, + "loss": 1.2937, + "step": 8890 + }, + { + "epoch": 0.9388185654008439, + "grad_norm": 0.5177060961723328, + "learning_rate": 3.359713384491036e-05, + "loss": 1.2862, + "step": 8900 + }, + { + "epoch": 0.939873417721519, + "grad_norm": 0.5180915594100952, + "learning_rate": 3.313404837520694e-05, + "loss": 1.2861, + "step": 8910 + }, + { + "epoch": 0.9409282700421941, + "grad_norm": 0.49760887026786804, + "learning_rate": 3.267734583546536e-05, + "loss": 1.2918, + "step": 8920 + }, + { + "epoch": 0.9419831223628692, + "grad_norm": 0.5445263981819153, + "learning_rate": 3.222693824668916e-05, + "loss": 1.2863, + "step": 8930 + }, + { + "epoch": 0.9430379746835443, + "grad_norm": 0.5059675574302673, + 
"learning_rate": 3.178273884253874e-05, + "loss": 1.2892, + "step": 8940 + }, + { + "epoch": 0.9440928270042194, + "grad_norm": 0.5080063343048096, + "learning_rate": 3.134466205261674e-05, + "loss": 1.2909, + "step": 8950 + }, + { + "epoch": 0.9451476793248945, + "grad_norm": 0.5205719470977783, + "learning_rate": 3.0912623485983774e-05, + "loss": 1.2913, + "step": 8960 + }, + { + "epoch": 0.9462025316455697, + "grad_norm": 0.5928550958633423, + "learning_rate": 3.048653991490141e-05, + "loss": 1.2894, + "step": 8970 + }, + { + "epoch": 0.9472573839662447, + "grad_norm": 0.5706565976142883, + "learning_rate": 3.0066329258799184e-05, + "loss": 1.2856, + "step": 8980 + }, + { + "epoch": 0.9483122362869199, + "grad_norm": 0.5227463841438293, + "learning_rate": 2.965191056846266e-05, + "loss": 1.2905, + "step": 8990 + }, + { + "epoch": 0.9493670886075949, + "grad_norm": 0.5120936632156372, + "learning_rate": 2.9243204010439396e-05, + "loss": 1.2845, + "step": 9000 + }, + { + "epoch": 0.95042194092827, + "grad_norm": 0.5366312861442566, + "learning_rate": 2.8840130851659852e-05, + "loss": 1.2838, + "step": 9010 + }, + { + "epoch": 0.9514767932489452, + "grad_norm": 0.5253814458847046, + "learning_rate": 2.844261344427029e-05, + "loss": 1.2852, + "step": 9020 + }, + { + "epoch": 0.9525316455696202, + "grad_norm": 0.6347829699516296, + "learning_rate": 2.805057521067472e-05, + "loss": 1.3042, + "step": 9030 + }, + { + "epoch": 0.9535864978902954, + "grad_norm": 0.5304010510444641, + "learning_rate": 2.766394062878302e-05, + "loss": 1.2799, + "step": 9040 + }, + { + "epoch": 0.9546413502109705, + "grad_norm": 0.5144104361534119, + "learning_rate": 2.7282635217462405e-05, + "loss": 1.2907, + "step": 9050 + }, + { + "epoch": 0.9556962025316456, + "grad_norm": 0.4972911477088928, + "learning_rate": 2.6906585522189378e-05, + "loss": 1.2918, + "step": 9060 + }, + { + "epoch": 0.9567510548523207, + "grad_norm": 0.5126804709434509, + "learning_rate": 2.653571910089951e-05, + 
"loss": 1.2943, + "step": 9070 + }, + { + "epoch": 0.9578059071729957, + "grad_norm": 0.5150929689407349, + "learning_rate": 2.6169964510032243e-05, + "loss": 1.293, + "step": 9080 + }, + { + "epoch": 0.9588607594936709, + "grad_norm": 0.5232914090156555, + "learning_rate": 2.580925129076798e-05, + "loss": 1.2924, + "step": 9090 + }, + { + "epoch": 0.959915611814346, + "grad_norm": 0.51108717918396, + "learning_rate": 2.5453509955454954e-05, + "loss": 1.2718, + "step": 9100 + }, + { + "epoch": 0.9609704641350211, + "grad_norm": 0.5077714920043945, + "learning_rate": 2.510267197422317e-05, + "loss": 1.2859, + "step": 9110 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 0.5147657990455627, + "learning_rate": 2.4756669761782806e-05, + "loss": 1.295, + "step": 9120 + }, + { + "epoch": 0.9630801687763713, + "grad_norm": 0.554429829120636, + "learning_rate": 2.4415436664404643e-05, + "loss": 1.2795, + "step": 9130 + }, + { + "epoch": 0.9641350210970464, + "grad_norm": 0.5688751339912415, + "learning_rate": 2.4078906947079882e-05, + "loss": 1.2987, + "step": 9140 + }, + { + "epoch": 0.9651898734177216, + "grad_norm": 0.5767641067504883, + "learning_rate": 2.3747015780857007e-05, + "loss": 1.288, + "step": 9150 + }, + { + "epoch": 0.9662447257383966, + "grad_norm": 0.5213897824287415, + "learning_rate": 2.3419699230353144e-05, + "loss": 1.2925, + "step": 9160 + }, + { + "epoch": 0.9672995780590717, + "grad_norm": 0.516747772693634, + "learning_rate": 2.3096894241437583e-05, + "loss": 1.2918, + "step": 9170 + }, + { + "epoch": 0.9683544303797469, + "grad_norm": 0.5072665810585022, + "learning_rate": 2.2778538629085057e-05, + "loss": 1.287, + "step": 9180 + }, + { + "epoch": 0.9694092827004219, + "grad_norm": 0.5229827165603638, + "learning_rate": 2.2464571065396428e-05, + "loss": 1.2826, + "step": 9190 + }, + { + "epoch": 0.9704641350210971, + "grad_norm": 0.5397767424583435, + "learning_rate": 2.2154931067784525e-05, + "loss": 1.2873, + "step": 9200 + }, + { + 
"epoch": 0.9715189873417721, + "grad_norm": 0.5343099236488342, + "learning_rate": 2.1849558987322783e-05, + "loss": 1.2876, + "step": 9210 + }, + { + "epoch": 0.9725738396624473, + "grad_norm": 0.5475115180015564, + "learning_rate": 2.1548395997254516e-05, + "loss": 1.2849, + "step": 9220 + }, + { + "epoch": 0.9736286919831224, + "grad_norm": 0.502901554107666, + "learning_rate": 2.1251384081660546e-05, + "loss": 1.2769, + "step": 9230 + }, + { + "epoch": 0.9746835443037974, + "grad_norm": 0.5272713899612427, + "learning_rate": 2.0958466024283035e-05, + "loss": 1.2717, + "step": 9240 + }, + { + "epoch": 0.9757383966244726, + "grad_norm": 0.5067476034164429, + "learning_rate": 2.0669585397503362e-05, + "loss": 1.2827, + "step": 9250 + }, + { + "epoch": 0.9767932489451476, + "grad_norm": 0.5231614112854004, + "learning_rate": 2.0384686551471954e-05, + "loss": 1.2789, + "step": 9260 + }, + { + "epoch": 0.9778481012658228, + "grad_norm": 0.5139203667640686, + "learning_rate": 2.0103714603387898e-05, + "loss": 1.2923, + "step": 9270 + }, + { + "epoch": 0.9789029535864979, + "grad_norm": 0.5003830790519714, + "learning_rate": 1.9826615426926342e-05, + "loss": 1.2786, + "step": 9280 + }, + { + "epoch": 0.979957805907173, + "grad_norm": 0.5089663863182068, + "learning_rate": 1.9553335641811623e-05, + "loss": 1.2848, + "step": 9290 + }, + { + "epoch": 0.9810126582278481, + "grad_norm": 0.5198462605476379, + "learning_rate": 1.9283822603534143e-05, + "loss": 1.2844, + "step": 9300 + }, + { + "epoch": 0.9820675105485233, + "grad_norm": 0.5730404257774353, + "learning_rate": 1.90180243932089e-05, + "loss": 1.2855, + "step": 9310 + }, + { + "epoch": 0.9831223628691983, + "grad_norm": 0.5285211205482483, + "learning_rate": 1.8755889807573868e-05, + "loss": 1.28, + "step": 9320 + }, + { + "epoch": 0.9841772151898734, + "grad_norm": 0.5256885886192322, + "learning_rate": 1.8497368349126255e-05, + "loss": 1.2933, + "step": 9330 + }, + { + "epoch": 0.9852320675105485, + 
"grad_norm": 0.5330718159675598, + "learning_rate": 1.824241021639465e-05, + "loss": 1.2908, + "step": 9340 + }, + { + "epoch": 0.9862869198312236, + "grad_norm": 0.49764251708984375, + "learning_rate": 1.799096629434529e-05, + "loss": 1.302, + "step": 9350 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 0.5102213025093079, + "learning_rate": 1.7742988144920578e-05, + "loss": 1.2929, + "step": 9360 + }, + { + "epoch": 0.9883966244725738, + "grad_norm": 0.5339385271072388, + "learning_rate": 1.7498427997707978e-05, + "loss": 1.294, + "step": 9370 + }, + { + "epoch": 0.989451476793249, + "grad_norm": 0.552943766117096, + "learning_rate": 1.7257238740737548e-05, + "loss": 1.2762, + "step": 9380 + }, + { + "epoch": 0.990506329113924, + "grad_norm": 0.5403295755386353, + "learning_rate": 1.7019373911406307e-05, + "loss": 1.2811, + "step": 9390 + }, + { + "epoch": 0.9915611814345991, + "grad_norm": 0.5238360166549683, + "learning_rate": 1.67847876875277e-05, + "loss": 1.296, + "step": 9400 + }, + { + "epoch": 0.9926160337552743, + "grad_norm": 0.5338429808616638, + "learning_rate": 1.655343487850443e-05, + "loss": 1.2701, + "step": 9410 + }, + { + "epoch": 0.9936708860759493, + "grad_norm": 0.5198045372962952, + "learning_rate": 1.6325270916622947e-05, + "loss": 1.2901, + "step": 9420 + }, + { + "epoch": 0.9947257383966245, + "grad_norm": 0.5099501013755798, + "learning_rate": 1.610025184846797e-05, + "loss": 1.2905, + "step": 9430 + }, + { + "epoch": 0.9957805907172996, + "grad_norm": 0.5310129523277283, + "learning_rate": 1.587833432645528e-05, + "loss": 1.2842, + "step": 9440 + }, + { + "epoch": 0.9968354430379747, + "grad_norm": 0.5107247233390808, + "learning_rate": 1.5659475600481297e-05, + "loss": 1.2942, + "step": 9450 + }, + { + "epoch": 0.9978902953586498, + "grad_norm": 0.5072303414344788, + "learning_rate": 1.544363350968769e-05, + "loss": 1.2781, + "step": 9460 + }, + { + "epoch": 0.9989451476793249, + "grad_norm": 0.5008078813552856, + 
"learning_rate": 1.523076647433954e-05, + "loss": 1.2908, + "step": 9470 + }, + { + "epoch": 1.0, + "grad_norm": 1.5291218757629395, + "learning_rate": 1.5020833487815421e-05, + "loss": 1.2821, + "step": 9480 + } + ], + "logging_steps": 10, + "max_steps": 9480, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.830818274921677e+16, + "train_batch_size": 1024, + "trial_name": null, + "trial_params": null +} diff --git a/saves-olmo/checkpoint-9480/training_args.bin b/saves-olmo/checkpoint-9480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f1e5e2d014e5131cd13bbc867464d4ab032ce06a --- /dev/null +++ b/saves-olmo/checkpoint-9480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44a3a6530beb46e66626d2809a6b0fb414d05ab218cbcca1e8bb8ee065cd460 +size 5112 diff --git a/saves-olmo/config.json b/saves-olmo/config.json new file mode 100644 index 0000000000000000000000000000000000000000..69830f45579d99650d832c24fb725715de4848e2 --- /dev/null +++ b/saves-olmo/config.json @@ -0,0 +1,26 @@ +{ + "architectures": [ + "OlmoForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "clip_qkv": null, + "eos_token_id": 50279, + "hidden_act": "silu", + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 768, + "max_position_embeddings": 2048, + "model_type": "olmo", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pad_token_id": 1, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-olmo/generation_config.json 
b/saves-olmo/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..424d0e318171a19c3fe3f1423f5d8dc090cc22d6 --- /dev/null +++ b/saves-olmo/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "eos_token_id": 50279, + "pad_token_id": 1, + "transformers_version": "4.42.4" +} diff --git a/saves-olmo/model.safetensors b/saves-olmo/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..82acd7b8caa05b1dfc6ff296a3dcb1c0b7fc0d6b --- /dev/null +++ b/saves-olmo/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c512be80642c2a5196fb348b6666fd3845edb8e787d2d6824ef5316d47255f84 +size 8341080 diff --git a/saves-olmo/result.log b/saves-olmo/result.log new file mode 100644 index 0000000000000000000000000000000000000000..8797f0249221cc0052e0491233c35f5f611d42c0 --- /dev/null +++ b/saves-olmo/result.log @@ -0,0 +1 @@ +{'train_runtime': 1895.7109, 'train_samples_per_second': 5120.298, 'train_steps_per_second': 5.001, 'train_loss': 1.6011213989700446, 'epoch': 1.0} \ No newline at end of file diff --git a/saves-olmo/special_tokens_map.json b/saves-olmo/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-olmo/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-olmo/tokenizer.json b/saves-olmo/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ 
b/saves-olmo/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + 
"I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + 
"is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, 
+ "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 
510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 
655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + 
"able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 
941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + 
"è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 
1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + 
"ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 
1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, 
+ "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + 
"æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, 
+ "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 
1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-olmo/tokenizer_config.json b/saves-olmo/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-olmo/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "model_max_length": 4096, 
+ "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-phi-cosine/checkpoint-9480/config.json b/saves-phi-cosine/checkpoint-9480/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3fb5342bea6ae91c8f86ba3e8c07161371845c2a --- /dev/null +++ b/saves-phi-cosine/checkpoint-9480/config.json @@ -0,0 +1,29 @@ +{ + "architectures": [ + "PhiForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 1, + "embd_pdrop": 0.0, + "eos_token_id": 2, + "hidden_act": "gelu", + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 1024, + "layer_norm_eps": 1e-05, + "max_position_embeddings": 2048, + "model_type": "phi", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "partial_rotary_factor": 0.5, + "qk_layernorm": false, + "resid_pdrop": 0.0, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-phi-cosine/checkpoint-9480/generation_config.json b/saves-phi-cosine/checkpoint-9480/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b2fc224da8a3685f78c733a0ef85e67242c17b5a --- /dev/null +++ b/saves-phi-cosine/checkpoint-9480/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.42.4" +} diff --git a/saves-phi-cosine/checkpoint-9480/model.safetensors b/saves-phi-cosine/checkpoint-9480/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cd161fcef38485525c51230a766c516bff83f1fc --- /dev/null +++ b/saves-phi-cosine/checkpoint-9480/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:330f2020c2a3ca5bb655b8694d173dcc16f5e4f0d07a6f34eefb07fc00c8e2f8 +size 7848944 diff --git 
a/saves-phi-cosine/checkpoint-9480/optimizer.pt b/saves-phi-cosine/checkpoint-9480/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..25e233664d9b4c721bee06bee3706eda7b4a0e64 --- /dev/null +++ b/saves-phi-cosine/checkpoint-9480/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7781d5c3281f8ab0e108c99758a2b528281141c2c48a69103b42b1aaf33ceb8 +size 15718590 diff --git a/saves-phi-cosine/checkpoint-9480/rng_state.pth b/saves-phi-cosine/checkpoint-9480/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f8291cd6ce87668b786a72f3e93d072fbe54902 --- /dev/null +++ b/saves-phi-cosine/checkpoint-9480/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c917636c7a58af68a29056522a757e9f9b99005b776641aa157c536967817d +size 14244 diff --git a/saves-phi-cosine/checkpoint-9480/scheduler.pt b/saves-phi-cosine/checkpoint-9480/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..03c145297021546d40e130546440641e02059bcb --- /dev/null +++ b/saves-phi-cosine/checkpoint-9480/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35fd617624c087e1a286ed7cf3fa38baa4a8815e49f107c3186b4c7c58e1adbb +size 1064 diff --git a/saves-phi-cosine/checkpoint-9480/special_tokens_map.json b/saves-phi-cosine/checkpoint-9480/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-phi-cosine/checkpoint-9480/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false + } +} diff --git a/saves-phi-cosine/checkpoint-9480/tokenizer.json b/saves-phi-cosine/checkpoint-9480/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-phi-cosine/checkpoint-9480/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + 
".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + 
"Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + 
"大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 
487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + 
"Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, 
+ "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 
919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + 
"çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 
1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + 
"äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, 
+ ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 
1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 
1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + 
"Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + 
"Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-phi-cosine/checkpoint-9480/tokenizer_config.json b/saves-phi-cosine/checkpoint-9480/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-phi-cosine/checkpoint-9480/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": 
"<|im_end|>", + "errors": "replace", + "model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-phi-cosine/checkpoint-9480/trainer_state.json b/saves-phi-cosine/checkpoint-9480/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7a21c8f0401ae329d9dd5a6ef77163f92d98838a --- /dev/null +++ b/saves-phi-cosine/checkpoint-9480/trainer_state.json @@ -0,0 +1,66393 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 9480, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00010548523206751055, + "grad_norm": 1.990415334701538, + "learning_rate": 1.5789473684210526e-05, + "loss": 7.6707, + "step": 1 + }, + { + "epoch": 0.0002109704641350211, + "grad_norm": 1.9741954803466797, + "learning_rate": 3.157894736842105e-05, + "loss": 7.6669, + "step": 2 + }, + { + "epoch": 0.00031645569620253165, + "grad_norm": 1.9685741662979126, + "learning_rate": 4.736842105263158e-05, + "loss": 7.6507, + "step": 3 + }, + { + "epoch": 0.0004219409282700422, + "grad_norm": 1.9787766933441162, + "learning_rate": 6.31578947368421e-05, + "loss": 7.604, + "step": 4 + }, + { + "epoch": 0.0005274261603375527, + "grad_norm": 1.9657325744628906, + "learning_rate": 7.894736842105263e-05, + "loss": 7.5442, + "step": 5 + }, + { + "epoch": 0.0006329113924050633, + "grad_norm": 1.790980339050293, + "learning_rate": 9.473684210526316e-05, + "loss": 7.4773, + "step": 6 + }, + { + "epoch": 0.0007383966244725738, + "grad_norm": 1.6637516021728516, + "learning_rate": 0.00011052631578947368, + "loss": 7.3871, + "step": 7 + }, + { + "epoch": 0.0008438818565400844, + "grad_norm": 1.4874807596206665, + "learning_rate": 0.0001263157894736842, + "loss": 7.3051, + "step": 8 + }, + { + "epoch": 0.0009493670886075949, + "grad_norm": 
1.3727025985717773, + "learning_rate": 0.00014210526315789474, + "loss": 7.2103, + "step": 9 + }, + { + "epoch": 0.0010548523206751054, + "grad_norm": 1.2512125968933105, + "learning_rate": 0.00015789473684210527, + "loss": 7.1339, + "step": 10 + }, + { + "epoch": 0.001160337552742616, + "grad_norm": 1.2013553380966187, + "learning_rate": 0.0001736842105263158, + "loss": 7.0487, + "step": 11 + }, + { + "epoch": 0.0012658227848101266, + "grad_norm": 1.1448709964752197, + "learning_rate": 0.00018947368421052632, + "loss": 6.9755, + "step": 12 + }, + { + "epoch": 0.0013713080168776372, + "grad_norm": 1.1207118034362793, + "learning_rate": 0.00020526315789473685, + "loss": 6.9082, + "step": 13 + }, + { + "epoch": 0.0014767932489451476, + "grad_norm": 1.1159111261367798, + "learning_rate": 0.00022105263157894735, + "loss": 6.8448, + "step": 14 + }, + { + "epoch": 0.0015822784810126582, + "grad_norm": 1.102912425994873, + "learning_rate": 0.00023684210526315788, + "loss": 6.7852, + "step": 15 + }, + { + "epoch": 0.0016877637130801688, + "grad_norm": 1.0928661823272705, + "learning_rate": 0.0002526315789473684, + "loss": 6.7333, + "step": 16 + }, + { + "epoch": 0.0017932489451476794, + "grad_norm": 1.1035737991333008, + "learning_rate": 0.00026842105263157897, + "loss": 6.6568, + "step": 17 + }, + { + "epoch": 0.0018987341772151898, + "grad_norm": 1.0995936393737793, + "learning_rate": 0.00028421052631578947, + "loss": 6.5939, + "step": 18 + }, + { + "epoch": 0.0020042194092827004, + "grad_norm": 1.098261833190918, + "learning_rate": 0.00030000000000000003, + "loss": 6.5169, + "step": 19 + }, + { + "epoch": 0.002109704641350211, + "grad_norm": 1.0713468790054321, + "learning_rate": 0.00031578947368421053, + "loss": 6.4613, + "step": 20 + }, + { + "epoch": 0.0022151898734177216, + "grad_norm": 1.051919937133789, + "learning_rate": 0.00033157894736842103, + "loss": 6.3954, + "step": 21 + }, + { + "epoch": 0.002320675105485232, + "grad_norm": 1.0464094877243042, + 
"learning_rate": 0.0003473684210526316, + "loss": 6.3273, + "step": 22 + }, + { + "epoch": 0.002426160337552743, + "grad_norm": 1.0348244905471802, + "learning_rate": 0.0003631578947368421, + "loss": 6.252, + "step": 23 + }, + { + "epoch": 0.002531645569620253, + "grad_norm": 1.0217260122299194, + "learning_rate": 0.00037894736842105265, + "loss": 6.1812, + "step": 24 + }, + { + "epoch": 0.0026371308016877636, + "grad_norm": 0.9888567328453064, + "learning_rate": 0.00039473684210526315, + "loss": 6.1338, + "step": 25 + }, + { + "epoch": 0.0027426160337552744, + "grad_norm": 0.9841895699501038, + "learning_rate": 0.0004105263157894737, + "loss": 6.055, + "step": 26 + }, + { + "epoch": 0.002848101265822785, + "grad_norm": 0.935365617275238, + "learning_rate": 0.0004263157894736842, + "loss": 6.02, + "step": 27 + }, + { + "epoch": 0.002953586497890295, + "grad_norm": 0.9254730343818665, + "learning_rate": 0.0004421052631578947, + "loss": 5.9396, + "step": 28 + }, + { + "epoch": 0.003059071729957806, + "grad_norm": 0.9023505449295044, + "learning_rate": 0.00045789473684210527, + "loss": 5.8844, + "step": 29 + }, + { + "epoch": 0.0031645569620253164, + "grad_norm": 0.8524325489997864, + "learning_rate": 0.00047368421052631577, + "loss": 5.8419, + "step": 30 + }, + { + "epoch": 0.003270042194092827, + "grad_norm": 0.8452946543693542, + "learning_rate": 0.0004894736842105264, + "loss": 5.7782, + "step": 31 + }, + { + "epoch": 0.0033755274261603376, + "grad_norm": 0.8303349614143372, + "learning_rate": 0.0005052631578947368, + "loss": 5.7037, + "step": 32 + }, + { + "epoch": 0.003481012658227848, + "grad_norm": 0.8019928336143494, + "learning_rate": 0.0005210526315789474, + "loss": 5.6378, + "step": 33 + }, + { + "epoch": 0.003586497890295359, + "grad_norm": 0.7660385370254517, + "learning_rate": 0.0005368421052631579, + "loss": 5.6033, + "step": 34 + }, + { + "epoch": 0.003691983122362869, + "grad_norm": 0.776572585105896, + "learning_rate": 0.0005526315789473684, + 
"loss": 5.5161, + "step": 35 + }, + { + "epoch": 0.0037974683544303796, + "grad_norm": 0.733932375907898, + "learning_rate": 0.0005684210526315789, + "loss": 5.4823, + "step": 36 + }, + { + "epoch": 0.0039029535864978904, + "grad_norm": 0.7159765958786011, + "learning_rate": 0.0005842105263157895, + "loss": 5.4119, + "step": 37 + }, + { + "epoch": 0.004008438818565401, + "grad_norm": 0.6581476926803589, + "learning_rate": 0.0006000000000000001, + "loss": 5.3786, + "step": 38 + }, + { + "epoch": 0.004113924050632912, + "grad_norm": 0.6359902024269104, + "learning_rate": 0.0006157894736842105, + "loss": 5.2869, + "step": 39 + }, + { + "epoch": 0.004219409282700422, + "grad_norm": 0.6128204464912415, + "learning_rate": 0.0006315789473684211, + "loss": 5.264, + "step": 40 + }, + { + "epoch": 0.004324894514767932, + "grad_norm": 0.627169132232666, + "learning_rate": 0.0006473684210526316, + "loss": 5.2221, + "step": 41 + }, + { + "epoch": 0.004430379746835443, + "grad_norm": 1.1887714862823486, + "learning_rate": 0.0006631578947368421, + "loss": 5.1844, + "step": 42 + }, + { + "epoch": 0.004535864978902953, + "grad_norm": 1.191601037979126, + "learning_rate": 0.0006789473684210526, + "loss": 5.1225, + "step": 43 + }, + { + "epoch": 0.004641350210970464, + "grad_norm": 0.7144765853881836, + "learning_rate": 0.0006947368421052632, + "loss": 5.1016, + "step": 44 + }, + { + "epoch": 0.004746835443037975, + "grad_norm": 2.205565929412842, + "learning_rate": 0.0007105263157894736, + "loss": 5.0717, + "step": 45 + }, + { + "epoch": 0.004852320675105486, + "grad_norm": 1.1527762413024902, + "learning_rate": 0.0007263157894736842, + "loss": 4.9826, + "step": 46 + }, + { + "epoch": 0.004957805907172996, + "grad_norm": 0.8625244498252869, + "learning_rate": 0.0007421052631578947, + "loss": 4.9672, + "step": 47 + }, + { + "epoch": 0.005063291139240506, + "grad_norm": 0.8451009392738342, + "learning_rate": 0.0007578947368421053, + "loss": 4.932, + "step": 48 + }, + { + "epoch": 
0.005168776371308017, + "grad_norm": 0.41903024911880493, + "learning_rate": 0.0007736842105263159, + "loss": 4.8372, + "step": 49 + }, + { + "epoch": 0.005274261603375527, + "grad_norm": 0.9804922342300415, + "learning_rate": 0.0007894736842105263, + "loss": 4.8434, + "step": 50 + }, + { + "epoch": 0.005379746835443038, + "grad_norm": 0.4249635338783264, + "learning_rate": 0.0008052631578947369, + "loss": 4.7696, + "step": 51 + }, + { + "epoch": 0.005485232067510549, + "grad_norm": 0.527254581451416, + "learning_rate": 0.0008210526315789474, + "loss": 4.7344, + "step": 52 + }, + { + "epoch": 0.005590717299578059, + "grad_norm": 0.4901870787143707, + "learning_rate": 0.0008368421052631579, + "loss": 4.7035, + "step": 53 + }, + { + "epoch": 0.00569620253164557, + "grad_norm": 0.4018175005912781, + "learning_rate": 0.0008526315789473684, + "loss": 4.6306, + "step": 54 + }, + { + "epoch": 0.0058016877637130804, + "grad_norm": 0.5346843600273132, + "learning_rate": 0.000868421052631579, + "loss": 4.6215, + "step": 55 + }, + { + "epoch": 0.00590717299578059, + "grad_norm": 0.3589933216571808, + "learning_rate": 0.0008842105263157894, + "loss": 4.5553, + "step": 56 + }, + { + "epoch": 0.006012658227848101, + "grad_norm": 0.43189701437950134, + "learning_rate": 0.0009, + "loss": 4.5649, + "step": 57 + }, + { + "epoch": 0.006118143459915612, + "grad_norm": 0.3390350639820099, + "learning_rate": 0.0009157894736842105, + "loss": 4.544, + "step": 58 + }, + { + "epoch": 0.006223628691983122, + "grad_norm": 0.3437258005142212, + "learning_rate": 0.0009315789473684211, + "loss": 4.4919, + "step": 59 + }, + { + "epoch": 0.006329113924050633, + "grad_norm": 0.44064611196517944, + "learning_rate": 0.0009473684210526315, + "loss": 4.4508, + "step": 60 + }, + { + "epoch": 0.006434599156118144, + "grad_norm": 0.3137136399745941, + "learning_rate": 0.0009631578947368421, + "loss": 4.4383, + "step": 61 + }, + { + "epoch": 0.006540084388185654, + "grad_norm": 0.3484095633029938, + 
"learning_rate": 0.0009789473684210528, + "loss": 4.39, + "step": 62 + }, + { + "epoch": 0.006645569620253164, + "grad_norm": 0.3365086019039154, + "learning_rate": 0.000994736842105263, + "loss": 4.3781, + "step": 63 + }, + { + "epoch": 0.006751054852320675, + "grad_norm": 0.32437974214553833, + "learning_rate": 0.0010105263157894737, + "loss": 4.3366, + "step": 64 + }, + { + "epoch": 0.006856540084388186, + "grad_norm": 0.3608114719390869, + "learning_rate": 0.0010263157894736842, + "loss": 4.3388, + "step": 65 + }, + { + "epoch": 0.006962025316455696, + "grad_norm": 0.3221237063407898, + "learning_rate": 0.0010421052631578948, + "loss": 4.2861, + "step": 66 + }, + { + "epoch": 0.007067510548523207, + "grad_norm": 0.31596195697784424, + "learning_rate": 0.0010578947368421053, + "loss": 4.2857, + "step": 67 + }, + { + "epoch": 0.007172995780590718, + "grad_norm": 0.37636852264404297, + "learning_rate": 0.0010736842105263159, + "loss": 4.2667, + "step": 68 + }, + { + "epoch": 0.007278481012658228, + "grad_norm": 0.36961105465888977, + "learning_rate": 0.0010894736842105264, + "loss": 4.2058, + "step": 69 + }, + { + "epoch": 0.007383966244725738, + "grad_norm": 0.49518004059791565, + "learning_rate": 0.0011052631578947368, + "loss": 4.1927, + "step": 70 + }, + { + "epoch": 0.007489451476793249, + "grad_norm": 0.7145194411277771, + "learning_rate": 0.0011210526315789473, + "loss": 4.2269, + "step": 71 + }, + { + "epoch": 0.007594936708860759, + "grad_norm": 0.8915210962295532, + "learning_rate": 0.0011368421052631579, + "loss": 4.1911, + "step": 72 + }, + { + "epoch": 0.00770042194092827, + "grad_norm": 0.6716008186340332, + "learning_rate": 0.0011526315789473684, + "loss": 4.1549, + "step": 73 + }, + { + "epoch": 0.007805907172995781, + "grad_norm": 0.3761642277240753, + "learning_rate": 0.001168421052631579, + "loss": 4.1325, + "step": 74 + }, + { + "epoch": 0.007911392405063292, + "grad_norm": 0.6363518834114075, + "learning_rate": 0.0011842105263157896, + "loss": 
4.1121, + "step": 75 + }, + { + "epoch": 0.008016877637130802, + "grad_norm": 0.46860456466674805, + "learning_rate": 0.0012000000000000001, + "loss": 4.1161, + "step": 76 + }, + { + "epoch": 0.008122362869198312, + "grad_norm": 0.42861828207969666, + "learning_rate": 0.0012157894736842105, + "loss": 4.0976, + "step": 77 + }, + { + "epoch": 0.008227848101265823, + "grad_norm": 0.45418357849121094, + "learning_rate": 0.001231578947368421, + "loss": 4.0809, + "step": 78 + }, + { + "epoch": 0.008333333333333333, + "grad_norm": 0.4758206605911255, + "learning_rate": 0.0012473684210526316, + "loss": 4.06, + "step": 79 + }, + { + "epoch": 0.008438818565400843, + "grad_norm": 0.6014159917831421, + "learning_rate": 0.0012631578947368421, + "loss": 4.0618, + "step": 80 + }, + { + "epoch": 0.008544303797468355, + "grad_norm": 0.8716511726379395, + "learning_rate": 0.0012789473684210527, + "loss": 4.0434, + "step": 81 + }, + { + "epoch": 0.008649789029535865, + "grad_norm": 0.6775972843170166, + "learning_rate": 0.0012947368421052632, + "loss": 4.0253, + "step": 82 + }, + { + "epoch": 0.008755274261603375, + "grad_norm": 0.5388532876968384, + "learning_rate": 0.0013105263157894738, + "loss": 3.9972, + "step": 83 + }, + { + "epoch": 0.008860759493670886, + "grad_norm": 0.5995179414749146, + "learning_rate": 0.0013263157894736841, + "loss": 3.9782, + "step": 84 + }, + { + "epoch": 0.008966244725738396, + "grad_norm": 0.4843006134033203, + "learning_rate": 0.0013421052631578947, + "loss": 3.9588, + "step": 85 + }, + { + "epoch": 0.009071729957805906, + "grad_norm": 0.5736069083213806, + "learning_rate": 0.0013578947368421052, + "loss": 3.9837, + "step": 86 + }, + { + "epoch": 0.009177215189873418, + "grad_norm": 0.6417155265808105, + "learning_rate": 0.0013736842105263158, + "loss": 3.9725, + "step": 87 + }, + { + "epoch": 0.009282700421940928, + "grad_norm": 0.7312027812004089, + "learning_rate": 0.0013894736842105264, + "loss": 3.9049, + "step": 88 + }, + { + "epoch": 
0.009388185654008438, + "grad_norm": 0.7139880657196045, + "learning_rate": 0.001405263157894737, + "loss": 3.9169, + "step": 89 + }, + { + "epoch": 0.00949367088607595, + "grad_norm": 0.6410272121429443, + "learning_rate": 0.0014210526315789472, + "loss": 3.9173, + "step": 90 + }, + { + "epoch": 0.00959915611814346, + "grad_norm": 0.5165127515792847, + "learning_rate": 0.0014368421052631578, + "loss": 3.9041, + "step": 91 + }, + { + "epoch": 0.009704641350210971, + "grad_norm": 0.47067081928253174, + "learning_rate": 0.0014526315789473684, + "loss": 3.8834, + "step": 92 + }, + { + "epoch": 0.009810126582278481, + "grad_norm": 0.5926494598388672, + "learning_rate": 0.0014684210526315791, + "loss": 3.8716, + "step": 93 + }, + { + "epoch": 0.009915611814345991, + "grad_norm": 0.487052857875824, + "learning_rate": 0.0014842105263157895, + "loss": 3.9004, + "step": 94 + }, + { + "epoch": 0.010021097046413503, + "grad_norm": 0.6300457119941711, + "learning_rate": 0.0015, + "loss": 3.8626, + "step": 95 + }, + { + "epoch": 0.010126582278481013, + "grad_norm": 0.7769326567649841, + "learning_rate": 0.00149999995797938, + "loss": 3.8626, + "step": 96 + }, + { + "epoch": 0.010232067510548523, + "grad_norm": 0.7663260102272034, + "learning_rate": 0.001499999831917525, + "loss": 3.8353, + "step": 97 + }, + { + "epoch": 0.010337552742616034, + "grad_norm": 0.6994576454162598, + "learning_rate": 0.001499999621814449, + "loss": 3.8262, + "step": 98 + }, + { + "epoch": 0.010443037974683544, + "grad_norm": 0.6600782871246338, + "learning_rate": 0.0014999993276701756, + "loss": 3.8537, + "step": 99 + }, + { + "epoch": 0.010548523206751054, + "grad_norm": 0.7549358010292053, + "learning_rate": 0.0014999989494847376, + "loss": 3.8076, + "step": 100 + }, + { + "epoch": 0.010654008438818566, + "grad_norm": 0.7750245928764343, + "learning_rate": 0.0014999984872581774, + "loss": 3.8211, + "step": 101 + }, + { + "epoch": 0.010759493670886076, + "grad_norm": 0.5864017009735107, + 
"learning_rate": 0.0014999979409905469, + "loss": 3.7621, + "step": 102 + }, + { + "epoch": 0.010864978902953586, + "grad_norm": 0.6484239101409912, + "learning_rate": 0.0014999973106819074, + "loss": 3.7591, + "step": 103 + }, + { + "epoch": 0.010970464135021098, + "grad_norm": 0.45239436626434326, + "learning_rate": 0.0014999965963323294, + "loss": 3.744, + "step": 104 + }, + { + "epoch": 0.011075949367088608, + "grad_norm": 0.5323035717010498, + "learning_rate": 0.0014999957979418927, + "loss": 3.735, + "step": 105 + }, + { + "epoch": 0.011181434599156118, + "grad_norm": 0.4015861451625824, + "learning_rate": 0.0014999949155106874, + "loss": 3.7284, + "step": 106 + }, + { + "epoch": 0.01128691983122363, + "grad_norm": 0.46352314949035645, + "learning_rate": 0.0014999939490388115, + "loss": 3.7302, + "step": 107 + }, + { + "epoch": 0.01139240506329114, + "grad_norm": 0.4921068251132965, + "learning_rate": 0.0014999928985263743, + "loss": 3.7096, + "step": 108 + }, + { + "epoch": 0.01149789029535865, + "grad_norm": 0.8228769898414612, + "learning_rate": 0.001499991763973493, + "loss": 3.7339, + "step": 109 + }, + { + "epoch": 0.011603375527426161, + "grad_norm": 1.0287593603134155, + "learning_rate": 0.0014999905453802946, + "loss": 3.7138, + "step": 110 + }, + { + "epoch": 0.01170886075949367, + "grad_norm": 1.05591881275177, + "learning_rate": 0.0014999892427469156, + "loss": 3.7261, + "step": 111 + }, + { + "epoch": 0.01181434599156118, + "grad_norm": 1.099137306213379, + "learning_rate": 0.0014999878560735024, + "loss": 3.7201, + "step": 112 + }, + { + "epoch": 0.011919831223628692, + "grad_norm": 0.7215378880500793, + "learning_rate": 0.0014999863853602101, + "loss": 3.6798, + "step": 113 + }, + { + "epoch": 0.012025316455696202, + "grad_norm": 0.6717193126678467, + "learning_rate": 0.0014999848306072037, + "loss": 3.6975, + "step": 114 + }, + { + "epoch": 0.012130801687763712, + "grad_norm": 0.6363789439201355, + "learning_rate": 0.0014999831918146571, + 
"loss": 3.6874, + "step": 115 + }, + { + "epoch": 0.012236286919831224, + "grad_norm": 0.5084103941917419, + "learning_rate": 0.001499981468982754, + "loss": 3.6607, + "step": 116 + }, + { + "epoch": 0.012341772151898734, + "grad_norm": 0.6182293891906738, + "learning_rate": 0.001499979662111688, + "loss": 3.6737, + "step": 117 + }, + { + "epoch": 0.012447257383966244, + "grad_norm": 0.7704952955245972, + "learning_rate": 0.0014999777712016607, + "loss": 3.662, + "step": 118 + }, + { + "epoch": 0.012552742616033756, + "grad_norm": 0.5551393032073975, + "learning_rate": 0.0014999757962528846, + "loss": 3.6039, + "step": 119 + }, + { + "epoch": 0.012658227848101266, + "grad_norm": 0.5525965690612793, + "learning_rate": 0.0014999737372655805, + "loss": 3.6334, + "step": 120 + }, + { + "epoch": 0.012763713080168776, + "grad_norm": 0.4860827326774597, + "learning_rate": 0.0014999715942399798, + "loss": 3.6333, + "step": 121 + }, + { + "epoch": 0.012869198312236287, + "grad_norm": 0.4586487114429474, + "learning_rate": 0.001499969367176322, + "loss": 3.6184, + "step": 122 + }, + { + "epoch": 0.012974683544303797, + "grad_norm": 0.5613583326339722, + "learning_rate": 0.0014999670560748573, + "loss": 3.5876, + "step": 123 + }, + { + "epoch": 0.013080168776371307, + "grad_norm": 0.48275187611579895, + "learning_rate": 0.001499964660935844, + "loss": 3.5721, + "step": 124 + }, + { + "epoch": 0.013185654008438819, + "grad_norm": 0.4742601215839386, + "learning_rate": 0.0014999621817595509, + "loss": 3.5958, + "step": 125 + }, + { + "epoch": 0.013291139240506329, + "grad_norm": 0.5116722583770752, + "learning_rate": 0.0014999596185462556, + "loss": 3.5748, + "step": 126 + }, + { + "epoch": 0.01339662447257384, + "grad_norm": 0.4690430462360382, + "learning_rate": 0.0014999569712962452, + "loss": 3.5826, + "step": 127 + }, + { + "epoch": 0.01350210970464135, + "grad_norm": 0.5590510964393616, + "learning_rate": 0.0014999542400098169, + "loss": 3.5541, + "step": 128 + }, + { + 
"epoch": 0.01360759493670886, + "grad_norm": 0.47909513115882874, + "learning_rate": 0.0014999514246872762, + "loss": 3.5416, + "step": 129 + }, + { + "epoch": 0.013713080168776372, + "grad_norm": 0.48453688621520996, + "learning_rate": 0.0014999485253289388, + "loss": 3.5633, + "step": 130 + }, + { + "epoch": 0.013818565400843882, + "grad_norm": 0.5796778202056885, + "learning_rate": 0.0014999455419351297, + "loss": 3.5162, + "step": 131 + }, + { + "epoch": 0.013924050632911392, + "grad_norm": 0.701404869556427, + "learning_rate": 0.001499942474506183, + "loss": 3.5705, + "step": 132 + }, + { + "epoch": 0.014029535864978904, + "grad_norm": 0.6439765691757202, + "learning_rate": 0.0014999393230424422, + "loss": 3.5539, + "step": 133 + }, + { + "epoch": 0.014135021097046414, + "grad_norm": 0.6641003489494324, + "learning_rate": 0.001499936087544261, + "loss": 3.4995, + "step": 134 + }, + { + "epoch": 0.014240506329113924, + "grad_norm": 0.6322571635246277, + "learning_rate": 0.001499932768012002, + "loss": 3.5126, + "step": 135 + }, + { + "epoch": 0.014345991561181435, + "grad_norm": 0.48219600319862366, + "learning_rate": 0.0014999293644460362, + "loss": 3.5008, + "step": 136 + }, + { + "epoch": 0.014451476793248945, + "grad_norm": 0.4121226370334625, + "learning_rate": 0.0014999258768467459, + "loss": 3.502, + "step": 137 + }, + { + "epoch": 0.014556962025316455, + "grad_norm": 0.5118046402931213, + "learning_rate": 0.0014999223052145215, + "loss": 3.4572, + "step": 138 + }, + { + "epoch": 0.014662447257383967, + "grad_norm": 0.49675849080085754, + "learning_rate": 0.0014999186495497636, + "loss": 3.46, + "step": 139 + }, + { + "epoch": 0.014767932489451477, + "grad_norm": 0.6869795322418213, + "learning_rate": 0.0014999149098528814, + "loss": 3.4623, + "step": 140 + }, + { + "epoch": 0.014873417721518987, + "grad_norm": 0.7666463255882263, + "learning_rate": 0.0014999110861242944, + "loss": 3.4745, + "step": 141 + }, + { + "epoch": 0.014978902953586498, + 
"grad_norm": 0.7772156000137329, + "learning_rate": 0.0014999071783644306, + "loss": 3.4655, + "step": 142 + }, + { + "epoch": 0.015084388185654008, + "grad_norm": 0.9682581424713135, + "learning_rate": 0.001499903186573728, + "loss": 3.4656, + "step": 143 + }, + { + "epoch": 0.015189873417721518, + "grad_norm": 0.9083653688430786, + "learning_rate": 0.001499899110752634, + "loss": 3.4741, + "step": 144 + }, + { + "epoch": 0.01529535864978903, + "grad_norm": 0.6573701500892639, + "learning_rate": 0.0014998949509016054, + "loss": 3.4757, + "step": 145 + }, + { + "epoch": 0.01540084388185654, + "grad_norm": 0.5613637566566467, + "learning_rate": 0.0014998907070211084, + "loss": 3.4266, + "step": 146 + }, + { + "epoch": 0.01550632911392405, + "grad_norm": 0.5330507159233093, + "learning_rate": 0.0014998863791116182, + "loss": 3.4628, + "step": 147 + }, + { + "epoch": 0.015611814345991562, + "grad_norm": 0.5010958313941956, + "learning_rate": 0.0014998819671736198, + "loss": 3.4021, + "step": 148 + }, + { + "epoch": 0.015717299578059073, + "grad_norm": 0.6742215752601624, + "learning_rate": 0.001499877471207608, + "loss": 3.4297, + "step": 149 + }, + { + "epoch": 0.015822784810126583, + "grad_norm": 0.8106435537338257, + "learning_rate": 0.0014998728912140862, + "loss": 3.4295, + "step": 150 + }, + { + "epoch": 0.015928270042194093, + "grad_norm": 0.7103162407875061, + "learning_rate": 0.0014998682271935677, + "loss": 3.4541, + "step": 151 + }, + { + "epoch": 0.016033755274261603, + "grad_norm": 0.4184955358505249, + "learning_rate": 0.0014998634791465752, + "loss": 3.3775, + "step": 152 + }, + { + "epoch": 0.016139240506329113, + "grad_norm": 0.5855023860931396, + "learning_rate": 0.001499858647073641, + "loss": 3.437, + "step": 153 + }, + { + "epoch": 0.016244725738396623, + "grad_norm": 0.48243358731269836, + "learning_rate": 0.0014998537309753057, + "loss": 3.3742, + "step": 154 + }, + { + "epoch": 0.016350210970464137, + "grad_norm": 0.49330422282218933, + 
"learning_rate": 0.001499848730852121, + "loss": 3.397, + "step": 155 + }, + { + "epoch": 0.016455696202531647, + "grad_norm": 0.5432450175285339, + "learning_rate": 0.001499843646704647, + "loss": 3.3816, + "step": 156 + }, + { + "epoch": 0.016561181434599156, + "grad_norm": 0.5061481595039368, + "learning_rate": 0.0014998384785334532, + "loss": 3.3895, + "step": 157 + }, + { + "epoch": 0.016666666666666666, + "grad_norm": 0.4866331219673157, + "learning_rate": 0.0014998332263391192, + "loss": 3.3579, + "step": 158 + }, + { + "epoch": 0.016772151898734176, + "grad_norm": 0.4874270260334015, + "learning_rate": 0.0014998278901222327, + "loss": 3.3753, + "step": 159 + }, + { + "epoch": 0.016877637130801686, + "grad_norm": 0.6459382772445679, + "learning_rate": 0.0014998224698833922, + "loss": 3.3709, + "step": 160 + }, + { + "epoch": 0.0169831223628692, + "grad_norm": 0.904331386089325, + "learning_rate": 0.0014998169656232053, + "loss": 3.3368, + "step": 161 + }, + { + "epoch": 0.01708860759493671, + "grad_norm": 0.9735814929008484, + "learning_rate": 0.0014998113773422883, + "loss": 3.3958, + "step": 162 + }, + { + "epoch": 0.01719409282700422, + "grad_norm": 0.7412448525428772, + "learning_rate": 0.0014998057050412674, + "loss": 3.3683, + "step": 163 + }, + { + "epoch": 0.01729957805907173, + "grad_norm": 0.47699683904647827, + "learning_rate": 0.0014997999487207786, + "loss": 3.3364, + "step": 164 + }, + { + "epoch": 0.01740506329113924, + "grad_norm": 0.6547554731369019, + "learning_rate": 0.0014997941083814666, + "loss": 3.3576, + "step": 165 + }, + { + "epoch": 0.01751054852320675, + "grad_norm": 0.6138315796852112, + "learning_rate": 0.001499788184023986, + "loss": 3.3428, + "step": 166 + }, + { + "epoch": 0.017616033755274263, + "grad_norm": 0.617776095867157, + "learning_rate": 0.0014997821756490008, + "loss": 3.309, + "step": 167 + }, + { + "epoch": 0.017721518987341773, + "grad_norm": 0.5150807499885559, + "learning_rate": 0.0014997760832571839, + "loss": 
3.3011, + "step": 168 + }, + { + "epoch": 0.017827004219409283, + "grad_norm": 0.4819340705871582, + "learning_rate": 0.001499769906849218, + "loss": 3.2886, + "step": 169 + }, + { + "epoch": 0.017932489451476793, + "grad_norm": 0.5429812073707581, + "learning_rate": 0.0014997636464257956, + "loss": 3.3079, + "step": 170 + }, + { + "epoch": 0.018037974683544303, + "grad_norm": 0.5139880180358887, + "learning_rate": 0.0014997573019876179, + "loss": 3.2756, + "step": 171 + }, + { + "epoch": 0.018143459915611813, + "grad_norm": 0.47588396072387695, + "learning_rate": 0.0014997508735353957, + "loss": 3.3249, + "step": 172 + }, + { + "epoch": 0.018248945147679326, + "grad_norm": 0.49224913120269775, + "learning_rate": 0.0014997443610698497, + "loss": 3.3006, + "step": 173 + }, + { + "epoch": 0.018354430379746836, + "grad_norm": 0.46535786986351013, + "learning_rate": 0.0014997377645917095, + "loss": 3.2611, + "step": 174 + }, + { + "epoch": 0.018459915611814346, + "grad_norm": 0.5687915086746216, + "learning_rate": 0.001499731084101714, + "loss": 3.3163, + "step": 175 + }, + { + "epoch": 0.018565400843881856, + "grad_norm": 0.6074397563934326, + "learning_rate": 0.0014997243196006125, + "loss": 3.2931, + "step": 176 + }, + { + "epoch": 0.018670886075949366, + "grad_norm": 0.6521103978157043, + "learning_rate": 0.001499717471089162, + "loss": 3.2967, + "step": 177 + }, + { + "epoch": 0.018776371308016876, + "grad_norm": 0.6388331055641174, + "learning_rate": 0.0014997105385681306, + "loss": 3.284, + "step": 178 + }, + { + "epoch": 0.01888185654008439, + "grad_norm": 0.5434625148773193, + "learning_rate": 0.001499703522038295, + "loss": 3.2957, + "step": 179 + }, + { + "epoch": 0.0189873417721519, + "grad_norm": 0.592345654964447, + "learning_rate": 0.0014996964215004416, + "loss": 3.2737, + "step": 180 + }, + { + "epoch": 0.01909282700421941, + "grad_norm": 0.541386604309082, + "learning_rate": 0.0014996892369553655, + "loss": 3.2806, + "step": 181 + }, + { + "epoch": 
0.01919831223628692, + "grad_norm": 0.5269626379013062, + "learning_rate": 0.0014996819684038726, + "loss": 3.2612, + "step": 182 + }, + { + "epoch": 0.01930379746835443, + "grad_norm": 0.4950464963912964, + "learning_rate": 0.0014996746158467762, + "loss": 3.2335, + "step": 183 + }, + { + "epoch": 0.019409282700421943, + "grad_norm": 0.5476550459861755, + "learning_rate": 0.0014996671792849015, + "loss": 3.2668, + "step": 184 + }, + { + "epoch": 0.019514767932489453, + "grad_norm": 0.596758246421814, + "learning_rate": 0.001499659658719081, + "loss": 3.2177, + "step": 185 + }, + { + "epoch": 0.019620253164556962, + "grad_norm": 0.5562368631362915, + "learning_rate": 0.0014996520541501574, + "loss": 3.2107, + "step": 186 + }, + { + "epoch": 0.019725738396624472, + "grad_norm": 0.6264805793762207, + "learning_rate": 0.0014996443655789832, + "loss": 3.2007, + "step": 187 + }, + { + "epoch": 0.019831223628691982, + "grad_norm": 0.5780759453773499, + "learning_rate": 0.0014996365930064197, + "loss": 3.1927, + "step": 188 + }, + { + "epoch": 0.019936708860759492, + "grad_norm": 0.880408525466919, + "learning_rate": 0.001499628736433338, + "loss": 3.2141, + "step": 189 + }, + { + "epoch": 0.020042194092827006, + "grad_norm": 1.03953218460083, + "learning_rate": 0.0014996207958606182, + "loss": 3.2586, + "step": 190 + }, + { + "epoch": 0.020147679324894516, + "grad_norm": 0.8846624493598938, + "learning_rate": 0.0014996127712891504, + "loss": 3.2022, + "step": 191 + }, + { + "epoch": 0.020253164556962026, + "grad_norm": 0.6752599477767944, + "learning_rate": 0.0014996046627198337, + "loss": 3.2356, + "step": 192 + }, + { + "epoch": 0.020358649789029536, + "grad_norm": 0.5756528377532959, + "learning_rate": 0.0014995964701535768, + "loss": 3.1684, + "step": 193 + }, + { + "epoch": 0.020464135021097046, + "grad_norm": 0.809370756149292, + "learning_rate": 0.0014995881935912973, + "loss": 3.218, + "step": 194 + }, + { + "epoch": 0.020569620253164556, + "grad_norm": 
0.7209935784339905, + "learning_rate": 0.0014995798330339233, + "loss": 3.1994, + "step": 195 + }, + { + "epoch": 0.02067510548523207, + "grad_norm": 0.6657562851905823, + "learning_rate": 0.001499571388482391, + "loss": 3.1865, + "step": 196 + }, + { + "epoch": 0.02078059071729958, + "grad_norm": 0.6898912787437439, + "learning_rate": 0.001499562859937647, + "loss": 3.185, + "step": 197 + }, + { + "epoch": 0.02088607594936709, + "grad_norm": 0.7692835330963135, + "learning_rate": 0.001499554247400647, + "loss": 3.2082, + "step": 198 + }, + { + "epoch": 0.0209915611814346, + "grad_norm": 0.6726500988006592, + "learning_rate": 0.0014995455508723557, + "loss": 3.1947, + "step": 199 + }, + { + "epoch": 0.02109704641350211, + "grad_norm": 0.6024327874183655, + "learning_rate": 0.001499536770353748, + "loss": 3.1419, + "step": 200 + }, + { + "epoch": 0.02120253164556962, + "grad_norm": 0.742987334728241, + "learning_rate": 0.0014995279058458075, + "loss": 3.1921, + "step": 201 + }, + { + "epoch": 0.021308016877637132, + "grad_norm": 0.7170171141624451, + "learning_rate": 0.001499518957349528, + "loss": 3.1781, + "step": 202 + }, + { + "epoch": 0.021413502109704642, + "grad_norm": 0.7441462278366089, + "learning_rate": 0.0014995099248659115, + "loss": 3.1698, + "step": 203 + }, + { + "epoch": 0.021518987341772152, + "grad_norm": 0.6891679763793945, + "learning_rate": 0.001499500808395971, + "loss": 3.1577, + "step": 204 + }, + { + "epoch": 0.021624472573839662, + "grad_norm": 0.6226285099983215, + "learning_rate": 0.0014994916079407272, + "loss": 3.1417, + "step": 205 + }, + { + "epoch": 0.021729957805907172, + "grad_norm": 0.5427891612052917, + "learning_rate": 0.0014994823235012114, + "loss": 3.1171, + "step": 206 + }, + { + "epoch": 0.021835443037974682, + "grad_norm": 0.5152313113212585, + "learning_rate": 0.0014994729550784642, + "loss": 3.1468, + "step": 207 + }, + { + "epoch": 0.021940928270042195, + "grad_norm": 0.5553493499755859, + "learning_rate": 
0.001499463502673535, + "loss": 3.1194, + "step": 208 + }, + { + "epoch": 0.022046413502109705, + "grad_norm": 0.6156442761421204, + "learning_rate": 0.0014994539662874832, + "loss": 3.1277, + "step": 209 + }, + { + "epoch": 0.022151898734177215, + "grad_norm": 0.5608401298522949, + "learning_rate": 0.0014994443459213774, + "loss": 3.1716, + "step": 210 + }, + { + "epoch": 0.022257383966244725, + "grad_norm": 0.495272159576416, + "learning_rate": 0.0014994346415762956, + "loss": 3.1317, + "step": 211 + }, + { + "epoch": 0.022362869198312235, + "grad_norm": 0.5021925568580627, + "learning_rate": 0.0014994248532533253, + "loss": 3.1276, + "step": 212 + }, + { + "epoch": 0.022468354430379745, + "grad_norm": 0.573703408241272, + "learning_rate": 0.001499414980953563, + "loss": 3.1528, + "step": 213 + }, + { + "epoch": 0.02257383966244726, + "grad_norm": 0.5174473524093628, + "learning_rate": 0.0014994050246781153, + "loss": 3.1204, + "step": 214 + }, + { + "epoch": 0.02267932489451477, + "grad_norm": 0.4991498589515686, + "learning_rate": 0.0014993949844280977, + "loss": 3.0893, + "step": 215 + }, + { + "epoch": 0.02278481012658228, + "grad_norm": 0.4799327850341797, + "learning_rate": 0.0014993848602046355, + "loss": 3.108, + "step": 216 + }, + { + "epoch": 0.02289029535864979, + "grad_norm": 0.4871271550655365, + "learning_rate": 0.0014993746520088626, + "loss": 3.101, + "step": 217 + }, + { + "epoch": 0.0229957805907173, + "grad_norm": 0.5894774198532104, + "learning_rate": 0.0014993643598419234, + "loss": 3.1258, + "step": 218 + }, + { + "epoch": 0.023101265822784812, + "grad_norm": 0.6969600915908813, + "learning_rate": 0.0014993539837049707, + "loss": 3.1216, + "step": 219 + }, + { + "epoch": 0.023206751054852322, + "grad_norm": 0.659087061882019, + "learning_rate": 0.001499343523599168, + "loss": 3.0849, + "step": 220 + }, + { + "epoch": 0.02331223628691983, + "grad_norm": 0.5225766897201538, + "learning_rate": 0.0014993329795256864, + "loss": 3.0863, + "step": 
221 + }, + { + "epoch": 0.02341772151898734, + "grad_norm": 0.5317411422729492, + "learning_rate": 0.0014993223514857081, + "loss": 3.0615, + "step": 222 + }, + { + "epoch": 0.02352320675105485, + "grad_norm": 0.6747162938117981, + "learning_rate": 0.001499311639480424, + "loss": 3.0777, + "step": 223 + }, + { + "epoch": 0.02362869198312236, + "grad_norm": 0.6818360686302185, + "learning_rate": 0.0014993008435110345, + "loss": 3.054, + "step": 224 + }, + { + "epoch": 0.023734177215189875, + "grad_norm": 0.8184267282485962, + "learning_rate": 0.0014992899635787487, + "loss": 3.0529, + "step": 225 + }, + { + "epoch": 0.023839662447257385, + "grad_norm": 1.196481466293335, + "learning_rate": 0.0014992789996847863, + "loss": 3.1274, + "step": 226 + }, + { + "epoch": 0.023945147679324895, + "grad_norm": 0.8492373824119568, + "learning_rate": 0.0014992679518303761, + "loss": 3.0576, + "step": 227 + }, + { + "epoch": 0.024050632911392405, + "grad_norm": 0.6035951375961304, + "learning_rate": 0.001499256820016755, + "loss": 3.0554, + "step": 228 + }, + { + "epoch": 0.024156118143459915, + "grad_norm": 0.6859185099601746, + "learning_rate": 0.0014992456042451717, + "loss": 3.0444, + "step": 229 + }, + { + "epoch": 0.024261603375527425, + "grad_norm": 0.7136833667755127, + "learning_rate": 0.0014992343045168823, + "loss": 3.0657, + "step": 230 + }, + { + "epoch": 0.024367088607594938, + "grad_norm": 0.7372051477432251, + "learning_rate": 0.0014992229208331527, + "loss": 3.0459, + "step": 231 + }, + { + "epoch": 0.024472573839662448, + "grad_norm": 0.6096509695053101, + "learning_rate": 0.0014992114531952592, + "loss": 3.0545, + "step": 232 + }, + { + "epoch": 0.024578059071729958, + "grad_norm": 0.6039866805076599, + "learning_rate": 0.0014991999016044865, + "loss": 3.0089, + "step": 233 + }, + { + "epoch": 0.024683544303797468, + "grad_norm": 0.5600811839103699, + "learning_rate": 0.0014991882660621285, + "loss": 3.0591, + "step": 234 + }, + { + "epoch": 
0.024789029535864978, + "grad_norm": 0.6002296209335327, + "learning_rate": 0.0014991765465694898, + "loss": 2.9962, + "step": 235 + }, + { + "epoch": 0.024894514767932488, + "grad_norm": 0.7531085014343262, + "learning_rate": 0.0014991647431278835, + "loss": 3.0268, + "step": 236 + }, + { + "epoch": 0.025, + "grad_norm": 0.7606476545333862, + "learning_rate": 0.001499152855738632, + "loss": 3.0123, + "step": 237 + }, + { + "epoch": 0.02510548523206751, + "grad_norm": 0.6565594673156738, + "learning_rate": 0.0014991408844030672, + "loss": 2.9934, + "step": 238 + }, + { + "epoch": 0.02521097046413502, + "grad_norm": 0.5720810294151306, + "learning_rate": 0.0014991288291225308, + "loss": 3.0188, + "step": 239 + }, + { + "epoch": 0.02531645569620253, + "grad_norm": 0.5903963446617126, + "learning_rate": 0.0014991166898983739, + "loss": 3.0379, + "step": 240 + }, + { + "epoch": 0.02542194092827004, + "grad_norm": 0.6835680603981018, + "learning_rate": 0.001499104466731956, + "loss": 2.9829, + "step": 241 + }, + { + "epoch": 0.02552742616033755, + "grad_norm": 0.5755590796470642, + "learning_rate": 0.0014990921596246475, + "loss": 3.0058, + "step": 242 + }, + { + "epoch": 0.025632911392405065, + "grad_norm": 0.7185651659965515, + "learning_rate": 0.0014990797685778272, + "loss": 3.031, + "step": 243 + }, + { + "epoch": 0.025738396624472575, + "grad_norm": 0.8377906680107117, + "learning_rate": 0.0014990672935928835, + "loss": 3.0216, + "step": 244 + }, + { + "epoch": 0.025843881856540084, + "grad_norm": 0.9580426812171936, + "learning_rate": 0.0014990547346712144, + "loss": 3.0079, + "step": 245 + }, + { + "epoch": 0.025949367088607594, + "grad_norm": 0.853093147277832, + "learning_rate": 0.0014990420918142271, + "loss": 3.005, + "step": 246 + }, + { + "epoch": 0.026054852320675104, + "grad_norm": 0.6089229583740234, + "learning_rate": 0.0014990293650233384, + "loss": 2.954, + "step": 247 + }, + { + "epoch": 0.026160337552742614, + "grad_norm": 0.5856234431266785, + 
"learning_rate": 0.0014990165542999746, + "loss": 3.0114, + "step": 248 + }, + { + "epoch": 0.026265822784810128, + "grad_norm": 0.6749204397201538, + "learning_rate": 0.0014990036596455706, + "loss": 2.9684, + "step": 249 + }, + { + "epoch": 0.026371308016877638, + "grad_norm": 0.5574491024017334, + "learning_rate": 0.001498990681061572, + "loss": 2.9459, + "step": 250 + }, + { + "epoch": 0.026476793248945148, + "grad_norm": 0.6993549466133118, + "learning_rate": 0.0014989776185494322, + "loss": 2.9919, + "step": 251 + }, + { + "epoch": 0.026582278481012658, + "grad_norm": 0.7620849609375, + "learning_rate": 0.001498964472110616, + "loss": 2.9645, + "step": 252 + }, + { + "epoch": 0.026687763713080168, + "grad_norm": 0.5584017634391785, + "learning_rate": 0.001498951241746596, + "loss": 2.9632, + "step": 253 + }, + { + "epoch": 0.02679324894514768, + "grad_norm": 0.5731980204582214, + "learning_rate": 0.0014989379274588546, + "loss": 2.9469, + "step": 254 + }, + { + "epoch": 0.02689873417721519, + "grad_norm": 0.6176680326461792, + "learning_rate": 0.0014989245292488839, + "loss": 2.9433, + "step": 255 + }, + { + "epoch": 0.0270042194092827, + "grad_norm": 0.7660251259803772, + "learning_rate": 0.0014989110471181853, + "loss": 2.9068, + "step": 256 + }, + { + "epoch": 0.02710970464135021, + "grad_norm": 0.8521888852119446, + "learning_rate": 0.0014988974810682695, + "loss": 2.9631, + "step": 257 + }, + { + "epoch": 0.02721518987341772, + "grad_norm": 0.7631460428237915, + "learning_rate": 0.0014988838311006565, + "loss": 2.971, + "step": 258 + }, + { + "epoch": 0.02732067510548523, + "grad_norm": 0.5169890522956848, + "learning_rate": 0.0014988700972168758, + "loss": 2.961, + "step": 259 + }, + { + "epoch": 0.027426160337552744, + "grad_norm": 0.688254714012146, + "learning_rate": 0.001498856279418467, + "loss": 2.9099, + "step": 260 + }, + { + "epoch": 0.027531645569620254, + "grad_norm": 0.5732064247131348, + "learning_rate": 0.0014988423777069775, + "loss": 
2.9405, + "step": 261 + }, + { + "epoch": 0.027637130801687764, + "grad_norm": 0.693707287311554, + "learning_rate": 0.0014988283920839658, + "loss": 2.9202, + "step": 262 + }, + { + "epoch": 0.027742616033755274, + "grad_norm": 0.8059178590774536, + "learning_rate": 0.0014988143225509983, + "loss": 2.9249, + "step": 263 + }, + { + "epoch": 0.027848101265822784, + "grad_norm": 0.8680794835090637, + "learning_rate": 0.0014988001691096525, + "loss": 2.9374, + "step": 264 + }, + { + "epoch": 0.027953586497890294, + "grad_norm": 0.8296427130699158, + "learning_rate": 0.0014987859317615137, + "loss": 2.9336, + "step": 265 + }, + { + "epoch": 0.028059071729957807, + "grad_norm": 0.6723565459251404, + "learning_rate": 0.0014987716105081775, + "loss": 2.923, + "step": 266 + }, + { + "epoch": 0.028164556962025317, + "grad_norm": 0.5532949566841125, + "learning_rate": 0.001498757205351249, + "loss": 2.8977, + "step": 267 + }, + { + "epoch": 0.028270042194092827, + "grad_norm": 0.8449069857597351, + "learning_rate": 0.0014987427162923416, + "loss": 2.8887, + "step": 268 + }, + { + "epoch": 0.028375527426160337, + "grad_norm": 1.024977207183838, + "learning_rate": 0.001498728143333079, + "loss": 2.929, + "step": 269 + }, + { + "epoch": 0.028481012658227847, + "grad_norm": 0.7084341049194336, + "learning_rate": 0.0014987134864750948, + "loss": 2.9173, + "step": 270 + }, + { + "epoch": 0.028586497890295357, + "grad_norm": 0.7018455266952515, + "learning_rate": 0.0014986987457200312, + "loss": 2.9126, + "step": 271 + }, + { + "epoch": 0.02869198312236287, + "grad_norm": 0.9334180355072021, + "learning_rate": 0.0014986839210695394, + "loss": 2.8996, + "step": 272 + }, + { + "epoch": 0.02879746835443038, + "grad_norm": 0.8155325055122375, + "learning_rate": 0.0014986690125252814, + "loss": 2.8765, + "step": 273 + }, + { + "epoch": 0.02890295358649789, + "grad_norm": 0.6873154640197754, + "learning_rate": 0.001498654020088927, + "loss": 2.8865, + "step": 274 + }, + { + "epoch": 
0.0290084388185654, + "grad_norm": 0.5750824213027954, + "learning_rate": 0.0014986389437621566, + "loss": 2.9077, + "step": 275 + }, + { + "epoch": 0.02911392405063291, + "grad_norm": 0.6674890518188477, + "learning_rate": 0.0014986237835466596, + "loss": 2.8558, + "step": 276 + }, + { + "epoch": 0.02921940928270042, + "grad_norm": 0.6671152114868164, + "learning_rate": 0.0014986085394441343, + "loss": 2.8944, + "step": 277 + }, + { + "epoch": 0.029324894514767934, + "grad_norm": 0.6004770398139954, + "learning_rate": 0.0014985932114562896, + "loss": 2.8194, + "step": 278 + }, + { + "epoch": 0.029430379746835444, + "grad_norm": 0.6547853946685791, + "learning_rate": 0.0014985777995848428, + "loss": 2.8717, + "step": 279 + }, + { + "epoch": 0.029535864978902954, + "grad_norm": 0.5536069869995117, + "learning_rate": 0.0014985623038315206, + "loss": 2.851, + "step": 280 + }, + { + "epoch": 0.029641350210970464, + "grad_norm": 0.5169731378555298, + "learning_rate": 0.0014985467241980597, + "loss": 2.8314, + "step": 281 + }, + { + "epoch": 0.029746835443037974, + "grad_norm": 0.5498530268669128, + "learning_rate": 0.0014985310606862058, + "loss": 2.8881, + "step": 282 + }, + { + "epoch": 0.029852320675105484, + "grad_norm": 0.5555122494697571, + "learning_rate": 0.0014985153132977141, + "loss": 2.8158, + "step": 283 + }, + { + "epoch": 0.029957805907172997, + "grad_norm": 0.5799844264984131, + "learning_rate": 0.0014984994820343488, + "loss": 2.8296, + "step": 284 + }, + { + "epoch": 0.030063291139240507, + "grad_norm": 0.5923528075218201, + "learning_rate": 0.0014984835668978844, + "loss": 2.8777, + "step": 285 + }, + { + "epoch": 0.030168776371308017, + "grad_norm": 0.6230958104133606, + "learning_rate": 0.0014984675678901042, + "loss": 2.8649, + "step": 286 + }, + { + "epoch": 0.030274261603375527, + "grad_norm": 0.6411816477775574, + "learning_rate": 0.0014984514850128006, + "loss": 2.8544, + "step": 287 + }, + { + "epoch": 0.030379746835443037, + "grad_norm": 
0.7555307149887085, + "learning_rate": 0.0014984353182677759, + "loss": 2.8449, + "step": 288 + }, + { + "epoch": 0.03048523206751055, + "grad_norm": 0.7193092107772827, + "learning_rate": 0.001498419067656842, + "loss": 2.8433, + "step": 289 + }, + { + "epoch": 0.03059071729957806, + "grad_norm": 0.7252710461616516, + "learning_rate": 0.0014984027331818193, + "loss": 2.8255, + "step": 290 + }, + { + "epoch": 0.03069620253164557, + "grad_norm": 0.8513174653053284, + "learning_rate": 0.0014983863148445389, + "loss": 2.8382, + "step": 291 + }, + { + "epoch": 0.03080168776371308, + "grad_norm": 0.8952385783195496, + "learning_rate": 0.0014983698126468398, + "loss": 2.8603, + "step": 292 + }, + { + "epoch": 0.03090717299578059, + "grad_norm": 0.8202630877494812, + "learning_rate": 0.0014983532265905716, + "loss": 2.8465, + "step": 293 + }, + { + "epoch": 0.0310126582278481, + "grad_norm": 0.6958128809928894, + "learning_rate": 0.0014983365566775928, + "loss": 2.7939, + "step": 294 + }, + { + "epoch": 0.031118143459915613, + "grad_norm": 0.7578851580619812, + "learning_rate": 0.0014983198029097711, + "loss": 2.8054, + "step": 295 + }, + { + "epoch": 0.031223628691983123, + "grad_norm": 0.6923290491104126, + "learning_rate": 0.0014983029652889843, + "loss": 2.8432, + "step": 296 + }, + { + "epoch": 0.03132911392405063, + "grad_norm": 0.497479647397995, + "learning_rate": 0.0014982860438171187, + "loss": 2.8475, + "step": 297 + }, + { + "epoch": 0.03143459915611815, + "grad_norm": 0.7015809416770935, + "learning_rate": 0.0014982690384960705, + "loss": 2.823, + "step": 298 + }, + { + "epoch": 0.03154008438818565, + "grad_norm": 0.5273310542106628, + "learning_rate": 0.0014982519493277455, + "loss": 2.7929, + "step": 299 + }, + { + "epoch": 0.03164556962025317, + "grad_norm": 0.6134404540061951, + "learning_rate": 0.0014982347763140584, + "loss": 2.8006, + "step": 300 + }, + { + "epoch": 0.03175105485232067, + "grad_norm": 0.6834130883216858, + "learning_rate": 
0.0014982175194569337, + "loss": 2.7705, + "step": 301 + }, + { + "epoch": 0.03185654008438819, + "grad_norm": 0.6310880780220032, + "learning_rate": 0.0014982001787583047, + "loss": 2.7807, + "step": 302 + }, + { + "epoch": 0.03196202531645569, + "grad_norm": 0.5843527913093567, + "learning_rate": 0.001498182754220115, + "loss": 2.7904, + "step": 303 + }, + { + "epoch": 0.032067510548523206, + "grad_norm": 0.7363938689231873, + "learning_rate": 0.001498165245844317, + "loss": 2.7971, + "step": 304 + }, + { + "epoch": 0.03217299578059072, + "grad_norm": 0.6606372594833374, + "learning_rate": 0.0014981476536328722, + "loss": 2.8111, + "step": 305 + }, + { + "epoch": 0.032278481012658226, + "grad_norm": 0.6151188015937805, + "learning_rate": 0.0014981299775877525, + "loss": 2.7855, + "step": 306 + }, + { + "epoch": 0.03238396624472574, + "grad_norm": 0.5965151786804199, + "learning_rate": 0.0014981122177109383, + "loss": 2.7962, + "step": 307 + }, + { + "epoch": 0.032489451476793246, + "grad_norm": 0.6231781840324402, + "learning_rate": 0.0014980943740044196, + "loss": 2.7743, + "step": 308 + }, + { + "epoch": 0.03259493670886076, + "grad_norm": 0.5160700678825378, + "learning_rate": 0.0014980764464701958, + "loss": 2.7895, + "step": 309 + }, + { + "epoch": 0.03270042194092827, + "grad_norm": 0.5292901396751404, + "learning_rate": 0.0014980584351102762, + "loss": 2.7799, + "step": 310 + }, + { + "epoch": 0.03280590717299578, + "grad_norm": 0.7051997780799866, + "learning_rate": 0.0014980403399266786, + "loss": 2.7591, + "step": 311 + }, + { + "epoch": 0.03291139240506329, + "grad_norm": 0.6766244173049927, + "learning_rate": 0.0014980221609214308, + "loss": 2.7784, + "step": 312 + }, + { + "epoch": 0.0330168776371308, + "grad_norm": 0.7975818514823914, + "learning_rate": 0.0014980038980965701, + "loss": 2.7513, + "step": 313 + }, + { + "epoch": 0.03312236286919831, + "grad_norm": 0.7642866969108582, + "learning_rate": 0.0014979855514541424, + "loss": 2.7442, + 
"step": 314 + }, + { + "epoch": 0.03322784810126582, + "grad_norm": 0.7366965413093567, + "learning_rate": 0.0014979671209962044, + "loss": 2.765, + "step": 315 + }, + { + "epoch": 0.03333333333333333, + "grad_norm": 0.5550256967544556, + "learning_rate": 0.0014979486067248204, + "loss": 2.7356, + "step": 316 + }, + { + "epoch": 0.033438818565400846, + "grad_norm": 0.6140890121459961, + "learning_rate": 0.0014979300086420655, + "loss": 2.7652, + "step": 317 + }, + { + "epoch": 0.03354430379746835, + "grad_norm": 0.7041509747505188, + "learning_rate": 0.0014979113267500235, + "loss": 2.7628, + "step": 318 + }, + { + "epoch": 0.033649789029535866, + "grad_norm": 0.7257493734359741, + "learning_rate": 0.0014978925610507879, + "loss": 2.7444, + "step": 319 + }, + { + "epoch": 0.03375527426160337, + "grad_norm": 0.789559006690979, + "learning_rate": 0.001497873711546462, + "loss": 2.7652, + "step": 320 + }, + { + "epoch": 0.033860759493670886, + "grad_norm": 0.795741617679596, + "learning_rate": 0.001497854778239157, + "loss": 2.7696, + "step": 321 + }, + { + "epoch": 0.0339662447257384, + "grad_norm": 0.8135206699371338, + "learning_rate": 0.0014978357611309951, + "loss": 2.7704, + "step": 322 + }, + { + "epoch": 0.034071729957805906, + "grad_norm": 0.7526617646217346, + "learning_rate": 0.0014978166602241068, + "loss": 2.7708, + "step": 323 + }, + { + "epoch": 0.03417721518987342, + "grad_norm": 0.6017882227897644, + "learning_rate": 0.0014977974755206334, + "loss": 2.7857, + "step": 324 + }, + { + "epoch": 0.034282700421940926, + "grad_norm": 0.6499062180519104, + "learning_rate": 0.0014977782070227236, + "loss": 2.722, + "step": 325 + }, + { + "epoch": 0.03438818565400844, + "grad_norm": 1.0074416399002075, + "learning_rate": 0.001497758854732537, + "loss": 2.7544, + "step": 326 + }, + { + "epoch": 0.03449367088607595, + "grad_norm": 1.0769282579421997, + "learning_rate": 0.001497739418652242, + "loss": 2.7694, + "step": 327 + }, + { + "epoch": 0.03459915611814346, 
+ "grad_norm": 0.7376272082328796, + "learning_rate": 0.0014977198987840168, + "loss": 2.7228, + "step": 328 + }, + { + "epoch": 0.03470464135021097, + "grad_norm": 0.5982550382614136, + "learning_rate": 0.0014977002951300483, + "loss": 2.7067, + "step": 329 + }, + { + "epoch": 0.03481012658227848, + "grad_norm": 0.5951549410820007, + "learning_rate": 0.0014976806076925334, + "loss": 2.6973, + "step": 330 + }, + { + "epoch": 0.03491561181434599, + "grad_norm": 0.5273138880729675, + "learning_rate": 0.0014976608364736781, + "loss": 2.6986, + "step": 331 + }, + { + "epoch": 0.0350210970464135, + "grad_norm": 0.5996392369270325, + "learning_rate": 0.001497640981475698, + "loss": 2.7129, + "step": 332 + }, + { + "epoch": 0.03512658227848101, + "grad_norm": 0.6428535580635071, + "learning_rate": 0.0014976210427008177, + "loss": 2.7383, + "step": 333 + }, + { + "epoch": 0.035232067510548526, + "grad_norm": 0.5757026672363281, + "learning_rate": 0.0014976010201512718, + "loss": 2.7083, + "step": 334 + }, + { + "epoch": 0.03533755274261603, + "grad_norm": 0.6519010663032532, + "learning_rate": 0.0014975809138293036, + "loss": 2.7056, + "step": 335 + }, + { + "epoch": 0.035443037974683546, + "grad_norm": 0.7298191785812378, + "learning_rate": 0.0014975607237371663, + "loss": 2.7062, + "step": 336 + }, + { + "epoch": 0.03554852320675105, + "grad_norm": 0.8740049600601196, + "learning_rate": 0.0014975404498771222, + "loss": 2.7068, + "step": 337 + }, + { + "epoch": 0.035654008438818566, + "grad_norm": 0.863081693649292, + "learning_rate": 0.0014975200922514428, + "loss": 2.7452, + "step": 338 + }, + { + "epoch": 0.03575949367088608, + "grad_norm": 0.6442844271659851, + "learning_rate": 0.00149749965086241, + "loss": 2.7368, + "step": 339 + }, + { + "epoch": 0.035864978902953586, + "grad_norm": 0.6610534191131592, + "learning_rate": 0.0014974791257123137, + "loss": 2.7096, + "step": 340 + }, + { + "epoch": 0.0359704641350211, + "grad_norm": 1.0199223756790161, + 
"learning_rate": 0.0014974585168034543, + "loss": 2.6983, + "step": 341 + }, + { + "epoch": 0.036075949367088606, + "grad_norm": 1.1876922845840454, + "learning_rate": 0.0014974378241381409, + "loss": 2.679, + "step": 342 + }, + { + "epoch": 0.03618143459915612, + "grad_norm": 0.7193607687950134, + "learning_rate": 0.001497417047718692, + "loss": 2.7032, + "step": 343 + }, + { + "epoch": 0.036286919831223625, + "grad_norm": 0.6313633322715759, + "learning_rate": 0.0014973961875474364, + "loss": 2.6909, + "step": 344 + }, + { + "epoch": 0.03639240506329114, + "grad_norm": 0.9171256422996521, + "learning_rate": 0.0014973752436267106, + "loss": 2.6985, + "step": 345 + }, + { + "epoch": 0.03649789029535865, + "grad_norm": 1.0426268577575684, + "learning_rate": 0.0014973542159588623, + "loss": 2.6773, + "step": 346 + }, + { + "epoch": 0.03660337552742616, + "grad_norm": 0.657045304775238, + "learning_rate": 0.0014973331045462475, + "loss": 2.6978, + "step": 347 + }, + { + "epoch": 0.03670886075949367, + "grad_norm": 0.7302868366241455, + "learning_rate": 0.0014973119093912317, + "loss": 2.6883, + "step": 348 + }, + { + "epoch": 0.03681434599156118, + "grad_norm": 1.1683646440505981, + "learning_rate": 0.00149729063049619, + "loss": 2.6638, + "step": 349 + }, + { + "epoch": 0.03691983122362869, + "grad_norm": 0.6301737427711487, + "learning_rate": 0.001497269267863507, + "loss": 2.6606, + "step": 350 + }, + { + "epoch": 0.037025316455696206, + "grad_norm": 0.6634512543678284, + "learning_rate": 0.0014972478214955762, + "loss": 2.6511, + "step": 351 + }, + { + "epoch": 0.03713080168776371, + "grad_norm": 0.8298724889755249, + "learning_rate": 0.0014972262913948008, + "loss": 2.6843, + "step": 352 + }, + { + "epoch": 0.037236286919831225, + "grad_norm": 0.5825247168540955, + "learning_rate": 0.0014972046775635934, + "loss": 2.6917, + "step": 353 + }, + { + "epoch": 0.03734177215189873, + "grad_norm": 0.7429960370063782, + "learning_rate": 0.0014971829800043762, + "loss": 
2.6218, + "step": 354 + }, + { + "epoch": 0.037447257383966245, + "grad_norm": 0.9002052545547485, + "learning_rate": 0.0014971611987195802, + "loss": 2.661, + "step": 355 + }, + { + "epoch": 0.03755274261603375, + "grad_norm": 0.6138924956321716, + "learning_rate": 0.0014971393337116462, + "loss": 2.6737, + "step": 356 + }, + { + "epoch": 0.037658227848101265, + "grad_norm": 0.5207064151763916, + "learning_rate": 0.0014971173849830243, + "loss": 2.6256, + "step": 357 + }, + { + "epoch": 0.03776371308016878, + "grad_norm": 0.650394082069397, + "learning_rate": 0.0014970953525361738, + "loss": 2.6545, + "step": 358 + }, + { + "epoch": 0.037869198312236285, + "grad_norm": 0.6586868762969971, + "learning_rate": 0.001497073236373564, + "loss": 2.622, + "step": 359 + }, + { + "epoch": 0.0379746835443038, + "grad_norm": 0.5829240083694458, + "learning_rate": 0.0014970510364976724, + "loss": 2.671, + "step": 360 + }, + { + "epoch": 0.038080168776371305, + "grad_norm": 0.5428882241249084, + "learning_rate": 0.0014970287529109873, + "loss": 2.6405, + "step": 361 + }, + { + "epoch": 0.03818565400843882, + "grad_norm": 0.5960879921913147, + "learning_rate": 0.0014970063856160054, + "loss": 2.6743, + "step": 362 + }, + { + "epoch": 0.03829113924050633, + "grad_norm": 0.550055205821991, + "learning_rate": 0.0014969839346152332, + "loss": 2.6202, + "step": 363 + }, + { + "epoch": 0.03839662447257384, + "grad_norm": 0.5516056418418884, + "learning_rate": 0.001496961399911186, + "loss": 2.6264, + "step": 364 + }, + { + "epoch": 0.03850210970464135, + "grad_norm": 0.5342622995376587, + "learning_rate": 0.0014969387815063897, + "loss": 2.6486, + "step": 365 + }, + { + "epoch": 0.03860759493670886, + "grad_norm": 0.6294840574264526, + "learning_rate": 0.0014969160794033778, + "loss": 2.6341, + "step": 366 + }, + { + "epoch": 0.03871308016877637, + "grad_norm": 0.5290802717208862, + "learning_rate": 0.0014968932936046953, + "loss": 2.6242, + "step": 367 + }, + { + "epoch": 
0.038818565400843885, + "grad_norm": 0.5644403696060181, + "learning_rate": 0.0014968704241128947, + "loss": 2.6477, + "step": 368 + }, + { + "epoch": 0.03892405063291139, + "grad_norm": 0.5509594082832336, + "learning_rate": 0.0014968474709305384, + "loss": 2.6396, + "step": 369 + }, + { + "epoch": 0.039029535864978905, + "grad_norm": 0.5550581216812134, + "learning_rate": 0.0014968244340601996, + "loss": 2.6081, + "step": 370 + }, + { + "epoch": 0.03913502109704641, + "grad_norm": 0.635097086429596, + "learning_rate": 0.0014968013135044586, + "loss": 2.6203, + "step": 371 + }, + { + "epoch": 0.039240506329113925, + "grad_norm": 0.6135249137878418, + "learning_rate": 0.0014967781092659065, + "loss": 2.6004, + "step": 372 + }, + { + "epoch": 0.03934599156118143, + "grad_norm": 0.5527298450469971, + "learning_rate": 0.0014967548213471436, + "loss": 2.645, + "step": 373 + }, + { + "epoch": 0.039451476793248945, + "grad_norm": 0.7356232404708862, + "learning_rate": 0.0014967314497507792, + "loss": 2.6664, + "step": 374 + }, + { + "epoch": 0.03955696202531646, + "grad_norm": 0.8692887425422668, + "learning_rate": 0.0014967079944794323, + "loss": 2.6048, + "step": 375 + }, + { + "epoch": 0.039662447257383965, + "grad_norm": 0.7453126907348633, + "learning_rate": 0.0014966844555357314, + "loss": 2.6313, + "step": 376 + }, + { + "epoch": 0.03976793248945148, + "grad_norm": 0.5617813467979431, + "learning_rate": 0.0014966608329223137, + "loss": 2.6233, + "step": 377 + }, + { + "epoch": 0.039873417721518985, + "grad_norm": 0.800761342048645, + "learning_rate": 0.0014966371266418267, + "loss": 2.6112, + "step": 378 + }, + { + "epoch": 0.0399789029535865, + "grad_norm": 0.5592907667160034, + "learning_rate": 0.0014966133366969264, + "loss": 2.6183, + "step": 379 + }, + { + "epoch": 0.04008438818565401, + "grad_norm": 0.5851669311523438, + "learning_rate": 0.001496589463090279, + "loss": 2.6122, + "step": 380 + }, + { + "epoch": 0.04018987341772152, + "grad_norm": 
0.5740135908126831, + "learning_rate": 0.0014965655058245592, + "loss": 2.6207, + "step": 381 + }, + { + "epoch": 0.04029535864978903, + "grad_norm": 0.5437324047088623, + "learning_rate": 0.001496541464902452, + "loss": 2.6227, + "step": 382 + }, + { + "epoch": 0.04040084388185654, + "grad_norm": 0.5414389967918396, + "learning_rate": 0.001496517340326651, + "loss": 2.6081, + "step": 383 + }, + { + "epoch": 0.04050632911392405, + "grad_norm": 0.5361164212226868, + "learning_rate": 0.0014964931320998593, + "loss": 2.5949, + "step": 384 + }, + { + "epoch": 0.04061181434599156, + "grad_norm": 0.5510179400444031, + "learning_rate": 0.00149646884022479, + "loss": 2.5878, + "step": 385 + }, + { + "epoch": 0.04071729957805907, + "grad_norm": 0.5200835466384888, + "learning_rate": 0.0014964444647041647, + "loss": 2.5771, + "step": 386 + }, + { + "epoch": 0.040822784810126585, + "grad_norm": 0.5653385519981384, + "learning_rate": 0.0014964200055407153, + "loss": 2.613, + "step": 387 + }, + { + "epoch": 0.04092827004219409, + "grad_norm": 0.6348411440849304, + "learning_rate": 0.0014963954627371823, + "loss": 2.5971, + "step": 388 + }, + { + "epoch": 0.041033755274261605, + "grad_norm": 0.7210496664047241, + "learning_rate": 0.0014963708362963157, + "loss": 2.5954, + "step": 389 + }, + { + "epoch": 0.04113924050632911, + "grad_norm": 1.0760339498519897, + "learning_rate": 0.001496346126220875, + "loss": 2.6004, + "step": 390 + }, + { + "epoch": 0.041244725738396625, + "grad_norm": 1.315760850906372, + "learning_rate": 0.0014963213325136296, + "loss": 2.6056, + "step": 391 + }, + { + "epoch": 0.04135021097046414, + "grad_norm": 0.5706395506858826, + "learning_rate": 0.0014962964551773572, + "loss": 2.5854, + "step": 392 + }, + { + "epoch": 0.041455696202531644, + "grad_norm": 1.433014988899231, + "learning_rate": 0.0014962714942148457, + "loss": 2.5874, + "step": 393 + }, + { + "epoch": 0.04156118143459916, + "grad_norm": 1.0216515064239502, + "learning_rate": 
0.001496246449628892, + "loss": 2.5725, + "step": 394 + }, + { + "epoch": 0.041666666666666664, + "grad_norm": 0.5685988664627075, + "learning_rate": 0.0014962213214223025, + "loss": 2.5674, + "step": 395 + }, + { + "epoch": 0.04177215189873418, + "grad_norm": 0.7201846837997437, + "learning_rate": 0.001496196109597893, + "loss": 2.5702, + "step": 396 + }, + { + "epoch": 0.04187763713080169, + "grad_norm": 0.8310861587524414, + "learning_rate": 0.0014961708141584885, + "loss": 2.5558, + "step": 397 + }, + { + "epoch": 0.0419831223628692, + "grad_norm": 0.6129959225654602, + "learning_rate": 0.0014961454351069233, + "loss": 2.5589, + "step": 398 + }, + { + "epoch": 0.04208860759493671, + "grad_norm": 0.5187612771987915, + "learning_rate": 0.0014961199724460418, + "loss": 2.5755, + "step": 399 + }, + { + "epoch": 0.04219409282700422, + "grad_norm": 0.6253417134284973, + "learning_rate": 0.0014960944261786966, + "loss": 2.5708, + "step": 400 + }, + { + "epoch": 0.04229957805907173, + "grad_norm": 0.5630738735198975, + "learning_rate": 0.001496068796307751, + "loss": 2.5469, + "step": 401 + }, + { + "epoch": 0.04240506329113924, + "grad_norm": 0.5105161666870117, + "learning_rate": 0.0014960430828360762, + "loss": 2.5362, + "step": 402 + }, + { + "epoch": 0.04251054852320675, + "grad_norm": 0.5707828998565674, + "learning_rate": 0.001496017285766554, + "loss": 2.5842, + "step": 403 + }, + { + "epoch": 0.042616033755274264, + "grad_norm": 0.5882430672645569, + "learning_rate": 0.0014959914051020748, + "loss": 2.5788, + "step": 404 + }, + { + "epoch": 0.04272151898734177, + "grad_norm": 0.521066427230835, + "learning_rate": 0.001495965440845539, + "loss": 2.5324, + "step": 405 + }, + { + "epoch": 0.042827004219409284, + "grad_norm": 0.6223902106285095, + "learning_rate": 0.0014959393929998557, + "loss": 2.5699, + "step": 406 + }, + { + "epoch": 0.04293248945147679, + "grad_norm": 0.6338274478912354, + "learning_rate": 0.001495913261567944, + "loss": 2.5961, + "step": 407 
+ }, + { + "epoch": 0.043037974683544304, + "grad_norm": 0.587840735912323, + "learning_rate": 0.0014958870465527317, + "loss": 2.5546, + "step": 408 + }, + { + "epoch": 0.04314345991561182, + "grad_norm": 0.7710127830505371, + "learning_rate": 0.0014958607479571564, + "loss": 2.5545, + "step": 409 + }, + { + "epoch": 0.043248945147679324, + "grad_norm": 0.8679630756378174, + "learning_rate": 0.0014958343657841655, + "loss": 2.5407, + "step": 410 + }, + { + "epoch": 0.04335443037974684, + "grad_norm": 0.893332302570343, + "learning_rate": 0.0014958079000367147, + "loss": 2.5347, + "step": 411 + }, + { + "epoch": 0.043459915611814344, + "grad_norm": 0.9314123392105103, + "learning_rate": 0.0014957813507177696, + "loss": 2.5595, + "step": 412 + }, + { + "epoch": 0.04356540084388186, + "grad_norm": 0.8991696238517761, + "learning_rate": 0.0014957547178303054, + "loss": 2.5642, + "step": 413 + }, + { + "epoch": 0.043670886075949364, + "grad_norm": 0.5955166816711426, + "learning_rate": 0.0014957280013773065, + "loss": 2.5191, + "step": 414 + }, + { + "epoch": 0.04377637130801688, + "grad_norm": 0.6419926881790161, + "learning_rate": 0.0014957012013617663, + "loss": 2.5645, + "step": 415 + }, + { + "epoch": 0.04388185654008439, + "grad_norm": 0.8075816631317139, + "learning_rate": 0.0014956743177866882, + "loss": 2.5386, + "step": 416 + }, + { + "epoch": 0.0439873417721519, + "grad_norm": 0.6732020378112793, + "learning_rate": 0.0014956473506550845, + "loss": 2.546, + "step": 417 + }, + { + "epoch": 0.04409282700421941, + "grad_norm": 0.6480821371078491, + "learning_rate": 0.0014956202999699773, + "loss": 2.5698, + "step": 418 + }, + { + "epoch": 0.04419831223628692, + "grad_norm": 0.7274339199066162, + "learning_rate": 0.001495593165734397, + "loss": 2.5438, + "step": 419 + }, + { + "epoch": 0.04430379746835443, + "grad_norm": 0.6608585715293884, + "learning_rate": 0.001495565947951385, + "loss": 2.5128, + "step": 420 + }, + { + "epoch": 0.044409282700421944, + 
"grad_norm": 0.7653111815452576, + "learning_rate": 0.0014955386466239907, + "loss": 2.5409, + "step": 421 + }, + { + "epoch": 0.04451476793248945, + "grad_norm": 0.6082889437675476, + "learning_rate": 0.0014955112617552734, + "loss": 2.5205, + "step": 422 + }, + { + "epoch": 0.044620253164556964, + "grad_norm": 0.5905618667602539, + "learning_rate": 0.001495483793348302, + "loss": 2.5163, + "step": 423 + }, + { + "epoch": 0.04472573839662447, + "grad_norm": 0.8694251775741577, + "learning_rate": 0.0014954562414061538, + "loss": 2.5266, + "step": 424 + }, + { + "epoch": 0.044831223628691984, + "grad_norm": 0.9548704028129578, + "learning_rate": 0.0014954286059319167, + "loss": 2.5033, + "step": 425 + }, + { + "epoch": 0.04493670886075949, + "grad_norm": 0.699002206325531, + "learning_rate": 0.0014954008869286876, + "loss": 2.537, + "step": 426 + }, + { + "epoch": 0.045042194092827004, + "grad_norm": 0.5707356929779053, + "learning_rate": 0.001495373084399572, + "loss": 2.5203, + "step": 427 + }, + { + "epoch": 0.04514767932489452, + "grad_norm": 0.6411645412445068, + "learning_rate": 0.0014953451983476854, + "loss": 2.5157, + "step": 428 + }, + { + "epoch": 0.045253164556962024, + "grad_norm": 0.5532188415527344, + "learning_rate": 0.0014953172287761529, + "loss": 2.4938, + "step": 429 + }, + { + "epoch": 0.04535864978902954, + "grad_norm": 0.6011930704116821, + "learning_rate": 0.0014952891756881085, + "loss": 2.4852, + "step": 430 + }, + { + "epoch": 0.045464135021097044, + "grad_norm": 0.6035800576210022, + "learning_rate": 0.0014952610390866954, + "loss": 2.4738, + "step": 431 + }, + { + "epoch": 0.04556962025316456, + "grad_norm": 0.5588313937187195, + "learning_rate": 0.0014952328189750666, + "loss": 2.4928, + "step": 432 + }, + { + "epoch": 0.04567510548523207, + "grad_norm": 0.5642839670181274, + "learning_rate": 0.0014952045153563845, + "loss": 2.4861, + "step": 433 + }, + { + "epoch": 0.04578059071729958, + "grad_norm": 0.5543592572212219, + 
"learning_rate": 0.0014951761282338205, + "loss": 2.5143, + "step": 434 + }, + { + "epoch": 0.04588607594936709, + "grad_norm": 0.5459843277931213, + "learning_rate": 0.0014951476576105555, + "loss": 2.4959, + "step": 435 + }, + { + "epoch": 0.0459915611814346, + "grad_norm": 0.5680912733078003, + "learning_rate": 0.00149511910348978, + "loss": 2.4998, + "step": 436 + }, + { + "epoch": 0.04609704641350211, + "grad_norm": 0.5706222653388977, + "learning_rate": 0.0014950904658746933, + "loss": 2.4879, + "step": 437 + }, + { + "epoch": 0.046202531645569624, + "grad_norm": 0.5343067049980164, + "learning_rate": 0.0014950617447685047, + "loss": 2.5287, + "step": 438 + }, + { + "epoch": 0.04630801687763713, + "grad_norm": 0.6289631724357605, + "learning_rate": 0.001495032940174432, + "loss": 2.4715, + "step": 439 + }, + { + "epoch": 0.046413502109704644, + "grad_norm": 0.6915307641029358, + "learning_rate": 0.0014950040520957037, + "loss": 2.4983, + "step": 440 + }, + { + "epoch": 0.04651898734177215, + "grad_norm": 0.6366404294967651, + "learning_rate": 0.0014949750805355563, + "loss": 2.4922, + "step": 441 + }, + { + "epoch": 0.04662447257383966, + "grad_norm": 0.6120153665542603, + "learning_rate": 0.0014949460254972363, + "loss": 2.5006, + "step": 442 + }, + { + "epoch": 0.04672995780590717, + "grad_norm": 0.6783043146133423, + "learning_rate": 0.0014949168869839997, + "loss": 2.4749, + "step": 443 + }, + { + "epoch": 0.04683544303797468, + "grad_norm": 0.7931810021400452, + "learning_rate": 0.0014948876649991112, + "loss": 2.5038, + "step": 444 + }, + { + "epoch": 0.0469409282700422, + "grad_norm": 0.7647908926010132, + "learning_rate": 0.0014948583595458455, + "loss": 2.4934, + "step": 445 + }, + { + "epoch": 0.0470464135021097, + "grad_norm": 0.6186598539352417, + "learning_rate": 0.0014948289706274865, + "loss": 2.5126, + "step": 446 + }, + { + "epoch": 0.04715189873417722, + "grad_norm": 0.5164459943771362, + "learning_rate": 0.0014947994982473273, + "loss": 
2.4919, + "step": 447 + }, + { + "epoch": 0.04725738396624472, + "grad_norm": 0.5778205990791321, + "learning_rate": 0.0014947699424086704, + "loss": 2.4797, + "step": 448 + }, + { + "epoch": 0.04736286919831224, + "grad_norm": 0.6147980093955994, + "learning_rate": 0.0014947403031148278, + "loss": 2.5164, + "step": 449 + }, + { + "epoch": 0.04746835443037975, + "grad_norm": 0.5935326814651489, + "learning_rate": 0.0014947105803691204, + "loss": 2.4729, + "step": 450 + }, + { + "epoch": 0.047573839662447256, + "grad_norm": 0.5689955353736877, + "learning_rate": 0.0014946807741748791, + "loss": 2.4956, + "step": 451 + }, + { + "epoch": 0.04767932489451477, + "grad_norm": 0.5309973955154419, + "learning_rate": 0.001494650884535444, + "loss": 2.4551, + "step": 452 + }, + { + "epoch": 0.047784810126582276, + "grad_norm": 0.6628833413124084, + "learning_rate": 0.0014946209114541636, + "loss": 2.5191, + "step": 453 + }, + { + "epoch": 0.04789029535864979, + "grad_norm": 0.8640896081924438, + "learning_rate": 0.0014945908549343974, + "loss": 2.4909, + "step": 454 + }, + { + "epoch": 0.047995780590717296, + "grad_norm": 1.0915123224258423, + "learning_rate": 0.001494560714979513, + "loss": 2.4801, + "step": 455 + }, + { + "epoch": 0.04810126582278481, + "grad_norm": 0.9790918231010437, + "learning_rate": 0.0014945304915928875, + "loss": 2.4478, + "step": 456 + }, + { + "epoch": 0.04820675105485232, + "grad_norm": 0.8456334471702576, + "learning_rate": 0.0014945001847779082, + "loss": 2.4565, + "step": 457 + }, + { + "epoch": 0.04831223628691983, + "grad_norm": 0.5359956622123718, + "learning_rate": 0.0014944697945379708, + "loss": 2.4394, + "step": 458 + }, + { + "epoch": 0.04841772151898734, + "grad_norm": 0.9539507627487183, + "learning_rate": 0.0014944393208764805, + "loss": 2.45, + "step": 459 + }, + { + "epoch": 0.04852320675105485, + "grad_norm": 1.1126893758773804, + "learning_rate": 0.0014944087637968522, + "loss": 2.5089, + "step": 460 + }, + { + "epoch": 
0.04862869198312236, + "grad_norm": 0.5981240272521973, + "learning_rate": 0.00149437812330251, + "loss": 2.4502, + "step": 461 + }, + { + "epoch": 0.048734177215189876, + "grad_norm": 0.9134164452552795, + "learning_rate": 0.0014943473993968871, + "loss": 2.4783, + "step": 462 + }, + { + "epoch": 0.04883966244725738, + "grad_norm": 1.0946471691131592, + "learning_rate": 0.0014943165920834266, + "loss": 2.464, + "step": 463 + }, + { + "epoch": 0.048945147679324896, + "grad_norm": 0.48983457684516907, + "learning_rate": 0.0014942857013655806, + "loss": 2.4514, + "step": 464 + }, + { + "epoch": 0.0490506329113924, + "grad_norm": 1.1122416257858276, + "learning_rate": 0.0014942547272468103, + "loss": 2.4626, + "step": 465 + }, + { + "epoch": 0.049156118143459916, + "grad_norm": 0.8826571106910706, + "learning_rate": 0.0014942236697305866, + "loss": 2.434, + "step": 466 + }, + { + "epoch": 0.04926160337552743, + "grad_norm": 0.6356802582740784, + "learning_rate": 0.0014941925288203897, + "loss": 2.4631, + "step": 467 + }, + { + "epoch": 0.049367088607594936, + "grad_norm": 0.8806255459785461, + "learning_rate": 0.001494161304519709, + "loss": 2.4633, + "step": 468 + }, + { + "epoch": 0.04947257383966245, + "grad_norm": 0.8227080702781677, + "learning_rate": 0.0014941299968320434, + "loss": 2.4323, + "step": 469 + }, + { + "epoch": 0.049578059071729956, + "grad_norm": 0.6707944869995117, + "learning_rate": 0.0014940986057609012, + "loss": 2.4313, + "step": 470 + }, + { + "epoch": 0.04968354430379747, + "grad_norm": 0.7955568432807922, + "learning_rate": 0.0014940671313097998, + "loss": 2.4406, + "step": 471 + }, + { + "epoch": 0.049789029535864976, + "grad_norm": 0.9845168590545654, + "learning_rate": 0.001494035573482266, + "loss": 2.4469, + "step": 472 + }, + { + "epoch": 0.04989451476793249, + "grad_norm": 0.6886542439460754, + "learning_rate": 0.0014940039322818362, + "loss": 2.4667, + "step": 473 + }, + { + "epoch": 0.05, + "grad_norm": 0.6832264065742493, + 
"learning_rate": 0.0014939722077120558, + "loss": 2.461, + "step": 474 + }, + { + "epoch": 0.05010548523206751, + "grad_norm": 0.7463812828063965, + "learning_rate": 0.0014939403997764795, + "loss": 2.4516, + "step": 475 + }, + { + "epoch": 0.05021097046413502, + "grad_norm": 0.6634633541107178, + "learning_rate": 0.001493908508478672, + "loss": 2.4357, + "step": 476 + }, + { + "epoch": 0.05031645569620253, + "grad_norm": 0.8325605392456055, + "learning_rate": 0.0014938765338222068, + "loss": 2.4034, + "step": 477 + }, + { + "epoch": 0.05042194092827004, + "grad_norm": 0.5103318095207214, + "learning_rate": 0.0014938444758106665, + "loss": 2.4726, + "step": 478 + }, + { + "epoch": 0.050527426160337556, + "grad_norm": 0.8697096705436707, + "learning_rate": 0.0014938123344476436, + "loss": 2.4322, + "step": 479 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 0.9535786509513855, + "learning_rate": 0.0014937801097367396, + "loss": 2.4448, + "step": 480 + }, + { + "epoch": 0.050738396624472576, + "grad_norm": 0.6288813948631287, + "learning_rate": 0.0014937478016815657, + "loss": 2.3912, + "step": 481 + }, + { + "epoch": 0.05084388185654008, + "grad_norm": 0.5717400908470154, + "learning_rate": 0.0014937154102857416, + "loss": 2.4442, + "step": 482 + }, + { + "epoch": 0.050949367088607596, + "grad_norm": 0.5772767663002014, + "learning_rate": 0.0014936829355528976, + "loss": 2.4641, + "step": 483 + }, + { + "epoch": 0.0510548523206751, + "grad_norm": 0.5387223958969116, + "learning_rate": 0.0014936503774866721, + "loss": 2.4168, + "step": 484 + }, + { + "epoch": 0.051160337552742616, + "grad_norm": 0.5261485576629639, + "learning_rate": 0.0014936177360907138, + "loss": 2.3783, + "step": 485 + }, + { + "epoch": 0.05126582278481013, + "grad_norm": 0.5893489718437195, + "learning_rate": 0.00149358501136868, + "loss": 2.3817, + "step": 486 + }, + { + "epoch": 0.051371308016877636, + "grad_norm": 0.7373615503311157, + "learning_rate": 0.0014935522033242379, + "loss": 
2.4373, + "step": 487 + }, + { + "epoch": 0.05147679324894515, + "grad_norm": 0.6597126126289368, + "learning_rate": 0.0014935193119610638, + "loss": 2.4004, + "step": 488 + }, + { + "epoch": 0.051582278481012656, + "grad_norm": 0.6340881586074829, + "learning_rate": 0.0014934863372828432, + "loss": 2.4041, + "step": 489 + }, + { + "epoch": 0.05168776371308017, + "grad_norm": 0.5226415395736694, + "learning_rate": 0.001493453279293271, + "loss": 2.4201, + "step": 490 + }, + { + "epoch": 0.05179324894514768, + "grad_norm": 0.6055819392204285, + "learning_rate": 0.001493420137996052, + "loss": 2.4244, + "step": 491 + }, + { + "epoch": 0.05189873417721519, + "grad_norm": 0.792822003364563, + "learning_rate": 0.0014933869133948992, + "loss": 2.4195, + "step": 492 + }, + { + "epoch": 0.0520042194092827, + "grad_norm": 0.7144798040390015, + "learning_rate": 0.0014933536054935362, + "loss": 2.4202, + "step": 493 + }, + { + "epoch": 0.05210970464135021, + "grad_norm": 0.5331548452377319, + "learning_rate": 0.0014933202142956947, + "loss": 2.4081, + "step": 494 + }, + { + "epoch": 0.05221518987341772, + "grad_norm": 0.9314326047897339, + "learning_rate": 0.0014932867398051168, + "loss": 2.4111, + "step": 495 + }, + { + "epoch": 0.05232067510548523, + "grad_norm": 1.0251197814941406, + "learning_rate": 0.0014932531820255534, + "loss": 2.3895, + "step": 496 + }, + { + "epoch": 0.05242616033755274, + "grad_norm": 0.5922232270240784, + "learning_rate": 0.0014932195409607645, + "loss": 2.4196, + "step": 497 + }, + { + "epoch": 0.052531645569620256, + "grad_norm": 0.7854163646697998, + "learning_rate": 0.0014931858166145203, + "loss": 2.381, + "step": 498 + }, + { + "epoch": 0.05263713080168776, + "grad_norm": 0.979280412197113, + "learning_rate": 0.0014931520089905993, + "loss": 2.4174, + "step": 499 + }, + { + "epoch": 0.052742616033755275, + "grad_norm": 0.6227829456329346, + "learning_rate": 0.0014931181180927902, + "loss": 2.3911, + "step": 500 + }, + { + "epoch": 
0.05284810126582278, + "grad_norm": 0.7529129981994629, + "learning_rate": 0.0014930841439248904, + "loss": 2.4114, + "step": 501 + }, + { + "epoch": 0.052953586497890295, + "grad_norm": 0.987598180770874, + "learning_rate": 0.0014930500864907066, + "loss": 2.4442, + "step": 502 + }, + { + "epoch": 0.05305907172995781, + "grad_norm": 0.5310690402984619, + "learning_rate": 0.001493015945794056, + "loss": 2.3593, + "step": 503 + }, + { + "epoch": 0.053164556962025315, + "grad_norm": 0.9582506418228149, + "learning_rate": 0.0014929817218387632, + "loss": 2.3974, + "step": 504 + }, + { + "epoch": 0.05327004219409283, + "grad_norm": 0.9969832301139832, + "learning_rate": 0.0014929474146286638, + "loss": 2.3962, + "step": 505 + }, + { + "epoch": 0.053375527426160335, + "grad_norm": 0.5360527038574219, + "learning_rate": 0.001492913024167602, + "loss": 2.433, + "step": 506 + }, + { + "epoch": 0.05348101265822785, + "grad_norm": 0.9626401662826538, + "learning_rate": 0.001492878550459431, + "loss": 2.4172, + "step": 507 + }, + { + "epoch": 0.05358649789029536, + "grad_norm": 0.5570617318153381, + "learning_rate": 0.0014928439935080143, + "loss": 2.3808, + "step": 508 + }, + { + "epoch": 0.05369198312236287, + "grad_norm": 0.9375855922698975, + "learning_rate": 0.0014928093533172243, + "loss": 2.3592, + "step": 509 + }, + { + "epoch": 0.05379746835443038, + "grad_norm": 0.8565971255302429, + "learning_rate": 0.001492774629890942, + "loss": 2.4171, + "step": 510 + }, + { + "epoch": 0.05390295358649789, + "grad_norm": 0.6702606678009033, + "learning_rate": 0.0014927398232330584, + "loss": 2.3748, + "step": 511 + }, + { + "epoch": 0.0540084388185654, + "grad_norm": 1.020655632019043, + "learning_rate": 0.0014927049333474743, + "loss": 2.3986, + "step": 512 + }, + { + "epoch": 0.05411392405063291, + "grad_norm": 0.5249305963516235, + "learning_rate": 0.001492669960238099, + "loss": 2.3838, + "step": 513 + }, + { + "epoch": 0.05421940928270042, + "grad_norm": 0.7942704558372498, 
+ "learning_rate": 0.001492634903908851, + "loss": 2.4008, + "step": 514 + }, + { + "epoch": 0.054324894514767935, + "grad_norm": 0.6520714163780212, + "learning_rate": 0.001492599764363659, + "loss": 2.358, + "step": 515 + }, + { + "epoch": 0.05443037974683544, + "grad_norm": 0.5873451828956604, + "learning_rate": 0.0014925645416064605, + "loss": 2.3813, + "step": 516 + }, + { + "epoch": 0.054535864978902955, + "grad_norm": 0.5906271934509277, + "learning_rate": 0.0014925292356412025, + "loss": 2.3733, + "step": 517 + }, + { + "epoch": 0.05464135021097046, + "grad_norm": 0.4984043836593628, + "learning_rate": 0.001492493846471841, + "loss": 2.3238, + "step": 518 + }, + { + "epoch": 0.054746835443037975, + "grad_norm": 0.5583387017250061, + "learning_rate": 0.0014924583741023417, + "loss": 2.3848, + "step": 519 + }, + { + "epoch": 0.05485232067510549, + "grad_norm": 0.5345872640609741, + "learning_rate": 0.001492422818536679, + "loss": 2.3808, + "step": 520 + }, + { + "epoch": 0.054957805907172995, + "grad_norm": 0.6082392930984497, + "learning_rate": 0.0014923871797788378, + "loss": 2.3797, + "step": 521 + }, + { + "epoch": 0.05506329113924051, + "grad_norm": 0.4840923845767975, + "learning_rate": 0.001492351457832811, + "loss": 2.3444, + "step": 522 + }, + { + "epoch": 0.055168776371308015, + "grad_norm": 0.6127695441246033, + "learning_rate": 0.0014923156527026017, + "loss": 2.3739, + "step": 523 + }, + { + "epoch": 0.05527426160337553, + "grad_norm": 0.5477768778800964, + "learning_rate": 0.001492279764392222, + "loss": 2.3736, + "step": 524 + }, + { + "epoch": 0.055379746835443035, + "grad_norm": 0.5384707450866699, + "learning_rate": 0.0014922437929056934, + "loss": 2.361, + "step": 525 + }, + { + "epoch": 0.05548523206751055, + "grad_norm": 0.7092294096946716, + "learning_rate": 0.0014922077382470468, + "loss": 2.356, + "step": 526 + }, + { + "epoch": 0.05559071729957806, + "grad_norm": 0.5635525584220886, + "learning_rate": 0.001492171600420322, + "loss": 
2.3389, + "step": 527 + }, + { + "epoch": 0.05569620253164557, + "grad_norm": 0.5487437844276428, + "learning_rate": 0.0014921353794295684, + "loss": 2.3595, + "step": 528 + }, + { + "epoch": 0.05580168776371308, + "grad_norm": 0.5655043721199036, + "learning_rate": 0.001492099075278845, + "loss": 2.3716, + "step": 529 + }, + { + "epoch": 0.05590717299578059, + "grad_norm": 0.4990295469760895, + "learning_rate": 0.00149206268797222, + "loss": 2.3496, + "step": 530 + }, + { + "epoch": 0.0560126582278481, + "grad_norm": 0.543310821056366, + "learning_rate": 0.0014920262175137703, + "loss": 2.358, + "step": 531 + }, + { + "epoch": 0.056118143459915615, + "grad_norm": 0.5448722243309021, + "learning_rate": 0.001491989663907583, + "loss": 2.3189, + "step": 532 + }, + { + "epoch": 0.05622362869198312, + "grad_norm": 0.6268422603607178, + "learning_rate": 0.001491953027157754, + "loss": 2.3465, + "step": 533 + }, + { + "epoch": 0.056329113924050635, + "grad_norm": 0.6035872101783752, + "learning_rate": 0.0014919163072683883, + "loss": 2.3443, + "step": 534 + }, + { + "epoch": 0.05643459915611814, + "grad_norm": 0.5340408682823181, + "learning_rate": 0.0014918795042436013, + "loss": 2.3492, + "step": 535 + }, + { + "epoch": 0.056540084388185655, + "grad_norm": 0.5242352485656738, + "learning_rate": 0.001491842618087516, + "loss": 2.3414, + "step": 536 + }, + { + "epoch": 0.05664556962025316, + "grad_norm": 0.5813149213790894, + "learning_rate": 0.0014918056488042665, + "loss": 2.3099, + "step": 537 + }, + { + "epoch": 0.056751054852320675, + "grad_norm": 0.6053271889686584, + "learning_rate": 0.0014917685963979949, + "loss": 2.3535, + "step": 538 + }, + { + "epoch": 0.05685654008438819, + "grad_norm": 0.5727052688598633, + "learning_rate": 0.0014917314608728536, + "loss": 2.3418, + "step": 539 + }, + { + "epoch": 0.056962025316455694, + "grad_norm": 0.6049475073814392, + "learning_rate": 0.0014916942422330032, + "loss": 2.3462, + "step": 540 + }, + { + "epoch": 
0.05706751054852321, + "grad_norm": 0.54838627576828, + "learning_rate": 0.0014916569404826146, + "loss": 2.3687, + "step": 541 + }, + { + "epoch": 0.057172995780590714, + "grad_norm": 0.6050218343734741, + "learning_rate": 0.0014916195556258676, + "loss": 2.3524, + "step": 542 + }, + { + "epoch": 0.05727848101265823, + "grad_norm": 0.6474766135215759, + "learning_rate": 0.0014915820876669514, + "loss": 2.3207, + "step": 543 + }, + { + "epoch": 0.05738396624472574, + "grad_norm": 0.5258772969245911, + "learning_rate": 0.0014915445366100641, + "loss": 2.3471, + "step": 544 + }, + { + "epoch": 0.05748945147679325, + "grad_norm": 0.5153073072433472, + "learning_rate": 0.0014915069024594144, + "loss": 2.3524, + "step": 545 + }, + { + "epoch": 0.05759493670886076, + "grad_norm": 0.5898798704147339, + "learning_rate": 0.0014914691852192183, + "loss": 2.3339, + "step": 546 + }, + { + "epoch": 0.05770042194092827, + "grad_norm": 0.530746579170227, + "learning_rate": 0.001491431384893703, + "loss": 2.3398, + "step": 547 + }, + { + "epoch": 0.05780590717299578, + "grad_norm": 0.5281400084495544, + "learning_rate": 0.0014913935014871035, + "loss": 2.3525, + "step": 548 + }, + { + "epoch": 0.057911392405063294, + "grad_norm": 0.5926679968833923, + "learning_rate": 0.0014913555350036657, + "loss": 2.3549, + "step": 549 + }, + { + "epoch": 0.0580168776371308, + "grad_norm": 0.512959361076355, + "learning_rate": 0.001491317485447643, + "loss": 2.3055, + "step": 550 + }, + { + "epoch": 0.058122362869198314, + "grad_norm": 0.5841031074523926, + "learning_rate": 0.0014912793528233, + "loss": 2.2892, + "step": 551 + }, + { + "epoch": 0.05822784810126582, + "grad_norm": 0.6441091299057007, + "learning_rate": 0.0014912411371349088, + "loss": 2.3221, + "step": 552 + }, + { + "epoch": 0.058333333333333334, + "grad_norm": 0.7033314108848572, + "learning_rate": 0.0014912028383867522, + "loss": 2.3277, + "step": 553 + }, + { + "epoch": 0.05843881856540084, + "grad_norm": 0.6076004505157471, 
+ "learning_rate": 0.0014911644565831217, + "loss": 2.2647, + "step": 554 + }, + { + "epoch": 0.058544303797468354, + "grad_norm": 0.7038746476173401, + "learning_rate": 0.001491125991728318, + "loss": 2.2745, + "step": 555 + }, + { + "epoch": 0.05864978902953587, + "grad_norm": 0.7687201499938965, + "learning_rate": 0.001491087443826651, + "loss": 2.3215, + "step": 556 + }, + { + "epoch": 0.058755274261603374, + "grad_norm": 0.5334739685058594, + "learning_rate": 0.0014910488128824409, + "loss": 2.3409, + "step": 557 + }, + { + "epoch": 0.05886075949367089, + "grad_norm": 0.5491735339164734, + "learning_rate": 0.0014910100989000159, + "loss": 2.2949, + "step": 558 + }, + { + "epoch": 0.058966244725738394, + "grad_norm": 0.6169052720069885, + "learning_rate": 0.0014909713018837144, + "loss": 2.276, + "step": 559 + }, + { + "epoch": 0.05907172995780591, + "grad_norm": 0.5764474272727966, + "learning_rate": 0.0014909324218378838, + "loss": 2.3061, + "step": 560 + }, + { + "epoch": 0.05917721518987342, + "grad_norm": 0.6257509589195251, + "learning_rate": 0.0014908934587668805, + "loss": 2.3041, + "step": 561 + }, + { + "epoch": 0.05928270042194093, + "grad_norm": 0.5847519636154175, + "learning_rate": 0.001490854412675071, + "loss": 2.2919, + "step": 562 + }, + { + "epoch": 0.05938818565400844, + "grad_norm": 0.6064106822013855, + "learning_rate": 0.0014908152835668301, + "loss": 2.3384, + "step": 563 + }, + { + "epoch": 0.05949367088607595, + "grad_norm": 0.5572032332420349, + "learning_rate": 0.0014907760714465428, + "loss": 2.315, + "step": 564 + }, + { + "epoch": 0.05959915611814346, + "grad_norm": 0.5540280342102051, + "learning_rate": 0.0014907367763186026, + "loss": 2.3143, + "step": 565 + }, + { + "epoch": 0.05970464135021097, + "grad_norm": 0.5532748699188232, + "learning_rate": 0.0014906973981874132, + "loss": 2.3204, + "step": 566 + }, + { + "epoch": 0.05981012658227848, + "grad_norm": 0.6510828137397766, + "learning_rate": 0.0014906579370573868, + "loss": 
2.3284, + "step": 567 + }, + { + "epoch": 0.059915611814345994, + "grad_norm": 0.6977630853652954, + "learning_rate": 0.0014906183929329455, + "loss": 2.2689, + "step": 568 + }, + { + "epoch": 0.0600210970464135, + "grad_norm": 0.5492218732833862, + "learning_rate": 0.00149057876581852, + "loss": 2.2525, + "step": 569 + }, + { + "epoch": 0.060126582278481014, + "grad_norm": 0.5315922498703003, + "learning_rate": 0.0014905390557185508, + "loss": 2.3407, + "step": 570 + }, + { + "epoch": 0.06023206751054852, + "grad_norm": 0.5523985028266907, + "learning_rate": 0.0014904992626374879, + "loss": 2.3239, + "step": 571 + }, + { + "epoch": 0.060337552742616034, + "grad_norm": 0.737938642501831, + "learning_rate": 0.0014904593865797903, + "loss": 2.3232, + "step": 572 + }, + { + "epoch": 0.06044303797468355, + "grad_norm": 0.7473946213722229, + "learning_rate": 0.0014904194275499258, + "loss": 2.3184, + "step": 573 + }, + { + "epoch": 0.060548523206751054, + "grad_norm": 0.6278741359710693, + "learning_rate": 0.0014903793855523726, + "loss": 2.2777, + "step": 574 + }, + { + "epoch": 0.06065400843881857, + "grad_norm": 0.5592268705368042, + "learning_rate": 0.0014903392605916175, + "loss": 2.3313, + "step": 575 + }, + { + "epoch": 0.060759493670886074, + "grad_norm": 0.6486956477165222, + "learning_rate": 0.0014902990526721564, + "loss": 2.3176, + "step": 576 + }, + { + "epoch": 0.06086497890295359, + "grad_norm": 0.5842364430427551, + "learning_rate": 0.0014902587617984951, + "loss": 2.2939, + "step": 577 + }, + { + "epoch": 0.0609704641350211, + "grad_norm": 0.601065993309021, + "learning_rate": 0.0014902183879751483, + "loss": 2.2996, + "step": 578 + }, + { + "epoch": 0.06107594936708861, + "grad_norm": 0.6625818014144897, + "learning_rate": 0.0014901779312066399, + "loss": 2.3005, + "step": 579 + }, + { + "epoch": 0.06118143459915612, + "grad_norm": 0.6543604135513306, + "learning_rate": 0.0014901373914975036, + "loss": 2.305, + "step": 580 + }, + { + "epoch": 
0.06128691983122363, + "grad_norm": 0.5844017267227173, + "learning_rate": 0.0014900967688522818, + "loss": 2.2998, + "step": 581 + }, + { + "epoch": 0.06139240506329114, + "grad_norm": 0.6784266233444214, + "learning_rate": 0.0014900560632755265, + "loss": 2.2661, + "step": 582 + }, + { + "epoch": 0.06149789029535865, + "grad_norm": 0.6648930907249451, + "learning_rate": 0.0014900152747717994, + "loss": 2.3152, + "step": 583 + }, + { + "epoch": 0.06160337552742616, + "grad_norm": 0.7066315412521362, + "learning_rate": 0.0014899744033456705, + "loss": 2.2819, + "step": 584 + }, + { + "epoch": 0.061708860759493674, + "grad_norm": 0.8075369596481323, + "learning_rate": 0.0014899334490017198, + "loss": 2.3105, + "step": 585 + }, + { + "epoch": 0.06181434599156118, + "grad_norm": 0.7078437805175781, + "learning_rate": 0.0014898924117445367, + "loss": 2.2834, + "step": 586 + }, + { + "epoch": 0.061919831223628694, + "grad_norm": 0.5865651369094849, + "learning_rate": 0.0014898512915787192, + "loss": 2.2514, + "step": 587 + }, + { + "epoch": 0.0620253164556962, + "grad_norm": 0.5735505819320679, + "learning_rate": 0.0014898100885088754, + "loss": 2.2561, + "step": 588 + }, + { + "epoch": 0.06213080168776371, + "grad_norm": 0.5312996506690979, + "learning_rate": 0.001489768802539622, + "loss": 2.2902, + "step": 589 + }, + { + "epoch": 0.06223628691983123, + "grad_norm": 0.579434871673584, + "learning_rate": 0.0014897274336755856, + "loss": 2.2317, + "step": 590 + }, + { + "epoch": 0.06234177215189873, + "grad_norm": 0.5723267793655396, + "learning_rate": 0.0014896859819214018, + "loss": 2.2607, + "step": 591 + }, + { + "epoch": 0.06244725738396625, + "grad_norm": 0.5042039155960083, + "learning_rate": 0.001489644447281715, + "loss": 2.2563, + "step": 592 + }, + { + "epoch": 0.06255274261603376, + "grad_norm": 0.49723514914512634, + "learning_rate": 0.00148960282976118, + "loss": 2.2899, + "step": 593 + }, + { + "epoch": 0.06265822784810127, + "grad_norm": 
0.5256079435348511, + "learning_rate": 0.0014895611293644596, + "loss": 2.2446, + "step": 594 + }, + { + "epoch": 0.06276371308016877, + "grad_norm": 0.5386826992034912, + "learning_rate": 0.0014895193460962271, + "loss": 2.2669, + "step": 595 + }, + { + "epoch": 0.0628691983122363, + "grad_norm": 0.6000149250030518, + "learning_rate": 0.001489477479961164, + "loss": 2.2232, + "step": 596 + }, + { + "epoch": 0.0629746835443038, + "grad_norm": 0.7514697909355164, + "learning_rate": 0.0014894355309639621, + "loss": 2.2631, + "step": 597 + }, + { + "epoch": 0.0630801687763713, + "grad_norm": 0.6609325408935547, + "learning_rate": 0.0014893934991093221, + "loss": 2.3093, + "step": 598 + }, + { + "epoch": 0.06318565400843881, + "grad_norm": 0.5420136451721191, + "learning_rate": 0.0014893513844019533, + "loss": 2.241, + "step": 599 + }, + { + "epoch": 0.06329113924050633, + "grad_norm": 0.7649706602096558, + "learning_rate": 0.001489309186846575, + "loss": 2.2642, + "step": 600 + }, + { + "epoch": 0.06339662447257384, + "grad_norm": 0.9391316175460815, + "learning_rate": 0.001489266906447916, + "loss": 2.2781, + "step": 601 + }, + { + "epoch": 0.06350210970464135, + "grad_norm": 0.8751060366630554, + "learning_rate": 0.0014892245432107138, + "loss": 2.2612, + "step": 602 + }, + { + "epoch": 0.06360759493670887, + "grad_norm": 0.7139244079589844, + "learning_rate": 0.0014891820971397152, + "loss": 2.2835, + "step": 603 + }, + { + "epoch": 0.06371308016877637, + "grad_norm": 0.5141409039497375, + "learning_rate": 0.001489139568239677, + "loss": 2.2228, + "step": 604 + }, + { + "epoch": 0.06381856540084388, + "grad_norm": 0.7053531408309937, + "learning_rate": 0.0014890969565153642, + "loss": 2.2659, + "step": 605 + }, + { + "epoch": 0.06392405063291139, + "grad_norm": 0.9350357055664062, + "learning_rate": 0.0014890542619715522, + "loss": 2.2432, + "step": 606 + }, + { + "epoch": 0.0640295358649789, + "grad_norm": 0.7118342518806458, + "learning_rate": 
0.0014890114846130248, + "loss": 2.2805, + "step": 607 + }, + { + "epoch": 0.06413502109704641, + "grad_norm": 0.5741058588027954, + "learning_rate": 0.0014889686244445755, + "loss": 2.309, + "step": 608 + }, + { + "epoch": 0.06424050632911392, + "grad_norm": 0.6944141983985901, + "learning_rate": 0.0014889256814710071, + "loss": 2.2587, + "step": 609 + }, + { + "epoch": 0.06434599156118144, + "grad_norm": 0.7553461194038391, + "learning_rate": 0.0014888826556971313, + "loss": 2.2661, + "step": 610 + }, + { + "epoch": 0.06445147679324895, + "grad_norm": 0.679198682308197, + "learning_rate": 0.0014888395471277698, + "loss": 2.2655, + "step": 611 + }, + { + "epoch": 0.06455696202531645, + "grad_norm": 0.549150824546814, + "learning_rate": 0.0014887963557677526, + "loss": 2.2585, + "step": 612 + }, + { + "epoch": 0.06466244725738397, + "grad_norm": 0.5800547003746033, + "learning_rate": 0.00148875308162192, + "loss": 2.2362, + "step": 613 + }, + { + "epoch": 0.06476793248945148, + "grad_norm": 0.6962719559669495, + "learning_rate": 0.0014887097246951205, + "loss": 2.212, + "step": 614 + }, + { + "epoch": 0.06487341772151899, + "grad_norm": 0.5916872620582581, + "learning_rate": 0.001488666284992213, + "loss": 2.2517, + "step": 615 + }, + { + "epoch": 0.06497890295358649, + "grad_norm": 0.616729199886322, + "learning_rate": 0.001488622762518065, + "loss": 2.2504, + "step": 616 + }, + { + "epoch": 0.06508438818565401, + "grad_norm": 0.7586269378662109, + "learning_rate": 0.0014885791572775533, + "loss": 2.2403, + "step": 617 + }, + { + "epoch": 0.06518987341772152, + "grad_norm": 0.8340543508529663, + "learning_rate": 0.0014885354692755642, + "loss": 2.2712, + "step": 618 + }, + { + "epoch": 0.06529535864978903, + "grad_norm": 0.6970111727714539, + "learning_rate": 0.001488491698516993, + "loss": 2.2872, + "step": 619 + }, + { + "epoch": 0.06540084388185655, + "grad_norm": 0.5284109711647034, + "learning_rate": 0.0014884478450067444, + "loss": 2.2478, + "step": 620 + }, 
+ { + "epoch": 0.06550632911392405, + "grad_norm": 0.5848127603530884, + "learning_rate": 0.001488403908749733, + "loss": 2.234, + "step": 621 + }, + { + "epoch": 0.06561181434599156, + "grad_norm": 0.7411887645721436, + "learning_rate": 0.0014883598897508811, + "loss": 2.2653, + "step": 622 + }, + { + "epoch": 0.06571729957805907, + "grad_norm": 0.815963625907898, + "learning_rate": 0.0014883157880151222, + "loss": 2.2364, + "step": 623 + }, + { + "epoch": 0.06582278481012659, + "grad_norm": 0.6434774398803711, + "learning_rate": 0.0014882716035473974, + "loss": 2.2079, + "step": 624 + }, + { + "epoch": 0.06592827004219409, + "grad_norm": 0.5349832773208618, + "learning_rate": 0.001488227336352658, + "loss": 2.2354, + "step": 625 + }, + { + "epoch": 0.0660337552742616, + "grad_norm": 0.5482350587844849, + "learning_rate": 0.0014881829864358644, + "loss": 2.2256, + "step": 626 + }, + { + "epoch": 0.06613924050632912, + "grad_norm": 0.5299554467201233, + "learning_rate": 0.0014881385538019867, + "loss": 2.257, + "step": 627 + }, + { + "epoch": 0.06624472573839663, + "grad_norm": 0.53357994556427, + "learning_rate": 0.0014880940384560028, + "loss": 2.2285, + "step": 628 + }, + { + "epoch": 0.06635021097046413, + "grad_norm": 0.5468655824661255, + "learning_rate": 0.0014880494404029016, + "loss": 2.2482, + "step": 629 + }, + { + "epoch": 0.06645569620253164, + "grad_norm": 0.4894934594631195, + "learning_rate": 0.0014880047596476807, + "loss": 2.199, + "step": 630 + }, + { + "epoch": 0.06656118143459916, + "grad_norm": 0.5461459755897522, + "learning_rate": 0.0014879599961953461, + "loss": 2.2051, + "step": 631 + }, + { + "epoch": 0.06666666666666667, + "grad_norm": 0.4994697868824005, + "learning_rate": 0.0014879151500509142, + "loss": 2.2498, + "step": 632 + }, + { + "epoch": 0.06677215189873417, + "grad_norm": 0.7065902352333069, + "learning_rate": 0.0014878702212194103, + "loss": 2.2385, + "step": 633 + }, + { + "epoch": 0.06687763713080169, + "grad_norm": 
1.0811246633529663, + "learning_rate": 0.0014878252097058685, + "loss": 2.2139, + "step": 634 + }, + { + "epoch": 0.0669831223628692, + "grad_norm": 1.0254247188568115, + "learning_rate": 0.001487780115515333, + "loss": 2.2866, + "step": 635 + }, + { + "epoch": 0.0670886075949367, + "grad_norm": 0.6816698908805847, + "learning_rate": 0.0014877349386528565, + "loss": 2.2261, + "step": 636 + }, + { + "epoch": 0.06719409282700423, + "grad_norm": 0.5424239039421082, + "learning_rate": 0.0014876896791235015, + "loss": 2.2775, + "step": 637 + }, + { + "epoch": 0.06729957805907173, + "grad_norm": 0.7969256639480591, + "learning_rate": 0.0014876443369323397, + "loss": 2.2603, + "step": 638 + }, + { + "epoch": 0.06740506329113924, + "grad_norm": 0.6586845517158508, + "learning_rate": 0.0014875989120844517, + "loss": 2.2431, + "step": 639 + }, + { + "epoch": 0.06751054852320675, + "grad_norm": 0.5070925951004028, + "learning_rate": 0.0014875534045849274, + "loss": 2.2338, + "step": 640 + }, + { + "epoch": 0.06761603375527427, + "grad_norm": 0.7614973187446594, + "learning_rate": 0.0014875078144388665, + "loss": 2.2235, + "step": 641 + }, + { + "epoch": 0.06772151898734177, + "grad_norm": 0.8199191093444824, + "learning_rate": 0.0014874621416513774, + "loss": 2.2361, + "step": 642 + }, + { + "epoch": 0.06782700421940928, + "grad_norm": 0.6325783729553223, + "learning_rate": 0.001487416386227578, + "loss": 2.2432, + "step": 643 + }, + { + "epoch": 0.0679324894514768, + "grad_norm": 0.5748120546340942, + "learning_rate": 0.0014873705481725952, + "loss": 2.1873, + "step": 644 + }, + { + "epoch": 0.0680379746835443, + "grad_norm": 0.5734559297561646, + "learning_rate": 0.0014873246274915658, + "loss": 2.1965, + "step": 645 + }, + { + "epoch": 0.06814345991561181, + "grad_norm": 0.5379096269607544, + "learning_rate": 0.0014872786241896354, + "loss": 2.1756, + "step": 646 + }, + { + "epoch": 0.06824894514767932, + "grad_norm": 0.6973960995674133, + "learning_rate": 
0.0014872325382719587, + "loss": 2.2649, + "step": 647 + }, + { + "epoch": 0.06835443037974684, + "grad_norm": 0.935612678527832, + "learning_rate": 0.0014871863697436998, + "loss": 2.282, + "step": 648 + }, + { + "epoch": 0.06845991561181435, + "grad_norm": 1.0585988759994507, + "learning_rate": 0.0014871401186100322, + "loss": 2.2397, + "step": 649 + }, + { + "epoch": 0.06856540084388185, + "grad_norm": 0.6079781651496887, + "learning_rate": 0.0014870937848761388, + "loss": 2.2455, + "step": 650 + }, + { + "epoch": 0.06867088607594937, + "grad_norm": 0.6824865341186523, + "learning_rate": 0.0014870473685472112, + "loss": 2.2023, + "step": 651 + }, + { + "epoch": 0.06877637130801688, + "grad_norm": 1.06521475315094, + "learning_rate": 0.0014870008696284507, + "loss": 2.2336, + "step": 652 + }, + { + "epoch": 0.06888185654008439, + "grad_norm": 0.8096982836723328, + "learning_rate": 0.0014869542881250678, + "loss": 2.222, + "step": 653 + }, + { + "epoch": 0.0689873417721519, + "grad_norm": 0.5370252728462219, + "learning_rate": 0.001486907624042282, + "loss": 2.2373, + "step": 654 + }, + { + "epoch": 0.06909282700421941, + "grad_norm": 0.9983959794044495, + "learning_rate": 0.0014868608773853226, + "loss": 2.231, + "step": 655 + }, + { + "epoch": 0.06919831223628692, + "grad_norm": 0.8902814984321594, + "learning_rate": 0.0014868140481594273, + "loss": 2.2335, + "step": 656 + }, + { + "epoch": 0.06930379746835443, + "grad_norm": 0.576537013053894, + "learning_rate": 0.001486767136369844, + "loss": 2.1979, + "step": 657 + }, + { + "epoch": 0.06940928270042195, + "grad_norm": 0.6677432060241699, + "learning_rate": 0.0014867201420218292, + "loss": 2.1932, + "step": 658 + }, + { + "epoch": 0.06951476793248945, + "grad_norm": 0.8495171666145325, + "learning_rate": 0.0014866730651206487, + "loss": 2.2286, + "step": 659 + }, + { + "epoch": 0.06962025316455696, + "grad_norm": 0.6086940169334412, + "learning_rate": 0.001486625905671578, + "loss": 2.2024, + "step": 660 + }, 
+ { + "epoch": 0.06972573839662448, + "grad_norm": 0.59086674451828, + "learning_rate": 0.0014865786636799015, + "loss": 2.1938, + "step": 661 + }, + { + "epoch": 0.06983122362869199, + "grad_norm": 0.7763571739196777, + "learning_rate": 0.0014865313391509126, + "loss": 2.1973, + "step": 662 + }, + { + "epoch": 0.06993670886075949, + "grad_norm": 0.7936520576477051, + "learning_rate": 0.0014864839320899148, + "loss": 2.1437, + "step": 663 + }, + { + "epoch": 0.070042194092827, + "grad_norm": 0.6472890377044678, + "learning_rate": 0.0014864364425022198, + "loss": 2.2286, + "step": 664 + }, + { + "epoch": 0.07014767932489452, + "grad_norm": 0.5981523990631104, + "learning_rate": 0.001486388870393149, + "loss": 2.2109, + "step": 665 + }, + { + "epoch": 0.07025316455696203, + "grad_norm": 0.5551098585128784, + "learning_rate": 0.0014863412157680336, + "loss": 2.1695, + "step": 666 + }, + { + "epoch": 0.07035864978902953, + "grad_norm": 0.6456543803215027, + "learning_rate": 0.0014862934786322131, + "loss": 2.2208, + "step": 667 + }, + { + "epoch": 0.07046413502109705, + "grad_norm": 0.6900268793106079, + "learning_rate": 0.0014862456589910368, + "loss": 2.1741, + "step": 668 + }, + { + "epoch": 0.07056962025316456, + "grad_norm": 0.510040819644928, + "learning_rate": 0.0014861977568498632, + "loss": 2.228, + "step": 669 + }, + { + "epoch": 0.07067510548523206, + "grad_norm": 0.6472639441490173, + "learning_rate": 0.00148614977221406, + "loss": 2.2234, + "step": 670 + }, + { + "epoch": 0.07078059071729957, + "grad_norm": 0.5718596577644348, + "learning_rate": 0.001486101705089004, + "loss": 2.209, + "step": 671 + }, + { + "epoch": 0.07088607594936709, + "grad_norm": 0.6969082951545715, + "learning_rate": 0.0014860535554800814, + "loss": 2.1919, + "step": 672 + }, + { + "epoch": 0.0709915611814346, + "grad_norm": 0.8053690195083618, + "learning_rate": 0.0014860053233926875, + "loss": 2.2109, + "step": 673 + }, + { + "epoch": 0.0710970464135021, + "grad_norm": 
0.6904172897338867, + "learning_rate": 0.0014859570088322273, + "loss": 2.151, + "step": 674 + }, + { + "epoch": 0.07120253164556962, + "grad_norm": 0.5822588205337524, + "learning_rate": 0.0014859086118041145, + "loss": 2.2019, + "step": 675 + }, + { + "epoch": 0.07130801687763713, + "grad_norm": 0.6237710118293762, + "learning_rate": 0.001485860132313772, + "loss": 2.195, + "step": 676 + }, + { + "epoch": 0.07141350210970464, + "grad_norm": 0.6449328660964966, + "learning_rate": 0.0014858115703666325, + "loss": 2.173, + "step": 677 + }, + { + "epoch": 0.07151898734177216, + "grad_norm": 0.7748191952705383, + "learning_rate": 0.001485762925968137, + "loss": 2.1978, + "step": 678 + }, + { + "epoch": 0.07162447257383966, + "grad_norm": 0.7912722229957581, + "learning_rate": 0.0014857141991237372, + "loss": 2.1732, + "step": 679 + }, + { + "epoch": 0.07172995780590717, + "grad_norm": 0.6376476883888245, + "learning_rate": 0.0014856653898388927, + "loss": 2.2053, + "step": 680 + }, + { + "epoch": 0.07183544303797468, + "grad_norm": 0.5323092937469482, + "learning_rate": 0.0014856164981190728, + "loss": 2.1998, + "step": 681 + }, + { + "epoch": 0.0719409282700422, + "grad_norm": 0.8641214370727539, + "learning_rate": 0.0014855675239697564, + "loss": 2.2333, + "step": 682 + }, + { + "epoch": 0.0720464135021097, + "grad_norm": 1.1687265634536743, + "learning_rate": 0.0014855184673964311, + "loss": 2.2059, + "step": 683 + }, + { + "epoch": 0.07215189873417721, + "grad_norm": 0.7718749642372131, + "learning_rate": 0.0014854693284045936, + "loss": 2.2144, + "step": 684 + }, + { + "epoch": 0.07225738396624473, + "grad_norm": 0.656460165977478, + "learning_rate": 0.0014854201069997505, + "loss": 2.1816, + "step": 685 + }, + { + "epoch": 0.07236286919831224, + "grad_norm": 0.9055923819541931, + "learning_rate": 0.0014853708031874176, + "loss": 2.2308, + "step": 686 + }, + { + "epoch": 0.07246835443037974, + "grad_norm": 0.6384252905845642, + "learning_rate": 
0.001485321416973119, + "loss": 2.1922, + "step": 687 + }, + { + "epoch": 0.07257383966244725, + "grad_norm": 0.6877858638763428, + "learning_rate": 0.0014852719483623893, + "loss": 2.2309, + "step": 688 + }, + { + "epoch": 0.07267932489451477, + "grad_norm": 0.7388851642608643, + "learning_rate": 0.001485222397360771, + "loss": 2.1528, + "step": 689 + }, + { + "epoch": 0.07278481012658228, + "grad_norm": 0.5498057007789612, + "learning_rate": 0.001485172763973817, + "loss": 2.2161, + "step": 690 + }, + { + "epoch": 0.07289029535864978, + "grad_norm": 0.637596607208252, + "learning_rate": 0.0014851230482070892, + "loss": 2.2063, + "step": 691 + }, + { + "epoch": 0.0729957805907173, + "grad_norm": 0.6676555871963501, + "learning_rate": 0.001485073250066158, + "loss": 2.1721, + "step": 692 + }, + { + "epoch": 0.07310126582278481, + "grad_norm": 0.6060968637466431, + "learning_rate": 0.0014850233695566034, + "loss": 2.1956, + "step": 693 + }, + { + "epoch": 0.07320675105485232, + "grad_norm": 0.660168468952179, + "learning_rate": 0.0014849734066840158, + "loss": 2.2252, + "step": 694 + }, + { + "epoch": 0.07331223628691984, + "grad_norm": 0.6559826731681824, + "learning_rate": 0.0014849233614539926, + "loss": 2.1932, + "step": 695 + }, + { + "epoch": 0.07341772151898734, + "grad_norm": 0.6207395792007446, + "learning_rate": 0.001484873233872142, + "loss": 2.1637, + "step": 696 + }, + { + "epoch": 0.07352320675105485, + "grad_norm": 0.7117727994918823, + "learning_rate": 0.0014848230239440812, + "loss": 2.1884, + "step": 697 + }, + { + "epoch": 0.07362869198312236, + "grad_norm": 0.652306854724884, + "learning_rate": 0.0014847727316754367, + "loss": 2.1584, + "step": 698 + }, + { + "epoch": 0.07373417721518988, + "grad_norm": 0.6645639538764954, + "learning_rate": 0.0014847223570718436, + "loss": 2.1544, + "step": 699 + }, + { + "epoch": 0.07383966244725738, + "grad_norm": 0.6677911281585693, + "learning_rate": 0.0014846719001389466, + "loss": 2.1506, + "step": 700 + 
}, + { + "epoch": 0.07394514767932489, + "grad_norm": 0.5429142713546753, + "learning_rate": 0.0014846213608823997, + "loss": 2.1343, + "step": 701 + }, + { + "epoch": 0.07405063291139241, + "grad_norm": 0.7018622756004333, + "learning_rate": 0.0014845707393078664, + "loss": 2.1536, + "step": 702 + }, + { + "epoch": 0.07415611814345992, + "grad_norm": 0.6145892143249512, + "learning_rate": 0.0014845200354210186, + "loss": 2.2027, + "step": 703 + }, + { + "epoch": 0.07426160337552742, + "grad_norm": 0.5414426922798157, + "learning_rate": 0.0014844692492275385, + "loss": 2.1698, + "step": 704 + }, + { + "epoch": 0.07436708860759493, + "grad_norm": 0.5705887079238892, + "learning_rate": 0.0014844183807331164, + "loss": 2.1603, + "step": 705 + }, + { + "epoch": 0.07447257383966245, + "grad_norm": 0.8729245662689209, + "learning_rate": 0.0014843674299434527, + "loss": 2.1835, + "step": 706 + }, + { + "epoch": 0.07457805907172996, + "grad_norm": 0.981329083442688, + "learning_rate": 0.0014843163968642566, + "loss": 2.169, + "step": 707 + }, + { + "epoch": 0.07468354430379746, + "grad_norm": 0.8169779181480408, + "learning_rate": 0.0014842652815012466, + "loss": 2.1832, + "step": 708 + }, + { + "epoch": 0.07478902953586498, + "grad_norm": 0.5608134865760803, + "learning_rate": 0.0014842140838601501, + "loss": 2.1772, + "step": 709 + }, + { + "epoch": 0.07489451476793249, + "grad_norm": 0.5620700716972351, + "learning_rate": 0.001484162803946705, + "loss": 2.1651, + "step": 710 + }, + { + "epoch": 0.075, + "grad_norm": 0.5830070972442627, + "learning_rate": 0.0014841114417666564, + "loss": 2.1661, + "step": 711 + }, + { + "epoch": 0.0751054852320675, + "grad_norm": 0.6039665341377258, + "learning_rate": 0.0014840599973257604, + "loss": 2.176, + "step": 712 + }, + { + "epoch": 0.07521097046413502, + "grad_norm": 0.7619819045066833, + "learning_rate": 0.001484008470629781, + "loss": 2.1715, + "step": 713 + }, + { + "epoch": 0.07531645569620253, + "grad_norm": 
0.7442662119865417, + "learning_rate": 0.0014839568616844927, + "loss": 2.1431, + "step": 714 + }, + { + "epoch": 0.07542194092827004, + "grad_norm": 0.7092955112457275, + "learning_rate": 0.0014839051704956781, + "loss": 2.1732, + "step": 715 + }, + { + "epoch": 0.07552742616033756, + "grad_norm": 1.0257385969161987, + "learning_rate": 0.0014838533970691296, + "loss": 2.1563, + "step": 716 + }, + { + "epoch": 0.07563291139240506, + "grad_norm": 0.7333285212516785, + "learning_rate": 0.0014838015414106486, + "loss": 2.1389, + "step": 717 + }, + { + "epoch": 0.07573839662447257, + "grad_norm": 0.7912759780883789, + "learning_rate": 0.0014837496035260457, + "loss": 2.1112, + "step": 718 + }, + { + "epoch": 0.07584388185654009, + "grad_norm": 0.8615280985832214, + "learning_rate": 0.0014836975834211412, + "loss": 2.1523, + "step": 719 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 0.6974195241928101, + "learning_rate": 0.0014836454811017635, + "loss": 2.1381, + "step": 720 + }, + { + "epoch": 0.0760548523206751, + "grad_norm": 0.6831492185592651, + "learning_rate": 0.0014835932965737517, + "loss": 2.1663, + "step": 721 + }, + { + "epoch": 0.07616033755274261, + "grad_norm": 0.6421518325805664, + "learning_rate": 0.0014835410298429529, + "loss": 2.1532, + "step": 722 + }, + { + "epoch": 0.07626582278481013, + "grad_norm": 0.5152579545974731, + "learning_rate": 0.001483488680915224, + "loss": 2.1411, + "step": 723 + }, + { + "epoch": 0.07637130801687764, + "grad_norm": 0.6127821207046509, + "learning_rate": 0.0014834362497964308, + "loss": 2.1283, + "step": 724 + }, + { + "epoch": 0.07647679324894514, + "grad_norm": 0.5234668850898743, + "learning_rate": 0.0014833837364924484, + "loss": 2.1494, + "step": 725 + }, + { + "epoch": 0.07658227848101266, + "grad_norm": 0.562936544418335, + "learning_rate": 0.0014833311410091617, + "loss": 2.1321, + "step": 726 + }, + { + "epoch": 0.07668776371308017, + "grad_norm": 0.6627261638641357, + "learning_rate": 
0.0014832784633524638, + "loss": 2.1332, + "step": 727 + }, + { + "epoch": 0.07679324894514768, + "grad_norm": 0.5830509662628174, + "learning_rate": 0.0014832257035282577, + "loss": 2.1527, + "step": 728 + }, + { + "epoch": 0.07689873417721518, + "grad_norm": 0.6804736256599426, + "learning_rate": 0.0014831728615424553, + "loss": 2.1572, + "step": 729 + }, + { + "epoch": 0.0770042194092827, + "grad_norm": 0.6087080240249634, + "learning_rate": 0.0014831199374009778, + "loss": 2.1247, + "step": 730 + }, + { + "epoch": 0.07710970464135021, + "grad_norm": 0.5479363799095154, + "learning_rate": 0.0014830669311097554, + "loss": 2.1444, + "step": 731 + }, + { + "epoch": 0.07721518987341772, + "grad_norm": 0.6053205728530884, + "learning_rate": 0.0014830138426747282, + "loss": 2.1456, + "step": 732 + }, + { + "epoch": 0.07732067510548524, + "grad_norm": 0.6870375871658325, + "learning_rate": 0.0014829606721018448, + "loss": 2.1687, + "step": 733 + }, + { + "epoch": 0.07742616033755274, + "grad_norm": 0.70124751329422, + "learning_rate": 0.0014829074193970634, + "loss": 2.1921, + "step": 734 + }, + { + "epoch": 0.07753164556962025, + "grad_norm": 0.8299857378005981, + "learning_rate": 0.0014828540845663507, + "loss": 2.1615, + "step": 735 + }, + { + "epoch": 0.07763713080168777, + "grad_norm": 0.6427547931671143, + "learning_rate": 0.0014828006676156837, + "loss": 2.1305, + "step": 736 + }, + { + "epoch": 0.07774261603375528, + "grad_norm": 0.575364351272583, + "learning_rate": 0.0014827471685510477, + "loss": 2.1705, + "step": 737 + }, + { + "epoch": 0.07784810126582278, + "grad_norm": 0.6645688414573669, + "learning_rate": 0.0014826935873784378, + "loss": 2.1318, + "step": 738 + }, + { + "epoch": 0.07795358649789029, + "grad_norm": 0.5789070725440979, + "learning_rate": 0.0014826399241038577, + "loss": 2.1808, + "step": 739 + }, + { + "epoch": 0.07805907172995781, + "grad_norm": 0.6436827182769775, + "learning_rate": 0.0014825861787333208, + "loss": 2.1266, + "step": 
740 + }, + { + "epoch": 0.07816455696202532, + "grad_norm": 0.5519630312919617, + "learning_rate": 0.00148253235127285, + "loss": 2.1371, + "step": 741 + }, + { + "epoch": 0.07827004219409282, + "grad_norm": 0.6324716210365295, + "learning_rate": 0.001482478441728476, + "loss": 2.1712, + "step": 742 + }, + { + "epoch": 0.07837552742616034, + "grad_norm": 0.6717787981033325, + "learning_rate": 0.0014824244501062402, + "loss": 2.1351, + "step": 743 + }, + { + "epoch": 0.07848101265822785, + "grad_norm": 0.49430596828460693, + "learning_rate": 0.0014823703764121929, + "loss": 2.1422, + "step": 744 + }, + { + "epoch": 0.07858649789029536, + "grad_norm": 0.6084884405136108, + "learning_rate": 0.0014823162206523926, + "loss": 2.1478, + "step": 745 + }, + { + "epoch": 0.07869198312236286, + "grad_norm": 0.6064751744270325, + "learning_rate": 0.0014822619828329085, + "loss": 2.1393, + "step": 746 + }, + { + "epoch": 0.07879746835443038, + "grad_norm": 0.5481725931167603, + "learning_rate": 0.0014822076629598176, + "loss": 2.1189, + "step": 747 + }, + { + "epoch": 0.07890295358649789, + "grad_norm": 0.5997464656829834, + "learning_rate": 0.001482153261039207, + "loss": 2.0883, + "step": 748 + }, + { + "epoch": 0.0790084388185654, + "grad_norm": 0.7969518303871155, + "learning_rate": 0.0014820987770771726, + "loss": 2.1315, + "step": 749 + }, + { + "epoch": 0.07911392405063292, + "grad_norm": 1.0001581907272339, + "learning_rate": 0.0014820442110798197, + "loss": 2.1299, + "step": 750 + }, + { + "epoch": 0.07921940928270042, + "grad_norm": 0.847104549407959, + "learning_rate": 0.0014819895630532628, + "loss": 2.1331, + "step": 751 + }, + { + "epoch": 0.07932489451476793, + "grad_norm": 0.5635699033737183, + "learning_rate": 0.0014819348330036251, + "loss": 2.1569, + "step": 752 + }, + { + "epoch": 0.07943037974683544, + "grad_norm": 0.7667535543441772, + "learning_rate": 0.0014818800209370397, + "loss": 2.123, + "step": 753 + }, + { + "epoch": 0.07953586497890296, + 
"grad_norm": 1.0746839046478271, + "learning_rate": 0.0014818251268596486, + "loss": 2.1361, + "step": 754 + }, + { + "epoch": 0.07964135021097046, + "grad_norm": 0.6554408669471741, + "learning_rate": 0.0014817701507776025, + "loss": 2.143, + "step": 755 + }, + { + "epoch": 0.07974683544303797, + "grad_norm": 0.6426889300346375, + "learning_rate": 0.0014817150926970625, + "loss": 2.1204, + "step": 756 + }, + { + "epoch": 0.07985232067510549, + "grad_norm": 1.2160634994506836, + "learning_rate": 0.0014816599526241974, + "loss": 2.1182, + "step": 757 + }, + { + "epoch": 0.079957805907173, + "grad_norm": 0.6974890232086182, + "learning_rate": 0.0014816047305651863, + "loss": 2.1655, + "step": 758 + }, + { + "epoch": 0.0800632911392405, + "grad_norm": 0.7297891974449158, + "learning_rate": 0.0014815494265262169, + "loss": 2.099, + "step": 759 + }, + { + "epoch": 0.08016877637130802, + "grad_norm": 1.1113061904907227, + "learning_rate": 0.0014814940405134865, + "loss": 2.1166, + "step": 760 + }, + { + "epoch": 0.08027426160337553, + "grad_norm": 0.6071848273277283, + "learning_rate": 0.0014814385725332015, + "loss": 2.1588, + "step": 761 + }, + { + "epoch": 0.08037974683544304, + "grad_norm": 0.8505532741546631, + "learning_rate": 0.001481383022591577, + "loss": 2.1158, + "step": 762 + }, + { + "epoch": 0.08048523206751054, + "grad_norm": 1.0721222162246704, + "learning_rate": 0.0014813273906948378, + "loss": 2.1011, + "step": 763 + }, + { + "epoch": 0.08059071729957806, + "grad_norm": 0.6975430846214294, + "learning_rate": 0.0014812716768492177, + "loss": 2.1232, + "step": 764 + }, + { + "epoch": 0.08069620253164557, + "grad_norm": 0.5829721689224243, + "learning_rate": 0.0014812158810609598, + "loss": 2.1121, + "step": 765 + }, + { + "epoch": 0.08080168776371308, + "grad_norm": 0.6872682571411133, + "learning_rate": 0.0014811600033363165, + "loss": 2.1248, + "step": 766 + }, + { + "epoch": 0.0809071729957806, + "grad_norm": 0.5959787964820862, + "learning_rate": 
0.0014811040436815486, + "loss": 2.0976, + "step": 767 + }, + { + "epoch": 0.0810126582278481, + "grad_norm": 0.6178424954414368, + "learning_rate": 0.001481048002102927, + "loss": 2.1116, + "step": 768 + }, + { + "epoch": 0.08111814345991561, + "grad_norm": 0.7452529072761536, + "learning_rate": 0.0014809918786067315, + "loss": 2.149, + "step": 769 + }, + { + "epoch": 0.08122362869198312, + "grad_norm": 0.5701708793640137, + "learning_rate": 0.001480935673199251, + "loss": 2.0939, + "step": 770 + }, + { + "epoch": 0.08132911392405064, + "grad_norm": 0.6334879398345947, + "learning_rate": 0.0014808793858867837, + "loss": 2.0964, + "step": 771 + }, + { + "epoch": 0.08143459915611814, + "grad_norm": 0.5817608833312988, + "learning_rate": 0.0014808230166756366, + "loss": 2.1758, + "step": 772 + }, + { + "epoch": 0.08154008438818565, + "grad_norm": 0.6251794099807739, + "learning_rate": 0.0014807665655721261, + "loss": 2.1002, + "step": 773 + }, + { + "epoch": 0.08164556962025317, + "grad_norm": 0.605261504650116, + "learning_rate": 0.0014807100325825782, + "loss": 2.1125, + "step": 774 + }, + { + "epoch": 0.08175105485232068, + "grad_norm": 0.5264526009559631, + "learning_rate": 0.0014806534177133274, + "loss": 2.0791, + "step": 775 + }, + { + "epoch": 0.08185654008438818, + "grad_norm": 0.6153285503387451, + "learning_rate": 0.0014805967209707178, + "loss": 2.0662, + "step": 776 + }, + { + "epoch": 0.0819620253164557, + "grad_norm": 0.5479755401611328, + "learning_rate": 0.0014805399423611025, + "loss": 2.1136, + "step": 777 + }, + { + "epoch": 0.08206751054852321, + "grad_norm": 0.7072218060493469, + "learning_rate": 0.0014804830818908438, + "loss": 2.1331, + "step": 778 + }, + { + "epoch": 0.08217299578059072, + "grad_norm": 0.824827253818512, + "learning_rate": 0.0014804261395663133, + "loss": 2.1472, + "step": 779 + }, + { + "epoch": 0.08227848101265822, + "grad_norm": 0.7875043153762817, + "learning_rate": 0.0014803691153938915, + "loss": 2.1298, + "step": 780 + 
}, + { + "epoch": 0.08238396624472574, + "grad_norm": 0.6745094060897827, + "learning_rate": 0.0014803120093799687, + "loss": 2.0896, + "step": 781 + }, + { + "epoch": 0.08248945147679325, + "grad_norm": 0.773171603679657, + "learning_rate": 0.0014802548215309434, + "loss": 2.1063, + "step": 782 + }, + { + "epoch": 0.08259493670886076, + "grad_norm": 1.0232412815093994, + "learning_rate": 0.001480197551853224, + "loss": 2.1064, + "step": 783 + }, + { + "epoch": 0.08270042194092828, + "grad_norm": 0.7389966249465942, + "learning_rate": 0.0014801402003532277, + "loss": 2.1307, + "step": 784 + }, + { + "epoch": 0.08280590717299578, + "grad_norm": 0.5896443128585815, + "learning_rate": 0.0014800827670373815, + "loss": 2.1318, + "step": 785 + }, + { + "epoch": 0.08291139240506329, + "grad_norm": 0.880555272102356, + "learning_rate": 0.0014800252519121203, + "loss": 2.1064, + "step": 786 + }, + { + "epoch": 0.0830168776371308, + "grad_norm": 0.5536566376686096, + "learning_rate": 0.0014799676549838898, + "loss": 2.1444, + "step": 787 + }, + { + "epoch": 0.08312236286919832, + "grad_norm": 0.7171345949172974, + "learning_rate": 0.0014799099762591434, + "loss": 2.1372, + "step": 788 + }, + { + "epoch": 0.08322784810126582, + "grad_norm": 0.6982835531234741, + "learning_rate": 0.0014798522157443443, + "loss": 2.1101, + "step": 789 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 0.6839098334312439, + "learning_rate": 0.0014797943734459653, + "loss": 2.164, + "step": 790 + }, + { + "epoch": 0.08343881856540085, + "grad_norm": 0.7022814750671387, + "learning_rate": 0.0014797364493704876, + "loss": 2.125, + "step": 791 + }, + { + "epoch": 0.08354430379746836, + "grad_norm": 0.5325747132301331, + "learning_rate": 0.001479678443524402, + "loss": 2.1117, + "step": 792 + }, + { + "epoch": 0.08364978902953586, + "grad_norm": 0.6735307574272156, + "learning_rate": 0.0014796203559142081, + "loss": 2.0879, + "step": 793 + }, + { + "epoch": 0.08375527426160338, + "grad_norm": 
0.5703725814819336, + "learning_rate": 0.0014795621865464155, + "loss": 2.1044, + "step": 794 + }, + { + "epoch": 0.08386075949367089, + "grad_norm": 0.5550674200057983, + "learning_rate": 0.0014795039354275417, + "loss": 2.0731, + "step": 795 + }, + { + "epoch": 0.0839662447257384, + "grad_norm": 0.6049332618713379, + "learning_rate": 0.0014794456025641143, + "loss": 2.1071, + "step": 796 + }, + { + "epoch": 0.0840717299578059, + "grad_norm": 0.723820686340332, + "learning_rate": 0.00147938718796267, + "loss": 2.1254, + "step": 797 + }, + { + "epoch": 0.08417721518987342, + "grad_norm": 0.7177790403366089, + "learning_rate": 0.001479328691629754, + "loss": 2.0922, + "step": 798 + }, + { + "epoch": 0.08428270042194093, + "grad_norm": 0.6932066082954407, + "learning_rate": 0.0014792701135719214, + "loss": 2.082, + "step": 799 + }, + { + "epoch": 0.08438818565400844, + "grad_norm": 0.7158593535423279, + "learning_rate": 0.001479211453795736, + "loss": 2.0648, + "step": 800 + }, + { + "epoch": 0.08449367088607596, + "grad_norm": 0.5213654041290283, + "learning_rate": 0.001479152712307771, + "loss": 2.0728, + "step": 801 + }, + { + "epoch": 0.08459915611814346, + "grad_norm": 0.6777067184448242, + "learning_rate": 0.0014790938891146089, + "loss": 2.093, + "step": 802 + }, + { + "epoch": 0.08470464135021097, + "grad_norm": 0.6951941847801208, + "learning_rate": 0.001479034984222841, + "loss": 2.0726, + "step": 803 + }, + { + "epoch": 0.08481012658227848, + "grad_norm": 0.5488183498382568, + "learning_rate": 0.0014789759976390675, + "loss": 2.0395, + "step": 804 + }, + { + "epoch": 0.084915611814346, + "grad_norm": 0.7829550504684448, + "learning_rate": 0.0014789169293698988, + "loss": 2.1038, + "step": 805 + }, + { + "epoch": 0.0850210970464135, + "grad_norm": 0.6582744717597961, + "learning_rate": 0.0014788577794219533, + "loss": 2.1227, + "step": 806 + }, + { + "epoch": 0.08512658227848101, + "grad_norm": 0.703486442565918, + "learning_rate": 0.0014787985478018593, + 
"loss": 2.0943, + "step": 807 + }, + { + "epoch": 0.08523206751054853, + "grad_norm": 0.6249163746833801, + "learning_rate": 0.0014787392345162538, + "loss": 2.1127, + "step": 808 + }, + { + "epoch": 0.08533755274261604, + "grad_norm": 0.6208978295326233, + "learning_rate": 0.0014786798395717833, + "loss": 2.0519, + "step": 809 + }, + { + "epoch": 0.08544303797468354, + "grad_norm": 0.6573852300643921, + "learning_rate": 0.0014786203629751033, + "loss": 2.0713, + "step": 810 + }, + { + "epoch": 0.08554852320675105, + "grad_norm": 0.5232307314872742, + "learning_rate": 0.001478560804732878, + "loss": 2.1005, + "step": 811 + }, + { + "epoch": 0.08565400843881857, + "grad_norm": 0.6507112979888916, + "learning_rate": 0.001478501164851782, + "loss": 2.073, + "step": 812 + }, + { + "epoch": 0.08575949367088608, + "grad_norm": 0.7199704051017761, + "learning_rate": 0.0014784414433384977, + "loss": 2.1371, + "step": 813 + }, + { + "epoch": 0.08586497890295358, + "grad_norm": 0.6647977232933044, + "learning_rate": 0.0014783816401997174, + "loss": 2.1117, + "step": 814 + }, + { + "epoch": 0.0859704641350211, + "grad_norm": 0.5336772203445435, + "learning_rate": 0.0014783217554421423, + "loss": 2.0761, + "step": 815 + }, + { + "epoch": 0.08607594936708861, + "grad_norm": 0.7073361277580261, + "learning_rate": 0.0014782617890724827, + "loss": 2.0783, + "step": 816 + }, + { + "epoch": 0.08618143459915611, + "grad_norm": 0.801508367061615, + "learning_rate": 0.0014782017410974583, + "loss": 2.1027, + "step": 817 + }, + { + "epoch": 0.08628691983122364, + "grad_norm": 0.6194837093353271, + "learning_rate": 0.0014781416115237976, + "loss": 2.0991, + "step": 818 + }, + { + "epoch": 0.08639240506329114, + "grad_norm": 0.5445713996887207, + "learning_rate": 0.0014780814003582385, + "loss": 2.079, + "step": 819 + }, + { + "epoch": 0.08649789029535865, + "grad_norm": 0.8229538202285767, + "learning_rate": 0.0014780211076075279, + "loss": 2.0768, + "step": 820 + }, + { + "epoch": 
0.08660337552742615, + "grad_norm": 0.9834152460098267, + "learning_rate": 0.001477960733278422, + "loss": 2.1002, + "step": 821 + }, + { + "epoch": 0.08670886075949367, + "grad_norm": 0.8466164469718933, + "learning_rate": 0.001477900277377686, + "loss": 2.1016, + "step": 822 + }, + { + "epoch": 0.08681434599156118, + "grad_norm": 0.6099902391433716, + "learning_rate": 0.0014778397399120942, + "loss": 2.1, + "step": 823 + }, + { + "epoch": 0.08691983122362869, + "grad_norm": 0.7636228799819946, + "learning_rate": 0.0014777791208884304, + "loss": 2.1338, + "step": 824 + }, + { + "epoch": 0.08702531645569621, + "grad_norm": 0.9618605971336365, + "learning_rate": 0.0014777184203134867, + "loss": 2.0748, + "step": 825 + }, + { + "epoch": 0.08713080168776371, + "grad_norm": 0.6632595658302307, + "learning_rate": 0.0014776576381940658, + "loss": 2.1091, + "step": 826 + }, + { + "epoch": 0.08723628691983122, + "grad_norm": 0.6982611417770386, + "learning_rate": 0.0014775967745369778, + "loss": 2.0925, + "step": 827 + }, + { + "epoch": 0.08734177215189873, + "grad_norm": 0.8749459385871887, + "learning_rate": 0.001477535829349043, + "loss": 2.0846, + "step": 828 + }, + { + "epoch": 0.08744725738396625, + "grad_norm": 0.7643752694129944, + "learning_rate": 0.0014774748026370908, + "loss": 2.083, + "step": 829 + }, + { + "epoch": 0.08755274261603375, + "grad_norm": 0.6725792288780212, + "learning_rate": 0.0014774136944079594, + "loss": 2.0854, + "step": 830 + }, + { + "epoch": 0.08765822784810126, + "grad_norm": 0.6900714635848999, + "learning_rate": 0.0014773525046684964, + "loss": 2.05, + "step": 831 + }, + { + "epoch": 0.08776371308016878, + "grad_norm": 0.7450143694877625, + "learning_rate": 0.0014772912334255585, + "loss": 2.0522, + "step": 832 + }, + { + "epoch": 0.08786919831223629, + "grad_norm": 0.653249204158783, + "learning_rate": 0.0014772298806860111, + "loss": 2.0657, + "step": 833 + }, + { + "epoch": 0.0879746835443038, + "grad_norm": 0.6444782614707947, + 
"learning_rate": 0.0014771684464567293, + "loss": 2.0394, + "step": 834 + }, + { + "epoch": 0.08808016877637131, + "grad_norm": 0.7270081639289856, + "learning_rate": 0.0014771069307445972, + "loss": 2.0998, + "step": 835 + }, + { + "epoch": 0.08818565400843882, + "grad_norm": 0.6043494939804077, + "learning_rate": 0.0014770453335565077, + "loss": 2.0711, + "step": 836 + }, + { + "epoch": 0.08829113924050633, + "grad_norm": 0.5779742002487183, + "learning_rate": 0.0014769836548993631, + "loss": 2.0854, + "step": 837 + }, + { + "epoch": 0.08839662447257383, + "grad_norm": 0.6390913724899292, + "learning_rate": 0.0014769218947800749, + "loss": 2.1117, + "step": 838 + }, + { + "epoch": 0.08850210970464135, + "grad_norm": 0.6121242046356201, + "learning_rate": 0.0014768600532055638, + "loss": 2.07, + "step": 839 + }, + { + "epoch": 0.08860759493670886, + "grad_norm": 0.5345577001571655, + "learning_rate": 0.0014767981301827592, + "loss": 2.087, + "step": 840 + }, + { + "epoch": 0.08871308016877637, + "grad_norm": 0.5312256217002869, + "learning_rate": 0.0014767361257186, + "loss": 2.0796, + "step": 841 + }, + { + "epoch": 0.08881856540084389, + "grad_norm": 0.720096230506897, + "learning_rate": 0.0014766740398200343, + "loss": 2.0429, + "step": 842 + }, + { + "epoch": 0.0889240506329114, + "grad_norm": 0.6575066447257996, + "learning_rate": 0.0014766118724940185, + "loss": 2.0862, + "step": 843 + }, + { + "epoch": 0.0890295358649789, + "grad_norm": 0.5495222210884094, + "learning_rate": 0.0014765496237475195, + "loss": 2.0619, + "step": 844 + }, + { + "epoch": 0.08913502109704641, + "grad_norm": 0.5025101900100708, + "learning_rate": 0.001476487293587512, + "loss": 2.082, + "step": 845 + }, + { + "epoch": 0.08924050632911393, + "grad_norm": 0.5558927059173584, + "learning_rate": 0.0014764248820209808, + "loss": 2.0953, + "step": 846 + }, + { + "epoch": 0.08934599156118143, + "grad_norm": 0.5647702813148499, + "learning_rate": 0.0014763623890549193, + "loss": 2.0905, + 
"step": 847 + }, + { + "epoch": 0.08945147679324894, + "grad_norm": 0.5876045823097229, + "learning_rate": 0.00147629981469633, + "loss": 2.0816, + "step": 848 + }, + { + "epoch": 0.08955696202531646, + "grad_norm": 0.5539434552192688, + "learning_rate": 0.001476237158952225, + "loss": 2.0434, + "step": 849 + }, + { + "epoch": 0.08966244725738397, + "grad_norm": 0.6369818449020386, + "learning_rate": 0.0014761744218296249, + "loss": 2.036, + "step": 850 + }, + { + "epoch": 0.08976793248945147, + "grad_norm": 0.676005482673645, + "learning_rate": 0.0014761116033355597, + "loss": 2.0664, + "step": 851 + }, + { + "epoch": 0.08987341772151898, + "grad_norm": 0.5349884629249573, + "learning_rate": 0.001476048703477069, + "loss": 2.0812, + "step": 852 + }, + { + "epoch": 0.0899789029535865, + "grad_norm": 0.632834255695343, + "learning_rate": 0.0014759857222612003, + "loss": 2.0512, + "step": 853 + }, + { + "epoch": 0.09008438818565401, + "grad_norm": 0.7174394130706787, + "learning_rate": 0.0014759226596950115, + "loss": 2.1106, + "step": 854 + }, + { + "epoch": 0.09018987341772151, + "grad_norm": 0.5347782373428345, + "learning_rate": 0.0014758595157855687, + "loss": 2.0686, + "step": 855 + }, + { + "epoch": 0.09029535864978903, + "grad_norm": 0.6173971891403198, + "learning_rate": 0.001475796290539948, + "loss": 2.0766, + "step": 856 + }, + { + "epoch": 0.09040084388185654, + "grad_norm": 0.7588369250297546, + "learning_rate": 0.0014757329839652335, + "loss": 2.0525, + "step": 857 + }, + { + "epoch": 0.09050632911392405, + "grad_norm": 0.917641818523407, + "learning_rate": 0.0014756695960685194, + "loss": 2.0778, + "step": 858 + }, + { + "epoch": 0.09061181434599157, + "grad_norm": 0.8581278920173645, + "learning_rate": 0.0014756061268569086, + "loss": 2.0712, + "step": 859 + }, + { + "epoch": 0.09071729957805907, + "grad_norm": 0.563814103603363, + "learning_rate": 0.001475542576337513, + "loss": 2.0203, + "step": 860 + }, + { + "epoch": 0.09082278481012658, + 
"grad_norm": 0.6342161893844604, + "learning_rate": 0.001475478944517454, + "loss": 2.078, + "step": 861 + }, + { + "epoch": 0.09092827004219409, + "grad_norm": 0.7115143537521362, + "learning_rate": 0.0014754152314038617, + "loss": 2.062, + "step": 862 + }, + { + "epoch": 0.09103375527426161, + "grad_norm": 0.5524473786354065, + "learning_rate": 0.0014753514370038753, + "loss": 2.0663, + "step": 863 + }, + { + "epoch": 0.09113924050632911, + "grad_norm": 0.6360031962394714, + "learning_rate": 0.0014752875613246435, + "loss": 2.0776, + "step": 864 + }, + { + "epoch": 0.09124472573839662, + "grad_norm": 0.5429706573486328, + "learning_rate": 0.001475223604373324, + "loss": 2.0813, + "step": 865 + }, + { + "epoch": 0.09135021097046414, + "grad_norm": 0.540732741355896, + "learning_rate": 0.0014751595661570832, + "loss": 2.0293, + "step": 866 + }, + { + "epoch": 0.09145569620253165, + "grad_norm": 0.5493524670600891, + "learning_rate": 0.001475095446683097, + "loss": 2.0639, + "step": 867 + }, + { + "epoch": 0.09156118143459915, + "grad_norm": 0.6062735915184021, + "learning_rate": 0.0014750312459585505, + "loss": 2.0983, + "step": 868 + }, + { + "epoch": 0.09166666666666666, + "grad_norm": 0.6357471942901611, + "learning_rate": 0.0014749669639906374, + "loss": 2.0288, + "step": 869 + }, + { + "epoch": 0.09177215189873418, + "grad_norm": 0.617594838142395, + "learning_rate": 0.001474902600786561, + "loss": 2.0786, + "step": 870 + }, + { + "epoch": 0.09187763713080169, + "grad_norm": 0.6684480309486389, + "learning_rate": 0.0014748381563535337, + "loss": 2.0655, + "step": 871 + }, + { + "epoch": 0.0919831223628692, + "grad_norm": 0.5654816031455994, + "learning_rate": 0.0014747736306987764, + "loss": 2.0566, + "step": 872 + }, + { + "epoch": 0.09208860759493671, + "grad_norm": 0.5567104816436768, + "learning_rate": 0.0014747090238295198, + "loss": 2.0533, + "step": 873 + }, + { + "epoch": 0.09219409282700422, + "grad_norm": 0.7422654628753662, + "learning_rate": 
0.0014746443357530033, + "loss": 2.0628, + "step": 874 + }, + { + "epoch": 0.09229957805907173, + "grad_norm": 0.7911421060562134, + "learning_rate": 0.0014745795664764757, + "loss": 2.0459, + "step": 875 + }, + { + "epoch": 0.09240506329113925, + "grad_norm": 0.7289369106292725, + "learning_rate": 0.0014745147160071944, + "loss": 2.0745, + "step": 876 + }, + { + "epoch": 0.09251054852320675, + "grad_norm": 0.5697579979896545, + "learning_rate": 0.0014744497843524266, + "loss": 2.049, + "step": 877 + }, + { + "epoch": 0.09261603375527426, + "grad_norm": 0.5870150923728943, + "learning_rate": 0.001474384771519448, + "loss": 2.0313, + "step": 878 + }, + { + "epoch": 0.09272151898734177, + "grad_norm": 0.6678891777992249, + "learning_rate": 0.0014743196775155434, + "loss": 2.0842, + "step": 879 + }, + { + "epoch": 0.09282700421940929, + "grad_norm": 0.7292107343673706, + "learning_rate": 0.0014742545023480075, + "loss": 2.0651, + "step": 880 + }, + { + "epoch": 0.0929324894514768, + "grad_norm": 0.6382560133934021, + "learning_rate": 0.001474189246024143, + "loss": 2.085, + "step": 881 + }, + { + "epoch": 0.0930379746835443, + "grad_norm": 0.5485881567001343, + "learning_rate": 0.0014741239085512624, + "loss": 2.0303, + "step": 882 + }, + { + "epoch": 0.09314345991561182, + "grad_norm": 0.7323777079582214, + "learning_rate": 0.0014740584899366868, + "loss": 1.9964, + "step": 883 + }, + { + "epoch": 0.09324894514767933, + "grad_norm": 0.6251181364059448, + "learning_rate": 0.0014739929901877473, + "loss": 2.0378, + "step": 884 + }, + { + "epoch": 0.09335443037974683, + "grad_norm": 0.6687463521957397, + "learning_rate": 0.001473927409311783, + "loss": 2.0998, + "step": 885 + }, + { + "epoch": 0.09345991561181434, + "grad_norm": 0.8949375152587891, + "learning_rate": 0.0014738617473161425, + "loss": 2.0272, + "step": 886 + }, + { + "epoch": 0.09356540084388186, + "grad_norm": 1.1951814889907837, + "learning_rate": 0.0014737960042081836, + "loss": 2.0439, + "step": 887 + 
}, + { + "epoch": 0.09367088607594937, + "grad_norm": 0.7335267663002014, + "learning_rate": 0.0014737301799952734, + "loss": 2.0756, + "step": 888 + }, + { + "epoch": 0.09377637130801687, + "grad_norm": 0.6293990015983582, + "learning_rate": 0.001473664274684788, + "loss": 2.0489, + "step": 889 + }, + { + "epoch": 0.0938818565400844, + "grad_norm": 0.8667661547660828, + "learning_rate": 0.0014735982882841117, + "loss": 2.0776, + "step": 890 + }, + { + "epoch": 0.0939873417721519, + "grad_norm": 0.7777113914489746, + "learning_rate": 0.0014735322208006391, + "loss": 2.096, + "step": 891 + }, + { + "epoch": 0.0940928270042194, + "grad_norm": 0.5713672041893005, + "learning_rate": 0.0014734660722417734, + "loss": 2.0514, + "step": 892 + }, + { + "epoch": 0.09419831223628691, + "grad_norm": 0.7268725037574768, + "learning_rate": 0.0014733998426149266, + "loss": 2.0303, + "step": 893 + }, + { + "epoch": 0.09430379746835443, + "grad_norm": 0.7038925290107727, + "learning_rate": 0.0014733335319275203, + "loss": 2.0743, + "step": 894 + }, + { + "epoch": 0.09440928270042194, + "grad_norm": 0.7439193725585938, + "learning_rate": 0.001473267140186985, + "loss": 2.0248, + "step": 895 + }, + { + "epoch": 0.09451476793248945, + "grad_norm": 0.9561305046081543, + "learning_rate": 0.00147320066740076, + "loss": 2.0429, + "step": 896 + }, + { + "epoch": 0.09462025316455697, + "grad_norm": 0.8382845520973206, + "learning_rate": 0.001473134113576294, + "loss": 2.04, + "step": 897 + }, + { + "epoch": 0.09472573839662447, + "grad_norm": 0.5215100646018982, + "learning_rate": 0.0014730674787210448, + "loss": 2.0345, + "step": 898 + }, + { + "epoch": 0.09483122362869198, + "grad_norm": 0.6866390705108643, + "learning_rate": 0.0014730007628424792, + "loss": 2.0247, + "step": 899 + }, + { + "epoch": 0.0949367088607595, + "grad_norm": 0.69844651222229, + "learning_rate": 0.0014729339659480727, + "loss": 2.0394, + "step": 900 + }, + { + "epoch": 0.095042194092827, + "grad_norm": 
0.5483406782150269, + "learning_rate": 0.0014728670880453105, + "loss": 2.0621, + "step": 901 + }, + { + "epoch": 0.09514767932489451, + "grad_norm": 0.6232728958129883, + "learning_rate": 0.0014728001291416863, + "loss": 2.0539, + "step": 902 + }, + { + "epoch": 0.09525316455696202, + "grad_norm": 0.6200667023658752, + "learning_rate": 0.001472733089244704, + "loss": 2.0429, + "step": 903 + }, + { + "epoch": 0.09535864978902954, + "grad_norm": 0.6365472078323364, + "learning_rate": 0.0014726659683618746, + "loss": 2.0414, + "step": 904 + }, + { + "epoch": 0.09546413502109705, + "grad_norm": 0.5491126179695129, + "learning_rate": 0.0014725987665007202, + "loss": 2.0285, + "step": 905 + }, + { + "epoch": 0.09556962025316455, + "grad_norm": 0.5842496156692505, + "learning_rate": 0.0014725314836687708, + "loss": 2.0589, + "step": 906 + }, + { + "epoch": 0.09567510548523207, + "grad_norm": 0.630530059337616, + "learning_rate": 0.0014724641198735659, + "loss": 2.0775, + "step": 907 + }, + { + "epoch": 0.09578059071729958, + "grad_norm": 0.6018935441970825, + "learning_rate": 0.0014723966751226535, + "loss": 2.0214, + "step": 908 + }, + { + "epoch": 0.09588607594936709, + "grad_norm": 0.5692439079284668, + "learning_rate": 0.0014723291494235916, + "loss": 2.0503, + "step": 909 + }, + { + "epoch": 0.09599156118143459, + "grad_norm": 0.5447186827659607, + "learning_rate": 0.0014722615427839468, + "loss": 2.0982, + "step": 910 + }, + { + "epoch": 0.09609704641350211, + "grad_norm": 0.5260642766952515, + "learning_rate": 0.0014721938552112943, + "loss": 2.0528, + "step": 911 + }, + { + "epoch": 0.09620253164556962, + "grad_norm": 0.5581090450286865, + "learning_rate": 0.0014721260867132193, + "loss": 2.0364, + "step": 912 + }, + { + "epoch": 0.09630801687763713, + "grad_norm": 0.5775792002677917, + "learning_rate": 0.0014720582372973155, + "loss": 2.0329, + "step": 913 + }, + { + "epoch": 0.09641350210970465, + "grad_norm": 0.5709571242332458, + "learning_rate": 
0.0014719903069711857, + "loss": 2.0536, + "step": 914 + }, + { + "epoch": 0.09651898734177215, + "grad_norm": 0.7929944396018982, + "learning_rate": 0.0014719222957424417, + "loss": 2.0845, + "step": 915 + }, + { + "epoch": 0.09662447257383966, + "grad_norm": 0.830448567867279, + "learning_rate": 0.0014718542036187049, + "loss": 2.0559, + "step": 916 + }, + { + "epoch": 0.09672995780590718, + "grad_norm": 0.8288160562515259, + "learning_rate": 0.0014717860306076049, + "loss": 2.0238, + "step": 917 + }, + { + "epoch": 0.09683544303797469, + "grad_norm": 0.6868204474449158, + "learning_rate": 0.0014717177767167812, + "loss": 2.0241, + "step": 918 + }, + { + "epoch": 0.09694092827004219, + "grad_norm": 0.5395604968070984, + "learning_rate": 0.0014716494419538815, + "loss": 2.0589, + "step": 919 + }, + { + "epoch": 0.0970464135021097, + "grad_norm": 0.839026689529419, + "learning_rate": 0.0014715810263265633, + "loss": 1.9939, + "step": 920 + }, + { + "epoch": 0.09715189873417722, + "grad_norm": 0.8506187200546265, + "learning_rate": 0.0014715125298424934, + "loss": 2.0687, + "step": 921 + }, + { + "epoch": 0.09725738396624473, + "grad_norm": 0.6252387166023254, + "learning_rate": 0.0014714439525093466, + "loss": 2.0363, + "step": 922 + }, + { + "epoch": 0.09736286919831223, + "grad_norm": 0.5949407815933228, + "learning_rate": 0.0014713752943348074, + "loss": 2.0092, + "step": 923 + }, + { + "epoch": 0.09746835443037975, + "grad_norm": 0.6182016730308533, + "learning_rate": 0.0014713065553265694, + "loss": 2.0449, + "step": 924 + }, + { + "epoch": 0.09757383966244726, + "grad_norm": 0.7706696391105652, + "learning_rate": 0.001471237735492335, + "loss": 2.0143, + "step": 925 + }, + { + "epoch": 0.09767932489451477, + "grad_norm": 0.6151801347732544, + "learning_rate": 0.0014711688348398161, + "loss": 2.033, + "step": 926 + }, + { + "epoch": 0.09778481012658227, + "grad_norm": 0.5530830025672913, + "learning_rate": 0.001471099853376733, + "loss": 2.0381, + "step": 927 
+ }, + { + "epoch": 0.09789029535864979, + "grad_norm": 0.952257513999939, + "learning_rate": 0.0014710307911108159, + "loss": 1.9856, + "step": 928 + }, + { + "epoch": 0.0979957805907173, + "grad_norm": 1.3985097408294678, + "learning_rate": 0.0014709616480498029, + "loss": 2.0462, + "step": 929 + }, + { + "epoch": 0.0981012658227848, + "grad_norm": 0.5738630294799805, + "learning_rate": 0.0014708924242014423, + "loss": 2.0314, + "step": 930 + }, + { + "epoch": 0.09820675105485233, + "grad_norm": 1.1997510194778442, + "learning_rate": 0.001470823119573491, + "loss": 2.0149, + "step": 931 + }, + { + "epoch": 0.09831223628691983, + "grad_norm": 1.1724603176116943, + "learning_rate": 0.0014707537341737149, + "loss": 2.0767, + "step": 932 + }, + { + "epoch": 0.09841772151898734, + "grad_norm": 0.5761026740074158, + "learning_rate": 0.0014706842680098887, + "loss": 2.0472, + "step": 933 + }, + { + "epoch": 0.09852320675105486, + "grad_norm": 1.0002801418304443, + "learning_rate": 0.0014706147210897967, + "loss": 2.0516, + "step": 934 + }, + { + "epoch": 0.09862869198312237, + "grad_norm": 1.1106265783309937, + "learning_rate": 0.0014705450934212317, + "loss": 2.0424, + "step": 935 + }, + { + "epoch": 0.09873417721518987, + "grad_norm": 0.6157057881355286, + "learning_rate": 0.0014704753850119962, + "loss": 2.0156, + "step": 936 + }, + { + "epoch": 0.09883966244725738, + "grad_norm": 0.6936944723129272, + "learning_rate": 0.001470405595869901, + "loss": 2.0454, + "step": 937 + }, + { + "epoch": 0.0989451476793249, + "grad_norm": 0.7377538681030273, + "learning_rate": 0.0014703357260027667, + "loss": 2.0235, + "step": 938 + }, + { + "epoch": 0.0990506329113924, + "grad_norm": 0.6870999932289124, + "learning_rate": 0.0014702657754184225, + "loss": 2.0361, + "step": 939 + }, + { + "epoch": 0.09915611814345991, + "grad_norm": 0.6371002197265625, + "learning_rate": 0.0014701957441247064, + "loss": 2.0235, + "step": 940 + }, + { + "epoch": 0.09926160337552743, + "grad_norm": 
0.6130989193916321, + "learning_rate": 0.001470125632129466, + "loss": 2.0026, + "step": 941 + }, + { + "epoch": 0.09936708860759494, + "grad_norm": 0.5907646417617798, + "learning_rate": 0.0014700554394405576, + "loss": 2.044, + "step": 942 + }, + { + "epoch": 0.09947257383966245, + "grad_norm": 0.5403623580932617, + "learning_rate": 0.0014699851660658469, + "loss": 1.9988, + "step": 943 + }, + { + "epoch": 0.09957805907172995, + "grad_norm": 0.6045404672622681, + "learning_rate": 0.0014699148120132079, + "loss": 1.9838, + "step": 944 + }, + { + "epoch": 0.09968354430379747, + "grad_norm": 0.660070538520813, + "learning_rate": 0.0014698443772905247, + "loss": 2.0113, + "step": 945 + }, + { + "epoch": 0.09978902953586498, + "grad_norm": 0.6706902384757996, + "learning_rate": 0.0014697738619056891, + "loss": 2.0227, + "step": 946 + }, + { + "epoch": 0.09989451476793249, + "grad_norm": 0.5888060927391052, + "learning_rate": 0.0014697032658666036, + "loss": 1.9757, + "step": 947 + }, + { + "epoch": 0.1, + "grad_norm": 0.7204238772392273, + "learning_rate": 0.001469632589181178, + "loss": 2.0452, + "step": 948 + }, + { + "epoch": 0.10010548523206751, + "grad_norm": 0.9617745876312256, + "learning_rate": 0.0014695618318573327, + "loss": 2.0104, + "step": 949 + }, + { + "epoch": 0.10021097046413502, + "grad_norm": 0.8661434650421143, + "learning_rate": 0.0014694909939029959, + "loss": 2.0323, + "step": 950 + }, + { + "epoch": 0.10031645569620253, + "grad_norm": 0.6151024103164673, + "learning_rate": 0.0014694200753261057, + "loss": 2.0451, + "step": 951 + }, + { + "epoch": 0.10042194092827005, + "grad_norm": 1.0312453508377075, + "learning_rate": 0.0014693490761346086, + "loss": 2.029, + "step": 952 + }, + { + "epoch": 0.10052742616033755, + "grad_norm": 1.1266227960586548, + "learning_rate": 0.0014692779963364606, + "loss": 2.05, + "step": 953 + }, + { + "epoch": 0.10063291139240506, + "grad_norm": 0.5612419843673706, + "learning_rate": 0.0014692068359396264, + "loss": 
2.0315, + "step": 954 + }, + { + "epoch": 0.10073839662447258, + "grad_norm": 0.8607825040817261, + "learning_rate": 0.00146913559495208, + "loss": 2.0361, + "step": 955 + }, + { + "epoch": 0.10084388185654009, + "grad_norm": 0.8217899203300476, + "learning_rate": 0.001469064273381804, + "loss": 2.0172, + "step": 956 + }, + { + "epoch": 0.10094936708860759, + "grad_norm": 0.5320350527763367, + "learning_rate": 0.0014689928712367907, + "loss": 2.001, + "step": 957 + }, + { + "epoch": 0.10105485232067511, + "grad_norm": 0.7110685110092163, + "learning_rate": 0.0014689213885250411, + "loss": 2.0413, + "step": 958 + }, + { + "epoch": 0.10116033755274262, + "grad_norm": 0.7548103928565979, + "learning_rate": 0.001468849825254565, + "loss": 1.988, + "step": 959 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 0.5269470810890198, + "learning_rate": 0.0014687781814333814, + "loss": 2.0075, + "step": 960 + }, + { + "epoch": 0.10137130801687763, + "grad_norm": 0.797202467918396, + "learning_rate": 0.0014687064570695185, + "loss": 2.028, + "step": 961 + }, + { + "epoch": 0.10147679324894515, + "grad_norm": 0.9065843820571899, + "learning_rate": 0.0014686346521710133, + "loss": 2.0039, + "step": 962 + }, + { + "epoch": 0.10158227848101266, + "grad_norm": 0.5905656814575195, + "learning_rate": 0.0014685627667459118, + "loss": 2.0569, + "step": 963 + }, + { + "epoch": 0.10168776371308016, + "grad_norm": 0.7084620594978333, + "learning_rate": 0.0014684908008022694, + "loss": 2.0114, + "step": 964 + }, + { + "epoch": 0.10179324894514769, + "grad_norm": 0.6697912216186523, + "learning_rate": 0.00146841875434815, + "loss": 2.0598, + "step": 965 + }, + { + "epoch": 0.10189873417721519, + "grad_norm": 0.6458673477172852, + "learning_rate": 0.0014683466273916266, + "loss": 1.9883, + "step": 966 + }, + { + "epoch": 0.1020042194092827, + "grad_norm": 0.6990525126457214, + "learning_rate": 0.0014682744199407817, + "loss": 2.0182, + "step": 967 + }, + { + "epoch": 
0.1021097046413502, + "grad_norm": 0.680051863193512, + "learning_rate": 0.0014682021320037064, + "loss": 2.0553, + "step": 968 + }, + { + "epoch": 0.10221518987341772, + "grad_norm": 0.5545700788497925, + "learning_rate": 0.0014681297635885011, + "loss": 2.0039, + "step": 969 + }, + { + "epoch": 0.10232067510548523, + "grad_norm": 0.9169774055480957, + "learning_rate": 0.0014680573147032746, + "loss": 2.0201, + "step": 970 + }, + { + "epoch": 0.10242616033755274, + "grad_norm": 0.9501149654388428, + "learning_rate": 0.0014679847853561457, + "loss": 1.9967, + "step": 971 + }, + { + "epoch": 0.10253164556962026, + "grad_norm": 0.5792279243469238, + "learning_rate": 0.0014679121755552412, + "loss": 1.9928, + "step": 972 + }, + { + "epoch": 0.10263713080168776, + "grad_norm": 0.818527102470398, + "learning_rate": 0.0014678394853086976, + "loss": 2.0026, + "step": 973 + }, + { + "epoch": 0.10274261603375527, + "grad_norm": 0.7883652448654175, + "learning_rate": 0.0014677667146246604, + "loss": 2.0173, + "step": 974 + }, + { + "epoch": 0.10284810126582279, + "grad_norm": 0.5701575875282288, + "learning_rate": 0.0014676938635112835, + "loss": 1.9973, + "step": 975 + }, + { + "epoch": 0.1029535864978903, + "grad_norm": 0.6611241102218628, + "learning_rate": 0.0014676209319767306, + "loss": 1.9969, + "step": 976 + }, + { + "epoch": 0.1030590717299578, + "grad_norm": 0.8530029654502869, + "learning_rate": 0.0014675479200291738, + "loss": 2.0118, + "step": 977 + }, + { + "epoch": 0.10316455696202531, + "grad_norm": 0.7846843600273132, + "learning_rate": 0.0014674748276767944, + "loss": 1.9996, + "step": 978 + }, + { + "epoch": 0.10327004219409283, + "grad_norm": 0.6311259269714355, + "learning_rate": 0.0014674016549277831, + "loss": 1.9938, + "step": 979 + }, + { + "epoch": 0.10337552742616034, + "grad_norm": 0.8879949450492859, + "learning_rate": 0.0014673284017903392, + "loss": 2.0342, + "step": 980 + }, + { + "epoch": 0.10348101265822784, + "grad_norm": 
0.7608528733253479, + "learning_rate": 0.001467255068272671, + "loss": 2.0183, + "step": 981 + }, + { + "epoch": 0.10358649789029536, + "grad_norm": 0.655994713306427, + "learning_rate": 0.0014671816543829954, + "loss": 2.0315, + "step": 982 + }, + { + "epoch": 0.10369198312236287, + "grad_norm": 1.0171102285385132, + "learning_rate": 0.0014671081601295394, + "loss": 1.9934, + "step": 983 + }, + { + "epoch": 0.10379746835443038, + "grad_norm": 0.6392386555671692, + "learning_rate": 0.0014670345855205384, + "loss": 1.9942, + "step": 984 + }, + { + "epoch": 0.10390295358649788, + "grad_norm": 0.828840970993042, + "learning_rate": 0.0014669609305642366, + "loss": 2.0047, + "step": 985 + }, + { + "epoch": 0.1040084388185654, + "grad_norm": 0.8618424534797668, + "learning_rate": 0.0014668871952688873, + "loss": 2.005, + "step": 986 + }, + { + "epoch": 0.10411392405063291, + "grad_norm": 0.6648793816566467, + "learning_rate": 0.0014668133796427532, + "loss": 2.0293, + "step": 987 + }, + { + "epoch": 0.10421940928270042, + "grad_norm": 0.6300562620162964, + "learning_rate": 0.0014667394836941055, + "loss": 1.9967, + "step": 988 + }, + { + "epoch": 0.10432489451476794, + "grad_norm": 0.7003135085105896, + "learning_rate": 0.0014666655074312247, + "loss": 1.98, + "step": 989 + }, + { + "epoch": 0.10443037974683544, + "grad_norm": 0.6503307819366455, + "learning_rate": 0.0014665914508624, + "loss": 1.9997, + "step": 990 + }, + { + "epoch": 0.10453586497890295, + "grad_norm": 0.6096027493476868, + "learning_rate": 0.0014665173139959305, + "loss": 2.0747, + "step": 991 + }, + { + "epoch": 0.10464135021097046, + "grad_norm": 0.861186683177948, + "learning_rate": 0.0014664430968401225, + "loss": 1.9998, + "step": 992 + }, + { + "epoch": 0.10474683544303798, + "grad_norm": 0.7677274942398071, + "learning_rate": 0.0014663687994032931, + "loss": 1.9861, + "step": 993 + }, + { + "epoch": 0.10485232067510548, + "grad_norm": 0.7285496592521667, + "learning_rate": 
0.0014662944216937677, + "loss": 2.0302, + "step": 994 + }, + { + "epoch": 0.10495780590717299, + "grad_norm": 0.8189219832420349, + "learning_rate": 0.0014662199637198807, + "loss": 1.9817, + "step": 995 + }, + { + "epoch": 0.10506329113924051, + "grad_norm": 0.5252412557601929, + "learning_rate": 0.0014661454254899754, + "loss": 2.0097, + "step": 996 + }, + { + "epoch": 0.10516877637130802, + "grad_norm": 0.685366153717041, + "learning_rate": 0.0014660708070124038, + "loss": 1.983, + "step": 997 + }, + { + "epoch": 0.10527426160337552, + "grad_norm": 0.5246286988258362, + "learning_rate": 0.0014659961082955277, + "loss": 2.0246, + "step": 998 + }, + { + "epoch": 0.10537974683544304, + "grad_norm": 0.7138487100601196, + "learning_rate": 0.0014659213293477177, + "loss": 2.0302, + "step": 999 + }, + { + "epoch": 0.10548523206751055, + "grad_norm": 0.6618337631225586, + "learning_rate": 0.0014658464701773526, + "loss": 2.0351, + "step": 1000 + }, + { + "epoch": 0.10559071729957806, + "grad_norm": 0.525221586227417, + "learning_rate": 0.0014657715307928212, + "loss": 1.9807, + "step": 1001 + }, + { + "epoch": 0.10569620253164556, + "grad_norm": 0.8338286280632019, + "learning_rate": 0.0014656965112025203, + "loss": 1.9917, + "step": 1002 + }, + { + "epoch": 0.10580168776371308, + "grad_norm": 0.8605924248695374, + "learning_rate": 0.0014656214114148567, + "loss": 2.0443, + "step": 1003 + }, + { + "epoch": 0.10590717299578059, + "grad_norm": 0.6674926280975342, + "learning_rate": 0.0014655462314382456, + "loss": 1.9613, + "step": 1004 + }, + { + "epoch": 0.1060126582278481, + "grad_norm": 1.0160744190216064, + "learning_rate": 0.0014654709712811113, + "loss": 2.0006, + "step": 1005 + }, + { + "epoch": 0.10611814345991562, + "grad_norm": 1.186792016029358, + "learning_rate": 0.0014653956309518866, + "loss": 2.0024, + "step": 1006 + }, + { + "epoch": 0.10622362869198312, + "grad_norm": 0.6002411246299744, + "learning_rate": 0.0014653202104590146, + "loss": 2.0356, + 
"step": 1007 + }, + { + "epoch": 0.10632911392405063, + "grad_norm": 1.059224009513855, + "learning_rate": 0.0014652447098109458, + "loss": 1.9985, + "step": 1008 + }, + { + "epoch": 0.10643459915611814, + "grad_norm": 0.7853568196296692, + "learning_rate": 0.001465169129016141, + "loss": 2.0166, + "step": 1009 + }, + { + "epoch": 0.10654008438818566, + "grad_norm": 0.7165223360061646, + "learning_rate": 0.0014650934680830688, + "loss": 2.0151, + "step": 1010 + }, + { + "epoch": 0.10664556962025316, + "grad_norm": 0.899348795413971, + "learning_rate": 0.001465017727020208, + "loss": 1.9867, + "step": 1011 + }, + { + "epoch": 0.10675105485232067, + "grad_norm": 0.5951515436172485, + "learning_rate": 0.0014649419058360455, + "loss": 1.9733, + "step": 1012 + }, + { + "epoch": 0.10685654008438819, + "grad_norm": 0.8406158685684204, + "learning_rate": 0.0014648660045390772, + "loss": 1.9555, + "step": 1013 + }, + { + "epoch": 0.1069620253164557, + "grad_norm": 0.7781445980072021, + "learning_rate": 0.0014647900231378086, + "loss": 2.0117, + "step": 1014 + }, + { + "epoch": 0.1070675105485232, + "grad_norm": 0.6169943809509277, + "learning_rate": 0.0014647139616407539, + "loss": 1.9839, + "step": 1015 + }, + { + "epoch": 0.10717299578059072, + "grad_norm": 0.6656562089920044, + "learning_rate": 0.0014646378200564355, + "loss": 1.9475, + "step": 1016 + }, + { + "epoch": 0.10727848101265823, + "grad_norm": 0.7009028196334839, + "learning_rate": 0.001464561598393386, + "loss": 2.0196, + "step": 1017 + }, + { + "epoch": 0.10738396624472574, + "grad_norm": 0.6019009947776794, + "learning_rate": 0.0014644852966601463, + "loss": 2.0291, + "step": 1018 + }, + { + "epoch": 0.10748945147679324, + "grad_norm": 0.9432244896888733, + "learning_rate": 0.0014644089148652664, + "loss": 2.0695, + "step": 1019 + }, + { + "epoch": 0.10759493670886076, + "grad_norm": 0.9771539568901062, + "learning_rate": 0.0014643324530173051, + "loss": 1.9862, + "step": 1020 + }, + { + "epoch": 
0.10770042194092827, + "grad_norm": 0.6675443649291992, + "learning_rate": 0.0014642559111248306, + "loss": 1.9885, + "step": 1021 + }, + { + "epoch": 0.10780590717299578, + "grad_norm": 0.8472841382026672, + "learning_rate": 0.0014641792891964195, + "loss": 1.9975, + "step": 1022 + }, + { + "epoch": 0.1079113924050633, + "grad_norm": 0.8012048006057739, + "learning_rate": 0.0014641025872406581, + "loss": 1.9709, + "step": 1023 + }, + { + "epoch": 0.1080168776371308, + "grad_norm": 0.7737429141998291, + "learning_rate": 0.0014640258052661405, + "loss": 2.001, + "step": 1024 + }, + { + "epoch": 0.10812236286919831, + "grad_norm": 0.8442005515098572, + "learning_rate": 0.0014639489432814712, + "loss": 2.0258, + "step": 1025 + }, + { + "epoch": 0.10822784810126582, + "grad_norm": 0.767215371131897, + "learning_rate": 0.001463872001295263, + "loss": 1.9693, + "step": 1026 + }, + { + "epoch": 0.10833333333333334, + "grad_norm": 0.8036083579063416, + "learning_rate": 0.0014637949793161371, + "loss": 2.0206, + "step": 1027 + }, + { + "epoch": 0.10843881856540084, + "grad_norm": 0.6758812665939331, + "learning_rate": 0.0014637178773527246, + "loss": 1.971, + "step": 1028 + }, + { + "epoch": 0.10854430379746835, + "grad_norm": 0.6905952095985413, + "learning_rate": 0.001463640695413665, + "loss": 2.0122, + "step": 1029 + }, + { + "epoch": 0.10864978902953587, + "grad_norm": 0.816936194896698, + "learning_rate": 0.0014635634335076067, + "loss": 2.0016, + "step": 1030 + }, + { + "epoch": 0.10875527426160338, + "grad_norm": 0.9530200362205505, + "learning_rate": 0.0014634860916432077, + "loss": 1.9844, + "step": 1031 + }, + { + "epoch": 0.10886075949367088, + "grad_norm": 0.6399800181388855, + "learning_rate": 0.0014634086698291345, + "loss": 1.9514, + "step": 1032 + }, + { + "epoch": 0.10896624472573839, + "grad_norm": 0.6881306171417236, + "learning_rate": 0.0014633311680740625, + "loss": 1.9902, + "step": 1033 + }, + { + "epoch": 0.10907172995780591, + "grad_norm": 
0.6452796459197998, + "learning_rate": 0.0014632535863866756, + "loss": 2.0307, + "step": 1034 + }, + { + "epoch": 0.10917721518987342, + "grad_norm": 0.5692335963249207, + "learning_rate": 0.0014631759247756683, + "loss": 2.0183, + "step": 1035 + }, + { + "epoch": 0.10928270042194092, + "grad_norm": 0.7163100242614746, + "learning_rate": 0.0014630981832497421, + "loss": 1.9705, + "step": 1036 + }, + { + "epoch": 0.10938818565400844, + "grad_norm": 0.5267016291618347, + "learning_rate": 0.0014630203618176088, + "loss": 1.9805, + "step": 1037 + }, + { + "epoch": 0.10949367088607595, + "grad_norm": 0.8115071058273315, + "learning_rate": 0.0014629424604879885, + "loss": 2.0207, + "step": 1038 + }, + { + "epoch": 0.10959915611814346, + "grad_norm": 0.7617741823196411, + "learning_rate": 0.0014628644792696105, + "loss": 1.9801, + "step": 1039 + }, + { + "epoch": 0.10970464135021098, + "grad_norm": 0.5937676429748535, + "learning_rate": 0.001462786418171213, + "loss": 1.9917, + "step": 1040 + }, + { + "epoch": 0.10981012658227848, + "grad_norm": 0.7317233085632324, + "learning_rate": 0.0014627082772015428, + "loss": 1.9984, + "step": 1041 + }, + { + "epoch": 0.10991561181434599, + "grad_norm": 0.8161544799804688, + "learning_rate": 0.0014626300563693566, + "loss": 1.9974, + "step": 1042 + }, + { + "epoch": 0.1100210970464135, + "grad_norm": 0.6424931883811951, + "learning_rate": 0.0014625517556834187, + "loss": 1.983, + "step": 1043 + }, + { + "epoch": 0.11012658227848102, + "grad_norm": 0.5537563562393188, + "learning_rate": 0.0014624733751525036, + "loss": 1.9704, + "step": 1044 + }, + { + "epoch": 0.11023206751054852, + "grad_norm": 0.5770753026008606, + "learning_rate": 0.001462394914785394, + "loss": 1.993, + "step": 1045 + }, + { + "epoch": 0.11033755274261603, + "grad_norm": 0.5275008082389832, + "learning_rate": 0.0014623163745908821, + "loss": 2.0092, + "step": 1046 + }, + { + "epoch": 0.11044303797468355, + "grad_norm": 0.6184133887290955, + "learning_rate": 
0.0014622377545777687, + "loss": 1.9895, + "step": 1047 + }, + { + "epoch": 0.11054852320675106, + "grad_norm": 0.5743455290794373, + "learning_rate": 0.001462159054754863, + "loss": 1.9876, + "step": 1048 + }, + { + "epoch": 0.11065400843881856, + "grad_norm": 0.7211107015609741, + "learning_rate": 0.0014620802751309841, + "loss": 1.9738, + "step": 1049 + }, + { + "epoch": 0.11075949367088607, + "grad_norm": 0.8780311942100525, + "learning_rate": 0.0014620014157149597, + "loss": 2.0141, + "step": 1050 + }, + { + "epoch": 0.11086497890295359, + "grad_norm": 0.6032792925834656, + "learning_rate": 0.0014619224765156263, + "loss": 1.9762, + "step": 1051 + }, + { + "epoch": 0.1109704641350211, + "grad_norm": 0.6896377801895142, + "learning_rate": 0.0014618434575418293, + "loss": 2.0084, + "step": 1052 + }, + { + "epoch": 0.1110759493670886, + "grad_norm": 1.0655765533447266, + "learning_rate": 0.0014617643588024237, + "loss": 2.0073, + "step": 1053 + }, + { + "epoch": 0.11118143459915612, + "grad_norm": 0.6450295448303223, + "learning_rate": 0.001461685180306272, + "loss": 1.9926, + "step": 1054 + }, + { + "epoch": 0.11128691983122363, + "grad_norm": 0.7640180587768555, + "learning_rate": 0.0014616059220622475, + "loss": 1.9664, + "step": 1055 + }, + { + "epoch": 0.11139240506329114, + "grad_norm": 1.0277851819992065, + "learning_rate": 0.0014615265840792308, + "loss": 1.9634, + "step": 1056 + }, + { + "epoch": 0.11149789029535866, + "grad_norm": 0.6522850394248962, + "learning_rate": 0.0014614471663661123, + "loss": 1.9896, + "step": 1057 + }, + { + "epoch": 0.11160337552742616, + "grad_norm": 0.6861599683761597, + "learning_rate": 0.0014613676689317916, + "loss": 1.9796, + "step": 1058 + }, + { + "epoch": 0.11170886075949367, + "grad_norm": 0.6764658689498901, + "learning_rate": 0.001461288091785176, + "loss": 1.956, + "step": 1059 + }, + { + "epoch": 0.11181434599156118, + "grad_norm": 0.6308872699737549, + "learning_rate": 0.001461208434935183, + "loss": 1.9783, + 
"step": 1060 + }, + { + "epoch": 0.1119198312236287, + "grad_norm": 0.5949121713638306, + "learning_rate": 0.0014611286983907384, + "loss": 2.0119, + "step": 1061 + }, + { + "epoch": 0.1120253164556962, + "grad_norm": 0.5305895209312439, + "learning_rate": 0.0014610488821607775, + "loss": 1.9405, + "step": 1062 + }, + { + "epoch": 0.11213080168776371, + "grad_norm": 0.6711398959159851, + "learning_rate": 0.0014609689862542434, + "loss": 1.9883, + "step": 1063 + }, + { + "epoch": 0.11223628691983123, + "grad_norm": 0.5801088213920593, + "learning_rate": 0.0014608890106800893, + "loss": 1.955, + "step": 1064 + }, + { + "epoch": 0.11234177215189874, + "grad_norm": 0.6389816999435425, + "learning_rate": 0.0014608089554472767, + "loss": 1.9629, + "step": 1065 + }, + { + "epoch": 0.11244725738396624, + "grad_norm": 0.5528869032859802, + "learning_rate": 0.0014607288205647762, + "loss": 1.9475, + "step": 1066 + }, + { + "epoch": 0.11255274261603375, + "grad_norm": 0.5154584646224976, + "learning_rate": 0.0014606486060415673, + "loss": 1.9735, + "step": 1067 + }, + { + "epoch": 0.11265822784810127, + "grad_norm": 0.5518656373023987, + "learning_rate": 0.0014605683118866387, + "loss": 1.9437, + "step": 1068 + }, + { + "epoch": 0.11276371308016878, + "grad_norm": 0.6206042766571045, + "learning_rate": 0.0014604879381089873, + "loss": 1.9722, + "step": 1069 + }, + { + "epoch": 0.11286919831223628, + "grad_norm": 0.7181331515312195, + "learning_rate": 0.0014604074847176197, + "loss": 1.9892, + "step": 1070 + }, + { + "epoch": 0.1129746835443038, + "grad_norm": 0.636681318283081, + "learning_rate": 0.0014603269517215512, + "loss": 1.9378, + "step": 1071 + }, + { + "epoch": 0.11308016877637131, + "grad_norm": 0.5426703095436096, + "learning_rate": 0.0014602463391298055, + "loss": 2.0061, + "step": 1072 + }, + { + "epoch": 0.11318565400843882, + "grad_norm": 0.6029060482978821, + "learning_rate": 0.0014601656469514159, + "loss": 1.9515, + "step": 1073 + }, + { + "epoch": 
0.11329113924050632, + "grad_norm": 0.6451303362846375, + "learning_rate": 0.0014600848751954248, + "loss": 1.9453, + "step": 1074 + }, + { + "epoch": 0.11339662447257384, + "grad_norm": 0.514620304107666, + "learning_rate": 0.001460004023870882, + "loss": 1.9777, + "step": 1075 + }, + { + "epoch": 0.11350210970464135, + "grad_norm": 0.5516576170921326, + "learning_rate": 0.0014599230929868482, + "loss": 2.0124, + "step": 1076 + }, + { + "epoch": 0.11360759493670886, + "grad_norm": 0.5613174438476562, + "learning_rate": 0.0014598420825523918, + "loss": 1.983, + "step": 1077 + }, + { + "epoch": 0.11371308016877638, + "grad_norm": 0.5994141101837158, + "learning_rate": 0.0014597609925765906, + "loss": 1.9438, + "step": 1078 + }, + { + "epoch": 0.11381856540084388, + "grad_norm": 0.5371135473251343, + "learning_rate": 0.0014596798230685308, + "loss": 1.9845, + "step": 1079 + }, + { + "epoch": 0.11392405063291139, + "grad_norm": 0.589935839176178, + "learning_rate": 0.0014595985740373082, + "loss": 1.9512, + "step": 1080 + }, + { + "epoch": 0.11402953586497891, + "grad_norm": 0.5591606497764587, + "learning_rate": 0.001459517245492027, + "loss": 1.9739, + "step": 1081 + }, + { + "epoch": 0.11413502109704642, + "grad_norm": 0.5454668998718262, + "learning_rate": 0.0014594358374418004, + "loss": 1.9921, + "step": 1082 + }, + { + "epoch": 0.11424050632911392, + "grad_norm": 0.577911913394928, + "learning_rate": 0.0014593543498957506, + "loss": 1.9636, + "step": 1083 + }, + { + "epoch": 0.11434599156118143, + "grad_norm": 0.5535557270050049, + "learning_rate": 0.0014592727828630088, + "loss": 1.978, + "step": 1084 + }, + { + "epoch": 0.11445147679324895, + "grad_norm": 0.547998309135437, + "learning_rate": 0.001459191136352715, + "loss": 1.9455, + "step": 1085 + }, + { + "epoch": 0.11455696202531646, + "grad_norm": 0.67669278383255, + "learning_rate": 0.0014591094103740179, + "loss": 1.9394, + "step": 1086 + }, + { + "epoch": 0.11466244725738396, + "grad_norm": 
0.7145247459411621, + "learning_rate": 0.0014590276049360755, + "loss": 2.0213, + "step": 1087 + }, + { + "epoch": 0.11476793248945148, + "grad_norm": 0.6789604425430298, + "learning_rate": 0.0014589457200480543, + "loss": 1.9925, + "step": 1088 + }, + { + "epoch": 0.11487341772151899, + "grad_norm": 0.8554436564445496, + "learning_rate": 0.0014588637557191302, + "loss": 2.0083, + "step": 1089 + }, + { + "epoch": 0.1149789029535865, + "grad_norm": 0.8873697519302368, + "learning_rate": 0.0014587817119584873, + "loss": 1.9775, + "step": 1090 + }, + { + "epoch": 0.115084388185654, + "grad_norm": 0.6394447088241577, + "learning_rate": 0.0014586995887753197, + "loss": 1.9299, + "step": 1091 + }, + { + "epoch": 0.11518987341772152, + "grad_norm": 0.8196018934249878, + "learning_rate": 0.001458617386178829, + "loss": 1.9485, + "step": 1092 + }, + { + "epoch": 0.11529535864978903, + "grad_norm": 0.8777449131011963, + "learning_rate": 0.001458535104178227, + "loss": 1.9488, + "step": 1093 + }, + { + "epoch": 0.11540084388185654, + "grad_norm": 0.6937008500099182, + "learning_rate": 0.001458452742782733, + "loss": 1.9822, + "step": 1094 + }, + { + "epoch": 0.11550632911392406, + "grad_norm": 0.828104555606842, + "learning_rate": 0.0014583703020015768, + "loss": 1.9753, + "step": 1095 + }, + { + "epoch": 0.11561181434599156, + "grad_norm": 0.7213875651359558, + "learning_rate": 0.001458287781843996, + "loss": 1.9914, + "step": 1096 + }, + { + "epoch": 0.11571729957805907, + "grad_norm": 0.7398901581764221, + "learning_rate": 0.0014582051823192374, + "loss": 1.9683, + "step": 1097 + }, + { + "epoch": 0.11582278481012659, + "grad_norm": 0.7411930561065674, + "learning_rate": 0.0014581225034365564, + "loss": 1.9953, + "step": 1098 + }, + { + "epoch": 0.1159282700421941, + "grad_norm": 0.5657179355621338, + "learning_rate": 0.0014580397452052182, + "loss": 1.9583, + "step": 1099 + }, + { + "epoch": 0.1160337552742616, + "grad_norm": 0.6025940179824829, + "learning_rate": 
0.001457956907634496, + "loss": 1.9636, + "step": 1100 + }, + { + "epoch": 0.11613924050632911, + "grad_norm": 0.5842971801757812, + "learning_rate": 0.001457873990733672, + "loss": 1.9661, + "step": 1101 + }, + { + "epoch": 0.11624472573839663, + "grad_norm": 0.5629121661186218, + "learning_rate": 0.0014577909945120376, + "loss": 1.9435, + "step": 1102 + }, + { + "epoch": 0.11635021097046414, + "grad_norm": 0.7208744883537292, + "learning_rate": 0.001457707918978893, + "loss": 1.9819, + "step": 1103 + }, + { + "epoch": 0.11645569620253164, + "grad_norm": 0.522457480430603, + "learning_rate": 0.0014576247641435469, + "loss": 1.973, + "step": 1104 + }, + { + "epoch": 0.11656118143459916, + "grad_norm": 0.5819994211196899, + "learning_rate": 0.0014575415300153174, + "loss": 1.9962, + "step": 1105 + }, + { + "epoch": 0.11666666666666667, + "grad_norm": 0.5715758204460144, + "learning_rate": 0.0014574582166035314, + "loss": 1.9686, + "step": 1106 + }, + { + "epoch": 0.11677215189873418, + "grad_norm": 0.5573920011520386, + "learning_rate": 0.0014573748239175247, + "loss": 1.9633, + "step": 1107 + }, + { + "epoch": 0.11687763713080168, + "grad_norm": 0.6143296360969543, + "learning_rate": 0.0014572913519666417, + "loss": 1.9571, + "step": 1108 + }, + { + "epoch": 0.1169831223628692, + "grad_norm": 0.5188027620315552, + "learning_rate": 0.0014572078007602355, + "loss": 1.9881, + "step": 1109 + }, + { + "epoch": 0.11708860759493671, + "grad_norm": 0.504709780216217, + "learning_rate": 0.0014571241703076692, + "loss": 1.9499, + "step": 1110 + }, + { + "epoch": 0.11719409282700421, + "grad_norm": 0.5780119299888611, + "learning_rate": 0.0014570404606183132, + "loss": 1.989, + "step": 1111 + }, + { + "epoch": 0.11729957805907174, + "grad_norm": 0.6306459307670593, + "learning_rate": 0.0014569566717015483, + "loss": 1.9386, + "step": 1112 + }, + { + "epoch": 0.11740506329113924, + "grad_norm": 0.6115565299987793, + "learning_rate": 0.0014568728035667627, + "loss": 1.982, + 
"step": 1113 + }, + { + "epoch": 0.11751054852320675, + "grad_norm": 0.6621360778808594, + "learning_rate": 0.001456788856223355, + "loss": 1.9369, + "step": 1114 + }, + { + "epoch": 0.11761603375527427, + "grad_norm": 0.6500293016433716, + "learning_rate": 0.0014567048296807315, + "loss": 1.9905, + "step": 1115 + }, + { + "epoch": 0.11772151898734177, + "grad_norm": 0.6201908588409424, + "learning_rate": 0.0014566207239483078, + "loss": 1.9574, + "step": 1116 + }, + { + "epoch": 0.11782700421940928, + "grad_norm": 0.7550560235977173, + "learning_rate": 0.0014565365390355087, + "loss": 1.958, + "step": 1117 + }, + { + "epoch": 0.11793248945147679, + "grad_norm": 0.74176025390625, + "learning_rate": 0.001456452274951767, + "loss": 1.986, + "step": 1118 + }, + { + "epoch": 0.11803797468354431, + "grad_norm": 0.5764637589454651, + "learning_rate": 0.0014563679317065254, + "loss": 1.9493, + "step": 1119 + }, + { + "epoch": 0.11814345991561181, + "grad_norm": 0.5881341695785522, + "learning_rate": 0.0014562835093092348, + "loss": 1.9218, + "step": 1120 + }, + { + "epoch": 0.11824894514767932, + "grad_norm": 0.5485637187957764, + "learning_rate": 0.0014561990077693553, + "loss": 1.9211, + "step": 1121 + }, + { + "epoch": 0.11835443037974684, + "grad_norm": 0.6253213286399841, + "learning_rate": 0.0014561144270963551, + "loss": 1.9687, + "step": 1122 + }, + { + "epoch": 0.11845991561181435, + "grad_norm": 0.7547743320465088, + "learning_rate": 0.0014560297672997127, + "loss": 1.9708, + "step": 1123 + }, + { + "epoch": 0.11856540084388185, + "grad_norm": 0.6920896768569946, + "learning_rate": 0.001455945028388914, + "loss": 1.9287, + "step": 1124 + }, + { + "epoch": 0.11867088607594936, + "grad_norm": 0.5276017785072327, + "learning_rate": 0.001455860210373455, + "loss": 1.9606, + "step": 1125 + }, + { + "epoch": 0.11877637130801688, + "grad_norm": 0.7066054940223694, + "learning_rate": 0.0014557753132628396, + "loss": 1.9389, + "step": 1126 + }, + { + "epoch": 
0.11888185654008439, + "grad_norm": 0.8308013677597046, + "learning_rate": 0.0014556903370665807, + "loss": 1.9618, + "step": 1127 + }, + { + "epoch": 0.1189873417721519, + "grad_norm": 0.6271633505821228, + "learning_rate": 0.0014556052817942013, + "loss": 1.9642, + "step": 1128 + }, + { + "epoch": 0.11909282700421941, + "grad_norm": 0.9178131222724915, + "learning_rate": 0.001455520147455231, + "loss": 1.968, + "step": 1129 + }, + { + "epoch": 0.11919831223628692, + "grad_norm": 1.1569904088974, + "learning_rate": 0.0014554349340592104, + "loss": 1.9425, + "step": 1130 + }, + { + "epoch": 0.11930379746835443, + "grad_norm": 0.5928104519844055, + "learning_rate": 0.001455349641615688, + "loss": 1.9944, + "step": 1131 + }, + { + "epoch": 0.11940928270042193, + "grad_norm": 0.7399933338165283, + "learning_rate": 0.001455264270134221, + "loss": 1.9431, + "step": 1132 + }, + { + "epoch": 0.11951476793248945, + "grad_norm": 0.8233100175857544, + "learning_rate": 0.0014551788196243754, + "loss": 1.9669, + "step": 1133 + }, + { + "epoch": 0.11962025316455696, + "grad_norm": 0.7339267730712891, + "learning_rate": 0.0014550932900957271, + "loss": 1.9788, + "step": 1134 + }, + { + "epoch": 0.11972573839662447, + "grad_norm": 1.034615159034729, + "learning_rate": 0.0014550076815578595, + "loss": 1.9609, + "step": 1135 + }, + { + "epoch": 0.11983122362869199, + "grad_norm": 0.6947922706604004, + "learning_rate": 0.0014549219940203659, + "loss": 1.9127, + "step": 1136 + }, + { + "epoch": 0.1199367088607595, + "grad_norm": 0.6535161137580872, + "learning_rate": 0.0014548362274928476, + "loss": 1.9863, + "step": 1137 + }, + { + "epoch": 0.120042194092827, + "grad_norm": 0.7737777233123779, + "learning_rate": 0.0014547503819849154, + "loss": 1.9714, + "step": 1138 + }, + { + "epoch": 0.12014767932489452, + "grad_norm": 0.5352864265441895, + "learning_rate": 0.001454664457506189, + "loss": 1.9486, + "step": 1139 + }, + { + "epoch": 0.12025316455696203, + "grad_norm": 
0.7206131219863892, + "learning_rate": 0.001454578454066296, + "loss": 1.9683, + "step": 1140 + }, + { + "epoch": 0.12035864978902953, + "grad_norm": 0.5925705432891846, + "learning_rate": 0.001454492371674874, + "loss": 1.8991, + "step": 1141 + }, + { + "epoch": 0.12046413502109704, + "grad_norm": 0.6601157784461975, + "learning_rate": 0.0014544062103415687, + "loss": 1.9603, + "step": 1142 + }, + { + "epoch": 0.12056962025316456, + "grad_norm": 0.8290013074874878, + "learning_rate": 0.0014543199700760353, + "loss": 2.002, + "step": 1143 + }, + { + "epoch": 0.12067510548523207, + "grad_norm": 0.6038021445274353, + "learning_rate": 0.0014542336508879372, + "loss": 1.9142, + "step": 1144 + }, + { + "epoch": 0.12078059071729957, + "grad_norm": 0.6269853115081787, + "learning_rate": 0.0014541472527869468, + "loss": 1.9559, + "step": 1145 + }, + { + "epoch": 0.1208860759493671, + "grad_norm": 0.5076548457145691, + "learning_rate": 0.0014540607757827456, + "loss": 1.9499, + "step": 1146 + }, + { + "epoch": 0.1209915611814346, + "grad_norm": 0.6906920671463013, + "learning_rate": 0.0014539742198850234, + "loss": 1.9342, + "step": 1147 + }, + { + "epoch": 0.12109704641350211, + "grad_norm": 0.6703537702560425, + "learning_rate": 0.0014538875851034798, + "loss": 1.9944, + "step": 1148 + }, + { + "epoch": 0.12120253164556961, + "grad_norm": 0.6494718194007874, + "learning_rate": 0.0014538008714478224, + "loss": 1.957, + "step": 1149 + }, + { + "epoch": 0.12130801687763713, + "grad_norm": 1.0631392002105713, + "learning_rate": 0.0014537140789277678, + "loss": 1.9546, + "step": 1150 + }, + { + "epoch": 0.12141350210970464, + "grad_norm": 0.6786403059959412, + "learning_rate": 0.0014536272075530417, + "loss": 1.9533, + "step": 1151 + }, + { + "epoch": 0.12151898734177215, + "grad_norm": 0.7652180790901184, + "learning_rate": 0.0014535402573333783, + "loss": 2.0009, + "step": 1152 + }, + { + "epoch": 0.12162447257383967, + "grad_norm": 0.9196487665176392, + "learning_rate": 
0.001453453228278521, + "loss": 1.956, + "step": 1153 + }, + { + "epoch": 0.12172995780590717, + "grad_norm": 0.6321209073066711, + "learning_rate": 0.0014533661203982215, + "loss": 1.9514, + "step": 1154 + }, + { + "epoch": 0.12183544303797468, + "grad_norm": 0.922248363494873, + "learning_rate": 0.0014532789337022413, + "loss": 1.9554, + "step": 1155 + }, + { + "epoch": 0.1219409282700422, + "grad_norm": 0.6372406482696533, + "learning_rate": 0.0014531916682003494, + "loss": 1.9599, + "step": 1156 + }, + { + "epoch": 0.12204641350210971, + "grad_norm": 0.9040522575378418, + "learning_rate": 0.0014531043239023247, + "loss": 1.9494, + "step": 1157 + }, + { + "epoch": 0.12215189873417721, + "grad_norm": 1.050411343574524, + "learning_rate": 0.0014530169008179546, + "loss": 1.956, + "step": 1158 + }, + { + "epoch": 0.12225738396624472, + "grad_norm": 0.588082492351532, + "learning_rate": 0.001452929398957035, + "loss": 1.9704, + "step": 1159 + }, + { + "epoch": 0.12236286919831224, + "grad_norm": 0.9602245092391968, + "learning_rate": 0.0014528418183293716, + "loss": 1.9394, + "step": 1160 + }, + { + "epoch": 0.12246835443037975, + "grad_norm": 0.6963881254196167, + "learning_rate": 0.0014527541589447774, + "loss": 1.9733, + "step": 1161 + }, + { + "epoch": 0.12257383966244725, + "grad_norm": 0.6205583810806274, + "learning_rate": 0.0014526664208130756, + "loss": 1.976, + "step": 1162 + }, + { + "epoch": 0.12267932489451477, + "grad_norm": 0.6791574358940125, + "learning_rate": 0.0014525786039440971, + "loss": 1.9373, + "step": 1163 + }, + { + "epoch": 0.12278481012658228, + "grad_norm": 0.5511821508407593, + "learning_rate": 0.001452490708347683, + "loss": 1.9436, + "step": 1164 + }, + { + "epoch": 0.12289029535864979, + "grad_norm": 0.6409441232681274, + "learning_rate": 0.0014524027340336821, + "loss": 1.9384, + "step": 1165 + }, + { + "epoch": 0.1229957805907173, + "grad_norm": 0.6514928340911865, + "learning_rate": 0.0014523146810119525, + "loss": 1.9308, + 
"step": 1166 + }, + { + "epoch": 0.12310126582278481, + "grad_norm": 0.6259689927101135, + "learning_rate": 0.0014522265492923608, + "loss": 1.979, + "step": 1167 + }, + { + "epoch": 0.12320675105485232, + "grad_norm": 0.7294979691505432, + "learning_rate": 0.0014521383388847824, + "loss": 1.9107, + "step": 1168 + }, + { + "epoch": 0.12331223628691983, + "grad_norm": 0.6898124814033508, + "learning_rate": 0.0014520500497991022, + "loss": 1.9494, + "step": 1169 + }, + { + "epoch": 0.12341772151898735, + "grad_norm": 0.5704181790351868, + "learning_rate": 0.001451961682045213, + "loss": 1.9124, + "step": 1170 + }, + { + "epoch": 0.12352320675105485, + "grad_norm": 0.7581634521484375, + "learning_rate": 0.001451873235633017, + "loss": 1.967, + "step": 1171 + }, + { + "epoch": 0.12362869198312236, + "grad_norm": 0.6217529773712158, + "learning_rate": 0.0014517847105724251, + "loss": 1.9393, + "step": 1172 + }, + { + "epoch": 0.12373417721518987, + "grad_norm": 0.5268408060073853, + "learning_rate": 0.0014516961068733569, + "loss": 1.8917, + "step": 1173 + }, + { + "epoch": 0.12383966244725739, + "grad_norm": 0.5390276312828064, + "learning_rate": 0.0014516074245457412, + "loss": 1.9527, + "step": 1174 + }, + { + "epoch": 0.1239451476793249, + "grad_norm": 0.6406852006912231, + "learning_rate": 0.001451518663599515, + "loss": 1.9184, + "step": 1175 + }, + { + "epoch": 0.1240506329113924, + "grad_norm": 0.8096076250076294, + "learning_rate": 0.0014514298240446244, + "loss": 1.8842, + "step": 1176 + }, + { + "epoch": 0.12415611814345992, + "grad_norm": 0.7908474802970886, + "learning_rate": 0.0014513409058910243, + "loss": 1.9104, + "step": 1177 + }, + { + "epoch": 0.12426160337552743, + "grad_norm": 0.5687995553016663, + "learning_rate": 0.0014512519091486786, + "loss": 1.952, + "step": 1178 + }, + { + "epoch": 0.12436708860759493, + "grad_norm": 0.586245596408844, + "learning_rate": 0.0014511628338275597, + "loss": 1.9628, + "step": 1179 + }, + { + "epoch": 
0.12447257383966245, + "grad_norm": 0.5481122136116028, + "learning_rate": 0.001451073679937649, + "loss": 1.9471, + "step": 1180 + }, + { + "epoch": 0.12457805907172996, + "grad_norm": 0.5913495421409607, + "learning_rate": 0.0014509844474889365, + "loss": 1.9231, + "step": 1181 + }, + { + "epoch": 0.12468354430379747, + "grad_norm": 0.6583248376846313, + "learning_rate": 0.0014508951364914213, + "loss": 1.9265, + "step": 1182 + }, + { + "epoch": 0.12478902953586497, + "grad_norm": 0.5326104164123535, + "learning_rate": 0.001450805746955111, + "loss": 1.9417, + "step": 1183 + }, + { + "epoch": 0.1248945147679325, + "grad_norm": 0.6438238024711609, + "learning_rate": 0.001450716278890022, + "loss": 1.9453, + "step": 1184 + }, + { + "epoch": 0.125, + "grad_norm": 0.6368554830551147, + "learning_rate": 0.0014506267323061803, + "loss": 1.9444, + "step": 1185 + }, + { + "epoch": 0.12510548523206752, + "grad_norm": 0.5990705490112305, + "learning_rate": 0.0014505371072136195, + "loss": 1.9397, + "step": 1186 + }, + { + "epoch": 0.125210970464135, + "grad_norm": 0.5824708342552185, + "learning_rate": 0.0014504474036223826, + "loss": 1.9567, + "step": 1187 + }, + { + "epoch": 0.12531645569620253, + "grad_norm": 0.4841042160987854, + "learning_rate": 0.0014503576215425212, + "loss": 1.9485, + "step": 1188 + }, + { + "epoch": 0.12542194092827005, + "grad_norm": 0.5671766400337219, + "learning_rate": 0.0014502677609840964, + "loss": 1.9125, + "step": 1189 + }, + { + "epoch": 0.12552742616033755, + "grad_norm": 0.5477116703987122, + "learning_rate": 0.0014501778219571766, + "loss": 1.904, + "step": 1190 + }, + { + "epoch": 0.12563291139240507, + "grad_norm": 0.5640432834625244, + "learning_rate": 0.0014500878044718408, + "loss": 1.9276, + "step": 1191 + }, + { + "epoch": 0.1257383966244726, + "grad_norm": 0.5558633208274841, + "learning_rate": 0.0014499977085381756, + "loss": 1.9188, + "step": 1192 + }, + { + "epoch": 0.12584388185654008, + "grad_norm": 0.549628496170044, + 
"learning_rate": 0.0014499075341662764, + "loss": 1.9603, + "step": 1193 + }, + { + "epoch": 0.1259493670886076, + "grad_norm": 0.5744264125823975, + "learning_rate": 0.0014498172813662482, + "loss": 1.9888, + "step": 1194 + }, + { + "epoch": 0.1260548523206751, + "grad_norm": 0.6178702712059021, + "learning_rate": 0.0014497269501482037, + "loss": 1.9346, + "step": 1195 + }, + { + "epoch": 0.1261603375527426, + "grad_norm": 0.8179274201393127, + "learning_rate": 0.0014496365405222656, + "loss": 1.93, + "step": 1196 + }, + { + "epoch": 0.12626582278481013, + "grad_norm": 0.7463276982307434, + "learning_rate": 0.0014495460524985644, + "loss": 1.9209, + "step": 1197 + }, + { + "epoch": 0.12637130801687763, + "grad_norm": 0.5583138465881348, + "learning_rate": 0.0014494554860872398, + "loss": 1.9568, + "step": 1198 + }, + { + "epoch": 0.12647679324894515, + "grad_norm": 0.5954578518867493, + "learning_rate": 0.00144936484129844, + "loss": 1.9687, + "step": 1199 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 0.6071377992630005, + "learning_rate": 0.0014492741181423225, + "loss": 1.9332, + "step": 1200 + }, + { + "epoch": 0.12668776371308016, + "grad_norm": 0.5910906791687012, + "learning_rate": 0.001449183316629053, + "loss": 1.9336, + "step": 1201 + }, + { + "epoch": 0.12679324894514768, + "grad_norm": 0.5334830284118652, + "learning_rate": 0.0014490924367688066, + "loss": 1.9372, + "step": 1202 + }, + { + "epoch": 0.1268987341772152, + "grad_norm": 0.6607654094696045, + "learning_rate": 0.0014490014785717667, + "loss": 1.9089, + "step": 1203 + }, + { + "epoch": 0.1270042194092827, + "grad_norm": 0.9110004305839539, + "learning_rate": 0.0014489104420481254, + "loss": 1.9417, + "step": 1204 + }, + { + "epoch": 0.1271097046413502, + "grad_norm": 0.952542781829834, + "learning_rate": 0.001448819327208084, + "loss": 1.9701, + "step": 1205 + }, + { + "epoch": 0.12721518987341773, + "grad_norm": 0.7246801257133484, + "learning_rate": 0.0014487281340618526, + "loss": 
1.9354, + "step": 1206 + }, + { + "epoch": 0.12732067510548523, + "grad_norm": 0.5783079266548157, + "learning_rate": 0.0014486368626196494, + "loss": 1.9275, + "step": 1207 + }, + { + "epoch": 0.12742616033755275, + "grad_norm": 0.8576749563217163, + "learning_rate": 0.001448545512891702, + "loss": 1.9467, + "step": 1208 + }, + { + "epoch": 0.12753164556962027, + "grad_norm": 0.9098116755485535, + "learning_rate": 0.0014484540848882469, + "loss": 1.9396, + "step": 1209 + }, + { + "epoch": 0.12763713080168776, + "grad_norm": 0.6920645236968994, + "learning_rate": 0.0014483625786195285, + "loss": 1.9577, + "step": 1210 + }, + { + "epoch": 0.12774261603375528, + "grad_norm": 0.6329607367515564, + "learning_rate": 0.0014482709940958009, + "loss": 1.9126, + "step": 1211 + }, + { + "epoch": 0.12784810126582277, + "grad_norm": 0.9421775341033936, + "learning_rate": 0.0014481793313273266, + "loss": 1.9067, + "step": 1212 + }, + { + "epoch": 0.1279535864978903, + "grad_norm": 1.0263557434082031, + "learning_rate": 0.0014480875903243766, + "loss": 1.9401, + "step": 1213 + }, + { + "epoch": 0.1280590717299578, + "grad_norm": 0.7619597911834717, + "learning_rate": 0.0014479957710972313, + "loss": 1.9248, + "step": 1214 + }, + { + "epoch": 0.1281645569620253, + "grad_norm": 0.5916169285774231, + "learning_rate": 0.0014479038736561793, + "loss": 1.9114, + "step": 1215 + }, + { + "epoch": 0.12827004219409283, + "grad_norm": 0.5816412568092346, + "learning_rate": 0.001447811898011518, + "loss": 1.942, + "step": 1216 + }, + { + "epoch": 0.12837552742616035, + "grad_norm": 0.6554460525512695, + "learning_rate": 0.0014477198441735543, + "loss": 1.9325, + "step": 1217 + }, + { + "epoch": 0.12848101265822784, + "grad_norm": 0.5943748354911804, + "learning_rate": 0.0014476277121526027, + "loss": 1.9797, + "step": 1218 + }, + { + "epoch": 0.12858649789029536, + "grad_norm": 0.6332750916481018, + "learning_rate": 0.0014475355019589872, + "loss": 1.8961, + "step": 1219 + }, + { + "epoch": 
0.12869198312236288, + "grad_norm": 0.5412176847457886, + "learning_rate": 0.0014474432136030405, + "loss": 1.9012, + "step": 1220 + }, + { + "epoch": 0.12879746835443037, + "grad_norm": 0.5844665765762329, + "learning_rate": 0.001447350847095104, + "loss": 1.9423, + "step": 1221 + }, + { + "epoch": 0.1289029535864979, + "grad_norm": 0.6139405369758606, + "learning_rate": 0.001447258402445528, + "loss": 1.9476, + "step": 1222 + }, + { + "epoch": 0.1290084388185654, + "grad_norm": 0.5937597155570984, + "learning_rate": 0.0014471658796646708, + "loss": 1.905, + "step": 1223 + }, + { + "epoch": 0.1291139240506329, + "grad_norm": 0.6001068353652954, + "learning_rate": 0.0014470732787629005, + "loss": 1.9579, + "step": 1224 + }, + { + "epoch": 0.12921940928270043, + "grad_norm": 0.5131018161773682, + "learning_rate": 0.0014469805997505932, + "loss": 1.8847, + "step": 1225 + }, + { + "epoch": 0.12932489451476795, + "grad_norm": 0.5697823762893677, + "learning_rate": 0.0014468878426381346, + "loss": 1.898, + "step": 1226 + }, + { + "epoch": 0.12943037974683544, + "grad_norm": 0.5864412784576416, + "learning_rate": 0.001446795007435918, + "loss": 1.9012, + "step": 1227 + }, + { + "epoch": 0.12953586497890296, + "grad_norm": 0.703755259513855, + "learning_rate": 0.0014467020941543464, + "loss": 1.9018, + "step": 1228 + }, + { + "epoch": 0.12964135021097045, + "grad_norm": 0.8494018912315369, + "learning_rate": 0.0014466091028038314, + "loss": 1.9461, + "step": 1229 + }, + { + "epoch": 0.12974683544303797, + "grad_norm": 0.6710630655288696, + "learning_rate": 0.0014465160333947923, + "loss": 1.8744, + "step": 1230 + }, + { + "epoch": 0.1298523206751055, + "grad_norm": 0.5319110751152039, + "learning_rate": 0.0014464228859376587, + "loss": 1.9384, + "step": 1231 + }, + { + "epoch": 0.12995780590717299, + "grad_norm": 0.6599804759025574, + "learning_rate": 0.001446329660442868, + "loss": 1.9683, + "step": 1232 + }, + { + "epoch": 0.1300632911392405, + "grad_norm": 
0.7937655448913574, + "learning_rate": 0.0014462363569208666, + "loss": 1.9021, + "step": 1233 + }, + { + "epoch": 0.13016877637130803, + "grad_norm": 0.9417902827262878, + "learning_rate": 0.00144614297538211, + "loss": 1.9494, + "step": 1234 + }, + { + "epoch": 0.13027426160337552, + "grad_norm": 0.9733338952064514, + "learning_rate": 0.0014460495158370615, + "loss": 1.9548, + "step": 1235 + }, + { + "epoch": 0.13037974683544304, + "grad_norm": 0.7364691495895386, + "learning_rate": 0.0014459559782961937, + "loss": 1.9234, + "step": 1236 + }, + { + "epoch": 0.13048523206751056, + "grad_norm": 0.5808089971542358, + "learning_rate": 0.0014458623627699883, + "loss": 1.9126, + "step": 1237 + }, + { + "epoch": 0.13059071729957805, + "grad_norm": 0.7091988921165466, + "learning_rate": 0.0014457686692689355, + "loss": 1.9189, + "step": 1238 + }, + { + "epoch": 0.13069620253164557, + "grad_norm": 0.9794600009918213, + "learning_rate": 0.0014456748978035339, + "loss": 1.9492, + "step": 1239 + }, + { + "epoch": 0.1308016877637131, + "grad_norm": 0.8731154799461365, + "learning_rate": 0.0014455810483842908, + "loss": 1.9137, + "step": 1240 + }, + { + "epoch": 0.13090717299578059, + "grad_norm": 0.6176528334617615, + "learning_rate": 0.0014454871210217229, + "loss": 1.8927, + "step": 1241 + }, + { + "epoch": 0.1310126582278481, + "grad_norm": 0.7696582674980164, + "learning_rate": 0.0014453931157263548, + "loss": 1.9444, + "step": 1242 + }, + { + "epoch": 0.1311181434599156, + "grad_norm": 0.758773922920227, + "learning_rate": 0.001445299032508721, + "loss": 1.933, + "step": 1243 + }, + { + "epoch": 0.13122362869198312, + "grad_norm": 0.554261326789856, + "learning_rate": 0.0014452048713793633, + "loss": 1.9243, + "step": 1244 + }, + { + "epoch": 0.13132911392405064, + "grad_norm": 0.569963812828064, + "learning_rate": 0.0014451106323488331, + "loss": 1.9575, + "step": 1245 + }, + { + "epoch": 0.13143459915611813, + "grad_norm": 0.5528079867362976, + "learning_rate": 
0.0014450163154276906, + "loss": 1.9603, + "step": 1246 + }, + { + "epoch": 0.13154008438818565, + "grad_norm": 0.6474608778953552, + "learning_rate": 0.0014449219206265041, + "loss": 1.9309, + "step": 1247 + }, + { + "epoch": 0.13164556962025317, + "grad_norm": 0.6841100454330444, + "learning_rate": 0.0014448274479558513, + "loss": 1.9359, + "step": 1248 + }, + { + "epoch": 0.13175105485232066, + "grad_norm": 0.6603577733039856, + "learning_rate": 0.0014447328974263182, + "loss": 1.9199, + "step": 1249 + }, + { + "epoch": 0.13185654008438819, + "grad_norm": 0.6021895408630371, + "learning_rate": 0.0014446382690484997, + "loss": 1.9228, + "step": 1250 + }, + { + "epoch": 0.1319620253164557, + "grad_norm": 0.6510868668556213, + "learning_rate": 0.0014445435628329993, + "loss": 1.96, + "step": 1251 + }, + { + "epoch": 0.1320675105485232, + "grad_norm": 0.6402379870414734, + "learning_rate": 0.0014444487787904294, + "loss": 1.9181, + "step": 1252 + }, + { + "epoch": 0.13217299578059072, + "grad_norm": 0.6292267441749573, + "learning_rate": 0.001444353916931411, + "loss": 1.9153, + "step": 1253 + }, + { + "epoch": 0.13227848101265824, + "grad_norm": 0.5366946458816528, + "learning_rate": 0.001444258977266574, + "loss": 1.8927, + "step": 1254 + }, + { + "epoch": 0.13238396624472573, + "grad_norm": 0.6413100957870483, + "learning_rate": 0.0014441639598065565, + "loss": 1.8943, + "step": 1255 + }, + { + "epoch": 0.13248945147679325, + "grad_norm": 0.6977307200431824, + "learning_rate": 0.001444068864562006, + "loss": 1.9045, + "step": 1256 + }, + { + "epoch": 0.13259493670886077, + "grad_norm": 0.5511654019355774, + "learning_rate": 0.0014439736915435786, + "loss": 1.9352, + "step": 1257 + }, + { + "epoch": 0.13270042194092826, + "grad_norm": 0.5714332461357117, + "learning_rate": 0.001443878440761938, + "loss": 1.9106, + "step": 1258 + }, + { + "epoch": 0.13280590717299579, + "grad_norm": 0.8608718514442444, + "learning_rate": 0.0014437831122277585, + "loss": 1.9308, + 
"step": 1259 + }, + { + "epoch": 0.13291139240506328, + "grad_norm": 0.6680339574813843, + "learning_rate": 0.0014436877059517215, + "loss": 1.9238, + "step": 1260 + }, + { + "epoch": 0.1330168776371308, + "grad_norm": 0.7115082740783691, + "learning_rate": 0.0014435922219445182, + "loss": 1.9337, + "step": 1261 + }, + { + "epoch": 0.13312236286919832, + "grad_norm": 0.9192911982536316, + "learning_rate": 0.0014434966602168478, + "loss": 1.9034, + "step": 1262 + }, + { + "epoch": 0.1332278481012658, + "grad_norm": 0.7188848853111267, + "learning_rate": 0.0014434010207794185, + "loss": 1.9023, + "step": 1263 + }, + { + "epoch": 0.13333333333333333, + "grad_norm": 0.6205708980560303, + "learning_rate": 0.0014433053036429474, + "loss": 1.8985, + "step": 1264 + }, + { + "epoch": 0.13343881856540085, + "grad_norm": 0.7155177593231201, + "learning_rate": 0.00144320950881816, + "loss": 1.915, + "step": 1265 + }, + { + "epoch": 0.13354430379746834, + "grad_norm": 0.9251977205276489, + "learning_rate": 0.0014431136363157902, + "loss": 1.9197, + "step": 1266 + }, + { + "epoch": 0.13364978902953586, + "grad_norm": 0.684947669506073, + "learning_rate": 0.0014430176861465812, + "loss": 1.941, + "step": 1267 + }, + { + "epoch": 0.13375527426160339, + "grad_norm": 0.66976398229599, + "learning_rate": 0.001442921658321285, + "loss": 1.8895, + "step": 1268 + }, + { + "epoch": 0.13386075949367088, + "grad_norm": 0.8384138941764832, + "learning_rate": 0.0014428255528506617, + "loss": 1.9533, + "step": 1269 + }, + { + "epoch": 0.1339662447257384, + "grad_norm": 0.6831515431404114, + "learning_rate": 0.0014427293697454803, + "loss": 1.9176, + "step": 1270 + }, + { + "epoch": 0.13407172995780592, + "grad_norm": 0.7184561491012573, + "learning_rate": 0.001442633109016519, + "loss": 1.9441, + "step": 1271 + }, + { + "epoch": 0.1341772151898734, + "grad_norm": 0.7760181427001953, + "learning_rate": 0.001442536770674564, + "loss": 1.9833, + "step": 1272 + }, + { + "epoch": 
0.13428270042194093, + "grad_norm": 0.773673415184021, + "learning_rate": 0.0014424403547304103, + "loss": 1.9089, + "step": 1273 + }, + { + "epoch": 0.13438818565400845, + "grad_norm": 0.8363143801689148, + "learning_rate": 0.0014423438611948624, + "loss": 1.9298, + "step": 1274 + }, + { + "epoch": 0.13449367088607594, + "grad_norm": 0.6174676418304443, + "learning_rate": 0.0014422472900787323, + "loss": 1.9308, + "step": 1275 + }, + { + "epoch": 0.13459915611814346, + "grad_norm": 0.5849359631538391, + "learning_rate": 0.0014421506413928415, + "loss": 1.9035, + "step": 1276 + }, + { + "epoch": 0.13470464135021096, + "grad_norm": 0.6343469023704529, + "learning_rate": 0.0014420539151480199, + "loss": 1.9417, + "step": 1277 + }, + { + "epoch": 0.13481012658227848, + "grad_norm": 0.6490067839622498, + "learning_rate": 0.0014419571113551063, + "loss": 1.9142, + "step": 1278 + }, + { + "epoch": 0.134915611814346, + "grad_norm": 0.6505517363548279, + "learning_rate": 0.0014418602300249482, + "loss": 1.9395, + "step": 1279 + }, + { + "epoch": 0.1350210970464135, + "grad_norm": 0.8504713773727417, + "learning_rate": 0.001441763271168401, + "loss": 1.8823, + "step": 1280 + }, + { + "epoch": 0.135126582278481, + "grad_norm": 0.7335286140441895, + "learning_rate": 0.00144166623479633, + "loss": 1.915, + "step": 1281 + }, + { + "epoch": 0.13523206751054853, + "grad_norm": 0.721224308013916, + "learning_rate": 0.0014415691209196085, + "loss": 1.8729, + "step": 1282 + }, + { + "epoch": 0.13533755274261602, + "grad_norm": 0.5964475870132446, + "learning_rate": 0.0014414719295491184, + "loss": 1.9243, + "step": 1283 + }, + { + "epoch": 0.13544303797468354, + "grad_norm": 0.5670751929283142, + "learning_rate": 0.0014413746606957505, + "loss": 1.8903, + "step": 1284 + }, + { + "epoch": 0.13554852320675106, + "grad_norm": 0.6008087992668152, + "learning_rate": 0.0014412773143704046, + "loss": 1.8962, + "step": 1285 + }, + { + "epoch": 0.13565400843881856, + "grad_norm": 
0.561278223991394, + "learning_rate": 0.0014411798905839884, + "loss": 1.9192, + "step": 1286 + }, + { + "epoch": 0.13575949367088608, + "grad_norm": 0.5897908806800842, + "learning_rate": 0.0014410823893474193, + "loss": 1.9056, + "step": 1287 + }, + { + "epoch": 0.1358649789029536, + "grad_norm": 0.5822799801826477, + "learning_rate": 0.001440984810671622, + "loss": 1.9239, + "step": 1288 + }, + { + "epoch": 0.1359704641350211, + "grad_norm": 0.7327895760536194, + "learning_rate": 0.0014408871545675314, + "loss": 1.9556, + "step": 1289 + }, + { + "epoch": 0.1360759493670886, + "grad_norm": 0.7404309511184692, + "learning_rate": 0.00144078942104609, + "loss": 1.9298, + "step": 1290 + }, + { + "epoch": 0.13618143459915613, + "grad_norm": 0.5900877714157104, + "learning_rate": 0.0014406916101182491, + "loss": 1.9399, + "step": 1291 + }, + { + "epoch": 0.13628691983122362, + "grad_norm": 0.6265782713890076, + "learning_rate": 0.0014405937217949695, + "loss": 1.9043, + "step": 1292 + }, + { + "epoch": 0.13639240506329114, + "grad_norm": 0.8728049397468567, + "learning_rate": 0.0014404957560872197, + "loss": 1.9118, + "step": 1293 + }, + { + "epoch": 0.13649789029535864, + "grad_norm": 0.8807627558708191, + "learning_rate": 0.0014403977130059773, + "loss": 1.9321, + "step": 1294 + }, + { + "epoch": 0.13660337552742616, + "grad_norm": 0.6475411057472229, + "learning_rate": 0.0014402995925622284, + "loss": 1.8492, + "step": 1295 + }, + { + "epoch": 0.13670886075949368, + "grad_norm": 0.6884780526161194, + "learning_rate": 0.0014402013947669681, + "loss": 1.9127, + "step": 1296 + }, + { + "epoch": 0.13681434599156117, + "grad_norm": 0.6980240941047668, + "learning_rate": 0.0014401031196312, + "loss": 1.9173, + "step": 1297 + }, + { + "epoch": 0.1369198312236287, + "grad_norm": 0.8129332661628723, + "learning_rate": 0.001440004767165936, + "loss": 1.8628, + "step": 1298 + }, + { + "epoch": 0.1370253164556962, + "grad_norm": 0.9249903559684753, + "learning_rate": 
0.0014399063373821972, + "loss": 1.8948, + "step": 1299 + }, + { + "epoch": 0.1371308016877637, + "grad_norm": 0.7490662336349487, + "learning_rate": 0.001439807830291013, + "loss": 1.9077, + "step": 1300 + }, + { + "epoch": 0.13723628691983122, + "grad_norm": 0.5971458554267883, + "learning_rate": 0.001439709245903422, + "loss": 1.9074, + "step": 1301 + }, + { + "epoch": 0.13734177215189874, + "grad_norm": 0.8399003148078918, + "learning_rate": 0.0014396105842304707, + "loss": 1.9347, + "step": 1302 + }, + { + "epoch": 0.13744725738396624, + "grad_norm": 0.8345500826835632, + "learning_rate": 0.0014395118452832146, + "loss": 1.9072, + "step": 1303 + }, + { + "epoch": 0.13755274261603376, + "grad_norm": 0.7797419428825378, + "learning_rate": 0.001439413029072718, + "loss": 1.9037, + "step": 1304 + }, + { + "epoch": 0.13765822784810128, + "grad_norm": 0.7397858500480652, + "learning_rate": 0.001439314135610054, + "loss": 1.932, + "step": 1305 + }, + { + "epoch": 0.13776371308016877, + "grad_norm": 0.7233573198318481, + "learning_rate": 0.0014392151649063039, + "loss": 1.9068, + "step": 1306 + }, + { + "epoch": 0.1378691983122363, + "grad_norm": 0.9670490622520447, + "learning_rate": 0.0014391161169725573, + "loss": 1.8886, + "step": 1307 + }, + { + "epoch": 0.1379746835443038, + "grad_norm": 0.6938996315002441, + "learning_rate": 0.001439016991819914, + "loss": 1.9323, + "step": 1308 + }, + { + "epoch": 0.1380801687763713, + "grad_norm": 0.5717893838882446, + "learning_rate": 0.001438917789459481, + "loss": 1.8849, + "step": 1309 + }, + { + "epoch": 0.13818565400843882, + "grad_norm": 0.7129306197166443, + "learning_rate": 0.0014388185099023744, + "loss": 1.8913, + "step": 1310 + }, + { + "epoch": 0.13829113924050632, + "grad_norm": 0.6391154527664185, + "learning_rate": 0.001438719153159719, + "loss": 1.9077, + "step": 1311 + }, + { + "epoch": 0.13839662447257384, + "grad_norm": 0.6345740556716919, + "learning_rate": 0.0014386197192426482, + "loss": 1.9086, + 
"step": 1312 + }, + { + "epoch": 0.13850210970464136, + "grad_norm": 0.7683095335960388, + "learning_rate": 0.001438520208162304, + "loss": 1.9311, + "step": 1313 + }, + { + "epoch": 0.13860759493670885, + "grad_norm": 0.5813595056533813, + "learning_rate": 0.0014384206199298374, + "loss": 1.9385, + "step": 1314 + }, + { + "epoch": 0.13871308016877637, + "grad_norm": 0.6391108632087708, + "learning_rate": 0.0014383209545564073, + "loss": 1.929, + "step": 1315 + }, + { + "epoch": 0.1388185654008439, + "grad_norm": 0.5481319427490234, + "learning_rate": 0.001438221212053182, + "loss": 1.8607, + "step": 1316 + }, + { + "epoch": 0.13892405063291138, + "grad_norm": 0.6657216548919678, + "learning_rate": 0.0014381213924313386, + "loss": 1.901, + "step": 1317 + }, + { + "epoch": 0.1390295358649789, + "grad_norm": 0.8021477460861206, + "learning_rate": 0.0014380214957020613, + "loss": 1.9071, + "step": 1318 + }, + { + "epoch": 0.13913502109704642, + "grad_norm": 0.6098538041114807, + "learning_rate": 0.001437921521876545, + "loss": 1.9178, + "step": 1319 + }, + { + "epoch": 0.13924050632911392, + "grad_norm": 0.6143245697021484, + "learning_rate": 0.0014378214709659916, + "loss": 1.9023, + "step": 1320 + }, + { + "epoch": 0.13934599156118144, + "grad_norm": 0.7643201351165771, + "learning_rate": 0.0014377213429816128, + "loss": 1.8907, + "step": 1321 + }, + { + "epoch": 0.13945147679324896, + "grad_norm": 0.745007336139679, + "learning_rate": 0.0014376211379346282, + "loss": 1.896, + "step": 1322 + }, + { + "epoch": 0.13955696202531645, + "grad_norm": 0.6645268797874451, + "learning_rate": 0.0014375208558362663, + "loss": 1.9075, + "step": 1323 + }, + { + "epoch": 0.13966244725738397, + "grad_norm": 0.6169770956039429, + "learning_rate": 0.0014374204966977639, + "loss": 1.9066, + "step": 1324 + }, + { + "epoch": 0.13976793248945146, + "grad_norm": 0.7142629027366638, + "learning_rate": 0.0014373200605303674, + "loss": 1.8874, + "step": 1325 + }, + { + "epoch": 
0.13987341772151898, + "grad_norm": 0.905268669128418, + "learning_rate": 0.001437219547345331, + "loss": 1.9124, + "step": 1326 + }, + { + "epoch": 0.1399789029535865, + "grad_norm": 0.6375822424888611, + "learning_rate": 0.0014371189571539174, + "loss": 1.8901, + "step": 1327 + }, + { + "epoch": 0.140084388185654, + "grad_norm": 0.5832866430282593, + "learning_rate": 0.0014370182899673982, + "loss": 1.8893, + "step": 1328 + }, + { + "epoch": 0.14018987341772152, + "grad_norm": 0.9832186102867126, + "learning_rate": 0.0014369175457970538, + "loss": 1.9104, + "step": 1329 + }, + { + "epoch": 0.14029535864978904, + "grad_norm": 1.0404150485992432, + "learning_rate": 0.0014368167246541733, + "loss": 1.9257, + "step": 1330 + }, + { + "epoch": 0.14040084388185653, + "grad_norm": 0.6539885401725769, + "learning_rate": 0.0014367158265500537, + "loss": 1.92, + "step": 1331 + }, + { + "epoch": 0.14050632911392405, + "grad_norm": 0.6120945811271667, + "learning_rate": 0.0014366148514960016, + "loss": 1.899, + "step": 1332 + }, + { + "epoch": 0.14061181434599157, + "grad_norm": 0.8625987768173218, + "learning_rate": 0.001436513799503332, + "loss": 1.8802, + "step": 1333 + }, + { + "epoch": 0.14071729957805906, + "grad_norm": 0.7135043740272522, + "learning_rate": 0.0014364126705833675, + "loss": 1.899, + "step": 1334 + }, + { + "epoch": 0.14082278481012658, + "grad_norm": 0.7978743314743042, + "learning_rate": 0.0014363114647474406, + "loss": 1.8736, + "step": 1335 + }, + { + "epoch": 0.1409282700421941, + "grad_norm": 0.9941582679748535, + "learning_rate": 0.0014362101820068918, + "loss": 1.9057, + "step": 1336 + }, + { + "epoch": 0.1410337552742616, + "grad_norm": 0.8251815438270569, + "learning_rate": 0.0014361088223730704, + "loss": 1.9225, + "step": 1337 + }, + { + "epoch": 0.14113924050632912, + "grad_norm": 0.8814579844474792, + "learning_rate": 0.0014360073858573341, + "loss": 1.9001, + "step": 1338 + }, + { + "epoch": 0.14124472573839664, + "grad_norm": 
0.9066755771636963, + "learning_rate": 0.0014359058724710497, + "loss": 1.9472, + "step": 1339 + }, + { + "epoch": 0.14135021097046413, + "grad_norm": 0.8799824118614197, + "learning_rate": 0.0014358042822255918, + "loss": 1.8954, + "step": 1340 + }, + { + "epoch": 0.14145569620253165, + "grad_norm": 0.6933948397636414, + "learning_rate": 0.0014357026151323444, + "loss": 1.9132, + "step": 1341 + }, + { + "epoch": 0.14156118143459914, + "grad_norm": 0.7067035436630249, + "learning_rate": 0.0014356008712027, + "loss": 1.9425, + "step": 1342 + }, + { + "epoch": 0.14166666666666666, + "grad_norm": 0.816493034362793, + "learning_rate": 0.0014354990504480592, + "loss": 1.8893, + "step": 1343 + }, + { + "epoch": 0.14177215189873418, + "grad_norm": 0.7853293418884277, + "learning_rate": 0.0014353971528798313, + "loss": 1.9202, + "step": 1344 + }, + { + "epoch": 0.14187763713080168, + "grad_norm": 0.7116511464118958, + "learning_rate": 0.001435295178509435, + "loss": 1.8959, + "step": 1345 + }, + { + "epoch": 0.1419831223628692, + "grad_norm": 0.9027408361434937, + "learning_rate": 0.0014351931273482966, + "loss": 1.8952, + "step": 1346 + }, + { + "epoch": 0.14208860759493672, + "grad_norm": 0.9985544085502625, + "learning_rate": 0.0014350909994078516, + "loss": 1.9138, + "step": 1347 + }, + { + "epoch": 0.1421940928270042, + "grad_norm": 0.567952573299408, + "learning_rate": 0.0014349887946995441, + "loss": 1.892, + "step": 1348 + }, + { + "epoch": 0.14229957805907173, + "grad_norm": 0.8021208047866821, + "learning_rate": 0.0014348865132348262, + "loss": 1.9071, + "step": 1349 + }, + { + "epoch": 0.14240506329113925, + "grad_norm": 0.7286292910575867, + "learning_rate": 0.0014347841550251597, + "loss": 1.9108, + "step": 1350 + }, + { + "epoch": 0.14251054852320674, + "grad_norm": 0.5960955023765564, + "learning_rate": 0.0014346817200820137, + "loss": 1.8862, + "step": 1351 + }, + { + "epoch": 0.14261603375527426, + "grad_norm": 0.8376131057739258, + "learning_rate": 
0.0014345792084168672, + "loss": 1.8827, + "step": 1352 + }, + { + "epoch": 0.14272151898734178, + "grad_norm": 0.6217941641807556, + "learning_rate": 0.0014344766200412062, + "loss": 1.9195, + "step": 1353 + }, + { + "epoch": 0.14282700421940928, + "grad_norm": 0.7058742642402649, + "learning_rate": 0.0014343739549665274, + "loss": 1.9246, + "step": 1354 + }, + { + "epoch": 0.1429324894514768, + "grad_norm": 0.9605209231376648, + "learning_rate": 0.0014342712132043342, + "loss": 1.8822, + "step": 1355 + }, + { + "epoch": 0.14303797468354432, + "grad_norm": 0.7261276841163635, + "learning_rate": 0.001434168394766139, + "loss": 1.8793, + "step": 1356 + }, + { + "epoch": 0.1431434599156118, + "grad_norm": 0.6906192302703857, + "learning_rate": 0.001434065499663464, + "loss": 1.9082, + "step": 1357 + }, + { + "epoch": 0.14324894514767933, + "grad_norm": 0.6652770042419434, + "learning_rate": 0.0014339625279078388, + "loss": 1.8993, + "step": 1358 + }, + { + "epoch": 0.14335443037974682, + "grad_norm": 0.7592640519142151, + "learning_rate": 0.0014338594795108017, + "loss": 1.88, + "step": 1359 + }, + { + "epoch": 0.14345991561181434, + "grad_norm": 0.5719397068023682, + "learning_rate": 0.0014337563544838997, + "loss": 1.912, + "step": 1360 + }, + { + "epoch": 0.14356540084388186, + "grad_norm": 0.5898477435112, + "learning_rate": 0.0014336531528386888, + "loss": 1.8769, + "step": 1361 + }, + { + "epoch": 0.14367088607594936, + "grad_norm": 0.8269219398498535, + "learning_rate": 0.0014335498745867332, + "loss": 1.8986, + "step": 1362 + }, + { + "epoch": 0.14377637130801688, + "grad_norm": 0.6951969265937805, + "learning_rate": 0.0014334465197396054, + "loss": 1.8993, + "step": 1363 + }, + { + "epoch": 0.1438818565400844, + "grad_norm": 0.5643299221992493, + "learning_rate": 0.0014333430883088877, + "loss": 1.8885, + "step": 1364 + }, + { + "epoch": 0.1439873417721519, + "grad_norm": 0.9499465227127075, + "learning_rate": 0.001433239580306169, + "loss": 1.8846, + 
"step": 1365 + }, + { + "epoch": 0.1440928270042194, + "grad_norm": 0.9675007462501526, + "learning_rate": 0.0014331359957430482, + "loss": 1.8985, + "step": 1366 + }, + { + "epoch": 0.14419831223628693, + "grad_norm": 0.6380030512809753, + "learning_rate": 0.001433032334631133, + "loss": 1.9037, + "step": 1367 + }, + { + "epoch": 0.14430379746835442, + "grad_norm": 0.5914275050163269, + "learning_rate": 0.0014329285969820389, + "loss": 1.8805, + "step": 1368 + }, + { + "epoch": 0.14440928270042194, + "grad_norm": 0.6340346336364746, + "learning_rate": 0.00143282478280739, + "loss": 1.9156, + "step": 1369 + }, + { + "epoch": 0.14451476793248946, + "grad_norm": 0.6241291761398315, + "learning_rate": 0.001432720892118819, + "loss": 1.8829, + "step": 1370 + }, + { + "epoch": 0.14462025316455696, + "grad_norm": 0.6167173385620117, + "learning_rate": 0.0014326169249279683, + "loss": 1.8832, + "step": 1371 + }, + { + "epoch": 0.14472573839662448, + "grad_norm": 0.5829523205757141, + "learning_rate": 0.001432512881246487, + "loss": 1.871, + "step": 1372 + }, + { + "epoch": 0.144831223628692, + "grad_norm": 0.5034545063972473, + "learning_rate": 0.0014324087610860339, + "loss": 1.8951, + "step": 1373 + }, + { + "epoch": 0.1449367088607595, + "grad_norm": 0.6312142014503479, + "learning_rate": 0.0014323045644582765, + "loss": 1.8756, + "step": 1374 + }, + { + "epoch": 0.145042194092827, + "grad_norm": 0.7175055742263794, + "learning_rate": 0.0014322002913748902, + "loss": 1.8995, + "step": 1375 + }, + { + "epoch": 0.1451476793248945, + "grad_norm": 0.5865703225135803, + "learning_rate": 0.0014320959418475596, + "loss": 1.8746, + "step": 1376 + }, + { + "epoch": 0.14525316455696202, + "grad_norm": 0.5730242729187012, + "learning_rate": 0.0014319915158879776, + "loss": 1.8578, + "step": 1377 + }, + { + "epoch": 0.14535864978902954, + "grad_norm": 0.7867786288261414, + "learning_rate": 0.0014318870135078452, + "loss": 1.8554, + "step": 1378 + }, + { + "epoch": 
0.14546413502109704, + "grad_norm": 0.8412884473800659, + "learning_rate": 0.001431782434718873, + "loss": 1.9265, + "step": 1379 + }, + { + "epoch": 0.14556962025316456, + "grad_norm": 0.634476363658905, + "learning_rate": 0.0014316777795327794, + "loss": 1.8673, + "step": 1380 + }, + { + "epoch": 0.14567510548523208, + "grad_norm": 0.5584858655929565, + "learning_rate": 0.0014315730479612914, + "loss": 1.921, + "step": 1381 + }, + { + "epoch": 0.14578059071729957, + "grad_norm": 0.6598551869392395, + "learning_rate": 0.0014314682400161445, + "loss": 1.8873, + "step": 1382 + }, + { + "epoch": 0.1458860759493671, + "grad_norm": 0.7445759773254395, + "learning_rate": 0.0014313633557090834, + "loss": 1.8844, + "step": 1383 + }, + { + "epoch": 0.1459915611814346, + "grad_norm": 0.5534412860870361, + "learning_rate": 0.0014312583950518607, + "loss": 1.8794, + "step": 1384 + }, + { + "epoch": 0.1460970464135021, + "grad_norm": 0.6073155999183655, + "learning_rate": 0.0014311533580562378, + "loss": 1.9123, + "step": 1385 + }, + { + "epoch": 0.14620253164556962, + "grad_norm": 0.5547102093696594, + "learning_rate": 0.0014310482447339845, + "loss": 1.8977, + "step": 1386 + }, + { + "epoch": 0.14630801687763714, + "grad_norm": 0.6571534276008606, + "learning_rate": 0.0014309430550968794, + "loss": 1.8912, + "step": 1387 + }, + { + "epoch": 0.14641350210970464, + "grad_norm": 0.6907552480697632, + "learning_rate": 0.0014308377891567095, + "loss": 1.8859, + "step": 1388 + }, + { + "epoch": 0.14651898734177216, + "grad_norm": 0.5668095350265503, + "learning_rate": 0.0014307324469252703, + "loss": 1.9005, + "step": 1389 + }, + { + "epoch": 0.14662447257383968, + "grad_norm": 0.731006920337677, + "learning_rate": 0.001430627028414366, + "loss": 1.9063, + "step": 1390 + }, + { + "epoch": 0.14672995780590717, + "grad_norm": 0.7819527983665466, + "learning_rate": 0.0014305215336358093, + "loss": 1.9236, + "step": 1391 + }, + { + "epoch": 0.1468354430379747, + "grad_norm": 
0.5683172941207886, + "learning_rate": 0.0014304159626014213, + "loss": 1.9026, + "step": 1392 + }, + { + "epoch": 0.14694092827004218, + "grad_norm": 0.6816293001174927, + "learning_rate": 0.0014303103153230322, + "loss": 1.8713, + "step": 1393 + }, + { + "epoch": 0.1470464135021097, + "grad_norm": 0.711218535900116, + "learning_rate": 0.0014302045918124795, + "loss": 1.8772, + "step": 1394 + }, + { + "epoch": 0.14715189873417722, + "grad_norm": 0.7073158621788025, + "learning_rate": 0.0014300987920816107, + "loss": 1.9114, + "step": 1395 + }, + { + "epoch": 0.14725738396624471, + "grad_norm": 0.6013917922973633, + "learning_rate": 0.0014299929161422807, + "loss": 1.8894, + "step": 1396 + }, + { + "epoch": 0.14736286919831224, + "grad_norm": 0.630455732345581, + "learning_rate": 0.001429886964006354, + "loss": 1.8523, + "step": 1397 + }, + { + "epoch": 0.14746835443037976, + "grad_norm": 0.5391079187393188, + "learning_rate": 0.0014297809356857026, + "loss": 1.8383, + "step": 1398 + }, + { + "epoch": 0.14757383966244725, + "grad_norm": 0.6038159132003784, + "learning_rate": 0.0014296748311922074, + "loss": 1.8864, + "step": 1399 + }, + { + "epoch": 0.14767932489451477, + "grad_norm": 0.5871886014938354, + "learning_rate": 0.0014295686505377586, + "loss": 1.8474, + "step": 1400 + }, + { + "epoch": 0.1477848101265823, + "grad_norm": 0.5435317158699036, + "learning_rate": 0.001429462393734254, + "loss": 1.8975, + "step": 1401 + }, + { + "epoch": 0.14789029535864978, + "grad_norm": 0.6952894926071167, + "learning_rate": 0.0014293560607935999, + "loss": 1.8766, + "step": 1402 + }, + { + "epoch": 0.1479957805907173, + "grad_norm": 0.723815381526947, + "learning_rate": 0.0014292496517277116, + "loss": 1.9193, + "step": 1403 + }, + { + "epoch": 0.14810126582278482, + "grad_norm": 0.4927630126476288, + "learning_rate": 0.0014291431665485125, + "loss": 1.8896, + "step": 1404 + }, + { + "epoch": 0.14820675105485231, + "grad_norm": 0.6176181435585022, + "learning_rate": 
0.0014290366052679352, + "loss": 1.8586, + "step": 1405 + }, + { + "epoch": 0.14831223628691984, + "grad_norm": 0.5979462265968323, + "learning_rate": 0.0014289299678979207, + "loss": 1.8623, + "step": 1406 + }, + { + "epoch": 0.14841772151898736, + "grad_norm": 0.5242260098457336, + "learning_rate": 0.0014288232544504174, + "loss": 1.8054, + "step": 1407 + }, + { + "epoch": 0.14852320675105485, + "grad_norm": 0.6949982643127441, + "learning_rate": 0.0014287164649373837, + "loss": 1.8984, + "step": 1408 + }, + { + "epoch": 0.14862869198312237, + "grad_norm": 0.7949866652488708, + "learning_rate": 0.0014286095993707856, + "loss": 1.9068, + "step": 1409 + }, + { + "epoch": 0.14873417721518986, + "grad_norm": 0.5880922675132751, + "learning_rate": 0.0014285026577625982, + "loss": 1.8976, + "step": 1410 + }, + { + "epoch": 0.14883966244725738, + "grad_norm": 0.7167695760726929, + "learning_rate": 0.0014283956401248048, + "loss": 1.9209, + "step": 1411 + }, + { + "epoch": 0.1489451476793249, + "grad_norm": 0.8831102252006531, + "learning_rate": 0.0014282885464693969, + "loss": 1.8744, + "step": 1412 + }, + { + "epoch": 0.1490506329113924, + "grad_norm": 0.7124742865562439, + "learning_rate": 0.001428181376808375, + "loss": 1.8826, + "step": 1413 + }, + { + "epoch": 0.14915611814345991, + "grad_norm": 0.9048194885253906, + "learning_rate": 0.0014280741311537483, + "loss": 1.9008, + "step": 1414 + }, + { + "epoch": 0.14926160337552744, + "grad_norm": 0.9924871325492859, + "learning_rate": 0.001427966809517534, + "loss": 1.8571, + "step": 1415 + }, + { + "epoch": 0.14936708860759493, + "grad_norm": 0.8502264022827148, + "learning_rate": 0.001427859411911758, + "loss": 1.9277, + "step": 1416 + }, + { + "epoch": 0.14947257383966245, + "grad_norm": 0.8177351951599121, + "learning_rate": 0.0014277519383484548, + "loss": 1.8685, + "step": 1417 + }, + { + "epoch": 0.14957805907172997, + "grad_norm": 0.6695716381072998, + "learning_rate": 0.0014276443888396675, + "loss": 1.893, + 
"step": 1418 + }, + { + "epoch": 0.14968354430379746, + "grad_norm": 0.7677885890007019, + "learning_rate": 0.0014275367633974473, + "loss": 1.894, + "step": 1419 + }, + { + "epoch": 0.14978902953586498, + "grad_norm": 0.8541100025177002, + "learning_rate": 0.0014274290620338542, + "loss": 1.9128, + "step": 1420 + }, + { + "epoch": 0.1498945147679325, + "grad_norm": 0.7208904027938843, + "learning_rate": 0.0014273212847609566, + "loss": 1.8906, + "step": 1421 + }, + { + "epoch": 0.15, + "grad_norm": 0.8357604146003723, + "learning_rate": 0.0014272134315908317, + "loss": 1.8748, + "step": 1422 + }, + { + "epoch": 0.15010548523206751, + "grad_norm": 0.6135260462760925, + "learning_rate": 0.0014271055025355652, + "loss": 1.8893, + "step": 1423 + }, + { + "epoch": 0.150210970464135, + "grad_norm": 0.7063449621200562, + "learning_rate": 0.0014269974976072505, + "loss": 1.8805, + "step": 1424 + }, + { + "epoch": 0.15031645569620253, + "grad_norm": 0.7023139595985413, + "learning_rate": 0.0014268894168179903, + "loss": 1.8588, + "step": 1425 + }, + { + "epoch": 0.15042194092827005, + "grad_norm": 0.673704206943512, + "learning_rate": 0.0014267812601798957, + "loss": 1.9064, + "step": 1426 + }, + { + "epoch": 0.15052742616033754, + "grad_norm": 0.8587343692779541, + "learning_rate": 0.0014266730277050863, + "loss": 1.9303, + "step": 1427 + }, + { + "epoch": 0.15063291139240506, + "grad_norm": 0.846866250038147, + "learning_rate": 0.00142656471940569, + "loss": 1.8798, + "step": 1428 + }, + { + "epoch": 0.15073839662447258, + "grad_norm": 1.1234171390533447, + "learning_rate": 0.001426456335293843, + "loss": 1.8753, + "step": 1429 + }, + { + "epoch": 0.15084388185654007, + "grad_norm": 0.7582347393035889, + "learning_rate": 0.0014263478753816906, + "loss": 1.8664, + "step": 1430 + }, + { + "epoch": 0.1509493670886076, + "grad_norm": 0.7723405361175537, + "learning_rate": 0.0014262393396813863, + "loss": 1.8892, + "step": 1431 + }, + { + "epoch": 0.15105485232067511, + 
"grad_norm": 0.9440683126449585, + "learning_rate": 0.001426130728205092, + "loss": 1.9002, + "step": 1432 + }, + { + "epoch": 0.1511603375527426, + "grad_norm": 0.7082378268241882, + "learning_rate": 0.001426022040964978, + "loss": 1.8334, + "step": 1433 + }, + { + "epoch": 0.15126582278481013, + "grad_norm": 0.6791836619377136, + "learning_rate": 0.0014259132779732234, + "loss": 1.8757, + "step": 1434 + }, + { + "epoch": 0.15137130801687765, + "grad_norm": 0.7042331695556641, + "learning_rate": 0.0014258044392420155, + "loss": 1.9161, + "step": 1435 + }, + { + "epoch": 0.15147679324894514, + "grad_norm": 0.8494817614555359, + "learning_rate": 0.0014256955247835504, + "loss": 1.8661, + "step": 1436 + }, + { + "epoch": 0.15158227848101266, + "grad_norm": 0.520348846912384, + "learning_rate": 0.0014255865346100324, + "loss": 1.8937, + "step": 1437 + }, + { + "epoch": 0.15168776371308018, + "grad_norm": 0.8544005751609802, + "learning_rate": 0.0014254774687336744, + "loss": 1.8544, + "step": 1438 + }, + { + "epoch": 0.15179324894514767, + "grad_norm": 0.9180837273597717, + "learning_rate": 0.0014253683271666978, + "loss": 1.9015, + "step": 1439 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 0.5638651847839355, + "learning_rate": 0.0014252591099213326, + "loss": 1.8808, + "step": 1440 + }, + { + "epoch": 0.1520042194092827, + "grad_norm": 0.7862883806228638, + "learning_rate": 0.0014251498170098167, + "loss": 1.8429, + "step": 1441 + }, + { + "epoch": 0.1521097046413502, + "grad_norm": 0.8892688155174255, + "learning_rate": 0.0014250404484443975, + "loss": 1.894, + "step": 1442 + }, + { + "epoch": 0.15221518987341773, + "grad_norm": 0.5870394110679626, + "learning_rate": 0.0014249310042373298, + "loss": 1.8755, + "step": 1443 + }, + { + "epoch": 0.15232067510548522, + "grad_norm": 0.7698791027069092, + "learning_rate": 0.0014248214844008776, + "loss": 1.8893, + "step": 1444 + }, + { + "epoch": 0.15242616033755274, + "grad_norm": 0.7996105551719666, + 
"learning_rate": 0.001424711888947313, + "loss": 1.863, + "step": 1445 + }, + { + "epoch": 0.15253164556962026, + "grad_norm": 0.6042503118515015, + "learning_rate": 0.001424602217888917, + "loss": 1.8677, + "step": 1446 + }, + { + "epoch": 0.15263713080168775, + "grad_norm": 0.7977689504623413, + "learning_rate": 0.0014244924712379786, + "loss": 1.8885, + "step": 1447 + }, + { + "epoch": 0.15274261603375527, + "grad_norm": 0.671804666519165, + "learning_rate": 0.0014243826490067954, + "loss": 1.8774, + "step": 1448 + }, + { + "epoch": 0.1528481012658228, + "grad_norm": 0.6422544717788696, + "learning_rate": 0.0014242727512076736, + "loss": 1.8977, + "step": 1449 + }, + { + "epoch": 0.1529535864978903, + "grad_norm": 0.8386622667312622, + "learning_rate": 0.001424162777852928, + "loss": 1.899, + "step": 1450 + }, + { + "epoch": 0.1530590717299578, + "grad_norm": 0.7438416481018066, + "learning_rate": 0.0014240527289548814, + "loss": 1.8917, + "step": 1451 + }, + { + "epoch": 0.15316455696202533, + "grad_norm": 0.5332273244857788, + "learning_rate": 0.0014239426045258652, + "loss": 1.8849, + "step": 1452 + }, + { + "epoch": 0.15327004219409282, + "grad_norm": 0.6253914833068848, + "learning_rate": 0.0014238324045782198, + "loss": 1.8575, + "step": 1453 + }, + { + "epoch": 0.15337552742616034, + "grad_norm": 0.5243579745292664, + "learning_rate": 0.0014237221291242932, + "loss": 1.8306, + "step": 1454 + }, + { + "epoch": 0.15348101265822786, + "grad_norm": 0.5512919425964355, + "learning_rate": 0.0014236117781764425, + "loss": 1.8528, + "step": 1455 + }, + { + "epoch": 0.15358649789029535, + "grad_norm": 0.6124680042266846, + "learning_rate": 0.0014235013517470334, + "loss": 1.8976, + "step": 1456 + }, + { + "epoch": 0.15369198312236287, + "grad_norm": 0.5778800845146179, + "learning_rate": 0.0014233908498484393, + "loss": 1.8487, + "step": 1457 + }, + { + "epoch": 0.15379746835443037, + "grad_norm": 0.5900087356567383, + "learning_rate": 0.0014232802724930427, + 
"loss": 1.8617, + "step": 1458 + }, + { + "epoch": 0.1539029535864979, + "grad_norm": 0.7359034419059753, + "learning_rate": 0.0014231696196932342, + "loss": 1.8773, + "step": 1459 + }, + { + "epoch": 0.1540084388185654, + "grad_norm": 0.6664808988571167, + "learning_rate": 0.0014230588914614134, + "loss": 1.9272, + "step": 1460 + }, + { + "epoch": 0.1541139240506329, + "grad_norm": 0.5533428192138672, + "learning_rate": 0.0014229480878099872, + "loss": 1.8871, + "step": 1461 + }, + { + "epoch": 0.15421940928270042, + "grad_norm": 0.7634075284004211, + "learning_rate": 0.0014228372087513725, + "loss": 1.8293, + "step": 1462 + }, + { + "epoch": 0.15432489451476794, + "grad_norm": 0.531859278678894, + "learning_rate": 0.0014227262542979933, + "loss": 1.8485, + "step": 1463 + }, + { + "epoch": 0.15443037974683543, + "grad_norm": 0.6466610431671143, + "learning_rate": 0.0014226152244622826, + "loss": 1.8717, + "step": 1464 + }, + { + "epoch": 0.15453586497890295, + "grad_norm": 0.7180575132369995, + "learning_rate": 0.0014225041192566822, + "loss": 1.8599, + "step": 1465 + }, + { + "epoch": 0.15464135021097047, + "grad_norm": 0.6475085020065308, + "learning_rate": 0.001422392938693642, + "loss": 1.8605, + "step": 1466 + }, + { + "epoch": 0.15474683544303797, + "grad_norm": 0.6366753578186035, + "learning_rate": 0.0014222816827856202, + "loss": 1.9169, + "step": 1467 + }, + { + "epoch": 0.1548523206751055, + "grad_norm": 0.6405039429664612, + "learning_rate": 0.0014221703515450834, + "loss": 1.8901, + "step": 1468 + }, + { + "epoch": 0.154957805907173, + "grad_norm": 0.5279743671417236, + "learning_rate": 0.001422058944984507, + "loss": 1.9021, + "step": 1469 + }, + { + "epoch": 0.1550632911392405, + "grad_norm": 0.9017136693000793, + "learning_rate": 0.0014219474631163745, + "loss": 1.8761, + "step": 1470 + }, + { + "epoch": 0.15516877637130802, + "grad_norm": 0.8705962896347046, + "learning_rate": 0.0014218359059531783, + "loss": 1.8937, + "step": 1471 + }, + { + 
"epoch": 0.15527426160337554, + "grad_norm": 0.5104793906211853, + "learning_rate": 0.0014217242735074188, + "loss": 1.8877, + "step": 1472 + }, + { + "epoch": 0.15537974683544303, + "grad_norm": 0.7928394675254822, + "learning_rate": 0.0014216125657916046, + "loss": 1.8645, + "step": 1473 + }, + { + "epoch": 0.15548523206751055, + "grad_norm": 0.7384815812110901, + "learning_rate": 0.0014215007828182536, + "loss": 1.862, + "step": 1474 + }, + { + "epoch": 0.15559071729957805, + "grad_norm": 0.5451481342315674, + "learning_rate": 0.0014213889245998917, + "loss": 1.8824, + "step": 1475 + }, + { + "epoch": 0.15569620253164557, + "grad_norm": 0.9296305775642395, + "learning_rate": 0.0014212769911490528, + "loss": 1.8756, + "step": 1476 + }, + { + "epoch": 0.1558016877637131, + "grad_norm": 1.0127062797546387, + "learning_rate": 0.0014211649824782797, + "loss": 1.8731, + "step": 1477 + }, + { + "epoch": 0.15590717299578058, + "grad_norm": 0.5518911480903625, + "learning_rate": 0.0014210528986001237, + "loss": 1.8692, + "step": 1478 + }, + { + "epoch": 0.1560126582278481, + "grad_norm": 0.827684760093689, + "learning_rate": 0.001420940739527144, + "loss": 1.8935, + "step": 1479 + }, + { + "epoch": 0.15611814345991562, + "grad_norm": 0.8289563059806824, + "learning_rate": 0.001420828505271909, + "loss": 1.8385, + "step": 1480 + }, + { + "epoch": 0.1562236286919831, + "grad_norm": 0.5632433295249939, + "learning_rate": 0.001420716195846995, + "loss": 1.8892, + "step": 1481 + }, + { + "epoch": 0.15632911392405063, + "grad_norm": 0.7756373286247253, + "learning_rate": 0.0014206038112649865, + "loss": 1.8694, + "step": 1482 + }, + { + "epoch": 0.15643459915611815, + "grad_norm": 0.7612035870552063, + "learning_rate": 0.0014204913515384772, + "loss": 1.8733, + "step": 1483 + }, + { + "epoch": 0.15654008438818565, + "grad_norm": 0.6285582184791565, + "learning_rate": 0.0014203788166800685, + "loss": 1.8972, + "step": 1484 + }, + { + "epoch": 0.15664556962025317, + "grad_norm": 
0.7137769460678101, + "learning_rate": 0.0014202662067023708, + "loss": 1.8383, + "step": 1485 + }, + { + "epoch": 0.1567510548523207, + "grad_norm": 0.7633421421051025, + "learning_rate": 0.0014201535216180024, + "loss": 1.908, + "step": 1486 + }, + { + "epoch": 0.15685654008438818, + "grad_norm": 0.5918532013893127, + "learning_rate": 0.0014200407614395898, + "loss": 1.8326, + "step": 1487 + }, + { + "epoch": 0.1569620253164557, + "grad_norm": 0.7170225381851196, + "learning_rate": 0.0014199279261797692, + "loss": 1.8978, + "step": 1488 + }, + { + "epoch": 0.15706751054852322, + "grad_norm": 0.6519536972045898, + "learning_rate": 0.0014198150158511837, + "loss": 1.8533, + "step": 1489 + }, + { + "epoch": 0.1571729957805907, + "grad_norm": 0.5757782459259033, + "learning_rate": 0.0014197020304664856, + "loss": 1.8606, + "step": 1490 + }, + { + "epoch": 0.15727848101265823, + "grad_norm": 0.7754002809524536, + "learning_rate": 0.0014195889700383357, + "loss": 1.8464, + "step": 1491 + }, + { + "epoch": 0.15738396624472573, + "grad_norm": 0.808380126953125, + "learning_rate": 0.0014194758345794029, + "loss": 1.8817, + "step": 1492 + }, + { + "epoch": 0.15748945147679325, + "grad_norm": 0.7512904405593872, + "learning_rate": 0.0014193626241023644, + "loss": 1.8629, + "step": 1493 + }, + { + "epoch": 0.15759493670886077, + "grad_norm": 0.726902425289154, + "learning_rate": 0.001419249338619906, + "loss": 1.8607, + "step": 1494 + }, + { + "epoch": 0.15770042194092826, + "grad_norm": 0.7629278898239136, + "learning_rate": 0.0014191359781447223, + "loss": 1.8526, + "step": 1495 + }, + { + "epoch": 0.15780590717299578, + "grad_norm": 0.6305598020553589, + "learning_rate": 0.0014190225426895153, + "loss": 1.9264, + "step": 1496 + }, + { + "epoch": 0.1579113924050633, + "grad_norm": 0.7186782360076904, + "learning_rate": 0.0014189090322669967, + "loss": 1.8823, + "step": 1497 + }, + { + "epoch": 0.1580168776371308, + "grad_norm": 0.6852849721908569, + "learning_rate": 
0.0014187954468898854, + "loss": 1.8627, + "step": 1498 + }, + { + "epoch": 0.1581223628691983, + "grad_norm": 0.5628988742828369, + "learning_rate": 0.0014186817865709095, + "loss": 1.8952, + "step": 1499 + }, + { + "epoch": 0.15822784810126583, + "grad_norm": 0.7543247938156128, + "learning_rate": 0.0014185680513228048, + "loss": 1.8599, + "step": 1500 + }, + { + "epoch": 0.15833333333333333, + "grad_norm": 0.5575157403945923, + "learning_rate": 0.0014184542411583162, + "loss": 1.864, + "step": 1501 + }, + { + "epoch": 0.15843881856540085, + "grad_norm": 0.8543862700462341, + "learning_rate": 0.001418340356090197, + "loss": 1.8668, + "step": 1502 + }, + { + "epoch": 0.15854430379746837, + "grad_norm": 0.8615949749946594, + "learning_rate": 0.0014182263961312078, + "loss": 1.8698, + "step": 1503 + }, + { + "epoch": 0.15864978902953586, + "grad_norm": 0.5493414402008057, + "learning_rate": 0.001418112361294119, + "loss": 1.9063, + "step": 1504 + }, + { + "epoch": 0.15875527426160338, + "grad_norm": 0.9539059996604919, + "learning_rate": 0.0014179982515917088, + "loss": 1.8683, + "step": 1505 + }, + { + "epoch": 0.15886075949367087, + "grad_norm": 0.857519268989563, + "learning_rate": 0.0014178840670367634, + "loss": 1.8553, + "step": 1506 + }, + { + "epoch": 0.1589662447257384, + "grad_norm": 0.5438959002494812, + "learning_rate": 0.001417769807642078, + "loss": 1.8962, + "step": 1507 + }, + { + "epoch": 0.1590717299578059, + "grad_norm": 0.5997400879859924, + "learning_rate": 0.0014176554734204557, + "loss": 1.8345, + "step": 1508 + }, + { + "epoch": 0.1591772151898734, + "grad_norm": 0.7127436399459839, + "learning_rate": 0.0014175410643847085, + "loss": 1.8639, + "step": 1509 + }, + { + "epoch": 0.15928270042194093, + "grad_norm": 0.5642744898796082, + "learning_rate": 0.0014174265805476564, + "loss": 1.8716, + "step": 1510 + }, + { + "epoch": 0.15938818565400845, + "grad_norm": 0.5842957496643066, + "learning_rate": 0.001417312021922128, + "loss": 1.8481, + 
"step": 1511 + }, + { + "epoch": 0.15949367088607594, + "grad_norm": 0.8055692315101624, + "learning_rate": 0.0014171973885209596, + "loss": 1.8677, + "step": 1512 + }, + { + "epoch": 0.15959915611814346, + "grad_norm": 0.7096117734909058, + "learning_rate": 0.0014170826803569971, + "loss": 1.9009, + "step": 1513 + }, + { + "epoch": 0.15970464135021098, + "grad_norm": 0.5295675992965698, + "learning_rate": 0.0014169678974430941, + "loss": 1.8582, + "step": 1514 + }, + { + "epoch": 0.15981012658227847, + "grad_norm": 0.7337211966514587, + "learning_rate": 0.0014168530397921121, + "loss": 1.896, + "step": 1515 + }, + { + "epoch": 0.159915611814346, + "grad_norm": 0.8272942304611206, + "learning_rate": 0.0014167381074169218, + "loss": 1.8775, + "step": 1516 + }, + { + "epoch": 0.1600210970464135, + "grad_norm": 0.6355969905853271, + "learning_rate": 0.0014166231003304019, + "loss": 1.8677, + "step": 1517 + }, + { + "epoch": 0.160126582278481, + "grad_norm": 0.6070106029510498, + "learning_rate": 0.0014165080185454396, + "loss": 1.867, + "step": 1518 + }, + { + "epoch": 0.16023206751054853, + "grad_norm": 0.6495498418807983, + "learning_rate": 0.0014163928620749301, + "loss": 1.8826, + "step": 1519 + }, + { + "epoch": 0.16033755274261605, + "grad_norm": 0.7389470338821411, + "learning_rate": 0.0014162776309317778, + "loss": 1.8694, + "step": 1520 + }, + { + "epoch": 0.16044303797468354, + "grad_norm": 0.7670414447784424, + "learning_rate": 0.0014161623251288944, + "loss": 1.8759, + "step": 1521 + }, + { + "epoch": 0.16054852320675106, + "grad_norm": 0.6030794978141785, + "learning_rate": 0.001416046944679201, + "loss": 1.8545, + "step": 1522 + }, + { + "epoch": 0.16065400843881855, + "grad_norm": 0.775231659412384, + "learning_rate": 0.0014159314895956258, + "loss": 1.8397, + "step": 1523 + }, + { + "epoch": 0.16075949367088607, + "grad_norm": 0.7346013784408569, + "learning_rate": 0.0014158159598911067, + "loss": 1.8517, + "step": 1524 + }, + { + "epoch": 
0.1608649789029536, + "grad_norm": 0.49856624007225037, + "learning_rate": 0.0014157003555785893, + "loss": 1.8743, + "step": 1525 + }, + { + "epoch": 0.16097046413502109, + "grad_norm": 0.5030092597007751, + "learning_rate": 0.0014155846766710277, + "loss": 1.7989, + "step": 1526 + }, + { + "epoch": 0.1610759493670886, + "grad_norm": 0.550590991973877, + "learning_rate": 0.0014154689231813838, + "loss": 1.8978, + "step": 1527 + }, + { + "epoch": 0.16118143459915613, + "grad_norm": 0.5419245362281799, + "learning_rate": 0.001415353095122629, + "loss": 1.8275, + "step": 1528 + }, + { + "epoch": 0.16128691983122362, + "grad_norm": 0.6156306862831116, + "learning_rate": 0.0014152371925077423, + "loss": 1.8925, + "step": 1529 + }, + { + "epoch": 0.16139240506329114, + "grad_norm": 0.6023067831993103, + "learning_rate": 0.0014151212153497108, + "loss": 1.8315, + "step": 1530 + }, + { + "epoch": 0.16149789029535866, + "grad_norm": 0.5969240665435791, + "learning_rate": 0.0014150051636615305, + "loss": 1.8228, + "step": 1531 + }, + { + "epoch": 0.16160337552742615, + "grad_norm": 0.6362522840499878, + "learning_rate": 0.0014148890374562056, + "loss": 1.8272, + "step": 1532 + }, + { + "epoch": 0.16170886075949367, + "grad_norm": 0.5448126196861267, + "learning_rate": 0.0014147728367467486, + "loss": 1.8581, + "step": 1533 + }, + { + "epoch": 0.1618143459915612, + "grad_norm": 0.6257558465003967, + "learning_rate": 0.0014146565615461805, + "loss": 1.8559, + "step": 1534 + }, + { + "epoch": 0.16191983122362869, + "grad_norm": 0.6590729355812073, + "learning_rate": 0.0014145402118675302, + "loss": 1.8861, + "step": 1535 + }, + { + "epoch": 0.1620253164556962, + "grad_norm": 0.5782714486122131, + "learning_rate": 0.0014144237877238355, + "loss": 1.8138, + "step": 1536 + }, + { + "epoch": 0.16213080168776373, + "grad_norm": 0.7788006067276001, + "learning_rate": 0.0014143072891281425, + "loss": 1.8307, + "step": 1537 + }, + { + "epoch": 0.16223628691983122, + "grad_norm": 
0.7636642456054688, + "learning_rate": 0.001414190716093505, + "loss": 1.8434, + "step": 1538 + }, + { + "epoch": 0.16234177215189874, + "grad_norm": 0.573341429233551, + "learning_rate": 0.001414074068632986, + "loss": 1.8508, + "step": 1539 + }, + { + "epoch": 0.16244725738396623, + "grad_norm": 0.6856685280799866, + "learning_rate": 0.0014139573467596561, + "loss": 1.8747, + "step": 1540 + }, + { + "epoch": 0.16255274261603375, + "grad_norm": 0.6803492903709412, + "learning_rate": 0.0014138405504865949, + "loss": 1.8602, + "step": 1541 + }, + { + "epoch": 0.16265822784810127, + "grad_norm": 0.5560793876647949, + "learning_rate": 0.0014137236798268896, + "loss": 1.8116, + "step": 1542 + }, + { + "epoch": 0.16276371308016876, + "grad_norm": 0.756413996219635, + "learning_rate": 0.0014136067347936363, + "loss": 1.8536, + "step": 1543 + }, + { + "epoch": 0.16286919831223629, + "grad_norm": 0.6148220300674438, + "learning_rate": 0.0014134897153999394, + "loss": 1.8647, + "step": 1544 + }, + { + "epoch": 0.1629746835443038, + "grad_norm": 0.6712705492973328, + "learning_rate": 0.0014133726216589114, + "loss": 1.8161, + "step": 1545 + }, + { + "epoch": 0.1630801687763713, + "grad_norm": 0.6765649318695068, + "learning_rate": 0.0014132554535836732, + "loss": 1.8236, + "step": 1546 + }, + { + "epoch": 0.16318565400843882, + "grad_norm": 0.5696303844451904, + "learning_rate": 0.0014131382111873543, + "loss": 1.8458, + "step": 1547 + }, + { + "epoch": 0.16329113924050634, + "grad_norm": 0.8000442385673523, + "learning_rate": 0.0014130208944830923, + "loss": 1.8931, + "step": 1548 + }, + { + "epoch": 0.16339662447257383, + "grad_norm": 0.594349205493927, + "learning_rate": 0.0014129035034840325, + "loss": 1.8728, + "step": 1549 + }, + { + "epoch": 0.16350210970464135, + "grad_norm": 0.746400773525238, + "learning_rate": 0.00141278603820333, + "loss": 1.8424, + "step": 1550 + }, + { + "epoch": 0.16360759493670887, + "grad_norm": 0.70405513048172, + "learning_rate": 
0.0014126684986541468, + "loss": 1.8557, + "step": 1551 + }, + { + "epoch": 0.16371308016877636, + "grad_norm": 0.6002037525177002, + "learning_rate": 0.0014125508848496539, + "loss": 1.8644, + "step": 1552 + }, + { + "epoch": 0.16381856540084389, + "grad_norm": 0.724192202091217, + "learning_rate": 0.0014124331968030307, + "loss": 1.8995, + "step": 1553 + }, + { + "epoch": 0.1639240506329114, + "grad_norm": 0.5534403324127197, + "learning_rate": 0.0014123154345274645, + "loss": 1.8184, + "step": 1554 + }, + { + "epoch": 0.1640295358649789, + "grad_norm": 0.6293768882751465, + "learning_rate": 0.0014121975980361512, + "loss": 1.8395, + "step": 1555 + }, + { + "epoch": 0.16413502109704642, + "grad_norm": 0.6735476851463318, + "learning_rate": 0.0014120796873422952, + "loss": 1.8503, + "step": 1556 + }, + { + "epoch": 0.1642405063291139, + "grad_norm": 0.624128520488739, + "learning_rate": 0.0014119617024591089, + "loss": 1.8778, + "step": 1557 + }, + { + "epoch": 0.16434599156118143, + "grad_norm": 0.8744688034057617, + "learning_rate": 0.0014118436433998127, + "loss": 1.8281, + "step": 1558 + }, + { + "epoch": 0.16445147679324895, + "grad_norm": 0.7854128479957581, + "learning_rate": 0.0014117255101776362, + "loss": 1.8081, + "step": 1559 + }, + { + "epoch": 0.16455696202531644, + "grad_norm": 0.579482913017273, + "learning_rate": 0.0014116073028058165, + "loss": 1.8441, + "step": 1560 + }, + { + "epoch": 0.16466244725738396, + "grad_norm": 0.6765166521072388, + "learning_rate": 0.0014114890212975997, + "loss": 1.7916, + "step": 1561 + }, + { + "epoch": 0.16476793248945149, + "grad_norm": 0.7372018098831177, + "learning_rate": 0.0014113706656662393, + "loss": 1.8747, + "step": 1562 + }, + { + "epoch": 0.16487341772151898, + "grad_norm": 0.5780003666877747, + "learning_rate": 0.001411252235924998, + "loss": 1.8292, + "step": 1563 + }, + { + "epoch": 0.1649789029535865, + "grad_norm": 0.6285006999969482, + "learning_rate": 0.0014111337320871463, + "loss": 1.8363, + 
"step": 1564 + }, + { + "epoch": 0.16508438818565402, + "grad_norm": 0.6424933671951294, + "learning_rate": 0.0014110151541659633, + "loss": 1.808, + "step": 1565 + }, + { + "epoch": 0.1651898734177215, + "grad_norm": 0.5517683029174805, + "learning_rate": 0.0014108965021747363, + "loss": 1.8548, + "step": 1566 + }, + { + "epoch": 0.16529535864978903, + "grad_norm": 0.6656227111816406, + "learning_rate": 0.0014107777761267605, + "loss": 1.8495, + "step": 1567 + }, + { + "epoch": 0.16540084388185655, + "grad_norm": 0.7699546217918396, + "learning_rate": 0.00141065897603534, + "loss": 1.8903, + "step": 1568 + }, + { + "epoch": 0.16550632911392404, + "grad_norm": 0.6612445712089539, + "learning_rate": 0.001410540101913787, + "loss": 1.8448, + "step": 1569 + }, + { + "epoch": 0.16561181434599156, + "grad_norm": 0.5594730377197266, + "learning_rate": 0.0014104211537754217, + "loss": 1.8432, + "step": 1570 + }, + { + "epoch": 0.16571729957805909, + "grad_norm": 0.570366382598877, + "learning_rate": 0.001410302131633573, + "loss": 1.8195, + "step": 1571 + }, + { + "epoch": 0.16582278481012658, + "grad_norm": 0.6440137028694153, + "learning_rate": 0.0014101830355015778, + "loss": 1.8707, + "step": 1572 + }, + { + "epoch": 0.1659282700421941, + "grad_norm": 0.5498396754264832, + "learning_rate": 0.0014100638653927816, + "loss": 1.864, + "step": 1573 + }, + { + "epoch": 0.1660337552742616, + "grad_norm": 0.6557112336158752, + "learning_rate": 0.0014099446213205378, + "loss": 1.8272, + "step": 1574 + }, + { + "epoch": 0.1661392405063291, + "grad_norm": 0.8645330667495728, + "learning_rate": 0.0014098253032982086, + "loss": 1.8339, + "step": 1575 + }, + { + "epoch": 0.16624472573839663, + "grad_norm": 0.6640101075172424, + "learning_rate": 0.0014097059113391639, + "loss": 1.8732, + "step": 1576 + }, + { + "epoch": 0.16635021097046412, + "grad_norm": 0.575050413608551, + "learning_rate": 0.0014095864454567821, + "loss": 1.8654, + "step": 1577 + }, + { + "epoch": 
0.16645569620253164, + "grad_norm": 0.5616680383682251, + "learning_rate": 0.0014094669056644502, + "loss": 1.8287, + "step": 1578 + }, + { + "epoch": 0.16656118143459916, + "grad_norm": 0.5482111573219299, + "learning_rate": 0.001409347291975563, + "loss": 1.8897, + "step": 1579 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 0.8188108801841736, + "learning_rate": 0.001409227604403524, + "loss": 1.8463, + "step": 1580 + }, + { + "epoch": 0.16677215189873418, + "grad_norm": 0.645258903503418, + "learning_rate": 0.0014091078429617448, + "loss": 1.8613, + "step": 1581 + }, + { + "epoch": 0.1668776371308017, + "grad_norm": 0.7732670307159424, + "learning_rate": 0.0014089880076636452, + "loss": 1.8672, + "step": 1582 + }, + { + "epoch": 0.1669831223628692, + "grad_norm": 0.9245077967643738, + "learning_rate": 0.0014088680985226533, + "loss": 1.8299, + "step": 1583 + }, + { + "epoch": 0.1670886075949367, + "grad_norm": 0.7496465444564819, + "learning_rate": 0.0014087481155522056, + "loss": 1.8354, + "step": 1584 + }, + { + "epoch": 0.16719409282700423, + "grad_norm": 0.7556281089782715, + "learning_rate": 0.0014086280587657467, + "loss": 1.8863, + "step": 1585 + }, + { + "epoch": 0.16729957805907172, + "grad_norm": 0.884229302406311, + "learning_rate": 0.0014085079281767295, + "loss": 1.875, + "step": 1586 + }, + { + "epoch": 0.16740506329113924, + "grad_norm": 0.9067335724830627, + "learning_rate": 0.0014083877237986153, + "loss": 1.8147, + "step": 1587 + }, + { + "epoch": 0.16751054852320676, + "grad_norm": 0.6540364027023315, + "learning_rate": 0.0014082674456448738, + "loss": 1.8392, + "step": 1588 + }, + { + "epoch": 0.16761603375527426, + "grad_norm": 0.7903019189834595, + "learning_rate": 0.0014081470937289827, + "loss": 1.8666, + "step": 1589 + }, + { + "epoch": 0.16772151898734178, + "grad_norm": 0.9455569982528687, + "learning_rate": 0.0014080266680644277, + "loss": 1.8294, + "step": 1590 + }, + { + "epoch": 0.16782700421940927, + "grad_norm": 
0.7742378115653992, + "learning_rate": 0.0014079061686647033, + "loss": 1.8483, + "step": 1591 + }, + { + "epoch": 0.1679324894514768, + "grad_norm": 0.5834284424781799, + "learning_rate": 0.0014077855955433123, + "loss": 1.824, + "step": 1592 + }, + { + "epoch": 0.1680379746835443, + "grad_norm": 0.652269721031189, + "learning_rate": 0.001407664948713765, + "loss": 1.8587, + "step": 1593 + }, + { + "epoch": 0.1681434599156118, + "grad_norm": 0.5547722578048706, + "learning_rate": 0.001407544228189581, + "loss": 1.8437, + "step": 1594 + }, + { + "epoch": 0.16824894514767932, + "grad_norm": 0.684628427028656, + "learning_rate": 0.0014074234339842874, + "loss": 1.8895, + "step": 1595 + }, + { + "epoch": 0.16835443037974684, + "grad_norm": 0.6268038749694824, + "learning_rate": 0.00140730256611142, + "loss": 1.8665, + "step": 1596 + }, + { + "epoch": 0.16845991561181434, + "grad_norm": 0.6131630539894104, + "learning_rate": 0.001407181624584522, + "loss": 1.8424, + "step": 1597 + }, + { + "epoch": 0.16856540084388186, + "grad_norm": 0.7048508524894714, + "learning_rate": 0.0014070606094171464, + "loss": 1.85, + "step": 1598 + }, + { + "epoch": 0.16867088607594938, + "grad_norm": 0.7159748077392578, + "learning_rate": 0.0014069395206228528, + "loss": 1.8445, + "step": 1599 + }, + { + "epoch": 0.16877637130801687, + "grad_norm": 0.6437264084815979, + "learning_rate": 0.0014068183582152103, + "loss": 1.8756, + "step": 1600 + }, + { + "epoch": 0.1688818565400844, + "grad_norm": 0.6245765089988708, + "learning_rate": 0.0014066971222077955, + "loss": 1.8237, + "step": 1601 + }, + { + "epoch": 0.1689873417721519, + "grad_norm": 0.6940786838531494, + "learning_rate": 0.0014065758126141938, + "loss": 1.893, + "step": 1602 + }, + { + "epoch": 0.1690928270042194, + "grad_norm": 0.8205002546310425, + "learning_rate": 0.0014064544294479981, + "loss": 1.8667, + "step": 1603 + }, + { + "epoch": 0.16919831223628692, + "grad_norm": 0.6185948252677917, + "learning_rate": 
0.0014063329727228102, + "loss": 1.8362, + "step": 1604 + }, + { + "epoch": 0.16930379746835442, + "grad_norm": 0.5433720946311951, + "learning_rate": 0.0014062114424522397, + "loss": 1.8172, + "step": 1605 + }, + { + "epoch": 0.16940928270042194, + "grad_norm": 0.7545897960662842, + "learning_rate": 0.0014060898386499053, + "loss": 1.8738, + "step": 1606 + }, + { + "epoch": 0.16951476793248946, + "grad_norm": 0.5976491570472717, + "learning_rate": 0.0014059681613294327, + "loss": 1.8811, + "step": 1607 + }, + { + "epoch": 0.16962025316455695, + "grad_norm": 0.5372681021690369, + "learning_rate": 0.0014058464105044567, + "loss": 1.8295, + "step": 1608 + }, + { + "epoch": 0.16972573839662447, + "grad_norm": 0.5871236324310303, + "learning_rate": 0.0014057245861886201, + "loss": 1.8452, + "step": 1609 + }, + { + "epoch": 0.169831223628692, + "grad_norm": 0.568348228931427, + "learning_rate": 0.001405602688395574, + "loss": 1.8567, + "step": 1610 + }, + { + "epoch": 0.16993670886075948, + "grad_norm": 0.6358767747879028, + "learning_rate": 0.0014054807171389773, + "loss": 1.7939, + "step": 1611 + }, + { + "epoch": 0.170042194092827, + "grad_norm": 0.700186014175415, + "learning_rate": 0.001405358672432498, + "loss": 1.7752, + "step": 1612 + }, + { + "epoch": 0.17014767932489452, + "grad_norm": 1.0189894437789917, + "learning_rate": 0.0014052365542898111, + "loss": 1.8212, + "step": 1613 + }, + { + "epoch": 0.17025316455696202, + "grad_norm": 0.9162911176681519, + "learning_rate": 0.0014051143627246015, + "loss": 1.8559, + "step": 1614 + }, + { + "epoch": 0.17035864978902954, + "grad_norm": 0.5635157823562622, + "learning_rate": 0.0014049920977505608, + "loss": 1.824, + "step": 1615 + }, + { + "epoch": 0.17046413502109706, + "grad_norm": 0.6146384477615356, + "learning_rate": 0.0014048697593813891, + "loss": 1.8432, + "step": 1616 + }, + { + "epoch": 0.17056962025316455, + "grad_norm": 0.8274125456809998, + "learning_rate": 0.0014047473476307955, + "loss": 1.842, + 
"step": 1617 + }, + { + "epoch": 0.17067510548523207, + "grad_norm": 0.610501229763031, + "learning_rate": 0.001404624862512497, + "loss": 1.8157, + "step": 1618 + }, + { + "epoch": 0.1707805907172996, + "grad_norm": 0.5162004232406616, + "learning_rate": 0.001404502304040218, + "loss": 1.8273, + "step": 1619 + }, + { + "epoch": 0.17088607594936708, + "grad_norm": 0.6502171754837036, + "learning_rate": 0.0014043796722276924, + "loss": 1.8511, + "step": 1620 + }, + { + "epoch": 0.1709915611814346, + "grad_norm": 0.5498970746994019, + "learning_rate": 0.0014042569670886615, + "loss": 1.8433, + "step": 1621 + }, + { + "epoch": 0.1710970464135021, + "grad_norm": 0.6442164182662964, + "learning_rate": 0.0014041341886368752, + "loss": 1.8839, + "step": 1622 + }, + { + "epoch": 0.17120253164556962, + "grad_norm": 0.746438205242157, + "learning_rate": 0.0014040113368860908, + "loss": 1.849, + "step": 1623 + }, + { + "epoch": 0.17130801687763714, + "grad_norm": 0.9905840754508972, + "learning_rate": 0.0014038884118500754, + "loss": 1.819, + "step": 1624 + }, + { + "epoch": 0.17141350210970463, + "grad_norm": 0.7740461230278015, + "learning_rate": 0.0014037654135426025, + "loss": 1.8787, + "step": 1625 + }, + { + "epoch": 0.17151898734177215, + "grad_norm": 0.6848217248916626, + "learning_rate": 0.0014036423419774551, + "loss": 1.804, + "step": 1626 + }, + { + "epoch": 0.17162447257383967, + "grad_norm": 0.715851366519928, + "learning_rate": 0.0014035191971684242, + "loss": 1.8099, + "step": 1627 + }, + { + "epoch": 0.17172995780590716, + "grad_norm": 0.6681694984436035, + "learning_rate": 0.0014033959791293082, + "loss": 1.8278, + "step": 1628 + }, + { + "epoch": 0.17183544303797468, + "grad_norm": 0.7576026320457458, + "learning_rate": 0.0014032726878739148, + "loss": 1.8628, + "step": 1629 + }, + { + "epoch": 0.1719409282700422, + "grad_norm": 0.7839313745498657, + "learning_rate": 0.0014031493234160591, + "loss": 1.8151, + "step": 1630 + }, + { + "epoch": 
0.1720464135021097, + "grad_norm": 0.8804556727409363, + "learning_rate": 0.001403025885769565, + "loss": 1.8581, + "step": 1631 + }, + { + "epoch": 0.17215189873417722, + "grad_norm": 0.5770796537399292, + "learning_rate": 0.001402902374948264, + "loss": 1.8473, + "step": 1632 + }, + { + "epoch": 0.17225738396624474, + "grad_norm": 0.806982696056366, + "learning_rate": 0.0014027787909659962, + "loss": 1.837, + "step": 1633 + }, + { + "epoch": 0.17236286919831223, + "grad_norm": 0.8889955282211304, + "learning_rate": 0.0014026551338366098, + "loss": 1.8466, + "step": 1634 + }, + { + "epoch": 0.17246835443037975, + "grad_norm": 0.5614049434661865, + "learning_rate": 0.0014025314035739614, + "loss": 1.828, + "step": 1635 + }, + { + "epoch": 0.17257383966244727, + "grad_norm": 0.7653814554214478, + "learning_rate": 0.001402407600191915, + "loss": 1.8339, + "step": 1636 + }, + { + "epoch": 0.17267932489451476, + "grad_norm": 0.7032321691513062, + "learning_rate": 0.0014022837237043441, + "loss": 1.8439, + "step": 1637 + }, + { + "epoch": 0.17278481012658228, + "grad_norm": 0.8243837356567383, + "learning_rate": 0.0014021597741251295, + "loss": 1.8507, + "step": 1638 + }, + { + "epoch": 0.17289029535864978, + "grad_norm": 1.1945618391036987, + "learning_rate": 0.00140203575146816, + "loss": 1.8521, + "step": 1639 + }, + { + "epoch": 0.1729957805907173, + "grad_norm": 0.6440109014511108, + "learning_rate": 0.0014019116557473332, + "loss": 1.8107, + "step": 1640 + }, + { + "epoch": 0.17310126582278482, + "grad_norm": 1.1709644794464111, + "learning_rate": 0.0014017874869765548, + "loss": 1.8213, + "step": 1641 + }, + { + "epoch": 0.1732067510548523, + "grad_norm": 0.6187630295753479, + "learning_rate": 0.0014016632451697383, + "loss": 1.8133, + "step": 1642 + }, + { + "epoch": 0.17331223628691983, + "grad_norm": 1.0818506479263306, + "learning_rate": 0.0014015389303408058, + "loss": 1.8334, + "step": 1643 + }, + { + "epoch": 0.17341772151898735, + "grad_norm": 
0.8022618293762207, + "learning_rate": 0.001401414542503687, + "loss": 1.8251, + "step": 1644 + }, + { + "epoch": 0.17352320675105484, + "grad_norm": 1.0393681526184082, + "learning_rate": 0.001401290081672321, + "loss": 1.8398, + "step": 1645 + }, + { + "epoch": 0.17362869198312236, + "grad_norm": 0.7732388973236084, + "learning_rate": 0.0014011655478606531, + "loss": 1.8413, + "step": 1646 + }, + { + "epoch": 0.17373417721518988, + "grad_norm": 0.7199129462242126, + "learning_rate": 0.001401040941082639, + "loss": 1.8455, + "step": 1647 + }, + { + "epoch": 0.17383966244725738, + "grad_norm": 0.9718675017356873, + "learning_rate": 0.001400916261352241, + "loss": 1.8289, + "step": 1648 + }, + { + "epoch": 0.1739451476793249, + "grad_norm": 0.6366944909095764, + "learning_rate": 0.00140079150868343, + "loss": 1.8623, + "step": 1649 + }, + { + "epoch": 0.17405063291139242, + "grad_norm": 0.9856382012367249, + "learning_rate": 0.0014006666830901854, + "loss": 1.8437, + "step": 1650 + }, + { + "epoch": 0.1741561181434599, + "grad_norm": 0.5472239255905151, + "learning_rate": 0.0014005417845864945, + "loss": 1.844, + "step": 1651 + }, + { + "epoch": 0.17426160337552743, + "grad_norm": 0.9320327043533325, + "learning_rate": 0.0014004168131863525, + "loss": 1.8213, + "step": 1652 + }, + { + "epoch": 0.17436708860759495, + "grad_norm": 0.7807600498199463, + "learning_rate": 0.0014002917689037637, + "loss": 1.8031, + "step": 1653 + }, + { + "epoch": 0.17447257383966244, + "grad_norm": 0.6054607033729553, + "learning_rate": 0.0014001666517527392, + "loss": 1.8441, + "step": 1654 + }, + { + "epoch": 0.17457805907172996, + "grad_norm": 0.6089448928833008, + "learning_rate": 0.0014000414617472996, + "loss": 1.833, + "step": 1655 + }, + { + "epoch": 0.17468354430379746, + "grad_norm": 0.6042707562446594, + "learning_rate": 0.0013999161989014725, + "loss": 1.8666, + "step": 1656 + }, + { + "epoch": 0.17478902953586498, + "grad_norm": 0.7445318102836609, + "learning_rate": 
0.0013997908632292948, + "loss": 1.8203, + "step": 1657 + }, + { + "epoch": 0.1748945147679325, + "grad_norm": 0.6172434091567993, + "learning_rate": 0.0013996654547448106, + "loss": 1.8152, + "step": 1658 + }, + { + "epoch": 0.175, + "grad_norm": 0.6280675530433655, + "learning_rate": 0.0013995399734620729, + "loss": 1.8311, + "step": 1659 + }, + { + "epoch": 0.1751054852320675, + "grad_norm": 0.5977142453193665, + "learning_rate": 0.001399414419395142, + "loss": 1.8149, + "step": 1660 + }, + { + "epoch": 0.17521097046413503, + "grad_norm": 0.6560184955596924, + "learning_rate": 0.0013992887925580874, + "loss": 1.8722, + "step": 1661 + }, + { + "epoch": 0.17531645569620252, + "grad_norm": 0.5770848393440247, + "learning_rate": 0.0013991630929649857, + "loss": 1.8217, + "step": 1662 + }, + { + "epoch": 0.17542194092827004, + "grad_norm": 0.955504298210144, + "learning_rate": 0.0013990373206299225, + "loss": 1.8357, + "step": 1663 + }, + { + "epoch": 0.17552742616033756, + "grad_norm": 0.6162383556365967, + "learning_rate": 0.0013989114755669912, + "loss": 1.8337, + "step": 1664 + }, + { + "epoch": 0.17563291139240506, + "grad_norm": 0.8499171137809753, + "learning_rate": 0.001398785557790293, + "loss": 1.8501, + "step": 1665 + }, + { + "epoch": 0.17573839662447258, + "grad_norm": 0.5664222240447998, + "learning_rate": 0.0013986595673139382, + "loss": 1.865, + "step": 1666 + }, + { + "epoch": 0.1758438818565401, + "grad_norm": 0.7999162673950195, + "learning_rate": 0.0013985335041520443, + "loss": 1.8527, + "step": 1667 + }, + { + "epoch": 0.1759493670886076, + "grad_norm": 0.75045245885849, + "learning_rate": 0.0013984073683187374, + "loss": 1.8592, + "step": 1668 + }, + { + "epoch": 0.1760548523206751, + "grad_norm": 0.6948562860488892, + "learning_rate": 0.0013982811598281517, + "loss": 1.8162, + "step": 1669 + }, + { + "epoch": 0.17616033755274263, + "grad_norm": 0.845655620098114, + "learning_rate": 0.0013981548786944293, + "loss": 1.7925, + "step": 1670 + }, + 
{ + "epoch": 0.17626582278481012, + "grad_norm": 0.6059150695800781, + "learning_rate": 0.0013980285249317209, + "loss": 1.7952, + "step": 1671 + }, + { + "epoch": 0.17637130801687764, + "grad_norm": 0.7367244362831116, + "learning_rate": 0.0013979020985541847, + "loss": 1.8331, + "step": 1672 + }, + { + "epoch": 0.17647679324894514, + "grad_norm": 0.5386902689933777, + "learning_rate": 0.0013977755995759876, + "loss": 1.8497, + "step": 1673 + }, + { + "epoch": 0.17658227848101266, + "grad_norm": 0.8185182213783264, + "learning_rate": 0.0013976490280113048, + "loss": 1.8439, + "step": 1674 + }, + { + "epoch": 0.17668776371308018, + "grad_norm": 0.682350754737854, + "learning_rate": 0.0013975223838743188, + "loss": 1.8258, + "step": 1675 + }, + { + "epoch": 0.17679324894514767, + "grad_norm": 0.7427065372467041, + "learning_rate": 0.0013973956671792206, + "loss": 1.8461, + "step": 1676 + }, + { + "epoch": 0.1768987341772152, + "grad_norm": 0.8012073636054993, + "learning_rate": 0.00139726887794021, + "loss": 1.7935, + "step": 1677 + }, + { + "epoch": 0.1770042194092827, + "grad_norm": 0.7227526307106018, + "learning_rate": 0.001397142016171494, + "loss": 1.868, + "step": 1678 + }, + { + "epoch": 0.1771097046413502, + "grad_norm": 0.7238121628761292, + "learning_rate": 0.0013970150818872881, + "loss": 1.8034, + "step": 1679 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 1.1135774850845337, + "learning_rate": 0.0013968880751018158, + "loss": 1.8442, + "step": 1680 + }, + { + "epoch": 0.17732067510548524, + "grad_norm": 0.9114503264427185, + "learning_rate": 0.0013967609958293091, + "loss": 1.8236, + "step": 1681 + }, + { + "epoch": 0.17742616033755274, + "grad_norm": 0.6743898391723633, + "learning_rate": 0.001396633844084008, + "loss": 1.8132, + "step": 1682 + }, + { + "epoch": 0.17753164556962026, + "grad_norm": 1.0972694158554077, + "learning_rate": 0.00139650661988016, + "loss": 1.8699, + "step": 1683 + }, + { + "epoch": 0.17763713080168778, + 
"grad_norm": 1.0547226667404175, + "learning_rate": 0.0013963793232320216, + "loss": 1.8663, + "step": 1684 + }, + { + "epoch": 0.17774261603375527, + "grad_norm": 0.5548208951950073, + "learning_rate": 0.0013962519541538569, + "loss": 1.7881, + "step": 1685 + }, + { + "epoch": 0.1778481012658228, + "grad_norm": 0.8984236121177673, + "learning_rate": 0.001396124512659938, + "loss": 1.789, + "step": 1686 + }, + { + "epoch": 0.17795358649789028, + "grad_norm": 0.6437095403671265, + "learning_rate": 0.001395996998764546, + "loss": 1.8343, + "step": 1687 + }, + { + "epoch": 0.1780590717299578, + "grad_norm": 0.703136682510376, + "learning_rate": 0.0013958694124819688, + "loss": 1.7721, + "step": 1688 + }, + { + "epoch": 0.17816455696202532, + "grad_norm": 0.6602951884269714, + "learning_rate": 0.0013957417538265032, + "loss": 1.8486, + "step": 1689 + }, + { + "epoch": 0.17827004219409281, + "grad_norm": 0.6467139720916748, + "learning_rate": 0.0013956140228124545, + "loss": 1.817, + "step": 1690 + }, + { + "epoch": 0.17837552742616034, + "grad_norm": 0.6531676054000854, + "learning_rate": 0.001395486219454135, + "loss": 1.8683, + "step": 1691 + }, + { + "epoch": 0.17848101265822786, + "grad_norm": 0.552640438079834, + "learning_rate": 0.0013953583437658658, + "loss": 1.8358, + "step": 1692 + }, + { + "epoch": 0.17858649789029535, + "grad_norm": 0.709023654460907, + "learning_rate": 0.0013952303957619763, + "loss": 1.8092, + "step": 1693 + }, + { + "epoch": 0.17869198312236287, + "grad_norm": 0.7601611018180847, + "learning_rate": 0.0013951023754568035, + "loss": 1.8207, + "step": 1694 + }, + { + "epoch": 0.1787974683544304, + "grad_norm": 0.7996731996536255, + "learning_rate": 0.001394974282864693, + "loss": 1.8079, + "step": 1695 + }, + { + "epoch": 0.17890295358649788, + "grad_norm": 0.5070310235023499, + "learning_rate": 0.0013948461179999977, + "loss": 1.8136, + "step": 1696 + }, + { + "epoch": 0.1790084388185654, + "grad_norm": 0.7496079206466675, + 
"learning_rate": 0.0013947178808770794, + "loss": 1.8259, + "step": 1697 + }, + { + "epoch": 0.17911392405063292, + "grad_norm": 0.6276077032089233, + "learning_rate": 0.0013945895715103077, + "loss": 1.8392, + "step": 1698 + }, + { + "epoch": 0.17921940928270041, + "grad_norm": 0.6011828184127808, + "learning_rate": 0.0013944611899140604, + "loss": 1.8808, + "step": 1699 + }, + { + "epoch": 0.17932489451476794, + "grad_norm": 0.8300737738609314, + "learning_rate": 0.0013943327361027231, + "loss": 1.8491, + "step": 1700 + }, + { + "epoch": 0.17943037974683546, + "grad_norm": 0.9569635987281799, + "learning_rate": 0.0013942042100906899, + "loss": 1.8216, + "step": 1701 + }, + { + "epoch": 0.17953586497890295, + "grad_norm": 0.579797089099884, + "learning_rate": 0.0013940756118923626, + "loss": 1.8235, + "step": 1702 + }, + { + "epoch": 0.17964135021097047, + "grad_norm": 0.7526249885559082, + "learning_rate": 0.0013939469415221513, + "loss": 1.7897, + "step": 1703 + }, + { + "epoch": 0.17974683544303796, + "grad_norm": 0.850848913192749, + "learning_rate": 0.0013938181989944741, + "loss": 1.8166, + "step": 1704 + }, + { + "epoch": 0.17985232067510548, + "grad_norm": 0.516425609588623, + "learning_rate": 0.0013936893843237573, + "loss": 1.8235, + "step": 1705 + }, + { + "epoch": 0.179957805907173, + "grad_norm": 0.6917879581451416, + "learning_rate": 0.0013935604975244356, + "loss": 1.8478, + "step": 1706 + }, + { + "epoch": 0.1800632911392405, + "grad_norm": 0.6912444829940796, + "learning_rate": 0.0013934315386109509, + "loss": 1.7885, + "step": 1707 + }, + { + "epoch": 0.18016877637130801, + "grad_norm": 0.6171613931655884, + "learning_rate": 0.0013933025075977539, + "loss": 1.7965, + "step": 1708 + }, + { + "epoch": 0.18027426160337554, + "grad_norm": 0.5981811881065369, + "learning_rate": 0.0013931734044993031, + "loss": 1.8016, + "step": 1709 + }, + { + "epoch": 0.18037974683544303, + "grad_norm": 0.6869712471961975, + "learning_rate": 0.0013930442293300649, + 
"loss": 1.8532, + "step": 1710 + }, + { + "epoch": 0.18048523206751055, + "grad_norm": 0.573339581489563, + "learning_rate": 0.0013929149821045148, + "loss": 1.8342, + "step": 1711 + }, + { + "epoch": 0.18059071729957807, + "grad_norm": 0.6253071427345276, + "learning_rate": 0.0013927856628371347, + "loss": 1.8585, + "step": 1712 + }, + { + "epoch": 0.18069620253164556, + "grad_norm": 0.8224431872367859, + "learning_rate": 0.0013926562715424159, + "loss": 1.8126, + "step": 1713 + }, + { + "epoch": 0.18080168776371308, + "grad_norm": 0.6860092282295227, + "learning_rate": 0.0013925268082348576, + "loss": 1.7957, + "step": 1714 + }, + { + "epoch": 0.1809071729957806, + "grad_norm": 0.597072958946228, + "learning_rate": 0.0013923972729289662, + "loss": 1.7981, + "step": 1715 + }, + { + "epoch": 0.1810126582278481, + "grad_norm": 0.575775682926178, + "learning_rate": 0.0013922676656392572, + "loss": 1.8125, + "step": 1716 + }, + { + "epoch": 0.18111814345991561, + "grad_norm": 0.6455070972442627, + "learning_rate": 0.0013921379863802536, + "loss": 1.7827, + "step": 1717 + }, + { + "epoch": 0.18122362869198314, + "grad_norm": 0.683452308177948, + "learning_rate": 0.0013920082351664867, + "loss": 1.7992, + "step": 1718 + }, + { + "epoch": 0.18132911392405063, + "grad_norm": 0.5726423859596252, + "learning_rate": 0.0013918784120124956, + "loss": 1.814, + "step": 1719 + }, + { + "epoch": 0.18143459915611815, + "grad_norm": 0.5322694778442383, + "learning_rate": 0.0013917485169328279, + "loss": 1.8445, + "step": 1720 + }, + { + "epoch": 0.18154008438818564, + "grad_norm": 0.6019428372383118, + "learning_rate": 0.0013916185499420386, + "loss": 1.8094, + "step": 1721 + }, + { + "epoch": 0.18164556962025316, + "grad_norm": 0.5052763223648071, + "learning_rate": 0.0013914885110546916, + "loss": 1.8056, + "step": 1722 + }, + { + "epoch": 0.18175105485232068, + "grad_norm": 0.5948351621627808, + "learning_rate": 0.001391358400285358, + "loss": 1.8084, + "step": 1723 + }, + { + 
"epoch": 0.18185654008438817, + "grad_norm": 0.5294570922851562, + "learning_rate": 0.0013912282176486177, + "loss": 1.801, + "step": 1724 + }, + { + "epoch": 0.1819620253164557, + "grad_norm": 0.5931138396263123, + "learning_rate": 0.0013910979631590581, + "loss": 1.8047, + "step": 1725 + }, + { + "epoch": 0.18206751054852321, + "grad_norm": 0.5476739406585693, + "learning_rate": 0.001390967636831275, + "loss": 1.8388, + "step": 1726 + }, + { + "epoch": 0.1821729957805907, + "grad_norm": 0.6455272436141968, + "learning_rate": 0.0013908372386798717, + "loss": 1.8132, + "step": 1727 + }, + { + "epoch": 0.18227848101265823, + "grad_norm": 0.5134037733078003, + "learning_rate": 0.0013907067687194607, + "loss": 1.8135, + "step": 1728 + }, + { + "epoch": 0.18238396624472575, + "grad_norm": 0.5634459257125854, + "learning_rate": 0.0013905762269646614, + "loss": 1.8367, + "step": 1729 + }, + { + "epoch": 0.18248945147679324, + "grad_norm": 0.6260510683059692, + "learning_rate": 0.0013904456134301016, + "loss": 1.8468, + "step": 1730 + }, + { + "epoch": 0.18259493670886076, + "grad_norm": 0.566551148891449, + "learning_rate": 0.001390314928130417, + "loss": 1.8068, + "step": 1731 + }, + { + "epoch": 0.18270042194092828, + "grad_norm": 0.678026556968689, + "learning_rate": 0.0013901841710802522, + "loss": 1.8099, + "step": 1732 + }, + { + "epoch": 0.18280590717299577, + "grad_norm": 0.7109790444374084, + "learning_rate": 0.0013900533422942585, + "loss": 1.8131, + "step": 1733 + }, + { + "epoch": 0.1829113924050633, + "grad_norm": 0.7090463638305664, + "learning_rate": 0.0013899224417870963, + "loss": 1.8312, + "step": 1734 + }, + { + "epoch": 0.18301687763713081, + "grad_norm": 0.6968048810958862, + "learning_rate": 0.0013897914695734336, + "loss": 1.807, + "step": 1735 + }, + { + "epoch": 0.1831223628691983, + "grad_norm": 0.6520181894302368, + "learning_rate": 0.0013896604256679462, + "loss": 1.8472, + "step": 1736 + }, + { + "epoch": 0.18322784810126583, + "grad_norm": 
0.8523566126823425, + "learning_rate": 0.0013895293100853188, + "loss": 1.8101, + "step": 1737 + }, + { + "epoch": 0.18333333333333332, + "grad_norm": 0.8033066391944885, + "learning_rate": 0.001389398122840243, + "loss": 1.8263, + "step": 1738 + }, + { + "epoch": 0.18343881856540084, + "grad_norm": 0.6226003170013428, + "learning_rate": 0.0013892668639474194, + "loss": 1.8415, + "step": 1739 + }, + { + "epoch": 0.18354430379746836, + "grad_norm": 1.1216462850570679, + "learning_rate": 0.0013891355334215562, + "loss": 1.8485, + "step": 1740 + }, + { + "epoch": 0.18364978902953585, + "grad_norm": 0.7606018781661987, + "learning_rate": 0.001389004131277369, + "loss": 1.8488, + "step": 1741 + }, + { + "epoch": 0.18375527426160337, + "grad_norm": 0.7026885151863098, + "learning_rate": 0.0013888726575295826, + "loss": 1.831, + "step": 1742 + }, + { + "epoch": 0.1838607594936709, + "grad_norm": 0.8999369740486145, + "learning_rate": 0.0013887411121929294, + "loss": 1.8337, + "step": 1743 + }, + { + "epoch": 0.1839662447257384, + "grad_norm": 0.6937849521636963, + "learning_rate": 0.0013886094952821496, + "loss": 1.8389, + "step": 1744 + }, + { + "epoch": 0.1840717299578059, + "grad_norm": 0.9566742181777954, + "learning_rate": 0.0013884778068119913, + "loss": 1.8367, + "step": 1745 + }, + { + "epoch": 0.18417721518987343, + "grad_norm": 0.8469867706298828, + "learning_rate": 0.0013883460467972108, + "loss": 1.8504, + "step": 1746 + }, + { + "epoch": 0.18428270042194092, + "grad_norm": 0.6696764230728149, + "learning_rate": 0.0013882142152525732, + "loss": 1.8697, + "step": 1747 + }, + { + "epoch": 0.18438818565400844, + "grad_norm": 0.698218584060669, + "learning_rate": 0.0013880823121928498, + "loss": 1.8125, + "step": 1748 + }, + { + "epoch": 0.18449367088607596, + "grad_norm": 0.5495554208755493, + "learning_rate": 0.0013879503376328219, + "loss": 1.8549, + "step": 1749 + }, + { + "epoch": 0.18459915611814345, + "grad_norm": 0.7503832578659058, + "learning_rate": 
0.0013878182915872776, + "loss": 1.863, + "step": 1750 + }, + { + "epoch": 0.18470464135021097, + "grad_norm": 0.8499141931533813, + "learning_rate": 0.001387686174071013, + "loss": 1.8238, + "step": 1751 + }, + { + "epoch": 0.1848101265822785, + "grad_norm": 0.9722330570220947, + "learning_rate": 0.001387553985098833, + "loss": 1.8349, + "step": 1752 + }, + { + "epoch": 0.184915611814346, + "grad_norm": 0.8295923471450806, + "learning_rate": 0.0013874217246855499, + "loss": 1.8406, + "step": 1753 + }, + { + "epoch": 0.1850210970464135, + "grad_norm": 0.6842702627182007, + "learning_rate": 0.001387289392845984, + "loss": 1.8272, + "step": 1754 + }, + { + "epoch": 0.185126582278481, + "grad_norm": 0.8681945204734802, + "learning_rate": 0.0013871569895949635, + "loss": 1.8315, + "step": 1755 + }, + { + "epoch": 0.18523206751054852, + "grad_norm": 0.5694568157196045, + "learning_rate": 0.0013870245149473256, + "loss": 1.7961, + "step": 1756 + }, + { + "epoch": 0.18533755274261604, + "grad_norm": 0.6993078589439392, + "learning_rate": 0.0013868919689179143, + "loss": 1.8229, + "step": 1757 + }, + { + "epoch": 0.18544303797468353, + "grad_norm": 0.5635566115379333, + "learning_rate": 0.001386759351521582, + "loss": 1.8327, + "step": 1758 + }, + { + "epoch": 0.18554852320675105, + "grad_norm": 0.6539811491966248, + "learning_rate": 0.0013866266627731892, + "loss": 1.8263, + "step": 1759 + }, + { + "epoch": 0.18565400843881857, + "grad_norm": 0.6685446500778198, + "learning_rate": 0.001386493902687604, + "loss": 1.7756, + "step": 1760 + }, + { + "epoch": 0.18575949367088607, + "grad_norm": 0.5039246082305908, + "learning_rate": 0.0013863610712797035, + "loss": 1.8191, + "step": 1761 + }, + { + "epoch": 0.1858649789029536, + "grad_norm": 0.6139691472053528, + "learning_rate": 0.0013862281685643716, + "loss": 1.8401, + "step": 1762 + }, + { + "epoch": 0.1859704641350211, + "grad_norm": 0.6312441825866699, + "learning_rate": 0.001386095194556501, + "loss": 1.8198, + "step": 
1763 + }, + { + "epoch": 0.1860759493670886, + "grad_norm": 0.6362075805664062, + "learning_rate": 0.001385962149270992, + "loss": 1.8031, + "step": 1764 + }, + { + "epoch": 0.18618143459915612, + "grad_norm": 0.5509835481643677, + "learning_rate": 0.001385829032722753, + "loss": 1.8153, + "step": 1765 + }, + { + "epoch": 0.18628691983122364, + "grad_norm": 0.744755208492279, + "learning_rate": 0.0013856958449267002, + "loss": 1.824, + "step": 1766 + }, + { + "epoch": 0.18639240506329113, + "grad_norm": 0.6365403532981873, + "learning_rate": 0.0013855625858977584, + "loss": 1.8304, + "step": 1767 + }, + { + "epoch": 0.18649789029535865, + "grad_norm": 0.5689749717712402, + "learning_rate": 0.0013854292556508593, + "loss": 1.7976, + "step": 1768 + }, + { + "epoch": 0.18660337552742617, + "grad_norm": 0.682975172996521, + "learning_rate": 0.0013852958542009438, + "loss": 1.8305, + "step": 1769 + }, + { + "epoch": 0.18670886075949367, + "grad_norm": 0.5935356616973877, + "learning_rate": 0.00138516238156296, + "loss": 1.8359, + "step": 1770 + }, + { + "epoch": 0.1868143459915612, + "grad_norm": 0.5381571650505066, + "learning_rate": 0.001385028837751864, + "loss": 1.8305, + "step": 1771 + }, + { + "epoch": 0.18691983122362868, + "grad_norm": 0.5598241090774536, + "learning_rate": 0.0013848952227826202, + "loss": 1.8264, + "step": 1772 + }, + { + "epoch": 0.1870253164556962, + "grad_norm": 0.6018627882003784, + "learning_rate": 0.0013847615366702009, + "loss": 1.8173, + "step": 1773 + }, + { + "epoch": 0.18713080168776372, + "grad_norm": 0.5651939511299133, + "learning_rate": 0.001384627779429586, + "loss": 1.8678, + "step": 1774 + }, + { + "epoch": 0.1872362869198312, + "grad_norm": 0.5512242913246155, + "learning_rate": 0.0013844939510757642, + "loss": 1.8682, + "step": 1775 + }, + { + "epoch": 0.18734177215189873, + "grad_norm": 0.5965521335601807, + "learning_rate": 0.0013843600516237312, + "loss": 1.8386, + "step": 1776 + }, + { + "epoch": 0.18744725738396625, + 
"grad_norm": 0.6168941259384155, + "learning_rate": 0.001384226081088491, + "loss": 1.8042, + "step": 1777 + }, + { + "epoch": 0.18755274261603375, + "grad_norm": 0.571948766708374, + "learning_rate": 0.001384092039485056, + "loss": 1.764, + "step": 1778 + }, + { + "epoch": 0.18765822784810127, + "grad_norm": 0.7319517731666565, + "learning_rate": 0.0013839579268284461, + "loss": 1.823, + "step": 1779 + }, + { + "epoch": 0.1877637130801688, + "grad_norm": 0.7784920930862427, + "learning_rate": 0.0013838237431336895, + "loss": 1.8173, + "step": 1780 + }, + { + "epoch": 0.18786919831223628, + "grad_norm": 0.6123964190483093, + "learning_rate": 0.0013836894884158217, + "loss": 1.8163, + "step": 1781 + }, + { + "epoch": 0.1879746835443038, + "grad_norm": 0.612315833568573, + "learning_rate": 0.001383555162689887, + "loss": 1.8805, + "step": 1782 + }, + { + "epoch": 0.18808016877637132, + "grad_norm": 0.7433322072029114, + "learning_rate": 0.001383420765970937, + "loss": 1.8201, + "step": 1783 + }, + { + "epoch": 0.1881856540084388, + "grad_norm": 0.5649722814559937, + "learning_rate": 0.0013832862982740318, + "loss": 1.803, + "step": 1784 + }, + { + "epoch": 0.18829113924050633, + "grad_norm": 0.6292198300361633, + "learning_rate": 0.001383151759614239, + "loss": 1.8249, + "step": 1785 + }, + { + "epoch": 0.18839662447257383, + "grad_norm": 0.8692865967750549, + "learning_rate": 0.0013830171500066343, + "loss": 1.7973, + "step": 1786 + }, + { + "epoch": 0.18850210970464135, + "grad_norm": 0.6352278590202332, + "learning_rate": 0.0013828824694663013, + "loss": 1.8244, + "step": 1787 + }, + { + "epoch": 0.18860759493670887, + "grad_norm": 0.528076708316803, + "learning_rate": 0.001382747718008332, + "loss": 1.8263, + "step": 1788 + }, + { + "epoch": 0.18871308016877636, + "grad_norm": 0.5952082872390747, + "learning_rate": 0.0013826128956478255, + "loss": 1.8179, + "step": 1789 + }, + { + "epoch": 0.18881856540084388, + "grad_norm": 0.5581578612327576, + "learning_rate": 
0.0013824780023998899, + "loss": 1.8115, + "step": 1790 + }, + { + "epoch": 0.1889240506329114, + "grad_norm": 0.6310727596282959, + "learning_rate": 0.0013823430382796402, + "loss": 1.7832, + "step": 1791 + }, + { + "epoch": 0.1890295358649789, + "grad_norm": 0.5376208424568176, + "learning_rate": 0.0013822080033021997, + "loss": 1.8116, + "step": 1792 + }, + { + "epoch": 0.1891350210970464, + "grad_norm": 0.5993407964706421, + "learning_rate": 0.0013820728974827, + "loss": 1.8454, + "step": 1793 + }, + { + "epoch": 0.18924050632911393, + "grad_norm": 0.7239713668823242, + "learning_rate": 0.0013819377208362806, + "loss": 1.8853, + "step": 1794 + }, + { + "epoch": 0.18934599156118143, + "grad_norm": 0.7179734110832214, + "learning_rate": 0.0013818024733780881, + "loss": 1.8084, + "step": 1795 + }, + { + "epoch": 0.18945147679324895, + "grad_norm": 0.5359510183334351, + "learning_rate": 0.0013816671551232782, + "loss": 1.8427, + "step": 1796 + }, + { + "epoch": 0.18955696202531647, + "grad_norm": 0.5359193682670593, + "learning_rate": 0.0013815317660870138, + "loss": 1.7995, + "step": 1797 + }, + { + "epoch": 0.18966244725738396, + "grad_norm": 0.5632604360580444, + "learning_rate": 0.001381396306284466, + "loss": 1.8146, + "step": 1798 + }, + { + "epoch": 0.18976793248945148, + "grad_norm": 0.5246084332466125, + "learning_rate": 0.0013812607757308134, + "loss": 1.8055, + "step": 1799 + }, + { + "epoch": 0.189873417721519, + "grad_norm": 0.5633968114852905, + "learning_rate": 0.0013811251744412431, + "loss": 1.7936, + "step": 1800 + }, + { + "epoch": 0.1899789029535865, + "grad_norm": 0.505204975605011, + "learning_rate": 0.0013809895024309501, + "loss": 1.7592, + "step": 1801 + }, + { + "epoch": 0.190084388185654, + "grad_norm": 0.5384221076965332, + "learning_rate": 0.001380853759715137, + "loss": 1.7957, + "step": 1802 + }, + { + "epoch": 0.1901898734177215, + "grad_norm": 0.577791154384613, + "learning_rate": 0.0013807179463090143, + "loss": 1.8212, + "step": 
1803 + }, + { + "epoch": 0.19029535864978903, + "grad_norm": 0.5403988361358643, + "learning_rate": 0.0013805820622278008, + "loss": 1.8053, + "step": 1804 + }, + { + "epoch": 0.19040084388185655, + "grad_norm": 0.5736650824546814, + "learning_rate": 0.0013804461074867227, + "loss": 1.8133, + "step": 1805 + }, + { + "epoch": 0.19050632911392404, + "grad_norm": 0.6644865274429321, + "learning_rate": 0.0013803100821010146, + "loss": 1.8115, + "step": 1806 + }, + { + "epoch": 0.19061181434599156, + "grad_norm": 0.9250186681747437, + "learning_rate": 0.0013801739860859188, + "loss": 1.8168, + "step": 1807 + }, + { + "epoch": 0.19071729957805908, + "grad_norm": 0.7576774954795837, + "learning_rate": 0.0013800378194566856, + "loss": 1.8, + "step": 1808 + }, + { + "epoch": 0.19082278481012657, + "grad_norm": 0.6116045117378235, + "learning_rate": 0.001379901582228573, + "loss": 1.8473, + "step": 1809 + }, + { + "epoch": 0.1909282700421941, + "grad_norm": 0.95722496509552, + "learning_rate": 0.0013797652744168473, + "loss": 1.7876, + "step": 1810 + }, + { + "epoch": 0.1910337552742616, + "grad_norm": 0.7040595412254333, + "learning_rate": 0.0013796288960367822, + "loss": 1.7703, + "step": 1811 + }, + { + "epoch": 0.1911392405063291, + "grad_norm": 0.5433789491653442, + "learning_rate": 0.0013794924471036596, + "loss": 1.8057, + "step": 1812 + }, + { + "epoch": 0.19124472573839663, + "grad_norm": 0.7019160389900208, + "learning_rate": 0.0013793559276327695, + "loss": 1.8034, + "step": 1813 + }, + { + "epoch": 0.19135021097046415, + "grad_norm": 0.7267802953720093, + "learning_rate": 0.0013792193376394094, + "loss": 1.7752, + "step": 1814 + }, + { + "epoch": 0.19145569620253164, + "grad_norm": 0.547259509563446, + "learning_rate": 0.001379082677138885, + "loss": 1.8006, + "step": 1815 + }, + { + "epoch": 0.19156118143459916, + "grad_norm": 0.775007963180542, + "learning_rate": 0.0013789459461465096, + "loss": 1.8137, + "step": 1816 + }, + { + "epoch": 0.19166666666666668, + 
"grad_norm": 0.7312063574790955, + "learning_rate": 0.001378809144677605, + "loss": 1.788, + "step": 1817 + }, + { + "epoch": 0.19177215189873417, + "grad_norm": 0.5861467123031616, + "learning_rate": 0.0013786722727474998, + "loss": 1.8022, + "step": 1818 + }, + { + "epoch": 0.1918776371308017, + "grad_norm": 0.5974711775779724, + "learning_rate": 0.0013785353303715317, + "loss": 1.8215, + "step": 1819 + }, + { + "epoch": 0.19198312236286919, + "grad_norm": 0.5382301211357117, + "learning_rate": 0.0013783983175650457, + "loss": 1.8605, + "step": 1820 + }, + { + "epoch": 0.1920886075949367, + "grad_norm": 0.6467664837837219, + "learning_rate": 0.001378261234343395, + "loss": 1.8067, + "step": 1821 + }, + { + "epoch": 0.19219409282700423, + "grad_norm": 0.6787568926811218, + "learning_rate": 0.0013781240807219399, + "loss": 1.8308, + "step": 1822 + }, + { + "epoch": 0.19229957805907172, + "grad_norm": 0.6820540428161621, + "learning_rate": 0.0013779868567160495, + "loss": 1.831, + "step": 1823 + }, + { + "epoch": 0.19240506329113924, + "grad_norm": 0.8255243897438049, + "learning_rate": 0.0013778495623411008, + "loss": 1.8281, + "step": 1824 + }, + { + "epoch": 0.19251054852320676, + "grad_norm": 0.7325361967086792, + "learning_rate": 0.0013777121976124775, + "loss": 1.8583, + "step": 1825 + }, + { + "epoch": 0.19261603375527425, + "grad_norm": 0.6110305786132812, + "learning_rate": 0.0013775747625455724, + "loss": 1.8008, + "step": 1826 + }, + { + "epoch": 0.19272151898734177, + "grad_norm": 0.6269739866256714, + "learning_rate": 0.0013774372571557856, + "loss": 1.7915, + "step": 1827 + }, + { + "epoch": 0.1928270042194093, + "grad_norm": 0.6545220017433167, + "learning_rate": 0.0013772996814585261, + "loss": 1.7489, + "step": 1828 + }, + { + "epoch": 0.19293248945147679, + "grad_norm": 0.781040608882904, + "learning_rate": 0.0013771620354692087, + "loss": 1.7889, + "step": 1829 + }, + { + "epoch": 0.1930379746835443, + "grad_norm": 0.5693268179893494, + 
"learning_rate": 0.0013770243192032581, + "loss": 1.7813, + "step": 1830 + }, + { + "epoch": 0.19314345991561183, + "grad_norm": 0.758301854133606, + "learning_rate": 0.0013768865326761058, + "loss": 1.7872, + "step": 1831 + }, + { + "epoch": 0.19324894514767932, + "grad_norm": 0.5822269320487976, + "learning_rate": 0.0013767486759031918, + "loss": 1.8031, + "step": 1832 + }, + { + "epoch": 0.19335443037974684, + "grad_norm": 0.8579005002975464, + "learning_rate": 0.0013766107488999632, + "loss": 1.8453, + "step": 1833 + }, + { + "epoch": 0.19345991561181436, + "grad_norm": 0.9313836693763733, + "learning_rate": 0.0013764727516818757, + "loss": 1.8341, + "step": 1834 + }, + { + "epoch": 0.19356540084388185, + "grad_norm": 0.721560001373291, + "learning_rate": 0.0013763346842643927, + "loss": 1.7998, + "step": 1835 + }, + { + "epoch": 0.19367088607594937, + "grad_norm": 0.6192070245742798, + "learning_rate": 0.0013761965466629847, + "loss": 1.8178, + "step": 1836 + }, + { + "epoch": 0.19377637130801686, + "grad_norm": 0.642662763595581, + "learning_rate": 0.0013760583388931315, + "loss": 1.7849, + "step": 1837 + }, + { + "epoch": 0.19388185654008439, + "grad_norm": 0.6284757852554321, + "learning_rate": 0.0013759200609703196, + "loss": 1.7846, + "step": 1838 + }, + { + "epoch": 0.1939873417721519, + "grad_norm": 0.582843005657196, + "learning_rate": 0.0013757817129100437, + "loss": 1.8044, + "step": 1839 + }, + { + "epoch": 0.1940928270042194, + "grad_norm": 0.7135298848152161, + "learning_rate": 0.0013756432947278064, + "loss": 1.7948, + "step": 1840 + }, + { + "epoch": 0.19419831223628692, + "grad_norm": 0.5968571901321411, + "learning_rate": 0.0013755048064391182, + "loss": 1.8288, + "step": 1841 + }, + { + "epoch": 0.19430379746835444, + "grad_norm": 0.6077160835266113, + "learning_rate": 0.0013753662480594973, + "loss": 1.8052, + "step": 1842 + }, + { + "epoch": 0.19440928270042193, + "grad_norm": 0.697867751121521, + "learning_rate": 0.0013752276196044699, + 
"loss": 1.7849, + "step": 1843 + }, + { + "epoch": 0.19451476793248945, + "grad_norm": 0.8788059949874878, + "learning_rate": 0.0013750889210895705, + "loss": 1.8406, + "step": 1844 + }, + { + "epoch": 0.19462025316455697, + "grad_norm": 0.8923109173774719, + "learning_rate": 0.0013749501525303401, + "loss": 1.8047, + "step": 1845 + }, + { + "epoch": 0.19472573839662446, + "grad_norm": 0.6588500738143921, + "learning_rate": 0.0013748113139423288, + "loss": 1.8336, + "step": 1846 + }, + { + "epoch": 0.19483122362869199, + "grad_norm": 0.7642912268638611, + "learning_rate": 0.0013746724053410944, + "loss": 1.8262, + "step": 1847 + }, + { + "epoch": 0.1949367088607595, + "grad_norm": 0.6751001477241516, + "learning_rate": 0.001374533426742202, + "loss": 1.7757, + "step": 1848 + }, + { + "epoch": 0.195042194092827, + "grad_norm": 0.8043478727340698, + "learning_rate": 0.0013743943781612251, + "loss": 1.7591, + "step": 1849 + }, + { + "epoch": 0.19514767932489452, + "grad_norm": 0.9823368191719055, + "learning_rate": 0.0013742552596137444, + "loss": 1.8372, + "step": 1850 + }, + { + "epoch": 0.19525316455696204, + "grad_norm": 0.706421971321106, + "learning_rate": 0.0013741160711153492, + "loss": 1.7787, + "step": 1851 + }, + { + "epoch": 0.19535864978902953, + "grad_norm": 0.662057638168335, + "learning_rate": 0.0013739768126816358, + "loss": 1.8193, + "step": 1852 + }, + { + "epoch": 0.19546413502109705, + "grad_norm": 0.8521687388420105, + "learning_rate": 0.0013738374843282094, + "loss": 1.8046, + "step": 1853 + }, + { + "epoch": 0.19556962025316454, + "grad_norm": 0.7220386862754822, + "learning_rate": 0.0013736980860706819, + "loss": 1.8391, + "step": 1854 + }, + { + "epoch": 0.19567510548523206, + "grad_norm": 0.5429621338844299, + "learning_rate": 0.001373558617924674, + "loss": 1.807, + "step": 1855 + }, + { + "epoch": 0.19578059071729959, + "grad_norm": 0.8180233240127563, + "learning_rate": 0.0013734190799058136, + "loss": 1.8136, + "step": 1856 + }, + { + 
"epoch": 0.19588607594936708, + "grad_norm": 0.8063397407531738, + "learning_rate": 0.0013732794720297367, + "loss": 1.8073, + "step": 1857 + }, + { + "epoch": 0.1959915611814346, + "grad_norm": 0.5641857981681824, + "learning_rate": 0.0013731397943120868, + "loss": 1.8082, + "step": 1858 + }, + { + "epoch": 0.19609704641350212, + "grad_norm": 0.7181227207183838, + "learning_rate": 0.001373000046768516, + "loss": 1.801, + "step": 1859 + }, + { + "epoch": 0.1962025316455696, + "grad_norm": 0.6571053862571716, + "learning_rate": 0.0013728602294146833, + "loss": 1.8419, + "step": 1860 + }, + { + "epoch": 0.19630801687763713, + "grad_norm": 0.5546746850013733, + "learning_rate": 0.001372720342266256, + "loss": 1.8113, + "step": 1861 + }, + { + "epoch": 0.19641350210970465, + "grad_norm": 0.7122602462768555, + "learning_rate": 0.001372580385338909, + "loss": 1.8003, + "step": 1862 + }, + { + "epoch": 0.19651898734177214, + "grad_norm": 0.5130662322044373, + "learning_rate": 0.0013724403586483254, + "loss": 1.8672, + "step": 1863 + }, + { + "epoch": 0.19662447257383966, + "grad_norm": 0.6890078186988831, + "learning_rate": 0.001372300262210196, + "loss": 1.7923, + "step": 1864 + }, + { + "epoch": 0.19672995780590719, + "grad_norm": 0.7426404356956482, + "learning_rate": 0.001372160096040219, + "loss": 1.7921, + "step": 1865 + }, + { + "epoch": 0.19683544303797468, + "grad_norm": 0.7277930974960327, + "learning_rate": 0.001372019860154101, + "loss": 1.7726, + "step": 1866 + }, + { + "epoch": 0.1969409282700422, + "grad_norm": 0.7282004952430725, + "learning_rate": 0.001371879554567556, + "loss": 1.8216, + "step": 1867 + }, + { + "epoch": 0.19704641350210972, + "grad_norm": 0.6172088384628296, + "learning_rate": 0.0013717391792963062, + "loss": 1.7819, + "step": 1868 + }, + { + "epoch": 0.1971518987341772, + "grad_norm": 1.0173195600509644, + "learning_rate": 0.0013715987343560804, + "loss": 1.8281, + "step": 1869 + }, + { + "epoch": 0.19725738396624473, + "grad_norm": 
0.7915918231010437, + "learning_rate": 0.0013714582197626175, + "loss": 1.8338, + "step": 1870 + }, + { + "epoch": 0.19736286919831222, + "grad_norm": 0.589462161064148, + "learning_rate": 0.001371317635531662, + "loss": 1.7755, + "step": 1871 + }, + { + "epoch": 0.19746835443037974, + "grad_norm": 0.6464663147926331, + "learning_rate": 0.001371176981678967, + "loss": 1.7747, + "step": 1872 + }, + { + "epoch": 0.19757383966244726, + "grad_norm": 0.7846564650535583, + "learning_rate": 0.001371036258220294, + "loss": 1.7744, + "step": 1873 + }, + { + "epoch": 0.19767932489451476, + "grad_norm": 0.5613476037979126, + "learning_rate": 0.0013708954651714116, + "loss": 1.7868, + "step": 1874 + }, + { + "epoch": 0.19778481012658228, + "grad_norm": 0.5911543369293213, + "learning_rate": 0.0013707546025480961, + "loss": 1.8063, + "step": 1875 + }, + { + "epoch": 0.1978902953586498, + "grad_norm": 0.5710782408714294, + "learning_rate": 0.001370613670366132, + "loss": 1.839, + "step": 1876 + }, + { + "epoch": 0.1979957805907173, + "grad_norm": 0.5587511658668518, + "learning_rate": 0.0013704726686413116, + "loss": 1.8599, + "step": 1877 + }, + { + "epoch": 0.1981012658227848, + "grad_norm": 0.5481368899345398, + "learning_rate": 0.0013703315973894346, + "loss": 1.7736, + "step": 1878 + }, + { + "epoch": 0.19820675105485233, + "grad_norm": 0.5733166337013245, + "learning_rate": 0.001370190456626309, + "loss": 1.7614, + "step": 1879 + }, + { + "epoch": 0.19831223628691982, + "grad_norm": 0.5310536623001099, + "learning_rate": 0.0013700492463677501, + "loss": 1.8169, + "step": 1880 + }, + { + "epoch": 0.19841772151898734, + "grad_norm": 0.5985238552093506, + "learning_rate": 0.0013699079666295811, + "loss": 1.8113, + "step": 1881 + }, + { + "epoch": 0.19852320675105486, + "grad_norm": 0.6656289100646973, + "learning_rate": 0.0013697666174276337, + "loss": 1.7821, + "step": 1882 + }, + { + "epoch": 0.19862869198312236, + "grad_norm": 0.6720181107521057, + "learning_rate": 
0.001369625198777746, + "loss": 1.8006, + "step": 1883 + }, + { + "epoch": 0.19873417721518988, + "grad_norm": 0.7157179117202759, + "learning_rate": 0.0013694837106957654, + "loss": 1.8109, + "step": 1884 + }, + { + "epoch": 0.19883966244725737, + "grad_norm": 0.5946904420852661, + "learning_rate": 0.0013693421531975455, + "loss": 1.7843, + "step": 1885 + }, + { + "epoch": 0.1989451476793249, + "grad_norm": 0.7766394019126892, + "learning_rate": 0.0013692005262989496, + "loss": 1.8114, + "step": 1886 + }, + { + "epoch": 0.1990506329113924, + "grad_norm": 0.7409523129463196, + "learning_rate": 0.0013690588300158467, + "loss": 1.8036, + "step": 1887 + }, + { + "epoch": 0.1991561181434599, + "grad_norm": 0.7590195536613464, + "learning_rate": 0.001368917064364115, + "loss": 1.8326, + "step": 1888 + }, + { + "epoch": 0.19926160337552742, + "grad_norm": 1.0504653453826904, + "learning_rate": 0.0013687752293596402, + "loss": 1.7992, + "step": 1889 + }, + { + "epoch": 0.19936708860759494, + "grad_norm": 0.568359375, + "learning_rate": 0.0013686333250183154, + "loss": 1.7963, + "step": 1890 + }, + { + "epoch": 0.19947257383966244, + "grad_norm": 0.7557954788208008, + "learning_rate": 0.0013684913513560418, + "loss": 1.8116, + "step": 1891 + }, + { + "epoch": 0.19957805907172996, + "grad_norm": 0.8103844523429871, + "learning_rate": 0.0013683493083887282, + "loss": 1.7835, + "step": 1892 + }, + { + "epoch": 0.19968354430379748, + "grad_norm": 0.844800591468811, + "learning_rate": 0.0013682071961322914, + "loss": 1.7513, + "step": 1893 + }, + { + "epoch": 0.19978902953586497, + "grad_norm": 0.6569165587425232, + "learning_rate": 0.0013680650146026554, + "loss": 1.7954, + "step": 1894 + }, + { + "epoch": 0.1998945147679325, + "grad_norm": 0.7628807425498962, + "learning_rate": 0.0013679227638157523, + "loss": 1.8301, + "step": 1895 + }, + { + "epoch": 0.2, + "grad_norm": 0.7438894510269165, + "learning_rate": 0.0013677804437875227, + "loss": 1.7979, + "step": 1896 + }, + { + 
"epoch": 0.2001054852320675, + "grad_norm": 0.6410209536552429, + "learning_rate": 0.0013676380545339136, + "loss": 1.8145, + "step": 1897 + }, + { + "epoch": 0.20021097046413502, + "grad_norm": 0.7649298906326294, + "learning_rate": 0.0013674955960708808, + "loss": 1.8139, + "step": 1898 + }, + { + "epoch": 0.20031645569620254, + "grad_norm": 0.7395493984222412, + "learning_rate": 0.0013673530684143874, + "loss": 1.8041, + "step": 1899 + }, + { + "epoch": 0.20042194092827004, + "grad_norm": 0.7182289361953735, + "learning_rate": 0.001367210471580404, + "loss": 1.8137, + "step": 1900 + }, + { + "epoch": 0.20052742616033756, + "grad_norm": 0.7966656684875488, + "learning_rate": 0.0013670678055849098, + "loss": 1.7952, + "step": 1901 + }, + { + "epoch": 0.20063291139240505, + "grad_norm": 0.5820793509483337, + "learning_rate": 0.0013669250704438911, + "loss": 1.7598, + "step": 1902 + }, + { + "epoch": 0.20073839662447257, + "grad_norm": 0.8627523183822632, + "learning_rate": 0.0013667822661733418, + "loss": 1.8222, + "step": 1903 + }, + { + "epoch": 0.2008438818565401, + "grad_norm": 0.8543941974639893, + "learning_rate": 0.0013666393927892642, + "loss": 1.7988, + "step": 1904 + }, + { + "epoch": 0.20094936708860758, + "grad_norm": 0.7369166016578674, + "learning_rate": 0.0013664964503076677, + "loss": 1.7532, + "step": 1905 + }, + { + "epoch": 0.2010548523206751, + "grad_norm": 0.6176595091819763, + "learning_rate": 0.0013663534387445696, + "loss": 1.8336, + "step": 1906 + }, + { + "epoch": 0.20116033755274262, + "grad_norm": 0.7554225325584412, + "learning_rate": 0.0013662103581159955, + "loss": 1.7993, + "step": 1907 + }, + { + "epoch": 0.20126582278481012, + "grad_norm": 0.6865677833557129, + "learning_rate": 0.0013660672084379781, + "loss": 1.8043, + "step": 1908 + }, + { + "epoch": 0.20137130801687764, + "grad_norm": 0.7225984930992126, + "learning_rate": 0.001365923989726558, + "loss": 1.8493, + "step": 1909 + }, + { + "epoch": 0.20147679324894516, + 
"grad_norm": 0.7784916162490845, + "learning_rate": 0.0013657807019977835, + "loss": 1.8164, + "step": 1910 + }, + { + "epoch": 0.20158227848101265, + "grad_norm": 0.6273195743560791, + "learning_rate": 0.0013656373452677107, + "loss": 1.823, + "step": 1911 + }, + { + "epoch": 0.20168776371308017, + "grad_norm": 0.8511533737182617, + "learning_rate": 0.0013654939195524038, + "loss": 1.7983, + "step": 1912 + }, + { + "epoch": 0.2017932489451477, + "grad_norm": 0.6014835834503174, + "learning_rate": 0.0013653504248679338, + "loss": 1.7793, + "step": 1913 + }, + { + "epoch": 0.20189873417721518, + "grad_norm": 0.7899645566940308, + "learning_rate": 0.0013652068612303803, + "loss": 1.8054, + "step": 1914 + }, + { + "epoch": 0.2020042194092827, + "grad_norm": 1.045361042022705, + "learning_rate": 0.0013650632286558305, + "loss": 1.7797, + "step": 1915 + }, + { + "epoch": 0.20210970464135022, + "grad_norm": 0.7087655663490295, + "learning_rate": 0.001364919527160379, + "loss": 1.8109, + "step": 1916 + }, + { + "epoch": 0.20221518987341772, + "grad_norm": 0.6971458196640015, + "learning_rate": 0.001364775756760128, + "loss": 1.7853, + "step": 1917 + }, + { + "epoch": 0.20232067510548524, + "grad_norm": 0.8821741342544556, + "learning_rate": 0.0013646319174711878, + "loss": 1.8071, + "step": 1918 + }, + { + "epoch": 0.20242616033755273, + "grad_norm": 0.7598319053649902, + "learning_rate": 0.0013644880093096766, + "loss": 1.8497, + "step": 1919 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 0.5697308778762817, + "learning_rate": 0.0013643440322917198, + "loss": 1.7797, + "step": 1920 + }, + { + "epoch": 0.20263713080168777, + "grad_norm": 0.7451226115226746, + "learning_rate": 0.0013641999864334507, + "loss": 1.8142, + "step": 1921 + }, + { + "epoch": 0.20274261603375526, + "grad_norm": 0.5878582000732422, + "learning_rate": 0.0013640558717510107, + "loss": 1.7655, + "step": 1922 + }, + { + "epoch": 0.20284810126582278, + "grad_norm": 0.7141257524490356, + 
"learning_rate": 0.0013639116882605481, + "loss": 1.8092, + "step": 1923 + }, + { + "epoch": 0.2029535864978903, + "grad_norm": 1.0552257299423218, + "learning_rate": 0.0013637674359782196, + "loss": 1.7805, + "step": 1924 + }, + { + "epoch": 0.2030590717299578, + "grad_norm": 0.694965660572052, + "learning_rate": 0.0013636231149201895, + "loss": 1.837, + "step": 1925 + }, + { + "epoch": 0.20316455696202532, + "grad_norm": 0.7520695328712463, + "learning_rate": 0.0013634787251026296, + "loss": 1.7811, + "step": 1926 + }, + { + "epoch": 0.20327004219409284, + "grad_norm": 0.7973810434341431, + "learning_rate": 0.0013633342665417192, + "loss": 1.7881, + "step": 1927 + }, + { + "epoch": 0.20337552742616033, + "grad_norm": 0.5866389274597168, + "learning_rate": 0.0013631897392536463, + "loss": 1.8322, + "step": 1928 + }, + { + "epoch": 0.20348101265822785, + "grad_norm": 0.8697535991668701, + "learning_rate": 0.001363045143254605, + "loss": 1.782, + "step": 1929 + }, + { + "epoch": 0.20358649789029537, + "grad_norm": 1.0281116962432861, + "learning_rate": 0.0013629004785607989, + "loss": 1.8196, + "step": 1930 + }, + { + "epoch": 0.20369198312236286, + "grad_norm": 0.6361050605773926, + "learning_rate": 0.0013627557451884374, + "loss": 1.7383, + "step": 1931 + }, + { + "epoch": 0.20379746835443038, + "grad_norm": 0.6334152817726135, + "learning_rate": 0.0013626109431537398, + "loss": 1.8108, + "step": 1932 + }, + { + "epoch": 0.2039029535864979, + "grad_norm": 0.6471084356307983, + "learning_rate": 0.001362466072472931, + "loss": 1.7964, + "step": 1933 + }, + { + "epoch": 0.2040084388185654, + "grad_norm": 0.5491583347320557, + "learning_rate": 0.0013623211331622448, + "loss": 1.8029, + "step": 1934 + }, + { + "epoch": 0.20411392405063292, + "grad_norm": 0.6053113341331482, + "learning_rate": 0.0013621761252379221, + "loss": 1.7737, + "step": 1935 + }, + { + "epoch": 0.2042194092827004, + "grad_norm": 0.5324949026107788, + "learning_rate": 0.0013620310487162124, + 
"loss": 1.7955, + "step": 1936 + }, + { + "epoch": 0.20432489451476793, + "grad_norm": 0.6179758310317993, + "learning_rate": 0.0013618859036133714, + "loss": 1.8086, + "step": 1937 + }, + { + "epoch": 0.20443037974683545, + "grad_norm": 0.5430899858474731, + "learning_rate": 0.001361740689945664, + "loss": 1.8052, + "step": 1938 + }, + { + "epoch": 0.20453586497890294, + "grad_norm": 0.5990208983421326, + "learning_rate": 0.001361595407729362, + "loss": 1.8113, + "step": 1939 + }, + { + "epoch": 0.20464135021097046, + "grad_norm": 0.6683238744735718, + "learning_rate": 0.0013614500569807445, + "loss": 1.7619, + "step": 1940 + }, + { + "epoch": 0.20474683544303798, + "grad_norm": 0.5782997012138367, + "learning_rate": 0.0013613046377160996, + "loss": 1.8034, + "step": 1941 + }, + { + "epoch": 0.20485232067510548, + "grad_norm": 0.5957682132720947, + "learning_rate": 0.0013611591499517212, + "loss": 1.8092, + "step": 1942 + }, + { + "epoch": 0.204957805907173, + "grad_norm": 0.5357755422592163, + "learning_rate": 0.001361013593703913, + "loss": 1.7788, + "step": 1943 + }, + { + "epoch": 0.20506329113924052, + "grad_norm": 0.5654252767562866, + "learning_rate": 0.0013608679689889847, + "loss": 1.7865, + "step": 1944 + }, + { + "epoch": 0.205168776371308, + "grad_norm": 0.5950928330421448, + "learning_rate": 0.0013607222758232546, + "loss": 1.8268, + "step": 1945 + }, + { + "epoch": 0.20527426160337553, + "grad_norm": 0.6105898022651672, + "learning_rate": 0.0013605765142230479, + "loss": 1.7557, + "step": 1946 + }, + { + "epoch": 0.20537974683544305, + "grad_norm": 0.5216822028160095, + "learning_rate": 0.0013604306842046983, + "loss": 1.8298, + "step": 1947 + }, + { + "epoch": 0.20548523206751054, + "grad_norm": 0.6122041344642639, + "learning_rate": 0.0013602847857845466, + "loss": 1.7762, + "step": 1948 + }, + { + "epoch": 0.20559071729957806, + "grad_norm": 0.5624343752861023, + "learning_rate": 0.0013601388189789414, + "loss": 1.7661, + "step": 1949 + }, + { + 
"epoch": 0.20569620253164558, + "grad_norm": 0.5941805839538574, + "learning_rate": 0.0013599927838042394, + "loss": 1.7649, + "step": 1950 + }, + { + "epoch": 0.20580168776371308, + "grad_norm": 0.5495529174804688, + "learning_rate": 0.0013598466802768041, + "loss": 1.8263, + "step": 1951 + }, + { + "epoch": 0.2059071729957806, + "grad_norm": 0.6203208565711975, + "learning_rate": 0.0013597005084130072, + "loss": 1.8006, + "step": 1952 + }, + { + "epoch": 0.2060126582278481, + "grad_norm": 0.6023412346839905, + "learning_rate": 0.0013595542682292281, + "loss": 1.8235, + "step": 1953 + }, + { + "epoch": 0.2061181434599156, + "grad_norm": 0.6031159162521362, + "learning_rate": 0.0013594079597418541, + "loss": 1.7553, + "step": 1954 + }, + { + "epoch": 0.20622362869198313, + "grad_norm": 0.6355703473091125, + "learning_rate": 0.0013592615829672791, + "loss": 1.8028, + "step": 1955 + }, + { + "epoch": 0.20632911392405062, + "grad_norm": 0.69113689661026, + "learning_rate": 0.0013591151379219058, + "loss": 1.76, + "step": 1956 + }, + { + "epoch": 0.20643459915611814, + "grad_norm": 0.6234780550003052, + "learning_rate": 0.0013589686246221438, + "loss": 1.778, + "step": 1957 + }, + { + "epoch": 0.20654008438818566, + "grad_norm": 0.5637633204460144, + "learning_rate": 0.001358822043084411, + "loss": 1.7978, + "step": 1958 + }, + { + "epoch": 0.20664556962025316, + "grad_norm": 0.5783771276473999, + "learning_rate": 0.0013586753933251322, + "loss": 1.8123, + "step": 1959 + }, + { + "epoch": 0.20675105485232068, + "grad_norm": 0.6334384679794312, + "learning_rate": 0.0013585286753607408, + "loss": 1.7695, + "step": 1960 + }, + { + "epoch": 0.2068565400843882, + "grad_norm": 0.5771629214286804, + "learning_rate": 0.0013583818892076765, + "loss": 1.8332, + "step": 1961 + }, + { + "epoch": 0.2069620253164557, + "grad_norm": 0.6295068860054016, + "learning_rate": 0.0013582350348823882, + "loss": 1.8337, + "step": 1962 + }, + { + "epoch": 0.2070675105485232, + "grad_norm": 
0.5551509857177734, + "learning_rate": 0.0013580881124013312, + "loss": 1.8177, + "step": 1963 + }, + { + "epoch": 0.20717299578059073, + "grad_norm": 0.6429426670074463, + "learning_rate": 0.001357941121780969, + "loss": 1.7674, + "step": 1964 + }, + { + "epoch": 0.20727848101265822, + "grad_norm": 0.7579837441444397, + "learning_rate": 0.0013577940630377725, + "loss": 1.8011, + "step": 1965 + }, + { + "epoch": 0.20738396624472574, + "grad_norm": 0.9849380254745483, + "learning_rate": 0.0013576469361882208, + "loss": 1.7866, + "step": 1966 + }, + { + "epoch": 0.20748945147679324, + "grad_norm": 0.6152302026748657, + "learning_rate": 0.0013574997412487996, + "loss": 1.816, + "step": 1967 + }, + { + "epoch": 0.20759493670886076, + "grad_norm": 0.9792753458023071, + "learning_rate": 0.0013573524782360034, + "loss": 1.7993, + "step": 1968 + }, + { + "epoch": 0.20770042194092828, + "grad_norm": 1.0709459781646729, + "learning_rate": 0.0013572051471663332, + "loss": 1.8068, + "step": 1969 + }, + { + "epoch": 0.20780590717299577, + "grad_norm": 0.5994015336036682, + "learning_rate": 0.0013570577480562986, + "loss": 1.7885, + "step": 1970 + }, + { + "epoch": 0.2079113924050633, + "grad_norm": 1.0734914541244507, + "learning_rate": 0.0013569102809224162, + "loss": 1.7964, + "step": 1971 + }, + { + "epoch": 0.2080168776371308, + "grad_norm": 1.295829176902771, + "learning_rate": 0.0013567627457812105, + "loss": 1.7635, + "step": 1972 + }, + { + "epoch": 0.2081223628691983, + "grad_norm": 0.6244844198226929, + "learning_rate": 0.0013566151426492137, + "loss": 1.7948, + "step": 1973 + }, + { + "epoch": 0.20822784810126582, + "grad_norm": 1.5932644605636597, + "learning_rate": 0.0013564674715429651, + "loss": 1.7923, + "step": 1974 + }, + { + "epoch": 0.20833333333333334, + "grad_norm": 0.5851067900657654, + "learning_rate": 0.0013563197324790123, + "loss": 1.7997, + "step": 1975 + }, + { + "epoch": 0.20843881856540084, + "grad_norm": 1.0767399072647095, + "learning_rate": 
0.0013561719254739104, + "loss": 1.789, + "step": 1976 + }, + { + "epoch": 0.20854430379746836, + "grad_norm": 0.61883944272995, + "learning_rate": 0.001356024050544221, + "loss": 1.7923, + "step": 1977 + }, + { + "epoch": 0.20864978902953588, + "grad_norm": 0.9016543030738831, + "learning_rate": 0.0013558761077065154, + "loss": 1.7908, + "step": 1978 + }, + { + "epoch": 0.20875527426160337, + "grad_norm": 1.1230791807174683, + "learning_rate": 0.0013557280969773704, + "loss": 1.7718, + "step": 1979 + }, + { + "epoch": 0.2088607594936709, + "grad_norm": 0.6206470131874084, + "learning_rate": 0.0013555800183733717, + "loss": 1.7946, + "step": 1980 + }, + { + "epoch": 0.2089662447257384, + "grad_norm": 1.037485122680664, + "learning_rate": 0.0013554318719111124, + "loss": 1.7735, + "step": 1981 + }, + { + "epoch": 0.2090717299578059, + "grad_norm": 0.7871443033218384, + "learning_rate": 0.0013552836576071925, + "loss": 1.7332, + "step": 1982 + }, + { + "epoch": 0.20917721518987342, + "grad_norm": 0.7480434775352478, + "learning_rate": 0.0013551353754782211, + "loss": 1.7916, + "step": 1983 + }, + { + "epoch": 0.20928270042194091, + "grad_norm": 0.9932753443717957, + "learning_rate": 0.0013549870255408132, + "loss": 1.8291, + "step": 1984 + }, + { + "epoch": 0.20938818565400844, + "grad_norm": 0.7966845631599426, + "learning_rate": 0.0013548386078115924, + "loss": 1.7786, + "step": 1985 + }, + { + "epoch": 0.20949367088607596, + "grad_norm": 0.7313942313194275, + "learning_rate": 0.0013546901223071893, + "loss": 1.7523, + "step": 1986 + }, + { + "epoch": 0.20959915611814345, + "grad_norm": 1.0343010425567627, + "learning_rate": 0.001354541569044243, + "loss": 1.7556, + "step": 1987 + }, + { + "epoch": 0.20970464135021097, + "grad_norm": 0.6739106774330139, + "learning_rate": 0.0013543929480393994, + "loss": 1.8048, + "step": 1988 + }, + { + "epoch": 0.2098101265822785, + "grad_norm": 0.6657394766807556, + "learning_rate": 0.0013542442593093122, + "loss": 1.8142, + 
"step": 1989 + }, + { + "epoch": 0.20991561181434598, + "grad_norm": 0.7530588507652283, + "learning_rate": 0.0013540955028706425, + "loss": 1.7929, + "step": 1990 + }, + { + "epoch": 0.2100210970464135, + "grad_norm": 0.5455927848815918, + "learning_rate": 0.0013539466787400598, + "loss": 1.8046, + "step": 1991 + }, + { + "epoch": 0.21012658227848102, + "grad_norm": 0.8623123168945312, + "learning_rate": 0.00135379778693424, + "loss": 1.7718, + "step": 1992 + }, + { + "epoch": 0.21023206751054851, + "grad_norm": 0.7400192618370056, + "learning_rate": 0.0013536488274698672, + "loss": 1.8119, + "step": 1993 + }, + { + "epoch": 0.21033755274261604, + "grad_norm": 0.5437809824943542, + "learning_rate": 0.0013534998003636332, + "loss": 1.7494, + "step": 1994 + }, + { + "epoch": 0.21044303797468356, + "grad_norm": 0.6963363289833069, + "learning_rate": 0.0013533507056322374, + "loss": 1.8185, + "step": 1995 + }, + { + "epoch": 0.21054852320675105, + "grad_norm": 0.7310370802879333, + "learning_rate": 0.0013532015432923864, + "loss": 1.7644, + "step": 1996 + }, + { + "epoch": 0.21065400843881857, + "grad_norm": 0.6488956809043884, + "learning_rate": 0.0013530523133607948, + "loss": 1.7757, + "step": 1997 + }, + { + "epoch": 0.2107594936708861, + "grad_norm": 0.5085965394973755, + "learning_rate": 0.0013529030158541842, + "loss": 1.7613, + "step": 1998 + }, + { + "epoch": 0.21086497890295358, + "grad_norm": 0.6141491532325745, + "learning_rate": 0.0013527536507892844, + "loss": 1.7578, + "step": 1999 + }, + { + "epoch": 0.2109704641350211, + "grad_norm": 0.6041334867477417, + "learning_rate": 0.0013526042181828324, + "loss": 1.7955, + "step": 2000 + }, + { + "epoch": 0.2110759493670886, + "grad_norm": 0.6641448736190796, + "learning_rate": 0.001352454718051573, + "loss": 1.835, + "step": 2001 + }, + { + "epoch": 0.21118143459915611, + "grad_norm": 0.7246802449226379, + "learning_rate": 0.0013523051504122584, + "loss": 1.7818, + "step": 2002 + }, + { + "epoch": 
0.21128691983122364, + "grad_norm": 0.5530958771705627, + "learning_rate": 0.0013521555152816481, + "loss": 1.7346, + "step": 2003 + }, + { + "epoch": 0.21139240506329113, + "grad_norm": 0.5496693849563599, + "learning_rate": 0.0013520058126765097, + "loss": 1.7827, + "step": 2004 + }, + { + "epoch": 0.21149789029535865, + "grad_norm": 0.56659334897995, + "learning_rate": 0.0013518560426136182, + "loss": 1.8162, + "step": 2005 + }, + { + "epoch": 0.21160337552742617, + "grad_norm": 0.6241558790206909, + "learning_rate": 0.001351706205109756, + "loss": 1.7923, + "step": 2006 + }, + { + "epoch": 0.21170886075949366, + "grad_norm": 0.519885241985321, + "learning_rate": 0.001351556300181713, + "loss": 1.777, + "step": 2007 + }, + { + "epoch": 0.21181434599156118, + "grad_norm": 0.6064340472221375, + "learning_rate": 0.001351406327846287, + "loss": 1.8236, + "step": 2008 + }, + { + "epoch": 0.2119198312236287, + "grad_norm": 0.6992626786231995, + "learning_rate": 0.0013512562881202832, + "loss": 1.7675, + "step": 2009 + }, + { + "epoch": 0.2120253164556962, + "grad_norm": 0.8140596747398376, + "learning_rate": 0.0013511061810205143, + "loss": 1.7555, + "step": 2010 + }, + { + "epoch": 0.21213080168776371, + "grad_norm": 0.5567247271537781, + "learning_rate": 0.0013509560065638002, + "loss": 1.7955, + "step": 2011 + }, + { + "epoch": 0.21223628691983124, + "grad_norm": 0.6246695518493652, + "learning_rate": 0.001350805764766969, + "loss": 1.7916, + "step": 2012 + }, + { + "epoch": 0.21234177215189873, + "grad_norm": 0.6204013228416443, + "learning_rate": 0.0013506554556468558, + "loss": 1.804, + "step": 2013 + }, + { + "epoch": 0.21244725738396625, + "grad_norm": 0.5236452221870422, + "learning_rate": 0.001350505079220304, + "loss": 1.7956, + "step": 2014 + }, + { + "epoch": 0.21255274261603377, + "grad_norm": 0.5257412195205688, + "learning_rate": 0.0013503546355041636, + "loss": 1.7942, + "step": 2015 + }, + { + "epoch": 0.21265822784810126, + "grad_norm": 
0.6528241038322449, + "learning_rate": 0.0013502041245152924, + "loss": 1.8285, + "step": 2016 + }, + { + "epoch": 0.21276371308016878, + "grad_norm": 0.6654987931251526, + "learning_rate": 0.0013500535462705565, + "loss": 1.7866, + "step": 2017 + }, + { + "epoch": 0.21286919831223627, + "grad_norm": 0.7003796696662903, + "learning_rate": 0.0013499029007868284, + "loss": 1.7896, + "step": 2018 + }, + { + "epoch": 0.2129746835443038, + "grad_norm": 0.6006567478179932, + "learning_rate": 0.0013497521880809888, + "loss": 1.8076, + "step": 2019 + }, + { + "epoch": 0.21308016877637131, + "grad_norm": 0.6563979983329773, + "learning_rate": 0.001349601408169926, + "loss": 1.7742, + "step": 2020 + }, + { + "epoch": 0.2131856540084388, + "grad_norm": 0.8251574039459229, + "learning_rate": 0.0013494505610705356, + "loss": 1.7724, + "step": 2021 + }, + { + "epoch": 0.21329113924050633, + "grad_norm": 0.597712516784668, + "learning_rate": 0.0013492996467997205, + "loss": 1.7629, + "step": 2022 + }, + { + "epoch": 0.21339662447257385, + "grad_norm": 0.5631758570671082, + "learning_rate": 0.0013491486653743918, + "loss": 1.8321, + "step": 2023 + }, + { + "epoch": 0.21350210970464134, + "grad_norm": 0.523254930973053, + "learning_rate": 0.0013489976168114676, + "loss": 1.8102, + "step": 2024 + }, + { + "epoch": 0.21360759493670886, + "grad_norm": 0.6308006048202515, + "learning_rate": 0.0013488465011278733, + "loss": 1.7607, + "step": 2025 + }, + { + "epoch": 0.21371308016877638, + "grad_norm": 0.662747859954834, + "learning_rate": 0.0013486953183405425, + "loss": 1.767, + "step": 2026 + }, + { + "epoch": 0.21381856540084387, + "grad_norm": 0.6848785281181335, + "learning_rate": 0.001348544068466416, + "loss": 1.7728, + "step": 2027 + }, + { + "epoch": 0.2139240506329114, + "grad_norm": 0.7336981296539307, + "learning_rate": 0.0013483927515224418, + "loss": 1.8178, + "step": 2028 + }, + { + "epoch": 0.21402953586497891, + "grad_norm": 0.6746830940246582, + "learning_rate": 
0.0013482413675255762, + "loss": 1.8029, + "step": 2029 + }, + { + "epoch": 0.2141350210970464, + "grad_norm": 0.5790984034538269, + "learning_rate": 0.0013480899164927823, + "loss": 1.7663, + "step": 2030 + }, + { + "epoch": 0.21424050632911393, + "grad_norm": 0.8092432022094727, + "learning_rate": 0.0013479383984410305, + "loss": 1.8022, + "step": 2031 + }, + { + "epoch": 0.21434599156118145, + "grad_norm": 0.7725085616111755, + "learning_rate": 0.0013477868133873001, + "loss": 1.7795, + "step": 2032 + }, + { + "epoch": 0.21445147679324894, + "grad_norm": 0.6123947501182556, + "learning_rate": 0.0013476351613485762, + "loss": 1.786, + "step": 2033 + }, + { + "epoch": 0.21455696202531646, + "grad_norm": 0.6457443833351135, + "learning_rate": 0.0013474834423418522, + "loss": 1.7709, + "step": 2034 + }, + { + "epoch": 0.21466244725738395, + "grad_norm": 0.5916783213615417, + "learning_rate": 0.0013473316563841296, + "loss": 1.7722, + "step": 2035 + }, + { + "epoch": 0.21476793248945147, + "grad_norm": 0.5760599374771118, + "learning_rate": 0.0013471798034924158, + "loss": 1.8001, + "step": 2036 + }, + { + "epoch": 0.214873417721519, + "grad_norm": 0.6245244145393372, + "learning_rate": 0.0013470278836837275, + "loss": 1.77, + "step": 2037 + }, + { + "epoch": 0.2149789029535865, + "grad_norm": 0.5999014973640442, + "learning_rate": 0.001346875896975088, + "loss": 1.7811, + "step": 2038 + }, + { + "epoch": 0.215084388185654, + "grad_norm": 0.5843830108642578, + "learning_rate": 0.0013467238433835277, + "loss": 1.7888, + "step": 2039 + }, + { + "epoch": 0.21518987341772153, + "grad_norm": 0.5729867815971375, + "learning_rate": 0.0013465717229260853, + "loss": 1.8025, + "step": 2040 + }, + { + "epoch": 0.21529535864978902, + "grad_norm": 0.5571960210800171, + "learning_rate": 0.0013464195356198065, + "loss": 1.7769, + "step": 2041 + }, + { + "epoch": 0.21540084388185654, + "grad_norm": 0.5457934737205505, + "learning_rate": 0.0013462672814817445, + "loss": 1.7614, + 
"step": 2042 + }, + { + "epoch": 0.21550632911392406, + "grad_norm": 0.7046700716018677, + "learning_rate": 0.0013461149605289607, + "loss": 1.7929, + "step": 2043 + }, + { + "epoch": 0.21561181434599155, + "grad_norm": 0.7905642986297607, + "learning_rate": 0.001345962572778523, + "loss": 1.8245, + "step": 2044 + }, + { + "epoch": 0.21571729957805907, + "grad_norm": 0.5842594504356384, + "learning_rate": 0.0013458101182475073, + "loss": 1.7632, + "step": 2045 + }, + { + "epoch": 0.2158227848101266, + "grad_norm": 0.5587006211280823, + "learning_rate": 0.0013456575969529967, + "loss": 1.8082, + "step": 2046 + }, + { + "epoch": 0.2159282700421941, + "grad_norm": 0.5834338068962097, + "learning_rate": 0.001345505008912082, + "loss": 1.7624, + "step": 2047 + }, + { + "epoch": 0.2160337552742616, + "grad_norm": 0.5805526971817017, + "learning_rate": 0.0013453523541418623, + "loss": 1.7853, + "step": 2048 + }, + { + "epoch": 0.21613924050632913, + "grad_norm": 0.6216466426849365, + "learning_rate": 0.001345199632659442, + "loss": 1.7941, + "step": 2049 + }, + { + "epoch": 0.21624472573839662, + "grad_norm": 0.5308225154876709, + "learning_rate": 0.001345046844481935, + "loss": 1.7836, + "step": 2050 + }, + { + "epoch": 0.21635021097046414, + "grad_norm": 0.6020833253860474, + "learning_rate": 0.0013448939896264622, + "loss": 1.8003, + "step": 2051 + }, + { + "epoch": 0.21645569620253163, + "grad_norm": 0.7352676391601562, + "learning_rate": 0.001344741068110151, + "loss": 1.8164, + "step": 2052 + }, + { + "epoch": 0.21656118143459915, + "grad_norm": 0.7391988635063171, + "learning_rate": 0.001344588079950138, + "loss": 1.7601, + "step": 2053 + }, + { + "epoch": 0.21666666666666667, + "grad_norm": 0.6262949109077454, + "learning_rate": 0.0013444350251635654, + "loss": 1.777, + "step": 2054 + }, + { + "epoch": 0.21677215189873417, + "grad_norm": 0.6167628765106201, + "learning_rate": 0.0013442819037675843, + "loss": 1.7898, + "step": 2055 + }, + { + "epoch": 
0.2168776371308017, + "grad_norm": 0.6578426361083984, + "learning_rate": 0.0013441287157793522, + "loss": 1.7912, + "step": 2056 + }, + { + "epoch": 0.2169831223628692, + "grad_norm": 0.6626226902008057, + "learning_rate": 0.0013439754612160353, + "loss": 1.7773, + "step": 2057 + }, + { + "epoch": 0.2170886075949367, + "grad_norm": 0.5664095878601074, + "learning_rate": 0.001343822140094806, + "loss": 1.7537, + "step": 2058 + }, + { + "epoch": 0.21719409282700422, + "grad_norm": 0.7531218528747559, + "learning_rate": 0.0013436687524328449, + "loss": 1.7552, + "step": 2059 + }, + { + "epoch": 0.21729957805907174, + "grad_norm": 0.7652623057365417, + "learning_rate": 0.0013435152982473396, + "loss": 1.7592, + "step": 2060 + }, + { + "epoch": 0.21740506329113923, + "grad_norm": 0.8017783164978027, + "learning_rate": 0.0013433617775554854, + "loss": 1.7793, + "step": 2061 + }, + { + "epoch": 0.21751054852320675, + "grad_norm": 0.5413491725921631, + "learning_rate": 0.0013432081903744857, + "loss": 1.8039, + "step": 2062 + }, + { + "epoch": 0.21761603375527427, + "grad_norm": 0.6882590651512146, + "learning_rate": 0.00134305453672155, + "loss": 1.7653, + "step": 2063 + }, + { + "epoch": 0.21772151898734177, + "grad_norm": 0.6954107880592346, + "learning_rate": 0.0013429008166138965, + "loss": 1.754, + "step": 2064 + }, + { + "epoch": 0.2178270042194093, + "grad_norm": 0.5661115050315857, + "learning_rate": 0.0013427470300687498, + "loss": 1.7514, + "step": 2065 + }, + { + "epoch": 0.21793248945147678, + "grad_norm": 0.7300050258636475, + "learning_rate": 0.0013425931771033426, + "loss": 1.801, + "step": 2066 + }, + { + "epoch": 0.2180379746835443, + "grad_norm": 0.5851845741271973, + "learning_rate": 0.0013424392577349152, + "loss": 1.7571, + "step": 2067 + }, + { + "epoch": 0.21814345991561182, + "grad_norm": 0.5933858752250671, + "learning_rate": 0.001342285271980715, + "loss": 1.7933, + "step": 2068 + }, + { + "epoch": 0.2182489451476793, + "grad_norm": 
0.7228987812995911, + "learning_rate": 0.0013421312198579963, + "loss": 1.7964, + "step": 2069 + }, + { + "epoch": 0.21835443037974683, + "grad_norm": 0.8774288892745972, + "learning_rate": 0.0013419771013840217, + "loss": 1.7826, + "step": 2070 + }, + { + "epoch": 0.21845991561181435, + "grad_norm": 0.5107086896896362, + "learning_rate": 0.0013418229165760613, + "loss": 1.8036, + "step": 2071 + }, + { + "epoch": 0.21856540084388185, + "grad_norm": 1.1191219091415405, + "learning_rate": 0.001341668665451392, + "loss": 1.7816, + "step": 2072 + }, + { + "epoch": 0.21867088607594937, + "grad_norm": 1.1077667474746704, + "learning_rate": 0.0013415143480272982, + "loss": 1.802, + "step": 2073 + }, + { + "epoch": 0.2187763713080169, + "grad_norm": 0.5610079169273376, + "learning_rate": 0.0013413599643210723, + "loss": 1.7612, + "step": 2074 + }, + { + "epoch": 0.21888185654008438, + "grad_norm": 0.9011014699935913, + "learning_rate": 0.0013412055143500136, + "loss": 1.7665, + "step": 2075 + }, + { + "epoch": 0.2189873417721519, + "grad_norm": 0.7008845210075378, + "learning_rate": 0.001341050998131429, + "loss": 1.7444, + "step": 2076 + }, + { + "epoch": 0.21909282700421942, + "grad_norm": 0.6390042304992676, + "learning_rate": 0.0013408964156826327, + "loss": 1.7926, + "step": 2077 + }, + { + "epoch": 0.2191983122362869, + "grad_norm": 0.8864540457725525, + "learning_rate": 0.0013407417670209467, + "loss": 1.7651, + "step": 2078 + }, + { + "epoch": 0.21930379746835443, + "grad_norm": 0.7309128046035767, + "learning_rate": 0.0013405870521636999, + "loss": 1.8034, + "step": 2079 + }, + { + "epoch": 0.21940928270042195, + "grad_norm": 0.5363326668739319, + "learning_rate": 0.001340432271128229, + "loss": 1.8075, + "step": 2080 + }, + { + "epoch": 0.21951476793248945, + "grad_norm": 0.6182695627212524, + "learning_rate": 0.001340277423931878, + "loss": 1.7815, + "step": 2081 + }, + { + "epoch": 0.21962025316455697, + "grad_norm": 0.6820416450500488, + "learning_rate": 
0.0013401225105919982, + "loss": 1.7585, + "step": 2082 + }, + { + "epoch": 0.21972573839662446, + "grad_norm": 0.758502721786499, + "learning_rate": 0.0013399675311259484, + "loss": 1.7557, + "step": 2083 + }, + { + "epoch": 0.21983122362869198, + "grad_norm": 0.6672703623771667, + "learning_rate": 0.0013398124855510951, + "loss": 1.7811, + "step": 2084 + }, + { + "epoch": 0.2199367088607595, + "grad_norm": 0.5749210715293884, + "learning_rate": 0.0013396573738848115, + "loss": 1.7719, + "step": 2085 + }, + { + "epoch": 0.220042194092827, + "grad_norm": 0.7589380741119385, + "learning_rate": 0.001339502196144479, + "loss": 1.7376, + "step": 2086 + }, + { + "epoch": 0.2201476793248945, + "grad_norm": 0.6042190790176392, + "learning_rate": 0.0013393469523474858, + "loss": 1.815, + "step": 2087 + }, + { + "epoch": 0.22025316455696203, + "grad_norm": 0.5696602463722229, + "learning_rate": 0.001339191642511228, + "loss": 1.734, + "step": 2088 + }, + { + "epoch": 0.22035864978902953, + "grad_norm": 0.6344752311706543, + "learning_rate": 0.0013390362666531085, + "loss": 1.7835, + "step": 2089 + }, + { + "epoch": 0.22046413502109705, + "grad_norm": 0.6420271992683411, + "learning_rate": 0.0013388808247905381, + "loss": 1.7948, + "step": 2090 + }, + { + "epoch": 0.22056962025316457, + "grad_norm": 0.5215303897857666, + "learning_rate": 0.0013387253169409351, + "loss": 1.7709, + "step": 2091 + }, + { + "epoch": 0.22067510548523206, + "grad_norm": 0.6396991014480591, + "learning_rate": 0.0013385697431217247, + "loss": 1.7829, + "step": 2092 + }, + { + "epoch": 0.22078059071729958, + "grad_norm": 0.5154736638069153, + "learning_rate": 0.0013384141033503394, + "loss": 1.7688, + "step": 2093 + }, + { + "epoch": 0.2208860759493671, + "grad_norm": 0.5546190738677979, + "learning_rate": 0.0013382583976442198, + "loss": 1.7681, + "step": 2094 + }, + { + "epoch": 0.2209915611814346, + "grad_norm": 0.560543417930603, + "learning_rate": 0.0013381026260208136, + "loss": 1.7208, + 
"step": 2095 + }, + { + "epoch": 0.2210970464135021, + "grad_norm": 0.5622967481613159, + "learning_rate": 0.0013379467884975756, + "loss": 1.7884, + "step": 2096 + }, + { + "epoch": 0.22120253164556963, + "grad_norm": 0.5799266695976257, + "learning_rate": 0.001337790885091968, + "loss": 1.7821, + "step": 2097 + }, + { + "epoch": 0.22130801687763713, + "grad_norm": 0.8853607773780823, + "learning_rate": 0.0013376349158214609, + "loss": 1.8086, + "step": 2098 + }, + { + "epoch": 0.22141350210970465, + "grad_norm": 0.7822458744049072, + "learning_rate": 0.0013374788807035314, + "loss": 1.8249, + "step": 2099 + }, + { + "epoch": 0.22151898734177214, + "grad_norm": 0.5653793811798096, + "learning_rate": 0.0013373227797556634, + "loss": 1.7701, + "step": 2100 + }, + { + "epoch": 0.22162447257383966, + "grad_norm": 0.6190131902694702, + "learning_rate": 0.0013371666129953497, + "loss": 1.7665, + "step": 2101 + }, + { + "epoch": 0.22172995780590718, + "grad_norm": 0.6740388870239258, + "learning_rate": 0.0013370103804400887, + "loss": 1.8005, + "step": 2102 + }, + { + "epoch": 0.22183544303797467, + "grad_norm": 0.6346478462219238, + "learning_rate": 0.001336854082107388, + "loss": 1.7694, + "step": 2103 + }, + { + "epoch": 0.2219409282700422, + "grad_norm": 0.5928282141685486, + "learning_rate": 0.001336697718014761, + "loss": 1.7722, + "step": 2104 + }, + { + "epoch": 0.2220464135021097, + "grad_norm": 0.7070712447166443, + "learning_rate": 0.001336541288179729, + "loss": 1.7922, + "step": 2105 + }, + { + "epoch": 0.2221518987341772, + "grad_norm": 0.5944730639457703, + "learning_rate": 0.0013363847926198208, + "loss": 1.7495, + "step": 2106 + }, + { + "epoch": 0.22225738396624473, + "grad_norm": 0.786544919013977, + "learning_rate": 0.0013362282313525728, + "loss": 1.7746, + "step": 2107 + }, + { + "epoch": 0.22236286919831225, + "grad_norm": 1.162582278251648, + "learning_rate": 0.001336071604395528, + "loss": 1.7608, + "step": 2108 + }, + { + "epoch": 
0.22246835443037974, + "grad_norm": 0.6498979330062866, + "learning_rate": 0.0013359149117662377, + "loss": 1.7663, + "step": 2109 + }, + { + "epoch": 0.22257383966244726, + "grad_norm": 0.7921167612075806, + "learning_rate": 0.00133575815348226, + "loss": 1.7674, + "step": 2110 + }, + { + "epoch": 0.22267932489451478, + "grad_norm": 0.9675626754760742, + "learning_rate": 0.0013356013295611603, + "loss": 1.7401, + "step": 2111 + }, + { + "epoch": 0.22278481012658227, + "grad_norm": 0.5892433524131775, + "learning_rate": 0.0013354444400205114, + "loss": 1.7646, + "step": 2112 + }, + { + "epoch": 0.2228902953586498, + "grad_norm": 0.7026878595352173, + "learning_rate": 0.0013352874848778938, + "loss": 1.8018, + "step": 2113 + }, + { + "epoch": 0.2229957805907173, + "grad_norm": 0.9883962869644165, + "learning_rate": 0.0013351304641508951, + "loss": 1.7731, + "step": 2114 + }, + { + "epoch": 0.2231012658227848, + "grad_norm": 0.5579490065574646, + "learning_rate": 0.0013349733778571101, + "loss": 1.7674, + "step": 2115 + }, + { + "epoch": 0.22320675105485233, + "grad_norm": 0.7131237983703613, + "learning_rate": 0.0013348162260141412, + "loss": 1.7597, + "step": 2116 + }, + { + "epoch": 0.22331223628691982, + "grad_norm": 0.8689464330673218, + "learning_rate": 0.001334659008639598, + "loss": 1.782, + "step": 2117 + }, + { + "epoch": 0.22341772151898734, + "grad_norm": 0.6013777852058411, + "learning_rate": 0.0013345017257510975, + "loss": 1.7614, + "step": 2118 + }, + { + "epoch": 0.22352320675105486, + "grad_norm": 0.7073885202407837, + "learning_rate": 0.001334344377366264, + "loss": 1.7841, + "step": 2119 + }, + { + "epoch": 0.22362869198312235, + "grad_norm": 0.6529536247253418, + "learning_rate": 0.0013341869635027292, + "loss": 1.7517, + "step": 2120 + }, + { + "epoch": 0.22373417721518987, + "grad_norm": 0.6659338474273682, + "learning_rate": 0.0013340294841781323, + "loss": 1.7734, + "step": 2121 + }, + { + "epoch": 0.2238396624472574, + "grad_norm": 
0.9036666750907898, + "learning_rate": 0.0013338719394101193, + "loss": 1.739, + "step": 2122 + }, + { + "epoch": 0.22394514767932489, + "grad_norm": 0.5657690167427063, + "learning_rate": 0.001333714329216344, + "loss": 1.8036, + "step": 2123 + }, + { + "epoch": 0.2240506329113924, + "grad_norm": 0.8136948347091675, + "learning_rate": 0.0013335566536144675, + "loss": 1.7652, + "step": 2124 + }, + { + "epoch": 0.22415611814345993, + "grad_norm": 0.7731471061706543, + "learning_rate": 0.0013333989126221581, + "loss": 1.7511, + "step": 2125 + }, + { + "epoch": 0.22426160337552742, + "grad_norm": 0.5317573547363281, + "learning_rate": 0.0013332411062570914, + "loss": 1.7631, + "step": 2126 + }, + { + "epoch": 0.22436708860759494, + "grad_norm": 0.8186142444610596, + "learning_rate": 0.0013330832345369505, + "loss": 1.8027, + "step": 2127 + }, + { + "epoch": 0.22447257383966246, + "grad_norm": 0.9216009378433228, + "learning_rate": 0.0013329252974794256, + "loss": 1.7592, + "step": 2128 + }, + { + "epoch": 0.22457805907172995, + "grad_norm": 0.5872471332550049, + "learning_rate": 0.0013327672951022145, + "loss": 1.7932, + "step": 2129 + }, + { + "epoch": 0.22468354430379747, + "grad_norm": 0.5665504336357117, + "learning_rate": 0.001332609227423022, + "loss": 1.7676, + "step": 2130 + }, + { + "epoch": 0.224789029535865, + "grad_norm": 0.596952497959137, + "learning_rate": 0.0013324510944595605, + "loss": 1.7349, + "step": 2131 + }, + { + "epoch": 0.22489451476793249, + "grad_norm": 0.5594269633293152, + "learning_rate": 0.0013322928962295492, + "loss": 1.7176, + "step": 2132 + }, + { + "epoch": 0.225, + "grad_norm": 0.5410604476928711, + "learning_rate": 0.0013321346327507158, + "loss": 1.7422, + "step": 2133 + }, + { + "epoch": 0.2251054852320675, + "grad_norm": 0.5286909341812134, + "learning_rate": 0.0013319763040407938, + "loss": 1.791, + "step": 2134 + }, + { + "epoch": 0.22521097046413502, + "grad_norm": 0.5967248678207397, + "learning_rate": 
0.0013318179101175246, + "loss": 1.7688, + "step": 2135 + }, + { + "epoch": 0.22531645569620254, + "grad_norm": 0.6702744960784912, + "learning_rate": 0.0013316594509986577, + "loss": 1.7974, + "step": 2136 + }, + { + "epoch": 0.22542194092827003, + "grad_norm": 0.6102120280265808, + "learning_rate": 0.0013315009267019487, + "loss": 1.7587, + "step": 2137 + }, + { + "epoch": 0.22552742616033755, + "grad_norm": 0.6018372178077698, + "learning_rate": 0.0013313423372451614, + "loss": 1.7671, + "step": 2138 + }, + { + "epoch": 0.22563291139240507, + "grad_norm": 0.5727458596229553, + "learning_rate": 0.0013311836826460665, + "loss": 1.7905, + "step": 2139 + }, + { + "epoch": 0.22573839662447256, + "grad_norm": 0.5707597136497498, + "learning_rate": 0.0013310249629224417, + "loss": 1.7915, + "step": 2140 + }, + { + "epoch": 0.22584388185654009, + "grad_norm": 0.5800102949142456, + "learning_rate": 0.0013308661780920728, + "loss": 1.7639, + "step": 2141 + }, + { + "epoch": 0.2259493670886076, + "grad_norm": 0.5798423886299133, + "learning_rate": 0.0013307073281727518, + "loss": 1.7342, + "step": 2142 + }, + { + "epoch": 0.2260548523206751, + "grad_norm": 0.5811206102371216, + "learning_rate": 0.0013305484131822792, + "loss": 1.7589, + "step": 2143 + }, + { + "epoch": 0.22616033755274262, + "grad_norm": 0.6568109393119812, + "learning_rate": 0.001330389433138462, + "loss": 1.8259, + "step": 2144 + }, + { + "epoch": 0.22626582278481014, + "grad_norm": 0.7201579809188843, + "learning_rate": 0.0013302303880591147, + "loss": 1.7904, + "step": 2145 + }, + { + "epoch": 0.22637130801687763, + "grad_norm": 0.6539525389671326, + "learning_rate": 0.0013300712779620593, + "loss": 1.7661, + "step": 2146 + }, + { + "epoch": 0.22647679324894515, + "grad_norm": 0.5703042149543762, + "learning_rate": 0.0013299121028651246, + "loss": 1.7976, + "step": 2147 + }, + { + "epoch": 0.22658227848101264, + "grad_norm": 0.7560691237449646, + "learning_rate": 0.001329752862786147, + "loss": 1.8021, 
+ "step": 2148 + }, + { + "epoch": 0.22668776371308016, + "grad_norm": 0.844936192035675, + "learning_rate": 0.0013295935577429703, + "loss": 1.7566, + "step": 2149 + }, + { + "epoch": 0.22679324894514769, + "grad_norm": 0.6013624668121338, + "learning_rate": 0.0013294341877534454, + "loss": 1.7426, + "step": 2150 + }, + { + "epoch": 0.22689873417721518, + "grad_norm": 0.9520506262779236, + "learning_rate": 0.0013292747528354304, + "loss": 1.8264, + "step": 2151 + }, + { + "epoch": 0.2270042194092827, + "grad_norm": 0.8702092170715332, + "learning_rate": 0.0013291152530067907, + "loss": 1.8343, + "step": 2152 + }, + { + "epoch": 0.22710970464135022, + "grad_norm": 0.5591957569122314, + "learning_rate": 0.0013289556882853993, + "loss": 1.7588, + "step": 2153 + }, + { + "epoch": 0.2272151898734177, + "grad_norm": 0.6268147826194763, + "learning_rate": 0.0013287960586891362, + "loss": 1.785, + "step": 2154 + }, + { + "epoch": 0.22732067510548523, + "grad_norm": 0.7419527769088745, + "learning_rate": 0.0013286363642358884, + "loss": 1.7932, + "step": 2155 + }, + { + "epoch": 0.22742616033755275, + "grad_norm": 0.6015053987503052, + "learning_rate": 0.0013284766049435504, + "loss": 1.7565, + "step": 2156 + }, + { + "epoch": 0.22753164556962024, + "grad_norm": 0.6430548429489136, + "learning_rate": 0.0013283167808300247, + "loss": 1.7842, + "step": 2157 + }, + { + "epoch": 0.22763713080168776, + "grad_norm": 0.799693763256073, + "learning_rate": 0.0013281568919132198, + "loss": 1.7482, + "step": 2158 + }, + { + "epoch": 0.22774261603375529, + "grad_norm": 0.6371711492538452, + "learning_rate": 0.0013279969382110524, + "loss": 1.8117, + "step": 2159 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 0.7263153791427612, + "learning_rate": 0.0013278369197414458, + "loss": 1.7791, + "step": 2160 + }, + { + "epoch": 0.2279535864978903, + "grad_norm": 0.728791356086731, + "learning_rate": 0.0013276768365223306, + "loss": 1.7712, + "step": 2161 + }, + { + "epoch": 
0.22805907172995782, + "grad_norm": 0.5929757356643677, + "learning_rate": 0.0013275166885716458, + "loss": 1.7522, + "step": 2162 + }, + { + "epoch": 0.2281645569620253, + "grad_norm": 0.7427323460578918, + "learning_rate": 0.0013273564759073361, + "loss": 1.7708, + "step": 2163 + }, + { + "epoch": 0.22827004219409283, + "grad_norm": 0.672178328037262, + "learning_rate": 0.0013271961985473544, + "loss": 1.7575, + "step": 2164 + }, + { + "epoch": 0.22837552742616032, + "grad_norm": 0.557633638381958, + "learning_rate": 0.0013270358565096606, + "loss": 1.7288, + "step": 2165 + }, + { + "epoch": 0.22848101265822784, + "grad_norm": 0.6172701120376587, + "learning_rate": 0.0013268754498122215, + "loss": 1.8047, + "step": 2166 + }, + { + "epoch": 0.22858649789029536, + "grad_norm": 0.5353729724884033, + "learning_rate": 0.0013267149784730117, + "loss": 1.7997, + "step": 2167 + }, + { + "epoch": 0.22869198312236286, + "grad_norm": 0.5830134749412537, + "learning_rate": 0.0013265544425100128, + "loss": 1.7918, + "step": 2168 + }, + { + "epoch": 0.22879746835443038, + "grad_norm": 0.5724103450775146, + "learning_rate": 0.0013263938419412137, + "loss": 1.7574, + "step": 2169 + }, + { + "epoch": 0.2289029535864979, + "grad_norm": 0.5750206112861633, + "learning_rate": 0.0013262331767846104, + "loss": 1.7672, + "step": 2170 + }, + { + "epoch": 0.2290084388185654, + "grad_norm": 0.5326185822486877, + "learning_rate": 0.0013260724470582064, + "loss": 1.7486, + "step": 2171 + }, + { + "epoch": 0.2291139240506329, + "grad_norm": 0.504402220249176, + "learning_rate": 0.001325911652780012, + "loss": 1.7498, + "step": 2172 + }, + { + "epoch": 0.22921940928270043, + "grad_norm": 0.6071501970291138, + "learning_rate": 0.0013257507939680453, + "loss": 1.7288, + "step": 2173 + }, + { + "epoch": 0.22932489451476792, + "grad_norm": 0.5569027066230774, + "learning_rate": 0.0013255898706403312, + "loss": 1.7403, + "step": 2174 + }, + { + "epoch": 0.22943037974683544, + "grad_norm": 
0.5849369168281555, + "learning_rate": 0.001325428882814902, + "loss": 1.7407, + "step": 2175 + }, + { + "epoch": 0.22953586497890296, + "grad_norm": 0.648510754108429, + "learning_rate": 0.001325267830509797, + "loss": 1.7634, + "step": 2176 + }, + { + "epoch": 0.22964135021097046, + "grad_norm": 0.5662021040916443, + "learning_rate": 0.0013251067137430629, + "loss": 1.7408, + "step": 2177 + }, + { + "epoch": 0.22974683544303798, + "grad_norm": 0.5203872323036194, + "learning_rate": 0.001324945532532754, + "loss": 1.7703, + "step": 2178 + }, + { + "epoch": 0.2298523206751055, + "grad_norm": 0.5568910241127014, + "learning_rate": 0.0013247842868969312, + "loss": 1.7652, + "step": 2179 + }, + { + "epoch": 0.229957805907173, + "grad_norm": 0.5675352215766907, + "learning_rate": 0.0013246229768536628, + "loss": 1.7648, + "step": 2180 + }, + { + "epoch": 0.2300632911392405, + "grad_norm": 0.5235761404037476, + "learning_rate": 0.0013244616024210246, + "loss": 1.7967, + "step": 2181 + }, + { + "epoch": 0.230168776371308, + "grad_norm": 0.5541737675666809, + "learning_rate": 0.0013243001636170993, + "loss": 1.7564, + "step": 2182 + }, + { + "epoch": 0.23027426160337552, + "grad_norm": 0.6113124489784241, + "learning_rate": 0.0013241386604599772, + "loss": 1.7769, + "step": 2183 + }, + { + "epoch": 0.23037974683544304, + "grad_norm": 0.6042553186416626, + "learning_rate": 0.001323977092967755, + "loss": 1.7707, + "step": 2184 + }, + { + "epoch": 0.23048523206751054, + "grad_norm": 0.7295132279396057, + "learning_rate": 0.0013238154611585375, + "loss": 1.7516, + "step": 2185 + }, + { + "epoch": 0.23059071729957806, + "grad_norm": 0.7614543437957764, + "learning_rate": 0.0013236537650504361, + "loss": 1.7863, + "step": 2186 + }, + { + "epoch": 0.23069620253164558, + "grad_norm": 0.5847471952438354, + "learning_rate": 0.00132349200466157, + "loss": 1.7473, + "step": 2187 + }, + { + "epoch": 0.23080168776371307, + "grad_norm": 0.6996764540672302, + "learning_rate": 
0.0013233301800100652, + "loss": 1.7637, + "step": 2188 + }, + { + "epoch": 0.2309071729957806, + "grad_norm": 0.7283911108970642, + "learning_rate": 0.0013231682911140545, + "loss": 1.7641, + "step": 2189 + }, + { + "epoch": 0.2310126582278481, + "grad_norm": 0.7008375525474548, + "learning_rate": 0.001323006337991679, + "loss": 1.7845, + "step": 2190 + }, + { + "epoch": 0.2311181434599156, + "grad_norm": 0.7923140525817871, + "learning_rate": 0.0013228443206610861, + "loss": 1.7718, + "step": 2191 + }, + { + "epoch": 0.23122362869198312, + "grad_norm": 0.7672163844108582, + "learning_rate": 0.0013226822391404305, + "loss": 1.7688, + "step": 2192 + }, + { + "epoch": 0.23132911392405064, + "grad_norm": 0.7584683299064636, + "learning_rate": 0.0013225200934478744, + "loss": 1.8187, + "step": 2193 + }, + { + "epoch": 0.23143459915611814, + "grad_norm": 0.6584490537643433, + "learning_rate": 0.0013223578836015868, + "loss": 1.8, + "step": 2194 + }, + { + "epoch": 0.23154008438818566, + "grad_norm": 0.7715032696723938, + "learning_rate": 0.0013221956096197446, + "loss": 1.7445, + "step": 2195 + }, + { + "epoch": 0.23164556962025318, + "grad_norm": 0.6327605843544006, + "learning_rate": 0.001322033271520531, + "loss": 1.7376, + "step": 2196 + }, + { + "epoch": 0.23175105485232067, + "grad_norm": 0.6605768799781799, + "learning_rate": 0.001321870869322137, + "loss": 1.7579, + "step": 2197 + }, + { + "epoch": 0.2318565400843882, + "grad_norm": 0.6315977573394775, + "learning_rate": 0.0013217084030427604, + "loss": 1.746, + "step": 2198 + }, + { + "epoch": 0.23196202531645568, + "grad_norm": 0.6018505692481995, + "learning_rate": 0.0013215458727006064, + "loss": 1.7433, + "step": 2199 + }, + { + "epoch": 0.2320675105485232, + "grad_norm": 0.5734003186225891, + "learning_rate": 0.0013213832783138873, + "loss": 1.7968, + "step": 2200 + }, + { + "epoch": 0.23217299578059072, + "grad_norm": 0.520892322063446, + "learning_rate": 0.0013212206199008226, + "loss": 1.7546, + 
"step": 2201 + }, + { + "epoch": 0.23227848101265822, + "grad_norm": 0.5756822228431702, + "learning_rate": 0.0013210578974796393, + "loss": 1.7626, + "step": 2202 + }, + { + "epoch": 0.23238396624472574, + "grad_norm": 0.5590127110481262, + "learning_rate": 0.001320895111068571, + "loss": 1.7669, + "step": 2203 + }, + { + "epoch": 0.23248945147679326, + "grad_norm": 0.6485333442687988, + "learning_rate": 0.0013207322606858588, + "loss": 1.7475, + "step": 2204 + }, + { + "epoch": 0.23259493670886075, + "grad_norm": 0.6784553527832031, + "learning_rate": 0.001320569346349751, + "loss": 1.706, + "step": 2205 + }, + { + "epoch": 0.23270042194092827, + "grad_norm": 0.6146546602249146, + "learning_rate": 0.0013204063680785025, + "loss": 1.813, + "step": 2206 + }, + { + "epoch": 0.2328059071729958, + "grad_norm": 0.7925986647605896, + "learning_rate": 0.0013202433258903761, + "loss": 1.7484, + "step": 2207 + }, + { + "epoch": 0.23291139240506328, + "grad_norm": 0.6378032565116882, + "learning_rate": 0.001320080219803642, + "loss": 1.772, + "step": 2208 + }, + { + "epoch": 0.2330168776371308, + "grad_norm": 0.552385687828064, + "learning_rate": 0.0013199170498365764, + "loss": 1.7389, + "step": 2209 + }, + { + "epoch": 0.23312236286919832, + "grad_norm": 0.6146122813224792, + "learning_rate": 0.0013197538160074633, + "loss": 1.7671, + "step": 2210 + }, + { + "epoch": 0.23322784810126582, + "grad_norm": 0.5680294632911682, + "learning_rate": 0.0013195905183345943, + "loss": 1.7583, + "step": 2211 + }, + { + "epoch": 0.23333333333333334, + "grad_norm": 0.6452968120574951, + "learning_rate": 0.0013194271568362673, + "loss": 1.7632, + "step": 2212 + }, + { + "epoch": 0.23343881856540086, + "grad_norm": 0.5975542664527893, + "learning_rate": 0.001319263731530788, + "loss": 1.7626, + "step": 2213 + }, + { + "epoch": 0.23354430379746835, + "grad_norm": 0.6149652600288391, + "learning_rate": 0.0013191002424364693, + "loss": 1.7689, + "step": 2214 + }, + { + "epoch": 
0.23364978902953587, + "grad_norm": 0.7773263454437256, + "learning_rate": 0.0013189366895716302, + "loss": 1.8046, + "step": 2215 + }, + { + "epoch": 0.23375527426160336, + "grad_norm": 0.5552393198013306, + "learning_rate": 0.0013187730729545982, + "loss": 1.7652, + "step": 2216 + }, + { + "epoch": 0.23386075949367088, + "grad_norm": 0.6236850619316101, + "learning_rate": 0.0013186093926037072, + "loss": 1.7657, + "step": 2217 + }, + { + "epoch": 0.2339662447257384, + "grad_norm": 0.5787410736083984, + "learning_rate": 0.0013184456485372986, + "loss": 1.7839, + "step": 2218 + }, + { + "epoch": 0.2340717299578059, + "grad_norm": 0.5782155394554138, + "learning_rate": 0.0013182818407737203, + "loss": 1.7377, + "step": 2219 + }, + { + "epoch": 0.23417721518987342, + "grad_norm": 0.7239537239074707, + "learning_rate": 0.0013181179693313283, + "loss": 1.7934, + "step": 2220 + }, + { + "epoch": 0.23428270042194094, + "grad_norm": 0.8001000285148621, + "learning_rate": 0.0013179540342284847, + "loss": 1.7574, + "step": 2221 + }, + { + "epoch": 0.23438818565400843, + "grad_norm": 0.5571138858795166, + "learning_rate": 0.0013177900354835598, + "loss": 1.7298, + "step": 2222 + }, + { + "epoch": 0.23449367088607595, + "grad_norm": 0.5605456829071045, + "learning_rate": 0.00131762597311493, + "loss": 1.7554, + "step": 2223 + }, + { + "epoch": 0.23459915611814347, + "grad_norm": 0.5879361629486084, + "learning_rate": 0.0013174618471409793, + "loss": 1.7928, + "step": 2224 + }, + { + "epoch": 0.23470464135021096, + "grad_norm": 0.5689192414283752, + "learning_rate": 0.0013172976575800991, + "loss": 1.7533, + "step": 2225 + }, + { + "epoch": 0.23481012658227848, + "grad_norm": 0.6982372403144836, + "learning_rate": 0.0013171334044506878, + "loss": 1.7849, + "step": 2226 + }, + { + "epoch": 0.234915611814346, + "grad_norm": 0.6837165951728821, + "learning_rate": 0.0013169690877711502, + "loss": 1.7552, + "step": 2227 + }, + { + "epoch": 0.2350210970464135, + "grad_norm": 
0.562567949295044, + "learning_rate": 0.0013168047075598993, + "loss": 1.761, + "step": 2228 + }, + { + "epoch": 0.23512658227848102, + "grad_norm": 0.7817406058311462, + "learning_rate": 0.0013166402638353548, + "loss": 1.784, + "step": 2229 + }, + { + "epoch": 0.23523206751054854, + "grad_norm": 0.6001663208007812, + "learning_rate": 0.0013164757566159428, + "loss": 1.7528, + "step": 2230 + }, + { + "epoch": 0.23533755274261603, + "grad_norm": 0.8249187469482422, + "learning_rate": 0.0013163111859200978, + "loss": 1.7582, + "step": 2231 + }, + { + "epoch": 0.23544303797468355, + "grad_norm": 0.9080456495285034, + "learning_rate": 0.0013161465517662603, + "loss": 1.7804, + "step": 2232 + }, + { + "epoch": 0.23554852320675104, + "grad_norm": 0.8798048496246338, + "learning_rate": 0.001315981854172879, + "loss": 1.7859, + "step": 2233 + }, + { + "epoch": 0.23565400843881856, + "grad_norm": 0.8443210124969482, + "learning_rate": 0.0013158170931584084, + "loss": 1.7642, + "step": 2234 + }, + { + "epoch": 0.23575949367088608, + "grad_norm": 0.704205334186554, + "learning_rate": 0.0013156522687413114, + "loss": 1.7425, + "step": 2235 + }, + { + "epoch": 0.23586497890295358, + "grad_norm": 0.8370984792709351, + "learning_rate": 0.0013154873809400568, + "loss": 1.755, + "step": 2236 + }, + { + "epoch": 0.2359704641350211, + "grad_norm": 0.7717642188072205, + "learning_rate": 0.0013153224297731215, + "loss": 1.7342, + "step": 2237 + }, + { + "epoch": 0.23607594936708862, + "grad_norm": 0.5423630475997925, + "learning_rate": 0.0013151574152589888, + "loss": 1.7926, + "step": 2238 + }, + { + "epoch": 0.2361814345991561, + "grad_norm": 0.8369584679603577, + "learning_rate": 0.00131499233741615, + "loss": 1.754, + "step": 2239 + }, + { + "epoch": 0.23628691983122363, + "grad_norm": 0.9371525645256042, + "learning_rate": 0.001314827196263102, + "loss": 1.7596, + "step": 2240 + }, + { + "epoch": 0.23639240506329115, + "grad_norm": 0.6601789593696594, + "learning_rate": 
0.0013146619918183507, + "loss": 1.7369, + "step": 2241 + }, + { + "epoch": 0.23649789029535864, + "grad_norm": 0.7743209600448608, + "learning_rate": 0.0013144967241004073, + "loss": 1.7352, + "step": 2242 + }, + { + "epoch": 0.23660337552742616, + "grad_norm": 0.8958768844604492, + "learning_rate": 0.001314331393127791, + "loss": 1.7333, + "step": 2243 + }, + { + "epoch": 0.23670886075949368, + "grad_norm": 0.9427881240844727, + "learning_rate": 0.0013141659989190282, + "loss": 1.753, + "step": 2244 + }, + { + "epoch": 0.23681434599156118, + "grad_norm": 0.6405366063117981, + "learning_rate": 0.001314000541492652, + "loss": 1.7538, + "step": 2245 + }, + { + "epoch": 0.2369198312236287, + "grad_norm": 0.6664368510246277, + "learning_rate": 0.0013138350208672029, + "loss": 1.81, + "step": 2246 + }, + { + "epoch": 0.2370253164556962, + "grad_norm": 0.7979840636253357, + "learning_rate": 0.001313669437061228, + "loss": 1.7269, + "step": 2247 + }, + { + "epoch": 0.2371308016877637, + "grad_norm": 0.8056900501251221, + "learning_rate": 0.0013135037900932822, + "loss": 1.7838, + "step": 2248 + }, + { + "epoch": 0.23723628691983123, + "grad_norm": 0.9192821383476257, + "learning_rate": 0.0013133380799819267, + "loss": 1.7946, + "step": 2249 + }, + { + "epoch": 0.23734177215189872, + "grad_norm": 0.8517353534698486, + "learning_rate": 0.0013131723067457302, + "loss": 1.7577, + "step": 2250 + }, + { + "epoch": 0.23744725738396624, + "grad_norm": 0.9732785820960999, + "learning_rate": 0.0013130064704032684, + "loss": 1.7435, + "step": 2251 + }, + { + "epoch": 0.23755274261603376, + "grad_norm": 0.5837040543556213, + "learning_rate": 0.0013128405709731245, + "loss": 1.7582, + "step": 2252 + }, + { + "epoch": 0.23765822784810126, + "grad_norm": 0.8810707926750183, + "learning_rate": 0.001312674608473888, + "loss": 1.7404, + "step": 2253 + }, + { + "epoch": 0.23776371308016878, + "grad_norm": 0.8529717326164246, + "learning_rate": 0.0013125085829241558, + "loss": 1.7442, + 
"step": 2254 + }, + { + "epoch": 0.2378691983122363, + "grad_norm": 0.5831390619277954, + "learning_rate": 0.0013123424943425317, + "loss": 1.7452, + "step": 2255 + }, + { + "epoch": 0.2379746835443038, + "grad_norm": 0.8124730587005615, + "learning_rate": 0.0013121763427476273, + "loss": 1.7438, + "step": 2256 + }, + { + "epoch": 0.2380801687763713, + "grad_norm": 0.6081793904304504, + "learning_rate": 0.0013120101281580605, + "loss": 1.7196, + "step": 2257 + }, + { + "epoch": 0.23818565400843883, + "grad_norm": 0.7025086283683777, + "learning_rate": 0.0013118438505924563, + "loss": 1.7509, + "step": 2258 + }, + { + "epoch": 0.23829113924050632, + "grad_norm": 0.6362736225128174, + "learning_rate": 0.001311677510069447, + "loss": 1.7787, + "step": 2259 + }, + { + "epoch": 0.23839662447257384, + "grad_norm": 0.8244772553443909, + "learning_rate": 0.0013115111066076721, + "loss": 1.7755, + "step": 2260 + }, + { + "epoch": 0.23850210970464136, + "grad_norm": 0.5726867914199829, + "learning_rate": 0.0013113446402257774, + "loss": 1.7493, + "step": 2261 + }, + { + "epoch": 0.23860759493670886, + "grad_norm": 0.7306455969810486, + "learning_rate": 0.001311178110942417, + "loss": 1.7564, + "step": 2262 + }, + { + "epoch": 0.23871308016877638, + "grad_norm": 0.9227611422538757, + "learning_rate": 0.0013110115187762506, + "loss": 1.7742, + "step": 2263 + }, + { + "epoch": 0.23881856540084387, + "grad_norm": 0.6462796926498413, + "learning_rate": 0.0013108448637459465, + "loss": 1.8039, + "step": 2264 + }, + { + "epoch": 0.2389240506329114, + "grad_norm": 0.6952676177024841, + "learning_rate": 0.0013106781458701784, + "loss": 1.8221, + "step": 2265 + }, + { + "epoch": 0.2390295358649789, + "grad_norm": 0.6277433633804321, + "learning_rate": 0.0013105113651676287, + "loss": 1.771, + "step": 2266 + }, + { + "epoch": 0.2391350210970464, + "grad_norm": 0.808197021484375, + "learning_rate": 0.001310344521656985, + "loss": 1.8108, + "step": 2267 + }, + { + "epoch": 
0.23924050632911392, + "grad_norm": 0.6158503293991089, + "learning_rate": 0.001310177615356944, + "loss": 1.7833, + "step": 2268 + }, + { + "epoch": 0.23934599156118144, + "grad_norm": 0.5951923131942749, + "learning_rate": 0.0013100106462862076, + "loss": 1.772, + "step": 2269 + }, + { + "epoch": 0.23945147679324894, + "grad_norm": 0.6190905570983887, + "learning_rate": 0.0013098436144634862, + "loss": 1.7863, + "step": 2270 + }, + { + "epoch": 0.23955696202531646, + "grad_norm": 0.5938636660575867, + "learning_rate": 0.0013096765199074958, + "loss": 1.7667, + "step": 2271 + }, + { + "epoch": 0.23966244725738398, + "grad_norm": 0.5303232073783875, + "learning_rate": 0.0013095093626369608, + "loss": 1.7479, + "step": 2272 + }, + { + "epoch": 0.23976793248945147, + "grad_norm": 0.5282822847366333, + "learning_rate": 0.0013093421426706117, + "loss": 1.7325, + "step": 2273 + }, + { + "epoch": 0.239873417721519, + "grad_norm": 0.49836426973342896, + "learning_rate": 0.0013091748600271862, + "loss": 1.7815, + "step": 2274 + }, + { + "epoch": 0.2399789029535865, + "grad_norm": 0.5012999773025513, + "learning_rate": 0.0013090075147254294, + "loss": 1.7578, + "step": 2275 + }, + { + "epoch": 0.240084388185654, + "grad_norm": 0.5617629289627075, + "learning_rate": 0.0013088401067840932, + "loss": 1.7793, + "step": 2276 + }, + { + "epoch": 0.24018987341772152, + "grad_norm": 0.5404247045516968, + "learning_rate": 0.0013086726362219363, + "loss": 1.7631, + "step": 2277 + }, + { + "epoch": 0.24029535864978904, + "grad_norm": 0.5515851974487305, + "learning_rate": 0.0013085051030577246, + "loss": 1.7627, + "step": 2278 + }, + { + "epoch": 0.24040084388185654, + "grad_norm": 0.6132825613021851, + "learning_rate": 0.0013083375073102315, + "loss": 1.7718, + "step": 2279 + }, + { + "epoch": 0.24050632911392406, + "grad_norm": 0.6302200555801392, + "learning_rate": 0.0013081698489982364, + "loss": 1.7706, + "step": 2280 + }, + { + "epoch": 0.24061181434599155, + "grad_norm": 
0.5833495855331421, + "learning_rate": 0.0013080021281405264, + "loss": 1.7561, + "step": 2281 + }, + { + "epoch": 0.24071729957805907, + "grad_norm": 0.8152864575386047, + "learning_rate": 0.0013078343447558954, + "loss": 1.7191, + "step": 2282 + }, + { + "epoch": 0.2408227848101266, + "grad_norm": 1.0381789207458496, + "learning_rate": 0.0013076664988631447, + "loss": 1.7539, + "step": 2283 + }, + { + "epoch": 0.24092827004219408, + "grad_norm": 0.8037685751914978, + "learning_rate": 0.001307498590481082, + "loss": 1.7164, + "step": 2284 + }, + { + "epoch": 0.2410337552742616, + "grad_norm": 0.6679971814155579, + "learning_rate": 0.001307330619628522, + "loss": 1.7436, + "step": 2285 + }, + { + "epoch": 0.24113924050632912, + "grad_norm": 1.1447497606277466, + "learning_rate": 0.0013071625863242875, + "loss": 1.7928, + "step": 2286 + }, + { + "epoch": 0.24124472573839661, + "grad_norm": 0.6406293511390686, + "learning_rate": 0.0013069944905872064, + "loss": 1.766, + "step": 2287 + }, + { + "epoch": 0.24135021097046414, + "grad_norm": 0.9831286072731018, + "learning_rate": 0.0013068263324361156, + "loss": 1.7794, + "step": 2288 + }, + { + "epoch": 0.24145569620253166, + "grad_norm": 1.1781834363937378, + "learning_rate": 0.0013066581118898574, + "loss": 1.7482, + "step": 2289 + }, + { + "epoch": 0.24156118143459915, + "grad_norm": 0.6617604494094849, + "learning_rate": 0.001306489828967282, + "loss": 1.7678, + "step": 2290 + }, + { + "epoch": 0.24166666666666667, + "grad_norm": 1.0174564123153687, + "learning_rate": 0.0013063214836872465, + "loss": 1.7726, + "step": 2291 + }, + { + "epoch": 0.2417721518987342, + "grad_norm": 0.7788093686103821, + "learning_rate": 0.0013061530760686145, + "loss": 1.7068, + "step": 2292 + }, + { + "epoch": 0.24187763713080168, + "grad_norm": 0.7556354999542236, + "learning_rate": 0.0013059846061302574, + "loss": 1.7609, + "step": 2293 + }, + { + "epoch": 0.2419831223628692, + "grad_norm": 1.150669813156128, + "learning_rate": 
0.0013058160738910526, + "loss": 1.7664, + "step": 2294 + }, + { + "epoch": 0.24208860759493672, + "grad_norm": 0.6311616897583008, + "learning_rate": 0.0013056474793698852, + "loss": 1.7563, + "step": 2295 + }, + { + "epoch": 0.24219409282700421, + "grad_norm": 0.8612399697303772, + "learning_rate": 0.001305478822585647, + "loss": 1.7602, + "step": 2296 + }, + { + "epoch": 0.24229957805907174, + "grad_norm": 0.9616355299949646, + "learning_rate": 0.001305310103557237, + "loss": 1.802, + "step": 2297 + }, + { + "epoch": 0.24240506329113923, + "grad_norm": 0.5177884697914124, + "learning_rate": 0.0013051413223035607, + "loss": 1.745, + "step": 2298 + }, + { + "epoch": 0.24251054852320675, + "grad_norm": 0.8237097859382629, + "learning_rate": 0.0013049724788435312, + "loss": 1.7437, + "step": 2299 + }, + { + "epoch": 0.24261603375527427, + "grad_norm": 0.5559508204460144, + "learning_rate": 0.0013048035731960679, + "loss": 1.7171, + "step": 2300 + }, + { + "epoch": 0.24272151898734176, + "grad_norm": 1.1237049102783203, + "learning_rate": 0.0013046346053800979, + "loss": 1.7766, + "step": 2301 + }, + { + "epoch": 0.24282700421940928, + "grad_norm": 0.6244679689407349, + "learning_rate": 0.0013044655754145546, + "loss": 1.7309, + "step": 2302 + }, + { + "epoch": 0.2429324894514768, + "grad_norm": 1.1133081912994385, + "learning_rate": 0.001304296483318379, + "loss": 1.7797, + "step": 2303 + }, + { + "epoch": 0.2430379746835443, + "grad_norm": 0.916325032711029, + "learning_rate": 0.0013041273291105181, + "loss": 1.764, + "step": 2304 + }, + { + "epoch": 0.24314345991561181, + "grad_norm": 0.6939435601234436, + "learning_rate": 0.0013039581128099272, + "loss": 1.739, + "step": 2305 + }, + { + "epoch": 0.24324894514767934, + "grad_norm": 1.0978018045425415, + "learning_rate": 0.0013037888344355673, + "loss": 1.7826, + "step": 2306 + }, + { + "epoch": 0.24335443037974683, + "grad_norm": 0.6482760906219482, + "learning_rate": 0.001303619494006407, + "loss": 1.7331, + 
"step": 2307 + }, + { + "epoch": 0.24345991561181435, + "grad_norm": 0.6100127100944519, + "learning_rate": 0.0013034500915414218, + "loss": 1.7194, + "step": 2308 + }, + { + "epoch": 0.24356540084388187, + "grad_norm": 0.5544464588165283, + "learning_rate": 0.0013032806270595941, + "loss": 1.7654, + "step": 2309 + }, + { + "epoch": 0.24367088607594936, + "grad_norm": 0.5592302680015564, + "learning_rate": 0.0013031111005799133, + "loss": 1.7636, + "step": 2310 + }, + { + "epoch": 0.24377637130801688, + "grad_norm": 0.6374934911727905, + "learning_rate": 0.0013029415121213756, + "loss": 1.7555, + "step": 2311 + }, + { + "epoch": 0.2438818565400844, + "grad_norm": 0.5889583826065063, + "learning_rate": 0.0013027718617029842, + "loss": 1.7759, + "step": 2312 + }, + { + "epoch": 0.2439873417721519, + "grad_norm": 0.5736298561096191, + "learning_rate": 0.0013026021493437495, + "loss": 1.8067, + "step": 2313 + }, + { + "epoch": 0.24409282700421941, + "grad_norm": 0.5532177686691284, + "learning_rate": 0.0013024323750626882, + "loss": 1.7633, + "step": 2314 + }, + { + "epoch": 0.2441983122362869, + "grad_norm": 0.5916410684585571, + "learning_rate": 0.0013022625388788248, + "loss": 1.748, + "step": 2315 + }, + { + "epoch": 0.24430379746835443, + "grad_norm": 0.7195175290107727, + "learning_rate": 0.0013020926408111903, + "loss": 1.7933, + "step": 2316 + }, + { + "epoch": 0.24440928270042195, + "grad_norm": 0.5698735117912292, + "learning_rate": 0.001301922680878822, + "loss": 1.7089, + "step": 2317 + }, + { + "epoch": 0.24451476793248944, + "grad_norm": 0.558394193649292, + "learning_rate": 0.001301752659100765, + "loss": 1.7456, + "step": 2318 + }, + { + "epoch": 0.24462025316455696, + "grad_norm": 0.6105250120162964, + "learning_rate": 0.001301582575496072, + "loss": 1.7815, + "step": 2319 + }, + { + "epoch": 0.24472573839662448, + "grad_norm": 0.5996942520141602, + "learning_rate": 0.0013014124300838004, + "loss": 1.7113, + "step": 2320 + }, + { + "epoch": 
0.24483122362869197, + "grad_norm": 0.5429485440254211, + "learning_rate": 0.0013012422228830165, + "loss": 1.7178, + "step": 2321 + }, + { + "epoch": 0.2449367088607595, + "grad_norm": 0.560434103012085, + "learning_rate": 0.0013010719539127927, + "loss": 1.7666, + "step": 2322 + }, + { + "epoch": 0.24504219409282701, + "grad_norm": 0.5546209216117859, + "learning_rate": 0.001300901623192209, + "loss": 1.7523, + "step": 2323 + }, + { + "epoch": 0.2451476793248945, + "grad_norm": 0.5930539965629578, + "learning_rate": 0.0013007312307403507, + "loss": 1.7542, + "step": 2324 + }, + { + "epoch": 0.24525316455696203, + "grad_norm": 0.5641794800758362, + "learning_rate": 0.0013005607765763122, + "loss": 1.727, + "step": 2325 + }, + { + "epoch": 0.24535864978902955, + "grad_norm": 0.5641219615936279, + "learning_rate": 0.0013003902607191934, + "loss": 1.7609, + "step": 2326 + }, + { + "epoch": 0.24546413502109704, + "grad_norm": 0.667813241481781, + "learning_rate": 0.0013002196831881014, + "loss": 1.7369, + "step": 2327 + }, + { + "epoch": 0.24556962025316456, + "grad_norm": 0.5890228152275085, + "learning_rate": 0.0013000490440021502, + "loss": 1.7377, + "step": 2328 + }, + { + "epoch": 0.24567510548523205, + "grad_norm": 0.5673329830169678, + "learning_rate": 0.0012998783431804608, + "loss": 1.7448, + "step": 2329 + }, + { + "epoch": 0.24578059071729957, + "grad_norm": 0.6669266223907471, + "learning_rate": 0.0012997075807421612, + "loss": 1.7197, + "step": 2330 + }, + { + "epoch": 0.2458860759493671, + "grad_norm": 0.5925434827804565, + "learning_rate": 0.0012995367567063861, + "loss": 1.7125, + "step": 2331 + }, + { + "epoch": 0.2459915611814346, + "grad_norm": 0.7966897487640381, + "learning_rate": 0.001299365871092277, + "loss": 1.7782, + "step": 2332 + }, + { + "epoch": 0.2460970464135021, + "grad_norm": 0.5806139707565308, + "learning_rate": 0.0012991949239189826, + "loss": 1.7546, + "step": 2333 + }, + { + "epoch": 0.24620253164556963, + "grad_norm": 
0.6630098223686218, + "learning_rate": 0.0012990239152056587, + "loss": 1.77, + "step": 2334 + }, + { + "epoch": 0.24630801687763712, + "grad_norm": 0.7820857763290405, + "learning_rate": 0.0012988528449714672, + "loss": 1.7665, + "step": 2335 + }, + { + "epoch": 0.24641350210970464, + "grad_norm": 0.6739190220832825, + "learning_rate": 0.001298681713235578, + "loss": 1.7589, + "step": 2336 + }, + { + "epoch": 0.24651898734177216, + "grad_norm": 0.6242907047271729, + "learning_rate": 0.0012985105200171664, + "loss": 1.6933, + "step": 2337 + }, + { + "epoch": 0.24662447257383965, + "grad_norm": 0.791114091873169, + "learning_rate": 0.001298339265335416, + "loss": 1.7304, + "step": 2338 + }, + { + "epoch": 0.24672995780590717, + "grad_norm": 0.6972154974937439, + "learning_rate": 0.0012981679492095166, + "loss": 1.7476, + "step": 2339 + }, + { + "epoch": 0.2468354430379747, + "grad_norm": 0.6887376308441162, + "learning_rate": 0.0012979965716586653, + "loss": 1.7394, + "step": 2340 + }, + { + "epoch": 0.2469409282700422, + "grad_norm": 0.7566161751747131, + "learning_rate": 0.0012978251327020655, + "loss": 1.7549, + "step": 2341 + }, + { + "epoch": 0.2470464135021097, + "grad_norm": 0.7807169556617737, + "learning_rate": 0.0012976536323589278, + "loss": 1.7494, + "step": 2342 + }, + { + "epoch": 0.24715189873417723, + "grad_norm": 0.7775095701217651, + "learning_rate": 0.0012974820706484697, + "loss": 1.7302, + "step": 2343 + }, + { + "epoch": 0.24725738396624472, + "grad_norm": 0.8390941619873047, + "learning_rate": 0.001297310447589916, + "loss": 1.7561, + "step": 2344 + }, + { + "epoch": 0.24736286919831224, + "grad_norm": 1.0669060945510864, + "learning_rate": 0.0012971387632024968, + "loss": 1.747, + "step": 2345 + }, + { + "epoch": 0.24746835443037973, + "grad_norm": 0.7156290411949158, + "learning_rate": 0.0012969670175054515, + "loss": 1.7592, + "step": 2346 + }, + { + "epoch": 0.24757383966244725, + "grad_norm": 1.0936137437820435, + "learning_rate": 
0.0012967952105180243, + "loss": 1.7212, + "step": 2347 + }, + { + "epoch": 0.24767932489451477, + "grad_norm": 0.5477805137634277, + "learning_rate": 0.001296623342259467, + "loss": 1.7235, + "step": 2348 + }, + { + "epoch": 0.24778481012658227, + "grad_norm": 1.2113826274871826, + "learning_rate": 0.0012964514127490388, + "loss": 1.7252, + "step": 2349 + }, + { + "epoch": 0.2478902953586498, + "grad_norm": 0.6215565204620361, + "learning_rate": 0.0012962794220060048, + "loss": 1.7633, + "step": 2350 + }, + { + "epoch": 0.2479957805907173, + "grad_norm": 0.9567737579345703, + "learning_rate": 0.0012961073700496378, + "loss": 1.7491, + "step": 2351 + }, + { + "epoch": 0.2481012658227848, + "grad_norm": 0.858181893825531, + "learning_rate": 0.0012959352568992163, + "loss": 1.7707, + "step": 2352 + }, + { + "epoch": 0.24820675105485232, + "grad_norm": 0.7112249135971069, + "learning_rate": 0.0012957630825740274, + "loss": 1.7306, + "step": 2353 + }, + { + "epoch": 0.24831223628691984, + "grad_norm": 0.9804508090019226, + "learning_rate": 0.0012955908470933637, + "loss": 1.7697, + "step": 2354 + }, + { + "epoch": 0.24841772151898733, + "grad_norm": 0.6389239430427551, + "learning_rate": 0.0012954185504765248, + "loss": 1.7672, + "step": 2355 + }, + { + "epoch": 0.24852320675105485, + "grad_norm": 1.1188690662384033, + "learning_rate": 0.0012952461927428177, + "loss": 1.7462, + "step": 2356 + }, + { + "epoch": 0.24862869198312237, + "grad_norm": 0.7357596158981323, + "learning_rate": 0.001295073773911556, + "loss": 1.7525, + "step": 2357 + }, + { + "epoch": 0.24873417721518987, + "grad_norm": 0.8397043943405151, + "learning_rate": 0.0012949012940020599, + "loss": 1.7784, + "step": 2358 + }, + { + "epoch": 0.2488396624472574, + "grad_norm": 0.705858588218689, + "learning_rate": 0.0012947287530336565, + "loss": 1.7338, + "step": 2359 + }, + { + "epoch": 0.2489451476793249, + "grad_norm": 0.6874955892562866, + "learning_rate": 0.0012945561510256801, + "loss": 1.7332, + 
"step": 2360 + }, + { + "epoch": 0.2490506329113924, + "grad_norm": 0.6092076301574707, + "learning_rate": 0.0012943834879974717, + "loss": 1.7361, + "step": 2361 + }, + { + "epoch": 0.24915611814345992, + "grad_norm": 0.6832860112190247, + "learning_rate": 0.001294210763968379, + "loss": 1.7526, + "step": 2362 + }, + { + "epoch": 0.2492616033755274, + "grad_norm": 0.5346623659133911, + "learning_rate": 0.0012940379789577565, + "loss": 1.7594, + "step": 2363 + }, + { + "epoch": 0.24936708860759493, + "grad_norm": 0.6140515804290771, + "learning_rate": 0.0012938651329849654, + "loss": 1.7338, + "step": 2364 + }, + { + "epoch": 0.24947257383966245, + "grad_norm": 0.6765629053115845, + "learning_rate": 0.0012936922260693743, + "loss": 1.7209, + "step": 2365 + }, + { + "epoch": 0.24957805907172995, + "grad_norm": 0.5558510422706604, + "learning_rate": 0.0012935192582303582, + "loss": 1.7425, + "step": 2366 + }, + { + "epoch": 0.24968354430379747, + "grad_norm": 0.7025548815727234, + "learning_rate": 0.001293346229487299, + "loss": 1.7731, + "step": 2367 + }, + { + "epoch": 0.249789029535865, + "grad_norm": 0.6723126769065857, + "learning_rate": 0.0012931731398595854, + "loss": 1.7628, + "step": 2368 + }, + { + "epoch": 0.24989451476793248, + "grad_norm": 0.7485144138336182, + "learning_rate": 0.001292999989366613, + "loss": 1.7504, + "step": 2369 + }, + { + "epoch": 0.25, + "grad_norm": 0.5041139721870422, + "learning_rate": 0.001292826778027784, + "loss": 1.7509, + "step": 2370 + }, + { + "epoch": 0.2501054852320675, + "grad_norm": 0.7684192657470703, + "learning_rate": 0.001292653505862508, + "loss": 1.747, + "step": 2371 + }, + { + "epoch": 0.25021097046413504, + "grad_norm": 0.5460065603256226, + "learning_rate": 0.0012924801728902006, + "loss": 1.7418, + "step": 2372 + }, + { + "epoch": 0.25031645569620253, + "grad_norm": 0.8090450763702393, + "learning_rate": 0.0012923067791302848, + "loss": 1.7442, + "step": 2373 + }, + { + "epoch": 0.25042194092827, + 
"grad_norm": 0.5593411922454834, + "learning_rate": 0.0012921333246021904, + "loss": 1.7413, + "step": 2374 + }, + { + "epoch": 0.2505274261603376, + "grad_norm": 0.7686710953712463, + "learning_rate": 0.0012919598093253533, + "loss": 1.7334, + "step": 2375 + }, + { + "epoch": 0.25063291139240507, + "grad_norm": 0.676001250743866, + "learning_rate": 0.0012917862333192173, + "loss": 1.7915, + "step": 2376 + }, + { + "epoch": 0.25073839662447256, + "grad_norm": 0.7769465446472168, + "learning_rate": 0.0012916125966032322, + "loss": 1.7147, + "step": 2377 + }, + { + "epoch": 0.2508438818565401, + "grad_norm": 0.7561560273170471, + "learning_rate": 0.001291438899196855, + "loss": 1.742, + "step": 2378 + }, + { + "epoch": 0.2509493670886076, + "grad_norm": 0.6167594194412231, + "learning_rate": 0.0012912651411195494, + "loss": 1.7773, + "step": 2379 + }, + { + "epoch": 0.2510548523206751, + "grad_norm": 0.8426740169525146, + "learning_rate": 0.0012910913223907856, + "loss": 1.7338, + "step": 2380 + }, + { + "epoch": 0.25116033755274264, + "grad_norm": 0.764397919178009, + "learning_rate": 0.0012909174430300412, + "loss": 1.7735, + "step": 2381 + }, + { + "epoch": 0.25126582278481013, + "grad_norm": 0.6865614652633667, + "learning_rate": 0.0012907435030567996, + "loss": 1.74, + "step": 2382 + }, + { + "epoch": 0.2513713080168776, + "grad_norm": 0.5825433135032654, + "learning_rate": 0.0012905695024905525, + "loss": 1.7526, + "step": 2383 + }, + { + "epoch": 0.2514767932489452, + "grad_norm": 0.6197133660316467, + "learning_rate": 0.0012903954413507968, + "loss": 1.7114, + "step": 2384 + }, + { + "epoch": 0.25158227848101267, + "grad_norm": 0.6233225464820862, + "learning_rate": 0.0012902213196570376, + "loss": 1.7539, + "step": 2385 + }, + { + "epoch": 0.25168776371308016, + "grad_norm": 0.6469907164573669, + "learning_rate": 0.0012900471374287855, + "loss": 1.7532, + "step": 2386 + }, + { + "epoch": 0.25179324894514765, + "grad_norm": 0.6473496556282043, + 
"learning_rate": 0.0012898728946855588, + "loss": 1.8114, + "step": 2387 + }, + { + "epoch": 0.2518987341772152, + "grad_norm": 0.5647655725479126, + "learning_rate": 0.001289698591446882, + "loss": 1.7144, + "step": 2388 + }, + { + "epoch": 0.2520042194092827, + "grad_norm": 0.7183703780174255, + "learning_rate": 0.0012895242277322872, + "loss": 1.7194, + "step": 2389 + }, + { + "epoch": 0.2521097046413502, + "grad_norm": 0.6140972971916199, + "learning_rate": 0.0012893498035613123, + "loss": 1.7439, + "step": 2390 + }, + { + "epoch": 0.25221518987341773, + "grad_norm": 0.580516517162323, + "learning_rate": 0.0012891753189535023, + "loss": 1.7215, + "step": 2391 + }, + { + "epoch": 0.2523206751054852, + "grad_norm": 0.80419921875, + "learning_rate": 0.0012890007739284092, + "loss": 1.7243, + "step": 2392 + }, + { + "epoch": 0.2524261603375527, + "grad_norm": 0.8801421523094177, + "learning_rate": 0.001288826168505592, + "loss": 1.7347, + "step": 2393 + }, + { + "epoch": 0.25253164556962027, + "grad_norm": 0.5951151251792908, + "learning_rate": 0.0012886515027046156, + "loss": 1.7475, + "step": 2394 + }, + { + "epoch": 0.25263713080168776, + "grad_norm": 0.6517694592475891, + "learning_rate": 0.0012884767765450524, + "loss": 1.7, + "step": 2395 + }, + { + "epoch": 0.25274261603375525, + "grad_norm": 0.7636370658874512, + "learning_rate": 0.0012883019900464814, + "loss": 1.7645, + "step": 2396 + }, + { + "epoch": 0.2528481012658228, + "grad_norm": 0.5424375534057617, + "learning_rate": 0.001288127143228488, + "loss": 1.8053, + "step": 2397 + }, + { + "epoch": 0.2529535864978903, + "grad_norm": 0.7212592959403992, + "learning_rate": 0.0012879522361106646, + "loss": 1.7789, + "step": 2398 + }, + { + "epoch": 0.2530590717299578, + "grad_norm": 0.618686318397522, + "learning_rate": 0.0012877772687126111, + "loss": 1.7366, + "step": 2399 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 0.5393285155296326, + "learning_rate": 0.001287602241053933, + "loss": 1.7647, 
+ "step": 2400 + }, + { + "epoch": 0.2532700421940928, + "grad_norm": 0.725698709487915, + "learning_rate": 0.001287427153154243, + "loss": 1.7512, + "step": 2401 + }, + { + "epoch": 0.2533755274261603, + "grad_norm": 0.7028099298477173, + "learning_rate": 0.0012872520050331608, + "loss": 1.7578, + "step": 2402 + }, + { + "epoch": 0.25348101265822787, + "grad_norm": 0.5879082083702087, + "learning_rate": 0.0012870767967103122, + "loss": 1.768, + "step": 2403 + }, + { + "epoch": 0.25358649789029536, + "grad_norm": 0.6757950782775879, + "learning_rate": 0.0012869015282053304, + "loss": 1.7118, + "step": 2404 + }, + { + "epoch": 0.25369198312236285, + "grad_norm": 0.5491636395454407, + "learning_rate": 0.0012867261995378554, + "loss": 1.7741, + "step": 2405 + }, + { + "epoch": 0.2537974683544304, + "grad_norm": 0.6510275602340698, + "learning_rate": 0.001286550810727533, + "loss": 1.7323, + "step": 2406 + }, + { + "epoch": 0.2539029535864979, + "grad_norm": 0.695721447467804, + "learning_rate": 0.0012863753617940172, + "loss": 1.7763, + "step": 2407 + }, + { + "epoch": 0.2540084388185654, + "grad_norm": 0.5528689622879028, + "learning_rate": 0.001286199852756967, + "loss": 1.7288, + "step": 2408 + }, + { + "epoch": 0.25411392405063293, + "grad_norm": 0.6072112321853638, + "learning_rate": 0.0012860242836360502, + "loss": 1.7107, + "step": 2409 + }, + { + "epoch": 0.2542194092827004, + "grad_norm": 0.5408107042312622, + "learning_rate": 0.0012858486544509392, + "loss": 1.7713, + "step": 2410 + }, + { + "epoch": 0.2543248945147679, + "grad_norm": 0.6378675103187561, + "learning_rate": 0.0012856729652213144, + "loss": 1.7878, + "step": 2411 + }, + { + "epoch": 0.25443037974683547, + "grad_norm": 0.6597774028778076, + "learning_rate": 0.001285497215966863, + "loss": 1.7357, + "step": 2412 + }, + { + "epoch": 0.25453586497890296, + "grad_norm": 0.5503777265548706, + "learning_rate": 0.0012853214067072782, + "loss": 1.7528, + "step": 2413 + }, + { + "epoch": 
0.25464135021097045, + "grad_norm": 0.6668869256973267, + "learning_rate": 0.0012851455374622604, + "loss": 1.7163, + "step": 2414 + }, + { + "epoch": 0.254746835443038, + "grad_norm": 0.8135492205619812, + "learning_rate": 0.0012849696082515166, + "loss": 1.7342, + "step": 2415 + }, + { + "epoch": 0.2548523206751055, + "grad_norm": 0.7600919008255005, + "learning_rate": 0.0012847936190947605, + "loss": 1.7527, + "step": 2416 + }, + { + "epoch": 0.254957805907173, + "grad_norm": 0.5773012638092041, + "learning_rate": 0.001284617570011713, + "loss": 1.7084, + "step": 2417 + }, + { + "epoch": 0.25506329113924053, + "grad_norm": 0.7831494808197021, + "learning_rate": 0.0012844414610221006, + "loss": 1.754, + "step": 2418 + }, + { + "epoch": 0.255168776371308, + "grad_norm": 0.7106553912162781, + "learning_rate": 0.0012842652921456576, + "loss": 1.7254, + "step": 2419 + }, + { + "epoch": 0.2552742616033755, + "grad_norm": 0.658639669418335, + "learning_rate": 0.0012840890634021249, + "loss": 1.7648, + "step": 2420 + }, + { + "epoch": 0.255379746835443, + "grad_norm": 0.8389819860458374, + "learning_rate": 0.001283912774811249, + "loss": 1.7142, + "step": 2421 + }, + { + "epoch": 0.25548523206751056, + "grad_norm": 0.6785309314727783, + "learning_rate": 0.0012837364263927843, + "loss": 1.8184, + "step": 2422 + }, + { + "epoch": 0.25559071729957805, + "grad_norm": 0.6110810041427612, + "learning_rate": 0.001283560018166492, + "loss": 1.7817, + "step": 2423 + }, + { + "epoch": 0.25569620253164554, + "grad_norm": 0.7451142072677612, + "learning_rate": 0.0012833835501521386, + "loss": 1.7293, + "step": 2424 + }, + { + "epoch": 0.2558016877637131, + "grad_norm": 0.5640731453895569, + "learning_rate": 0.0012832070223694992, + "loss": 1.6999, + "step": 2425 + }, + { + "epoch": 0.2559071729957806, + "grad_norm": 0.6326661109924316, + "learning_rate": 0.0012830304348383538, + "loss": 1.7411, + "step": 2426 + }, + { + "epoch": 0.2560126582278481, + "grad_norm": 
0.6476118564605713, + "learning_rate": 0.0012828537875784905, + "loss": 1.7669, + "step": 2427 + }, + { + "epoch": 0.2561181434599156, + "grad_norm": 0.5402170419692993, + "learning_rate": 0.001282677080609703, + "loss": 1.7061, + "step": 2428 + }, + { + "epoch": 0.2562236286919831, + "grad_norm": 0.5425247550010681, + "learning_rate": 0.0012825003139517925, + "loss": 1.7214, + "step": 2429 + }, + { + "epoch": 0.2563291139240506, + "grad_norm": 0.5703710317611694, + "learning_rate": 0.0012823234876245667, + "loss": 1.7121, + "step": 2430 + }, + { + "epoch": 0.25643459915611816, + "grad_norm": 0.7066762447357178, + "learning_rate": 0.0012821466016478395, + "loss": 1.7608, + "step": 2431 + }, + { + "epoch": 0.25654008438818565, + "grad_norm": 0.5778835415840149, + "learning_rate": 0.0012819696560414323, + "loss": 1.7707, + "step": 2432 + }, + { + "epoch": 0.25664556962025314, + "grad_norm": 0.5349011421203613, + "learning_rate": 0.0012817926508251723, + "loss": 1.7904, + "step": 2433 + }, + { + "epoch": 0.2567510548523207, + "grad_norm": 0.53676438331604, + "learning_rate": 0.0012816155860188938, + "loss": 1.699, + "step": 2434 + }, + { + "epoch": 0.2568565400843882, + "grad_norm": 0.5181719660758972, + "learning_rate": 0.0012814384616424384, + "loss": 1.7176, + "step": 2435 + }, + { + "epoch": 0.2569620253164557, + "grad_norm": 0.6383203864097595, + "learning_rate": 0.0012812612777156533, + "loss": 1.7152, + "step": 2436 + }, + { + "epoch": 0.2570675105485232, + "grad_norm": 0.5444127917289734, + "learning_rate": 0.001281084034258393, + "loss": 1.7502, + "step": 2437 + }, + { + "epoch": 0.2571729957805907, + "grad_norm": 0.5807179808616638, + "learning_rate": 0.0012809067312905182, + "loss": 1.7428, + "step": 2438 + }, + { + "epoch": 0.2572784810126582, + "grad_norm": 0.5968647599220276, + "learning_rate": 0.0012807293688318969, + "loss": 1.7439, + "step": 2439 + }, + { + "epoch": 0.25738396624472576, + "grad_norm": 0.571002721786499, + "learning_rate": 
0.0012805519469024035, + "loss": 1.7732, + "step": 2440 + }, + { + "epoch": 0.25748945147679325, + "grad_norm": 0.7465933561325073, + "learning_rate": 0.0012803744655219187, + "loss": 1.75, + "step": 2441 + }, + { + "epoch": 0.25759493670886074, + "grad_norm": 0.6278141736984253, + "learning_rate": 0.0012801969247103306, + "loss": 1.7404, + "step": 2442 + }, + { + "epoch": 0.2577004219409283, + "grad_norm": 0.5474388599395752, + "learning_rate": 0.001280019324487533, + "loss": 1.7402, + "step": 2443 + }, + { + "epoch": 0.2578059071729958, + "grad_norm": 0.5658200979232788, + "learning_rate": 0.0012798416648734272, + "loss": 1.7289, + "step": 2444 + }, + { + "epoch": 0.2579113924050633, + "grad_norm": 0.5908535718917847, + "learning_rate": 0.001279663945887921, + "loss": 1.781, + "step": 2445 + }, + { + "epoch": 0.2580168776371308, + "grad_norm": 0.6151873469352722, + "learning_rate": 0.0012794861675509285, + "loss": 1.7094, + "step": 2446 + }, + { + "epoch": 0.2581223628691983, + "grad_norm": 0.6701158285140991, + "learning_rate": 0.0012793083298823708, + "loss": 1.7322, + "step": 2447 + }, + { + "epoch": 0.2582278481012658, + "grad_norm": 0.5395596027374268, + "learning_rate": 0.0012791304329021751, + "loss": 1.7402, + "step": 2448 + }, + { + "epoch": 0.25833333333333336, + "grad_norm": 0.6213874816894531, + "learning_rate": 0.001278952476630276, + "loss": 1.7184, + "step": 2449 + }, + { + "epoch": 0.25843881856540085, + "grad_norm": 0.6710637807846069, + "learning_rate": 0.0012787744610866143, + "loss": 1.7218, + "step": 2450 + }, + { + "epoch": 0.25854430379746834, + "grad_norm": 0.5998837947845459, + "learning_rate": 0.0012785963862911376, + "loss": 1.7495, + "step": 2451 + }, + { + "epoch": 0.2586497890295359, + "grad_norm": 0.5460300445556641, + "learning_rate": 0.0012784182522637998, + "loss": 1.7737, + "step": 2452 + }, + { + "epoch": 0.2587552742616034, + "grad_norm": 0.5729964375495911, + "learning_rate": 0.001278240059024562, + "loss": 1.7322, + "step": 
2453 + }, + { + "epoch": 0.2588607594936709, + "grad_norm": 0.5952600836753845, + "learning_rate": 0.0012780618065933915, + "loss": 1.7807, + "step": 2454 + }, + { + "epoch": 0.25896624472573837, + "grad_norm": 0.5655389428138733, + "learning_rate": 0.0012778834949902626, + "loss": 1.719, + "step": 2455 + }, + { + "epoch": 0.2590717299578059, + "grad_norm": 0.5265870094299316, + "learning_rate": 0.0012777051242351557, + "loss": 1.7513, + "step": 2456 + }, + { + "epoch": 0.2591772151898734, + "grad_norm": 0.5616098642349243, + "learning_rate": 0.0012775266943480582, + "loss": 1.7422, + "step": 2457 + }, + { + "epoch": 0.2592827004219409, + "grad_norm": 0.5536954998970032, + "learning_rate": 0.0012773482053489642, + "loss": 1.7686, + "step": 2458 + }, + { + "epoch": 0.25938818565400845, + "grad_norm": 0.6399211287498474, + "learning_rate": 0.0012771696572578743, + "loss": 1.7691, + "step": 2459 + }, + { + "epoch": 0.25949367088607594, + "grad_norm": 0.648747444152832, + "learning_rate": 0.0012769910500947954, + "loss": 1.7434, + "step": 2460 + }, + { + "epoch": 0.25959915611814344, + "grad_norm": 0.5762650966644287, + "learning_rate": 0.0012768123838797414, + "loss": 1.6978, + "step": 2461 + }, + { + "epoch": 0.259704641350211, + "grad_norm": 0.672791600227356, + "learning_rate": 0.0012766336586327333, + "loss": 1.7471, + "step": 2462 + }, + { + "epoch": 0.2598101265822785, + "grad_norm": 0.5759983062744141, + "learning_rate": 0.0012764548743737973, + "loss": 1.7458, + "step": 2463 + }, + { + "epoch": 0.25991561181434597, + "grad_norm": 0.5431709885597229, + "learning_rate": 0.001276276031122968, + "loss": 1.7154, + "step": 2464 + }, + { + "epoch": 0.2600210970464135, + "grad_norm": 0.6701442003250122, + "learning_rate": 0.0012760971289002847, + "loss": 1.7681, + "step": 2465 + }, + { + "epoch": 0.260126582278481, + "grad_norm": 0.6193946003913879, + "learning_rate": 0.0012759181677257946, + "loss": 1.7289, + "step": 2466 + }, + { + "epoch": 0.2602320675105485, + 
"grad_norm": 0.563148558139801, + "learning_rate": 0.0012757391476195517, + "loss": 1.7379, + "step": 2467 + }, + { + "epoch": 0.26033755274261605, + "grad_norm": 0.5797519087791443, + "learning_rate": 0.0012755600686016155, + "loss": 1.7492, + "step": 2468 + }, + { + "epoch": 0.26044303797468354, + "grad_norm": 0.5880351662635803, + "learning_rate": 0.0012753809306920532, + "loss": 1.7645, + "step": 2469 + }, + { + "epoch": 0.26054852320675104, + "grad_norm": 0.6027030348777771, + "learning_rate": 0.0012752017339109376, + "loss": 1.756, + "step": 2470 + }, + { + "epoch": 0.2606540084388186, + "grad_norm": 0.596081018447876, + "learning_rate": 0.0012750224782783492, + "loss": 1.7772, + "step": 2471 + }, + { + "epoch": 0.2607594936708861, + "grad_norm": 0.5931146144866943, + "learning_rate": 0.0012748431638143739, + "loss": 1.729, + "step": 2472 + }, + { + "epoch": 0.26086497890295357, + "grad_norm": 0.5792598128318787, + "learning_rate": 0.0012746637905391048, + "loss": 1.7374, + "step": 2473 + }, + { + "epoch": 0.2609704641350211, + "grad_norm": 0.5488055348396301, + "learning_rate": 0.001274484358472642, + "loss": 1.7378, + "step": 2474 + }, + { + "epoch": 0.2610759493670886, + "grad_norm": 0.6558566689491272, + "learning_rate": 0.0012743048676350911, + "loss": 1.7509, + "step": 2475 + }, + { + "epoch": 0.2611814345991561, + "grad_norm": 0.55729079246521, + "learning_rate": 0.001274125318046566, + "loss": 1.7231, + "step": 2476 + }, + { + "epoch": 0.26128691983122365, + "grad_norm": 1.015706181526184, + "learning_rate": 0.0012739457097271849, + "loss": 1.7588, + "step": 2477 + }, + { + "epoch": 0.26139240506329114, + "grad_norm": 0.926632821559906, + "learning_rate": 0.0012737660426970748, + "loss": 1.7352, + "step": 2478 + }, + { + "epoch": 0.26149789029535864, + "grad_norm": 0.5050556659698486, + "learning_rate": 0.0012735863169763678, + "loss": 1.7444, + "step": 2479 + }, + { + "epoch": 0.2616033755274262, + "grad_norm": 0.8081964254379272, + "learning_rate": 
0.0012734065325852029, + "loss": 1.7458, + "step": 2480 + }, + { + "epoch": 0.2617088607594937, + "grad_norm": 0.8106364607810974, + "learning_rate": 0.0012732266895437265, + "loss": 1.7292, + "step": 2481 + }, + { + "epoch": 0.26181434599156117, + "grad_norm": 0.598118007183075, + "learning_rate": 0.00127304678787209, + "loss": 1.7128, + "step": 2482 + }, + { + "epoch": 0.2619198312236287, + "grad_norm": 0.8221193552017212, + "learning_rate": 0.001272866827590453, + "loss": 1.7392, + "step": 2483 + }, + { + "epoch": 0.2620253164556962, + "grad_norm": 0.8188832402229309, + "learning_rate": 0.001272686808718981, + "loss": 1.71, + "step": 2484 + }, + { + "epoch": 0.2621308016877637, + "grad_norm": 0.6156265735626221, + "learning_rate": 0.0012725067312778454, + "loss": 1.7438, + "step": 2485 + }, + { + "epoch": 0.2622362869198312, + "grad_norm": 0.7138093709945679, + "learning_rate": 0.0012723265952872252, + "loss": 1.732, + "step": 2486 + }, + { + "epoch": 0.26234177215189874, + "grad_norm": 0.737231433391571, + "learning_rate": 0.0012721464007673055, + "loss": 1.7268, + "step": 2487 + }, + { + "epoch": 0.26244725738396624, + "grad_norm": 0.557700514793396, + "learning_rate": 0.0012719661477382778, + "loss": 1.7682, + "step": 2488 + }, + { + "epoch": 0.26255274261603373, + "grad_norm": 0.7862690687179565, + "learning_rate": 0.0012717858362203407, + "loss": 1.6985, + "step": 2489 + }, + { + "epoch": 0.2626582278481013, + "grad_norm": 0.8754299283027649, + "learning_rate": 0.0012716054662336987, + "loss": 1.7597, + "step": 2490 + }, + { + "epoch": 0.26276371308016877, + "grad_norm": 0.6016347408294678, + "learning_rate": 0.001271425037798563, + "loss": 1.7236, + "step": 2491 + }, + { + "epoch": 0.26286919831223626, + "grad_norm": 0.6816181540489197, + "learning_rate": 0.0012712445509351518, + "loss": 1.7429, + "step": 2492 + }, + { + "epoch": 0.2629746835443038, + "grad_norm": 0.6761646270751953, + "learning_rate": 0.00127106400566369, + "loss": 1.7747, + "step": 2493 
+ }, + { + "epoch": 0.2630801687763713, + "grad_norm": 0.5222062468528748, + "learning_rate": 0.0012708834020044076, + "loss": 1.7276, + "step": 2494 + }, + { + "epoch": 0.2631856540084388, + "grad_norm": 0.6389471292495728, + "learning_rate": 0.0012707027399775429, + "loss": 1.7402, + "step": 2495 + }, + { + "epoch": 0.26329113924050634, + "grad_norm": 0.5273581743240356, + "learning_rate": 0.0012705220196033396, + "loss": 1.7268, + "step": 2496 + }, + { + "epoch": 0.26339662447257384, + "grad_norm": 0.5954968929290771, + "learning_rate": 0.0012703412409020484, + "loss": 1.7816, + "step": 2497 + }, + { + "epoch": 0.26350210970464133, + "grad_norm": 0.5678990483283997, + "learning_rate": 0.0012701604038939268, + "loss": 1.727, + "step": 2498 + }, + { + "epoch": 0.2636075949367089, + "grad_norm": 0.5615913271903992, + "learning_rate": 0.0012699795085992379, + "loss": 1.7406, + "step": 2499 + }, + { + "epoch": 0.26371308016877637, + "grad_norm": 0.6197724938392639, + "learning_rate": 0.001269798555038252, + "loss": 1.7128, + "step": 2500 + }, + { + "epoch": 0.26381856540084386, + "grad_norm": 0.7102848291397095, + "learning_rate": 0.0012696175432312465, + "loss": 1.7286, + "step": 2501 + }, + { + "epoch": 0.2639240506329114, + "grad_norm": 0.7603291869163513, + "learning_rate": 0.0012694364731985041, + "loss": 1.7637, + "step": 2502 + }, + { + "epoch": 0.2640295358649789, + "grad_norm": 0.530730664730072, + "learning_rate": 0.0012692553449603148, + "loss": 1.7361, + "step": 2503 + }, + { + "epoch": 0.2641350210970464, + "grad_norm": 0.7350026369094849, + "learning_rate": 0.0012690741585369748, + "loss": 1.7178, + "step": 2504 + }, + { + "epoch": 0.26424050632911394, + "grad_norm": 0.6548504829406738, + "learning_rate": 0.0012688929139487869, + "loss": 1.7638, + "step": 2505 + }, + { + "epoch": 0.26434599156118144, + "grad_norm": 0.6852399706840515, + "learning_rate": 0.0012687116112160607, + "loss": 1.7186, + "step": 2506 + }, + { + "epoch": 0.26445147679324893, + 
"grad_norm": 0.6187108755111694, + "learning_rate": 0.0012685302503591118, + "loss": 1.7453, + "step": 2507 + }, + { + "epoch": 0.2645569620253165, + "grad_norm": 0.5703083276748657, + "learning_rate": 0.0012683488313982628, + "loss": 1.7234, + "step": 2508 + }, + { + "epoch": 0.26466244725738397, + "grad_norm": 0.6030914783477783, + "learning_rate": 0.0012681673543538427, + "loss": 1.7128, + "step": 2509 + }, + { + "epoch": 0.26476793248945146, + "grad_norm": 0.5388596057891846, + "learning_rate": 0.0012679858192461864, + "loss": 1.763, + "step": 2510 + }, + { + "epoch": 0.264873417721519, + "grad_norm": 0.5690459609031677, + "learning_rate": 0.0012678042260956363, + "loss": 1.7703, + "step": 2511 + }, + { + "epoch": 0.2649789029535865, + "grad_norm": 0.5653604865074158, + "learning_rate": 0.0012676225749225407, + "loss": 1.7488, + "step": 2512 + }, + { + "epoch": 0.265084388185654, + "grad_norm": 0.5231766700744629, + "learning_rate": 0.0012674408657472542, + "loss": 1.7037, + "step": 2513 + }, + { + "epoch": 0.26518987341772154, + "grad_norm": 0.5940752625465393, + "learning_rate": 0.0012672590985901386, + "loss": 1.7272, + "step": 2514 + }, + { + "epoch": 0.26529535864978904, + "grad_norm": 0.5528303980827332, + "learning_rate": 0.001267077273471562, + "loss": 1.7284, + "step": 2515 + }, + { + "epoch": 0.26540084388185653, + "grad_norm": 0.572151243686676, + "learning_rate": 0.0012668953904118984, + "loss": 1.7438, + "step": 2516 + }, + { + "epoch": 0.2655063291139241, + "grad_norm": 0.7021767497062683, + "learning_rate": 0.001266713449431529, + "loss": 1.7485, + "step": 2517 + }, + { + "epoch": 0.26561181434599157, + "grad_norm": 0.7099441289901733, + "learning_rate": 0.0012665314505508406, + "loss": 1.7597, + "step": 2518 + }, + { + "epoch": 0.26571729957805906, + "grad_norm": 0.5061041712760925, + "learning_rate": 0.0012663493937902278, + "loss": 1.7431, + "step": 2519 + }, + { + "epoch": 0.26582278481012656, + "grad_norm": 0.7706206440925598, + 
"learning_rate": 0.0012661672791700906, + "loss": 1.7448, + "step": 2520 + }, + { + "epoch": 0.2659282700421941, + "grad_norm": 0.7216829657554626, + "learning_rate": 0.001265985106710836, + "loss": 1.7284, + "step": 2521 + }, + { + "epoch": 0.2660337552742616, + "grad_norm": 0.6715671420097351, + "learning_rate": 0.0012658028764328771, + "loss": 1.7334, + "step": 2522 + }, + { + "epoch": 0.2661392405063291, + "grad_norm": 0.7200184464454651, + "learning_rate": 0.0012656205883566339, + "loss": 1.7577, + "step": 2523 + }, + { + "epoch": 0.26624472573839664, + "grad_norm": 0.6408863067626953, + "learning_rate": 0.0012654382425025328, + "loss": 1.6885, + "step": 2524 + }, + { + "epoch": 0.26635021097046413, + "grad_norm": 0.6053536534309387, + "learning_rate": 0.0012652558388910062, + "loss": 1.7011, + "step": 2525 + }, + { + "epoch": 0.2664556962025316, + "grad_norm": 0.7200090885162354, + "learning_rate": 0.0012650733775424938, + "loss": 1.7207, + "step": 2526 + }, + { + "epoch": 0.26656118143459917, + "grad_norm": 0.5442737340927124, + "learning_rate": 0.001264890858477441, + "loss": 1.7272, + "step": 2527 + }, + { + "epoch": 0.26666666666666666, + "grad_norm": 0.662484884262085, + "learning_rate": 0.0012647082817162998, + "loss": 1.7345, + "step": 2528 + }, + { + "epoch": 0.26677215189873416, + "grad_norm": 0.557899534702301, + "learning_rate": 0.0012645256472795295, + "loss": 1.8138, + "step": 2529 + }, + { + "epoch": 0.2668776371308017, + "grad_norm": 0.8287085294723511, + "learning_rate": 0.0012643429551875945, + "loss": 1.7261, + "step": 2530 + }, + { + "epoch": 0.2669831223628692, + "grad_norm": 0.7547269463539124, + "learning_rate": 0.0012641602054609662, + "loss": 1.7279, + "step": 2531 + }, + { + "epoch": 0.2670886075949367, + "grad_norm": 0.6937845945358276, + "learning_rate": 0.0012639773981201238, + "loss": 1.738, + "step": 2532 + }, + { + "epoch": 0.26719409282700424, + "grad_norm": 1.2210432291030884, + "learning_rate": 0.0012637945331855506, + 
"loss": 1.6828, + "step": 2533 + }, + { + "epoch": 0.26729957805907173, + "grad_norm": 0.6483930349349976, + "learning_rate": 0.0012636116106777382, + "loss": 1.7225, + "step": 2534 + }, + { + "epoch": 0.2674050632911392, + "grad_norm": 0.790865421295166, + "learning_rate": 0.0012634286306171835, + "loss": 1.7819, + "step": 2535 + }, + { + "epoch": 0.26751054852320677, + "grad_norm": 0.6825897097587585, + "learning_rate": 0.0012632455930243907, + "loss": 1.7452, + "step": 2536 + }, + { + "epoch": 0.26761603375527426, + "grad_norm": 0.6941238641738892, + "learning_rate": 0.0012630624979198697, + "loss": 1.7342, + "step": 2537 + }, + { + "epoch": 0.26772151898734176, + "grad_norm": 0.6861960887908936, + "learning_rate": 0.0012628793453241377, + "loss": 1.7844, + "step": 2538 + }, + { + "epoch": 0.2678270042194093, + "grad_norm": 0.723371684551239, + "learning_rate": 0.0012626961352577174, + "loss": 1.7499, + "step": 2539 + }, + { + "epoch": 0.2679324894514768, + "grad_norm": 0.6026545166969299, + "learning_rate": 0.0012625128677411388, + "loss": 1.7287, + "step": 2540 + }, + { + "epoch": 0.2680379746835443, + "grad_norm": 0.8114922046661377, + "learning_rate": 0.0012623295427949377, + "loss": 1.7268, + "step": 2541 + }, + { + "epoch": 0.26814345991561184, + "grad_norm": 0.8287862539291382, + "learning_rate": 0.0012621461604396566, + "loss": 1.7525, + "step": 2542 + }, + { + "epoch": 0.26824894514767933, + "grad_norm": 0.5562458634376526, + "learning_rate": 0.0012619627206958445, + "loss": 1.7226, + "step": 2543 + }, + { + "epoch": 0.2683544303797468, + "grad_norm": 1.0646401643753052, + "learning_rate": 0.0012617792235840564, + "loss": 1.7074, + "step": 2544 + }, + { + "epoch": 0.26845991561181437, + "grad_norm": 0.6832635998725891, + "learning_rate": 0.0012615956691248544, + "loss": 1.7421, + "step": 2545 + }, + { + "epoch": 0.26856540084388186, + "grad_norm": 0.7653653025627136, + "learning_rate": 0.001261412057338807, + "loss": 1.7335, + "step": 2546 + }, + { + 
"epoch": 0.26867088607594936, + "grad_norm": 0.9339815974235535, + "learning_rate": 0.0012612283882464882, + "loss": 1.7223, + "step": 2547 + }, + { + "epoch": 0.2687763713080169, + "grad_norm": 0.818374514579773, + "learning_rate": 0.0012610446618684793, + "loss": 1.7364, + "step": 2548 + }, + { + "epoch": 0.2688818565400844, + "grad_norm": 0.6379709839820862, + "learning_rate": 0.0012608608782253676, + "loss": 1.7397, + "step": 2549 + }, + { + "epoch": 0.2689873417721519, + "grad_norm": 0.7682555913925171, + "learning_rate": 0.0012606770373377475, + "loss": 1.724, + "step": 2550 + }, + { + "epoch": 0.26909282700421944, + "grad_norm": 0.7050039768218994, + "learning_rate": 0.0012604931392262186, + "loss": 1.7481, + "step": 2551 + }, + { + "epoch": 0.26919831223628693, + "grad_norm": 0.7372577786445618, + "learning_rate": 0.001260309183911388, + "loss": 1.7614, + "step": 2552 + }, + { + "epoch": 0.2693037974683544, + "grad_norm": 0.863684356212616, + "learning_rate": 0.0012601251714138683, + "loss": 1.7755, + "step": 2553 + }, + { + "epoch": 0.2694092827004219, + "grad_norm": 0.7167185544967651, + "learning_rate": 0.0012599411017542798, + "loss": 1.7298, + "step": 2554 + }, + { + "epoch": 0.26951476793248946, + "grad_norm": 0.63503098487854, + "learning_rate": 0.0012597569749532482, + "loss": 1.7278, + "step": 2555 + }, + { + "epoch": 0.26962025316455696, + "grad_norm": 0.7235109210014343, + "learning_rate": 0.0012595727910314056, + "loss": 1.7462, + "step": 2556 + }, + { + "epoch": 0.26972573839662445, + "grad_norm": 0.7619304060935974, + "learning_rate": 0.0012593885500093906, + "loss": 1.7218, + "step": 2557 + }, + { + "epoch": 0.269831223628692, + "grad_norm": 0.6856017112731934, + "learning_rate": 0.0012592042519078486, + "loss": 1.7473, + "step": 2558 + }, + { + "epoch": 0.2699367088607595, + "grad_norm": 0.8160871267318726, + "learning_rate": 0.0012590198967474312, + "loss": 1.7344, + "step": 2559 + }, + { + "epoch": 0.270042194092827, + "grad_norm": 
0.5946592688560486, + "learning_rate": 0.0012588354845487959, + "loss": 1.6954, + "step": 2560 + }, + { + "epoch": 0.27014767932489453, + "grad_norm": 0.9688990116119385, + "learning_rate": 0.0012586510153326075, + "loss": 1.72, + "step": 2561 + }, + { + "epoch": 0.270253164556962, + "grad_norm": 0.9400184154510498, + "learning_rate": 0.0012584664891195365, + "loss": 1.7588, + "step": 2562 + }, + { + "epoch": 0.2703586497890295, + "grad_norm": 0.6112096905708313, + "learning_rate": 0.0012582819059302598, + "loss": 1.7711, + "step": 2563 + }, + { + "epoch": 0.27046413502109706, + "grad_norm": 0.6998224258422852, + "learning_rate": 0.001258097265785461, + "loss": 1.7219, + "step": 2564 + }, + { + "epoch": 0.27056962025316456, + "grad_norm": 0.6095157265663147, + "learning_rate": 0.0012579125687058302, + "loss": 1.7012, + "step": 2565 + }, + { + "epoch": 0.27067510548523205, + "grad_norm": 0.5667386651039124, + "learning_rate": 0.0012577278147120632, + "loss": 1.7108, + "step": 2566 + }, + { + "epoch": 0.2707805907172996, + "grad_norm": 0.586430013179779, + "learning_rate": 0.0012575430038248628, + "loss": 1.7561, + "step": 2567 + }, + { + "epoch": 0.2708860759493671, + "grad_norm": 0.5470792651176453, + "learning_rate": 0.001257358136064938, + "loss": 1.7428, + "step": 2568 + }, + { + "epoch": 0.2709915611814346, + "grad_norm": 0.5370815992355347, + "learning_rate": 0.001257173211453004, + "loss": 1.6919, + "step": 2569 + }, + { + "epoch": 0.27109704641350213, + "grad_norm": 0.5906941294670105, + "learning_rate": 0.001256988230009783, + "loss": 1.7639, + "step": 2570 + }, + { + "epoch": 0.2712025316455696, + "grad_norm": 0.6485133767127991, + "learning_rate": 0.0012568031917560027, + "loss": 1.7022, + "step": 2571 + }, + { + "epoch": 0.2713080168776371, + "grad_norm": 0.6480128765106201, + "learning_rate": 0.0012566180967123976, + "loss": 1.7359, + "step": 2572 + }, + { + "epoch": 0.27141350210970466, + "grad_norm": 0.6019198894500732, + "learning_rate": 
0.0012564329448997082, + "loss": 1.7292, + "step": 2573 + }, + { + "epoch": 0.27151898734177216, + "grad_norm": 0.6204916834831238, + "learning_rate": 0.0012562477363386821, + "loss": 1.728, + "step": 2574 + }, + { + "epoch": 0.27162447257383965, + "grad_norm": 0.672171413898468, + "learning_rate": 0.0012560624710500731, + "loss": 1.7252, + "step": 2575 + }, + { + "epoch": 0.2717299578059072, + "grad_norm": 0.5547239780426025, + "learning_rate": 0.0012558771490546407, + "loss": 1.7002, + "step": 2576 + }, + { + "epoch": 0.2718354430379747, + "grad_norm": 0.6673055291175842, + "learning_rate": 0.0012556917703731509, + "loss": 1.749, + "step": 2577 + }, + { + "epoch": 0.2719409282700422, + "grad_norm": 0.6782718300819397, + "learning_rate": 0.0012555063350263768, + "loss": 1.7893, + "step": 2578 + }, + { + "epoch": 0.27204641350210973, + "grad_norm": 0.5968663096427917, + "learning_rate": 0.0012553208430350973, + "loss": 1.7322, + "step": 2579 + }, + { + "epoch": 0.2721518987341772, + "grad_norm": 0.6068095564842224, + "learning_rate": 0.0012551352944200976, + "loss": 1.7317, + "step": 2580 + }, + { + "epoch": 0.2722573839662447, + "grad_norm": 0.6757047772407532, + "learning_rate": 0.0012549496892021693, + "loss": 1.7081, + "step": 2581 + }, + { + "epoch": 0.27236286919831226, + "grad_norm": 0.676723301410675, + "learning_rate": 0.0012547640274021103, + "loss": 1.7525, + "step": 2582 + }, + { + "epoch": 0.27246835443037976, + "grad_norm": 0.559741199016571, + "learning_rate": 0.001254578309040725, + "loss": 1.7395, + "step": 2583 + }, + { + "epoch": 0.27257383966244725, + "grad_norm": 0.6946984529495239, + "learning_rate": 0.001254392534138824, + "loss": 1.7309, + "step": 2584 + }, + { + "epoch": 0.27267932489451474, + "grad_norm": 0.6254798769950867, + "learning_rate": 0.0012542067027172248, + "loss": 1.7567, + "step": 2585 + }, + { + "epoch": 0.2727848101265823, + "grad_norm": 0.605078399181366, + "learning_rate": 0.0012540208147967503, + "loss": 1.7152, + "step": 
2586 + }, + { + "epoch": 0.2728902953586498, + "grad_norm": 0.6391255855560303, + "learning_rate": 0.00125383487039823, + "loss": 1.7507, + "step": 2587 + }, + { + "epoch": 0.2729957805907173, + "grad_norm": 0.6900272965431213, + "learning_rate": 0.0012536488695425003, + "loss": 1.7548, + "step": 2588 + }, + { + "epoch": 0.2731012658227848, + "grad_norm": 0.5551100373268127, + "learning_rate": 0.0012534628122504031, + "loss": 1.7267, + "step": 2589 + }, + { + "epoch": 0.2732067510548523, + "grad_norm": 0.6530448198318481, + "learning_rate": 0.0012532766985427874, + "loss": 1.741, + "step": 2590 + }, + { + "epoch": 0.2733122362869198, + "grad_norm": 0.7493420839309692, + "learning_rate": 0.0012530905284405083, + "loss": 1.7315, + "step": 2591 + }, + { + "epoch": 0.27341772151898736, + "grad_norm": 0.5437103509902954, + "learning_rate": 0.0012529043019644266, + "loss": 1.755, + "step": 2592 + }, + { + "epoch": 0.27352320675105485, + "grad_norm": 0.8595595955848694, + "learning_rate": 0.0012527180191354104, + "loss": 1.745, + "step": 2593 + }, + { + "epoch": 0.27362869198312234, + "grad_norm": 0.7747331857681274, + "learning_rate": 0.0012525316799743332, + "loss": 1.743, + "step": 2594 + }, + { + "epoch": 0.2737341772151899, + "grad_norm": 0.6357258558273315, + "learning_rate": 0.0012523452845020755, + "loss": 1.7631, + "step": 2595 + }, + { + "epoch": 0.2738396624472574, + "grad_norm": 0.8270261883735657, + "learning_rate": 0.0012521588327395236, + "loss": 1.7375, + "step": 2596 + }, + { + "epoch": 0.2739451476793249, + "grad_norm": 0.5672783255577087, + "learning_rate": 0.0012519723247075706, + "loss": 1.7265, + "step": 2597 + }, + { + "epoch": 0.2740506329113924, + "grad_norm": 0.6641061305999756, + "learning_rate": 0.0012517857604271156, + "loss": 1.7087, + "step": 2598 + }, + { + "epoch": 0.2741561181434599, + "grad_norm": 0.6945777535438538, + "learning_rate": 0.001251599139919064, + "loss": 1.7146, + "step": 2599 + }, + { + "epoch": 0.2742616033755274, + 
"grad_norm": 0.5396037101745605, + "learning_rate": 0.0012514124632043272, + "loss": 1.7573, + "step": 2600 + }, + { + "epoch": 0.27436708860759496, + "grad_norm": 0.7621541023254395, + "learning_rate": 0.001251225730303824, + "loss": 1.7228, + "step": 2601 + }, + { + "epoch": 0.27447257383966245, + "grad_norm": 0.9060667753219604, + "learning_rate": 0.0012510389412384785, + "loss": 1.7301, + "step": 2602 + }, + { + "epoch": 0.27457805907172994, + "grad_norm": 0.6725060939788818, + "learning_rate": 0.001250852096029221, + "loss": 1.6728, + "step": 2603 + }, + { + "epoch": 0.2746835443037975, + "grad_norm": 0.568886935710907, + "learning_rate": 0.0012506651946969888, + "loss": 1.7447, + "step": 2604 + }, + { + "epoch": 0.274789029535865, + "grad_norm": 0.6186895370483398, + "learning_rate": 0.0012504782372627248, + "loss": 1.7676, + "step": 2605 + }, + { + "epoch": 0.2748945147679325, + "grad_norm": 0.5712254643440247, + "learning_rate": 0.0012502912237473789, + "loss": 1.7556, + "step": 2606 + }, + { + "epoch": 0.275, + "grad_norm": 0.5588326454162598, + "learning_rate": 0.0012501041541719067, + "loss": 1.7465, + "step": 2607 + }, + { + "epoch": 0.2751054852320675, + "grad_norm": 0.6416376829147339, + "learning_rate": 0.0012499170285572702, + "loss": 1.7358, + "step": 2608 + }, + { + "epoch": 0.275210970464135, + "grad_norm": 0.6156525015830994, + "learning_rate": 0.0012497298469244377, + "loss": 1.7491, + "step": 2609 + }, + { + "epoch": 0.27531645569620256, + "grad_norm": 0.5822126865386963, + "learning_rate": 0.0012495426092943842, + "loss": 1.7404, + "step": 2610 + }, + { + "epoch": 0.27542194092827005, + "grad_norm": 0.866473913192749, + "learning_rate": 0.0012493553156880904, + "loss": 1.7555, + "step": 2611 + }, + { + "epoch": 0.27552742616033754, + "grad_norm": 0.9900190830230713, + "learning_rate": 0.0012491679661265434, + "loss": 1.7475, + "step": 2612 + }, + { + "epoch": 0.2756329113924051, + "grad_norm": 0.5823668837547302, + "learning_rate": 
0.0012489805606307367, + "loss": 1.7695, + "step": 2613 + }, + { + "epoch": 0.2757383966244726, + "grad_norm": 0.8738654255867004, + "learning_rate": 0.00124879309922167, + "loss": 1.733, + "step": 2614 + }, + { + "epoch": 0.2758438818565401, + "grad_norm": 0.8666998147964478, + "learning_rate": 0.0012486055819203494, + "loss": 1.738, + "step": 2615 + }, + { + "epoch": 0.2759493670886076, + "grad_norm": 0.5519676804542542, + "learning_rate": 0.001248418008747787, + "loss": 1.7111, + "step": 2616 + }, + { + "epoch": 0.2760548523206751, + "grad_norm": 0.6217747926712036, + "learning_rate": 0.0012482303797250014, + "loss": 1.7161, + "step": 2617 + }, + { + "epoch": 0.2761603375527426, + "grad_norm": 0.5651679635047913, + "learning_rate": 0.0012480426948730174, + "loss": 1.7021, + "step": 2618 + }, + { + "epoch": 0.2762658227848101, + "grad_norm": 0.6094161868095398, + "learning_rate": 0.001247854954212866, + "loss": 1.7368, + "step": 2619 + }, + { + "epoch": 0.27637130801687765, + "grad_norm": 0.5690747499465942, + "learning_rate": 0.0012476671577655845, + "loss": 1.7096, + "step": 2620 + }, + { + "epoch": 0.27647679324894514, + "grad_norm": 0.6227295398712158, + "learning_rate": 0.001247479305552216, + "loss": 1.7043, + "step": 2621 + }, + { + "epoch": 0.27658227848101263, + "grad_norm": 0.7391974925994873, + "learning_rate": 0.001247291397593811, + "loss": 1.6906, + "step": 2622 + }, + { + "epoch": 0.2766877637130802, + "grad_norm": 0.8030617833137512, + "learning_rate": 0.001247103433911425, + "loss": 1.7072, + "step": 2623 + }, + { + "epoch": 0.2767932489451477, + "grad_norm": 0.711759090423584, + "learning_rate": 0.0012469154145261208, + "loss": 1.753, + "step": 2624 + }, + { + "epoch": 0.27689873417721517, + "grad_norm": 0.9367809891700745, + "learning_rate": 0.0012467273394589664, + "loss": 1.7162, + "step": 2625 + }, + { + "epoch": 0.2770042194092827, + "grad_norm": 0.9177563786506653, + "learning_rate": 0.0012465392087310366, + "loss": 1.7145, + "step": 2626 
+ }, + { + "epoch": 0.2771097046413502, + "grad_norm": 0.7657164335250854, + "learning_rate": 0.0012463510223634125, + "loss": 1.7067, + "step": 2627 + }, + { + "epoch": 0.2772151898734177, + "grad_norm": 0.8321167230606079, + "learning_rate": 0.0012461627803771812, + "loss": 1.7457, + "step": 2628 + }, + { + "epoch": 0.27732067510548525, + "grad_norm": 0.715015709400177, + "learning_rate": 0.0012459744827934367, + "loss": 1.7151, + "step": 2629 + }, + { + "epoch": 0.27742616033755274, + "grad_norm": 0.809705376625061, + "learning_rate": 0.0012457861296332774, + "loss": 1.7645, + "step": 2630 + }, + { + "epoch": 0.27753164556962023, + "grad_norm": 0.7001917958259583, + "learning_rate": 0.0012455977209178109, + "loss": 1.7404, + "step": 2631 + }, + { + "epoch": 0.2776371308016878, + "grad_norm": 0.8504956960678101, + "learning_rate": 0.0012454092566681482, + "loss": 1.7423, + "step": 2632 + }, + { + "epoch": 0.2777426160337553, + "grad_norm": 0.6344122290611267, + "learning_rate": 0.001245220736905408, + "loss": 1.7051, + "step": 2633 + }, + { + "epoch": 0.27784810126582277, + "grad_norm": 0.88411945104599, + "learning_rate": 0.0012450321616507148, + "loss": 1.7543, + "step": 2634 + }, + { + "epoch": 0.2779535864978903, + "grad_norm": 1.0044407844543457, + "learning_rate": 0.0012448435309251995, + "loss": 1.7863, + "step": 2635 + }, + { + "epoch": 0.2780590717299578, + "grad_norm": 0.5861935615539551, + "learning_rate": 0.001244654844749999, + "loss": 1.7219, + "step": 2636 + }, + { + "epoch": 0.2781645569620253, + "grad_norm": 0.8707360625267029, + "learning_rate": 0.0012444661031462566, + "loss": 1.7607, + "step": 2637 + }, + { + "epoch": 0.27827004219409285, + "grad_norm": 0.8643624186515808, + "learning_rate": 0.0012442773061351216, + "loss": 1.7192, + "step": 2638 + }, + { + "epoch": 0.27837552742616034, + "grad_norm": 0.5943443775177002, + "learning_rate": 0.0012440884537377498, + "loss": 1.7287, + "step": 2639 + }, + { + "epoch": 0.27848101265822783, + 
"grad_norm": 0.9118046164512634, + "learning_rate": 0.001243899545975303, + "loss": 1.7151, + "step": 2640 + }, + { + "epoch": 0.2785864978902954, + "grad_norm": 0.6871889233589172, + "learning_rate": 0.0012437105828689494, + "loss": 1.7496, + "step": 2641 + }, + { + "epoch": 0.2786919831223629, + "grad_norm": 0.6415829062461853, + "learning_rate": 0.0012435215644398632, + "loss": 1.7002, + "step": 2642 + }, + { + "epoch": 0.27879746835443037, + "grad_norm": 0.6595430970191956, + "learning_rate": 0.0012433324907092243, + "loss": 1.7162, + "step": 2643 + }, + { + "epoch": 0.2789029535864979, + "grad_norm": 0.5914141535758972, + "learning_rate": 0.0012431433616982204, + "loss": 1.6856, + "step": 2644 + }, + { + "epoch": 0.2790084388185654, + "grad_norm": 0.6916948556900024, + "learning_rate": 0.0012429541774280435, + "loss": 1.7596, + "step": 2645 + }, + { + "epoch": 0.2791139240506329, + "grad_norm": 0.5235204100608826, + "learning_rate": 0.0012427649379198932, + "loss": 1.7277, + "step": 2646 + }, + { + "epoch": 0.27921940928270045, + "grad_norm": 0.6909539699554443, + "learning_rate": 0.0012425756431949742, + "loss": 1.7261, + "step": 2647 + }, + { + "epoch": 0.27932489451476794, + "grad_norm": 0.5490891337394714, + "learning_rate": 0.001242386293274498, + "loss": 1.7338, + "step": 2648 + }, + { + "epoch": 0.27943037974683543, + "grad_norm": 0.7136644124984741, + "learning_rate": 0.0012421968881796827, + "loss": 1.729, + "step": 2649 + }, + { + "epoch": 0.2795358649789029, + "grad_norm": 0.5884703993797302, + "learning_rate": 0.0012420074279317515, + "loss": 1.724, + "step": 2650 + }, + { + "epoch": 0.2796413502109705, + "grad_norm": 0.6325852870941162, + "learning_rate": 0.001241817912551935, + "loss": 1.708, + "step": 2651 + }, + { + "epoch": 0.27974683544303797, + "grad_norm": 0.5760251879692078, + "learning_rate": 0.0012416283420614686, + "loss": 1.7357, + "step": 2652 + }, + { + "epoch": 0.27985232067510546, + "grad_norm": 0.6099349856376648, + 
"learning_rate": 0.0012414387164815953, + "loss": 1.7416, + "step": 2653 + }, + { + "epoch": 0.279957805907173, + "grad_norm": 0.5919824838638306, + "learning_rate": 0.001241249035833563, + "loss": 1.7517, + "step": 2654 + }, + { + "epoch": 0.2800632911392405, + "grad_norm": 0.7045250535011292, + "learning_rate": 0.0012410593001386267, + "loss": 1.7185, + "step": 2655 + }, + { + "epoch": 0.280168776371308, + "grad_norm": 0.5747294425964355, + "learning_rate": 0.0012408695094180474, + "loss": 1.7102, + "step": 2656 + }, + { + "epoch": 0.28027426160337554, + "grad_norm": 0.7181317806243896, + "learning_rate": 0.0012406796636930918, + "loss": 1.7038, + "step": 2657 + }, + { + "epoch": 0.28037974683544303, + "grad_norm": 0.6269156336784363, + "learning_rate": 0.001240489762985033, + "loss": 1.6689, + "step": 2658 + }, + { + "epoch": 0.2804852320675105, + "grad_norm": 0.6096441745758057, + "learning_rate": 0.0012402998073151505, + "loss": 1.7415, + "step": 2659 + }, + { + "epoch": 0.2805907172995781, + "grad_norm": 0.7423406839370728, + "learning_rate": 0.0012401097967047298, + "loss": 1.7169, + "step": 2660 + }, + { + "epoch": 0.28069620253164557, + "grad_norm": 0.576375424861908, + "learning_rate": 0.0012399197311750623, + "loss": 1.7485, + "step": 2661 + }, + { + "epoch": 0.28080168776371306, + "grad_norm": 0.6918196082115173, + "learning_rate": 0.001239729610747446, + "loss": 1.7363, + "step": 2662 + }, + { + "epoch": 0.2809071729957806, + "grad_norm": 0.7320905923843384, + "learning_rate": 0.001239539435443185, + "loss": 1.7084, + "step": 2663 + }, + { + "epoch": 0.2810126582278481, + "grad_norm": 0.6592481732368469, + "learning_rate": 0.001239349205283589, + "loss": 1.7125, + "step": 2664 + }, + { + "epoch": 0.2811181434599156, + "grad_norm": 0.5539427399635315, + "learning_rate": 0.0012391589202899746, + "loss": 1.725, + "step": 2665 + }, + { + "epoch": 0.28122362869198314, + "grad_norm": 0.6806110143661499, + "learning_rate": 0.001238968580483664, + "loss": 
1.7277, + "step": 2666 + }, + { + "epoch": 0.28132911392405063, + "grad_norm": 0.6841118931770325, + "learning_rate": 0.0012387781858859857, + "loss": 1.7096, + "step": 2667 + }, + { + "epoch": 0.2814345991561181, + "grad_norm": 0.5275412201881409, + "learning_rate": 0.0012385877365182743, + "loss": 1.7086, + "step": 2668 + }, + { + "epoch": 0.2815400843881857, + "grad_norm": 0.8030583262443542, + "learning_rate": 0.0012383972324018708, + "loss": 1.7275, + "step": 2669 + }, + { + "epoch": 0.28164556962025317, + "grad_norm": 0.6081271767616272, + "learning_rate": 0.001238206673558122, + "loss": 1.7212, + "step": 2670 + }, + { + "epoch": 0.28175105485232066, + "grad_norm": 0.6384410262107849, + "learning_rate": 0.001238016060008381, + "loss": 1.7516, + "step": 2671 + }, + { + "epoch": 0.2818565400843882, + "grad_norm": 0.7683647871017456, + "learning_rate": 0.0012378253917740072, + "loss": 1.7374, + "step": 2672 + }, + { + "epoch": 0.2819620253164557, + "grad_norm": 0.7485621571540833, + "learning_rate": 0.0012376346688763656, + "loss": 1.7721, + "step": 2673 + }, + { + "epoch": 0.2820675105485232, + "grad_norm": 0.5916500091552734, + "learning_rate": 0.0012374438913368277, + "loss": 1.6924, + "step": 2674 + }, + { + "epoch": 0.28217299578059074, + "grad_norm": 0.8345897793769836, + "learning_rate": 0.0012372530591767711, + "loss": 1.7259, + "step": 2675 + }, + { + "epoch": 0.28227848101265823, + "grad_norm": 0.8252478837966919, + "learning_rate": 0.0012370621724175797, + "loss": 1.7293, + "step": 2676 + }, + { + "epoch": 0.2823839662447257, + "grad_norm": 0.5299639105796814, + "learning_rate": 0.0012368712310806432, + "loss": 1.6954, + "step": 2677 + }, + { + "epoch": 0.2824894514767933, + "grad_norm": 0.6513544917106628, + "learning_rate": 0.0012366802351873574, + "loss": 1.7001, + "step": 2678 + }, + { + "epoch": 0.28259493670886077, + "grad_norm": 0.5767882466316223, + "learning_rate": 0.0012364891847591246, + "loss": 1.7253, + "step": 2679 + }, + { + "epoch": 
0.28270042194092826, + "grad_norm": 0.7396652102470398, + "learning_rate": 0.0012362980798173526, + "loss": 1.7058, + "step": 2680 + }, + { + "epoch": 0.2828059071729958, + "grad_norm": 0.638664722442627, + "learning_rate": 0.0012361069203834561, + "loss": 1.6932, + "step": 2681 + }, + { + "epoch": 0.2829113924050633, + "grad_norm": 0.6350319385528564, + "learning_rate": 0.0012359157064788548, + "loss": 1.6819, + "step": 2682 + }, + { + "epoch": 0.2830168776371308, + "grad_norm": 0.6343206167221069, + "learning_rate": 0.0012357244381249759, + "loss": 1.7457, + "step": 2683 + }, + { + "epoch": 0.2831223628691983, + "grad_norm": 0.6567674279212952, + "learning_rate": 0.0012355331153432517, + "loss": 1.738, + "step": 2684 + }, + { + "epoch": 0.28322784810126583, + "grad_norm": 0.6531992554664612, + "learning_rate": 0.0012353417381551206, + "loss": 1.6986, + "step": 2685 + }, + { + "epoch": 0.2833333333333333, + "grad_norm": 0.5779063105583191, + "learning_rate": 0.001235150306582028, + "loss": 1.7142, + "step": 2686 + }, + { + "epoch": 0.2834388185654008, + "grad_norm": 0.6852748394012451, + "learning_rate": 0.001234958820645424, + "loss": 1.7352, + "step": 2687 + }, + { + "epoch": 0.28354430379746837, + "grad_norm": 0.6139652729034424, + "learning_rate": 0.0012347672803667662, + "loss": 1.717, + "step": 2688 + }, + { + "epoch": 0.28364978902953586, + "grad_norm": 0.7050642371177673, + "learning_rate": 0.0012345756857675171, + "loss": 1.7294, + "step": 2689 + }, + { + "epoch": 0.28375527426160335, + "grad_norm": 0.5793996453285217, + "learning_rate": 0.0012343840368691462, + "loss": 1.76, + "step": 2690 + }, + { + "epoch": 0.2838607594936709, + "grad_norm": 0.6409081816673279, + "learning_rate": 0.0012341923336931287, + "loss": 1.7422, + "step": 2691 + }, + { + "epoch": 0.2839662447257384, + "grad_norm": 0.6465917229652405, + "learning_rate": 0.0012340005762609457, + "loss": 1.7135, + "step": 2692 + }, + { + "epoch": 0.2840717299578059, + "grad_norm": 
0.6393659114837646, + "learning_rate": 0.0012338087645940847, + "loss": 1.7413, + "step": 2693 + }, + { + "epoch": 0.28417721518987343, + "grad_norm": 0.7544477581977844, + "learning_rate": 0.001233616898714039, + "loss": 1.6898, + "step": 2694 + }, + { + "epoch": 0.2842827004219409, + "grad_norm": 0.6965079307556152, + "learning_rate": 0.0012334249786423086, + "loss": 1.7313, + "step": 2695 + }, + { + "epoch": 0.2843881856540084, + "grad_norm": 0.7470909357070923, + "learning_rate": 0.0012332330044003987, + "loss": 1.7603, + "step": 2696 + }, + { + "epoch": 0.28449367088607597, + "grad_norm": 0.7124586701393127, + "learning_rate": 0.0012330409760098208, + "loss": 1.7019, + "step": 2697 + }, + { + "epoch": 0.28459915611814346, + "grad_norm": 0.5635649561882019, + "learning_rate": 0.0012328488934920932, + "loss": 1.7182, + "step": 2698 + }, + { + "epoch": 0.28470464135021095, + "grad_norm": 0.7354134321212769, + "learning_rate": 0.001232656756868739, + "loss": 1.7063, + "step": 2699 + }, + { + "epoch": 0.2848101265822785, + "grad_norm": 0.6921523809432983, + "learning_rate": 0.0012324645661612886, + "loss": 1.6754, + "step": 2700 + }, + { + "epoch": 0.284915611814346, + "grad_norm": 0.8467967510223389, + "learning_rate": 0.001232272321391278, + "loss": 1.7562, + "step": 2701 + }, + { + "epoch": 0.2850210970464135, + "grad_norm": 0.7192379236221313, + "learning_rate": 0.0012320800225802488, + "loss": 1.7394, + "step": 2702 + }, + { + "epoch": 0.28512658227848103, + "grad_norm": 0.6693114638328552, + "learning_rate": 0.001231887669749749, + "loss": 1.7349, + "step": 2703 + }, + { + "epoch": 0.2852320675105485, + "grad_norm": 0.5791452527046204, + "learning_rate": 0.0012316952629213332, + "loss": 1.7064, + "step": 2704 + }, + { + "epoch": 0.285337552742616, + "grad_norm": 0.550238311290741, + "learning_rate": 0.001231502802116561, + "loss": 1.7381, + "step": 2705 + }, + { + "epoch": 0.28544303797468357, + "grad_norm": 0.59062659740448, + "learning_rate": 
0.0012313102873569993, + "loss": 1.7248, + "step": 2706 + }, + { + "epoch": 0.28554852320675106, + "grad_norm": 0.6934171915054321, + "learning_rate": 0.0012311177186642194, + "loss": 1.718, + "step": 2707 + }, + { + "epoch": 0.28565400843881855, + "grad_norm": 0.7435616254806519, + "learning_rate": 0.0012309250960598, + "loss": 1.6929, + "step": 2708 + }, + { + "epoch": 0.2857594936708861, + "grad_norm": 0.5905529856681824, + "learning_rate": 0.0012307324195653256, + "loss": 1.7122, + "step": 2709 + }, + { + "epoch": 0.2858649789029536, + "grad_norm": 0.8111931681632996, + "learning_rate": 0.0012305396892023867, + "loss": 1.7252, + "step": 2710 + }, + { + "epoch": 0.2859704641350211, + "grad_norm": 0.6691953539848328, + "learning_rate": 0.0012303469049925791, + "loss": 1.7196, + "step": 2711 + }, + { + "epoch": 0.28607594936708863, + "grad_norm": 0.6048410534858704, + "learning_rate": 0.001230154066957506, + "loss": 1.7593, + "step": 2712 + }, + { + "epoch": 0.2861814345991561, + "grad_norm": 0.6867150068283081, + "learning_rate": 0.001229961175118775, + "loss": 1.7126, + "step": 2713 + }, + { + "epoch": 0.2862869198312236, + "grad_norm": 0.6931400299072266, + "learning_rate": 0.0012297682294980013, + "loss": 1.7492, + "step": 2714 + }, + { + "epoch": 0.28639240506329117, + "grad_norm": 0.5597832202911377, + "learning_rate": 0.0012295752301168048, + "loss": 1.7359, + "step": 2715 + }, + { + "epoch": 0.28649789029535866, + "grad_norm": 0.9058472514152527, + "learning_rate": 0.0012293821769968126, + "loss": 1.7238, + "step": 2716 + }, + { + "epoch": 0.28660337552742615, + "grad_norm": 0.7321832776069641, + "learning_rate": 0.001229189070159657, + "loss": 1.7544, + "step": 2717 + }, + { + "epoch": 0.28670886075949364, + "grad_norm": 0.5921398997306824, + "learning_rate": 0.0012289959096269767, + "loss": 1.6927, + "step": 2718 + }, + { + "epoch": 0.2868143459915612, + "grad_norm": 0.8038333654403687, + "learning_rate": 0.0012288026954204165, + "loss": 1.6873, + 
"step": 2719 + }, + { + "epoch": 0.2869198312236287, + "grad_norm": 0.6769702434539795, + "learning_rate": 0.0012286094275616264, + "loss": 1.7053, + "step": 2720 + }, + { + "epoch": 0.2870253164556962, + "grad_norm": 0.5416713356971741, + "learning_rate": 0.0012284161060722634, + "loss": 1.704, + "step": 2721 + }, + { + "epoch": 0.2871308016877637, + "grad_norm": 0.7238776683807373, + "learning_rate": 0.00122822273097399, + "loss": 1.7002, + "step": 2722 + }, + { + "epoch": 0.2872362869198312, + "grad_norm": 0.6127561926841736, + "learning_rate": 0.0012280293022884753, + "loss": 1.7203, + "step": 2723 + }, + { + "epoch": 0.2873417721518987, + "grad_norm": 0.588959276676178, + "learning_rate": 0.0012278358200373935, + "loss": 1.6737, + "step": 2724 + }, + { + "epoch": 0.28744725738396626, + "grad_norm": 0.6477018594741821, + "learning_rate": 0.001227642284242425, + "loss": 1.7076, + "step": 2725 + }, + { + "epoch": 0.28755274261603375, + "grad_norm": 0.5753586292266846, + "learning_rate": 0.0012274486949252572, + "loss": 1.7315, + "step": 2726 + }, + { + "epoch": 0.28765822784810124, + "grad_norm": 0.8141101002693176, + "learning_rate": 0.0012272550521075824, + "loss": 1.7097, + "step": 2727 + }, + { + "epoch": 0.2877637130801688, + "grad_norm": 0.8924548029899597, + "learning_rate": 0.0012270613558110993, + "loss": 1.6883, + "step": 2728 + }, + { + "epoch": 0.2878691983122363, + "grad_norm": 0.842298150062561, + "learning_rate": 0.001226867606057512, + "loss": 1.7297, + "step": 2729 + }, + { + "epoch": 0.2879746835443038, + "grad_norm": 0.6130778193473816, + "learning_rate": 0.0012266738028685318, + "loss": 1.7177, + "step": 2730 + }, + { + "epoch": 0.2880801687763713, + "grad_norm": 0.8781532049179077, + "learning_rate": 0.001226479946265875, + "loss": 1.7195, + "step": 2731 + }, + { + "epoch": 0.2881856540084388, + "grad_norm": 0.8783857822418213, + "learning_rate": 0.0012262860362712645, + "loss": 1.7324, + "step": 2732 + }, + { + "epoch": 0.2882911392405063, + 
"grad_norm": 0.5368445515632629, + "learning_rate": 0.0012260920729064285, + "loss": 1.7155, + "step": 2733 + }, + { + "epoch": 0.28839662447257386, + "grad_norm": 0.7336302995681763, + "learning_rate": 0.0012258980561931016, + "loss": 1.6962, + "step": 2734 + }, + { + "epoch": 0.28850210970464135, + "grad_norm": 0.6271884441375732, + "learning_rate": 0.0012257039861530246, + "loss": 1.6881, + "step": 2735 + }, + { + "epoch": 0.28860759493670884, + "grad_norm": 0.9102656245231628, + "learning_rate": 0.0012255098628079439, + "loss": 1.6925, + "step": 2736 + }, + { + "epoch": 0.2887130801687764, + "grad_norm": 0.8258417248725891, + "learning_rate": 0.0012253156861796119, + "loss": 1.7085, + "step": 2737 + }, + { + "epoch": 0.2888185654008439, + "grad_norm": 0.6079517602920532, + "learning_rate": 0.0012251214562897872, + "loss": 1.7238, + "step": 2738 + }, + { + "epoch": 0.2889240506329114, + "grad_norm": 1.0470213890075684, + "learning_rate": 0.0012249271731602342, + "loss": 1.7519, + "step": 2739 + }, + { + "epoch": 0.2890295358649789, + "grad_norm": 0.8284512162208557, + "learning_rate": 0.001224732836812723, + "loss": 1.7613, + "step": 2740 + }, + { + "epoch": 0.2891350210970464, + "grad_norm": 0.5742484927177429, + "learning_rate": 0.0012245384472690302, + "loss": 1.707, + "step": 2741 + }, + { + "epoch": 0.2892405063291139, + "grad_norm": 0.577004611492157, + "learning_rate": 0.0012243440045509384, + "loss": 1.7348, + "step": 2742 + }, + { + "epoch": 0.28934599156118146, + "grad_norm": 0.5455804467201233, + "learning_rate": 0.0012241495086802356, + "loss": 1.6937, + "step": 2743 + }, + { + "epoch": 0.28945147679324895, + "grad_norm": 0.6207886934280396, + "learning_rate": 0.0012239549596787158, + "loss": 1.726, + "step": 2744 + }, + { + "epoch": 0.28955696202531644, + "grad_norm": 0.5287864208221436, + "learning_rate": 0.0012237603575681797, + "loss": 1.7236, + "step": 2745 + }, + { + "epoch": 0.289662447257384, + "grad_norm": 0.598885715007782, + 
"learning_rate": 0.0012235657023704327, + "loss": 1.7294, + "step": 2746 + }, + { + "epoch": 0.2897679324894515, + "grad_norm": 0.6594720482826233, + "learning_rate": 0.001223370994107288, + "loss": 1.7071, + "step": 2747 + }, + { + "epoch": 0.289873417721519, + "grad_norm": 0.6018736362457275, + "learning_rate": 0.0012231762328005623, + "loss": 1.7327, + "step": 2748 + }, + { + "epoch": 0.28997890295358647, + "grad_norm": 0.5512080192565918, + "learning_rate": 0.0012229814184720805, + "loss": 1.7369, + "step": 2749 + }, + { + "epoch": 0.290084388185654, + "grad_norm": 0.5652768611907959, + "learning_rate": 0.0012227865511436724, + "loss": 1.7231, + "step": 2750 + }, + { + "epoch": 0.2901898734177215, + "grad_norm": 0.6021260023117065, + "learning_rate": 0.0012225916308371736, + "loss": 1.7067, + "step": 2751 + }, + { + "epoch": 0.290295358649789, + "grad_norm": 0.6388516426086426, + "learning_rate": 0.001222396657574426, + "loss": 1.7332, + "step": 2752 + }, + { + "epoch": 0.29040084388185655, + "grad_norm": 0.5318418741226196, + "learning_rate": 0.0012222016313772773, + "loss": 1.6978, + "step": 2753 + }, + { + "epoch": 0.29050632911392404, + "grad_norm": 0.6598793864250183, + "learning_rate": 0.0012220065522675811, + "loss": 1.7428, + "step": 2754 + }, + { + "epoch": 0.29061181434599154, + "grad_norm": 0.591850757598877, + "learning_rate": 0.0012218114202671973, + "loss": 1.7578, + "step": 2755 + }, + { + "epoch": 0.2907172995780591, + "grad_norm": 0.5924499034881592, + "learning_rate": 0.001221616235397991, + "loss": 1.7326, + "step": 2756 + }, + { + "epoch": 0.2908227848101266, + "grad_norm": 0.5975692868232727, + "learning_rate": 0.001221420997681834, + "loss": 1.7076, + "step": 2757 + }, + { + "epoch": 0.29092827004219407, + "grad_norm": 0.5817380547523499, + "learning_rate": 0.0012212257071406037, + "loss": 1.7128, + "step": 2758 + }, + { + "epoch": 0.2910337552742616, + "grad_norm": 0.6518471837043762, + "learning_rate": 0.0012210303637961828, + "loss": 
1.7834, + "step": 2759 + }, + { + "epoch": 0.2911392405063291, + "grad_norm": 0.6945774555206299, + "learning_rate": 0.001220834967670461, + "loss": 1.7316, + "step": 2760 + }, + { + "epoch": 0.2912447257383966, + "grad_norm": 0.6992396116256714, + "learning_rate": 0.0012206395187853334, + "loss": 1.7349, + "step": 2761 + }, + { + "epoch": 0.29135021097046415, + "grad_norm": 0.6013285517692566, + "learning_rate": 0.0012204440171627005, + "loss": 1.7509, + "step": 2762 + }, + { + "epoch": 0.29145569620253164, + "grad_norm": 0.6783084273338318, + "learning_rate": 0.00122024846282447, + "loss": 1.7538, + "step": 2763 + }, + { + "epoch": 0.29156118143459914, + "grad_norm": 0.5651559829711914, + "learning_rate": 0.0012200528557925543, + "loss": 1.6822, + "step": 2764 + }, + { + "epoch": 0.2916666666666667, + "grad_norm": 0.6512115001678467, + "learning_rate": 0.0012198571960888721, + "loss": 1.7351, + "step": 2765 + }, + { + "epoch": 0.2917721518987342, + "grad_norm": 0.5910741686820984, + "learning_rate": 0.0012196614837353481, + "loss": 1.7084, + "step": 2766 + }, + { + "epoch": 0.29187763713080167, + "grad_norm": 0.6022875308990479, + "learning_rate": 0.001219465718753913, + "loss": 1.7178, + "step": 2767 + }, + { + "epoch": 0.2919831223628692, + "grad_norm": 0.533203661441803, + "learning_rate": 0.0012192699011665034, + "loss": 1.6963, + "step": 2768 + }, + { + "epoch": 0.2920886075949367, + "grad_norm": 0.6333370804786682, + "learning_rate": 0.0012190740309950612, + "loss": 1.7313, + "step": 2769 + }, + { + "epoch": 0.2921940928270042, + "grad_norm": 0.5840874314308167, + "learning_rate": 0.0012188781082615346, + "loss": 1.7411, + "step": 2770 + }, + { + "epoch": 0.29229957805907175, + "grad_norm": 0.541674017906189, + "learning_rate": 0.0012186821329878783, + "loss": 1.7169, + "step": 2771 + }, + { + "epoch": 0.29240506329113924, + "grad_norm": 0.627842128276825, + "learning_rate": 0.0012184861051960517, + "loss": 1.7434, + "step": 2772 + }, + { + "epoch": 
0.29251054852320674, + "grad_norm": 0.768291175365448, + "learning_rate": 0.001218290024908021, + "loss": 1.7257, + "step": 2773 + }, + { + "epoch": 0.2926160337552743, + "grad_norm": 0.6435198783874512, + "learning_rate": 0.0012180938921457576, + "loss": 1.7145, + "step": 2774 + }, + { + "epoch": 0.2927215189873418, + "grad_norm": 0.5444157123565674, + "learning_rate": 0.00121789770693124, + "loss": 1.7068, + "step": 2775 + }, + { + "epoch": 0.29282700421940927, + "grad_norm": 0.5754446387290955, + "learning_rate": 0.001217701469286451, + "loss": 1.7322, + "step": 2776 + }, + { + "epoch": 0.2929324894514768, + "grad_norm": 0.5619425177574158, + "learning_rate": 0.00121750517923338, + "loss": 1.7446, + "step": 2777 + }, + { + "epoch": 0.2930379746835443, + "grad_norm": 0.5967603921890259, + "learning_rate": 0.0012173088367940228, + "loss": 1.6966, + "step": 2778 + }, + { + "epoch": 0.2931434599156118, + "grad_norm": 0.5942963361740112, + "learning_rate": 0.0012171124419903799, + "loss": 1.7293, + "step": 2779 + }, + { + "epoch": 0.29324894514767935, + "grad_norm": 0.5893107652664185, + "learning_rate": 0.0012169159948444588, + "loss": 1.7085, + "step": 2780 + }, + { + "epoch": 0.29335443037974684, + "grad_norm": 0.6884446740150452, + "learning_rate": 0.001216719495378272, + "loss": 1.7188, + "step": 2781 + }, + { + "epoch": 0.29345991561181434, + "grad_norm": 0.5998000502586365, + "learning_rate": 0.0012165229436138388, + "loss": 1.7161, + "step": 2782 + }, + { + "epoch": 0.29356540084388183, + "grad_norm": 0.6323588490486145, + "learning_rate": 0.0012163263395731834, + "loss": 1.7063, + "step": 2783 + }, + { + "epoch": 0.2936708860759494, + "grad_norm": 0.6991434097290039, + "learning_rate": 0.0012161296832783363, + "loss": 1.7217, + "step": 2784 + }, + { + "epoch": 0.29377637130801687, + "grad_norm": 0.6474641561508179, + "learning_rate": 0.0012159329747513338, + "loss": 1.7151, + "step": 2785 + }, + { + "epoch": 0.29388185654008436, + "grad_norm": 
0.6870236396789551, + "learning_rate": 0.001215736214014218, + "loss": 1.6816, + "step": 2786 + }, + { + "epoch": 0.2939873417721519, + "grad_norm": 0.6113909482955933, + "learning_rate": 0.001215539401089037, + "loss": 1.7015, + "step": 2787 + }, + { + "epoch": 0.2940928270042194, + "grad_norm": 0.7536439299583435, + "learning_rate": 0.0012153425359978452, + "loss": 1.7431, + "step": 2788 + }, + { + "epoch": 0.2941983122362869, + "grad_norm": 0.8295890688896179, + "learning_rate": 0.0012151456187627016, + "loss": 1.7519, + "step": 2789 + }, + { + "epoch": 0.29430379746835444, + "grad_norm": 0.552730143070221, + "learning_rate": 0.001214948649405672, + "loss": 1.7197, + "step": 2790 + }, + { + "epoch": 0.29440928270042194, + "grad_norm": 0.7120258212089539, + "learning_rate": 0.0012147516279488275, + "loss": 1.7386, + "step": 2791 + }, + { + "epoch": 0.29451476793248943, + "grad_norm": 0.631781816482544, + "learning_rate": 0.0012145545544142461, + "loss": 1.684, + "step": 2792 + }, + { + "epoch": 0.294620253164557, + "grad_norm": 0.6269472241401672, + "learning_rate": 0.00121435742882401, + "loss": 1.7245, + "step": 2793 + }, + { + "epoch": 0.29472573839662447, + "grad_norm": 0.6471291780471802, + "learning_rate": 0.001214160251200209, + "loss": 1.7241, + "step": 2794 + }, + { + "epoch": 0.29483122362869196, + "grad_norm": 0.6518362760543823, + "learning_rate": 0.0012139630215649369, + "loss": 1.6908, + "step": 2795 + }, + { + "epoch": 0.2949367088607595, + "grad_norm": 0.7826017737388611, + "learning_rate": 0.0012137657399402947, + "loss": 1.7482, + "step": 2796 + }, + { + "epoch": 0.295042194092827, + "grad_norm": 0.6009743213653564, + "learning_rate": 0.0012135684063483891, + "loss": 1.7254, + "step": 2797 + }, + { + "epoch": 0.2951476793248945, + "grad_norm": 0.732081949710846, + "learning_rate": 0.0012133710208113318, + "loss": 1.7379, + "step": 2798 + }, + { + "epoch": 0.29525316455696204, + "grad_norm": 0.7820234894752502, + "learning_rate": 
0.0012131735833512411, + "loss": 1.7254, + "step": 2799 + }, + { + "epoch": 0.29535864978902954, + "grad_norm": 0.6110000014305115, + "learning_rate": 0.0012129760939902407, + "loss": 1.6806, + "step": 2800 + }, + { + "epoch": 0.29546413502109703, + "grad_norm": 1.010272741317749, + "learning_rate": 0.0012127785527504603, + "loss": 1.7416, + "step": 2801 + }, + { + "epoch": 0.2955696202531646, + "grad_norm": 0.8300780057907104, + "learning_rate": 0.0012125809596540357, + "loss": 1.7044, + "step": 2802 + }, + { + "epoch": 0.29567510548523207, + "grad_norm": 0.6854960918426514, + "learning_rate": 0.0012123833147231079, + "loss": 1.7304, + "step": 2803 + }, + { + "epoch": 0.29578059071729956, + "grad_norm": 0.9492891430854797, + "learning_rate": 0.0012121856179798237, + "loss": 1.7277, + "step": 2804 + }, + { + "epoch": 0.2958860759493671, + "grad_norm": 0.8865678310394287, + "learning_rate": 0.0012119878694463366, + "loss": 1.7031, + "step": 2805 + }, + { + "epoch": 0.2959915611814346, + "grad_norm": 0.7050021290779114, + "learning_rate": 0.001211790069144805, + "loss": 1.7243, + "step": 2806 + }, + { + "epoch": 0.2960970464135021, + "grad_norm": 0.8085785508155823, + "learning_rate": 0.0012115922170973935, + "loss": 1.7208, + "step": 2807 + }, + { + "epoch": 0.29620253164556964, + "grad_norm": 0.8417293429374695, + "learning_rate": 0.0012113943133262722, + "loss": 1.7207, + "step": 2808 + }, + { + "epoch": 0.29630801687763714, + "grad_norm": 0.6289182901382446, + "learning_rate": 0.0012111963578536177, + "loss": 1.7503, + "step": 2809 + }, + { + "epoch": 0.29641350210970463, + "grad_norm": 0.6813104748725891, + "learning_rate": 0.0012109983507016114, + "loss": 1.7207, + "step": 2810 + }, + { + "epoch": 0.2965189873417722, + "grad_norm": 0.5845564007759094, + "learning_rate": 0.0012108002918924411, + "loss": 1.7127, + "step": 2811 + }, + { + "epoch": 0.29662447257383967, + "grad_norm": 0.6016592383384705, + "learning_rate": 0.0012106021814483007, + "loss": 1.6998, + 
"step": 2812 + }, + { + "epoch": 0.29672995780590716, + "grad_norm": 0.6404763460159302, + "learning_rate": 0.0012104040193913884, + "loss": 1.715, + "step": 2813 + }, + { + "epoch": 0.2968354430379747, + "grad_norm": 0.5920789837837219, + "learning_rate": 0.0012102058057439104, + "loss": 1.7575, + "step": 2814 + }, + { + "epoch": 0.2969409282700422, + "grad_norm": 0.6154076457023621, + "learning_rate": 0.001210007540528077, + "loss": 1.7211, + "step": 2815 + }, + { + "epoch": 0.2970464135021097, + "grad_norm": 0.592146635055542, + "learning_rate": 0.0012098092237661049, + "loss": 1.6811, + "step": 2816 + }, + { + "epoch": 0.2971518987341772, + "grad_norm": 0.5489823818206787, + "learning_rate": 0.0012096108554802165, + "loss": 1.7212, + "step": 2817 + }, + { + "epoch": 0.29725738396624474, + "grad_norm": 0.6895086169242859, + "learning_rate": 0.0012094124356926397, + "loss": 1.752, + "step": 2818 + }, + { + "epoch": 0.29736286919831223, + "grad_norm": 0.7632346153259277, + "learning_rate": 0.001209213964425609, + "loss": 1.7243, + "step": 2819 + }, + { + "epoch": 0.2974683544303797, + "grad_norm": 0.5848435163497925, + "learning_rate": 0.0012090154417013636, + "loss": 1.7541, + "step": 2820 + }, + { + "epoch": 0.29757383966244727, + "grad_norm": 0.6884308457374573, + "learning_rate": 0.0012088168675421487, + "loss": 1.6935, + "step": 2821 + }, + { + "epoch": 0.29767932489451476, + "grad_norm": 0.7143921256065369, + "learning_rate": 0.0012086182419702165, + "loss": 1.6909, + "step": 2822 + }, + { + "epoch": 0.29778481012658226, + "grad_norm": 0.5732161998748779, + "learning_rate": 0.0012084195650078232, + "loss": 1.7234, + "step": 2823 + }, + { + "epoch": 0.2978902953586498, + "grad_norm": 0.9507518410682678, + "learning_rate": 0.001208220836677232, + "loss": 1.6956, + "step": 2824 + }, + { + "epoch": 0.2979957805907173, + "grad_norm": 0.8010728359222412, + "learning_rate": 0.0012080220570007108, + "loss": 1.7018, + "step": 2825 + }, + { + "epoch": 
0.2981012658227848, + "grad_norm": 0.7303234338760376, + "learning_rate": 0.001207823226000534, + "loss": 1.7017, + "step": 2826 + }, + { + "epoch": 0.29820675105485234, + "grad_norm": 1.3611432313919067, + "learning_rate": 0.0012076243436989823, + "loss": 1.6695, + "step": 2827 + }, + { + "epoch": 0.29831223628691983, + "grad_norm": 0.6850190758705139, + "learning_rate": 0.0012074254101183408, + "loss": 1.6947, + "step": 2828 + }, + { + "epoch": 0.2984177215189873, + "grad_norm": 1.3569262027740479, + "learning_rate": 0.001207226425280901, + "loss": 1.7091, + "step": 2829 + }, + { + "epoch": 0.29852320675105487, + "grad_norm": 0.6107029318809509, + "learning_rate": 0.0012070273892089605, + "loss": 1.6984, + "step": 2830 + }, + { + "epoch": 0.29862869198312236, + "grad_norm": 0.9973471760749817, + "learning_rate": 0.001206828301924822, + "loss": 1.6873, + "step": 2831 + }, + { + "epoch": 0.29873417721518986, + "grad_norm": 0.8869240283966064, + "learning_rate": 0.0012066291634507944, + "loss": 1.7274, + "step": 2832 + }, + { + "epoch": 0.2988396624472574, + "grad_norm": 0.6252869367599487, + "learning_rate": 0.001206429973809192, + "loss": 1.6907, + "step": 2833 + }, + { + "epoch": 0.2989451476793249, + "grad_norm": 1.0625495910644531, + "learning_rate": 0.001206230733022335, + "loss": 1.76, + "step": 2834 + }, + { + "epoch": 0.2990506329113924, + "grad_norm": 0.830562949180603, + "learning_rate": 0.0012060314411125497, + "loss": 1.676, + "step": 2835 + }, + { + "epoch": 0.29915611814345994, + "grad_norm": 0.6261222958564758, + "learning_rate": 0.0012058320981021672, + "loss": 1.6875, + "step": 2836 + }, + { + "epoch": 0.29926160337552743, + "grad_norm": 0.9190272092819214, + "learning_rate": 0.001205632704013525, + "loss": 1.7063, + "step": 2837 + }, + { + "epoch": 0.2993670886075949, + "grad_norm": 0.6072496175765991, + "learning_rate": 0.0012054332588689667, + "loss": 1.6758, + "step": 2838 + }, + { + "epoch": 0.29947257383966247, + "grad_norm": 
0.7233343124389648, + "learning_rate": 0.0012052337626908406, + "loss": 1.6915, + "step": 2839 + }, + { + "epoch": 0.29957805907172996, + "grad_norm": 0.7493374347686768, + "learning_rate": 0.0012050342155015012, + "loss": 1.7347, + "step": 2840 + }, + { + "epoch": 0.29968354430379746, + "grad_norm": 0.5963945984840393, + "learning_rate": 0.0012048346173233091, + "loss": 1.6867, + "step": 2841 + }, + { + "epoch": 0.299789029535865, + "grad_norm": 0.8826276659965515, + "learning_rate": 0.0012046349681786304, + "loss": 1.737, + "step": 2842 + }, + { + "epoch": 0.2998945147679325, + "grad_norm": 0.8860647678375244, + "learning_rate": 0.001204435268089836, + "loss": 1.6929, + "step": 2843 + }, + { + "epoch": 0.3, + "grad_norm": 0.5255698561668396, + "learning_rate": 0.001204235517079304, + "loss": 1.7303, + "step": 2844 + }, + { + "epoch": 0.30010548523206754, + "grad_norm": 0.6627172231674194, + "learning_rate": 0.0012040357151694172, + "loss": 1.7122, + "step": 2845 + }, + { + "epoch": 0.30021097046413503, + "grad_norm": 0.5702851414680481, + "learning_rate": 0.0012038358623825646, + "loss": 1.7179, + "step": 2846 + }, + { + "epoch": 0.3003164556962025, + "grad_norm": 0.5255481004714966, + "learning_rate": 0.0012036359587411405, + "loss": 1.7141, + "step": 2847 + }, + { + "epoch": 0.30042194092827, + "grad_norm": 0.6255142092704773, + "learning_rate": 0.0012034360042675453, + "loss": 1.6786, + "step": 2848 + }, + { + "epoch": 0.30052742616033756, + "grad_norm": 0.6447635293006897, + "learning_rate": 0.0012032359989841849, + "loss": 1.6919, + "step": 2849 + }, + { + "epoch": 0.30063291139240506, + "grad_norm": 0.5719641447067261, + "learning_rate": 0.0012030359429134707, + "loss": 1.7037, + "step": 2850 + }, + { + "epoch": 0.30073839662447255, + "grad_norm": 0.6354784965515137, + "learning_rate": 0.00120283583607782, + "loss": 1.6702, + "step": 2851 + }, + { + "epoch": 0.3008438818565401, + "grad_norm": 0.7332575917243958, + "learning_rate": 0.0012026356784996554, + 
"loss": 1.7188, + "step": 2852 + }, + { + "epoch": 0.3009493670886076, + "grad_norm": 0.6442960500717163, + "learning_rate": 0.0012024354702014066, + "loss": 1.7026, + "step": 2853 + }, + { + "epoch": 0.3010548523206751, + "grad_norm": 0.57463538646698, + "learning_rate": 0.0012022352112055071, + "loss": 1.7046, + "step": 2854 + }, + { + "epoch": 0.30116033755274263, + "grad_norm": 0.6692013740539551, + "learning_rate": 0.001202034901534397, + "loss": 1.7579, + "step": 2855 + }, + { + "epoch": 0.3012658227848101, + "grad_norm": 0.5431938171386719, + "learning_rate": 0.0012018345412105223, + "loss": 1.728, + "step": 2856 + }, + { + "epoch": 0.3013713080168776, + "grad_norm": 0.7656723856925964, + "learning_rate": 0.0012016341302563342, + "loss": 1.7119, + "step": 2857 + }, + { + "epoch": 0.30147679324894516, + "grad_norm": 0.7648784518241882, + "learning_rate": 0.0012014336686942898, + "loss": 1.7045, + "step": 2858 + }, + { + "epoch": 0.30158227848101266, + "grad_norm": 0.5529355406761169, + "learning_rate": 0.0012012331565468518, + "loss": 1.687, + "step": 2859 + }, + { + "epoch": 0.30168776371308015, + "grad_norm": 0.7876601219177246, + "learning_rate": 0.0012010325938364883, + "loss": 1.7169, + "step": 2860 + }, + { + "epoch": 0.3017932489451477, + "grad_norm": 0.5823630094528198, + "learning_rate": 0.0012008319805856737, + "loss": 1.7207, + "step": 2861 + }, + { + "epoch": 0.3018987341772152, + "grad_norm": 0.7926335334777832, + "learning_rate": 0.0012006313168168878, + "loss": 1.7096, + "step": 2862 + }, + { + "epoch": 0.3020042194092827, + "grad_norm": 0.8710632920265198, + "learning_rate": 0.0012004306025526158, + "loss": 1.7025, + "step": 2863 + }, + { + "epoch": 0.30210970464135023, + "grad_norm": 0.5977706909179688, + "learning_rate": 0.0012002298378153485, + "loss": 1.6844, + "step": 2864 + }, + { + "epoch": 0.3022151898734177, + "grad_norm": 0.7270274758338928, + "learning_rate": 0.001200029022627583, + "loss": 1.714, + "step": 2865 + }, + { + "epoch": 
0.3023206751054852, + "grad_norm": 0.7711147665977478, + "learning_rate": 0.0011998281570118213, + "loss": 1.7668, + "step": 2866 + }, + { + "epoch": 0.30242616033755276, + "grad_norm": 0.6113318204879761, + "learning_rate": 0.0011996272409905717, + "loss": 1.7132, + "step": 2867 + }, + { + "epoch": 0.30253164556962026, + "grad_norm": 1.005998134613037, + "learning_rate": 0.0011994262745863478, + "loss": 1.7152, + "step": 2868 + }, + { + "epoch": 0.30263713080168775, + "grad_norm": 0.6913082599639893, + "learning_rate": 0.0011992252578216683, + "loss": 1.6528, + "step": 2869 + }, + { + "epoch": 0.3027426160337553, + "grad_norm": 0.9286509156227112, + "learning_rate": 0.0011990241907190592, + "loss": 1.7544, + "step": 2870 + }, + { + "epoch": 0.3028481012658228, + "grad_norm": 1.020156741142273, + "learning_rate": 0.0011988230733010502, + "loss": 1.7019, + "step": 2871 + }, + { + "epoch": 0.3029535864978903, + "grad_norm": 0.5993337631225586, + "learning_rate": 0.0011986219055901781, + "loss": 1.7004, + "step": 2872 + }, + { + "epoch": 0.30305907172995783, + "grad_norm": 0.7290818691253662, + "learning_rate": 0.0011984206876089842, + "loss": 1.731, + "step": 2873 + }, + { + "epoch": 0.3031645569620253, + "grad_norm": 0.5987440943717957, + "learning_rate": 0.001198219419380016, + "loss": 1.6589, + "step": 2874 + }, + { + "epoch": 0.3032700421940928, + "grad_norm": 0.6953843832015991, + "learning_rate": 0.0011980181009258273, + "loss": 1.6997, + "step": 2875 + }, + { + "epoch": 0.30337552742616036, + "grad_norm": 0.6255649328231812, + "learning_rate": 0.0011978167322689761, + "loss": 1.6822, + "step": 2876 + }, + { + "epoch": 0.30348101265822786, + "grad_norm": 0.799502968788147, + "learning_rate": 0.001197615313432027, + "loss": 1.6808, + "step": 2877 + }, + { + "epoch": 0.30358649789029535, + "grad_norm": 0.8663495182991028, + "learning_rate": 0.00119741384443755, + "loss": 1.7214, + "step": 2878 + }, + { + "epoch": 0.3036919831223629, + "grad_norm": 
0.6049209237098694, + "learning_rate": 0.001197212325308121, + "loss": 1.6985, + "step": 2879 + }, + { + "epoch": 0.3037974683544304, + "grad_norm": 0.7650212645530701, + "learning_rate": 0.001197010756066321, + "loss": 1.703, + "step": 2880 + }, + { + "epoch": 0.3039029535864979, + "grad_norm": 0.6210387349128723, + "learning_rate": 0.0011968091367347367, + "loss": 1.7067, + "step": 2881 + }, + { + "epoch": 0.3040084388185654, + "grad_norm": 0.6446793079376221, + "learning_rate": 0.0011966074673359602, + "loss": 1.7003, + "step": 2882 + }, + { + "epoch": 0.3041139240506329, + "grad_norm": 0.6662588715553284, + "learning_rate": 0.0011964057478925903, + "loss": 1.6643, + "step": 2883 + }, + { + "epoch": 0.3042194092827004, + "grad_norm": 0.7701327204704285, + "learning_rate": 0.0011962039784272306, + "loss": 1.7061, + "step": 2884 + }, + { + "epoch": 0.3043248945147679, + "grad_norm": 0.6452768445014954, + "learning_rate": 0.0011960021589624897, + "loss": 1.7108, + "step": 2885 + }, + { + "epoch": 0.30443037974683546, + "grad_norm": 0.6061689853668213, + "learning_rate": 0.001195800289520983, + "loss": 1.7178, + "step": 2886 + }, + { + "epoch": 0.30453586497890295, + "grad_norm": 0.6044679880142212, + "learning_rate": 0.0011955983701253312, + "loss": 1.7336, + "step": 2887 + }, + { + "epoch": 0.30464135021097044, + "grad_norm": 0.6561599373817444, + "learning_rate": 0.0011953964007981601, + "loss": 1.6718, + "step": 2888 + }, + { + "epoch": 0.304746835443038, + "grad_norm": 0.5591588020324707, + "learning_rate": 0.001195194381562101, + "loss": 1.7412, + "step": 2889 + }, + { + "epoch": 0.3048523206751055, + "grad_norm": 0.5430124402046204, + "learning_rate": 0.0011949923124397917, + "loss": 1.692, + "step": 2890 + }, + { + "epoch": 0.304957805907173, + "grad_norm": 0.6102527379989624, + "learning_rate": 0.0011947901934538747, + "loss": 1.7234, + "step": 2891 + }, + { + "epoch": 0.3050632911392405, + "grad_norm": 0.7186262011528015, + "learning_rate": 
0.0011945880246269987, + "loss": 1.7127, + "step": 2892 + }, + { + "epoch": 0.305168776371308, + "grad_norm": 0.678917646408081, + "learning_rate": 0.0011943858059818178, + "loss": 1.6929, + "step": 2893 + }, + { + "epoch": 0.3052742616033755, + "grad_norm": 0.5991974472999573, + "learning_rate": 0.0011941835375409912, + "loss": 1.7149, + "step": 2894 + }, + { + "epoch": 0.30537974683544306, + "grad_norm": 0.8222271800041199, + "learning_rate": 0.0011939812193271844, + "loss": 1.6996, + "step": 2895 + }, + { + "epoch": 0.30548523206751055, + "grad_norm": 0.5829967260360718, + "learning_rate": 0.001193778851363068, + "loss": 1.7085, + "step": 2896 + }, + { + "epoch": 0.30559071729957804, + "grad_norm": 0.7620936036109924, + "learning_rate": 0.0011935764336713187, + "loss": 1.7309, + "step": 2897 + }, + { + "epoch": 0.3056962025316456, + "grad_norm": 0.9681381583213806, + "learning_rate": 0.0011933739662746178, + "loss": 1.7196, + "step": 2898 + }, + { + "epoch": 0.3058016877637131, + "grad_norm": 0.7081172466278076, + "learning_rate": 0.0011931714491956531, + "loss": 1.7053, + "step": 2899 + }, + { + "epoch": 0.3059071729957806, + "grad_norm": 0.7664505243301392, + "learning_rate": 0.001192968882457118, + "loss": 1.6846, + "step": 2900 + }, + { + "epoch": 0.3060126582278481, + "grad_norm": 0.8717189431190491, + "learning_rate": 0.0011927662660817105, + "loss": 1.7121, + "step": 2901 + }, + { + "epoch": 0.3061181434599156, + "grad_norm": 0.6573538780212402, + "learning_rate": 0.0011925636000921355, + "loss": 1.7445, + "step": 2902 + }, + { + "epoch": 0.3062236286919831, + "grad_norm": 0.7551894783973694, + "learning_rate": 0.0011923608845111017, + "loss": 1.7147, + "step": 2903 + }, + { + "epoch": 0.30632911392405066, + "grad_norm": 0.9554975032806396, + "learning_rate": 0.0011921581193613253, + "loss": 1.7163, + "step": 2904 + }, + { + "epoch": 0.30643459915611815, + "grad_norm": 0.7338685393333435, + "learning_rate": 0.0011919553046655267, + "loss": 1.7198, + 
"step": 2905 + }, + { + "epoch": 0.30654008438818564, + "grad_norm": 0.7345893383026123, + "learning_rate": 0.0011917524404464325, + "loss": 1.7413, + "step": 2906 + }, + { + "epoch": 0.3066455696202532, + "grad_norm": 0.8992466330528259, + "learning_rate": 0.0011915495267267745, + "loss": 1.6808, + "step": 2907 + }, + { + "epoch": 0.3067510548523207, + "grad_norm": 0.6333493590354919, + "learning_rate": 0.0011913465635292903, + "loss": 1.7069, + "step": 2908 + }, + { + "epoch": 0.3068565400843882, + "grad_norm": 0.8052087426185608, + "learning_rate": 0.001191143550876723, + "loss": 1.6959, + "step": 2909 + }, + { + "epoch": 0.3069620253164557, + "grad_norm": 0.6020766496658325, + "learning_rate": 0.001190940488791821, + "loss": 1.681, + "step": 2910 + }, + { + "epoch": 0.3070675105485232, + "grad_norm": 0.8495736122131348, + "learning_rate": 0.0011907373772973384, + "loss": 1.677, + "step": 2911 + }, + { + "epoch": 0.3071729957805907, + "grad_norm": 0.9203757047653198, + "learning_rate": 0.001190534216416035, + "loss": 1.7112, + "step": 2912 + }, + { + "epoch": 0.30727848101265826, + "grad_norm": 0.5753132700920105, + "learning_rate": 0.0011903310061706762, + "loss": 1.6838, + "step": 2913 + }, + { + "epoch": 0.30738396624472575, + "grad_norm": 0.9219061136245728, + "learning_rate": 0.0011901277465840323, + "loss": 1.7289, + "step": 2914 + }, + { + "epoch": 0.30748945147679324, + "grad_norm": 0.960699200630188, + "learning_rate": 0.0011899244376788797, + "loss": 1.7367, + "step": 2915 + }, + { + "epoch": 0.30759493670886073, + "grad_norm": 0.5459678769111633, + "learning_rate": 0.001189721079478, + "loss": 1.7198, + "step": 2916 + }, + { + "epoch": 0.3077004219409283, + "grad_norm": 0.9222418069839478, + "learning_rate": 0.001189517672004181, + "loss": 1.6699, + "step": 2917 + }, + { + "epoch": 0.3078059071729958, + "grad_norm": 0.7814964056015015, + "learning_rate": 0.0011893142152802152, + "loss": 1.6972, + "step": 2918 + }, + { + "epoch": 0.30791139240506327, + 
"grad_norm": 0.6383765339851379, + "learning_rate": 0.0011891107093289007, + "loss": 1.7413, + "step": 2919 + }, + { + "epoch": 0.3080168776371308, + "grad_norm": 0.8565518856048584, + "learning_rate": 0.0011889071541730419, + "loss": 1.6922, + "step": 2920 + }, + { + "epoch": 0.3081223628691983, + "grad_norm": 0.69661545753479, + "learning_rate": 0.0011887035498354475, + "loss": 1.6684, + "step": 2921 + }, + { + "epoch": 0.3082278481012658, + "grad_norm": 0.6171892285346985, + "learning_rate": 0.0011884998963389334, + "loss": 1.7089, + "step": 2922 + }, + { + "epoch": 0.30833333333333335, + "grad_norm": 0.6453232765197754, + "learning_rate": 0.0011882961937063187, + "loss": 1.7222, + "step": 2923 + }, + { + "epoch": 0.30843881856540084, + "grad_norm": 0.7829732298851013, + "learning_rate": 0.0011880924419604305, + "loss": 1.6879, + "step": 2924 + }, + { + "epoch": 0.30854430379746833, + "grad_norm": 0.7031631469726562, + "learning_rate": 0.0011878886411240991, + "loss": 1.6911, + "step": 2925 + }, + { + "epoch": 0.3086497890295359, + "grad_norm": 0.5836666822433472, + "learning_rate": 0.0011876847912201624, + "loss": 1.7046, + "step": 2926 + }, + { + "epoch": 0.3087552742616034, + "grad_norm": 0.6118823885917664, + "learning_rate": 0.0011874808922714623, + "loss": 1.7227, + "step": 2927 + }, + { + "epoch": 0.30886075949367087, + "grad_norm": 0.6188777685165405, + "learning_rate": 0.0011872769443008466, + "loss": 1.7386, + "step": 2928 + }, + { + "epoch": 0.3089662447257384, + "grad_norm": 0.5674414038658142, + "learning_rate": 0.001187072947331169, + "loss": 1.6759, + "step": 2929 + }, + { + "epoch": 0.3090717299578059, + "grad_norm": 0.5580363869667053, + "learning_rate": 0.001186868901385288, + "loss": 1.6849, + "step": 2930 + }, + { + "epoch": 0.3091772151898734, + "grad_norm": 0.49184390902519226, + "learning_rate": 0.0011866648064860683, + "loss": 1.7275, + "step": 2931 + }, + { + "epoch": 0.30928270042194095, + "grad_norm": 0.6338784098625183, + 
"learning_rate": 0.0011864606626563795, + "loss": 1.7554, + "step": 2932 + }, + { + "epoch": 0.30938818565400844, + "grad_norm": 0.5778022408485413, + "learning_rate": 0.0011862564699190972, + "loss": 1.6931, + "step": 2933 + }, + { + "epoch": 0.30949367088607593, + "grad_norm": 0.5380681753158569, + "learning_rate": 0.0011860522282971019, + "loss": 1.7234, + "step": 2934 + }, + { + "epoch": 0.3095991561181435, + "grad_norm": 0.6223295331001282, + "learning_rate": 0.0011858479378132802, + "loss": 1.7124, + "step": 2935 + }, + { + "epoch": 0.309704641350211, + "grad_norm": 0.537689745426178, + "learning_rate": 0.0011856435984905237, + "loss": 1.7415, + "step": 2936 + }, + { + "epoch": 0.30981012658227847, + "grad_norm": 0.5501865744590759, + "learning_rate": 0.00118543921035173, + "loss": 1.6661, + "step": 2937 + }, + { + "epoch": 0.309915611814346, + "grad_norm": 0.6055060029029846, + "learning_rate": 0.001185234773419801, + "loss": 1.6953, + "step": 2938 + }, + { + "epoch": 0.3100210970464135, + "grad_norm": 0.8063340783119202, + "learning_rate": 0.0011850302877176456, + "loss": 1.6548, + "step": 2939 + }, + { + "epoch": 0.310126582278481, + "grad_norm": 0.8027287125587463, + "learning_rate": 0.001184825753268177, + "loss": 1.6824, + "step": 2940 + }, + { + "epoch": 0.31023206751054855, + "grad_norm": 0.5271583795547485, + "learning_rate": 0.0011846211700943148, + "loss": 1.7311, + "step": 2941 + }, + { + "epoch": 0.31033755274261604, + "grad_norm": 0.7032161951065063, + "learning_rate": 0.001184416538218983, + "loss": 1.7034, + "step": 2942 + }, + { + "epoch": 0.31044303797468353, + "grad_norm": 0.7222145795822144, + "learning_rate": 0.0011842118576651122, + "loss": 1.7227, + "step": 2943 + }, + { + "epoch": 0.3105485232067511, + "grad_norm": 0.6759734153747559, + "learning_rate": 0.0011840071284556373, + "loss": 1.749, + "step": 2944 + }, + { + "epoch": 0.3106540084388186, + "grad_norm": 0.570482075214386, + "learning_rate": 0.0011838023506134997, + "loss": 
1.7273, + "step": 2945 + }, + { + "epoch": 0.31075949367088607, + "grad_norm": 0.5892627239227295, + "learning_rate": 0.0011835975241616455, + "loss": 1.6843, + "step": 2946 + }, + { + "epoch": 0.31086497890295356, + "grad_norm": 0.5515057444572449, + "learning_rate": 0.0011833926491230265, + "loss": 1.7206, + "step": 2947 + }, + { + "epoch": 0.3109704641350211, + "grad_norm": 0.6321721076965332, + "learning_rate": 0.0011831877255206002, + "loss": 1.7261, + "step": 2948 + }, + { + "epoch": 0.3110759493670886, + "grad_norm": 0.6420926451683044, + "learning_rate": 0.0011829827533773292, + "loss": 1.7251, + "step": 2949 + }, + { + "epoch": 0.3111814345991561, + "grad_norm": 0.7378538250923157, + "learning_rate": 0.0011827777327161814, + "loss": 1.6876, + "step": 2950 + }, + { + "epoch": 0.31128691983122364, + "grad_norm": 0.5591055750846863, + "learning_rate": 0.001182572663560131, + "loss": 1.674, + "step": 2951 + }, + { + "epoch": 0.31139240506329113, + "grad_norm": 0.9426929950714111, + "learning_rate": 0.0011823675459321564, + "loss": 1.6959, + "step": 2952 + }, + { + "epoch": 0.3114978902953586, + "grad_norm": 0.8166945576667786, + "learning_rate": 0.0011821623798552424, + "loss": 1.7098, + "step": 2953 + }, + { + "epoch": 0.3116033755274262, + "grad_norm": 0.7084601521492004, + "learning_rate": 0.001181957165352379, + "loss": 1.6713, + "step": 2954 + }, + { + "epoch": 0.31170886075949367, + "grad_norm": 1.084167242050171, + "learning_rate": 0.0011817519024465608, + "loss": 1.6923, + "step": 2955 + }, + { + "epoch": 0.31181434599156116, + "grad_norm": 0.5514090657234192, + "learning_rate": 0.0011815465911607893, + "loss": 1.7061, + "step": 2956 + }, + { + "epoch": 0.3119198312236287, + "grad_norm": 1.0639287233352661, + "learning_rate": 0.0011813412315180704, + "loss": 1.7195, + "step": 2957 + }, + { + "epoch": 0.3120253164556962, + "grad_norm": 0.6663340330123901, + "learning_rate": 0.0011811358235414154, + "loss": 1.6649, + "step": 2958 + }, + { + "epoch": 
0.3121308016877637, + "grad_norm": 0.7657527327537537, + "learning_rate": 0.0011809303672538417, + "loss": 1.7086, + "step": 2959 + }, + { + "epoch": 0.31223628691983124, + "grad_norm": 0.8175531625747681, + "learning_rate": 0.0011807248626783714, + "loss": 1.701, + "step": 2960 + }, + { + "epoch": 0.31234177215189873, + "grad_norm": 0.5198732018470764, + "learning_rate": 0.0011805193098380327, + "loss": 1.6705, + "step": 2961 + }, + { + "epoch": 0.3124472573839662, + "grad_norm": 0.8098794221878052, + "learning_rate": 0.0011803137087558584, + "loss": 1.6983, + "step": 2962 + }, + { + "epoch": 0.3125527426160338, + "grad_norm": 0.6151401400566101, + "learning_rate": 0.0011801080594548874, + "loss": 1.6939, + "step": 2963 + }, + { + "epoch": 0.31265822784810127, + "grad_norm": 0.6463494896888733, + "learning_rate": 0.0011799023619581638, + "loss": 1.6547, + "step": 2964 + }, + { + "epoch": 0.31276371308016876, + "grad_norm": 0.9707182049751282, + "learning_rate": 0.0011796966162887364, + "loss": 1.6863, + "step": 2965 + }, + { + "epoch": 0.3128691983122363, + "grad_norm": 0.575859785079956, + "learning_rate": 0.0011794908224696608, + "loss": 1.6926, + "step": 2966 + }, + { + "epoch": 0.3129746835443038, + "grad_norm": 0.8645239472389221, + "learning_rate": 0.0011792849805239967, + "loss": 1.7155, + "step": 2967 + }, + { + "epoch": 0.3130801687763713, + "grad_norm": 0.7762258052825928, + "learning_rate": 0.0011790790904748103, + "loss": 1.7216, + "step": 2968 + }, + { + "epoch": 0.31318565400843884, + "grad_norm": 0.6061776280403137, + "learning_rate": 0.0011788731523451718, + "loss": 1.6801, + "step": 2969 + }, + { + "epoch": 0.31329113924050633, + "grad_norm": 0.8586156964302063, + "learning_rate": 0.0011786671661581584, + "loss": 1.6937, + "step": 2970 + }, + { + "epoch": 0.3133966244725738, + "grad_norm": 0.5675486326217651, + "learning_rate": 0.0011784611319368512, + "loss": 1.6549, + "step": 2971 + }, + { + "epoch": 0.3135021097046414, + "grad_norm": 
0.8744390606880188, + "learning_rate": 0.0011782550497043379, + "loss": 1.6792, + "step": 2972 + }, + { + "epoch": 0.31360759493670887, + "grad_norm": 0.5862585306167603, + "learning_rate": 0.0011780489194837106, + "loss": 1.7036, + "step": 2973 + }, + { + "epoch": 0.31371308016877636, + "grad_norm": 0.9326998591423035, + "learning_rate": 0.0011778427412980675, + "loss": 1.7161, + "step": 2974 + }, + { + "epoch": 0.3138185654008439, + "grad_norm": 0.9070796370506287, + "learning_rate": 0.0011776365151705119, + "loss": 1.6723, + "step": 2975 + }, + { + "epoch": 0.3139240506329114, + "grad_norm": 0.6404147148132324, + "learning_rate": 0.0011774302411241525, + "loss": 1.6915, + "step": 2976 + }, + { + "epoch": 0.3140295358649789, + "grad_norm": 0.8034524917602539, + "learning_rate": 0.0011772239191821029, + "loss": 1.713, + "step": 2977 + }, + { + "epoch": 0.31413502109704644, + "grad_norm": 0.5713402628898621, + "learning_rate": 0.0011770175493674827, + "loss": 1.6969, + "step": 2978 + }, + { + "epoch": 0.31424050632911393, + "grad_norm": 0.8903684616088867, + "learning_rate": 0.0011768111317034173, + "loss": 1.7137, + "step": 2979 + }, + { + "epoch": 0.3143459915611814, + "grad_norm": 0.7160112857818604, + "learning_rate": 0.001176604666213036, + "loss": 1.734, + "step": 2980 + }, + { + "epoch": 0.3144514767932489, + "grad_norm": 0.6689348816871643, + "learning_rate": 0.0011763981529194748, + "loss": 1.7407, + "step": 2981 + }, + { + "epoch": 0.31455696202531647, + "grad_norm": 1.0583722591400146, + "learning_rate": 0.001176191591845874, + "loss": 1.6845, + "step": 2982 + }, + { + "epoch": 0.31466244725738396, + "grad_norm": 0.8099325299263, + "learning_rate": 0.0011759849830153806, + "loss": 1.7053, + "step": 2983 + }, + { + "epoch": 0.31476793248945145, + "grad_norm": 0.6168746948242188, + "learning_rate": 0.0011757783264511456, + "loss": 1.7015, + "step": 2984 + }, + { + "epoch": 0.314873417721519, + "grad_norm": 0.6887666583061218, + "learning_rate": 
0.001175571622176326, + "loss": 1.6922, + "step": 2985 + }, + { + "epoch": 0.3149789029535865, + "grad_norm": 0.5720744729042053, + "learning_rate": 0.0011753648702140837, + "loss": 1.7273, + "step": 2986 + }, + { + "epoch": 0.315084388185654, + "grad_norm": 0.5808117389678955, + "learning_rate": 0.001175158070587587, + "loss": 1.7308, + "step": 2987 + }, + { + "epoch": 0.31518987341772153, + "grad_norm": 0.6115036010742188, + "learning_rate": 0.0011749512233200081, + "loss": 1.715, + "step": 2988 + }, + { + "epoch": 0.315295358649789, + "grad_norm": 0.5560402870178223, + "learning_rate": 0.001174744328434526, + "loss": 1.7395, + "step": 2989 + }, + { + "epoch": 0.3154008438818565, + "grad_norm": 0.5710960030555725, + "learning_rate": 0.0011745373859543236, + "loss": 1.7156, + "step": 2990 + }, + { + "epoch": 0.31550632911392407, + "grad_norm": 0.6850401759147644, + "learning_rate": 0.0011743303959025906, + "loss": 1.6694, + "step": 2991 + }, + { + "epoch": 0.31561181434599156, + "grad_norm": 0.5394611358642578, + "learning_rate": 0.0011741233583025205, + "loss": 1.6824, + "step": 2992 + }, + { + "epoch": 0.31571729957805905, + "grad_norm": 0.6362937688827515, + "learning_rate": 0.0011739162731773133, + "loss": 1.6465, + "step": 2993 + }, + { + "epoch": 0.3158227848101266, + "grad_norm": 0.5725205540657043, + "learning_rate": 0.0011737091405501741, + "loss": 1.7183, + "step": 2994 + }, + { + "epoch": 0.3159282700421941, + "grad_norm": 0.5259007811546326, + "learning_rate": 0.0011735019604443126, + "loss": 1.7359, + "step": 2995 + }, + { + "epoch": 0.3160337552742616, + "grad_norm": 0.6140416860580444, + "learning_rate": 0.0011732947328829447, + "loss": 1.6757, + "step": 2996 + }, + { + "epoch": 0.31613924050632913, + "grad_norm": 0.5371164679527283, + "learning_rate": 0.0011730874578892913, + "loss": 1.6832, + "step": 2997 + }, + { + "epoch": 0.3162447257383966, + "grad_norm": 0.5851504802703857, + "learning_rate": 0.0011728801354865786, + "loss": 1.6794, + "step": 
2998 + }, + { + "epoch": 0.3163502109704641, + "grad_norm": 0.5767219066619873, + "learning_rate": 0.0011726727656980378, + "loss": 1.7085, + "step": 2999 + }, + { + "epoch": 0.31645569620253167, + "grad_norm": 0.6016729474067688, + "learning_rate": 0.0011724653485469063, + "loss": 1.6953, + "step": 3000 + }, + { + "epoch": 0.31656118143459916, + "grad_norm": 0.6045137643814087, + "learning_rate": 0.0011722578840564256, + "loss": 1.6911, + "step": 3001 + }, + { + "epoch": 0.31666666666666665, + "grad_norm": 0.5566709637641907, + "learning_rate": 0.0011720503722498436, + "loss": 1.675, + "step": 3002 + }, + { + "epoch": 0.3167721518987342, + "grad_norm": 0.5867782235145569, + "learning_rate": 0.0011718428131504127, + "loss": 1.6677, + "step": 3003 + }, + { + "epoch": 0.3168776371308017, + "grad_norm": 0.5521369576454163, + "learning_rate": 0.0011716352067813914, + "loss": 1.7511, + "step": 3004 + }, + { + "epoch": 0.3169831223628692, + "grad_norm": 0.5639520883560181, + "learning_rate": 0.0011714275531660423, + "loss": 1.7124, + "step": 3005 + }, + { + "epoch": 0.31708860759493673, + "grad_norm": 0.5543733239173889, + "learning_rate": 0.0011712198523276347, + "loss": 1.7185, + "step": 3006 + }, + { + "epoch": 0.3171940928270042, + "grad_norm": 0.5006607174873352, + "learning_rate": 0.0011710121042894425, + "loss": 1.6958, + "step": 3007 + }, + { + "epoch": 0.3172995780590717, + "grad_norm": 0.5978516936302185, + "learning_rate": 0.0011708043090747442, + "loss": 1.6911, + "step": 3008 + }, + { + "epoch": 0.31740506329113927, + "grad_norm": 0.5815473794937134, + "learning_rate": 0.001170596466706825, + "loss": 1.691, + "step": 3009 + }, + { + "epoch": 0.31751054852320676, + "grad_norm": 0.5671709775924683, + "learning_rate": 0.0011703885772089743, + "loss": 1.6989, + "step": 3010 + }, + { + "epoch": 0.31761603375527425, + "grad_norm": 0.7140976786613464, + "learning_rate": 0.0011701806406044875, + "loss": 1.6814, + "step": 3011 + }, + { + "epoch": 0.31772151898734174, 
+ "grad_norm": 0.6574289798736572, + "learning_rate": 0.0011699726569166643, + "loss": 1.6886, + "step": 3012 + }, + { + "epoch": 0.3178270042194093, + "grad_norm": 0.8711857199668884, + "learning_rate": 0.0011697646261688108, + "loss": 1.6692, + "step": 3013 + }, + { + "epoch": 0.3179324894514768, + "grad_norm": 0.7787351012229919, + "learning_rate": 0.0011695565483842382, + "loss": 1.7042, + "step": 3014 + }, + { + "epoch": 0.3180379746835443, + "grad_norm": 0.7562683820724487, + "learning_rate": 0.001169348423586262, + "loss": 1.7207, + "step": 3015 + }, + { + "epoch": 0.3181434599156118, + "grad_norm": 0.931656539440155, + "learning_rate": 0.0011691402517982038, + "loss": 1.7055, + "step": 3016 + }, + { + "epoch": 0.3182489451476793, + "grad_norm": 0.6536237597465515, + "learning_rate": 0.0011689320330433904, + "loss": 1.7347, + "step": 3017 + }, + { + "epoch": 0.3183544303797468, + "grad_norm": 0.9700791239738464, + "learning_rate": 0.0011687237673451538, + "loss": 1.7304, + "step": 3018 + }, + { + "epoch": 0.31845991561181436, + "grad_norm": 0.6482995748519897, + "learning_rate": 0.0011685154547268312, + "loss": 1.6862, + "step": 3019 + }, + { + "epoch": 0.31856540084388185, + "grad_norm": 1.1585637331008911, + "learning_rate": 0.0011683070952117646, + "loss": 1.6917, + "step": 3020 + }, + { + "epoch": 0.31867088607594934, + "grad_norm": 0.6147388219833374, + "learning_rate": 0.0011680986888233024, + "loss": 1.6667, + "step": 3021 + }, + { + "epoch": 0.3187763713080169, + "grad_norm": 0.8204049468040466, + "learning_rate": 0.0011678902355847973, + "loss": 1.6966, + "step": 3022 + }, + { + "epoch": 0.3188818565400844, + "grad_norm": 0.5445926785469055, + "learning_rate": 0.0011676817355196075, + "loss": 1.6595, + "step": 3023 + }, + { + "epoch": 0.3189873417721519, + "grad_norm": 1.0380985736846924, + "learning_rate": 0.0011674731886510967, + "loss": 1.6794, + "step": 3024 + }, + { + "epoch": 0.3190928270042194, + "grad_norm": 0.5895813703536987, + 
"learning_rate": 0.0011672645950026332, + "loss": 1.6488, + "step": 3025 + }, + { + "epoch": 0.3191983122362869, + "grad_norm": 0.8480004668235779, + "learning_rate": 0.001167055954597591, + "loss": 1.7136, + "step": 3026 + }, + { + "epoch": 0.3193037974683544, + "grad_norm": 0.5968009829521179, + "learning_rate": 0.0011668472674593497, + "loss": 1.6958, + "step": 3027 + }, + { + "epoch": 0.31940928270042196, + "grad_norm": 0.7836902141571045, + "learning_rate": 0.0011666385336112934, + "loss": 1.7174, + "step": 3028 + }, + { + "epoch": 0.31951476793248945, + "grad_norm": 0.6751335263252258, + "learning_rate": 0.0011664297530768117, + "loss": 1.6759, + "step": 3029 + }, + { + "epoch": 0.31962025316455694, + "grad_norm": 0.7879648208618164, + "learning_rate": 0.0011662209258792998, + "loss": 1.7358, + "step": 3030 + }, + { + "epoch": 0.3197257383966245, + "grad_norm": 0.7725225687026978, + "learning_rate": 0.0011660120520421578, + "loss": 1.7055, + "step": 3031 + }, + { + "epoch": 0.319831223628692, + "grad_norm": 0.7763948440551758, + "learning_rate": 0.0011658031315887908, + "loss": 1.6965, + "step": 3032 + }, + { + "epoch": 0.3199367088607595, + "grad_norm": 0.6269888877868652, + "learning_rate": 0.0011655941645426096, + "loss": 1.674, + "step": 3033 + }, + { + "epoch": 0.320042194092827, + "grad_norm": 0.7244494557380676, + "learning_rate": 0.00116538515092703, + "loss": 1.7313, + "step": 3034 + }, + { + "epoch": 0.3201476793248945, + "grad_norm": 0.5572606325149536, + "learning_rate": 0.0011651760907654728, + "loss": 1.6884, + "step": 3035 + }, + { + "epoch": 0.320253164556962, + "grad_norm": 0.7312152981758118, + "learning_rate": 0.0011649669840813645, + "loss": 1.7423, + "step": 3036 + }, + { + "epoch": 0.32035864978902956, + "grad_norm": 0.546052873134613, + "learning_rate": 0.0011647578308981363, + "loss": 1.7256, + "step": 3037 + }, + { + "epoch": 0.32046413502109705, + "grad_norm": 0.671341061592102, + "learning_rate": 0.001164548631239225, + "loss": 
1.6562, + "step": 3038 + }, + { + "epoch": 0.32056962025316454, + "grad_norm": 0.58878493309021, + "learning_rate": 0.0011643393851280724, + "loss": 1.6791, + "step": 3039 + }, + { + "epoch": 0.3206751054852321, + "grad_norm": 0.6756911277770996, + "learning_rate": 0.0011641300925881257, + "loss": 1.6756, + "step": 3040 + }, + { + "epoch": 0.3207805907172996, + "grad_norm": 0.7305509448051453, + "learning_rate": 0.001163920753642837, + "loss": 1.6879, + "step": 3041 + }, + { + "epoch": 0.3208860759493671, + "grad_norm": 0.7627716064453125, + "learning_rate": 0.001163711368315664, + "loss": 1.6866, + "step": 3042 + }, + { + "epoch": 0.3209915611814346, + "grad_norm": 0.6123584508895874, + "learning_rate": 0.001163501936630069, + "loss": 1.7169, + "step": 3043 + }, + { + "epoch": 0.3210970464135021, + "grad_norm": 0.9095980525016785, + "learning_rate": 0.0011632924586095204, + "loss": 1.7042, + "step": 3044 + }, + { + "epoch": 0.3212025316455696, + "grad_norm": 0.581336259841919, + "learning_rate": 0.0011630829342774906, + "loss": 1.6955, + "step": 3045 + }, + { + "epoch": 0.3213080168776371, + "grad_norm": 0.7348148822784424, + "learning_rate": 0.0011628733636574586, + "loss": 1.7406, + "step": 3046 + }, + { + "epoch": 0.32141350210970465, + "grad_norm": 0.7217309474945068, + "learning_rate": 0.0011626637467729072, + "loss": 1.6529, + "step": 3047 + }, + { + "epoch": 0.32151898734177214, + "grad_norm": 0.6820605397224426, + "learning_rate": 0.0011624540836473252, + "loss": 1.6834, + "step": 3048 + }, + { + "epoch": 0.32162447257383964, + "grad_norm": 0.531399667263031, + "learning_rate": 0.0011622443743042065, + "loss": 1.6693, + "step": 3049 + }, + { + "epoch": 0.3217299578059072, + "grad_norm": 0.5652931332588196, + "learning_rate": 0.0011620346187670501, + "loss": 1.6938, + "step": 3050 + }, + { + "epoch": 0.3218354430379747, + "grad_norm": 0.5757699608802795, + "learning_rate": 0.0011618248170593597, + "loss": 1.6909, + "step": 3051 + }, + { + "epoch": 
0.32194092827004217, + "grad_norm": 0.5035116672515869, + "learning_rate": 0.0011616149692046454, + "loss": 1.6715, + "step": 3052 + }, + { + "epoch": 0.3220464135021097, + "grad_norm": 0.6096426248550415, + "learning_rate": 0.0011614050752264216, + "loss": 1.7223, + "step": 3053 + }, + { + "epoch": 0.3221518987341772, + "grad_norm": 0.6423020362854004, + "learning_rate": 0.0011611951351482071, + "loss": 1.7002, + "step": 3054 + }, + { + "epoch": 0.3222573839662447, + "grad_norm": 0.6588013172149658, + "learning_rate": 0.0011609851489935274, + "loss": 1.7238, + "step": 3055 + }, + { + "epoch": 0.32236286919831225, + "grad_norm": 0.6198142766952515, + "learning_rate": 0.0011607751167859125, + "loss": 1.7152, + "step": 3056 + }, + { + "epoch": 0.32246835443037974, + "grad_norm": 0.7465327382087708, + "learning_rate": 0.0011605650385488977, + "loss": 1.7076, + "step": 3057 + }, + { + "epoch": 0.32257383966244724, + "grad_norm": 0.5736443996429443, + "learning_rate": 0.0011603549143060225, + "loss": 1.6935, + "step": 3058 + }, + { + "epoch": 0.3226793248945148, + "grad_norm": 0.6906132698059082, + "learning_rate": 0.0011601447440808335, + "loss": 1.7043, + "step": 3059 + }, + { + "epoch": 0.3227848101265823, + "grad_norm": 0.6442501544952393, + "learning_rate": 0.0011599345278968806, + "loss": 1.7184, + "step": 3060 + }, + { + "epoch": 0.32289029535864977, + "grad_norm": 0.6308546662330627, + "learning_rate": 0.0011597242657777195, + "loss": 1.6872, + "step": 3061 + }, + { + "epoch": 0.3229957805907173, + "grad_norm": 0.5583713054656982, + "learning_rate": 0.0011595139577469115, + "loss": 1.6763, + "step": 3062 + }, + { + "epoch": 0.3231012658227848, + "grad_norm": 0.5898340344429016, + "learning_rate": 0.0011593036038280225, + "loss": 1.6926, + "step": 3063 + }, + { + "epoch": 0.3232067510548523, + "grad_norm": 0.5305371880531311, + "learning_rate": 0.0011590932040446236, + "loss": 1.7055, + "step": 3064 + }, + { + "epoch": 0.32331223628691985, + "grad_norm": 
0.5602351427078247, + "learning_rate": 0.0011588827584202914, + "loss": 1.6755, + "step": 3065 + }, + { + "epoch": 0.32341772151898734, + "grad_norm": 0.5950972437858582, + "learning_rate": 0.0011586722669786073, + "loss": 1.6966, + "step": 3066 + }, + { + "epoch": 0.32352320675105484, + "grad_norm": 0.6115118861198425, + "learning_rate": 0.0011584617297431578, + "loss": 1.7089, + "step": 3067 + }, + { + "epoch": 0.3236286919831224, + "grad_norm": 0.5196417570114136, + "learning_rate": 0.0011582511467375346, + "loss": 1.6839, + "step": 3068 + }, + { + "epoch": 0.3237341772151899, + "grad_norm": 0.5919336080551147, + "learning_rate": 0.001158040517985335, + "loss": 1.7064, + "step": 3069 + }, + { + "epoch": 0.32383966244725737, + "grad_norm": 0.5862380266189575, + "learning_rate": 0.0011578298435101604, + "loss": 1.716, + "step": 3070 + }, + { + "epoch": 0.3239451476793249, + "grad_norm": 0.6282337307929993, + "learning_rate": 0.0011576191233356181, + "loss": 1.7325, + "step": 3071 + }, + { + "epoch": 0.3240506329113924, + "grad_norm": 0.6265293955802917, + "learning_rate": 0.0011574083574853208, + "loss": 1.6464, + "step": 3072 + }, + { + "epoch": 0.3241561181434599, + "grad_norm": 0.5297693610191345, + "learning_rate": 0.0011571975459828852, + "loss": 1.6878, + "step": 3073 + }, + { + "epoch": 0.32426160337552745, + "grad_norm": 0.5970214605331421, + "learning_rate": 0.0011569866888519343, + "loss": 1.7028, + "step": 3074 + }, + { + "epoch": 0.32436708860759494, + "grad_norm": 0.5573514103889465, + "learning_rate": 0.0011567757861160955, + "loss": 1.6629, + "step": 3075 + }, + { + "epoch": 0.32447257383966244, + "grad_norm": 0.5729311108589172, + "learning_rate": 0.0011565648377990017, + "loss": 1.6455, + "step": 3076 + }, + { + "epoch": 0.32457805907173, + "grad_norm": 0.5180996656417847, + "learning_rate": 0.0011563538439242902, + "loss": 1.662, + "step": 3077 + }, + { + "epoch": 0.3246835443037975, + "grad_norm": 0.5715532898902893, + "learning_rate": 
0.0011561428045156043, + "loss": 1.6794, + "step": 3078 + }, + { + "epoch": 0.32478902953586497, + "grad_norm": 0.5725281834602356, + "learning_rate": 0.001155931719596592, + "loss": 1.6897, + "step": 3079 + }, + { + "epoch": 0.32489451476793246, + "grad_norm": 0.6016237735748291, + "learning_rate": 0.0011557205891909062, + "loss": 1.7304, + "step": 3080 + }, + { + "epoch": 0.325, + "grad_norm": 0.5930393934249878, + "learning_rate": 0.0011555094133222053, + "loss": 1.6355, + "step": 3081 + }, + { + "epoch": 0.3251054852320675, + "grad_norm": 0.7191144227981567, + "learning_rate": 0.0011552981920141528, + "loss": 1.7023, + "step": 3082 + }, + { + "epoch": 0.325210970464135, + "grad_norm": 0.7444854378700256, + "learning_rate": 0.0011550869252904166, + "loss": 1.6939, + "step": 3083 + }, + { + "epoch": 0.32531645569620254, + "grad_norm": 0.5532559752464294, + "learning_rate": 0.0011548756131746706, + "loss": 1.7, + "step": 3084 + }, + { + "epoch": 0.32542194092827004, + "grad_norm": 0.8372817635536194, + "learning_rate": 0.0011546642556905934, + "loss": 1.7166, + "step": 3085 + }, + { + "epoch": 0.32552742616033753, + "grad_norm": 0.649836003780365, + "learning_rate": 0.0011544528528618682, + "loss": 1.6942, + "step": 3086 + }, + { + "epoch": 0.3256329113924051, + "grad_norm": 0.6392456293106079, + "learning_rate": 0.0011542414047121842, + "loss": 1.7437, + "step": 3087 + }, + { + "epoch": 0.32573839662447257, + "grad_norm": 0.6989343166351318, + "learning_rate": 0.0011540299112652351, + "loss": 1.6696, + "step": 3088 + }, + { + "epoch": 0.32584388185654006, + "grad_norm": 0.6036138534545898, + "learning_rate": 0.00115381837254472, + "loss": 1.6978, + "step": 3089 + }, + { + "epoch": 0.3259493670886076, + "grad_norm": 0.6691973805427551, + "learning_rate": 0.0011536067885743423, + "loss": 1.7248, + "step": 3090 + }, + { + "epoch": 0.3260548523206751, + "grad_norm": 0.5498711466789246, + "learning_rate": 0.0011533951593778115, + "loss": 1.7129, + "step": 3091 + }, + 
{ + "epoch": 0.3261603375527426, + "grad_norm": 0.6210324168205261, + "learning_rate": 0.0011531834849788417, + "loss": 1.676, + "step": 3092 + }, + { + "epoch": 0.32626582278481014, + "grad_norm": 0.5855928063392639, + "learning_rate": 0.0011529717654011518, + "loss": 1.6617, + "step": 3093 + }, + { + "epoch": 0.32637130801687764, + "grad_norm": 0.6035115122795105, + "learning_rate": 0.001152760000668466, + "loss": 1.6819, + "step": 3094 + }, + { + "epoch": 0.32647679324894513, + "grad_norm": 0.5346202254295349, + "learning_rate": 0.001152548190804514, + "loss": 1.6769, + "step": 3095 + }, + { + "epoch": 0.3265822784810127, + "grad_norm": 0.53151535987854, + "learning_rate": 0.0011523363358330301, + "loss": 1.6851, + "step": 3096 + }, + { + "epoch": 0.32668776371308017, + "grad_norm": 0.6004726886749268, + "learning_rate": 0.0011521244357777533, + "loss": 1.6695, + "step": 3097 + }, + { + "epoch": 0.32679324894514766, + "grad_norm": 0.503022313117981, + "learning_rate": 0.0011519124906624284, + "loss": 1.7061, + "step": 3098 + }, + { + "epoch": 0.3268987341772152, + "grad_norm": 0.750214159488678, + "learning_rate": 0.0011517005005108048, + "loss": 1.676, + "step": 3099 + }, + { + "epoch": 0.3270042194092827, + "grad_norm": 0.7153443694114685, + "learning_rate": 0.001151488465346637, + "loss": 1.7, + "step": 3100 + }, + { + "epoch": 0.3271097046413502, + "grad_norm": 0.559505820274353, + "learning_rate": 0.0011512763851936848, + "loss": 1.6927, + "step": 3101 + }, + { + "epoch": 0.32721518987341774, + "grad_norm": 0.6362743377685547, + "learning_rate": 0.0011510642600757123, + "loss": 1.7082, + "step": 3102 + }, + { + "epoch": 0.32732067510548524, + "grad_norm": 0.6169916987419128, + "learning_rate": 0.00115085209001649, + "loss": 1.7102, + "step": 3103 + }, + { + "epoch": 0.32742616033755273, + "grad_norm": 0.6038335561752319, + "learning_rate": 0.0011506398750397919, + "loss": 1.6728, + "step": 3104 + }, + { + "epoch": 0.3275316455696203, + "grad_norm": 
0.7572118043899536, + "learning_rate": 0.0011504276151693984, + "loss": 1.6852, + "step": 3105 + }, + { + "epoch": 0.32763713080168777, + "grad_norm": 0.6154852509498596, + "learning_rate": 0.0011502153104290937, + "loss": 1.7054, + "step": 3106 + }, + { + "epoch": 0.32774261603375526, + "grad_norm": 0.7075600028038025, + "learning_rate": 0.0011500029608426676, + "loss": 1.6686, + "step": 3107 + }, + { + "epoch": 0.3278481012658228, + "grad_norm": 0.8497433662414551, + "learning_rate": 0.0011497905664339153, + "loss": 1.6726, + "step": 3108 + }, + { + "epoch": 0.3279535864978903, + "grad_norm": 0.633171796798706, + "learning_rate": 0.0011495781272266366, + "loss": 1.7117, + "step": 3109 + }, + { + "epoch": 0.3280590717299578, + "grad_norm": 0.7346598505973816, + "learning_rate": 0.0011493656432446362, + "loss": 1.6961, + "step": 3110 + }, + { + "epoch": 0.3281645569620253, + "grad_norm": 0.7149184346199036, + "learning_rate": 0.0011491531145117243, + "loss": 1.7029, + "step": 3111 + }, + { + "epoch": 0.32827004219409284, + "grad_norm": 0.6306958198547363, + "learning_rate": 0.0011489405410517151, + "loss": 1.6856, + "step": 3112 + }, + { + "epoch": 0.32837552742616033, + "grad_norm": 0.9122109413146973, + "learning_rate": 0.0011487279228884293, + "loss": 1.6853, + "step": 3113 + }, + { + "epoch": 0.3284810126582278, + "grad_norm": 0.8024755716323853, + "learning_rate": 0.0011485152600456913, + "loss": 1.7032, + "step": 3114 + }, + { + "epoch": 0.32858649789029537, + "grad_norm": 0.6281011700630188, + "learning_rate": 0.0011483025525473314, + "loss": 1.6784, + "step": 3115 + }, + { + "epoch": 0.32869198312236286, + "grad_norm": 0.841999888420105, + "learning_rate": 0.001148089800417184, + "loss": 1.6733, + "step": 3116 + }, + { + "epoch": 0.32879746835443036, + "grad_norm": 0.6215574741363525, + "learning_rate": 0.00114787700367909, + "loss": 1.6836, + "step": 3117 + }, + { + "epoch": 0.3289029535864979, + "grad_norm": 0.8337342143058777, + "learning_rate": 
0.0011476641623568934, + "loss": 1.664, + "step": 3118 + }, + { + "epoch": 0.3290084388185654, + "grad_norm": 0.9396091103553772, + "learning_rate": 0.0011474512764744445, + "loss": 1.6937, + "step": 3119 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 0.6143617033958435, + "learning_rate": 0.0011472383460555983, + "loss": 1.7428, + "step": 3120 + }, + { + "epoch": 0.32921940928270044, + "grad_norm": 0.793931782245636, + "learning_rate": 0.0011470253711242146, + "loss": 1.6927, + "step": 3121 + }, + { + "epoch": 0.32932489451476793, + "grad_norm": 0.670964241027832, + "learning_rate": 0.001146812351704158, + "loss": 1.6804, + "step": 3122 + }, + { + "epoch": 0.3294303797468354, + "grad_norm": 0.7556331157684326, + "learning_rate": 0.001146599287819299, + "loss": 1.7484, + "step": 3123 + }, + { + "epoch": 0.32953586497890297, + "grad_norm": 1.0771087408065796, + "learning_rate": 0.0011463861794935122, + "loss": 1.6761, + "step": 3124 + }, + { + "epoch": 0.32964135021097046, + "grad_norm": 0.5356592535972595, + "learning_rate": 0.0011461730267506775, + "loss": 1.7234, + "step": 3125 + }, + { + "epoch": 0.32974683544303796, + "grad_norm": 0.8109096884727478, + "learning_rate": 0.0011459598296146795, + "loss": 1.7026, + "step": 3126 + }, + { + "epoch": 0.3298523206751055, + "grad_norm": 0.6134926080703735, + "learning_rate": 0.001145746588109408, + "loss": 1.6976, + "step": 3127 + }, + { + "epoch": 0.329957805907173, + "grad_norm": 0.8712722063064575, + "learning_rate": 0.0011455333022587582, + "loss": 1.6945, + "step": 3128 + }, + { + "epoch": 0.3300632911392405, + "grad_norm": 0.8999170064926147, + "learning_rate": 0.0011453199720866296, + "loss": 1.7153, + "step": 3129 + }, + { + "epoch": 0.33016877637130804, + "grad_norm": 0.5924164056777954, + "learning_rate": 0.001145106597616927, + "loss": 1.715, + "step": 3130 + }, + { + "epoch": 0.33027426160337553, + "grad_norm": 0.9019118547439575, + "learning_rate": 0.0011448931788735595, + "loss": 1.6816, + "step": 
3131 + }, + { + "epoch": 0.330379746835443, + "grad_norm": 0.534149706363678, + "learning_rate": 0.0011446797158804426, + "loss": 1.7, + "step": 3132 + }, + { + "epoch": 0.33048523206751057, + "grad_norm": 0.8679245710372925, + "learning_rate": 0.0011444662086614952, + "loss": 1.6422, + "step": 3133 + }, + { + "epoch": 0.33059071729957806, + "grad_norm": 0.7632002234458923, + "learning_rate": 0.0011442526572406422, + "loss": 1.7214, + "step": 3134 + }, + { + "epoch": 0.33069620253164556, + "grad_norm": 0.7066748738288879, + "learning_rate": 0.001144039061641813, + "loss": 1.6447, + "step": 3135 + }, + { + "epoch": 0.3308016877637131, + "grad_norm": 0.9056064486503601, + "learning_rate": 0.0011438254218889422, + "loss": 1.6758, + "step": 3136 + }, + { + "epoch": 0.3309071729957806, + "grad_norm": 0.6341511607170105, + "learning_rate": 0.0011436117380059692, + "loss": 1.6493, + "step": 3137 + }, + { + "epoch": 0.3310126582278481, + "grad_norm": 0.8701412677764893, + "learning_rate": 0.0011433980100168382, + "loss": 1.7248, + "step": 3138 + }, + { + "epoch": 0.33111814345991564, + "grad_norm": 0.8543267250061035, + "learning_rate": 0.0011431842379454982, + "loss": 1.7065, + "step": 3139 + }, + { + "epoch": 0.33122362869198313, + "grad_norm": 0.8038463592529297, + "learning_rate": 0.001142970421815904, + "loss": 1.6766, + "step": 3140 + }, + { + "epoch": 0.3313291139240506, + "grad_norm": 0.6491302847862244, + "learning_rate": 0.0011427565616520144, + "loss": 1.7182, + "step": 3141 + }, + { + "epoch": 0.33143459915611817, + "grad_norm": 0.6844155192375183, + "learning_rate": 0.0011425426574777936, + "loss": 1.6707, + "step": 3142 + }, + { + "epoch": 0.33154008438818566, + "grad_norm": 0.5601357817649841, + "learning_rate": 0.0011423287093172106, + "loss": 1.705, + "step": 3143 + }, + { + "epoch": 0.33164556962025316, + "grad_norm": 0.5973260998725891, + "learning_rate": 0.0011421147171942398, + "loss": 1.7068, + "step": 3144 + }, + { + "epoch": 0.33175105485232065, + 
"grad_norm": 0.5595605373382568, + "learning_rate": 0.0011419006811328593, + "loss": 1.7082, + "step": 3145 + }, + { + "epoch": 0.3318565400843882, + "grad_norm": 0.6543859839439392, + "learning_rate": 0.0011416866011570534, + "loss": 1.6873, + "step": 3146 + }, + { + "epoch": 0.3319620253164557, + "grad_norm": 0.6488057374954224, + "learning_rate": 0.0011414724772908105, + "loss": 1.6708, + "step": 3147 + }, + { + "epoch": 0.3320675105485232, + "grad_norm": 0.5573769211769104, + "learning_rate": 0.0011412583095581248, + "loss": 1.6751, + "step": 3148 + }, + { + "epoch": 0.33217299578059073, + "grad_norm": 0.7651211619377136, + "learning_rate": 0.0011410440979829942, + "loss": 1.6788, + "step": 3149 + }, + { + "epoch": 0.3322784810126582, + "grad_norm": 0.5230357646942139, + "learning_rate": 0.0011408298425894226, + "loss": 1.6825, + "step": 3150 + }, + { + "epoch": 0.3323839662447257, + "grad_norm": 0.8633323907852173, + "learning_rate": 0.0011406155434014185, + "loss": 1.6885, + "step": 3151 + }, + { + "epoch": 0.33248945147679326, + "grad_norm": 0.8741127848625183, + "learning_rate": 0.0011404012004429948, + "loss": 1.6832, + "step": 3152 + }, + { + "epoch": 0.33259493670886076, + "grad_norm": 0.5661185383796692, + "learning_rate": 0.00114018681373817, + "loss": 1.6977, + "step": 3153 + }, + { + "epoch": 0.33270042194092825, + "grad_norm": 0.6481982469558716, + "learning_rate": 0.001139972383310967, + "loss": 1.697, + "step": 3154 + }, + { + "epoch": 0.3328059071729958, + "grad_norm": 0.576563835144043, + "learning_rate": 0.0011397579091854137, + "loss": 1.6931, + "step": 3155 + }, + { + "epoch": 0.3329113924050633, + "grad_norm": 0.6549649834632874, + "learning_rate": 0.0011395433913855434, + "loss": 1.6877, + "step": 3156 + }, + { + "epoch": 0.3330168776371308, + "grad_norm": 0.5881607532501221, + "learning_rate": 0.0011393288299353934, + "loss": 1.6836, + "step": 3157 + }, + { + "epoch": 0.33312236286919833, + "grad_norm": 0.6413782835006714, + 
"learning_rate": 0.001139114224859007, + "loss": 1.6704, + "step": 3158 + }, + { + "epoch": 0.3332278481012658, + "grad_norm": 0.6308808326721191, + "learning_rate": 0.0011388995761804311, + "loss": 1.7014, + "step": 3159 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.5971371531486511, + "learning_rate": 0.0011386848839237186, + "loss": 1.692, + "step": 3160 + }, + { + "epoch": 0.33343881856540086, + "grad_norm": 0.6190614104270935, + "learning_rate": 0.0011384701481129266, + "loss": 1.6914, + "step": 3161 + }, + { + "epoch": 0.33354430379746836, + "grad_norm": 0.5004486441612244, + "learning_rate": 0.0011382553687721174, + "loss": 1.6843, + "step": 3162 + }, + { + "epoch": 0.33364978902953585, + "grad_norm": 0.5821259617805481, + "learning_rate": 0.0011380405459253582, + "loss": 1.683, + "step": 3163 + }, + { + "epoch": 0.3337552742616034, + "grad_norm": 0.5381121039390564, + "learning_rate": 0.0011378256795967208, + "loss": 1.7067, + "step": 3164 + }, + { + "epoch": 0.3338607594936709, + "grad_norm": 0.637647271156311, + "learning_rate": 0.0011376107698102822, + "loss": 1.6832, + "step": 3165 + }, + { + "epoch": 0.3339662447257384, + "grad_norm": 0.5249194502830505, + "learning_rate": 0.001137395816590124, + "loss": 1.7229, + "step": 3166 + }, + { + "epoch": 0.33407172995780593, + "grad_norm": 0.6929455399513245, + "learning_rate": 0.001137180819960333, + "loss": 1.662, + "step": 3167 + }, + { + "epoch": 0.3341772151898734, + "grad_norm": 0.6795527338981628, + "learning_rate": 0.0011369657799450005, + "loss": 1.6986, + "step": 3168 + }, + { + "epoch": 0.3342827004219409, + "grad_norm": 0.599939227104187, + "learning_rate": 0.0011367506965682225, + "loss": 1.7047, + "step": 3169 + }, + { + "epoch": 0.33438818565400846, + "grad_norm": 0.5605489015579224, + "learning_rate": 0.0011365355698541005, + "loss": 1.6482, + "step": 3170 + }, + { + "epoch": 0.33449367088607596, + "grad_norm": 0.5634242296218872, + "learning_rate": 0.0011363203998267406, + "loss": 
1.6804, + "step": 3171 + }, + { + "epoch": 0.33459915611814345, + "grad_norm": 0.6087498068809509, + "learning_rate": 0.0011361051865102533, + "loss": 1.6684, + "step": 3172 + }, + { + "epoch": 0.334704641350211, + "grad_norm": 0.6453419327735901, + "learning_rate": 0.0011358899299287546, + "loss": 1.6996, + "step": 3173 + }, + { + "epoch": 0.3348101265822785, + "grad_norm": 0.6130910515785217, + "learning_rate": 0.0011356746301063652, + "loss": 1.6994, + "step": 3174 + }, + { + "epoch": 0.334915611814346, + "grad_norm": 0.6062207818031311, + "learning_rate": 0.0011354592870672104, + "loss": 1.695, + "step": 3175 + }, + { + "epoch": 0.33502109704641353, + "grad_norm": 0.5260416865348816, + "learning_rate": 0.0011352439008354201, + "loss": 1.6819, + "step": 3176 + }, + { + "epoch": 0.335126582278481, + "grad_norm": 0.6274133324623108, + "learning_rate": 0.0011350284714351298, + "loss": 1.6891, + "step": 3177 + }, + { + "epoch": 0.3352320675105485, + "grad_norm": 0.5723053216934204, + "learning_rate": 0.0011348129988904797, + "loss": 1.6744, + "step": 3178 + }, + { + "epoch": 0.335337552742616, + "grad_norm": 0.6029011011123657, + "learning_rate": 0.0011345974832256138, + "loss": 1.6442, + "step": 3179 + }, + { + "epoch": 0.33544303797468356, + "grad_norm": 0.6413357257843018, + "learning_rate": 0.0011343819244646824, + "loss": 1.723, + "step": 3180 + }, + { + "epoch": 0.33554852320675105, + "grad_norm": 0.7092902660369873, + "learning_rate": 0.0011341663226318395, + "loss": 1.6564, + "step": 3181 + }, + { + "epoch": 0.33565400843881854, + "grad_norm": 0.6832759976387024, + "learning_rate": 0.0011339506777512446, + "loss": 1.6687, + "step": 3182 + }, + { + "epoch": 0.3357594936708861, + "grad_norm": 0.6031545400619507, + "learning_rate": 0.0011337349898470617, + "loss": 1.6864, + "step": 3183 + }, + { + "epoch": 0.3358649789029536, + "grad_norm": 0.5828854441642761, + "learning_rate": 0.0011335192589434597, + "loss": 1.6952, + "step": 3184 + }, + { + "epoch": 
0.3359704641350211, + "grad_norm": 0.5262449979782104, + "learning_rate": 0.0011333034850646124, + "loss": 1.6988, + "step": 3185 + }, + { + "epoch": 0.3360759493670886, + "grad_norm": 0.6168988943099976, + "learning_rate": 0.0011330876682346981, + "loss": 1.6871, + "step": 3186 + }, + { + "epoch": 0.3361814345991561, + "grad_norm": 0.5680065751075745, + "learning_rate": 0.0011328718084779004, + "loss": 1.7153, + "step": 3187 + }, + { + "epoch": 0.3362869198312236, + "grad_norm": 0.7547072768211365, + "learning_rate": 0.0011326559058184075, + "loss": 1.69, + "step": 3188 + }, + { + "epoch": 0.33639240506329116, + "grad_norm": 0.5028789043426514, + "learning_rate": 0.001132439960280412, + "loss": 1.6706, + "step": 3189 + }, + { + "epoch": 0.33649789029535865, + "grad_norm": 0.7438380122184753, + "learning_rate": 0.001132223971888112, + "loss": 1.7275, + "step": 3190 + }, + { + "epoch": 0.33660337552742614, + "grad_norm": 0.5915769934654236, + "learning_rate": 0.0011320079406657102, + "loss": 1.6837, + "step": 3191 + }, + { + "epoch": 0.3367088607594937, + "grad_norm": 0.6527901887893677, + "learning_rate": 0.0011317918666374138, + "loss": 1.6643, + "step": 3192 + }, + { + "epoch": 0.3368143459915612, + "grad_norm": 0.5841851234436035, + "learning_rate": 0.0011315757498274349, + "loss": 1.7241, + "step": 3193 + }, + { + "epoch": 0.3369198312236287, + "grad_norm": 0.6639875173568726, + "learning_rate": 0.0011313595902599904, + "loss": 1.709, + "step": 3194 + }, + { + "epoch": 0.3370253164556962, + "grad_norm": 0.5808886885643005, + "learning_rate": 0.0011311433879593023, + "loss": 1.6538, + "step": 3195 + }, + { + "epoch": 0.3371308016877637, + "grad_norm": 0.6815524697303772, + "learning_rate": 0.001130927142949597, + "loss": 1.7247, + "step": 3196 + }, + { + "epoch": 0.3372362869198312, + "grad_norm": 0.595551073551178, + "learning_rate": 0.001130710855255106, + "loss": 1.6327, + "step": 3197 + }, + { + "epoch": 0.33734177215189876, + "grad_norm": 
0.5975286960601807, + "learning_rate": 0.001130494524900065, + "loss": 1.6846, + "step": 3198 + }, + { + "epoch": 0.33744725738396625, + "grad_norm": 0.7678396701812744, + "learning_rate": 0.0011302781519087154, + "loss": 1.6519, + "step": 3199 + }, + { + "epoch": 0.33755274261603374, + "grad_norm": 0.6526070237159729, + "learning_rate": 0.0011300617363053024, + "loss": 1.6942, + "step": 3200 + }, + { + "epoch": 0.3376582278481013, + "grad_norm": 0.6050896048545837, + "learning_rate": 0.0011298452781140769, + "loss": 1.6808, + "step": 3201 + }, + { + "epoch": 0.3377637130801688, + "grad_norm": 0.7060108780860901, + "learning_rate": 0.0011296287773592938, + "loss": 1.7207, + "step": 3202 + }, + { + "epoch": 0.3378691983122363, + "grad_norm": 0.9717117547988892, + "learning_rate": 0.0011294122340652132, + "loss": 1.7129, + "step": 3203 + }, + { + "epoch": 0.3379746835443038, + "grad_norm": 0.6778128147125244, + "learning_rate": 0.0011291956482561, + "loss": 1.7007, + "step": 3204 + }, + { + "epoch": 0.3380801687763713, + "grad_norm": 0.7040000557899475, + "learning_rate": 0.0011289790199562233, + "loss": 1.7049, + "step": 3205 + }, + { + "epoch": 0.3381856540084388, + "grad_norm": 0.6533977389335632, + "learning_rate": 0.001128762349189858, + "loss": 1.7066, + "step": 3206 + }, + { + "epoch": 0.33829113924050636, + "grad_norm": 0.7135905027389526, + "learning_rate": 0.0011285456359812825, + "loss": 1.7034, + "step": 3207 + }, + { + "epoch": 0.33839662447257385, + "grad_norm": 0.7604281306266785, + "learning_rate": 0.0011283288803547809, + "loss": 1.6604, + "step": 3208 + }, + { + "epoch": 0.33850210970464134, + "grad_norm": 0.6437171697616577, + "learning_rate": 0.0011281120823346418, + "loss": 1.6721, + "step": 3209 + }, + { + "epoch": 0.33860759493670883, + "grad_norm": 1.051780104637146, + "learning_rate": 0.0011278952419451586, + "loss": 1.6949, + "step": 3210 + }, + { + "epoch": 0.3387130801687764, + "grad_norm": 0.6423981785774231, + "learning_rate": 
0.0011276783592106291, + "loss": 1.6738, + "step": 3211 + }, + { + "epoch": 0.3388185654008439, + "grad_norm": 0.8222752809524536, + "learning_rate": 0.001127461434155356, + "loss": 1.678, + "step": 3212 + }, + { + "epoch": 0.33892405063291137, + "grad_norm": 0.6562021374702454, + "learning_rate": 0.001127244466803647, + "loss": 1.6587, + "step": 3213 + }, + { + "epoch": 0.3390295358649789, + "grad_norm": 0.7270435094833374, + "learning_rate": 0.0011270274571798147, + "loss": 1.7223, + "step": 3214 + }, + { + "epoch": 0.3391350210970464, + "grad_norm": 0.652885377407074, + "learning_rate": 0.0011268104053081755, + "loss": 1.6943, + "step": 3215 + }, + { + "epoch": 0.3392405063291139, + "grad_norm": 0.6049208641052246, + "learning_rate": 0.0011265933112130516, + "loss": 1.6992, + "step": 3216 + }, + { + "epoch": 0.33934599156118145, + "grad_norm": 0.6016462445259094, + "learning_rate": 0.0011263761749187693, + "loss": 1.6777, + "step": 3217 + }, + { + "epoch": 0.33945147679324894, + "grad_norm": 0.7842075824737549, + "learning_rate": 0.0011261589964496597, + "loss": 1.697, + "step": 3218 + }, + { + "epoch": 0.33955696202531643, + "grad_norm": 0.7335256934165955, + "learning_rate": 0.001125941775830059, + "loss": 1.6669, + "step": 3219 + }, + { + "epoch": 0.339662447257384, + "grad_norm": 0.704109787940979, + "learning_rate": 0.0011257245130843077, + "loss": 1.6959, + "step": 3220 + }, + { + "epoch": 0.3397679324894515, + "grad_norm": 0.9144779443740845, + "learning_rate": 0.0011255072082367512, + "loss": 1.6898, + "step": 3221 + }, + { + "epoch": 0.33987341772151897, + "grad_norm": 0.5869356393814087, + "learning_rate": 0.0011252898613117394, + "loss": 1.6955, + "step": 3222 + }, + { + "epoch": 0.3399789029535865, + "grad_norm": 0.8090496063232422, + "learning_rate": 0.0011250724723336273, + "loss": 1.6934, + "step": 3223 + }, + { + "epoch": 0.340084388185654, + "grad_norm": 0.5956006050109863, + "learning_rate": 0.0011248550413267746, + "loss": 1.6577, + "step": 
3224 + }, + { + "epoch": 0.3401898734177215, + "grad_norm": 0.7514613270759583, + "learning_rate": 0.001124637568315545, + "loss": 1.6881, + "step": 3225 + }, + { + "epoch": 0.34029535864978905, + "grad_norm": 0.6651980876922607, + "learning_rate": 0.001124420053324308, + "loss": 1.6615, + "step": 3226 + }, + { + "epoch": 0.34040084388185654, + "grad_norm": 0.762773871421814, + "learning_rate": 0.001124202496377437, + "loss": 1.6655, + "step": 3227 + }, + { + "epoch": 0.34050632911392403, + "grad_norm": 0.5247664451599121, + "learning_rate": 0.0011239848974993103, + "loss": 1.6598, + "step": 3228 + }, + { + "epoch": 0.3406118143459916, + "grad_norm": 0.7929210662841797, + "learning_rate": 0.0011237672567143107, + "loss": 1.7056, + "step": 3229 + }, + { + "epoch": 0.3407172995780591, + "grad_norm": 0.5377238988876343, + "learning_rate": 0.0011235495740468265, + "loss": 1.6986, + "step": 3230 + }, + { + "epoch": 0.34082278481012657, + "grad_norm": 0.8077965974807739, + "learning_rate": 0.00112333184952125, + "loss": 1.6694, + "step": 3231 + }, + { + "epoch": 0.3409282700421941, + "grad_norm": 0.6377952098846436, + "learning_rate": 0.001123114083161978, + "loss": 1.6672, + "step": 3232 + }, + { + "epoch": 0.3410337552742616, + "grad_norm": 0.6953635215759277, + "learning_rate": 0.0011228962749934123, + "loss": 1.6676, + "step": 3233 + }, + { + "epoch": 0.3411392405063291, + "grad_norm": 0.7408193945884705, + "learning_rate": 0.0011226784250399598, + "loss": 1.6548, + "step": 3234 + }, + { + "epoch": 0.34124472573839665, + "grad_norm": 0.6278428435325623, + "learning_rate": 0.0011224605333260312, + "loss": 1.6412, + "step": 3235 + }, + { + "epoch": 0.34135021097046414, + "grad_norm": 0.795434296131134, + "learning_rate": 0.0011222425998760428, + "loss": 1.6951, + "step": 3236 + }, + { + "epoch": 0.34145569620253163, + "grad_norm": 0.5883035063743591, + "learning_rate": 0.0011220246247144149, + "loss": 1.6521, + "step": 3237 + }, + { + "epoch": 0.3415611814345992, + 
"grad_norm": 0.7549896240234375, + "learning_rate": 0.0011218066078655725, + "loss": 1.6565, + "step": 3238 + }, + { + "epoch": 0.3416666666666667, + "grad_norm": 0.6349897980690002, + "learning_rate": 0.001121588549353946, + "loss": 1.6422, + "step": 3239 + }, + { + "epoch": 0.34177215189873417, + "grad_norm": 0.7609131932258606, + "learning_rate": 0.0011213704492039694, + "loss": 1.6848, + "step": 3240 + }, + { + "epoch": 0.3418776371308017, + "grad_norm": 0.5913417339324951, + "learning_rate": 0.0011211523074400823, + "loss": 1.6696, + "step": 3241 + }, + { + "epoch": 0.3419831223628692, + "grad_norm": 0.7747142314910889, + "learning_rate": 0.0011209341240867282, + "loss": 1.6941, + "step": 3242 + }, + { + "epoch": 0.3420886075949367, + "grad_norm": 0.5868082642555237, + "learning_rate": 0.001120715899168356, + "loss": 1.6602, + "step": 3243 + }, + { + "epoch": 0.3421940928270042, + "grad_norm": 0.6903421878814697, + "learning_rate": 0.0011204976327094187, + "loss": 1.6914, + "step": 3244 + }, + { + "epoch": 0.34229957805907174, + "grad_norm": 0.6468902826309204, + "learning_rate": 0.0011202793247343742, + "loss": 1.6587, + "step": 3245 + }, + { + "epoch": 0.34240506329113923, + "grad_norm": 0.772706925868988, + "learning_rate": 0.001120060975267685, + "loss": 1.6811, + "step": 3246 + }, + { + "epoch": 0.3425105485232067, + "grad_norm": 0.5659087300300598, + "learning_rate": 0.0011198425843338183, + "loss": 1.7036, + "step": 3247 + }, + { + "epoch": 0.3426160337552743, + "grad_norm": 0.7880945801734924, + "learning_rate": 0.0011196241519572457, + "loss": 1.7058, + "step": 3248 + }, + { + "epoch": 0.34272151898734177, + "grad_norm": 0.7922316789627075, + "learning_rate": 0.001119405678162444, + "loss": 1.6907, + "step": 3249 + }, + { + "epoch": 0.34282700421940926, + "grad_norm": 0.6090136170387268, + "learning_rate": 0.001119187162973894, + "loss": 1.6992, + "step": 3250 + }, + { + "epoch": 0.3429324894514768, + "grad_norm": 0.8077101111412048, + 
"learning_rate": 0.0011189686064160811, + "loss": 1.663, + "step": 3251 + }, + { + "epoch": 0.3430379746835443, + "grad_norm": 0.8344103097915649, + "learning_rate": 0.001118750008513496, + "loss": 1.7071, + "step": 3252 + }, + { + "epoch": 0.3431434599156118, + "grad_norm": 0.6485111117362976, + "learning_rate": 0.0011185313692906342, + "loss": 1.6981, + "step": 3253 + }, + { + "epoch": 0.34324894514767934, + "grad_norm": 0.6560565829277039, + "learning_rate": 0.0011183126887719945, + "loss": 1.6678, + "step": 3254 + }, + { + "epoch": 0.34335443037974683, + "grad_norm": 0.7697076797485352, + "learning_rate": 0.0011180939669820813, + "loss": 1.6599, + "step": 3255 + }, + { + "epoch": 0.3434599156118143, + "grad_norm": 0.6859826445579529, + "learning_rate": 0.001117875203945404, + "loss": 1.6422, + "step": 3256 + }, + { + "epoch": 0.3435654008438819, + "grad_norm": 0.5658156871795654, + "learning_rate": 0.0011176563996864754, + "loss": 1.7107, + "step": 3257 + }, + { + "epoch": 0.34367088607594937, + "grad_norm": 0.7007507681846619, + "learning_rate": 0.0011174375542298142, + "loss": 1.6639, + "step": 3258 + }, + { + "epoch": 0.34377637130801686, + "grad_norm": 0.5105016827583313, + "learning_rate": 0.0011172186675999425, + "loss": 1.6727, + "step": 3259 + }, + { + "epoch": 0.3438818565400844, + "grad_norm": 0.612175464630127, + "learning_rate": 0.001116999739821388, + "loss": 1.6656, + "step": 3260 + }, + { + "epoch": 0.3439873417721519, + "grad_norm": 0.5611022114753723, + "learning_rate": 0.0011167807709186828, + "loss": 1.7153, + "step": 3261 + }, + { + "epoch": 0.3440928270042194, + "grad_norm": 0.5902330279350281, + "learning_rate": 0.0011165617609163632, + "loss": 1.698, + "step": 3262 + }, + { + "epoch": 0.34419831223628694, + "grad_norm": 0.6272709965705872, + "learning_rate": 0.0011163427098389706, + "loss": 1.7032, + "step": 3263 + }, + { + "epoch": 0.34430379746835443, + "grad_norm": 0.6317716240882874, + "learning_rate": 0.0011161236177110504, + "loss": 
1.6249, + "step": 3264 + }, + { + "epoch": 0.3444092827004219, + "grad_norm": 0.6003811955451965, + "learning_rate": 0.0011159044845571533, + "loss": 1.6628, + "step": 3265 + }, + { + "epoch": 0.3445147679324895, + "grad_norm": 0.7897863388061523, + "learning_rate": 0.0011156853104018342, + "loss": 1.7046, + "step": 3266 + }, + { + "epoch": 0.34462025316455697, + "grad_norm": 0.6431436538696289, + "learning_rate": 0.0011154660952696525, + "loss": 1.7164, + "step": 3267 + }, + { + "epoch": 0.34472573839662446, + "grad_norm": 0.7645360231399536, + "learning_rate": 0.0011152468391851724, + "loss": 1.7046, + "step": 3268 + }, + { + "epoch": 0.344831223628692, + "grad_norm": 0.6298180818557739, + "learning_rate": 0.0011150275421729628, + "loss": 1.7034, + "step": 3269 + }, + { + "epoch": 0.3449367088607595, + "grad_norm": 0.6355316638946533, + "learning_rate": 0.0011148082042575968, + "loss": 1.6943, + "step": 3270 + }, + { + "epoch": 0.345042194092827, + "grad_norm": 0.5899715423583984, + "learning_rate": 0.0011145888254636526, + "loss": 1.6661, + "step": 3271 + }, + { + "epoch": 0.34514767932489454, + "grad_norm": 0.790313720703125, + "learning_rate": 0.0011143694058157122, + "loss": 1.7266, + "step": 3272 + }, + { + "epoch": 0.34525316455696203, + "grad_norm": 0.6724966764450073, + "learning_rate": 0.0011141499453383632, + "loss": 1.6962, + "step": 3273 + }, + { + "epoch": 0.3453586497890295, + "grad_norm": 0.7721072435379028, + "learning_rate": 0.001113930444056197, + "loss": 1.6917, + "step": 3274 + }, + { + "epoch": 0.3454641350210971, + "grad_norm": 0.8581139445304871, + "learning_rate": 0.00111371090199381, + "loss": 1.7119, + "step": 3275 + }, + { + "epoch": 0.34556962025316457, + "grad_norm": 0.7884678840637207, + "learning_rate": 0.0011134913191758024, + "loss": 1.718, + "step": 3276 + }, + { + "epoch": 0.34567510548523206, + "grad_norm": 0.6040551662445068, + "learning_rate": 0.00111327169562678, + "loss": 1.6884, + "step": 3277 + }, + { + "epoch": 
0.34578059071729955, + "grad_norm": 0.7433796525001526, + "learning_rate": 0.0011130520313713528, + "loss": 1.6454, + "step": 3278 + }, + { + "epoch": 0.3458860759493671, + "grad_norm": 0.5910035371780396, + "learning_rate": 0.0011128323264341352, + "loss": 1.6976, + "step": 3279 + }, + { + "epoch": 0.3459915611814346, + "grad_norm": 0.908137857913971, + "learning_rate": 0.0011126125808397461, + "loss": 1.6992, + "step": 3280 + }, + { + "epoch": 0.3460970464135021, + "grad_norm": 0.5895287394523621, + "learning_rate": 0.0011123927946128092, + "loss": 1.6753, + "step": 3281 + }, + { + "epoch": 0.34620253164556963, + "grad_norm": 0.8373340368270874, + "learning_rate": 0.0011121729677779526, + "loss": 1.6842, + "step": 3282 + }, + { + "epoch": 0.3463080168776371, + "grad_norm": 1.0780467987060547, + "learning_rate": 0.001111953100359809, + "loss": 1.6558, + "step": 3283 + }, + { + "epoch": 0.3464135021097046, + "grad_norm": 0.780758798122406, + "learning_rate": 0.0011117331923830157, + "loss": 1.7005, + "step": 3284 + }, + { + "epoch": 0.34651898734177217, + "grad_norm": 0.9008828401565552, + "learning_rate": 0.0011115132438722143, + "loss": 1.6875, + "step": 3285 + }, + { + "epoch": 0.34662447257383966, + "grad_norm": 0.869410514831543, + "learning_rate": 0.0011112932548520513, + "loss": 1.6983, + "step": 3286 + }, + { + "epoch": 0.34672995780590715, + "grad_norm": 0.6344162225723267, + "learning_rate": 0.0011110732253471777, + "loss": 1.7414, + "step": 3287 + }, + { + "epoch": 0.3468354430379747, + "grad_norm": 1.0526950359344482, + "learning_rate": 0.0011108531553822485, + "loss": 1.6477, + "step": 3288 + }, + { + "epoch": 0.3469409282700422, + "grad_norm": 0.6000604629516602, + "learning_rate": 0.001110633044981924, + "loss": 1.6614, + "step": 3289 + }, + { + "epoch": 0.3470464135021097, + "grad_norm": 1.0837315320968628, + "learning_rate": 0.0011104128941708683, + "loss": 1.6175, + "step": 3290 + }, + { + "epoch": 0.34715189873417723, + "grad_norm": 
0.9421790242195129, + "learning_rate": 0.001110192702973751, + "loss": 1.6663, + "step": 3291 + }, + { + "epoch": 0.3472573839662447, + "grad_norm": 0.6058396697044373, + "learning_rate": 0.001109972471415245, + "loss": 1.6801, + "step": 3292 + }, + { + "epoch": 0.3473628691983122, + "grad_norm": 0.8480358123779297, + "learning_rate": 0.0011097521995200288, + "loss": 1.6839, + "step": 3293 + }, + { + "epoch": 0.34746835443037977, + "grad_norm": 0.6757047772407532, + "learning_rate": 0.0011095318873127844, + "loss": 1.6768, + "step": 3294 + }, + { + "epoch": 0.34757383966244726, + "grad_norm": 0.6877089142799377, + "learning_rate": 0.0011093115348181995, + "loss": 1.6771, + "step": 3295 + }, + { + "epoch": 0.34767932489451475, + "grad_norm": 0.6612163782119751, + "learning_rate": 0.0011090911420609654, + "loss": 1.723, + "step": 3296 + }, + { + "epoch": 0.3477848101265823, + "grad_norm": 0.59202641248703, + "learning_rate": 0.0011088707090657784, + "loss": 1.6915, + "step": 3297 + }, + { + "epoch": 0.3478902953586498, + "grad_norm": 0.688156008720398, + "learning_rate": 0.0011086502358573387, + "loss": 1.6642, + "step": 3298 + }, + { + "epoch": 0.3479957805907173, + "grad_norm": 0.5855370759963989, + "learning_rate": 0.0011084297224603517, + "loss": 1.7049, + "step": 3299 + }, + { + "epoch": 0.34810126582278483, + "grad_norm": 0.7226648926734924, + "learning_rate": 0.001108209168899527, + "loss": 1.6946, + "step": 3300 + }, + { + "epoch": 0.3482067510548523, + "grad_norm": 0.5388634204864502, + "learning_rate": 0.0011079885751995788, + "loss": 1.6911, + "step": 3301 + }, + { + "epoch": 0.3483122362869198, + "grad_norm": 0.7976944446563721, + "learning_rate": 0.0011077679413852258, + "loss": 1.6704, + "step": 3302 + }, + { + "epoch": 0.34841772151898737, + "grad_norm": 0.6071638464927673, + "learning_rate": 0.0011075472674811908, + "loss": 1.7127, + "step": 3303 + }, + { + "epoch": 0.34852320675105486, + "grad_norm": 0.7783722877502441, + "learning_rate": 
0.0011073265535122016, + "loss": 1.6888, + "step": 3304 + }, + { + "epoch": 0.34862869198312235, + "grad_norm": 0.5483695268630981, + "learning_rate": 0.0011071057995029902, + "loss": 1.7071, + "step": 3305 + }, + { + "epoch": 0.3487341772151899, + "grad_norm": 0.6873095035552979, + "learning_rate": 0.0011068850054782933, + "loss": 1.6634, + "step": 3306 + }, + { + "epoch": 0.3488396624472574, + "grad_norm": 0.5426749587059021, + "learning_rate": 0.0011066641714628522, + "loss": 1.6772, + "step": 3307 + }, + { + "epoch": 0.3489451476793249, + "grad_norm": 0.6569273471832275, + "learning_rate": 0.001106443297481412, + "loss": 1.697, + "step": 3308 + }, + { + "epoch": 0.3490506329113924, + "grad_norm": 0.5811034440994263, + "learning_rate": 0.001106222383558723, + "loss": 1.6882, + "step": 3309 + }, + { + "epoch": 0.3491561181434599, + "grad_norm": 0.5510952472686768, + "learning_rate": 0.0011060014297195396, + "loss": 1.6625, + "step": 3310 + }, + { + "epoch": 0.3492616033755274, + "grad_norm": 0.6322005391120911, + "learning_rate": 0.0011057804359886209, + "loss": 1.6844, + "step": 3311 + }, + { + "epoch": 0.3493670886075949, + "grad_norm": 0.5485896468162537, + "learning_rate": 0.0011055594023907302, + "loss": 1.7134, + "step": 3312 + }, + { + "epoch": 0.34947257383966246, + "grad_norm": 0.6269224882125854, + "learning_rate": 0.0011053383289506354, + "loss": 1.6289, + "step": 3313 + }, + { + "epoch": 0.34957805907172995, + "grad_norm": 0.6490100026130676, + "learning_rate": 0.001105117215693109, + "loss": 1.7223, + "step": 3314 + }, + { + "epoch": 0.34968354430379744, + "grad_norm": 0.5771713256835938, + "learning_rate": 0.001104896062642928, + "loss": 1.6719, + "step": 3315 + }, + { + "epoch": 0.349789029535865, + "grad_norm": 0.6592031717300415, + "learning_rate": 0.001104674869824873, + "loss": 1.6598, + "step": 3316 + }, + { + "epoch": 0.3498945147679325, + "grad_norm": 0.5580374002456665, + "learning_rate": 0.0011044536372637307, + "loss": 1.7186, + "step": 
3317 + }, + { + "epoch": 0.35, + "grad_norm": 0.6511110067367554, + "learning_rate": 0.001104232364984291, + "loss": 1.7229, + "step": 3318 + }, + { + "epoch": 0.3501054852320675, + "grad_norm": 0.675641655921936, + "learning_rate": 0.001104011053011348, + "loss": 1.6885, + "step": 3319 + }, + { + "epoch": 0.350210970464135, + "grad_norm": 0.5697359442710876, + "learning_rate": 0.0011037897013697015, + "loss": 1.6839, + "step": 3320 + }, + { + "epoch": 0.3503164556962025, + "grad_norm": 0.6401320695877075, + "learning_rate": 0.0011035683100841548, + "loss": 1.6326, + "step": 3321 + }, + { + "epoch": 0.35042194092827006, + "grad_norm": 0.5424004197120667, + "learning_rate": 0.0011033468791795161, + "loss": 1.6615, + "step": 3322 + }, + { + "epoch": 0.35052742616033755, + "grad_norm": 0.6100664734840393, + "learning_rate": 0.0011031254086805973, + "loss": 1.7246, + "step": 3323 + }, + { + "epoch": 0.35063291139240504, + "grad_norm": 0.5427920818328857, + "learning_rate": 0.0011029038986122156, + "loss": 1.6591, + "step": 3324 + }, + { + "epoch": 0.3507383966244726, + "grad_norm": 0.6571317911148071, + "learning_rate": 0.0011026823489991924, + "loss": 1.7184, + "step": 3325 + }, + { + "epoch": 0.3508438818565401, + "grad_norm": 0.5753554105758667, + "learning_rate": 0.0011024607598663539, + "loss": 1.6538, + "step": 3326 + }, + { + "epoch": 0.3509493670886076, + "grad_norm": 0.6878660321235657, + "learning_rate": 0.001102239131238529, + "loss": 1.7002, + "step": 3327 + }, + { + "epoch": 0.3510548523206751, + "grad_norm": 0.6435391306877136, + "learning_rate": 0.0011020174631405533, + "loss": 1.7004, + "step": 3328 + }, + { + "epoch": 0.3511603375527426, + "grad_norm": 0.5949329137802124, + "learning_rate": 0.0011017957555972656, + "loss": 1.6933, + "step": 3329 + }, + { + "epoch": 0.3512658227848101, + "grad_norm": 0.6317486763000488, + "learning_rate": 0.0011015740086335092, + "loss": 1.6539, + "step": 3330 + }, + { + "epoch": 0.35137130801687766, + "grad_norm": 
0.6161980032920837, + "learning_rate": 0.001101352222274132, + "loss": 1.6775, + "step": 3331 + }, + { + "epoch": 0.35147679324894515, + "grad_norm": 0.6087603569030762, + "learning_rate": 0.0011011303965439863, + "loss": 1.6702, + "step": 3332 + }, + { + "epoch": 0.35158227848101264, + "grad_norm": 0.7363982796669006, + "learning_rate": 0.0011009085314679287, + "loss": 1.7382, + "step": 3333 + }, + { + "epoch": 0.3516877637130802, + "grad_norm": 0.8647283911705017, + "learning_rate": 0.0011006866270708204, + "loss": 1.6642, + "step": 3334 + }, + { + "epoch": 0.3517932489451477, + "grad_norm": 0.6138545274734497, + "learning_rate": 0.0011004646833775269, + "loss": 1.6527, + "step": 3335 + }, + { + "epoch": 0.3518987341772152, + "grad_norm": 0.7101024985313416, + "learning_rate": 0.0011002427004129184, + "loss": 1.68, + "step": 3336 + }, + { + "epoch": 0.3520042194092827, + "grad_norm": 0.820637583732605, + "learning_rate": 0.0011000206782018683, + "loss": 1.6713, + "step": 3337 + }, + { + "epoch": 0.3521097046413502, + "grad_norm": 0.6565256118774414, + "learning_rate": 0.001099798616769256, + "loss": 1.6747, + "step": 3338 + }, + { + "epoch": 0.3522151898734177, + "grad_norm": 0.551174521446228, + "learning_rate": 0.0010995765161399646, + "loss": 1.7001, + "step": 3339 + }, + { + "epoch": 0.35232067510548526, + "grad_norm": 0.5880438685417175, + "learning_rate": 0.0010993543763388814, + "loss": 1.6646, + "step": 3340 + }, + { + "epoch": 0.35242616033755275, + "grad_norm": 0.5347265005111694, + "learning_rate": 0.0010991321973908982, + "loss": 1.6462, + "step": 3341 + }, + { + "epoch": 0.35253164556962024, + "grad_norm": 0.599676787853241, + "learning_rate": 0.0010989099793209112, + "loss": 1.6959, + "step": 3342 + }, + { + "epoch": 0.35263713080168774, + "grad_norm": 0.615686297416687, + "learning_rate": 0.0010986877221538214, + "loss": 1.6602, + "step": 3343 + }, + { + "epoch": 0.3527426160337553, + "grad_norm": 0.6066254377365112, + "learning_rate": 
0.0010984654259145335, + "loss": 1.6997, + "step": 3344 + }, + { + "epoch": 0.3528481012658228, + "grad_norm": 0.5946992635726929, + "learning_rate": 0.0010982430906279572, + "loss": 1.702, + "step": 3345 + }, + { + "epoch": 0.35295358649789027, + "grad_norm": 0.8268540501594543, + "learning_rate": 0.001098020716319006, + "loss": 1.6694, + "step": 3346 + }, + { + "epoch": 0.3530590717299578, + "grad_norm": 0.6425325274467468, + "learning_rate": 0.0010977983030125982, + "loss": 1.6872, + "step": 3347 + }, + { + "epoch": 0.3531645569620253, + "grad_norm": 0.6265702843666077, + "learning_rate": 0.001097575850733656, + "loss": 1.6659, + "step": 3348 + }, + { + "epoch": 0.3532700421940928, + "grad_norm": 0.7464581727981567, + "learning_rate": 0.001097353359507107, + "loss": 1.6368, + "step": 3349 + }, + { + "epoch": 0.35337552742616035, + "grad_norm": 0.5697673559188843, + "learning_rate": 0.0010971308293578814, + "loss": 1.6654, + "step": 3350 + }, + { + "epoch": 0.35348101265822784, + "grad_norm": 0.6225534081459045, + "learning_rate": 0.0010969082603109158, + "loss": 1.6644, + "step": 3351 + }, + { + "epoch": 0.35358649789029534, + "grad_norm": 0.6579681038856506, + "learning_rate": 0.00109668565239115, + "loss": 1.7048, + "step": 3352 + }, + { + "epoch": 0.3536919831223629, + "grad_norm": 0.7381942272186279, + "learning_rate": 0.001096463005623528, + "loss": 1.67, + "step": 3353 + }, + { + "epoch": 0.3537974683544304, + "grad_norm": 0.5857477188110352, + "learning_rate": 0.0010962403200329984, + "loss": 1.6363, + "step": 3354 + }, + { + "epoch": 0.35390295358649787, + "grad_norm": 0.6368505954742432, + "learning_rate": 0.0010960175956445145, + "loss": 1.6947, + "step": 3355 + }, + { + "epoch": 0.3540084388185654, + "grad_norm": 0.6458939909934998, + "learning_rate": 0.0010957948324830337, + "loss": 1.6641, + "step": 3356 + }, + { + "epoch": 0.3541139240506329, + "grad_norm": 0.6418812274932861, + "learning_rate": 0.0010955720305735176, + "loss": 1.6878, + "step": 
3357 + }, + { + "epoch": 0.3542194092827004, + "grad_norm": 0.7471024394035339, + "learning_rate": 0.0010953491899409321, + "loss": 1.6496, + "step": 3358 + }, + { + "epoch": 0.35432489451476795, + "grad_norm": 0.5808180570602417, + "learning_rate": 0.001095126310610248, + "loss": 1.6752, + "step": 3359 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 0.6854507923126221, + "learning_rate": 0.0010949033926064397, + "loss": 1.6997, + "step": 3360 + }, + { + "epoch": 0.35453586497890294, + "grad_norm": 0.5740010142326355, + "learning_rate": 0.0010946804359544867, + "loss": 1.6797, + "step": 3361 + }, + { + "epoch": 0.3546413502109705, + "grad_norm": 0.7734246850013733, + "learning_rate": 0.001094457440679372, + "loss": 1.6992, + "step": 3362 + }, + { + "epoch": 0.354746835443038, + "grad_norm": 0.5584022998809814, + "learning_rate": 0.0010942344068060833, + "loss": 1.6844, + "step": 3363 + }, + { + "epoch": 0.35485232067510547, + "grad_norm": 0.6507749557495117, + "learning_rate": 0.001094011334359613, + "loss": 1.6816, + "step": 3364 + }, + { + "epoch": 0.354957805907173, + "grad_norm": 0.5218636393547058, + "learning_rate": 0.0010937882233649572, + "loss": 1.6386, + "step": 3365 + }, + { + "epoch": 0.3550632911392405, + "grad_norm": 0.7073304057121277, + "learning_rate": 0.0010935650738471167, + "loss": 1.674, + "step": 3366 + }, + { + "epoch": 0.355168776371308, + "grad_norm": 0.5180399417877197, + "learning_rate": 0.0010933418858310965, + "loss": 1.6856, + "step": 3367 + }, + { + "epoch": 0.35527426160337555, + "grad_norm": 0.775424599647522, + "learning_rate": 0.0010931186593419059, + "loss": 1.6837, + "step": 3368 + }, + { + "epoch": 0.35537974683544304, + "grad_norm": 0.5812028050422668, + "learning_rate": 0.0010928953944045585, + "loss": 1.7107, + "step": 3369 + }, + { + "epoch": 0.35548523206751054, + "grad_norm": 0.745546281337738, + "learning_rate": 0.0010926720910440725, + "loss": 1.6783, + "step": 3370 + }, + { + "epoch": 0.3555907172995781, + 
"grad_norm": 0.6453139185905457, + "learning_rate": 0.00109244874928547, + "loss": 1.6849, + "step": 3371 + }, + { + "epoch": 0.3556962025316456, + "grad_norm": 0.5600695610046387, + "learning_rate": 0.0010922253691537773, + "loss": 1.6528, + "step": 3372 + }, + { + "epoch": 0.35580168776371307, + "grad_norm": 0.66029953956604, + "learning_rate": 0.0010920019506740256, + "loss": 1.6607, + "step": 3373 + }, + { + "epoch": 0.35590717299578056, + "grad_norm": 0.6313539147377014, + "learning_rate": 0.00109177849387125, + "loss": 1.6859, + "step": 3374 + }, + { + "epoch": 0.3560126582278481, + "grad_norm": 0.7111948132514954, + "learning_rate": 0.00109155499877049, + "loss": 1.6935, + "step": 3375 + }, + { + "epoch": 0.3561181434599156, + "grad_norm": 0.5844467282295227, + "learning_rate": 0.001091331465396789, + "loss": 1.6869, + "step": 3376 + }, + { + "epoch": 0.3562236286919831, + "grad_norm": 0.7736206650733948, + "learning_rate": 0.0010911078937751954, + "loss": 1.6706, + "step": 3377 + }, + { + "epoch": 0.35632911392405064, + "grad_norm": 0.7647660970687866, + "learning_rate": 0.0010908842839307614, + "loss": 1.6681, + "step": 3378 + }, + { + "epoch": 0.35643459915611814, + "grad_norm": 0.6933203935623169, + "learning_rate": 0.0010906606358885437, + "loss": 1.6815, + "step": 3379 + }, + { + "epoch": 0.35654008438818563, + "grad_norm": 0.6792889833450317, + "learning_rate": 0.001090436949673603, + "loss": 1.6784, + "step": 3380 + }, + { + "epoch": 0.3566455696202532, + "grad_norm": 0.6504720449447632, + "learning_rate": 0.0010902132253110043, + "loss": 1.6631, + "step": 3381 + }, + { + "epoch": 0.35675105485232067, + "grad_norm": 0.5188521146774292, + "learning_rate": 0.0010899894628258174, + "loss": 1.6196, + "step": 3382 + }, + { + "epoch": 0.35685654008438816, + "grad_norm": 0.7297681570053101, + "learning_rate": 0.001089765662243116, + "loss": 1.7056, + "step": 3383 + }, + { + "epoch": 0.3569620253164557, + "grad_norm": 0.6405835747718811, + "learning_rate": 
0.0010895418235879776, + "loss": 1.6862, + "step": 3384 + }, + { + "epoch": 0.3570675105485232, + "grad_norm": 0.6113638877868652, + "learning_rate": 0.0010893179468854848, + "loss": 1.6221, + "step": 3385 + }, + { + "epoch": 0.3571729957805907, + "grad_norm": 0.5842270851135254, + "learning_rate": 0.0010890940321607245, + "loss": 1.6439, + "step": 3386 + }, + { + "epoch": 0.35727848101265824, + "grad_norm": 0.5869910717010498, + "learning_rate": 0.0010888700794387867, + "loss": 1.6984, + "step": 3387 + }, + { + "epoch": 0.35738396624472574, + "grad_norm": 0.6585204601287842, + "learning_rate": 0.0010886460887447667, + "loss": 1.6957, + "step": 3388 + }, + { + "epoch": 0.35748945147679323, + "grad_norm": 0.5251168608665466, + "learning_rate": 0.0010884220601037637, + "loss": 1.7132, + "step": 3389 + }, + { + "epoch": 0.3575949367088608, + "grad_norm": 0.6402048468589783, + "learning_rate": 0.0010881979935408815, + "loss": 1.652, + "step": 3390 + }, + { + "epoch": 0.35770042194092827, + "grad_norm": 0.5718497037887573, + "learning_rate": 0.0010879738890812278, + "loss": 1.676, + "step": 3391 + }, + { + "epoch": 0.35780590717299576, + "grad_norm": 0.6759741306304932, + "learning_rate": 0.0010877497467499146, + "loss": 1.6531, + "step": 3392 + }, + { + "epoch": 0.3579113924050633, + "grad_norm": 0.5969752669334412, + "learning_rate": 0.001087525566572058, + "loss": 1.6625, + "step": 3393 + }, + { + "epoch": 0.3580168776371308, + "grad_norm": 0.6365604400634766, + "learning_rate": 0.0010873013485727782, + "loss": 1.6486, + "step": 3394 + }, + { + "epoch": 0.3581223628691983, + "grad_norm": 0.6450257897377014, + "learning_rate": 0.001087077092777201, + "loss": 1.6923, + "step": 3395 + }, + { + "epoch": 0.35822784810126584, + "grad_norm": 0.6703571677207947, + "learning_rate": 0.0010868527992104545, + "loss": 1.6853, + "step": 3396 + }, + { + "epoch": 0.35833333333333334, + "grad_norm": 0.7112861275672913, + "learning_rate": 0.001086628467897672, + "loss": 1.6681, + 
"step": 3397 + }, + { + "epoch": 0.35843881856540083, + "grad_norm": 0.8983025550842285, + "learning_rate": 0.0010864040988639912, + "loss": 1.6818, + "step": 3398 + }, + { + "epoch": 0.3585443037974684, + "grad_norm": 0.6208280920982361, + "learning_rate": 0.0010861796921345537, + "loss": 1.7189, + "step": 3399 + }, + { + "epoch": 0.35864978902953587, + "grad_norm": 0.8209718465805054, + "learning_rate": 0.0010859552477345052, + "loss": 1.7362, + "step": 3400 + }, + { + "epoch": 0.35875527426160336, + "grad_norm": 0.697842001914978, + "learning_rate": 0.0010857307656889962, + "loss": 1.6922, + "step": 3401 + }, + { + "epoch": 0.3588607594936709, + "grad_norm": 0.6964836120605469, + "learning_rate": 0.0010855062460231807, + "loss": 1.6645, + "step": 3402 + }, + { + "epoch": 0.3589662447257384, + "grad_norm": 0.9451407194137573, + "learning_rate": 0.0010852816887622174, + "loss": 1.7172, + "step": 3403 + }, + { + "epoch": 0.3590717299578059, + "grad_norm": 0.8790950775146484, + "learning_rate": 0.0010850570939312687, + "loss": 1.6853, + "step": 3404 + }, + { + "epoch": 0.35917721518987344, + "grad_norm": 0.5889505743980408, + "learning_rate": 0.0010848324615555024, + "loss": 1.6698, + "step": 3405 + }, + { + "epoch": 0.35928270042194094, + "grad_norm": 0.6251682043075562, + "learning_rate": 0.0010846077916600888, + "loss": 1.6957, + "step": 3406 + }, + { + "epoch": 0.35938818565400843, + "grad_norm": 0.5715717077255249, + "learning_rate": 0.0010843830842702036, + "loss": 1.6597, + "step": 3407 + }, + { + "epoch": 0.3594936708860759, + "grad_norm": 0.6012524962425232, + "learning_rate": 0.0010841583394110266, + "loss": 1.676, + "step": 3408 + }, + { + "epoch": 0.35959915611814347, + "grad_norm": 0.5641628503799438, + "learning_rate": 0.0010839335571077415, + "loss": 1.6832, + "step": 3409 + }, + { + "epoch": 0.35970464135021096, + "grad_norm": 0.6668037176132202, + "learning_rate": 0.001083708737385536, + "loss": 1.6485, + "step": 3410 + }, + { + "epoch": 
0.35981012658227846, + "grad_norm": 0.6823651194572449, + "learning_rate": 0.0010834838802696023, + "loss": 1.6752, + "step": 3411 + }, + { + "epoch": 0.359915611814346, + "grad_norm": 0.6253612637519836, + "learning_rate": 0.0010832589857851373, + "loss": 1.6594, + "step": 3412 + }, + { + "epoch": 0.3600210970464135, + "grad_norm": 0.5997772216796875, + "learning_rate": 0.001083034053957341, + "loss": 1.6666, + "step": 3413 + }, + { + "epoch": 0.360126582278481, + "grad_norm": 0.605349600315094, + "learning_rate": 0.0010828090848114182, + "loss": 1.6459, + "step": 3414 + }, + { + "epoch": 0.36023206751054854, + "grad_norm": 0.5523426532745361, + "learning_rate": 0.001082584078372578, + "loss": 1.6477, + "step": 3415 + }, + { + "epoch": 0.36033755274261603, + "grad_norm": 0.6784523725509644, + "learning_rate": 0.0010823590346660335, + "loss": 1.6698, + "step": 3416 + }, + { + "epoch": 0.3604430379746835, + "grad_norm": 0.572284996509552, + "learning_rate": 0.0010821339537170015, + "loss": 1.6629, + "step": 3417 + }, + { + "epoch": 0.36054852320675107, + "grad_norm": 0.8081411719322205, + "learning_rate": 0.0010819088355507043, + "loss": 1.6497, + "step": 3418 + }, + { + "epoch": 0.36065400843881856, + "grad_norm": 0.7165445685386658, + "learning_rate": 0.0010816836801923666, + "loss": 1.6466, + "step": 3419 + }, + { + "epoch": 0.36075949367088606, + "grad_norm": 0.5836054682731628, + "learning_rate": 0.0010814584876672187, + "loss": 1.6607, + "step": 3420 + }, + { + "epoch": 0.3608649789029536, + "grad_norm": 0.5796100497245789, + "learning_rate": 0.0010812332580004947, + "loss": 1.6836, + "step": 3421 + }, + { + "epoch": 0.3609704641350211, + "grad_norm": 0.5603018403053284, + "learning_rate": 0.0010810079912174323, + "loss": 1.7121, + "step": 3422 + }, + { + "epoch": 0.3610759493670886, + "grad_norm": 0.6592555046081543, + "learning_rate": 0.001080782687343274, + "loss": 1.6777, + "step": 3423 + }, + { + "epoch": 0.36118143459915614, + "grad_norm": 
0.6485758423805237, + "learning_rate": 0.0010805573464032659, + "loss": 1.6942, + "step": 3424 + }, + { + "epoch": 0.36128691983122363, + "grad_norm": 0.5645560026168823, + "learning_rate": 0.0010803319684226593, + "loss": 1.6903, + "step": 3425 + }, + { + "epoch": 0.3613924050632911, + "grad_norm": 0.6846600770950317, + "learning_rate": 0.001080106553426708, + "loss": 1.6446, + "step": 3426 + }, + { + "epoch": 0.36149789029535867, + "grad_norm": 0.5995133519172668, + "learning_rate": 0.0010798811014406716, + "loss": 1.6859, + "step": 3427 + }, + { + "epoch": 0.36160337552742616, + "grad_norm": 0.5807026028633118, + "learning_rate": 0.0010796556124898127, + "loss": 1.6521, + "step": 3428 + }, + { + "epoch": 0.36170886075949366, + "grad_norm": 0.6412133574485779, + "learning_rate": 0.0010794300865993988, + "loss": 1.6535, + "step": 3429 + }, + { + "epoch": 0.3618143459915612, + "grad_norm": 0.553377628326416, + "learning_rate": 0.0010792045237947008, + "loss": 1.6432, + "step": 3430 + }, + { + "epoch": 0.3619198312236287, + "grad_norm": 0.7258496880531311, + "learning_rate": 0.0010789789241009945, + "loss": 1.675, + "step": 3431 + }, + { + "epoch": 0.3620253164556962, + "grad_norm": 0.6347574591636658, + "learning_rate": 0.0010787532875435593, + "loss": 1.6576, + "step": 3432 + }, + { + "epoch": 0.36213080168776374, + "grad_norm": 0.6064067482948303, + "learning_rate": 0.0010785276141476786, + "loss": 1.6722, + "step": 3433 + }, + { + "epoch": 0.36223628691983123, + "grad_norm": 0.5964642763137817, + "learning_rate": 0.001078301903938641, + "loss": 1.6529, + "step": 3434 + }, + { + "epoch": 0.3623417721518987, + "grad_norm": 0.6017929315567017, + "learning_rate": 0.0010780761569417377, + "loss": 1.6884, + "step": 3435 + }, + { + "epoch": 0.36244725738396627, + "grad_norm": 0.7545732855796814, + "learning_rate": 0.0010778503731822652, + "loss": 1.6568, + "step": 3436 + }, + { + "epoch": 0.36255274261603376, + "grad_norm": 0.5441755652427673, + "learning_rate": 
0.0010776245526855235, + "loss": 1.7146, + "step": 3437 + }, + { + "epoch": 0.36265822784810126, + "grad_norm": 0.6409702301025391, + "learning_rate": 0.0010773986954768172, + "loss": 1.6931, + "step": 3438 + }, + { + "epoch": 0.3627637130801688, + "grad_norm": 0.5680525302886963, + "learning_rate": 0.0010771728015814544, + "loss": 1.6454, + "step": 3439 + }, + { + "epoch": 0.3628691983122363, + "grad_norm": 0.5640722513198853, + "learning_rate": 0.0010769468710247478, + "loss": 1.6629, + "step": 3440 + }, + { + "epoch": 0.3629746835443038, + "grad_norm": 0.5148028135299683, + "learning_rate": 0.0010767209038320138, + "loss": 1.6395, + "step": 3441 + }, + { + "epoch": 0.3630801687763713, + "grad_norm": 0.5966982245445251, + "learning_rate": 0.0010764949000285735, + "loss": 1.6746, + "step": 3442 + }, + { + "epoch": 0.36318565400843883, + "grad_norm": 0.629584789276123, + "learning_rate": 0.0010762688596397515, + "loss": 1.6953, + "step": 3443 + }, + { + "epoch": 0.3632911392405063, + "grad_norm": 0.5726338624954224, + "learning_rate": 0.001076042782690877, + "loss": 1.6387, + "step": 3444 + }, + { + "epoch": 0.3633966244725738, + "grad_norm": 0.6207457780838013, + "learning_rate": 0.001075816669207283, + "loss": 1.6984, + "step": 3445 + }, + { + "epoch": 0.36350210970464136, + "grad_norm": 0.6538130044937134, + "learning_rate": 0.0010755905192143063, + "loss": 1.7065, + "step": 3446 + }, + { + "epoch": 0.36360759493670886, + "grad_norm": 0.5081515908241272, + "learning_rate": 0.0010753643327372886, + "loss": 1.644, + "step": 3447 + }, + { + "epoch": 0.36371308016877635, + "grad_norm": 0.6567127704620361, + "learning_rate": 0.0010751381098015747, + "loss": 1.6717, + "step": 3448 + }, + { + "epoch": 0.3638185654008439, + "grad_norm": 0.6239334940910339, + "learning_rate": 0.0010749118504325146, + "loss": 1.6609, + "step": 3449 + }, + { + "epoch": 0.3639240506329114, + "grad_norm": 0.5595077276229858, + "learning_rate": 0.0010746855546554612, + "loss": 1.6377, + 
"step": 3450 + }, + { + "epoch": 0.3640295358649789, + "grad_norm": 0.7557843923568726, + "learning_rate": 0.0010744592224957727, + "loss": 1.6631, + "step": 3451 + }, + { + "epoch": 0.36413502109704643, + "grad_norm": 0.6342592239379883, + "learning_rate": 0.00107423285397881, + "loss": 1.6242, + "step": 3452 + }, + { + "epoch": 0.3642405063291139, + "grad_norm": 0.6059820055961609, + "learning_rate": 0.0010740064491299398, + "loss": 1.6759, + "step": 3453 + }, + { + "epoch": 0.3643459915611814, + "grad_norm": 0.790637195110321, + "learning_rate": 0.0010737800079745308, + "loss": 1.6488, + "step": 3454 + }, + { + "epoch": 0.36445147679324896, + "grad_norm": 0.599638819694519, + "learning_rate": 0.0010735535305379576, + "loss": 1.7085, + "step": 3455 + }, + { + "epoch": 0.36455696202531646, + "grad_norm": 0.7326543927192688, + "learning_rate": 0.001073327016845598, + "loss": 1.6597, + "step": 3456 + }, + { + "epoch": 0.36466244725738395, + "grad_norm": 0.6175320148468018, + "learning_rate": 0.001073100466922834, + "loss": 1.665, + "step": 3457 + }, + { + "epoch": 0.3647679324894515, + "grad_norm": 0.8615133762359619, + "learning_rate": 0.0010728738807950515, + "loss": 1.6901, + "step": 3458 + }, + { + "epoch": 0.364873417721519, + "grad_norm": 0.9831819534301758, + "learning_rate": 0.0010726472584876403, + "loss": 1.6386, + "step": 3459 + }, + { + "epoch": 0.3649789029535865, + "grad_norm": 0.6145330667495728, + "learning_rate": 0.0010724206000259954, + "loss": 1.6767, + "step": 3460 + }, + { + "epoch": 0.36508438818565403, + "grad_norm": 0.7039278745651245, + "learning_rate": 0.0010721939054355145, + "loss": 1.6827, + "step": 3461 + }, + { + "epoch": 0.3651898734177215, + "grad_norm": 0.6447168588638306, + "learning_rate": 0.0010719671747415995, + "loss": 1.6691, + "step": 3462 + }, + { + "epoch": 0.365295358649789, + "grad_norm": 0.645064115524292, + "learning_rate": 0.0010717404079696575, + "loss": 1.6814, + "step": 3463 + }, + { + "epoch": 0.36540084388185656, 
+ "grad_norm": 0.6706565618515015, + "learning_rate": 0.0010715136051450982, + "loss": 1.7041, + "step": 3464 + }, + { + "epoch": 0.36550632911392406, + "grad_norm": 0.6089961528778076, + "learning_rate": 0.0010712867662933364, + "loss": 1.6768, + "step": 3465 + }, + { + "epoch": 0.36561181434599155, + "grad_norm": 0.5834619998931885, + "learning_rate": 0.0010710598914397901, + "loss": 1.6448, + "step": 3466 + }, + { + "epoch": 0.3657172995780591, + "grad_norm": 0.7072096467018127, + "learning_rate": 0.0010708329806098822, + "loss": 1.7049, + "step": 3467 + }, + { + "epoch": 0.3658227848101266, + "grad_norm": 0.5880206227302551, + "learning_rate": 0.001070606033829039, + "loss": 1.6316, + "step": 3468 + }, + { + "epoch": 0.3659282700421941, + "grad_norm": 0.7023890018463135, + "learning_rate": 0.001070379051122691, + "loss": 1.7233, + "step": 3469 + }, + { + "epoch": 0.36603375527426163, + "grad_norm": 0.7559579014778137, + "learning_rate": 0.0010701520325162727, + "loss": 1.6866, + "step": 3470 + }, + { + "epoch": 0.3661392405063291, + "grad_norm": 0.6024960875511169, + "learning_rate": 0.001069924978035223, + "loss": 1.6682, + "step": 3471 + }, + { + "epoch": 0.3662447257383966, + "grad_norm": 0.6271214485168457, + "learning_rate": 0.0010696978877049838, + "loss": 1.7001, + "step": 3472 + }, + { + "epoch": 0.3663502109704641, + "grad_norm": 0.6319669485092163, + "learning_rate": 0.0010694707615510023, + "loss": 1.7265, + "step": 3473 + }, + { + "epoch": 0.36645569620253166, + "grad_norm": 0.6744113564491272, + "learning_rate": 0.0010692435995987293, + "loss": 1.6958, + "step": 3474 + }, + { + "epoch": 0.36656118143459915, + "grad_norm": 0.9264358878135681, + "learning_rate": 0.0010690164018736187, + "loss": 1.7274, + "step": 3475 + }, + { + "epoch": 0.36666666666666664, + "grad_norm": 0.7918448448181152, + "learning_rate": 0.0010687891684011295, + "loss": 1.6662, + "step": 3476 + }, + { + "epoch": 0.3667721518987342, + "grad_norm": 0.7851439714431763, + 
"learning_rate": 0.0010685618992067243, + "loss": 1.6774, + "step": 3477 + }, + { + "epoch": 0.3668776371308017, + "grad_norm": 1.1098456382751465, + "learning_rate": 0.00106833459431587, + "loss": 1.6378, + "step": 3478 + }, + { + "epoch": 0.3669831223628692, + "grad_norm": 0.5385888814926147, + "learning_rate": 0.001068107253754037, + "loss": 1.676, + "step": 3479 + }, + { + "epoch": 0.3670886075949367, + "grad_norm": 1.0384759902954102, + "learning_rate": 0.0010678798775467001, + "loss": 1.6578, + "step": 3480 + }, + { + "epoch": 0.3671940928270042, + "grad_norm": 0.5675515532493591, + "learning_rate": 0.0010676524657193378, + "loss": 1.6866, + "step": 3481 + }, + { + "epoch": 0.3672995780590717, + "grad_norm": 0.837575376033783, + "learning_rate": 0.0010674250182974325, + "loss": 1.6924, + "step": 3482 + }, + { + "epoch": 0.36740506329113926, + "grad_norm": 0.7996034026145935, + "learning_rate": 0.0010671975353064712, + "loss": 1.6517, + "step": 3483 + }, + { + "epoch": 0.36751054852320675, + "grad_norm": 0.7137848138809204, + "learning_rate": 0.0010669700167719443, + "loss": 1.7048, + "step": 3484 + }, + { + "epoch": 0.36761603375527424, + "grad_norm": 0.6604402661323547, + "learning_rate": 0.0010667424627193469, + "loss": 1.6773, + "step": 3485 + }, + { + "epoch": 0.3677215189873418, + "grad_norm": 0.8441234230995178, + "learning_rate": 0.0010665148731741768, + "loss": 1.667, + "step": 3486 + }, + { + "epoch": 0.3678270042194093, + "grad_norm": 0.6561861038208008, + "learning_rate": 0.0010662872481619367, + "loss": 1.659, + "step": 3487 + }, + { + "epoch": 0.3679324894514768, + "grad_norm": 0.9702469706535339, + "learning_rate": 0.0010660595877081335, + "loss": 1.6762, + "step": 3488 + }, + { + "epoch": 0.3680379746835443, + "grad_norm": 0.6918792128562927, + "learning_rate": 0.0010658318918382774, + "loss": 1.6724, + "step": 3489 + }, + { + "epoch": 0.3681434599156118, + "grad_norm": 0.713769257068634, + "learning_rate": 0.0010656041605778832, + "loss": 
1.6814, + "step": 3490 + }, + { + "epoch": 0.3682489451476793, + "grad_norm": 0.7468788623809814, + "learning_rate": 0.0010653763939524688, + "loss": 1.6737, + "step": 3491 + }, + { + "epoch": 0.36835443037974686, + "grad_norm": 0.5651984810829163, + "learning_rate": 0.0010651485919875568, + "loss": 1.6596, + "step": 3492 + }, + { + "epoch": 0.36845991561181435, + "grad_norm": 0.8542428016662598, + "learning_rate": 0.0010649207547086738, + "loss": 1.6433, + "step": 3493 + }, + { + "epoch": 0.36856540084388184, + "grad_norm": 0.7597643733024597, + "learning_rate": 0.0010646928821413499, + "loss": 1.7, + "step": 3494 + }, + { + "epoch": 0.3686708860759494, + "grad_norm": 0.6111069917678833, + "learning_rate": 0.0010644649743111192, + "loss": 1.6693, + "step": 3495 + }, + { + "epoch": 0.3687763713080169, + "grad_norm": 0.5827228426933289, + "learning_rate": 0.0010642370312435201, + "loss": 1.6464, + "step": 3496 + }, + { + "epoch": 0.3688818565400844, + "grad_norm": 0.6087369322776794, + "learning_rate": 0.0010640090529640948, + "loss": 1.6839, + "step": 3497 + }, + { + "epoch": 0.3689873417721519, + "grad_norm": 0.7358559966087341, + "learning_rate": 0.0010637810394983893, + "loss": 1.6267, + "step": 3498 + }, + { + "epoch": 0.3690928270042194, + "grad_norm": 0.7549495100975037, + "learning_rate": 0.0010635529908719537, + "loss": 1.6489, + "step": 3499 + }, + { + "epoch": 0.3691983122362869, + "grad_norm": 0.6238910555839539, + "learning_rate": 0.001063324907110342, + "loss": 1.6198, + "step": 3500 + }, + { + "epoch": 0.36930379746835446, + "grad_norm": 0.8136535882949829, + "learning_rate": 0.001063096788239112, + "loss": 1.6426, + "step": 3501 + }, + { + "epoch": 0.36940928270042195, + "grad_norm": 0.6939119100570679, + "learning_rate": 0.0010628686342838253, + "loss": 1.6767, + "step": 3502 + }, + { + "epoch": 0.36951476793248944, + "grad_norm": 0.7715752124786377, + "learning_rate": 0.0010626404452700486, + "loss": 1.6969, + "step": 3503 + }, + { + "epoch": 
0.369620253164557, + "grad_norm": 0.7183017134666443, + "learning_rate": 0.0010624122212233506, + "loss": 1.7013, + "step": 3504 + }, + { + "epoch": 0.3697257383966245, + "grad_norm": 0.940911054611206, + "learning_rate": 0.0010621839621693056, + "loss": 1.653, + "step": 3505 + }, + { + "epoch": 0.369831223628692, + "grad_norm": 0.6650838255882263, + "learning_rate": 0.0010619556681334909, + "loss": 1.683, + "step": 3506 + }, + { + "epoch": 0.36993670886075947, + "grad_norm": 0.6441161632537842, + "learning_rate": 0.001061727339141488, + "loss": 1.6994, + "step": 3507 + }, + { + "epoch": 0.370042194092827, + "grad_norm": 0.8055761456489563, + "learning_rate": 0.0010614989752188823, + "loss": 1.6486, + "step": 3508 + }, + { + "epoch": 0.3701476793248945, + "grad_norm": 0.6043726801872253, + "learning_rate": 0.0010612705763912635, + "loss": 1.6795, + "step": 3509 + }, + { + "epoch": 0.370253164556962, + "grad_norm": 0.7772912979125977, + "learning_rate": 0.0010610421426842241, + "loss": 1.6786, + "step": 3510 + }, + { + "epoch": 0.37035864978902955, + "grad_norm": 0.5628781914710999, + "learning_rate": 0.0010608136741233618, + "loss": 1.686, + "step": 3511 + }, + { + "epoch": 0.37046413502109704, + "grad_norm": 0.7559870481491089, + "learning_rate": 0.0010605851707342774, + "loss": 1.6619, + "step": 3512 + }, + { + "epoch": 0.37056962025316453, + "grad_norm": 0.5402347445487976, + "learning_rate": 0.0010603566325425758, + "loss": 1.6586, + "step": 3513 + }, + { + "epoch": 0.3706751054852321, + "grad_norm": 0.7563749551773071, + "learning_rate": 0.001060128059573866, + "loss": 1.6674, + "step": 3514 + }, + { + "epoch": 0.3707805907172996, + "grad_norm": 0.536383867263794, + "learning_rate": 0.0010598994518537608, + "loss": 1.6639, + "step": 3515 + }, + { + "epoch": 0.37088607594936707, + "grad_norm": 0.7744184732437134, + "learning_rate": 0.0010596708094078766, + "loss": 1.6989, + "step": 3516 + }, + { + "epoch": 0.3709915611814346, + "grad_norm": 0.5181053280830383, 
+ "learning_rate": 0.0010594421322618341, + "loss": 1.6816, + "step": 3517 + }, + { + "epoch": 0.3710970464135021, + "grad_norm": 0.712695300579071, + "learning_rate": 0.0010592134204412578, + "loss": 1.6603, + "step": 3518 + }, + { + "epoch": 0.3712025316455696, + "grad_norm": 0.535637378692627, + "learning_rate": 0.0010589846739717755, + "loss": 1.687, + "step": 3519 + }, + { + "epoch": 0.37130801687763715, + "grad_norm": 0.6413073539733887, + "learning_rate": 0.00105875589287902, + "loss": 1.6866, + "step": 3520 + }, + { + "epoch": 0.37141350210970464, + "grad_norm": 0.5915167331695557, + "learning_rate": 0.001058527077188627, + "loss": 1.6859, + "step": 3521 + }, + { + "epoch": 0.37151898734177213, + "grad_norm": 0.6945158839225769, + "learning_rate": 0.001058298226926237, + "loss": 1.6689, + "step": 3522 + }, + { + "epoch": 0.3716244725738397, + "grad_norm": 0.5383625030517578, + "learning_rate": 0.0010580693421174928, + "loss": 1.6596, + "step": 3523 + }, + { + "epoch": 0.3717299578059072, + "grad_norm": 0.6478634476661682, + "learning_rate": 0.0010578404227880429, + "loss": 1.633, + "step": 3524 + }, + { + "epoch": 0.37183544303797467, + "grad_norm": 0.5740054249763489, + "learning_rate": 0.0010576114689635383, + "loss": 1.7089, + "step": 3525 + }, + { + "epoch": 0.3719409282700422, + "grad_norm": 0.6226447820663452, + "learning_rate": 0.0010573824806696351, + "loss": 1.6906, + "step": 3526 + }, + { + "epoch": 0.3720464135021097, + "grad_norm": 0.6168699264526367, + "learning_rate": 0.001057153457931992, + "loss": 1.6653, + "step": 3527 + }, + { + "epoch": 0.3721518987341772, + "grad_norm": 0.6962388157844543, + "learning_rate": 0.0010569244007762723, + "loss": 1.711, + "step": 3528 + }, + { + "epoch": 0.37225738396624475, + "grad_norm": 0.598470151424408, + "learning_rate": 0.0010566953092281432, + "loss": 1.645, + "step": 3529 + }, + { + "epoch": 0.37236286919831224, + "grad_norm": 0.7178444862365723, + "learning_rate": 0.0010564661833132752, + "loss": 
1.7329, + "step": 3530 + }, + { + "epoch": 0.37246835443037973, + "grad_norm": 0.6007276177406311, + "learning_rate": 0.0010562370230573432, + "loss": 1.69, + "step": 3531 + }, + { + "epoch": 0.3725738396624473, + "grad_norm": 0.8017824292182922, + "learning_rate": 0.0010560078284860257, + "loss": 1.7178, + "step": 3532 + }, + { + "epoch": 0.3726793248945148, + "grad_norm": 0.559743344783783, + "learning_rate": 0.0010557785996250053, + "loss": 1.6565, + "step": 3533 + }, + { + "epoch": 0.37278481012658227, + "grad_norm": 0.6452385187149048, + "learning_rate": 0.0010555493364999679, + "loss": 1.6981, + "step": 3534 + }, + { + "epoch": 0.3728902953586498, + "grad_norm": 0.6383205056190491, + "learning_rate": 0.001055320039136604, + "loss": 1.6985, + "step": 3535 + }, + { + "epoch": 0.3729957805907173, + "grad_norm": 0.5801429748535156, + "learning_rate": 0.001055090707560607, + "loss": 1.6526, + "step": 3536 + }, + { + "epoch": 0.3731012658227848, + "grad_norm": 0.7078941464424133, + "learning_rate": 0.0010548613417976748, + "loss": 1.6419, + "step": 3537 + }, + { + "epoch": 0.37320675105485235, + "grad_norm": 0.835791826248169, + "learning_rate": 0.0010546319418735094, + "loss": 1.6965, + "step": 3538 + }, + { + "epoch": 0.37331223628691984, + "grad_norm": 0.5711936950683594, + "learning_rate": 0.0010544025078138156, + "loss": 1.668, + "step": 3539 + }, + { + "epoch": 0.37341772151898733, + "grad_norm": 0.9939824938774109, + "learning_rate": 0.001054173039644303, + "loss": 1.6639, + "step": 3540 + }, + { + "epoch": 0.3735232067510548, + "grad_norm": 0.8945290446281433, + "learning_rate": 0.0010539435373906846, + "loss": 1.6652, + "step": 3541 + }, + { + "epoch": 0.3736286919831224, + "grad_norm": 0.624640941619873, + "learning_rate": 0.0010537140010786774, + "loss": 1.7041, + "step": 3542 + }, + { + "epoch": 0.37373417721518987, + "grad_norm": 0.7180119752883911, + "learning_rate": 0.0010534844307340016, + "loss": 1.6602, + "step": 3543 + }, + { + "epoch": 
0.37383966244725736, + "grad_norm": 0.6068240404129028, + "learning_rate": 0.0010532548263823822, + "loss": 1.6424, + "step": 3544 + }, + { + "epoch": 0.3739451476793249, + "grad_norm": 0.9124069809913635, + "learning_rate": 0.0010530251880495473, + "loss": 1.6903, + "step": 3545 + }, + { + "epoch": 0.3740506329113924, + "grad_norm": 0.5691615343093872, + "learning_rate": 0.0010527955157612291, + "loss": 1.6576, + "step": 3546 + }, + { + "epoch": 0.3741561181434599, + "grad_norm": 0.8485403656959534, + "learning_rate": 0.0010525658095431635, + "loss": 1.6283, + "step": 3547 + }, + { + "epoch": 0.37426160337552744, + "grad_norm": 0.7283747792243958, + "learning_rate": 0.00105233606942109, + "loss": 1.6574, + "step": 3548 + }, + { + "epoch": 0.37436708860759493, + "grad_norm": 0.6459931135177612, + "learning_rate": 0.0010521062954207527, + "loss": 1.6592, + "step": 3549 + }, + { + "epoch": 0.3744725738396624, + "grad_norm": 0.7974196672439575, + "learning_rate": 0.0010518764875678981, + "loss": 1.6777, + "step": 3550 + }, + { + "epoch": 0.37457805907173, + "grad_norm": 0.5636102557182312, + "learning_rate": 0.001051646645888278, + "loss": 1.6437, + "step": 3551 + }, + { + "epoch": 0.37468354430379747, + "grad_norm": 0.7000816464424133, + "learning_rate": 0.0010514167704076473, + "loss": 1.6548, + "step": 3552 + }, + { + "epoch": 0.37478902953586496, + "grad_norm": 0.545533299446106, + "learning_rate": 0.0010511868611517644, + "loss": 1.6757, + "step": 3553 + }, + { + "epoch": 0.3748945147679325, + "grad_norm": 0.7159028053283691, + "learning_rate": 0.0010509569181463916, + "loss": 1.6684, + "step": 3554 + }, + { + "epoch": 0.375, + "grad_norm": 0.6282603144645691, + "learning_rate": 0.0010507269414172956, + "loss": 1.6734, + "step": 3555 + }, + { + "epoch": 0.3751054852320675, + "grad_norm": 0.5649363994598389, + "learning_rate": 0.0010504969309902462, + "loss": 1.6642, + "step": 3556 + }, + { + "epoch": 0.37521097046413504, + "grad_norm": 0.6058201789855957, + 
"learning_rate": 0.0010502668868910174, + "loss": 1.6729, + "step": 3557 + }, + { + "epoch": 0.37531645569620253, + "grad_norm": 0.5045019388198853, + "learning_rate": 0.0010500368091453864, + "loss": 1.6372, + "step": 3558 + }, + { + "epoch": 0.37542194092827, + "grad_norm": 0.6328331232070923, + "learning_rate": 0.001049806697779135, + "loss": 1.6691, + "step": 3559 + }, + { + "epoch": 0.3755274261603376, + "grad_norm": 0.5679216384887695, + "learning_rate": 0.001049576552818048, + "loss": 1.6599, + "step": 3560 + }, + { + "epoch": 0.37563291139240507, + "grad_norm": 0.529019832611084, + "learning_rate": 0.0010493463742879147, + "loss": 1.6736, + "step": 3561 + }, + { + "epoch": 0.37573839662447256, + "grad_norm": 0.5561468005180359, + "learning_rate": 0.0010491161622145275, + "loss": 1.6489, + "step": 3562 + }, + { + "epoch": 0.3758438818565401, + "grad_norm": 0.5894694924354553, + "learning_rate": 0.0010488859166236824, + "loss": 1.6574, + "step": 3563 + }, + { + "epoch": 0.3759493670886076, + "grad_norm": 0.6346741318702698, + "learning_rate": 0.0010486556375411803, + "loss": 1.6673, + "step": 3564 + }, + { + "epoch": 0.3760548523206751, + "grad_norm": 0.5575799345970154, + "learning_rate": 0.0010484253249928247, + "loss": 1.6633, + "step": 3565 + }, + { + "epoch": 0.37616033755274264, + "grad_norm": 0.6186873912811279, + "learning_rate": 0.0010481949790044234, + "loss": 1.6366, + "step": 3566 + }, + { + "epoch": 0.37626582278481013, + "grad_norm": 0.562836229801178, + "learning_rate": 0.0010479645996017875, + "loss": 1.6553, + "step": 3567 + }, + { + "epoch": 0.3763713080168776, + "grad_norm": 0.6298385262489319, + "learning_rate": 0.0010477341868107327, + "loss": 1.6937, + "step": 3568 + }, + { + "epoch": 0.3764767932489452, + "grad_norm": 0.668717622756958, + "learning_rate": 0.0010475037406570775, + "loss": 1.6393, + "step": 3569 + }, + { + "epoch": 0.37658227848101267, + "grad_norm": 0.7708678245544434, + "learning_rate": 0.0010472732611666448, + "loss": 
1.6814, + "step": 3570 + }, + { + "epoch": 0.37668776371308016, + "grad_norm": 0.5696653127670288, + "learning_rate": 0.0010470427483652608, + "loss": 1.6267, + "step": 3571 + }, + { + "epoch": 0.37679324894514765, + "grad_norm": 0.8756732940673828, + "learning_rate": 0.0010468122022787554, + "loss": 1.664, + "step": 3572 + }, + { + "epoch": 0.3768987341772152, + "grad_norm": 0.6954655051231384, + "learning_rate": 0.001046581622932963, + "loss": 1.67, + "step": 3573 + }, + { + "epoch": 0.3770042194092827, + "grad_norm": 0.5506461262702942, + "learning_rate": 0.001046351010353721, + "loss": 1.6787, + "step": 3574 + }, + { + "epoch": 0.3771097046413502, + "grad_norm": 0.6000367403030396, + "learning_rate": 0.0010461203645668702, + "loss": 1.667, + "step": 3575 + }, + { + "epoch": 0.37721518987341773, + "grad_norm": 0.5912491679191589, + "learning_rate": 0.001045889685598256, + "loss": 1.6697, + "step": 3576 + }, + { + "epoch": 0.3773206751054852, + "grad_norm": 0.5595210194587708, + "learning_rate": 0.0010456589734737273, + "loss": 1.6536, + "step": 3577 + }, + { + "epoch": 0.3774261603375527, + "grad_norm": 0.5553572177886963, + "learning_rate": 0.0010454282282191362, + "loss": 1.6622, + "step": 3578 + }, + { + "epoch": 0.37753164556962027, + "grad_norm": 0.6305703520774841, + "learning_rate": 0.001045197449860339, + "loss": 1.6534, + "step": 3579 + }, + { + "epoch": 0.37763713080168776, + "grad_norm": 0.9174524545669556, + "learning_rate": 0.0010449666384231954, + "loss": 1.6904, + "step": 3580 + }, + { + "epoch": 0.37774261603375525, + "grad_norm": 0.7668032050132751, + "learning_rate": 0.0010447357939335693, + "loss": 1.6728, + "step": 3581 + }, + { + "epoch": 0.3778481012658228, + "grad_norm": 0.6607357859611511, + "learning_rate": 0.001044504916417328, + "loss": 1.6588, + "step": 3582 + }, + { + "epoch": 0.3779535864978903, + "grad_norm": 0.7422307729721069, + "learning_rate": 0.001044274005900342, + "loss": 1.6691, + "step": 3583 + }, + { + "epoch": 
0.3780590717299578, + "grad_norm": 0.7630242109298706, + "learning_rate": 0.0010440430624084863, + "loss": 1.6495, + "step": 3584 + }, + { + "epoch": 0.37816455696202533, + "grad_norm": 0.5622577667236328, + "learning_rate": 0.0010438120859676393, + "loss": 1.6998, + "step": 3585 + }, + { + "epoch": 0.3782700421940928, + "grad_norm": 0.6326735019683838, + "learning_rate": 0.0010435810766036828, + "loss": 1.6927, + "step": 3586 + }, + { + "epoch": 0.3783755274261603, + "grad_norm": 0.6221277117729187, + "learning_rate": 0.001043350034342503, + "loss": 1.6538, + "step": 3587 + }, + { + "epoch": 0.37848101265822787, + "grad_norm": 0.6711096167564392, + "learning_rate": 0.001043118959209989, + "loss": 1.6666, + "step": 3588 + }, + { + "epoch": 0.37858649789029536, + "grad_norm": 0.6572210788726807, + "learning_rate": 0.001042887851232034, + "loss": 1.6999, + "step": 3589 + }, + { + "epoch": 0.37869198312236285, + "grad_norm": 0.6210349202156067, + "learning_rate": 0.0010426567104345346, + "loss": 1.6748, + "step": 3590 + }, + { + "epoch": 0.3787974683544304, + "grad_norm": 0.540454089641571, + "learning_rate": 0.0010424255368433916, + "loss": 1.6445, + "step": 3591 + }, + { + "epoch": 0.3789029535864979, + "grad_norm": 0.727881669998169, + "learning_rate": 0.0010421943304845093, + "loss": 1.6626, + "step": 3592 + }, + { + "epoch": 0.3790084388185654, + "grad_norm": 0.6269682049751282, + "learning_rate": 0.0010419630913837948, + "loss": 1.7079, + "step": 3593 + }, + { + "epoch": 0.37911392405063293, + "grad_norm": 0.6966627240180969, + "learning_rate": 0.0010417318195671604, + "loss": 1.669, + "step": 3594 + }, + { + "epoch": 0.3792194092827004, + "grad_norm": 0.6018219590187073, + "learning_rate": 0.0010415005150605208, + "loss": 1.68, + "step": 3595 + }, + { + "epoch": 0.3793248945147679, + "grad_norm": 0.7633251547813416, + "learning_rate": 0.001041269177889795, + "loss": 1.6187, + "step": 3596 + }, + { + "epoch": 0.37943037974683547, + "grad_norm": 
0.5624533891677856, + "learning_rate": 0.0010410378080809052, + "loss": 1.6571, + "step": 3597 + }, + { + "epoch": 0.37953586497890296, + "grad_norm": 0.6863645911216736, + "learning_rate": 0.001040806405659778, + "loss": 1.6614, + "step": 3598 + }, + { + "epoch": 0.37964135021097045, + "grad_norm": 0.7826694250106812, + "learning_rate": 0.0010405749706523428, + "loss": 1.6401, + "step": 3599 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 0.8230925798416138, + "learning_rate": 0.0010403435030845332, + "loss": 1.6509, + "step": 3600 + }, + { + "epoch": 0.3798523206751055, + "grad_norm": 0.5821171402931213, + "learning_rate": 0.0010401120029822864, + "loss": 1.672, + "step": 3601 + }, + { + "epoch": 0.379957805907173, + "grad_norm": 0.7951922416687012, + "learning_rate": 0.001039880470371543, + "loss": 1.6562, + "step": 3602 + }, + { + "epoch": 0.38006329113924053, + "grad_norm": 0.5832919478416443, + "learning_rate": 0.0010396489052782473, + "loss": 1.6576, + "step": 3603 + }, + { + "epoch": 0.380168776371308, + "grad_norm": 0.9798996448516846, + "learning_rate": 0.0010394173077283477, + "loss": 1.6467, + "step": 3604 + }, + { + "epoch": 0.3802742616033755, + "grad_norm": 0.7696218490600586, + "learning_rate": 0.0010391856777477954, + "loss": 1.6833, + "step": 3605 + }, + { + "epoch": 0.380379746835443, + "grad_norm": 0.7409243583679199, + "learning_rate": 0.001038954015362546, + "loss": 1.6503, + "step": 3606 + }, + { + "epoch": 0.38048523206751056, + "grad_norm": 1.026416540145874, + "learning_rate": 0.001038722320598558, + "loss": 1.6499, + "step": 3607 + }, + { + "epoch": 0.38059071729957805, + "grad_norm": 0.5817092657089233, + "learning_rate": 0.001038490593481795, + "loss": 1.6867, + "step": 3608 + }, + { + "epoch": 0.38069620253164554, + "grad_norm": 0.75994473695755, + "learning_rate": 0.0010382588340382218, + "loss": 1.6844, + "step": 3609 + }, + { + "epoch": 0.3808016877637131, + "grad_norm": 0.5762898921966553, + "learning_rate": 
0.0010380270422938093, + "loss": 1.6257, + "step": 3610 + }, + { + "epoch": 0.3809071729957806, + "grad_norm": 0.6471061706542969, + "learning_rate": 0.00103779521827453, + "loss": 1.6886, + "step": 3611 + }, + { + "epoch": 0.3810126582278481, + "grad_norm": 0.5358253121376038, + "learning_rate": 0.0010375633620063618, + "loss": 1.6426, + "step": 3612 + }, + { + "epoch": 0.3811181434599156, + "grad_norm": 0.7836207151412964, + "learning_rate": 0.0010373314735152848, + "loss": 1.6654, + "step": 3613 + }, + { + "epoch": 0.3812236286919831, + "grad_norm": 0.5760005116462708, + "learning_rate": 0.0010370995528272836, + "loss": 1.63, + "step": 3614 + }, + { + "epoch": 0.3813291139240506, + "grad_norm": 0.7361229658126831, + "learning_rate": 0.0010368675999683455, + "loss": 1.6202, + "step": 3615 + }, + { + "epoch": 0.38143459915611816, + "grad_norm": 0.6271204948425293, + "learning_rate": 0.0010366356149644628, + "loss": 1.6733, + "step": 3616 + }, + { + "epoch": 0.38154008438818565, + "grad_norm": 0.6706591248512268, + "learning_rate": 0.0010364035978416297, + "loss": 1.7114, + "step": 3617 + }, + { + "epoch": 0.38164556962025314, + "grad_norm": 0.7148157954216003, + "learning_rate": 0.001036171548625846, + "loss": 1.6959, + "step": 3618 + }, + { + "epoch": 0.3817510548523207, + "grad_norm": 0.6954987645149231, + "learning_rate": 0.0010359394673431126, + "loss": 1.6584, + "step": 3619 + }, + { + "epoch": 0.3818565400843882, + "grad_norm": 0.8269587159156799, + "learning_rate": 0.0010357073540194362, + "loss": 1.6284, + "step": 3620 + }, + { + "epoch": 0.3819620253164557, + "grad_norm": 0.6633116602897644, + "learning_rate": 0.0010354752086808264, + "loss": 1.67, + "step": 3621 + }, + { + "epoch": 0.3820675105485232, + "grad_norm": 0.8260660767555237, + "learning_rate": 0.001035243031353296, + "loss": 1.6316, + "step": 3622 + }, + { + "epoch": 0.3821729957805907, + "grad_norm": 0.5983051061630249, + "learning_rate": 0.0010350108220628614, + "loss": 1.6786, + "step": 
3623 + }, + { + "epoch": 0.3822784810126582, + "grad_norm": 0.7706470489501953, + "learning_rate": 0.001034778580835543, + "loss": 1.6574, + "step": 3624 + }, + { + "epoch": 0.38238396624472576, + "grad_norm": 0.6160020232200623, + "learning_rate": 0.0010345463076973645, + "loss": 1.6697, + "step": 3625 + }, + { + "epoch": 0.38248945147679325, + "grad_norm": 0.9721792936325073, + "learning_rate": 0.0010343140026743535, + "loss": 1.6603, + "step": 3626 + }, + { + "epoch": 0.38259493670886074, + "grad_norm": 0.5228412747383118, + "learning_rate": 0.0010340816657925407, + "loss": 1.6624, + "step": 3627 + }, + { + "epoch": 0.3827004219409283, + "grad_norm": 0.8077463507652283, + "learning_rate": 0.0010338492970779606, + "loss": 1.6797, + "step": 3628 + }, + { + "epoch": 0.3828059071729958, + "grad_norm": 0.5627638697624207, + "learning_rate": 0.0010336168965566516, + "loss": 1.6356, + "step": 3629 + }, + { + "epoch": 0.3829113924050633, + "grad_norm": 0.7013182044029236, + "learning_rate": 0.001033384464254655, + "loss": 1.6673, + "step": 3630 + }, + { + "epoch": 0.3830168776371308, + "grad_norm": 0.5479388236999512, + "learning_rate": 0.001033152000198016, + "loss": 1.6667, + "step": 3631 + }, + { + "epoch": 0.3831223628691983, + "grad_norm": 0.7425985932350159, + "learning_rate": 0.0010329195044127834, + "loss": 1.6548, + "step": 3632 + }, + { + "epoch": 0.3832278481012658, + "grad_norm": 0.6269157528877258, + "learning_rate": 0.0010326869769250097, + "loss": 1.6863, + "step": 3633 + }, + { + "epoch": 0.38333333333333336, + "grad_norm": 0.6668545603752136, + "learning_rate": 0.0010324544177607508, + "loss": 1.6285, + "step": 3634 + }, + { + "epoch": 0.38343881856540085, + "grad_norm": 0.5919183492660522, + "learning_rate": 0.0010322218269460657, + "loss": 1.6587, + "step": 3635 + }, + { + "epoch": 0.38354430379746834, + "grad_norm": 0.6212754845619202, + "learning_rate": 0.001031989204507018, + "loss": 1.6417, + "step": 3636 + }, + { + "epoch": 0.3836497890295359, + 
"grad_norm": 0.6193587779998779, + "learning_rate": 0.0010317565504696733, + "loss": 1.734, + "step": 3637 + }, + { + "epoch": 0.3837552742616034, + "grad_norm": 0.6530500650405884, + "learning_rate": 0.0010315238648601025, + "loss": 1.6535, + "step": 3638 + }, + { + "epoch": 0.3838607594936709, + "grad_norm": 0.8142815828323364, + "learning_rate": 0.0010312911477043784, + "loss": 1.6679, + "step": 3639 + }, + { + "epoch": 0.38396624472573837, + "grad_norm": 0.8107724785804749, + "learning_rate": 0.001031058399028579, + "loss": 1.617, + "step": 3640 + }, + { + "epoch": 0.3840717299578059, + "grad_norm": 0.7081316113471985, + "learning_rate": 0.0010308256188587843, + "loss": 1.6479, + "step": 3641 + }, + { + "epoch": 0.3841772151898734, + "grad_norm": 0.7583612203598022, + "learning_rate": 0.0010305928072210787, + "loss": 1.7032, + "step": 3642 + }, + { + "epoch": 0.3842827004219409, + "grad_norm": 0.7184051275253296, + "learning_rate": 0.00103035996414155, + "loss": 1.6649, + "step": 3643 + }, + { + "epoch": 0.38438818565400845, + "grad_norm": 0.6745214462280273, + "learning_rate": 0.0010301270896462893, + "loss": 1.646, + "step": 3644 + }, + { + "epoch": 0.38449367088607594, + "grad_norm": 0.6186873912811279, + "learning_rate": 0.0010298941837613913, + "loss": 1.6602, + "step": 3645 + }, + { + "epoch": 0.38459915611814344, + "grad_norm": 0.7584296464920044, + "learning_rate": 0.0010296612465129542, + "loss": 1.6367, + "step": 3646 + }, + { + "epoch": 0.384704641350211, + "grad_norm": 0.6692399382591248, + "learning_rate": 0.0010294282779270802, + "loss": 1.6718, + "step": 3647 + }, + { + "epoch": 0.3848101265822785, + "grad_norm": 0.7026774287223816, + "learning_rate": 0.001029195278029874, + "loss": 1.7007, + "step": 3648 + }, + { + "epoch": 0.38491561181434597, + "grad_norm": 0.5376822352409363, + "learning_rate": 0.0010289622468474448, + "loss": 1.6858, + "step": 3649 + }, + { + "epoch": 0.3850210970464135, + "grad_norm": 0.7121860980987549, + "learning_rate": 
0.001028729184405905, + "loss": 1.6668, + "step": 3650 + }, + { + "epoch": 0.385126582278481, + "grad_norm": 0.621481716632843, + "learning_rate": 0.00102849609073137, + "loss": 1.6888, + "step": 3651 + }, + { + "epoch": 0.3852320675105485, + "grad_norm": 0.6737475395202637, + "learning_rate": 0.0010282629658499593, + "loss": 1.6511, + "step": 3652 + }, + { + "epoch": 0.38533755274261605, + "grad_norm": 0.7506799697875977, + "learning_rate": 0.001028029809787796, + "loss": 1.6494, + "step": 3653 + }, + { + "epoch": 0.38544303797468354, + "grad_norm": 0.7628942131996155, + "learning_rate": 0.001027796622571006, + "loss": 1.6907, + "step": 3654 + }, + { + "epoch": 0.38554852320675104, + "grad_norm": 0.6761271953582764, + "learning_rate": 0.001027563404225719, + "loss": 1.6683, + "step": 3655 + }, + { + "epoch": 0.3856540084388186, + "grad_norm": 0.6823180317878723, + "learning_rate": 0.0010273301547780687, + "loss": 1.6474, + "step": 3656 + }, + { + "epoch": 0.3857594936708861, + "grad_norm": 0.7787825465202332, + "learning_rate": 0.0010270968742541917, + "loss": 1.6333, + "step": 3657 + }, + { + "epoch": 0.38586497890295357, + "grad_norm": 0.7856544852256775, + "learning_rate": 0.0010268635626802282, + "loss": 1.6624, + "step": 3658 + }, + { + "epoch": 0.3859704641350211, + "grad_norm": 0.815278947353363, + "learning_rate": 0.001026630220082322, + "loss": 1.687, + "step": 3659 + }, + { + "epoch": 0.3860759493670886, + "grad_norm": 0.797116756439209, + "learning_rate": 0.0010263968464866201, + "loss": 1.6509, + "step": 3660 + }, + { + "epoch": 0.3861814345991561, + "grad_norm": 0.8099934458732605, + "learning_rate": 0.0010261634419192732, + "loss": 1.6734, + "step": 3661 + }, + { + "epoch": 0.38628691983122365, + "grad_norm": 0.762208878993988, + "learning_rate": 0.001025930006406436, + "loss": 1.6516, + "step": 3662 + }, + { + "epoch": 0.38639240506329114, + "grad_norm": 0.7522825002670288, + "learning_rate": 0.0010256965399742652, + "loss": 1.658, + "step": 3663 + 
}, + { + "epoch": 0.38649789029535864, + "grad_norm": 0.6921849846839905, + "learning_rate": 0.0010254630426489225, + "loss": 1.6422, + "step": 3664 + }, + { + "epoch": 0.3866033755274262, + "grad_norm": 0.9199202656745911, + "learning_rate": 0.0010252295144565725, + "loss": 1.6444, + "step": 3665 + }, + { + "epoch": 0.3867088607594937, + "grad_norm": 0.8015302419662476, + "learning_rate": 0.0010249959554233827, + "loss": 1.6589, + "step": 3666 + }, + { + "epoch": 0.38681434599156117, + "grad_norm": 0.7612413167953491, + "learning_rate": 0.001024762365575525, + "loss": 1.6782, + "step": 3667 + }, + { + "epoch": 0.3869198312236287, + "grad_norm": 0.7441490888595581, + "learning_rate": 0.001024528744939174, + "loss": 1.6832, + "step": 3668 + }, + { + "epoch": 0.3870253164556962, + "grad_norm": 0.8558472990989685, + "learning_rate": 0.0010242950935405084, + "loss": 1.6491, + "step": 3669 + }, + { + "epoch": 0.3871308016877637, + "grad_norm": 0.5705664753913879, + "learning_rate": 0.0010240614114057098, + "loss": 1.643, + "step": 3670 + }, + { + "epoch": 0.3872362869198312, + "grad_norm": 0.8095905780792236, + "learning_rate": 0.0010238276985609631, + "loss": 1.6677, + "step": 3671 + }, + { + "epoch": 0.38734177215189874, + "grad_norm": 0.599589467048645, + "learning_rate": 0.0010235939550324576, + "loss": 1.653, + "step": 3672 + }, + { + "epoch": 0.38744725738396624, + "grad_norm": 0.8618872761726379, + "learning_rate": 0.0010233601808463852, + "loss": 1.6621, + "step": 3673 + }, + { + "epoch": 0.38755274261603373, + "grad_norm": 0.5978236794471741, + "learning_rate": 0.0010231263760289416, + "loss": 1.6537, + "step": 3674 + }, + { + "epoch": 0.3876582278481013, + "grad_norm": 0.8185756802558899, + "learning_rate": 0.0010228925406063254, + "loss": 1.6377, + "step": 3675 + }, + { + "epoch": 0.38776371308016877, + "grad_norm": 0.6024383902549744, + "learning_rate": 0.0010226586746047393, + "loss": 1.645, + "step": 3676 + }, + { + "epoch": 0.38786919831223626, + 
"grad_norm": 0.7816929221153259, + "learning_rate": 0.0010224247780503892, + "loss": 1.6762, + "step": 3677 + }, + { + "epoch": 0.3879746835443038, + "grad_norm": 0.672534167766571, + "learning_rate": 0.0010221908509694842, + "loss": 1.6866, + "step": 3678 + }, + { + "epoch": 0.3880801687763713, + "grad_norm": 0.6894327402114868, + "learning_rate": 0.0010219568933882372, + "loss": 1.705, + "step": 3679 + }, + { + "epoch": 0.3881856540084388, + "grad_norm": 0.7591341137886047, + "learning_rate": 0.001021722905332864, + "loss": 1.6276, + "step": 3680 + }, + { + "epoch": 0.38829113924050634, + "grad_norm": 0.9432777762413025, + "learning_rate": 0.0010214888868295842, + "loss": 1.6798, + "step": 3681 + }, + { + "epoch": 0.38839662447257384, + "grad_norm": 0.6244461536407471, + "learning_rate": 0.0010212548379046214, + "loss": 1.6697, + "step": 3682 + }, + { + "epoch": 0.38850210970464133, + "grad_norm": 0.7732518911361694, + "learning_rate": 0.001021020758584201, + "loss": 1.6718, + "step": 3683 + }, + { + "epoch": 0.3886075949367089, + "grad_norm": 0.7319206595420837, + "learning_rate": 0.0010207866488945532, + "loss": 1.6859, + "step": 3684 + }, + { + "epoch": 0.38871308016877637, + "grad_norm": 0.6426280736923218, + "learning_rate": 0.0010205525088619112, + "loss": 1.6985, + "step": 3685 + }, + { + "epoch": 0.38881856540084386, + "grad_norm": 0.7282331585884094, + "learning_rate": 0.0010203183385125115, + "loss": 1.6509, + "step": 3686 + }, + { + "epoch": 0.3889240506329114, + "grad_norm": 0.6228091716766357, + "learning_rate": 0.001020084137872594, + "loss": 1.6444, + "step": 3687 + }, + { + "epoch": 0.3890295358649789, + "grad_norm": 0.9654447436332703, + "learning_rate": 0.0010198499069684023, + "loss": 1.6437, + "step": 3688 + }, + { + "epoch": 0.3891350210970464, + "grad_norm": 0.8221418261528015, + "learning_rate": 0.0010196156458261827, + "loss": 1.6893, + "step": 3689 + }, + { + "epoch": 0.38924050632911394, + "grad_norm": 0.8446932435035706, + 
"learning_rate": 0.0010193813544721855, + "loss": 1.675, + "step": 3690 + }, + { + "epoch": 0.38934599156118144, + "grad_norm": 1.1278072595596313, + "learning_rate": 0.0010191470329326646, + "loss": 1.7291, + "step": 3691 + }, + { + "epoch": 0.38945147679324893, + "grad_norm": 0.6948318481445312, + "learning_rate": 0.0010189126812338765, + "loss": 1.6583, + "step": 3692 + }, + { + "epoch": 0.3895569620253165, + "grad_norm": 0.870588481426239, + "learning_rate": 0.0010186782994020811, + "loss": 1.6328, + "step": 3693 + }, + { + "epoch": 0.38966244725738397, + "grad_norm": 0.7722818851470947, + "learning_rate": 0.0010184438874635427, + "loss": 1.6521, + "step": 3694 + }, + { + "epoch": 0.38976793248945146, + "grad_norm": 0.9259994029998779, + "learning_rate": 0.0010182094454445282, + "loss": 1.6784, + "step": 3695 + }, + { + "epoch": 0.389873417721519, + "grad_norm": 0.971741795539856, + "learning_rate": 0.001017974973371308, + "loss": 1.6611, + "step": 3696 + }, + { + "epoch": 0.3899789029535865, + "grad_norm": 0.6258294582366943, + "learning_rate": 0.0010177404712701558, + "loss": 1.6753, + "step": 3697 + }, + { + "epoch": 0.390084388185654, + "grad_norm": 1.0351766347885132, + "learning_rate": 0.0010175059391673486, + "loss": 1.6992, + "step": 3698 + }, + { + "epoch": 0.39018987341772154, + "grad_norm": 0.5876832604408264, + "learning_rate": 0.0010172713770891673, + "loss": 1.6266, + "step": 3699 + }, + { + "epoch": 0.39029535864978904, + "grad_norm": 0.8584021329879761, + "learning_rate": 0.001017036785061895, + "loss": 1.7339, + "step": 3700 + }, + { + "epoch": 0.39040084388185653, + "grad_norm": 0.6842017769813538, + "learning_rate": 0.0010168021631118199, + "loss": 1.6745, + "step": 3701 + }, + { + "epoch": 0.3905063291139241, + "grad_norm": 0.685009777545929, + "learning_rate": 0.0010165675112652314, + "loss": 1.668, + "step": 3702 + }, + { + "epoch": 0.39061181434599157, + "grad_norm": 0.7153673768043518, + "learning_rate": 0.0010163328295484245, + "loss": 
1.6356, + "step": 3703 + }, + { + "epoch": 0.39071729957805906, + "grad_norm": 0.7910043597221375, + "learning_rate": 0.001016098117987696, + "loss": 1.674, + "step": 3704 + }, + { + "epoch": 0.39082278481012656, + "grad_norm": 1.1740983724594116, + "learning_rate": 0.0010158633766093462, + "loss": 1.657, + "step": 3705 + }, + { + "epoch": 0.3909282700421941, + "grad_norm": 0.5800259709358215, + "learning_rate": 0.0010156286054396795, + "loss": 1.6781, + "step": 3706 + }, + { + "epoch": 0.3910337552742616, + "grad_norm": 0.906664252281189, + "learning_rate": 0.001015393804505003, + "loss": 1.7194, + "step": 3707 + }, + { + "epoch": 0.3911392405063291, + "grad_norm": 0.6727745532989502, + "learning_rate": 0.0010151589738316275, + "loss": 1.6509, + "step": 3708 + }, + { + "epoch": 0.39124472573839664, + "grad_norm": 0.5632918477058411, + "learning_rate": 0.0010149241134458666, + "loss": 1.6582, + "step": 3709 + }, + { + "epoch": 0.39135021097046413, + "grad_norm": 0.6737353205680847, + "learning_rate": 0.0010146892233740376, + "loss": 1.6015, + "step": 3710 + }, + { + "epoch": 0.3914556962025316, + "grad_norm": 0.6936718225479126, + "learning_rate": 0.0010144543036424616, + "loss": 1.6284, + "step": 3711 + }, + { + "epoch": 0.39156118143459917, + "grad_norm": 0.7794670462608337, + "learning_rate": 0.001014219354277462, + "loss": 1.6093, + "step": 3712 + }, + { + "epoch": 0.39166666666666666, + "grad_norm": 0.6012841463088989, + "learning_rate": 0.0010139843753053663, + "loss": 1.6203, + "step": 3713 + }, + { + "epoch": 0.39177215189873416, + "grad_norm": 0.8309971690177917, + "learning_rate": 0.001013749366752505, + "loss": 1.6575, + "step": 3714 + }, + { + "epoch": 0.3918776371308017, + "grad_norm": 0.5812920331954956, + "learning_rate": 0.0010135143286452118, + "loss": 1.6375, + "step": 3715 + }, + { + "epoch": 0.3919831223628692, + "grad_norm": 0.9598045349121094, + "learning_rate": 0.0010132792610098244, + "loss": 1.631, + "step": 3716 + }, + { + "epoch": 
0.3920886075949367, + "grad_norm": 0.6181454658508301, + "learning_rate": 0.0010130441638726828, + "loss": 1.6646, + "step": 3717 + }, + { + "epoch": 0.39219409282700424, + "grad_norm": 0.8363194465637207, + "learning_rate": 0.001012809037260131, + "loss": 1.601, + "step": 3718 + }, + { + "epoch": 0.39229957805907173, + "grad_norm": 0.6552649140357971, + "learning_rate": 0.001012573881198516, + "loss": 1.6794, + "step": 3719 + }, + { + "epoch": 0.3924050632911392, + "grad_norm": 0.7086014151573181, + "learning_rate": 0.0010123386957141883, + "loss": 1.6492, + "step": 3720 + }, + { + "epoch": 0.39251054852320677, + "grad_norm": 0.5623844861984253, + "learning_rate": 0.0010121034808335018, + "loss": 1.6373, + "step": 3721 + }, + { + "epoch": 0.39261603375527426, + "grad_norm": 0.6425117254257202, + "learning_rate": 0.0010118682365828132, + "loss": 1.6662, + "step": 3722 + }, + { + "epoch": 0.39272151898734176, + "grad_norm": 0.5559179782867432, + "learning_rate": 0.0010116329629884827, + "loss": 1.6385, + "step": 3723 + }, + { + "epoch": 0.3928270042194093, + "grad_norm": 0.6275590658187866, + "learning_rate": 0.0010113976600768743, + "loss": 1.6821, + "step": 3724 + }, + { + "epoch": 0.3929324894514768, + "grad_norm": 0.5620225071907043, + "learning_rate": 0.0010111623278743547, + "loss": 1.6352, + "step": 3725 + }, + { + "epoch": 0.3930379746835443, + "grad_norm": 0.6270215511322021, + "learning_rate": 0.001010926966407294, + "loss": 1.668, + "step": 3726 + }, + { + "epoch": 0.39314345991561184, + "grad_norm": 0.5951416492462158, + "learning_rate": 0.0010106915757020654, + "loss": 1.6611, + "step": 3727 + }, + { + "epoch": 0.39324894514767933, + "grad_norm": 0.5912175178527832, + "learning_rate": 0.0010104561557850457, + "loss": 1.6735, + "step": 3728 + }, + { + "epoch": 0.3933544303797468, + "grad_norm": 0.5874732136726379, + "learning_rate": 0.0010102207066826155, + "loss": 1.6728, + "step": 3729 + }, + { + "epoch": 0.39345991561181437, + "grad_norm": 
0.7187961339950562, + "learning_rate": 0.0010099852284211573, + "loss": 1.6386, + "step": 3730 + }, + { + "epoch": 0.39356540084388186, + "grad_norm": 0.5698044896125793, + "learning_rate": 0.0010097497210270578, + "loss": 1.6644, + "step": 3731 + }, + { + "epoch": 0.39367088607594936, + "grad_norm": 0.636389434337616, + "learning_rate": 0.0010095141845267066, + "loss": 1.6954, + "step": 3732 + }, + { + "epoch": 0.3937763713080169, + "grad_norm": 0.5812423825263977, + "learning_rate": 0.0010092786189464975, + "loss": 1.6487, + "step": 3733 + }, + { + "epoch": 0.3938818565400844, + "grad_norm": 0.7516233325004578, + "learning_rate": 0.0010090430243128259, + "loss": 1.6681, + "step": 3734 + }, + { + "epoch": 0.3939873417721519, + "grad_norm": 0.6664842367172241, + "learning_rate": 0.0010088074006520918, + "loss": 1.6712, + "step": 3735 + }, + { + "epoch": 0.39409282700421944, + "grad_norm": 0.866113007068634, + "learning_rate": 0.0010085717479906978, + "loss": 1.6767, + "step": 3736 + }, + { + "epoch": 0.39419831223628693, + "grad_norm": 0.5735491514205933, + "learning_rate": 0.0010083360663550502, + "loss": 1.621, + "step": 3737 + }, + { + "epoch": 0.3943037974683544, + "grad_norm": 0.8956275582313538, + "learning_rate": 0.0010081003557715583, + "loss": 1.6329, + "step": 3738 + }, + { + "epoch": 0.3944092827004219, + "grad_norm": 0.5486570000648499, + "learning_rate": 0.0010078646162666345, + "loss": 1.6154, + "step": 3739 + }, + { + "epoch": 0.39451476793248946, + "grad_norm": 0.6130797266960144, + "learning_rate": 0.0010076288478666944, + "loss": 1.681, + "step": 3740 + }, + { + "epoch": 0.39462025316455696, + "grad_norm": 0.6666299104690552, + "learning_rate": 0.0010073930505981573, + "loss": 1.677, + "step": 3741 + }, + { + "epoch": 0.39472573839662445, + "grad_norm": 0.6586785912513733, + "learning_rate": 0.0010071572244874456, + "loss": 1.6725, + "step": 3742 + }, + { + "epoch": 0.394831223628692, + "grad_norm": 0.5562691688537598, + "learning_rate": 
0.0010069213695609845, + "loss": 1.6453, + "step": 3743 + }, + { + "epoch": 0.3949367088607595, + "grad_norm": 0.7045764923095703, + "learning_rate": 0.0010066854858452028, + "loss": 1.7073, + "step": 3744 + }, + { + "epoch": 0.395042194092827, + "grad_norm": 0.6570308208465576, + "learning_rate": 0.0010064495733665324, + "loss": 1.6226, + "step": 3745 + }, + { + "epoch": 0.39514767932489453, + "grad_norm": 0.7809102535247803, + "learning_rate": 0.0010062136321514084, + "loss": 1.6638, + "step": 3746 + }, + { + "epoch": 0.395253164556962, + "grad_norm": 0.6584307551383972, + "learning_rate": 0.0010059776622262698, + "loss": 1.6967, + "step": 3747 + }, + { + "epoch": 0.3953586497890295, + "grad_norm": 0.6736603379249573, + "learning_rate": 0.0010057416636175575, + "loss": 1.647, + "step": 3748 + }, + { + "epoch": 0.39546413502109706, + "grad_norm": 0.7947348356246948, + "learning_rate": 0.0010055056363517162, + "loss": 1.6339, + "step": 3749 + }, + { + "epoch": 0.39556962025316456, + "grad_norm": 0.648848295211792, + "learning_rate": 0.0010052695804551946, + "loss": 1.6405, + "step": 3750 + }, + { + "epoch": 0.39567510548523205, + "grad_norm": 0.7643243074417114, + "learning_rate": 0.0010050334959544438, + "loss": 1.6583, + "step": 3751 + }, + { + "epoch": 0.3957805907172996, + "grad_norm": 0.698025643825531, + "learning_rate": 0.0010047973828759178, + "loss": 1.6798, + "step": 3752 + }, + { + "epoch": 0.3958860759493671, + "grad_norm": 0.7216835021972656, + "learning_rate": 0.0010045612412460747, + "loss": 1.6728, + "step": 3753 + }, + { + "epoch": 0.3959915611814346, + "grad_norm": 0.6631700396537781, + "learning_rate": 0.0010043250710913747, + "loss": 1.6575, + "step": 3754 + }, + { + "epoch": 0.39609704641350213, + "grad_norm": 0.7803492546081543, + "learning_rate": 0.0010040888724382828, + "loss": 1.6568, + "step": 3755 + }, + { + "epoch": 0.3962025316455696, + "grad_norm": 0.6253324747085571, + "learning_rate": 0.0010038526453132655, + "loss": 1.6593, + 
"step": 3756 + }, + { + "epoch": 0.3963080168776371, + "grad_norm": 0.6978639364242554, + "learning_rate": 0.0010036163897427937, + "loss": 1.6591, + "step": 3757 + }, + { + "epoch": 0.39641350210970466, + "grad_norm": 0.6839731931686401, + "learning_rate": 0.0010033801057533404, + "loss": 1.6476, + "step": 3758 + }, + { + "epoch": 0.39651898734177216, + "grad_norm": 0.6314038634300232, + "learning_rate": 0.001003143793371383, + "loss": 1.6134, + "step": 3759 + }, + { + "epoch": 0.39662447257383965, + "grad_norm": 0.84079909324646, + "learning_rate": 0.0010029074526234014, + "loss": 1.6369, + "step": 3760 + }, + { + "epoch": 0.3967299578059072, + "grad_norm": 0.7708376049995422, + "learning_rate": 0.0010026710835358786, + "loss": 1.6812, + "step": 3761 + }, + { + "epoch": 0.3968354430379747, + "grad_norm": 0.574503481388092, + "learning_rate": 0.0010024346861353007, + "loss": 1.6566, + "step": 3762 + }, + { + "epoch": 0.3969409282700422, + "grad_norm": 0.6689659357070923, + "learning_rate": 0.0010021982604481575, + "loss": 1.6476, + "step": 3763 + }, + { + "epoch": 0.39704641350210973, + "grad_norm": 0.5567765235900879, + "learning_rate": 0.001001961806500942, + "loss": 1.638, + "step": 3764 + }, + { + "epoch": 0.3971518987341772, + "grad_norm": 0.773569643497467, + "learning_rate": 0.0010017253243201495, + "loss": 1.6499, + "step": 3765 + }, + { + "epoch": 0.3972573839662447, + "grad_norm": 0.7236846089363098, + "learning_rate": 0.0010014888139322792, + "loss": 1.6599, + "step": 3766 + }, + { + "epoch": 0.39736286919831226, + "grad_norm": 0.618609607219696, + "learning_rate": 0.001001252275363833, + "loss": 1.7029, + "step": 3767 + }, + { + "epoch": 0.39746835443037976, + "grad_norm": 0.7405760288238525, + "learning_rate": 0.0010010157086413167, + "loss": 1.7179, + "step": 3768 + }, + { + "epoch": 0.39757383966244725, + "grad_norm": 0.6534562110900879, + "learning_rate": 0.0010007791137912386, + "loss": 1.6577, + "step": 3769 + }, + { + "epoch": 
0.39767932489451474, + "grad_norm": 0.7326590418815613, + "learning_rate": 0.0010005424908401104, + "loss": 1.6669, + "step": 3770 + }, + { + "epoch": 0.3977848101265823, + "grad_norm": 0.6463741660118103, + "learning_rate": 0.0010003058398144464, + "loss": 1.6266, + "step": 3771 + }, + { + "epoch": 0.3978902953586498, + "grad_norm": 0.5993008017539978, + "learning_rate": 0.0010000691607407652, + "loss": 1.6615, + "step": 3772 + }, + { + "epoch": 0.3979957805907173, + "grad_norm": 0.6203269958496094, + "learning_rate": 0.0009998324536455877, + "loss": 1.6698, + "step": 3773 + }, + { + "epoch": 0.3981012658227848, + "grad_norm": 0.616693913936615, + "learning_rate": 0.0009995957185554378, + "loss": 1.6865, + "step": 3774 + }, + { + "epoch": 0.3982067510548523, + "grad_norm": 0.5568800568580627, + "learning_rate": 0.000999358955496843, + "loss": 1.6449, + "step": 3775 + }, + { + "epoch": 0.3983122362869198, + "grad_norm": 0.5681452751159668, + "learning_rate": 0.000999122164496334, + "loss": 1.6469, + "step": 3776 + }, + { + "epoch": 0.39841772151898736, + "grad_norm": 0.5404303073883057, + "learning_rate": 0.0009988853455804442, + "loss": 1.6313, + "step": 3777 + }, + { + "epoch": 0.39852320675105485, + "grad_norm": 0.6091628074645996, + "learning_rate": 0.0009986484987757102, + "loss": 1.6562, + "step": 3778 + }, + { + "epoch": 0.39862869198312234, + "grad_norm": 0.6193424463272095, + "learning_rate": 0.0009984116241086723, + "loss": 1.6631, + "step": 3779 + }, + { + "epoch": 0.3987341772151899, + "grad_norm": 0.5926948189735413, + "learning_rate": 0.0009981747216058728, + "loss": 1.6353, + "step": 3780 + }, + { + "epoch": 0.3988396624472574, + "grad_norm": 0.5838751792907715, + "learning_rate": 0.0009979377912938587, + "loss": 1.6331, + "step": 3781 + }, + { + "epoch": 0.3989451476793249, + "grad_norm": 0.6337102651596069, + "learning_rate": 0.0009977008331991785, + "loss": 1.641, + "step": 3782 + }, + { + "epoch": 0.3990506329113924, + "grad_norm": 
0.6011034250259399, + "learning_rate": 0.000997463847348385, + "loss": 1.6904, + "step": 3783 + }, + { + "epoch": 0.3991561181434599, + "grad_norm": 0.6621648073196411, + "learning_rate": 0.000997226833768033, + "loss": 1.6284, + "step": 3784 + }, + { + "epoch": 0.3992616033755274, + "grad_norm": 0.6912870407104492, + "learning_rate": 0.0009969897924846818, + "loss": 1.6901, + "step": 3785 + }, + { + "epoch": 0.39936708860759496, + "grad_norm": 0.6375886797904968, + "learning_rate": 0.0009967527235248928, + "loss": 1.6856, + "step": 3786 + }, + { + "epoch": 0.39947257383966245, + "grad_norm": 1.033742070198059, + "learning_rate": 0.0009965156269152308, + "loss": 1.6245, + "step": 3787 + }, + { + "epoch": 0.39957805907172994, + "grad_norm": 0.7262400388717651, + "learning_rate": 0.0009962785026822632, + "loss": 1.6782, + "step": 3788 + }, + { + "epoch": 0.3996835443037975, + "grad_norm": 0.6923663020133972, + "learning_rate": 0.0009960413508525617, + "loss": 1.654, + "step": 3789 + }, + { + "epoch": 0.399789029535865, + "grad_norm": 0.6168990135192871, + "learning_rate": 0.0009958041714526998, + "loss": 1.6529, + "step": 3790 + }, + { + "epoch": 0.3998945147679325, + "grad_norm": 0.8122061491012573, + "learning_rate": 0.0009955669645092546, + "loss": 1.6514, + "step": 3791 + }, + { + "epoch": 0.4, + "grad_norm": 0.589441180229187, + "learning_rate": 0.0009953297300488069, + "loss": 1.6289, + "step": 3792 + }, + { + "epoch": 0.4001054852320675, + "grad_norm": 1.0027846097946167, + "learning_rate": 0.0009950924680979393, + "loss": 1.6941, + "step": 3793 + }, + { + "epoch": 0.400210970464135, + "grad_norm": 0.7147393226623535, + "learning_rate": 0.0009948551786832386, + "loss": 1.6692, + "step": 3794 + }, + { + "epoch": 0.40031645569620256, + "grad_norm": 0.7276666164398193, + "learning_rate": 0.0009946178618312942, + "loss": 1.6781, + "step": 3795 + }, + { + "epoch": 0.40042194092827005, + "grad_norm": 0.7341674566268921, + "learning_rate": 0.0009943805175686986, + 
"loss": 1.6835, + "step": 3796 + }, + { + "epoch": 0.40052742616033754, + "grad_norm": 0.6560831069946289, + "learning_rate": 0.0009941431459220475, + "loss": 1.6531, + "step": 3797 + }, + { + "epoch": 0.4006329113924051, + "grad_norm": 0.7114737629890442, + "learning_rate": 0.0009939057469179394, + "loss": 1.6817, + "step": 3798 + }, + { + "epoch": 0.4007383966244726, + "grad_norm": 0.741947591304779, + "learning_rate": 0.0009936683205829762, + "loss": 1.6269, + "step": 3799 + }, + { + "epoch": 0.4008438818565401, + "grad_norm": 0.6011796593666077, + "learning_rate": 0.0009934308669437627, + "loss": 1.6526, + "step": 3800 + }, + { + "epoch": 0.4009493670886076, + "grad_norm": 0.7485908269882202, + "learning_rate": 0.0009931933860269063, + "loss": 1.6161, + "step": 3801 + }, + { + "epoch": 0.4010548523206751, + "grad_norm": 0.7156597375869751, + "learning_rate": 0.0009929558778590188, + "loss": 1.6355, + "step": 3802 + }, + { + "epoch": 0.4011603375527426, + "grad_norm": 0.8178455233573914, + "learning_rate": 0.0009927183424667135, + "loss": 1.6873, + "step": 3803 + }, + { + "epoch": 0.4012658227848101, + "grad_norm": 0.9551687240600586, + "learning_rate": 0.0009924807798766077, + "loss": 1.6772, + "step": 3804 + }, + { + "epoch": 0.40137130801687765, + "grad_norm": 0.5745814442634583, + "learning_rate": 0.0009922431901153213, + "loss": 1.6163, + "step": 3805 + }, + { + "epoch": 0.40147679324894514, + "grad_norm": 0.7342539429664612, + "learning_rate": 0.0009920055732094775, + "loss": 1.6157, + "step": 3806 + }, + { + "epoch": 0.40158227848101263, + "grad_norm": 0.594070315361023, + "learning_rate": 0.0009917679291857027, + "loss": 1.6284, + "step": 3807 + }, + { + "epoch": 0.4016877637130802, + "grad_norm": 0.6910374164581299, + "learning_rate": 0.0009915302580706256, + "loss": 1.6537, + "step": 3808 + }, + { + "epoch": 0.4017932489451477, + "grad_norm": 0.7718406319618225, + "learning_rate": 0.0009912925598908788, + "loss": 1.684, + "step": 3809 + }, + { + 
"epoch": 0.40189873417721517, + "grad_norm": 0.5872762203216553, + "learning_rate": 0.0009910548346730972, + "loss": 1.6869, + "step": 3810 + }, + { + "epoch": 0.4020042194092827, + "grad_norm": 0.6697355508804321, + "learning_rate": 0.00099081708244392, + "loss": 1.6519, + "step": 3811 + }, + { + "epoch": 0.4021097046413502, + "grad_norm": 0.5687549114227295, + "learning_rate": 0.0009905793032299875, + "loss": 1.6307, + "step": 3812 + }, + { + "epoch": 0.4022151898734177, + "grad_norm": 0.630298376083374, + "learning_rate": 0.0009903414970579443, + "loss": 1.628, + "step": 3813 + }, + { + "epoch": 0.40232067510548525, + "grad_norm": 0.614802896976471, + "learning_rate": 0.000990103663954438, + "loss": 1.6468, + "step": 3814 + }, + { + "epoch": 0.40242616033755274, + "grad_norm": 0.5822769403457642, + "learning_rate": 0.000989865803946119, + "loss": 1.6662, + "step": 3815 + }, + { + "epoch": 0.40253164556962023, + "grad_norm": 0.6463348865509033, + "learning_rate": 0.0009896279170596406, + "loss": 1.6309, + "step": 3816 + }, + { + "epoch": 0.4026371308016878, + "grad_norm": 0.6249316930770874, + "learning_rate": 0.0009893900033216593, + "loss": 1.6106, + "step": 3817 + }, + { + "epoch": 0.4027426160337553, + "grad_norm": 0.6481075286865234, + "learning_rate": 0.0009891520627588342, + "loss": 1.6458, + "step": 3818 + }, + { + "epoch": 0.40284810126582277, + "grad_norm": 0.6820740699768066, + "learning_rate": 0.000988914095397828, + "loss": 1.6652, + "step": 3819 + }, + { + "epoch": 0.4029535864978903, + "grad_norm": 0.5203555226325989, + "learning_rate": 0.0009886761012653062, + "loss": 1.619, + "step": 3820 + }, + { + "epoch": 0.4030590717299578, + "grad_norm": 0.6966664791107178, + "learning_rate": 0.000988438080387937, + "loss": 1.6556, + "step": 3821 + }, + { + "epoch": 0.4031645569620253, + "grad_norm": 0.5943464636802673, + "learning_rate": 0.000988200032792392, + "loss": 1.6066, + "step": 3822 + }, + { + "epoch": 0.40327004219409285, + "grad_norm": 
0.6626752614974976, + "learning_rate": 0.0009879619585053455, + "loss": 1.6518, + "step": 3823 + }, + { + "epoch": 0.40337552742616034, + "grad_norm": 0.6849520206451416, + "learning_rate": 0.0009877238575534749, + "loss": 1.6549, + "step": 3824 + }, + { + "epoch": 0.40348101265822783, + "grad_norm": 0.6246000528335571, + "learning_rate": 0.0009874857299634605, + "loss": 1.6549, + "step": 3825 + }, + { + "epoch": 0.4035864978902954, + "grad_norm": 0.5911944508552551, + "learning_rate": 0.0009872475757619862, + "loss": 1.6803, + "step": 3826 + }, + { + "epoch": 0.4036919831223629, + "grad_norm": 0.604875922203064, + "learning_rate": 0.000987009394975738, + "loss": 1.6572, + "step": 3827 + }, + { + "epoch": 0.40379746835443037, + "grad_norm": 0.5662400126457214, + "learning_rate": 0.0009867711876314052, + "loss": 1.6219, + "step": 3828 + }, + { + "epoch": 0.4039029535864979, + "grad_norm": 0.5992848873138428, + "learning_rate": 0.00098653295375568, + "loss": 1.6398, + "step": 3829 + }, + { + "epoch": 0.4040084388185654, + "grad_norm": 0.6592227220535278, + "learning_rate": 0.000986294693375258, + "loss": 1.623, + "step": 3830 + }, + { + "epoch": 0.4041139240506329, + "grad_norm": 0.5122600793838501, + "learning_rate": 0.0009860564065168375, + "loss": 1.6617, + "step": 3831 + }, + { + "epoch": 0.40421940928270045, + "grad_norm": 0.7746385335922241, + "learning_rate": 0.0009858180932071192, + "loss": 1.6787, + "step": 3832 + }, + { + "epoch": 0.40432489451476794, + "grad_norm": 0.7554858326911926, + "learning_rate": 0.000985579753472808, + "loss": 1.6502, + "step": 3833 + }, + { + "epoch": 0.40443037974683543, + "grad_norm": 0.5768341422080994, + "learning_rate": 0.0009853413873406104, + "loss": 1.6454, + "step": 3834 + }, + { + "epoch": 0.4045358649789029, + "grad_norm": 0.6038499474525452, + "learning_rate": 0.000985102994837237, + "loss": 1.6454, + "step": 3835 + }, + { + "epoch": 0.4046413502109705, + "grad_norm": 0.6217444539070129, + "learning_rate": 
0.0009848645759894005, + "loss": 1.6738, + "step": 3836 + }, + { + "epoch": 0.40474683544303797, + "grad_norm": 0.5975627899169922, + "learning_rate": 0.0009846261308238177, + "loss": 1.6339, + "step": 3837 + }, + { + "epoch": 0.40485232067510546, + "grad_norm": 0.628990650177002, + "learning_rate": 0.0009843876593672064, + "loss": 1.6556, + "step": 3838 + }, + { + "epoch": 0.404957805907173, + "grad_norm": 0.5308504700660706, + "learning_rate": 0.0009841491616462892, + "loss": 1.6992, + "step": 3839 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 0.6042254567146301, + "learning_rate": 0.000983910637687791, + "loss": 1.6973, + "step": 3840 + }, + { + "epoch": 0.405168776371308, + "grad_norm": 0.5604141354560852, + "learning_rate": 0.0009836720875184394, + "loss": 1.6765, + "step": 3841 + }, + { + "epoch": 0.40527426160337554, + "grad_norm": 0.5410302877426147, + "learning_rate": 0.0009834335111649655, + "loss": 1.6407, + "step": 3842 + }, + { + "epoch": 0.40537974683544303, + "grad_norm": 0.5431283712387085, + "learning_rate": 0.0009831949086541024, + "loss": 1.6521, + "step": 3843 + }, + { + "epoch": 0.4054852320675105, + "grad_norm": 0.6217369437217712, + "learning_rate": 0.0009829562800125868, + "loss": 1.6445, + "step": 3844 + }, + { + "epoch": 0.4055907172995781, + "grad_norm": 0.7007007002830505, + "learning_rate": 0.0009827176252671587, + "loss": 1.6706, + "step": 3845 + }, + { + "epoch": 0.40569620253164557, + "grad_norm": 0.5597778558731079, + "learning_rate": 0.0009824789444445603, + "loss": 1.6353, + "step": 3846 + }, + { + "epoch": 0.40580168776371306, + "grad_norm": 0.7165943384170532, + "learning_rate": 0.0009822402375715366, + "loss": 1.6235, + "step": 3847 + }, + { + "epoch": 0.4059071729957806, + "grad_norm": 0.5568972826004028, + "learning_rate": 0.0009820015046748366, + "loss": 1.642, + "step": 3848 + }, + { + "epoch": 0.4060126582278481, + "grad_norm": 0.6357077956199646, + "learning_rate": 0.0009817627457812106, + "loss": 1.6356, + 
"step": 3849 + }, + { + "epoch": 0.4061181434599156, + "grad_norm": 0.6413879990577698, + "learning_rate": 0.0009815239609174138, + "loss": 1.6315, + "step": 3850 + }, + { + "epoch": 0.40622362869198314, + "grad_norm": 0.6354017853736877, + "learning_rate": 0.0009812851501102024, + "loss": 1.6987, + "step": 3851 + }, + { + "epoch": 0.40632911392405063, + "grad_norm": 0.5313035845756531, + "learning_rate": 0.0009810463133863368, + "loss": 1.6643, + "step": 3852 + }, + { + "epoch": 0.4064345991561181, + "grad_norm": 0.6084563732147217, + "learning_rate": 0.0009808074507725794, + "loss": 1.6718, + "step": 3853 + }, + { + "epoch": 0.4065400843881857, + "grad_norm": 0.5518197417259216, + "learning_rate": 0.0009805685622956966, + "loss": 1.6654, + "step": 3854 + }, + { + "epoch": 0.40664556962025317, + "grad_norm": 0.6068358421325684, + "learning_rate": 0.0009803296479824564, + "loss": 1.6467, + "step": 3855 + }, + { + "epoch": 0.40675105485232066, + "grad_norm": 0.6105945706367493, + "learning_rate": 0.0009800907078596308, + "loss": 1.669, + "step": 3856 + }, + { + "epoch": 0.4068565400843882, + "grad_norm": 0.750139057636261, + "learning_rate": 0.000979851741953994, + "loss": 1.6471, + "step": 3857 + }, + { + "epoch": 0.4069620253164557, + "grad_norm": 0.8996508717536926, + "learning_rate": 0.0009796127502923232, + "loss": 1.6569, + "step": 3858 + }, + { + "epoch": 0.4070675105485232, + "grad_norm": 0.5484583973884583, + "learning_rate": 0.000979373732901399, + "loss": 1.6347, + "step": 3859 + }, + { + "epoch": 0.40717299578059074, + "grad_norm": 0.967182457447052, + "learning_rate": 0.0009791346898080043, + "loss": 1.6481, + "step": 3860 + }, + { + "epoch": 0.40727848101265823, + "grad_norm": 0.7307468056678772, + "learning_rate": 0.000978895621038925, + "loss": 1.6452, + "step": 3861 + }, + { + "epoch": 0.4073839662447257, + "grad_norm": 0.6489903330802917, + "learning_rate": 0.0009786565266209496, + "loss": 1.6512, + "step": 3862 + }, + { + "epoch": 
0.4074894514767933, + "grad_norm": 0.7073720693588257, + "learning_rate": 0.0009784174065808706, + "loss": 1.668, + "step": 3863 + }, + { + "epoch": 0.40759493670886077, + "grad_norm": 0.5729940533638, + "learning_rate": 0.0009781782609454821, + "loss": 1.6162, + "step": 3864 + }, + { + "epoch": 0.40770042194092826, + "grad_norm": 0.952653169631958, + "learning_rate": 0.000977939089741582, + "loss": 1.6327, + "step": 3865 + }, + { + "epoch": 0.4078059071729958, + "grad_norm": 0.8836867213249207, + "learning_rate": 0.0009776998929959695, + "loss": 1.6778, + "step": 3866 + }, + { + "epoch": 0.4079113924050633, + "grad_norm": 0.568464994430542, + "learning_rate": 0.0009774606707354493, + "loss": 1.6216, + "step": 3867 + }, + { + "epoch": 0.4080168776371308, + "grad_norm": 0.707825779914856, + "learning_rate": 0.0009772214229868265, + "loss": 1.6349, + "step": 3868 + }, + { + "epoch": 0.4081223628691983, + "grad_norm": 0.601919949054718, + "learning_rate": 0.0009769821497769102, + "loss": 1.6802, + "step": 3869 + }, + { + "epoch": 0.40822784810126583, + "grad_norm": 0.5863027572631836, + "learning_rate": 0.0009767428511325122, + "loss": 1.661, + "step": 3870 + }, + { + "epoch": 0.4083333333333333, + "grad_norm": 0.7435557842254639, + "learning_rate": 0.000976503527080447, + "loss": 1.6314, + "step": 3871 + }, + { + "epoch": 0.4084388185654008, + "grad_norm": 0.5315307974815369, + "learning_rate": 0.0009762641776475322, + "loss": 1.6845, + "step": 3872 + }, + { + "epoch": 0.40854430379746837, + "grad_norm": 0.6763384342193604, + "learning_rate": 0.0009760248028605882, + "loss": 1.6428, + "step": 3873 + }, + { + "epoch": 0.40864978902953586, + "grad_norm": 0.6673433780670166, + "learning_rate": 0.0009757854027464377, + "loss": 1.6646, + "step": 3874 + }, + { + "epoch": 0.40875527426160335, + "grad_norm": 0.601343035697937, + "learning_rate": 0.000975545977331907, + "loss": 1.6559, + "step": 3875 + }, + { + "epoch": 0.4088607594936709, + "grad_norm": 0.6415640711784363, + 
"learning_rate": 0.0009753065266438249, + "loss": 1.6161, + "step": 3876 + }, + { + "epoch": 0.4089662447257384, + "grad_norm": 0.6785621047019958, + "learning_rate": 0.0009750670507090233, + "loss": 1.6125, + "step": 3877 + }, + { + "epoch": 0.4090717299578059, + "grad_norm": 0.5620672106742859, + "learning_rate": 0.000974827549554336, + "loss": 1.6074, + "step": 3878 + }, + { + "epoch": 0.40917721518987343, + "grad_norm": 0.5832816958427429, + "learning_rate": 0.0009745880232066007, + "loss": 1.6572, + "step": 3879 + }, + { + "epoch": 0.4092827004219409, + "grad_norm": 0.540829062461853, + "learning_rate": 0.0009743484716926576, + "loss": 1.6152, + "step": 3880 + }, + { + "epoch": 0.4093881856540084, + "grad_norm": 0.6119992733001709, + "learning_rate": 0.0009741088950393497, + "loss": 1.66, + "step": 3881 + }, + { + "epoch": 0.40949367088607597, + "grad_norm": 0.5452184081077576, + "learning_rate": 0.0009738692932735225, + "loss": 1.6302, + "step": 3882 + }, + { + "epoch": 0.40959915611814346, + "grad_norm": 0.7263383269309998, + "learning_rate": 0.0009736296664220247, + "loss": 1.6376, + "step": 3883 + }, + { + "epoch": 0.40970464135021095, + "grad_norm": 0.5777744054794312, + "learning_rate": 0.0009733900145117075, + "loss": 1.6218, + "step": 3884 + }, + { + "epoch": 0.4098101265822785, + "grad_norm": 0.6885830760002136, + "learning_rate": 0.0009731503375694253, + "loss": 1.6295, + "step": 3885 + }, + { + "epoch": 0.409915611814346, + "grad_norm": 0.531667172908783, + "learning_rate": 0.0009729106356220352, + "loss": 1.6552, + "step": 3886 + }, + { + "epoch": 0.4100210970464135, + "grad_norm": 0.6550934314727783, + "learning_rate": 0.0009726709086963967, + "loss": 1.6493, + "step": 3887 + }, + { + "epoch": 0.41012658227848103, + "grad_norm": 0.5898877382278442, + "learning_rate": 0.0009724311568193726, + "loss": 1.6608, + "step": 3888 + }, + { + "epoch": 0.4102320675105485, + "grad_norm": 0.6494988203048706, + "learning_rate": 0.0009721913800178281, + "loss": 
1.6189, + "step": 3889 + }, + { + "epoch": 0.410337552742616, + "grad_norm": 0.6539369821548462, + "learning_rate": 0.0009719515783186319, + "loss": 1.6124, + "step": 3890 + }, + { + "epoch": 0.41044303797468357, + "grad_norm": 0.5674626231193542, + "learning_rate": 0.0009717117517486543, + "loss": 1.634, + "step": 3891 + }, + { + "epoch": 0.41054852320675106, + "grad_norm": 0.6033061146736145, + "learning_rate": 0.0009714719003347693, + "loss": 1.6764, + "step": 3892 + }, + { + "epoch": 0.41065400843881855, + "grad_norm": 0.6769444346427917, + "learning_rate": 0.0009712320241038537, + "loss": 1.6696, + "step": 3893 + }, + { + "epoch": 0.4107594936708861, + "grad_norm": 0.65360027551651, + "learning_rate": 0.0009709921230827865, + "loss": 1.6417, + "step": 3894 + }, + { + "epoch": 0.4108649789029536, + "grad_norm": 0.5627012848854065, + "learning_rate": 0.00097075219729845, + "loss": 1.6419, + "step": 3895 + }, + { + "epoch": 0.4109704641350211, + "grad_norm": 0.739581823348999, + "learning_rate": 0.0009705122467777292, + "loss": 1.6091, + "step": 3896 + }, + { + "epoch": 0.41107594936708863, + "grad_norm": 0.5924693942070007, + "learning_rate": 0.0009702722715475113, + "loss": 1.6312, + "step": 3897 + }, + { + "epoch": 0.4111814345991561, + "grad_norm": 0.6310521364212036, + "learning_rate": 0.000970032271634687, + "loss": 1.6327, + "step": 3898 + }, + { + "epoch": 0.4112869198312236, + "grad_norm": 0.6449947953224182, + "learning_rate": 0.0009697922470661497, + "loss": 1.6154, + "step": 3899 + }, + { + "epoch": 0.41139240506329117, + "grad_norm": 0.905424952507019, + "learning_rate": 0.0009695521978687951, + "loss": 1.664, + "step": 3900 + }, + { + "epoch": 0.41149789029535866, + "grad_norm": 0.845976710319519, + "learning_rate": 0.0009693121240695216, + "loss": 1.6249, + "step": 3901 + }, + { + "epoch": 0.41160337552742615, + "grad_norm": 0.76848304271698, + "learning_rate": 0.0009690720256952314, + "loss": 1.6937, + "step": 3902 + }, + { + "epoch": 
0.41170886075949364, + "grad_norm": 0.7793521881103516, + "learning_rate": 0.0009688319027728282, + "loss": 1.6404, + "step": 3903 + }, + { + "epoch": 0.4118143459915612, + "grad_norm": 0.6974165439605713, + "learning_rate": 0.0009685917553292192, + "loss": 1.6311, + "step": 3904 + }, + { + "epoch": 0.4119198312236287, + "grad_norm": 0.7794814705848694, + "learning_rate": 0.0009683515833913137, + "loss": 1.6398, + "step": 3905 + }, + { + "epoch": 0.4120253164556962, + "grad_norm": 0.6510893702507019, + "learning_rate": 0.0009681113869860247, + "loss": 1.649, + "step": 3906 + }, + { + "epoch": 0.4121308016877637, + "grad_norm": 0.6310925483703613, + "learning_rate": 0.0009678711661402672, + "loss": 1.6093, + "step": 3907 + }, + { + "epoch": 0.4122362869198312, + "grad_norm": 0.8290175795555115, + "learning_rate": 0.0009676309208809592, + "loss": 1.6508, + "step": 3908 + }, + { + "epoch": 0.4123417721518987, + "grad_norm": 0.7311103343963623, + "learning_rate": 0.0009673906512350213, + "loss": 1.6351, + "step": 3909 + }, + { + "epoch": 0.41244725738396626, + "grad_norm": 0.6716174483299255, + "learning_rate": 0.0009671503572293767, + "loss": 1.6777, + "step": 3910 + }, + { + "epoch": 0.41255274261603375, + "grad_norm": 0.6191571950912476, + "learning_rate": 0.000966910038890952, + "loss": 1.6486, + "step": 3911 + }, + { + "epoch": 0.41265822784810124, + "grad_norm": 0.7472687363624573, + "learning_rate": 0.0009666696962466757, + "loss": 1.6965, + "step": 3912 + }, + { + "epoch": 0.4127637130801688, + "grad_norm": 0.6781683564186096, + "learning_rate": 0.0009664293293234795, + "loss": 1.654, + "step": 3913 + }, + { + "epoch": 0.4128691983122363, + "grad_norm": 0.6184729337692261, + "learning_rate": 0.0009661889381482977, + "loss": 1.5709, + "step": 3914 + }, + { + "epoch": 0.4129746835443038, + "grad_norm": 0.6964147090911865, + "learning_rate": 0.0009659485227480676, + "loss": 1.6373, + "step": 3915 + }, + { + "epoch": 0.4130801687763713, + "grad_norm": 
0.592530369758606, + "learning_rate": 0.0009657080831497284, + "loss": 1.6381, + "step": 3916 + }, + { + "epoch": 0.4131856540084388, + "grad_norm": 0.684333324432373, + "learning_rate": 0.0009654676193802232, + "loss": 1.6205, + "step": 3917 + }, + { + "epoch": 0.4132911392405063, + "grad_norm": 0.6437218189239502, + "learning_rate": 0.0009652271314664966, + "loss": 1.6603, + "step": 3918 + }, + { + "epoch": 0.41339662447257386, + "grad_norm": 0.7979083061218262, + "learning_rate": 0.0009649866194354967, + "loss": 1.6258, + "step": 3919 + }, + { + "epoch": 0.41350210970464135, + "grad_norm": 0.5732024312019348, + "learning_rate": 0.0009647460833141742, + "loss": 1.6688, + "step": 3920 + }, + { + "epoch": 0.41360759493670884, + "grad_norm": 0.8175686597824097, + "learning_rate": 0.0009645055231294823, + "loss": 1.6248, + "step": 3921 + }, + { + "epoch": 0.4137130801687764, + "grad_norm": 0.5453131794929504, + "learning_rate": 0.0009642649389083768, + "loss": 1.6913, + "step": 3922 + }, + { + "epoch": 0.4138185654008439, + "grad_norm": 0.7324654459953308, + "learning_rate": 0.0009640243306778162, + "loss": 1.6613, + "step": 3923 + }, + { + "epoch": 0.4139240506329114, + "grad_norm": 0.5243261456489563, + "learning_rate": 0.0009637836984647627, + "loss": 1.634, + "step": 3924 + }, + { + "epoch": 0.4140295358649789, + "grad_norm": 0.7016990780830383, + "learning_rate": 0.0009635430422961794, + "loss": 1.5977, + "step": 3925 + }, + { + "epoch": 0.4141350210970464, + "grad_norm": 0.5279157757759094, + "learning_rate": 0.0009633023621990334, + "loss": 1.6461, + "step": 3926 + }, + { + "epoch": 0.4142405063291139, + "grad_norm": 0.6616409420967102, + "learning_rate": 0.000963061658200294, + "loss": 1.6343, + "step": 3927 + }, + { + "epoch": 0.41434599156118146, + "grad_norm": 0.5490773320198059, + "learning_rate": 0.0009628209303269335, + "loss": 1.6487, + "step": 3928 + }, + { + "epoch": 0.41445147679324895, + "grad_norm": 0.5943078398704529, + "learning_rate": 
0.0009625801786059267, + "loss": 1.6621, + "step": 3929 + }, + { + "epoch": 0.41455696202531644, + "grad_norm": 0.5764214992523193, + "learning_rate": 0.0009623394030642507, + "loss": 1.6597, + "step": 3930 + }, + { + "epoch": 0.414662447257384, + "grad_norm": 0.5319805145263672, + "learning_rate": 0.0009620986037288858, + "loss": 1.613, + "step": 3931 + }, + { + "epoch": 0.4147679324894515, + "grad_norm": 0.6169564723968506, + "learning_rate": 0.0009618577806268147, + "loss": 1.6307, + "step": 3932 + }, + { + "epoch": 0.414873417721519, + "grad_norm": 0.5591673254966736, + "learning_rate": 0.0009616169337850229, + "loss": 1.6345, + "step": 3933 + }, + { + "epoch": 0.41497890295358647, + "grad_norm": 0.6217891573905945, + "learning_rate": 0.0009613760632304985, + "loss": 1.6521, + "step": 3934 + }, + { + "epoch": 0.415084388185654, + "grad_norm": 0.5587570667266846, + "learning_rate": 0.0009611351689902321, + "loss": 1.6673, + "step": 3935 + }, + { + "epoch": 0.4151898734177215, + "grad_norm": 0.6077689528465271, + "learning_rate": 0.000960894251091217, + "loss": 1.6328, + "step": 3936 + }, + { + "epoch": 0.415295358649789, + "grad_norm": 0.5554523468017578, + "learning_rate": 0.0009606533095604499, + "loss": 1.6757, + "step": 3937 + }, + { + "epoch": 0.41540084388185655, + "grad_norm": 0.6014025211334229, + "learning_rate": 0.0009604123444249288, + "loss": 1.5993, + "step": 3938 + }, + { + "epoch": 0.41550632911392404, + "grad_norm": 0.6304892301559448, + "learning_rate": 0.0009601713557116554, + "loss": 1.6557, + "step": 3939 + }, + { + "epoch": 0.41561181434599154, + "grad_norm": 0.6666033864021301, + "learning_rate": 0.0009599303434476334, + "loss": 1.6514, + "step": 3940 + }, + { + "epoch": 0.4157172995780591, + "grad_norm": 0.608586311340332, + "learning_rate": 0.0009596893076598698, + "loss": 1.6457, + "step": 3941 + }, + { + "epoch": 0.4158227848101266, + "grad_norm": 0.6121818423271179, + "learning_rate": 0.0009594482483753736, + "loss": 1.6819, + "step": 
3942 + }, + { + "epoch": 0.41592827004219407, + "grad_norm": 0.6267017126083374, + "learning_rate": 0.0009592071656211568, + "loss": 1.6409, + "step": 3943 + }, + { + "epoch": 0.4160337552742616, + "grad_norm": 0.5712522864341736, + "learning_rate": 0.0009589660594242338, + "loss": 1.6571, + "step": 3944 + }, + { + "epoch": 0.4161392405063291, + "grad_norm": 0.5639753341674805, + "learning_rate": 0.0009587249298116219, + "loss": 1.6271, + "step": 3945 + }, + { + "epoch": 0.4162447257383966, + "grad_norm": 0.6221401691436768, + "learning_rate": 0.0009584837768103408, + "loss": 1.6116, + "step": 3946 + }, + { + "epoch": 0.41635021097046415, + "grad_norm": 0.5739244222640991, + "learning_rate": 0.0009582426004474129, + "loss": 1.6725, + "step": 3947 + }, + { + "epoch": 0.41645569620253164, + "grad_norm": 0.5398848056793213, + "learning_rate": 0.0009580014007498634, + "loss": 1.6535, + "step": 3948 + }, + { + "epoch": 0.41656118143459914, + "grad_norm": 0.5804428458213806, + "learning_rate": 0.0009577601777447194, + "loss": 1.6305, + "step": 3949 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 0.6051232218742371, + "learning_rate": 0.0009575189314590118, + "loss": 1.6527, + "step": 3950 + }, + { + "epoch": 0.4167721518987342, + "grad_norm": 0.5239699482917786, + "learning_rate": 0.0009572776619197731, + "loss": 1.6635, + "step": 3951 + }, + { + "epoch": 0.41687763713080167, + "grad_norm": 0.6122360825538635, + "learning_rate": 0.0009570363691540387, + "loss": 1.6476, + "step": 3952 + }, + { + "epoch": 0.4169831223628692, + "grad_norm": 0.5692673921585083, + "learning_rate": 0.0009567950531888469, + "loss": 1.677, + "step": 3953 + }, + { + "epoch": 0.4170886075949367, + "grad_norm": 0.7856029272079468, + "learning_rate": 0.0009565537140512381, + "loss": 1.6437, + "step": 3954 + }, + { + "epoch": 0.4171940928270042, + "grad_norm": 0.5810770988464355, + "learning_rate": 0.0009563123517682559, + "loss": 1.6357, + "step": 3955 + }, + { + "epoch": 0.41729957805907175, 
+ "grad_norm": 0.6715772151947021, + "learning_rate": 0.0009560709663669456, + "loss": 1.6206, + "step": 3956 + }, + { + "epoch": 0.41740506329113924, + "grad_norm": 0.6135169267654419, + "learning_rate": 0.0009558295578743559, + "loss": 1.6442, + "step": 3957 + }, + { + "epoch": 0.41751054852320674, + "grad_norm": 0.6120316982269287, + "learning_rate": 0.0009555881263175381, + "loss": 1.6845, + "step": 3958 + }, + { + "epoch": 0.4176160337552743, + "grad_norm": 0.5774263143539429, + "learning_rate": 0.0009553466717235456, + "loss": 1.6045, + "step": 3959 + }, + { + "epoch": 0.4177215189873418, + "grad_norm": 0.5778355002403259, + "learning_rate": 0.0009551051941194346, + "loss": 1.6556, + "step": 3960 + }, + { + "epoch": 0.41782700421940927, + "grad_norm": 0.5736408829689026, + "learning_rate": 0.0009548636935322639, + "loss": 1.6044, + "step": 3961 + }, + { + "epoch": 0.4179324894514768, + "grad_norm": 0.6808750629425049, + "learning_rate": 0.0009546221699890945, + "loss": 1.6207, + "step": 3962 + }, + { + "epoch": 0.4180379746835443, + "grad_norm": 0.516189694404602, + "learning_rate": 0.0009543806235169909, + "loss": 1.6235, + "step": 3963 + }, + { + "epoch": 0.4181434599156118, + "grad_norm": 0.6558707356452942, + "learning_rate": 0.0009541390541430192, + "loss": 1.6673, + "step": 3964 + }, + { + "epoch": 0.41824894514767935, + "grad_norm": 0.5743632912635803, + "learning_rate": 0.0009538974618942486, + "loss": 1.6593, + "step": 3965 + }, + { + "epoch": 0.41835443037974684, + "grad_norm": 0.5780421495437622, + "learning_rate": 0.0009536558467977505, + "loss": 1.6216, + "step": 3966 + }, + { + "epoch": 0.41845991561181434, + "grad_norm": 0.717409610748291, + "learning_rate": 0.0009534142088805994, + "loss": 1.6384, + "step": 3967 + }, + { + "epoch": 0.41856540084388183, + "grad_norm": 0.8104245662689209, + "learning_rate": 0.0009531725481698719, + "loss": 1.6379, + "step": 3968 + }, + { + "epoch": 0.4186708860759494, + "grad_norm": 0.5698139667510986, + 
"learning_rate": 0.0009529308646926473, + "loss": 1.6509, + "step": 3969 + }, + { + "epoch": 0.41877637130801687, + "grad_norm": 0.7460362911224365, + "learning_rate": 0.0009526891584760071, + "loss": 1.6203, + "step": 3970 + }, + { + "epoch": 0.41888185654008436, + "grad_norm": 0.5834997892379761, + "learning_rate": 0.0009524474295470362, + "loss": 1.6659, + "step": 3971 + }, + { + "epoch": 0.4189873417721519, + "grad_norm": 0.6851149201393127, + "learning_rate": 0.0009522056779328214, + "loss": 1.634, + "step": 3972 + }, + { + "epoch": 0.4190928270042194, + "grad_norm": 0.674410343170166, + "learning_rate": 0.0009519639036604522, + "loss": 1.6694, + "step": 3973 + }, + { + "epoch": 0.4191983122362869, + "grad_norm": 0.5989243984222412, + "learning_rate": 0.0009517221067570204, + "loss": 1.6359, + "step": 3974 + }, + { + "epoch": 0.41930379746835444, + "grad_norm": 0.6868027448654175, + "learning_rate": 0.0009514802872496205, + "loss": 1.6441, + "step": 3975 + }, + { + "epoch": 0.41940928270042194, + "grad_norm": 0.635015606880188, + "learning_rate": 0.0009512384451653499, + "loss": 1.6325, + "step": 3976 + }, + { + "epoch": 0.41951476793248943, + "grad_norm": 0.8550861477851868, + "learning_rate": 0.000950996580531308, + "loss": 1.652, + "step": 3977 + }, + { + "epoch": 0.419620253164557, + "grad_norm": 0.6839339733123779, + "learning_rate": 0.000950754693374597, + "loss": 1.6095, + "step": 3978 + }, + { + "epoch": 0.41972573839662447, + "grad_norm": 0.5813921093940735, + "learning_rate": 0.0009505127837223215, + "loss": 1.6506, + "step": 3979 + }, + { + "epoch": 0.41983122362869196, + "grad_norm": 0.8001173138618469, + "learning_rate": 0.0009502708516015889, + "loss": 1.6757, + "step": 3980 + }, + { + "epoch": 0.4199367088607595, + "grad_norm": 0.5629385113716125, + "learning_rate": 0.0009500288970395085, + "loss": 1.6363, + "step": 3981 + }, + { + "epoch": 0.420042194092827, + "grad_norm": 0.6945555210113525, + "learning_rate": 0.000949786920063193, + "loss": 
1.6504, + "step": 3982 + }, + { + "epoch": 0.4201476793248945, + "grad_norm": 0.874769389629364, + "learning_rate": 0.0009495449206997568, + "loss": 1.6308, + "step": 3983 + }, + { + "epoch": 0.42025316455696204, + "grad_norm": 0.674446165561676, + "learning_rate": 0.0009493028989763171, + "loss": 1.6533, + "step": 3984 + }, + { + "epoch": 0.42035864978902954, + "grad_norm": 0.6741652488708496, + "learning_rate": 0.0009490608549199939, + "loss": 1.6314, + "step": 3985 + }, + { + "epoch": 0.42046413502109703, + "grad_norm": 0.75823575258255, + "learning_rate": 0.0009488187885579092, + "loss": 1.6336, + "step": 3986 + }, + { + "epoch": 0.4205696202531646, + "grad_norm": 0.5701711773872375, + "learning_rate": 0.000948576699917188, + "loss": 1.6192, + "step": 3987 + }, + { + "epoch": 0.42067510548523207, + "grad_norm": 0.620676577091217, + "learning_rate": 0.0009483345890249571, + "loss": 1.6406, + "step": 3988 + }, + { + "epoch": 0.42078059071729956, + "grad_norm": 0.7851098775863647, + "learning_rate": 0.0009480924559083468, + "loss": 1.6493, + "step": 3989 + }, + { + "epoch": 0.4208860759493671, + "grad_norm": 0.7329517006874084, + "learning_rate": 0.0009478503005944888, + "loss": 1.6552, + "step": 3990 + }, + { + "epoch": 0.4209915611814346, + "grad_norm": 0.6379110217094421, + "learning_rate": 0.0009476081231105183, + "loss": 1.6591, + "step": 3991 + }, + { + "epoch": 0.4210970464135021, + "grad_norm": 0.7337241768836975, + "learning_rate": 0.0009473659234835722, + "loss": 1.6582, + "step": 3992 + }, + { + "epoch": 0.42120253164556964, + "grad_norm": 0.5749568939208984, + "learning_rate": 0.00094712370174079, + "loss": 1.6407, + "step": 3993 + }, + { + "epoch": 0.42130801687763714, + "grad_norm": 0.668973982334137, + "learning_rate": 0.0009468814579093141, + "loss": 1.6546, + "step": 3994 + }, + { + "epoch": 0.42141350210970463, + "grad_norm": 0.5478916168212891, + "learning_rate": 0.0009466391920162894, + "loss": 1.641, + "step": 3995 + }, + { + "epoch": 
0.4215189873417722, + "grad_norm": 0.7187995910644531, + "learning_rate": 0.0009463969040888624, + "loss": 1.6613, + "step": 3996 + }, + { + "epoch": 0.42162447257383967, + "grad_norm": 0.5691145658493042, + "learning_rate": 0.0009461545941541832, + "loss": 1.6681, + "step": 3997 + }, + { + "epoch": 0.42172995780590716, + "grad_norm": 0.71571284532547, + "learning_rate": 0.0009459122622394033, + "loss": 1.6345, + "step": 3998 + }, + { + "epoch": 0.4218354430379747, + "grad_norm": 0.6386890411376953, + "learning_rate": 0.0009456699083716777, + "loss": 1.6601, + "step": 3999 + }, + { + "epoch": 0.4219409282700422, + "grad_norm": 0.8301774859428406, + "learning_rate": 0.0009454275325781632, + "loss": 1.6432, + "step": 4000 + }, + { + "epoch": 0.4220464135021097, + "grad_norm": 0.99766606092453, + "learning_rate": 0.0009451851348860191, + "loss": 1.6301, + "step": 4001 + }, + { + "epoch": 0.4221518987341772, + "grad_norm": 0.55620276927948, + "learning_rate": 0.0009449427153224076, + "loss": 1.6338, + "step": 4002 + }, + { + "epoch": 0.42225738396624474, + "grad_norm": 0.9699079394340515, + "learning_rate": 0.0009447002739144924, + "loss": 1.6249, + "step": 4003 + }, + { + "epoch": 0.42236286919831223, + "grad_norm": 0.7678016424179077, + "learning_rate": 0.0009444578106894408, + "loss": 1.6583, + "step": 4004 + }, + { + "epoch": 0.4224683544303797, + "grad_norm": 0.7136052846908569, + "learning_rate": 0.000944215325674422, + "loss": 1.6262, + "step": 4005 + }, + { + "epoch": 0.42257383966244727, + "grad_norm": 0.9597100019454956, + "learning_rate": 0.0009439728188966074, + "loss": 1.6654, + "step": 4006 + }, + { + "epoch": 0.42267932489451476, + "grad_norm": 0.5385827422142029, + "learning_rate": 0.0009437302903831712, + "loss": 1.6505, + "step": 4007 + }, + { + "epoch": 0.42278481012658226, + "grad_norm": 0.821671724319458, + "learning_rate": 0.0009434877401612898, + "loss": 1.6215, + "step": 4008 + }, + { + "epoch": 0.4228902953586498, + "grad_norm": 
0.6220433115959167, + "learning_rate": 0.0009432451682581424, + "loss": 1.6374, + "step": 4009 + }, + { + "epoch": 0.4229957805907173, + "grad_norm": 0.6891624331474304, + "learning_rate": 0.0009430025747009104, + "loss": 1.6662, + "step": 4010 + }, + { + "epoch": 0.4231012658227848, + "grad_norm": 0.6272874474525452, + "learning_rate": 0.0009427599595167776, + "loss": 1.626, + "step": 4011 + }, + { + "epoch": 0.42320675105485234, + "grad_norm": 0.612480878829956, + "learning_rate": 0.0009425173227329297, + "loss": 1.6079, + "step": 4012 + }, + { + "epoch": 0.42331223628691983, + "grad_norm": 0.6946389675140381, + "learning_rate": 0.0009422746643765563, + "loss": 1.6275, + "step": 4013 + }, + { + "epoch": 0.4234177215189873, + "grad_norm": 0.6747690439224243, + "learning_rate": 0.0009420319844748476, + "loss": 1.6429, + "step": 4014 + }, + { + "epoch": 0.42352320675105487, + "grad_norm": 0.543289303779602, + "learning_rate": 0.0009417892830549978, + "loss": 1.6627, + "step": 4015 + }, + { + "epoch": 0.42362869198312236, + "grad_norm": 0.6255531311035156, + "learning_rate": 0.0009415465601442023, + "loss": 1.6513, + "step": 4016 + }, + { + "epoch": 0.42373417721518986, + "grad_norm": 0.6193114519119263, + "learning_rate": 0.0009413038157696595, + "loss": 1.6353, + "step": 4017 + }, + { + "epoch": 0.4238396624472574, + "grad_norm": 0.555911660194397, + "learning_rate": 0.0009410610499585705, + "loss": 1.6593, + "step": 4018 + }, + { + "epoch": 0.4239451476793249, + "grad_norm": 0.6538995504379272, + "learning_rate": 0.000940818262738138, + "loss": 1.6187, + "step": 4019 + }, + { + "epoch": 0.4240506329113924, + "grad_norm": 0.6128061413764954, + "learning_rate": 0.0009405754541355677, + "loss": 1.6763, + "step": 4020 + }, + { + "epoch": 0.42415611814345994, + "grad_norm": 0.5812502503395081, + "learning_rate": 0.0009403326241780674, + "loss": 1.614, + "step": 4021 + }, + { + "epoch": 0.42426160337552743, + "grad_norm": 0.5825834274291992, + "learning_rate": 
0.0009400897728928475, + "loss": 1.6614, + "step": 4022 + }, + { + "epoch": 0.4243670886075949, + "grad_norm": 0.5616243481636047, + "learning_rate": 0.0009398469003071207, + "loss": 1.6194, + "step": 4023 + }, + { + "epoch": 0.42447257383966247, + "grad_norm": 0.5227155685424805, + "learning_rate": 0.0009396040064481021, + "loss": 1.6277, + "step": 4024 + }, + { + "epoch": 0.42457805907172996, + "grad_norm": 0.5865626931190491, + "learning_rate": 0.000939361091343009, + "loss": 1.6631, + "step": 4025 + }, + { + "epoch": 0.42468354430379746, + "grad_norm": 0.6305036544799805, + "learning_rate": 0.0009391181550190615, + "loss": 1.6545, + "step": 4026 + }, + { + "epoch": 0.424789029535865, + "grad_norm": 0.6208937168121338, + "learning_rate": 0.0009388751975034815, + "loss": 1.6096, + "step": 4027 + }, + { + "epoch": 0.4248945147679325, + "grad_norm": 0.5126195549964905, + "learning_rate": 0.0009386322188234941, + "loss": 1.6358, + "step": 4028 + }, + { + "epoch": 0.425, + "grad_norm": 0.536403477191925, + "learning_rate": 0.0009383892190063256, + "loss": 1.6116, + "step": 4029 + }, + { + "epoch": 0.42510548523206754, + "grad_norm": 0.6069897413253784, + "learning_rate": 0.0009381461980792061, + "loss": 1.6353, + "step": 4030 + }, + { + "epoch": 0.42521097046413503, + "grad_norm": 0.544500470161438, + "learning_rate": 0.0009379031560693665, + "loss": 1.6184, + "step": 4031 + }, + { + "epoch": 0.4253164556962025, + "grad_norm": 0.6277579069137573, + "learning_rate": 0.0009376600930040417, + "loss": 1.6642, + "step": 4032 + }, + { + "epoch": 0.42542194092827, + "grad_norm": 0.5815123915672302, + "learning_rate": 0.0009374170089104676, + "loss": 1.6555, + "step": 4033 + }, + { + "epoch": 0.42552742616033756, + "grad_norm": 0.5328850150108337, + "learning_rate": 0.000937173903815883, + "loss": 1.6376, + "step": 4034 + }, + { + "epoch": 0.42563291139240506, + "grad_norm": 0.5895113945007324, + "learning_rate": 0.0009369307777475293, + "loss": 1.6509, + "step": 4035 + }, + 
{ + "epoch": 0.42573839662447255, + "grad_norm": 0.5351682305335999, + "learning_rate": 0.0009366876307326496, + "loss": 1.6239, + "step": 4036 + }, + { + "epoch": 0.4258438818565401, + "grad_norm": 0.5492815375328064, + "learning_rate": 0.0009364444627984902, + "loss": 1.6641, + "step": 4037 + }, + { + "epoch": 0.4259493670886076, + "grad_norm": 0.5861501693725586, + "learning_rate": 0.000936201273972299, + "loss": 1.652, + "step": 4038 + }, + { + "epoch": 0.4260548523206751, + "grad_norm": 0.5302600860595703, + "learning_rate": 0.0009359580642813265, + "loss": 1.6176, + "step": 4039 + }, + { + "epoch": 0.42616033755274263, + "grad_norm": 0.5744174718856812, + "learning_rate": 0.0009357148337528256, + "loss": 1.6311, + "step": 4040 + }, + { + "epoch": 0.4262658227848101, + "grad_norm": 0.5232083797454834, + "learning_rate": 0.0009354715824140515, + "loss": 1.6407, + "step": 4041 + }, + { + "epoch": 0.4263713080168776, + "grad_norm": 0.5521926283836365, + "learning_rate": 0.0009352283102922619, + "loss": 1.6618, + "step": 4042 + }, + { + "epoch": 0.42647679324894516, + "grad_norm": 0.5929190516471863, + "learning_rate": 0.0009349850174147165, + "loss": 1.6157, + "step": 4043 + }, + { + "epoch": 0.42658227848101266, + "grad_norm": 0.5471429824829102, + "learning_rate": 0.0009347417038086772, + "loss": 1.6389, + "step": 4044 + }, + { + "epoch": 0.42668776371308015, + "grad_norm": 0.6203145384788513, + "learning_rate": 0.000934498369501409, + "loss": 1.6856, + "step": 4045 + }, + { + "epoch": 0.4267932489451477, + "grad_norm": 0.6365582346916199, + "learning_rate": 0.0009342550145201786, + "loss": 1.6203, + "step": 4046 + }, + { + "epoch": 0.4268987341772152, + "grad_norm": 0.5569519400596619, + "learning_rate": 0.0009340116388922551, + "loss": 1.6341, + "step": 4047 + }, + { + "epoch": 0.4270042194092827, + "grad_norm": 0.5922760963439941, + "learning_rate": 0.0009337682426449097, + "loss": 1.6529, + "step": 4048 + }, + { + "epoch": 0.42710970464135023, + 
"grad_norm": 0.6538982391357422, + "learning_rate": 0.0009335248258054162, + "loss": 1.6273, + "step": 4049 + }, + { + "epoch": 0.4272151898734177, + "grad_norm": 0.5168061852455139, + "learning_rate": 0.0009332813884010511, + "loss": 1.6506, + "step": 4050 + }, + { + "epoch": 0.4273206751054852, + "grad_norm": 0.7148412466049194, + "learning_rate": 0.0009330379304590924, + "loss": 1.6183, + "step": 4051 + }, + { + "epoch": 0.42742616033755276, + "grad_norm": 0.7118167877197266, + "learning_rate": 0.000932794452006821, + "loss": 1.6667, + "step": 4052 + }, + { + "epoch": 0.42753164556962026, + "grad_norm": 0.563317060470581, + "learning_rate": 0.0009325509530715196, + "loss": 1.654, + "step": 4053 + }, + { + "epoch": 0.42763713080168775, + "grad_norm": 0.6382344961166382, + "learning_rate": 0.0009323074336804738, + "loss": 1.6391, + "step": 4054 + }, + { + "epoch": 0.4277426160337553, + "grad_norm": 0.7027692794799805, + "learning_rate": 0.0009320638938609708, + "loss": 1.6647, + "step": 4055 + }, + { + "epoch": 0.4278481012658228, + "grad_norm": 0.5869139432907104, + "learning_rate": 0.0009318203336403008, + "loss": 1.6303, + "step": 4056 + }, + { + "epoch": 0.4279535864978903, + "grad_norm": 0.6656413078308105, + "learning_rate": 0.0009315767530457556, + "loss": 1.6289, + "step": 4057 + }, + { + "epoch": 0.42805907172995783, + "grad_norm": 0.6228660345077515, + "learning_rate": 0.0009313331521046299, + "loss": 1.6632, + "step": 4058 + }, + { + "epoch": 0.4281645569620253, + "grad_norm": 0.5668264031410217, + "learning_rate": 0.0009310895308442202, + "loss": 1.6814, + "step": 4059 + }, + { + "epoch": 0.4282700421940928, + "grad_norm": 0.7588785886764526, + "learning_rate": 0.0009308458892918259, + "loss": 1.6364, + "step": 4060 + }, + { + "epoch": 0.42837552742616036, + "grad_norm": 0.5590380430221558, + "learning_rate": 0.0009306022274747478, + "loss": 1.653, + "step": 4061 + }, + { + "epoch": 0.42848101265822786, + "grad_norm": 0.7446161508560181, + 
"learning_rate": 0.0009303585454202892, + "loss": 1.6349, + "step": 4062 + }, + { + "epoch": 0.42858649789029535, + "grad_norm": 0.5464650392532349, + "learning_rate": 0.0009301148431557565, + "loss": 1.5993, + "step": 4063 + }, + { + "epoch": 0.4286919831223629, + "grad_norm": 0.8709349632263184, + "learning_rate": 0.0009298711207084575, + "loss": 1.625, + "step": 4064 + }, + { + "epoch": 0.4287974683544304, + "grad_norm": 0.9297404885292053, + "learning_rate": 0.0009296273781057026, + "loss": 1.6781, + "step": 4065 + }, + { + "epoch": 0.4289029535864979, + "grad_norm": 0.6593539714813232, + "learning_rate": 0.0009293836153748039, + "loss": 1.6354, + "step": 4066 + }, + { + "epoch": 0.4290084388185654, + "grad_norm": 0.7118033766746521, + "learning_rate": 0.0009291398325430771, + "loss": 1.638, + "step": 4067 + }, + { + "epoch": 0.4291139240506329, + "grad_norm": 0.5911070108413696, + "learning_rate": 0.0009288960296378386, + "loss": 1.62, + "step": 4068 + }, + { + "epoch": 0.4292194092827004, + "grad_norm": 0.8550640344619751, + "learning_rate": 0.0009286522066864078, + "loss": 1.6994, + "step": 4069 + }, + { + "epoch": 0.4293248945147679, + "grad_norm": 0.7787663340568542, + "learning_rate": 0.0009284083637161064, + "loss": 1.6507, + "step": 4070 + }, + { + "epoch": 0.42943037974683546, + "grad_norm": 0.684658944606781, + "learning_rate": 0.0009281645007542584, + "loss": 1.6763, + "step": 4071 + }, + { + "epoch": 0.42953586497890295, + "grad_norm": 1.149645447731018, + "learning_rate": 0.0009279206178281895, + "loss": 1.6292, + "step": 4072 + }, + { + "epoch": 0.42964135021097044, + "grad_norm": 0.5980559587478638, + "learning_rate": 0.0009276767149652284, + "loss": 1.6378, + "step": 4073 + }, + { + "epoch": 0.429746835443038, + "grad_norm": 1.3326762914657593, + "learning_rate": 0.0009274327921927054, + "loss": 1.6426, + "step": 4074 + }, + { + "epoch": 0.4298523206751055, + "grad_norm": 0.6543900966644287, + "learning_rate": 0.0009271888495379529, + "loss": 
1.6853, + "step": 4075 + }, + { + "epoch": 0.429957805907173, + "grad_norm": 1.108445167541504, + "learning_rate": 0.0009269448870283067, + "loss": 1.653, + "step": 4076 + }, + { + "epoch": 0.4300632911392405, + "grad_norm": 0.6608222126960754, + "learning_rate": 0.0009267009046911032, + "loss": 1.6674, + "step": 4077 + }, + { + "epoch": 0.430168776371308, + "grad_norm": 0.8959518074989319, + "learning_rate": 0.0009264569025536825, + "loss": 1.6508, + "step": 4078 + }, + { + "epoch": 0.4302742616033755, + "grad_norm": 0.683873176574707, + "learning_rate": 0.0009262128806433858, + "loss": 1.6483, + "step": 4079 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 0.9004800915718079, + "learning_rate": 0.0009259688389875574, + "loss": 1.619, + "step": 4080 + }, + { + "epoch": 0.43048523206751055, + "grad_norm": 1.0766726732254028, + "learning_rate": 0.000925724777613543, + "loss": 1.6483, + "step": 4081 + }, + { + "epoch": 0.43059071729957804, + "grad_norm": 0.6865494251251221, + "learning_rate": 0.0009254806965486909, + "loss": 1.6716, + "step": 4082 + }, + { + "epoch": 0.4306962025316456, + "grad_norm": 1.0204356908798218, + "learning_rate": 0.0009252365958203518, + "loss": 1.6689, + "step": 4083 + }, + { + "epoch": 0.4308016877637131, + "grad_norm": 0.7572318315505981, + "learning_rate": 0.0009249924754558785, + "loss": 1.6133, + "step": 4084 + }, + { + "epoch": 0.4309071729957806, + "grad_norm": 1.1099631786346436, + "learning_rate": 0.0009247483354826255, + "loss": 1.6313, + "step": 4085 + }, + { + "epoch": 0.4310126582278481, + "grad_norm": 0.7493747472763062, + "learning_rate": 0.0009245041759279502, + "loss": 1.6549, + "step": 4086 + }, + { + "epoch": 0.4311181434599156, + "grad_norm": 0.8631014227867126, + "learning_rate": 0.0009242599968192119, + "loss": 1.6646, + "step": 4087 + }, + { + "epoch": 0.4312236286919831, + "grad_norm": 0.8690780997276306, + "learning_rate": 0.000924015798183772, + "loss": 1.6321, + "step": 4088 + }, + { + "epoch": 
0.43132911392405066, + "grad_norm": 0.6899272799491882, + "learning_rate": 0.0009237715800489942, + "loss": 1.6158, + "step": 4089 + }, + { + "epoch": 0.43143459915611815, + "grad_norm": 0.6589668989181519, + "learning_rate": 0.0009235273424422442, + "loss": 1.6233, + "step": 4090 + }, + { + "epoch": 0.43154008438818564, + "grad_norm": 0.6905503869056702, + "learning_rate": 0.0009232830853908904, + "loss": 1.679, + "step": 4091 + }, + { + "epoch": 0.4316455696202532, + "grad_norm": 0.6659833788871765, + "learning_rate": 0.0009230388089223028, + "loss": 1.6414, + "step": 4092 + }, + { + "epoch": 0.4317510548523207, + "grad_norm": 0.7689623832702637, + "learning_rate": 0.0009227945130638537, + "loss": 1.6426, + "step": 4093 + }, + { + "epoch": 0.4318565400843882, + "grad_norm": 0.6083974242210388, + "learning_rate": 0.0009225501978429177, + "loss": 1.6488, + "step": 4094 + }, + { + "epoch": 0.4319620253164557, + "grad_norm": 0.8494607210159302, + "learning_rate": 0.0009223058632868719, + "loss": 1.6374, + "step": 4095 + }, + { + "epoch": 0.4320675105485232, + "grad_norm": 0.653774082660675, + "learning_rate": 0.0009220615094230946, + "loss": 1.652, + "step": 4096 + }, + { + "epoch": 0.4321729957805907, + "grad_norm": 0.8472821712493896, + "learning_rate": 0.0009218171362789674, + "loss": 1.6302, + "step": 4097 + }, + { + "epoch": 0.43227848101265826, + "grad_norm": 0.5767624974250793, + "learning_rate": 0.0009215727438818733, + "loss": 1.6494, + "step": 4098 + }, + { + "epoch": 0.43238396624472575, + "grad_norm": 0.7375500202178955, + "learning_rate": 0.0009213283322591977, + "loss": 1.5987, + "step": 4099 + }, + { + "epoch": 0.43248945147679324, + "grad_norm": 0.571069061756134, + "learning_rate": 0.0009210839014383282, + "loss": 1.6239, + "step": 4100 + }, + { + "epoch": 0.43259493670886073, + "grad_norm": 0.6687952876091003, + "learning_rate": 0.0009208394514466544, + "loss": 1.6298, + "step": 4101 + }, + { + "epoch": 0.4327004219409283, + "grad_norm": 
0.5352658033370972, + "learning_rate": 0.0009205949823115681, + "loss": 1.6135, + "step": 4102 + }, + { + "epoch": 0.4328059071729958, + "grad_norm": 0.6313284635543823, + "learning_rate": 0.0009203504940604634, + "loss": 1.5705, + "step": 4103 + }, + { + "epoch": 0.43291139240506327, + "grad_norm": 0.5243473052978516, + "learning_rate": 0.0009201059867207366, + "loss": 1.6376, + "step": 4104 + }, + { + "epoch": 0.4330168776371308, + "grad_norm": 0.6033046245574951, + "learning_rate": 0.0009198614603197854, + "loss": 1.6188, + "step": 4105 + }, + { + "epoch": 0.4331223628691983, + "grad_norm": 0.5272048711776733, + "learning_rate": 0.0009196169148850108, + "loss": 1.6552, + "step": 4106 + }, + { + "epoch": 0.4332278481012658, + "grad_norm": 0.5851149559020996, + "learning_rate": 0.000919372350443815, + "loss": 1.6583, + "step": 4107 + }, + { + "epoch": 0.43333333333333335, + "grad_norm": 0.677655816078186, + "learning_rate": 0.000919127767023603, + "loss": 1.6302, + "step": 4108 + }, + { + "epoch": 0.43343881856540084, + "grad_norm": 0.6235166192054749, + "learning_rate": 0.000918883164651781, + "loss": 1.6656, + "step": 4109 + }, + { + "epoch": 0.43354430379746833, + "grad_norm": 0.6033297777175903, + "learning_rate": 0.0009186385433557584, + "loss": 1.6622, + "step": 4110 + }, + { + "epoch": 0.4336497890295359, + "grad_norm": 0.6489992737770081, + "learning_rate": 0.0009183939031629462, + "loss": 1.6214, + "step": 4111 + }, + { + "epoch": 0.4337552742616034, + "grad_norm": 0.9032720923423767, + "learning_rate": 0.0009181492441007577, + "loss": 1.602, + "step": 4112 + }, + { + "epoch": 0.43386075949367087, + "grad_norm": 0.7121887803077698, + "learning_rate": 0.0009179045661966075, + "loss": 1.6497, + "step": 4113 + }, + { + "epoch": 0.4339662447257384, + "grad_norm": 0.7119659185409546, + "learning_rate": 0.0009176598694779134, + "loss": 1.6377, + "step": 4114 + }, + { + "epoch": 0.4340717299578059, + "grad_norm": 0.7268553376197815, + "learning_rate": 
0.0009174151539720953, + "loss": 1.6433, + "step": 4115 + }, + { + "epoch": 0.4341772151898734, + "grad_norm": 0.7835707068443298, + "learning_rate": 0.0009171704197065741, + "loss": 1.6307, + "step": 4116 + }, + { + "epoch": 0.43428270042194095, + "grad_norm": 0.9590549468994141, + "learning_rate": 0.0009169256667087738, + "loss": 1.638, + "step": 4117 + }, + { + "epoch": 0.43438818565400844, + "grad_norm": 0.6300780177116394, + "learning_rate": 0.0009166808950061202, + "loss": 1.6271, + "step": 4118 + }, + { + "epoch": 0.43449367088607593, + "grad_norm": 0.9851877689361572, + "learning_rate": 0.0009164361046260412, + "loss": 1.6672, + "step": 4119 + }, + { + "epoch": 0.4345991561181435, + "grad_norm": 0.8240021467208862, + "learning_rate": 0.0009161912955959668, + "loss": 1.6565, + "step": 4120 + }, + { + "epoch": 0.434704641350211, + "grad_norm": 0.7372665405273438, + "learning_rate": 0.0009159464679433289, + "loss": 1.6429, + "step": 4121 + }, + { + "epoch": 0.43481012658227847, + "grad_norm": 0.6690071821212769, + "learning_rate": 0.0009157016216955618, + "loss": 1.6223, + "step": 4122 + }, + { + "epoch": 0.434915611814346, + "grad_norm": 0.8193141222000122, + "learning_rate": 0.0009154567568801019, + "loss": 1.6107, + "step": 4123 + }, + { + "epoch": 0.4350210970464135, + "grad_norm": 0.6735964417457581, + "learning_rate": 0.0009152118735243871, + "loss": 1.6414, + "step": 4124 + }, + { + "epoch": 0.435126582278481, + "grad_norm": 0.72186279296875, + "learning_rate": 0.0009149669716558582, + "loss": 1.6214, + "step": 4125 + }, + { + "epoch": 0.43523206751054855, + "grad_norm": 0.7412655353546143, + "learning_rate": 0.0009147220513019577, + "loss": 1.6321, + "step": 4126 + }, + { + "epoch": 0.43533755274261604, + "grad_norm": 0.6638069152832031, + "learning_rate": 0.0009144771124901295, + "loss": 1.6243, + "step": 4127 + }, + { + "epoch": 0.43544303797468353, + "grad_norm": 0.6223947405815125, + "learning_rate": 0.000914232155247821, + "loss": 1.638, + "step": 
4128 + }, + { + "epoch": 0.4355485232067511, + "grad_norm": 0.7217512726783752, + "learning_rate": 0.0009139871796024807, + "loss": 1.6517, + "step": 4129 + }, + { + "epoch": 0.4356540084388186, + "grad_norm": 0.541408121585846, + "learning_rate": 0.000913742185581559, + "loss": 1.642, + "step": 4130 + }, + { + "epoch": 0.43575949367088607, + "grad_norm": 0.6626996994018555, + "learning_rate": 0.0009134971732125088, + "loss": 1.6556, + "step": 4131 + }, + { + "epoch": 0.43586497890295356, + "grad_norm": 0.5374137759208679, + "learning_rate": 0.0009132521425227852, + "loss": 1.6099, + "step": 4132 + }, + { + "epoch": 0.4359704641350211, + "grad_norm": 0.6845840215682983, + "learning_rate": 0.0009130070935398451, + "loss": 1.6582, + "step": 4133 + }, + { + "epoch": 0.4360759493670886, + "grad_norm": 0.5516034364700317, + "learning_rate": 0.0009127620262911473, + "loss": 1.6538, + "step": 4134 + }, + { + "epoch": 0.4361814345991561, + "grad_norm": 0.6858668923377991, + "learning_rate": 0.0009125169408041526, + "loss": 1.6066, + "step": 4135 + }, + { + "epoch": 0.43628691983122364, + "grad_norm": 0.5623438358306885, + "learning_rate": 0.0009122718371063247, + "loss": 1.6213, + "step": 4136 + }, + { + "epoch": 0.43639240506329113, + "grad_norm": 0.5709373950958252, + "learning_rate": 0.0009120267152251281, + "loss": 1.6199, + "step": 4137 + }, + { + "epoch": 0.4364978902953586, + "grad_norm": 0.5398585200309753, + "learning_rate": 0.0009117815751880301, + "loss": 1.6337, + "step": 4138 + }, + { + "epoch": 0.4366033755274262, + "grad_norm": 0.5416811108589172, + "learning_rate": 0.0009115364170225, + "loss": 1.6443, + "step": 4139 + }, + { + "epoch": 0.43670886075949367, + "grad_norm": 0.6098245978355408, + "learning_rate": 0.0009112912407560086, + "loss": 1.6267, + "step": 4140 + }, + { + "epoch": 0.43681434599156116, + "grad_norm": 0.5820693373680115, + "learning_rate": 0.0009110460464160295, + "loss": 1.6147, + "step": 4141 + }, + { + "epoch": 0.4369198312236287, + 
"grad_norm": 0.5312711000442505, + "learning_rate": 0.000910800834030038, + "loss": 1.6386, + "step": 4142 + }, + { + "epoch": 0.4370253164556962, + "grad_norm": 0.5486575365066528, + "learning_rate": 0.0009105556036255113, + "loss": 1.6468, + "step": 4143 + }, + { + "epoch": 0.4371308016877637, + "grad_norm": 0.6270120143890381, + "learning_rate": 0.0009103103552299283, + "loss": 1.652, + "step": 4144 + }, + { + "epoch": 0.43723628691983124, + "grad_norm": 0.5441007018089294, + "learning_rate": 0.0009100650888707709, + "loss": 1.6152, + "step": 4145 + }, + { + "epoch": 0.43734177215189873, + "grad_norm": 0.6212572455406189, + "learning_rate": 0.000909819804575522, + "loss": 1.6173, + "step": 4146 + }, + { + "epoch": 0.4374472573839662, + "grad_norm": 0.5500609278678894, + "learning_rate": 0.0009095745023716671, + "loss": 1.6434, + "step": 4147 + }, + { + "epoch": 0.4375527426160338, + "grad_norm": 0.6327112317085266, + "learning_rate": 0.0009093291822866933, + "loss": 1.6839, + "step": 4148 + }, + { + "epoch": 0.43765822784810127, + "grad_norm": 0.6635929346084595, + "learning_rate": 0.0009090838443480903, + "loss": 1.6302, + "step": 4149 + }, + { + "epoch": 0.43776371308016876, + "grad_norm": 0.5722339153289795, + "learning_rate": 0.0009088384885833495, + "loss": 1.644, + "step": 4150 + }, + { + "epoch": 0.4378691983122363, + "grad_norm": 0.5798163414001465, + "learning_rate": 0.0009085931150199638, + "loss": 1.642, + "step": 4151 + }, + { + "epoch": 0.4379746835443038, + "grad_norm": 0.5827244520187378, + "learning_rate": 0.0009083477236854287, + "loss": 1.6534, + "step": 4152 + }, + { + "epoch": 0.4380801687763713, + "grad_norm": 0.7021118402481079, + "learning_rate": 0.0009081023146072414, + "loss": 1.6462, + "step": 4153 + }, + { + "epoch": 0.43818565400843884, + "grad_norm": 0.6189935803413391, + "learning_rate": 0.0009078568878129018, + "loss": 1.594, + "step": 4154 + }, + { + "epoch": 0.43829113924050633, + "grad_norm": 0.5898137092590332, + 
"learning_rate": 0.0009076114433299107, + "loss": 1.6395, + "step": 4155 + }, + { + "epoch": 0.4383966244725738, + "grad_norm": 0.5529583096504211, + "learning_rate": 0.0009073659811857712, + "loss": 1.6428, + "step": 4156 + }, + { + "epoch": 0.4385021097046414, + "grad_norm": 0.5973849296569824, + "learning_rate": 0.0009071205014079888, + "loss": 1.6491, + "step": 4157 + }, + { + "epoch": 0.43860759493670887, + "grad_norm": 0.5741589069366455, + "learning_rate": 0.0009068750040240709, + "loss": 1.6067, + "step": 4158 + }, + { + "epoch": 0.43871308016877636, + "grad_norm": 0.6605230569839478, + "learning_rate": 0.0009066294890615266, + "loss": 1.6287, + "step": 4159 + }, + { + "epoch": 0.4388185654008439, + "grad_norm": 0.6709357500076294, + "learning_rate": 0.000906383956547867, + "loss": 1.6137, + "step": 4160 + }, + { + "epoch": 0.4389240506329114, + "grad_norm": 0.6664318442344666, + "learning_rate": 0.0009061384065106051, + "loss": 1.6216, + "step": 4161 + }, + { + "epoch": 0.4390295358649789, + "grad_norm": 0.6243993043899536, + "learning_rate": 0.0009058928389772564, + "loss": 1.6492, + "step": 4162 + }, + { + "epoch": 0.43913502109704644, + "grad_norm": 0.5791100859642029, + "learning_rate": 0.0009056472539753377, + "loss": 1.6483, + "step": 4163 + }, + { + "epoch": 0.43924050632911393, + "grad_norm": 0.6528064012527466, + "learning_rate": 0.0009054016515323679, + "loss": 1.5965, + "step": 4164 + }, + { + "epoch": 0.4393459915611814, + "grad_norm": 0.5797888040542603, + "learning_rate": 0.0009051560316758684, + "loss": 1.6381, + "step": 4165 + }, + { + "epoch": 0.4394514767932489, + "grad_norm": 0.5618374347686768, + "learning_rate": 0.0009049103944333616, + "loss": 1.6248, + "step": 4166 + }, + { + "epoch": 0.43955696202531647, + "grad_norm": 0.5265519022941589, + "learning_rate": 0.0009046647398323728, + "loss": 1.6188, + "step": 4167 + }, + { + "epoch": 0.43966244725738396, + "grad_norm": 0.6478899717330933, + "learning_rate": 0.0009044190679004286, + 
"loss": 1.6428, + "step": 4168 + }, + { + "epoch": 0.43976793248945145, + "grad_norm": 0.6971511244773865, + "learning_rate": 0.0009041733786650578, + "loss": 1.6423, + "step": 4169 + }, + { + "epoch": 0.439873417721519, + "grad_norm": 0.5348067879676819, + "learning_rate": 0.0009039276721537915, + "loss": 1.6614, + "step": 4170 + }, + { + "epoch": 0.4399789029535865, + "grad_norm": 0.6032482981681824, + "learning_rate": 0.0009036819483941614, + "loss": 1.6321, + "step": 4171 + }, + { + "epoch": 0.440084388185654, + "grad_norm": 0.5794818997383118, + "learning_rate": 0.0009034362074137032, + "loss": 1.6216, + "step": 4172 + }, + { + "epoch": 0.44018987341772153, + "grad_norm": 0.5960403680801392, + "learning_rate": 0.0009031904492399526, + "loss": 1.6613, + "step": 4173 + }, + { + "epoch": 0.440295358649789, + "grad_norm": 0.5278424024581909, + "learning_rate": 0.0009029446739004483, + "loss": 1.6425, + "step": 4174 + }, + { + "epoch": 0.4404008438818565, + "grad_norm": 0.5895372629165649, + "learning_rate": 0.0009026988814227308, + "loss": 1.6042, + "step": 4175 + }, + { + "epoch": 0.44050632911392407, + "grad_norm": 0.5372918248176575, + "learning_rate": 0.0009024530718343418, + "loss": 1.6048, + "step": 4176 + }, + { + "epoch": 0.44061181434599156, + "grad_norm": 0.5563502311706543, + "learning_rate": 0.0009022072451628263, + "loss": 1.6441, + "step": 4177 + }, + { + "epoch": 0.44071729957805905, + "grad_norm": 0.569299042224884, + "learning_rate": 0.0009019614014357298, + "loss": 1.6018, + "step": 4178 + }, + { + "epoch": 0.4408227848101266, + "grad_norm": 0.5561728477478027, + "learning_rate": 0.0009017155406806006, + "loss": 1.6448, + "step": 4179 + }, + { + "epoch": 0.4409282700421941, + "grad_norm": 0.5709185004234314, + "learning_rate": 0.0009014696629249886, + "loss": 1.6124, + "step": 4180 + }, + { + "epoch": 0.4410337552742616, + "grad_norm": 0.5883145928382874, + "learning_rate": 0.0009012237681964454, + "loss": 1.6361, + "step": 4181 + }, + { + 
"epoch": 0.44113924050632913, + "grad_norm": 0.5841755867004395, + "learning_rate": 0.0009009778565225251, + "loss": 1.637, + "step": 4182 + }, + { + "epoch": 0.4412447257383966, + "grad_norm": 0.6310212016105652, + "learning_rate": 0.000900731927930783, + "loss": 1.6126, + "step": 4183 + }, + { + "epoch": 0.4413502109704641, + "grad_norm": 0.5576636791229248, + "learning_rate": 0.0009004859824487769, + "loss": 1.624, + "step": 4184 + }, + { + "epoch": 0.44145569620253167, + "grad_norm": 0.5746493339538574, + "learning_rate": 0.0009002400201040659, + "loss": 1.6235, + "step": 4185 + }, + { + "epoch": 0.44156118143459916, + "grad_norm": 0.5619171857833862, + "learning_rate": 0.0008999940409242115, + "loss": 1.6234, + "step": 4186 + }, + { + "epoch": 0.44166666666666665, + "grad_norm": 0.5518098473548889, + "learning_rate": 0.0008997480449367771, + "loss": 1.6536, + "step": 4187 + }, + { + "epoch": 0.4417721518987342, + "grad_norm": 0.5902150273323059, + "learning_rate": 0.0008995020321693274, + "loss": 1.6541, + "step": 4188 + }, + { + "epoch": 0.4418776371308017, + "grad_norm": 0.5257380604743958, + "learning_rate": 0.0008992560026494294, + "loss": 1.6325, + "step": 4189 + }, + { + "epoch": 0.4419831223628692, + "grad_norm": 0.552778959274292, + "learning_rate": 0.0008990099564046522, + "loss": 1.647, + "step": 4190 + }, + { + "epoch": 0.44208860759493673, + "grad_norm": 0.6142827272415161, + "learning_rate": 0.0008987638934625662, + "loss": 1.6192, + "step": 4191 + }, + { + "epoch": 0.4421940928270042, + "grad_norm": 0.5470446944236755, + "learning_rate": 0.0008985178138507441, + "loss": 1.6525, + "step": 4192 + }, + { + "epoch": 0.4422995780590717, + "grad_norm": 0.799617350101471, + "learning_rate": 0.0008982717175967606, + "loss": 1.6138, + "step": 4193 + }, + { + "epoch": 0.44240506329113927, + "grad_norm": 0.5376743674278259, + "learning_rate": 0.0008980256047281919, + "loss": 1.6676, + "step": 4194 + }, + { + "epoch": 0.44251054852320676, + "grad_norm": 
0.6992775201797485, + "learning_rate": 0.0008977794752726159, + "loss": 1.6244, + "step": 4195 + }, + { + "epoch": 0.44261603375527425, + "grad_norm": 0.5907681584358215, + "learning_rate": 0.0008975333292576125, + "loss": 1.6064, + "step": 4196 + }, + { + "epoch": 0.44272151898734174, + "grad_norm": 0.8146162629127502, + "learning_rate": 0.0008972871667107643, + "loss": 1.6343, + "step": 4197 + }, + { + "epoch": 0.4428270042194093, + "grad_norm": 0.7421292066574097, + "learning_rate": 0.0008970409876596545, + "loss": 1.6459, + "step": 4198 + }, + { + "epoch": 0.4429324894514768, + "grad_norm": 0.6447854042053223, + "learning_rate": 0.0008967947921318689, + "loss": 1.6117, + "step": 4199 + }, + { + "epoch": 0.4430379746835443, + "grad_norm": 0.7032788395881653, + "learning_rate": 0.0008965485801549946, + "loss": 1.6754, + "step": 4200 + }, + { + "epoch": 0.4431434599156118, + "grad_norm": 0.6710827350616455, + "learning_rate": 0.0008963023517566213, + "loss": 1.6338, + "step": 4201 + }, + { + "epoch": 0.4432489451476793, + "grad_norm": 0.7549927234649658, + "learning_rate": 0.0008960561069643402, + "loss": 1.6358, + "step": 4202 + }, + { + "epoch": 0.4433544303797468, + "grad_norm": 0.7573994398117065, + "learning_rate": 0.0008958098458057436, + "loss": 1.6041, + "step": 4203 + }, + { + "epoch": 0.44345991561181436, + "grad_norm": 0.6341649889945984, + "learning_rate": 0.000895563568308427, + "loss": 1.6325, + "step": 4204 + }, + { + "epoch": 0.44356540084388185, + "grad_norm": 0.7422118186950684, + "learning_rate": 0.0008953172744999865, + "loss": 1.5983, + "step": 4205 + }, + { + "epoch": 0.44367088607594934, + "grad_norm": 0.666573703289032, + "learning_rate": 0.000895070964408021, + "loss": 1.6584, + "step": 4206 + }, + { + "epoch": 0.4437763713080169, + "grad_norm": 0.8405724763870239, + "learning_rate": 0.0008948246380601303, + "loss": 1.6111, + "step": 4207 + }, + { + "epoch": 0.4438818565400844, + "grad_norm": 0.7403432130813599, + "learning_rate": 
0.000894578295483917, + "loss": 1.6237, + "step": 4208 + }, + { + "epoch": 0.4439873417721519, + "grad_norm": 0.7536558508872986, + "learning_rate": 0.0008943319367069844, + "loss": 1.6249, + "step": 4209 + }, + { + "epoch": 0.4440928270042194, + "grad_norm": 0.9271068572998047, + "learning_rate": 0.000894085561756939, + "loss": 1.6417, + "step": 4210 + }, + { + "epoch": 0.4441983122362869, + "grad_norm": 0.627385139465332, + "learning_rate": 0.0008938391706613878, + "loss": 1.6629, + "step": 4211 + }, + { + "epoch": 0.4443037974683544, + "grad_norm": 1.007472038269043, + "learning_rate": 0.0008935927634479403, + "loss": 1.6209, + "step": 4212 + }, + { + "epoch": 0.44440928270042196, + "grad_norm": 0.6586564183235168, + "learning_rate": 0.0008933463401442073, + "loss": 1.6501, + "step": 4213 + }, + { + "epoch": 0.44451476793248945, + "grad_norm": 1.1899969577789307, + "learning_rate": 0.0008930999007778025, + "loss": 1.6192, + "step": 4214 + }, + { + "epoch": 0.44462025316455694, + "grad_norm": 0.8373725414276123, + "learning_rate": 0.0008928534453763402, + "loss": 1.6196, + "step": 4215 + }, + { + "epoch": 0.4447257383966245, + "grad_norm": 0.8232155442237854, + "learning_rate": 0.0008926069739674369, + "loss": 1.6395, + "step": 4216 + }, + { + "epoch": 0.444831223628692, + "grad_norm": 1.1073634624481201, + "learning_rate": 0.000892360486578711, + "loss": 1.6293, + "step": 4217 + }, + { + "epoch": 0.4449367088607595, + "grad_norm": 0.5458498001098633, + "learning_rate": 0.0008921139832377829, + "loss": 1.5982, + "step": 4218 + }, + { + "epoch": 0.445042194092827, + "grad_norm": 0.9485758543014526, + "learning_rate": 0.0008918674639722742, + "loss": 1.653, + "step": 4219 + }, + { + "epoch": 0.4451476793248945, + "grad_norm": 0.5143296122550964, + "learning_rate": 0.0008916209288098088, + "loss": 1.6488, + "step": 4220 + }, + { + "epoch": 0.445253164556962, + "grad_norm": 0.8232793807983398, + "learning_rate": 0.0008913743777780122, + "loss": 1.6305, + "step": 4221 
+ }, + { + "epoch": 0.44535864978902956, + "grad_norm": 0.613756537437439, + "learning_rate": 0.0008911278109045114, + "loss": 1.6397, + "step": 4222 + }, + { + "epoch": 0.44546413502109705, + "grad_norm": 0.7310481667518616, + "learning_rate": 0.0008908812282169359, + "loss": 1.641, + "step": 4223 + }, + { + "epoch": 0.44556962025316454, + "grad_norm": 0.7486088275909424, + "learning_rate": 0.0008906346297429161, + "loss": 1.6139, + "step": 4224 + }, + { + "epoch": 0.4456751054852321, + "grad_norm": 0.5316728353500366, + "learning_rate": 0.000890388015510085, + "loss": 1.6248, + "step": 4225 + }, + { + "epoch": 0.4457805907172996, + "grad_norm": 0.6807406544685364, + "learning_rate": 0.0008901413855460764, + "loss": 1.6218, + "step": 4226 + }, + { + "epoch": 0.4458860759493671, + "grad_norm": 0.6109276413917542, + "learning_rate": 0.0008898947398785271, + "loss": 1.6156, + "step": 4227 + }, + { + "epoch": 0.4459915611814346, + "grad_norm": 0.553231418132782, + "learning_rate": 0.0008896480785350743, + "loss": 1.6363, + "step": 4228 + }, + { + "epoch": 0.4460970464135021, + "grad_norm": 0.5901875495910645, + "learning_rate": 0.0008894014015433582, + "loss": 1.6261, + "step": 4229 + }, + { + "epoch": 0.4462025316455696, + "grad_norm": 0.6580528616905212, + "learning_rate": 0.0008891547089310198, + "loss": 1.6249, + "step": 4230 + }, + { + "epoch": 0.4463080168776371, + "grad_norm": 0.6291429400444031, + "learning_rate": 0.0008889080007257024, + "loss": 1.6828, + "step": 4231 + }, + { + "epoch": 0.44641350210970465, + "grad_norm": 0.5702469944953918, + "learning_rate": 0.0008886612769550508, + "loss": 1.628, + "step": 4232 + }, + { + "epoch": 0.44651898734177214, + "grad_norm": 0.586970329284668, + "learning_rate": 0.0008884145376467119, + "loss": 1.63, + "step": 4233 + }, + { + "epoch": 0.44662447257383964, + "grad_norm": 0.5781841278076172, + "learning_rate": 0.0008881677828283337, + "loss": 1.6649, + "step": 4234 + }, + { + "epoch": 0.4467299578059072, + 
"grad_norm": 0.6174421906471252, + "learning_rate": 0.0008879210125275664, + "loss": 1.6278, + "step": 4235 + }, + { + "epoch": 0.4468354430379747, + "grad_norm": 0.548221230506897, + "learning_rate": 0.000887674226772062, + "loss": 1.6039, + "step": 4236 + }, + { + "epoch": 0.44694092827004217, + "grad_norm": 0.657470703125, + "learning_rate": 0.000887427425589474, + "loss": 1.6494, + "step": 4237 + }, + { + "epoch": 0.4470464135021097, + "grad_norm": 0.78307044506073, + "learning_rate": 0.0008871806090074577, + "loss": 1.5822, + "step": 4238 + }, + { + "epoch": 0.4471518987341772, + "grad_norm": 0.5891963839530945, + "learning_rate": 0.0008869337770536699, + "loss": 1.6111, + "step": 4239 + }, + { + "epoch": 0.4472573839662447, + "grad_norm": 0.6257622838020325, + "learning_rate": 0.0008866869297557699, + "loss": 1.6107, + "step": 4240 + }, + { + "epoch": 0.44736286919831225, + "grad_norm": 0.5590792894363403, + "learning_rate": 0.0008864400671414177, + "loss": 1.6039, + "step": 4241 + }, + { + "epoch": 0.44746835443037974, + "grad_norm": 0.5461711287498474, + "learning_rate": 0.0008861931892382756, + "loss": 1.6328, + "step": 4242 + }, + { + "epoch": 0.44757383966244724, + "grad_norm": 0.5804974436759949, + "learning_rate": 0.0008859462960740076, + "loss": 1.6402, + "step": 4243 + }, + { + "epoch": 0.4476793248945148, + "grad_norm": 0.6150066256523132, + "learning_rate": 0.000885699387676279, + "loss": 1.6342, + "step": 4244 + }, + { + "epoch": 0.4477848101265823, + "grad_norm": 0.6395910978317261, + "learning_rate": 0.0008854524640727575, + "loss": 1.6702, + "step": 4245 + }, + { + "epoch": 0.44789029535864977, + "grad_norm": 0.8674719929695129, + "learning_rate": 0.0008852055252911121, + "loss": 1.6106, + "step": 4246 + }, + { + "epoch": 0.4479957805907173, + "grad_norm": 0.5985876321792603, + "learning_rate": 0.0008849585713590134, + "loss": 1.5814, + "step": 4247 + }, + { + "epoch": 0.4481012658227848, + "grad_norm": 0.8060964941978455, + "learning_rate": 
0.0008847116023041336, + "loss": 1.6566, + "step": 4248 + }, + { + "epoch": 0.4482067510548523, + "grad_norm": 0.6891213655471802, + "learning_rate": 0.0008844646181541472, + "loss": 1.6616, + "step": 4249 + }, + { + "epoch": 0.44831223628691985, + "grad_norm": 0.661748468875885, + "learning_rate": 0.0008842176189367299, + "loss": 1.6442, + "step": 4250 + }, + { + "epoch": 0.44841772151898734, + "grad_norm": 0.5721883177757263, + "learning_rate": 0.000883970604679559, + "loss": 1.6273, + "step": 4251 + }, + { + "epoch": 0.44852320675105484, + "grad_norm": 0.6211469769477844, + "learning_rate": 0.0008837235754103136, + "loss": 1.6455, + "step": 4252 + }, + { + "epoch": 0.4486286919831224, + "grad_norm": 0.5770026445388794, + "learning_rate": 0.000883476531156675, + "loss": 1.6349, + "step": 4253 + }, + { + "epoch": 0.4487341772151899, + "grad_norm": 0.5898496508598328, + "learning_rate": 0.0008832294719463256, + "loss": 1.6236, + "step": 4254 + }, + { + "epoch": 0.44883966244725737, + "grad_norm": 0.6555864214897156, + "learning_rate": 0.0008829823978069494, + "loss": 1.6691, + "step": 4255 + }, + { + "epoch": 0.4489451476793249, + "grad_norm": 0.5612785220146179, + "learning_rate": 0.0008827353087662326, + "loss": 1.6678, + "step": 4256 + }, + { + "epoch": 0.4490506329113924, + "grad_norm": 0.6890183687210083, + "learning_rate": 0.0008824882048518622, + "loss": 1.5987, + "step": 4257 + }, + { + "epoch": 0.4491561181434599, + "grad_norm": 0.6061158180236816, + "learning_rate": 0.0008822410860915281, + "loss": 1.6307, + "step": 4258 + }, + { + "epoch": 0.44926160337552745, + "grad_norm": 0.6788460612297058, + "learning_rate": 0.0008819939525129207, + "loss": 1.6638, + "step": 4259 + }, + { + "epoch": 0.44936708860759494, + "grad_norm": 0.8446901440620422, + "learning_rate": 0.0008817468041437329, + "loss": 1.6191, + "step": 4260 + }, + { + "epoch": 0.44947257383966244, + "grad_norm": 0.6048992276191711, + "learning_rate": 0.0008814996410116587, + "loss": 1.6419, + 
"step": 4261 + }, + { + "epoch": 0.44957805907173, + "grad_norm": 0.7044769525527954, + "learning_rate": 0.0008812524631443938, + "loss": 1.6554, + "step": 4262 + }, + { + "epoch": 0.4496835443037975, + "grad_norm": 0.6273467540740967, + "learning_rate": 0.0008810052705696363, + "loss": 1.5909, + "step": 4263 + }, + { + "epoch": 0.44978902953586497, + "grad_norm": 0.5859277844429016, + "learning_rate": 0.0008807580633150848, + "loss": 1.6535, + "step": 4264 + }, + { + "epoch": 0.44989451476793246, + "grad_norm": 0.8079290390014648, + "learning_rate": 0.0008805108414084401, + "loss": 1.618, + "step": 4265 + }, + { + "epoch": 0.45, + "grad_norm": 0.5758571624755859, + "learning_rate": 0.0008802636048774052, + "loss": 1.627, + "step": 4266 + }, + { + "epoch": 0.4501054852320675, + "grad_norm": 0.5797906517982483, + "learning_rate": 0.0008800163537496837, + "loss": 1.5887, + "step": 4267 + }, + { + "epoch": 0.450210970464135, + "grad_norm": 0.5650228261947632, + "learning_rate": 0.0008797690880529813, + "loss": 1.6157, + "step": 4268 + }, + { + "epoch": 0.45031645569620254, + "grad_norm": 0.6404386162757874, + "learning_rate": 0.0008795218078150056, + "loss": 1.6131, + "step": 4269 + }, + { + "epoch": 0.45042194092827004, + "grad_norm": 0.6375009417533875, + "learning_rate": 0.0008792745130634654, + "loss": 1.6516, + "step": 4270 + }, + { + "epoch": 0.45052742616033753, + "grad_norm": 0.5769631862640381, + "learning_rate": 0.0008790272038260715, + "loss": 1.6013, + "step": 4271 + }, + { + "epoch": 0.4506329113924051, + "grad_norm": 0.5890920758247375, + "learning_rate": 0.000878779880130536, + "loss": 1.6283, + "step": 4272 + }, + { + "epoch": 0.45073839662447257, + "grad_norm": 0.5452843904495239, + "learning_rate": 0.0008785325420045727, + "loss": 1.6308, + "step": 4273 + }, + { + "epoch": 0.45084388185654006, + "grad_norm": 0.5732265710830688, + "learning_rate": 0.0008782851894758971, + "loss": 1.6234, + "step": 4274 + }, + { + "epoch": 0.4509493670886076, + 
"grad_norm": 0.655504584312439, + "learning_rate": 0.0008780378225722264, + "loss": 1.6062, + "step": 4275 + }, + { + "epoch": 0.4510548523206751, + "grad_norm": 0.6809930205345154, + "learning_rate": 0.0008777904413212794, + "loss": 1.6021, + "step": 4276 + }, + { + "epoch": 0.4511603375527426, + "grad_norm": 0.7557550668716431, + "learning_rate": 0.0008775430457507759, + "loss": 1.6465, + "step": 4277 + }, + { + "epoch": 0.45126582278481014, + "grad_norm": 0.5548058748245239, + "learning_rate": 0.0008772956358884383, + "loss": 1.6091, + "step": 4278 + }, + { + "epoch": 0.45137130801687764, + "grad_norm": 0.8982890248298645, + "learning_rate": 0.0008770482117619901, + "loss": 1.6076, + "step": 4279 + }, + { + "epoch": 0.45147679324894513, + "grad_norm": 0.7692838311195374, + "learning_rate": 0.0008768007733991561, + "loss": 1.663, + "step": 4280 + }, + { + "epoch": 0.4515822784810127, + "grad_norm": 0.5870657563209534, + "learning_rate": 0.0008765533208276632, + "loss": 1.619, + "step": 4281 + }, + { + "epoch": 0.45168776371308017, + "grad_norm": 0.6281301975250244, + "learning_rate": 0.0008763058540752396, + "loss": 1.6743, + "step": 4282 + }, + { + "epoch": 0.45179324894514766, + "grad_norm": 0.6914165616035461, + "learning_rate": 0.0008760583731696151, + "loss": 1.6466, + "step": 4283 + }, + { + "epoch": 0.4518987341772152, + "grad_norm": 0.7427144050598145, + "learning_rate": 0.0008758108781385216, + "loss": 1.6369, + "step": 4284 + }, + { + "epoch": 0.4520042194092827, + "grad_norm": 0.7763237357139587, + "learning_rate": 0.0008755633690096918, + "loss": 1.6153, + "step": 4285 + }, + { + "epoch": 0.4521097046413502, + "grad_norm": 0.678255021572113, + "learning_rate": 0.0008753158458108604, + "loss": 1.6035, + "step": 4286 + }, + { + "epoch": 0.45221518987341774, + "grad_norm": 0.9256573915481567, + "learning_rate": 0.0008750683085697632, + "loss": 1.6069, + "step": 4287 + }, + { + "epoch": 0.45232067510548524, + "grad_norm": 0.6188719868659973, + 
"learning_rate": 0.0008748207573141388, + "loss": 1.66, + "step": 4288 + }, + { + "epoch": 0.45242616033755273, + "grad_norm": 1.0504001379013062, + "learning_rate": 0.000874573192071726, + "loss": 1.6624, + "step": 4289 + }, + { + "epoch": 0.4525316455696203, + "grad_norm": 0.5480011701583862, + "learning_rate": 0.0008743256128702658, + "loss": 1.6039, + "step": 4290 + }, + { + "epoch": 0.45263713080168777, + "grad_norm": 0.8067851662635803, + "learning_rate": 0.0008740780197375007, + "loss": 1.6241, + "step": 4291 + }, + { + "epoch": 0.45274261603375526, + "grad_norm": 0.6140615344047546, + "learning_rate": 0.000873830412701175, + "loss": 1.63, + "step": 4292 + }, + { + "epoch": 0.4528481012658228, + "grad_norm": 0.7928540110588074, + "learning_rate": 0.0008735827917890339, + "loss": 1.6628, + "step": 4293 + }, + { + "epoch": 0.4529535864978903, + "grad_norm": 0.5414756536483765, + "learning_rate": 0.000873335157028825, + "loss": 1.6301, + "step": 4294 + }, + { + "epoch": 0.4530590717299578, + "grad_norm": 0.7288104295730591, + "learning_rate": 0.0008730875084482964, + "loss": 1.6261, + "step": 4295 + }, + { + "epoch": 0.4531645569620253, + "grad_norm": 0.5562858581542969, + "learning_rate": 0.0008728398460751989, + "loss": 1.6389, + "step": 4296 + }, + { + "epoch": 0.45327004219409284, + "grad_norm": 0.9543285965919495, + "learning_rate": 0.0008725921699372839, + "loss": 1.6083, + "step": 4297 + }, + { + "epoch": 0.45337552742616033, + "grad_norm": 0.7308788895606995, + "learning_rate": 0.0008723444800623053, + "loss": 1.634, + "step": 4298 + }, + { + "epoch": 0.4534810126582278, + "grad_norm": 0.8065575361251831, + "learning_rate": 0.0008720967764780173, + "loss": 1.6268, + "step": 4299 + }, + { + "epoch": 0.45358649789029537, + "grad_norm": 0.9015058279037476, + "learning_rate": 0.0008718490592121768, + "loss": 1.6285, + "step": 4300 + }, + { + "epoch": 0.45369198312236286, + "grad_norm": 0.6737461090087891, + "learning_rate": 0.0008716013282925418, + "loss": 
1.6685, + "step": 4301 + }, + { + "epoch": 0.45379746835443036, + "grad_norm": 0.9950727224349976, + "learning_rate": 0.0008713535837468714, + "loss": 1.6245, + "step": 4302 + }, + { + "epoch": 0.4539029535864979, + "grad_norm": 0.5818344950675964, + "learning_rate": 0.0008711058256029269, + "loss": 1.6311, + "step": 4303 + }, + { + "epoch": 0.4540084388185654, + "grad_norm": 0.6844067573547363, + "learning_rate": 0.0008708580538884707, + "loss": 1.6286, + "step": 4304 + }, + { + "epoch": 0.4541139240506329, + "grad_norm": 0.5988804697990417, + "learning_rate": 0.0008706102686312668, + "loss": 1.5791, + "step": 4305 + }, + { + "epoch": 0.45421940928270044, + "grad_norm": 0.5650348663330078, + "learning_rate": 0.0008703624698590811, + "loss": 1.6554, + "step": 4306 + }, + { + "epoch": 0.45432489451476793, + "grad_norm": 0.5941834449768066, + "learning_rate": 0.0008701146575996804, + "loss": 1.657, + "step": 4307 + }, + { + "epoch": 0.4544303797468354, + "grad_norm": 0.566230058670044, + "learning_rate": 0.0008698668318808334, + "loss": 1.6271, + "step": 4308 + }, + { + "epoch": 0.45453586497890297, + "grad_norm": 0.5804744362831116, + "learning_rate": 0.0008696189927303101, + "loss": 1.6277, + "step": 4309 + }, + { + "epoch": 0.45464135021097046, + "grad_norm": 0.5526958703994751, + "learning_rate": 0.0008693711401758822, + "loss": 1.5891, + "step": 4310 + }, + { + "epoch": 0.45474683544303796, + "grad_norm": 0.7008069753646851, + "learning_rate": 0.0008691232742453229, + "loss": 1.6374, + "step": 4311 + }, + { + "epoch": 0.4548523206751055, + "grad_norm": 0.9305817484855652, + "learning_rate": 0.0008688753949664067, + "loss": 1.6126, + "step": 4312 + }, + { + "epoch": 0.454957805907173, + "grad_norm": 0.5304001569747925, + "learning_rate": 0.0008686275023669096, + "loss": 1.636, + "step": 4313 + }, + { + "epoch": 0.4550632911392405, + "grad_norm": 0.7769773602485657, + "learning_rate": 0.0008683795964746094, + "loss": 1.5914, + "step": 4314 + }, + { + "epoch": 
0.45516877637130804, + "grad_norm": 0.6310211420059204, + "learning_rate": 0.0008681316773172852, + "loss": 1.6178, + "step": 4315 + }, + { + "epoch": 0.45527426160337553, + "grad_norm": 0.6058962345123291, + "learning_rate": 0.0008678837449227174, + "loss": 1.5984, + "step": 4316 + }, + { + "epoch": 0.455379746835443, + "grad_norm": 0.8095162510871887, + "learning_rate": 0.0008676357993186882, + "loss": 1.6328, + "step": 4317 + }, + { + "epoch": 0.45548523206751057, + "grad_norm": 0.5783989429473877, + "learning_rate": 0.000867387840532981, + "loss": 1.6088, + "step": 4318 + }, + { + "epoch": 0.45559071729957806, + "grad_norm": 0.600899338722229, + "learning_rate": 0.0008671398685933811, + "loss": 1.6089, + "step": 4319 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 0.6530472636222839, + "learning_rate": 0.0008668918835276747, + "loss": 1.6829, + "step": 4320 + }, + { + "epoch": 0.4558016877637131, + "grad_norm": 0.5593256950378418, + "learning_rate": 0.0008666438853636499, + "loss": 1.5994, + "step": 4321 + }, + { + "epoch": 0.4559071729957806, + "grad_norm": 0.6254192590713501, + "learning_rate": 0.0008663958741290961, + "loss": 1.6043, + "step": 4322 + }, + { + "epoch": 0.4560126582278481, + "grad_norm": 0.6265437602996826, + "learning_rate": 0.0008661478498518042, + "loss": 1.6843, + "step": 4323 + }, + { + "epoch": 0.45611814345991564, + "grad_norm": 0.6294101476669312, + "learning_rate": 0.0008658998125595666, + "loss": 1.6138, + "step": 4324 + }, + { + "epoch": 0.45622362869198313, + "grad_norm": 0.5422559380531311, + "learning_rate": 0.0008656517622801771, + "loss": 1.6495, + "step": 4325 + }, + { + "epoch": 0.4563291139240506, + "grad_norm": 0.6557784080505371, + "learning_rate": 0.0008654036990414308, + "loss": 1.6096, + "step": 4326 + }, + { + "epoch": 0.45643459915611817, + "grad_norm": 0.5740750432014465, + "learning_rate": 0.0008651556228711247, + "loss": 1.6704, + "step": 4327 + }, + { + "epoch": 0.45654008438818566, + "grad_norm": 
0.6440047025680542, + "learning_rate": 0.0008649075337970567, + "loss": 1.6434, + "step": 4328 + }, + { + "epoch": 0.45664556962025316, + "grad_norm": 0.535135805606842, + "learning_rate": 0.0008646594318470268, + "loss": 1.6413, + "step": 4329 + }, + { + "epoch": 0.45675105485232065, + "grad_norm": 0.704571545124054, + "learning_rate": 0.0008644113170488355, + "loss": 1.6116, + "step": 4330 + }, + { + "epoch": 0.4568565400843882, + "grad_norm": 0.6104801297187805, + "learning_rate": 0.0008641631894302858, + "loss": 1.6197, + "step": 4331 + }, + { + "epoch": 0.4569620253164557, + "grad_norm": 0.6588351726531982, + "learning_rate": 0.0008639150490191814, + "loss": 1.6394, + "step": 4332 + }, + { + "epoch": 0.4570675105485232, + "grad_norm": 0.586542546749115, + "learning_rate": 0.0008636668958433279, + "loss": 1.595, + "step": 4333 + }, + { + "epoch": 0.45717299578059073, + "grad_norm": 0.6528662443161011, + "learning_rate": 0.0008634187299305318, + "loss": 1.6424, + "step": 4334 + }, + { + "epoch": 0.4572784810126582, + "grad_norm": 0.5708362460136414, + "learning_rate": 0.0008631705513086013, + "loss": 1.6195, + "step": 4335 + }, + { + "epoch": 0.4573839662447257, + "grad_norm": 0.6577020287513733, + "learning_rate": 0.0008629223600053465, + "loss": 1.6591, + "step": 4336 + }, + { + "epoch": 0.45748945147679326, + "grad_norm": 0.659078061580658, + "learning_rate": 0.000862674156048578, + "loss": 1.6057, + "step": 4337 + }, + { + "epoch": 0.45759493670886076, + "grad_norm": 0.5813785791397095, + "learning_rate": 0.0008624259394661085, + "loss": 1.6121, + "step": 4338 + }, + { + "epoch": 0.45770042194092825, + "grad_norm": 0.6654723286628723, + "learning_rate": 0.000862177710285752, + "loss": 1.6466, + "step": 4339 + }, + { + "epoch": 0.4578059071729958, + "grad_norm": 0.699609637260437, + "learning_rate": 0.0008619294685353235, + "loss": 1.6049, + "step": 4340 + }, + { + "epoch": 0.4579113924050633, + "grad_norm": 0.5631093978881836, + "learning_rate": 
0.00086168121424264, + "loss": 1.6108, + "step": 4341 + }, + { + "epoch": 0.4580168776371308, + "grad_norm": 0.624778687953949, + "learning_rate": 0.0008614329474355196, + "loss": 1.6064, + "step": 4342 + }, + { + "epoch": 0.45812236286919833, + "grad_norm": 0.5693281292915344, + "learning_rate": 0.0008611846681417818, + "loss": 1.5809, + "step": 4343 + }, + { + "epoch": 0.4582278481012658, + "grad_norm": 0.5932193398475647, + "learning_rate": 0.0008609363763892474, + "loss": 1.6432, + "step": 4344 + }, + { + "epoch": 0.4583333333333333, + "grad_norm": 0.5392530560493469, + "learning_rate": 0.0008606880722057386, + "loss": 1.6338, + "step": 4345 + }, + { + "epoch": 0.45843881856540086, + "grad_norm": 0.6112661361694336, + "learning_rate": 0.0008604397556190797, + "loss": 1.6654, + "step": 4346 + }, + { + "epoch": 0.45854430379746836, + "grad_norm": 0.7135403752326965, + "learning_rate": 0.0008601914266570956, + "loss": 1.6027, + "step": 4347 + }, + { + "epoch": 0.45864978902953585, + "grad_norm": 0.5351112484931946, + "learning_rate": 0.0008599430853476126, + "loss": 1.6199, + "step": 4348 + }, + { + "epoch": 0.4587552742616034, + "grad_norm": 0.816925585269928, + "learning_rate": 0.0008596947317184585, + "loss": 1.6522, + "step": 4349 + }, + { + "epoch": 0.4588607594936709, + "grad_norm": 0.6409527659416199, + "learning_rate": 0.0008594463657974627, + "loss": 1.6373, + "step": 4350 + }, + { + "epoch": 0.4589662447257384, + "grad_norm": 0.577051043510437, + "learning_rate": 0.000859197987612456, + "loss": 1.6597, + "step": 4351 + }, + { + "epoch": 0.45907172995780593, + "grad_norm": 0.6633345484733582, + "learning_rate": 0.0008589495971912703, + "loss": 1.6522, + "step": 4352 + }, + { + "epoch": 0.4591772151898734, + "grad_norm": 0.6544476747512817, + "learning_rate": 0.000858701194561739, + "loss": 1.6224, + "step": 4353 + }, + { + "epoch": 0.4592827004219409, + "grad_norm": 0.64154452085495, + "learning_rate": 0.0008584527797516966, + "loss": 1.6571, + "step": 
4354 + }, + { + "epoch": 0.45938818565400846, + "grad_norm": 0.6370884776115417, + "learning_rate": 0.0008582043527889797, + "loss": 1.6487, + "step": 4355 + }, + { + "epoch": 0.45949367088607596, + "grad_norm": 0.6692924499511719, + "learning_rate": 0.0008579559137014254, + "loss": 1.6099, + "step": 4356 + }, + { + "epoch": 0.45959915611814345, + "grad_norm": 0.698013961315155, + "learning_rate": 0.0008577074625168725, + "loss": 1.6363, + "step": 4357 + }, + { + "epoch": 0.459704641350211, + "grad_norm": 0.5290843844413757, + "learning_rate": 0.0008574589992631617, + "loss": 1.6466, + "step": 4358 + }, + { + "epoch": 0.4598101265822785, + "grad_norm": 0.7971016764640808, + "learning_rate": 0.0008572105239681338, + "loss": 1.6338, + "step": 4359 + }, + { + "epoch": 0.459915611814346, + "grad_norm": 0.8116889595985413, + "learning_rate": 0.0008569620366596322, + "loss": 1.6184, + "step": 4360 + }, + { + "epoch": 0.46002109704641353, + "grad_norm": 0.6853300929069519, + "learning_rate": 0.0008567135373655012, + "loss": 1.6338, + "step": 4361 + }, + { + "epoch": 0.460126582278481, + "grad_norm": 0.7497984170913696, + "learning_rate": 0.0008564650261135862, + "loss": 1.5992, + "step": 4362 + }, + { + "epoch": 0.4602320675105485, + "grad_norm": 0.7149655818939209, + "learning_rate": 0.0008562165029317339, + "loss": 1.6138, + "step": 4363 + }, + { + "epoch": 0.460337552742616, + "grad_norm": 0.6425312757492065, + "learning_rate": 0.0008559679678477929, + "loss": 1.6354, + "step": 4364 + }, + { + "epoch": 0.46044303797468356, + "grad_norm": 0.6452682018280029, + "learning_rate": 0.0008557194208896129, + "loss": 1.6319, + "step": 4365 + }, + { + "epoch": 0.46054852320675105, + "grad_norm": 0.6373199820518494, + "learning_rate": 0.0008554708620850445, + "loss": 1.5839, + "step": 4366 + }, + { + "epoch": 0.46065400843881854, + "grad_norm": 0.6584261655807495, + "learning_rate": 0.0008552222914619401, + "loss": 1.6307, + "step": 4367 + }, + { + "epoch": 0.4607594936708861, + 
"grad_norm": 0.6760040521621704, + "learning_rate": 0.0008549737090481532, + "loss": 1.6113, + "step": 4368 + }, + { + "epoch": 0.4608649789029536, + "grad_norm": 0.7197534441947937, + "learning_rate": 0.0008547251148715386, + "loss": 1.6198, + "step": 4369 + }, + { + "epoch": 0.4609704641350211, + "grad_norm": 0.6767386794090271, + "learning_rate": 0.000854476508959953, + "loss": 1.6182, + "step": 4370 + }, + { + "epoch": 0.4610759493670886, + "grad_norm": 0.5965183973312378, + "learning_rate": 0.0008542278913412535, + "loss": 1.5696, + "step": 4371 + }, + { + "epoch": 0.4611814345991561, + "grad_norm": 0.6449134349822998, + "learning_rate": 0.0008539792620432989, + "loss": 1.6326, + "step": 4372 + }, + { + "epoch": 0.4612869198312236, + "grad_norm": 0.5253111720085144, + "learning_rate": 0.0008537306210939497, + "loss": 1.6012, + "step": 4373 + }, + { + "epoch": 0.46139240506329116, + "grad_norm": 0.6597420573234558, + "learning_rate": 0.0008534819685210668, + "loss": 1.6283, + "step": 4374 + }, + { + "epoch": 0.46149789029535865, + "grad_norm": 0.6445645689964294, + "learning_rate": 0.0008532333043525136, + "loss": 1.6363, + "step": 4375 + }, + { + "epoch": 0.46160337552742614, + "grad_norm": 0.6060430407524109, + "learning_rate": 0.0008529846286161539, + "loss": 1.6477, + "step": 4376 + }, + { + "epoch": 0.4617088607594937, + "grad_norm": 0.5686970353126526, + "learning_rate": 0.000852735941339853, + "loss": 1.5998, + "step": 4377 + }, + { + "epoch": 0.4618143459915612, + "grad_norm": 0.5696197152137756, + "learning_rate": 0.0008524872425514775, + "loss": 1.6365, + "step": 4378 + }, + { + "epoch": 0.4619198312236287, + "grad_norm": 0.5611931681632996, + "learning_rate": 0.0008522385322788955, + "loss": 1.649, + "step": 4379 + }, + { + "epoch": 0.4620253164556962, + "grad_norm": 0.6277179718017578, + "learning_rate": 0.0008519898105499762, + "loss": 1.6271, + "step": 4380 + }, + { + "epoch": 0.4621308016877637, + "grad_norm": 0.5890340805053711, + 
"learning_rate": 0.00085174107739259, + "loss": 1.6517, + "step": 4381 + }, + { + "epoch": 0.4622362869198312, + "grad_norm": 0.6017661094665527, + "learning_rate": 0.000851492332834609, + "loss": 1.6134, + "step": 4382 + }, + { + "epoch": 0.46234177215189876, + "grad_norm": 0.5304552912712097, + "learning_rate": 0.0008512435769039055, + "loss": 1.6105, + "step": 4383 + }, + { + "epoch": 0.46244725738396625, + "grad_norm": 0.5959940552711487, + "learning_rate": 0.0008509948096283547, + "loss": 1.5884, + "step": 4384 + }, + { + "epoch": 0.46255274261603374, + "grad_norm": 0.655412495136261, + "learning_rate": 0.0008507460310358319, + "loss": 1.6411, + "step": 4385 + }, + { + "epoch": 0.4626582278481013, + "grad_norm": 0.6113051772117615, + "learning_rate": 0.0008504972411542138, + "loss": 1.6375, + "step": 4386 + }, + { + "epoch": 0.4627637130801688, + "grad_norm": 0.8795647025108337, + "learning_rate": 0.0008502484400113787, + "loss": 1.5986, + "step": 4387 + }, + { + "epoch": 0.4628691983122363, + "grad_norm": 0.5520366430282593, + "learning_rate": 0.0008499996276352061, + "loss": 1.6223, + "step": 4388 + }, + { + "epoch": 0.4629746835443038, + "grad_norm": 0.7903146743774414, + "learning_rate": 0.0008497508040535766, + "loss": 1.6212, + "step": 4389 + }, + { + "epoch": 0.4630801687763713, + "grad_norm": 0.5726592540740967, + "learning_rate": 0.0008495019692943721, + "loss": 1.6099, + "step": 4390 + }, + { + "epoch": 0.4631856540084388, + "grad_norm": 0.7751229405403137, + "learning_rate": 0.0008492531233854757, + "loss": 1.6208, + "step": 4391 + }, + { + "epoch": 0.46329113924050636, + "grad_norm": 0.8222077488899231, + "learning_rate": 0.0008490042663547719, + "loss": 1.6035, + "step": 4392 + }, + { + "epoch": 0.46339662447257385, + "grad_norm": 0.5864148736000061, + "learning_rate": 0.0008487553982301465, + "loss": 1.6037, + "step": 4393 + }, + { + "epoch": 0.46350210970464134, + "grad_norm": 0.6323365569114685, + "learning_rate": 0.0008485065190394863, + 
"loss": 1.6341, + "step": 4394 + }, + { + "epoch": 0.46360759493670883, + "grad_norm": 0.6852652430534363, + "learning_rate": 0.0008482576288106794, + "loss": 1.6342, + "step": 4395 + }, + { + "epoch": 0.4637130801687764, + "grad_norm": 0.6178639531135559, + "learning_rate": 0.000848008727571615, + "loss": 1.6229, + "step": 4396 + }, + { + "epoch": 0.4638185654008439, + "grad_norm": 0.6326265335083008, + "learning_rate": 0.0008477598153501842, + "loss": 1.6206, + "step": 4397 + }, + { + "epoch": 0.46392405063291137, + "grad_norm": 0.6220011711120605, + "learning_rate": 0.0008475108921742787, + "loss": 1.6569, + "step": 4398 + }, + { + "epoch": 0.4640295358649789, + "grad_norm": 0.6130567193031311, + "learning_rate": 0.0008472619580717914, + "loss": 1.593, + "step": 4399 + }, + { + "epoch": 0.4641350210970464, + "grad_norm": 0.5756215453147888, + "learning_rate": 0.0008470130130706166, + "loss": 1.6585, + "step": 4400 + }, + { + "epoch": 0.4642405063291139, + "grad_norm": 0.6132723093032837, + "learning_rate": 0.00084676405719865, + "loss": 1.6071, + "step": 4401 + }, + { + "epoch": 0.46434599156118145, + "grad_norm": 0.659011960029602, + "learning_rate": 0.0008465150904837883, + "loss": 1.6461, + "step": 4402 + }, + { + "epoch": 0.46445147679324894, + "grad_norm": 0.5819559097290039, + "learning_rate": 0.0008462661129539296, + "loss": 1.6779, + "step": 4403 + }, + { + "epoch": 0.46455696202531643, + "grad_norm": 0.5816832780838013, + "learning_rate": 0.0008460171246369725, + "loss": 1.6511, + "step": 4404 + }, + { + "epoch": 0.464662447257384, + "grad_norm": 0.575931191444397, + "learning_rate": 0.000845768125560818, + "loss": 1.6354, + "step": 4405 + }, + { + "epoch": 0.4647679324894515, + "grad_norm": 0.5846771001815796, + "learning_rate": 0.0008455191157533677, + "loss": 1.6403, + "step": 4406 + }, + { + "epoch": 0.46487341772151897, + "grad_norm": 0.6112242937088013, + "learning_rate": 0.000845270095242524, + "loss": 1.6233, + "step": 4407 + }, + { + "epoch": 
0.4649789029535865, + "grad_norm": 0.6154381632804871, + "learning_rate": 0.0008450210640561912, + "loss": 1.6478, + "step": 4408 + }, + { + "epoch": 0.465084388185654, + "grad_norm": 0.5904394388198853, + "learning_rate": 0.000844772022222274, + "loss": 1.6295, + "step": 4409 + }, + { + "epoch": 0.4651898734177215, + "grad_norm": 0.6465165019035339, + "learning_rate": 0.0008445229697686795, + "loss": 1.6303, + "step": 4410 + }, + { + "epoch": 0.46529535864978905, + "grad_norm": 0.6136993169784546, + "learning_rate": 0.0008442739067233148, + "loss": 1.6222, + "step": 4411 + }, + { + "epoch": 0.46540084388185654, + "grad_norm": 0.7550585269927979, + "learning_rate": 0.0008440248331140888, + "loss": 1.6679, + "step": 4412 + }, + { + "epoch": 0.46550632911392403, + "grad_norm": 0.6674261689186096, + "learning_rate": 0.0008437757489689113, + "loss": 1.5726, + "step": 4413 + }, + { + "epoch": 0.4656118143459916, + "grad_norm": 0.7482666373252869, + "learning_rate": 0.0008435266543156935, + "loss": 1.6191, + "step": 4414 + }, + { + "epoch": 0.4657172995780591, + "grad_norm": 0.6753524541854858, + "learning_rate": 0.0008432775491823477, + "loss": 1.6285, + "step": 4415 + }, + { + "epoch": 0.46582278481012657, + "grad_norm": 0.6843773126602173, + "learning_rate": 0.0008430284335967876, + "loss": 1.6525, + "step": 4416 + }, + { + "epoch": 0.4659282700421941, + "grad_norm": 0.7219042181968689, + "learning_rate": 0.0008427793075869275, + "loss": 1.5969, + "step": 4417 + }, + { + "epoch": 0.4660337552742616, + "grad_norm": 0.6120694875717163, + "learning_rate": 0.0008425301711806833, + "loss": 1.6325, + "step": 4418 + }, + { + "epoch": 0.4661392405063291, + "grad_norm": 0.5834725499153137, + "learning_rate": 0.0008422810244059721, + "loss": 1.6275, + "step": 4419 + }, + { + "epoch": 0.46624472573839665, + "grad_norm": 0.6244850158691406, + "learning_rate": 0.0008420318672907119, + "loss": 1.609, + "step": 4420 + }, + { + "epoch": 0.46635021097046414, + "grad_norm": 
0.5282089710235596, + "learning_rate": 0.0008417826998628222, + "loss": 1.6202, + "step": 4421 + }, + { + "epoch": 0.46645569620253163, + "grad_norm": 0.7503560185432434, + "learning_rate": 0.0008415335221502231, + "loss": 1.6281, + "step": 4422 + }, + { + "epoch": 0.4665611814345992, + "grad_norm": 0.8017835021018982, + "learning_rate": 0.0008412843341808365, + "loss": 1.6441, + "step": 4423 + }, + { + "epoch": 0.4666666666666667, + "grad_norm": 0.6596938967704773, + "learning_rate": 0.0008410351359825851, + "loss": 1.6471, + "step": 4424 + }, + { + "epoch": 0.46677215189873417, + "grad_norm": 0.6104410886764526, + "learning_rate": 0.0008407859275833928, + "loss": 1.6147, + "step": 4425 + }, + { + "epoch": 0.4668776371308017, + "grad_norm": 0.6777629256248474, + "learning_rate": 0.0008405367090111845, + "loss": 1.6149, + "step": 4426 + }, + { + "epoch": 0.4669831223628692, + "grad_norm": 0.6214308142662048, + "learning_rate": 0.0008402874802938866, + "loss": 1.6202, + "step": 4427 + }, + { + "epoch": 0.4670886075949367, + "grad_norm": 0.7175410389900208, + "learning_rate": 0.0008400382414594263, + "loss": 1.6266, + "step": 4428 + }, + { + "epoch": 0.4671940928270042, + "grad_norm": 0.5873833894729614, + "learning_rate": 0.000839788992535732, + "loss": 1.6549, + "step": 4429 + }, + { + "epoch": 0.46729957805907174, + "grad_norm": 0.6343600749969482, + "learning_rate": 0.0008395397335507334, + "loss": 1.5914, + "step": 4430 + }, + { + "epoch": 0.46740506329113923, + "grad_norm": 0.6244483590126038, + "learning_rate": 0.0008392904645323612, + "loss": 1.61, + "step": 4431 + }, + { + "epoch": 0.4675105485232067, + "grad_norm": 0.7233603596687317, + "learning_rate": 0.0008390411855085473, + "loss": 1.659, + "step": 4432 + }, + { + "epoch": 0.4676160337552743, + "grad_norm": 0.64625483751297, + "learning_rate": 0.0008387918965072244, + "loss": 1.6177, + "step": 4433 + }, + { + "epoch": 0.46772151898734177, + "grad_norm": 1.1320043802261353, + "learning_rate": 
0.0008385425975563269, + "loss": 1.6554, + "step": 4434 + }, + { + "epoch": 0.46782700421940926, + "grad_norm": 0.6239098310470581, + "learning_rate": 0.0008382932886837897, + "loss": 1.5918, + "step": 4435 + }, + { + "epoch": 0.4679324894514768, + "grad_norm": 0.8875237107276917, + "learning_rate": 0.0008380439699175493, + "loss": 1.5952, + "step": 4436 + }, + { + "epoch": 0.4680379746835443, + "grad_norm": 0.6268960237503052, + "learning_rate": 0.000837794641285543, + "loss": 1.6242, + "step": 4437 + }, + { + "epoch": 0.4681434599156118, + "grad_norm": 0.7660098075866699, + "learning_rate": 0.0008375453028157093, + "loss": 1.6308, + "step": 4438 + }, + { + "epoch": 0.46824894514767934, + "grad_norm": 0.6616922616958618, + "learning_rate": 0.000837295954535988, + "loss": 1.596, + "step": 4439 + }, + { + "epoch": 0.46835443037974683, + "grad_norm": 0.660865306854248, + "learning_rate": 0.0008370465964743196, + "loss": 1.6449, + "step": 4440 + }, + { + "epoch": 0.4684599156118143, + "grad_norm": 0.5790109634399414, + "learning_rate": 0.0008367972286586461, + "loss": 1.6153, + "step": 4441 + }, + { + "epoch": 0.4685654008438819, + "grad_norm": 0.7831623554229736, + "learning_rate": 0.0008365478511169103, + "loss": 1.6577, + "step": 4442 + }, + { + "epoch": 0.46867088607594937, + "grad_norm": 0.584256112575531, + "learning_rate": 0.000836298463877056, + "loss": 1.6357, + "step": 4443 + }, + { + "epoch": 0.46877637130801686, + "grad_norm": 0.7947301864624023, + "learning_rate": 0.0008360490669670288, + "loss": 1.6057, + "step": 4444 + }, + { + "epoch": 0.4688818565400844, + "grad_norm": 0.7504630088806152, + "learning_rate": 0.0008357996604147744, + "loss": 1.6505, + "step": 4445 + }, + { + "epoch": 0.4689873417721519, + "grad_norm": 0.6527398228645325, + "learning_rate": 0.0008355502442482403, + "loss": 1.585, + "step": 4446 + }, + { + "epoch": 0.4690928270042194, + "grad_norm": 0.7097603678703308, + "learning_rate": 0.0008353008184953748, + "loss": 1.6393, + "step": 
4447 + }, + { + "epoch": 0.46919831223628694, + "grad_norm": 0.6385760307312012, + "learning_rate": 0.0008350513831841271, + "loss": 1.6597, + "step": 4448 + }, + { + "epoch": 0.46930379746835443, + "grad_norm": 0.8302870392799377, + "learning_rate": 0.0008348019383424479, + "loss": 1.6357, + "step": 4449 + }, + { + "epoch": 0.4694092827004219, + "grad_norm": 0.5833671689033508, + "learning_rate": 0.0008345524839982886, + "loss": 1.6014, + "step": 4450 + }, + { + "epoch": 0.4695147679324895, + "grad_norm": 0.8784348368644714, + "learning_rate": 0.000834303020179602, + "loss": 1.6251, + "step": 4451 + }, + { + "epoch": 0.46962025316455697, + "grad_norm": 0.8771331310272217, + "learning_rate": 0.0008340535469143414, + "loss": 1.5941, + "step": 4452 + }, + { + "epoch": 0.46972573839662446, + "grad_norm": 0.5993649363517761, + "learning_rate": 0.0008338040642304618, + "loss": 1.6407, + "step": 4453 + }, + { + "epoch": 0.469831223628692, + "grad_norm": 0.6806899905204773, + "learning_rate": 0.0008335545721559188, + "loss": 1.5837, + "step": 4454 + }, + { + "epoch": 0.4699367088607595, + "grad_norm": 0.5240955352783203, + "learning_rate": 0.0008333050707186696, + "loss": 1.605, + "step": 4455 + }, + { + "epoch": 0.470042194092827, + "grad_norm": 0.5754007697105408, + "learning_rate": 0.0008330555599466716, + "loss": 1.6102, + "step": 4456 + }, + { + "epoch": 0.47014767932489454, + "grad_norm": 0.53670334815979, + "learning_rate": 0.000832806039867884, + "loss": 1.5961, + "step": 4457 + }, + { + "epoch": 0.47025316455696203, + "grad_norm": 0.551366925239563, + "learning_rate": 0.000832556510510267, + "loss": 1.6017, + "step": 4458 + }, + { + "epoch": 0.4703586497890295, + "grad_norm": 0.5497669577598572, + "learning_rate": 0.0008323069719017812, + "loss": 1.6267, + "step": 4459 + }, + { + "epoch": 0.4704641350210971, + "grad_norm": 0.5435782074928284, + "learning_rate": 0.0008320574240703886, + "loss": 1.6606, + "step": 4460 + }, + { + "epoch": 0.47056962025316457, + 
"grad_norm": 0.544468104839325, + "learning_rate": 0.0008318078670440525, + "loss": 1.6186, + "step": 4461 + }, + { + "epoch": 0.47067510548523206, + "grad_norm": 0.5668899416923523, + "learning_rate": 0.0008315583008507372, + "loss": 1.6282, + "step": 4462 + }, + { + "epoch": 0.47078059071729955, + "grad_norm": 0.6178203821182251, + "learning_rate": 0.0008313087255184074, + "loss": 1.614, + "step": 4463 + }, + { + "epoch": 0.4708860759493671, + "grad_norm": 0.7228158116340637, + "learning_rate": 0.0008310591410750295, + "loss": 1.6203, + "step": 4464 + }, + { + "epoch": 0.4709915611814346, + "grad_norm": 0.7397622466087341, + "learning_rate": 0.0008308095475485706, + "loss": 1.6075, + "step": 4465 + }, + { + "epoch": 0.4710970464135021, + "grad_norm": 0.6217041611671448, + "learning_rate": 0.0008305599449669989, + "loss": 1.6351, + "step": 4466 + }, + { + "epoch": 0.47120253164556963, + "grad_norm": 0.5387865900993347, + "learning_rate": 0.0008303103333582839, + "loss": 1.6007, + "step": 4467 + }, + { + "epoch": 0.4713080168776371, + "grad_norm": 0.6332359313964844, + "learning_rate": 0.0008300607127503952, + "loss": 1.6146, + "step": 4468 + }, + { + "epoch": 0.4714135021097046, + "grad_norm": 0.7007975578308105, + "learning_rate": 0.0008298110831713047, + "loss": 1.6577, + "step": 4469 + }, + { + "epoch": 0.47151898734177217, + "grad_norm": 0.5879513621330261, + "learning_rate": 0.0008295614446489842, + "loss": 1.6546, + "step": 4470 + }, + { + "epoch": 0.47162447257383966, + "grad_norm": 0.5756363868713379, + "learning_rate": 0.0008293117972114074, + "loss": 1.6125, + "step": 4471 + }, + { + "epoch": 0.47172995780590715, + "grad_norm": 0.5651829242706299, + "learning_rate": 0.0008290621408865481, + "loss": 1.605, + "step": 4472 + }, + { + "epoch": 0.4718354430379747, + "grad_norm": 0.5947337746620178, + "learning_rate": 0.0008288124757023816, + "loss": 1.644, + "step": 4473 + }, + { + "epoch": 0.4719409282700422, + "grad_norm": 0.5854704976081848, + 
"learning_rate": 0.0008285628016868841, + "loss": 1.6613, + "step": 4474 + }, + { + "epoch": 0.4720464135021097, + "grad_norm": 0.5574603080749512, + "learning_rate": 0.0008283131188680332, + "loss": 1.6544, + "step": 4475 + }, + { + "epoch": 0.47215189873417723, + "grad_norm": 0.5800216197967529, + "learning_rate": 0.0008280634272738066, + "loss": 1.5952, + "step": 4476 + }, + { + "epoch": 0.4722573839662447, + "grad_norm": 0.6087362170219421, + "learning_rate": 0.0008278137269321837, + "loss": 1.6352, + "step": 4477 + }, + { + "epoch": 0.4723628691983122, + "grad_norm": 0.5266197323799133, + "learning_rate": 0.0008275640178711447, + "loss": 1.6487, + "step": 4478 + }, + { + "epoch": 0.47246835443037977, + "grad_norm": 0.5884939432144165, + "learning_rate": 0.0008273143001186709, + "loss": 1.6248, + "step": 4479 + }, + { + "epoch": 0.47257383966244726, + "grad_norm": 0.6473218202590942, + "learning_rate": 0.0008270645737027441, + "loss": 1.6334, + "step": 4480 + }, + { + "epoch": 0.47267932489451475, + "grad_norm": 0.6313621997833252, + "learning_rate": 0.0008268148386513475, + "loss": 1.5901, + "step": 4481 + }, + { + "epoch": 0.4727848101265823, + "grad_norm": 0.6038578748703003, + "learning_rate": 0.0008265650949924652, + "loss": 1.6094, + "step": 4482 + }, + { + "epoch": 0.4728902953586498, + "grad_norm": 0.6835364699363708, + "learning_rate": 0.0008263153427540825, + "loss": 1.604, + "step": 4483 + }, + { + "epoch": 0.4729957805907173, + "grad_norm": 0.5555885434150696, + "learning_rate": 0.0008260655819641849, + "loss": 1.6039, + "step": 4484 + }, + { + "epoch": 0.47310126582278483, + "grad_norm": 0.6284432411193848, + "learning_rate": 0.0008258158126507594, + "loss": 1.6293, + "step": 4485 + }, + { + "epoch": 0.4732067510548523, + "grad_norm": 0.6112720370292664, + "learning_rate": 0.0008255660348417944, + "loss": 1.6261, + "step": 4486 + }, + { + "epoch": 0.4733122362869198, + "grad_norm": 0.5587974190711975, + "learning_rate": 0.0008253162485652779, + 
"loss": 1.6087, + "step": 4487 + }, + { + "epoch": 0.47341772151898737, + "grad_norm": 0.6449179649353027, + "learning_rate": 0.0008250664538492006, + "loss": 1.6405, + "step": 4488 + }, + { + "epoch": 0.47352320675105486, + "grad_norm": 1.0359879732131958, + "learning_rate": 0.0008248166507215526, + "loss": 1.6175, + "step": 4489 + }, + { + "epoch": 0.47362869198312235, + "grad_norm": 0.7187209129333496, + "learning_rate": 0.0008245668392103259, + "loss": 1.659, + "step": 4490 + }, + { + "epoch": 0.4737341772151899, + "grad_norm": 0.8495188355445862, + "learning_rate": 0.000824317019343513, + "loss": 1.615, + "step": 4491 + }, + { + "epoch": 0.4738396624472574, + "grad_norm": 0.8356031179428101, + "learning_rate": 0.0008240671911491077, + "loss": 1.639, + "step": 4492 + }, + { + "epoch": 0.4739451476793249, + "grad_norm": 0.7090591788291931, + "learning_rate": 0.000823817354655104, + "loss": 1.5992, + "step": 4493 + }, + { + "epoch": 0.4740506329113924, + "grad_norm": 0.8999658823013306, + "learning_rate": 0.0008235675098894979, + "loss": 1.5958, + "step": 4494 + }, + { + "epoch": 0.4741561181434599, + "grad_norm": 0.6280614733695984, + "learning_rate": 0.0008233176568802851, + "loss": 1.6444, + "step": 4495 + }, + { + "epoch": 0.4742616033755274, + "grad_norm": 0.7755066752433777, + "learning_rate": 0.0008230677956554637, + "loss": 1.6105, + "step": 4496 + }, + { + "epoch": 0.4743670886075949, + "grad_norm": 0.6467302441596985, + "learning_rate": 0.0008228179262430313, + "loss": 1.5932, + "step": 4497 + }, + { + "epoch": 0.47447257383966246, + "grad_norm": 0.847457766532898, + "learning_rate": 0.0008225680486709871, + "loss": 1.631, + "step": 4498 + }, + { + "epoch": 0.47457805907172995, + "grad_norm": 0.6267364025115967, + "learning_rate": 0.0008223181629673312, + "loss": 1.6198, + "step": 4499 + }, + { + "epoch": 0.47468354430379744, + "grad_norm": 0.7182373404502869, + "learning_rate": 0.0008220682691600645, + "loss": 1.6234, + "step": 4500 + }, + { + "epoch": 
0.474789029535865, + "grad_norm": 0.5890523195266724, + "learning_rate": 0.0008218183672771889, + "loss": 1.6474, + "step": 4501 + }, + { + "epoch": 0.4748945147679325, + "grad_norm": 0.7613189220428467, + "learning_rate": 0.0008215684573467071, + "loss": 1.642, + "step": 4502 + }, + { + "epoch": 0.475, + "grad_norm": 0.6344131231307983, + "learning_rate": 0.0008213185393966229, + "loss": 1.6175, + "step": 4503 + }, + { + "epoch": 0.4751054852320675, + "grad_norm": 0.631385087966919, + "learning_rate": 0.0008210686134549406, + "loss": 1.6244, + "step": 4504 + }, + { + "epoch": 0.475210970464135, + "grad_norm": 0.6172440648078918, + "learning_rate": 0.0008208186795496657, + "loss": 1.6113, + "step": 4505 + }, + { + "epoch": 0.4753164556962025, + "grad_norm": 0.6991317868232727, + "learning_rate": 0.0008205687377088048, + "loss": 1.6123, + "step": 4506 + }, + { + "epoch": 0.47542194092827006, + "grad_norm": 0.6044358611106873, + "learning_rate": 0.000820318787960365, + "loss": 1.6432, + "step": 4507 + }, + { + "epoch": 0.47552742616033755, + "grad_norm": 0.7095423340797424, + "learning_rate": 0.0008200688303323542, + "loss": 1.6117, + "step": 4508 + }, + { + "epoch": 0.47563291139240504, + "grad_norm": 0.5999248027801514, + "learning_rate": 0.0008198188648527818, + "loss": 1.6279, + "step": 4509 + }, + { + "epoch": 0.4757383966244726, + "grad_norm": 0.6435315012931824, + "learning_rate": 0.0008195688915496571, + "loss": 1.5905, + "step": 4510 + }, + { + "epoch": 0.4758438818565401, + "grad_norm": 0.556786298751831, + "learning_rate": 0.0008193189104509915, + "loss": 1.5958, + "step": 4511 + }, + { + "epoch": 0.4759493670886076, + "grad_norm": 0.6247630715370178, + "learning_rate": 0.0008190689215847963, + "loss": 1.6072, + "step": 4512 + }, + { + "epoch": 0.4760548523206751, + "grad_norm": 0.607367992401123, + "learning_rate": 0.0008188189249790838, + "loss": 1.6229, + "step": 4513 + }, + { + "epoch": 0.4761603375527426, + "grad_norm": 0.6884822249412537, + 
"learning_rate": 0.0008185689206618677, + "loss": 1.6284, + "step": 4514 + }, + { + "epoch": 0.4762658227848101, + "grad_norm": 0.5854563117027283, + "learning_rate": 0.0008183189086611623, + "loss": 1.6467, + "step": 4515 + }, + { + "epoch": 0.47637130801687766, + "grad_norm": 0.824907660484314, + "learning_rate": 0.0008180688890049823, + "loss": 1.6, + "step": 4516 + }, + { + "epoch": 0.47647679324894515, + "grad_norm": 0.6496743559837341, + "learning_rate": 0.000817818861721344, + "loss": 1.5948, + "step": 4517 + }, + { + "epoch": 0.47658227848101264, + "grad_norm": 0.673152506351471, + "learning_rate": 0.0008175688268382639, + "loss": 1.5716, + "step": 4518 + }, + { + "epoch": 0.4766877637130802, + "grad_norm": 0.6013345122337341, + "learning_rate": 0.00081731878438376, + "loss": 1.5745, + "step": 4519 + }, + { + "epoch": 0.4767932489451477, + "grad_norm": 0.6056921482086182, + "learning_rate": 0.0008170687343858506, + "loss": 1.5937, + "step": 4520 + }, + { + "epoch": 0.4768987341772152, + "grad_norm": 0.5795904994010925, + "learning_rate": 0.000816818676872555, + "loss": 1.6414, + "step": 4521 + }, + { + "epoch": 0.4770042194092827, + "grad_norm": 0.5585246682167053, + "learning_rate": 0.0008165686118718935, + "loss": 1.6502, + "step": 4522 + }, + { + "epoch": 0.4771097046413502, + "grad_norm": 0.5871509909629822, + "learning_rate": 0.000816318539411887, + "loss": 1.6252, + "step": 4523 + }, + { + "epoch": 0.4772151898734177, + "grad_norm": 0.5606921315193176, + "learning_rate": 0.0008160684595205577, + "loss": 1.6202, + "step": 4524 + }, + { + "epoch": 0.47732067510548526, + "grad_norm": 0.5307207703590393, + "learning_rate": 0.000815818372225928, + "loss": 1.5884, + "step": 4525 + }, + { + "epoch": 0.47742616033755275, + "grad_norm": 0.5717658996582031, + "learning_rate": 0.0008155682775560215, + "loss": 1.6438, + "step": 4526 + }, + { + "epoch": 0.47753164556962024, + "grad_norm": 0.5596261024475098, + "learning_rate": 0.0008153181755388624, + "loss": 
1.6027, + "step": 4527 + }, + { + "epoch": 0.47763713080168774, + "grad_norm": 0.6888816356658936, + "learning_rate": 0.0008150680662024761, + "loss": 1.6284, + "step": 4528 + }, + { + "epoch": 0.4777426160337553, + "grad_norm": 0.6684629321098328, + "learning_rate": 0.0008148179495748885, + "loss": 1.6168, + "step": 4529 + }, + { + "epoch": 0.4778481012658228, + "grad_norm": 0.7107338905334473, + "learning_rate": 0.0008145678256841265, + "loss": 1.6353, + "step": 4530 + }, + { + "epoch": 0.47795358649789027, + "grad_norm": 0.5539276599884033, + "learning_rate": 0.0008143176945582175, + "loss": 1.5958, + "step": 4531 + }, + { + "epoch": 0.4780590717299578, + "grad_norm": 0.5503248572349548, + "learning_rate": 0.0008140675562251904, + "loss": 1.6092, + "step": 4532 + }, + { + "epoch": 0.4781645569620253, + "grad_norm": 0.5506139993667603, + "learning_rate": 0.0008138174107130739, + "loss": 1.6511, + "step": 4533 + }, + { + "epoch": 0.4782700421940928, + "grad_norm": 0.5283046364784241, + "learning_rate": 0.0008135672580498984, + "loss": 1.6472, + "step": 4534 + }, + { + "epoch": 0.47837552742616035, + "grad_norm": 0.6269198656082153, + "learning_rate": 0.0008133170982636946, + "loss": 1.6036, + "step": 4535 + }, + { + "epoch": 0.47848101265822784, + "grad_norm": 0.5996680855751038, + "learning_rate": 0.0008130669313824944, + "loss": 1.6158, + "step": 4536 + }, + { + "epoch": 0.47858649789029534, + "grad_norm": 0.5927919149398804, + "learning_rate": 0.0008128167574343299, + "loss": 1.6179, + "step": 4537 + }, + { + "epoch": 0.4786919831223629, + "grad_norm": 0.6523494720458984, + "learning_rate": 0.0008125665764472345, + "loss": 1.6303, + "step": 4538 + }, + { + "epoch": 0.4787974683544304, + "grad_norm": 0.5851922035217285, + "learning_rate": 0.0008123163884492422, + "loss": 1.6208, + "step": 4539 + }, + { + "epoch": 0.47890295358649787, + "grad_norm": 0.6325877904891968, + "learning_rate": 0.0008120661934683879, + "loss": 1.6474, + "step": 4540 + }, + { + "epoch": 
0.4790084388185654, + "grad_norm": 0.5749902129173279, + "learning_rate": 0.0008118159915327072, + "loss": 1.5978, + "step": 4541 + }, + { + "epoch": 0.4791139240506329, + "grad_norm": 0.6617357730865479, + "learning_rate": 0.0008115657826702364, + "loss": 1.5893, + "step": 4542 + }, + { + "epoch": 0.4792194092827004, + "grad_norm": 0.5988341569900513, + "learning_rate": 0.0008113155669090124, + "loss": 1.6479, + "step": 4543 + }, + { + "epoch": 0.47932489451476795, + "grad_norm": 0.637848973274231, + "learning_rate": 0.0008110653442770736, + "loss": 1.6354, + "step": 4544 + }, + { + "epoch": 0.47943037974683544, + "grad_norm": 0.6050624847412109, + "learning_rate": 0.0008108151148024584, + "loss": 1.6216, + "step": 4545 + }, + { + "epoch": 0.47953586497890294, + "grad_norm": 0.757036030292511, + "learning_rate": 0.0008105648785132065, + "loss": 1.6358, + "step": 4546 + }, + { + "epoch": 0.4796413502109705, + "grad_norm": 0.5669739842414856, + "learning_rate": 0.0008103146354373577, + "loss": 1.6307, + "step": 4547 + }, + { + "epoch": 0.479746835443038, + "grad_norm": 0.7185400724411011, + "learning_rate": 0.0008100643856029534, + "loss": 1.6554, + "step": 4548 + }, + { + "epoch": 0.47985232067510547, + "grad_norm": 0.6086196899414062, + "learning_rate": 0.0008098141290380353, + "loss": 1.6374, + "step": 4549 + }, + { + "epoch": 0.479957805907173, + "grad_norm": 0.7669814229011536, + "learning_rate": 0.0008095638657706456, + "loss": 1.5929, + "step": 4550 + }, + { + "epoch": 0.4800632911392405, + "grad_norm": 0.7485328316688538, + "learning_rate": 0.0008093135958288278, + "loss": 1.6053, + "step": 4551 + }, + { + "epoch": 0.480168776371308, + "grad_norm": 0.6405413150787354, + "learning_rate": 0.0008090633192406256, + "loss": 1.5919, + "step": 4552 + }, + { + "epoch": 0.48027426160337555, + "grad_norm": 0.6986269950866699, + "learning_rate": 0.0008088130360340843, + "loss": 1.5963, + "step": 4553 + }, + { + "epoch": 0.48037974683544304, + "grad_norm": 
0.7127420902252197, + "learning_rate": 0.0008085627462372489, + "loss": 1.6302, + "step": 4554 + }, + { + "epoch": 0.48048523206751054, + "grad_norm": 0.5478160381317139, + "learning_rate": 0.0008083124498781658, + "loss": 1.585, + "step": 4555 + }, + { + "epoch": 0.4805907172995781, + "grad_norm": 0.7383471131324768, + "learning_rate": 0.0008080621469848817, + "loss": 1.6357, + "step": 4556 + }, + { + "epoch": 0.4806962025316456, + "grad_norm": 0.6492341756820679, + "learning_rate": 0.0008078118375854449, + "loss": 1.6142, + "step": 4557 + }, + { + "epoch": 0.48080168776371307, + "grad_norm": 0.8873612284660339, + "learning_rate": 0.000807561521707903, + "loss": 1.6253, + "step": 4558 + }, + { + "epoch": 0.48090717299578056, + "grad_norm": 0.7341089248657227, + "learning_rate": 0.000807311199380306, + "loss": 1.6316, + "step": 4559 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 0.8388393521308899, + "learning_rate": 0.000807060870630703, + "loss": 1.6275, + "step": 4560 + }, + { + "epoch": 0.4811181434599156, + "grad_norm": 0.9824666380882263, + "learning_rate": 0.0008068105354871449, + "loss": 1.5829, + "step": 4561 + }, + { + "epoch": 0.4812236286919831, + "grad_norm": 0.7201429009437561, + "learning_rate": 0.0008065601939776833, + "loss": 1.5786, + "step": 4562 + }, + { + "epoch": 0.48132911392405064, + "grad_norm": 0.729275643825531, + "learning_rate": 0.0008063098461303698, + "loss": 1.5943, + "step": 4563 + }, + { + "epoch": 0.48143459915611814, + "grad_norm": 0.6603390574455261, + "learning_rate": 0.0008060594919732572, + "loss": 1.6116, + "step": 4564 + }, + { + "epoch": 0.48154008438818563, + "grad_norm": 0.7642948627471924, + "learning_rate": 0.0008058091315343988, + "loss": 1.5793, + "step": 4565 + }, + { + "epoch": 0.4816455696202532, + "grad_norm": 0.6650425791740417, + "learning_rate": 0.0008055587648418492, + "loss": 1.6269, + "step": 4566 + }, + { + "epoch": 0.48175105485232067, + "grad_norm": 0.6168044209480286, + "learning_rate": 
0.000805308391923663, + "loss": 1.5952, + "step": 4567 + }, + { + "epoch": 0.48185654008438816, + "grad_norm": 0.6102758646011353, + "learning_rate": 0.0008050580128078957, + "loss": 1.6091, + "step": 4568 + }, + { + "epoch": 0.4819620253164557, + "grad_norm": 0.700873076915741, + "learning_rate": 0.0008048076275226032, + "loss": 1.57, + "step": 4569 + }, + { + "epoch": 0.4820675105485232, + "grad_norm": 0.7106543779373169, + "learning_rate": 0.000804557236095843, + "loss": 1.5977, + "step": 4570 + }, + { + "epoch": 0.4821729957805907, + "grad_norm": 0.684246838092804, + "learning_rate": 0.0008043068385556725, + "loss": 1.599, + "step": 4571 + }, + { + "epoch": 0.48227848101265824, + "grad_norm": 0.6565681099891663, + "learning_rate": 0.0008040564349301498, + "loss": 1.5814, + "step": 4572 + }, + { + "epoch": 0.48238396624472574, + "grad_norm": 0.6833487749099731, + "learning_rate": 0.0008038060252473339, + "loss": 1.5951, + "step": 4573 + }, + { + "epoch": 0.48248945147679323, + "grad_norm": 0.7603049278259277, + "learning_rate": 0.0008035556095352847, + "loss": 1.6704, + "step": 4574 + }, + { + "epoch": 0.4825949367088608, + "grad_norm": 0.7488970756530762, + "learning_rate": 0.0008033051878220624, + "loss": 1.5946, + "step": 4575 + }, + { + "epoch": 0.48270042194092827, + "grad_norm": 0.7372132539749146, + "learning_rate": 0.0008030547601357281, + "loss": 1.6408, + "step": 4576 + }, + { + "epoch": 0.48280590717299576, + "grad_norm": 0.6986951231956482, + "learning_rate": 0.0008028043265043434, + "loss": 1.5975, + "step": 4577 + }, + { + "epoch": 0.4829113924050633, + "grad_norm": 0.716160774230957, + "learning_rate": 0.0008025538869559703, + "loss": 1.6194, + "step": 4578 + }, + { + "epoch": 0.4830168776371308, + "grad_norm": 0.7205145955085754, + "learning_rate": 0.0008023034415186725, + "loss": 1.6557, + "step": 4579 + }, + { + "epoch": 0.4831223628691983, + "grad_norm": 0.7506338357925415, + "learning_rate": 0.0008020529902205129, + "loss": 1.6401, + "step": 
4580 + }, + { + "epoch": 0.48322784810126584, + "grad_norm": 0.9456238150596619, + "learning_rate": 0.0008018025330895566, + "loss": 1.5731, + "step": 4581 + }, + { + "epoch": 0.48333333333333334, + "grad_norm": 0.6589499711990356, + "learning_rate": 0.0008015520701538677, + "loss": 1.6005, + "step": 4582 + }, + { + "epoch": 0.48343881856540083, + "grad_norm": 0.771369993686676, + "learning_rate": 0.0008013016014415126, + "loss": 1.6059, + "step": 4583 + }, + { + "epoch": 0.4835443037974684, + "grad_norm": 0.6918783783912659, + "learning_rate": 0.0008010511269805571, + "loss": 1.6461, + "step": 4584 + }, + { + "epoch": 0.48364978902953587, + "grad_norm": 0.726956844329834, + "learning_rate": 0.0008008006467990684, + "loss": 1.6272, + "step": 4585 + }, + { + "epoch": 0.48375527426160336, + "grad_norm": 0.7932528853416443, + "learning_rate": 0.0008005501609251136, + "loss": 1.6134, + "step": 4586 + }, + { + "epoch": 0.4838607594936709, + "grad_norm": 0.7243734002113342, + "learning_rate": 0.0008002996693867615, + "loss": 1.6243, + "step": 4587 + }, + { + "epoch": 0.4839662447257384, + "grad_norm": 0.9236436486244202, + "learning_rate": 0.0008000491722120806, + "loss": 1.5751, + "step": 4588 + }, + { + "epoch": 0.4840717299578059, + "grad_norm": 0.7794052362442017, + "learning_rate": 0.0007997986694291404, + "loss": 1.6078, + "step": 4589 + }, + { + "epoch": 0.48417721518987344, + "grad_norm": 0.8407885432243347, + "learning_rate": 0.0007995481610660108, + "loss": 1.6047, + "step": 4590 + }, + { + "epoch": 0.48428270042194094, + "grad_norm": 0.7386727929115295, + "learning_rate": 0.0007992976471507628, + "loss": 1.612, + "step": 4591 + }, + { + "epoch": 0.48438818565400843, + "grad_norm": 0.7181751728057861, + "learning_rate": 0.0007990471277114676, + "loss": 1.6017, + "step": 4592 + }, + { + "epoch": 0.4844936708860759, + "grad_norm": 0.5833379030227661, + "learning_rate": 0.0007987966027761972, + "loss": 1.6108, + "step": 4593 + }, + { + "epoch": 
0.48459915611814347, + "grad_norm": 0.6684516072273254, + "learning_rate": 0.0007985460723730242, + "loss": 1.5742, + "step": 4594 + }, + { + "epoch": 0.48470464135021096, + "grad_norm": 0.6640851497650146, + "learning_rate": 0.0007982955365300214, + "loss": 1.6432, + "step": 4595 + }, + { + "epoch": 0.48481012658227846, + "grad_norm": 0.743274986743927, + "learning_rate": 0.0007980449952752633, + "loss": 1.6387, + "step": 4596 + }, + { + "epoch": 0.484915611814346, + "grad_norm": 0.5561324954032898, + "learning_rate": 0.0007977944486368237, + "loss": 1.597, + "step": 4597 + }, + { + "epoch": 0.4850210970464135, + "grad_norm": 0.6927162408828735, + "learning_rate": 0.0007975438966427778, + "loss": 1.6267, + "step": 4598 + }, + { + "epoch": 0.485126582278481, + "grad_norm": 0.5881175398826599, + "learning_rate": 0.0007972933393212012, + "loss": 1.6619, + "step": 4599 + }, + { + "epoch": 0.48523206751054854, + "grad_norm": 0.6828265190124512, + "learning_rate": 0.0007970427767001702, + "loss": 1.6389, + "step": 4600 + }, + { + "epoch": 0.48533755274261603, + "grad_norm": 0.7273560762405396, + "learning_rate": 0.0007967922088077615, + "loss": 1.6603, + "step": 4601 + }, + { + "epoch": 0.4854430379746835, + "grad_norm": 0.5872033834457397, + "learning_rate": 0.0007965416356720524, + "loss": 1.6226, + "step": 4602 + }, + { + "epoch": 0.48554852320675107, + "grad_norm": 0.7831789255142212, + "learning_rate": 0.000796291057321121, + "loss": 1.5742, + "step": 4603 + }, + { + "epoch": 0.48565400843881856, + "grad_norm": 0.628600001335144, + "learning_rate": 0.0007960404737830457, + "loss": 1.5989, + "step": 4604 + }, + { + "epoch": 0.48575949367088606, + "grad_norm": 0.8055114150047302, + "learning_rate": 0.0007957898850859058, + "loss": 1.6515, + "step": 4605 + }, + { + "epoch": 0.4858649789029536, + "grad_norm": 0.7344996333122253, + "learning_rate": 0.000795539291257781, + "loss": 1.6381, + "step": 4606 + }, + { + "epoch": 0.4859704641350211, + "grad_norm": 
0.7140079736709595, + "learning_rate": 0.0007952886923267516, + "loss": 1.6221, + "step": 4607 + }, + { + "epoch": 0.4860759493670886, + "grad_norm": 0.6754825115203857, + "learning_rate": 0.0007950380883208981, + "loss": 1.6177, + "step": 4608 + }, + { + "epoch": 0.48618143459915614, + "grad_norm": 0.9616337418556213, + "learning_rate": 0.0007947874792683025, + "loss": 1.6315, + "step": 4609 + }, + { + "epoch": 0.48628691983122363, + "grad_norm": 0.9512064456939697, + "learning_rate": 0.0007945368651970464, + "loss": 1.6515, + "step": 4610 + }, + { + "epoch": 0.4863924050632911, + "grad_norm": 0.8722438812255859, + "learning_rate": 0.0007942862461352125, + "loss": 1.5775, + "step": 4611 + }, + { + "epoch": 0.48649789029535867, + "grad_norm": 0.8959334492683411, + "learning_rate": 0.0007940356221108837, + "loss": 1.6181, + "step": 4612 + }, + { + "epoch": 0.48660337552742616, + "grad_norm": 0.8850497007369995, + "learning_rate": 0.0007937849931521441, + "loss": 1.5836, + "step": 4613 + }, + { + "epoch": 0.48670886075949366, + "grad_norm": 0.9668505787849426, + "learning_rate": 0.0007935343592870778, + "loss": 1.5936, + "step": 4614 + }, + { + "epoch": 0.4868143459915612, + "grad_norm": 0.8656611442565918, + "learning_rate": 0.0007932837205437692, + "loss": 1.6539, + "step": 4615 + }, + { + "epoch": 0.4869198312236287, + "grad_norm": 0.8380774855613708, + "learning_rate": 0.000793033076950304, + "loss": 1.6258, + "step": 4616 + }, + { + "epoch": 0.4870253164556962, + "grad_norm": 0.808237612247467, + "learning_rate": 0.0007927824285347678, + "loss": 1.6385, + "step": 4617 + }, + { + "epoch": 0.48713080168776374, + "grad_norm": 0.7753523588180542, + "learning_rate": 0.0007925317753252473, + "loss": 1.5684, + "step": 4618 + }, + { + "epoch": 0.48723628691983123, + "grad_norm": 0.9106887578964233, + "learning_rate": 0.0007922811173498293, + "loss": 1.5522, + "step": 4619 + }, + { + "epoch": 0.4873417721518987, + "grad_norm": 0.9213322401046753, + "learning_rate": 
0.0007920304546366013, + "loss": 1.6163, + "step": 4620 + }, + { + "epoch": 0.48744725738396627, + "grad_norm": 0.6828353404998779, + "learning_rate": 0.0007917797872136511, + "loss": 1.6357, + "step": 4621 + }, + { + "epoch": 0.48755274261603376, + "grad_norm": 0.8988171815872192, + "learning_rate": 0.0007915291151090676, + "loss": 1.5899, + "step": 4622 + }, + { + "epoch": 0.48765822784810126, + "grad_norm": 0.6567809581756592, + "learning_rate": 0.0007912784383509396, + "loss": 1.5966, + "step": 4623 + }, + { + "epoch": 0.4877637130801688, + "grad_norm": 0.9421156048774719, + "learning_rate": 0.0007910277569673568, + "loss": 1.618, + "step": 4624 + }, + { + "epoch": 0.4878691983122363, + "grad_norm": 0.6893950700759888, + "learning_rate": 0.000790777070986409, + "loss": 1.6145, + "step": 4625 + }, + { + "epoch": 0.4879746835443038, + "grad_norm": 0.8615567088127136, + "learning_rate": 0.0007905263804361873, + "loss": 1.6504, + "step": 4626 + }, + { + "epoch": 0.4880801687763713, + "grad_norm": 0.7766703367233276, + "learning_rate": 0.0007902756853447824, + "loss": 1.6515, + "step": 4627 + }, + { + "epoch": 0.48818565400843883, + "grad_norm": 0.6037343740463257, + "learning_rate": 0.0007900249857402863, + "loss": 1.6054, + "step": 4628 + }, + { + "epoch": 0.4882911392405063, + "grad_norm": 0.714465856552124, + "learning_rate": 0.000789774281650791, + "loss": 1.5991, + "step": 4629 + }, + { + "epoch": 0.4883966244725738, + "grad_norm": 0.6142235994338989, + "learning_rate": 0.000789523573104389, + "loss": 1.6502, + "step": 4630 + }, + { + "epoch": 0.48850210970464136, + "grad_norm": 0.6594054698944092, + "learning_rate": 0.0007892728601291737, + "loss": 1.6217, + "step": 4631 + }, + { + "epoch": 0.48860759493670886, + "grad_norm": 0.6096288561820984, + "learning_rate": 0.0007890221427532384, + "loss": 1.6116, + "step": 4632 + }, + { + "epoch": 0.48871308016877635, + "grad_norm": 0.6227425336837769, + "learning_rate": 0.0007887714210046775, + "loss": 1.6183, + 
"step": 4633 + }, + { + "epoch": 0.4888185654008439, + "grad_norm": 0.6513197422027588, + "learning_rate": 0.0007885206949115855, + "loss": 1.608, + "step": 4634 + }, + { + "epoch": 0.4889240506329114, + "grad_norm": 0.6731924414634705, + "learning_rate": 0.0007882699645020577, + "loss": 1.6011, + "step": 4635 + }, + { + "epoch": 0.4890295358649789, + "grad_norm": 0.5597129464149475, + "learning_rate": 0.0007880192298041893, + "loss": 1.624, + "step": 4636 + }, + { + "epoch": 0.48913502109704643, + "grad_norm": 0.5798847079277039, + "learning_rate": 0.0007877684908460768, + "loss": 1.615, + "step": 4637 + }, + { + "epoch": 0.4892405063291139, + "grad_norm": 0.5553725957870483, + "learning_rate": 0.0007875177476558165, + "loss": 1.6288, + "step": 4638 + }, + { + "epoch": 0.4893459915611814, + "grad_norm": 0.5565118193626404, + "learning_rate": 0.0007872670002615056, + "loss": 1.6297, + "step": 4639 + }, + { + "epoch": 0.48945147679324896, + "grad_norm": 0.5716167688369751, + "learning_rate": 0.0007870162486912414, + "loss": 1.6387, + "step": 4640 + }, + { + "epoch": 0.48955696202531646, + "grad_norm": 0.5573167204856873, + "learning_rate": 0.0007867654929731221, + "loss": 1.6347, + "step": 4641 + }, + { + "epoch": 0.48966244725738395, + "grad_norm": 0.5634401440620422, + "learning_rate": 0.0007865147331352457, + "loss": 1.5915, + "step": 4642 + }, + { + "epoch": 0.4897679324894515, + "grad_norm": 0.5523548722267151, + "learning_rate": 0.0007862639692057115, + "loss": 1.6191, + "step": 4643 + }, + { + "epoch": 0.489873417721519, + "grad_norm": 0.613739013671875, + "learning_rate": 0.0007860132012126187, + "loss": 1.6328, + "step": 4644 + }, + { + "epoch": 0.4899789029535865, + "grad_norm": 0.5722295045852661, + "learning_rate": 0.0007857624291840672, + "loss": 1.6498, + "step": 4645 + }, + { + "epoch": 0.49008438818565403, + "grad_norm": 0.7270601391792297, + "learning_rate": 0.0007855116531481572, + "loss": 1.6125, + "step": 4646 + }, + { + "epoch": 
0.4901898734177215, + "grad_norm": 0.5945188403129578, + "learning_rate": 0.0007852608731329893, + "loss": 1.6455, + "step": 4647 + }, + { + "epoch": 0.490295358649789, + "grad_norm": 0.6007760763168335, + "learning_rate": 0.0007850100891666648, + "loss": 1.6128, + "step": 4648 + }, + { + "epoch": 0.49040084388185656, + "grad_norm": 0.6130055785179138, + "learning_rate": 0.0007847593012772852, + "loss": 1.5971, + "step": 4649 + }, + { + "epoch": 0.49050632911392406, + "grad_norm": 0.5972514748573303, + "learning_rate": 0.0007845085094929527, + "loss": 1.6261, + "step": 4650 + }, + { + "epoch": 0.49061181434599155, + "grad_norm": 0.589286208152771, + "learning_rate": 0.0007842577138417695, + "loss": 1.6231, + "step": 4651 + }, + { + "epoch": 0.4907172995780591, + "grad_norm": 0.6221838593482971, + "learning_rate": 0.0007840069143518386, + "loss": 1.6156, + "step": 4652 + }, + { + "epoch": 0.4908227848101266, + "grad_norm": 0.5785776376724243, + "learning_rate": 0.0007837561110512635, + "loss": 1.5928, + "step": 4653 + }, + { + "epoch": 0.4909282700421941, + "grad_norm": 0.6252830624580383, + "learning_rate": 0.0007835053039681476, + "loss": 1.6366, + "step": 4654 + }, + { + "epoch": 0.49103375527426163, + "grad_norm": 0.558775782585144, + "learning_rate": 0.0007832544931305956, + "loss": 1.5927, + "step": 4655 + }, + { + "epoch": 0.4911392405063291, + "grad_norm": 0.5425917506217957, + "learning_rate": 0.0007830036785667116, + "loss": 1.598, + "step": 4656 + }, + { + "epoch": 0.4912447257383966, + "grad_norm": 0.5822109580039978, + "learning_rate": 0.000782752860304601, + "loss": 1.6025, + "step": 4657 + }, + { + "epoch": 0.4913502109704641, + "grad_norm": 0.5884714126586914, + "learning_rate": 0.0007825020383723692, + "loss": 1.6213, + "step": 4658 + }, + { + "epoch": 0.49145569620253166, + "grad_norm": 0.5522034168243408, + "learning_rate": 0.0007822512127981218, + "loss": 1.6013, + "step": 4659 + }, + { + "epoch": 0.49156118143459915, + "grad_norm": 
0.6265906095504761, + "learning_rate": 0.0007820003836099649, + "loss": 1.6195, + "step": 4660 + }, + { + "epoch": 0.49166666666666664, + "grad_norm": 0.6946244835853577, + "learning_rate": 0.0007817495508360057, + "loss": 1.6262, + "step": 4661 + }, + { + "epoch": 0.4917721518987342, + "grad_norm": 0.6261293888092041, + "learning_rate": 0.0007814987145043511, + "loss": 1.5896, + "step": 4662 + }, + { + "epoch": 0.4918776371308017, + "grad_norm": 0.6960366368293762, + "learning_rate": 0.0007812478746431085, + "loss": 1.5994, + "step": 4663 + }, + { + "epoch": 0.4919831223628692, + "grad_norm": 0.6284975409507751, + "learning_rate": 0.0007809970312803855, + "loss": 1.6069, + "step": 4664 + }, + { + "epoch": 0.4920886075949367, + "grad_norm": 0.5361141562461853, + "learning_rate": 0.0007807461844442906, + "loss": 1.6174, + "step": 4665 + }, + { + "epoch": 0.4921940928270042, + "grad_norm": 0.6379793882369995, + "learning_rate": 0.0007804953341629326, + "loss": 1.6081, + "step": 4666 + }, + { + "epoch": 0.4922995780590717, + "grad_norm": 0.7198461294174194, + "learning_rate": 0.0007802444804644202, + "loss": 1.6311, + "step": 4667 + }, + { + "epoch": 0.49240506329113926, + "grad_norm": 0.5961447358131409, + "learning_rate": 0.0007799936233768632, + "loss": 1.615, + "step": 4668 + }, + { + "epoch": 0.49251054852320675, + "grad_norm": 0.6274780631065369, + "learning_rate": 0.0007797427629283708, + "loss": 1.6257, + "step": 4669 + }, + { + "epoch": 0.49261603375527424, + "grad_norm": 0.6482848525047302, + "learning_rate": 0.0007794918991470537, + "loss": 1.5976, + "step": 4670 + }, + { + "epoch": 0.4927215189873418, + "grad_norm": 0.746627926826477, + "learning_rate": 0.0007792410320610222, + "loss": 1.6155, + "step": 4671 + }, + { + "epoch": 0.4928270042194093, + "grad_norm": 0.6981410384178162, + "learning_rate": 0.0007789901616983872, + "loss": 1.5834, + "step": 4672 + }, + { + "epoch": 0.4929324894514768, + "grad_norm": 0.6557857394218445, + "learning_rate": 
0.0007787392880872601, + "loss": 1.6293, + "step": 4673 + }, + { + "epoch": 0.4930379746835443, + "grad_norm": 0.6798202395439148, + "learning_rate": 0.0007784884112557524, + "loss": 1.6459, + "step": 4674 + }, + { + "epoch": 0.4931434599156118, + "grad_norm": 0.5824443697929382, + "learning_rate": 0.0007782375312319761, + "loss": 1.593, + "step": 4675 + }, + { + "epoch": 0.4932489451476793, + "grad_norm": 0.6686611175537109, + "learning_rate": 0.0007779866480440437, + "loss": 1.582, + "step": 4676 + }, + { + "epoch": 0.49335443037974686, + "grad_norm": 0.5791701078414917, + "learning_rate": 0.0007777357617200679, + "loss": 1.6048, + "step": 4677 + }, + { + "epoch": 0.49345991561181435, + "grad_norm": 0.7362942695617676, + "learning_rate": 0.0007774848722881616, + "loss": 1.5924, + "step": 4678 + }, + { + "epoch": 0.49356540084388184, + "grad_norm": 0.5878885984420776, + "learning_rate": 0.0007772339797764385, + "loss": 1.6363, + "step": 4679 + }, + { + "epoch": 0.4936708860759494, + "grad_norm": 0.6824476718902588, + "learning_rate": 0.0007769830842130119, + "loss": 1.5835, + "step": 4680 + }, + { + "epoch": 0.4937763713080169, + "grad_norm": 0.5645202994346619, + "learning_rate": 0.0007767321856259963, + "loss": 1.5994, + "step": 4681 + }, + { + "epoch": 0.4938818565400844, + "grad_norm": 0.7133975625038147, + "learning_rate": 0.0007764812840435058, + "loss": 1.58, + "step": 4682 + }, + { + "epoch": 0.4939873417721519, + "grad_norm": 0.6171513795852661, + "learning_rate": 0.0007762303794936556, + "loss": 1.6109, + "step": 4683 + }, + { + "epoch": 0.4940928270042194, + "grad_norm": 0.6527866125106812, + "learning_rate": 0.0007759794720045606, + "loss": 1.573, + "step": 4684 + }, + { + "epoch": 0.4941983122362869, + "grad_norm": 0.6352267265319824, + "learning_rate": 0.0007757285616043363, + "loss": 1.5724, + "step": 4685 + }, + { + "epoch": 0.49430379746835446, + "grad_norm": 0.7007918953895569, + "learning_rate": 0.0007754776483210981, + "loss": 1.6186, + "step": 
4686 + }, + { + "epoch": 0.49440928270042195, + "grad_norm": 0.7538022398948669, + "learning_rate": 0.0007752267321829624, + "loss": 1.645, + "step": 4687 + }, + { + "epoch": 0.49451476793248944, + "grad_norm": 0.7522662878036499, + "learning_rate": 0.0007749758132180459, + "loss": 1.5976, + "step": 4688 + }, + { + "epoch": 0.494620253164557, + "grad_norm": 0.6735943555831909, + "learning_rate": 0.0007747248914544646, + "loss": 1.5927, + "step": 4689 + }, + { + "epoch": 0.4947257383966245, + "grad_norm": 0.8003470301628113, + "learning_rate": 0.0007744739669203361, + "loss": 1.6559, + "step": 4690 + }, + { + "epoch": 0.494831223628692, + "grad_norm": 0.563116729259491, + "learning_rate": 0.0007742230396437775, + "loss": 1.6402, + "step": 4691 + }, + { + "epoch": 0.49493670886075947, + "grad_norm": 0.7431126236915588, + "learning_rate": 0.0007739721096529066, + "loss": 1.6442, + "step": 4692 + }, + { + "epoch": 0.495042194092827, + "grad_norm": 0.6507112979888916, + "learning_rate": 0.0007737211769758412, + "loss": 1.6042, + "step": 4693 + }, + { + "epoch": 0.4951476793248945, + "grad_norm": 0.7525187134742737, + "learning_rate": 0.0007734702416406997, + "loss": 1.6288, + "step": 4694 + }, + { + "epoch": 0.495253164556962, + "grad_norm": 0.6978188753128052, + "learning_rate": 0.0007732193036756006, + "loss": 1.5946, + "step": 4695 + }, + { + "epoch": 0.49535864978902955, + "grad_norm": 0.6408021450042725, + "learning_rate": 0.0007729683631086627, + "loss": 1.6322, + "step": 4696 + }, + { + "epoch": 0.49546413502109704, + "grad_norm": 0.6143543124198914, + "learning_rate": 0.0007727174199680051, + "loss": 1.597, + "step": 4697 + }, + { + "epoch": 0.49556962025316453, + "grad_norm": 0.652851402759552, + "learning_rate": 0.0007724664742817475, + "loss": 1.5828, + "step": 4698 + }, + { + "epoch": 0.4956751054852321, + "grad_norm": 0.5926876664161682, + "learning_rate": 0.0007722155260780093, + "loss": 1.6186, + "step": 4699 + }, + { + "epoch": 0.4957805907172996, + 
"grad_norm": 0.6167706251144409, + "learning_rate": 0.0007719645753849108, + "loss": 1.5884, + "step": 4700 + }, + { + "epoch": 0.49588607594936707, + "grad_norm": 0.7121110558509827, + "learning_rate": 0.0007717136222305718, + "loss": 1.6173, + "step": 4701 + }, + { + "epoch": 0.4959915611814346, + "grad_norm": 0.5903317332267761, + "learning_rate": 0.0007714626666431134, + "loss": 1.6303, + "step": 4702 + }, + { + "epoch": 0.4960970464135021, + "grad_norm": 0.5788334608078003, + "learning_rate": 0.000771211708650656, + "loss": 1.583, + "step": 4703 + }, + { + "epoch": 0.4962025316455696, + "grad_norm": 0.5932456254959106, + "learning_rate": 0.000770960748281321, + "loss": 1.6177, + "step": 4704 + }, + { + "epoch": 0.49630801687763715, + "grad_norm": 0.596782922744751, + "learning_rate": 0.0007707097855632297, + "loss": 1.5954, + "step": 4705 + }, + { + "epoch": 0.49641350210970464, + "grad_norm": 0.5846361517906189, + "learning_rate": 0.0007704588205245034, + "loss": 1.6047, + "step": 4706 + }, + { + "epoch": 0.49651898734177213, + "grad_norm": 0.6208446025848389, + "learning_rate": 0.0007702078531932645, + "loss": 1.661, + "step": 4707 + }, + { + "epoch": 0.4966244725738397, + "grad_norm": 0.5868269205093384, + "learning_rate": 0.0007699568835976348, + "loss": 1.6265, + "step": 4708 + }, + { + "epoch": 0.4967299578059072, + "grad_norm": 0.6908110976219177, + "learning_rate": 0.0007697059117657368, + "loss": 1.6007, + "step": 4709 + }, + { + "epoch": 0.49683544303797467, + "grad_norm": 0.7353700995445251, + "learning_rate": 0.0007694549377256932, + "loss": 1.6175, + "step": 4710 + }, + { + "epoch": 0.4969409282700422, + "grad_norm": 0.6541075706481934, + "learning_rate": 0.0007692039615056264, + "loss": 1.6221, + "step": 4711 + }, + { + "epoch": 0.4970464135021097, + "grad_norm": 0.6589155197143555, + "learning_rate": 0.0007689529831336604, + "loss": 1.6027, + "step": 4712 + }, + { + "epoch": 0.4971518987341772, + "grad_norm": 0.6671639680862427, + 
"learning_rate": 0.0007687020026379181, + "loss": 1.5839, + "step": 4713 + }, + { + "epoch": 0.49725738396624475, + "grad_norm": 0.6046185493469238, + "learning_rate": 0.0007684510200465231, + "loss": 1.6193, + "step": 4714 + }, + { + "epoch": 0.49736286919831224, + "grad_norm": 0.6722612380981445, + "learning_rate": 0.0007682000353875992, + "loss": 1.6042, + "step": 4715 + }, + { + "epoch": 0.49746835443037973, + "grad_norm": 0.6371982097625732, + "learning_rate": 0.0007679490486892705, + "loss": 1.5924, + "step": 4716 + }, + { + "epoch": 0.4975738396624473, + "grad_norm": 0.6783912181854248, + "learning_rate": 0.0007676980599796616, + "loss": 1.602, + "step": 4717 + }, + { + "epoch": 0.4976793248945148, + "grad_norm": 0.6814916133880615, + "learning_rate": 0.0007674470692868967, + "loss": 1.6496, + "step": 4718 + }, + { + "epoch": 0.49778481012658227, + "grad_norm": 0.8515249490737915, + "learning_rate": 0.0007671960766391008, + "loss": 1.6165, + "step": 4719 + }, + { + "epoch": 0.4978902953586498, + "grad_norm": 0.5607902407646179, + "learning_rate": 0.0007669450820643987, + "loss": 1.5766, + "step": 4720 + }, + { + "epoch": 0.4979957805907173, + "grad_norm": 0.7488727569580078, + "learning_rate": 0.0007666940855909155, + "loss": 1.6038, + "step": 4721 + }, + { + "epoch": 0.4981012658227848, + "grad_norm": 0.63688725233078, + "learning_rate": 0.000766443087246777, + "loss": 1.6212, + "step": 4722 + }, + { + "epoch": 0.49820675105485235, + "grad_norm": 0.7932765483856201, + "learning_rate": 0.0007661920870601085, + "loss": 1.6133, + "step": 4723 + }, + { + "epoch": 0.49831223628691984, + "grad_norm": 0.6934543251991272, + "learning_rate": 0.000765941085059036, + "loss": 1.6211, + "step": 4724 + }, + { + "epoch": 0.49841772151898733, + "grad_norm": 0.700019896030426, + "learning_rate": 0.0007656900812716853, + "loss": 1.6051, + "step": 4725 + }, + { + "epoch": 0.4985232067510548, + "grad_norm": 0.7457073330879211, + "learning_rate": 0.0007654390757261827, + 
"loss": 1.6, + "step": 4726 + }, + { + "epoch": 0.4986286919831224, + "grad_norm": 0.7469515204429626, + "learning_rate": 0.0007651880684506548, + "loss": 1.572, + "step": 4727 + }, + { + "epoch": 0.49873417721518987, + "grad_norm": 0.6860659122467041, + "learning_rate": 0.0007649370594732282, + "loss": 1.6378, + "step": 4728 + }, + { + "epoch": 0.49883966244725736, + "grad_norm": 0.6419793963432312, + "learning_rate": 0.0007646860488220293, + "loss": 1.5877, + "step": 4729 + }, + { + "epoch": 0.4989451476793249, + "grad_norm": 0.6116735339164734, + "learning_rate": 0.0007644350365251855, + "loss": 1.5882, + "step": 4730 + }, + { + "epoch": 0.4990506329113924, + "grad_norm": 0.5878215432167053, + "learning_rate": 0.0007641840226108241, + "loss": 1.6268, + "step": 4731 + }, + { + "epoch": 0.4991561181434599, + "grad_norm": 0.6883168816566467, + "learning_rate": 0.000763933007107072, + "loss": 1.5924, + "step": 4732 + }, + { + "epoch": 0.49926160337552744, + "grad_norm": 0.587074339389801, + "learning_rate": 0.0007636819900420572, + "loss": 1.6302, + "step": 4733 + }, + { + "epoch": 0.49936708860759493, + "grad_norm": 0.6266050934791565, + "learning_rate": 0.0007634309714439069, + "loss": 1.6092, + "step": 4734 + }, + { + "epoch": 0.4994725738396624, + "grad_norm": 0.6531434059143066, + "learning_rate": 0.0007631799513407495, + "loss": 1.6336, + "step": 4735 + }, + { + "epoch": 0.49957805907173, + "grad_norm": 0.6452326774597168, + "learning_rate": 0.0007629289297607127, + "loss": 1.6098, + "step": 4736 + }, + { + "epoch": 0.49968354430379747, + "grad_norm": 0.5680766105651855, + "learning_rate": 0.0007626779067319251, + "loss": 1.5828, + "step": 4737 + }, + { + "epoch": 0.49978902953586496, + "grad_norm": 0.655790388584137, + "learning_rate": 0.0007624268822825145, + "loss": 1.5644, + "step": 4738 + }, + { + "epoch": 0.4998945147679325, + "grad_norm": 0.5572847127914429, + "learning_rate": 0.00076217585644061, + "loss": 1.6027, + "step": 4739 + }, + { + "epoch": 
0.5, + "grad_norm": 0.7884122729301453, + "learning_rate": 0.0007619248292343399, + "loss": 1.6119, + "step": 4740 + }, + { + "epoch": 0.5001054852320675, + "grad_norm": 0.6332219243049622, + "learning_rate": 0.0007616738006918334, + "loss": 1.6366, + "step": 4741 + }, + { + "epoch": 0.500210970464135, + "grad_norm": 0.7958171367645264, + "learning_rate": 0.0007614227708412191, + "loss": 1.5989, + "step": 4742 + }, + { + "epoch": 0.5003164556962025, + "grad_norm": 0.5741320848464966, + "learning_rate": 0.0007611717397106265, + "loss": 1.5963, + "step": 4743 + }, + { + "epoch": 0.5004219409282701, + "grad_norm": 0.6193554997444153, + "learning_rate": 0.0007609207073281848, + "loss": 1.6519, + "step": 4744 + }, + { + "epoch": 0.5005274261603375, + "grad_norm": 0.6987004280090332, + "learning_rate": 0.0007606696737220233, + "loss": 1.598, + "step": 4745 + }, + { + "epoch": 0.5006329113924051, + "grad_norm": 0.6141858696937561, + "learning_rate": 0.000760418638920272, + "loss": 1.6297, + "step": 4746 + }, + { + "epoch": 0.5007383966244726, + "grad_norm": 0.5820015668869019, + "learning_rate": 0.0007601676029510597, + "loss": 1.6257, + "step": 4747 + }, + { + "epoch": 0.50084388185654, + "grad_norm": 0.5732802748680115, + "learning_rate": 0.000759916565842517, + "loss": 1.6454, + "step": 4748 + }, + { + "epoch": 0.5009493670886076, + "grad_norm": 0.6602269411087036, + "learning_rate": 0.0007596655276227739, + "loss": 1.6352, + "step": 4749 + }, + { + "epoch": 0.5010548523206751, + "grad_norm": 0.685021698474884, + "learning_rate": 0.0007594144883199599, + "loss": 1.6196, + "step": 4750 + }, + { + "epoch": 0.5011603375527426, + "grad_norm": 0.7076736092567444, + "learning_rate": 0.0007591634479622056, + "loss": 1.6273, + "step": 4751 + }, + { + "epoch": 0.5012658227848101, + "grad_norm": 0.7420931458473206, + "learning_rate": 0.0007589124065776414, + "loss": 1.6138, + "step": 4752 + }, + { + "epoch": 0.5013713080168777, + "grad_norm": 0.6869295835494995, + 
"learning_rate": 0.0007586613641943976, + "loss": 1.6324, + "step": 4753 + }, + { + "epoch": 0.5014767932489451, + "grad_norm": 0.5671600103378296, + "learning_rate": 0.0007584103208406048, + "loss": 1.6081, + "step": 4754 + }, + { + "epoch": 0.5015822784810127, + "grad_norm": 0.594403862953186, + "learning_rate": 0.0007581592765443933, + "loss": 1.5905, + "step": 4755 + }, + { + "epoch": 0.5016877637130802, + "grad_norm": 0.5912253856658936, + "learning_rate": 0.0007579082313338943, + "loss": 1.5718, + "step": 4756 + }, + { + "epoch": 0.5017932489451477, + "grad_norm": 0.5540190935134888, + "learning_rate": 0.0007576571852372386, + "loss": 1.6159, + "step": 4757 + }, + { + "epoch": 0.5018987341772152, + "grad_norm": 0.5854849219322205, + "learning_rate": 0.0007574061382825572, + "loss": 1.5916, + "step": 4758 + }, + { + "epoch": 0.5020042194092827, + "grad_norm": 0.633554220199585, + "learning_rate": 0.0007571550904979812, + "loss": 1.5918, + "step": 4759 + }, + { + "epoch": 0.5021097046413502, + "grad_norm": 0.573394238948822, + "learning_rate": 0.0007569040419116413, + "loss": 1.6202, + "step": 4760 + }, + { + "epoch": 0.5022151898734177, + "grad_norm": 0.6697221994400024, + "learning_rate": 0.0007566529925516692, + "loss": 1.6121, + "step": 4761 + }, + { + "epoch": 0.5023206751054853, + "grad_norm": 0.7573768496513367, + "learning_rate": 0.0007564019424461962, + "loss": 1.628, + "step": 4762 + }, + { + "epoch": 0.5024261603375527, + "grad_norm": 0.6626774668693542, + "learning_rate": 0.0007561508916233535, + "loss": 1.5957, + "step": 4763 + }, + { + "epoch": 0.5025316455696203, + "grad_norm": 0.8690049648284912, + "learning_rate": 0.0007558998401112727, + "loss": 1.6066, + "step": 4764 + }, + { + "epoch": 0.5026371308016878, + "grad_norm": 0.5939679741859436, + "learning_rate": 0.0007556487879380856, + "loss": 1.5882, + "step": 4765 + }, + { + "epoch": 0.5027426160337553, + "grad_norm": 0.8728967308998108, + "learning_rate": 0.0007553977351319235, + "loss": 
1.5777, + "step": 4766 + }, + { + "epoch": 0.5028481012658228, + "grad_norm": 0.5750523209571838, + "learning_rate": 0.0007551466817209183, + "loss": 1.6154, + "step": 4767 + }, + { + "epoch": 0.5029535864978903, + "grad_norm": 0.9364904761314392, + "learning_rate": 0.0007548956277332016, + "loss": 1.5954, + "step": 4768 + }, + { + "epoch": 0.5030590717299578, + "grad_norm": 0.5908493399620056, + "learning_rate": 0.0007546445731969056, + "loss": 1.6327, + "step": 4769 + }, + { + "epoch": 0.5031645569620253, + "grad_norm": 0.8202195763587952, + "learning_rate": 0.000754393518140162, + "loss": 1.6137, + "step": 4770 + }, + { + "epoch": 0.5032700421940929, + "grad_norm": 0.5993514060974121, + "learning_rate": 0.0007541424625911026, + "loss": 1.5977, + "step": 4771 + }, + { + "epoch": 0.5033755274261603, + "grad_norm": 0.6923753619194031, + "learning_rate": 0.0007538914065778598, + "loss": 1.6755, + "step": 4772 + }, + { + "epoch": 0.5034810126582279, + "grad_norm": 0.6074042320251465, + "learning_rate": 0.0007536403501285653, + "loss": 1.6169, + "step": 4773 + }, + { + "epoch": 0.5035864978902953, + "grad_norm": 0.7197924852371216, + "learning_rate": 0.0007533892932713517, + "loss": 1.5899, + "step": 4774 + }, + { + "epoch": 0.5036919831223629, + "grad_norm": 0.6040269136428833, + "learning_rate": 0.0007531382360343507, + "loss": 1.6406, + "step": 4775 + }, + { + "epoch": 0.5037974683544304, + "grad_norm": 0.7664696574211121, + "learning_rate": 0.0007528871784456948, + "loss": 1.6301, + "step": 4776 + }, + { + "epoch": 0.5039029535864978, + "grad_norm": 0.683036208152771, + "learning_rate": 0.0007526361205335159, + "loss": 1.5913, + "step": 4777 + }, + { + "epoch": 0.5040084388185654, + "grad_norm": 0.8353697061538696, + "learning_rate": 0.0007523850623259469, + "loss": 1.6374, + "step": 4778 + }, + { + "epoch": 0.5041139240506329, + "grad_norm": 0.5848055481910706, + "learning_rate": 0.0007521340038511196, + "loss": 1.5956, + "step": 4779 + }, + { + "epoch": 
0.5042194092827004, + "grad_norm": 0.8039825558662415, + "learning_rate": 0.0007518829451371665, + "loss": 1.595, + "step": 4780 + }, + { + "epoch": 0.5043248945147679, + "grad_norm": 0.6105281710624695, + "learning_rate": 0.0007516318862122199, + "loss": 1.5779, + "step": 4781 + }, + { + "epoch": 0.5044303797468355, + "grad_norm": 0.838067889213562, + "learning_rate": 0.0007513808271044125, + "loss": 1.6411, + "step": 4782 + }, + { + "epoch": 0.5045358649789029, + "grad_norm": 0.6122531890869141, + "learning_rate": 0.0007511297678418766, + "loss": 1.613, + "step": 4783 + }, + { + "epoch": 0.5046413502109705, + "grad_norm": 0.7645689845085144, + "learning_rate": 0.0007508787084527445, + "loss": 1.5907, + "step": 4784 + }, + { + "epoch": 0.504746835443038, + "grad_norm": 0.8620412349700928, + "learning_rate": 0.0007506276489651489, + "loss": 1.6181, + "step": 4785 + }, + { + "epoch": 0.5048523206751054, + "grad_norm": 0.6545829772949219, + "learning_rate": 0.0007503765894072217, + "loss": 1.5908, + "step": 4786 + }, + { + "epoch": 0.504957805907173, + "grad_norm": 0.7428253889083862, + "learning_rate": 0.000750125529807096, + "loss": 1.5963, + "step": 4787 + }, + { + "epoch": 0.5050632911392405, + "grad_norm": 0.639258623123169, + "learning_rate": 0.0007498744701929041, + "loss": 1.5994, + "step": 4788 + }, + { + "epoch": 0.505168776371308, + "grad_norm": 0.8144617676734924, + "learning_rate": 0.0007496234105927785, + "loss": 1.553, + "step": 4789 + }, + { + "epoch": 0.5052742616033755, + "grad_norm": 0.6959471106529236, + "learning_rate": 0.0007493723510348516, + "loss": 1.6446, + "step": 4790 + }, + { + "epoch": 0.5053797468354431, + "grad_norm": 0.7186495065689087, + "learning_rate": 0.0007491212915472557, + "loss": 1.634, + "step": 4791 + }, + { + "epoch": 0.5054852320675105, + "grad_norm": 0.712540328502655, + "learning_rate": 0.0007488702321581234, + "loss": 1.5611, + "step": 4792 + }, + { + "epoch": 0.505590717299578, + "grad_norm": 0.6953135132789612, + 
"learning_rate": 0.0007486191728955873, + "loss": 1.6329, + "step": 4793 + }, + { + "epoch": 0.5056962025316456, + "grad_norm": 0.702216386795044, + "learning_rate": 0.00074836811378778, + "loss": 1.6804, + "step": 4794 + }, + { + "epoch": 0.505801687763713, + "grad_norm": 0.7752143740653992, + "learning_rate": 0.0007481170548628335, + "loss": 1.5983, + "step": 4795 + }, + { + "epoch": 0.5059071729957806, + "grad_norm": 0.631641149520874, + "learning_rate": 0.0007478659961488805, + "loss": 1.5926, + "step": 4796 + }, + { + "epoch": 0.5060126582278481, + "grad_norm": 0.6651142835617065, + "learning_rate": 0.0007476149376740533, + "loss": 1.5967, + "step": 4797 + }, + { + "epoch": 0.5061181434599156, + "grad_norm": 0.577934980392456, + "learning_rate": 0.0007473638794664841, + "loss": 1.5563, + "step": 4798 + }, + { + "epoch": 0.5062236286919831, + "grad_norm": 0.7315315008163452, + "learning_rate": 0.0007471128215543056, + "loss": 1.6054, + "step": 4799 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 0.5789390206336975, + "learning_rate": 0.0007468617639656496, + "loss": 1.5834, + "step": 4800 + }, + { + "epoch": 0.5064345991561181, + "grad_norm": 0.6303738951683044, + "learning_rate": 0.0007466107067286483, + "loss": 1.5918, + "step": 4801 + }, + { + "epoch": 0.5065400843881857, + "grad_norm": 0.6128150820732117, + "learning_rate": 0.0007463596498714346, + "loss": 1.6144, + "step": 4802 + }, + { + "epoch": 0.5066455696202532, + "grad_norm": 0.6580193638801575, + "learning_rate": 0.0007461085934221402, + "loss": 1.6084, + "step": 4803 + }, + { + "epoch": 0.5067510548523206, + "grad_norm": 0.5692727565765381, + "learning_rate": 0.0007458575374088974, + "loss": 1.5882, + "step": 4804 + }, + { + "epoch": 0.5068565400843882, + "grad_norm": 0.6402649879455566, + "learning_rate": 0.0007456064818598382, + "loss": 1.5959, + "step": 4805 + }, + { + "epoch": 0.5069620253164557, + "grad_norm": 0.6238978505134583, + "learning_rate": 0.0007453554268030946, + "loss": 
1.6037, + "step": 4806 + }, + { + "epoch": 0.5070675105485232, + "grad_norm": 0.6469060778617859, + "learning_rate": 0.0007451043722667985, + "loss": 1.6399, + "step": 4807 + }, + { + "epoch": 0.5071729957805907, + "grad_norm": 0.6209894418716431, + "learning_rate": 0.000744853318279082, + "loss": 1.6119, + "step": 4808 + }, + { + "epoch": 0.5072784810126583, + "grad_norm": 0.575164794921875, + "learning_rate": 0.0007446022648680768, + "loss": 1.6131, + "step": 4809 + }, + { + "epoch": 0.5073839662447257, + "grad_norm": 0.7144646644592285, + "learning_rate": 0.0007443512120619144, + "loss": 1.6054, + "step": 4810 + }, + { + "epoch": 0.5074894514767933, + "grad_norm": 0.5778596997261047, + "learning_rate": 0.0007441001598887273, + "loss": 1.6023, + "step": 4811 + }, + { + "epoch": 0.5075949367088608, + "grad_norm": 0.6415585875511169, + "learning_rate": 0.0007438491083766465, + "loss": 1.6027, + "step": 4812 + }, + { + "epoch": 0.5077004219409282, + "grad_norm": 0.628017246723175, + "learning_rate": 0.000743598057553804, + "loss": 1.6579, + "step": 4813 + }, + { + "epoch": 0.5078059071729958, + "grad_norm": 0.7011303901672363, + "learning_rate": 0.0007433470074483309, + "loss": 1.6365, + "step": 4814 + }, + { + "epoch": 0.5079113924050633, + "grad_norm": 0.5638206601142883, + "learning_rate": 0.0007430959580883589, + "loss": 1.6183, + "step": 4815 + }, + { + "epoch": 0.5080168776371308, + "grad_norm": 0.6145520210266113, + "learning_rate": 0.0007428449095020192, + "loss": 1.5982, + "step": 4816 + }, + { + "epoch": 0.5081223628691983, + "grad_norm": 0.6284211874008179, + "learning_rate": 0.000742593861717443, + "loss": 1.606, + "step": 4817 + }, + { + "epoch": 0.5082278481012659, + "grad_norm": 0.5253342390060425, + "learning_rate": 0.0007423428147627613, + "loss": 1.6614, + "step": 4818 + }, + { + "epoch": 0.5083333333333333, + "grad_norm": 0.5843905210494995, + "learning_rate": 0.0007420917686661055, + "loss": 1.646, + "step": 4819 + }, + { + "epoch": 
0.5084388185654009, + "grad_norm": 0.5830698609352112, + "learning_rate": 0.0007418407234556067, + "loss": 1.6004, + "step": 4820 + }, + { + "epoch": 0.5085443037974684, + "grad_norm": 0.5591059923171997, + "learning_rate": 0.0007415896791593955, + "loss": 1.6329, + "step": 4821 + }, + { + "epoch": 0.5086497890295358, + "grad_norm": 0.5571542382240295, + "learning_rate": 0.0007413386358056025, + "loss": 1.5945, + "step": 4822 + }, + { + "epoch": 0.5087552742616034, + "grad_norm": 0.5948936343193054, + "learning_rate": 0.0007410875934223588, + "loss": 1.6198, + "step": 4823 + }, + { + "epoch": 0.5088607594936709, + "grad_norm": 0.6602652072906494, + "learning_rate": 0.0007408365520377945, + "loss": 1.6287, + "step": 4824 + }, + { + "epoch": 0.5089662447257384, + "grad_norm": 0.5631862282752991, + "learning_rate": 0.0007405855116800403, + "loss": 1.6076, + "step": 4825 + }, + { + "epoch": 0.5090717299578059, + "grad_norm": 0.7243731617927551, + "learning_rate": 0.0007403344723772265, + "loss": 1.5664, + "step": 4826 + }, + { + "epoch": 0.5091772151898735, + "grad_norm": 0.7071858644485474, + "learning_rate": 0.0007400834341574829, + "loss": 1.5925, + "step": 4827 + }, + { + "epoch": 0.5092827004219409, + "grad_norm": 0.6410729289054871, + "learning_rate": 0.0007398323970489402, + "loss": 1.6149, + "step": 4828 + }, + { + "epoch": 0.5093881856540085, + "grad_norm": 0.5702781081199646, + "learning_rate": 0.0007395813610797283, + "loss": 1.5928, + "step": 4829 + }, + { + "epoch": 0.509493670886076, + "grad_norm": 0.7064352035522461, + "learning_rate": 0.0007393303262779767, + "loss": 1.6003, + "step": 4830 + }, + { + "epoch": 0.5095991561181434, + "grad_norm": 0.6809344291687012, + "learning_rate": 0.0007390792926718153, + "loss": 1.6126, + "step": 4831 + }, + { + "epoch": 0.509704641350211, + "grad_norm": 0.5680269002914429, + "learning_rate": 0.0007388282602893737, + "loss": 1.6171, + "step": 4832 + }, + { + "epoch": 0.5098101265822785, + "grad_norm": 
0.5862136483192444, + "learning_rate": 0.000738577229158781, + "loss": 1.5632, + "step": 4833 + }, + { + "epoch": 0.509915611814346, + "grad_norm": 0.7006485462188721, + "learning_rate": 0.000738326199308167, + "loss": 1.5936, + "step": 4834 + }, + { + "epoch": 0.5100210970464135, + "grad_norm": 0.6137369871139526, + "learning_rate": 0.0007380751707656603, + "loss": 1.6066, + "step": 4835 + }, + { + "epoch": 0.5101265822784811, + "grad_norm": 0.5521350502967834, + "learning_rate": 0.0007378241435593901, + "loss": 1.5695, + "step": 4836 + }, + { + "epoch": 0.5102320675105485, + "grad_norm": 0.547035276889801, + "learning_rate": 0.0007375731177174855, + "loss": 1.5733, + "step": 4837 + }, + { + "epoch": 0.510337552742616, + "grad_norm": 0.5962918996810913, + "learning_rate": 0.0007373220932680751, + "loss": 1.6217, + "step": 4838 + }, + { + "epoch": 0.5104430379746835, + "grad_norm": 0.610626757144928, + "learning_rate": 0.0007370710702392873, + "loss": 1.5933, + "step": 4839 + }, + { + "epoch": 0.510548523206751, + "grad_norm": 0.6641005277633667, + "learning_rate": 0.0007368200486592507, + "loss": 1.6393, + "step": 4840 + }, + { + "epoch": 0.5106540084388186, + "grad_norm": 0.6004360318183899, + "learning_rate": 0.0007365690285560932, + "loss": 1.5986, + "step": 4841 + }, + { + "epoch": 0.510759493670886, + "grad_norm": 0.6784504652023315, + "learning_rate": 0.0007363180099579431, + "loss": 1.6155, + "step": 4842 + }, + { + "epoch": 0.5108649789029536, + "grad_norm": 0.5973266363143921, + "learning_rate": 0.0007360669928929282, + "loss": 1.6082, + "step": 4843 + }, + { + "epoch": 0.5109704641350211, + "grad_norm": 0.5878106355667114, + "learning_rate": 0.000735815977389176, + "loss": 1.6108, + "step": 4844 + }, + { + "epoch": 0.5110759493670886, + "grad_norm": 0.6484596729278564, + "learning_rate": 0.0007355649634748143, + "loss": 1.6049, + "step": 4845 + }, + { + "epoch": 0.5111814345991561, + "grad_norm": 0.5848736763000488, + "learning_rate": 
0.0007353139511779707, + "loss": 1.6344, + "step": 4846 + }, + { + "epoch": 0.5112869198312237, + "grad_norm": 0.7455049157142639, + "learning_rate": 0.000735062940526772, + "loss": 1.5991, + "step": 4847 + }, + { + "epoch": 0.5113924050632911, + "grad_norm": 0.5809134840965271, + "learning_rate": 0.0007348119315493453, + "loss": 1.6255, + "step": 4848 + }, + { + "epoch": 0.5114978902953586, + "grad_norm": 0.733672559261322, + "learning_rate": 0.0007345609242738173, + "loss": 1.6146, + "step": 4849 + }, + { + "epoch": 0.5116033755274262, + "grad_norm": 0.571454644203186, + "learning_rate": 0.0007343099187283149, + "loss": 1.6299, + "step": 4850 + }, + { + "epoch": 0.5117088607594936, + "grad_norm": 0.729168176651001, + "learning_rate": 0.0007340589149409644, + "loss": 1.6433, + "step": 4851 + }, + { + "epoch": 0.5118143459915612, + "grad_norm": 0.6711212992668152, + "learning_rate": 0.0007338079129398917, + "loss": 1.6409, + "step": 4852 + }, + { + "epoch": 0.5119198312236287, + "grad_norm": 0.6594322323799133, + "learning_rate": 0.0007335569127532231, + "loss": 1.5762, + "step": 4853 + }, + { + "epoch": 0.5120253164556962, + "grad_norm": 0.7186621427536011, + "learning_rate": 0.0007333059144090845, + "loss": 1.6449, + "step": 4854 + }, + { + "epoch": 0.5121308016877637, + "grad_norm": 0.5383464097976685, + "learning_rate": 0.0007330549179356014, + "loss": 1.5866, + "step": 4855 + }, + { + "epoch": 0.5122362869198313, + "grad_norm": 0.6247869729995728, + "learning_rate": 0.0007328039233608993, + "loss": 1.5793, + "step": 4856 + }, + { + "epoch": 0.5123417721518987, + "grad_norm": 0.539919912815094, + "learning_rate": 0.0007325529307131034, + "loss": 1.5949, + "step": 4857 + }, + { + "epoch": 0.5124472573839662, + "grad_norm": 0.5667951703071594, + "learning_rate": 0.0007323019400203386, + "loss": 1.5751, + "step": 4858 + }, + { + "epoch": 0.5125527426160338, + "grad_norm": 0.5826632976531982, + "learning_rate": 0.0007320509513107296, + "loss": 1.5562, + "step": 
4859 + }, + { + "epoch": 0.5126582278481012, + "grad_norm": 0.6281191110610962, + "learning_rate": 0.0007317999646124011, + "loss": 1.5968, + "step": 4860 + }, + { + "epoch": 0.5127637130801688, + "grad_norm": 0.6303006410598755, + "learning_rate": 0.0007315489799534772, + "loss": 1.588, + "step": 4861 + }, + { + "epoch": 0.5128691983122363, + "grad_norm": 0.5886346697807312, + "learning_rate": 0.000731297997362082, + "loss": 1.5894, + "step": 4862 + }, + { + "epoch": 0.5129746835443038, + "grad_norm": 0.6422368884086609, + "learning_rate": 0.0007310470168663397, + "loss": 1.6135, + "step": 4863 + }, + { + "epoch": 0.5130801687763713, + "grad_norm": 0.5600766539573669, + "learning_rate": 0.0007307960384943736, + "loss": 1.6209, + "step": 4864 + }, + { + "epoch": 0.5131856540084389, + "grad_norm": 0.6401827335357666, + "learning_rate": 0.000730545062274307, + "loss": 1.574, + "step": 4865 + }, + { + "epoch": 0.5132911392405063, + "grad_norm": 0.5388279557228088, + "learning_rate": 0.0007302940882342634, + "loss": 1.5817, + "step": 4866 + }, + { + "epoch": 0.5133966244725738, + "grad_norm": 0.6572133898735046, + "learning_rate": 0.0007300431164023653, + "loss": 1.5578, + "step": 4867 + }, + { + "epoch": 0.5135021097046414, + "grad_norm": 0.5260173082351685, + "learning_rate": 0.0007297921468067357, + "loss": 1.5987, + "step": 4868 + }, + { + "epoch": 0.5136075949367088, + "grad_norm": 0.5929830074310303, + "learning_rate": 0.0007295411794754967, + "loss": 1.5844, + "step": 4869 + }, + { + "epoch": 0.5137130801687764, + "grad_norm": 0.5536484122276306, + "learning_rate": 0.0007292902144367704, + "loss": 1.5986, + "step": 4870 + }, + { + "epoch": 0.5138185654008439, + "grad_norm": 0.6028886437416077, + "learning_rate": 0.0007290392517186791, + "loss": 1.6259, + "step": 4871 + }, + { + "epoch": 0.5139240506329114, + "grad_norm": 0.5283724665641785, + "learning_rate": 0.000728788291349344, + "loss": 1.5913, + "step": 4872 + }, + { + "epoch": 0.5140295358649789, + 
"grad_norm": 0.7215498089790344, + "learning_rate": 0.0007285373333568868, + "loss": 1.6512, + "step": 4873 + }, + { + "epoch": 0.5141350210970465, + "grad_norm": 0.8743926882743835, + "learning_rate": 0.0007282863777694283, + "loss": 1.6196, + "step": 4874 + }, + { + "epoch": 0.5142405063291139, + "grad_norm": 0.6402186751365662, + "learning_rate": 0.0007280354246150894, + "loss": 1.5767, + "step": 4875 + }, + { + "epoch": 0.5143459915611814, + "grad_norm": 0.7164437770843506, + "learning_rate": 0.0007277844739219908, + "loss": 1.6218, + "step": 4876 + }, + { + "epoch": 0.514451476793249, + "grad_norm": 0.614882230758667, + "learning_rate": 0.0007275335257182526, + "loss": 1.6128, + "step": 4877 + }, + { + "epoch": 0.5145569620253164, + "grad_norm": 0.8022119402885437, + "learning_rate": 0.000727282580031995, + "loss": 1.6391, + "step": 4878 + }, + { + "epoch": 0.514662447257384, + "grad_norm": 0.7976577281951904, + "learning_rate": 0.0007270316368913374, + "loss": 1.641, + "step": 4879 + }, + { + "epoch": 0.5147679324894515, + "grad_norm": 0.5540345907211304, + "learning_rate": 0.0007267806963243995, + "loss": 1.5784, + "step": 4880 + }, + { + "epoch": 0.514873417721519, + "grad_norm": 0.8494786024093628, + "learning_rate": 0.0007265297583593003, + "loss": 1.6195, + "step": 4881 + }, + { + "epoch": 0.5149789029535865, + "grad_norm": 0.5821918845176697, + "learning_rate": 0.0007262788230241588, + "loss": 1.6048, + "step": 4882 + }, + { + "epoch": 0.515084388185654, + "grad_norm": 0.7799019813537598, + "learning_rate": 0.0007260278903470935, + "loss": 1.623, + "step": 4883 + }, + { + "epoch": 0.5151898734177215, + "grad_norm": 0.5801438689231873, + "learning_rate": 0.0007257769603562227, + "loss": 1.5998, + "step": 4884 + }, + { + "epoch": 0.515295358649789, + "grad_norm": 0.7765496373176575, + "learning_rate": 0.0007255260330796639, + "loss": 1.5845, + "step": 4885 + }, + { + "epoch": 0.5154008438818566, + "grad_norm": 0.564373254776001, + "learning_rate": 
0.0007252751085455355, + "loss": 1.621, + "step": 4886 + }, + { + "epoch": 0.515506329113924, + "grad_norm": 0.7865350246429443, + "learning_rate": 0.0007250241867819544, + "loss": 1.5925, + "step": 4887 + }, + { + "epoch": 0.5156118143459916, + "grad_norm": 0.6272732615470886, + "learning_rate": 0.0007247732678170375, + "loss": 1.6314, + "step": 4888 + }, + { + "epoch": 0.5157172995780591, + "grad_norm": 0.6446961164474487, + "learning_rate": 0.0007245223516789019, + "loss": 1.5784, + "step": 4889 + }, + { + "epoch": 0.5158227848101266, + "grad_norm": 0.6260302662849426, + "learning_rate": 0.0007242714383956639, + "loss": 1.5715, + "step": 4890 + }, + { + "epoch": 0.5159282700421941, + "grad_norm": 0.6848987936973572, + "learning_rate": 0.0007240205279954395, + "loss": 1.5624, + "step": 4891 + }, + { + "epoch": 0.5160337552742617, + "grad_norm": 0.704460084438324, + "learning_rate": 0.0007237696205063444, + "loss": 1.6317, + "step": 4892 + }, + { + "epoch": 0.5161392405063291, + "grad_norm": 0.8104442954063416, + "learning_rate": 0.0007235187159564942, + "loss": 1.6069, + "step": 4893 + }, + { + "epoch": 0.5162447257383966, + "grad_norm": 0.7434507608413696, + "learning_rate": 0.0007232678143740038, + "loss": 1.6168, + "step": 4894 + }, + { + "epoch": 0.5163502109704642, + "grad_norm": 0.6092925667762756, + "learning_rate": 0.0007230169157869882, + "loss": 1.5827, + "step": 4895 + }, + { + "epoch": 0.5164556962025316, + "grad_norm": 0.721275269985199, + "learning_rate": 0.0007227660202235616, + "loss": 1.6574, + "step": 4896 + }, + { + "epoch": 0.5165611814345992, + "grad_norm": 0.5831030011177063, + "learning_rate": 0.0007225151277118384, + "loss": 1.6088, + "step": 4897 + }, + { + "epoch": 0.5166666666666667, + "grad_norm": 0.7160267233848572, + "learning_rate": 0.0007222642382799322, + "loss": 1.6296, + "step": 4898 + }, + { + "epoch": 0.5167721518987342, + "grad_norm": 0.6116313338279724, + "learning_rate": 0.0007220133519559563, + "loss": 1.5991, + "step": 
4899 + }, + { + "epoch": 0.5168776371308017, + "grad_norm": 0.66895592212677, + "learning_rate": 0.000721762468768024, + "loss": 1.6179, + "step": 4900 + }, + { + "epoch": 0.5169831223628693, + "grad_norm": 0.7343776822090149, + "learning_rate": 0.0007215115887442478, + "loss": 1.6314, + "step": 4901 + }, + { + "epoch": 0.5170886075949367, + "grad_norm": 0.5515242218971252, + "learning_rate": 0.0007212607119127402, + "loss": 1.5826, + "step": 4902 + }, + { + "epoch": 0.5171940928270042, + "grad_norm": 0.8404529690742493, + "learning_rate": 0.000721009838301613, + "loss": 1.5996, + "step": 4903 + }, + { + "epoch": 0.5172995780590718, + "grad_norm": 0.6304848790168762, + "learning_rate": 0.000720758967938978, + "loss": 1.6093, + "step": 4904 + }, + { + "epoch": 0.5174050632911392, + "grad_norm": 0.731863260269165, + "learning_rate": 0.0007205081008529463, + "loss": 1.6289, + "step": 4905 + }, + { + "epoch": 0.5175105485232068, + "grad_norm": 0.5461196899414062, + "learning_rate": 0.0007202572370716292, + "loss": 1.5988, + "step": 4906 + }, + { + "epoch": 0.5176160337552742, + "grad_norm": 0.6814433336257935, + "learning_rate": 0.000720006376623137, + "loss": 1.5537, + "step": 4907 + }, + { + "epoch": 0.5177215189873418, + "grad_norm": 0.615485429763794, + "learning_rate": 0.0007197555195355799, + "loss": 1.6315, + "step": 4908 + }, + { + "epoch": 0.5178270042194093, + "grad_norm": 0.6292814016342163, + "learning_rate": 0.0007195046658370675, + "loss": 1.5863, + "step": 4909 + }, + { + "epoch": 0.5179324894514767, + "grad_norm": 0.6180698275566101, + "learning_rate": 0.0007192538155557094, + "loss": 1.6485, + "step": 4910 + }, + { + "epoch": 0.5180379746835443, + "grad_norm": 0.6562658548355103, + "learning_rate": 0.0007190029687196148, + "loss": 1.6042, + "step": 4911 + }, + { + "epoch": 0.5181434599156118, + "grad_norm": 0.5973897576332092, + "learning_rate": 0.0007187521253568919, + "loss": 1.577, + "step": 4912 + }, + { + "epoch": 0.5182489451476793, + 
"grad_norm": 0.5819017887115479, + "learning_rate": 0.0007185012854956491, + "loss": 1.5728, + "step": 4913 + }, + { + "epoch": 0.5183544303797468, + "grad_norm": 0.6827523708343506, + "learning_rate": 0.0007182504491639942, + "loss": 1.5681, + "step": 4914 + }, + { + "epoch": 0.5184599156118144, + "grad_norm": 0.7145710587501526, + "learning_rate": 0.000717999616390035, + "loss": 1.6027, + "step": 4915 + }, + { + "epoch": 0.5185654008438818, + "grad_norm": 0.5774852633476257, + "learning_rate": 0.0007177487872018784, + "loss": 1.6184, + "step": 4916 + }, + { + "epoch": 0.5186708860759494, + "grad_norm": 0.6901787519454956, + "learning_rate": 0.000717497961627631, + "loss": 1.6133, + "step": 4917 + }, + { + "epoch": 0.5187763713080169, + "grad_norm": 0.6548444032669067, + "learning_rate": 0.0007172471396953991, + "loss": 1.5792, + "step": 4918 + }, + { + "epoch": 0.5188818565400843, + "grad_norm": 0.6542536616325378, + "learning_rate": 0.0007169963214332885, + "loss": 1.5906, + "step": 4919 + }, + { + "epoch": 0.5189873417721519, + "grad_norm": 0.6819018125534058, + "learning_rate": 0.0007167455068694046, + "loss": 1.6259, + "step": 4920 + }, + { + "epoch": 0.5190928270042194, + "grad_norm": 0.8159233927726746, + "learning_rate": 0.0007164946960318525, + "loss": 1.6196, + "step": 4921 + }, + { + "epoch": 0.5191983122362869, + "grad_norm": 0.6606346368789673, + "learning_rate": 0.0007162438889487365, + "loss": 1.645, + "step": 4922 + }, + { + "epoch": 0.5193037974683544, + "grad_norm": 0.7886705994606018, + "learning_rate": 0.0007159930856481614, + "loss": 1.6188, + "step": 4923 + }, + { + "epoch": 0.519409282700422, + "grad_norm": 0.7061139345169067, + "learning_rate": 0.0007157422861582306, + "loss": 1.5572, + "step": 4924 + }, + { + "epoch": 0.5195147679324894, + "grad_norm": 0.8375596404075623, + "learning_rate": 0.0007154914905070475, + "loss": 1.5963, + "step": 4925 + }, + { + "epoch": 0.519620253164557, + "grad_norm": 0.6725770831108093, + "learning_rate": 
0.0007152406987227149, + "loss": 1.6187, + "step": 4926 + }, + { + "epoch": 0.5197257383966245, + "grad_norm": 0.8115255236625671, + "learning_rate": 0.0007149899108333354, + "loss": 1.5873, + "step": 4927 + }, + { + "epoch": 0.5198312236286919, + "grad_norm": 0.7078489065170288, + "learning_rate": 0.0007147391268670109, + "loss": 1.6123, + "step": 4928 + }, + { + "epoch": 0.5199367088607595, + "grad_norm": 0.6378858089447021, + "learning_rate": 0.000714488346851843, + "loss": 1.6131, + "step": 4929 + }, + { + "epoch": 0.520042194092827, + "grad_norm": 0.9134105443954468, + "learning_rate": 0.000714237570815933, + "loss": 1.602, + "step": 4930 + }, + { + "epoch": 0.5201476793248945, + "grad_norm": 0.627112090587616, + "learning_rate": 0.0007139867987873812, + "loss": 1.5988, + "step": 4931 + }, + { + "epoch": 0.520253164556962, + "grad_norm": 0.9015727639198303, + "learning_rate": 0.0007137360307942885, + "loss": 1.6237, + "step": 4932 + }, + { + "epoch": 0.5203586497890296, + "grad_norm": 0.6144789457321167, + "learning_rate": 0.0007134852668647543, + "loss": 1.6319, + "step": 4933 + }, + { + "epoch": 0.520464135021097, + "grad_norm": 0.8353612422943115, + "learning_rate": 0.0007132345070268781, + "loss": 1.5993, + "step": 4934 + }, + { + "epoch": 0.5205696202531646, + "grad_norm": 0.6769370436668396, + "learning_rate": 0.0007129837513087587, + "loss": 1.5457, + "step": 4935 + }, + { + "epoch": 0.5206751054852321, + "grad_norm": 0.803750216960907, + "learning_rate": 0.0007127329997384946, + "loss": 1.5977, + "step": 4936 + }, + { + "epoch": 0.5207805907172995, + "grad_norm": 0.78049635887146, + "learning_rate": 0.0007124822523441837, + "loss": 1.5944, + "step": 4937 + }, + { + "epoch": 0.5208860759493671, + "grad_norm": 0.7372730374336243, + "learning_rate": 0.0007122315091539234, + "loss": 1.6081, + "step": 4938 + }, + { + "epoch": 0.5209915611814346, + "grad_norm": 0.8073235750198364, + "learning_rate": 0.000711980770195811, + "loss": 1.6266, + "step": 4939 + }, 
+ { + "epoch": 0.5210970464135021, + "grad_norm": 0.7182435989379883, + "learning_rate": 0.0007117300354979423, + "loss": 1.5898, + "step": 4940 + }, + { + "epoch": 0.5212025316455696, + "grad_norm": 0.8008257746696472, + "learning_rate": 0.0007114793050884145, + "loss": 1.6399, + "step": 4941 + }, + { + "epoch": 0.5213080168776372, + "grad_norm": 0.5917121767997742, + "learning_rate": 0.0007112285789953226, + "loss": 1.6149, + "step": 4942 + }, + { + "epoch": 0.5214135021097046, + "grad_norm": 0.6088781356811523, + "learning_rate": 0.0007109778572467616, + "loss": 1.594, + "step": 4943 + }, + { + "epoch": 0.5215189873417722, + "grad_norm": 0.7792158126831055, + "learning_rate": 0.0007107271398708266, + "loss": 1.6134, + "step": 4944 + }, + { + "epoch": 0.5216244725738397, + "grad_norm": 0.7524416446685791, + "learning_rate": 0.0007104764268956111, + "loss": 1.5992, + "step": 4945 + }, + { + "epoch": 0.5217299578059071, + "grad_norm": 0.6751667857170105, + "learning_rate": 0.0007102257183492092, + "loss": 1.597, + "step": 4946 + }, + { + "epoch": 0.5218354430379747, + "grad_norm": 0.6100225448608398, + "learning_rate": 0.0007099750142597138, + "loss": 1.6159, + "step": 4947 + }, + { + "epoch": 0.5219409282700422, + "grad_norm": 0.602031946182251, + "learning_rate": 0.0007097243146552175, + "loss": 1.6048, + "step": 4948 + }, + { + "epoch": 0.5220464135021097, + "grad_norm": 0.5964629650115967, + "learning_rate": 0.0007094736195638128, + "loss": 1.6054, + "step": 4949 + }, + { + "epoch": 0.5221518987341772, + "grad_norm": 0.6395933032035828, + "learning_rate": 0.000709222929013591, + "loss": 1.6247, + "step": 4950 + }, + { + "epoch": 0.5222573839662448, + "grad_norm": 0.6084792017936707, + "learning_rate": 0.0007089722430326434, + "loss": 1.6212, + "step": 4951 + }, + { + "epoch": 0.5223628691983122, + "grad_norm": 0.6181237697601318, + "learning_rate": 0.0007087215616490606, + "loss": 1.5933, + "step": 4952 + }, + { + "epoch": 0.5224683544303798, + "grad_norm": 
0.5096347332000732, + "learning_rate": 0.0007084708848909326, + "loss": 1.5483, + "step": 4953 + }, + { + "epoch": 0.5225738396624473, + "grad_norm": 0.6953054666519165, + "learning_rate": 0.000708220212786349, + "loss": 1.5938, + "step": 4954 + }, + { + "epoch": 0.5226793248945147, + "grad_norm": 0.6464244723320007, + "learning_rate": 0.000707969545363399, + "loss": 1.606, + "step": 4955 + }, + { + "epoch": 0.5227848101265823, + "grad_norm": 0.728124737739563, + "learning_rate": 0.000707718882650171, + "loss": 1.592, + "step": 4956 + }, + { + "epoch": 0.5228902953586498, + "grad_norm": 0.7621326446533203, + "learning_rate": 0.0007074682246747526, + "loss": 1.6225, + "step": 4957 + }, + { + "epoch": 0.5229957805907173, + "grad_norm": 0.6086481809616089, + "learning_rate": 0.0007072175714652321, + "loss": 1.5972, + "step": 4958 + }, + { + "epoch": 0.5231012658227848, + "grad_norm": 0.7596073150634766, + "learning_rate": 0.0007069669230496961, + "loss": 1.5635, + "step": 4959 + }, + { + "epoch": 0.5232067510548524, + "grad_norm": 0.6698921322822571, + "learning_rate": 0.0007067162794562309, + "loss": 1.5961, + "step": 4960 + }, + { + "epoch": 0.5233122362869198, + "grad_norm": 0.6648685336112976, + "learning_rate": 0.0007064656407129224, + "loss": 1.5865, + "step": 4961 + }, + { + "epoch": 0.5234177215189874, + "grad_norm": 0.8092038035392761, + "learning_rate": 0.000706215006847856, + "loss": 1.5992, + "step": 4962 + }, + { + "epoch": 0.5235232067510549, + "grad_norm": 0.5798614025115967, + "learning_rate": 0.0007059643778891164, + "loss": 1.619, + "step": 4963 + }, + { + "epoch": 0.5236286919831223, + "grad_norm": 0.8321263194084167, + "learning_rate": 0.0007057137538647878, + "loss": 1.6014, + "step": 4964 + }, + { + "epoch": 0.5237341772151899, + "grad_norm": 0.6886154413223267, + "learning_rate": 0.0007054631348029539, + "loss": 1.6701, + "step": 4965 + }, + { + "epoch": 0.5238396624472574, + "grad_norm": 0.6943937540054321, + "learning_rate": 
0.0007052125207316975, + "loss": 1.5876, + "step": 4966 + }, + { + "epoch": 0.5239451476793249, + "grad_norm": 0.7820772528648376, + "learning_rate": 0.0007049619116791019, + "loss": 1.5631, + "step": 4967 + }, + { + "epoch": 0.5240506329113924, + "grad_norm": 0.571512758731842, + "learning_rate": 0.0007047113076732485, + "loss": 1.5487, + "step": 4968 + }, + { + "epoch": 0.52415611814346, + "grad_norm": 0.8196996450424194, + "learning_rate": 0.0007044607087422191, + "loss": 1.6041, + "step": 4969 + }, + { + "epoch": 0.5242616033755274, + "grad_norm": 0.6407755017280579, + "learning_rate": 0.0007042101149140943, + "loss": 1.6451, + "step": 4970 + }, + { + "epoch": 0.524367088607595, + "grad_norm": 0.9115898013114929, + "learning_rate": 0.0007039595262169544, + "loss": 1.6629, + "step": 4971 + }, + { + "epoch": 0.5244725738396624, + "grad_norm": 0.8098087906837463, + "learning_rate": 0.0007037089426788792, + "loss": 1.5432, + "step": 4972 + }, + { + "epoch": 0.5245780590717299, + "grad_norm": 0.7465206384658813, + "learning_rate": 0.0007034583643279479, + "loss": 1.5896, + "step": 4973 + }, + { + "epoch": 0.5246835443037975, + "grad_norm": 0.7890239953994751, + "learning_rate": 0.0007032077911922384, + "loss": 1.5905, + "step": 4974 + }, + { + "epoch": 0.5247890295358649, + "grad_norm": 0.703356146812439, + "learning_rate": 0.0007029572232998298, + "loss": 1.6334, + "step": 4975 + }, + { + "epoch": 0.5248945147679325, + "grad_norm": 0.7234653234481812, + "learning_rate": 0.0007027066606787988, + "loss": 1.6001, + "step": 4976 + }, + { + "epoch": 0.525, + "grad_norm": 0.7836076617240906, + "learning_rate": 0.0007024561033572223, + "loss": 1.6075, + "step": 4977 + }, + { + "epoch": 0.5251054852320675, + "grad_norm": 0.6945216059684753, + "learning_rate": 0.0007022055513631764, + "loss": 1.5968, + "step": 4978 + }, + { + "epoch": 0.525210970464135, + "grad_norm": 0.6563199162483215, + "learning_rate": 0.000701955004724737, + "loss": 1.5509, + "step": 4979 + }, + { + 
"epoch": 0.5253164556962026, + "grad_norm": 0.604552149772644, + "learning_rate": 0.0007017044634699787, + "loss": 1.5658, + "step": 4980 + }, + { + "epoch": 0.52542194092827, + "grad_norm": 0.7686401605606079, + "learning_rate": 0.0007014539276269762, + "loss": 1.6186, + "step": 4981 + }, + { + "epoch": 0.5255274261603375, + "grad_norm": 0.6153714656829834, + "learning_rate": 0.0007012033972238031, + "loss": 1.581, + "step": 4982 + }, + { + "epoch": 0.5256329113924051, + "grad_norm": 0.7820702791213989, + "learning_rate": 0.0007009528722885323, + "loss": 1.5857, + "step": 4983 + }, + { + "epoch": 0.5257383966244725, + "grad_norm": 0.6423811316490173, + "learning_rate": 0.0007007023528492372, + "loss": 1.5818, + "step": 4984 + }, + { + "epoch": 0.5258438818565401, + "grad_norm": 0.7886558771133423, + "learning_rate": 0.0007004518389339893, + "loss": 1.5957, + "step": 4985 + }, + { + "epoch": 0.5259493670886076, + "grad_norm": 0.6899831295013428, + "learning_rate": 0.0007002013305708598, + "loss": 1.5757, + "step": 4986 + }, + { + "epoch": 0.5260548523206751, + "grad_norm": 0.6918283104896545, + "learning_rate": 0.0006999508277879196, + "loss": 1.5596, + "step": 4987 + }, + { + "epoch": 0.5261603375527426, + "grad_norm": 0.727691113948822, + "learning_rate": 0.0006997003306132386, + "loss": 1.608, + "step": 4988 + }, + { + "epoch": 0.5262658227848102, + "grad_norm": 0.5968754887580872, + "learning_rate": 0.0006994498390748865, + "loss": 1.6345, + "step": 4989 + }, + { + "epoch": 0.5263713080168776, + "grad_norm": 0.6350266337394714, + "learning_rate": 0.0006991993532009319, + "loss": 1.6071, + "step": 4990 + }, + { + "epoch": 0.5264767932489451, + "grad_norm": 0.6016932129859924, + "learning_rate": 0.0006989488730194432, + "loss": 1.6021, + "step": 4991 + }, + { + "epoch": 0.5265822784810127, + "grad_norm": 0.5876120924949646, + "learning_rate": 0.0006986983985584874, + "loss": 1.6047, + "step": 4992 + }, + { + "epoch": 0.5266877637130801, + "grad_norm": 
0.6204512715339661, + "learning_rate": 0.0006984479298461323, + "loss": 1.6051, + "step": 4993 + }, + { + "epoch": 0.5267932489451477, + "grad_norm": 0.6098913550376892, + "learning_rate": 0.0006981974669104436, + "loss": 1.6429, + "step": 4994 + }, + { + "epoch": 0.5268987341772152, + "grad_norm": 0.701617956161499, + "learning_rate": 0.0006979470097794871, + "loss": 1.6081, + "step": 4995 + }, + { + "epoch": 0.5270042194092827, + "grad_norm": 0.6164836287498474, + "learning_rate": 0.0006976965584813277, + "loss": 1.5951, + "step": 4996 + }, + { + "epoch": 0.5271097046413502, + "grad_norm": 0.6606321930885315, + "learning_rate": 0.0006974461130440298, + "loss": 1.5811, + "step": 4997 + }, + { + "epoch": 0.5272151898734178, + "grad_norm": 0.5819945335388184, + "learning_rate": 0.0006971956734956569, + "loss": 1.6216, + "step": 4998 + }, + { + "epoch": 0.5273206751054852, + "grad_norm": 0.6939828991889954, + "learning_rate": 0.0006969452398642721, + "loss": 1.6312, + "step": 4999 + }, + { + "epoch": 0.5274261603375527, + "grad_norm": 0.5828123688697815, + "learning_rate": 0.0006966948121779378, + "loss": 1.5852, + "step": 5000 + }, + { + "epoch": 0.5275316455696203, + "grad_norm": 0.7921366691589355, + "learning_rate": 0.0006964443904647152, + "loss": 1.6411, + "step": 5001 + }, + { + "epoch": 0.5276371308016877, + "grad_norm": 0.6237935423851013, + "learning_rate": 0.0006961939747526661, + "loss": 1.633, + "step": 5002 + }, + { + "epoch": 0.5277426160337553, + "grad_norm": 0.5614455938339233, + "learning_rate": 0.0006959435650698504, + "loss": 1.5982, + "step": 5003 + }, + { + "epoch": 0.5278481012658228, + "grad_norm": 0.6425033807754517, + "learning_rate": 0.0006956931614443278, + "loss": 1.5887, + "step": 5004 + }, + { + "epoch": 0.5279535864978903, + "grad_norm": 0.613521933555603, + "learning_rate": 0.0006954427639041572, + "loss": 1.588, + "step": 5005 + }, + { + "epoch": 0.5280590717299578, + "grad_norm": 0.6902639269828796, + "learning_rate": 
0.000695192372477397, + "loss": 1.5567, + "step": 5006 + }, + { + "epoch": 0.5281645569620254, + "grad_norm": 0.5618957877159119, + "learning_rate": 0.0006949419871921047, + "loss": 1.5808, + "step": 5007 + }, + { + "epoch": 0.5282700421940928, + "grad_norm": 0.6130552887916565, + "learning_rate": 0.0006946916080763373, + "loss": 1.614, + "step": 5008 + }, + { + "epoch": 0.5283755274261603, + "grad_norm": 0.6430751085281372, + "learning_rate": 0.0006944412351581506, + "loss": 1.5989, + "step": 5009 + }, + { + "epoch": 0.5284810126582279, + "grad_norm": 0.5742433071136475, + "learning_rate": 0.000694190868465601, + "loss": 1.6058, + "step": 5010 + }, + { + "epoch": 0.5285864978902953, + "grad_norm": 0.7044973969459534, + "learning_rate": 0.0006939405080267428, + "loss": 1.5807, + "step": 5011 + }, + { + "epoch": 0.5286919831223629, + "grad_norm": 0.6653546094894409, + "learning_rate": 0.0006936901538696303, + "loss": 1.6182, + "step": 5012 + }, + { + "epoch": 0.5287974683544304, + "grad_norm": 0.6292969584465027, + "learning_rate": 0.0006934398060223168, + "loss": 1.596, + "step": 5013 + }, + { + "epoch": 0.5289029535864979, + "grad_norm": 0.6161313652992249, + "learning_rate": 0.0006931894645128551, + "loss": 1.5828, + "step": 5014 + }, + { + "epoch": 0.5290084388185654, + "grad_norm": 0.7076820135116577, + "learning_rate": 0.0006929391293692972, + "loss": 1.5374, + "step": 5015 + }, + { + "epoch": 0.529113924050633, + "grad_norm": 0.7082212567329407, + "learning_rate": 0.0006926888006196944, + "loss": 1.6221, + "step": 5016 + }, + { + "epoch": 0.5292194092827004, + "grad_norm": 0.7519503235816956, + "learning_rate": 0.0006924384782920971, + "loss": 1.6243, + "step": 5017 + }, + { + "epoch": 0.5293248945147679, + "grad_norm": 0.8223909735679626, + "learning_rate": 0.0006921881624145554, + "loss": 1.5978, + "step": 5018 + }, + { + "epoch": 0.5294303797468355, + "grad_norm": 0.7496793866157532, + "learning_rate": 0.0006919378530151182, + "loss": 1.5758, + "step": 
5019 + }, + { + "epoch": 0.5295358649789029, + "grad_norm": 0.6856167316436768, + "learning_rate": 0.0006916875501218343, + "loss": 1.6095, + "step": 5020 + }, + { + "epoch": 0.5296413502109705, + "grad_norm": 0.6509621739387512, + "learning_rate": 0.0006914372537627512, + "loss": 1.5789, + "step": 5021 + }, + { + "epoch": 0.529746835443038, + "grad_norm": 0.7606245875358582, + "learning_rate": 0.0006911869639659159, + "loss": 1.5877, + "step": 5022 + }, + { + "epoch": 0.5298523206751055, + "grad_norm": 0.6611577868461609, + "learning_rate": 0.0006909366807593744, + "loss": 1.5848, + "step": 5023 + }, + { + "epoch": 0.529957805907173, + "grad_norm": 0.716060996055603, + "learning_rate": 0.0006906864041711725, + "loss": 1.5825, + "step": 5024 + }, + { + "epoch": 0.5300632911392406, + "grad_norm": 0.6306818723678589, + "learning_rate": 0.0006904361342293546, + "loss": 1.5695, + "step": 5025 + }, + { + "epoch": 0.530168776371308, + "grad_norm": 0.8255805969238281, + "learning_rate": 0.000690185870961965, + "loss": 1.5795, + "step": 5026 + }, + { + "epoch": 0.5302742616033755, + "grad_norm": 0.7292072772979736, + "learning_rate": 0.0006899356143970467, + "loss": 1.6141, + "step": 5027 + }, + { + "epoch": 0.5303797468354431, + "grad_norm": 0.5761002898216248, + "learning_rate": 0.0006896853645626424, + "loss": 1.5804, + "step": 5028 + }, + { + "epoch": 0.5304852320675105, + "grad_norm": 0.7863202095031738, + "learning_rate": 0.0006894351214867937, + "loss": 1.5524, + "step": 5029 + }, + { + "epoch": 0.5305907172995781, + "grad_norm": 0.6055540442466736, + "learning_rate": 0.0006891848851975416, + "loss": 1.5617, + "step": 5030 + }, + { + "epoch": 0.5306962025316456, + "grad_norm": 0.7770669460296631, + "learning_rate": 0.0006889346557229265, + "loss": 1.5986, + "step": 5031 + }, + { + "epoch": 0.5308016877637131, + "grad_norm": 0.5861395001411438, + "learning_rate": 0.0006886844330909877, + "loss": 1.6257, + "step": 5032 + }, + { + "epoch": 0.5309071729957806, + 
"grad_norm": 0.9299703240394592, + "learning_rate": 0.0006884342173297639, + "loss": 1.6185, + "step": 5033 + }, + { + "epoch": 0.5310126582278482, + "grad_norm": 0.7080916166305542, + "learning_rate": 0.000688184008467293, + "loss": 1.6077, + "step": 5034 + }, + { + "epoch": 0.5311181434599156, + "grad_norm": 0.8244010210037231, + "learning_rate": 0.0006879338065316122, + "loss": 1.6109, + "step": 5035 + }, + { + "epoch": 0.5312236286919831, + "grad_norm": 0.6299169063568115, + "learning_rate": 0.0006876836115507579, + "loss": 1.606, + "step": 5036 + }, + { + "epoch": 0.5313291139240506, + "grad_norm": 1.0499329566955566, + "learning_rate": 0.0006874334235527657, + "loss": 1.5848, + "step": 5037 + }, + { + "epoch": 0.5314345991561181, + "grad_norm": 0.8230117559432983, + "learning_rate": 0.0006871832425656702, + "loss": 1.6455, + "step": 5038 + }, + { + "epoch": 0.5315400843881857, + "grad_norm": 0.7126038074493408, + "learning_rate": 0.0006869330686175058, + "loss": 1.6071, + "step": 5039 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 0.7620951533317566, + "learning_rate": 0.0006866829017363054, + "loss": 1.5763, + "step": 5040 + }, + { + "epoch": 0.5317510548523207, + "grad_norm": 0.6194590330123901, + "learning_rate": 0.0006864327419501017, + "loss": 1.6006, + "step": 5041 + }, + { + "epoch": 0.5318565400843882, + "grad_norm": 0.6610148549079895, + "learning_rate": 0.0006861825892869262, + "loss": 1.6011, + "step": 5042 + }, + { + "epoch": 0.5319620253164556, + "grad_norm": 0.6506215929985046, + "learning_rate": 0.0006859324437748099, + "loss": 1.5671, + "step": 5043 + }, + { + "epoch": 0.5320675105485232, + "grad_norm": 0.597592294216156, + "learning_rate": 0.0006856823054417825, + "loss": 1.6056, + "step": 5044 + }, + { + "epoch": 0.5321729957805907, + "grad_norm": 0.744284451007843, + "learning_rate": 0.0006854321743158737, + "loss": 1.5799, + "step": 5045 + }, + { + "epoch": 0.5322784810126582, + "grad_norm": 0.6445761919021606, + "learning_rate": 
0.0006851820504251117, + "loss": 1.6401, + "step": 5046 + }, + { + "epoch": 0.5323839662447257, + "grad_norm": 0.5971753597259521, + "learning_rate": 0.0006849319337975242, + "loss": 1.584, + "step": 5047 + }, + { + "epoch": 0.5324894514767933, + "grad_norm": 0.652095377445221, + "learning_rate": 0.0006846818244611376, + "loss": 1.5554, + "step": 5048 + }, + { + "epoch": 0.5325949367088607, + "grad_norm": 0.6035884618759155, + "learning_rate": 0.0006844317224439788, + "loss": 1.6222, + "step": 5049 + }, + { + "epoch": 0.5327004219409283, + "grad_norm": 0.6156376600265503, + "learning_rate": 0.0006841816277740722, + "loss": 1.5866, + "step": 5050 + }, + { + "epoch": 0.5328059071729958, + "grad_norm": 0.6169779896736145, + "learning_rate": 0.0006839315404794424, + "loss": 1.6136, + "step": 5051 + }, + { + "epoch": 0.5329113924050632, + "grad_norm": 0.6743027567863464, + "learning_rate": 0.0006836814605881131, + "loss": 1.6051, + "step": 5052 + }, + { + "epoch": 0.5330168776371308, + "grad_norm": 0.655940055847168, + "learning_rate": 0.0006834313881281066, + "loss": 1.6127, + "step": 5053 + }, + { + "epoch": 0.5331223628691983, + "grad_norm": 0.7164573073387146, + "learning_rate": 0.0006831813231274451, + "loss": 1.616, + "step": 5054 + }, + { + "epoch": 0.5332278481012658, + "grad_norm": 0.6845137476921082, + "learning_rate": 0.0006829312656141496, + "loss": 1.6059, + "step": 5055 + }, + { + "epoch": 0.5333333333333333, + "grad_norm": 0.6087310314178467, + "learning_rate": 0.0006826812156162401, + "loss": 1.5993, + "step": 5056 + }, + { + "epoch": 0.5334388185654009, + "grad_norm": 0.5805554986000061, + "learning_rate": 0.0006824311731617363, + "loss": 1.622, + "step": 5057 + }, + { + "epoch": 0.5335443037974683, + "grad_norm": 0.6168662309646606, + "learning_rate": 0.0006821811382786561, + "loss": 1.6098, + "step": 5058 + }, + { + "epoch": 0.5336497890295359, + "grad_norm": 0.636888325214386, + "learning_rate": 0.0006819311109950177, + "loss": 1.5877, + "step": 5059 
+ }, + { + "epoch": 0.5337552742616034, + "grad_norm": 0.5989710688591003, + "learning_rate": 0.0006816810913388379, + "loss": 1.5903, + "step": 5060 + }, + { + "epoch": 0.5338607594936708, + "grad_norm": 0.5977267026901245, + "learning_rate": 0.0006814310793381322, + "loss": 1.6279, + "step": 5061 + }, + { + "epoch": 0.5339662447257384, + "grad_norm": 0.6134890913963318, + "learning_rate": 0.0006811810750209161, + "loss": 1.5708, + "step": 5062 + }, + { + "epoch": 0.5340717299578059, + "grad_norm": 0.6509652137756348, + "learning_rate": 0.0006809310784152039, + "loss": 1.5826, + "step": 5063 + }, + { + "epoch": 0.5341772151898734, + "grad_norm": 0.5791399478912354, + "learning_rate": 0.0006806810895490087, + "loss": 1.6427, + "step": 5064 + }, + { + "epoch": 0.5342827004219409, + "grad_norm": 0.6277163624763489, + "learning_rate": 0.000680431108450343, + "loss": 1.5739, + "step": 5065 + }, + { + "epoch": 0.5343881856540085, + "grad_norm": 0.7230496406555176, + "learning_rate": 0.0006801811351472185, + "loss": 1.6016, + "step": 5066 + }, + { + "epoch": 0.5344936708860759, + "grad_norm": 0.5753990411758423, + "learning_rate": 0.000679931169667646, + "loss": 1.5808, + "step": 5067 + }, + { + "epoch": 0.5345991561181435, + "grad_norm": 0.7417773008346558, + "learning_rate": 0.0006796812120396351, + "loss": 1.6187, + "step": 5068 + }, + { + "epoch": 0.534704641350211, + "grad_norm": 0.5373862385749817, + "learning_rate": 0.0006794312622911953, + "loss": 1.5757, + "step": 5069 + }, + { + "epoch": 0.5348101265822784, + "grad_norm": 0.8968938589096069, + "learning_rate": 0.0006791813204503342, + "loss": 1.5572, + "step": 5070 + }, + { + "epoch": 0.534915611814346, + "grad_norm": 0.5580962300300598, + "learning_rate": 0.0006789313865450594, + "loss": 1.5933, + "step": 5071 + }, + { + "epoch": 0.5350210970464135, + "grad_norm": 0.7376260161399841, + "learning_rate": 0.0006786814606033773, + "loss": 1.6051, + "step": 5072 + }, + { + "epoch": 0.535126582278481, + "grad_norm": 
0.6275738477706909, + "learning_rate": 0.0006784315426532929, + "loss": 1.5786, + "step": 5073 + }, + { + "epoch": 0.5352320675105485, + "grad_norm": 0.9318963885307312, + "learning_rate": 0.0006781816327228112, + "loss": 1.6387, + "step": 5074 + }, + { + "epoch": 0.5353375527426161, + "grad_norm": 0.6200342178344727, + "learning_rate": 0.0006779317308399357, + "loss": 1.6269, + "step": 5075 + }, + { + "epoch": 0.5354430379746835, + "grad_norm": 0.8600548505783081, + "learning_rate": 0.000677681837032669, + "loss": 1.6103, + "step": 5076 + }, + { + "epoch": 0.5355485232067511, + "grad_norm": 0.6999862194061279, + "learning_rate": 0.0006774319513290132, + "loss": 1.596, + "step": 5077 + }, + { + "epoch": 0.5356540084388186, + "grad_norm": 1.0539772510528564, + "learning_rate": 0.0006771820737569689, + "loss": 1.6187, + "step": 5078 + }, + { + "epoch": 0.535759493670886, + "grad_norm": 0.7051787972450256, + "learning_rate": 0.0006769322043445363, + "loss": 1.5799, + "step": 5079 + }, + { + "epoch": 0.5358649789029536, + "grad_norm": 0.679509699344635, + "learning_rate": 0.0006766823431197147, + "loss": 1.598, + "step": 5080 + }, + { + "epoch": 0.5359704641350211, + "grad_norm": 0.7274112105369568, + "learning_rate": 0.0006764324901105022, + "loss": 1.5934, + "step": 5081 + }, + { + "epoch": 0.5360759493670886, + "grad_norm": 0.7701904773712158, + "learning_rate": 0.000676182645344896, + "loss": 1.609, + "step": 5082 + }, + { + "epoch": 0.5361814345991561, + "grad_norm": 0.6730894446372986, + "learning_rate": 0.0006759328088508925, + "loss": 1.5732, + "step": 5083 + }, + { + "epoch": 0.5362869198312237, + "grad_norm": 0.7660239338874817, + "learning_rate": 0.0006756829806564872, + "loss": 1.6234, + "step": 5084 + }, + { + "epoch": 0.5363924050632911, + "grad_norm": 0.7284854054450989, + "learning_rate": 0.0006754331607896742, + "loss": 1.6128, + "step": 5085 + }, + { + "epoch": 0.5364978902953587, + "grad_norm": 0.66961270570755, + "learning_rate": 
0.0006751833492784476, + "loss": 1.5552, + "step": 5086 + }, + { + "epoch": 0.5366033755274262, + "grad_norm": 0.6640745401382446, + "learning_rate": 0.0006749335461507995, + "loss": 1.5588, + "step": 5087 + }, + { + "epoch": 0.5367088607594936, + "grad_norm": 0.6651301980018616, + "learning_rate": 0.000674683751434722, + "loss": 1.6293, + "step": 5088 + }, + { + "epoch": 0.5368143459915612, + "grad_norm": 0.7842827439308167, + "learning_rate": 0.0006744339651582059, + "loss": 1.6008, + "step": 5089 + }, + { + "epoch": 0.5369198312236287, + "grad_norm": 0.6444167494773865, + "learning_rate": 0.0006741841873492406, + "loss": 1.6346, + "step": 5090 + }, + { + "epoch": 0.5370253164556962, + "grad_norm": 0.9569106698036194, + "learning_rate": 0.0006739344180358153, + "loss": 1.6262, + "step": 5091 + }, + { + "epoch": 0.5371308016877637, + "grad_norm": 0.5865442156791687, + "learning_rate": 0.0006736846572459178, + "loss": 1.5229, + "step": 5092 + }, + { + "epoch": 0.5372362869198313, + "grad_norm": 0.8149553537368774, + "learning_rate": 0.0006734349050075348, + "loss": 1.5804, + "step": 5093 + }, + { + "epoch": 0.5373417721518987, + "grad_norm": 0.6067254543304443, + "learning_rate": 0.0006731851613486526, + "loss": 1.5639, + "step": 5094 + }, + { + "epoch": 0.5374472573839663, + "grad_norm": 1.0115169286727905, + "learning_rate": 0.0006729354262972561, + "loss": 1.5805, + "step": 5095 + }, + { + "epoch": 0.5375527426160338, + "grad_norm": 0.5375372767448425, + "learning_rate": 0.0006726856998813291, + "loss": 1.5822, + "step": 5096 + }, + { + "epoch": 0.5376582278481012, + "grad_norm": 1.086815357208252, + "learning_rate": 0.0006724359821288552, + "loss": 1.611, + "step": 5097 + }, + { + "epoch": 0.5377637130801688, + "grad_norm": 0.5573437809944153, + "learning_rate": 0.0006721862730678164, + "loss": 1.6034, + "step": 5098 + }, + { + "epoch": 0.5378691983122363, + "grad_norm": 0.9454841613769531, + "learning_rate": 0.0006719365727261935, + "loss": 1.6074, + "step": 
5099 + }, + { + "epoch": 0.5379746835443038, + "grad_norm": 0.6161977052688599, + "learning_rate": 0.0006716868811319671, + "loss": 1.5552, + "step": 5100 + }, + { + "epoch": 0.5380801687763713, + "grad_norm": 0.7784796357154846, + "learning_rate": 0.000671437198313116, + "loss": 1.608, + "step": 5101 + }, + { + "epoch": 0.5381856540084389, + "grad_norm": 0.6498932838439941, + "learning_rate": 0.0006711875242976187, + "loss": 1.6134, + "step": 5102 + }, + { + "epoch": 0.5382911392405063, + "grad_norm": 1.0358588695526123, + "learning_rate": 0.0006709378591134523, + "loss": 1.6026, + "step": 5103 + }, + { + "epoch": 0.5383966244725739, + "grad_norm": 0.771987795829773, + "learning_rate": 0.0006706882027885929, + "loss": 1.5716, + "step": 5104 + }, + { + "epoch": 0.5385021097046413, + "grad_norm": 0.8194320201873779, + "learning_rate": 0.0006704385553510156, + "loss": 1.5562, + "step": 5105 + }, + { + "epoch": 0.5386075949367088, + "grad_norm": 0.7009221911430359, + "learning_rate": 0.0006701889168286953, + "loss": 1.6007, + "step": 5106 + }, + { + "epoch": 0.5387130801687764, + "grad_norm": 0.9032169580459595, + "learning_rate": 0.0006699392872496048, + "loss": 1.6304, + "step": 5107 + }, + { + "epoch": 0.5388185654008438, + "grad_norm": 0.62613844871521, + "learning_rate": 0.0006696896666417163, + "loss": 1.6037, + "step": 5108 + }, + { + "epoch": 0.5389240506329114, + "grad_norm": 0.8199893832206726, + "learning_rate": 0.0006694400550330013, + "loss": 1.6082, + "step": 5109 + }, + { + "epoch": 0.5390295358649789, + "grad_norm": 0.5881650447845459, + "learning_rate": 0.0006691904524514297, + "loss": 1.5521, + "step": 5110 + }, + { + "epoch": 0.5391350210970464, + "grad_norm": 0.88984614610672, + "learning_rate": 0.0006689408589249709, + "loss": 1.6124, + "step": 5111 + }, + { + "epoch": 0.5392405063291139, + "grad_norm": 0.5958966612815857, + "learning_rate": 0.000668691274481593, + "loss": 1.5857, + "step": 5112 + }, + { + "epoch": 0.5393459915611815, + 
"grad_norm": 0.8199614882469177, + "learning_rate": 0.0006684416991492629, + "loss": 1.6239, + "step": 5113 + }, + { + "epoch": 0.5394514767932489, + "grad_norm": 0.6948811411857605, + "learning_rate": 0.0006681921329559475, + "loss": 1.5828, + "step": 5114 + }, + { + "epoch": 0.5395569620253164, + "grad_norm": 0.6980289816856384, + "learning_rate": 0.0006679425759296114, + "loss": 1.5785, + "step": 5115 + }, + { + "epoch": 0.539662447257384, + "grad_norm": 0.7394945621490479, + "learning_rate": 0.000667693028098219, + "loss": 1.591, + "step": 5116 + }, + { + "epoch": 0.5397679324894514, + "grad_norm": 0.6760668754577637, + "learning_rate": 0.0006674434894897332, + "loss": 1.5904, + "step": 5117 + }, + { + "epoch": 0.539873417721519, + "grad_norm": 0.7908607721328735, + "learning_rate": 0.000667193960132116, + "loss": 1.6232, + "step": 5118 + }, + { + "epoch": 0.5399789029535865, + "grad_norm": 0.516300618648529, + "learning_rate": 0.0006669444400533286, + "loss": 1.5812, + "step": 5119 + }, + { + "epoch": 0.540084388185654, + "grad_norm": 0.8114026188850403, + "learning_rate": 0.0006666949292813306, + "loss": 1.5684, + "step": 5120 + }, + { + "epoch": 0.5401898734177215, + "grad_norm": 0.6754075884819031, + "learning_rate": 0.0006664454278440813, + "loss": 1.5881, + "step": 5121 + }, + { + "epoch": 0.5402953586497891, + "grad_norm": 0.7179992198944092, + "learning_rate": 0.0006661959357695382, + "loss": 1.5764, + "step": 5122 + }, + { + "epoch": 0.5404008438818565, + "grad_norm": 0.876653790473938, + "learning_rate": 0.0006659464530856587, + "loss": 1.5753, + "step": 5123 + }, + { + "epoch": 0.540506329113924, + "grad_norm": 0.680297315120697, + "learning_rate": 0.0006656969798203982, + "loss": 1.5955, + "step": 5124 + }, + { + "epoch": 0.5406118143459916, + "grad_norm": 0.7084438800811768, + "learning_rate": 0.0006654475160017115, + "loss": 1.593, + "step": 5125 + }, + { + "epoch": 0.540717299578059, + "grad_norm": 0.594897449016571, + "learning_rate": 
0.0006651980616575522, + "loss": 1.5766, + "step": 5126 + }, + { + "epoch": 0.5408227848101266, + "grad_norm": 0.6758778691291809, + "learning_rate": 0.0006649486168158731, + "loss": 1.5896, + "step": 5127 + }, + { + "epoch": 0.5409282700421941, + "grad_norm": 0.6776182651519775, + "learning_rate": 0.0006646991815046254, + "loss": 1.5954, + "step": 5128 + }, + { + "epoch": 0.5410337552742616, + "grad_norm": 0.6052396893501282, + "learning_rate": 0.0006644497557517599, + "loss": 1.5983, + "step": 5129 + }, + { + "epoch": 0.5411392405063291, + "grad_norm": 0.7715256214141846, + "learning_rate": 0.0006642003395852258, + "loss": 1.5918, + "step": 5130 + }, + { + "epoch": 0.5412447257383967, + "grad_norm": 0.5821048021316528, + "learning_rate": 0.0006639509330329713, + "loss": 1.6221, + "step": 5131 + }, + { + "epoch": 0.5413502109704641, + "grad_norm": 0.7393077611923218, + "learning_rate": 0.0006637015361229438, + "loss": 1.6346, + "step": 5132 + }, + { + "epoch": 0.5414556962025316, + "grad_norm": 0.6289612054824829, + "learning_rate": 0.0006634521488830898, + "loss": 1.5867, + "step": 5133 + }, + { + "epoch": 0.5415611814345992, + "grad_norm": 0.6035498380661011, + "learning_rate": 0.0006632027713413541, + "loss": 1.605, + "step": 5134 + }, + { + "epoch": 0.5416666666666666, + "grad_norm": 0.6396929621696472, + "learning_rate": 0.0006629534035256805, + "loss": 1.5885, + "step": 5135 + }, + { + "epoch": 0.5417721518987342, + "grad_norm": 0.5579069256782532, + "learning_rate": 0.0006627040454640123, + "loss": 1.5915, + "step": 5136 + }, + { + "epoch": 0.5418776371308017, + "grad_norm": 0.6478556394577026, + "learning_rate": 0.0006624546971842909, + "loss": 1.615, + "step": 5137 + }, + { + "epoch": 0.5419831223628692, + "grad_norm": 0.6278925538063049, + "learning_rate": 0.0006622053587144572, + "loss": 1.602, + "step": 5138 + }, + { + "epoch": 0.5420886075949367, + "grad_norm": 0.5674294233322144, + "learning_rate": 0.0006619560300824507, + "loss": 1.6099, + "step": 
5139 + }, + { + "epoch": 0.5421940928270043, + "grad_norm": 0.5917444825172424, + "learning_rate": 0.0006617067113162103, + "loss": 1.5955, + "step": 5140 + }, + { + "epoch": 0.5422995780590717, + "grad_norm": 0.6110427975654602, + "learning_rate": 0.0006614574024436732, + "loss": 1.5785, + "step": 5141 + }, + { + "epoch": 0.5424050632911392, + "grad_norm": 0.6854137182235718, + "learning_rate": 0.0006612081034927756, + "loss": 1.6283, + "step": 5142 + }, + { + "epoch": 0.5425105485232068, + "grad_norm": 0.6138169169425964, + "learning_rate": 0.0006609588144914528, + "loss": 1.6242, + "step": 5143 + }, + { + "epoch": 0.5426160337552742, + "grad_norm": 0.5798704028129578, + "learning_rate": 0.0006607095354676389, + "loss": 1.5915, + "step": 5144 + }, + { + "epoch": 0.5427215189873418, + "grad_norm": 0.5434279441833496, + "learning_rate": 0.0006604602664492667, + "loss": 1.6051, + "step": 5145 + }, + { + "epoch": 0.5428270042194093, + "grad_norm": 0.5707079172134399, + "learning_rate": 0.0006602110074642682, + "loss": 1.5867, + "step": 5146 + }, + { + "epoch": 0.5429324894514768, + "grad_norm": 0.6136588454246521, + "learning_rate": 0.000659961758540574, + "loss": 1.6244, + "step": 5147 + }, + { + "epoch": 0.5430379746835443, + "grad_norm": 0.5607332587242126, + "learning_rate": 0.0006597125197061133, + "loss": 1.5667, + "step": 5148 + }, + { + "epoch": 0.5431434599156119, + "grad_norm": 0.5725066661834717, + "learning_rate": 0.0006594632909888154, + "loss": 1.5978, + "step": 5149 + }, + { + "epoch": 0.5432489451476793, + "grad_norm": 0.5763807892799377, + "learning_rate": 0.0006592140724166073, + "loss": 1.6218, + "step": 5150 + }, + { + "epoch": 0.5433544303797468, + "grad_norm": 0.6949591040611267, + "learning_rate": 0.000658964864017415, + "loss": 1.5704, + "step": 5151 + }, + { + "epoch": 0.5434599156118144, + "grad_norm": 0.6387679576873779, + "learning_rate": 0.0006587156658191635, + "loss": 1.627, + "step": 5152 + }, + { + "epoch": 0.5435654008438818, + 
"grad_norm": 0.7209502458572388, + "learning_rate": 0.0006584664778497771, + "loss": 1.5831, + "step": 5153 + }, + { + "epoch": 0.5436708860759494, + "grad_norm": 0.6273326873779297, + "learning_rate": 0.0006582173001371781, + "loss": 1.6023, + "step": 5154 + }, + { + "epoch": 0.5437763713080169, + "grad_norm": 0.6091082096099854, + "learning_rate": 0.0006579681327092883, + "loss": 1.6162, + "step": 5155 + }, + { + "epoch": 0.5438818565400844, + "grad_norm": 0.5217263102531433, + "learning_rate": 0.0006577189755940282, + "loss": 1.5576, + "step": 5156 + }, + { + "epoch": 0.5439873417721519, + "grad_norm": 0.537628710269928, + "learning_rate": 0.0006574698288193166, + "loss": 1.5905, + "step": 5157 + }, + { + "epoch": 0.5440928270042195, + "grad_norm": 0.575078010559082, + "learning_rate": 0.0006572206924130725, + "loss": 1.5819, + "step": 5158 + }, + { + "epoch": 0.5441983122362869, + "grad_norm": 0.5899105668067932, + "learning_rate": 0.0006569715664032124, + "loss": 1.6121, + "step": 5159 + }, + { + "epoch": 0.5443037974683544, + "grad_norm": 0.6504930853843689, + "learning_rate": 0.0006567224508176523, + "loss": 1.5968, + "step": 5160 + }, + { + "epoch": 0.544409282700422, + "grad_norm": 0.604494571685791, + "learning_rate": 0.0006564733456843067, + "loss": 1.6288, + "step": 5161 + }, + { + "epoch": 0.5445147679324894, + "grad_norm": 0.6265363693237305, + "learning_rate": 0.000656224251031089, + "loss": 1.6279, + "step": 5162 + }, + { + "epoch": 0.544620253164557, + "grad_norm": 0.640588641166687, + "learning_rate": 0.0006559751668859115, + "loss": 1.5643, + "step": 5163 + }, + { + "epoch": 0.5447257383966245, + "grad_norm": 0.6222333908081055, + "learning_rate": 0.0006557260932766855, + "loss": 1.5957, + "step": 5164 + }, + { + "epoch": 0.544831223628692, + "grad_norm": 0.6162852644920349, + "learning_rate": 0.0006554770302313205, + "loss": 1.6081, + "step": 5165 + }, + { + "epoch": 0.5449367088607595, + "grad_norm": 0.6747568249702454, + "learning_rate": 
0.0006552279777777258, + "loss": 1.561, + "step": 5166 + }, + { + "epoch": 0.5450421940928271, + "grad_norm": 0.6299450397491455, + "learning_rate": 0.000654978935943809, + "loss": 1.6082, + "step": 5167 + }, + { + "epoch": 0.5451476793248945, + "grad_norm": 0.5254710912704468, + "learning_rate": 0.0006547299047574761, + "loss": 1.5906, + "step": 5168 + }, + { + "epoch": 0.545253164556962, + "grad_norm": 0.6647785902023315, + "learning_rate": 0.0006544808842466324, + "loss": 1.6246, + "step": 5169 + }, + { + "epoch": 0.5453586497890295, + "grad_norm": 0.5635039210319519, + "learning_rate": 0.0006542318744391821, + "loss": 1.5425, + "step": 5170 + }, + { + "epoch": 0.545464135021097, + "grad_norm": 0.7174153327941895, + "learning_rate": 0.0006539828753630276, + "loss": 1.5766, + "step": 5171 + }, + { + "epoch": 0.5455696202531646, + "grad_norm": 0.7051339745521545, + "learning_rate": 0.0006537338870460708, + "loss": 1.5971, + "step": 5172 + }, + { + "epoch": 0.545675105485232, + "grad_norm": 0.624095618724823, + "learning_rate": 0.000653484909516212, + "loss": 1.6091, + "step": 5173 + }, + { + "epoch": 0.5457805907172996, + "grad_norm": 0.6316670179367065, + "learning_rate": 0.00065323594280135, + "loss": 1.6234, + "step": 5174 + }, + { + "epoch": 0.5458860759493671, + "grad_norm": 0.6557101607322693, + "learning_rate": 0.0006529869869293834, + "loss": 1.6052, + "step": 5175 + }, + { + "epoch": 0.5459915611814345, + "grad_norm": 0.7150468826293945, + "learning_rate": 0.0006527380419282088, + "loss": 1.5877, + "step": 5176 + }, + { + "epoch": 0.5460970464135021, + "grad_norm": 0.5839698314666748, + "learning_rate": 0.0006524891078257215, + "loss": 1.6268, + "step": 5177 + }, + { + "epoch": 0.5462025316455696, + "grad_norm": 0.6658345460891724, + "learning_rate": 0.000652240184649816, + "loss": 1.6505, + "step": 5178 + }, + { + "epoch": 0.5463080168776371, + "grad_norm": 0.7507173418998718, + "learning_rate": 0.0006519912724283851, + "loss": 1.5897, + "step": 5179 + 
}, + { + "epoch": 0.5464135021097046, + "grad_norm": 0.8336673378944397, + "learning_rate": 0.0006517423711893209, + "loss": 1.6093, + "step": 5180 + }, + { + "epoch": 0.5465189873417722, + "grad_norm": 0.6486624479293823, + "learning_rate": 0.000651493480960514, + "loss": 1.6013, + "step": 5181 + }, + { + "epoch": 0.5466244725738396, + "grad_norm": 0.7080000042915344, + "learning_rate": 0.0006512446017698537, + "loss": 1.5954, + "step": 5182 + }, + { + "epoch": 0.5467299578059072, + "grad_norm": 0.7093836665153503, + "learning_rate": 0.0006509957336452279, + "loss": 1.5751, + "step": 5183 + }, + { + "epoch": 0.5468354430379747, + "grad_norm": 0.6306095719337463, + "learning_rate": 0.0006507468766145242, + "loss": 1.6071, + "step": 5184 + }, + { + "epoch": 0.5469409282700421, + "grad_norm": 0.7644929885864258, + "learning_rate": 0.000650498030705628, + "loss": 1.5708, + "step": 5185 + }, + { + "epoch": 0.5470464135021097, + "grad_norm": 0.7609584927558899, + "learning_rate": 0.0006502491959464235, + "loss": 1.6271, + "step": 5186 + }, + { + "epoch": 0.5471518987341772, + "grad_norm": 0.6713290810585022, + "learning_rate": 0.000650000372364794, + "loss": 1.5788, + "step": 5187 + }, + { + "epoch": 0.5472573839662447, + "grad_norm": 0.8014529943466187, + "learning_rate": 0.0006497515599886214, + "loss": 1.5533, + "step": 5188 + }, + { + "epoch": 0.5473628691983122, + "grad_norm": 0.9118387699127197, + "learning_rate": 0.0006495027588457864, + "loss": 1.5993, + "step": 5189 + }, + { + "epoch": 0.5474683544303798, + "grad_norm": 0.5961146950721741, + "learning_rate": 0.0006492539689641685, + "loss": 1.5699, + "step": 5190 + }, + { + "epoch": 0.5475738396624472, + "grad_norm": 0.8469865918159485, + "learning_rate": 0.0006490051903716454, + "loss": 1.6057, + "step": 5191 + }, + { + "epoch": 0.5476793248945148, + "grad_norm": 0.6130828857421875, + "learning_rate": 0.0006487564230960944, + "loss": 1.6083, + "step": 5192 + }, + { + "epoch": 0.5477848101265823, + "grad_norm": 
0.7785215377807617, + "learning_rate": 0.0006485076671653913, + "loss": 1.5849, + "step": 5193 + }, + { + "epoch": 0.5478902953586497, + "grad_norm": 0.7278326749801636, + "learning_rate": 0.00064825892260741, + "loss": 1.6107, + "step": 5194 + }, + { + "epoch": 0.5479957805907173, + "grad_norm": 0.6434265971183777, + "learning_rate": 0.0006480101894500239, + "loss": 1.5918, + "step": 5195 + }, + { + "epoch": 0.5481012658227848, + "grad_norm": 0.7892020344734192, + "learning_rate": 0.0006477614677211046, + "loss": 1.6189, + "step": 5196 + }, + { + "epoch": 0.5482067510548523, + "grad_norm": 0.6788495182991028, + "learning_rate": 0.0006475127574485226, + "loss": 1.6085, + "step": 5197 + }, + { + "epoch": 0.5483122362869198, + "grad_norm": 0.6472010612487793, + "learning_rate": 0.0006472640586601472, + "loss": 1.5748, + "step": 5198 + }, + { + "epoch": 0.5484177215189874, + "grad_norm": 0.6589930057525635, + "learning_rate": 0.0006470153713838463, + "loss": 1.5863, + "step": 5199 + }, + { + "epoch": 0.5485232067510548, + "grad_norm": 0.5829207301139832, + "learning_rate": 0.0006467666956474865, + "loss": 1.5874, + "step": 5200 + }, + { + "epoch": 0.5486286919831224, + "grad_norm": 0.8547044396400452, + "learning_rate": 0.0006465180314789332, + "loss": 1.5421, + "step": 5201 + }, + { + "epoch": 0.5487341772151899, + "grad_norm": 0.5662292242050171, + "learning_rate": 0.0006462693789060505, + "loss": 1.6044, + "step": 5202 + }, + { + "epoch": 0.5488396624472573, + "grad_norm": 0.7289818525314331, + "learning_rate": 0.0006460207379567011, + "loss": 1.5832, + "step": 5203 + }, + { + "epoch": 0.5489451476793249, + "grad_norm": 0.6469651460647583, + "learning_rate": 0.0006457721086587468, + "loss": 1.5838, + "step": 5204 + }, + { + "epoch": 0.5490506329113924, + "grad_norm": 0.6289713978767395, + "learning_rate": 0.0006455234910400472, + "loss": 1.5979, + "step": 5205 + }, + { + "epoch": 0.5491561181434599, + "grad_norm": 0.6011807918548584, + "learning_rate": 
0.0006452748851284615, + "loss": 1.578, + "step": 5206 + }, + { + "epoch": 0.5492616033755274, + "grad_norm": 0.6146422028541565, + "learning_rate": 0.0006450262909518471, + "loss": 1.5604, + "step": 5207 + }, + { + "epoch": 0.549367088607595, + "grad_norm": 0.6351657509803772, + "learning_rate": 0.0006447777085380603, + "loss": 1.607, + "step": 5208 + }, + { + "epoch": 0.5494725738396624, + "grad_norm": 0.6123545169830322, + "learning_rate": 0.0006445291379149556, + "loss": 1.5948, + "step": 5209 + }, + { + "epoch": 0.54957805907173, + "grad_norm": 0.6446181535720825, + "learning_rate": 0.0006442805791103873, + "loss": 1.5771, + "step": 5210 + }, + { + "epoch": 0.5496835443037975, + "grad_norm": 0.5485979318618774, + "learning_rate": 0.0006440320321522071, + "loss": 1.6089, + "step": 5211 + }, + { + "epoch": 0.549789029535865, + "grad_norm": 0.8688071370124817, + "learning_rate": 0.0006437834970682661, + "loss": 1.5489, + "step": 5212 + }, + { + "epoch": 0.5498945147679325, + "grad_norm": 0.7305094003677368, + "learning_rate": 0.000643534973886414, + "loss": 1.6269, + "step": 5213 + }, + { + "epoch": 0.55, + "grad_norm": 0.7029256224632263, + "learning_rate": 0.0006432864626344989, + "loss": 1.6098, + "step": 5214 + }, + { + "epoch": 0.5501054852320675, + "grad_norm": 0.7100976705551147, + "learning_rate": 0.0006430379633403679, + "loss": 1.6052, + "step": 5215 + }, + { + "epoch": 0.550210970464135, + "grad_norm": 0.6645053029060364, + "learning_rate": 0.0006427894760318664, + "loss": 1.6234, + "step": 5216 + }, + { + "epoch": 0.5503164556962026, + "grad_norm": 0.6340701580047607, + "learning_rate": 0.0006425410007368385, + "loss": 1.589, + "step": 5217 + }, + { + "epoch": 0.55042194092827, + "grad_norm": 0.6332590579986572, + "learning_rate": 0.0006422925374831275, + "loss": 1.5937, + "step": 5218 + }, + { + "epoch": 0.5505274261603376, + "grad_norm": 0.6125528812408447, + "learning_rate": 0.0006420440862985748, + "loss": 1.6183, + "step": 5219 + }, + { + 
"epoch": 0.5506329113924051, + "grad_norm": 0.7023836374282837, + "learning_rate": 0.0006417956472110205, + "loss": 1.5541, + "step": 5220 + }, + { + "epoch": 0.5507383966244725, + "grad_norm": 0.6267361044883728, + "learning_rate": 0.0006415472202483034, + "loss": 1.5762, + "step": 5221 + }, + { + "epoch": 0.5508438818565401, + "grad_norm": 0.5872868895530701, + "learning_rate": 0.0006412988054382611, + "loss": 1.5566, + "step": 5222 + }, + { + "epoch": 0.5509493670886076, + "grad_norm": 0.6211118698120117, + "learning_rate": 0.0006410504028087297, + "loss": 1.5588, + "step": 5223 + }, + { + "epoch": 0.5510548523206751, + "grad_norm": 0.6020947098731995, + "learning_rate": 0.000640802012387544, + "loss": 1.6097, + "step": 5224 + }, + { + "epoch": 0.5511603375527426, + "grad_norm": 0.5888354778289795, + "learning_rate": 0.0006405536342025374, + "loss": 1.5927, + "step": 5225 + }, + { + "epoch": 0.5512658227848102, + "grad_norm": 0.640885055065155, + "learning_rate": 0.0006403052682815415, + "loss": 1.5874, + "step": 5226 + }, + { + "epoch": 0.5513713080168776, + "grad_norm": 0.5705142617225647, + "learning_rate": 0.0006400569146523875, + "loss": 1.554, + "step": 5227 + }, + { + "epoch": 0.5514767932489452, + "grad_norm": 0.7152031660079956, + "learning_rate": 0.0006398085733429045, + "loss": 1.6278, + "step": 5228 + }, + { + "epoch": 0.5515822784810127, + "grad_norm": 0.5617630481719971, + "learning_rate": 0.0006395602443809203, + "loss": 1.5915, + "step": 5229 + }, + { + "epoch": 0.5516877637130801, + "grad_norm": 0.8012044429779053, + "learning_rate": 0.0006393119277942614, + "loss": 1.5984, + "step": 5230 + }, + { + "epoch": 0.5517932489451477, + "grad_norm": 0.5110200643539429, + "learning_rate": 0.0006390636236107528, + "loss": 1.6155, + "step": 5231 + }, + { + "epoch": 0.5518987341772152, + "grad_norm": 0.8277698755264282, + "learning_rate": 0.0006388153318582185, + "loss": 1.5842, + "step": 5232 + }, + { + "epoch": 0.5520042194092827, + "grad_norm": 
0.5671764016151428, + "learning_rate": 0.0006385670525644806, + "loss": 1.5438, + "step": 5233 + }, + { + "epoch": 0.5521097046413502, + "grad_norm": 0.9168720841407776, + "learning_rate": 0.0006383187857573601, + "loss": 1.5936, + "step": 5234 + }, + { + "epoch": 0.5522151898734177, + "grad_norm": 0.5641317367553711, + "learning_rate": 0.0006380705314646765, + "loss": 1.5954, + "step": 5235 + }, + { + "epoch": 0.5523206751054852, + "grad_norm": 0.7661169171333313, + "learning_rate": 0.0006378222897142482, + "loss": 1.6054, + "step": 5236 + }, + { + "epoch": 0.5524261603375528, + "grad_norm": 0.6831728219985962, + "learning_rate": 0.0006375740605338916, + "loss": 1.5935, + "step": 5237 + }, + { + "epoch": 0.5525316455696202, + "grad_norm": 0.8587228059768677, + "learning_rate": 0.0006373258439514221, + "loss": 1.5828, + "step": 5238 + }, + { + "epoch": 0.5526371308016877, + "grad_norm": 0.6470775604248047, + "learning_rate": 0.0006370776399946536, + "loss": 1.6052, + "step": 5239 + }, + { + "epoch": 0.5527426160337553, + "grad_norm": 0.6064849495887756, + "learning_rate": 0.0006368294486913987, + "loss": 1.6199, + "step": 5240 + }, + { + "epoch": 0.5528481012658227, + "grad_norm": 0.7431096434593201, + "learning_rate": 0.0006365812700694683, + "loss": 1.5994, + "step": 5241 + }, + { + "epoch": 0.5529535864978903, + "grad_norm": 0.5872042775154114, + "learning_rate": 0.0006363331041566723, + "loss": 1.5794, + "step": 5242 + }, + { + "epoch": 0.5530590717299578, + "grad_norm": 0.7882615327835083, + "learning_rate": 0.0006360849509808184, + "loss": 1.5695, + "step": 5243 + }, + { + "epoch": 0.5531645569620253, + "grad_norm": 0.5897212624549866, + "learning_rate": 0.0006358368105697142, + "loss": 1.5634, + "step": 5244 + }, + { + "epoch": 0.5532700421940928, + "grad_norm": 0.6477144956588745, + "learning_rate": 0.0006355886829511645, + "loss": 1.5923, + "step": 5245 + }, + { + "epoch": 0.5533755274261604, + "grad_norm": 0.6309342980384827, + "learning_rate": 
0.0006353405681529734, + "loss": 1.5944, + "step": 5246 + }, + { + "epoch": 0.5534810126582278, + "grad_norm": 0.6212407350540161, + "learning_rate": 0.0006350924662029433, + "loss": 1.588, + "step": 5247 + }, + { + "epoch": 0.5535864978902953, + "grad_norm": 0.7983561754226685, + "learning_rate": 0.0006348443771288755, + "loss": 1.6019, + "step": 5248 + }, + { + "epoch": 0.5536919831223629, + "grad_norm": 0.8172224164009094, + "learning_rate": 0.0006345963009585694, + "loss": 1.5684, + "step": 5249 + }, + { + "epoch": 0.5537974683544303, + "grad_norm": 0.9332393407821655, + "learning_rate": 0.0006343482377198232, + "loss": 1.6143, + "step": 5250 + }, + { + "epoch": 0.5539029535864979, + "grad_norm": 0.760391891002655, + "learning_rate": 0.0006341001874404335, + "loss": 1.6144, + "step": 5251 + }, + { + "epoch": 0.5540084388185654, + "grad_norm": 0.6221593022346497, + "learning_rate": 0.0006338521501481957, + "loss": 1.5806, + "step": 5252 + }, + { + "epoch": 0.5541139240506329, + "grad_norm": 0.8472188115119934, + "learning_rate": 0.0006336041258709039, + "loss": 1.5866, + "step": 5253 + }, + { + "epoch": 0.5542194092827004, + "grad_norm": 0.6060925126075745, + "learning_rate": 0.0006333561146363502, + "loss": 1.6091, + "step": 5254 + }, + { + "epoch": 0.554324894514768, + "grad_norm": 0.8474965691566467, + "learning_rate": 0.0006331081164723253, + "loss": 1.6, + "step": 5255 + }, + { + "epoch": 0.5544303797468354, + "grad_norm": 0.7075764536857605, + "learning_rate": 0.000632860131406619, + "loss": 1.5567, + "step": 5256 + }, + { + "epoch": 0.554535864978903, + "grad_norm": 0.7350090742111206, + "learning_rate": 0.0006326121594670191, + "loss": 1.6045, + "step": 5257 + }, + { + "epoch": 0.5546413502109705, + "grad_norm": 0.62300705909729, + "learning_rate": 0.000632364200681312, + "loss": 1.5936, + "step": 5258 + }, + { + "epoch": 0.5547468354430379, + "grad_norm": 0.7969799041748047, + "learning_rate": 0.0006321162550772829, + "loss": 1.6107, + "step": 5259 + }, 
+ { + "epoch": 0.5548523206751055, + "grad_norm": 0.7122313380241394, + "learning_rate": 0.0006318683226827151, + "loss": 1.568, + "step": 5260 + }, + { + "epoch": 0.554957805907173, + "grad_norm": 0.7852382063865662, + "learning_rate": 0.0006316204035253906, + "loss": 1.6001, + "step": 5261 + }, + { + "epoch": 0.5550632911392405, + "grad_norm": 0.7615951299667358, + "learning_rate": 0.0006313724976330904, + "loss": 1.5996, + "step": 5262 + }, + { + "epoch": 0.555168776371308, + "grad_norm": 0.6798845529556274, + "learning_rate": 0.0006311246050335934, + "loss": 1.5994, + "step": 5263 + }, + { + "epoch": 0.5552742616033756, + "grad_norm": 0.6830047965049744, + "learning_rate": 0.0006308767257546772, + "loss": 1.5766, + "step": 5264 + }, + { + "epoch": 0.555379746835443, + "grad_norm": 0.8239656686782837, + "learning_rate": 0.0006306288598241179, + "loss": 1.5911, + "step": 5265 + }, + { + "epoch": 0.5554852320675105, + "grad_norm": 0.6542679071426392, + "learning_rate": 0.00063038100726969, + "loss": 1.592, + "step": 5266 + }, + { + "epoch": 0.5555907172995781, + "grad_norm": 0.9193620681762695, + "learning_rate": 0.0006301331681191668, + "loss": 1.6057, + "step": 5267 + }, + { + "epoch": 0.5556962025316455, + "grad_norm": 0.6901242733001709, + "learning_rate": 0.0006298853424003199, + "loss": 1.6127, + "step": 5268 + }, + { + "epoch": 0.5558016877637131, + "grad_norm": 0.7317206859588623, + "learning_rate": 0.0006296375301409187, + "loss": 1.6097, + "step": 5269 + }, + { + "epoch": 0.5559071729957806, + "grad_norm": 0.6622313857078552, + "learning_rate": 0.0006293897313687331, + "loss": 1.6014, + "step": 5270 + }, + { + "epoch": 0.5560126582278481, + "grad_norm": 0.7744805812835693, + "learning_rate": 0.0006291419461115293, + "loss": 1.5975, + "step": 5271 + }, + { + "epoch": 0.5561181434599156, + "grad_norm": 0.6122986078262329, + "learning_rate": 0.0006288941743970732, + "loss": 1.6131, + "step": 5272 + }, + { + "epoch": 0.5562236286919832, + "grad_norm": 
0.7169475555419922, + "learning_rate": 0.0006286464162531287, + "loss": 1.5548, + "step": 5273 + }, + { + "epoch": 0.5563291139240506, + "grad_norm": 0.7342836856842041, + "learning_rate": 0.0006283986717074585, + "loss": 1.5583, + "step": 5274 + }, + { + "epoch": 0.5564345991561181, + "grad_norm": 0.6343541145324707, + "learning_rate": 0.0006281509407878232, + "loss": 1.5748, + "step": 5275 + }, + { + "epoch": 0.5565400843881857, + "grad_norm": 0.7954130172729492, + "learning_rate": 0.0006279032235219829, + "loss": 1.6087, + "step": 5276 + }, + { + "epoch": 0.5566455696202531, + "grad_norm": 0.5829187631607056, + "learning_rate": 0.0006276555199376951, + "loss": 1.6079, + "step": 5277 + }, + { + "epoch": 0.5567510548523207, + "grad_norm": 0.6037096977233887, + "learning_rate": 0.000627407830062716, + "loss": 1.5827, + "step": 5278 + }, + { + "epoch": 0.5568565400843882, + "grad_norm": 0.7449883222579956, + "learning_rate": 0.0006271601539248012, + "loss": 1.6022, + "step": 5279 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 0.5881402492523193, + "learning_rate": 0.0006269124915517037, + "loss": 1.5771, + "step": 5280 + }, + { + "epoch": 0.5570675105485232, + "grad_norm": 0.6452946662902832, + "learning_rate": 0.0006266648429711753, + "loss": 1.6004, + "step": 5281 + }, + { + "epoch": 0.5571729957805908, + "grad_norm": 0.6720901131629944, + "learning_rate": 0.0006264172082109661, + "loss": 1.5854, + "step": 5282 + }, + { + "epoch": 0.5572784810126582, + "grad_norm": 0.6479543447494507, + "learning_rate": 0.0006261695872988252, + "loss": 1.5654, + "step": 5283 + }, + { + "epoch": 0.5573839662447257, + "grad_norm": 0.7382979393005371, + "learning_rate": 0.0006259219802624994, + "loss": 1.5996, + "step": 5284 + }, + { + "epoch": 0.5574894514767933, + "grad_norm": 0.6831662654876709, + "learning_rate": 0.0006256743871297344, + "loss": 1.583, + "step": 5285 + }, + { + "epoch": 0.5575949367088607, + "grad_norm": 0.6224706768989563, + "learning_rate": 
0.0006254268079282743, + "loss": 1.5827, + "step": 5286 + }, + { + "epoch": 0.5577004219409283, + "grad_norm": 0.6555694937705994, + "learning_rate": 0.0006251792426858612, + "loss": 1.5692, + "step": 5287 + }, + { + "epoch": 0.5578059071729958, + "grad_norm": 0.7428411841392517, + "learning_rate": 0.0006249316914302368, + "loss": 1.5931, + "step": 5288 + }, + { + "epoch": 0.5579113924050633, + "grad_norm": 0.7283396124839783, + "learning_rate": 0.0006246841541891399, + "loss": 1.6111, + "step": 5289 + }, + { + "epoch": 0.5580168776371308, + "grad_norm": 0.6598605513572693, + "learning_rate": 0.0006244366309903084, + "loss": 1.6217, + "step": 5290 + }, + { + "epoch": 0.5581223628691984, + "grad_norm": 0.6769275069236755, + "learning_rate": 0.0006241891218614786, + "loss": 1.6074, + "step": 5291 + }, + { + "epoch": 0.5582278481012658, + "grad_norm": 0.69774329662323, + "learning_rate": 0.0006239416268303849, + "loss": 1.5684, + "step": 5292 + }, + { + "epoch": 0.5583333333333333, + "grad_norm": 0.817751407623291, + "learning_rate": 0.0006236941459247606, + "loss": 1.5906, + "step": 5293 + }, + { + "epoch": 0.5584388185654009, + "grad_norm": 0.6947160363197327, + "learning_rate": 0.0006234466791723371, + "loss": 1.6032, + "step": 5294 + }, + { + "epoch": 0.5585443037974683, + "grad_norm": 0.657581627368927, + "learning_rate": 0.0006231992266008438, + "loss": 1.5832, + "step": 5295 + }, + { + "epoch": 0.5586497890295359, + "grad_norm": 0.6676764488220215, + "learning_rate": 0.00062295178823801, + "loss": 1.6193, + "step": 5296 + }, + { + "epoch": 0.5587552742616034, + "grad_norm": 0.6531654596328735, + "learning_rate": 0.0006227043641115616, + "loss": 1.5759, + "step": 5297 + }, + { + "epoch": 0.5588607594936709, + "grad_norm": 0.6520283818244934, + "learning_rate": 0.0006224569542492241, + "loss": 1.5731, + "step": 5298 + }, + { + "epoch": 0.5589662447257384, + "grad_norm": 0.5986793041229248, + "learning_rate": 0.0006222095586787208, + "loss": 1.6222, + "step": 5299 
+ }, + { + "epoch": 0.5590717299578059, + "grad_norm": 0.6624261736869812, + "learning_rate": 0.0006219621774277737, + "loss": 1.616, + "step": 5300 + }, + { + "epoch": 0.5591772151898734, + "grad_norm": 0.5853669047355652, + "learning_rate": 0.000621714810524103, + "loss": 1.572, + "step": 5301 + }, + { + "epoch": 0.559282700421941, + "grad_norm": 0.7326205372810364, + "learning_rate": 0.0006214674579954276, + "loss": 1.5869, + "step": 5302 + }, + { + "epoch": 0.5593881856540084, + "grad_norm": 0.7958455681800842, + "learning_rate": 0.0006212201198694643, + "loss": 1.6049, + "step": 5303 + }, + { + "epoch": 0.5594936708860759, + "grad_norm": 0.5798230767250061, + "learning_rate": 0.0006209727961739286, + "loss": 1.5796, + "step": 5304 + }, + { + "epoch": 0.5595991561181435, + "grad_norm": 0.8374904990196228, + "learning_rate": 0.0006207254869365346, + "loss": 1.6119, + "step": 5305 + }, + { + "epoch": 0.5597046413502109, + "grad_norm": 0.5690895318984985, + "learning_rate": 0.0006204781921849945, + "loss": 1.6061, + "step": 5306 + }, + { + "epoch": 0.5598101265822785, + "grad_norm": 0.7166079878807068, + "learning_rate": 0.0006202309119470188, + "loss": 1.5843, + "step": 5307 + }, + { + "epoch": 0.559915611814346, + "grad_norm": 0.6454137563705444, + "learning_rate": 0.0006199836462503166, + "loss": 1.5419, + "step": 5308 + }, + { + "epoch": 0.5600210970464135, + "grad_norm": 0.8374615907669067, + "learning_rate": 0.0006197363951225951, + "loss": 1.5802, + "step": 5309 + }, + { + "epoch": 0.560126582278481, + "grad_norm": 0.6458372473716736, + "learning_rate": 0.00061948915859156, + "loss": 1.5762, + "step": 5310 + }, + { + "epoch": 0.5602320675105485, + "grad_norm": 0.6325492262840271, + "learning_rate": 0.0006192419366849155, + "loss": 1.5872, + "step": 5311 + }, + { + "epoch": 0.560337552742616, + "grad_norm": 0.6869396567344666, + "learning_rate": 0.0006189947294303641, + "loss": 1.5725, + "step": 5312 + }, + { + "epoch": 0.5604430379746835, + "grad_norm": 
0.604071319103241, + "learning_rate": 0.000618747536855606, + "loss": 1.571, + "step": 5313 + }, + { + "epoch": 0.5605485232067511, + "grad_norm": 0.6286689639091492, + "learning_rate": 0.0006185003589883413, + "loss": 1.5567, + "step": 5314 + }, + { + "epoch": 0.5606540084388185, + "grad_norm": 0.6228803396224976, + "learning_rate": 0.0006182531958562672, + "loss": 1.5794, + "step": 5315 + }, + { + "epoch": 0.5607594936708861, + "grad_norm": 0.5892757177352905, + "learning_rate": 0.0006180060474870793, + "loss": 1.5997, + "step": 5316 + }, + { + "epoch": 0.5608649789029536, + "grad_norm": 0.6440526843070984, + "learning_rate": 0.0006177589139084721, + "loss": 1.5469, + "step": 5317 + }, + { + "epoch": 0.560970464135021, + "grad_norm": 0.6137335300445557, + "learning_rate": 0.000617511795148138, + "loss": 1.6181, + "step": 5318 + }, + { + "epoch": 0.5610759493670886, + "grad_norm": 0.6774730086326599, + "learning_rate": 0.0006172646912337678, + "loss": 1.5856, + "step": 5319 + }, + { + "epoch": 0.5611814345991561, + "grad_norm": 0.5152369737625122, + "learning_rate": 0.0006170176021930509, + "loss": 1.5841, + "step": 5320 + }, + { + "epoch": 0.5612869198312236, + "grad_norm": 0.5927020311355591, + "learning_rate": 0.0006167705280536745, + "loss": 1.6184, + "step": 5321 + }, + { + "epoch": 0.5613924050632911, + "grad_norm": 0.6227037310600281, + "learning_rate": 0.000616523468843325, + "loss": 1.6101, + "step": 5322 + }, + { + "epoch": 0.5614978902953587, + "grad_norm": 0.6050466895103455, + "learning_rate": 0.0006162764245896863, + "loss": 1.5974, + "step": 5323 + }, + { + "epoch": 0.5616033755274261, + "grad_norm": 0.6857571601867676, + "learning_rate": 0.0006160293953204412, + "loss": 1.5922, + "step": 5324 + }, + { + "epoch": 0.5617088607594937, + "grad_norm": 0.7192785739898682, + "learning_rate": 0.0006157823810632704, + "loss": 1.5737, + "step": 5325 + }, + { + "epoch": 0.5618143459915612, + "grad_norm": 0.5456132292747498, + "learning_rate": 
0.000615535381845853, + "loss": 1.5883, + "step": 5326 + }, + { + "epoch": 0.5619198312236287, + "grad_norm": 0.7095822691917419, + "learning_rate": 0.0006152883976958665, + "loss": 1.5642, + "step": 5327 + }, + { + "epoch": 0.5620253164556962, + "grad_norm": 0.588423490524292, + "learning_rate": 0.0006150414286409869, + "loss": 1.5582, + "step": 5328 + }, + { + "epoch": 0.5621308016877637, + "grad_norm": 0.9368204474449158, + "learning_rate": 0.0006147944747088881, + "loss": 1.5958, + "step": 5329 + }, + { + "epoch": 0.5622362869198312, + "grad_norm": 0.5711838006973267, + "learning_rate": 0.0006145475359272424, + "loss": 1.5969, + "step": 5330 + }, + { + "epoch": 0.5623417721518987, + "grad_norm": 0.8756574392318726, + "learning_rate": 0.0006143006123237208, + "loss": 1.5908, + "step": 5331 + }, + { + "epoch": 0.5624472573839663, + "grad_norm": 0.5952726602554321, + "learning_rate": 0.0006140537039259925, + "loss": 1.5888, + "step": 5332 + }, + { + "epoch": 0.5625527426160337, + "grad_norm": 0.7922720313072205, + "learning_rate": 0.0006138068107617244, + "loss": 1.5554, + "step": 5333 + }, + { + "epoch": 0.5626582278481013, + "grad_norm": 0.6130345463752747, + "learning_rate": 0.0006135599328585824, + "loss": 1.5964, + "step": 5334 + }, + { + "epoch": 0.5627637130801688, + "grad_norm": 0.681977391242981, + "learning_rate": 0.0006133130702442302, + "loss": 1.5925, + "step": 5335 + }, + { + "epoch": 0.5628691983122363, + "grad_norm": 0.7623736262321472, + "learning_rate": 0.0006130662229463301, + "loss": 1.6002, + "step": 5336 + }, + { + "epoch": 0.5629746835443038, + "grad_norm": 0.7927847504615784, + "learning_rate": 0.0006128193909925425, + "loss": 1.5661, + "step": 5337 + }, + { + "epoch": 0.5630801687763713, + "grad_norm": 0.6150054335594177, + "learning_rate": 0.0006125725744105263, + "loss": 1.5739, + "step": 5338 + }, + { + "epoch": 0.5631856540084388, + "grad_norm": 0.5738579034805298, + "learning_rate": 0.000612325773227938, + "loss": 1.5984, + "step": 
5339 + }, + { + "epoch": 0.5632911392405063, + "grad_norm": 0.6585860252380371, + "learning_rate": 0.0006120789874724336, + "loss": 1.5984, + "step": 5340 + }, + { + "epoch": 0.5633966244725739, + "grad_norm": 0.6496714353561401, + "learning_rate": 0.0006118322171716665, + "loss": 1.6367, + "step": 5341 + }, + { + "epoch": 0.5635021097046413, + "grad_norm": 0.674777626991272, + "learning_rate": 0.0006115854623532884, + "loss": 1.5809, + "step": 5342 + }, + { + "epoch": 0.5636075949367089, + "grad_norm": 0.6029003858566284, + "learning_rate": 0.0006113387230449493, + "loss": 1.5993, + "step": 5343 + }, + { + "epoch": 0.5637130801687764, + "grad_norm": 0.6382285356521606, + "learning_rate": 0.0006110919992742978, + "loss": 1.5649, + "step": 5344 + }, + { + "epoch": 0.5638185654008439, + "grad_norm": 0.6706044673919678, + "learning_rate": 0.0006108452910689804, + "loss": 1.599, + "step": 5345 + }, + { + "epoch": 0.5639240506329114, + "grad_norm": 0.6212074160575867, + "learning_rate": 0.0006105985984566421, + "loss": 1.5692, + "step": 5346 + }, + { + "epoch": 0.564029535864979, + "grad_norm": 0.7880715131759644, + "learning_rate": 0.0006103519214649256, + "loss": 1.5333, + "step": 5347 + }, + { + "epoch": 0.5641350210970464, + "grad_norm": 0.6199901700019836, + "learning_rate": 0.000610105260121473, + "loss": 1.5681, + "step": 5348 + }, + { + "epoch": 0.5642405063291139, + "grad_norm": 0.674527645111084, + "learning_rate": 0.0006098586144539235, + "loss": 1.589, + "step": 5349 + }, + { + "epoch": 0.5643459915611815, + "grad_norm": 0.712820827960968, + "learning_rate": 0.0006096119844899151, + "loss": 1.5585, + "step": 5350 + }, + { + "epoch": 0.5644514767932489, + "grad_norm": 0.600493311882019, + "learning_rate": 0.000609365370257084, + "loss": 1.5695, + "step": 5351 + }, + { + "epoch": 0.5645569620253165, + "grad_norm": 0.8788906931877136, + "learning_rate": 0.0006091187717830643, + "loss": 1.5792, + "step": 5352 + }, + { + "epoch": 0.564662447257384, + "grad_norm": 
0.657202422618866, + "learning_rate": 0.0006088721890954887, + "loss": 1.5917, + "step": 5353 + }, + { + "epoch": 0.5647679324894515, + "grad_norm": 0.7577962875366211, + "learning_rate": 0.0006086256222219881, + "loss": 1.5926, + "step": 5354 + }, + { + "epoch": 0.564873417721519, + "grad_norm": 0.6334778666496277, + "learning_rate": 0.0006083790711901915, + "loss": 1.5964, + "step": 5355 + }, + { + "epoch": 0.5649789029535865, + "grad_norm": 0.7540385723114014, + "learning_rate": 0.0006081325360277257, + "loss": 1.5577, + "step": 5356 + }, + { + "epoch": 0.565084388185654, + "grad_norm": 0.6137079000473022, + "learning_rate": 0.0006078860167622171, + "loss": 1.5834, + "step": 5357 + }, + { + "epoch": 0.5651898734177215, + "grad_norm": 0.6807109117507935, + "learning_rate": 0.000607639513421289, + "loss": 1.5885, + "step": 5358 + }, + { + "epoch": 0.5652953586497891, + "grad_norm": 0.6351199746131897, + "learning_rate": 0.0006073930260325632, + "loss": 1.5963, + "step": 5359 + }, + { + "epoch": 0.5654008438818565, + "grad_norm": 0.71797776222229, + "learning_rate": 0.0006071465546236601, + "loss": 1.6261, + "step": 5360 + }, + { + "epoch": 0.5655063291139241, + "grad_norm": 0.538316011428833, + "learning_rate": 0.0006069000992221977, + "loss": 1.6025, + "step": 5361 + }, + { + "epoch": 0.5656118143459916, + "grad_norm": 0.6127990484237671, + "learning_rate": 0.0006066536598557927, + "loss": 1.5932, + "step": 5362 + }, + { + "epoch": 0.565717299578059, + "grad_norm": 0.6229443550109863, + "learning_rate": 0.0006064072365520601, + "loss": 1.6539, + "step": 5363 + }, + { + "epoch": 0.5658227848101266, + "grad_norm": 0.5557300448417664, + "learning_rate": 0.0006061608293386126, + "loss": 1.5781, + "step": 5364 + }, + { + "epoch": 0.5659282700421941, + "grad_norm": 0.7850141525268555, + "learning_rate": 0.0006059144382430612, + "loss": 1.5776, + "step": 5365 + }, + { + "epoch": 0.5660337552742616, + "grad_norm": 0.5792469382286072, + "learning_rate": 
0.0006056680632930154, + "loss": 1.5954, + "step": 5366 + }, + { + "epoch": 0.5661392405063291, + "grad_norm": 0.6791990995407104, + "learning_rate": 0.0006054217045160831, + "loss": 1.5812, + "step": 5367 + }, + { + "epoch": 0.5662447257383966, + "grad_norm": 0.6727703809738159, + "learning_rate": 0.0006051753619398697, + "loss": 1.5757, + "step": 5368 + }, + { + "epoch": 0.5663502109704641, + "grad_norm": 0.5394134521484375, + "learning_rate": 0.0006049290355919792, + "loss": 1.5385, + "step": 5369 + }, + { + "epoch": 0.5664556962025317, + "grad_norm": 0.653416633605957, + "learning_rate": 0.0006046827255000135, + "loss": 1.5908, + "step": 5370 + }, + { + "epoch": 0.5665611814345991, + "grad_norm": 0.7035013437271118, + "learning_rate": 0.0006044364316915733, + "loss": 1.5963, + "step": 5371 + }, + { + "epoch": 0.5666666666666667, + "grad_norm": 0.7167463302612305, + "learning_rate": 0.0006041901541942565, + "loss": 1.5872, + "step": 5372 + }, + { + "epoch": 0.5667721518987342, + "grad_norm": 0.6173297166824341, + "learning_rate": 0.0006039438930356601, + "loss": 1.5605, + "step": 5373 + }, + { + "epoch": 0.5668776371308016, + "grad_norm": 0.7174326181411743, + "learning_rate": 0.0006036976482433787, + "loss": 1.5992, + "step": 5374 + }, + { + "epoch": 0.5669831223628692, + "grad_norm": 0.6239550709724426, + "learning_rate": 0.0006034514198450053, + "loss": 1.5774, + "step": 5375 + }, + { + "epoch": 0.5670886075949367, + "grad_norm": 0.7517046928405762, + "learning_rate": 0.0006032052078681312, + "loss": 1.5496, + "step": 5376 + }, + { + "epoch": 0.5671940928270042, + "grad_norm": 0.614578127861023, + "learning_rate": 0.0006029590123403456, + "loss": 1.6294, + "step": 5377 + }, + { + "epoch": 0.5672995780590717, + "grad_norm": 0.8070013523101807, + "learning_rate": 0.0006027128332892358, + "loss": 1.5322, + "step": 5378 + }, + { + "epoch": 0.5674050632911393, + "grad_norm": 0.6100845336914062, + "learning_rate": 0.0006024666707423875, + "loss": 1.5894, + "step": 
5379 + }, + { + "epoch": 0.5675105485232067, + "grad_norm": 0.6510815620422363, + "learning_rate": 0.0006022205247273845, + "loss": 1.5754, + "step": 5380 + }, + { + "epoch": 0.5676160337552743, + "grad_norm": 0.5586113333702087, + "learning_rate": 0.0006019743952718085, + "loss": 1.5461, + "step": 5381 + }, + { + "epoch": 0.5677215189873418, + "grad_norm": 0.6524059176445007, + "learning_rate": 0.0006017282824032394, + "loss": 1.5447, + "step": 5382 + }, + { + "epoch": 0.5678270042194092, + "grad_norm": 0.5883893370628357, + "learning_rate": 0.0006014821861492559, + "loss": 1.5865, + "step": 5383 + }, + { + "epoch": 0.5679324894514768, + "grad_norm": 0.6300042867660522, + "learning_rate": 0.0006012361065374339, + "loss": 1.5853, + "step": 5384 + }, + { + "epoch": 0.5680379746835443, + "grad_norm": 0.7221118807792664, + "learning_rate": 0.0006009900435953478, + "loss": 1.5809, + "step": 5385 + }, + { + "epoch": 0.5681434599156118, + "grad_norm": 0.6250579953193665, + "learning_rate": 0.0006007439973505707, + "loss": 1.5978, + "step": 5386 + }, + { + "epoch": 0.5682489451476793, + "grad_norm": 0.7423145771026611, + "learning_rate": 0.0006004979678306729, + "loss": 1.5902, + "step": 5387 + }, + { + "epoch": 0.5683544303797469, + "grad_norm": 0.6642302870750427, + "learning_rate": 0.0006002519550632232, + "loss": 1.5797, + "step": 5388 + }, + { + "epoch": 0.5684599156118143, + "grad_norm": 0.5913203358650208, + "learning_rate": 0.0006000059590757886, + "loss": 1.5862, + "step": 5389 + }, + { + "epoch": 0.5685654008438819, + "grad_norm": 0.7250692844390869, + "learning_rate": 0.0005997599798959343, + "loss": 1.5902, + "step": 5390 + }, + { + "epoch": 0.5686708860759494, + "grad_norm": 0.5903068780899048, + "learning_rate": 0.0005995140175512233, + "loss": 1.5693, + "step": 5391 + }, + { + "epoch": 0.5687763713080168, + "grad_norm": 0.634555459022522, + "learning_rate": 0.000599268072069217, + "loss": 1.5799, + "step": 5392 + }, + { + "epoch": 0.5688818565400844, + 
"grad_norm": 0.6593945026397705, + "learning_rate": 0.0005990221434774751, + "loss": 1.6018, + "step": 5393 + }, + { + "epoch": 0.5689873417721519, + "grad_norm": 0.5477491617202759, + "learning_rate": 0.0005987762318035546, + "loss": 1.5849, + "step": 5394 + }, + { + "epoch": 0.5690928270042194, + "grad_norm": 0.6788293719291687, + "learning_rate": 0.0005985303370750115, + "loss": 1.568, + "step": 5395 + }, + { + "epoch": 0.5691983122362869, + "grad_norm": 0.6094345450401306, + "learning_rate": 0.0005982844593193995, + "loss": 1.5797, + "step": 5396 + }, + { + "epoch": 0.5693037974683545, + "grad_norm": 0.7108115553855896, + "learning_rate": 0.0005980385985642703, + "loss": 1.6148, + "step": 5397 + }, + { + "epoch": 0.5694092827004219, + "grad_norm": 0.6904873251914978, + "learning_rate": 0.000597792754837174, + "loss": 1.5892, + "step": 5398 + }, + { + "epoch": 0.5695147679324895, + "grad_norm": 0.7852742671966553, + "learning_rate": 0.0005975469281656581, + "loss": 1.5591, + "step": 5399 + }, + { + "epoch": 0.569620253164557, + "grad_norm": 0.6934173107147217, + "learning_rate": 0.0005973011185772694, + "loss": 1.5621, + "step": 5400 + }, + { + "epoch": 0.5697257383966244, + "grad_norm": 0.6990570425987244, + "learning_rate": 0.0005970553260995517, + "loss": 1.5843, + "step": 5401 + }, + { + "epoch": 0.569831223628692, + "grad_norm": 0.7326129674911499, + "learning_rate": 0.0005968095507600476, + "loss": 1.5877, + "step": 5402 + }, + { + "epoch": 0.5699367088607595, + "grad_norm": 0.6155556440353394, + "learning_rate": 0.000596563792586297, + "loss": 1.603, + "step": 5403 + }, + { + "epoch": 0.570042194092827, + "grad_norm": 0.7219456434249878, + "learning_rate": 0.0005963180516058386, + "loss": 1.63, + "step": 5404 + }, + { + "epoch": 0.5701476793248945, + "grad_norm": 0.6550527811050415, + "learning_rate": 0.0005960723278462086, + "loss": 1.5692, + "step": 5405 + }, + { + "epoch": 0.5702531645569621, + "grad_norm": 0.6147754192352295, + "learning_rate": 
0.0005958266213349422, + "loss": 1.5541, + "step": 5406 + }, + { + "epoch": 0.5703586497890295, + "grad_norm": 0.6852133870124817, + "learning_rate": 0.0005955809320995714, + "loss": 1.5799, + "step": 5407 + }, + { + "epoch": 0.570464135021097, + "grad_norm": 0.657050371170044, + "learning_rate": 0.0005953352601676272, + "loss": 1.5851, + "step": 5408 + }, + { + "epoch": 0.5705696202531646, + "grad_norm": 0.7173314690589905, + "learning_rate": 0.0005950896055666384, + "loss": 1.5988, + "step": 5409 + }, + { + "epoch": 0.570675105485232, + "grad_norm": 0.5998842716217041, + "learning_rate": 0.0005948439683241318, + "loss": 1.5576, + "step": 5410 + }, + { + "epoch": 0.5707805907172996, + "grad_norm": 0.626063346862793, + "learning_rate": 0.0005945983484676321, + "loss": 1.58, + "step": 5411 + }, + { + "epoch": 0.5708860759493671, + "grad_norm": 0.6835116744041443, + "learning_rate": 0.0005943527460246625, + "loss": 1.5893, + "step": 5412 + }, + { + "epoch": 0.5709915611814346, + "grad_norm": 0.6289712190628052, + "learning_rate": 0.0005941071610227437, + "loss": 1.5988, + "step": 5413 + }, + { + "epoch": 0.5710970464135021, + "grad_norm": 0.822025716304779, + "learning_rate": 0.000593861593489395, + "loss": 1.5308, + "step": 5414 + }, + { + "epoch": 0.5712025316455697, + "grad_norm": 0.7364919781684875, + "learning_rate": 0.000593616043452133, + "loss": 1.6132, + "step": 5415 + }, + { + "epoch": 0.5713080168776371, + "grad_norm": 0.7145334482192993, + "learning_rate": 0.0005933705109384735, + "loss": 1.5675, + "step": 5416 + }, + { + "epoch": 0.5714135021097047, + "grad_norm": 0.6712886095046997, + "learning_rate": 0.000593124995975929, + "loss": 1.5917, + "step": 5417 + }, + { + "epoch": 0.5715189873417722, + "grad_norm": 0.791419506072998, + "learning_rate": 0.000592879498592011, + "loss": 1.5879, + "step": 5418 + }, + { + "epoch": 0.5716244725738396, + "grad_norm": 0.7012849450111389, + "learning_rate": 0.0005926340188142289, + "loss": 1.5551, + "step": 5419 + }, 
+ { + "epoch": 0.5717299578059072, + "grad_norm": 0.7183247804641724, + "learning_rate": 0.0005923885566700896, + "loss": 1.5684, + "step": 5420 + }, + { + "epoch": 0.5718354430379747, + "grad_norm": 0.7501686215400696, + "learning_rate": 0.0005921431121870984, + "loss": 1.6161, + "step": 5421 + }, + { + "epoch": 0.5719409282700422, + "grad_norm": 0.6897193193435669, + "learning_rate": 0.0005918976853927586, + "loss": 1.5904, + "step": 5422 + }, + { + "epoch": 0.5720464135021097, + "grad_norm": 0.6850428581237793, + "learning_rate": 0.0005916522763145715, + "loss": 1.5468, + "step": 5423 + }, + { + "epoch": 0.5721518987341773, + "grad_norm": 0.7243770956993103, + "learning_rate": 0.0005914068849800365, + "loss": 1.5519, + "step": 5424 + }, + { + "epoch": 0.5722573839662447, + "grad_norm": 0.7238306403160095, + "learning_rate": 0.0005911615114166508, + "loss": 1.5806, + "step": 5425 + }, + { + "epoch": 0.5723628691983123, + "grad_norm": 0.6865734457969666, + "learning_rate": 0.0005909161556519096, + "loss": 1.5922, + "step": 5426 + }, + { + "epoch": 0.5724683544303798, + "grad_norm": 0.6248385310173035, + "learning_rate": 0.0005906708177133066, + "loss": 1.5909, + "step": 5427 + }, + { + "epoch": 0.5725738396624472, + "grad_norm": 0.6359636187553406, + "learning_rate": 0.0005904254976283331, + "loss": 1.5757, + "step": 5428 + }, + { + "epoch": 0.5726793248945148, + "grad_norm": 0.5754595994949341, + "learning_rate": 0.0005901801954244782, + "loss": 1.5726, + "step": 5429 + }, + { + "epoch": 0.5727848101265823, + "grad_norm": 0.594554603099823, + "learning_rate": 0.0005899349111292293, + "loss": 1.5561, + "step": 5430 + }, + { + "epoch": 0.5728902953586498, + "grad_norm": 0.6463928818702698, + "learning_rate": 0.0005896896447700718, + "loss": 1.6025, + "step": 5431 + }, + { + "epoch": 0.5729957805907173, + "grad_norm": 0.6167823076248169, + "learning_rate": 0.0005894443963744891, + "loss": 1.5626, + "step": 5432 + }, + { + "epoch": 0.5731012658227848, + "grad_norm": 
0.7070375680923462, + "learning_rate": 0.0005891991659699622, + "loss": 1.5473, + "step": 5433 + }, + { + "epoch": 0.5732067510548523, + "grad_norm": 0.6352397203445435, + "learning_rate": 0.0005889539535839704, + "loss": 1.6214, + "step": 5434 + }, + { + "epoch": 0.5733122362869199, + "grad_norm": 0.6967485547065735, + "learning_rate": 0.0005887087592439914, + "loss": 1.5372, + "step": 5435 + }, + { + "epoch": 0.5734177215189873, + "grad_norm": 0.8062803745269775, + "learning_rate": 0.0005884635829775002, + "loss": 1.5555, + "step": 5436 + }, + { + "epoch": 0.5735232067510548, + "grad_norm": 0.6653757691383362, + "learning_rate": 0.00058821842481197, + "loss": 1.551, + "step": 5437 + }, + { + "epoch": 0.5736286919831224, + "grad_norm": 0.7402856945991516, + "learning_rate": 0.0005879732847748721, + "loss": 1.5989, + "step": 5438 + }, + { + "epoch": 0.5737341772151898, + "grad_norm": 0.6809348464012146, + "learning_rate": 0.0005877281628936756, + "loss": 1.5821, + "step": 5439 + }, + { + "epoch": 0.5738396624472574, + "grad_norm": 0.6406720280647278, + "learning_rate": 0.0005874830591958474, + "loss": 1.5801, + "step": 5440 + }, + { + "epoch": 0.5739451476793249, + "grad_norm": 0.5904711484909058, + "learning_rate": 0.000587237973708853, + "loss": 1.585, + "step": 5441 + }, + { + "epoch": 0.5740506329113924, + "grad_norm": 0.6534938812255859, + "learning_rate": 0.0005869929064601551, + "loss": 1.5906, + "step": 5442 + }, + { + "epoch": 0.5741561181434599, + "grad_norm": 0.6243739128112793, + "learning_rate": 0.0005867478574772147, + "loss": 1.5814, + "step": 5443 + }, + { + "epoch": 0.5742616033755275, + "grad_norm": 0.6424314975738525, + "learning_rate": 0.0005865028267874911, + "loss": 1.5823, + "step": 5444 + }, + { + "epoch": 0.5743670886075949, + "grad_norm": 0.6008940935134888, + "learning_rate": 0.0005862578144184412, + "loss": 1.5519, + "step": 5445 + }, + { + "epoch": 0.5744725738396624, + "grad_norm": 0.6688745021820068, + "learning_rate": 
0.0005860128203975196, + "loss": 1.5981, + "step": 5446 + }, + { + "epoch": 0.57457805907173, + "grad_norm": 0.7189667224884033, + "learning_rate": 0.0005857678447521791, + "loss": 1.5963, + "step": 5447 + }, + { + "epoch": 0.5746835443037974, + "grad_norm": 0.5784962177276611, + "learning_rate": 0.0005855228875098706, + "loss": 1.5827, + "step": 5448 + }, + { + "epoch": 0.574789029535865, + "grad_norm": 0.6050390005111694, + "learning_rate": 0.0005852779486980427, + "loss": 1.5764, + "step": 5449 + }, + { + "epoch": 0.5748945147679325, + "grad_norm": 0.6131117343902588, + "learning_rate": 0.000585033028344142, + "loss": 1.5682, + "step": 5450 + }, + { + "epoch": 0.575, + "grad_norm": 0.7192298769950867, + "learning_rate": 0.0005847881264756131, + "loss": 1.5802, + "step": 5451 + }, + { + "epoch": 0.5751054852320675, + "grad_norm": 0.60145103931427, + "learning_rate": 0.0005845432431198981, + "loss": 1.5469, + "step": 5452 + }, + { + "epoch": 0.575210970464135, + "grad_norm": 0.6731974482536316, + "learning_rate": 0.0005842983783044381, + "loss": 1.6082, + "step": 5453 + }, + { + "epoch": 0.5753164556962025, + "grad_norm": 0.6320311427116394, + "learning_rate": 0.0005840535320566711, + "loss": 1.5834, + "step": 5454 + }, + { + "epoch": 0.57542194092827, + "grad_norm": 0.6356359124183655, + "learning_rate": 0.0005838087044040334, + "loss": 1.627, + "step": 5455 + }, + { + "epoch": 0.5755274261603376, + "grad_norm": 0.7046006917953491, + "learning_rate": 0.0005835638953739589, + "loss": 1.5521, + "step": 5456 + }, + { + "epoch": 0.575632911392405, + "grad_norm": 0.586502194404602, + "learning_rate": 0.00058331910499388, + "loss": 1.5887, + "step": 5457 + }, + { + "epoch": 0.5757383966244726, + "grad_norm": 0.6216977834701538, + "learning_rate": 0.0005830743332912264, + "loss": 1.5703, + "step": 5458 + }, + { + "epoch": 0.5758438818565401, + "grad_norm": 0.5775234699249268, + "learning_rate": 0.0005828295802934263, + "loss": 1.5649, + "step": 5459 + }, + { + "epoch": 
0.5759493670886076, + "grad_norm": 0.5879883766174316, + "learning_rate": 0.0005825848460279048, + "loss": 1.5912, + "step": 5460 + }, + { + "epoch": 0.5760548523206751, + "grad_norm": 0.6366018056869507, + "learning_rate": 0.0005823401305220865, + "loss": 1.5447, + "step": 5461 + }, + { + "epoch": 0.5761603375527427, + "grad_norm": 0.6906954646110535, + "learning_rate": 0.0005820954338033925, + "loss": 1.5444, + "step": 5462 + }, + { + "epoch": 0.5762658227848101, + "grad_norm": 0.7335106134414673, + "learning_rate": 0.0005818507558992426, + "loss": 1.5792, + "step": 5463 + }, + { + "epoch": 0.5763713080168776, + "grad_norm": 0.5767647624015808, + "learning_rate": 0.0005816060968370538, + "loss": 1.5685, + "step": 5464 + }, + { + "epoch": 0.5764767932489452, + "grad_norm": 0.7186124324798584, + "learning_rate": 0.0005813614566442416, + "loss": 1.6166, + "step": 5465 + }, + { + "epoch": 0.5765822784810126, + "grad_norm": 0.6646296977996826, + "learning_rate": 0.0005811168353482191, + "loss": 1.6011, + "step": 5466 + }, + { + "epoch": 0.5766877637130802, + "grad_norm": 0.7089313864707947, + "learning_rate": 0.0005808722329763974, + "loss": 1.5887, + "step": 5467 + }, + { + "epoch": 0.5767932489451477, + "grad_norm": 0.627125084400177, + "learning_rate": 0.0005806276495561852, + "loss": 1.5789, + "step": 5468 + }, + { + "epoch": 0.5768987341772152, + "grad_norm": 0.7265461683273315, + "learning_rate": 0.0005803830851149892, + "loss": 1.5858, + "step": 5469 + }, + { + "epoch": 0.5770042194092827, + "grad_norm": 0.6275790929794312, + "learning_rate": 0.0005801385396802146, + "loss": 1.58, + "step": 5470 + }, + { + "epoch": 0.5771097046413503, + "grad_norm": 0.692061722278595, + "learning_rate": 0.0005798940132792636, + "loss": 1.6006, + "step": 5471 + }, + { + "epoch": 0.5772151898734177, + "grad_norm": 0.6397607326507568, + "learning_rate": 0.0005796495059395367, + "loss": 1.571, + "step": 5472 + }, + { + "epoch": 0.5773206751054852, + "grad_norm": 0.7813669443130493, 
+ "learning_rate": 0.0005794050176884321, + "loss": 1.5556, + "step": 5473 + }, + { + "epoch": 0.5774261603375528, + "grad_norm": 0.7881836295127869, + "learning_rate": 0.0005791605485533459, + "loss": 1.5615, + "step": 5474 + }, + { + "epoch": 0.5775316455696202, + "grad_norm": 0.9510564804077148, + "learning_rate": 0.0005789160985616721, + "loss": 1.5729, + "step": 5475 + }, + { + "epoch": 0.5776371308016878, + "grad_norm": 0.7038776874542236, + "learning_rate": 0.0005786716677408025, + "loss": 1.5573, + "step": 5476 + }, + { + "epoch": 0.5777426160337553, + "grad_norm": 0.9584254622459412, + "learning_rate": 0.0005784272561181269, + "loss": 1.6131, + "step": 5477 + }, + { + "epoch": 0.5778481012658228, + "grad_norm": 0.6566774249076843, + "learning_rate": 0.0005781828637210325, + "loss": 1.5911, + "step": 5478 + }, + { + "epoch": 0.5779535864978903, + "grad_norm": 0.9458083510398865, + "learning_rate": 0.0005779384905769053, + "loss": 1.6009, + "step": 5479 + }, + { + "epoch": 0.5780590717299579, + "grad_norm": 0.625148594379425, + "learning_rate": 0.0005776941367131282, + "loss": 1.624, + "step": 5480 + }, + { + "epoch": 0.5781645569620253, + "grad_norm": 0.8622514009475708, + "learning_rate": 0.0005774498021570824, + "loss": 1.541, + "step": 5481 + }, + { + "epoch": 0.5782700421940928, + "grad_norm": 0.7065276503562927, + "learning_rate": 0.0005772054869361465, + "loss": 1.5981, + "step": 5482 + }, + { + "epoch": 0.5783755274261604, + "grad_norm": 0.6446636915206909, + "learning_rate": 0.0005769611910776975, + "loss": 1.5973, + "step": 5483 + }, + { + "epoch": 0.5784810126582278, + "grad_norm": 0.8244423270225525, + "learning_rate": 0.0005767169146091098, + "loss": 1.5936, + "step": 5484 + }, + { + "epoch": 0.5785864978902954, + "grad_norm": 0.5991681814193726, + "learning_rate": 0.0005764726575577559, + "loss": 1.5885, + "step": 5485 + }, + { + "epoch": 0.5786919831223629, + "grad_norm": 0.6596003174781799, + "learning_rate": 0.0005762284199510059, + "loss": 
1.5667, + "step": 5486 + }, + { + "epoch": 0.5787974683544304, + "grad_norm": 0.6216614246368408, + "learning_rate": 0.000575984201816228, + "loss": 1.5906, + "step": 5487 + }, + { + "epoch": 0.5789029535864979, + "grad_norm": 0.5695140361785889, + "learning_rate": 0.0005757400031807881, + "loss": 1.5692, + "step": 5488 + }, + { + "epoch": 0.5790084388185655, + "grad_norm": 0.771271824836731, + "learning_rate": 0.0005754958240720498, + "loss": 1.6346, + "step": 5489 + }, + { + "epoch": 0.5791139240506329, + "grad_norm": 0.5276814103126526, + "learning_rate": 0.0005752516645173745, + "loss": 1.5701, + "step": 5490 + }, + { + "epoch": 0.5792194092827004, + "grad_norm": 0.7173754572868347, + "learning_rate": 0.0005750075245441218, + "loss": 1.5711, + "step": 5491 + }, + { + "epoch": 0.579324894514768, + "grad_norm": 0.6549975872039795, + "learning_rate": 0.0005747634041796484, + "loss": 1.5818, + "step": 5492 + }, + { + "epoch": 0.5794303797468354, + "grad_norm": 0.7018535137176514, + "learning_rate": 0.0005745193034513092, + "loss": 1.5892, + "step": 5493 + }, + { + "epoch": 0.579535864978903, + "grad_norm": 0.6410003304481506, + "learning_rate": 0.0005742752223864573, + "loss": 1.5588, + "step": 5494 + }, + { + "epoch": 0.5796413502109705, + "grad_norm": 0.5921409726142883, + "learning_rate": 0.0005740311610124427, + "loss": 1.5979, + "step": 5495 + }, + { + "epoch": 0.579746835443038, + "grad_norm": 0.5813025236129761, + "learning_rate": 0.0005737871193566141, + "loss": 1.5461, + "step": 5496 + }, + { + "epoch": 0.5798523206751055, + "grad_norm": 0.6054691076278687, + "learning_rate": 0.0005735430974463175, + "loss": 1.5728, + "step": 5497 + }, + { + "epoch": 0.5799578059071729, + "grad_norm": 0.565470278263092, + "learning_rate": 0.0005732990953088968, + "loss": 1.5683, + "step": 5498 + }, + { + "epoch": 0.5800632911392405, + "grad_norm": 0.8199266195297241, + "learning_rate": 0.0005730551129716936, + "loss": 1.6184, + "step": 5499 + }, + { + "epoch": 
0.580168776371308, + "grad_norm": 0.5455203652381897, + "learning_rate": 0.0005728111504620472, + "loss": 1.5579, + "step": 5500 + }, + { + "epoch": 0.5802742616033755, + "grad_norm": 0.9511101841926575, + "learning_rate": 0.000572567207807295, + "loss": 1.5626, + "step": 5501 + }, + { + "epoch": 0.580379746835443, + "grad_norm": 0.5459025502204895, + "learning_rate": 0.000572323285034772, + "loss": 1.6059, + "step": 5502 + }, + { + "epoch": 0.5804852320675106, + "grad_norm": 0.9037071466445923, + "learning_rate": 0.0005720793821718108, + "loss": 1.6024, + "step": 5503 + }, + { + "epoch": 0.580590717299578, + "grad_norm": 0.7004002928733826, + "learning_rate": 0.0005718354992457417, + "loss": 1.5704, + "step": 5504 + }, + { + "epoch": 0.5806962025316456, + "grad_norm": 1.031464695930481, + "learning_rate": 0.0005715916362838936, + "loss": 1.5712, + "step": 5505 + }, + { + "epoch": 0.5808016877637131, + "grad_norm": 0.7390285134315491, + "learning_rate": 0.0005713477933135923, + "loss": 1.6211, + "step": 5506 + }, + { + "epoch": 0.5809071729957805, + "grad_norm": 0.7588917016983032, + "learning_rate": 0.0005711039703621616, + "loss": 1.6049, + "step": 5507 + }, + { + "epoch": 0.5810126582278481, + "grad_norm": 0.9200000762939453, + "learning_rate": 0.0005708601674569232, + "loss": 1.5623, + "step": 5508 + }, + { + "epoch": 0.5811181434599156, + "grad_norm": 0.8441893458366394, + "learning_rate": 0.0005706163846251961, + "loss": 1.5941, + "step": 5509 + }, + { + "epoch": 0.5812236286919831, + "grad_norm": 0.7430054545402527, + "learning_rate": 0.0005703726218942976, + "loss": 1.5629, + "step": 5510 + }, + { + "epoch": 0.5813291139240506, + "grad_norm": 0.7596115469932556, + "learning_rate": 0.0005701288792915427, + "loss": 1.5465, + "step": 5511 + }, + { + "epoch": 0.5814345991561182, + "grad_norm": 0.7768556475639343, + "learning_rate": 0.0005698851568442434, + "loss": 1.5738, + "step": 5512 + }, + { + "epoch": 0.5815400843881856, + "grad_norm": 0.8230984807014465, 
+ "learning_rate": 0.0005696414545797108, + "loss": 1.5886, + "step": 5513 + }, + { + "epoch": 0.5816455696202532, + "grad_norm": 0.8225305676460266, + "learning_rate": 0.0005693977725252525, + "loss": 1.5994, + "step": 5514 + }, + { + "epoch": 0.5817510548523207, + "grad_norm": 0.7245922088623047, + "learning_rate": 0.0005691541107081743, + "loss": 1.584, + "step": 5515 + }, + { + "epoch": 0.5818565400843881, + "grad_norm": 0.7820099592208862, + "learning_rate": 0.0005689104691557798, + "loss": 1.5415, + "step": 5516 + }, + { + "epoch": 0.5819620253164557, + "grad_norm": 0.6318641304969788, + "learning_rate": 0.0005686668478953702, + "loss": 1.5544, + "step": 5517 + }, + { + "epoch": 0.5820675105485232, + "grad_norm": 0.813739538192749, + "learning_rate": 0.0005684232469542446, + "loss": 1.5999, + "step": 5518 + }, + { + "epoch": 0.5821729957805907, + "grad_norm": 0.6827484965324402, + "learning_rate": 0.0005681796663596996, + "loss": 1.5739, + "step": 5519 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 0.8106764554977417, + "learning_rate": 0.0005679361061390295, + "loss": 1.6121, + "step": 5520 + }, + { + "epoch": 0.5823839662447258, + "grad_norm": 0.6474671363830566, + "learning_rate": 0.0005676925663195263, + "loss": 1.5594, + "step": 5521 + }, + { + "epoch": 0.5824894514767932, + "grad_norm": 0.6740016341209412, + "learning_rate": 0.0005674490469284805, + "loss": 1.5547, + "step": 5522 + }, + { + "epoch": 0.5825949367088608, + "grad_norm": 0.6275516748428345, + "learning_rate": 0.0005672055479931791, + "loss": 1.5535, + "step": 5523 + }, + { + "epoch": 0.5827004219409283, + "grad_norm": 0.6067603230476379, + "learning_rate": 0.0005669620695409076, + "loss": 1.5805, + "step": 5524 + }, + { + "epoch": 0.5828059071729957, + "grad_norm": 0.6454451084136963, + "learning_rate": 0.000566718611598949, + "loss": 1.5636, + "step": 5525 + }, + { + "epoch": 0.5829113924050633, + "grad_norm": 0.5803794860839844, + "learning_rate": 0.0005664751741945839, + "loss": 
1.5656, + "step": 5526 + }, + { + "epoch": 0.5830168776371308, + "grad_norm": 0.6579867601394653, + "learning_rate": 0.0005662317573550906, + "loss": 1.562, + "step": 5527 + }, + { + "epoch": 0.5831223628691983, + "grad_norm": 0.6195555329322815, + "learning_rate": 0.0005659883611077453, + "loss": 1.5869, + "step": 5528 + }, + { + "epoch": 0.5832278481012658, + "grad_norm": 0.7217262983322144, + "learning_rate": 0.0005657449854798216, + "loss": 1.56, + "step": 5529 + }, + { + "epoch": 0.5833333333333334, + "grad_norm": 0.6934452056884766, + "learning_rate": 0.0005655016304985908, + "loss": 1.6041, + "step": 5530 + }, + { + "epoch": 0.5834388185654008, + "grad_norm": 0.6582111120223999, + "learning_rate": 0.0005652582961913227, + "loss": 1.5864, + "step": 5531 + }, + { + "epoch": 0.5835443037974684, + "grad_norm": 0.6447290182113647, + "learning_rate": 0.0005650149825852836, + "loss": 1.5389, + "step": 5532 + }, + { + "epoch": 0.5836497890295359, + "grad_norm": 0.7493941783905029, + "learning_rate": 0.0005647716897077382, + "loss": 1.5434, + "step": 5533 + }, + { + "epoch": 0.5837552742616033, + "grad_norm": 0.546858549118042, + "learning_rate": 0.0005645284175859486, + "loss": 1.5491, + "step": 5534 + }, + { + "epoch": 0.5838607594936709, + "grad_norm": 0.6943339109420776, + "learning_rate": 0.0005642851662471745, + "loss": 1.5602, + "step": 5535 + }, + { + "epoch": 0.5839662447257384, + "grad_norm": 0.6125208735466003, + "learning_rate": 0.0005640419357186738, + "loss": 1.5677, + "step": 5536 + }, + { + "epoch": 0.5840717299578059, + "grad_norm": 0.8742371201515198, + "learning_rate": 0.0005637987260277013, + "loss": 1.568, + "step": 5537 + }, + { + "epoch": 0.5841772151898734, + "grad_norm": 0.6387349963188171, + "learning_rate": 0.0005635555372015099, + "loss": 1.6253, + "step": 5538 + }, + { + "epoch": 0.584282700421941, + "grad_norm": 0.8560100197792053, + "learning_rate": 0.0005633123692673503, + "loss": 1.5555, + "step": 5539 + }, + { + "epoch": 
0.5843881856540084, + "grad_norm": 0.5798711776733398, + "learning_rate": 0.0005630692222524709, + "loss": 1.5978, + "step": 5540 + }, + { + "epoch": 0.584493670886076, + "grad_norm": 0.6619745492935181, + "learning_rate": 0.0005628260961841171, + "loss": 1.5826, + "step": 5541 + }, + { + "epoch": 0.5845991561181435, + "grad_norm": 0.5737175345420837, + "learning_rate": 0.0005625829910895325, + "loss": 1.5815, + "step": 5542 + }, + { + "epoch": 0.5847046413502109, + "grad_norm": 0.5898101329803467, + "learning_rate": 0.0005623399069959585, + "loss": 1.5833, + "step": 5543 + }, + { + "epoch": 0.5848101265822785, + "grad_norm": 0.6744459867477417, + "learning_rate": 0.0005620968439306335, + "loss": 1.5676, + "step": 5544 + }, + { + "epoch": 0.584915611814346, + "grad_norm": 0.5373870134353638, + "learning_rate": 0.0005618538019207943, + "loss": 1.5877, + "step": 5545 + }, + { + "epoch": 0.5850210970464135, + "grad_norm": 0.6935928463935852, + "learning_rate": 0.0005616107809936746, + "loss": 1.5691, + "step": 5546 + }, + { + "epoch": 0.585126582278481, + "grad_norm": 0.6707370281219482, + "learning_rate": 0.0005613677811765062, + "loss": 1.5585, + "step": 5547 + }, + { + "epoch": 0.5852320675105486, + "grad_norm": 0.6107662320137024, + "learning_rate": 0.0005611248024965186, + "loss": 1.6011, + "step": 5548 + }, + { + "epoch": 0.585337552742616, + "grad_norm": 0.5715230107307434, + "learning_rate": 0.0005608818449809387, + "loss": 1.5848, + "step": 5549 + }, + { + "epoch": 0.5854430379746836, + "grad_norm": 0.5599727630615234, + "learning_rate": 0.0005606389086569911, + "loss": 1.6192, + "step": 5550 + }, + { + "epoch": 0.5855485232067511, + "grad_norm": 0.6150754690170288, + "learning_rate": 0.0005603959935518981, + "loss": 1.6135, + "step": 5551 + }, + { + "epoch": 0.5856540084388185, + "grad_norm": 0.5782602429389954, + "learning_rate": 0.0005601530996928795, + "loss": 1.6377, + "step": 5552 + }, + { + "epoch": 0.5857594936708861, + "grad_norm": 
0.6897070407867432, + "learning_rate": 0.0005599102271071527, + "loss": 1.57, + "step": 5553 + }, + { + "epoch": 0.5858649789029536, + "grad_norm": 0.6308229565620422, + "learning_rate": 0.0005596673758219327, + "loss": 1.5929, + "step": 5554 + }, + { + "epoch": 0.5859704641350211, + "grad_norm": 0.600324273109436, + "learning_rate": 0.0005594245458644325, + "loss": 1.5853, + "step": 5555 + }, + { + "epoch": 0.5860759493670886, + "grad_norm": 0.7253000736236572, + "learning_rate": 0.0005591817372618621, + "loss": 1.5703, + "step": 5556 + }, + { + "epoch": 0.5861814345991562, + "grad_norm": 0.5959121584892273, + "learning_rate": 0.0005589389500414296, + "loss": 1.5721, + "step": 5557 + }, + { + "epoch": 0.5862869198312236, + "grad_norm": 0.7116453647613525, + "learning_rate": 0.0005586961842303405, + "loss": 1.6043, + "step": 5558 + }, + { + "epoch": 0.5863924050632912, + "grad_norm": 0.5495025515556335, + "learning_rate": 0.0005584534398557977, + "loss": 1.5758, + "step": 5559 + }, + { + "epoch": 0.5864978902953587, + "grad_norm": 0.6519418954849243, + "learning_rate": 0.0005582107169450023, + "loss": 1.5805, + "step": 5560 + }, + { + "epoch": 0.5866033755274261, + "grad_norm": 0.6036416292190552, + "learning_rate": 0.0005579680155251524, + "loss": 1.5769, + "step": 5561 + }, + { + "epoch": 0.5867088607594937, + "grad_norm": 0.5860921740531921, + "learning_rate": 0.0005577253356234439, + "loss": 1.5907, + "step": 5562 + }, + { + "epoch": 0.5868143459915611, + "grad_norm": 0.5764782428741455, + "learning_rate": 0.0005574826772670703, + "loss": 1.5642, + "step": 5563 + }, + { + "epoch": 0.5869198312236287, + "grad_norm": 0.6425598859786987, + "learning_rate": 0.0005572400404832226, + "loss": 1.5841, + "step": 5564 + }, + { + "epoch": 0.5870253164556962, + "grad_norm": 0.584837019443512, + "learning_rate": 0.0005569974252990896, + "loss": 1.5738, + "step": 5565 + }, + { + "epoch": 0.5871308016877637, + "grad_norm": 0.6394988894462585, + "learning_rate": 
0.0005567548317418576, + "loss": 1.5885, + "step": 5566 + }, + { + "epoch": 0.5872362869198312, + "grad_norm": 0.6821979880332947, + "learning_rate": 0.0005565122598387103, + "loss": 1.6046, + "step": 5567 + }, + { + "epoch": 0.5873417721518988, + "grad_norm": 0.5731332898139954, + "learning_rate": 0.0005562697096168289, + "loss": 1.5512, + "step": 5568 + }, + { + "epoch": 0.5874472573839662, + "grad_norm": 0.7380949854850769, + "learning_rate": 0.0005560271811033928, + "loss": 1.5491, + "step": 5569 + }, + { + "epoch": 0.5875527426160337, + "grad_norm": 0.5428503155708313, + "learning_rate": 0.0005557846743255783, + "loss": 1.5554, + "step": 5570 + }, + { + "epoch": 0.5876582278481013, + "grad_norm": 0.6804865002632141, + "learning_rate": 0.0005555421893105593, + "loss": 1.5747, + "step": 5571 + }, + { + "epoch": 0.5877637130801687, + "grad_norm": 0.6794483065605164, + "learning_rate": 0.0005552997260855077, + "loss": 1.5886, + "step": 5572 + }, + { + "epoch": 0.5878691983122363, + "grad_norm": 0.6579214334487915, + "learning_rate": 0.0005550572846775927, + "loss": 1.569, + "step": 5573 + }, + { + "epoch": 0.5879746835443038, + "grad_norm": 0.637007474899292, + "learning_rate": 0.0005548148651139809, + "loss": 1.5837, + "step": 5574 + }, + { + "epoch": 0.5880801687763713, + "grad_norm": 0.6377767324447632, + "learning_rate": 0.0005545724674218368, + "loss": 1.5891, + "step": 5575 + }, + { + "epoch": 0.5881856540084388, + "grad_norm": 0.6385467052459717, + "learning_rate": 0.0005543300916283223, + "loss": 1.5607, + "step": 5576 + }, + { + "epoch": 0.5882911392405064, + "grad_norm": 0.5746524930000305, + "learning_rate": 0.0005540877377605968, + "loss": 1.5886, + "step": 5577 + }, + { + "epoch": 0.5883966244725738, + "grad_norm": 0.6720818877220154, + "learning_rate": 0.0005538454058458171, + "loss": 1.5635, + "step": 5578 + }, + { + "epoch": 0.5885021097046413, + "grad_norm": 0.5977919697761536, + "learning_rate": 0.0005536030959111377, + "loss": 1.584, + "step": 
5579 + }, + { + "epoch": 0.5886075949367089, + "grad_norm": 0.8170925974845886, + "learning_rate": 0.0005533608079837109, + "loss": 1.5354, + "step": 5580 + }, + { + "epoch": 0.5887130801687763, + "grad_norm": 0.6087709665298462, + "learning_rate": 0.0005531185420906859, + "loss": 1.5746, + "step": 5581 + }, + { + "epoch": 0.5888185654008439, + "grad_norm": 0.6697252988815308, + "learning_rate": 0.0005528762982592101, + "loss": 1.5848, + "step": 5582 + }, + { + "epoch": 0.5889240506329114, + "grad_norm": 0.6455133557319641, + "learning_rate": 0.000552634076516428, + "loss": 1.5566, + "step": 5583 + }, + { + "epoch": 0.5890295358649789, + "grad_norm": 0.8346021175384521, + "learning_rate": 0.0005523918768894819, + "loss": 1.578, + "step": 5584 + }, + { + "epoch": 0.5891350210970464, + "grad_norm": 0.558990478515625, + "learning_rate": 0.0005521496994055112, + "loss": 1.6002, + "step": 5585 + }, + { + "epoch": 0.589240506329114, + "grad_norm": 0.6225048303604126, + "learning_rate": 0.0005519075440916534, + "loss": 1.5684, + "step": 5586 + }, + { + "epoch": 0.5893459915611814, + "grad_norm": 0.5781633257865906, + "learning_rate": 0.000551665410975043, + "loss": 1.5629, + "step": 5587 + }, + { + "epoch": 0.5894514767932489, + "grad_norm": 0.6571250557899475, + "learning_rate": 0.0005514233000828121, + "loss": 1.6157, + "step": 5588 + }, + { + "epoch": 0.5895569620253165, + "grad_norm": 0.56083083152771, + "learning_rate": 0.0005511812114420908, + "loss": 1.5603, + "step": 5589 + }, + { + "epoch": 0.5896624472573839, + "grad_norm": 0.6794281005859375, + "learning_rate": 0.0005509391450800061, + "loss": 1.5623, + "step": 5590 + }, + { + "epoch": 0.5897679324894515, + "grad_norm": 0.613581120967865, + "learning_rate": 0.0005506971010236829, + "loss": 1.5517, + "step": 5591 + }, + { + "epoch": 0.589873417721519, + "grad_norm": 0.7727274298667908, + "learning_rate": 0.0005504550793002433, + "loss": 1.6035, + "step": 5592 + }, + { + "epoch": 0.5899789029535865, + 
"grad_norm": 0.5556207895278931, + "learning_rate": 0.000550213079936807, + "loss": 1.5607, + "step": 5593 + }, + { + "epoch": 0.590084388185654, + "grad_norm": 0.6486953496932983, + "learning_rate": 0.0005499711029604915, + "loss": 1.5659, + "step": 5594 + }, + { + "epoch": 0.5901898734177216, + "grad_norm": 0.6411743760108948, + "learning_rate": 0.0005497291483984113, + "loss": 1.5806, + "step": 5595 + }, + { + "epoch": 0.590295358649789, + "grad_norm": 0.6338092684745789, + "learning_rate": 0.0005494872162776786, + "loss": 1.5502, + "step": 5596 + }, + { + "epoch": 0.5904008438818565, + "grad_norm": 0.604567289352417, + "learning_rate": 0.0005492453066254032, + "loss": 1.5652, + "step": 5597 + }, + { + "epoch": 0.5905063291139241, + "grad_norm": 0.6387218236923218, + "learning_rate": 0.000549003419468692, + "loss": 1.5789, + "step": 5598 + }, + { + "epoch": 0.5906118143459915, + "grad_norm": 0.5929901599884033, + "learning_rate": 0.0005487615548346502, + "loss": 1.5877, + "step": 5599 + }, + { + "epoch": 0.5907172995780591, + "grad_norm": 0.5821229219436646, + "learning_rate": 0.0005485197127503795, + "loss": 1.5964, + "step": 5600 + }, + { + "epoch": 0.5908227848101266, + "grad_norm": 0.6104651093482971, + "learning_rate": 0.0005482778932429798, + "loss": 1.5956, + "step": 5601 + }, + { + "epoch": 0.5909282700421941, + "grad_norm": 0.6256374716758728, + "learning_rate": 0.000548036096339548, + "loss": 1.6019, + "step": 5602 + }, + { + "epoch": 0.5910337552742616, + "grad_norm": 0.712048351764679, + "learning_rate": 0.0005477943220671786, + "loss": 1.5904, + "step": 5603 + }, + { + "epoch": 0.5911392405063292, + "grad_norm": 0.6191166043281555, + "learning_rate": 0.0005475525704529638, + "loss": 1.56, + "step": 5604 + }, + { + "epoch": 0.5912447257383966, + "grad_norm": 0.7296028137207031, + "learning_rate": 0.0005473108415239929, + "loss": 1.5502, + "step": 5605 + }, + { + "epoch": 0.5913502109704641, + "grad_norm": 0.6099110245704651, + "learning_rate": 
0.0005470691353073531, + "loss": 1.5735, + "step": 5606 + }, + { + "epoch": 0.5914556962025317, + "grad_norm": 0.7869654297828674, + "learning_rate": 0.0005468274518301284, + "loss": 1.5789, + "step": 5607 + }, + { + "epoch": 0.5915611814345991, + "grad_norm": 0.6045321226119995, + "learning_rate": 0.0005465857911194006, + "loss": 1.5759, + "step": 5608 + }, + { + "epoch": 0.5916666666666667, + "grad_norm": 0.75149005651474, + "learning_rate": 0.0005463441532022495, + "loss": 1.5594, + "step": 5609 + }, + { + "epoch": 0.5917721518987342, + "grad_norm": 0.6627628207206726, + "learning_rate": 0.0005461025381057516, + "loss": 1.576, + "step": 5610 + }, + { + "epoch": 0.5918776371308017, + "grad_norm": 0.8547760844230652, + "learning_rate": 0.000545860945856981, + "loss": 1.581, + "step": 5611 + }, + { + "epoch": 0.5919831223628692, + "grad_norm": 0.7081727981567383, + "learning_rate": 0.0005456193764830093, + "loss": 1.5958, + "step": 5612 + }, + { + "epoch": 0.5920886075949368, + "grad_norm": 0.5962968468666077, + "learning_rate": 0.0005453778300109056, + "loss": 1.575, + "step": 5613 + }, + { + "epoch": 0.5921940928270042, + "grad_norm": 0.6935669779777527, + "learning_rate": 0.0005451363064677365, + "loss": 1.5925, + "step": 5614 + }, + { + "epoch": 0.5922995780590717, + "grad_norm": 0.6891075372695923, + "learning_rate": 0.0005448948058805657, + "loss": 1.5999, + "step": 5615 + }, + { + "epoch": 0.5924050632911393, + "grad_norm": 0.6704819798469543, + "learning_rate": 0.0005446533282764543, + "loss": 1.5794, + "step": 5616 + }, + { + "epoch": 0.5925105485232067, + "grad_norm": 0.6794523000717163, + "learning_rate": 0.0005444118736824617, + "loss": 1.5669, + "step": 5617 + }, + { + "epoch": 0.5926160337552743, + "grad_norm": 0.5438517332077026, + "learning_rate": 0.000544170442125644, + "loss": 1.585, + "step": 5618 + }, + { + "epoch": 0.5927215189873418, + "grad_norm": 0.8026859164237976, + "learning_rate": 0.0005439290336330545, + "loss": 1.587, + "step": 5619 + 
}, + { + "epoch": 0.5928270042194093, + "grad_norm": 0.6095602512359619, + "learning_rate": 0.0005436876482317444, + "loss": 1.5878, + "step": 5620 + }, + { + "epoch": 0.5929324894514768, + "grad_norm": 0.7309786081314087, + "learning_rate": 0.000543446285948762, + "loss": 1.5958, + "step": 5621 + }, + { + "epoch": 0.5930379746835444, + "grad_norm": 0.6244728565216064, + "learning_rate": 0.0005432049468111534, + "loss": 1.6021, + "step": 5622 + }, + { + "epoch": 0.5931434599156118, + "grad_norm": 0.6063764095306396, + "learning_rate": 0.0005429636308459614, + "loss": 1.5267, + "step": 5623 + }, + { + "epoch": 0.5932489451476793, + "grad_norm": 0.6657538414001465, + "learning_rate": 0.0005427223380802272, + "loss": 1.599, + "step": 5624 + }, + { + "epoch": 0.5933544303797469, + "grad_norm": 0.6521964073181152, + "learning_rate": 0.0005424810685409881, + "loss": 1.6032, + "step": 5625 + }, + { + "epoch": 0.5934599156118143, + "grad_norm": 0.7204378247261047, + "learning_rate": 0.0005422398222552806, + "loss": 1.5803, + "step": 5626 + }, + { + "epoch": 0.5935654008438819, + "grad_norm": 0.6135136485099792, + "learning_rate": 0.0005419985992501367, + "loss": 1.6063, + "step": 5627 + }, + { + "epoch": 0.5936708860759494, + "grad_norm": 0.7029746770858765, + "learning_rate": 0.0005417573995525871, + "loss": 1.5456, + "step": 5628 + }, + { + "epoch": 0.5937763713080169, + "grad_norm": 0.6544419527053833, + "learning_rate": 0.0005415162231896593, + "loss": 1.5756, + "step": 5629 + }, + { + "epoch": 0.5938818565400844, + "grad_norm": 0.5560754537582397, + "learning_rate": 0.0005412750701883782, + "loss": 1.553, + "step": 5630 + }, + { + "epoch": 0.5939873417721518, + "grad_norm": 0.6815051436424255, + "learning_rate": 0.0005410339405757665, + "loss": 1.5868, + "step": 5631 + }, + { + "epoch": 0.5940928270042194, + "grad_norm": 0.5556408762931824, + "learning_rate": 0.0005407928343788435, + "loss": 1.5876, + "step": 5632 + }, + { + "epoch": 0.5941983122362869, + "grad_norm": 
0.651254415512085, + "learning_rate": 0.0005405517516246267, + "loss": 1.569, + "step": 5633 + }, + { + "epoch": 0.5943037974683544, + "grad_norm": 0.6304174065589905, + "learning_rate": 0.0005403106923401302, + "loss": 1.5894, + "step": 5634 + }, + { + "epoch": 0.5944092827004219, + "grad_norm": 0.557384192943573, + "learning_rate": 0.0005400696565523666, + "loss": 1.5704, + "step": 5635 + }, + { + "epoch": 0.5945147679324895, + "grad_norm": 0.5622380971908569, + "learning_rate": 0.0005398286442883448, + "loss": 1.5767, + "step": 5636 + }, + { + "epoch": 0.5946202531645569, + "grad_norm": 0.7430731654167175, + "learning_rate": 0.0005395876555750712, + "loss": 1.5742, + "step": 5637 + }, + { + "epoch": 0.5947257383966245, + "grad_norm": 0.6155410408973694, + "learning_rate": 0.0005393466904395503, + "loss": 1.6253, + "step": 5638 + }, + { + "epoch": 0.594831223628692, + "grad_norm": 0.7601397633552551, + "learning_rate": 0.000539105748908783, + "loss": 1.5789, + "step": 5639 + }, + { + "epoch": 0.5949367088607594, + "grad_norm": 0.6786602735519409, + "learning_rate": 0.0005388648310097682, + "loss": 1.6032, + "step": 5640 + }, + { + "epoch": 0.595042194092827, + "grad_norm": 0.7725270390510559, + "learning_rate": 0.0005386239367695018, + "loss": 1.5928, + "step": 5641 + }, + { + "epoch": 0.5951476793248945, + "grad_norm": 0.7010865807533264, + "learning_rate": 0.0005383830662149771, + "loss": 1.5838, + "step": 5642 + }, + { + "epoch": 0.595253164556962, + "grad_norm": 0.6804008483886719, + "learning_rate": 0.0005381422193731853, + "loss": 1.5736, + "step": 5643 + }, + { + "epoch": 0.5953586497890295, + "grad_norm": 0.6557987332344055, + "learning_rate": 0.0005379013962711143, + "loss": 1.5845, + "step": 5644 + }, + { + "epoch": 0.5954641350210971, + "grad_norm": 0.6040624380111694, + "learning_rate": 0.0005376605969357494, + "loss": 1.5841, + "step": 5645 + }, + { + "epoch": 0.5955696202531645, + "grad_norm": 0.7218878269195557, + "learning_rate": 
0.0005374198213940734, + "loss": 1.5697, + "step": 5646 + }, + { + "epoch": 0.5956751054852321, + "grad_norm": 0.626291811466217, + "learning_rate": 0.0005371790696730665, + "loss": 1.5804, + "step": 5647 + }, + { + "epoch": 0.5957805907172996, + "grad_norm": 0.6593583226203918, + "learning_rate": 0.000536938341799706, + "loss": 1.5773, + "step": 5648 + }, + { + "epoch": 0.595886075949367, + "grad_norm": 0.6325438022613525, + "learning_rate": 0.0005366976378009668, + "loss": 1.5916, + "step": 5649 + }, + { + "epoch": 0.5959915611814346, + "grad_norm": 0.6031495332717896, + "learning_rate": 0.000536456957703821, + "loss": 1.5737, + "step": 5650 + }, + { + "epoch": 0.5960970464135021, + "grad_norm": 0.6571020483970642, + "learning_rate": 0.0005362163015352374, + "loss": 1.5738, + "step": 5651 + }, + { + "epoch": 0.5962025316455696, + "grad_norm": 0.595591127872467, + "learning_rate": 0.0005359756693221836, + "loss": 1.5647, + "step": 5652 + }, + { + "epoch": 0.5963080168776371, + "grad_norm": 0.5920302271842957, + "learning_rate": 0.0005357350610916233, + "loss": 1.5323, + "step": 5653 + }, + { + "epoch": 0.5964135021097047, + "grad_norm": 0.5683962106704712, + "learning_rate": 0.0005354944768705179, + "loss": 1.5726, + "step": 5654 + }, + { + "epoch": 0.5965189873417721, + "grad_norm": 0.6628867387771606, + "learning_rate": 0.0005352539166858258, + "loss": 1.5907, + "step": 5655 + }, + { + "epoch": 0.5966244725738397, + "grad_norm": 0.6567812561988831, + "learning_rate": 0.0005350133805645034, + "loss": 1.5952, + "step": 5656 + }, + { + "epoch": 0.5967299578059072, + "grad_norm": 0.6367135047912598, + "learning_rate": 0.0005347728685335036, + "loss": 1.6, + "step": 5657 + }, + { + "epoch": 0.5968354430379746, + "grad_norm": 0.7006592154502869, + "learning_rate": 0.0005345323806197771, + "loss": 1.6131, + "step": 5658 + }, + { + "epoch": 0.5969409282700422, + "grad_norm": 0.599143385887146, + "learning_rate": 0.0005342919168502717, + "loss": 1.5631, + "step": 5659 + 
}, + { + "epoch": 0.5970464135021097, + "grad_norm": 0.8191824555397034, + "learning_rate": 0.0005340514772519324, + "loss": 1.5665, + "step": 5660 + }, + { + "epoch": 0.5971518987341772, + "grad_norm": 0.6498884558677673, + "learning_rate": 0.0005338110618517022, + "loss": 1.6073, + "step": 5661 + }, + { + "epoch": 0.5972573839662447, + "grad_norm": 0.714560866355896, + "learning_rate": 0.0005335706706765205, + "loss": 1.5534, + "step": 5662 + }, + { + "epoch": 0.5973628691983123, + "grad_norm": 0.6705093383789062, + "learning_rate": 0.0005333303037533244, + "loss": 1.6041, + "step": 5663 + }, + { + "epoch": 0.5974683544303797, + "grad_norm": 0.6431041955947876, + "learning_rate": 0.0005330899611090482, + "loss": 1.5874, + "step": 5664 + }, + { + "epoch": 0.5975738396624473, + "grad_norm": 0.6585779190063477, + "learning_rate": 0.0005328496427706235, + "loss": 1.5796, + "step": 5665 + }, + { + "epoch": 0.5976793248945148, + "grad_norm": 0.6589877009391785, + "learning_rate": 0.000532609348764979, + "loss": 1.5468, + "step": 5666 + }, + { + "epoch": 0.5977848101265822, + "grad_norm": 0.6402413249015808, + "learning_rate": 0.0005323690791190412, + "loss": 1.613, + "step": 5667 + }, + { + "epoch": 0.5978902953586498, + "grad_norm": 0.5974688529968262, + "learning_rate": 0.0005321288338597327, + "loss": 1.6129, + "step": 5668 + }, + { + "epoch": 0.5979957805907173, + "grad_norm": 0.6392907500267029, + "learning_rate": 0.0005318886130139753, + "loss": 1.5889, + "step": 5669 + }, + { + "epoch": 0.5981012658227848, + "grad_norm": 0.6413401365280151, + "learning_rate": 0.0005316484166086863, + "loss": 1.5566, + "step": 5670 + }, + { + "epoch": 0.5982067510548523, + "grad_norm": 0.5735971331596375, + "learning_rate": 0.0005314082446707811, + "loss": 1.5856, + "step": 5671 + }, + { + "epoch": 0.5983122362869199, + "grad_norm": 0.6241706013679504, + "learning_rate": 0.000531168097227172, + "loss": 1.5704, + "step": 5672 + }, + { + "epoch": 0.5984177215189873, + "grad_norm": 
0.5781348943710327, + "learning_rate": 0.0005309279743047687, + "loss": 1.6144, + "step": 5673 + }, + { + "epoch": 0.5985232067510549, + "grad_norm": 0.5832539796829224, + "learning_rate": 0.0005306878759304785, + "loss": 1.6, + "step": 5674 + }, + { + "epoch": 0.5986286919831224, + "grad_norm": 0.6444602012634277, + "learning_rate": 0.0005304478021312053, + "loss": 1.5737, + "step": 5675 + }, + { + "epoch": 0.5987341772151898, + "grad_norm": 0.5994611978530884, + "learning_rate": 0.0005302077529338507, + "loss": 1.6032, + "step": 5676 + }, + { + "epoch": 0.5988396624472574, + "grad_norm": 0.6291059255599976, + "learning_rate": 0.0005299677283653128, + "loss": 1.5461, + "step": 5677 + }, + { + "epoch": 0.5989451476793249, + "grad_norm": 0.6383351683616638, + "learning_rate": 0.0005297277284524888, + "loss": 1.6117, + "step": 5678 + }, + { + "epoch": 0.5990506329113924, + "grad_norm": 0.5966117978096008, + "learning_rate": 0.0005294877532222709, + "loss": 1.5455, + "step": 5679 + }, + { + "epoch": 0.5991561181434599, + "grad_norm": 0.5738033056259155, + "learning_rate": 0.00052924780270155, + "loss": 1.5952, + "step": 5680 + }, + { + "epoch": 0.5992616033755275, + "grad_norm": 0.6085278987884521, + "learning_rate": 0.0005290078769172135, + "loss": 1.5792, + "step": 5681 + }, + { + "epoch": 0.5993670886075949, + "grad_norm": 0.630861759185791, + "learning_rate": 0.0005287679758961465, + "loss": 1.5633, + "step": 5682 + }, + { + "epoch": 0.5994725738396625, + "grad_norm": 0.5688868165016174, + "learning_rate": 0.0005285280996652308, + "loss": 1.5607, + "step": 5683 + }, + { + "epoch": 0.59957805907173, + "grad_norm": 0.6144999861717224, + "learning_rate": 0.0005282882482513459, + "loss": 1.5434, + "step": 5684 + }, + { + "epoch": 0.5996835443037974, + "grad_norm": 0.5873779058456421, + "learning_rate": 0.0005280484216813686, + "loss": 1.5593, + "step": 5685 + }, + { + "epoch": 0.599789029535865, + "grad_norm": 0.5837879180908203, + "learning_rate": 
0.0005278086199821718, + "loss": 1.5846, + "step": 5686 + }, + { + "epoch": 0.5998945147679325, + "grad_norm": 0.6197059154510498, + "learning_rate": 0.0005275688431806274, + "loss": 1.5703, + "step": 5687 + }, + { + "epoch": 0.6, + "grad_norm": 0.5877225995063782, + "learning_rate": 0.0005273290913036033, + "loss": 1.555, + "step": 5688 + }, + { + "epoch": 0.6001054852320675, + "grad_norm": 0.6513393521308899, + "learning_rate": 0.0005270893643779649, + "loss": 1.544, + "step": 5689 + }, + { + "epoch": 0.6002109704641351, + "grad_norm": 0.7686142325401306, + "learning_rate": 0.0005268496624305747, + "loss": 1.5688, + "step": 5690 + }, + { + "epoch": 0.6003164556962025, + "grad_norm": 0.5797557234764099, + "learning_rate": 0.0005266099854882927, + "loss": 1.5999, + "step": 5691 + }, + { + "epoch": 0.6004219409282701, + "grad_norm": 0.6811420321464539, + "learning_rate": 0.0005263703335779755, + "loss": 1.5304, + "step": 5692 + }, + { + "epoch": 0.6005274261603376, + "grad_norm": 0.6824289560317993, + "learning_rate": 0.0005261307067264778, + "loss": 1.5793, + "step": 5693 + }, + { + "epoch": 0.600632911392405, + "grad_norm": 0.6150546073913574, + "learning_rate": 0.0005258911049606503, + "loss": 1.5514, + "step": 5694 + }, + { + "epoch": 0.6007383966244726, + "grad_norm": 0.574905276298523, + "learning_rate": 0.0005256515283073422, + "loss": 1.5289, + "step": 5695 + }, + { + "epoch": 0.60084388185654, + "grad_norm": 0.6001357436180115, + "learning_rate": 0.0005254119767933992, + "loss": 1.5825, + "step": 5696 + }, + { + "epoch": 0.6009493670886076, + "grad_norm": 0.6194983720779419, + "learning_rate": 0.0005251724504456641, + "loss": 1.5859, + "step": 5697 + }, + { + "epoch": 0.6010548523206751, + "grad_norm": 0.5948306918144226, + "learning_rate": 0.000524932949290977, + "loss": 1.5736, + "step": 5698 + }, + { + "epoch": 0.6011603375527426, + "grad_norm": 0.6697880625724792, + "learning_rate": 0.0005246934733561751, + "loss": 1.5496, + "step": 5699 + }, + { + 
"epoch": 0.6012658227848101, + "grad_norm": 0.6662291288375854, + "learning_rate": 0.0005244540226680931, + "loss": 1.5946, + "step": 5700 + }, + { + "epoch": 0.6013713080168777, + "grad_norm": 0.632082998752594, + "learning_rate": 0.0005242145972535625, + "loss": 1.5596, + "step": 5701 + }, + { + "epoch": 0.6014767932489451, + "grad_norm": 0.7144533395767212, + "learning_rate": 0.0005239751971394122, + "loss": 1.5806, + "step": 5702 + }, + { + "epoch": 0.6015822784810126, + "grad_norm": 0.6279142498970032, + "learning_rate": 0.0005237358223524678, + "loss": 1.5952, + "step": 5703 + }, + { + "epoch": 0.6016877637130802, + "grad_norm": 0.6426966786384583, + "learning_rate": 0.000523496472919553, + "loss": 1.5584, + "step": 5704 + }, + { + "epoch": 0.6017932489451476, + "grad_norm": 0.6566734313964844, + "learning_rate": 0.000523257148867488, + "loss": 1.5875, + "step": 5705 + }, + { + "epoch": 0.6018987341772152, + "grad_norm": 0.7021567821502686, + "learning_rate": 0.00052301785022309, + "loss": 1.5456, + "step": 5706 + }, + { + "epoch": 0.6020042194092827, + "grad_norm": 0.5557321906089783, + "learning_rate": 0.0005227785770131737, + "loss": 1.5493, + "step": 5707 + }, + { + "epoch": 0.6021097046413502, + "grad_norm": 0.6765873432159424, + "learning_rate": 0.0005225393292645509, + "loss": 1.5643, + "step": 5708 + }, + { + "epoch": 0.6022151898734177, + "grad_norm": 0.6133869290351868, + "learning_rate": 0.0005223001070040305, + "loss": 1.5629, + "step": 5709 + }, + { + "epoch": 0.6023206751054853, + "grad_norm": 0.7211856842041016, + "learning_rate": 0.0005220609102584185, + "loss": 1.6196, + "step": 5710 + }, + { + "epoch": 0.6024261603375527, + "grad_norm": 0.5591139197349548, + "learning_rate": 0.0005218217390545181, + "loss": 1.5964, + "step": 5711 + }, + { + "epoch": 0.6025316455696202, + "grad_norm": 0.6369987726211548, + "learning_rate": 0.0005215825934191293, + "loss": 1.5674, + "step": 5712 + }, + { + "epoch": 0.6026371308016878, + "grad_norm": 
0.6540602445602417, + "learning_rate": 0.0005213434733790503, + "loss": 1.5805, + "step": 5713 + }, + { + "epoch": 0.6027426160337552, + "grad_norm": 0.6624118685722351, + "learning_rate": 0.0005211043789610752, + "loss": 1.5659, + "step": 5714 + }, + { + "epoch": 0.6028481012658228, + "grad_norm": 0.641331136226654, + "learning_rate": 0.0005208653101919959, + "loss": 1.5553, + "step": 5715 + }, + { + "epoch": 0.6029535864978903, + "grad_norm": 0.6043226718902588, + "learning_rate": 0.0005206262670986012, + "loss": 1.5977, + "step": 5716 + }, + { + "epoch": 0.6030590717299578, + "grad_norm": 0.5988179445266724, + "learning_rate": 0.0005203872497076768, + "loss": 1.6017, + "step": 5717 + }, + { + "epoch": 0.6031645569620253, + "grad_norm": 0.6116716265678406, + "learning_rate": 0.0005201482580460063, + "loss": 1.576, + "step": 5718 + }, + { + "epoch": 0.6032700421940929, + "grad_norm": 0.5721542835235596, + "learning_rate": 0.0005199092921403696, + "loss": 1.5644, + "step": 5719 + }, + { + "epoch": 0.6033755274261603, + "grad_norm": 0.5955446362495422, + "learning_rate": 0.0005196703520175437, + "loss": 1.5723, + "step": 5720 + }, + { + "epoch": 0.6034810126582278, + "grad_norm": 0.6101672053337097, + "learning_rate": 0.0005194314377043037, + "loss": 1.5622, + "step": 5721 + }, + { + "epoch": 0.6035864978902954, + "grad_norm": 0.584173858165741, + "learning_rate": 0.0005191925492274205, + "loss": 1.5712, + "step": 5722 + }, + { + "epoch": 0.6036919831223628, + "grad_norm": 0.6472320556640625, + "learning_rate": 0.0005189536866136634, + "loss": 1.5625, + "step": 5723 + }, + { + "epoch": 0.6037974683544304, + "grad_norm": 0.5869910717010498, + "learning_rate": 0.0005187148498897977, + "loss": 1.5966, + "step": 5724 + }, + { + "epoch": 0.6039029535864979, + "grad_norm": 0.6504735946655273, + "learning_rate": 0.0005184760390825865, + "loss": 1.5567, + "step": 5725 + }, + { + "epoch": 0.6040084388185654, + "grad_norm": 0.6114180088043213, + "learning_rate": 
0.0005182372542187895, + "loss": 1.5746, + "step": 5726 + }, + { + "epoch": 0.6041139240506329, + "grad_norm": 0.5962209105491638, + "learning_rate": 0.0005179984953251639, + "loss": 1.5559, + "step": 5727 + }, + { + "epoch": 0.6042194092827005, + "grad_norm": 0.7660477161407471, + "learning_rate": 0.0005177597624284637, + "loss": 1.5636, + "step": 5728 + }, + { + "epoch": 0.6043248945147679, + "grad_norm": 0.573400616645813, + "learning_rate": 0.00051752105555544, + "loss": 1.5726, + "step": 5729 + }, + { + "epoch": 0.6044303797468354, + "grad_norm": 0.6024017930030823, + "learning_rate": 0.0005172823747328415, + "loss": 1.5707, + "step": 5730 + }, + { + "epoch": 0.604535864978903, + "grad_norm": 0.6109668016433716, + "learning_rate": 0.0005170437199874132, + "loss": 1.5729, + "step": 5731 + }, + { + "epoch": 0.6046413502109704, + "grad_norm": 0.6515042781829834, + "learning_rate": 0.0005168050913458977, + "loss": 1.5567, + "step": 5732 + }, + { + "epoch": 0.604746835443038, + "grad_norm": 0.6277028322219849, + "learning_rate": 0.0005165664888350347, + "loss": 1.5752, + "step": 5733 + }, + { + "epoch": 0.6048523206751055, + "grad_norm": 0.7467052340507507, + "learning_rate": 0.0005163279124815605, + "loss": 1.545, + "step": 5734 + }, + { + "epoch": 0.604957805907173, + "grad_norm": 0.7250126004219055, + "learning_rate": 0.000516089362312209, + "loss": 1.563, + "step": 5735 + }, + { + "epoch": 0.6050632911392405, + "grad_norm": 0.8132262825965881, + "learning_rate": 0.0005158508383537109, + "loss": 1.5935, + "step": 5736 + }, + { + "epoch": 0.6051687763713081, + "grad_norm": 0.7621058821678162, + "learning_rate": 0.0005156123406327938, + "loss": 1.569, + "step": 5737 + }, + { + "epoch": 0.6052742616033755, + "grad_norm": 0.7192016243934631, + "learning_rate": 0.0005153738691761826, + "loss": 1.5729, + "step": 5738 + }, + { + "epoch": 0.605379746835443, + "grad_norm": 0.5807883143424988, + "learning_rate": 0.0005151354240105994, + "loss": 1.6141, + "step": 5739 + }, 
+ { + "epoch": 0.6054852320675106, + "grad_norm": 0.6687362194061279, + "learning_rate": 0.0005148970051627632, + "loss": 1.5516, + "step": 5740 + }, + { + "epoch": 0.605590717299578, + "grad_norm": 0.5762607455253601, + "learning_rate": 0.0005146586126593898, + "loss": 1.5507, + "step": 5741 + }, + { + "epoch": 0.6056962025316456, + "grad_norm": 0.6813618540763855, + "learning_rate": 0.0005144202465271922, + "loss": 1.5763, + "step": 5742 + }, + { + "epoch": 0.6058016877637131, + "grad_norm": 0.5924825668334961, + "learning_rate": 0.000514181906792881, + "loss": 1.5255, + "step": 5743 + }, + { + "epoch": 0.6059071729957806, + "grad_norm": 0.6279429793357849, + "learning_rate": 0.0005139435934831628, + "loss": 1.5415, + "step": 5744 + }, + { + "epoch": 0.6060126582278481, + "grad_norm": 0.6124259829521179, + "learning_rate": 0.0005137053066247421, + "loss": 1.5336, + "step": 5745 + }, + { + "epoch": 0.6061181434599157, + "grad_norm": 0.6109564900398254, + "learning_rate": 0.00051346704624432, + "loss": 1.5566, + "step": 5746 + }, + { + "epoch": 0.6062236286919831, + "grad_norm": 0.762710690498352, + "learning_rate": 0.000513228812368595, + "loss": 1.5472, + "step": 5747 + }, + { + "epoch": 0.6063291139240506, + "grad_norm": 0.605510413646698, + "learning_rate": 0.0005129906050242622, + "loss": 1.5551, + "step": 5748 + }, + { + "epoch": 0.6064345991561182, + "grad_norm": 0.6946987509727478, + "learning_rate": 0.0005127524242380139, + "loss": 1.5714, + "step": 5749 + }, + { + "epoch": 0.6065400843881856, + "grad_norm": 0.6807785034179688, + "learning_rate": 0.0005125142700365394, + "loss": 1.5069, + "step": 5750 + }, + { + "epoch": 0.6066455696202532, + "grad_norm": 0.5985965728759766, + "learning_rate": 0.0005122761424465254, + "loss": 1.5175, + "step": 5751 + }, + { + "epoch": 0.6067510548523207, + "grad_norm": 0.5997193455696106, + "learning_rate": 0.0005120380414946546, + "loss": 1.5355, + "step": 5752 + }, + { + "epoch": 0.6068565400843882, + "grad_norm": 
0.6735415458679199, + "learning_rate": 0.0005117999672076081, + "loss": 1.5742, + "step": 5753 + }, + { + "epoch": 0.6069620253164557, + "grad_norm": 0.6597728729248047, + "learning_rate": 0.0005115619196120632, + "loss": 1.5559, + "step": 5754 + }, + { + "epoch": 0.6070675105485233, + "grad_norm": 0.6316596865653992, + "learning_rate": 0.0005113238987346939, + "loss": 1.5814, + "step": 5755 + }, + { + "epoch": 0.6071729957805907, + "grad_norm": 0.705649733543396, + "learning_rate": 0.000511085904602172, + "loss": 1.541, + "step": 5756 + }, + { + "epoch": 0.6072784810126582, + "grad_norm": 0.6281130909919739, + "learning_rate": 0.0005108479372411658, + "loss": 1.5305, + "step": 5757 + }, + { + "epoch": 0.6073839662447258, + "grad_norm": 0.5703398585319519, + "learning_rate": 0.0005106099966783409, + "loss": 1.5849, + "step": 5758 + }, + { + "epoch": 0.6074894514767932, + "grad_norm": 0.6377915143966675, + "learning_rate": 0.0005103720829403594, + "loss": 1.5667, + "step": 5759 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 0.6256996393203735, + "learning_rate": 0.000510134196053881, + "loss": 1.5671, + "step": 5760 + }, + { + "epoch": 0.6077004219409282, + "grad_norm": 0.6755868196487427, + "learning_rate": 0.000509896336045562, + "loss": 1.5222, + "step": 5761 + }, + { + "epoch": 0.6078059071729958, + "grad_norm": 0.6294710636138916, + "learning_rate": 0.0005096585029420556, + "loss": 1.5712, + "step": 5762 + }, + { + "epoch": 0.6079113924050633, + "grad_norm": 0.7228021025657654, + "learning_rate": 0.0005094206967700127, + "loss": 1.5599, + "step": 5763 + }, + { + "epoch": 0.6080168776371307, + "grad_norm": 0.5754632353782654, + "learning_rate": 0.0005091829175560801, + "loss": 1.5558, + "step": 5764 + }, + { + "epoch": 0.6081223628691983, + "grad_norm": 0.6941200494766235, + "learning_rate": 0.0005089451653269026, + "loss": 1.5462, + "step": 5765 + }, + { + "epoch": 0.6082278481012658, + "grad_norm": 0.6133466362953186, + "learning_rate": 
0.0005087074401091212, + "loss": 1.5861, + "step": 5766 + }, + { + "epoch": 0.6083333333333333, + "grad_norm": 0.7029843926429749, + "learning_rate": 0.0005084697419293746, + "loss": 1.5661, + "step": 5767 + }, + { + "epoch": 0.6084388185654008, + "grad_norm": 0.6528362035751343, + "learning_rate": 0.0005082320708142975, + "loss": 1.5426, + "step": 5768 + }, + { + "epoch": 0.6085443037974684, + "grad_norm": 0.6371585726737976, + "learning_rate": 0.0005079944267905226, + "loss": 1.5587, + "step": 5769 + }, + { + "epoch": 0.6086497890295358, + "grad_norm": 0.7927284240722656, + "learning_rate": 0.0005077568098846789, + "loss": 1.5743, + "step": 5770 + }, + { + "epoch": 0.6087552742616034, + "grad_norm": 0.7361547946929932, + "learning_rate": 0.0005075192201233924, + "loss": 1.5457, + "step": 5771 + }, + { + "epoch": 0.6088607594936709, + "grad_norm": 0.5725402235984802, + "learning_rate": 0.0005072816575332864, + "loss": 1.5707, + "step": 5772 + }, + { + "epoch": 0.6089662447257383, + "grad_norm": 0.6133144497871399, + "learning_rate": 0.0005070441221409811, + "loss": 1.573, + "step": 5773 + }, + { + "epoch": 0.6090717299578059, + "grad_norm": 0.5652174949645996, + "learning_rate": 0.0005068066139730936, + "loss": 1.5575, + "step": 5774 + }, + { + "epoch": 0.6091772151898734, + "grad_norm": 0.5647732019424438, + "learning_rate": 0.0005065691330562375, + "loss": 1.5532, + "step": 5775 + }, + { + "epoch": 0.6092827004219409, + "grad_norm": 0.6053433418273926, + "learning_rate": 0.0005063316794170239, + "loss": 1.5886, + "step": 5776 + }, + { + "epoch": 0.6093881856540084, + "grad_norm": 0.5933833718299866, + "learning_rate": 0.0005060942530820607, + "loss": 1.561, + "step": 5777 + }, + { + "epoch": 0.609493670886076, + "grad_norm": 0.5971875190734863, + "learning_rate": 0.0005058568540779526, + "loss": 1.5825, + "step": 5778 + }, + { + "epoch": 0.6095991561181434, + "grad_norm": 0.6117184162139893, + "learning_rate": 0.0005056194824313015, + "loss": 1.568, + "step": 
5779 + }, + { + "epoch": 0.609704641350211, + "grad_norm": 0.5883001089096069, + "learning_rate": 0.000505382138168706, + "loss": 1.5758, + "step": 5780 + }, + { + "epoch": 0.6098101265822785, + "grad_norm": 0.5699877738952637, + "learning_rate": 0.0005051448213167614, + "loss": 1.5663, + "step": 5781 + }, + { + "epoch": 0.609915611814346, + "grad_norm": 0.6544675827026367, + "learning_rate": 0.0005049075319020608, + "loss": 1.5689, + "step": 5782 + }, + { + "epoch": 0.6100210970464135, + "grad_norm": 0.6201446056365967, + "learning_rate": 0.0005046702699511933, + "loss": 1.591, + "step": 5783 + }, + { + "epoch": 0.610126582278481, + "grad_norm": 0.5834605097770691, + "learning_rate": 0.0005044330354907454, + "loss": 1.5963, + "step": 5784 + }, + { + "epoch": 0.6102320675105485, + "grad_norm": 0.5694079995155334, + "learning_rate": 0.0005041958285473005, + "loss": 1.5475, + "step": 5785 + }, + { + "epoch": 0.610337552742616, + "grad_norm": 0.6714529395103455, + "learning_rate": 0.0005039586491474386, + "loss": 1.5292, + "step": 5786 + }, + { + "epoch": 0.6104430379746836, + "grad_norm": 0.6094448566436768, + "learning_rate": 0.000503721497317737, + "loss": 1.537, + "step": 5787 + }, + { + "epoch": 0.610548523206751, + "grad_norm": 0.5809205174446106, + "learning_rate": 0.0005034843730847696, + "loss": 1.5523, + "step": 5788 + }, + { + "epoch": 0.6106540084388186, + "grad_norm": 0.5702049732208252, + "learning_rate": 0.0005032472764751074, + "loss": 1.5756, + "step": 5789 + }, + { + "epoch": 0.6107594936708861, + "grad_norm": 0.6222290992736816, + "learning_rate": 0.0005030102075153181, + "loss": 1.5449, + "step": 5790 + }, + { + "epoch": 0.6108649789029535, + "grad_norm": 0.5969006419181824, + "learning_rate": 0.000502773166231967, + "loss": 1.5924, + "step": 5791 + }, + { + "epoch": 0.6109704641350211, + "grad_norm": 0.6937646865844727, + "learning_rate": 0.0005025361526516151, + "loss": 1.5663, + "step": 5792 + }, + { + "epoch": 0.6110759493670886, + "grad_norm": 
0.6507700681686401, + "learning_rate": 0.0005022991668008216, + "loss": 1.5908, + "step": 5793 + }, + { + "epoch": 0.6111814345991561, + "grad_norm": 0.5988737940788269, + "learning_rate": 0.0005020622087061415, + "loss": 1.5765, + "step": 5794 + }, + { + "epoch": 0.6112869198312236, + "grad_norm": 0.5899956226348877, + "learning_rate": 0.0005018252783941273, + "loss": 1.5485, + "step": 5795 + }, + { + "epoch": 0.6113924050632912, + "grad_norm": 0.6970071196556091, + "learning_rate": 0.0005015883758913281, + "loss": 1.5553, + "step": 5796 + }, + { + "epoch": 0.6114978902953586, + "grad_norm": 0.6216375827789307, + "learning_rate": 0.0005013515012242901, + "loss": 1.5689, + "step": 5797 + }, + { + "epoch": 0.6116033755274262, + "grad_norm": 0.6080371141433716, + "learning_rate": 0.0005011146544195559, + "loss": 1.5311, + "step": 5798 + }, + { + "epoch": 0.6117088607594937, + "grad_norm": 0.5661231875419617, + "learning_rate": 0.000500877835503666, + "loss": 1.5632, + "step": 5799 + }, + { + "epoch": 0.6118143459915611, + "grad_norm": 0.6525977253913879, + "learning_rate": 0.0005006410445031569, + "loss": 1.573, + "step": 5800 + }, + { + "epoch": 0.6119198312236287, + "grad_norm": 0.5273959040641785, + "learning_rate": 0.0005004042814445622, + "loss": 1.58, + "step": 5801 + }, + { + "epoch": 0.6120253164556962, + "grad_norm": 0.6799975037574768, + "learning_rate": 0.0005001675463544125, + "loss": 1.5597, + "step": 5802 + }, + { + "epoch": 0.6121308016877637, + "grad_norm": 0.5645708441734314, + "learning_rate": 0.0004999308392592349, + "loss": 1.5427, + "step": 5803 + }, + { + "epoch": 0.6122362869198312, + "grad_norm": 0.5852965116500854, + "learning_rate": 0.0004996941601855536, + "loss": 1.5677, + "step": 5804 + }, + { + "epoch": 0.6123417721518988, + "grad_norm": 0.5655395984649658, + "learning_rate": 0.0004994575091598898, + "loss": 1.54, + "step": 5805 + }, + { + "epoch": 0.6124472573839662, + "grad_norm": 0.6047569513320923, + "learning_rate": 
0.0004992208862087616, + "loss": 1.598, + "step": 5806 + }, + { + "epoch": 0.6125527426160338, + "grad_norm": 0.604585587978363, + "learning_rate": 0.0004989842913586832, + "loss": 1.5903, + "step": 5807 + }, + { + "epoch": 0.6126582278481013, + "grad_norm": 0.5523398518562317, + "learning_rate": 0.000498747724636167, + "loss": 1.5836, + "step": 5808 + }, + { + "epoch": 0.6127637130801687, + "grad_norm": 0.5443519949913025, + "learning_rate": 0.000498511186067721, + "loss": 1.5911, + "step": 5809 + }, + { + "epoch": 0.6128691983122363, + "grad_norm": 0.5988107323646545, + "learning_rate": 0.0004982746756798507, + "loss": 1.5723, + "step": 5810 + }, + { + "epoch": 0.6129746835443038, + "grad_norm": 0.6448413133621216, + "learning_rate": 0.0004980381934990583, + "loss": 1.6084, + "step": 5811 + }, + { + "epoch": 0.6130801687763713, + "grad_norm": 0.5236389636993408, + "learning_rate": 0.0004978017395518425, + "loss": 1.5691, + "step": 5812 + }, + { + "epoch": 0.6131856540084388, + "grad_norm": 0.6038722395896912, + "learning_rate": 0.0004975653138646994, + "loss": 1.559, + "step": 5813 + }, + { + "epoch": 0.6132911392405064, + "grad_norm": 0.5506672859191895, + "learning_rate": 0.0004973289164641217, + "loss": 1.5645, + "step": 5814 + }, + { + "epoch": 0.6133966244725738, + "grad_norm": 0.5575199723243713, + "learning_rate": 0.0004970925473765988, + "loss": 1.6049, + "step": 5815 + }, + { + "epoch": 0.6135021097046414, + "grad_norm": 0.5511023998260498, + "learning_rate": 0.0004968562066286168, + "loss": 1.5994, + "step": 5816 + }, + { + "epoch": 0.6136075949367089, + "grad_norm": 0.5366103649139404, + "learning_rate": 0.0004966198942466595, + "loss": 1.5741, + "step": 5817 + }, + { + "epoch": 0.6137130801687763, + "grad_norm": 0.5705751776695251, + "learning_rate": 0.0004963836102572065, + "loss": 1.5945, + "step": 5818 + }, + { + "epoch": 0.6138185654008439, + "grad_norm": 0.6171708106994629, + "learning_rate": 0.0004961473546867346, + "loss": 1.5733, + "step": 
5819 + }, + { + "epoch": 0.6139240506329114, + "grad_norm": 0.6564912796020508, + "learning_rate": 0.0004959111275617174, + "loss": 1.5862, + "step": 5820 + }, + { + "epoch": 0.6140295358649789, + "grad_norm": 0.7394846081733704, + "learning_rate": 0.0004956749289086254, + "loss": 1.6006, + "step": 5821 + }, + { + "epoch": 0.6141350210970464, + "grad_norm": 0.6273571848869324, + "learning_rate": 0.0004954387587539257, + "loss": 1.5536, + "step": 5822 + }, + { + "epoch": 0.614240506329114, + "grad_norm": 0.6879159808158875, + "learning_rate": 0.0004952026171240826, + "loss": 1.5571, + "step": 5823 + }, + { + "epoch": 0.6143459915611814, + "grad_norm": 0.5817408561706543, + "learning_rate": 0.0004949665040455566, + "loss": 1.567, + "step": 5824 + }, + { + "epoch": 0.614451476793249, + "grad_norm": 0.6883964538574219, + "learning_rate": 0.0004947304195448052, + "loss": 1.5635, + "step": 5825 + }, + { + "epoch": 0.6145569620253165, + "grad_norm": 0.6382464170455933, + "learning_rate": 0.0004944943636482836, + "loss": 1.5625, + "step": 5826 + }, + { + "epoch": 0.614662447257384, + "grad_norm": 0.5761606097221375, + "learning_rate": 0.0004942583363824428, + "loss": 1.5679, + "step": 5827 + }, + { + "epoch": 0.6147679324894515, + "grad_norm": 0.618432879447937, + "learning_rate": 0.0004940223377737304, + "loss": 1.5435, + "step": 5828 + }, + { + "epoch": 0.6148734177215189, + "grad_norm": 0.6196099519729614, + "learning_rate": 0.0004937863678485915, + "loss": 1.5745, + "step": 5829 + }, + { + "epoch": 0.6149789029535865, + "grad_norm": 0.6006761193275452, + "learning_rate": 0.0004935504266334677, + "loss": 1.5461, + "step": 5830 + }, + { + "epoch": 0.615084388185654, + "grad_norm": 0.6835829019546509, + "learning_rate": 0.0004933145141547975, + "loss": 1.5723, + "step": 5831 + }, + { + "epoch": 0.6151898734177215, + "grad_norm": 0.5806536078453064, + "learning_rate": 0.0004930786304390158, + "loss": 1.5964, + "step": 5832 + }, + { + "epoch": 0.615295358649789, + 
"grad_norm": 0.6028302907943726, + "learning_rate": 0.0004928427755125544, + "loss": 1.5518, + "step": 5833 + }, + { + "epoch": 0.6154008438818566, + "grad_norm": 0.7370254993438721, + "learning_rate": 0.0004926069494018427, + "loss": 1.5819, + "step": 5834 + }, + { + "epoch": 0.615506329113924, + "grad_norm": 0.5884962677955627, + "learning_rate": 0.0004923711521333056, + "loss": 1.5659, + "step": 5835 + }, + { + "epoch": 0.6156118143459915, + "grad_norm": 0.7463118433952332, + "learning_rate": 0.0004921353837333657, + "loss": 1.6063, + "step": 5836 + }, + { + "epoch": 0.6157172995780591, + "grad_norm": 0.6625201106071472, + "learning_rate": 0.0004918996442284419, + "loss": 1.5493, + "step": 5837 + }, + { + "epoch": 0.6158227848101265, + "grad_norm": 0.6132204532623291, + "learning_rate": 0.0004916639336449499, + "loss": 1.5991, + "step": 5838 + }, + { + "epoch": 0.6159282700421941, + "grad_norm": 0.725536048412323, + "learning_rate": 0.0004914282520093023, + "loss": 1.5749, + "step": 5839 + }, + { + "epoch": 0.6160337552742616, + "grad_norm": 0.543655276298523, + "learning_rate": 0.0004911925993479085, + "loss": 1.603, + "step": 5840 + }, + { + "epoch": 0.6161392405063291, + "grad_norm": 0.5997471809387207, + "learning_rate": 0.0004909569756871745, + "loss": 1.5887, + "step": 5841 + }, + { + "epoch": 0.6162447257383966, + "grad_norm": 0.6932569146156311, + "learning_rate": 0.0004907213810535026, + "loss": 1.6006, + "step": 5842 + }, + { + "epoch": 0.6163502109704642, + "grad_norm": 0.6007352471351624, + "learning_rate": 0.0004904858154732932, + "loss": 1.5304, + "step": 5843 + }, + { + "epoch": 0.6164556962025316, + "grad_norm": 0.6271460652351379, + "learning_rate": 0.0004902502789729424, + "loss": 1.5785, + "step": 5844 + }, + { + "epoch": 0.6165611814345991, + "grad_norm": 0.5174399614334106, + "learning_rate": 0.0004900147715788429, + "loss": 1.5852, + "step": 5845 + }, + { + "epoch": 0.6166666666666667, + "grad_norm": 0.6116039752960205, + "learning_rate": 
0.0004897792933173847, + "loss": 1.5589, + "step": 5846 + }, + { + "epoch": 0.6167721518987341, + "grad_norm": 0.6240472197532654, + "learning_rate": 0.0004895438442149542, + "loss": 1.5761, + "step": 5847 + }, + { + "epoch": 0.6168776371308017, + "grad_norm": 0.6475279331207275, + "learning_rate": 0.0004893084242979348, + "loss": 1.6005, + "step": 5848 + }, + { + "epoch": 0.6169831223628692, + "grad_norm": 0.6716201901435852, + "learning_rate": 0.0004890730335927063, + "loss": 1.6277, + "step": 5849 + }, + { + "epoch": 0.6170886075949367, + "grad_norm": 0.7113438248634338, + "learning_rate": 0.0004888376721256456, + "loss": 1.572, + "step": 5850 + }, + { + "epoch": 0.6171940928270042, + "grad_norm": 0.6152970790863037, + "learning_rate": 0.0004886023399231255, + "loss": 1.5777, + "step": 5851 + }, + { + "epoch": 0.6172995780590718, + "grad_norm": 0.8844988346099854, + "learning_rate": 0.0004883670370115173, + "loss": 1.5589, + "step": 5852 + }, + { + "epoch": 0.6174050632911392, + "grad_norm": 0.6432408690452576, + "learning_rate": 0.00048813176341718693, + "loss": 1.5552, + "step": 5853 + }, + { + "epoch": 0.6175105485232067, + "grad_norm": 0.7211397290229797, + "learning_rate": 0.0004878965191664983, + "loss": 1.5342, + "step": 5854 + }, + { + "epoch": 0.6176160337552743, + "grad_norm": 0.673200786113739, + "learning_rate": 0.0004876613042858118, + "loss": 1.6159, + "step": 5855 + }, + { + "epoch": 0.6177215189873417, + "grad_norm": 0.714525580406189, + "learning_rate": 0.0004874261188014842, + "loss": 1.5703, + "step": 5856 + }, + { + "epoch": 0.6178270042194093, + "grad_norm": 0.6501107811927795, + "learning_rate": 0.00048719096273986925, + "loss": 1.5457, + "step": 5857 + }, + { + "epoch": 0.6179324894514768, + "grad_norm": 0.6375983953475952, + "learning_rate": 0.0004869558361273175, + "loss": 1.5744, + "step": 5858 + }, + { + "epoch": 0.6180379746835443, + "grad_norm": 0.5836599469184875, + "learning_rate": 0.00048672073899017564, + "loss": 1.5832, + 
"step": 5859 + }, + { + "epoch": 0.6181434599156118, + "grad_norm": 0.683696985244751, + "learning_rate": 0.00048648567135478805, + "loss": 1.5733, + "step": 5860 + }, + { + "epoch": 0.6182489451476794, + "grad_norm": 0.6045279502868652, + "learning_rate": 0.0004862506332474951, + "loss": 1.6054, + "step": 5861 + }, + { + "epoch": 0.6183544303797468, + "grad_norm": 0.6851102709770203, + "learning_rate": 0.0004860156246946338, + "loss": 1.6371, + "step": 5862 + }, + { + "epoch": 0.6184599156118143, + "grad_norm": 0.6212549209594727, + "learning_rate": 0.0004857806457225381, + "loss": 1.5346, + "step": 5863 + }, + { + "epoch": 0.6185654008438819, + "grad_norm": 0.7066507935523987, + "learning_rate": 0.00048554569635753857, + "loss": 1.534, + "step": 5864 + }, + { + "epoch": 0.6186708860759493, + "grad_norm": 0.6905725598335266, + "learning_rate": 0.00048531077662596246, + "loss": 1.5689, + "step": 5865 + }, + { + "epoch": 0.6187763713080169, + "grad_norm": 0.6411884427070618, + "learning_rate": 0.00048507588655413367, + "loss": 1.5785, + "step": 5866 + }, + { + "epoch": 0.6188818565400844, + "grad_norm": 0.657441258430481, + "learning_rate": 0.00048484102616837277, + "loss": 1.5136, + "step": 5867 + }, + { + "epoch": 0.6189873417721519, + "grad_norm": 0.5806869864463806, + "learning_rate": 0.000484606195494997, + "loss": 1.5494, + "step": 5868 + }, + { + "epoch": 0.6190928270042194, + "grad_norm": 0.594558596611023, + "learning_rate": 0.0004843713945603205, + "loss": 1.5738, + "step": 5869 + }, + { + "epoch": 0.619198312236287, + "grad_norm": 0.578087329864502, + "learning_rate": 0.0004841366233906538, + "loss": 1.5587, + "step": 5870 + }, + { + "epoch": 0.6193037974683544, + "grad_norm": 0.5507726669311523, + "learning_rate": 0.0004839018820123042, + "loss": 1.5986, + "step": 5871 + }, + { + "epoch": 0.619409282700422, + "grad_norm": 0.6133553981781006, + "learning_rate": 0.0004836671704515756, + "loss": 1.5608, + "step": 5872 + }, + { + "epoch": 0.6195147679324895, 
+ "grad_norm": 0.6282194256782532, + "learning_rate": 0.00048343248873476853, + "loss": 1.547, + "step": 5873 + }, + { + "epoch": 0.6196202531645569, + "grad_norm": 0.5727047324180603, + "learning_rate": 0.00048319783688818043, + "loss": 1.5512, + "step": 5874 + }, + { + "epoch": 0.6197257383966245, + "grad_norm": 0.6326066851615906, + "learning_rate": 0.00048296321493810507, + "loss": 1.6262, + "step": 5875 + }, + { + "epoch": 0.619831223628692, + "grad_norm": 0.5512299537658691, + "learning_rate": 0.0004827286229108331, + "loss": 1.5369, + "step": 5876 + }, + { + "epoch": 0.6199367088607595, + "grad_norm": 0.6149318218231201, + "learning_rate": 0.00048249406083265123, + "loss": 1.5847, + "step": 5877 + }, + { + "epoch": 0.620042194092827, + "grad_norm": 0.7048386335372925, + "learning_rate": 0.0004822595287298442, + "loss": 1.5782, + "step": 5878 + }, + { + "epoch": 0.6201476793248946, + "grad_norm": 0.6243412494659424, + "learning_rate": 0.00048202502662869195, + "loss": 1.553, + "step": 5879 + }, + { + "epoch": 0.620253164556962, + "grad_norm": 0.6227597594261169, + "learning_rate": 0.0004817905545554717, + "loss": 1.5674, + "step": 5880 + }, + { + "epoch": 0.6203586497890295, + "grad_norm": 0.6929152011871338, + "learning_rate": 0.00048155611253645727, + "loss": 1.5572, + "step": 5881 + }, + { + "epoch": 0.6204641350210971, + "grad_norm": 0.6137037873268127, + "learning_rate": 0.0004813217005979191, + "loss": 1.5668, + "step": 5882 + }, + { + "epoch": 0.6205696202531645, + "grad_norm": 0.9237692952156067, + "learning_rate": 0.000481087318766124, + "loss": 1.5446, + "step": 5883 + }, + { + "epoch": 0.6206751054852321, + "grad_norm": 0.570734977722168, + "learning_rate": 0.0004808529670673358, + "loss": 1.5348, + "step": 5884 + }, + { + "epoch": 0.6207805907172996, + "grad_norm": 0.6722893714904785, + "learning_rate": 0.00048061864552781456, + "loss": 1.5519, + "step": 5885 + }, + { + "epoch": 0.6208860759493671, + "grad_norm": 0.623112678527832, + 
"learning_rate": 0.0004803843541738173, + "loss": 1.5986, + "step": 5886 + }, + { + "epoch": 0.6209915611814346, + "grad_norm": 0.683282732963562, + "learning_rate": 0.0004801500930315978, + "loss": 1.5784, + "step": 5887 + }, + { + "epoch": 0.6210970464135022, + "grad_norm": 0.675360918045044, + "learning_rate": 0.000479915862127406, + "loss": 1.5432, + "step": 5888 + }, + { + "epoch": 0.6212025316455696, + "grad_norm": 0.6177393198013306, + "learning_rate": 0.0004796816614874885, + "loss": 1.5743, + "step": 5889 + }, + { + "epoch": 0.6213080168776371, + "grad_norm": 0.7872042059898376, + "learning_rate": 0.00047944749113808884, + "loss": 1.6027, + "step": 5890 + }, + { + "epoch": 0.6214135021097047, + "grad_norm": 0.6276216506958008, + "learning_rate": 0.0004792133511054469, + "loss": 1.5378, + "step": 5891 + }, + { + "epoch": 0.6215189873417721, + "grad_norm": 0.9056515693664551, + "learning_rate": 0.0004789792414157992, + "loss": 1.5793, + "step": 5892 + }, + { + "epoch": 0.6216244725738397, + "grad_norm": 0.7419778108596802, + "learning_rate": 0.000478745162095379, + "loss": 1.5338, + "step": 5893 + }, + { + "epoch": 0.6217299578059071, + "grad_norm": 0.6927601099014282, + "learning_rate": 0.0004785111131704157, + "loss": 1.5412, + "step": 5894 + }, + { + "epoch": 0.6218354430379747, + "grad_norm": 0.7056891918182373, + "learning_rate": 0.0004782770946671362, + "loss": 1.5764, + "step": 5895 + }, + { + "epoch": 0.6219409282700422, + "grad_norm": 0.6146629452705383, + "learning_rate": 0.0004780431066117629, + "loss": 1.5696, + "step": 5896 + }, + { + "epoch": 0.6220464135021097, + "grad_norm": 0.6017658710479736, + "learning_rate": 0.0004778091490305159, + "loss": 1.6021, + "step": 5897 + }, + { + "epoch": 0.6221518987341772, + "grad_norm": 0.6696279644966125, + "learning_rate": 0.0004775752219496109, + "loss": 1.5846, + "step": 5898 + }, + { + "epoch": 0.6222573839662447, + "grad_norm": 0.6257932186126709, + "learning_rate": 0.00047734132539526086, + "loss": 
1.5513, + "step": 5899 + }, + { + "epoch": 0.6223628691983122, + "grad_norm": 0.5601863861083984, + "learning_rate": 0.00047710745939367474, + "loss": 1.5853, + "step": 5900 + }, + { + "epoch": 0.6224683544303797, + "grad_norm": 0.6134058833122253, + "learning_rate": 0.00047687362397105863, + "loss": 1.5538, + "step": 5901 + }, + { + "epoch": 0.6225738396624473, + "grad_norm": 0.6484968066215515, + "learning_rate": 0.0004766398191536149, + "loss": 1.6029, + "step": 5902 + }, + { + "epoch": 0.6226793248945147, + "grad_norm": 0.613743782043457, + "learning_rate": 0.00047640604496754235, + "loss": 1.5385, + "step": 5903 + }, + { + "epoch": 0.6227848101265823, + "grad_norm": 0.6624789237976074, + "learning_rate": 0.000476172301439037, + "loss": 1.5673, + "step": 5904 + }, + { + "epoch": 0.6228902953586498, + "grad_norm": 0.6676670908927917, + "learning_rate": 0.00047593858859429035, + "loss": 1.5341, + "step": 5905 + }, + { + "epoch": 0.6229957805907173, + "grad_norm": 0.6060507297515869, + "learning_rate": 0.00047570490645949175, + "loss": 1.5667, + "step": 5906 + }, + { + "epoch": 0.6231012658227848, + "grad_norm": 0.6658929586410522, + "learning_rate": 0.000475471255060826, + "loss": 1.5319, + "step": 5907 + }, + { + "epoch": 0.6232067510548523, + "grad_norm": 0.7561147212982178, + "learning_rate": 0.0004752376344244752, + "loss": 1.5724, + "step": 5908 + }, + { + "epoch": 0.6233122362869198, + "grad_norm": 0.7451354265213013, + "learning_rate": 0.00047500404457661747, + "loss": 1.5627, + "step": 5909 + }, + { + "epoch": 0.6234177215189873, + "grad_norm": 0.8997847437858582, + "learning_rate": 0.0004747704855434278, + "loss": 1.5628, + "step": 5910 + }, + { + "epoch": 0.6235232067510549, + "grad_norm": 0.7395223379135132, + "learning_rate": 0.0004745369573510775, + "loss": 1.5262, + "step": 5911 + }, + { + "epoch": 0.6236286919831223, + "grad_norm": 0.9911643862724304, + "learning_rate": 0.0004743034600257348, + "loss": 1.595, + "step": 5912 + }, + { + "epoch": 
0.6237341772151899, + "grad_norm": 0.6377283930778503, + "learning_rate": 0.0004740699935935643, + "loss": 1.5432, + "step": 5913 + }, + { + "epoch": 0.6238396624472574, + "grad_norm": 0.9815446138381958, + "learning_rate": 0.0004738365580807268, + "loss": 1.539, + "step": 5914 + }, + { + "epoch": 0.6239451476793249, + "grad_norm": 0.6834763288497925, + "learning_rate": 0.0004736031535133799, + "loss": 1.5503, + "step": 5915 + }, + { + "epoch": 0.6240506329113924, + "grad_norm": 0.8165510296821594, + "learning_rate": 0.0004733697799176781, + "loss": 1.5507, + "step": 5916 + }, + { + "epoch": 0.62415611814346, + "grad_norm": 0.7808812260627747, + "learning_rate": 0.0004731364373197718, + "loss": 1.5839, + "step": 5917 + }, + { + "epoch": 0.6242616033755274, + "grad_norm": 0.5995807647705078, + "learning_rate": 0.00047290312574580835, + "loss": 1.5635, + "step": 5918 + }, + { + "epoch": 0.6243670886075949, + "grad_norm": 0.9479121565818787, + "learning_rate": 0.00047266984522193134, + "loss": 1.5597, + "step": 5919 + }, + { + "epoch": 0.6244725738396625, + "grad_norm": 0.8294240236282349, + "learning_rate": 0.0004724365957742809, + "loss": 1.5539, + "step": 5920 + }, + { + "epoch": 0.6245780590717299, + "grad_norm": 0.8868148922920227, + "learning_rate": 0.0004722033774289941, + "loss": 1.5796, + "step": 5921 + }, + { + "epoch": 0.6246835443037975, + "grad_norm": 0.7852480411529541, + "learning_rate": 0.0004719701902122041, + "loss": 1.5443, + "step": 5922 + }, + { + "epoch": 0.624789029535865, + "grad_norm": 0.6822193264961243, + "learning_rate": 0.00047173703415004066, + "loss": 1.5857, + "step": 5923 + }, + { + "epoch": 0.6248945147679325, + "grad_norm": 0.8959804177284241, + "learning_rate": 0.0004715039092686302, + "loss": 1.5876, + "step": 5924 + }, + { + "epoch": 0.625, + "grad_norm": 0.6856427788734436, + "learning_rate": 0.0004712708155940951, + "loss": 1.5729, + "step": 5925 + }, + { + "epoch": 0.6251054852320675, + "grad_norm": 0.855680525302887, + 
"learning_rate": 0.0004710377531525552, + "loss": 1.5098, + "step": 5926 + }, + { + "epoch": 0.625210970464135, + "grad_norm": 0.7345057129859924, + "learning_rate": 0.000470804721970126, + "loss": 1.5704, + "step": 5927 + }, + { + "epoch": 0.6253164556962025, + "grad_norm": 0.9761554002761841, + "learning_rate": 0.00047057172207292004, + "loss": 1.5613, + "step": 5928 + }, + { + "epoch": 0.6254219409282701, + "grad_norm": 0.8558339476585388, + "learning_rate": 0.00047033875348704576, + "loss": 1.5541, + "step": 5929 + }, + { + "epoch": 0.6255274261603375, + "grad_norm": 0.9225606322288513, + "learning_rate": 0.00047010581623860883, + "loss": 1.5799, + "step": 5930 + }, + { + "epoch": 0.6256329113924051, + "grad_norm": 0.7045841813087463, + "learning_rate": 0.0004698729103537109, + "loss": 1.5716, + "step": 5931 + }, + { + "epoch": 0.6257383966244726, + "grad_norm": 0.771682858467102, + "learning_rate": 0.0004696400358584501, + "loss": 1.5868, + "step": 5932 + }, + { + "epoch": 0.62584388185654, + "grad_norm": 0.6325017809867859, + "learning_rate": 0.00046940719277892143, + "loss": 1.5926, + "step": 5933 + }, + { + "epoch": 0.6259493670886076, + "grad_norm": 0.6787639260292053, + "learning_rate": 0.0004691743811412159, + "loss": 1.5792, + "step": 5934 + }, + { + "epoch": 0.6260548523206751, + "grad_norm": 0.6354299187660217, + "learning_rate": 0.00046894160097142113, + "loss": 1.5712, + "step": 5935 + }, + { + "epoch": 0.6261603375527426, + "grad_norm": 0.6665228009223938, + "learning_rate": 0.00046870885229562153, + "loss": 1.5658, + "step": 5936 + }, + { + "epoch": 0.6262658227848101, + "grad_norm": 0.6688371896743774, + "learning_rate": 0.0004684761351398976, + "loss": 1.5702, + "step": 5937 + }, + { + "epoch": 0.6263713080168777, + "grad_norm": 0.6828647255897522, + "learning_rate": 0.0004682434495303267, + "loss": 1.5232, + "step": 5938 + }, + { + "epoch": 0.6264767932489451, + "grad_norm": 0.6108859181404114, + "learning_rate": 0.00046801079549298224, + 
"loss": 1.5107, + "step": 5939 + }, + { + "epoch": 0.6265822784810127, + "grad_norm": 0.5760031342506409, + "learning_rate": 0.0004677781730539342, + "loss": 1.5973, + "step": 5940 + }, + { + "epoch": 0.6266877637130802, + "grad_norm": 0.6171717643737793, + "learning_rate": 0.00046754558223924926, + "loss": 1.5876, + "step": 5941 + }, + { + "epoch": 0.6267932489451477, + "grad_norm": 0.6343581080436707, + "learning_rate": 0.00046731302307499023, + "loss": 1.5712, + "step": 5942 + }, + { + "epoch": 0.6268987341772152, + "grad_norm": 0.6305285692214966, + "learning_rate": 0.0004670804955872166, + "loss": 1.5177, + "step": 5943 + }, + { + "epoch": 0.6270042194092827, + "grad_norm": 0.628250777721405, + "learning_rate": 0.00046684799980198415, + "loss": 1.5757, + "step": 5944 + }, + { + "epoch": 0.6271097046413502, + "grad_norm": 0.6199836134910583, + "learning_rate": 0.0004666155357453451, + "loss": 1.527, + "step": 5945 + }, + { + "epoch": 0.6272151898734177, + "grad_norm": 0.6349585056304932, + "learning_rate": 0.00046638310344334835, + "loss": 1.5763, + "step": 5946 + }, + { + "epoch": 0.6273206751054853, + "grad_norm": 0.6850860118865967, + "learning_rate": 0.0004661507029220393, + "loss": 1.5491, + "step": 5947 + }, + { + "epoch": 0.6274261603375527, + "grad_norm": 0.6635087728500366, + "learning_rate": 0.0004659183342074594, + "loss": 1.5423, + "step": 5948 + }, + { + "epoch": 0.6275316455696203, + "grad_norm": 0.6082690954208374, + "learning_rate": 0.0004656859973256466, + "loss": 1.5659, + "step": 5949 + }, + { + "epoch": 0.6276371308016878, + "grad_norm": 0.613192617893219, + "learning_rate": 0.0004654536923026356, + "loss": 1.5596, + "step": 5950 + }, + { + "epoch": 0.6277426160337553, + "grad_norm": 0.5723614692687988, + "learning_rate": 0.00046522141916445725, + "loss": 1.6062, + "step": 5951 + }, + { + "epoch": 0.6278481012658228, + "grad_norm": 0.6412534713745117, + "learning_rate": 0.0004649891779371389, + "loss": 1.5705, + "step": 5952 + }, + { + 
"epoch": 0.6279535864978903, + "grad_norm": 0.61676025390625, + "learning_rate": 0.0004647569686467043, + "loss": 1.5559, + "step": 5953 + }, + { + "epoch": 0.6280590717299578, + "grad_norm": 0.5681227445602417, + "learning_rate": 0.00046452479131917383, + "loss": 1.5431, + "step": 5954 + }, + { + "epoch": 0.6281645569620253, + "grad_norm": 0.7194600701332092, + "learning_rate": 0.0004642926459805636, + "loss": 1.5722, + "step": 5955 + }, + { + "epoch": 0.6282700421940929, + "grad_norm": 0.7490346431732178, + "learning_rate": 0.0004640605326568874, + "loss": 1.5773, + "step": 5956 + }, + { + "epoch": 0.6283755274261603, + "grad_norm": 0.632147490978241, + "learning_rate": 0.00046382845137415437, + "loss": 1.5379, + "step": 5957 + }, + { + "epoch": 0.6284810126582279, + "grad_norm": 0.7884665727615356, + "learning_rate": 0.0004635964021583703, + "loss": 1.548, + "step": 5958 + }, + { + "epoch": 0.6285864978902953, + "grad_norm": 0.689825177192688, + "learning_rate": 0.00046336438503553754, + "loss": 1.5465, + "step": 5959 + }, + { + "epoch": 0.6286919831223629, + "grad_norm": 0.7260953187942505, + "learning_rate": 0.00046313240003165466, + "loss": 1.5786, + "step": 5960 + }, + { + "epoch": 0.6287974683544304, + "grad_norm": 0.6245235800743103, + "learning_rate": 0.00046290044717271685, + "loss": 1.5755, + "step": 5961 + }, + { + "epoch": 0.6289029535864978, + "grad_norm": 0.5960447788238525, + "learning_rate": 0.00046266852648471553, + "loss": 1.5525, + "step": 5962 + }, + { + "epoch": 0.6290084388185654, + "grad_norm": 0.6044137477874756, + "learning_rate": 0.0004624366379936383, + "loss": 1.5805, + "step": 5963 + }, + { + "epoch": 0.6291139240506329, + "grad_norm": 0.5459996461868286, + "learning_rate": 0.00046220478172546997, + "loss": 1.5678, + "step": 5964 + }, + { + "epoch": 0.6292194092827004, + "grad_norm": 0.6141189932823181, + "learning_rate": 0.00046197295770619105, + "loss": 1.5926, + "step": 5965 + }, + { + "epoch": 0.6293248945147679, + "grad_norm": 
0.6588041186332703, + "learning_rate": 0.00046174116596177833, + "loss": 1.5633, + "step": 5966 + }, + { + "epoch": 0.6294303797468355, + "grad_norm": 0.682264506816864, + "learning_rate": 0.00046150940651820536, + "loss": 1.5657, + "step": 5967 + }, + { + "epoch": 0.6295358649789029, + "grad_norm": 0.6335310935974121, + "learning_rate": 0.0004612776794014419, + "loss": 1.5259, + "step": 5968 + }, + { + "epoch": 0.6296413502109705, + "grad_norm": 0.6232180595397949, + "learning_rate": 0.00046104598463745424, + "loss": 1.5367, + "step": 5969 + }, + { + "epoch": 0.629746835443038, + "grad_norm": 0.693080484867096, + "learning_rate": 0.0004608143222522048, + "loss": 1.5826, + "step": 5970 + }, + { + "epoch": 0.6298523206751054, + "grad_norm": 0.5644881129264832, + "learning_rate": 0.00046058269227165256, + "loss": 1.589, + "step": 5971 + }, + { + "epoch": 0.629957805907173, + "grad_norm": 0.6199895739555359, + "learning_rate": 0.0004603510947217526, + "loss": 1.5308, + "step": 5972 + }, + { + "epoch": 0.6300632911392405, + "grad_norm": 0.5811815857887268, + "learning_rate": 0.000460119529628457, + "loss": 1.5715, + "step": 5973 + }, + { + "epoch": 0.630168776371308, + "grad_norm": 0.7022080421447754, + "learning_rate": 0.00045988799701771364, + "loss": 1.5703, + "step": 5974 + }, + { + "epoch": 0.6302742616033755, + "grad_norm": 0.65276700258255, + "learning_rate": 0.0004596564969154668, + "loss": 1.5449, + "step": 5975 + }, + { + "epoch": 0.6303797468354431, + "grad_norm": 0.6739935874938965, + "learning_rate": 0.00045942502934765735, + "loss": 1.5559, + "step": 5976 + }, + { + "epoch": 0.6304852320675105, + "grad_norm": 0.6232617497444153, + "learning_rate": 0.0004591935943402222, + "loss": 1.5251, + "step": 5977 + }, + { + "epoch": 0.630590717299578, + "grad_norm": 0.6152631640434265, + "learning_rate": 0.00045896219191909486, + "loss": 1.5612, + "step": 5978 + }, + { + "epoch": 0.6306962025316456, + "grad_norm": 0.5683629512786865, + "learning_rate": 
0.0004587308221102053, + "loss": 1.5089, + "step": 5979 + }, + { + "epoch": 0.630801687763713, + "grad_norm": 0.6964104175567627, + "learning_rate": 0.0004584994849394795, + "loss": 1.535, + "step": 5980 + }, + { + "epoch": 0.6309071729957806, + "grad_norm": 0.5646963119506836, + "learning_rate": 0.0004582681804328396, + "loss": 1.5649, + "step": 5981 + }, + { + "epoch": 0.6310126582278481, + "grad_norm": 0.688471257686615, + "learning_rate": 0.0004580369086162051, + "loss": 1.5718, + "step": 5982 + }, + { + "epoch": 0.6311181434599156, + "grad_norm": 0.5984242558479309, + "learning_rate": 0.0004578056695154909, + "loss": 1.5252, + "step": 5983 + }, + { + "epoch": 0.6312236286919831, + "grad_norm": 0.6124697327613831, + "learning_rate": 0.0004575744631566083, + "loss": 1.5707, + "step": 5984 + }, + { + "epoch": 0.6313291139240507, + "grad_norm": 0.7241514921188354, + "learning_rate": 0.0004573432895654654, + "loss": 1.5659, + "step": 5985 + }, + { + "epoch": 0.6314345991561181, + "grad_norm": 0.7445733547210693, + "learning_rate": 0.00045711214876796623, + "loss": 1.5776, + "step": 5986 + }, + { + "epoch": 0.6315400843881857, + "grad_norm": 0.589422881603241, + "learning_rate": 0.0004568810407900112, + "loss": 1.5534, + "step": 5987 + }, + { + "epoch": 0.6316455696202532, + "grad_norm": 0.7950220704078674, + "learning_rate": 0.00045664996565749716, + "loss": 1.5788, + "step": 5988 + }, + { + "epoch": 0.6317510548523206, + "grad_norm": 0.6885092854499817, + "learning_rate": 0.00045641892339631703, + "loss": 1.6043, + "step": 5989 + }, + { + "epoch": 0.6318565400843882, + "grad_norm": 0.7248930931091309, + "learning_rate": 0.0004561879140323607, + "loss": 1.5669, + "step": 5990 + }, + { + "epoch": 0.6319620253164557, + "grad_norm": 0.6658524870872498, + "learning_rate": 0.0004559569375915137, + "loss": 1.5708, + "step": 5991 + }, + { + "epoch": 0.6320675105485232, + "grad_norm": 0.6596807241439819, + "learning_rate": 0.00045572599409965804, + "loss": 1.5493, + 
"step": 5992 + }, + { + "epoch": 0.6321729957805907, + "grad_norm": 0.6867967247962952, + "learning_rate": 0.00045549508358267224, + "loss": 1.6353, + "step": 5993 + }, + { + "epoch": 0.6322784810126583, + "grad_norm": 0.7161076664924622, + "learning_rate": 0.0004552642060664307, + "loss": 1.578, + "step": 5994 + }, + { + "epoch": 0.6323839662447257, + "grad_norm": 0.6772304773330688, + "learning_rate": 0.00045503336157680466, + "loss": 1.5393, + "step": 5995 + }, + { + "epoch": 0.6324894514767933, + "grad_norm": 0.6778499484062195, + "learning_rate": 0.00045480255013966123, + "loss": 1.5709, + "step": 5996 + }, + { + "epoch": 0.6325949367088608, + "grad_norm": 0.6079370975494385, + "learning_rate": 0.00045457177178086407, + "loss": 1.5741, + "step": 5997 + }, + { + "epoch": 0.6327004219409282, + "grad_norm": 0.6524494886398315, + "learning_rate": 0.0004543410265262727, + "loss": 1.556, + "step": 5998 + }, + { + "epoch": 0.6328059071729958, + "grad_norm": 0.6127801537513733, + "learning_rate": 0.000454110314401744, + "loss": 1.5703, + "step": 5999 + }, + { + "epoch": 0.6329113924050633, + "grad_norm": 0.8086773157119751, + "learning_rate": 0.0004538796354331298, + "loss": 1.5788, + "step": 6000 + }, + { + "epoch": 0.6330168776371308, + "grad_norm": 0.5810496211051941, + "learning_rate": 0.0004536489896462792, + "loss": 1.5423, + "step": 6001 + }, + { + "epoch": 0.6331223628691983, + "grad_norm": 0.7158277630805969, + "learning_rate": 0.0004534183770670371, + "loss": 1.5234, + "step": 6002 + }, + { + "epoch": 0.6332278481012659, + "grad_norm": 0.6493390798568726, + "learning_rate": 0.0004531877977212446, + "loss": 1.5809, + "step": 6003 + }, + { + "epoch": 0.6333333333333333, + "grad_norm": 0.6289868354797363, + "learning_rate": 0.00045295725163473945, + "loss": 1.605, + "step": 6004 + }, + { + "epoch": 0.6334388185654009, + "grad_norm": 0.808802604675293, + "learning_rate": 0.0004527267388333555, + "loss": 1.5579, + "step": 6005 + }, + { + "epoch": 
0.6335443037974684, + "grad_norm": 0.6805428266525269, + "learning_rate": 0.0004524962593429227, + "loss": 1.5564, + "step": 6006 + }, + { + "epoch": 0.6336497890295358, + "grad_norm": 0.80323326587677, + "learning_rate": 0.00045226581318926737, + "loss": 1.5589, + "step": 6007 + }, + { + "epoch": 0.6337552742616034, + "grad_norm": 0.6484524011611938, + "learning_rate": 0.0004520354003982125, + "loss": 1.5587, + "step": 6008 + }, + { + "epoch": 0.6338607594936709, + "grad_norm": 0.7093818187713623, + "learning_rate": 0.00045180502099557686, + "loss": 1.5955, + "step": 6009 + }, + { + "epoch": 0.6339662447257384, + "grad_norm": 0.5919053554534912, + "learning_rate": 0.0004515746750071754, + "loss": 1.5293, + "step": 6010 + }, + { + "epoch": 0.6340717299578059, + "grad_norm": 0.6303244829177856, + "learning_rate": 0.00045134436245881986, + "loss": 1.5385, + "step": 6011 + }, + { + "epoch": 0.6341772151898735, + "grad_norm": 0.646419882774353, + "learning_rate": 0.0004511140833763177, + "loss": 1.5815, + "step": 6012 + }, + { + "epoch": 0.6342827004219409, + "grad_norm": 0.8686279654502869, + "learning_rate": 0.00045088383778547284, + "loss": 1.5835, + "step": 6013 + }, + { + "epoch": 0.6343881856540085, + "grad_norm": 0.6188957095146179, + "learning_rate": 0.0004506536257120856, + "loss": 1.5788, + "step": 6014 + }, + { + "epoch": 0.634493670886076, + "grad_norm": 0.777944028377533, + "learning_rate": 0.0004504234471819518, + "loss": 1.5774, + "step": 6015 + }, + { + "epoch": 0.6345991561181434, + "grad_norm": 0.8736336827278137, + "learning_rate": 0.0004501933022208649, + "loss": 1.5593, + "step": 6016 + }, + { + "epoch": 0.634704641350211, + "grad_norm": 0.7800394892692566, + "learning_rate": 0.00044996319085461353, + "loss": 1.5908, + "step": 6017 + }, + { + "epoch": 0.6348101265822785, + "grad_norm": 1.1416860818862915, + "learning_rate": 0.00044973311310898275, + "loss": 1.5402, + "step": 6018 + }, + { + "epoch": 0.634915611814346, + "grad_norm": 
0.6058957576751709, + "learning_rate": 0.00044950306900975377, + "loss": 1.5371, + "step": 6019 + }, + { + "epoch": 0.6350210970464135, + "grad_norm": 1.0072643756866455, + "learning_rate": 0.0004492730585827046, + "loss": 1.5902, + "step": 6020 + }, + { + "epoch": 0.6351265822784811, + "grad_norm": 0.586866021156311, + "learning_rate": 0.0004490430818536085, + "loss": 1.5406, + "step": 6021 + }, + { + "epoch": 0.6352320675105485, + "grad_norm": 0.8510637879371643, + "learning_rate": 0.0004488131388482359, + "loss": 1.5229, + "step": 6022 + }, + { + "epoch": 0.635337552742616, + "grad_norm": 0.655327320098877, + "learning_rate": 0.000448583229592353, + "loss": 1.5686, + "step": 6023 + }, + { + "epoch": 0.6354430379746835, + "grad_norm": 0.6442968845367432, + "learning_rate": 0.0004483533541117218, + "loss": 1.5308, + "step": 6024 + }, + { + "epoch": 0.635548523206751, + "grad_norm": 0.7138233780860901, + "learning_rate": 0.0004481235124321018, + "loss": 1.5582, + "step": 6025 + }, + { + "epoch": 0.6356540084388186, + "grad_norm": 0.6493349075317383, + "learning_rate": 0.0004478937045792474, + "loss": 1.5437, + "step": 6026 + }, + { + "epoch": 0.635759493670886, + "grad_norm": 0.5894729495048523, + "learning_rate": 0.00044766393057891, + "loss": 1.5551, + "step": 6027 + }, + { + "epoch": 0.6358649789029536, + "grad_norm": 0.6066110730171204, + "learning_rate": 0.00044743419045683674, + "loss": 1.573, + "step": 6028 + }, + { + "epoch": 0.6359704641350211, + "grad_norm": 0.6042739152908325, + "learning_rate": 0.00044720448423877113, + "loss": 1.5326, + "step": 6029 + }, + { + "epoch": 0.6360759493670886, + "grad_norm": 0.5748161673545837, + "learning_rate": 0.0004469748119504529, + "loss": 1.5452, + "step": 6030 + }, + { + "epoch": 0.6361814345991561, + "grad_norm": 0.6176207065582275, + "learning_rate": 0.000446745173617618, + "loss": 1.5905, + "step": 6031 + }, + { + "epoch": 0.6362869198312237, + "grad_norm": 0.5513623952865601, + "learning_rate": 
0.00044651556926599863, + "loss": 1.5634, + "step": 6032 + }, + { + "epoch": 0.6363924050632911, + "grad_norm": 0.5905172228813171, + "learning_rate": 0.0004462859989213227, + "loss": 1.553, + "step": 6033 + }, + { + "epoch": 0.6364978902953586, + "grad_norm": 0.5796509385108948, + "learning_rate": 0.0004460564626093154, + "loss": 1.5844, + "step": 6034 + }, + { + "epoch": 0.6366033755274262, + "grad_norm": 0.5721989870071411, + "learning_rate": 0.00044582696035569695, + "loss": 1.5607, + "step": 6035 + }, + { + "epoch": 0.6367088607594936, + "grad_norm": 0.5785207748413086, + "learning_rate": 0.00044559749218618444, + "loss": 1.5447, + "step": 6036 + }, + { + "epoch": 0.6368143459915612, + "grad_norm": 0.6386047601699829, + "learning_rate": 0.0004453680581264908, + "loss": 1.5722, + "step": 6037 + }, + { + "epoch": 0.6369198312236287, + "grad_norm": 0.5683966279029846, + "learning_rate": 0.00044513865820232525, + "loss": 1.5651, + "step": 6038 + }, + { + "epoch": 0.6370253164556962, + "grad_norm": 0.6356945037841797, + "learning_rate": 0.0004449092924393933, + "loss": 1.5491, + "step": 6039 + }, + { + "epoch": 0.6371308016877637, + "grad_norm": 0.6147563457489014, + "learning_rate": 0.0004446799608633964, + "loss": 1.5791, + "step": 6040 + }, + { + "epoch": 0.6372362869198313, + "grad_norm": 0.5717151761054993, + "learning_rate": 0.00044445066350003203, + "loss": 1.5353, + "step": 6041 + }, + { + "epoch": 0.6373417721518987, + "grad_norm": 0.6287201046943665, + "learning_rate": 0.00044422140037499473, + "loss": 1.5612, + "step": 6042 + }, + { + "epoch": 0.6374472573839662, + "grad_norm": 0.5662114024162292, + "learning_rate": 0.0004439921715139743, + "loss": 1.5437, + "step": 6043 + }, + { + "epoch": 0.6375527426160338, + "grad_norm": 0.6183167099952698, + "learning_rate": 0.00044376297694265687, + "loss": 1.5373, + "step": 6044 + }, + { + "epoch": 0.6376582278481012, + "grad_norm": 0.6058226823806763, + "learning_rate": 0.000443533816686725, + "loss": 1.5399, + 
"step": 6045 + }, + { + "epoch": 0.6377637130801688, + "grad_norm": 0.5601648092269897, + "learning_rate": 0.0004433046907718571, + "loss": 1.5362, + "step": 6046 + }, + { + "epoch": 0.6378691983122363, + "grad_norm": 0.5731310844421387, + "learning_rate": 0.0004430755992237278, + "loss": 1.5596, + "step": 6047 + }, + { + "epoch": 0.6379746835443038, + "grad_norm": 0.6122311353683472, + "learning_rate": 0.00044284654206800826, + "loss": 1.5642, + "step": 6048 + }, + { + "epoch": 0.6380801687763713, + "grad_norm": 0.603013813495636, + "learning_rate": 0.00044261751933036525, + "loss": 1.5438, + "step": 6049 + }, + { + "epoch": 0.6381856540084389, + "grad_norm": 0.607659637928009, + "learning_rate": 0.00044238853103646154, + "loss": 1.5489, + "step": 6050 + }, + { + "epoch": 0.6382911392405063, + "grad_norm": 0.569595992565155, + "learning_rate": 0.0004421595772119573, + "loss": 1.566, + "step": 6051 + }, + { + "epoch": 0.6383966244725738, + "grad_norm": 0.5737202763557434, + "learning_rate": 0.0004419306578825073, + "loss": 1.5989, + "step": 6052 + }, + { + "epoch": 0.6385021097046414, + "grad_norm": 0.7189630270004272, + "learning_rate": 0.0004417017730737633, + "loss": 1.5459, + "step": 6053 + }, + { + "epoch": 0.6386075949367088, + "grad_norm": 0.6785176992416382, + "learning_rate": 0.00044147292281137293, + "loss": 1.5837, + "step": 6054 + }, + { + "epoch": 0.6387130801687764, + "grad_norm": 0.7349963188171387, + "learning_rate": 0.00044124410712098014, + "loss": 1.5326, + "step": 6055 + }, + { + "epoch": 0.6388185654008439, + "grad_norm": 0.677470326423645, + "learning_rate": 0.0004410153260282246, + "loss": 1.5394, + "step": 6056 + }, + { + "epoch": 0.6389240506329114, + "grad_norm": 0.7621476054191589, + "learning_rate": 0.00044078657955874245, + "loss": 1.5692, + "step": 6057 + }, + { + "epoch": 0.6390295358649789, + "grad_norm": 0.6531301140785217, + "learning_rate": 0.0004405578677381661, + "loss": 1.5758, + "step": 6058 + }, + { + "epoch": 
0.6391350210970465, + "grad_norm": 0.6882256865501404, + "learning_rate": 0.0004403291905921233, + "loss": 1.5736, + "step": 6059 + }, + { + "epoch": 0.6392405063291139, + "grad_norm": 0.6187039613723755, + "learning_rate": 0.00044010054814623925, + "loss": 1.592, + "step": 6060 + }, + { + "epoch": 0.6393459915611814, + "grad_norm": 0.8672829866409302, + "learning_rate": 0.00043987194042613393, + "loss": 1.5598, + "step": 6061 + }, + { + "epoch": 0.639451476793249, + "grad_norm": 0.5780484676361084, + "learning_rate": 0.0004396433674574242, + "loss": 1.5509, + "step": 6062 + }, + { + "epoch": 0.6395569620253164, + "grad_norm": 0.7161834239959717, + "learning_rate": 0.00043941482926572277, + "loss": 1.5488, + "step": 6063 + }, + { + "epoch": 0.639662447257384, + "grad_norm": 0.6067153811454773, + "learning_rate": 0.0004391863258766384, + "loss": 1.6125, + "step": 6064 + }, + { + "epoch": 0.6397679324894515, + "grad_norm": 0.7239086031913757, + "learning_rate": 0.00043895785731577606, + "loss": 1.5373, + "step": 6065 + }, + { + "epoch": 0.639873417721519, + "grad_norm": 0.5858249068260193, + "learning_rate": 0.0004387294236087368, + "loss": 1.523, + "step": 6066 + }, + { + "epoch": 0.6399789029535865, + "grad_norm": 0.6192839741706848, + "learning_rate": 0.00043850102478111764, + "loss": 1.5479, + "step": 6067 + }, + { + "epoch": 0.640084388185654, + "grad_norm": 0.6735710501670837, + "learning_rate": 0.00043827266085851203, + "loss": 1.5356, + "step": 6068 + }, + { + "epoch": 0.6401898734177215, + "grad_norm": 0.663962721824646, + "learning_rate": 0.00043804433186650916, + "loss": 1.6022, + "step": 6069 + }, + { + "epoch": 0.640295358649789, + "grad_norm": 0.7353321313858032, + "learning_rate": 0.0004378160378306944, + "loss": 1.5481, + "step": 6070 + }, + { + "epoch": 0.6404008438818566, + "grad_norm": 0.6274499297142029, + "learning_rate": 0.0004375877787766495, + "loss": 1.5601, + "step": 6071 + }, + { + "epoch": 0.640506329113924, + "grad_norm": 
0.5344957709312439, + "learning_rate": 0.0004373595547299517, + "loss": 1.5564, + "step": 6072 + }, + { + "epoch": 0.6406118143459916, + "grad_norm": 0.6873953342437744, + "learning_rate": 0.00043713136571617474, + "loss": 1.5883, + "step": 6073 + }, + { + "epoch": 0.6407172995780591, + "grad_norm": 0.6689660549163818, + "learning_rate": 0.00043690321176088843, + "loss": 1.5919, + "step": 6074 + }, + { + "epoch": 0.6408227848101266, + "grad_norm": 0.5757175087928772, + "learning_rate": 0.00043667509288965845, + "loss": 1.529, + "step": 6075 + }, + { + "epoch": 0.6409282700421941, + "grad_norm": 0.702063798904419, + "learning_rate": 0.0004364470091280463, + "loss": 1.53, + "step": 6076 + }, + { + "epoch": 0.6410337552742617, + "grad_norm": 0.6274358630180359, + "learning_rate": 0.0004362189605016107, + "loss": 1.5589, + "step": 6077 + }, + { + "epoch": 0.6411392405063291, + "grad_norm": 0.7130358219146729, + "learning_rate": 0.00043599094703590524, + "loss": 1.5527, + "step": 6078 + }, + { + "epoch": 0.6412447257383966, + "grad_norm": 0.6250369548797607, + "learning_rate": 0.00043576296875647984, + "loss": 1.5827, + "step": 6079 + }, + { + "epoch": 0.6413502109704642, + "grad_norm": 0.6094885468482971, + "learning_rate": 0.00043553502568888095, + "loss": 1.5337, + "step": 6080 + }, + { + "epoch": 0.6414556962025316, + "grad_norm": 0.6676262617111206, + "learning_rate": 0.00043530711785865026, + "loss": 1.547, + "step": 6081 + }, + { + "epoch": 0.6415611814345992, + "grad_norm": 0.6369389295578003, + "learning_rate": 0.00043507924529132637, + "loss": 1.5218, + "step": 6082 + }, + { + "epoch": 0.6416666666666667, + "grad_norm": 0.6478972434997559, + "learning_rate": 0.0004348514080124432, + "loss": 1.5994, + "step": 6083 + }, + { + "epoch": 0.6417721518987342, + "grad_norm": 0.5344834327697754, + "learning_rate": 0.0004346236060475314, + "loss": 1.5373, + "step": 6084 + }, + { + "epoch": 0.6418776371308017, + "grad_norm": 0.6945509314537048, + "learning_rate": 
0.00043439583942211674, + "loss": 1.5428, + "step": 6085 + }, + { + "epoch": 0.6419831223628693, + "grad_norm": 0.701511561870575, + "learning_rate": 0.00043416810816172244, + "loss": 1.5853, + "step": 6086 + }, + { + "epoch": 0.6420886075949367, + "grad_norm": 0.5551355481147766, + "learning_rate": 0.0004339404122918664, + "loss": 1.583, + "step": 6087 + }, + { + "epoch": 0.6421940928270042, + "grad_norm": 0.7831875085830688, + "learning_rate": 0.0004337127518380632, + "loss": 1.5364, + "step": 6088 + }, + { + "epoch": 0.6422995780590718, + "grad_norm": 0.5965104699134827, + "learning_rate": 0.0004334851268258234, + "loss": 1.5435, + "step": 6089 + }, + { + "epoch": 0.6424050632911392, + "grad_norm": 0.732803463935852, + "learning_rate": 0.0004332575372806534, + "loss": 1.5617, + "step": 6090 + }, + { + "epoch": 0.6425105485232068, + "grad_norm": 0.6352491974830627, + "learning_rate": 0.00043302998322805564, + "loss": 1.5823, + "step": 6091 + }, + { + "epoch": 0.6426160337552742, + "grad_norm": 0.7970208525657654, + "learning_rate": 0.0004328024646935289, + "loss": 1.5507, + "step": 6092 + }, + { + "epoch": 0.6427215189873418, + "grad_norm": 0.6395640969276428, + "learning_rate": 0.00043257498170256735, + "loss": 1.5602, + "step": 6093 + }, + { + "epoch": 0.6428270042194093, + "grad_norm": 0.6150014400482178, + "learning_rate": 0.0004323475342806622, + "loss": 1.5441, + "step": 6094 + }, + { + "epoch": 0.6429324894514767, + "grad_norm": 0.7374228835105896, + "learning_rate": 0.00043212012245329986, + "loss": 1.5154, + "step": 6095 + }, + { + "epoch": 0.6430379746835443, + "grad_norm": 0.5570265650749207, + "learning_rate": 0.0004318927462459629, + "loss": 1.5434, + "step": 6096 + }, + { + "epoch": 0.6431434599156118, + "grad_norm": 0.7565696835517883, + "learning_rate": 0.0004316654056841299, + "loss": 1.5375, + "step": 6097 + }, + { + "epoch": 0.6432489451476793, + "grad_norm": 0.7136867046356201, + "learning_rate": 0.0004314381007932756, + "loss": 1.5787, + 
"step": 6098 + }, + { + "epoch": 0.6433544303797468, + "grad_norm": 0.7905300855636597, + "learning_rate": 0.00043121083159887056, + "loss": 1.5286, + "step": 6099 + }, + { + "epoch": 0.6434599156118144, + "grad_norm": 0.6457900404930115, + "learning_rate": 0.00043098359812638145, + "loss": 1.5554, + "step": 6100 + }, + { + "epoch": 0.6435654008438818, + "grad_norm": 0.6706395745277405, + "learning_rate": 0.000430756400401271, + "loss": 1.5841, + "step": 6101 + }, + { + "epoch": 0.6436708860759494, + "grad_norm": 0.6988136768341064, + "learning_rate": 0.00043052923844899733, + "loss": 1.5247, + "step": 6102 + }, + { + "epoch": 0.6437763713080169, + "grad_norm": 0.6448177695274353, + "learning_rate": 0.000430302112295016, + "loss": 1.5376, + "step": 6103 + }, + { + "epoch": 0.6438818565400843, + "grad_norm": 0.7403927445411682, + "learning_rate": 0.00043007502196477703, + "loss": 1.559, + "step": 6104 + }, + { + "epoch": 0.6439873417721519, + "grad_norm": 0.6369539499282837, + "learning_rate": 0.00042984796748372716, + "loss": 1.5839, + "step": 6105 + }, + { + "epoch": 0.6440928270042194, + "grad_norm": 0.6779518127441406, + "learning_rate": 0.000429620948877309, + "loss": 1.5629, + "step": 6106 + }, + { + "epoch": 0.6441983122362869, + "grad_norm": 0.686525821685791, + "learning_rate": 0.000429393966170961, + "loss": 1.5998, + "step": 6107 + }, + { + "epoch": 0.6443037974683544, + "grad_norm": 0.6808059811592102, + "learning_rate": 0.00042916701939011787, + "loss": 1.5532, + "step": 6108 + }, + { + "epoch": 0.644409282700422, + "grad_norm": 0.6487772464752197, + "learning_rate": 0.00042894010856020997, + "loss": 1.5771, + "step": 6109 + }, + { + "epoch": 0.6445147679324894, + "grad_norm": 0.9089380502700806, + "learning_rate": 0.00042871323370666383, + "loss": 1.5559, + "step": 6110 + }, + { + "epoch": 0.644620253164557, + "grad_norm": 0.8155338168144226, + "learning_rate": 0.00042848639485490165, + "loss": 1.5624, + "step": 6111 + }, + { + "epoch": 
0.6447257383966245, + "grad_norm": 0.7166086435317993, + "learning_rate": 0.0004282595920303425, + "loss": 1.5266, + "step": 6112 + }, + { + "epoch": 0.6448312236286919, + "grad_norm": 0.599012017250061, + "learning_rate": 0.00042803282525840036, + "loss": 1.5884, + "step": 6113 + }, + { + "epoch": 0.6449367088607595, + "grad_norm": 0.7153019905090332, + "learning_rate": 0.0004278060945644856, + "loss": 1.5476, + "step": 6114 + }, + { + "epoch": 0.645042194092827, + "grad_norm": 0.6646295189857483, + "learning_rate": 0.0004275793999740046, + "loss": 1.5661, + "step": 6115 + }, + { + "epoch": 0.6451476793248945, + "grad_norm": 0.8170561790466309, + "learning_rate": 0.00042735274151235953, + "loss": 1.5815, + "step": 6116 + }, + { + "epoch": 0.645253164556962, + "grad_norm": 0.6375616192817688, + "learning_rate": 0.00042712611920494865, + "loss": 1.5359, + "step": 6117 + }, + { + "epoch": 0.6453586497890296, + "grad_norm": 0.6988826394081116, + "learning_rate": 0.0004268995330771661, + "loss": 1.5811, + "step": 6118 + }, + { + "epoch": 0.645464135021097, + "grad_norm": 0.6457114815711975, + "learning_rate": 0.0004266729831544017, + "loss": 1.5465, + "step": 6119 + }, + { + "epoch": 0.6455696202531646, + "grad_norm": 0.6205315589904785, + "learning_rate": 0.0004264464694620421, + "loss": 1.5231, + "step": 6120 + }, + { + "epoch": 0.6456751054852321, + "grad_norm": 0.6548480987548828, + "learning_rate": 0.00042621999202546897, + "loss": 1.5502, + "step": 6121 + }, + { + "epoch": 0.6457805907172995, + "grad_norm": 0.5750626921653748, + "learning_rate": 0.0004259935508700603, + "loss": 1.5849, + "step": 6122 + }, + { + "epoch": 0.6458860759493671, + "grad_norm": 0.5831412076950073, + "learning_rate": 0.0004257671460211898, + "loss": 1.5525, + "step": 6123 + }, + { + "epoch": 0.6459915611814346, + "grad_norm": 0.7238519787788391, + "learning_rate": 0.00042554077750422736, + "loss": 1.5631, + "step": 6124 + }, + { + "epoch": 0.6460970464135021, + "grad_norm": 
0.6672130823135376, + "learning_rate": 0.00042531444534453885, + "loss": 1.5521, + "step": 6125 + }, + { + "epoch": 0.6462025316455696, + "grad_norm": 0.6814072728157043, + "learning_rate": 0.0004250881495674855, + "loss": 1.5544, + "step": 6126 + }, + { + "epoch": 0.6463080168776372, + "grad_norm": 0.7213913202285767, + "learning_rate": 0.00042486189019842535, + "loss": 1.5447, + "step": 6127 + }, + { + "epoch": 0.6464135021097046, + "grad_norm": 0.5891356468200684, + "learning_rate": 0.00042463566726271137, + "loss": 1.5342, + "step": 6128 + }, + { + "epoch": 0.6465189873417722, + "grad_norm": 0.7468838095664978, + "learning_rate": 0.0004244094807856936, + "loss": 1.5528, + "step": 6129 + }, + { + "epoch": 0.6466244725738397, + "grad_norm": 0.6379943490028381, + "learning_rate": 0.000424183330792717, + "loss": 1.5878, + "step": 6130 + }, + { + "epoch": 0.6467299578059071, + "grad_norm": 0.5906268358230591, + "learning_rate": 0.0004239572173091229, + "loss": 1.5947, + "step": 6131 + }, + { + "epoch": 0.6468354430379747, + "grad_norm": 0.768113374710083, + "learning_rate": 0.0004237311403602484, + "loss": 1.5768, + "step": 6132 + }, + { + "epoch": 0.6469409282700422, + "grad_norm": 0.6686628460884094, + "learning_rate": 0.0004235050999714265, + "loss": 1.5488, + "step": 6133 + }, + { + "epoch": 0.6470464135021097, + "grad_norm": 0.7021548748016357, + "learning_rate": 0.00042327909616798616, + "loss": 1.5701, + "step": 6134 + }, + { + "epoch": 0.6471518987341772, + "grad_norm": 0.6931016445159912, + "learning_rate": 0.0004230531289752523, + "loss": 1.5276, + "step": 6135 + }, + { + "epoch": 0.6472573839662448, + "grad_norm": 0.639273464679718, + "learning_rate": 0.00042282719841854567, + "loss": 1.583, + "step": 6136 + }, + { + "epoch": 0.6473628691983122, + "grad_norm": 0.8090608716011047, + "learning_rate": 0.0004226013045231826, + "loss": 1.5578, + "step": 6137 + }, + { + "epoch": 0.6474683544303798, + "grad_norm": 0.6281756162643433, + "learning_rate": 
0.00042237544731447616, + "loss": 1.5695, + "step": 6138 + }, + { + "epoch": 0.6475738396624473, + "grad_norm": 0.696306049823761, + "learning_rate": 0.00042214962681773457, + "loss": 1.5686, + "step": 6139 + }, + { + "epoch": 0.6476793248945147, + "grad_norm": 0.6444409489631653, + "learning_rate": 0.0004219238430582621, + "loss": 1.4882, + "step": 6140 + }, + { + "epoch": 0.6477848101265823, + "grad_norm": 0.6621747016906738, + "learning_rate": 0.00042169809606135893, + "loss": 1.5668, + "step": 6141 + }, + { + "epoch": 0.6478902953586498, + "grad_norm": 0.637239933013916, + "learning_rate": 0.0004214723858523212, + "loss": 1.5312, + "step": 6142 + }, + { + "epoch": 0.6479957805907173, + "grad_norm": 0.780676007270813, + "learning_rate": 0.00042124671245644086, + "loss": 1.537, + "step": 6143 + }, + { + "epoch": 0.6481012658227848, + "grad_norm": 0.6266670227050781, + "learning_rate": 0.0004210210758990056, + "loss": 1.5786, + "step": 6144 + }, + { + "epoch": 0.6482067510548524, + "grad_norm": 0.6703481674194336, + "learning_rate": 0.00042079547620529927, + "loss": 1.5901, + "step": 6145 + }, + { + "epoch": 0.6483122362869198, + "grad_norm": 0.7026779055595398, + "learning_rate": 0.0004205699134006011, + "loss": 1.5698, + "step": 6146 + }, + { + "epoch": 0.6484177215189874, + "grad_norm": 0.6636910438537598, + "learning_rate": 0.0004203443875101871, + "loss": 1.5709, + "step": 6147 + }, + { + "epoch": 0.6485232067510549, + "grad_norm": 0.7275995016098022, + "learning_rate": 0.0004201188985593283, + "loss": 1.5475, + "step": 6148 + }, + { + "epoch": 0.6486286919831223, + "grad_norm": 0.5347452759742737, + "learning_rate": 0.00041989344657329187, + "loss": 1.5675, + "step": 6149 + }, + { + "epoch": 0.6487341772151899, + "grad_norm": 0.837807297706604, + "learning_rate": 0.0004196680315773408, + "loss": 1.5965, + "step": 6150 + }, + { + "epoch": 0.6488396624472574, + "grad_norm": 0.5973814725875854, + "learning_rate": 0.0004194426535967339, + "loss": 1.553, + 
"step": 6151 + }, + { + "epoch": 0.6489451476793249, + "grad_norm": 0.6128478050231934, + "learning_rate": 0.00041921731265672613, + "loss": 1.5865, + "step": 6152 + }, + { + "epoch": 0.6490506329113924, + "grad_norm": 0.6286333203315735, + "learning_rate": 0.0004189920087825678, + "loss": 1.5413, + "step": 6153 + }, + { + "epoch": 0.64915611814346, + "grad_norm": 0.5643185377120972, + "learning_rate": 0.00041876674199950545, + "loss": 1.5416, + "step": 6154 + }, + { + "epoch": 0.6492616033755274, + "grad_norm": 0.5784268379211426, + "learning_rate": 0.0004185415123327813, + "loss": 1.5592, + "step": 6155 + }, + { + "epoch": 0.649367088607595, + "grad_norm": 0.6785906553268433, + "learning_rate": 0.00041831631980763324, + "loss": 1.5349, + "step": 6156 + }, + { + "epoch": 0.6494725738396624, + "grad_norm": 0.6508108973503113, + "learning_rate": 0.00041809116444929586, + "loss": 1.5598, + "step": 6157 + }, + { + "epoch": 0.6495780590717299, + "grad_norm": 0.6880964040756226, + "learning_rate": 0.00041786604628299846, + "loss": 1.5952, + "step": 6158 + }, + { + "epoch": 0.6496835443037975, + "grad_norm": 0.717713475227356, + "learning_rate": 0.00041764096533396667, + "loss": 1.5227, + "step": 6159 + }, + { + "epoch": 0.6497890295358649, + "grad_norm": 0.6589133143424988, + "learning_rate": 0.00041741592162742214, + "loss": 1.5808, + "step": 6160 + }, + { + "epoch": 0.6498945147679325, + "grad_norm": 0.7324159741401672, + "learning_rate": 0.0004171909151885819, + "loss": 1.5534, + "step": 6161 + }, + { + "epoch": 0.65, + "grad_norm": 0.6074619293212891, + "learning_rate": 0.0004169659460426592, + "loss": 1.5407, + "step": 6162 + }, + { + "epoch": 0.6501054852320675, + "grad_norm": 1.0440740585327148, + "learning_rate": 0.00041674101421486294, + "loss": 1.4909, + "step": 6163 + }, + { + "epoch": 0.650210970464135, + "grad_norm": 0.6057644486427307, + "learning_rate": 0.00041651611973039776, + "loss": 1.5255, + "step": 6164 + }, + { + "epoch": 0.6503164556962026, + 
"grad_norm": 0.783287763595581, + "learning_rate": 0.0004162912626144642, + "loss": 1.5431, + "step": 6165 + }, + { + "epoch": 0.65042194092827, + "grad_norm": 0.7469261288642883, + "learning_rate": 0.0004160664428922586, + "loss": 1.5485, + "step": 6166 + }, + { + "epoch": 0.6505274261603375, + "grad_norm": 0.6465548872947693, + "learning_rate": 0.00041584166058897324, + "loss": 1.5216, + "step": 6167 + }, + { + "epoch": 0.6506329113924051, + "grad_norm": 0.633530855178833, + "learning_rate": 0.00041561691572979624, + "loss": 1.5261, + "step": 6168 + }, + { + "epoch": 0.6507383966244725, + "grad_norm": 0.6118305325508118, + "learning_rate": 0.00041539220833991124, + "loss": 1.5834, + "step": 6169 + }, + { + "epoch": 0.6508438818565401, + "grad_norm": 0.6014434695243835, + "learning_rate": 0.0004151675384444978, + "loss": 1.5334, + "step": 6170 + }, + { + "epoch": 0.6509493670886076, + "grad_norm": 0.619651198387146, + "learning_rate": 0.0004149429060687312, + "loss": 1.5626, + "step": 6171 + }, + { + "epoch": 0.6510548523206751, + "grad_norm": 0.5954903960227966, + "learning_rate": 0.00041471831123778284, + "loss": 1.5257, + "step": 6172 + }, + { + "epoch": 0.6511603375527426, + "grad_norm": 0.6939777135848999, + "learning_rate": 0.0004144937539768195, + "loss": 1.5628, + "step": 6173 + }, + { + "epoch": 0.6512658227848102, + "grad_norm": 0.6058967709541321, + "learning_rate": 0.00041426923431100396, + "loss": 1.5813, + "step": 6174 + }, + { + "epoch": 0.6513713080168776, + "grad_norm": 0.6802607178688049, + "learning_rate": 0.0004140447522654946, + "loss": 1.5353, + "step": 6175 + }, + { + "epoch": 0.6514767932489451, + "grad_norm": 0.6444260478019714, + "learning_rate": 0.0004138203078654463, + "loss": 1.5709, + "step": 6176 + }, + { + "epoch": 0.6515822784810127, + "grad_norm": 0.625625491142273, + "learning_rate": 0.0004135959011360088, + "loss": 1.5671, + "step": 6177 + }, + { + "epoch": 0.6516877637130801, + "grad_norm": 0.6771073341369629, + 
"learning_rate": 0.000413371532102328, + "loss": 1.5569, + "step": 6178 + }, + { + "epoch": 0.6517932489451477, + "grad_norm": 0.7210134267807007, + "learning_rate": 0.0004131472007895457, + "loss": 1.574, + "step": 6179 + }, + { + "epoch": 0.6518987341772152, + "grad_norm": 0.686178982257843, + "learning_rate": 0.00041292290722279914, + "loss": 1.5668, + "step": 6180 + }, + { + "epoch": 0.6520042194092827, + "grad_norm": 0.8881514072418213, + "learning_rate": 0.00041269865142722176, + "loss": 1.5784, + "step": 6181 + }, + { + "epoch": 0.6521097046413502, + "grad_norm": 0.6546814441680908, + "learning_rate": 0.0004124744334279424, + "loss": 1.5723, + "step": 6182 + }, + { + "epoch": 0.6522151898734178, + "grad_norm": 0.9453746676445007, + "learning_rate": 0.0004122502532500858, + "loss": 1.5571, + "step": 6183 + }, + { + "epoch": 0.6523206751054852, + "grad_norm": 0.5442281365394592, + "learning_rate": 0.0004120261109187724, + "loss": 1.5553, + "step": 6184 + }, + { + "epoch": 0.6524261603375527, + "grad_norm": 0.8733136057853699, + "learning_rate": 0.0004118020064591184, + "loss": 1.5551, + "step": 6185 + }, + { + "epoch": 0.6525316455696203, + "grad_norm": 0.7078721523284912, + "learning_rate": 0.00041157793989623625, + "loss": 1.5719, + "step": 6186 + }, + { + "epoch": 0.6526371308016877, + "grad_norm": 0.7200087904930115, + "learning_rate": 0.0004113539112552334, + "loss": 1.5423, + "step": 6187 + }, + { + "epoch": 0.6527426160337553, + "grad_norm": 0.8041085004806519, + "learning_rate": 0.0004111299205612135, + "loss": 1.5514, + "step": 6188 + }, + { + "epoch": 0.6528481012658228, + "grad_norm": 0.7577432990074158, + "learning_rate": 0.00041090596783927583, + "loss": 1.5395, + "step": 6189 + }, + { + "epoch": 0.6529535864978903, + "grad_norm": 0.7981062531471252, + "learning_rate": 0.00041068205311451517, + "loss": 1.5903, + "step": 6190 + }, + { + "epoch": 0.6530590717299578, + "grad_norm": 0.6215429306030273, + "learning_rate": 0.00041045817641202257, + 
"loss": 1.5828, + "step": 6191 + }, + { + "epoch": 0.6531645569620254, + "grad_norm": 0.7702365517616272, + "learning_rate": 0.00041023433775688435, + "loss": 1.5218, + "step": 6192 + }, + { + "epoch": 0.6532700421940928, + "grad_norm": 0.6453219056129456, + "learning_rate": 0.00041001053717418283, + "loss": 1.5562, + "step": 6193 + }, + { + "epoch": 0.6533755274261603, + "grad_norm": 0.9593873023986816, + "learning_rate": 0.000409786774688996, + "loss": 1.5168, + "step": 6194 + }, + { + "epoch": 0.6534810126582279, + "grad_norm": 0.5470028519630432, + "learning_rate": 0.00040956305032639723, + "loss": 1.5682, + "step": 6195 + }, + { + "epoch": 0.6535864978902953, + "grad_norm": 0.7151534557342529, + "learning_rate": 0.0004093393641114565, + "loss": 1.5379, + "step": 6196 + }, + { + "epoch": 0.6536919831223629, + "grad_norm": 0.684022068977356, + "learning_rate": 0.00040911571606923867, + "loss": 1.5412, + "step": 6197 + }, + { + "epoch": 0.6537974683544304, + "grad_norm": 0.6180412173271179, + "learning_rate": 0.00040889210622480467, + "loss": 1.5683, + "step": 6198 + }, + { + "epoch": 0.6539029535864979, + "grad_norm": 0.773928165435791, + "learning_rate": 0.0004086685346032111, + "loss": 1.595, + "step": 6199 + }, + { + "epoch": 0.6540084388185654, + "grad_norm": 0.7176297903060913, + "learning_rate": 0.00040844500122951026, + "loss": 1.5587, + "step": 6200 + }, + { + "epoch": 0.654113924050633, + "grad_norm": 0.8413993716239929, + "learning_rate": 0.0004082215061287502, + "loss": 1.5273, + "step": 6201 + }, + { + "epoch": 0.6542194092827004, + "grad_norm": 0.6556046605110168, + "learning_rate": 0.00040799804932597464, + "loss": 1.5482, + "step": 6202 + }, + { + "epoch": 0.6543248945147679, + "grad_norm": 0.8983070254325867, + "learning_rate": 0.00040777463084622304, + "loss": 1.5381, + "step": 6203 + }, + { + "epoch": 0.6544303797468355, + "grad_norm": 0.6887397170066833, + "learning_rate": 0.00040755125071453055, + "loss": 1.5546, + "step": 6204 + }, + { + 
"epoch": 0.6545358649789029, + "grad_norm": 0.8274036645889282, + "learning_rate": 0.00040732790895592764, + "loss": 1.5521, + "step": 6205 + }, + { + "epoch": 0.6546413502109705, + "grad_norm": 0.6382724046707153, + "learning_rate": 0.00040710460559544167, + "loss": 1.574, + "step": 6206 + }, + { + "epoch": 0.654746835443038, + "grad_norm": 0.7843133211135864, + "learning_rate": 0.0004068813406580944, + "loss": 1.5678, + "step": 6207 + }, + { + "epoch": 0.6548523206751055, + "grad_norm": 0.8243295550346375, + "learning_rate": 0.0004066581141689038, + "loss": 1.556, + "step": 6208 + }, + { + "epoch": 0.654957805907173, + "grad_norm": 0.7440428733825684, + "learning_rate": 0.00040643492615288367, + "loss": 1.5556, + "step": 6209 + }, + { + "epoch": 0.6550632911392406, + "grad_norm": 0.7782318592071533, + "learning_rate": 0.00040621177663504313, + "loss": 1.5435, + "step": 6210 + }, + { + "epoch": 0.655168776371308, + "grad_norm": 0.6614751219749451, + "learning_rate": 0.0004059886656403874, + "loss": 1.5653, + "step": 6211 + }, + { + "epoch": 0.6552742616033755, + "grad_norm": 0.8062946200370789, + "learning_rate": 0.00040576559319391704, + "loss": 1.5509, + "step": 6212 + }, + { + "epoch": 0.6553797468354431, + "grad_norm": 0.6372640132904053, + "learning_rate": 0.0004055425593206285, + "loss": 1.5685, + "step": 6213 + }, + { + "epoch": 0.6554852320675105, + "grad_norm": 0.80302494764328, + "learning_rate": 0.0004053195640455137, + "loss": 1.5358, + "step": 6214 + }, + { + "epoch": 0.6555907172995781, + "grad_norm": 0.7132750153541565, + "learning_rate": 0.0004050966073935602, + "loss": 1.5701, + "step": 6215 + }, + { + "epoch": 0.6556962025316456, + "grad_norm": 0.6206978559494019, + "learning_rate": 0.00040487368938975214, + "loss": 1.5883, + "step": 6216 + }, + { + "epoch": 0.6558016877637131, + "grad_norm": 0.6796267032623291, + "learning_rate": 0.00040465081005906805, + "loss": 1.5602, + "step": 6217 + }, + { + "epoch": 0.6559071729957806, + "grad_norm": 
0.6024948954582214, + "learning_rate": 0.00040442796942648273, + "loss": 1.575, + "step": 6218 + }, + { + "epoch": 0.6560126582278482, + "grad_norm": 0.6052607297897339, + "learning_rate": 0.00040420516751696664, + "loss": 1.5409, + "step": 6219 + }, + { + "epoch": 0.6561181434599156, + "grad_norm": 0.665073573589325, + "learning_rate": 0.00040398240435548583, + "loss": 1.5334, + "step": 6220 + }, + { + "epoch": 0.6562236286919831, + "grad_norm": 0.7203143835067749, + "learning_rate": 0.000403759679967002, + "loss": 1.5609, + "step": 6221 + }, + { + "epoch": 0.6563291139240506, + "grad_norm": 0.6349233984947205, + "learning_rate": 0.00040353699437647257, + "loss": 1.5237, + "step": 6222 + }, + { + "epoch": 0.6564345991561181, + "grad_norm": 0.6272197365760803, + "learning_rate": 0.0004033143476088504, + "loss": 1.5459, + "step": 6223 + }, + { + "epoch": 0.6565400843881857, + "grad_norm": 0.555779218673706, + "learning_rate": 0.00040309173968908413, + "loss": 1.5426, + "step": 6224 + }, + { + "epoch": 0.6566455696202531, + "grad_norm": 0.5861729979515076, + "learning_rate": 0.0004028691706421185, + "loss": 1.5454, + "step": 6225 + }, + { + "epoch": 0.6567510548523207, + "grad_norm": 0.6837261319160461, + "learning_rate": 0.00040264664049289336, + "loss": 1.5755, + "step": 6226 + }, + { + "epoch": 0.6568565400843882, + "grad_norm": 0.5847106575965881, + "learning_rate": 0.00040242414926634415, + "loss": 1.5493, + "step": 6227 + }, + { + "epoch": 0.6569620253164556, + "grad_norm": 0.672758936882019, + "learning_rate": 0.0004022016969874023, + "loss": 1.5499, + "step": 6228 + }, + { + "epoch": 0.6570675105485232, + "grad_norm": 0.6206968426704407, + "learning_rate": 0.00040197928368099445, + "loss": 1.5456, + "step": 6229 + }, + { + "epoch": 0.6571729957805907, + "grad_norm": 0.660374104976654, + "learning_rate": 0.00040175690937204324, + "loss": 1.5595, + "step": 6230 + }, + { + "epoch": 0.6572784810126582, + "grad_norm": 0.6093613505363464, + "learning_rate": 
0.0004015345740854668, + "loss": 1.5488, + "step": 6231 + }, + { + "epoch": 0.6573839662447257, + "grad_norm": 0.5725140571594238, + "learning_rate": 0.00040131227784617876, + "loss": 1.5692, + "step": 6232 + }, + { + "epoch": 0.6574894514767933, + "grad_norm": 0.5939970016479492, + "learning_rate": 0.000401090020679089, + "loss": 1.5809, + "step": 6233 + }, + { + "epoch": 0.6575949367088607, + "grad_norm": 0.6626554131507874, + "learning_rate": 0.00040086780260910213, + "loss": 1.5764, + "step": 6234 + }, + { + "epoch": 0.6577004219409283, + "grad_norm": 0.6289709806442261, + "learning_rate": 0.000400645623661119, + "loss": 1.5574, + "step": 6235 + }, + { + "epoch": 0.6578059071729958, + "grad_norm": 0.7645975947380066, + "learning_rate": 0.0004004234838600357, + "loss": 1.5485, + "step": 6236 + }, + { + "epoch": 0.6579113924050632, + "grad_norm": 0.5919140577316284, + "learning_rate": 0.00040020138323074427, + "loss": 1.5337, + "step": 6237 + }, + { + "epoch": 0.6580168776371308, + "grad_norm": 0.8138222694396973, + "learning_rate": 0.00039997932179813205, + "loss": 1.5447, + "step": 6238 + }, + { + "epoch": 0.6581223628691983, + "grad_norm": 0.646529495716095, + "learning_rate": 0.00039975729958708223, + "loss": 1.5317, + "step": 6239 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 0.8127038478851318, + "learning_rate": 0.00039953531662247343, + "loss": 1.535, + "step": 6240 + }, + { + "epoch": 0.6583333333333333, + "grad_norm": 0.6642956137657166, + "learning_rate": 0.00039931337292917966, + "loss": 1.5322, + "step": 6241 + }, + { + "epoch": 0.6584388185654009, + "grad_norm": 0.6514142751693726, + "learning_rate": 0.0003990914685320714, + "loss": 1.5533, + "step": 6242 + }, + { + "epoch": 0.6585443037974683, + "grad_norm": 0.7761908769607544, + "learning_rate": 0.00039886960345601394, + "loss": 1.5922, + "step": 6243 + }, + { + "epoch": 0.6586497890295359, + "grad_norm": 0.6405248045921326, + "learning_rate": 0.00039864777772586826, + "loss": 1.5476, + 
"step": 6244 + }, + { + "epoch": 0.6587552742616034, + "grad_norm": 0.779024600982666, + "learning_rate": 0.00039842599136649117, + "loss": 1.5588, + "step": 6245 + }, + { + "epoch": 0.6588607594936708, + "grad_norm": 0.62300044298172, + "learning_rate": 0.00039820424440273474, + "loss": 1.5278, + "step": 6246 + }, + { + "epoch": 0.6589662447257384, + "grad_norm": 0.9083572030067444, + "learning_rate": 0.000397982536859447, + "loss": 1.5648, + "step": 6247 + }, + { + "epoch": 0.6590717299578059, + "grad_norm": 0.6153918504714966, + "learning_rate": 0.00039776086876147133, + "loss": 1.5352, + "step": 6248 + }, + { + "epoch": 0.6591772151898734, + "grad_norm": 0.8096504807472229, + "learning_rate": 0.0003975392401336468, + "loss": 1.5981, + "step": 6249 + }, + { + "epoch": 0.6592827004219409, + "grad_norm": 0.7365738153457642, + "learning_rate": 0.0003973176510008075, + "loss": 1.5893, + "step": 6250 + }, + { + "epoch": 0.6593881856540085, + "grad_norm": 0.7366805076599121, + "learning_rate": 0.00039709610138778445, + "loss": 1.5415, + "step": 6251 + }, + { + "epoch": 0.6594936708860759, + "grad_norm": 0.9409077763557434, + "learning_rate": 0.0003968745913194029, + "loss": 1.5683, + "step": 6252 + }, + { + "epoch": 0.6595991561181435, + "grad_norm": 0.7246860861778259, + "learning_rate": 0.0003966531208204842, + "loss": 1.5322, + "step": 6253 + }, + { + "epoch": 0.659704641350211, + "grad_norm": 0.6892825961112976, + "learning_rate": 0.0003964316899158454, + "loss": 1.5544, + "step": 6254 + }, + { + "epoch": 0.6598101265822784, + "grad_norm": 0.6696341633796692, + "learning_rate": 0.00039621029863029874, + "loss": 1.5736, + "step": 6255 + }, + { + "epoch": 0.659915611814346, + "grad_norm": 0.6232489347457886, + "learning_rate": 0.00039598894698865216, + "loss": 1.545, + "step": 6256 + }, + { + "epoch": 0.6600210970464135, + "grad_norm": 0.6966599225997925, + "learning_rate": 0.00039576763501570944, + "loss": 1.5099, + "step": 6257 + }, + { + "epoch": 
0.660126582278481, + "grad_norm": 0.5579274892807007, + "learning_rate": 0.0003955463627362694, + "loss": 1.5605, + "step": 6258 + }, + { + "epoch": 0.6602320675105485, + "grad_norm": 0.7289134860038757, + "learning_rate": 0.00039532513017512694, + "loss": 1.5611, + "step": 6259 + }, + { + "epoch": 0.6603375527426161, + "grad_norm": 0.6671301126480103, + "learning_rate": 0.00039510393735707233, + "loss": 1.5692, + "step": 6260 + }, + { + "epoch": 0.6604430379746835, + "grad_norm": 0.6649094223976135, + "learning_rate": 0.00039488278430689123, + "loss": 1.5662, + "step": 6261 + }, + { + "epoch": 0.6605485232067511, + "grad_norm": 0.676811695098877, + "learning_rate": 0.0003946616710493649, + "loss": 1.5668, + "step": 6262 + }, + { + "epoch": 0.6606540084388186, + "grad_norm": 0.6630480885505676, + "learning_rate": 0.0003944405976092702, + "loss": 1.5168, + "step": 6263 + }, + { + "epoch": 0.660759493670886, + "grad_norm": 0.6583742499351501, + "learning_rate": 0.0003942195640113795, + "loss": 1.5999, + "step": 6264 + }, + { + "epoch": 0.6608649789029536, + "grad_norm": 0.6058037877082825, + "learning_rate": 0.00039399857028046066, + "loss": 1.5564, + "step": 6265 + }, + { + "epoch": 0.6609704641350211, + "grad_norm": 0.6468507051467896, + "learning_rate": 0.0003937776164412773, + "loss": 1.5619, + "step": 6266 + }, + { + "epoch": 0.6610759493670886, + "grad_norm": 0.6542741060256958, + "learning_rate": 0.00039355670251858805, + "loss": 1.5922, + "step": 6267 + }, + { + "epoch": 0.6611814345991561, + "grad_norm": 0.6977825164794922, + "learning_rate": 0.00039333582853714793, + "loss": 1.5393, + "step": 6268 + }, + { + "epoch": 0.6612869198312237, + "grad_norm": 0.6739007830619812, + "learning_rate": 0.00039311499452170665, + "loss": 1.5781, + "step": 6269 + }, + { + "epoch": 0.6613924050632911, + "grad_norm": 0.6293218731880188, + "learning_rate": 0.00039289420049700986, + "loss": 1.5267, + "step": 6270 + }, + { + "epoch": 0.6614978902953587, + "grad_norm": 
0.660040020942688, + "learning_rate": 0.0003926734464877986, + "loss": 1.5324, + "step": 6271 + }, + { + "epoch": 0.6616033755274262, + "grad_norm": 0.7369188070297241, + "learning_rate": 0.0003924527325188095, + "loss": 1.5671, + "step": 6272 + }, + { + "epoch": 0.6617088607594936, + "grad_norm": 0.7093050479888916, + "learning_rate": 0.00039223205861477455, + "loss": 1.6046, + "step": 6273 + }, + { + "epoch": 0.6618143459915612, + "grad_norm": 0.7586897015571594, + "learning_rate": 0.00039201142480042145, + "loss": 1.5493, + "step": 6274 + }, + { + "epoch": 0.6619198312236287, + "grad_norm": 0.587177574634552, + "learning_rate": 0.0003917908311004732, + "loss": 1.5528, + "step": 6275 + }, + { + "epoch": 0.6620253164556962, + "grad_norm": 0.6644476056098938, + "learning_rate": 0.0003915702775396483, + "loss": 1.5559, + "step": 6276 + }, + { + "epoch": 0.6621308016877637, + "grad_norm": 0.7336865067481995, + "learning_rate": 0.0003913497641426614, + "loss": 1.5379, + "step": 6277 + }, + { + "epoch": 0.6622362869198313, + "grad_norm": 0.6627311706542969, + "learning_rate": 0.00039112929093422185, + "loss": 1.5846, + "step": 6278 + }, + { + "epoch": 0.6623417721518987, + "grad_norm": 0.618859052658081, + "learning_rate": 0.0003909088579390347, + "loss": 1.5517, + "step": 6279 + }, + { + "epoch": 0.6624472573839663, + "grad_norm": 0.6955670714378357, + "learning_rate": 0.0003906884651818006, + "loss": 1.557, + "step": 6280 + }, + { + "epoch": 0.6625527426160338, + "grad_norm": 0.6836668848991394, + "learning_rate": 0.0003904681126872157, + "loss": 1.5035, + "step": 6281 + }, + { + "epoch": 0.6626582278481012, + "grad_norm": 0.6008947491645813, + "learning_rate": 0.00039024780047997157, + "loss": 1.5489, + "step": 6282 + }, + { + "epoch": 0.6627637130801688, + "grad_norm": 0.6749040484428406, + "learning_rate": 0.00039002752858475527, + "loss": 1.5312, + "step": 6283 + }, + { + "epoch": 0.6628691983122363, + "grad_norm": 0.684668242931366, + "learning_rate": 
0.00038980729702624896, + "loss": 1.5957, + "step": 6284 + }, + { + "epoch": 0.6629746835443038, + "grad_norm": 0.6633396744728088, + "learning_rate": 0.00038958710582913153, + "loss": 1.569, + "step": 6285 + }, + { + "epoch": 0.6630801687763713, + "grad_norm": 0.6429586410522461, + "learning_rate": 0.0003893669550180761, + "loss": 1.5473, + "step": 6286 + }, + { + "epoch": 0.6631856540084389, + "grad_norm": 0.5796381235122681, + "learning_rate": 0.00038914684461775154, + "loss": 1.5718, + "step": 6287 + }, + { + "epoch": 0.6632911392405063, + "grad_norm": 0.5978429913520813, + "learning_rate": 0.0003889267746528225, + "loss": 1.5668, + "step": 6288 + }, + { + "epoch": 0.6633966244725739, + "grad_norm": 0.5995641946792603, + "learning_rate": 0.00038870674514794877, + "loss": 1.5633, + "step": 6289 + }, + { + "epoch": 0.6635021097046413, + "grad_norm": 0.6156789660453796, + "learning_rate": 0.00038848675612778577, + "loss": 1.536, + "step": 6290 + }, + { + "epoch": 0.6636075949367088, + "grad_norm": 0.5509282350540161, + "learning_rate": 0.0003882668076169846, + "loss": 1.5185, + "step": 6291 + }, + { + "epoch": 0.6637130801687764, + "grad_norm": 0.5581165552139282, + "learning_rate": 0.0003880468996401912, + "loss": 1.5686, + "step": 6292 + }, + { + "epoch": 0.6638185654008438, + "grad_norm": 0.6919741034507751, + "learning_rate": 0.0003878270322220474, + "loss": 1.5539, + "step": 6293 + }, + { + "epoch": 0.6639240506329114, + "grad_norm": 0.5772029161453247, + "learning_rate": 0.00038760720538719086, + "loss": 1.5725, + "step": 6294 + }, + { + "epoch": 0.6640295358649789, + "grad_norm": 0.6322962045669556, + "learning_rate": 0.0003873874191602539, + "loss": 1.5482, + "step": 6295 + }, + { + "epoch": 0.6641350210970464, + "grad_norm": 0.6118799448013306, + "learning_rate": 0.00038716767356586487, + "loss": 1.5575, + "step": 6296 + }, + { + "epoch": 0.6642405063291139, + "grad_norm": 0.6978674530982971, + "learning_rate": 0.00038694796862864724, + "loss": 1.5513, + 
"step": 6297 + }, + { + "epoch": 0.6643459915611815, + "grad_norm": 0.7129005193710327, + "learning_rate": 0.00038672830437322007, + "loss": 1.5704, + "step": 6298 + }, + { + "epoch": 0.6644514767932489, + "grad_norm": 0.6709166765213013, + "learning_rate": 0.0003865086808241979, + "loss": 1.5296, + "step": 6299 + }, + { + "epoch": 0.6645569620253164, + "grad_norm": 0.6095154285430908, + "learning_rate": 0.00038628909800619046, + "loss": 1.521, + "step": 6300 + }, + { + "epoch": 0.664662447257384, + "grad_norm": 0.662627637386322, + "learning_rate": 0.00038606955594380326, + "loss": 1.5502, + "step": 6301 + }, + { + "epoch": 0.6647679324894514, + "grad_norm": 0.6000443696975708, + "learning_rate": 0.0003858500546616368, + "loss": 1.537, + "step": 6302 + }, + { + "epoch": 0.664873417721519, + "grad_norm": 0.8131557106971741, + "learning_rate": 0.0003856305941842878, + "loss": 1.5215, + "step": 6303 + }, + { + "epoch": 0.6649789029535865, + "grad_norm": 0.642214298248291, + "learning_rate": 0.0003854111745363476, + "loss": 1.5387, + "step": 6304 + }, + { + "epoch": 0.665084388185654, + "grad_norm": 0.7693077921867371, + "learning_rate": 0.00038519179574240324, + "loss": 1.5552, + "step": 6305 + }, + { + "epoch": 0.6651898734177215, + "grad_norm": 0.6351905465126038, + "learning_rate": 0.0003849724578270374, + "loss": 1.5369, + "step": 6306 + }, + { + "epoch": 0.6652953586497891, + "grad_norm": 0.5565944910049438, + "learning_rate": 0.0003847531608148277, + "loss": 1.5935, + "step": 6307 + }, + { + "epoch": 0.6654008438818565, + "grad_norm": 0.6297193169593811, + "learning_rate": 0.0003845339047303477, + "loss": 1.5547, + "step": 6308 + }, + { + "epoch": 0.665506329113924, + "grad_norm": 0.5964865684509277, + "learning_rate": 0.0003843146895981661, + "loss": 1.5318, + "step": 6309 + }, + { + "epoch": 0.6656118143459916, + "grad_norm": 0.6254372596740723, + "learning_rate": 0.0003840955154428467, + "loss": 1.5703, + "step": 6310 + }, + { + "epoch": 0.665717299578059, + 
"grad_norm": 0.6126334071159363, + "learning_rate": 0.0003838763822889495, + "loss": 1.5759, + "step": 6311 + }, + { + "epoch": 0.6658227848101266, + "grad_norm": 0.5814884305000305, + "learning_rate": 0.0003836572901610295, + "loss": 1.5079, + "step": 6312 + }, + { + "epoch": 0.6659282700421941, + "grad_norm": 0.559445858001709, + "learning_rate": 0.0003834382390836368, + "loss": 1.5506, + "step": 6313 + }, + { + "epoch": 0.6660337552742616, + "grad_norm": 0.622349202632904, + "learning_rate": 0.00038321922908131736, + "loss": 1.5341, + "step": 6314 + }, + { + "epoch": 0.6661392405063291, + "grad_norm": 0.5714811086654663, + "learning_rate": 0.0003830002601786121, + "loss": 1.5182, + "step": 6315 + }, + { + "epoch": 0.6662447257383967, + "grad_norm": 0.5570531487464905, + "learning_rate": 0.0003827813324000578, + "loss": 1.5468, + "step": 6316 + }, + { + "epoch": 0.6663502109704641, + "grad_norm": 0.611077070236206, + "learning_rate": 0.0003825624457701863, + "loss": 1.5739, + "step": 6317 + }, + { + "epoch": 0.6664556962025316, + "grad_norm": 0.6122767329216003, + "learning_rate": 0.00038234360031352485, + "loss": 1.5513, + "step": 6318 + }, + { + "epoch": 0.6665611814345992, + "grad_norm": 0.6702613830566406, + "learning_rate": 0.00038212479605459617, + "loss": 1.5661, + "step": 6319 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.593781590461731, + "learning_rate": 0.00038190603301791864, + "loss": 1.5248, + "step": 6320 + }, + { + "epoch": 0.6667721518987342, + "grad_norm": 0.5887137055397034, + "learning_rate": 0.0003816873112280056, + "loss": 1.5644, + "step": 6321 + }, + { + "epoch": 0.6668776371308017, + "grad_norm": 0.5685881972312927, + "learning_rate": 0.00038146863070936607, + "loss": 1.5536, + "step": 6322 + }, + { + "epoch": 0.6669831223628692, + "grad_norm": 0.6250532269477844, + "learning_rate": 0.0003812499914865039, + "loss": 1.5352, + "step": 6323 + }, + { + "epoch": 0.6670886075949367, + "grad_norm": 0.6628276705741882, + 
"learning_rate": 0.00038103139358391914, + "loss": 1.5977, + "step": 6324 + }, + { + "epoch": 0.6671940928270043, + "grad_norm": 0.6216896176338196, + "learning_rate": 0.0003808128370261065, + "loss": 1.5604, + "step": 6325 + }, + { + "epoch": 0.6672995780590717, + "grad_norm": 0.6887274384498596, + "learning_rate": 0.00038059432183755633, + "loss": 1.5342, + "step": 6326 + }, + { + "epoch": 0.6674050632911392, + "grad_norm": 0.6622229814529419, + "learning_rate": 0.0003803758480427544, + "loss": 1.5632, + "step": 6327 + }, + { + "epoch": 0.6675105485232068, + "grad_norm": 0.6187766790390015, + "learning_rate": 0.0003801574156661817, + "loss": 1.4952, + "step": 6328 + }, + { + "epoch": 0.6676160337552742, + "grad_norm": 0.6901224851608276, + "learning_rate": 0.000379939024732315, + "loss": 1.5469, + "step": 6329 + }, + { + "epoch": 0.6677215189873418, + "grad_norm": 0.6835325360298157, + "learning_rate": 0.0003797206752656258, + "loss": 1.5818, + "step": 6330 + }, + { + "epoch": 0.6678270042194093, + "grad_norm": 0.6935779452323914, + "learning_rate": 0.0003795023672905814, + "loss": 1.5171, + "step": 6331 + }, + { + "epoch": 0.6679324894514768, + "grad_norm": 0.7058724164962769, + "learning_rate": 0.00037928410083164416, + "loss": 1.5843, + "step": 6332 + }, + { + "epoch": 0.6680379746835443, + "grad_norm": 0.5498900413513184, + "learning_rate": 0.0003790658759132719, + "loss": 1.5586, + "step": 6333 + }, + { + "epoch": 0.6681434599156119, + "grad_norm": 0.7047332525253296, + "learning_rate": 0.0003788476925599181, + "loss": 1.5389, + "step": 6334 + }, + { + "epoch": 0.6682489451476793, + "grad_norm": 0.5663877725601196, + "learning_rate": 0.00037862955079603086, + "loss": 1.523, + "step": 6335 + }, + { + "epoch": 0.6683544303797468, + "grad_norm": 0.5852495431900024, + "learning_rate": 0.00037841145064605416, + "loss": 1.5753, + "step": 6336 + }, + { + "epoch": 0.6684599156118144, + "grad_norm": 0.5625362396240234, + "learning_rate": 0.00037819339213442744, + 
"loss": 1.5421, + "step": 6337 + }, + { + "epoch": 0.6685654008438818, + "grad_norm": 0.57704758644104, + "learning_rate": 0.0003779753752855853, + "loss": 1.5369, + "step": 6338 + }, + { + "epoch": 0.6686708860759494, + "grad_norm": 0.6054133772850037, + "learning_rate": 0.0003777574001239573, + "loss": 1.5623, + "step": 6339 + }, + { + "epoch": 0.6687763713080169, + "grad_norm": 0.572931706905365, + "learning_rate": 0.0003775394666739688, + "loss": 1.5296, + "step": 6340 + }, + { + "epoch": 0.6688818565400844, + "grad_norm": 0.6353102326393127, + "learning_rate": 0.0003773215749600404, + "loss": 1.5445, + "step": 6341 + }, + { + "epoch": 0.6689873417721519, + "grad_norm": 0.6637093424797058, + "learning_rate": 0.0003771037250065878, + "loss": 1.5947, + "step": 6342 + }, + { + "epoch": 0.6690928270042195, + "grad_norm": 0.599976122379303, + "learning_rate": 0.0003768859168380223, + "loss": 1.5584, + "step": 6343 + }, + { + "epoch": 0.6691983122362869, + "grad_norm": 0.6469217538833618, + "learning_rate": 0.0003766681504787503, + "loss": 1.5435, + "step": 6344 + }, + { + "epoch": 0.6693037974683544, + "grad_norm": 0.6602401733398438, + "learning_rate": 0.0003764504259531734, + "loss": 1.5646, + "step": 6345 + }, + { + "epoch": 0.669409282700422, + "grad_norm": 0.5704680681228638, + "learning_rate": 0.0003762327432856892, + "loss": 1.5533, + "step": 6346 + }, + { + "epoch": 0.6695147679324894, + "grad_norm": 0.698168158531189, + "learning_rate": 0.00037601510250068984, + "loss": 1.5315, + "step": 6347 + }, + { + "epoch": 0.669620253164557, + "grad_norm": 0.6257003545761108, + "learning_rate": 0.0003757975036225632, + "loss": 1.526, + "step": 6348 + }, + { + "epoch": 0.6697257383966245, + "grad_norm": 0.6918255686759949, + "learning_rate": 0.00037557994667569217, + "loss": 1.5341, + "step": 6349 + }, + { + "epoch": 0.669831223628692, + "grad_norm": 0.706445574760437, + "learning_rate": 0.00037536243168445507, + "loss": 1.5592, + "step": 6350 + }, + { + "epoch": 
0.6699367088607595, + "grad_norm": 0.5903289318084717, + "learning_rate": 0.0003751449586732257, + "loss": 1.5496, + "step": 6351 + }, + { + "epoch": 0.6700421940928271, + "grad_norm": 0.7070174813270569, + "learning_rate": 0.0003749275276663729, + "loss": 1.5374, + "step": 6352 + }, + { + "epoch": 0.6701476793248945, + "grad_norm": 0.6155831217765808, + "learning_rate": 0.0003747101386882609, + "loss": 1.5302, + "step": 6353 + }, + { + "epoch": 0.670253164556962, + "grad_norm": 0.6769121885299683, + "learning_rate": 0.0003744927917632489, + "loss": 1.5896, + "step": 6354 + }, + { + "epoch": 0.6703586497890295, + "grad_norm": 0.6185394525527954, + "learning_rate": 0.00037427548691569237, + "loss": 1.5024, + "step": 6355 + }, + { + "epoch": 0.670464135021097, + "grad_norm": 0.6025123000144958, + "learning_rate": 0.000374058224169941, + "loss": 1.5405, + "step": 6356 + }, + { + "epoch": 0.6705696202531646, + "grad_norm": 0.6389116048812866, + "learning_rate": 0.00037384100355034033, + "loss": 1.5514, + "step": 6357 + }, + { + "epoch": 0.670675105485232, + "grad_norm": 0.653720498085022, + "learning_rate": 0.0003736238250812308, + "loss": 1.58, + "step": 6358 + }, + { + "epoch": 0.6707805907172996, + "grad_norm": 0.633380651473999, + "learning_rate": 0.0003734066887869485, + "loss": 1.539, + "step": 6359 + }, + { + "epoch": 0.6708860759493671, + "grad_norm": 0.6472046971321106, + "learning_rate": 0.0003731895946918246, + "loss": 1.5654, + "step": 6360 + }, + { + "epoch": 0.6709915611814345, + "grad_norm": 0.6148370504379272, + "learning_rate": 0.0003729725428201856, + "loss": 1.5353, + "step": 6361 + }, + { + "epoch": 0.6710970464135021, + "grad_norm": 0.5786266922950745, + "learning_rate": 0.00037275553319635285, + "loss": 1.5482, + "step": 6362 + }, + { + "epoch": 0.6712025316455696, + "grad_norm": 0.6770305037498474, + "learning_rate": 0.000372538565844644, + "loss": 1.561, + "step": 6363 + }, + { + "epoch": 0.6713080168776371, + "grad_norm": 0.6799592971801758, + 
"learning_rate": 0.00037232164078937106, + "loss": 1.5236, + "step": 6364 + }, + { + "epoch": 0.6714135021097046, + "grad_norm": 0.5685285329818726, + "learning_rate": 0.00037210475805484156, + "loss": 1.5135, + "step": 6365 + }, + { + "epoch": 0.6715189873417722, + "grad_norm": 0.7116044759750366, + "learning_rate": 0.00037188791766535825, + "loss": 1.5039, + "step": 6366 + }, + { + "epoch": 0.6716244725738396, + "grad_norm": 0.5920616984367371, + "learning_rate": 0.0003716711196452192, + "loss": 1.4817, + "step": 6367 + }, + { + "epoch": 0.6717299578059072, + "grad_norm": 0.58809894323349, + "learning_rate": 0.0003714543640187177, + "loss": 1.5409, + "step": 6368 + }, + { + "epoch": 0.6718354430379747, + "grad_norm": 0.6709463000297546, + "learning_rate": 0.0003712376508101424, + "loss": 1.559, + "step": 6369 + }, + { + "epoch": 0.6719409282700421, + "grad_norm": 0.5831149220466614, + "learning_rate": 0.0003710209800437769, + "loss": 1.5488, + "step": 6370 + }, + { + "epoch": 0.6720464135021097, + "grad_norm": 0.7230551242828369, + "learning_rate": 0.00037080435174390014, + "loss": 1.5106, + "step": 6371 + }, + { + "epoch": 0.6721518987341772, + "grad_norm": 0.6562464237213135, + "learning_rate": 0.00037058776593478675, + "loss": 1.5586, + "step": 6372 + }, + { + "epoch": 0.6722573839662447, + "grad_norm": 0.7749350070953369, + "learning_rate": 0.00037037122264070625, + "loss": 1.5565, + "step": 6373 + }, + { + "epoch": 0.6723628691983122, + "grad_norm": 0.5907933712005615, + "learning_rate": 0.0003701547218859232, + "loss": 1.5263, + "step": 6374 + }, + { + "epoch": 0.6724683544303798, + "grad_norm": 0.8326179385185242, + "learning_rate": 0.0003699382636946977, + "loss": 1.5148, + "step": 6375 + }, + { + "epoch": 0.6725738396624472, + "grad_norm": 0.6020264029502869, + "learning_rate": 0.0003697218480912848, + "loss": 1.5247, + "step": 6376 + }, + { + "epoch": 0.6726793248945148, + "grad_norm": 0.6237233877182007, + "learning_rate": 0.0003695054750999352, + 
"loss": 1.5635, + "step": 6377 + }, + { + "epoch": 0.6727848101265823, + "grad_norm": 0.6724500060081482, + "learning_rate": 0.0003692891447448943, + "loss": 1.539, + "step": 6378 + }, + { + "epoch": 0.6728902953586497, + "grad_norm": 0.5904805660247803, + "learning_rate": 0.0003690728570504032, + "loss": 1.5497, + "step": 6379 + }, + { + "epoch": 0.6729957805907173, + "grad_norm": 0.5997678637504578, + "learning_rate": 0.00036885661204069767, + "loss": 1.5326, + "step": 6380 + }, + { + "epoch": 0.6731012658227848, + "grad_norm": 0.7178093791007996, + "learning_rate": 0.00036864040974000955, + "loss": 1.5492, + "step": 6381 + }, + { + "epoch": 0.6732067510548523, + "grad_norm": 0.7277467846870422, + "learning_rate": 0.0003684242501725652, + "loss": 1.5306, + "step": 6382 + }, + { + "epoch": 0.6733122362869198, + "grad_norm": 0.7142476439476013, + "learning_rate": 0.00036820813336258624, + "loss": 1.609, + "step": 6383 + }, + { + "epoch": 0.6734177215189874, + "grad_norm": 0.5548854470252991, + "learning_rate": 0.0003679920593342898, + "loss": 1.5356, + "step": 6384 + }, + { + "epoch": 0.6735232067510548, + "grad_norm": 0.7580459713935852, + "learning_rate": 0.0003677760281118879, + "loss": 1.5897, + "step": 6385 + }, + { + "epoch": 0.6736286919831224, + "grad_norm": 0.6294963955879211, + "learning_rate": 0.0003675600397195881, + "loss": 1.5834, + "step": 6386 + }, + { + "epoch": 0.6737341772151899, + "grad_norm": 0.8373022079467773, + "learning_rate": 0.0003673440941815928, + "loss": 1.553, + "step": 6387 + }, + { + "epoch": 0.6738396624472573, + "grad_norm": 0.6815715432167053, + "learning_rate": 0.00036712819152209954, + "loss": 1.5517, + "step": 6388 + }, + { + "epoch": 0.6739451476793249, + "grad_norm": 0.8451685905456543, + "learning_rate": 0.00036691233176530197, + "loss": 1.5543, + "step": 6389 + }, + { + "epoch": 0.6740506329113924, + "grad_norm": 0.5930854082107544, + "learning_rate": 0.0003666965149353878, + "loss": 1.4971, + "step": 6390 + }, + { + 
"epoch": 0.6741561181434599, + "grad_norm": 0.6118779182434082, + "learning_rate": 0.00036648074105654043, + "loss": 1.5495, + "step": 6391 + }, + { + "epoch": 0.6742616033755274, + "grad_norm": 0.7256380319595337, + "learning_rate": 0.0003662650101529385, + "loss": 1.5875, + "step": 6392 + }, + { + "epoch": 0.674367088607595, + "grad_norm": 0.6323543787002563, + "learning_rate": 0.00036604932224875564, + "loss": 1.5388, + "step": 6393 + }, + { + "epoch": 0.6744725738396624, + "grad_norm": 0.6043851971626282, + "learning_rate": 0.0003658336773681607, + "loss": 1.5484, + "step": 6394 + }, + { + "epoch": 0.67457805907173, + "grad_norm": 0.8571478128433228, + "learning_rate": 0.0003656180755353179, + "loss": 1.538, + "step": 6395 + }, + { + "epoch": 0.6746835443037975, + "grad_norm": 0.6427726149559021, + "learning_rate": 0.0003654025167743864, + "loss": 1.5278, + "step": 6396 + }, + { + "epoch": 0.674789029535865, + "grad_norm": 0.8187741637229919, + "learning_rate": 0.0003651870011095204, + "loss": 1.592, + "step": 6397 + }, + { + "epoch": 0.6748945147679325, + "grad_norm": 0.6245806217193604, + "learning_rate": 0.0003649715285648701, + "loss": 1.5176, + "step": 6398 + }, + { + "epoch": 0.675, + "grad_norm": 0.6540650129318237, + "learning_rate": 0.00036475609916457996, + "loss": 1.5444, + "step": 6399 + }, + { + "epoch": 0.6751054852320675, + "grad_norm": 0.7524939775466919, + "learning_rate": 0.0003645407129327898, + "loss": 1.5712, + "step": 6400 + }, + { + "epoch": 0.675210970464135, + "grad_norm": 0.6443389058113098, + "learning_rate": 0.0003643253698936349, + "loss": 1.5236, + "step": 6401 + }, + { + "epoch": 0.6753164556962026, + "grad_norm": 0.6841305494308472, + "learning_rate": 0.00036411007007124547, + "loss": 1.547, + "step": 6402 + }, + { + "epoch": 0.67542194092827, + "grad_norm": 0.6407127976417542, + "learning_rate": 0.0003638948134897469, + "loss": 1.5801, + "step": 6403 + }, + { + "epoch": 0.6755274261603376, + "grad_norm": 0.685471773147583, + 
"learning_rate": 0.0003636796001732597, + "loss": 1.5799, + "step": 6404 + }, + { + "epoch": 0.6756329113924051, + "grad_norm": 0.5876286029815674, + "learning_rate": 0.00036346443014589983, + "loss": 1.56, + "step": 6405 + }, + { + "epoch": 0.6757383966244725, + "grad_norm": 0.6963139176368713, + "learning_rate": 0.00036324930343177754, + "loss": 1.5562, + "step": 6406 + }, + { + "epoch": 0.6758438818565401, + "grad_norm": 0.6122089624404907, + "learning_rate": 0.0003630342200549997, + "loss": 1.5383, + "step": 6407 + }, + { + "epoch": 0.6759493670886076, + "grad_norm": 0.6777079105377197, + "learning_rate": 0.000362819180039667, + "loss": 1.5307, + "step": 6408 + }, + { + "epoch": 0.6760548523206751, + "grad_norm": 0.6360573768615723, + "learning_rate": 0.000362604183409876, + "loss": 1.5348, + "step": 6409 + }, + { + "epoch": 0.6761603375527426, + "grad_norm": 0.7171667218208313, + "learning_rate": 0.00036238923018971783, + "loss": 1.5225, + "step": 6410 + }, + { + "epoch": 0.6762658227848102, + "grad_norm": 0.649173378944397, + "learning_rate": 0.00036217432040327926, + "loss": 1.5552, + "step": 6411 + }, + { + "epoch": 0.6763713080168776, + "grad_norm": 0.604894757270813, + "learning_rate": 0.000361959454074642, + "loss": 1.5117, + "step": 6412 + }, + { + "epoch": 0.6764767932489452, + "grad_norm": 0.6883509159088135, + "learning_rate": 0.00036174463122788273, + "loss": 1.5234, + "step": 6413 + }, + { + "epoch": 0.6765822784810127, + "grad_norm": 0.5961794853210449, + "learning_rate": 0.00036152985188707344, + "loss": 1.5187, + "step": 6414 + }, + { + "epoch": 0.6766877637130801, + "grad_norm": 0.726655125617981, + "learning_rate": 0.0003613151160762815, + "loss": 1.5623, + "step": 6415 + }, + { + "epoch": 0.6767932489451477, + "grad_norm": 0.6048687100410461, + "learning_rate": 0.00036110042381956895, + "loss": 1.5122, + "step": 6416 + }, + { + "epoch": 0.6768987341772152, + "grad_norm": 0.6716532707214355, + "learning_rate": 0.00036088577514099325, + "loss": 
1.5573, + "step": 6417 + }, + { + "epoch": 0.6770042194092827, + "grad_norm": 0.7477344274520874, + "learning_rate": 0.0003606711700646067, + "loss": 1.5634, + "step": 6418 + }, + { + "epoch": 0.6771097046413502, + "grad_norm": 0.6660054922103882, + "learning_rate": 0.00036045660861445684, + "loss": 1.5177, + "step": 6419 + }, + { + "epoch": 0.6772151898734177, + "grad_norm": 0.5841560959815979, + "learning_rate": 0.0003602420908145865, + "loss": 1.5683, + "step": 6420 + }, + { + "epoch": 0.6773206751054852, + "grad_norm": 0.6597288250923157, + "learning_rate": 0.00036002761668903335, + "loss": 1.5471, + "step": 6421 + }, + { + "epoch": 0.6774261603375528, + "grad_norm": 0.6446864604949951, + "learning_rate": 0.0003598131862618304, + "loss": 1.5321, + "step": 6422 + }, + { + "epoch": 0.6775316455696202, + "grad_norm": 0.5979139804840088, + "learning_rate": 0.0003595987995570052, + "loss": 1.5236, + "step": 6423 + }, + { + "epoch": 0.6776371308016877, + "grad_norm": 0.6423291563987732, + "learning_rate": 0.0003593844565985815, + "loss": 1.5801, + "step": 6424 + }, + { + "epoch": 0.6777426160337553, + "grad_norm": 0.6447482109069824, + "learning_rate": 0.00035917015741057727, + "loss": 1.5808, + "step": 6425 + }, + { + "epoch": 0.6778481012658227, + "grad_norm": 0.6859869956970215, + "learning_rate": 0.0003589559020170058, + "loss": 1.5434, + "step": 6426 + }, + { + "epoch": 0.6779535864978903, + "grad_norm": 0.6457021832466125, + "learning_rate": 0.00035874169044187537, + "loss": 1.5688, + "step": 6427 + }, + { + "epoch": 0.6780590717299578, + "grad_norm": 0.6194134950637817, + "learning_rate": 0.00035852752270918955, + "loss": 1.5556, + "step": 6428 + }, + { + "epoch": 0.6781645569620253, + "grad_norm": 0.6230828166007996, + "learning_rate": 0.0003583133988429468, + "loss": 1.5042, + "step": 6429 + }, + { + "epoch": 0.6782700421940928, + "grad_norm": 0.6233963966369629, + "learning_rate": 0.00035809931886714093, + "loss": 1.5677, + "step": 6430 + }, + { + "epoch": 
0.6783755274261604, + "grad_norm": 0.700780987739563, + "learning_rate": 0.00035788528280576053, + "loss": 1.5131, + "step": 6431 + }, + { + "epoch": 0.6784810126582278, + "grad_norm": 0.6608981490135193, + "learning_rate": 0.0003576712906827892, + "loss": 1.5407, + "step": 6432 + }, + { + "epoch": 0.6785864978902953, + "grad_norm": 0.6264764666557312, + "learning_rate": 0.00035745734252220633, + "loss": 1.5117, + "step": 6433 + }, + { + "epoch": 0.6786919831223629, + "grad_norm": 0.7222819328308105, + "learning_rate": 0.00035724343834798566, + "loss": 1.5332, + "step": 6434 + }, + { + "epoch": 0.6787974683544303, + "grad_norm": 0.7161921858787537, + "learning_rate": 0.00035702957818409606, + "loss": 1.5156, + "step": 6435 + }, + { + "epoch": 0.6789029535864979, + "grad_norm": 0.6224656105041504, + "learning_rate": 0.0003568157620545019, + "loss": 1.5376, + "step": 6436 + }, + { + "epoch": 0.6790084388185654, + "grad_norm": 0.70475834608078, + "learning_rate": 0.00035660198998316213, + "loss": 1.5288, + "step": 6437 + }, + { + "epoch": 0.6791139240506329, + "grad_norm": 0.6374896764755249, + "learning_rate": 0.00035638826199403103, + "loss": 1.5442, + "step": 6438 + }, + { + "epoch": 0.6792194092827004, + "grad_norm": 0.7114620804786682, + "learning_rate": 0.0003561745781110579, + "loss": 1.5334, + "step": 6439 + }, + { + "epoch": 0.679324894514768, + "grad_norm": 0.6190913319587708, + "learning_rate": 0.00035596093835818683, + "loss": 1.546, + "step": 6440 + }, + { + "epoch": 0.6794303797468354, + "grad_norm": 0.6888184547424316, + "learning_rate": 0.0003557473427593578, + "loss": 1.5664, + "step": 6441 + }, + { + "epoch": 0.679535864978903, + "grad_norm": 0.6040745377540588, + "learning_rate": 0.0003555337913385048, + "loss": 1.4972, + "step": 6442 + }, + { + "epoch": 0.6796413502109705, + "grad_norm": 0.611476719379425, + "learning_rate": 0.0003553202841195576, + "loss": 1.5397, + "step": 6443 + }, + { + "epoch": 0.6797468354430379, + "grad_norm": 
0.6496919989585876, + "learning_rate": 0.00035510682112644055, + "loss": 1.5599, + "step": 6444 + }, + { + "epoch": 0.6798523206751055, + "grad_norm": 0.6137261986732483, + "learning_rate": 0.00035489340238307326, + "loss": 1.5506, + "step": 6445 + }, + { + "epoch": 0.679957805907173, + "grad_norm": 0.6187177896499634, + "learning_rate": 0.00035468002791337047, + "loss": 1.5349, + "step": 6446 + }, + { + "epoch": 0.6800632911392405, + "grad_norm": 0.7140669226646423, + "learning_rate": 0.0003544666977412418, + "loss": 1.5362, + "step": 6447 + }, + { + "epoch": 0.680168776371308, + "grad_norm": 0.8237379193305969, + "learning_rate": 0.000354253411890592, + "loss": 1.523, + "step": 6448 + }, + { + "epoch": 0.6802742616033756, + "grad_norm": 0.6233646273612976, + "learning_rate": 0.00035404017038532045, + "loss": 1.5128, + "step": 6449 + }, + { + "epoch": 0.680379746835443, + "grad_norm": 0.6260765790939331, + "learning_rate": 0.00035382697324932245, + "loss": 1.5494, + "step": 6450 + }, + { + "epoch": 0.6804852320675105, + "grad_norm": 0.6928077936172485, + "learning_rate": 0.0003536138205064877, + "loss": 1.5302, + "step": 6451 + }, + { + "epoch": 0.6805907172995781, + "grad_norm": 0.6267272233963013, + "learning_rate": 0.0003534007121807009, + "loss": 1.5782, + "step": 6452 + }, + { + "epoch": 0.6806962025316455, + "grad_norm": 0.6993907690048218, + "learning_rate": 0.00035318764829584185, + "loss": 1.5772, + "step": 6453 + }, + { + "epoch": 0.6808016877637131, + "grad_norm": 0.5932862758636475, + "learning_rate": 0.0003529746288757856, + "loss": 1.5189, + "step": 6454 + }, + { + "epoch": 0.6809071729957806, + "grad_norm": 0.6401775479316711, + "learning_rate": 0.0003527616539444019, + "loss": 1.5403, + "step": 6455 + }, + { + "epoch": 0.6810126582278481, + "grad_norm": 0.604815661907196, + "learning_rate": 0.0003525487235255556, + "loss": 1.5264, + "step": 6456 + }, + { + "epoch": 0.6811181434599156, + "grad_norm": 0.6224656701087952, + "learning_rate": 
0.0003523358376431068, + "loss": 1.5613, + "step": 6457 + }, + { + "epoch": 0.6812236286919832, + "grad_norm": 0.6289427876472473, + "learning_rate": 0.00035212299632090996, + "loss": 1.5449, + "step": 6458 + }, + { + "epoch": 0.6813291139240506, + "grad_norm": 0.6073744893074036, + "learning_rate": 0.00035191019958281575, + "loss": 1.5624, + "step": 6459 + }, + { + "epoch": 0.6814345991561181, + "grad_norm": 0.6987532377243042, + "learning_rate": 0.00035169744745266866, + "loss": 1.5666, + "step": 6460 + }, + { + "epoch": 0.6815400843881857, + "grad_norm": 0.6203799247741699, + "learning_rate": 0.0003514847399543087, + "loss": 1.5416, + "step": 6461 + }, + { + "epoch": 0.6816455696202531, + "grad_norm": 0.5889977216720581, + "learning_rate": 0.00035127207711157084, + "loss": 1.5456, + "step": 6462 + }, + { + "epoch": 0.6817510548523207, + "grad_norm": 0.7469324469566345, + "learning_rate": 0.00035105945894828495, + "loss": 1.5386, + "step": 6463 + }, + { + "epoch": 0.6818565400843882, + "grad_norm": 0.6381261348724365, + "learning_rate": 0.000350846885488276, + "loss": 1.5493, + "step": 6464 + }, + { + "epoch": 0.6819620253164557, + "grad_norm": 0.8778780102729797, + "learning_rate": 0.00035063435675536386, + "loss": 1.5426, + "step": 6465 + }, + { + "epoch": 0.6820675105485232, + "grad_norm": 0.5638143420219421, + "learning_rate": 0.00035042187277336325, + "loss": 1.5163, + "step": 6466 + }, + { + "epoch": 0.6821729957805908, + "grad_norm": 0.6300468444824219, + "learning_rate": 0.00035020943356608444, + "loss": 1.5406, + "step": 6467 + }, + { + "epoch": 0.6822784810126582, + "grad_norm": 0.6460151076316833, + "learning_rate": 0.0003499970391573322, + "loss": 1.5664, + "step": 6468 + }, + { + "epoch": 0.6823839662447257, + "grad_norm": 0.6014083623886108, + "learning_rate": 0.00034978468957090635, + "loss": 1.5292, + "step": 6469 + }, + { + "epoch": 0.6824894514767933, + "grad_norm": 0.6123272180557251, + "learning_rate": 0.0003495723848306017, + "loss": 1.5763, 
+ "step": 6470 + }, + { + "epoch": 0.6825949367088607, + "grad_norm": 0.6150495409965515, + "learning_rate": 0.000349360124960208, + "loss": 1.5314, + "step": 6471 + }, + { + "epoch": 0.6827004219409283, + "grad_norm": 0.5938233733177185, + "learning_rate": 0.00034914790998351005, + "loss": 1.5918, + "step": 6472 + }, + { + "epoch": 0.6828059071729958, + "grad_norm": 0.7236776351928711, + "learning_rate": 0.0003489357399242876, + "loss": 1.5573, + "step": 6473 + }, + { + "epoch": 0.6829113924050633, + "grad_norm": 0.6670244932174683, + "learning_rate": 0.0003487236148063154, + "loss": 1.5305, + "step": 6474 + }, + { + "epoch": 0.6830168776371308, + "grad_norm": 0.5661152005195618, + "learning_rate": 0.0003485115346533629, + "loss": 1.5537, + "step": 6475 + }, + { + "epoch": 0.6831223628691984, + "grad_norm": 0.6651420593261719, + "learning_rate": 0.00034829949948919517, + "loss": 1.5701, + "step": 6476 + }, + { + "epoch": 0.6832278481012658, + "grad_norm": 0.5899657011032104, + "learning_rate": 0.00034808750933757154, + "loss": 1.5441, + "step": 6477 + }, + { + "epoch": 0.6833333333333333, + "grad_norm": 0.6337290406227112, + "learning_rate": 0.0003478755642222466, + "loss": 1.5533, + "step": 6478 + }, + { + "epoch": 0.6834388185654009, + "grad_norm": 0.6397991180419922, + "learning_rate": 0.0003476636641669699, + "loss": 1.5267, + "step": 6479 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 0.718904972076416, + "learning_rate": 0.0003474518091954859, + "loss": 1.5575, + "step": 6480 + }, + { + "epoch": 0.6836497890295359, + "grad_norm": 0.6134064197540283, + "learning_rate": 0.00034723999933153387, + "loss": 1.5468, + "step": 6481 + }, + { + "epoch": 0.6837552742616034, + "grad_norm": 0.6121754050254822, + "learning_rate": 0.00034702823459884836, + "loss": 1.5047, + "step": 6482 + }, + { + "epoch": 0.6838607594936709, + "grad_norm": 0.5855075120925903, + "learning_rate": 0.0003468165150211585, + "loss": 1.4993, + "step": 6483 + }, + { + "epoch": 
0.6839662447257384, + "grad_norm": 0.5663575530052185, + "learning_rate": 0.0003466048406221883, + "loss": 1.5721, + "step": 6484 + }, + { + "epoch": 0.6840717299578059, + "grad_norm": 0.5939492583274841, + "learning_rate": 0.0003463932114256576, + "loss": 1.5491, + "step": 6485 + }, + { + "epoch": 0.6841772151898734, + "grad_norm": 0.7193787097930908, + "learning_rate": 0.00034618162745528, + "loss": 1.5473, + "step": 6486 + }, + { + "epoch": 0.684282700421941, + "grad_norm": 0.5827855467796326, + "learning_rate": 0.00034597008873476473, + "loss": 1.5465, + "step": 6487 + }, + { + "epoch": 0.6843881856540084, + "grad_norm": 0.748431384563446, + "learning_rate": 0.0003457585952878156, + "loss": 1.5708, + "step": 6488 + }, + { + "epoch": 0.6844936708860759, + "grad_norm": 0.6287034153938293, + "learning_rate": 0.0003455471471381318, + "loss": 1.5489, + "step": 6489 + }, + { + "epoch": 0.6845991561181435, + "grad_norm": 0.5672698020935059, + "learning_rate": 0.0003453357443094068, + "loss": 1.5547, + "step": 6490 + }, + { + "epoch": 0.6847046413502109, + "grad_norm": 0.7683044672012329, + "learning_rate": 0.0003451243868253294, + "loss": 1.5721, + "step": 6491 + }, + { + "epoch": 0.6848101265822785, + "grad_norm": 0.5734617710113525, + "learning_rate": 0.0003449130747095835, + "loss": 1.5828, + "step": 6492 + }, + { + "epoch": 0.684915611814346, + "grad_norm": 0.6168100833892822, + "learning_rate": 0.0003447018079858472, + "loss": 1.5495, + "step": 6493 + }, + { + "epoch": 0.6850210970464135, + "grad_norm": 0.6055771112442017, + "learning_rate": 0.0003444905866777946, + "loss": 1.5419, + "step": 6494 + }, + { + "epoch": 0.685126582278481, + "grad_norm": 0.5836721658706665, + "learning_rate": 0.0003442794108090938, + "loss": 1.5766, + "step": 6495 + }, + { + "epoch": 0.6852320675105485, + "grad_norm": 0.5828378200531006, + "learning_rate": 0.0003440682804034081, + "loss": 1.5518, + "step": 6496 + }, + { + "epoch": 0.685337552742616, + "grad_norm": 0.6052467226982117, 
+ "learning_rate": 0.00034385719548439585, + "loss": 1.5813, + "step": 6497 + }, + { + "epoch": 0.6854430379746835, + "grad_norm": 0.5306416153907776, + "learning_rate": 0.00034364615607570994, + "loss": 1.5419, + "step": 6498 + }, + { + "epoch": 0.6855485232067511, + "grad_norm": 0.6631790399551392, + "learning_rate": 0.0003434351622009985, + "loss": 1.5825, + "step": 6499 + }, + { + "epoch": 0.6856540084388185, + "grad_norm": 0.5954750776290894, + "learning_rate": 0.00034322421388390456, + "loss": 1.5356, + "step": 6500 + }, + { + "epoch": 0.6857594936708861, + "grad_norm": 0.5980273485183716, + "learning_rate": 0.00034301331114806573, + "loss": 1.556, + "step": 6501 + }, + { + "epoch": 0.6858649789029536, + "grad_norm": 0.5465315580368042, + "learning_rate": 0.0003428024540171148, + "loss": 1.5445, + "step": 6502 + }, + { + "epoch": 0.685970464135021, + "grad_norm": 0.5514545440673828, + "learning_rate": 0.0003425916425146791, + "loss": 1.5822, + "step": 6503 + }, + { + "epoch": 0.6860759493670886, + "grad_norm": 0.5941530466079712, + "learning_rate": 0.0003423808766643817, + "loss": 1.5798, + "step": 6504 + }, + { + "epoch": 0.6861814345991561, + "grad_norm": 0.5867817401885986, + "learning_rate": 0.00034217015648983957, + "loss": 1.5731, + "step": 6505 + }, + { + "epoch": 0.6862869198312236, + "grad_norm": 0.6443145275115967, + "learning_rate": 0.0003419594820146652, + "loss": 1.5753, + "step": 6506 + }, + { + "epoch": 0.6863924050632911, + "grad_norm": 0.6068113446235657, + "learning_rate": 0.0003417488532624653, + "loss": 1.5448, + "step": 6507 + }, + { + "epoch": 0.6864978902953587, + "grad_norm": 0.7181529402732849, + "learning_rate": 0.00034153827025684225, + "loss": 1.5274, + "step": 6508 + }, + { + "epoch": 0.6866033755274261, + "grad_norm": 0.5718250274658203, + "learning_rate": 0.0003413277330213928, + "loss": 1.5812, + "step": 6509 + }, + { + "epoch": 0.6867088607594937, + "grad_norm": 0.6672452092170715, + "learning_rate": 0.0003411172415797087, + 
"loss": 1.5484, + "step": 6510 + }, + { + "epoch": 0.6868143459915612, + "grad_norm": 0.6104376912117004, + "learning_rate": 0.00034090679595537646, + "loss": 1.5463, + "step": 6511 + }, + { + "epoch": 0.6869198312236287, + "grad_norm": 0.6356569528579712, + "learning_rate": 0.0003406963961719778, + "loss": 1.5573, + "step": 6512 + }, + { + "epoch": 0.6870253164556962, + "grad_norm": 0.6797261238098145, + "learning_rate": 0.00034048604225308854, + "loss": 1.5535, + "step": 6513 + }, + { + "epoch": 0.6871308016877637, + "grad_norm": 0.576042890548706, + "learning_rate": 0.00034027573422228054, + "loss": 1.5573, + "step": 6514 + }, + { + "epoch": 0.6872362869198312, + "grad_norm": 0.6314927935600281, + "learning_rate": 0.00034006547210311964, + "loss": 1.5398, + "step": 6515 + }, + { + "epoch": 0.6873417721518987, + "grad_norm": 0.6321495771408081, + "learning_rate": 0.0003398552559191667, + "loss": 1.5434, + "step": 6516 + }, + { + "epoch": 0.6874472573839663, + "grad_norm": 0.5790107250213623, + "learning_rate": 0.00033964508569397743, + "loss": 1.5829, + "step": 6517 + }, + { + "epoch": 0.6875527426160337, + "grad_norm": 0.6884430646896362, + "learning_rate": 0.0003394349614511026, + "loss": 1.5197, + "step": 6518 + }, + { + "epoch": 0.6876582278481013, + "grad_norm": 0.6149720549583435, + "learning_rate": 0.0003392248832140876, + "loss": 1.5005, + "step": 6519 + }, + { + "epoch": 0.6877637130801688, + "grad_norm": 0.5562114119529724, + "learning_rate": 0.0003390148510064727, + "loss": 1.5391, + "step": 6520 + }, + { + "epoch": 0.6878691983122363, + "grad_norm": 0.678675651550293, + "learning_rate": 0.00033880486485179305, + "loss": 1.5476, + "step": 6521 + }, + { + "epoch": 0.6879746835443038, + "grad_norm": 0.6726123690605164, + "learning_rate": 0.0003385949247735786, + "loss": 1.5172, + "step": 6522 + }, + { + "epoch": 0.6880801687763713, + "grad_norm": 0.6231810450553894, + "learning_rate": 0.00033838503079535435, + "loss": 1.5128, + "step": 6523 + }, + { + 
"epoch": 0.6881856540084388, + "grad_norm": 0.6614060997962952, + "learning_rate": 0.00033817518294064003, + "loss": 1.5422, + "step": 6524 + }, + { + "epoch": 0.6882911392405063, + "grad_norm": 0.6246515512466431, + "learning_rate": 0.00033796538123294996, + "loss": 1.5507, + "step": 6525 + }, + { + "epoch": 0.6883966244725739, + "grad_norm": 0.6496474742889404, + "learning_rate": 0.0003377556256957936, + "loss": 1.5459, + "step": 6526 + }, + { + "epoch": 0.6885021097046413, + "grad_norm": 0.6339291334152222, + "learning_rate": 0.0003375459163526749, + "loss": 1.5426, + "step": 6527 + }, + { + "epoch": 0.6886075949367089, + "grad_norm": 0.6080068945884705, + "learning_rate": 0.000337336253227093, + "loss": 1.5427, + "step": 6528 + }, + { + "epoch": 0.6887130801687764, + "grad_norm": 0.7401185035705566, + "learning_rate": 0.00033712663634254163, + "loss": 1.5268, + "step": 6529 + }, + { + "epoch": 0.6888185654008439, + "grad_norm": 0.7983853816986084, + "learning_rate": 0.0003369170657225094, + "loss": 1.5356, + "step": 6530 + }, + { + "epoch": 0.6889240506329114, + "grad_norm": 0.6747255325317383, + "learning_rate": 0.0003367075413904799, + "loss": 1.4924, + "step": 6531 + }, + { + "epoch": 0.689029535864979, + "grad_norm": 0.5438796281814575, + "learning_rate": 0.00033649806336993085, + "loss": 1.5177, + "step": 6532 + }, + { + "epoch": 0.6891350210970464, + "grad_norm": 0.7914319634437561, + "learning_rate": 0.0003362886316843361, + "loss": 1.5257, + "step": 6533 + }, + { + "epoch": 0.6892405063291139, + "grad_norm": 0.667393147945404, + "learning_rate": 0.000336079246357163, + "loss": 1.5429, + "step": 6534 + }, + { + "epoch": 0.6893459915611815, + "grad_norm": 0.6073583364486694, + "learning_rate": 0.00033586990741187446, + "loss": 1.5787, + "step": 6535 + }, + { + "epoch": 0.6894514767932489, + "grad_norm": 0.813051700592041, + "learning_rate": 0.0003356606148719277, + "loss": 1.5556, + "step": 6536 + }, + { + "epoch": 0.6895569620253165, + "grad_norm": 
0.6319224834442139, + "learning_rate": 0.00033545136876077524, + "loss": 1.5652, + "step": 6537 + }, + { + "epoch": 0.689662447257384, + "grad_norm": 0.7118549942970276, + "learning_rate": 0.00033524216910186394, + "loss": 1.5569, + "step": 6538 + }, + { + "epoch": 0.6897679324894515, + "grad_norm": 0.7890326976776123, + "learning_rate": 0.00033503301591863586, + "loss": 1.5811, + "step": 6539 + }, + { + "epoch": 0.689873417721519, + "grad_norm": 0.6257898807525635, + "learning_rate": 0.0003348239092345275, + "loss": 1.5512, + "step": 6540 + }, + { + "epoch": 0.6899789029535865, + "grad_norm": 0.7070261836051941, + "learning_rate": 0.00033461484907297036, + "loss": 1.5423, + "step": 6541 + }, + { + "epoch": 0.690084388185654, + "grad_norm": 0.6524146795272827, + "learning_rate": 0.00033440583545739046, + "loss": 1.5389, + "step": 6542 + }, + { + "epoch": 0.6901898734177215, + "grad_norm": 0.7697224020957947, + "learning_rate": 0.00033419686841120925, + "loss": 1.517, + "step": 6543 + }, + { + "epoch": 0.6902953586497891, + "grad_norm": 0.5778565406799316, + "learning_rate": 0.00033398794795784227, + "loss": 1.5626, + "step": 6544 + }, + { + "epoch": 0.6904008438818565, + "grad_norm": 0.7332028150558472, + "learning_rate": 0.0003337790741207003, + "loss": 1.544, + "step": 6545 + }, + { + "epoch": 0.6905063291139241, + "grad_norm": 0.5869078040122986, + "learning_rate": 0.0003335702469231884, + "loss": 1.5545, + "step": 6546 + }, + { + "epoch": 0.6906118143459916, + "grad_norm": 0.6010746359825134, + "learning_rate": 0.00033336146638870685, + "loss": 1.5698, + "step": 6547 + }, + { + "epoch": 0.690717299578059, + "grad_norm": 0.6084466576576233, + "learning_rate": 0.0003331527325406506, + "loss": 1.5458, + "step": 6548 + }, + { + "epoch": 0.6908227848101266, + "grad_norm": 0.5528586506843567, + "learning_rate": 0.0003329440454024092, + "loss": 1.5265, + "step": 6549 + }, + { + "epoch": 0.6909282700421941, + "grad_norm": 0.6776862144470215, + "learning_rate": 
0.0003327354049973672, + "loss": 1.5071, + "step": 6550 + }, + { + "epoch": 0.6910337552742616, + "grad_norm": 0.5882729291915894, + "learning_rate": 0.00033252681134890373, + "loss": 1.5694, + "step": 6551 + }, + { + "epoch": 0.6911392405063291, + "grad_norm": 0.6021029949188232, + "learning_rate": 0.00033231826448039246, + "loss": 1.5165, + "step": 6552 + }, + { + "epoch": 0.6912447257383966, + "grad_norm": 0.6012164354324341, + "learning_rate": 0.0003321097644152027, + "loss": 1.5499, + "step": 6553 + }, + { + "epoch": 0.6913502109704641, + "grad_norm": 0.5937821269035339, + "learning_rate": 0.00033190131117669753, + "loss": 1.5617, + "step": 6554 + }, + { + "epoch": 0.6914556962025317, + "grad_norm": 0.6858252882957458, + "learning_rate": 0.0003316929047882354, + "loss": 1.5244, + "step": 6555 + }, + { + "epoch": 0.6915611814345991, + "grad_norm": 0.6558409929275513, + "learning_rate": 0.0003314845452731691, + "loss": 1.5579, + "step": 6556 + }, + { + "epoch": 0.6916666666666667, + "grad_norm": 0.7326956391334534, + "learning_rate": 0.00033127623265484643, + "loss": 1.5467, + "step": 6557 + }, + { + "epoch": 0.6917721518987342, + "grad_norm": 0.7837071418762207, + "learning_rate": 0.00033106796695660983, + "loss": 1.5454, + "step": 6558 + }, + { + "epoch": 0.6918776371308016, + "grad_norm": 0.6584327816963196, + "learning_rate": 0.0003308597482017965, + "loss": 1.5135, + "step": 6559 + }, + { + "epoch": 0.6919831223628692, + "grad_norm": 0.9532114863395691, + "learning_rate": 0.00033065157641373847, + "loss": 1.5494, + "step": 6560 + }, + { + "epoch": 0.6920886075949367, + "grad_norm": 0.5876283645629883, + "learning_rate": 0.00033044345161576224, + "loss": 1.5664, + "step": 6561 + }, + { + "epoch": 0.6921940928270042, + "grad_norm": 0.7993385791778564, + "learning_rate": 0.00033023537383118916, + "loss": 1.5322, + "step": 6562 + }, + { + "epoch": 0.6922995780590717, + "grad_norm": 0.989596962928772, + "learning_rate": 0.0003300273430833358, + "loss": 1.491, + 
"step": 6563 + }, + { + "epoch": 0.6924050632911393, + "grad_norm": 0.706299901008606, + "learning_rate": 0.00032981935939551294, + "loss": 1.5234, + "step": 6564 + }, + { + "epoch": 0.6925105485232067, + "grad_norm": 1.0562666654586792, + "learning_rate": 0.000329611422791026, + "loss": 1.5631, + "step": 6565 + }, + { + "epoch": 0.6926160337552743, + "grad_norm": 0.7297782897949219, + "learning_rate": 0.00032940353329317533, + "loss": 1.5407, + "step": 6566 + }, + { + "epoch": 0.6927215189873418, + "grad_norm": 0.9262658357620239, + "learning_rate": 0.0003291956909252561, + "loss": 1.5747, + "step": 6567 + }, + { + "epoch": 0.6928270042194092, + "grad_norm": 0.6977256536483765, + "learning_rate": 0.00032898789571055796, + "loss": 1.5328, + "step": 6568 + }, + { + "epoch": 0.6929324894514768, + "grad_norm": 0.7728700041770935, + "learning_rate": 0.0003287801476723656, + "loss": 1.5582, + "step": 6569 + }, + { + "epoch": 0.6930379746835443, + "grad_norm": 0.8400724530220032, + "learning_rate": 0.0003285724468339576, + "loss": 1.5285, + "step": 6570 + }, + { + "epoch": 0.6931434599156118, + "grad_norm": 0.5832553505897522, + "learning_rate": 0.00032836479321860884, + "loss": 1.5527, + "step": 6571 + }, + { + "epoch": 0.6932489451476793, + "grad_norm": 0.8231166005134583, + "learning_rate": 0.00032815718684958727, + "loss": 1.5463, + "step": 6572 + }, + { + "epoch": 0.6933544303797469, + "grad_norm": 0.7177068591117859, + "learning_rate": 0.00032794962775015656, + "loss": 1.5146, + "step": 6573 + }, + { + "epoch": 0.6934599156118143, + "grad_norm": 0.6446713209152222, + "learning_rate": 0.0003277421159435745, + "loss": 1.5532, + "step": 6574 + }, + { + "epoch": 0.6935654008438819, + "grad_norm": 0.6378622651100159, + "learning_rate": 0.000327534651453094, + "loss": 1.5414, + "step": 6575 + }, + { + "epoch": 0.6936708860759494, + "grad_norm": 0.6081342101097107, + "learning_rate": 0.00032732723430196236, + "loss": 1.4896, + "step": 6576 + }, + { + "epoch": 
0.6937763713080168, + "grad_norm": 0.5460678935050964, + "learning_rate": 0.0003271198645134218, + "loss": 1.5299, + "step": 6577 + }, + { + "epoch": 0.6938818565400844, + "grad_norm": 0.7760695815086365, + "learning_rate": 0.0003269125421107091, + "loss": 1.5295, + "step": 6578 + }, + { + "epoch": 0.6939873417721519, + "grad_norm": 0.6539500951766968, + "learning_rate": 0.00032670526711705536, + "loss": 1.5241, + "step": 6579 + }, + { + "epoch": 0.6940928270042194, + "grad_norm": 0.6202104091644287, + "learning_rate": 0.00032649803955568755, + "loss": 1.5782, + "step": 6580 + }, + { + "epoch": 0.6941983122362869, + "grad_norm": 0.5750711560249329, + "learning_rate": 0.0003262908594498262, + "loss": 1.5134, + "step": 6581 + }, + { + "epoch": 0.6943037974683545, + "grad_norm": 0.6593641638755798, + "learning_rate": 0.0003260837268226868, + "loss": 1.5387, + "step": 6582 + }, + { + "epoch": 0.6944092827004219, + "grad_norm": 0.5534638166427612, + "learning_rate": 0.0003258766416974796, + "loss": 1.5443, + "step": 6583 + }, + { + "epoch": 0.6945147679324895, + "grad_norm": 0.6007228493690491, + "learning_rate": 0.0003256696040974097, + "loss": 1.53, + "step": 6584 + }, + { + "epoch": 0.694620253164557, + "grad_norm": 0.5723539590835571, + "learning_rate": 0.00032546261404567644, + "loss": 1.5518, + "step": 6585 + }, + { + "epoch": 0.6947257383966244, + "grad_norm": 0.6048136353492737, + "learning_rate": 0.0003252556715654743, + "loss": 1.5167, + "step": 6586 + }, + { + "epoch": 0.694831223628692, + "grad_norm": 0.6109697818756104, + "learning_rate": 0.00032504877667999206, + "loss": 1.5487, + "step": 6587 + }, + { + "epoch": 0.6949367088607595, + "grad_norm": 0.6434722542762756, + "learning_rate": 0.00032484192941241316, + "loss": 1.5473, + "step": 6588 + }, + { + "epoch": 0.695042194092827, + "grad_norm": 0.6735769510269165, + "learning_rate": 0.0003246351297859164, + "loss": 1.5362, + "step": 6589 + }, + { + "epoch": 0.6951476793248945, + "grad_norm": 
0.6943210363388062, + "learning_rate": 0.00032442837782367434, + "loss": 1.6011, + "step": 6590 + }, + { + "epoch": 0.6952531645569621, + "grad_norm": 0.7523420453071594, + "learning_rate": 0.00032422167354885463, + "loss": 1.5387, + "step": 6591 + }, + { + "epoch": 0.6953586497890295, + "grad_norm": 0.7102241516113281, + "learning_rate": 0.0003240150169846196, + "loss": 1.559, + "step": 6592 + }, + { + "epoch": 0.695464135021097, + "grad_norm": 0.6674736142158508, + "learning_rate": 0.00032380840815412603, + "loss": 1.5598, + "step": 6593 + }, + { + "epoch": 0.6955696202531646, + "grad_norm": 0.6670449376106262, + "learning_rate": 0.00032360184708052554, + "loss": 1.5213, + "step": 6594 + }, + { + "epoch": 0.695675105485232, + "grad_norm": 0.5660226345062256, + "learning_rate": 0.00032339533378696424, + "loss": 1.517, + "step": 6595 + }, + { + "epoch": 0.6957805907172996, + "grad_norm": 0.6443275809288025, + "learning_rate": 0.00032318886829658277, + "loss": 1.5658, + "step": 6596 + }, + { + "epoch": 0.6958860759493671, + "grad_norm": 0.6017906665802002, + "learning_rate": 0.0003229824506325172, + "loss": 1.5566, + "step": 6597 + }, + { + "epoch": 0.6959915611814346, + "grad_norm": 0.5603938698768616, + "learning_rate": 0.0003227760808178973, + "loss": 1.5518, + "step": 6598 + }, + { + "epoch": 0.6960970464135021, + "grad_norm": 0.6688340306282043, + "learning_rate": 0.00032256975887584783, + "loss": 1.5461, + "step": 6599 + }, + { + "epoch": 0.6962025316455697, + "grad_norm": 0.596042275428772, + "learning_rate": 0.0003223634848294883, + "loss": 1.56, + "step": 6600 + }, + { + "epoch": 0.6963080168776371, + "grad_norm": 0.5890272855758667, + "learning_rate": 0.0003221572587019327, + "loss": 1.5126, + "step": 6601 + }, + { + "epoch": 0.6964135021097047, + "grad_norm": 0.6918437480926514, + "learning_rate": 0.0003219510805162896, + "loss": 1.5375, + "step": 6602 + }, + { + "epoch": 0.6965189873417722, + "grad_norm": 0.6974610090255737, + "learning_rate": 
0.0003217449502956624, + "loss": 1.5555, + "step": 6603 + }, + { + "epoch": 0.6966244725738396, + "grad_norm": 0.7320588827133179, + "learning_rate": 0.0003215388680631491, + "loss": 1.5511, + "step": 6604 + }, + { + "epoch": 0.6967299578059072, + "grad_norm": 0.6732274293899536, + "learning_rate": 0.00032133283384184173, + "loss": 1.549, + "step": 6605 + }, + { + "epoch": 0.6968354430379747, + "grad_norm": 0.7214210629463196, + "learning_rate": 0.00032112684765482814, + "loss": 1.504, + "step": 6606 + }, + { + "epoch": 0.6969409282700422, + "grad_norm": 0.6320361495018005, + "learning_rate": 0.00032092090952518996, + "loss": 1.5404, + "step": 6607 + }, + { + "epoch": 0.6970464135021097, + "grad_norm": 0.614227294921875, + "learning_rate": 0.00032071501947600334, + "loss": 1.4834, + "step": 6608 + }, + { + "epoch": 0.6971518987341773, + "grad_norm": 0.715485692024231, + "learning_rate": 0.00032050917753033935, + "loss": 1.5303, + "step": 6609 + }, + { + "epoch": 0.6972573839662447, + "grad_norm": 0.6103719472885132, + "learning_rate": 0.00032030338371126374, + "loss": 1.5898, + "step": 6610 + }, + { + "epoch": 0.6973628691983123, + "grad_norm": 0.6354016065597534, + "learning_rate": 0.0003200976380418366, + "loss": 1.4991, + "step": 6611 + }, + { + "epoch": 0.6974683544303798, + "grad_norm": 0.6439772844314575, + "learning_rate": 0.00031989194054511276, + "loss": 1.5442, + "step": 6612 + }, + { + "epoch": 0.6975738396624472, + "grad_norm": 0.5998992919921875, + "learning_rate": 0.0003196862912441418, + "loss": 1.5906, + "step": 6613 + }, + { + "epoch": 0.6976793248945148, + "grad_norm": 0.6531121134757996, + "learning_rate": 0.0003194806901619673, + "loss": 1.5443, + "step": 6614 + }, + { + "epoch": 0.6977848101265823, + "grad_norm": 0.6027771234512329, + "learning_rate": 0.00031927513732162856, + "loss": 1.5258, + "step": 6615 + }, + { + "epoch": 0.6978902953586498, + "grad_norm": 0.6890758872032166, + "learning_rate": 0.00031906963274615837, + "loss": 1.5711, + 
"step": 6616 + }, + { + "epoch": 0.6979957805907173, + "grad_norm": 0.614166259765625, + "learning_rate": 0.00031886417645858475, + "loss": 1.5646, + "step": 6617 + }, + { + "epoch": 0.6981012658227848, + "grad_norm": 0.7988936901092529, + "learning_rate": 0.00031865876848192993, + "loss": 1.5273, + "step": 6618 + }, + { + "epoch": 0.6982067510548523, + "grad_norm": 0.6020188331604004, + "learning_rate": 0.000318453408839211, + "loss": 1.5288, + "step": 6619 + }, + { + "epoch": 0.6983122362869199, + "grad_norm": 0.7009994387626648, + "learning_rate": 0.0003182480975534395, + "loss": 1.5288, + "step": 6620 + }, + { + "epoch": 0.6984177215189873, + "grad_norm": 0.6727538704872131, + "learning_rate": 0.0003180428346476215, + "loss": 1.5392, + "step": 6621 + }, + { + "epoch": 0.6985232067510548, + "grad_norm": 0.6796686053276062, + "learning_rate": 0.0003178376201447576, + "loss": 1.5624, + "step": 6622 + }, + { + "epoch": 0.6986286919831224, + "grad_norm": 0.6488789319992065, + "learning_rate": 0.00031763245406784364, + "loss": 1.5229, + "step": 6623 + }, + { + "epoch": 0.6987341772151898, + "grad_norm": 0.7268084287643433, + "learning_rate": 0.0003174273364398691, + "loss": 1.5385, + "step": 6624 + }, + { + "epoch": 0.6988396624472574, + "grad_norm": 0.5952140688896179, + "learning_rate": 0.00031722226728381854, + "loss": 1.5472, + "step": 6625 + }, + { + "epoch": 0.6989451476793249, + "grad_norm": 0.6151365637779236, + "learning_rate": 0.00031701724662267097, + "loss": 1.5107, + "step": 6626 + }, + { + "epoch": 0.6990506329113924, + "grad_norm": 0.6489952802658081, + "learning_rate": 0.00031681227447939996, + "loss": 1.5301, + "step": 6627 + }, + { + "epoch": 0.6991561181434599, + "grad_norm": 0.5855088829994202, + "learning_rate": 0.00031660735087697363, + "loss": 1.5474, + "step": 6628 + }, + { + "epoch": 0.6992616033755275, + "grad_norm": 0.6411623954772949, + "learning_rate": 0.0003164024758383548, + "loss": 1.5252, + "step": 6629 + }, + { + "epoch": 
0.6993670886075949, + "grad_norm": 0.5843903422355652, + "learning_rate": 0.00031619764938650057, + "loss": 1.5332, + "step": 6630 + }, + { + "epoch": 0.6994725738396624, + "grad_norm": 0.8213173747062683, + "learning_rate": 0.00031599287154436263, + "loss": 1.5438, + "step": 6631 + }, + { + "epoch": 0.69957805907173, + "grad_norm": 0.6050781607627869, + "learning_rate": 0.0003157881423348879, + "loss": 1.5339, + "step": 6632 + }, + { + "epoch": 0.6996835443037974, + "grad_norm": 0.816797137260437, + "learning_rate": 0.00031558346178101694, + "loss": 1.5306, + "step": 6633 + }, + { + "epoch": 0.699789029535865, + "grad_norm": 0.6810383796691895, + "learning_rate": 0.00031537882990568535, + "loss": 1.5651, + "step": 6634 + }, + { + "epoch": 0.6998945147679325, + "grad_norm": 0.6222115159034729, + "learning_rate": 0.000315174246731823, + "loss": 1.545, + "step": 6635 + }, + { + "epoch": 0.7, + "grad_norm": 0.5961121916770935, + "learning_rate": 0.00031496971228235464, + "loss": 1.5166, + "step": 6636 + }, + { + "epoch": 0.7001054852320675, + "grad_norm": 0.6073036789894104, + "learning_rate": 0.00031476522658019916, + "loss": 1.5778, + "step": 6637 + }, + { + "epoch": 0.700210970464135, + "grad_norm": 0.5956943035125732, + "learning_rate": 0.0003145607896482704, + "loss": 1.5528, + "step": 6638 + }, + { + "epoch": 0.7003164556962025, + "grad_norm": 0.6626628041267395, + "learning_rate": 0.00031435640150947645, + "loss": 1.5331, + "step": 6639 + }, + { + "epoch": 0.70042194092827, + "grad_norm": 0.6736539602279663, + "learning_rate": 0.0003141520621867197, + "loss": 1.5297, + "step": 6640 + }, + { + "epoch": 0.7005274261603376, + "grad_norm": 0.5554836988449097, + "learning_rate": 0.00031394777170289806, + "loss": 1.5502, + "step": 6641 + }, + { + "epoch": 0.700632911392405, + "grad_norm": 0.7433426380157471, + "learning_rate": 0.00031374353008090285, + "loss": 1.5319, + "step": 6642 + }, + { + "epoch": 0.7007383966244726, + "grad_norm": 0.7726922035217285, + 
"learning_rate": 0.0003135393373436206, + "loss": 1.5595, + "step": 6643 + }, + { + "epoch": 0.7008438818565401, + "grad_norm": 0.7753844261169434, + "learning_rate": 0.0003133351935139319, + "loss": 1.5026, + "step": 6644 + }, + { + "epoch": 0.7009493670886076, + "grad_norm": 0.8561853170394897, + "learning_rate": 0.00031313109861471223, + "loss": 1.5102, + "step": 6645 + }, + { + "epoch": 0.7010548523206751, + "grad_norm": 0.5650288462638855, + "learning_rate": 0.0003129270526688313, + "loss": 1.5126, + "step": 6646 + }, + { + "epoch": 0.7011603375527427, + "grad_norm": 0.682715892791748, + "learning_rate": 0.0003127230556991536, + "loss": 1.5512, + "step": 6647 + }, + { + "epoch": 0.7012658227848101, + "grad_norm": 0.6347917914390564, + "learning_rate": 0.000312519107728538, + "loss": 1.5776, + "step": 6648 + }, + { + "epoch": 0.7013713080168776, + "grad_norm": 0.6289447546005249, + "learning_rate": 0.0003123152087798376, + "loss": 1.492, + "step": 6649 + }, + { + "epoch": 0.7014767932489452, + "grad_norm": 0.5971227884292603, + "learning_rate": 0.00031211135887590074, + "loss": 1.5595, + "step": 6650 + }, + { + "epoch": 0.7015822784810126, + "grad_norm": 0.5706222057342529, + "learning_rate": 0.0003119075580395697, + "loss": 1.5231, + "step": 6651 + }, + { + "epoch": 0.7016877637130802, + "grad_norm": 0.6035836935043335, + "learning_rate": 0.0003117038062936813, + "loss": 1.5634, + "step": 6652 + }, + { + "epoch": 0.7017932489451477, + "grad_norm": 0.5706924796104431, + "learning_rate": 0.0003115001036610669, + "loss": 1.5108, + "step": 6653 + }, + { + "epoch": 0.7018987341772152, + "grad_norm": 0.639896035194397, + "learning_rate": 0.0003112964501645525, + "loss": 1.5407, + "step": 6654 + }, + { + "epoch": 0.7020042194092827, + "grad_norm": 0.5937532782554626, + "learning_rate": 0.0003110928458269584, + "loss": 1.5415, + "step": 6655 + }, + { + "epoch": 0.7021097046413503, + "grad_norm": 0.6070740818977356, + "learning_rate": 0.00031088929067109945, + "loss": 
1.5179, + "step": 6656 + }, + { + "epoch": 0.7022151898734177, + "grad_norm": 0.5726488828659058, + "learning_rate": 0.0003106857847197849, + "loss": 1.5441, + "step": 6657 + }, + { + "epoch": 0.7023206751054852, + "grad_norm": 0.6269590854644775, + "learning_rate": 0.0003104823279958191, + "loss": 1.5765, + "step": 6658 + }, + { + "epoch": 0.7024261603375528, + "grad_norm": 0.6267864108085632, + "learning_rate": 0.00031027892052200003, + "loss": 1.5388, + "step": 6659 + }, + { + "epoch": 0.7025316455696202, + "grad_norm": 0.5400430560112, + "learning_rate": 0.0003100755623211205, + "loss": 1.5085, + "step": 6660 + }, + { + "epoch": 0.7026371308016878, + "grad_norm": 0.5623102188110352, + "learning_rate": 0.000309872253415968, + "loss": 1.5157, + "step": 6661 + }, + { + "epoch": 0.7027426160337553, + "grad_norm": 0.5606948733329773, + "learning_rate": 0.00030966899382932404, + "loss": 1.5405, + "step": 6662 + }, + { + "epoch": 0.7028481012658228, + "grad_norm": 0.6526033878326416, + "learning_rate": 0.0003094657835839651, + "loss": 1.5374, + "step": 6663 + }, + { + "epoch": 0.7029535864978903, + "grad_norm": 0.6072307229042053, + "learning_rate": 0.00030926262270266177, + "loss": 1.5138, + "step": 6664 + }, + { + "epoch": 0.7030590717299579, + "grad_norm": 0.6003898978233337, + "learning_rate": 0.00030905951120817934, + "loss": 1.5418, + "step": 6665 + }, + { + "epoch": 0.7031645569620253, + "grad_norm": 0.6143918037414551, + "learning_rate": 0.00030885644912327713, + "loss": 1.5377, + "step": 6666 + }, + { + "epoch": 0.7032700421940928, + "grad_norm": 0.6409276127815247, + "learning_rate": 0.0003086534364707097, + "loss": 1.5392, + "step": 6667 + }, + { + "epoch": 0.7033755274261604, + "grad_norm": 0.597218930721283, + "learning_rate": 0.00030845047327322556, + "loss": 1.533, + "step": 6668 + }, + { + "epoch": 0.7034810126582278, + "grad_norm": 0.5931506156921387, + "learning_rate": 0.0003082475595535677, + "loss": 1.5675, + "step": 6669 + }, + { + "epoch": 
0.7035864978902954, + "grad_norm": 0.6557291746139526, + "learning_rate": 0.0003080446953344735, + "loss": 1.5563, + "step": 6670 + }, + { + "epoch": 0.7036919831223629, + "grad_norm": 0.6538068652153015, + "learning_rate": 0.000307841880638675, + "loss": 1.5736, + "step": 6671 + }, + { + "epoch": 0.7037974683544304, + "grad_norm": 0.59206223487854, + "learning_rate": 0.0003076391154888985, + "loss": 1.5325, + "step": 6672 + }, + { + "epoch": 0.7039029535864979, + "grad_norm": 0.6302756071090698, + "learning_rate": 0.000307436399907865, + "loss": 1.5607, + "step": 6673 + }, + { + "epoch": 0.7040084388185655, + "grad_norm": 0.5818601250648499, + "learning_rate": 0.00030723373391828966, + "loss": 1.5701, + "step": 6674 + }, + { + "epoch": 0.7041139240506329, + "grad_norm": 0.565091609954834, + "learning_rate": 0.00030703111754288204, + "loss": 1.5153, + "step": 6675 + }, + { + "epoch": 0.7042194092827004, + "grad_norm": 0.6710157990455627, + "learning_rate": 0.0003068285508043467, + "loss": 1.5524, + "step": 6676 + }, + { + "epoch": 0.704324894514768, + "grad_norm": 0.6926749348640442, + "learning_rate": 0.00030662603372538224, + "loss": 1.5356, + "step": 6677 + }, + { + "epoch": 0.7044303797468354, + "grad_norm": 0.5975764393806458, + "learning_rate": 0.0003064235663286815, + "loss": 1.5547, + "step": 6678 + }, + { + "epoch": 0.704535864978903, + "grad_norm": 0.629535973072052, + "learning_rate": 0.00030622114863693205, + "loss": 1.4863, + "step": 6679 + }, + { + "epoch": 0.7046413502109705, + "grad_norm": 0.6124526262283325, + "learning_rate": 0.00030601878067281575, + "loss": 1.5173, + "step": 6680 + }, + { + "epoch": 0.704746835443038, + "grad_norm": 0.5838149785995483, + "learning_rate": 0.00030581646245900895, + "loss": 1.5372, + "step": 6681 + }, + { + "epoch": 0.7048523206751055, + "grad_norm": 0.5793778300285339, + "learning_rate": 0.0003056141940181825, + "loss": 1.5361, + "step": 6682 + }, + { + "epoch": 0.7049578059071729, + "grad_norm": 0.64571613073349, 
+ "learning_rate": 0.0003054119753730012, + "loss": 1.5158, + "step": 6683 + }, + { + "epoch": 0.7050632911392405, + "grad_norm": 0.616423487663269, + "learning_rate": 0.00030520980654612527, + "loss": 1.4981, + "step": 6684 + }, + { + "epoch": 0.705168776371308, + "grad_norm": 0.6377403140068054, + "learning_rate": 0.0003050076875602084, + "loss": 1.5453, + "step": 6685 + }, + { + "epoch": 0.7052742616033755, + "grad_norm": 0.5785754919052124, + "learning_rate": 0.0003048056184378991, + "loss": 1.5387, + "step": 6686 + }, + { + "epoch": 0.705379746835443, + "grad_norm": 0.5670540928840637, + "learning_rate": 0.0003046035992018402, + "loss": 1.5433, + "step": 6687 + }, + { + "epoch": 0.7054852320675106, + "grad_norm": 0.6196306347846985, + "learning_rate": 0.00030440162987466896, + "loss": 1.5053, + "step": 6688 + }, + { + "epoch": 0.705590717299578, + "grad_norm": 0.5976040959358215, + "learning_rate": 0.00030419971047901704, + "loss": 1.5749, + "step": 6689 + }, + { + "epoch": 0.7056962025316456, + "grad_norm": 0.7100936770439148, + "learning_rate": 0.00030399784103751044, + "loss": 1.525, + "step": 6690 + }, + { + "epoch": 0.7058016877637131, + "grad_norm": 0.6556717753410339, + "learning_rate": 0.0003037960215727699, + "loss": 1.5412, + "step": 6691 + }, + { + "epoch": 0.7059071729957805, + "grad_norm": 0.6123474836349487, + "learning_rate": 0.0003035942521074097, + "loss": 1.5464, + "step": 6692 + }, + { + "epoch": 0.7060126582278481, + "grad_norm": 0.6821704506874084, + "learning_rate": 0.0003033925326640398, + "loss": 1.5541, + "step": 6693 + }, + { + "epoch": 0.7061181434599156, + "grad_norm": 0.5756093263626099, + "learning_rate": 0.00030319086326526364, + "loss": 1.539, + "step": 6694 + }, + { + "epoch": 0.7062236286919831, + "grad_norm": 0.6187666654586792, + "learning_rate": 0.00030298924393367923, + "loss": 1.5208, + "step": 6695 + }, + { + "epoch": 0.7063291139240506, + "grad_norm": 0.607578694820404, + "learning_rate": 0.0003027876746918791, + 
"loss": 1.5388, + "step": 6696 + }, + { + "epoch": 0.7064345991561182, + "grad_norm": 0.6603869795799255, + "learning_rate": 0.00030258615556244995, + "loss": 1.5191, + "step": 6697 + }, + { + "epoch": 0.7065400843881856, + "grad_norm": 0.6099638342857361, + "learning_rate": 0.0003023846865679731, + "loss": 1.5687, + "step": 6698 + }, + { + "epoch": 0.7066455696202532, + "grad_norm": 0.6133288145065308, + "learning_rate": 0.00030218326773102407, + "loss": 1.4927, + "step": 6699 + }, + { + "epoch": 0.7067510548523207, + "grad_norm": 0.6453334093093872, + "learning_rate": 0.000301981899074173, + "loss": 1.5343, + "step": 6700 + }, + { + "epoch": 0.7068565400843881, + "grad_norm": 0.5960437655448914, + "learning_rate": 0.00030178058061998387, + "loss": 1.5668, + "step": 6701 + }, + { + "epoch": 0.7069620253164557, + "grad_norm": 0.6552228331565857, + "learning_rate": 0.00030157931239101595, + "loss": 1.5485, + "step": 6702 + }, + { + "epoch": 0.7070675105485232, + "grad_norm": 0.6266435980796814, + "learning_rate": 0.00030137809440982207, + "loss": 1.5298, + "step": 6703 + }, + { + "epoch": 0.7071729957805907, + "grad_norm": 0.6469079852104187, + "learning_rate": 0.0003011769266989498, + "loss": 1.5255, + "step": 6704 + }, + { + "epoch": 0.7072784810126582, + "grad_norm": 0.7100560665130615, + "learning_rate": 0.0003009758092809409, + "loss": 1.5547, + "step": 6705 + }, + { + "epoch": 0.7073839662447258, + "grad_norm": 0.6200946569442749, + "learning_rate": 0.00030077474217833167, + "loss": 1.5161, + "step": 6706 + }, + { + "epoch": 0.7074894514767932, + "grad_norm": 0.6096377372741699, + "learning_rate": 0.0003005737254136525, + "loss": 1.5489, + "step": 6707 + }, + { + "epoch": 0.7075949367088608, + "grad_norm": 0.7432951331138611, + "learning_rate": 0.0003003727590094285, + "loss": 1.5155, + "step": 6708 + }, + { + "epoch": 0.7077004219409283, + "grad_norm": 0.5481594800949097, + "learning_rate": 0.00030017184298817873, + "loss": 1.5404, + "step": 6709 + }, + { + 
"epoch": 0.7078059071729957, + "grad_norm": 0.5774154663085938, + "learning_rate": 0.0002999709773724171, + "loss": 1.5099, + "step": 6710 + }, + { + "epoch": 0.7079113924050633, + "grad_norm": 0.6081318259239197, + "learning_rate": 0.00029977016218465154, + "loss": 1.5126, + "step": 6711 + }, + { + "epoch": 0.7080168776371308, + "grad_norm": 0.5831964015960693, + "learning_rate": 0.0002995693974473844, + "loss": 1.5341, + "step": 6712 + }, + { + "epoch": 0.7081223628691983, + "grad_norm": 0.6345267295837402, + "learning_rate": 0.00029936868318311235, + "loss": 1.5316, + "step": 6713 + }, + { + "epoch": 0.7082278481012658, + "grad_norm": 0.6635064482688904, + "learning_rate": 0.00029916801941432637, + "loss": 1.5365, + "step": 6714 + }, + { + "epoch": 0.7083333333333334, + "grad_norm": 0.6687660217285156, + "learning_rate": 0.00029896740616351187, + "loss": 1.4886, + "step": 6715 + }, + { + "epoch": 0.7084388185654008, + "grad_norm": 0.6985775828361511, + "learning_rate": 0.00029876684345314853, + "loss": 1.5088, + "step": 6716 + }, + { + "epoch": 0.7085443037974684, + "grad_norm": 0.7292715907096863, + "learning_rate": 0.00029856633130571046, + "loss": 1.5636, + "step": 6717 + }, + { + "epoch": 0.7086497890295359, + "grad_norm": 0.7492624521255493, + "learning_rate": 0.00029836586974366574, + "loss": 1.5091, + "step": 6718 + }, + { + "epoch": 0.7087552742616033, + "grad_norm": 0.5876450538635254, + "learning_rate": 0.00029816545878947763, + "loss": 1.5364, + "step": 6719 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 0.6983479857444763, + "learning_rate": 0.00029796509846560294, + "loss": 1.5604, + "step": 6720 + }, + { + "epoch": 0.7089662447257384, + "grad_norm": 0.5793054103851318, + "learning_rate": 0.00029776478879449305, + "loss": 1.5287, + "step": 6721 + }, + { + "epoch": 0.7090717299578059, + "grad_norm": 0.6595240831375122, + "learning_rate": 0.0002975645297985935, + "loss": 1.5628, + "step": 6722 + }, + { + "epoch": 0.7091772151898734, + 
"grad_norm": 0.6046832203865051, + "learning_rate": 0.0002973643215003445, + "loss": 1.5365, + "step": 6723 + }, + { + "epoch": 0.709282700421941, + "grad_norm": 0.6446406245231628, + "learning_rate": 0.0002971641639221804, + "loss": 1.5268, + "step": 6724 + }, + { + "epoch": 0.7093881856540084, + "grad_norm": 0.5980350971221924, + "learning_rate": 0.00029696405708652966, + "loss": 1.5248, + "step": 6725 + }, + { + "epoch": 0.709493670886076, + "grad_norm": 0.6656964421272278, + "learning_rate": 0.00029676400101581545, + "loss": 1.5375, + "step": 6726 + }, + { + "epoch": 0.7095991561181435, + "grad_norm": 0.587654709815979, + "learning_rate": 0.0002965639957324546, + "loss": 1.5338, + "step": 6727 + }, + { + "epoch": 0.7097046413502109, + "grad_norm": 0.5764431357383728, + "learning_rate": 0.00029636404125885936, + "loss": 1.5616, + "step": 6728 + }, + { + "epoch": 0.7098101265822785, + "grad_norm": 0.5935356020927429, + "learning_rate": 0.00029616413761743537, + "loss": 1.5291, + "step": 6729 + }, + { + "epoch": 0.709915611814346, + "grad_norm": 0.6526263356208801, + "learning_rate": 0.0002959642848305828, + "loss": 1.5189, + "step": 6730 + }, + { + "epoch": 0.7100210970464135, + "grad_norm": 0.6451595425605774, + "learning_rate": 0.0002957644829206961, + "loss": 1.5474, + "step": 6731 + }, + { + "epoch": 0.710126582278481, + "grad_norm": 0.6557911038398743, + "learning_rate": 0.0002955647319101641, + "loss": 1.5336, + "step": 6732 + }, + { + "epoch": 0.7102320675105486, + "grad_norm": 0.6770099401473999, + "learning_rate": 0.00029536503182137, + "loss": 1.5506, + "step": 6733 + }, + { + "epoch": 0.710337552742616, + "grad_norm": 0.6685137152671814, + "learning_rate": 0.00029516538267669096, + "loss": 1.5155, + "step": 6734 + }, + { + "epoch": 0.7104430379746836, + "grad_norm": 0.5815519094467163, + "learning_rate": 0.00029496578449849867, + "loss": 1.5374, + "step": 6735 + }, + { + "epoch": 0.7105485232067511, + "grad_norm": 0.6975462436676025, + "learning_rate": 
0.00029476623730915943, + "loss": 1.5485, + "step": 6736 + }, + { + "epoch": 0.7106540084388185, + "grad_norm": 0.6219248175621033, + "learning_rate": 0.00029456674113103335, + "loss": 1.5174, + "step": 6737 + }, + { + "epoch": 0.7107594936708861, + "grad_norm": 0.6658716797828674, + "learning_rate": 0.00029436729598647483, + "loss": 1.54, + "step": 6738 + }, + { + "epoch": 0.7108649789029536, + "grad_norm": 0.6222186088562012, + "learning_rate": 0.00029416790189783286, + "loss": 1.5793, + "step": 6739 + }, + { + "epoch": 0.7109704641350211, + "grad_norm": 0.6388615965843201, + "learning_rate": 0.00029396855888745045, + "loss": 1.5332, + "step": 6740 + }, + { + "epoch": 0.7110759493670886, + "grad_norm": 0.7217008471488953, + "learning_rate": 0.00029376926697766495, + "loss": 1.5378, + "step": 6741 + }, + { + "epoch": 0.7111814345991562, + "grad_norm": 0.6084980964660645, + "learning_rate": 0.00029357002619080814, + "loss": 1.5387, + "step": 6742 + }, + { + "epoch": 0.7112869198312236, + "grad_norm": 0.6194488406181335, + "learning_rate": 0.0002933708365492058, + "loss": 1.5253, + "step": 6743 + }, + { + "epoch": 0.7113924050632912, + "grad_norm": 0.6717939972877502, + "learning_rate": 0.00029317169807517785, + "loss": 1.5154, + "step": 6744 + }, + { + "epoch": 0.7114978902953587, + "grad_norm": 0.5546609163284302, + "learning_rate": 0.00029297261079103945, + "loss": 1.5727, + "step": 6745 + }, + { + "epoch": 0.7116033755274261, + "grad_norm": 0.7556111812591553, + "learning_rate": 0.000292773574719099, + "loss": 1.5529, + "step": 6746 + }, + { + "epoch": 0.7117088607594937, + "grad_norm": 0.6484915018081665, + "learning_rate": 0.0002925745898816594, + "loss": 1.5476, + "step": 6747 + }, + { + "epoch": 0.7118143459915611, + "grad_norm": 0.5619749426841736, + "learning_rate": 0.0002923756563010179, + "loss": 1.5335, + "step": 6748 + }, + { + "epoch": 0.7119198312236287, + "grad_norm": 0.6918942332267761, + "learning_rate": 0.000292176773999466, + "loss": 1.5668, + 
"step": 6749 + }, + { + "epoch": 0.7120253164556962, + "grad_norm": 0.675297737121582, + "learning_rate": 0.0002919779429992895, + "loss": 1.5348, + "step": 6750 + }, + { + "epoch": 0.7121308016877637, + "grad_norm": 0.5606458783149719, + "learning_rate": 0.0002917791633227685, + "loss": 1.5153, + "step": 6751 + }, + { + "epoch": 0.7122362869198312, + "grad_norm": 0.8183725476264954, + "learning_rate": 0.000291580434992177, + "loss": 1.5305, + "step": 6752 + }, + { + "epoch": 0.7123417721518988, + "grad_norm": 0.7038650512695312, + "learning_rate": 0.00029138175802978343, + "loss": 1.5341, + "step": 6753 + }, + { + "epoch": 0.7124472573839662, + "grad_norm": 0.600849986076355, + "learning_rate": 0.00029118313245785104, + "loss": 1.5845, + "step": 6754 + }, + { + "epoch": 0.7125527426160337, + "grad_norm": 0.8060716986656189, + "learning_rate": 0.00029098455829863653, + "loss": 1.5391, + "step": 6755 + }, + { + "epoch": 0.7126582278481013, + "grad_norm": 0.6661921739578247, + "learning_rate": 0.0002907860355743911, + "loss": 1.5091, + "step": 6756 + }, + { + "epoch": 0.7127637130801687, + "grad_norm": 0.7068005204200745, + "learning_rate": 0.00029058756430736025, + "loss": 1.5508, + "step": 6757 + }, + { + "epoch": 0.7128691983122363, + "grad_norm": 0.827602207660675, + "learning_rate": 0.0002903891445197836, + "loss": 1.5492, + "step": 6758 + }, + { + "epoch": 0.7129746835443038, + "grad_norm": 0.6106283664703369, + "learning_rate": 0.0002901907762338952, + "loss": 1.5526, + "step": 6759 + }, + { + "epoch": 0.7130801687763713, + "grad_norm": 0.8819045424461365, + "learning_rate": 0.0002899924594719231, + "loss": 1.5328, + "step": 6760 + }, + { + "epoch": 0.7131856540084388, + "grad_norm": 0.6667078733444214, + "learning_rate": 0.0002897941942560894, + "loss": 1.5589, + "step": 6761 + }, + { + "epoch": 0.7132911392405064, + "grad_norm": 0.6970370411872864, + "learning_rate": 0.0002895959806086114, + "loss": 1.5271, + "step": 6762 + }, + { + "epoch": 
0.7133966244725738, + "grad_norm": 0.7063979506492615, + "learning_rate": 0.0002893978185516995, + "loss": 1.5534, + "step": 6763 + }, + { + "epoch": 0.7135021097046413, + "grad_norm": 0.7295739650726318, + "learning_rate": 0.00028919970810755883, + "loss": 1.55, + "step": 6764 + }, + { + "epoch": 0.7136075949367089, + "grad_norm": 0.6546187400817871, + "learning_rate": 0.0002890016492983886, + "loss": 1.5581, + "step": 6765 + }, + { + "epoch": 0.7137130801687763, + "grad_norm": 0.838067889213562, + "learning_rate": 0.0002888036421463823, + "loss": 1.5033, + "step": 6766 + }, + { + "epoch": 0.7138185654008439, + "grad_norm": 0.6565909385681152, + "learning_rate": 0.0002886056866737277, + "loss": 1.5306, + "step": 6767 + }, + { + "epoch": 0.7139240506329114, + "grad_norm": 0.652362585067749, + "learning_rate": 0.0002884077829026066, + "loss": 1.5414, + "step": 6768 + }, + { + "epoch": 0.7140295358649789, + "grad_norm": 0.6540713310241699, + "learning_rate": 0.0002882099308551951, + "loss": 1.5526, + "step": 6769 + }, + { + "epoch": 0.7141350210970464, + "grad_norm": 0.6880561709403992, + "learning_rate": 0.00028801213055366335, + "loss": 1.5402, + "step": 6770 + }, + { + "epoch": 0.714240506329114, + "grad_norm": 0.6468711495399475, + "learning_rate": 0.00028781438202017613, + "loss": 1.5913, + "step": 6771 + }, + { + "epoch": 0.7143459915611814, + "grad_norm": 0.7285500764846802, + "learning_rate": 0.0002876166852768923, + "loss": 1.5366, + "step": 6772 + }, + { + "epoch": 0.7144514767932489, + "grad_norm": 0.630841076374054, + "learning_rate": 0.0002874190403459644, + "loss": 1.5587, + "step": 6773 + }, + { + "epoch": 0.7145569620253165, + "grad_norm": 0.5850895643234253, + "learning_rate": 0.0002872214472495397, + "loss": 1.495, + "step": 6774 + }, + { + "epoch": 0.7146624472573839, + "grad_norm": 0.6646517515182495, + "learning_rate": 0.00028702390600975937, + "loss": 1.5204, + "step": 6775 + }, + { + "epoch": 0.7147679324894515, + "grad_norm": 
0.6779972314834595, + "learning_rate": 0.0002868264166487591, + "loss": 1.5009, + "step": 6776 + }, + { + "epoch": 0.714873417721519, + "grad_norm": 0.6422214508056641, + "learning_rate": 0.0002866289791886684, + "loss": 1.5306, + "step": 6777 + }, + { + "epoch": 0.7149789029535865, + "grad_norm": 0.6379969716072083, + "learning_rate": 0.00028643159365161113, + "loss": 1.5487, + "step": 6778 + }, + { + "epoch": 0.715084388185654, + "grad_norm": 0.6915110945701599, + "learning_rate": 0.00028623426005970517, + "loss": 1.5368, + "step": 6779 + }, + { + "epoch": 0.7151898734177216, + "grad_norm": 0.5875823497772217, + "learning_rate": 0.00028603697843506315, + "loss": 1.5447, + "step": 6780 + }, + { + "epoch": 0.715295358649789, + "grad_norm": 0.6107310056686401, + "learning_rate": 0.00028583974879979113, + "loss": 1.5696, + "step": 6781 + }, + { + "epoch": 0.7154008438818565, + "grad_norm": 0.665383517742157, + "learning_rate": 0.00028564257117598993, + "loss": 1.5339, + "step": 6782 + }, + { + "epoch": 0.7155063291139241, + "grad_norm": 0.5990309715270996, + "learning_rate": 0.00028544544558575395, + "loss": 1.5117, + "step": 6783 + }, + { + "epoch": 0.7156118143459915, + "grad_norm": 0.7394906282424927, + "learning_rate": 0.0002852483720511724, + "loss": 1.5709, + "step": 6784 + }, + { + "epoch": 0.7157172995780591, + "grad_norm": 0.6194083094596863, + "learning_rate": 0.0002850513505943281, + "loss": 1.5492, + "step": 6785 + }, + { + "epoch": 0.7158227848101266, + "grad_norm": 0.6865739226341248, + "learning_rate": 0.0002848543812372986, + "loss": 1.5015, + "step": 6786 + }, + { + "epoch": 0.7159282700421941, + "grad_norm": 0.8206589221954346, + "learning_rate": 0.00028465746400215463, + "loss": 1.5243, + "step": 6787 + }, + { + "epoch": 0.7160337552742616, + "grad_norm": 0.7556677460670471, + "learning_rate": 0.00028446059891096265, + "loss": 1.5499, + "step": 6788 + }, + { + "epoch": 0.7161392405063292, + "grad_norm": 0.7125974893569946, + "learning_rate": 
0.00028426378598578187, + "loss": 1.5265, + "step": 6789 + }, + { + "epoch": 0.7162447257383966, + "grad_norm": 0.6521942615509033, + "learning_rate": 0.0002840670252486662, + "loss": 1.5757, + "step": 6790 + }, + { + "epoch": 0.7163502109704641, + "grad_norm": 0.592104434967041, + "learning_rate": 0.00028387031672166385, + "loss": 1.5317, + "step": 6791 + }, + { + "epoch": 0.7164556962025317, + "grad_norm": 0.6446436047554016, + "learning_rate": 0.0002836736604268167, + "loss": 1.5315, + "step": 6792 + }, + { + "epoch": 0.7165611814345991, + "grad_norm": 0.6639468669891357, + "learning_rate": 0.0002834770563861613, + "loss": 1.5517, + "step": 6793 + }, + { + "epoch": 0.7166666666666667, + "grad_norm": 0.6213145852088928, + "learning_rate": 0.000283280504621728, + "loss": 1.5297, + "step": 6794 + }, + { + "epoch": 0.7167721518987342, + "grad_norm": 0.5711320042610168, + "learning_rate": 0.0002830840051555414, + "loss": 1.5197, + "step": 6795 + }, + { + "epoch": 0.7168776371308017, + "grad_norm": 0.6018215417861938, + "learning_rate": 0.00028288755800962, + "loss": 1.4886, + "step": 6796 + }, + { + "epoch": 0.7169831223628692, + "grad_norm": 0.5861425399780273, + "learning_rate": 0.00028269116320597733, + "loss": 1.5234, + "step": 6797 + }, + { + "epoch": 0.7170886075949368, + "grad_norm": 0.5389037132263184, + "learning_rate": 0.0002824948207666199, + "loss": 1.542, + "step": 6798 + }, + { + "epoch": 0.7171940928270042, + "grad_norm": 0.7239651083946228, + "learning_rate": 0.0002822985307135491, + "loss": 1.5623, + "step": 6799 + }, + { + "epoch": 0.7172995780590717, + "grad_norm": 0.5825331807136536, + "learning_rate": 0.00028210229306876, + "loss": 1.4995, + "step": 6800 + }, + { + "epoch": 0.7174050632911393, + "grad_norm": 0.6741269826889038, + "learning_rate": 0.0002819061078542422, + "loss": 1.5637, + "step": 6801 + }, + { + "epoch": 0.7175105485232067, + "grad_norm": 0.6037532091140747, + "learning_rate": 0.0002817099750919791, + "loss": 1.5233, + "step": 
6802 + }, + { + "epoch": 0.7176160337552743, + "grad_norm": 0.611836314201355, + "learning_rate": 0.0002815138948039485, + "loss": 1.5436, + "step": 6803 + }, + { + "epoch": 0.7177215189873418, + "grad_norm": 0.5938943028450012, + "learning_rate": 0.000281317867012122, + "loss": 1.4965, + "step": 6804 + }, + { + "epoch": 0.7178270042194093, + "grad_norm": 0.655035674571991, + "learning_rate": 0.0002811218917384652, + "loss": 1.5704, + "step": 6805 + }, + { + "epoch": 0.7179324894514768, + "grad_norm": 0.5900968909263611, + "learning_rate": 0.00028092596900493885, + "loss": 1.5458, + "step": 6806 + }, + { + "epoch": 0.7180379746835444, + "grad_norm": 0.557526171207428, + "learning_rate": 0.00028073009883349665, + "loss": 1.512, + "step": 6807 + }, + { + "epoch": 0.7181434599156118, + "grad_norm": 0.7828075289726257, + "learning_rate": 0.00028053428124608684, + "loss": 1.5646, + "step": 6808 + }, + { + "epoch": 0.7182489451476793, + "grad_norm": 0.6467950940132141, + "learning_rate": 0.0002803385162646518, + "loss": 1.5439, + "step": 6809 + }, + { + "epoch": 0.7183544303797469, + "grad_norm": 0.9039344191551208, + "learning_rate": 0.0002801428039111279, + "loss": 1.4619, + "step": 6810 + }, + { + "epoch": 0.7184599156118143, + "grad_norm": 0.6350924372673035, + "learning_rate": 0.0002799471442074459, + "loss": 1.5509, + "step": 6811 + }, + { + "epoch": 0.7185654008438819, + "grad_norm": 0.6202815175056458, + "learning_rate": 0.00027975153717553014, + "loss": 1.5577, + "step": 6812 + }, + { + "epoch": 0.7186708860759494, + "grad_norm": 0.8062613606452942, + "learning_rate": 0.00027955598283729936, + "loss": 1.5495, + "step": 6813 + }, + { + "epoch": 0.7187763713080169, + "grad_norm": 0.5603278279304504, + "learning_rate": 0.00027936048121466673, + "loss": 1.514, + "step": 6814 + }, + { + "epoch": 0.7188818565400844, + "grad_norm": 0.6438913941383362, + "learning_rate": 0.00027916503232953895, + "loss": 1.5421, + "step": 6815 + }, + { + "epoch": 0.7189873417721518, + 
"grad_norm": 0.6223918795585632, + "learning_rate": 0.0002789696362038172, + "loss": 1.5284, + "step": 6816 + }, + { + "epoch": 0.7190928270042194, + "grad_norm": 0.5684909224510193, + "learning_rate": 0.0002787742928593965, + "loss": 1.5253, + "step": 6817 + }, + { + "epoch": 0.7191983122362869, + "grad_norm": 0.6077797412872314, + "learning_rate": 0.00027857900231816594, + "loss": 1.518, + "step": 6818 + }, + { + "epoch": 0.7193037974683544, + "grad_norm": 0.6305537819862366, + "learning_rate": 0.0002783837646020089, + "loss": 1.5322, + "step": 6819 + }, + { + "epoch": 0.7194092827004219, + "grad_norm": 0.6557918787002563, + "learning_rate": 0.00027818857973280274, + "loss": 1.5419, + "step": 6820 + }, + { + "epoch": 0.7195147679324895, + "grad_norm": 0.5845826268196106, + "learning_rate": 0.0002779934477324189, + "loss": 1.5361, + "step": 6821 + }, + { + "epoch": 0.7196202531645569, + "grad_norm": 0.6901136040687561, + "learning_rate": 0.0002777983686227226, + "loss": 1.5796, + "step": 6822 + }, + { + "epoch": 0.7197257383966245, + "grad_norm": 0.6015174388885498, + "learning_rate": 0.00027760334242557397, + "loss": 1.4878, + "step": 6823 + }, + { + "epoch": 0.719831223628692, + "grad_norm": 0.5645516514778137, + "learning_rate": 0.00027740836916282643, + "loss": 1.5365, + "step": 6824 + }, + { + "epoch": 0.7199367088607594, + "grad_norm": 0.705769419670105, + "learning_rate": 0.00027721344885632765, + "loss": 1.535, + "step": 6825 + }, + { + "epoch": 0.720042194092827, + "grad_norm": 0.6607920527458191, + "learning_rate": 0.0002770185815279195, + "loss": 1.505, + "step": 6826 + }, + { + "epoch": 0.7201476793248945, + "grad_norm": 0.5800368785858154, + "learning_rate": 0.0002768237671994377, + "loss": 1.5613, + "step": 6827 + }, + { + "epoch": 0.720253164556962, + "grad_norm": 0.6683099865913391, + "learning_rate": 0.0002766290058927123, + "loss": 1.5276, + "step": 6828 + }, + { + "epoch": 0.7203586497890295, + "grad_norm": 0.6254390478134155, + "learning_rate": 
0.0002764342976295673, + "loss": 1.5143, + "step": 6829 + }, + { + "epoch": 0.7204641350210971, + "grad_norm": 0.6781206130981445, + "learning_rate": 0.0002762396424318206, + "loss": 1.5204, + "step": 6830 + }, + { + "epoch": 0.7205696202531645, + "grad_norm": 0.7229189872741699, + "learning_rate": 0.000276045040321284, + "loss": 1.5427, + "step": 6831 + }, + { + "epoch": 0.7206751054852321, + "grad_norm": 0.6573375463485718, + "learning_rate": 0.0002758504913197644, + "loss": 1.5107, + "step": 6832 + }, + { + "epoch": 0.7207805907172996, + "grad_norm": 0.7074263095855713, + "learning_rate": 0.0002756559954490615, + "loss": 1.5231, + "step": 6833 + }, + { + "epoch": 0.720886075949367, + "grad_norm": 0.5951071381568909, + "learning_rate": 0.0002754615527309696, + "loss": 1.5239, + "step": 6834 + }, + { + "epoch": 0.7209915611814346, + "grad_norm": 0.7331635355949402, + "learning_rate": 0.000275267163187277, + "loss": 1.5263, + "step": 6835 + }, + { + "epoch": 0.7210970464135021, + "grad_norm": 0.7039169073104858, + "learning_rate": 0.00027507282683976594, + "loss": 1.5188, + "step": 6836 + }, + { + "epoch": 0.7212025316455696, + "grad_norm": 0.554421067237854, + "learning_rate": 0.0002748785437102129, + "loss": 1.5313, + "step": 6837 + }, + { + "epoch": 0.7213080168776371, + "grad_norm": 0.6481538414955139, + "learning_rate": 0.00027468431382038816, + "loss": 1.5393, + "step": 6838 + }, + { + "epoch": 0.7214135021097047, + "grad_norm": 0.646198570728302, + "learning_rate": 0.00027449013719205623, + "loss": 1.5097, + "step": 6839 + }, + { + "epoch": 0.7215189873417721, + "grad_norm": 0.6677937507629395, + "learning_rate": 0.00027429601384697526, + "loss": 1.515, + "step": 6840 + }, + { + "epoch": 0.7216244725738397, + "grad_norm": 0.6801962852478027, + "learning_rate": 0.00027410194380689826, + "loss": 1.5147, + "step": 6841 + }, + { + "epoch": 0.7217299578059072, + "grad_norm": 0.753715991973877, + "learning_rate": 0.00027390792709357155, + "loss": 1.5341, + "step": 
6842 + }, + { + "epoch": 0.7218354430379746, + "grad_norm": 0.6106957197189331, + "learning_rate": 0.00027371396372873557, + "loss": 1.5284, + "step": 6843 + }, + { + "epoch": 0.7219409282700422, + "grad_norm": 0.7369165420532227, + "learning_rate": 0.00027352005373412487, + "loss": 1.554, + "step": 6844 + }, + { + "epoch": 0.7220464135021097, + "grad_norm": 0.666217565536499, + "learning_rate": 0.00027332619713146816, + "loss": 1.5435, + "step": 6845 + }, + { + "epoch": 0.7221518987341772, + "grad_norm": 0.674563467502594, + "learning_rate": 0.000273132393942488, + "loss": 1.5479, + "step": 6846 + }, + { + "epoch": 0.7222573839662447, + "grad_norm": 0.7107812762260437, + "learning_rate": 0.000272938644188901, + "loss": 1.5445, + "step": 6847 + }, + { + "epoch": 0.7223628691983123, + "grad_norm": 0.7357791066169739, + "learning_rate": 0.00027274494789241766, + "loss": 1.5079, + "step": 6848 + }, + { + "epoch": 0.7224683544303797, + "grad_norm": 0.6365472078323364, + "learning_rate": 0.00027255130507474276, + "loss": 1.5448, + "step": 6849 + }, + { + "epoch": 0.7225738396624473, + "grad_norm": 0.7096417546272278, + "learning_rate": 0.00027235771575757466, + "loss": 1.5134, + "step": 6850 + }, + { + "epoch": 0.7226793248945148, + "grad_norm": 0.7320483922958374, + "learning_rate": 0.00027216417996260654, + "loss": 1.5521, + "step": 6851 + }, + { + "epoch": 0.7227848101265822, + "grad_norm": 0.6600437760353088, + "learning_rate": 0.00027197069771152464, + "loss": 1.5461, + "step": 6852 + }, + { + "epoch": 0.7228902953586498, + "grad_norm": 0.6946755647659302, + "learning_rate": 0.0002717772690260098, + "loss": 1.5159, + "step": 6853 + }, + { + "epoch": 0.7229957805907173, + "grad_norm": 0.6956264972686768, + "learning_rate": 0.0002715838939277366, + "loss": 1.5381, + "step": 6854 + }, + { + "epoch": 0.7231012658227848, + "grad_norm": 0.6665220260620117, + "learning_rate": 0.0002713905724383737, + "loss": 1.4819, + "step": 6855 + }, + { + "epoch": 0.7232067510548523, + 
"grad_norm": 0.6729310154914856, + "learning_rate": 0.00027119730457958376, + "loss": 1.5221, + "step": 6856 + }, + { + "epoch": 0.7233122362869199, + "grad_norm": 0.5866939425468445, + "learning_rate": 0.0002710040903730233, + "loss": 1.4867, + "step": 6857 + }, + { + "epoch": 0.7234177215189873, + "grad_norm": 0.6391886472702026, + "learning_rate": 0.00027081092984034303, + "loss": 1.5579, + "step": 6858 + }, + { + "epoch": 0.7235232067510549, + "grad_norm": 0.696800947189331, + "learning_rate": 0.00027061782300318726, + "loss": 1.5564, + "step": 6859 + }, + { + "epoch": 0.7236286919831224, + "grad_norm": 0.6182870864868164, + "learning_rate": 0.0002704247698831951, + "loss": 1.5168, + "step": 6860 + }, + { + "epoch": 0.7237341772151898, + "grad_norm": 0.6149284839630127, + "learning_rate": 0.00027023177050199885, + "loss": 1.4955, + "step": 6861 + }, + { + "epoch": 0.7238396624472574, + "grad_norm": 0.6919633150100708, + "learning_rate": 0.00027003882488122507, + "loss": 1.5633, + "step": 6862 + }, + { + "epoch": 0.7239451476793249, + "grad_norm": 0.6821942925453186, + "learning_rate": 0.0002698459330424942, + "loss": 1.5402, + "step": 6863 + }, + { + "epoch": 0.7240506329113924, + "grad_norm": 0.6415356993675232, + "learning_rate": 0.0002696530950074208, + "loss": 1.5439, + "step": 6864 + }, + { + "epoch": 0.7241561181434599, + "grad_norm": 0.6715474128723145, + "learning_rate": 0.00026946031079761346, + "loss": 1.5338, + "step": 6865 + }, + { + "epoch": 0.7242616033755275, + "grad_norm": 0.6235246658325195, + "learning_rate": 0.00026926758043467435, + "loss": 1.5242, + "step": 6866 + }, + { + "epoch": 0.7243670886075949, + "grad_norm": 0.6609224081039429, + "learning_rate": 0.00026907490394020004, + "loss": 1.5137, + "step": 6867 + }, + { + "epoch": 0.7244725738396625, + "grad_norm": 0.6048340201377869, + "learning_rate": 0.00026888228133578086, + "loss": 1.5269, + "step": 6868 + }, + { + "epoch": 0.72457805907173, + "grad_norm": 0.7017120122909546, + 
"learning_rate": 0.0002686897126430009, + "loss": 1.5082, + "step": 6869 + }, + { + "epoch": 0.7246835443037974, + "grad_norm": 0.6585515141487122, + "learning_rate": 0.0002684971978834389, + "loss": 1.5063, + "step": 6870 + }, + { + "epoch": 0.724789029535865, + "grad_norm": 0.597542405128479, + "learning_rate": 0.00026830473707866684, + "loss": 1.5346, + "step": 6871 + }, + { + "epoch": 0.7248945147679325, + "grad_norm": 0.5519822835922241, + "learning_rate": 0.00026811233025025096, + "loss": 1.5348, + "step": 6872 + }, + { + "epoch": 0.725, + "grad_norm": 0.6775665879249573, + "learning_rate": 0.00026791997741975134, + "loss": 1.5283, + "step": 6873 + }, + { + "epoch": 0.7251054852320675, + "grad_norm": 0.6727107763290405, + "learning_rate": 0.00026772767860872216, + "loss": 1.5397, + "step": 6874 + }, + { + "epoch": 0.7252109704641351, + "grad_norm": 0.7263813614845276, + "learning_rate": 0.00026753543383871143, + "loss": 1.5302, + "step": 6875 + }, + { + "epoch": 0.7253164556962025, + "grad_norm": 0.6244476437568665, + "learning_rate": 0.0002673432431312611, + "loss": 1.5266, + "step": 6876 + }, + { + "epoch": 0.7254219409282701, + "grad_norm": 0.648307740688324, + "learning_rate": 0.0002671511065079071, + "loss": 1.5237, + "step": 6877 + }, + { + "epoch": 0.7255274261603376, + "grad_norm": 0.8735585808753967, + "learning_rate": 0.00026695902399017935, + "loss": 1.5295, + "step": 6878 + }, + { + "epoch": 0.725632911392405, + "grad_norm": 0.6337442994117737, + "learning_rate": 0.00026676699559960145, + "loss": 1.5679, + "step": 6879 + }, + { + "epoch": 0.7257383966244726, + "grad_norm": 0.6638092398643494, + "learning_rate": 0.0002665750213576914, + "loss": 1.5312, + "step": 6880 + }, + { + "epoch": 0.72584388185654, + "grad_norm": 0.5868866443634033, + "learning_rate": 0.0002663831012859609, + "loss": 1.5223, + "step": 6881 + }, + { + "epoch": 0.7259493670886076, + "grad_norm": 0.6753818988800049, + "learning_rate": 0.0002661912354059154, + "loss": 1.5241, + 
"step": 6882 + }, + { + "epoch": 0.7260548523206751, + "grad_norm": 0.6165459156036377, + "learning_rate": 0.0002659994237390545, + "loss": 1.5289, + "step": 6883 + }, + { + "epoch": 0.7261603375527426, + "grad_norm": 0.6046576499938965, + "learning_rate": 0.0002658076663068715, + "loss": 1.5236, + "step": 6884 + }, + { + "epoch": 0.7262658227848101, + "grad_norm": 0.7115411162376404, + "learning_rate": 0.00026561596313085396, + "loss": 1.5546, + "step": 6885 + }, + { + "epoch": 0.7263713080168777, + "grad_norm": 0.607011079788208, + "learning_rate": 0.00026542431423248313, + "loss": 1.4967, + "step": 6886 + }, + { + "epoch": 0.7264767932489451, + "grad_norm": 0.6997591257095337, + "learning_rate": 0.00026523271963323414, + "loss": 1.5327, + "step": 6887 + }, + { + "epoch": 0.7265822784810126, + "grad_norm": 0.5564890503883362, + "learning_rate": 0.0002650411793545763, + "loss": 1.5356, + "step": 6888 + }, + { + "epoch": 0.7266877637130802, + "grad_norm": 0.599538266658783, + "learning_rate": 0.00026484969341797224, + "loss": 1.5013, + "step": 6889 + }, + { + "epoch": 0.7267932489451476, + "grad_norm": 0.5593017935752869, + "learning_rate": 0.0002646582618448794, + "loss": 1.4946, + "step": 6890 + }, + { + "epoch": 0.7268987341772152, + "grad_norm": 0.6379513740539551, + "learning_rate": 0.00026446688465674845, + "loss": 1.5366, + "step": 6891 + }, + { + "epoch": 0.7270042194092827, + "grad_norm": 0.6447013020515442, + "learning_rate": 0.0002642755618750242, + "loss": 1.5508, + "step": 6892 + }, + { + "epoch": 0.7271097046413502, + "grad_norm": 0.603362500667572, + "learning_rate": 0.0002640842935211453, + "loss": 1.5325, + "step": 6893 + }, + { + "epoch": 0.7272151898734177, + "grad_norm": 0.7178763151168823, + "learning_rate": 0.0002638930796165443, + "loss": 1.5011, + "step": 6894 + }, + { + "epoch": 0.7273206751054853, + "grad_norm": 0.6661558747291565, + "learning_rate": 0.00026370192018264766, + "loss": 1.4959, + "step": 6895 + }, + { + "epoch": 
0.7274261603375527, + "grad_norm": 0.7630661725997925, + "learning_rate": 0.00026351081524087573, + "loss": 1.5282, + "step": 6896 + }, + { + "epoch": 0.7275316455696202, + "grad_norm": 0.6120738983154297, + "learning_rate": 0.0002633197648126429, + "loss": 1.5343, + "step": 6897 + }, + { + "epoch": 0.7276371308016878, + "grad_norm": 0.6384773850440979, + "learning_rate": 0.0002631287689193571, + "loss": 1.5336, + "step": 6898 + }, + { + "epoch": 0.7277426160337552, + "grad_norm": 0.7292004227638245, + "learning_rate": 0.0002629378275824204, + "loss": 1.5238, + "step": 6899 + }, + { + "epoch": 0.7278481012658228, + "grad_norm": 0.6842349171638489, + "learning_rate": 0.00026274694082322896, + "loss": 1.5097, + "step": 6900 + }, + { + "epoch": 0.7279535864978903, + "grad_norm": 0.7198325395584106, + "learning_rate": 0.00026255610866317253, + "loss": 1.555, + "step": 6901 + }, + { + "epoch": 0.7280590717299578, + "grad_norm": 0.6132409572601318, + "learning_rate": 0.0002623653311236347, + "loss": 1.5395, + "step": 6902 + }, + { + "epoch": 0.7281645569620253, + "grad_norm": 0.633699357509613, + "learning_rate": 0.0002621746082259931, + "loss": 1.5557, + "step": 6903 + }, + { + "epoch": 0.7282700421940929, + "grad_norm": 0.6501533389091492, + "learning_rate": 0.0002619839399916192, + "loss": 1.5311, + "step": 6904 + }, + { + "epoch": 0.7283755274261603, + "grad_norm": 0.6458803415298462, + "learning_rate": 0.0002617933264418782, + "loss": 1.524, + "step": 6905 + }, + { + "epoch": 0.7284810126582278, + "grad_norm": 0.6734859943389893, + "learning_rate": 0.00026160276759812953, + "loss": 1.5589, + "step": 6906 + }, + { + "epoch": 0.7285864978902954, + "grad_norm": 0.6062381267547607, + "learning_rate": 0.00026141226348172595, + "loss": 1.557, + "step": 6907 + }, + { + "epoch": 0.7286919831223628, + "grad_norm": 0.6749553680419922, + "learning_rate": 0.00026122181411401444, + "loss": 1.5398, + "step": 6908 + }, + { + "epoch": 0.7287974683544304, + "grad_norm": 
0.5853327512741089, + "learning_rate": 0.00026103141951633617, + "loss": 1.5937, + "step": 6909 + }, + { + "epoch": 0.7289029535864979, + "grad_norm": 0.6031194925308228, + "learning_rate": 0.0002608410797100255, + "loss": 1.5354, + "step": 6910 + }, + { + "epoch": 0.7290084388185654, + "grad_norm": 0.6180073022842407, + "learning_rate": 0.000260650794716411, + "loss": 1.5445, + "step": 6911 + }, + { + "epoch": 0.7291139240506329, + "grad_norm": 0.5474382042884827, + "learning_rate": 0.00026046056455681515, + "loss": 1.4981, + "step": 6912 + }, + { + "epoch": 0.7292194092827005, + "grad_norm": 0.616852343082428, + "learning_rate": 0.00026027038925255407, + "loss": 1.5381, + "step": 6913 + }, + { + "epoch": 0.7293248945147679, + "grad_norm": 0.6313870549201965, + "learning_rate": 0.00026008026882493783, + "loss": 1.5094, + "step": 6914 + }, + { + "epoch": 0.7294303797468354, + "grad_norm": 0.6625404953956604, + "learning_rate": 0.00025989020329527057, + "loss": 1.5296, + "step": 6915 + }, + { + "epoch": 0.729535864978903, + "grad_norm": 0.6173087358474731, + "learning_rate": 0.0002597001926848498, + "loss": 1.5218, + "step": 6916 + }, + { + "epoch": 0.7296413502109704, + "grad_norm": 0.6027964353561401, + "learning_rate": 0.00025951023701496713, + "loss": 1.538, + "step": 6917 + }, + { + "epoch": 0.729746835443038, + "grad_norm": 0.674089252948761, + "learning_rate": 0.0002593203363069084, + "loss": 1.541, + "step": 6918 + }, + { + "epoch": 0.7298523206751055, + "grad_norm": 0.7139757871627808, + "learning_rate": 0.00025913049058195277, + "loss": 1.5326, + "step": 6919 + }, + { + "epoch": 0.729957805907173, + "grad_norm": 0.585300087928772, + "learning_rate": 0.0002589406998613733, + "loss": 1.53, + "step": 6920 + }, + { + "epoch": 0.7300632911392405, + "grad_norm": 0.6711223721504211, + "learning_rate": 0.0002587509641664372, + "loss": 1.5427, + "step": 6921 + }, + { + "epoch": 0.7301687763713081, + "grad_norm": 0.6922019124031067, + "learning_rate": 
0.0002585612835184051, + "loss": 1.5158, + "step": 6922 + }, + { + "epoch": 0.7302742616033755, + "grad_norm": 0.6807230114936829, + "learning_rate": 0.00025837165793853164, + "loss": 1.5381, + "step": 6923 + }, + { + "epoch": 0.730379746835443, + "grad_norm": 0.6638785004615784, + "learning_rate": 0.0002581820874480654, + "loss": 1.5406, + "step": 6924 + }, + { + "epoch": 0.7304852320675106, + "grad_norm": 0.6891346573829651, + "learning_rate": 0.0002579925720682487, + "loss": 1.5298, + "step": 6925 + }, + { + "epoch": 0.730590717299578, + "grad_norm": 0.7227736711502075, + "learning_rate": 0.0002578031118203174, + "loss": 1.5495, + "step": 6926 + }, + { + "epoch": 0.7306962025316456, + "grad_norm": 0.737969160079956, + "learning_rate": 0.00025761370672550203, + "loss": 1.5196, + "step": 6927 + }, + { + "epoch": 0.7308016877637131, + "grad_norm": 0.6247255802154541, + "learning_rate": 0.0002574243568050261, + "loss": 1.5168, + "step": 6928 + }, + { + "epoch": 0.7309071729957806, + "grad_norm": 0.6435412764549255, + "learning_rate": 0.0002572350620801072, + "loss": 1.5375, + "step": 6929 + }, + { + "epoch": 0.7310126582278481, + "grad_norm": 0.6607081294059753, + "learning_rate": 0.0002570458225719567, + "loss": 1.5605, + "step": 6930 + }, + { + "epoch": 0.7311181434599157, + "grad_norm": 0.6468022465705872, + "learning_rate": 0.0002568566383017799, + "loss": 1.5514, + "step": 6931 + }, + { + "epoch": 0.7312236286919831, + "grad_norm": 0.6447959542274475, + "learning_rate": 0.0002566675092907757, + "loss": 1.5057, + "step": 6932 + }, + { + "epoch": 0.7313291139240506, + "grad_norm": 0.6356683969497681, + "learning_rate": 0.0002564784355601372, + "loss": 1.564, + "step": 6933 + }, + { + "epoch": 0.7314345991561182, + "grad_norm": 0.6693257689476013, + "learning_rate": 0.0002562894171310508, + "loss": 1.524, + "step": 6934 + }, + { + "epoch": 0.7315400843881856, + "grad_norm": 0.6121037602424622, + "learning_rate": 0.00025610045402469695, + "loss": 1.5838, + "step": 
6935 + }, + { + "epoch": 0.7316455696202532, + "grad_norm": 0.686016321182251, + "learning_rate": 0.0002559115462622503, + "loss": 1.4874, + "step": 6936 + }, + { + "epoch": 0.7317510548523207, + "grad_norm": 0.5991679430007935, + "learning_rate": 0.00025572269386487853, + "loss": 1.5164, + "step": 6937 + }, + { + "epoch": 0.7318565400843882, + "grad_norm": 0.6240922808647156, + "learning_rate": 0.0002555338968537436, + "loss": 1.5552, + "step": 6938 + }, + { + "epoch": 0.7319620253164557, + "grad_norm": 0.6655003428459167, + "learning_rate": 0.0002553451552500012, + "loss": 1.549, + "step": 6939 + }, + { + "epoch": 0.7320675105485233, + "grad_norm": 0.7658456563949585, + "learning_rate": 0.00025515646907480074, + "loss": 1.5564, + "step": 6940 + }, + { + "epoch": 0.7321729957805907, + "grad_norm": 0.661781907081604, + "learning_rate": 0.0002549678383492854, + "loss": 1.5543, + "step": 6941 + }, + { + "epoch": 0.7322784810126582, + "grad_norm": 0.6370189189910889, + "learning_rate": 0.00025477926309459224, + "loss": 1.5025, + "step": 6942 + }, + { + "epoch": 0.7323839662447258, + "grad_norm": 0.7355113625526428, + "learning_rate": 0.00025459074333185176, + "loss": 1.5125, + "step": 6943 + }, + { + "epoch": 0.7324894514767932, + "grad_norm": 0.697213888168335, + "learning_rate": 0.0002544022790821891, + "loss": 1.5263, + "step": 6944 + }, + { + "epoch": 0.7325949367088608, + "grad_norm": 0.7048782706260681, + "learning_rate": 0.0002542138703667224, + "loss": 1.5461, + "step": 6945 + }, + { + "epoch": 0.7327004219409282, + "grad_norm": 0.6905006170272827, + "learning_rate": 0.00025402551720656366, + "loss": 1.5107, + "step": 6946 + }, + { + "epoch": 0.7328059071729958, + "grad_norm": 0.7639763355255127, + "learning_rate": 0.0002538372196228189, + "loss": 1.5308, + "step": 6947 + }, + { + "epoch": 0.7329113924050633, + "grad_norm": 0.6677406430244446, + "learning_rate": 0.00025364897763658777, + "loss": 1.558, + "step": 6948 + }, + { + "epoch": 0.7330168776371307, + 
"grad_norm": 0.5752298831939697, + "learning_rate": 0.0002534607912689637, + "loss": 1.5772, + "step": 6949 + }, + { + "epoch": 0.7331223628691983, + "grad_norm": 0.640478789806366, + "learning_rate": 0.00025327266054103395, + "loss": 1.5284, + "step": 6950 + }, + { + "epoch": 0.7332278481012658, + "grad_norm": 0.7108500003814697, + "learning_rate": 0.0002530845854738796, + "loss": 1.5283, + "step": 6951 + }, + { + "epoch": 0.7333333333333333, + "grad_norm": 0.5569787621498108, + "learning_rate": 0.0002528965660885749, + "loss": 1.5178, + "step": 6952 + }, + { + "epoch": 0.7334388185654008, + "grad_norm": 0.7323802709579468, + "learning_rate": 0.00025270860240618904, + "loss": 1.5214, + "step": 6953 + }, + { + "epoch": 0.7335443037974684, + "grad_norm": 0.6130258440971375, + "learning_rate": 0.000252520694447784, + "loss": 1.5268, + "step": 6954 + }, + { + "epoch": 0.7336497890295358, + "grad_norm": 0.6525563597679138, + "learning_rate": 0.0002523328422344158, + "loss": 1.5322, + "step": 6955 + }, + { + "epoch": 0.7337552742616034, + "grad_norm": 0.6206003427505493, + "learning_rate": 0.0002521450457871343, + "loss": 1.5137, + "step": 6956 + }, + { + "epoch": 0.7338607594936709, + "grad_norm": 0.6073675155639648, + "learning_rate": 0.0002519573051269828, + "loss": 1.5757, + "step": 6957 + }, + { + "epoch": 0.7339662447257383, + "grad_norm": 0.5631245374679565, + "learning_rate": 0.0002517696202749988, + "loss": 1.499, + "step": 6958 + }, + { + "epoch": 0.7340717299578059, + "grad_norm": 0.7761408686637878, + "learning_rate": 0.00025158199125221325, + "loss": 1.4949, + "step": 6959 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 0.645480215549469, + "learning_rate": 0.0002513944180796509, + "loss": 1.5443, + "step": 6960 + }, + { + "epoch": 0.7342827004219409, + "grad_norm": 0.7404863834381104, + "learning_rate": 0.0002512069007783301, + "loss": 1.5729, + "step": 6961 + }, + { + "epoch": 0.7343881856540084, + "grad_norm": 0.7311789393424988, + 
"learning_rate": 0.00025101943936926347, + "loss": 1.5119, + "step": 6962 + }, + { + "epoch": 0.734493670886076, + "grad_norm": 0.5892291069030762, + "learning_rate": 0.0002508320338734568, + "loss": 1.4737, + "step": 6963 + }, + { + "epoch": 0.7345991561181434, + "grad_norm": 0.7022191286087036, + "learning_rate": 0.00025064468431190977, + "loss": 1.5407, + "step": 6964 + }, + { + "epoch": 0.734704641350211, + "grad_norm": 0.6895778775215149, + "learning_rate": 0.0002504573907056159, + "loss": 1.5072, + "step": 6965 + }, + { + "epoch": 0.7348101265822785, + "grad_norm": 0.634137749671936, + "learning_rate": 0.00025027015307556234, + "loss": 1.5385, + "step": 6966 + }, + { + "epoch": 0.734915611814346, + "grad_norm": 0.7129725813865662, + "learning_rate": 0.00025008297144273, + "loss": 1.4967, + "step": 6967 + }, + { + "epoch": 0.7350210970464135, + "grad_norm": 0.5955337882041931, + "learning_rate": 0.0002498958458280936, + "loss": 1.5285, + "step": 6968 + }, + { + "epoch": 0.735126582278481, + "grad_norm": 0.6262558698654175, + "learning_rate": 0.0002497087762526211, + "loss": 1.5122, + "step": 6969 + }, + { + "epoch": 0.7352320675105485, + "grad_norm": 0.60209059715271, + "learning_rate": 0.0002495217627372752, + "loss": 1.5219, + "step": 6970 + }, + { + "epoch": 0.735337552742616, + "grad_norm": 0.6351622939109802, + "learning_rate": 0.0002493348053030113, + "loss": 1.5068, + "step": 6971 + }, + { + "epoch": 0.7354430379746836, + "grad_norm": 0.6608371734619141, + "learning_rate": 0.0002491479039707791, + "loss": 1.5217, + "step": 6972 + }, + { + "epoch": 0.735548523206751, + "grad_norm": 0.549889326095581, + "learning_rate": 0.00024896105876152165, + "loss": 1.5165, + "step": 6973 + }, + { + "epoch": 0.7356540084388186, + "grad_norm": 0.6038939952850342, + "learning_rate": 0.0002487742696961761, + "loss": 1.5181, + "step": 6974 + }, + { + "epoch": 0.7357594936708861, + "grad_norm": 0.7175413966178894, + "learning_rate": 0.0002485875367956729, + "loss": 1.5579, 
+ "step": 6975 + }, + { + "epoch": 0.7358649789029535, + "grad_norm": 0.5905881524085999, + "learning_rate": 0.00024840086008093645, + "loss": 1.4986, + "step": 6976 + }, + { + "epoch": 0.7359704641350211, + "grad_norm": 0.5408501029014587, + "learning_rate": 0.0002482142395728848, + "loss": 1.5521, + "step": 6977 + }, + { + "epoch": 0.7360759493670886, + "grad_norm": 0.647290050983429, + "learning_rate": 0.0002480276752924295, + "loss": 1.5282, + "step": 6978 + }, + { + "epoch": 0.7361814345991561, + "grad_norm": 0.6255329251289368, + "learning_rate": 0.0002478411672604766, + "loss": 1.5338, + "step": 6979 + }, + { + "epoch": 0.7362869198312236, + "grad_norm": 0.6154159307479858, + "learning_rate": 0.0002476547154979248, + "loss": 1.5135, + "step": 6980 + }, + { + "epoch": 0.7363924050632912, + "grad_norm": 0.6124496459960938, + "learning_rate": 0.00024746832002566703, + "loss": 1.5365, + "step": 6981 + }, + { + "epoch": 0.7364978902953586, + "grad_norm": 0.6704180836677551, + "learning_rate": 0.0002472819808645899, + "loss": 1.5129, + "step": 6982 + }, + { + "epoch": 0.7366033755274262, + "grad_norm": 0.6010652184486389, + "learning_rate": 0.0002470956980355735, + "loss": 1.5003, + "step": 6983 + }, + { + "epoch": 0.7367088607594937, + "grad_norm": 0.7517694234848022, + "learning_rate": 0.00024690947155949194, + "loss": 1.5471, + "step": 6984 + }, + { + "epoch": 0.7368143459915611, + "grad_norm": 0.6450297832489014, + "learning_rate": 0.0002467233014572127, + "loss": 1.5497, + "step": 6985 + }, + { + "epoch": 0.7369198312236287, + "grad_norm": 0.6426171064376831, + "learning_rate": 0.00024653718774959713, + "loss": 1.499, + "step": 6986 + }, + { + "epoch": 0.7370253164556962, + "grad_norm": 0.6288716197013855, + "learning_rate": 0.00024635113045749985, + "loss": 1.4943, + "step": 6987 + }, + { + "epoch": 0.7371308016877637, + "grad_norm": 0.6351954936981201, + "learning_rate": 0.00024616512960177014, + "loss": 1.552, + "step": 6988 + }, + { + "epoch": 
0.7372362869198312, + "grad_norm": 0.6167472004890442, + "learning_rate": 0.00024597918520324994, + "loss": 1.567, + "step": 6989 + }, + { + "epoch": 0.7373417721518988, + "grad_norm": 0.6480320692062378, + "learning_rate": 0.00024579329728277534, + "loss": 1.5012, + "step": 6990 + }, + { + "epoch": 0.7374472573839662, + "grad_norm": 0.6105344891548157, + "learning_rate": 0.00024560746586117603, + "loss": 1.5574, + "step": 6991 + }, + { + "epoch": 0.7375527426160338, + "grad_norm": 0.5908449292182922, + "learning_rate": 0.00024542169095927526, + "loss": 1.5688, + "step": 6992 + }, + { + "epoch": 0.7376582278481013, + "grad_norm": 0.6119973063468933, + "learning_rate": 0.00024523597259789004, + "loss": 1.5161, + "step": 6993 + }, + { + "epoch": 0.7377637130801687, + "grad_norm": 0.65181565284729, + "learning_rate": 0.0002450503107978311, + "loss": 1.5303, + "step": 6994 + }, + { + "epoch": 0.7378691983122363, + "grad_norm": 0.6930673718452454, + "learning_rate": 0.00024486470557990247, + "loss": 1.5336, + "step": 6995 + }, + { + "epoch": 0.7379746835443038, + "grad_norm": 0.6737003326416016, + "learning_rate": 0.0002446791569649027, + "loss": 1.5296, + "step": 6996 + }, + { + "epoch": 0.7380801687763713, + "grad_norm": 0.7127618193626404, + "learning_rate": 0.0002444936649736232, + "loss": 1.5153, + "step": 6997 + }, + { + "epoch": 0.7381856540084388, + "grad_norm": 0.6094720959663391, + "learning_rate": 0.00024430822962684905, + "loss": 1.5059, + "step": 6998 + }, + { + "epoch": 0.7382911392405064, + "grad_norm": 0.6884212493896484, + "learning_rate": 0.00024412285094535952, + "loss": 1.5749, + "step": 6999 + }, + { + "epoch": 0.7383966244725738, + "grad_norm": 0.6514138579368591, + "learning_rate": 0.00024393752894992708, + "loss": 1.5259, + "step": 7000 + }, + { + "epoch": 0.7385021097046414, + "grad_norm": 0.5953992605209351, + "learning_rate": 0.00024375226366131787, + "loss": 1.5043, + "step": 7001 + }, + { + "epoch": 0.7386075949367089, + "grad_norm": 
0.5952104330062866, + "learning_rate": 0.00024356705510029196, + "loss": 1.5351, + "step": 7002 + }, + { + "epoch": 0.7387130801687763, + "grad_norm": 0.6121811270713806, + "learning_rate": 0.00024338190328760282, + "loss": 1.5225, + "step": 7003 + }, + { + "epoch": 0.7388185654008439, + "grad_norm": 0.6270585656166077, + "learning_rate": 0.00024319680824399736, + "loss": 1.4969, + "step": 7004 + }, + { + "epoch": 0.7389240506329114, + "grad_norm": 0.6323848962783813, + "learning_rate": 0.00024301176999021702, + "loss": 1.4881, + "step": 7005 + }, + { + "epoch": 0.7390295358649789, + "grad_norm": 0.7653446793556213, + "learning_rate": 0.00024282678854699592, + "loss": 1.5018, + "step": 7006 + }, + { + "epoch": 0.7391350210970464, + "grad_norm": 0.6258358359336853, + "learning_rate": 0.00024264186393506206, + "loss": 1.5183, + "step": 7007 + }, + { + "epoch": 0.739240506329114, + "grad_norm": 0.6250653266906738, + "learning_rate": 0.00024245699617513733, + "loss": 1.49, + "step": 7008 + }, + { + "epoch": 0.7393459915611814, + "grad_norm": 0.6518223881721497, + "learning_rate": 0.00024227218528793696, + "loss": 1.5224, + "step": 7009 + }, + { + "epoch": 0.739451476793249, + "grad_norm": 0.5785535573959351, + "learning_rate": 0.00024208743129417004, + "loss": 1.509, + "step": 7010 + }, + { + "epoch": 0.7395569620253165, + "grad_norm": 0.7584890723228455, + "learning_rate": 0.00024190273421453913, + "loss": 1.5383, + "step": 7011 + }, + { + "epoch": 0.739662447257384, + "grad_norm": 0.5970528721809387, + "learning_rate": 0.00024171809406974047, + "loss": 1.5124, + "step": 7012 + }, + { + "epoch": 0.7397679324894515, + "grad_norm": 0.5452079772949219, + "learning_rate": 0.0002415335108804636, + "loss": 1.536, + "step": 7013 + }, + { + "epoch": 0.7398734177215189, + "grad_norm": 0.7518081068992615, + "learning_rate": 0.0002413489846673925, + "loss": 1.5118, + "step": 7014 + }, + { + "epoch": 0.7399789029535865, + "grad_norm": 0.6744739413261414, + "learning_rate": 
0.0002411645154512041, + "loss": 1.5369, + "step": 7015 + }, + { + "epoch": 0.740084388185654, + "grad_norm": 0.655301570892334, + "learning_rate": 0.00024098010325256897, + "loss": 1.5437, + "step": 7016 + }, + { + "epoch": 0.7401898734177215, + "grad_norm": 0.7066177129745483, + "learning_rate": 0.00024079574809215149, + "loss": 1.5187, + "step": 7017 + }, + { + "epoch": 0.740295358649789, + "grad_norm": 0.6950086355209351, + "learning_rate": 0.00024061144999060956, + "loss": 1.539, + "step": 7018 + }, + { + "epoch": 0.7404008438818566, + "grad_norm": 0.6400241255760193, + "learning_rate": 0.00024042720896859471, + "loss": 1.4825, + "step": 7019 + }, + { + "epoch": 0.740506329113924, + "grad_norm": 0.8395974636077881, + "learning_rate": 0.00024024302504675206, + "loss": 1.5331, + "step": 7020 + }, + { + "epoch": 0.7406118143459915, + "grad_norm": 0.6772676706314087, + "learning_rate": 0.00024005889824572004, + "loss": 1.5302, + "step": 7021 + }, + { + "epoch": 0.7407172995780591, + "grad_norm": 0.6479936242103577, + "learning_rate": 0.00023987482858613154, + "loss": 1.5298, + "step": 7022 + }, + { + "epoch": 0.7408227848101265, + "grad_norm": 0.7529109716415405, + "learning_rate": 0.0002396908160886123, + "loss": 1.4825, + "step": 7023 + }, + { + "epoch": 0.7409282700421941, + "grad_norm": 0.6447405219078064, + "learning_rate": 0.0002395068607737816, + "loss": 1.4965, + "step": 7024 + }, + { + "epoch": 0.7410337552742616, + "grad_norm": 0.5866894721984863, + "learning_rate": 0.0002393229626622528, + "loss": 1.5083, + "step": 7025 + }, + { + "epoch": 0.7411392405063291, + "grad_norm": 0.6915994882583618, + "learning_rate": 0.00023913912177463248, + "loss": 1.5252, + "step": 7026 + }, + { + "epoch": 0.7412447257383966, + "grad_norm": 0.6814780235290527, + "learning_rate": 0.0002389553381315209, + "loss": 1.5746, + "step": 7027 + }, + { + "epoch": 0.7413502109704642, + "grad_norm": 0.6062209606170654, + "learning_rate": 0.00023877161175351206, + "loss": 1.5518, + 
"step": 7028 + }, + { + "epoch": 0.7414556962025316, + "grad_norm": 0.5870444178581238, + "learning_rate": 0.00023858794266119323, + "loss": 1.5511, + "step": 7029 + }, + { + "epoch": 0.7415611814345991, + "grad_norm": 0.6803735494613647, + "learning_rate": 0.0002384043308751454, + "loss": 1.5072, + "step": 7030 + }, + { + "epoch": 0.7416666666666667, + "grad_norm": 0.6349050402641296, + "learning_rate": 0.0002382207764159436, + "loss": 1.5214, + "step": 7031 + }, + { + "epoch": 0.7417721518987341, + "grad_norm": 0.6524086594581604, + "learning_rate": 0.00023803727930415568, + "loss": 1.5525, + "step": 7032 + }, + { + "epoch": 0.7418776371308017, + "grad_norm": 0.6551660895347595, + "learning_rate": 0.00023785383956034353, + "loss": 1.5577, + "step": 7033 + }, + { + "epoch": 0.7419831223628692, + "grad_norm": 0.6406400203704834, + "learning_rate": 0.00023767045720506243, + "loss": 1.5228, + "step": 7034 + }, + { + "epoch": 0.7420886075949367, + "grad_norm": 0.6280101537704468, + "learning_rate": 0.00023748713225886137, + "loss": 1.5425, + "step": 7035 + }, + { + "epoch": 0.7421940928270042, + "grad_norm": 0.7366310358047485, + "learning_rate": 0.0002373038647422827, + "loss": 1.5179, + "step": 7036 + }, + { + "epoch": 0.7422995780590718, + "grad_norm": 0.6754043698310852, + "learning_rate": 0.00023712065467586252, + "loss": 1.5229, + "step": 7037 + }, + { + "epoch": 0.7424050632911392, + "grad_norm": 0.6065295934677124, + "learning_rate": 0.00023693750208013045, + "loss": 1.5341, + "step": 7038 + }, + { + "epoch": 0.7425105485232067, + "grad_norm": 0.7347819805145264, + "learning_rate": 0.00023675440697560943, + "loss": 1.5612, + "step": 7039 + }, + { + "epoch": 0.7426160337552743, + "grad_norm": 0.734210193157196, + "learning_rate": 0.00023657136938281653, + "loss": 1.5033, + "step": 7040 + }, + { + "epoch": 0.7427215189873417, + "grad_norm": 0.7187139987945557, + "learning_rate": 0.00023638838932226196, + "loss": 1.4875, + "step": 7041 + }, + { + "epoch": 
0.7428270042194093, + "grad_norm": 0.7686939835548401, + "learning_rate": 0.00023620546681444942, + "loss": 1.5794, + "step": 7042 + }, + { + "epoch": 0.7429324894514768, + "grad_norm": 0.609597384929657, + "learning_rate": 0.00023602260187987635, + "loss": 1.5451, + "step": 7043 + }, + { + "epoch": 0.7430379746835443, + "grad_norm": 0.6789280772209167, + "learning_rate": 0.0002358397945390336, + "loss": 1.5017, + "step": 7044 + }, + { + "epoch": 0.7431434599156118, + "grad_norm": 0.6654317378997803, + "learning_rate": 0.0002356570448124058, + "loss": 1.5275, + "step": 7045 + }, + { + "epoch": 0.7432489451476794, + "grad_norm": 0.5999841690063477, + "learning_rate": 0.00023547435272047083, + "loss": 1.5292, + "step": 7046 + }, + { + "epoch": 0.7433544303797468, + "grad_norm": 0.5808872580528259, + "learning_rate": 0.00023529171828370033, + "loss": 1.5014, + "step": 7047 + }, + { + "epoch": 0.7434599156118143, + "grad_norm": 0.608669102191925, + "learning_rate": 0.0002351091415225591, + "loss": 1.5203, + "step": 7048 + }, + { + "epoch": 0.7435654008438819, + "grad_norm": 0.6380796432495117, + "learning_rate": 0.0002349266224575063, + "loss": 1.5185, + "step": 7049 + }, + { + "epoch": 0.7436708860759493, + "grad_norm": 0.6116066575050354, + "learning_rate": 0.00023474416110899377, + "loss": 1.4837, + "step": 7050 + }, + { + "epoch": 0.7437763713080169, + "grad_norm": 0.7163864970207214, + "learning_rate": 0.00023456175749746736, + "loss": 1.5223, + "step": 7051 + }, + { + "epoch": 0.7438818565400844, + "grad_norm": 0.5556846261024475, + "learning_rate": 0.0002343794116433662, + "loss": 1.4952, + "step": 7052 + }, + { + "epoch": 0.7439873417721519, + "grad_norm": 0.6388125419616699, + "learning_rate": 0.00023419712356712307, + "loss": 1.4932, + "step": 7053 + }, + { + "epoch": 0.7440928270042194, + "grad_norm": 0.5571557879447937, + "learning_rate": 0.00023401489328916432, + "loss": 1.5193, + "step": 7054 + }, + { + "epoch": 0.744198312236287, + "grad_norm": 
0.5929639935493469, + "learning_rate": 0.00023383272082990963, + "loss": 1.5563, + "step": 7055 + }, + { + "epoch": 0.7443037974683544, + "grad_norm": 0.6164854168891907, + "learning_rate": 0.00023365060620977223, + "loss": 1.5581, + "step": 7056 + }, + { + "epoch": 0.744409282700422, + "grad_norm": 0.5993822813034058, + "learning_rate": 0.00023346854944915937, + "loss": 1.5179, + "step": 7057 + }, + { + "epoch": 0.7445147679324895, + "grad_norm": 0.5715587735176086, + "learning_rate": 0.00023328655056847124, + "loss": 1.5498, + "step": 7058 + }, + { + "epoch": 0.7446202531645569, + "grad_norm": 0.5978904366493225, + "learning_rate": 0.0002331046095881017, + "loss": 1.5194, + "step": 7059 + }, + { + "epoch": 0.7447257383966245, + "grad_norm": 0.6019467711448669, + "learning_rate": 0.00023292272652843807, + "loss": 1.5376, + "step": 7060 + }, + { + "epoch": 0.744831223628692, + "grad_norm": 0.6395410299301147, + "learning_rate": 0.00023274090140986138, + "loss": 1.588, + "step": 7061 + }, + { + "epoch": 0.7449367088607595, + "grad_norm": 0.6132714748382568, + "learning_rate": 0.00023255913425274588, + "loss": 1.5094, + "step": 7062 + }, + { + "epoch": 0.745042194092827, + "grad_norm": 0.5757219195365906, + "learning_rate": 0.00023237742507745964, + "loss": 1.5057, + "step": 7063 + }, + { + "epoch": 0.7451476793248946, + "grad_norm": 0.6512095928192139, + "learning_rate": 0.00023219577390436397, + "loss": 1.5355, + "step": 7064 + }, + { + "epoch": 0.745253164556962, + "grad_norm": 0.6079359650611877, + "learning_rate": 0.00023201418075381364, + "loss": 1.5195, + "step": 7065 + }, + { + "epoch": 0.7453586497890295, + "grad_norm": 0.6363177299499512, + "learning_rate": 0.00023183264564615756, + "loss": 1.5239, + "step": 7066 + }, + { + "epoch": 0.7454641350210971, + "grad_norm": 0.855065107345581, + "learning_rate": 0.00023165116860173726, + "loss": 1.5292, + "step": 7067 + }, + { + "epoch": 0.7455696202531645, + "grad_norm": 0.632849395275116, + "learning_rate": 
0.00023146974964088825, + "loss": 1.5476, + "step": 7068 + }, + { + "epoch": 0.7456751054852321, + "grad_norm": 0.5948481559753418, + "learning_rate": 0.00023128838878393946, + "loss": 1.5518, + "step": 7069 + }, + { + "epoch": 0.7457805907172996, + "grad_norm": 0.7374147176742554, + "learning_rate": 0.00023110708605121317, + "loss": 1.5239, + "step": 7070 + }, + { + "epoch": 0.7458860759493671, + "grad_norm": 0.6470229029655457, + "learning_rate": 0.00023092584146302539, + "loss": 1.5604, + "step": 7071 + }, + { + "epoch": 0.7459915611814346, + "grad_norm": 0.6118130683898926, + "learning_rate": 0.0002307446550396854, + "loss": 1.5461, + "step": 7072 + }, + { + "epoch": 0.7460970464135022, + "grad_norm": 0.7152345180511475, + "learning_rate": 0.0002305635268014961, + "loss": 1.525, + "step": 7073 + }, + { + "epoch": 0.7462025316455696, + "grad_norm": 0.620642900466919, + "learning_rate": 0.0002303824567687534, + "loss": 1.5162, + "step": 7074 + }, + { + "epoch": 0.7463080168776371, + "grad_norm": 0.6066505312919617, + "learning_rate": 0.00023020144496174781, + "loss": 1.4978, + "step": 7075 + }, + { + "epoch": 0.7464135021097047, + "grad_norm": 0.6830134987831116, + "learning_rate": 0.0002300204914007622, + "loss": 1.5103, + "step": 7076 + }, + { + "epoch": 0.7465189873417721, + "grad_norm": 0.7992804646492004, + "learning_rate": 0.00022983959610607338, + "loss": 1.5014, + "step": 7077 + }, + { + "epoch": 0.7466244725738397, + "grad_norm": 0.5758792161941528, + "learning_rate": 0.00022965875909795164, + "loss": 1.547, + "step": 7078 + }, + { + "epoch": 0.7467299578059071, + "grad_norm": 0.6036418080329895, + "learning_rate": 0.00022947798039666051, + "loss": 1.5462, + "step": 7079 + }, + { + "epoch": 0.7468354430379747, + "grad_norm": 0.7682303190231323, + "learning_rate": 0.00022929726002245728, + "loss": 1.5482, + "step": 7080 + }, + { + "epoch": 0.7469409282700422, + "grad_norm": 0.5841034054756165, + "learning_rate": 0.00022911659799559254, + "loss": 1.5061, + 
"step": 7081 + }, + { + "epoch": 0.7470464135021097, + "grad_norm": 0.6801189184188843, + "learning_rate": 0.00022893599433631014, + "loss": 1.5483, + "step": 7082 + }, + { + "epoch": 0.7471518987341772, + "grad_norm": 0.6867237687110901, + "learning_rate": 0.00022875544906484797, + "loss": 1.5458, + "step": 7083 + }, + { + "epoch": 0.7472573839662447, + "grad_norm": 0.6647112965583801, + "learning_rate": 0.00022857496220143696, + "loss": 1.5135, + "step": 7084 + }, + { + "epoch": 0.7473628691983122, + "grad_norm": 0.6080396175384521, + "learning_rate": 0.00022839453376630149, + "loss": 1.5275, + "step": 7085 + }, + { + "epoch": 0.7474683544303797, + "grad_norm": 0.6074345707893372, + "learning_rate": 0.00022821416377965948, + "loss": 1.5066, + "step": 7086 + }, + { + "epoch": 0.7475738396624473, + "grad_norm": 0.6404573321342468, + "learning_rate": 0.00022803385226172226, + "loss": 1.5857, + "step": 7087 + }, + { + "epoch": 0.7476793248945147, + "grad_norm": 0.6099743247032166, + "learning_rate": 0.0002278535992326947, + "loss": 1.5351, + "step": 7088 + }, + { + "epoch": 0.7477848101265823, + "grad_norm": 0.70615154504776, + "learning_rate": 0.00022767340471277492, + "loss": 1.5503, + "step": 7089 + }, + { + "epoch": 0.7478902953586498, + "grad_norm": 0.7822904586791992, + "learning_rate": 0.00022749326872215472, + "loss": 1.466, + "step": 7090 + }, + { + "epoch": 0.7479957805907173, + "grad_norm": 0.583767294883728, + "learning_rate": 0.00022731319128101906, + "loss": 1.5003, + "step": 7091 + }, + { + "epoch": 0.7481012658227848, + "grad_norm": 0.6193767786026001, + "learning_rate": 0.0002271331724095468, + "loss": 1.4986, + "step": 7092 + }, + { + "epoch": 0.7482067510548523, + "grad_norm": 0.7352082133293152, + "learning_rate": 0.0002269532121279099, + "loss": 1.5288, + "step": 7093 + }, + { + "epoch": 0.7483122362869198, + "grad_norm": 0.6796940565109253, + "learning_rate": 0.00022677331045627366, + "loss": 1.5412, + "step": 7094 + }, + { + "epoch": 
0.7484177215189873, + "grad_norm": 0.6520798802375793, + "learning_rate": 0.00022659346741479708, + "loss": 1.5409, + "step": 7095 + }, + { + "epoch": 0.7485232067510549, + "grad_norm": 0.7077030539512634, + "learning_rate": 0.00022641368302363235, + "loss": 1.5441, + "step": 7096 + }, + { + "epoch": 0.7486286919831223, + "grad_norm": 0.6059404611587524, + "learning_rate": 0.00022623395730292538, + "loss": 1.5218, + "step": 7097 + }, + { + "epoch": 0.7487341772151899, + "grad_norm": 0.5910434722900391, + "learning_rate": 0.0002260542902728151, + "loss": 1.5497, + "step": 7098 + }, + { + "epoch": 0.7488396624472574, + "grad_norm": 0.6442719101905823, + "learning_rate": 0.00022587468195343436, + "loss": 1.5157, + "step": 7099 + }, + { + "epoch": 0.7489451476793249, + "grad_norm": 0.617872416973114, + "learning_rate": 0.0002256951323649087, + "loss": 1.4965, + "step": 7100 + }, + { + "epoch": 0.7490506329113924, + "grad_norm": 0.607085645198822, + "learning_rate": 0.00022551564152735814, + "loss": 1.5654, + "step": 7101 + }, + { + "epoch": 0.74915611814346, + "grad_norm": 0.6699708104133606, + "learning_rate": 0.00022533620946089524, + "loss": 1.5616, + "step": 7102 + }, + { + "epoch": 0.7492616033755274, + "grad_norm": 0.6073563098907471, + "learning_rate": 0.00022515683618562626, + "loss": 1.527, + "step": 7103 + }, + { + "epoch": 0.7493670886075949, + "grad_norm": 0.5717176198959351, + "learning_rate": 0.00022497752172165095, + "loss": 1.487, + "step": 7104 + }, + { + "epoch": 0.7494725738396625, + "grad_norm": 0.5596964359283447, + "learning_rate": 0.0002247982660890623, + "loss": 1.5481, + "step": 7105 + }, + { + "epoch": 0.7495780590717299, + "grad_norm": 0.6234091520309448, + "learning_rate": 0.00022461906930794687, + "loss": 1.5544, + "step": 7106 + }, + { + "epoch": 0.7496835443037975, + "grad_norm": 0.7190237641334534, + "learning_rate": 0.00022443993139838447, + "loss": 1.5339, + "step": 7107 + }, + { + "epoch": 0.749789029535865, + "grad_norm": 
0.6530364155769348, + "learning_rate": 0.00022426085238044823, + "loss": 1.514, + "step": 7108 + }, + { + "epoch": 0.7498945147679325, + "grad_norm": 0.582472562789917, + "learning_rate": 0.00022408183227420528, + "loss": 1.5115, + "step": 7109 + }, + { + "epoch": 0.75, + "grad_norm": 0.7288585305213928, + "learning_rate": 0.00022390287109971547, + "loss": 1.5412, + "step": 7110 + }, + { + "epoch": 0.7501054852320675, + "grad_norm": 0.6702937483787537, + "learning_rate": 0.00022372396887703234, + "loss": 1.5819, + "step": 7111 + }, + { + "epoch": 0.750210970464135, + "grad_norm": 0.5768805146217346, + "learning_rate": 0.00022354512562620268, + "loss": 1.532, + "step": 7112 + }, + { + "epoch": 0.7503164556962025, + "grad_norm": 0.6640151739120483, + "learning_rate": 0.0002233663413672669, + "loss": 1.5899, + "step": 7113 + }, + { + "epoch": 0.7504219409282701, + "grad_norm": 0.7729074954986572, + "learning_rate": 0.00022318761612025856, + "loss": 1.5047, + "step": 7114 + }, + { + "epoch": 0.7505274261603375, + "grad_norm": 0.6238647699356079, + "learning_rate": 0.00022300894990520478, + "loss": 1.5201, + "step": 7115 + }, + { + "epoch": 0.7506329113924051, + "grad_norm": 0.7442247271537781, + "learning_rate": 0.000222830342742126, + "loss": 1.4976, + "step": 7116 + }, + { + "epoch": 0.7507383966244726, + "grad_norm": 0.73341304063797, + "learning_rate": 0.00022265179465103574, + "loss": 1.5305, + "step": 7117 + }, + { + "epoch": 0.75084388185654, + "grad_norm": 0.7149502038955688, + "learning_rate": 0.00022247330565194171, + "loss": 1.4994, + "step": 7118 + }, + { + "epoch": 0.7509493670886076, + "grad_norm": 0.8206255435943604, + "learning_rate": 0.0002222948757648443, + "loss": 1.4841, + "step": 7119 + }, + { + "epoch": 0.7510548523206751, + "grad_norm": 0.6934958696365356, + "learning_rate": 0.00022211650500973746, + "loss": 1.5628, + "step": 7120 + }, + { + "epoch": 0.7511603375527426, + "grad_norm": 0.7133862972259521, + "learning_rate": 0.0002219381934066084, 
+ "loss": 1.5817, + "step": 7121 + }, + { + "epoch": 0.7512658227848101, + "grad_norm": 0.736642062664032, + "learning_rate": 0.00022175994097543806, + "loss": 1.5393, + "step": 7122 + }, + { + "epoch": 0.7513713080168777, + "grad_norm": 0.6348379254341125, + "learning_rate": 0.0002215817477362003, + "loss": 1.5264, + "step": 7123 + }, + { + "epoch": 0.7514767932489451, + "grad_norm": 0.7244420647621155, + "learning_rate": 0.00022140361370886265, + "loss": 1.5465, + "step": 7124 + }, + { + "epoch": 0.7515822784810127, + "grad_norm": 0.7472776770591736, + "learning_rate": 0.00022122553891338586, + "loss": 1.5502, + "step": 7125 + }, + { + "epoch": 0.7516877637130802, + "grad_norm": 0.7949463725090027, + "learning_rate": 0.00022104752336972396, + "loss": 1.548, + "step": 7126 + }, + { + "epoch": 0.7517932489451477, + "grad_norm": 0.6270722150802612, + "learning_rate": 0.00022086956709782495, + "loss": 1.5398, + "step": 7127 + }, + { + "epoch": 0.7518987341772152, + "grad_norm": 0.6548155546188354, + "learning_rate": 0.0002206916701176293, + "loss": 1.5317, + "step": 7128 + }, + { + "epoch": 0.7520042194092827, + "grad_norm": 0.6201095581054688, + "learning_rate": 0.00022051383244907143, + "loss": 1.4785, + "step": 7129 + }, + { + "epoch": 0.7521097046413502, + "grad_norm": 0.6224431395530701, + "learning_rate": 0.0002203360541120789, + "loss": 1.534, + "step": 7130 + }, + { + "epoch": 0.7522151898734177, + "grad_norm": 0.6040566563606262, + "learning_rate": 0.00022015833512657268, + "loss": 1.5281, + "step": 7131 + }, + { + "epoch": 0.7523206751054853, + "grad_norm": 0.6784681081771851, + "learning_rate": 0.000219980675512467, + "loss": 1.5127, + "step": 7132 + }, + { + "epoch": 0.7524261603375527, + "grad_norm": 0.8093906044960022, + "learning_rate": 0.00021980307528966962, + "loss": 1.5031, + "step": 7133 + }, + { + "epoch": 0.7525316455696203, + "grad_norm": 0.5928219556808472, + "learning_rate": 0.00021962553447808108, + "loss": 1.5054, + "step": 7134 + }, + { + 
"epoch": 0.7526371308016878, + "grad_norm": 0.8249396681785583, + "learning_rate": 0.00021944805309759643, + "loss": 1.4874, + "step": 7135 + }, + { + "epoch": 0.7527426160337553, + "grad_norm": 0.6579453349113464, + "learning_rate": 0.000219270631168103, + "loss": 1.5435, + "step": 7136 + }, + { + "epoch": 0.7528481012658228, + "grad_norm": 0.7085180282592773, + "learning_rate": 0.0002190932687094818, + "loss": 1.5518, + "step": 7137 + }, + { + "epoch": 0.7529535864978903, + "grad_norm": 0.5988505482673645, + "learning_rate": 0.00021891596574160715, + "loss": 1.5234, + "step": 7138 + }, + { + "epoch": 0.7530590717299578, + "grad_norm": 0.5711328387260437, + "learning_rate": 0.0002187387222843467, + "loss": 1.5326, + "step": 7139 + }, + { + "epoch": 0.7531645569620253, + "grad_norm": 0.5733660459518433, + "learning_rate": 0.00021856153835756164, + "loss": 1.5273, + "step": 7140 + }, + { + "epoch": 0.7532700421940929, + "grad_norm": 0.7063443660736084, + "learning_rate": 0.00021838441398110617, + "loss": 1.5264, + "step": 7141 + }, + { + "epoch": 0.7533755274261603, + "grad_norm": 0.5707864165306091, + "learning_rate": 0.000218207349174828, + "loss": 1.5151, + "step": 7142 + }, + { + "epoch": 0.7534810126582279, + "grad_norm": 0.5720102190971375, + "learning_rate": 0.0002180303439585678, + "loss": 1.5043, + "step": 7143 + }, + { + "epoch": 0.7535864978902953, + "grad_norm": 0.687835156917572, + "learning_rate": 0.0002178533983521605, + "loss": 1.5313, + "step": 7144 + }, + { + "epoch": 0.7536919831223629, + "grad_norm": 0.5988674163818359, + "learning_rate": 0.0002176765123754334, + "loss": 1.509, + "step": 7145 + }, + { + "epoch": 0.7537974683544304, + "grad_norm": 0.6015203595161438, + "learning_rate": 0.00021749968604820754, + "loss": 1.5337, + "step": 7146 + }, + { + "epoch": 0.7539029535864978, + "grad_norm": 0.7224370241165161, + "learning_rate": 0.00021732291939029712, + "loss": 1.528, + "step": 7147 + }, + { + "epoch": 0.7540084388185654, + "grad_norm": 
0.801561176776886, + "learning_rate": 0.00021714621242150973, + "loss": 1.4905, + "step": 7148 + }, + { + "epoch": 0.7541139240506329, + "grad_norm": 0.5653326511383057, + "learning_rate": 0.0002169695651616463, + "loss": 1.4711, + "step": 7149 + }, + { + "epoch": 0.7542194092827004, + "grad_norm": 0.8081293106079102, + "learning_rate": 0.00021679297763050104, + "loss": 1.5365, + "step": 7150 + }, + { + "epoch": 0.7543248945147679, + "grad_norm": 0.8689607977867126, + "learning_rate": 0.00021661644984786142, + "loss": 1.514, + "step": 7151 + }, + { + "epoch": 0.7544303797468355, + "grad_norm": 0.6046136021614075, + "learning_rate": 0.00021643998183350802, + "loss": 1.5221, + "step": 7152 + }, + { + "epoch": 0.7545358649789029, + "grad_norm": 0.7218544483184814, + "learning_rate": 0.00021626357360721556, + "loss": 1.5031, + "step": 7153 + }, + { + "epoch": 0.7546413502109705, + "grad_norm": 0.7186922430992126, + "learning_rate": 0.0002160872251887511, + "loss": 1.4939, + "step": 7154 + }, + { + "epoch": 0.754746835443038, + "grad_norm": 0.5696269869804382, + "learning_rate": 0.00021591093659787528, + "loss": 1.568, + "step": 7155 + }, + { + "epoch": 0.7548523206751054, + "grad_norm": 0.7494037747383118, + "learning_rate": 0.00021573470785434237, + "loss": 1.5551, + "step": 7156 + }, + { + "epoch": 0.754957805907173, + "grad_norm": 0.6227465867996216, + "learning_rate": 0.00021555853897789942, + "loss": 1.4843, + "step": 7157 + }, + { + "epoch": 0.7550632911392405, + "grad_norm": 0.6400367021560669, + "learning_rate": 0.0002153824299882872, + "loss": 1.5195, + "step": 7158 + }, + { + "epoch": 0.755168776371308, + "grad_norm": 0.8783590197563171, + "learning_rate": 0.00021520638090523955, + "loss": 1.5252, + "step": 7159 + }, + { + "epoch": 0.7552742616033755, + "grad_norm": 0.6665789484977722, + "learning_rate": 0.0002150303917484834, + "loss": 1.5775, + "step": 7160 + }, + { + "epoch": 0.7553797468354431, + "grad_norm": 0.7846203446388245, + "learning_rate": 
0.00021485446253773966, + "loss": 1.5381, + "step": 7161 + }, + { + "epoch": 0.7554852320675105, + "grad_norm": 0.8370165228843689, + "learning_rate": 0.00021467859329272188, + "loss": 1.5257, + "step": 7162 + }, + { + "epoch": 0.755590717299578, + "grad_norm": 0.6281906366348267, + "learning_rate": 0.00021450278403313707, + "loss": 1.5131, + "step": 7163 + }, + { + "epoch": 0.7556962025316456, + "grad_norm": 0.9042286276817322, + "learning_rate": 0.0002143270347786856, + "loss": 1.5378, + "step": 7164 + }, + { + "epoch": 0.755801687763713, + "grad_norm": 0.7090526223182678, + "learning_rate": 0.0002141513455490609, + "loss": 1.5444, + "step": 7165 + }, + { + "epoch": 0.7559071729957806, + "grad_norm": 0.7118324041366577, + "learning_rate": 0.00021397571636394991, + "loss": 1.4977, + "step": 7166 + }, + { + "epoch": 0.7560126582278481, + "grad_norm": 0.8752036690711975, + "learning_rate": 0.00021380014724303286, + "loss": 1.521, + "step": 7167 + }, + { + "epoch": 0.7561181434599156, + "grad_norm": 0.6822726726531982, + "learning_rate": 0.00021362463820598297, + "loss": 1.5327, + "step": 7168 + }, + { + "epoch": 0.7562236286919831, + "grad_norm": 0.7514375448226929, + "learning_rate": 0.00021344918927246678, + "loss": 1.5554, + "step": 7169 + }, + { + "epoch": 0.7563291139240507, + "grad_norm": 0.7095540761947632, + "learning_rate": 0.0002132738004621446, + "loss": 1.5653, + "step": 7170 + }, + { + "epoch": 0.7564345991561181, + "grad_norm": 0.622344970703125, + "learning_rate": 0.0002130984717946695, + "loss": 1.5247, + "step": 7171 + }, + { + "epoch": 0.7565400843881857, + "grad_norm": 0.7689622640609741, + "learning_rate": 0.00021292320328968783, + "loss": 1.5268, + "step": 7172 + }, + { + "epoch": 0.7566455696202532, + "grad_norm": 0.7753618955612183, + "learning_rate": 0.0002127479949668393, + "loss": 1.5286, + "step": 7173 + }, + { + "epoch": 0.7567510548523206, + "grad_norm": 0.5522468686103821, + "learning_rate": 0.000212572846845757, + "loss": 1.4914, + 
"step": 7174 + }, + { + "epoch": 0.7568565400843882, + "grad_norm": 0.7661203742027283, + "learning_rate": 0.000212397758946067, + "loss": 1.5463, + "step": 7175 + }, + { + "epoch": 0.7569620253164557, + "grad_norm": 0.6790978312492371, + "learning_rate": 0.0002122227312873889, + "loss": 1.5009, + "step": 7176 + }, + { + "epoch": 0.7570675105485232, + "grad_norm": 0.5455016493797302, + "learning_rate": 0.00021204776388933534, + "loss": 1.5223, + "step": 7177 + }, + { + "epoch": 0.7571729957805907, + "grad_norm": 0.7594599723815918, + "learning_rate": 0.00021187285677151205, + "loss": 1.472, + "step": 7178 + }, + { + "epoch": 0.7572784810126583, + "grad_norm": 0.7124378085136414, + "learning_rate": 0.00021169800995351874, + "loss": 1.4959, + "step": 7179 + }, + { + "epoch": 0.7573839662447257, + "grad_norm": 0.621508002281189, + "learning_rate": 0.00021152322345494763, + "loss": 1.5158, + "step": 7180 + }, + { + "epoch": 0.7574894514767933, + "grad_norm": 0.7476723790168762, + "learning_rate": 0.00021134849729538438, + "loss": 1.5071, + "step": 7181 + }, + { + "epoch": 0.7575949367088608, + "grad_norm": 0.7179259061813354, + "learning_rate": 0.00021117383149440801, + "loss": 1.5258, + "step": 7182 + }, + { + "epoch": 0.7577004219409282, + "grad_norm": 0.6332883834838867, + "learning_rate": 0.00021099922607159064, + "loss": 1.4856, + "step": 7183 + }, + { + "epoch": 0.7578059071729958, + "grad_norm": 0.8166847229003906, + "learning_rate": 0.00021082468104649773, + "loss": 1.5352, + "step": 7184 + }, + { + "epoch": 0.7579113924050633, + "grad_norm": 0.6408894658088684, + "learning_rate": 0.00021065019643868785, + "loss": 1.557, + "step": 7185 + }, + { + "epoch": 0.7580168776371308, + "grad_norm": 0.6593574285507202, + "learning_rate": 0.00021047577226771292, + "loss": 1.4816, + "step": 7186 + }, + { + "epoch": 0.7581223628691983, + "grad_norm": 0.7845453023910522, + "learning_rate": 0.00021030140855311772, + "loss": 1.544, + "step": 7187 + }, + { + "epoch": 
0.7582278481012659, + "grad_norm": 0.583609938621521, + "learning_rate": 0.00021012710531444112, + "loss": 1.4912, + "step": 7188 + }, + { + "epoch": 0.7583333333333333, + "grad_norm": 0.6117342114448547, + "learning_rate": 0.00020995286257121453, + "loss": 1.511, + "step": 7189 + }, + { + "epoch": 0.7584388185654009, + "grad_norm": 0.7044767141342163, + "learning_rate": 0.00020977868034296253, + "loss": 1.4983, + "step": 7190 + }, + { + "epoch": 0.7585443037974684, + "grad_norm": 0.6029933094978333, + "learning_rate": 0.0002096045586492031, + "loss": 1.5138, + "step": 7191 + }, + { + "epoch": 0.7586497890295358, + "grad_norm": 0.5904281735420227, + "learning_rate": 0.00020943049750944768, + "loss": 1.5147, + "step": 7192 + }, + { + "epoch": 0.7587552742616034, + "grad_norm": 0.6348797678947449, + "learning_rate": 0.00020925649694320046, + "loss": 1.5276, + "step": 7193 + }, + { + "epoch": 0.7588607594936709, + "grad_norm": 0.8020390868186951, + "learning_rate": 0.0002090825569699591, + "loss": 1.4957, + "step": 7194 + }, + { + "epoch": 0.7589662447257384, + "grad_norm": 0.5833337903022766, + "learning_rate": 0.0002089086776092146, + "loss": 1.55, + "step": 7195 + }, + { + "epoch": 0.7590717299578059, + "grad_norm": 0.7577213048934937, + "learning_rate": 0.0002087348588804505, + "loss": 1.5664, + "step": 7196 + }, + { + "epoch": 0.7591772151898735, + "grad_norm": 0.5939814448356628, + "learning_rate": 0.0002085611008031449, + "loss": 1.4977, + "step": 7197 + }, + { + "epoch": 0.7592827004219409, + "grad_norm": 0.5887320637702942, + "learning_rate": 0.00020838740339676763, + "loss": 1.5347, + "step": 7198 + }, + { + "epoch": 0.7593881856540085, + "grad_norm": 0.6326873898506165, + "learning_rate": 0.00020821376668078264, + "loss": 1.5183, + "step": 7199 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 0.5574637055397034, + "learning_rate": 0.00020804019067464667, + "loss": 1.5118, + "step": 7200 + }, + { + "epoch": 0.7595991561181434, + "grad_norm": 
0.5604099035263062, + "learning_rate": 0.00020786667539780977, + "loss": 1.4968, + "step": 7201 + }, + { + "epoch": 0.759704641350211, + "grad_norm": 0.6031555533409119, + "learning_rate": 0.00020769322086971524, + "loss": 1.5133, + "step": 7202 + }, + { + "epoch": 0.7598101265822785, + "grad_norm": 0.5723567008972168, + "learning_rate": 0.00020751982710979944, + "loss": 1.5341, + "step": 7203 + }, + { + "epoch": 0.759915611814346, + "grad_norm": 0.5817543268203735, + "learning_rate": 0.0002073464941374921, + "loss": 1.5282, + "step": 7204 + }, + { + "epoch": 0.7600210970464135, + "grad_norm": 0.550913393497467, + "learning_rate": 0.000207173221972216, + "loss": 1.5563, + "step": 7205 + }, + { + "epoch": 0.7601265822784811, + "grad_norm": 0.6673417091369629, + "learning_rate": 0.00020700001063338696, + "loss": 1.5454, + "step": 7206 + }, + { + "epoch": 0.7602320675105485, + "grad_norm": 0.740218997001648, + "learning_rate": 0.00020682686014041458, + "loss": 1.5112, + "step": 7207 + }, + { + "epoch": 0.760337552742616, + "grad_norm": 0.6303597688674927, + "learning_rate": 0.00020665377051270095, + "loss": 1.5398, + "step": 7208 + }, + { + "epoch": 0.7604430379746835, + "grad_norm": 0.624174177646637, + "learning_rate": 0.00020648074176964182, + "loss": 1.501, + "step": 7209 + }, + { + "epoch": 0.760548523206751, + "grad_norm": 0.6564939618110657, + "learning_rate": 0.00020630777393062575, + "loss": 1.5294, + "step": 7210 + }, + { + "epoch": 0.7606540084388186, + "grad_norm": 0.5724647641181946, + "learning_rate": 0.00020613486701503473, + "loss": 1.551, + "step": 7211 + }, + { + "epoch": 0.760759493670886, + "grad_norm": 0.6425649523735046, + "learning_rate": 0.00020596202104224376, + "loss": 1.5246, + "step": 7212 + }, + { + "epoch": 0.7608649789029536, + "grad_norm": 0.6380171775817871, + "learning_rate": 0.0002057892360316212, + "loss": 1.5378, + "step": 7213 + }, + { + "epoch": 0.7609704641350211, + "grad_norm": 0.6648778915405273, + "learning_rate": 
0.00020561651200252836, + "loss": 1.5472, + "step": 7214 + }, + { + "epoch": 0.7610759493670886, + "grad_norm": 0.6277673840522766, + "learning_rate": 0.00020544384897431997, + "loss": 1.5403, + "step": 7215 + }, + { + "epoch": 0.7611814345991561, + "grad_norm": 0.580820620059967, + "learning_rate": 0.00020527124696634343, + "loss": 1.5259, + "step": 7216 + }, + { + "epoch": 0.7612869198312237, + "grad_norm": 0.595691442489624, + "learning_rate": 0.00020509870599794022, + "loss": 1.5496, + "step": 7217 + }, + { + "epoch": 0.7613924050632911, + "grad_norm": 0.6012159585952759, + "learning_rate": 0.0002049262260884441, + "loss": 1.5536, + "step": 7218 + }, + { + "epoch": 0.7614978902953586, + "grad_norm": 0.5886990427970886, + "learning_rate": 0.00020475380725718228, + "loss": 1.522, + "step": 7219 + }, + { + "epoch": 0.7616033755274262, + "grad_norm": 0.5923089385032654, + "learning_rate": 0.00020458144952347523, + "loss": 1.5173, + "step": 7220 + }, + { + "epoch": 0.7617088607594936, + "grad_norm": 0.6010850071907043, + "learning_rate": 0.0002044091529066365, + "loss": 1.5359, + "step": 7221 + }, + { + "epoch": 0.7618143459915612, + "grad_norm": 0.5534952282905579, + "learning_rate": 0.00020423691742597273, + "loss": 1.5228, + "step": 7222 + }, + { + "epoch": 0.7619198312236287, + "grad_norm": 0.5904103517532349, + "learning_rate": 0.0002040647431007837, + "loss": 1.5062, + "step": 7223 + }, + { + "epoch": 0.7620253164556962, + "grad_norm": 0.6248980164527893, + "learning_rate": 0.00020389262995036263, + "loss": 1.5167, + "step": 7224 + }, + { + "epoch": 0.7621308016877637, + "grad_norm": 0.5643144845962524, + "learning_rate": 0.00020372057799399534, + "loss": 1.55, + "step": 7225 + }, + { + "epoch": 0.7622362869198313, + "grad_norm": 0.5953130722045898, + "learning_rate": 0.00020354858725096122, + "loss": 1.5276, + "step": 7226 + }, + { + "epoch": 0.7623417721518987, + "grad_norm": 0.5868222117424011, + "learning_rate": 0.00020337665774053284, + "loss": 1.5105, + 
"step": 7227 + }, + { + "epoch": 0.7624472573839662, + "grad_norm": 0.5929425954818726, + "learning_rate": 0.0002032047894819758, + "loss": 1.532, + "step": 7228 + }, + { + "epoch": 0.7625527426160338, + "grad_norm": 0.5798326134681702, + "learning_rate": 0.00020303298249454857, + "loss": 1.5845, + "step": 7229 + }, + { + "epoch": 0.7626582278481012, + "grad_norm": 0.5526480078697205, + "learning_rate": 0.00020286123679750314, + "loss": 1.5416, + "step": 7230 + }, + { + "epoch": 0.7627637130801688, + "grad_norm": 0.5797103047370911, + "learning_rate": 0.00020268955241008437, + "loss": 1.5576, + "step": 7231 + }, + { + "epoch": 0.7628691983122363, + "grad_norm": 0.5697080492973328, + "learning_rate": 0.00020251792935153037, + "loss": 1.5262, + "step": 7232 + }, + { + "epoch": 0.7629746835443038, + "grad_norm": 0.5673046112060547, + "learning_rate": 0.0002023463676410724, + "loss": 1.5153, + "step": 7233 + }, + { + "epoch": 0.7630801687763713, + "grad_norm": 0.6513043642044067, + "learning_rate": 0.0002021748672979348, + "loss": 1.5245, + "step": 7234 + }, + { + "epoch": 0.7631856540084389, + "grad_norm": 0.6083111763000488, + "learning_rate": 0.00020200342834133497, + "loss": 1.5551, + "step": 7235 + }, + { + "epoch": 0.7632911392405063, + "grad_norm": 0.5956534743309021, + "learning_rate": 0.00020183205079048338, + "loss": 1.5048, + "step": 7236 + }, + { + "epoch": 0.7633966244725738, + "grad_norm": 0.6892145872116089, + "learning_rate": 0.0002016607346645841, + "loss": 1.5553, + "step": 7237 + }, + { + "epoch": 0.7635021097046414, + "grad_norm": 0.5902448296546936, + "learning_rate": 0.00020148947998283381, + "loss": 1.555, + "step": 7238 + }, + { + "epoch": 0.7636075949367088, + "grad_norm": 0.603115975856781, + "learning_rate": 0.00020131828676442237, + "loss": 1.5083, + "step": 7239 + }, + { + "epoch": 0.7637130801687764, + "grad_norm": 0.6127225160598755, + "learning_rate": 0.00020114715502853292, + "loss": 1.5142, + "step": 7240 + }, + { + "epoch": 
0.7638185654008439, + "grad_norm": 0.5819869041442871, + "learning_rate": 0.00020097608479434153, + "loss": 1.4956, + "step": 7241 + }, + { + "epoch": 0.7639240506329114, + "grad_norm": 0.5757550597190857, + "learning_rate": 0.00020080507608101757, + "loss": 1.5753, + "step": 7242 + }, + { + "epoch": 0.7640295358649789, + "grad_norm": 0.6155908703804016, + "learning_rate": 0.0002006341289077233, + "loss": 1.5384, + "step": 7243 + }, + { + "epoch": 0.7641350210970465, + "grad_norm": 0.6387591361999512, + "learning_rate": 0.00020046324329361432, + "loss": 1.5232, + "step": 7244 + }, + { + "epoch": 0.7642405063291139, + "grad_norm": 0.5813124179840088, + "learning_rate": 0.00020029241925783908, + "loss": 1.501, + "step": 7245 + }, + { + "epoch": 0.7643459915611814, + "grad_norm": 0.5610361099243164, + "learning_rate": 0.00020012165681953923, + "loss": 1.4943, + "step": 7246 + }, + { + "epoch": 0.764451476793249, + "grad_norm": 0.6091545224189758, + "learning_rate": 0.00019995095599784985, + "loss": 1.5277, + "step": 7247 + }, + { + "epoch": 0.7645569620253164, + "grad_norm": 0.5533765554428101, + "learning_rate": 0.00019978031681189864, + "loss": 1.495, + "step": 7248 + }, + { + "epoch": 0.764662447257384, + "grad_norm": 0.7487054467201233, + "learning_rate": 0.00019960973928080666, + "loss": 1.5708, + "step": 7249 + }, + { + "epoch": 0.7647679324894515, + "grad_norm": 0.5721408128738403, + "learning_rate": 0.0001994392234236878, + "loss": 1.5241, + "step": 7250 + }, + { + "epoch": 0.764873417721519, + "grad_norm": 0.5370747447013855, + "learning_rate": 0.00019926876925964928, + "loss": 1.5294, + "step": 7251 + }, + { + "epoch": 0.7649789029535865, + "grad_norm": 0.6102161407470703, + "learning_rate": 0.00019909837680779141, + "loss": 1.4942, + "step": 7252 + }, + { + "epoch": 0.765084388185654, + "grad_norm": 0.580931544303894, + "learning_rate": 0.00019892804608720747, + "loss": 1.5297, + "step": 7253 + }, + { + "epoch": 0.7651898734177215, + "grad_norm": 
0.8051382303237915, + "learning_rate": 0.00019875777711698384, + "loss": 1.5354, + "step": 7254 + }, + { + "epoch": 0.765295358649789, + "grad_norm": 0.6050834655761719, + "learning_rate": 0.00019858756991619978, + "loss": 1.5323, + "step": 7255 + }, + { + "epoch": 0.7654008438818566, + "grad_norm": 0.5858368873596191, + "learning_rate": 0.00019841742450392837, + "loss": 1.5227, + "step": 7256 + }, + { + "epoch": 0.765506329113924, + "grad_norm": 0.8035500049591064, + "learning_rate": 0.0001982473408992349, + "loss": 1.5381, + "step": 7257 + }, + { + "epoch": 0.7656118143459916, + "grad_norm": 0.539275050163269, + "learning_rate": 0.00019807731912117828, + "loss": 1.5127, + "step": 7258 + }, + { + "epoch": 0.7657172995780591, + "grad_norm": 0.5644697546958923, + "learning_rate": 0.0001979073591888101, + "loss": 1.5539, + "step": 7259 + }, + { + "epoch": 0.7658227848101266, + "grad_norm": 0.6042241454124451, + "learning_rate": 0.0001977374611211754, + "loss": 1.4641, + "step": 7260 + }, + { + "epoch": 0.7659282700421941, + "grad_norm": 0.5922195315361023, + "learning_rate": 0.00019756762493731192, + "loss": 1.486, + "step": 7261 + }, + { + "epoch": 0.7660337552742617, + "grad_norm": 0.6101502180099487, + "learning_rate": 0.00019739785065625077, + "loss": 1.5062, + "step": 7262 + }, + { + "epoch": 0.7661392405063291, + "grad_norm": 0.5695568323135376, + "learning_rate": 0.00019722813829701593, + "loss": 1.5193, + "step": 7263 + }, + { + "epoch": 0.7662447257383966, + "grad_norm": 0.5727135539054871, + "learning_rate": 0.0001970584878786244, + "loss": 1.5574, + "step": 7264 + }, + { + "epoch": 0.7663502109704642, + "grad_norm": 0.7593119144439697, + "learning_rate": 0.0001968888994200868, + "loss": 1.5033, + "step": 7265 + }, + { + "epoch": 0.7664556962025316, + "grad_norm": 0.5601691007614136, + "learning_rate": 0.00019671937294040595, + "loss": 1.4919, + "step": 7266 + }, + { + "epoch": 0.7665611814345992, + "grad_norm": 0.6777199506759644, + "learning_rate": 
0.00019654990845857832, + "loss": 1.5055, + "step": 7267 + }, + { + "epoch": 0.7666666666666667, + "grad_norm": 0.6344163417816162, + "learning_rate": 0.00019638050599359326, + "loss": 1.5458, + "step": 7268 + }, + { + "epoch": 0.7667721518987342, + "grad_norm": 0.5827726125717163, + "learning_rate": 0.000196211165564433, + "loss": 1.5313, + "step": 7269 + }, + { + "epoch": 0.7668776371308017, + "grad_norm": 0.6596550345420837, + "learning_rate": 0.00019604188719007313, + "loss": 1.5605, + "step": 7270 + }, + { + "epoch": 0.7669831223628693, + "grad_norm": 0.6487749218940735, + "learning_rate": 0.00019587267088948214, + "loss": 1.5215, + "step": 7271 + }, + { + "epoch": 0.7670886075949367, + "grad_norm": 0.5678458213806152, + "learning_rate": 0.00019570351668162143, + "loss": 1.5274, + "step": 7272 + }, + { + "epoch": 0.7671940928270042, + "grad_norm": 0.7788910865783691, + "learning_rate": 0.00019553442458544542, + "loss": 1.5282, + "step": 7273 + }, + { + "epoch": 0.7672995780590718, + "grad_norm": 0.5996631979942322, + "learning_rate": 0.00019536539461990224, + "loss": 1.4811, + "step": 7274 + }, + { + "epoch": 0.7674050632911392, + "grad_norm": 0.5733367800712585, + "learning_rate": 0.0001951964268039322, + "loss": 1.517, + "step": 7275 + }, + { + "epoch": 0.7675105485232068, + "grad_norm": 0.7580784559249878, + "learning_rate": 0.00019502752115646901, + "loss": 1.567, + "step": 7276 + }, + { + "epoch": 0.7676160337552742, + "grad_norm": 0.6146027445793152, + "learning_rate": 0.00019485867769643945, + "loss": 1.4999, + "step": 7277 + }, + { + "epoch": 0.7677215189873418, + "grad_norm": 0.5698131322860718, + "learning_rate": 0.0001946898964427633, + "loss": 1.5428, + "step": 7278 + }, + { + "epoch": 0.7678270042194093, + "grad_norm": 0.682797372341156, + "learning_rate": 0.00019452117741435314, + "loss": 1.5259, + "step": 7279 + }, + { + "epoch": 0.7679324894514767, + "grad_norm": 0.6731231808662415, + "learning_rate": 0.00019435252063011504, + "loss": 1.5066, + 
"step": 7280 + }, + { + "epoch": 0.7680379746835443, + "grad_norm": 0.5795170068740845, + "learning_rate": 0.00019418392610894768, + "loss": 1.5679, + "step": 7281 + }, + { + "epoch": 0.7681434599156118, + "grad_norm": 0.6198179721832275, + "learning_rate": 0.0001940153938697427, + "loss": 1.5693, + "step": 7282 + }, + { + "epoch": 0.7682489451476793, + "grad_norm": 0.5767260789871216, + "learning_rate": 0.0001938469239313855, + "loss": 1.525, + "step": 7283 + }, + { + "epoch": 0.7683544303797468, + "grad_norm": 0.5694810152053833, + "learning_rate": 0.00019367851631275362, + "loss": 1.5095, + "step": 7284 + }, + { + "epoch": 0.7684599156118144, + "grad_norm": 0.5716107487678528, + "learning_rate": 0.00019351017103271805, + "loss": 1.5464, + "step": 7285 + }, + { + "epoch": 0.7685654008438818, + "grad_norm": 0.6747732758522034, + "learning_rate": 0.00019334188811014278, + "loss": 1.5169, + "step": 7286 + }, + { + "epoch": 0.7686708860759494, + "grad_norm": 0.6539579629898071, + "learning_rate": 0.00019317366756388477, + "loss": 1.5088, + "step": 7287 + }, + { + "epoch": 0.7687763713080169, + "grad_norm": 0.6677218079566956, + "learning_rate": 0.0001930055094127938, + "loss": 1.5173, + "step": 7288 + }, + { + "epoch": 0.7688818565400843, + "grad_norm": 0.7292783260345459, + "learning_rate": 0.00019283741367571294, + "loss": 1.5478, + "step": 7289 + }, + { + "epoch": 0.7689873417721519, + "grad_norm": 0.6230709552764893, + "learning_rate": 0.0001926693803714779, + "loss": 1.5154, + "step": 7290 + }, + { + "epoch": 0.7690928270042194, + "grad_norm": 0.5834298729896545, + "learning_rate": 0.00019250140951891813, + "loss": 1.5971, + "step": 7291 + }, + { + "epoch": 0.7691983122362869, + "grad_norm": 0.6170585751533508, + "learning_rate": 0.00019233350113685536, + "loss": 1.5498, + "step": 7292 + }, + { + "epoch": 0.7693037974683544, + "grad_norm": 0.6068359613418579, + "learning_rate": 0.00019216565524410455, + "loss": 1.5011, + "step": 7293 + }, + { + "epoch": 
0.769409282700422, + "grad_norm": 0.6131360530853271, + "learning_rate": 0.0001919978718594738, + "loss": 1.5162, + "step": 7294 + }, + { + "epoch": 0.7695147679324894, + "grad_norm": 0.6340829133987427, + "learning_rate": 0.0001918301510017638, + "loss": 1.5582, + "step": 7295 + }, + { + "epoch": 0.769620253164557, + "grad_norm": 0.5802450776100159, + "learning_rate": 0.0001916624926897687, + "loss": 1.5341, + "step": 7296 + }, + { + "epoch": 0.7697257383966245, + "grad_norm": 0.5754836201667786, + "learning_rate": 0.0001914948969422755, + "loss": 1.5272, + "step": 7297 + }, + { + "epoch": 0.7698312236286919, + "grad_norm": 0.5775659084320068, + "learning_rate": 0.00019132736377806394, + "loss": 1.5097, + "step": 7298 + }, + { + "epoch": 0.7699367088607595, + "grad_norm": 0.5414965748786926, + "learning_rate": 0.00019115989321590694, + "loss": 1.5228, + "step": 7299 + }, + { + "epoch": 0.770042194092827, + "grad_norm": 0.6029505133628845, + "learning_rate": 0.00019099248527457068, + "loss": 1.5671, + "step": 7300 + }, + { + "epoch": 0.7701476793248945, + "grad_norm": 0.5670499801635742, + "learning_rate": 0.00019082513997281398, + "loss": 1.529, + "step": 7301 + }, + { + "epoch": 0.770253164556962, + "grad_norm": 0.5813521146774292, + "learning_rate": 0.0001906578573293886, + "loss": 1.4817, + "step": 7302 + }, + { + "epoch": 0.7703586497890296, + "grad_norm": 0.6319548487663269, + "learning_rate": 0.00019049063736303946, + "loss": 1.5246, + "step": 7303 + }, + { + "epoch": 0.770464135021097, + "grad_norm": 0.5559335350990295, + "learning_rate": 0.00019032348009250433, + "loss": 1.5242, + "step": 7304 + }, + { + "epoch": 0.7705696202531646, + "grad_norm": 0.5615382194519043, + "learning_rate": 0.0001901563855365141, + "loss": 1.4977, + "step": 7305 + }, + { + "epoch": 0.7706751054852321, + "grad_norm": 0.5773873925209045, + "learning_rate": 0.00018998935371379252, + "loss": 1.5198, + "step": 7306 + }, + { + "epoch": 0.7707805907172995, + "grad_norm": 
0.6200713515281677, + "learning_rate": 0.00018982238464305623, + "loss": 1.536, + "step": 7307 + }, + { + "epoch": 0.7708860759493671, + "grad_norm": 0.5777005553245544, + "learning_rate": 0.0001896554783430149, + "loss": 1.5208, + "step": 7308 + }, + { + "epoch": 0.7709915611814346, + "grad_norm": 0.6426216959953308, + "learning_rate": 0.00018948863483237154, + "loss": 1.5292, + "step": 7309 + }, + { + "epoch": 0.7710970464135021, + "grad_norm": 0.673957347869873, + "learning_rate": 0.0001893218541298216, + "loss": 1.5075, + "step": 7310 + }, + { + "epoch": 0.7712025316455696, + "grad_norm": 0.5840373635292053, + "learning_rate": 0.00018915513625405374, + "loss": 1.5322, + "step": 7311 + }, + { + "epoch": 0.7713080168776372, + "grad_norm": 0.6020443439483643, + "learning_rate": 0.00018898848122374942, + "loss": 1.5382, + "step": 7312 + }, + { + "epoch": 0.7714135021097046, + "grad_norm": 0.6758438944816589, + "learning_rate": 0.00018882188905758326, + "loss": 1.5235, + "step": 7313 + }, + { + "epoch": 0.7715189873417722, + "grad_norm": 0.5712345838546753, + "learning_rate": 0.00018865535977422273, + "loss": 1.5024, + "step": 7314 + }, + { + "epoch": 0.7716244725738397, + "grad_norm": 0.5779015421867371, + "learning_rate": 0.00018848889339232833, + "loss": 1.4935, + "step": 7315 + }, + { + "epoch": 0.7717299578059071, + "grad_norm": 0.6315849423408508, + "learning_rate": 0.00018832248993055304, + "loss": 1.541, + "step": 7316 + }, + { + "epoch": 0.7718354430379747, + "grad_norm": 0.6787692308425903, + "learning_rate": 0.00018815614940754377, + "loss": 1.4871, + "step": 7317 + }, + { + "epoch": 0.7719409282700422, + "grad_norm": 0.6386889815330505, + "learning_rate": 0.00018798987184193963, + "loss": 1.4976, + "step": 7318 + }, + { + "epoch": 0.7720464135021097, + "grad_norm": 0.5728421211242676, + "learning_rate": 0.00018782365725237272, + "loss": 1.5265, + "step": 7319 + }, + { + "epoch": 0.7721518987341772, + "grad_norm": 0.6956103444099426, + "learning_rate": 
0.00018765750565746827, + "loss": 1.5103, + "step": 7320 + }, + { + "epoch": 0.7722573839662448, + "grad_norm": 0.5885766744613647, + "learning_rate": 0.00018749141707584443, + "loss": 1.5652, + "step": 7321 + }, + { + "epoch": 0.7723628691983122, + "grad_norm": 0.5907243490219116, + "learning_rate": 0.0001873253915261123, + "loss": 1.503, + "step": 7322 + }, + { + "epoch": 0.7724683544303798, + "grad_norm": 0.6237804293632507, + "learning_rate": 0.00018715942902687566, + "loss": 1.5031, + "step": 7323 + }, + { + "epoch": 0.7725738396624473, + "grad_norm": 0.6323893070220947, + "learning_rate": 0.00018699352959673172, + "loss": 1.5182, + "step": 7324 + }, + { + "epoch": 0.7726793248945147, + "grad_norm": 0.5982613563537598, + "learning_rate": 0.00018682769325426986, + "loss": 1.5389, + "step": 7325 + }, + { + "epoch": 0.7727848101265823, + "grad_norm": 0.5698344707489014, + "learning_rate": 0.00018666192001807344, + "loss": 1.5154, + "step": 7326 + }, + { + "epoch": 0.7728902953586498, + "grad_norm": 0.615932822227478, + "learning_rate": 0.00018649620990671798, + "loss": 1.4924, + "step": 7327 + }, + { + "epoch": 0.7729957805907173, + "grad_norm": 0.5536485910415649, + "learning_rate": 0.00018633056293877203, + "loss": 1.5146, + "step": 7328 + }, + { + "epoch": 0.7731012658227848, + "grad_norm": 0.621597945690155, + "learning_rate": 0.00018616497913279728, + "loss": 1.4924, + "step": 7329 + }, + { + "epoch": 0.7732067510548524, + "grad_norm": 0.7344424724578857, + "learning_rate": 0.00018599945850734812, + "loss": 1.5699, + "step": 7330 + }, + { + "epoch": 0.7733122362869198, + "grad_norm": 0.5849729776382446, + "learning_rate": 0.00018583400108097194, + "loss": 1.5003, + "step": 7331 + }, + { + "epoch": 0.7734177215189874, + "grad_norm": 0.5515056252479553, + "learning_rate": 0.00018566860687220922, + "loss": 1.526, + "step": 7332 + }, + { + "epoch": 0.7735232067510549, + "grad_norm": 0.6132886409759521, + "learning_rate": 0.00018550327589959308, + "loss": 1.5302, 
+ "step": 7333 + }, + { + "epoch": 0.7736286919831223, + "grad_norm": 0.6006864905357361, + "learning_rate": 0.00018533800818164943, + "loss": 1.5052, + "step": 7334 + }, + { + "epoch": 0.7737341772151899, + "grad_norm": 0.6364529728889465, + "learning_rate": 0.00018517280373689789, + "loss": 1.5449, + "step": 7335 + }, + { + "epoch": 0.7738396624472574, + "grad_norm": 0.6143629550933838, + "learning_rate": 0.0001850076625838502, + "loss": 1.5004, + "step": 7336 + }, + { + "epoch": 0.7739451476793249, + "grad_norm": 0.6156517267227173, + "learning_rate": 0.0001848425847410112, + "loss": 1.4883, + "step": 7337 + }, + { + "epoch": 0.7740506329113924, + "grad_norm": 0.5895000696182251, + "learning_rate": 0.00018467757022687864, + "loss": 1.5182, + "step": 7338 + }, + { + "epoch": 0.77415611814346, + "grad_norm": 0.5872728228569031, + "learning_rate": 0.0001845126190599434, + "loss": 1.5413, + "step": 7339 + }, + { + "epoch": 0.7742616033755274, + "grad_norm": 0.627903938293457, + "learning_rate": 0.00018434773125868895, + "loss": 1.5124, + "step": 7340 + }, + { + "epoch": 0.774367088607595, + "grad_norm": 0.6115598678588867, + "learning_rate": 0.00018418290684159175, + "loss": 1.506, + "step": 7341 + }, + { + "epoch": 0.7744725738396624, + "grad_norm": 0.6238791346549988, + "learning_rate": 0.00018401814582712103, + "loss": 1.5173, + "step": 7342 + }, + { + "epoch": 0.7745780590717299, + "grad_norm": 0.5733575820922852, + "learning_rate": 0.0001838534482337396, + "loss": 1.4893, + "step": 7343 + }, + { + "epoch": 0.7746835443037975, + "grad_norm": 0.6061587333679199, + "learning_rate": 0.0001836888140799023, + "loss": 1.5453, + "step": 7344 + }, + { + "epoch": 0.7747890295358649, + "grad_norm": 0.5580891966819763, + "learning_rate": 0.0001835242433840573, + "loss": 1.5081, + "step": 7345 + }, + { + "epoch": 0.7748945147679325, + "grad_norm": 0.6840581297874451, + "learning_rate": 0.00018335973616464554, + "loss": 1.4623, + "step": 7346 + }, + { + "epoch": 0.775, + 
"grad_norm": 0.6121295690536499, + "learning_rate": 0.00018319529244010082, + "loss": 1.5259, + "step": 7347 + }, + { + "epoch": 0.7751054852320675, + "grad_norm": 0.5828501582145691, + "learning_rate": 0.00018303091222884998, + "loss": 1.5426, + "step": 7348 + }, + { + "epoch": 0.775210970464135, + "grad_norm": 0.5565976500511169, + "learning_rate": 0.00018286659554931254, + "loss": 1.5183, + "step": 7349 + }, + { + "epoch": 0.7753164556962026, + "grad_norm": 0.5737484097480774, + "learning_rate": 0.00018270234241990108, + "loss": 1.4907, + "step": 7350 + }, + { + "epoch": 0.77542194092827, + "grad_norm": 0.5927782654762268, + "learning_rate": 0.00018253815285902074, + "loss": 1.4966, + "step": 7351 + }, + { + "epoch": 0.7755274261603375, + "grad_norm": 0.6056440472602844, + "learning_rate": 0.0001823740268850702, + "loss": 1.5437, + "step": 7352 + }, + { + "epoch": 0.7756329113924051, + "grad_norm": 0.603557288646698, + "learning_rate": 0.0001822099645164404, + "loss": 1.518, + "step": 7353 + }, + { + "epoch": 0.7757383966244725, + "grad_norm": 0.5852877497673035, + "learning_rate": 0.00018204596577151534, + "loss": 1.4897, + "step": 7354 + }, + { + "epoch": 0.7758438818565401, + "grad_norm": 0.6112610697746277, + "learning_rate": 0.00018188203066867178, + "loss": 1.513, + "step": 7355 + }, + { + "epoch": 0.7759493670886076, + "grad_norm": 0.5794355869293213, + "learning_rate": 0.00018171815922627974, + "loss": 1.5585, + "step": 7356 + }, + { + "epoch": 0.7760548523206751, + "grad_norm": 0.6853221654891968, + "learning_rate": 0.00018155435146270158, + "loss": 1.5214, + "step": 7357 + }, + { + "epoch": 0.7761603375527426, + "grad_norm": 0.5716460347175598, + "learning_rate": 0.00018139060739629287, + "loss": 1.5347, + "step": 7358 + }, + { + "epoch": 0.7762658227848102, + "grad_norm": 0.6398293972015381, + "learning_rate": 0.00018122692704540194, + "loss": 1.5239, + "step": 7359 + }, + { + "epoch": 0.7763713080168776, + "grad_norm": 0.571724534034729, + 
"learning_rate": 0.0001810633104283698, + "loss": 1.5171, + "step": 7360 + }, + { + "epoch": 0.7764767932489451, + "grad_norm": 0.629468560218811, + "learning_rate": 0.00018089975756353083, + "loss": 1.5496, + "step": 7361 + }, + { + "epoch": 0.7765822784810127, + "grad_norm": 0.6535378694534302, + "learning_rate": 0.0001807362684692119, + "loss": 1.5445, + "step": 7362 + }, + { + "epoch": 0.7766877637130801, + "grad_norm": 0.6000810861587524, + "learning_rate": 0.00018057284316373267, + "loss": 1.4898, + "step": 7363 + }, + { + "epoch": 0.7767932489451477, + "grad_norm": 0.5616637468338013, + "learning_rate": 0.00018040948166540586, + "loss": 1.4876, + "step": 7364 + }, + { + "epoch": 0.7768987341772152, + "grad_norm": 0.5892712473869324, + "learning_rate": 0.0001802461839925368, + "loss": 1.5089, + "step": 7365 + }, + { + "epoch": 0.7770042194092827, + "grad_norm": 0.5918957591056824, + "learning_rate": 0.00018008295016342383, + "loss": 1.5129, + "step": 7366 + }, + { + "epoch": 0.7771097046413502, + "grad_norm": 0.5746480226516724, + "learning_rate": 0.00017991978019635819, + "loss": 1.5154, + "step": 7367 + }, + { + "epoch": 0.7772151898734178, + "grad_norm": 0.5624518394470215, + "learning_rate": 0.00017975667410962366, + "loss": 1.4904, + "step": 7368 + }, + { + "epoch": 0.7773206751054852, + "grad_norm": 0.6440549492835999, + "learning_rate": 0.00017959363192149752, + "loss": 1.5467, + "step": 7369 + }, + { + "epoch": 0.7774261603375527, + "grad_norm": 0.5946300029754639, + "learning_rate": 0.0001794306536502492, + "loss": 1.5576, + "step": 7370 + }, + { + "epoch": 0.7775316455696203, + "grad_norm": 0.610632598400116, + "learning_rate": 0.0001792677393141412, + "loss": 1.5305, + "step": 7371 + }, + { + "epoch": 0.7776371308016877, + "grad_norm": 0.5922273993492126, + "learning_rate": 0.00017910488893142903, + "loss": 1.5442, + "step": 7372 + }, + { + "epoch": 0.7777426160337553, + "grad_norm": 0.5838559865951538, + "learning_rate": 0.00017894210252036069, + 
"loss": 1.4769, + "step": 7373 + }, + { + "epoch": 0.7778481012658228, + "grad_norm": 0.5567162036895752, + "learning_rate": 0.0001787793800991774, + "loss": 1.521, + "step": 7374 + }, + { + "epoch": 0.7779535864978903, + "grad_norm": 0.6243470907211304, + "learning_rate": 0.00017861672168611293, + "loss": 1.554, + "step": 7375 + }, + { + "epoch": 0.7780590717299578, + "grad_norm": 0.5978196859359741, + "learning_rate": 0.0001784541272993939, + "loss": 1.4843, + "step": 7376 + }, + { + "epoch": 0.7781645569620254, + "grad_norm": 0.5760339498519897, + "learning_rate": 0.00017829159695723973, + "loss": 1.4989, + "step": 7377 + }, + { + "epoch": 0.7782700421940928, + "grad_norm": 0.5962379574775696, + "learning_rate": 0.00017812913067786313, + "loss": 1.4744, + "step": 7378 + }, + { + "epoch": 0.7783755274261603, + "grad_norm": 0.5323149561882019, + "learning_rate": 0.00017796672847946905, + "loss": 1.5102, + "step": 7379 + }, + { + "epoch": 0.7784810126582279, + "grad_norm": 0.6747624278068542, + "learning_rate": 0.0001778043903802555, + "loss": 1.5122, + "step": 7380 + }, + { + "epoch": 0.7785864978902953, + "grad_norm": 0.5689386129379272, + "learning_rate": 0.00017764211639841312, + "loss": 1.5176, + "step": 7381 + }, + { + "epoch": 0.7786919831223629, + "grad_norm": 0.6129149198532104, + "learning_rate": 0.0001774799065521257, + "loss": 1.5994, + "step": 7382 + }, + { + "epoch": 0.7787974683544304, + "grad_norm": 0.6723682880401611, + "learning_rate": 0.0001773177608595696, + "loss": 1.5158, + "step": 7383 + }, + { + "epoch": 0.7789029535864979, + "grad_norm": 0.632742166519165, + "learning_rate": 0.00017715567933891405, + "loss": 1.5076, + "step": 7384 + }, + { + "epoch": 0.7790084388185654, + "grad_norm": 0.5634408593177795, + "learning_rate": 0.0001769936620083211, + "loss": 1.5154, + "step": 7385 + }, + { + "epoch": 0.779113924050633, + "grad_norm": 0.6697182059288025, + "learning_rate": 0.0001768317088859453, + "loss": 1.4647, + "step": 7386 + }, + { + 
"epoch": 0.7792194092827004, + "grad_norm": 0.6364632248878479, + "learning_rate": 0.0001766698199899349, + "loss": 1.5359, + "step": 7387 + }, + { + "epoch": 0.7793248945147679, + "grad_norm": 0.5944094061851501, + "learning_rate": 0.00017650799533842996, + "loss": 1.5114, + "step": 7388 + }, + { + "epoch": 0.7794303797468355, + "grad_norm": 0.6575924158096313, + "learning_rate": 0.0001763462349495639, + "loss": 1.5153, + "step": 7389 + }, + { + "epoch": 0.7795358649789029, + "grad_norm": 0.6284034848213196, + "learning_rate": 0.0001761845388414627, + "loss": 1.5102, + "step": 7390 + }, + { + "epoch": 0.7796413502109705, + "grad_norm": 0.6145673990249634, + "learning_rate": 0.00017602290703224525, + "loss": 1.5219, + "step": 7391 + }, + { + "epoch": 0.779746835443038, + "grad_norm": 0.6603193879127502, + "learning_rate": 0.00017586133954002308, + "loss": 1.5525, + "step": 7392 + }, + { + "epoch": 0.7798523206751055, + "grad_norm": 0.5976398587226868, + "learning_rate": 0.00017569983638290084, + "loss": 1.5143, + "step": 7393 + }, + { + "epoch": 0.779957805907173, + "grad_norm": 0.6204336881637573, + "learning_rate": 0.0001755383975789754, + "loss": 1.5264, + "step": 7394 + }, + { + "epoch": 0.7800632911392406, + "grad_norm": 0.6150304079055786, + "learning_rate": 0.00017537702314633722, + "loss": 1.5115, + "step": 7395 + }, + { + "epoch": 0.780168776371308, + "grad_norm": 0.6120457053184509, + "learning_rate": 0.00017521571310306889, + "loss": 1.5716, + "step": 7396 + }, + { + "epoch": 0.7802742616033755, + "grad_norm": 0.7079291343688965, + "learning_rate": 0.0001750544674672461, + "loss": 1.5273, + "step": 7397 + }, + { + "epoch": 0.7803797468354431, + "grad_norm": 0.6246384978294373, + "learning_rate": 0.00017489328625693715, + "loss": 1.5121, + "step": 7398 + }, + { + "epoch": 0.7804852320675105, + "grad_norm": 0.5789629817008972, + "learning_rate": 0.00017473216949020326, + "loss": 1.5237, + "step": 7399 + }, + { + "epoch": 0.7805907172995781, + "grad_norm": 
0.6332085132598877, + "learning_rate": 0.00017457111718509831, + "loss": 1.5361, + "step": 7400 + }, + { + "epoch": 0.7806962025316456, + "grad_norm": 0.6812030673027039, + "learning_rate": 0.00017441012935966898, + "loss": 1.4775, + "step": 7401 + }, + { + "epoch": 0.7808016877637131, + "grad_norm": 0.6982613205909729, + "learning_rate": 0.00017424920603195483, + "loss": 1.5262, + "step": 7402 + }, + { + "epoch": 0.7809071729957806, + "grad_norm": 0.5720661282539368, + "learning_rate": 0.0001740883472199879, + "loss": 1.4884, + "step": 7403 + }, + { + "epoch": 0.7810126582278482, + "grad_norm": 0.6492461562156677, + "learning_rate": 0.00017392755294179363, + "loss": 1.5208, + "step": 7404 + }, + { + "epoch": 0.7811181434599156, + "grad_norm": 0.6781448125839233, + "learning_rate": 0.0001737668232153896, + "loss": 1.5033, + "step": 7405 + }, + { + "epoch": 0.7812236286919831, + "grad_norm": 0.6541131734848022, + "learning_rate": 0.00017360615805878636, + "loss": 1.5038, + "step": 7406 + }, + { + "epoch": 0.7813291139240506, + "grad_norm": 0.5997607111930847, + "learning_rate": 0.00017344555748998727, + "loss": 1.509, + "step": 7407 + }, + { + "epoch": 0.7814345991561181, + "grad_norm": 0.6197347640991211, + "learning_rate": 0.0001732850215269885, + "loss": 1.5164, + "step": 7408 + }, + { + "epoch": 0.7815400843881857, + "grad_norm": 0.6011230945587158, + "learning_rate": 0.0001731245501877787, + "loss": 1.5325, + "step": 7409 + }, + { + "epoch": 0.7816455696202531, + "grad_norm": 0.566008985042572, + "learning_rate": 0.00017296414349033976, + "loss": 1.4922, + "step": 7410 + }, + { + "epoch": 0.7817510548523207, + "grad_norm": 0.6001706123352051, + "learning_rate": 0.0001728038014526458, + "loss": 1.5311, + "step": 7411 + }, + { + "epoch": 0.7818565400843882, + "grad_norm": 0.6757147312164307, + "learning_rate": 0.00017264352409266385, + "loss": 1.536, + "step": 7412 + }, + { + "epoch": 0.7819620253164556, + "grad_norm": 0.6422447562217712, + "learning_rate": 
0.0001724833114283542, + "loss": 1.5256, + "step": 7413 + }, + { + "epoch": 0.7820675105485232, + "grad_norm": 0.675294816493988, + "learning_rate": 0.0001723231634776693, + "loss": 1.5214, + "step": 7414 + }, + { + "epoch": 0.7821729957805907, + "grad_norm": 0.6154472231864929, + "learning_rate": 0.0001721630802585545, + "loss": 1.482, + "step": 7415 + }, + { + "epoch": 0.7822784810126582, + "grad_norm": 0.6602045297622681, + "learning_rate": 0.00017200306178894785, + "loss": 1.4753, + "step": 7416 + }, + { + "epoch": 0.7823839662447257, + "grad_norm": 0.6406641602516174, + "learning_rate": 0.00017184310808678028, + "loss": 1.4889, + "step": 7417 + }, + { + "epoch": 0.7824894514767933, + "grad_norm": 0.6150414347648621, + "learning_rate": 0.00017168321916997547, + "loss": 1.5452, + "step": 7418 + }, + { + "epoch": 0.7825949367088607, + "grad_norm": 0.6038205027580261, + "learning_rate": 0.00017152339505644963, + "loss": 1.544, + "step": 7419 + }, + { + "epoch": 0.7827004219409283, + "grad_norm": 0.6771997809410095, + "learning_rate": 0.00017136363576411172, + "loss": 1.5031, + "step": 7420 + }, + { + "epoch": 0.7828059071729958, + "grad_norm": 0.5757315754890442, + "learning_rate": 0.00017120394131086398, + "loss": 1.5021, + "step": 7421 + }, + { + "epoch": 0.7829113924050632, + "grad_norm": 0.6158758401870728, + "learning_rate": 0.00017104431171460077, + "loss": 1.5088, + "step": 7422 + }, + { + "epoch": 0.7830168776371308, + "grad_norm": 0.6038413047790527, + "learning_rate": 0.0001708847469932093, + "loss": 1.484, + "step": 7423 + }, + { + "epoch": 0.7831223628691983, + "grad_norm": 0.6473309397697449, + "learning_rate": 0.00017072524716456975, + "loss": 1.5249, + "step": 7424 + }, + { + "epoch": 0.7832278481012658, + "grad_norm": 0.5714094042778015, + "learning_rate": 0.00017056581224655473, + "loss": 1.5131, + "step": 7425 + }, + { + "epoch": 0.7833333333333333, + "grad_norm": 0.6354431509971619, + "learning_rate": 0.0001704064422570298, + "loss": 1.4722, + 
"step": 7426 + }, + { + "epoch": 0.7834388185654009, + "grad_norm": 0.5779864192008972, + "learning_rate": 0.0001702471372138531, + "loss": 1.4952, + "step": 7427 + }, + { + "epoch": 0.7835443037974683, + "grad_norm": 0.6498522162437439, + "learning_rate": 0.00017008789713487558, + "loss": 1.4829, + "step": 7428 + }, + { + "epoch": 0.7836497890295359, + "grad_norm": 0.6622968912124634, + "learning_rate": 0.0001699287220379407, + "loss": 1.5098, + "step": 7429 + }, + { + "epoch": 0.7837552742616034, + "grad_norm": 0.5608402490615845, + "learning_rate": 0.00016976961194088526, + "loss": 1.5163, + "step": 7430 + }, + { + "epoch": 0.7838607594936708, + "grad_norm": 0.6073692440986633, + "learning_rate": 0.000169610566861538, + "loss": 1.4951, + "step": 7431 + }, + { + "epoch": 0.7839662447257384, + "grad_norm": 0.5553798079490662, + "learning_rate": 0.0001694515868177209, + "loss": 1.4965, + "step": 7432 + }, + { + "epoch": 0.7840717299578059, + "grad_norm": 0.5833522081375122, + "learning_rate": 0.0001692926718272483, + "loss": 1.4964, + "step": 7433 + }, + { + "epoch": 0.7841772151898734, + "grad_norm": 0.5639848113059998, + "learning_rate": 0.00016913382190792754, + "loss": 1.522, + "step": 7434 + }, + { + "epoch": 0.7842827004219409, + "grad_norm": 0.6087833642959595, + "learning_rate": 0.0001689750370775584, + "loss": 1.4997, + "step": 7435 + }, + { + "epoch": 0.7843881856540085, + "grad_norm": 0.6401132941246033, + "learning_rate": 0.00016881631735393368, + "loss": 1.4665, + "step": 7436 + }, + { + "epoch": 0.7844936708860759, + "grad_norm": 0.6643381118774414, + "learning_rate": 0.00016865766275483865, + "loss": 1.5285, + "step": 7437 + }, + { + "epoch": 0.7845991561181435, + "grad_norm": 0.5869215130805969, + "learning_rate": 0.00016849907329805118, + "loss": 1.4944, + "step": 7438 + }, + { + "epoch": 0.784704641350211, + "grad_norm": 0.6238596439361572, + "learning_rate": 0.00016834054900134228, + "loss": 1.5355, + "step": 7439 + }, + { + "epoch": 
0.7848101265822784, + "grad_norm": 0.6306948065757751, + "learning_rate": 0.00016818208988247533, + "loss": 1.5046, + "step": 7440 + }, + { + "epoch": 0.784915611814346, + "grad_norm": 0.6374438405036926, + "learning_rate": 0.00016802369595920647, + "loss": 1.5207, + "step": 7441 + }, + { + "epoch": 0.7850210970464135, + "grad_norm": 0.6541205048561096, + "learning_rate": 0.00016786536724928432, + "loss": 1.4981, + "step": 7442 + }, + { + "epoch": 0.785126582278481, + "grad_norm": 0.6529732346534729, + "learning_rate": 0.00016770710377045074, + "loss": 1.4762, + "step": 7443 + }, + { + "epoch": 0.7852320675105485, + "grad_norm": 0.6088582873344421, + "learning_rate": 0.00016754890554043965, + "loss": 1.4982, + "step": 7444 + }, + { + "epoch": 0.7853375527426161, + "grad_norm": 0.5970326662063599, + "learning_rate": 0.00016739077257697804, + "loss": 1.506, + "step": 7445 + }, + { + "epoch": 0.7854430379746835, + "grad_norm": 0.6376338005065918, + "learning_rate": 0.0001672327048977856, + "loss": 1.5018, + "step": 7446 + }, + { + "epoch": 0.7855485232067511, + "grad_norm": 0.6102411150932312, + "learning_rate": 0.00016707470252057423, + "loss": 1.5303, + "step": 7447 + }, + { + "epoch": 0.7856540084388186, + "grad_norm": 0.60674649477005, + "learning_rate": 0.00016691676546304936, + "loss": 1.5004, + "step": 7448 + }, + { + "epoch": 0.785759493670886, + "grad_norm": 0.6055317521095276, + "learning_rate": 0.00016675889374290852, + "loss": 1.5259, + "step": 7449 + }, + { + "epoch": 0.7858649789029536, + "grad_norm": 0.6132242679595947, + "learning_rate": 0.0001666010873778419, + "loss": 1.5227, + "step": 7450 + }, + { + "epoch": 0.7859704641350211, + "grad_norm": 0.5668851733207703, + "learning_rate": 0.0001664433463855325, + "loss": 1.5331, + "step": 7451 + }, + { + "epoch": 0.7860759493670886, + "grad_norm": 0.6307340860366821, + "learning_rate": 0.00016628567078365612, + "loss": 1.5212, + "step": 7452 + }, + { + "epoch": 0.7861814345991561, + "grad_norm": 
0.5982663035392761, + "learning_rate": 0.00016612806058988088, + "loss": 1.4952, + "step": 7453 + }, + { + "epoch": 0.7862869198312237, + "grad_norm": 0.6720650792121887, + "learning_rate": 0.0001659705158218679, + "loss": 1.5191, + "step": 7454 + }, + { + "epoch": 0.7863924050632911, + "grad_norm": 0.5828465819358826, + "learning_rate": 0.00016581303649727076, + "loss": 1.5213, + "step": 7455 + }, + { + "epoch": 0.7864978902953587, + "grad_norm": 0.6541213989257812, + "learning_rate": 0.000165655622633736, + "loss": 1.53, + "step": 7456 + }, + { + "epoch": 0.7866033755274262, + "grad_norm": 0.6332530379295349, + "learning_rate": 0.00016549827424890257, + "loss": 1.5197, + "step": 7457 + }, + { + "epoch": 0.7867088607594936, + "grad_norm": 0.5990210771560669, + "learning_rate": 0.00016534099136040207, + "loss": 1.5141, + "step": 7458 + }, + { + "epoch": 0.7868143459915612, + "grad_norm": 0.6008036136627197, + "learning_rate": 0.0001651837739858589, + "loss": 1.5291, + "step": 7459 + }, + { + "epoch": 0.7869198312236287, + "grad_norm": 0.5803916454315186, + "learning_rate": 0.00016502662214289, + "loss": 1.5088, + "step": 7460 + }, + { + "epoch": 0.7870253164556962, + "grad_norm": 0.5998785495758057, + "learning_rate": 0.000164869535849105, + "loss": 1.519, + "step": 7461 + }, + { + "epoch": 0.7871308016877637, + "grad_norm": 0.617906928062439, + "learning_rate": 0.00016471251512210626, + "loss": 1.51, + "step": 7462 + }, + { + "epoch": 0.7872362869198313, + "grad_norm": 0.569316565990448, + "learning_rate": 0.00016455555997948868, + "loss": 1.5128, + "step": 7463 + }, + { + "epoch": 0.7873417721518987, + "grad_norm": 0.6002281308174133, + "learning_rate": 0.0001643986704388397, + "loss": 1.4892, + "step": 7464 + }, + { + "epoch": 0.7874472573839663, + "grad_norm": 0.5882935523986816, + "learning_rate": 0.00016424184651773997, + "loss": 1.5136, + "step": 7465 + }, + { + "epoch": 0.7875527426160338, + "grad_norm": 0.6028233766555786, + "learning_rate": 
0.0001640850882337622, + "loss": 1.5285, + "step": 7466 + }, + { + "epoch": 0.7876582278481012, + "grad_norm": 0.581799328327179, + "learning_rate": 0.00016392839560447196, + "loss": 1.4974, + "step": 7467 + }, + { + "epoch": 0.7877637130801688, + "grad_norm": 0.5803029537200928, + "learning_rate": 0.00016377176864742734, + "loss": 1.5185, + "step": 7468 + }, + { + "epoch": 0.7878691983122363, + "grad_norm": 0.7021270394325256, + "learning_rate": 0.00016361520738017934, + "loss": 1.4913, + "step": 7469 + }, + { + "epoch": 0.7879746835443038, + "grad_norm": 0.6172983050346375, + "learning_rate": 0.00016345871182027124, + "loss": 1.5107, + "step": 7470 + }, + { + "epoch": 0.7880801687763713, + "grad_norm": 0.5636469721794128, + "learning_rate": 0.00016330228198523927, + "loss": 1.4966, + "step": 7471 + }, + { + "epoch": 0.7881856540084389, + "grad_norm": 0.6356780529022217, + "learning_rate": 0.00016314591789261216, + "loss": 1.4892, + "step": 7472 + }, + { + "epoch": 0.7882911392405063, + "grad_norm": 0.7109768390655518, + "learning_rate": 0.00016298961955991105, + "loss": 1.5486, + "step": 7473 + }, + { + "epoch": 0.7883966244725739, + "grad_norm": 0.5841807126998901, + "learning_rate": 0.00016283338700465034, + "loss": 1.5027, + "step": 7474 + }, + { + "epoch": 0.7885021097046413, + "grad_norm": 0.6023677587509155, + "learning_rate": 0.00016267722024433654, + "loss": 1.5098, + "step": 7475 + }, + { + "epoch": 0.7886075949367088, + "grad_norm": 0.6833862662315369, + "learning_rate": 0.0001625211192964688, + "loss": 1.5267, + "step": 7476 + }, + { + "epoch": 0.7887130801687764, + "grad_norm": 0.6012612581253052, + "learning_rate": 0.00016236508417853917, + "loss": 1.5516, + "step": 7477 + }, + { + "epoch": 0.7888185654008438, + "grad_norm": 0.6410853862762451, + "learning_rate": 0.00016220911490803206, + "loss": 1.5207, + "step": 7478 + }, + { + "epoch": 0.7889240506329114, + "grad_norm": 0.6035162806510925, + "learning_rate": 0.00016205321150242454, + "loss": 
1.4766, + "step": 7479 + }, + { + "epoch": 0.7890295358649789, + "grad_norm": 0.6800728440284729, + "learning_rate": 0.00016189737397918653, + "loss": 1.5202, + "step": 7480 + }, + { + "epoch": 0.7891350210970464, + "grad_norm": 0.5829452276229858, + "learning_rate": 0.00016174160235578, + "loss": 1.5269, + "step": 7481 + }, + { + "epoch": 0.7892405063291139, + "grad_norm": 0.6467936635017395, + "learning_rate": 0.00016158589664966053, + "loss": 1.5544, + "step": 7482 + }, + { + "epoch": 0.7893459915611815, + "grad_norm": 0.6182694435119629, + "learning_rate": 0.00016143025687827538, + "loss": 1.5168, + "step": 7483 + }, + { + "epoch": 0.7894514767932489, + "grad_norm": 0.6066710352897644, + "learning_rate": 0.0001612746830590649, + "loss": 1.5379, + "step": 7484 + }, + { + "epoch": 0.7895569620253164, + "grad_norm": 0.6463712453842163, + "learning_rate": 0.00016111917520946175, + "loss": 1.5369, + "step": 7485 + }, + { + "epoch": 0.789662447257384, + "grad_norm": 0.6311846375465393, + "learning_rate": 0.00016096373334689154, + "loss": 1.4946, + "step": 7486 + }, + { + "epoch": 0.7897679324894514, + "grad_norm": 0.5793089270591736, + "learning_rate": 0.00016080835748877214, + "loss": 1.5095, + "step": 7487 + }, + { + "epoch": 0.789873417721519, + "grad_norm": 0.6688909530639648, + "learning_rate": 0.00016065304765251423, + "loss": 1.5254, + "step": 7488 + }, + { + "epoch": 0.7899789029535865, + "grad_norm": 0.5888822674751282, + "learning_rate": 0.00016049780385552113, + "loss": 1.5241, + "step": 7489 + }, + { + "epoch": 0.790084388185654, + "grad_norm": 0.5816655158996582, + "learning_rate": 0.0001603426261151884, + "loss": 1.5165, + "step": 7490 + }, + { + "epoch": 0.7901898734177215, + "grad_norm": 0.6196232438087463, + "learning_rate": 0.000160187514448905, + "loss": 1.4968, + "step": 7491 + }, + { + "epoch": 0.7902953586497891, + "grad_norm": 0.5929698348045349, + "learning_rate": 0.0001600324688740516, + "loss": 1.5336, + "step": 7492 + }, + { + "epoch": 
0.7904008438818565, + "grad_norm": 0.5790019035339355, + "learning_rate": 0.00015987748940800186, + "loss": 1.5334, + "step": 7493 + }, + { + "epoch": 0.790506329113924, + "grad_norm": 0.5929949879646301, + "learning_rate": 0.0001597225760681221, + "loss": 1.5111, + "step": 7494 + }, + { + "epoch": 0.7906118143459916, + "grad_norm": 0.5902900099754333, + "learning_rate": 0.00015956772887177115, + "loss": 1.5568, + "step": 7495 + }, + { + "epoch": 0.790717299578059, + "grad_norm": 0.5872161388397217, + "learning_rate": 0.00015941294783630022, + "loss": 1.4765, + "step": 7496 + }, + { + "epoch": 0.7908227848101266, + "grad_norm": 0.5604379177093506, + "learning_rate": 0.00015925823297905346, + "loss": 1.5003, + "step": 7497 + }, + { + "epoch": 0.7909282700421941, + "grad_norm": 0.5935449004173279, + "learning_rate": 0.00015910358431736745, + "loss": 1.5169, + "step": 7498 + }, + { + "epoch": 0.7910337552742616, + "grad_norm": 0.65030437707901, + "learning_rate": 0.00015894900186857105, + "loss": 1.4914, + "step": 7499 + }, + { + "epoch": 0.7911392405063291, + "grad_norm": 0.5773783326148987, + "learning_rate": 0.00015879448564998648, + "loss": 1.5523, + "step": 7500 + }, + { + "epoch": 0.7912447257383967, + "grad_norm": 0.6934990882873535, + "learning_rate": 0.00015864003567892776, + "loss": 1.5655, + "step": 7501 + }, + { + "epoch": 0.7913502109704641, + "grad_norm": 0.5922262072563171, + "learning_rate": 0.00015848565197270175, + "loss": 1.5231, + "step": 7502 + }, + { + "epoch": 0.7914556962025316, + "grad_norm": 0.606638491153717, + "learning_rate": 0.00015833133454860814, + "loss": 1.5228, + "step": 7503 + }, + { + "epoch": 0.7915611814345992, + "grad_norm": 0.6172564625740051, + "learning_rate": 0.00015817708342393878, + "loss": 1.5697, + "step": 7504 + }, + { + "epoch": 0.7916666666666666, + "grad_norm": 0.6253176331520081, + "learning_rate": 0.0001580228986159783, + "loss": 1.5046, + "step": 7505 + }, + { + "epoch": 0.7917721518987342, + "grad_norm": 
0.7459272146224976, + "learning_rate": 0.00015786878014200387, + "loss": 1.5344, + "step": 7506 + }, + { + "epoch": 0.7918776371308017, + "grad_norm": 0.6533315777778625, + "learning_rate": 0.0001577147280192851, + "loss": 1.5497, + "step": 7507 + }, + { + "epoch": 0.7919831223628692, + "grad_norm": 0.8287122845649719, + "learning_rate": 0.0001575607422650846, + "loss": 1.4938, + "step": 7508 + }, + { + "epoch": 0.7920886075949367, + "grad_norm": 0.6214670538902283, + "learning_rate": 0.00015740682289665714, + "loss": 1.4672, + "step": 7509 + }, + { + "epoch": 0.7921940928270043, + "grad_norm": 0.5860722064971924, + "learning_rate": 0.0001572529699312501, + "loss": 1.5102, + "step": 7510 + }, + { + "epoch": 0.7922995780590717, + "grad_norm": 0.6556915044784546, + "learning_rate": 0.0001570991833861035, + "loss": 1.5012, + "step": 7511 + }, + { + "epoch": 0.7924050632911392, + "grad_norm": 0.7367563247680664, + "learning_rate": 0.00015694546327844986, + "loss": 1.504, + "step": 7512 + }, + { + "epoch": 0.7925105485232068, + "grad_norm": 0.5837484002113342, + "learning_rate": 0.00015679180962551435, + "loss": 1.502, + "step": 7513 + }, + { + "epoch": 0.7926160337552742, + "grad_norm": 0.6720466017723083, + "learning_rate": 0.00015663822244451446, + "loss": 1.5222, + "step": 7514 + }, + { + "epoch": 0.7927215189873418, + "grad_norm": 0.6823978424072266, + "learning_rate": 0.00015648470175266057, + "loss": 1.4793, + "step": 7515 + }, + { + "epoch": 0.7928270042194093, + "grad_norm": 0.587338387966156, + "learning_rate": 0.00015633124756715523, + "loss": 1.5531, + "step": 7516 + }, + { + "epoch": 0.7929324894514768, + "grad_norm": 0.6100544929504395, + "learning_rate": 0.00015617785990519403, + "loss": 1.4923, + "step": 7517 + }, + { + "epoch": 0.7930379746835443, + "grad_norm": 0.7038679718971252, + "learning_rate": 0.00015602453878396479, + "loss": 1.5229, + "step": 7518 + }, + { + "epoch": 0.7931434599156119, + "grad_norm": 0.6222920417785645, + "learning_rate": 
0.0001558712842206477, + "loss": 1.5182, + "step": 7519 + }, + { + "epoch": 0.7932489451476793, + "grad_norm": 0.7112630009651184, + "learning_rate": 0.0001557180962324158, + "loss": 1.4964, + "step": 7520 + }, + { + "epoch": 0.7933544303797468, + "grad_norm": 0.5773868560791016, + "learning_rate": 0.00015556497483643466, + "loss": 1.4901, + "step": 7521 + }, + { + "epoch": 0.7934599156118144, + "grad_norm": 0.6436287760734558, + "learning_rate": 0.00015541192004986222, + "loss": 1.5128, + "step": 7522 + }, + { + "epoch": 0.7935654008438818, + "grad_norm": 0.6245293021202087, + "learning_rate": 0.00015525893188984898, + "loss": 1.5019, + "step": 7523 + }, + { + "epoch": 0.7936708860759494, + "grad_norm": 0.665753185749054, + "learning_rate": 0.00015510601037353804, + "loss": 1.5172, + "step": 7524 + }, + { + "epoch": 0.7937763713080169, + "grad_norm": 0.5403124094009399, + "learning_rate": 0.00015495315551806486, + "loss": 1.5353, + "step": 7525 + }, + { + "epoch": 0.7938818565400844, + "grad_norm": 0.5965964198112488, + "learning_rate": 0.000154800367340558, + "loss": 1.4964, + "step": 7526 + }, + { + "epoch": 0.7939873417721519, + "grad_norm": 0.6232386827468872, + "learning_rate": 0.00015464764585813783, + "loss": 1.5143, + "step": 7527 + }, + { + "epoch": 0.7940928270042195, + "grad_norm": 0.5895153284072876, + "learning_rate": 0.0001544949910879177, + "loss": 1.5063, + "step": 7528 + }, + { + "epoch": 0.7941983122362869, + "grad_norm": 0.5861321091651917, + "learning_rate": 0.00015434240304700332, + "loss": 1.516, + "step": 7529 + }, + { + "epoch": 0.7943037974683544, + "grad_norm": 0.5695503354072571, + "learning_rate": 0.00015418988175249282, + "loss": 1.5118, + "step": 7530 + }, + { + "epoch": 0.794409282700422, + "grad_norm": 0.5761889815330505, + "learning_rate": 0.00015403742722147707, + "loss": 1.5271, + "step": 7531 + }, + { + "epoch": 0.7945147679324894, + "grad_norm": 0.5764886140823364, + "learning_rate": 0.00015388503947103937, + "loss": 1.5038, + 
"step": 7532 + }, + { + "epoch": 0.794620253164557, + "grad_norm": 0.658724844455719, + "learning_rate": 0.00015373271851825527, + "loss": 1.5283, + "step": 7533 + }, + { + "epoch": 0.7947257383966245, + "grad_norm": 0.5554746389389038, + "learning_rate": 0.00015358046438019356, + "loss": 1.536, + "step": 7534 + }, + { + "epoch": 0.794831223628692, + "grad_norm": 0.6070231199264526, + "learning_rate": 0.00015342827707391475, + "loss": 1.475, + "step": 7535 + }, + { + "epoch": 0.7949367088607595, + "grad_norm": 0.5909380912780762, + "learning_rate": 0.0001532761566164723, + "loss": 1.481, + "step": 7536 + }, + { + "epoch": 0.7950421940928271, + "grad_norm": 0.6037484407424927, + "learning_rate": 0.0001531241030249121, + "loss": 1.5151, + "step": 7537 + }, + { + "epoch": 0.7951476793248945, + "grad_norm": 0.6074955463409424, + "learning_rate": 0.00015297211631627234, + "loss": 1.5066, + "step": 7538 + }, + { + "epoch": 0.795253164556962, + "grad_norm": 0.6241724491119385, + "learning_rate": 0.0001528201965075841, + "loss": 1.4915, + "step": 7539 + }, + { + "epoch": 0.7953586497890295, + "grad_norm": 0.6203395128250122, + "learning_rate": 0.00015266834361587063, + "loss": 1.5206, + "step": 7540 + }, + { + "epoch": 0.795464135021097, + "grad_norm": 0.653510570526123, + "learning_rate": 0.00015251655765814777, + "loss": 1.4691, + "step": 7541 + }, + { + "epoch": 0.7955696202531646, + "grad_norm": 0.6151084899902344, + "learning_rate": 0.000152364838651424, + "loss": 1.4997, + "step": 7542 + }, + { + "epoch": 0.795675105485232, + "grad_norm": 0.6857190132141113, + "learning_rate": 0.00015221318661269985, + "loss": 1.5095, + "step": 7543 + }, + { + "epoch": 0.7957805907172996, + "grad_norm": 0.5800842642784119, + "learning_rate": 0.00015206160155896924, + "loss": 1.5107, + "step": 7544 + }, + { + "epoch": 0.7958860759493671, + "grad_norm": 0.655845582485199, + "learning_rate": 0.00015191008350721772, + "loss": 1.526, + "step": 7545 + }, + { + "epoch": 0.7959915611814345, 
+ "grad_norm": 0.6090908646583557, + "learning_rate": 0.00015175863247442374, + "loss": 1.5358, + "step": 7546 + }, + { + "epoch": 0.7960970464135021, + "grad_norm": 0.6238208413124084, + "learning_rate": 0.00015160724847755806, + "loss": 1.4884, + "step": 7547 + }, + { + "epoch": 0.7962025316455696, + "grad_norm": 0.5564160943031311, + "learning_rate": 0.00015145593153358412, + "loss": 1.4946, + "step": 7548 + }, + { + "epoch": 0.7963080168776371, + "grad_norm": 0.5528047680854797, + "learning_rate": 0.0001513046816594575, + "loss": 1.5096, + "step": 7549 + }, + { + "epoch": 0.7964135021097046, + "grad_norm": 0.6494990587234497, + "learning_rate": 0.00015115349887212678, + "loss": 1.5137, + "step": 7550 + }, + { + "epoch": 0.7965189873417722, + "grad_norm": 0.5935213565826416, + "learning_rate": 0.00015100238318853262, + "loss": 1.4963, + "step": 7551 + }, + { + "epoch": 0.7966244725738396, + "grad_norm": 0.5852930545806885, + "learning_rate": 0.00015085133462560833, + "loss": 1.5438, + "step": 7552 + }, + { + "epoch": 0.7967299578059072, + "grad_norm": 0.6201268434524536, + "learning_rate": 0.00015070035320027933, + "loss": 1.5383, + "step": 7553 + }, + { + "epoch": 0.7968354430379747, + "grad_norm": 0.6180981397628784, + "learning_rate": 0.00015054943892946446, + "loss": 1.483, + "step": 7554 + }, + { + "epoch": 0.7969409282700421, + "grad_norm": 0.5934417843818665, + "learning_rate": 0.000150398591830074, + "loss": 1.445, + "step": 7555 + }, + { + "epoch": 0.7970464135021097, + "grad_norm": 0.6596316695213318, + "learning_rate": 0.00015024781191901122, + "loss": 1.5629, + "step": 7556 + }, + { + "epoch": 0.7971518987341772, + "grad_norm": 0.6267884373664856, + "learning_rate": 0.00015009709921317172, + "loss": 1.5053, + "step": 7557 + }, + { + "epoch": 0.7972573839662447, + "grad_norm": 0.7077874541282654, + "learning_rate": 0.00014994645372944367, + "loss": 1.4862, + "step": 7558 + }, + { + "epoch": 0.7973628691983122, + "grad_norm": 0.6662502288818359, + 
"learning_rate": 0.0001497958754847076, + "loss": 1.4994, + "step": 7559 + }, + { + "epoch": 0.7974683544303798, + "grad_norm": 0.6557727456092834, + "learning_rate": 0.00014964536449583657, + "loss": 1.5125, + "step": 7560 + }, + { + "epoch": 0.7975738396624472, + "grad_norm": 0.6419434547424316, + "learning_rate": 0.0001494949207796961, + "loss": 1.5107, + "step": 7561 + }, + { + "epoch": 0.7976793248945148, + "grad_norm": 0.7175958752632141, + "learning_rate": 0.00014934454435314417, + "loss": 1.5494, + "step": 7562 + }, + { + "epoch": 0.7977848101265823, + "grad_norm": 0.8862952589988708, + "learning_rate": 0.00014919423523303095, + "loss": 1.5139, + "step": 7563 + }, + { + "epoch": 0.7978902953586497, + "grad_norm": 0.5757266283035278, + "learning_rate": 0.00014904399343619972, + "loss": 1.522, + "step": 7564 + }, + { + "epoch": 0.7979957805907173, + "grad_norm": 0.7583336234092712, + "learning_rate": 0.00014889381897948575, + "loss": 1.5318, + "step": 7565 + }, + { + "epoch": 0.7981012658227848, + "grad_norm": 0.7068955898284912, + "learning_rate": 0.00014874371187971672, + "loss": 1.4888, + "step": 7566 + }, + { + "epoch": 0.7982067510548523, + "grad_norm": 0.6330370903015137, + "learning_rate": 0.00014859367215371293, + "loss": 1.5427, + "step": 7567 + }, + { + "epoch": 0.7983122362869198, + "grad_norm": 0.6811701655387878, + "learning_rate": 0.00014844369981828698, + "loss": 1.5247, + "step": 7568 + }, + { + "epoch": 0.7984177215189874, + "grad_norm": 0.8079935312271118, + "learning_rate": 0.00014829379489024415, + "loss": 1.5387, + "step": 7569 + }, + { + "epoch": 0.7985232067510548, + "grad_norm": 0.6310039162635803, + "learning_rate": 0.00014814395738638195, + "loss": 1.5075, + "step": 7570 + }, + { + "epoch": 0.7986286919831224, + "grad_norm": 0.5752525925636292, + "learning_rate": 0.0001479941873234905, + "loss": 1.4786, + "step": 7571 + }, + { + "epoch": 0.7987341772151899, + "grad_norm": 0.6469733715057373, + "learning_rate": 0.00014784448471835224, 
+ "loss": 1.5251, + "step": 7572 + }, + { + "epoch": 0.7988396624472573, + "grad_norm": 0.6497083306312561, + "learning_rate": 0.0001476948495877418, + "loss": 1.5164, + "step": 7573 + }, + { + "epoch": 0.7989451476793249, + "grad_norm": 0.5865034461021423, + "learning_rate": 0.00014754528194842707, + "loss": 1.5294, + "step": 7574 + }, + { + "epoch": 0.7990506329113924, + "grad_norm": 0.614106297492981, + "learning_rate": 0.00014739578181716765, + "loss": 1.4965, + "step": 7575 + }, + { + "epoch": 0.7991561181434599, + "grad_norm": 0.6522930860519409, + "learning_rate": 0.00014724634921071573, + "loss": 1.5159, + "step": 7576 + }, + { + "epoch": 0.7992616033755274, + "grad_norm": 0.5491697192192078, + "learning_rate": 0.0001470969841458159, + "loss": 1.5261, + "step": 7577 + }, + { + "epoch": 0.799367088607595, + "grad_norm": 0.6279969215393066, + "learning_rate": 0.00014694768663920537, + "loss": 1.5251, + "step": 7578 + }, + { + "epoch": 0.7994725738396624, + "grad_norm": 0.5893758535385132, + "learning_rate": 0.0001467984567076137, + "loss": 1.5494, + "step": 7579 + }, + { + "epoch": 0.79957805907173, + "grad_norm": 0.5965490341186523, + "learning_rate": 0.00014664929436776278, + "loss": 1.5058, + "step": 7580 + }, + { + "epoch": 0.7996835443037975, + "grad_norm": 0.5984877943992615, + "learning_rate": 0.00014650019963636696, + "loss": 1.4893, + "step": 7581 + }, + { + "epoch": 0.799789029535865, + "grad_norm": 0.5592886209487915, + "learning_rate": 0.0001463511725301331, + "loss": 1.5298, + "step": 7582 + }, + { + "epoch": 0.7998945147679325, + "grad_norm": 0.5852375626564026, + "learning_rate": 0.00014620221306576027, + "loss": 1.535, + "step": 7583 + }, + { + "epoch": 0.8, + "grad_norm": 0.5590638518333435, + "learning_rate": 0.00014605332125994038, + "loss": 1.4985, + "step": 7584 + }, + { + "epoch": 0.8001054852320675, + "grad_norm": 0.5655669569969177, + "learning_rate": 0.0001459044971293575, + "loss": 1.5194, + "step": 7585 + }, + { + "epoch": 
0.800210970464135, + "grad_norm": 0.5813979506492615, + "learning_rate": 0.000145755740690688, + "loss": 1.4949, + "step": 7586 + }, + { + "epoch": 0.8003164556962026, + "grad_norm": 0.5935661792755127, + "learning_rate": 0.00014560705196060074, + "loss": 1.4938, + "step": 7587 + }, + { + "epoch": 0.80042194092827, + "grad_norm": 0.5987158417701721, + "learning_rate": 0.00014545843095575709, + "loss": 1.5171, + "step": 7588 + }, + { + "epoch": 0.8005274261603376, + "grad_norm": 0.5738258957862854, + "learning_rate": 0.00014530987769281075, + "loss": 1.5158, + "step": 7589 + }, + { + "epoch": 0.8006329113924051, + "grad_norm": 0.5448023080825806, + "learning_rate": 0.00014516139218840788, + "loss": 1.4757, + "step": 7590 + }, + { + "epoch": 0.8007383966244725, + "grad_norm": 0.5975995063781738, + "learning_rate": 0.00014501297445918703, + "loss": 1.5123, + "step": 7591 + }, + { + "epoch": 0.8008438818565401, + "grad_norm": 0.6222701668739319, + "learning_rate": 0.00014486462452177896, + "loss": 1.5308, + "step": 7592 + }, + { + "epoch": 0.8009493670886076, + "grad_norm": 0.5992230772972107, + "learning_rate": 0.0001447163423928073, + "loss": 1.544, + "step": 7593 + }, + { + "epoch": 0.8010548523206751, + "grad_norm": 0.5924118757247925, + "learning_rate": 0.00014456812808888775, + "loss": 1.4955, + "step": 7594 + }, + { + "epoch": 0.8011603375527426, + "grad_norm": 0.583116352558136, + "learning_rate": 0.00014441998162662847, + "loss": 1.5671, + "step": 7595 + }, + { + "epoch": 0.8012658227848102, + "grad_norm": 0.6113268733024597, + "learning_rate": 0.00014427190302262989, + "loss": 1.4916, + "step": 7596 + }, + { + "epoch": 0.8013713080168776, + "grad_norm": 0.602851927280426, + "learning_rate": 0.00014412389229348494, + "loss": 1.4643, + "step": 7597 + }, + { + "epoch": 0.8014767932489452, + "grad_norm": 0.5532145500183105, + "learning_rate": 0.00014397594945577912, + "loss": 1.4901, + "step": 7598 + }, + { + "epoch": 0.8015822784810127, + "grad_norm": 
0.5939487218856812, + "learning_rate": 0.00014382807452609003, + "loss": 1.5147, + "step": 7599 + }, + { + "epoch": 0.8016877637130801, + "grad_norm": 0.6535288095474243, + "learning_rate": 0.00014368026752098782, + "loss": 1.4934, + "step": 7600 + }, + { + "epoch": 0.8017932489451477, + "grad_norm": 0.614974319934845, + "learning_rate": 0.00014353252845703506, + "loss": 1.5476, + "step": 7601 + }, + { + "epoch": 0.8018987341772152, + "grad_norm": 0.5930627584457397, + "learning_rate": 0.00014338485735078632, + "loss": 1.504, + "step": 7602 + }, + { + "epoch": 0.8020042194092827, + "grad_norm": 0.5995104312896729, + "learning_rate": 0.0001432372542187895, + "loss": 1.565, + "step": 7603 + }, + { + "epoch": 0.8021097046413502, + "grad_norm": 0.5823193788528442, + "learning_rate": 0.00014308971907758383, + "loss": 1.5391, + "step": 7604 + }, + { + "epoch": 0.8022151898734177, + "grad_norm": 0.5638706684112549, + "learning_rate": 0.00014294225194370154, + "loss": 1.5068, + "step": 7605 + }, + { + "epoch": 0.8023206751054852, + "grad_norm": 0.5956803560256958, + "learning_rate": 0.00014279485283366696, + "loss": 1.525, + "step": 7606 + }, + { + "epoch": 0.8024261603375528, + "grad_norm": 0.6027578115463257, + "learning_rate": 0.00014264752176399687, + "loss": 1.4938, + "step": 7607 + }, + { + "epoch": 0.8025316455696202, + "grad_norm": 0.6517495512962341, + "learning_rate": 0.0001425002587512005, + "loss": 1.4869, + "step": 7608 + }, + { + "epoch": 0.8026371308016877, + "grad_norm": 0.6496115922927856, + "learning_rate": 0.00014235306381177952, + "loss": 1.5317, + "step": 7609 + }, + { + "epoch": 0.8027426160337553, + "grad_norm": 0.7217275500297546, + "learning_rate": 0.00014220593696222768, + "loss": 1.5423, + "step": 7610 + }, + { + "epoch": 0.8028481012658227, + "grad_norm": 0.559851884841919, + "learning_rate": 0.00014205887821903105, + "loss": 1.4755, + "step": 7611 + }, + { + "epoch": 0.8029535864978903, + "grad_norm": 0.5996726751327515, + "learning_rate": 
0.00014191188759866887, + "loss": 1.5259, + "step": 7612 + }, + { + "epoch": 0.8030590717299578, + "grad_norm": 0.5879226922988892, + "learning_rate": 0.00014176496511761192, + "loss": 1.4858, + "step": 7613 + }, + { + "epoch": 0.8031645569620253, + "grad_norm": 0.5556496977806091, + "learning_rate": 0.0001416181107923235, + "loss": 1.4849, + "step": 7614 + }, + { + "epoch": 0.8032700421940928, + "grad_norm": 0.578754186630249, + "learning_rate": 0.0001414713246392594, + "loss": 1.5102, + "step": 7615 + }, + { + "epoch": 0.8033755274261604, + "grad_norm": 0.5950930714607239, + "learning_rate": 0.0001413246066748678, + "loss": 1.5049, + "step": 7616 + }, + { + "epoch": 0.8034810126582278, + "grad_norm": 0.5877275466918945, + "learning_rate": 0.00014117795691558915, + "loss": 1.4793, + "step": 7617 + }, + { + "epoch": 0.8035864978902953, + "grad_norm": 0.5350720286369324, + "learning_rate": 0.00014103137537785633, + "loss": 1.4887, + "step": 7618 + }, + { + "epoch": 0.8036919831223629, + "grad_norm": 0.7094367742538452, + "learning_rate": 0.00014088486207809449, + "loss": 1.5124, + "step": 7619 + }, + { + "epoch": 0.8037974683544303, + "grad_norm": 0.5622050166130066, + "learning_rate": 0.00014073841703272092, + "loss": 1.488, + "step": 7620 + }, + { + "epoch": 0.8039029535864979, + "grad_norm": 0.6530174016952515, + "learning_rate": 0.00014059204025814603, + "loss": 1.5755, + "step": 7621 + }, + { + "epoch": 0.8040084388185654, + "grad_norm": 0.6323561668395996, + "learning_rate": 0.0001404457317707718, + "loss": 1.4772, + "step": 7622 + }, + { + "epoch": 0.8041139240506329, + "grad_norm": 0.7959002256393433, + "learning_rate": 0.00014029949158699285, + "loss": 1.5001, + "step": 7623 + }, + { + "epoch": 0.8042194092827004, + "grad_norm": 0.60673987865448, + "learning_rate": 0.00014015331972319606, + "loss": 1.5291, + "step": 7624 + }, + { + "epoch": 0.804324894514768, + "grad_norm": 0.6851693987846375, + "learning_rate": 0.00014000721619576077, + "loss": 1.5086, + 
"step": 7625 + }, + { + "epoch": 0.8044303797468354, + "grad_norm": 0.6344135999679565, + "learning_rate": 0.0001398611810210586, + "loss": 1.5023, + "step": 7626 + }, + { + "epoch": 0.804535864978903, + "grad_norm": 0.5836651921272278, + "learning_rate": 0.0001397152142154536, + "loss": 1.5131, + "step": 7627 + }, + { + "epoch": 0.8046413502109705, + "grad_norm": 0.6052422523498535, + "learning_rate": 0.00013956931579530194, + "loss": 1.5206, + "step": 7628 + }, + { + "epoch": 0.8047468354430379, + "grad_norm": 0.8590986728668213, + "learning_rate": 0.0001394234857769521, + "loss": 1.5315, + "step": 7629 + }, + { + "epoch": 0.8048523206751055, + "grad_norm": 0.5820575952529907, + "learning_rate": 0.00013927772417674558, + "loss": 1.5421, + "step": 7630 + }, + { + "epoch": 0.804957805907173, + "grad_norm": 0.6541255712509155, + "learning_rate": 0.00013913203101101532, + "loss": 1.5482, + "step": 7631 + }, + { + "epoch": 0.8050632911392405, + "grad_norm": 0.6894555687904358, + "learning_rate": 0.0001389864062960871, + "loss": 1.4907, + "step": 7632 + }, + { + "epoch": 0.805168776371308, + "grad_norm": 0.6666078567504883, + "learning_rate": 0.00013884085004827883, + "loss": 1.5104, + "step": 7633 + }, + { + "epoch": 0.8052742616033756, + "grad_norm": 0.6918875575065613, + "learning_rate": 0.0001386953622839008, + "loss": 1.5335, + "step": 7634 + }, + { + "epoch": 0.805379746835443, + "grad_norm": 0.8455651998519897, + "learning_rate": 0.0001385499430192557, + "loss": 1.5459, + "step": 7635 + }, + { + "epoch": 0.8054852320675105, + "grad_norm": 0.6137141585350037, + "learning_rate": 0.00013840459227063842, + "loss": 1.4884, + "step": 7636 + }, + { + "epoch": 0.8055907172995781, + "grad_norm": 0.5261053442955017, + "learning_rate": 0.00013825931005433605, + "loss": 1.486, + "step": 7637 + }, + { + "epoch": 0.8056962025316455, + "grad_norm": 0.635147213935852, + "learning_rate": 0.00013811409638662858, + "loss": 1.4999, + "step": 7638 + }, + { + "epoch": 
0.8058016877637131, + "grad_norm": 0.6387996077537537, + "learning_rate": 0.0001379689512837878, + "loss": 1.5172, + "step": 7639 + }, + { + "epoch": 0.8059071729957806, + "grad_norm": 0.5871735215187073, + "learning_rate": 0.00013782387476207788, + "loss": 1.4901, + "step": 7640 + }, + { + "epoch": 0.8060126582278481, + "grad_norm": 0.6232497096061707, + "learning_rate": 0.0001376788668377554, + "loss": 1.5271, + "step": 7641 + }, + { + "epoch": 0.8061181434599156, + "grad_norm": 0.583361804485321, + "learning_rate": 0.0001375339275270692, + "loss": 1.5497, + "step": 7642 + }, + { + "epoch": 0.8062236286919832, + "grad_norm": 0.5702431201934814, + "learning_rate": 0.00013738905684626044, + "loss": 1.4873, + "step": 7643 + }, + { + "epoch": 0.8063291139240506, + "grad_norm": 0.5493018627166748, + "learning_rate": 0.00013724425481156263, + "loss": 1.5371, + "step": 7644 + }, + { + "epoch": 0.8064345991561181, + "grad_norm": 0.6020561456680298, + "learning_rate": 0.00013709952143920148, + "loss": 1.5279, + "step": 7645 + }, + { + "epoch": 0.8065400843881857, + "grad_norm": 0.5938580632209778, + "learning_rate": 0.000136954856745395, + "loss": 1.56, + "step": 7646 + }, + { + "epoch": 0.8066455696202531, + "grad_norm": 0.5702493190765381, + "learning_rate": 0.000136810260746354, + "loss": 1.5099, + "step": 7647 + }, + { + "epoch": 0.8067510548523207, + "grad_norm": 0.6198325157165527, + "learning_rate": 0.00013666573345828083, + "loss": 1.4915, + "step": 7648 + }, + { + "epoch": 0.8068565400843882, + "grad_norm": 0.6261126399040222, + "learning_rate": 0.00013652127489737067, + "loss": 1.5166, + "step": 7649 + }, + { + "epoch": 0.8069620253164557, + "grad_norm": 0.5949349403381348, + "learning_rate": 0.00013637688507981064, + "loss": 1.552, + "step": 7650 + }, + { + "epoch": 0.8070675105485232, + "grad_norm": 0.6093190312385559, + "learning_rate": 0.0001362325640217805, + "loss": 1.5142, + "step": 7651 + }, + { + "epoch": 0.8071729957805908, + "grad_norm": 
0.5631209015846252, + "learning_rate": 0.00013608831173945207, + "loss": 1.4415, + "step": 7652 + }, + { + "epoch": 0.8072784810126582, + "grad_norm": 0.6251153945922852, + "learning_rate": 0.0001359441282489895, + "loss": 1.5127, + "step": 7653 + }, + { + "epoch": 0.8073839662447257, + "grad_norm": 0.6278437972068787, + "learning_rate": 0.0001358000135665494, + "loss": 1.5438, + "step": 7654 + }, + { + "epoch": 0.8074894514767933, + "grad_norm": 0.5701504945755005, + "learning_rate": 0.00013565596770828025, + "loss": 1.4796, + "step": 7655 + }, + { + "epoch": 0.8075949367088607, + "grad_norm": 0.6196970343589783, + "learning_rate": 0.00013551199069032348, + "loss": 1.4866, + "step": 7656 + }, + { + "epoch": 0.8077004219409283, + "grad_norm": 0.5642666220664978, + "learning_rate": 0.0001353680825288123, + "loss": 1.5417, + "step": 7657 + }, + { + "epoch": 0.8078059071729958, + "grad_norm": 0.5809318423271179, + "learning_rate": 0.0001352242432398723, + "loss": 1.5234, + "step": 7658 + }, + { + "epoch": 0.8079113924050633, + "grad_norm": 0.6183022260665894, + "learning_rate": 0.00013508047283962137, + "loss": 1.5028, + "step": 7659 + }, + { + "epoch": 0.8080168776371308, + "grad_norm": 0.5754873752593994, + "learning_rate": 0.0001349367713441697, + "loss": 1.4813, + "step": 7660 + }, + { + "epoch": 0.8081223628691984, + "grad_norm": 0.5831127762794495, + "learning_rate": 0.0001347931387696198, + "loss": 1.4849, + "step": 7661 + }, + { + "epoch": 0.8082278481012658, + "grad_norm": 0.630499541759491, + "learning_rate": 0.0001346495751320664, + "loss": 1.5215, + "step": 7662 + }, + { + "epoch": 0.8083333333333333, + "grad_norm": 0.6178796291351318, + "learning_rate": 0.00013450608044759634, + "loss": 1.5354, + "step": 7663 + }, + { + "epoch": 0.8084388185654009, + "grad_norm": 0.6334956288337708, + "learning_rate": 0.00013436265473228926, + "loss": 1.5093, + "step": 7664 + }, + { + "epoch": 0.8085443037974683, + "grad_norm": 0.5919117331504822, + "learning_rate": 
0.0001342192980022166, + "loss": 1.5261, + "step": 7665 + }, + { + "epoch": 0.8086497890295359, + "grad_norm": 0.65104079246521, + "learning_rate": 0.00013407601027344213, + "loss": 1.4663, + "step": 7666 + }, + { + "epoch": 0.8087552742616034, + "grad_norm": 0.5715122818946838, + "learning_rate": 0.00013393279156202197, + "loss": 1.5003, + "step": 7667 + }, + { + "epoch": 0.8088607594936709, + "grad_norm": 0.6131259202957153, + "learning_rate": 0.00013378964188400457, + "loss": 1.5447, + "step": 7668 + }, + { + "epoch": 0.8089662447257384, + "grad_norm": 0.5738056898117065, + "learning_rate": 0.00013364656125543044, + "loss": 1.5083, + "step": 7669 + }, + { + "epoch": 0.8090717299578059, + "grad_norm": 0.5649034976959229, + "learning_rate": 0.0001335035496923326, + "loss": 1.5151, + "step": 7670 + }, + { + "epoch": 0.8091772151898734, + "grad_norm": 0.6261987090110779, + "learning_rate": 0.00013336060721073608, + "loss": 1.5262, + "step": 7671 + }, + { + "epoch": 0.809282700421941, + "grad_norm": 0.6244848966598511, + "learning_rate": 0.00013321773382665822, + "loss": 1.5554, + "step": 7672 + }, + { + "epoch": 0.8093881856540084, + "grad_norm": 0.6000209450721741, + "learning_rate": 0.00013307492955610896, + "loss": 1.551, + "step": 7673 + }, + { + "epoch": 0.8094936708860759, + "grad_norm": 0.5864971280097961, + "learning_rate": 0.0001329321944150902, + "loss": 1.5339, + "step": 7674 + }, + { + "epoch": 0.8095991561181435, + "grad_norm": 0.59794682264328, + "learning_rate": 0.000132789528419596, + "loss": 1.4792, + "step": 7675 + }, + { + "epoch": 0.8097046413502109, + "grad_norm": 0.5811276435852051, + "learning_rate": 0.0001326469315856128, + "loss": 1.5177, + "step": 7676 + }, + { + "epoch": 0.8098101265822785, + "grad_norm": 0.5950625538825989, + "learning_rate": 0.00013250440392911927, + "loss": 1.5011, + "step": 7677 + }, + { + "epoch": 0.809915611814346, + "grad_norm": 0.5705804824829102, + "learning_rate": 0.00013236194546608645, + "loss": 1.5533, + 
"step": 7678 + }, + { + "epoch": 0.8100210970464135, + "grad_norm": 0.6349958777427673, + "learning_rate": 0.00013221955621247749, + "loss": 1.4968, + "step": 7679 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 0.6409980654716492, + "learning_rate": 0.0001320772361842478, + "loss": 1.5558, + "step": 7680 + }, + { + "epoch": 0.8102320675105485, + "grad_norm": 0.5692750811576843, + "learning_rate": 0.00013193498539734478, + "loss": 1.5086, + "step": 7681 + }, + { + "epoch": 0.810337552742616, + "grad_norm": 0.6760788559913635, + "learning_rate": 0.00013179280386770885, + "loss": 1.507, + "step": 7682 + }, + { + "epoch": 0.8104430379746835, + "grad_norm": 0.5934817790985107, + "learning_rate": 0.00013165069161127183, + "loss": 1.4863, + "step": 7683 + }, + { + "epoch": 0.8105485232067511, + "grad_norm": 0.6613250970840454, + "learning_rate": 0.00013150864864395825, + "loss": 1.489, + "step": 7684 + }, + { + "epoch": 0.8106540084388185, + "grad_norm": 0.5972319841384888, + "learning_rate": 0.00013136667498168464, + "loss": 1.5309, + "step": 7685 + }, + { + "epoch": 0.8107594936708861, + "grad_norm": 0.6338675022125244, + "learning_rate": 0.00013122477064035992, + "loss": 1.5197, + "step": 7686 + }, + { + "epoch": 0.8108649789029536, + "grad_norm": 0.632262647151947, + "learning_rate": 0.00013108293563588504, + "loss": 1.5077, + "step": 7687 + }, + { + "epoch": 0.810970464135021, + "grad_norm": 0.6016536355018616, + "learning_rate": 0.00013094116998415358, + "loss": 1.4813, + "step": 7688 + }, + { + "epoch": 0.8110759493670886, + "grad_norm": 0.6188677549362183, + "learning_rate": 0.00013079947370105057, + "loss": 1.4953, + "step": 7689 + }, + { + "epoch": 0.8111814345991561, + "grad_norm": 0.6123583912849426, + "learning_rate": 0.00013065784680245442, + "loss": 1.4615, + "step": 7690 + }, + { + "epoch": 0.8112869198312236, + "grad_norm": 0.6253855228424072, + "learning_rate": 0.00013051628930423485, + "loss": 1.5146, + "step": 7691 + }, + { + "epoch": 
0.8113924050632911, + "grad_norm": 0.5471342206001282, + "learning_rate": 0.00013037480122225412, + "loss": 1.5303, + "step": 7692 + }, + { + "epoch": 0.8114978902953587, + "grad_norm": 0.5452311635017395, + "learning_rate": 0.00013023338257236655, + "loss": 1.5033, + "step": 7693 + }, + { + "epoch": 0.8116033755274261, + "grad_norm": 0.6127024292945862, + "learning_rate": 0.00013009203337041898, + "loss": 1.4929, + "step": 7694 + }, + { + "epoch": 0.8117088607594937, + "grad_norm": 0.659613311290741, + "learning_rate": 0.0001299507536322502, + "loss": 1.4937, + "step": 7695 + }, + { + "epoch": 0.8118143459915612, + "grad_norm": 0.5739288926124573, + "learning_rate": 0.00012980954337369133, + "loss": 1.538, + "step": 7696 + }, + { + "epoch": 0.8119198312236287, + "grad_norm": 0.6474050283432007, + "learning_rate": 0.00012966840261056562, + "loss": 1.4961, + "step": 7697 + }, + { + "epoch": 0.8120253164556962, + "grad_norm": 0.6040645837783813, + "learning_rate": 0.0001295273313586885, + "loss": 1.5011, + "step": 7698 + }, + { + "epoch": 0.8121308016877637, + "grad_norm": 0.5588385462760925, + "learning_rate": 0.00012938632963386808, + "loss": 1.5025, + "step": 7699 + }, + { + "epoch": 0.8122362869198312, + "grad_norm": 0.6039049625396729, + "learning_rate": 0.00012924539745190402, + "loss": 1.5299, + "step": 7700 + }, + { + "epoch": 0.8123417721518987, + "grad_norm": 0.6484990119934082, + "learning_rate": 0.0001291045348285885, + "loss": 1.4956, + "step": 7701 + }, + { + "epoch": 0.8124472573839663, + "grad_norm": 0.6716830134391785, + "learning_rate": 0.00012896374177970602, + "loss": 1.5238, + "step": 7702 + }, + { + "epoch": 0.8125527426160337, + "grad_norm": 0.5311853885650635, + "learning_rate": 0.00012882301832103297, + "loss": 1.4717, + "step": 7703 + }, + { + "epoch": 0.8126582278481013, + "grad_norm": 0.5840174555778503, + "learning_rate": 0.0001286823644683382, + "loss": 1.5, + "step": 7704 + }, + { + "epoch": 0.8127637130801688, + "grad_norm": 
0.6117832064628601, + "learning_rate": 0.0001285417802373827, + "loss": 1.4992, + "step": 7705 + }, + { + "epoch": 0.8128691983122363, + "grad_norm": 0.6196329593658447, + "learning_rate": 0.00012840126564391961, + "loss": 1.4822, + "step": 7706 + }, + { + "epoch": 0.8129746835443038, + "grad_norm": 0.5885174870491028, + "learning_rate": 0.00012826082070369402, + "loss": 1.5218, + "step": 7707 + }, + { + "epoch": 0.8130801687763713, + "grad_norm": 0.5787088871002197, + "learning_rate": 0.00012812044543244395, + "loss": 1.5065, + "step": 7708 + }, + { + "epoch": 0.8131856540084388, + "grad_norm": 0.5794746279716492, + "learning_rate": 0.00012798013984589894, + "loss": 1.5478, + "step": 7709 + }, + { + "epoch": 0.8132911392405063, + "grad_norm": 0.6903585195541382, + "learning_rate": 0.0001278399039597809, + "loss": 1.5022, + "step": 7710 + }, + { + "epoch": 0.8133966244725739, + "grad_norm": 0.6651071906089783, + "learning_rate": 0.00012769973778980405, + "loss": 1.5357, + "step": 7711 + }, + { + "epoch": 0.8135021097046413, + "grad_norm": 0.5914744138717651, + "learning_rate": 0.00012755964135167464, + "loss": 1.5048, + "step": 7712 + }, + { + "epoch": 0.8136075949367089, + "grad_norm": 0.6073241829872131, + "learning_rate": 0.00012741961466109113, + "loss": 1.4998, + "step": 7713 + }, + { + "epoch": 0.8137130801687764, + "grad_norm": 0.6640692949295044, + "learning_rate": 0.00012727965773374434, + "loss": 1.534, + "step": 7714 + }, + { + "epoch": 0.8138185654008439, + "grad_norm": 0.5706428289413452, + "learning_rate": 0.00012713977058531685, + "loss": 1.5098, + "step": 7715 + }, + { + "epoch": 0.8139240506329114, + "grad_norm": 0.6138387322425842, + "learning_rate": 0.0001269999532314841, + "loss": 1.5235, + "step": 7716 + }, + { + "epoch": 0.814029535864979, + "grad_norm": 0.5877163410186768, + "learning_rate": 0.00012686020568791311, + "loss": 1.5579, + "step": 7717 + }, + { + "epoch": 0.8141350210970464, + "grad_norm": 0.5288856029510498, + "learning_rate": 
0.00012672052797026344, + "loss": 1.4632, + "step": 7718 + }, + { + "epoch": 0.8142405063291139, + "grad_norm": 0.6068286299705505, + "learning_rate": 0.00012658092009418652, + "loss": 1.4806, + "step": 7719 + }, + { + "epoch": 0.8143459915611815, + "grad_norm": 0.6054596900939941, + "learning_rate": 0.0001264413820753261, + "loss": 1.5188, + "step": 7720 + }, + { + "epoch": 0.8144514767932489, + "grad_norm": 0.557390570640564, + "learning_rate": 0.0001263019139293182, + "loss": 1.5308, + "step": 7721 + }, + { + "epoch": 0.8145569620253165, + "grad_norm": 0.5584434270858765, + "learning_rate": 0.0001261625156717909, + "loss": 1.5007, + "step": 7722 + }, + { + "epoch": 0.814662447257384, + "grad_norm": 0.610927402973175, + "learning_rate": 0.0001260231873183644, + "loss": 1.5324, + "step": 7723 + }, + { + "epoch": 0.8147679324894515, + "grad_norm": 0.6485070586204529, + "learning_rate": 0.00012588392888465103, + "loss": 1.4905, + "step": 7724 + }, + { + "epoch": 0.814873417721519, + "grad_norm": 0.5935006737709045, + "learning_rate": 0.0001257447403862557, + "loss": 1.56, + "step": 7725 + }, + { + "epoch": 0.8149789029535865, + "grad_norm": 0.586430549621582, + "learning_rate": 0.00012560562183877507, + "loss": 1.5123, + "step": 7726 + }, + { + "epoch": 0.815084388185654, + "grad_norm": 0.5761376023292542, + "learning_rate": 0.00012546657325779805, + "loss": 1.4974, + "step": 7727 + }, + { + "epoch": 0.8151898734177215, + "grad_norm": 0.5995194315910339, + "learning_rate": 0.00012532759465890567, + "loss": 1.4959, + "step": 7728 + }, + { + "epoch": 0.8152953586497891, + "grad_norm": 0.5580874681472778, + "learning_rate": 0.00012518868605767118, + "loss": 1.5073, + "step": 7729 + }, + { + "epoch": 0.8154008438818565, + "grad_norm": 0.643932044506073, + "learning_rate": 0.00012504984746966003, + "loss": 1.5224, + "step": 7730 + }, + { + "epoch": 0.8155063291139241, + "grad_norm": 0.5982974171638489, + "learning_rate": 0.0001249110789104298, + "loss": 1.489, + "step": 
7731 + }, + { + "epoch": 0.8156118143459916, + "grad_norm": 0.6128320097923279, + "learning_rate": 0.00012477238039553006, + "loss": 1.5293, + "step": 7732 + }, + { + "epoch": 0.815717299578059, + "grad_norm": 0.5711730122566223, + "learning_rate": 0.00012463375194050267, + "loss": 1.5105, + "step": 7733 + }, + { + "epoch": 0.8158227848101266, + "grad_norm": 0.5707519054412842, + "learning_rate": 0.00012449519356088192, + "loss": 1.4892, + "step": 7734 + }, + { + "epoch": 0.8159282700421941, + "grad_norm": 0.5588021874427795, + "learning_rate": 0.0001243567052721937, + "loss": 1.5084, + "step": 7735 + }, + { + "epoch": 0.8160337552742616, + "grad_norm": 0.5948117971420288, + "learning_rate": 0.00012421828708995649, + "loss": 1.5181, + "step": 7736 + }, + { + "epoch": 0.8161392405063291, + "grad_norm": 0.6240710020065308, + "learning_rate": 0.00012407993902968057, + "loss": 1.4828, + "step": 7737 + }, + { + "epoch": 0.8162447257383966, + "grad_norm": 0.5862104892730713, + "learning_rate": 0.00012394166110686857, + "loss": 1.5247, + "step": 7738 + }, + { + "epoch": 0.8163502109704641, + "grad_norm": 0.5905552506446838, + "learning_rate": 0.0001238034533370153, + "loss": 1.5574, + "step": 7739 + }, + { + "epoch": 0.8164556962025317, + "grad_norm": 0.6229321956634521, + "learning_rate": 0.00012366531573560754, + "loss": 1.534, + "step": 7740 + }, + { + "epoch": 0.8165611814345991, + "grad_norm": 0.6077169179916382, + "learning_rate": 0.00012352724831812424, + "loss": 1.475, + "step": 7741 + }, + { + "epoch": 0.8166666666666667, + "grad_norm": 0.5771961808204651, + "learning_rate": 0.0001233892511000368, + "loss": 1.4777, + "step": 7742 + }, + { + "epoch": 0.8167721518987342, + "grad_norm": 0.6250103116035461, + "learning_rate": 0.00012325132409680829, + "loss": 1.5382, + "step": 7743 + }, + { + "epoch": 0.8168776371308016, + "grad_norm": 0.7914557456970215, + "learning_rate": 0.00012311346732389418, + "loss": 1.5223, + "step": 7744 + }, + { + "epoch": 
0.8169831223628692, + "grad_norm": 0.6360073089599609, + "learning_rate": 0.000122975680796742, + "loss": 1.4612, + "step": 7745 + }, + { + "epoch": 0.8170886075949367, + "grad_norm": 0.5957863926887512, + "learning_rate": 0.00012283796453079146, + "loss": 1.5016, + "step": 7746 + }, + { + "epoch": 0.8171940928270042, + "grad_norm": 0.601535975933075, + "learning_rate": 0.00012270031854147426, + "loss": 1.4871, + "step": 7747 + }, + { + "epoch": 0.8172995780590717, + "grad_norm": 0.6754758954048157, + "learning_rate": 0.0001225627428442143, + "loss": 1.4945, + "step": 7748 + }, + { + "epoch": 0.8174050632911393, + "grad_norm": 0.6096088290214539, + "learning_rate": 0.0001224252374544278, + "loss": 1.5444, + "step": 7749 + }, + { + "epoch": 0.8175105485232067, + "grad_norm": 0.5919698476791382, + "learning_rate": 0.00012228780238752264, + "loss": 1.505, + "step": 7750 + }, + { + "epoch": 0.8176160337552743, + "grad_norm": 0.7854317426681519, + "learning_rate": 0.00012215043765889932, + "loss": 1.5233, + "step": 7751 + }, + { + "epoch": 0.8177215189873418, + "grad_norm": 0.6036674380302429, + "learning_rate": 0.00012201314328395032, + "loss": 1.4573, + "step": 7752 + }, + { + "epoch": 0.8178270042194092, + "grad_norm": 0.6358328461647034, + "learning_rate": 0.00012187591927806, + "loss": 1.4873, + "step": 7753 + }, + { + "epoch": 0.8179324894514768, + "grad_norm": 0.6771661043167114, + "learning_rate": 0.0001217387656566051, + "loss": 1.5079, + "step": 7754 + }, + { + "epoch": 0.8180379746835443, + "grad_norm": 0.8421444296836853, + "learning_rate": 0.0001216016824349542, + "loss": 1.5102, + "step": 7755 + }, + { + "epoch": 0.8181434599156118, + "grad_norm": 0.631643533706665, + "learning_rate": 0.00012146466962846833, + "loss": 1.502, + "step": 7756 + }, + { + "epoch": 0.8182489451476793, + "grad_norm": 0.6282027363777161, + "learning_rate": 0.00012132772725250038, + "loss": 1.5115, + "step": 7757 + }, + { + "epoch": 0.8183544303797469, + "grad_norm": 
0.7647317051887512, + "learning_rate": 0.0001211908553223954, + "loss": 1.4712, + "step": 7758 + }, + { + "epoch": 0.8184599156118143, + "grad_norm": 0.6299341917037964, + "learning_rate": 0.00012105405385349047, + "loss": 1.5369, + "step": 7759 + }, + { + "epoch": 0.8185654008438819, + "grad_norm": 0.6461431980133057, + "learning_rate": 0.00012091732286111514, + "loss": 1.5305, + "step": 7760 + }, + { + "epoch": 0.8186708860759494, + "grad_norm": 0.5783994793891907, + "learning_rate": 0.00012078066236059068, + "loss": 1.4881, + "step": 7761 + }, + { + "epoch": 0.8187763713080168, + "grad_norm": 0.5969078540802002, + "learning_rate": 0.00012064407236723066, + "loss": 1.4828, + "step": 7762 + }, + { + "epoch": 0.8188818565400844, + "grad_norm": 0.8243065476417542, + "learning_rate": 0.00012050755289634049, + "loss": 1.5479, + "step": 7763 + }, + { + "epoch": 0.8189873417721519, + "grad_norm": 0.6097508668899536, + "learning_rate": 0.00012037110396321796, + "loss": 1.5404, + "step": 7764 + }, + { + "epoch": 0.8190928270042194, + "grad_norm": 0.6113643646240234, + "learning_rate": 0.0001202347255831529, + "loss": 1.4989, + "step": 7765 + }, + { + "epoch": 0.8191983122362869, + "grad_norm": 0.5632550716400146, + "learning_rate": 0.0001200984177714271, + "loss": 1.5365, + "step": 7766 + }, + { + "epoch": 0.8193037974683545, + "grad_norm": 0.690463125705719, + "learning_rate": 0.00011996218054331434, + "loss": 1.5144, + "step": 7767 + }, + { + "epoch": 0.8194092827004219, + "grad_norm": 0.7438617944717407, + "learning_rate": 0.00011982601391408115, + "loss": 1.5164, + "step": 7768 + }, + { + "epoch": 0.8195147679324895, + "grad_norm": 0.5732688903808594, + "learning_rate": 0.00011968991789898533, + "loss": 1.5402, + "step": 7769 + }, + { + "epoch": 0.819620253164557, + "grad_norm": 0.6096445322036743, + "learning_rate": 0.00011955389251327737, + "loss": 1.457, + "step": 7770 + }, + { + "epoch": 0.8197257383966244, + "grad_norm": 0.6708980202674866, + "learning_rate": 
0.00011941793777219937, + "loss": 1.4951, + "step": 7771 + }, + { + "epoch": 0.819831223628692, + "grad_norm": 0.6672976613044739, + "learning_rate": 0.00011928205369098574, + "loss": 1.4885, + "step": 7772 + }, + { + "epoch": 0.8199367088607595, + "grad_norm": 0.59940105676651, + "learning_rate": 0.00011914624028486315, + "loss": 1.4889, + "step": 7773 + }, + { + "epoch": 0.820042194092827, + "grad_norm": 0.5893868207931519, + "learning_rate": 0.00011901049756905, + "loss": 1.4521, + "step": 7774 + }, + { + "epoch": 0.8201476793248945, + "grad_norm": 0.6778846383094788, + "learning_rate": 0.00011887482555875695, + "loss": 1.519, + "step": 7775 + }, + { + "epoch": 0.8202531645569621, + "grad_norm": 0.5592150688171387, + "learning_rate": 0.00011873922426918668, + "loss": 1.5299, + "step": 7776 + }, + { + "epoch": 0.8203586497890295, + "grad_norm": 0.5724233388900757, + "learning_rate": 0.0001186036937155342, + "loss": 1.5357, + "step": 7777 + }, + { + "epoch": 0.820464135021097, + "grad_norm": 0.5659502148628235, + "learning_rate": 0.00011846823391298628, + "loss": 1.5014, + "step": 7778 + }, + { + "epoch": 0.8205696202531646, + "grad_norm": 0.6678732633590698, + "learning_rate": 0.00011833284487672185, + "loss": 1.4837, + "step": 7779 + }, + { + "epoch": 0.820675105485232, + "grad_norm": 0.7362218499183655, + "learning_rate": 0.00011819752662191197, + "loss": 1.5353, + "step": 7780 + }, + { + "epoch": 0.8207805907172996, + "grad_norm": 0.5575602650642395, + "learning_rate": 0.00011806227916371964, + "loss": 1.5524, + "step": 7781 + }, + { + "epoch": 0.8208860759493671, + "grad_norm": 0.7499693632125854, + "learning_rate": 0.0001179271025173001, + "loss": 1.5141, + "step": 7782 + }, + { + "epoch": 0.8209915611814346, + "grad_norm": 0.7109810709953308, + "learning_rate": 0.00011779199669780046, + "loss": 1.5129, + "step": 7783 + }, + { + "epoch": 0.8210970464135021, + "grad_norm": 0.5710666179656982, + "learning_rate": 0.00011765696172036006, + "loss": 1.5372, + 
"step": 7784 + }, + { + "epoch": 0.8212025316455697, + "grad_norm": 0.6240333318710327, + "learning_rate": 0.00011752199760011017, + "loss": 1.5026, + "step": 7785 + }, + { + "epoch": 0.8213080168776371, + "grad_norm": 0.5649271607398987, + "learning_rate": 0.00011738710435217431, + "loss": 1.5442, + "step": 7786 + }, + { + "epoch": 0.8214135021097047, + "grad_norm": 0.6155076622962952, + "learning_rate": 0.00011725228199166805, + "loss": 1.5478, + "step": 7787 + }, + { + "epoch": 0.8215189873417722, + "grad_norm": 0.664182722568512, + "learning_rate": 0.00011711753053369861, + "loss": 1.5052, + "step": 7788 + }, + { + "epoch": 0.8216244725738396, + "grad_norm": 0.6421725153923035, + "learning_rate": 0.00011698284999336578, + "loss": 1.493, + "step": 7789 + }, + { + "epoch": 0.8217299578059072, + "grad_norm": 0.6156697869300842, + "learning_rate": 0.00011684824038576115, + "loss": 1.5092, + "step": 7790 + }, + { + "epoch": 0.8218354430379747, + "grad_norm": 0.6313364505767822, + "learning_rate": 0.00011671370172596829, + "loss": 1.5077, + "step": 7791 + }, + { + "epoch": 0.8219409282700422, + "grad_norm": 0.6056076884269714, + "learning_rate": 0.00011657923402906309, + "loss": 1.5602, + "step": 7792 + }, + { + "epoch": 0.8220464135021097, + "grad_norm": 0.6182287931442261, + "learning_rate": 0.000116444837310113, + "loss": 1.5216, + "step": 7793 + }, + { + "epoch": 0.8221518987341773, + "grad_norm": 0.5759435892105103, + "learning_rate": 0.00011631051158417828, + "loss": 1.4744, + "step": 7794 + }, + { + "epoch": 0.8222573839662447, + "grad_norm": 0.5685601234436035, + "learning_rate": 0.00011617625686631056, + "loss": 1.4772, + "step": 7795 + }, + { + "epoch": 0.8223628691983123, + "grad_norm": 0.6194779276847839, + "learning_rate": 0.00011604207317155383, + "loss": 1.5125, + "step": 7796 + }, + { + "epoch": 0.8224683544303798, + "grad_norm": 0.6790281534194946, + "learning_rate": 0.00011590796051494395, + "loss": 1.5119, + "step": 7797 + }, + { + "epoch": 
0.8225738396624472, + "grad_norm": 0.5443506836891174, + "learning_rate": 0.00011577391891150901, + "loss": 1.4894, + "step": 7798 + }, + { + "epoch": 0.8226793248945148, + "grad_norm": 0.604243278503418, + "learning_rate": 0.00011563994837626898, + "loss": 1.539, + "step": 7799 + }, + { + "epoch": 0.8227848101265823, + "grad_norm": 0.6419333815574646, + "learning_rate": 0.00011550604892423593, + "loss": 1.4679, + "step": 7800 + }, + { + "epoch": 0.8228902953586498, + "grad_norm": 0.6281059980392456, + "learning_rate": 0.00011537222057041396, + "loss": 1.5205, + "step": 7801 + }, + { + "epoch": 0.8229957805907173, + "grad_norm": 0.6174309849739075, + "learning_rate": 0.00011523846332979907, + "loss": 1.4975, + "step": 7802 + }, + { + "epoch": 0.8231012658227848, + "grad_norm": 0.7690178751945496, + "learning_rate": 0.00011510477721737974, + "loss": 1.4983, + "step": 7803 + }, + { + "epoch": 0.8232067510548523, + "grad_norm": 0.5952246189117432, + "learning_rate": 0.00011497116224813604, + "loss": 1.5181, + "step": 7804 + }, + { + "epoch": 0.8233122362869199, + "grad_norm": 0.5576610565185547, + "learning_rate": 0.0001148376184370401, + "loss": 1.4827, + "step": 7805 + }, + { + "epoch": 0.8234177215189873, + "grad_norm": 0.6281406879425049, + "learning_rate": 0.00011470414579905617, + "loss": 1.5479, + "step": 7806 + }, + { + "epoch": 0.8235232067510548, + "grad_norm": 0.6469004154205322, + "learning_rate": 0.00011457074434914067, + "loss": 1.5235, + "step": 7807 + }, + { + "epoch": 0.8236286919831224, + "grad_norm": 0.5621099472045898, + "learning_rate": 0.00011443741410224173, + "loss": 1.4639, + "step": 7808 + }, + { + "epoch": 0.8237341772151898, + "grad_norm": 0.5656566023826599, + "learning_rate": 0.00011430415507329975, + "loss": 1.5156, + "step": 7809 + }, + { + "epoch": 0.8238396624472574, + "grad_norm": 0.5606250762939453, + "learning_rate": 0.0001141709672772471, + "loss": 1.4984, + "step": 7810 + }, + { + "epoch": 0.8239451476793249, + "grad_norm": 
0.6589621305465698, + "learning_rate": 0.00011403785072900793, + "loss": 1.4919, + "step": 7811 + }, + { + "epoch": 0.8240506329113924, + "grad_norm": 0.5664991736412048, + "learning_rate": 0.00011390480544349891, + "loss": 1.5164, + "step": 7812 + }, + { + "epoch": 0.8241561181434599, + "grad_norm": 0.5551931262016296, + "learning_rate": 0.00011377183143562833, + "loss": 1.5006, + "step": 7813 + }, + { + "epoch": 0.8242616033755275, + "grad_norm": 0.6042037010192871, + "learning_rate": 0.00011363892872029655, + "loss": 1.5603, + "step": 7814 + }, + { + "epoch": 0.8243670886075949, + "grad_norm": 0.5568233132362366, + "learning_rate": 0.00011350609731239597, + "loss": 1.5058, + "step": 7815 + }, + { + "epoch": 0.8244725738396624, + "grad_norm": 0.5372852683067322, + "learning_rate": 0.00011337333722681104, + "loss": 1.4817, + "step": 7816 + }, + { + "epoch": 0.82457805907173, + "grad_norm": 0.6008899807929993, + "learning_rate": 0.00011324064847841817, + "loss": 1.4939, + "step": 7817 + }, + { + "epoch": 0.8246835443037974, + "grad_norm": 0.6015622615814209, + "learning_rate": 0.00011310803108208581, + "loss": 1.5494, + "step": 7818 + }, + { + "epoch": 0.824789029535865, + "grad_norm": 0.5955657958984375, + "learning_rate": 0.00011297548505267424, + "loss": 1.5285, + "step": 7819 + }, + { + "epoch": 0.8248945147679325, + "grad_norm": 0.5798176527023315, + "learning_rate": 0.00011284301040503625, + "loss": 1.5391, + "step": 7820 + }, + { + "epoch": 0.825, + "grad_norm": 0.6140605807304382, + "learning_rate": 0.00011271060715401604, + "loss": 1.5308, + "step": 7821 + }, + { + "epoch": 0.8251054852320675, + "grad_norm": 0.6344013810157776, + "learning_rate": 0.00011257827531445017, + "loss": 1.5007, + "step": 7822 + }, + { + "epoch": 0.825210970464135, + "grad_norm": 0.5584215521812439, + "learning_rate": 0.00011244601490116693, + "loss": 1.5132, + "step": 7823 + }, + { + "epoch": 0.8253164556962025, + "grad_norm": 0.5628694295883179, + "learning_rate": 
0.00011231382592898698, + "loss": 1.4774, + "step": 7824 + }, + { + "epoch": 0.82542194092827, + "grad_norm": 0.6380406022071838, + "learning_rate": 0.00011218170841272254, + "loss": 1.4902, + "step": 7825 + }, + { + "epoch": 0.8255274261603376, + "grad_norm": 0.6637543439865112, + "learning_rate": 0.00011204966236717811, + "loss": 1.5079, + "step": 7826 + }, + { + "epoch": 0.825632911392405, + "grad_norm": 0.692162811756134, + "learning_rate": 0.0001119176878071502, + "loss": 1.5361, + "step": 7827 + }, + { + "epoch": 0.8257383966244726, + "grad_norm": 0.58906090259552, + "learning_rate": 0.00011178578474742687, + "loss": 1.5153, + "step": 7828 + }, + { + "epoch": 0.8258438818565401, + "grad_norm": 0.5849313139915466, + "learning_rate": 0.00011165395320278898, + "loss": 1.5167, + "step": 7829 + }, + { + "epoch": 0.8259493670886076, + "grad_norm": 0.679487943649292, + "learning_rate": 0.0001115221931880088, + "loss": 1.466, + "step": 7830 + }, + { + "epoch": 0.8260548523206751, + "grad_norm": 0.578133761882782, + "learning_rate": 0.00011139050471785051, + "loss": 1.508, + "step": 7831 + }, + { + "epoch": 0.8261603375527427, + "grad_norm": 0.5560833215713501, + "learning_rate": 0.00011125888780707064, + "loss": 1.4861, + "step": 7832 + }, + { + "epoch": 0.8262658227848101, + "grad_norm": 0.5961370468139648, + "learning_rate": 0.00011112734247041739, + "loss": 1.5007, + "step": 7833 + }, + { + "epoch": 0.8263713080168776, + "grad_norm": 0.6177191138267517, + "learning_rate": 0.00011099586872263107, + "loss": 1.4877, + "step": 7834 + }, + { + "epoch": 0.8264767932489452, + "grad_norm": 0.590773344039917, + "learning_rate": 0.00011086446657844412, + "loss": 1.4764, + "step": 7835 + }, + { + "epoch": 0.8265822784810126, + "grad_norm": 0.586446225643158, + "learning_rate": 0.0001107331360525807, + "loss": 1.4564, + "step": 7836 + }, + { + "epoch": 0.8266877637130802, + "grad_norm": 0.642139196395874, + "learning_rate": 0.00011060187715975686, + "loss": 1.5215, + "step": 
7837 + }, + { + "epoch": 0.8267932489451477, + "grad_norm": 0.6558060646057129, + "learning_rate": 0.00011047068991468118, + "loss": 1.5337, + "step": 7838 + }, + { + "epoch": 0.8268987341772152, + "grad_norm": 0.5595026612281799, + "learning_rate": 0.00011033957433205364, + "loss": 1.4375, + "step": 7839 + }, + { + "epoch": 0.8270042194092827, + "grad_norm": 0.5387570261955261, + "learning_rate": 0.00011020853042656648, + "loss": 1.4971, + "step": 7840 + }, + { + "epoch": 0.8271097046413503, + "grad_norm": 0.6352220773696899, + "learning_rate": 0.00011007755821290371, + "loss": 1.5246, + "step": 7841 + }, + { + "epoch": 0.8272151898734177, + "grad_norm": 0.67579585313797, + "learning_rate": 0.00010994665770574162, + "loss": 1.5063, + "step": 7842 + }, + { + "epoch": 0.8273206751054852, + "grad_norm": 0.5394086837768555, + "learning_rate": 0.000109815828919748, + "loss": 1.5131, + "step": 7843 + }, + { + "epoch": 0.8274261603375528, + "grad_norm": 0.5907552242279053, + "learning_rate": 0.00010968507186958302, + "loss": 1.4723, + "step": 7844 + }, + { + "epoch": 0.8275316455696202, + "grad_norm": 0.6325142979621887, + "learning_rate": 0.00010955438656989849, + "loss": 1.4918, + "step": 7845 + }, + { + "epoch": 0.8276371308016878, + "grad_norm": 0.7807749509811401, + "learning_rate": 0.00010942377303533865, + "loss": 1.5075, + "step": 7846 + }, + { + "epoch": 0.8277426160337553, + "grad_norm": 0.5975595712661743, + "learning_rate": 0.00010929323128053927, + "loss": 1.519, + "step": 7847 + }, + { + "epoch": 0.8278481012658228, + "grad_norm": 0.6423031091690063, + "learning_rate": 0.00010916276132012818, + "loss": 1.5211, + "step": 7848 + }, + { + "epoch": 0.8279535864978903, + "grad_norm": 0.6075759530067444, + "learning_rate": 0.00010903236316872514, + "loss": 1.4677, + "step": 7849 + }, + { + "epoch": 0.8280590717299579, + "grad_norm": 0.5664303302764893, + "learning_rate": 0.000108902036840942, + "loss": 1.5007, + "step": 7850 + }, + { + "epoch": 
0.8281645569620253, + "grad_norm": 0.6722022294998169, + "learning_rate": 0.00010877178235138239, + "loss": 1.52, + "step": 7851 + }, + { + "epoch": 0.8282700421940928, + "grad_norm": 0.5798791646957397, + "learning_rate": 0.00010864159971464205, + "loss": 1.4702, + "step": 7852 + }, + { + "epoch": 0.8283755274261604, + "grad_norm": 0.7160575985908508, + "learning_rate": 0.00010851148894530858, + "loss": 1.4994, + "step": 7853 + }, + { + "epoch": 0.8284810126582278, + "grad_norm": 0.5958582758903503, + "learning_rate": 0.00010838145005796138, + "loss": 1.515, + "step": 7854 + }, + { + "epoch": 0.8285864978902954, + "grad_norm": 0.6001325845718384, + "learning_rate": 0.00010825148306717222, + "loss": 1.4955, + "step": 7855 + }, + { + "epoch": 0.8286919831223629, + "grad_norm": 0.5542721152305603, + "learning_rate": 0.00010812158798750438, + "loss": 1.4776, + "step": 7856 + }, + { + "epoch": 0.8287974683544304, + "grad_norm": 0.6528697609901428, + "learning_rate": 0.00010799176483351337, + "loss": 1.487, + "step": 7857 + }, + { + "epoch": 0.8289029535864979, + "grad_norm": 0.7750137448310852, + "learning_rate": 0.00010786201361974646, + "loss": 1.4803, + "step": 7858 + }, + { + "epoch": 0.8290084388185655, + "grad_norm": 0.5802093148231506, + "learning_rate": 0.00010773233436074287, + "loss": 1.4691, + "step": 7859 + }, + { + "epoch": 0.8291139240506329, + "grad_norm": 0.6148557662963867, + "learning_rate": 0.00010760272707103389, + "loss": 1.4739, + "step": 7860 + }, + { + "epoch": 0.8292194092827004, + "grad_norm": 0.593460738658905, + "learning_rate": 0.00010747319176514264, + "loss": 1.5161, + "step": 7861 + }, + { + "epoch": 0.829324894514768, + "grad_norm": 0.610214352607727, + "learning_rate": 0.00010734372845758411, + "loss": 1.5201, + "step": 7862 + }, + { + "epoch": 0.8294303797468354, + "grad_norm": 0.6109564304351807, + "learning_rate": 0.00010721433716286527, + "loss": 1.4879, + "step": 7863 + }, + { + "epoch": 0.829535864978903, + "grad_norm": 
0.5790244340896606, + "learning_rate": 0.00010708501789548527, + "loss": 1.513, + "step": 7864 + }, + { + "epoch": 0.8296413502109705, + "grad_norm": 0.6877948045730591, + "learning_rate": 0.00010695577066993495, + "loss": 1.4677, + "step": 7865 + }, + { + "epoch": 0.829746835443038, + "grad_norm": 0.6933984756469727, + "learning_rate": 0.00010682659550069704, + "loss": 1.5429, + "step": 7866 + }, + { + "epoch": 0.8298523206751055, + "grad_norm": 0.5588926076889038, + "learning_rate": 0.00010669749240224621, + "loss": 1.4696, + "step": 7867 + }, + { + "epoch": 0.8299578059071729, + "grad_norm": 0.565255343914032, + "learning_rate": 0.00010656846138904916, + "loss": 1.4644, + "step": 7868 + }, + { + "epoch": 0.8300632911392405, + "grad_norm": 0.6046736240386963, + "learning_rate": 0.00010643950247556447, + "loss": 1.5352, + "step": 7869 + }, + { + "epoch": 0.830168776371308, + "grad_norm": 0.6035740375518799, + "learning_rate": 0.00010631061567624259, + "loss": 1.5064, + "step": 7870 + }, + { + "epoch": 0.8302742616033755, + "grad_norm": 0.6871709823608398, + "learning_rate": 0.00010618180100552596, + "loss": 1.564, + "step": 7871 + }, + { + "epoch": 0.830379746835443, + "grad_norm": 0.5928703546524048, + "learning_rate": 0.00010605305847784871, + "loss": 1.5226, + "step": 7872 + }, + { + "epoch": 0.8304852320675106, + "grad_norm": 0.5414719581604004, + "learning_rate": 0.00010592438810763747, + "loss": 1.4969, + "step": 7873 + }, + { + "epoch": 0.830590717299578, + "grad_norm": 0.5704710483551025, + "learning_rate": 0.00010579578990931019, + "loss": 1.5297, + "step": 7874 + }, + { + "epoch": 0.8306962025316456, + "grad_norm": 0.5872528553009033, + "learning_rate": 0.00010566726389727693, + "loss": 1.5071, + "step": 7875 + }, + { + "epoch": 0.8308016877637131, + "grad_norm": 0.597628653049469, + "learning_rate": 0.00010553881008593969, + "loss": 1.5293, + "step": 7876 + }, + { + "epoch": 0.8309071729957805, + "grad_norm": 0.6079739332199097, + "learning_rate": 
0.00010541042848969235, + "loss": 1.5339, + "step": 7877 + }, + { + "epoch": 0.8310126582278481, + "grad_norm": 0.5429105758666992, + "learning_rate": 0.00010528211912292066, + "loss": 1.5185, + "step": 7878 + }, + { + "epoch": 0.8311181434599156, + "grad_norm": 0.6089192032814026, + "learning_rate": 0.00010515388200000245, + "loss": 1.5167, + "step": 7879 + }, + { + "epoch": 0.8312236286919831, + "grad_norm": 0.6303350925445557, + "learning_rate": 0.00010502571713530706, + "loss": 1.5061, + "step": 7880 + }, + { + "epoch": 0.8313291139240506, + "grad_norm": 0.6388457417488098, + "learning_rate": 0.00010489762454319634, + "loss": 1.5062, + "step": 7881 + }, + { + "epoch": 0.8314345991561182, + "grad_norm": 0.6277060508728027, + "learning_rate": 0.00010476960423802356, + "loss": 1.4998, + "step": 7882 + }, + { + "epoch": 0.8315400843881856, + "grad_norm": 0.6238337755203247, + "learning_rate": 0.00010464165623413408, + "loss": 1.4922, + "step": 7883 + }, + { + "epoch": 0.8316455696202532, + "grad_norm": 0.6155350208282471, + "learning_rate": 0.00010451378054586508, + "loss": 1.5569, + "step": 7884 + }, + { + "epoch": 0.8317510548523207, + "grad_norm": 0.6039286851882935, + "learning_rate": 0.00010438597718754561, + "loss": 1.5449, + "step": 7885 + }, + { + "epoch": 0.8318565400843881, + "grad_norm": 0.64223313331604, + "learning_rate": 0.00010425824617349671, + "loss": 1.5473, + "step": 7886 + }, + { + "epoch": 0.8319620253164557, + "grad_norm": 0.5728675723075867, + "learning_rate": 0.00010413058751803129, + "loss": 1.5304, + "step": 7887 + }, + { + "epoch": 0.8320675105485232, + "grad_norm": 0.6596827507019043, + "learning_rate": 0.0001040030012354542, + "loss": 1.486, + "step": 7888 + }, + { + "epoch": 0.8321729957805907, + "grad_norm": 0.5721032619476318, + "learning_rate": 0.00010387548734006195, + "loss": 1.5072, + "step": 7889 + }, + { + "epoch": 0.8322784810126582, + "grad_norm": 0.70518958568573, + "learning_rate": 0.00010374804584614308, + "loss": 1.528, + 
"step": 7890 + }, + { + "epoch": 0.8323839662447258, + "grad_norm": 0.668516218662262, + "learning_rate": 0.00010362067676797837, + "loss": 1.4783, + "step": 7891 + }, + { + "epoch": 0.8324894514767932, + "grad_norm": 0.5978291630744934, + "learning_rate": 0.00010349338011983998, + "loss": 1.4668, + "step": 7892 + }, + { + "epoch": 0.8325949367088608, + "grad_norm": 0.5886739492416382, + "learning_rate": 0.00010336615591599204, + "loss": 1.5048, + "step": 7893 + }, + { + "epoch": 0.8327004219409283, + "grad_norm": 0.6198743581771851, + "learning_rate": 0.00010323900417069079, + "loss": 1.4775, + "step": 7894 + }, + { + "epoch": 0.8328059071729957, + "grad_norm": 0.6698330044746399, + "learning_rate": 0.00010311192489818421, + "loss": 1.5419, + "step": 7895 + }, + { + "epoch": 0.8329113924050633, + "grad_norm": 0.6356502771377563, + "learning_rate": 0.0001029849181127121, + "loss": 1.5055, + "step": 7896 + }, + { + "epoch": 0.8330168776371308, + "grad_norm": 0.5718545317649841, + "learning_rate": 0.00010285798382850614, + "loss": 1.4957, + "step": 7897 + }, + { + "epoch": 0.8331223628691983, + "grad_norm": 0.6808866262435913, + "learning_rate": 0.00010273112205979012, + "loss": 1.5049, + "step": 7898 + }, + { + "epoch": 0.8332278481012658, + "grad_norm": 0.7426813840866089, + "learning_rate": 0.00010260433282077944, + "loss": 1.4914, + "step": 7899 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.6111672520637512, + "learning_rate": 0.00010247761612568129, + "loss": 1.5275, + "step": 7900 + }, + { + "epoch": 0.8334388185654008, + "grad_norm": 0.5559819936752319, + "learning_rate": 0.00010235097198869525, + "loss": 1.4987, + "step": 7901 + }, + { + "epoch": 0.8335443037974684, + "grad_norm": 0.8364967107772827, + "learning_rate": 0.0001022244004240123, + "loss": 1.5224, + "step": 7902 + }, + { + "epoch": 0.8336497890295359, + "grad_norm": 0.7771208882331848, + "learning_rate": 0.00010209790144581533, + "loss": 1.4747, + "step": 7903 + }, + { + "epoch": 
0.8337552742616033, + "grad_norm": 0.5388540029525757, + "learning_rate": 0.00010197147506827925, + "loss": 1.4768, + "step": 7904 + }, + { + "epoch": 0.8338607594936709, + "grad_norm": 0.6562647819519043, + "learning_rate": 0.00010184512130557074, + "loss": 1.5237, + "step": 7905 + }, + { + "epoch": 0.8339662447257384, + "grad_norm": 0.7675014734268188, + "learning_rate": 0.0001017188401718484, + "loss": 1.5329, + "step": 7906 + }, + { + "epoch": 0.8340717299578059, + "grad_norm": 0.6954792737960815, + "learning_rate": 0.00010159263168126265, + "loss": 1.5147, + "step": 7907 + }, + { + "epoch": 0.8341772151898734, + "grad_norm": 0.6733344197273254, + "learning_rate": 0.00010146649584795575, + "loss": 1.517, + "step": 7908 + }, + { + "epoch": 0.834282700421941, + "grad_norm": 0.702911376953125, + "learning_rate": 0.00010134043268606191, + "loss": 1.5079, + "step": 7909 + }, + { + "epoch": 0.8343881856540084, + "grad_norm": 0.8610835075378418, + "learning_rate": 0.0001012144422097069, + "loss": 1.526, + "step": 7910 + }, + { + "epoch": 0.834493670886076, + "grad_norm": 0.7939404249191284, + "learning_rate": 0.00010108852443300895, + "loss": 1.5044, + "step": 7911 + }, + { + "epoch": 0.8345991561181435, + "grad_norm": 0.5776965618133545, + "learning_rate": 0.00010096267937007758, + "loss": 1.5053, + "step": 7912 + }, + { + "epoch": 0.8347046413502109, + "grad_norm": 0.7028140425682068, + "learning_rate": 0.00010083690703501445, + "loss": 1.5205, + "step": 7913 + }, + { + "epoch": 0.8348101265822785, + "grad_norm": 0.6216159462928772, + "learning_rate": 0.00010071120744191284, + "loss": 1.474, + "step": 7914 + }, + { + "epoch": 0.834915611814346, + "grad_norm": 0.6477316617965698, + "learning_rate": 0.0001005855806048581, + "loss": 1.5382, + "step": 7915 + }, + { + "epoch": 0.8350210970464135, + "grad_norm": 0.587067186832428, + "learning_rate": 0.00010046002653792726, + "loss": 1.5132, + "step": 7916 + }, + { + "epoch": 0.835126582278481, + "grad_norm": 
0.5540424585342407, + "learning_rate": 0.00010033454525518945, + "loss": 1.5043, + "step": 7917 + }, + { + "epoch": 0.8352320675105486, + "grad_norm": 0.5854399800300598, + "learning_rate": 0.0001002091367707053, + "loss": 1.5132, + "step": 7918 + }, + { + "epoch": 0.835337552742616, + "grad_norm": 0.710451066493988, + "learning_rate": 0.00010008380109852752, + "loss": 1.5007, + "step": 7919 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 0.62095046043396, + "learning_rate": 9.995853825270052e-05, + "loss": 1.5121, + "step": 7920 + }, + { + "epoch": 0.8355485232067511, + "grad_norm": 0.5850193500518799, + "learning_rate": 9.983334824726081e-05, + "loss": 1.523, + "step": 7921 + }, + { + "epoch": 0.8356540084388185, + "grad_norm": 0.8313699960708618, + "learning_rate": 9.970823109623644e-05, + "loss": 1.5179, + "step": 7922 + }, + { + "epoch": 0.8357594936708861, + "grad_norm": 0.6451310515403748, + "learning_rate": 9.958318681364745e-05, + "loss": 1.5445, + "step": 7923 + }, + { + "epoch": 0.8358649789029536, + "grad_norm": 0.5882033705711365, + "learning_rate": 9.94582154135056e-05, + "loss": 1.5178, + "step": 7924 + }, + { + "epoch": 0.8359704641350211, + "grad_norm": 0.5832379460334778, + "learning_rate": 9.933331690981473e-05, + "loss": 1.5001, + "step": 7925 + }, + { + "epoch": 0.8360759493670886, + "grad_norm": 0.7470884919166565, + "learning_rate": 9.920849131657011e-05, + "loss": 1.5364, + "step": 7926 + }, + { + "epoch": 0.8361814345991562, + "grad_norm": 0.5562522411346436, + "learning_rate": 9.908373864775915e-05, + "loss": 1.4942, + "step": 7927 + }, + { + "epoch": 0.8362869198312236, + "grad_norm": 0.5897666215896606, + "learning_rate": 9.895905891736118e-05, + "loss": 1.5212, + "step": 7928 + }, + { + "epoch": 0.8363924050632912, + "grad_norm": 0.576606273651123, + "learning_rate": 9.883445213934675e-05, + "loss": 1.5231, + "step": 7929 + }, + { + "epoch": 0.8364978902953587, + "grad_norm": 0.6154667735099792, + "learning_rate": 
9.870991832767919e-05, + "loss": 1.4918, + "step": 7930 + }, + { + "epoch": 0.8366033755274261, + "grad_norm": 0.6355293989181519, + "learning_rate": 9.858545749631287e-05, + "loss": 1.505, + "step": 7931 + }, + { + "epoch": 0.8367088607594937, + "grad_norm": 0.6329848766326904, + "learning_rate": 9.846106965919427e-05, + "loss": 1.4971, + "step": 7932 + }, + { + "epoch": 0.8368143459915611, + "grad_norm": 0.6906605362892151, + "learning_rate": 9.833675483026175e-05, + "loss": 1.5233, + "step": 7933 + }, + { + "epoch": 0.8369198312236287, + "grad_norm": 0.7100114226341248, + "learning_rate": 9.821251302344525e-05, + "loss": 1.5154, + "step": 7934 + }, + { + "epoch": 0.8370253164556962, + "grad_norm": 0.5969324707984924, + "learning_rate": 9.80883442526668e-05, + "loss": 1.4983, + "step": 7935 + }, + { + "epoch": 0.8371308016877637, + "grad_norm": 0.5926192998886108, + "learning_rate": 9.79642485318401e-05, + "loss": 1.5061, + "step": 7936 + }, + { + "epoch": 0.8372362869198312, + "grad_norm": 0.6179748773574829, + "learning_rate": 9.78402258748708e-05, + "loss": 1.4858, + "step": 7937 + }, + { + "epoch": 0.8373417721518988, + "grad_norm": 0.6001385450363159, + "learning_rate": 9.771627629565599e-05, + "loss": 1.5088, + "step": 7938 + }, + { + "epoch": 0.8374472573839662, + "grad_norm": 0.5699326395988464, + "learning_rate": 9.759239980808494e-05, + "loss": 1.5172, + "step": 7939 + }, + { + "epoch": 0.8375527426160337, + "grad_norm": 0.5738930702209473, + "learning_rate": 9.746859642603884e-05, + "loss": 1.5215, + "step": 7940 + }, + { + "epoch": 0.8376582278481013, + "grad_norm": 0.5962559580802917, + "learning_rate": 9.734486616339027e-05, + "loss": 1.5149, + "step": 7941 + }, + { + "epoch": 0.8377637130801687, + "grad_norm": 0.6393314599990845, + "learning_rate": 9.722120903400392e-05, + "loss": 1.5235, + "step": 7942 + }, + { + "epoch": 0.8378691983122363, + "grad_norm": 0.5576362013816833, + "learning_rate": 9.709762505173617e-05, + "loss": 1.507, + "step": 
7943 + }, + { + "epoch": 0.8379746835443038, + "grad_norm": 0.5891581773757935, + "learning_rate": 9.697411423043521e-05, + "loss": 1.5209, + "step": 7944 + }, + { + "epoch": 0.8380801687763713, + "grad_norm": 0.6047725081443787, + "learning_rate": 9.685067658394095e-05, + "loss": 1.4886, + "step": 7945 + }, + { + "epoch": 0.8381856540084388, + "grad_norm": 0.6020649671554565, + "learning_rate": 9.672731212608535e-05, + "loss": 1.5226, + "step": 7946 + }, + { + "epoch": 0.8382911392405064, + "grad_norm": 0.5468841195106506, + "learning_rate": 9.660402087069192e-05, + "loss": 1.5236, + "step": 7947 + }, + { + "epoch": 0.8383966244725738, + "grad_norm": 0.569316565990448, + "learning_rate": 9.648080283157604e-05, + "loss": 1.4812, + "step": 7948 + }, + { + "epoch": 0.8385021097046413, + "grad_norm": 0.5980765223503113, + "learning_rate": 9.635765802254482e-05, + "loss": 1.5061, + "step": 7949 + }, + { + "epoch": 0.8386075949367089, + "grad_norm": 0.567437469959259, + "learning_rate": 9.623458645739755e-05, + "loss": 1.5252, + "step": 7950 + }, + { + "epoch": 0.8387130801687763, + "grad_norm": 0.581532895565033, + "learning_rate": 9.611158814992479e-05, + "loss": 1.5203, + "step": 7951 + }, + { + "epoch": 0.8388185654008439, + "grad_norm": 0.5401748418807983, + "learning_rate": 9.598866311390919e-05, + "loss": 1.4672, + "step": 7952 + }, + { + "epoch": 0.8389240506329114, + "grad_norm": 0.604524552822113, + "learning_rate": 9.586581136312506e-05, + "loss": 1.5093, + "step": 7953 + }, + { + "epoch": 0.8390295358649789, + "grad_norm": 0.5963433980941772, + "learning_rate": 9.574303291133862e-05, + "loss": 1.4619, + "step": 7954 + }, + { + "epoch": 0.8391350210970464, + "grad_norm": 0.5716742873191833, + "learning_rate": 9.562032777230772e-05, + "loss": 1.4763, + "step": 7955 + }, + { + "epoch": 0.839240506329114, + "grad_norm": 0.5978495478630066, + "learning_rate": 9.549769595978211e-05, + "loss": 1.494, + "step": 7956 + }, + { + "epoch": 0.8393459915611814, + 
"grad_norm": 0.5926896333694458, + "learning_rate": 9.537513748750337e-05, + "loss": 1.5044, + "step": 7957 + }, + { + "epoch": 0.8394514767932489, + "grad_norm": 0.5849170088768005, + "learning_rate": 9.525265236920452e-05, + "loss": 1.5293, + "step": 7958 + }, + { + "epoch": 0.8395569620253165, + "grad_norm": 0.5556712746620178, + "learning_rate": 9.5130240618611e-05, + "loss": 1.5121, + "step": 7959 + }, + { + "epoch": 0.8396624472573839, + "grad_norm": 0.5345446467399597, + "learning_rate": 9.50079022494395e-05, + "loss": 1.5012, + "step": 7960 + }, + { + "epoch": 0.8397679324894515, + "grad_norm": 0.584995687007904, + "learning_rate": 9.488563727539864e-05, + "loss": 1.4701, + "step": 7961 + }, + { + "epoch": 0.839873417721519, + "grad_norm": 0.5373770594596863, + "learning_rate": 9.47634457101888e-05, + "loss": 1.5037, + "step": 7962 + }, + { + "epoch": 0.8399789029535865, + "grad_norm": 0.5365831255912781, + "learning_rate": 9.464132756750218e-05, + "loss": 1.4963, + "step": 7963 + }, + { + "epoch": 0.840084388185654, + "grad_norm": 0.5675795674324036, + "learning_rate": 9.451928286102277e-05, + "loss": 1.5101, + "step": 7964 + }, + { + "epoch": 0.8401898734177216, + "grad_norm": 0.5773781538009644, + "learning_rate": 9.439731160442619e-05, + "loss": 1.5064, + "step": 7965 + }, + { + "epoch": 0.840295358649789, + "grad_norm": 0.5788686871528625, + "learning_rate": 9.427541381138002e-05, + "loss": 1.5085, + "step": 7966 + }, + { + "epoch": 0.8404008438818565, + "grad_norm": 0.5558779835700989, + "learning_rate": 9.415358949554326e-05, + "loss": 1.5052, + "step": 7967 + }, + { + "epoch": 0.8405063291139241, + "grad_norm": 0.5680338144302368, + "learning_rate": 9.40318386705673e-05, + "loss": 1.515, + "step": 7968 + }, + { + "epoch": 0.8406118143459915, + "grad_norm": 0.5900113582611084, + "learning_rate": 9.391016135009484e-05, + "loss": 1.4646, + "step": 7969 + }, + { + "epoch": 0.8407172995780591, + "grad_norm": 0.5664304494857788, + "learning_rate": 
9.378855754776028e-05, + "loss": 1.4955, + "step": 7970 + }, + { + "epoch": 0.8408227848101266, + "grad_norm": 0.613723874092102, + "learning_rate": 9.366702727719006e-05, + "loss": 1.5198, + "step": 7971 + }, + { + "epoch": 0.8409282700421941, + "grad_norm": 0.5968380570411682, + "learning_rate": 9.354557055200214e-05, + "loss": 1.5328, + "step": 7972 + }, + { + "epoch": 0.8410337552742616, + "grad_norm": 0.5634049773216248, + "learning_rate": 9.342418738580652e-05, + "loss": 1.4847, + "step": 7973 + }, + { + "epoch": 0.8411392405063292, + "grad_norm": 0.5668849349021912, + "learning_rate": 9.330287779220459e-05, + "loss": 1.4679, + "step": 7974 + }, + { + "epoch": 0.8412447257383966, + "grad_norm": 0.6615244746208191, + "learning_rate": 9.31816417847898e-05, + "loss": 1.5127, + "step": 7975 + }, + { + "epoch": 0.8413502109704641, + "grad_norm": 0.5770912766456604, + "learning_rate": 9.306047937714713e-05, + "loss": 1.508, + "step": 7976 + }, + { + "epoch": 0.8414556962025317, + "grad_norm": 0.5537204742431641, + "learning_rate": 9.29393905828537e-05, + "loss": 1.4775, + "step": 7977 + }, + { + "epoch": 0.8415611814345991, + "grad_norm": 0.5742653012275696, + "learning_rate": 9.281837541547791e-05, + "loss": 1.4888, + "step": 7978 + }, + { + "epoch": 0.8416666666666667, + "grad_norm": 0.5890442728996277, + "learning_rate": 9.269743388858019e-05, + "loss": 1.4994, + "step": 7979 + }, + { + "epoch": 0.8417721518987342, + "grad_norm": 0.5493541955947876, + "learning_rate": 9.257656601571266e-05, + "loss": 1.4676, + "step": 7980 + }, + { + "epoch": 0.8418776371308017, + "grad_norm": 0.5821306705474854, + "learning_rate": 9.245577181041901e-05, + "loss": 1.4905, + "step": 7981 + }, + { + "epoch": 0.8419831223628692, + "grad_norm": 0.5986722707748413, + "learning_rate": 9.233505128623499e-05, + "loss": 1.4838, + "step": 7982 + }, + { + "epoch": 0.8420886075949368, + "grad_norm": 0.5979666113853455, + "learning_rate": 9.221440445668794e-05, + "loss": 1.5253, + "step": 
7983 + }, + { + "epoch": 0.8421940928270042, + "grad_norm": 0.6749881505966187, + "learning_rate": 9.209383133529664e-05, + "loss": 1.4739, + "step": 7984 + }, + { + "epoch": 0.8422995780590717, + "grad_norm": 0.5919481515884399, + "learning_rate": 9.197333193557237e-05, + "loss": 1.5286, + "step": 7985 + }, + { + "epoch": 0.8424050632911393, + "grad_norm": 0.5770317912101746, + "learning_rate": 9.185290627101747e-05, + "loss": 1.5131, + "step": 7986 + }, + { + "epoch": 0.8425105485232067, + "grad_norm": 0.6668513417243958, + "learning_rate": 9.173255435512617e-05, + "loss": 1.5263, + "step": 7987 + }, + { + "epoch": 0.8426160337552743, + "grad_norm": 0.5707548260688782, + "learning_rate": 9.161227620138468e-05, + "loss": 1.5434, + "step": 7988 + }, + { + "epoch": 0.8427215189873418, + "grad_norm": 0.6305684447288513, + "learning_rate": 9.149207182327054e-05, + "loss": 1.488, + "step": 7989 + }, + { + "epoch": 0.8428270042194093, + "grad_norm": 0.6158482432365417, + "learning_rate": 9.137194123425349e-05, + "loss": 1.5039, + "step": 7990 + }, + { + "epoch": 0.8429324894514768, + "grad_norm": 0.5572646856307983, + "learning_rate": 9.125188444779458e-05, + "loss": 1.5037, + "step": 7991 + }, + { + "epoch": 0.8430379746835444, + "grad_norm": 0.5583123564720154, + "learning_rate": 9.113190147734682e-05, + "loss": 1.4792, + "step": 7992 + }, + { + "epoch": 0.8431434599156118, + "grad_norm": 0.5853632688522339, + "learning_rate": 9.101199233635477e-05, + "loss": 1.5178, + "step": 7993 + }, + { + "epoch": 0.8432489451476793, + "grad_norm": 0.7394426465034485, + "learning_rate": 9.089215703825519e-05, + "loss": 1.4959, + "step": 7994 + }, + { + "epoch": 0.8433544303797469, + "grad_norm": 0.5739732980728149, + "learning_rate": 9.077239559647591e-05, + "loss": 1.4902, + "step": 7995 + }, + { + "epoch": 0.8434599156118143, + "grad_norm": 0.539587140083313, + "learning_rate": 9.065270802443704e-05, + "loss": 1.5282, + "step": 7996 + }, + { + "epoch": 0.8435654008438819, + 
"grad_norm": 0.5781717896461487, + "learning_rate": 9.053309433554993e-05, + "loss": 1.5049, + "step": 7997 + }, + { + "epoch": 0.8436708860759494, + "grad_norm": 0.5727368593215942, + "learning_rate": 9.041355454321803e-05, + "loss": 1.5059, + "step": 7998 + }, + { + "epoch": 0.8437763713080169, + "grad_norm": 0.6635501980781555, + "learning_rate": 9.029408866083638e-05, + "loss": 1.5094, + "step": 7999 + }, + { + "epoch": 0.8438818565400844, + "grad_norm": 0.5933712720870972, + "learning_rate": 9.017469670179168e-05, + "loss": 1.4942, + "step": 8000 + }, + { + "epoch": 0.8439873417721518, + "grad_norm": 0.6166720390319824, + "learning_rate": 9.00553786794624e-05, + "loss": 1.5192, + "step": 8001 + }, + { + "epoch": 0.8440928270042194, + "grad_norm": 0.5537218451499939, + "learning_rate": 8.99361346072185e-05, + "loss": 1.5435, + "step": 8002 + }, + { + "epoch": 0.8441983122362869, + "grad_norm": 0.5583345890045166, + "learning_rate": 8.98169644984223e-05, + "loss": 1.5294, + "step": 8003 + }, + { + "epoch": 0.8443037974683544, + "grad_norm": 0.606837809085846, + "learning_rate": 8.96978683664272e-05, + "loss": 1.4768, + "step": 8004 + }, + { + "epoch": 0.8444092827004219, + "grad_norm": 0.610932469367981, + "learning_rate": 8.957884622457854e-05, + "loss": 1.4966, + "step": 8005 + }, + { + "epoch": 0.8445147679324895, + "grad_norm": 0.5892922878265381, + "learning_rate": 8.945989808621321e-05, + "loss": 1.5102, + "step": 8006 + }, + { + "epoch": 0.8446202531645569, + "grad_norm": 0.5456603169441223, + "learning_rate": 8.934102396466016e-05, + "loss": 1.5531, + "step": 8007 + }, + { + "epoch": 0.8447257383966245, + "grad_norm": 0.8189565539360046, + "learning_rate": 8.92222238732397e-05, + "loss": 1.4962, + "step": 8008 + }, + { + "epoch": 0.844831223628692, + "grad_norm": 0.5609636902809143, + "learning_rate": 8.910349782526394e-05, + "loss": 1.4713, + "step": 8009 + }, + { + "epoch": 0.8449367088607594, + "grad_norm": 0.573078989982605, + "learning_rate": 
8.898484583403668e-05, + "loss": 1.4936, + "step": 8010 + }, + { + "epoch": 0.845042194092827, + "grad_norm": 0.5156030058860779, + "learning_rate": 8.886626791285369e-05, + "loss": 1.5074, + "step": 8011 + }, + { + "epoch": 0.8451476793248945, + "grad_norm": 0.5673456192016602, + "learning_rate": 8.874776407500206e-05, + "loss": 1.4977, + "step": 8012 + }, + { + "epoch": 0.845253164556962, + "grad_norm": 0.6630195379257202, + "learning_rate": 8.86293343337608e-05, + "loss": 1.4915, + "step": 8013 + }, + { + "epoch": 0.8453586497890295, + "grad_norm": 0.6109731197357178, + "learning_rate": 8.851097870240051e-05, + "loss": 1.5008, + "step": 8014 + }, + { + "epoch": 0.8454641350210971, + "grad_norm": 0.5607989430427551, + "learning_rate": 8.839269719418361e-05, + "loss": 1.4988, + "step": 8015 + }, + { + "epoch": 0.8455696202531645, + "grad_norm": 0.5555319786071777, + "learning_rate": 8.827448982236397e-05, + "loss": 1.4971, + "step": 8016 + }, + { + "epoch": 0.8456751054852321, + "grad_norm": 0.5701496005058289, + "learning_rate": 8.815635660018742e-05, + "loss": 1.4491, + "step": 8017 + }, + { + "epoch": 0.8457805907172996, + "grad_norm": 0.6108170747756958, + "learning_rate": 8.803829754089138e-05, + "loss": 1.5334, + "step": 8018 + }, + { + "epoch": 0.845886075949367, + "grad_norm": 0.6015042662620544, + "learning_rate": 8.792031265770475e-05, + "loss": 1.5239, + "step": 8019 + }, + { + "epoch": 0.8459915611814346, + "grad_norm": 0.5398753881454468, + "learning_rate": 8.780240196384873e-05, + "loss": 1.5067, + "step": 8020 + }, + { + "epoch": 0.8460970464135021, + "grad_norm": 0.5588745474815369, + "learning_rate": 8.768456547253556e-05, + "loss": 1.5012, + "step": 8021 + }, + { + "epoch": 0.8462025316455696, + "grad_norm": 0.5813895463943481, + "learning_rate": 8.756680319696945e-05, + "loss": 1.4827, + "step": 8022 + }, + { + "epoch": 0.8463080168776371, + "grad_norm": 0.5807129144668579, + "learning_rate": 8.744911515034623e-05, + "loss": 1.4738, + "step": 
8023 + }, + { + "epoch": 0.8464135021097047, + "grad_norm": 0.5415717959403992, + "learning_rate": 8.733150134585338e-05, + "loss": 1.5482, + "step": 8024 + }, + { + "epoch": 0.8465189873417721, + "grad_norm": 0.6271615624427795, + "learning_rate": 8.721396179667019e-05, + "loss": 1.5179, + "step": 8025 + }, + { + "epoch": 0.8466244725738397, + "grad_norm": 0.6112353205680847, + "learning_rate": 8.709649651596752e-05, + "loss": 1.4953, + "step": 8026 + }, + { + "epoch": 0.8467299578059072, + "grad_norm": 0.5447966456413269, + "learning_rate": 8.697910551690802e-05, + "loss": 1.48, + "step": 8027 + }, + { + "epoch": 0.8468354430379746, + "grad_norm": 0.6905320882797241, + "learning_rate": 8.686178881264568e-05, + "loss": 1.4757, + "step": 8028 + }, + { + "epoch": 0.8469409282700422, + "grad_norm": 0.6196342706680298, + "learning_rate": 8.67445464163267e-05, + "loss": 1.524, + "step": 8029 + }, + { + "epoch": 0.8470464135021097, + "grad_norm": 0.6059688329696655, + "learning_rate": 8.662737834108861e-05, + "loss": 1.4955, + "step": 8030 + }, + { + "epoch": 0.8471518987341772, + "grad_norm": 0.5767323970794678, + "learning_rate": 8.651028460006072e-05, + "loss": 1.5252, + "step": 8031 + }, + { + "epoch": 0.8472573839662447, + "grad_norm": 0.6217206120491028, + "learning_rate": 8.639326520636387e-05, + "loss": 1.4728, + "step": 8032 + }, + { + "epoch": 0.8473628691983123, + "grad_norm": 0.6035910248756409, + "learning_rate": 8.627632017311065e-05, + "loss": 1.5272, + "step": 8033 + }, + { + "epoch": 0.8474683544303797, + "grad_norm": 0.6234737634658813, + "learning_rate": 8.615944951340543e-05, + "loss": 1.472, + "step": 8034 + }, + { + "epoch": 0.8475738396624473, + "grad_norm": 0.6168854832649231, + "learning_rate": 8.604265324034405e-05, + "loss": 1.5112, + "step": 8035 + }, + { + "epoch": 0.8476793248945148, + "grad_norm": 0.6463575959205627, + "learning_rate": 8.592593136701404e-05, + "loss": 1.522, + "step": 8036 + }, + { + "epoch": 0.8477848101265822, + 
"grad_norm": 0.5685198903083801, + "learning_rate": 8.580928390649496e-05, + "loss": 1.5027, + "step": 8037 + }, + { + "epoch": 0.8478902953586498, + "grad_norm": 0.5865472555160522, + "learning_rate": 8.569271087185756e-05, + "loss": 1.5499, + "step": 8038 + }, + { + "epoch": 0.8479957805907173, + "grad_norm": 0.7327468395233154, + "learning_rate": 8.557621227616444e-05, + "loss": 1.538, + "step": 8039 + }, + { + "epoch": 0.8481012658227848, + "grad_norm": 0.5375199317932129, + "learning_rate": 8.545978813246987e-05, + "loss": 1.5164, + "step": 8040 + }, + { + "epoch": 0.8482067510548523, + "grad_norm": 0.6223655939102173, + "learning_rate": 8.53434384538197e-05, + "loss": 1.4933, + "step": 8041 + }, + { + "epoch": 0.8483122362869199, + "grad_norm": 0.5881874561309814, + "learning_rate": 8.522716325325155e-05, + "loss": 1.4559, + "step": 8042 + }, + { + "epoch": 0.8484177215189873, + "grad_norm": 0.5754590034484863, + "learning_rate": 8.51109625437946e-05, + "loss": 1.5331, + "step": 8043 + }, + { + "epoch": 0.8485232067510549, + "grad_norm": 0.5903629064559937, + "learning_rate": 8.499483633846977e-05, + "loss": 1.4947, + "step": 8044 + }, + { + "epoch": 0.8486286919831224, + "grad_norm": 0.5655953884124756, + "learning_rate": 8.48787846502893e-05, + "loss": 1.4868, + "step": 8045 + }, + { + "epoch": 0.8487341772151898, + "grad_norm": 0.5740991830825806, + "learning_rate": 8.476280749225782e-05, + "loss": 1.487, + "step": 8046 + }, + { + "epoch": 0.8488396624472574, + "grad_norm": 0.6947318911552429, + "learning_rate": 8.464690487737098e-05, + "loss": 1.4714, + "step": 8047 + }, + { + "epoch": 0.8489451476793249, + "grad_norm": 0.6066462993621826, + "learning_rate": 8.453107681861616e-05, + "loss": 1.4549, + "step": 8048 + }, + { + "epoch": 0.8490506329113924, + "grad_norm": 0.6086761355400085, + "learning_rate": 8.441532332897248e-05, + "loss": 1.5457, + "step": 8049 + }, + { + "epoch": 0.8491561181434599, + "grad_norm": 0.5871821045875549, + "learning_rate": 
8.429964442141072e-05, + "loss": 1.5122, + "step": 8050 + }, + { + "epoch": 0.8492616033755275, + "grad_norm": 0.6853534579277039, + "learning_rate": 8.418404010889336e-05, + "loss": 1.5182, + "step": 8051 + }, + { + "epoch": 0.8493670886075949, + "grad_norm": 0.6286603808403015, + "learning_rate": 8.406851040437426e-05, + "loss": 1.478, + "step": 8052 + }, + { + "epoch": 0.8494725738396625, + "grad_norm": 0.6080175638198853, + "learning_rate": 8.395305532079928e-05, + "loss": 1.5164, + "step": 8053 + }, + { + "epoch": 0.84957805907173, + "grad_norm": 0.5997054576873779, + "learning_rate": 8.383767487110552e-05, + "loss": 1.4974, + "step": 8054 + }, + { + "epoch": 0.8496835443037974, + "grad_norm": 0.6139504909515381, + "learning_rate": 8.372236906822217e-05, + "loss": 1.516, + "step": 8055 + }, + { + "epoch": 0.849789029535865, + "grad_norm": 0.6382527947425842, + "learning_rate": 8.360713792506971e-05, + "loss": 1.4976, + "step": 8056 + }, + { + "epoch": 0.8498945147679325, + "grad_norm": 0.5421963930130005, + "learning_rate": 8.349198145456049e-05, + "loss": 1.5114, + "step": 8057 + }, + { + "epoch": 0.85, + "grad_norm": 0.6397777795791626, + "learning_rate": 8.337689966959819e-05, + "loss": 1.5304, + "step": 8058 + }, + { + "epoch": 0.8501054852320675, + "grad_norm": 0.5725385546684265, + "learning_rate": 8.326189258307832e-05, + "loss": 1.4612, + "step": 8059 + }, + { + "epoch": 0.8502109704641351, + "grad_norm": 0.6015229821205139, + "learning_rate": 8.314696020788806e-05, + "loss": 1.4963, + "step": 8060 + }, + { + "epoch": 0.8503164556962025, + "grad_norm": 0.6386984586715698, + "learning_rate": 8.303210255690622e-05, + "loss": 1.5093, + "step": 8061 + }, + { + "epoch": 0.8504219409282701, + "grad_norm": 0.5597966909408569, + "learning_rate": 8.29173196430029e-05, + "loss": 1.5041, + "step": 8062 + }, + { + "epoch": 0.8505274261603376, + "grad_norm": 0.5750008225440979, + "learning_rate": 8.280261147904039e-05, + "loss": 1.472, + "step": 8063 + }, + { + 
"epoch": 0.850632911392405, + "grad_norm": 0.6479034423828125, + "learning_rate": 8.268797807787226e-05, + "loss": 1.5104, + "step": 8064 + }, + { + "epoch": 0.8507383966244726, + "grad_norm": 0.604712188243866, + "learning_rate": 8.257341945234365e-05, + "loss": 1.5525, + "step": 8065 + }, + { + "epoch": 0.85084388185654, + "grad_norm": 0.5682463645935059, + "learning_rate": 8.245893561529153e-05, + "loss": 1.4871, + "step": 8066 + }, + { + "epoch": 0.8509493670886076, + "grad_norm": 0.6008539795875549, + "learning_rate": 8.23445265795443e-05, + "loss": 1.4916, + "step": 8067 + }, + { + "epoch": 0.8510548523206751, + "grad_norm": 0.5579414963722229, + "learning_rate": 8.223019235792214e-05, + "loss": 1.5218, + "step": 8068 + }, + { + "epoch": 0.8511603375527426, + "grad_norm": 0.5948139429092407, + "learning_rate": 8.211593296323672e-05, + "loss": 1.5197, + "step": 8069 + }, + { + "epoch": 0.8512658227848101, + "grad_norm": 0.5711910128593445, + "learning_rate": 8.200174840829136e-05, + "loss": 1.5102, + "step": 8070 + }, + { + "epoch": 0.8513713080168777, + "grad_norm": 0.599407970905304, + "learning_rate": 8.188763870588092e-05, + "loss": 1.4709, + "step": 8071 + }, + { + "epoch": 0.8514767932489451, + "grad_norm": 0.5760802626609802, + "learning_rate": 8.177360386879217e-05, + "loss": 1.5076, + "step": 8072 + }, + { + "epoch": 0.8515822784810126, + "grad_norm": 0.6015502214431763, + "learning_rate": 8.165964390980316e-05, + "loss": 1.5086, + "step": 8073 + }, + { + "epoch": 0.8516877637130802, + "grad_norm": 0.6218940019607544, + "learning_rate": 8.15457588416838e-05, + "loss": 1.5166, + "step": 8074 + }, + { + "epoch": 0.8517932489451476, + "grad_norm": 0.6001022458076477, + "learning_rate": 8.143194867719534e-05, + "loss": 1.5138, + "step": 8075 + }, + { + "epoch": 0.8518987341772152, + "grad_norm": 0.6297536492347717, + "learning_rate": 8.131821342909071e-05, + "loss": 1.5549, + "step": 8076 + }, + { + "epoch": 0.8520042194092827, + "grad_norm": 
0.6381260752677917, + "learning_rate": 8.120455311011473e-05, + "loss": 1.5127, + "step": 8077 + }, + { + "epoch": 0.8521097046413502, + "grad_norm": 0.6061806678771973, + "learning_rate": 8.109096773300348e-05, + "loss": 1.4728, + "step": 8078 + }, + { + "epoch": 0.8522151898734177, + "grad_norm": 0.5622913837432861, + "learning_rate": 8.097745731048475e-05, + "loss": 1.5091, + "step": 8079 + }, + { + "epoch": 0.8523206751054853, + "grad_norm": 0.6289039850234985, + "learning_rate": 8.08640218552778e-05, + "loss": 1.5273, + "step": 8080 + }, + { + "epoch": 0.8524261603375527, + "grad_norm": 0.620159924030304, + "learning_rate": 8.075066138009396e-05, + "loss": 1.4558, + "step": 8081 + }, + { + "epoch": 0.8525316455696202, + "grad_norm": 0.5750613808631897, + "learning_rate": 8.063737589763573e-05, + "loss": 1.4354, + "step": 8082 + }, + { + "epoch": 0.8526371308016878, + "grad_norm": 0.5786623954772949, + "learning_rate": 8.05241654205973e-05, + "loss": 1.4805, + "step": 8083 + }, + { + "epoch": 0.8527426160337552, + "grad_norm": 0.5790690183639526, + "learning_rate": 8.041102996166442e-05, + "loss": 1.4873, + "step": 8084 + }, + { + "epoch": 0.8528481012658228, + "grad_norm": 0.5695843696594238, + "learning_rate": 8.029796953351445e-05, + "loss": 1.5133, + "step": 8085 + }, + { + "epoch": 0.8529535864978903, + "grad_norm": 0.571151852607727, + "learning_rate": 8.018498414881645e-05, + "loss": 1.5074, + "step": 8086 + }, + { + "epoch": 0.8530590717299578, + "grad_norm": 0.5804629325866699, + "learning_rate": 8.007207382023102e-05, + "loss": 1.4918, + "step": 8087 + }, + { + "epoch": 0.8531645569620253, + "grad_norm": 0.6484772562980652, + "learning_rate": 7.995923856041013e-05, + "loss": 1.5231, + "step": 8088 + }, + { + "epoch": 0.8532700421940929, + "grad_norm": 0.5453590154647827, + "learning_rate": 7.984647838199773e-05, + "loss": 1.5466, + "step": 8089 + }, + { + "epoch": 0.8533755274261603, + "grad_norm": 0.5357676148414612, + "learning_rate": 
7.973379329762925e-05, + "loss": 1.5307, + "step": 8090 + }, + { + "epoch": 0.8534810126582278, + "grad_norm": 0.5550597906112671, + "learning_rate": 7.96211833199314e-05, + "loss": 1.4967, + "step": 8091 + }, + { + "epoch": 0.8535864978902954, + "grad_norm": 0.5851867198944092, + "learning_rate": 7.950864846152284e-05, + "loss": 1.5032, + "step": 8092 + }, + { + "epoch": 0.8536919831223628, + "grad_norm": 0.5693808794021606, + "learning_rate": 7.939618873501356e-05, + "loss": 1.4973, + "step": 8093 + }, + { + "epoch": 0.8537974683544304, + "grad_norm": 0.5571208000183105, + "learning_rate": 7.928380415300523e-05, + "loss": 1.4903, + "step": 8094 + }, + { + "epoch": 0.8539029535864979, + "grad_norm": 0.5842289328575134, + "learning_rate": 7.917149472809113e-05, + "loss": 1.4511, + "step": 8095 + }, + { + "epoch": 0.8540084388185654, + "grad_norm": 0.5935985445976257, + "learning_rate": 7.905926047285616e-05, + "loss": 1.48, + "step": 8096 + }, + { + "epoch": 0.8541139240506329, + "grad_norm": 0.6079577207565308, + "learning_rate": 7.894710139987645e-05, + "loss": 1.5096, + "step": 8097 + }, + { + "epoch": 0.8542194092827005, + "grad_norm": 0.67818284034729, + "learning_rate": 7.883501752172038e-05, + "loss": 1.4804, + "step": 8098 + }, + { + "epoch": 0.8543248945147679, + "grad_norm": 0.5754499435424805, + "learning_rate": 7.872300885094736e-05, + "loss": 1.4938, + "step": 8099 + }, + { + "epoch": 0.8544303797468354, + "grad_norm": 0.5814951062202454, + "learning_rate": 7.861107540010845e-05, + "loss": 1.5051, + "step": 8100 + }, + { + "epoch": 0.854535864978903, + "grad_norm": 0.6946840286254883, + "learning_rate": 7.849921718174638e-05, + "loss": 1.5138, + "step": 8101 + }, + { + "epoch": 0.8546413502109704, + "grad_norm": 0.6337461471557617, + "learning_rate": 7.838743420839544e-05, + "loss": 1.5502, + "step": 8102 + }, + { + "epoch": 0.854746835443038, + "grad_norm": 0.6065704226493835, + "learning_rate": 7.827572649258147e-05, + "loss": 1.4969, + "step": 8103 
+ }, + { + "epoch": 0.8548523206751055, + "grad_norm": 0.5704129338264465, + "learning_rate": 7.816409404682185e-05, + "loss": 1.5339, + "step": 8104 + }, + { + "epoch": 0.854957805907173, + "grad_norm": 0.5573807954788208, + "learning_rate": 7.805253688362557e-05, + "loss": 1.5128, + "step": 8105 + }, + { + "epoch": 0.8550632911392405, + "grad_norm": 0.5698369741439819, + "learning_rate": 7.794105501549306e-05, + "loss": 1.4975, + "step": 8106 + }, + { + "epoch": 0.8551687763713081, + "grad_norm": 0.6002579927444458, + "learning_rate": 7.782964845491666e-05, + "loss": 1.4876, + "step": 8107 + }, + { + "epoch": 0.8552742616033755, + "grad_norm": 0.6897786259651184, + "learning_rate": 7.771831721437989e-05, + "loss": 1.4656, + "step": 8108 + }, + { + "epoch": 0.855379746835443, + "grad_norm": 0.5853578448295593, + "learning_rate": 7.760706130635792e-05, + "loss": 1.5108, + "step": 8109 + }, + { + "epoch": 0.8554852320675106, + "grad_norm": 0.5733108520507812, + "learning_rate": 7.749588074331762e-05, + "loss": 1.5392, + "step": 8110 + }, + { + "epoch": 0.855590717299578, + "grad_norm": 0.6183744072914124, + "learning_rate": 7.738477553771727e-05, + "loss": 1.5148, + "step": 8111 + }, + { + "epoch": 0.8556962025316456, + "grad_norm": 0.6681329607963562, + "learning_rate": 7.727374570200685e-05, + "loss": 1.5283, + "step": 8112 + }, + { + "epoch": 0.8558016877637131, + "grad_norm": 0.7075000405311584, + "learning_rate": 7.716279124862771e-05, + "loss": 1.4575, + "step": 8113 + }, + { + "epoch": 0.8559071729957806, + "grad_norm": 0.5603424906730652, + "learning_rate": 7.705191219001267e-05, + "loss": 1.5354, + "step": 8114 + }, + { + "epoch": 0.8560126582278481, + "grad_norm": 0.5514984130859375, + "learning_rate": 7.694110853858671e-05, + "loss": 1.506, + "step": 8115 + }, + { + "epoch": 0.8561181434599157, + "grad_norm": 0.5358585119247437, + "learning_rate": 7.683038030676573e-05, + "loss": 1.5027, + "step": 8116 + }, + { + "epoch": 0.8562236286919831, + 
"grad_norm": 0.7014994621276855, + "learning_rate": 7.67197275069573e-05, + "loss": 1.5253, + "step": 8117 + }, + { + "epoch": 0.8563291139240506, + "grad_norm": 0.5662782788276672, + "learning_rate": 7.660915015156067e-05, + "loss": 1.4733, + "step": 8118 + }, + { + "epoch": 0.8564345991561182, + "grad_norm": 0.5920583009719849, + "learning_rate": 7.649864825296669e-05, + "loss": 1.4828, + "step": 8119 + }, + { + "epoch": 0.8565400843881856, + "grad_norm": 0.5543338060379028, + "learning_rate": 7.63882218235575e-05, + "loss": 1.4656, + "step": 8120 + }, + { + "epoch": 0.8566455696202532, + "grad_norm": 0.5551273226737976, + "learning_rate": 7.627787087570692e-05, + "loss": 1.4941, + "step": 8121 + }, + { + "epoch": 0.8567510548523207, + "grad_norm": 0.5661372542381287, + "learning_rate": 7.616759542178045e-05, + "loss": 1.4926, + "step": 8122 + }, + { + "epoch": 0.8568565400843882, + "grad_norm": 0.5567362308502197, + "learning_rate": 7.605739547413487e-05, + "loss": 1.4933, + "step": 8123 + }, + { + "epoch": 0.8569620253164557, + "grad_norm": 0.6067094802856445, + "learning_rate": 7.594727104511873e-05, + "loss": 1.4903, + "step": 8124 + }, + { + "epoch": 0.8570675105485233, + "grad_norm": 0.5747743844985962, + "learning_rate": 7.583722214707206e-05, + "loss": 1.5034, + "step": 8125 + }, + { + "epoch": 0.8571729957805907, + "grad_norm": 0.5473068952560425, + "learning_rate": 7.572724879232634e-05, + "loss": 1.5006, + "step": 8126 + }, + { + "epoch": 0.8572784810126582, + "grad_norm": 0.5483472943305969, + "learning_rate": 7.561735099320463e-05, + "loss": 1.5025, + "step": 8127 + }, + { + "epoch": 0.8573839662447258, + "grad_norm": 0.5528035759925842, + "learning_rate": 7.55075287620215e-05, + "loss": 1.5147, + "step": 8128 + }, + { + "epoch": 0.8574894514767932, + "grad_norm": 0.6310689449310303, + "learning_rate": 7.539778211108309e-05, + "loss": 1.542, + "step": 8129 + }, + { + "epoch": 0.8575949367088608, + "grad_norm": 0.5941850543022156, + "learning_rate": 
7.528811105268699e-05, + "loss": 1.4721, + "step": 8130 + }, + { + "epoch": 0.8577004219409282, + "grad_norm": 0.557431697845459, + "learning_rate": 7.517851559912254e-05, + "loss": 1.4677, + "step": 8131 + }, + { + "epoch": 0.8578059071729958, + "grad_norm": 0.5676758885383606, + "learning_rate": 7.506899576267023e-05, + "loss": 1.5022, + "step": 8132 + }, + { + "epoch": 0.8579113924050633, + "grad_norm": 0.6044456958770752, + "learning_rate": 7.495955155560261e-05, + "loss": 1.5126, + "step": 8133 + }, + { + "epoch": 0.8580168776371307, + "grad_norm": 0.5760323405265808, + "learning_rate": 7.485018299018326e-05, + "loss": 1.4845, + "step": 8134 + }, + { + "epoch": 0.8581223628691983, + "grad_norm": 0.5637674331665039, + "learning_rate": 7.474089007866756e-05, + "loss": 1.5066, + "step": 8135 + }, + { + "epoch": 0.8582278481012658, + "grad_norm": 0.5582444667816162, + "learning_rate": 7.463167283330227e-05, + "loss": 1.5156, + "step": 8136 + }, + { + "epoch": 0.8583333333333333, + "grad_norm": 0.6163241863250732, + "learning_rate": 7.452253126632564e-05, + "loss": 1.4964, + "step": 8137 + }, + { + "epoch": 0.8584388185654008, + "grad_norm": 0.5607191920280457, + "learning_rate": 7.441346538996769e-05, + "loss": 1.5092, + "step": 8138 + }, + { + "epoch": 0.8585443037974684, + "grad_norm": 0.5879098773002625, + "learning_rate": 7.430447521644973e-05, + "loss": 1.5057, + "step": 8139 + }, + { + "epoch": 0.8586497890295358, + "grad_norm": 0.5624547004699707, + "learning_rate": 7.41955607579845e-05, + "loss": 1.5005, + "step": 8140 + }, + { + "epoch": 0.8587552742616034, + "grad_norm": 0.5225486159324646, + "learning_rate": 7.408672202677666e-05, + "loss": 1.4581, + "step": 8141 + }, + { + "epoch": 0.8588607594936709, + "grad_norm": 0.5805643200874329, + "learning_rate": 7.397795903502202e-05, + "loss": 1.4921, + "step": 8142 + }, + { + "epoch": 0.8589662447257383, + "grad_norm": 0.5339337587356567, + "learning_rate": 7.386927179490801e-05, + "loss": 1.5093, + "step": 
8143 + }, + { + "epoch": 0.8590717299578059, + "grad_norm": 0.5522460341453552, + "learning_rate": 7.376066031861364e-05, + "loss": 1.4801, + "step": 8144 + }, + { + "epoch": 0.8591772151898734, + "grad_norm": 0.5728732347488403, + "learning_rate": 7.365212461830933e-05, + "loss": 1.4767, + "step": 8145 + }, + { + "epoch": 0.8592827004219409, + "grad_norm": 0.5492302179336548, + "learning_rate": 7.354366470615695e-05, + "loss": 1.5348, + "step": 8146 + }, + { + "epoch": 0.8593881856540084, + "grad_norm": 0.5750755667686462, + "learning_rate": 7.343528059431009e-05, + "loss": 1.4892, + "step": 8147 + }, + { + "epoch": 0.859493670886076, + "grad_norm": 0.5797236561775208, + "learning_rate": 7.332697229491373e-05, + "loss": 1.4976, + "step": 8148 + }, + { + "epoch": 0.8595991561181434, + "grad_norm": 0.5592548847198486, + "learning_rate": 7.321873982010422e-05, + "loss": 1.4729, + "step": 8149 + }, + { + "epoch": 0.859704641350211, + "grad_norm": 0.5690310597419739, + "learning_rate": 7.311058318200969e-05, + "loss": 1.5357, + "step": 8150 + }, + { + "epoch": 0.8598101265822785, + "grad_norm": 0.5424529910087585, + "learning_rate": 7.300250239274964e-05, + "loss": 1.4951, + "step": 8151 + }, + { + "epoch": 0.859915611814346, + "grad_norm": 0.5524740815162659, + "learning_rate": 7.289449746443494e-05, + "loss": 1.4883, + "step": 8152 + }, + { + "epoch": 0.8600210970464135, + "grad_norm": 0.5657305717468262, + "learning_rate": 7.278656840916825e-05, + "loss": 1.5423, + "step": 8153 + }, + { + "epoch": 0.860126582278481, + "grad_norm": 0.5981988906860352, + "learning_rate": 7.26787152390434e-05, + "loss": 1.5235, + "step": 8154 + }, + { + "epoch": 0.8602320675105485, + "grad_norm": 0.5675592422485352, + "learning_rate": 7.257093796614597e-05, + "loss": 1.5208, + "step": 8155 + }, + { + "epoch": 0.860337552742616, + "grad_norm": 0.6195733547210693, + "learning_rate": 7.246323660255289e-05, + "loss": 1.5303, + "step": 8156 + }, + { + "epoch": 0.8604430379746836, + 
"grad_norm": 0.5200232267379761, + "learning_rate": 7.235561116033265e-05, + "loss": 1.4877, + "step": 8157 + }, + { + "epoch": 0.860548523206751, + "grad_norm": 0.5667166113853455, + "learning_rate": 7.224806165154504e-05, + "loss": 1.4994, + "step": 8158 + }, + { + "epoch": 0.8606540084388186, + "grad_norm": 0.5696017146110535, + "learning_rate": 7.214058808824192e-05, + "loss": 1.4714, + "step": 8159 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 0.5752085447311401, + "learning_rate": 7.203319048246599e-05, + "loss": 1.5055, + "step": 8160 + }, + { + "epoch": 0.8608649789029535, + "grad_norm": 0.5830256342887878, + "learning_rate": 7.192586884625169e-05, + "loss": 1.4675, + "step": 8161 + }, + { + "epoch": 0.8609704641350211, + "grad_norm": 0.5529128909111023, + "learning_rate": 7.1818623191625e-05, + "loss": 1.5195, + "step": 8162 + }, + { + "epoch": 0.8610759493670886, + "grad_norm": 0.5389652848243713, + "learning_rate": 7.17114535306033e-05, + "loss": 1.5174, + "step": 8163 + }, + { + "epoch": 0.8611814345991561, + "grad_norm": 0.5762060880661011, + "learning_rate": 7.16043598751954e-05, + "loss": 1.4717, + "step": 8164 + }, + { + "epoch": 0.8612869198312236, + "grad_norm": 0.5612945556640625, + "learning_rate": 7.149734223740187e-05, + "loss": 1.4958, + "step": 8165 + }, + { + "epoch": 0.8613924050632912, + "grad_norm": 0.5756213665008545, + "learning_rate": 7.139040062921428e-05, + "loss": 1.5337, + "step": 8166 + }, + { + "epoch": 0.8614978902953586, + "grad_norm": 0.5768166780471802, + "learning_rate": 7.128353506261631e-05, + "loss": 1.5033, + "step": 8167 + }, + { + "epoch": 0.8616033755274262, + "grad_norm": 0.5462666749954224, + "learning_rate": 7.117674554958253e-05, + "loss": 1.5128, + "step": 8168 + }, + { + "epoch": 0.8617088607594937, + "grad_norm": 0.5807523131370544, + "learning_rate": 7.107003210207947e-05, + "loss": 1.4775, + "step": 8169 + }, + { + "epoch": 0.8618143459915611, + "grad_norm": 0.6119133830070496, + "learning_rate": 
7.096339473206471e-05, + "loss": 1.4707, + "step": 8170 + }, + { + "epoch": 0.8619198312236287, + "grad_norm": 0.5752464532852173, + "learning_rate": 7.085683345148753e-05, + "loss": 1.5099, + "step": 8171 + }, + { + "epoch": 0.8620253164556962, + "grad_norm": 0.570197582244873, + "learning_rate": 7.075034827228862e-05, + "loss": 1.4989, + "step": 8172 + }, + { + "epoch": 0.8621308016877637, + "grad_norm": 0.565434455871582, + "learning_rate": 7.064393920640031e-05, + "loss": 1.543, + "step": 8173 + }, + { + "epoch": 0.8622362869198312, + "grad_norm": 0.5585256218910217, + "learning_rate": 7.053760626574618e-05, + "loss": 1.5012, + "step": 8174 + }, + { + "epoch": 0.8623417721518988, + "grad_norm": 0.596204400062561, + "learning_rate": 7.043134946224123e-05, + "loss": 1.4629, + "step": 8175 + }, + { + "epoch": 0.8624472573839662, + "grad_norm": 0.5810979008674622, + "learning_rate": 7.032516880779233e-05, + "loss": 1.502, + "step": 8176 + }, + { + "epoch": 0.8625527426160338, + "grad_norm": 0.6271765232086182, + "learning_rate": 7.021906431429747e-05, + "loss": 1.5209, + "step": 8177 + }, + { + "epoch": 0.8626582278481013, + "grad_norm": 0.6019001007080078, + "learning_rate": 7.011303599364608e-05, + "loss": 1.4893, + "step": 8178 + }, + { + "epoch": 0.8627637130801687, + "grad_norm": 0.6055362820625305, + "learning_rate": 7.000708385771928e-05, + "loss": 1.4915, + "step": 8179 + }, + { + "epoch": 0.8628691983122363, + "grad_norm": 0.6181427836418152, + "learning_rate": 6.990120791838953e-05, + "loss": 1.5465, + "step": 8180 + }, + { + "epoch": 0.8629746835443038, + "grad_norm": 0.6091094017028809, + "learning_rate": 6.979540818752064e-05, + "loss": 1.4938, + "step": 8181 + }, + { + "epoch": 0.8630801687763713, + "grad_norm": 0.5602231621742249, + "learning_rate": 6.968968467696806e-05, + "loss": 1.5009, + "step": 8182 + }, + { + "epoch": 0.8631856540084388, + "grad_norm": 0.5710597038269043, + "learning_rate": 6.958403739857866e-05, + "loss": 1.4902, + "step": 
8183 + }, + { + "epoch": 0.8632911392405064, + "grad_norm": 0.5908118486404419, + "learning_rate": 6.947846636419061e-05, + "loss": 1.5186, + "step": 8184 + }, + { + "epoch": 0.8633966244725738, + "grad_norm": 0.6123397946357727, + "learning_rate": 6.937297158563389e-05, + "loss": 1.5009, + "step": 8185 + }, + { + "epoch": 0.8635021097046414, + "grad_norm": 0.5811074376106262, + "learning_rate": 6.926755307472968e-05, + "loss": 1.5066, + "step": 8186 + }, + { + "epoch": 0.8636075949367089, + "grad_norm": 0.6684236526489258, + "learning_rate": 6.916221084329055e-05, + "loss": 1.4906, + "step": 8187 + }, + { + "epoch": 0.8637130801687763, + "grad_norm": 0.6158905029296875, + "learning_rate": 6.905694490312064e-05, + "loss": 1.4818, + "step": 8188 + }, + { + "epoch": 0.8638185654008439, + "grad_norm": 0.5804924964904785, + "learning_rate": 6.89517552660156e-05, + "loss": 1.5114, + "step": 8189 + }, + { + "epoch": 0.8639240506329114, + "grad_norm": 0.6438694000244141, + "learning_rate": 6.884664194376233e-05, + "loss": 1.4932, + "step": 8190 + }, + { + "epoch": 0.8640295358649789, + "grad_norm": 0.6328652501106262, + "learning_rate": 6.874160494813942e-05, + "loss": 1.5026, + "step": 8191 + }, + { + "epoch": 0.8641350210970464, + "grad_norm": 0.5981042385101318, + "learning_rate": 6.86366442909166e-05, + "loss": 1.483, + "step": 8192 + }, + { + "epoch": 0.864240506329114, + "grad_norm": 0.5791234374046326, + "learning_rate": 6.853175998385547e-05, + "loss": 1.4862, + "step": 8193 + }, + { + "epoch": 0.8643459915611814, + "grad_norm": 0.6016628742218018, + "learning_rate": 6.842695203870872e-05, + "loss": 1.5309, + "step": 8194 + }, + { + "epoch": 0.864451476793249, + "grad_norm": 0.6295387148857117, + "learning_rate": 6.832222046722069e-05, + "loss": 1.485, + "step": 8195 + }, + { + "epoch": 0.8645569620253165, + "grad_norm": 0.5448543429374695, + "learning_rate": 6.821756528112693e-05, + "loss": 1.5426, + "step": 8196 + }, + { + "epoch": 0.864662447257384, + 
"grad_norm": 0.5953505635261536, + "learning_rate": 6.811298649215472e-05, + "loss": 1.5201, + "step": 8197 + }, + { + "epoch": 0.8647679324894515, + "grad_norm": 0.5584825277328491, + "learning_rate": 6.80084841120226e-05, + "loss": 1.5048, + "step": 8198 + }, + { + "epoch": 0.8648734177215189, + "grad_norm": 0.5763803124427795, + "learning_rate": 6.790405815244044e-05, + "loss": 1.534, + "step": 8199 + }, + { + "epoch": 0.8649789029535865, + "grad_norm": 0.568311870098114, + "learning_rate": 6.779970862510989e-05, + "loss": 1.4905, + "step": 8200 + }, + { + "epoch": 0.865084388185654, + "grad_norm": 0.5967497825622559, + "learning_rate": 6.769543554172361e-05, + "loss": 1.5009, + "step": 8201 + }, + { + "epoch": 0.8651898734177215, + "grad_norm": 0.5971418619155884, + "learning_rate": 6.759123891396615e-05, + "loss": 1.5401, + "step": 8202 + }, + { + "epoch": 0.865295358649789, + "grad_norm": 0.5445838570594788, + "learning_rate": 6.748711875351318e-05, + "loss": 1.4886, + "step": 8203 + }, + { + "epoch": 0.8654008438818566, + "grad_norm": 0.5844756364822388, + "learning_rate": 6.738307507203187e-05, + "loss": 1.4774, + "step": 8204 + }, + { + "epoch": 0.865506329113924, + "grad_norm": 0.5821036696434021, + "learning_rate": 6.72791078811808e-05, + "loss": 1.501, + "step": 8205 + }, + { + "epoch": 0.8656118143459915, + "grad_norm": 0.6080436110496521, + "learning_rate": 6.717521719261016e-05, + "loss": 1.4998, + "step": 8206 + }, + { + "epoch": 0.8657172995780591, + "grad_norm": 0.7188351154327393, + "learning_rate": 6.707140301796122e-05, + "loss": 1.5209, + "step": 8207 + }, + { + "epoch": 0.8658227848101265, + "grad_norm": 0.5667679905891418, + "learning_rate": 6.696766536886692e-05, + "loss": 1.5242, + "step": 8208 + }, + { + "epoch": 0.8659282700421941, + "grad_norm": 0.5339351892471313, + "learning_rate": 6.686400425695171e-05, + "loss": 1.4686, + "step": 8209 + }, + { + "epoch": 0.8660337552742616, + "grad_norm": 0.5978707075119019, + "learning_rate": 
6.676041969383107e-05, + "loss": 1.4829, + "step": 8210 + }, + { + "epoch": 0.8661392405063291, + "grad_norm": 0.5771703124046326, + "learning_rate": 6.665691169111244e-05, + "loss": 1.5039, + "step": 8211 + }, + { + "epoch": 0.8662447257383966, + "grad_norm": 0.5934908986091614, + "learning_rate": 6.655348026039437e-05, + "loss": 1.4857, + "step": 8212 + }, + { + "epoch": 0.8663502109704642, + "grad_norm": 0.5956532955169678, + "learning_rate": 6.645012541326678e-05, + "loss": 1.5007, + "step": 8213 + }, + { + "epoch": 0.8664556962025316, + "grad_norm": 0.564696192741394, + "learning_rate": 6.634684716131114e-05, + "loss": 1.5098, + "step": 8214 + }, + { + "epoch": 0.8665611814345991, + "grad_norm": 0.5815297961235046, + "learning_rate": 6.62436455161003e-05, + "loss": 1.5243, + "step": 8215 + }, + { + "epoch": 0.8666666666666667, + "grad_norm": 0.5692692399024963, + "learning_rate": 6.614052048919847e-05, + "loss": 1.4955, + "step": 8216 + }, + { + "epoch": 0.8667721518987341, + "grad_norm": 0.7139742970466614, + "learning_rate": 6.603747209216135e-05, + "loss": 1.5252, + "step": 8217 + }, + { + "epoch": 0.8668776371308017, + "grad_norm": 0.5779005289077759, + "learning_rate": 6.593450033653586e-05, + "loss": 1.4856, + "step": 8218 + }, + { + "epoch": 0.8669831223628692, + "grad_norm": 0.6004500985145569, + "learning_rate": 6.583160523386086e-05, + "loss": 1.5237, + "step": 8219 + }, + { + "epoch": 0.8670886075949367, + "grad_norm": 0.6468473076820374, + "learning_rate": 6.572878679566605e-05, + "loss": 1.5338, + "step": 8220 + }, + { + "epoch": 0.8671940928270042, + "grad_norm": 0.560039758682251, + "learning_rate": 6.562604503347277e-05, + "loss": 1.4691, + "step": 8221 + }, + { + "epoch": 0.8672995780590718, + "grad_norm": 0.564639687538147, + "learning_rate": 6.552337995879368e-05, + "loss": 1.4895, + "step": 8222 + }, + { + "epoch": 0.8674050632911392, + "grad_norm": 0.5605901479721069, + "learning_rate": 6.542079158313305e-05, + "loss": 1.5158, + "step": 
8223 + }, + { + "epoch": 0.8675105485232067, + "grad_norm": 0.5434699058532715, + "learning_rate": 6.531827991798628e-05, + "loss": 1.509, + "step": 8224 + }, + { + "epoch": 0.8676160337552743, + "grad_norm": 0.5790572762489319, + "learning_rate": 6.521584497484043e-05, + "loss": 1.5342, + "step": 8225 + }, + { + "epoch": 0.8677215189873417, + "grad_norm": 0.5921952724456787, + "learning_rate": 6.511348676517373e-05, + "loss": 1.5053, + "step": 8226 + }, + { + "epoch": 0.8678270042194093, + "grad_norm": 0.5630998611450195, + "learning_rate": 6.501120530045593e-05, + "loss": 1.4716, + "step": 8227 + }, + { + "epoch": 0.8679324894514768, + "grad_norm": 0.5524029731750488, + "learning_rate": 6.490900059214836e-05, + "loss": 1.5262, + "step": 8228 + }, + { + "epoch": 0.8680379746835443, + "grad_norm": 0.5641048550605774, + "learning_rate": 6.480687265170342e-05, + "loss": 1.4933, + "step": 8229 + }, + { + "epoch": 0.8681434599156118, + "grad_norm": 0.6159922480583191, + "learning_rate": 6.470482149056509e-05, + "loss": 1.5242, + "step": 8230 + }, + { + "epoch": 0.8682489451476794, + "grad_norm": 0.5848441123962402, + "learning_rate": 6.460284712016868e-05, + "loss": 1.5098, + "step": 8231 + }, + { + "epoch": 0.8683544303797468, + "grad_norm": 0.5667997598648071, + "learning_rate": 6.450094955194096e-05, + "loss": 1.4704, + "step": 8232 + }, + { + "epoch": 0.8684599156118143, + "grad_norm": 0.5730487108230591, + "learning_rate": 6.439912879730009e-05, + "loss": 1.5224, + "step": 8233 + }, + { + "epoch": 0.8685654008438819, + "grad_norm": 0.5675647258758545, + "learning_rate": 6.429738486765548e-05, + "loss": 1.514, + "step": 8234 + }, + { + "epoch": 0.8686708860759493, + "grad_norm": 0.532840371131897, + "learning_rate": 6.419571777440814e-05, + "loss": 1.4917, + "step": 8235 + }, + { + "epoch": 0.8687763713080169, + "grad_norm": 0.5892089009284973, + "learning_rate": 6.409412752895041e-05, + "loss": 1.503, + "step": 8236 + }, + { + "epoch": 0.8688818565400844, + 
"grad_norm": 0.5622027516365051, + "learning_rate": 6.399261414266571e-05, + "loss": 1.5215, + "step": 8237 + }, + { + "epoch": 0.8689873417721519, + "grad_norm": 0.6224781274795532, + "learning_rate": 6.389117762692952e-05, + "loss": 1.5186, + "step": 8238 + }, + { + "epoch": 0.8690928270042194, + "grad_norm": 0.6003425121307373, + "learning_rate": 6.37898179931081e-05, + "loss": 1.4589, + "step": 8239 + }, + { + "epoch": 0.869198312236287, + "grad_norm": 0.5529787540435791, + "learning_rate": 6.368853525255942e-05, + "loss": 1.4983, + "step": 8240 + }, + { + "epoch": 0.8693037974683544, + "grad_norm": 0.5537022352218628, + "learning_rate": 6.358732941663248e-05, + "loss": 1.5175, + "step": 8241 + }, + { + "epoch": 0.869409282700422, + "grad_norm": 0.5625491142272949, + "learning_rate": 6.348620049666815e-05, + "loss": 1.4961, + "step": 8242 + }, + { + "epoch": 0.8695147679324895, + "grad_norm": 0.5574873685836792, + "learning_rate": 6.338514850399826e-05, + "loss": 1.531, + "step": 8243 + }, + { + "epoch": 0.8696202531645569, + "grad_norm": 0.6418902277946472, + "learning_rate": 6.328417344994627e-05, + "loss": 1.5092, + "step": 8244 + }, + { + "epoch": 0.8697257383966245, + "grad_norm": 0.5780499577522278, + "learning_rate": 6.318327534582688e-05, + "loss": 1.4906, + "step": 8245 + }, + { + "epoch": 0.869831223628692, + "grad_norm": 0.5254501104354858, + "learning_rate": 6.308245420294636e-05, + "loss": 1.5159, + "step": 8246 + }, + { + "epoch": 0.8699367088607595, + "grad_norm": 0.6186427474021912, + "learning_rate": 6.298171003260194e-05, + "loss": 1.5337, + "step": 8247 + }, + { + "epoch": 0.870042194092827, + "grad_norm": 0.620522141456604, + "learning_rate": 6.288104284608284e-05, + "loss": 1.5438, + "step": 8248 + }, + { + "epoch": 0.8701476793248946, + "grad_norm": 0.5866483449935913, + "learning_rate": 6.278045265466911e-05, + "loss": 1.5155, + "step": 8249 + }, + { + "epoch": 0.870253164556962, + "grad_norm": 0.5686328411102295, + "learning_rate": 
6.267993946963249e-05, + "loss": 1.5237, + "step": 8250 + }, + { + "epoch": 0.8703586497890295, + "grad_norm": 0.5549601912498474, + "learning_rate": 6.257950330223597e-05, + "loss": 1.5338, + "step": 8251 + }, + { + "epoch": 0.8704641350210971, + "grad_norm": 0.5516408085823059, + "learning_rate": 6.247914416373387e-05, + "loss": 1.4812, + "step": 8252 + }, + { + "epoch": 0.8705696202531645, + "grad_norm": 0.6471725106239319, + "learning_rate": 6.237886206537197e-05, + "loss": 1.4939, + "step": 8253 + }, + { + "epoch": 0.8706751054852321, + "grad_norm": 0.6114680171012878, + "learning_rate": 6.227865701838733e-05, + "loss": 1.4973, + "step": 8254 + }, + { + "epoch": 0.8707805907172996, + "grad_norm": 0.5675532817840576, + "learning_rate": 6.217852903400841e-05, + "loss": 1.5625, + "step": 8255 + }, + { + "epoch": 0.8708860759493671, + "grad_norm": 0.5497972369194031, + "learning_rate": 6.207847812345524e-05, + "loss": 1.5184, + "step": 8256 + }, + { + "epoch": 0.8709915611814346, + "grad_norm": 0.6498136520385742, + "learning_rate": 6.197850429793866e-05, + "loss": 1.5171, + "step": 8257 + }, + { + "epoch": 0.8710970464135022, + "grad_norm": 0.6800841689109802, + "learning_rate": 6.187860756866157e-05, + "loss": 1.482, + "step": 8258 + }, + { + "epoch": 0.8712025316455696, + "grad_norm": 0.6161894798278809, + "learning_rate": 6.177878794681782e-05, + "loss": 1.5058, + "step": 8259 + }, + { + "epoch": 0.8713080168776371, + "grad_norm": 0.5688996315002441, + "learning_rate": 6.167904544359265e-05, + "loss": 1.4814, + "step": 8260 + }, + { + "epoch": 0.8714135021097047, + "grad_norm": 0.6291729807853699, + "learning_rate": 6.157938007016279e-05, + "loss": 1.4991, + "step": 8261 + }, + { + "epoch": 0.8715189873417721, + "grad_norm": 0.7544305324554443, + "learning_rate": 6.147979183769602e-05, + "loss": 1.4896, + "step": 8262 + }, + { + "epoch": 0.8716244725738397, + "grad_norm": 0.6048492193222046, + "learning_rate": 6.138028075735196e-05, + "loss": 1.5192, + "step": 
8263 + }, + { + "epoch": 0.8717299578059071, + "grad_norm": 0.5341664552688599, + "learning_rate": 6.128084684028118e-05, + "loss": 1.4791, + "step": 8264 + }, + { + "epoch": 0.8718354430379747, + "grad_norm": 0.5337679386138916, + "learning_rate": 6.118149009762574e-05, + "loss": 1.4823, + "step": 8265 + }, + { + "epoch": 0.8719409282700422, + "grad_norm": 0.583838939666748, + "learning_rate": 6.108221054051902e-05, + "loss": 1.5099, + "step": 8266 + }, + { + "epoch": 0.8720464135021097, + "grad_norm": 0.6280133724212646, + "learning_rate": 6.0983008180086005e-05, + "loss": 1.5061, + "step": 8267 + }, + { + "epoch": 0.8721518987341772, + "grad_norm": 0.6117594838142395, + "learning_rate": 6.088388302744266e-05, + "loss": 1.481, + "step": 8268 + }, + { + "epoch": 0.8722573839662447, + "grad_norm": 0.5774719715118408, + "learning_rate": 6.078483509369642e-05, + "loss": 1.5254, + "step": 8269 + }, + { + "epoch": 0.8723628691983122, + "grad_norm": 0.5737250447273254, + "learning_rate": 6.068586438994617e-05, + "loss": 1.5331, + "step": 8270 + }, + { + "epoch": 0.8724683544303797, + "grad_norm": 0.5592074394226074, + "learning_rate": 6.058697092728202e-05, + "loss": 1.5368, + "step": 8271 + }, + { + "epoch": 0.8725738396624473, + "grad_norm": 0.5760943293571472, + "learning_rate": 6.048815471678554e-05, + "loss": 1.4733, + "step": 8272 + }, + { + "epoch": 0.8726793248945147, + "grad_norm": 0.5513484477996826, + "learning_rate": 6.038941576952952e-05, + "loss": 1.4832, + "step": 8273 + }, + { + "epoch": 0.8727848101265823, + "grad_norm": 0.5722225904464722, + "learning_rate": 6.029075409657822e-05, + "loss": 1.4881, + "step": 8274 + }, + { + "epoch": 0.8728902953586498, + "grad_norm": 0.5975533723831177, + "learning_rate": 6.0192169708987026e-05, + "loss": 1.4686, + "step": 8275 + }, + { + "epoch": 0.8729957805907173, + "grad_norm": 0.6132709383964539, + "learning_rate": 6.009366261780286e-05, + "loss": 1.5052, + "step": 8276 + }, + { + "epoch": 0.8731012658227848, + 
"grad_norm": 0.5888716578483582, + "learning_rate": 5.999523283406405e-05, + "loss": 1.4843, + "step": 8277 + }, + { + "epoch": 0.8732067510548523, + "grad_norm": 0.5966176986694336, + "learning_rate": 5.9896880368800115e-05, + "loss": 1.4682, + "step": 8278 + }, + { + "epoch": 0.8733122362869198, + "grad_norm": 0.5959044098854065, + "learning_rate": 5.9798605233031904e-05, + "loss": 1.5011, + "step": 8279 + }, + { + "epoch": 0.8734177215189873, + "grad_norm": 0.5827769637107849, + "learning_rate": 5.970040743777161e-05, + "loss": 1.4604, + "step": 8280 + }, + { + "epoch": 0.8735232067510549, + "grad_norm": 0.5658276677131653, + "learning_rate": 5.960228699402284e-05, + "loss": 1.4879, + "step": 8281 + }, + { + "epoch": 0.8736286919831223, + "grad_norm": 0.5652166604995728, + "learning_rate": 5.9504243912780474e-05, + "loss": 1.511, + "step": 8282 + }, + { + "epoch": 0.8737341772151899, + "grad_norm": 0.589472234249115, + "learning_rate": 5.940627820503064e-05, + "loss": 1.5174, + "step": 8283 + }, + { + "epoch": 0.8738396624472574, + "grad_norm": 0.5868620276451111, + "learning_rate": 5.930838988175097e-05, + "loss": 1.4667, + "step": 8284 + }, + { + "epoch": 0.8739451476793249, + "grad_norm": 0.5977030396461487, + "learning_rate": 5.921057895391027e-05, + "loss": 1.4664, + "step": 8285 + }, + { + "epoch": 0.8740506329113924, + "grad_norm": 0.6461166143417358, + "learning_rate": 5.91128454324687e-05, + "loss": 1.4788, + "step": 8286 + }, + { + "epoch": 0.87415611814346, + "grad_norm": 0.5658218264579773, + "learning_rate": 5.901518932837799e-05, + "loss": 1.5129, + "step": 8287 + }, + { + "epoch": 0.8742616033755274, + "grad_norm": 0.5251671075820923, + "learning_rate": 5.891761065258089e-05, + "loss": 1.5071, + "step": 8288 + }, + { + "epoch": 0.8743670886075949, + "grad_norm": 0.5254503488540649, + "learning_rate": 5.8820109416011485e-05, + "loss": 1.5007, + "step": 8289 + }, + { + "epoch": 0.8744725738396625, + "grad_norm": 0.5413451194763184, + 
"learning_rate": 5.8722685629595454e-05, + "loss": 1.4476, + "step": 8290 + }, + { + "epoch": 0.8745780590717299, + "grad_norm": 0.6089202761650085, + "learning_rate": 5.862533930424949e-05, + "loss": 1.5127, + "step": 8291 + }, + { + "epoch": 0.8746835443037975, + "grad_norm": 0.5986435413360596, + "learning_rate": 5.852807045088177e-05, + "loss": 1.532, + "step": 8292 + }, + { + "epoch": 0.874789029535865, + "grad_norm": 0.577689528465271, + "learning_rate": 5.843087908039166e-05, + "loss": 1.4743, + "step": 8293 + }, + { + "epoch": 0.8748945147679325, + "grad_norm": 0.5689354538917542, + "learning_rate": 5.833376520367012e-05, + "loss": 1.5166, + "step": 8294 + }, + { + "epoch": 0.875, + "grad_norm": 0.5488570332527161, + "learning_rate": 5.823672883159911e-05, + "loss": 1.5315, + "step": 8295 + }, + { + "epoch": 0.8751054852320675, + "grad_norm": 0.5576912760734558, + "learning_rate": 5.813976997505202e-05, + "loss": 1.493, + "step": 8296 + }, + { + "epoch": 0.875210970464135, + "grad_norm": 0.5934337973594666, + "learning_rate": 5.804288864489366e-05, + "loss": 1.5183, + "step": 8297 + }, + { + "epoch": 0.8753164556962025, + "grad_norm": 0.5481178164482117, + "learning_rate": 5.794608485198008e-05, + "loss": 1.5112, + "step": 8298 + }, + { + "epoch": 0.8754219409282701, + "grad_norm": 0.5940700769424438, + "learning_rate": 5.784935860715862e-05, + "loss": 1.5311, + "step": 8299 + }, + { + "epoch": 0.8755274261603375, + "grad_norm": 0.5551467537879944, + "learning_rate": 5.7752709921267855e-05, + "loss": 1.4956, + "step": 8300 + }, + { + "epoch": 0.8756329113924051, + "grad_norm": 0.5398936867713928, + "learning_rate": 5.7656138805137785e-05, + "loss": 1.5113, + "step": 8301 + }, + { + "epoch": 0.8757383966244726, + "grad_norm": 0.5864905714988708, + "learning_rate": 5.7559645269589764e-05, + "loss": 1.4942, + "step": 8302 + }, + { + "epoch": 0.87584388185654, + "grad_norm": 0.5486473441123962, + "learning_rate": 5.746322932543621e-05, + "loss": 1.4712, + 
"step": 8303 + }, + { + "epoch": 0.8759493670886076, + "grad_norm": 0.5839229822158813, + "learning_rate": 5.736689098348125e-05, + "loss": 1.4976, + "step": 8304 + }, + { + "epoch": 0.8760548523206751, + "grad_norm": 0.5508242249488831, + "learning_rate": 5.727063025451973e-05, + "loss": 1.4799, + "step": 8305 + }, + { + "epoch": 0.8761603375527426, + "grad_norm": 0.5628284811973572, + "learning_rate": 5.717444714933845e-05, + "loss": 1.5116, + "step": 8306 + }, + { + "epoch": 0.8762658227848101, + "grad_norm": 0.586932897567749, + "learning_rate": 5.707834167871512e-05, + "loss": 1.4747, + "step": 8307 + }, + { + "epoch": 0.8763713080168777, + "grad_norm": 0.576238751411438, + "learning_rate": 5.698231385341887e-05, + "loss": 1.5074, + "step": 8308 + }, + { + "epoch": 0.8764767932489451, + "grad_norm": 0.542136549949646, + "learning_rate": 5.6886363684210016e-05, + "loss": 1.4734, + "step": 8309 + }, + { + "epoch": 0.8765822784810127, + "grad_norm": 0.5739567875862122, + "learning_rate": 5.6790491181840294e-05, + "loss": 1.5092, + "step": 8310 + }, + { + "epoch": 0.8766877637130802, + "grad_norm": 0.5704970955848694, + "learning_rate": 5.6694696357052685e-05, + "loss": 1.5488, + "step": 8311 + }, + { + "epoch": 0.8767932489451477, + "grad_norm": 0.5916831493377686, + "learning_rate": 5.6598979220581434e-05, + "loss": 1.5116, + "step": 8312 + }, + { + "epoch": 0.8768987341772152, + "grad_norm": 0.5807032585144043, + "learning_rate": 5.650333978315223e-05, + "loss": 1.5238, + "step": 8313 + }, + { + "epoch": 0.8770042194092827, + "grad_norm": 0.5765137076377869, + "learning_rate": 5.640777805548181e-05, + "loss": 1.4922, + "step": 8314 + }, + { + "epoch": 0.8771097046413502, + "grad_norm": 0.7150170207023621, + "learning_rate": 5.631229404827845e-05, + "loss": 1.5119, + "step": 8315 + }, + { + "epoch": 0.8772151898734177, + "grad_norm": 0.5768731236457825, + "learning_rate": 5.6216887772241596e-05, + "loss": 1.5066, + "step": 8316 + }, + { + "epoch": 
0.8773206751054853, + "grad_norm": 0.5620297193527222, + "learning_rate": 5.612155923806203e-05, + "loss": 1.4957, + "step": 8317 + }, + { + "epoch": 0.8774261603375527, + "grad_norm": 0.5582455992698669, + "learning_rate": 5.60263084564217e-05, + "loss": 1.5117, + "step": 8318 + }, + { + "epoch": 0.8775316455696203, + "grad_norm": 0.5988828539848328, + "learning_rate": 5.5931135437993994e-05, + "loss": 1.5116, + "step": 8319 + }, + { + "epoch": 0.8776371308016878, + "grad_norm": 0.5710445046424866, + "learning_rate": 5.583604019344354e-05, + "loss": 1.4891, + "step": 8320 + }, + { + "epoch": 0.8777426160337553, + "grad_norm": 0.5734010934829712, + "learning_rate": 5.574102273342616e-05, + "loss": 1.477, + "step": 8321 + }, + { + "epoch": 0.8778481012658228, + "grad_norm": 0.5288071036338806, + "learning_rate": 5.5646083068589065e-05, + "loss": 1.5235, + "step": 8322 + }, + { + "epoch": 0.8779535864978903, + "grad_norm": 0.5602090954780579, + "learning_rate": 5.5551221209570596e-05, + "loss": 1.5169, + "step": 8323 + }, + { + "epoch": 0.8780590717299578, + "grad_norm": 0.5805942416191101, + "learning_rate": 5.5456437167000746e-05, + "loss": 1.529, + "step": 8324 + }, + { + "epoch": 0.8781645569620253, + "grad_norm": 0.5473354458808899, + "learning_rate": 5.536173095150043e-05, + "loss": 1.503, + "step": 8325 + }, + { + "epoch": 0.8782700421940929, + "grad_norm": 0.5746342539787292, + "learning_rate": 5.526710257368192e-05, + "loss": 1.5113, + "step": 8326 + }, + { + "epoch": 0.8783755274261603, + "grad_norm": 0.5238164067268372, + "learning_rate": 5.517255204414889e-05, + "loss": 1.5194, + "step": 8327 + }, + { + "epoch": 0.8784810126582279, + "grad_norm": 0.5642191767692566, + "learning_rate": 5.507807937349604e-05, + "loss": 1.4907, + "step": 8328 + }, + { + "epoch": 0.8785864978902953, + "grad_norm": 0.568899393081665, + "learning_rate": 5.498368457230965e-05, + "loss": 1.473, + "step": 8329 + }, + { + "epoch": 0.8786919831223629, + "grad_norm": 
0.6002305150032043, + "learning_rate": 5.4889367651167007e-05, + "loss": 1.5058, + "step": 8330 + }, + { + "epoch": 0.8787974683544304, + "grad_norm": 0.5451385378837585, + "learning_rate": 5.479512862063674e-05, + "loss": 1.5269, + "step": 8331 + }, + { + "epoch": 0.8789029535864978, + "grad_norm": 0.562323808670044, + "learning_rate": 5.470096749127906e-05, + "loss": 1.5393, + "step": 8332 + }, + { + "epoch": 0.8790084388185654, + "grad_norm": 0.5556163787841797, + "learning_rate": 5.460688427364505e-05, + "loss": 1.4837, + "step": 8333 + }, + { + "epoch": 0.8791139240506329, + "grad_norm": 0.5859853029251099, + "learning_rate": 5.451287897827725e-05, + "loss": 1.497, + "step": 8334 + }, + { + "epoch": 0.8792194092827004, + "grad_norm": 0.5507094860076904, + "learning_rate": 5.441895161570934e-05, + "loss": 1.5121, + "step": 8335 + }, + { + "epoch": 0.8793248945147679, + "grad_norm": 0.5417308807373047, + "learning_rate": 5.43251021964663e-05, + "loss": 1.4867, + "step": 8336 + }, + { + "epoch": 0.8794303797468355, + "grad_norm": 0.5665503144264221, + "learning_rate": 5.423133073106457e-05, + "loss": 1.4943, + "step": 8337 + }, + { + "epoch": 0.8795358649789029, + "grad_norm": 0.5643391609191895, + "learning_rate": 5.413763723001164e-05, + "loss": 1.4992, + "step": 8338 + }, + { + "epoch": 0.8796413502109705, + "grad_norm": 0.6051027178764343, + "learning_rate": 5.4044021703806375e-05, + "loss": 1.4223, + "step": 8339 + }, + { + "epoch": 0.879746835443038, + "grad_norm": 0.549644947052002, + "learning_rate": 5.3950484162938714e-05, + "loss": 1.4619, + "step": 8340 + }, + { + "epoch": 0.8798523206751054, + "grad_norm": 0.5656269192695618, + "learning_rate": 5.385702461789019e-05, + "loss": 1.5023, + "step": 8341 + }, + { + "epoch": 0.879957805907173, + "grad_norm": 0.5861770510673523, + "learning_rate": 5.376364307913334e-05, + "loss": 1.4796, + "step": 8342 + }, + { + "epoch": 0.8800632911392405, + "grad_norm": 0.5244765877723694, + "learning_rate": 
5.3670339557132045e-05, + "loss": 1.4557, + "step": 8343 + }, + { + "epoch": 0.880168776371308, + "grad_norm": 0.5538406372070312, + "learning_rate": 5.3577114062341446e-05, + "loss": 1.4697, + "step": 8344 + }, + { + "epoch": 0.8802742616033755, + "grad_norm": 0.5506071448326111, + "learning_rate": 5.348396660520785e-05, + "loss": 1.5107, + "step": 8345 + }, + { + "epoch": 0.8803797468354431, + "grad_norm": 0.5534111857414246, + "learning_rate": 5.339089719616891e-05, + "loss": 1.4935, + "step": 8346 + }, + { + "epoch": 0.8804852320675105, + "grad_norm": 0.6082796454429626, + "learning_rate": 5.329790584565361e-05, + "loss": 1.5449, + "step": 8347 + }, + { + "epoch": 0.880590717299578, + "grad_norm": 0.5341275930404663, + "learning_rate": 5.320499256408204e-05, + "loss": 1.4975, + "step": 8348 + }, + { + "epoch": 0.8806962025316456, + "grad_norm": 0.5309158563613892, + "learning_rate": 5.311215736186536e-05, + "loss": 1.488, + "step": 8349 + }, + { + "epoch": 0.880801687763713, + "grad_norm": 0.5478104948997498, + "learning_rate": 5.3019400249406686e-05, + "loss": 1.5195, + "step": 8350 + }, + { + "epoch": 0.8809071729957806, + "grad_norm": 0.5284262895584106, + "learning_rate": 5.29267212370996e-05, + "loss": 1.5117, + "step": 8351 + }, + { + "epoch": 0.8810126582278481, + "grad_norm": 0.5636165142059326, + "learning_rate": 5.283412033532939e-05, + "loss": 1.4673, + "step": 8352 + }, + { + "epoch": 0.8811181434599156, + "grad_norm": 0.5238246917724609, + "learning_rate": 5.274159755447233e-05, + "loss": 1.4423, + "step": 8353 + }, + { + "epoch": 0.8812236286919831, + "grad_norm": 0.5588087439537048, + "learning_rate": 5.264915290489614e-05, + "loss": 1.4822, + "step": 8354 + }, + { + "epoch": 0.8813291139240507, + "grad_norm": 0.5975792407989502, + "learning_rate": 5.25567863969596e-05, + "loss": 1.4761, + "step": 8355 + }, + { + "epoch": 0.8814345991561181, + "grad_norm": 0.5579373240470886, + "learning_rate": 5.246449804101294e-05, + "loss": 1.4961, + "step": 
8356 + }, + { + "epoch": 0.8815400843881857, + "grad_norm": 0.5430968999862671, + "learning_rate": 5.237228784739739e-05, + "loss": 1.5281, + "step": 8357 + }, + { + "epoch": 0.8816455696202532, + "grad_norm": 0.5746854543685913, + "learning_rate": 5.228015582644585e-05, + "loss": 1.501, + "step": 8358 + }, + { + "epoch": 0.8817510548523206, + "grad_norm": 0.5809199213981628, + "learning_rate": 5.21881019884819e-05, + "loss": 1.5326, + "step": 8359 + }, + { + "epoch": 0.8818565400843882, + "grad_norm": 0.6259477734565735, + "learning_rate": 5.209612634382077e-05, + "loss": 1.4782, + "step": 8360 + }, + { + "epoch": 0.8819620253164557, + "grad_norm": 0.5389335751533508, + "learning_rate": 5.2004228902768815e-05, + "loss": 1.5034, + "step": 8361 + }, + { + "epoch": 0.8820675105485232, + "grad_norm": 0.5665708184242249, + "learning_rate": 5.191240967562347e-05, + "loss": 1.5433, + "step": 8362 + }, + { + "epoch": 0.8821729957805907, + "grad_norm": 0.5803258419036865, + "learning_rate": 5.182066867267357e-05, + "loss": 1.5087, + "step": 8363 + }, + { + "epoch": 0.8822784810126583, + "grad_norm": 0.5526942610740662, + "learning_rate": 5.172900590419915e-05, + "loss": 1.5161, + "step": 8364 + }, + { + "epoch": 0.8823839662447257, + "grad_norm": 0.5366986989974976, + "learning_rate": 5.1637421380471586e-05, + "loss": 1.5279, + "step": 8365 + }, + { + "epoch": 0.8824894514767933, + "grad_norm": 0.5796151757240295, + "learning_rate": 5.154591511175316e-05, + "loss": 1.4754, + "step": 8366 + }, + { + "epoch": 0.8825949367088608, + "grad_norm": 0.5289108753204346, + "learning_rate": 5.1454487108297924e-05, + "loss": 1.4864, + "step": 8367 + }, + { + "epoch": 0.8827004219409282, + "grad_norm": 0.5878637433052063, + "learning_rate": 5.136313738035059e-05, + "loss": 1.5041, + "step": 8368 + }, + { + "epoch": 0.8828059071729958, + "grad_norm": 0.5682455897331238, + "learning_rate": 5.127186593814748e-05, + "loss": 1.4808, + "step": 8369 + }, + { + "epoch": 0.8829113924050633, + 
"grad_norm": 0.5511020421981812, + "learning_rate": 5.118067279191599e-05, + "loss": 1.4546, + "step": 8370 + }, + { + "epoch": 0.8830168776371308, + "grad_norm": 0.550645112991333, + "learning_rate": 5.1089557951874696e-05, + "loss": 1.4872, + "step": 8371 + }, + { + "epoch": 0.8831223628691983, + "grad_norm": 0.5587513446807861, + "learning_rate": 5.0998521428233526e-05, + "loss": 1.5188, + "step": 8372 + }, + { + "epoch": 0.8832278481012659, + "grad_norm": 0.5733350515365601, + "learning_rate": 5.0907563231193556e-05, + "loss": 1.4863, + "step": 8373 + }, + { + "epoch": 0.8833333333333333, + "grad_norm": 0.5390364527702332, + "learning_rate": 5.081668337094713e-05, + "loss": 1.4798, + "step": 8374 + }, + { + "epoch": 0.8834388185654009, + "grad_norm": 0.5865548253059387, + "learning_rate": 5.072588185767763e-05, + "loss": 1.5287, + "step": 8375 + }, + { + "epoch": 0.8835443037974684, + "grad_norm": 0.5598992705345154, + "learning_rate": 5.063515870156013e-05, + "loss": 1.5437, + "step": 8376 + }, + { + "epoch": 0.8836497890295358, + "grad_norm": 0.5924822092056274, + "learning_rate": 5.054451391276035e-05, + "loss": 1.5058, + "step": 8377 + }, + { + "epoch": 0.8837552742616034, + "grad_norm": 0.5477449297904968, + "learning_rate": 5.045394750143567e-05, + "loss": 1.4866, + "step": 8378 + }, + { + "epoch": 0.8838607594936709, + "grad_norm": 0.5379781723022461, + "learning_rate": 5.0363459477734464e-05, + "loss": 1.4835, + "step": 8379 + }, + { + "epoch": 0.8839662447257384, + "grad_norm": 0.5619903206825256, + "learning_rate": 5.0273049851796205e-05, + "loss": 1.5388, + "step": 8380 + }, + { + "epoch": 0.8840717299578059, + "grad_norm": 0.5475001931190491, + "learning_rate": 5.0182718633751954e-05, + "loss": 1.4868, + "step": 8381 + }, + { + "epoch": 0.8841772151898735, + "grad_norm": 0.5501093864440918, + "learning_rate": 5.009246583372362e-05, + "loss": 1.5179, + "step": 8382 + }, + { + "epoch": 0.8842827004219409, + "grad_norm": 0.6910074949264526, + 
"learning_rate": 5.000229146182453e-05, + "loss": 1.4899, + "step": 8383 + }, + { + "epoch": 0.8843881856540085, + "grad_norm": 0.5972383618354797, + "learning_rate": 4.9912195528159174e-05, + "loss": 1.4911, + "step": 8384 + }, + { + "epoch": 0.884493670886076, + "grad_norm": 0.5454254150390625, + "learning_rate": 4.982217804282332e-05, + "loss": 1.5204, + "step": 8385 + }, + { + "epoch": 0.8845991561181434, + "grad_norm": 0.6059595346450806, + "learning_rate": 4.973223901590382e-05, + "loss": 1.502, + "step": 8386 + }, + { + "epoch": 0.884704641350211, + "grad_norm": 0.5598260164260864, + "learning_rate": 4.9642378457478847e-05, + "loss": 1.4942, + "step": 8387 + }, + { + "epoch": 0.8848101265822785, + "grad_norm": 0.6224713921546936, + "learning_rate": 4.955259637761761e-05, + "loss": 1.4754, + "step": 8388 + }, + { + "epoch": 0.884915611814346, + "grad_norm": 0.5651736855506897, + "learning_rate": 4.946289278638064e-05, + "loss": 1.4862, + "step": 8389 + }, + { + "epoch": 0.8850210970464135, + "grad_norm": 0.5824029445648193, + "learning_rate": 4.9373267693819805e-05, + "loss": 1.5099, + "step": 8390 + }, + { + "epoch": 0.8851265822784811, + "grad_norm": 0.5587872266769409, + "learning_rate": 4.928372110997792e-05, + "loss": 1.5199, + "step": 8391 + }, + { + "epoch": 0.8852320675105485, + "grad_norm": 0.5895681977272034, + "learning_rate": 4.9194253044889117e-05, + "loss": 1.4849, + "step": 8392 + }, + { + "epoch": 0.885337552742616, + "grad_norm": 0.6809305548667908, + "learning_rate": 4.910486350857887e-05, + "loss": 1.4974, + "step": 8393 + }, + { + "epoch": 0.8854430379746835, + "grad_norm": 0.6427374482154846, + "learning_rate": 4.90155525110636e-05, + "loss": 1.4943, + "step": 8394 + }, + { + "epoch": 0.885548523206751, + "grad_norm": 0.621790885925293, + "learning_rate": 4.89263200623512e-05, + "loss": 1.4918, + "step": 8395 + }, + { + "epoch": 0.8856540084388186, + "grad_norm": 0.5633459091186523, + "learning_rate": 4.883716617244044e-05, + "loss": 
1.5052, + "step": 8396 + }, + { + "epoch": 0.885759493670886, + "grad_norm": 0.5622960925102234, + "learning_rate": 4.874809085132148e-05, + "loss": 1.4993, + "step": 8397 + }, + { + "epoch": 0.8858649789029536, + "grad_norm": 0.5791352391242981, + "learning_rate": 4.865909410897576e-05, + "loss": 1.4762, + "step": 8398 + }, + { + "epoch": 0.8859704641350211, + "grad_norm": 0.6801947355270386, + "learning_rate": 4.8570175955375715e-05, + "loss": 1.5153, + "step": 8399 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 0.5657055974006653, + "learning_rate": 4.848133640048513e-05, + "loss": 1.4926, + "step": 8400 + }, + { + "epoch": 0.8861814345991561, + "grad_norm": 0.5457417368888855, + "learning_rate": 4.839257545425879e-05, + "loss": 1.5095, + "step": 8401 + }, + { + "epoch": 0.8862869198312237, + "grad_norm": 0.5429149866104126, + "learning_rate": 4.830389312664299e-05, + "loss": 1.506, + "step": 8402 + }, + { + "epoch": 0.8863924050632911, + "grad_norm": 0.6009023189544678, + "learning_rate": 4.821528942757494e-05, + "loss": 1.5278, + "step": 8403 + }, + { + "epoch": 0.8864978902953586, + "grad_norm": 0.6230659484863281, + "learning_rate": 4.8126764366983126e-05, + "loss": 1.5186, + "step": 8404 + }, + { + "epoch": 0.8866033755274262, + "grad_norm": 0.7615289092063904, + "learning_rate": 4.803831795478719e-05, + "loss": 1.5112, + "step": 8405 + }, + { + "epoch": 0.8867088607594936, + "grad_norm": 0.6172685027122498, + "learning_rate": 4.794995020089804e-05, + "loss": 1.5346, + "step": 8406 + }, + { + "epoch": 0.8868143459915612, + "grad_norm": 0.553214430809021, + "learning_rate": 4.7861661115217754e-05, + "loss": 1.5302, + "step": 8407 + }, + { + "epoch": 0.8869198312236287, + "grad_norm": 0.5762004256248474, + "learning_rate": 4.7773450707639414e-05, + "loss": 1.502, + "step": 8408 + }, + { + "epoch": 0.8870253164556962, + "grad_norm": 0.6531417369842529, + "learning_rate": 4.768531898804754e-05, + "loss": 1.541, + "step": 8409 + }, + { + "epoch": 
0.8871308016877637, + "grad_norm": 0.6308422088623047, + "learning_rate": 4.75972659663178e-05, + "loss": 1.4883, + "step": 8410 + }, + { + "epoch": 0.8872362869198313, + "grad_norm": 0.5872061252593994, + "learning_rate": 4.75092916523169e-05, + "loss": 1.5277, + "step": 8411 + }, + { + "epoch": 0.8873417721518987, + "grad_norm": 0.5628679990768433, + "learning_rate": 4.742139605590279e-05, + "loss": 1.4863, + "step": 8412 + }, + { + "epoch": 0.8874472573839662, + "grad_norm": 0.5831801295280457, + "learning_rate": 4.733357918692466e-05, + "loss": 1.5213, + "step": 8413 + }, + { + "epoch": 0.8875527426160338, + "grad_norm": 0.573511004447937, + "learning_rate": 4.7245841055222726e-05, + "loss": 1.5097, + "step": 8414 + }, + { + "epoch": 0.8876582278481012, + "grad_norm": 0.5486517548561096, + "learning_rate": 4.715818167062863e-05, + "loss": 1.5147, + "step": 8415 + }, + { + "epoch": 0.8877637130801688, + "grad_norm": 0.5972833037376404, + "learning_rate": 4.7070601042964925e-05, + "loss": 1.4668, + "step": 8416 + }, + { + "epoch": 0.8878691983122363, + "grad_norm": 0.5848897695541382, + "learning_rate": 4.698309918204552e-05, + "loss": 1.4845, + "step": 8417 + }, + { + "epoch": 0.8879746835443038, + "grad_norm": 0.5550430417060852, + "learning_rate": 4.6895676097675225e-05, + "loss": 1.4975, + "step": 8418 + }, + { + "epoch": 0.8880801687763713, + "grad_norm": 0.5787193775177002, + "learning_rate": 4.680833179965063e-05, + "loss": 1.4917, + "step": 8419 + }, + { + "epoch": 0.8881856540084389, + "grad_norm": 0.5594303011894226, + "learning_rate": 4.672106629775882e-05, + "loss": 1.5085, + "step": 8420 + }, + { + "epoch": 0.8882911392405063, + "grad_norm": 0.5635338425636292, + "learning_rate": 4.663387960177848e-05, + "loss": 1.506, + "step": 8421 + }, + { + "epoch": 0.8883966244725738, + "grad_norm": 0.5539011359214783, + "learning_rate": 4.654677172147912e-05, + "loss": 1.5043, + "step": 8422 + }, + { + "epoch": 0.8885021097046414, + "grad_norm": 
0.5639219880104065, + "learning_rate": 4.645974266662176e-05, + "loss": 1.5227, + "step": 8423 + }, + { + "epoch": 0.8886075949367088, + "grad_norm": 0.5776867866516113, + "learning_rate": 4.637279244695844e-05, + "loss": 1.4732, + "step": 8424 + }, + { + "epoch": 0.8887130801687764, + "grad_norm": 0.5349134802818298, + "learning_rate": 4.628592107223229e-05, + "loss": 1.4836, + "step": 8425 + }, + { + "epoch": 0.8888185654008439, + "grad_norm": 0.548683226108551, + "learning_rate": 4.6199128552177756e-05, + "loss": 1.464, + "step": 8426 + }, + { + "epoch": 0.8889240506329114, + "grad_norm": 0.5292691588401794, + "learning_rate": 4.611241489652016e-05, + "loss": 1.5338, + "step": 8427 + }, + { + "epoch": 0.8890295358649789, + "grad_norm": 0.5860678553581238, + "learning_rate": 4.6025780114976545e-05, + "loss": 1.4963, + "step": 8428 + }, + { + "epoch": 0.8891350210970465, + "grad_norm": 0.5543019771575928, + "learning_rate": 4.5939224217254574e-05, + "loss": 1.5103, + "step": 8429 + }, + { + "epoch": 0.8892405063291139, + "grad_norm": 0.5772046446800232, + "learning_rate": 4.585274721305333e-05, + "loss": 1.4862, + "step": 8430 + }, + { + "epoch": 0.8893459915611814, + "grad_norm": 0.5492841601371765, + "learning_rate": 4.576634911206296e-05, + "loss": 1.4782, + "step": 8431 + }, + { + "epoch": 0.889451476793249, + "grad_norm": 0.5841895937919617, + "learning_rate": 4.5680029923964724e-05, + "loss": 1.4743, + "step": 8432 + }, + { + "epoch": 0.8895569620253164, + "grad_norm": 0.5875382423400879, + "learning_rate": 4.559378965843122e-05, + "loss": 1.4613, + "step": 8433 + }, + { + "epoch": 0.889662447257384, + "grad_norm": 0.5307220220565796, + "learning_rate": 4.5507628325126144e-05, + "loss": 1.4733, + "step": 8434 + }, + { + "epoch": 0.8897679324894515, + "grad_norm": 0.5387293100357056, + "learning_rate": 4.542154593370401e-05, + "loss": 1.5303, + "step": 8435 + }, + { + "epoch": 0.889873417721519, + "grad_norm": 0.6033880114555359, + "learning_rate": 
4.533554249381119e-05, + "loss": 1.5159, + "step": 8436 + }, + { + "epoch": 0.8899789029535865, + "grad_norm": 0.5801428556442261, + "learning_rate": 4.524961801508456e-05, + "loss": 1.4808, + "step": 8437 + }, + { + "epoch": 0.890084388185654, + "grad_norm": 0.5733945965766907, + "learning_rate": 4.5163772507152425e-05, + "loss": 1.5127, + "step": 8438 + }, + { + "epoch": 0.8901898734177215, + "grad_norm": 0.5838088989257812, + "learning_rate": 4.507800597963424e-05, + "loss": 1.5018, + "step": 8439 + }, + { + "epoch": 0.890295358649789, + "grad_norm": 0.5791057348251343, + "learning_rate": 4.4992318442140575e-05, + "loss": 1.5152, + "step": 8440 + }, + { + "epoch": 0.8904008438818566, + "grad_norm": 0.5503762364387512, + "learning_rate": 4.490670990427309e-05, + "loss": 1.4967, + "step": 8441 + }, + { + "epoch": 0.890506329113924, + "grad_norm": 0.5849024653434753, + "learning_rate": 4.4821180375624684e-05, + "loss": 1.4845, + "step": 8442 + }, + { + "epoch": 0.8906118143459916, + "grad_norm": 0.5609479546546936, + "learning_rate": 4.473572986577928e-05, + "loss": 1.502, + "step": 8443 + }, + { + "epoch": 0.8907172995780591, + "grad_norm": 0.5370166301727295, + "learning_rate": 4.4650358384312056e-05, + "loss": 1.4898, + "step": 8444 + }, + { + "epoch": 0.8908227848101266, + "grad_norm": 0.5581381320953369, + "learning_rate": 4.4565065940789515e-05, + "loss": 1.4798, + "step": 8445 + }, + { + "epoch": 0.8909282700421941, + "grad_norm": 0.6102114915847778, + "learning_rate": 4.447985254476894e-05, + "loss": 1.5038, + "step": 8446 + }, + { + "epoch": 0.8910337552742617, + "grad_norm": 0.6447523236274719, + "learning_rate": 4.439471820579885e-05, + "loss": 1.5258, + "step": 8447 + }, + { + "epoch": 0.8911392405063291, + "grad_norm": 0.574702799320221, + "learning_rate": 4.430966293341912e-05, + "loss": 1.5211, + "step": 8448 + }, + { + "epoch": 0.8912447257383966, + "grad_norm": 0.6003491878509521, + "learning_rate": 4.422468673716054e-05, + "loss": 1.5289, + 
"step": 8449 + }, + { + "epoch": 0.8913502109704642, + "grad_norm": 0.630527913570404, + "learning_rate": 4.413978962654508e-05, + "loss": 1.4937, + "step": 8450 + }, + { + "epoch": 0.8914556962025316, + "grad_norm": 0.5644270777702332, + "learning_rate": 4.405497161108596e-05, + "loss": 1.4903, + "step": 8451 + }, + { + "epoch": 0.8915611814345992, + "grad_norm": 0.5437998175621033, + "learning_rate": 4.397023270028749e-05, + "loss": 1.5, + "step": 8452 + }, + { + "epoch": 0.8916666666666667, + "grad_norm": 0.5849775075912476, + "learning_rate": 4.388557290364484e-05, + "loss": 1.4947, + "step": 8453 + }, + { + "epoch": 0.8917721518987342, + "grad_norm": 0.5874205827713013, + "learning_rate": 4.3800992230644904e-05, + "loss": 1.5248, + "step": 8454 + }, + { + "epoch": 0.8918776371308017, + "grad_norm": 0.5877450108528137, + "learning_rate": 4.3716490690765194e-05, + "loss": 1.4605, + "step": 8455 + }, + { + "epoch": 0.8919831223628693, + "grad_norm": 0.5616570711135864, + "learning_rate": 4.3632068293474545e-05, + "loss": 1.5221, + "step": 8456 + }, + { + "epoch": 0.8920886075949367, + "grad_norm": 0.5373104214668274, + "learning_rate": 4.35477250482329e-05, + "loss": 1.4824, + "step": 8457 + }, + { + "epoch": 0.8921940928270042, + "grad_norm": 0.5798189640045166, + "learning_rate": 4.346346096449136e-05, + "loss": 1.493, + "step": 8458 + }, + { + "epoch": 0.8922995780590718, + "grad_norm": 0.5906557440757751, + "learning_rate": 4.337927605169212e-05, + "loss": 1.5087, + "step": 8459 + }, + { + "epoch": 0.8924050632911392, + "grad_norm": 0.5646762251853943, + "learning_rate": 4.3295170319268554e-05, + "loss": 1.5237, + "step": 8460 + }, + { + "epoch": 0.8925105485232068, + "grad_norm": 0.6247727870941162, + "learning_rate": 4.321114377664495e-05, + "loss": 1.517, + "step": 8461 + }, + { + "epoch": 0.8926160337552742, + "grad_norm": 0.5456817150115967, + "learning_rate": 4.3127196433237205e-05, + "loss": 1.4925, + "step": 8462 + }, + { + "epoch": 
0.8927215189873418, + "grad_norm": 0.6208202838897705, + "learning_rate": 4.304332829845187e-05, + "loss": 1.4887, + "step": 8463 + }, + { + "epoch": 0.8928270042194093, + "grad_norm": 0.6312853693962097, + "learning_rate": 4.2959539381686843e-05, + "loss": 1.4901, + "step": 8464 + }, + { + "epoch": 0.8929324894514767, + "grad_norm": 0.5506435632705688, + "learning_rate": 4.287582969233103e-05, + "loss": 1.5059, + "step": 8465 + }, + { + "epoch": 0.8930379746835443, + "grad_norm": 0.5448291301727295, + "learning_rate": 4.279219923976452e-05, + "loss": 1.4904, + "step": 8466 + }, + { + "epoch": 0.8931434599156118, + "grad_norm": 0.559116542339325, + "learning_rate": 4.2708648033358554e-05, + "loss": 1.5051, + "step": 8467 + }, + { + "epoch": 0.8932489451476793, + "grad_norm": 0.5484091639518738, + "learning_rate": 4.26251760824754e-05, + "loss": 1.4835, + "step": 8468 + }, + { + "epoch": 0.8933544303797468, + "grad_norm": 0.5549167394638062, + "learning_rate": 4.2541783396468584e-05, + "loss": 1.5057, + "step": 8469 + }, + { + "epoch": 0.8934599156118144, + "grad_norm": 0.5379235148429871, + "learning_rate": 4.245846998468261e-05, + "loss": 1.516, + "step": 8470 + }, + { + "epoch": 0.8935654008438818, + "grad_norm": 0.6288155913352966, + "learning_rate": 4.2375235856453197e-05, + "loss": 1.4968, + "step": 8471 + }, + { + "epoch": 0.8936708860759494, + "grad_norm": 0.5785971879959106, + "learning_rate": 4.229208102110721e-05, + "loss": 1.4854, + "step": 8472 + }, + { + "epoch": 0.8937763713080169, + "grad_norm": 0.5605460405349731, + "learning_rate": 4.220900548796244e-05, + "loss": 1.519, + "step": 8473 + }, + { + "epoch": 0.8938818565400843, + "grad_norm": 0.5578184127807617, + "learning_rate": 4.212600926632804e-05, + "loss": 1.4755, + "step": 8474 + }, + { + "epoch": 0.8939873417721519, + "grad_norm": 0.5885234475135803, + "learning_rate": 4.204309236550405e-05, + "loss": 1.5132, + "step": 8475 + }, + { + "epoch": 0.8940928270042194, + "grad_norm": 
0.5205344557762146, + "learning_rate": 4.1960254794781714e-05, + "loss": 1.4469, + "step": 8476 + }, + { + "epoch": 0.8941983122362869, + "grad_norm": 0.5648173689842224, + "learning_rate": 4.1877496563443446e-05, + "loss": 1.5046, + "step": 8477 + }, + { + "epoch": 0.8943037974683544, + "grad_norm": 0.542081356048584, + "learning_rate": 4.179481768076274e-05, + "loss": 1.4975, + "step": 8478 + }, + { + "epoch": 0.894409282700422, + "grad_norm": 0.6246916651725769, + "learning_rate": 4.1712218156004014e-05, + "loss": 1.472, + "step": 8479 + }, + { + "epoch": 0.8945147679324894, + "grad_norm": 0.5438011884689331, + "learning_rate": 4.16296979984232e-05, + "loss": 1.4999, + "step": 8480 + }, + { + "epoch": 0.894620253164557, + "grad_norm": 0.5628832578659058, + "learning_rate": 4.154725721726699e-05, + "loss": 1.5163, + "step": 8481 + }, + { + "epoch": 0.8947257383966245, + "grad_norm": 0.5375081896781921, + "learning_rate": 4.1464895821773235e-05, + "loss": 1.5276, + "step": 8482 + }, + { + "epoch": 0.8948312236286919, + "grad_norm": 0.6022173166275024, + "learning_rate": 4.138261382117098e-05, + "loss": 1.5017, + "step": 8483 + }, + { + "epoch": 0.8949367088607595, + "grad_norm": 0.5810072422027588, + "learning_rate": 4.130041122468042e-05, + "loss": 1.491, + "step": 8484 + }, + { + "epoch": 0.895042194092827, + "grad_norm": 0.5207911133766174, + "learning_rate": 4.1218288041512534e-05, + "loss": 1.497, + "step": 8485 + }, + { + "epoch": 0.8951476793248945, + "grad_norm": 0.533916711807251, + "learning_rate": 4.113624428086987e-05, + "loss": 1.5065, + "step": 8486 + }, + { + "epoch": 0.895253164556962, + "grad_norm": 0.5203202366828918, + "learning_rate": 4.105427995194566e-05, + "loss": 1.5326, + "step": 8487 + }, + { + "epoch": 0.8953586497890296, + "grad_norm": 0.5142279267311096, + "learning_rate": 4.0972395063924554e-05, + "loss": 1.4771, + "step": 8488 + }, + { + "epoch": 0.895464135021097, + "grad_norm": 0.5551392436027527, + "learning_rate": 
4.089058962598213e-05, + "loss": 1.491, + "step": 8489 + }, + { + "epoch": 0.8955696202531646, + "grad_norm": 0.5313698053359985, + "learning_rate": 4.080886364728506e-05, + "loss": 1.4678, + "step": 8490 + }, + { + "epoch": 0.8956751054852321, + "grad_norm": 0.5310467481613159, + "learning_rate": 4.072721713699118e-05, + "loss": 1.4704, + "step": 8491 + }, + { + "epoch": 0.8957805907172995, + "grad_norm": 0.5971317887306213, + "learning_rate": 4.064565010424942e-05, + "loss": 1.4679, + "step": 8492 + }, + { + "epoch": 0.8958860759493671, + "grad_norm": 0.5715688467025757, + "learning_rate": 4.056416255819964e-05, + "loss": 1.4525, + "step": 8493 + }, + { + "epoch": 0.8959915611814346, + "grad_norm": 0.5871835350990295, + "learning_rate": 4.048275450797312e-05, + "loss": 1.4666, + "step": 8494 + }, + { + "epoch": 0.8960970464135021, + "grad_norm": 0.5402570366859436, + "learning_rate": 4.0401425962691804e-05, + "loss": 1.5546, + "step": 8495 + }, + { + "epoch": 0.8962025316455696, + "grad_norm": 0.5460636615753174, + "learning_rate": 4.032017693146908e-05, + "loss": 1.4975, + "step": 8496 + }, + { + "epoch": 0.8963080168776372, + "grad_norm": 0.5785344243049622, + "learning_rate": 4.023900742340941e-05, + "loss": 1.4867, + "step": 8497 + }, + { + "epoch": 0.8964135021097046, + "grad_norm": 0.5271035432815552, + "learning_rate": 4.015791744760811e-05, + "loss": 1.4848, + "step": 8498 + }, + { + "epoch": 0.8965189873417722, + "grad_norm": 0.5679897665977478, + "learning_rate": 4.0076907013151726e-05, + "loss": 1.4893, + "step": 8499 + }, + { + "epoch": 0.8966244725738397, + "grad_norm": 0.5835543274879456, + "learning_rate": 3.999597612911793e-05, + "loss": 1.4989, + "step": 8500 + }, + { + "epoch": 0.8967299578059071, + "grad_norm": 0.5943140387535095, + "learning_rate": 3.991512480457546e-05, + "loss": 1.5007, + "step": 8501 + }, + { + "epoch": 0.8968354430379747, + "grad_norm": 0.6093449592590332, + "learning_rate": 3.9834353048583984e-05, + "loss": 1.5254, + 
"step": 8502 + }, + { + "epoch": 0.8969409282700422, + "grad_norm": 0.5563914775848389, + "learning_rate": 3.9753660870194524e-05, + "loss": 1.4829, + "step": 8503 + }, + { + "epoch": 0.8970464135021097, + "grad_norm": 0.5232620239257812, + "learning_rate": 3.967304827844892e-05, + "loss": 1.5113, + "step": 8504 + }, + { + "epoch": 0.8971518987341772, + "grad_norm": 0.5581002235412598, + "learning_rate": 3.95925152823802e-05, + "loss": 1.4785, + "step": 8505 + }, + { + "epoch": 0.8972573839662448, + "grad_norm": 0.5683492422103882, + "learning_rate": 3.9512061891012643e-05, + "loss": 1.4692, + "step": 8506 + }, + { + "epoch": 0.8973628691983122, + "grad_norm": 0.5619034767150879, + "learning_rate": 3.943168811336137e-05, + "loss": 1.4969, + "step": 8507 + }, + { + "epoch": 0.8974683544303798, + "grad_norm": 0.5309977531433105, + "learning_rate": 3.93513939584326e-05, + "loss": 1.5268, + "step": 8508 + }, + { + "epoch": 0.8975738396624473, + "grad_norm": 0.5801534652709961, + "learning_rate": 3.927117943522379e-05, + "loss": 1.5142, + "step": 8509 + }, + { + "epoch": 0.8976793248945147, + "grad_norm": 0.5438864827156067, + "learning_rate": 3.9191044552723345e-05, + "loss": 1.4802, + "step": 8510 + }, + { + "epoch": 0.8977848101265823, + "grad_norm": 0.5463704466819763, + "learning_rate": 3.911098931991075e-05, + "loss": 1.4806, + "step": 8511 + }, + { + "epoch": 0.8978902953586498, + "grad_norm": 0.5456312894821167, + "learning_rate": 3.9031013745756655e-05, + "loss": 1.4947, + "step": 8512 + }, + { + "epoch": 0.8979957805907173, + "grad_norm": 0.5544512867927551, + "learning_rate": 3.895111783922256e-05, + "loss": 1.4824, + "step": 8513 + }, + { + "epoch": 0.8981012658227848, + "grad_norm": 0.560992419719696, + "learning_rate": 3.887130160926139e-05, + "loss": 1.464, + "step": 8514 + }, + { + "epoch": 0.8982067510548524, + "grad_norm": 0.5619199275970459, + "learning_rate": 3.879156506481699e-05, + "loss": 1.4978, + "step": 8515 + }, + { + "epoch": 
0.8983122362869198, + "grad_norm": 0.5714251399040222, + "learning_rate": 3.8711908214824035e-05, + "loss": 1.5117, + "step": 8516 + }, + { + "epoch": 0.8984177215189874, + "grad_norm": 0.5529679656028748, + "learning_rate": 3.863233106820857e-05, + "loss": 1.5053, + "step": 8517 + }, + { + "epoch": 0.8985232067510549, + "grad_norm": 0.5838978290557861, + "learning_rate": 3.855283363388762e-05, + "loss": 1.4993, + "step": 8518 + }, + { + "epoch": 0.8986286919831223, + "grad_norm": 0.5500903129577637, + "learning_rate": 3.8473415920769304e-05, + "loss": 1.469, + "step": 8519 + }, + { + "epoch": 0.8987341772151899, + "grad_norm": 0.6247581243515015, + "learning_rate": 3.839407793775268e-05, + "loss": 1.5205, + "step": 8520 + }, + { + "epoch": 0.8988396624472574, + "grad_norm": 0.5571051239967346, + "learning_rate": 3.8314819693727966e-05, + "loss": 1.495, + "step": 8521 + }, + { + "epoch": 0.8989451476793249, + "grad_norm": 0.5654444098472595, + "learning_rate": 3.823564119757647e-05, + "loss": 1.5152, + "step": 8522 + }, + { + "epoch": 0.8990506329113924, + "grad_norm": 0.5889667272567749, + "learning_rate": 3.81565424581706e-05, + "loss": 1.513, + "step": 8523 + }, + { + "epoch": 0.89915611814346, + "grad_norm": 0.538837730884552, + "learning_rate": 3.8077523484373764e-05, + "loss": 1.4789, + "step": 8524 + }, + { + "epoch": 0.8992616033755274, + "grad_norm": 0.6072540879249573, + "learning_rate": 3.79985842850403e-05, + "loss": 1.5294, + "step": 8525 + }, + { + "epoch": 0.899367088607595, + "grad_norm": 0.5903694033622742, + "learning_rate": 3.791972486901596e-05, + "loss": 1.5426, + "step": 8526 + }, + { + "epoch": 0.8994725738396624, + "grad_norm": 0.5237528085708618, + "learning_rate": 3.784094524513709e-05, + "loss": 1.4978, + "step": 8527 + }, + { + "epoch": 0.8995780590717299, + "grad_norm": 0.5934498906135559, + "learning_rate": 3.7762245422231476e-05, + "loss": 1.5089, + "step": 8528 + }, + { + "epoch": 0.8996835443037975, + "grad_norm": 
0.6087724566459656, + "learning_rate": 3.768362540911788e-05, + "loss": 1.4941, + "step": 8529 + }, + { + "epoch": 0.8997890295358649, + "grad_norm": 0.5761503577232361, + "learning_rate": 3.760508521460584e-05, + "loss": 1.5238, + "step": 8530 + }, + { + "epoch": 0.8998945147679325, + "grad_norm": 0.5950917601585388, + "learning_rate": 3.7526624847496335e-05, + "loss": 1.5019, + "step": 8531 + }, + { + "epoch": 0.9, + "grad_norm": 0.6571856141090393, + "learning_rate": 3.744824431658131e-05, + "loss": 1.4975, + "step": 8532 + }, + { + "epoch": 0.9001054852320675, + "grad_norm": 0.5542988181114197, + "learning_rate": 3.736994363064358e-05, + "loss": 1.4763, + "step": 8533 + }, + { + "epoch": 0.900210970464135, + "grad_norm": 0.5522314310073853, + "learning_rate": 3.7291722798457215e-05, + "loss": 1.5338, + "step": 8534 + }, + { + "epoch": 0.9003164556962026, + "grad_norm": 0.5595475435256958, + "learning_rate": 3.72135818287872e-05, + "loss": 1.4887, + "step": 8535 + }, + { + "epoch": 0.90042194092827, + "grad_norm": 0.5805642008781433, + "learning_rate": 3.713552073038953e-05, + "loss": 1.5267, + "step": 8536 + }, + { + "epoch": 0.9005274261603375, + "grad_norm": 0.6210959553718567, + "learning_rate": 3.705753951201146e-05, + "loss": 1.5433, + "step": 8537 + }, + { + "epoch": 0.9006329113924051, + "grad_norm": 0.5983858704566956, + "learning_rate": 3.697963818239117e-05, + "loss": 1.4989, + "step": 8538 + }, + { + "epoch": 0.9007383966244725, + "grad_norm": 0.5669243931770325, + "learning_rate": 3.690181675025775e-05, + "loss": 1.4867, + "step": 8539 + }, + { + "epoch": 0.9008438818565401, + "grad_norm": 0.5463601350784302, + "learning_rate": 3.682407522433173e-05, + "loss": 1.5208, + "step": 8540 + }, + { + "epoch": 0.9009493670886076, + "grad_norm": 0.5222317576408386, + "learning_rate": 3.674641361332423e-05, + "loss": 1.5005, + "step": 8541 + }, + { + "epoch": 0.9010548523206751, + "grad_norm": 0.5905207991600037, + "learning_rate": 3.66688319259377e-05, + 
"loss": 1.5097, + "step": 8542 + }, + { + "epoch": 0.9011603375527426, + "grad_norm": 0.5698584318161011, + "learning_rate": 3.6591330170865524e-05, + "loss": 1.4775, + "step": 8543 + }, + { + "epoch": 0.9012658227848102, + "grad_norm": 0.5480445027351379, + "learning_rate": 3.6513908356792244e-05, + "loss": 1.5158, + "step": 8544 + }, + { + "epoch": 0.9013713080168776, + "grad_norm": 0.5313002467155457, + "learning_rate": 3.643656649239327e-05, + "loss": 1.4592, + "step": 8545 + }, + { + "epoch": 0.9014767932489451, + "grad_norm": 0.5894957780838013, + "learning_rate": 3.635930458633516e-05, + "loss": 1.5028, + "step": 8546 + }, + { + "epoch": 0.9015822784810127, + "grad_norm": 0.5578500628471375, + "learning_rate": 3.628212264727548e-05, + "loss": 1.4848, + "step": 8547 + }, + { + "epoch": 0.9016877637130801, + "grad_norm": 0.5416409969329834, + "learning_rate": 3.6205020683862836e-05, + "loss": 1.5203, + "step": 8548 + }, + { + "epoch": 0.9017932489451477, + "grad_norm": 0.5539082884788513, + "learning_rate": 3.612799870473696e-05, + "loss": 1.4728, + "step": 8549 + }, + { + "epoch": 0.9018987341772152, + "grad_norm": 0.5584364533424377, + "learning_rate": 3.605105671852854e-05, + "loss": 1.5188, + "step": 8550 + }, + { + "epoch": 0.9020042194092827, + "grad_norm": 0.5456465482711792, + "learning_rate": 3.597419473385935e-05, + "loss": 1.4737, + "step": 8551 + }, + { + "epoch": 0.9021097046413502, + "grad_norm": 0.5468533635139465, + "learning_rate": 3.5897412759342e-05, + "loss": 1.4793, + "step": 8552 + }, + { + "epoch": 0.9022151898734178, + "grad_norm": 0.6087284088134766, + "learning_rate": 3.582071080358043e-05, + "loss": 1.4985, + "step": 8553 + }, + { + "epoch": 0.9023206751054852, + "grad_norm": 0.5455145835876465, + "learning_rate": 3.5744088875169446e-05, + "loss": 1.5102, + "step": 8554 + }, + { + "epoch": 0.9024261603375527, + "grad_norm": 0.5932387113571167, + "learning_rate": 3.566754698269492e-05, + "loss": 1.5053, + "step": 8555 + }, + { + 
"epoch": 0.9025316455696203, + "grad_norm": 0.5775589942932129, + "learning_rate": 3.5591085134733666e-05, + "loss": 1.508, + "step": 8556 + }, + { + "epoch": 0.9026371308016877, + "grad_norm": 0.5358015894889832, + "learning_rate": 3.5514703339853656e-05, + "loss": 1.439, + "step": 8557 + }, + { + "epoch": 0.9027426160337553, + "grad_norm": 0.5621095895767212, + "learning_rate": 3.543840160661396e-05, + "loss": 1.4744, + "step": 8558 + }, + { + "epoch": 0.9028481012658228, + "grad_norm": 0.5668265223503113, + "learning_rate": 3.5362179943564496e-05, + "loss": 1.5021, + "step": 8559 + }, + { + "epoch": 0.9029535864978903, + "grad_norm": 0.5290742516517639, + "learning_rate": 3.528603835924626e-05, + "loss": 1.4792, + "step": 8560 + }, + { + "epoch": 0.9030590717299578, + "grad_norm": 0.5565457940101624, + "learning_rate": 3.520997686219127e-05, + "loss": 1.5037, + "step": 8561 + }, + { + "epoch": 0.9031645569620254, + "grad_norm": 0.5628176927566528, + "learning_rate": 3.513399546092269e-05, + "loss": 1.5354, + "step": 8562 + }, + { + "epoch": 0.9032700421940928, + "grad_norm": 0.559477686882019, + "learning_rate": 3.5058094163954556e-05, + "loss": 1.4932, + "step": 8563 + }, + { + "epoch": 0.9033755274261603, + "grad_norm": 0.6907073855400085, + "learning_rate": 3.498227297979198e-05, + "loss": 1.4782, + "step": 8564 + }, + { + "epoch": 0.9034810126582279, + "grad_norm": 0.5929887890815735, + "learning_rate": 3.4906531916931075e-05, + "loss": 1.5032, + "step": 8565 + }, + { + "epoch": 0.9035864978902953, + "grad_norm": 0.5625991225242615, + "learning_rate": 3.483087098385906e-05, + "loss": 1.5383, + "step": 8566 + }, + { + "epoch": 0.9036919831223629, + "grad_norm": 0.5826674103736877, + "learning_rate": 3.475529018905416e-05, + "loss": 1.5149, + "step": 8567 + }, + { + "epoch": 0.9037974683544304, + "grad_norm": 0.5653082132339478, + "learning_rate": 3.467978954098549e-05, + "loss": 1.4835, + "step": 8568 + }, + { + "epoch": 0.9039029535864979, + "grad_norm": 
0.5382222533226013, + "learning_rate": 3.46043690481134e-05, + "loss": 1.5056, + "step": 8569 + }, + { + "epoch": 0.9040084388185654, + "grad_norm": 0.6207802891731262, + "learning_rate": 3.4529028718888935e-05, + "loss": 1.5261, + "step": 8570 + }, + { + "epoch": 0.904113924050633, + "grad_norm": 0.6002964973449707, + "learning_rate": 3.4453768561754525e-05, + "loss": 1.4982, + "step": 8571 + }, + { + "epoch": 0.9042194092827004, + "grad_norm": 0.5225180983543396, + "learning_rate": 3.437858858514334e-05, + "loss": 1.4697, + "step": 8572 + }, + { + "epoch": 0.9043248945147679, + "grad_norm": 0.5575162172317505, + "learning_rate": 3.43034887974798e-05, + "loss": 1.4984, + "step": 8573 + }, + { + "epoch": 0.9044303797468355, + "grad_norm": 0.5616544485092163, + "learning_rate": 3.422846920717893e-05, + "loss": 1.5078, + "step": 8574 + }, + { + "epoch": 0.9045358649789029, + "grad_norm": 0.5615770220756531, + "learning_rate": 3.4153529822647414e-05, + "loss": 1.5088, + "step": 8575 + }, + { + "epoch": 0.9046413502109705, + "grad_norm": 0.6522559523582458, + "learning_rate": 3.4078670652282374e-05, + "loss": 1.4743, + "step": 8576 + }, + { + "epoch": 0.904746835443038, + "grad_norm": 0.5824788212776184, + "learning_rate": 3.400389170447218e-05, + "loss": 1.5212, + "step": 8577 + }, + { + "epoch": 0.9048523206751055, + "grad_norm": 0.5391163229942322, + "learning_rate": 3.392919298759623e-05, + "loss": 1.4781, + "step": 8578 + }, + { + "epoch": 0.904957805907173, + "grad_norm": 0.5472601056098938, + "learning_rate": 3.38545745100248e-05, + "loss": 1.5228, + "step": 8579 + }, + { + "epoch": 0.9050632911392406, + "grad_norm": 0.5734350681304932, + "learning_rate": 3.378003628011938e-05, + "loss": 1.5059, + "step": 8580 + }, + { + "epoch": 0.905168776371308, + "grad_norm": 0.5475274920463562, + "learning_rate": 3.3705578306232224e-05, + "loss": 1.4859, + "step": 8581 + }, + { + "epoch": 0.9052742616033755, + "grad_norm": 0.5977222919464111, + "learning_rate": 
3.363120059670688e-05, + "loss": 1.5215, + "step": 8582 + }, + { + "epoch": 0.9053797468354431, + "grad_norm": 0.6928655505180359, + "learning_rate": 3.355690315987761e-05, + "loss": 1.5016, + "step": 8583 + }, + { + "epoch": 0.9054852320675105, + "grad_norm": 0.5835947394371033, + "learning_rate": 3.3482686004069755e-05, + "loss": 1.4815, + "step": 8584 + }, + { + "epoch": 0.9055907172995781, + "grad_norm": 0.5684735178947449, + "learning_rate": 3.340854913759983e-05, + "loss": 1.4981, + "step": 8585 + }, + { + "epoch": 0.9056962025316456, + "grad_norm": 0.5681020021438599, + "learning_rate": 3.3334492568775355e-05, + "loss": 1.4831, + "step": 8586 + }, + { + "epoch": 0.9058016877637131, + "grad_norm": 0.6867591142654419, + "learning_rate": 3.3260516305894526e-05, + "loss": 1.5112, + "step": 8587 + }, + { + "epoch": 0.9059071729957806, + "grad_norm": 0.6137024164199829, + "learning_rate": 3.318662035724679e-05, + "loss": 1.4749, + "step": 8588 + }, + { + "epoch": 0.9060126582278482, + "grad_norm": 0.5893077254295349, + "learning_rate": 3.31128047311127e-05, + "loss": 1.5228, + "step": 8589 + }, + { + "epoch": 0.9061181434599156, + "grad_norm": 0.6284685730934143, + "learning_rate": 3.303906943576346e-05, + "loss": 1.5341, + "step": 8590 + }, + { + "epoch": 0.9062236286919831, + "grad_norm": 0.526959240436554, + "learning_rate": 3.296541447946164e-05, + "loss": 1.4922, + "step": 8591 + }, + { + "epoch": 0.9063291139240506, + "grad_norm": 0.5439159870147705, + "learning_rate": 3.2891839870460546e-05, + "loss": 1.5002, + "step": 8592 + }, + { + "epoch": 0.9064345991561181, + "grad_norm": 0.6530102491378784, + "learning_rate": 3.281834561700467e-05, + "loss": 1.5203, + "step": 8593 + }, + { + "epoch": 0.9065400843881857, + "grad_norm": 0.606492280960083, + "learning_rate": 3.274493172732926e-05, + "loss": 1.468, + "step": 8594 + }, + { + "epoch": 0.9066455696202531, + "grad_norm": 0.5872029066085815, + "learning_rate": 3.26715982096609e-05, + "loss": 1.5559, + "step": 
8595 + }, + { + "epoch": 0.9067510548523207, + "grad_norm": 0.7344939112663269, + "learning_rate": 3.259834507221684e-05, + "loss": 1.4296, + "step": 8596 + }, + { + "epoch": 0.9068565400843882, + "grad_norm": 0.5507740378379822, + "learning_rate": 3.2525172323205535e-05, + "loss": 1.4625, + "step": 8597 + }, + { + "epoch": 0.9069620253164556, + "grad_norm": 0.527687132358551, + "learning_rate": 3.2452079970826335e-05, + "loss": 1.5189, + "step": 8598 + }, + { + "epoch": 0.9070675105485232, + "grad_norm": 0.5551592707633972, + "learning_rate": 3.237906802326951e-05, + "loss": 1.4853, + "step": 8599 + }, + { + "epoch": 0.9071729957805907, + "grad_norm": 0.6220091581344604, + "learning_rate": 3.230613648871661e-05, + "loss": 1.517, + "step": 8600 + }, + { + "epoch": 0.9072784810126582, + "grad_norm": 0.5851929783821106, + "learning_rate": 3.223328537533976e-05, + "loss": 1.5198, + "step": 8601 + }, + { + "epoch": 0.9073839662447257, + "grad_norm": 0.5870986580848694, + "learning_rate": 3.216051469130243e-05, + "loss": 1.4935, + "step": 8602 + }, + { + "epoch": 0.9074894514767933, + "grad_norm": 0.5716292858123779, + "learning_rate": 3.208782444475894e-05, + "loss": 1.4788, + "step": 8603 + }, + { + "epoch": 0.9075949367088607, + "grad_norm": 0.6648808121681213, + "learning_rate": 3.201521464385443e-05, + "loss": 1.5123, + "step": 8604 + }, + { + "epoch": 0.9077004219409283, + "grad_norm": 0.6324127316474915, + "learning_rate": 3.194268529672539e-05, + "loss": 1.4783, + "step": 8605 + }, + { + "epoch": 0.9078059071729958, + "grad_norm": 0.5966345071792603, + "learning_rate": 3.187023641149908e-05, + "loss": 1.4873, + "step": 8606 + }, + { + "epoch": 0.9079113924050632, + "grad_norm": 0.5778228044509888, + "learning_rate": 3.1797867996293663e-05, + "loss": 1.4905, + "step": 8607 + }, + { + "epoch": 0.9080168776371308, + "grad_norm": 0.6405037641525269, + "learning_rate": 3.172558005921841e-05, + "loss": 1.4855, + "step": 8608 + }, + { + "epoch": 0.9081223628691983, + 
"grad_norm": 0.6559368371963501, + "learning_rate": 3.165337260837351e-05, + "loss": 1.51, + "step": 8609 + }, + { + "epoch": 0.9082278481012658, + "grad_norm": 0.6091207265853882, + "learning_rate": 3.158124565185022e-05, + "loss": 1.4888, + "step": 8610 + }, + { + "epoch": 0.9083333333333333, + "grad_norm": 0.5511375069618225, + "learning_rate": 3.1509199197730765e-05, + "loss": 1.5033, + "step": 8611 + }, + { + "epoch": 0.9084388185654009, + "grad_norm": 0.563088059425354, + "learning_rate": 3.143723325408826e-05, + "loss": 1.5015, + "step": 8612 + }, + { + "epoch": 0.9085443037974683, + "grad_norm": 0.5401620268821716, + "learning_rate": 3.136534782898667e-05, + "loss": 1.474, + "step": 8613 + }, + { + "epoch": 0.9086497890295359, + "grad_norm": 0.564789354801178, + "learning_rate": 3.129354293048148e-05, + "loss": 1.5208, + "step": 8614 + }, + { + "epoch": 0.9087552742616034, + "grad_norm": 0.5938335061073303, + "learning_rate": 3.122181856661857e-05, + "loss": 1.4482, + "step": 8615 + }, + { + "epoch": 0.9088607594936708, + "grad_norm": 0.6676617860794067, + "learning_rate": 3.1150174745435026e-05, + "loss": 1.5226, + "step": 8616 + }, + { + "epoch": 0.9089662447257384, + "grad_norm": 0.5780553817749023, + "learning_rate": 3.107861147495891e-05, + "loss": 1.4705, + "step": 8617 + }, + { + "epoch": 0.9090717299578059, + "grad_norm": 0.6455126404762268, + "learning_rate": 3.100712876320924e-05, + "loss": 1.5191, + "step": 8618 + }, + { + "epoch": 0.9091772151898734, + "grad_norm": 0.5760440826416016, + "learning_rate": 3.093572661819602e-05, + "loss": 1.4598, + "step": 8619 + }, + { + "epoch": 0.9092827004219409, + "grad_norm": 0.549587607383728, + "learning_rate": 3.086440504792026e-05, + "loss": 1.5086, + "step": 8620 + }, + { + "epoch": 0.9093881856540085, + "grad_norm": 0.5490609407424927, + "learning_rate": 3.079316406037375e-05, + "loss": 1.5008, + "step": 8621 + }, + { + "epoch": 0.9094936708860759, + "grad_norm": 0.5755451321601868, + "learning_rate": 
3.072200366353958e-05, + "loss": 1.4603, + "step": 8622 + }, + { + "epoch": 0.9095991561181435, + "grad_norm": 0.6271716356277466, + "learning_rate": 3.0650923865391395e-05, + "loss": 1.5095, + "step": 8623 + }, + { + "epoch": 0.909704641350211, + "grad_norm": 0.5389968752861023, + "learning_rate": 3.057992467389431e-05, + "loss": 1.4913, + "step": 8624 + }, + { + "epoch": 0.9098101265822784, + "grad_norm": 0.615045964717865, + "learning_rate": 3.0509006097004048e-05, + "loss": 1.5139, + "step": 8625 + }, + { + "epoch": 0.909915611814346, + "grad_norm": 0.5431857705116272, + "learning_rate": 3.043816814266734e-05, + "loss": 1.5105, + "step": 8626 + }, + { + "epoch": 0.9100210970464135, + "grad_norm": 0.5467764735221863, + "learning_rate": 3.0367410818821913e-05, + "loss": 1.5082, + "step": 8627 + }, + { + "epoch": 0.910126582278481, + "grad_norm": 0.5607613325119019, + "learning_rate": 3.029673413339651e-05, + "loss": 1.4539, + "step": 8628 + }, + { + "epoch": 0.9102320675105485, + "grad_norm": 0.5617594122886658, + "learning_rate": 3.022613809431088e-05, + "loss": 1.5239, + "step": 8629 + }, + { + "epoch": 0.9103375527426161, + "grad_norm": 0.5739703178405762, + "learning_rate": 3.015562270947553e-05, + "loss": 1.5484, + "step": 8630 + }, + { + "epoch": 0.9104430379746835, + "grad_norm": 0.5560736060142517, + "learning_rate": 3.0085187986792136e-05, + "loss": 1.5174, + "step": 8631 + }, + { + "epoch": 0.9105485232067511, + "grad_norm": 0.5696040391921997, + "learning_rate": 3.00148339341533e-05, + "loss": 1.4662, + "step": 8632 + }, + { + "epoch": 0.9106540084388186, + "grad_norm": 0.5527856945991516, + "learning_rate": 2.994456055944231e-05, + "loss": 1.4637, + "step": 8633 + }, + { + "epoch": 0.910759493670886, + "grad_norm": 0.5789701342582703, + "learning_rate": 2.9874367870534018e-05, + "loss": 1.5051, + "step": 8634 + }, + { + "epoch": 0.9108649789029536, + "grad_norm": 0.5700148940086365, + "learning_rate": 2.9804255875293645e-05, + "loss": 1.4971, + 
"step": 8635 + }, + { + "epoch": 0.9109704641350211, + "grad_norm": 0.550168514251709, + "learning_rate": 2.9734224581577568e-05, + "loss": 1.4583, + "step": 8636 + }, + { + "epoch": 0.9110759493670886, + "grad_norm": 0.5769275426864624, + "learning_rate": 2.966427399723326e-05, + "loss": 1.5176, + "step": 8637 + }, + { + "epoch": 0.9111814345991561, + "grad_norm": 0.5685549378395081, + "learning_rate": 2.959440413009895e-05, + "loss": 1.4787, + "step": 8638 + }, + { + "epoch": 0.9112869198312237, + "grad_norm": 0.5327998399734497, + "learning_rate": 2.952461498800388e-05, + "loss": 1.4694, + "step": 8639 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 0.6036297678947449, + "learning_rate": 2.945490657876837e-05, + "loss": 1.4641, + "step": 8640 + }, + { + "epoch": 0.9114978902953587, + "grad_norm": 0.5554484128952026, + "learning_rate": 2.938527891020351e-05, + "loss": 1.4754, + "step": 8641 + }, + { + "epoch": 0.9116033755274262, + "grad_norm": 0.5761685967445374, + "learning_rate": 2.931573199011148e-05, + "loss": 1.5035, + "step": 8642 + }, + { + "epoch": 0.9117088607594936, + "grad_norm": 0.5417206287384033, + "learning_rate": 2.92462658262852e-05, + "loss": 1.484, + "step": 8643 + }, + { + "epoch": 0.9118143459915612, + "grad_norm": 0.5651965141296387, + "learning_rate": 2.9176880426508957e-05, + "loss": 1.4749, + "step": 8644 + }, + { + "epoch": 0.9119198312236287, + "grad_norm": 0.5523425936698914, + "learning_rate": 2.9107575798557605e-05, + "loss": 1.4401, + "step": 8645 + }, + { + "epoch": 0.9120253164556962, + "grad_norm": 0.5770334601402283, + "learning_rate": 2.9038351950197107e-05, + "loss": 1.5106, + "step": 8646 + }, + { + "epoch": 0.9121308016877637, + "grad_norm": 0.5459643006324768, + "learning_rate": 2.8969208889184335e-05, + "loss": 1.4749, + "step": 8647 + }, + { + "epoch": 0.9122362869198313, + "grad_norm": 0.6100335717201233, + "learning_rate": 2.890014662326701e-05, + "loss": 1.5458, + "step": 8648 + }, + { + "epoch": 
0.9123417721518987, + "grad_norm": 0.5446184873580933, + "learning_rate": 2.8831165160184024e-05, + "loss": 1.4928, + "step": 8649 + }, + { + "epoch": 0.9124472573839663, + "grad_norm": 0.5652933120727539, + "learning_rate": 2.8762264507665113e-05, + "loss": 1.5114, + "step": 8650 + }, + { + "epoch": 0.9125527426160338, + "grad_norm": 0.5810913443565369, + "learning_rate": 2.869344467343077e-05, + "loss": 1.4995, + "step": 8651 + }, + { + "epoch": 0.9126582278481012, + "grad_norm": 0.5984285473823547, + "learning_rate": 2.862470566519265e-05, + "loss": 1.5143, + "step": 8652 + }, + { + "epoch": 0.9127637130801688, + "grad_norm": 0.562228262424469, + "learning_rate": 2.855604749065352e-05, + "loss": 1.503, + "step": 8653 + }, + { + "epoch": 0.9128691983122363, + "grad_norm": 0.5818116664886475, + "learning_rate": 2.8487470157506633e-05, + "loss": 1.52, + "step": 8654 + }, + { + "epoch": 0.9129746835443038, + "grad_norm": 0.5359835624694824, + "learning_rate": 2.84189736734366e-05, + "loss": 1.4733, + "step": 8655 + }, + { + "epoch": 0.9130801687763713, + "grad_norm": 0.6097909808158875, + "learning_rate": 2.8350558046118607e-05, + "loss": 1.4737, + "step": 8656 + }, + { + "epoch": 0.9131856540084389, + "grad_norm": 0.577505350112915, + "learning_rate": 2.828222328321911e-05, + "loss": 1.4502, + "step": 8657 + }, + { + "epoch": 0.9132911392405063, + "grad_norm": 0.5595988631248474, + "learning_rate": 2.8213969392395233e-05, + "loss": 1.4844, + "step": 8658 + }, + { + "epoch": 0.9133966244725739, + "grad_norm": 0.5516666173934937, + "learning_rate": 2.8145796381295276e-05, + "loss": 1.4718, + "step": 8659 + }, + { + "epoch": 0.9135021097046413, + "grad_norm": 0.5838126540184021, + "learning_rate": 2.807770425755829e-05, + "loss": 1.509, + "step": 8660 + }, + { + "epoch": 0.9136075949367088, + "grad_norm": 0.58052659034729, + "learning_rate": 2.800969302881434e-05, + "loss": 1.4643, + "step": 8661 + }, + { + "epoch": 0.9137130801687764, + "grad_norm": 
0.5465428829193115, + "learning_rate": 2.7941762702684503e-05, + "loss": 1.493, + "step": 8662 + }, + { + "epoch": 0.9138185654008438, + "grad_norm": 0.575810968875885, + "learning_rate": 2.7873913286780683e-05, + "loss": 1.5194, + "step": 8663 + }, + { + "epoch": 0.9139240506329114, + "grad_norm": 0.5663394331932068, + "learning_rate": 2.7806144788705718e-05, + "loss": 1.5152, + "step": 8664 + }, + { + "epoch": 0.9140295358649789, + "grad_norm": 0.5553434491157532, + "learning_rate": 2.7738457216053447e-05, + "loss": 1.5113, + "step": 8665 + }, + { + "epoch": 0.9141350210970464, + "grad_norm": 0.5531180500984192, + "learning_rate": 2.7670850576408556e-05, + "loss": 1.4762, + "step": 8666 + }, + { + "epoch": 0.9142405063291139, + "grad_norm": 0.526526927947998, + "learning_rate": 2.7603324877346653e-05, + "loss": 1.4921, + "step": 8667 + }, + { + "epoch": 0.9143459915611815, + "grad_norm": 0.5426539182662964, + "learning_rate": 2.7535880126434433e-05, + "loss": 1.471, + "step": 8668 + }, + { + "epoch": 0.9144514767932489, + "grad_norm": 0.5537633299827576, + "learning_rate": 2.7468516331229432e-05, + "loss": 1.5123, + "step": 8669 + }, + { + "epoch": 0.9145569620253164, + "grad_norm": 0.5698282718658447, + "learning_rate": 2.7401233499279866e-05, + "loss": 1.5228, + "step": 8670 + }, + { + "epoch": 0.914662447257384, + "grad_norm": 0.5623780488967896, + "learning_rate": 2.7334031638125367e-05, + "loss": 1.5445, + "step": 8671 + }, + { + "epoch": 0.9147679324894514, + "grad_norm": 0.5348531603813171, + "learning_rate": 2.726691075529625e-05, + "loss": 1.5397, + "step": 8672 + }, + { + "epoch": 0.914873417721519, + "grad_norm": 0.5491921901702881, + "learning_rate": 2.7199870858313574e-05, + "loss": 1.4842, + "step": 8673 + }, + { + "epoch": 0.9149789029535865, + "grad_norm": 0.5415360927581787, + "learning_rate": 2.7132911954689672e-05, + "loss": 1.485, + "step": 8674 + }, + { + "epoch": 0.915084388185654, + "grad_norm": 0.6272087693214417, + "learning_rate": 
2.706603405192745e-05, + "loss": 1.5132, + "step": 8675 + }, + { + "epoch": 0.9151898734177215, + "grad_norm": 0.6031949520111084, + "learning_rate": 2.6999237157521005e-05, + "loss": 1.4997, + "step": 8676 + }, + { + "epoch": 0.9152953586497891, + "grad_norm": 0.5697411298751831, + "learning_rate": 2.6932521278955262e-05, + "loss": 1.4907, + "step": 8677 + }, + { + "epoch": 0.9154008438818565, + "grad_norm": 0.5175105929374695, + "learning_rate": 2.686588642370591e-05, + "loss": 1.5162, + "step": 8678 + }, + { + "epoch": 0.915506329113924, + "grad_norm": 0.5516725778579712, + "learning_rate": 2.6799332599239974e-05, + "loss": 1.5281, + "step": 8679 + }, + { + "epoch": 0.9156118143459916, + "grad_norm": 0.5471696853637695, + "learning_rate": 2.6732859813014987e-05, + "loss": 1.4707, + "step": 8680 + }, + { + "epoch": 0.915717299578059, + "grad_norm": 0.5707612633705139, + "learning_rate": 2.666646807247966e-05, + "loss": 1.5323, + "step": 8681 + }, + { + "epoch": 0.9158227848101266, + "grad_norm": 0.5420556664466858, + "learning_rate": 2.660015738507346e-05, + "loss": 1.504, + "step": 8682 + }, + { + "epoch": 0.9159282700421941, + "grad_norm": 0.6118819117546082, + "learning_rate": 2.653392775822677e-05, + "loss": 1.4765, + "step": 8683 + }, + { + "epoch": 0.9160337552742616, + "grad_norm": 0.5904796719551086, + "learning_rate": 2.6467779199361e-05, + "loss": 1.4981, + "step": 8684 + }, + { + "epoch": 0.9161392405063291, + "grad_norm": 0.5566571950912476, + "learning_rate": 2.6401711715888454e-05, + "loss": 1.4914, + "step": 8685 + }, + { + "epoch": 0.9162447257383967, + "grad_norm": 0.5399649739265442, + "learning_rate": 2.6335725315212304e-05, + "loss": 1.5017, + "step": 8686 + }, + { + "epoch": 0.9163502109704641, + "grad_norm": 0.5629456639289856, + "learning_rate": 2.626982000472655e-05, + "loss": 1.4915, + "step": 8687 + }, + { + "epoch": 0.9164556962025316, + "grad_norm": 0.5390834808349609, + "learning_rate": 2.6203995791816372e-05, + "loss": 1.5051, + 
"step": 8688 + }, + { + "epoch": 0.9165611814345992, + "grad_norm": 0.5500139594078064, + "learning_rate": 2.6138252683857693e-05, + "loss": 1.5274, + "step": 8689 + }, + { + "epoch": 0.9166666666666666, + "grad_norm": 0.5336147546768188, + "learning_rate": 2.607259068821721e-05, + "loss": 1.4706, + "step": 8690 + }, + { + "epoch": 0.9167721518987342, + "grad_norm": 0.5595725178718567, + "learning_rate": 2.6007009812252875e-05, + "loss": 1.5332, + "step": 8691 + }, + { + "epoch": 0.9168776371308017, + "grad_norm": 0.5575589537620544, + "learning_rate": 2.594151006331322e-05, + "loss": 1.4945, + "step": 8692 + }, + { + "epoch": 0.9169831223628692, + "grad_norm": 0.538307249546051, + "learning_rate": 2.5876091448737788e-05, + "loss": 1.4875, + "step": 8693 + }, + { + "epoch": 0.9170886075949367, + "grad_norm": 0.5521544218063354, + "learning_rate": 2.5810753975857136e-05, + "loss": 1.5117, + "step": 8694 + }, + { + "epoch": 0.9171940928270043, + "grad_norm": 0.5340226292610168, + "learning_rate": 2.5745497651992662e-05, + "loss": 1.4716, + "step": 8695 + }, + { + "epoch": 0.9172995780590717, + "grad_norm": 0.5452455282211304, + "learning_rate": 2.568032248445651e-05, + "loss": 1.5038, + "step": 8696 + }, + { + "epoch": 0.9174050632911392, + "grad_norm": 0.5309298038482666, + "learning_rate": 2.561522848055217e-05, + "loss": 1.4851, + "step": 8697 + }, + { + "epoch": 0.9175105485232068, + "grad_norm": 0.5241214632987976, + "learning_rate": 2.5550215647573482e-05, + "loss": 1.488, + "step": 8698 + }, + { + "epoch": 0.9176160337552742, + "grad_norm": 0.5090792775154114, + "learning_rate": 2.5485283992805615e-05, + "loss": 1.5155, + "step": 8699 + }, + { + "epoch": 0.9177215189873418, + "grad_norm": 0.5449185967445374, + "learning_rate": 2.5420433523524493e-05, + "loss": 1.4884, + "step": 8700 + }, + { + "epoch": 0.9178270042194093, + "grad_norm": 0.527625560760498, + "learning_rate": 2.5355664246996813e-05, + "loss": 1.4932, + "step": 8701 + }, + { + "epoch": 
0.9179324894514768, + "grad_norm": 0.5381128191947937, + "learning_rate": 2.5290976170480346e-05, + "loss": 1.4599, + "step": 8702 + }, + { + "epoch": 0.9180379746835443, + "grad_norm": 0.5218362212181091, + "learning_rate": 2.522636930122371e-05, + "loss": 1.511, + "step": 8703 + }, + { + "epoch": 0.9181434599156119, + "grad_norm": 0.5257148742675781, + "learning_rate": 2.516184364646637e-05, + "loss": 1.485, + "step": 8704 + }, + { + "epoch": 0.9182489451476793, + "grad_norm": 0.5509452223777771, + "learning_rate": 2.5097399213438955e-05, + "loss": 1.5084, + "step": 8705 + }, + { + "epoch": 0.9183544303797468, + "grad_norm": 0.5683303475379944, + "learning_rate": 2.50330360093626e-05, + "loss": 1.4855, + "step": 8706 + }, + { + "epoch": 0.9184599156118144, + "grad_norm": 0.5337793231010437, + "learning_rate": 2.4968754041449633e-05, + "loss": 1.5039, + "step": 8707 + }, + { + "epoch": 0.9185654008438818, + "grad_norm": 0.5335894823074341, + "learning_rate": 2.490455331690303e-05, + "loss": 1.4826, + "step": 8708 + }, + { + "epoch": 0.9186708860759494, + "grad_norm": 0.5246702432632446, + "learning_rate": 2.4840433842916872e-05, + "loss": 1.5048, + "step": 8709 + }, + { + "epoch": 0.9187763713080169, + "grad_norm": 0.5787049531936646, + "learning_rate": 2.4776395626676162e-05, + "loss": 1.5217, + "step": 8710 + }, + { + "epoch": 0.9188818565400844, + "grad_norm": 0.5419198870658875, + "learning_rate": 2.471243867535658e-05, + "loss": 1.5157, + "step": 8711 + }, + { + "epoch": 0.9189873417721519, + "grad_norm": 0.5546576976776123, + "learning_rate": 2.4648562996124806e-05, + "loss": 1.4853, + "step": 8712 + }, + { + "epoch": 0.9190928270042195, + "grad_norm": 0.5536454319953918, + "learning_rate": 2.4584768596138452e-05, + "loss": 1.5064, + "step": 8713 + }, + { + "epoch": 0.9191983122362869, + "grad_norm": 0.632554292678833, + "learning_rate": 2.4521055482546046e-05, + "loss": 1.4826, + "step": 8714 + }, + { + "epoch": 0.9193037974683544, + "grad_norm": 
0.5337753295898438, + "learning_rate": 2.4457423662486962e-05, + "loss": 1.5009, + "step": 8715 + }, + { + "epoch": 0.919409282700422, + "grad_norm": 0.5140770077705383, + "learning_rate": 2.4393873143091495e-05, + "loss": 1.4708, + "step": 8716 + }, + { + "epoch": 0.9195147679324894, + "grad_norm": 0.5413410067558289, + "learning_rate": 2.43304039314807e-05, + "loss": 1.4421, + "step": 8717 + }, + { + "epoch": 0.919620253164557, + "grad_norm": 0.5658493041992188, + "learning_rate": 2.4267016034766637e-05, + "loss": 1.5175, + "step": 8718 + }, + { + "epoch": 0.9197257383966245, + "grad_norm": 0.5643099546432495, + "learning_rate": 2.4203709460052292e-05, + "loss": 1.497, + "step": 8719 + }, + { + "epoch": 0.919831223628692, + "grad_norm": 0.5835442543029785, + "learning_rate": 2.414048421443141e-05, + "loss": 1.4807, + "step": 8720 + }, + { + "epoch": 0.9199367088607595, + "grad_norm": 0.533685564994812, + "learning_rate": 2.407734030498873e-05, + "loss": 1.5109, + "step": 8721 + }, + { + "epoch": 0.9200421940928271, + "grad_norm": 0.552165687084198, + "learning_rate": 2.4014277738799774e-05, + "loss": 1.5038, + "step": 8722 + }, + { + "epoch": 0.9201476793248945, + "grad_norm": 0.5543442368507385, + "learning_rate": 2.395129652293121e-05, + "loss": 1.5119, + "step": 8723 + }, + { + "epoch": 0.920253164556962, + "grad_norm": 0.5437019467353821, + "learning_rate": 2.3888396664440232e-05, + "loss": 1.5485, + "step": 8724 + }, + { + "epoch": 0.9203586497890295, + "grad_norm": 0.5934814810752869, + "learning_rate": 2.38255781703752e-05, + "loss": 1.4914, + "step": 8725 + }, + { + "epoch": 0.920464135021097, + "grad_norm": 0.524687647819519, + "learning_rate": 2.3762841047775068e-05, + "loss": 1.4528, + "step": 8726 + }, + { + "epoch": 0.9205696202531646, + "grad_norm": 0.5552328824996948, + "learning_rate": 2.3700185303670046e-05, + "loss": 1.4944, + "step": 8727 + }, + { + "epoch": 0.920675105485232, + "grad_norm": 0.5695934891700745, + "learning_rate": 
2.363761094508085e-05, + "loss": 1.4605, + "step": 8728 + }, + { + "epoch": 0.9207805907172996, + "grad_norm": 0.5234283208847046, + "learning_rate": 2.357511797901929e-05, + "loss": 1.5494, + "step": 8729 + }, + { + "epoch": 0.9208860759493671, + "grad_norm": 0.5470796227455139, + "learning_rate": 2.3512706412488012e-05, + "loss": 1.4896, + "step": 8730 + }, + { + "epoch": 0.9209915611814345, + "grad_norm": 0.5481711030006409, + "learning_rate": 2.345037625248067e-05, + "loss": 1.4751, + "step": 8731 + }, + { + "epoch": 0.9210970464135021, + "grad_norm": 0.5894069671630859, + "learning_rate": 2.3388127505981515e-05, + "loss": 1.5019, + "step": 8732 + }, + { + "epoch": 0.9212025316455696, + "grad_norm": 0.5420600771903992, + "learning_rate": 2.3325960179965967e-05, + "loss": 1.5256, + "step": 8733 + }, + { + "epoch": 0.9213080168776371, + "grad_norm": 0.547588050365448, + "learning_rate": 2.3263874281400034e-05, + "loss": 1.4746, + "step": 8734 + }, + { + "epoch": 0.9214135021097046, + "grad_norm": 0.5817544460296631, + "learning_rate": 2.3201869817240817e-05, + "loss": 1.4791, + "step": 8735 + }, + { + "epoch": 0.9215189873417722, + "grad_norm": 0.6203306913375854, + "learning_rate": 2.313994679443626e-05, + "loss": 1.4805, + "step": 8736 + }, + { + "epoch": 0.9216244725738396, + "grad_norm": 0.608254075050354, + "learning_rate": 2.307810521992515e-05, + "loss": 1.4797, + "step": 8737 + }, + { + "epoch": 0.9217299578059072, + "grad_norm": 0.5605396628379822, + "learning_rate": 2.301634510063702e-05, + "loss": 1.5062, + "step": 8738 + }, + { + "epoch": 0.9218354430379747, + "grad_norm": 0.5508666634559631, + "learning_rate": 2.2954666443492505e-05, + "loss": 1.5105, + "step": 8739 + }, + { + "epoch": 0.9219409282700421, + "grad_norm": 0.5446193218231201, + "learning_rate": 2.2893069255402993e-05, + "loss": 1.4953, + "step": 8740 + }, + { + "epoch": 0.9220464135021097, + "grad_norm": 0.5517874956130981, + "learning_rate": 2.2831553543270793e-05, + "loss": 1.4792, + 
"step": 8741 + }, + { + "epoch": 0.9221518987341772, + "grad_norm": 0.523271918296814, + "learning_rate": 2.277011931398898e-05, + "loss": 1.4775, + "step": 8742 + }, + { + "epoch": 0.9222573839662447, + "grad_norm": 0.5651897192001343, + "learning_rate": 2.2708766574441626e-05, + "loss": 1.4378, + "step": 8743 + }, + { + "epoch": 0.9223628691983122, + "grad_norm": 0.5654625296592712, + "learning_rate": 2.2647495331503565e-05, + "loss": 1.4956, + "step": 8744 + }, + { + "epoch": 0.9224683544303798, + "grad_norm": 0.5529236793518066, + "learning_rate": 2.2586305592040558e-05, + "loss": 1.5281, + "step": 8745 + }, + { + "epoch": 0.9225738396624472, + "grad_norm": 0.5713614821434021, + "learning_rate": 2.2525197362909282e-05, + "loss": 1.5019, + "step": 8746 + }, + { + "epoch": 0.9226793248945148, + "grad_norm": 0.5674921274185181, + "learning_rate": 2.24641706509571e-05, + "loss": 1.4867, + "step": 8747 + }, + { + "epoch": 0.9227848101265823, + "grad_norm": 0.5327324271202087, + "learning_rate": 2.2403225463022288e-05, + "loss": 1.4822, + "step": 8748 + }, + { + "epoch": 0.9228902953586497, + "grad_norm": 0.5415039658546448, + "learning_rate": 2.2342361805934297e-05, + "loss": 1.4976, + "step": 8749 + }, + { + "epoch": 0.9229957805907173, + "grad_norm": 0.5366424918174744, + "learning_rate": 2.2281579686513176e-05, + "loss": 1.5191, + "step": 8750 + }, + { + "epoch": 0.9231012658227848, + "grad_norm": 0.5203217267990112, + "learning_rate": 2.2220879111569725e-05, + "loss": 1.4598, + "step": 8751 + }, + { + "epoch": 0.9232067510548523, + "grad_norm": 0.5428957939147949, + "learning_rate": 2.2160260087905753e-05, + "loss": 1.5086, + "step": 8752 + }, + { + "epoch": 0.9233122362869198, + "grad_norm": 0.5415568351745605, + "learning_rate": 2.2099722622314078e-05, + "loss": 1.4642, + "step": 8753 + }, + { + "epoch": 0.9234177215189874, + "grad_norm": 0.5676276683807373, + "learning_rate": 2.203926672157802e-05, + "loss": 1.4858, + "step": 8754 + }, + { + "epoch": 
0.9235232067510548, + "grad_norm": 0.5626223087310791, + "learning_rate": 2.1978892392472085e-05, + "loss": 1.4938, + "step": 8755 + }, + { + "epoch": 0.9236286919831224, + "grad_norm": 0.5219549536705017, + "learning_rate": 2.1918599641761517e-05, + "loss": 1.4833, + "step": 8756 + }, + { + "epoch": 0.9237341772151899, + "grad_norm": 0.5390403866767883, + "learning_rate": 2.185838847620242e-05, + "loss": 1.481, + "step": 8757 + }, + { + "epoch": 0.9238396624472573, + "grad_norm": 0.5390588641166687, + "learning_rate": 2.1798258902541723e-05, + "loss": 1.4201, + "step": 8758 + }, + { + "epoch": 0.9239451476793249, + "grad_norm": 0.5697013139724731, + "learning_rate": 2.173821092751721e-05, + "loss": 1.5029, + "step": 8759 + }, + { + "epoch": 0.9240506329113924, + "grad_norm": 0.5564780831336975, + "learning_rate": 2.1678244557857663e-05, + "loss": 1.4631, + "step": 8760 + }, + { + "epoch": 0.9241561181434599, + "grad_norm": 0.5881486535072327, + "learning_rate": 2.161835980028254e-05, + "loss": 1.5146, + "step": 8761 + }, + { + "epoch": 0.9242616033755274, + "grad_norm": 0.5305315256118774, + "learning_rate": 2.1558556661502222e-05, + "loss": 1.4598, + "step": 8762 + }, + { + "epoch": 0.924367088607595, + "grad_norm": 0.5519385933876038, + "learning_rate": 2.1498835148218017e-05, + "loss": 1.5415, + "step": 8763 + }, + { + "epoch": 0.9244725738396624, + "grad_norm": 0.542969822883606, + "learning_rate": 2.1439195267121902e-05, + "loss": 1.5004, + "step": 8764 + }, + { + "epoch": 0.92457805907173, + "grad_norm": 0.5591965317726135, + "learning_rate": 2.137963702489687e-05, + "loss": 1.4628, + "step": 8765 + }, + { + "epoch": 0.9246835443037975, + "grad_norm": 0.5663960576057434, + "learning_rate": 2.132016042821683e-05, + "loss": 1.4876, + "step": 8766 + }, + { + "epoch": 0.924789029535865, + "grad_norm": 0.5569717884063721, + "learning_rate": 2.1260765483746282e-05, + "loss": 1.4947, + "step": 8767 + }, + { + "epoch": 0.9248945147679325, + "grad_norm": 
0.5625186562538147, + "learning_rate": 2.120145219814082e-05, + "loss": 1.4785, + "step": 8768 + }, + { + "epoch": 0.925, + "grad_norm": 0.5466179251670837, + "learning_rate": 2.1142220578046712e-05, + "loss": 1.4864, + "step": 8769 + }, + { + "epoch": 0.9251054852320675, + "grad_norm": 0.5668838024139404, + "learning_rate": 2.1083070630101232e-05, + "loss": 1.4684, + "step": 8770 + }, + { + "epoch": 0.925210970464135, + "grad_norm": 0.5382811427116394, + "learning_rate": 2.102400236093241e-05, + "loss": 1.5236, + "step": 8771 + }, + { + "epoch": 0.9253164556962026, + "grad_norm": 0.5460042357444763, + "learning_rate": 2.096501577715912e-05, + "loss": 1.4648, + "step": 8772 + }, + { + "epoch": 0.92542194092827, + "grad_norm": 0.5530492067337036, + "learning_rate": 2.0906110885391072e-05, + "loss": 1.5091, + "step": 8773 + }, + { + "epoch": 0.9255274261603376, + "grad_norm": 0.5600837469100952, + "learning_rate": 2.0847287692228905e-05, + "loss": 1.4691, + "step": 8774 + }, + { + "epoch": 0.9256329113924051, + "grad_norm": 0.5496786236763, + "learning_rate": 2.0788546204264013e-05, + "loss": 1.5018, + "step": 8775 + }, + { + "epoch": 0.9257383966244725, + "grad_norm": 0.5170912146568298, + "learning_rate": 2.0729886428078716e-05, + "loss": 1.494, + "step": 8776 + }, + { + "epoch": 0.9258438818565401, + "grad_norm": 0.5451633930206299, + "learning_rate": 2.0671308370246167e-05, + "loss": 1.4897, + "step": 8777 + }, + { + "epoch": 0.9259493670886076, + "grad_norm": 0.5237491130828857, + "learning_rate": 2.0612812037330202e-05, + "loss": 1.4826, + "step": 8778 + }, + { + "epoch": 0.9260548523206751, + "grad_norm": 0.5321451425552368, + "learning_rate": 2.0554397435885746e-05, + "loss": 1.5198, + "step": 8779 + }, + { + "epoch": 0.9261603375527426, + "grad_norm": 0.522804856300354, + "learning_rate": 2.0496064572458395e-05, + "loss": 1.512, + "step": 8780 + }, + { + "epoch": 0.9262658227848102, + "grad_norm": 0.5627873539924622, + "learning_rate": 2.043781345358467e-05, 
+ "loss": 1.5083, + "step": 8781 + }, + { + "epoch": 0.9263713080168776, + "grad_norm": 0.5239964723587036, + "learning_rate": 2.0379644085791767e-05, + "loss": 1.5306, + "step": 8782 + }, + { + "epoch": 0.9264767932489452, + "grad_norm": 0.5388802289962769, + "learning_rate": 2.032155647559805e-05, + "loss": 1.4591, + "step": 8783 + }, + { + "epoch": 0.9265822784810127, + "grad_norm": 0.5858954787254333, + "learning_rate": 2.0263550629512406e-05, + "loss": 1.4715, + "step": 8784 + }, + { + "epoch": 0.9266877637130801, + "grad_norm": 0.5245155692100525, + "learning_rate": 2.0205626554034713e-05, + "loss": 1.5023, + "step": 8785 + }, + { + "epoch": 0.9267932489451477, + "grad_norm": 0.5559835433959961, + "learning_rate": 2.0147784255655692e-05, + "loss": 1.4792, + "step": 8786 + }, + { + "epoch": 0.9268987341772152, + "grad_norm": 0.5197020173072815, + "learning_rate": 2.009002374085675e-05, + "loss": 1.4632, + "step": 8787 + }, + { + "epoch": 0.9270042194092827, + "grad_norm": 0.5566214919090271, + "learning_rate": 2.003234501611037e-05, + "loss": 1.4947, + "step": 8788 + }, + { + "epoch": 0.9271097046413502, + "grad_norm": 0.5810142755508423, + "learning_rate": 1.9974748087879636e-05, + "loss": 1.4995, + "step": 8789 + }, + { + "epoch": 0.9272151898734177, + "grad_norm": 0.5338920950889587, + "learning_rate": 1.991723296261863e-05, + "loss": 1.5026, + "step": 8790 + }, + { + "epoch": 0.9273206751054852, + "grad_norm": 0.5293373465538025, + "learning_rate": 1.985979964677212e-05, + "loss": 1.5056, + "step": 8791 + }, + { + "epoch": 0.9274261603375528, + "grad_norm": 0.5425761342048645, + "learning_rate": 1.9802448146775953e-05, + "loss": 1.4723, + "step": 8792 + }, + { + "epoch": 0.9275316455696202, + "grad_norm": 0.5638019442558289, + "learning_rate": 1.9745178469056575e-05, + "loss": 1.4973, + "step": 8793 + }, + { + "epoch": 0.9276371308016877, + "grad_norm": 0.5300357937812805, + "learning_rate": 1.9687990620031266e-05, + "loss": 1.4663, + "step": 8794 + }, + { 
+ "epoch": 0.9277426160337553, + "grad_norm": 0.5469360947608948, + "learning_rate": 1.963088460610832e-05, + "loss": 1.4756, + "step": 8795 + }, + { + "epoch": 0.9278481012658227, + "grad_norm": 0.5607604384422302, + "learning_rate": 1.9573860433686696e-05, + "loss": 1.5105, + "step": 8796 + }, + { + "epoch": 0.9279535864978903, + "grad_norm": 0.6147506237030029, + "learning_rate": 1.9516918109156206e-05, + "loss": 1.4961, + "step": 8797 + }, + { + "epoch": 0.9280590717299578, + "grad_norm": 0.5625432133674622, + "learning_rate": 1.9460057638897578e-05, + "loss": 1.4976, + "step": 8798 + }, + { + "epoch": 0.9281645569620253, + "grad_norm": 0.5361321568489075, + "learning_rate": 1.9403279029282376e-05, + "loss": 1.4704, + "step": 8799 + }, + { + "epoch": 0.9282700421940928, + "grad_norm": 0.5466960072517395, + "learning_rate": 1.9346582286672686e-05, + "loss": 1.4884, + "step": 8800 + }, + { + "epoch": 0.9283755274261604, + "grad_norm": 0.5573300123214722, + "learning_rate": 1.9289967417421922e-05, + "loss": 1.5153, + "step": 8801 + }, + { + "epoch": 0.9284810126582278, + "grad_norm": 0.5484150052070618, + "learning_rate": 1.9233434427873924e-05, + "loss": 1.4849, + "step": 8802 + }, + { + "epoch": 0.9285864978902953, + "grad_norm": 0.5416959524154663, + "learning_rate": 1.9176983324363545e-05, + "loss": 1.4927, + "step": 8803 + }, + { + "epoch": 0.9286919831223629, + "grad_norm": 0.5478035807609558, + "learning_rate": 1.912061411321639e-05, + "loss": 1.4917, + "step": 8804 + }, + { + "epoch": 0.9287974683544303, + "grad_norm": 0.5544034242630005, + "learning_rate": 1.9064326800748906e-05, + "loss": 1.4917, + "step": 8805 + }, + { + "epoch": 0.9289029535864979, + "grad_norm": 0.5389281511306763, + "learning_rate": 1.9008121393268462e-05, + "loss": 1.493, + "step": 8806 + }, + { + "epoch": 0.9290084388185654, + "grad_norm": 0.5217760801315308, + "learning_rate": 1.8951997897072943e-05, + "loss": 1.4962, + "step": 8807 + }, + { + "epoch": 0.9291139240506329, + 
"grad_norm": 0.5553712844848633, + "learning_rate": 1.8895956318451398e-05, + "loss": 1.5226, + "step": 8808 + }, + { + "epoch": 0.9292194092827004, + "grad_norm": 0.537169337272644, + "learning_rate": 1.8839996663683635e-05, + "loss": 1.5319, + "step": 8809 + }, + { + "epoch": 0.929324894514768, + "grad_norm": 0.5601081848144531, + "learning_rate": 1.878411893904014e-05, + "loss": 1.5409, + "step": 8810 + }, + { + "epoch": 0.9294303797468354, + "grad_norm": 0.5494713187217712, + "learning_rate": 1.872832315078224e-05, + "loss": 1.5008, + "step": 8811 + }, + { + "epoch": 0.929535864978903, + "grad_norm": 0.543627142906189, + "learning_rate": 1.8672609305162263e-05, + "loss": 1.4866, + "step": 8812 + }, + { + "epoch": 0.9296413502109705, + "grad_norm": 0.5443251132965088, + "learning_rate": 1.8616977408423053e-05, + "loss": 1.4901, + "step": 8813 + }, + { + "epoch": 0.9297468354430379, + "grad_norm": 0.5312963128089905, + "learning_rate": 1.856142746679862e-05, + "loss": 1.5122, + "step": 8814 + }, + { + "epoch": 0.9298523206751055, + "grad_norm": 0.544899582862854, + "learning_rate": 1.8505959486513485e-05, + "loss": 1.4809, + "step": 8815 + }, + { + "epoch": 0.929957805907173, + "grad_norm": 0.5761235952377319, + "learning_rate": 1.8450573473783094e-05, + "loss": 1.5093, + "step": 8816 + }, + { + "epoch": 0.9300632911392405, + "grad_norm": 0.5480226874351501, + "learning_rate": 1.8395269434813733e-05, + "loss": 1.5067, + "step": 8817 + }, + { + "epoch": 0.930168776371308, + "grad_norm": 0.5288880467414856, + "learning_rate": 1.8340047375802693e-05, + "loss": 1.493, + "step": 8818 + }, + { + "epoch": 0.9302742616033756, + "grad_norm": 0.5375040173530579, + "learning_rate": 1.8284907302937608e-05, + "loss": 1.4796, + "step": 8819 + }, + { + "epoch": 0.930379746835443, + "grad_norm": 0.5260084271430969, + "learning_rate": 1.822984922239737e-05, + "loss": 1.4761, + "step": 8820 + }, + { + "epoch": 0.9304852320675105, + "grad_norm": 0.525802493095398, + 
"learning_rate": 1.8174873140351544e-05, + "loss": 1.519, + "step": 8821 + }, + { + "epoch": 0.9305907172995781, + "grad_norm": 0.5513235926628113, + "learning_rate": 1.8119979062960286e-05, + "loss": 1.5651, + "step": 8822 + }, + { + "epoch": 0.9306962025316455, + "grad_norm": 0.5565255284309387, + "learning_rate": 1.806516699637492e-05, + "loss": 1.4862, + "step": 8823 + }, + { + "epoch": 0.9308016877637131, + "grad_norm": 0.5575913190841675, + "learning_rate": 1.8010436946737292e-05, + "loss": 1.5414, + "step": 8824 + }, + { + "epoch": 0.9309071729957806, + "grad_norm": 0.559464693069458, + "learning_rate": 1.7955788920180238e-05, + "loss": 1.5105, + "step": 8825 + }, + { + "epoch": 0.9310126582278481, + "grad_norm": 0.561187207698822, + "learning_rate": 1.7901222922827282e-05, + "loss": 1.5036, + "step": 8826 + }, + { + "epoch": 0.9311181434599156, + "grad_norm": 0.541124701499939, + "learning_rate": 1.7846738960792945e-05, + "loss": 1.4957, + "step": 8827 + }, + { + "epoch": 0.9312236286919832, + "grad_norm": 0.5325022339820862, + "learning_rate": 1.7792337040182434e-05, + "loss": 1.524, + "step": 8828 + }, + { + "epoch": 0.9313291139240506, + "grad_norm": 0.5612843036651611, + "learning_rate": 1.773801716709153e-05, + "loss": 1.4818, + "step": 8829 + }, + { + "epoch": 0.9314345991561181, + "grad_norm": 0.5491388440132141, + "learning_rate": 1.7683779347607286e-05, + "loss": 1.4583, + "step": 8830 + }, + { + "epoch": 0.9315400843881857, + "grad_norm": 0.5389885902404785, + "learning_rate": 1.7629623587807175e-05, + "loss": 1.4528, + "step": 8831 + }, + { + "epoch": 0.9316455696202531, + "grad_norm": 0.5166723728179932, + "learning_rate": 1.7575549893759756e-05, + "loss": 1.529, + "step": 8832 + }, + { + "epoch": 0.9317510548523207, + "grad_norm": 0.5230802297592163, + "learning_rate": 1.7521558271524103e-05, + "loss": 1.4815, + "step": 8833 + }, + { + "epoch": 0.9318565400843882, + "grad_norm": 0.5375209450721741, + "learning_rate": 1.7467648727150202e-05, + 
"loss": 1.4811, + "step": 8834 + }, + { + "epoch": 0.9319620253164557, + "grad_norm": 0.5456715822219849, + "learning_rate": 1.741382126667915e-05, + "loss": 1.5011, + "step": 8835 + }, + { + "epoch": 0.9320675105485232, + "grad_norm": 0.6138780117034912, + "learning_rate": 1.7360075896142357e-05, + "loss": 1.5136, + "step": 8836 + }, + { + "epoch": 0.9321729957805908, + "grad_norm": 0.5964294075965881, + "learning_rate": 1.7306412621562352e-05, + "loss": 1.5262, + "step": 8837 + }, + { + "epoch": 0.9322784810126582, + "grad_norm": 0.5301030874252319, + "learning_rate": 1.72528314489524e-05, + "loss": 1.5016, + "step": 8838 + }, + { + "epoch": 0.9323839662447257, + "grad_norm": 0.5561608076095581, + "learning_rate": 1.719933238431645e-05, + "loss": 1.4617, + "step": 8839 + }, + { + "epoch": 0.9324894514767933, + "grad_norm": 0.5488212704658508, + "learning_rate": 1.714591543364938e-05, + "loss": 1.5002, + "step": 8840 + }, + { + "epoch": 0.9325949367088607, + "grad_norm": 0.5184688568115234, + "learning_rate": 1.7092580602936807e-05, + "loss": 1.4734, + "step": 8841 + }, + { + "epoch": 0.9327004219409283, + "grad_norm": 0.5495523810386658, + "learning_rate": 1.703932789815521e-05, + "loss": 1.5421, + "step": 8842 + }, + { + "epoch": 0.9328059071729958, + "grad_norm": 0.5258859992027283, + "learning_rate": 1.6986157325271727e-05, + "loss": 1.4887, + "step": 8843 + }, + { + "epoch": 0.9329113924050633, + "grad_norm": 0.5101792812347412, + "learning_rate": 1.6933068890244595e-05, + "loss": 1.4949, + "step": 8844 + }, + { + "epoch": 0.9330168776371308, + "grad_norm": 0.5405325889587402, + "learning_rate": 1.688006259902239e-05, + "loss": 1.4796, + "step": 8845 + }, + { + "epoch": 0.9331223628691984, + "grad_norm": 0.5745729207992554, + "learning_rate": 1.6827138457544854e-05, + "loss": 1.4997, + "step": 8846 + }, + { + "epoch": 0.9332278481012658, + "grad_norm": 0.5152033567428589, + "learning_rate": 1.677429647174242e-05, + "loss": 1.4455, + "step": 8847 + }, + { + 
"epoch": 0.9333333333333333, + "grad_norm": 0.5710365176200867, + "learning_rate": 1.6721536647536255e-05, + "loss": 1.5108, + "step": 8848 + }, + { + "epoch": 0.9334388185654009, + "grad_norm": 0.5570980906486511, + "learning_rate": 1.666885899083831e-05, + "loss": 1.5257, + "step": 8849 + }, + { + "epoch": 0.9335443037974683, + "grad_norm": 0.5626038312911987, + "learning_rate": 1.6616263507551437e-05, + "loss": 1.469, + "step": 8850 + }, + { + "epoch": 0.9336497890295359, + "grad_norm": 0.5449593663215637, + "learning_rate": 1.656375020356926e-05, + "loss": 1.5074, + "step": 8851 + }, + { + "epoch": 0.9337552742616034, + "grad_norm": 0.5436168909072876, + "learning_rate": 1.6511319084776073e-05, + "loss": 1.4991, + "step": 8852 + }, + { + "epoch": 0.9338607594936709, + "grad_norm": 0.5396621823310852, + "learning_rate": 1.645897015704709e-05, + "loss": 1.4791, + "step": 8853 + }, + { + "epoch": 0.9339662447257384, + "grad_norm": 0.547958254814148, + "learning_rate": 1.6406703426248366e-05, + "loss": 1.486, + "step": 8854 + }, + { + "epoch": 0.9340717299578059, + "grad_norm": 0.5393418073654175, + "learning_rate": 1.6354518898236472e-05, + "loss": 1.5188, + "step": 8855 + }, + { + "epoch": 0.9341772151898734, + "grad_norm": 0.5187286138534546, + "learning_rate": 1.630241657885906e-05, + "loss": 1.4932, + "step": 8856 + }, + { + "epoch": 0.934282700421941, + "grad_norm": 0.5181279182434082, + "learning_rate": 1.6250396473954377e-05, + "loss": 1.4627, + "step": 8857 + }, + { + "epoch": 0.9343881856540084, + "grad_norm": 0.544024646282196, + "learning_rate": 1.6198458589351595e-05, + "loss": 1.5102, + "step": 8858 + }, + { + "epoch": 0.9344936708860759, + "grad_norm": 0.5750916004180908, + "learning_rate": 1.614660293087056e-05, + "loss": 1.5089, + "step": 8859 + }, + { + "epoch": 0.9345991561181435, + "grad_norm": 0.5754094123840332, + "learning_rate": 1.609482950432195e-05, + "loss": 1.4673, + "step": 8860 + }, + { + "epoch": 0.9347046413502109, + "grad_norm": 
0.5213358998298645, + "learning_rate": 1.6043138315507382e-05, + "loss": 1.4546, + "step": 8861 + }, + { + "epoch": 0.9348101265822785, + "grad_norm": 0.5044276714324951, + "learning_rate": 1.5991529370218887e-05, + "loss": 1.5251, + "step": 8862 + }, + { + "epoch": 0.934915611814346, + "grad_norm": 0.5525864362716675, + "learning_rate": 1.5940002674239756e-05, + "loss": 1.5115, + "step": 8863 + }, + { + "epoch": 0.9350210970464135, + "grad_norm": 0.5361214280128479, + "learning_rate": 1.588855823334362e-05, + "loss": 1.487, + "step": 8864 + }, + { + "epoch": 0.935126582278481, + "grad_norm": 0.5400807857513428, + "learning_rate": 1.5837196053295117e-05, + "loss": 1.5118, + "step": 8865 + }, + { + "epoch": 0.9352320675105485, + "grad_norm": 0.5843711495399475, + "learning_rate": 1.5785916139849725e-05, + "loss": 1.5026, + "step": 8866 + }, + { + "epoch": 0.935337552742616, + "grad_norm": 0.5506822466850281, + "learning_rate": 1.573471849875352e-05, + "loss": 1.5214, + "step": 8867 + }, + { + "epoch": 0.9354430379746835, + "grad_norm": 0.5163999795913696, + "learning_rate": 1.568360313574349e-05, + "loss": 1.4934, + "step": 8868 + }, + { + "epoch": 0.9355485232067511, + "grad_norm": 0.5470105409622192, + "learning_rate": 1.5632570056547308e-05, + "loss": 1.5004, + "step": 8869 + }, + { + "epoch": 0.9356540084388185, + "grad_norm": 0.5344634652137756, + "learning_rate": 1.5581619266883563e-05, + "loss": 1.4646, + "step": 8870 + }, + { + "epoch": 0.9357594936708861, + "grad_norm": 0.5332074165344238, + "learning_rate": 1.5530750772461522e-05, + "loss": 1.5289, + "step": 8871 + }, + { + "epoch": 0.9358649789029536, + "grad_norm": 0.5465049743652344, + "learning_rate": 1.5479964578981293e-05, + "loss": 1.4526, + "step": 8872 + }, + { + "epoch": 0.935970464135021, + "grad_norm": 0.561720073223114, + "learning_rate": 1.5429260692133656e-05, + "loss": 1.522, + "step": 8873 + }, + { + "epoch": 0.9360759493670886, + "grad_norm": 0.5531208515167236, + "learning_rate": 
1.5378639117600234e-05, + "loss": 1.4807, + "step": 8874 + }, + { + "epoch": 0.9361814345991561, + "grad_norm": 0.5790717005729675, + "learning_rate": 1.532809986105349e-05, + "loss": 1.4848, + "step": 8875 + }, + { + "epoch": 0.9362869198312236, + "grad_norm": 0.537634015083313, + "learning_rate": 1.527764292815656e-05, + "loss": 1.5092, + "step": 8876 + }, + { + "epoch": 0.9363924050632911, + "grad_norm": 0.5008704662322998, + "learning_rate": 1.522726832456342e-05, + "loss": 1.489, + "step": 8877 + }, + { + "epoch": 0.9364978902953587, + "grad_norm": 0.5178319215774536, + "learning_rate": 1.517697605591864e-05, + "loss": 1.5211, + "step": 8878 + }, + { + "epoch": 0.9366033755274261, + "grad_norm": 0.5484443306922913, + "learning_rate": 1.512676612785796e-05, + "loss": 1.5178, + "step": 8879 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 0.5695708990097046, + "learning_rate": 1.5076638546007548e-05, + "loss": 1.4991, + "step": 8880 + }, + { + "epoch": 0.9368143459915612, + "grad_norm": 0.545431911945343, + "learning_rate": 1.502659331598441e-05, + "loss": 1.4906, + "step": 8881 + }, + { + "epoch": 0.9369198312236287, + "grad_norm": 0.5354093313217163, + "learning_rate": 1.4976630443396395e-05, + "loss": 1.5389, + "step": 8882 + }, + { + "epoch": 0.9370253164556962, + "grad_norm": 0.5782936215400696, + "learning_rate": 1.4926749933842187e-05, + "loss": 1.4701, + "step": 8883 + }, + { + "epoch": 0.9371308016877637, + "grad_norm": 0.5566501617431641, + "learning_rate": 1.4876951792910987e-05, + "loss": 1.5106, + "step": 8884 + }, + { + "epoch": 0.9372362869198312, + "grad_norm": 0.5586019158363342, + "learning_rate": 1.4827236026182994e-05, + "loss": 1.4798, + "step": 8885 + }, + { + "epoch": 0.9373417721518987, + "grad_norm": 0.5440168976783752, + "learning_rate": 1.4777602639229004e-05, + "loss": 1.5099, + "step": 8886 + }, + { + "epoch": 0.9374472573839663, + "grad_norm": 0.5326303243637085, + "learning_rate": 1.4728051637610902e-05, + "loss": 1.49, + 
"step": 8887 + }, + { + "epoch": 0.9375527426160337, + "grad_norm": 0.5507270097732544, + "learning_rate": 1.4678583026880993e-05, + "loss": 1.5055, + "step": 8888 + }, + { + "epoch": 0.9376582278481013, + "grad_norm": 0.5817315578460693, + "learning_rate": 1.4629196812582513e-05, + "loss": 1.5021, + "step": 8889 + }, + { + "epoch": 0.9377637130801688, + "grad_norm": 0.5231399536132812, + "learning_rate": 1.457989300024945e-05, + "loss": 1.473, + "step": 8890 + }, + { + "epoch": 0.9378691983122363, + "grad_norm": 0.594109296798706, + "learning_rate": 1.4530671595406469e-05, + "loss": 1.4908, + "step": 8891 + }, + { + "epoch": 0.9379746835443038, + "grad_norm": 0.5209593176841736, + "learning_rate": 1.4481532603569076e-05, + "loss": 1.4956, + "step": 8892 + }, + { + "epoch": 0.9380801687763713, + "grad_norm": 0.5334316492080688, + "learning_rate": 1.4432476030243696e-05, + "loss": 1.4673, + "step": 8893 + }, + { + "epoch": 0.9381856540084388, + "grad_norm": 0.5306863784790039, + "learning_rate": 1.4383501880927103e-05, + "loss": 1.4729, + "step": 8894 + }, + { + "epoch": 0.9382911392405063, + "grad_norm": 0.5283511281013489, + "learning_rate": 1.433461016110732e-05, + "loss": 1.5046, + "step": 8895 + }, + { + "epoch": 0.9383966244725739, + "grad_norm": 0.5631054043769836, + "learning_rate": 1.42858008762628e-05, + "loss": 1.4885, + "step": 8896 + }, + { + "epoch": 0.9385021097046413, + "grad_norm": 0.559698760509491, + "learning_rate": 1.4237074031862918e-05, + "loss": 1.4988, + "step": 8897 + }, + { + "epoch": 0.9386075949367089, + "grad_norm": 0.5403364896774292, + "learning_rate": 1.4188429633367721e-05, + "loss": 1.4577, + "step": 8898 + }, + { + "epoch": 0.9387130801687764, + "grad_norm": 0.5578094124794006, + "learning_rate": 1.4139867686228102e-05, + "loss": 1.512, + "step": 8899 + }, + { + "epoch": 0.9388185654008439, + "grad_norm": 0.5720673203468323, + "learning_rate": 1.4091388195885625e-05, + "loss": 1.4842, + "step": 8900 + }, + { + "epoch": 
0.9389240506329114, + "grad_norm": 0.5165798664093018, + "learning_rate": 1.404299116777269e-05, + "loss": 1.5028, + "step": 8901 + }, + { + "epoch": 0.939029535864979, + "grad_norm": 0.5472872257232666, + "learning_rate": 1.3994676607312379e-05, + "loss": 1.4739, + "step": 8902 + }, + { + "epoch": 0.9391350210970464, + "grad_norm": 0.5246853828430176, + "learning_rate": 1.3946444519918611e-05, + "loss": 1.5015, + "step": 8903 + }, + { + "epoch": 0.9392405063291139, + "grad_norm": 0.5253665447235107, + "learning_rate": 1.3898294910995979e-05, + "loss": 1.4973, + "step": 8904 + }, + { + "epoch": 0.9393459915611815, + "grad_norm": 0.5477705597877502, + "learning_rate": 1.385022778594e-05, + "loss": 1.5022, + "step": 8905 + }, + { + "epoch": 0.9394514767932489, + "grad_norm": 0.5255270600318909, + "learning_rate": 1.3802243150136784e-05, + "loss": 1.5232, + "step": 8906 + }, + { + "epoch": 0.9395569620253165, + "grad_norm": 0.5511412620544434, + "learning_rate": 1.3754341008963194e-05, + "loss": 1.4862, + "step": 8907 + }, + { + "epoch": 0.939662447257384, + "grad_norm": 0.5307536721229553, + "learning_rate": 1.370652136778694e-05, + "loss": 1.4731, + "step": 8908 + }, + { + "epoch": 0.9397679324894515, + "grad_norm": 0.5920286178588867, + "learning_rate": 1.3658784231966481e-05, + "loss": 1.4739, + "step": 8909 + }, + { + "epoch": 0.939873417721519, + "grad_norm": 0.6466826796531677, + "learning_rate": 1.3611129606851041e-05, + "loss": 1.4681, + "step": 8910 + }, + { + "epoch": 0.9399789029535865, + "grad_norm": 0.5773149728775024, + "learning_rate": 1.3563557497780432e-05, + "loss": 1.48, + "step": 8911 + }, + { + "epoch": 0.940084388185654, + "grad_norm": 0.5421623587608337, + "learning_rate": 1.3516067910085306e-05, + "loss": 1.5161, + "step": 8912 + }, + { + "epoch": 0.9401898734177215, + "grad_norm": 0.5372695326805115, + "learning_rate": 1.3468660849087322e-05, + "loss": 1.4994, + "step": 8913 + }, + { + "epoch": 0.9402953586497891, + "grad_norm": 
0.5275027751922607, + "learning_rate": 1.3421336320098565e-05, + "loss": 1.5051, + "step": 8914 + }, + { + "epoch": 0.9404008438818565, + "grad_norm": 0.5674175024032593, + "learning_rate": 1.3374094328422043e-05, + "loss": 1.4818, + "step": 8915 + }, + { + "epoch": 0.9405063291139241, + "grad_norm": 0.585702121257782, + "learning_rate": 1.3326934879351272e-05, + "loss": 1.5431, + "step": 8916 + }, + { + "epoch": 0.9406118143459916, + "grad_norm": 0.5600317120552063, + "learning_rate": 1.327985797817094e-05, + "loss": 1.5141, + "step": 8917 + }, + { + "epoch": 0.940717299578059, + "grad_norm": 0.5663731694221497, + "learning_rate": 1.3232863630156077e-05, + "loss": 1.4602, + "step": 8918 + }, + { + "epoch": 0.9408227848101266, + "grad_norm": 0.5404043793678284, + "learning_rate": 1.3185951840572723e-05, + "loss": 1.4885, + "step": 8919 + }, + { + "epoch": 0.9409282700421941, + "grad_norm": 0.556914746761322, + "learning_rate": 1.313912261467759e-05, + "loss": 1.464, + "step": 8920 + }, + { + "epoch": 0.9410337552742616, + "grad_norm": 0.5870622396469116, + "learning_rate": 1.3092375957717978e-05, + "loss": 1.5031, + "step": 8921 + }, + { + "epoch": 0.9411392405063291, + "grad_norm": 0.5350087881088257, + "learning_rate": 1.3045711874932281e-05, + "loss": 1.4903, + "step": 8922 + }, + { + "epoch": 0.9412447257383966, + "grad_norm": 0.5278683304786682, + "learning_rate": 1.2999130371549318e-05, + "loss": 1.4889, + "step": 8923 + }, + { + "epoch": 0.9413502109704641, + "grad_norm": 0.5533756017684937, + "learning_rate": 1.2952631452788826e-05, + "loss": 1.4911, + "step": 8924 + }, + { + "epoch": 0.9414556962025317, + "grad_norm": 0.5933789610862732, + "learning_rate": 1.2906215123861226e-05, + "loss": 1.5228, + "step": 8925 + }, + { + "epoch": 0.9415611814345991, + "grad_norm": 0.5292412042617798, + "learning_rate": 1.2859881389967687e-05, + "loss": 1.4633, + "step": 8926 + }, + { + "epoch": 0.9416666666666667, + "grad_norm": 0.5580168962478638, + "learning_rate": 
1.2813630256300224e-05, + "loss": 1.5105, + "step": 8927 + }, + { + "epoch": 0.9417721518987342, + "grad_norm": 0.5918232202529907, + "learning_rate": 1.2767461728041357e-05, + "loss": 1.5129, + "step": 8928 + }, + { + "epoch": 0.9418776371308016, + "grad_norm": 0.5284067392349243, + "learning_rate": 1.2721375810364616e-05, + "loss": 1.4603, + "step": 8929 + }, + { + "epoch": 0.9419831223628692, + "grad_norm": 0.5488449335098267, + "learning_rate": 1.267537250843412e-05, + "loss": 1.4761, + "step": 8930 + }, + { + "epoch": 0.9420886075949367, + "grad_norm": 0.5458678007125854, + "learning_rate": 1.2629451827404659e-05, + "loss": 1.4745, + "step": 8931 + }, + { + "epoch": 0.9421940928270042, + "grad_norm": 0.5657885074615479, + "learning_rate": 1.258361377242212e-05, + "loss": 1.4813, + "step": 8932 + }, + { + "epoch": 0.9422995780590717, + "grad_norm": 0.5173025727272034, + "learning_rate": 1.2537858348622728e-05, + "loss": 1.4921, + "step": 8933 + }, + { + "epoch": 0.9424050632911393, + "grad_norm": 0.5787217617034912, + "learning_rate": 1.2492185561133545e-05, + "loss": 1.508, + "step": 8934 + }, + { + "epoch": 0.9425105485232067, + "grad_norm": 0.5327693223953247, + "learning_rate": 1.2446595415072565e-05, + "loss": 1.5041, + "step": 8935 + }, + { + "epoch": 0.9426160337552743, + "grad_norm": 0.5450128316879272, + "learning_rate": 1.2401087915548365e-05, + "loss": 1.4803, + "step": 8936 + }, + { + "epoch": 0.9427215189873418, + "grad_norm": 0.5635148882865906, + "learning_rate": 1.2355663067660283e-05, + "loss": 1.5021, + "step": 8937 + }, + { + "epoch": 0.9428270042194092, + "grad_norm": 0.5366591215133667, + "learning_rate": 1.2310320876498333e-05, + "loss": 1.4681, + "step": 8938 + }, + { + "epoch": 0.9429324894514768, + "grad_norm": 0.5515419840812683, + "learning_rate": 1.2265061347143447e-05, + "loss": 1.5155, + "step": 8939 + }, + { + "epoch": 0.9430379746835443, + "grad_norm": 0.525877058506012, + "learning_rate": 1.2219884484667071e-05, + "loss": 
1.4955, + "step": 8940 + }, + { + "epoch": 0.9431434599156118, + "grad_norm": 0.5210142135620117, + "learning_rate": 1.2174790294131405e-05, + "loss": 1.4642, + "step": 8941 + }, + { + "epoch": 0.9432489451476793, + "grad_norm": 0.5237002372741699, + "learning_rate": 1.2129778780589823e-05, + "loss": 1.4862, + "step": 8942 + }, + { + "epoch": 0.9433544303797469, + "grad_norm": 0.5518798232078552, + "learning_rate": 1.2084849949085791e-05, + "loss": 1.4931, + "step": 8943 + }, + { + "epoch": 0.9434599156118143, + "grad_norm": 0.5284111499786377, + "learning_rate": 1.2040003804653864e-05, + "loss": 1.5288, + "step": 8944 + }, + { + "epoch": 0.9435654008438819, + "grad_norm": 0.5073506236076355, + "learning_rate": 1.199524035231936e-05, + "loss": 1.4755, + "step": 8945 + }, + { + "epoch": 0.9436708860759494, + "grad_norm": 0.5390236973762512, + "learning_rate": 1.195055959709826e-05, + "loss": 1.5014, + "step": 8946 + }, + { + "epoch": 0.9437763713080168, + "grad_norm": 0.555683434009552, + "learning_rate": 1.1905961543997147e-05, + "loss": 1.4852, + "step": 8947 + }, + { + "epoch": 0.9438818565400844, + "grad_norm": 0.5311065912246704, + "learning_rate": 1.186144619801352e-05, + "loss": 1.4902, + "step": 8948 + }, + { + "epoch": 0.9439873417721519, + "grad_norm": 0.5636129975318909, + "learning_rate": 1.1817013564135475e-05, + "loss": 1.5303, + "step": 8949 + }, + { + "epoch": 0.9440928270042194, + "grad_norm": 0.5282248258590698, + "learning_rate": 1.1772663647341947e-05, + "loss": 1.4968, + "step": 8950 + }, + { + "epoch": 0.9441983122362869, + "grad_norm": 0.5184053182601929, + "learning_rate": 1.1728396452602708e-05, + "loss": 1.4897, + "step": 8951 + }, + { + "epoch": 0.9443037974683545, + "grad_norm": 0.5534455180168152, + "learning_rate": 1.1684211984877957e-05, + "loss": 1.4962, + "step": 8952 + }, + { + "epoch": 0.9444092827004219, + "grad_norm": 0.5595492720603943, + "learning_rate": 1.1640110249118818e-05, + "loss": 1.5213, + "step": 8953 + }, + { + 
"epoch": 0.9445147679324895, + "grad_norm": 0.5292876362800598, + "learning_rate": 1.1596091250267171e-05, + "loss": 1.4958, + "step": 8954 + }, + { + "epoch": 0.944620253164557, + "grad_norm": 0.5908967852592468, + "learning_rate": 1.1552154993255488e-05, + "loss": 1.528, + "step": 8955 + }, + { + "epoch": 0.9447257383966244, + "grad_norm": 0.5639292597770691, + "learning_rate": 1.1508301483007078e-05, + "loss": 1.4888, + "step": 8956 + }, + { + "epoch": 0.944831223628692, + "grad_norm": 0.5385989546775818, + "learning_rate": 1.1464530724435928e-05, + "loss": 1.515, + "step": 8957 + }, + { + "epoch": 0.9449367088607595, + "grad_norm": 0.5325223803520203, + "learning_rate": 1.14208427224467e-05, + "loss": 1.4658, + "step": 8958 + }, + { + "epoch": 0.945042194092827, + "grad_norm": 0.5421932339668274, + "learning_rate": 1.137723748193506e-05, + "loss": 1.4766, + "step": 8959 + }, + { + "epoch": 0.9451476793248945, + "grad_norm": 0.5475661158561707, + "learning_rate": 1.1333715007786932e-05, + "loss": 1.5152, + "step": 8960 + }, + { + "epoch": 0.9452531645569621, + "grad_norm": 0.5265828371047974, + "learning_rate": 1.12902753048795e-05, + "loss": 1.4771, + "step": 8961 + }, + { + "epoch": 0.9453586497890295, + "grad_norm": 0.5110461115837097, + "learning_rate": 1.1246918378080202e-05, + "loss": 1.49, + "step": 8962 + }, + { + "epoch": 0.945464135021097, + "grad_norm": 0.5253652930259705, + "learning_rate": 1.12036442322474e-05, + "loss": 1.5, + "step": 8963 + }, + { + "epoch": 0.9455696202531646, + "grad_norm": 0.6015256643295288, + "learning_rate": 1.1160452872230303e-05, + "loss": 1.4862, + "step": 8964 + }, + { + "epoch": 0.945675105485232, + "grad_norm": 0.6061765551567078, + "learning_rate": 1.111734430286862e-05, + "loss": 1.5124, + "step": 8965 + }, + { + "epoch": 0.9457805907172996, + "grad_norm": 0.5587429404258728, + "learning_rate": 1.1074318528992905e-05, + "loss": 1.5429, + "step": 8966 + }, + { + "epoch": 0.9458860759493671, + "grad_norm": 
0.5456869006156921, + "learning_rate": 1.1031375555424466e-05, + "loss": 1.4524, + "step": 8967 + }, + { + "epoch": 0.9459915611814346, + "grad_norm": 0.6132672429084778, + "learning_rate": 1.0988515386975206e-05, + "loss": 1.5012, + "step": 8968 + }, + { + "epoch": 0.9460970464135021, + "grad_norm": 0.5730155110359192, + "learning_rate": 1.0945738028447783e-05, + "loss": 1.5105, + "step": 8969 + }, + { + "epoch": 0.9462025316455697, + "grad_norm": 0.5525025725364685, + "learning_rate": 1.0903043484635694e-05, + "loss": 1.4918, + "step": 8970 + }, + { + "epoch": 0.9463080168776371, + "grad_norm": 0.5505198240280151, + "learning_rate": 1.0860431760323032e-05, + "loss": 1.5032, + "step": 8971 + }, + { + "epoch": 0.9464135021097047, + "grad_norm": 0.5303801894187927, + "learning_rate": 1.0817902860284723e-05, + "loss": 1.4725, + "step": 8972 + }, + { + "epoch": 0.9465189873417722, + "grad_norm": 0.5550169944763184, + "learning_rate": 1.0775456789286291e-05, + "loss": 1.443, + "step": 8973 + }, + { + "epoch": 0.9466244725738396, + "grad_norm": 0.5225626826286316, + "learning_rate": 1.0733093552084016e-05, + "loss": 1.4871, + "step": 8974 + }, + { + "epoch": 0.9467299578059072, + "grad_norm": 0.5246666669845581, + "learning_rate": 1.0690813153425016e-05, + "loss": 1.5186, + "step": 8975 + }, + { + "epoch": 0.9468354430379747, + "grad_norm": 0.5825701951980591, + "learning_rate": 1.0648615598046834e-05, + "loss": 1.512, + "step": 8976 + }, + { + "epoch": 0.9469409282700422, + "grad_norm": 0.5516079068183899, + "learning_rate": 1.0606500890678023e-05, + "loss": 1.4908, + "step": 8977 + }, + { + "epoch": 0.9470464135021097, + "grad_norm": 0.5258510112762451, + "learning_rate": 1.0564469036037722e-05, + "loss": 1.5087, + "step": 8978 + }, + { + "epoch": 0.9471518987341773, + "grad_norm": 0.566221296787262, + "learning_rate": 1.0522520038835831e-05, + "loss": 1.4583, + "step": 8979 + }, + { + "epoch": 0.9472573839662447, + "grad_norm": 0.5478129386901855, + "learning_rate": 
1.0480653903772924e-05, + "loss": 1.5062, + "step": 8980 + }, + { + "epoch": 0.9473628691983123, + "grad_norm": 0.5332730412483215, + "learning_rate": 1.0438870635540332e-05, + "loss": 1.4999, + "step": 8981 + }, + { + "epoch": 0.9474683544303798, + "grad_norm": 0.5410340428352356, + "learning_rate": 1.0397170238820142e-05, + "loss": 1.4853, + "step": 8982 + }, + { + "epoch": 0.9475738396624472, + "grad_norm": 0.5369127988815308, + "learning_rate": 1.0355552718284949e-05, + "loss": 1.5155, + "step": 8983 + }, + { + "epoch": 0.9476793248945148, + "grad_norm": 0.5488144755363464, + "learning_rate": 1.0314018078598275e-05, + "loss": 1.5121, + "step": 8984 + }, + { + "epoch": 0.9477848101265823, + "grad_norm": 0.5463382601737976, + "learning_rate": 1.0272566324414313e-05, + "loss": 1.5144, + "step": 8985 + }, + { + "epoch": 0.9478902953586498, + "grad_norm": 0.5425140261650085, + "learning_rate": 1.0231197460377845e-05, + "loss": 1.4976, + "step": 8986 + }, + { + "epoch": 0.9479957805907173, + "grad_norm": 0.5324376225471497, + "learning_rate": 1.0189911491124582e-05, + "loss": 1.4839, + "step": 8987 + }, + { + "epoch": 0.9481012658227848, + "grad_norm": 0.563228964805603, + "learning_rate": 1.0148708421280822e-05, + "loss": 1.4909, + "step": 8988 + }, + { + "epoch": 0.9482067510548523, + "grad_norm": 0.5312089920043945, + "learning_rate": 1.0107588255463373e-05, + "loss": 1.5044, + "step": 8989 + }, + { + "epoch": 0.9483122362869199, + "grad_norm": 0.5344285368919373, + "learning_rate": 1.0066550998280132e-05, + "loss": 1.4841, + "step": 8990 + }, + { + "epoch": 0.9484177215189873, + "grad_norm": 0.5450170040130615, + "learning_rate": 1.0025596654329504e-05, + "loss": 1.4613, + "step": 8991 + }, + { + "epoch": 0.9485232067510548, + "grad_norm": 0.5634647607803345, + "learning_rate": 9.984725228200654e-06, + "loss": 1.4755, + "step": 8992 + }, + { + "epoch": 0.9486286919831224, + "grad_norm": 0.5562004446983337, + "learning_rate": 9.943936724473412e-06, + "loss": 
1.4741, + "step": 8993 + }, + { + "epoch": 0.9487341772151898, + "grad_norm": 0.5727685689926147, + "learning_rate": 9.903231147718294e-06, + "loss": 1.4955, + "step": 8994 + }, + { + "epoch": 0.9488396624472574, + "grad_norm": 0.5150254964828491, + "learning_rate": 9.862608502496568e-06, + "loss": 1.5397, + "step": 8995 + }, + { + "epoch": 0.9489451476793249, + "grad_norm": 0.5119209289550781, + "learning_rate": 9.822068793360172e-06, + "loss": 1.4763, + "step": 8996 + }, + { + "epoch": 0.9490506329113924, + "grad_norm": 0.5345094203948975, + "learning_rate": 9.781612024851893e-06, + "loss": 1.4934, + "step": 8997 + }, + { + "epoch": 0.9491561181434599, + "grad_norm": 0.514701247215271, + "learning_rate": 9.74123820150502e-06, + "loss": 1.5083, + "step": 8998 + }, + { + "epoch": 0.9492616033755275, + "grad_norm": 0.5494600534439087, + "learning_rate": 9.700947327843685e-06, + "loss": 1.4965, + "step": 8999 + }, + { + "epoch": 0.9493670886075949, + "grad_norm": 0.5365203022956848, + "learning_rate": 9.660739408382608e-06, + "loss": 1.4824, + "step": 9000 + }, + { + "epoch": 0.9494725738396624, + "grad_norm": 0.5204641222953796, + "learning_rate": 9.620614447627435e-06, + "loss": 1.4944, + "step": 9001 + }, + { + "epoch": 0.94957805907173, + "grad_norm": 0.5513626337051392, + "learning_rate": 9.580572450074237e-06, + "loss": 1.4988, + "step": 9002 + }, + { + "epoch": 0.9496835443037974, + "grad_norm": 0.5521519780158997, + "learning_rate": 9.540613420209927e-06, + "loss": 1.5093, + "step": 9003 + }, + { + "epoch": 0.949789029535865, + "grad_norm": 0.5287699699401855, + "learning_rate": 9.500737362512168e-06, + "loss": 1.4722, + "step": 9004 + }, + { + "epoch": 0.9498945147679325, + "grad_norm": 0.5306258201599121, + "learning_rate": 9.460944281449307e-06, + "loss": 1.4516, + "step": 9005 + }, + { + "epoch": 0.95, + "grad_norm": 0.5590242147445679, + "learning_rate": 9.421234181480275e-06, + "loss": 1.4944, + "step": 9006 + }, + { + "epoch": 0.9501054852320675, + 
"grad_norm": 0.5683691501617432, + "learning_rate": 9.381607067054764e-06, + "loss": 1.5259, + "step": 9007 + }, + { + "epoch": 0.950210970464135, + "grad_norm": 0.5390235781669617, + "learning_rate": 9.342062942613222e-06, + "loss": 1.5167, + "step": 9008 + }, + { + "epoch": 0.9503164556962025, + "grad_norm": 0.5479554533958435, + "learning_rate": 9.302601812586852e-06, + "loss": 1.4783, + "step": 9009 + }, + { + "epoch": 0.95042194092827, + "grad_norm": 0.5643352270126343, + "learning_rate": 9.26322368139737e-06, + "loss": 1.4822, + "step": 9010 + }, + { + "epoch": 0.9505274261603376, + "grad_norm": 0.5669637322425842, + "learning_rate": 9.223928553457328e-06, + "loss": 1.4573, + "step": 9011 + }, + { + "epoch": 0.950632911392405, + "grad_norm": 0.5653201937675476, + "learning_rate": 9.184716433169955e-06, + "loss": 1.506, + "step": 9012 + }, + { + "epoch": 0.9507383966244726, + "grad_norm": 0.5531694889068604, + "learning_rate": 9.145587324929066e-06, + "loss": 1.5054, + "step": 9013 + }, + { + "epoch": 0.9508438818565401, + "grad_norm": 0.5718324184417725, + "learning_rate": 9.106541233119409e-06, + "loss": 1.4937, + "step": 9014 + }, + { + "epoch": 0.9509493670886076, + "grad_norm": 0.6034225821495056, + "learning_rate": 9.06757816211623e-06, + "loss": 1.4869, + "step": 9015 + }, + { + "epoch": 0.9510548523206751, + "grad_norm": 0.5596174597740173, + "learning_rate": 9.028698116285538e-06, + "loss": 1.443, + "step": 9016 + }, + { + "epoch": 0.9511603375527427, + "grad_norm": 0.5267828702926636, + "learning_rate": 8.989901099984016e-06, + "loss": 1.4621, + "step": 9017 + }, + { + "epoch": 0.9512658227848101, + "grad_norm": 0.5185201168060303, + "learning_rate": 8.9511871175591e-06, + "loss": 1.5055, + "step": 9018 + }, + { + "epoch": 0.9513713080168776, + "grad_norm": 0.5180529356002808, + "learning_rate": 8.912556173348907e-06, + "loss": 1.4904, + "step": 9019 + }, + { + "epoch": 0.9514767932489452, + "grad_norm": 0.5541896820068359, + "learning_rate": 
8.874008271682222e-06, + "loss": 1.5331, + "step": 9020 + }, + { + "epoch": 0.9515822784810126, + "grad_norm": 0.524610698223114, + "learning_rate": 8.835543416878422e-06, + "loss": 1.5071, + "step": 9021 + }, + { + "epoch": 0.9516877637130802, + "grad_norm": 0.5530322194099426, + "learning_rate": 8.797161613247728e-06, + "loss": 1.5372, + "step": 9022 + }, + { + "epoch": 0.9517932489451477, + "grad_norm": 0.5422590970993042, + "learning_rate": 8.758862865091117e-06, + "loss": 1.5322, + "step": 9023 + }, + { + "epoch": 0.9518987341772152, + "grad_norm": 0.5256364941596985, + "learning_rate": 8.72064717670007e-06, + "loss": 1.4727, + "step": 9024 + }, + { + "epoch": 0.9520042194092827, + "grad_norm": 0.5503426790237427, + "learning_rate": 8.68251455235683e-06, + "loss": 1.5159, + "step": 9025 + }, + { + "epoch": 0.9521097046413503, + "grad_norm": 0.5404753684997559, + "learning_rate": 8.644464996334395e-06, + "loss": 1.5472, + "step": 9026 + }, + { + "epoch": 0.9522151898734177, + "grad_norm": 0.5556758642196655, + "learning_rate": 8.606498512896438e-06, + "loss": 1.4845, + "step": 9027 + }, + { + "epoch": 0.9523206751054852, + "grad_norm": 0.5697832107543945, + "learning_rate": 8.568615106297223e-06, + "loss": 1.5032, + "step": 9028 + }, + { + "epoch": 0.9524261603375528, + "grad_norm": 0.5402491092681885, + "learning_rate": 8.53081478078177e-06, + "loss": 1.5288, + "step": 9029 + }, + { + "epoch": 0.9525316455696202, + "grad_norm": 0.5251110792160034, + "learning_rate": 8.493097540585775e-06, + "loss": 1.4798, + "step": 9030 + }, + { + "epoch": 0.9526371308016878, + "grad_norm": 0.5307680368423462, + "learning_rate": 8.455463389935774e-06, + "loss": 1.4581, + "step": 9031 + }, + { + "epoch": 0.9527426160337553, + "grad_norm": 0.5413397550582886, + "learning_rate": 8.417912333048727e-06, + "loss": 1.523, + "step": 9032 + }, + { + "epoch": 0.9528481012658228, + "grad_norm": 0.5701863765716553, + "learning_rate": 8.380444374132517e-06, + "loss": 1.5013, + "step": 
9033 + }, + { + "epoch": 0.9529535864978903, + "grad_norm": 0.5360689163208008, + "learning_rate": 8.343059517385454e-06, + "loss": 1.5168, + "step": 9034 + }, + { + "epoch": 0.9530590717299579, + "grad_norm": 0.5282937288284302, + "learning_rate": 8.305757766996935e-06, + "loss": 1.4623, + "step": 9035 + }, + { + "epoch": 0.9531645569620253, + "grad_norm": 0.5448880195617676, + "learning_rate": 8.268539127146619e-06, + "loss": 1.4597, + "step": 9036 + }, + { + "epoch": 0.9532700421940928, + "grad_norm": 0.5149115920066833, + "learning_rate": 8.231403602005083e-06, + "loss": 1.4489, + "step": 9037 + }, + { + "epoch": 0.9533755274261604, + "grad_norm": 0.5415140390396118, + "learning_rate": 8.194351195733585e-06, + "loss": 1.492, + "step": 9038 + }, + { + "epoch": 0.9534810126582278, + "grad_norm": 0.5690169334411621, + "learning_rate": 8.157381912484053e-06, + "loss": 1.4833, + "step": 9039 + }, + { + "epoch": 0.9535864978902954, + "grad_norm": 0.5194066762924194, + "learning_rate": 8.120495756399005e-06, + "loss": 1.5047, + "step": 9040 + }, + { + "epoch": 0.9536919831223629, + "grad_norm": 0.554690420627594, + "learning_rate": 8.08369273161172e-06, + "loss": 1.5128, + "step": 9041 + }, + { + "epoch": 0.9537974683544304, + "grad_norm": 0.5351239442825317, + "learning_rate": 8.046972842246147e-06, + "loss": 1.5078, + "step": 9042 + }, + { + "epoch": 0.9539029535864979, + "grad_norm": 0.55775386095047, + "learning_rate": 8.01033609241708e-06, + "loss": 1.4727, + "step": 9043 + }, + { + "epoch": 0.9540084388185655, + "grad_norm": 0.5218008160591125, + "learning_rate": 7.973782486229737e-06, + "loss": 1.4922, + "step": 9044 + }, + { + "epoch": 0.9541139240506329, + "grad_norm": 0.5274496674537659, + "learning_rate": 7.937312027780169e-06, + "loss": 1.5073, + "step": 9045 + }, + { + "epoch": 0.9542194092827004, + "grad_norm": 0.5205379128456116, + "learning_rate": 7.900924721154945e-06, + "loss": 1.4864, + "step": 9046 + }, + { + "epoch": 0.954324894514768, + 
"grad_norm": 0.5206708312034607, + "learning_rate": 7.864620570431635e-06, + "loss": 1.5449, + "step": 9047 + }, + { + "epoch": 0.9544303797468354, + "grad_norm": 0.5474662780761719, + "learning_rate": 7.828399579678153e-06, + "loss": 1.4818, + "step": 9048 + }, + { + "epoch": 0.954535864978903, + "grad_norm": 0.5177718997001648, + "learning_rate": 7.792261752953333e-06, + "loss": 1.4879, + "step": 9049 + }, + { + "epoch": 0.9546413502109705, + "grad_norm": 0.5412337183952332, + "learning_rate": 7.756207094306605e-06, + "loss": 1.4883, + "step": 9050 + }, + { + "epoch": 0.954746835443038, + "grad_norm": 0.5087414383888245, + "learning_rate": 7.720235607777987e-06, + "loss": 1.5091, + "step": 9051 + }, + { + "epoch": 0.9548523206751055, + "grad_norm": 0.5132730007171631, + "learning_rate": 7.684347297398254e-06, + "loss": 1.4968, + "step": 9052 + }, + { + "epoch": 0.9549578059071729, + "grad_norm": 0.5384321808815002, + "learning_rate": 7.648542167189021e-06, + "loss": 1.4823, + "step": 9053 + }, + { + "epoch": 0.9550632911392405, + "grad_norm": 0.515757143497467, + "learning_rate": 7.612820221162331e-06, + "loss": 1.5224, + "step": 9054 + }, + { + "epoch": 0.955168776371308, + "grad_norm": 0.5383664965629578, + "learning_rate": 7.577181463320981e-06, + "loss": 1.4789, + "step": 9055 + }, + { + "epoch": 0.9552742616033755, + "grad_norm": 0.5724589228630066, + "learning_rate": 7.541625897658444e-06, + "loss": 1.5162, + "step": 9056 + }, + { + "epoch": 0.955379746835443, + "grad_norm": 0.5560566186904907, + "learning_rate": 7.506153528159032e-06, + "loss": 1.4967, + "step": 9057 + }, + { + "epoch": 0.9554852320675106, + "grad_norm": 0.5517662167549133, + "learning_rate": 7.470764358797566e-06, + "loss": 1.5211, + "step": 9058 + }, + { + "epoch": 0.955590717299578, + "grad_norm": 0.5337706804275513, + "learning_rate": 7.435458393539457e-06, + "loss": 1.4757, + "step": 9059 + }, + { + "epoch": 0.9556962025316456, + "grad_norm": 0.5233127474784851, + "learning_rate": 
7.400235636340957e-06, + "loss": 1.487, + "step": 9060 + }, + { + "epoch": 0.9558016877637131, + "grad_norm": 0.5510777235031128, + "learning_rate": 7.3650960911490764e-06, + "loss": 1.5152, + "step": 9061 + }, + { + "epoch": 0.9559071729957805, + "grad_norm": 0.5182316899299622, + "learning_rate": 7.330039761901247e-06, + "loss": 1.5152, + "step": 9062 + }, + { + "epoch": 0.9560126582278481, + "grad_norm": 0.5492849349975586, + "learning_rate": 7.295066652525828e-06, + "loss": 1.5023, + "step": 9063 + }, + { + "epoch": 0.9561181434599156, + "grad_norm": 0.5461703538894653, + "learning_rate": 7.260176766941601e-06, + "loss": 1.4853, + "step": 9064 + }, + { + "epoch": 0.9562236286919831, + "grad_norm": 0.5510846376419067, + "learning_rate": 7.225370109058188e-06, + "loss": 1.5233, + "step": 9065 + }, + { + "epoch": 0.9563291139240506, + "grad_norm": 0.5769557356834412, + "learning_rate": 7.190646682775886e-06, + "loss": 1.4787, + "step": 9066 + }, + { + "epoch": 0.9564345991561182, + "grad_norm": 0.5340915322303772, + "learning_rate": 7.1560064919855835e-06, + "loss": 1.5134, + "step": 9067 + }, + { + "epoch": 0.9565400843881856, + "grad_norm": 0.5361104011535645, + "learning_rate": 7.121449540568842e-06, + "loss": 1.4923, + "step": 9068 + }, + { + "epoch": 0.9566455696202532, + "grad_norm": 0.5635768175125122, + "learning_rate": 7.086975832398146e-06, + "loss": 1.4938, + "step": 9069 + }, + { + "epoch": 0.9567510548523207, + "grad_norm": 0.5219584107398987, + "learning_rate": 7.0525853713362395e-06, + "loss": 1.4606, + "step": 9070 + }, + { + "epoch": 0.9568565400843881, + "grad_norm": 0.529251217842102, + "learning_rate": 7.018278161236791e-06, + "loss": 1.5217, + "step": 9071 + }, + { + "epoch": 0.9569620253164557, + "grad_norm": 0.585749626159668, + "learning_rate": 6.984054205944141e-06, + "loss": 1.4721, + "step": 9072 + }, + { + "epoch": 0.9570675105485232, + "grad_norm": 0.5596514940261841, + "learning_rate": 6.949913509293221e-06, + "loss": 1.521, + "step": 
9073 + }, + { + "epoch": 0.9571729957805907, + "grad_norm": 0.5418827533721924, + "learning_rate": 6.915856075109722e-06, + "loss": 1.5172, + "step": 9074 + }, + { + "epoch": 0.9572784810126582, + "grad_norm": 0.5079063177108765, + "learning_rate": 6.881881907209841e-06, + "loss": 1.4771, + "step": 9075 + }, + { + "epoch": 0.9573839662447258, + "grad_norm": 0.5063021183013916, + "learning_rate": 6.847991009400617e-06, + "loss": 1.4594, + "step": 9076 + }, + { + "epoch": 0.9574894514767932, + "grad_norm": 0.581186830997467, + "learning_rate": 6.814183385479677e-06, + "loss": 1.5164, + "step": 9077 + }, + { + "epoch": 0.9575949367088608, + "grad_norm": 0.5340443253517151, + "learning_rate": 6.780459039235409e-06, + "loss": 1.512, + "step": 9078 + }, + { + "epoch": 0.9577004219409283, + "grad_norm": 0.5399587750434875, + "learning_rate": 6.746817974446706e-06, + "loss": 1.4511, + "step": 9079 + }, + { + "epoch": 0.9578059071729957, + "grad_norm": 0.5601441264152527, + "learning_rate": 6.71326019488322e-06, + "loss": 1.5233, + "step": 9080 + }, + { + "epoch": 0.9579113924050633, + "grad_norm": 0.549071192741394, + "learning_rate": 6.679785704305358e-06, + "loss": 1.5212, + "step": 9081 + }, + { + "epoch": 0.9580168776371308, + "grad_norm": 0.5329592227935791, + "learning_rate": 6.6463945064639544e-06, + "loss": 1.5056, + "step": 9082 + }, + { + "epoch": 0.9581223628691983, + "grad_norm": 0.5341116189956665, + "learning_rate": 6.6130866051007654e-06, + "loss": 1.4997, + "step": 9083 + }, + { + "epoch": 0.9582278481012658, + "grad_norm": 0.5471962094306946, + "learning_rate": 6.57986200394814e-06, + "loss": 1.4997, + "step": 9084 + }, + { + "epoch": 0.9583333333333334, + "grad_norm": 0.520425021648407, + "learning_rate": 6.546720706728931e-06, + "loss": 1.465, + "step": 9085 + }, + { + "epoch": 0.9584388185654008, + "grad_norm": 0.5170020461082458, + "learning_rate": 6.513662717156838e-06, + "loss": 1.4949, + "step": 9086 + }, + { + "epoch": 0.9585443037974684, + 
"grad_norm": 0.5542484521865845, + "learning_rate": 6.480688038936311e-06, + "loss": 1.4836, + "step": 9087 + }, + { + "epoch": 0.9586497890295359, + "grad_norm": 0.5538113713264465, + "learning_rate": 6.447796675762146e-06, + "loss": 1.5161, + "step": 9088 + }, + { + "epoch": 0.9587552742616033, + "grad_norm": 0.5503586530685425, + "learning_rate": 6.414988631320062e-06, + "loss": 1.4983, + "step": 9089 + }, + { + "epoch": 0.9588607594936709, + "grad_norm": 0.5386479496955872, + "learning_rate": 6.3822639092862846e-06, + "loss": 1.5255, + "step": 9090 + }, + { + "epoch": 0.9589662447257384, + "grad_norm": 0.5148718953132629, + "learning_rate": 6.349622513327963e-06, + "loss": 1.4553, + "step": 9091 + }, + { + "epoch": 0.9590717299578059, + "grad_norm": 0.5663661956787109, + "learning_rate": 6.317064447102505e-06, + "loss": 1.4783, + "step": 9092 + }, + { + "epoch": 0.9591772151898734, + "grad_norm": 0.5425037145614624, + "learning_rate": 6.28458971425841e-06, + "loss": 1.4931, + "step": 9093 + }, + { + "epoch": 0.959282700421941, + "grad_norm": 0.5627016425132751, + "learning_rate": 6.252198318434432e-06, + "loss": 1.4642, + "step": 9094 + }, + { + "epoch": 0.9593881856540084, + "grad_norm": 0.5200532078742981, + "learning_rate": 6.219890263260336e-06, + "loss": 1.4746, + "step": 9095 + }, + { + "epoch": 0.959493670886076, + "grad_norm": 0.5574741363525391, + "learning_rate": 6.187665552356392e-06, + "loss": 1.5008, + "step": 9096 + }, + { + "epoch": 0.9595991561181435, + "grad_norm": 0.5771438479423523, + "learning_rate": 6.155524189333461e-06, + "loss": 1.5199, + "step": 9097 + }, + { + "epoch": 0.9597046413502109, + "grad_norm": 0.5508196949958801, + "learning_rate": 6.123466177793247e-06, + "loss": 1.4607, + "step": 9098 + }, + { + "epoch": 0.9598101265822785, + "grad_norm": 0.5378077030181885, + "learning_rate": 6.091491521327958e-06, + "loss": 1.4318, + "step": 9099 + }, + { + "epoch": 0.959915611814346, + "grad_norm": 0.5202570557594299, + "learning_rate": 
6.059600223520478e-06, + "loss": 1.4798, + "step": 9100 + }, + { + "epoch": 0.9600210970464135, + "grad_norm": 0.5325257182121277, + "learning_rate": 6.027792287944367e-06, + "loss": 1.5183, + "step": 9101 + }, + { + "epoch": 0.960126582278481, + "grad_norm": 0.5314754843711853, + "learning_rate": 5.996067718163939e-06, + "loss": 1.4729, + "step": 9102 + }, + { + "epoch": 0.9602320675105486, + "grad_norm": 0.5693559050559998, + "learning_rate": 5.964426517734101e-06, + "loss": 1.4834, + "step": 9103 + }, + { + "epoch": 0.960337552742616, + "grad_norm": 0.5148019790649414, + "learning_rate": 5.932868690200266e-06, + "loss": 1.4948, + "step": 9104 + }, + { + "epoch": 0.9604430379746836, + "grad_norm": 0.5460163354873657, + "learning_rate": 5.901394239098856e-06, + "loss": 1.4915, + "step": 9105 + }, + { + "epoch": 0.9605485232067511, + "grad_norm": 0.5151533484458923, + "learning_rate": 5.870003167956634e-06, + "loss": 1.5067, + "step": 9106 + }, + { + "epoch": 0.9606540084388185, + "grad_norm": 0.5258286595344543, + "learning_rate": 5.838695480291034e-06, + "loss": 1.4826, + "step": 9107 + }, + { + "epoch": 0.9607594936708861, + "grad_norm": 0.5263980627059937, + "learning_rate": 5.807471179610418e-06, + "loss": 1.4639, + "step": 9108 + }, + { + "epoch": 0.9608649789029536, + "grad_norm": 0.517041027545929, + "learning_rate": 5.776330269413488e-06, + "loss": 1.5176, + "step": 9109 + }, + { + "epoch": 0.9609704641350211, + "grad_norm": 0.5670960545539856, + "learning_rate": 5.745272753189784e-06, + "loss": 1.475, + "step": 9110 + }, + { + "epoch": 0.9610759493670886, + "grad_norm": 0.5482287406921387, + "learning_rate": 5.714298634419524e-06, + "loss": 1.4903, + "step": 9111 + }, + { + "epoch": 0.9611814345991562, + "grad_norm": 0.5433443188667297, + "learning_rate": 5.6834079165733464e-06, + "loss": 1.4988, + "step": 9112 + }, + { + "epoch": 0.9612869198312236, + "grad_norm": 0.5272303223609924, + "learning_rate": 5.652600603112818e-06, + "loss": 1.4888, + "step": 
9113 + }, + { + "epoch": 0.9613924050632912, + "grad_norm": 0.566161036491394, + "learning_rate": 5.6218766974900915e-06, + "loss": 1.4936, + "step": 9114 + }, + { + "epoch": 0.9614978902953587, + "grad_norm": 0.5181717872619629, + "learning_rate": 5.591236203147915e-06, + "loss": 1.4956, + "step": 9115 + }, + { + "epoch": 0.9616033755274261, + "grad_norm": 0.5584035515785217, + "learning_rate": 5.560679123519624e-06, + "loss": 1.5353, + "step": 9116 + }, + { + "epoch": 0.9617088607594937, + "grad_norm": 0.5569641590118408, + "learning_rate": 5.530205462029314e-06, + "loss": 1.4946, + "step": 9117 + }, + { + "epoch": 0.9618143459915611, + "grad_norm": 0.5615367293357849, + "learning_rate": 5.499815222091836e-06, + "loss": 1.5446, + "step": 9118 + }, + { + "epoch": 0.9619198312236287, + "grad_norm": 0.5341727137565613, + "learning_rate": 5.469508407112467e-06, + "loss": 1.5177, + "step": 9119 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 0.550889253616333, + "learning_rate": 5.439285020487156e-06, + "loss": 1.4409, + "step": 9120 + }, + { + "epoch": 0.9621308016877637, + "grad_norm": 0.5608037710189819, + "learning_rate": 5.409145065602694e-06, + "loss": 1.506, + "step": 9121 + }, + { + "epoch": 0.9622362869198312, + "grad_norm": 0.5339140295982361, + "learning_rate": 5.379088545836464e-06, + "loss": 1.4709, + "step": 9122 + }, + { + "epoch": 0.9623417721518988, + "grad_norm": 0.5432995557785034, + "learning_rate": 5.349115464556354e-06, + "loss": 1.5064, + "step": 9123 + }, + { + "epoch": 0.9624472573839662, + "grad_norm": 0.515389084815979, + "learning_rate": 5.319225825120927e-06, + "loss": 1.4769, + "step": 9124 + }, + { + "epoch": 0.9625527426160337, + "grad_norm": 0.5212834477424622, + "learning_rate": 5.289419630879672e-06, + "loss": 1.4648, + "step": 9125 + }, + { + "epoch": 0.9626582278481013, + "grad_norm": 0.5564427375793457, + "learning_rate": 5.2596968851724155e-06, + "loss": 1.4725, + "step": 9126 + }, + { + "epoch": 0.9627637130801687, + 
"grad_norm": 0.5364004373550415, + "learning_rate": 5.230057591329662e-06, + "loss": 1.5071, + "step": 9127 + }, + { + "epoch": 0.9628691983122363, + "grad_norm": 0.5548893809318542, + "learning_rate": 5.200501752672754e-06, + "loss": 1.4585, + "step": 9128 + }, + { + "epoch": 0.9629746835443038, + "grad_norm": 0.5164585113525391, + "learning_rate": 5.171029372513458e-06, + "loss": 1.459, + "step": 9129 + }, + { + "epoch": 0.9630801687763713, + "grad_norm": 0.5255855917930603, + "learning_rate": 5.141640454154467e-06, + "loss": 1.5608, + "step": 9130 + }, + { + "epoch": 0.9631856540084388, + "grad_norm": 0.5532920360565186, + "learning_rate": 5.112335000888813e-06, + "loss": 1.5178, + "step": 9131 + }, + { + "epoch": 0.9632911392405064, + "grad_norm": 0.5181962251663208, + "learning_rate": 5.083113016000368e-06, + "loss": 1.5347, + "step": 9132 + }, + { + "epoch": 0.9633966244725738, + "grad_norm": 0.5801791548728943, + "learning_rate": 5.053974502763681e-06, + "loss": 1.5464, + "step": 9133 + }, + { + "epoch": 0.9635021097046413, + "grad_norm": 0.5283863544464111, + "learning_rate": 5.024919464443723e-06, + "loss": 1.4607, + "step": 9134 + }, + { + "epoch": 0.9636075949367089, + "grad_norm": 0.5557407736778259, + "learning_rate": 4.995947904296305e-06, + "loss": 1.4818, + "step": 9135 + }, + { + "epoch": 0.9637130801687763, + "grad_norm": 0.5342746376991272, + "learning_rate": 4.967059825567832e-06, + "loss": 1.508, + "step": 9136 + }, + { + "epoch": 0.9638185654008439, + "grad_norm": 0.5438628196716309, + "learning_rate": 4.938255231495464e-06, + "loss": 1.4976, + "step": 9137 + }, + { + "epoch": 0.9639240506329114, + "grad_norm": 0.5390324592590332, + "learning_rate": 4.909534125306702e-06, + "loss": 1.5099, + "step": 9138 + }, + { + "epoch": 0.9640295358649789, + "grad_norm": 0.5381283164024353, + "learning_rate": 4.880896510220056e-06, + "loss": 1.5021, + "step": 9139 + }, + { + "epoch": 0.9641350210970464, + "grad_norm": 0.5577210783958435, + "learning_rate": 
4.852342389444458e-06, + "loss": 1.5038, + "step": 9140 + }, + { + "epoch": 0.964240506329114, + "grad_norm": 0.5622023940086365, + "learning_rate": 4.823871766179516e-06, + "loss": 1.4725, + "step": 9141 + }, + { + "epoch": 0.9643459915611814, + "grad_norm": 0.5249776840209961, + "learning_rate": 4.7954846436155104e-06, + "loss": 1.4844, + "step": 9142 + }, + { + "epoch": 0.9644514767932489, + "grad_norm": 0.5432139039039612, + "learning_rate": 4.767181024933398e-06, + "loss": 1.4945, + "step": 9143 + }, + { + "epoch": 0.9645569620253165, + "grad_norm": 0.546330988407135, + "learning_rate": 4.738960913304724e-06, + "loss": 1.4969, + "step": 9144 + }, + { + "epoch": 0.9646624472573839, + "grad_norm": 0.5296467542648315, + "learning_rate": 4.710824311891709e-06, + "loss": 1.4858, + "step": 9145 + }, + { + "epoch": 0.9647679324894515, + "grad_norm": 0.5363339781761169, + "learning_rate": 4.682771223847166e-06, + "loss": 1.5166, + "step": 9146 + }, + { + "epoch": 0.964873417721519, + "grad_norm": 0.5212062001228333, + "learning_rate": 4.654801652314577e-06, + "loss": 1.5139, + "step": 9147 + }, + { + "epoch": 0.9649789029535865, + "grad_norm": 0.5355806946754456, + "learning_rate": 4.626915600428105e-06, + "loss": 1.4788, + "step": 9148 + }, + { + "epoch": 0.965084388185654, + "grad_norm": 0.5503888726234436, + "learning_rate": 4.5991130713124995e-06, + "loss": 1.5022, + "step": 9149 + }, + { + "epoch": 0.9651898734177216, + "grad_norm": 0.5510222911834717, + "learning_rate": 4.571394068083185e-06, + "loss": 1.4867, + "step": 9150 + }, + { + "epoch": 0.965295358649789, + "grad_norm": 0.5405086278915405, + "learning_rate": 4.543758593846175e-06, + "loss": 1.4687, + "step": 9151 + }, + { + "epoch": 0.9654008438818565, + "grad_norm": 0.543758749961853, + "learning_rate": 4.516206651698246e-06, + "loss": 1.5108, + "step": 9152 + }, + { + "epoch": 0.9655063291139241, + "grad_norm": 0.541227400302887, + "learning_rate": 4.488738244726593e-06, + "loss": 1.5116, + "step": 
9153 + }, + { + "epoch": 0.9656118143459915, + "grad_norm": 0.5431193113327026, + "learning_rate": 4.4613533760093365e-06, + "loss": 1.5019, + "step": 9154 + }, + { + "epoch": 0.9657172995780591, + "grad_norm": 0.550828754901886, + "learning_rate": 4.434052048615022e-06, + "loss": 1.5124, + "step": 9155 + }, + { + "epoch": 0.9658227848101266, + "grad_norm": 0.5125474333763123, + "learning_rate": 4.4068342656028715e-06, + "loss": 1.4898, + "step": 9156 + }, + { + "epoch": 0.9659282700421941, + "grad_norm": 0.5482292175292969, + "learning_rate": 4.37970003002286e-06, + "loss": 1.4879, + "step": 9157 + }, + { + "epoch": 0.9660337552742616, + "grad_norm": 0.5624083876609802, + "learning_rate": 4.352649344915471e-06, + "loss": 1.5201, + "step": 9158 + }, + { + "epoch": 0.9661392405063292, + "grad_norm": 0.5415869355201721, + "learning_rate": 4.325682213311782e-06, + "loss": 1.4757, + "step": 9159 + }, + { + "epoch": 0.9662447257383966, + "grad_norm": 0.5158420205116272, + "learning_rate": 4.298798638233709e-06, + "loss": 1.509, + "step": 9160 + }, + { + "epoch": 0.9663502109704641, + "grad_norm": 0.5069647431373596, + "learning_rate": 4.271998622693674e-06, + "loss": 1.4612, + "step": 9161 + }, + { + "epoch": 0.9664556962025317, + "grad_norm": 0.5221863985061646, + "learning_rate": 4.245282169694692e-06, + "loss": 1.4868, + "step": 9162 + }, + { + "epoch": 0.9665611814345991, + "grad_norm": 0.5489450693130493, + "learning_rate": 4.218649282230536e-06, + "loss": 1.4638, + "step": 9163 + }, + { + "epoch": 0.9666666666666667, + "grad_norm": 0.559729814529419, + "learning_rate": 4.192099963285484e-06, + "loss": 1.5291, + "step": 9164 + }, + { + "epoch": 0.9667721518987342, + "grad_norm": 0.6027981638908386, + "learning_rate": 4.165634215834574e-06, + "loss": 1.5329, + "step": 9165 + }, + { + "epoch": 0.9668776371308017, + "grad_norm": 0.5651026964187622, + "learning_rate": 4.139252042843517e-06, + "loss": 1.512, + "step": 9166 + }, + { + "epoch": 0.9669831223628692, + 
"grad_norm": 0.5038247108459473, + "learning_rate": 4.112953447268364e-06, + "loss": 1.5071, + "step": 9167 + }, + { + "epoch": 0.9670886075949368, + "grad_norm": 0.5429088473320007, + "learning_rate": 4.086738432056092e-06, + "loss": 1.4933, + "step": 9168 + }, + { + "epoch": 0.9671940928270042, + "grad_norm": 0.5447906255722046, + "learning_rate": 4.060607000144351e-06, + "loss": 1.5291, + "step": 9169 + }, + { + "epoch": 0.9672995780590717, + "grad_norm": 0.5604856014251709, + "learning_rate": 4.034559154461049e-06, + "loss": 1.4571, + "step": 9170 + }, + { + "epoch": 0.9674050632911393, + "grad_norm": 0.5212651491165161, + "learning_rate": 4.008594897925183e-06, + "loss": 1.4487, + "step": 9171 + }, + { + "epoch": 0.9675105485232067, + "grad_norm": 0.5500742793083191, + "learning_rate": 3.982714233446094e-06, + "loss": 1.5364, + "step": 9172 + }, + { + "epoch": 0.9676160337552743, + "grad_norm": 0.5261222720146179, + "learning_rate": 3.956917163923879e-06, + "loss": 1.5163, + "step": 9173 + }, + { + "epoch": 0.9677215189873418, + "grad_norm": 0.5271826386451721, + "learning_rate": 3.931203692249141e-06, + "loss": 1.4928, + "step": 9174 + }, + { + "epoch": 0.9678270042194093, + "grad_norm": 0.5323324799537659, + "learning_rate": 3.905573821303327e-06, + "loss": 1.4808, + "step": 9175 + }, + { + "epoch": 0.9679324894514768, + "grad_norm": 0.5315701961517334, + "learning_rate": 3.880027553958304e-06, + "loss": 1.4732, + "step": 9176 + }, + { + "epoch": 0.9680379746835444, + "grad_norm": 0.5231594443321228, + "learning_rate": 3.8545648930767005e-06, + "loss": 1.4984, + "step": 9177 + }, + { + "epoch": 0.9681434599156118, + "grad_norm": 0.5658448934555054, + "learning_rate": 3.8291858415117344e-06, + "loss": 1.5138, + "step": 9178 + }, + { + "epoch": 0.9682489451476793, + "grad_norm": 0.5379332900047302, + "learning_rate": 3.803890402107213e-06, + "loss": 1.5047, + "step": 9179 + }, + { + "epoch": 0.9683544303797469, + "grad_norm": 0.5460322499275208, + 
"learning_rate": 3.7786785776976198e-06, + "loss": 1.4651, + "step": 9180 + }, + { + "epoch": 0.9684599156118143, + "grad_norm": 0.570617139339447, + "learning_rate": 3.7535503711080276e-06, + "loss": 1.504, + "step": 9181 + }, + { + "epoch": 0.9685654008438819, + "grad_norm": 0.5623070001602173, + "learning_rate": 3.7285057851543515e-06, + "loss": 1.4852, + "step": 9182 + }, + { + "epoch": 0.9686708860759494, + "grad_norm": 0.5435299873352051, + "learning_rate": 3.703544822642846e-06, + "loss": 1.4165, + "step": 9183 + }, + { + "epoch": 0.9687763713080169, + "grad_norm": 0.5558583736419678, + "learning_rate": 3.6786674863704406e-06, + "loss": 1.5088, + "step": 9184 + }, + { + "epoch": 0.9688818565400844, + "grad_norm": 0.5334396362304688, + "learning_rate": 3.6538737791249053e-06, + "loss": 1.5101, + "step": 9185 + }, + { + "epoch": 0.9689873417721518, + "grad_norm": 0.5189523696899414, + "learning_rate": 3.629163703684352e-06, + "loss": 1.4817, + "step": 9186 + }, + { + "epoch": 0.9690928270042194, + "grad_norm": 0.542043924331665, + "learning_rate": 3.604537262817814e-06, + "loss": 1.5113, + "step": 9187 + }, + { + "epoch": 0.9691983122362869, + "grad_norm": 0.5267513394355774, + "learning_rate": 3.579994459284752e-06, + "loss": 1.4948, + "step": 9188 + }, + { + "epoch": 0.9693037974683544, + "grad_norm": 0.5217053890228271, + "learning_rate": 3.555535295835216e-06, + "loss": 1.5185, + "step": 9189 + }, + { + "epoch": 0.9694092827004219, + "grad_norm": 0.5541068315505981, + "learning_rate": 3.5311597752100964e-06, + "loss": 1.4685, + "step": 9190 + }, + { + "epoch": 0.9695147679324895, + "grad_norm": 0.5890070199966431, + "learning_rate": 3.506867900140792e-06, + "loss": 1.5325, + "step": 9191 + }, + { + "epoch": 0.9696202531645569, + "grad_norm": 0.5081506371498108, + "learning_rate": 3.4826596733492087e-06, + "loss": 1.4861, + "step": 9192 + }, + { + "epoch": 0.9697257383966245, + "grad_norm": 0.546771228313446, + "learning_rate": 3.4585350975481766e-06, + 
"loss": 1.465, + "step": 9193 + }, + { + "epoch": 0.969831223628692, + "grad_norm": 0.5944409370422363, + "learning_rate": 3.4344941754408663e-06, + "loss": 1.503, + "step": 9194 + }, + { + "epoch": 0.9699367088607594, + "grad_norm": 0.6008039116859436, + "learning_rate": 3.4105369097211238e-06, + "loss": 1.4813, + "step": 9195 + }, + { + "epoch": 0.970042194092827, + "grad_norm": 0.5492914319038391, + "learning_rate": 3.386663303073634e-06, + "loss": 1.4977, + "step": 9196 + }, + { + "epoch": 0.9701476793248945, + "grad_norm": 0.5250178575515747, + "learning_rate": 3.362873358173424e-06, + "loss": 1.5001, + "step": 9197 + }, + { + "epoch": 0.970253164556962, + "grad_norm": 0.5429612398147583, + "learning_rate": 3.339167077686278e-06, + "loss": 1.4584, + "step": 9198 + }, + { + "epoch": 0.9703586497890295, + "grad_norm": 0.5367332100868225, + "learning_rate": 3.3155444642687384e-06, + "loss": 1.53, + "step": 9199 + }, + { + "epoch": 0.9704641350210971, + "grad_norm": 0.5553164482116699, + "learning_rate": 3.2920055205676867e-06, + "loss": 1.4612, + "step": 9200 + }, + { + "epoch": 0.9705696202531645, + "grad_norm": 0.5510815382003784, + "learning_rate": 3.2685502492208475e-06, + "loss": 1.5046, + "step": 9201 + }, + { + "epoch": 0.9706751054852321, + "grad_norm": 0.5430046319961548, + "learning_rate": 3.245178652856534e-06, + "loss": 1.5151, + "step": 9202 + }, + { + "epoch": 0.9707805907172996, + "grad_norm": 0.5425148606300354, + "learning_rate": 3.221890734093569e-06, + "loss": 1.4673, + "step": 9203 + }, + { + "epoch": 0.970886075949367, + "grad_norm": 0.5436000823974609, + "learning_rate": 3.198686495541531e-06, + "loss": 1.5152, + "step": 9204 + }, + { + "epoch": 0.9709915611814346, + "grad_norm": 0.5319321751594543, + "learning_rate": 3.1755659398005066e-06, + "loss": 1.5095, + "step": 9205 + }, + { + "epoch": 0.9710970464135021, + "grad_norm": 0.5666003823280334, + "learning_rate": 3.152529069461424e-06, + "loss": 1.4682, + "step": 9206 + }, + { + "epoch": 
0.9712025316455696, + "grad_norm": 0.592090904712677, + "learning_rate": 3.129575887105468e-06, + "loss": 1.512, + "step": 9207 + }, + { + "epoch": 0.9713080168776371, + "grad_norm": 0.5209305882453918, + "learning_rate": 3.1067063953048313e-06, + "loss": 1.4887, + "step": 9208 + }, + { + "epoch": 0.9714135021097047, + "grad_norm": 0.5218835473060608, + "learning_rate": 3.0839205966220474e-06, + "loss": 1.4783, + "step": 9209 + }, + { + "epoch": 0.9715189873417721, + "grad_norm": 0.5263001918792725, + "learning_rate": 3.06121849361049e-06, + "loss": 1.4806, + "step": 9210 + }, + { + "epoch": 0.9716244725738397, + "grad_norm": 0.5230184197425842, + "learning_rate": 3.0386000888139588e-06, + "loss": 1.4983, + "step": 9211 + }, + { + "epoch": 0.9717299578059072, + "grad_norm": 0.5393623113632202, + "learning_rate": 3.0160653847669252e-06, + "loss": 1.525, + "step": 9212 + }, + { + "epoch": 0.9718354430379746, + "grad_norm": 0.5478364825248718, + "learning_rate": 2.9936143839946193e-06, + "loss": 1.5271, + "step": 9213 + }, + { + "epoch": 0.9719409282700422, + "grad_norm": 0.5146527290344238, + "learning_rate": 2.9712470890126962e-06, + "loss": 1.4779, + "step": 9214 + }, + { + "epoch": 0.9720464135021097, + "grad_norm": 0.5486544370651245, + "learning_rate": 2.9489635023275676e-06, + "loss": 1.493, + "step": 9215 + }, + { + "epoch": 0.9721518987341772, + "grad_norm": 0.5518807172775269, + "learning_rate": 2.9267636264361517e-06, + "loss": 1.465, + "step": 9216 + }, + { + "epoch": 0.9722573839662447, + "grad_norm": 0.5443707704544067, + "learning_rate": 2.90464746382621e-06, + "loss": 1.4922, + "step": 9217 + }, + { + "epoch": 0.9723628691983123, + "grad_norm": 0.5621280670166016, + "learning_rate": 2.8826150169758425e-06, + "loss": 1.4572, + "step": 9218 + }, + { + "epoch": 0.9724683544303797, + "grad_norm": 0.5296116471290588, + "learning_rate": 2.8606662883539082e-06, + "loss": 1.4991, + "step": 9219 + }, + { + "epoch": 0.9725738396624473, + "grad_norm": 
0.5287841558456421, + "learning_rate": 2.838801280419856e-06, + "loss": 1.4914, + "step": 9220 + }, + { + "epoch": 0.9726793248945148, + "grad_norm": 0.5448576807975769, + "learning_rate": 2.817019995623893e-06, + "loss": 1.5123, + "step": 9221 + }, + { + "epoch": 0.9727848101265822, + "grad_norm": 0.5283353328704834, + "learning_rate": 2.7953224364065667e-06, + "loss": 1.4612, + "step": 9222 + }, + { + "epoch": 0.9728902953586498, + "grad_norm": 0.5162976980209351, + "learning_rate": 2.7737086051992653e-06, + "loss": 1.4938, + "step": 9223 + }, + { + "epoch": 0.9729957805907173, + "grad_norm": 0.6355493664741516, + "learning_rate": 2.752178504423969e-06, + "loss": 1.4751, + "step": 9224 + }, + { + "epoch": 0.9731012658227848, + "grad_norm": 0.5464450120925903, + "learning_rate": 2.7307321364930804e-06, + "loss": 1.4781, + "step": 9225 + }, + { + "epoch": 0.9732067510548523, + "grad_norm": 0.5390134453773499, + "learning_rate": 2.7093695038099277e-06, + "loss": 1.4959, + "step": 9226 + }, + { + "epoch": 0.9733122362869199, + "grad_norm": 0.542365312576294, + "learning_rate": 2.6880906087682622e-06, + "loss": 1.4563, + "step": 9227 + }, + { + "epoch": 0.9734177215189873, + "grad_norm": 0.5477645397186279, + "learning_rate": 2.66689545375251e-06, + "loss": 1.4922, + "step": 9228 + }, + { + "epoch": 0.9735232067510549, + "grad_norm": 0.5471468567848206, + "learning_rate": 2.6457840411376888e-06, + "loss": 1.4612, + "step": 9229 + }, + { + "epoch": 0.9736286919831224, + "grad_norm": 0.5201444625854492, + "learning_rate": 2.624756373289322e-06, + "loss": 1.506, + "step": 9230 + }, + { + "epoch": 0.9737341772151898, + "grad_norm": 0.5296967625617981, + "learning_rate": 2.603812452563775e-06, + "loss": 1.5079, + "step": 9231 + }, + { + "epoch": 0.9738396624472574, + "grad_norm": 0.5769899487495422, + "learning_rate": 2.5829522813079207e-06, + "loss": 1.4446, + "step": 9232 + }, + { + "epoch": 0.9739451476793249, + "grad_norm": 0.5275826454162598, + "learning_rate": 
2.5621758618591394e-06, + "loss": 1.4689, + "step": 9233 + }, + { + "epoch": 0.9740506329113924, + "grad_norm": 0.5490172505378723, + "learning_rate": 2.541483196545735e-06, + "loss": 1.4967, + "step": 9234 + }, + { + "epoch": 0.9741561181434599, + "grad_norm": 0.5617236495018005, + "learning_rate": 2.52087428768627e-06, + "loss": 1.4904, + "step": 9235 + }, + { + "epoch": 0.9742616033755275, + "grad_norm": 0.5325865149497986, + "learning_rate": 2.5003491375900633e-06, + "loss": 1.4747, + "step": 9236 + }, + { + "epoch": 0.9743670886075949, + "grad_norm": 0.5475979447364807, + "learning_rate": 2.4799077485571087e-06, + "loss": 1.4776, + "step": 9237 + }, + { + "epoch": 0.9744725738396625, + "grad_norm": 0.5357040762901306, + "learning_rate": 2.4595501228779906e-06, + "loss": 1.4949, + "step": 9238 + }, + { + "epoch": 0.97457805907173, + "grad_norm": 0.516262948513031, + "learning_rate": 2.4392762628338838e-06, + "loss": 1.434, + "step": 9239 + }, + { + "epoch": 0.9746835443037974, + "grad_norm": 0.518149197101593, + "learning_rate": 2.419086170696472e-06, + "loss": 1.4818, + "step": 9240 + }, + { + "epoch": 0.974789029535865, + "grad_norm": 0.5030744075775146, + "learning_rate": 2.3989798487282776e-06, + "loss": 1.4543, + "step": 9241 + }, + { + "epoch": 0.9748945147679325, + "grad_norm": 0.5316911935806274, + "learning_rate": 2.3789572991822495e-06, + "loss": 1.5142, + "step": 9242 + }, + { + "epoch": 0.975, + "grad_norm": 0.526637077331543, + "learning_rate": 2.3590185243020092e-06, + "loss": 1.471, + "step": 9243 + }, + { + "epoch": 0.9751054852320675, + "grad_norm": 0.5635231137275696, + "learning_rate": 2.3391635263218526e-06, + "loss": 1.5069, + "step": 9244 + }, + { + "epoch": 0.9752109704641351, + "grad_norm": 0.5384933948516846, + "learning_rate": 2.3193923074665834e-06, + "loss": 1.4461, + "step": 9245 + }, + { + "epoch": 0.9753164556962025, + "grad_norm": 0.5502457618713379, + "learning_rate": 2.299704869951763e-06, + "loss": 1.4914, + "step": 9246 + }, 
+ { + "epoch": 0.9754219409282701, + "grad_norm": 0.5210179686546326, + "learning_rate": 2.2801012159832933e-06, + "loss": 1.5219, + "step": 9247 + }, + { + "epoch": 0.9755274261603376, + "grad_norm": 0.5486876368522644, + "learning_rate": 2.2605813477579172e-06, + "loss": 1.4818, + "step": 9248 + }, + { + "epoch": 0.975632911392405, + "grad_norm": 0.5528859496116638, + "learning_rate": 2.2411452674630517e-06, + "loss": 1.5128, + "step": 9249 + }, + { + "epoch": 0.9757383966244726, + "grad_norm": 0.5138213634490967, + "learning_rate": 2.2217929772764545e-06, + "loss": 1.4844, + "step": 9250 + }, + { + "epoch": 0.97584388185654, + "grad_norm": 0.5551625490188599, + "learning_rate": 2.2025244793667242e-06, + "loss": 1.4753, + "step": 9251 + }, + { + "epoch": 0.9759493670886076, + "grad_norm": 0.5575900077819824, + "learning_rate": 2.1833397758929674e-06, + "loss": 1.4926, + "step": 9252 + }, + { + "epoch": 0.9760548523206751, + "grad_norm": 0.5354050397872925, + "learning_rate": 2.1642388690049643e-06, + "loss": 1.4969, + "step": 9253 + }, + { + "epoch": 0.9761603375527426, + "grad_norm": 0.5439233183860779, + "learning_rate": 2.1452217608430857e-06, + "loss": 1.4888, + "step": 9254 + }, + { + "epoch": 0.9762658227848101, + "grad_norm": 0.5385172367095947, + "learning_rate": 2.126288453538211e-06, + "loss": 1.4775, + "step": 9255 + }, + { + "epoch": 0.9763713080168777, + "grad_norm": 0.5209006071090698, + "learning_rate": 2.107438949211976e-06, + "loss": 1.4705, + "step": 9256 + }, + { + "epoch": 0.9764767932489451, + "grad_norm": 0.5603335499763489, + "learning_rate": 2.0886732499764416e-06, + "loss": 1.5245, + "step": 9257 + }, + { + "epoch": 0.9765822784810126, + "grad_norm": 0.5396624207496643, + "learning_rate": 2.069991357934592e-06, + "loss": 1.4767, + "step": 9258 + }, + { + "epoch": 0.9766877637130802, + "grad_norm": 0.5227704644203186, + "learning_rate": 2.0513932751796695e-06, + "loss": 1.4773, + "step": 9259 + }, + { + "epoch": 0.9767932489451476, + 
"grad_norm": 0.5346076488494873, + "learning_rate": 2.0328790037957568e-06, + "loss": 1.4727, + "step": 9260 + }, + { + "epoch": 0.9768987341772152, + "grad_norm": 0.5405548810958862, + "learning_rate": 2.0144485458574446e-06, + "loss": 1.4932, + "step": 9261 + }, + { + "epoch": 0.9770042194092827, + "grad_norm": 0.5719506740570068, + "learning_rate": 1.9961019034299976e-06, + "loss": 1.4917, + "step": 9262 + }, + { + "epoch": 0.9771097046413502, + "grad_norm": 0.5374000072479248, + "learning_rate": 1.977839078569188e-06, + "loss": 1.5035, + "step": 9263 + }, + { + "epoch": 0.9772151898734177, + "grad_norm": 0.5304287075996399, + "learning_rate": 1.959660073321545e-06, + "loss": 1.525, + "step": 9264 + }, + { + "epoch": 0.9773206751054853, + "grad_norm": 0.5401631593704224, + "learning_rate": 1.94156488972394e-06, + "loss": 1.4864, + "step": 9265 + }, + { + "epoch": 0.9774261603375527, + "grad_norm": 0.5329889059066772, + "learning_rate": 1.9235535298042506e-06, + "loss": 1.5097, + "step": 9266 + }, + { + "epoch": 0.9775316455696202, + "grad_norm": 0.521740198135376, + "learning_rate": 1.905625995580612e-06, + "loss": 1.4887, + "step": 9267 + }, + { + "epoch": 0.9776371308016878, + "grad_norm": 0.5383413434028625, + "learning_rate": 1.8877822890618346e-06, + "loss": 1.4884, + "step": 9268 + }, + { + "epoch": 0.9777426160337552, + "grad_norm": 0.535744845867157, + "learning_rate": 1.8700224122475683e-06, + "loss": 1.4994, + "step": 9269 + }, + { + "epoch": 0.9778481012658228, + "grad_norm": 0.5128731727600098, + "learning_rate": 1.8523463671278052e-06, + "loss": 1.4914, + "step": 9270 + }, + { + "epoch": 0.9779535864978903, + "grad_norm": 0.5335046648979187, + "learning_rate": 1.8347541556832104e-06, + "loss": 1.4611, + "step": 9271 + }, + { + "epoch": 0.9780590717299578, + "grad_norm": 0.5473113059997559, + "learning_rate": 1.8172457798850407e-06, + "loss": 1.489, + "step": 9272 + }, + { + "epoch": 0.9781645569620253, + "grad_norm": 0.5408937335014343, + 
"learning_rate": 1.7998212416953096e-06, + "loss": 1.4961, + "step": 9273 + }, + { + "epoch": 0.9782700421940929, + "grad_norm": 0.5372254252433777, + "learning_rate": 1.782480543066456e-06, + "loss": 1.5102, + "step": 9274 + }, + { + "epoch": 0.9783755274261603, + "grad_norm": 0.5181279182434082, + "learning_rate": 1.7652236859416748e-06, + "loss": 1.4962, + "step": 9275 + }, + { + "epoch": 0.9784810126582278, + "grad_norm": 0.5591068863868713, + "learning_rate": 1.7480506722545864e-06, + "loss": 1.4676, + "step": 9276 + }, + { + "epoch": 0.9785864978902954, + "grad_norm": 0.5247682332992554, + "learning_rate": 1.7309615039294847e-06, + "loss": 1.4679, + "step": 9277 + }, + { + "epoch": 0.9786919831223628, + "grad_norm": 0.5571992993354797, + "learning_rate": 1.7139561828813377e-06, + "loss": 1.5144, + "step": 9278 + }, + { + "epoch": 0.9787974683544304, + "grad_norm": 0.5330104827880859, + "learning_rate": 1.6970347110157879e-06, + "loss": 1.4887, + "step": 9279 + }, + { + "epoch": 0.9789029535864979, + "grad_norm": 0.5188180208206177, + "learning_rate": 1.6801970902288188e-06, + "loss": 1.4467, + "step": 9280 + }, + { + "epoch": 0.9790084388185654, + "grad_norm": 0.5223090052604675, + "learning_rate": 1.6634433224072543e-06, + "loss": 1.4834, + "step": 9281 + }, + { + "epoch": 0.9791139240506329, + "grad_norm": 0.5183203816413879, + "learning_rate": 1.6467734094283427e-06, + "loss": 1.481, + "step": 9282 + }, + { + "epoch": 0.9792194092827005, + "grad_norm": 0.5401901006698608, + "learning_rate": 1.630187353160173e-06, + "loss": 1.4885, + "step": 9283 + }, + { + "epoch": 0.9793248945147679, + "grad_norm": 0.5237101316452026, + "learning_rate": 1.6136851554611753e-06, + "loss": 1.5024, + "step": 9284 + }, + { + "epoch": 0.9794303797468354, + "grad_norm": 0.5534086227416992, + "learning_rate": 1.5972668181805373e-06, + "loss": 1.5198, + "step": 9285 + }, + { + "epoch": 0.979535864978903, + "grad_norm": 0.5491413474082947, + "learning_rate": 1.580932343158037e-06, 
+ "loss": 1.4787, + "step": 9286 + }, + { + "epoch": 0.9796413502109704, + "grad_norm": 0.5122298002243042, + "learning_rate": 1.5646817322240436e-06, + "loss": 1.5012, + "step": 9287 + }, + { + "epoch": 0.979746835443038, + "grad_norm": 0.5337023735046387, + "learning_rate": 1.5485149871995175e-06, + "loss": 1.4942, + "step": 9288 + }, + { + "epoch": 0.9798523206751055, + "grad_norm": 0.5479239225387573, + "learning_rate": 1.532432109895926e-06, + "loss": 1.4844, + "step": 9289 + }, + { + "epoch": 0.979957805907173, + "grad_norm": 0.5313217639923096, + "learning_rate": 1.5164331021155774e-06, + "loss": 1.5067, + "step": 9290 + }, + { + "epoch": 0.9800632911392405, + "grad_norm": 0.5455507040023804, + "learning_rate": 1.5005179656511213e-06, + "loss": 1.4625, + "step": 9291 + }, + { + "epoch": 0.9801687763713081, + "grad_norm": 0.5224635601043701, + "learning_rate": 1.4846867022860477e-06, + "loss": 1.4652, + "step": 9292 + }, + { + "epoch": 0.9802742616033755, + "grad_norm": 0.5294021964073181, + "learning_rate": 1.4689393137941876e-06, + "loss": 1.5094, + "step": 9293 + }, + { + "epoch": 0.980379746835443, + "grad_norm": 0.5280531644821167, + "learning_rate": 1.4532758019402958e-06, + "loss": 1.5094, + "step": 9294 + }, + { + "epoch": 0.9804852320675106, + "grad_norm": 0.5385118722915649, + "learning_rate": 1.4376961684793854e-06, + "loss": 1.4936, + "step": 9295 + }, + { + "epoch": 0.980590717299578, + "grad_norm": 0.5297698378562927, + "learning_rate": 1.4222004151572265e-06, + "loss": 1.4706, + "step": 9296 + }, + { + "epoch": 0.9806962025316456, + "grad_norm": 0.5216357111930847, + "learning_rate": 1.4067885437103467e-06, + "loss": 1.4859, + "step": 9297 + }, + { + "epoch": 0.9808016877637131, + "grad_norm": 0.528515636920929, + "learning_rate": 1.3914605558656146e-06, + "loss": 1.5106, + "step": 9298 + }, + { + "epoch": 0.9809071729957806, + "grad_norm": 0.5595098733901978, + "learning_rate": 1.376216453340573e-06, + "loss": 1.4874, + "step": 9299 + }, + { + 
"epoch": 0.9810126582278481, + "grad_norm": 0.5440078973770142, + "learning_rate": 1.3610562378435221e-06, + "loss": 1.5031, + "step": 9300 + }, + { + "epoch": 0.9811181434599157, + "grad_norm": 0.5280998349189758, + "learning_rate": 1.345979911073103e-06, + "loss": 1.4767, + "step": 9301 + }, + { + "epoch": 0.9812236286919831, + "grad_norm": 0.5425869226455688, + "learning_rate": 1.3309874747187978e-06, + "loss": 1.482, + "step": 9302 + }, + { + "epoch": 0.9813291139240506, + "grad_norm": 0.5480627417564392, + "learning_rate": 1.3160789304605958e-06, + "loss": 1.5115, + "step": 9303 + }, + { + "epoch": 0.9814345991561182, + "grad_norm": 0.5320067405700684, + "learning_rate": 1.3012542799689108e-06, + "loss": 1.4744, + "step": 9304 + }, + { + "epoch": 0.9815400843881856, + "grad_norm": 0.5333817005157471, + "learning_rate": 1.286513524905164e-06, + "loss": 1.5046, + "step": 9305 + }, + { + "epoch": 0.9816455696202532, + "grad_norm": 0.5230602622032166, + "learning_rate": 1.2718566669208675e-06, + "loss": 1.5049, + "step": 9306 + }, + { + "epoch": 0.9817510548523207, + "grad_norm": 0.5313012599945068, + "learning_rate": 1.2572837076586241e-06, + "loss": 1.5221, + "step": 9307 + }, + { + "epoch": 0.9818565400843882, + "grad_norm": 0.5348551273345947, + "learning_rate": 1.2427946487512941e-06, + "loss": 1.499, + "step": 9308 + }, + { + "epoch": 0.9819620253164557, + "grad_norm": 0.5560609102249146, + "learning_rate": 1.2283894918224125e-06, + "loss": 1.4446, + "step": 9309 + }, + { + "epoch": 0.9820675105485233, + "grad_norm": 0.537088930606842, + "learning_rate": 1.2140682384862712e-06, + "loss": 1.5019, + "step": 9310 + }, + { + "epoch": 0.9821729957805907, + "grad_norm": 0.5317324995994568, + "learning_rate": 1.199830890347503e-06, + "loss": 1.4791, + "step": 9311 + }, + { + "epoch": 0.9822784810126582, + "grad_norm": 0.5239995121955872, + "learning_rate": 1.185677449001582e-06, + "loss": 1.4888, + "step": 9312 + }, + { + "epoch": 0.9823839662447258, + "grad_norm": 
0.5207394361495972, + "learning_rate": 1.1716079160344061e-06, + "loss": 1.483, + "step": 9313 + }, + { + "epoch": 0.9824894514767932, + "grad_norm": 0.5311497449874878, + "learning_rate": 1.1576222930225478e-06, + "loss": 1.5235, + "step": 9314 + }, + { + "epoch": 0.9825949367088608, + "grad_norm": 0.5408836603164673, + "learning_rate": 1.143720581533253e-06, + "loss": 1.4633, + "step": 9315 + }, + { + "epoch": 0.9827004219409282, + "grad_norm": 0.5284598469734192, + "learning_rate": 1.1299027831241094e-06, + "loss": 1.5165, + "step": 9316 + }, + { + "epoch": 0.9828059071729958, + "grad_norm": 0.5116883516311646, + "learning_rate": 1.1161688993435449e-06, + "loss": 1.4689, + "step": 9317 + }, + { + "epoch": 0.9829113924050633, + "grad_norm": 0.5903293490409851, + "learning_rate": 1.1025189317305784e-06, + "loss": 1.4693, + "step": 9318 + }, + { + "epoch": 0.9830168776371307, + "grad_norm": 0.5684235095977783, + "learning_rate": 1.0889528818147366e-06, + "loss": 1.4832, + "step": 9319 + }, + { + "epoch": 0.9831223628691983, + "grad_norm": 0.5495097041130066, + "learning_rate": 1.0754707511161365e-06, + "loss": 1.4733, + "step": 9320 + }, + { + "epoch": 0.9832278481012658, + "grad_norm": 0.5228985548019409, + "learning_rate": 1.0620725411454868e-06, + "loss": 1.508, + "step": 9321 + }, + { + "epoch": 0.9833333333333333, + "grad_norm": 0.5509506464004517, + "learning_rate": 1.0487582534040863e-06, + "loss": 1.5081, + "step": 9322 + }, + { + "epoch": 0.9834388185654008, + "grad_norm": 0.5842440724372864, + "learning_rate": 1.0355278893839915e-06, + "loss": 1.552, + "step": 9323 + }, + { + "epoch": 0.9835443037974684, + "grad_norm": 0.5443979501724243, + "learning_rate": 1.0223814505676832e-06, + "loss": 1.5026, + "step": 9324 + }, + { + "epoch": 0.9836497890295358, + "grad_norm": 0.5188543796539307, + "learning_rate": 1.009318938428233e-06, + "loss": 1.4809, + "step": 9325 + }, + { + "epoch": 0.9837552742616034, + "grad_norm": 0.5214053988456726, + "learning_rate": 
9.963403544294702e-07, + "loss": 1.508, + "step": 9326 + }, + { + "epoch": 0.9838607594936709, + "grad_norm": 0.5377270579338074, + "learning_rate": 9.834457000255647e-07, + "loss": 1.511, + "step": 9327 + }, + { + "epoch": 0.9839662447257383, + "grad_norm": 0.5352790951728821, + "learning_rate": 9.706349766615275e-07, + "loss": 1.4556, + "step": 9328 + }, + { + "epoch": 0.9840717299578059, + "grad_norm": 0.5927813053131104, + "learning_rate": 9.579081857728766e-07, + "loss": 1.475, + "step": 9329 + }, + { + "epoch": 0.9841772151898734, + "grad_norm": 0.5259334444999695, + "learning_rate": 9.452653287856383e-07, + "loss": 1.4629, + "step": 9330 + }, + { + "epoch": 0.9842827004219409, + "grad_norm": 0.5255692601203918, + "learning_rate": 9.327064071165126e-07, + "loss": 1.513, + "step": 9331 + }, + { + "epoch": 0.9843881856540084, + "grad_norm": 0.513903796672821, + "learning_rate": 9.202314221728735e-07, + "loss": 1.4763, + "step": 9332 + }, + { + "epoch": 0.984493670886076, + "grad_norm": 0.5193980932235718, + "learning_rate": 9.078403753525199e-07, + "loss": 1.5067, + "step": 9333 + }, + { + "epoch": 0.9845991561181434, + "grad_norm": 0.5327640175819397, + "learning_rate": 8.955332680440076e-07, + "loss": 1.5081, + "step": 9334 + }, + { + "epoch": 0.984704641350211, + "grad_norm": 0.5297581553459167, + "learning_rate": 8.833101016263168e-07, + "loss": 1.4619, + "step": 9335 + }, + { + "epoch": 0.9848101265822785, + "grad_norm": 0.5330150127410889, + "learning_rate": 8.711708774691851e-07, + "loss": 1.4878, + "step": 9336 + }, + { + "epoch": 0.984915611814346, + "grad_norm": 0.5156834125518799, + "learning_rate": 8.591155969327746e-07, + "loss": 1.4979, + "step": 9337 + }, + { + "epoch": 0.9850210970464135, + "grad_norm": 0.5251047015190125, + "learning_rate": 8.47144261368088e-07, + "loss": 1.4851, + "step": 9338 + }, + { + "epoch": 0.985126582278481, + "grad_norm": 0.5213818550109863, + "learning_rate": 8.352568721165521e-07, + "loss": 1.4941, + "step": 9339 + 
}, + { + "epoch": 0.9852320675105485, + "grad_norm": 0.5145054459571838, + "learning_rate": 8.234534305101015e-07, + "loss": 1.5174, + "step": 9340 + }, + { + "epoch": 0.985337552742616, + "grad_norm": 0.5334819555282593, + "learning_rate": 8.117339378714283e-07, + "loss": 1.5128, + "step": 9341 + }, + { + "epoch": 0.9854430379746836, + "grad_norm": 0.5330250263214111, + "learning_rate": 8.00098395513732e-07, + "loss": 1.5157, + "step": 9342 + }, + { + "epoch": 0.985548523206751, + "grad_norm": 0.5333518981933594, + "learning_rate": 7.885468047408862e-07, + "loss": 1.5361, + "step": 9343 + }, + { + "epoch": 0.9856540084388186, + "grad_norm": 0.5204362869262695, + "learning_rate": 7.770791668472721e-07, + "loss": 1.4873, + "step": 9344 + }, + { + "epoch": 0.9857594936708861, + "grad_norm": 0.5270372033119202, + "learning_rate": 7.656954831178619e-07, + "loss": 1.5015, + "step": 9345 + }, + { + "epoch": 0.9858649789029535, + "grad_norm": 0.5211661458015442, + "learning_rate": 7.543957548283021e-07, + "loss": 1.5094, + "step": 9346 + }, + { + "epoch": 0.9859704641350211, + "grad_norm": 0.53940349817276, + "learning_rate": 7.431799832448294e-07, + "loss": 1.5129, + "step": 9347 + }, + { + "epoch": 0.9860759493670886, + "grad_norm": 0.5373362898826599, + "learning_rate": 7.320481696241887e-07, + "loss": 1.533, + "step": 9348 + }, + { + "epoch": 0.9861814345991561, + "grad_norm": 0.5370702743530273, + "learning_rate": 7.210003152136324e-07, + "loss": 1.5021, + "step": 9349 + }, + { + "epoch": 0.9862869198312236, + "grad_norm": 0.5104297995567322, + "learning_rate": 7.100364212513367e-07, + "loss": 1.4816, + "step": 9350 + }, + { + "epoch": 0.9863924050632912, + "grad_norm": 0.5312998294830322, + "learning_rate": 6.991564889656521e-07, + "loss": 1.5332, + "step": 9351 + }, + { + "epoch": 0.9864978902953586, + "grad_norm": 0.5414518713951111, + "learning_rate": 6.883605195759369e-07, + "loss": 1.5014, + "step": 9352 + }, + { + "epoch": 0.9866033755274262, + "grad_norm": 
0.5369912385940552, + "learning_rate": 6.776485142918065e-07, + "loss": 1.5226, + "step": 9353 + }, + { + "epoch": 0.9867088607594937, + "grad_norm": 0.5172037482261658, + "learning_rate": 6.67020474313551e-07, + "loss": 1.4791, + "step": 9354 + }, + { + "epoch": 0.9868143459915611, + "grad_norm": 0.517571747303009, + "learning_rate": 6.564764008322177e-07, + "loss": 1.5078, + "step": 9355 + }, + { + "epoch": 0.9869198312236287, + "grad_norm": 0.5324224233627319, + "learning_rate": 6.460162950292781e-07, + "loss": 1.4946, + "step": 9356 + }, + { + "epoch": 0.9870253164556962, + "grad_norm": 0.5640791058540344, + "learning_rate": 6.356401580767945e-07, + "loss": 1.4796, + "step": 9357 + }, + { + "epoch": 0.9871308016877637, + "grad_norm": 0.5334258675575256, + "learning_rate": 6.253479911375037e-07, + "loss": 1.4713, + "step": 9358 + }, + { + "epoch": 0.9872362869198312, + "grad_norm": 0.5489567518234253, + "learning_rate": 6.151397953647331e-07, + "loss": 1.523, + "step": 9359 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 0.5320504307746887, + "learning_rate": 6.050155719023176e-07, + "loss": 1.4984, + "step": 9360 + }, + { + "epoch": 0.9874472573839662, + "grad_norm": 0.5140419602394104, + "learning_rate": 5.949753218846832e-07, + "loss": 1.498, + "step": 9361 + }, + { + "epoch": 0.9875527426160338, + "grad_norm": 0.5816311240196228, + "learning_rate": 5.850190464369298e-07, + "loss": 1.5112, + "step": 9362 + }, + { + "epoch": 0.9876582278481013, + "grad_norm": 0.5358282327651978, + "learning_rate": 5.751467466747484e-07, + "loss": 1.4946, + "step": 9363 + }, + { + "epoch": 0.9877637130801687, + "grad_norm": 0.5562007427215576, + "learning_rate": 5.653584237043374e-07, + "loss": 1.4985, + "step": 9364 + }, + { + "epoch": 0.9878691983122363, + "grad_norm": 0.5559691786766052, + "learning_rate": 5.556540786224862e-07, + "loss": 1.4944, + "step": 9365 + }, + { + "epoch": 0.9879746835443038, + "grad_norm": 0.5285075306892395, + "learning_rate": 
5.460337125167414e-07, + "loss": 1.5131, + "step": 9366 + }, + { + "epoch": 0.9880801687763713, + "grad_norm": 0.5134132504463196, + "learning_rate": 5.364973264649908e-07, + "loss": 1.4647, + "step": 9367 + }, + { + "epoch": 0.9881856540084388, + "grad_norm": 0.5450810194015503, + "learning_rate": 5.270449215358797e-07, + "loss": 1.5174, + "step": 9368 + }, + { + "epoch": 0.9882911392405064, + "grad_norm": 0.5486460328102112, + "learning_rate": 5.176764987885607e-07, + "loss": 1.478, + "step": 9369 + }, + { + "epoch": 0.9883966244725738, + "grad_norm": 0.5348783731460571, + "learning_rate": 5.08392059272944e-07, + "loss": 1.5275, + "step": 9370 + }, + { + "epoch": 0.9885021097046414, + "grad_norm": 0.5118326544761658, + "learning_rate": 4.991916040291977e-07, + "loss": 1.4739, + "step": 9371 + }, + { + "epoch": 0.9886075949367089, + "grad_norm": 0.5091007351875305, + "learning_rate": 4.900751340884135e-07, + "loss": 1.4786, + "step": 9372 + }, + { + "epoch": 0.9887130801687763, + "grad_norm": 0.5256640911102295, + "learning_rate": 4.810426504721077e-07, + "loss": 1.4907, + "step": 9373 + }, + { + "epoch": 0.9888185654008439, + "grad_norm": 0.5351490378379822, + "learning_rate": 4.720941541923873e-07, + "loss": 1.4379, + "step": 9374 + }, + { + "epoch": 0.9889240506329114, + "grad_norm": 0.5115474462509155, + "learning_rate": 4.632296462520336e-07, + "loss": 1.5134, + "step": 9375 + }, + { + "epoch": 0.9890295358649789, + "grad_norm": 0.5158138275146484, + "learning_rate": 4.544491276443352e-07, + "loss": 1.4544, + "step": 9376 + }, + { + "epoch": 0.9891350210970464, + "grad_norm": 0.5608594417572021, + "learning_rate": 4.457525993531719e-07, + "loss": 1.4579, + "step": 9377 + }, + { + "epoch": 0.989240506329114, + "grad_norm": 0.5460013747215271, + "learning_rate": 4.371400623530142e-07, + "loss": 1.5265, + "step": 9378 + }, + { + "epoch": 0.9893459915611814, + "grad_norm": 0.5252239108085632, + "learning_rate": 4.2861151760900665e-07, + "loss": 1.5128, + "step": 
9379 + }, + { + "epoch": 0.989451476793249, + "grad_norm": 0.5649232268333435, + "learning_rate": 4.2016696607680147e-07, + "loss": 1.4911, + "step": 9380 + }, + { + "epoch": 0.9895569620253165, + "grad_norm": 0.5311670303344727, + "learning_rate": 4.118064087025586e-07, + "loss": 1.4539, + "step": 9381 + }, + { + "epoch": 0.989662447257384, + "grad_norm": 0.5595170855522156, + "learning_rate": 4.035298464232784e-07, + "loss": 1.5, + "step": 9382 + }, + { + "epoch": 0.9897679324894515, + "grad_norm": 0.5200569033622742, + "learning_rate": 3.953372801662192e-07, + "loss": 1.4998, + "step": 9383 + }, + { + "epoch": 0.9898734177215189, + "grad_norm": 0.5359269380569458, + "learning_rate": 3.8722871084956313e-07, + "loss": 1.4846, + "step": 9384 + }, + { + "epoch": 0.9899789029535865, + "grad_norm": 0.5223734378814697, + "learning_rate": 3.7920413938175027e-07, + "loss": 1.481, + "step": 9385 + }, + { + "epoch": 0.990084388185654, + "grad_norm": 0.5509656667709351, + "learning_rate": 3.7126356666214447e-07, + "loss": 1.491, + "step": 9386 + }, + { + "epoch": 0.9901898734177215, + "grad_norm": 0.5025913715362549, + "learning_rate": 3.6340699358036743e-07, + "loss": 1.5167, + "step": 9387 + }, + { + "epoch": 0.990295358649789, + "grad_norm": 0.5341947078704834, + "learning_rate": 3.5563442101696486e-07, + "loss": 1.473, + "step": 9388 + }, + { + "epoch": 0.9904008438818566, + "grad_norm": 0.6441130042076111, + "learning_rate": 3.479458498426569e-07, + "loss": 1.4984, + "step": 9389 + }, + { + "epoch": 0.990506329113924, + "grad_norm": 0.5501677393913269, + "learning_rate": 3.4034128091917085e-07, + "loss": 1.4963, + "step": 9390 + }, + { + "epoch": 0.9906118143459915, + "grad_norm": 0.525855302810669, + "learning_rate": 3.328207150986584e-07, + "loss": 1.4934, + "step": 9391 + }, + { + "epoch": 0.9907172995780591, + "grad_norm": 0.5583406686782837, + "learning_rate": 3.2538415322369563e-07, + "loss": 1.4969, + "step": 9392 + }, + { + "epoch": 0.9908227848101265, + 
"grad_norm": 0.5308588743209839, + "learning_rate": 3.180315961276159e-07, + "loss": 1.4905, + "step": 9393 + }, + { + "epoch": 0.9909282700421941, + "grad_norm": 0.5393091440200806, + "learning_rate": 3.107630446344267e-07, + "loss": 1.4794, + "step": 9394 + }, + { + "epoch": 0.9910337552742616, + "grad_norm": 0.5430125594139099, + "learning_rate": 3.035784995584767e-07, + "loss": 1.5436, + "step": 9395 + }, + { + "epoch": 0.9911392405063291, + "grad_norm": 0.5244787335395813, + "learning_rate": 2.964779617049551e-07, + "loss": 1.4732, + "step": 9396 + }, + { + "epoch": 0.9912447257383966, + "grad_norm": 0.5336843729019165, + "learning_rate": 2.8946143186930896e-07, + "loss": 1.4953, + "step": 9397 + }, + { + "epoch": 0.9913502109704642, + "grad_norm": 0.5540372133255005, + "learning_rate": 2.825289108379925e-07, + "loss": 1.5, + "step": 9398 + }, + { + "epoch": 0.9914556962025316, + "grad_norm": 0.5372031927108765, + "learning_rate": 2.756803993877177e-07, + "loss": 1.5173, + "step": 9399 + }, + { + "epoch": 0.9915611814345991, + "grad_norm": 0.5142315626144409, + "learning_rate": 2.689158982859541e-07, + "loss": 1.4999, + "step": 9400 + }, + { + "epoch": 0.9916666666666667, + "grad_norm": 0.5152605175971985, + "learning_rate": 2.622354082905953e-07, + "loss": 1.4512, + "step": 9401 + }, + { + "epoch": 0.9917721518987341, + "grad_norm": 0.5497332215309143, + "learning_rate": 2.556389301502926e-07, + "loss": 1.4945, + "step": 9402 + }, + { + "epoch": 0.9918776371308017, + "grad_norm": 0.5776994228363037, + "learning_rate": 2.491264646042879e-07, + "loss": 1.4474, + "step": 9403 + }, + { + "epoch": 0.9919831223628692, + "grad_norm": 0.5705167055130005, + "learning_rate": 2.426980123821643e-07, + "loss": 1.4815, + "step": 9404 + }, + { + "epoch": 0.9920886075949367, + "grad_norm": 0.5461461544036865, + "learning_rate": 2.3635357420442872e-07, + "loss": 1.4691, + "step": 9405 + }, + { + "epoch": 0.9921940928270042, + "grad_norm": 0.531199038028717, + "learning_rate": 
2.3009315078192926e-07, + "loss": 1.4769, + "step": 9406 + }, + { + "epoch": 0.9922995780590718, + "grad_norm": 0.5547901391983032, + "learning_rate": 2.2391674281610486e-07, + "loss": 1.4661, + "step": 9407 + }, + { + "epoch": 0.9924050632911392, + "grad_norm": 0.5373242497444153, + "learning_rate": 2.1782435099923503e-07, + "loss": 1.4967, + "step": 9408 + }, + { + "epoch": 0.9925105485232067, + "grad_norm": 0.5318999290466309, + "learning_rate": 2.1181597601385716e-07, + "loss": 1.4946, + "step": 9409 + }, + { + "epoch": 0.9926160337552743, + "grad_norm": 0.5667362809181213, + "learning_rate": 2.05891618533266e-07, + "loss": 1.483, + "step": 9410 + }, + { + "epoch": 0.9927215189873417, + "grad_norm": 0.5426313877105713, + "learning_rate": 2.0005127922134713e-07, + "loss": 1.5406, + "step": 9411 + }, + { + "epoch": 0.9928270042194093, + "grad_norm": 0.514711856842041, + "learning_rate": 1.942949587324938e-07, + "loss": 1.4433, + "step": 9412 + }, + { + "epoch": 0.9929324894514768, + "grad_norm": 0.5289005041122437, + "learning_rate": 1.8862265771177333e-07, + "loss": 1.4618, + "step": 9413 + }, + { + "epoch": 0.9930379746835443, + "grad_norm": 0.5351307392120361, + "learning_rate": 1.8303437679476065e-07, + "loss": 1.5228, + "step": 9414 + }, + { + "epoch": 0.9931434599156118, + "grad_norm": 0.5562357902526855, + "learning_rate": 1.775301166077048e-07, + "loss": 1.5174, + "step": 9415 + }, + { + "epoch": 0.9932489451476794, + "grad_norm": 0.529257595539093, + "learning_rate": 1.7210987776736243e-07, + "loss": 1.4861, + "step": 9416 + }, + { + "epoch": 0.9933544303797468, + "grad_norm": 0.5605564713478088, + "learning_rate": 1.6677366088099777e-07, + "loss": 1.525, + "step": 9417 + }, + { + "epoch": 0.9934599156118143, + "grad_norm": 0.5246610045433044, + "learning_rate": 1.6152146654671573e-07, + "loss": 1.4788, + "step": 9418 + }, + { + "epoch": 0.9935654008438819, + "grad_norm": 0.5210171341896057, + "learning_rate": 1.5635329535304554e-07, + "loss": 1.5202, + 
"step": 9419 + }, + { + "epoch": 0.9936708860759493, + "grad_norm": 0.5330246090888977, + "learning_rate": 1.5126914787894074e-07, + "loss": 1.4556, + "step": 9420 + }, + { + "epoch": 0.9937763713080169, + "grad_norm": 0.5348329544067383, + "learning_rate": 1.4626902469427882e-07, + "loss": 1.5311, + "step": 9421 + }, + { + "epoch": 0.9938818565400844, + "grad_norm": 0.5199273824691772, + "learning_rate": 1.4135292635927832e-07, + "loss": 1.518, + "step": 9422 + }, + { + "epoch": 0.9939873417721519, + "grad_norm": 0.5486904382705688, + "learning_rate": 1.365208534248319e-07, + "loss": 1.4766, + "step": 9423 + }, + { + "epoch": 0.9940928270042194, + "grad_norm": 0.541450023651123, + "learning_rate": 1.3177280643233979e-07, + "loss": 1.4693, + "step": 9424 + }, + { + "epoch": 0.994198312236287, + "grad_norm": 0.5069431662559509, + "learning_rate": 1.271087859138764e-07, + "loss": 1.4828, + "step": 9425 + }, + { + "epoch": 0.9943037974683544, + "grad_norm": 0.5438243746757507, + "learning_rate": 1.2252879239210702e-07, + "loss": 1.5283, + "step": 9426 + }, + { + "epoch": 0.994409282700422, + "grad_norm": 0.5580857396125793, + "learning_rate": 1.1803282638020441e-07, + "loss": 1.5132, + "step": 9427 + }, + { + "epoch": 0.9945147679324895, + "grad_norm": 0.5346469283103943, + "learning_rate": 1.1362088838193229e-07, + "loss": 1.4718, + "step": 9428 + }, + { + "epoch": 0.9946202531645569, + "grad_norm": 0.5496415495872498, + "learning_rate": 1.0929297889172852e-07, + "loss": 1.5162, + "step": 9429 + }, + { + "epoch": 0.9947257383966245, + "grad_norm": 0.5412315726280212, + "learning_rate": 1.0504909839462173e-07, + "loss": 1.4536, + "step": 9430 + }, + { + "epoch": 0.994831223628692, + "grad_norm": 0.5425090789794922, + "learning_rate": 1.008892473659817e-07, + "loss": 1.4805, + "step": 9431 + }, + { + "epoch": 0.9949367088607595, + "grad_norm": 0.5482635498046875, + "learning_rate": 9.68134262721021e-08, + "loss": 1.5139, + "step": 9432 + }, + { + "epoch": 
0.995042194092827, + "grad_norm": 0.5141649842262268, + "learning_rate": 9.282163556953437e-08, + "loss": 1.4932, + "step": 9433 + }, + { + "epoch": 0.9951476793248946, + "grad_norm": 0.5310150980949402, + "learning_rate": 8.891387570575393e-08, + "loss": 1.4866, + "step": 9434 + }, + { + "epoch": 0.995253164556962, + "grad_norm": 0.5271302461624146, + "learning_rate": 8.509014711857721e-08, + "loss": 1.488, + "step": 9435 + }, + { + "epoch": 0.9953586497890295, + "grad_norm": 0.5581306219100952, + "learning_rate": 8.135045023641152e-08, + "loss": 1.4804, + "step": 9436 + }, + { + "epoch": 0.9954641350210971, + "grad_norm": 0.5221717953681946, + "learning_rate": 7.769478547842157e-08, + "loss": 1.4809, + "step": 9437 + }, + { + "epoch": 0.9955696202531645, + "grad_norm": 0.5213772058486938, + "learning_rate": 7.412315325411312e-08, + "loss": 1.487, + "step": 9438 + }, + { + "epoch": 0.9956751054852321, + "grad_norm": 0.5360990762710571, + "learning_rate": 7.063555396383259e-08, + "loss": 1.5345, + "step": 9439 + }, + { + "epoch": 0.9957805907172996, + "grad_norm": 0.5299261212348938, + "learning_rate": 6.723198799826746e-08, + "loss": 1.4829, + "step": 9440 + }, + { + "epoch": 0.9958860759493671, + "grad_norm": 0.5165615081787109, + "learning_rate": 6.391245573894588e-08, + "loss": 1.5061, + "step": 9441 + }, + { + "epoch": 0.9959915611814346, + "grad_norm": 0.5239521265029907, + "learning_rate": 6.067695755765379e-08, + "loss": 1.518, + "step": 9442 + }, + { + "epoch": 0.9960970464135022, + "grad_norm": 0.5628992915153503, + "learning_rate": 5.7525493817101035e-08, + "loss": 1.4725, + "step": 9443 + }, + { + "epoch": 0.9962025316455696, + "grad_norm": 0.5335294604301453, + "learning_rate": 5.4458064870338553e-08, + "loss": 1.4965, + "step": 9444 + }, + { + "epoch": 0.9963080168776371, + "grad_norm": 0.5271139144897461, + "learning_rate": 5.147467106117465e-08, + "loss": 1.495, + "step": 9445 + }, + { + "epoch": 0.9964135021097047, + "grad_norm": 
0.5484854578971863, + "learning_rate": 4.85753127237587e-08, + "loss": 1.4912, + "step": 9446 + }, + { + "epoch": 0.9965189873417721, + "grad_norm": 0.5365214347839355, + "learning_rate": 4.575999018316401e-08, + "loss": 1.499, + "step": 9447 + }, + { + "epoch": 0.9966244725738397, + "grad_norm": 0.5401077270507812, + "learning_rate": 4.302870375472168e-08, + "loss": 1.5015, + "step": 9448 + }, + { + "epoch": 0.9967299578059071, + "grad_norm": 0.5364035367965698, + "learning_rate": 4.038145374460345e-08, + "loss": 1.5205, + "step": 9449 + }, + { + "epoch": 0.9968354430379747, + "grad_norm": 0.5229104161262512, + "learning_rate": 3.781824044932214e-08, + "loss": 1.5156, + "step": 9450 + }, + { + "epoch": 0.9969409282700422, + "grad_norm": 0.5215301513671875, + "learning_rate": 3.533906415614796e-08, + "loss": 1.4914, + "step": 9451 + }, + { + "epoch": 0.9970464135021097, + "grad_norm": 0.5540784597396851, + "learning_rate": 3.294392514285871e-08, + "loss": 1.5384, + "step": 9452 + }, + { + "epoch": 0.9971518987341772, + "grad_norm": 0.5122950077056885, + "learning_rate": 3.0632823677906316e-08, + "loss": 1.4762, + "step": 9453 + }, + { + "epoch": 0.9972573839662447, + "grad_norm": 0.5423650145530701, + "learning_rate": 2.8405760020250304e-08, + "loss": 1.4847, + "step": 9454 + }, + { + "epoch": 0.9973628691983122, + "grad_norm": 0.5395012497901917, + "learning_rate": 2.6262734419441047e-08, + "loss": 1.4732, + "step": 9455 + }, + { + "epoch": 0.9974683544303797, + "grad_norm": 0.5424842238426208, + "learning_rate": 2.420374711561979e-08, + "loss": 1.4766, + "step": 9456 + }, + { + "epoch": 0.9975738396624473, + "grad_norm": 0.5320226550102234, + "learning_rate": 2.2228798339435363e-08, + "loss": 1.4636, + "step": 9457 + }, + { + "epoch": 0.9976793248945147, + "grad_norm": 0.5834058523178101, + "learning_rate": 2.0337888312210727e-08, + "loss": 1.4783, + "step": 9458 + }, + { + "epoch": 0.9977848101265823, + "grad_norm": 0.5195724964141846, + "learning_rate": 
1.8531017245942972e-08, + "loss": 1.4627, + "step": 9459 + }, + { + "epoch": 0.9978902953586498, + "grad_norm": 0.5303186774253845, + "learning_rate": 1.6808185342970238e-08, + "loss": 1.4966, + "step": 9460 + }, + { + "epoch": 0.9979957805907173, + "grad_norm": 0.5143783688545227, + "learning_rate": 1.516939279638807e-08, + "loss": 1.4866, + "step": 9461 + }, + { + "epoch": 0.9981012658227848, + "grad_norm": 0.5223006010055542, + "learning_rate": 1.3614639789882866e-08, + "loss": 1.4914, + "step": 9462 + }, + { + "epoch": 0.9982067510548523, + "grad_norm": 0.6459400057792664, + "learning_rate": 1.214392649756535e-08, + "loss": 1.512, + "step": 9463 + }, + { + "epoch": 0.9983122362869198, + "grad_norm": 0.5304635763168335, + "learning_rate": 1.075725308438691e-08, + "loss": 1.5089, + "step": 9464 + }, + { + "epoch": 0.9984177215189873, + "grad_norm": 0.5494211912155151, + "learning_rate": 9.454619705556722e-09, + "loss": 1.5333, + "step": 9465 + }, + { + "epoch": 0.9985232067510549, + "grad_norm": 0.5139241814613342, + "learning_rate": 8.236026507124628e-09, + "loss": 1.4751, + "step": 9466 + }, + { + "epoch": 0.9986286919831223, + "grad_norm": 0.527025580406189, + "learning_rate": 7.101473625648058e-09, + "loss": 1.4839, + "step": 9467 + }, + { + "epoch": 0.9987341772151899, + "grad_norm": 0.5234396457672119, + "learning_rate": 6.050961188358573e-09, + "loss": 1.4975, + "step": 9468 + }, + { + "epoch": 0.9988396624472574, + "grad_norm": 0.5368417501449585, + "learning_rate": 5.084489312745521e-09, + "loss": 1.4843, + "step": 9469 + }, + { + "epoch": 0.9989451476793249, + "grad_norm": 0.527698814868927, + "learning_rate": 4.202058107305451e-09, + "loss": 1.5052, + "step": 9470 + }, + { + "epoch": 0.9990506329113924, + "grad_norm": 0.5574713945388794, + "learning_rate": 3.403667670792698e-09, + "loss": 1.5075, + "step": 9471 + }, + { + "epoch": 0.99915611814346, + "grad_norm": 0.5438014268875122, + "learning_rate": 2.689318092718995e-09, + "loss": 1.4584, + "step": 
9472 + }, + { + "epoch": 0.9992616033755274, + "grad_norm": 0.5383046269416809, + "learning_rate": 2.059009453103666e-09, + "loss": 1.4917, + "step": 9473 + }, + { + "epoch": 0.9993670886075949, + "grad_norm": 0.538865864276886, + "learning_rate": 1.5127418226401623e-09, + "loss": 1.5104, + "step": 9474 + }, + { + "epoch": 0.9994725738396625, + "grad_norm": 0.5247359275817871, + "learning_rate": 1.0505152625295278e-09, + "loss": 1.4807, + "step": 9475 + }, + { + "epoch": 0.9995780590717299, + "grad_norm": 0.5423430800437927, + "learning_rate": 6.723298245636666e-10, + "loss": 1.4806, + "step": 9476 + }, + { + "epoch": 0.9996835443037975, + "grad_norm": 0.5532984137535095, + "learning_rate": 3.781855510420762e-10, + "loss": 1.4835, + "step": 9477 + }, + { + "epoch": 0.999789029535865, + "grad_norm": 0.530912458896637, + "learning_rate": 1.6808247493838026e-10, + "loss": 1.5137, + "step": 9478 + }, + { + "epoch": 0.9998945147679325, + "grad_norm": 0.54227614402771, + "learning_rate": 4.202061990032924e-11, + "loss": 1.4957, + "step": 9479 + }, + { + "epoch": 1.0, + "grad_norm": 1.587295651435852, + "learning_rate": 0.0, + "loss": 1.4537, + "step": 9480 + } + ], + "logging_steps": 1, + "max_steps": 9480, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.6871337555631488e+16, + "train_batch_size": 1024, + "trial_name": null, + "trial_params": null +} diff --git a/saves-phi-cosine/checkpoint-9480/training_args.bin b/saves-phi-cosine/checkpoint-9480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..aea1daade60fef0fc6a0e98554c76cf34f36995e --- /dev/null +++ b/saves-phi-cosine/checkpoint-9480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 
+oid sha256:32c912ccbd7619c0c1709552aa340349ad22a192acd862676429ca308ea315f9 +size 5176 diff --git a/saves-phi-cosine/config.json b/saves-phi-cosine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3fb5342bea6ae91c8f86ba3e8c07161371845c2a --- /dev/null +++ b/saves-phi-cosine/config.json @@ -0,0 +1,29 @@ +{ + "architectures": [ + "PhiForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 1, + "embd_pdrop": 0.0, + "eos_token_id": 2, + "hidden_act": "gelu", + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 1024, + "layer_norm_eps": 1e-05, + "max_position_embeddings": 2048, + "model_type": "phi", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "partial_rotary_factor": 0.5, + "qk_layernorm": false, + "resid_pdrop": 0.0, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-phi-cosine/generation_config.json b/saves-phi-cosine/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b2fc224da8a3685f78c733a0ef85e67242c17b5a --- /dev/null +++ b/saves-phi-cosine/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.42.4" +} diff --git a/saves-phi-cosine/model.safetensors b/saves-phi-cosine/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cd161fcef38485525c51230a766c516bff83f1fc --- /dev/null +++ b/saves-phi-cosine/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:330f2020c2a3ca5bb655b8694d173dcc16f5e4f0d07a6f34eefb07fc00c8e2f8 +size 7848944 diff --git a/saves-phi-cosine/result.log b/saves-phi-cosine/result.log new file mode 100644 index 0000000000000000000000000000000000000000..136a0f213d11f6786a2344ca1f6fbd3e2ab22d41 --- 
/dev/null +++ b/saves-phi-cosine/result.log @@ -0,0 +1 @@ +{'train_runtime': 1639.3526, 'train_samples_per_second': 5920.998, 'train_steps_per_second': 5.783, 'train_loss': 1.7418548818379012, 'epoch': 1.0} \ No newline at end of file diff --git a/saves-phi-cosine/special_tokens_map.json b/saves-phi-cosine/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-phi-cosine/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-phi-cosine/tokenizer.json b/saves-phi-cosine/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-phi-cosine/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| 
?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + 
"´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + 
"ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 
422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, 
+ "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + 
"æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 
857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, 
+ "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + 
"å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + 
"éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 
1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 
1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + 
"Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 
1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 
1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", 
+ "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", 
+ "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", + "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-phi-cosine/tokenizer_config.json b/saves-phi-cosine/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-phi-cosine/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + 
"model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-phi/checkpoint-9480/config.json b/saves-phi/checkpoint-9480/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3fb5342bea6ae91c8f86ba3e8c07161371845c2a --- /dev/null +++ b/saves-phi/checkpoint-9480/config.json @@ -0,0 +1,29 @@ +{ + "architectures": [ + "PhiForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 1, + "embd_pdrop": 0.0, + "eos_token_id": 2, + "hidden_act": "gelu", + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 1024, + "layer_norm_eps": 1e-05, + "max_position_embeddings": 2048, + "model_type": "phi", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "partial_rotary_factor": 0.5, + "qk_layernorm": false, + "resid_pdrop": 0.0, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-phi/checkpoint-9480/generation_config.json b/saves-phi/checkpoint-9480/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b2fc224da8a3685f78c733a0ef85e67242c17b5a --- /dev/null +++ b/saves-phi/checkpoint-9480/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.42.4" +} diff --git a/saves-phi/checkpoint-9480/model.safetensors b/saves-phi/checkpoint-9480/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..69f04e8e7486a18258f458ce9367d8820406425a --- /dev/null +++ b/saves-phi/checkpoint-9480/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf90c00665a3e7557aa7e8faf3dd2d1d9d3466763fbf635fbd74532d2000842b +size 7848944 diff --git a/saves-phi/checkpoint-9480/optimizer.pt 
b/saves-phi/checkpoint-9480/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f4c12550ad492fef5d49e8639001a80312f22fa3 --- /dev/null +++ b/saves-phi/checkpoint-9480/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eec46af48e43c14d24683c74ffcec91702e9eee88e968823f7c681cb8d75d12b +size 15718462 diff --git a/saves-phi/checkpoint-9480/rng_state.pth b/saves-phi/checkpoint-9480/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f8291cd6ce87668b786a72f3e93d072fbe54902 --- /dev/null +++ b/saves-phi/checkpoint-9480/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c917636c7a58af68a29056522a757e9f9b99005b776641aa157c536967817d +size 14244 diff --git a/saves-phi/checkpoint-9480/scheduler.pt b/saves-phi/checkpoint-9480/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..63473f23a031ab0f869bb406d5cf89839262f03d --- /dev/null +++ b/saves-phi/checkpoint-9480/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbb2bea2f7536b844ad9bb1bf6c3877fce0b1eb4d96764e140560dbf207ce6aa +size 1064 diff --git a/saves-phi/checkpoint-9480/special_tokens_map.json b/saves-phi/checkpoint-9480/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-phi/checkpoint-9480/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-phi/checkpoint-9480/tokenizer.json 
b/saves-phi/checkpoint-9480/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-phi/checkpoint-9480/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + 
"8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 
197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 
348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 
495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 
641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + 
"æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + 
"now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 
1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, 
+ "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + 
"åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + 
"Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + 
",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 
1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 
1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + 
"Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-phi/checkpoint-9480/tokenizer_config.json b/saves-phi/checkpoint-9480/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-phi/checkpoint-9480/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + 
"errors": "replace", + "model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-phi/checkpoint-9480/trainer_state.json b/saves-phi/checkpoint-9480/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d97d03699840a3f106c79c8705a7d5568e9ff7ff --- /dev/null +++ b/saves-phi/checkpoint-9480/trainer_state.json @@ -0,0 +1,66393 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 9480, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00010548523206751055, + "grad_norm": 2.011542320251465, + "learning_rate": 1.5822784810126583e-05, + "loss": 7.6591, + "step": 1 + }, + { + "epoch": 0.0002109704641350211, + "grad_norm": 1.9964383840560913, + "learning_rate": 3.1645569620253167e-05, + "loss": 7.6601, + "step": 2 + }, + { + "epoch": 0.00031645569620253165, + "grad_norm": 1.9953736066818237, + "learning_rate": 4.746835443037975e-05, + "loss": 7.6399, + "step": 3 + }, + { + "epoch": 0.0004219409282700422, + "grad_norm": 2.022376537322998, + "learning_rate": 6.329113924050633e-05, + "loss": 7.5979, + "step": 4 + }, + { + "epoch": 0.0005274261603375527, + "grad_norm": 2.0246565341949463, + "learning_rate": 7.911392405063291e-05, + "loss": 7.532, + "step": 5 + }, + { + "epoch": 0.0006329113924050633, + "grad_norm": 1.8483935594558716, + "learning_rate": 9.49367088607595e-05, + "loss": 7.459, + "step": 6 + }, + { + "epoch": 0.0007383966244725738, + "grad_norm": 1.706437110900879, + "learning_rate": 0.00011075949367088609, + "loss": 7.3705, + "step": 7 + }, + { + "epoch": 0.0008438818565400844, + "grad_norm": 1.507551908493042, + "learning_rate": 0.00012658227848101267, + "loss": 7.283, + "step": 8 + }, + { + "epoch": 0.0009493670886075949, + "grad_norm": 1.3828750848770142, + "learning_rate": 
0.00014240506329113925, + "loss": 7.191, + "step": 9 + }, + { + "epoch": 0.0010548523206751054, + "grad_norm": 1.251572847366333, + "learning_rate": 0.00015822784810126583, + "loss": 7.1113, + "step": 10 + }, + { + "epoch": 0.001160337552742616, + "grad_norm": 1.2011066675186157, + "learning_rate": 0.0001740506329113924, + "loss": 7.0247, + "step": 11 + }, + { + "epoch": 0.0012658227848101266, + "grad_norm": 1.1461619138717651, + "learning_rate": 0.000189873417721519, + "loss": 6.9572, + "step": 12 + }, + { + "epoch": 0.0013713080168776372, + "grad_norm": 1.1297284364700317, + "learning_rate": 0.00020569620253164557, + "loss": 6.8903, + "step": 13 + }, + { + "epoch": 0.0014767932489451476, + "grad_norm": 1.1294082403182983, + "learning_rate": 0.00022151898734177217, + "loss": 6.8264, + "step": 14 + }, + { + "epoch": 0.0015822784810126582, + "grad_norm": 1.120693325996399, + "learning_rate": 0.00023734177215189873, + "loss": 6.7678, + "step": 15 + }, + { + "epoch": 0.0016877637130801688, + "grad_norm": 1.1134696006774902, + "learning_rate": 0.00025316455696202533, + "loss": 6.7163, + "step": 16 + }, + { + "epoch": 0.0017932489451476794, + "grad_norm": 1.1267699003219604, + "learning_rate": 0.0002689873417721519, + "loss": 6.6375, + "step": 17 + }, + { + "epoch": 0.0018987341772151898, + "grad_norm": 1.1200734376907349, + "learning_rate": 0.0002848101265822785, + "loss": 6.5736, + "step": 18 + }, + { + "epoch": 0.0020042194092827004, + "grad_norm": 1.1191186904907227, + "learning_rate": 0.00030063291139240507, + "loss": 6.4996, + "step": 19 + }, + { + "epoch": 0.002109704641350211, + "grad_norm": 1.0853898525238037, + "learning_rate": 0.00031645569620253165, + "loss": 6.4426, + "step": 20 + }, + { + "epoch": 0.0022151898734177216, + "grad_norm": 1.0650807619094849, + "learning_rate": 0.00033227848101265823, + "loss": 6.3779, + "step": 21 + }, + { + "epoch": 0.002320675105485232, + "grad_norm": 1.0586391687393188, + "learning_rate": 0.0003481012658227848, + "loss": 
6.311, + "step": 22 + }, + { + "epoch": 0.002426160337552743, + "grad_norm": 1.0491359233856201, + "learning_rate": 0.00036392405063291145, + "loss": 6.2385, + "step": 23 + }, + { + "epoch": 0.002531645569620253, + "grad_norm": 1.0341084003448486, + "learning_rate": 0.000379746835443038, + "loss": 6.1742, + "step": 24 + }, + { + "epoch": 0.0026371308016877636, + "grad_norm": 0.9996719360351562, + "learning_rate": 0.00039556962025316455, + "loss": 6.1213, + "step": 25 + }, + { + "epoch": 0.0027426160337552744, + "grad_norm": 0.9974717497825623, + "learning_rate": 0.00041139240506329113, + "loss": 6.048, + "step": 26 + }, + { + "epoch": 0.002848101265822785, + "grad_norm": 0.9489433765411377, + "learning_rate": 0.00042721518987341776, + "loss": 6.0148, + "step": 27 + }, + { + "epoch": 0.002953586497890295, + "grad_norm": 0.9366974830627441, + "learning_rate": 0.00044303797468354434, + "loss": 5.9313, + "step": 28 + }, + { + "epoch": 0.003059071729957806, + "grad_norm": 0.9141618013381958, + "learning_rate": 0.0004588607594936709, + "loss": 5.8789, + "step": 29 + }, + { + "epoch": 0.0031645569620253164, + "grad_norm": 0.8686885237693787, + "learning_rate": 0.00047468354430379745, + "loss": 5.8355, + "step": 30 + }, + { + "epoch": 0.003270042194092827, + "grad_norm": 0.8527199029922485, + "learning_rate": 0.0004905063291139241, + "loss": 5.7709, + "step": 31 + }, + { + "epoch": 0.0033755274261603376, + "grad_norm": 0.8326303958892822, + "learning_rate": 0.0005063291139240507, + "loss": 5.6998, + "step": 32 + }, + { + "epoch": 0.003481012658227848, + "grad_norm": 0.8138381242752075, + "learning_rate": 0.0005221518987341772, + "loss": 5.6333, + "step": 33 + }, + { + "epoch": 0.003586497890295359, + "grad_norm": 0.7682183384895325, + "learning_rate": 0.0005379746835443038, + "loss": 5.5995, + "step": 34 + }, + { + "epoch": 0.003691983122362869, + "grad_norm": 0.7663596868515015, + "learning_rate": 0.0005537974683544304, + "loss": 5.5098, + "step": 35 + }, + { + "epoch": 
0.0037974683544303796, + "grad_norm": 0.7409225702285767, + "learning_rate": 0.000569620253164557, + "loss": 5.4808, + "step": 36 + }, + { + "epoch": 0.0039029535864978904, + "grad_norm": 0.7263235449790955, + "learning_rate": 0.0005854430379746836, + "loss": 5.4119, + "step": 37 + }, + { + "epoch": 0.004008438818565401, + "grad_norm": 0.6572887301445007, + "learning_rate": 0.0006012658227848101, + "loss": 5.3797, + "step": 38 + }, + { + "epoch": 0.004113924050632912, + "grad_norm": 0.6338242292404175, + "learning_rate": 0.0006170886075949367, + "loss": 5.289, + "step": 39 + }, + { + "epoch": 0.004219409282700422, + "grad_norm": 0.6308150887489319, + "learning_rate": 0.0006329113924050633, + "loss": 5.2654, + "step": 40 + }, + { + "epoch": 0.004324894514767932, + "grad_norm": 0.6700575947761536, + "learning_rate": 0.0006487341772151899, + "loss": 5.2235, + "step": 41 + }, + { + "epoch": 0.004430379746835443, + "grad_norm": 1.0167239904403687, + "learning_rate": 0.0006645569620253165, + "loss": 5.1842, + "step": 42 + }, + { + "epoch": 0.004535864978902953, + "grad_norm": 1.0152013301849365, + "learning_rate": 0.000680379746835443, + "loss": 5.1102, + "step": 43 + }, + { + "epoch": 0.004641350210970464, + "grad_norm": 0.5130178332328796, + "learning_rate": 0.0006962025316455696, + "loss": 5.0944, + "step": 44 + }, + { + "epoch": 0.004746835443037975, + "grad_norm": 1.6894309520721436, + "learning_rate": 0.0007120253164556963, + "loss": 5.0441, + "step": 45 + }, + { + "epoch": 0.004852320675105486, + "grad_norm": 0.4479582905769348, + "learning_rate": 0.0007278481012658229, + "loss": 4.9589, + "step": 46 + }, + { + "epoch": 0.004957805907172996, + "grad_norm": 0.7943894267082214, + "learning_rate": 0.0007436708860759495, + "loss": 4.9549, + "step": 47 + }, + { + "epoch": 0.005063291139240506, + "grad_norm": 0.48056092858314514, + "learning_rate": 0.000759493670886076, + "loss": 4.8983, + "step": 48 + }, + { + "epoch": 0.005168776371308017, + "grad_norm": 
0.8553137183189392, + "learning_rate": 0.0007753164556962025, + "loss": 4.8396, + "step": 49 + }, + { + "epoch": 0.005274261603375527, + "grad_norm": 0.4161706566810608, + "learning_rate": 0.0007911392405063291, + "loss": 4.81, + "step": 50 + }, + { + "epoch": 0.005379746835443038, + "grad_norm": 0.5087617039680481, + "learning_rate": 0.0008069620253164557, + "loss": 4.763, + "step": 51 + }, + { + "epoch": 0.005485232067510549, + "grad_norm": 0.4462997019290924, + "learning_rate": 0.0008227848101265823, + "loss": 4.7158, + "step": 52 + }, + { + "epoch": 0.005590717299578059, + "grad_norm": 0.4181429445743561, + "learning_rate": 0.000838607594936709, + "loss": 4.6819, + "step": 53 + }, + { + "epoch": 0.00569620253164557, + "grad_norm": 0.44886595010757446, + "learning_rate": 0.0008544303797468355, + "loss": 4.6129, + "step": 54 + }, + { + "epoch": 0.0058016877637130804, + "grad_norm": 0.3937552571296692, + "learning_rate": 0.0008702531645569621, + "loss": 4.599, + "step": 55 + }, + { + "epoch": 0.00590717299578059, + "grad_norm": 0.3460679352283478, + "learning_rate": 0.0008860759493670887, + "loss": 4.5437, + "step": 56 + }, + { + "epoch": 0.006012658227848101, + "grad_norm": 0.36171504855155945, + "learning_rate": 0.0009018987341772153, + "loss": 4.545, + "step": 57 + }, + { + "epoch": 0.006118143459915612, + "grad_norm": 0.4088135063648224, + "learning_rate": 0.0009177215189873418, + "loss": 4.5293, + "step": 58 + }, + { + "epoch": 0.006223628691983122, + "grad_norm": 0.37013325095176697, + "learning_rate": 0.0009335443037974683, + "loss": 4.4753, + "step": 59 + }, + { + "epoch": 0.006329113924050633, + "grad_norm": 0.32116636633872986, + "learning_rate": 0.0009493670886075949, + "loss": 4.432, + "step": 60 + }, + { + "epoch": 0.006434599156118144, + "grad_norm": 0.30790355801582336, + "learning_rate": 0.0009651898734177215, + "loss": 4.4279, + "step": 61 + }, + { + "epoch": 0.006540084388185654, + "grad_norm": 0.33993634581565857, + "learning_rate": 
0.0009810126582278482, + "loss": 4.3744, + "step": 62 + }, + { + "epoch": 0.006645569620253164, + "grad_norm": 0.29409778118133545, + "learning_rate": 0.0009968354430379747, + "loss": 4.3612, + "step": 63 + }, + { + "epoch": 0.006751054852320675, + "grad_norm": 0.35212376713752747, + "learning_rate": 0.0010126582278481013, + "loss": 4.3189, + "step": 64 + }, + { + "epoch": 0.006856540084388186, + "grad_norm": 0.4115193486213684, + "learning_rate": 0.001028481012658228, + "loss": 4.3196, + "step": 65 + }, + { + "epoch": 0.006962025316455696, + "grad_norm": 0.4627211093902588, + "learning_rate": 0.0010443037974683545, + "loss": 4.2708, + "step": 66 + }, + { + "epoch": 0.007067510548523207, + "grad_norm": 0.5270651578903198, + "learning_rate": 0.001060126582278481, + "loss": 4.2782, + "step": 67 + }, + { + "epoch": 0.007172995780590718, + "grad_norm": 0.794312596321106, + "learning_rate": 0.0010759493670886076, + "loss": 4.2603, + "step": 68 + }, + { + "epoch": 0.007278481012658228, + "grad_norm": 0.6797000169754028, + "learning_rate": 0.0010917721518987342, + "loss": 4.205, + "step": 69 + }, + { + "epoch": 0.007383966244725738, + "grad_norm": 0.46117499470710754, + "learning_rate": 0.0011075949367088608, + "loss": 4.1972, + "step": 70 + }, + { + "epoch": 0.007489451476793249, + "grad_norm": 0.4507919251918793, + "learning_rate": 0.0011234177215189874, + "loss": 4.2155, + "step": 71 + }, + { + "epoch": 0.007594936708860759, + "grad_norm": 0.38559678196907043, + "learning_rate": 0.001139240506329114, + "loss": 4.1693, + "step": 72 + }, + { + "epoch": 0.00770042194092827, + "grad_norm": 0.41005730628967285, + "learning_rate": 0.0011550632911392405, + "loss": 4.1402, + "step": 73 + }, + { + "epoch": 0.007805907172995781, + "grad_norm": 0.5099040865898132, + "learning_rate": 0.0011708860759493671, + "loss": 4.1277, + "step": 74 + }, + { + "epoch": 0.007911392405063292, + "grad_norm": 0.9070386290550232, + "learning_rate": 0.0011867088607594937, + "loss": 4.1139, + "step": 
75 + }, + { + "epoch": 0.008016877637130802, + "grad_norm": 1.352543592453003, + "learning_rate": 0.0012025316455696203, + "loss": 4.13, + "step": 76 + }, + { + "epoch": 0.008122362869198312, + "grad_norm": 0.685218870639801, + "learning_rate": 0.0012183544303797469, + "loss": 4.0973, + "step": 77 + }, + { + "epoch": 0.008227848101265823, + "grad_norm": 1.1689188480377197, + "learning_rate": 0.0012341772151898734, + "loss": 4.0989, + "step": 78 + }, + { + "epoch": 0.008333333333333333, + "grad_norm": 1.1035869121551514, + "learning_rate": 0.00125, + "loss": 4.0717, + "step": 79 + }, + { + "epoch": 0.008438818565400843, + "grad_norm": 0.6442673802375793, + "learning_rate": 0.0012658227848101266, + "loss": 4.0578, + "step": 80 + }, + { + "epoch": 0.008544303797468355, + "grad_norm": 0.6536983847618103, + "learning_rate": 0.0012816455696202532, + "loss": 4.032, + "step": 81 + }, + { + "epoch": 0.008649789029535865, + "grad_norm": 0.5698292255401611, + "learning_rate": 0.0012974683544303798, + "loss": 4.0215, + "step": 82 + }, + { + "epoch": 0.008755274261603375, + "grad_norm": 0.5748029351234436, + "learning_rate": 0.0013132911392405063, + "loss": 3.9926, + "step": 83 + }, + { + "epoch": 0.008860759493670886, + "grad_norm": 0.5127552151679993, + "learning_rate": 0.001329113924050633, + "loss": 3.9656, + "step": 84 + }, + { + "epoch": 0.008966244725738396, + "grad_norm": 0.5425557494163513, + "learning_rate": 0.0013449367088607595, + "loss": 3.9589, + "step": 85 + }, + { + "epoch": 0.009071729957805906, + "grad_norm": 0.38073161244392395, + "learning_rate": 0.001360759493670886, + "loss": 3.9674, + "step": 86 + }, + { + "epoch": 0.009177215189873418, + "grad_norm": 0.49425122141838074, + "learning_rate": 0.0013765822784810127, + "loss": 3.9609, + "step": 87 + }, + { + "epoch": 0.009282700421940928, + "grad_norm": 0.5223596096038818, + "learning_rate": 0.0013924050632911392, + "loss": 3.8881, + "step": 88 + }, + { + "epoch": 0.009388185654008438, + "grad_norm": 
0.6041417121887207, + "learning_rate": 0.001408227848101266, + "loss": 3.903, + "step": 89 + }, + { + "epoch": 0.00949367088607595, + "grad_norm": 0.6410835385322571, + "learning_rate": 0.0014240506329113926, + "loss": 3.9053, + "step": 90 + }, + { + "epoch": 0.00959915611814346, + "grad_norm": 0.6636977791786194, + "learning_rate": 0.0014398734177215192, + "loss": 3.8916, + "step": 91 + }, + { + "epoch": 0.009704641350210971, + "grad_norm": 0.6700908541679382, + "learning_rate": 0.0014556962025316458, + "loss": 3.874, + "step": 92 + }, + { + "epoch": 0.009810126582278481, + "grad_norm": 0.4951082766056061, + "learning_rate": 0.0014715189873417724, + "loss": 3.8548, + "step": 93 + }, + { + "epoch": 0.009915611814345991, + "grad_norm": 0.5571147203445435, + "learning_rate": 0.001487341772151899, + "loss": 3.8841, + "step": 94 + }, + { + "epoch": 0.010021097046413503, + "grad_norm": 0.7228207588195801, + "learning_rate": 0.0015, + "loss": 3.8444, + "step": 95 + }, + { + "epoch": 0.010126582278481013, + "grad_norm": 0.5196840763092041, + "learning_rate": 0.0015, + "loss": 3.8324, + "step": 96 + }, + { + "epoch": 0.010232067510548523, + "grad_norm": 0.6950877904891968, + "learning_rate": 0.0015, + "loss": 3.8145, + "step": 97 + }, + { + "epoch": 0.010337552742616034, + "grad_norm": 0.7402157783508301, + "learning_rate": 0.0015, + "loss": 3.7988, + "step": 98 + }, + { + "epoch": 0.010443037974683544, + "grad_norm": 0.9650890231132507, + "learning_rate": 0.0015, + "loss": 3.8394, + "step": 99 + }, + { + "epoch": 0.010548523206751054, + "grad_norm": 0.6854791641235352, + "learning_rate": 0.0015, + "loss": 3.783, + "step": 100 + }, + { + "epoch": 0.010654008438818566, + "grad_norm": 0.5937216877937317, + "learning_rate": 0.0015, + "loss": 3.7849, + "step": 101 + }, + { + "epoch": 0.010759493670886076, + "grad_norm": 0.7037617564201355, + "learning_rate": 0.0015, + "loss": 3.7504, + "step": 102 + }, + { + "epoch": 0.010864978902953586, + "grad_norm": 0.6718727350234985, + 
"learning_rate": 0.0015, + "loss": 3.7401, + "step": 103 + }, + { + "epoch": 0.010970464135021098, + "grad_norm": 0.6742255091667175, + "learning_rate": 0.0015, + "loss": 3.7252, + "step": 104 + }, + { + "epoch": 0.011075949367088608, + "grad_norm": 0.8535324931144714, + "learning_rate": 0.0015, + "loss": 3.7297, + "step": 105 + }, + { + "epoch": 0.011181434599156118, + "grad_norm": 0.6025777459144592, + "learning_rate": 0.0015, + "loss": 3.7178, + "step": 106 + }, + { + "epoch": 0.01128691983122363, + "grad_norm": 0.5835834741592407, + "learning_rate": 0.0015, + "loss": 3.723, + "step": 107 + }, + { + "epoch": 0.01139240506329114, + "grad_norm": 0.6241533756256104, + "learning_rate": 0.0015, + "loss": 3.6998, + "step": 108 + }, + { + "epoch": 0.01149789029535865, + "grad_norm": 0.5421590209007263, + "learning_rate": 0.0015, + "loss": 3.7155, + "step": 109 + }, + { + "epoch": 0.011603375527426161, + "grad_norm": 0.548008918762207, + "learning_rate": 0.0015, + "loss": 3.6711, + "step": 110 + }, + { + "epoch": 0.01170886075949367, + "grad_norm": 0.5887262225151062, + "learning_rate": 0.0015, + "loss": 3.6718, + "step": 111 + }, + { + "epoch": 0.01181434599156118, + "grad_norm": 0.5165668725967407, + "learning_rate": 0.0015, + "loss": 3.6759, + "step": 112 + }, + { + "epoch": 0.011919831223628692, + "grad_norm": 0.6030138731002808, + "learning_rate": 0.0015, + "loss": 3.6532, + "step": 113 + }, + { + "epoch": 0.012025316455696202, + "grad_norm": 0.48852312564849854, + "learning_rate": 0.0015, + "loss": 3.6623, + "step": 114 + }, + { + "epoch": 0.012130801687763712, + "grad_norm": 0.49858132004737854, + "learning_rate": 0.0015, + "loss": 3.6547, + "step": 115 + }, + { + "epoch": 0.012236286919831224, + "grad_norm": 0.4732319116592407, + "learning_rate": 0.0015, + "loss": 3.6331, + "step": 116 + }, + { + "epoch": 0.012341772151898734, + "grad_norm": 0.7504942417144775, + "learning_rate": 0.0015, + "loss": 3.6535, + "step": 117 + }, + { + "epoch": 0.012447257383966244, + 
"grad_norm": 0.8717824816703796, + "learning_rate": 0.0015, + "loss": 3.6396, + "step": 118 + }, + { + "epoch": 0.012552742616033756, + "grad_norm": 1.1342066526412964, + "learning_rate": 0.0015, + "loss": 3.6024, + "step": 119 + }, + { + "epoch": 0.012658227848101266, + "grad_norm": 0.7692056894302368, + "learning_rate": 0.0015, + "loss": 3.6184, + "step": 120 + }, + { + "epoch": 0.012763713080168776, + "grad_norm": 0.9235360026359558, + "learning_rate": 0.0015, + "loss": 3.6258, + "step": 121 + }, + { + "epoch": 0.012869198312236287, + "grad_norm": 0.5077849626541138, + "learning_rate": 0.0015, + "loss": 3.5949, + "step": 122 + }, + { + "epoch": 0.012974683544303797, + "grad_norm": 0.6044186353683472, + "learning_rate": 0.0015, + "loss": 3.5698, + "step": 123 + }, + { + "epoch": 0.013080168776371307, + "grad_norm": 0.47526460886001587, + "learning_rate": 0.0015, + "loss": 3.555, + "step": 124 + }, + { + "epoch": 0.013185654008438819, + "grad_norm": 0.46524354815483093, + "learning_rate": 0.0015, + "loss": 3.5769, + "step": 125 + }, + { + "epoch": 0.013291139240506329, + "grad_norm": 0.5536606907844543, + "learning_rate": 0.0015, + "loss": 3.5524, + "step": 126 + }, + { + "epoch": 0.01339662447257384, + "grad_norm": 0.4977322518825531, + "learning_rate": 0.0015, + "loss": 3.5644, + "step": 127 + }, + { + "epoch": 0.01350210970464135, + "grad_norm": 0.481624960899353, + "learning_rate": 0.0015, + "loss": 3.5316, + "step": 128 + }, + { + "epoch": 0.01360759493670886, + "grad_norm": 0.4720386564731598, + "learning_rate": 0.0015, + "loss": 3.5218, + "step": 129 + }, + { + "epoch": 0.013713080168776372, + "grad_norm": 0.5213106870651245, + "learning_rate": 0.0015, + "loss": 3.542, + "step": 130 + }, + { + "epoch": 0.013818565400843882, + "grad_norm": 0.46011459827423096, + "learning_rate": 0.0015, + "loss": 3.4923, + "step": 131 + }, + { + "epoch": 0.013924050632911392, + "grad_norm": 0.6758908629417419, + "learning_rate": 0.0015, + "loss": 3.5476, + "step": 132 + }, + 
{ + "epoch": 0.014029535864978904, + "grad_norm": 0.977131187915802, + "learning_rate": 0.0015, + "loss": 3.5444, + "step": 133 + }, + { + "epoch": 0.014135021097046414, + "grad_norm": 1.2688044309616089, + "learning_rate": 0.0015, + "loss": 3.5134, + "step": 134 + }, + { + "epoch": 0.014240506329113924, + "grad_norm": 0.6184185743331909, + "learning_rate": 0.0015, + "loss": 3.504, + "step": 135 + }, + { + "epoch": 0.014345991561181435, + "grad_norm": 0.8497546911239624, + "learning_rate": 0.0015, + "loss": 3.5002, + "step": 136 + }, + { + "epoch": 0.014451476793248945, + "grad_norm": 0.9705550074577332, + "learning_rate": 0.0015, + "loss": 3.5111, + "step": 137 + }, + { + "epoch": 0.014556962025316455, + "grad_norm": 0.7666255831718445, + "learning_rate": 0.0015, + "loss": 3.4586, + "step": 138 + }, + { + "epoch": 0.014662447257383967, + "grad_norm": 0.6074670553207397, + "learning_rate": 0.0015, + "loss": 3.4511, + "step": 139 + }, + { + "epoch": 0.014767932489451477, + "grad_norm": 0.5568854808807373, + "learning_rate": 0.0015, + "loss": 3.4506, + "step": 140 + }, + { + "epoch": 0.014873417721518987, + "grad_norm": 0.5095645785331726, + "learning_rate": 0.0015, + "loss": 3.4491, + "step": 141 + }, + { + "epoch": 0.014978902953586498, + "grad_norm": 0.507882297039032, + "learning_rate": 0.0015, + "loss": 3.446, + "step": 142 + }, + { + "epoch": 0.015084388185654008, + "grad_norm": 0.5321149230003357, + "learning_rate": 0.0015, + "loss": 3.4366, + "step": 143 + }, + { + "epoch": 0.015189873417721518, + "grad_norm": 0.5249614119529724, + "learning_rate": 0.0015, + "loss": 3.4379, + "step": 144 + }, + { + "epoch": 0.01529535864978903, + "grad_norm": 0.4674147069454193, + "learning_rate": 0.0015, + "loss": 3.4449, + "step": 145 + }, + { + "epoch": 0.01540084388185654, + "grad_norm": 0.47469788789749146, + "learning_rate": 0.0015, + "loss": 3.4026, + "step": 146 + }, + { + "epoch": 0.01550632911392405, + "grad_norm": 0.5053249001502991, + "learning_rate": 0.0015, + 
"loss": 3.4355, + "step": 147 + }, + { + "epoch": 0.015611814345991562, + "grad_norm": 0.408283531665802, + "learning_rate": 0.0015, + "loss": 3.3723, + "step": 148 + }, + { + "epoch": 0.015717299578059073, + "grad_norm": 0.47718942165374756, + "learning_rate": 0.0015, + "loss": 3.3956, + "step": 149 + }, + { + "epoch": 0.015822784810126583, + "grad_norm": 0.5963375568389893, + "learning_rate": 0.0015, + "loss": 3.3988, + "step": 150 + }, + { + "epoch": 0.015928270042194093, + "grad_norm": 0.6277444362640381, + "learning_rate": 0.0015, + "loss": 3.4276, + "step": 151 + }, + { + "epoch": 0.016033755274261603, + "grad_norm": 0.6306942701339722, + "learning_rate": 0.0015, + "loss": 3.3551, + "step": 152 + }, + { + "epoch": 0.016139240506329113, + "grad_norm": 0.5683684945106506, + "learning_rate": 0.0015, + "loss": 3.4084, + "step": 153 + }, + { + "epoch": 0.016244725738396623, + "grad_norm": 0.5669676065444946, + "learning_rate": 0.0015, + "loss": 3.352, + "step": 154 + }, + { + "epoch": 0.016350210970464137, + "grad_norm": 0.45035621523857117, + "learning_rate": 0.0015, + "loss": 3.3635, + "step": 155 + }, + { + "epoch": 0.016455696202531647, + "grad_norm": 0.48036280274391174, + "learning_rate": 0.0015, + "loss": 3.3482, + "step": 156 + }, + { + "epoch": 0.016561181434599156, + "grad_norm": 0.5925886631011963, + "learning_rate": 0.0015, + "loss": 3.3647, + "step": 157 + }, + { + "epoch": 0.016666666666666666, + "grad_norm": 0.5252505540847778, + "learning_rate": 0.0015, + "loss": 3.3313, + "step": 158 + }, + { + "epoch": 0.016772151898734176, + "grad_norm": 0.4907812774181366, + "learning_rate": 0.0015, + "loss": 3.3498, + "step": 159 + }, + { + "epoch": 0.016877637130801686, + "grad_norm": 0.5507779121398926, + "learning_rate": 0.0015, + "loss": 3.3435, + "step": 160 + }, + { + "epoch": 0.0169831223628692, + "grad_norm": 0.5671168565750122, + "learning_rate": 0.0015, + "loss": 3.3011, + "step": 161 + }, + { + "epoch": 0.01708860759493671, + "grad_norm": 
0.560148298740387, + "learning_rate": 0.0015, + "loss": 3.3534, + "step": 162 + }, + { + "epoch": 0.01719409282700422, + "grad_norm": 0.5426363945007324, + "learning_rate": 0.0015, + "loss": 3.3287, + "step": 163 + }, + { + "epoch": 0.01729957805907173, + "grad_norm": 0.48038220405578613, + "learning_rate": 0.0015, + "loss": 3.3026, + "step": 164 + }, + { + "epoch": 0.01740506329113924, + "grad_norm": 0.49991828203201294, + "learning_rate": 0.0015, + "loss": 3.323, + "step": 165 + }, + { + "epoch": 0.01751054852320675, + "grad_norm": 0.542594313621521, + "learning_rate": 0.0015, + "loss": 3.3064, + "step": 166 + }, + { + "epoch": 0.017616033755274263, + "grad_norm": 0.6581192016601562, + "learning_rate": 0.0015, + "loss": 3.2768, + "step": 167 + }, + { + "epoch": 0.017721518987341773, + "grad_norm": 1.042526125907898, + "learning_rate": 0.0015, + "loss": 3.2859, + "step": 168 + }, + { + "epoch": 0.017827004219409283, + "grad_norm": 1.2364501953125, + "learning_rate": 0.0015, + "loss": 3.2897, + "step": 169 + }, + { + "epoch": 0.017932489451476793, + "grad_norm": 0.7119993567466736, + "learning_rate": 0.0015, + "loss": 3.2836, + "step": 170 + }, + { + "epoch": 0.018037974683544303, + "grad_norm": 0.9350215792655945, + "learning_rate": 0.0015, + "loss": 3.2656, + "step": 171 + }, + { + "epoch": 0.018143459915611813, + "grad_norm": 1.0866774320602417, + "learning_rate": 0.0015, + "loss": 3.3286, + "step": 172 + }, + { + "epoch": 0.018248945147679326, + "grad_norm": 0.7057785987854004, + "learning_rate": 0.0015, + "loss": 3.2893, + "step": 173 + }, + { + "epoch": 0.018354430379746836, + "grad_norm": 0.8040363192558289, + "learning_rate": 0.0015, + "loss": 3.2537, + "step": 174 + }, + { + "epoch": 0.018459915611814346, + "grad_norm": 0.682712733745575, + "learning_rate": 0.0015, + "loss": 3.2988, + "step": 175 + }, + { + "epoch": 0.018565400843881856, + "grad_norm": 0.7391072511672974, + "learning_rate": 0.0015, + "loss": 3.276, + "step": 176 + }, + { + "epoch": 
0.018670886075949366, + "grad_norm": 0.4904000759124756, + "learning_rate": 0.0015, + "loss": 3.2685, + "step": 177 + }, + { + "epoch": 0.018776371308016876, + "grad_norm": 0.6686579585075378, + "learning_rate": 0.0015, + "loss": 3.2668, + "step": 178 + }, + { + "epoch": 0.01888185654008439, + "grad_norm": 0.7120933532714844, + "learning_rate": 0.0015, + "loss": 3.2756, + "step": 179 + }, + { + "epoch": 0.0189873417721519, + "grad_norm": 0.682628870010376, + "learning_rate": 0.0015, + "loss": 3.2507, + "step": 180 + }, + { + "epoch": 0.01909282700421941, + "grad_norm": 0.5308851599693298, + "learning_rate": 0.0015, + "loss": 3.2523, + "step": 181 + }, + { + "epoch": 0.01919831223628692, + "grad_norm": 0.5541619658470154, + "learning_rate": 0.0015, + "loss": 3.2407, + "step": 182 + }, + { + "epoch": 0.01930379746835443, + "grad_norm": 0.5862583518028259, + "learning_rate": 0.0015, + "loss": 3.2176, + "step": 183 + }, + { + "epoch": 0.019409282700421943, + "grad_norm": 0.5416473746299744, + "learning_rate": 0.0015, + "loss": 3.2443, + "step": 184 + }, + { + "epoch": 0.019514767932489453, + "grad_norm": 0.6392229795455933, + "learning_rate": 0.0015, + "loss": 3.1996, + "step": 185 + }, + { + "epoch": 0.019620253164556962, + "grad_norm": 0.6571645736694336, + "learning_rate": 0.0015, + "loss": 3.1835, + "step": 186 + }, + { + "epoch": 0.019725738396624472, + "grad_norm": 0.4875413775444031, + "learning_rate": 0.0015, + "loss": 3.1681, + "step": 187 + }, + { + "epoch": 0.019831223628691982, + "grad_norm": 0.611209511756897, + "learning_rate": 0.0015, + "loss": 3.1712, + "step": 188 + }, + { + "epoch": 0.019936708860759492, + "grad_norm": 0.5870056748390198, + "learning_rate": 0.0015, + "loss": 3.1819, + "step": 189 + }, + { + "epoch": 0.020042194092827006, + "grad_norm": 0.5035120248794556, + "learning_rate": 0.0015, + "loss": 3.2077, + "step": 190 + }, + { + "epoch": 0.020147679324894516, + "grad_norm": 0.543683648109436, + "learning_rate": 0.0015, + "loss": 3.161, + 
"step": 191 + }, + { + "epoch": 0.020253164556962026, + "grad_norm": 0.5996739864349365, + "learning_rate": 0.0015, + "loss": 3.203, + "step": 192 + }, + { + "epoch": 0.020358649789029536, + "grad_norm": 0.5495070219039917, + "learning_rate": 0.0015, + "loss": 3.135, + "step": 193 + }, + { + "epoch": 0.020464135021097046, + "grad_norm": 0.5591219663619995, + "learning_rate": 0.0015, + "loss": 3.1768, + "step": 194 + }, + { + "epoch": 0.020569620253164556, + "grad_norm": 0.5875229239463806, + "learning_rate": 0.0015, + "loss": 3.1571, + "step": 195 + }, + { + "epoch": 0.02067510548523207, + "grad_norm": 0.5609016418457031, + "learning_rate": 0.0015, + "loss": 3.1541, + "step": 196 + }, + { + "epoch": 0.02078059071729958, + "grad_norm": 0.542759120464325, + "learning_rate": 0.0015, + "loss": 3.1458, + "step": 197 + }, + { + "epoch": 0.02088607594936709, + "grad_norm": 0.5104601383209229, + "learning_rate": 0.0015, + "loss": 3.1704, + "step": 198 + }, + { + "epoch": 0.0209915611814346, + "grad_norm": 0.738538384437561, + "learning_rate": 0.0015, + "loss": 3.1601, + "step": 199 + }, + { + "epoch": 0.02109704641350211, + "grad_norm": 0.9710153341293335, + "learning_rate": 0.0015, + "loss": 3.1217, + "step": 200 + }, + { + "epoch": 0.02120253164556962, + "grad_norm": 1.136983871459961, + "learning_rate": 0.0015, + "loss": 3.1744, + "step": 201 + }, + { + "epoch": 0.021308016877637132, + "grad_norm": 0.7257745265960693, + "learning_rate": 0.0015, + "loss": 3.1412, + "step": 202 + }, + { + "epoch": 0.021413502109704642, + "grad_norm": 0.836239218711853, + "learning_rate": 0.0015, + "loss": 3.1502, + "step": 203 + }, + { + "epoch": 0.021518987341772152, + "grad_norm": 0.7749267220497131, + "learning_rate": 0.0015, + "loss": 3.1358, + "step": 204 + }, + { + "epoch": 0.021624472573839662, + "grad_norm": 0.6772562861442566, + "learning_rate": 0.0015, + "loss": 3.1092, + "step": 205 + }, + { + "epoch": 0.021729957805907172, + "grad_norm": 0.8574336171150208, + "learning_rate": 
0.0015, + "loss": 3.0991, + "step": 206 + }, + { + "epoch": 0.021835443037974682, + "grad_norm": 0.7635952234268188, + "learning_rate": 0.0015, + "loss": 3.1208, + "step": 207 + }, + { + "epoch": 0.021940928270042195, + "grad_norm": 0.8011783361434937, + "learning_rate": 0.0015, + "loss": 3.0982, + "step": 208 + }, + { + "epoch": 0.022046413502109705, + "grad_norm": 0.7332499027252197, + "learning_rate": 0.0015, + "loss": 3.0964, + "step": 209 + }, + { + "epoch": 0.022151898734177215, + "grad_norm": 0.6593369841575623, + "learning_rate": 0.0015, + "loss": 3.1455, + "step": 210 + }, + { + "epoch": 0.022257383966244725, + "grad_norm": 0.5410482883453369, + "learning_rate": 0.0015, + "loss": 3.1029, + "step": 211 + }, + { + "epoch": 0.022362869198312235, + "grad_norm": 0.6211299300193787, + "learning_rate": 0.0015, + "loss": 3.0993, + "step": 212 + }, + { + "epoch": 0.022468354430379745, + "grad_norm": 0.6471499800682068, + "learning_rate": 0.0015, + "loss": 3.1209, + "step": 213 + }, + { + "epoch": 0.02257383966244726, + "grad_norm": 0.7534574270248413, + "learning_rate": 0.0015, + "loss": 3.1005, + "step": 214 + }, + { + "epoch": 0.02267932489451477, + "grad_norm": 0.6201465129852295, + "learning_rate": 0.0015, + "loss": 3.06, + "step": 215 + }, + { + "epoch": 0.02278481012658228, + "grad_norm": 0.5705814361572266, + "learning_rate": 0.0015, + "loss": 3.0798, + "step": 216 + }, + { + "epoch": 0.02289029535864979, + "grad_norm": 0.5413900017738342, + "learning_rate": 0.0015, + "loss": 3.0694, + "step": 217 + }, + { + "epoch": 0.0229957805907173, + "grad_norm": 0.6111287474632263, + "learning_rate": 0.0015, + "loss": 3.0953, + "step": 218 + }, + { + "epoch": 0.023101265822784812, + "grad_norm": 0.6253988742828369, + "learning_rate": 0.0015, + "loss": 3.0854, + "step": 219 + }, + { + "epoch": 0.023206751054852322, + "grad_norm": 0.6183094382286072, + "learning_rate": 0.0015, + "loss": 3.0453, + "step": 220 + }, + { + "epoch": 0.02331223628691983, + "grad_norm": 
0.5453416109085083, + "learning_rate": 0.0015, + "loss": 3.0475, + "step": 221 + }, + { + "epoch": 0.02341772151898734, + "grad_norm": 0.5267513990402222, + "learning_rate": 0.0015, + "loss": 3.0213, + "step": 222 + }, + { + "epoch": 0.02352320675105485, + "grad_norm": 0.5483818054199219, + "learning_rate": 0.0015, + "loss": 3.0388, + "step": 223 + }, + { + "epoch": 0.02362869198312236, + "grad_norm": 0.5734818577766418, + "learning_rate": 0.0015, + "loss": 3.0223, + "step": 224 + }, + { + "epoch": 0.023734177215189875, + "grad_norm": 0.6358630657196045, + "learning_rate": 0.0015, + "loss": 3.0151, + "step": 225 + }, + { + "epoch": 0.023839662447257385, + "grad_norm": 0.6588574051856995, + "learning_rate": 0.0015, + "loss": 3.059, + "step": 226 + }, + { + "epoch": 0.023945147679324895, + "grad_norm": 0.5852987170219421, + "learning_rate": 0.0015, + "loss": 3.0166, + "step": 227 + }, + { + "epoch": 0.024050632911392405, + "grad_norm": 0.5072793364524841, + "learning_rate": 0.0015, + "loss": 3.0079, + "step": 228 + }, + { + "epoch": 0.024156118143459915, + "grad_norm": 0.5593225955963135, + "learning_rate": 0.0015, + "loss": 2.996, + "step": 229 + }, + { + "epoch": 0.024261603375527425, + "grad_norm": 0.7914431095123291, + "learning_rate": 0.0015, + "loss": 3.0295, + "step": 230 + }, + { + "epoch": 0.024367088607594938, + "grad_norm": 0.9069041013717651, + "learning_rate": 0.0015, + "loss": 3.0118, + "step": 231 + }, + { + "epoch": 0.024472573839662448, + "grad_norm": 1.1159306764602661, + "learning_rate": 0.0015, + "loss": 3.0398, + "step": 232 + }, + { + "epoch": 0.024578059071729958, + "grad_norm": 0.8557447791099548, + "learning_rate": 0.0015, + "loss": 2.982, + "step": 233 + }, + { + "epoch": 0.024683544303797468, + "grad_norm": 0.6739777326583862, + "learning_rate": 0.0015, + "loss": 3.0187, + "step": 234 + }, + { + "epoch": 0.024789029535864978, + "grad_norm": 0.5493777990341187, + "learning_rate": 0.0015, + "loss": 2.9504, + "step": 235 + }, + { + "epoch": 
0.024894514767932488, + "grad_norm": 0.6609274744987488, + "learning_rate": 0.0015, + "loss": 2.9803, + "step": 236 + }, + { + "epoch": 0.025, + "grad_norm": 0.8530800342559814, + "learning_rate": 0.0015, + "loss": 2.9653, + "step": 237 + }, + { + "epoch": 0.02510548523206751, + "grad_norm": 0.9023459553718567, + "learning_rate": 0.0015, + "loss": 2.9668, + "step": 238 + }, + { + "epoch": 0.02521097046413502, + "grad_norm": 0.787721574306488, + "learning_rate": 0.0015, + "loss": 2.984, + "step": 239 + }, + { + "epoch": 0.02531645569620253, + "grad_norm": 0.7218961119651794, + "learning_rate": 0.0015, + "loss": 3.0032, + "step": 240 + }, + { + "epoch": 0.02542194092827004, + "grad_norm": 0.7726406455039978, + "learning_rate": 0.0015, + "loss": 2.9452, + "step": 241 + }, + { + "epoch": 0.02552742616033755, + "grad_norm": 0.8458675146102905, + "learning_rate": 0.0015, + "loss": 2.9789, + "step": 242 + }, + { + "epoch": 0.025632911392405065, + "grad_norm": 0.6562297344207764, + "learning_rate": 0.0015, + "loss": 2.9888, + "step": 243 + }, + { + "epoch": 0.025738396624472575, + "grad_norm": 0.5418792366981506, + "learning_rate": 0.0015, + "loss": 2.9712, + "step": 244 + }, + { + "epoch": 0.025843881856540084, + "grad_norm": 0.7549543976783752, + "learning_rate": 0.0015, + "loss": 2.9604, + "step": 245 + }, + { + "epoch": 0.025949367088607594, + "grad_norm": 0.6601927280426025, + "learning_rate": 0.0015, + "loss": 2.9502, + "step": 246 + }, + { + "epoch": 0.026054852320675104, + "grad_norm": 0.5595108270645142, + "learning_rate": 0.0015, + "loss": 2.9067, + "step": 247 + }, + { + "epoch": 0.026160337552742614, + "grad_norm": 0.7016994953155518, + "learning_rate": 0.0015, + "loss": 2.9668, + "step": 248 + }, + { + "epoch": 0.026265822784810128, + "grad_norm": 0.7691634893417358, + "learning_rate": 0.0015, + "loss": 2.9278, + "step": 249 + }, + { + "epoch": 0.026371308016877638, + "grad_norm": 0.6277539134025574, + "learning_rate": 0.0015, + "loss": 2.9002, + "step": 250 + 
}, + { + "epoch": 0.026476793248945148, + "grad_norm": 0.5682135820388794, + "learning_rate": 0.0015, + "loss": 2.946, + "step": 251 + }, + { + "epoch": 0.026582278481012658, + "grad_norm": 0.6085950136184692, + "learning_rate": 0.0015, + "loss": 2.9167, + "step": 252 + }, + { + "epoch": 0.026687763713080168, + "grad_norm": 0.7385963797569275, + "learning_rate": 0.0015, + "loss": 2.9158, + "step": 253 + }, + { + "epoch": 0.02679324894514768, + "grad_norm": 0.8353692293167114, + "learning_rate": 0.0015, + "loss": 2.9078, + "step": 254 + }, + { + "epoch": 0.02689873417721519, + "grad_norm": 0.6944759488105774, + "learning_rate": 0.0015, + "loss": 2.8956, + "step": 255 + }, + { + "epoch": 0.0270042194092827, + "grad_norm": 0.6617680191993713, + "learning_rate": 0.0015, + "loss": 2.8609, + "step": 256 + }, + { + "epoch": 0.02710970464135021, + "grad_norm": 0.6998510360717773, + "learning_rate": 0.0015, + "loss": 2.9136, + "step": 257 + }, + { + "epoch": 0.02721518987341772, + "grad_norm": 0.7599093914031982, + "learning_rate": 0.0015, + "loss": 2.9203, + "step": 258 + }, + { + "epoch": 0.02732067510548523, + "grad_norm": 0.594812273979187, + "learning_rate": 0.0015, + "loss": 2.9195, + "step": 259 + }, + { + "epoch": 0.027426160337552744, + "grad_norm": 0.6033343076705933, + "learning_rate": 0.0015, + "loss": 2.859, + "step": 260 + }, + { + "epoch": 0.027531645569620254, + "grad_norm": 0.7880474328994751, + "learning_rate": 0.0015, + "loss": 2.8976, + "step": 261 + }, + { + "epoch": 0.027637130801687764, + "grad_norm": 0.8647599220275879, + "learning_rate": 0.0015, + "loss": 2.8831, + "step": 262 + }, + { + "epoch": 0.027742616033755274, + "grad_norm": 0.7458866238594055, + "learning_rate": 0.0015, + "loss": 2.881, + "step": 263 + }, + { + "epoch": 0.027848101265822784, + "grad_norm": 0.6117191314697266, + "learning_rate": 0.0015, + "loss": 2.8818, + "step": 264 + }, + { + "epoch": 0.027953586497890294, + "grad_norm": 0.5987802147865295, + "learning_rate": 0.0015, + 
"loss": 2.879, + "step": 265 + }, + { + "epoch": 0.028059071729957807, + "grad_norm": 0.7147089838981628, + "learning_rate": 0.0015, + "loss": 2.8719, + "step": 266 + }, + { + "epoch": 0.028164556962025317, + "grad_norm": 0.7244475483894348, + "learning_rate": 0.0015, + "loss": 2.8613, + "step": 267 + }, + { + "epoch": 0.028270042194092827, + "grad_norm": 0.6152216196060181, + "learning_rate": 0.0015, + "loss": 2.8304, + "step": 268 + }, + { + "epoch": 0.028375527426160337, + "grad_norm": 0.9417058229446411, + "learning_rate": 0.0015, + "loss": 2.8717, + "step": 269 + }, + { + "epoch": 0.028481012658227847, + "grad_norm": 1.171797513961792, + "learning_rate": 0.0015, + "loss": 2.8844, + "step": 270 + }, + { + "epoch": 0.028586497890295357, + "grad_norm": 0.8627143502235413, + "learning_rate": 0.0015, + "loss": 2.8732, + "step": 271 + }, + { + "epoch": 0.02869198312236287, + "grad_norm": 0.7298221588134766, + "learning_rate": 0.0015, + "loss": 2.8455, + "step": 272 + }, + { + "epoch": 0.02879746835443038, + "grad_norm": 0.8978715538978577, + "learning_rate": 0.0015, + "loss": 2.8394, + "step": 273 + }, + { + "epoch": 0.02890295358649789, + "grad_norm": 0.8794608116149902, + "learning_rate": 0.0015, + "loss": 2.844, + "step": 274 + }, + { + "epoch": 0.0290084388185654, + "grad_norm": 0.6987318396568298, + "learning_rate": 0.0015, + "loss": 2.8633, + "step": 275 + }, + { + "epoch": 0.02911392405063291, + "grad_norm": 0.5574908256530762, + "learning_rate": 0.0015, + "loss": 2.8063, + "step": 276 + }, + { + "epoch": 0.02921940928270042, + "grad_norm": 0.6566581130027771, + "learning_rate": 0.0015, + "loss": 2.8406, + "step": 277 + }, + { + "epoch": 0.029324894514767934, + "grad_norm": 0.7081720232963562, + "learning_rate": 0.0015, + "loss": 2.7729, + "step": 278 + }, + { + "epoch": 0.029430379746835444, + "grad_norm": 0.6616462469100952, + "learning_rate": 0.0015, + "loss": 2.8339, + "step": 279 + }, + { + "epoch": 0.029535864978902954, + "grad_norm": 
0.7024974822998047, + "learning_rate": 0.0015, + "loss": 2.8073, + "step": 280 + }, + { + "epoch": 0.029641350210970464, + "grad_norm": 0.6736257672309875, + "learning_rate": 0.0015, + "loss": 2.7869, + "step": 281 + }, + { + "epoch": 0.029746835443037974, + "grad_norm": 0.5996975302696228, + "learning_rate": 0.0015, + "loss": 2.854, + "step": 282 + }, + { + "epoch": 0.029852320675105484, + "grad_norm": 0.526852548122406, + "learning_rate": 0.0015, + "loss": 2.7721, + "step": 283 + }, + { + "epoch": 0.029957805907172997, + "grad_norm": 0.5094711184501648, + "learning_rate": 0.0015, + "loss": 2.7869, + "step": 284 + }, + { + "epoch": 0.030063291139240507, + "grad_norm": 0.5385901927947998, + "learning_rate": 0.0015, + "loss": 2.8363, + "step": 285 + }, + { + "epoch": 0.030168776371308017, + "grad_norm": 0.5184576511383057, + "learning_rate": 0.0015, + "loss": 2.8153, + "step": 286 + }, + { + "epoch": 0.030274261603375527, + "grad_norm": 0.5651463866233826, + "learning_rate": 0.0015, + "loss": 2.812, + "step": 287 + }, + { + "epoch": 0.030379746835443037, + "grad_norm": 0.5776814222335815, + "learning_rate": 0.0015, + "loss": 2.7961, + "step": 288 + }, + { + "epoch": 0.03048523206751055, + "grad_norm": 0.6253670454025269, + "learning_rate": 0.0015, + "loss": 2.793, + "step": 289 + }, + { + "epoch": 0.03059071729957806, + "grad_norm": 0.5828775763511658, + "learning_rate": 0.0015, + "loss": 2.7765, + "step": 290 + }, + { + "epoch": 0.03069620253164557, + "grad_norm": 0.6641796231269836, + "learning_rate": 0.0015, + "loss": 2.7911, + "step": 291 + }, + { + "epoch": 0.03080168776371308, + "grad_norm": 0.8532554507255554, + "learning_rate": 0.0015, + "loss": 2.8123, + "step": 292 + }, + { + "epoch": 0.03090717299578059, + "grad_norm": 0.7868938446044922, + "learning_rate": 0.0015, + "loss": 2.788, + "step": 293 + }, + { + "epoch": 0.0310126582278481, + "grad_norm": 0.7309849858283997, + "learning_rate": 0.0015, + "loss": 2.7535, + "step": 294 + }, + { + "epoch": 
0.031118143459915613, + "grad_norm": 0.6793031096458435, + "learning_rate": 0.0015, + "loss": 2.7548, + "step": 295 + }, + { + "epoch": 0.031223628691983123, + "grad_norm": 0.6210504770278931, + "learning_rate": 0.0015, + "loss": 2.7976, + "step": 296 + }, + { + "epoch": 0.03132911392405063, + "grad_norm": 0.5565879940986633, + "learning_rate": 0.0015, + "loss": 2.8034, + "step": 297 + }, + { + "epoch": 0.03143459915611815, + "grad_norm": 0.5763741135597229, + "learning_rate": 0.0015, + "loss": 2.7692, + "step": 298 + }, + { + "epoch": 0.03154008438818565, + "grad_norm": 0.5793166160583496, + "learning_rate": 0.0015, + "loss": 2.7381, + "step": 299 + }, + { + "epoch": 0.03164556962025317, + "grad_norm": 0.5885812640190125, + "learning_rate": 0.0015, + "loss": 2.7534, + "step": 300 + }, + { + "epoch": 0.03175105485232067, + "grad_norm": 0.5877847075462341, + "learning_rate": 0.0015, + "loss": 2.7264, + "step": 301 + }, + { + "epoch": 0.03185654008438819, + "grad_norm": 0.5303623676300049, + "learning_rate": 0.0015, + "loss": 2.7349, + "step": 302 + }, + { + "epoch": 0.03196202531645569, + "grad_norm": 0.5827655792236328, + "learning_rate": 0.0015, + "loss": 2.74, + "step": 303 + }, + { + "epoch": 0.032067510548523206, + "grad_norm": 0.6397809982299805, + "learning_rate": 0.0015, + "loss": 2.7474, + "step": 304 + }, + { + "epoch": 0.03217299578059072, + "grad_norm": 0.6652061343193054, + "learning_rate": 0.0015, + "loss": 2.7568, + "step": 305 + }, + { + "epoch": 0.032278481012658226, + "grad_norm": 0.5894809365272522, + "learning_rate": 0.0015, + "loss": 2.7353, + "step": 306 + }, + { + "epoch": 0.03238396624472574, + "grad_norm": 0.7586065530776978, + "learning_rate": 0.0015, + "loss": 2.7503, + "step": 307 + }, + { + "epoch": 0.032489451476793246, + "grad_norm": 0.7334324717521667, + "learning_rate": 0.0015, + "loss": 2.7292, + "step": 308 + }, + { + "epoch": 0.03259493670886076, + "grad_norm": 0.8651225566864014, + "learning_rate": 0.0015, + "loss": 2.7511, + 
"step": 309 + }, + { + "epoch": 0.03270042194092827, + "grad_norm": 1.0487282276153564, + "learning_rate": 0.0015, + "loss": 2.7477, + "step": 310 + }, + { + "epoch": 0.03280590717299578, + "grad_norm": 1.106597900390625, + "learning_rate": 0.0015, + "loss": 2.7262, + "step": 311 + }, + { + "epoch": 0.03291139240506329, + "grad_norm": 0.7443594336509705, + "learning_rate": 0.0015, + "loss": 2.7328, + "step": 312 + }, + { + "epoch": 0.0330168776371308, + "grad_norm": 0.5400886535644531, + "learning_rate": 0.0015, + "loss": 2.7009, + "step": 313 + }, + { + "epoch": 0.03312236286919831, + "grad_norm": 0.5155996084213257, + "learning_rate": 0.0015, + "loss": 2.6974, + "step": 314 + }, + { + "epoch": 0.03322784810126582, + "grad_norm": 0.6222251653671265, + "learning_rate": 0.0015, + "loss": 2.7196, + "step": 315 + }, + { + "epoch": 0.03333333333333333, + "grad_norm": 0.6961142420768738, + "learning_rate": 0.0015, + "loss": 2.6971, + "step": 316 + }, + { + "epoch": 0.033438818565400846, + "grad_norm": 0.6365319490432739, + "learning_rate": 0.0015, + "loss": 2.7201, + "step": 317 + }, + { + "epoch": 0.03354430379746835, + "grad_norm": 0.5702477693557739, + "learning_rate": 0.0015, + "loss": 2.7052, + "step": 318 + }, + { + "epoch": 0.033649789029535866, + "grad_norm": 0.6683318018913269, + "learning_rate": 0.0015, + "loss": 2.7026, + "step": 319 + }, + { + "epoch": 0.03375527426160337, + "grad_norm": 0.7750487923622131, + "learning_rate": 0.0015, + "loss": 2.7222, + "step": 320 + }, + { + "epoch": 0.033860759493670886, + "grad_norm": 0.8867079019546509, + "learning_rate": 0.0015, + "loss": 2.7187, + "step": 321 + }, + { + "epoch": 0.0339662447257384, + "grad_norm": 0.9916465878486633, + "learning_rate": 0.0015, + "loss": 2.7188, + "step": 322 + }, + { + "epoch": 0.034071729957805906, + "grad_norm": 0.9273934960365295, + "learning_rate": 0.0015, + "loss": 2.7302, + "step": 323 + }, + { + "epoch": 0.03417721518987342, + "grad_norm": 0.6221110224723816, + "learning_rate": 
0.0015, + "loss": 2.7487, + "step": 324 + }, + { + "epoch": 0.034282700421940926, + "grad_norm": 0.6257839798927307, + "learning_rate": 0.0015, + "loss": 2.6852, + "step": 325 + }, + { + "epoch": 0.03438818565400844, + "grad_norm": 0.9795200824737549, + "learning_rate": 0.0015, + "loss": 2.7023, + "step": 326 + }, + { + "epoch": 0.03449367088607595, + "grad_norm": 0.9704818725585938, + "learning_rate": 0.0015, + "loss": 2.7275, + "step": 327 + }, + { + "epoch": 0.03459915611814346, + "grad_norm": 0.6575543284416199, + "learning_rate": 0.0015, + "loss": 2.6751, + "step": 328 + }, + { + "epoch": 0.03470464135021097, + "grad_norm": 0.5703964233398438, + "learning_rate": 0.0015, + "loss": 2.6609, + "step": 329 + }, + { + "epoch": 0.03481012658227848, + "grad_norm": 0.7702048420906067, + "learning_rate": 0.0015, + "loss": 2.6545, + "step": 330 + }, + { + "epoch": 0.03491561181434599, + "grad_norm": 0.6877147555351257, + "learning_rate": 0.0015, + "loss": 2.6597, + "step": 331 + }, + { + "epoch": 0.0350210970464135, + "grad_norm": 0.666469156742096, + "learning_rate": 0.0015, + "loss": 2.6652, + "step": 332 + }, + { + "epoch": 0.03512658227848101, + "grad_norm": 1.0524582862854004, + "learning_rate": 0.0015, + "loss": 2.7067, + "step": 333 + }, + { + "epoch": 0.035232067510548526, + "grad_norm": 1.1469674110412598, + "learning_rate": 0.0015, + "loss": 2.6914, + "step": 334 + }, + { + "epoch": 0.03533755274261603, + "grad_norm": 0.8497218489646912, + "learning_rate": 0.0015, + "loss": 2.6765, + "step": 335 + }, + { + "epoch": 0.035443037974683546, + "grad_norm": 0.8292915225028992, + "learning_rate": 0.0015, + "loss": 2.6794, + "step": 336 + }, + { + "epoch": 0.03554852320675105, + "grad_norm": 0.713080644607544, + "learning_rate": 0.0015, + "loss": 2.6623, + "step": 337 + }, + { + "epoch": 0.035654008438818566, + "grad_norm": 0.6908189058303833, + "learning_rate": 0.0015, + "loss": 2.6872, + "step": 338 + }, + { + "epoch": 0.03575949367088608, + "grad_norm": 
0.7819079756736755, + "learning_rate": 0.0015, + "loss": 2.7063, + "step": 339 + }, + { + "epoch": 0.035864978902953586, + "grad_norm": 0.6286224126815796, + "learning_rate": 0.0015, + "loss": 2.6636, + "step": 340 + }, + { + "epoch": 0.0359704641350211, + "grad_norm": 0.6791321039199829, + "learning_rate": 0.0015, + "loss": 2.6431, + "step": 341 + }, + { + "epoch": 0.036075949367088606, + "grad_norm": 0.6508675813674927, + "learning_rate": 0.0015, + "loss": 2.6151, + "step": 342 + }, + { + "epoch": 0.03618143459915612, + "grad_norm": 0.6431472897529602, + "learning_rate": 0.0015, + "loss": 2.656, + "step": 343 + }, + { + "epoch": 0.036286919831223625, + "grad_norm": 0.5608028769493103, + "learning_rate": 0.0015, + "loss": 2.6329, + "step": 344 + }, + { + "epoch": 0.03639240506329114, + "grad_norm": 0.5455867052078247, + "learning_rate": 0.0015, + "loss": 2.6485, + "step": 345 + }, + { + "epoch": 0.03649789029535865, + "grad_norm": 0.5543195009231567, + "learning_rate": 0.0015, + "loss": 2.6162, + "step": 346 + }, + { + "epoch": 0.03660337552742616, + "grad_norm": 0.5591506958007812, + "learning_rate": 0.0015, + "loss": 2.6481, + "step": 347 + }, + { + "epoch": 0.03670886075949367, + "grad_norm": 0.6832466125488281, + "learning_rate": 0.0015, + "loss": 2.64, + "step": 348 + }, + { + "epoch": 0.03681434599156118, + "grad_norm": 0.7821243405342102, + "learning_rate": 0.0015, + "loss": 2.5994, + "step": 349 + }, + { + "epoch": 0.03691983122362869, + "grad_norm": 0.9078012108802795, + "learning_rate": 0.0015, + "loss": 2.6168, + "step": 350 + }, + { + "epoch": 0.037025316455696206, + "grad_norm": 1.0436315536499023, + "learning_rate": 0.0015, + "loss": 2.6131, + "step": 351 + }, + { + "epoch": 0.03713080168776371, + "grad_norm": 0.7729336619377136, + "learning_rate": 0.0015, + "loss": 2.6366, + "step": 352 + }, + { + "epoch": 0.037236286919831225, + "grad_norm": 0.6074826717376709, + "learning_rate": 0.0015, + "loss": 2.6429, + "step": 353 + }, + { + "epoch": 
0.03734177215189873, + "grad_norm": 0.5426937341690063, + "learning_rate": 0.0015, + "loss": 2.5742, + "step": 354 + }, + { + "epoch": 0.037447257383966245, + "grad_norm": 0.6311236023902893, + "learning_rate": 0.0015, + "loss": 2.6135, + "step": 355 + }, + { + "epoch": 0.03755274261603375, + "grad_norm": 0.6573986411094666, + "learning_rate": 0.0015, + "loss": 2.6319, + "step": 356 + }, + { + "epoch": 0.037658227848101265, + "grad_norm": 0.5465301871299744, + "learning_rate": 0.0015, + "loss": 2.5794, + "step": 357 + }, + { + "epoch": 0.03776371308016878, + "grad_norm": 0.5390920639038086, + "learning_rate": 0.0015, + "loss": 2.6012, + "step": 358 + }, + { + "epoch": 0.037869198312236285, + "grad_norm": 0.5990561842918396, + "learning_rate": 0.0015, + "loss": 2.5803, + "step": 359 + }, + { + "epoch": 0.0379746835443038, + "grad_norm": 0.6378905177116394, + "learning_rate": 0.0015, + "loss": 2.628, + "step": 360 + }, + { + "epoch": 0.038080168776371305, + "grad_norm": 0.5791345834732056, + "learning_rate": 0.0015, + "loss": 2.5944, + "step": 361 + }, + { + "epoch": 0.03818565400843882, + "grad_norm": 0.5968369841575623, + "learning_rate": 0.0015, + "loss": 2.6213, + "step": 362 + }, + { + "epoch": 0.03829113924050633, + "grad_norm": 0.7104408144950867, + "learning_rate": 0.0015, + "loss": 2.5727, + "step": 363 + }, + { + "epoch": 0.03839662447257384, + "grad_norm": 0.6978356242179871, + "learning_rate": 0.0015, + "loss": 2.582, + "step": 364 + }, + { + "epoch": 0.03850210970464135, + "grad_norm": 0.67247074842453, + "learning_rate": 0.0015, + "loss": 2.6225, + "step": 365 + }, + { + "epoch": 0.03860759493670886, + "grad_norm": 0.6189120411872864, + "learning_rate": 0.0015, + "loss": 2.5861, + "step": 366 + }, + { + "epoch": 0.03871308016877637, + "grad_norm": 0.5502013564109802, + "learning_rate": 0.0015, + "loss": 2.5867, + "step": 367 + }, + { + "epoch": 0.038818565400843885, + "grad_norm": 0.6945213675498962, + "learning_rate": 0.0015, + "loss": 2.6102, + 
"step": 368 + }, + { + "epoch": 0.03892405063291139, + "grad_norm": 0.8436279296875, + "learning_rate": 0.0015, + "loss": 2.5969, + "step": 369 + }, + { + "epoch": 0.039029535864978905, + "grad_norm": 0.8353848457336426, + "learning_rate": 0.0015, + "loss": 2.5668, + "step": 370 + }, + { + "epoch": 0.03913502109704641, + "grad_norm": 0.7062360048294067, + "learning_rate": 0.0015, + "loss": 2.5801, + "step": 371 + }, + { + "epoch": 0.039240506329113925, + "grad_norm": 0.5779558420181274, + "learning_rate": 0.0015, + "loss": 2.5597, + "step": 372 + }, + { + "epoch": 0.03934599156118143, + "grad_norm": 0.7748365998268127, + "learning_rate": 0.0015, + "loss": 2.6049, + "step": 373 + }, + { + "epoch": 0.039451476793248945, + "grad_norm": 1.1137992143630981, + "learning_rate": 0.0015, + "loss": 2.625, + "step": 374 + }, + { + "epoch": 0.03955696202531646, + "grad_norm": 0.7565868496894836, + "learning_rate": 0.0015, + "loss": 2.5573, + "step": 375 + }, + { + "epoch": 0.039662447257383965, + "grad_norm": 0.690518856048584, + "learning_rate": 0.0015, + "loss": 2.5875, + "step": 376 + }, + { + "epoch": 0.03976793248945148, + "grad_norm": 1.1048948764801025, + "learning_rate": 0.0015, + "loss": 2.5974, + "step": 377 + }, + { + "epoch": 0.039873417721518985, + "grad_norm": 0.913773775100708, + "learning_rate": 0.0015, + "loss": 2.5692, + "step": 378 + }, + { + "epoch": 0.0399789029535865, + "grad_norm": 0.599328339099884, + "learning_rate": 0.0015, + "loss": 2.586, + "step": 379 + }, + { + "epoch": 0.04008438818565401, + "grad_norm": 1.0448973178863525, + "learning_rate": 0.0015, + "loss": 2.5786, + "step": 380 + }, + { + "epoch": 0.04018987341772152, + "grad_norm": 0.9382826685905457, + "learning_rate": 0.0015, + "loss": 2.5854, + "step": 381 + }, + { + "epoch": 0.04029535864978903, + "grad_norm": 0.6715505123138428, + "learning_rate": 0.0015, + "loss": 2.5878, + "step": 382 + }, + { + "epoch": 0.04040084388185654, + "grad_norm": 0.6978819966316223, + "learning_rate": 
0.0015, + "loss": 2.5783, + "step": 383 + }, + { + "epoch": 0.04050632911392405, + "grad_norm": 0.7260177135467529, + "learning_rate": 0.0015, + "loss": 2.559, + "step": 384 + }, + { + "epoch": 0.04061181434599156, + "grad_norm": 0.630085825920105, + "learning_rate": 0.0015, + "loss": 2.5605, + "step": 385 + }, + { + "epoch": 0.04071729957805907, + "grad_norm": 0.6087722778320312, + "learning_rate": 0.0015, + "loss": 2.5364, + "step": 386 + }, + { + "epoch": 0.040822784810126585, + "grad_norm": 0.599683403968811, + "learning_rate": 0.0015, + "loss": 2.5776, + "step": 387 + }, + { + "epoch": 0.04092827004219409, + "grad_norm": 0.4998455345630646, + "learning_rate": 0.0015, + "loss": 2.5425, + "step": 388 + }, + { + "epoch": 0.041033755274261605, + "grad_norm": 0.6637672185897827, + "learning_rate": 0.0015, + "loss": 2.5436, + "step": 389 + }, + { + "epoch": 0.04113924050632911, + "grad_norm": 0.8620326519012451, + "learning_rate": 0.0015, + "loss": 2.5473, + "step": 390 + }, + { + "epoch": 0.041244725738396625, + "grad_norm": 0.8207728266716003, + "learning_rate": 0.0015, + "loss": 2.5478, + "step": 391 + }, + { + "epoch": 0.04135021097046414, + "grad_norm": 0.6227527260780334, + "learning_rate": 0.0015, + "loss": 2.5378, + "step": 392 + }, + { + "epoch": 0.041455696202531644, + "grad_norm": 0.8634827136993408, + "learning_rate": 0.0015, + "loss": 2.536, + "step": 393 + }, + { + "epoch": 0.04156118143459916, + "grad_norm": 0.934406578540802, + "learning_rate": 0.0015, + "loss": 2.5233, + "step": 394 + }, + { + "epoch": 0.041666666666666664, + "grad_norm": 1.0203802585601807, + "learning_rate": 0.0015, + "loss": 2.5296, + "step": 395 + }, + { + "epoch": 0.04177215189873418, + "grad_norm": 1.086199402809143, + "learning_rate": 0.0015, + "loss": 2.55, + "step": 396 + }, + { + "epoch": 0.04187763713080169, + "grad_norm": 0.6120628714561462, + "learning_rate": 0.0015, + "loss": 2.5153, + "step": 397 + }, + { + "epoch": 0.0419831223628692, + "grad_norm": 
1.0643750429153442, + "learning_rate": 0.0015, + "loss": 2.5341, + "step": 398 + }, + { + "epoch": 0.04208860759493671, + "grad_norm": 1.1236447095870972, + "learning_rate": 0.0015, + "loss": 2.5576, + "step": 399 + }, + { + "epoch": 0.04219409282700422, + "grad_norm": 0.6694768071174622, + "learning_rate": 0.0015, + "loss": 2.548, + "step": 400 + }, + { + "epoch": 0.04229957805907173, + "grad_norm": 0.772110641002655, + "learning_rate": 0.0015, + "loss": 2.5275, + "step": 401 + }, + { + "epoch": 0.04240506329113924, + "grad_norm": 0.7637317776679993, + "learning_rate": 0.0015, + "loss": 2.5056, + "step": 402 + }, + { + "epoch": 0.04251054852320675, + "grad_norm": 0.6758496761322021, + "learning_rate": 0.0015, + "loss": 2.5498, + "step": 403 + }, + { + "epoch": 0.042616033755274264, + "grad_norm": 0.7265697717666626, + "learning_rate": 0.0015, + "loss": 2.5457, + "step": 404 + }, + { + "epoch": 0.04272151898734177, + "grad_norm": 0.5827400088310242, + "learning_rate": 0.0015, + "loss": 2.4919, + "step": 405 + }, + { + "epoch": 0.042827004219409284, + "grad_norm": 0.6745532155036926, + "learning_rate": 0.0015, + "loss": 2.5369, + "step": 406 + }, + { + "epoch": 0.04293248945147679, + "grad_norm": 0.5512591004371643, + "learning_rate": 0.0015, + "loss": 2.5534, + "step": 407 + }, + { + "epoch": 0.043037974683544304, + "grad_norm": 0.6537434458732605, + "learning_rate": 0.0015, + "loss": 2.52, + "step": 408 + }, + { + "epoch": 0.04314345991561182, + "grad_norm": 0.8097763657569885, + "learning_rate": 0.0015, + "loss": 2.5116, + "step": 409 + }, + { + "epoch": 0.043248945147679324, + "grad_norm": 0.8432343006134033, + "learning_rate": 0.0015, + "loss": 2.5115, + "step": 410 + }, + { + "epoch": 0.04335443037974684, + "grad_norm": 0.6861345767974854, + "learning_rate": 0.0015, + "loss": 2.4932, + "step": 411 + }, + { + "epoch": 0.043459915611814344, + "grad_norm": 0.6173989176750183, + "learning_rate": 0.0015, + "loss": 2.5086, + "step": 412 + }, + { + "epoch": 
0.04356540084388186, + "grad_norm": 0.5517957806587219, + "learning_rate": 0.0015, + "loss": 2.5073, + "step": 413 + }, + { + "epoch": 0.043670886075949364, + "grad_norm": 0.5692659020423889, + "learning_rate": 0.0015, + "loss": 2.4853, + "step": 414 + }, + { + "epoch": 0.04377637130801688, + "grad_norm": 0.5736432075500488, + "learning_rate": 0.0015, + "loss": 2.5176, + "step": 415 + }, + { + "epoch": 0.04388185654008439, + "grad_norm": 0.5319284796714783, + "learning_rate": 0.0015, + "loss": 2.4839, + "step": 416 + }, + { + "epoch": 0.0439873417721519, + "grad_norm": 0.5271692276000977, + "learning_rate": 0.0015, + "loss": 2.5005, + "step": 417 + }, + { + "epoch": 0.04409282700421941, + "grad_norm": 0.5658661723136902, + "learning_rate": 0.0015, + "loss": 2.5209, + "step": 418 + }, + { + "epoch": 0.04419831223628692, + "grad_norm": 0.5643138289451599, + "learning_rate": 0.0015, + "loss": 2.5047, + "step": 419 + }, + { + "epoch": 0.04430379746835443, + "grad_norm": 0.6114088892936707, + "learning_rate": 0.0015, + "loss": 2.4671, + "step": 420 + }, + { + "epoch": 0.044409282700421944, + "grad_norm": 0.7342972755432129, + "learning_rate": 0.0015, + "loss": 2.4929, + "step": 421 + }, + { + "epoch": 0.04451476793248945, + "grad_norm": 0.6202294230461121, + "learning_rate": 0.0015, + "loss": 2.4719, + "step": 422 + }, + { + "epoch": 0.044620253164556964, + "grad_norm": 0.5686100125312805, + "learning_rate": 0.0015, + "loss": 2.482, + "step": 423 + }, + { + "epoch": 0.04472573839662447, + "grad_norm": 0.7075520753860474, + "learning_rate": 0.0015, + "loss": 2.4976, + "step": 424 + }, + { + "epoch": 0.044831223628691984, + "grad_norm": 0.8364173173904419, + "learning_rate": 0.0015, + "loss": 2.4605, + "step": 425 + }, + { + "epoch": 0.04493670886075949, + "grad_norm": 0.6680495142936707, + "learning_rate": 0.0015, + "loss": 2.4886, + "step": 426 + }, + { + "epoch": 0.045042194092827004, + "grad_norm": 0.5401420593261719, + "learning_rate": 0.0015, + "loss": 2.4795, + 
"step": 427 + }, + { + "epoch": 0.04514767932489452, + "grad_norm": 0.5875886678695679, + "learning_rate": 0.0015, + "loss": 2.4747, + "step": 428 + }, + { + "epoch": 0.045253164556962024, + "grad_norm": 0.5946866869926453, + "learning_rate": 0.0015, + "loss": 2.4539, + "step": 429 + }, + { + "epoch": 0.04535864978902954, + "grad_norm": 0.5908014178276062, + "learning_rate": 0.0015, + "loss": 2.4399, + "step": 430 + }, + { + "epoch": 0.045464135021097044, + "grad_norm": 0.7210457921028137, + "learning_rate": 0.0015, + "loss": 2.4281, + "step": 431 + }, + { + "epoch": 0.04556962025316456, + "grad_norm": 0.6678378582000732, + "learning_rate": 0.0015, + "loss": 2.4607, + "step": 432 + }, + { + "epoch": 0.04567510548523207, + "grad_norm": 0.6293447613716125, + "learning_rate": 0.0015, + "loss": 2.4535, + "step": 433 + }, + { + "epoch": 0.04578059071729958, + "grad_norm": 0.5875449180603027, + "learning_rate": 0.0015, + "loss": 2.4784, + "step": 434 + }, + { + "epoch": 0.04588607594936709, + "grad_norm": 0.5475998520851135, + "learning_rate": 0.0015, + "loss": 2.4629, + "step": 435 + }, + { + "epoch": 0.0459915611814346, + "grad_norm": 0.5864022970199585, + "learning_rate": 0.0015, + "loss": 2.466, + "step": 436 + }, + { + "epoch": 0.04609704641350211, + "grad_norm": 0.6136614084243774, + "learning_rate": 0.0015, + "loss": 2.4491, + "step": 437 + }, + { + "epoch": 0.046202531645569624, + "grad_norm": 0.5596399307250977, + "learning_rate": 0.0015, + "loss": 2.4851, + "step": 438 + }, + { + "epoch": 0.04630801687763713, + "grad_norm": 0.617128849029541, + "learning_rate": 0.0015, + "loss": 2.4432, + "step": 439 + }, + { + "epoch": 0.046413502109704644, + "grad_norm": 0.6627237200737, + "learning_rate": 0.0015, + "loss": 2.4641, + "step": 440 + }, + { + "epoch": 0.04651898734177215, + "grad_norm": 0.6823180317878723, + "learning_rate": 0.0015, + "loss": 2.457, + "step": 441 + }, + { + "epoch": 0.04662447257383966, + "grad_norm": 0.6112765669822693, + "learning_rate": 
0.0015, + "loss": 2.4667, + "step": 442 + }, + { + "epoch": 0.04672995780590717, + "grad_norm": 0.7016992568969727, + "learning_rate": 0.0015, + "loss": 2.4383, + "step": 443 + }, + { + "epoch": 0.04683544303797468, + "grad_norm": 0.7539768218994141, + "learning_rate": 0.0015, + "loss": 2.4564, + "step": 444 + }, + { + "epoch": 0.0469409282700422, + "grad_norm": 0.7324343323707581, + "learning_rate": 0.0015, + "loss": 2.4494, + "step": 445 + }, + { + "epoch": 0.0470464135021097, + "grad_norm": 0.5209283232688904, + "learning_rate": 0.0015, + "loss": 2.4662, + "step": 446 + }, + { + "epoch": 0.04715189873417722, + "grad_norm": 0.5297408699989319, + "learning_rate": 0.0015, + "loss": 2.4531, + "step": 447 + }, + { + "epoch": 0.04725738396624472, + "grad_norm": 0.5961151719093323, + "learning_rate": 0.0015, + "loss": 2.4268, + "step": 448 + }, + { + "epoch": 0.04736286919831224, + "grad_norm": 0.6886147260665894, + "learning_rate": 0.0015, + "loss": 2.4761, + "step": 449 + }, + { + "epoch": 0.04746835443037975, + "grad_norm": 0.5722856521606445, + "learning_rate": 0.0015, + "loss": 2.4366, + "step": 450 + }, + { + "epoch": 0.047573839662447256, + "grad_norm": 0.5168962478637695, + "learning_rate": 0.0015, + "loss": 2.4501, + "step": 451 + }, + { + "epoch": 0.04767932489451477, + "grad_norm": 0.646081268787384, + "learning_rate": 0.0015, + "loss": 2.4133, + "step": 452 + }, + { + "epoch": 0.047784810126582276, + "grad_norm": 0.7620195746421814, + "learning_rate": 0.0015, + "loss": 2.4844, + "step": 453 + }, + { + "epoch": 0.04789029535864979, + "grad_norm": 0.701268196105957, + "learning_rate": 0.0015, + "loss": 2.4486, + "step": 454 + }, + { + "epoch": 0.047995780590717296, + "grad_norm": 0.6020218729972839, + "learning_rate": 0.0015, + "loss": 2.4346, + "step": 455 + }, + { + "epoch": 0.04810126582278481, + "grad_norm": 0.5474327802658081, + "learning_rate": 0.0015, + "loss": 2.3995, + "step": 456 + }, + { + "epoch": 0.04820675105485232, + "grad_norm": 
0.5468835830688477, + "learning_rate": 0.0015, + "loss": 2.4123, + "step": 457 + }, + { + "epoch": 0.04831223628691983, + "grad_norm": 0.5273303985595703, + "learning_rate": 0.0015, + "loss": 2.4013, + "step": 458 + }, + { + "epoch": 0.04841772151898734, + "grad_norm": 0.5838407874107361, + "learning_rate": 0.0015, + "loss": 2.4056, + "step": 459 + }, + { + "epoch": 0.04852320675105485, + "grad_norm": 0.5841299295425415, + "learning_rate": 0.0015, + "loss": 2.4535, + "step": 460 + }, + { + "epoch": 0.04862869198312236, + "grad_norm": 0.5731238722801208, + "learning_rate": 0.0015, + "loss": 2.4097, + "step": 461 + }, + { + "epoch": 0.048734177215189876, + "grad_norm": 0.614305317401886, + "learning_rate": 0.0015, + "loss": 2.4327, + "step": 462 + }, + { + "epoch": 0.04883966244725738, + "grad_norm": 0.666114330291748, + "learning_rate": 0.0015, + "loss": 2.4071, + "step": 463 + }, + { + "epoch": 0.048945147679324896, + "grad_norm": 0.7254586219787598, + "learning_rate": 0.0015, + "loss": 2.4103, + "step": 464 + }, + { + "epoch": 0.0490506329113924, + "grad_norm": 0.6337289214134216, + "learning_rate": 0.0015, + "loss": 2.4079, + "step": 465 + }, + { + "epoch": 0.049156118143459916, + "grad_norm": 0.5618485808372498, + "learning_rate": 0.0015, + "loss": 2.3842, + "step": 466 + }, + { + "epoch": 0.04926160337552743, + "grad_norm": 0.5461037158966064, + "learning_rate": 0.0015, + "loss": 2.4154, + "step": 467 + }, + { + "epoch": 0.049367088607594936, + "grad_norm": 0.5691366195678711, + "learning_rate": 0.0015, + "loss": 2.4129, + "step": 468 + }, + { + "epoch": 0.04947257383966245, + "grad_norm": 0.5997835993766785, + "learning_rate": 0.0015, + "loss": 2.3984, + "step": 469 + }, + { + "epoch": 0.049578059071729956, + "grad_norm": 0.6511602997779846, + "learning_rate": 0.0015, + "loss": 2.3911, + "step": 470 + }, + { + "epoch": 0.04968354430379747, + "grad_norm": 0.6190035343170166, + "learning_rate": 0.0015, + "loss": 2.4017, + "step": 471 + }, + { + "epoch": 
0.049789029535864976, + "grad_norm": 0.5229429602622986, + "learning_rate": 0.0015, + "loss": 2.4059, + "step": 472 + }, + { + "epoch": 0.04989451476793249, + "grad_norm": 0.6145848631858826, + "learning_rate": 0.0015, + "loss": 2.4286, + "step": 473 + }, + { + "epoch": 0.05, + "grad_norm": 0.7568457126617432, + "learning_rate": 0.0015, + "loss": 2.4268, + "step": 474 + }, + { + "epoch": 0.05010548523206751, + "grad_norm": 0.698989987373352, + "learning_rate": 0.0015, + "loss": 2.4168, + "step": 475 + }, + { + "epoch": 0.05021097046413502, + "grad_norm": 0.6458556652069092, + "learning_rate": 0.0015, + "loss": 2.4086, + "step": 476 + }, + { + "epoch": 0.05031645569620253, + "grad_norm": 0.6524352431297302, + "learning_rate": 0.0015, + "loss": 2.3677, + "step": 477 + }, + { + "epoch": 0.05042194092827004, + "grad_norm": 0.6493321061134338, + "learning_rate": 0.0015, + "loss": 2.4448, + "step": 478 + }, + { + "epoch": 0.050527426160337556, + "grad_norm": 0.6462939977645874, + "learning_rate": 0.0015, + "loss": 2.3956, + "step": 479 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 0.6222774386405945, + "learning_rate": 0.0015, + "loss": 2.4013, + "step": 480 + }, + { + "epoch": 0.050738396624472576, + "grad_norm": 0.658645510673523, + "learning_rate": 0.0015, + "loss": 2.3639, + "step": 481 + }, + { + "epoch": 0.05084388185654008, + "grad_norm": 0.6878094673156738, + "learning_rate": 0.0015, + "loss": 2.4076, + "step": 482 + }, + { + "epoch": 0.050949367088607596, + "grad_norm": 0.8742771148681641, + "learning_rate": 0.0015, + "loss": 2.4382, + "step": 483 + }, + { + "epoch": 0.0510548523206751, + "grad_norm": 1.0188567638397217, + "learning_rate": 0.0015, + "loss": 2.402, + "step": 484 + }, + { + "epoch": 0.051160337552742616, + "grad_norm": 0.9459471702575684, + "learning_rate": 0.0015, + "loss": 2.3598, + "step": 485 + }, + { + "epoch": 0.05126582278481013, + "grad_norm": 0.5860833525657654, + "learning_rate": 0.0015, + "loss": 2.3536, + "step": 486 + }, + { 
+ "epoch": 0.051371308016877636, + "grad_norm": 0.8695175647735596, + "learning_rate": 0.0015, + "loss": 2.4133, + "step": 487 + }, + { + "epoch": 0.05147679324894515, + "grad_norm": 1.435496211051941, + "learning_rate": 0.0015, + "loss": 2.3908, + "step": 488 + }, + { + "epoch": 0.051582278481012656, + "grad_norm": 0.6248399615287781, + "learning_rate": 0.0015, + "loss": 2.3795, + "step": 489 + }, + { + "epoch": 0.05168776371308017, + "grad_norm": 1.3923648595809937, + "learning_rate": 0.0015, + "loss": 2.4195, + "step": 490 + }, + { + "epoch": 0.05179324894514768, + "grad_norm": 0.8495714068412781, + "learning_rate": 0.0015, + "loss": 2.4117, + "step": 491 + }, + { + "epoch": 0.05189873417721519, + "grad_norm": 0.7323932647705078, + "learning_rate": 0.0015, + "loss": 2.3952, + "step": 492 + }, + { + "epoch": 0.0520042194092827, + "grad_norm": 1.0794618129730225, + "learning_rate": 0.0015, + "loss": 2.391, + "step": 493 + }, + { + "epoch": 0.05210970464135021, + "grad_norm": 0.7117097973823547, + "learning_rate": 0.0015, + "loss": 2.3928, + "step": 494 + }, + { + "epoch": 0.05221518987341772, + "grad_norm": 0.5951404571533203, + "learning_rate": 0.0015, + "loss": 2.3742, + "step": 495 + }, + { + "epoch": 0.05232067510548523, + "grad_norm": 0.7290152907371521, + "learning_rate": 0.0015, + "loss": 2.3557, + "step": 496 + }, + { + "epoch": 0.05242616033755274, + "grad_norm": 0.7287200689315796, + "learning_rate": 0.0015, + "loss": 2.3915, + "step": 497 + }, + { + "epoch": 0.052531645569620256, + "grad_norm": 0.5445609092712402, + "learning_rate": 0.0015, + "loss": 2.3464, + "step": 498 + }, + { + "epoch": 0.05263713080168776, + "grad_norm": 0.6681470274925232, + "learning_rate": 0.0015, + "loss": 2.3818, + "step": 499 + }, + { + "epoch": 0.052742616033755275, + "grad_norm": 0.8860105276107788, + "learning_rate": 0.0015, + "loss": 2.3743, + "step": 500 + }, + { + "epoch": 0.05284810126582278, + "grad_norm": 0.7466394305229187, + "learning_rate": 0.0015, + "loss": 
2.3796, + "step": 501 + }, + { + "epoch": 0.052953586497890295, + "grad_norm": 0.5135951638221741, + "learning_rate": 0.0015, + "loss": 2.4019, + "step": 502 + }, + { + "epoch": 0.05305907172995781, + "grad_norm": 0.7751052975654602, + "learning_rate": 0.0015, + "loss": 2.3433, + "step": 503 + }, + { + "epoch": 0.053164556962025315, + "grad_norm": 0.8892273902893066, + "learning_rate": 0.0015, + "loss": 2.3632, + "step": 504 + }, + { + "epoch": 0.05327004219409283, + "grad_norm": 0.6392226815223694, + "learning_rate": 0.0015, + "loss": 2.3638, + "step": 505 + }, + { + "epoch": 0.053375527426160335, + "grad_norm": 0.7267099618911743, + "learning_rate": 0.0015, + "loss": 2.4042, + "step": 506 + }, + { + "epoch": 0.05348101265822785, + "grad_norm": 0.7698792815208435, + "learning_rate": 0.0015, + "loss": 2.3684, + "step": 507 + }, + { + "epoch": 0.05358649789029536, + "grad_norm": 0.5992477536201477, + "learning_rate": 0.0015, + "loss": 2.3566, + "step": 508 + }, + { + "epoch": 0.05369198312236287, + "grad_norm": 0.67427659034729, + "learning_rate": 0.0015, + "loss": 2.3174, + "step": 509 + }, + { + "epoch": 0.05379746835443038, + "grad_norm": 0.6546165943145752, + "learning_rate": 0.0015, + "loss": 2.3764, + "step": 510 + }, + { + "epoch": 0.05390295358649789, + "grad_norm": 0.5334204435348511, + "learning_rate": 0.0015, + "loss": 2.3481, + "step": 511 + }, + { + "epoch": 0.0540084388185654, + "grad_norm": 0.807060718536377, + "learning_rate": 0.0015, + "loss": 2.3659, + "step": 512 + }, + { + "epoch": 0.05411392405063291, + "grad_norm": 0.6117734313011169, + "learning_rate": 0.0015, + "loss": 2.3503, + "step": 513 + }, + { + "epoch": 0.05421940928270042, + "grad_norm": 0.6080499887466431, + "learning_rate": 0.0015, + "loss": 2.3636, + "step": 514 + }, + { + "epoch": 0.054324894514767935, + "grad_norm": 0.7046027779579163, + "learning_rate": 0.0015, + "loss": 2.328, + "step": 515 + }, + { + "epoch": 0.05443037974683544, + "grad_norm": 0.6732563376426697, + 
"learning_rate": 0.0015, + "loss": 2.3559, + "step": 516 + }, + { + "epoch": 0.054535864978902955, + "grad_norm": 0.6331255435943604, + "learning_rate": 0.0015, + "loss": 2.3518, + "step": 517 + }, + { + "epoch": 0.05464135021097046, + "grad_norm": 0.560720682144165, + "learning_rate": 0.0015, + "loss": 2.3017, + "step": 518 + }, + { + "epoch": 0.054746835443037975, + "grad_norm": 0.6451405882835388, + "learning_rate": 0.0015, + "loss": 2.3548, + "step": 519 + }, + { + "epoch": 0.05485232067510549, + "grad_norm": 0.7499489784240723, + "learning_rate": 0.0015, + "loss": 2.3601, + "step": 520 + }, + { + "epoch": 0.054957805907172995, + "grad_norm": 0.7935493588447571, + "learning_rate": 0.0015, + "loss": 2.3588, + "step": 521 + }, + { + "epoch": 0.05506329113924051, + "grad_norm": 0.6315867900848389, + "learning_rate": 0.0015, + "loss": 2.3242, + "step": 522 + }, + { + "epoch": 0.055168776371308015, + "grad_norm": 0.5828781127929688, + "learning_rate": 0.0015, + "loss": 2.3563, + "step": 523 + }, + { + "epoch": 0.05527426160337553, + "grad_norm": 0.8364554047584534, + "learning_rate": 0.0015, + "loss": 2.3566, + "step": 524 + }, + { + "epoch": 0.055379746835443035, + "grad_norm": 0.6991103291511536, + "learning_rate": 0.0015, + "loss": 2.335, + "step": 525 + }, + { + "epoch": 0.05548523206751055, + "grad_norm": 0.5167779922485352, + "learning_rate": 0.0015, + "loss": 2.3269, + "step": 526 + }, + { + "epoch": 0.05559071729957806, + "grad_norm": 0.7246729135513306, + "learning_rate": 0.0015, + "loss": 2.3199, + "step": 527 + }, + { + "epoch": 0.05569620253164557, + "grad_norm": 0.6558444499969482, + "learning_rate": 0.0015, + "loss": 2.3397, + "step": 528 + }, + { + "epoch": 0.05580168776371308, + "grad_norm": 0.5521800518035889, + "learning_rate": 0.0015, + "loss": 2.3423, + "step": 529 + }, + { + "epoch": 0.05590717299578059, + "grad_norm": 0.5958268046379089, + "learning_rate": 0.0015, + "loss": 2.3282, + "step": 530 + }, + { + "epoch": 0.0560126582278481, + 
"grad_norm": 0.7040470838546753, + "learning_rate": 0.0015, + "loss": 2.3378, + "step": 531 + }, + { + "epoch": 0.056118143459915615, + "grad_norm": 0.665379524230957, + "learning_rate": 0.0015, + "loss": 2.2979, + "step": 532 + }, + { + "epoch": 0.05622362869198312, + "grad_norm": 0.5010139346122742, + "learning_rate": 0.0015, + "loss": 2.3223, + "step": 533 + }, + { + "epoch": 0.056329113924050635, + "grad_norm": 0.5269133448600769, + "learning_rate": 0.0015, + "loss": 2.317, + "step": 534 + }, + { + "epoch": 0.05643459915611814, + "grad_norm": 0.5116428732872009, + "learning_rate": 0.0015, + "loss": 2.3151, + "step": 535 + }, + { + "epoch": 0.056540084388185655, + "grad_norm": 0.5242851376533508, + "learning_rate": 0.0015, + "loss": 2.3138, + "step": 536 + }, + { + "epoch": 0.05664556962025316, + "grad_norm": 0.5800849795341492, + "learning_rate": 0.0015, + "loss": 2.2844, + "step": 537 + }, + { + "epoch": 0.056751054852320675, + "grad_norm": 0.5497094392776489, + "learning_rate": 0.0015, + "loss": 2.3159, + "step": 538 + }, + { + "epoch": 0.05685654008438819, + "grad_norm": 0.4886211156845093, + "learning_rate": 0.0015, + "loss": 2.3201, + "step": 539 + }, + { + "epoch": 0.056962025316455694, + "grad_norm": 0.636679470539093, + "learning_rate": 0.0015, + "loss": 2.3284, + "step": 540 + }, + { + "epoch": 0.05706751054852321, + "grad_norm": 0.525220513343811, + "learning_rate": 0.0015, + "loss": 2.3409, + "step": 541 + }, + { + "epoch": 0.057172995780590714, + "grad_norm": 0.6222096085548401, + "learning_rate": 0.0015, + "loss": 2.3222, + "step": 542 + }, + { + "epoch": 0.05727848101265823, + "grad_norm": 0.592156708240509, + "learning_rate": 0.0015, + "loss": 2.2913, + "step": 543 + }, + { + "epoch": 0.05738396624472574, + "grad_norm": 0.6042721271514893, + "learning_rate": 0.0015, + "loss": 2.3167, + "step": 544 + }, + { + "epoch": 0.05748945147679325, + "grad_norm": 0.5723241567611694, + "learning_rate": 0.0015, + "loss": 2.3211, + "step": 545 + }, + { + 
"epoch": 0.05759493670886076, + "grad_norm": 0.5514154434204102, + "learning_rate": 0.0015, + "loss": 2.314, + "step": 546 + }, + { + "epoch": 0.05770042194092827, + "grad_norm": 0.5217574238777161, + "learning_rate": 0.0015, + "loss": 2.3222, + "step": 547 + }, + { + "epoch": 0.05780590717299578, + "grad_norm": 0.5978240966796875, + "learning_rate": 0.0015, + "loss": 2.3298, + "step": 548 + }, + { + "epoch": 0.057911392405063294, + "grad_norm": 0.5819392800331116, + "learning_rate": 0.0015, + "loss": 2.3219, + "step": 549 + }, + { + "epoch": 0.0580168776371308, + "grad_norm": 0.5390170812606812, + "learning_rate": 0.0015, + "loss": 2.2765, + "step": 550 + }, + { + "epoch": 0.058122362869198314, + "grad_norm": 0.6666958332061768, + "learning_rate": 0.0015, + "loss": 2.2777, + "step": 551 + }, + { + "epoch": 0.05822784810126582, + "grad_norm": 0.8040550947189331, + "learning_rate": 0.0015, + "loss": 2.3051, + "step": 552 + }, + { + "epoch": 0.058333333333333334, + "grad_norm": 0.8092217445373535, + "learning_rate": 0.0015, + "loss": 2.2997, + "step": 553 + }, + { + "epoch": 0.05843881856540084, + "grad_norm": 0.7098361253738403, + "learning_rate": 0.0015, + "loss": 2.247, + "step": 554 + }, + { + "epoch": 0.058544303797468354, + "grad_norm": 0.5931481719017029, + "learning_rate": 0.0015, + "loss": 2.2511, + "step": 555 + }, + { + "epoch": 0.05864978902953587, + "grad_norm": 0.6600017547607422, + "learning_rate": 0.0015, + "loss": 2.306, + "step": 556 + }, + { + "epoch": 0.058755274261603374, + "grad_norm": 0.7226420044898987, + "learning_rate": 0.0015, + "loss": 2.3279, + "step": 557 + }, + { + "epoch": 0.05886075949367089, + "grad_norm": 0.854891300201416, + "learning_rate": 0.0015, + "loss": 2.2823, + "step": 558 + }, + { + "epoch": 0.058966244725738394, + "grad_norm": 0.7509938478469849, + "learning_rate": 0.0015, + "loss": 2.2597, + "step": 559 + }, + { + "epoch": 0.05907172995780591, + "grad_norm": 0.5163098573684692, + "learning_rate": 0.0015, + "loss": 
2.2749, + "step": 560 + }, + { + "epoch": 0.05917721518987342, + "grad_norm": 0.752998948097229, + "learning_rate": 0.0015, + "loss": 2.2796, + "step": 561 + }, + { + "epoch": 0.05928270042194093, + "grad_norm": 0.7569834589958191, + "learning_rate": 0.0015, + "loss": 2.286, + "step": 562 + }, + { + "epoch": 0.05938818565400844, + "grad_norm": 0.5669600367546082, + "learning_rate": 0.0015, + "loss": 2.3074, + "step": 563 + }, + { + "epoch": 0.05949367088607595, + "grad_norm": 0.6263278126716614, + "learning_rate": 0.0015, + "loss": 2.3007, + "step": 564 + }, + { + "epoch": 0.05959915611814346, + "grad_norm": 0.7167249321937561, + "learning_rate": 0.0015, + "loss": 2.295, + "step": 565 + }, + { + "epoch": 0.05970464135021097, + "grad_norm": 0.657244086265564, + "learning_rate": 0.0015, + "loss": 2.3088, + "step": 566 + }, + { + "epoch": 0.05981012658227848, + "grad_norm": 0.5568730235099792, + "learning_rate": 0.0015, + "loss": 2.3145, + "step": 567 + }, + { + "epoch": 0.059915611814345994, + "grad_norm": 0.6420565843582153, + "learning_rate": 0.0015, + "loss": 2.2538, + "step": 568 + }, + { + "epoch": 0.0600210970464135, + "grad_norm": 0.6171932816505432, + "learning_rate": 0.0015, + "loss": 2.2294, + "step": 569 + }, + { + "epoch": 0.060126582278481014, + "grad_norm": 0.630375862121582, + "learning_rate": 0.0015, + "loss": 2.3236, + "step": 570 + }, + { + "epoch": 0.06023206751054852, + "grad_norm": 0.5438060760498047, + "learning_rate": 0.0015, + "loss": 2.2984, + "step": 571 + }, + { + "epoch": 0.060337552742616034, + "grad_norm": 0.5490220785140991, + "learning_rate": 0.0015, + "loss": 2.2881, + "step": 572 + }, + { + "epoch": 0.06044303797468355, + "grad_norm": 0.6096124053001404, + "learning_rate": 0.0015, + "loss": 2.3037, + "step": 573 + }, + { + "epoch": 0.060548523206751054, + "grad_norm": 0.5740655660629272, + "learning_rate": 0.0015, + "loss": 2.2615, + "step": 574 + }, + { + "epoch": 0.06065400843881857, + "grad_norm": 0.5798143744468689, + 
"learning_rate": 0.0015, + "loss": 2.3062, + "step": 575 + }, + { + "epoch": 0.060759493670886074, + "grad_norm": 0.6170733571052551, + "learning_rate": 0.0015, + "loss": 2.2874, + "step": 576 + }, + { + "epoch": 0.06086497890295359, + "grad_norm": 0.5171892046928406, + "learning_rate": 0.0015, + "loss": 2.2718, + "step": 577 + }, + { + "epoch": 0.0609704641350211, + "grad_norm": 0.6215243935585022, + "learning_rate": 0.0015, + "loss": 2.2765, + "step": 578 + }, + { + "epoch": 0.06107594936708861, + "grad_norm": 0.7542302012443542, + "learning_rate": 0.0015, + "loss": 2.2812, + "step": 579 + }, + { + "epoch": 0.06118143459915612, + "grad_norm": 0.7645319104194641, + "learning_rate": 0.0015, + "loss": 2.289, + "step": 580 + }, + { + "epoch": 0.06128691983122363, + "grad_norm": 0.7993782758712769, + "learning_rate": 0.0015, + "loss": 2.284, + "step": 581 + }, + { + "epoch": 0.06139240506329114, + "grad_norm": 0.7777169346809387, + "learning_rate": 0.0015, + "loss": 2.2568, + "step": 582 + }, + { + "epoch": 0.06149789029535865, + "grad_norm": 0.5615309476852417, + "learning_rate": 0.0015, + "loss": 2.2898, + "step": 583 + }, + { + "epoch": 0.06160337552742616, + "grad_norm": 0.6185851693153381, + "learning_rate": 0.0015, + "loss": 2.2565, + "step": 584 + }, + { + "epoch": 0.061708860759493674, + "grad_norm": 0.7018452882766724, + "learning_rate": 0.0015, + "loss": 2.2963, + "step": 585 + }, + { + "epoch": 0.06181434599156118, + "grad_norm": 0.6029337048530579, + "learning_rate": 0.0015, + "loss": 2.2682, + "step": 586 + }, + { + "epoch": 0.061919831223628694, + "grad_norm": 0.6678157448768616, + "learning_rate": 0.0015, + "loss": 2.2443, + "step": 587 + }, + { + "epoch": 0.0620253164556962, + "grad_norm": 0.7533937096595764, + "learning_rate": 0.0015, + "loss": 2.2334, + "step": 588 + }, + { + "epoch": 0.06213080168776371, + "grad_norm": 0.7184306979179382, + "learning_rate": 0.0015, + "loss": 2.2771, + "step": 589 + }, + { + "epoch": 0.06223628691983123, + 
"grad_norm": 0.7327088117599487, + "learning_rate": 0.0015, + "loss": 2.2224, + "step": 590 + }, + { + "epoch": 0.06234177215189873, + "grad_norm": 0.6438800692558289, + "learning_rate": 0.0015, + "loss": 2.2418, + "step": 591 + }, + { + "epoch": 0.06244725738396625, + "grad_norm": 0.5882759094238281, + "learning_rate": 0.0015, + "loss": 2.2468, + "step": 592 + }, + { + "epoch": 0.06255274261603376, + "grad_norm": 0.5381282567977905, + "learning_rate": 0.0015, + "loss": 2.2718, + "step": 593 + }, + { + "epoch": 0.06265822784810127, + "grad_norm": 0.7376332879066467, + "learning_rate": 0.0015, + "loss": 2.23, + "step": 594 + }, + { + "epoch": 0.06276371308016877, + "grad_norm": 0.8881618976593018, + "learning_rate": 0.0015, + "loss": 2.2605, + "step": 595 + }, + { + "epoch": 0.0628691983122363, + "grad_norm": 0.576248049736023, + "learning_rate": 0.0015, + "loss": 2.2125, + "step": 596 + }, + { + "epoch": 0.0629746835443038, + "grad_norm": 0.5991083979606628, + "learning_rate": 0.0015, + "loss": 2.2441, + "step": 597 + }, + { + "epoch": 0.0630801687763713, + "grad_norm": 0.6734524369239807, + "learning_rate": 0.0015, + "loss": 2.2927, + "step": 598 + }, + { + "epoch": 0.06318565400843881, + "grad_norm": 0.5658050179481506, + "learning_rate": 0.0015, + "loss": 2.2358, + "step": 599 + }, + { + "epoch": 0.06329113924050633, + "grad_norm": 0.5681853294372559, + "learning_rate": 0.0015, + "loss": 2.2486, + "step": 600 + }, + { + "epoch": 0.06339662447257384, + "grad_norm": 0.7272197008132935, + "learning_rate": 0.0015, + "loss": 2.2683, + "step": 601 + }, + { + "epoch": 0.06350210970464135, + "grad_norm": 0.5892140865325928, + "learning_rate": 0.0015, + "loss": 2.2398, + "step": 602 + }, + { + "epoch": 0.06360759493670887, + "grad_norm": 0.545028567314148, + "learning_rate": 0.0015, + "loss": 2.263, + "step": 603 + }, + { + "epoch": 0.06371308016877637, + "grad_norm": 0.5640975832939148, + "learning_rate": 0.0015, + "loss": 2.2047, + "step": 604 + }, + { + "epoch": 
0.06381856540084388, + "grad_norm": 0.6265382766723633, + "learning_rate": 0.0015, + "loss": 2.2459, + "step": 605 + }, + { + "epoch": 0.06392405063291139, + "grad_norm": 0.6744657754898071, + "learning_rate": 0.0015, + "loss": 2.2274, + "step": 606 + }, + { + "epoch": 0.0640295358649789, + "grad_norm": 0.46932274103164673, + "learning_rate": 0.0015, + "loss": 2.2616, + "step": 607 + }, + { + "epoch": 0.06413502109704641, + "grad_norm": 0.6074360013008118, + "learning_rate": 0.0015, + "loss": 2.2918, + "step": 608 + }, + { + "epoch": 0.06424050632911392, + "grad_norm": 0.5361587405204773, + "learning_rate": 0.0015, + "loss": 2.2424, + "step": 609 + }, + { + "epoch": 0.06434599156118144, + "grad_norm": 0.5556535124778748, + "learning_rate": 0.0015, + "loss": 2.2512, + "step": 610 + }, + { + "epoch": 0.06445147679324895, + "grad_norm": 0.5393496751785278, + "learning_rate": 0.0015, + "loss": 2.2317, + "step": 611 + }, + { + "epoch": 0.06455696202531645, + "grad_norm": 0.5839556455612183, + "learning_rate": 0.0015, + "loss": 2.239, + "step": 612 + }, + { + "epoch": 0.06466244725738397, + "grad_norm": 0.5499026775360107, + "learning_rate": 0.0015, + "loss": 2.2222, + "step": 613 + }, + { + "epoch": 0.06476793248945148, + "grad_norm": 0.569526731967926, + "learning_rate": 0.0015, + "loss": 2.1879, + "step": 614 + }, + { + "epoch": 0.06487341772151899, + "grad_norm": 0.49739882349967957, + "learning_rate": 0.0015, + "loss": 2.24, + "step": 615 + }, + { + "epoch": 0.06497890295358649, + "grad_norm": 0.5533305406570435, + "learning_rate": 0.0015, + "loss": 2.2358, + "step": 616 + }, + { + "epoch": 0.06508438818565401, + "grad_norm": 0.739514172077179, + "learning_rate": 0.0015, + "loss": 2.2399, + "step": 617 + }, + { + "epoch": 0.06518987341772152, + "grad_norm": 0.7596186399459839, + "learning_rate": 0.0015, + "loss": 2.2509, + "step": 618 + }, + { + "epoch": 0.06529535864978903, + "grad_norm": 0.6795740127563477, + "learning_rate": 0.0015, + "loss": 2.2705, + "step": 
619 + }, + { + "epoch": 0.06540084388185655, + "grad_norm": 0.5516452789306641, + "learning_rate": 0.0015, + "loss": 2.2454, + "step": 620 + }, + { + "epoch": 0.06550632911392405, + "grad_norm": 0.5583570003509521, + "learning_rate": 0.0015, + "loss": 2.2198, + "step": 621 + }, + { + "epoch": 0.06561181434599156, + "grad_norm": 0.6461924910545349, + "learning_rate": 0.0015, + "loss": 2.2505, + "step": 622 + }, + { + "epoch": 0.06571729957805907, + "grad_norm": 0.8149744868278503, + "learning_rate": 0.0015, + "loss": 2.2306, + "step": 623 + }, + { + "epoch": 0.06582278481012659, + "grad_norm": 0.7241102457046509, + "learning_rate": 0.0015, + "loss": 2.1932, + "step": 624 + }, + { + "epoch": 0.06592827004219409, + "grad_norm": 0.7473236918449402, + "learning_rate": 0.0015, + "loss": 2.223, + "step": 625 + }, + { + "epoch": 0.0660337552742616, + "grad_norm": 0.9002881646156311, + "learning_rate": 0.0015, + "loss": 2.2247, + "step": 626 + }, + { + "epoch": 0.06613924050632912, + "grad_norm": 0.806682288646698, + "learning_rate": 0.0015, + "loss": 2.2434, + "step": 627 + }, + { + "epoch": 0.06624472573839663, + "grad_norm": 0.5749506950378418, + "learning_rate": 0.0015, + "loss": 2.2224, + "step": 628 + }, + { + "epoch": 0.06635021097046413, + "grad_norm": 0.6432097554206848, + "learning_rate": 0.0015, + "loss": 2.2443, + "step": 629 + }, + { + "epoch": 0.06645569620253164, + "grad_norm": 0.7201072573661804, + "learning_rate": 0.0015, + "loss": 2.1948, + "step": 630 + }, + { + "epoch": 0.06656118143459916, + "grad_norm": 0.6522536873817444, + "learning_rate": 0.0015, + "loss": 2.1953, + "step": 631 + }, + { + "epoch": 0.06666666666666667, + "grad_norm": 0.5203250646591187, + "learning_rate": 0.0015, + "loss": 2.2295, + "step": 632 + }, + { + "epoch": 0.06677215189873417, + "grad_norm": 0.8893463015556335, + "learning_rate": 0.0015, + "loss": 2.2317, + "step": 633 + }, + { + "epoch": 0.06687763713080169, + "grad_norm": 1.216801643371582, + "learning_rate": 0.0015, + 
"loss": 2.2222, + "step": 634 + }, + { + "epoch": 0.0669831223628692, + "grad_norm": 0.5740790963172913, + "learning_rate": 0.0015, + "loss": 2.2802, + "step": 635 + }, + { + "epoch": 0.0670886075949367, + "grad_norm": 0.9687669277191162, + "learning_rate": 0.0015, + "loss": 2.2258, + "step": 636 + }, + { + "epoch": 0.06719409282700423, + "grad_norm": 1.0738919973373413, + "learning_rate": 0.0015, + "loss": 2.2869, + "step": 637 + }, + { + "epoch": 0.06729957805907173, + "grad_norm": 0.5578457117080688, + "learning_rate": 0.0015, + "loss": 2.2387, + "step": 638 + }, + { + "epoch": 0.06740506329113924, + "grad_norm": 1.198937177658081, + "learning_rate": 0.0015, + "loss": 2.2381, + "step": 639 + }, + { + "epoch": 0.06751054852320675, + "grad_norm": 0.7291653752326965, + "learning_rate": 0.0015, + "loss": 2.2371, + "step": 640 + }, + { + "epoch": 0.06761603375527427, + "grad_norm": 0.6903422474861145, + "learning_rate": 0.0015, + "loss": 2.219, + "step": 641 + }, + { + "epoch": 0.06772151898734177, + "grad_norm": 0.8727262020111084, + "learning_rate": 0.0015, + "loss": 2.2328, + "step": 642 + }, + { + "epoch": 0.06782700421940928, + "grad_norm": 0.6038119196891785, + "learning_rate": 0.0015, + "loss": 2.2282, + "step": 643 + }, + { + "epoch": 0.0679324894514768, + "grad_norm": 0.6179218888282776, + "learning_rate": 0.0015, + "loss": 2.1871, + "step": 644 + }, + { + "epoch": 0.0680379746835443, + "grad_norm": 0.8188096880912781, + "learning_rate": 0.0015, + "loss": 2.1958, + "step": 645 + }, + { + "epoch": 0.06814345991561181, + "grad_norm": 0.5943591594696045, + "learning_rate": 0.0015, + "loss": 2.1807, + "step": 646 + }, + { + "epoch": 0.06824894514767932, + "grad_norm": 0.603207528591156, + "learning_rate": 0.0015, + "loss": 2.2571, + "step": 647 + }, + { + "epoch": 0.06835443037974684, + "grad_norm": 0.8463202714920044, + "learning_rate": 0.0015, + "loss": 2.2727, + "step": 648 + }, + { + "epoch": 0.06845991561181435, + "grad_norm": 0.732502818107605, + 
"learning_rate": 0.0015, + "loss": 2.2261, + "step": 649 + }, + { + "epoch": 0.06856540084388185, + "grad_norm": 0.5361315608024597, + "learning_rate": 0.0015, + "loss": 2.2414, + "step": 650 + }, + { + "epoch": 0.06867088607594937, + "grad_norm": 0.7492843866348267, + "learning_rate": 0.0015, + "loss": 2.2033, + "step": 651 + }, + { + "epoch": 0.06877637130801688, + "grad_norm": 0.8218656182289124, + "learning_rate": 0.0015, + "loss": 2.2163, + "step": 652 + }, + { + "epoch": 0.06888185654008439, + "grad_norm": 0.5739061236381531, + "learning_rate": 0.0015, + "loss": 2.2198, + "step": 653 + }, + { + "epoch": 0.0689873417721519, + "grad_norm": 0.7323062419891357, + "learning_rate": 0.0015, + "loss": 2.2298, + "step": 654 + }, + { + "epoch": 0.06909282700421941, + "grad_norm": 0.9246823787689209, + "learning_rate": 0.0015, + "loss": 2.2114, + "step": 655 + }, + { + "epoch": 0.06919831223628692, + "grad_norm": 0.6526539921760559, + "learning_rate": 0.0015, + "loss": 2.2249, + "step": 656 + }, + { + "epoch": 0.06930379746835443, + "grad_norm": 0.506503164768219, + "learning_rate": 0.0015, + "loss": 2.1921, + "step": 657 + }, + { + "epoch": 0.06940928270042195, + "grad_norm": 0.7326666116714478, + "learning_rate": 0.0015, + "loss": 2.1825, + "step": 658 + }, + { + "epoch": 0.06951476793248945, + "grad_norm": 0.6644753217697144, + "learning_rate": 0.0015, + "loss": 2.2153, + "step": 659 + }, + { + "epoch": 0.06962025316455696, + "grad_norm": 0.5262641310691833, + "learning_rate": 0.0015, + "loss": 2.1952, + "step": 660 + }, + { + "epoch": 0.06972573839662448, + "grad_norm": 0.4978020489215851, + "learning_rate": 0.0015, + "loss": 2.1827, + "step": 661 + }, + { + "epoch": 0.06983122362869199, + "grad_norm": 0.6205664873123169, + "learning_rate": 0.0015, + "loss": 2.1931, + "step": 662 + }, + { + "epoch": 0.06993670886075949, + "grad_norm": 0.6488565802574158, + "learning_rate": 0.0015, + "loss": 2.1365, + "step": 663 + }, + { + "epoch": 0.070042194092827, + "grad_norm": 
0.5634413957595825, + "learning_rate": 0.0015, + "loss": 2.2136, + "step": 664 + }, + { + "epoch": 0.07014767932489452, + "grad_norm": 0.5196778178215027, + "learning_rate": 0.0015, + "loss": 2.1981, + "step": 665 + }, + { + "epoch": 0.07025316455696203, + "grad_norm": 0.6007410287857056, + "learning_rate": 0.0015, + "loss": 2.1712, + "step": 666 + }, + { + "epoch": 0.07035864978902953, + "grad_norm": 0.5979984402656555, + "learning_rate": 0.0015, + "loss": 2.2109, + "step": 667 + }, + { + "epoch": 0.07046413502109705, + "grad_norm": 0.5532450079917908, + "learning_rate": 0.0015, + "loss": 2.1711, + "step": 668 + }, + { + "epoch": 0.07056962025316456, + "grad_norm": 0.5314489603042603, + "learning_rate": 0.0015, + "loss": 2.2164, + "step": 669 + }, + { + "epoch": 0.07067510548523206, + "grad_norm": 0.541140615940094, + "learning_rate": 0.0015, + "loss": 2.2041, + "step": 670 + }, + { + "epoch": 0.07078059071729957, + "grad_norm": 0.6494712233543396, + "learning_rate": 0.0015, + "loss": 2.2075, + "step": 671 + }, + { + "epoch": 0.07088607594936709, + "grad_norm": 0.7607830762863159, + "learning_rate": 0.0015, + "loss": 2.1846, + "step": 672 + }, + { + "epoch": 0.0709915611814346, + "grad_norm": 0.5456535220146179, + "learning_rate": 0.0015, + "loss": 2.208, + "step": 673 + }, + { + "epoch": 0.0710970464135021, + "grad_norm": 0.5576298832893372, + "learning_rate": 0.0015, + "loss": 2.1365, + "step": 674 + }, + { + "epoch": 0.07120253164556962, + "grad_norm": 0.5766642689704895, + "learning_rate": 0.0015, + "loss": 2.1986, + "step": 675 + }, + { + "epoch": 0.07130801687763713, + "grad_norm": 0.51692134141922, + "learning_rate": 0.0015, + "loss": 2.1853, + "step": 676 + }, + { + "epoch": 0.07141350210970464, + "grad_norm": 0.517126202583313, + "learning_rate": 0.0015, + "loss": 2.1648, + "step": 677 + }, + { + "epoch": 0.07151898734177216, + "grad_norm": 0.582494854927063, + "learning_rate": 0.0015, + "loss": 2.1938, + "step": 678 + }, + { + "epoch": 
0.07162447257383966, + "grad_norm": 0.5192087292671204, + "learning_rate": 0.0015, + "loss": 2.1605, + "step": 679 + }, + { + "epoch": 0.07172995780590717, + "grad_norm": 0.532579779624939, + "learning_rate": 0.0015, + "loss": 2.195, + "step": 680 + }, + { + "epoch": 0.07183544303797468, + "grad_norm": 0.5732529759407043, + "learning_rate": 0.0015, + "loss": 2.1955, + "step": 681 + }, + { + "epoch": 0.0719409282700422, + "grad_norm": 0.6465937495231628, + "learning_rate": 0.0015, + "loss": 2.214, + "step": 682 + }, + { + "epoch": 0.0720464135021097, + "grad_norm": 0.911175012588501, + "learning_rate": 0.0015, + "loss": 2.2002, + "step": 683 + }, + { + "epoch": 0.07215189873417721, + "grad_norm": 0.9529163837432861, + "learning_rate": 0.0015, + "loss": 2.2194, + "step": 684 + }, + { + "epoch": 0.07225738396624473, + "grad_norm": 0.6736530065536499, + "learning_rate": 0.0015, + "loss": 2.1713, + "step": 685 + }, + { + "epoch": 0.07236286919831224, + "grad_norm": 0.5988576412200928, + "learning_rate": 0.0015, + "loss": 2.2258, + "step": 686 + }, + { + "epoch": 0.07246835443037974, + "grad_norm": 0.7737632393836975, + "learning_rate": 0.0015, + "loss": 2.1885, + "step": 687 + }, + { + "epoch": 0.07257383966244725, + "grad_norm": 0.8874591588973999, + "learning_rate": 0.0015, + "loss": 2.2235, + "step": 688 + }, + { + "epoch": 0.07267932489451477, + "grad_norm": 0.7639130353927612, + "learning_rate": 0.0015, + "loss": 2.145, + "step": 689 + }, + { + "epoch": 0.07278481012658228, + "grad_norm": 0.5340981483459473, + "learning_rate": 0.0015, + "loss": 2.218, + "step": 690 + }, + { + "epoch": 0.07289029535864978, + "grad_norm": 0.6690497994422913, + "learning_rate": 0.0015, + "loss": 2.199, + "step": 691 + }, + { + "epoch": 0.0729957805907173, + "grad_norm": 0.7696680426597595, + "learning_rate": 0.0015, + "loss": 2.1728, + "step": 692 + }, + { + "epoch": 0.07310126582278481, + "grad_norm": 0.6508092880249023, + "learning_rate": 0.0015, + "loss": 2.2021, + "step": 693 + }, 
+ { + "epoch": 0.07320675105485232, + "grad_norm": 0.5294222235679626, + "learning_rate": 0.0015, + "loss": 2.2278, + "step": 694 + }, + { + "epoch": 0.07331223628691984, + "grad_norm": 0.5852862000465393, + "learning_rate": 0.0015, + "loss": 2.1901, + "step": 695 + }, + { + "epoch": 0.07341772151898734, + "grad_norm": 0.7184546589851379, + "learning_rate": 0.0015, + "loss": 2.1666, + "step": 696 + }, + { + "epoch": 0.07352320675105485, + "grad_norm": 0.6798696517944336, + "learning_rate": 0.0015, + "loss": 2.1779, + "step": 697 + }, + { + "epoch": 0.07362869198312236, + "grad_norm": 0.5243903398513794, + "learning_rate": 0.0015, + "loss": 2.1461, + "step": 698 + }, + { + "epoch": 0.07373417721518988, + "grad_norm": 0.5244808197021484, + "learning_rate": 0.0015, + "loss": 2.1401, + "step": 699 + }, + { + "epoch": 0.07383966244725738, + "grad_norm": 0.5956200361251831, + "learning_rate": 0.0015, + "loss": 2.1455, + "step": 700 + }, + { + "epoch": 0.07394514767932489, + "grad_norm": 0.6188979744911194, + "learning_rate": 0.0015, + "loss": 2.1398, + "step": 701 + }, + { + "epoch": 0.07405063291139241, + "grad_norm": 0.5388018488883972, + "learning_rate": 0.0015, + "loss": 2.154, + "step": 702 + }, + { + "epoch": 0.07415611814345992, + "grad_norm": 0.5440879464149475, + "learning_rate": 0.0015, + "loss": 2.1976, + "step": 703 + }, + { + "epoch": 0.07426160337552742, + "grad_norm": 0.7074193954467773, + "learning_rate": 0.0015, + "loss": 2.1673, + "step": 704 + }, + { + "epoch": 0.07436708860759493, + "grad_norm": 0.675395131111145, + "learning_rate": 0.0015, + "loss": 2.1536, + "step": 705 + }, + { + "epoch": 0.07447257383966245, + "grad_norm": 0.5538883805274963, + "learning_rate": 0.0015, + "loss": 2.1816, + "step": 706 + }, + { + "epoch": 0.07457805907172996, + "grad_norm": 0.5245557427406311, + "learning_rate": 0.0015, + "loss": 2.1515, + "step": 707 + }, + { + "epoch": 0.07468354430379746, + "grad_norm": 0.5336805582046509, + "learning_rate": 0.0015, + "loss": 
2.181, + "step": 708 + }, + { + "epoch": 0.07478902953586498, + "grad_norm": 0.5505400896072388, + "learning_rate": 0.0015, + "loss": 2.1744, + "step": 709 + }, + { + "epoch": 0.07489451476793249, + "grad_norm": 0.5089576840400696, + "learning_rate": 0.0015, + "loss": 2.1609, + "step": 710 + }, + { + "epoch": 0.075, + "grad_norm": 0.5851828455924988, + "learning_rate": 0.0015, + "loss": 2.1624, + "step": 711 + }, + { + "epoch": 0.0751054852320675, + "grad_norm": 0.7139943838119507, + "learning_rate": 0.0015, + "loss": 2.1741, + "step": 712 + }, + { + "epoch": 0.07521097046413502, + "grad_norm": 0.6880309581756592, + "learning_rate": 0.0015, + "loss": 2.1633, + "step": 713 + }, + { + "epoch": 0.07531645569620253, + "grad_norm": 0.5787697434425354, + "learning_rate": 0.0015, + "loss": 2.1401, + "step": 714 + }, + { + "epoch": 0.07542194092827004, + "grad_norm": 0.6487759351730347, + "learning_rate": 0.0015, + "loss": 2.1647, + "step": 715 + }, + { + "epoch": 0.07552742616033756, + "grad_norm": 0.8057007789611816, + "learning_rate": 0.0015, + "loss": 2.1567, + "step": 716 + }, + { + "epoch": 0.07563291139240506, + "grad_norm": 0.566325843334198, + "learning_rate": 0.0015, + "loss": 2.1354, + "step": 717 + }, + { + "epoch": 0.07573839662447257, + "grad_norm": 0.6920539140701294, + "learning_rate": 0.0015, + "loss": 2.1069, + "step": 718 + }, + { + "epoch": 0.07584388185654009, + "grad_norm": 0.7430959939956665, + "learning_rate": 0.0015, + "loss": 2.1486, + "step": 719 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 0.7641369700431824, + "learning_rate": 0.0015, + "loss": 2.1426, + "step": 720 + }, + { + "epoch": 0.0760548523206751, + "grad_norm": 0.7014267444610596, + "learning_rate": 0.0015, + "loss": 2.1756, + "step": 721 + }, + { + "epoch": 0.07616033755274261, + "grad_norm": 0.5831820368766785, + "learning_rate": 0.0015, + "loss": 2.1519, + "step": 722 + }, + { + "epoch": 0.07626582278481013, + "grad_norm": 0.5453060269355774, + "learning_rate": 0.0015, + 
"loss": 2.1494, + "step": 723 + }, + { + "epoch": 0.07637130801687764, + "grad_norm": 0.5728681683540344, + "learning_rate": 0.0015, + "loss": 2.1202, + "step": 724 + }, + { + "epoch": 0.07647679324894514, + "grad_norm": 0.5387490391731262, + "learning_rate": 0.0015, + "loss": 2.1438, + "step": 725 + }, + { + "epoch": 0.07658227848101266, + "grad_norm": 0.6138736009597778, + "learning_rate": 0.0015, + "loss": 2.1363, + "step": 726 + }, + { + "epoch": 0.07668776371308017, + "grad_norm": 0.7266629934310913, + "learning_rate": 0.0015, + "loss": 2.1411, + "step": 727 + }, + { + "epoch": 0.07679324894514768, + "grad_norm": 0.6275491118431091, + "learning_rate": 0.0015, + "loss": 2.1555, + "step": 728 + }, + { + "epoch": 0.07689873417721518, + "grad_norm": 0.6663520932197571, + "learning_rate": 0.0015, + "loss": 2.16, + "step": 729 + }, + { + "epoch": 0.0770042194092827, + "grad_norm": 0.5328286290168762, + "learning_rate": 0.0015, + "loss": 2.126, + "step": 730 + }, + { + "epoch": 0.07710970464135021, + "grad_norm": 0.5819591283798218, + "learning_rate": 0.0015, + "loss": 2.1471, + "step": 731 + }, + { + "epoch": 0.07721518987341772, + "grad_norm": 0.5698686242103577, + "learning_rate": 0.0015, + "loss": 2.1406, + "step": 732 + }, + { + "epoch": 0.07732067510548524, + "grad_norm": 0.542801022529602, + "learning_rate": 0.0015, + "loss": 2.1671, + "step": 733 + }, + { + "epoch": 0.07742616033755274, + "grad_norm": 0.5827435255050659, + "learning_rate": 0.0015, + "loss": 2.1873, + "step": 734 + }, + { + "epoch": 0.07753164556962025, + "grad_norm": 0.5806185603141785, + "learning_rate": 0.0015, + "loss": 2.1574, + "step": 735 + }, + { + "epoch": 0.07763713080168777, + "grad_norm": 0.6766160130500793, + "learning_rate": 0.0015, + "loss": 2.1232, + "step": 736 + }, + { + "epoch": 0.07774261603375528, + "grad_norm": 0.6849648356437683, + "learning_rate": 0.0015, + "loss": 2.1708, + "step": 737 + }, + { + "epoch": 0.07784810126582278, + "grad_norm": 0.5955662727355957, + 
"learning_rate": 0.0015, + "loss": 2.1286, + "step": 738 + }, + { + "epoch": 0.07795358649789029, + "grad_norm": 0.5330286622047424, + "learning_rate": 0.0015, + "loss": 2.1891, + "step": 739 + }, + { + "epoch": 0.07805907172995781, + "grad_norm": 0.6239925622940063, + "learning_rate": 0.0015, + "loss": 2.1241, + "step": 740 + }, + { + "epoch": 0.07816455696202532, + "grad_norm": 0.5498470067977905, + "learning_rate": 0.0015, + "loss": 2.1374, + "step": 741 + }, + { + "epoch": 0.07827004219409282, + "grad_norm": 0.5445939302444458, + "learning_rate": 0.0015, + "loss": 2.1607, + "step": 742 + }, + { + "epoch": 0.07837552742616034, + "grad_norm": 0.558857262134552, + "learning_rate": 0.0015, + "loss": 2.1321, + "step": 743 + }, + { + "epoch": 0.07848101265822785, + "grad_norm": 0.5508167147636414, + "learning_rate": 0.0015, + "loss": 2.1469, + "step": 744 + }, + { + "epoch": 0.07858649789029536, + "grad_norm": 0.48142459988594055, + "learning_rate": 0.0015, + "loss": 2.1532, + "step": 745 + }, + { + "epoch": 0.07869198312236286, + "grad_norm": 0.5125391483306885, + "learning_rate": 0.0015, + "loss": 2.1399, + "step": 746 + }, + { + "epoch": 0.07879746835443038, + "grad_norm": 0.5317972302436829, + "learning_rate": 0.0015, + "loss": 2.1238, + "step": 747 + }, + { + "epoch": 0.07890295358649789, + "grad_norm": 0.49319085478782654, + "learning_rate": 0.0015, + "loss": 2.0949, + "step": 748 + }, + { + "epoch": 0.0790084388185654, + "grad_norm": 0.6260822415351868, + "learning_rate": 0.0015, + "loss": 2.1365, + "step": 749 + }, + { + "epoch": 0.07911392405063292, + "grad_norm": 0.8042246699333191, + "learning_rate": 0.0015, + "loss": 2.1318, + "step": 750 + }, + { + "epoch": 0.07921940928270042, + "grad_norm": 0.748389482498169, + "learning_rate": 0.0015, + "loss": 2.1359, + "step": 751 + }, + { + "epoch": 0.07932489451476793, + "grad_norm": 0.5691909790039062, + "learning_rate": 0.0015, + "loss": 2.161, + "step": 752 + }, + { + "epoch": 0.07943037974683544, + 
"grad_norm": 0.5758563280105591, + "learning_rate": 0.0015, + "loss": 2.1311, + "step": 753 + }, + { + "epoch": 0.07953586497890296, + "grad_norm": 0.8811555504798889, + "learning_rate": 0.0015, + "loss": 2.1346, + "step": 754 + }, + { + "epoch": 0.07964135021097046, + "grad_norm": 0.9433655738830566, + "learning_rate": 0.0015, + "loss": 2.15, + "step": 755 + }, + { + "epoch": 0.07974683544303797, + "grad_norm": 0.528934895992279, + "learning_rate": 0.0015, + "loss": 2.1164, + "step": 756 + }, + { + "epoch": 0.07985232067510549, + "grad_norm": 0.8696373701095581, + "learning_rate": 0.0015, + "loss": 2.1111, + "step": 757 + }, + { + "epoch": 0.079957805907173, + "grad_norm": 1.1396772861480713, + "learning_rate": 0.0015, + "loss": 2.1701, + "step": 758 + }, + { + "epoch": 0.0800632911392405, + "grad_norm": 0.49097946286201477, + "learning_rate": 0.0015, + "loss": 2.1009, + "step": 759 + }, + { + "epoch": 0.08016877637130802, + "grad_norm": 1.039196252822876, + "learning_rate": 0.0015, + "loss": 2.1188, + "step": 760 + }, + { + "epoch": 0.08027426160337553, + "grad_norm": 0.916569173336029, + "learning_rate": 0.0015, + "loss": 2.1709, + "step": 761 + }, + { + "epoch": 0.08037974683544304, + "grad_norm": 0.5536020398139954, + "learning_rate": 0.0015, + "loss": 2.1158, + "step": 762 + }, + { + "epoch": 0.08048523206751054, + "grad_norm": 1.2422629594802856, + "learning_rate": 0.0015, + "loss": 2.1076, + "step": 763 + }, + { + "epoch": 0.08059071729957806, + "grad_norm": 0.6607741117477417, + "learning_rate": 0.0015, + "loss": 2.1299, + "step": 764 + }, + { + "epoch": 0.08069620253164557, + "grad_norm": 0.6373748183250427, + "learning_rate": 0.0015, + "loss": 2.1238, + "step": 765 + }, + { + "epoch": 0.08080168776371308, + "grad_norm": 0.7773301005363464, + "learning_rate": 0.0015, + "loss": 2.1345, + "step": 766 + }, + { + "epoch": 0.0809071729957806, + "grad_norm": 0.535194993019104, + "learning_rate": 0.0015, + "loss": 2.1051, + "step": 767 + }, + { + "epoch": 
0.0810126582278481, + "grad_norm": 0.6248049139976501, + "learning_rate": 0.0015, + "loss": 2.1182, + "step": 768 + }, + { + "epoch": 0.08111814345991561, + "grad_norm": 0.7658226490020752, + "learning_rate": 0.0015, + "loss": 2.1528, + "step": 769 + }, + { + "epoch": 0.08122362869198312, + "grad_norm": 0.5887392163276672, + "learning_rate": 0.0015, + "loss": 2.1008, + "step": 770 + }, + { + "epoch": 0.08132911392405064, + "grad_norm": 0.5774050354957581, + "learning_rate": 0.0015, + "loss": 2.0934, + "step": 771 + }, + { + "epoch": 0.08143459915611814, + "grad_norm": 0.595152735710144, + "learning_rate": 0.0015, + "loss": 2.1748, + "step": 772 + }, + { + "epoch": 0.08154008438818565, + "grad_norm": 0.5118763446807861, + "learning_rate": 0.0015, + "loss": 2.1056, + "step": 773 + }, + { + "epoch": 0.08164556962025317, + "grad_norm": 0.5855534076690674, + "learning_rate": 0.0015, + "loss": 2.1198, + "step": 774 + }, + { + "epoch": 0.08175105485232068, + "grad_norm": 0.5367034673690796, + "learning_rate": 0.0015, + "loss": 2.0888, + "step": 775 + }, + { + "epoch": 0.08185654008438818, + "grad_norm": 0.5104321837425232, + "learning_rate": 0.0015, + "loss": 2.0662, + "step": 776 + }, + { + "epoch": 0.0819620253164557, + "grad_norm": 0.4698849320411682, + "learning_rate": 0.0015, + "loss": 2.1104, + "step": 777 + }, + { + "epoch": 0.08206751054852321, + "grad_norm": 0.6529117226600647, + "learning_rate": 0.0015, + "loss": 2.1377, + "step": 778 + }, + { + "epoch": 0.08217299578059072, + "grad_norm": 0.8502539396286011, + "learning_rate": 0.0015, + "loss": 2.1527, + "step": 779 + }, + { + "epoch": 0.08227848101265822, + "grad_norm": 0.9378929138183594, + "learning_rate": 0.0015, + "loss": 2.1388, + "step": 780 + }, + { + "epoch": 0.08238396624472574, + "grad_norm": 0.567205011844635, + "learning_rate": 0.0015, + "loss": 2.0874, + "step": 781 + }, + { + "epoch": 0.08248945147679325, + "grad_norm": 0.8217090368270874, + "learning_rate": 0.0015, + "loss": 2.1141, + "step": 
782 + }, + { + "epoch": 0.08259493670886076, + "grad_norm": 1.048482894897461, + "learning_rate": 0.0015, + "loss": 2.1185, + "step": 783 + }, + { + "epoch": 0.08270042194092828, + "grad_norm": 0.5079817771911621, + "learning_rate": 0.0015, + "loss": 2.1207, + "step": 784 + }, + { + "epoch": 0.08280590717299578, + "grad_norm": 0.9008298516273499, + "learning_rate": 0.0015, + "loss": 2.1355, + "step": 785 + }, + { + "epoch": 0.08291139240506329, + "grad_norm": 0.8511000275611877, + "learning_rate": 0.0015, + "loss": 2.1183, + "step": 786 + }, + { + "epoch": 0.0830168776371308, + "grad_norm": 0.5419773459434509, + "learning_rate": 0.0015, + "loss": 2.1527, + "step": 787 + }, + { + "epoch": 0.08312236286919832, + "grad_norm": 0.9313815236091614, + "learning_rate": 0.0015, + "loss": 2.1413, + "step": 788 + }, + { + "epoch": 0.08322784810126582, + "grad_norm": 0.7607855200767517, + "learning_rate": 0.0015, + "loss": 2.1122, + "step": 789 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 0.6888298392295837, + "learning_rate": 0.0015, + "loss": 2.1706, + "step": 790 + }, + { + "epoch": 0.08343881856540085, + "grad_norm": 0.6333563923835754, + "learning_rate": 0.0015, + "loss": 2.1394, + "step": 791 + }, + { + "epoch": 0.08354430379746836, + "grad_norm": 0.6713033318519592, + "learning_rate": 0.0015, + "loss": 2.1206, + "step": 792 + }, + { + "epoch": 0.08364978902953586, + "grad_norm": 0.7254213690757751, + "learning_rate": 0.0015, + "loss": 2.0938, + "step": 793 + }, + { + "epoch": 0.08375527426160338, + "grad_norm": 0.562282145023346, + "learning_rate": 0.0015, + "loss": 2.1005, + "step": 794 + }, + { + "epoch": 0.08386075949367089, + "grad_norm": 0.6040173768997192, + "learning_rate": 0.0015, + "loss": 2.0771, + "step": 795 + }, + { + "epoch": 0.0839662447257384, + "grad_norm": 0.5453705191612244, + "learning_rate": 0.0015, + "loss": 2.1052, + "step": 796 + }, + { + "epoch": 0.0840717299578059, + "grad_norm": 0.7957865595817566, + "learning_rate": 0.0015, + 
"loss": 2.1356, + "step": 797 + }, + { + "epoch": 0.08417721518987342, + "grad_norm": 0.8070213794708252, + "learning_rate": 0.0015, + "loss": 2.0855, + "step": 798 + }, + { + "epoch": 0.08428270042194093, + "grad_norm": 0.5396027565002441, + "learning_rate": 0.0015, + "loss": 2.0852, + "step": 799 + }, + { + "epoch": 0.08438818565400844, + "grad_norm": 0.6908420324325562, + "learning_rate": 0.0015, + "loss": 2.067, + "step": 800 + }, + { + "epoch": 0.08449367088607596, + "grad_norm": 0.787321150302887, + "learning_rate": 0.0015, + "loss": 2.0946, + "step": 801 + }, + { + "epoch": 0.08459915611814346, + "grad_norm": 0.5643385648727417, + "learning_rate": 0.0015, + "loss": 2.0993, + "step": 802 + }, + { + "epoch": 0.08470464135021097, + "grad_norm": 0.7755754590034485, + "learning_rate": 0.0015, + "loss": 2.0839, + "step": 803 + }, + { + "epoch": 0.08481012658227848, + "grad_norm": 0.7757706046104431, + "learning_rate": 0.0015, + "loss": 2.0479, + "step": 804 + }, + { + "epoch": 0.084915611814346, + "grad_norm": 0.520271360874176, + "learning_rate": 0.0015, + "loss": 2.0966, + "step": 805 + }, + { + "epoch": 0.0850210970464135, + "grad_norm": 0.8074872493743896, + "learning_rate": 0.0015, + "loss": 2.1286, + "step": 806 + }, + { + "epoch": 0.08512658227848101, + "grad_norm": 0.9122884273529053, + "learning_rate": 0.0015, + "loss": 2.0998, + "step": 807 + }, + { + "epoch": 0.08523206751054853, + "grad_norm": 0.5138601660728455, + "learning_rate": 0.0015, + "loss": 2.1151, + "step": 808 + }, + { + "epoch": 0.08533755274261604, + "grad_norm": 0.8120249509811401, + "learning_rate": 0.0015, + "loss": 2.0593, + "step": 809 + }, + { + "epoch": 0.08544303797468354, + "grad_norm": 0.8636590242385864, + "learning_rate": 0.0015, + "loss": 2.0836, + "step": 810 + }, + { + "epoch": 0.08554852320675105, + "grad_norm": 0.4965047538280487, + "learning_rate": 0.0015, + "loss": 2.1041, + "step": 811 + }, + { + "epoch": 0.08565400843881857, + "grad_norm": 0.778988778591156, + 
"learning_rate": 0.0015, + "loss": 2.0745, + "step": 812 + }, + { + "epoch": 0.08575949367088608, + "grad_norm": 0.7225834727287292, + "learning_rate": 0.0015, + "loss": 2.1396, + "step": 813 + }, + { + "epoch": 0.08586497890295358, + "grad_norm": 0.5558802485466003, + "learning_rate": 0.0015, + "loss": 2.1128, + "step": 814 + }, + { + "epoch": 0.0859704641350211, + "grad_norm": 0.8078777194023132, + "learning_rate": 0.0015, + "loss": 2.0878, + "step": 815 + }, + { + "epoch": 0.08607594936708861, + "grad_norm": 0.5530787706375122, + "learning_rate": 0.0015, + "loss": 2.0787, + "step": 816 + }, + { + "epoch": 0.08618143459915611, + "grad_norm": 0.633363664150238, + "learning_rate": 0.0015, + "loss": 2.106, + "step": 817 + }, + { + "epoch": 0.08628691983122364, + "grad_norm": 0.7179210782051086, + "learning_rate": 0.0015, + "loss": 2.1051, + "step": 818 + }, + { + "epoch": 0.08639240506329114, + "grad_norm": 0.5488991737365723, + "learning_rate": 0.0015, + "loss": 2.0767, + "step": 819 + }, + { + "epoch": 0.08649789029535865, + "grad_norm": 0.7378754615783691, + "learning_rate": 0.0015, + "loss": 2.074, + "step": 820 + }, + { + "epoch": 0.08660337552742615, + "grad_norm": 0.7365741729736328, + "learning_rate": 0.0015, + "loss": 2.1063, + "step": 821 + }, + { + "epoch": 0.08670886075949367, + "grad_norm": 0.5890570282936096, + "learning_rate": 0.0015, + "loss": 2.0978, + "step": 822 + }, + { + "epoch": 0.08681434599156118, + "grad_norm": 0.5736057758331299, + "learning_rate": 0.0015, + "loss": 2.0928, + "step": 823 + }, + { + "epoch": 0.08691983122362869, + "grad_norm": 0.6182436347007751, + "learning_rate": 0.0015, + "loss": 2.1404, + "step": 824 + }, + { + "epoch": 0.08702531645569621, + "grad_norm": 0.564915657043457, + "learning_rate": 0.0015, + "loss": 2.0751, + "step": 825 + }, + { + "epoch": 0.08713080168776371, + "grad_norm": 0.4753126800060272, + "learning_rate": 0.0015, + "loss": 2.1099, + "step": 826 + }, + { + "epoch": 0.08723628691983122, + "grad_norm": 
0.5777750015258789, + "learning_rate": 0.0015, + "loss": 2.1002, + "step": 827 + }, + { + "epoch": 0.08734177215189873, + "grad_norm": 0.5815672874450684, + "learning_rate": 0.0015, + "loss": 2.0849, + "step": 828 + }, + { + "epoch": 0.08744725738396625, + "grad_norm": 0.5881022214889526, + "learning_rate": 0.0015, + "loss": 2.0816, + "step": 829 + }, + { + "epoch": 0.08755274261603375, + "grad_norm": 0.5091956257820129, + "learning_rate": 0.0015, + "loss": 2.0973, + "step": 830 + }, + { + "epoch": 0.08765822784810126, + "grad_norm": 0.5754399299621582, + "learning_rate": 0.0015, + "loss": 2.052, + "step": 831 + }, + { + "epoch": 0.08776371308016878, + "grad_norm": 0.4812692105770111, + "learning_rate": 0.0015, + "loss": 2.0468, + "step": 832 + }, + { + "epoch": 0.08786919831223629, + "grad_norm": 0.5575996041297913, + "learning_rate": 0.0015, + "loss": 2.0753, + "step": 833 + }, + { + "epoch": 0.0879746835443038, + "grad_norm": 0.797473132610321, + "learning_rate": 0.0015, + "loss": 2.0403, + "step": 834 + }, + { + "epoch": 0.08808016877637131, + "grad_norm": 0.6777927875518799, + "learning_rate": 0.0015, + "loss": 2.0985, + "step": 835 + }, + { + "epoch": 0.08818565400843882, + "grad_norm": 0.48958081007003784, + "learning_rate": 0.0015, + "loss": 2.0704, + "step": 836 + }, + { + "epoch": 0.08829113924050633, + "grad_norm": 0.6332954168319702, + "learning_rate": 0.0015, + "loss": 2.0903, + "step": 837 + }, + { + "epoch": 0.08839662447257383, + "grad_norm": 0.6067173480987549, + "learning_rate": 0.0015, + "loss": 2.1112, + "step": 838 + }, + { + "epoch": 0.08850210970464135, + "grad_norm": 0.4982474744319916, + "learning_rate": 0.0015, + "loss": 2.0722, + "step": 839 + }, + { + "epoch": 0.08860759493670886, + "grad_norm": 0.6219644546508789, + "learning_rate": 0.0015, + "loss": 2.0967, + "step": 840 + }, + { + "epoch": 0.08871308016877637, + "grad_norm": 0.7281220555305481, + "learning_rate": 0.0015, + "loss": 2.0884, + "step": 841 + }, + { + "epoch": 
0.08881856540084389, + "grad_norm": 0.5716444849967957, + "learning_rate": 0.0015, + "loss": 2.0399, + "step": 842 + }, + { + "epoch": 0.0889240506329114, + "grad_norm": 0.5442397594451904, + "learning_rate": 0.0015, + "loss": 2.0879, + "step": 843 + }, + { + "epoch": 0.0890295358649789, + "grad_norm": 0.6892992854118347, + "learning_rate": 0.0015, + "loss": 2.0685, + "step": 844 + }, + { + "epoch": 0.08913502109704641, + "grad_norm": 0.578584611415863, + "learning_rate": 0.0015, + "loss": 2.0866, + "step": 845 + }, + { + "epoch": 0.08924050632911393, + "grad_norm": 0.6582306623458862, + "learning_rate": 0.0015, + "loss": 2.1037, + "step": 846 + }, + { + "epoch": 0.08934599156118143, + "grad_norm": 0.6320885419845581, + "learning_rate": 0.0015, + "loss": 2.0964, + "step": 847 + }, + { + "epoch": 0.08945147679324894, + "grad_norm": 0.6004634499549866, + "learning_rate": 0.0015, + "loss": 2.0891, + "step": 848 + }, + { + "epoch": 0.08955696202531646, + "grad_norm": 0.5187250971794128, + "learning_rate": 0.0015, + "loss": 2.054, + "step": 849 + }, + { + "epoch": 0.08966244725738397, + "grad_norm": 0.5002400875091553, + "learning_rate": 0.0015, + "loss": 2.0343, + "step": 850 + }, + { + "epoch": 0.08976793248945147, + "grad_norm": 0.5926933884620667, + "learning_rate": 0.0015, + "loss": 2.0767, + "step": 851 + }, + { + "epoch": 0.08987341772151898, + "grad_norm": 0.5073577165603638, + "learning_rate": 0.0015, + "loss": 2.091, + "step": 852 + }, + { + "epoch": 0.0899789029535865, + "grad_norm": 0.5054778456687927, + "learning_rate": 0.0015, + "loss": 2.0622, + "step": 853 + }, + { + "epoch": 0.09008438818565401, + "grad_norm": 0.5435723066329956, + "learning_rate": 0.0015, + "loss": 2.1132, + "step": 854 + }, + { + "epoch": 0.09018987341772151, + "grad_norm": 0.4906049370765686, + "learning_rate": 0.0015, + "loss": 2.0811, + "step": 855 + }, + { + "epoch": 0.09029535864978903, + "grad_norm": 0.5326725840568542, + "learning_rate": 0.0015, + "loss": 2.0879, + "step": 856 
+ }, + { + "epoch": 0.09040084388185654, + "grad_norm": 0.5024213790893555, + "learning_rate": 0.0015, + "loss": 2.0634, + "step": 857 + }, + { + "epoch": 0.09050632911392405, + "grad_norm": 0.6202135682106018, + "learning_rate": 0.0015, + "loss": 2.0841, + "step": 858 + }, + { + "epoch": 0.09061181434599157, + "grad_norm": 0.6742455959320068, + "learning_rate": 0.0015, + "loss": 2.0819, + "step": 859 + }, + { + "epoch": 0.09071729957805907, + "grad_norm": 0.6315163373947144, + "learning_rate": 0.0015, + "loss": 2.0284, + "step": 860 + }, + { + "epoch": 0.09082278481012658, + "grad_norm": 0.6156589388847351, + "learning_rate": 0.0015, + "loss": 2.0886, + "step": 861 + }, + { + "epoch": 0.09092827004219409, + "grad_norm": 0.4940195381641388, + "learning_rate": 0.0015, + "loss": 2.0542, + "step": 862 + }, + { + "epoch": 0.09103375527426161, + "grad_norm": 0.5877742767333984, + "learning_rate": 0.0015, + "loss": 2.0654, + "step": 863 + }, + { + "epoch": 0.09113924050632911, + "grad_norm": 0.5197194814682007, + "learning_rate": 0.0015, + "loss": 2.0727, + "step": 864 + }, + { + "epoch": 0.09124472573839662, + "grad_norm": 0.5381242632865906, + "learning_rate": 0.0015, + "loss": 2.0749, + "step": 865 + }, + { + "epoch": 0.09135021097046414, + "grad_norm": 0.5649956464767456, + "learning_rate": 0.0015, + "loss": 2.0322, + "step": 866 + }, + { + "epoch": 0.09145569620253165, + "grad_norm": 0.5806474089622498, + "learning_rate": 0.0015, + "loss": 2.0715, + "step": 867 + }, + { + "epoch": 0.09156118143459915, + "grad_norm": 0.5830804109573364, + "learning_rate": 0.0015, + "loss": 2.1055, + "step": 868 + }, + { + "epoch": 0.09166666666666666, + "grad_norm": 0.47413384914398193, + "learning_rate": 0.0015, + "loss": 2.0386, + "step": 869 + }, + { + "epoch": 0.09177215189873418, + "grad_norm": 0.5385271906852722, + "learning_rate": 0.0015, + "loss": 2.0903, + "step": 870 + }, + { + "epoch": 0.09187763713080169, + "grad_norm": 0.5487086772918701, + "learning_rate": 0.0015, + 
"loss": 2.073, + "step": 871 + }, + { + "epoch": 0.0919831223628692, + "grad_norm": 0.5364266633987427, + "learning_rate": 0.0015, + "loss": 2.0621, + "step": 872 + }, + { + "epoch": 0.09208860759493671, + "grad_norm": 0.5214003324508667, + "learning_rate": 0.0015, + "loss": 2.0578, + "step": 873 + }, + { + "epoch": 0.09219409282700422, + "grad_norm": 0.5408740639686584, + "learning_rate": 0.0015, + "loss": 2.0689, + "step": 874 + }, + { + "epoch": 0.09229957805907173, + "grad_norm": 0.5028771758079529, + "learning_rate": 0.0015, + "loss": 2.0566, + "step": 875 + }, + { + "epoch": 0.09240506329113925, + "grad_norm": 0.46585795283317566, + "learning_rate": 0.0015, + "loss": 2.0709, + "step": 876 + }, + { + "epoch": 0.09251054852320675, + "grad_norm": 0.5307130217552185, + "learning_rate": 0.0015, + "loss": 2.0632, + "step": 877 + }, + { + "epoch": 0.09261603375527426, + "grad_norm": 0.47115400433540344, + "learning_rate": 0.0015, + "loss": 2.0358, + "step": 878 + }, + { + "epoch": 0.09272151898734177, + "grad_norm": 0.5778770446777344, + "learning_rate": 0.0015, + "loss": 2.0956, + "step": 879 + }, + { + "epoch": 0.09282700421940929, + "grad_norm": 0.6261560320854187, + "learning_rate": 0.0015, + "loss": 2.0703, + "step": 880 + }, + { + "epoch": 0.0929324894514768, + "grad_norm": 0.5335504412651062, + "learning_rate": 0.0015, + "loss": 2.0924, + "step": 881 + }, + { + "epoch": 0.0930379746835443, + "grad_norm": 0.5628005266189575, + "learning_rate": 0.0015, + "loss": 2.0395, + "step": 882 + }, + { + "epoch": 0.09314345991561182, + "grad_norm": 0.7454283833503723, + "learning_rate": 0.0015, + "loss": 2.0085, + "step": 883 + }, + { + "epoch": 0.09324894514767933, + "grad_norm": 0.6037567257881165, + "learning_rate": 0.0015, + "loss": 2.0398, + "step": 884 + }, + { + "epoch": 0.09335443037974683, + "grad_norm": 0.5915200710296631, + "learning_rate": 0.0015, + "loss": 2.103, + "step": 885 + }, + { + "epoch": 0.09345991561181434, + "grad_norm": 0.8386354446411133, + 
"learning_rate": 0.0015, + "loss": 2.0275, + "step": 886 + }, + { + "epoch": 0.09356540084388186, + "grad_norm": 0.9508600831031799, + "learning_rate": 0.0015, + "loss": 2.0471, + "step": 887 + }, + { + "epoch": 0.09367088607594937, + "grad_norm": 0.7215097546577454, + "learning_rate": 0.0015, + "loss": 2.0822, + "step": 888 + }, + { + "epoch": 0.09377637130801687, + "grad_norm": 0.7270636558532715, + "learning_rate": 0.0015, + "loss": 2.0547, + "step": 889 + }, + { + "epoch": 0.0938818565400844, + "grad_norm": 0.5731157660484314, + "learning_rate": 0.0015, + "loss": 2.0668, + "step": 890 + }, + { + "epoch": 0.0939873417721519, + "grad_norm": 0.6870090365409851, + "learning_rate": 0.0015, + "loss": 2.1074, + "step": 891 + }, + { + "epoch": 0.0940928270042194, + "grad_norm": 0.6307251453399658, + "learning_rate": 0.0015, + "loss": 2.0586, + "step": 892 + }, + { + "epoch": 0.09419831223628691, + "grad_norm": 0.5434120893478394, + "learning_rate": 0.0015, + "loss": 2.0349, + "step": 893 + }, + { + "epoch": 0.09430379746835443, + "grad_norm": 0.5502089858055115, + "learning_rate": 0.0015, + "loss": 2.0808, + "step": 894 + }, + { + "epoch": 0.09440928270042194, + "grad_norm": 0.5304689407348633, + "learning_rate": 0.0015, + "loss": 2.035, + "step": 895 + }, + { + "epoch": 0.09451476793248945, + "grad_norm": 0.5041806101799011, + "learning_rate": 0.0015, + "loss": 2.0407, + "step": 896 + }, + { + "epoch": 0.09462025316455697, + "grad_norm": 0.5043999552726746, + "learning_rate": 0.0015, + "loss": 2.0427, + "step": 897 + }, + { + "epoch": 0.09472573839662447, + "grad_norm": 0.6511165499687195, + "learning_rate": 0.0015, + "loss": 2.0476, + "step": 898 + }, + { + "epoch": 0.09483122362869198, + "grad_norm": 0.6559220552444458, + "learning_rate": 0.0015, + "loss": 2.0277, + "step": 899 + }, + { + "epoch": 0.0949367088607595, + "grad_norm": 0.6053939461708069, + "learning_rate": 0.0015, + "loss": 2.05, + "step": 900 + }, + { + "epoch": 0.095042194092827, + "grad_norm": 
0.5160669684410095, + "learning_rate": 0.0015, + "loss": 2.0707, + "step": 901 + }, + { + "epoch": 0.09514767932489451, + "grad_norm": 0.5985272526741028, + "learning_rate": 0.0015, + "loss": 2.0565, + "step": 902 + }, + { + "epoch": 0.09525316455696202, + "grad_norm": 0.6856181621551514, + "learning_rate": 0.0015, + "loss": 2.0514, + "step": 903 + }, + { + "epoch": 0.09535864978902954, + "grad_norm": 0.6011475920677185, + "learning_rate": 0.0015, + "loss": 2.0498, + "step": 904 + }, + { + "epoch": 0.09546413502109705, + "grad_norm": 0.6291939616203308, + "learning_rate": 0.0015, + "loss": 2.0307, + "step": 905 + }, + { + "epoch": 0.09556962025316455, + "grad_norm": 0.7939316630363464, + "learning_rate": 0.0015, + "loss": 2.0714, + "step": 906 + }, + { + "epoch": 0.09567510548523207, + "grad_norm": 0.8090095520019531, + "learning_rate": 0.0015, + "loss": 2.0856, + "step": 907 + }, + { + "epoch": 0.09578059071729958, + "grad_norm": 0.7017444968223572, + "learning_rate": 0.0015, + "loss": 2.0333, + "step": 908 + }, + { + "epoch": 0.09588607594936709, + "grad_norm": 0.47305548191070557, + "learning_rate": 0.0015, + "loss": 2.0538, + "step": 909 + }, + { + "epoch": 0.09599156118143459, + "grad_norm": 0.7019219398498535, + "learning_rate": 0.0015, + "loss": 2.1068, + "step": 910 + }, + { + "epoch": 0.09609704641350211, + "grad_norm": 0.6788797974586487, + "learning_rate": 0.0015, + "loss": 2.0596, + "step": 911 + }, + { + "epoch": 0.09620253164556962, + "grad_norm": 0.5691693425178528, + "learning_rate": 0.0015, + "loss": 2.0453, + "step": 912 + }, + { + "epoch": 0.09630801687763713, + "grad_norm": 0.6917991638183594, + "learning_rate": 0.0015, + "loss": 2.039, + "step": 913 + }, + { + "epoch": 0.09641350210970465, + "grad_norm": 0.5481536388397217, + "learning_rate": 0.0015, + "loss": 2.0631, + "step": 914 + }, + { + "epoch": 0.09651898734177215, + "grad_norm": 0.5559115409851074, + "learning_rate": 0.0015, + "loss": 2.0885, + "step": 915 + }, + { + "epoch": 
0.09662447257383966, + "grad_norm": 0.6355902552604675, + "learning_rate": 0.0015, + "loss": 2.0648, + "step": 916 + }, + { + "epoch": 0.09672995780590718, + "grad_norm": 0.6880175471305847, + "learning_rate": 0.0015, + "loss": 2.0277, + "step": 917 + }, + { + "epoch": 0.09683544303797469, + "grad_norm": 0.650114119052887, + "learning_rate": 0.0015, + "loss": 2.0384, + "step": 918 + }, + { + "epoch": 0.09694092827004219, + "grad_norm": 0.5070211291313171, + "learning_rate": 0.0015, + "loss": 2.0686, + "step": 919 + }, + { + "epoch": 0.0970464135021097, + "grad_norm": 0.559771716594696, + "learning_rate": 0.0015, + "loss": 2.0018, + "step": 920 + }, + { + "epoch": 0.09715189873417722, + "grad_norm": 0.6372707486152649, + "learning_rate": 0.0015, + "loss": 2.0719, + "step": 921 + }, + { + "epoch": 0.09725738396624473, + "grad_norm": 0.44606539607048035, + "learning_rate": 0.0015, + "loss": 2.0349, + "step": 922 + }, + { + "epoch": 0.09736286919831223, + "grad_norm": 0.583096444606781, + "learning_rate": 0.0015, + "loss": 2.0134, + "step": 923 + }, + { + "epoch": 0.09746835443037975, + "grad_norm": 0.5403363108634949, + "learning_rate": 0.0015, + "loss": 2.0489, + "step": 924 + }, + { + "epoch": 0.09757383966244726, + "grad_norm": 0.47966498136520386, + "learning_rate": 0.0015, + "loss": 2.0077, + "step": 925 + }, + { + "epoch": 0.09767932489451477, + "grad_norm": 0.6291763782501221, + "learning_rate": 0.0015, + "loss": 2.0395, + "step": 926 + }, + { + "epoch": 0.09778481012658227, + "grad_norm": 0.575806736946106, + "learning_rate": 0.0015, + "loss": 2.0404, + "step": 927 + }, + { + "epoch": 0.09789029535864979, + "grad_norm": 0.4905599355697632, + "learning_rate": 0.0015, + "loss": 1.9916, + "step": 928 + }, + { + "epoch": 0.0979957805907173, + "grad_norm": 0.461168110370636, + "learning_rate": 0.0015, + "loss": 2.045, + "step": 929 + }, + { + "epoch": 0.0981012658227848, + "grad_norm": 0.4989011883735657, + "learning_rate": 0.0015, + "loss": 2.042, + "step": 930 + 
}, + { + "epoch": 0.09820675105485233, + "grad_norm": 0.6231623888015747, + "learning_rate": 0.0015, + "loss": 2.0148, + "step": 931 + }, + { + "epoch": 0.09831223628691983, + "grad_norm": 0.7172541618347168, + "learning_rate": 0.0015, + "loss": 2.0736, + "step": 932 + }, + { + "epoch": 0.09841772151898734, + "grad_norm": 0.7043025493621826, + "learning_rate": 0.0015, + "loss": 2.0488, + "step": 933 + }, + { + "epoch": 0.09852320675105486, + "grad_norm": 0.5088157057762146, + "learning_rate": 0.0015, + "loss": 2.0493, + "step": 934 + }, + { + "epoch": 0.09862869198312237, + "grad_norm": 0.5786383748054504, + "learning_rate": 0.0015, + "loss": 2.0489, + "step": 935 + }, + { + "epoch": 0.09873417721518987, + "grad_norm": 0.7820794582366943, + "learning_rate": 0.0015, + "loss": 2.0251, + "step": 936 + }, + { + "epoch": 0.09883966244725738, + "grad_norm": 0.7000012993812561, + "learning_rate": 0.0015, + "loss": 2.0558, + "step": 937 + }, + { + "epoch": 0.0989451476793249, + "grad_norm": 0.5158394575119019, + "learning_rate": 0.0015, + "loss": 2.0317, + "step": 938 + }, + { + "epoch": 0.0990506329113924, + "grad_norm": 0.5724575519561768, + "learning_rate": 0.0015, + "loss": 2.0387, + "step": 939 + }, + { + "epoch": 0.09915611814345991, + "grad_norm": 0.6447023749351501, + "learning_rate": 0.0015, + "loss": 2.0268, + "step": 940 + }, + { + "epoch": 0.09926160337552743, + "grad_norm": 0.603764533996582, + "learning_rate": 0.0015, + "loss": 2.0097, + "step": 941 + }, + { + "epoch": 0.09936708860759494, + "grad_norm": 0.49410590529441833, + "learning_rate": 0.0015, + "loss": 2.0437, + "step": 942 + }, + { + "epoch": 0.09947257383966245, + "grad_norm": 0.5381091833114624, + "learning_rate": 0.0015, + "loss": 2.0101, + "step": 943 + }, + { + "epoch": 0.09957805907172995, + "grad_norm": 0.6123061180114746, + "learning_rate": 0.0015, + "loss": 1.9906, + "step": 944 + }, + { + "epoch": 0.09968354430379747, + "grad_norm": 0.5386263132095337, + "learning_rate": 0.0015, + "loss": 
2.0218, + "step": 945 + }, + { + "epoch": 0.09978902953586498, + "grad_norm": 0.4787702262401581, + "learning_rate": 0.0015, + "loss": 2.019, + "step": 946 + }, + { + "epoch": 0.09989451476793249, + "grad_norm": 0.5042153000831604, + "learning_rate": 0.0015, + "loss": 1.9806, + "step": 947 + }, + { + "epoch": 0.1, + "grad_norm": 0.4878217875957489, + "learning_rate": 0.0015, + "loss": 2.0517, + "step": 948 + }, + { + "epoch": 0.10010548523206751, + "grad_norm": 0.47430136799812317, + "learning_rate": 0.0015, + "loss": 2.0142, + "step": 949 + }, + { + "epoch": 0.10021097046413502, + "grad_norm": 0.5746744871139526, + "learning_rate": 0.0015, + "loss": 2.0445, + "step": 950 + }, + { + "epoch": 0.10031645569620253, + "grad_norm": 0.5339949727058411, + "learning_rate": 0.0015, + "loss": 2.0465, + "step": 951 + }, + { + "epoch": 0.10042194092827005, + "grad_norm": 0.5987122654914856, + "learning_rate": 0.0015, + "loss": 2.0319, + "step": 952 + }, + { + "epoch": 0.10052742616033755, + "grad_norm": 0.6438586711883545, + "learning_rate": 0.0015, + "loss": 2.055, + "step": 953 + }, + { + "epoch": 0.10063291139240506, + "grad_norm": 0.6770201921463013, + "learning_rate": 0.0015, + "loss": 2.0432, + "step": 954 + }, + { + "epoch": 0.10073839662447258, + "grad_norm": 0.5997467637062073, + "learning_rate": 0.0015, + "loss": 2.0345, + "step": 955 + }, + { + "epoch": 0.10084388185654009, + "grad_norm": 0.5038588047027588, + "learning_rate": 0.0015, + "loss": 2.0118, + "step": 956 + }, + { + "epoch": 0.10094936708860759, + "grad_norm": 0.6786378622055054, + "learning_rate": 0.0015, + "loss": 2.0024, + "step": 957 + }, + { + "epoch": 0.10105485232067511, + "grad_norm": 0.9915666580200195, + "learning_rate": 0.0015, + "loss": 2.0532, + "step": 958 + }, + { + "epoch": 0.10116033755274262, + "grad_norm": 0.8501437306404114, + "learning_rate": 0.0015, + "loss": 1.9929, + "step": 959 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 0.5623571872711182, + "learning_rate": 0.0015, + 
"loss": 2.012, + "step": 960 + }, + { + "epoch": 0.10137130801687763, + "grad_norm": 0.5599110126495361, + "learning_rate": 0.0015, + "loss": 2.0275, + "step": 961 + }, + { + "epoch": 0.10147679324894515, + "grad_norm": 0.72342449426651, + "learning_rate": 0.0015, + "loss": 2.0046, + "step": 962 + }, + { + "epoch": 0.10158227848101266, + "grad_norm": 0.6908513903617859, + "learning_rate": 0.0015, + "loss": 2.0556, + "step": 963 + }, + { + "epoch": 0.10168776371308016, + "grad_norm": 0.531358540058136, + "learning_rate": 0.0015, + "loss": 2.0225, + "step": 964 + }, + { + "epoch": 0.10179324894514769, + "grad_norm": 0.5810999274253845, + "learning_rate": 0.0015, + "loss": 2.0637, + "step": 965 + }, + { + "epoch": 0.10189873417721519, + "grad_norm": 0.7688680291175842, + "learning_rate": 0.0015, + "loss": 2.0004, + "step": 966 + }, + { + "epoch": 0.1020042194092827, + "grad_norm": 0.7815285921096802, + "learning_rate": 0.0015, + "loss": 2.0317, + "step": 967 + }, + { + "epoch": 0.1021097046413502, + "grad_norm": 0.5666143894195557, + "learning_rate": 0.0015, + "loss": 2.0622, + "step": 968 + }, + { + "epoch": 0.10221518987341772, + "grad_norm": 0.821260929107666, + "learning_rate": 0.0015, + "loss": 2.0122, + "step": 969 + }, + { + "epoch": 0.10232067510548523, + "grad_norm": 1.2794028520584106, + "learning_rate": 0.0015, + "loss": 2.0467, + "step": 970 + }, + { + "epoch": 0.10242616033755274, + "grad_norm": 0.5275937914848328, + "learning_rate": 0.0015, + "loss": 2.002, + "step": 971 + }, + { + "epoch": 0.10253164556962026, + "grad_norm": 0.9917479753494263, + "learning_rate": 0.0015, + "loss": 2.0123, + "step": 972 + }, + { + "epoch": 0.10263713080168776, + "grad_norm": 1.0854936838150024, + "learning_rate": 0.0015, + "loss": 2.0247, + "step": 973 + }, + { + "epoch": 0.10274261603375527, + "grad_norm": 0.5119172930717468, + "learning_rate": 0.0015, + "loss": 2.0184, + "step": 974 + }, + { + "epoch": 0.10284810126582279, + "grad_norm": 0.7493650317192078, + 
"learning_rate": 0.0015, + "loss": 2.0069, + "step": 975 + }, + { + "epoch": 0.1029535864978903, + "grad_norm": 0.7433720827102661, + "learning_rate": 0.0015, + "loss": 2.0068, + "step": 976 + }, + { + "epoch": 0.1030590717299578, + "grad_norm": 0.5080604553222656, + "learning_rate": 0.0015, + "loss": 2.0213, + "step": 977 + }, + { + "epoch": 0.10316455696202531, + "grad_norm": 0.6605928540229797, + "learning_rate": 0.0015, + "loss": 2.0108, + "step": 978 + }, + { + "epoch": 0.10327004219409283, + "grad_norm": 0.7366412878036499, + "learning_rate": 0.0015, + "loss": 1.9982, + "step": 979 + }, + { + "epoch": 0.10337552742616034, + "grad_norm": 0.5461682677268982, + "learning_rate": 0.0015, + "loss": 2.0353, + "step": 980 + }, + { + "epoch": 0.10348101265822784, + "grad_norm": 0.5941941142082214, + "learning_rate": 0.0015, + "loss": 2.0194, + "step": 981 + }, + { + "epoch": 0.10358649789029536, + "grad_norm": 0.6931915283203125, + "learning_rate": 0.0015, + "loss": 2.0428, + "step": 982 + }, + { + "epoch": 0.10369198312236287, + "grad_norm": 0.4936782419681549, + "learning_rate": 0.0015, + "loss": 1.9833, + "step": 983 + }, + { + "epoch": 0.10379746835443038, + "grad_norm": 0.641244113445282, + "learning_rate": 0.0015, + "loss": 1.9947, + "step": 984 + }, + { + "epoch": 0.10390295358649788, + "grad_norm": 0.8523558378219604, + "learning_rate": 0.0015, + "loss": 2.0132, + "step": 985 + }, + { + "epoch": 0.1040084388185654, + "grad_norm": 0.5719506740570068, + "learning_rate": 0.0015, + "loss": 2.0082, + "step": 986 + }, + { + "epoch": 0.10411392405063291, + "grad_norm": 0.6469573974609375, + "learning_rate": 0.0015, + "loss": 2.0403, + "step": 987 + }, + { + "epoch": 0.10421940928270042, + "grad_norm": 0.8099203109741211, + "learning_rate": 0.0015, + "loss": 1.9987, + "step": 988 + }, + { + "epoch": 0.10432489451476794, + "grad_norm": 0.5929587483406067, + "learning_rate": 0.0015, + "loss": 1.9888, + "step": 989 + }, + { + "epoch": 0.10443037974683544, + "grad_norm": 
0.5847246050834656, + "learning_rate": 0.0015, + "loss": 2.0016, + "step": 990 + }, + { + "epoch": 0.10453586497890295, + "grad_norm": 0.7067970037460327, + "learning_rate": 0.0015, + "loss": 2.0808, + "step": 991 + }, + { + "epoch": 0.10464135021097046, + "grad_norm": 0.5860495567321777, + "learning_rate": 0.0015, + "loss": 2.012, + "step": 992 + }, + { + "epoch": 0.10474683544303798, + "grad_norm": 0.5571528077125549, + "learning_rate": 0.0015, + "loss": 1.9845, + "step": 993 + }, + { + "epoch": 0.10485232067510548, + "grad_norm": 0.6108555197715759, + "learning_rate": 0.0015, + "loss": 2.035, + "step": 994 + }, + { + "epoch": 0.10495780590717299, + "grad_norm": 0.5549131035804749, + "learning_rate": 0.0015, + "loss": 1.9854, + "step": 995 + }, + { + "epoch": 0.10506329113924051, + "grad_norm": 0.48402878642082214, + "learning_rate": 0.0015, + "loss": 2.0152, + "step": 996 + }, + { + "epoch": 0.10516877637130802, + "grad_norm": 0.5344575643539429, + "learning_rate": 0.0015, + "loss": 1.9893, + "step": 997 + }, + { + "epoch": 0.10527426160337552, + "grad_norm": 0.5143426656723022, + "learning_rate": 0.0015, + "loss": 2.0294, + "step": 998 + }, + { + "epoch": 0.10537974683544304, + "grad_norm": 0.5781170129776001, + "learning_rate": 0.0015, + "loss": 2.0395, + "step": 999 + }, + { + "epoch": 0.10548523206751055, + "grad_norm": 0.4923545718193054, + "learning_rate": 0.0015, + "loss": 2.0296, + "step": 1000 + }, + { + "epoch": 0.10559071729957806, + "grad_norm": 0.6493373513221741, + "learning_rate": 0.0015, + "loss": 1.9926, + "step": 1001 + }, + { + "epoch": 0.10569620253164556, + "grad_norm": 0.7961377501487732, + "learning_rate": 0.0015, + "loss": 2.0014, + "step": 1002 + }, + { + "epoch": 0.10580168776371308, + "grad_norm": 0.5933529734611511, + "learning_rate": 0.0015, + "loss": 2.0503, + "step": 1003 + }, + { + "epoch": 0.10590717299578059, + "grad_norm": 0.5460384488105774, + "learning_rate": 0.0015, + "loss": 1.9732, + "step": 1004 + }, + { + "epoch": 
0.1060126582278481, + "grad_norm": 0.8536837697029114, + "learning_rate": 0.0015, + "loss": 1.9999, + "step": 1005 + }, + { + "epoch": 0.10611814345991562, + "grad_norm": 0.7791167497634888, + "learning_rate": 0.0015, + "loss": 2.014, + "step": 1006 + }, + { + "epoch": 0.10622362869198312, + "grad_norm": 0.49463993310928345, + "learning_rate": 0.0015, + "loss": 2.0405, + "step": 1007 + }, + { + "epoch": 0.10632911392405063, + "grad_norm": 0.6463145613670349, + "learning_rate": 0.0015, + "loss": 2.0008, + "step": 1008 + }, + { + "epoch": 0.10643459915611814, + "grad_norm": 0.7075051665306091, + "learning_rate": 0.0015, + "loss": 2.0169, + "step": 1009 + }, + { + "epoch": 0.10654008438818566, + "grad_norm": 0.5285488963127136, + "learning_rate": 0.0015, + "loss": 2.0235, + "step": 1010 + }, + { + "epoch": 0.10664556962025316, + "grad_norm": 0.6681978702545166, + "learning_rate": 0.0015, + "loss": 1.9893, + "step": 1011 + }, + { + "epoch": 0.10675105485232067, + "grad_norm": 0.7121663689613342, + "learning_rate": 0.0015, + "loss": 1.9869, + "step": 1012 + }, + { + "epoch": 0.10685654008438819, + "grad_norm": 0.5381683111190796, + "learning_rate": 0.0015, + "loss": 1.9709, + "step": 1013 + }, + { + "epoch": 0.1069620253164557, + "grad_norm": 0.5448917150497437, + "learning_rate": 0.0015, + "loss": 2.0114, + "step": 1014 + }, + { + "epoch": 0.1070675105485232, + "grad_norm": 0.5569539666175842, + "learning_rate": 0.0015, + "loss": 1.993, + "step": 1015 + }, + { + "epoch": 0.10717299578059072, + "grad_norm": 0.48897144198417664, + "learning_rate": 0.0015, + "loss": 1.9559, + "step": 1016 + }, + { + "epoch": 0.10727848101265823, + "grad_norm": 0.5529647469520569, + "learning_rate": 0.0015, + "loss": 2.0235, + "step": 1017 + }, + { + "epoch": 0.10738396624472574, + "grad_norm": 0.5180500149726868, + "learning_rate": 0.0015, + "loss": 2.0314, + "step": 1018 + }, + { + "epoch": 0.10748945147679324, + "grad_norm": 0.7292158603668213, + "learning_rate": 0.0015, + "loss": 
2.0739, + "step": 1019 + }, + { + "epoch": 0.10759493670886076, + "grad_norm": 0.9114202857017517, + "learning_rate": 0.0015, + "loss": 1.9931, + "step": 1020 + }, + { + "epoch": 0.10770042194092827, + "grad_norm": 0.6082236766815186, + "learning_rate": 0.0015, + "loss": 1.9947, + "step": 1021 + }, + { + "epoch": 0.10780590717299578, + "grad_norm": 0.5877995491027832, + "learning_rate": 0.0015, + "loss": 2.0045, + "step": 1022 + }, + { + "epoch": 0.1079113924050633, + "grad_norm": 0.7676417231559753, + "learning_rate": 0.0015, + "loss": 1.9798, + "step": 1023 + }, + { + "epoch": 0.1080168776371308, + "grad_norm": 0.6970999240875244, + "learning_rate": 0.0015, + "loss": 2.012, + "step": 1024 + }, + { + "epoch": 0.10812236286919831, + "grad_norm": 0.5083110928535461, + "learning_rate": 0.0015, + "loss": 2.0343, + "step": 1025 + }, + { + "epoch": 0.10822784810126582, + "grad_norm": 0.7133620381355286, + "learning_rate": 0.0015, + "loss": 1.9744, + "step": 1026 + }, + { + "epoch": 0.10833333333333334, + "grad_norm": 0.6370148062705994, + "learning_rate": 0.0015, + "loss": 2.0279, + "step": 1027 + }, + { + "epoch": 0.10843881856540084, + "grad_norm": 0.4984666109085083, + "learning_rate": 0.0015, + "loss": 1.9719, + "step": 1028 + }, + { + "epoch": 0.10854430379746835, + "grad_norm": 0.7172163724899292, + "learning_rate": 0.0015, + "loss": 2.0103, + "step": 1029 + }, + { + "epoch": 0.10864978902953587, + "grad_norm": 0.5677787065505981, + "learning_rate": 0.0015, + "loss": 2.0073, + "step": 1030 + }, + { + "epoch": 0.10875527426160338, + "grad_norm": 0.5745548009872437, + "learning_rate": 0.0015, + "loss": 1.9782, + "step": 1031 + }, + { + "epoch": 0.10886075949367088, + "grad_norm": 0.7175948619842529, + "learning_rate": 0.0015, + "loss": 1.966, + "step": 1032 + }, + { + "epoch": 0.10896624472573839, + "grad_norm": 0.5983565449714661, + "learning_rate": 0.0015, + "loss": 1.9896, + "step": 1033 + }, + { + "epoch": 0.10907172995780591, + "grad_norm": 0.5486365556716919, 
+ "learning_rate": 0.0015, + "loss": 2.0331, + "step": 1034 + }, + { + "epoch": 0.10917721518987342, + "grad_norm": 0.5033898949623108, + "learning_rate": 0.0015, + "loss": 2.0143, + "step": 1035 + }, + { + "epoch": 0.10928270042194092, + "grad_norm": 0.5592193603515625, + "learning_rate": 0.0015, + "loss": 1.9721, + "step": 1036 + }, + { + "epoch": 0.10938818565400844, + "grad_norm": 0.4769430458545685, + "learning_rate": 0.0015, + "loss": 1.9864, + "step": 1037 + }, + { + "epoch": 0.10949367088607595, + "grad_norm": 0.5129791498184204, + "learning_rate": 0.0015, + "loss": 2.0264, + "step": 1038 + }, + { + "epoch": 0.10959915611814346, + "grad_norm": 0.5057224035263062, + "learning_rate": 0.0015, + "loss": 1.9787, + "step": 1039 + }, + { + "epoch": 0.10970464135021098, + "grad_norm": 0.623877763748169, + "learning_rate": 0.0015, + "loss": 1.9988, + "step": 1040 + }, + { + "epoch": 0.10981012658227848, + "grad_norm": 0.5065798759460449, + "learning_rate": 0.0015, + "loss": 2.0087, + "step": 1041 + }, + { + "epoch": 0.10991561181434599, + "grad_norm": 0.5388501286506653, + "learning_rate": 0.0015, + "loss": 1.9967, + "step": 1042 + }, + { + "epoch": 0.1100210970464135, + "grad_norm": 0.589823305606842, + "learning_rate": 0.0015, + "loss": 1.9957, + "step": 1043 + }, + { + "epoch": 0.11012658227848102, + "grad_norm": 0.6062374114990234, + "learning_rate": 0.0015, + "loss": 1.9748, + "step": 1044 + }, + { + "epoch": 0.11023206751054852, + "grad_norm": 0.5757289528846741, + "learning_rate": 0.0015, + "loss": 2.0066, + "step": 1045 + }, + { + "epoch": 0.11033755274261603, + "grad_norm": 0.5318777561187744, + "learning_rate": 0.0015, + "loss": 2.0156, + "step": 1046 + }, + { + "epoch": 0.11044303797468355, + "grad_norm": 0.4598942697048187, + "learning_rate": 0.0015, + "loss": 1.9953, + "step": 1047 + }, + { + "epoch": 0.11054852320675106, + "grad_norm": 0.5211374759674072, + "learning_rate": 0.0015, + "loss": 1.9958, + "step": 1048 + }, + { + "epoch": 
0.11065400843881856, + "grad_norm": 0.6289006471633911, + "learning_rate": 0.0015, + "loss": 1.9837, + "step": 1049 + }, + { + "epoch": 0.11075949367088607, + "grad_norm": 0.7800363302230835, + "learning_rate": 0.0015, + "loss": 2.0197, + "step": 1050 + }, + { + "epoch": 0.11086497890295359, + "grad_norm": 0.5729717016220093, + "learning_rate": 0.0015, + "loss": 1.981, + "step": 1051 + }, + { + "epoch": 0.1109704641350211, + "grad_norm": 0.5333393216133118, + "learning_rate": 0.0015, + "loss": 2.0084, + "step": 1052 + }, + { + "epoch": 0.1110759493670886, + "grad_norm": 0.8517758250236511, + "learning_rate": 0.0015, + "loss": 2.01, + "step": 1053 + }, + { + "epoch": 0.11118143459915612, + "grad_norm": 0.8174675703048706, + "learning_rate": 0.0015, + "loss": 2.005, + "step": 1054 + }, + { + "epoch": 0.11128691983122363, + "grad_norm": 0.5155147910118103, + "learning_rate": 0.0015, + "loss": 1.979, + "step": 1055 + }, + { + "epoch": 0.11139240506329114, + "grad_norm": 0.6293675303459167, + "learning_rate": 0.0015, + "loss": 1.9687, + "step": 1056 + }, + { + "epoch": 0.11149789029535866, + "grad_norm": 0.7848137617111206, + "learning_rate": 0.0015, + "loss": 2.0003, + "step": 1057 + }, + { + "epoch": 0.11160337552742616, + "grad_norm": 0.6546721458435059, + "learning_rate": 0.0015, + "loss": 1.9847, + "step": 1058 + }, + { + "epoch": 0.11170886075949367, + "grad_norm": 0.5005208849906921, + "learning_rate": 0.0015, + "loss": 1.9546, + "step": 1059 + }, + { + "epoch": 0.11181434599156118, + "grad_norm": 0.660610020160675, + "learning_rate": 0.0015, + "loss": 1.9856, + "step": 1060 + }, + { + "epoch": 0.1119198312236287, + "grad_norm": 0.7893019914627075, + "learning_rate": 0.0015, + "loss": 2.0161, + "step": 1061 + }, + { + "epoch": 0.1120253164556962, + "grad_norm": 0.5391327142715454, + "learning_rate": 0.0015, + "loss": 1.9456, + "step": 1062 + }, + { + "epoch": 0.11213080168776371, + "grad_norm": 0.611042857170105, + "learning_rate": 0.0015, + "loss": 1.9991, + 
"step": 1063 + }, + { + "epoch": 0.11223628691983123, + "grad_norm": 0.8975500464439392, + "learning_rate": 0.0015, + "loss": 1.9622, + "step": 1064 + }, + { + "epoch": 0.11234177215189874, + "grad_norm": 0.8975341320037842, + "learning_rate": 0.0015, + "loss": 1.9706, + "step": 1065 + }, + { + "epoch": 0.11244725738396624, + "grad_norm": 0.5800900459289551, + "learning_rate": 0.0015, + "loss": 1.9549, + "step": 1066 + }, + { + "epoch": 0.11255274261603375, + "grad_norm": 0.5761541724205017, + "learning_rate": 0.0015, + "loss": 1.9832, + "step": 1067 + }, + { + "epoch": 0.11265822784810127, + "grad_norm": 0.777255654335022, + "learning_rate": 0.0015, + "loss": 1.9604, + "step": 1068 + }, + { + "epoch": 0.11276371308016878, + "grad_norm": 0.5889245271682739, + "learning_rate": 0.0015, + "loss": 1.9752, + "step": 1069 + }, + { + "epoch": 0.11286919831223628, + "grad_norm": 0.5593006610870361, + "learning_rate": 0.0015, + "loss": 1.992, + "step": 1070 + }, + { + "epoch": 0.1129746835443038, + "grad_norm": 0.7435798048973083, + "learning_rate": 0.0015, + "loss": 1.9515, + "step": 1071 + }, + { + "epoch": 0.11308016877637131, + "grad_norm": 0.6535780429840088, + "learning_rate": 0.0015, + "loss": 2.0214, + "step": 1072 + }, + { + "epoch": 0.11318565400843882, + "grad_norm": 0.5546253323554993, + "learning_rate": 0.0015, + "loss": 1.959, + "step": 1073 + }, + { + "epoch": 0.11329113924050632, + "grad_norm": 0.695321261882782, + "learning_rate": 0.0015, + "loss": 1.9537, + "step": 1074 + }, + { + "epoch": 0.11339662447257384, + "grad_norm": 0.5591767430305481, + "learning_rate": 0.0015, + "loss": 1.9822, + "step": 1075 + }, + { + "epoch": 0.11350210970464135, + "grad_norm": 0.6114763021469116, + "learning_rate": 0.0015, + "loss": 2.0125, + "step": 1076 + }, + { + "epoch": 0.11360759493670886, + "grad_norm": 0.7554244995117188, + "learning_rate": 0.0015, + "loss": 2.0057, + "step": 1077 + }, + { + "epoch": 0.11371308016877638, + "grad_norm": 0.7061241269111633, + 
"learning_rate": 0.0015, + "loss": 1.9577, + "step": 1078 + }, + { + "epoch": 0.11381856540084388, + "grad_norm": 0.551810085773468, + "learning_rate": 0.0015, + "loss": 1.9885, + "step": 1079 + }, + { + "epoch": 0.11392405063291139, + "grad_norm": 0.6357426643371582, + "learning_rate": 0.0015, + "loss": 1.9633, + "step": 1080 + }, + { + "epoch": 0.11402953586497891, + "grad_norm": 0.736291229724884, + "learning_rate": 0.0015, + "loss": 1.9794, + "step": 1081 + }, + { + "epoch": 0.11413502109704642, + "grad_norm": 0.6027610301971436, + "learning_rate": 0.0015, + "loss": 1.9985, + "step": 1082 + }, + { + "epoch": 0.11424050632911392, + "grad_norm": 0.5908238887786865, + "learning_rate": 0.0015, + "loss": 1.9655, + "step": 1083 + }, + { + "epoch": 0.11434599156118143, + "grad_norm": 0.609118640422821, + "learning_rate": 0.0015, + "loss": 1.9944, + "step": 1084 + }, + { + "epoch": 0.11445147679324895, + "grad_norm": 0.5698200464248657, + "learning_rate": 0.0015, + "loss": 1.9589, + "step": 1085 + }, + { + "epoch": 0.11455696202531646, + "grad_norm": 0.5287510752677917, + "learning_rate": 0.0015, + "loss": 1.9422, + "step": 1086 + }, + { + "epoch": 0.11466244725738396, + "grad_norm": 0.545129120349884, + "learning_rate": 0.0015, + "loss": 2.0234, + "step": 1087 + }, + { + "epoch": 0.11476793248945148, + "grad_norm": 0.5150210857391357, + "learning_rate": 0.0015, + "loss": 2.0034, + "step": 1088 + }, + { + "epoch": 0.11487341772151899, + "grad_norm": 0.5504254698753357, + "learning_rate": 0.0015, + "loss": 2.014, + "step": 1089 + }, + { + "epoch": 0.1149789029535865, + "grad_norm": 0.5121006369590759, + "learning_rate": 0.0015, + "loss": 1.9867, + "step": 1090 + }, + { + "epoch": 0.115084388185654, + "grad_norm": 0.5371829271316528, + "learning_rate": 0.0015, + "loss": 1.9441, + "step": 1091 + }, + { + "epoch": 0.11518987341772152, + "grad_norm": 0.5726335048675537, + "learning_rate": 0.0015, + "loss": 1.9548, + "step": 1092 + }, + { + "epoch": 0.11529535864978903, + 
"grad_norm": 0.615773618221283, + "learning_rate": 0.0015, + "loss": 1.9593, + "step": 1093 + }, + { + "epoch": 0.11540084388185654, + "grad_norm": 0.5490939617156982, + "learning_rate": 0.0015, + "loss": 1.9929, + "step": 1094 + }, + { + "epoch": 0.11550632911392406, + "grad_norm": 0.5754554271697998, + "learning_rate": 0.0015, + "loss": 1.9863, + "step": 1095 + }, + { + "epoch": 0.11561181434599156, + "grad_norm": 0.5992894172668457, + "learning_rate": 0.0015, + "loss": 1.9975, + "step": 1096 + }, + { + "epoch": 0.11571729957805907, + "grad_norm": 0.5731102228164673, + "learning_rate": 0.0015, + "loss": 1.9731, + "step": 1097 + }, + { + "epoch": 0.11582278481012659, + "grad_norm": 0.4969916343688965, + "learning_rate": 0.0015, + "loss": 1.9942, + "step": 1098 + }, + { + "epoch": 0.1159282700421941, + "grad_norm": 0.5169114470481873, + "learning_rate": 0.0015, + "loss": 1.9716, + "step": 1099 + }, + { + "epoch": 0.1160337552742616, + "grad_norm": 0.5371574759483337, + "learning_rate": 0.0015, + "loss": 1.9765, + "step": 1100 + }, + { + "epoch": 0.11613924050632911, + "grad_norm": 0.4847295880317688, + "learning_rate": 0.0015, + "loss": 1.9667, + "step": 1101 + }, + { + "epoch": 0.11624472573839663, + "grad_norm": 0.4888870120048523, + "learning_rate": 0.0015, + "loss": 1.9542, + "step": 1102 + }, + { + "epoch": 0.11635021097046414, + "grad_norm": 0.47460025548934937, + "learning_rate": 0.0015, + "loss": 1.9795, + "step": 1103 + }, + { + "epoch": 0.11645569620253164, + "grad_norm": 0.48211368918418884, + "learning_rate": 0.0015, + "loss": 1.9859, + "step": 1104 + }, + { + "epoch": 0.11656118143459916, + "grad_norm": 0.4726405441761017, + "learning_rate": 0.0015, + "loss": 2.001, + "step": 1105 + }, + { + "epoch": 0.11666666666666667, + "grad_norm": 0.48614928126335144, + "learning_rate": 0.0015, + "loss": 1.9683, + "step": 1106 + }, + { + "epoch": 0.11677215189873418, + "grad_norm": 0.47507819533348083, + "learning_rate": 0.0015, + "loss": 1.9645, + "step": 1107 + 
}, + { + "epoch": 0.11687763713080168, + "grad_norm": 0.47301697731018066, + "learning_rate": 0.0015, + "loss": 1.9654, + "step": 1108 + }, + { + "epoch": 0.1169831223628692, + "grad_norm": 0.5072380900382996, + "learning_rate": 0.0015, + "loss": 1.9898, + "step": 1109 + }, + { + "epoch": 0.11708860759493671, + "grad_norm": 0.47868582606315613, + "learning_rate": 0.0015, + "loss": 1.952, + "step": 1110 + }, + { + "epoch": 0.11719409282700421, + "grad_norm": 0.4702244699001312, + "learning_rate": 0.0015, + "loss": 1.9978, + "step": 1111 + }, + { + "epoch": 0.11729957805907174, + "grad_norm": 0.5345383286476135, + "learning_rate": 0.0015, + "loss": 1.9474, + "step": 1112 + }, + { + "epoch": 0.11740506329113924, + "grad_norm": 0.5054839849472046, + "learning_rate": 0.0015, + "loss": 1.9918, + "step": 1113 + }, + { + "epoch": 0.11751054852320675, + "grad_norm": 0.5359642505645752, + "learning_rate": 0.0015, + "loss": 1.9513, + "step": 1114 + }, + { + "epoch": 0.11761603375527427, + "grad_norm": 0.6218224167823792, + "learning_rate": 0.0015, + "loss": 2.0022, + "step": 1115 + }, + { + "epoch": 0.11772151898734177, + "grad_norm": 0.585112452507019, + "learning_rate": 0.0015, + "loss": 1.9663, + "step": 1116 + }, + { + "epoch": 0.11782700421940928, + "grad_norm": 0.5225456953048706, + "learning_rate": 0.0015, + "loss": 1.9649, + "step": 1117 + }, + { + "epoch": 0.11793248945147679, + "grad_norm": 0.45402202010154724, + "learning_rate": 0.0015, + "loss": 1.9892, + "step": 1118 + }, + { + "epoch": 0.11803797468354431, + "grad_norm": 0.5021755695343018, + "learning_rate": 0.0015, + "loss": 1.9542, + "step": 1119 + }, + { + "epoch": 0.11814345991561181, + "grad_norm": 0.4669731557369232, + "learning_rate": 0.0015, + "loss": 1.9293, + "step": 1120 + }, + { + "epoch": 0.11824894514767932, + "grad_norm": 0.4613353908061981, + "learning_rate": 0.0015, + "loss": 1.9253, + "step": 1121 + }, + { + "epoch": 0.11835443037974684, + "grad_norm": 0.48189011216163635, + "learning_rate": 
0.0015, + "loss": 1.972, + "step": 1122 + }, + { + "epoch": 0.11845991561181435, + "grad_norm": 0.5315321087837219, + "learning_rate": 0.0015, + "loss": 1.9697, + "step": 1123 + }, + { + "epoch": 0.11856540084388185, + "grad_norm": 0.5763983130455017, + "learning_rate": 0.0015, + "loss": 1.942, + "step": 1124 + }, + { + "epoch": 0.11867088607594936, + "grad_norm": 0.5278778672218323, + "learning_rate": 0.0015, + "loss": 1.9655, + "step": 1125 + }, + { + "epoch": 0.11877637130801688, + "grad_norm": 0.5579981803894043, + "learning_rate": 0.0015, + "loss": 1.9401, + "step": 1126 + }, + { + "epoch": 0.11888185654008439, + "grad_norm": 0.5369895100593567, + "learning_rate": 0.0015, + "loss": 1.9643, + "step": 1127 + }, + { + "epoch": 0.1189873417721519, + "grad_norm": 0.5380120277404785, + "learning_rate": 0.0015, + "loss": 1.9634, + "step": 1128 + }, + { + "epoch": 0.11909282700421941, + "grad_norm": 0.6157765984535217, + "learning_rate": 0.0015, + "loss": 1.9678, + "step": 1129 + }, + { + "epoch": 0.11919831223628692, + "grad_norm": 0.734170138835907, + "learning_rate": 0.0015, + "loss": 1.9468, + "step": 1130 + }, + { + "epoch": 0.11930379746835443, + "grad_norm": 0.7338579297065735, + "learning_rate": 0.0015, + "loss": 2.0127, + "step": 1131 + }, + { + "epoch": 0.11940928270042193, + "grad_norm": 0.6663212180137634, + "learning_rate": 0.0015, + "loss": 1.9487, + "step": 1132 + }, + { + "epoch": 0.11951476793248945, + "grad_norm": 0.5774522423744202, + "learning_rate": 0.0015, + "loss": 1.9725, + "step": 1133 + }, + { + "epoch": 0.11962025316455696, + "grad_norm": 0.5378499031066895, + "learning_rate": 0.0015, + "loss": 1.9819, + "step": 1134 + }, + { + "epoch": 0.11972573839662447, + "grad_norm": 0.7140102982521057, + "learning_rate": 0.0015, + "loss": 1.9674, + "step": 1135 + }, + { + "epoch": 0.11983122362869199, + "grad_norm": 0.7592074871063232, + "learning_rate": 0.0015, + "loss": 1.9326, + "step": 1136 + }, + { + "epoch": 0.1199367088607595, + "grad_norm": 
0.5613864660263062, + "learning_rate": 0.0015, + "loss": 1.9879, + "step": 1137 + }, + { + "epoch": 0.120042194092827, + "grad_norm": 0.6440332531929016, + "learning_rate": 0.0015, + "loss": 1.9775, + "step": 1138 + }, + { + "epoch": 0.12014767932489452, + "grad_norm": 0.7823119759559631, + "learning_rate": 0.0015, + "loss": 1.9609, + "step": 1139 + }, + { + "epoch": 0.12025316455696203, + "grad_norm": 0.5680260062217712, + "learning_rate": 0.0015, + "loss": 1.9837, + "step": 1140 + }, + { + "epoch": 0.12035864978902953, + "grad_norm": 0.5426461696624756, + "learning_rate": 0.0015, + "loss": 1.9091, + "step": 1141 + }, + { + "epoch": 0.12046413502109704, + "grad_norm": 0.5475901961326599, + "learning_rate": 0.0015, + "loss": 1.9542, + "step": 1142 + }, + { + "epoch": 0.12056962025316456, + "grad_norm": 0.6051155924797058, + "learning_rate": 0.0015, + "loss": 1.9994, + "step": 1143 + }, + { + "epoch": 0.12067510548523207, + "grad_norm": 0.6208446621894836, + "learning_rate": 0.0015, + "loss": 1.9243, + "step": 1144 + }, + { + "epoch": 0.12078059071729957, + "grad_norm": 0.5487984418869019, + "learning_rate": 0.0015, + "loss": 1.9616, + "step": 1145 + }, + { + "epoch": 0.1208860759493671, + "grad_norm": 0.5089645981788635, + "learning_rate": 0.0015, + "loss": 1.9577, + "step": 1146 + }, + { + "epoch": 0.1209915611814346, + "grad_norm": 0.547914981842041, + "learning_rate": 0.0015, + "loss": 1.9444, + "step": 1147 + }, + { + "epoch": 0.12109704641350211, + "grad_norm": 0.569980800151825, + "learning_rate": 0.0015, + "loss": 1.9948, + "step": 1148 + }, + { + "epoch": 0.12120253164556961, + "grad_norm": 0.541676938533783, + "learning_rate": 0.0015, + "loss": 1.9639, + "step": 1149 + }, + { + "epoch": 0.12130801687763713, + "grad_norm": 0.6242750883102417, + "learning_rate": 0.0015, + "loss": 1.9558, + "step": 1150 + }, + { + "epoch": 0.12141350210970464, + "grad_norm": 0.6878270506858826, + "learning_rate": 0.0015, + "loss": 1.9578, + "step": 1151 + }, + { + "epoch": 
0.12151898734177215, + "grad_norm": 0.5642980337142944, + "learning_rate": 0.0015, + "loss": 2.0148, + "step": 1152 + }, + { + "epoch": 0.12162447257383967, + "grad_norm": 0.5831053853034973, + "learning_rate": 0.0015, + "loss": 1.9544, + "step": 1153 + }, + { + "epoch": 0.12172995780590717, + "grad_norm": 0.478061705827713, + "learning_rate": 0.0015, + "loss": 1.9535, + "step": 1154 + }, + { + "epoch": 0.12183544303797468, + "grad_norm": 0.5408726334571838, + "learning_rate": 0.0015, + "loss": 1.9616, + "step": 1155 + }, + { + "epoch": 0.1219409282700422, + "grad_norm": 0.5360070466995239, + "learning_rate": 0.0015, + "loss": 1.9697, + "step": 1156 + }, + { + "epoch": 0.12204641350210971, + "grad_norm": 0.6306201219558716, + "learning_rate": 0.0015, + "loss": 1.9458, + "step": 1157 + }, + { + "epoch": 0.12215189873417721, + "grad_norm": 0.7493512034416199, + "learning_rate": 0.0015, + "loss": 1.9565, + "step": 1158 + }, + { + "epoch": 0.12225738396624472, + "grad_norm": 0.6081839203834534, + "learning_rate": 0.0015, + "loss": 1.9812, + "step": 1159 + }, + { + "epoch": 0.12236286919831224, + "grad_norm": 0.4983415901660919, + "learning_rate": 0.0015, + "loss": 1.9505, + "step": 1160 + }, + { + "epoch": 0.12246835443037975, + "grad_norm": 0.7041601538658142, + "learning_rate": 0.0015, + "loss": 1.9813, + "step": 1161 + }, + { + "epoch": 0.12257383966244725, + "grad_norm": 0.9435353875160217, + "learning_rate": 0.0015, + "loss": 1.9892, + "step": 1162 + }, + { + "epoch": 0.12267932489451477, + "grad_norm": 0.8395876884460449, + "learning_rate": 0.0015, + "loss": 1.9444, + "step": 1163 + }, + { + "epoch": 0.12278481012658228, + "grad_norm": 0.5393134951591492, + "learning_rate": 0.0015, + "loss": 1.9603, + "step": 1164 + }, + { + "epoch": 0.12289029535864979, + "grad_norm": 0.7198797464370728, + "learning_rate": 0.0015, + "loss": 1.9438, + "step": 1165 + }, + { + "epoch": 0.1229957805907173, + "grad_norm": 0.6611842513084412, + "learning_rate": 0.0015, + "loss": 
1.9457, + "step": 1166 + }, + { + "epoch": 0.12310126582278481, + "grad_norm": 0.5443592667579651, + "learning_rate": 0.0015, + "loss": 1.9891, + "step": 1167 + }, + { + "epoch": 0.12320675105485232, + "grad_norm": 0.5471925735473633, + "learning_rate": 0.0015, + "loss": 1.9243, + "step": 1168 + }, + { + "epoch": 0.12331223628691983, + "grad_norm": 0.5817985534667969, + "learning_rate": 0.0015, + "loss": 1.9644, + "step": 1169 + }, + { + "epoch": 0.12341772151898735, + "grad_norm": 0.6553073525428772, + "learning_rate": 0.0015, + "loss": 1.9297, + "step": 1170 + }, + { + "epoch": 0.12352320675105485, + "grad_norm": 0.5561738014221191, + "learning_rate": 0.0015, + "loss": 1.972, + "step": 1171 + }, + { + "epoch": 0.12362869198312236, + "grad_norm": 0.5268250107765198, + "learning_rate": 0.0015, + "loss": 1.9412, + "step": 1172 + }, + { + "epoch": 0.12373417721518987, + "grad_norm": 0.6380401849746704, + "learning_rate": 0.0015, + "loss": 1.9007, + "step": 1173 + }, + { + "epoch": 0.12383966244725739, + "grad_norm": 0.5170300006866455, + "learning_rate": 0.0015, + "loss": 1.9554, + "step": 1174 + }, + { + "epoch": 0.1239451476793249, + "grad_norm": 0.6789668798446655, + "learning_rate": 0.0015, + "loss": 1.9243, + "step": 1175 + }, + { + "epoch": 0.1240506329113924, + "grad_norm": 0.6632237434387207, + "learning_rate": 0.0015, + "loss": 1.8938, + "step": 1176 + }, + { + "epoch": 0.12415611814345992, + "grad_norm": 0.5542116761207581, + "learning_rate": 0.0015, + "loss": 1.9172, + "step": 1177 + }, + { + "epoch": 0.12426160337552743, + "grad_norm": 0.5060286521911621, + "learning_rate": 0.0015, + "loss": 1.9651, + "step": 1178 + }, + { + "epoch": 0.12436708860759493, + "grad_norm": 0.6038116216659546, + "learning_rate": 0.0015, + "loss": 1.9736, + "step": 1179 + }, + { + "epoch": 0.12447257383966245, + "grad_norm": 0.7100387811660767, + "learning_rate": 0.0015, + "loss": 1.9515, + "step": 1180 + }, + { + "epoch": 0.12457805907172996, + "grad_norm": 0.5594522953033447, 
+ "learning_rate": 0.0015, + "loss": 1.924, + "step": 1181 + }, + { + "epoch": 0.12468354430379747, + "grad_norm": 0.5433719754219055, + "learning_rate": 0.0015, + "loss": 1.9346, + "step": 1182 + }, + { + "epoch": 0.12478902953586497, + "grad_norm": 0.7844252586364746, + "learning_rate": 0.0015, + "loss": 1.9514, + "step": 1183 + }, + { + "epoch": 0.1248945147679325, + "grad_norm": 0.7507432103157043, + "learning_rate": 0.0015, + "loss": 1.9621, + "step": 1184 + }, + { + "epoch": 0.125, + "grad_norm": 0.4914301037788391, + "learning_rate": 0.0015, + "loss": 1.953, + "step": 1185 + }, + { + "epoch": 0.12510548523206752, + "grad_norm": 0.5079389214515686, + "learning_rate": 0.0015, + "loss": 1.9482, + "step": 1186 + }, + { + "epoch": 0.125210970464135, + "grad_norm": 0.5197246670722961, + "learning_rate": 0.0015, + "loss": 1.9596, + "step": 1187 + }, + { + "epoch": 0.12531645569620253, + "grad_norm": 0.48691096901893616, + "learning_rate": 0.0015, + "loss": 1.9611, + "step": 1188 + }, + { + "epoch": 0.12542194092827005, + "grad_norm": 0.5134087800979614, + "learning_rate": 0.0015, + "loss": 1.9179, + "step": 1189 + }, + { + "epoch": 0.12552742616033755, + "grad_norm": 0.5778923630714417, + "learning_rate": 0.0015, + "loss": 1.9153, + "step": 1190 + }, + { + "epoch": 0.12563291139240507, + "grad_norm": 0.6021434664726257, + "learning_rate": 0.0015, + "loss": 1.9434, + "step": 1191 + }, + { + "epoch": 0.1257383966244726, + "grad_norm": 0.567496120929718, + "learning_rate": 0.0015, + "loss": 1.9278, + "step": 1192 + }, + { + "epoch": 0.12584388185654008, + "grad_norm": 0.7252230644226074, + "learning_rate": 0.0015, + "loss": 1.9859, + "step": 1193 + }, + { + "epoch": 0.1259493670886076, + "grad_norm": 0.6520013809204102, + "learning_rate": 0.0015, + "loss": 2.0026, + "step": 1194 + }, + { + "epoch": 0.1260548523206751, + "grad_norm": 0.593677818775177, + "learning_rate": 0.0015, + "loss": 1.9466, + "step": 1195 + }, + { + "epoch": 0.1261603375527426, + "grad_norm": 
1.0265676975250244, + "learning_rate": 0.0015, + "loss": 1.9382, + "step": 1196 + }, + { + "epoch": 0.12626582278481013, + "grad_norm": 1.0938862562179565, + "learning_rate": 0.0015, + "loss": 1.9382, + "step": 1197 + }, + { + "epoch": 0.12637130801687763, + "grad_norm": 0.5909872055053711, + "learning_rate": 0.0015, + "loss": 1.9739, + "step": 1198 + }, + { + "epoch": 0.12647679324894515, + "grad_norm": 0.8484125137329102, + "learning_rate": 0.0015, + "loss": 1.9813, + "step": 1199 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 0.8648718595504761, + "learning_rate": 0.0015, + "loss": 1.952, + "step": 1200 + }, + { + "epoch": 0.12668776371308016, + "grad_norm": 0.6355069279670715, + "learning_rate": 0.0015, + "loss": 1.9441, + "step": 1201 + }, + { + "epoch": 0.12679324894514768, + "grad_norm": 0.6418319344520569, + "learning_rate": 0.0015, + "loss": 1.9418, + "step": 1202 + }, + { + "epoch": 0.1268987341772152, + "grad_norm": 0.9209024906158447, + "learning_rate": 0.0015, + "loss": 1.9329, + "step": 1203 + }, + { + "epoch": 0.1270042194092827, + "grad_norm": 0.8455290198326111, + "learning_rate": 0.0015, + "loss": 1.9469, + "step": 1204 + }, + { + "epoch": 0.1271097046413502, + "grad_norm": 0.5296677350997925, + "learning_rate": 0.0015, + "loss": 1.9771, + "step": 1205 + }, + { + "epoch": 0.12721518987341773, + "grad_norm": 0.5680837631225586, + "learning_rate": 0.0015, + "loss": 1.9523, + "step": 1206 + }, + { + "epoch": 0.12732067510548523, + "grad_norm": 0.6870173215866089, + "learning_rate": 0.0015, + "loss": 1.939, + "step": 1207 + }, + { + "epoch": 0.12742616033755275, + "grad_norm": 0.584763765335083, + "learning_rate": 0.0015, + "loss": 1.9413, + "step": 1208 + }, + { + "epoch": 0.12753164556962027, + "grad_norm": 0.5821747183799744, + "learning_rate": 0.0015, + "loss": 1.9455, + "step": 1209 + }, + { + "epoch": 0.12763713080168776, + "grad_norm": 0.6364511847496033, + "learning_rate": 0.0015, + "loss": 1.9723, + "step": 1210 + }, + { + "epoch": 
0.12774261603375528, + "grad_norm": 0.5813069939613342, + "learning_rate": 0.0015, + "loss": 1.9321, + "step": 1211 + }, + { + "epoch": 0.12784810126582277, + "grad_norm": 0.4982289671897888, + "learning_rate": 0.0015, + "loss": 1.9126, + "step": 1212 + }, + { + "epoch": 0.1279535864978903, + "grad_norm": 0.4817267954349518, + "learning_rate": 0.0015, + "loss": 1.927, + "step": 1213 + }, + { + "epoch": 0.1280590717299578, + "grad_norm": 0.5128777623176575, + "learning_rate": 0.0015, + "loss": 1.9373, + "step": 1214 + }, + { + "epoch": 0.1281645569620253, + "grad_norm": 0.5634309649467468, + "learning_rate": 0.0015, + "loss": 1.9241, + "step": 1215 + }, + { + "epoch": 0.12827004219409283, + "grad_norm": 0.6341714859008789, + "learning_rate": 0.0015, + "loss": 1.9532, + "step": 1216 + }, + { + "epoch": 0.12837552742616035, + "grad_norm": 0.5437911152839661, + "learning_rate": 0.0015, + "loss": 1.939, + "step": 1217 + }, + { + "epoch": 0.12848101265822784, + "grad_norm": 0.5569978952407837, + "learning_rate": 0.0015, + "loss": 1.9899, + "step": 1218 + }, + { + "epoch": 0.12858649789029536, + "grad_norm": 0.6110464334487915, + "learning_rate": 0.0015, + "loss": 1.9051, + "step": 1219 + }, + { + "epoch": 0.12869198312236288, + "grad_norm": 0.7041509747505188, + "learning_rate": 0.0015, + "loss": 1.9077, + "step": 1220 + }, + { + "epoch": 0.12879746835443037, + "grad_norm": 0.6820054054260254, + "learning_rate": 0.0015, + "loss": 1.9507, + "step": 1221 + }, + { + "epoch": 0.1289029535864979, + "grad_norm": 0.5716399550437927, + "learning_rate": 0.0015, + "loss": 1.9591, + "step": 1222 + }, + { + "epoch": 0.1290084388185654, + "grad_norm": 0.5545234680175781, + "learning_rate": 0.0015, + "loss": 1.9115, + "step": 1223 + }, + { + "epoch": 0.1291139240506329, + "grad_norm": 0.6347628831863403, + "learning_rate": 0.0015, + "loss": 1.9709, + "step": 1224 + }, + { + "epoch": 0.12921940928270043, + "grad_norm": 0.5690528154373169, + "learning_rate": 0.0015, + "loss": 1.8906, + 
"step": 1225 + }, + { + "epoch": 0.12932489451476795, + "grad_norm": 0.5223212838172913, + "learning_rate": 0.0015, + "loss": 1.9083, + "step": 1226 + }, + { + "epoch": 0.12943037974683544, + "grad_norm": 0.5073110461235046, + "learning_rate": 0.0015, + "loss": 1.9062, + "step": 1227 + }, + { + "epoch": 0.12953586497890296, + "grad_norm": 0.5078768134117126, + "learning_rate": 0.0015, + "loss": 1.9045, + "step": 1228 + }, + { + "epoch": 0.12964135021097045, + "grad_norm": 0.5362216830253601, + "learning_rate": 0.0015, + "loss": 1.9488, + "step": 1229 + }, + { + "epoch": 0.12974683544303797, + "grad_norm": 0.5915535092353821, + "learning_rate": 0.0015, + "loss": 1.8828, + "step": 1230 + }, + { + "epoch": 0.1298523206751055, + "grad_norm": 0.5478518605232239, + "learning_rate": 0.0015, + "loss": 1.9493, + "step": 1231 + }, + { + "epoch": 0.12995780590717299, + "grad_norm": 0.48701831698417664, + "learning_rate": 0.0015, + "loss": 1.9755, + "step": 1232 + }, + { + "epoch": 0.1300632911392405, + "grad_norm": 0.6849209070205688, + "learning_rate": 0.0015, + "loss": 1.9118, + "step": 1233 + }, + { + "epoch": 0.13016877637130803, + "grad_norm": 0.7918779850006104, + "learning_rate": 0.0015, + "loss": 1.9584, + "step": 1234 + }, + { + "epoch": 0.13027426160337552, + "grad_norm": 0.6584113240242004, + "learning_rate": 0.0015, + "loss": 1.9594, + "step": 1235 + }, + { + "epoch": 0.13037974683544304, + "grad_norm": 0.5166054964065552, + "learning_rate": 0.0015, + "loss": 1.9275, + "step": 1236 + }, + { + "epoch": 0.13048523206751056, + "grad_norm": 0.5061794519424438, + "learning_rate": 0.0015, + "loss": 1.9268, + "step": 1237 + }, + { + "epoch": 0.13059071729957805, + "grad_norm": 0.6233305931091309, + "learning_rate": 0.0015, + "loss": 1.9293, + "step": 1238 + }, + { + "epoch": 0.13069620253164557, + "grad_norm": 0.6041268706321716, + "learning_rate": 0.0015, + "loss": 1.9525, + "step": 1239 + }, + { + "epoch": 0.1308016877637131, + "grad_norm": 0.49778875708580017, + 
"learning_rate": 0.0015, + "loss": 1.9124, + "step": 1240 + }, + { + "epoch": 0.13090717299578059, + "grad_norm": 0.606499195098877, + "learning_rate": 0.0015, + "loss": 1.8972, + "step": 1241 + }, + { + "epoch": 0.1310126582278481, + "grad_norm": 0.6507505774497986, + "learning_rate": 0.0015, + "loss": 1.947, + "step": 1242 + }, + { + "epoch": 0.1311181434599156, + "grad_norm": 0.6108731031417847, + "learning_rate": 0.0015, + "loss": 1.9446, + "step": 1243 + }, + { + "epoch": 0.13122362869198312, + "grad_norm": 0.47272953391075134, + "learning_rate": 0.0015, + "loss": 1.9318, + "step": 1244 + }, + { + "epoch": 0.13132911392405064, + "grad_norm": 0.647142767906189, + "learning_rate": 0.0015, + "loss": 1.9676, + "step": 1245 + }, + { + "epoch": 0.13143459915611813, + "grad_norm": 0.529202938079834, + "learning_rate": 0.0015, + "loss": 1.9716, + "step": 1246 + }, + { + "epoch": 0.13154008438818565, + "grad_norm": 0.6392340064048767, + "learning_rate": 0.0015, + "loss": 1.9349, + "step": 1247 + }, + { + "epoch": 0.13164556962025317, + "grad_norm": 0.8022511601448059, + "learning_rate": 0.0015, + "loss": 1.9426, + "step": 1248 + }, + { + "epoch": 0.13175105485232066, + "grad_norm": 0.6682711243629456, + "learning_rate": 0.0015, + "loss": 1.9289, + "step": 1249 + }, + { + "epoch": 0.13185654008438819, + "grad_norm": 0.572135329246521, + "learning_rate": 0.0015, + "loss": 1.9282, + "step": 1250 + }, + { + "epoch": 0.1319620253164557, + "grad_norm": 0.6414625644683838, + "learning_rate": 0.0015, + "loss": 1.9652, + "step": 1251 + }, + { + "epoch": 0.1320675105485232, + "grad_norm": 0.47704997658729553, + "learning_rate": 0.0015, + "loss": 1.9204, + "step": 1252 + }, + { + "epoch": 0.13217299578059072, + "grad_norm": 0.6268481016159058, + "learning_rate": 0.0015, + "loss": 1.9174, + "step": 1253 + }, + { + "epoch": 0.13227848101265824, + "grad_norm": 0.5927824378013611, + "learning_rate": 0.0015, + "loss": 1.9014, + "step": 1254 + }, + { + "epoch": 0.13238396624472573, + 
"grad_norm": 0.5019801259040833, + "learning_rate": 0.0015, + "loss": 1.9107, + "step": 1255 + }, + { + "epoch": 0.13248945147679325, + "grad_norm": 0.7533797025680542, + "learning_rate": 0.0015, + "loss": 1.9139, + "step": 1256 + }, + { + "epoch": 0.13259493670886077, + "grad_norm": 0.7740539908409119, + "learning_rate": 0.0015, + "loss": 1.9471, + "step": 1257 + }, + { + "epoch": 0.13270042194092826, + "grad_norm": 0.5386258363723755, + "learning_rate": 0.0015, + "loss": 1.9229, + "step": 1258 + }, + { + "epoch": 0.13280590717299579, + "grad_norm": 0.7877216339111328, + "learning_rate": 0.0015, + "loss": 1.9412, + "step": 1259 + }, + { + "epoch": 0.13291139240506328, + "grad_norm": 0.9018682241439819, + "learning_rate": 0.0015, + "loss": 1.9276, + "step": 1260 + }, + { + "epoch": 0.1330168776371308, + "grad_norm": 0.6250298619270325, + "learning_rate": 0.0015, + "loss": 1.9353, + "step": 1261 + }, + { + "epoch": 0.13312236286919832, + "grad_norm": 0.7231791615486145, + "learning_rate": 0.0015, + "loss": 1.9128, + "step": 1262 + }, + { + "epoch": 0.1332278481012658, + "grad_norm": 0.7344803810119629, + "learning_rate": 0.0015, + "loss": 1.9061, + "step": 1263 + }, + { + "epoch": 0.13333333333333333, + "grad_norm": 0.7405490875244141, + "learning_rate": 0.0015, + "loss": 1.9064, + "step": 1264 + }, + { + "epoch": 0.13343881856540085, + "grad_norm": 0.6008923649787903, + "learning_rate": 0.0015, + "loss": 1.9235, + "step": 1265 + }, + { + "epoch": 0.13354430379746834, + "grad_norm": 0.5715137124061584, + "learning_rate": 0.0015, + "loss": 1.925, + "step": 1266 + }, + { + "epoch": 0.13364978902953586, + "grad_norm": 0.6669307947158813, + "learning_rate": 0.0015, + "loss": 1.9494, + "step": 1267 + }, + { + "epoch": 0.13375527426160339, + "grad_norm": 0.6033222675323486, + "learning_rate": 0.0015, + "loss": 1.9016, + "step": 1268 + }, + { + "epoch": 0.13386075949367088, + "grad_norm": 0.6311067938804626, + "learning_rate": 0.0015, + "loss": 1.953, + "step": 1269 + }, + 
{ + "epoch": 0.1339662447257384, + "grad_norm": 0.5897722244262695, + "learning_rate": 0.0015, + "loss": 1.9264, + "step": 1270 + }, + { + "epoch": 0.13407172995780592, + "grad_norm": 0.5945970416069031, + "learning_rate": 0.0015, + "loss": 1.9584, + "step": 1271 + }, + { + "epoch": 0.1341772151898734, + "grad_norm": 0.703152060508728, + "learning_rate": 0.0015, + "loss": 1.9897, + "step": 1272 + }, + { + "epoch": 0.13428270042194093, + "grad_norm": 0.5912957787513733, + "learning_rate": 0.0015, + "loss": 1.9146, + "step": 1273 + }, + { + "epoch": 0.13438818565400845, + "grad_norm": 0.5079948902130127, + "learning_rate": 0.0015, + "loss": 1.9265, + "step": 1274 + }, + { + "epoch": 0.13449367088607594, + "grad_norm": 0.5293281078338623, + "learning_rate": 0.0015, + "loss": 1.9319, + "step": 1275 + }, + { + "epoch": 0.13459915611814346, + "grad_norm": 0.5533709526062012, + "learning_rate": 0.0015, + "loss": 1.9128, + "step": 1276 + }, + { + "epoch": 0.13470464135021096, + "grad_norm": 0.5535594820976257, + "learning_rate": 0.0015, + "loss": 1.948, + "step": 1277 + }, + { + "epoch": 0.13481012658227848, + "grad_norm": 0.6553687453269958, + "learning_rate": 0.0015, + "loss": 1.9219, + "step": 1278 + }, + { + "epoch": 0.134915611814346, + "grad_norm": 0.5242647528648376, + "learning_rate": 0.0015, + "loss": 1.9505, + "step": 1279 + }, + { + "epoch": 0.1350210970464135, + "grad_norm": 0.4557071924209595, + "learning_rate": 0.0015, + "loss": 1.8867, + "step": 1280 + }, + { + "epoch": 0.135126582278481, + "grad_norm": 0.49745699763298035, + "learning_rate": 0.0015, + "loss": 1.9194, + "step": 1281 + }, + { + "epoch": 0.13523206751054853, + "grad_norm": 0.49396854639053345, + "learning_rate": 0.0015, + "loss": 1.874, + "step": 1282 + }, + { + "epoch": 0.13533755274261602, + "grad_norm": 0.5221226215362549, + "learning_rate": 0.0015, + "loss": 1.9331, + "step": 1283 + }, + { + "epoch": 0.13544303797468354, + "grad_norm": 0.5184764266014099, + "learning_rate": 0.0015, + 
"loss": 1.9056, + "step": 1284 + }, + { + "epoch": 0.13554852320675106, + "grad_norm": 0.5110886096954346, + "learning_rate": 0.0015, + "loss": 1.9058, + "step": 1285 + }, + { + "epoch": 0.13565400843881856, + "grad_norm": 0.5074360966682434, + "learning_rate": 0.0015, + "loss": 1.9169, + "step": 1286 + }, + { + "epoch": 0.13575949367088608, + "grad_norm": 0.5905272364616394, + "learning_rate": 0.0015, + "loss": 1.9116, + "step": 1287 + }, + { + "epoch": 0.1358649789029536, + "grad_norm": 1.001373291015625, + "learning_rate": 0.0015, + "loss": 1.9335, + "step": 1288 + }, + { + "epoch": 0.1359704641350211, + "grad_norm": 0.9810734391212463, + "learning_rate": 0.0015, + "loss": 1.977, + "step": 1289 + }, + { + "epoch": 0.1360759493670886, + "grad_norm": 0.7405872344970703, + "learning_rate": 0.0015, + "loss": 1.9381, + "step": 1290 + }, + { + "epoch": 0.13618143459915613, + "grad_norm": 0.5617262721061707, + "learning_rate": 0.0015, + "loss": 1.9498, + "step": 1291 + }, + { + "epoch": 0.13628691983122362, + "grad_norm": 0.6589044332504272, + "learning_rate": 0.0015, + "loss": 1.9188, + "step": 1292 + }, + { + "epoch": 0.13639240506329114, + "grad_norm": 0.62932288646698, + "learning_rate": 0.0015, + "loss": 1.9082, + "step": 1293 + }, + { + "epoch": 0.13649789029535864, + "grad_norm": 0.6271987557411194, + "learning_rate": 0.0015, + "loss": 1.9473, + "step": 1294 + }, + { + "epoch": 0.13660337552742616, + "grad_norm": 0.567391574382782, + "learning_rate": 0.0015, + "loss": 1.8498, + "step": 1295 + }, + { + "epoch": 0.13670886075949368, + "grad_norm": 0.5319356918334961, + "learning_rate": 0.0015, + "loss": 1.9092, + "step": 1296 + }, + { + "epoch": 0.13681434599156117, + "grad_norm": 0.6010887622833252, + "learning_rate": 0.0015, + "loss": 1.9257, + "step": 1297 + }, + { + "epoch": 0.1369198312236287, + "grad_norm": 0.4996672570705414, + "learning_rate": 0.0015, + "loss": 1.865, + "step": 1298 + }, + { + "epoch": 0.1370253164556962, + "grad_norm": 0.6371192932128906, 
+ "learning_rate": 0.0015, + "loss": 1.9058, + "step": 1299 + }, + { + "epoch": 0.1371308016877637, + "grad_norm": 0.6034473776817322, + "learning_rate": 0.0015, + "loss": 1.9081, + "step": 1300 + }, + { + "epoch": 0.13723628691983122, + "grad_norm": 0.5497844815254211, + "learning_rate": 0.0015, + "loss": 1.9084, + "step": 1301 + }, + { + "epoch": 0.13734177215189874, + "grad_norm": 0.4697115123271942, + "learning_rate": 0.0015, + "loss": 1.9346, + "step": 1302 + }, + { + "epoch": 0.13744725738396624, + "grad_norm": 0.5742257237434387, + "learning_rate": 0.0015, + "loss": 1.91, + "step": 1303 + }, + { + "epoch": 0.13755274261603376, + "grad_norm": 0.5243496298789978, + "learning_rate": 0.0015, + "loss": 1.9071, + "step": 1304 + }, + { + "epoch": 0.13765822784810128, + "grad_norm": 0.5664542317390442, + "learning_rate": 0.0015, + "loss": 1.9405, + "step": 1305 + }, + { + "epoch": 0.13776371308016877, + "grad_norm": 0.5212581753730774, + "learning_rate": 0.0015, + "loss": 1.9191, + "step": 1306 + }, + { + "epoch": 0.1378691983122363, + "grad_norm": 0.540576696395874, + "learning_rate": 0.0015, + "loss": 1.8893, + "step": 1307 + }, + { + "epoch": 0.1379746835443038, + "grad_norm": 0.511799156665802, + "learning_rate": 0.0015, + "loss": 1.9379, + "step": 1308 + }, + { + "epoch": 0.1380801687763713, + "grad_norm": 0.5222600698471069, + "learning_rate": 0.0015, + "loss": 1.8976, + "step": 1309 + }, + { + "epoch": 0.13818565400843882, + "grad_norm": 0.5310981273651123, + "learning_rate": 0.0015, + "loss": 1.8895, + "step": 1310 + }, + { + "epoch": 0.13829113924050632, + "grad_norm": 0.5159386396408081, + "learning_rate": 0.0015, + "loss": 1.9094, + "step": 1311 + }, + { + "epoch": 0.13839662447257384, + "grad_norm": 0.5052205324172974, + "learning_rate": 0.0015, + "loss": 1.9066, + "step": 1312 + }, + { + "epoch": 0.13850210970464136, + "grad_norm": 0.598240852355957, + "learning_rate": 0.0015, + "loss": 1.9448, + "step": 1313 + }, + { + "epoch": 0.13860759493670885, + 
"grad_norm": 0.5641024112701416, + "learning_rate": 0.0015, + "loss": 1.9506, + "step": 1314 + }, + { + "epoch": 0.13871308016877637, + "grad_norm": 0.5119905471801758, + "learning_rate": 0.0015, + "loss": 1.9432, + "step": 1315 + }, + { + "epoch": 0.1388185654008439, + "grad_norm": 0.5300618410110474, + "learning_rate": 0.0015, + "loss": 1.8719, + "step": 1316 + }, + { + "epoch": 0.13892405063291138, + "grad_norm": 0.5458492636680603, + "learning_rate": 0.0015, + "loss": 1.9017, + "step": 1317 + }, + { + "epoch": 0.1390295358649789, + "grad_norm": 0.696159839630127, + "learning_rate": 0.0015, + "loss": 1.9102, + "step": 1318 + }, + { + "epoch": 0.13913502109704642, + "grad_norm": 0.6056944131851196, + "learning_rate": 0.0015, + "loss": 1.9299, + "step": 1319 + }, + { + "epoch": 0.13924050632911392, + "grad_norm": 0.5543187260627747, + "learning_rate": 0.0015, + "loss": 1.9144, + "step": 1320 + }, + { + "epoch": 0.13934599156118144, + "grad_norm": 0.6323696374893188, + "learning_rate": 0.0015, + "loss": 1.8901, + "step": 1321 + }, + { + "epoch": 0.13945147679324896, + "grad_norm": 0.7577414512634277, + "learning_rate": 0.0015, + "loss": 1.9112, + "step": 1322 + }, + { + "epoch": 0.13955696202531645, + "grad_norm": 0.6322816014289856, + "learning_rate": 0.0015, + "loss": 1.9211, + "step": 1323 + }, + { + "epoch": 0.13966244725738397, + "grad_norm": 0.558798611164093, + "learning_rate": 0.0015, + "loss": 1.9221, + "step": 1324 + }, + { + "epoch": 0.13976793248945146, + "grad_norm": 0.561659038066864, + "learning_rate": 0.0015, + "loss": 1.8909, + "step": 1325 + }, + { + "epoch": 0.13987341772151898, + "grad_norm": 0.637718141078949, + "learning_rate": 0.0015, + "loss": 1.9256, + "step": 1326 + }, + { + "epoch": 0.1399789029535865, + "grad_norm": 0.6083945631980896, + "learning_rate": 0.0015, + "loss": 1.9133, + "step": 1327 + }, + { + "epoch": 0.140084388185654, + "grad_norm": 0.5388321280479431, + "learning_rate": 0.0015, + "loss": 1.8933, + "step": 1328 + }, + { + 
"epoch": 0.14018987341772152, + "grad_norm": 0.8044694662094116, + "learning_rate": 0.0015, + "loss": 1.9152, + "step": 1329 + }, + { + "epoch": 0.14029535864978904, + "grad_norm": 1.0448036193847656, + "learning_rate": 0.0015, + "loss": 1.9463, + "step": 1330 + }, + { + "epoch": 0.14040084388185653, + "grad_norm": 0.723845362663269, + "learning_rate": 0.0015, + "loss": 1.9355, + "step": 1331 + }, + { + "epoch": 0.14050632911392405, + "grad_norm": 0.4954807162284851, + "learning_rate": 0.0015, + "loss": 1.9053, + "step": 1332 + }, + { + "epoch": 0.14061181434599157, + "grad_norm": 0.7370380163192749, + "learning_rate": 0.0015, + "loss": 1.8835, + "step": 1333 + }, + { + "epoch": 0.14071729957805906, + "grad_norm": 0.5626806616783142, + "learning_rate": 0.0015, + "loss": 1.8999, + "step": 1334 + }, + { + "epoch": 0.14082278481012658, + "grad_norm": 0.642727792263031, + "learning_rate": 0.0015, + "loss": 1.8812, + "step": 1335 + }, + { + "epoch": 0.1409282700421941, + "grad_norm": 0.7532495856285095, + "learning_rate": 0.0015, + "loss": 1.9145, + "step": 1336 + }, + { + "epoch": 0.1410337552742616, + "grad_norm": 0.520085871219635, + "learning_rate": 0.0015, + "loss": 1.9333, + "step": 1337 + }, + { + "epoch": 0.14113924050632912, + "grad_norm": 0.6775652766227722, + "learning_rate": 0.0015, + "loss": 1.9143, + "step": 1338 + }, + { + "epoch": 0.14124472573839664, + "grad_norm": 0.5690031051635742, + "learning_rate": 0.0015, + "loss": 1.95, + "step": 1339 + }, + { + "epoch": 0.14135021097046413, + "grad_norm": 0.5454837679862976, + "learning_rate": 0.0015, + "loss": 1.8997, + "step": 1340 + }, + { + "epoch": 0.14145569620253165, + "grad_norm": 0.49931493401527405, + "learning_rate": 0.0015, + "loss": 1.9232, + "step": 1341 + }, + { + "epoch": 0.14156118143459914, + "grad_norm": 0.49317747354507446, + "learning_rate": 0.0015, + "loss": 1.9455, + "step": 1342 + }, + { + "epoch": 0.14166666666666666, + "grad_norm": 0.5960569381713867, + "learning_rate": 0.0015, + 
"loss": 1.8954, + "step": 1343 + }, + { + "epoch": 0.14177215189873418, + "grad_norm": 0.46698129177093506, + "learning_rate": 0.0015, + "loss": 1.9203, + "step": 1344 + }, + { + "epoch": 0.14187763713080168, + "grad_norm": 0.5159361362457275, + "learning_rate": 0.0015, + "loss": 1.9051, + "step": 1345 + }, + { + "epoch": 0.1419831223628692, + "grad_norm": 0.5780317783355713, + "learning_rate": 0.0015, + "loss": 1.8936, + "step": 1346 + }, + { + "epoch": 0.14208860759493672, + "grad_norm": 0.5957590341567993, + "learning_rate": 0.0015, + "loss": 1.9178, + "step": 1347 + }, + { + "epoch": 0.1421940928270042, + "grad_norm": 0.5916475653648376, + "learning_rate": 0.0015, + "loss": 1.9047, + "step": 1348 + }, + { + "epoch": 0.14229957805907173, + "grad_norm": 0.5349442958831787, + "learning_rate": 0.0015, + "loss": 1.9249, + "step": 1349 + }, + { + "epoch": 0.14240506329113925, + "grad_norm": 0.5401943325996399, + "learning_rate": 0.0015, + "loss": 1.9202, + "step": 1350 + }, + { + "epoch": 0.14251054852320674, + "grad_norm": 0.6015076041221619, + "learning_rate": 0.0015, + "loss": 1.9001, + "step": 1351 + }, + { + "epoch": 0.14261603375527426, + "grad_norm": 0.5438253879547119, + "learning_rate": 0.0015, + "loss": 1.8802, + "step": 1352 + }, + { + "epoch": 0.14272151898734178, + "grad_norm": 0.5660523176193237, + "learning_rate": 0.0015, + "loss": 1.9255, + "step": 1353 + }, + { + "epoch": 0.14282700421940928, + "grad_norm": 0.530584454536438, + "learning_rate": 0.0015, + "loss": 1.925, + "step": 1354 + }, + { + "epoch": 0.1429324894514768, + "grad_norm": 0.5304757952690125, + "learning_rate": 0.0015, + "loss": 1.8891, + "step": 1355 + }, + { + "epoch": 0.14303797468354432, + "grad_norm": 0.5094669461250305, + "learning_rate": 0.0015, + "loss": 1.886, + "step": 1356 + }, + { + "epoch": 0.1431434599156118, + "grad_norm": 0.5024034976959229, + "learning_rate": 0.0015, + "loss": 1.9161, + "step": 1357 + }, + { + "epoch": 0.14324894514767933, + "grad_norm": 
0.4967750608921051, + "learning_rate": 0.0015, + "loss": 1.9029, + "step": 1358 + }, + { + "epoch": 0.14335443037974682, + "grad_norm": 0.5492123365402222, + "learning_rate": 0.0015, + "loss": 1.8813, + "step": 1359 + }, + { + "epoch": 0.14345991561181434, + "grad_norm": 0.554287314414978, + "learning_rate": 0.0015, + "loss": 1.9306, + "step": 1360 + }, + { + "epoch": 0.14356540084388186, + "grad_norm": 0.5291145443916321, + "learning_rate": 0.0015, + "loss": 1.8936, + "step": 1361 + }, + { + "epoch": 0.14367088607594936, + "grad_norm": 0.49103015661239624, + "learning_rate": 0.0015, + "loss": 1.9026, + "step": 1362 + }, + { + "epoch": 0.14377637130801688, + "grad_norm": 0.44740259647369385, + "learning_rate": 0.0015, + "loss": 1.9061, + "step": 1363 + }, + { + "epoch": 0.1438818565400844, + "grad_norm": 0.536920428276062, + "learning_rate": 0.0015, + "loss": 1.9036, + "step": 1364 + }, + { + "epoch": 0.1439873417721519, + "grad_norm": 0.5714421272277832, + "learning_rate": 0.0015, + "loss": 1.8859, + "step": 1365 + }, + { + "epoch": 0.1440928270042194, + "grad_norm": 0.6274572014808655, + "learning_rate": 0.0015, + "loss": 1.8955, + "step": 1366 + }, + { + "epoch": 0.14419831223628693, + "grad_norm": 0.6626944541931152, + "learning_rate": 0.0015, + "loss": 1.9207, + "step": 1367 + }, + { + "epoch": 0.14430379746835442, + "grad_norm": 0.6287270188331604, + "learning_rate": 0.0015, + "loss": 1.8994, + "step": 1368 + }, + { + "epoch": 0.14440928270042194, + "grad_norm": 0.5640893578529358, + "learning_rate": 0.0015, + "loss": 1.9263, + "step": 1369 + }, + { + "epoch": 0.14451476793248946, + "grad_norm": 0.5738046169281006, + "learning_rate": 0.0015, + "loss": 1.8986, + "step": 1370 + }, + { + "epoch": 0.14462025316455696, + "grad_norm": 0.5989475846290588, + "learning_rate": 0.0015, + "loss": 1.8974, + "step": 1371 + }, + { + "epoch": 0.14472573839662448, + "grad_norm": 0.700721263885498, + "learning_rate": 0.0015, + "loss": 1.8893, + "step": 1372 + }, + { + "epoch": 
0.144831223628692, + "grad_norm": 0.6120259761810303, + "learning_rate": 0.0015, + "loss": 1.9043, + "step": 1373 + }, + { + "epoch": 0.1449367088607595, + "grad_norm": 0.496288925409317, + "learning_rate": 0.0015, + "loss": 1.886, + "step": 1374 + }, + { + "epoch": 0.145042194092827, + "grad_norm": 0.5448711514472961, + "learning_rate": 0.0015, + "loss": 1.9054, + "step": 1375 + }, + { + "epoch": 0.1451476793248945, + "grad_norm": 0.6740733981132507, + "learning_rate": 0.0015, + "loss": 1.8896, + "step": 1376 + }, + { + "epoch": 0.14525316455696202, + "grad_norm": 0.506820797920227, + "learning_rate": 0.0015, + "loss": 1.8615, + "step": 1377 + }, + { + "epoch": 0.14535864978902954, + "grad_norm": 0.6523921489715576, + "learning_rate": 0.0015, + "loss": 1.8576, + "step": 1378 + }, + { + "epoch": 0.14546413502109704, + "grad_norm": 0.8767607808113098, + "learning_rate": 0.0015, + "loss": 1.9352, + "step": 1379 + }, + { + "epoch": 0.14556962025316456, + "grad_norm": 0.7612723112106323, + "learning_rate": 0.0015, + "loss": 1.8807, + "step": 1380 + }, + { + "epoch": 0.14567510548523208, + "grad_norm": 0.5680716037750244, + "learning_rate": 0.0015, + "loss": 1.9319, + "step": 1381 + }, + { + "epoch": 0.14578059071729957, + "grad_norm": 0.6475235223770142, + "learning_rate": 0.0015, + "loss": 1.9018, + "step": 1382 + }, + { + "epoch": 0.1458860759493671, + "grad_norm": 0.9306323528289795, + "learning_rate": 0.0015, + "loss": 1.8967, + "step": 1383 + }, + { + "epoch": 0.1459915611814346, + "grad_norm": 0.5690900683403015, + "learning_rate": 0.0015, + "loss": 1.88, + "step": 1384 + }, + { + "epoch": 0.1460970464135021, + "grad_norm": 0.5900740027427673, + "learning_rate": 0.0015, + "loss": 1.9248, + "step": 1385 + }, + { + "epoch": 0.14620253164556962, + "grad_norm": 0.6930634379386902, + "learning_rate": 0.0015, + "loss": 1.9081, + "step": 1386 + }, + { + "epoch": 0.14630801687763714, + "grad_norm": 0.6711006760597229, + "learning_rate": 0.0015, + "loss": 1.9043, + 
"step": 1387 + }, + { + "epoch": 0.14641350210970464, + "grad_norm": 0.5288648009300232, + "learning_rate": 0.0015, + "loss": 1.894, + "step": 1388 + }, + { + "epoch": 0.14651898734177216, + "grad_norm": 0.5598739981651306, + "learning_rate": 0.0015, + "loss": 1.8999, + "step": 1389 + }, + { + "epoch": 0.14662447257383968, + "grad_norm": 0.7916274070739746, + "learning_rate": 0.0015, + "loss": 1.9217, + "step": 1390 + }, + { + "epoch": 0.14672995780590717, + "grad_norm": 0.775980532169342, + "learning_rate": 0.0015, + "loss": 1.9335, + "step": 1391 + }, + { + "epoch": 0.1468354430379747, + "grad_norm": 0.5397216081619263, + "learning_rate": 0.0015, + "loss": 1.9054, + "step": 1392 + }, + { + "epoch": 0.14694092827004218, + "grad_norm": 0.544588029384613, + "learning_rate": 0.0015, + "loss": 1.8826, + "step": 1393 + }, + { + "epoch": 0.1470464135021097, + "grad_norm": 0.7192983031272888, + "learning_rate": 0.0015, + "loss": 1.8856, + "step": 1394 + }, + { + "epoch": 0.14715189873417722, + "grad_norm": 0.7209638953208923, + "learning_rate": 0.0015, + "loss": 1.9181, + "step": 1395 + }, + { + "epoch": 0.14725738396624471, + "grad_norm": 0.4494272470474243, + "learning_rate": 0.0015, + "loss": 1.8951, + "step": 1396 + }, + { + "epoch": 0.14736286919831224, + "grad_norm": 0.5227426290512085, + "learning_rate": 0.0015, + "loss": 1.8495, + "step": 1397 + }, + { + "epoch": 0.14746835443037976, + "grad_norm": 0.4833694398403168, + "learning_rate": 0.0015, + "loss": 1.8527, + "step": 1398 + }, + { + "epoch": 0.14757383966244725, + "grad_norm": 0.5250594615936279, + "learning_rate": 0.0015, + "loss": 1.8901, + "step": 1399 + }, + { + "epoch": 0.14767932489451477, + "grad_norm": 0.5358574986457825, + "learning_rate": 0.0015, + "loss": 1.855, + "step": 1400 + }, + { + "epoch": 0.1477848101265823, + "grad_norm": 0.6389911770820618, + "learning_rate": 0.0015, + "loss": 1.9013, + "step": 1401 + }, + { + "epoch": 0.14789029535864978, + "grad_norm": 0.6424770355224609, + 
"learning_rate": 0.0015, + "loss": 1.8813, + "step": 1402 + }, + { + "epoch": 0.1479957805907173, + "grad_norm": 0.5069495439529419, + "learning_rate": 0.0015, + "loss": 1.9264, + "step": 1403 + }, + { + "epoch": 0.14810126582278482, + "grad_norm": 0.5170621871948242, + "learning_rate": 0.0015, + "loss": 1.8949, + "step": 1404 + }, + { + "epoch": 0.14820675105485231, + "grad_norm": 0.5447293519973755, + "learning_rate": 0.0015, + "loss": 1.8721, + "step": 1405 + }, + { + "epoch": 0.14831223628691984, + "grad_norm": 0.5404070019721985, + "learning_rate": 0.0015, + "loss": 1.8762, + "step": 1406 + }, + { + "epoch": 0.14841772151898736, + "grad_norm": 0.5344438552856445, + "learning_rate": 0.0015, + "loss": 1.8184, + "step": 1407 + }, + { + "epoch": 0.14852320675105485, + "grad_norm": 0.542682409286499, + "learning_rate": 0.0015, + "loss": 1.9025, + "step": 1408 + }, + { + "epoch": 0.14862869198312237, + "grad_norm": 0.5511244535446167, + "learning_rate": 0.0015, + "loss": 1.9206, + "step": 1409 + }, + { + "epoch": 0.14873417721518986, + "grad_norm": 0.5384794473648071, + "learning_rate": 0.0015, + "loss": 1.9052, + "step": 1410 + }, + { + "epoch": 0.14883966244725738, + "grad_norm": 0.5632681250572205, + "learning_rate": 0.0015, + "loss": 1.9285, + "step": 1411 + }, + { + "epoch": 0.1489451476793249, + "grad_norm": 0.6840286254882812, + "learning_rate": 0.0015, + "loss": 1.8769, + "step": 1412 + }, + { + "epoch": 0.1490506329113924, + "grad_norm": 0.5219169855117798, + "learning_rate": 0.0015, + "loss": 1.8857, + "step": 1413 + }, + { + "epoch": 0.14915611814345991, + "grad_norm": 0.6446149945259094, + "learning_rate": 0.0015, + "loss": 1.9076, + "step": 1414 + }, + { + "epoch": 0.14926160337552744, + "grad_norm": 0.8110989928245544, + "learning_rate": 0.0015, + "loss": 1.8654, + "step": 1415 + }, + { + "epoch": 0.14936708860759493, + "grad_norm": 0.7284207344055176, + "learning_rate": 0.0015, + "loss": 1.9381, + "step": 1416 + }, + { + "epoch": 0.14947257383966245, 
+ "grad_norm": 0.49806055426597595, + "learning_rate": 0.0015, + "loss": 1.8676, + "step": 1417 + }, + { + "epoch": 0.14957805907172997, + "grad_norm": 0.5483485460281372, + "learning_rate": 0.0015, + "loss": 1.9014, + "step": 1418 + }, + { + "epoch": 0.14968354430379746, + "grad_norm": 0.6617444753646851, + "learning_rate": 0.0015, + "loss": 1.8951, + "step": 1419 + }, + { + "epoch": 0.14978902953586498, + "grad_norm": 0.5816461443901062, + "learning_rate": 0.0015, + "loss": 1.9154, + "step": 1420 + }, + { + "epoch": 0.1498945147679325, + "grad_norm": 0.6530291438102722, + "learning_rate": 0.0015, + "loss": 1.8919, + "step": 1421 + }, + { + "epoch": 0.15, + "grad_norm": 0.7094903588294983, + "learning_rate": 0.0015, + "loss": 1.8866, + "step": 1422 + }, + { + "epoch": 0.15010548523206751, + "grad_norm": 0.5501838326454163, + "learning_rate": 0.0015, + "loss": 1.8919, + "step": 1423 + }, + { + "epoch": 0.150210970464135, + "grad_norm": 0.47818896174430847, + "learning_rate": 0.0015, + "loss": 1.8814, + "step": 1424 + }, + { + "epoch": 0.15031645569620253, + "grad_norm": 0.47611212730407715, + "learning_rate": 0.0015, + "loss": 1.8664, + "step": 1425 + }, + { + "epoch": 0.15042194092827005, + "grad_norm": 0.4526898264884949, + "learning_rate": 0.0015, + "loss": 1.9122, + "step": 1426 + }, + { + "epoch": 0.15052742616033754, + "grad_norm": 0.5256379246711731, + "learning_rate": 0.0015, + "loss": 1.9339, + "step": 1427 + }, + { + "epoch": 0.15063291139240506, + "grad_norm": 0.5785458087921143, + "learning_rate": 0.0015, + "loss": 1.8744, + "step": 1428 + }, + { + "epoch": 0.15073839662447258, + "grad_norm": 0.6067950129508972, + "learning_rate": 0.0015, + "loss": 1.8688, + "step": 1429 + }, + { + "epoch": 0.15084388185654007, + "grad_norm": 0.6320534348487854, + "learning_rate": 0.0015, + "loss": 1.8747, + "step": 1430 + }, + { + "epoch": 0.1509493670886076, + "grad_norm": 0.49489179253578186, + "learning_rate": 0.0015, + "loss": 1.8906, + "step": 1431 + }, + { + 
"epoch": 0.15105485232067511, + "grad_norm": 0.6265627145767212, + "learning_rate": 0.0015, + "loss": 1.8953, + "step": 1432 + }, + { + "epoch": 0.1511603375527426, + "grad_norm": 0.6951988339424133, + "learning_rate": 0.0015, + "loss": 1.8505, + "step": 1433 + }, + { + "epoch": 0.15126582278481013, + "grad_norm": 0.5689625144004822, + "learning_rate": 0.0015, + "loss": 1.8699, + "step": 1434 + }, + { + "epoch": 0.15137130801687765, + "grad_norm": 0.5780248641967773, + "learning_rate": 0.0015, + "loss": 1.9143, + "step": 1435 + }, + { + "epoch": 0.15147679324894514, + "grad_norm": 0.7600285410881042, + "learning_rate": 0.0015, + "loss": 1.8772, + "step": 1436 + }, + { + "epoch": 0.15158227848101266, + "grad_norm": 0.6233757138252258, + "learning_rate": 0.0015, + "loss": 1.9013, + "step": 1437 + }, + { + "epoch": 0.15168776371308018, + "grad_norm": 0.5610736012458801, + "learning_rate": 0.0015, + "loss": 1.8579, + "step": 1438 + }, + { + "epoch": 0.15179324894514767, + "grad_norm": 0.6786007285118103, + "learning_rate": 0.0015, + "loss": 1.9008, + "step": 1439 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 0.7043277025222778, + "learning_rate": 0.0015, + "loss": 1.8911, + "step": 1440 + }, + { + "epoch": 0.1520042194092827, + "grad_norm": 0.5467262864112854, + "learning_rate": 0.0015, + "loss": 1.8553, + "step": 1441 + }, + { + "epoch": 0.1521097046413502, + "grad_norm": 0.5686880946159363, + "learning_rate": 0.0015, + "loss": 1.8951, + "step": 1442 + }, + { + "epoch": 0.15221518987341773, + "grad_norm": 0.6156380772590637, + "learning_rate": 0.0015, + "loss": 1.8887, + "step": 1443 + }, + { + "epoch": 0.15232067510548522, + "grad_norm": 0.5663610696792603, + "learning_rate": 0.0015, + "loss": 1.896, + "step": 1444 + }, + { + "epoch": 0.15242616033755274, + "grad_norm": 0.4784590005874634, + "learning_rate": 0.0015, + "loss": 1.8684, + "step": 1445 + }, + { + "epoch": 0.15253164556962026, + "grad_norm": 0.5938230156898499, + "learning_rate": 0.0015, + 
"loss": 1.8793, + "step": 1446 + }, + { + "epoch": 0.15263713080168775, + "grad_norm": 0.49647626280784607, + "learning_rate": 0.0015, + "loss": 1.8941, + "step": 1447 + }, + { + "epoch": 0.15274261603375527, + "grad_norm": 0.4847690463066101, + "learning_rate": 0.0015, + "loss": 1.8821, + "step": 1448 + }, + { + "epoch": 0.1528481012658228, + "grad_norm": 0.4634542465209961, + "learning_rate": 0.0015, + "loss": 1.9033, + "step": 1449 + }, + { + "epoch": 0.1529535864978903, + "grad_norm": 0.4846337139606476, + "learning_rate": 0.0015, + "loss": 1.9016, + "step": 1450 + }, + { + "epoch": 0.1530590717299578, + "grad_norm": 0.46131351590156555, + "learning_rate": 0.0015, + "loss": 1.8913, + "step": 1451 + }, + { + "epoch": 0.15316455696202533, + "grad_norm": 0.4795179069042206, + "learning_rate": 0.0015, + "loss": 1.8861, + "step": 1452 + }, + { + "epoch": 0.15327004219409282, + "grad_norm": 0.5590512156486511, + "learning_rate": 0.0015, + "loss": 1.861, + "step": 1453 + }, + { + "epoch": 0.15337552742616034, + "grad_norm": 0.6019673943519592, + "learning_rate": 0.0015, + "loss": 1.8494, + "step": 1454 + }, + { + "epoch": 0.15348101265822786, + "grad_norm": 0.5595062375068665, + "learning_rate": 0.0015, + "loss": 1.8629, + "step": 1455 + }, + { + "epoch": 0.15358649789029535, + "grad_norm": 0.5171561241149902, + "learning_rate": 0.0015, + "loss": 1.9071, + "step": 1456 + }, + { + "epoch": 0.15369198312236287, + "grad_norm": 0.6185160279273987, + "learning_rate": 0.0015, + "loss": 1.8534, + "step": 1457 + }, + { + "epoch": 0.15379746835443037, + "grad_norm": 0.6364607214927673, + "learning_rate": 0.0015, + "loss": 1.8662, + "step": 1458 + }, + { + "epoch": 0.1539029535864979, + "grad_norm": 0.4733855426311493, + "learning_rate": 0.0015, + "loss": 1.8811, + "step": 1459 + }, + { + "epoch": 0.1540084388185654, + "grad_norm": 0.5263296365737915, + "learning_rate": 0.0015, + "loss": 1.9313, + "step": 1460 + }, + { + "epoch": 0.1541139240506329, + "grad_norm": 
0.5156674981117249, + "learning_rate": 0.0015, + "loss": 1.8937, + "step": 1461 + }, + { + "epoch": 0.15421940928270042, + "grad_norm": 0.4853241741657257, + "learning_rate": 0.0015, + "loss": 1.8315, + "step": 1462 + }, + { + "epoch": 0.15432489451476794, + "grad_norm": 0.49924007058143616, + "learning_rate": 0.0015, + "loss": 1.8542, + "step": 1463 + }, + { + "epoch": 0.15443037974683543, + "grad_norm": 0.5335673093795776, + "learning_rate": 0.0015, + "loss": 1.8749, + "step": 1464 + }, + { + "epoch": 0.15453586497890295, + "grad_norm": 0.5205351114273071, + "learning_rate": 0.0015, + "loss": 1.8589, + "step": 1465 + }, + { + "epoch": 0.15464135021097047, + "grad_norm": 0.5043001770973206, + "learning_rate": 0.0015, + "loss": 1.8689, + "step": 1466 + }, + { + "epoch": 0.15474683544303797, + "grad_norm": 0.5932759642601013, + "learning_rate": 0.0015, + "loss": 1.9155, + "step": 1467 + }, + { + "epoch": 0.1548523206751055, + "grad_norm": 0.6116816401481628, + "learning_rate": 0.0015, + "loss": 1.8978, + "step": 1468 + }, + { + "epoch": 0.154957805907173, + "grad_norm": 0.43347838521003723, + "learning_rate": 0.0015, + "loss": 1.9024, + "step": 1469 + }, + { + "epoch": 0.1550632911392405, + "grad_norm": 0.6800468564033508, + "learning_rate": 0.0015, + "loss": 1.8802, + "step": 1470 + }, + { + "epoch": 0.15516877637130802, + "grad_norm": 0.7267336845397949, + "learning_rate": 0.0015, + "loss": 1.9061, + "step": 1471 + }, + { + "epoch": 0.15527426160337554, + "grad_norm": 0.5035741925239563, + "learning_rate": 0.0015, + "loss": 1.9023, + "step": 1472 + }, + { + "epoch": 0.15537974683544303, + "grad_norm": 0.55813068151474, + "learning_rate": 0.0015, + "loss": 1.8653, + "step": 1473 + }, + { + "epoch": 0.15548523206751055, + "grad_norm": 0.6493954658508301, + "learning_rate": 0.0015, + "loss": 1.8819, + "step": 1474 + }, + { + "epoch": 0.15559071729957805, + "grad_norm": 0.5621296763420105, + "learning_rate": 0.0015, + "loss": 1.8897, + "step": 1475 + }, + { + "epoch": 
0.15569620253164557, + "grad_norm": 0.5570229887962341, + "learning_rate": 0.0015, + "loss": 1.88, + "step": 1476 + }, + { + "epoch": 0.1558016877637131, + "grad_norm": 0.6718025207519531, + "learning_rate": 0.0015, + "loss": 1.8785, + "step": 1477 + }, + { + "epoch": 0.15590717299578058, + "grad_norm": 0.56767737865448, + "learning_rate": 0.0015, + "loss": 1.8757, + "step": 1478 + }, + { + "epoch": 0.1560126582278481, + "grad_norm": 0.6203089952468872, + "learning_rate": 0.0015, + "loss": 1.9055, + "step": 1479 + }, + { + "epoch": 0.15611814345991562, + "grad_norm": 0.6166823506355286, + "learning_rate": 0.0015, + "loss": 1.8435, + "step": 1480 + }, + { + "epoch": 0.1562236286919831, + "grad_norm": 0.5030540823936462, + "learning_rate": 0.0015, + "loss": 1.895, + "step": 1481 + }, + { + "epoch": 0.15632911392405063, + "grad_norm": 0.6684638261795044, + "learning_rate": 0.0015, + "loss": 1.8784, + "step": 1482 + }, + { + "epoch": 0.15643459915611815, + "grad_norm": 0.6513040661811829, + "learning_rate": 0.0015, + "loss": 1.8804, + "step": 1483 + }, + { + "epoch": 0.15654008438818565, + "grad_norm": 0.5312937498092651, + "learning_rate": 0.0015, + "loss": 1.9073, + "step": 1484 + }, + { + "epoch": 0.15664556962025317, + "grad_norm": 0.5202449560165405, + "learning_rate": 0.0015, + "loss": 1.8449, + "step": 1485 + }, + { + "epoch": 0.1567510548523207, + "grad_norm": 0.47056058049201965, + "learning_rate": 0.0015, + "loss": 1.9037, + "step": 1486 + }, + { + "epoch": 0.15685654008438818, + "grad_norm": 0.49510762095451355, + "learning_rate": 0.0015, + "loss": 1.8363, + "step": 1487 + }, + { + "epoch": 0.1569620253164557, + "grad_norm": 0.4986557960510254, + "learning_rate": 0.0015, + "loss": 1.8992, + "step": 1488 + }, + { + "epoch": 0.15706751054852322, + "grad_norm": 0.4750652015209198, + "learning_rate": 0.0015, + "loss": 1.8632, + "step": 1489 + }, + { + "epoch": 0.1571729957805907, + "grad_norm": 0.5059555172920227, + "learning_rate": 0.0015, + "loss": 1.8714, + 
"step": 1490 + }, + { + "epoch": 0.15727848101265823, + "grad_norm": 0.4990314245223999, + "learning_rate": 0.0015, + "loss": 1.8459, + "step": 1491 + }, + { + "epoch": 0.15738396624472573, + "grad_norm": 0.49329227209091187, + "learning_rate": 0.0015, + "loss": 1.8777, + "step": 1492 + }, + { + "epoch": 0.15748945147679325, + "grad_norm": 0.5516387224197388, + "learning_rate": 0.0015, + "loss": 1.8695, + "step": 1493 + }, + { + "epoch": 0.15759493670886077, + "grad_norm": 0.6168501973152161, + "learning_rate": 0.0015, + "loss": 1.8702, + "step": 1494 + }, + { + "epoch": 0.15770042194092826, + "grad_norm": 0.5879580974578857, + "learning_rate": 0.0015, + "loss": 1.8553, + "step": 1495 + }, + { + "epoch": 0.15780590717299578, + "grad_norm": 0.5255633592605591, + "learning_rate": 0.0015, + "loss": 1.9239, + "step": 1496 + }, + { + "epoch": 0.1579113924050633, + "grad_norm": 0.49261853098869324, + "learning_rate": 0.0015, + "loss": 1.8797, + "step": 1497 + }, + { + "epoch": 0.1580168776371308, + "grad_norm": 0.552263617515564, + "learning_rate": 0.0015, + "loss": 1.8586, + "step": 1498 + }, + { + "epoch": 0.1581223628691983, + "grad_norm": 0.574817955493927, + "learning_rate": 0.0015, + "loss": 1.9088, + "step": 1499 + }, + { + "epoch": 0.15822784810126583, + "grad_norm": 0.49150896072387695, + "learning_rate": 0.0015, + "loss": 1.8756, + "step": 1500 + }, + { + "epoch": 0.15833333333333333, + "grad_norm": 0.5402496457099915, + "learning_rate": 0.0015, + "loss": 1.8705, + "step": 1501 + }, + { + "epoch": 0.15843881856540085, + "grad_norm": 0.6506123542785645, + "learning_rate": 0.0015, + "loss": 1.866, + "step": 1502 + }, + { + "epoch": 0.15854430379746837, + "grad_norm": 0.7406247854232788, + "learning_rate": 0.0015, + "loss": 1.8821, + "step": 1503 + }, + { + "epoch": 0.15864978902953586, + "grad_norm": 0.6048151850700378, + "learning_rate": 0.0015, + "loss": 1.9199, + "step": 1504 + }, + { + "epoch": 0.15875527426160338, + "grad_norm": 0.5365409851074219, + 
"learning_rate": 0.0015, + "loss": 1.8691, + "step": 1505 + }, + { + "epoch": 0.15886075949367087, + "grad_norm": 0.8235931396484375, + "learning_rate": 0.0015, + "loss": 1.859, + "step": 1506 + }, + { + "epoch": 0.1589662447257384, + "grad_norm": 1.0617879629135132, + "learning_rate": 0.0015, + "loss": 1.9236, + "step": 1507 + }, + { + "epoch": 0.1590717299578059, + "grad_norm": 0.7661322355270386, + "learning_rate": 0.0015, + "loss": 1.8518, + "step": 1508 + }, + { + "epoch": 0.1591772151898734, + "grad_norm": 0.5323702096939087, + "learning_rate": 0.0015, + "loss": 1.8727, + "step": 1509 + }, + { + "epoch": 0.15928270042194093, + "grad_norm": 0.9609919786453247, + "learning_rate": 0.0015, + "loss": 1.8842, + "step": 1510 + }, + { + "epoch": 0.15938818565400845, + "grad_norm": 0.8012824654579163, + "learning_rate": 0.0015, + "loss": 1.8607, + "step": 1511 + }, + { + "epoch": 0.15949367088607594, + "grad_norm": 0.5442456603050232, + "learning_rate": 0.0015, + "loss": 1.8757, + "step": 1512 + }, + { + "epoch": 0.15959915611814346, + "grad_norm": 0.8813095092773438, + "learning_rate": 0.0015, + "loss": 1.9034, + "step": 1513 + }, + { + "epoch": 0.15970464135021098, + "grad_norm": 0.704605758190155, + "learning_rate": 0.0015, + "loss": 1.8746, + "step": 1514 + }, + { + "epoch": 0.15981012658227847, + "grad_norm": 0.6096099615097046, + "learning_rate": 0.0015, + "loss": 1.9057, + "step": 1515 + }, + { + "epoch": 0.159915611814346, + "grad_norm": 0.6169876456260681, + "learning_rate": 0.0015, + "loss": 1.8855, + "step": 1516 + }, + { + "epoch": 0.1600210970464135, + "grad_norm": 0.7339420914649963, + "learning_rate": 0.0015, + "loss": 1.8827, + "step": 1517 + }, + { + "epoch": 0.160126582278481, + "grad_norm": 0.718132495880127, + "learning_rate": 0.0015, + "loss": 1.8824, + "step": 1518 + }, + { + "epoch": 0.16023206751054853, + "grad_norm": 0.5238819122314453, + "learning_rate": 0.0015, + "loss": 1.8892, + "step": 1519 + }, + { + "epoch": 0.16033755274261605, + 
"grad_norm": 0.5656552314758301, + "learning_rate": 0.0015, + "loss": 1.8712, + "step": 1520 + }, + { + "epoch": 0.16044303797468354, + "grad_norm": 0.6397966146469116, + "learning_rate": 0.0015, + "loss": 1.8874, + "step": 1521 + }, + { + "epoch": 0.16054852320675106, + "grad_norm": 0.5664899349212646, + "learning_rate": 0.0015, + "loss": 1.8662, + "step": 1522 + }, + { + "epoch": 0.16065400843881855, + "grad_norm": 0.5070544481277466, + "learning_rate": 0.0015, + "loss": 1.8484, + "step": 1523 + }, + { + "epoch": 0.16075949367088607, + "grad_norm": 0.5437641143798828, + "learning_rate": 0.0015, + "loss": 1.8599, + "step": 1524 + }, + { + "epoch": 0.1608649789029536, + "grad_norm": 0.5785396695137024, + "learning_rate": 0.0015, + "loss": 1.8813, + "step": 1525 + }, + { + "epoch": 0.16097046413502109, + "grad_norm": 0.5251486301422119, + "learning_rate": 0.0015, + "loss": 1.8095, + "step": 1526 + }, + { + "epoch": 0.1610759493670886, + "grad_norm": 0.4845963716506958, + "learning_rate": 0.0015, + "loss": 1.9025, + "step": 1527 + }, + { + "epoch": 0.16118143459915613, + "grad_norm": 0.5619175434112549, + "learning_rate": 0.0015, + "loss": 1.8383, + "step": 1528 + }, + { + "epoch": 0.16128691983122362, + "grad_norm": 0.5256401896476746, + "learning_rate": 0.0015, + "loss": 1.9047, + "step": 1529 + }, + { + "epoch": 0.16139240506329114, + "grad_norm": 0.4932977259159088, + "learning_rate": 0.0015, + "loss": 1.841, + "step": 1530 + }, + { + "epoch": 0.16149789029535866, + "grad_norm": 0.6641926169395447, + "learning_rate": 0.0015, + "loss": 1.8212, + "step": 1531 + }, + { + "epoch": 0.16160337552742615, + "grad_norm": 0.561372697353363, + "learning_rate": 0.0015, + "loss": 1.8225, + "step": 1532 + }, + { + "epoch": 0.16170886075949367, + "grad_norm": 0.4468382000923157, + "learning_rate": 0.0015, + "loss": 1.8606, + "step": 1533 + }, + { + "epoch": 0.1618143459915612, + "grad_norm": 0.5442577600479126, + "learning_rate": 0.0015, + "loss": 1.8562, + "step": 1534 + }, + 
{ + "epoch": 0.16191983122362869, + "grad_norm": 0.5323686003684998, + "learning_rate": 0.0015, + "loss": 1.8833, + "step": 1535 + }, + { + "epoch": 0.1620253164556962, + "grad_norm": 0.5348008871078491, + "learning_rate": 0.0015, + "loss": 1.8188, + "step": 1536 + }, + { + "epoch": 0.16213080168776373, + "grad_norm": 0.841894268989563, + "learning_rate": 0.0015, + "loss": 1.8405, + "step": 1537 + }, + { + "epoch": 0.16223628691983122, + "grad_norm": 0.7206017374992371, + "learning_rate": 0.0015, + "loss": 1.8518, + "step": 1538 + }, + { + "epoch": 0.16234177215189874, + "grad_norm": 0.5118170380592346, + "learning_rate": 0.0015, + "loss": 1.8518, + "step": 1539 + }, + { + "epoch": 0.16244725738396623, + "grad_norm": 0.6494106650352478, + "learning_rate": 0.0015, + "loss": 1.8818, + "step": 1540 + }, + { + "epoch": 0.16255274261603375, + "grad_norm": 0.7285601496696472, + "learning_rate": 0.0015, + "loss": 1.8676, + "step": 1541 + }, + { + "epoch": 0.16265822784810127, + "grad_norm": 0.45697057247161865, + "learning_rate": 0.0015, + "loss": 1.8189, + "step": 1542 + }, + { + "epoch": 0.16276371308016876, + "grad_norm": 0.6642175316810608, + "learning_rate": 0.0015, + "loss": 1.8592, + "step": 1543 + }, + { + "epoch": 0.16286919831223629, + "grad_norm": 0.7059324979782104, + "learning_rate": 0.0015, + "loss": 1.8697, + "step": 1544 + }, + { + "epoch": 0.1629746835443038, + "grad_norm": 0.5232691168785095, + "learning_rate": 0.0015, + "loss": 1.8293, + "step": 1545 + }, + { + "epoch": 0.1630801687763713, + "grad_norm": 0.5357704758644104, + "learning_rate": 0.0015, + "loss": 1.8315, + "step": 1546 + }, + { + "epoch": 0.16318565400843882, + "grad_norm": 0.5508078932762146, + "learning_rate": 0.0015, + "loss": 1.855, + "step": 1547 + }, + { + "epoch": 0.16329113924050634, + "grad_norm": 0.582895815372467, + "learning_rate": 0.0015, + "loss": 1.8947, + "step": 1548 + }, + { + "epoch": 0.16339662447257383, + "grad_norm": 0.5417135953903198, + "learning_rate": 0.0015, + 
"loss": 1.8855, + "step": 1549 + }, + { + "epoch": 0.16350210970464135, + "grad_norm": 0.5199078321456909, + "learning_rate": 0.0015, + "loss": 1.8524, + "step": 1550 + }, + { + "epoch": 0.16360759493670887, + "grad_norm": 0.5891591310501099, + "learning_rate": 0.0015, + "loss": 1.8633, + "step": 1551 + }, + { + "epoch": 0.16371308016877636, + "grad_norm": 0.5260766744613647, + "learning_rate": 0.0015, + "loss": 1.8756, + "step": 1552 + }, + { + "epoch": 0.16381856540084389, + "grad_norm": 0.532564640045166, + "learning_rate": 0.0015, + "loss": 1.9029, + "step": 1553 + }, + { + "epoch": 0.1639240506329114, + "grad_norm": 0.5645464062690735, + "learning_rate": 0.0015, + "loss": 1.8243, + "step": 1554 + }, + { + "epoch": 0.1640295358649789, + "grad_norm": 0.6165211796760559, + "learning_rate": 0.0015, + "loss": 1.8533, + "step": 1555 + }, + { + "epoch": 0.16413502109704642, + "grad_norm": 0.592799186706543, + "learning_rate": 0.0015, + "loss": 1.8479, + "step": 1556 + }, + { + "epoch": 0.1642405063291139, + "grad_norm": 0.7791257500648499, + "learning_rate": 0.0015, + "loss": 1.8865, + "step": 1557 + }, + { + "epoch": 0.16434599156118143, + "grad_norm": 0.9933264851570129, + "learning_rate": 0.0015, + "loss": 1.8491, + "step": 1558 + }, + { + "epoch": 0.16445147679324895, + "grad_norm": 0.8477607369422913, + "learning_rate": 0.0015, + "loss": 1.828, + "step": 1559 + }, + { + "epoch": 0.16455696202531644, + "grad_norm": 0.6159766912460327, + "learning_rate": 0.0015, + "loss": 1.8583, + "step": 1560 + }, + { + "epoch": 0.16466244725738396, + "grad_norm": 0.8783729076385498, + "learning_rate": 0.0015, + "loss": 1.8076, + "step": 1561 + }, + { + "epoch": 0.16476793248945149, + "grad_norm": 0.8979581594467163, + "learning_rate": 0.0015, + "loss": 1.8854, + "step": 1562 + }, + { + "epoch": 0.16487341772151898, + "grad_norm": 0.5556858777999878, + "learning_rate": 0.0015, + "loss": 1.8422, + "step": 1563 + }, + { + "epoch": 0.1649789029535865, + "grad_norm": 
0.9216030240058899, + "learning_rate": 0.0015, + "loss": 1.8519, + "step": 1564 + }, + { + "epoch": 0.16508438818565402, + "grad_norm": 1.0365732908248901, + "learning_rate": 0.0015, + "loss": 1.8103, + "step": 1565 + }, + { + "epoch": 0.1651898734177215, + "grad_norm": 0.4660235047340393, + "learning_rate": 0.0015, + "loss": 1.8632, + "step": 1566 + }, + { + "epoch": 0.16529535864978903, + "grad_norm": 0.9182472825050354, + "learning_rate": 0.0015, + "loss": 1.8678, + "step": 1567 + }, + { + "epoch": 0.16540084388185655, + "grad_norm": 0.7967995405197144, + "learning_rate": 0.0015, + "loss": 1.8962, + "step": 1568 + }, + { + "epoch": 0.16550632911392404, + "grad_norm": 0.5126940608024597, + "learning_rate": 0.0015, + "loss": 1.8474, + "step": 1569 + }, + { + "epoch": 0.16561181434599156, + "grad_norm": 0.7454519867897034, + "learning_rate": 0.0015, + "loss": 1.8575, + "step": 1570 + }, + { + "epoch": 0.16571729957805909, + "grad_norm": 0.5309233069419861, + "learning_rate": 0.0015, + "loss": 1.829, + "step": 1571 + }, + { + "epoch": 0.16582278481012658, + "grad_norm": 0.596048891544342, + "learning_rate": 0.0015, + "loss": 1.877, + "step": 1572 + }, + { + "epoch": 0.1659282700421941, + "grad_norm": 0.5971057415008545, + "learning_rate": 0.0015, + "loss": 1.8776, + "step": 1573 + }, + { + "epoch": 0.1660337552742616, + "grad_norm": 0.5350774526596069, + "learning_rate": 0.0015, + "loss": 1.8381, + "step": 1574 + }, + { + "epoch": 0.1661392405063291, + "grad_norm": 0.6208733320236206, + "learning_rate": 0.0015, + "loss": 1.8453, + "step": 1575 + }, + { + "epoch": 0.16624472573839663, + "grad_norm": 0.5253459811210632, + "learning_rate": 0.0015, + "loss": 1.8794, + "step": 1576 + }, + { + "epoch": 0.16635021097046412, + "grad_norm": 0.566020131111145, + "learning_rate": 0.0015, + "loss": 1.8807, + "step": 1577 + }, + { + "epoch": 0.16645569620253164, + "grad_norm": 0.6361575126647949, + "learning_rate": 0.0015, + "loss": 1.8412, + "step": 1578 + }, + { + "epoch": 
0.16656118143459916, + "grad_norm": 0.6150435209274292, + "learning_rate": 0.0015, + "loss": 1.9096, + "step": 1579 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 0.6432693004608154, + "learning_rate": 0.0015, + "loss": 1.8587, + "step": 1580 + }, + { + "epoch": 0.16677215189873418, + "grad_norm": 0.5596107840538025, + "learning_rate": 0.0015, + "loss": 1.8737, + "step": 1581 + }, + { + "epoch": 0.1668776371308017, + "grad_norm": 0.6534042358398438, + "learning_rate": 0.0015, + "loss": 1.867, + "step": 1582 + }, + { + "epoch": 0.1669831223628692, + "grad_norm": 0.6573490500450134, + "learning_rate": 0.0015, + "loss": 1.8356, + "step": 1583 + }, + { + "epoch": 0.1670886075949367, + "grad_norm": 0.5070683360099792, + "learning_rate": 0.0015, + "loss": 1.8489, + "step": 1584 + }, + { + "epoch": 0.16719409282700423, + "grad_norm": 0.62180095911026, + "learning_rate": 0.0015, + "loss": 1.897, + "step": 1585 + }, + { + "epoch": 0.16729957805907172, + "grad_norm": 0.5332339406013489, + "learning_rate": 0.0015, + "loss": 1.8668, + "step": 1586 + }, + { + "epoch": 0.16740506329113924, + "grad_norm": 0.6476864814758301, + "learning_rate": 0.0015, + "loss": 1.8272, + "step": 1587 + }, + { + "epoch": 0.16751054852320676, + "grad_norm": 0.5089560151100159, + "learning_rate": 0.0015, + "loss": 1.8459, + "step": 1588 + }, + { + "epoch": 0.16761603375527426, + "grad_norm": 0.4792965054512024, + "learning_rate": 0.0015, + "loss": 1.8695, + "step": 1589 + }, + { + "epoch": 0.16772151898734178, + "grad_norm": 0.5402447581291199, + "learning_rate": 0.0015, + "loss": 1.8333, + "step": 1590 + }, + { + "epoch": 0.16782700421940927, + "grad_norm": 0.508988082408905, + "learning_rate": 0.0015, + "loss": 1.8442, + "step": 1591 + }, + { + "epoch": 0.1679324894514768, + "grad_norm": 0.5005342960357666, + "learning_rate": 0.0015, + "loss": 1.8281, + "step": 1592 + }, + { + "epoch": 0.1680379746835443, + "grad_norm": 0.4904800057411194, + "learning_rate": 0.0015, + "loss": 1.8654, + 
"step": 1593 + }, + { + "epoch": 0.1681434599156118, + "grad_norm": 0.5084935426712036, + "learning_rate": 0.0015, + "loss": 1.8517, + "step": 1594 + }, + { + "epoch": 0.16824894514767932, + "grad_norm": 0.5018736720085144, + "learning_rate": 0.0015, + "loss": 1.8989, + "step": 1595 + }, + { + "epoch": 0.16835443037974684, + "grad_norm": 0.4767528772354126, + "learning_rate": 0.0015, + "loss": 1.8713, + "step": 1596 + }, + { + "epoch": 0.16845991561181434, + "grad_norm": 0.5333482623100281, + "learning_rate": 0.0015, + "loss": 1.8476, + "step": 1597 + }, + { + "epoch": 0.16856540084388186, + "grad_norm": 0.4845268726348877, + "learning_rate": 0.0015, + "loss": 1.8608, + "step": 1598 + }, + { + "epoch": 0.16867088607594938, + "grad_norm": 0.6233534812927246, + "learning_rate": 0.0015, + "loss": 1.8541, + "step": 1599 + }, + { + "epoch": 0.16877637130801687, + "grad_norm": 0.5676378607749939, + "learning_rate": 0.0015, + "loss": 1.8727, + "step": 1600 + }, + { + "epoch": 0.1688818565400844, + "grad_norm": 0.5269106030464172, + "learning_rate": 0.0015, + "loss": 1.8265, + "step": 1601 + }, + { + "epoch": 0.1689873417721519, + "grad_norm": 0.6660743951797485, + "learning_rate": 0.0015, + "loss": 1.896, + "step": 1602 + }, + { + "epoch": 0.1690928270042194, + "grad_norm": 0.6672782897949219, + "learning_rate": 0.0015, + "loss": 1.8653, + "step": 1603 + }, + { + "epoch": 0.16919831223628692, + "grad_norm": 0.5369535684585571, + "learning_rate": 0.0015, + "loss": 1.8526, + "step": 1604 + }, + { + "epoch": 0.16930379746835442, + "grad_norm": 0.5175344944000244, + "learning_rate": 0.0015, + "loss": 1.8336, + "step": 1605 + }, + { + "epoch": 0.16940928270042194, + "grad_norm": 0.5831400156021118, + "learning_rate": 0.0015, + "loss": 1.875, + "step": 1606 + }, + { + "epoch": 0.16951476793248946, + "grad_norm": 0.5787636637687683, + "learning_rate": 0.0015, + "loss": 1.8797, + "step": 1607 + }, + { + "epoch": 0.16962025316455695, + "grad_norm": 0.531754195690155, + 
"learning_rate": 0.0015, + "loss": 1.8332, + "step": 1608 + }, + { + "epoch": 0.16972573839662447, + "grad_norm": 0.5120327472686768, + "learning_rate": 0.0015, + "loss": 1.8604, + "step": 1609 + }, + { + "epoch": 0.169831223628692, + "grad_norm": 0.5185548067092896, + "learning_rate": 0.0015, + "loss": 1.8681, + "step": 1610 + }, + { + "epoch": 0.16993670886075948, + "grad_norm": 0.4890459477901459, + "learning_rate": 0.0015, + "loss": 1.8058, + "step": 1611 + }, + { + "epoch": 0.170042194092827, + "grad_norm": 0.5376722812652588, + "learning_rate": 0.0015, + "loss": 1.7789, + "step": 1612 + }, + { + "epoch": 0.17014767932489452, + "grad_norm": 0.7383502125740051, + "learning_rate": 0.0015, + "loss": 1.8277, + "step": 1613 + }, + { + "epoch": 0.17025316455696202, + "grad_norm": 0.736123263835907, + "learning_rate": 0.0015, + "loss": 1.8573, + "step": 1614 + }, + { + "epoch": 0.17035864978902954, + "grad_norm": 0.5057313442230225, + "learning_rate": 0.0015, + "loss": 1.8295, + "step": 1615 + }, + { + "epoch": 0.17046413502109706, + "grad_norm": 0.5502669215202332, + "learning_rate": 0.0015, + "loss": 1.8571, + "step": 1616 + }, + { + "epoch": 0.17056962025316455, + "grad_norm": 0.6407621502876282, + "learning_rate": 0.0015, + "loss": 1.8458, + "step": 1617 + }, + { + "epoch": 0.17067510548523207, + "grad_norm": 0.5297473669052124, + "learning_rate": 0.0015, + "loss": 1.8257, + "step": 1618 + }, + { + "epoch": 0.1707805907172996, + "grad_norm": 0.4920409023761749, + "learning_rate": 0.0015, + "loss": 1.8458, + "step": 1619 + }, + { + "epoch": 0.17088607594936708, + "grad_norm": 0.6494439840316772, + "learning_rate": 0.0015, + "loss": 1.8494, + "step": 1620 + }, + { + "epoch": 0.1709915611814346, + "grad_norm": 0.5731796622276306, + "learning_rate": 0.0015, + "loss": 1.8435, + "step": 1621 + }, + { + "epoch": 0.1710970464135021, + "grad_norm": 0.5125641226768494, + "learning_rate": 0.0015, + "loss": 1.8897, + "step": 1622 + }, + { + "epoch": 0.17120253164556962, + 
"grad_norm": 0.5825533270835876, + "learning_rate": 0.0015, + "loss": 1.8596, + "step": 1623 + }, + { + "epoch": 0.17130801687763714, + "grad_norm": 0.7571307420730591, + "learning_rate": 0.0015, + "loss": 1.8213, + "step": 1624 + }, + { + "epoch": 0.17141350210970463, + "grad_norm": 0.6041271686553955, + "learning_rate": 0.0015, + "loss": 1.8801, + "step": 1625 + }, + { + "epoch": 0.17151898734177215, + "grad_norm": 0.5118398070335388, + "learning_rate": 0.0015, + "loss": 1.8093, + "step": 1626 + }, + { + "epoch": 0.17162447257383967, + "grad_norm": 0.579782247543335, + "learning_rate": 0.0015, + "loss": 1.83, + "step": 1627 + }, + { + "epoch": 0.17172995780590716, + "grad_norm": 0.5675089955329895, + "learning_rate": 0.0015, + "loss": 1.8402, + "step": 1628 + }, + { + "epoch": 0.17183544303797468, + "grad_norm": 0.7444484829902649, + "learning_rate": 0.0015, + "loss": 1.8657, + "step": 1629 + }, + { + "epoch": 0.1719409282700422, + "grad_norm": 0.628352165222168, + "learning_rate": 0.0015, + "loss": 1.8231, + "step": 1630 + }, + { + "epoch": 0.1720464135021097, + "grad_norm": 0.5874509811401367, + "learning_rate": 0.0015, + "loss": 1.8561, + "step": 1631 + }, + { + "epoch": 0.17215189873417722, + "grad_norm": 0.5177331566810608, + "learning_rate": 0.0015, + "loss": 1.8524, + "step": 1632 + }, + { + "epoch": 0.17225738396624474, + "grad_norm": 0.544594943523407, + "learning_rate": 0.0015, + "loss": 1.8433, + "step": 1633 + }, + { + "epoch": 0.17236286919831223, + "grad_norm": 0.5621534585952759, + "learning_rate": 0.0015, + "loss": 1.8499, + "step": 1634 + }, + { + "epoch": 0.17246835443037975, + "grad_norm": 0.4710772931575775, + "learning_rate": 0.0015, + "loss": 1.8341, + "step": 1635 + }, + { + "epoch": 0.17257383966244727, + "grad_norm": 0.49431484937667847, + "learning_rate": 0.0015, + "loss": 1.8405, + "step": 1636 + }, + { + "epoch": 0.17267932489451476, + "grad_norm": 0.5371549129486084, + "learning_rate": 0.0015, + "loss": 1.851, + "step": 1637 + }, + { 
+ "epoch": 0.17278481012658228, + "grad_norm": 0.6518941521644592, + "learning_rate": 0.0015, + "loss": 1.8644, + "step": 1638 + }, + { + "epoch": 0.17289029535864978, + "grad_norm": 0.6416102051734924, + "learning_rate": 0.0015, + "loss": 1.8576, + "step": 1639 + }, + { + "epoch": 0.1729957805907173, + "grad_norm": 0.5830026865005493, + "learning_rate": 0.0015, + "loss": 1.8284, + "step": 1640 + }, + { + "epoch": 0.17310126582278482, + "grad_norm": 0.5949551463127136, + "learning_rate": 0.0015, + "loss": 1.8203, + "step": 1641 + }, + { + "epoch": 0.1732067510548523, + "grad_norm": 0.6380580067634583, + "learning_rate": 0.0015, + "loss": 1.8122, + "step": 1642 + }, + { + "epoch": 0.17331223628691983, + "grad_norm": 0.5648717880249023, + "learning_rate": 0.0015, + "loss": 1.8289, + "step": 1643 + }, + { + "epoch": 0.17341772151898735, + "grad_norm": 0.6136492490768433, + "learning_rate": 0.0015, + "loss": 1.8305, + "step": 1644 + }, + { + "epoch": 0.17352320675105484, + "grad_norm": 0.7076066136360168, + "learning_rate": 0.0015, + "loss": 1.8576, + "step": 1645 + }, + { + "epoch": 0.17362869198312236, + "grad_norm": 0.7656069993972778, + "learning_rate": 0.0015, + "loss": 1.8514, + "step": 1646 + }, + { + "epoch": 0.17373417721518988, + "grad_norm": 0.5621904730796814, + "learning_rate": 0.0015, + "loss": 1.8444, + "step": 1647 + }, + { + "epoch": 0.17383966244725738, + "grad_norm": 0.5006690621376038, + "learning_rate": 0.0015, + "loss": 1.8273, + "step": 1648 + }, + { + "epoch": 0.1739451476793249, + "grad_norm": 0.6614305377006531, + "learning_rate": 0.0015, + "loss": 1.8813, + "step": 1649 + }, + { + "epoch": 0.17405063291139242, + "grad_norm": 0.6870092749595642, + "learning_rate": 0.0015, + "loss": 1.8497, + "step": 1650 + }, + { + "epoch": 0.1741561181434599, + "grad_norm": 0.5306625962257385, + "learning_rate": 0.0015, + "loss": 1.8506, + "step": 1651 + }, + { + "epoch": 0.17426160337552743, + "grad_norm": 0.5352807641029358, + "learning_rate": 0.0015, + 
"loss": 1.8216, + "step": 1652 + }, + { + "epoch": 0.17436708860759495, + "grad_norm": 0.6174004077911377, + "learning_rate": 0.0015, + "loss": 1.8136, + "step": 1653 + }, + { + "epoch": 0.17447257383966244, + "grad_norm": 0.5463118553161621, + "learning_rate": 0.0015, + "loss": 1.8617, + "step": 1654 + }, + { + "epoch": 0.17457805907172996, + "grad_norm": 0.45304176211357117, + "learning_rate": 0.0015, + "loss": 1.8408, + "step": 1655 + }, + { + "epoch": 0.17468354430379746, + "grad_norm": 0.4786616265773773, + "learning_rate": 0.0015, + "loss": 1.8728, + "step": 1656 + }, + { + "epoch": 0.17478902953586498, + "grad_norm": 0.541914165019989, + "learning_rate": 0.0015, + "loss": 1.8261, + "step": 1657 + }, + { + "epoch": 0.1748945147679325, + "grad_norm": 0.5445770621299744, + "learning_rate": 0.0015, + "loss": 1.8269, + "step": 1658 + }, + { + "epoch": 0.175, + "grad_norm": 0.5142346620559692, + "learning_rate": 0.0015, + "loss": 1.8341, + "step": 1659 + }, + { + "epoch": 0.1751054852320675, + "grad_norm": 0.5110147595405579, + "learning_rate": 0.0015, + "loss": 1.8259, + "step": 1660 + }, + { + "epoch": 0.17521097046413503, + "grad_norm": 0.5237147212028503, + "learning_rate": 0.0015, + "loss": 1.8776, + "step": 1661 + }, + { + "epoch": 0.17531645569620252, + "grad_norm": 0.6443389058113098, + "learning_rate": 0.0015, + "loss": 1.836, + "step": 1662 + }, + { + "epoch": 0.17542194092827004, + "grad_norm": 0.7561408877372742, + "learning_rate": 0.0015, + "loss": 1.8373, + "step": 1663 + }, + { + "epoch": 0.17552742616033756, + "grad_norm": 0.4879377782344818, + "learning_rate": 0.0015, + "loss": 1.8345, + "step": 1664 + }, + { + "epoch": 0.17563291139240506, + "grad_norm": 0.7401023507118225, + "learning_rate": 0.0015, + "loss": 1.8542, + "step": 1665 + }, + { + "epoch": 0.17573839662447258, + "grad_norm": 0.6402983665466309, + "learning_rate": 0.0015, + "loss": 1.8797, + "step": 1666 + }, + { + "epoch": 0.1758438818565401, + "grad_norm": 0.5913488268852234, + 
"learning_rate": 0.0015, + "loss": 1.8581, + "step": 1667 + }, + { + "epoch": 0.1759493670886076, + "grad_norm": 0.7639641761779785, + "learning_rate": 0.0015, + "loss": 1.871, + "step": 1668 + }, + { + "epoch": 0.1760548523206751, + "grad_norm": 0.5228761434555054, + "learning_rate": 0.0015, + "loss": 1.8232, + "step": 1669 + }, + { + "epoch": 0.17616033755274263, + "grad_norm": 0.6810857653617859, + "learning_rate": 0.0015, + "loss": 1.8015, + "step": 1670 + }, + { + "epoch": 0.17626582278481012, + "grad_norm": 0.6449247598648071, + "learning_rate": 0.0015, + "loss": 1.8007, + "step": 1671 + }, + { + "epoch": 0.17637130801687764, + "grad_norm": 0.5456721782684326, + "learning_rate": 0.0015, + "loss": 1.8397, + "step": 1672 + }, + { + "epoch": 0.17647679324894514, + "grad_norm": 0.48542457818984985, + "learning_rate": 0.0015, + "loss": 1.8497, + "step": 1673 + }, + { + "epoch": 0.17658227848101266, + "grad_norm": 0.5676390528678894, + "learning_rate": 0.0015, + "loss": 1.8531, + "step": 1674 + }, + { + "epoch": 0.17668776371308018, + "grad_norm": 0.6341606974601746, + "learning_rate": 0.0015, + "loss": 1.8329, + "step": 1675 + }, + { + "epoch": 0.17679324894514767, + "grad_norm": 0.5365669131278992, + "learning_rate": 0.0015, + "loss": 1.8516, + "step": 1676 + }, + { + "epoch": 0.1768987341772152, + "grad_norm": 0.5370458960533142, + "learning_rate": 0.0015, + "loss": 1.7982, + "step": 1677 + }, + { + "epoch": 0.1770042194092827, + "grad_norm": 0.7546199560165405, + "learning_rate": 0.0015, + "loss": 1.8696, + "step": 1678 + }, + { + "epoch": 0.1771097046413502, + "grad_norm": 0.5257035493850708, + "learning_rate": 0.0015, + "loss": 1.8103, + "step": 1679 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 0.7526178359985352, + "learning_rate": 0.0015, + "loss": 1.8424, + "step": 1680 + }, + { + "epoch": 0.17732067510548524, + "grad_norm": 0.9093194007873535, + "learning_rate": 0.0015, + "loss": 1.8335, + "step": 1681 + }, + { + "epoch": 0.17742616033755274, + 
"grad_norm": 0.6237613558769226, + "learning_rate": 0.0015, + "loss": 1.8225, + "step": 1682 + }, + { + "epoch": 0.17753164556962026, + "grad_norm": 0.685141921043396, + "learning_rate": 0.0015, + "loss": 1.8651, + "step": 1683 + }, + { + "epoch": 0.17763713080168778, + "grad_norm": 0.9615616202354431, + "learning_rate": 0.0015, + "loss": 1.8715, + "step": 1684 + }, + { + "epoch": 0.17774261603375527, + "grad_norm": 0.6962200403213501, + "learning_rate": 0.0015, + "loss": 1.7927, + "step": 1685 + }, + { + "epoch": 0.1778481012658228, + "grad_norm": 0.503422200679779, + "learning_rate": 0.0015, + "loss": 1.7943, + "step": 1686 + }, + { + "epoch": 0.17795358649789028, + "grad_norm": 0.6377720236778259, + "learning_rate": 0.0015, + "loss": 1.847, + "step": 1687 + }, + { + "epoch": 0.1780590717299578, + "grad_norm": 0.6209604144096375, + "learning_rate": 0.0015, + "loss": 1.7904, + "step": 1688 + }, + { + "epoch": 0.17816455696202532, + "grad_norm": 0.5179665088653564, + "learning_rate": 0.0015, + "loss": 1.8506, + "step": 1689 + }, + { + "epoch": 0.17827004219409281, + "grad_norm": 0.5614203810691833, + "learning_rate": 0.0015, + "loss": 1.8255, + "step": 1690 + }, + { + "epoch": 0.17837552742616034, + "grad_norm": 0.6475895643234253, + "learning_rate": 0.0015, + "loss": 1.8837, + "step": 1691 + }, + { + "epoch": 0.17848101265822786, + "grad_norm": 0.5590607523918152, + "learning_rate": 0.0015, + "loss": 1.8446, + "step": 1692 + }, + { + "epoch": 0.17858649789029535, + "grad_norm": 0.5380192399024963, + "learning_rate": 0.0015, + "loss": 1.8111, + "step": 1693 + }, + { + "epoch": 0.17869198312236287, + "grad_norm": 0.7930208444595337, + "learning_rate": 0.0015, + "loss": 1.84, + "step": 1694 + }, + { + "epoch": 0.1787974683544304, + "grad_norm": 0.9309415817260742, + "learning_rate": 0.0015, + "loss": 1.8248, + "step": 1695 + }, + { + "epoch": 0.17890295358649788, + "grad_norm": 0.6583831310272217, + "learning_rate": 0.0015, + "loss": 1.8256, + "step": 1696 + }, + { + 
"epoch": 0.1790084388185654, + "grad_norm": 0.7181501984596252, + "learning_rate": 0.0015, + "loss": 1.8295, + "step": 1697 + }, + { + "epoch": 0.17911392405063292, + "grad_norm": 0.9588510990142822, + "learning_rate": 0.0015, + "loss": 1.8566, + "step": 1698 + }, + { + "epoch": 0.17921940928270041, + "grad_norm": 0.74287348985672, + "learning_rate": 0.0015, + "loss": 1.9011, + "step": 1699 + }, + { + "epoch": 0.17932489451476794, + "grad_norm": 0.5708943605422974, + "learning_rate": 0.0015, + "loss": 1.8581, + "step": 1700 + }, + { + "epoch": 0.17943037974683546, + "grad_norm": 0.849864661693573, + "learning_rate": 0.0015, + "loss": 1.8403, + "step": 1701 + }, + { + "epoch": 0.17953586497890295, + "grad_norm": 0.6671258211135864, + "learning_rate": 0.0015, + "loss": 1.8395, + "step": 1702 + }, + { + "epoch": 0.17964135021097047, + "grad_norm": 0.6216587424278259, + "learning_rate": 0.0015, + "loss": 1.7977, + "step": 1703 + }, + { + "epoch": 0.17974683544303796, + "grad_norm": 0.8394911289215088, + "learning_rate": 0.0015, + "loss": 1.8265, + "step": 1704 + }, + { + "epoch": 0.17985232067510548, + "grad_norm": 0.4909205138683319, + "learning_rate": 0.0015, + "loss": 1.8343, + "step": 1705 + }, + { + "epoch": 0.179957805907173, + "grad_norm": 0.6571671366691589, + "learning_rate": 0.0015, + "loss": 1.8563, + "step": 1706 + }, + { + "epoch": 0.1800632911392405, + "grad_norm": 0.6637552380561829, + "learning_rate": 0.0015, + "loss": 1.8051, + "step": 1707 + }, + { + "epoch": 0.18016877637130801, + "grad_norm": 0.47139400243759155, + "learning_rate": 0.0015, + "loss": 1.8009, + "step": 1708 + }, + { + "epoch": 0.18027426160337554, + "grad_norm": 0.6116742491722107, + "learning_rate": 0.0015, + "loss": 1.8116, + "step": 1709 + }, + { + "epoch": 0.18037974683544303, + "grad_norm": 0.5905157923698425, + "learning_rate": 0.0015, + "loss": 1.8604, + "step": 1710 + }, + { + "epoch": 0.18048523206751055, + "grad_norm": 0.502013087272644, + "learning_rate": 0.0015, + "loss": 
1.8423, + "step": 1711 + }, + { + "epoch": 0.18059071729957807, + "grad_norm": 0.5473625659942627, + "learning_rate": 0.0015, + "loss": 1.8591, + "step": 1712 + }, + { + "epoch": 0.18069620253164556, + "grad_norm": 0.5390632748603821, + "learning_rate": 0.0015, + "loss": 1.8207, + "step": 1713 + }, + { + "epoch": 0.18080168776371308, + "grad_norm": 0.5800744891166687, + "learning_rate": 0.0015, + "loss": 1.8051, + "step": 1714 + }, + { + "epoch": 0.1809071729957806, + "grad_norm": 0.518036425113678, + "learning_rate": 0.0015, + "loss": 1.8016, + "step": 1715 + }, + { + "epoch": 0.1810126582278481, + "grad_norm": 0.5341516733169556, + "learning_rate": 0.0015, + "loss": 1.8275, + "step": 1716 + }, + { + "epoch": 0.18111814345991561, + "grad_norm": 0.5151551961898804, + "learning_rate": 0.0015, + "loss": 1.7892, + "step": 1717 + }, + { + "epoch": 0.18122362869198314, + "grad_norm": 0.5301831364631653, + "learning_rate": 0.0015, + "loss": 1.8195, + "step": 1718 + }, + { + "epoch": 0.18132911392405063, + "grad_norm": 0.4552363157272339, + "learning_rate": 0.0015, + "loss": 1.8206, + "step": 1719 + }, + { + "epoch": 0.18143459915611815, + "grad_norm": 0.496009886264801, + "learning_rate": 0.0015, + "loss": 1.8484, + "step": 1720 + }, + { + "epoch": 0.18154008438818564, + "grad_norm": 0.476816326379776, + "learning_rate": 0.0015, + "loss": 1.813, + "step": 1721 + }, + { + "epoch": 0.18164556962025316, + "grad_norm": 0.48803362250328064, + "learning_rate": 0.0015, + "loss": 1.8112, + "step": 1722 + }, + { + "epoch": 0.18175105485232068, + "grad_norm": 0.5525384545326233, + "learning_rate": 0.0015, + "loss": 1.811, + "step": 1723 + }, + { + "epoch": 0.18185654008438817, + "grad_norm": 0.46394020318984985, + "learning_rate": 0.0015, + "loss": 1.808, + "step": 1724 + }, + { + "epoch": 0.1819620253164557, + "grad_norm": 0.5969376564025879, + "learning_rate": 0.0015, + "loss": 1.8106, + "step": 1725 + }, + { + "epoch": 0.18206751054852321, + "grad_norm": 0.5168143510818481, + 
"learning_rate": 0.0015, + "loss": 1.8467, + "step": 1726 + }, + { + "epoch": 0.1821729957805907, + "grad_norm": 0.5381571650505066, + "learning_rate": 0.0015, + "loss": 1.8176, + "step": 1727 + }, + { + "epoch": 0.18227848101265823, + "grad_norm": 0.46947160363197327, + "learning_rate": 0.0015, + "loss": 1.8212, + "step": 1728 + }, + { + "epoch": 0.18238396624472575, + "grad_norm": 0.5031561851501465, + "learning_rate": 0.0015, + "loss": 1.8408, + "step": 1729 + }, + { + "epoch": 0.18248945147679324, + "grad_norm": 0.5438616871833801, + "learning_rate": 0.0015, + "loss": 1.8651, + "step": 1730 + }, + { + "epoch": 0.18259493670886076, + "grad_norm": 0.47203943133354187, + "learning_rate": 0.0015, + "loss": 1.8139, + "step": 1731 + }, + { + "epoch": 0.18270042194092828, + "grad_norm": 0.5111636519432068, + "learning_rate": 0.0015, + "loss": 1.8216, + "step": 1732 + }, + { + "epoch": 0.18280590717299577, + "grad_norm": 0.5975373387336731, + "learning_rate": 0.0015, + "loss": 1.8136, + "step": 1733 + }, + { + "epoch": 0.1829113924050633, + "grad_norm": 0.5054951906204224, + "learning_rate": 0.0015, + "loss": 1.831, + "step": 1734 + }, + { + "epoch": 0.18301687763713081, + "grad_norm": 0.5159053206443787, + "learning_rate": 0.0015, + "loss": 1.8149, + "step": 1735 + }, + { + "epoch": 0.1831223628691983, + "grad_norm": 0.5666469931602478, + "learning_rate": 0.0015, + "loss": 1.8544, + "step": 1736 + }, + { + "epoch": 0.18322784810126583, + "grad_norm": 0.6056023836135864, + "learning_rate": 0.0015, + "loss": 1.8151, + "step": 1737 + }, + { + "epoch": 0.18333333333333332, + "grad_norm": 0.5668106079101562, + "learning_rate": 0.0015, + "loss": 1.8343, + "step": 1738 + }, + { + "epoch": 0.18343881856540084, + "grad_norm": 0.5655359029769897, + "learning_rate": 0.0015, + "loss": 1.8585, + "step": 1739 + }, + { + "epoch": 0.18354430379746836, + "grad_norm": 0.8024944067001343, + "learning_rate": 0.0015, + "loss": 1.8497, + "step": 1740 + }, + { + "epoch": 
0.18364978902953585, + "grad_norm": 0.6638942956924438, + "learning_rate": 0.0015, + "loss": 1.856, + "step": 1741 + }, + { + "epoch": 0.18375527426160337, + "grad_norm": 0.5905331969261169, + "learning_rate": 0.0015, + "loss": 1.8435, + "step": 1742 + }, + { + "epoch": 0.1838607594936709, + "grad_norm": 0.7127625942230225, + "learning_rate": 0.0015, + "loss": 1.8459, + "step": 1743 + }, + { + "epoch": 0.1839662447257384, + "grad_norm": 0.5033014416694641, + "learning_rate": 0.0015, + "loss": 1.8483, + "step": 1744 + }, + { + "epoch": 0.1840717299578059, + "grad_norm": 0.703793466091156, + "learning_rate": 0.0015, + "loss": 1.8413, + "step": 1745 + }, + { + "epoch": 0.18417721518987343, + "grad_norm": 0.73264479637146, + "learning_rate": 0.0015, + "loss": 1.8548, + "step": 1746 + }, + { + "epoch": 0.18428270042194092, + "grad_norm": 0.5461260080337524, + "learning_rate": 0.0015, + "loss": 1.875, + "step": 1747 + }, + { + "epoch": 0.18438818565400844, + "grad_norm": 0.5849325656890869, + "learning_rate": 0.0015, + "loss": 1.8169, + "step": 1748 + }, + { + "epoch": 0.18449367088607596, + "grad_norm": 0.5817967653274536, + "learning_rate": 0.0015, + "loss": 1.8647, + "step": 1749 + }, + { + "epoch": 0.18459915611814345, + "grad_norm": 0.6045406460762024, + "learning_rate": 0.0015, + "loss": 1.8692, + "step": 1750 + }, + { + "epoch": 0.18470464135021097, + "grad_norm": 0.5886181592941284, + "learning_rate": 0.0015, + "loss": 1.8272, + "step": 1751 + }, + { + "epoch": 0.1848101265822785, + "grad_norm": 0.6443178653717041, + "learning_rate": 0.0015, + "loss": 1.8452, + "step": 1752 + }, + { + "epoch": 0.184915611814346, + "grad_norm": 0.7146679759025574, + "learning_rate": 0.0015, + "loss": 1.8506, + "step": 1753 + }, + { + "epoch": 0.1850210970464135, + "grad_norm": 0.49639758467674255, + "learning_rate": 0.0015, + "loss": 1.8355, + "step": 1754 + }, + { + "epoch": 0.185126582278481, + "grad_norm": 0.6765944361686707, + "learning_rate": 0.0015, + "loss": 1.8424, + 
"step": 1755 + }, + { + "epoch": 0.18523206751054852, + "grad_norm": 0.6170516014099121, + "learning_rate": 0.0015, + "loss": 1.8067, + "step": 1756 + }, + { + "epoch": 0.18533755274261604, + "grad_norm": 0.5510251522064209, + "learning_rate": 0.0015, + "loss": 1.8241, + "step": 1757 + }, + { + "epoch": 0.18544303797468353, + "grad_norm": 0.549027681350708, + "learning_rate": 0.0015, + "loss": 1.8427, + "step": 1758 + }, + { + "epoch": 0.18554852320675105, + "grad_norm": 0.6877909898757935, + "learning_rate": 0.0015, + "loss": 1.8346, + "step": 1759 + }, + { + "epoch": 0.18565400843881857, + "grad_norm": 0.643034815788269, + "learning_rate": 0.0015, + "loss": 1.7842, + "step": 1760 + }, + { + "epoch": 0.18575949367088607, + "grad_norm": 0.48639509081840515, + "learning_rate": 0.0015, + "loss": 1.8209, + "step": 1761 + }, + { + "epoch": 0.1858649789029536, + "grad_norm": 0.5269736647605896, + "learning_rate": 0.0015, + "loss": 1.8492, + "step": 1762 + }, + { + "epoch": 0.1859704641350211, + "grad_norm": 0.560742199420929, + "learning_rate": 0.0015, + "loss": 1.8237, + "step": 1763 + }, + { + "epoch": 0.1860759493670886, + "grad_norm": 0.5695181488990784, + "learning_rate": 0.0015, + "loss": 1.8118, + "step": 1764 + }, + { + "epoch": 0.18618143459915612, + "grad_norm": 0.6077991127967834, + "learning_rate": 0.0015, + "loss": 1.8292, + "step": 1765 + }, + { + "epoch": 0.18628691983122364, + "grad_norm": 0.5074968338012695, + "learning_rate": 0.0015, + "loss": 1.8301, + "step": 1766 + }, + { + "epoch": 0.18639240506329113, + "grad_norm": 0.7066789269447327, + "learning_rate": 0.0015, + "loss": 1.8509, + "step": 1767 + }, + { + "epoch": 0.18649789029535865, + "grad_norm": 0.6739769577980042, + "learning_rate": 0.0015, + "loss": 1.803, + "step": 1768 + }, + { + "epoch": 0.18660337552742617, + "grad_norm": 0.47891584038734436, + "learning_rate": 0.0015, + "loss": 1.8413, + "step": 1769 + }, + { + "epoch": 0.18670886075949367, + "grad_norm": 0.6155842542648315, + 
"learning_rate": 0.0015, + "loss": 1.8387, + "step": 1770 + }, + { + "epoch": 0.1868143459915612, + "grad_norm": 0.562042236328125, + "learning_rate": 0.0015, + "loss": 1.8459, + "step": 1771 + }, + { + "epoch": 0.18691983122362868, + "grad_norm": 0.5393577218055725, + "learning_rate": 0.0015, + "loss": 1.8357, + "step": 1772 + }, + { + "epoch": 0.1870253164556962, + "grad_norm": 0.7965392470359802, + "learning_rate": 0.0015, + "loss": 1.8372, + "step": 1773 + }, + { + "epoch": 0.18713080168776372, + "grad_norm": 0.7083988189697266, + "learning_rate": 0.0015, + "loss": 1.8761, + "step": 1774 + }, + { + "epoch": 0.1872362869198312, + "grad_norm": 0.5049605369567871, + "learning_rate": 0.0015, + "loss": 1.878, + "step": 1775 + }, + { + "epoch": 0.18734177215189873, + "grad_norm": 0.6535817384719849, + "learning_rate": 0.0015, + "loss": 1.839, + "step": 1776 + }, + { + "epoch": 0.18744725738396625, + "grad_norm": 0.7916276454925537, + "learning_rate": 0.0015, + "loss": 1.8187, + "step": 1777 + }, + { + "epoch": 0.18755274261603375, + "grad_norm": 0.5668764114379883, + "learning_rate": 0.0015, + "loss": 1.7739, + "step": 1778 + }, + { + "epoch": 0.18765822784810127, + "grad_norm": 0.5987294316291809, + "learning_rate": 0.0015, + "loss": 1.8262, + "step": 1779 + }, + { + "epoch": 0.1877637130801688, + "grad_norm": 0.7916189432144165, + "learning_rate": 0.0015, + "loss": 1.8266, + "step": 1780 + }, + { + "epoch": 0.18786919831223628, + "grad_norm": 0.5684725642204285, + "learning_rate": 0.0015, + "loss": 1.8218, + "step": 1781 + }, + { + "epoch": 0.1879746835443038, + "grad_norm": 0.6714436411857605, + "learning_rate": 0.0015, + "loss": 1.8894, + "step": 1782 + }, + { + "epoch": 0.18808016877637132, + "grad_norm": 0.8307836651802063, + "learning_rate": 0.0015, + "loss": 1.8434, + "step": 1783 + }, + { + "epoch": 0.1881856540084388, + "grad_norm": 0.5791143178939819, + "learning_rate": 0.0015, + "loss": 1.8103, + "step": 1784 + }, + { + "epoch": 0.18829113924050633, + 
"grad_norm": 0.7713437676429749, + "learning_rate": 0.0015, + "loss": 1.8484, + "step": 1785 + }, + { + "epoch": 0.18839662447257383, + "grad_norm": 1.103920578956604, + "learning_rate": 0.0015, + "loss": 1.8143, + "step": 1786 + }, + { + "epoch": 0.18850210970464135, + "grad_norm": 0.5065297484397888, + "learning_rate": 0.0015, + "loss": 1.8272, + "step": 1787 + }, + { + "epoch": 0.18860759493670887, + "grad_norm": 0.7157902717590332, + "learning_rate": 0.0015, + "loss": 1.8429, + "step": 1788 + }, + { + "epoch": 0.18871308016877636, + "grad_norm": 0.5488754510879517, + "learning_rate": 0.0015, + "loss": 1.8247, + "step": 1789 + }, + { + "epoch": 0.18881856540084388, + "grad_norm": 0.6051275730133057, + "learning_rate": 0.0015, + "loss": 1.8171, + "step": 1790 + }, + { + "epoch": 0.1889240506329114, + "grad_norm": 0.744580864906311, + "learning_rate": 0.0015, + "loss": 1.799, + "step": 1791 + }, + { + "epoch": 0.1890295358649789, + "grad_norm": 0.5383637547492981, + "learning_rate": 0.0015, + "loss": 1.8214, + "step": 1792 + }, + { + "epoch": 0.1891350210970464, + "grad_norm": 0.6169219017028809, + "learning_rate": 0.0015, + "loss": 1.8502, + "step": 1793 + }, + { + "epoch": 0.18924050632911393, + "grad_norm": 0.7362203598022461, + "learning_rate": 0.0015, + "loss": 1.891, + "step": 1794 + }, + { + "epoch": 0.18934599156118143, + "grad_norm": 0.5578073859214783, + "learning_rate": 0.0015, + "loss": 1.8143, + "step": 1795 + }, + { + "epoch": 0.18945147679324895, + "grad_norm": 0.5064363479614258, + "learning_rate": 0.0015, + "loss": 1.8513, + "step": 1796 + }, + { + "epoch": 0.18955696202531647, + "grad_norm": 0.5139697194099426, + "learning_rate": 0.0015, + "loss": 1.81, + "step": 1797 + }, + { + "epoch": 0.18966244725738396, + "grad_norm": 0.5032205581665039, + "learning_rate": 0.0015, + "loss": 1.8164, + "step": 1798 + }, + { + "epoch": 0.18976793248945148, + "grad_norm": 0.5216019153594971, + "learning_rate": 0.0015, + "loss": 1.8131, + "step": 1799 + }, + { + 
"epoch": 0.189873417721519, + "grad_norm": 0.5179572701454163, + "learning_rate": 0.0015, + "loss": 1.812, + "step": 1800 + }, + { + "epoch": 0.1899789029535865, + "grad_norm": 0.46445566415786743, + "learning_rate": 0.0015, + "loss": 1.7668, + "step": 1801 + }, + { + "epoch": 0.190084388185654, + "grad_norm": 0.536352276802063, + "learning_rate": 0.0015, + "loss": 1.8004, + "step": 1802 + }, + { + "epoch": 0.1901898734177215, + "grad_norm": 0.5057159066200256, + "learning_rate": 0.0015, + "loss": 1.8296, + "step": 1803 + }, + { + "epoch": 0.19029535864978903, + "grad_norm": 0.4822067320346832, + "learning_rate": 0.0015, + "loss": 1.8135, + "step": 1804 + }, + { + "epoch": 0.19040084388185655, + "grad_norm": 0.634695827960968, + "learning_rate": 0.0015, + "loss": 1.8144, + "step": 1805 + }, + { + "epoch": 0.19050632911392404, + "grad_norm": 0.4706762433052063, + "learning_rate": 0.0015, + "loss": 1.8149, + "step": 1806 + }, + { + "epoch": 0.19061181434599156, + "grad_norm": 0.5567672848701477, + "learning_rate": 0.0015, + "loss": 1.8273, + "step": 1807 + }, + { + "epoch": 0.19071729957805908, + "grad_norm": 0.4880026876926422, + "learning_rate": 0.0015, + "loss": 1.809, + "step": 1808 + }, + { + "epoch": 0.19082278481012657, + "grad_norm": 0.5511531829833984, + "learning_rate": 0.0015, + "loss": 1.8503, + "step": 1809 + }, + { + "epoch": 0.1909282700421941, + "grad_norm": 0.5229374170303345, + "learning_rate": 0.0015, + "loss": 1.7905, + "step": 1810 + }, + { + "epoch": 0.1910337552742616, + "grad_norm": 0.6054973006248474, + "learning_rate": 0.0015, + "loss": 1.7705, + "step": 1811 + }, + { + "epoch": 0.1911392405063291, + "grad_norm": 0.6326051354408264, + "learning_rate": 0.0015, + "loss": 1.823, + "step": 1812 + }, + { + "epoch": 0.19124472573839663, + "grad_norm": 0.4894816279411316, + "learning_rate": 0.0015, + "loss": 1.8092, + "step": 1813 + }, + { + "epoch": 0.19135021097046415, + "grad_norm": 0.5103054642677307, + "learning_rate": 0.0015, + "loss": 
1.7862, + "step": 1814 + }, + { + "epoch": 0.19145569620253164, + "grad_norm": 0.5828525424003601, + "learning_rate": 0.0015, + "loss": 1.799, + "step": 1815 + }, + { + "epoch": 0.19156118143459916, + "grad_norm": 0.5314319133758545, + "learning_rate": 0.0015, + "loss": 1.824, + "step": 1816 + }, + { + "epoch": 0.19166666666666668, + "grad_norm": 0.4891503155231476, + "learning_rate": 0.0015, + "loss": 1.7893, + "step": 1817 + }, + { + "epoch": 0.19177215189873417, + "grad_norm": 0.5227344036102295, + "learning_rate": 0.0015, + "loss": 1.8017, + "step": 1818 + }, + { + "epoch": 0.1918776371308017, + "grad_norm": 0.5064538717269897, + "learning_rate": 0.0015, + "loss": 1.8351, + "step": 1819 + }, + { + "epoch": 0.19198312236286919, + "grad_norm": 0.4842202663421631, + "learning_rate": 0.0015, + "loss": 1.8762, + "step": 1820 + }, + { + "epoch": 0.1920886075949367, + "grad_norm": 0.5408952832221985, + "learning_rate": 0.0015, + "loss": 1.8131, + "step": 1821 + }, + { + "epoch": 0.19219409282700423, + "grad_norm": 0.559117317199707, + "learning_rate": 0.0015, + "loss": 1.8419, + "step": 1822 + }, + { + "epoch": 0.19229957805907172, + "grad_norm": 0.6070255041122437, + "learning_rate": 0.0015, + "loss": 1.8325, + "step": 1823 + }, + { + "epoch": 0.19240506329113924, + "grad_norm": 0.5837768912315369, + "learning_rate": 0.0015, + "loss": 1.84, + "step": 1824 + }, + { + "epoch": 0.19251054852320676, + "grad_norm": 0.574431300163269, + "learning_rate": 0.0015, + "loss": 1.8667, + "step": 1825 + }, + { + "epoch": 0.19261603375527425, + "grad_norm": 0.5465744137763977, + "learning_rate": 0.0015, + "loss": 1.8116, + "step": 1826 + }, + { + "epoch": 0.19272151898734177, + "grad_norm": 0.4835458993911743, + "learning_rate": 0.0015, + "loss": 1.8011, + "step": 1827 + }, + { + "epoch": 0.1928270042194093, + "grad_norm": 0.536300003528595, + "learning_rate": 0.0015, + "loss": 1.7582, + "step": 1828 + }, + { + "epoch": 0.19293248945147679, + "grad_norm": 0.5421085953712463, + 
"learning_rate": 0.0015, + "loss": 1.796, + "step": 1829 + }, + { + "epoch": 0.1930379746835443, + "grad_norm": 0.5275554060935974, + "learning_rate": 0.0015, + "loss": 1.7876, + "step": 1830 + }, + { + "epoch": 0.19314345991561183, + "grad_norm": 0.536766529083252, + "learning_rate": 0.0015, + "loss": 1.7966, + "step": 1831 + }, + { + "epoch": 0.19324894514767932, + "grad_norm": 0.4939540922641754, + "learning_rate": 0.0015, + "loss": 1.8043, + "step": 1832 + }, + { + "epoch": 0.19335443037974684, + "grad_norm": 0.5534696578979492, + "learning_rate": 0.0015, + "loss": 1.8456, + "step": 1833 + }, + { + "epoch": 0.19345991561181436, + "grad_norm": 0.6097571849822998, + "learning_rate": 0.0015, + "loss": 1.8403, + "step": 1834 + }, + { + "epoch": 0.19356540084388185, + "grad_norm": 0.6294739842414856, + "learning_rate": 0.0015, + "loss": 1.8057, + "step": 1835 + }, + { + "epoch": 0.19367088607594937, + "grad_norm": 0.6127732396125793, + "learning_rate": 0.0015, + "loss": 1.8297, + "step": 1836 + }, + { + "epoch": 0.19377637130801686, + "grad_norm": 0.5328330993652344, + "learning_rate": 0.0015, + "loss": 1.7824, + "step": 1837 + }, + { + "epoch": 0.19388185654008439, + "grad_norm": 0.5999840497970581, + "learning_rate": 0.0015, + "loss": 1.8088, + "step": 1838 + }, + { + "epoch": 0.1939873417721519, + "grad_norm": 0.6274462342262268, + "learning_rate": 0.0015, + "loss": 1.822, + "step": 1839 + }, + { + "epoch": 0.1940928270042194, + "grad_norm": 0.5135725736618042, + "learning_rate": 0.0015, + "loss": 1.809, + "step": 1840 + }, + { + "epoch": 0.19419831223628692, + "grad_norm": 0.5076367259025574, + "learning_rate": 0.0015, + "loss": 1.8304, + "step": 1841 + }, + { + "epoch": 0.19430379746835444, + "grad_norm": 0.5924019813537598, + "learning_rate": 0.0015, + "loss": 1.8179, + "step": 1842 + }, + { + "epoch": 0.19440928270042193, + "grad_norm": 0.5452865362167358, + "learning_rate": 0.0015, + "loss": 1.7876, + "step": 1843 + }, + { + "epoch": 0.19451476793248945, + 
"grad_norm": 0.6155842542648315, + "learning_rate": 0.0015, + "loss": 1.8497, + "step": 1844 + }, + { + "epoch": 0.19462025316455697, + "grad_norm": 0.5350075960159302, + "learning_rate": 0.0015, + "loss": 1.8067, + "step": 1845 + }, + { + "epoch": 0.19472573839662446, + "grad_norm": 0.5192350149154663, + "learning_rate": 0.0015, + "loss": 1.8387, + "step": 1846 + }, + { + "epoch": 0.19483122362869199, + "grad_norm": 0.5149152874946594, + "learning_rate": 0.0015, + "loss": 1.83, + "step": 1847 + }, + { + "epoch": 0.1949367088607595, + "grad_norm": 0.5416075587272644, + "learning_rate": 0.0015, + "loss": 1.7897, + "step": 1848 + }, + { + "epoch": 0.195042194092827, + "grad_norm": 0.5153766870498657, + "learning_rate": 0.0015, + "loss": 1.7679, + "step": 1849 + }, + { + "epoch": 0.19514767932489452, + "grad_norm": 0.6838843822479248, + "learning_rate": 0.0015, + "loss": 1.8395, + "step": 1850 + }, + { + "epoch": 0.19525316455696204, + "grad_norm": 0.5588085055351257, + "learning_rate": 0.0015, + "loss": 1.7849, + "step": 1851 + }, + { + "epoch": 0.19535864978902953, + "grad_norm": 0.5469839572906494, + "learning_rate": 0.0015, + "loss": 1.8287, + "step": 1852 + }, + { + "epoch": 0.19546413502109705, + "grad_norm": 0.6076549887657166, + "learning_rate": 0.0015, + "loss": 1.8027, + "step": 1853 + }, + { + "epoch": 0.19556962025316454, + "grad_norm": 0.6546722054481506, + "learning_rate": 0.0015, + "loss": 1.8364, + "step": 1854 + }, + { + "epoch": 0.19567510548523206, + "grad_norm": 0.5049344897270203, + "learning_rate": 0.0015, + "loss": 1.8074, + "step": 1855 + }, + { + "epoch": 0.19578059071729959, + "grad_norm": 0.6427181959152222, + "learning_rate": 0.0015, + "loss": 1.8203, + "step": 1856 + }, + { + "epoch": 0.19588607594936708, + "grad_norm": 0.6976577639579773, + "learning_rate": 0.0015, + "loss": 1.8131, + "step": 1857 + }, + { + "epoch": 0.1959915611814346, + "grad_norm": 0.5904081463813782, + "learning_rate": 0.0015, + "loss": 1.8154, + "step": 1858 + }, + { 
+ "epoch": 0.19609704641350212, + "grad_norm": 0.5384019017219543, + "learning_rate": 0.0015, + "loss": 1.8107, + "step": 1859 + }, + { + "epoch": 0.1962025316455696, + "grad_norm": 0.7040228843688965, + "learning_rate": 0.0015, + "loss": 1.8424, + "step": 1860 + }, + { + "epoch": 0.19630801687763713, + "grad_norm": 0.5308371186256409, + "learning_rate": 0.0015, + "loss": 1.8195, + "step": 1861 + }, + { + "epoch": 0.19641350210970465, + "grad_norm": 0.5134651064872742, + "learning_rate": 0.0015, + "loss": 1.8195, + "step": 1862 + }, + { + "epoch": 0.19651898734177214, + "grad_norm": 0.5103256702423096, + "learning_rate": 0.0015, + "loss": 1.8766, + "step": 1863 + }, + { + "epoch": 0.19662447257383966, + "grad_norm": 0.4865470230579376, + "learning_rate": 0.0015, + "loss": 1.8013, + "step": 1864 + }, + { + "epoch": 0.19672995780590719, + "grad_norm": 0.49293550848960876, + "learning_rate": 0.0015, + "loss": 1.8039, + "step": 1865 + }, + { + "epoch": 0.19683544303797468, + "grad_norm": 0.546439528465271, + "learning_rate": 0.0015, + "loss": 1.7816, + "step": 1866 + }, + { + "epoch": 0.1969409282700422, + "grad_norm": 0.5053555965423584, + "learning_rate": 0.0015, + "loss": 1.83, + "step": 1867 + }, + { + "epoch": 0.19704641350210972, + "grad_norm": 0.5129899978637695, + "learning_rate": 0.0015, + "loss": 1.7929, + "step": 1868 + }, + { + "epoch": 0.1971518987341772, + "grad_norm": 0.4739459455013275, + "learning_rate": 0.0015, + "loss": 1.8354, + "step": 1869 + }, + { + "epoch": 0.19725738396624473, + "grad_norm": 0.5697641372680664, + "learning_rate": 0.0015, + "loss": 1.8476, + "step": 1870 + }, + { + "epoch": 0.19736286919831222, + "grad_norm": 0.6657871603965759, + "learning_rate": 0.0015, + "loss": 1.781, + "step": 1871 + }, + { + "epoch": 0.19746835443037974, + "grad_norm": 0.5394650101661682, + "learning_rate": 0.0015, + "loss": 1.7904, + "step": 1872 + }, + { + "epoch": 0.19757383966244726, + "grad_norm": 0.5282880067825317, + "learning_rate": 0.0015, + 
"loss": 1.7813, + "step": 1873 + }, + { + "epoch": 0.19767932489451476, + "grad_norm": 0.6685124039649963, + "learning_rate": 0.0015, + "loss": 1.8038, + "step": 1874 + }, + { + "epoch": 0.19778481012658228, + "grad_norm": 0.6586697101593018, + "learning_rate": 0.0015, + "loss": 1.8199, + "step": 1875 + }, + { + "epoch": 0.1978902953586498, + "grad_norm": 0.6609626412391663, + "learning_rate": 0.0015, + "loss": 1.8417, + "step": 1876 + }, + { + "epoch": 0.1979957805907173, + "grad_norm": 0.6019270420074463, + "learning_rate": 0.0015, + "loss": 1.8683, + "step": 1877 + }, + { + "epoch": 0.1981012658227848, + "grad_norm": 0.7429397702217102, + "learning_rate": 0.0015, + "loss": 1.7826, + "step": 1878 + }, + { + "epoch": 0.19820675105485233, + "grad_norm": 0.948248028755188, + "learning_rate": 0.0015, + "loss": 1.7684, + "step": 1879 + }, + { + "epoch": 0.19831223628691982, + "grad_norm": 0.49379172921180725, + "learning_rate": 0.0015, + "loss": 1.8243, + "step": 1880 + }, + { + "epoch": 0.19841772151898734, + "grad_norm": 0.7900415062904358, + "learning_rate": 0.0015, + "loss": 1.8172, + "step": 1881 + }, + { + "epoch": 0.19852320675105486, + "grad_norm": 0.8166360855102539, + "learning_rate": 0.0015, + "loss": 1.7975, + "step": 1882 + }, + { + "epoch": 0.19862869198312236, + "grad_norm": 0.5131245255470276, + "learning_rate": 0.0015, + "loss": 1.8088, + "step": 1883 + }, + { + "epoch": 0.19873417721518988, + "grad_norm": 0.4987246096134186, + "learning_rate": 0.0015, + "loss": 1.8229, + "step": 1884 + }, + { + "epoch": 0.19883966244725737, + "grad_norm": 0.466266006231308, + "learning_rate": 0.0015, + "loss": 1.7958, + "step": 1885 + }, + { + "epoch": 0.1989451476793249, + "grad_norm": 0.578522801399231, + "learning_rate": 0.0015, + "loss": 1.8149, + "step": 1886 + }, + { + "epoch": 0.1990506329113924, + "grad_norm": 0.46351176500320435, + "learning_rate": 0.0015, + "loss": 1.8114, + "step": 1887 + }, + { + "epoch": 0.1991561181434599, + "grad_norm": 
0.6146172285079956, + "learning_rate": 0.0015, + "loss": 1.8381, + "step": 1888 + }, + { + "epoch": 0.19926160337552742, + "grad_norm": 0.7563574910163879, + "learning_rate": 0.0015, + "loss": 1.803, + "step": 1889 + }, + { + "epoch": 0.19936708860759494, + "grad_norm": 0.5614977478981018, + "learning_rate": 0.0015, + "loss": 1.8064, + "step": 1890 + }, + { + "epoch": 0.19947257383966244, + "grad_norm": 0.6315422058105469, + "learning_rate": 0.0015, + "loss": 1.8257, + "step": 1891 + }, + { + "epoch": 0.19957805907172996, + "grad_norm": 0.8877629637718201, + "learning_rate": 0.0015, + "loss": 1.7949, + "step": 1892 + }, + { + "epoch": 0.19968354430379748, + "grad_norm": 0.6193661093711853, + "learning_rate": 0.0015, + "loss": 1.7596, + "step": 1893 + }, + { + "epoch": 0.19978902953586497, + "grad_norm": 0.5928061008453369, + "learning_rate": 0.0015, + "loss": 1.8014, + "step": 1894 + }, + { + "epoch": 0.1998945147679325, + "grad_norm": 0.7133049964904785, + "learning_rate": 0.0015, + "loss": 1.8423, + "step": 1895 + }, + { + "epoch": 0.2, + "grad_norm": 0.48946404457092285, + "learning_rate": 0.0015, + "loss": 1.8044, + "step": 1896 + }, + { + "epoch": 0.2001054852320675, + "grad_norm": 0.689228355884552, + "learning_rate": 0.0015, + "loss": 1.8168, + "step": 1897 + }, + { + "epoch": 0.20021097046413502, + "grad_norm": 0.7096043825149536, + "learning_rate": 0.0015, + "loss": 1.827, + "step": 1898 + }, + { + "epoch": 0.20031645569620254, + "grad_norm": 0.5263689756393433, + "learning_rate": 0.0015, + "loss": 1.805, + "step": 1899 + }, + { + "epoch": 0.20042194092827004, + "grad_norm": 0.8081044554710388, + "learning_rate": 0.0015, + "loss": 1.8287, + "step": 1900 + }, + { + "epoch": 0.20052742616033756, + "grad_norm": 0.7103652358055115, + "learning_rate": 0.0015, + "loss": 1.8024, + "step": 1901 + }, + { + "epoch": 0.20063291139240505, + "grad_norm": 0.8252784013748169, + "learning_rate": 0.0015, + "loss": 1.7685, + "step": 1902 + }, + { + "epoch": 
0.20073839662447257, + "grad_norm": 0.9588136076927185, + "learning_rate": 0.0015, + "loss": 1.8297, + "step": 1903 + }, + { + "epoch": 0.2008438818565401, + "grad_norm": 0.7772409319877625, + "learning_rate": 0.0015, + "loss": 1.8036, + "step": 1904 + }, + { + "epoch": 0.20094936708860758, + "grad_norm": 0.4961509704589844, + "learning_rate": 0.0015, + "loss": 1.7533, + "step": 1905 + }, + { + "epoch": 0.2010548523206751, + "grad_norm": 0.691749632358551, + "learning_rate": 0.0015, + "loss": 1.8402, + "step": 1906 + }, + { + "epoch": 0.20116033755274262, + "grad_norm": 0.6328140497207642, + "learning_rate": 0.0015, + "loss": 1.8068, + "step": 1907 + }, + { + "epoch": 0.20126582278481012, + "grad_norm": 0.5572474598884583, + "learning_rate": 0.0015, + "loss": 1.8157, + "step": 1908 + }, + { + "epoch": 0.20137130801687764, + "grad_norm": 0.6494549512863159, + "learning_rate": 0.0015, + "loss": 1.8577, + "step": 1909 + }, + { + "epoch": 0.20147679324894516, + "grad_norm": 0.607793390750885, + "learning_rate": 0.0015, + "loss": 1.8236, + "step": 1910 + }, + { + "epoch": 0.20158227848101265, + "grad_norm": 0.6155970096588135, + "learning_rate": 0.0015, + "loss": 1.8284, + "step": 1911 + }, + { + "epoch": 0.20168776371308017, + "grad_norm": 0.5968335866928101, + "learning_rate": 0.0015, + "loss": 1.7965, + "step": 1912 + }, + { + "epoch": 0.2017932489451477, + "grad_norm": 0.5001022815704346, + "learning_rate": 0.0015, + "loss": 1.7886, + "step": 1913 + }, + { + "epoch": 0.20189873417721518, + "grad_norm": 0.7552065253257751, + "learning_rate": 0.0015, + "loss": 1.814, + "step": 1914 + }, + { + "epoch": 0.2020042194092827, + "grad_norm": 0.7346456050872803, + "learning_rate": 0.0015, + "loss": 1.7885, + "step": 1915 + }, + { + "epoch": 0.20210970464135022, + "grad_norm": 0.5578684210777283, + "learning_rate": 0.0015, + "loss": 1.8142, + "step": 1916 + }, + { + "epoch": 0.20221518987341772, + "grad_norm": 0.6246179342269897, + "learning_rate": 0.0015, + "loss": 1.7962, + 
"step": 1917 + }, + { + "epoch": 0.20232067510548524, + "grad_norm": 0.6429421305656433, + "learning_rate": 0.0015, + "loss": 1.8151, + "step": 1918 + }, + { + "epoch": 0.20242616033755273, + "grad_norm": 0.5428091883659363, + "learning_rate": 0.0015, + "loss": 1.8491, + "step": 1919 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 0.5293623805046082, + "learning_rate": 0.0015, + "loss": 1.7877, + "step": 1920 + }, + { + "epoch": 0.20263713080168777, + "grad_norm": 0.5695270299911499, + "learning_rate": 0.0015, + "loss": 1.826, + "step": 1921 + }, + { + "epoch": 0.20274261603375526, + "grad_norm": 0.46148911118507385, + "learning_rate": 0.0015, + "loss": 1.7684, + "step": 1922 + }, + { + "epoch": 0.20284810126582278, + "grad_norm": 0.8151350021362305, + "learning_rate": 0.0015, + "loss": 1.8195, + "step": 1923 + }, + { + "epoch": 0.2029535864978903, + "grad_norm": 0.9036180973052979, + "learning_rate": 0.0015, + "loss": 1.7905, + "step": 1924 + }, + { + "epoch": 0.2030590717299578, + "grad_norm": 0.5296486616134644, + "learning_rate": 0.0015, + "loss": 1.8419, + "step": 1925 + }, + { + "epoch": 0.20316455696202532, + "grad_norm": 0.5247792601585388, + "learning_rate": 0.0015, + "loss": 1.7909, + "step": 1926 + }, + { + "epoch": 0.20327004219409284, + "grad_norm": 0.5949251651763916, + "learning_rate": 0.0015, + "loss": 1.7899, + "step": 1927 + }, + { + "epoch": 0.20337552742616033, + "grad_norm": 0.4845055043697357, + "learning_rate": 0.0015, + "loss": 1.8417, + "step": 1928 + }, + { + "epoch": 0.20348101265822785, + "grad_norm": 0.8005715608596802, + "learning_rate": 0.0015, + "loss": 1.7822, + "step": 1929 + }, + { + "epoch": 0.20358649789029537, + "grad_norm": 0.8493461012840271, + "learning_rate": 0.0015, + "loss": 1.8313, + "step": 1930 + }, + { + "epoch": 0.20369198312236286, + "grad_norm": 0.47339266538619995, + "learning_rate": 0.0015, + "loss": 1.7568, + "step": 1931 + }, + { + "epoch": 0.20379746835443038, + "grad_norm": 0.6098864674568176, + 
"learning_rate": 0.0015, + "loss": 1.8162, + "step": 1932 + }, + { + "epoch": 0.2039029535864979, + "grad_norm": 0.5633969902992249, + "learning_rate": 0.0015, + "loss": 1.8032, + "step": 1933 + }, + { + "epoch": 0.2040084388185654, + "grad_norm": 0.4892830550670624, + "learning_rate": 0.0015, + "loss": 1.8083, + "step": 1934 + }, + { + "epoch": 0.20411392405063292, + "grad_norm": 0.5428746938705444, + "learning_rate": 0.0015, + "loss": 1.7833, + "step": 1935 + }, + { + "epoch": 0.2042194092827004, + "grad_norm": 0.5698093175888062, + "learning_rate": 0.0015, + "loss": 1.8085, + "step": 1936 + }, + { + "epoch": 0.20432489451476793, + "grad_norm": 0.5401638746261597, + "learning_rate": 0.0015, + "loss": 1.8205, + "step": 1937 + }, + { + "epoch": 0.20443037974683545, + "grad_norm": 0.5090903639793396, + "learning_rate": 0.0015, + "loss": 1.8161, + "step": 1938 + }, + { + "epoch": 0.20453586497890294, + "grad_norm": 0.6209476590156555, + "learning_rate": 0.0015, + "loss": 1.8176, + "step": 1939 + }, + { + "epoch": 0.20464135021097046, + "grad_norm": 0.483394593000412, + "learning_rate": 0.0015, + "loss": 1.7667, + "step": 1940 + }, + { + "epoch": 0.20474683544303798, + "grad_norm": 0.5928177237510681, + "learning_rate": 0.0015, + "loss": 1.813, + "step": 1941 + }, + { + "epoch": 0.20485232067510548, + "grad_norm": 0.5569455623626709, + "learning_rate": 0.0015, + "loss": 1.8175, + "step": 1942 + }, + { + "epoch": 0.204957805907173, + "grad_norm": 0.5346285104751587, + "learning_rate": 0.0015, + "loss": 1.7918, + "step": 1943 + }, + { + "epoch": 0.20506329113924052, + "grad_norm": 0.5668536424636841, + "learning_rate": 0.0015, + "loss": 1.7953, + "step": 1944 + }, + { + "epoch": 0.205168776371308, + "grad_norm": 0.6100139021873474, + "learning_rate": 0.0015, + "loss": 1.8392, + "step": 1945 + }, + { + "epoch": 0.20527426160337553, + "grad_norm": 0.5450618267059326, + "learning_rate": 0.0015, + "loss": 1.765, + "step": 1946 + }, + { + "epoch": 0.20537974683544305, + 
"grad_norm": 0.5417037010192871, + "learning_rate": 0.0015, + "loss": 1.8286, + "step": 1947 + }, + { + "epoch": 0.20548523206751054, + "grad_norm": 0.5623211860656738, + "learning_rate": 0.0015, + "loss": 1.7838, + "step": 1948 + }, + { + "epoch": 0.20559071729957806, + "grad_norm": 0.5735540986061096, + "learning_rate": 0.0015, + "loss": 1.7815, + "step": 1949 + }, + { + "epoch": 0.20569620253164558, + "grad_norm": 0.514814019203186, + "learning_rate": 0.0015, + "loss": 1.7698, + "step": 1950 + }, + { + "epoch": 0.20580168776371308, + "grad_norm": 0.532285213470459, + "learning_rate": 0.0015, + "loss": 1.8375, + "step": 1951 + }, + { + "epoch": 0.2059071729957806, + "grad_norm": 0.6268038749694824, + "learning_rate": 0.0015, + "loss": 1.8088, + "step": 1952 + }, + { + "epoch": 0.2060126582278481, + "grad_norm": 0.5530931353569031, + "learning_rate": 0.0015, + "loss": 1.834, + "step": 1953 + }, + { + "epoch": 0.2061181434599156, + "grad_norm": 0.6239747405052185, + "learning_rate": 0.0015, + "loss": 1.7687, + "step": 1954 + }, + { + "epoch": 0.20622362869198313, + "grad_norm": 0.6208072900772095, + "learning_rate": 0.0015, + "loss": 1.8158, + "step": 1955 + }, + { + "epoch": 0.20632911392405062, + "grad_norm": 0.561712384223938, + "learning_rate": 0.0015, + "loss": 1.7592, + "step": 1956 + }, + { + "epoch": 0.20643459915611814, + "grad_norm": 0.5547567009925842, + "learning_rate": 0.0015, + "loss": 1.7936, + "step": 1957 + }, + { + "epoch": 0.20654008438818566, + "grad_norm": 0.5211175680160522, + "learning_rate": 0.0015, + "loss": 1.8057, + "step": 1958 + }, + { + "epoch": 0.20664556962025316, + "grad_norm": 0.5201579928398132, + "learning_rate": 0.0015, + "loss": 1.8237, + "step": 1959 + }, + { + "epoch": 0.20675105485232068, + "grad_norm": 0.49631425738334656, + "learning_rate": 0.0015, + "loss": 1.7696, + "step": 1960 + }, + { + "epoch": 0.2068565400843882, + "grad_norm": 0.4951164126396179, + "learning_rate": 0.0015, + "loss": 1.8308, + "step": 1961 + }, + { 
+ "epoch": 0.2069620253164557, + "grad_norm": 0.5647163987159729, + "learning_rate": 0.0015, + "loss": 1.8411, + "step": 1962 + }, + { + "epoch": 0.2070675105485232, + "grad_norm": 0.5356828570365906, + "learning_rate": 0.0015, + "loss": 1.8246, + "step": 1963 + }, + { + "epoch": 0.20717299578059073, + "grad_norm": 0.5040904879570007, + "learning_rate": 0.0015, + "loss": 1.7794, + "step": 1964 + }, + { + "epoch": 0.20727848101265822, + "grad_norm": 0.6009175777435303, + "learning_rate": 0.0015, + "loss": 1.8102, + "step": 1965 + }, + { + "epoch": 0.20738396624472574, + "grad_norm": 0.583840548992157, + "learning_rate": 0.0015, + "loss": 1.7967, + "step": 1966 + }, + { + "epoch": 0.20748945147679324, + "grad_norm": 0.5271764993667603, + "learning_rate": 0.0015, + "loss": 1.8245, + "step": 1967 + }, + { + "epoch": 0.20759493670886076, + "grad_norm": 0.6596837043762207, + "learning_rate": 0.0015, + "loss": 1.8085, + "step": 1968 + }, + { + "epoch": 0.20770042194092828, + "grad_norm": 0.7611307501792908, + "learning_rate": 0.0015, + "loss": 1.8126, + "step": 1969 + }, + { + "epoch": 0.20780590717299577, + "grad_norm": 0.512626051902771, + "learning_rate": 0.0015, + "loss": 1.7983, + "step": 1970 + }, + { + "epoch": 0.2079113924050633, + "grad_norm": 0.7491683959960938, + "learning_rate": 0.0015, + "loss": 1.8134, + "step": 1971 + }, + { + "epoch": 0.2080168776371308, + "grad_norm": 1.0004591941833496, + "learning_rate": 0.0015, + "loss": 1.7722, + "step": 1972 + }, + { + "epoch": 0.2081223628691983, + "grad_norm": 0.5658147931098938, + "learning_rate": 0.0015, + "loss": 1.8139, + "step": 1973 + }, + { + "epoch": 0.20822784810126582, + "grad_norm": 0.8615539073944092, + "learning_rate": 0.0015, + "loss": 1.7961, + "step": 1974 + }, + { + "epoch": 0.20833333333333334, + "grad_norm": 1.200351595878601, + "learning_rate": 0.0015, + "loss": 1.8216, + "step": 1975 + }, + { + "epoch": 0.20843881856540084, + "grad_norm": 0.48392531275749207, + "learning_rate": 0.0015, + 
"loss": 1.7886, + "step": 1976 + }, + { + "epoch": 0.20854430379746836, + "grad_norm": 0.9284475445747375, + "learning_rate": 0.0015, + "loss": 1.8182, + "step": 1977 + }, + { + "epoch": 0.20864978902953588, + "grad_norm": 0.7154855728149414, + "learning_rate": 0.0015, + "loss": 1.792, + "step": 1978 + }, + { + "epoch": 0.20875527426160337, + "grad_norm": 0.7586033940315247, + "learning_rate": 0.0015, + "loss": 1.7763, + "step": 1979 + }, + { + "epoch": 0.2088607594936709, + "grad_norm": 1.0334913730621338, + "learning_rate": 0.0015, + "loss": 1.8212, + "step": 1980 + }, + { + "epoch": 0.2089662447257384, + "grad_norm": 0.6829190254211426, + "learning_rate": 0.0015, + "loss": 1.7854, + "step": 1981 + }, + { + "epoch": 0.2090717299578059, + "grad_norm": 0.7945849299430847, + "learning_rate": 0.0015, + "loss": 1.7529, + "step": 1982 + }, + { + "epoch": 0.20917721518987342, + "grad_norm": 0.7287125587463379, + "learning_rate": 0.0015, + "loss": 1.8047, + "step": 1983 + }, + { + "epoch": 0.20928270042194091, + "grad_norm": 0.6630305051803589, + "learning_rate": 0.0015, + "loss": 1.8365, + "step": 1984 + }, + { + "epoch": 0.20938818565400844, + "grad_norm": 0.7076170444488525, + "learning_rate": 0.0015, + "loss": 1.7945, + "step": 1985 + }, + { + "epoch": 0.20949367088607596, + "grad_norm": 0.6776531338691711, + "learning_rate": 0.0015, + "loss": 1.7725, + "step": 1986 + }, + { + "epoch": 0.20959915611814345, + "grad_norm": 0.7501946091651917, + "learning_rate": 0.0015, + "loss": 1.762, + "step": 1987 + }, + { + "epoch": 0.20970464135021097, + "grad_norm": 0.8988157510757446, + "learning_rate": 0.0015, + "loss": 1.8142, + "step": 1988 + }, + { + "epoch": 0.2098101265822785, + "grad_norm": 0.5500444769859314, + "learning_rate": 0.0015, + "loss": 1.8292, + "step": 1989 + }, + { + "epoch": 0.20991561181434598, + "grad_norm": 0.7037571668624878, + "learning_rate": 0.0015, + "loss": 1.8082, + "step": 1990 + }, + { + "epoch": 0.2100210970464135, + "grad_norm": 
0.6246312856674194, + "learning_rate": 0.0015, + "loss": 1.8202, + "step": 1991 + }, + { + "epoch": 0.21012658227848102, + "grad_norm": 0.5556793808937073, + "learning_rate": 0.0015, + "loss": 1.7824, + "step": 1992 + }, + { + "epoch": 0.21023206751054851, + "grad_norm": 0.7253715991973877, + "learning_rate": 0.0015, + "loss": 1.8209, + "step": 1993 + }, + { + "epoch": 0.21033755274261604, + "grad_norm": 0.5947061777114868, + "learning_rate": 0.0015, + "loss": 1.7625, + "step": 1994 + }, + { + "epoch": 0.21044303797468356, + "grad_norm": 0.5559309124946594, + "learning_rate": 0.0015, + "loss": 1.8327, + "step": 1995 + }, + { + "epoch": 0.21054852320675105, + "grad_norm": 0.6597854495048523, + "learning_rate": 0.0015, + "loss": 1.7723, + "step": 1996 + }, + { + "epoch": 0.21065400843881857, + "grad_norm": 0.5748874545097351, + "learning_rate": 0.0015, + "loss": 1.7907, + "step": 1997 + }, + { + "epoch": 0.2107594936708861, + "grad_norm": 0.4824632406234741, + "learning_rate": 0.0015, + "loss": 1.7729, + "step": 1998 + }, + { + "epoch": 0.21086497890295358, + "grad_norm": 0.5499720573425293, + "learning_rate": 0.0015, + "loss": 1.7715, + "step": 1999 + }, + { + "epoch": 0.2109704641350211, + "grad_norm": 0.47200867533683777, + "learning_rate": 0.0015, + "loss": 1.8024, + "step": 2000 + }, + { + "epoch": 0.2110759493670886, + "grad_norm": 0.52181476354599, + "learning_rate": 0.0015, + "loss": 1.8389, + "step": 2001 + }, + { + "epoch": 0.21118143459915611, + "grad_norm": 0.5757996439933777, + "learning_rate": 0.0015, + "loss": 1.7929, + "step": 2002 + }, + { + "epoch": 0.21128691983122364, + "grad_norm": 0.4950844645500183, + "learning_rate": 0.0015, + "loss": 1.7456, + "step": 2003 + }, + { + "epoch": 0.21139240506329113, + "grad_norm": 0.47291117906570435, + "learning_rate": 0.0015, + "loss": 1.7939, + "step": 2004 + }, + { + "epoch": 0.21149789029535865, + "grad_norm": 0.4911331832408905, + "learning_rate": 0.0015, + "loss": 1.8218, + "step": 2005 + }, + { + 
"epoch": 0.21160337552742617, + "grad_norm": 0.477486789226532, + "learning_rate": 0.0015, + "loss": 1.8038, + "step": 2006 + }, + { + "epoch": 0.21170886075949366, + "grad_norm": 0.4663199484348297, + "learning_rate": 0.0015, + "loss": 1.7897, + "step": 2007 + }, + { + "epoch": 0.21181434599156118, + "grad_norm": 0.521081268787384, + "learning_rate": 0.0015, + "loss": 1.8358, + "step": 2008 + }, + { + "epoch": 0.2119198312236287, + "grad_norm": 0.49341341853141785, + "learning_rate": 0.0015, + "loss": 1.7782, + "step": 2009 + }, + { + "epoch": 0.2120253164556962, + "grad_norm": 0.5227444171905518, + "learning_rate": 0.0015, + "loss": 1.7784, + "step": 2010 + }, + { + "epoch": 0.21213080168776371, + "grad_norm": 0.47457006573677063, + "learning_rate": 0.0015, + "loss": 1.8092, + "step": 2011 + }, + { + "epoch": 0.21223628691983124, + "grad_norm": 0.4924984276294708, + "learning_rate": 0.0015, + "loss": 1.8039, + "step": 2012 + }, + { + "epoch": 0.21234177215189873, + "grad_norm": 0.4714333713054657, + "learning_rate": 0.0015, + "loss": 1.8099, + "step": 2013 + }, + { + "epoch": 0.21244725738396625, + "grad_norm": 0.45190441608428955, + "learning_rate": 0.0015, + "loss": 1.7932, + "step": 2014 + }, + { + "epoch": 0.21255274261603377, + "grad_norm": 0.46433931589126587, + "learning_rate": 0.0015, + "loss": 1.8013, + "step": 2015 + }, + { + "epoch": 0.21265822784810126, + "grad_norm": 0.5200649499893188, + "learning_rate": 0.0015, + "loss": 1.8317, + "step": 2016 + }, + { + "epoch": 0.21276371308016878, + "grad_norm": 0.5095267295837402, + "learning_rate": 0.0015, + "loss": 1.7953, + "step": 2017 + }, + { + "epoch": 0.21286919831223627, + "grad_norm": 0.4929715096950531, + "learning_rate": 0.0015, + "loss": 1.7978, + "step": 2018 + }, + { + "epoch": 0.2129746835443038, + "grad_norm": 0.5923967957496643, + "learning_rate": 0.0015, + "loss": 1.8137, + "step": 2019 + }, + { + "epoch": 0.21308016877637131, + "grad_norm": 0.4702209234237671, + "learning_rate": 0.0015, + 
"loss": 1.7838, + "step": 2020 + }, + { + "epoch": 0.2131856540084388, + "grad_norm": 0.6560384035110474, + "learning_rate": 0.0015, + "loss": 1.7878, + "step": 2021 + }, + { + "epoch": 0.21329113924050633, + "grad_norm": 0.6585400700569153, + "learning_rate": 0.0015, + "loss": 1.7798, + "step": 2022 + }, + { + "epoch": 0.21339662447257385, + "grad_norm": 0.5050377249717712, + "learning_rate": 0.0015, + "loss": 1.8481, + "step": 2023 + }, + { + "epoch": 0.21350210970464134, + "grad_norm": 0.7235761880874634, + "learning_rate": 0.0015, + "loss": 1.826, + "step": 2024 + }, + { + "epoch": 0.21360759493670886, + "grad_norm": 0.5125359892845154, + "learning_rate": 0.0015, + "loss": 1.7541, + "step": 2025 + }, + { + "epoch": 0.21371308016877638, + "grad_norm": 0.6349862217903137, + "learning_rate": 0.0015, + "loss": 1.7685, + "step": 2026 + }, + { + "epoch": 0.21381856540084387, + "grad_norm": 0.559128999710083, + "learning_rate": 0.0015, + "loss": 1.7753, + "step": 2027 + }, + { + "epoch": 0.2139240506329114, + "grad_norm": 0.5841044187545776, + "learning_rate": 0.0015, + "loss": 1.8158, + "step": 2028 + }, + { + "epoch": 0.21402953586497891, + "grad_norm": 0.5483068823814392, + "learning_rate": 0.0015, + "loss": 1.8183, + "step": 2029 + }, + { + "epoch": 0.2141350210970464, + "grad_norm": 0.5353325605392456, + "learning_rate": 0.0015, + "loss": 1.7804, + "step": 2030 + }, + { + "epoch": 0.21424050632911393, + "grad_norm": 0.5862396955490112, + "learning_rate": 0.0015, + "loss": 1.8114, + "step": 2031 + }, + { + "epoch": 0.21434599156118145, + "grad_norm": 0.612694501876831, + "learning_rate": 0.0015, + "loss": 1.7935, + "step": 2032 + }, + { + "epoch": 0.21445147679324894, + "grad_norm": 0.4950452744960785, + "learning_rate": 0.0015, + "loss": 1.7931, + "step": 2033 + }, + { + "epoch": 0.21455696202531646, + "grad_norm": 0.6351242065429688, + "learning_rate": 0.0015, + "loss": 1.7857, + "step": 2034 + }, + { + "epoch": 0.21466244725738395, + "grad_norm": 
0.4815472364425659, + "learning_rate": 0.0015, + "loss": 1.7829, + "step": 2035 + }, + { + "epoch": 0.21476793248945147, + "grad_norm": 0.5411445498466492, + "learning_rate": 0.0015, + "loss": 1.8062, + "step": 2036 + }, + { + "epoch": 0.214873417721519, + "grad_norm": 0.5612822771072388, + "learning_rate": 0.0015, + "loss": 1.7741, + "step": 2037 + }, + { + "epoch": 0.2149789029535865, + "grad_norm": 0.5131843686103821, + "learning_rate": 0.0015, + "loss": 1.7879, + "step": 2038 + }, + { + "epoch": 0.215084388185654, + "grad_norm": 0.5584938526153564, + "learning_rate": 0.0015, + "loss": 1.7966, + "step": 2039 + }, + { + "epoch": 0.21518987341772153, + "grad_norm": 0.5455635190010071, + "learning_rate": 0.0015, + "loss": 1.8202, + "step": 2040 + }, + { + "epoch": 0.21529535864978902, + "grad_norm": 0.5857892036437988, + "learning_rate": 0.0015, + "loss": 1.7788, + "step": 2041 + }, + { + "epoch": 0.21540084388185654, + "grad_norm": 0.6014600396156311, + "learning_rate": 0.0015, + "loss": 1.7754, + "step": 2042 + }, + { + "epoch": 0.21550632911392406, + "grad_norm": 0.5224863886833191, + "learning_rate": 0.0015, + "loss": 1.8038, + "step": 2043 + }, + { + "epoch": 0.21561181434599155, + "grad_norm": 0.604583740234375, + "learning_rate": 0.0015, + "loss": 1.8325, + "step": 2044 + }, + { + "epoch": 0.21571729957805907, + "grad_norm": 0.523348331451416, + "learning_rate": 0.0015, + "loss": 1.7784, + "step": 2045 + }, + { + "epoch": 0.2158227848101266, + "grad_norm": 0.6126733422279358, + "learning_rate": 0.0015, + "loss": 1.8076, + "step": 2046 + }, + { + "epoch": 0.2159282700421941, + "grad_norm": 0.553733766078949, + "learning_rate": 0.0015, + "loss": 1.7776, + "step": 2047 + }, + { + "epoch": 0.2160337552742616, + "grad_norm": 0.6064372062683105, + "learning_rate": 0.0015, + "loss": 1.7918, + "step": 2048 + }, + { + "epoch": 0.21613924050632913, + "grad_norm": 0.5542830228805542, + "learning_rate": 0.0015, + "loss": 1.8039, + "step": 2049 + }, + { + "epoch": 
0.21624472573839662, + "grad_norm": 0.49567410349845886, + "learning_rate": 0.0015, + "loss": 1.7975, + "step": 2050 + }, + { + "epoch": 0.21635021097046414, + "grad_norm": 0.5664834976196289, + "learning_rate": 0.0015, + "loss": 1.8117, + "step": 2051 + }, + { + "epoch": 0.21645569620253163, + "grad_norm": 0.5432770252227783, + "learning_rate": 0.0015, + "loss": 1.8184, + "step": 2052 + }, + { + "epoch": 0.21656118143459915, + "grad_norm": 0.5299774408340454, + "learning_rate": 0.0015, + "loss": 1.7653, + "step": 2053 + }, + { + "epoch": 0.21666666666666667, + "grad_norm": 0.6001642346382141, + "learning_rate": 0.0015, + "loss": 1.7823, + "step": 2054 + }, + { + "epoch": 0.21677215189873417, + "grad_norm": 0.4917055666446686, + "learning_rate": 0.0015, + "loss": 1.785, + "step": 2055 + }, + { + "epoch": 0.2168776371308017, + "grad_norm": 0.5083092451095581, + "learning_rate": 0.0015, + "loss": 1.7962, + "step": 2056 + }, + { + "epoch": 0.2169831223628692, + "grad_norm": 0.5252932906150818, + "learning_rate": 0.0015, + "loss": 1.7852, + "step": 2057 + }, + { + "epoch": 0.2170886075949367, + "grad_norm": 0.4780125021934509, + "learning_rate": 0.0015, + "loss": 1.7602, + "step": 2058 + }, + { + "epoch": 0.21719409282700422, + "grad_norm": 0.617774248123169, + "learning_rate": 0.0015, + "loss": 1.7632, + "step": 2059 + }, + { + "epoch": 0.21729957805907174, + "grad_norm": 0.6348524689674377, + "learning_rate": 0.0015, + "loss": 1.7597, + "step": 2060 + }, + { + "epoch": 0.21740506329113923, + "grad_norm": 0.48544228076934814, + "learning_rate": 0.0015, + "loss": 1.7768, + "step": 2061 + }, + { + "epoch": 0.21751054852320675, + "grad_norm": 0.4656197130680084, + "learning_rate": 0.0015, + "loss": 1.809, + "step": 2062 + }, + { + "epoch": 0.21761603375527427, + "grad_norm": 0.518939197063446, + "learning_rate": 0.0015, + "loss": 1.7651, + "step": 2063 + }, + { + "epoch": 0.21772151898734177, + "grad_norm": 0.6058339476585388, + "learning_rate": 0.0015, + "loss": 1.7675, 
+ "step": 2064 + }, + { + "epoch": 0.2178270042194093, + "grad_norm": 0.5695094466209412, + "learning_rate": 0.0015, + "loss": 1.7576, + "step": 2065 + }, + { + "epoch": 0.21793248945147678, + "grad_norm": 0.5090291500091553, + "learning_rate": 0.0015, + "loss": 1.8055, + "step": 2066 + }, + { + "epoch": 0.2180379746835443, + "grad_norm": 0.5104354619979858, + "learning_rate": 0.0015, + "loss": 1.7603, + "step": 2067 + }, + { + "epoch": 0.21814345991561182, + "grad_norm": 0.5981147885322571, + "learning_rate": 0.0015, + "loss": 1.8037, + "step": 2068 + }, + { + "epoch": 0.2182489451476793, + "grad_norm": 0.7407425045967102, + "learning_rate": 0.0015, + "loss": 1.8131, + "step": 2069 + }, + { + "epoch": 0.21835443037974683, + "grad_norm": 0.5820272564888, + "learning_rate": 0.0015, + "loss": 1.7915, + "step": 2070 + }, + { + "epoch": 0.21845991561181435, + "grad_norm": 0.7423871755599976, + "learning_rate": 0.0015, + "loss": 1.8166, + "step": 2071 + }, + { + "epoch": 0.21856540084388185, + "grad_norm": 1.164667010307312, + "learning_rate": 0.0015, + "loss": 1.7936, + "step": 2072 + }, + { + "epoch": 0.21867088607594937, + "grad_norm": 0.551436185836792, + "learning_rate": 0.0015, + "loss": 1.8044, + "step": 2073 + }, + { + "epoch": 0.2187763713080169, + "grad_norm": 0.8940706253051758, + "learning_rate": 0.0015, + "loss": 1.7778, + "step": 2074 + }, + { + "epoch": 0.21888185654008438, + "grad_norm": 0.8622190356254578, + "learning_rate": 0.0015, + "loss": 1.7781, + "step": 2075 + }, + { + "epoch": 0.2189873417721519, + "grad_norm": 0.4920498728752136, + "learning_rate": 0.0015, + "loss": 1.7471, + "step": 2076 + }, + { + "epoch": 0.21909282700421942, + "grad_norm": 0.8537492752075195, + "learning_rate": 0.0015, + "loss": 1.8177, + "step": 2077 + }, + { + "epoch": 0.2191983122362869, + "grad_norm": 0.6676235198974609, + "learning_rate": 0.0015, + "loss": 1.7695, + "step": 2078 + }, + { + "epoch": 0.21930379746835443, + "grad_norm": 0.5924880504608154, + 
"learning_rate": 0.0015, + "loss": 1.8098, + "step": 2079 + }, + { + "epoch": 0.21940928270042195, + "grad_norm": 0.8442460298538208, + "learning_rate": 0.0015, + "loss": 1.8183, + "step": 2080 + }, + { + "epoch": 0.21951476793248945, + "grad_norm": 0.5594147443771362, + "learning_rate": 0.0015, + "loss": 1.791, + "step": 2081 + }, + { + "epoch": 0.21962025316455697, + "grad_norm": 0.814310610294342, + "learning_rate": 0.0015, + "loss": 1.7805, + "step": 2082 + }, + { + "epoch": 0.21972573839662446, + "grad_norm": 0.929280161857605, + "learning_rate": 0.0015, + "loss": 1.7676, + "step": 2083 + }, + { + "epoch": 0.21983122362869198, + "grad_norm": 0.482134610414505, + "learning_rate": 0.0015, + "loss": 1.7829, + "step": 2084 + }, + { + "epoch": 0.2199367088607595, + "grad_norm": 0.916218638420105, + "learning_rate": 0.0015, + "loss": 1.797, + "step": 2085 + }, + { + "epoch": 0.220042194092827, + "grad_norm": 0.8021676540374756, + "learning_rate": 0.0015, + "loss": 1.7477, + "step": 2086 + }, + { + "epoch": 0.2201476793248945, + "grad_norm": 0.5192430019378662, + "learning_rate": 0.0015, + "loss": 1.8255, + "step": 2087 + }, + { + "epoch": 0.22025316455696203, + "grad_norm": 0.8338963985443115, + "learning_rate": 0.0015, + "loss": 1.7566, + "step": 2088 + }, + { + "epoch": 0.22035864978902953, + "grad_norm": 0.5037552118301392, + "learning_rate": 0.0015, + "loss": 1.7922, + "step": 2089 + }, + { + "epoch": 0.22046413502109705, + "grad_norm": 0.7627525329589844, + "learning_rate": 0.0015, + "loss": 1.8104, + "step": 2090 + }, + { + "epoch": 0.22056962025316457, + "grad_norm": 0.7417036294937134, + "learning_rate": 0.0015, + "loss": 1.7891, + "step": 2091 + }, + { + "epoch": 0.22067510548523206, + "grad_norm": 0.5279183387756348, + "learning_rate": 0.0015, + "loss": 1.7959, + "step": 2092 + }, + { + "epoch": 0.22078059071729958, + "grad_norm": 0.9817603826522827, + "learning_rate": 0.0015, + "loss": 1.7903, + "step": 2093 + }, + { + "epoch": 0.2208860759493671, + 
"grad_norm": 0.7074859142303467, + "learning_rate": 0.0015, + "loss": 1.79, + "step": 2094 + }, + { + "epoch": 0.2209915611814346, + "grad_norm": 0.677912175655365, + "learning_rate": 0.0015, + "loss": 1.734, + "step": 2095 + }, + { + "epoch": 0.2210970464135021, + "grad_norm": 0.8714148998260498, + "learning_rate": 0.0015, + "loss": 1.7989, + "step": 2096 + }, + { + "epoch": 0.22120253164556963, + "grad_norm": 0.5216149091720581, + "learning_rate": 0.0015, + "loss": 1.8042, + "step": 2097 + }, + { + "epoch": 0.22130801687763713, + "grad_norm": 0.8973119854927063, + "learning_rate": 0.0015, + "loss": 1.8231, + "step": 2098 + }, + { + "epoch": 0.22141350210970465, + "grad_norm": 0.5994203090667725, + "learning_rate": 0.0015, + "loss": 1.8379, + "step": 2099 + }, + { + "epoch": 0.22151898734177214, + "grad_norm": 0.7807937264442444, + "learning_rate": 0.0015, + "loss": 1.7876, + "step": 2100 + }, + { + "epoch": 0.22162447257383966, + "grad_norm": 0.7964973449707031, + "learning_rate": 0.0015, + "loss": 1.7758, + "step": 2101 + }, + { + "epoch": 0.22172995780590718, + "grad_norm": 0.49382224678993225, + "learning_rate": 0.0015, + "loss": 1.8076, + "step": 2102 + }, + { + "epoch": 0.22183544303797467, + "grad_norm": 0.7006746530532837, + "learning_rate": 0.0015, + "loss": 1.7815, + "step": 2103 + }, + { + "epoch": 0.2219409282700422, + "grad_norm": 0.4756909906864166, + "learning_rate": 0.0015, + "loss": 1.7763, + "step": 2104 + }, + { + "epoch": 0.2220464135021097, + "grad_norm": 0.6178300976753235, + "learning_rate": 0.0015, + "loss": 1.7967, + "step": 2105 + }, + { + "epoch": 0.2221518987341772, + "grad_norm": 0.6046226024627686, + "learning_rate": 0.0015, + "loss": 1.7589, + "step": 2106 + }, + { + "epoch": 0.22225738396624473, + "grad_norm": 0.4923652410507202, + "learning_rate": 0.0015, + "loss": 1.7768, + "step": 2107 + }, + { + "epoch": 0.22236286919831225, + "grad_norm": 0.49180859327316284, + "learning_rate": 0.0015, + "loss": 1.7653, + "step": 2108 + }, + { 
+ "epoch": 0.22246835443037974, + "grad_norm": 0.49905455112457275, + "learning_rate": 0.0015, + "loss": 1.767, + "step": 2109 + }, + { + "epoch": 0.22257383966244726, + "grad_norm": 0.5526487231254578, + "learning_rate": 0.0015, + "loss": 1.7682, + "step": 2110 + }, + { + "epoch": 0.22267932489451478, + "grad_norm": 0.5231235027313232, + "learning_rate": 0.0015, + "loss": 1.7421, + "step": 2111 + }, + { + "epoch": 0.22278481012658227, + "grad_norm": 0.4582445025444031, + "learning_rate": 0.0015, + "loss": 1.774, + "step": 2112 + }, + { + "epoch": 0.2228902953586498, + "grad_norm": 0.45971113443374634, + "learning_rate": 0.0015, + "loss": 1.8047, + "step": 2113 + }, + { + "epoch": 0.2229957805907173, + "grad_norm": 0.4648434519767761, + "learning_rate": 0.0015, + "loss": 1.7784, + "step": 2114 + }, + { + "epoch": 0.2231012658227848, + "grad_norm": 0.46904227137565613, + "learning_rate": 0.0015, + "loss": 1.7714, + "step": 2115 + }, + { + "epoch": 0.22320675105485233, + "grad_norm": 0.5289586186408997, + "learning_rate": 0.0015, + "loss": 1.7727, + "step": 2116 + }, + { + "epoch": 0.22331223628691982, + "grad_norm": 0.5065954327583313, + "learning_rate": 0.0015, + "loss": 1.7892, + "step": 2117 + }, + { + "epoch": 0.22341772151898734, + "grad_norm": 0.45888662338256836, + "learning_rate": 0.0015, + "loss": 1.7675, + "step": 2118 + }, + { + "epoch": 0.22352320675105486, + "grad_norm": 0.4652993977069855, + "learning_rate": 0.0015, + "loss": 1.7902, + "step": 2119 + }, + { + "epoch": 0.22362869198312235, + "grad_norm": 0.48693880438804626, + "learning_rate": 0.0015, + "loss": 1.7642, + "step": 2120 + }, + { + "epoch": 0.22373417721518987, + "grad_norm": 0.6744024157524109, + "learning_rate": 0.0015, + "loss": 1.7821, + "step": 2121 + }, + { + "epoch": 0.2238396624472574, + "grad_norm": 0.592976987361908, + "learning_rate": 0.0015, + "loss": 1.7468, + "step": 2122 + }, + { + "epoch": 0.22394514767932489, + "grad_norm": 0.49479156732559204, + "learning_rate": 0.0015, + 
"loss": 1.8064, + "step": 2123 + }, + { + "epoch": 0.2240506329113924, + "grad_norm": 0.7077398300170898, + "learning_rate": 0.0015, + "loss": 1.7812, + "step": 2124 + }, + { + "epoch": 0.22415611814345993, + "grad_norm": 0.5500556826591492, + "learning_rate": 0.0015, + "loss": 1.7569, + "step": 2125 + }, + { + "epoch": 0.22426160337552742, + "grad_norm": 0.5560785531997681, + "learning_rate": 0.0015, + "loss": 1.7717, + "step": 2126 + }, + { + "epoch": 0.22436708860759494, + "grad_norm": 0.844450056552887, + "learning_rate": 0.0015, + "loss": 1.8187, + "step": 2127 + }, + { + "epoch": 0.22447257383966246, + "grad_norm": 0.5791806578636169, + "learning_rate": 0.0015, + "loss": 1.7607, + "step": 2128 + }, + { + "epoch": 0.22457805907172995, + "grad_norm": 0.5502035617828369, + "learning_rate": 0.0015, + "loss": 1.799, + "step": 2129 + }, + { + "epoch": 0.22468354430379747, + "grad_norm": 0.5226722359657288, + "learning_rate": 0.0015, + "loss": 1.7848, + "step": 2130 + }, + { + "epoch": 0.224789029535865, + "grad_norm": 0.5256772637367249, + "learning_rate": 0.0015, + "loss": 1.7574, + "step": 2131 + }, + { + "epoch": 0.22489451476793249, + "grad_norm": 0.5230455994606018, + "learning_rate": 0.0015, + "loss": 1.7299, + "step": 2132 + }, + { + "epoch": 0.225, + "grad_norm": 0.484761506319046, + "learning_rate": 0.0015, + "loss": 1.7566, + "step": 2133 + }, + { + "epoch": 0.2251054852320675, + "grad_norm": 0.5470814108848572, + "learning_rate": 0.0015, + "loss": 1.8039, + "step": 2134 + }, + { + "epoch": 0.22521097046413502, + "grad_norm": 0.5252546668052673, + "learning_rate": 0.0015, + "loss": 1.7829, + "step": 2135 + }, + { + "epoch": 0.22531645569620254, + "grad_norm": 0.49324169754981995, + "learning_rate": 0.0015, + "loss": 1.8049, + "step": 2136 + }, + { + "epoch": 0.22542194092827003, + "grad_norm": 0.5957626104354858, + "learning_rate": 0.0015, + "loss": 1.7664, + "step": 2137 + }, + { + "epoch": 0.22552742616033755, + "grad_norm": 0.5057148337364197, + 
"learning_rate": 0.0015, + "loss": 1.7721, + "step": 2138 + }, + { + "epoch": 0.22563291139240507, + "grad_norm": 0.594565749168396, + "learning_rate": 0.0015, + "loss": 1.8047, + "step": 2139 + }, + { + "epoch": 0.22573839662447256, + "grad_norm": 0.57353276014328, + "learning_rate": 0.0015, + "loss": 1.7995, + "step": 2140 + }, + { + "epoch": 0.22584388185654009, + "grad_norm": 0.5522382259368896, + "learning_rate": 0.0015, + "loss": 1.7697, + "step": 2141 + }, + { + "epoch": 0.2259493670886076, + "grad_norm": 0.5503726601600647, + "learning_rate": 0.0015, + "loss": 1.7394, + "step": 2142 + }, + { + "epoch": 0.2260548523206751, + "grad_norm": 0.5022009611129761, + "learning_rate": 0.0015, + "loss": 1.7725, + "step": 2143 + }, + { + "epoch": 0.22616033755274262, + "grad_norm": 0.5625883936882019, + "learning_rate": 0.0015, + "loss": 1.8358, + "step": 2144 + }, + { + "epoch": 0.22626582278481014, + "grad_norm": 0.5966932773590088, + "learning_rate": 0.0015, + "loss": 1.7974, + "step": 2145 + }, + { + "epoch": 0.22637130801687763, + "grad_norm": 0.6162047982215881, + "learning_rate": 0.0015, + "loss": 1.7807, + "step": 2146 + }, + { + "epoch": 0.22647679324894515, + "grad_norm": 0.4755229651927948, + "learning_rate": 0.0015, + "loss": 1.8087, + "step": 2147 + }, + { + "epoch": 0.22658227848101264, + "grad_norm": 0.6164548397064209, + "learning_rate": 0.0015, + "loss": 1.806, + "step": 2148 + }, + { + "epoch": 0.22668776371308016, + "grad_norm": 0.6168848872184753, + "learning_rate": 0.0015, + "loss": 1.7675, + "step": 2149 + }, + { + "epoch": 0.22679324894514769, + "grad_norm": 0.5575670003890991, + "learning_rate": 0.0015, + "loss": 1.7559, + "step": 2150 + }, + { + "epoch": 0.22689873417721518, + "grad_norm": 0.87114018201828, + "learning_rate": 0.0015, + "loss": 1.8342, + "step": 2151 + }, + { + "epoch": 0.2270042194092827, + "grad_norm": 0.6856433749198914, + "learning_rate": 0.0015, + "loss": 1.8385, + "step": 2152 + }, + { + "epoch": 0.22710970464135022, + 
"grad_norm": 0.5108639597892761, + "learning_rate": 0.0015, + "loss": 1.7644, + "step": 2153 + }, + { + "epoch": 0.2272151898734177, + "grad_norm": 0.6017791032791138, + "learning_rate": 0.0015, + "loss": 1.7978, + "step": 2154 + }, + { + "epoch": 0.22732067510548523, + "grad_norm": 0.48644721508026123, + "learning_rate": 0.0015, + "loss": 1.7952, + "step": 2155 + }, + { + "epoch": 0.22742616033755275, + "grad_norm": 0.5356946587562561, + "learning_rate": 0.0015, + "loss": 1.7715, + "step": 2156 + }, + { + "epoch": 0.22753164556962024, + "grad_norm": 0.5389983654022217, + "learning_rate": 0.0015, + "loss": 1.7918, + "step": 2157 + }, + { + "epoch": 0.22763713080168776, + "grad_norm": 0.6026822924613953, + "learning_rate": 0.0015, + "loss": 1.7516, + "step": 2158 + }, + { + "epoch": 0.22774261603375529, + "grad_norm": 0.5365722179412842, + "learning_rate": 0.0015, + "loss": 1.8183, + "step": 2159 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 0.5630393624305725, + "learning_rate": 0.0015, + "loss": 1.7838, + "step": 2160 + }, + { + "epoch": 0.2279535864978903, + "grad_norm": 0.6574785709381104, + "learning_rate": 0.0015, + "loss": 1.7782, + "step": 2161 + }, + { + "epoch": 0.22805907172995782, + "grad_norm": 0.4817398488521576, + "learning_rate": 0.0015, + "loss": 1.7533, + "step": 2162 + }, + { + "epoch": 0.2281645569620253, + "grad_norm": 0.5800243616104126, + "learning_rate": 0.0015, + "loss": 1.78, + "step": 2163 + }, + { + "epoch": 0.22827004219409283, + "grad_norm": 0.6244105100631714, + "learning_rate": 0.0015, + "loss": 1.7533, + "step": 2164 + }, + { + "epoch": 0.22837552742616032, + "grad_norm": 0.5163666009902954, + "learning_rate": 0.0015, + "loss": 1.7302, + "step": 2165 + }, + { + "epoch": 0.22848101265822784, + "grad_norm": 0.5679610371589661, + "learning_rate": 0.0015, + "loss": 1.8157, + "step": 2166 + }, + { + "epoch": 0.22858649789029536, + "grad_norm": 0.5949824452400208, + "learning_rate": 0.0015, + "loss": 1.8137, + "step": 2167 + }, + 
{ + "epoch": 0.22869198312236286, + "grad_norm": 0.5223953723907471, + "learning_rate": 0.0015, + "loss": 1.8106, + "step": 2168 + }, + { + "epoch": 0.22879746835443038, + "grad_norm": 0.4686530530452728, + "learning_rate": 0.0015, + "loss": 1.7649, + "step": 2169 + }, + { + "epoch": 0.2289029535864979, + "grad_norm": 0.5529415607452393, + "learning_rate": 0.0015, + "loss": 1.7757, + "step": 2170 + }, + { + "epoch": 0.2290084388185654, + "grad_norm": 0.46849730610847473, + "learning_rate": 0.0015, + "loss": 1.7642, + "step": 2171 + }, + { + "epoch": 0.2291139240506329, + "grad_norm": 0.596246063709259, + "learning_rate": 0.0015, + "loss": 1.7505, + "step": 2172 + }, + { + "epoch": 0.22921940928270043, + "grad_norm": 0.563389241695404, + "learning_rate": 0.0015, + "loss": 1.7421, + "step": 2173 + }, + { + "epoch": 0.22932489451476792, + "grad_norm": 0.5011084675788879, + "learning_rate": 0.0015, + "loss": 1.7505, + "step": 2174 + }, + { + "epoch": 0.22943037974683544, + "grad_norm": 0.6441606879234314, + "learning_rate": 0.0015, + "loss": 1.7535, + "step": 2175 + }, + { + "epoch": 0.22953586497890296, + "grad_norm": 0.5723125338554382, + "learning_rate": 0.0015, + "loss": 1.7804, + "step": 2176 + }, + { + "epoch": 0.22964135021097046, + "grad_norm": 0.48091256618499756, + "learning_rate": 0.0015, + "loss": 1.7455, + "step": 2177 + }, + { + "epoch": 0.22974683544303798, + "grad_norm": 0.5418523550033569, + "learning_rate": 0.0015, + "loss": 1.782, + "step": 2178 + }, + { + "epoch": 0.2298523206751055, + "grad_norm": 0.5442428588867188, + "learning_rate": 0.0015, + "loss": 1.7752, + "step": 2179 + }, + { + "epoch": 0.229957805907173, + "grad_norm": 0.48983144760131836, + "learning_rate": 0.0015, + "loss": 1.7709, + "step": 2180 + }, + { + "epoch": 0.2300632911392405, + "grad_norm": 0.5266924500465393, + "learning_rate": 0.0015, + "loss": 1.8014, + "step": 2181 + }, + { + "epoch": 0.230168776371308, + "grad_norm": 0.4973575472831726, + "learning_rate": 0.0015, + 
"loss": 1.7677, + "step": 2182 + }, + { + "epoch": 0.23027426160337552, + "grad_norm": 0.5498241782188416, + "learning_rate": 0.0015, + "loss": 1.7918, + "step": 2183 + }, + { + "epoch": 0.23037974683544304, + "grad_norm": 0.4875961244106293, + "learning_rate": 0.0015, + "loss": 1.7767, + "step": 2184 + }, + { + "epoch": 0.23048523206751054, + "grad_norm": 0.6175986528396606, + "learning_rate": 0.0015, + "loss": 1.7595, + "step": 2185 + }, + { + "epoch": 0.23059071729957806, + "grad_norm": 0.5438140034675598, + "learning_rate": 0.0015, + "loss": 1.7966, + "step": 2186 + }, + { + "epoch": 0.23069620253164558, + "grad_norm": 0.48663565516471863, + "learning_rate": 0.0015, + "loss": 1.7493, + "step": 2187 + }, + { + "epoch": 0.23080168776371307, + "grad_norm": 0.6219778656959534, + "learning_rate": 0.0015, + "loss": 1.7771, + "step": 2188 + }, + { + "epoch": 0.2309071729957806, + "grad_norm": 0.48322415351867676, + "learning_rate": 0.0015, + "loss": 1.7713, + "step": 2189 + }, + { + "epoch": 0.2310126582278481, + "grad_norm": 0.6042963266372681, + "learning_rate": 0.0015, + "loss": 1.7944, + "step": 2190 + }, + { + "epoch": 0.2311181434599156, + "grad_norm": 0.6675837635993958, + "learning_rate": 0.0015, + "loss": 1.778, + "step": 2191 + }, + { + "epoch": 0.23122362869198312, + "grad_norm": 0.5233267545700073, + "learning_rate": 0.0015, + "loss": 1.7748, + "step": 2192 + }, + { + "epoch": 0.23132911392405064, + "grad_norm": 0.6182083487510681, + "learning_rate": 0.0015, + "loss": 1.8242, + "step": 2193 + }, + { + "epoch": 0.23143459915611814, + "grad_norm": 0.5622542500495911, + "learning_rate": 0.0015, + "loss": 1.8129, + "step": 2194 + }, + { + "epoch": 0.23154008438818566, + "grad_norm": 0.5527053475379944, + "learning_rate": 0.0015, + "loss": 1.7541, + "step": 2195 + }, + { + "epoch": 0.23164556962025318, + "grad_norm": 0.5042627453804016, + "learning_rate": 0.0015, + "loss": 1.7409, + "step": 2196 + }, + { + "epoch": 0.23175105485232067, + "grad_norm": 
0.4917895793914795, + "learning_rate": 0.0015, + "loss": 1.7587, + "step": 2197 + }, + { + "epoch": 0.2318565400843882, + "grad_norm": 0.5107989311218262, + "learning_rate": 0.0015, + "loss": 1.7495, + "step": 2198 + }, + { + "epoch": 0.23196202531645568, + "grad_norm": 0.4925711750984192, + "learning_rate": 0.0015, + "loss": 1.751, + "step": 2199 + }, + { + "epoch": 0.2320675105485232, + "grad_norm": 0.4993196427822113, + "learning_rate": 0.0015, + "loss": 1.8107, + "step": 2200 + }, + { + "epoch": 0.23217299578059072, + "grad_norm": 0.470471054315567, + "learning_rate": 0.0015, + "loss": 1.7697, + "step": 2201 + }, + { + "epoch": 0.23227848101265822, + "grad_norm": 0.4779837727546692, + "learning_rate": 0.0015, + "loss": 1.775, + "step": 2202 + }, + { + "epoch": 0.23238396624472574, + "grad_norm": 0.4769928753376007, + "learning_rate": 0.0015, + "loss": 1.7647, + "step": 2203 + }, + { + "epoch": 0.23248945147679326, + "grad_norm": 0.4866911768913269, + "learning_rate": 0.0015, + "loss": 1.7544, + "step": 2204 + }, + { + "epoch": 0.23259493670886075, + "grad_norm": 0.5387329459190369, + "learning_rate": 0.0015, + "loss": 1.7114, + "step": 2205 + }, + { + "epoch": 0.23270042194092827, + "grad_norm": 0.4828197956085205, + "learning_rate": 0.0015, + "loss": 1.8261, + "step": 2206 + }, + { + "epoch": 0.2328059071729958, + "grad_norm": 0.6584717035293579, + "learning_rate": 0.0015, + "loss": 1.761, + "step": 2207 + }, + { + "epoch": 0.23291139240506328, + "grad_norm": 0.5035545229911804, + "learning_rate": 0.0015, + "loss": 1.7768, + "step": 2208 + }, + { + "epoch": 0.2330168776371308, + "grad_norm": 0.4784407615661621, + "learning_rate": 0.0015, + "loss": 1.7438, + "step": 2209 + }, + { + "epoch": 0.23312236286919832, + "grad_norm": 0.5389178991317749, + "learning_rate": 0.0015, + "loss": 1.7768, + "step": 2210 + }, + { + "epoch": 0.23322784810126582, + "grad_norm": 0.45946386456489563, + "learning_rate": 0.0015, + "loss": 1.7652, + "step": 2211 + }, + { + "epoch": 
0.23333333333333334, + "grad_norm": 0.6016244292259216, + "learning_rate": 0.0015, + "loss": 1.773, + "step": 2212 + }, + { + "epoch": 0.23343881856540086, + "grad_norm": 0.48807260394096375, + "learning_rate": 0.0015, + "loss": 1.7706, + "step": 2213 + }, + { + "epoch": 0.23354430379746835, + "grad_norm": 0.49960052967071533, + "learning_rate": 0.0015, + "loss": 1.7778, + "step": 2214 + }, + { + "epoch": 0.23364978902953587, + "grad_norm": 0.5041942000389099, + "learning_rate": 0.0015, + "loss": 1.8037, + "step": 2215 + }, + { + "epoch": 0.23375527426160336, + "grad_norm": 0.504001259803772, + "learning_rate": 0.0015, + "loss": 1.7668, + "step": 2216 + }, + { + "epoch": 0.23386075949367088, + "grad_norm": 0.4873487055301666, + "learning_rate": 0.0015, + "loss": 1.7755, + "step": 2217 + }, + { + "epoch": 0.2339662447257384, + "grad_norm": 0.5332985520362854, + "learning_rate": 0.0015, + "loss": 1.7913, + "step": 2218 + }, + { + "epoch": 0.2340717299578059, + "grad_norm": 0.5267864465713501, + "learning_rate": 0.0015, + "loss": 1.7552, + "step": 2219 + }, + { + "epoch": 0.23417721518987342, + "grad_norm": 0.4540298879146576, + "learning_rate": 0.0015, + "loss": 1.8023, + "step": 2220 + }, + { + "epoch": 0.23428270042194094, + "grad_norm": 0.502090334892273, + "learning_rate": 0.0015, + "loss": 1.7619, + "step": 2221 + }, + { + "epoch": 0.23438818565400843, + "grad_norm": 0.48418280482292175, + "learning_rate": 0.0015, + "loss": 1.7422, + "step": 2222 + }, + { + "epoch": 0.23449367088607595, + "grad_norm": 0.5112397074699402, + "learning_rate": 0.0015, + "loss": 1.7564, + "step": 2223 + }, + { + "epoch": 0.23459915611814347, + "grad_norm": 0.546126127243042, + "learning_rate": 0.0015, + "loss": 1.7984, + "step": 2224 + }, + { + "epoch": 0.23470464135021096, + "grad_norm": 0.46486884355545044, + "learning_rate": 0.0015, + "loss": 1.7654, + "step": 2225 + }, + { + "epoch": 0.23481012658227848, + "grad_norm": 0.5794968008995056, + "learning_rate": 0.0015, + "loss": 
1.7965, + "step": 2226 + }, + { + "epoch": 0.234915611814346, + "grad_norm": 0.6269369125366211, + "learning_rate": 0.0015, + "loss": 1.7652, + "step": 2227 + }, + { + "epoch": 0.2350210970464135, + "grad_norm": 0.4783438444137573, + "learning_rate": 0.0015, + "loss": 1.7749, + "step": 2228 + }, + { + "epoch": 0.23512658227848102, + "grad_norm": 0.5994933247566223, + "learning_rate": 0.0015, + "loss": 1.7969, + "step": 2229 + }, + { + "epoch": 0.23523206751054854, + "grad_norm": 0.5959358215332031, + "learning_rate": 0.0015, + "loss": 1.7622, + "step": 2230 + }, + { + "epoch": 0.23533755274261603, + "grad_norm": 0.47580185532569885, + "learning_rate": 0.0015, + "loss": 1.7595, + "step": 2231 + }, + { + "epoch": 0.23544303797468355, + "grad_norm": 0.7106600403785706, + "learning_rate": 0.0015, + "loss": 1.7833, + "step": 2232 + }, + { + "epoch": 0.23554852320675104, + "grad_norm": 0.7072519659996033, + "learning_rate": 0.0015, + "loss": 1.7867, + "step": 2233 + }, + { + "epoch": 0.23565400843881856, + "grad_norm": 0.554775059223175, + "learning_rate": 0.0015, + "loss": 1.7611, + "step": 2234 + }, + { + "epoch": 0.23575949367088608, + "grad_norm": 0.627525806427002, + "learning_rate": 0.0015, + "loss": 1.7524, + "step": 2235 + }, + { + "epoch": 0.23586497890295358, + "grad_norm": 0.5165814757347107, + "learning_rate": 0.0015, + "loss": 1.7617, + "step": 2236 + }, + { + "epoch": 0.2359704641350211, + "grad_norm": 0.5884402990341187, + "learning_rate": 0.0015, + "loss": 1.7329, + "step": 2237 + }, + { + "epoch": 0.23607594936708862, + "grad_norm": 0.466094970703125, + "learning_rate": 0.0015, + "loss": 1.8022, + "step": 2238 + }, + { + "epoch": 0.2361814345991561, + "grad_norm": 0.5659448504447937, + "learning_rate": 0.0015, + "loss": 1.7602, + "step": 2239 + }, + { + "epoch": 0.23628691983122363, + "grad_norm": 0.5581298470497131, + "learning_rate": 0.0015, + "loss": 1.7634, + "step": 2240 + }, + { + "epoch": 0.23639240506329115, + "grad_norm": 0.47687047719955444, + 
"learning_rate": 0.0015, + "loss": 1.7445, + "step": 2241 + }, + { + "epoch": 0.23649789029535864, + "grad_norm": 0.6534562706947327, + "learning_rate": 0.0015, + "loss": 1.7454, + "step": 2242 + }, + { + "epoch": 0.23660337552742616, + "grad_norm": 0.7514973282814026, + "learning_rate": 0.0015, + "loss": 1.748, + "step": 2243 + }, + { + "epoch": 0.23670886075949368, + "grad_norm": 0.8936958312988281, + "learning_rate": 0.0015, + "loss": 1.7634, + "step": 2244 + }, + { + "epoch": 0.23681434599156118, + "grad_norm": 0.7259150147438049, + "learning_rate": 0.0015, + "loss": 1.7698, + "step": 2245 + }, + { + "epoch": 0.2369198312236287, + "grad_norm": 0.5334154963493347, + "learning_rate": 0.0015, + "loss": 1.8148, + "step": 2246 + }, + { + "epoch": 0.2370253164556962, + "grad_norm": 0.9084137082099915, + "learning_rate": 0.0015, + "loss": 1.7314, + "step": 2247 + }, + { + "epoch": 0.2371308016877637, + "grad_norm": 0.6602224111557007, + "learning_rate": 0.0015, + "loss": 1.7956, + "step": 2248 + }, + { + "epoch": 0.23723628691983123, + "grad_norm": 0.6410700082778931, + "learning_rate": 0.0015, + "loss": 1.7958, + "step": 2249 + }, + { + "epoch": 0.23734177215189872, + "grad_norm": 0.8773201704025269, + "learning_rate": 0.0015, + "loss": 1.768, + "step": 2250 + }, + { + "epoch": 0.23744725738396624, + "grad_norm": 0.8360086679458618, + "learning_rate": 0.0015, + "loss": 1.7672, + "step": 2251 + }, + { + "epoch": 0.23755274261603376, + "grad_norm": 0.4706181287765503, + "learning_rate": 0.0015, + "loss": 1.7657, + "step": 2252 + }, + { + "epoch": 0.23765822784810126, + "grad_norm": 0.8980696201324463, + "learning_rate": 0.0015, + "loss": 1.7531, + "step": 2253 + }, + { + "epoch": 0.23776371308016878, + "grad_norm": 0.9459096789360046, + "learning_rate": 0.0015, + "loss": 1.7694, + "step": 2254 + }, + { + "epoch": 0.2378691983122363, + "grad_norm": 0.4759543836116791, + "learning_rate": 0.0015, + "loss": 1.7568, + "step": 2255 + }, + { + "epoch": 0.2379746835443038, + 
"grad_norm": 0.8913567662239075, + "learning_rate": 0.0015, + "loss": 1.7587, + "step": 2256 + }, + { + "epoch": 0.2380801687763713, + "grad_norm": 0.6646949648857117, + "learning_rate": 0.0015, + "loss": 1.7306, + "step": 2257 + }, + { + "epoch": 0.23818565400843883, + "grad_norm": 0.6183149814605713, + "learning_rate": 0.0015, + "loss": 1.7644, + "step": 2258 + }, + { + "epoch": 0.23829113924050632, + "grad_norm": 0.6488416194915771, + "learning_rate": 0.0015, + "loss": 1.7854, + "step": 2259 + }, + { + "epoch": 0.23839662447257384, + "grad_norm": 0.5634511709213257, + "learning_rate": 0.0015, + "loss": 1.7799, + "step": 2260 + }, + { + "epoch": 0.23850210970464136, + "grad_norm": 0.6052711009979248, + "learning_rate": 0.0015, + "loss": 1.7648, + "step": 2261 + }, + { + "epoch": 0.23860759493670886, + "grad_norm": 0.5160919427871704, + "learning_rate": 0.0015, + "loss": 1.7572, + "step": 2262 + }, + { + "epoch": 0.23871308016877638, + "grad_norm": 0.6135463714599609, + "learning_rate": 0.0015, + "loss": 1.7876, + "step": 2263 + }, + { + "epoch": 0.23881856540084387, + "grad_norm": 0.5171403884887695, + "learning_rate": 0.0015, + "loss": 1.8167, + "step": 2264 + }, + { + "epoch": 0.2389240506329114, + "grad_norm": 0.6402329802513123, + "learning_rate": 0.0015, + "loss": 1.828, + "step": 2265 + }, + { + "epoch": 0.2390295358649789, + "grad_norm": 0.48066070675849915, + "learning_rate": 0.0015, + "loss": 1.7841, + "step": 2266 + }, + { + "epoch": 0.2391350210970464, + "grad_norm": 0.6777234673500061, + "learning_rate": 0.0015, + "loss": 1.8165, + "step": 2267 + }, + { + "epoch": 0.23924050632911392, + "grad_norm": 0.5749910473823547, + "learning_rate": 0.0015, + "loss": 1.7853, + "step": 2268 + }, + { + "epoch": 0.23934599156118144, + "grad_norm": 0.5601595044136047, + "learning_rate": 0.0015, + "loss": 1.7819, + "step": 2269 + }, + { + "epoch": 0.23945147679324894, + "grad_norm": 0.5099558234214783, + "learning_rate": 0.0015, + "loss": 1.7881, + "step": 2270 + }, + 
{ + "epoch": 0.23955696202531646, + "grad_norm": 0.5596107840538025, + "learning_rate": 0.0015, + "loss": 1.7902, + "step": 2271 + }, + { + "epoch": 0.23966244725738398, + "grad_norm": 0.5314821004867554, + "learning_rate": 0.0015, + "loss": 1.7548, + "step": 2272 + }, + { + "epoch": 0.23976793248945147, + "grad_norm": 0.4766457974910736, + "learning_rate": 0.0015, + "loss": 1.7468, + "step": 2273 + }, + { + "epoch": 0.239873417721519, + "grad_norm": 0.6159982681274414, + "learning_rate": 0.0015, + "loss": 1.7946, + "step": 2274 + }, + { + "epoch": 0.2399789029535865, + "grad_norm": 0.66802579164505, + "learning_rate": 0.0015, + "loss": 1.7656, + "step": 2275 + }, + { + "epoch": 0.240084388185654, + "grad_norm": 0.5005107522010803, + "learning_rate": 0.0015, + "loss": 1.7842, + "step": 2276 + }, + { + "epoch": 0.24018987341772152, + "grad_norm": 0.5707549452781677, + "learning_rate": 0.0015, + "loss": 1.7734, + "step": 2277 + }, + { + "epoch": 0.24029535864978904, + "grad_norm": 0.5253913402557373, + "learning_rate": 0.0015, + "loss": 1.7752, + "step": 2278 + }, + { + "epoch": 0.24040084388185654, + "grad_norm": 0.53165602684021, + "learning_rate": 0.0015, + "loss": 1.7891, + "step": 2279 + }, + { + "epoch": 0.24050632911392406, + "grad_norm": 0.7151433825492859, + "learning_rate": 0.0015, + "loss": 1.7872, + "step": 2280 + }, + { + "epoch": 0.24061181434599155, + "grad_norm": 0.5916571021080017, + "learning_rate": 0.0015, + "loss": 1.7706, + "step": 2281 + }, + { + "epoch": 0.24071729957805907, + "grad_norm": 0.6928505897521973, + "learning_rate": 0.0015, + "loss": 1.733, + "step": 2282 + }, + { + "epoch": 0.2408227848101266, + "grad_norm": 1.0352296829223633, + "learning_rate": 0.0015, + "loss": 1.7642, + "step": 2283 + }, + { + "epoch": 0.24092827004219408, + "grad_norm": 0.7083351612091064, + "learning_rate": 0.0015, + "loss": 1.7254, + "step": 2284 + }, + { + "epoch": 0.2410337552742616, + "grad_norm": 0.529478907585144, + "learning_rate": 0.0015, + "loss": 
1.7556, + "step": 2285 + }, + { + "epoch": 0.24113924050632912, + "grad_norm": 0.8601495623588562, + "learning_rate": 0.0015, + "loss": 1.7973, + "step": 2286 + }, + { + "epoch": 0.24124472573839661, + "grad_norm": 0.5362940430641174, + "learning_rate": 0.0015, + "loss": 1.7757, + "step": 2287 + }, + { + "epoch": 0.24135021097046414, + "grad_norm": 0.6998580098152161, + "learning_rate": 0.0015, + "loss": 1.7864, + "step": 2288 + }, + { + "epoch": 0.24145569620253166, + "grad_norm": 0.8627341985702515, + "learning_rate": 0.0015, + "loss": 1.7583, + "step": 2289 + }, + { + "epoch": 0.24156118143459915, + "grad_norm": 0.49089959263801575, + "learning_rate": 0.0015, + "loss": 1.7727, + "step": 2290 + }, + { + "epoch": 0.24166666666666667, + "grad_norm": 0.8630306720733643, + "learning_rate": 0.0015, + "loss": 1.7806, + "step": 2291 + }, + { + "epoch": 0.2417721518987342, + "grad_norm": 0.5493969917297363, + "learning_rate": 0.0015, + "loss": 1.7151, + "step": 2292 + }, + { + "epoch": 0.24187763713080168, + "grad_norm": 0.7692028284072876, + "learning_rate": 0.0015, + "loss": 1.7705, + "step": 2293 + }, + { + "epoch": 0.2419831223628692, + "grad_norm": 0.8354348540306091, + "learning_rate": 0.0015, + "loss": 1.7715, + "step": 2294 + }, + { + "epoch": 0.24208860759493672, + "grad_norm": 0.47700226306915283, + "learning_rate": 0.0015, + "loss": 1.7695, + "step": 2295 + }, + { + "epoch": 0.24219409282700421, + "grad_norm": 0.6921583414077759, + "learning_rate": 0.0015, + "loss": 1.7743, + "step": 2296 + }, + { + "epoch": 0.24229957805907174, + "grad_norm": 0.5307833552360535, + "learning_rate": 0.0015, + "loss": 1.8149, + "step": 2297 + }, + { + "epoch": 0.24240506329113923, + "grad_norm": 0.6120598912239075, + "learning_rate": 0.0015, + "loss": 1.7587, + "step": 2298 + }, + { + "epoch": 0.24251054852320675, + "grad_norm": 0.6797193884849548, + "learning_rate": 0.0015, + "loss": 1.7552, + "step": 2299 + }, + { + "epoch": 0.24261603375527427, + "grad_norm": 
0.5515388250350952, + "learning_rate": 0.0015, + "loss": 1.7317, + "step": 2300 + }, + { + "epoch": 0.24272151898734176, + "grad_norm": 0.8552504181861877, + "learning_rate": 0.0015, + "loss": 1.7891, + "step": 2301 + }, + { + "epoch": 0.24282700421940928, + "grad_norm": 0.5781500339508057, + "learning_rate": 0.0015, + "loss": 1.7358, + "step": 2302 + }, + { + "epoch": 0.2429324894514768, + "grad_norm": 0.7933359146118164, + "learning_rate": 0.0015, + "loss": 1.7919, + "step": 2303 + }, + { + "epoch": 0.2430379746835443, + "grad_norm": 0.8131921291351318, + "learning_rate": 0.0015, + "loss": 1.7787, + "step": 2304 + }, + { + "epoch": 0.24314345991561181, + "grad_norm": 0.5366730690002441, + "learning_rate": 0.0015, + "loss": 1.7492, + "step": 2305 + }, + { + "epoch": 0.24324894514767934, + "grad_norm": 0.9996156096458435, + "learning_rate": 0.0015, + "loss": 1.7902, + "step": 2306 + }, + { + "epoch": 0.24335443037974683, + "grad_norm": 0.7723273634910583, + "learning_rate": 0.0015, + "loss": 1.7579, + "step": 2307 + }, + { + "epoch": 0.24345991561181435, + "grad_norm": 0.4938789904117584, + "learning_rate": 0.0015, + "loss": 1.7272, + "step": 2308 + }, + { + "epoch": 0.24356540084388187, + "grad_norm": 0.5969580411911011, + "learning_rate": 0.0015, + "loss": 1.7757, + "step": 2309 + }, + { + "epoch": 0.24367088607594936, + "grad_norm": 0.5582490563392639, + "learning_rate": 0.0015, + "loss": 1.7795, + "step": 2310 + }, + { + "epoch": 0.24377637130801688, + "grad_norm": 0.4658015966415405, + "learning_rate": 0.0015, + "loss": 1.7685, + "step": 2311 + }, + { + "epoch": 0.2438818565400844, + "grad_norm": 0.5754499435424805, + "learning_rate": 0.0015, + "loss": 1.7894, + "step": 2312 + }, + { + "epoch": 0.2439873417721519, + "grad_norm": 0.5328736901283264, + "learning_rate": 0.0015, + "loss": 1.8219, + "step": 2313 + }, + { + "epoch": 0.24409282700421941, + "grad_norm": 0.48569199442863464, + "learning_rate": 0.0015, + "loss": 1.7684, + "step": 2314 + }, + { + 
"epoch": 0.2441983122362869, + "grad_norm": 0.47076699137687683, + "learning_rate": 0.0015, + "loss": 1.7653, + "step": 2315 + }, + { + "epoch": 0.24430379746835443, + "grad_norm": 0.5177969932556152, + "learning_rate": 0.0015, + "loss": 1.8044, + "step": 2316 + }, + { + "epoch": 0.24440928270042195, + "grad_norm": 0.5015473961830139, + "learning_rate": 0.0015, + "loss": 1.7133, + "step": 2317 + }, + { + "epoch": 0.24451476793248944, + "grad_norm": 0.6228369474411011, + "learning_rate": 0.0015, + "loss": 1.7599, + "step": 2318 + }, + { + "epoch": 0.24462025316455696, + "grad_norm": 0.4836505651473999, + "learning_rate": 0.0015, + "loss": 1.7856, + "step": 2319 + }, + { + "epoch": 0.24472573839662448, + "grad_norm": 0.5013377666473389, + "learning_rate": 0.0015, + "loss": 1.7263, + "step": 2320 + }, + { + "epoch": 0.24483122362869197, + "grad_norm": 0.5119166970252991, + "learning_rate": 0.0015, + "loss": 1.7297, + "step": 2321 + }, + { + "epoch": 0.2449367088607595, + "grad_norm": 0.5447758436203003, + "learning_rate": 0.0015, + "loss": 1.784, + "step": 2322 + }, + { + "epoch": 0.24504219409282701, + "grad_norm": 0.4415464997291565, + "learning_rate": 0.0015, + "loss": 1.7608, + "step": 2323 + }, + { + "epoch": 0.2451476793248945, + "grad_norm": 0.5140018463134766, + "learning_rate": 0.0015, + "loss": 1.7631, + "step": 2324 + }, + { + "epoch": 0.24525316455696203, + "grad_norm": 0.513386607170105, + "learning_rate": 0.0015, + "loss": 1.7412, + "step": 2325 + }, + { + "epoch": 0.24535864978902955, + "grad_norm": 0.48736950755119324, + "learning_rate": 0.0015, + "loss": 1.7737, + "step": 2326 + }, + { + "epoch": 0.24546413502109704, + "grad_norm": 0.4886453151702881, + "learning_rate": 0.0015, + "loss": 1.7501, + "step": 2327 + }, + { + "epoch": 0.24556962025316456, + "grad_norm": 0.5579635500907898, + "learning_rate": 0.0015, + "loss": 1.7509, + "step": 2328 + }, + { + "epoch": 0.24567510548523205, + "grad_norm": 0.5681976675987244, + "learning_rate": 0.0015, + 
"loss": 1.752, + "step": 2329 + }, + { + "epoch": 0.24578059071729957, + "grad_norm": 0.4890390932559967, + "learning_rate": 0.0015, + "loss": 1.7307, + "step": 2330 + }, + { + "epoch": 0.2458860759493671, + "grad_norm": 0.5141686797142029, + "learning_rate": 0.0015, + "loss": 1.7177, + "step": 2331 + }, + { + "epoch": 0.2459915611814346, + "grad_norm": 0.5592617392539978, + "learning_rate": 0.0015, + "loss": 1.7774, + "step": 2332 + }, + { + "epoch": 0.2460970464135021, + "grad_norm": 0.45796963572502136, + "learning_rate": 0.0015, + "loss": 1.7619, + "step": 2333 + }, + { + "epoch": 0.24620253164556963, + "grad_norm": 0.4724443256855011, + "learning_rate": 0.0015, + "loss": 1.78, + "step": 2334 + }, + { + "epoch": 0.24630801687763712, + "grad_norm": 0.5037837028503418, + "learning_rate": 0.0015, + "loss": 1.7774, + "step": 2335 + }, + { + "epoch": 0.24641350210970464, + "grad_norm": 0.5889279246330261, + "learning_rate": 0.0015, + "loss": 1.769, + "step": 2336 + }, + { + "epoch": 0.24651898734177216, + "grad_norm": 0.515529990196228, + "learning_rate": 0.0015, + "loss": 1.7033, + "step": 2337 + }, + { + "epoch": 0.24662447257383965, + "grad_norm": 0.6168153882026672, + "learning_rate": 0.0015, + "loss": 1.743, + "step": 2338 + }, + { + "epoch": 0.24672995780590717, + "grad_norm": 0.5972260236740112, + "learning_rate": 0.0015, + "loss": 1.7623, + "step": 2339 + }, + { + "epoch": 0.2468354430379747, + "grad_norm": 0.5272727608680725, + "learning_rate": 0.0015, + "loss": 1.7522, + "step": 2340 + }, + { + "epoch": 0.2469409282700422, + "grad_norm": 0.614957869052887, + "learning_rate": 0.0015, + "loss": 1.7773, + "step": 2341 + }, + { + "epoch": 0.2470464135021097, + "grad_norm": 0.6469771862030029, + "learning_rate": 0.0015, + "loss": 1.7594, + "step": 2342 + }, + { + "epoch": 0.24715189873417723, + "grad_norm": 0.5531512498855591, + "learning_rate": 0.0015, + "loss": 1.7363, + "step": 2343 + }, + { + "epoch": 0.24725738396624472, + "grad_norm": 0.6211775541305542, 
+ "learning_rate": 0.0015, + "loss": 1.7691, + "step": 2344 + }, + { + "epoch": 0.24736286919831224, + "grad_norm": 0.5498647689819336, + "learning_rate": 0.0015, + "loss": 1.7529, + "step": 2345 + }, + { + "epoch": 0.24746835443037973, + "grad_norm": 0.7096125483512878, + "learning_rate": 0.0015, + "loss": 1.767, + "step": 2346 + }, + { + "epoch": 0.24757383966244725, + "grad_norm": 0.7859359383583069, + "learning_rate": 0.0015, + "loss": 1.7357, + "step": 2347 + }, + { + "epoch": 0.24767932489451477, + "grad_norm": 0.6023251414299011, + "learning_rate": 0.0015, + "loss": 1.7318, + "step": 2348 + }, + { + "epoch": 0.24778481012658227, + "grad_norm": 0.6840221881866455, + "learning_rate": 0.0015, + "loss": 1.7355, + "step": 2349 + }, + { + "epoch": 0.2478902953586498, + "grad_norm": 0.7530645132064819, + "learning_rate": 0.0015, + "loss": 1.7842, + "step": 2350 + }, + { + "epoch": 0.2479957805907173, + "grad_norm": 0.5974234342575073, + "learning_rate": 0.0015, + "loss": 1.7545, + "step": 2351 + }, + { + "epoch": 0.2481012658227848, + "grad_norm": 0.5100545287132263, + "learning_rate": 0.0015, + "loss": 1.7849, + "step": 2352 + }, + { + "epoch": 0.24820675105485232, + "grad_norm": 0.6527138948440552, + "learning_rate": 0.0015, + "loss": 1.7419, + "step": 2353 + }, + { + "epoch": 0.24831223628691984, + "grad_norm": 0.5429184436798096, + "learning_rate": 0.0015, + "loss": 1.773, + "step": 2354 + }, + { + "epoch": 0.24841772151898733, + "grad_norm": 0.7330167889595032, + "learning_rate": 0.0015, + "loss": 1.7893, + "step": 2355 + }, + { + "epoch": 0.24852320675105485, + "grad_norm": 0.6700617074966431, + "learning_rate": 0.0015, + "loss": 1.7491, + "step": 2356 + }, + { + "epoch": 0.24862869198312237, + "grad_norm": 0.521098256111145, + "learning_rate": 0.0015, + "loss": 1.7625, + "step": 2357 + }, + { + "epoch": 0.24873417721518987, + "grad_norm": 0.6005222797393799, + "learning_rate": 0.0015, + "loss": 1.7922, + "step": 2358 + }, + { + "epoch": 0.2488396624472574, + 
"grad_norm": 0.5163782835006714, + "learning_rate": 0.0015, + "loss": 1.7432, + "step": 2359 + }, + { + "epoch": 0.2489451476793249, + "grad_norm": 0.5147345662117004, + "learning_rate": 0.0015, + "loss": 1.7407, + "step": 2360 + }, + { + "epoch": 0.2490506329113924, + "grad_norm": 0.4879380762577057, + "learning_rate": 0.0015, + "loss": 1.7367, + "step": 2361 + }, + { + "epoch": 0.24915611814345992, + "grad_norm": 0.46628686785697937, + "learning_rate": 0.0015, + "loss": 1.7659, + "step": 2362 + }, + { + "epoch": 0.2492616033755274, + "grad_norm": 0.49309617280960083, + "learning_rate": 0.0015, + "loss": 1.7663, + "step": 2363 + }, + { + "epoch": 0.24936708860759493, + "grad_norm": 0.4331234097480774, + "learning_rate": 0.0015, + "loss": 1.747, + "step": 2364 + }, + { + "epoch": 0.24947257383966245, + "grad_norm": 0.49875301122665405, + "learning_rate": 0.0015, + "loss": 1.733, + "step": 2365 + }, + { + "epoch": 0.24957805907172995, + "grad_norm": 0.46396973729133606, + "learning_rate": 0.0015, + "loss": 1.7514, + "step": 2366 + }, + { + "epoch": 0.24968354430379747, + "grad_norm": 0.44407719373703003, + "learning_rate": 0.0015, + "loss": 1.7757, + "step": 2367 + }, + { + "epoch": 0.249789029535865, + "grad_norm": 0.48293107748031616, + "learning_rate": 0.0015, + "loss": 1.7652, + "step": 2368 + }, + { + "epoch": 0.24989451476793248, + "grad_norm": 0.5071579217910767, + "learning_rate": 0.0015, + "loss": 1.7642, + "step": 2369 + }, + { + "epoch": 0.25, + "grad_norm": 0.4867854714393616, + "learning_rate": 0.0015, + "loss": 1.7662, + "step": 2370 + }, + { + "epoch": 0.2501054852320675, + "grad_norm": 0.46139228343963623, + "learning_rate": 0.0015, + "loss": 1.756, + "step": 2371 + }, + { + "epoch": 0.25021097046413504, + "grad_norm": 0.5459284782409668, + "learning_rate": 0.0015, + "loss": 1.7577, + "step": 2372 + }, + { + "epoch": 0.25031645569620253, + "grad_norm": 0.642245888710022, + "learning_rate": 0.0015, + "loss": 1.7512, + "step": 2373 + }, + { + "epoch": 
0.25042194092827, + "grad_norm": 0.6148761510848999, + "learning_rate": 0.0015, + "loss": 1.7565, + "step": 2374 + }, + { + "epoch": 0.2505274261603376, + "grad_norm": 0.49759599566459656, + "learning_rate": 0.0015, + "loss": 1.7444, + "step": 2375 + }, + { + "epoch": 0.25063291139240507, + "grad_norm": 0.668725848197937, + "learning_rate": 0.0015, + "loss": 1.8011, + "step": 2376 + }, + { + "epoch": 0.25073839662447256, + "grad_norm": 0.4881817698478699, + "learning_rate": 0.0015, + "loss": 1.719, + "step": 2377 + }, + { + "epoch": 0.2508438818565401, + "grad_norm": 0.5557053685188293, + "learning_rate": 0.0015, + "loss": 1.7453, + "step": 2378 + }, + { + "epoch": 0.2509493670886076, + "grad_norm": 0.5239472985267639, + "learning_rate": 0.0015, + "loss": 1.7905, + "step": 2379 + }, + { + "epoch": 0.2510548523206751, + "grad_norm": 0.49819645285606384, + "learning_rate": 0.0015, + "loss": 1.7447, + "step": 2380 + }, + { + "epoch": 0.25116033755274264, + "grad_norm": 0.6239401698112488, + "learning_rate": 0.0015, + "loss": 1.769, + "step": 2381 + }, + { + "epoch": 0.25126582278481013, + "grad_norm": 0.7374022603034973, + "learning_rate": 0.0015, + "loss": 1.7528, + "step": 2382 + }, + { + "epoch": 0.2513713080168776, + "grad_norm": 0.578967809677124, + "learning_rate": 0.0015, + "loss": 1.7624, + "step": 2383 + }, + { + "epoch": 0.2514767932489452, + "grad_norm": 0.5678188800811768, + "learning_rate": 0.0015, + "loss": 1.7281, + "step": 2384 + }, + { + "epoch": 0.25158227848101267, + "grad_norm": 0.7271468043327332, + "learning_rate": 0.0015, + "loss": 1.7775, + "step": 2385 + }, + { + "epoch": 0.25168776371308016, + "grad_norm": 0.5477023720741272, + "learning_rate": 0.0015, + "loss": 1.7672, + "step": 2386 + }, + { + "epoch": 0.25179324894514765, + "grad_norm": 0.5434300303459167, + "learning_rate": 0.0015, + "loss": 1.8252, + "step": 2387 + }, + { + "epoch": 0.2518987341772152, + "grad_norm": 0.6164018511772156, + "learning_rate": 0.0015, + "loss": 1.7287, + 
"step": 2388 + }, + { + "epoch": 0.2520042194092827, + "grad_norm": 0.45354118943214417, + "learning_rate": 0.0015, + "loss": 1.7271, + "step": 2389 + }, + { + "epoch": 0.2521097046413502, + "grad_norm": 0.554826557636261, + "learning_rate": 0.0015, + "loss": 1.7546, + "step": 2390 + }, + { + "epoch": 0.25221518987341773, + "grad_norm": 0.5292742252349854, + "learning_rate": 0.0015, + "loss": 1.7347, + "step": 2391 + }, + { + "epoch": 0.2523206751054852, + "grad_norm": 0.6176003813743591, + "learning_rate": 0.0015, + "loss": 1.7249, + "step": 2392 + }, + { + "epoch": 0.2524261603375527, + "grad_norm": 0.8826800584793091, + "learning_rate": 0.0015, + "loss": 1.738, + "step": 2393 + }, + { + "epoch": 0.25253164556962027, + "grad_norm": 0.7090798020362854, + "learning_rate": 0.0015, + "loss": 1.7608, + "step": 2394 + }, + { + "epoch": 0.25263713080168776, + "grad_norm": 0.5037247538566589, + "learning_rate": 0.0015, + "loss": 1.7066, + "step": 2395 + }, + { + "epoch": 0.25274261603375525, + "grad_norm": 0.7920147180557251, + "learning_rate": 0.0015, + "loss": 1.7855, + "step": 2396 + }, + { + "epoch": 0.2528481012658228, + "grad_norm": 0.6410861015319824, + "learning_rate": 0.0015, + "loss": 1.8146, + "step": 2397 + }, + { + "epoch": 0.2529535864978903, + "grad_norm": 0.5408570766448975, + "learning_rate": 0.0015, + "loss": 1.7941, + "step": 2398 + }, + { + "epoch": 0.2530590717299578, + "grad_norm": 0.8117817640304565, + "learning_rate": 0.0015, + "loss": 1.754, + "step": 2399 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 0.6268106698989868, + "learning_rate": 0.0015, + "loss": 1.7755, + "step": 2400 + }, + { + "epoch": 0.2532700421940928, + "grad_norm": 0.553735077381134, + "learning_rate": 0.0015, + "loss": 1.7573, + "step": 2401 + }, + { + "epoch": 0.2533755274261603, + "grad_norm": 0.8653258085250854, + "learning_rate": 0.0015, + "loss": 1.7728, + "step": 2402 + }, + { + "epoch": 0.25348101265822787, + "grad_norm": 0.6993894577026367, + "learning_rate": 
0.0015, + "loss": 1.784, + "step": 2403 + }, + { + "epoch": 0.25358649789029536, + "grad_norm": 0.5147871971130371, + "learning_rate": 0.0015, + "loss": 1.7225, + "step": 2404 + }, + { + "epoch": 0.25369198312236285, + "grad_norm": 0.6098518967628479, + "learning_rate": 0.0015, + "loss": 1.7917, + "step": 2405 + }, + { + "epoch": 0.2537974683544304, + "grad_norm": 0.5055338144302368, + "learning_rate": 0.0015, + "loss": 1.7499, + "step": 2406 + }, + { + "epoch": 0.2539029535864979, + "grad_norm": 0.5311747193336487, + "learning_rate": 0.0015, + "loss": 1.783, + "step": 2407 + }, + { + "epoch": 0.2540084388185654, + "grad_norm": 0.6478826403617859, + "learning_rate": 0.0015, + "loss": 1.7385, + "step": 2408 + }, + { + "epoch": 0.25411392405063293, + "grad_norm": 0.5538125038146973, + "learning_rate": 0.0015, + "loss": 1.7245, + "step": 2409 + }, + { + "epoch": 0.2542194092827004, + "grad_norm": 0.4873404800891876, + "learning_rate": 0.0015, + "loss": 1.7841, + "step": 2410 + }, + { + "epoch": 0.2543248945147679, + "grad_norm": 0.4269186854362488, + "learning_rate": 0.0015, + "loss": 1.7877, + "step": 2411 + }, + { + "epoch": 0.25443037974683547, + "grad_norm": 0.49043551087379456, + "learning_rate": 0.0015, + "loss": 1.7437, + "step": 2412 + }, + { + "epoch": 0.25453586497890296, + "grad_norm": 0.4809432327747345, + "learning_rate": 0.0015, + "loss": 1.7624, + "step": 2413 + }, + { + "epoch": 0.25464135021097045, + "grad_norm": 0.4802286922931671, + "learning_rate": 0.0015, + "loss": 1.7227, + "step": 2414 + }, + { + "epoch": 0.254746835443038, + "grad_norm": 0.5694488883018494, + "learning_rate": 0.0015, + "loss": 1.7404, + "step": 2415 + }, + { + "epoch": 0.2548523206751055, + "grad_norm": 0.7195572853088379, + "learning_rate": 0.0015, + "loss": 1.7608, + "step": 2416 + }, + { + "epoch": 0.254957805907173, + "grad_norm": 0.5387224555015564, + "learning_rate": 0.0015, + "loss": 1.7206, + "step": 2417 + }, + { + "epoch": 0.25506329113924053, + "grad_norm": 
0.5167665481567383, + "learning_rate": 0.0015, + "loss": 1.7602, + "step": 2418 + }, + { + "epoch": 0.255168776371308, + "grad_norm": 0.6237902641296387, + "learning_rate": 0.0015, + "loss": 1.7328, + "step": 2419 + }, + { + "epoch": 0.2552742616033755, + "grad_norm": 0.5012637972831726, + "learning_rate": 0.0015, + "loss": 1.7853, + "step": 2420 + }, + { + "epoch": 0.255379746835443, + "grad_norm": 0.6789900660514832, + "learning_rate": 0.0015, + "loss": 1.7353, + "step": 2421 + }, + { + "epoch": 0.25548523206751056, + "grad_norm": 0.5136539936065674, + "learning_rate": 0.0015, + "loss": 1.828, + "step": 2422 + }, + { + "epoch": 0.25559071729957805, + "grad_norm": 0.607568085193634, + "learning_rate": 0.0015, + "loss": 1.7954, + "step": 2423 + }, + { + "epoch": 0.25569620253164554, + "grad_norm": 0.5693743824958801, + "learning_rate": 0.0015, + "loss": 1.7383, + "step": 2424 + }, + { + "epoch": 0.2558016877637131, + "grad_norm": 0.5048482418060303, + "learning_rate": 0.0015, + "loss": 1.7209, + "step": 2425 + }, + { + "epoch": 0.2559071729957806, + "grad_norm": 0.6488950252532959, + "learning_rate": 0.0015, + "loss": 1.7569, + "step": 2426 + }, + { + "epoch": 0.2560126582278481, + "grad_norm": 0.5138487815856934, + "learning_rate": 0.0015, + "loss": 1.7779, + "step": 2427 + }, + { + "epoch": 0.2561181434599156, + "grad_norm": 0.5535092949867249, + "learning_rate": 0.0015, + "loss": 1.7229, + "step": 2428 + }, + { + "epoch": 0.2562236286919831, + "grad_norm": 0.5026729702949524, + "learning_rate": 0.0015, + "loss": 1.7278, + "step": 2429 + }, + { + "epoch": 0.2563291139240506, + "grad_norm": 0.5470440983772278, + "learning_rate": 0.0015, + "loss": 1.7285, + "step": 2430 + }, + { + "epoch": 0.25643459915611816, + "grad_norm": 0.5697625279426575, + "learning_rate": 0.0015, + "loss": 1.7707, + "step": 2431 + }, + { + "epoch": 0.25654008438818565, + "grad_norm": 0.45551273226737976, + "learning_rate": 0.0015, + "loss": 1.784, + "step": 2432 + }, + { + "epoch": 
0.25664556962025314, + "grad_norm": 0.5044599771499634, + "learning_rate": 0.0015, + "loss": 1.7985, + "step": 2433 + }, + { + "epoch": 0.2567510548523207, + "grad_norm": 0.507034420967102, + "learning_rate": 0.0015, + "loss": 1.7076, + "step": 2434 + }, + { + "epoch": 0.2568565400843882, + "grad_norm": 0.6209434270858765, + "learning_rate": 0.0015, + "loss": 1.7289, + "step": 2435 + }, + { + "epoch": 0.2569620253164557, + "grad_norm": 0.6941367387771606, + "learning_rate": 0.0015, + "loss": 1.7307, + "step": 2436 + }, + { + "epoch": 0.2570675105485232, + "grad_norm": 0.5281949043273926, + "learning_rate": 0.0015, + "loss": 1.7658, + "step": 2437 + }, + { + "epoch": 0.2571729957805907, + "grad_norm": 0.5222294330596924, + "learning_rate": 0.0015, + "loss": 1.7592, + "step": 2438 + }, + { + "epoch": 0.2572784810126582, + "grad_norm": 0.5669064521789551, + "learning_rate": 0.0015, + "loss": 1.7521, + "step": 2439 + }, + { + "epoch": 0.25738396624472576, + "grad_norm": 0.4953993558883667, + "learning_rate": 0.0015, + "loss": 1.7841, + "step": 2440 + }, + { + "epoch": 0.25748945147679325, + "grad_norm": 0.5113043785095215, + "learning_rate": 0.0015, + "loss": 1.7606, + "step": 2441 + }, + { + "epoch": 0.25759493670886074, + "grad_norm": 0.4993591606616974, + "learning_rate": 0.0015, + "loss": 1.7429, + "step": 2442 + }, + { + "epoch": 0.2577004219409283, + "grad_norm": 0.5303695201873779, + "learning_rate": 0.0015, + "loss": 1.7589, + "step": 2443 + }, + { + "epoch": 0.2578059071729958, + "grad_norm": 0.5011569261550903, + "learning_rate": 0.0015, + "loss": 1.7346, + "step": 2444 + }, + { + "epoch": 0.2579113924050633, + "grad_norm": 0.49535638093948364, + "learning_rate": 0.0015, + "loss": 1.7893, + "step": 2445 + }, + { + "epoch": 0.2580168776371308, + "grad_norm": 0.5035144686698914, + "learning_rate": 0.0015, + "loss": 1.7236, + "step": 2446 + }, + { + "epoch": 0.2581223628691983, + "grad_norm": 0.47144025564193726, + "learning_rate": 0.0015, + "loss": 1.7437, + 
"step": 2447 + }, + { + "epoch": 0.2582278481012658, + "grad_norm": 0.49981555342674255, + "learning_rate": 0.0015, + "loss": 1.7408, + "step": 2448 + }, + { + "epoch": 0.25833333333333336, + "grad_norm": 0.5020222067832947, + "learning_rate": 0.0015, + "loss": 1.732, + "step": 2449 + }, + { + "epoch": 0.25843881856540085, + "grad_norm": 0.5577045679092407, + "learning_rate": 0.0015, + "loss": 1.7257, + "step": 2450 + }, + { + "epoch": 0.25854430379746834, + "grad_norm": 0.4685960114002228, + "learning_rate": 0.0015, + "loss": 1.7441, + "step": 2451 + }, + { + "epoch": 0.2586497890295359, + "grad_norm": 0.644061267375946, + "learning_rate": 0.0015, + "loss": 1.7921, + "step": 2452 + }, + { + "epoch": 0.2587552742616034, + "grad_norm": 0.5561867952346802, + "learning_rate": 0.0015, + "loss": 1.7458, + "step": 2453 + }, + { + "epoch": 0.2588607594936709, + "grad_norm": 0.5068477988243103, + "learning_rate": 0.0015, + "loss": 1.7944, + "step": 2454 + }, + { + "epoch": 0.25896624472573837, + "grad_norm": 0.531140148639679, + "learning_rate": 0.0015, + "loss": 1.7259, + "step": 2455 + }, + { + "epoch": 0.2590717299578059, + "grad_norm": 0.5053848624229431, + "learning_rate": 0.0015, + "loss": 1.7578, + "step": 2456 + }, + { + "epoch": 0.2591772151898734, + "grad_norm": 0.5224964618682861, + "learning_rate": 0.0015, + "loss": 1.7489, + "step": 2457 + }, + { + "epoch": 0.2592827004219409, + "grad_norm": 0.5178977251052856, + "learning_rate": 0.0015, + "loss": 1.7682, + "step": 2458 + }, + { + "epoch": 0.25938818565400845, + "grad_norm": 0.5664845705032349, + "learning_rate": 0.0015, + "loss": 1.7788, + "step": 2459 + }, + { + "epoch": 0.25949367088607594, + "grad_norm": 0.4939206838607788, + "learning_rate": 0.0015, + "loss": 1.7568, + "step": 2460 + }, + { + "epoch": 0.25959915611814344, + "grad_norm": 0.46851345896720886, + "learning_rate": 0.0015, + "loss": 1.7087, + "step": 2461 + }, + { + "epoch": 0.259704641350211, + "grad_norm": 0.4888216257095337, + 
"learning_rate": 0.0015, + "loss": 1.7576, + "step": 2462 + }, + { + "epoch": 0.2598101265822785, + "grad_norm": 0.5279091000556946, + "learning_rate": 0.0015, + "loss": 1.7623, + "step": 2463 + }, + { + "epoch": 0.25991561181434597, + "grad_norm": 0.4827437996864319, + "learning_rate": 0.0015, + "loss": 1.7188, + "step": 2464 + }, + { + "epoch": 0.2600210970464135, + "grad_norm": 0.4542250633239746, + "learning_rate": 0.0015, + "loss": 1.7798, + "step": 2465 + }, + { + "epoch": 0.260126582278481, + "grad_norm": 0.5125083923339844, + "learning_rate": 0.0015, + "loss": 1.732, + "step": 2466 + }, + { + "epoch": 0.2602320675105485, + "grad_norm": 0.5349894762039185, + "learning_rate": 0.0015, + "loss": 1.7494, + "step": 2467 + }, + { + "epoch": 0.26033755274261605, + "grad_norm": 0.5146206021308899, + "learning_rate": 0.0015, + "loss": 1.7601, + "step": 2468 + }, + { + "epoch": 0.26044303797468354, + "grad_norm": 0.5680448412895203, + "learning_rate": 0.0015, + "loss": 1.769, + "step": 2469 + }, + { + "epoch": 0.26054852320675104, + "grad_norm": 0.7838568091392517, + "learning_rate": 0.0015, + "loss": 1.7703, + "step": 2470 + }, + { + "epoch": 0.2606540084388186, + "grad_norm": 0.5395078063011169, + "learning_rate": 0.0015, + "loss": 1.793, + "step": 2471 + }, + { + "epoch": 0.2607594936708861, + "grad_norm": 0.5863775014877319, + "learning_rate": 0.0015, + "loss": 1.7372, + "step": 2472 + }, + { + "epoch": 0.26086497890295357, + "grad_norm": 0.6507776379585266, + "learning_rate": 0.0015, + "loss": 1.7503, + "step": 2473 + }, + { + "epoch": 0.2609704641350211, + "grad_norm": 0.6046692132949829, + "learning_rate": 0.0015, + "loss": 1.7513, + "step": 2474 + }, + { + "epoch": 0.2610759493670886, + "grad_norm": 0.49888819456100464, + "learning_rate": 0.0015, + "loss": 1.7628, + "step": 2475 + }, + { + "epoch": 0.2611814345991561, + "grad_norm": 0.5066530704498291, + "learning_rate": 0.0015, + "loss": 1.7337, + "step": 2476 + }, + { + "epoch": 0.26128691983122365, + 
"grad_norm": 0.5808389782905579, + "learning_rate": 0.0015, + "loss": 1.7711, + "step": 2477 + }, + { + "epoch": 0.26139240506329114, + "grad_norm": 0.5939879417419434, + "learning_rate": 0.0015, + "loss": 1.7491, + "step": 2478 + }, + { + "epoch": 0.26149789029535864, + "grad_norm": 0.5914461612701416, + "learning_rate": 0.0015, + "loss": 1.7535, + "step": 2479 + }, + { + "epoch": 0.2616033755274262, + "grad_norm": 0.5965650677680969, + "learning_rate": 0.0015, + "loss": 1.7548, + "step": 2480 + }, + { + "epoch": 0.2617088607594937, + "grad_norm": 0.5369855165481567, + "learning_rate": 0.0015, + "loss": 1.7341, + "step": 2481 + }, + { + "epoch": 0.26181434599156117, + "grad_norm": 0.5695091485977173, + "learning_rate": 0.0015, + "loss": 1.7267, + "step": 2482 + }, + { + "epoch": 0.2619198312236287, + "grad_norm": 0.6204779744148254, + "learning_rate": 0.0015, + "loss": 1.7511, + "step": 2483 + }, + { + "epoch": 0.2620253164556962, + "grad_norm": 0.5356783866882324, + "learning_rate": 0.0015, + "loss": 1.715, + "step": 2484 + }, + { + "epoch": 0.2621308016877637, + "grad_norm": 0.57508784532547, + "learning_rate": 0.0015, + "loss": 1.7589, + "step": 2485 + }, + { + "epoch": 0.2622362869198312, + "grad_norm": 0.5130199193954468, + "learning_rate": 0.0015, + "loss": 1.7382, + "step": 2486 + }, + { + "epoch": 0.26234177215189874, + "grad_norm": 0.4947783946990967, + "learning_rate": 0.0015, + "loss": 1.742, + "step": 2487 + }, + { + "epoch": 0.26244725738396624, + "grad_norm": 0.5992721319198608, + "learning_rate": 0.0015, + "loss": 1.7786, + "step": 2488 + }, + { + "epoch": 0.26255274261603373, + "grad_norm": 0.5535210967063904, + "learning_rate": 0.0015, + "loss": 1.7184, + "step": 2489 + }, + { + "epoch": 0.2626582278481013, + "grad_norm": 0.5110808610916138, + "learning_rate": 0.0015, + "loss": 1.7651, + "step": 2490 + }, + { + "epoch": 0.26276371308016877, + "grad_norm": 0.5434951782226562, + "learning_rate": 0.0015, + "loss": 1.7297, + "step": 2491 + }, + { + 
"epoch": 0.26286919831223626, + "grad_norm": 0.5436090230941772, + "learning_rate": 0.0015, + "loss": 1.7435, + "step": 2492 + }, + { + "epoch": 0.2629746835443038, + "grad_norm": 0.5366165041923523, + "learning_rate": 0.0015, + "loss": 1.7876, + "step": 2493 + }, + { + "epoch": 0.2630801687763713, + "grad_norm": 0.5321416854858398, + "learning_rate": 0.0015, + "loss": 1.738, + "step": 2494 + }, + { + "epoch": 0.2631856540084388, + "grad_norm": 0.4772559404373169, + "learning_rate": 0.0015, + "loss": 1.7456, + "step": 2495 + }, + { + "epoch": 0.26329113924050634, + "grad_norm": 0.5794147253036499, + "learning_rate": 0.0015, + "loss": 1.7415, + "step": 2496 + }, + { + "epoch": 0.26339662447257384, + "grad_norm": 0.5084999203681946, + "learning_rate": 0.0015, + "loss": 1.7911, + "step": 2497 + }, + { + "epoch": 0.26350210970464133, + "grad_norm": 0.5278586149215698, + "learning_rate": 0.0015, + "loss": 1.7408, + "step": 2498 + }, + { + "epoch": 0.2636075949367089, + "grad_norm": 0.49334725737571716, + "learning_rate": 0.0015, + "loss": 1.7483, + "step": 2499 + }, + { + "epoch": 0.26371308016877637, + "grad_norm": 0.5233896970748901, + "learning_rate": 0.0015, + "loss": 1.7296, + "step": 2500 + }, + { + "epoch": 0.26381856540084386, + "grad_norm": 0.5626576542854309, + "learning_rate": 0.0015, + "loss": 1.7361, + "step": 2501 + }, + { + "epoch": 0.2639240506329114, + "grad_norm": 0.48603805899620056, + "learning_rate": 0.0015, + "loss": 1.7742, + "step": 2502 + }, + { + "epoch": 0.2640295358649789, + "grad_norm": 0.5147922039031982, + "learning_rate": 0.0015, + "loss": 1.7524, + "step": 2503 + }, + { + "epoch": 0.2641350210970464, + "grad_norm": 0.4762532114982605, + "learning_rate": 0.0015, + "loss": 1.7223, + "step": 2504 + }, + { + "epoch": 0.26424050632911394, + "grad_norm": 0.5490362048149109, + "learning_rate": 0.0015, + "loss": 1.7778, + "step": 2505 + }, + { + "epoch": 0.26434599156118144, + "grad_norm": 0.534259557723999, + "learning_rate": 0.0015, + "loss": 
1.7273, + "step": 2506 + }, + { + "epoch": 0.26445147679324893, + "grad_norm": 0.5113989114761353, + "learning_rate": 0.0015, + "loss": 1.7562, + "step": 2507 + }, + { + "epoch": 0.2645569620253165, + "grad_norm": 0.492646723985672, + "learning_rate": 0.0015, + "loss": 1.7309, + "step": 2508 + }, + { + "epoch": 0.26466244725738397, + "grad_norm": 0.507503092288971, + "learning_rate": 0.0015, + "loss": 1.7302, + "step": 2509 + }, + { + "epoch": 0.26476793248945146, + "grad_norm": 0.5104250311851501, + "learning_rate": 0.0015, + "loss": 1.7672, + "step": 2510 + }, + { + "epoch": 0.264873417721519, + "grad_norm": 0.5170099139213562, + "learning_rate": 0.0015, + "loss": 1.7847, + "step": 2511 + }, + { + "epoch": 0.2649789029535865, + "grad_norm": 0.4758244454860687, + "learning_rate": 0.0015, + "loss": 1.7553, + "step": 2512 + }, + { + "epoch": 0.265084388185654, + "grad_norm": 0.503151535987854, + "learning_rate": 0.0015, + "loss": 1.7228, + "step": 2513 + }, + { + "epoch": 0.26518987341772154, + "grad_norm": 0.47079765796661377, + "learning_rate": 0.0015, + "loss": 1.7403, + "step": 2514 + }, + { + "epoch": 0.26529535864978904, + "grad_norm": 0.4670954644680023, + "learning_rate": 0.0015, + "loss": 1.7363, + "step": 2515 + }, + { + "epoch": 0.26540084388185653, + "grad_norm": 0.49063602089881897, + "learning_rate": 0.0015, + "loss": 1.7531, + "step": 2516 + }, + { + "epoch": 0.2655063291139241, + "grad_norm": 0.5368266105651855, + "learning_rate": 0.0015, + "loss": 1.7577, + "step": 2517 + }, + { + "epoch": 0.26561181434599157, + "grad_norm": 0.5149813890457153, + "learning_rate": 0.0015, + "loss": 1.763, + "step": 2518 + }, + { + "epoch": 0.26571729957805906, + "grad_norm": 0.49429503083229065, + "learning_rate": 0.0015, + "loss": 1.7477, + "step": 2519 + }, + { + "epoch": 0.26582278481012656, + "grad_norm": 0.5160220861434937, + "learning_rate": 0.0015, + "loss": 1.7603, + "step": 2520 + }, + { + "epoch": 0.2659282700421941, + "grad_norm": 0.6020708680152893, + 
"learning_rate": 0.0015, + "loss": 1.7451, + "step": 2521 + }, + { + "epoch": 0.2660337552742616, + "grad_norm": 0.6109582781791687, + "learning_rate": 0.0015, + "loss": 1.7396, + "step": 2522 + }, + { + "epoch": 0.2661392405063291, + "grad_norm": 0.5616773366928101, + "learning_rate": 0.0015, + "loss": 1.7657, + "step": 2523 + }, + { + "epoch": 0.26624472573839664, + "grad_norm": 0.4889385998249054, + "learning_rate": 0.0015, + "loss": 1.6985, + "step": 2524 + }, + { + "epoch": 0.26635021097046413, + "grad_norm": 0.5477367639541626, + "learning_rate": 0.0015, + "loss": 1.7117, + "step": 2525 + }, + { + "epoch": 0.2664556962025316, + "grad_norm": 0.5434495806694031, + "learning_rate": 0.0015, + "loss": 1.7252, + "step": 2526 + }, + { + "epoch": 0.26656118143459917, + "grad_norm": 0.5032594203948975, + "learning_rate": 0.0015, + "loss": 1.7423, + "step": 2527 + }, + { + "epoch": 0.26666666666666666, + "grad_norm": 0.6376218795776367, + "learning_rate": 0.0015, + "loss": 1.745, + "step": 2528 + }, + { + "epoch": 0.26677215189873416, + "grad_norm": 0.740061342716217, + "learning_rate": 0.0015, + "loss": 1.8298, + "step": 2529 + }, + { + "epoch": 0.2668776371308017, + "grad_norm": 0.7886686325073242, + "learning_rate": 0.0015, + "loss": 1.7371, + "step": 2530 + }, + { + "epoch": 0.2669831223628692, + "grad_norm": 0.520111620426178, + "learning_rate": 0.0015, + "loss": 1.7384, + "step": 2531 + }, + { + "epoch": 0.2670886075949367, + "grad_norm": 0.6698089241981506, + "learning_rate": 0.0015, + "loss": 1.7459, + "step": 2532 + }, + { + "epoch": 0.26719409282700424, + "grad_norm": 0.8717212080955505, + "learning_rate": 0.0015, + "loss": 1.6923, + "step": 2533 + }, + { + "epoch": 0.26729957805907173, + "grad_norm": 0.5864850282669067, + "learning_rate": 0.0015, + "loss": 1.7308, + "step": 2534 + }, + { + "epoch": 0.2674050632911392, + "grad_norm": 0.4986039996147156, + "learning_rate": 0.0015, + "loss": 1.7879, + "step": 2535 + }, + { + "epoch": 0.26751054852320677, + 
"grad_norm": 0.6276726126670837, + "learning_rate": 0.0015, + "loss": 1.7516, + "step": 2536 + }, + { + "epoch": 0.26761603375527426, + "grad_norm": 0.5389881730079651, + "learning_rate": 0.0015, + "loss": 1.7396, + "step": 2537 + }, + { + "epoch": 0.26772151898734176, + "grad_norm": 0.5643240213394165, + "learning_rate": 0.0015, + "loss": 1.7832, + "step": 2538 + }, + { + "epoch": 0.2678270042194093, + "grad_norm": 0.6625216007232666, + "learning_rate": 0.0015, + "loss": 1.7545, + "step": 2539 + }, + { + "epoch": 0.2679324894514768, + "grad_norm": 0.5147009491920471, + "learning_rate": 0.0015, + "loss": 1.7298, + "step": 2540 + }, + { + "epoch": 0.2680379746835443, + "grad_norm": 0.6473565101623535, + "learning_rate": 0.0015, + "loss": 1.7371, + "step": 2541 + }, + { + "epoch": 0.26814345991561184, + "grad_norm": 0.6117268204689026, + "learning_rate": 0.0015, + "loss": 1.7687, + "step": 2542 + }, + { + "epoch": 0.26824894514767933, + "grad_norm": 0.4728650152683258, + "learning_rate": 0.0015, + "loss": 1.7349, + "step": 2543 + }, + { + "epoch": 0.2683544303797468, + "grad_norm": 0.5104753375053406, + "learning_rate": 0.0015, + "loss": 1.7113, + "step": 2544 + }, + { + "epoch": 0.26845991561181437, + "grad_norm": 0.6263878345489502, + "learning_rate": 0.0015, + "loss": 1.7494, + "step": 2545 + }, + { + "epoch": 0.26856540084388186, + "grad_norm": 0.5564925074577332, + "learning_rate": 0.0015, + "loss": 1.7466, + "step": 2546 + }, + { + "epoch": 0.26867088607594936, + "grad_norm": 0.48637768626213074, + "learning_rate": 0.0015, + "loss": 1.7278, + "step": 2547 + }, + { + "epoch": 0.2687763713080169, + "grad_norm": 0.5296652913093567, + "learning_rate": 0.0015, + "loss": 1.7439, + "step": 2548 + }, + { + "epoch": 0.2688818565400844, + "grad_norm": 0.5677223801612854, + "learning_rate": 0.0015, + "loss": 1.7483, + "step": 2549 + }, + { + "epoch": 0.2689873417721519, + "grad_norm": 0.5286999344825745, + "learning_rate": 0.0015, + "loss": 1.7355, + "step": 2550 + }, + { 
+ "epoch": 0.26909282700421944, + "grad_norm": 0.4440421164035797, + "learning_rate": 0.0015, + "loss": 1.7615, + "step": 2551 + }, + { + "epoch": 0.26919831223628693, + "grad_norm": 0.533702552318573, + "learning_rate": 0.0015, + "loss": 1.7746, + "step": 2552 + }, + { + "epoch": 0.2693037974683544, + "grad_norm": 0.46381649374961853, + "learning_rate": 0.0015, + "loss": 1.7797, + "step": 2553 + }, + { + "epoch": 0.2694092827004219, + "grad_norm": 0.5055655241012573, + "learning_rate": 0.0015, + "loss": 1.7355, + "step": 2554 + }, + { + "epoch": 0.26951476793248946, + "grad_norm": 0.5017513036727905, + "learning_rate": 0.0015, + "loss": 1.748, + "step": 2555 + }, + { + "epoch": 0.26962025316455696, + "grad_norm": 0.51686692237854, + "learning_rate": 0.0015, + "loss": 1.7609, + "step": 2556 + }, + { + "epoch": 0.26972573839662445, + "grad_norm": 0.4669591188430786, + "learning_rate": 0.0015, + "loss": 1.7476, + "step": 2557 + }, + { + "epoch": 0.269831223628692, + "grad_norm": 0.4892376661300659, + "learning_rate": 0.0015, + "loss": 1.7544, + "step": 2558 + }, + { + "epoch": 0.2699367088607595, + "grad_norm": 0.5178463459014893, + "learning_rate": 0.0015, + "loss": 1.7458, + "step": 2559 + }, + { + "epoch": 0.270042194092827, + "grad_norm": 0.5536075234413147, + "learning_rate": 0.0015, + "loss": 1.7082, + "step": 2560 + }, + { + "epoch": 0.27014767932489453, + "grad_norm": 0.6125830411911011, + "learning_rate": 0.0015, + "loss": 1.7318, + "step": 2561 + }, + { + "epoch": 0.270253164556962, + "grad_norm": 0.5590088963508606, + "learning_rate": 0.0015, + "loss": 1.7592, + "step": 2562 + }, + { + "epoch": 0.2703586497890295, + "grad_norm": 0.4859699606895447, + "learning_rate": 0.0015, + "loss": 1.7826, + "step": 2563 + }, + { + "epoch": 0.27046413502109706, + "grad_norm": 0.4657300114631653, + "learning_rate": 0.0015, + "loss": 1.7336, + "step": 2564 + }, + { + "epoch": 0.27056962025316456, + "grad_norm": 0.4587562382221222, + "learning_rate": 0.0015, + "loss": 
1.704, + "step": 2565 + }, + { + "epoch": 0.27067510548523205, + "grad_norm": 0.5151911973953247, + "learning_rate": 0.0015, + "loss": 1.7224, + "step": 2566 + }, + { + "epoch": 0.2707805907172996, + "grad_norm": 0.6008763313293457, + "learning_rate": 0.0015, + "loss": 1.764, + "step": 2567 + }, + { + "epoch": 0.2708860759493671, + "grad_norm": 0.6141114234924316, + "learning_rate": 0.0015, + "loss": 1.7535, + "step": 2568 + }, + { + "epoch": 0.2709915611814346, + "grad_norm": 0.5276148915290833, + "learning_rate": 0.0015, + "loss": 1.7049, + "step": 2569 + }, + { + "epoch": 0.27109704641350213, + "grad_norm": 0.5054593682289124, + "learning_rate": 0.0015, + "loss": 1.7749, + "step": 2570 + }, + { + "epoch": 0.2712025316455696, + "grad_norm": 0.5235294103622437, + "learning_rate": 0.0015, + "loss": 1.7161, + "step": 2571 + }, + { + "epoch": 0.2713080168776371, + "grad_norm": 0.561466634273529, + "learning_rate": 0.0015, + "loss": 1.7458, + "step": 2572 + }, + { + "epoch": 0.27141350210970466, + "grad_norm": 0.6114590167999268, + "learning_rate": 0.0015, + "loss": 1.7455, + "step": 2573 + }, + { + "epoch": 0.27151898734177216, + "grad_norm": 0.5305688977241516, + "learning_rate": 0.0015, + "loss": 1.7409, + "step": 2574 + }, + { + "epoch": 0.27162447257383965, + "grad_norm": 0.56999671459198, + "learning_rate": 0.0015, + "loss": 1.7445, + "step": 2575 + }, + { + "epoch": 0.2717299578059072, + "grad_norm": 0.5928850173950195, + "learning_rate": 0.0015, + "loss": 1.7156, + "step": 2576 + }, + { + "epoch": 0.2718354430379747, + "grad_norm": 0.5534878373146057, + "learning_rate": 0.0015, + "loss": 1.757, + "step": 2577 + }, + { + "epoch": 0.2719409282700422, + "grad_norm": 0.4853445589542389, + "learning_rate": 0.0015, + "loss": 1.7935, + "step": 2578 + }, + { + "epoch": 0.27204641350210973, + "grad_norm": 0.5856049060821533, + "learning_rate": 0.0015, + "loss": 1.7429, + "step": 2579 + }, + { + "epoch": 0.2721518987341772, + "grad_norm": 0.5238052606582642, + 
"learning_rate": 0.0015, + "loss": 1.7442, + "step": 2580 + }, + { + "epoch": 0.2722573839662447, + "grad_norm": 0.6355899572372437, + "learning_rate": 0.0015, + "loss": 1.7258, + "step": 2581 + }, + { + "epoch": 0.27236286919831226, + "grad_norm": 0.880685567855835, + "learning_rate": 0.0015, + "loss": 1.7596, + "step": 2582 + }, + { + "epoch": 0.27246835443037976, + "grad_norm": 0.7288837432861328, + "learning_rate": 0.0015, + "loss": 1.7502, + "step": 2583 + }, + { + "epoch": 0.27257383966244725, + "grad_norm": 0.49775776267051697, + "learning_rate": 0.0015, + "loss": 1.7494, + "step": 2584 + }, + { + "epoch": 0.27267932489451474, + "grad_norm": 0.899782657623291, + "learning_rate": 0.0015, + "loss": 1.774, + "step": 2585 + }, + { + "epoch": 0.2727848101265823, + "grad_norm": 1.0974575281143188, + "learning_rate": 0.0015, + "loss": 1.736, + "step": 2586 + }, + { + "epoch": 0.2728902953586498, + "grad_norm": 0.7350040078163147, + "learning_rate": 0.0015, + "loss": 1.7729, + "step": 2587 + }, + { + "epoch": 0.2729957805907173, + "grad_norm": 0.5562779903411865, + "learning_rate": 0.0015, + "loss": 1.7598, + "step": 2588 + }, + { + "epoch": 0.2731012658227848, + "grad_norm": 0.5916325449943542, + "learning_rate": 0.0015, + "loss": 1.7439, + "step": 2589 + }, + { + "epoch": 0.2732067510548523, + "grad_norm": 0.5184524059295654, + "learning_rate": 0.0015, + "loss": 1.7488, + "step": 2590 + }, + { + "epoch": 0.2733122362869198, + "grad_norm": 0.5797756314277649, + "learning_rate": 0.0015, + "loss": 1.7461, + "step": 2591 + }, + { + "epoch": 0.27341772151898736, + "grad_norm": 0.6259637475013733, + "learning_rate": 0.0015, + "loss": 1.768, + "step": 2592 + }, + { + "epoch": 0.27352320675105485, + "grad_norm": 0.5711591243743896, + "learning_rate": 0.0015, + "loss": 1.7512, + "step": 2593 + }, + { + "epoch": 0.27362869198312234, + "grad_norm": 0.5064637660980225, + "learning_rate": 0.0015, + "loss": 1.7501, + "step": 2594 + }, + { + "epoch": 0.2737341772151899, + 
"grad_norm": 0.5987383723258972, + "learning_rate": 0.0015, + "loss": 1.7777, + "step": 2595 + }, + { + "epoch": 0.2738396624472574, + "grad_norm": 0.6491714119911194, + "learning_rate": 0.0015, + "loss": 1.7477, + "step": 2596 + }, + { + "epoch": 0.2739451476793249, + "grad_norm": 0.5929031372070312, + "learning_rate": 0.0015, + "loss": 1.7369, + "step": 2597 + }, + { + "epoch": 0.2740506329113924, + "grad_norm": 0.4909879267215729, + "learning_rate": 0.0015, + "loss": 1.7172, + "step": 2598 + }, + { + "epoch": 0.2741561181434599, + "grad_norm": 0.6378136873245239, + "learning_rate": 0.0015, + "loss": 1.729, + "step": 2599 + }, + { + "epoch": 0.2742616033755274, + "grad_norm": 0.598289430141449, + "learning_rate": 0.0015, + "loss": 1.776, + "step": 2600 + }, + { + "epoch": 0.27436708860759496, + "grad_norm": 0.46960604190826416, + "learning_rate": 0.0015, + "loss": 1.7285, + "step": 2601 + }, + { + "epoch": 0.27447257383966245, + "grad_norm": 0.7559182643890381, + "learning_rate": 0.0015, + "loss": 1.7455, + "step": 2602 + }, + { + "epoch": 0.27457805907172994, + "grad_norm": 0.8152137398719788, + "learning_rate": 0.0015, + "loss": 1.6857, + "step": 2603 + }, + { + "epoch": 0.2746835443037975, + "grad_norm": 0.5375908613204956, + "learning_rate": 0.0015, + "loss": 1.7668, + "step": 2604 + }, + { + "epoch": 0.274789029535865, + "grad_norm": 0.4875588119029999, + "learning_rate": 0.0015, + "loss": 1.7754, + "step": 2605 + }, + { + "epoch": 0.2748945147679325, + "grad_norm": 0.4983108341693878, + "learning_rate": 0.0015, + "loss": 1.7716, + "step": 2606 + }, + { + "epoch": 0.275, + "grad_norm": 0.47570163011550903, + "learning_rate": 0.0015, + "loss": 1.7631, + "step": 2607 + }, + { + "epoch": 0.2751054852320675, + "grad_norm": 0.5114532709121704, + "learning_rate": 0.0015, + "loss": 1.7378, + "step": 2608 + }, + { + "epoch": 0.275210970464135, + "grad_norm": 0.5763033628463745, + "learning_rate": 0.0015, + "loss": 1.7628, + "step": 2609 + }, + { + "epoch": 
0.27531645569620256, + "grad_norm": 0.4745025336742401, + "learning_rate": 0.0015, + "loss": 1.7486, + "step": 2610 + }, + { + "epoch": 0.27542194092827005, + "grad_norm": 0.6279363632202148, + "learning_rate": 0.0015, + "loss": 1.765, + "step": 2611 + }, + { + "epoch": 0.27552742616033754, + "grad_norm": 0.7709202766418457, + "learning_rate": 0.0015, + "loss": 1.7587, + "step": 2612 + }, + { + "epoch": 0.2756329113924051, + "grad_norm": 0.589849591255188, + "learning_rate": 0.0015, + "loss": 1.787, + "step": 2613 + }, + { + "epoch": 0.2757383966244726, + "grad_norm": 0.6236584186553955, + "learning_rate": 0.0015, + "loss": 1.7393, + "step": 2614 + }, + { + "epoch": 0.2758438818565401, + "grad_norm": 0.5918351411819458, + "learning_rate": 0.0015, + "loss": 1.7454, + "step": 2615 + }, + { + "epoch": 0.2759493670886076, + "grad_norm": 0.5218271613121033, + "learning_rate": 0.0015, + "loss": 1.7274, + "step": 2616 + }, + { + "epoch": 0.2760548523206751, + "grad_norm": 0.5378252267837524, + "learning_rate": 0.0015, + "loss": 1.7254, + "step": 2617 + }, + { + "epoch": 0.2761603375527426, + "grad_norm": 0.4909058213233948, + "learning_rate": 0.0015, + "loss": 1.7187, + "step": 2618 + }, + { + "epoch": 0.2762658227848101, + "grad_norm": 0.5239964127540588, + "learning_rate": 0.0015, + "loss": 1.7432, + "step": 2619 + }, + { + "epoch": 0.27637130801687765, + "grad_norm": 0.5242708325386047, + "learning_rate": 0.0015, + "loss": 1.7262, + "step": 2620 + }, + { + "epoch": 0.27647679324894514, + "grad_norm": 0.4703781008720398, + "learning_rate": 0.0015, + "loss": 1.7169, + "step": 2621 + }, + { + "epoch": 0.27658227848101263, + "grad_norm": 0.6690729856491089, + "learning_rate": 0.0015, + "loss": 1.7061, + "step": 2622 + }, + { + "epoch": 0.2766877637130802, + "grad_norm": 0.6361964344978333, + "learning_rate": 0.0015, + "loss": 1.7189, + "step": 2623 + }, + { + "epoch": 0.2767932489451477, + "grad_norm": 0.520235002040863, + "learning_rate": 0.0015, + "loss": 1.7574, + 
"step": 2624 + }, + { + "epoch": 0.27689873417721517, + "grad_norm": 0.6627557873725891, + "learning_rate": 0.0015, + "loss": 1.7287, + "step": 2625 + }, + { + "epoch": 0.2770042194092827, + "grad_norm": 0.6653998494148254, + "learning_rate": 0.0015, + "loss": 1.7212, + "step": 2626 + }, + { + "epoch": 0.2771097046413502, + "grad_norm": 0.49855709075927734, + "learning_rate": 0.0015, + "loss": 1.7185, + "step": 2627 + }, + { + "epoch": 0.2772151898734177, + "grad_norm": 0.5100919008255005, + "learning_rate": 0.0015, + "loss": 1.7525, + "step": 2628 + }, + { + "epoch": 0.27732067510548525, + "grad_norm": 0.5465761423110962, + "learning_rate": 0.0015, + "loss": 1.7255, + "step": 2629 + }, + { + "epoch": 0.27742616033755274, + "grad_norm": 0.4805075228214264, + "learning_rate": 0.0015, + "loss": 1.7762, + "step": 2630 + }, + { + "epoch": 0.27753164556962023, + "grad_norm": 0.5802726149559021, + "learning_rate": 0.0015, + "loss": 1.7474, + "step": 2631 + }, + { + "epoch": 0.2776371308016878, + "grad_norm": 0.5042747259140015, + "learning_rate": 0.0015, + "loss": 1.7498, + "step": 2632 + }, + { + "epoch": 0.2777426160337553, + "grad_norm": 0.526651918888092, + "learning_rate": 0.0015, + "loss": 1.7131, + "step": 2633 + }, + { + "epoch": 0.27784810126582277, + "grad_norm": 0.6353644728660583, + "learning_rate": 0.0015, + "loss": 1.7585, + "step": 2634 + }, + { + "epoch": 0.2779535864978903, + "grad_norm": 0.6921878457069397, + "learning_rate": 0.0015, + "loss": 1.7893, + "step": 2635 + }, + { + "epoch": 0.2780590717299578, + "grad_norm": 0.45830094814300537, + "learning_rate": 0.0015, + "loss": 1.7361, + "step": 2636 + }, + { + "epoch": 0.2781645569620253, + "grad_norm": 0.5728946924209595, + "learning_rate": 0.0015, + "loss": 1.7706, + "step": 2637 + }, + { + "epoch": 0.27827004219409285, + "grad_norm": 0.5714189410209656, + "learning_rate": 0.0015, + "loss": 1.7181, + "step": 2638 + }, + { + "epoch": 0.27837552742616034, + "grad_norm": 0.576870858669281, + 
"learning_rate": 0.0015, + "loss": 1.7343, + "step": 2639 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 0.4691697657108307, + "learning_rate": 0.0015, + "loss": 1.7189, + "step": 2640 + }, + { + "epoch": 0.2785864978902954, + "grad_norm": 0.46785059571266174, + "learning_rate": 0.0015, + "loss": 1.7545, + "step": 2641 + }, + { + "epoch": 0.2786919831223629, + "grad_norm": 0.4791947305202484, + "learning_rate": 0.0015, + "loss": 1.7099, + "step": 2642 + }, + { + "epoch": 0.27879746835443037, + "grad_norm": 0.4511741101741791, + "learning_rate": 0.0015, + "loss": 1.7102, + "step": 2643 + }, + { + "epoch": 0.2789029535864979, + "grad_norm": 0.51612389087677, + "learning_rate": 0.0015, + "loss": 1.6999, + "step": 2644 + }, + { + "epoch": 0.2790084388185654, + "grad_norm": 0.4879615902900696, + "learning_rate": 0.0015, + "loss": 1.7785, + "step": 2645 + }, + { + "epoch": 0.2791139240506329, + "grad_norm": 0.5071017146110535, + "learning_rate": 0.0015, + "loss": 1.7351, + "step": 2646 + }, + { + "epoch": 0.27921940928270045, + "grad_norm": 0.6026368141174316, + "learning_rate": 0.0015, + "loss": 1.7432, + "step": 2647 + }, + { + "epoch": 0.27932489451476794, + "grad_norm": 0.5431324243545532, + "learning_rate": 0.0015, + "loss": 1.7542, + "step": 2648 + }, + { + "epoch": 0.27943037974683543, + "grad_norm": 0.4874689280986786, + "learning_rate": 0.0015, + "loss": 1.7363, + "step": 2649 + }, + { + "epoch": 0.2795358649789029, + "grad_norm": 0.5022848844528198, + "learning_rate": 0.0015, + "loss": 1.747, + "step": 2650 + }, + { + "epoch": 0.2796413502109705, + "grad_norm": 0.5504576563835144, + "learning_rate": 0.0015, + "loss": 1.7166, + "step": 2651 + }, + { + "epoch": 0.27974683544303797, + "grad_norm": 0.528978705406189, + "learning_rate": 0.0015, + "loss": 1.7533, + "step": 2652 + }, + { + "epoch": 0.27985232067510546, + "grad_norm": 0.4611363708972931, + "learning_rate": 0.0015, + "loss": 1.7609, + "step": 2653 + }, + { + "epoch": 0.279957805907173, + 
"grad_norm": 0.4825553596019745, + "learning_rate": 0.0015, + "loss": 1.755, + "step": 2654 + }, + { + "epoch": 0.2800632911392405, + "grad_norm": 0.5768417119979858, + "learning_rate": 0.0015, + "loss": 1.7338, + "step": 2655 + }, + { + "epoch": 0.280168776371308, + "grad_norm": 0.48545992374420166, + "learning_rate": 0.0015, + "loss": 1.7299, + "step": 2656 + }, + { + "epoch": 0.28027426160337554, + "grad_norm": 0.5208084583282471, + "learning_rate": 0.0015, + "loss": 1.7053, + "step": 2657 + }, + { + "epoch": 0.28037974683544303, + "grad_norm": 0.610554575920105, + "learning_rate": 0.0015, + "loss": 1.6835, + "step": 2658 + }, + { + "epoch": 0.2804852320675105, + "grad_norm": 0.5752939581871033, + "learning_rate": 0.0015, + "loss": 1.747, + "step": 2659 + }, + { + "epoch": 0.2805907172995781, + "grad_norm": 0.46665632724761963, + "learning_rate": 0.0015, + "loss": 1.7333, + "step": 2660 + }, + { + "epoch": 0.28069620253164557, + "grad_norm": 0.53729647397995, + "learning_rate": 0.0015, + "loss": 1.7549, + "step": 2661 + }, + { + "epoch": 0.28080168776371306, + "grad_norm": 0.5107341408729553, + "learning_rate": 0.0015, + "loss": 1.7493, + "step": 2662 + }, + { + "epoch": 0.2809071729957806, + "grad_norm": 0.4697841703891754, + "learning_rate": 0.0015, + "loss": 1.7258, + "step": 2663 + }, + { + "epoch": 0.2810126582278481, + "grad_norm": 0.5092480182647705, + "learning_rate": 0.0015, + "loss": 1.724, + "step": 2664 + }, + { + "epoch": 0.2811181434599156, + "grad_norm": 0.5099025964736938, + "learning_rate": 0.0015, + "loss": 1.7351, + "step": 2665 + }, + { + "epoch": 0.28122362869198314, + "grad_norm": 0.5085151791572571, + "learning_rate": 0.0015, + "loss": 1.7288, + "step": 2666 + }, + { + "epoch": 0.28132911392405063, + "grad_norm": 0.505859375, + "learning_rate": 0.0015, + "loss": 1.724, + "step": 2667 + }, + { + "epoch": 0.2814345991561181, + "grad_norm": 0.5873626470565796, + "learning_rate": 0.0015, + "loss": 1.728, + "step": 2668 + }, + { + "epoch": 
0.2815400843881857, + "grad_norm": 0.49014726281166077, + "learning_rate": 0.0015, + "loss": 1.733, + "step": 2669 + }, + { + "epoch": 0.28164556962025317, + "grad_norm": 0.5928011536598206, + "learning_rate": 0.0015, + "loss": 1.7303, + "step": 2670 + }, + { + "epoch": 0.28175105485232066, + "grad_norm": 0.5059046745300293, + "learning_rate": 0.0015, + "loss": 1.7654, + "step": 2671 + }, + { + "epoch": 0.2818565400843882, + "grad_norm": 0.5453937649726868, + "learning_rate": 0.0015, + "loss": 1.7406, + "step": 2672 + }, + { + "epoch": 0.2819620253164557, + "grad_norm": 0.8845718502998352, + "learning_rate": 0.0015, + "loss": 1.781, + "step": 2673 + }, + { + "epoch": 0.2820675105485232, + "grad_norm": 0.6845380663871765, + "learning_rate": 0.0015, + "loss": 1.7183, + "step": 2674 + }, + { + "epoch": 0.28217299578059074, + "grad_norm": 0.6184827089309692, + "learning_rate": 0.0015, + "loss": 1.734, + "step": 2675 + }, + { + "epoch": 0.28227848101265823, + "grad_norm": 1.1802616119384766, + "learning_rate": 0.0015, + "loss": 1.755, + "step": 2676 + }, + { + "epoch": 0.2823839662447257, + "grad_norm": 0.759573757648468, + "learning_rate": 0.0015, + "loss": 1.712, + "step": 2677 + }, + { + "epoch": 0.2824894514767933, + "grad_norm": 0.49365097284317017, + "learning_rate": 0.0015, + "loss": 1.7034, + "step": 2678 + }, + { + "epoch": 0.28259493670886077, + "grad_norm": 0.6150100231170654, + "learning_rate": 0.0015, + "loss": 1.7474, + "step": 2679 + }, + { + "epoch": 0.28270042194092826, + "grad_norm": 0.5978943705558777, + "learning_rate": 0.0015, + "loss": 1.7187, + "step": 2680 + }, + { + "epoch": 0.2828059071729958, + "grad_norm": 0.5664392113685608, + "learning_rate": 0.0015, + "loss": 1.7102, + "step": 2681 + }, + { + "epoch": 0.2829113924050633, + "grad_norm": 0.6059300303459167, + "learning_rate": 0.0015, + "loss": 1.6932, + "step": 2682 + }, + { + "epoch": 0.2830168776371308, + "grad_norm": 0.5169327855110168, + "learning_rate": 0.0015, + "loss": 1.7585, + 
"step": 2683 + }, + { + "epoch": 0.2831223628691983, + "grad_norm": 0.4633955955505371, + "learning_rate": 0.0015, + "loss": 1.7438, + "step": 2684 + }, + { + "epoch": 0.28322784810126583, + "grad_norm": 0.5879747271537781, + "learning_rate": 0.0015, + "loss": 1.7128, + "step": 2685 + }, + { + "epoch": 0.2833333333333333, + "grad_norm": 0.6198854446411133, + "learning_rate": 0.0015, + "loss": 1.7334, + "step": 2686 + }, + { + "epoch": 0.2834388185654008, + "grad_norm": 0.526696503162384, + "learning_rate": 0.0015, + "loss": 1.7489, + "step": 2687 + }, + { + "epoch": 0.28354430379746837, + "grad_norm": 0.5070342421531677, + "learning_rate": 0.0015, + "loss": 1.7274, + "step": 2688 + }, + { + "epoch": 0.28364978902953586, + "grad_norm": 0.6639285087585449, + "learning_rate": 0.0015, + "loss": 1.7399, + "step": 2689 + }, + { + "epoch": 0.28375527426160335, + "grad_norm": 0.6574095487594604, + "learning_rate": 0.0015, + "loss": 1.776, + "step": 2690 + }, + { + "epoch": 0.2838607594936709, + "grad_norm": 0.5485591888427734, + "learning_rate": 0.0015, + "loss": 1.7485, + "step": 2691 + }, + { + "epoch": 0.2839662447257384, + "grad_norm": 0.5603123903274536, + "learning_rate": 0.0015, + "loss": 1.7286, + "step": 2692 + }, + { + "epoch": 0.2840717299578059, + "grad_norm": 0.5913242101669312, + "learning_rate": 0.0015, + "loss": 1.7524, + "step": 2693 + }, + { + "epoch": 0.28417721518987343, + "grad_norm": 0.498909056186676, + "learning_rate": 0.0015, + "loss": 1.7077, + "step": 2694 + }, + { + "epoch": 0.2842827004219409, + "grad_norm": 0.6316589713096619, + "learning_rate": 0.0015, + "loss": 1.7474, + "step": 2695 + }, + { + "epoch": 0.2843881856540084, + "grad_norm": 0.7538981437683105, + "learning_rate": 0.0015, + "loss": 1.7706, + "step": 2696 + }, + { + "epoch": 0.28449367088607597, + "grad_norm": 0.5047091841697693, + "learning_rate": 0.0015, + "loss": 1.7044, + "step": 2697 + }, + { + "epoch": 0.28459915611814346, + "grad_norm": 0.6115390062332153, + 
"learning_rate": 0.0015, + "loss": 1.7308, + "step": 2698 + }, + { + "epoch": 0.28470464135021095, + "grad_norm": 0.5837851762771606, + "learning_rate": 0.0015, + "loss": 1.7239, + "step": 2699 + }, + { + "epoch": 0.2848101265822785, + "grad_norm": 0.5235541462898254, + "learning_rate": 0.0015, + "loss": 1.7002, + "step": 2700 + }, + { + "epoch": 0.284915611814346, + "grad_norm": 0.8157408237457275, + "learning_rate": 0.0015, + "loss": 1.7714, + "step": 2701 + }, + { + "epoch": 0.2850210970464135, + "grad_norm": 0.9191502928733826, + "learning_rate": 0.0015, + "loss": 1.7593, + "step": 2702 + }, + { + "epoch": 0.28512658227848103, + "grad_norm": 0.6521652340888977, + "learning_rate": 0.0015, + "loss": 1.7492, + "step": 2703 + }, + { + "epoch": 0.2852320675105485, + "grad_norm": 0.616782546043396, + "learning_rate": 0.0015, + "loss": 1.718, + "step": 2704 + }, + { + "epoch": 0.285337552742616, + "grad_norm": 0.956119954586029, + "learning_rate": 0.0015, + "loss": 1.7578, + "step": 2705 + }, + { + "epoch": 0.28544303797468357, + "grad_norm": 0.7189751267433167, + "learning_rate": 0.0015, + "loss": 1.7401, + "step": 2706 + }, + { + "epoch": 0.28554852320675106, + "grad_norm": 0.7041029930114746, + "learning_rate": 0.0015, + "loss": 1.7295, + "step": 2707 + }, + { + "epoch": 0.28565400843881855, + "grad_norm": 1.0130025148391724, + "learning_rate": 0.0015, + "loss": 1.7218, + "step": 2708 + }, + { + "epoch": 0.2857594936708861, + "grad_norm": 0.5713807940483093, + "learning_rate": 0.0015, + "loss": 1.7234, + "step": 2709 + }, + { + "epoch": 0.2858649789029536, + "grad_norm": 0.8522231578826904, + "learning_rate": 0.0015, + "loss": 1.7381, + "step": 2710 + }, + { + "epoch": 0.2859704641350211, + "grad_norm": 0.9587810635566711, + "learning_rate": 0.0015, + "loss": 1.7452, + "step": 2711 + }, + { + "epoch": 0.28607594936708863, + "grad_norm": 0.5355757474899292, + "learning_rate": 0.0015, + "loss": 1.7689, + "step": 2712 + }, + { + "epoch": 0.2861814345991561, + 
"grad_norm": 0.9634062647819519, + "learning_rate": 0.0015, + "loss": 1.7289, + "step": 2713 + }, + { + "epoch": 0.2862869198312236, + "grad_norm": 0.7838209867477417, + "learning_rate": 0.0015, + "loss": 1.7679, + "step": 2714 + }, + { + "epoch": 0.28639240506329117, + "grad_norm": 0.5818883180618286, + "learning_rate": 0.0015, + "loss": 1.7443, + "step": 2715 + }, + { + "epoch": 0.28649789029535866, + "grad_norm": 0.944904088973999, + "learning_rate": 0.0015, + "loss": 1.7506, + "step": 2716 + }, + { + "epoch": 0.28660337552742615, + "grad_norm": 0.5879671573638916, + "learning_rate": 0.0015, + "loss": 1.7703, + "step": 2717 + }, + { + "epoch": 0.28670886075949364, + "grad_norm": 0.6757920384407043, + "learning_rate": 0.0015, + "loss": 1.7026, + "step": 2718 + }, + { + "epoch": 0.2868143459915612, + "grad_norm": 0.5808043479919434, + "learning_rate": 0.0015, + "loss": 1.6967, + "step": 2719 + }, + { + "epoch": 0.2869198312236287, + "grad_norm": 0.5420714020729065, + "learning_rate": 0.0015, + "loss": 1.7191, + "step": 2720 + }, + { + "epoch": 0.2870253164556962, + "grad_norm": 0.5985084176063538, + "learning_rate": 0.0015, + "loss": 1.7173, + "step": 2721 + }, + { + "epoch": 0.2871308016877637, + "grad_norm": 0.4714157283306122, + "learning_rate": 0.0015, + "loss": 1.7065, + "step": 2722 + }, + { + "epoch": 0.2872362869198312, + "grad_norm": 0.5194176435470581, + "learning_rate": 0.0015, + "loss": 1.7402, + "step": 2723 + }, + { + "epoch": 0.2873417721518987, + "grad_norm": 0.5509008169174194, + "learning_rate": 0.0015, + "loss": 1.6874, + "step": 2724 + }, + { + "epoch": 0.28744725738396626, + "grad_norm": 0.5198971629142761, + "learning_rate": 0.0015, + "loss": 1.7208, + "step": 2725 + }, + { + "epoch": 0.28755274261603375, + "grad_norm": 0.595416784286499, + "learning_rate": 0.0015, + "loss": 1.7457, + "step": 2726 + }, + { + "epoch": 0.28765822784810124, + "grad_norm": 0.6461116671562195, + "learning_rate": 0.0015, + "loss": 1.7234, + "step": 2727 + }, + { + 
"epoch": 0.2877637130801688, + "grad_norm": 0.5399662256240845, + "learning_rate": 0.0015, + "loss": 1.6971, + "step": 2728 + }, + { + "epoch": 0.2878691983122363, + "grad_norm": 0.5240736603736877, + "learning_rate": 0.0015, + "loss": 1.7337, + "step": 2729 + }, + { + "epoch": 0.2879746835443038, + "grad_norm": 0.5210794806480408, + "learning_rate": 0.0015, + "loss": 1.727, + "step": 2730 + }, + { + "epoch": 0.2880801687763713, + "grad_norm": 0.5225235223770142, + "learning_rate": 0.0015, + "loss": 1.7336, + "step": 2731 + }, + { + "epoch": 0.2881856540084388, + "grad_norm": 0.4826267957687378, + "learning_rate": 0.0015, + "loss": 1.7363, + "step": 2732 + }, + { + "epoch": 0.2882911392405063, + "grad_norm": 0.5007849931716919, + "learning_rate": 0.0015, + "loss": 1.7366, + "step": 2733 + }, + { + "epoch": 0.28839662447257386, + "grad_norm": 0.46511006355285645, + "learning_rate": 0.0015, + "loss": 1.708, + "step": 2734 + }, + { + "epoch": 0.28850210970464135, + "grad_norm": 0.5794470906257629, + "learning_rate": 0.0015, + "loss": 1.7063, + "step": 2735 + }, + { + "epoch": 0.28860759493670884, + "grad_norm": 0.5770504474639893, + "learning_rate": 0.0015, + "loss": 1.7044, + "step": 2736 + }, + { + "epoch": 0.2887130801687764, + "grad_norm": 0.644075870513916, + "learning_rate": 0.0015, + "loss": 1.7161, + "step": 2737 + }, + { + "epoch": 0.2888185654008439, + "grad_norm": 0.4943702518939972, + "learning_rate": 0.0015, + "loss": 1.7368, + "step": 2738 + }, + { + "epoch": 0.2889240506329114, + "grad_norm": 0.8831140995025635, + "learning_rate": 0.0015, + "loss": 1.7638, + "step": 2739 + }, + { + "epoch": 0.2890295358649789, + "grad_norm": 0.8674169182777405, + "learning_rate": 0.0015, + "loss": 1.7741, + "step": 2740 + }, + { + "epoch": 0.2891350210970464, + "grad_norm": 0.4986017644405365, + "learning_rate": 0.0015, + "loss": 1.721, + "step": 2741 + }, + { + "epoch": 0.2892405063291139, + "grad_norm": 0.6774818897247314, + "learning_rate": 0.0015, + "loss": 1.7467, 
+ "step": 2742 + }, + { + "epoch": 0.28934599156118146, + "grad_norm": 0.6801589131355286, + "learning_rate": 0.0015, + "loss": 1.7066, + "step": 2743 + }, + { + "epoch": 0.28945147679324895, + "grad_norm": 0.48362305760383606, + "learning_rate": 0.0015, + "loss": 1.7435, + "step": 2744 + }, + { + "epoch": 0.28955696202531644, + "grad_norm": 0.5517992377281189, + "learning_rate": 0.0015, + "loss": 1.7368, + "step": 2745 + }, + { + "epoch": 0.289662447257384, + "grad_norm": 0.5449758172035217, + "learning_rate": 0.0015, + "loss": 1.75, + "step": 2746 + }, + { + "epoch": 0.2897679324894515, + "grad_norm": 0.45939669013023376, + "learning_rate": 0.0015, + "loss": 1.7162, + "step": 2747 + }, + { + "epoch": 0.289873417721519, + "grad_norm": 0.5620844960212708, + "learning_rate": 0.0015, + "loss": 1.7443, + "step": 2748 + }, + { + "epoch": 0.28997890295358647, + "grad_norm": 0.5152401328086853, + "learning_rate": 0.0015, + "loss": 1.7468, + "step": 2749 + }, + { + "epoch": 0.290084388185654, + "grad_norm": 0.5323880910873413, + "learning_rate": 0.0015, + "loss": 1.7283, + "step": 2750 + }, + { + "epoch": 0.2901898734177215, + "grad_norm": 0.6154477596282959, + "learning_rate": 0.0015, + "loss": 1.7117, + "step": 2751 + }, + { + "epoch": 0.290295358649789, + "grad_norm": 0.5759016275405884, + "learning_rate": 0.0015, + "loss": 1.7555, + "step": 2752 + }, + { + "epoch": 0.29040084388185655, + "grad_norm": 0.6234085559844971, + "learning_rate": 0.0015, + "loss": 1.7218, + "step": 2753 + }, + { + "epoch": 0.29050632911392404, + "grad_norm": 0.5297909379005432, + "learning_rate": 0.0015, + "loss": 1.7581, + "step": 2754 + }, + { + "epoch": 0.29061181434599154, + "grad_norm": 0.5211373567581177, + "learning_rate": 0.0015, + "loss": 1.7784, + "step": 2755 + }, + { + "epoch": 0.2907172995780591, + "grad_norm": 0.6669690012931824, + "learning_rate": 0.0015, + "loss": 1.7462, + "step": 2756 + }, + { + "epoch": 0.2908227848101266, + "grad_norm": 0.47888511419296265, + 
"learning_rate": 0.0015, + "loss": 1.7186, + "step": 2757 + }, + { + "epoch": 0.29092827004219407, + "grad_norm": 0.5764559507369995, + "learning_rate": 0.0015, + "loss": 1.7295, + "step": 2758 + }, + { + "epoch": 0.2910337552742616, + "grad_norm": 0.4933163523674011, + "learning_rate": 0.0015, + "loss": 1.7892, + "step": 2759 + }, + { + "epoch": 0.2911392405063291, + "grad_norm": 0.5667641162872314, + "learning_rate": 0.0015, + "loss": 1.7445, + "step": 2760 + }, + { + "epoch": 0.2912447257383966, + "grad_norm": 0.537183940410614, + "learning_rate": 0.0015, + "loss": 1.7431, + "step": 2761 + }, + { + "epoch": 0.29135021097046415, + "grad_norm": 0.5711116790771484, + "learning_rate": 0.0015, + "loss": 1.7607, + "step": 2762 + }, + { + "epoch": 0.29145569620253164, + "grad_norm": 0.5116228461265564, + "learning_rate": 0.0015, + "loss": 1.763, + "step": 2763 + }, + { + "epoch": 0.29156118143459914, + "grad_norm": 0.5435190796852112, + "learning_rate": 0.0015, + "loss": 1.6975, + "step": 2764 + }, + { + "epoch": 0.2916666666666667, + "grad_norm": 0.6140767931938171, + "learning_rate": 0.0015, + "loss": 1.7492, + "step": 2765 + }, + { + "epoch": 0.2917721518987342, + "grad_norm": 0.7195400595664978, + "learning_rate": 0.0015, + "loss": 1.7246, + "step": 2766 + }, + { + "epoch": 0.29187763713080167, + "grad_norm": 0.4888113737106323, + "learning_rate": 0.0015, + "loss": 1.7272, + "step": 2767 + }, + { + "epoch": 0.2919831223628692, + "grad_norm": 0.5987370014190674, + "learning_rate": 0.0015, + "loss": 1.7215, + "step": 2768 + }, + { + "epoch": 0.2920886075949367, + "grad_norm": 0.6660099625587463, + "learning_rate": 0.0015, + "loss": 1.7405, + "step": 2769 + }, + { + "epoch": 0.2921940928270042, + "grad_norm": 0.5677423477172852, + "learning_rate": 0.0015, + "loss": 1.7637, + "step": 2770 + }, + { + "epoch": 0.29229957805907175, + "grad_norm": 0.5299758911132812, + "learning_rate": 0.0015, + "loss": 1.7331, + "step": 2771 + }, + { + "epoch": 0.29240506329113924, + 
"grad_norm": 0.5895119309425354, + "learning_rate": 0.0015, + "loss": 1.7466, + "step": 2772 + }, + { + "epoch": 0.29251054852320674, + "grad_norm": 0.673026978969574, + "learning_rate": 0.0015, + "loss": 1.7351, + "step": 2773 + }, + { + "epoch": 0.2926160337552743, + "grad_norm": 0.48601093888282776, + "learning_rate": 0.0015, + "loss": 1.7307, + "step": 2774 + }, + { + "epoch": 0.2927215189873418, + "grad_norm": 0.6198064684867859, + "learning_rate": 0.0015, + "loss": 1.7285, + "step": 2775 + }, + { + "epoch": 0.29282700421940927, + "grad_norm": 0.6173463463783264, + "learning_rate": 0.0015, + "loss": 1.7415, + "step": 2776 + }, + { + "epoch": 0.2929324894514768, + "grad_norm": 0.5128932595252991, + "learning_rate": 0.0015, + "loss": 1.7547, + "step": 2777 + }, + { + "epoch": 0.2930379746835443, + "grad_norm": 0.4993072748184204, + "learning_rate": 0.0015, + "loss": 1.7219, + "step": 2778 + }, + { + "epoch": 0.2931434599156118, + "grad_norm": 0.5432206988334656, + "learning_rate": 0.0015, + "loss": 1.7338, + "step": 2779 + }, + { + "epoch": 0.29324894514767935, + "grad_norm": 0.45908355712890625, + "learning_rate": 0.0015, + "loss": 1.7145, + "step": 2780 + }, + { + "epoch": 0.29335443037974684, + "grad_norm": 0.6190879940986633, + "learning_rate": 0.0015, + "loss": 1.7317, + "step": 2781 + }, + { + "epoch": 0.29345991561181434, + "grad_norm": 0.5287309885025024, + "learning_rate": 0.0015, + "loss": 1.7292, + "step": 2782 + }, + { + "epoch": 0.29356540084388183, + "grad_norm": 0.5456938743591309, + "learning_rate": 0.0015, + "loss": 1.7226, + "step": 2783 + }, + { + "epoch": 0.2936708860759494, + "grad_norm": 0.6921320557594299, + "learning_rate": 0.0015, + "loss": 1.7396, + "step": 2784 + }, + { + "epoch": 0.29377637130801687, + "grad_norm": 0.6134920120239258, + "learning_rate": 0.0015, + "loss": 1.7344, + "step": 2785 + }, + { + "epoch": 0.29388185654008436, + "grad_norm": 0.5075782537460327, + "learning_rate": 0.0015, + "loss": 1.6991, + "step": 2786 + }, + 
{ + "epoch": 0.2939873417721519, + "grad_norm": 0.6686756610870361, + "learning_rate": 0.0015, + "loss": 1.7203, + "step": 2787 + }, + { + "epoch": 0.2940928270042194, + "grad_norm": 0.6644595265388489, + "learning_rate": 0.0015, + "loss": 1.7523, + "step": 2788 + }, + { + "epoch": 0.2941983122362869, + "grad_norm": 0.6102534532546997, + "learning_rate": 0.0015, + "loss": 1.7682, + "step": 2789 + }, + { + "epoch": 0.29430379746835444, + "grad_norm": 0.5273255705833435, + "learning_rate": 0.0015, + "loss": 1.739, + "step": 2790 + }, + { + "epoch": 0.29440928270042194, + "grad_norm": 0.6021966934204102, + "learning_rate": 0.0015, + "loss": 1.7521, + "step": 2791 + }, + { + "epoch": 0.29451476793248943, + "grad_norm": 0.5897009968757629, + "learning_rate": 0.0015, + "loss": 1.6936, + "step": 2792 + }, + { + "epoch": 0.294620253164557, + "grad_norm": 0.4723966419696808, + "learning_rate": 0.0015, + "loss": 1.7393, + "step": 2793 + }, + { + "epoch": 0.29472573839662447, + "grad_norm": 0.6469066143035889, + "learning_rate": 0.0015, + "loss": 1.745, + "step": 2794 + }, + { + "epoch": 0.29483122362869196, + "grad_norm": 0.6034923195838928, + "learning_rate": 0.0015, + "loss": 1.7117, + "step": 2795 + }, + { + "epoch": 0.2949367088607595, + "grad_norm": 0.6067705750465393, + "learning_rate": 0.0015, + "loss": 1.7498, + "step": 2796 + }, + { + "epoch": 0.295042194092827, + "grad_norm": 0.5731272101402283, + "learning_rate": 0.0015, + "loss": 1.7323, + "step": 2797 + }, + { + "epoch": 0.2951476793248945, + "grad_norm": 0.5448641180992126, + "learning_rate": 0.0015, + "loss": 1.7512, + "step": 2798 + }, + { + "epoch": 0.29525316455696204, + "grad_norm": 0.7797780632972717, + "learning_rate": 0.0015, + "loss": 1.7388, + "step": 2799 + }, + { + "epoch": 0.29535864978902954, + "grad_norm": 0.6417586803436279, + "learning_rate": 0.0015, + "loss": 1.7027, + "step": 2800 + }, + { + "epoch": 0.29546413502109703, + "grad_norm": 0.6263667941093445, + "learning_rate": 0.0015, + "loss": 
1.7426, + "step": 2801 + }, + { + "epoch": 0.2955696202531646, + "grad_norm": 0.738879382610321, + "learning_rate": 0.0015, + "loss": 1.7164, + "step": 2802 + }, + { + "epoch": 0.29567510548523207, + "grad_norm": 0.5009618997573853, + "learning_rate": 0.0015, + "loss": 1.7453, + "step": 2803 + }, + { + "epoch": 0.29578059071729956, + "grad_norm": 0.6718484163284302, + "learning_rate": 0.0015, + "loss": 1.7328, + "step": 2804 + }, + { + "epoch": 0.2958860759493671, + "grad_norm": 0.6552501916885376, + "learning_rate": 0.0015, + "loss": 1.7161, + "step": 2805 + }, + { + "epoch": 0.2959915611814346, + "grad_norm": 0.5674899816513062, + "learning_rate": 0.0015, + "loss": 1.7329, + "step": 2806 + }, + { + "epoch": 0.2960970464135021, + "grad_norm": 0.7102161049842834, + "learning_rate": 0.0015, + "loss": 1.7392, + "step": 2807 + }, + { + "epoch": 0.29620253164556964, + "grad_norm": 0.6125746965408325, + "learning_rate": 0.0015, + "loss": 1.7171, + "step": 2808 + }, + { + "epoch": 0.29630801687763714, + "grad_norm": 0.5231237411499023, + "learning_rate": 0.0015, + "loss": 1.7608, + "step": 2809 + }, + { + "epoch": 0.29641350210970463, + "grad_norm": 0.5651828050613403, + "learning_rate": 0.0015, + "loss": 1.7352, + "step": 2810 + }, + { + "epoch": 0.2965189873417722, + "grad_norm": 0.6057689189910889, + "learning_rate": 0.0015, + "loss": 1.7257, + "step": 2811 + }, + { + "epoch": 0.29662447257383967, + "grad_norm": 0.46668606996536255, + "learning_rate": 0.0015, + "loss": 1.7183, + "step": 2812 + }, + { + "epoch": 0.29672995780590716, + "grad_norm": 0.5443451404571533, + "learning_rate": 0.0015, + "loss": 1.7253, + "step": 2813 + }, + { + "epoch": 0.2968354430379747, + "grad_norm": 0.5878294110298157, + "learning_rate": 0.0015, + "loss": 1.7629, + "step": 2814 + }, + { + "epoch": 0.2969409282700422, + "grad_norm": 0.6273327469825745, + "learning_rate": 0.0015, + "loss": 1.7293, + "step": 2815 + }, + { + "epoch": 0.2970464135021097, + "grad_norm": 0.5364030003547668, + 
"learning_rate": 0.0015, + "loss": 1.6985, + "step": 2816 + }, + { + "epoch": 0.2971518987341772, + "grad_norm": 0.6072271466255188, + "learning_rate": 0.0015, + "loss": 1.7349, + "step": 2817 + }, + { + "epoch": 0.29725738396624474, + "grad_norm": 0.5207487344741821, + "learning_rate": 0.0015, + "loss": 1.7655, + "step": 2818 + }, + { + "epoch": 0.29736286919831223, + "grad_norm": 0.5988492965698242, + "learning_rate": 0.0015, + "loss": 1.7504, + "step": 2819 + }, + { + "epoch": 0.2974683544303797, + "grad_norm": 0.7192591428756714, + "learning_rate": 0.0015, + "loss": 1.7633, + "step": 2820 + }, + { + "epoch": 0.29757383966244727, + "grad_norm": 0.5050835013389587, + "learning_rate": 0.0015, + "loss": 1.7105, + "step": 2821 + }, + { + "epoch": 0.29767932489451476, + "grad_norm": 0.6090943813323975, + "learning_rate": 0.0015, + "loss": 1.7095, + "step": 2822 + }, + { + "epoch": 0.29778481012658226, + "grad_norm": 0.6430670022964478, + "learning_rate": 0.0015, + "loss": 1.7344, + "step": 2823 + }, + { + "epoch": 0.2978902953586498, + "grad_norm": 0.5029327869415283, + "learning_rate": 0.0015, + "loss": 1.7001, + "step": 2824 + }, + { + "epoch": 0.2979957805907173, + "grad_norm": 0.7795506715774536, + "learning_rate": 0.0015, + "loss": 1.7041, + "step": 2825 + }, + { + "epoch": 0.2981012658227848, + "grad_norm": 0.5343037247657776, + "learning_rate": 0.0015, + "loss": 1.7147, + "step": 2826 + }, + { + "epoch": 0.29820675105485234, + "grad_norm": 0.8471195101737976, + "learning_rate": 0.0015, + "loss": 1.6798, + "step": 2827 + }, + { + "epoch": 0.29831223628691983, + "grad_norm": 1.0902858972549438, + "learning_rate": 0.0015, + "loss": 1.7153, + "step": 2828 + }, + { + "epoch": 0.2984177215189873, + "grad_norm": 0.5062287449836731, + "learning_rate": 0.0015, + "loss": 1.7136, + "step": 2829 + }, + { + "epoch": 0.29852320675105487, + "grad_norm": 1.2234694957733154, + "learning_rate": 0.0015, + "loss": 1.7266, + "step": 2830 + }, + { + "epoch": 0.29862869198312236, + 
"grad_norm": 0.6013873815536499, + "learning_rate": 0.0015, + "loss": 1.7032, + "step": 2831 + }, + { + "epoch": 0.29873417721518986, + "grad_norm": 0.7629626393318176, + "learning_rate": 0.0015, + "loss": 1.7367, + "step": 2832 + }, + { + "epoch": 0.2988396624472574, + "grad_norm": 0.7414044141769409, + "learning_rate": 0.0015, + "loss": 1.708, + "step": 2833 + }, + { + "epoch": 0.2989451476793249, + "grad_norm": 0.5013367533683777, + "learning_rate": 0.0015, + "loss": 1.7726, + "step": 2834 + }, + { + "epoch": 0.2990506329113924, + "grad_norm": 0.9379156231880188, + "learning_rate": 0.0015, + "loss": 1.7, + "step": 2835 + }, + { + "epoch": 0.29915611814345994, + "grad_norm": 0.7603035569190979, + "learning_rate": 0.0015, + "loss": 1.6982, + "step": 2836 + }, + { + "epoch": 0.29926160337552743, + "grad_norm": 0.6432632207870483, + "learning_rate": 0.0015, + "loss": 1.7146, + "step": 2837 + }, + { + "epoch": 0.2993670886075949, + "grad_norm": 0.9125514030456543, + "learning_rate": 0.0015, + "loss": 1.6937, + "step": 2838 + }, + { + "epoch": 0.29947257383966247, + "grad_norm": 0.4466330409049988, + "learning_rate": 0.0015, + "loss": 1.7059, + "step": 2839 + }, + { + "epoch": 0.29957805907172996, + "grad_norm": 0.8298995494842529, + "learning_rate": 0.0015, + "loss": 1.7424, + "step": 2840 + }, + { + "epoch": 0.29968354430379746, + "grad_norm": 0.7232890725135803, + "learning_rate": 0.0015, + "loss": 1.703, + "step": 2841 + }, + { + "epoch": 0.299789029535865, + "grad_norm": 0.575537383556366, + "learning_rate": 0.0015, + "loss": 1.7472, + "step": 2842 + }, + { + "epoch": 0.2998945147679325, + "grad_norm": 0.9341709613800049, + "learning_rate": 0.0015, + "loss": 1.7129, + "step": 2843 + }, + { + "epoch": 0.3, + "grad_norm": 0.513477623462677, + "learning_rate": 0.0015, + "loss": 1.7432, + "step": 2844 + }, + { + "epoch": 0.30010548523206754, + "grad_norm": 0.7083409428596497, + "learning_rate": 0.0015, + "loss": 1.7367, + "step": 2845 + }, + { + "epoch": 
0.30021097046413503, + "grad_norm": 0.7591075301170349, + "learning_rate": 0.0015, + "loss": 1.7368, + "step": 2846 + }, + { + "epoch": 0.3003164556962025, + "grad_norm": 0.494125097990036, + "learning_rate": 0.0015, + "loss": 1.7292, + "step": 2847 + }, + { + "epoch": 0.30042194092827, + "grad_norm": 0.6274526715278625, + "learning_rate": 0.0015, + "loss": 1.6976, + "step": 2848 + }, + { + "epoch": 0.30052742616033756, + "grad_norm": 0.6833388805389404, + "learning_rate": 0.0015, + "loss": 1.7105, + "step": 2849 + }, + { + "epoch": 0.30063291139240506, + "grad_norm": 0.5163332223892212, + "learning_rate": 0.0015, + "loss": 1.7262, + "step": 2850 + }, + { + "epoch": 0.30073839662447255, + "grad_norm": 0.7678579688072205, + "learning_rate": 0.0015, + "loss": 1.6895, + "step": 2851 + }, + { + "epoch": 0.3008438818565401, + "grad_norm": 0.681102991104126, + "learning_rate": 0.0015, + "loss": 1.7337, + "step": 2852 + }, + { + "epoch": 0.3009493670886076, + "grad_norm": 0.4950621724128723, + "learning_rate": 0.0015, + "loss": 1.7244, + "step": 2853 + }, + { + "epoch": 0.3010548523206751, + "grad_norm": 0.6309622526168823, + "learning_rate": 0.0015, + "loss": 1.7224, + "step": 2854 + }, + { + "epoch": 0.30116033755274263, + "grad_norm": 0.49587345123291016, + "learning_rate": 0.0015, + "loss": 1.7716, + "step": 2855 + }, + { + "epoch": 0.3012658227848101, + "grad_norm": 0.6604416966438293, + "learning_rate": 0.0015, + "loss": 1.7472, + "step": 2856 + }, + { + "epoch": 0.3013713080168776, + "grad_norm": 0.6719710826873779, + "learning_rate": 0.0015, + "loss": 1.7316, + "step": 2857 + }, + { + "epoch": 0.30147679324894516, + "grad_norm": 0.5016152262687683, + "learning_rate": 0.0015, + "loss": 1.7132, + "step": 2858 + }, + { + "epoch": 0.30158227848101266, + "grad_norm": 0.8282347321510315, + "learning_rate": 0.0015, + "loss": 1.705, + "step": 2859 + }, + { + "epoch": 0.30168776371308015, + "grad_norm": 0.5938814878463745, + "learning_rate": 0.0015, + "loss": 1.7257, + 
"step": 2860 + }, + { + "epoch": 0.3017932489451477, + "grad_norm": 0.5885493755340576, + "learning_rate": 0.0015, + "loss": 1.7371, + "step": 2861 + }, + { + "epoch": 0.3018987341772152, + "grad_norm": 0.8027797937393188, + "learning_rate": 0.0015, + "loss": 1.7325, + "step": 2862 + }, + { + "epoch": 0.3020042194092827, + "grad_norm": 0.4907360374927521, + "learning_rate": 0.0015, + "loss": 1.7129, + "step": 2863 + }, + { + "epoch": 0.30210970464135023, + "grad_norm": 0.6164902448654175, + "learning_rate": 0.0015, + "loss": 1.6973, + "step": 2864 + }, + { + "epoch": 0.3022151898734177, + "grad_norm": 0.6104419231414795, + "learning_rate": 0.0015, + "loss": 1.7314, + "step": 2865 + }, + { + "epoch": 0.3023206751054852, + "grad_norm": 0.5501196384429932, + "learning_rate": 0.0015, + "loss": 1.7788, + "step": 2866 + }, + { + "epoch": 0.30242616033755276, + "grad_norm": 0.7411067485809326, + "learning_rate": 0.0015, + "loss": 1.7342, + "step": 2867 + }, + { + "epoch": 0.30253164556962026, + "grad_norm": 0.48683708906173706, + "learning_rate": 0.0015, + "loss": 1.7198, + "step": 2868 + }, + { + "epoch": 0.30263713080168775, + "grad_norm": 0.6525079011917114, + "learning_rate": 0.0015, + "loss": 1.6716, + "step": 2869 + }, + { + "epoch": 0.3027426160337553, + "grad_norm": 0.6791862845420837, + "learning_rate": 0.0015, + "loss": 1.7684, + "step": 2870 + }, + { + "epoch": 0.3028481012658228, + "grad_norm": 0.4720754623413086, + "learning_rate": 0.0015, + "loss": 1.7144, + "step": 2871 + }, + { + "epoch": 0.3029535864978903, + "grad_norm": 0.6469252109527588, + "learning_rate": 0.0015, + "loss": 1.726, + "step": 2872 + }, + { + "epoch": 0.30305907172995783, + "grad_norm": 0.4780896306037903, + "learning_rate": 0.0015, + "loss": 1.7315, + "step": 2873 + }, + { + "epoch": 0.3031645569620253, + "grad_norm": 0.737733006477356, + "learning_rate": 0.0015, + "loss": 1.6692, + "step": 2874 + }, + { + "epoch": 0.3032700421940928, + "grad_norm": 0.5257196426391602, + 
"learning_rate": 0.0015, + "loss": 1.7096, + "step": 2875 + }, + { + "epoch": 0.30337552742616036, + "grad_norm": 0.6989250779151917, + "learning_rate": 0.0015, + "loss": 1.7032, + "step": 2876 + }, + { + "epoch": 0.30348101265822786, + "grad_norm": 0.6388779878616333, + "learning_rate": 0.0015, + "loss": 1.708, + "step": 2877 + }, + { + "epoch": 0.30358649789029535, + "grad_norm": 0.6642478704452515, + "learning_rate": 0.0015, + "loss": 1.7282, + "step": 2878 + }, + { + "epoch": 0.3036919831223629, + "grad_norm": 0.8404462933540344, + "learning_rate": 0.0015, + "loss": 1.7249, + "step": 2879 + }, + { + "epoch": 0.3037974683544304, + "grad_norm": 0.6265426278114319, + "learning_rate": 0.0015, + "loss": 1.7113, + "step": 2880 + }, + { + "epoch": 0.3039029535864979, + "grad_norm": 0.5950288772583008, + "learning_rate": 0.0015, + "loss": 1.7172, + "step": 2881 + }, + { + "epoch": 0.3040084388185654, + "grad_norm": 0.619999349117279, + "learning_rate": 0.0015, + "loss": 1.7168, + "step": 2882 + }, + { + "epoch": 0.3041139240506329, + "grad_norm": 0.5390220284461975, + "learning_rate": 0.0015, + "loss": 1.68, + "step": 2883 + }, + { + "epoch": 0.3042194092827004, + "grad_norm": 0.5932281613349915, + "learning_rate": 0.0015, + "loss": 1.7146, + "step": 2884 + }, + { + "epoch": 0.3043248945147679, + "grad_norm": 0.5918823480606079, + "learning_rate": 0.0015, + "loss": 1.7275, + "step": 2885 + }, + { + "epoch": 0.30443037974683546, + "grad_norm": 0.5813516974449158, + "learning_rate": 0.0015, + "loss": 1.7332, + "step": 2886 + }, + { + "epoch": 0.30453586497890295, + "grad_norm": 0.6132948398590088, + "learning_rate": 0.0015, + "loss": 1.7404, + "step": 2887 + }, + { + "epoch": 0.30464135021097044, + "grad_norm": 0.5324879288673401, + "learning_rate": 0.0015, + "loss": 1.6782, + "step": 2888 + }, + { + "epoch": 0.304746835443038, + "grad_norm": 0.7246313095092773, + "learning_rate": 0.0015, + "loss": 1.7571, + "step": 2889 + }, + { + "epoch": 0.3048523206751055, + 
"grad_norm": 0.5872277617454529, + "learning_rate": 0.0015, + "loss": 1.7026, + "step": 2890 + }, + { + "epoch": 0.304957805907173, + "grad_norm": 0.48824891448020935, + "learning_rate": 0.0015, + "loss": 1.7329, + "step": 2891 + }, + { + "epoch": 0.3050632911392405, + "grad_norm": 0.4681529402732849, + "learning_rate": 0.0015, + "loss": 1.7251, + "step": 2892 + }, + { + "epoch": 0.305168776371308, + "grad_norm": 0.4926391839981079, + "learning_rate": 0.0015, + "loss": 1.7054, + "step": 2893 + }, + { + "epoch": 0.3052742616033755, + "grad_norm": 0.5023272633552551, + "learning_rate": 0.0015, + "loss": 1.7257, + "step": 2894 + }, + { + "epoch": 0.30537974683544306, + "grad_norm": 0.4859912693500519, + "learning_rate": 0.0015, + "loss": 1.715, + "step": 2895 + }, + { + "epoch": 0.30548523206751055, + "grad_norm": 0.5448835492134094, + "learning_rate": 0.0015, + "loss": 1.7207, + "step": 2896 + }, + { + "epoch": 0.30559071729957804, + "grad_norm": 0.5105636715888977, + "learning_rate": 0.0015, + "loss": 1.7434, + "step": 2897 + }, + { + "epoch": 0.3056962025316456, + "grad_norm": 0.6418217420578003, + "learning_rate": 0.0015, + "loss": 1.7409, + "step": 2898 + }, + { + "epoch": 0.3058016877637131, + "grad_norm": 0.6759374737739563, + "learning_rate": 0.0015, + "loss": 1.7238, + "step": 2899 + }, + { + "epoch": 0.3059071729957806, + "grad_norm": 0.629976749420166, + "learning_rate": 0.0015, + "loss": 1.7007, + "step": 2900 + }, + { + "epoch": 0.3060126582278481, + "grad_norm": 0.7432752847671509, + "learning_rate": 0.0015, + "loss": 1.7249, + "step": 2901 + }, + { + "epoch": 0.3061181434599156, + "grad_norm": 0.5830813050270081, + "learning_rate": 0.0015, + "loss": 1.7463, + "step": 2902 + }, + { + "epoch": 0.3062236286919831, + "grad_norm": 0.6720617413520813, + "learning_rate": 0.0015, + "loss": 1.7277, + "step": 2903 + }, + { + "epoch": 0.30632911392405066, + "grad_norm": 0.7102701663970947, + "learning_rate": 0.0015, + "loss": 1.7351, + "step": 2904 + }, + { + 
"epoch": 0.30643459915611815, + "grad_norm": 0.5022579431533813, + "learning_rate": 0.0015, + "loss": 1.7315, + "step": 2905 + }, + { + "epoch": 0.30654008438818564, + "grad_norm": 0.5378018021583557, + "learning_rate": 0.0015, + "loss": 1.7517, + "step": 2906 + }, + { + "epoch": 0.3066455696202532, + "grad_norm": 0.5608876943588257, + "learning_rate": 0.0015, + "loss": 1.6931, + "step": 2907 + }, + { + "epoch": 0.3067510548523207, + "grad_norm": 0.4984451234340668, + "learning_rate": 0.0015, + "loss": 1.719, + "step": 2908 + }, + { + "epoch": 0.3068565400843882, + "grad_norm": 0.5655184984207153, + "learning_rate": 0.0015, + "loss": 1.7065, + "step": 2909 + }, + { + "epoch": 0.3069620253164557, + "grad_norm": 0.6536985039710999, + "learning_rate": 0.0015, + "loss": 1.6989, + "step": 2910 + }, + { + "epoch": 0.3070675105485232, + "grad_norm": 0.459410160779953, + "learning_rate": 0.0015, + "loss": 1.6869, + "step": 2911 + }, + { + "epoch": 0.3071729957805907, + "grad_norm": 0.6984475255012512, + "learning_rate": 0.0015, + "loss": 1.733, + "step": 2912 + }, + { + "epoch": 0.30727848101265826, + "grad_norm": 0.6061699986457825, + "learning_rate": 0.0015, + "loss": 1.6939, + "step": 2913 + }, + { + "epoch": 0.30738396624472575, + "grad_norm": 0.5774768590927124, + "learning_rate": 0.0015, + "loss": 1.7363, + "step": 2914 + }, + { + "epoch": 0.30748945147679324, + "grad_norm": 0.8641024231910706, + "learning_rate": 0.0015, + "loss": 1.7554, + "step": 2915 + }, + { + "epoch": 0.30759493670886073, + "grad_norm": 0.5568138957023621, + "learning_rate": 0.0015, + "loss": 1.7343, + "step": 2916 + }, + { + "epoch": 0.3077004219409283, + "grad_norm": 0.7313710451126099, + "learning_rate": 0.0015, + "loss": 1.6874, + "step": 2917 + }, + { + "epoch": 0.3078059071729958, + "grad_norm": 0.853188693523407, + "learning_rate": 0.0015, + "loss": 1.7139, + "step": 2918 + }, + { + "epoch": 0.30791139240506327, + "grad_norm": 0.49975845217704773, + "learning_rate": 0.0015, + "loss": 
1.7565, + "step": 2919 + }, + { + "epoch": 0.3080168776371308, + "grad_norm": 0.656856894493103, + "learning_rate": 0.0015, + "loss": 1.7059, + "step": 2920 + }, + { + "epoch": 0.3081223628691983, + "grad_norm": 0.6693185567855835, + "learning_rate": 0.0015, + "loss": 1.679, + "step": 2921 + }, + { + "epoch": 0.3082278481012658, + "grad_norm": 0.4762989580631256, + "learning_rate": 0.0015, + "loss": 1.7177, + "step": 2922 + }, + { + "epoch": 0.30833333333333335, + "grad_norm": 0.489740252494812, + "learning_rate": 0.0015, + "loss": 1.7347, + "step": 2923 + }, + { + "epoch": 0.30843881856540084, + "grad_norm": 0.5571384429931641, + "learning_rate": 0.0015, + "loss": 1.7074, + "step": 2924 + }, + { + "epoch": 0.30854430379746833, + "grad_norm": 0.5981157422065735, + "learning_rate": 0.0015, + "loss": 1.7049, + "step": 2925 + }, + { + "epoch": 0.3086497890295359, + "grad_norm": 0.5322088599205017, + "learning_rate": 0.0015, + "loss": 1.718, + "step": 2926 + }, + { + "epoch": 0.3087552742616034, + "grad_norm": 0.4946162700653076, + "learning_rate": 0.0015, + "loss": 1.7362, + "step": 2927 + }, + { + "epoch": 0.30886075949367087, + "grad_norm": 0.5483956336975098, + "learning_rate": 0.0015, + "loss": 1.7498, + "step": 2928 + }, + { + "epoch": 0.3089662447257384, + "grad_norm": 0.574830174446106, + "learning_rate": 0.0015, + "loss": 1.6847, + "step": 2929 + }, + { + "epoch": 0.3090717299578059, + "grad_norm": 0.4502004384994507, + "learning_rate": 0.0015, + "loss": 1.6995, + "step": 2930 + }, + { + "epoch": 0.3091772151898734, + "grad_norm": 0.5166926980018616, + "learning_rate": 0.0015, + "loss": 1.7415, + "step": 2931 + }, + { + "epoch": 0.30928270042194095, + "grad_norm": 0.4438752830028534, + "learning_rate": 0.0015, + "loss": 1.7599, + "step": 2932 + }, + { + "epoch": 0.30938818565400844, + "grad_norm": 0.5299575328826904, + "learning_rate": 0.0015, + "loss": 1.7177, + "step": 2933 + }, + { + "epoch": 0.30949367088607593, + "grad_norm": 0.4622742831707001, + 
"learning_rate": 0.0015, + "loss": 1.7428, + "step": 2934 + }, + { + "epoch": 0.3095991561181435, + "grad_norm": 0.5356643795967102, + "learning_rate": 0.0015, + "loss": 1.7258, + "step": 2935 + }, + { + "epoch": 0.309704641350211, + "grad_norm": 0.6198638677597046, + "learning_rate": 0.0015, + "loss": 1.762, + "step": 2936 + }, + { + "epoch": 0.30981012658227847, + "grad_norm": 0.4738200008869171, + "learning_rate": 0.0015, + "loss": 1.6758, + "step": 2937 + }, + { + "epoch": 0.309915611814346, + "grad_norm": 0.5630542039871216, + "learning_rate": 0.0015, + "loss": 1.7026, + "step": 2938 + }, + { + "epoch": 0.3100210970464135, + "grad_norm": 0.6402138471603394, + "learning_rate": 0.0015, + "loss": 1.671, + "step": 2939 + }, + { + "epoch": 0.310126582278481, + "grad_norm": 0.6138157844543457, + "learning_rate": 0.0015, + "loss": 1.7062, + "step": 2940 + }, + { + "epoch": 0.31023206751054855, + "grad_norm": 0.5457233190536499, + "learning_rate": 0.0015, + "loss": 1.7429, + "step": 2941 + }, + { + "epoch": 0.31033755274261604, + "grad_norm": 0.5193164348602295, + "learning_rate": 0.0015, + "loss": 1.7104, + "step": 2942 + }, + { + "epoch": 0.31044303797468353, + "grad_norm": 0.5176413059234619, + "learning_rate": 0.0015, + "loss": 1.7357, + "step": 2943 + }, + { + "epoch": 0.3105485232067511, + "grad_norm": 0.5201972723007202, + "learning_rate": 0.0015, + "loss": 1.7581, + "step": 2944 + }, + { + "epoch": 0.3106540084388186, + "grad_norm": 0.5292931795120239, + "learning_rate": 0.0015, + "loss": 1.7433, + "step": 2945 + }, + { + "epoch": 0.31075949367088607, + "grad_norm": 0.5314915776252747, + "learning_rate": 0.0015, + "loss": 1.7061, + "step": 2946 + }, + { + "epoch": 0.31086497890295356, + "grad_norm": 0.5484752655029297, + "learning_rate": 0.0015, + "loss": 1.7291, + "step": 2947 + }, + { + "epoch": 0.3109704641350211, + "grad_norm": 0.47969773411750793, + "learning_rate": 0.0015, + "loss": 1.733, + "step": 2948 + }, + { + "epoch": 0.3110759493670886, + 
"grad_norm": 0.528062641620636, + "learning_rate": 0.0015, + "loss": 1.7333, + "step": 2949 + }, + { + "epoch": 0.3111814345991561, + "grad_norm": 0.4798565208911896, + "learning_rate": 0.0015, + "loss": 1.6886, + "step": 2950 + }, + { + "epoch": 0.31128691983122364, + "grad_norm": 0.5226189494132996, + "learning_rate": 0.0015, + "loss": 1.6907, + "step": 2951 + }, + { + "epoch": 0.31139240506329113, + "grad_norm": 0.698269248008728, + "learning_rate": 0.0015, + "loss": 1.7147, + "step": 2952 + }, + { + "epoch": 0.3114978902953586, + "grad_norm": 0.7282271981239319, + "learning_rate": 0.0015, + "loss": 1.726, + "step": 2953 + }, + { + "epoch": 0.3116033755274262, + "grad_norm": 0.4884694516658783, + "learning_rate": 0.0015, + "loss": 1.6855, + "step": 2954 + }, + { + "epoch": 0.31170886075949367, + "grad_norm": 0.8627981543540955, + "learning_rate": 0.0015, + "loss": 1.7064, + "step": 2955 + }, + { + "epoch": 0.31181434599156116, + "grad_norm": 0.6796111464500427, + "learning_rate": 0.0015, + "loss": 1.7161, + "step": 2956 + }, + { + "epoch": 0.3119198312236287, + "grad_norm": 0.7278868556022644, + "learning_rate": 0.0015, + "loss": 1.732, + "step": 2957 + }, + { + "epoch": 0.3120253164556962, + "grad_norm": 0.7733821272850037, + "learning_rate": 0.0015, + "loss": 1.6823, + "step": 2958 + }, + { + "epoch": 0.3121308016877637, + "grad_norm": 0.4810079038143158, + "learning_rate": 0.0015, + "loss": 1.7187, + "step": 2959 + }, + { + "epoch": 0.31223628691983124, + "grad_norm": 0.7000905275344849, + "learning_rate": 0.0015, + "loss": 1.7062, + "step": 2960 + }, + { + "epoch": 0.31234177215189873, + "grad_norm": 0.5330955982208252, + "learning_rate": 0.0015, + "loss": 1.6838, + "step": 2961 + }, + { + "epoch": 0.3124472573839662, + "grad_norm": 0.5643135905265808, + "learning_rate": 0.0015, + "loss": 1.7133, + "step": 2962 + }, + { + "epoch": 0.3125527426160338, + "grad_norm": 0.49947667121887207, + "learning_rate": 0.0015, + "loss": 1.7098, + "step": 2963 + }, + { + 
"epoch": 0.31265822784810127, + "grad_norm": 0.508200466632843, + "learning_rate": 0.0015, + "loss": 1.6731, + "step": 2964 + }, + { + "epoch": 0.31276371308016876, + "grad_norm": 0.721143364906311, + "learning_rate": 0.0015, + "loss": 1.7061, + "step": 2965 + }, + { + "epoch": 0.3128691983122363, + "grad_norm": 0.4859545826911926, + "learning_rate": 0.0015, + "loss": 1.7129, + "step": 2966 + }, + { + "epoch": 0.3129746835443038, + "grad_norm": 0.651206910610199, + "learning_rate": 0.0015, + "loss": 1.7326, + "step": 2967 + }, + { + "epoch": 0.3130801687763713, + "grad_norm": 0.6910227537155151, + "learning_rate": 0.0015, + "loss": 1.7347, + "step": 2968 + }, + { + "epoch": 0.31318565400843884, + "grad_norm": 0.5312160849571228, + "learning_rate": 0.0015, + "loss": 1.6943, + "step": 2969 + }, + { + "epoch": 0.31329113924050633, + "grad_norm": 0.6909522414207458, + "learning_rate": 0.0015, + "loss": 1.7064, + "step": 2970 + }, + { + "epoch": 0.3133966244725738, + "grad_norm": 0.527764618396759, + "learning_rate": 0.0015, + "loss": 1.6689, + "step": 2971 + }, + { + "epoch": 0.3135021097046414, + "grad_norm": 0.5629932284355164, + "learning_rate": 0.0015, + "loss": 1.6881, + "step": 2972 + }, + { + "epoch": 0.31360759493670887, + "grad_norm": 0.5080920457839966, + "learning_rate": 0.0015, + "loss": 1.7148, + "step": 2973 + }, + { + "epoch": 0.31371308016877636, + "grad_norm": 0.5579750537872314, + "learning_rate": 0.0015, + "loss": 1.7264, + "step": 2974 + }, + { + "epoch": 0.3138185654008439, + "grad_norm": 0.6935030817985535, + "learning_rate": 0.0015, + "loss": 1.6895, + "step": 2975 + }, + { + "epoch": 0.3139240506329114, + "grad_norm": 0.47138023376464844, + "learning_rate": 0.0015, + "loss": 1.7101, + "step": 2976 + }, + { + "epoch": 0.3140295358649789, + "grad_norm": 0.5409736037254333, + "learning_rate": 0.0015, + "loss": 1.7181, + "step": 2977 + }, + { + "epoch": 0.31413502109704644, + "grad_norm": 0.5488565564155579, + "learning_rate": 0.0015, + "loss": 
1.7199, + "step": 2978 + }, + { + "epoch": 0.31424050632911393, + "grad_norm": 0.5884570479393005, + "learning_rate": 0.0015, + "loss": 1.7214, + "step": 2979 + }, + { + "epoch": 0.3143459915611814, + "grad_norm": 0.5556483268737793, + "learning_rate": 0.0015, + "loss": 1.7449, + "step": 2980 + }, + { + "epoch": 0.3144514767932489, + "grad_norm": 0.5683891177177429, + "learning_rate": 0.0015, + "loss": 1.7487, + "step": 2981 + }, + { + "epoch": 0.31455696202531647, + "grad_norm": 0.6940754652023315, + "learning_rate": 0.0015, + "loss": 1.6898, + "step": 2982 + }, + { + "epoch": 0.31466244725738396, + "grad_norm": 0.5430545806884766, + "learning_rate": 0.0015, + "loss": 1.7081, + "step": 2983 + }, + { + "epoch": 0.31476793248945145, + "grad_norm": 0.5181131958961487, + "learning_rate": 0.0015, + "loss": 1.7108, + "step": 2984 + }, + { + "epoch": 0.314873417721519, + "grad_norm": 0.5396592020988464, + "learning_rate": 0.0015, + "loss": 1.7025, + "step": 2985 + }, + { + "epoch": 0.3149789029535865, + "grad_norm": 0.45588329434394836, + "learning_rate": 0.0015, + "loss": 1.7474, + "step": 2986 + }, + { + "epoch": 0.315084388185654, + "grad_norm": 0.5171492099761963, + "learning_rate": 0.0015, + "loss": 1.7475, + "step": 2987 + }, + { + "epoch": 0.31518987341772153, + "grad_norm": 0.4931403398513794, + "learning_rate": 0.0015, + "loss": 1.7322, + "step": 2988 + }, + { + "epoch": 0.315295358649789, + "grad_norm": 0.5297608375549316, + "learning_rate": 0.0015, + "loss": 1.7583, + "step": 2989 + }, + { + "epoch": 0.3154008438818565, + "grad_norm": 0.4950697422027588, + "learning_rate": 0.0015, + "loss": 1.7387, + "step": 2990 + }, + { + "epoch": 0.31550632911392407, + "grad_norm": 0.5757505893707275, + "learning_rate": 0.0015, + "loss": 1.6847, + "step": 2991 + }, + { + "epoch": 0.31561181434599156, + "grad_norm": 0.5489315390586853, + "learning_rate": 0.0015, + "loss": 1.6818, + "step": 2992 + }, + { + "epoch": 0.31571729957805905, + "grad_norm": 0.5464658141136169, + 
"learning_rate": 0.0015, + "loss": 1.6652, + "step": 2993 + }, + { + "epoch": 0.3158227848101266, + "grad_norm": 0.5630892515182495, + "learning_rate": 0.0015, + "loss": 1.7377, + "step": 2994 + }, + { + "epoch": 0.3159282700421941, + "grad_norm": 0.6165306568145752, + "learning_rate": 0.0015, + "loss": 1.7545, + "step": 2995 + }, + { + "epoch": 0.3160337552742616, + "grad_norm": 0.5498163104057312, + "learning_rate": 0.0015, + "loss": 1.6951, + "step": 2996 + }, + { + "epoch": 0.31613924050632913, + "grad_norm": 0.5323984622955322, + "learning_rate": 0.0015, + "loss": 1.6957, + "step": 2997 + }, + { + "epoch": 0.3162447257383966, + "grad_norm": 0.5057029128074646, + "learning_rate": 0.0015, + "loss": 1.7063, + "step": 2998 + }, + { + "epoch": 0.3163502109704641, + "grad_norm": 0.4992408752441406, + "learning_rate": 0.0015, + "loss": 1.7293, + "step": 2999 + }, + { + "epoch": 0.31645569620253167, + "grad_norm": 0.4843266010284424, + "learning_rate": 0.0015, + "loss": 1.7005, + "step": 3000 + }, + { + "epoch": 0.31656118143459916, + "grad_norm": 0.5039722323417664, + "learning_rate": 0.0015, + "loss": 1.7098, + "step": 3001 + }, + { + "epoch": 0.31666666666666665, + "grad_norm": 0.5445347428321838, + "learning_rate": 0.0015, + "loss": 1.693, + "step": 3002 + }, + { + "epoch": 0.3167721518987342, + "grad_norm": 0.6046627759933472, + "learning_rate": 0.0015, + "loss": 1.6847, + "step": 3003 + }, + { + "epoch": 0.3168776371308017, + "grad_norm": 0.6104879379272461, + "learning_rate": 0.0015, + "loss": 1.7645, + "step": 3004 + }, + { + "epoch": 0.3169831223628692, + "grad_norm": 0.5497164726257324, + "learning_rate": 0.0015, + "loss": 1.7295, + "step": 3005 + }, + { + "epoch": 0.31708860759493673, + "grad_norm": 0.5327602624893188, + "learning_rate": 0.0015, + "loss": 1.7384, + "step": 3006 + }, + { + "epoch": 0.3171940928270042, + "grad_norm": 0.6222022175788879, + "learning_rate": 0.0015, + "loss": 1.7203, + "step": 3007 + }, + { + "epoch": 0.3172995780590717, + 
"grad_norm": 0.5037438273429871, + "learning_rate": 0.0015, + "loss": 1.7107, + "step": 3008 + }, + { + "epoch": 0.31740506329113927, + "grad_norm": 0.6280902028083801, + "learning_rate": 0.0015, + "loss": 1.702, + "step": 3009 + }, + { + "epoch": 0.31751054852320676, + "grad_norm": 0.7413647770881653, + "learning_rate": 0.0015, + "loss": 1.7166, + "step": 3010 + }, + { + "epoch": 0.31761603375527425, + "grad_norm": 0.4961790144443512, + "learning_rate": 0.0015, + "loss": 1.6976, + "step": 3011 + }, + { + "epoch": 0.31772151898734174, + "grad_norm": 0.7486870884895325, + "learning_rate": 0.0015, + "loss": 1.7016, + "step": 3012 + }, + { + "epoch": 0.3178270042194093, + "grad_norm": 0.7110238075256348, + "learning_rate": 0.0015, + "loss": 1.6876, + "step": 3013 + }, + { + "epoch": 0.3179324894514768, + "grad_norm": 0.5200576186180115, + "learning_rate": 0.0015, + "loss": 1.7166, + "step": 3014 + }, + { + "epoch": 0.3180379746835443, + "grad_norm": 0.8104680180549622, + "learning_rate": 0.0015, + "loss": 1.7449, + "step": 3015 + }, + { + "epoch": 0.3181434599156118, + "grad_norm": 0.7071999311447144, + "learning_rate": 0.0015, + "loss": 1.7138, + "step": 3016 + }, + { + "epoch": 0.3182489451476793, + "grad_norm": 0.5642274022102356, + "learning_rate": 0.0015, + "loss": 1.7436, + "step": 3017 + }, + { + "epoch": 0.3183544303797468, + "grad_norm": 0.8863946795463562, + "learning_rate": 0.0015, + "loss": 1.7472, + "step": 3018 + }, + { + "epoch": 0.31845991561181436, + "grad_norm": 0.8234111070632935, + "learning_rate": 0.0015, + "loss": 1.7178, + "step": 3019 + }, + { + "epoch": 0.31856540084388185, + "grad_norm": 0.6567312479019165, + "learning_rate": 0.0015, + "loss": 1.7014, + "step": 3020 + }, + { + "epoch": 0.31867088607594934, + "grad_norm": 0.8978010416030884, + "learning_rate": 0.0015, + "loss": 1.6899, + "step": 3021 + }, + { + "epoch": 0.3187763713080169, + "grad_norm": 0.7658066153526306, + "learning_rate": 0.0015, + "loss": 1.7148, + "step": 3022 + }, + { + 
"epoch": 0.3188818565400844, + "grad_norm": 0.4774928390979767, + "learning_rate": 0.0015, + "loss": 1.6676, + "step": 3023 + }, + { + "epoch": 0.3189873417721519, + "grad_norm": 0.7254661321640015, + "learning_rate": 0.0015, + "loss": 1.7017, + "step": 3024 + }, + { + "epoch": 0.3190928270042194, + "grad_norm": 0.536165714263916, + "learning_rate": 0.0015, + "loss": 1.6692, + "step": 3025 + }, + { + "epoch": 0.3191983122362869, + "grad_norm": 0.663199245929718, + "learning_rate": 0.0015, + "loss": 1.7304, + "step": 3026 + }, + { + "epoch": 0.3193037974683544, + "grad_norm": 0.654486894607544, + "learning_rate": 0.0015, + "loss": 1.7107, + "step": 3027 + }, + { + "epoch": 0.31940928270042196, + "grad_norm": 0.6329241991043091, + "learning_rate": 0.0015, + "loss": 1.7341, + "step": 3028 + }, + { + "epoch": 0.31951476793248945, + "grad_norm": 0.5985587239265442, + "learning_rate": 0.0015, + "loss": 1.6952, + "step": 3029 + }, + { + "epoch": 0.31962025316455694, + "grad_norm": 0.8229667544364929, + "learning_rate": 0.0015, + "loss": 1.747, + "step": 3030 + }, + { + "epoch": 0.3197257383966245, + "grad_norm": 0.5880804657936096, + "learning_rate": 0.0015, + "loss": 1.7174, + "step": 3031 + }, + { + "epoch": 0.319831223628692, + "grad_norm": 0.5924350023269653, + "learning_rate": 0.0015, + "loss": 1.7108, + "step": 3032 + }, + { + "epoch": 0.3199367088607595, + "grad_norm": 0.5878428220748901, + "learning_rate": 0.0015, + "loss": 1.6887, + "step": 3033 + }, + { + "epoch": 0.320042194092827, + "grad_norm": 0.5516447424888611, + "learning_rate": 0.0015, + "loss": 1.7428, + "step": 3034 + }, + { + "epoch": 0.3201476793248945, + "grad_norm": 0.7194355130195618, + "learning_rate": 0.0015, + "loss": 1.7037, + "step": 3035 + }, + { + "epoch": 0.320253164556962, + "grad_norm": 0.47816890478134155, + "learning_rate": 0.0015, + "loss": 1.7629, + "step": 3036 + }, + { + "epoch": 0.32035864978902956, + "grad_norm": 0.6952998638153076, + "learning_rate": 0.0015, + "loss": 1.7469, + 
"step": 3037 + }, + { + "epoch": 0.32046413502109705, + "grad_norm": 0.5314906239509583, + "learning_rate": 0.0015, + "loss": 1.6811, + "step": 3038 + }, + { + "epoch": 0.32056962025316454, + "grad_norm": 0.5869095921516418, + "learning_rate": 0.0015, + "loss": 1.693, + "step": 3039 + }, + { + "epoch": 0.3206751054852321, + "grad_norm": 0.5194697380065918, + "learning_rate": 0.0015, + "loss": 1.6861, + "step": 3040 + }, + { + "epoch": 0.3207805907172996, + "grad_norm": 0.641189694404602, + "learning_rate": 0.0015, + "loss": 1.7105, + "step": 3041 + }, + { + "epoch": 0.3208860759493671, + "grad_norm": 0.5106596946716309, + "learning_rate": 0.0015, + "loss": 1.7043, + "step": 3042 + }, + { + "epoch": 0.3209915611814346, + "grad_norm": 0.6200544238090515, + "learning_rate": 0.0015, + "loss": 1.7395, + "step": 3043 + }, + { + "epoch": 0.3210970464135021, + "grad_norm": 0.5583987832069397, + "learning_rate": 0.0015, + "loss": 1.7112, + "step": 3044 + }, + { + "epoch": 0.3212025316455696, + "grad_norm": 0.5801132321357727, + "learning_rate": 0.0015, + "loss": 1.7095, + "step": 3045 + }, + { + "epoch": 0.3213080168776371, + "grad_norm": 0.536475419998169, + "learning_rate": 0.0015, + "loss": 1.757, + "step": 3046 + }, + { + "epoch": 0.32141350210970465, + "grad_norm": 0.5250360369682312, + "learning_rate": 0.0015, + "loss": 1.6634, + "step": 3047 + }, + { + "epoch": 0.32151898734177214, + "grad_norm": 0.5058603882789612, + "learning_rate": 0.0015, + "loss": 1.7073, + "step": 3048 + }, + { + "epoch": 0.32162447257383964, + "grad_norm": 0.5103098154067993, + "learning_rate": 0.0015, + "loss": 1.6794, + "step": 3049 + }, + { + "epoch": 0.3217299578059072, + "grad_norm": 0.4832077622413635, + "learning_rate": 0.0015, + "loss": 1.7062, + "step": 3050 + }, + { + "epoch": 0.3218354430379747, + "grad_norm": 0.5129528641700745, + "learning_rate": 0.0015, + "loss": 1.699, + "step": 3051 + }, + { + "epoch": 0.32194092827004217, + "grad_norm": 0.5048354268074036, + "learning_rate": 
0.0015, + "loss": 1.6927, + "step": 3052 + }, + { + "epoch": 0.3220464135021097, + "grad_norm": 0.5547788143157959, + "learning_rate": 0.0015, + "loss": 1.7307, + "step": 3053 + }, + { + "epoch": 0.3221518987341772, + "grad_norm": 0.5981379151344299, + "learning_rate": 0.0015, + "loss": 1.7147, + "step": 3054 + }, + { + "epoch": 0.3222573839662447, + "grad_norm": 0.46103549003601074, + "learning_rate": 0.0015, + "loss": 1.7425, + "step": 3055 + }, + { + "epoch": 0.32236286919831225, + "grad_norm": 0.601043701171875, + "learning_rate": 0.0015, + "loss": 1.7254, + "step": 3056 + }, + { + "epoch": 0.32246835443037974, + "grad_norm": 0.5374560952186584, + "learning_rate": 0.0015, + "loss": 1.7188, + "step": 3057 + }, + { + "epoch": 0.32257383966244724, + "grad_norm": 0.4482406973838806, + "learning_rate": 0.0015, + "loss": 1.7083, + "step": 3058 + }, + { + "epoch": 0.3226793248945148, + "grad_norm": 0.5059371590614319, + "learning_rate": 0.0015, + "loss": 1.7242, + "step": 3059 + }, + { + "epoch": 0.3227848101265823, + "grad_norm": 0.5071547031402588, + "learning_rate": 0.0015, + "loss": 1.7208, + "step": 3060 + }, + { + "epoch": 0.32289029535864977, + "grad_norm": 0.46012645959854126, + "learning_rate": 0.0015, + "loss": 1.6941, + "step": 3061 + }, + { + "epoch": 0.3229957805907173, + "grad_norm": 0.5413433909416199, + "learning_rate": 0.0015, + "loss": 1.6894, + "step": 3062 + }, + { + "epoch": 0.3231012658227848, + "grad_norm": 0.5513799786567688, + "learning_rate": 0.0015, + "loss": 1.7154, + "step": 3063 + }, + { + "epoch": 0.3232067510548523, + "grad_norm": 0.4934634864330292, + "learning_rate": 0.0015, + "loss": 1.7136, + "step": 3064 + }, + { + "epoch": 0.32331223628691985, + "grad_norm": 0.49460723996162415, + "learning_rate": 0.0015, + "loss": 1.6926, + "step": 3065 + }, + { + "epoch": 0.32341772151898734, + "grad_norm": 0.5192188024520874, + "learning_rate": 0.0015, + "loss": 1.707, + "step": 3066 + }, + { + "epoch": 0.32352320675105484, + "grad_norm": 
0.4758225381374359, + "learning_rate": 0.0015, + "loss": 1.7233, + "step": 3067 + }, + { + "epoch": 0.3236286919831224, + "grad_norm": 0.4821212887763977, + "learning_rate": 0.0015, + "loss": 1.7053, + "step": 3068 + }, + { + "epoch": 0.3237341772151899, + "grad_norm": 0.4905916154384613, + "learning_rate": 0.0015, + "loss": 1.7183, + "step": 3069 + }, + { + "epoch": 0.32383966244725737, + "grad_norm": 0.5138663649559021, + "learning_rate": 0.0015, + "loss": 1.7293, + "step": 3070 + }, + { + "epoch": 0.3239451476793249, + "grad_norm": 0.5315477848052979, + "learning_rate": 0.0015, + "loss": 1.7419, + "step": 3071 + }, + { + "epoch": 0.3240506329113924, + "grad_norm": 0.4920712113380432, + "learning_rate": 0.0015, + "loss": 1.6559, + "step": 3072 + }, + { + "epoch": 0.3241561181434599, + "grad_norm": 0.48032838106155396, + "learning_rate": 0.0015, + "loss": 1.7018, + "step": 3073 + }, + { + "epoch": 0.32426160337552745, + "grad_norm": 0.5117155909538269, + "learning_rate": 0.0015, + "loss": 1.7267, + "step": 3074 + }, + { + "epoch": 0.32436708860759494, + "grad_norm": 0.4488002359867096, + "learning_rate": 0.0015, + "loss": 1.6693, + "step": 3075 + }, + { + "epoch": 0.32447257383966244, + "grad_norm": 0.48116782307624817, + "learning_rate": 0.0015, + "loss": 1.6683, + "step": 3076 + }, + { + "epoch": 0.32457805907173, + "grad_norm": 0.5115820169448853, + "learning_rate": 0.0015, + "loss": 1.6754, + "step": 3077 + }, + { + "epoch": 0.3246835443037975, + "grad_norm": 0.49160560965538025, + "learning_rate": 0.0015, + "loss": 1.6932, + "step": 3078 + }, + { + "epoch": 0.32478902953586497, + "grad_norm": 0.5501735210418701, + "learning_rate": 0.0015, + "loss": 1.6965, + "step": 3079 + }, + { + "epoch": 0.32489451476793246, + "grad_norm": 0.5930479764938354, + "learning_rate": 0.0015, + "loss": 1.749, + "step": 3080 + }, + { + "epoch": 0.325, + "grad_norm": 0.4873003363609314, + "learning_rate": 0.0015, + "loss": 1.6475, + "step": 3081 + }, + { + "epoch": 
0.3251054852320675, + "grad_norm": 0.5444014072418213, + "learning_rate": 0.0015, + "loss": 1.7198, + "step": 3082 + }, + { + "epoch": 0.325210970464135, + "grad_norm": 0.6106745600700378, + "learning_rate": 0.0015, + "loss": 1.7062, + "step": 3083 + }, + { + "epoch": 0.32531645569620254, + "grad_norm": 0.45090463757514954, + "learning_rate": 0.0015, + "loss": 1.7129, + "step": 3084 + }, + { + "epoch": 0.32542194092827004, + "grad_norm": 0.5389440655708313, + "learning_rate": 0.0015, + "loss": 1.7278, + "step": 3085 + }, + { + "epoch": 0.32552742616033753, + "grad_norm": 0.5276144742965698, + "learning_rate": 0.0015, + "loss": 1.7077, + "step": 3086 + }, + { + "epoch": 0.3256329113924051, + "grad_norm": 0.45780766010284424, + "learning_rate": 0.0015, + "loss": 1.761, + "step": 3087 + }, + { + "epoch": 0.32573839662447257, + "grad_norm": 0.5008159279823303, + "learning_rate": 0.0015, + "loss": 1.6925, + "step": 3088 + }, + { + "epoch": 0.32584388185654006, + "grad_norm": 0.49904295802116394, + "learning_rate": 0.0015, + "loss": 1.7068, + "step": 3089 + }, + { + "epoch": 0.3259493670886076, + "grad_norm": 0.504729151725769, + "learning_rate": 0.0015, + "loss": 1.7373, + "step": 3090 + }, + { + "epoch": 0.3260548523206751, + "grad_norm": 0.494392991065979, + "learning_rate": 0.0015, + "loss": 1.7291, + "step": 3091 + }, + { + "epoch": 0.3261603375527426, + "grad_norm": 0.46707990765571594, + "learning_rate": 0.0015, + "loss": 1.6951, + "step": 3092 + }, + { + "epoch": 0.32626582278481014, + "grad_norm": 0.4759694039821625, + "learning_rate": 0.0015, + "loss": 1.6798, + "step": 3093 + }, + { + "epoch": 0.32637130801687764, + "grad_norm": 0.4642212986946106, + "learning_rate": 0.0015, + "loss": 1.6961, + "step": 3094 + }, + { + "epoch": 0.32647679324894513, + "grad_norm": 0.47130927443504333, + "learning_rate": 0.0015, + "loss": 1.694, + "step": 3095 + }, + { + "epoch": 0.3265822784810127, + "grad_norm": 0.5033254027366638, + "learning_rate": 0.0015, + "loss": 1.7039, + 
"step": 3096 + }, + { + "epoch": 0.32668776371308017, + "grad_norm": 0.5418142080307007, + "learning_rate": 0.0015, + "loss": 1.6881, + "step": 3097 + }, + { + "epoch": 0.32679324894514766, + "grad_norm": 0.4699496924877167, + "learning_rate": 0.0015, + "loss": 1.7271, + "step": 3098 + }, + { + "epoch": 0.3268987341772152, + "grad_norm": 0.7430257797241211, + "learning_rate": 0.0015, + "loss": 1.6879, + "step": 3099 + }, + { + "epoch": 0.3270042194092827, + "grad_norm": 0.8292415142059326, + "learning_rate": 0.0015, + "loss": 1.7252, + "step": 3100 + }, + { + "epoch": 0.3271097046413502, + "grad_norm": 0.5245999097824097, + "learning_rate": 0.0015, + "loss": 1.7084, + "step": 3101 + }, + { + "epoch": 0.32721518987341774, + "grad_norm": 0.5449711084365845, + "learning_rate": 0.0015, + "loss": 1.717, + "step": 3102 + }, + { + "epoch": 0.32732067510548524, + "grad_norm": 0.6642726063728333, + "learning_rate": 0.0015, + "loss": 1.7238, + "step": 3103 + }, + { + "epoch": 0.32742616033755273, + "grad_norm": 0.5721220970153809, + "learning_rate": 0.0015, + "loss": 1.69, + "step": 3104 + }, + { + "epoch": 0.3275316455696203, + "grad_norm": 0.5738294124603271, + "learning_rate": 0.0015, + "loss": 1.6925, + "step": 3105 + }, + { + "epoch": 0.32763713080168777, + "grad_norm": 0.5988900065422058, + "learning_rate": 0.0015, + "loss": 1.7273, + "step": 3106 + }, + { + "epoch": 0.32774261603375526, + "grad_norm": 0.4937974214553833, + "learning_rate": 0.0015, + "loss": 1.6868, + "step": 3107 + }, + { + "epoch": 0.3278481012658228, + "grad_norm": 0.684008002281189, + "learning_rate": 0.0015, + "loss": 1.6909, + "step": 3108 + }, + { + "epoch": 0.3279535864978903, + "grad_norm": 0.5166898965835571, + "learning_rate": 0.0015, + "loss": 1.7227, + "step": 3109 + }, + { + "epoch": 0.3280590717299578, + "grad_norm": 0.661108136177063, + "learning_rate": 0.0015, + "loss": 1.7195, + "step": 3110 + }, + { + "epoch": 0.3281645569620253, + "grad_norm": 0.5461713075637817, + "learning_rate": 
0.0015, + "loss": 1.7208, + "step": 3111 + }, + { + "epoch": 0.32827004219409284, + "grad_norm": 0.5846986174583435, + "learning_rate": 0.0015, + "loss": 1.7103, + "step": 3112 + }, + { + "epoch": 0.32837552742616033, + "grad_norm": 0.6091597676277161, + "learning_rate": 0.0015, + "loss": 1.7008, + "step": 3113 + }, + { + "epoch": 0.3284810126582278, + "grad_norm": 0.6286957859992981, + "learning_rate": 0.0015, + "loss": 1.7222, + "step": 3114 + }, + { + "epoch": 0.32858649789029537, + "grad_norm": 0.6247303485870361, + "learning_rate": 0.0015, + "loss": 1.7073, + "step": 3115 + }, + { + "epoch": 0.32869198312236286, + "grad_norm": 0.57159024477005, + "learning_rate": 0.0015, + "loss": 1.678, + "step": 3116 + }, + { + "epoch": 0.32879746835443036, + "grad_norm": 0.735164999961853, + "learning_rate": 0.0015, + "loss": 1.7103, + "step": 3117 + }, + { + "epoch": 0.3289029535864979, + "grad_norm": 0.5963243246078491, + "learning_rate": 0.0015, + "loss": 1.6868, + "step": 3118 + }, + { + "epoch": 0.3290084388185654, + "grad_norm": 0.5325501561164856, + "learning_rate": 0.0015, + "loss": 1.7046, + "step": 3119 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 0.7457340955734253, + "learning_rate": 0.0015, + "loss": 1.7651, + "step": 3120 + }, + { + "epoch": 0.32921940928270044, + "grad_norm": 0.6895079016685486, + "learning_rate": 0.0015, + "loss": 1.7074, + "step": 3121 + }, + { + "epoch": 0.32932489451476793, + "grad_norm": 0.5428807735443115, + "learning_rate": 0.0015, + "loss": 1.689, + "step": 3122 + }, + { + "epoch": 0.3294303797468354, + "grad_norm": 0.5404313802719116, + "learning_rate": 0.0015, + "loss": 1.7589, + "step": 3123 + }, + { + "epoch": 0.32953586497890297, + "grad_norm": 0.5746237635612488, + "learning_rate": 0.0015, + "loss": 1.686, + "step": 3124 + }, + { + "epoch": 0.32964135021097046, + "grad_norm": 0.49167463183403015, + "learning_rate": 0.0015, + "loss": 1.743, + "step": 3125 + }, + { + "epoch": 0.32974683544303796, + "grad_norm": 
0.7660045623779297, + "learning_rate": 0.0015, + "loss": 1.7277, + "step": 3126 + }, + { + "epoch": 0.3298523206751055, + "grad_norm": 0.9002141356468201, + "learning_rate": 0.0015, + "loss": 1.7226, + "step": 3127 + }, + { + "epoch": 0.329957805907173, + "grad_norm": 0.4946906268596649, + "learning_rate": 0.0015, + "loss": 1.7029, + "step": 3128 + }, + { + "epoch": 0.3300632911392405, + "grad_norm": 0.8130706548690796, + "learning_rate": 0.0015, + "loss": 1.7452, + "step": 3129 + }, + { + "epoch": 0.33016877637130804, + "grad_norm": 0.6559513211250305, + "learning_rate": 0.0015, + "loss": 1.7304, + "step": 3130 + }, + { + "epoch": 0.33027426160337553, + "grad_norm": 0.5682690143585205, + "learning_rate": 0.0015, + "loss": 1.6972, + "step": 3131 + }, + { + "epoch": 0.330379746835443, + "grad_norm": 0.88380366563797, + "learning_rate": 0.0015, + "loss": 1.7255, + "step": 3132 + }, + { + "epoch": 0.33048523206751057, + "grad_norm": 0.8223167061805725, + "learning_rate": 0.0015, + "loss": 1.6579, + "step": 3133 + }, + { + "epoch": 0.33059071729957806, + "grad_norm": 0.46287649869918823, + "learning_rate": 0.0015, + "loss": 1.734, + "step": 3134 + }, + { + "epoch": 0.33069620253164556, + "grad_norm": 0.6548208594322205, + "learning_rate": 0.0015, + "loss": 1.6651, + "step": 3135 + }, + { + "epoch": 0.3308016877637131, + "grad_norm": 0.6544517874717712, + "learning_rate": 0.0015, + "loss": 1.6888, + "step": 3136 + }, + { + "epoch": 0.3309071729957806, + "grad_norm": 0.4662328064441681, + "learning_rate": 0.0015, + "loss": 1.6692, + "step": 3137 + }, + { + "epoch": 0.3310126582278481, + "grad_norm": 0.6554684042930603, + "learning_rate": 0.0015, + "loss": 1.7382, + "step": 3138 + }, + { + "epoch": 0.33111814345991564, + "grad_norm": 0.7112213373184204, + "learning_rate": 0.0015, + "loss": 1.7187, + "step": 3139 + }, + { + "epoch": 0.33122362869198313, + "grad_norm": 0.6192386150360107, + "learning_rate": 0.0015, + "loss": 1.6896, + "step": 3140 + }, + { + "epoch": 
0.3313291139240506, + "grad_norm": 0.5745393633842468, + "learning_rate": 0.0015, + "loss": 1.7242, + "step": 3141 + }, + { + "epoch": 0.33143459915611817, + "grad_norm": 0.5276301503181458, + "learning_rate": 0.0015, + "loss": 1.6827, + "step": 3142 + }, + { + "epoch": 0.33154008438818566, + "grad_norm": 0.49275004863739014, + "learning_rate": 0.0015, + "loss": 1.7192, + "step": 3143 + }, + { + "epoch": 0.33164556962025316, + "grad_norm": 0.5142596364021301, + "learning_rate": 0.0015, + "loss": 1.7245, + "step": 3144 + }, + { + "epoch": 0.33175105485232065, + "grad_norm": 0.4448336362838745, + "learning_rate": 0.0015, + "loss": 1.7213, + "step": 3145 + }, + { + "epoch": 0.3318565400843882, + "grad_norm": 0.554429829120636, + "learning_rate": 0.0015, + "loss": 1.7104, + "step": 3146 + }, + { + "epoch": 0.3319620253164557, + "grad_norm": 0.515129804611206, + "learning_rate": 0.0015, + "loss": 1.6915, + "step": 3147 + }, + { + "epoch": 0.3320675105485232, + "grad_norm": 0.5072916746139526, + "learning_rate": 0.0015, + "loss": 1.6949, + "step": 3148 + }, + { + "epoch": 0.33217299578059073, + "grad_norm": 0.5899186134338379, + "learning_rate": 0.0015, + "loss": 1.6962, + "step": 3149 + }, + { + "epoch": 0.3322784810126582, + "grad_norm": 0.4840143620967865, + "learning_rate": 0.0015, + "loss": 1.6973, + "step": 3150 + }, + { + "epoch": 0.3323839662447257, + "grad_norm": 0.6333123445510864, + "learning_rate": 0.0015, + "loss": 1.7048, + "step": 3151 + }, + { + "epoch": 0.33248945147679326, + "grad_norm": 0.6574004292488098, + "learning_rate": 0.0015, + "loss": 1.6943, + "step": 3152 + }, + { + "epoch": 0.33259493670886076, + "grad_norm": 0.5177522301673889, + "learning_rate": 0.0015, + "loss": 1.7207, + "step": 3153 + }, + { + "epoch": 0.33270042194092825, + "grad_norm": 0.5760917663574219, + "learning_rate": 0.0015, + "loss": 1.7131, + "step": 3154 + }, + { + "epoch": 0.3328059071729958, + "grad_norm": 0.5342174172401428, + "learning_rate": 0.0015, + "loss": 1.7097, + 
"step": 3155 + }, + { + "epoch": 0.3329113924050633, + "grad_norm": 0.5675168037414551, + "learning_rate": 0.0015, + "loss": 1.7096, + "step": 3156 + }, + { + "epoch": 0.3330168776371308, + "grad_norm": 0.6600531339645386, + "learning_rate": 0.0015, + "loss": 1.7023, + "step": 3157 + }, + { + "epoch": 0.33312236286919833, + "grad_norm": 0.5559056401252747, + "learning_rate": 0.0015, + "loss": 1.6869, + "step": 3158 + }, + { + "epoch": 0.3332278481012658, + "grad_norm": 0.57248455286026, + "learning_rate": 0.0015, + "loss": 1.7187, + "step": 3159 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.5283870697021484, + "learning_rate": 0.0015, + "loss": 1.7089, + "step": 3160 + }, + { + "epoch": 0.33343881856540086, + "grad_norm": 0.4742368459701538, + "learning_rate": 0.0015, + "loss": 1.7136, + "step": 3161 + }, + { + "epoch": 0.33354430379746836, + "grad_norm": 0.7028859853744507, + "learning_rate": 0.0015, + "loss": 1.707, + "step": 3162 + }, + { + "epoch": 0.33364978902953585, + "grad_norm": 0.7126950621604919, + "learning_rate": 0.0015, + "loss": 1.7071, + "step": 3163 + }, + { + "epoch": 0.3337552742616034, + "grad_norm": 0.4972867965698242, + "learning_rate": 0.0015, + "loss": 1.716, + "step": 3164 + }, + { + "epoch": 0.3338607594936709, + "grad_norm": 0.7651951909065247, + "learning_rate": 0.0015, + "loss": 1.7067, + "step": 3165 + }, + { + "epoch": 0.3339662447257384, + "grad_norm": 0.5141854882240295, + "learning_rate": 0.0015, + "loss": 1.7433, + "step": 3166 + }, + { + "epoch": 0.33407172995780593, + "grad_norm": 0.6878163814544678, + "learning_rate": 0.0015, + "loss": 1.6786, + "step": 3167 + }, + { + "epoch": 0.3341772151898734, + "grad_norm": 0.7308759093284607, + "learning_rate": 0.0015, + "loss": 1.7179, + "step": 3168 + }, + { + "epoch": 0.3342827004219409, + "grad_norm": 0.5225549340248108, + "learning_rate": 0.0015, + "loss": 1.7305, + "step": 3169 + }, + { + "epoch": 0.33438818565400846, + "grad_norm": 0.5995153784751892, + "learning_rate": 
0.0015, + "loss": 1.6682, + "step": 3170 + }, + { + "epoch": 0.33449367088607596, + "grad_norm": 0.5042096972465515, + "learning_rate": 0.0015, + "loss": 1.6979, + "step": 3171 + }, + { + "epoch": 0.33459915611814345, + "grad_norm": 0.5545108914375305, + "learning_rate": 0.0015, + "loss": 1.6876, + "step": 3172 + }, + { + "epoch": 0.334704641350211, + "grad_norm": 0.7023755311965942, + "learning_rate": 0.0015, + "loss": 1.7053, + "step": 3173 + }, + { + "epoch": 0.3348101265822785, + "grad_norm": 0.7239559888839722, + "learning_rate": 0.0015, + "loss": 1.7178, + "step": 3174 + }, + { + "epoch": 0.334915611814346, + "grad_norm": 0.6107579469680786, + "learning_rate": 0.0015, + "loss": 1.7103, + "step": 3175 + }, + { + "epoch": 0.33502109704641353, + "grad_norm": 0.4597165584564209, + "learning_rate": 0.0015, + "loss": 1.7002, + "step": 3176 + }, + { + "epoch": 0.335126582278481, + "grad_norm": 0.5891026258468628, + "learning_rate": 0.0015, + "loss": 1.7052, + "step": 3177 + }, + { + "epoch": 0.3352320675105485, + "grad_norm": 0.47593066096305847, + "learning_rate": 0.0015, + "loss": 1.6954, + "step": 3178 + }, + { + "epoch": 0.335337552742616, + "grad_norm": 0.5646541118621826, + "learning_rate": 0.0015, + "loss": 1.6659, + "step": 3179 + }, + { + "epoch": 0.33544303797468356, + "grad_norm": 0.503830075263977, + "learning_rate": 0.0015, + "loss": 1.7446, + "step": 3180 + }, + { + "epoch": 0.33554852320675105, + "grad_norm": 0.631014883518219, + "learning_rate": 0.0015, + "loss": 1.6698, + "step": 3181 + }, + { + "epoch": 0.33565400843881854, + "grad_norm": 0.7992089986801147, + "learning_rate": 0.0015, + "loss": 1.6867, + "step": 3182 + }, + { + "epoch": 0.3357594936708861, + "grad_norm": 0.6834596991539001, + "learning_rate": 0.0015, + "loss": 1.7043, + "step": 3183 + }, + { + "epoch": 0.3358649789029536, + "grad_norm": 0.46089988946914673, + "learning_rate": 0.0015, + "loss": 1.7046, + "step": 3184 + }, + { + "epoch": 0.3359704641350211, + "grad_norm": 
0.6685697436332703, + "learning_rate": 0.0015, + "loss": 1.7151, + "step": 3185 + }, + { + "epoch": 0.3360759493670886, + "grad_norm": 0.6084441542625427, + "learning_rate": 0.0015, + "loss": 1.7049, + "step": 3186 + }, + { + "epoch": 0.3361814345991561, + "grad_norm": 0.45125362277030945, + "learning_rate": 0.0015, + "loss": 1.7253, + "step": 3187 + }, + { + "epoch": 0.3362869198312236, + "grad_norm": 0.5677869319915771, + "learning_rate": 0.0015, + "loss": 1.708, + "step": 3188 + }, + { + "epoch": 0.33639240506329116, + "grad_norm": 0.671842098236084, + "learning_rate": 0.0015, + "loss": 1.6861, + "step": 3189 + }, + { + "epoch": 0.33649789029535865, + "grad_norm": 0.5831478238105774, + "learning_rate": 0.0015, + "loss": 1.734, + "step": 3190 + }, + { + "epoch": 0.33660337552742614, + "grad_norm": 0.4758757948875427, + "learning_rate": 0.0015, + "loss": 1.6978, + "step": 3191 + }, + { + "epoch": 0.3367088607594937, + "grad_norm": 0.5622855424880981, + "learning_rate": 0.0015, + "loss": 1.6779, + "step": 3192 + }, + { + "epoch": 0.3368143459915612, + "grad_norm": 0.5027768015861511, + "learning_rate": 0.0015, + "loss": 1.7407, + "step": 3193 + }, + { + "epoch": 0.3369198312236287, + "grad_norm": 0.5022798776626587, + "learning_rate": 0.0015, + "loss": 1.7238, + "step": 3194 + }, + { + "epoch": 0.3370253164556962, + "grad_norm": 0.4664076864719391, + "learning_rate": 0.0015, + "loss": 1.6678, + "step": 3195 + }, + { + "epoch": 0.3371308016877637, + "grad_norm": 0.4845595061779022, + "learning_rate": 0.0015, + "loss": 1.7356, + "step": 3196 + }, + { + "epoch": 0.3372362869198312, + "grad_norm": 0.5015625357627869, + "learning_rate": 0.0015, + "loss": 1.6471, + "step": 3197 + }, + { + "epoch": 0.33734177215189876, + "grad_norm": 0.5129271745681763, + "learning_rate": 0.0015, + "loss": 1.7026, + "step": 3198 + }, + { + "epoch": 0.33744725738396625, + "grad_norm": 0.5271135568618774, + "learning_rate": 0.0015, + "loss": 1.6674, + "step": 3199 + }, + { + "epoch": 
0.33755274261603374, + "grad_norm": 0.503288209438324, + "learning_rate": 0.0015, + "loss": 1.707, + "step": 3200 + }, + { + "epoch": 0.3376582278481013, + "grad_norm": 0.45782169699668884, + "learning_rate": 0.0015, + "loss": 1.6913, + "step": 3201 + }, + { + "epoch": 0.3377637130801688, + "grad_norm": 0.5114582777023315, + "learning_rate": 0.0015, + "loss": 1.737, + "step": 3202 + }, + { + "epoch": 0.3378691983122363, + "grad_norm": 0.5901846289634705, + "learning_rate": 0.0015, + "loss": 1.7242, + "step": 3203 + }, + { + "epoch": 0.3379746835443038, + "grad_norm": 0.672936737537384, + "learning_rate": 0.0015, + "loss": 1.7104, + "step": 3204 + }, + { + "epoch": 0.3380801687763713, + "grad_norm": 0.5681142210960388, + "learning_rate": 0.0015, + "loss": 1.7256, + "step": 3205 + }, + { + "epoch": 0.3381856540084388, + "grad_norm": 0.5117339491844177, + "learning_rate": 0.0015, + "loss": 1.7215, + "step": 3206 + }, + { + "epoch": 0.33829113924050636, + "grad_norm": 0.5524427890777588, + "learning_rate": 0.0015, + "loss": 1.7258, + "step": 3207 + }, + { + "epoch": 0.33839662447257385, + "grad_norm": 0.5113054513931274, + "learning_rate": 0.0015, + "loss": 1.6759, + "step": 3208 + }, + { + "epoch": 0.33850210970464134, + "grad_norm": 0.4930611848831177, + "learning_rate": 0.0015, + "loss": 1.6929, + "step": 3209 + }, + { + "epoch": 0.33860759493670883, + "grad_norm": 0.5216551423072815, + "learning_rate": 0.0015, + "loss": 1.7071, + "step": 3210 + }, + { + "epoch": 0.3387130801687764, + "grad_norm": 0.5543256402015686, + "learning_rate": 0.0015, + "loss": 1.6885, + "step": 3211 + }, + { + "epoch": 0.3388185654008439, + "grad_norm": 0.5151799321174622, + "learning_rate": 0.0015, + "loss": 1.684, + "step": 3212 + }, + { + "epoch": 0.33892405063291137, + "grad_norm": 0.49063053727149963, + "learning_rate": 0.0015, + "loss": 1.682, + "step": 3213 + }, + { + "epoch": 0.3390295358649789, + "grad_norm": 0.5205222368240356, + "learning_rate": 0.0015, + "loss": 1.733, + 
"step": 3214 + }, + { + "epoch": 0.3391350210970464, + "grad_norm": 0.5935371518135071, + "learning_rate": 0.0015, + "loss": 1.7125, + "step": 3215 + }, + { + "epoch": 0.3392405063291139, + "grad_norm": 0.6582415103912354, + "learning_rate": 0.0015, + "loss": 1.7074, + "step": 3216 + }, + { + "epoch": 0.33934599156118145, + "grad_norm": 0.5578335523605347, + "learning_rate": 0.0015, + "loss": 1.6992, + "step": 3217 + }, + { + "epoch": 0.33945147679324894, + "grad_norm": 0.5155923366546631, + "learning_rate": 0.0015, + "loss": 1.7109, + "step": 3218 + }, + { + "epoch": 0.33955696202531643, + "grad_norm": 0.6253606677055359, + "learning_rate": 0.0015, + "loss": 1.6796, + "step": 3219 + }, + { + "epoch": 0.339662447257384, + "grad_norm": 0.5452165603637695, + "learning_rate": 0.0015, + "loss": 1.7119, + "step": 3220 + }, + { + "epoch": 0.3397679324894515, + "grad_norm": 0.4783080220222473, + "learning_rate": 0.0015, + "loss": 1.7015, + "step": 3221 + }, + { + "epoch": 0.33987341772151897, + "grad_norm": 0.605499804019928, + "learning_rate": 0.0015, + "loss": 1.7164, + "step": 3222 + }, + { + "epoch": 0.3399789029535865, + "grad_norm": 0.6417616009712219, + "learning_rate": 0.0015, + "loss": 1.7096, + "step": 3223 + }, + { + "epoch": 0.340084388185654, + "grad_norm": 0.49299174547195435, + "learning_rate": 0.0015, + "loss": 1.6775, + "step": 3224 + }, + { + "epoch": 0.3401898734177215, + "grad_norm": 0.6858759522438049, + "learning_rate": 0.0015, + "loss": 1.7109, + "step": 3225 + }, + { + "epoch": 0.34029535864978905, + "grad_norm": 0.7292691469192505, + "learning_rate": 0.0015, + "loss": 1.6802, + "step": 3226 + }, + { + "epoch": 0.34040084388185654, + "grad_norm": 0.5831139087677002, + "learning_rate": 0.0015, + "loss": 1.6743, + "step": 3227 + }, + { + "epoch": 0.34050632911392403, + "grad_norm": 0.5639909505844116, + "learning_rate": 0.0015, + "loss": 1.6706, + "step": 3228 + }, + { + "epoch": 0.3406118143459916, + "grad_norm": 0.7858826518058777, + 
"learning_rate": 0.0015, + "loss": 1.7151, + "step": 3229 + }, + { + "epoch": 0.3407172995780591, + "grad_norm": 0.4706067442893982, + "learning_rate": 0.0015, + "loss": 1.7146, + "step": 3230 + }, + { + "epoch": 0.34082278481012657, + "grad_norm": 0.8369712233543396, + "learning_rate": 0.0015, + "loss": 1.686, + "step": 3231 + }, + { + "epoch": 0.3409282700421941, + "grad_norm": 0.716052234172821, + "learning_rate": 0.0015, + "loss": 1.6772, + "step": 3232 + }, + { + "epoch": 0.3410337552742616, + "grad_norm": 0.5431948900222778, + "learning_rate": 0.0015, + "loss": 1.6752, + "step": 3233 + }, + { + "epoch": 0.3411392405063291, + "grad_norm": 0.8990529775619507, + "learning_rate": 0.0015, + "loss": 1.6745, + "step": 3234 + }, + { + "epoch": 0.34124472573839665, + "grad_norm": 0.6069479584693909, + "learning_rate": 0.0015, + "loss": 1.6627, + "step": 3235 + }, + { + "epoch": 0.34135021097046414, + "grad_norm": 0.6533997654914856, + "learning_rate": 0.0015, + "loss": 1.7082, + "step": 3236 + }, + { + "epoch": 0.34145569620253163, + "grad_norm": 0.7731319665908813, + "learning_rate": 0.0015, + "loss": 1.6739, + "step": 3237 + }, + { + "epoch": 0.3415611814345992, + "grad_norm": 0.7820937037467957, + "learning_rate": 0.0015, + "loss": 1.682, + "step": 3238 + }, + { + "epoch": 0.3416666666666667, + "grad_norm": 0.5627657771110535, + "learning_rate": 0.0015, + "loss": 1.663, + "step": 3239 + }, + { + "epoch": 0.34177215189873417, + "grad_norm": 0.5278221964836121, + "learning_rate": 0.0015, + "loss": 1.7004, + "step": 3240 + }, + { + "epoch": 0.3418776371308017, + "grad_norm": 0.5488499402999878, + "learning_rate": 0.0015, + "loss": 1.6825, + "step": 3241 + }, + { + "epoch": 0.3419831223628692, + "grad_norm": 0.524059534072876, + "learning_rate": 0.0015, + "loss": 1.7194, + "step": 3242 + }, + { + "epoch": 0.3420886075949367, + "grad_norm": 0.4945333003997803, + "learning_rate": 0.0015, + "loss": 1.6782, + "step": 3243 + }, + { + "epoch": 0.3421940928270042, + 
"grad_norm": 0.5472710728645325, + "learning_rate": 0.0015, + "loss": 1.7152, + "step": 3244 + }, + { + "epoch": 0.34229957805907174, + "grad_norm": 0.46813690662384033, + "learning_rate": 0.0015, + "loss": 1.6773, + "step": 3245 + }, + { + "epoch": 0.34240506329113923, + "grad_norm": 0.5513242483139038, + "learning_rate": 0.0015, + "loss": 1.6979, + "step": 3246 + }, + { + "epoch": 0.3425105485232067, + "grad_norm": 0.4633464217185974, + "learning_rate": 0.0015, + "loss": 1.7162, + "step": 3247 + }, + { + "epoch": 0.3426160337552743, + "grad_norm": 0.5948613286018372, + "learning_rate": 0.0015, + "loss": 1.7221, + "step": 3248 + }, + { + "epoch": 0.34272151898734177, + "grad_norm": 0.6489377617835999, + "learning_rate": 0.0015, + "loss": 1.7034, + "step": 3249 + }, + { + "epoch": 0.34282700421940926, + "grad_norm": 0.5473290085792542, + "learning_rate": 0.0015, + "loss": 1.7181, + "step": 3250 + }, + { + "epoch": 0.3429324894514768, + "grad_norm": 0.6085308194160461, + "learning_rate": 0.0015, + "loss": 1.6865, + "step": 3251 + }, + { + "epoch": 0.3430379746835443, + "grad_norm": 0.8947362899780273, + "learning_rate": 0.0015, + "loss": 1.7276, + "step": 3252 + }, + { + "epoch": 0.3431434599156118, + "grad_norm": 0.6903753876686096, + "learning_rate": 0.0015, + "loss": 1.7199, + "step": 3253 + }, + { + "epoch": 0.34324894514767934, + "grad_norm": 0.5013677477836609, + "learning_rate": 0.0015, + "loss": 1.6789, + "step": 3254 + }, + { + "epoch": 0.34335443037974683, + "grad_norm": 0.690724790096283, + "learning_rate": 0.0015, + "loss": 1.671, + "step": 3255 + }, + { + "epoch": 0.3434599156118143, + "grad_norm": 0.5765038132667542, + "learning_rate": 0.0015, + "loss": 1.6566, + "step": 3256 + }, + { + "epoch": 0.3435654008438819, + "grad_norm": 0.5136321187019348, + "learning_rate": 0.0015, + "loss": 1.7373, + "step": 3257 + }, + { + "epoch": 0.34367088607594937, + "grad_norm": 0.5360373854637146, + "learning_rate": 0.0015, + "loss": 1.6834, + "step": 3258 + }, + { + 
"epoch": 0.34377637130801686, + "grad_norm": 0.4985573887825012, + "learning_rate": 0.0015, + "loss": 1.693, + "step": 3259 + }, + { + "epoch": 0.3438818565400844, + "grad_norm": 0.5356287360191345, + "learning_rate": 0.0015, + "loss": 1.6777, + "step": 3260 + }, + { + "epoch": 0.3439873417721519, + "grad_norm": 0.6050174832344055, + "learning_rate": 0.0015, + "loss": 1.7266, + "step": 3261 + }, + { + "epoch": 0.3440928270042194, + "grad_norm": 0.6644989848136902, + "learning_rate": 0.0015, + "loss": 1.7146, + "step": 3262 + }, + { + "epoch": 0.34419831223628694, + "grad_norm": 0.6384313106536865, + "learning_rate": 0.0015, + "loss": 1.7211, + "step": 3263 + }, + { + "epoch": 0.34430379746835443, + "grad_norm": 0.6091856360435486, + "learning_rate": 0.0015, + "loss": 1.646, + "step": 3264 + }, + { + "epoch": 0.3444092827004219, + "grad_norm": 0.6349808573722839, + "learning_rate": 0.0015, + "loss": 1.6829, + "step": 3265 + }, + { + "epoch": 0.3445147679324895, + "grad_norm": 0.5130767226219177, + "learning_rate": 0.0015, + "loss": 1.7253, + "step": 3266 + }, + { + "epoch": 0.34462025316455697, + "grad_norm": 0.6537332534790039, + "learning_rate": 0.0015, + "loss": 1.7367, + "step": 3267 + }, + { + "epoch": 0.34472573839662446, + "grad_norm": 0.7101198434829712, + "learning_rate": 0.0015, + "loss": 1.7207, + "step": 3268 + }, + { + "epoch": 0.344831223628692, + "grad_norm": 0.49545493721961975, + "learning_rate": 0.0015, + "loss": 1.7137, + "step": 3269 + }, + { + "epoch": 0.3449367088607595, + "grad_norm": 0.646725058555603, + "learning_rate": 0.0015, + "loss": 1.7131, + "step": 3270 + }, + { + "epoch": 0.345042194092827, + "grad_norm": 0.7167512774467468, + "learning_rate": 0.0015, + "loss": 1.6758, + "step": 3271 + }, + { + "epoch": 0.34514767932489454, + "grad_norm": 0.6485745310783386, + "learning_rate": 0.0015, + "loss": 1.7517, + "step": 3272 + }, + { + "epoch": 0.34525316455696203, + "grad_norm": 0.5508519411087036, + "learning_rate": 0.0015, + "loss": 
1.7139, + "step": 3273 + }, + { + "epoch": 0.3453586497890295, + "grad_norm": 0.7126834988594055, + "learning_rate": 0.0015, + "loss": 1.7087, + "step": 3274 + }, + { + "epoch": 0.3454641350210971, + "grad_norm": 0.6241485476493835, + "learning_rate": 0.0015, + "loss": 1.7243, + "step": 3275 + }, + { + "epoch": 0.34556962025316457, + "grad_norm": 0.55812007188797, + "learning_rate": 0.0015, + "loss": 1.7246, + "step": 3276 + }, + { + "epoch": 0.34567510548523206, + "grad_norm": 0.5232017636299133, + "learning_rate": 0.0015, + "loss": 1.7105, + "step": 3277 + }, + { + "epoch": 0.34578059071729955, + "grad_norm": 0.4928762912750244, + "learning_rate": 0.0015, + "loss": 1.6624, + "step": 3278 + }, + { + "epoch": 0.3458860759493671, + "grad_norm": 0.5147992372512817, + "learning_rate": 0.0015, + "loss": 1.7166, + "step": 3279 + }, + { + "epoch": 0.3459915611814346, + "grad_norm": 0.6979365348815918, + "learning_rate": 0.0015, + "loss": 1.7093, + "step": 3280 + }, + { + "epoch": 0.3460970464135021, + "grad_norm": 0.5939792394638062, + "learning_rate": 0.0015, + "loss": 1.6913, + "step": 3281 + }, + { + "epoch": 0.34620253164556963, + "grad_norm": 0.5686169266700745, + "learning_rate": 0.0015, + "loss": 1.7079, + "step": 3282 + }, + { + "epoch": 0.3463080168776371, + "grad_norm": 0.7128338813781738, + "learning_rate": 0.0015, + "loss": 1.6631, + "step": 3283 + }, + { + "epoch": 0.3464135021097046, + "grad_norm": 0.6922389268875122, + "learning_rate": 0.0015, + "loss": 1.713, + "step": 3284 + }, + { + "epoch": 0.34651898734177217, + "grad_norm": 0.5139843821525574, + "learning_rate": 0.0015, + "loss": 1.6965, + "step": 3285 + }, + { + "epoch": 0.34662447257383966, + "grad_norm": 0.5343854427337646, + "learning_rate": 0.0015, + "loss": 1.721, + "step": 3286 + }, + { + "epoch": 0.34672995780590715, + "grad_norm": 0.5553687810897827, + "learning_rate": 0.0015, + "loss": 1.7585, + "step": 3287 + }, + { + "epoch": 0.3468354430379747, + "grad_norm": 0.504325270652771, + 
"learning_rate": 0.0015, + "loss": 1.6627, + "step": 3288 + }, + { + "epoch": 0.3469409282700422, + "grad_norm": 0.5255725979804993, + "learning_rate": 0.0015, + "loss": 1.6705, + "step": 3289 + }, + { + "epoch": 0.3470464135021097, + "grad_norm": 0.507466733455658, + "learning_rate": 0.0015, + "loss": 1.6276, + "step": 3290 + }, + { + "epoch": 0.34715189873417723, + "grad_norm": 0.5654557943344116, + "learning_rate": 0.0015, + "loss": 1.6744, + "step": 3291 + }, + { + "epoch": 0.3472573839662447, + "grad_norm": 0.671724259853363, + "learning_rate": 0.0015, + "loss": 1.6957, + "step": 3292 + }, + { + "epoch": 0.3473628691983122, + "grad_norm": 0.4625714421272278, + "learning_rate": 0.0015, + "loss": 1.6956, + "step": 3293 + }, + { + "epoch": 0.34746835443037977, + "grad_norm": 0.6441212296485901, + "learning_rate": 0.0015, + "loss": 1.697, + "step": 3294 + }, + { + "epoch": 0.34757383966244726, + "grad_norm": 0.6679786443710327, + "learning_rate": 0.0015, + "loss": 1.688, + "step": 3295 + }, + { + "epoch": 0.34767932489451475, + "grad_norm": 0.4963196814060211, + "learning_rate": 0.0015, + "loss": 1.7361, + "step": 3296 + }, + { + "epoch": 0.3477848101265823, + "grad_norm": 0.5643613338470459, + "learning_rate": 0.0015, + "loss": 1.7034, + "step": 3297 + }, + { + "epoch": 0.3478902953586498, + "grad_norm": 0.6231514811515808, + "learning_rate": 0.0015, + "loss": 1.688, + "step": 3298 + }, + { + "epoch": 0.3479957805907173, + "grad_norm": 0.5145208835601807, + "learning_rate": 0.0015, + "loss": 1.7219, + "step": 3299 + }, + { + "epoch": 0.34810126582278483, + "grad_norm": 0.5588470697402954, + "learning_rate": 0.0015, + "loss": 1.7082, + "step": 3300 + }, + { + "epoch": 0.3482067510548523, + "grad_norm": 0.661913275718689, + "learning_rate": 0.0015, + "loss": 1.7018, + "step": 3301 + }, + { + "epoch": 0.3483122362869198, + "grad_norm": 0.551858127117157, + "learning_rate": 0.0015, + "loss": 1.6866, + "step": 3302 + }, + { + "epoch": 0.34841772151898737, + 
"grad_norm": 0.5006975531578064, + "learning_rate": 0.0015, + "loss": 1.7312, + "step": 3303 + }, + { + "epoch": 0.34852320675105486, + "grad_norm": 0.5418669581413269, + "learning_rate": 0.0015, + "loss": 1.7068, + "step": 3304 + }, + { + "epoch": 0.34862869198312235, + "grad_norm": 0.6015976071357727, + "learning_rate": 0.0015, + "loss": 1.7178, + "step": 3305 + }, + { + "epoch": 0.3487341772151899, + "grad_norm": 0.6234529614448547, + "learning_rate": 0.0015, + "loss": 1.6744, + "step": 3306 + }, + { + "epoch": 0.3488396624472574, + "grad_norm": 0.5737875699996948, + "learning_rate": 0.0015, + "loss": 1.6975, + "step": 3307 + }, + { + "epoch": 0.3489451476793249, + "grad_norm": 0.44024980068206787, + "learning_rate": 0.0015, + "loss": 1.7066, + "step": 3308 + }, + { + "epoch": 0.3490506329113924, + "grad_norm": 0.5740571022033691, + "learning_rate": 0.0015, + "loss": 1.7012, + "step": 3309 + }, + { + "epoch": 0.3491561181434599, + "grad_norm": 0.5544969439506531, + "learning_rate": 0.0015, + "loss": 1.6758, + "step": 3310 + }, + { + "epoch": 0.3492616033755274, + "grad_norm": 0.46073102951049805, + "learning_rate": 0.0015, + "loss": 1.6946, + "step": 3311 + }, + { + "epoch": 0.3493670886075949, + "grad_norm": 0.4999867081642151, + "learning_rate": 0.0015, + "loss": 1.7307, + "step": 3312 + }, + { + "epoch": 0.34947257383966246, + "grad_norm": 0.5623162984848022, + "learning_rate": 0.0015, + "loss": 1.647, + "step": 3313 + }, + { + "epoch": 0.34957805907172995, + "grad_norm": 0.5500815510749817, + "learning_rate": 0.0015, + "loss": 1.7309, + "step": 3314 + }, + { + "epoch": 0.34968354430379744, + "grad_norm": 0.5207936763763428, + "learning_rate": 0.0015, + "loss": 1.6843, + "step": 3315 + }, + { + "epoch": 0.349789029535865, + "grad_norm": 0.5976893305778503, + "learning_rate": 0.0015, + "loss": 1.6713, + "step": 3316 + }, + { + "epoch": 0.3498945147679325, + "grad_norm": 0.5292049050331116, + "learning_rate": 0.0015, + "loss": 1.7348, + "step": 3317 + }, + { + 
"epoch": 0.35, + "grad_norm": 0.48040205240249634, + "learning_rate": 0.0015, + "loss": 1.7415, + "step": 3318 + }, + { + "epoch": 0.3501054852320675, + "grad_norm": 0.5762725472450256, + "learning_rate": 0.0015, + "loss": 1.6996, + "step": 3319 + }, + { + "epoch": 0.350210970464135, + "grad_norm": 0.4599584639072418, + "learning_rate": 0.0015, + "loss": 1.7012, + "step": 3320 + }, + { + "epoch": 0.3503164556962025, + "grad_norm": 0.5592414736747742, + "learning_rate": 0.0015, + "loss": 1.6472, + "step": 3321 + }, + { + "epoch": 0.35042194092827006, + "grad_norm": 0.6016160845756531, + "learning_rate": 0.0015, + "loss": 1.6848, + "step": 3322 + }, + { + "epoch": 0.35052742616033755, + "grad_norm": 0.5023660659790039, + "learning_rate": 0.0015, + "loss": 1.7341, + "step": 3323 + }, + { + "epoch": 0.35063291139240504, + "grad_norm": 0.5222054123878479, + "learning_rate": 0.0015, + "loss": 1.6773, + "step": 3324 + }, + { + "epoch": 0.3507383966244726, + "grad_norm": 0.5310649275779724, + "learning_rate": 0.0015, + "loss": 1.7295, + "step": 3325 + }, + { + "epoch": 0.3508438818565401, + "grad_norm": 0.5232838988304138, + "learning_rate": 0.0015, + "loss": 1.6732, + "step": 3326 + }, + { + "epoch": 0.3509493670886076, + "grad_norm": 0.511332631111145, + "learning_rate": 0.0015, + "loss": 1.7079, + "step": 3327 + }, + { + "epoch": 0.3510548523206751, + "grad_norm": 0.5557478666305542, + "learning_rate": 0.0015, + "loss": 1.7198, + "step": 3328 + }, + { + "epoch": 0.3511603375527426, + "grad_norm": 0.5167666673660278, + "learning_rate": 0.0015, + "loss": 1.7036, + "step": 3329 + }, + { + "epoch": 0.3512658227848101, + "grad_norm": 0.5409457087516785, + "learning_rate": 0.0015, + "loss": 1.6643, + "step": 3330 + }, + { + "epoch": 0.35137130801687766, + "grad_norm": 0.6356794834136963, + "learning_rate": 0.0015, + "loss": 1.6929, + "step": 3331 + }, + { + "epoch": 0.35147679324894515, + "grad_norm": 0.5135475397109985, + "learning_rate": 0.0015, + "loss": 1.6845, + "step": 
3332 + }, + { + "epoch": 0.35158227848101264, + "grad_norm": 0.5805373191833496, + "learning_rate": 0.0015, + "loss": 1.7524, + "step": 3333 + }, + { + "epoch": 0.3516877637130802, + "grad_norm": 0.865698516368866, + "learning_rate": 0.0015, + "loss": 1.6803, + "step": 3334 + }, + { + "epoch": 0.3517932489451477, + "grad_norm": 0.718709409236908, + "learning_rate": 0.0015, + "loss": 1.6771, + "step": 3335 + }, + { + "epoch": 0.3518987341772152, + "grad_norm": 0.5176778435707092, + "learning_rate": 0.0015, + "loss": 1.6953, + "step": 3336 + }, + { + "epoch": 0.3520042194092827, + "grad_norm": 0.835837721824646, + "learning_rate": 0.0015, + "loss": 1.698, + "step": 3337 + }, + { + "epoch": 0.3521097046413502, + "grad_norm": 0.7923001646995544, + "learning_rate": 0.0015, + "loss": 1.7018, + "step": 3338 + }, + { + "epoch": 0.3522151898734177, + "grad_norm": 0.6018409729003906, + "learning_rate": 0.0015, + "loss": 1.7262, + "step": 3339 + }, + { + "epoch": 0.35232067510548526, + "grad_norm": 0.5894249677658081, + "learning_rate": 0.0015, + "loss": 1.678, + "step": 3340 + }, + { + "epoch": 0.35242616033755275, + "grad_norm": 0.6654856204986572, + "learning_rate": 0.0015, + "loss": 1.661, + "step": 3341 + }, + { + "epoch": 0.35253164556962024, + "grad_norm": 0.5093351006507874, + "learning_rate": 0.0015, + "loss": 1.7185, + "step": 3342 + }, + { + "epoch": 0.35263713080168774, + "grad_norm": 0.6728792786598206, + "learning_rate": 0.0015, + "loss": 1.6778, + "step": 3343 + }, + { + "epoch": 0.3527426160337553, + "grad_norm": 0.5834736227989197, + "learning_rate": 0.0015, + "loss": 1.7246, + "step": 3344 + }, + { + "epoch": 0.3528481012658228, + "grad_norm": 0.5098950266838074, + "learning_rate": 0.0015, + "loss": 1.7201, + "step": 3345 + }, + { + "epoch": 0.35295358649789027, + "grad_norm": 0.6730625033378601, + "learning_rate": 0.0015, + "loss": 1.6871, + "step": 3346 + }, + { + "epoch": 0.3530590717299578, + "grad_norm": 0.6545631289482117, + "learning_rate": 0.0015, + 
"loss": 1.705, + "step": 3347 + }, + { + "epoch": 0.3531645569620253, + "grad_norm": 0.5896464586257935, + "learning_rate": 0.0015, + "loss": 1.6806, + "step": 3348 + }, + { + "epoch": 0.3532700421940928, + "grad_norm": 0.5974704027175903, + "learning_rate": 0.0015, + "loss": 1.6507, + "step": 3349 + }, + { + "epoch": 0.35337552742616035, + "grad_norm": 0.7370885014533997, + "learning_rate": 0.0015, + "loss": 1.6856, + "step": 3350 + }, + { + "epoch": 0.35348101265822784, + "grad_norm": 0.651846706867218, + "learning_rate": 0.0015, + "loss": 1.6806, + "step": 3351 + }, + { + "epoch": 0.35358649789029534, + "grad_norm": 0.5507676601409912, + "learning_rate": 0.0015, + "loss": 1.7234, + "step": 3352 + }, + { + "epoch": 0.3536919831223629, + "grad_norm": 0.8470252752304077, + "learning_rate": 0.0015, + "loss": 1.6935, + "step": 3353 + }, + { + "epoch": 0.3537974683544304, + "grad_norm": 0.6926529407501221, + "learning_rate": 0.0015, + "loss": 1.6617, + "step": 3354 + }, + { + "epoch": 0.35390295358649787, + "grad_norm": 0.5039480924606323, + "learning_rate": 0.0015, + "loss": 1.7087, + "step": 3355 + }, + { + "epoch": 0.3540084388185654, + "grad_norm": 0.6330252885818481, + "learning_rate": 0.0015, + "loss": 1.6756, + "step": 3356 + }, + { + "epoch": 0.3541139240506329, + "grad_norm": 0.6043148040771484, + "learning_rate": 0.0015, + "loss": 1.6967, + "step": 3357 + }, + { + "epoch": 0.3542194092827004, + "grad_norm": 0.5623552203178406, + "learning_rate": 0.0015, + "loss": 1.6649, + "step": 3358 + }, + { + "epoch": 0.35432489451476795, + "grad_norm": 0.633874237537384, + "learning_rate": 0.0015, + "loss": 1.6943, + "step": 3359 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 0.6607420444488525, + "learning_rate": 0.0015, + "loss": 1.715, + "step": 3360 + }, + { + "epoch": 0.35453586497890294, + "grad_norm": 0.5188843011856079, + "learning_rate": 0.0015, + "loss": 1.7042, + "step": 3361 + }, + { + "epoch": 0.3546413502109705, + "grad_norm": 0.6697191596031189, 
+ "learning_rate": 0.0015, + "loss": 1.7113, + "step": 3362 + }, + { + "epoch": 0.354746835443038, + "grad_norm": 0.6414597630500793, + "learning_rate": 0.0015, + "loss": 1.7045, + "step": 3363 + }, + { + "epoch": 0.35485232067510547, + "grad_norm": 0.49116820096969604, + "learning_rate": 0.0015, + "loss": 1.6996, + "step": 3364 + }, + { + "epoch": 0.354957805907173, + "grad_norm": 0.5321905016899109, + "learning_rate": 0.0015, + "loss": 1.6555, + "step": 3365 + }, + { + "epoch": 0.3550632911392405, + "grad_norm": 0.587928831577301, + "learning_rate": 0.0015, + "loss": 1.69, + "step": 3366 + }, + { + "epoch": 0.355168776371308, + "grad_norm": 0.46752479672431946, + "learning_rate": 0.0015, + "loss": 1.7064, + "step": 3367 + }, + { + "epoch": 0.35527426160337555, + "grad_norm": 0.5310870409011841, + "learning_rate": 0.0015, + "loss": 1.7015, + "step": 3368 + }, + { + "epoch": 0.35537974683544304, + "grad_norm": 0.47283926606178284, + "learning_rate": 0.0015, + "loss": 1.725, + "step": 3369 + }, + { + "epoch": 0.35548523206751054, + "grad_norm": 0.48126205801963806, + "learning_rate": 0.0015, + "loss": 1.6881, + "step": 3370 + }, + { + "epoch": 0.3555907172995781, + "grad_norm": 0.49770766496658325, + "learning_rate": 0.0015, + "loss": 1.6998, + "step": 3371 + }, + { + "epoch": 0.3556962025316456, + "grad_norm": 0.6166954040527344, + "learning_rate": 0.0015, + "loss": 1.6695, + "step": 3372 + }, + { + "epoch": 0.35580168776371307, + "grad_norm": 0.5064780712127686, + "learning_rate": 0.0015, + "loss": 1.6667, + "step": 3373 + }, + { + "epoch": 0.35590717299578056, + "grad_norm": 0.559788703918457, + "learning_rate": 0.0015, + "loss": 1.7025, + "step": 3374 + }, + { + "epoch": 0.3560126582278481, + "grad_norm": 0.5952900648117065, + "learning_rate": 0.0015, + "loss": 1.714, + "step": 3375 + }, + { + "epoch": 0.3561181434599156, + "grad_norm": 0.6047338843345642, + "learning_rate": 0.0015, + "loss": 1.7156, + "step": 3376 + }, + { + "epoch": 0.3562236286919831, + 
"grad_norm": 0.6075417995452881, + "learning_rate": 0.0015, + "loss": 1.6837, + "step": 3377 + }, + { + "epoch": 0.35632911392405064, + "grad_norm": 0.5179601907730103, + "learning_rate": 0.0015, + "loss": 1.6903, + "step": 3378 + }, + { + "epoch": 0.35643459915611814, + "grad_norm": 0.6053937673568726, + "learning_rate": 0.0015, + "loss": 1.7, + "step": 3379 + }, + { + "epoch": 0.35654008438818563, + "grad_norm": 0.6172323822975159, + "learning_rate": 0.0015, + "loss": 1.6984, + "step": 3380 + }, + { + "epoch": 0.3566455696202532, + "grad_norm": 0.5589747428894043, + "learning_rate": 0.0015, + "loss": 1.6824, + "step": 3381 + }, + { + "epoch": 0.35675105485232067, + "grad_norm": 0.6385650038719177, + "learning_rate": 0.0015, + "loss": 1.6437, + "step": 3382 + }, + { + "epoch": 0.35685654008438816, + "grad_norm": 0.5532436370849609, + "learning_rate": 0.0015, + "loss": 1.7325, + "step": 3383 + }, + { + "epoch": 0.3569620253164557, + "grad_norm": 0.6474589705467224, + "learning_rate": 0.0015, + "loss": 1.7054, + "step": 3384 + }, + { + "epoch": 0.3570675105485232, + "grad_norm": 0.6483468413352966, + "learning_rate": 0.0015, + "loss": 1.6472, + "step": 3385 + }, + { + "epoch": 0.3571729957805907, + "grad_norm": 0.4812987148761749, + "learning_rate": 0.0015, + "loss": 1.6583, + "step": 3386 + }, + { + "epoch": 0.35727848101265824, + "grad_norm": 0.6307127475738525, + "learning_rate": 0.0015, + "loss": 1.7284, + "step": 3387 + }, + { + "epoch": 0.35738396624472574, + "grad_norm": 0.6140430569648743, + "learning_rate": 0.0015, + "loss": 1.714, + "step": 3388 + }, + { + "epoch": 0.35748945147679323, + "grad_norm": 0.5917832255363464, + "learning_rate": 0.0015, + "loss": 1.7245, + "step": 3389 + }, + { + "epoch": 0.3575949367088608, + "grad_norm": 0.5120094418525696, + "learning_rate": 0.0015, + "loss": 1.6674, + "step": 3390 + }, + { + "epoch": 0.35770042194092827, + "grad_norm": 0.5447616577148438, + "learning_rate": 0.0015, + "loss": 1.6904, + "step": 3391 + }, + { + 
"epoch": 0.35780590717299576, + "grad_norm": 0.6497238278388977, + "learning_rate": 0.0015, + "loss": 1.6673, + "step": 3392 + }, + { + "epoch": 0.3579113924050633, + "grad_norm": 0.526225745677948, + "learning_rate": 0.0015, + "loss": 1.6847, + "step": 3393 + }, + { + "epoch": 0.3580168776371308, + "grad_norm": 0.5980810523033142, + "learning_rate": 0.0015, + "loss": 1.6607, + "step": 3394 + }, + { + "epoch": 0.3581223628691983, + "grad_norm": 0.6649575233459473, + "learning_rate": 0.0015, + "loss": 1.7108, + "step": 3395 + }, + { + "epoch": 0.35822784810126584, + "grad_norm": 0.4878542125225067, + "learning_rate": 0.0015, + "loss": 1.7025, + "step": 3396 + }, + { + "epoch": 0.35833333333333334, + "grad_norm": 0.6462936401367188, + "learning_rate": 0.0015, + "loss": 1.6887, + "step": 3397 + }, + { + "epoch": 0.35843881856540083, + "grad_norm": 0.6519804000854492, + "learning_rate": 0.0015, + "loss": 1.6975, + "step": 3398 + }, + { + "epoch": 0.3585443037974684, + "grad_norm": 0.5274231433868408, + "learning_rate": 0.0015, + "loss": 1.7348, + "step": 3399 + }, + { + "epoch": 0.35864978902953587, + "grad_norm": 0.5992415547370911, + "learning_rate": 0.0015, + "loss": 1.746, + "step": 3400 + }, + { + "epoch": 0.35875527426160336, + "grad_norm": 0.6707643866539001, + "learning_rate": 0.0015, + "loss": 1.7174, + "step": 3401 + }, + { + "epoch": 0.3588607594936709, + "grad_norm": 0.5592296719551086, + "learning_rate": 0.0015, + "loss": 1.6891, + "step": 3402 + }, + { + "epoch": 0.3589662447257384, + "grad_norm": 0.8578243255615234, + "learning_rate": 0.0015, + "loss": 1.7337, + "step": 3403 + }, + { + "epoch": 0.3590717299578059, + "grad_norm": 1.013196587562561, + "learning_rate": 0.0015, + "loss": 1.6993, + "step": 3404 + }, + { + "epoch": 0.35917721518987344, + "grad_norm": 0.48511528968811035, + "learning_rate": 0.0015, + "loss": 1.6781, + "step": 3405 + }, + { + "epoch": 0.35928270042194094, + "grad_norm": 0.7466984391212463, + "learning_rate": 0.0015, + "loss": 
1.7116, + "step": 3406 + }, + { + "epoch": 0.35938818565400843, + "grad_norm": 0.7377890944480896, + "learning_rate": 0.0015, + "loss": 1.6786, + "step": 3407 + }, + { + "epoch": 0.3594936708860759, + "grad_norm": 0.5139774084091187, + "learning_rate": 0.0015, + "loss": 1.6971, + "step": 3408 + }, + { + "epoch": 0.35959915611814347, + "grad_norm": 0.5240907073020935, + "learning_rate": 0.0015, + "loss": 1.7014, + "step": 3409 + }, + { + "epoch": 0.35970464135021096, + "grad_norm": 0.5521343350410461, + "learning_rate": 0.0015, + "loss": 1.6623, + "step": 3410 + }, + { + "epoch": 0.35981012658227846, + "grad_norm": 0.5915968418121338, + "learning_rate": 0.0015, + "loss": 1.6941, + "step": 3411 + }, + { + "epoch": 0.359915611814346, + "grad_norm": 0.4795050024986267, + "learning_rate": 0.0015, + "loss": 1.6764, + "step": 3412 + }, + { + "epoch": 0.3600210970464135, + "grad_norm": 0.5866679549217224, + "learning_rate": 0.0015, + "loss": 1.688, + "step": 3413 + }, + { + "epoch": 0.360126582278481, + "grad_norm": 0.5616958737373352, + "learning_rate": 0.0015, + "loss": 1.6571, + "step": 3414 + }, + { + "epoch": 0.36023206751054854, + "grad_norm": 0.5041908025741577, + "learning_rate": 0.0015, + "loss": 1.654, + "step": 3415 + }, + { + "epoch": 0.36033755274261603, + "grad_norm": 0.5769246220588684, + "learning_rate": 0.0015, + "loss": 1.6911, + "step": 3416 + }, + { + "epoch": 0.3604430379746835, + "grad_norm": 0.4528326094150543, + "learning_rate": 0.0015, + "loss": 1.6819, + "step": 3417 + }, + { + "epoch": 0.36054852320675107, + "grad_norm": 0.6060860753059387, + "learning_rate": 0.0015, + "loss": 1.6593, + "step": 3418 + }, + { + "epoch": 0.36065400843881856, + "grad_norm": 0.6590308547019958, + "learning_rate": 0.0015, + "loss": 1.658, + "step": 3419 + }, + { + "epoch": 0.36075949367088606, + "grad_norm": 0.4749763309955597, + "learning_rate": 0.0015, + "loss": 1.6813, + "step": 3420 + }, + { + "epoch": 0.3608649789029536, + "grad_norm": 0.5621888637542725, + 
"learning_rate": 0.0015, + "loss": 1.7103, + "step": 3421 + }, + { + "epoch": 0.3609704641350211, + "grad_norm": 0.5397945642471313, + "learning_rate": 0.0015, + "loss": 1.7349, + "step": 3422 + }, + { + "epoch": 0.3610759493670886, + "grad_norm": 0.5255492925643921, + "learning_rate": 0.0015, + "loss": 1.69, + "step": 3423 + }, + { + "epoch": 0.36118143459915614, + "grad_norm": 0.5748757123947144, + "learning_rate": 0.0015, + "loss": 1.7167, + "step": 3424 + }, + { + "epoch": 0.36128691983122363, + "grad_norm": 0.5286357998847961, + "learning_rate": 0.0015, + "loss": 1.7132, + "step": 3425 + }, + { + "epoch": 0.3613924050632911, + "grad_norm": 0.5291661620140076, + "learning_rate": 0.0015, + "loss": 1.666, + "step": 3426 + }, + { + "epoch": 0.36149789029535867, + "grad_norm": 0.6045889854431152, + "learning_rate": 0.0015, + "loss": 1.7058, + "step": 3427 + }, + { + "epoch": 0.36160337552742616, + "grad_norm": 0.5522410869598389, + "learning_rate": 0.0015, + "loss": 1.6802, + "step": 3428 + }, + { + "epoch": 0.36170886075949366, + "grad_norm": 0.5047609806060791, + "learning_rate": 0.0015, + "loss": 1.6703, + "step": 3429 + }, + { + "epoch": 0.3618143459915612, + "grad_norm": 0.6251084804534912, + "learning_rate": 0.0015, + "loss": 1.6601, + "step": 3430 + }, + { + "epoch": 0.3619198312236287, + "grad_norm": 0.5223478674888611, + "learning_rate": 0.0015, + "loss": 1.6906, + "step": 3431 + }, + { + "epoch": 0.3620253164556962, + "grad_norm": 0.5176872611045837, + "learning_rate": 0.0015, + "loss": 1.6721, + "step": 3432 + }, + { + "epoch": 0.36213080168776374, + "grad_norm": 0.648583710193634, + "learning_rate": 0.0015, + "loss": 1.6961, + "step": 3433 + }, + { + "epoch": 0.36223628691983123, + "grad_norm": 0.5592969655990601, + "learning_rate": 0.0015, + "loss": 1.665, + "step": 3434 + }, + { + "epoch": 0.3623417721518987, + "grad_norm": 0.5014697313308716, + "learning_rate": 0.0015, + "loss": 1.7031, + "step": 3435 + }, + { + "epoch": 0.36244725738396627, + 
"grad_norm": 0.4879317879676819, + "learning_rate": 0.0015, + "loss": 1.6671, + "step": 3436 + }, + { + "epoch": 0.36255274261603376, + "grad_norm": 0.5001959204673767, + "learning_rate": 0.0015, + "loss": 1.7284, + "step": 3437 + }, + { + "epoch": 0.36265822784810126, + "grad_norm": 0.50409334897995, + "learning_rate": 0.0015, + "loss": 1.7124, + "step": 3438 + }, + { + "epoch": 0.3627637130801688, + "grad_norm": 0.4925854504108429, + "learning_rate": 0.0015, + "loss": 1.6638, + "step": 3439 + }, + { + "epoch": 0.3628691983122363, + "grad_norm": 0.5322001576423645, + "learning_rate": 0.0015, + "loss": 1.6791, + "step": 3440 + }, + { + "epoch": 0.3629746835443038, + "grad_norm": 0.5502275824546814, + "learning_rate": 0.0015, + "loss": 1.6581, + "step": 3441 + }, + { + "epoch": 0.3630801687763713, + "grad_norm": 0.5756074786186218, + "learning_rate": 0.0015, + "loss": 1.697, + "step": 3442 + }, + { + "epoch": 0.36318565400843883, + "grad_norm": 0.5817195773124695, + "learning_rate": 0.0015, + "loss": 1.7084, + "step": 3443 + }, + { + "epoch": 0.3632911392405063, + "grad_norm": 0.5023086667060852, + "learning_rate": 0.0015, + "loss": 1.6589, + "step": 3444 + }, + { + "epoch": 0.3633966244725738, + "grad_norm": 0.5629333257675171, + "learning_rate": 0.0015, + "loss": 1.7129, + "step": 3445 + }, + { + "epoch": 0.36350210970464136, + "grad_norm": 0.6314513087272644, + "learning_rate": 0.0015, + "loss": 1.7207, + "step": 3446 + }, + { + "epoch": 0.36360759493670886, + "grad_norm": 0.46431276202201843, + "learning_rate": 0.0015, + "loss": 1.6628, + "step": 3447 + }, + { + "epoch": 0.36371308016877635, + "grad_norm": 0.5817503333091736, + "learning_rate": 0.0015, + "loss": 1.6999, + "step": 3448 + }, + { + "epoch": 0.3638185654008439, + "grad_norm": 0.5450338125228882, + "learning_rate": 0.0015, + "loss": 1.6854, + "step": 3449 + }, + { + "epoch": 0.3639240506329114, + "grad_norm": 0.4929513931274414, + "learning_rate": 0.0015, + "loss": 1.6556, + "step": 3450 + }, + { + 
"epoch": 0.3640295358649789, + "grad_norm": 0.5406349897384644, + "learning_rate": 0.0015, + "loss": 1.6832, + "step": 3451 + }, + { + "epoch": 0.36413502109704643, + "grad_norm": 0.5617015361785889, + "learning_rate": 0.0015, + "loss": 1.6448, + "step": 3452 + }, + { + "epoch": 0.3642405063291139, + "grad_norm": 0.4781974256038666, + "learning_rate": 0.0015, + "loss": 1.686, + "step": 3453 + }, + { + "epoch": 0.3643459915611814, + "grad_norm": 0.46796074509620667, + "learning_rate": 0.0015, + "loss": 1.6588, + "step": 3454 + }, + { + "epoch": 0.36445147679324896, + "grad_norm": 0.4808714687824249, + "learning_rate": 0.0015, + "loss": 1.7232, + "step": 3455 + }, + { + "epoch": 0.36455696202531646, + "grad_norm": 0.508586585521698, + "learning_rate": 0.0015, + "loss": 1.6751, + "step": 3456 + }, + { + "epoch": 0.36466244725738395, + "grad_norm": 0.5230977535247803, + "learning_rate": 0.0015, + "loss": 1.6868, + "step": 3457 + }, + { + "epoch": 0.3647679324894515, + "grad_norm": 0.75013267993927, + "learning_rate": 0.0015, + "loss": 1.7005, + "step": 3458 + }, + { + "epoch": 0.364873417721519, + "grad_norm": 0.889573872089386, + "learning_rate": 0.0015, + "loss": 1.6587, + "step": 3459 + }, + { + "epoch": 0.3649789029535865, + "grad_norm": 0.5489333271980286, + "learning_rate": 0.0015, + "loss": 1.6915, + "step": 3460 + }, + { + "epoch": 0.36508438818565403, + "grad_norm": 0.7285661697387695, + "learning_rate": 0.0015, + "loss": 1.7091, + "step": 3461 + }, + { + "epoch": 0.3651898734177215, + "grad_norm": 0.6302562355995178, + "learning_rate": 0.0015, + "loss": 1.6871, + "step": 3462 + }, + { + "epoch": 0.365295358649789, + "grad_norm": 0.6151975989341736, + "learning_rate": 0.0015, + "loss": 1.704, + "step": 3463 + }, + { + "epoch": 0.36540084388185656, + "grad_norm": 0.749674916267395, + "learning_rate": 0.0015, + "loss": 1.7232, + "step": 3464 + }, + { + "epoch": 0.36550632911392406, + "grad_norm": 0.6529875993728638, + "learning_rate": 0.0015, + "loss": 1.6926, + 
"step": 3465 + }, + { + "epoch": 0.36561181434599155, + "grad_norm": 0.5716555714607239, + "learning_rate": 0.0015, + "loss": 1.6607, + "step": 3466 + }, + { + "epoch": 0.3657172995780591, + "grad_norm": 0.5743492245674133, + "learning_rate": 0.0015, + "loss": 1.72, + "step": 3467 + }, + { + "epoch": 0.3658227848101266, + "grad_norm": 0.5516340136528015, + "learning_rate": 0.0015, + "loss": 1.6534, + "step": 3468 + }, + { + "epoch": 0.3659282700421941, + "grad_norm": 0.5176778435707092, + "learning_rate": 0.0015, + "loss": 1.7363, + "step": 3469 + }, + { + "epoch": 0.36603375527426163, + "grad_norm": 0.7524895071983337, + "learning_rate": 0.0015, + "loss": 1.7043, + "step": 3470 + }, + { + "epoch": 0.3661392405063291, + "grad_norm": 0.6692596673965454, + "learning_rate": 0.0015, + "loss": 1.6868, + "step": 3471 + }, + { + "epoch": 0.3662447257383966, + "grad_norm": 0.5070008039474487, + "learning_rate": 0.0015, + "loss": 1.7232, + "step": 3472 + }, + { + "epoch": 0.3663502109704641, + "grad_norm": 0.5661128163337708, + "learning_rate": 0.0015, + "loss": 1.7427, + "step": 3473 + }, + { + "epoch": 0.36645569620253166, + "grad_norm": 0.5610405802726746, + "learning_rate": 0.0015, + "loss": 1.7179, + "step": 3474 + }, + { + "epoch": 0.36656118143459915, + "grad_norm": 0.5527447462081909, + "learning_rate": 0.0015, + "loss": 1.7357, + "step": 3475 + }, + { + "epoch": 0.36666666666666664, + "grad_norm": 0.6391531229019165, + "learning_rate": 0.0015, + "loss": 1.6924, + "step": 3476 + }, + { + "epoch": 0.3667721518987342, + "grad_norm": 0.5474950075149536, + "learning_rate": 0.0015, + "loss": 1.693, + "step": 3477 + }, + { + "epoch": 0.3668776371308017, + "grad_norm": 0.5395394563674927, + "learning_rate": 0.0015, + "loss": 1.6549, + "step": 3478 + }, + { + "epoch": 0.3669831223628692, + "grad_norm": 0.7075352668762207, + "learning_rate": 0.0015, + "loss": 1.6958, + "step": 3479 + }, + { + "epoch": 0.3670886075949367, + "grad_norm": 0.6539794206619263, + "learning_rate": 
0.0015, + "loss": 1.6706, + "step": 3480 + }, + { + "epoch": 0.3671940928270042, + "grad_norm": 0.4946688115596771, + "learning_rate": 0.0015, + "loss": 1.705, + "step": 3481 + }, + { + "epoch": 0.3672995780590717, + "grad_norm": 0.7209545373916626, + "learning_rate": 0.0015, + "loss": 1.7183, + "step": 3482 + }, + { + "epoch": 0.36740506329113926, + "grad_norm": 0.526837170124054, + "learning_rate": 0.0015, + "loss": 1.6724, + "step": 3483 + }, + { + "epoch": 0.36751054852320675, + "grad_norm": 0.7148000001907349, + "learning_rate": 0.0015, + "loss": 1.7337, + "step": 3484 + }, + { + "epoch": 0.36761603375527424, + "grad_norm": 0.7433557510375977, + "learning_rate": 0.0015, + "loss": 1.7034, + "step": 3485 + }, + { + "epoch": 0.3677215189873418, + "grad_norm": 0.6516178250312805, + "learning_rate": 0.0015, + "loss": 1.6902, + "step": 3486 + }, + { + "epoch": 0.3678270042194093, + "grad_norm": 0.6386226415634155, + "learning_rate": 0.0015, + "loss": 1.6812, + "step": 3487 + }, + { + "epoch": 0.3679324894514768, + "grad_norm": 0.6712808012962341, + "learning_rate": 0.0015, + "loss": 1.6975, + "step": 3488 + }, + { + "epoch": 0.3680379746835443, + "grad_norm": 0.5676007866859436, + "learning_rate": 0.0015, + "loss": 1.69, + "step": 3489 + }, + { + "epoch": 0.3681434599156118, + "grad_norm": 0.6194564700126648, + "learning_rate": 0.0015, + "loss": 1.706, + "step": 3490 + }, + { + "epoch": 0.3682489451476793, + "grad_norm": 0.4818286895751953, + "learning_rate": 0.0015, + "loss": 1.6859, + "step": 3491 + }, + { + "epoch": 0.36835443037974686, + "grad_norm": 0.6806401610374451, + "learning_rate": 0.0015, + "loss": 1.6761, + "step": 3492 + }, + { + "epoch": 0.36845991561181435, + "grad_norm": 0.6845285296440125, + "learning_rate": 0.0015, + "loss": 1.6628, + "step": 3493 + }, + { + "epoch": 0.36856540084388184, + "grad_norm": 0.6205683350563049, + "learning_rate": 0.0015, + "loss": 1.7087, + "step": 3494 + }, + { + "epoch": 0.3686708860759494, + "grad_norm": 
0.8780078887939453, + "learning_rate": 0.0015, + "loss": 1.6959, + "step": 3495 + }, + { + "epoch": 0.3687763713080169, + "grad_norm": 0.9626905918121338, + "learning_rate": 0.0015, + "loss": 1.6691, + "step": 3496 + }, + { + "epoch": 0.3688818565400844, + "grad_norm": 0.5961605906486511, + "learning_rate": 0.0015, + "loss": 1.6969, + "step": 3497 + }, + { + "epoch": 0.3689873417721519, + "grad_norm": 0.5036250948905945, + "learning_rate": 0.0015, + "loss": 1.646, + "step": 3498 + }, + { + "epoch": 0.3690928270042194, + "grad_norm": 0.5939358472824097, + "learning_rate": 0.0015, + "loss": 1.6715, + "step": 3499 + }, + { + "epoch": 0.3691983122362869, + "grad_norm": 0.5004081130027771, + "learning_rate": 0.0015, + "loss": 1.6406, + "step": 3500 + }, + { + "epoch": 0.36930379746835446, + "grad_norm": 0.5770102739334106, + "learning_rate": 0.0015, + "loss": 1.669, + "step": 3501 + }, + { + "epoch": 0.36940928270042195, + "grad_norm": 0.6931666135787964, + "learning_rate": 0.0015, + "loss": 1.7007, + "step": 3502 + }, + { + "epoch": 0.36951476793248944, + "grad_norm": 0.48336613178253174, + "learning_rate": 0.0015, + "loss": 1.7153, + "step": 3503 + }, + { + "epoch": 0.369620253164557, + "grad_norm": 0.6822316646575928, + "learning_rate": 0.0015, + "loss": 1.7156, + "step": 3504 + }, + { + "epoch": 0.3697257383966245, + "grad_norm": 0.7500801682472229, + "learning_rate": 0.0015, + "loss": 1.6706, + "step": 3505 + }, + { + "epoch": 0.369831223628692, + "grad_norm": 0.52363520860672, + "learning_rate": 0.0015, + "loss": 1.6952, + "step": 3506 + }, + { + "epoch": 0.36993670886075947, + "grad_norm": 0.5796246528625488, + "learning_rate": 0.0015, + "loss": 1.7191, + "step": 3507 + }, + { + "epoch": 0.370042194092827, + "grad_norm": 0.6043279767036438, + "learning_rate": 0.0015, + "loss": 1.6686, + "step": 3508 + }, + { + "epoch": 0.3701476793248945, + "grad_norm": 0.5411955118179321, + "learning_rate": 0.0015, + "loss": 1.6904, + "step": 3509 + }, + { + "epoch": 
0.370253164556962, + "grad_norm": 0.6494709849357605, + "learning_rate": 0.0015, + "loss": 1.6949, + "step": 3510 + }, + { + "epoch": 0.37035864978902955, + "grad_norm": 0.5679773092269897, + "learning_rate": 0.0015, + "loss": 1.6993, + "step": 3511 + }, + { + "epoch": 0.37046413502109704, + "grad_norm": 0.5008822083473206, + "learning_rate": 0.0015, + "loss": 1.6691, + "step": 3512 + }, + { + "epoch": 0.37056962025316453, + "grad_norm": 0.597727358341217, + "learning_rate": 0.0015, + "loss": 1.6763, + "step": 3513 + }, + { + "epoch": 0.3706751054852321, + "grad_norm": 0.44443100690841675, + "learning_rate": 0.0015, + "loss": 1.6866, + "step": 3514 + }, + { + "epoch": 0.3707805907172996, + "grad_norm": 0.6263906955718994, + "learning_rate": 0.0015, + "loss": 1.6779, + "step": 3515 + }, + { + "epoch": 0.37088607594936707, + "grad_norm": 0.5585700273513794, + "learning_rate": 0.0015, + "loss": 1.72, + "step": 3516 + }, + { + "epoch": 0.3709915611814346, + "grad_norm": 0.5889336466789246, + "learning_rate": 0.0015, + "loss": 1.7009, + "step": 3517 + }, + { + "epoch": 0.3710970464135021, + "grad_norm": 0.6008414626121521, + "learning_rate": 0.0015, + "loss": 1.682, + "step": 3518 + }, + { + "epoch": 0.3712025316455696, + "grad_norm": 0.5108413100242615, + "learning_rate": 0.0015, + "loss": 1.7061, + "step": 3519 + }, + { + "epoch": 0.37130801687763715, + "grad_norm": 0.5178086757659912, + "learning_rate": 0.0015, + "loss": 1.6999, + "step": 3520 + }, + { + "epoch": 0.37141350210970464, + "grad_norm": 0.5006667971611023, + "learning_rate": 0.0015, + "loss": 1.7104, + "step": 3521 + }, + { + "epoch": 0.37151898734177213, + "grad_norm": 0.44895803928375244, + "learning_rate": 0.0015, + "loss": 1.6833, + "step": 3522 + }, + { + "epoch": 0.3716244725738397, + "grad_norm": 0.4816954731941223, + "learning_rate": 0.0015, + "loss": 1.6781, + "step": 3523 + }, + { + "epoch": 0.3717299578059072, + "grad_norm": 0.5086579918861389, + "learning_rate": 0.0015, + "loss": 1.6406, + 
"step": 3524 + }, + { + "epoch": 0.37183544303797467, + "grad_norm": 0.5456579327583313, + "learning_rate": 0.0015, + "loss": 1.7272, + "step": 3525 + }, + { + "epoch": 0.3719409282700422, + "grad_norm": 0.45087578892707825, + "learning_rate": 0.0015, + "loss": 1.7092, + "step": 3526 + }, + { + "epoch": 0.3720464135021097, + "grad_norm": 0.46919184923171997, + "learning_rate": 0.0015, + "loss": 1.6826, + "step": 3527 + }, + { + "epoch": 0.3721518987341772, + "grad_norm": 0.47783172130584717, + "learning_rate": 0.0015, + "loss": 1.7211, + "step": 3528 + }, + { + "epoch": 0.37225738396624475, + "grad_norm": 0.4555913209915161, + "learning_rate": 0.0015, + "loss": 1.6597, + "step": 3529 + }, + { + "epoch": 0.37236286919831224, + "grad_norm": 0.514130711555481, + "learning_rate": 0.0015, + "loss": 1.7586, + "step": 3530 + }, + { + "epoch": 0.37246835443037973, + "grad_norm": 0.6121523380279541, + "learning_rate": 0.0015, + "loss": 1.7147, + "step": 3531 + }, + { + "epoch": 0.3725738396624473, + "grad_norm": 0.6196393370628357, + "learning_rate": 0.0015, + "loss": 1.7315, + "step": 3532 + }, + { + "epoch": 0.3726793248945148, + "grad_norm": 0.4390181303024292, + "learning_rate": 0.0015, + "loss": 1.668, + "step": 3533 + }, + { + "epoch": 0.37278481012658227, + "grad_norm": 0.47705987095832825, + "learning_rate": 0.0015, + "loss": 1.7102, + "step": 3534 + }, + { + "epoch": 0.3728902953586498, + "grad_norm": 0.4802907705307007, + "learning_rate": 0.0015, + "loss": 1.7155, + "step": 3535 + }, + { + "epoch": 0.3729957805907173, + "grad_norm": 0.4534493386745453, + "learning_rate": 0.0015, + "loss": 1.671, + "step": 3536 + }, + { + "epoch": 0.3731012658227848, + "grad_norm": 0.5185301899909973, + "learning_rate": 0.0015, + "loss": 1.66, + "step": 3537 + }, + { + "epoch": 0.37320675105485235, + "grad_norm": 0.5581870675086975, + "learning_rate": 0.0015, + "loss": 1.7176, + "step": 3538 + }, + { + "epoch": 0.37331223628691984, + "grad_norm": 0.5368343591690063, + 
"learning_rate": 0.0015, + "loss": 1.6821, + "step": 3539 + }, + { + "epoch": 0.37341772151898733, + "grad_norm": 0.5538505911827087, + "learning_rate": 0.0015, + "loss": 1.676, + "step": 3540 + }, + { + "epoch": 0.3735232067510548, + "grad_norm": 0.7850205302238464, + "learning_rate": 0.0015, + "loss": 1.6848, + "step": 3541 + }, + { + "epoch": 0.3736286919831224, + "grad_norm": 0.8336942791938782, + "learning_rate": 0.0015, + "loss": 1.7212, + "step": 3542 + }, + { + "epoch": 0.37373417721518987, + "grad_norm": 0.5784370303153992, + "learning_rate": 0.0015, + "loss": 1.676, + "step": 3543 + }, + { + "epoch": 0.37383966244725736, + "grad_norm": 0.5390932559967041, + "learning_rate": 0.0015, + "loss": 1.671, + "step": 3544 + }, + { + "epoch": 0.3739451476793249, + "grad_norm": 0.530036449432373, + "learning_rate": 0.0015, + "loss": 1.7075, + "step": 3545 + }, + { + "epoch": 0.3740506329113924, + "grad_norm": 0.5599520802497864, + "learning_rate": 0.0015, + "loss": 1.6757, + "step": 3546 + }, + { + "epoch": 0.3741561181434599, + "grad_norm": 0.5387759208679199, + "learning_rate": 0.0015, + "loss": 1.6474, + "step": 3547 + }, + { + "epoch": 0.37426160337552744, + "grad_norm": 0.47868961095809937, + "learning_rate": 0.0015, + "loss": 1.6744, + "step": 3548 + }, + { + "epoch": 0.37436708860759493, + "grad_norm": 0.5268681049346924, + "learning_rate": 0.0015, + "loss": 1.6768, + "step": 3549 + }, + { + "epoch": 0.3744725738396624, + "grad_norm": 0.5130245089530945, + "learning_rate": 0.0015, + "loss": 1.6945, + "step": 3550 + }, + { + "epoch": 0.37457805907173, + "grad_norm": 0.5134006142616272, + "learning_rate": 0.0015, + "loss": 1.668, + "step": 3551 + }, + { + "epoch": 0.37468354430379747, + "grad_norm": 0.4919327199459076, + "learning_rate": 0.0015, + "loss": 1.6785, + "step": 3552 + }, + { + "epoch": 0.37478902953586496, + "grad_norm": 0.48739781975746155, + "learning_rate": 0.0015, + "loss": 1.6938, + "step": 3553 + }, + { + "epoch": 0.3748945147679325, + 
"grad_norm": 0.48200276494026184, + "learning_rate": 0.0015, + "loss": 1.6828, + "step": 3554 + }, + { + "epoch": 0.375, + "grad_norm": 0.4728035628795624, + "learning_rate": 0.0015, + "loss": 1.6841, + "step": 3555 + }, + { + "epoch": 0.3751054852320675, + "grad_norm": 0.5085388422012329, + "learning_rate": 0.0015, + "loss": 1.688, + "step": 3556 + }, + { + "epoch": 0.37521097046413504, + "grad_norm": 0.44792282581329346, + "learning_rate": 0.0015, + "loss": 1.69, + "step": 3557 + }, + { + "epoch": 0.37531645569620253, + "grad_norm": 0.4960862398147583, + "learning_rate": 0.0015, + "loss": 1.6586, + "step": 3558 + }, + { + "epoch": 0.37542194092827, + "grad_norm": 0.47321751713752747, + "learning_rate": 0.0015, + "loss": 1.6827, + "step": 3559 + }, + { + "epoch": 0.3755274261603376, + "grad_norm": 0.45882028341293335, + "learning_rate": 0.0015, + "loss": 1.6731, + "step": 3560 + }, + { + "epoch": 0.37563291139240507, + "grad_norm": 0.4504358768463135, + "learning_rate": 0.0015, + "loss": 1.6895, + "step": 3561 + }, + { + "epoch": 0.37573839662447256, + "grad_norm": 0.5343787670135498, + "learning_rate": 0.0015, + "loss": 1.6657, + "step": 3562 + }, + { + "epoch": 0.3758438818565401, + "grad_norm": 0.4659075140953064, + "learning_rate": 0.0015, + "loss": 1.6703, + "step": 3563 + }, + { + "epoch": 0.3759493670886076, + "grad_norm": 0.5344029664993286, + "learning_rate": 0.0015, + "loss": 1.6789, + "step": 3564 + }, + { + "epoch": 0.3760548523206751, + "grad_norm": 0.5266735553741455, + "learning_rate": 0.0015, + "loss": 1.6796, + "step": 3565 + }, + { + "epoch": 0.37616033755274264, + "grad_norm": 0.5037487149238586, + "learning_rate": 0.0015, + "loss": 1.6501, + "step": 3566 + }, + { + "epoch": 0.37626582278481013, + "grad_norm": 0.56449294090271, + "learning_rate": 0.0015, + "loss": 1.6799, + "step": 3567 + }, + { + "epoch": 0.3763713080168776, + "grad_norm": 0.5012770295143127, + "learning_rate": 0.0015, + "loss": 1.7164, + "step": 3568 + }, + { + "epoch": 
0.3764767932489452, + "grad_norm": 0.6296719312667847, + "learning_rate": 0.0015, + "loss": 1.6677, + "step": 3569 + }, + { + "epoch": 0.37658227848101267, + "grad_norm": 0.7946227788925171, + "learning_rate": 0.0015, + "loss": 1.7011, + "step": 3570 + }, + { + "epoch": 0.37668776371308016, + "grad_norm": 0.5402594208717346, + "learning_rate": 0.0015, + "loss": 1.6501, + "step": 3571 + }, + { + "epoch": 0.37679324894514765, + "grad_norm": 0.6937465071678162, + "learning_rate": 0.0015, + "loss": 1.6744, + "step": 3572 + }, + { + "epoch": 0.3768987341772152, + "grad_norm": 0.9502573013305664, + "learning_rate": 0.0015, + "loss": 1.6942, + "step": 3573 + }, + { + "epoch": 0.3770042194092827, + "grad_norm": 0.7560267448425293, + "learning_rate": 0.0015, + "loss": 1.7044, + "step": 3574 + }, + { + "epoch": 0.3771097046413502, + "grad_norm": 0.6838465929031372, + "learning_rate": 0.0015, + "loss": 1.6969, + "step": 3575 + }, + { + "epoch": 0.37721518987341773, + "grad_norm": 0.5273986458778381, + "learning_rate": 0.0015, + "loss": 1.6884, + "step": 3576 + }, + { + "epoch": 0.3773206751054852, + "grad_norm": 0.6105398535728455, + "learning_rate": 0.0015, + "loss": 1.6734, + "step": 3577 + }, + { + "epoch": 0.3774261603375527, + "grad_norm": 0.6233408451080322, + "learning_rate": 0.0015, + "loss": 1.683, + "step": 3578 + }, + { + "epoch": 0.37753164556962027, + "grad_norm": 0.5808678865432739, + "learning_rate": 0.0015, + "loss": 1.6754, + "step": 3579 + }, + { + "epoch": 0.37763713080168776, + "grad_norm": 0.8989353179931641, + "learning_rate": 0.0015, + "loss": 1.7109, + "step": 3580 + }, + { + "epoch": 0.37774261603375525, + "grad_norm": 0.8094796538352966, + "learning_rate": 0.0015, + "loss": 1.7011, + "step": 3581 + }, + { + "epoch": 0.3778481012658228, + "grad_norm": 0.5858950614929199, + "learning_rate": 0.0015, + "loss": 1.6706, + "step": 3582 + }, + { + "epoch": 0.3779535864978903, + "grad_norm": 0.7963961362838745, + "learning_rate": 0.0015, + "loss": 1.6938, + 
"step": 3583 + }, + { + "epoch": 0.3780590717299578, + "grad_norm": 0.8486836552619934, + "learning_rate": 0.0015, + "loss": 1.6775, + "step": 3584 + }, + { + "epoch": 0.37816455696202533, + "grad_norm": 0.502236008644104, + "learning_rate": 0.0015, + "loss": 1.7269, + "step": 3585 + }, + { + "epoch": 0.3782700421940928, + "grad_norm": 0.8007808327674866, + "learning_rate": 0.0015, + "loss": 1.7224, + "step": 3586 + }, + { + "epoch": 0.3783755274261603, + "grad_norm": 0.6669566631317139, + "learning_rate": 0.0015, + "loss": 1.6765, + "step": 3587 + }, + { + "epoch": 0.37848101265822787, + "grad_norm": 0.5253190398216248, + "learning_rate": 0.0015, + "loss": 1.6831, + "step": 3588 + }, + { + "epoch": 0.37858649789029536, + "grad_norm": 0.6445272564888, + "learning_rate": 0.0015, + "loss": 1.727, + "step": 3589 + }, + { + "epoch": 0.37869198312236285, + "grad_norm": 0.5694237947463989, + "learning_rate": 0.0015, + "loss": 1.7016, + "step": 3590 + }, + { + "epoch": 0.3787974683544304, + "grad_norm": 0.5158687829971313, + "learning_rate": 0.0015, + "loss": 1.6597, + "step": 3591 + }, + { + "epoch": 0.3789029535864979, + "grad_norm": 0.5416021347045898, + "learning_rate": 0.0015, + "loss": 1.6818, + "step": 3592 + }, + { + "epoch": 0.3790084388185654, + "grad_norm": 0.5724902153015137, + "learning_rate": 0.0015, + "loss": 1.7238, + "step": 3593 + }, + { + "epoch": 0.37911392405063293, + "grad_norm": 0.5864480137825012, + "learning_rate": 0.0015, + "loss": 1.6853, + "step": 3594 + }, + { + "epoch": 0.3792194092827004, + "grad_norm": 0.516903281211853, + "learning_rate": 0.0015, + "loss": 1.6973, + "step": 3595 + }, + { + "epoch": 0.3793248945147679, + "grad_norm": 0.6607723236083984, + "learning_rate": 0.0015, + "loss": 1.6425, + "step": 3596 + }, + { + "epoch": 0.37943037974683547, + "grad_norm": 0.5852009654045105, + "learning_rate": 0.0015, + "loss": 1.6743, + "step": 3597 + }, + { + "epoch": 0.37953586497890296, + "grad_norm": 0.5943871140480042, + "learning_rate": 
0.0015, + "loss": 1.6878, + "step": 3598 + }, + { + "epoch": 0.37964135021097045, + "grad_norm": 0.5274428129196167, + "learning_rate": 0.0015, + "loss": 1.661, + "step": 3599 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 0.9265272617340088, + "learning_rate": 0.0015, + "loss": 1.6695, + "step": 3600 + }, + { + "epoch": 0.3798523206751055, + "grad_norm": 0.7401799559593201, + "learning_rate": 0.0015, + "loss": 1.6856, + "step": 3601 + }, + { + "epoch": 0.379957805907173, + "grad_norm": 0.5657160878181458, + "learning_rate": 0.0015, + "loss": 1.6746, + "step": 3602 + }, + { + "epoch": 0.38006329113924053, + "grad_norm": 0.5467101335525513, + "learning_rate": 0.0015, + "loss": 1.6838, + "step": 3603 + }, + { + "epoch": 0.380168776371308, + "grad_norm": 0.6029835343360901, + "learning_rate": 0.0015, + "loss": 1.6596, + "step": 3604 + }, + { + "epoch": 0.3802742616033755, + "grad_norm": 0.5620792508125305, + "learning_rate": 0.0015, + "loss": 1.7066, + "step": 3605 + }, + { + "epoch": 0.380379746835443, + "grad_norm": 0.5603548288345337, + "learning_rate": 0.0015, + "loss": 1.6749, + "step": 3606 + }, + { + "epoch": 0.38048523206751056, + "grad_norm": 0.6930073499679565, + "learning_rate": 0.0015, + "loss": 1.6731, + "step": 3607 + }, + { + "epoch": 0.38059071729957805, + "grad_norm": 0.5464161038398743, + "learning_rate": 0.0015, + "loss": 1.7097, + "step": 3608 + }, + { + "epoch": 0.38069620253164554, + "grad_norm": 0.5768519639968872, + "learning_rate": 0.0015, + "loss": 1.7014, + "step": 3609 + }, + { + "epoch": 0.3808016877637131, + "grad_norm": 0.5359461307525635, + "learning_rate": 0.0015, + "loss": 1.6362, + "step": 3610 + }, + { + "epoch": 0.3809071729957806, + "grad_norm": 0.6051581501960754, + "learning_rate": 0.0015, + "loss": 1.7067, + "step": 3611 + }, + { + "epoch": 0.3810126582278481, + "grad_norm": 0.7465226054191589, + "learning_rate": 0.0015, + "loss": 1.6769, + "step": 3612 + }, + { + "epoch": 0.3811181434599156, + "grad_norm": 
0.7165592908859253, + "learning_rate": 0.0015, + "loss": 1.6878, + "step": 3613 + }, + { + "epoch": 0.3812236286919831, + "grad_norm": 0.6232821941375732, + "learning_rate": 0.0015, + "loss": 1.6578, + "step": 3614 + }, + { + "epoch": 0.3813291139240506, + "grad_norm": 0.6930992603302002, + "learning_rate": 0.0015, + "loss": 1.6385, + "step": 3615 + }, + { + "epoch": 0.38143459915611816, + "grad_norm": 0.6751329302787781, + "learning_rate": 0.0015, + "loss": 1.6862, + "step": 3616 + }, + { + "epoch": 0.38154008438818565, + "grad_norm": 0.5513862371444702, + "learning_rate": 0.0015, + "loss": 1.7347, + "step": 3617 + }, + { + "epoch": 0.38164556962025314, + "grad_norm": 0.689549446105957, + "learning_rate": 0.0015, + "loss": 1.7143, + "step": 3618 + }, + { + "epoch": 0.3817510548523207, + "grad_norm": 0.6732983589172363, + "learning_rate": 0.0015, + "loss": 1.6811, + "step": 3619 + }, + { + "epoch": 0.3818565400843882, + "grad_norm": 0.5490033030509949, + "learning_rate": 0.0015, + "loss": 1.648, + "step": 3620 + }, + { + "epoch": 0.3819620253164557, + "grad_norm": 0.6244326233863831, + "learning_rate": 0.0015, + "loss": 1.6913, + "step": 3621 + }, + { + "epoch": 0.3820675105485232, + "grad_norm": 0.6083469986915588, + "learning_rate": 0.0015, + "loss": 1.6538, + "step": 3622 + }, + { + "epoch": 0.3821729957805907, + "grad_norm": 0.4693094789981842, + "learning_rate": 0.0015, + "loss": 1.6975, + "step": 3623 + }, + { + "epoch": 0.3822784810126582, + "grad_norm": 0.6091998219490051, + "learning_rate": 0.0015, + "loss": 1.6792, + "step": 3624 + }, + { + "epoch": 0.38238396624472576, + "grad_norm": 0.4976237416267395, + "learning_rate": 0.0015, + "loss": 1.6932, + "step": 3625 + }, + { + "epoch": 0.38248945147679325, + "grad_norm": 0.5420821905136108, + "learning_rate": 0.0015, + "loss": 1.6832, + "step": 3626 + }, + { + "epoch": 0.38259493670886074, + "grad_norm": 0.5881454944610596, + "learning_rate": 0.0015, + "loss": 1.6845, + "step": 3627 + }, + { + "epoch": 
0.3827004219409283, + "grad_norm": 0.6723463535308838, + "learning_rate": 0.0015, + "loss": 1.6944, + "step": 3628 + }, + { + "epoch": 0.3828059071729958, + "grad_norm": 0.4542931020259857, + "learning_rate": 0.0015, + "loss": 1.6569, + "step": 3629 + }, + { + "epoch": 0.3829113924050633, + "grad_norm": 0.7008521556854248, + "learning_rate": 0.0015, + "loss": 1.6824, + "step": 3630 + }, + { + "epoch": 0.3830168776371308, + "grad_norm": 0.8241995573043823, + "learning_rate": 0.0015, + "loss": 1.6934, + "step": 3631 + }, + { + "epoch": 0.3831223628691983, + "grad_norm": 0.5964398980140686, + "learning_rate": 0.0015, + "loss": 1.672, + "step": 3632 + }, + { + "epoch": 0.3832278481012658, + "grad_norm": 0.61642986536026, + "learning_rate": 0.0015, + "loss": 1.712, + "step": 3633 + }, + { + "epoch": 0.38333333333333336, + "grad_norm": 0.7122082710266113, + "learning_rate": 0.0015, + "loss": 1.6445, + "step": 3634 + }, + { + "epoch": 0.38343881856540085, + "grad_norm": 0.4951452314853668, + "learning_rate": 0.0015, + "loss": 1.6825, + "step": 3635 + }, + { + "epoch": 0.38354430379746834, + "grad_norm": 0.7421321272850037, + "learning_rate": 0.0015, + "loss": 1.667, + "step": 3636 + }, + { + "epoch": 0.3836497890295359, + "grad_norm": 0.8305424451828003, + "learning_rate": 0.0015, + "loss": 1.7546, + "step": 3637 + }, + { + "epoch": 0.3837552742616034, + "grad_norm": 0.5369387269020081, + "learning_rate": 0.0015, + "loss": 1.6798, + "step": 3638 + }, + { + "epoch": 0.3838607594936709, + "grad_norm": 1.0308220386505127, + "learning_rate": 0.0015, + "loss": 1.6897, + "step": 3639 + }, + { + "epoch": 0.38396624472573837, + "grad_norm": 0.7028903365135193, + "learning_rate": 0.0015, + "loss": 1.6426, + "step": 3640 + }, + { + "epoch": 0.3840717299578059, + "grad_norm": 0.6929757595062256, + "learning_rate": 0.0015, + "loss": 1.676, + "step": 3641 + }, + { + "epoch": 0.3841772151898734, + "grad_norm": 0.8227697610855103, + "learning_rate": 0.0015, + "loss": 1.7256, + "step": 
3642 + }, + { + "epoch": 0.3842827004219409, + "grad_norm": 0.641666829586029, + "learning_rate": 0.0015, + "loss": 1.6802, + "step": 3643 + }, + { + "epoch": 0.38438818565400845, + "grad_norm": 0.5322589874267578, + "learning_rate": 0.0015, + "loss": 1.6676, + "step": 3644 + }, + { + "epoch": 0.38449367088607594, + "grad_norm": 0.5577198266983032, + "learning_rate": 0.0015, + "loss": 1.6821, + "step": 3645 + }, + { + "epoch": 0.38459915611814344, + "grad_norm": 0.6370434761047363, + "learning_rate": 0.0015, + "loss": 1.6542, + "step": 3646 + }, + { + "epoch": 0.384704641350211, + "grad_norm": 0.6994079351425171, + "learning_rate": 0.0015, + "loss": 1.6883, + "step": 3647 + }, + { + "epoch": 0.3848101265822785, + "grad_norm": 0.5557304620742798, + "learning_rate": 0.0015, + "loss": 1.7202, + "step": 3648 + }, + { + "epoch": 0.38491561181434597, + "grad_norm": 0.5214542150497437, + "learning_rate": 0.0015, + "loss": 1.7004, + "step": 3649 + }, + { + "epoch": 0.3850210970464135, + "grad_norm": 0.6828441023826599, + "learning_rate": 0.0015, + "loss": 1.6893, + "step": 3650 + }, + { + "epoch": 0.385126582278481, + "grad_norm": 0.5492691993713379, + "learning_rate": 0.0015, + "loss": 1.6958, + "step": 3651 + }, + { + "epoch": 0.3852320675105485, + "grad_norm": 0.5482195019721985, + "learning_rate": 0.0015, + "loss": 1.6727, + "step": 3652 + }, + { + "epoch": 0.38533755274261605, + "grad_norm": 0.7347174286842346, + "learning_rate": 0.0015, + "loss": 1.6721, + "step": 3653 + }, + { + "epoch": 0.38544303797468354, + "grad_norm": 0.5783145427703857, + "learning_rate": 0.0015, + "loss": 1.707, + "step": 3654 + }, + { + "epoch": 0.38554852320675104, + "grad_norm": 0.5347406268119812, + "learning_rate": 0.0015, + "loss": 1.6933, + "step": 3655 + }, + { + "epoch": 0.3856540084388186, + "grad_norm": 0.5907634496688843, + "learning_rate": 0.0015, + "loss": 1.6676, + "step": 3656 + }, + { + "epoch": 0.3857594936708861, + "grad_norm": 0.44710153341293335, + "learning_rate": 
0.0015, + "loss": 1.6503, + "step": 3657 + }, + { + "epoch": 0.38586497890295357, + "grad_norm": 0.5879906415939331, + "learning_rate": 0.0015, + "loss": 1.6786, + "step": 3658 + }, + { + "epoch": 0.3859704641350211, + "grad_norm": 0.5563458800315857, + "learning_rate": 0.0015, + "loss": 1.7, + "step": 3659 + }, + { + "epoch": 0.3860759493670886, + "grad_norm": 0.5694467425346375, + "learning_rate": 0.0015, + "loss": 1.6745, + "step": 3660 + }, + { + "epoch": 0.3861814345991561, + "grad_norm": 0.603987991809845, + "learning_rate": 0.0015, + "loss": 1.696, + "step": 3661 + }, + { + "epoch": 0.38628691983122365, + "grad_norm": 0.5052303671836853, + "learning_rate": 0.0015, + "loss": 1.6646, + "step": 3662 + }, + { + "epoch": 0.38639240506329114, + "grad_norm": 0.626789391040802, + "learning_rate": 0.0015, + "loss": 1.6805, + "step": 3663 + }, + { + "epoch": 0.38649789029535864, + "grad_norm": 0.47768598794937134, + "learning_rate": 0.0015, + "loss": 1.6612, + "step": 3664 + }, + { + "epoch": 0.3866033755274262, + "grad_norm": 0.6381677389144897, + "learning_rate": 0.0015, + "loss": 1.6644, + "step": 3665 + }, + { + "epoch": 0.3867088607594937, + "grad_norm": 0.673236608505249, + "learning_rate": 0.0015, + "loss": 1.6773, + "step": 3666 + }, + { + "epoch": 0.38681434599156117, + "grad_norm": 0.6323572993278503, + "learning_rate": 0.0015, + "loss": 1.6956, + "step": 3667 + }, + { + "epoch": 0.3869198312236287, + "grad_norm": 0.5218010544776917, + "learning_rate": 0.0015, + "loss": 1.7002, + "step": 3668 + }, + { + "epoch": 0.3870253164556962, + "grad_norm": 0.6393988728523254, + "learning_rate": 0.0015, + "loss": 1.667, + "step": 3669 + }, + { + "epoch": 0.3871308016877637, + "grad_norm": 0.5353789925575256, + "learning_rate": 0.0015, + "loss": 1.6668, + "step": 3670 + }, + { + "epoch": 0.3872362869198312, + "grad_norm": 0.5642716288566589, + "learning_rate": 0.0015, + "loss": 1.6861, + "step": 3671 + }, + { + "epoch": 0.38734177215189874, + "grad_norm": 
0.5615326762199402, + "learning_rate": 0.0015, + "loss": 1.6695, + "step": 3672 + }, + { + "epoch": 0.38744725738396624, + "grad_norm": 0.5854267477989197, + "learning_rate": 0.0015, + "loss": 1.6752, + "step": 3673 + }, + { + "epoch": 0.38755274261603373, + "grad_norm": 0.6541895270347595, + "learning_rate": 0.0015, + "loss": 1.6762, + "step": 3674 + }, + { + "epoch": 0.3876582278481013, + "grad_norm": 0.5889910459518433, + "learning_rate": 0.0015, + "loss": 1.6572, + "step": 3675 + }, + { + "epoch": 0.38776371308016877, + "grad_norm": 0.7357600331306458, + "learning_rate": 0.0015, + "loss": 1.6721, + "step": 3676 + }, + { + "epoch": 0.38786919831223626, + "grad_norm": 0.48305729031562805, + "learning_rate": 0.0015, + "loss": 1.692, + "step": 3677 + }, + { + "epoch": 0.3879746835443038, + "grad_norm": 0.7296749353408813, + "learning_rate": 0.0015, + "loss": 1.7031, + "step": 3678 + }, + { + "epoch": 0.3880801687763713, + "grad_norm": 0.4649110734462738, + "learning_rate": 0.0015, + "loss": 1.7132, + "step": 3679 + }, + { + "epoch": 0.3881856540084388, + "grad_norm": 0.8049523830413818, + "learning_rate": 0.0015, + "loss": 1.6519, + "step": 3680 + }, + { + "epoch": 0.38829113924050634, + "grad_norm": 0.903431236743927, + "learning_rate": 0.0015, + "loss": 1.6914, + "step": 3681 + }, + { + "epoch": 0.38839662447257384, + "grad_norm": 0.594926118850708, + "learning_rate": 0.0015, + "loss": 1.6904, + "step": 3682 + }, + { + "epoch": 0.38850210970464133, + "grad_norm": 0.647506594657898, + "learning_rate": 0.0015, + "loss": 1.6956, + "step": 3683 + }, + { + "epoch": 0.3886075949367089, + "grad_norm": 0.6687235832214355, + "learning_rate": 0.0015, + "loss": 1.7001, + "step": 3684 + }, + { + "epoch": 0.38871308016877637, + "grad_norm": 0.5746626257896423, + "learning_rate": 0.0015, + "loss": 1.7193, + "step": 3685 + }, + { + "epoch": 0.38881856540084386, + "grad_norm": 0.544668436050415, + "learning_rate": 0.0015, + "loss": 1.6703, + "step": 3686 + }, + { + "epoch": 
0.3889240506329114, + "grad_norm": 0.5631168484687805, + "learning_rate": 0.0015, + "loss": 1.6649, + "step": 3687 + }, + { + "epoch": 0.3890295358649789, + "grad_norm": 0.6437835097312927, + "learning_rate": 0.0015, + "loss": 1.6632, + "step": 3688 + }, + { + "epoch": 0.3891350210970464, + "grad_norm": 0.6942353248596191, + "learning_rate": 0.0015, + "loss": 1.6989, + "step": 3689 + }, + { + "epoch": 0.38924050632911394, + "grad_norm": 0.5834630727767944, + "learning_rate": 0.0015, + "loss": 1.6953, + "step": 3690 + }, + { + "epoch": 0.38934599156118144, + "grad_norm": 0.6948363780975342, + "learning_rate": 0.0015, + "loss": 1.7541, + "step": 3691 + }, + { + "epoch": 0.38945147679324893, + "grad_norm": 0.7779099345207214, + "learning_rate": 0.0015, + "loss": 1.683, + "step": 3692 + }, + { + "epoch": 0.3895569620253165, + "grad_norm": 0.5741925239562988, + "learning_rate": 0.0015, + "loss": 1.6449, + "step": 3693 + }, + { + "epoch": 0.38966244725738397, + "grad_norm": 0.6894943118095398, + "learning_rate": 0.0015, + "loss": 1.682, + "step": 3694 + }, + { + "epoch": 0.38976793248945146, + "grad_norm": 0.5647335052490234, + "learning_rate": 0.0015, + "loss": 1.696, + "step": 3695 + }, + { + "epoch": 0.389873417721519, + "grad_norm": 0.6527907252311707, + "learning_rate": 0.0015, + "loss": 1.6848, + "step": 3696 + }, + { + "epoch": 0.3899789029535865, + "grad_norm": 0.5589083433151245, + "learning_rate": 0.0015, + "loss": 1.698, + "step": 3697 + }, + { + "epoch": 0.390084388185654, + "grad_norm": 0.5453910827636719, + "learning_rate": 0.0015, + "loss": 1.7233, + "step": 3698 + }, + { + "epoch": 0.39018987341772154, + "grad_norm": 0.563664972782135, + "learning_rate": 0.0015, + "loss": 1.6532, + "step": 3699 + }, + { + "epoch": 0.39029535864978904, + "grad_norm": 0.46357178688049316, + "learning_rate": 0.0015, + "loss": 1.7509, + "step": 3700 + }, + { + "epoch": 0.39040084388185653, + "grad_norm": 0.6345269083976746, + "learning_rate": 0.0015, + "loss": 1.696, + 
"step": 3701 + }, + { + "epoch": 0.3905063291139241, + "grad_norm": 0.5798743367195129, + "learning_rate": 0.0015, + "loss": 1.6799, + "step": 3702 + }, + { + "epoch": 0.39061181434599157, + "grad_norm": 0.5159527659416199, + "learning_rate": 0.0015, + "loss": 1.6489, + "step": 3703 + }, + { + "epoch": 0.39071729957805906, + "grad_norm": 0.5321524739265442, + "learning_rate": 0.0015, + "loss": 1.6931, + "step": 3704 + }, + { + "epoch": 0.39082278481012656, + "grad_norm": 0.4516725540161133, + "learning_rate": 0.0015, + "loss": 1.6685, + "step": 3705 + }, + { + "epoch": 0.3909282700421941, + "grad_norm": 0.6135842800140381, + "learning_rate": 0.0015, + "loss": 1.7076, + "step": 3706 + }, + { + "epoch": 0.3910337552742616, + "grad_norm": 0.53651362657547, + "learning_rate": 0.0015, + "loss": 1.7377, + "step": 3707 + }, + { + "epoch": 0.3911392405063291, + "grad_norm": 0.4846319258213043, + "learning_rate": 0.0015, + "loss": 1.6698, + "step": 3708 + }, + { + "epoch": 0.39124472573839664, + "grad_norm": 0.747732400894165, + "learning_rate": 0.0015, + "loss": 1.6904, + "step": 3709 + }, + { + "epoch": 0.39135021097046413, + "grad_norm": 0.6844327449798584, + "learning_rate": 0.0015, + "loss": 1.6227, + "step": 3710 + }, + { + "epoch": 0.3914556962025316, + "grad_norm": 0.4895083010196686, + "learning_rate": 0.0015, + "loss": 1.6522, + "step": 3711 + }, + { + "epoch": 0.39156118143459917, + "grad_norm": 0.5964961647987366, + "learning_rate": 0.0015, + "loss": 1.639, + "step": 3712 + }, + { + "epoch": 0.39166666666666666, + "grad_norm": 0.5458452105522156, + "learning_rate": 0.0015, + "loss": 1.6464, + "step": 3713 + }, + { + "epoch": 0.39177215189873416, + "grad_norm": 0.525438129901886, + "learning_rate": 0.0015, + "loss": 1.6764, + "step": 3714 + }, + { + "epoch": 0.3918776371308017, + "grad_norm": 0.5726062059402466, + "learning_rate": 0.0015, + "loss": 1.6551, + "step": 3715 + }, + { + "epoch": 0.3919831223628692, + "grad_norm": 0.5745319724082947, + "learning_rate": 
0.0015, + "loss": 1.6547, + "step": 3716 + }, + { + "epoch": 0.3920886075949367, + "grad_norm": 0.49845850467681885, + "learning_rate": 0.0015, + "loss": 1.6947, + "step": 3717 + }, + { + "epoch": 0.39219409282700424, + "grad_norm": 0.5194739699363708, + "learning_rate": 0.0015, + "loss": 1.6184, + "step": 3718 + }, + { + "epoch": 0.39229957805907173, + "grad_norm": 0.5362001657485962, + "learning_rate": 0.0015, + "loss": 1.6903, + "step": 3719 + }, + { + "epoch": 0.3924050632911392, + "grad_norm": 0.4627896845340729, + "learning_rate": 0.0015, + "loss": 1.6628, + "step": 3720 + }, + { + "epoch": 0.39251054852320677, + "grad_norm": 0.4402790665626526, + "learning_rate": 0.0015, + "loss": 1.6595, + "step": 3721 + }, + { + "epoch": 0.39261603375527426, + "grad_norm": 0.532944917678833, + "learning_rate": 0.0015, + "loss": 1.6875, + "step": 3722 + }, + { + "epoch": 0.39272151898734176, + "grad_norm": 0.47930651903152466, + "learning_rate": 0.0015, + "loss": 1.6574, + "step": 3723 + }, + { + "epoch": 0.3928270042194093, + "grad_norm": 0.5077054500579834, + "learning_rate": 0.0015, + "loss": 1.6914, + "step": 3724 + }, + { + "epoch": 0.3929324894514768, + "grad_norm": 0.6024961471557617, + "learning_rate": 0.0015, + "loss": 1.6527, + "step": 3725 + }, + { + "epoch": 0.3930379746835443, + "grad_norm": 0.5431039929389954, + "learning_rate": 0.0015, + "loss": 1.6847, + "step": 3726 + }, + { + "epoch": 0.39314345991561184, + "grad_norm": 0.5446413159370422, + "learning_rate": 0.0015, + "loss": 1.6777, + "step": 3727 + }, + { + "epoch": 0.39324894514767933, + "grad_norm": 0.5875133275985718, + "learning_rate": 0.0015, + "loss": 1.6922, + "step": 3728 + }, + { + "epoch": 0.3933544303797468, + "grad_norm": 0.4683308005332947, + "learning_rate": 0.0015, + "loss": 1.6927, + "step": 3729 + }, + { + "epoch": 0.39345991561181437, + "grad_norm": 0.5920150876045227, + "learning_rate": 0.0015, + "loss": 1.6566, + "step": 3730 + }, + { + "epoch": 0.39356540084388186, + "grad_norm": 
0.5581077933311462, + "learning_rate": 0.0015, + "loss": 1.678, + "step": 3731 + }, + { + "epoch": 0.39367088607594936, + "grad_norm": 0.4967675805091858, + "learning_rate": 0.0015, + "loss": 1.7229, + "step": 3732 + }, + { + "epoch": 0.3937763713080169, + "grad_norm": 0.5351163744926453, + "learning_rate": 0.0015, + "loss": 1.6764, + "step": 3733 + }, + { + "epoch": 0.3938818565400844, + "grad_norm": 0.5785093307495117, + "learning_rate": 0.0015, + "loss": 1.6846, + "step": 3734 + }, + { + "epoch": 0.3939873417721519, + "grad_norm": 0.6192387342453003, + "learning_rate": 0.0015, + "loss": 1.698, + "step": 3735 + }, + { + "epoch": 0.39409282700421944, + "grad_norm": 0.6738994121551514, + "learning_rate": 0.0015, + "loss": 1.6925, + "step": 3736 + }, + { + "epoch": 0.39419831223628693, + "grad_norm": 0.48425158858299255, + "learning_rate": 0.0015, + "loss": 1.6502, + "step": 3737 + }, + { + "epoch": 0.3943037974683544, + "grad_norm": 0.6813925504684448, + "learning_rate": 0.0015, + "loss": 1.6548, + "step": 3738 + }, + { + "epoch": 0.3944092827004219, + "grad_norm": 0.6188768148422241, + "learning_rate": 0.0015, + "loss": 1.638, + "step": 3739 + }, + { + "epoch": 0.39451476793248946, + "grad_norm": 0.5234917998313904, + "learning_rate": 0.0015, + "loss": 1.7169, + "step": 3740 + }, + { + "epoch": 0.39462025316455696, + "grad_norm": 0.5968658924102783, + "learning_rate": 0.0015, + "loss": 1.6956, + "step": 3741 + }, + { + "epoch": 0.39472573839662445, + "grad_norm": 0.5261411070823669, + "learning_rate": 0.0015, + "loss": 1.6959, + "step": 3742 + }, + { + "epoch": 0.394831223628692, + "grad_norm": 0.46268901228904724, + "learning_rate": 0.0015, + "loss": 1.6725, + "step": 3743 + }, + { + "epoch": 0.3949367088607595, + "grad_norm": 0.4687832295894623, + "learning_rate": 0.0015, + "loss": 1.7272, + "step": 3744 + }, + { + "epoch": 0.395042194092827, + "grad_norm": 0.44519439339637756, + "learning_rate": 0.0015, + "loss": 1.6461, + "step": 3745 + }, + { + "epoch": 
0.39514767932489453, + "grad_norm": 0.5245404839515686, + "learning_rate": 0.0015, + "loss": 1.6872, + "step": 3746 + }, + { + "epoch": 0.395253164556962, + "grad_norm": 0.6123425960540771, + "learning_rate": 0.0015, + "loss": 1.7192, + "step": 3747 + }, + { + "epoch": 0.3953586497890295, + "grad_norm": 0.5783941149711609, + "learning_rate": 0.0015, + "loss": 1.6711, + "step": 3748 + }, + { + "epoch": 0.39546413502109706, + "grad_norm": 0.5005989074707031, + "learning_rate": 0.0015, + "loss": 1.6433, + "step": 3749 + }, + { + "epoch": 0.39556962025316456, + "grad_norm": 0.8036031723022461, + "learning_rate": 0.0015, + "loss": 1.6805, + "step": 3750 + }, + { + "epoch": 0.39567510548523205, + "grad_norm": 0.5644263029098511, + "learning_rate": 0.0015, + "loss": 1.679, + "step": 3751 + }, + { + "epoch": 0.3957805907172996, + "grad_norm": 0.6158437728881836, + "learning_rate": 0.0015, + "loss": 1.6959, + "step": 3752 + }, + { + "epoch": 0.3958860759493671, + "grad_norm": 0.6980195641517639, + "learning_rate": 0.0015, + "loss": 1.6971, + "step": 3753 + }, + { + "epoch": 0.3959915611814346, + "grad_norm": 0.5789486169815063, + "learning_rate": 0.0015, + "loss": 1.6834, + "step": 3754 + }, + { + "epoch": 0.39609704641350213, + "grad_norm": 0.6175979971885681, + "learning_rate": 0.0015, + "loss": 1.6789, + "step": 3755 + }, + { + "epoch": 0.3962025316455696, + "grad_norm": 0.771101176738739, + "learning_rate": 0.0015, + "loss": 1.6868, + "step": 3756 + }, + { + "epoch": 0.3963080168776371, + "grad_norm": 0.6815974116325378, + "learning_rate": 0.0015, + "loss": 1.6821, + "step": 3757 + }, + { + "epoch": 0.39641350210970466, + "grad_norm": 0.5077992677688599, + "learning_rate": 0.0015, + "loss": 1.6602, + "step": 3758 + }, + { + "epoch": 0.39651898734177216, + "grad_norm": 0.5861387252807617, + "learning_rate": 0.0015, + "loss": 1.6371, + "step": 3759 + }, + { + "epoch": 0.39662447257383965, + "grad_norm": 0.5313556790351868, + "learning_rate": 0.0015, + "loss": 1.6614, + 
"step": 3760 + }, + { + "epoch": 0.3967299578059072, + "grad_norm": 0.7050797939300537, + "learning_rate": 0.0015, + "loss": 1.7039, + "step": 3761 + }, + { + "epoch": 0.3968354430379747, + "grad_norm": 0.7246404886245728, + "learning_rate": 0.0015, + "loss": 1.6759, + "step": 3762 + }, + { + "epoch": 0.3969409282700422, + "grad_norm": 0.49107906222343445, + "learning_rate": 0.0015, + "loss": 1.6749, + "step": 3763 + }, + { + "epoch": 0.39704641350210973, + "grad_norm": 0.7503883838653564, + "learning_rate": 0.0015, + "loss": 1.6641, + "step": 3764 + }, + { + "epoch": 0.3971518987341772, + "grad_norm": 0.5992181897163391, + "learning_rate": 0.0015, + "loss": 1.6818, + "step": 3765 + }, + { + "epoch": 0.3972573839662447, + "grad_norm": 0.5650216937065125, + "learning_rate": 0.0015, + "loss": 1.6864, + "step": 3766 + }, + { + "epoch": 0.39736286919831226, + "grad_norm": 0.726324737071991, + "learning_rate": 0.0015, + "loss": 1.7338, + "step": 3767 + }, + { + "epoch": 0.39746835443037976, + "grad_norm": 0.5421167612075806, + "learning_rate": 0.0015, + "loss": 1.7374, + "step": 3768 + }, + { + "epoch": 0.39757383966244725, + "grad_norm": 0.5234367847442627, + "learning_rate": 0.0015, + "loss": 1.6793, + "step": 3769 + }, + { + "epoch": 0.39767932489451474, + "grad_norm": 0.7114390730857849, + "learning_rate": 0.0015, + "loss": 1.6803, + "step": 3770 + }, + { + "epoch": 0.3977848101265823, + "grad_norm": 0.6242325305938721, + "learning_rate": 0.0015, + "loss": 1.6519, + "step": 3771 + }, + { + "epoch": 0.3978902953586498, + "grad_norm": 0.4838646650314331, + "learning_rate": 0.0015, + "loss": 1.6856, + "step": 3772 + }, + { + "epoch": 0.3979957805907173, + "grad_norm": 0.6684880256652832, + "learning_rate": 0.0015, + "loss": 1.6957, + "step": 3773 + }, + { + "epoch": 0.3981012658227848, + "grad_norm": 0.5313114523887634, + "learning_rate": 0.0015, + "loss": 1.7024, + "step": 3774 + }, + { + "epoch": 0.3982067510548523, + "grad_norm": 0.4991389811038971, + 
"learning_rate": 0.0015, + "loss": 1.6574, + "step": 3775 + }, + { + "epoch": 0.3983122362869198, + "grad_norm": 0.5732834935188293, + "learning_rate": 0.0015, + "loss": 1.6676, + "step": 3776 + }, + { + "epoch": 0.39841772151898736, + "grad_norm": 0.4613068401813507, + "learning_rate": 0.0015, + "loss": 1.6583, + "step": 3777 + }, + { + "epoch": 0.39852320675105485, + "grad_norm": 0.5855815410614014, + "learning_rate": 0.0015, + "loss": 1.6774, + "step": 3778 + }, + { + "epoch": 0.39862869198312234, + "grad_norm": 0.6076484322547913, + "learning_rate": 0.0015, + "loss": 1.6883, + "step": 3779 + }, + { + "epoch": 0.3987341772151899, + "grad_norm": 0.5026043057441711, + "learning_rate": 0.0015, + "loss": 1.6551, + "step": 3780 + }, + { + "epoch": 0.3988396624472574, + "grad_norm": 0.6369363069534302, + "learning_rate": 0.0015, + "loss": 1.66, + "step": 3781 + }, + { + "epoch": 0.3989451476793249, + "grad_norm": 0.6031306982040405, + "learning_rate": 0.0015, + "loss": 1.6576, + "step": 3782 + }, + { + "epoch": 0.3990506329113924, + "grad_norm": 0.43691784143447876, + "learning_rate": 0.0015, + "loss": 1.7045, + "step": 3783 + }, + { + "epoch": 0.3991561181434599, + "grad_norm": 0.6470429301261902, + "learning_rate": 0.0015, + "loss": 1.6453, + "step": 3784 + }, + { + "epoch": 0.3992616033755274, + "grad_norm": 0.5600720643997192, + "learning_rate": 0.0015, + "loss": 1.7107, + "step": 3785 + }, + { + "epoch": 0.39936708860759496, + "grad_norm": 0.4862491488456726, + "learning_rate": 0.0015, + "loss": 1.6978, + "step": 3786 + }, + { + "epoch": 0.39947257383966245, + "grad_norm": 0.8172095417976379, + "learning_rate": 0.0015, + "loss": 1.6486, + "step": 3787 + }, + { + "epoch": 0.39957805907172994, + "grad_norm": 0.7997248768806458, + "learning_rate": 0.0015, + "loss": 1.7115, + "step": 3788 + }, + { + "epoch": 0.3996835443037975, + "grad_norm": 0.5401503443717957, + "learning_rate": 0.0015, + "loss": 1.672, + "step": 3789 + }, + { + "epoch": 0.399789029535865, + 
"grad_norm": 0.47454023361206055, + "learning_rate": 0.0015, + "loss": 1.6829, + "step": 3790 + }, + { + "epoch": 0.3998945147679325, + "grad_norm": 0.5431615710258484, + "learning_rate": 0.0015, + "loss": 1.6758, + "step": 3791 + }, + { + "epoch": 0.4, + "grad_norm": 0.4792156517505646, + "learning_rate": 0.0015, + "loss": 1.6456, + "step": 3792 + }, + { + "epoch": 0.4001054852320675, + "grad_norm": 0.5675703287124634, + "learning_rate": 0.0015, + "loss": 1.7152, + "step": 3793 + }, + { + "epoch": 0.400210970464135, + "grad_norm": 0.5410193204879761, + "learning_rate": 0.0015, + "loss": 1.6924, + "step": 3794 + }, + { + "epoch": 0.40031645569620256, + "grad_norm": 0.535546600818634, + "learning_rate": 0.0015, + "loss": 1.6979, + "step": 3795 + }, + { + "epoch": 0.40042194092827005, + "grad_norm": 0.4831826090812683, + "learning_rate": 0.0015, + "loss": 1.7037, + "step": 3796 + }, + { + "epoch": 0.40052742616033754, + "grad_norm": 0.5048959851264954, + "learning_rate": 0.0015, + "loss": 1.6748, + "step": 3797 + }, + { + "epoch": 0.4006329113924051, + "grad_norm": 0.6044279932975769, + "learning_rate": 0.0015, + "loss": 1.7105, + "step": 3798 + }, + { + "epoch": 0.4007383966244726, + "grad_norm": 0.5984317064285278, + "learning_rate": 0.0015, + "loss": 1.6391, + "step": 3799 + }, + { + "epoch": 0.4008438818565401, + "grad_norm": 0.476684033870697, + "learning_rate": 0.0015, + "loss": 1.6675, + "step": 3800 + }, + { + "epoch": 0.4009493670886076, + "grad_norm": 0.5303037166595459, + "learning_rate": 0.0015, + "loss": 1.6328, + "step": 3801 + }, + { + "epoch": 0.4010548523206751, + "grad_norm": 0.6264271140098572, + "learning_rate": 0.0015, + "loss": 1.6645, + "step": 3802 + }, + { + "epoch": 0.4011603375527426, + "grad_norm": 0.6265786290168762, + "learning_rate": 0.0015, + "loss": 1.7183, + "step": 3803 + }, + { + "epoch": 0.4012658227848101, + "grad_norm": 0.6287268400192261, + "learning_rate": 0.0015, + "loss": 1.7058, + "step": 3804 + }, + { + "epoch": 
0.40137130801687765, + "grad_norm": 0.6235065460205078, + "learning_rate": 0.0015, + "loss": 1.6452, + "step": 3805 + }, + { + "epoch": 0.40147679324894514, + "grad_norm": 0.523852527141571, + "learning_rate": 0.0015, + "loss": 1.6339, + "step": 3806 + }, + { + "epoch": 0.40158227848101263, + "grad_norm": 0.6052106022834778, + "learning_rate": 0.0015, + "loss": 1.6541, + "step": 3807 + }, + { + "epoch": 0.4016877637130802, + "grad_norm": 0.6712532043457031, + "learning_rate": 0.0015, + "loss": 1.6944, + "step": 3808 + }, + { + "epoch": 0.4017932489451477, + "grad_norm": 0.5733087658882141, + "learning_rate": 0.0015, + "loss": 1.7142, + "step": 3809 + }, + { + "epoch": 0.40189873417721517, + "grad_norm": 0.6598334312438965, + "learning_rate": 0.0015, + "loss": 1.714, + "step": 3810 + }, + { + "epoch": 0.4020042194092827, + "grad_norm": 0.7662893533706665, + "learning_rate": 0.0015, + "loss": 1.6801, + "step": 3811 + }, + { + "epoch": 0.4021097046413502, + "grad_norm": 0.5452477335929871, + "learning_rate": 0.0015, + "loss": 1.6553, + "step": 3812 + }, + { + "epoch": 0.4022151898734177, + "grad_norm": 0.6024952530860901, + "learning_rate": 0.0015, + "loss": 1.6529, + "step": 3813 + }, + { + "epoch": 0.40232067510548525, + "grad_norm": 0.6055412888526917, + "learning_rate": 0.0015, + "loss": 1.6617, + "step": 3814 + }, + { + "epoch": 0.40242616033755274, + "grad_norm": 0.6142078638076782, + "learning_rate": 0.0015, + "loss": 1.6929, + "step": 3815 + }, + { + "epoch": 0.40253164556962023, + "grad_norm": 0.5741285681724548, + "learning_rate": 0.0015, + "loss": 1.6574, + "step": 3816 + }, + { + "epoch": 0.4026371308016878, + "grad_norm": 0.5197274088859558, + "learning_rate": 0.0015, + "loss": 1.6384, + "step": 3817 + }, + { + "epoch": 0.4027426160337553, + "grad_norm": 0.5791266560554504, + "learning_rate": 0.0015, + "loss": 1.6669, + "step": 3818 + }, + { + "epoch": 0.40284810126582277, + "grad_norm": 0.5819272994995117, + "learning_rate": 0.0015, + "loss": 1.6887, + 
"step": 3819 + }, + { + "epoch": 0.4029535864978903, + "grad_norm": 0.5928919911384583, + "learning_rate": 0.0015, + "loss": 1.6404, + "step": 3820 + }, + { + "epoch": 0.4030590717299578, + "grad_norm": 0.5293534994125366, + "learning_rate": 0.0015, + "loss": 1.6744, + "step": 3821 + }, + { + "epoch": 0.4031645569620253, + "grad_norm": 0.6402086019515991, + "learning_rate": 0.0015, + "loss": 1.6326, + "step": 3822 + }, + { + "epoch": 0.40327004219409285, + "grad_norm": 0.575337827205658, + "learning_rate": 0.0015, + "loss": 1.6765, + "step": 3823 + }, + { + "epoch": 0.40337552742616034, + "grad_norm": 0.5857341289520264, + "learning_rate": 0.0015, + "loss": 1.6779, + "step": 3824 + }, + { + "epoch": 0.40348101265822783, + "grad_norm": 0.6004738211631775, + "learning_rate": 0.0015, + "loss": 1.6775, + "step": 3825 + }, + { + "epoch": 0.4035864978902954, + "grad_norm": 0.5944442749023438, + "learning_rate": 0.0015, + "loss": 1.7099, + "step": 3826 + }, + { + "epoch": 0.4036919831223629, + "grad_norm": 0.46930718421936035, + "learning_rate": 0.0015, + "loss": 1.682, + "step": 3827 + }, + { + "epoch": 0.40379746835443037, + "grad_norm": 0.5892956256866455, + "learning_rate": 0.0015, + "loss": 1.6539, + "step": 3828 + }, + { + "epoch": 0.4039029535864979, + "grad_norm": 0.50191730260849, + "learning_rate": 0.0015, + "loss": 1.6632, + "step": 3829 + }, + { + "epoch": 0.4040084388185654, + "grad_norm": 0.6760581731796265, + "learning_rate": 0.0015, + "loss": 1.6473, + "step": 3830 + }, + { + "epoch": 0.4041139240506329, + "grad_norm": 0.5088383555412292, + "learning_rate": 0.0015, + "loss": 1.6884, + "step": 3831 + }, + { + "epoch": 0.40421940928270045, + "grad_norm": 0.5728570818901062, + "learning_rate": 0.0015, + "loss": 1.7008, + "step": 3832 + }, + { + "epoch": 0.40432489451476794, + "grad_norm": 0.5390267372131348, + "learning_rate": 0.0015, + "loss": 1.6782, + "step": 3833 + }, + { + "epoch": 0.40443037974683543, + "grad_norm": 0.67784184217453, + "learning_rate": 
0.0015, + "loss": 1.6698, + "step": 3834 + }, + { + "epoch": 0.4045358649789029, + "grad_norm": 0.5902341604232788, + "learning_rate": 0.0015, + "loss": 1.6753, + "step": 3835 + }, + { + "epoch": 0.4046413502109705, + "grad_norm": 0.6139640808105469, + "learning_rate": 0.0015, + "loss": 1.7032, + "step": 3836 + }, + { + "epoch": 0.40474683544303797, + "grad_norm": 0.5352752804756165, + "learning_rate": 0.0015, + "loss": 1.6542, + "step": 3837 + }, + { + "epoch": 0.40485232067510546, + "grad_norm": 0.6846832036972046, + "learning_rate": 0.0015, + "loss": 1.6845, + "step": 3838 + }, + { + "epoch": 0.404957805907173, + "grad_norm": 0.5652015805244446, + "learning_rate": 0.0015, + "loss": 1.7339, + "step": 3839 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 0.6455852389335632, + "learning_rate": 0.0015, + "loss": 1.7196, + "step": 3840 + }, + { + "epoch": 0.405168776371308, + "grad_norm": 0.5148798823356628, + "learning_rate": 0.0015, + "loss": 1.7008, + "step": 3841 + }, + { + "epoch": 0.40527426160337554, + "grad_norm": 0.4962198734283447, + "learning_rate": 0.0015, + "loss": 1.6705, + "step": 3842 + }, + { + "epoch": 0.40537974683544303, + "grad_norm": 0.47228044271469116, + "learning_rate": 0.0015, + "loss": 1.6685, + "step": 3843 + }, + { + "epoch": 0.4054852320675105, + "grad_norm": 0.4935452342033386, + "learning_rate": 0.0015, + "loss": 1.6626, + "step": 3844 + }, + { + "epoch": 0.4055907172995781, + "grad_norm": 0.4716067612171173, + "learning_rate": 0.0015, + "loss": 1.6946, + "step": 3845 + }, + { + "epoch": 0.40569620253164557, + "grad_norm": 0.5209110975265503, + "learning_rate": 0.0015, + "loss": 1.6575, + "step": 3846 + }, + { + "epoch": 0.40580168776371306, + "grad_norm": 0.46358731389045715, + "learning_rate": 0.0015, + "loss": 1.646, + "step": 3847 + }, + { + "epoch": 0.4059071729957806, + "grad_norm": 0.4754665791988373, + "learning_rate": 0.0015, + "loss": 1.6636, + "step": 3848 + }, + { + "epoch": 0.4060126582278481, + "grad_norm": 
0.5017957091331482, + "learning_rate": 0.0015, + "loss": 1.6498, + "step": 3849 + }, + { + "epoch": 0.4061181434599156, + "grad_norm": 0.4882739782333374, + "learning_rate": 0.0015, + "loss": 1.6542, + "step": 3850 + }, + { + "epoch": 0.40622362869198314, + "grad_norm": 0.5001218318939209, + "learning_rate": 0.0015, + "loss": 1.728, + "step": 3851 + }, + { + "epoch": 0.40632911392405063, + "grad_norm": 0.4525369107723236, + "learning_rate": 0.0015, + "loss": 1.6816, + "step": 3852 + }, + { + "epoch": 0.4064345991561181, + "grad_norm": 0.47382867336273193, + "learning_rate": 0.0015, + "loss": 1.6877, + "step": 3853 + }, + { + "epoch": 0.4065400843881857, + "grad_norm": 0.48720699548721313, + "learning_rate": 0.0015, + "loss": 1.689, + "step": 3854 + }, + { + "epoch": 0.40664556962025317, + "grad_norm": 0.547744631767273, + "learning_rate": 0.0015, + "loss": 1.6695, + "step": 3855 + }, + { + "epoch": 0.40675105485232066, + "grad_norm": 0.47330442070961, + "learning_rate": 0.0015, + "loss": 1.6891, + "step": 3856 + }, + { + "epoch": 0.4068565400843882, + "grad_norm": 0.5014797449111938, + "learning_rate": 0.0015, + "loss": 1.6693, + "step": 3857 + }, + { + "epoch": 0.4069620253164557, + "grad_norm": 0.5758227109909058, + "learning_rate": 0.0015, + "loss": 1.6735, + "step": 3858 + }, + { + "epoch": 0.4070675105485232, + "grad_norm": 0.5283249616622925, + "learning_rate": 0.0015, + "loss": 1.6549, + "step": 3859 + }, + { + "epoch": 0.40717299578059074, + "grad_norm": 0.4935728907585144, + "learning_rate": 0.0015, + "loss": 1.6629, + "step": 3860 + }, + { + "epoch": 0.40727848101265823, + "grad_norm": 0.6933180689811707, + "learning_rate": 0.0015, + "loss": 1.6698, + "step": 3861 + }, + { + "epoch": 0.4073839662447257, + "grad_norm": 0.6887080073356628, + "learning_rate": 0.0015, + "loss": 1.6715, + "step": 3862 + }, + { + "epoch": 0.4074894514767933, + "grad_norm": 0.5577717423439026, + "learning_rate": 0.0015, + "loss": 1.6984, + "step": 3863 + }, + { + "epoch": 
0.40759493670886077, + "grad_norm": 0.487842857837677, + "learning_rate": 0.0015, + "loss": 1.6372, + "step": 3864 + }, + { + "epoch": 0.40770042194092826, + "grad_norm": 0.5179023742675781, + "learning_rate": 0.0015, + "loss": 1.6469, + "step": 3865 + }, + { + "epoch": 0.4078059071729958, + "grad_norm": 0.6370635628700256, + "learning_rate": 0.0015, + "loss": 1.7037, + "step": 3866 + }, + { + "epoch": 0.4079113924050633, + "grad_norm": 0.6766543388366699, + "learning_rate": 0.0015, + "loss": 1.6504, + "step": 3867 + }, + { + "epoch": 0.4080168776371308, + "grad_norm": 0.5281383991241455, + "learning_rate": 0.0015, + "loss": 1.6563, + "step": 3868 + }, + { + "epoch": 0.4081223628691983, + "grad_norm": 0.5986506342887878, + "learning_rate": 0.0015, + "loss": 1.71, + "step": 3869 + }, + { + "epoch": 0.40822784810126583, + "grad_norm": 0.7948133945465088, + "learning_rate": 0.0015, + "loss": 1.6859, + "step": 3870 + }, + { + "epoch": 0.4083333333333333, + "grad_norm": 0.6055651903152466, + "learning_rate": 0.0015, + "loss": 1.6562, + "step": 3871 + }, + { + "epoch": 0.4084388185654008, + "grad_norm": 0.6119669079780579, + "learning_rate": 0.0015, + "loss": 1.7158, + "step": 3872 + }, + { + "epoch": 0.40854430379746837, + "grad_norm": 0.7874894142150879, + "learning_rate": 0.0015, + "loss": 1.6664, + "step": 3873 + }, + { + "epoch": 0.40864978902953586, + "grad_norm": 0.45923927426338196, + "learning_rate": 0.0015, + "loss": 1.6798, + "step": 3874 + }, + { + "epoch": 0.40875527426160335, + "grad_norm": 0.5760701298713684, + "learning_rate": 0.0015, + "loss": 1.6773, + "step": 3875 + }, + { + "epoch": 0.4088607594936709, + "grad_norm": 0.5290331244468689, + "learning_rate": 0.0015, + "loss": 1.6343, + "step": 3876 + }, + { + "epoch": 0.4089662447257384, + "grad_norm": 0.49807408452033997, + "learning_rate": 0.0015, + "loss": 1.6388, + "step": 3877 + }, + { + "epoch": 0.4090717299578059, + "grad_norm": 0.6555059552192688, + "learning_rate": 0.0015, + "loss": 1.635, + 
"step": 3878 + }, + { + "epoch": 0.40917721518987343, + "grad_norm": 0.6197727918624878, + "learning_rate": 0.0015, + "loss": 1.6773, + "step": 3879 + }, + { + "epoch": 0.4092827004219409, + "grad_norm": 0.5036622881889343, + "learning_rate": 0.0015, + "loss": 1.6352, + "step": 3880 + }, + { + "epoch": 0.4093881856540084, + "grad_norm": 0.9330415725708008, + "learning_rate": 0.0015, + "loss": 1.6869, + "step": 3881 + }, + { + "epoch": 0.40949367088607597, + "grad_norm": 0.7329707741737366, + "learning_rate": 0.0015, + "loss": 1.6606, + "step": 3882 + }, + { + "epoch": 0.40959915611814346, + "grad_norm": 0.6673707962036133, + "learning_rate": 0.0015, + "loss": 1.6589, + "step": 3883 + }, + { + "epoch": 0.40970464135021095, + "grad_norm": 1.2548285722732544, + "learning_rate": 0.0015, + "loss": 1.6533, + "step": 3884 + }, + { + "epoch": 0.4098101265822785, + "grad_norm": 0.49074581265449524, + "learning_rate": 0.0015, + "loss": 1.6531, + "step": 3885 + }, + { + "epoch": 0.409915611814346, + "grad_norm": 1.0034308433532715, + "learning_rate": 0.0015, + "loss": 1.6957, + "step": 3886 + }, + { + "epoch": 0.4100210970464135, + "grad_norm": 0.7492836713790894, + "learning_rate": 0.0015, + "loss": 1.6752, + "step": 3887 + }, + { + "epoch": 0.41012658227848103, + "grad_norm": 0.6365775465965271, + "learning_rate": 0.0015, + "loss": 1.6898, + "step": 3888 + }, + { + "epoch": 0.4102320675105485, + "grad_norm": 1.0673199892044067, + "learning_rate": 0.0015, + "loss": 1.6529, + "step": 3889 + }, + { + "epoch": 0.410337552742616, + "grad_norm": 0.5610344409942627, + "learning_rate": 0.0015, + "loss": 1.6325, + "step": 3890 + }, + { + "epoch": 0.41044303797468357, + "grad_norm": 0.8004913330078125, + "learning_rate": 0.0015, + "loss": 1.6674, + "step": 3891 + }, + { + "epoch": 0.41054852320675106, + "grad_norm": 0.7554417848587036, + "learning_rate": 0.0015, + "loss": 1.7075, + "step": 3892 + }, + { + "epoch": 0.41065400843881855, + "grad_norm": 0.591577410697937, + 
"learning_rate": 0.0015, + "loss": 1.6813, + "step": 3893 + }, + { + "epoch": 0.4107594936708861, + "grad_norm": 0.8795300722122192, + "learning_rate": 0.0015, + "loss": 1.6723, + "step": 3894 + }, + { + "epoch": 0.4108649789029536, + "grad_norm": 0.5651753544807434, + "learning_rate": 0.0015, + "loss": 1.6678, + "step": 3895 + }, + { + "epoch": 0.4109704641350211, + "grad_norm": 0.7733017802238464, + "learning_rate": 0.0015, + "loss": 1.6354, + "step": 3896 + }, + { + "epoch": 0.41107594936708863, + "grad_norm": 0.5560656785964966, + "learning_rate": 0.0015, + "loss": 1.6528, + "step": 3897 + }, + { + "epoch": 0.4111814345991561, + "grad_norm": 0.5901712775230408, + "learning_rate": 0.0015, + "loss": 1.661, + "step": 3898 + }, + { + "epoch": 0.4112869198312236, + "grad_norm": 0.5299215316772461, + "learning_rate": 0.0015, + "loss": 1.6341, + "step": 3899 + }, + { + "epoch": 0.41139240506329117, + "grad_norm": 0.7282924056053162, + "learning_rate": 0.0015, + "loss": 1.6906, + "step": 3900 + }, + { + "epoch": 0.41149789029535866, + "grad_norm": 0.7884114384651184, + "learning_rate": 0.0015, + "loss": 1.647, + "step": 3901 + }, + { + "epoch": 0.41160337552742615, + "grad_norm": 0.5577768087387085, + "learning_rate": 0.0015, + "loss": 1.7069, + "step": 3902 + }, + { + "epoch": 0.41170886075949364, + "grad_norm": 0.7427865862846375, + "learning_rate": 0.0015, + "loss": 1.671, + "step": 3903 + }, + { + "epoch": 0.4118143459915612, + "grad_norm": 0.5344990491867065, + "learning_rate": 0.0015, + "loss": 1.6519, + "step": 3904 + }, + { + "epoch": 0.4119198312236287, + "grad_norm": 0.6284624934196472, + "learning_rate": 0.0015, + "loss": 1.662, + "step": 3905 + }, + { + "epoch": 0.4120253164556962, + "grad_norm": 0.5771468877792358, + "learning_rate": 0.0015, + "loss": 1.6791, + "step": 3906 + }, + { + "epoch": 0.4121308016877637, + "grad_norm": 0.5664799809455872, + "learning_rate": 0.0015, + "loss": 1.6313, + "step": 3907 + }, + { + "epoch": 0.4122362869198312, + 
"grad_norm": 0.533612847328186, + "learning_rate": 0.0015, + "loss": 1.6675, + "step": 3908 + }, + { + "epoch": 0.4123417721518987, + "grad_norm": 0.5474640727043152, + "learning_rate": 0.0015, + "loss": 1.6573, + "step": 3909 + }, + { + "epoch": 0.41244725738396626, + "grad_norm": 0.5034437775611877, + "learning_rate": 0.0015, + "loss": 1.6944, + "step": 3910 + }, + { + "epoch": 0.41255274261603375, + "grad_norm": 0.4837951064109802, + "learning_rate": 0.0015, + "loss": 1.6777, + "step": 3911 + }, + { + "epoch": 0.41265822784810124, + "grad_norm": 0.5077068209648132, + "learning_rate": 0.0015, + "loss": 1.7078, + "step": 3912 + }, + { + "epoch": 0.4127637130801688, + "grad_norm": 0.5174497365951538, + "learning_rate": 0.0015, + "loss": 1.6754, + "step": 3913 + }, + { + "epoch": 0.4128691983122363, + "grad_norm": 0.547358512878418, + "learning_rate": 0.0015, + "loss": 1.6041, + "step": 3914 + }, + { + "epoch": 0.4129746835443038, + "grad_norm": 0.5077178478240967, + "learning_rate": 0.0015, + "loss": 1.6516, + "step": 3915 + }, + { + "epoch": 0.4130801687763713, + "grad_norm": 0.5167761445045471, + "learning_rate": 0.0015, + "loss": 1.6626, + "step": 3916 + }, + { + "epoch": 0.4131856540084388, + "grad_norm": 0.6522733569145203, + "learning_rate": 0.0015, + "loss": 1.6408, + "step": 3917 + }, + { + "epoch": 0.4132911392405063, + "grad_norm": 0.4896742105484009, + "learning_rate": 0.0015, + "loss": 1.6819, + "step": 3918 + }, + { + "epoch": 0.41339662447257386, + "grad_norm": 0.6426261067390442, + "learning_rate": 0.0015, + "loss": 1.6478, + "step": 3919 + }, + { + "epoch": 0.41350210970464135, + "grad_norm": 0.5804246068000793, + "learning_rate": 0.0015, + "loss": 1.6911, + "step": 3920 + }, + { + "epoch": 0.41360759493670884, + "grad_norm": 0.4942731261253357, + "learning_rate": 0.0015, + "loss": 1.6418, + "step": 3921 + }, + { + "epoch": 0.4137130801687764, + "grad_norm": 0.6210718750953674, + "learning_rate": 0.0015, + "loss": 1.7146, + "step": 3922 + }, + { + 
"epoch": 0.4138185654008439, + "grad_norm": 0.5851173400878906, + "learning_rate": 0.0015, + "loss": 1.681, + "step": 3923 + }, + { + "epoch": 0.4139240506329114, + "grad_norm": 0.502113401889801, + "learning_rate": 0.0015, + "loss": 1.6466, + "step": 3924 + }, + { + "epoch": 0.4140295358649789, + "grad_norm": 0.5490169525146484, + "learning_rate": 0.0015, + "loss": 1.6177, + "step": 3925 + }, + { + "epoch": 0.4141350210970464, + "grad_norm": 0.5450034141540527, + "learning_rate": 0.0015, + "loss": 1.668, + "step": 3926 + }, + { + "epoch": 0.4142405063291139, + "grad_norm": 0.6607049107551575, + "learning_rate": 0.0015, + "loss": 1.6594, + "step": 3927 + }, + { + "epoch": 0.41434599156118146, + "grad_norm": 0.5202460885047913, + "learning_rate": 0.0015, + "loss": 1.669, + "step": 3928 + }, + { + "epoch": 0.41445147679324895, + "grad_norm": 0.648923397064209, + "learning_rate": 0.0015, + "loss": 1.6864, + "step": 3929 + }, + { + "epoch": 0.41455696202531644, + "grad_norm": 0.5905894637107849, + "learning_rate": 0.0015, + "loss": 1.6805, + "step": 3930 + }, + { + "epoch": 0.414662447257384, + "grad_norm": 0.6553009748458862, + "learning_rate": 0.0015, + "loss": 1.6409, + "step": 3931 + }, + { + "epoch": 0.4147679324894515, + "grad_norm": 0.728967010974884, + "learning_rate": 0.0015, + "loss": 1.6485, + "step": 3932 + }, + { + "epoch": 0.414873417721519, + "grad_norm": 0.4935154616832733, + "learning_rate": 0.0015, + "loss": 1.6606, + "step": 3933 + }, + { + "epoch": 0.41497890295358647, + "grad_norm": 0.5506753921508789, + "learning_rate": 0.0015, + "loss": 1.6696, + "step": 3934 + }, + { + "epoch": 0.415084388185654, + "grad_norm": 0.5010538697242737, + "learning_rate": 0.0015, + "loss": 1.6827, + "step": 3935 + }, + { + "epoch": 0.4151898734177215, + "grad_norm": 0.5543103814125061, + "learning_rate": 0.0015, + "loss": 1.6587, + "step": 3936 + }, + { + "epoch": 0.415295358649789, + "grad_norm": 0.5275358557701111, + "learning_rate": 0.0015, + "loss": 1.6995, + 
"step": 3937 + }, + { + "epoch": 0.41540084388185655, + "grad_norm": 0.5093750357627869, + "learning_rate": 0.0015, + "loss": 1.6232, + "step": 3938 + }, + { + "epoch": 0.41550632911392404, + "grad_norm": 0.5419853925704956, + "learning_rate": 0.0015, + "loss": 1.6734, + "step": 3939 + }, + { + "epoch": 0.41561181434599154, + "grad_norm": 0.5158277153968811, + "learning_rate": 0.0015, + "loss": 1.6718, + "step": 3940 + }, + { + "epoch": 0.4157172995780591, + "grad_norm": 0.5883190035820007, + "learning_rate": 0.0015, + "loss": 1.6727, + "step": 3941 + }, + { + "epoch": 0.4158227848101266, + "grad_norm": 0.5016283988952637, + "learning_rate": 0.0015, + "loss": 1.7008, + "step": 3942 + }, + { + "epoch": 0.41592827004219407, + "grad_norm": 0.6370705962181091, + "learning_rate": 0.0015, + "loss": 1.6594, + "step": 3943 + }, + { + "epoch": 0.4160337552742616, + "grad_norm": 0.5819518566131592, + "learning_rate": 0.0015, + "loss": 1.6843, + "step": 3944 + }, + { + "epoch": 0.4161392405063291, + "grad_norm": 0.540444016456604, + "learning_rate": 0.0015, + "loss": 1.6469, + "step": 3945 + }, + { + "epoch": 0.4162447257383966, + "grad_norm": 0.5240791440010071, + "learning_rate": 0.0015, + "loss": 1.6361, + "step": 3946 + }, + { + "epoch": 0.41635021097046415, + "grad_norm": 0.5113990306854248, + "learning_rate": 0.0015, + "loss": 1.6945, + "step": 3947 + }, + { + "epoch": 0.41645569620253164, + "grad_norm": 0.599273145198822, + "learning_rate": 0.0015, + "loss": 1.6811, + "step": 3948 + }, + { + "epoch": 0.41656118143459914, + "grad_norm": 0.510252833366394, + "learning_rate": 0.0015, + "loss": 1.6549, + "step": 3949 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 0.6712703704833984, + "learning_rate": 0.0015, + "loss": 1.6848, + "step": 3950 + }, + { + "epoch": 0.4167721518987342, + "grad_norm": 0.45828548073768616, + "learning_rate": 0.0015, + "loss": 1.6814, + "step": 3951 + }, + { + "epoch": 0.41687763713080167, + "grad_norm": 0.5784968137741089, + 
"learning_rate": 0.0015, + "loss": 1.6674, + "step": 3952 + }, + { + "epoch": 0.4169831223628692, + "grad_norm": 0.5279408693313599, + "learning_rate": 0.0015, + "loss": 1.7094, + "step": 3953 + }, + { + "epoch": 0.4170886075949367, + "grad_norm": 0.4787351191043854, + "learning_rate": 0.0015, + "loss": 1.666, + "step": 3954 + }, + { + "epoch": 0.4171940928270042, + "grad_norm": 0.6963359713554382, + "learning_rate": 0.0015, + "loss": 1.6586, + "step": 3955 + }, + { + "epoch": 0.41729957805907175, + "grad_norm": 0.623216450214386, + "learning_rate": 0.0015, + "loss": 1.6478, + "step": 3956 + }, + { + "epoch": 0.41740506329113924, + "grad_norm": 0.5411447286605835, + "learning_rate": 0.0015, + "loss": 1.67, + "step": 3957 + }, + { + "epoch": 0.41751054852320674, + "grad_norm": 0.5261410474777222, + "learning_rate": 0.0015, + "loss": 1.6986, + "step": 3958 + }, + { + "epoch": 0.4176160337552743, + "grad_norm": 0.5788530111312866, + "learning_rate": 0.0015, + "loss": 1.6235, + "step": 3959 + }, + { + "epoch": 0.4177215189873418, + "grad_norm": 0.5429050922393799, + "learning_rate": 0.0015, + "loss": 1.6809, + "step": 3960 + }, + { + "epoch": 0.41782700421940927, + "grad_norm": 0.551701009273529, + "learning_rate": 0.0015, + "loss": 1.6272, + "step": 3961 + }, + { + "epoch": 0.4179324894514768, + "grad_norm": 0.8163561820983887, + "learning_rate": 0.0015, + "loss": 1.6448, + "step": 3962 + }, + { + "epoch": 0.4180379746835443, + "grad_norm": 0.5044242739677429, + "learning_rate": 0.0015, + "loss": 1.6468, + "step": 3963 + }, + { + "epoch": 0.4181434599156118, + "grad_norm": 0.6471570730209351, + "learning_rate": 0.0015, + "loss": 1.6875, + "step": 3964 + }, + { + "epoch": 0.41824894514767935, + "grad_norm": 0.7842706441879272, + "learning_rate": 0.0015, + "loss": 1.6914, + "step": 3965 + }, + { + "epoch": 0.41835443037974684, + "grad_norm": 0.568332850933075, + "learning_rate": 0.0015, + "loss": 1.6541, + "step": 3966 + }, + { + "epoch": 0.41845991561181434, + 
"grad_norm": 0.5272161364555359, + "learning_rate": 0.0015, + "loss": 1.6552, + "step": 3967 + }, + { + "epoch": 0.41856540084388183, + "grad_norm": 0.5295596718788147, + "learning_rate": 0.0015, + "loss": 1.6694, + "step": 3968 + }, + { + "epoch": 0.4186708860759494, + "grad_norm": 0.5039467811584473, + "learning_rate": 0.0015, + "loss": 1.6773, + "step": 3969 + }, + { + "epoch": 0.41877637130801687, + "grad_norm": 0.5752148628234863, + "learning_rate": 0.0015, + "loss": 1.6357, + "step": 3970 + }, + { + "epoch": 0.41888185654008436, + "grad_norm": 0.5082814693450928, + "learning_rate": 0.0015, + "loss": 1.6872, + "step": 3971 + }, + { + "epoch": 0.4189873417721519, + "grad_norm": 0.501728892326355, + "learning_rate": 0.0015, + "loss": 1.6604, + "step": 3972 + }, + { + "epoch": 0.4190928270042194, + "grad_norm": 0.45531946420669556, + "learning_rate": 0.0015, + "loss": 1.6869, + "step": 3973 + }, + { + "epoch": 0.4191983122362869, + "grad_norm": 0.5419966578483582, + "learning_rate": 0.0015, + "loss": 1.6608, + "step": 3974 + }, + { + "epoch": 0.41930379746835444, + "grad_norm": 0.5244120955467224, + "learning_rate": 0.0015, + "loss": 1.6713, + "step": 3975 + }, + { + "epoch": 0.41940928270042194, + "grad_norm": 0.574030876159668, + "learning_rate": 0.0015, + "loss": 1.6646, + "step": 3976 + }, + { + "epoch": 0.41951476793248943, + "grad_norm": 0.46261152625083923, + "learning_rate": 0.0015, + "loss": 1.6704, + "step": 3977 + }, + { + "epoch": 0.419620253164557, + "grad_norm": 0.5169928073883057, + "learning_rate": 0.0015, + "loss": 1.6222, + "step": 3978 + }, + { + "epoch": 0.41972573839662447, + "grad_norm": 0.44602909684181213, + "learning_rate": 0.0015, + "loss": 1.677, + "step": 3979 + }, + { + "epoch": 0.41983122362869196, + "grad_norm": 0.4354664087295532, + "learning_rate": 0.0015, + "loss": 1.7032, + "step": 3980 + }, + { + "epoch": 0.4199367088607595, + "grad_norm": 0.4882993698120117, + "learning_rate": 0.0015, + "loss": 1.6481, + "step": 3981 + }, + { 
+ "epoch": 0.420042194092827, + "grad_norm": 0.4510979652404785, + "learning_rate": 0.0015, + "loss": 1.6696, + "step": 3982 + }, + { + "epoch": 0.4201476793248945, + "grad_norm": 0.5278876423835754, + "learning_rate": 0.0015, + "loss": 1.6578, + "step": 3983 + }, + { + "epoch": 0.42025316455696204, + "grad_norm": 0.6052321195602417, + "learning_rate": 0.0015, + "loss": 1.6769, + "step": 3984 + }, + { + "epoch": 0.42035864978902954, + "grad_norm": 0.5470100045204163, + "learning_rate": 0.0015, + "loss": 1.6523, + "step": 3985 + }, + { + "epoch": 0.42046413502109703, + "grad_norm": 0.5092055201530457, + "learning_rate": 0.0015, + "loss": 1.6519, + "step": 3986 + }, + { + "epoch": 0.4205696202531646, + "grad_norm": 0.5102941393852234, + "learning_rate": 0.0015, + "loss": 1.643, + "step": 3987 + }, + { + "epoch": 0.42067510548523207, + "grad_norm": 0.48482275009155273, + "learning_rate": 0.0015, + "loss": 1.665, + "step": 3988 + }, + { + "epoch": 0.42078059071729956, + "grad_norm": 0.5031700730323792, + "learning_rate": 0.0015, + "loss": 1.6675, + "step": 3989 + }, + { + "epoch": 0.4208860759493671, + "grad_norm": 0.4970076382160187, + "learning_rate": 0.0015, + "loss": 1.6725, + "step": 3990 + }, + { + "epoch": 0.4209915611814346, + "grad_norm": 0.4832526445388794, + "learning_rate": 0.0015, + "loss": 1.6727, + "step": 3991 + }, + { + "epoch": 0.4210970464135021, + "grad_norm": 0.5345883369445801, + "learning_rate": 0.0015, + "loss": 1.6683, + "step": 3992 + }, + { + "epoch": 0.42120253164556964, + "grad_norm": 0.5662575364112854, + "learning_rate": 0.0015, + "loss": 1.6635, + "step": 3993 + }, + { + "epoch": 0.42130801687763714, + "grad_norm": 0.5999237298965454, + "learning_rate": 0.0015, + "loss": 1.6769, + "step": 3994 + }, + { + "epoch": 0.42141350210970463, + "grad_norm": 0.5003252029418945, + "learning_rate": 0.0015, + "loss": 1.6645, + "step": 3995 + }, + { + "epoch": 0.4215189873417722, + "grad_norm": 0.5532550811767578, + "learning_rate": 0.0015, + "loss": 
1.6893, + "step": 3996 + }, + { + "epoch": 0.42162447257383967, + "grad_norm": 0.6924083232879639, + "learning_rate": 0.0015, + "loss": 1.6921, + "step": 3997 + }, + { + "epoch": 0.42172995780590716, + "grad_norm": 0.582459032535553, + "learning_rate": 0.0015, + "loss": 1.6621, + "step": 3998 + }, + { + "epoch": 0.4218354430379747, + "grad_norm": 0.6234803199768066, + "learning_rate": 0.0015, + "loss": 1.6913, + "step": 3999 + }, + { + "epoch": 0.4219409282700422, + "grad_norm": 0.6062827110290527, + "learning_rate": 0.0015, + "loss": 1.6683, + "step": 4000 + }, + { + "epoch": 0.4220464135021097, + "grad_norm": 0.5449821352958679, + "learning_rate": 0.0015, + "loss": 1.6439, + "step": 4001 + }, + { + "epoch": 0.4221518987341772, + "grad_norm": 0.6629012227058411, + "learning_rate": 0.0015, + "loss": 1.6638, + "step": 4002 + }, + { + "epoch": 0.42225738396624474, + "grad_norm": 0.5163016319274902, + "learning_rate": 0.0015, + "loss": 1.6389, + "step": 4003 + }, + { + "epoch": 0.42236286919831223, + "grad_norm": 0.6885299682617188, + "learning_rate": 0.0015, + "loss": 1.6826, + "step": 4004 + }, + { + "epoch": 0.4224683544303797, + "grad_norm": 0.7004706859588623, + "learning_rate": 0.0015, + "loss": 1.6637, + "step": 4005 + }, + { + "epoch": 0.42257383966244727, + "grad_norm": 0.573915958404541, + "learning_rate": 0.0015, + "loss": 1.6827, + "step": 4006 + }, + { + "epoch": 0.42267932489451476, + "grad_norm": 0.657016396522522, + "learning_rate": 0.0015, + "loss": 1.6699, + "step": 4007 + }, + { + "epoch": 0.42278481012658226, + "grad_norm": 0.6307787299156189, + "learning_rate": 0.0015, + "loss": 1.6404, + "step": 4008 + }, + { + "epoch": 0.4228902953586498, + "grad_norm": 0.6289223432540894, + "learning_rate": 0.0015, + "loss": 1.6671, + "step": 4009 + }, + { + "epoch": 0.4229957805907173, + "grad_norm": 0.4873087406158447, + "learning_rate": 0.0015, + "loss": 1.6915, + "step": 4010 + }, + { + "epoch": 0.4231012658227848, + "grad_norm": 0.5554637312889099, + 
"learning_rate": 0.0015, + "loss": 1.6478, + "step": 4011 + }, + { + "epoch": 0.42320675105485234, + "grad_norm": 0.508516788482666, + "learning_rate": 0.0015, + "loss": 1.6355, + "step": 4012 + }, + { + "epoch": 0.42331223628691983, + "grad_norm": 0.5363007187843323, + "learning_rate": 0.0015, + "loss": 1.6457, + "step": 4013 + }, + { + "epoch": 0.4234177215189873, + "grad_norm": 0.4818873405456543, + "learning_rate": 0.0015, + "loss": 1.6696, + "step": 4014 + }, + { + "epoch": 0.42352320675105487, + "grad_norm": 0.45898446440696716, + "learning_rate": 0.0015, + "loss": 1.6888, + "step": 4015 + }, + { + "epoch": 0.42362869198312236, + "grad_norm": 0.5161341428756714, + "learning_rate": 0.0015, + "loss": 1.6806, + "step": 4016 + }, + { + "epoch": 0.42373417721518986, + "grad_norm": 0.448438435792923, + "learning_rate": 0.0015, + "loss": 1.6496, + "step": 4017 + }, + { + "epoch": 0.4238396624472574, + "grad_norm": 0.4921002686023712, + "learning_rate": 0.0015, + "loss": 1.6778, + "step": 4018 + }, + { + "epoch": 0.4239451476793249, + "grad_norm": 0.5122791528701782, + "learning_rate": 0.0015, + "loss": 1.6428, + "step": 4019 + }, + { + "epoch": 0.4240506329113924, + "grad_norm": 0.5509474873542786, + "learning_rate": 0.0015, + "loss": 1.7036, + "step": 4020 + }, + { + "epoch": 0.42415611814345994, + "grad_norm": 0.5201058387756348, + "learning_rate": 0.0015, + "loss": 1.6388, + "step": 4021 + }, + { + "epoch": 0.42426160337552743, + "grad_norm": 0.4961116313934326, + "learning_rate": 0.0015, + "loss": 1.6838, + "step": 4022 + }, + { + "epoch": 0.4243670886075949, + "grad_norm": 0.5693700313568115, + "learning_rate": 0.0015, + "loss": 1.6423, + "step": 4023 + }, + { + "epoch": 0.42447257383966247, + "grad_norm": 0.550399124622345, + "learning_rate": 0.0015, + "loss": 1.6541, + "step": 4024 + }, + { + "epoch": 0.42457805907172996, + "grad_norm": 0.475479394197464, + "learning_rate": 0.0015, + "loss": 1.6854, + "step": 4025 + }, + { + "epoch": 0.42468354430379746, + 
"grad_norm": 0.5067935585975647, + "learning_rate": 0.0015, + "loss": 1.6734, + "step": 4026 + }, + { + "epoch": 0.424789029535865, + "grad_norm": 0.5102432370185852, + "learning_rate": 0.0015, + "loss": 1.6296, + "step": 4027 + }, + { + "epoch": 0.4248945147679325, + "grad_norm": 0.5276506543159485, + "learning_rate": 0.0015, + "loss": 1.6656, + "step": 4028 + }, + { + "epoch": 0.425, + "grad_norm": 0.5145576596260071, + "learning_rate": 0.0015, + "loss": 1.6396, + "step": 4029 + }, + { + "epoch": 0.42510548523206754, + "grad_norm": 0.5211308598518372, + "learning_rate": 0.0015, + "loss": 1.664, + "step": 4030 + }, + { + "epoch": 0.42521097046413503, + "grad_norm": 0.5144537687301636, + "learning_rate": 0.0015, + "loss": 1.6439, + "step": 4031 + }, + { + "epoch": 0.4253164556962025, + "grad_norm": 0.5701487064361572, + "learning_rate": 0.0015, + "loss": 1.6957, + "step": 4032 + }, + { + "epoch": 0.42542194092827, + "grad_norm": 0.508618175983429, + "learning_rate": 0.0015, + "loss": 1.6771, + "step": 4033 + }, + { + "epoch": 0.42552742616033756, + "grad_norm": 0.5428481101989746, + "learning_rate": 0.0015, + "loss": 1.6609, + "step": 4034 + }, + { + "epoch": 0.42563291139240506, + "grad_norm": 0.5794505476951599, + "learning_rate": 0.0015, + "loss": 1.6721, + "step": 4035 + }, + { + "epoch": 0.42573839662447255, + "grad_norm": 0.5958222150802612, + "learning_rate": 0.0015, + "loss": 1.6602, + "step": 4036 + }, + { + "epoch": 0.4258438818565401, + "grad_norm": 0.4970637559890747, + "learning_rate": 0.0015, + "loss": 1.6913, + "step": 4037 + }, + { + "epoch": 0.4259493670886076, + "grad_norm": 0.6706885099411011, + "learning_rate": 0.0015, + "loss": 1.6809, + "step": 4038 + }, + { + "epoch": 0.4260548523206751, + "grad_norm": 0.6228687763214111, + "learning_rate": 0.0015, + "loss": 1.6472, + "step": 4039 + }, + { + "epoch": 0.42616033755274263, + "grad_norm": 0.5116509199142456, + "learning_rate": 0.0015, + "loss": 1.6557, + "step": 4040 + }, + { + "epoch": 
0.4262658227848101, + "grad_norm": 0.5696230530738831, + "learning_rate": 0.0015, + "loss": 1.6606, + "step": 4041 + }, + { + "epoch": 0.4263713080168776, + "grad_norm": 0.5878680944442749, + "learning_rate": 0.0015, + "loss": 1.6875, + "step": 4042 + }, + { + "epoch": 0.42647679324894516, + "grad_norm": 0.5070853233337402, + "learning_rate": 0.0015, + "loss": 1.6489, + "step": 4043 + }, + { + "epoch": 0.42658227848101266, + "grad_norm": 0.5454328060150146, + "learning_rate": 0.0015, + "loss": 1.6657, + "step": 4044 + }, + { + "epoch": 0.42668776371308015, + "grad_norm": 0.6633443236351013, + "learning_rate": 0.0015, + "loss": 1.7154, + "step": 4045 + }, + { + "epoch": 0.4267932489451477, + "grad_norm": 0.4916249215602875, + "learning_rate": 0.0015, + "loss": 1.6465, + "step": 4046 + }, + { + "epoch": 0.4268987341772152, + "grad_norm": 0.710806667804718, + "learning_rate": 0.0015, + "loss": 1.6683, + "step": 4047 + }, + { + "epoch": 0.4270042194092827, + "grad_norm": 0.6222331523895264, + "learning_rate": 0.0015, + "loss": 1.675, + "step": 4048 + }, + { + "epoch": 0.42710970464135023, + "grad_norm": 0.467486172914505, + "learning_rate": 0.0015, + "loss": 1.6515, + "step": 4049 + }, + { + "epoch": 0.4272151898734177, + "grad_norm": 0.6178696751594543, + "learning_rate": 0.0015, + "loss": 1.6746, + "step": 4050 + }, + { + "epoch": 0.4273206751054852, + "grad_norm": 0.516190230846405, + "learning_rate": 0.0015, + "loss": 1.6403, + "step": 4051 + }, + { + "epoch": 0.42742616033755276, + "grad_norm": 0.520756721496582, + "learning_rate": 0.0015, + "loss": 1.6978, + "step": 4052 + }, + { + "epoch": 0.42753164556962026, + "grad_norm": 0.7068386673927307, + "learning_rate": 0.0015, + "loss": 1.6823, + "step": 4053 + }, + { + "epoch": 0.42763713080168775, + "grad_norm": 0.510657787322998, + "learning_rate": 0.0015, + "loss": 1.6672, + "step": 4054 + }, + { + "epoch": 0.4277426160337553, + "grad_norm": 0.5260396003723145, + "learning_rate": 0.0015, + "loss": 1.6803, + 
"step": 4055 + }, + { + "epoch": 0.4278481012658228, + "grad_norm": 0.779559850692749, + "learning_rate": 0.0015, + "loss": 1.6562, + "step": 4056 + }, + { + "epoch": 0.4279535864978903, + "grad_norm": 0.5969604253768921, + "learning_rate": 0.0015, + "loss": 1.6619, + "step": 4057 + }, + { + "epoch": 0.42805907172995783, + "grad_norm": 0.5348222255706787, + "learning_rate": 0.0015, + "loss": 1.6853, + "step": 4058 + }, + { + "epoch": 0.4281645569620253, + "grad_norm": 0.6839258074760437, + "learning_rate": 0.0015, + "loss": 1.7112, + "step": 4059 + }, + { + "epoch": 0.4282700421940928, + "grad_norm": 0.4636399447917938, + "learning_rate": 0.0015, + "loss": 1.6606, + "step": 4060 + }, + { + "epoch": 0.42837552742616036, + "grad_norm": 0.6213461756706238, + "learning_rate": 0.0015, + "loss": 1.6851, + "step": 4061 + }, + { + "epoch": 0.42848101265822786, + "grad_norm": 0.5466126799583435, + "learning_rate": 0.0015, + "loss": 1.667, + "step": 4062 + }, + { + "epoch": 0.42858649789029535, + "grad_norm": 0.4445098340511322, + "learning_rate": 0.0015, + "loss": 1.6253, + "step": 4063 + }, + { + "epoch": 0.4286919831223629, + "grad_norm": 0.5658165216445923, + "learning_rate": 0.0015, + "loss": 1.6474, + "step": 4064 + }, + { + "epoch": 0.4287974683544304, + "grad_norm": 0.547552227973938, + "learning_rate": 0.0015, + "loss": 1.7097, + "step": 4065 + }, + { + "epoch": 0.4289029535864979, + "grad_norm": 0.6399693489074707, + "learning_rate": 0.0015, + "loss": 1.6581, + "step": 4066 + }, + { + "epoch": 0.4290084388185654, + "grad_norm": 0.47368189692497253, + "learning_rate": 0.0015, + "loss": 1.6551, + "step": 4067 + }, + { + "epoch": 0.4291139240506329, + "grad_norm": 0.504464864730835, + "learning_rate": 0.0015, + "loss": 1.6445, + "step": 4068 + }, + { + "epoch": 0.4292194092827004, + "grad_norm": 0.4534807801246643, + "learning_rate": 0.0015, + "loss": 1.709, + "step": 4069 + }, + { + "epoch": 0.4293248945147679, + "grad_norm": 0.5343837141990662, + "learning_rate": 
0.0015, + "loss": 1.6656, + "step": 4070 + }, + { + "epoch": 0.42943037974683546, + "grad_norm": 0.4665624797344208, + "learning_rate": 0.0015, + "loss": 1.7028, + "step": 4071 + }, + { + "epoch": 0.42953586497890295, + "grad_norm": 0.5926090478897095, + "learning_rate": 0.0015, + "loss": 1.6518, + "step": 4072 + }, + { + "epoch": 0.42964135021097044, + "grad_norm": 0.5662062764167786, + "learning_rate": 0.0015, + "loss": 1.6537, + "step": 4073 + }, + { + "epoch": 0.429746835443038, + "grad_norm": 0.5438088774681091, + "learning_rate": 0.0015, + "loss": 1.6588, + "step": 4074 + }, + { + "epoch": 0.4298523206751055, + "grad_norm": 0.7898258566856384, + "learning_rate": 0.0015, + "loss": 1.7159, + "step": 4075 + }, + { + "epoch": 0.429957805907173, + "grad_norm": 0.6432499885559082, + "learning_rate": 0.0015, + "loss": 1.6759, + "step": 4076 + }, + { + "epoch": 0.4300632911392405, + "grad_norm": 0.5525928139686584, + "learning_rate": 0.0015, + "loss": 1.6838, + "step": 4077 + }, + { + "epoch": 0.430168776371308, + "grad_norm": 0.6819308400154114, + "learning_rate": 0.0015, + "loss": 1.6729, + "step": 4078 + }, + { + "epoch": 0.4302742616033755, + "grad_norm": 0.5028029084205627, + "learning_rate": 0.0015, + "loss": 1.6731, + "step": 4079 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 0.7023353576660156, + "learning_rate": 0.0015, + "loss": 1.6505, + "step": 4080 + }, + { + "epoch": 0.43048523206751055, + "grad_norm": 0.5001094937324524, + "learning_rate": 0.0015, + "loss": 1.6696, + "step": 4081 + }, + { + "epoch": 0.43059071729957804, + "grad_norm": 0.6945774555206299, + "learning_rate": 0.0015, + "loss": 1.696, + "step": 4082 + }, + { + "epoch": 0.4306962025316456, + "grad_norm": 0.7422482967376709, + "learning_rate": 0.0015, + "loss": 1.7003, + "step": 4083 + }, + { + "epoch": 0.4308016877637131, + "grad_norm": 0.5532791018486023, + "learning_rate": 0.0015, + "loss": 1.6301, + "step": 4084 + }, + { + "epoch": 0.4309071729957806, + "grad_norm": 
0.6867392659187317, + "learning_rate": 0.0015, + "loss": 1.6546, + "step": 4085 + }, + { + "epoch": 0.4310126582278481, + "grad_norm": 0.7010858654975891, + "learning_rate": 0.0015, + "loss": 1.6825, + "step": 4086 + }, + { + "epoch": 0.4311181434599156, + "grad_norm": 0.5495920181274414, + "learning_rate": 0.0015, + "loss": 1.684, + "step": 4087 + }, + { + "epoch": 0.4312236286919831, + "grad_norm": 0.778850257396698, + "learning_rate": 0.0015, + "loss": 1.6569, + "step": 4088 + }, + { + "epoch": 0.43132911392405066, + "grad_norm": 0.7368293404579163, + "learning_rate": 0.0015, + "loss": 1.6376, + "step": 4089 + }, + { + "epoch": 0.43143459915611815, + "grad_norm": 0.49126002192497253, + "learning_rate": 0.0015, + "loss": 1.647, + "step": 4090 + }, + { + "epoch": 0.43154008438818564, + "grad_norm": 0.7283414006233215, + "learning_rate": 0.0015, + "loss": 1.7055, + "step": 4091 + }, + { + "epoch": 0.4316455696202532, + "grad_norm": 0.547957181930542, + "learning_rate": 0.0015, + "loss": 1.6671, + "step": 4092 + }, + { + "epoch": 0.4317510548523207, + "grad_norm": 0.5564680099487305, + "learning_rate": 0.0015, + "loss": 1.6707, + "step": 4093 + }, + { + "epoch": 0.4318565400843882, + "grad_norm": 0.5678845047950745, + "learning_rate": 0.0015, + "loss": 1.6775, + "step": 4094 + }, + { + "epoch": 0.4319620253164557, + "grad_norm": 0.614836573600769, + "learning_rate": 0.0015, + "loss": 1.6653, + "step": 4095 + }, + { + "epoch": 0.4320675105485232, + "grad_norm": 0.5518618226051331, + "learning_rate": 0.0015, + "loss": 1.6632, + "step": 4096 + }, + { + "epoch": 0.4321729957805907, + "grad_norm": 0.49032047390937805, + "learning_rate": 0.0015, + "loss": 1.6562, + "step": 4097 + }, + { + "epoch": 0.43227848101265826, + "grad_norm": 0.5626969933509827, + "learning_rate": 0.0015, + "loss": 1.677, + "step": 4098 + }, + { + "epoch": 0.43238396624472575, + "grad_norm": 0.4826257824897766, + "learning_rate": 0.0015, + "loss": 1.6186, + "step": 4099 + }, + { + "epoch": 
0.43248945147679324, + "grad_norm": 0.5447328090667725, + "learning_rate": 0.0015, + "loss": 1.6441, + "step": 4100 + }, + { + "epoch": 0.43259493670886073, + "grad_norm": 0.523389458656311, + "learning_rate": 0.0015, + "loss": 1.6542, + "step": 4101 + }, + { + "epoch": 0.4327004219409283, + "grad_norm": 0.46412360668182373, + "learning_rate": 0.0015, + "loss": 1.6321, + "step": 4102 + }, + { + "epoch": 0.4328059071729958, + "grad_norm": 0.49491190910339355, + "learning_rate": 0.0015, + "loss": 1.6035, + "step": 4103 + }, + { + "epoch": 0.43291139240506327, + "grad_norm": 0.478269100189209, + "learning_rate": 0.0015, + "loss": 1.6544, + "step": 4104 + }, + { + "epoch": 0.4330168776371308, + "grad_norm": 0.4965742528438568, + "learning_rate": 0.0015, + "loss": 1.6399, + "step": 4105 + }, + { + "epoch": 0.4331223628691983, + "grad_norm": 0.49467846751213074, + "learning_rate": 0.0015, + "loss": 1.6821, + "step": 4106 + }, + { + "epoch": 0.4332278481012658, + "grad_norm": 0.5212065577507019, + "learning_rate": 0.0015, + "loss": 1.6796, + "step": 4107 + }, + { + "epoch": 0.43333333333333335, + "grad_norm": 0.484543114900589, + "learning_rate": 0.0015, + "loss": 1.6438, + "step": 4108 + }, + { + "epoch": 0.43343881856540084, + "grad_norm": 0.47857120633125305, + "learning_rate": 0.0015, + "loss": 1.6884, + "step": 4109 + }, + { + "epoch": 0.43354430379746833, + "grad_norm": 0.497016042470932, + "learning_rate": 0.0015, + "loss": 1.6906, + "step": 4110 + }, + { + "epoch": 0.4336497890295359, + "grad_norm": 0.5423179864883423, + "learning_rate": 0.0015, + "loss": 1.6412, + "step": 4111 + }, + { + "epoch": 0.4337552742616034, + "grad_norm": 0.6989853382110596, + "learning_rate": 0.0015, + "loss": 1.6309, + "step": 4112 + }, + { + "epoch": 0.43386075949367087, + "grad_norm": 0.6580510139465332, + "learning_rate": 0.0015, + "loss": 1.671, + "step": 4113 + }, + { + "epoch": 0.4339662447257384, + "grad_norm": 0.5103244185447693, + "learning_rate": 0.0015, + "loss": 1.6546, + 
"step": 4114 + }, + { + "epoch": 0.4340717299578059, + "grad_norm": 0.6883593797683716, + "learning_rate": 0.0015, + "loss": 1.6612, + "step": 4115 + }, + { + "epoch": 0.4341772151898734, + "grad_norm": 0.48387381434440613, + "learning_rate": 0.0015, + "loss": 1.6506, + "step": 4116 + }, + { + "epoch": 0.43428270042194095, + "grad_norm": 0.8011125326156616, + "learning_rate": 0.0015, + "loss": 1.6675, + "step": 4117 + }, + { + "epoch": 0.43438818565400844, + "grad_norm": 0.7306374907493591, + "learning_rate": 0.0015, + "loss": 1.6583, + "step": 4118 + }, + { + "epoch": 0.43449367088607593, + "grad_norm": 0.6169559955596924, + "learning_rate": 0.0015, + "loss": 1.6843, + "step": 4119 + }, + { + "epoch": 0.4345991561181435, + "grad_norm": 0.8769050240516663, + "learning_rate": 0.0015, + "loss": 1.6846, + "step": 4120 + }, + { + "epoch": 0.434704641350211, + "grad_norm": 0.5737379193305969, + "learning_rate": 0.0015, + "loss": 1.6742, + "step": 4121 + }, + { + "epoch": 0.43481012658227847, + "grad_norm": 0.6177200675010681, + "learning_rate": 0.0015, + "loss": 1.6509, + "step": 4122 + }, + { + "epoch": 0.434915611814346, + "grad_norm": 0.5053166151046753, + "learning_rate": 0.0015, + "loss": 1.6283, + "step": 4123 + }, + { + "epoch": 0.4350210970464135, + "grad_norm": 0.6265692114830017, + "learning_rate": 0.0015, + "loss": 1.6743, + "step": 4124 + }, + { + "epoch": 0.435126582278481, + "grad_norm": 0.47219255566596985, + "learning_rate": 0.0015, + "loss": 1.6459, + "step": 4125 + }, + { + "epoch": 0.43523206751054855, + "grad_norm": 0.6443513631820679, + "learning_rate": 0.0015, + "loss": 1.6619, + "step": 4126 + }, + { + "epoch": 0.43533755274261604, + "grad_norm": 0.5777150392532349, + "learning_rate": 0.0015, + "loss": 1.6495, + "step": 4127 + }, + { + "epoch": 0.43544303797468353, + "grad_norm": 0.5461890697479248, + "learning_rate": 0.0015, + "loss": 1.6613, + "step": 4128 + }, + { + "epoch": 0.4355485232067511, + "grad_norm": 0.5516852736473083, + 
"learning_rate": 0.0015, + "loss": 1.6762, + "step": 4129 + }, + { + "epoch": 0.4356540084388186, + "grad_norm": 0.5438812375068665, + "learning_rate": 0.0015, + "loss": 1.6687, + "step": 4130 + }, + { + "epoch": 0.43575949367088607, + "grad_norm": 0.5853663682937622, + "learning_rate": 0.0015, + "loss": 1.6746, + "step": 4131 + }, + { + "epoch": 0.43586497890295356, + "grad_norm": 0.6321829557418823, + "learning_rate": 0.0015, + "loss": 1.6493, + "step": 4132 + }, + { + "epoch": 0.4359704641350211, + "grad_norm": 0.5404390692710876, + "learning_rate": 0.0015, + "loss": 1.6841, + "step": 4133 + }, + { + "epoch": 0.4360759493670886, + "grad_norm": 0.5971294045448303, + "learning_rate": 0.0015, + "loss": 1.6786, + "step": 4134 + }, + { + "epoch": 0.4361814345991561, + "grad_norm": 0.5202499628067017, + "learning_rate": 0.0015, + "loss": 1.6258, + "step": 4135 + }, + { + "epoch": 0.43628691983122364, + "grad_norm": 0.5141414999961853, + "learning_rate": 0.0015, + "loss": 1.6493, + "step": 4136 + }, + { + "epoch": 0.43639240506329113, + "grad_norm": 0.6311295628547668, + "learning_rate": 0.0015, + "loss": 1.649, + "step": 4137 + }, + { + "epoch": 0.4364978902953586, + "grad_norm": 0.56368488073349, + "learning_rate": 0.0015, + "loss": 1.6611, + "step": 4138 + }, + { + "epoch": 0.4366033755274262, + "grad_norm": 0.5762485265731812, + "learning_rate": 0.0015, + "loss": 1.6692, + "step": 4139 + }, + { + "epoch": 0.43670886075949367, + "grad_norm": 0.6775660514831543, + "learning_rate": 0.0015, + "loss": 1.6498, + "step": 4140 + }, + { + "epoch": 0.43681434599156116, + "grad_norm": 0.7037628889083862, + "learning_rate": 0.0015, + "loss": 1.6521, + "step": 4141 + }, + { + "epoch": 0.4369198312236287, + "grad_norm": 0.6395363211631775, + "learning_rate": 0.0015, + "loss": 1.6643, + "step": 4142 + }, + { + "epoch": 0.4370253164556962, + "grad_norm": 0.630176842212677, + "learning_rate": 0.0015, + "loss": 1.675, + "step": 4143 + }, + { + "epoch": 0.4371308016877637, + 
"grad_norm": 0.549116849899292, + "learning_rate": 0.0015, + "loss": 1.6775, + "step": 4144 + }, + { + "epoch": 0.43723628691983124, + "grad_norm": 0.6416967511177063, + "learning_rate": 0.0015, + "loss": 1.6469, + "step": 4145 + }, + { + "epoch": 0.43734177215189873, + "grad_norm": 0.47405681014060974, + "learning_rate": 0.0015, + "loss": 1.6487, + "step": 4146 + }, + { + "epoch": 0.4374472573839662, + "grad_norm": 0.5568174123764038, + "learning_rate": 0.0015, + "loss": 1.6764, + "step": 4147 + }, + { + "epoch": 0.4375527426160338, + "grad_norm": 0.49878600239753723, + "learning_rate": 0.0015, + "loss": 1.7047, + "step": 4148 + }, + { + "epoch": 0.43765822784810127, + "grad_norm": 0.5601268410682678, + "learning_rate": 0.0015, + "loss": 1.6497, + "step": 4149 + }, + { + "epoch": 0.43776371308016876, + "grad_norm": 0.6269808411598206, + "learning_rate": 0.0015, + "loss": 1.6658, + "step": 4150 + }, + { + "epoch": 0.4378691983122363, + "grad_norm": 0.5254350304603577, + "learning_rate": 0.0015, + "loss": 1.6674, + "step": 4151 + }, + { + "epoch": 0.4379746835443038, + "grad_norm": 0.5711461901664734, + "learning_rate": 0.0015, + "loss": 1.6834, + "step": 4152 + }, + { + "epoch": 0.4380801687763713, + "grad_norm": 0.640809178352356, + "learning_rate": 0.0015, + "loss": 1.6707, + "step": 4153 + }, + { + "epoch": 0.43818565400843884, + "grad_norm": 0.665875256061554, + "learning_rate": 0.0015, + "loss": 1.6188, + "step": 4154 + }, + { + "epoch": 0.43829113924050633, + "grad_norm": 0.6013915538787842, + "learning_rate": 0.0015, + "loss": 1.661, + "step": 4155 + }, + { + "epoch": 0.4383966244725738, + "grad_norm": 0.5874208807945251, + "learning_rate": 0.0015, + "loss": 1.6708, + "step": 4156 + }, + { + "epoch": 0.4385021097046414, + "grad_norm": 0.5663081407546997, + "learning_rate": 0.0015, + "loss": 1.6761, + "step": 4157 + }, + { + "epoch": 0.43860759493670887, + "grad_norm": 0.7340829968452454, + "learning_rate": 0.0015, + "loss": 1.6359, + "step": 4158 + }, + { + 
"epoch": 0.43871308016877636, + "grad_norm": 0.649359405040741, + "learning_rate": 0.0015, + "loss": 1.6562, + "step": 4159 + }, + { + "epoch": 0.4388185654008439, + "grad_norm": 0.5673907995223999, + "learning_rate": 0.0015, + "loss": 1.6347, + "step": 4160 + }, + { + "epoch": 0.4389240506329114, + "grad_norm": 0.5553560256958008, + "learning_rate": 0.0015, + "loss": 1.6557, + "step": 4161 + }, + { + "epoch": 0.4390295358649789, + "grad_norm": 0.5494565367698669, + "learning_rate": 0.0015, + "loss": 1.6755, + "step": 4162 + }, + { + "epoch": 0.43913502109704644, + "grad_norm": 0.653363823890686, + "learning_rate": 0.0015, + "loss": 1.6753, + "step": 4163 + }, + { + "epoch": 0.43924050632911393, + "grad_norm": 0.6180105209350586, + "learning_rate": 0.0015, + "loss": 1.6262, + "step": 4164 + }, + { + "epoch": 0.4393459915611814, + "grad_norm": 0.5122617483139038, + "learning_rate": 0.0015, + "loss": 1.6662, + "step": 4165 + }, + { + "epoch": 0.4394514767932489, + "grad_norm": 0.4645233452320099, + "learning_rate": 0.0015, + "loss": 1.6479, + "step": 4166 + }, + { + "epoch": 0.43955696202531647, + "grad_norm": 0.5114132165908813, + "learning_rate": 0.0015, + "loss": 1.6444, + "step": 4167 + }, + { + "epoch": 0.43966244725738396, + "grad_norm": 0.46179649233818054, + "learning_rate": 0.0015, + "loss": 1.657, + "step": 4168 + }, + { + "epoch": 0.43976793248945145, + "grad_norm": 0.5351398587226868, + "learning_rate": 0.0015, + "loss": 1.67, + "step": 4169 + }, + { + "epoch": 0.439873417721519, + "grad_norm": 0.5336154103279114, + "learning_rate": 0.0015, + "loss": 1.6925, + "step": 4170 + }, + { + "epoch": 0.4399789029535865, + "grad_norm": 0.5214482545852661, + "learning_rate": 0.0015, + "loss": 1.6554, + "step": 4171 + }, + { + "epoch": 0.440084388185654, + "grad_norm": 0.5894679427146912, + "learning_rate": 0.0015, + "loss": 1.6484, + "step": 4172 + }, + { + "epoch": 0.44018987341772153, + "grad_norm": 0.6982522010803223, + "learning_rate": 0.0015, + "loss": 1.6913, 
+ "step": 4173 + }, + { + "epoch": 0.440295358649789, + "grad_norm": 0.671720564365387, + "learning_rate": 0.0015, + "loss": 1.6704, + "step": 4174 + }, + { + "epoch": 0.4404008438818565, + "grad_norm": 0.5447161197662354, + "learning_rate": 0.0015, + "loss": 1.6359, + "step": 4175 + }, + { + "epoch": 0.44050632911392407, + "grad_norm": 0.583261251449585, + "learning_rate": 0.0015, + "loss": 1.6277, + "step": 4176 + }, + { + "epoch": 0.44061181434599156, + "grad_norm": 0.59373939037323, + "learning_rate": 0.0015, + "loss": 1.6726, + "step": 4177 + }, + { + "epoch": 0.44071729957805905, + "grad_norm": 0.6165011525154114, + "learning_rate": 0.0015, + "loss": 1.6313, + "step": 4178 + }, + { + "epoch": 0.4408227848101266, + "grad_norm": 0.5901514887809753, + "learning_rate": 0.0015, + "loss": 1.6789, + "step": 4179 + }, + { + "epoch": 0.4409282700421941, + "grad_norm": 0.7335607409477234, + "learning_rate": 0.0015, + "loss": 1.6407, + "step": 4180 + }, + { + "epoch": 0.4410337552742616, + "grad_norm": 0.6217585206031799, + "learning_rate": 0.0015, + "loss": 1.6664, + "step": 4181 + }, + { + "epoch": 0.44113924050632913, + "grad_norm": 0.47675660252571106, + "learning_rate": 0.0015, + "loss": 1.6681, + "step": 4182 + }, + { + "epoch": 0.4412447257383966, + "grad_norm": 0.6070347428321838, + "learning_rate": 0.0015, + "loss": 1.6508, + "step": 4183 + }, + { + "epoch": 0.4413502109704641, + "grad_norm": 0.5597372651100159, + "learning_rate": 0.0015, + "loss": 1.6563, + "step": 4184 + }, + { + "epoch": 0.44145569620253167, + "grad_norm": 0.5119388699531555, + "learning_rate": 0.0015, + "loss": 1.6509, + "step": 4185 + }, + { + "epoch": 0.44156118143459916, + "grad_norm": 0.5131213665008545, + "learning_rate": 0.0015, + "loss": 1.6509, + "step": 4186 + }, + { + "epoch": 0.44166666666666665, + "grad_norm": 0.5310493111610413, + "learning_rate": 0.0015, + "loss": 1.6765, + "step": 4187 + }, + { + "epoch": 0.4417721518987342, + "grad_norm": 0.6309808492660522, + 
"learning_rate": 0.0015, + "loss": 1.6822, + "step": 4188 + }, + { + "epoch": 0.4418776371308017, + "grad_norm": 0.5191870331764221, + "learning_rate": 0.0015, + "loss": 1.6573, + "step": 4189 + }, + { + "epoch": 0.4419831223628692, + "grad_norm": 0.5047029852867126, + "learning_rate": 0.0015, + "loss": 1.6729, + "step": 4190 + }, + { + "epoch": 0.44208860759493673, + "grad_norm": 0.5043741464614868, + "learning_rate": 0.0015, + "loss": 1.6488, + "step": 4191 + }, + { + "epoch": 0.4421940928270042, + "grad_norm": 0.5557615756988525, + "learning_rate": 0.0015, + "loss": 1.6806, + "step": 4192 + }, + { + "epoch": 0.4422995780590717, + "grad_norm": 0.5199964046478271, + "learning_rate": 0.0015, + "loss": 1.6463, + "step": 4193 + }, + { + "epoch": 0.44240506329113927, + "grad_norm": 0.6202685236930847, + "learning_rate": 0.0015, + "loss": 1.6954, + "step": 4194 + }, + { + "epoch": 0.44251054852320676, + "grad_norm": 0.49905499815940857, + "learning_rate": 0.0015, + "loss": 1.6574, + "step": 4195 + }, + { + "epoch": 0.44261603375527425, + "grad_norm": 0.6017361283302307, + "learning_rate": 0.0015, + "loss": 1.6308, + "step": 4196 + }, + { + "epoch": 0.44272151898734174, + "grad_norm": 0.6024526357650757, + "learning_rate": 0.0015, + "loss": 1.6564, + "step": 4197 + }, + { + "epoch": 0.4428270042194093, + "grad_norm": 0.5642350912094116, + "learning_rate": 0.0015, + "loss": 1.6663, + "step": 4198 + }, + { + "epoch": 0.4429324894514768, + "grad_norm": 0.6400055289268494, + "learning_rate": 0.0015, + "loss": 1.6394, + "step": 4199 + }, + { + "epoch": 0.4430379746835443, + "grad_norm": 0.48151862621307373, + "learning_rate": 0.0015, + "loss": 1.6997, + "step": 4200 + }, + { + "epoch": 0.4431434599156118, + "grad_norm": 0.6156302094459534, + "learning_rate": 0.0015, + "loss": 1.6637, + "step": 4201 + }, + { + "epoch": 0.4432489451476793, + "grad_norm": 0.5519492626190186, + "learning_rate": 0.0015, + "loss": 1.6594, + "step": 4202 + }, + { + "epoch": 0.4433544303797468, + 
"grad_norm": 0.5420129299163818, + "learning_rate": 0.0015, + "loss": 1.6267, + "step": 4203 + }, + { + "epoch": 0.44345991561181436, + "grad_norm": 0.5757969617843628, + "learning_rate": 0.0015, + "loss": 1.6709, + "step": 4204 + }, + { + "epoch": 0.44356540084388185, + "grad_norm": 0.5687883496284485, + "learning_rate": 0.0015, + "loss": 1.6236, + "step": 4205 + }, + { + "epoch": 0.44367088607594934, + "grad_norm": 0.5870119333267212, + "learning_rate": 0.0015, + "loss": 1.6791, + "step": 4206 + }, + { + "epoch": 0.4437763713080169, + "grad_norm": 0.5836820006370544, + "learning_rate": 0.0015, + "loss": 1.6354, + "step": 4207 + }, + { + "epoch": 0.4438818565400844, + "grad_norm": 0.5888475775718689, + "learning_rate": 0.0015, + "loss": 1.6497, + "step": 4208 + }, + { + "epoch": 0.4439873417721519, + "grad_norm": 0.6259241700172424, + "learning_rate": 0.0015, + "loss": 1.6546, + "step": 4209 + }, + { + "epoch": 0.4440928270042194, + "grad_norm": 0.5528420209884644, + "learning_rate": 0.0015, + "loss": 1.6648, + "step": 4210 + }, + { + "epoch": 0.4441983122362869, + "grad_norm": 0.5624960064888, + "learning_rate": 0.0015, + "loss": 1.682, + "step": 4211 + }, + { + "epoch": 0.4443037974683544, + "grad_norm": 0.571373462677002, + "learning_rate": 0.0015, + "loss": 1.6479, + "step": 4212 + }, + { + "epoch": 0.44440928270042196, + "grad_norm": 0.5219664573669434, + "learning_rate": 0.0015, + "loss": 1.6802, + "step": 4213 + }, + { + "epoch": 0.44451476793248945, + "grad_norm": 0.59315425157547, + "learning_rate": 0.0015, + "loss": 1.6413, + "step": 4214 + }, + { + "epoch": 0.44462025316455694, + "grad_norm": 0.6662893295288086, + "learning_rate": 0.0015, + "loss": 1.6371, + "step": 4215 + }, + { + "epoch": 0.4447257383966245, + "grad_norm": 0.6315421462059021, + "learning_rate": 0.0015, + "loss": 1.6688, + "step": 4216 + }, + { + "epoch": 0.444831223628692, + "grad_norm": 0.45515403151512146, + "learning_rate": 0.0015, + "loss": 1.6564, + "step": 4217 + }, + { + 
"epoch": 0.4449367088607595, + "grad_norm": 0.7152280211448669, + "learning_rate": 0.0015, + "loss": 1.6295, + "step": 4218 + }, + { + "epoch": 0.445042194092827, + "grad_norm": 0.7748788595199585, + "learning_rate": 0.0015, + "loss": 1.6777, + "step": 4219 + }, + { + "epoch": 0.4451476793248945, + "grad_norm": 0.6827630996704102, + "learning_rate": 0.0015, + "loss": 1.6737, + "step": 4220 + }, + { + "epoch": 0.445253164556962, + "grad_norm": 0.5411749482154846, + "learning_rate": 0.0015, + "loss": 1.6607, + "step": 4221 + }, + { + "epoch": 0.44535864978902956, + "grad_norm": 0.6788249015808105, + "learning_rate": 0.0015, + "loss": 1.6741, + "step": 4222 + }, + { + "epoch": 0.44546413502109705, + "grad_norm": 0.5095070600509644, + "learning_rate": 0.0015, + "loss": 1.6767, + "step": 4223 + }, + { + "epoch": 0.44556962025316454, + "grad_norm": 0.5586391687393188, + "learning_rate": 0.0015, + "loss": 1.6403, + "step": 4224 + }, + { + "epoch": 0.4456751054852321, + "grad_norm": 0.5388392806053162, + "learning_rate": 0.0015, + "loss": 1.6549, + "step": 4225 + }, + { + "epoch": 0.4457805907172996, + "grad_norm": 0.5191367864608765, + "learning_rate": 0.0015, + "loss": 1.6517, + "step": 4226 + }, + { + "epoch": 0.4458860759493671, + "grad_norm": 0.5413681864738464, + "learning_rate": 0.0015, + "loss": 1.6524, + "step": 4227 + }, + { + "epoch": 0.4459915611814346, + "grad_norm": 0.44679126143455505, + "learning_rate": 0.0015, + "loss": 1.669, + "step": 4228 + }, + { + "epoch": 0.4460970464135021, + "grad_norm": 0.5609684586524963, + "learning_rate": 0.0015, + "loss": 1.6519, + "step": 4229 + }, + { + "epoch": 0.4462025316455696, + "grad_norm": 0.4728687107563019, + "learning_rate": 0.0015, + "loss": 1.6526, + "step": 4230 + }, + { + "epoch": 0.4463080168776371, + "grad_norm": 0.6355165243148804, + "learning_rate": 0.0015, + "loss": 1.7058, + "step": 4231 + }, + { + "epoch": 0.44641350210970465, + "grad_norm": 0.4868917763233185, + "learning_rate": 0.0015, + "loss": 
1.6585, + "step": 4232 + }, + { + "epoch": 0.44651898734177214, + "grad_norm": 0.5380212664604187, + "learning_rate": 0.0015, + "loss": 1.6511, + "step": 4233 + }, + { + "epoch": 0.44662447257383964, + "grad_norm": 0.5393729209899902, + "learning_rate": 0.0015, + "loss": 1.6858, + "step": 4234 + }, + { + "epoch": 0.4467299578059072, + "grad_norm": 0.559303879737854, + "learning_rate": 0.0015, + "loss": 1.6569, + "step": 4235 + }, + { + "epoch": 0.4468354430379747, + "grad_norm": 0.5218153595924377, + "learning_rate": 0.0015, + "loss": 1.6294, + "step": 4236 + }, + { + "epoch": 0.44694092827004217, + "grad_norm": 0.5358302593231201, + "learning_rate": 0.0015, + "loss": 1.6735, + "step": 4237 + }, + { + "epoch": 0.4470464135021097, + "grad_norm": 0.6105931401252747, + "learning_rate": 0.0015, + "loss": 1.6052, + "step": 4238 + }, + { + "epoch": 0.4471518987341772, + "grad_norm": 0.5396553874015808, + "learning_rate": 0.0015, + "loss": 1.6401, + "step": 4239 + }, + { + "epoch": 0.4472573839662447, + "grad_norm": 0.47959256172180176, + "learning_rate": 0.0015, + "loss": 1.6405, + "step": 4240 + }, + { + "epoch": 0.44736286919831225, + "grad_norm": 0.6182860732078552, + "learning_rate": 0.0015, + "loss": 1.6376, + "step": 4241 + }, + { + "epoch": 0.44746835443037974, + "grad_norm": 0.552215039730072, + "learning_rate": 0.0015, + "loss": 1.6643, + "step": 4242 + }, + { + "epoch": 0.44757383966244724, + "grad_norm": 0.59195876121521, + "learning_rate": 0.0015, + "loss": 1.6673, + "step": 4243 + }, + { + "epoch": 0.4476793248945148, + "grad_norm": 0.5790740847587585, + "learning_rate": 0.0015, + "loss": 1.6685, + "step": 4244 + }, + { + "epoch": 0.4477848101265823, + "grad_norm": 0.5079163908958435, + "learning_rate": 0.0015, + "loss": 1.6931, + "step": 4245 + }, + { + "epoch": 0.44789029535864977, + "grad_norm": 0.6338910460472107, + "learning_rate": 0.0015, + "loss": 1.6364, + "step": 4246 + }, + { + "epoch": 0.4479957805907173, + "grad_norm": 0.7259997725486755, + 
"learning_rate": 0.0015, + "loss": 1.6124, + "step": 4247 + }, + { + "epoch": 0.4481012658227848, + "grad_norm": 0.5896361470222473, + "learning_rate": 0.0015, + "loss": 1.6721, + "step": 4248 + }, + { + "epoch": 0.4482067510548523, + "grad_norm": 0.6032668948173523, + "learning_rate": 0.0015, + "loss": 1.6878, + "step": 4249 + }, + { + "epoch": 0.44831223628691985, + "grad_norm": 0.6996051669120789, + "learning_rate": 0.0015, + "loss": 1.6764, + "step": 4250 + }, + { + "epoch": 0.44841772151898734, + "grad_norm": 0.4835391342639923, + "learning_rate": 0.0015, + "loss": 1.6569, + "step": 4251 + }, + { + "epoch": 0.44852320675105484, + "grad_norm": 0.6426658630371094, + "learning_rate": 0.0015, + "loss": 1.6771, + "step": 4252 + }, + { + "epoch": 0.4486286919831224, + "grad_norm": 0.6918848156929016, + "learning_rate": 0.0015, + "loss": 1.6617, + "step": 4253 + }, + { + "epoch": 0.4487341772151899, + "grad_norm": 0.5265471339225769, + "learning_rate": 0.0015, + "loss": 1.6584, + "step": 4254 + }, + { + "epoch": 0.44883966244725737, + "grad_norm": 0.7319307923316956, + "learning_rate": 0.0015, + "loss": 1.7017, + "step": 4255 + }, + { + "epoch": 0.4489451476793249, + "grad_norm": 0.8076063990592957, + "learning_rate": 0.0015, + "loss": 1.6984, + "step": 4256 + }, + { + "epoch": 0.4490506329113924, + "grad_norm": 0.48832979798316956, + "learning_rate": 0.0015, + "loss": 1.6249, + "step": 4257 + }, + { + "epoch": 0.4491561181434599, + "grad_norm": 0.8370751142501831, + "learning_rate": 0.0015, + "loss": 1.6584, + "step": 4258 + }, + { + "epoch": 0.44926160337552745, + "grad_norm": 0.6099622249603271, + "learning_rate": 0.0015, + "loss": 1.6869, + "step": 4259 + }, + { + "epoch": 0.44936708860759494, + "grad_norm": 0.7295295000076294, + "learning_rate": 0.0015, + "loss": 1.6452, + "step": 4260 + }, + { + "epoch": 0.44947257383966244, + "grad_norm": 0.8976764678955078, + "learning_rate": 0.0015, + "loss": 1.6872, + "step": 4261 + }, + { + "epoch": 0.44957805907173, + 
"grad_norm": 0.5718755125999451, + "learning_rate": 0.0015, + "loss": 1.6774, + "step": 4262 + }, + { + "epoch": 0.4496835443037975, + "grad_norm": 0.6952678561210632, + "learning_rate": 0.0015, + "loss": 1.6282, + "step": 4263 + }, + { + "epoch": 0.44978902953586497, + "grad_norm": 0.9510587453842163, + "learning_rate": 0.0015, + "loss": 1.6858, + "step": 4264 + }, + { + "epoch": 0.44989451476793246, + "grad_norm": 0.5682377219200134, + "learning_rate": 0.0015, + "loss": 1.6521, + "step": 4265 + }, + { + "epoch": 0.45, + "grad_norm": 0.8752272129058838, + "learning_rate": 0.0015, + "loss": 1.665, + "step": 4266 + }, + { + "epoch": 0.4501054852320675, + "grad_norm": 0.8616081476211548, + "learning_rate": 0.0015, + "loss": 1.627, + "step": 4267 + }, + { + "epoch": 0.450210970464135, + "grad_norm": 0.5683330297470093, + "learning_rate": 0.0015, + "loss": 1.644, + "step": 4268 + }, + { + "epoch": 0.45031645569620254, + "grad_norm": 0.6447488069534302, + "learning_rate": 0.0015, + "loss": 1.6388, + "step": 4269 + }, + { + "epoch": 0.45042194092827004, + "grad_norm": 0.7037097811698914, + "learning_rate": 0.0015, + "loss": 1.6874, + "step": 4270 + }, + { + "epoch": 0.45052742616033753, + "grad_norm": 0.6476287841796875, + "learning_rate": 0.0015, + "loss": 1.6214, + "step": 4271 + }, + { + "epoch": 0.4506329113924051, + "grad_norm": 0.6518765687942505, + "learning_rate": 0.0015, + "loss": 1.6615, + "step": 4272 + }, + { + "epoch": 0.45073839662447257, + "grad_norm": 0.5910608768463135, + "learning_rate": 0.0015, + "loss": 1.6516, + "step": 4273 + }, + { + "epoch": 0.45084388185654006, + "grad_norm": 0.4365657866001129, + "learning_rate": 0.0015, + "loss": 1.6485, + "step": 4274 + }, + { + "epoch": 0.4509493670886076, + "grad_norm": 0.6063331961631775, + "learning_rate": 0.0015, + "loss": 1.637, + "step": 4275 + }, + { + "epoch": 0.4510548523206751, + "grad_norm": 0.45042088627815247, + "learning_rate": 0.0015, + "loss": 1.6264, + "step": 4276 + }, + { + "epoch": 
0.4511603375527426, + "grad_norm": 0.5372198224067688, + "learning_rate": 0.0015, + "loss": 1.6842, + "step": 4277 + }, + { + "epoch": 0.45126582278481014, + "grad_norm": 0.5263211727142334, + "learning_rate": 0.0015, + "loss": 1.6348, + "step": 4278 + }, + { + "epoch": 0.45137130801687764, + "grad_norm": 0.7315890789031982, + "learning_rate": 0.0015, + "loss": 1.6393, + "step": 4279 + }, + { + "epoch": 0.45147679324894513, + "grad_norm": 0.6854990720748901, + "learning_rate": 0.0015, + "loss": 1.6867, + "step": 4280 + }, + { + "epoch": 0.4515822784810127, + "grad_norm": 0.5408880710601807, + "learning_rate": 0.0015, + "loss": 1.6473, + "step": 4281 + }, + { + "epoch": 0.45168776371308017, + "grad_norm": 0.5557799935340881, + "learning_rate": 0.0015, + "loss": 1.7033, + "step": 4282 + }, + { + "epoch": 0.45179324894514766, + "grad_norm": 0.5755519866943359, + "learning_rate": 0.0015, + "loss": 1.6741, + "step": 4283 + }, + { + "epoch": 0.4518987341772152, + "grad_norm": 0.4988951086997986, + "learning_rate": 0.0015, + "loss": 1.6618, + "step": 4284 + }, + { + "epoch": 0.4520042194092827, + "grad_norm": 0.612045407295227, + "learning_rate": 0.0015, + "loss": 1.6337, + "step": 4285 + }, + { + "epoch": 0.4521097046413502, + "grad_norm": 0.5338757634162903, + "learning_rate": 0.0015, + "loss": 1.6275, + "step": 4286 + }, + { + "epoch": 0.45221518987341774, + "grad_norm": 0.58327716588974, + "learning_rate": 0.0015, + "loss": 1.6386, + "step": 4287 + }, + { + "epoch": 0.45232067510548524, + "grad_norm": 0.5607724785804749, + "learning_rate": 0.0015, + "loss": 1.6941, + "step": 4288 + }, + { + "epoch": 0.45242616033755273, + "grad_norm": 0.5698756575584412, + "learning_rate": 0.0015, + "loss": 1.6878, + "step": 4289 + }, + { + "epoch": 0.4525316455696203, + "grad_norm": 0.5892707705497742, + "learning_rate": 0.0015, + "loss": 1.6408, + "step": 4290 + }, + { + "epoch": 0.45263713080168777, + "grad_norm": 0.5143903493881226, + "learning_rate": 0.0015, + "loss": 1.6467, + 
"step": 4291 + }, + { + "epoch": 0.45274261603375526, + "grad_norm": 0.5467579364776611, + "learning_rate": 0.0015, + "loss": 1.6578, + "step": 4292 + }, + { + "epoch": 0.4528481012658228, + "grad_norm": 0.49005115032196045, + "learning_rate": 0.0015, + "loss": 1.6943, + "step": 4293 + }, + { + "epoch": 0.4529535864978903, + "grad_norm": 0.6001521944999695, + "learning_rate": 0.0015, + "loss": 1.6692, + "step": 4294 + }, + { + "epoch": 0.4530590717299578, + "grad_norm": 0.5293867588043213, + "learning_rate": 0.0015, + "loss": 1.653, + "step": 4295 + }, + { + "epoch": 0.4531645569620253, + "grad_norm": 0.46255889534950256, + "learning_rate": 0.0015, + "loss": 1.6647, + "step": 4296 + }, + { + "epoch": 0.45327004219409284, + "grad_norm": 0.6147642135620117, + "learning_rate": 0.0015, + "loss": 1.6334, + "step": 4297 + }, + { + "epoch": 0.45337552742616033, + "grad_norm": 0.6130396723747253, + "learning_rate": 0.0015, + "loss": 1.6642, + "step": 4298 + }, + { + "epoch": 0.4534810126582278, + "grad_norm": 0.4362105429172516, + "learning_rate": 0.0015, + "loss": 1.6549, + "step": 4299 + }, + { + "epoch": 0.45358649789029537, + "grad_norm": 0.7801738977432251, + "learning_rate": 0.0015, + "loss": 1.6513, + "step": 4300 + }, + { + "epoch": 0.45369198312236286, + "grad_norm": 0.7096121311187744, + "learning_rate": 0.0015, + "loss": 1.6991, + "step": 4301 + }, + { + "epoch": 0.45379746835443036, + "grad_norm": 0.5057912468910217, + "learning_rate": 0.0015, + "loss": 1.646, + "step": 4302 + }, + { + "epoch": 0.4539029535864979, + "grad_norm": 0.9966181516647339, + "learning_rate": 0.0015, + "loss": 1.6613, + "step": 4303 + }, + { + "epoch": 0.4540084388185654, + "grad_norm": 0.7544216513633728, + "learning_rate": 0.0015, + "loss": 1.6546, + "step": 4304 + }, + { + "epoch": 0.4541139240506329, + "grad_norm": 0.5713809728622437, + "learning_rate": 0.0015, + "loss": 1.6053, + "step": 4305 + }, + { + "epoch": 0.45421940928270044, + "grad_norm": 0.9811868071556091, + 
"learning_rate": 0.0015, + "loss": 1.6968, + "step": 4306 + }, + { + "epoch": 0.45432489451476793, + "grad_norm": 0.7886048555374146, + "learning_rate": 0.0015, + "loss": 1.6897, + "step": 4307 + }, + { + "epoch": 0.4544303797468354, + "grad_norm": 0.6720010042190552, + "learning_rate": 0.0015, + "loss": 1.6582, + "step": 4308 + }, + { + "epoch": 0.45453586497890297, + "grad_norm": 0.9948484897613525, + "learning_rate": 0.0015, + "loss": 1.6721, + "step": 4309 + }, + { + "epoch": 0.45464135021097046, + "grad_norm": 0.5053322315216064, + "learning_rate": 0.0015, + "loss": 1.6139, + "step": 4310 + }, + { + "epoch": 0.45474683544303796, + "grad_norm": 0.6952977776527405, + "learning_rate": 0.0015, + "loss": 1.6643, + "step": 4311 + }, + { + "epoch": 0.4548523206751055, + "grad_norm": 0.48643603920936584, + "learning_rate": 0.0015, + "loss": 1.6445, + "step": 4312 + }, + { + "epoch": 0.454957805907173, + "grad_norm": 0.7242816686630249, + "learning_rate": 0.0015, + "loss": 1.6639, + "step": 4313 + }, + { + "epoch": 0.4550632911392405, + "grad_norm": 0.5559763312339783, + "learning_rate": 0.0015, + "loss": 1.6218, + "step": 4314 + }, + { + "epoch": 0.45516877637130804, + "grad_norm": 0.5705804824829102, + "learning_rate": 0.0015, + "loss": 1.6421, + "step": 4315 + }, + { + "epoch": 0.45527426160337553, + "grad_norm": 0.6401084065437317, + "learning_rate": 0.0015, + "loss": 1.6325, + "step": 4316 + }, + { + "epoch": 0.455379746835443, + "grad_norm": 0.5702002644538879, + "learning_rate": 0.0015, + "loss": 1.6649, + "step": 4317 + }, + { + "epoch": 0.45548523206751057, + "grad_norm": 0.8799147009849548, + "learning_rate": 0.0015, + "loss": 1.6446, + "step": 4318 + }, + { + "epoch": 0.45559071729957806, + "grad_norm": 0.6108373403549194, + "learning_rate": 0.0015, + "loss": 1.6356, + "step": 4319 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 0.6824080348014832, + "learning_rate": 0.0015, + "loss": 1.6971, + "step": 4320 + }, + { + "epoch": 0.4558016877637131, + 
"grad_norm": 0.6777485609054565, + "learning_rate": 0.0015, + "loss": 1.6272, + "step": 4321 + }, + { + "epoch": 0.4559071729957806, + "grad_norm": 0.5570958852767944, + "learning_rate": 0.0015, + "loss": 1.6372, + "step": 4322 + }, + { + "epoch": 0.4560126582278481, + "grad_norm": 0.6561368703842163, + "learning_rate": 0.0015, + "loss": 1.7168, + "step": 4323 + }, + { + "epoch": 0.45611814345991564, + "grad_norm": 0.6748980283737183, + "learning_rate": 0.0015, + "loss": 1.6374, + "step": 4324 + }, + { + "epoch": 0.45622362869198313, + "grad_norm": 0.5916723012924194, + "learning_rate": 0.0015, + "loss": 1.6766, + "step": 4325 + }, + { + "epoch": 0.4563291139240506, + "grad_norm": 0.4420872926712036, + "learning_rate": 0.0015, + "loss": 1.632, + "step": 4326 + }, + { + "epoch": 0.45643459915611817, + "grad_norm": 0.6043102145195007, + "learning_rate": 0.0015, + "loss": 1.6976, + "step": 4327 + }, + { + "epoch": 0.45654008438818566, + "grad_norm": 0.4901691675186157, + "learning_rate": 0.0015, + "loss": 1.6634, + "step": 4328 + }, + { + "epoch": 0.45664556962025316, + "grad_norm": 0.47698915004730225, + "learning_rate": 0.0015, + "loss": 1.6684, + "step": 4329 + }, + { + "epoch": 0.45675105485232065, + "grad_norm": 0.4869694411754608, + "learning_rate": 0.0015, + "loss": 1.6374, + "step": 4330 + }, + { + "epoch": 0.4568565400843882, + "grad_norm": 0.46680837869644165, + "learning_rate": 0.0015, + "loss": 1.6454, + "step": 4331 + }, + { + "epoch": 0.4569620253164557, + "grad_norm": 0.49599412083625793, + "learning_rate": 0.0015, + "loss": 1.6664, + "step": 4332 + }, + { + "epoch": 0.4570675105485232, + "grad_norm": 0.572851300239563, + "learning_rate": 0.0015, + "loss": 1.6163, + "step": 4333 + }, + { + "epoch": 0.45717299578059073, + "grad_norm": 0.49066606163978577, + "learning_rate": 0.0015, + "loss": 1.6635, + "step": 4334 + }, + { + "epoch": 0.4572784810126582, + "grad_norm": 0.5614000558853149, + "learning_rate": 0.0015, + "loss": 1.6465, + "step": 4335 + }, + 
{ + "epoch": 0.4573839662447257, + "grad_norm": 0.6668849587440491, + "learning_rate": 0.0015, + "loss": 1.6914, + "step": 4336 + }, + { + "epoch": 0.45748945147679326, + "grad_norm": 0.4986950755119324, + "learning_rate": 0.0015, + "loss": 1.6379, + "step": 4337 + }, + { + "epoch": 0.45759493670886076, + "grad_norm": 0.5193601250648499, + "learning_rate": 0.0015, + "loss": 1.6406, + "step": 4338 + }, + { + "epoch": 0.45770042194092825, + "grad_norm": 0.6118957996368408, + "learning_rate": 0.0015, + "loss": 1.6771, + "step": 4339 + }, + { + "epoch": 0.4578059071729958, + "grad_norm": 0.4993157684803009, + "learning_rate": 0.0015, + "loss": 1.6348, + "step": 4340 + }, + { + "epoch": 0.4579113924050633, + "grad_norm": 0.47754546999931335, + "learning_rate": 0.0015, + "loss": 1.6352, + "step": 4341 + }, + { + "epoch": 0.4580168776371308, + "grad_norm": 0.5599583387374878, + "learning_rate": 0.0015, + "loss": 1.6321, + "step": 4342 + }, + { + "epoch": 0.45812236286919833, + "grad_norm": 0.5248196721076965, + "learning_rate": 0.0015, + "loss": 1.6151, + "step": 4343 + }, + { + "epoch": 0.4582278481012658, + "grad_norm": 0.5224418044090271, + "learning_rate": 0.0015, + "loss": 1.6795, + "step": 4344 + }, + { + "epoch": 0.4583333333333333, + "grad_norm": 0.5129813551902771, + "learning_rate": 0.0015, + "loss": 1.6573, + "step": 4345 + }, + { + "epoch": 0.45843881856540086, + "grad_norm": 0.5238183736801147, + "learning_rate": 0.0015, + "loss": 1.6941, + "step": 4346 + }, + { + "epoch": 0.45854430379746836, + "grad_norm": 0.49654272198677063, + "learning_rate": 0.0015, + "loss": 1.6387, + "step": 4347 + }, + { + "epoch": 0.45864978902953585, + "grad_norm": 0.5397877097129822, + "learning_rate": 0.0015, + "loss": 1.6485, + "step": 4348 + }, + { + "epoch": 0.4587552742616034, + "grad_norm": 0.47956404089927673, + "learning_rate": 0.0015, + "loss": 1.6822, + "step": 4349 + }, + { + "epoch": 0.4588607594936709, + "grad_norm": 0.5442126989364624, + "learning_rate": 0.0015, + 
"loss": 1.6715, + "step": 4350 + }, + { + "epoch": 0.4589662447257384, + "grad_norm": 0.6239200830459595, + "learning_rate": 0.0015, + "loss": 1.6973, + "step": 4351 + }, + { + "epoch": 0.45907172995780593, + "grad_norm": 0.528907060623169, + "learning_rate": 0.0015, + "loss": 1.6857, + "step": 4352 + }, + { + "epoch": 0.4591772151898734, + "grad_norm": 0.5882149934768677, + "learning_rate": 0.0015, + "loss": 1.6559, + "step": 4353 + }, + { + "epoch": 0.4592827004219409, + "grad_norm": 0.677436888217926, + "learning_rate": 0.0015, + "loss": 1.6864, + "step": 4354 + }, + { + "epoch": 0.45938818565400846, + "grad_norm": 0.5961720943450928, + "learning_rate": 0.0015, + "loss": 1.6884, + "step": 4355 + }, + { + "epoch": 0.45949367088607596, + "grad_norm": 0.46816152334213257, + "learning_rate": 0.0015, + "loss": 1.6373, + "step": 4356 + }, + { + "epoch": 0.45959915611814345, + "grad_norm": 0.5927190780639648, + "learning_rate": 0.0015, + "loss": 1.6636, + "step": 4357 + }, + { + "epoch": 0.459704641350211, + "grad_norm": 0.5788471102714539, + "learning_rate": 0.0015, + "loss": 1.6787, + "step": 4358 + }, + { + "epoch": 0.4598101265822785, + "grad_norm": 0.628187894821167, + "learning_rate": 0.0015, + "loss": 1.662, + "step": 4359 + }, + { + "epoch": 0.459915611814346, + "grad_norm": 0.8854433298110962, + "learning_rate": 0.0015, + "loss": 1.6552, + "step": 4360 + }, + { + "epoch": 0.46002109704641353, + "grad_norm": 0.48503169417381287, + "learning_rate": 0.0015, + "loss": 1.6539, + "step": 4361 + }, + { + "epoch": 0.460126582278481, + "grad_norm": 0.7586188912391663, + "learning_rate": 0.0015, + "loss": 1.6212, + "step": 4362 + }, + { + "epoch": 0.4602320675105485, + "grad_norm": 0.5986793041229248, + "learning_rate": 0.0015, + "loss": 1.6408, + "step": 4363 + }, + { + "epoch": 0.460337552742616, + "grad_norm": 0.5295442342758179, + "learning_rate": 0.0015, + "loss": 1.6626, + "step": 4364 + }, + { + "epoch": 0.46044303797468356, + "grad_norm": 0.5004334449768066, + 
"learning_rate": 0.0015, + "loss": 1.6642, + "step": 4365 + }, + { + "epoch": 0.46054852320675105, + "grad_norm": 0.520413875579834, + "learning_rate": 0.0015, + "loss": 1.6191, + "step": 4366 + }, + { + "epoch": 0.46065400843881854, + "grad_norm": 0.552975594997406, + "learning_rate": 0.0015, + "loss": 1.6584, + "step": 4367 + }, + { + "epoch": 0.4607594936708861, + "grad_norm": 0.5053632259368896, + "learning_rate": 0.0015, + "loss": 1.6398, + "step": 4368 + }, + { + "epoch": 0.4608649789029536, + "grad_norm": 0.7521233558654785, + "learning_rate": 0.0015, + "loss": 1.6503, + "step": 4369 + }, + { + "epoch": 0.4609704641350211, + "grad_norm": 0.6210848689079285, + "learning_rate": 0.0015, + "loss": 1.6445, + "step": 4370 + }, + { + "epoch": 0.4610759493670886, + "grad_norm": 0.5326166749000549, + "learning_rate": 0.0015, + "loss": 1.6024, + "step": 4371 + }, + { + "epoch": 0.4611814345991561, + "grad_norm": 0.5367142558097839, + "learning_rate": 0.0015, + "loss": 1.6619, + "step": 4372 + }, + { + "epoch": 0.4612869198312236, + "grad_norm": 0.4858086109161377, + "learning_rate": 0.0015, + "loss": 1.6264, + "step": 4373 + }, + { + "epoch": 0.46139240506329116, + "grad_norm": 0.541311502456665, + "learning_rate": 0.0015, + "loss": 1.6591, + "step": 4374 + }, + { + "epoch": 0.46149789029535865, + "grad_norm": 0.5433955192565918, + "learning_rate": 0.0015, + "loss": 1.6647, + "step": 4375 + }, + { + "epoch": 0.46160337552742614, + "grad_norm": 0.46293917298316956, + "learning_rate": 0.0015, + "loss": 1.6638, + "step": 4376 + }, + { + "epoch": 0.4617088607594937, + "grad_norm": 0.4877987802028656, + "learning_rate": 0.0015, + "loss": 1.6225, + "step": 4377 + }, + { + "epoch": 0.4618143459915612, + "grad_norm": 0.443900465965271, + "learning_rate": 0.0015, + "loss": 1.6633, + "step": 4378 + }, + { + "epoch": 0.4619198312236287, + "grad_norm": 0.48014596104621887, + "learning_rate": 0.0015, + "loss": 1.6762, + "step": 4379 + }, + { + "epoch": 0.4620253164556962, + 
"grad_norm": 0.4642293453216553, + "learning_rate": 0.0015, + "loss": 1.6567, + "step": 4380 + }, + { + "epoch": 0.4621308016877637, + "grad_norm": 0.453438937664032, + "learning_rate": 0.0015, + "loss": 1.6814, + "step": 4381 + }, + { + "epoch": 0.4622362869198312, + "grad_norm": 0.516966700553894, + "learning_rate": 0.0015, + "loss": 1.6418, + "step": 4382 + }, + { + "epoch": 0.46234177215189876, + "grad_norm": 0.4654674232006073, + "learning_rate": 0.0015, + "loss": 1.6411, + "step": 4383 + }, + { + "epoch": 0.46244725738396625, + "grad_norm": 0.5884807705879211, + "learning_rate": 0.0015, + "loss": 1.6194, + "step": 4384 + }, + { + "epoch": 0.46255274261603374, + "grad_norm": 0.5772904753684998, + "learning_rate": 0.0015, + "loss": 1.6616, + "step": 4385 + }, + { + "epoch": 0.4626582278481013, + "grad_norm": 0.4588421583175659, + "learning_rate": 0.0015, + "loss": 1.6574, + "step": 4386 + }, + { + "epoch": 0.4627637130801688, + "grad_norm": 0.5177502632141113, + "learning_rate": 0.0015, + "loss": 1.6254, + "step": 4387 + }, + { + "epoch": 0.4628691983122363, + "grad_norm": 0.5180033445358276, + "learning_rate": 0.0015, + "loss": 1.6537, + "step": 4388 + }, + { + "epoch": 0.4629746835443038, + "grad_norm": 0.5307671427726746, + "learning_rate": 0.0015, + "loss": 1.6451, + "step": 4389 + }, + { + "epoch": 0.4630801687763713, + "grad_norm": 0.5913004875183105, + "learning_rate": 0.0015, + "loss": 1.6414, + "step": 4390 + }, + { + "epoch": 0.4631856540084388, + "grad_norm": 0.504273533821106, + "learning_rate": 0.0015, + "loss": 1.6423, + "step": 4391 + }, + { + "epoch": 0.46329113924050636, + "grad_norm": 0.5816996097564697, + "learning_rate": 0.0015, + "loss": 1.6286, + "step": 4392 + }, + { + "epoch": 0.46339662447257385, + "grad_norm": 0.8179287314414978, + "learning_rate": 0.0015, + "loss": 1.6497, + "step": 4393 + }, + { + "epoch": 0.46350210970464134, + "grad_norm": 0.7486006021499634, + "learning_rate": 0.0015, + "loss": 1.665, + "step": 4394 + }, + { + 
"epoch": 0.46360759493670883, + "grad_norm": 0.4848342537879944, + "learning_rate": 0.0015, + "loss": 1.6656, + "step": 4395 + }, + { + "epoch": 0.4637130801687764, + "grad_norm": 0.951631486415863, + "learning_rate": 0.0015, + "loss": 1.6568, + "step": 4396 + }, + { + "epoch": 0.4638185654008439, + "grad_norm": 0.7860904335975647, + "learning_rate": 0.0015, + "loss": 1.6536, + "step": 4397 + }, + { + "epoch": 0.46392405063291137, + "grad_norm": 0.5099451541900635, + "learning_rate": 0.0015, + "loss": 1.6841, + "step": 4398 + }, + { + "epoch": 0.4640295358649789, + "grad_norm": 0.9489009976387024, + "learning_rate": 0.0015, + "loss": 1.6274, + "step": 4399 + }, + { + "epoch": 0.4641350210970464, + "grad_norm": 0.7093892693519592, + "learning_rate": 0.0015, + "loss": 1.6884, + "step": 4400 + }, + { + "epoch": 0.4642405063291139, + "grad_norm": 0.6286365985870361, + "learning_rate": 0.0015, + "loss": 1.6354, + "step": 4401 + }, + { + "epoch": 0.46434599156118145, + "grad_norm": 0.8489190936088562, + "learning_rate": 0.0015, + "loss": 1.6849, + "step": 4402 + }, + { + "epoch": 0.46445147679324894, + "grad_norm": 0.5102296471595764, + "learning_rate": 0.0015, + "loss": 1.7035, + "step": 4403 + }, + { + "epoch": 0.46455696202531643, + "grad_norm": 0.8167166709899902, + "learning_rate": 0.0015, + "loss": 1.6814, + "step": 4404 + }, + { + "epoch": 0.464662447257384, + "grad_norm": 0.702418863773346, + "learning_rate": 0.0015, + "loss": 1.6658, + "step": 4405 + }, + { + "epoch": 0.4647679324894515, + "grad_norm": 0.49861350655555725, + "learning_rate": 0.0015, + "loss": 1.6625, + "step": 4406 + }, + { + "epoch": 0.46487341772151897, + "grad_norm": 0.5561988353729248, + "learning_rate": 0.0015, + "loss": 1.6472, + "step": 4407 + }, + { + "epoch": 0.4649789029535865, + "grad_norm": 0.5472853183746338, + "learning_rate": 0.0015, + "loss": 1.6755, + "step": 4408 + }, + { + "epoch": 0.465084388185654, + "grad_norm": 0.5895664095878601, + "learning_rate": 0.0015, + "loss": 
1.6528, + "step": 4409 + }, + { + "epoch": 0.4651898734177215, + "grad_norm": 0.7193066477775574, + "learning_rate": 0.0015, + "loss": 1.6648, + "step": 4410 + }, + { + "epoch": 0.46529535864978905, + "grad_norm": 0.5323827266693115, + "learning_rate": 0.0015, + "loss": 1.6458, + "step": 4411 + }, + { + "epoch": 0.46540084388185654, + "grad_norm": 0.5354639887809753, + "learning_rate": 0.0015, + "loss": 1.6918, + "step": 4412 + }, + { + "epoch": 0.46550632911392403, + "grad_norm": 0.5667396187782288, + "learning_rate": 0.0015, + "loss": 1.6149, + "step": 4413 + }, + { + "epoch": 0.4656118143459916, + "grad_norm": 0.569074809551239, + "learning_rate": 0.0015, + "loss": 1.6393, + "step": 4414 + }, + { + "epoch": 0.4657172995780591, + "grad_norm": 0.5345011353492737, + "learning_rate": 0.0015, + "loss": 1.6651, + "step": 4415 + }, + { + "epoch": 0.46582278481012657, + "grad_norm": 0.6622505784034729, + "learning_rate": 0.0015, + "loss": 1.6819, + "step": 4416 + }, + { + "epoch": 0.4659282700421941, + "grad_norm": 0.5924422740936279, + "learning_rate": 0.0015, + "loss": 1.6242, + "step": 4417 + }, + { + "epoch": 0.4660337552742616, + "grad_norm": 0.5936039686203003, + "learning_rate": 0.0015, + "loss": 1.6564, + "step": 4418 + }, + { + "epoch": 0.4661392405063291, + "grad_norm": 0.5590488910675049, + "learning_rate": 0.0015, + "loss": 1.6627, + "step": 4419 + }, + { + "epoch": 0.46624472573839665, + "grad_norm": 0.4982949495315552, + "learning_rate": 0.0015, + "loss": 1.6343, + "step": 4420 + }, + { + "epoch": 0.46635021097046414, + "grad_norm": 0.511516809463501, + "learning_rate": 0.0015, + "loss": 1.6492, + "step": 4421 + }, + { + "epoch": 0.46645569620253163, + "grad_norm": 0.6481553912162781, + "learning_rate": 0.0015, + "loss": 1.6536, + "step": 4422 + }, + { + "epoch": 0.4665611814345992, + "grad_norm": 0.577252209186554, + "learning_rate": 0.0015, + "loss": 1.6732, + "step": 4423 + }, + { + "epoch": 0.4666666666666667, + "grad_norm": 0.5631588697433472, + 
"learning_rate": 0.0015, + "loss": 1.6768, + "step": 4424 + }, + { + "epoch": 0.46677215189873417, + "grad_norm": 0.5087147951126099, + "learning_rate": 0.0015, + "loss": 1.6449, + "step": 4425 + }, + { + "epoch": 0.4668776371308017, + "grad_norm": 0.6231689453125, + "learning_rate": 0.0015, + "loss": 1.6368, + "step": 4426 + }, + { + "epoch": 0.4669831223628692, + "grad_norm": 0.5108701586723328, + "learning_rate": 0.0015, + "loss": 1.6494, + "step": 4427 + }, + { + "epoch": 0.4670886075949367, + "grad_norm": 0.5215011239051819, + "learning_rate": 0.0015, + "loss": 1.6576, + "step": 4428 + }, + { + "epoch": 0.4671940928270042, + "grad_norm": 0.5362898111343384, + "learning_rate": 0.0015, + "loss": 1.6811, + "step": 4429 + }, + { + "epoch": 0.46729957805907174, + "grad_norm": 0.5128081440925598, + "learning_rate": 0.0015, + "loss": 1.6234, + "step": 4430 + }, + { + "epoch": 0.46740506329113923, + "grad_norm": 0.645807683467865, + "learning_rate": 0.0015, + "loss": 1.6446, + "step": 4431 + }, + { + "epoch": 0.4675105485232067, + "grad_norm": 0.6203972101211548, + "learning_rate": 0.0015, + "loss": 1.6856, + "step": 4432 + }, + { + "epoch": 0.4676160337552743, + "grad_norm": 0.6224325895309448, + "learning_rate": 0.0015, + "loss": 1.6468, + "step": 4433 + }, + { + "epoch": 0.46772151898734177, + "grad_norm": 0.9138966798782349, + "learning_rate": 0.0015, + "loss": 1.6826, + "step": 4434 + }, + { + "epoch": 0.46782700421940926, + "grad_norm": 0.7643339037895203, + "learning_rate": 0.0015, + "loss": 1.626, + "step": 4435 + }, + { + "epoch": 0.4679324894514768, + "grad_norm": 0.4755401611328125, + "learning_rate": 0.0015, + "loss": 1.6263, + "step": 4436 + }, + { + "epoch": 0.4680379746835443, + "grad_norm": 0.6597535014152527, + "learning_rate": 0.0015, + "loss": 1.6512, + "step": 4437 + }, + { + "epoch": 0.4681434599156118, + "grad_norm": 0.6575095653533936, + "learning_rate": 0.0015, + "loss": 1.6671, + "step": 4438 + }, + { + "epoch": 0.46824894514767934, + 
"grad_norm": 0.5518959760665894, + "learning_rate": 0.0015, + "loss": 1.6256, + "step": 4439 + }, + { + "epoch": 0.46835443037974683, + "grad_norm": 0.6916687488555908, + "learning_rate": 0.0015, + "loss": 1.6769, + "step": 4440 + }, + { + "epoch": 0.4684599156118143, + "grad_norm": 0.5765088796615601, + "learning_rate": 0.0015, + "loss": 1.6447, + "step": 4441 + }, + { + "epoch": 0.4685654008438819, + "grad_norm": 0.7362664341926575, + "learning_rate": 0.0015, + "loss": 1.6807, + "step": 4442 + }, + { + "epoch": 0.46867088607594937, + "grad_norm": 0.8707485795021057, + "learning_rate": 0.0015, + "loss": 1.6769, + "step": 4443 + }, + { + "epoch": 0.46877637130801686, + "grad_norm": 0.5041620135307312, + "learning_rate": 0.0015, + "loss": 1.6382, + "step": 4444 + }, + { + "epoch": 0.4688818565400844, + "grad_norm": 0.8869928121566772, + "learning_rate": 0.0015, + "loss": 1.6819, + "step": 4445 + }, + { + "epoch": 0.4689873417721519, + "grad_norm": 0.9099951386451721, + "learning_rate": 0.0015, + "loss": 1.6215, + "step": 4446 + }, + { + "epoch": 0.4690928270042194, + "grad_norm": 0.5322218537330627, + "learning_rate": 0.0015, + "loss": 1.6714, + "step": 4447 + }, + { + "epoch": 0.46919831223628694, + "grad_norm": 0.8943033814430237, + "learning_rate": 0.0015, + "loss": 1.693, + "step": 4448 + }, + { + "epoch": 0.46930379746835443, + "grad_norm": 0.6124472618103027, + "learning_rate": 0.0015, + "loss": 1.6572, + "step": 4449 + }, + { + "epoch": 0.4694092827004219, + "grad_norm": 0.673154890537262, + "learning_rate": 0.0015, + "loss": 1.6399, + "step": 4450 + }, + { + "epoch": 0.4695147679324895, + "grad_norm": 0.715014636516571, + "learning_rate": 0.0015, + "loss": 1.6534, + "step": 4451 + }, + { + "epoch": 0.46962025316455697, + "grad_norm": 0.5323740243911743, + "learning_rate": 0.0015, + "loss": 1.6249, + "step": 4452 + }, + { + "epoch": 0.46972573839662446, + "grad_norm": 0.6634638905525208, + "learning_rate": 0.0015, + "loss": 1.6746, + "step": 4453 + }, + { + 
"epoch": 0.469831223628692, + "grad_norm": 0.8106464147567749, + "learning_rate": 0.0015, + "loss": 1.6049, + "step": 4454 + }, + { + "epoch": 0.4699367088607595, + "grad_norm": 0.5180960893630981, + "learning_rate": 0.0015, + "loss": 1.6292, + "step": 4455 + }, + { + "epoch": 0.470042194092827, + "grad_norm": 0.7639739513397217, + "learning_rate": 0.0015, + "loss": 1.6442, + "step": 4456 + }, + { + "epoch": 0.47014767932489454, + "grad_norm": 0.6277756094932556, + "learning_rate": 0.0015, + "loss": 1.6372, + "step": 4457 + }, + { + "epoch": 0.47025316455696203, + "grad_norm": 0.612694501876831, + "learning_rate": 0.0015, + "loss": 1.6344, + "step": 4458 + }, + { + "epoch": 0.4703586497890295, + "grad_norm": 0.6587413549423218, + "learning_rate": 0.0015, + "loss": 1.6643, + "step": 4459 + }, + { + "epoch": 0.4704641350210971, + "grad_norm": 0.550410807132721, + "learning_rate": 0.0015, + "loss": 1.6931, + "step": 4460 + }, + { + "epoch": 0.47056962025316457, + "grad_norm": 0.6902750134468079, + "learning_rate": 0.0015, + "loss": 1.6545, + "step": 4461 + }, + { + "epoch": 0.47067510548523206, + "grad_norm": 0.6775808930397034, + "learning_rate": 0.0015, + "loss": 1.6586, + "step": 4462 + }, + { + "epoch": 0.47078059071729955, + "grad_norm": 0.5754194259643555, + "learning_rate": 0.0015, + "loss": 1.6369, + "step": 4463 + }, + { + "epoch": 0.4708860759493671, + "grad_norm": 0.8526375889778137, + "learning_rate": 0.0015, + "loss": 1.654, + "step": 4464 + }, + { + "epoch": 0.4709915611814346, + "grad_norm": 0.667432963848114, + "learning_rate": 0.0015, + "loss": 1.6348, + "step": 4465 + }, + { + "epoch": 0.4710970464135021, + "grad_norm": 0.48168909549713135, + "learning_rate": 0.0015, + "loss": 1.6636, + "step": 4466 + }, + { + "epoch": 0.47120253164556963, + "grad_norm": 0.5759777426719666, + "learning_rate": 0.0015, + "loss": 1.637, + "step": 4467 + }, + { + "epoch": 0.4713080168776371, + "grad_norm": 0.5050199627876282, + "learning_rate": 0.0015, + "loss": 1.6402, 
+ "step": 4468 + }, + { + "epoch": 0.4714135021097046, + "grad_norm": 0.5121300220489502, + "learning_rate": 0.0015, + "loss": 1.6902, + "step": 4469 + }, + { + "epoch": 0.47151898734177217, + "grad_norm": 0.546608030796051, + "learning_rate": 0.0015, + "loss": 1.6852, + "step": 4470 + }, + { + "epoch": 0.47162447257383966, + "grad_norm": 0.5345836281776428, + "learning_rate": 0.0015, + "loss": 1.6391, + "step": 4471 + }, + { + "epoch": 0.47172995780590715, + "grad_norm": 0.5008557438850403, + "learning_rate": 0.0015, + "loss": 1.6374, + "step": 4472 + }, + { + "epoch": 0.4718354430379747, + "grad_norm": 0.64007169008255, + "learning_rate": 0.0015, + "loss": 1.6716, + "step": 4473 + }, + { + "epoch": 0.4719409282700422, + "grad_norm": 0.6776872277259827, + "learning_rate": 0.0015, + "loss": 1.7013, + "step": 4474 + }, + { + "epoch": 0.4720464135021097, + "grad_norm": 0.602365255355835, + "learning_rate": 0.0015, + "loss": 1.6872, + "step": 4475 + }, + { + "epoch": 0.47215189873417723, + "grad_norm": 0.5588603019714355, + "learning_rate": 0.0015, + "loss": 1.6333, + "step": 4476 + }, + { + "epoch": 0.4722573839662447, + "grad_norm": 0.5542494654655457, + "learning_rate": 0.0015, + "loss": 1.668, + "step": 4477 + }, + { + "epoch": 0.4723628691983122, + "grad_norm": 0.5087571144104004, + "learning_rate": 0.0015, + "loss": 1.6803, + "step": 4478 + }, + { + "epoch": 0.47246835443037977, + "grad_norm": 0.6984165906906128, + "learning_rate": 0.0015, + "loss": 1.6555, + "step": 4479 + }, + { + "epoch": 0.47257383966244726, + "grad_norm": 0.5771257281303406, + "learning_rate": 0.0015, + "loss": 1.6652, + "step": 4480 + }, + { + "epoch": 0.47267932489451475, + "grad_norm": 0.5852707028388977, + "learning_rate": 0.0015, + "loss": 1.6296, + "step": 4481 + }, + { + "epoch": 0.4727848101265823, + "grad_norm": 0.6187868118286133, + "learning_rate": 0.0015, + "loss": 1.6424, + "step": 4482 + }, + { + "epoch": 0.4728902953586498, + "grad_norm": 0.5727245807647705, + 
"learning_rate": 0.0015, + "loss": 1.6306, + "step": 4483 + }, + { + "epoch": 0.4729957805907173, + "grad_norm": 0.5415666103363037, + "learning_rate": 0.0015, + "loss": 1.6305, + "step": 4484 + }, + { + "epoch": 0.47310126582278483, + "grad_norm": 0.5851130485534668, + "learning_rate": 0.0015, + "loss": 1.6555, + "step": 4485 + }, + { + "epoch": 0.4732067510548523, + "grad_norm": 0.5078525543212891, + "learning_rate": 0.0015, + "loss": 1.6474, + "step": 4486 + }, + { + "epoch": 0.4733122362869198, + "grad_norm": 0.4962426722049713, + "learning_rate": 0.0015, + "loss": 1.6397, + "step": 4487 + }, + { + "epoch": 0.47341772151898737, + "grad_norm": 0.5256374478340149, + "learning_rate": 0.0015, + "loss": 1.6701, + "step": 4488 + }, + { + "epoch": 0.47352320675105486, + "grad_norm": 0.6503275632858276, + "learning_rate": 0.0015, + "loss": 1.6524, + "step": 4489 + }, + { + "epoch": 0.47362869198312235, + "grad_norm": 0.70665043592453, + "learning_rate": 0.0015, + "loss": 1.6993, + "step": 4490 + }, + { + "epoch": 0.4737341772151899, + "grad_norm": 0.4727391302585602, + "learning_rate": 0.0015, + "loss": 1.6407, + "step": 4491 + }, + { + "epoch": 0.4738396624472574, + "grad_norm": 0.7271082401275635, + "learning_rate": 0.0015, + "loss": 1.6665, + "step": 4492 + }, + { + "epoch": 0.4739451476793249, + "grad_norm": 0.6968852877616882, + "learning_rate": 0.0015, + "loss": 1.6323, + "step": 4493 + }, + { + "epoch": 0.4740506329113924, + "grad_norm": 0.5151492357254028, + "learning_rate": 0.0015, + "loss": 1.6287, + "step": 4494 + }, + { + "epoch": 0.4741561181434599, + "grad_norm": 0.8903449177742004, + "learning_rate": 0.0015, + "loss": 1.6769, + "step": 4495 + }, + { + "epoch": 0.4742616033755274, + "grad_norm": 0.7018641233444214, + "learning_rate": 0.0015, + "loss": 1.6437, + "step": 4496 + }, + { + "epoch": 0.4743670886075949, + "grad_norm": 0.5498849153518677, + "learning_rate": 0.0015, + "loss": 1.6276, + "step": 4497 + }, + { + "epoch": 0.47447257383966246, + 
"grad_norm": 0.6099660992622375, + "learning_rate": 0.0015, + "loss": 1.6588, + "step": 4498 + }, + { + "epoch": 0.47457805907172995, + "grad_norm": 0.5692307353019714, + "learning_rate": 0.0015, + "loss": 1.6566, + "step": 4499 + }, + { + "epoch": 0.47468354430379744, + "grad_norm": 0.49323442578315735, + "learning_rate": 0.0015, + "loss": 1.6528, + "step": 4500 + }, + { + "epoch": 0.474789029535865, + "grad_norm": 0.5160009264945984, + "learning_rate": 0.0015, + "loss": 1.6727, + "step": 4501 + }, + { + "epoch": 0.4748945147679325, + "grad_norm": 0.5155899524688721, + "learning_rate": 0.0015, + "loss": 1.6691, + "step": 4502 + }, + { + "epoch": 0.475, + "grad_norm": 0.5003455281257629, + "learning_rate": 0.0015, + "loss": 1.6442, + "step": 4503 + }, + { + "epoch": 0.4751054852320675, + "grad_norm": 0.47251251339912415, + "learning_rate": 0.0015, + "loss": 1.6633, + "step": 4504 + }, + { + "epoch": 0.475210970464135, + "grad_norm": 0.4946487247943878, + "learning_rate": 0.0015, + "loss": 1.6484, + "step": 4505 + }, + { + "epoch": 0.4753164556962025, + "grad_norm": 0.4878123104572296, + "learning_rate": 0.0015, + "loss": 1.6432, + "step": 4506 + }, + { + "epoch": 0.47542194092827006, + "grad_norm": 0.5271539688110352, + "learning_rate": 0.0015, + "loss": 1.6725, + "step": 4507 + }, + { + "epoch": 0.47552742616033755, + "grad_norm": 0.4943915903568268, + "learning_rate": 0.0015, + "loss": 1.6381, + "step": 4508 + }, + { + "epoch": 0.47563291139240504, + "grad_norm": 0.4758211672306061, + "learning_rate": 0.0015, + "loss": 1.6582, + "step": 4509 + }, + { + "epoch": 0.4757383966244726, + "grad_norm": 0.46259981393814087, + "learning_rate": 0.0015, + "loss": 1.6105, + "step": 4510 + }, + { + "epoch": 0.4758438818565401, + "grad_norm": 0.44713056087493896, + "learning_rate": 0.0015, + "loss": 1.6255, + "step": 4511 + }, + { + "epoch": 0.4759493670886076, + "grad_norm": 0.5059557557106018, + "learning_rate": 0.0015, + "loss": 1.6363, + "step": 4512 + }, + { + "epoch": 
0.4760548523206751, + "grad_norm": 0.5178486704826355, + "learning_rate": 0.0015, + "loss": 1.6535, + "step": 4513 + }, + { + "epoch": 0.4761603375527426, + "grad_norm": 0.4474097192287445, + "learning_rate": 0.0015, + "loss": 1.6575, + "step": 4514 + }, + { + "epoch": 0.4762658227848101, + "grad_norm": 0.49275943636894226, + "learning_rate": 0.0015, + "loss": 1.6745, + "step": 4515 + }, + { + "epoch": 0.47637130801687766, + "grad_norm": 0.5427425503730774, + "learning_rate": 0.0015, + "loss": 1.6252, + "step": 4516 + }, + { + "epoch": 0.47647679324894515, + "grad_norm": 0.5037579536437988, + "learning_rate": 0.0015, + "loss": 1.6231, + "step": 4517 + }, + { + "epoch": 0.47658227848101264, + "grad_norm": 0.46757787466049194, + "learning_rate": 0.0015, + "loss": 1.607, + "step": 4518 + }, + { + "epoch": 0.4766877637130802, + "grad_norm": 0.518986701965332, + "learning_rate": 0.0015, + "loss": 1.6044, + "step": 4519 + }, + { + "epoch": 0.4767932489451477, + "grad_norm": 0.483244389295578, + "learning_rate": 0.0015, + "loss": 1.6185, + "step": 4520 + }, + { + "epoch": 0.4768987341772152, + "grad_norm": 0.4733765423297882, + "learning_rate": 0.0015, + "loss": 1.672, + "step": 4521 + }, + { + "epoch": 0.4770042194092827, + "grad_norm": 0.469696968793869, + "learning_rate": 0.0015, + "loss": 1.6809, + "step": 4522 + }, + { + "epoch": 0.4771097046413502, + "grad_norm": 0.5405987501144409, + "learning_rate": 0.0015, + "loss": 1.6566, + "step": 4523 + }, + { + "epoch": 0.4772151898734177, + "grad_norm": 0.5330632925033569, + "learning_rate": 0.0015, + "loss": 1.6529, + "step": 4524 + }, + { + "epoch": 0.47732067510548526, + "grad_norm": 0.47293856739997864, + "learning_rate": 0.0015, + "loss": 1.6202, + "step": 4525 + }, + { + "epoch": 0.47742616033755275, + "grad_norm": 0.5860047340393066, + "learning_rate": 0.0015, + "loss": 1.672, + "step": 4526 + }, + { + "epoch": 0.47753164556962024, + "grad_norm": 0.5031240582466125, + "learning_rate": 0.0015, + "loss": 1.6216, + 
"step": 4527 + }, + { + "epoch": 0.47763713080168774, + "grad_norm": 0.6522221565246582, + "learning_rate": 0.0015, + "loss": 1.6586, + "step": 4528 + }, + { + "epoch": 0.4777426160337553, + "grad_norm": 0.6988711953163147, + "learning_rate": 0.0015, + "loss": 1.6484, + "step": 4529 + }, + { + "epoch": 0.4778481012658228, + "grad_norm": 0.6758490800857544, + "learning_rate": 0.0015, + "loss": 1.6676, + "step": 4530 + }, + { + "epoch": 0.47795358649789027, + "grad_norm": 0.4575705826282501, + "learning_rate": 0.0015, + "loss": 1.6192, + "step": 4531 + }, + { + "epoch": 0.4780590717299578, + "grad_norm": 0.5926907062530518, + "learning_rate": 0.0015, + "loss": 1.6439, + "step": 4532 + }, + { + "epoch": 0.4781645569620253, + "grad_norm": 0.5273928046226501, + "learning_rate": 0.0015, + "loss": 1.6814, + "step": 4533 + }, + { + "epoch": 0.4782700421940928, + "grad_norm": 0.5159898996353149, + "learning_rate": 0.0015, + "loss": 1.6732, + "step": 4534 + }, + { + "epoch": 0.47837552742616035, + "grad_norm": 0.6231305003166199, + "learning_rate": 0.0015, + "loss": 1.6264, + "step": 4535 + }, + { + "epoch": 0.47848101265822784, + "grad_norm": 0.4684787690639496, + "learning_rate": 0.0015, + "loss": 1.6437, + "step": 4536 + }, + { + "epoch": 0.47858649789029534, + "grad_norm": 0.5544801950454712, + "learning_rate": 0.0015, + "loss": 1.6519, + "step": 4537 + }, + { + "epoch": 0.4786919831223629, + "grad_norm": 0.5347890257835388, + "learning_rate": 0.0015, + "loss": 1.6577, + "step": 4538 + }, + { + "epoch": 0.4787974683544304, + "grad_norm": 0.551781415939331, + "learning_rate": 0.0015, + "loss": 1.6513, + "step": 4539 + }, + { + "epoch": 0.47890295358649787, + "grad_norm": 0.5763802528381348, + "learning_rate": 0.0015, + "loss": 1.6893, + "step": 4540 + }, + { + "epoch": 0.4790084388185654, + "grad_norm": 0.540147066116333, + "learning_rate": 0.0015, + "loss": 1.6221, + "step": 4541 + }, + { + "epoch": 0.4791139240506329, + "grad_norm": 0.5349014401435852, + 
"learning_rate": 0.0015, + "loss": 1.623, + "step": 4542 + }, + { + "epoch": 0.4792194092827004, + "grad_norm": 0.5280075669288635, + "learning_rate": 0.0015, + "loss": 1.6606, + "step": 4543 + }, + { + "epoch": 0.47932489451476795, + "grad_norm": 0.5085506439208984, + "learning_rate": 0.0015, + "loss": 1.662, + "step": 4544 + }, + { + "epoch": 0.47943037974683544, + "grad_norm": 0.5173304080963135, + "learning_rate": 0.0015, + "loss": 1.6438, + "step": 4545 + }, + { + "epoch": 0.47953586497890294, + "grad_norm": 0.6235553026199341, + "learning_rate": 0.0015, + "loss": 1.6657, + "step": 4546 + }, + { + "epoch": 0.4796413502109705, + "grad_norm": 0.5665614008903503, + "learning_rate": 0.0015, + "loss": 1.655, + "step": 4547 + }, + { + "epoch": 0.479746835443038, + "grad_norm": 0.44198301434516907, + "learning_rate": 0.0015, + "loss": 1.6794, + "step": 4548 + }, + { + "epoch": 0.47985232067510547, + "grad_norm": 0.5924791693687439, + "learning_rate": 0.0015, + "loss": 1.6639, + "step": 4549 + }, + { + "epoch": 0.479957805907173, + "grad_norm": 0.4326235353946686, + "learning_rate": 0.0015, + "loss": 1.6162, + "step": 4550 + }, + { + "epoch": 0.4800632911392405, + "grad_norm": 0.6195024251937866, + "learning_rate": 0.0015, + "loss": 1.6312, + "step": 4551 + }, + { + "epoch": 0.480168776371308, + "grad_norm": 0.631605863571167, + "learning_rate": 0.0015, + "loss": 1.6192, + "step": 4552 + }, + { + "epoch": 0.48027426160337555, + "grad_norm": 0.4493882954120636, + "learning_rate": 0.0015, + "loss": 1.6217, + "step": 4553 + }, + { + "epoch": 0.48037974683544304, + "grad_norm": 0.807175874710083, + "learning_rate": 0.0015, + "loss": 1.6678, + "step": 4554 + }, + { + "epoch": 0.48048523206751054, + "grad_norm": 0.8161116242408752, + "learning_rate": 0.0015, + "loss": 1.6227, + "step": 4555 + }, + { + "epoch": 0.4805907172995781, + "grad_norm": 0.463988333940506, + "learning_rate": 0.0015, + "loss": 1.6616, + "step": 4556 + }, + { + "epoch": 0.4806962025316456, + 
"grad_norm": 0.6695824265480042, + "learning_rate": 0.0015, + "loss": 1.6393, + "step": 4557 + }, + { + "epoch": 0.48080168776371307, + "grad_norm": 0.5819977521896362, + "learning_rate": 0.0015, + "loss": 1.6552, + "step": 4558 + }, + { + "epoch": 0.48090717299578056, + "grad_norm": 0.5306731462478638, + "learning_rate": 0.0015, + "loss": 1.6546, + "step": 4559 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 0.6184477806091309, + "learning_rate": 0.0015, + "loss": 1.6615, + "step": 4560 + }, + { + "epoch": 0.4811181434599156, + "grad_norm": 0.507031261920929, + "learning_rate": 0.0015, + "loss": 1.6048, + "step": 4561 + }, + { + "epoch": 0.4812236286919831, + "grad_norm": 0.6315982937812805, + "learning_rate": 0.0015, + "loss": 1.6079, + "step": 4562 + }, + { + "epoch": 0.48132911392405064, + "grad_norm": 0.6824647784233093, + "learning_rate": 0.0015, + "loss": 1.6289, + "step": 4563 + }, + { + "epoch": 0.48143459915611814, + "grad_norm": 0.49028703570365906, + "learning_rate": 0.0015, + "loss": 1.6405, + "step": 4564 + }, + { + "epoch": 0.48154008438818563, + "grad_norm": 0.5238708257675171, + "learning_rate": 0.0015, + "loss": 1.5992, + "step": 4565 + }, + { + "epoch": 0.4816455696202532, + "grad_norm": 0.5775980353355408, + "learning_rate": 0.0015, + "loss": 1.6615, + "step": 4566 + }, + { + "epoch": 0.48175105485232067, + "grad_norm": 0.45707377791404724, + "learning_rate": 0.0015, + "loss": 1.6192, + "step": 4567 + }, + { + "epoch": 0.48185654008438816, + "grad_norm": 0.5543075203895569, + "learning_rate": 0.0015, + "loss": 1.6355, + "step": 4568 + }, + { + "epoch": 0.4819620253164557, + "grad_norm": 0.6481205821037292, + "learning_rate": 0.0015, + "loss": 1.6028, + "step": 4569 + }, + { + "epoch": 0.4820675105485232, + "grad_norm": 0.5371230840682983, + "learning_rate": 0.0015, + "loss": 1.6352, + "step": 4570 + }, + { + "epoch": 0.4821729957805907, + "grad_norm": 0.5408508777618408, + "learning_rate": 0.0015, + "loss": 1.6286, + "step": 4571 + }, + { 
+ "epoch": 0.48227848101265824, + "grad_norm": 0.46124279499053955, + "learning_rate": 0.0015, + "loss": 1.606, + "step": 4572 + }, + { + "epoch": 0.48238396624472574, + "grad_norm": 0.4997161328792572, + "learning_rate": 0.0015, + "loss": 1.6171, + "step": 4573 + }, + { + "epoch": 0.48248945147679323, + "grad_norm": 0.5767737030982971, + "learning_rate": 0.0015, + "loss": 1.6953, + "step": 4574 + }, + { + "epoch": 0.4825949367088608, + "grad_norm": 0.4893116354942322, + "learning_rate": 0.0015, + "loss": 1.6182, + "step": 4575 + }, + { + "epoch": 0.48270042194092827, + "grad_norm": 0.47082701325416565, + "learning_rate": 0.0015, + "loss": 1.6746, + "step": 4576 + }, + { + "epoch": 0.48280590717299576, + "grad_norm": 0.5216884613037109, + "learning_rate": 0.0015, + "loss": 1.6195, + "step": 4577 + }, + { + "epoch": 0.4829113924050633, + "grad_norm": 0.5208689570426941, + "learning_rate": 0.0015, + "loss": 1.6425, + "step": 4578 + }, + { + "epoch": 0.4830168776371308, + "grad_norm": 0.4750952422618866, + "learning_rate": 0.0015, + "loss": 1.6755, + "step": 4579 + }, + { + "epoch": 0.4831223628691983, + "grad_norm": 0.5047564506530762, + "learning_rate": 0.0015, + "loss": 1.6673, + "step": 4580 + }, + { + "epoch": 0.48322784810126584, + "grad_norm": 0.5329234004020691, + "learning_rate": 0.0015, + "loss": 1.598, + "step": 4581 + }, + { + "epoch": 0.48333333333333334, + "grad_norm": 0.489246129989624, + "learning_rate": 0.0015, + "loss": 1.6191, + "step": 4582 + }, + { + "epoch": 0.48343881856540083, + "grad_norm": 0.5560640692710876, + "learning_rate": 0.0015, + "loss": 1.6317, + "step": 4583 + }, + { + "epoch": 0.4835443037974684, + "grad_norm": 0.4825168251991272, + "learning_rate": 0.0015, + "loss": 1.6698, + "step": 4584 + }, + { + "epoch": 0.48364978902953587, + "grad_norm": 0.523622453212738, + "learning_rate": 0.0015, + "loss": 1.6574, + "step": 4585 + }, + { + "epoch": 0.48375527426160336, + "grad_norm": 0.5375120639801025, + "learning_rate": 0.0015, + 
"loss": 1.6471, + "step": 4586 + }, + { + "epoch": 0.4838607594936709, + "grad_norm": 0.4922407269477844, + "learning_rate": 0.0015, + "loss": 1.6484, + "step": 4587 + }, + { + "epoch": 0.4839662447257384, + "grad_norm": 0.5101624131202698, + "learning_rate": 0.0015, + "loss": 1.5948, + "step": 4588 + }, + { + "epoch": 0.4840717299578059, + "grad_norm": 0.5333686470985413, + "learning_rate": 0.0015, + "loss": 1.639, + "step": 4589 + }, + { + "epoch": 0.48417721518987344, + "grad_norm": 0.5021880269050598, + "learning_rate": 0.0015, + "loss": 1.6358, + "step": 4590 + }, + { + "epoch": 0.48428270042194094, + "grad_norm": 0.4982892870903015, + "learning_rate": 0.0015, + "loss": 1.6505, + "step": 4591 + }, + { + "epoch": 0.48438818565400843, + "grad_norm": 0.5568966865539551, + "learning_rate": 0.0015, + "loss": 1.6312, + "step": 4592 + }, + { + "epoch": 0.4844936708860759, + "grad_norm": 0.5701368451118469, + "learning_rate": 0.0015, + "loss": 1.6386, + "step": 4593 + }, + { + "epoch": 0.48459915611814347, + "grad_norm": 0.5515512228012085, + "learning_rate": 0.0015, + "loss": 1.6112, + "step": 4594 + }, + { + "epoch": 0.48470464135021096, + "grad_norm": 0.670576810836792, + "learning_rate": 0.0015, + "loss": 1.6643, + "step": 4595 + }, + { + "epoch": 0.48481012658227846, + "grad_norm": 0.6857293844223022, + "learning_rate": 0.0015, + "loss": 1.6653, + "step": 4596 + }, + { + "epoch": 0.484915611814346, + "grad_norm": 0.6367458701133728, + "learning_rate": 0.0015, + "loss": 1.6324, + "step": 4597 + }, + { + "epoch": 0.4850210970464135, + "grad_norm": 0.5497254729270935, + "learning_rate": 0.0015, + "loss": 1.6578, + "step": 4598 + }, + { + "epoch": 0.485126582278481, + "grad_norm": 0.6092268228530884, + "learning_rate": 0.0015, + "loss": 1.6968, + "step": 4599 + }, + { + "epoch": 0.48523206751054854, + "grad_norm": 0.6367563009262085, + "learning_rate": 0.0015, + "loss": 1.6674, + "step": 4600 + }, + { + "epoch": 0.48533755274261603, + "grad_norm": 0.5361546277999878, 
+ "learning_rate": 0.0015, + "loss": 1.6887, + "step": 4601 + }, + { + "epoch": 0.4854430379746835, + "grad_norm": 0.7194101214408875, + "learning_rate": 0.0015, + "loss": 1.6575, + "step": 4602 + }, + { + "epoch": 0.48554852320675107, + "grad_norm": 0.5410552620887756, + "learning_rate": 0.0015, + "loss": 1.6047, + "step": 4603 + }, + { + "epoch": 0.48565400843881856, + "grad_norm": 0.689990222454071, + "learning_rate": 0.0015, + "loss": 1.631, + "step": 4604 + }, + { + "epoch": 0.48575949367088606, + "grad_norm": 0.7121012806892395, + "learning_rate": 0.0015, + "loss": 1.6747, + "step": 4605 + }, + { + "epoch": 0.4858649789029536, + "grad_norm": 0.48668235540390015, + "learning_rate": 0.0015, + "loss": 1.6728, + "step": 4606 + }, + { + "epoch": 0.4859704641350211, + "grad_norm": 0.6600933074951172, + "learning_rate": 0.0015, + "loss": 1.6645, + "step": 4607 + }, + { + "epoch": 0.4860759493670886, + "grad_norm": 0.6586343050003052, + "learning_rate": 0.0015, + "loss": 1.6371, + "step": 4608 + }, + { + "epoch": 0.48618143459915614, + "grad_norm": 0.5716184377670288, + "learning_rate": 0.0015, + "loss": 1.659, + "step": 4609 + }, + { + "epoch": 0.48628691983122363, + "grad_norm": 0.9299889802932739, + "learning_rate": 0.0015, + "loss": 1.684, + "step": 4610 + }, + { + "epoch": 0.4863924050632911, + "grad_norm": 0.6306243538856506, + "learning_rate": 0.0015, + "loss": 1.6159, + "step": 4611 + }, + { + "epoch": 0.48649789029535867, + "grad_norm": 0.5388479828834534, + "learning_rate": 0.0015, + "loss": 1.6437, + "step": 4612 + }, + { + "epoch": 0.48660337552742616, + "grad_norm": 0.6615060567855835, + "learning_rate": 0.0015, + "loss": 1.6175, + "step": 4613 + }, + { + "epoch": 0.48670886075949366, + "grad_norm": 0.4851313531398773, + "learning_rate": 0.0015, + "loss": 1.6217, + "step": 4614 + }, + { + "epoch": 0.4868143459915612, + "grad_norm": 0.5432100296020508, + "learning_rate": 0.0015, + "loss": 1.6692, + "step": 4615 + }, + { + "epoch": 0.4869198312236287, + 
"grad_norm": 0.625106155872345, + "learning_rate": 0.0015, + "loss": 1.6601, + "step": 4616 + }, + { + "epoch": 0.4870253164556962, + "grad_norm": 0.4862165153026581, + "learning_rate": 0.0015, + "loss": 1.6729, + "step": 4617 + }, + { + "epoch": 0.48713080168776374, + "grad_norm": 0.5295156240463257, + "learning_rate": 0.0015, + "loss": 1.5984, + "step": 4618 + }, + { + "epoch": 0.48723628691983123, + "grad_norm": 0.5893027186393738, + "learning_rate": 0.0015, + "loss": 1.5798, + "step": 4619 + }, + { + "epoch": 0.4873417721518987, + "grad_norm": 0.4951131343841553, + "learning_rate": 0.0015, + "loss": 1.6432, + "step": 4620 + }, + { + "epoch": 0.48744725738396627, + "grad_norm": 0.6460741758346558, + "learning_rate": 0.0015, + "loss": 1.6633, + "step": 4621 + }, + { + "epoch": 0.48755274261603376, + "grad_norm": 0.5386523008346558, + "learning_rate": 0.0015, + "loss": 1.6184, + "step": 4622 + }, + { + "epoch": 0.48765822784810126, + "grad_norm": 0.6607567667961121, + "learning_rate": 0.0015, + "loss": 1.6348, + "step": 4623 + }, + { + "epoch": 0.4877637130801688, + "grad_norm": 0.7228679060935974, + "learning_rate": 0.0015, + "loss": 1.6527, + "step": 4624 + }, + { + "epoch": 0.4878691983122363, + "grad_norm": 0.566042959690094, + "learning_rate": 0.0015, + "loss": 1.6481, + "step": 4625 + }, + { + "epoch": 0.4879746835443038, + "grad_norm": 0.6492327451705933, + "learning_rate": 0.0015, + "loss": 1.6848, + "step": 4626 + }, + { + "epoch": 0.4880801687763713, + "grad_norm": 0.6564940214157104, + "learning_rate": 0.0015, + "loss": 1.681, + "step": 4627 + }, + { + "epoch": 0.48818565400843883, + "grad_norm": 0.5838112235069275, + "learning_rate": 0.0015, + "loss": 1.6521, + "step": 4628 + }, + { + "epoch": 0.4882911392405063, + "grad_norm": 0.5898457765579224, + "learning_rate": 0.0015, + "loss": 1.6313, + "step": 4629 + }, + { + "epoch": 0.4883966244725738, + "grad_norm": 0.6300853490829468, + "learning_rate": 0.0015, + "loss": 1.6843, + "step": 4630 + }, + { + 
"epoch": 0.48850210970464136, + "grad_norm": 0.6254135370254517, + "learning_rate": 0.0015, + "loss": 1.6537, + "step": 4631 + }, + { + "epoch": 0.48860759493670886, + "grad_norm": 0.5185118913650513, + "learning_rate": 0.0015, + "loss": 1.6439, + "step": 4632 + }, + { + "epoch": 0.48871308016877635, + "grad_norm": 0.4746887981891632, + "learning_rate": 0.0015, + "loss": 1.6455, + "step": 4633 + }, + { + "epoch": 0.4888185654008439, + "grad_norm": 0.48079928755760193, + "learning_rate": 0.0015, + "loss": 1.6467, + "step": 4634 + }, + { + "epoch": 0.4889240506329114, + "grad_norm": 0.6092884540557861, + "learning_rate": 0.0015, + "loss": 1.6342, + "step": 4635 + }, + { + "epoch": 0.4890295358649789, + "grad_norm": 0.6327087879180908, + "learning_rate": 0.0015, + "loss": 1.6639, + "step": 4636 + }, + { + "epoch": 0.48913502109704643, + "grad_norm": 0.47679081559181213, + "learning_rate": 0.0015, + "loss": 1.6481, + "step": 4637 + }, + { + "epoch": 0.4892405063291139, + "grad_norm": 0.5547592043876648, + "learning_rate": 0.0015, + "loss": 1.659, + "step": 4638 + }, + { + "epoch": 0.4893459915611814, + "grad_norm": 0.5467628836631775, + "learning_rate": 0.0015, + "loss": 1.6528, + "step": 4639 + }, + { + "epoch": 0.48945147679324896, + "grad_norm": 0.5302047729492188, + "learning_rate": 0.0015, + "loss": 1.6743, + "step": 4640 + }, + { + "epoch": 0.48955696202531646, + "grad_norm": 0.6105912923812866, + "learning_rate": 0.0015, + "loss": 1.6672, + "step": 4641 + }, + { + "epoch": 0.48966244725738395, + "grad_norm": 0.5176563262939453, + "learning_rate": 0.0015, + "loss": 1.6191, + "step": 4642 + }, + { + "epoch": 0.4897679324894515, + "grad_norm": 0.5613191723823547, + "learning_rate": 0.0015, + "loss": 1.6513, + "step": 4643 + }, + { + "epoch": 0.489873417721519, + "grad_norm": 0.7750377058982849, + "learning_rate": 0.0015, + "loss": 1.6598, + "step": 4644 + }, + { + "epoch": 0.4899789029535865, + "grad_norm": 0.5680108666419983, + "learning_rate": 0.0015, + "loss": 
1.6847, + "step": 4645 + }, + { + "epoch": 0.49008438818565403, + "grad_norm": 0.6858214139938354, + "learning_rate": 0.0015, + "loss": 1.6429, + "step": 4646 + }, + { + "epoch": 0.4901898734177215, + "grad_norm": 0.9767922163009644, + "learning_rate": 0.0015, + "loss": 1.6738, + "step": 4647 + }, + { + "epoch": 0.490295358649789, + "grad_norm": 0.7666829824447632, + "learning_rate": 0.0015, + "loss": 1.6439, + "step": 4648 + }, + { + "epoch": 0.49040084388185656, + "grad_norm": 0.475898414850235, + "learning_rate": 0.0015, + "loss": 1.6195, + "step": 4649 + }, + { + "epoch": 0.49050632911392406, + "grad_norm": 0.6132450699806213, + "learning_rate": 0.0015, + "loss": 1.6599, + "step": 4650 + }, + { + "epoch": 0.49061181434599155, + "grad_norm": 0.5402424335479736, + "learning_rate": 0.0015, + "loss": 1.6498, + "step": 4651 + }, + { + "epoch": 0.4907172995780591, + "grad_norm": 0.5422917008399963, + "learning_rate": 0.0015, + "loss": 1.6444, + "step": 4652 + }, + { + "epoch": 0.4908227848101266, + "grad_norm": 0.5834674835205078, + "learning_rate": 0.0015, + "loss": 1.6266, + "step": 4653 + }, + { + "epoch": 0.4909282700421941, + "grad_norm": 0.5989320278167725, + "learning_rate": 0.0015, + "loss": 1.6632, + "step": 4654 + }, + { + "epoch": 0.49103375527426163, + "grad_norm": 0.494384229183197, + "learning_rate": 0.0015, + "loss": 1.6282, + "step": 4655 + }, + { + "epoch": 0.4911392405063291, + "grad_norm": 0.5339090824127197, + "learning_rate": 0.0015, + "loss": 1.6324, + "step": 4656 + }, + { + "epoch": 0.4912447257383966, + "grad_norm": 0.5074661374092102, + "learning_rate": 0.0015, + "loss": 1.6351, + "step": 4657 + }, + { + "epoch": 0.4913502109704641, + "grad_norm": 0.45555511116981506, + "learning_rate": 0.0015, + "loss": 1.6501, + "step": 4658 + }, + { + "epoch": 0.49145569620253166, + "grad_norm": 0.5435980558395386, + "learning_rate": 0.0015, + "loss": 1.6333, + "step": 4659 + }, + { + "epoch": 0.49156118143459915, + "grad_norm": 0.4912318289279938, + 
"learning_rate": 0.0015, + "loss": 1.6498, + "step": 4660 + }, + { + "epoch": 0.49166666666666664, + "grad_norm": 0.5439279079437256, + "learning_rate": 0.0015, + "loss": 1.6591, + "step": 4661 + }, + { + "epoch": 0.4917721518987342, + "grad_norm": 0.5071859359741211, + "learning_rate": 0.0015, + "loss": 1.6196, + "step": 4662 + }, + { + "epoch": 0.4918776371308017, + "grad_norm": 0.5272972583770752, + "learning_rate": 0.0015, + "loss": 1.6231, + "step": 4663 + }, + { + "epoch": 0.4919831223628692, + "grad_norm": 0.6428019404411316, + "learning_rate": 0.0015, + "loss": 1.6345, + "step": 4664 + }, + { + "epoch": 0.4920886075949367, + "grad_norm": 0.473804771900177, + "learning_rate": 0.0015, + "loss": 1.6446, + "step": 4665 + }, + { + "epoch": 0.4921940928270042, + "grad_norm": 0.5919150114059448, + "learning_rate": 0.0015, + "loss": 1.6339, + "step": 4666 + }, + { + "epoch": 0.4922995780590717, + "grad_norm": 0.5289254784584045, + "learning_rate": 0.0015, + "loss": 1.6599, + "step": 4667 + }, + { + "epoch": 0.49240506329113926, + "grad_norm": 0.7686335444450378, + "learning_rate": 0.0015, + "loss": 1.6529, + "step": 4668 + }, + { + "epoch": 0.49251054852320675, + "grad_norm": 0.560459554195404, + "learning_rate": 0.0015, + "loss": 1.646, + "step": 4669 + }, + { + "epoch": 0.49261603375527424, + "grad_norm": 0.5330448150634766, + "learning_rate": 0.0015, + "loss": 1.6361, + "step": 4670 + }, + { + "epoch": 0.4927215189873418, + "grad_norm": 0.49647217988967896, + "learning_rate": 0.0015, + "loss": 1.6425, + "step": 4671 + }, + { + "epoch": 0.4928270042194093, + "grad_norm": 0.5947394967079163, + "learning_rate": 0.0015, + "loss": 1.6126, + "step": 4672 + }, + { + "epoch": 0.4929324894514768, + "grad_norm": 0.5732641816139221, + "learning_rate": 0.0015, + "loss": 1.6616, + "step": 4673 + }, + { + "epoch": 0.4930379746835443, + "grad_norm": 0.47771984338760376, + "learning_rate": 0.0015, + "loss": 1.6787, + "step": 4674 + }, + { + "epoch": 0.4931434599156118, + 
"grad_norm": 0.5954535007476807, + "learning_rate": 0.0015, + "loss": 1.6261, + "step": 4675 + }, + { + "epoch": 0.4932489451476793, + "grad_norm": 0.6330942511558533, + "learning_rate": 0.0015, + "loss": 1.6123, + "step": 4676 + }, + { + "epoch": 0.49335443037974686, + "grad_norm": 0.5292167067527771, + "learning_rate": 0.0015, + "loss": 1.6349, + "step": 4677 + }, + { + "epoch": 0.49345991561181435, + "grad_norm": 0.5785264372825623, + "learning_rate": 0.0015, + "loss": 1.6203, + "step": 4678 + }, + { + "epoch": 0.49356540084388184, + "grad_norm": 0.6675677299499512, + "learning_rate": 0.0015, + "loss": 1.6629, + "step": 4679 + }, + { + "epoch": 0.4936708860759494, + "grad_norm": 0.6064327359199524, + "learning_rate": 0.0015, + "loss": 1.6139, + "step": 4680 + }, + { + "epoch": 0.4937763713080169, + "grad_norm": 0.5640125274658203, + "learning_rate": 0.0015, + "loss": 1.6328, + "step": 4681 + }, + { + "epoch": 0.4938818565400844, + "grad_norm": 0.5230752229690552, + "learning_rate": 0.0015, + "loss": 1.612, + "step": 4682 + }, + { + "epoch": 0.4939873417721519, + "grad_norm": 0.6954069137573242, + "learning_rate": 0.0015, + "loss": 1.6413, + "step": 4683 + }, + { + "epoch": 0.4940928270042194, + "grad_norm": 0.5776487588882446, + "learning_rate": 0.0015, + "loss": 1.6094, + "step": 4684 + }, + { + "epoch": 0.4941983122362869, + "grad_norm": 0.5650107860565186, + "learning_rate": 0.0015, + "loss": 1.5985, + "step": 4685 + }, + { + "epoch": 0.49430379746835446, + "grad_norm": 0.5918284058570862, + "learning_rate": 0.0015, + "loss": 1.6491, + "step": 4686 + }, + { + "epoch": 0.49440928270042195, + "grad_norm": 0.5596849918365479, + "learning_rate": 0.0015, + "loss": 1.6727, + "step": 4687 + }, + { + "epoch": 0.49451476793248944, + "grad_norm": 0.6597175002098083, + "learning_rate": 0.0015, + "loss": 1.634, + "step": 4688 + }, + { + "epoch": 0.494620253164557, + "grad_norm": 0.5488419532775879, + "learning_rate": 0.0015, + "loss": 1.6222, + "step": 4689 + }, + { + 
"epoch": 0.4947257383966245, + "grad_norm": 0.5714768767356873, + "learning_rate": 0.0015, + "loss": 1.6793, + "step": 4690 + }, + { + "epoch": 0.494831223628692, + "grad_norm": 0.7337178587913513, + "learning_rate": 0.0015, + "loss": 1.6811, + "step": 4691 + }, + { + "epoch": 0.49493670886075947, + "grad_norm": 0.5429230332374573, + "learning_rate": 0.0015, + "loss": 1.6785, + "step": 4692 + }, + { + "epoch": 0.495042194092827, + "grad_norm": 0.5269771218299866, + "learning_rate": 0.0015, + "loss": 1.6384, + "step": 4693 + }, + { + "epoch": 0.4951476793248945, + "grad_norm": 0.5756490230560303, + "learning_rate": 0.0015, + "loss": 1.6645, + "step": 4694 + }, + { + "epoch": 0.495253164556962, + "grad_norm": 0.49909448623657227, + "learning_rate": 0.0015, + "loss": 1.6276, + "step": 4695 + }, + { + "epoch": 0.49535864978902955, + "grad_norm": 0.7548465728759766, + "learning_rate": 0.0015, + "loss": 1.6664, + "step": 4696 + }, + { + "epoch": 0.49546413502109704, + "grad_norm": 0.7117580771446228, + "learning_rate": 0.0015, + "loss": 1.6349, + "step": 4697 + }, + { + "epoch": 0.49556962025316453, + "grad_norm": 0.6276371479034424, + "learning_rate": 0.0015, + "loss": 1.619, + "step": 4698 + }, + { + "epoch": 0.4956751054852321, + "grad_norm": 0.6697174906730652, + "learning_rate": 0.0015, + "loss": 1.6496, + "step": 4699 + }, + { + "epoch": 0.4957805907172996, + "grad_norm": 0.9186984300613403, + "learning_rate": 0.0015, + "loss": 1.6335, + "step": 4700 + }, + { + "epoch": 0.49588607594936707, + "grad_norm": 0.7723588943481445, + "learning_rate": 0.0015, + "loss": 1.658, + "step": 4701 + }, + { + "epoch": 0.4959915611814346, + "grad_norm": 0.6396031379699707, + "learning_rate": 0.0015, + "loss": 1.661, + "step": 4702 + }, + { + "epoch": 0.4960970464135021, + "grad_norm": 0.47685229778289795, + "learning_rate": 0.0015, + "loss": 1.6174, + "step": 4703 + }, + { + "epoch": 0.4962025316455696, + "grad_norm": 0.5432143807411194, + "learning_rate": 0.0015, + "loss": 1.6531, 
+ "step": 4704 + }, + { + "epoch": 0.49630801687763715, + "grad_norm": 0.48465240001678467, + "learning_rate": 0.0015, + "loss": 1.6282, + "step": 4705 + }, + { + "epoch": 0.49641350210970464, + "grad_norm": 0.6186333894729614, + "learning_rate": 0.0015, + "loss": 1.6332, + "step": 4706 + }, + { + "epoch": 0.49651898734177213, + "grad_norm": 0.5811643004417419, + "learning_rate": 0.0015, + "loss": 1.6903, + "step": 4707 + }, + { + "epoch": 0.4966244725738397, + "grad_norm": 0.492428719997406, + "learning_rate": 0.0015, + "loss": 1.6558, + "step": 4708 + }, + { + "epoch": 0.4967299578059072, + "grad_norm": 0.5256556272506714, + "learning_rate": 0.0015, + "loss": 1.6339, + "step": 4709 + }, + { + "epoch": 0.49683544303797467, + "grad_norm": 0.5144762396812439, + "learning_rate": 0.0015, + "loss": 1.6486, + "step": 4710 + }, + { + "epoch": 0.4969409282700422, + "grad_norm": 0.49647825956344604, + "learning_rate": 0.0015, + "loss": 1.6574, + "step": 4711 + }, + { + "epoch": 0.4970464135021097, + "grad_norm": 0.5308802127838135, + "learning_rate": 0.0015, + "loss": 1.6385, + "step": 4712 + }, + { + "epoch": 0.4971518987341772, + "grad_norm": 0.4832683801651001, + "learning_rate": 0.0015, + "loss": 1.617, + "step": 4713 + }, + { + "epoch": 0.49725738396624475, + "grad_norm": 0.48039448261260986, + "learning_rate": 0.0015, + "loss": 1.653, + "step": 4714 + }, + { + "epoch": 0.49736286919831224, + "grad_norm": 0.5602380037307739, + "learning_rate": 0.0015, + "loss": 1.6386, + "step": 4715 + }, + { + "epoch": 0.49746835443037973, + "grad_norm": 0.6170874834060669, + "learning_rate": 0.0015, + "loss": 1.63, + "step": 4716 + }, + { + "epoch": 0.4975738396624473, + "grad_norm": 0.6201112270355225, + "learning_rate": 0.0015, + "loss": 1.6332, + "step": 4717 + }, + { + "epoch": 0.4976793248945148, + "grad_norm": 0.5356724262237549, + "learning_rate": 0.0015, + "loss": 1.6842, + "step": 4718 + }, + { + "epoch": 0.49778481012658227, + "grad_norm": 0.7496811151504517, + 
"learning_rate": 0.0015, + "loss": 1.6512, + "step": 4719 + }, + { + "epoch": 0.4978902953586498, + "grad_norm": 0.8356648087501526, + "learning_rate": 0.0015, + "loss": 1.6168, + "step": 4720 + }, + { + "epoch": 0.4979957805907173, + "grad_norm": 0.5207118988037109, + "learning_rate": 0.0015, + "loss": 1.6345, + "step": 4721 + }, + { + "epoch": 0.4981012658227848, + "grad_norm": 0.5726657509803772, + "learning_rate": 0.0015, + "loss": 1.6463, + "step": 4722 + }, + { + "epoch": 0.49820675105485235, + "grad_norm": 0.6330668926239014, + "learning_rate": 0.0015, + "loss": 1.6498, + "step": 4723 + }, + { + "epoch": 0.49831223628691984, + "grad_norm": 0.4804544448852539, + "learning_rate": 0.0015, + "loss": 1.6554, + "step": 4724 + }, + { + "epoch": 0.49841772151898733, + "grad_norm": 0.6218848824501038, + "learning_rate": 0.0015, + "loss": 1.6458, + "step": 4725 + }, + { + "epoch": 0.4985232067510548, + "grad_norm": 0.45861953496932983, + "learning_rate": 0.0015, + "loss": 1.6344, + "step": 4726 + }, + { + "epoch": 0.4986286919831224, + "grad_norm": 0.744053840637207, + "learning_rate": 0.0015, + "loss": 1.6163, + "step": 4727 + }, + { + "epoch": 0.49873417721518987, + "grad_norm": 1.0661535263061523, + "learning_rate": 0.0015, + "loss": 1.6813, + "step": 4728 + }, + { + "epoch": 0.49883966244725736, + "grad_norm": 0.6184759140014648, + "learning_rate": 0.0015, + "loss": 1.6245, + "step": 4729 + }, + { + "epoch": 0.4989451476793249, + "grad_norm": 0.6284570097923279, + "learning_rate": 0.0015, + "loss": 1.6343, + "step": 4730 + }, + { + "epoch": 0.4990506329113924, + "grad_norm": 0.7645490169525146, + "learning_rate": 0.0015, + "loss": 1.6667, + "step": 4731 + }, + { + "epoch": 0.4991561181434599, + "grad_norm": 0.5360997319221497, + "learning_rate": 0.0015, + "loss": 1.6227, + "step": 4732 + }, + { + "epoch": 0.49926160337552744, + "grad_norm": 0.6242209672927856, + "learning_rate": 0.0015, + "loss": 1.6637, + "step": 4733 + }, + { + "epoch": 0.49936708860759493, + 
"grad_norm": 0.6186540722846985, + "learning_rate": 0.0015, + "loss": 1.6483, + "step": 4734 + }, + { + "epoch": 0.4994725738396624, + "grad_norm": 0.5110530257225037, + "learning_rate": 0.0015, + "loss": 1.6626, + "step": 4735 + }, + { + "epoch": 0.49957805907173, + "grad_norm": 0.6061602234840393, + "learning_rate": 0.0015, + "loss": 1.6434, + "step": 4736 + }, + { + "epoch": 0.49968354430379747, + "grad_norm": 0.5486086010932922, + "learning_rate": 0.0015, + "loss": 1.6149, + "step": 4737 + }, + { + "epoch": 0.49978902953586496, + "grad_norm": 0.4641488492488861, + "learning_rate": 0.0015, + "loss": 1.5949, + "step": 4738 + }, + { + "epoch": 0.4998945147679325, + "grad_norm": 0.6686543226242065, + "learning_rate": 0.0015, + "loss": 1.6386, + "step": 4739 + }, + { + "epoch": 0.5, + "grad_norm": 0.5206875801086426, + "learning_rate": 0.0015, + "loss": 1.6413, + "step": 4740 + }, + { + "epoch": 0.5001054852320675, + "grad_norm": 0.643432080745697, + "learning_rate": 0.0015, + "loss": 1.6758, + "step": 4741 + }, + { + "epoch": 0.500210970464135, + "grad_norm": 0.8481569886207581, + "learning_rate": 0.0015, + "loss": 1.6349, + "step": 4742 + }, + { + "epoch": 0.5003164556962025, + "grad_norm": 0.519643247127533, + "learning_rate": 0.0015, + "loss": 1.6272, + "step": 4743 + }, + { + "epoch": 0.5004219409282701, + "grad_norm": 0.567777693271637, + "learning_rate": 0.0015, + "loss": 1.6879, + "step": 4744 + }, + { + "epoch": 0.5005274261603375, + "grad_norm": 0.5984745025634766, + "learning_rate": 0.0015, + "loss": 1.6387, + "step": 4745 + }, + { + "epoch": 0.5006329113924051, + "grad_norm": 0.43486952781677246, + "learning_rate": 0.0015, + "loss": 1.666, + "step": 4746 + }, + { + "epoch": 0.5007383966244726, + "grad_norm": 0.5758285522460938, + "learning_rate": 0.0015, + "loss": 1.6617, + "step": 4747 + }, + { + "epoch": 0.50084388185654, + "grad_norm": 0.6878244280815125, + "learning_rate": 0.0015, + "loss": 1.6734, + "step": 4748 + }, + { + "epoch": 
0.5009493670886076, + "grad_norm": 0.529334306716919, + "learning_rate": 0.0015, + "loss": 1.6676, + "step": 4749 + }, + { + "epoch": 0.5010548523206751, + "grad_norm": 0.7365033030509949, + "learning_rate": 0.0015, + "loss": 1.6557, + "step": 4750 + }, + { + "epoch": 0.5011603375527426, + "grad_norm": 0.8762412071228027, + "learning_rate": 0.0015, + "loss": 1.6714, + "step": 4751 + }, + { + "epoch": 0.5012658227848101, + "grad_norm": 0.4611147344112396, + "learning_rate": 0.0015, + "loss": 1.6379, + "step": 4752 + }, + { + "epoch": 0.5013713080168777, + "grad_norm": 0.7059435248374939, + "learning_rate": 0.0015, + "loss": 1.6626, + "step": 4753 + }, + { + "epoch": 0.5014767932489451, + "grad_norm": 0.6425533294677734, + "learning_rate": 0.0015, + "loss": 1.6433, + "step": 4754 + }, + { + "epoch": 0.5015822784810127, + "grad_norm": 0.50490802526474, + "learning_rate": 0.0015, + "loss": 1.629, + "step": 4755 + }, + { + "epoch": 0.5016877637130802, + "grad_norm": 0.5531265139579773, + "learning_rate": 0.0015, + "loss": 1.596, + "step": 4756 + }, + { + "epoch": 0.5017932489451477, + "grad_norm": 0.514526903629303, + "learning_rate": 0.0015, + "loss": 1.6577, + "step": 4757 + }, + { + "epoch": 0.5018987341772152, + "grad_norm": 0.6004590392112732, + "learning_rate": 0.0015, + "loss": 1.6317, + "step": 4758 + }, + { + "epoch": 0.5020042194092827, + "grad_norm": 0.5942376852035522, + "learning_rate": 0.0015, + "loss": 1.6236, + "step": 4759 + }, + { + "epoch": 0.5021097046413502, + "grad_norm": 0.5438563823699951, + "learning_rate": 0.0015, + "loss": 1.651, + "step": 4760 + }, + { + "epoch": 0.5022151898734177, + "grad_norm": 0.5856278538703918, + "learning_rate": 0.0015, + "loss": 1.6466, + "step": 4761 + }, + { + "epoch": 0.5023206751054853, + "grad_norm": 0.5899941921234131, + "learning_rate": 0.0015, + "loss": 1.6684, + "step": 4762 + }, + { + "epoch": 0.5024261603375527, + "grad_norm": 0.5922358632087708, + "learning_rate": 0.0015, + "loss": 1.63, + "step": 4763 + 
}, + { + "epoch": 0.5025316455696203, + "grad_norm": 0.6511942148208618, + "learning_rate": 0.0015, + "loss": 1.6443, + "step": 4764 + }, + { + "epoch": 0.5026371308016878, + "grad_norm": 0.5567985773086548, + "learning_rate": 0.0015, + "loss": 1.6243, + "step": 4765 + }, + { + "epoch": 0.5027426160337553, + "grad_norm": 0.55110764503479, + "learning_rate": 0.0015, + "loss": 1.6096, + "step": 4766 + }, + { + "epoch": 0.5028481012658228, + "grad_norm": 0.505946934223175, + "learning_rate": 0.0015, + "loss": 1.6504, + "step": 4767 + }, + { + "epoch": 0.5029535864978903, + "grad_norm": 0.5708931684494019, + "learning_rate": 0.0015, + "loss": 1.6218, + "step": 4768 + }, + { + "epoch": 0.5030590717299578, + "grad_norm": 0.49275654554367065, + "learning_rate": 0.0015, + "loss": 1.683, + "step": 4769 + }, + { + "epoch": 0.5031645569620253, + "grad_norm": 0.5598103404045105, + "learning_rate": 0.0015, + "loss": 1.6453, + "step": 4770 + }, + { + "epoch": 0.5032700421940929, + "grad_norm": 0.5133016109466553, + "learning_rate": 0.0015, + "loss": 1.6256, + "step": 4771 + }, + { + "epoch": 0.5033755274261603, + "grad_norm": 0.5452487468719482, + "learning_rate": 0.0015, + "loss": 1.7077, + "step": 4772 + }, + { + "epoch": 0.5034810126582279, + "grad_norm": 0.59088534116745, + "learning_rate": 0.0015, + "loss": 1.6465, + "step": 4773 + }, + { + "epoch": 0.5035864978902953, + "grad_norm": 0.4592975378036499, + "learning_rate": 0.0015, + "loss": 1.6184, + "step": 4774 + }, + { + "epoch": 0.5036919831223629, + "grad_norm": 0.6327562928199768, + "learning_rate": 0.0015, + "loss": 1.6795, + "step": 4775 + }, + { + "epoch": 0.5037974683544304, + "grad_norm": 0.511746346950531, + "learning_rate": 0.0015, + "loss": 1.656, + "step": 4776 + }, + { + "epoch": 0.5039029535864978, + "grad_norm": 0.6945894360542297, + "learning_rate": 0.0015, + "loss": 1.6304, + "step": 4777 + }, + { + "epoch": 0.5040084388185654, + "grad_norm": 0.7663832306861877, + "learning_rate": 0.0015, + "loss": 
1.6725, + "step": 4778 + }, + { + "epoch": 0.5041139240506329, + "grad_norm": 0.5099506974220276, + "learning_rate": 0.0015, + "loss": 1.6322, + "step": 4779 + }, + { + "epoch": 0.5042194092827004, + "grad_norm": 0.666743278503418, + "learning_rate": 0.0015, + "loss": 1.6362, + "step": 4780 + }, + { + "epoch": 0.5043248945147679, + "grad_norm": 0.5317825675010681, + "learning_rate": 0.0015, + "loss": 1.6101, + "step": 4781 + }, + { + "epoch": 0.5044303797468355, + "grad_norm": 0.5478038191795349, + "learning_rate": 0.0015, + "loss": 1.6717, + "step": 4782 + }, + { + "epoch": 0.5045358649789029, + "grad_norm": 0.4518919885158539, + "learning_rate": 0.0015, + "loss": 1.6503, + "step": 4783 + }, + { + "epoch": 0.5046413502109705, + "grad_norm": 0.5621270537376404, + "learning_rate": 0.0015, + "loss": 1.6205, + "step": 4784 + }, + { + "epoch": 0.504746835443038, + "grad_norm": 0.6832736134529114, + "learning_rate": 0.0015, + "loss": 1.6376, + "step": 4785 + }, + { + "epoch": 0.5048523206751054, + "grad_norm": 0.5388296842575073, + "learning_rate": 0.0015, + "loss": 1.6243, + "step": 4786 + }, + { + "epoch": 0.504957805907173, + "grad_norm": 0.584375262260437, + "learning_rate": 0.0015, + "loss": 1.6339, + "step": 4787 + }, + { + "epoch": 0.5050632911392405, + "grad_norm": 0.4646458625793457, + "learning_rate": 0.0015, + "loss": 1.6313, + "step": 4788 + }, + { + "epoch": 0.505168776371308, + "grad_norm": 0.6249669194221497, + "learning_rate": 0.0015, + "loss": 1.5907, + "step": 4789 + }, + { + "epoch": 0.5052742616033755, + "grad_norm": 0.534736156463623, + "learning_rate": 0.0015, + "loss": 1.6697, + "step": 4790 + }, + { + "epoch": 0.5053797468354431, + "grad_norm": 0.57759028673172, + "learning_rate": 0.0015, + "loss": 1.6691, + "step": 4791 + }, + { + "epoch": 0.5054852320675105, + "grad_norm": 0.557238757610321, + "learning_rate": 0.0015, + "loss": 1.6001, + "step": 4792 + }, + { + "epoch": 0.505590717299578, + "grad_norm": 0.5487537980079651, + "learning_rate": 
0.0015, + "loss": 1.6694, + "step": 4793 + }, + { + "epoch": 0.5056962025316456, + "grad_norm": 0.5272213816642761, + "learning_rate": 0.0015, + "loss": 1.7048, + "step": 4794 + }, + { + "epoch": 0.505801687763713, + "grad_norm": 0.5248759984970093, + "learning_rate": 0.0015, + "loss": 1.6327, + "step": 4795 + }, + { + "epoch": 0.5059071729957806, + "grad_norm": 0.5483149290084839, + "learning_rate": 0.0015, + "loss": 1.6294, + "step": 4796 + }, + { + "epoch": 0.5060126582278481, + "grad_norm": 0.5631021857261658, + "learning_rate": 0.0015, + "loss": 1.6312, + "step": 4797 + }, + { + "epoch": 0.5061181434599156, + "grad_norm": 0.497884064912796, + "learning_rate": 0.0015, + "loss": 1.594, + "step": 4798 + }, + { + "epoch": 0.5062236286919831, + "grad_norm": 0.6340633034706116, + "learning_rate": 0.0015, + "loss": 1.6395, + "step": 4799 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 0.542874276638031, + "learning_rate": 0.0015, + "loss": 1.6131, + "step": 4800 + }, + { + "epoch": 0.5064345991561181, + "grad_norm": 0.6450417041778564, + "learning_rate": 0.0015, + "loss": 1.6165, + "step": 4801 + }, + { + "epoch": 0.5065400843881857, + "grad_norm": 0.486097127199173, + "learning_rate": 0.0015, + "loss": 1.6436, + "step": 4802 + }, + { + "epoch": 0.5066455696202532, + "grad_norm": 0.6658429503440857, + "learning_rate": 0.0015, + "loss": 1.6454, + "step": 4803 + }, + { + "epoch": 0.5067510548523206, + "grad_norm": 0.6243430972099304, + "learning_rate": 0.0015, + "loss": 1.6151, + "step": 4804 + }, + { + "epoch": 0.5068565400843882, + "grad_norm": 0.4700041115283966, + "learning_rate": 0.0015, + "loss": 1.6298, + "step": 4805 + }, + { + "epoch": 0.5069620253164557, + "grad_norm": 0.667640209197998, + "learning_rate": 0.0015, + "loss": 1.639, + "step": 4806 + }, + { + "epoch": 0.5070675105485232, + "grad_norm": 0.5673599243164062, + "learning_rate": 0.0015, + "loss": 1.6693, + "step": 4807 + }, + { + "epoch": 0.5071729957805907, + "grad_norm": 0.5642109513282776, 
+ "learning_rate": 0.0015, + "loss": 1.6456, + "step": 4808 + }, + { + "epoch": 0.5072784810126583, + "grad_norm": 0.5934702754020691, + "learning_rate": 0.0015, + "loss": 1.6516, + "step": 4809 + }, + { + "epoch": 0.5073839662447257, + "grad_norm": 0.5233937501907349, + "learning_rate": 0.0015, + "loss": 1.6393, + "step": 4810 + }, + { + "epoch": 0.5074894514767933, + "grad_norm": 0.5223708748817444, + "learning_rate": 0.0015, + "loss": 1.6375, + "step": 4811 + }, + { + "epoch": 0.5075949367088608, + "grad_norm": 0.5699358582496643, + "learning_rate": 0.0015, + "loss": 1.6364, + "step": 4812 + }, + { + "epoch": 0.5077004219409282, + "grad_norm": 0.5589818358421326, + "learning_rate": 0.0015, + "loss": 1.6891, + "step": 4813 + }, + { + "epoch": 0.5078059071729958, + "grad_norm": 0.4734857380390167, + "learning_rate": 0.0015, + "loss": 1.668, + "step": 4814 + }, + { + "epoch": 0.5079113924050633, + "grad_norm": 0.506865918636322, + "learning_rate": 0.0015, + "loss": 1.6596, + "step": 4815 + }, + { + "epoch": 0.5080168776371308, + "grad_norm": 0.6197706460952759, + "learning_rate": 0.0015, + "loss": 1.6259, + "step": 4816 + }, + { + "epoch": 0.5081223628691983, + "grad_norm": 0.6048730611801147, + "learning_rate": 0.0015, + "loss": 1.6418, + "step": 4817 + }, + { + "epoch": 0.5082278481012659, + "grad_norm": 0.5068001747131348, + "learning_rate": 0.0015, + "loss": 1.6979, + "step": 4818 + }, + { + "epoch": 0.5083333333333333, + "grad_norm": 0.5281519889831543, + "learning_rate": 0.0015, + "loss": 1.6754, + "step": 4819 + }, + { + "epoch": 0.5084388185654009, + "grad_norm": 0.5379570126533508, + "learning_rate": 0.0015, + "loss": 1.6316, + "step": 4820 + }, + { + "epoch": 0.5085443037974684, + "grad_norm": 0.5386903882026672, + "learning_rate": 0.0015, + "loss": 1.666, + "step": 4821 + }, + { + "epoch": 0.5086497890295358, + "grad_norm": 0.5416865348815918, + "learning_rate": 0.0015, + "loss": 1.6314, + "step": 4822 + }, + { + "epoch": 0.5087552742616034, + 
"grad_norm": 0.5622119307518005, + "learning_rate": 0.0015, + "loss": 1.6555, + "step": 4823 + }, + { + "epoch": 0.5088607594936709, + "grad_norm": 0.823489248752594, + "learning_rate": 0.0015, + "loss": 1.6658, + "step": 4824 + }, + { + "epoch": 0.5089662447257384, + "grad_norm": 0.5296961665153503, + "learning_rate": 0.0015, + "loss": 1.6504, + "step": 4825 + }, + { + "epoch": 0.5090717299578059, + "grad_norm": 0.6514083743095398, + "learning_rate": 0.0015, + "loss": 1.6075, + "step": 4826 + }, + { + "epoch": 0.5091772151898735, + "grad_norm": 0.7773762345314026, + "learning_rate": 0.0015, + "loss": 1.6214, + "step": 4827 + }, + { + "epoch": 0.5092827004219409, + "grad_norm": 0.6508411169052124, + "learning_rate": 0.0015, + "loss": 1.6495, + "step": 4828 + }, + { + "epoch": 0.5093881856540085, + "grad_norm": 0.45788708329200745, + "learning_rate": 0.0015, + "loss": 1.6281, + "step": 4829 + }, + { + "epoch": 0.509493670886076, + "grad_norm": 0.73536217212677, + "learning_rate": 0.0015, + "loss": 1.6321, + "step": 4830 + }, + { + "epoch": 0.5095991561181434, + "grad_norm": 0.7188004851341248, + "learning_rate": 0.0015, + "loss": 1.6507, + "step": 4831 + }, + { + "epoch": 0.509704641350211, + "grad_norm": 0.6408347487449646, + "learning_rate": 0.0015, + "loss": 1.6482, + "step": 4832 + }, + { + "epoch": 0.5098101265822785, + "grad_norm": 0.5778459906578064, + "learning_rate": 0.0015, + "loss": 1.5965, + "step": 4833 + }, + { + "epoch": 0.509915611814346, + "grad_norm": 1.011449933052063, + "learning_rate": 0.0015, + "loss": 1.6383, + "step": 4834 + }, + { + "epoch": 0.5100210970464135, + "grad_norm": 0.954604983329773, + "learning_rate": 0.0015, + "loss": 1.6479, + "step": 4835 + }, + { + "epoch": 0.5101265822784811, + "grad_norm": 0.4972829222679138, + "learning_rate": 0.0015, + "loss": 1.6097, + "step": 4836 + }, + { + "epoch": 0.5102320675105485, + "grad_norm": 0.7298729419708252, + "learning_rate": 0.0015, + "loss": 1.6101, + "step": 4837 + }, + { + "epoch": 
0.510337552742616, + "grad_norm": 0.6276075839996338, + "learning_rate": 0.0015, + "loss": 1.6452, + "step": 4838 + }, + { + "epoch": 0.5104430379746835, + "grad_norm": 0.582192063331604, + "learning_rate": 0.0015, + "loss": 1.6358, + "step": 4839 + }, + { + "epoch": 0.510548523206751, + "grad_norm": 0.5772690176963806, + "learning_rate": 0.0015, + "loss": 1.6852, + "step": 4840 + }, + { + "epoch": 0.5106540084388186, + "grad_norm": 0.6315447092056274, + "learning_rate": 0.0015, + "loss": 1.6337, + "step": 4841 + }, + { + "epoch": 0.510759493670886, + "grad_norm": 0.49861079454421997, + "learning_rate": 0.0015, + "loss": 1.6454, + "step": 4842 + }, + { + "epoch": 0.5108649789029536, + "grad_norm": 0.5529313683509827, + "learning_rate": 0.0015, + "loss": 1.6479, + "step": 4843 + }, + { + "epoch": 0.5109704641350211, + "grad_norm": 0.5122413635253906, + "learning_rate": 0.0015, + "loss": 1.6475, + "step": 4844 + }, + { + "epoch": 0.5110759493670886, + "grad_norm": 0.6824837327003479, + "learning_rate": 0.0015, + "loss": 1.6361, + "step": 4845 + }, + { + "epoch": 0.5111814345991561, + "grad_norm": 0.5071849822998047, + "learning_rate": 0.0015, + "loss": 1.6658, + "step": 4846 + }, + { + "epoch": 0.5112869198312237, + "grad_norm": 0.5284269452095032, + "learning_rate": 0.0015, + "loss": 1.6377, + "step": 4847 + }, + { + "epoch": 0.5113924050632911, + "grad_norm": 0.6052508354187012, + "learning_rate": 0.0015, + "loss": 1.6621, + "step": 4848 + }, + { + "epoch": 0.5114978902953586, + "grad_norm": 0.5683450102806091, + "learning_rate": 0.0015, + "loss": 1.6405, + "step": 4849 + }, + { + "epoch": 0.5116033755274262, + "grad_norm": 0.5185849070549011, + "learning_rate": 0.0015, + "loss": 1.6658, + "step": 4850 + }, + { + "epoch": 0.5117088607594936, + "grad_norm": 0.5939214825630188, + "learning_rate": 0.0015, + "loss": 1.6825, + "step": 4851 + }, + { + "epoch": 0.5118143459915612, + "grad_norm": 0.5073562860488892, + "learning_rate": 0.0015, + "loss": 1.665, + "step": 
4852 + }, + { + "epoch": 0.5119198312236287, + "grad_norm": 0.5370201468467712, + "learning_rate": 0.0015, + "loss": 1.6071, + "step": 4853 + }, + { + "epoch": 0.5120253164556962, + "grad_norm": 0.5023263096809387, + "learning_rate": 0.0015, + "loss": 1.6767, + "step": 4854 + }, + { + "epoch": 0.5121308016877637, + "grad_norm": 0.5580131411552429, + "learning_rate": 0.0015, + "loss": 1.6175, + "step": 4855 + }, + { + "epoch": 0.5122362869198313, + "grad_norm": 0.48221713304519653, + "learning_rate": 0.0015, + "loss": 1.6257, + "step": 4856 + }, + { + "epoch": 0.5123417721518987, + "grad_norm": 0.5159028172492981, + "learning_rate": 0.0015, + "loss": 1.624, + "step": 4857 + }, + { + "epoch": 0.5124472573839662, + "grad_norm": 0.6387952566146851, + "learning_rate": 0.0015, + "loss": 1.6069, + "step": 4858 + }, + { + "epoch": 0.5125527426160338, + "grad_norm": 0.5266417860984802, + "learning_rate": 0.0015, + "loss": 1.5892, + "step": 4859 + }, + { + "epoch": 0.5126582278481012, + "grad_norm": 0.5424797534942627, + "learning_rate": 0.0015, + "loss": 1.642, + "step": 4860 + }, + { + "epoch": 0.5127637130801688, + "grad_norm": 0.6165294051170349, + "learning_rate": 0.0015, + "loss": 1.6258, + "step": 4861 + }, + { + "epoch": 0.5128691983122363, + "grad_norm": 0.5575075745582581, + "learning_rate": 0.0015, + "loss": 1.6256, + "step": 4862 + }, + { + "epoch": 0.5129746835443038, + "grad_norm": 0.5405846238136292, + "learning_rate": 0.0015, + "loss": 1.6547, + "step": 4863 + }, + { + "epoch": 0.5130801687763713, + "grad_norm": 0.546652615070343, + "learning_rate": 0.0015, + "loss": 1.6493, + "step": 4864 + }, + { + "epoch": 0.5131856540084389, + "grad_norm": 0.5599865317344666, + "learning_rate": 0.0015, + "loss": 1.6049, + "step": 4865 + }, + { + "epoch": 0.5132911392405063, + "grad_norm": 0.5863654017448425, + "learning_rate": 0.0015, + "loss": 1.6331, + "step": 4866 + }, + { + "epoch": 0.5133966244725738, + "grad_norm": 0.5758659839630127, + "learning_rate": 0.0015, + 
"loss": 1.597, + "step": 4867 + }, + { + "epoch": 0.5135021097046414, + "grad_norm": 0.6346961855888367, + "learning_rate": 0.0015, + "loss": 1.6356, + "step": 4868 + }, + { + "epoch": 0.5136075949367088, + "grad_norm": 0.5564453601837158, + "learning_rate": 0.0015, + "loss": 1.627, + "step": 4869 + }, + { + "epoch": 0.5137130801687764, + "grad_norm": 0.6270017027854919, + "learning_rate": 0.0015, + "loss": 1.633, + "step": 4870 + }, + { + "epoch": 0.5138185654008439, + "grad_norm": 0.5450406670570374, + "learning_rate": 0.0015, + "loss": 1.6603, + "step": 4871 + }, + { + "epoch": 0.5139240506329114, + "grad_norm": 0.5726514458656311, + "learning_rate": 0.0015, + "loss": 1.6238, + "step": 4872 + }, + { + "epoch": 0.5140295358649789, + "grad_norm": 0.5533527731895447, + "learning_rate": 0.0015, + "loss": 1.6888, + "step": 4873 + }, + { + "epoch": 0.5141350210970465, + "grad_norm": 0.6688078045845032, + "learning_rate": 0.0015, + "loss": 1.6466, + "step": 4874 + }, + { + "epoch": 0.5142405063291139, + "grad_norm": 0.5136394500732422, + "learning_rate": 0.0015, + "loss": 1.6096, + "step": 4875 + }, + { + "epoch": 0.5143459915611814, + "grad_norm": 0.5708884000778198, + "learning_rate": 0.0015, + "loss": 1.6575, + "step": 4876 + }, + { + "epoch": 0.514451476793249, + "grad_norm": 0.6574445366859436, + "learning_rate": 0.0015, + "loss": 1.644, + "step": 4877 + }, + { + "epoch": 0.5145569620253164, + "grad_norm": 0.4927603304386139, + "learning_rate": 0.0015, + "loss": 1.6781, + "step": 4878 + }, + { + "epoch": 0.514662447257384, + "grad_norm": 0.6540015339851379, + "learning_rate": 0.0015, + "loss": 1.669, + "step": 4879 + }, + { + "epoch": 0.5147679324894515, + "grad_norm": 0.6037447452545166, + "learning_rate": 0.0015, + "loss": 1.618, + "step": 4880 + }, + { + "epoch": 0.514873417721519, + "grad_norm": 0.5282309651374817, + "learning_rate": 0.0015, + "loss": 1.6535, + "step": 4881 + }, + { + "epoch": 0.5149789029535865, + "grad_norm": 0.6613801717758179, + 
"learning_rate": 0.0015, + "loss": 1.6405, + "step": 4882 + }, + { + "epoch": 0.515084388185654, + "grad_norm": 0.6758782863616943, + "learning_rate": 0.0015, + "loss": 1.6524, + "step": 4883 + }, + { + "epoch": 0.5151898734177215, + "grad_norm": 0.6558852791786194, + "learning_rate": 0.0015, + "loss": 1.6456, + "step": 4884 + }, + { + "epoch": 0.515295358649789, + "grad_norm": 0.5303662419319153, + "learning_rate": 0.0015, + "loss": 1.6178, + "step": 4885 + }, + { + "epoch": 0.5154008438818566, + "grad_norm": 0.6675775051116943, + "learning_rate": 0.0015, + "loss": 1.6559, + "step": 4886 + }, + { + "epoch": 0.515506329113924, + "grad_norm": 0.6730503439903259, + "learning_rate": 0.0015, + "loss": 1.6317, + "step": 4887 + }, + { + "epoch": 0.5156118143459916, + "grad_norm": 0.4933164715766907, + "learning_rate": 0.0015, + "loss": 1.6638, + "step": 4888 + }, + { + "epoch": 0.5157172995780591, + "grad_norm": 0.7364172339439392, + "learning_rate": 0.0015, + "loss": 1.6215, + "step": 4889 + }, + { + "epoch": 0.5158227848101266, + "grad_norm": 0.5155131816864014, + "learning_rate": 0.0015, + "loss": 1.6181, + "step": 4890 + }, + { + "epoch": 0.5159282700421941, + "grad_norm": 0.6437991261482239, + "learning_rate": 0.0015, + "loss": 1.6047, + "step": 4891 + }, + { + "epoch": 0.5160337552742617, + "grad_norm": 0.6346082091331482, + "learning_rate": 0.0015, + "loss": 1.6714, + "step": 4892 + }, + { + "epoch": 0.5161392405063291, + "grad_norm": 0.5846510529518127, + "learning_rate": 0.0015, + "loss": 1.6392, + "step": 4893 + }, + { + "epoch": 0.5162447257383966, + "grad_norm": 0.7635490894317627, + "learning_rate": 0.0015, + "loss": 1.6601, + "step": 4894 + }, + { + "epoch": 0.5163502109704642, + "grad_norm": 0.7379271984100342, + "learning_rate": 0.0015, + "loss": 1.6253, + "step": 4895 + }, + { + "epoch": 0.5164556962025316, + "grad_norm": 0.5082283616065979, + "learning_rate": 0.0015, + "loss": 1.6866, + "step": 4896 + }, + { + "epoch": 0.5165611814345992, + "grad_norm": 
0.8635253310203552, + "learning_rate": 0.0015, + "loss": 1.648, + "step": 4897 + }, + { + "epoch": 0.5166666666666667, + "grad_norm": 0.7167245149612427, + "learning_rate": 0.0015, + "loss": 1.6734, + "step": 4898 + }, + { + "epoch": 0.5167721518987342, + "grad_norm": 0.5967913269996643, + "learning_rate": 0.0015, + "loss": 1.6351, + "step": 4899 + }, + { + "epoch": 0.5168776371308017, + "grad_norm": 0.6967950463294983, + "learning_rate": 0.0015, + "loss": 1.6612, + "step": 4900 + }, + { + "epoch": 0.5169831223628693, + "grad_norm": 0.5675439238548279, + "learning_rate": 0.0015, + "loss": 1.6622, + "step": 4901 + }, + { + "epoch": 0.5170886075949367, + "grad_norm": 0.6265988945960999, + "learning_rate": 0.0015, + "loss": 1.6105, + "step": 4902 + }, + { + "epoch": 0.5171940928270042, + "grad_norm": 0.6491437554359436, + "learning_rate": 0.0015, + "loss": 1.6309, + "step": 4903 + }, + { + "epoch": 0.5172995780590718, + "grad_norm": 0.5626640319824219, + "learning_rate": 0.0015, + "loss": 1.642, + "step": 4904 + }, + { + "epoch": 0.5174050632911392, + "grad_norm": 0.8992338180541992, + "learning_rate": 0.0015, + "loss": 1.6709, + "step": 4905 + }, + { + "epoch": 0.5175105485232068, + "grad_norm": 0.7224078178405762, + "learning_rate": 0.0015, + "loss": 1.6374, + "step": 4906 + }, + { + "epoch": 0.5176160337552742, + "grad_norm": 0.58208829164505, + "learning_rate": 0.0015, + "loss": 1.5838, + "step": 4907 + }, + { + "epoch": 0.5177215189873418, + "grad_norm": 0.9976869821548462, + "learning_rate": 0.0015, + "loss": 1.6737, + "step": 4908 + }, + { + "epoch": 0.5178270042194093, + "grad_norm": 0.7647194266319275, + "learning_rate": 0.0015, + "loss": 1.6272, + "step": 4909 + }, + { + "epoch": 0.5179324894514767, + "grad_norm": 0.6444775462150574, + "learning_rate": 0.0015, + "loss": 1.6813, + "step": 4910 + }, + { + "epoch": 0.5180379746835443, + "grad_norm": 0.905405580997467, + "learning_rate": 0.0015, + "loss": 1.6476, + "step": 4911 + }, + { + "epoch": 
0.5181434599156118, + "grad_norm": 0.582550048828125, + "learning_rate": 0.0015, + "loss": 1.6165, + "step": 4912 + }, + { + "epoch": 0.5182489451476793, + "grad_norm": 0.6779419183731079, + "learning_rate": 0.0015, + "loss": 1.6152, + "step": 4913 + }, + { + "epoch": 0.5183544303797468, + "grad_norm": 0.7489119172096252, + "learning_rate": 0.0015, + "loss": 1.6171, + "step": 4914 + }, + { + "epoch": 0.5184599156118144, + "grad_norm": 0.5231460332870483, + "learning_rate": 0.0015, + "loss": 1.6445, + "step": 4915 + }, + { + "epoch": 0.5185654008438818, + "grad_norm": 0.5889079570770264, + "learning_rate": 0.0015, + "loss": 1.6552, + "step": 4916 + }, + { + "epoch": 0.5186708860759494, + "grad_norm": 0.6349860429763794, + "learning_rate": 0.0015, + "loss": 1.6536, + "step": 4917 + }, + { + "epoch": 0.5187763713080169, + "grad_norm": 0.5343783497810364, + "learning_rate": 0.0015, + "loss": 1.6088, + "step": 4918 + }, + { + "epoch": 0.5188818565400843, + "grad_norm": 0.5360980033874512, + "learning_rate": 0.0015, + "loss": 1.6209, + "step": 4919 + }, + { + "epoch": 0.5189873417721519, + "grad_norm": 0.5116620659828186, + "learning_rate": 0.0015, + "loss": 1.6622, + "step": 4920 + }, + { + "epoch": 0.5190928270042194, + "grad_norm": 0.478810578584671, + "learning_rate": 0.0015, + "loss": 1.6475, + "step": 4921 + }, + { + "epoch": 0.5191983122362869, + "grad_norm": 0.523101270198822, + "learning_rate": 0.0015, + "loss": 1.6779, + "step": 4922 + }, + { + "epoch": 0.5193037974683544, + "grad_norm": 0.4698803722858429, + "learning_rate": 0.0015, + "loss": 1.6519, + "step": 4923 + }, + { + "epoch": 0.519409282700422, + "grad_norm": 0.5162774920463562, + "learning_rate": 0.0015, + "loss": 1.5954, + "step": 4924 + }, + { + "epoch": 0.5195147679324894, + "grad_norm": 0.45604798197746277, + "learning_rate": 0.0015, + "loss": 1.6294, + "step": 4925 + }, + { + "epoch": 0.519620253164557, + "grad_norm": 0.5285675525665283, + "learning_rate": 0.0015, + "loss": 1.6519, + "step": 
4926 + }, + { + "epoch": 0.5197257383966245, + "grad_norm": 0.5319159626960754, + "learning_rate": 0.0015, + "loss": 1.619, + "step": 4927 + }, + { + "epoch": 0.5198312236286919, + "grad_norm": 0.5538773536682129, + "learning_rate": 0.0015, + "loss": 1.6532, + "step": 4928 + }, + { + "epoch": 0.5199367088607595, + "grad_norm": 0.5458510518074036, + "learning_rate": 0.0015, + "loss": 1.6437, + "step": 4929 + }, + { + "epoch": 0.520042194092827, + "grad_norm": 0.5238522887229919, + "learning_rate": 0.0015, + "loss": 1.6354, + "step": 4930 + }, + { + "epoch": 0.5201476793248945, + "grad_norm": 0.5384401082992554, + "learning_rate": 0.0015, + "loss": 1.6361, + "step": 4931 + }, + { + "epoch": 0.520253164556962, + "grad_norm": 0.44328439235687256, + "learning_rate": 0.0015, + "loss": 1.6557, + "step": 4932 + }, + { + "epoch": 0.5203586497890296, + "grad_norm": 0.5922768115997314, + "learning_rate": 0.0015, + "loss": 1.6737, + "step": 4933 + }, + { + "epoch": 0.520464135021097, + "grad_norm": 0.5494799017906189, + "learning_rate": 0.0015, + "loss": 1.637, + "step": 4934 + }, + { + "epoch": 0.5205696202531646, + "grad_norm": 0.6524472832679749, + "learning_rate": 0.0015, + "loss": 1.5953, + "step": 4935 + }, + { + "epoch": 0.5206751054852321, + "grad_norm": 0.5072101950645447, + "learning_rate": 0.0015, + "loss": 1.6284, + "step": 4936 + }, + { + "epoch": 0.5207805907172995, + "grad_norm": 0.5628498196601868, + "learning_rate": 0.0015, + "loss": 1.6373, + "step": 4937 + }, + { + "epoch": 0.5208860759493671, + "grad_norm": 0.5973775386810303, + "learning_rate": 0.0015, + "loss": 1.6429, + "step": 4938 + }, + { + "epoch": 0.5209915611814346, + "grad_norm": 0.5659748315811157, + "learning_rate": 0.0015, + "loss": 1.6514, + "step": 4939 + }, + { + "epoch": 0.5210970464135021, + "grad_norm": 0.5384728312492371, + "learning_rate": 0.0015, + "loss": 1.6249, + "step": 4940 + }, + { + "epoch": 0.5212025316455696, + "grad_norm": 0.572237491607666, + "learning_rate": 0.0015, + 
"loss": 1.6696, + "step": 4941 + }, + { + "epoch": 0.5213080168776372, + "grad_norm": 0.7187435030937195, + "learning_rate": 0.0015, + "loss": 1.6518, + "step": 4942 + }, + { + "epoch": 0.5214135021097046, + "grad_norm": 0.5063067078590393, + "learning_rate": 0.0015, + "loss": 1.635, + "step": 4943 + }, + { + "epoch": 0.5215189873417722, + "grad_norm": 0.5599090456962585, + "learning_rate": 0.0015, + "loss": 1.6486, + "step": 4944 + }, + { + "epoch": 0.5216244725738397, + "grad_norm": 0.5347083210945129, + "learning_rate": 0.0015, + "loss": 1.6313, + "step": 4945 + }, + { + "epoch": 0.5217299578059071, + "grad_norm": 0.6137127876281738, + "learning_rate": 0.0015, + "loss": 1.632, + "step": 4946 + }, + { + "epoch": 0.5218354430379747, + "grad_norm": 0.6572962403297424, + "learning_rate": 0.0015, + "loss": 1.6544, + "step": 4947 + }, + { + "epoch": 0.5219409282700422, + "grad_norm": 0.5356124043464661, + "learning_rate": 0.0015, + "loss": 1.6358, + "step": 4948 + }, + { + "epoch": 0.5220464135021097, + "grad_norm": 0.5502074956893921, + "learning_rate": 0.0015, + "loss": 1.6444, + "step": 4949 + }, + { + "epoch": 0.5221518987341772, + "grad_norm": 0.647217333316803, + "learning_rate": 0.0015, + "loss": 1.6601, + "step": 4950 + }, + { + "epoch": 0.5222573839662448, + "grad_norm": 0.5100189447402954, + "learning_rate": 0.0015, + "loss": 1.6567, + "step": 4951 + }, + { + "epoch": 0.5223628691983122, + "grad_norm": 0.49887460470199585, + "learning_rate": 0.0015, + "loss": 1.6334, + "step": 4952 + }, + { + "epoch": 0.5224683544303798, + "grad_norm": 0.4950576424598694, + "learning_rate": 0.0015, + "loss": 1.5806, + "step": 4953 + }, + { + "epoch": 0.5225738396624473, + "grad_norm": 0.6040221452713013, + "learning_rate": 0.0015, + "loss": 1.6344, + "step": 4954 + }, + { + "epoch": 0.5226793248945147, + "grad_norm": 0.6119283437728882, + "learning_rate": 0.0015, + "loss": 1.6442, + "step": 4955 + }, + { + "epoch": 0.5227848101265823, + "grad_norm": 0.5912838578224182, + 
"learning_rate": 0.0015, + "loss": 1.6273, + "step": 4956 + }, + { + "epoch": 0.5228902953586498, + "grad_norm": 0.7666657567024231, + "learning_rate": 0.0015, + "loss": 1.6637, + "step": 4957 + }, + { + "epoch": 0.5229957805907173, + "grad_norm": 0.6835048198699951, + "learning_rate": 0.0015, + "loss": 1.6345, + "step": 4958 + }, + { + "epoch": 0.5231012658227848, + "grad_norm": 0.5428978204727173, + "learning_rate": 0.0015, + "loss": 1.5995, + "step": 4959 + }, + { + "epoch": 0.5232067510548524, + "grad_norm": 0.6137120127677917, + "learning_rate": 0.0015, + "loss": 1.6267, + "step": 4960 + }, + { + "epoch": 0.5233122362869198, + "grad_norm": 0.6189130544662476, + "learning_rate": 0.0015, + "loss": 1.6223, + "step": 4961 + }, + { + "epoch": 0.5234177215189874, + "grad_norm": 0.47976619005203247, + "learning_rate": 0.0015, + "loss": 1.633, + "step": 4962 + }, + { + "epoch": 0.5235232067510549, + "grad_norm": 0.6020201444625854, + "learning_rate": 0.0015, + "loss": 1.6545, + "step": 4963 + }, + { + "epoch": 0.5236286919831223, + "grad_norm": 0.7014598846435547, + "learning_rate": 0.0015, + "loss": 1.6266, + "step": 4964 + }, + { + "epoch": 0.5237341772151899, + "grad_norm": 0.5283344984054565, + "learning_rate": 0.0015, + "loss": 1.7071, + "step": 4965 + }, + { + "epoch": 0.5238396624472574, + "grad_norm": 0.7310873866081238, + "learning_rate": 0.0015, + "loss": 1.628, + "step": 4966 + }, + { + "epoch": 0.5239451476793249, + "grad_norm": 0.6351194381713867, + "learning_rate": 0.0015, + "loss": 1.6009, + "step": 4967 + }, + { + "epoch": 0.5240506329113924, + "grad_norm": 0.6132206320762634, + "learning_rate": 0.0015, + "loss": 1.5803, + "step": 4968 + }, + { + "epoch": 0.52415611814346, + "grad_norm": 0.6869380474090576, + "learning_rate": 0.0015, + "loss": 1.6447, + "step": 4969 + }, + { + "epoch": 0.5242616033755274, + "grad_norm": 0.7326971888542175, + "learning_rate": 0.0015, + "loss": 1.677, + "step": 4970 + }, + { + "epoch": 0.524367088607595, + "grad_norm": 
0.7327278256416321, + "learning_rate": 0.0015, + "loss": 1.696, + "step": 4971 + }, + { + "epoch": 0.5244725738396624, + "grad_norm": 0.4774857461452484, + "learning_rate": 0.0015, + "loss": 1.5709, + "step": 4972 + }, + { + "epoch": 0.5245780590717299, + "grad_norm": 0.6482288241386414, + "learning_rate": 0.0015, + "loss": 1.6253, + "step": 4973 + }, + { + "epoch": 0.5246835443037975, + "grad_norm": 0.6765118837356567, + "learning_rate": 0.0015, + "loss": 1.6312, + "step": 4974 + }, + { + "epoch": 0.5247890295358649, + "grad_norm": 0.601466715335846, + "learning_rate": 0.0015, + "loss": 1.6815, + "step": 4975 + }, + { + "epoch": 0.5248945147679325, + "grad_norm": 0.6466884016990662, + "learning_rate": 0.0015, + "loss": 1.6359, + "step": 4976 + }, + { + "epoch": 0.525, + "grad_norm": 0.5307595729827881, + "learning_rate": 0.0015, + "loss": 1.6444, + "step": 4977 + }, + { + "epoch": 0.5251054852320675, + "grad_norm": 0.6720360517501831, + "learning_rate": 0.0015, + "loss": 1.6455, + "step": 4978 + }, + { + "epoch": 0.525210970464135, + "grad_norm": 0.5458463430404663, + "learning_rate": 0.0015, + "loss": 1.5929, + "step": 4979 + }, + { + "epoch": 0.5253164556962026, + "grad_norm": 0.5916789770126343, + "learning_rate": 0.0015, + "loss": 1.6045, + "step": 4980 + }, + { + "epoch": 0.52542194092827, + "grad_norm": 0.580977737903595, + "learning_rate": 0.0015, + "loss": 1.6505, + "step": 4981 + }, + { + "epoch": 0.5255274261603375, + "grad_norm": 0.6447780728340149, + "learning_rate": 0.0015, + "loss": 1.6168, + "step": 4982 + }, + { + "epoch": 0.5256329113924051, + "grad_norm": 0.5232994556427002, + "learning_rate": 0.0015, + "loss": 1.6191, + "step": 4983 + }, + { + "epoch": 0.5257383966244725, + "grad_norm": 0.8021718859672546, + "learning_rate": 0.0015, + "loss": 1.6183, + "step": 4984 + }, + { + "epoch": 0.5258438818565401, + "grad_norm": 0.5380504131317139, + "learning_rate": 0.0015, + "loss": 1.6314, + "step": 4985 + }, + { + "epoch": 0.5259493670886076, + 
"grad_norm": 0.7381505370140076, + "learning_rate": 0.0015, + "loss": 1.6183, + "step": 4986 + }, + { + "epoch": 0.5260548523206751, + "grad_norm": 0.7041741609573364, + "learning_rate": 0.0015, + "loss": 1.6035, + "step": 4987 + }, + { + "epoch": 0.5261603375527426, + "grad_norm": 0.6164463758468628, + "learning_rate": 0.0015, + "loss": 1.6462, + "step": 4988 + }, + { + "epoch": 0.5262658227848102, + "grad_norm": 0.5790919661521912, + "learning_rate": 0.0015, + "loss": 1.6714, + "step": 4989 + }, + { + "epoch": 0.5263713080168776, + "grad_norm": 0.7210583090782166, + "learning_rate": 0.0015, + "loss": 1.641, + "step": 4990 + }, + { + "epoch": 0.5264767932489451, + "grad_norm": 0.4991927444934845, + "learning_rate": 0.0015, + "loss": 1.6432, + "step": 4991 + }, + { + "epoch": 0.5265822784810127, + "grad_norm": 0.6949573159217834, + "learning_rate": 0.0015, + "loss": 1.6356, + "step": 4992 + }, + { + "epoch": 0.5266877637130801, + "grad_norm": 0.6759629845619202, + "learning_rate": 0.0015, + "loss": 1.6448, + "step": 4993 + }, + { + "epoch": 0.5267932489451477, + "grad_norm": 0.6050535440444946, + "learning_rate": 0.0015, + "loss": 1.6786, + "step": 4994 + }, + { + "epoch": 0.5268987341772152, + "grad_norm": 0.7116928100585938, + "learning_rate": 0.0015, + "loss": 1.6533, + "step": 4995 + }, + { + "epoch": 0.5270042194092827, + "grad_norm": 0.48686733841896057, + "learning_rate": 0.0015, + "loss": 1.6318, + "step": 4996 + }, + { + "epoch": 0.5271097046413502, + "grad_norm": 0.5821570158004761, + "learning_rate": 0.0015, + "loss": 1.6185, + "step": 4997 + }, + { + "epoch": 0.5272151898734178, + "grad_norm": 0.5504971146583557, + "learning_rate": 0.0015, + "loss": 1.6608, + "step": 4998 + }, + { + "epoch": 0.5273206751054852, + "grad_norm": 0.5670160055160522, + "learning_rate": 0.0015, + "loss": 1.6732, + "step": 4999 + }, + { + "epoch": 0.5274261603375527, + "grad_norm": 0.7006347179412842, + "learning_rate": 0.0015, + "loss": 1.6271, + "step": 5000 + }, + { + 
"epoch": 0.5275316455696203, + "grad_norm": 0.5208625197410583, + "learning_rate": 0.0015, + "loss": 1.6649, + "step": 5001 + }, + { + "epoch": 0.5276371308016877, + "grad_norm": 0.6777536869049072, + "learning_rate": 0.0015, + "loss": 1.6677, + "step": 5002 + }, + { + "epoch": 0.5277426160337553, + "grad_norm": 0.7044886350631714, + "learning_rate": 0.0015, + "loss": 1.6461, + "step": 5003 + }, + { + "epoch": 0.5278481012658228, + "grad_norm": 0.4330019950866699, + "learning_rate": 0.0015, + "loss": 1.626, + "step": 5004 + }, + { + "epoch": 0.5279535864978903, + "grad_norm": 0.6689701676368713, + "learning_rate": 0.0015, + "loss": 1.63, + "step": 5005 + }, + { + "epoch": 0.5280590717299578, + "grad_norm": 0.4450782835483551, + "learning_rate": 0.0015, + "loss": 1.5885, + "step": 5006 + }, + { + "epoch": 0.5281645569620254, + "grad_norm": 0.7085813879966736, + "learning_rate": 0.0015, + "loss": 1.6255, + "step": 5007 + }, + { + "epoch": 0.5282700421940928, + "grad_norm": 0.7168097496032715, + "learning_rate": 0.0015, + "loss": 1.6524, + "step": 5008 + }, + { + "epoch": 0.5283755274261603, + "grad_norm": 0.47027990221977234, + "learning_rate": 0.0015, + "loss": 1.6362, + "step": 5009 + }, + { + "epoch": 0.5284810126582279, + "grad_norm": 0.633066713809967, + "learning_rate": 0.0015, + "loss": 1.6409, + "step": 5010 + }, + { + "epoch": 0.5285864978902953, + "grad_norm": 0.6865650415420532, + "learning_rate": 0.0015, + "loss": 1.6142, + "step": 5011 + }, + { + "epoch": 0.5286919831223629, + "grad_norm": 0.44040533900260925, + "learning_rate": 0.0015, + "loss": 1.6588, + "step": 5012 + }, + { + "epoch": 0.5287974683544304, + "grad_norm": 0.6548410058021545, + "learning_rate": 0.0015, + "loss": 1.6372, + "step": 5013 + }, + { + "epoch": 0.5289029535864979, + "grad_norm": 0.6385186910629272, + "learning_rate": 0.0015, + "loss": 1.6215, + "step": 5014 + }, + { + "epoch": 0.5290084388185654, + "grad_norm": 0.5002903938293457, + "learning_rate": 0.0015, + "loss": 1.5852, + 
"step": 5015 + }, + { + "epoch": 0.529113924050633, + "grad_norm": 0.7450860738754272, + "learning_rate": 0.0015, + "loss": 1.6672, + "step": 5016 + }, + { + "epoch": 0.5292194092827004, + "grad_norm": 0.5648825764656067, + "learning_rate": 0.0015, + "loss": 1.6524, + "step": 5017 + }, + { + "epoch": 0.5293248945147679, + "grad_norm": 0.6444371342658997, + "learning_rate": 0.0015, + "loss": 1.6332, + "step": 5018 + }, + { + "epoch": 0.5294303797468355, + "grad_norm": 0.8067348599433899, + "learning_rate": 0.0015, + "loss": 1.6153, + "step": 5019 + }, + { + "epoch": 0.5295358649789029, + "grad_norm": 0.5678833723068237, + "learning_rate": 0.0015, + "loss": 1.6489, + "step": 5020 + }, + { + "epoch": 0.5296413502109705, + "grad_norm": 0.5519526600837708, + "learning_rate": 0.0015, + "loss": 1.6129, + "step": 5021 + }, + { + "epoch": 0.529746835443038, + "grad_norm": 0.5630823373794556, + "learning_rate": 0.0015, + "loss": 1.6246, + "step": 5022 + }, + { + "epoch": 0.5298523206751055, + "grad_norm": 0.48882824182510376, + "learning_rate": 0.0015, + "loss": 1.6213, + "step": 5023 + }, + { + "epoch": 0.529957805907173, + "grad_norm": 0.5829674601554871, + "learning_rate": 0.0015, + "loss": 1.6195, + "step": 5024 + }, + { + "epoch": 0.5300632911392406, + "grad_norm": 0.5589535236358643, + "learning_rate": 0.0015, + "loss": 1.6059, + "step": 5025 + }, + { + "epoch": 0.530168776371308, + "grad_norm": 0.5363820195198059, + "learning_rate": 0.0015, + "loss": 1.6193, + "step": 5026 + }, + { + "epoch": 0.5302742616033755, + "grad_norm": 0.5025756359100342, + "learning_rate": 0.0015, + "loss": 1.6466, + "step": 5027 + }, + { + "epoch": 0.5303797468354431, + "grad_norm": 0.5299649238586426, + "learning_rate": 0.0015, + "loss": 1.618, + "step": 5028 + }, + { + "epoch": 0.5304852320675105, + "grad_norm": 0.552548348903656, + "learning_rate": 0.0015, + "loss": 1.585, + "step": 5029 + }, + { + "epoch": 0.5305907172995781, + "grad_norm": 0.561339795589447, + "learning_rate": 0.0015, + 
"loss": 1.5962, + "step": 5030 + }, + { + "epoch": 0.5306962025316456, + "grad_norm": 0.5364262461662292, + "learning_rate": 0.0015, + "loss": 1.6338, + "step": 5031 + }, + { + "epoch": 0.5308016877637131, + "grad_norm": 0.6337389945983887, + "learning_rate": 0.0015, + "loss": 1.6588, + "step": 5032 + }, + { + "epoch": 0.5309071729957806, + "grad_norm": 0.5296711325645447, + "learning_rate": 0.0015, + "loss": 1.6531, + "step": 5033 + }, + { + "epoch": 0.5310126582278482, + "grad_norm": 0.5987748503684998, + "learning_rate": 0.0015, + "loss": 1.6448, + "step": 5034 + }, + { + "epoch": 0.5311181434599156, + "grad_norm": 0.5677121877670288, + "learning_rate": 0.0015, + "loss": 1.6581, + "step": 5035 + }, + { + "epoch": 0.5312236286919831, + "grad_norm": 0.5264080762863159, + "learning_rate": 0.0015, + "loss": 1.643, + "step": 5036 + }, + { + "epoch": 0.5313291139240506, + "grad_norm": 0.5983706712722778, + "learning_rate": 0.0015, + "loss": 1.6172, + "step": 5037 + }, + { + "epoch": 0.5314345991561181, + "grad_norm": 0.746576726436615, + "learning_rate": 0.0015, + "loss": 1.6786, + "step": 5038 + }, + { + "epoch": 0.5315400843881857, + "grad_norm": 0.6579143404960632, + "learning_rate": 0.0015, + "loss": 1.6393, + "step": 5039 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 0.4936109781265259, + "learning_rate": 0.0015, + "loss": 1.6116, + "step": 5040 + }, + { + "epoch": 0.5317510548523207, + "grad_norm": 0.565092146396637, + "learning_rate": 0.0015, + "loss": 1.6366, + "step": 5041 + }, + { + "epoch": 0.5318565400843882, + "grad_norm": 0.5107846260070801, + "learning_rate": 0.0015, + "loss": 1.6411, + "step": 5042 + }, + { + "epoch": 0.5319620253164556, + "grad_norm": 0.554541289806366, + "learning_rate": 0.0015, + "loss": 1.599, + "step": 5043 + }, + { + "epoch": 0.5320675105485232, + "grad_norm": 0.7011889219284058, + "learning_rate": 0.0015, + "loss": 1.6418, + "step": 5044 + }, + { + "epoch": 0.5321729957805907, + "grad_norm": 0.4913009703159332, + 
"learning_rate": 0.0015, + "loss": 1.6105, + "step": 5045 + }, + { + "epoch": 0.5322784810126582, + "grad_norm": 0.5300242304801941, + "learning_rate": 0.0015, + "loss": 1.673, + "step": 5046 + }, + { + "epoch": 0.5323839662447257, + "grad_norm": 0.5980767607688904, + "learning_rate": 0.0015, + "loss": 1.6262, + "step": 5047 + }, + { + "epoch": 0.5324894514767933, + "grad_norm": 0.444725900888443, + "learning_rate": 0.0015, + "loss": 1.594, + "step": 5048 + }, + { + "epoch": 0.5325949367088607, + "grad_norm": 0.5098974108695984, + "learning_rate": 0.0015, + "loss": 1.649, + "step": 5049 + }, + { + "epoch": 0.5327004219409283, + "grad_norm": 0.49392300844192505, + "learning_rate": 0.0015, + "loss": 1.6269, + "step": 5050 + }, + { + "epoch": 0.5328059071729958, + "grad_norm": 0.5042253732681274, + "learning_rate": 0.0015, + "loss": 1.6478, + "step": 5051 + }, + { + "epoch": 0.5329113924050632, + "grad_norm": 0.5419065952301025, + "learning_rate": 0.0015, + "loss": 1.639, + "step": 5052 + }, + { + "epoch": 0.5330168776371308, + "grad_norm": 0.5093225240707397, + "learning_rate": 0.0015, + "loss": 1.6463, + "step": 5053 + }, + { + "epoch": 0.5331223628691983, + "grad_norm": 0.6130210161209106, + "learning_rate": 0.0015, + "loss": 1.6475, + "step": 5054 + }, + { + "epoch": 0.5332278481012658, + "grad_norm": 0.6211649179458618, + "learning_rate": 0.0015, + "loss": 1.6419, + "step": 5055 + }, + { + "epoch": 0.5333333333333333, + "grad_norm": 0.5461240410804749, + "learning_rate": 0.0015, + "loss": 1.637, + "step": 5056 + }, + { + "epoch": 0.5334388185654009, + "grad_norm": 0.6642283201217651, + "learning_rate": 0.0015, + "loss": 1.6697, + "step": 5057 + }, + { + "epoch": 0.5335443037974683, + "grad_norm": 0.6176104545593262, + "learning_rate": 0.0015, + "loss": 1.6579, + "step": 5058 + }, + { + "epoch": 0.5336497890295359, + "grad_norm": 0.587188184261322, + "learning_rate": 0.0015, + "loss": 1.629, + "step": 5059 + }, + { + "epoch": 0.5337552742616034, + "grad_norm": 
0.6248210668563843, + "learning_rate": 0.0015, + "loss": 1.63, + "step": 5060 + }, + { + "epoch": 0.5338607594936708, + "grad_norm": 0.49455729126930237, + "learning_rate": 0.0015, + "loss": 1.6565, + "step": 5061 + }, + { + "epoch": 0.5339662447257384, + "grad_norm": 0.7127228379249573, + "learning_rate": 0.0015, + "loss": 1.6056, + "step": 5062 + }, + { + "epoch": 0.5340717299578059, + "grad_norm": 0.5559295415878296, + "learning_rate": 0.0015, + "loss": 1.614, + "step": 5063 + }, + { + "epoch": 0.5341772151898734, + "grad_norm": 0.5910193920135498, + "learning_rate": 0.0015, + "loss": 1.6795, + "step": 5064 + }, + { + "epoch": 0.5342827004219409, + "grad_norm": 0.8118014335632324, + "learning_rate": 0.0015, + "loss": 1.6185, + "step": 5065 + }, + { + "epoch": 0.5343881856540085, + "grad_norm": 0.6377518177032471, + "learning_rate": 0.0015, + "loss": 1.6347, + "step": 5066 + }, + { + "epoch": 0.5344936708860759, + "grad_norm": 0.5281541347503662, + "learning_rate": 0.0015, + "loss": 1.623, + "step": 5067 + }, + { + "epoch": 0.5345991561181435, + "grad_norm": 0.7031581997871399, + "learning_rate": 0.0015, + "loss": 1.6607, + "step": 5068 + }, + { + "epoch": 0.534704641350211, + "grad_norm": 0.48935553431510925, + "learning_rate": 0.0015, + "loss": 1.618, + "step": 5069 + }, + { + "epoch": 0.5348101265822784, + "grad_norm": 0.8359583020210266, + "learning_rate": 0.0015, + "loss": 1.6005, + "step": 5070 + }, + { + "epoch": 0.534915611814346, + "grad_norm": 0.897618293762207, + "learning_rate": 0.0015, + "loss": 1.6434, + "step": 5071 + }, + { + "epoch": 0.5350210970464135, + "grad_norm": 0.4645049571990967, + "learning_rate": 0.0015, + "loss": 1.6397, + "step": 5072 + }, + { + "epoch": 0.535126582278481, + "grad_norm": 0.790713906288147, + "learning_rate": 0.0015, + "loss": 1.62, + "step": 5073 + }, + { + "epoch": 0.5352320675105485, + "grad_norm": 0.5804443359375, + "learning_rate": 0.0015, + "loss": 1.6751, + "step": 5074 + }, + { + "epoch": 0.5353375527426161, + 
"grad_norm": 0.5091162323951721, + "learning_rate": 0.0015, + "loss": 1.6703, + "step": 5075 + }, + { + "epoch": 0.5354430379746835, + "grad_norm": 0.5991224646568298, + "learning_rate": 0.0015, + "loss": 1.6468, + "step": 5076 + }, + { + "epoch": 0.5355485232067511, + "grad_norm": 0.5701445937156677, + "learning_rate": 0.0015, + "loss": 1.6427, + "step": 5077 + }, + { + "epoch": 0.5356540084388186, + "grad_norm": 0.4461466073989868, + "learning_rate": 0.0015, + "loss": 1.653, + "step": 5078 + }, + { + "epoch": 0.535759493670886, + "grad_norm": 0.5141837000846863, + "learning_rate": 0.0015, + "loss": 1.6234, + "step": 5079 + }, + { + "epoch": 0.5358649789029536, + "grad_norm": 0.5066314339637756, + "learning_rate": 0.0015, + "loss": 1.6366, + "step": 5080 + }, + { + "epoch": 0.5359704641350211, + "grad_norm": 0.5522492527961731, + "learning_rate": 0.0015, + "loss": 1.6229, + "step": 5081 + }, + { + "epoch": 0.5360759493670886, + "grad_norm": 0.4731145203113556, + "learning_rate": 0.0015, + "loss": 1.6476, + "step": 5082 + }, + { + "epoch": 0.5361814345991561, + "grad_norm": 0.6168615221977234, + "learning_rate": 0.0015, + "loss": 1.6154, + "step": 5083 + }, + { + "epoch": 0.5362869198312237, + "grad_norm": 0.6220751404762268, + "learning_rate": 0.0015, + "loss": 1.6546, + "step": 5084 + }, + { + "epoch": 0.5363924050632911, + "grad_norm": 0.5353846549987793, + "learning_rate": 0.0015, + "loss": 1.6454, + "step": 5085 + }, + { + "epoch": 0.5364978902953587, + "grad_norm": 0.5959115624427795, + "learning_rate": 0.0015, + "loss": 1.5894, + "step": 5086 + }, + { + "epoch": 0.5366033755274262, + "grad_norm": 0.6136371493339539, + "learning_rate": 0.0015, + "loss": 1.594, + "step": 5087 + }, + { + "epoch": 0.5367088607594936, + "grad_norm": 0.5750802159309387, + "learning_rate": 0.0015, + "loss": 1.6766, + "step": 5088 + }, + { + "epoch": 0.5368143459915612, + "grad_norm": 0.6107059717178345, + "learning_rate": 0.0015, + "loss": 1.6479, + "step": 5089 + }, + { + "epoch": 
0.5369198312236287, + "grad_norm": 0.5770487189292908, + "learning_rate": 0.0015, + "loss": 1.6736, + "step": 5090 + }, + { + "epoch": 0.5370253164556962, + "grad_norm": 0.49546897411346436, + "learning_rate": 0.0015, + "loss": 1.6664, + "step": 5091 + }, + { + "epoch": 0.5371308016877637, + "grad_norm": 0.6239323616027832, + "learning_rate": 0.0015, + "loss": 1.5621, + "step": 5092 + }, + { + "epoch": 0.5372362869198313, + "grad_norm": 0.5572430491447449, + "learning_rate": 0.0015, + "loss": 1.6214, + "step": 5093 + }, + { + "epoch": 0.5373417721518987, + "grad_norm": 0.5014874935150146, + "learning_rate": 0.0015, + "loss": 1.6049, + "step": 5094 + }, + { + "epoch": 0.5374472573839663, + "grad_norm": 0.5005751252174377, + "learning_rate": 0.0015, + "loss": 1.6052, + "step": 5095 + }, + { + "epoch": 0.5375527426160338, + "grad_norm": 0.5073537826538086, + "learning_rate": 0.0015, + "loss": 1.6233, + "step": 5096 + }, + { + "epoch": 0.5376582278481012, + "grad_norm": 0.5209341049194336, + "learning_rate": 0.0015, + "loss": 1.6447, + "step": 5097 + }, + { + "epoch": 0.5377637130801688, + "grad_norm": 0.5044030547142029, + "learning_rate": 0.0015, + "loss": 1.6346, + "step": 5098 + }, + { + "epoch": 0.5378691983122363, + "grad_norm": 0.5082756280899048, + "learning_rate": 0.0015, + "loss": 1.6456, + "step": 5099 + }, + { + "epoch": 0.5379746835443038, + "grad_norm": 0.5281136631965637, + "learning_rate": 0.0015, + "loss": 1.5906, + "step": 5100 + }, + { + "epoch": 0.5380801687763713, + "grad_norm": 0.4830903112888336, + "learning_rate": 0.0015, + "loss": 1.6463, + "step": 5101 + }, + { + "epoch": 0.5381856540084389, + "grad_norm": 0.5429915189743042, + "learning_rate": 0.0015, + "loss": 1.6571, + "step": 5102 + }, + { + "epoch": 0.5382911392405063, + "grad_norm": 0.5040405988693237, + "learning_rate": 0.0015, + "loss": 1.6247, + "step": 5103 + }, + { + "epoch": 0.5383966244725739, + "grad_norm": 0.7375873327255249, + "learning_rate": 0.0015, + "loss": 1.6126, + 
"step": 5104 + }, + { + "epoch": 0.5385021097046413, + "grad_norm": 0.5359514355659485, + "learning_rate": 0.0015, + "loss": 1.5928, + "step": 5105 + }, + { + "epoch": 0.5386075949367088, + "grad_norm": 0.5888318419456482, + "learning_rate": 0.0015, + "loss": 1.6362, + "step": 5106 + }, + { + "epoch": 0.5387130801687764, + "grad_norm": 0.5835240483283997, + "learning_rate": 0.0015, + "loss": 1.679, + "step": 5107 + }, + { + "epoch": 0.5388185654008438, + "grad_norm": 0.5429015755653381, + "learning_rate": 0.0015, + "loss": 1.6443, + "step": 5108 + }, + { + "epoch": 0.5389240506329114, + "grad_norm": 0.6073294878005981, + "learning_rate": 0.0015, + "loss": 1.6466, + "step": 5109 + }, + { + "epoch": 0.5390295358649789, + "grad_norm": 0.59377521276474, + "learning_rate": 0.0015, + "loss": 1.593, + "step": 5110 + }, + { + "epoch": 0.5391350210970464, + "grad_norm": 0.5483939051628113, + "learning_rate": 0.0015, + "loss": 1.6507, + "step": 5111 + }, + { + "epoch": 0.5392405063291139, + "grad_norm": 0.5142219662666321, + "learning_rate": 0.0015, + "loss": 1.6261, + "step": 5112 + }, + { + "epoch": 0.5393459915611815, + "grad_norm": 0.5235267877578735, + "learning_rate": 0.0015, + "loss": 1.6637, + "step": 5113 + }, + { + "epoch": 0.5394514767932489, + "grad_norm": 0.5899415016174316, + "learning_rate": 0.0015, + "loss": 1.6229, + "step": 5114 + }, + { + "epoch": 0.5395569620253164, + "grad_norm": 0.5644646883010864, + "learning_rate": 0.0015, + "loss": 1.608, + "step": 5115 + }, + { + "epoch": 0.539662447257384, + "grad_norm": 0.5044073462486267, + "learning_rate": 0.0015, + "loss": 1.6276, + "step": 5116 + }, + { + "epoch": 0.5397679324894514, + "grad_norm": 0.6431909799575806, + "learning_rate": 0.0015, + "loss": 1.6187, + "step": 5117 + }, + { + "epoch": 0.539873417721519, + "grad_norm": 0.6481715440750122, + "learning_rate": 0.0015, + "loss": 1.6481, + "step": 5118 + }, + { + "epoch": 0.5399789029535865, + "grad_norm": 0.49285146594047546, + "learning_rate": 0.0015, 
+ "loss": 1.6267, + "step": 5119 + }, + { + "epoch": 0.540084388185654, + "grad_norm": 0.671585202217102, + "learning_rate": 0.0015, + "loss": 1.6086, + "step": 5120 + }, + { + "epoch": 0.5401898734177215, + "grad_norm": 0.679915189743042, + "learning_rate": 0.0015, + "loss": 1.6235, + "step": 5121 + }, + { + "epoch": 0.5402953586497891, + "grad_norm": 0.5546087026596069, + "learning_rate": 0.0015, + "loss": 1.6149, + "step": 5122 + }, + { + "epoch": 0.5404008438818565, + "grad_norm": 0.8165748119354248, + "learning_rate": 0.0015, + "loss": 1.617, + "step": 5123 + }, + { + "epoch": 0.540506329113924, + "grad_norm": 0.693085253238678, + "learning_rate": 0.0015, + "loss": 1.6316, + "step": 5124 + }, + { + "epoch": 0.5406118143459916, + "grad_norm": 0.5697019696235657, + "learning_rate": 0.0015, + "loss": 1.6325, + "step": 5125 + }, + { + "epoch": 0.540717299578059, + "grad_norm": 0.5882627367973328, + "learning_rate": 0.0015, + "loss": 1.6175, + "step": 5126 + }, + { + "epoch": 0.5408227848101266, + "grad_norm": 0.616887629032135, + "learning_rate": 0.0015, + "loss": 1.6275, + "step": 5127 + }, + { + "epoch": 0.5409282700421941, + "grad_norm": 0.46310409903526306, + "learning_rate": 0.0015, + "loss": 1.6289, + "step": 5128 + }, + { + "epoch": 0.5410337552742616, + "grad_norm": 0.5147756338119507, + "learning_rate": 0.0015, + "loss": 1.6384, + "step": 5129 + }, + { + "epoch": 0.5411392405063291, + "grad_norm": 0.5554012060165405, + "learning_rate": 0.0015, + "loss": 1.6357, + "step": 5130 + }, + { + "epoch": 0.5412447257383967, + "grad_norm": 0.5255956053733826, + "learning_rate": 0.0015, + "loss": 1.6538, + "step": 5131 + }, + { + "epoch": 0.5413502109704641, + "grad_norm": 0.5103606581687927, + "learning_rate": 0.0015, + "loss": 1.6756, + "step": 5132 + }, + { + "epoch": 0.5414556962025316, + "grad_norm": 0.5117038488388062, + "learning_rate": 0.0015, + "loss": 1.6254, + "step": 5133 + }, + { + "epoch": 0.5415611814345992, + "grad_norm": 0.532572865486145, + 
"learning_rate": 0.0015, + "loss": 1.6394, + "step": 5134 + }, + { + "epoch": 0.5416666666666666, + "grad_norm": 0.5013951063156128, + "learning_rate": 0.0015, + "loss": 1.6284, + "step": 5135 + }, + { + "epoch": 0.5417721518987342, + "grad_norm": 0.5052446126937866, + "learning_rate": 0.0015, + "loss": 1.6227, + "step": 5136 + }, + { + "epoch": 0.5418776371308017, + "grad_norm": 0.4503644108772278, + "learning_rate": 0.0015, + "loss": 1.6458, + "step": 5137 + }, + { + "epoch": 0.5419831223628692, + "grad_norm": 0.4754565954208374, + "learning_rate": 0.0015, + "loss": 1.6398, + "step": 5138 + }, + { + "epoch": 0.5420886075949367, + "grad_norm": 0.4887687563896179, + "learning_rate": 0.0015, + "loss": 1.642, + "step": 5139 + }, + { + "epoch": 0.5421940928270043, + "grad_norm": 0.5226295590400696, + "learning_rate": 0.0015, + "loss": 1.6358, + "step": 5140 + }, + { + "epoch": 0.5422995780590717, + "grad_norm": 0.5884148478507996, + "learning_rate": 0.0015, + "loss": 1.6196, + "step": 5141 + }, + { + "epoch": 0.5424050632911392, + "grad_norm": 0.6582362055778503, + "learning_rate": 0.0015, + "loss": 1.6639, + "step": 5142 + }, + { + "epoch": 0.5425105485232068, + "grad_norm": 0.514098048210144, + "learning_rate": 0.0015, + "loss": 1.6572, + "step": 5143 + }, + { + "epoch": 0.5426160337552742, + "grad_norm": 0.5832661986351013, + "learning_rate": 0.0015, + "loss": 1.6223, + "step": 5144 + }, + { + "epoch": 0.5427215189873418, + "grad_norm": 0.6021525263786316, + "learning_rate": 0.0015, + "loss": 1.647, + "step": 5145 + }, + { + "epoch": 0.5428270042194093, + "grad_norm": 0.5034875273704529, + "learning_rate": 0.0015, + "loss": 1.6254, + "step": 5146 + }, + { + "epoch": 0.5429324894514768, + "grad_norm": 0.5350997447967529, + "learning_rate": 0.0015, + "loss": 1.6697, + "step": 5147 + }, + { + "epoch": 0.5430379746835443, + "grad_norm": 0.5650411248207092, + "learning_rate": 0.0015, + "loss": 1.603, + "step": 5148 + }, + { + "epoch": 0.5431434599156119, + "grad_norm": 
0.4724980890750885, + "learning_rate": 0.0015, + "loss": 1.6373, + "step": 5149 + }, + { + "epoch": 0.5432489451476793, + "grad_norm": 0.5728057622909546, + "learning_rate": 0.0015, + "loss": 1.6602, + "step": 5150 + }, + { + "epoch": 0.5433544303797468, + "grad_norm": 0.573001503944397, + "learning_rate": 0.0015, + "loss": 1.6035, + "step": 5151 + }, + { + "epoch": 0.5434599156118144, + "grad_norm": 0.6479495167732239, + "learning_rate": 0.0015, + "loss": 1.6581, + "step": 5152 + }, + { + "epoch": 0.5435654008438818, + "grad_norm": 0.6770612001419067, + "learning_rate": 0.0015, + "loss": 1.6246, + "step": 5153 + }, + { + "epoch": 0.5436708860759494, + "grad_norm": 0.5020831823348999, + "learning_rate": 0.0015, + "loss": 1.6484, + "step": 5154 + }, + { + "epoch": 0.5437763713080169, + "grad_norm": 0.8978963494300842, + "learning_rate": 0.0015, + "loss": 1.6686, + "step": 5155 + }, + { + "epoch": 0.5438818565400844, + "grad_norm": 0.8261247873306274, + "learning_rate": 0.0015, + "loss": 1.604, + "step": 5156 + }, + { + "epoch": 0.5439873417721519, + "grad_norm": 0.5348539352416992, + "learning_rate": 0.0015, + "loss": 1.6377, + "step": 5157 + }, + { + "epoch": 0.5440928270042195, + "grad_norm": 0.952217698097229, + "learning_rate": 0.0015, + "loss": 1.6277, + "step": 5158 + }, + { + "epoch": 0.5441983122362869, + "grad_norm": 0.9551669955253601, + "learning_rate": 0.0015, + "loss": 1.6583, + "step": 5159 + }, + { + "epoch": 0.5443037974683544, + "grad_norm": 0.5607153177261353, + "learning_rate": 0.0015, + "loss": 1.6342, + "step": 5160 + }, + { + "epoch": 0.544409282700422, + "grad_norm": 1.0147234201431274, + "learning_rate": 0.0015, + "loss": 1.6858, + "step": 5161 + }, + { + "epoch": 0.5445147679324894, + "grad_norm": 0.7260619401931763, + "learning_rate": 0.0015, + "loss": 1.6721, + "step": 5162 + }, + { + "epoch": 0.544620253164557, + "grad_norm": 0.7279376983642578, + "learning_rate": 0.0015, + "loss": 1.6074, + "step": 5163 + }, + { + "epoch": 
0.5447257383966245, + "grad_norm": 0.9652494788169861, + "learning_rate": 0.0015, + "loss": 1.6398, + "step": 5164 + }, + { + "epoch": 0.544831223628692, + "grad_norm": 0.6029044985771179, + "learning_rate": 0.0015, + "loss": 1.6484, + "step": 5165 + }, + { + "epoch": 0.5449367088607595, + "grad_norm": 0.9093937873840332, + "learning_rate": 0.0015, + "loss": 1.607, + "step": 5166 + }, + { + "epoch": 0.5450421940928271, + "grad_norm": 0.808569610118866, + "learning_rate": 0.0015, + "loss": 1.644, + "step": 5167 + }, + { + "epoch": 0.5451476793248945, + "grad_norm": 0.5974361300468445, + "learning_rate": 0.0015, + "loss": 1.6277, + "step": 5168 + }, + { + "epoch": 0.545253164556962, + "grad_norm": 0.6575527787208557, + "learning_rate": 0.0015, + "loss": 1.6639, + "step": 5169 + }, + { + "epoch": 0.5453586497890295, + "grad_norm": 0.5841599106788635, + "learning_rate": 0.0015, + "loss": 1.5768, + "step": 5170 + }, + { + "epoch": 0.545464135021097, + "grad_norm": 0.6911558508872986, + "learning_rate": 0.0015, + "loss": 1.6134, + "step": 5171 + }, + { + "epoch": 0.5455696202531646, + "grad_norm": 0.6017004251480103, + "learning_rate": 0.0015, + "loss": 1.6437, + "step": 5172 + }, + { + "epoch": 0.545675105485232, + "grad_norm": 0.5507343411445618, + "learning_rate": 0.0015, + "loss": 1.647, + "step": 5173 + }, + { + "epoch": 0.5457805907172996, + "grad_norm": 0.6274258494377136, + "learning_rate": 0.0015, + "loss": 1.6637, + "step": 5174 + }, + { + "epoch": 0.5458860759493671, + "grad_norm": 0.6216654181480408, + "learning_rate": 0.0015, + "loss": 1.6395, + "step": 5175 + }, + { + "epoch": 0.5459915611814345, + "grad_norm": 0.6118292212486267, + "learning_rate": 0.0015, + "loss": 1.6315, + "step": 5176 + }, + { + "epoch": 0.5460970464135021, + "grad_norm": 0.5252754092216492, + "learning_rate": 0.0015, + "loss": 1.6619, + "step": 5177 + }, + { + "epoch": 0.5462025316455696, + "grad_norm": 0.5460687875747681, + "learning_rate": 0.0015, + "loss": 1.6939, + "step": 5178 + 
}, + { + "epoch": 0.5463080168776371, + "grad_norm": 0.6485275626182556, + "learning_rate": 0.0015, + "loss": 1.6295, + "step": 5179 + }, + { + "epoch": 0.5464135021097046, + "grad_norm": 0.7728783488273621, + "learning_rate": 0.0015, + "loss": 1.6491, + "step": 5180 + }, + { + "epoch": 0.5465189873417722, + "grad_norm": 0.6418474316596985, + "learning_rate": 0.0015, + "loss": 1.6404, + "step": 5181 + }, + { + "epoch": 0.5466244725738396, + "grad_norm": 0.6431019902229309, + "learning_rate": 0.0015, + "loss": 1.633, + "step": 5182 + }, + { + "epoch": 0.5467299578059072, + "grad_norm": 0.5730225443840027, + "learning_rate": 0.0015, + "loss": 1.6144, + "step": 5183 + }, + { + "epoch": 0.5468354430379747, + "grad_norm": 0.5020373463630676, + "learning_rate": 0.0015, + "loss": 1.6465, + "step": 5184 + }, + { + "epoch": 0.5469409282700421, + "grad_norm": 0.6241316199302673, + "learning_rate": 0.0015, + "loss": 1.6155, + "step": 5185 + }, + { + "epoch": 0.5470464135021097, + "grad_norm": 0.6311151385307312, + "learning_rate": 0.0015, + "loss": 1.6609, + "step": 5186 + }, + { + "epoch": 0.5471518987341772, + "grad_norm": 0.5612585544586182, + "learning_rate": 0.0015, + "loss": 1.6147, + "step": 5187 + }, + { + "epoch": 0.5472573839662447, + "grad_norm": 0.6356328129768372, + "learning_rate": 0.0015, + "loss": 1.5925, + "step": 5188 + }, + { + "epoch": 0.5473628691983122, + "grad_norm": 0.7096157670021057, + "learning_rate": 0.0015, + "loss": 1.6358, + "step": 5189 + }, + { + "epoch": 0.5474683544303798, + "grad_norm": 1.1665655374526978, + "learning_rate": 0.0015, + "loss": 1.6176, + "step": 5190 + }, + { + "epoch": 0.5475738396624472, + "grad_norm": 0.582997739315033, + "learning_rate": 0.0015, + "loss": 1.645, + "step": 5191 + }, + { + "epoch": 0.5476793248945148, + "grad_norm": 0.7276178002357483, + "learning_rate": 0.0015, + "loss": 1.6477, + "step": 5192 + }, + { + "epoch": 0.5477848101265823, + "grad_norm": 0.6223065257072449, + "learning_rate": 0.0015, + "loss": 
1.6278, + "step": 5193 + }, + { + "epoch": 0.5478902953586497, + "grad_norm": 0.5812250971794128, + "learning_rate": 0.0015, + "loss": 1.655, + "step": 5194 + }, + { + "epoch": 0.5479957805907173, + "grad_norm": 0.7441231608390808, + "learning_rate": 0.0015, + "loss": 1.6377, + "step": 5195 + }, + { + "epoch": 0.5481012658227848, + "grad_norm": 0.5721290707588196, + "learning_rate": 0.0015, + "loss": 1.667, + "step": 5196 + }, + { + "epoch": 0.5482067510548523, + "grad_norm": 0.5468820333480835, + "learning_rate": 0.0015, + "loss": 1.6474, + "step": 5197 + }, + { + "epoch": 0.5483122362869198, + "grad_norm": 0.6002963185310364, + "learning_rate": 0.0015, + "loss": 1.6186, + "step": 5198 + }, + { + "epoch": 0.5484177215189874, + "grad_norm": 0.5369439721107483, + "learning_rate": 0.0015, + "loss": 1.6254, + "step": 5199 + }, + { + "epoch": 0.5485232067510548, + "grad_norm": 0.7350257039070129, + "learning_rate": 0.0015, + "loss": 1.6316, + "step": 5200 + }, + { + "epoch": 0.5486286919831224, + "grad_norm": 0.558731198310852, + "learning_rate": 0.0015, + "loss": 1.5774, + "step": 5201 + }, + { + "epoch": 0.5487341772151899, + "grad_norm": 0.5892989635467529, + "learning_rate": 0.0015, + "loss": 1.6431, + "step": 5202 + }, + { + "epoch": 0.5488396624472573, + "grad_norm": 0.6152883172035217, + "learning_rate": 0.0015, + "loss": 1.6195, + "step": 5203 + }, + { + "epoch": 0.5489451476793249, + "grad_norm": 0.5052489042282104, + "learning_rate": 0.0015, + "loss": 1.6132, + "step": 5204 + }, + { + "epoch": 0.5490506329113924, + "grad_norm": 0.6244813203811646, + "learning_rate": 0.0015, + "loss": 1.6328, + "step": 5205 + }, + { + "epoch": 0.5491561181434599, + "grad_norm": 0.6771020889282227, + "learning_rate": 0.0015, + "loss": 1.6152, + "step": 5206 + }, + { + "epoch": 0.5492616033755274, + "grad_norm": 0.5401136875152588, + "learning_rate": 0.0015, + "loss": 1.6074, + "step": 5207 + }, + { + "epoch": 0.549367088607595, + "grad_norm": 0.5238175392150879, + 
"learning_rate": 0.0015, + "loss": 1.6482, + "step": 5208 + }, + { + "epoch": 0.5494725738396624, + "grad_norm": 0.6082211136817932, + "learning_rate": 0.0015, + "loss": 1.6361, + "step": 5209 + }, + { + "epoch": 0.54957805907173, + "grad_norm": 0.5441169738769531, + "learning_rate": 0.0015, + "loss": 1.623, + "step": 5210 + }, + { + "epoch": 0.5496835443037975, + "grad_norm": 0.5103268623352051, + "learning_rate": 0.0015, + "loss": 1.6481, + "step": 5211 + }, + { + "epoch": 0.549789029535865, + "grad_norm": 0.45983317494392395, + "learning_rate": 0.0015, + "loss": 1.5892, + "step": 5212 + }, + { + "epoch": 0.5498945147679325, + "grad_norm": 0.6080833673477173, + "learning_rate": 0.0015, + "loss": 1.6702, + "step": 5213 + }, + { + "epoch": 0.55, + "grad_norm": 0.5307289958000183, + "learning_rate": 0.0015, + "loss": 1.6438, + "step": 5214 + }, + { + "epoch": 0.5501054852320675, + "grad_norm": 0.5215746760368347, + "learning_rate": 0.0015, + "loss": 1.6425, + "step": 5215 + }, + { + "epoch": 0.550210970464135, + "grad_norm": 0.5658318996429443, + "learning_rate": 0.0015, + "loss": 1.6604, + "step": 5216 + }, + { + "epoch": 0.5503164556962026, + "grad_norm": 0.5127245187759399, + "learning_rate": 0.0015, + "loss": 1.6281, + "step": 5217 + }, + { + "epoch": 0.55042194092827, + "grad_norm": 0.46836280822753906, + "learning_rate": 0.0015, + "loss": 1.6318, + "step": 5218 + }, + { + "epoch": 0.5505274261603376, + "grad_norm": 0.5214895009994507, + "learning_rate": 0.0015, + "loss": 1.6509, + "step": 5219 + }, + { + "epoch": 0.5506329113924051, + "grad_norm": 0.5331636667251587, + "learning_rate": 0.0015, + "loss": 1.5931, + "step": 5220 + }, + { + "epoch": 0.5507383966244725, + "grad_norm": 0.5437734127044678, + "learning_rate": 0.0015, + "loss": 1.6069, + "step": 5221 + }, + { + "epoch": 0.5508438818565401, + "grad_norm": 0.4964209496974945, + "learning_rate": 0.0015, + "loss": 1.6017, + "step": 5222 + }, + { + "epoch": 0.5509493670886076, + "grad_norm": 
0.5475440621376038, + "learning_rate": 0.0015, + "loss": 1.5937, + "step": 5223 + }, + { + "epoch": 0.5510548523206751, + "grad_norm": 0.45664384961128235, + "learning_rate": 0.0015, + "loss": 1.6396, + "step": 5224 + }, + { + "epoch": 0.5511603375527426, + "grad_norm": 0.5754068493843079, + "learning_rate": 0.0015, + "loss": 1.6341, + "step": 5225 + }, + { + "epoch": 0.5512658227848102, + "grad_norm": 0.5156245827674866, + "learning_rate": 0.0015, + "loss": 1.6283, + "step": 5226 + }, + { + "epoch": 0.5513713080168776, + "grad_norm": 0.47930315136909485, + "learning_rate": 0.0015, + "loss": 1.5954, + "step": 5227 + }, + { + "epoch": 0.5514767932489452, + "grad_norm": 0.6200971007347107, + "learning_rate": 0.0015, + "loss": 1.6742, + "step": 5228 + }, + { + "epoch": 0.5515822784810127, + "grad_norm": 0.540560781955719, + "learning_rate": 0.0015, + "loss": 1.6429, + "step": 5229 + }, + { + "epoch": 0.5516877637130801, + "grad_norm": 0.618925929069519, + "learning_rate": 0.0015, + "loss": 1.6361, + "step": 5230 + }, + { + "epoch": 0.5517932489451477, + "grad_norm": 0.6381429433822632, + "learning_rate": 0.0015, + "loss": 1.6592, + "step": 5231 + }, + { + "epoch": 0.5518987341772152, + "grad_norm": 0.6361313462257385, + "learning_rate": 0.0015, + "loss": 1.6214, + "step": 5232 + }, + { + "epoch": 0.5520042194092827, + "grad_norm": 0.5562092065811157, + "learning_rate": 0.0015, + "loss": 1.5821, + "step": 5233 + }, + { + "epoch": 0.5521097046413502, + "grad_norm": 0.6058759093284607, + "learning_rate": 0.0015, + "loss": 1.6272, + "step": 5234 + }, + { + "epoch": 0.5522151898734177, + "grad_norm": 0.5033120512962341, + "learning_rate": 0.0015, + "loss": 1.6419, + "step": 5235 + }, + { + "epoch": 0.5523206751054852, + "grad_norm": 0.5114763975143433, + "learning_rate": 0.0015, + "loss": 1.6345, + "step": 5236 + }, + { + "epoch": 0.5524261603375528, + "grad_norm": 0.5686569213867188, + "learning_rate": 0.0015, + "loss": 1.6294, + "step": 5237 + }, + { + "epoch": 
0.5525316455696202, + "grad_norm": 0.48996034264564514, + "learning_rate": 0.0015, + "loss": 1.62, + "step": 5238 + }, + { + "epoch": 0.5526371308016877, + "grad_norm": 0.5686507821083069, + "learning_rate": 0.0015, + "loss": 1.6487, + "step": 5239 + }, + { + "epoch": 0.5527426160337553, + "grad_norm": 0.5463162660598755, + "learning_rate": 0.0015, + "loss": 1.6673, + "step": 5240 + }, + { + "epoch": 0.5528481012658227, + "grad_norm": 0.5198691487312317, + "learning_rate": 0.0015, + "loss": 1.6433, + "step": 5241 + }, + { + "epoch": 0.5529535864978903, + "grad_norm": 0.6054553985595703, + "learning_rate": 0.0015, + "loss": 1.6192, + "step": 5242 + }, + { + "epoch": 0.5530590717299578, + "grad_norm": 0.5022717714309692, + "learning_rate": 0.0015, + "loss": 1.6126, + "step": 5243 + }, + { + "epoch": 0.5531645569620253, + "grad_norm": 0.564944326877594, + "learning_rate": 0.0015, + "loss": 1.6088, + "step": 5244 + }, + { + "epoch": 0.5532700421940928, + "grad_norm": 0.5960189700126648, + "learning_rate": 0.0015, + "loss": 1.6333, + "step": 5245 + }, + { + "epoch": 0.5533755274261604, + "grad_norm": 0.5729047060012817, + "learning_rate": 0.0015, + "loss": 1.6345, + "step": 5246 + }, + { + "epoch": 0.5534810126582278, + "grad_norm": 0.6358652114868164, + "learning_rate": 0.0015, + "loss": 1.6406, + "step": 5247 + }, + { + "epoch": 0.5535864978902953, + "grad_norm": 0.6519100666046143, + "learning_rate": 0.0015, + "loss": 1.6376, + "step": 5248 + }, + { + "epoch": 0.5536919831223629, + "grad_norm": 0.7309291362762451, + "learning_rate": 0.0015, + "loss": 1.6088, + "step": 5249 + }, + { + "epoch": 0.5537974683544303, + "grad_norm": 0.5511775612831116, + "learning_rate": 0.0015, + "loss": 1.6542, + "step": 5250 + }, + { + "epoch": 0.5539029535864979, + "grad_norm": 0.6198769211769104, + "learning_rate": 0.0015, + "loss": 1.659, + "step": 5251 + }, + { + "epoch": 0.5540084388185654, + "grad_norm": 0.5296935439109802, + "learning_rate": 0.0015, + "loss": 1.6185, + "step": 
5252 + }, + { + "epoch": 0.5541139240506329, + "grad_norm": 0.557013988494873, + "learning_rate": 0.0015, + "loss": 1.63, + "step": 5253 + }, + { + "epoch": 0.5542194092827004, + "grad_norm": 0.6444569230079651, + "learning_rate": 0.0015, + "loss": 1.6477, + "step": 5254 + }, + { + "epoch": 0.554324894514768, + "grad_norm": 0.48016121983528137, + "learning_rate": 0.0015, + "loss": 1.6353, + "step": 5255 + }, + { + "epoch": 0.5544303797468354, + "grad_norm": 0.6487542986869812, + "learning_rate": 0.0015, + "loss": 1.6025, + "step": 5256 + }, + { + "epoch": 0.554535864978903, + "grad_norm": 0.7259236574172974, + "learning_rate": 0.0015, + "loss": 1.6449, + "step": 5257 + }, + { + "epoch": 0.5546413502109705, + "grad_norm": 0.5721498727798462, + "learning_rate": 0.0015, + "loss": 1.6361, + "step": 5258 + }, + { + "epoch": 0.5547468354430379, + "grad_norm": 0.6241754293441772, + "learning_rate": 0.0015, + "loss": 1.651, + "step": 5259 + }, + { + "epoch": 0.5548523206751055, + "grad_norm": 0.5566892027854919, + "learning_rate": 0.0015, + "loss": 1.6166, + "step": 5260 + }, + { + "epoch": 0.554957805907173, + "grad_norm": 0.5055679678916931, + "learning_rate": 0.0015, + "loss": 1.6346, + "step": 5261 + }, + { + "epoch": 0.5550632911392405, + "grad_norm": 0.5378907322883606, + "learning_rate": 0.0015, + "loss": 1.6429, + "step": 5262 + }, + { + "epoch": 0.555168776371308, + "grad_norm": 0.47854986786842346, + "learning_rate": 0.0015, + "loss": 1.6407, + "step": 5263 + }, + { + "epoch": 0.5552742616033756, + "grad_norm": 0.5072697401046753, + "learning_rate": 0.0015, + "loss": 1.6116, + "step": 5264 + }, + { + "epoch": 0.555379746835443, + "grad_norm": 0.5652428269386292, + "learning_rate": 0.0015, + "loss": 1.6292, + "step": 5265 + }, + { + "epoch": 0.5554852320675105, + "grad_norm": 0.6590447425842285, + "learning_rate": 0.0015, + "loss": 1.629, + "step": 5266 + }, + { + "epoch": 0.5555907172995781, + "grad_norm": 0.5257437229156494, + "learning_rate": 0.0015, + "loss": 
1.647, + "step": 5267 + }, + { + "epoch": 0.5556962025316455, + "grad_norm": 0.7118067741394043, + "learning_rate": 0.0015, + "loss": 1.6545, + "step": 5268 + }, + { + "epoch": 0.5558016877637131, + "grad_norm": 0.6626205444335938, + "learning_rate": 0.0015, + "loss": 1.6533, + "step": 5269 + }, + { + "epoch": 0.5559071729957806, + "grad_norm": 0.5884685516357422, + "learning_rate": 0.0015, + "loss": 1.6497, + "step": 5270 + }, + { + "epoch": 0.5560126582278481, + "grad_norm": 0.7747166156768799, + "learning_rate": 0.0015, + "loss": 1.6286, + "step": 5271 + }, + { + "epoch": 0.5561181434599156, + "grad_norm": 0.9125948548316956, + "learning_rate": 0.0015, + "loss": 1.6546, + "step": 5272 + }, + { + "epoch": 0.5562236286919832, + "grad_norm": 0.7135335803031921, + "learning_rate": 0.0015, + "loss": 1.5997, + "step": 5273 + }, + { + "epoch": 0.5563291139240506, + "grad_norm": 0.5398093461990356, + "learning_rate": 0.0015, + "loss": 1.5996, + "step": 5274 + }, + { + "epoch": 0.5564345991561181, + "grad_norm": 0.6518300175666809, + "learning_rate": 0.0015, + "loss": 1.6135, + "step": 5275 + }, + { + "epoch": 0.5565400843881857, + "grad_norm": 0.5473427772521973, + "learning_rate": 0.0015, + "loss": 1.6393, + "step": 5276 + }, + { + "epoch": 0.5566455696202531, + "grad_norm": 0.5354215502738953, + "learning_rate": 0.0015, + "loss": 1.654, + "step": 5277 + }, + { + "epoch": 0.5567510548523207, + "grad_norm": 0.5475490093231201, + "learning_rate": 0.0015, + "loss": 1.6302, + "step": 5278 + }, + { + "epoch": 0.5568565400843882, + "grad_norm": 0.4854798913002014, + "learning_rate": 0.0015, + "loss": 1.642, + "step": 5279 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 0.4936612844467163, + "learning_rate": 0.0015, + "loss": 1.6175, + "step": 5280 + }, + { + "epoch": 0.5570675105485232, + "grad_norm": 0.5744133591651917, + "learning_rate": 0.0015, + "loss": 1.6412, + "step": 5281 + }, + { + "epoch": 0.5571729957805908, + "grad_norm": 0.4862012267112732, + 
"learning_rate": 0.0015, + "loss": 1.6175, + "step": 5282 + }, + { + "epoch": 0.5572784810126582, + "grad_norm": 0.5772653222084045, + "learning_rate": 0.0015, + "loss": 1.6024, + "step": 5283 + }, + { + "epoch": 0.5573839662447257, + "grad_norm": 0.620054304599762, + "learning_rate": 0.0015, + "loss": 1.6391, + "step": 5284 + }, + { + "epoch": 0.5574894514767933, + "grad_norm": 0.6145972013473511, + "learning_rate": 0.0015, + "loss": 1.6216, + "step": 5285 + }, + { + "epoch": 0.5575949367088607, + "grad_norm": 0.47198721766471863, + "learning_rate": 0.0015, + "loss": 1.6235, + "step": 5286 + }, + { + "epoch": 0.5577004219409283, + "grad_norm": 0.4644085764884949, + "learning_rate": 0.0015, + "loss": 1.6061, + "step": 5287 + }, + { + "epoch": 0.5578059071729958, + "grad_norm": 0.4495280683040619, + "learning_rate": 0.0015, + "loss": 1.6333, + "step": 5288 + }, + { + "epoch": 0.5579113924050633, + "grad_norm": 0.5854806900024414, + "learning_rate": 0.0015, + "loss": 1.6526, + "step": 5289 + }, + { + "epoch": 0.5580168776371308, + "grad_norm": 0.5856172442436218, + "learning_rate": 0.0015, + "loss": 1.6654, + "step": 5290 + }, + { + "epoch": 0.5581223628691984, + "grad_norm": 0.5038925409317017, + "learning_rate": 0.0015, + "loss": 1.644, + "step": 5291 + }, + { + "epoch": 0.5582278481012658, + "grad_norm": 0.5857557654380798, + "learning_rate": 0.0015, + "loss": 1.6041, + "step": 5292 + }, + { + "epoch": 0.5583333333333333, + "grad_norm": 0.5493942499160767, + "learning_rate": 0.0015, + "loss": 1.6316, + "step": 5293 + }, + { + "epoch": 0.5584388185654009, + "grad_norm": 0.7882180213928223, + "learning_rate": 0.0015, + "loss": 1.652, + "step": 5294 + }, + { + "epoch": 0.5585443037974683, + "grad_norm": 0.9469513297080994, + "learning_rate": 0.0015, + "loss": 1.6256, + "step": 5295 + }, + { + "epoch": 0.5586497890295359, + "grad_norm": 0.7776630520820618, + "learning_rate": 0.0015, + "loss": 1.6557, + "step": 5296 + }, + { + "epoch": 0.5587552742616034, + 
"grad_norm": 0.5740671753883362, + "learning_rate": 0.0015, + "loss": 1.6239, + "step": 5297 + }, + { + "epoch": 0.5588607594936709, + "grad_norm": 0.8411259055137634, + "learning_rate": 0.0015, + "loss": 1.6224, + "step": 5298 + }, + { + "epoch": 0.5589662447257384, + "grad_norm": 0.5124502182006836, + "learning_rate": 0.0015, + "loss": 1.657, + "step": 5299 + }, + { + "epoch": 0.5590717299578059, + "grad_norm": 0.6385250091552734, + "learning_rate": 0.0015, + "loss": 1.6495, + "step": 5300 + }, + { + "epoch": 0.5591772151898734, + "grad_norm": 0.6861783266067505, + "learning_rate": 0.0015, + "loss": 1.6141, + "step": 5301 + }, + { + "epoch": 0.559282700421941, + "grad_norm": 0.5415860414505005, + "learning_rate": 0.0015, + "loss": 1.6295, + "step": 5302 + }, + { + "epoch": 0.5593881856540084, + "grad_norm": 0.5780659317970276, + "learning_rate": 0.0015, + "loss": 1.6418, + "step": 5303 + }, + { + "epoch": 0.5594936708860759, + "grad_norm": 0.6026530265808105, + "learning_rate": 0.0015, + "loss": 1.6151, + "step": 5304 + }, + { + "epoch": 0.5595991561181435, + "grad_norm": 0.46989232301712036, + "learning_rate": 0.0015, + "loss": 1.6531, + "step": 5305 + }, + { + "epoch": 0.5597046413502109, + "grad_norm": 0.5512782335281372, + "learning_rate": 0.0015, + "loss": 1.6447, + "step": 5306 + }, + { + "epoch": 0.5598101265822785, + "grad_norm": 0.5197358727455139, + "learning_rate": 0.0015, + "loss": 1.6308, + "step": 5307 + }, + { + "epoch": 0.559915611814346, + "grad_norm": 0.4554623067378998, + "learning_rate": 0.0015, + "loss": 1.587, + "step": 5308 + }, + { + "epoch": 0.5600210970464135, + "grad_norm": 0.47824883460998535, + "learning_rate": 0.0015, + "loss": 1.6189, + "step": 5309 + }, + { + "epoch": 0.560126582278481, + "grad_norm": 0.5234121680259705, + "learning_rate": 0.0015, + "loss": 1.6175, + "step": 5310 + }, + { + "epoch": 0.5602320675105485, + "grad_norm": 0.5879049897193909, + "learning_rate": 0.0015, + "loss": 1.634, + "step": 5311 + }, + { + "epoch": 
0.560337552742616, + "grad_norm": 0.47663185000419617, + "learning_rate": 0.0015, + "loss": 1.6143, + "step": 5312 + }, + { + "epoch": 0.5604430379746835, + "grad_norm": 0.610676109790802, + "learning_rate": 0.0015, + "loss": 1.6077, + "step": 5313 + }, + { + "epoch": 0.5605485232067511, + "grad_norm": 0.515486478805542, + "learning_rate": 0.0015, + "loss": 1.5981, + "step": 5314 + }, + { + "epoch": 0.5606540084388185, + "grad_norm": 0.5926443934440613, + "learning_rate": 0.0015, + "loss": 1.6243, + "step": 5315 + }, + { + "epoch": 0.5607594936708861, + "grad_norm": 0.6808208227157593, + "learning_rate": 0.0015, + "loss": 1.6473, + "step": 5316 + }, + { + "epoch": 0.5608649789029536, + "grad_norm": 0.62575364112854, + "learning_rate": 0.0015, + "loss": 1.5944, + "step": 5317 + }, + { + "epoch": 0.560970464135021, + "grad_norm": 0.5204060673713684, + "learning_rate": 0.0015, + "loss": 1.6593, + "step": 5318 + }, + { + "epoch": 0.5610759493670886, + "grad_norm": 0.6099985241889954, + "learning_rate": 0.0015, + "loss": 1.6265, + "step": 5319 + }, + { + "epoch": 0.5611814345991561, + "grad_norm": 0.7034229636192322, + "learning_rate": 0.0015, + "loss": 1.6248, + "step": 5320 + }, + { + "epoch": 0.5612869198312236, + "grad_norm": 0.5411562919616699, + "learning_rate": 0.0015, + "loss": 1.6601, + "step": 5321 + }, + { + "epoch": 0.5613924050632911, + "grad_norm": 0.6399033665657043, + "learning_rate": 0.0015, + "loss": 1.6489, + "step": 5322 + }, + { + "epoch": 0.5614978902953587, + "grad_norm": 0.535225510597229, + "learning_rate": 0.0015, + "loss": 1.646, + "step": 5323 + }, + { + "epoch": 0.5616033755274261, + "grad_norm": 0.5888151526451111, + "learning_rate": 0.0015, + "loss": 1.6392, + "step": 5324 + }, + { + "epoch": 0.5617088607594937, + "grad_norm": 0.6667788624763489, + "learning_rate": 0.0015, + "loss": 1.619, + "step": 5325 + }, + { + "epoch": 0.5618143459915612, + "grad_norm": 0.7977067232131958, + "learning_rate": 0.0015, + "loss": 1.6292, + "step": 5326 + 
}, + { + "epoch": 0.5619198312236287, + "grad_norm": 0.5076849460601807, + "learning_rate": 0.0015, + "loss": 1.6028, + "step": 5327 + }, + { + "epoch": 0.5620253164556962, + "grad_norm": 0.5596925020217896, + "learning_rate": 0.0015, + "loss": 1.6012, + "step": 5328 + }, + { + "epoch": 0.5621308016877637, + "grad_norm": 0.5293904542922974, + "learning_rate": 0.0015, + "loss": 1.6392, + "step": 5329 + }, + { + "epoch": 0.5622362869198312, + "grad_norm": 0.598557710647583, + "learning_rate": 0.0015, + "loss": 1.6443, + "step": 5330 + }, + { + "epoch": 0.5623417721518987, + "grad_norm": 0.5245181918144226, + "learning_rate": 0.0015, + "loss": 1.6348, + "step": 5331 + }, + { + "epoch": 0.5624472573839663, + "grad_norm": 0.5318989157676697, + "learning_rate": 0.0015, + "loss": 1.6329, + "step": 5332 + }, + { + "epoch": 0.5625527426160337, + "grad_norm": 0.49922168254852295, + "learning_rate": 0.0015, + "loss": 1.6056, + "step": 5333 + }, + { + "epoch": 0.5626582278481013, + "grad_norm": 0.47323620319366455, + "learning_rate": 0.0015, + "loss": 1.6365, + "step": 5334 + }, + { + "epoch": 0.5627637130801688, + "grad_norm": 0.6924019455909729, + "learning_rate": 0.0015, + "loss": 1.6395, + "step": 5335 + }, + { + "epoch": 0.5628691983122363, + "grad_norm": 0.6119868159294128, + "learning_rate": 0.0015, + "loss": 1.6402, + "step": 5336 + }, + { + "epoch": 0.5629746835443038, + "grad_norm": 0.48253345489501953, + "learning_rate": 0.0015, + "loss": 1.6144, + "step": 5337 + }, + { + "epoch": 0.5630801687763713, + "grad_norm": 0.5210509896278381, + "learning_rate": 0.0015, + "loss": 1.613, + "step": 5338 + }, + { + "epoch": 0.5631856540084388, + "grad_norm": 0.5134233236312866, + "learning_rate": 0.0015, + "loss": 1.6311, + "step": 5339 + }, + { + "epoch": 0.5632911392405063, + "grad_norm": 0.4461381435394287, + "learning_rate": 0.0015, + "loss": 1.6321, + "step": 5340 + }, + { + "epoch": 0.5633966244725739, + "grad_norm": 0.5335596203804016, + "learning_rate": 0.0015, + 
"loss": 1.6686, + "step": 5341 + }, + { + "epoch": 0.5635021097046413, + "grad_norm": 0.45899948477745056, + "learning_rate": 0.0015, + "loss": 1.6152, + "step": 5342 + }, + { + "epoch": 0.5636075949367089, + "grad_norm": 0.5987926721572876, + "learning_rate": 0.0015, + "loss": 1.64, + "step": 5343 + }, + { + "epoch": 0.5637130801687764, + "grad_norm": 0.524309515953064, + "learning_rate": 0.0015, + "loss": 1.6046, + "step": 5344 + }, + { + "epoch": 0.5638185654008439, + "grad_norm": 0.4791477620601654, + "learning_rate": 0.0015, + "loss": 1.6378, + "step": 5345 + }, + { + "epoch": 0.5639240506329114, + "grad_norm": 0.49584996700286865, + "learning_rate": 0.0015, + "loss": 1.6134, + "step": 5346 + }, + { + "epoch": 0.564029535864979, + "grad_norm": 0.5700047612190247, + "learning_rate": 0.0015, + "loss": 1.5724, + "step": 5347 + }, + { + "epoch": 0.5641350210970464, + "grad_norm": 0.4859113097190857, + "learning_rate": 0.0015, + "loss": 1.6003, + "step": 5348 + }, + { + "epoch": 0.5642405063291139, + "grad_norm": 0.5591371059417725, + "learning_rate": 0.0015, + "loss": 1.6279, + "step": 5349 + }, + { + "epoch": 0.5643459915611815, + "grad_norm": 0.5093802809715271, + "learning_rate": 0.0015, + "loss": 1.6032, + "step": 5350 + }, + { + "epoch": 0.5644514767932489, + "grad_norm": 0.6617925763130188, + "learning_rate": 0.0015, + "loss": 1.6107, + "step": 5351 + }, + { + "epoch": 0.5645569620253165, + "grad_norm": 0.6160645484924316, + "learning_rate": 0.0015, + "loss": 1.621, + "step": 5352 + }, + { + "epoch": 0.564662447257384, + "grad_norm": 0.5530300736427307, + "learning_rate": 0.0015, + "loss": 1.6396, + "step": 5353 + }, + { + "epoch": 0.5647679324894515, + "grad_norm": 0.5163722038269043, + "learning_rate": 0.0015, + "loss": 1.6255, + "step": 5354 + }, + { + "epoch": 0.564873417721519, + "grad_norm": 0.5374283194541931, + "learning_rate": 0.0015, + "loss": 1.6338, + "step": 5355 + }, + { + "epoch": 0.5649789029535865, + "grad_norm": 0.49130526185035706, + 
"learning_rate": 0.0015, + "loss": 1.5974, + "step": 5356 + }, + { + "epoch": 0.565084388185654, + "grad_norm": 0.5461239814758301, + "learning_rate": 0.0015, + "loss": 1.6248, + "step": 5357 + }, + { + "epoch": 0.5651898734177215, + "grad_norm": 0.5294837951660156, + "learning_rate": 0.0015, + "loss": 1.6321, + "step": 5358 + }, + { + "epoch": 0.5652953586497891, + "grad_norm": 0.49759408831596375, + "learning_rate": 0.0015, + "loss": 1.6372, + "step": 5359 + }, + { + "epoch": 0.5654008438818565, + "grad_norm": 0.5359417200088501, + "learning_rate": 0.0015, + "loss": 1.6665, + "step": 5360 + }, + { + "epoch": 0.5655063291139241, + "grad_norm": 0.47370314598083496, + "learning_rate": 0.0015, + "loss": 1.6385, + "step": 5361 + }, + { + "epoch": 0.5656118143459916, + "grad_norm": 0.5473756790161133, + "learning_rate": 0.0015, + "loss": 1.6366, + "step": 5362 + }, + { + "epoch": 0.565717299578059, + "grad_norm": 0.45411214232444763, + "learning_rate": 0.0015, + "loss": 1.6972, + "step": 5363 + }, + { + "epoch": 0.5658227848101266, + "grad_norm": 0.4845397472381592, + "learning_rate": 0.0015, + "loss": 1.6152, + "step": 5364 + }, + { + "epoch": 0.5659282700421941, + "grad_norm": 0.5022138953208923, + "learning_rate": 0.0015, + "loss": 1.6173, + "step": 5365 + }, + { + "epoch": 0.5660337552742616, + "grad_norm": 0.6727507710456848, + "learning_rate": 0.0015, + "loss": 1.6413, + "step": 5366 + }, + { + "epoch": 0.5661392405063291, + "grad_norm": 0.532525360584259, + "learning_rate": 0.0015, + "loss": 1.6213, + "step": 5367 + }, + { + "epoch": 0.5662447257383966, + "grad_norm": 0.4713844656944275, + "learning_rate": 0.0015, + "loss": 1.6208, + "step": 5368 + }, + { + "epoch": 0.5663502109704641, + "grad_norm": 0.6724514365196228, + "learning_rate": 0.0015, + "loss": 1.58, + "step": 5369 + }, + { + "epoch": 0.5664556962025317, + "grad_norm": 0.6096547245979309, + "learning_rate": 0.0015, + "loss": 1.6407, + "step": 5370 + }, + { + "epoch": 0.5665611814345991, + 
"grad_norm": 0.5276410579681396, + "learning_rate": 0.0015, + "loss": 1.64, + "step": 5371 + }, + { + "epoch": 0.5666666666666667, + "grad_norm": 0.5840702056884766, + "learning_rate": 0.0015, + "loss": 1.6208, + "step": 5372 + }, + { + "epoch": 0.5667721518987342, + "grad_norm": 0.56119304895401, + "learning_rate": 0.0015, + "loss": 1.5967, + "step": 5373 + }, + { + "epoch": 0.5668776371308016, + "grad_norm": 0.4861997365951538, + "learning_rate": 0.0015, + "loss": 1.6365, + "step": 5374 + }, + { + "epoch": 0.5669831223628692, + "grad_norm": 0.5228224396705627, + "learning_rate": 0.0015, + "loss": 1.62, + "step": 5375 + }, + { + "epoch": 0.5670886075949367, + "grad_norm": 0.5005890727043152, + "learning_rate": 0.0015, + "loss": 1.5852, + "step": 5376 + }, + { + "epoch": 0.5671940928270042, + "grad_norm": 0.5229915380477905, + "learning_rate": 0.0015, + "loss": 1.6726, + "step": 5377 + }, + { + "epoch": 0.5672995780590717, + "grad_norm": 0.5707440972328186, + "learning_rate": 0.0015, + "loss": 1.5763, + "step": 5378 + }, + { + "epoch": 0.5674050632911393, + "grad_norm": 0.5765089988708496, + "learning_rate": 0.0015, + "loss": 1.6413, + "step": 5379 + }, + { + "epoch": 0.5675105485232067, + "grad_norm": 0.5625852346420288, + "learning_rate": 0.0015, + "loss": 1.6154, + "step": 5380 + }, + { + "epoch": 0.5676160337552743, + "grad_norm": 0.563363254070282, + "learning_rate": 0.0015, + "loss": 1.5958, + "step": 5381 + }, + { + "epoch": 0.5677215189873418, + "grad_norm": 0.6581175923347473, + "learning_rate": 0.0015, + "loss": 1.5799, + "step": 5382 + }, + { + "epoch": 0.5678270042194092, + "grad_norm": 0.5173786878585815, + "learning_rate": 0.0015, + "loss": 1.6262, + "step": 5383 + }, + { + "epoch": 0.5679324894514768, + "grad_norm": 0.5384842753410339, + "learning_rate": 0.0015, + "loss": 1.6297, + "step": 5384 + }, + { + "epoch": 0.5680379746835443, + "grad_norm": 0.7846764922142029, + "learning_rate": 0.0015, + "loss": 1.6302, + "step": 5385 + }, + { + "epoch": 
0.5681434599156118, + "grad_norm": 0.895563006401062, + "learning_rate": 0.0015, + "loss": 1.6506, + "step": 5386 + }, + { + "epoch": 0.5682489451476793, + "grad_norm": 0.5403962135314941, + "learning_rate": 0.0015, + "loss": 1.6369, + "step": 5387 + }, + { + "epoch": 0.5683544303797469, + "grad_norm": 0.9266017079353333, + "learning_rate": 0.0015, + "loss": 1.6305, + "step": 5388 + }, + { + "epoch": 0.5684599156118143, + "grad_norm": 0.7382924556732178, + "learning_rate": 0.0015, + "loss": 1.6365, + "step": 5389 + }, + { + "epoch": 0.5685654008438819, + "grad_norm": 0.6781361699104309, + "learning_rate": 0.0015, + "loss": 1.632, + "step": 5390 + }, + { + "epoch": 0.5686708860759494, + "grad_norm": 1.043846607208252, + "learning_rate": 0.0015, + "loss": 1.6228, + "step": 5391 + }, + { + "epoch": 0.5687763713080168, + "grad_norm": 0.4971105456352234, + "learning_rate": 0.0015, + "loss": 1.6222, + "step": 5392 + }, + { + "epoch": 0.5688818565400844, + "grad_norm": 0.8194665312767029, + "learning_rate": 0.0015, + "loss": 1.6542, + "step": 5393 + }, + { + "epoch": 0.5689873417721519, + "grad_norm": 0.6555083990097046, + "learning_rate": 0.0015, + "loss": 1.6323, + "step": 5394 + }, + { + "epoch": 0.5690928270042194, + "grad_norm": 0.6417784094810486, + "learning_rate": 0.0015, + "loss": 1.6116, + "step": 5395 + }, + { + "epoch": 0.5691983122362869, + "grad_norm": 0.7614520788192749, + "learning_rate": 0.0015, + "loss": 1.6269, + "step": 5396 + }, + { + "epoch": 0.5693037974683545, + "grad_norm": 0.6229608654975891, + "learning_rate": 0.0015, + "loss": 1.655, + "step": 5397 + }, + { + "epoch": 0.5694092827004219, + "grad_norm": 0.7862270474433899, + "learning_rate": 0.0015, + "loss": 1.632, + "step": 5398 + }, + { + "epoch": 0.5695147679324895, + "grad_norm": 0.6895025968551636, + "learning_rate": 0.0015, + "loss": 1.604, + "step": 5399 + }, + { + "epoch": 0.569620253164557, + "grad_norm": 0.6094022393226624, + "learning_rate": 0.0015, + "loss": 1.5997, + "step": 5400 + 
}, + { + "epoch": 0.5697257383966244, + "grad_norm": 0.6876747608184814, + "learning_rate": 0.0015, + "loss": 1.6291, + "step": 5401 + }, + { + "epoch": 0.569831223628692, + "grad_norm": 0.6110780239105225, + "learning_rate": 0.0015, + "loss": 1.6281, + "step": 5402 + }, + { + "epoch": 0.5699367088607595, + "grad_norm": 0.5970811247825623, + "learning_rate": 0.0015, + "loss": 1.6446, + "step": 5403 + }, + { + "epoch": 0.570042194092827, + "grad_norm": 0.5428264141082764, + "learning_rate": 0.0015, + "loss": 1.6744, + "step": 5404 + }, + { + "epoch": 0.5701476793248945, + "grad_norm": 0.6532614827156067, + "learning_rate": 0.0015, + "loss": 1.6178, + "step": 5405 + }, + { + "epoch": 0.5702531645569621, + "grad_norm": 0.4627121388912201, + "learning_rate": 0.0015, + "loss": 1.584, + "step": 5406 + }, + { + "epoch": 0.5703586497890295, + "grad_norm": 0.5733729004859924, + "learning_rate": 0.0015, + "loss": 1.6214, + "step": 5407 + }, + { + "epoch": 0.570464135021097, + "grad_norm": 0.5524259209632874, + "learning_rate": 0.0015, + "loss": 1.6239, + "step": 5408 + }, + { + "epoch": 0.5705696202531646, + "grad_norm": 0.49608471989631653, + "learning_rate": 0.0015, + "loss": 1.6361, + "step": 5409 + }, + { + "epoch": 0.570675105485232, + "grad_norm": 0.521388053894043, + "learning_rate": 0.0015, + "loss": 1.5991, + "step": 5410 + }, + { + "epoch": 0.5707805907172996, + "grad_norm": 0.5712475776672363, + "learning_rate": 0.0015, + "loss": 1.6184, + "step": 5411 + }, + { + "epoch": 0.5708860759493671, + "grad_norm": 0.47182101011276245, + "learning_rate": 0.0015, + "loss": 1.6278, + "step": 5412 + }, + { + "epoch": 0.5709915611814346, + "grad_norm": 0.5659692883491516, + "learning_rate": 0.0015, + "loss": 1.6434, + "step": 5413 + }, + { + "epoch": 0.5710970464135021, + "grad_norm": 0.7929262518882751, + "learning_rate": 0.0015, + "loss": 1.5769, + "step": 5414 + }, + { + "epoch": 0.5712025316455697, + "grad_norm": 0.5173774361610413, + "learning_rate": 0.0015, + "loss": 
1.6585, + "step": 5415 + }, + { + "epoch": 0.5713080168776371, + "grad_norm": 0.5311462879180908, + "learning_rate": 0.0015, + "loss": 1.611, + "step": 5416 + }, + { + "epoch": 0.5714135021097047, + "grad_norm": 0.5997411012649536, + "learning_rate": 0.0015, + "loss": 1.6346, + "step": 5417 + }, + { + "epoch": 0.5715189873417722, + "grad_norm": 0.5332018733024597, + "learning_rate": 0.0015, + "loss": 1.6247, + "step": 5418 + }, + { + "epoch": 0.5716244725738396, + "grad_norm": 0.5726498365402222, + "learning_rate": 0.0015, + "loss": 1.603, + "step": 5419 + }, + { + "epoch": 0.5717299578059072, + "grad_norm": 0.550566554069519, + "learning_rate": 0.0015, + "loss": 1.6147, + "step": 5420 + }, + { + "epoch": 0.5718354430379747, + "grad_norm": 0.4947754144668579, + "learning_rate": 0.0015, + "loss": 1.6544, + "step": 5421 + }, + { + "epoch": 0.5719409282700422, + "grad_norm": 0.4676307737827301, + "learning_rate": 0.0015, + "loss": 1.634, + "step": 5422 + }, + { + "epoch": 0.5720464135021097, + "grad_norm": 0.4953192472457886, + "learning_rate": 0.0015, + "loss": 1.5834, + "step": 5423 + }, + { + "epoch": 0.5721518987341773, + "grad_norm": 0.49735572934150696, + "learning_rate": 0.0015, + "loss": 1.601, + "step": 5424 + }, + { + "epoch": 0.5722573839662447, + "grad_norm": 0.6120027303695679, + "learning_rate": 0.0015, + "loss": 1.6274, + "step": 5425 + }, + { + "epoch": 0.5723628691983123, + "grad_norm": 0.5690551400184631, + "learning_rate": 0.0015, + "loss": 1.64, + "step": 5426 + }, + { + "epoch": 0.5724683544303798, + "grad_norm": 0.6098531484603882, + "learning_rate": 0.0015, + "loss": 1.6364, + "step": 5427 + }, + { + "epoch": 0.5725738396624472, + "grad_norm": 0.5622851252555847, + "learning_rate": 0.0015, + "loss": 1.616, + "step": 5428 + }, + { + "epoch": 0.5726793248945148, + "grad_norm": 0.4972100555896759, + "learning_rate": 0.0015, + "loss": 1.6117, + "step": 5429 + }, + { + "epoch": 0.5727848101265823, + "grad_norm": 0.6018389463424683, + "learning_rate": 
0.0015, + "loss": 1.5915, + "step": 5430 + }, + { + "epoch": 0.5728902953586498, + "grad_norm": 0.6689639687538147, + "learning_rate": 0.0015, + "loss": 1.6499, + "step": 5431 + }, + { + "epoch": 0.5729957805907173, + "grad_norm": 0.5190061926841736, + "learning_rate": 0.0015, + "loss": 1.6036, + "step": 5432 + }, + { + "epoch": 0.5731012658227848, + "grad_norm": 0.6291723847389221, + "learning_rate": 0.0015, + "loss": 1.5978, + "step": 5433 + }, + { + "epoch": 0.5732067510548523, + "grad_norm": 0.6594454050064087, + "learning_rate": 0.0015, + "loss": 1.6667, + "step": 5434 + }, + { + "epoch": 0.5733122362869199, + "grad_norm": 0.46946433186531067, + "learning_rate": 0.0015, + "loss": 1.5823, + "step": 5435 + }, + { + "epoch": 0.5734177215189873, + "grad_norm": 0.8045597076416016, + "learning_rate": 0.0015, + "loss": 1.6093, + "step": 5436 + }, + { + "epoch": 0.5735232067510548, + "grad_norm": 1.0402706861495972, + "learning_rate": 0.0015, + "loss": 1.5974, + "step": 5437 + }, + { + "epoch": 0.5736286919831224, + "grad_norm": 0.4941970407962799, + "learning_rate": 0.0015, + "loss": 1.6368, + "step": 5438 + }, + { + "epoch": 0.5737341772151898, + "grad_norm": 0.8785650730133057, + "learning_rate": 0.0015, + "loss": 1.6295, + "step": 5439 + }, + { + "epoch": 0.5738396624472574, + "grad_norm": 0.8148274421691895, + "learning_rate": 0.0015, + "loss": 1.62, + "step": 5440 + }, + { + "epoch": 0.5739451476793249, + "grad_norm": 0.5330122709274292, + "learning_rate": 0.0015, + "loss": 1.6256, + "step": 5441 + }, + { + "epoch": 0.5740506329113924, + "grad_norm": 0.8619452714920044, + "learning_rate": 0.0015, + "loss": 1.6285, + "step": 5442 + }, + { + "epoch": 0.5741561181434599, + "grad_norm": 0.6316558122634888, + "learning_rate": 0.0015, + "loss": 1.635, + "step": 5443 + }, + { + "epoch": 0.5742616033755275, + "grad_norm": 0.7529088854789734, + "learning_rate": 0.0015, + "loss": 1.6314, + "step": 5444 + }, + { + "epoch": 0.5743670886075949, + "grad_norm": 
0.8501078486442566, + "learning_rate": 0.0015, + "loss": 1.606, + "step": 5445 + }, + { + "epoch": 0.5744725738396624, + "grad_norm": 0.6048441529273987, + "learning_rate": 0.0015, + "loss": 1.6464, + "step": 5446 + }, + { + "epoch": 0.57457805907173, + "grad_norm": 0.7871187925338745, + "learning_rate": 0.0015, + "loss": 1.6494, + "step": 5447 + }, + { + "epoch": 0.5746835443037974, + "grad_norm": 0.7705832719802856, + "learning_rate": 0.0015, + "loss": 1.636, + "step": 5448 + }, + { + "epoch": 0.574789029535865, + "grad_norm": 0.629921555519104, + "learning_rate": 0.0015, + "loss": 1.6192, + "step": 5449 + }, + { + "epoch": 0.5748945147679325, + "grad_norm": 0.606794536113739, + "learning_rate": 0.0015, + "loss": 1.6081, + "step": 5450 + }, + { + "epoch": 0.575, + "grad_norm": 0.73259037733078, + "learning_rate": 0.0015, + "loss": 1.6302, + "step": 5451 + }, + { + "epoch": 0.5751054852320675, + "grad_norm": 0.5201510787010193, + "learning_rate": 0.0015, + "loss": 1.5911, + "step": 5452 + }, + { + "epoch": 0.575210970464135, + "grad_norm": 0.6375489234924316, + "learning_rate": 0.0015, + "loss": 1.6514, + "step": 5453 + }, + { + "epoch": 0.5753164556962025, + "grad_norm": 0.5524571537971497, + "learning_rate": 0.0015, + "loss": 1.6288, + "step": 5454 + }, + { + "epoch": 0.57542194092827, + "grad_norm": 0.6841790080070496, + "learning_rate": 0.0015, + "loss": 1.6723, + "step": 5455 + }, + { + "epoch": 0.5755274261603376, + "grad_norm": 0.5184100270271301, + "learning_rate": 0.0015, + "loss": 1.6012, + "step": 5456 + }, + { + "epoch": 0.575632911392405, + "grad_norm": 0.5849893689155579, + "learning_rate": 0.0015, + "loss": 1.6336, + "step": 5457 + }, + { + "epoch": 0.5757383966244726, + "grad_norm": 0.5877789855003357, + "learning_rate": 0.0015, + "loss": 1.6124, + "step": 5458 + }, + { + "epoch": 0.5758438818565401, + "grad_norm": 0.5175401568412781, + "learning_rate": 0.0015, + "loss": 1.6113, + "step": 5459 + }, + { + "epoch": 0.5759493670886076, + "grad_norm": 
0.553992509841919, + "learning_rate": 0.0015, + "loss": 1.634, + "step": 5460 + }, + { + "epoch": 0.5760548523206751, + "grad_norm": 0.4861304759979248, + "learning_rate": 0.0015, + "loss": 1.5881, + "step": 5461 + }, + { + "epoch": 0.5761603375527427, + "grad_norm": 0.623470664024353, + "learning_rate": 0.0015, + "loss": 1.5811, + "step": 5462 + }, + { + "epoch": 0.5762658227848101, + "grad_norm": 0.6538658738136292, + "learning_rate": 0.0015, + "loss": 1.6206, + "step": 5463 + }, + { + "epoch": 0.5763713080168776, + "grad_norm": 0.4597054421901703, + "learning_rate": 0.0015, + "loss": 1.6034, + "step": 5464 + }, + { + "epoch": 0.5764767932489452, + "grad_norm": 0.6882071495056152, + "learning_rate": 0.0015, + "loss": 1.6632, + "step": 5465 + }, + { + "epoch": 0.5765822784810126, + "grad_norm": 0.5916186571121216, + "learning_rate": 0.0015, + "loss": 1.6543, + "step": 5466 + }, + { + "epoch": 0.5766877637130802, + "grad_norm": 0.5639544725418091, + "learning_rate": 0.0015, + "loss": 1.626, + "step": 5467 + }, + { + "epoch": 0.5767932489451477, + "grad_norm": 0.7871806025505066, + "learning_rate": 0.0015, + "loss": 1.6444, + "step": 5468 + }, + { + "epoch": 0.5768987341772152, + "grad_norm": 0.5329530835151672, + "learning_rate": 0.0015, + "loss": 1.6369, + "step": 5469 + }, + { + "epoch": 0.5770042194092827, + "grad_norm": 0.5972363948822021, + "learning_rate": 0.0015, + "loss": 1.6298, + "step": 5470 + }, + { + "epoch": 0.5771097046413503, + "grad_norm": 0.5829533934593201, + "learning_rate": 0.0015, + "loss": 1.6347, + "step": 5471 + }, + { + "epoch": 0.5772151898734177, + "grad_norm": 0.5822646617889404, + "learning_rate": 0.0015, + "loss": 1.6127, + "step": 5472 + }, + { + "epoch": 0.5773206751054852, + "grad_norm": 0.5193725824356079, + "learning_rate": 0.0015, + "loss": 1.5926, + "step": 5473 + }, + { + "epoch": 0.5774261603375528, + "grad_norm": 0.5308032035827637, + "learning_rate": 0.0015, + "loss": 1.5972, + "step": 5474 + }, + { + "epoch": 
0.5775316455696202, + "grad_norm": 0.548362135887146, + "learning_rate": 0.0015, + "loss": 1.6194, + "step": 5475 + }, + { + "epoch": 0.5776371308016878, + "grad_norm": 0.5116716027259827, + "learning_rate": 0.0015, + "loss": 1.605, + "step": 5476 + }, + { + "epoch": 0.5777426160337553, + "grad_norm": 0.5850107073783875, + "learning_rate": 0.0015, + "loss": 1.6641, + "step": 5477 + }, + { + "epoch": 0.5778481012658228, + "grad_norm": 0.5661741495132446, + "learning_rate": 0.0015, + "loss": 1.6372, + "step": 5478 + }, + { + "epoch": 0.5779535864978903, + "grad_norm": 0.5660033226013184, + "learning_rate": 0.0015, + "loss": 1.6333, + "step": 5479 + }, + { + "epoch": 0.5780590717299579, + "grad_norm": 0.7832927107810974, + "learning_rate": 0.0015, + "loss": 1.6677, + "step": 5480 + }, + { + "epoch": 0.5781645569620253, + "grad_norm": 0.6293277740478516, + "learning_rate": 0.0015, + "loss": 1.5881, + "step": 5481 + }, + { + "epoch": 0.5782700421940928, + "grad_norm": 0.6669265627861023, + "learning_rate": 0.0015, + "loss": 1.6425, + "step": 5482 + }, + { + "epoch": 0.5783755274261604, + "grad_norm": 0.8398557901382446, + "learning_rate": 0.0015, + "loss": 1.6499, + "step": 5483 + }, + { + "epoch": 0.5784810126582278, + "grad_norm": 0.5337482690811157, + "learning_rate": 0.0015, + "loss": 1.6316, + "step": 5484 + }, + { + "epoch": 0.5785864978902954, + "grad_norm": 1.0220376253128052, + "learning_rate": 0.0015, + "loss": 1.6422, + "step": 5485 + }, + { + "epoch": 0.5786919831223629, + "grad_norm": 0.6867162585258484, + "learning_rate": 0.0015, + "loss": 1.6111, + "step": 5486 + }, + { + "epoch": 0.5787974683544304, + "grad_norm": 0.783750057220459, + "learning_rate": 0.0015, + "loss": 1.6436, + "step": 5487 + }, + { + "epoch": 0.5789029535864979, + "grad_norm": 0.9936336278915405, + "learning_rate": 0.0015, + "loss": 1.62, + "step": 5488 + }, + { + "epoch": 0.5790084388185655, + "grad_norm": 0.6602028608322144, + "learning_rate": 0.0015, + "loss": 1.6845, + "step": 5489 
+ }, + { + "epoch": 0.5791139240506329, + "grad_norm": 0.8614243268966675, + "learning_rate": 0.0015, + "loss": 1.6211, + "step": 5490 + }, + { + "epoch": 0.5792194092827004, + "grad_norm": 0.7624712586402893, + "learning_rate": 0.0015, + "loss": 1.6184, + "step": 5491 + }, + { + "epoch": 0.579324894514768, + "grad_norm": 0.5247186422348022, + "learning_rate": 0.0015, + "loss": 1.6313, + "step": 5492 + }, + { + "epoch": 0.5794303797468354, + "grad_norm": 0.6216135025024414, + "learning_rate": 0.0015, + "loss": 1.638, + "step": 5493 + }, + { + "epoch": 0.579535864978903, + "grad_norm": 0.5212397575378418, + "learning_rate": 0.0015, + "loss": 1.606, + "step": 5494 + }, + { + "epoch": 0.5796413502109705, + "grad_norm": 0.5704751014709473, + "learning_rate": 0.0015, + "loss": 1.6519, + "step": 5495 + }, + { + "epoch": 0.579746835443038, + "grad_norm": 0.5986801385879517, + "learning_rate": 0.0015, + "loss": 1.5896, + "step": 5496 + }, + { + "epoch": 0.5798523206751055, + "grad_norm": 0.493880033493042, + "learning_rate": 0.0015, + "loss": 1.6191, + "step": 5497 + }, + { + "epoch": 0.5799578059071729, + "grad_norm": 0.4813474714756012, + "learning_rate": 0.0015, + "loss": 1.6106, + "step": 5498 + }, + { + "epoch": 0.5800632911392405, + "grad_norm": 0.5063060522079468, + "learning_rate": 0.0015, + "loss": 1.6582, + "step": 5499 + }, + { + "epoch": 0.580168776371308, + "grad_norm": 0.45211276412010193, + "learning_rate": 0.0015, + "loss": 1.5961, + "step": 5500 + }, + { + "epoch": 0.5802742616033755, + "grad_norm": 0.6631359457969666, + "learning_rate": 0.0015, + "loss": 1.6114, + "step": 5501 + }, + { + "epoch": 0.580379746835443, + "grad_norm": 0.5923169255256653, + "learning_rate": 0.0015, + "loss": 1.6518, + "step": 5502 + }, + { + "epoch": 0.5804852320675106, + "grad_norm": 0.531610906124115, + "learning_rate": 0.0015, + "loss": 1.6384, + "step": 5503 + }, + { + "epoch": 0.580590717299578, + "grad_norm": 0.5216389894485474, + "learning_rate": 0.0015, + "loss": 
1.6108, + "step": 5504 + }, + { + "epoch": 0.5806962025316456, + "grad_norm": 0.4604436755180359, + "learning_rate": 0.0015, + "loss": 1.606, + "step": 5505 + }, + { + "epoch": 0.5808016877637131, + "grad_norm": 0.5579452514648438, + "learning_rate": 0.0015, + "loss": 1.6573, + "step": 5506 + }, + { + "epoch": 0.5809071729957805, + "grad_norm": 0.5045171976089478, + "learning_rate": 0.0015, + "loss": 1.6395, + "step": 5507 + }, + { + "epoch": 0.5810126582278481, + "grad_norm": 0.49698296189308167, + "learning_rate": 0.0015, + "loss": 1.5988, + "step": 5508 + }, + { + "epoch": 0.5811181434599156, + "grad_norm": 0.49496710300445557, + "learning_rate": 0.0015, + "loss": 1.6378, + "step": 5509 + }, + { + "epoch": 0.5812236286919831, + "grad_norm": 0.6719024777412415, + "learning_rate": 0.0015, + "loss": 1.612, + "step": 5510 + }, + { + "epoch": 0.5813291139240506, + "grad_norm": 0.6405683159828186, + "learning_rate": 0.0015, + "loss": 1.5809, + "step": 5511 + }, + { + "epoch": 0.5814345991561182, + "grad_norm": 0.5058594346046448, + "learning_rate": 0.0015, + "loss": 1.615, + "step": 5512 + }, + { + "epoch": 0.5815400843881856, + "grad_norm": 0.6330063343048096, + "learning_rate": 0.0015, + "loss": 1.6339, + "step": 5513 + }, + { + "epoch": 0.5816455696202532, + "grad_norm": 0.49307113885879517, + "learning_rate": 0.0015, + "loss": 1.6381, + "step": 5514 + }, + { + "epoch": 0.5817510548523207, + "grad_norm": 0.4965069591999054, + "learning_rate": 0.0015, + "loss": 1.6238, + "step": 5515 + }, + { + "epoch": 0.5818565400843881, + "grad_norm": 0.5093645453453064, + "learning_rate": 0.0015, + "loss": 1.5887, + "step": 5516 + }, + { + "epoch": 0.5819620253164557, + "grad_norm": 0.45555707812309265, + "learning_rate": 0.0015, + "loss": 1.6051, + "step": 5517 + }, + { + "epoch": 0.5820675105485232, + "grad_norm": 0.4636574983596802, + "learning_rate": 0.0015, + "loss": 1.6377, + "step": 5518 + }, + { + "epoch": 0.5821729957805907, + "grad_norm": 0.5562624931335449, + 
"learning_rate": 0.0015, + "loss": 1.6118, + "step": 5519 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 0.5193811655044556, + "learning_rate": 0.0015, + "loss": 1.6555, + "step": 5520 + }, + { + "epoch": 0.5823839662447258, + "grad_norm": 0.6242823004722595, + "learning_rate": 0.0015, + "loss": 1.6047, + "step": 5521 + }, + { + "epoch": 0.5824894514767932, + "grad_norm": 0.6714182496070862, + "learning_rate": 0.0015, + "loss": 1.6045, + "step": 5522 + }, + { + "epoch": 0.5825949367088608, + "grad_norm": 0.4450235962867737, + "learning_rate": 0.0015, + "loss": 1.5997, + "step": 5523 + }, + { + "epoch": 0.5827004219409283, + "grad_norm": 0.581916093826294, + "learning_rate": 0.0015, + "loss": 1.626, + "step": 5524 + }, + { + "epoch": 0.5828059071729957, + "grad_norm": 0.4704454243183136, + "learning_rate": 0.0015, + "loss": 1.608, + "step": 5525 + }, + { + "epoch": 0.5829113924050633, + "grad_norm": 0.5212851762771606, + "learning_rate": 0.0015, + "loss": 1.6189, + "step": 5526 + }, + { + "epoch": 0.5830168776371308, + "grad_norm": 0.5930542349815369, + "learning_rate": 0.0015, + "loss": 1.6078, + "step": 5527 + }, + { + "epoch": 0.5831223628691983, + "grad_norm": 0.5354674458503723, + "learning_rate": 0.0015, + "loss": 1.6332, + "step": 5528 + }, + { + "epoch": 0.5832278481012658, + "grad_norm": 0.6332305669784546, + "learning_rate": 0.0015, + "loss": 1.6052, + "step": 5529 + }, + { + "epoch": 0.5833333333333334, + "grad_norm": 0.5576563477516174, + "learning_rate": 0.0015, + "loss": 1.6536, + "step": 5530 + }, + { + "epoch": 0.5834388185654008, + "grad_norm": 0.6107875108718872, + "learning_rate": 0.0015, + "loss": 1.6279, + "step": 5531 + }, + { + "epoch": 0.5835443037974684, + "grad_norm": 0.5109343528747559, + "learning_rate": 0.0015, + "loss": 1.5876, + "step": 5532 + }, + { + "epoch": 0.5836497890295359, + "grad_norm": 0.6298612356185913, + "learning_rate": 0.0015, + "loss": 1.5928, + "step": 5533 + }, + { + "epoch": 0.5837552742616033, + "grad_norm": 
0.6461126208305359, + "learning_rate": 0.0015, + "loss": 1.5934, + "step": 5534 + }, + { + "epoch": 0.5838607594936709, + "grad_norm": 0.5158522725105286, + "learning_rate": 0.0015, + "loss": 1.6065, + "step": 5535 + }, + { + "epoch": 0.5839662447257384, + "grad_norm": 0.5445433855056763, + "learning_rate": 0.0015, + "loss": 1.6041, + "step": 5536 + }, + { + "epoch": 0.5840717299578059, + "grad_norm": 0.6405567526817322, + "learning_rate": 0.0015, + "loss": 1.6052, + "step": 5537 + }, + { + "epoch": 0.5841772151898734, + "grad_norm": 0.5018154978752136, + "learning_rate": 0.0015, + "loss": 1.6743, + "step": 5538 + }, + { + "epoch": 0.584282700421941, + "grad_norm": 0.5532844066619873, + "learning_rate": 0.0015, + "loss": 1.6058, + "step": 5539 + }, + { + "epoch": 0.5843881856540084, + "grad_norm": 0.6547574400901794, + "learning_rate": 0.0015, + "loss": 1.6429, + "step": 5540 + }, + { + "epoch": 0.584493670886076, + "grad_norm": 0.4973040521144867, + "learning_rate": 0.0015, + "loss": 1.627, + "step": 5541 + }, + { + "epoch": 0.5845991561181435, + "grad_norm": 0.5852428674697876, + "learning_rate": 0.0015, + "loss": 1.6282, + "step": 5542 + }, + { + "epoch": 0.5847046413502109, + "grad_norm": 0.6409162878990173, + "learning_rate": 0.0015, + "loss": 1.6331, + "step": 5543 + }, + { + "epoch": 0.5848101265822785, + "grad_norm": 0.5296412706375122, + "learning_rate": 0.0015, + "loss": 1.6087, + "step": 5544 + }, + { + "epoch": 0.584915611814346, + "grad_norm": 0.5370193123817444, + "learning_rate": 0.0015, + "loss": 1.6305, + "step": 5545 + }, + { + "epoch": 0.5850210970464135, + "grad_norm": 0.6180422902107239, + "learning_rate": 0.0015, + "loss": 1.614, + "step": 5546 + }, + { + "epoch": 0.585126582278481, + "grad_norm": 0.5282862186431885, + "learning_rate": 0.0015, + "loss": 1.6094, + "step": 5547 + }, + { + "epoch": 0.5852320675105486, + "grad_norm": 0.5119280815124512, + "learning_rate": 0.0015, + "loss": 1.6524, + "step": 5548 + }, + { + "epoch": 
0.585337552742616, + "grad_norm": 0.664736807346344, + "learning_rate": 0.0015, + "loss": 1.6351, + "step": 5549 + }, + { + "epoch": 0.5854430379746836, + "grad_norm": 0.6273255944252014, + "learning_rate": 0.0015, + "loss": 1.6662, + "step": 5550 + }, + { + "epoch": 0.5855485232067511, + "grad_norm": 0.4826644957065582, + "learning_rate": 0.0015, + "loss": 1.6528, + "step": 5551 + }, + { + "epoch": 0.5856540084388185, + "grad_norm": 0.5993300676345825, + "learning_rate": 0.0015, + "loss": 1.6886, + "step": 5552 + }, + { + "epoch": 0.5857594936708861, + "grad_norm": 0.4966449737548828, + "learning_rate": 0.0015, + "loss": 1.6166, + "step": 5553 + }, + { + "epoch": 0.5858649789029536, + "grad_norm": 0.4663368761539459, + "learning_rate": 0.0015, + "loss": 1.6331, + "step": 5554 + }, + { + "epoch": 0.5859704641350211, + "grad_norm": 0.46478691697120667, + "learning_rate": 0.0015, + "loss": 1.6354, + "step": 5555 + }, + { + "epoch": 0.5860759493670886, + "grad_norm": 0.4704135060310364, + "learning_rate": 0.0015, + "loss": 1.6025, + "step": 5556 + }, + { + "epoch": 0.5861814345991562, + "grad_norm": 0.484254390001297, + "learning_rate": 0.0015, + "loss": 1.6087, + "step": 5557 + }, + { + "epoch": 0.5862869198312236, + "grad_norm": 0.4876977503299713, + "learning_rate": 0.0015, + "loss": 1.647, + "step": 5558 + }, + { + "epoch": 0.5863924050632912, + "grad_norm": 0.5591520071029663, + "learning_rate": 0.0015, + "loss": 1.6153, + "step": 5559 + }, + { + "epoch": 0.5864978902953587, + "grad_norm": 0.5457459688186646, + "learning_rate": 0.0015, + "loss": 1.6292, + "step": 5560 + }, + { + "epoch": 0.5866033755274261, + "grad_norm": 0.5583513975143433, + "learning_rate": 0.0015, + "loss": 1.6203, + "step": 5561 + }, + { + "epoch": 0.5867088607594937, + "grad_norm": 0.4454638957977295, + "learning_rate": 0.0015, + "loss": 1.634, + "step": 5562 + }, + { + "epoch": 0.5868143459915611, + "grad_norm": 0.6796378493309021, + "learning_rate": 0.0015, + "loss": 1.6119, + "step": 
5563 + }, + { + "epoch": 0.5869198312236287, + "grad_norm": 0.47859227657318115, + "learning_rate": 0.0015, + "loss": 1.6313, + "step": 5564 + }, + { + "epoch": 0.5870253164556962, + "grad_norm": 0.5425044894218445, + "learning_rate": 0.0015, + "loss": 1.6182, + "step": 5565 + }, + { + "epoch": 0.5871308016877637, + "grad_norm": 0.5822433233261108, + "learning_rate": 0.0015, + "loss": 1.6353, + "step": 5566 + }, + { + "epoch": 0.5872362869198312, + "grad_norm": 0.4745939373970032, + "learning_rate": 0.0015, + "loss": 1.6471, + "step": 5567 + }, + { + "epoch": 0.5873417721518988, + "grad_norm": 0.48696377873420715, + "learning_rate": 0.0015, + "loss": 1.5975, + "step": 5568 + }, + { + "epoch": 0.5874472573839662, + "grad_norm": 0.5826190710067749, + "learning_rate": 0.0015, + "loss": 1.602, + "step": 5569 + }, + { + "epoch": 0.5875527426160337, + "grad_norm": 0.47986218333244324, + "learning_rate": 0.0015, + "loss": 1.5995, + "step": 5570 + }, + { + "epoch": 0.5876582278481013, + "grad_norm": 0.5235339403152466, + "learning_rate": 0.0015, + "loss": 1.6131, + "step": 5571 + }, + { + "epoch": 0.5877637130801687, + "grad_norm": 0.5208390951156616, + "learning_rate": 0.0015, + "loss": 1.6309, + "step": 5572 + }, + { + "epoch": 0.5878691983122363, + "grad_norm": 0.6189125776290894, + "learning_rate": 0.0015, + "loss": 1.6259, + "step": 5573 + }, + { + "epoch": 0.5879746835443038, + "grad_norm": 0.6651872396469116, + "learning_rate": 0.0015, + "loss": 1.6211, + "step": 5574 + }, + { + "epoch": 0.5880801687763713, + "grad_norm": 0.5567631721496582, + "learning_rate": 0.0015, + "loss": 1.6324, + "step": 5575 + }, + { + "epoch": 0.5881856540084388, + "grad_norm": 0.5870737433433533, + "learning_rate": 0.0015, + "loss": 1.6106, + "step": 5576 + }, + { + "epoch": 0.5882911392405064, + "grad_norm": 0.6546647548675537, + "learning_rate": 0.0015, + "loss": 1.6366, + "step": 5577 + }, + { + "epoch": 0.5883966244725738, + "grad_norm": 0.6215531826019287, + "learning_rate": 0.0015, 
+ "loss": 1.6081, + "step": 5578 + }, + { + "epoch": 0.5885021097046413, + "grad_norm": 0.7473545074462891, + "learning_rate": 0.0015, + "loss": 1.6366, + "step": 5579 + }, + { + "epoch": 0.5886075949367089, + "grad_norm": 0.6567901968955994, + "learning_rate": 0.0015, + "loss": 1.5861, + "step": 5580 + }, + { + "epoch": 0.5887130801687763, + "grad_norm": 0.6100008487701416, + "learning_rate": 0.0015, + "loss": 1.6193, + "step": 5581 + }, + { + "epoch": 0.5888185654008439, + "grad_norm": 0.5344525575637817, + "learning_rate": 0.0015, + "loss": 1.6265, + "step": 5582 + }, + { + "epoch": 0.5889240506329114, + "grad_norm": 0.5835578441619873, + "learning_rate": 0.0015, + "loss": 1.5907, + "step": 5583 + }, + { + "epoch": 0.5890295358649789, + "grad_norm": 0.4616762101650238, + "learning_rate": 0.0015, + "loss": 1.6182, + "step": 5584 + }, + { + "epoch": 0.5891350210970464, + "grad_norm": 0.7263842821121216, + "learning_rate": 0.0015, + "loss": 1.651, + "step": 5585 + }, + { + "epoch": 0.589240506329114, + "grad_norm": 0.6085631847381592, + "learning_rate": 0.0015, + "loss": 1.6172, + "step": 5586 + }, + { + "epoch": 0.5893459915611814, + "grad_norm": 0.5674893260002136, + "learning_rate": 0.0015, + "loss": 1.6151, + "step": 5587 + }, + { + "epoch": 0.5894514767932489, + "grad_norm": 0.5729935765266418, + "learning_rate": 0.0015, + "loss": 1.6611, + "step": 5588 + }, + { + "epoch": 0.5895569620253165, + "grad_norm": 0.7947391271591187, + "learning_rate": 0.0015, + "loss": 1.6111, + "step": 5589 + }, + { + "epoch": 0.5896624472573839, + "grad_norm": 0.48430517315864563, + "learning_rate": 0.0015, + "loss": 1.6017, + "step": 5590 + }, + { + "epoch": 0.5897679324894515, + "grad_norm": 0.7709295749664307, + "learning_rate": 0.0015, + "loss": 1.6046, + "step": 5591 + }, + { + "epoch": 0.589873417721519, + "grad_norm": 0.6639266610145569, + "learning_rate": 0.0015, + "loss": 1.655, + "step": 5592 + }, + { + "epoch": 0.5899789029535865, + "grad_norm": 0.6433471441268921, + 
"learning_rate": 0.0015, + "loss": 1.6102, + "step": 5593 + }, + { + "epoch": 0.590084388185654, + "grad_norm": 0.6052055954933167, + "learning_rate": 0.0015, + "loss": 1.6102, + "step": 5594 + }, + { + "epoch": 0.5901898734177216, + "grad_norm": 0.5959548354148865, + "learning_rate": 0.0015, + "loss": 1.6164, + "step": 5595 + }, + { + "epoch": 0.590295358649789, + "grad_norm": 0.6258935332298279, + "learning_rate": 0.0015, + "loss": 1.5975, + "step": 5596 + }, + { + "epoch": 0.5904008438818565, + "grad_norm": 0.6642781496047974, + "learning_rate": 0.0015, + "loss": 1.6069, + "step": 5597 + }, + { + "epoch": 0.5905063291139241, + "grad_norm": 0.7683665752410889, + "learning_rate": 0.0015, + "loss": 1.6317, + "step": 5598 + }, + { + "epoch": 0.5906118143459915, + "grad_norm": 0.5938720107078552, + "learning_rate": 0.0015, + "loss": 1.6324, + "step": 5599 + }, + { + "epoch": 0.5907172995780591, + "grad_norm": 0.5939382910728455, + "learning_rate": 0.0015, + "loss": 1.641, + "step": 5600 + }, + { + "epoch": 0.5908227848101266, + "grad_norm": 0.6148390769958496, + "learning_rate": 0.0015, + "loss": 1.6301, + "step": 5601 + }, + { + "epoch": 0.5909282700421941, + "grad_norm": 0.5103157758712769, + "learning_rate": 0.0015, + "loss": 1.6503, + "step": 5602 + }, + { + "epoch": 0.5910337552742616, + "grad_norm": 0.6613872051239014, + "learning_rate": 0.0015, + "loss": 1.6367, + "step": 5603 + }, + { + "epoch": 0.5911392405063292, + "grad_norm": 0.7048614621162415, + "learning_rate": 0.0015, + "loss": 1.6078, + "step": 5604 + }, + { + "epoch": 0.5912447257383966, + "grad_norm": 0.5022804737091064, + "learning_rate": 0.0015, + "loss": 1.5921, + "step": 5605 + }, + { + "epoch": 0.5913502109704641, + "grad_norm": 0.7162870764732361, + "learning_rate": 0.0015, + "loss": 1.6307, + "step": 5606 + }, + { + "epoch": 0.5914556962025317, + "grad_norm": 0.5350881218910217, + "learning_rate": 0.0015, + "loss": 1.6294, + "step": 5607 + }, + { + "epoch": 0.5915611814345991, + "grad_norm": 
0.5418235063552856, + "learning_rate": 0.0015, + "loss": 1.6208, + "step": 5608 + }, + { + "epoch": 0.5916666666666667, + "grad_norm": 0.5851300358772278, + "learning_rate": 0.0015, + "loss": 1.6147, + "step": 5609 + }, + { + "epoch": 0.5917721518987342, + "grad_norm": 0.577905535697937, + "learning_rate": 0.0015, + "loss": 1.6272, + "step": 5610 + }, + { + "epoch": 0.5918776371308017, + "grad_norm": 0.5166160464286804, + "learning_rate": 0.0015, + "loss": 1.6197, + "step": 5611 + }, + { + "epoch": 0.5919831223628692, + "grad_norm": 0.5385041832923889, + "learning_rate": 0.0015, + "loss": 1.6395, + "step": 5612 + }, + { + "epoch": 0.5920886075949368, + "grad_norm": 0.6943890452384949, + "learning_rate": 0.0015, + "loss": 1.6297, + "step": 5613 + }, + { + "epoch": 0.5921940928270042, + "grad_norm": 0.5998681783676147, + "learning_rate": 0.0015, + "loss": 1.6314, + "step": 5614 + }, + { + "epoch": 0.5922995780590717, + "grad_norm": 0.5115747451782227, + "learning_rate": 0.0015, + "loss": 1.6429, + "step": 5615 + }, + { + "epoch": 0.5924050632911393, + "grad_norm": 0.5083685517311096, + "learning_rate": 0.0015, + "loss": 1.6219, + "step": 5616 + }, + { + "epoch": 0.5925105485232067, + "grad_norm": 0.46947669982910156, + "learning_rate": 0.0015, + "loss": 1.6026, + "step": 5617 + }, + { + "epoch": 0.5926160337552743, + "grad_norm": 0.5864566564559937, + "learning_rate": 0.0015, + "loss": 1.6297, + "step": 5618 + }, + { + "epoch": 0.5927215189873418, + "grad_norm": 0.6747725605964661, + "learning_rate": 0.0015, + "loss": 1.6308, + "step": 5619 + }, + { + "epoch": 0.5928270042194093, + "grad_norm": 0.51981121301651, + "learning_rate": 0.0015, + "loss": 1.6377, + "step": 5620 + }, + { + "epoch": 0.5929324894514768, + "grad_norm": 0.47632312774658203, + "learning_rate": 0.0015, + "loss": 1.6284, + "step": 5621 + }, + { + "epoch": 0.5930379746835444, + "grad_norm": 0.5157601833343506, + "learning_rate": 0.0015, + "loss": 1.6444, + "step": 5622 + }, + { + "epoch": 
0.5931434599156118, + "grad_norm": 0.5353105664253235, + "learning_rate": 0.0015, + "loss": 1.5813, + "step": 5623 + }, + { + "epoch": 0.5932489451476793, + "grad_norm": 0.5490435361862183, + "learning_rate": 0.0015, + "loss": 1.6462, + "step": 5624 + }, + { + "epoch": 0.5933544303797469, + "grad_norm": 0.4852970838546753, + "learning_rate": 0.0015, + "loss": 1.645, + "step": 5625 + }, + { + "epoch": 0.5934599156118143, + "grad_norm": 0.503825306892395, + "learning_rate": 0.0015, + "loss": 1.6216, + "step": 5626 + }, + { + "epoch": 0.5935654008438819, + "grad_norm": 0.5791821479797363, + "learning_rate": 0.0015, + "loss": 1.6472, + "step": 5627 + }, + { + "epoch": 0.5936708860759494, + "grad_norm": 0.5218534469604492, + "learning_rate": 0.0015, + "loss": 1.5899, + "step": 5628 + }, + { + "epoch": 0.5937763713080169, + "grad_norm": 0.6484091281890869, + "learning_rate": 0.0015, + "loss": 1.6264, + "step": 5629 + }, + { + "epoch": 0.5938818565400844, + "grad_norm": 0.6759604215621948, + "learning_rate": 0.0015, + "loss": 1.6077, + "step": 5630 + }, + { + "epoch": 0.5939873417721518, + "grad_norm": 0.4621976315975189, + "learning_rate": 0.0015, + "loss": 1.638, + "step": 5631 + }, + { + "epoch": 0.5940928270042194, + "grad_norm": 0.7069104313850403, + "learning_rate": 0.0015, + "loss": 1.6385, + "step": 5632 + }, + { + "epoch": 0.5941983122362869, + "grad_norm": 0.6323444247245789, + "learning_rate": 0.0015, + "loss": 1.6159, + "step": 5633 + }, + { + "epoch": 0.5943037974683544, + "grad_norm": 0.5433825850486755, + "learning_rate": 0.0015, + "loss": 1.6355, + "step": 5634 + }, + { + "epoch": 0.5944092827004219, + "grad_norm": 0.7824275493621826, + "learning_rate": 0.0015, + "loss": 1.6282, + "step": 5635 + }, + { + "epoch": 0.5945147679324895, + "grad_norm": 0.6480106711387634, + "learning_rate": 0.0015, + "loss": 1.6246, + "step": 5636 + }, + { + "epoch": 0.5946202531645569, + "grad_norm": 0.5131890177726746, + "learning_rate": 0.0015, + "loss": 1.6227, + "step": 
5637 + }, + { + "epoch": 0.5947257383966245, + "grad_norm": 0.6877164840698242, + "learning_rate": 0.0015, + "loss": 1.6798, + "step": 5638 + }, + { + "epoch": 0.594831223628692, + "grad_norm": 0.5200788378715515, + "learning_rate": 0.0015, + "loss": 1.6289, + "step": 5639 + }, + { + "epoch": 0.5949367088607594, + "grad_norm": 0.5588069558143616, + "learning_rate": 0.0015, + "loss": 1.6494, + "step": 5640 + }, + { + "epoch": 0.595042194092827, + "grad_norm": 0.7360785007476807, + "learning_rate": 0.0015, + "loss": 1.6531, + "step": 5641 + }, + { + "epoch": 0.5951476793248945, + "grad_norm": 0.4863540828227997, + "learning_rate": 0.0015, + "loss": 1.6284, + "step": 5642 + }, + { + "epoch": 0.595253164556962, + "grad_norm": 0.6577567458152771, + "learning_rate": 0.0015, + "loss": 1.6265, + "step": 5643 + }, + { + "epoch": 0.5953586497890295, + "grad_norm": 0.5978899598121643, + "learning_rate": 0.0015, + "loss": 1.6343, + "step": 5644 + }, + { + "epoch": 0.5954641350210971, + "grad_norm": 0.4906193017959595, + "learning_rate": 0.0015, + "loss": 1.634, + "step": 5645 + }, + { + "epoch": 0.5955696202531645, + "grad_norm": 0.5887708067893982, + "learning_rate": 0.0015, + "loss": 1.6155, + "step": 5646 + }, + { + "epoch": 0.5956751054852321, + "grad_norm": 0.5471548438072205, + "learning_rate": 0.0015, + "loss": 1.632, + "step": 5647 + }, + { + "epoch": 0.5957805907172996, + "grad_norm": 0.5576574206352234, + "learning_rate": 0.0015, + "loss": 1.6248, + "step": 5648 + }, + { + "epoch": 0.595886075949367, + "grad_norm": 0.4525201618671417, + "learning_rate": 0.0015, + "loss": 1.64, + "step": 5649 + }, + { + "epoch": 0.5959915611814346, + "grad_norm": 0.579256534576416, + "learning_rate": 0.0015, + "loss": 1.616, + "step": 5650 + }, + { + "epoch": 0.5960970464135021, + "grad_norm": 0.4890976846218109, + "learning_rate": 0.0015, + "loss": 1.6213, + "step": 5651 + }, + { + "epoch": 0.5962025316455696, + "grad_norm": 0.4881046414375305, + "learning_rate": 0.0015, + "loss": 
1.6122, + "step": 5652 + }, + { + "epoch": 0.5963080168776371, + "grad_norm": 0.5339732766151428, + "learning_rate": 0.0015, + "loss": 1.5761, + "step": 5653 + }, + { + "epoch": 0.5964135021097047, + "grad_norm": 0.48167338967323303, + "learning_rate": 0.0015, + "loss": 1.6168, + "step": 5654 + }, + { + "epoch": 0.5965189873417721, + "grad_norm": 0.6510812044143677, + "learning_rate": 0.0015, + "loss": 1.6351, + "step": 5655 + }, + { + "epoch": 0.5966244725738397, + "grad_norm": 0.6494855284690857, + "learning_rate": 0.0015, + "loss": 1.6396, + "step": 5656 + }, + { + "epoch": 0.5967299578059072, + "grad_norm": 0.5147927403450012, + "learning_rate": 0.0015, + "loss": 1.6353, + "step": 5657 + }, + { + "epoch": 0.5968354430379746, + "grad_norm": 0.6979304552078247, + "learning_rate": 0.0015, + "loss": 1.6597, + "step": 5658 + }, + { + "epoch": 0.5969409282700422, + "grad_norm": 0.5883269906044006, + "learning_rate": 0.0015, + "loss": 1.6104, + "step": 5659 + }, + { + "epoch": 0.5970464135021097, + "grad_norm": 0.4970814883708954, + "learning_rate": 0.0015, + "loss": 1.608, + "step": 5660 + }, + { + "epoch": 0.5971518987341772, + "grad_norm": 0.5940549969673157, + "learning_rate": 0.0015, + "loss": 1.6499, + "step": 5661 + }, + { + "epoch": 0.5972573839662447, + "grad_norm": 0.5467049479484558, + "learning_rate": 0.0015, + "loss": 1.592, + "step": 5662 + }, + { + "epoch": 0.5973628691983123, + "grad_norm": 0.4867047071456909, + "learning_rate": 0.0015, + "loss": 1.649, + "step": 5663 + }, + { + "epoch": 0.5974683544303797, + "grad_norm": 0.4736812710762024, + "learning_rate": 0.0015, + "loss": 1.6379, + "step": 5664 + }, + { + "epoch": 0.5975738396624473, + "grad_norm": 0.5190638899803162, + "learning_rate": 0.0015, + "loss": 1.6284, + "step": 5665 + }, + { + "epoch": 0.5976793248945148, + "grad_norm": 0.4780181050300598, + "learning_rate": 0.0015, + "loss": 1.5853, + "step": 5666 + }, + { + "epoch": 0.5977848101265822, + "grad_norm": 0.503572404384613, + 
"learning_rate": 0.0015, + "loss": 1.6551, + "step": 5667 + }, + { + "epoch": 0.5978902953586498, + "grad_norm": 0.5043089389801025, + "learning_rate": 0.0015, + "loss": 1.6613, + "step": 5668 + }, + { + "epoch": 0.5979957805907173, + "grad_norm": 0.5234181880950928, + "learning_rate": 0.0015, + "loss": 1.6303, + "step": 5669 + }, + { + "epoch": 0.5981012658227848, + "grad_norm": 0.519684910774231, + "learning_rate": 0.0015, + "loss": 1.5951, + "step": 5670 + }, + { + "epoch": 0.5982067510548523, + "grad_norm": 0.4918913245201111, + "learning_rate": 0.0015, + "loss": 1.6307, + "step": 5671 + }, + { + "epoch": 0.5983122362869199, + "grad_norm": 0.5128886103630066, + "learning_rate": 0.0015, + "loss": 1.6155, + "step": 5672 + }, + { + "epoch": 0.5984177215189873, + "grad_norm": 0.5375071167945862, + "learning_rate": 0.0015, + "loss": 1.6597, + "step": 5673 + }, + { + "epoch": 0.5985232067510549, + "grad_norm": 0.47754842042922974, + "learning_rate": 0.0015, + "loss": 1.646, + "step": 5674 + }, + { + "epoch": 0.5986286919831224, + "grad_norm": 0.6603662371635437, + "learning_rate": 0.0015, + "loss": 1.6129, + "step": 5675 + }, + { + "epoch": 0.5987341772151898, + "grad_norm": 0.7207350730895996, + "learning_rate": 0.0015, + "loss": 1.6491, + "step": 5676 + }, + { + "epoch": 0.5988396624472574, + "grad_norm": 0.5981911420822144, + "learning_rate": 0.0015, + "loss": 1.5973, + "step": 5677 + }, + { + "epoch": 0.5989451476793249, + "grad_norm": 0.7006922364234924, + "learning_rate": 0.0015, + "loss": 1.6631, + "step": 5678 + }, + { + "epoch": 0.5990506329113924, + "grad_norm": 0.7043522000312805, + "learning_rate": 0.0015, + "loss": 1.593, + "step": 5679 + }, + { + "epoch": 0.5991561181434599, + "grad_norm": 0.47904908657073975, + "learning_rate": 0.0015, + "loss": 1.6443, + "step": 5680 + }, + { + "epoch": 0.5992616033755275, + "grad_norm": 0.6863402128219604, + "learning_rate": 0.0015, + "loss": 1.6248, + "step": 5681 + }, + { + "epoch": 0.5993670886075949, + 
"grad_norm": 0.8411417603492737, + "learning_rate": 0.0015, + "loss": 1.6169, + "step": 5682 + }, + { + "epoch": 0.5994725738396625, + "grad_norm": 0.4999617040157318, + "learning_rate": 0.0015, + "loss": 1.6088, + "step": 5683 + }, + { + "epoch": 0.59957805907173, + "grad_norm": 0.61651611328125, + "learning_rate": 0.0015, + "loss": 1.5913, + "step": 5684 + }, + { + "epoch": 0.5996835443037974, + "grad_norm": 0.6154442429542542, + "learning_rate": 0.0015, + "loss": 1.609, + "step": 5685 + }, + { + "epoch": 0.599789029535865, + "grad_norm": 0.5308151841163635, + "learning_rate": 0.0015, + "loss": 1.6353, + "step": 5686 + }, + { + "epoch": 0.5998945147679325, + "grad_norm": 0.5686870217323303, + "learning_rate": 0.0015, + "loss": 1.6175, + "step": 5687 + }, + { + "epoch": 0.6, + "grad_norm": 0.5483143329620361, + "learning_rate": 0.0015, + "loss": 1.6121, + "step": 5688 + }, + { + "epoch": 0.6001054852320675, + "grad_norm": 0.73167884349823, + "learning_rate": 0.0015, + "loss": 1.5947, + "step": 5689 + }, + { + "epoch": 0.6002109704641351, + "grad_norm": 0.46591293811798096, + "learning_rate": 0.0015, + "loss": 1.6132, + "step": 5690 + }, + { + "epoch": 0.6003164556962025, + "grad_norm": 0.8627923727035522, + "learning_rate": 0.0015, + "loss": 1.6571, + "step": 5691 + }, + { + "epoch": 0.6004219409282701, + "grad_norm": 0.5825620889663696, + "learning_rate": 0.0015, + "loss": 1.5811, + "step": 5692 + }, + { + "epoch": 0.6005274261603376, + "grad_norm": 0.6405885815620422, + "learning_rate": 0.0015, + "loss": 1.624, + "step": 5693 + }, + { + "epoch": 0.600632911392405, + "grad_norm": 0.7344543933868408, + "learning_rate": 0.0015, + "loss": 1.6012, + "step": 5694 + }, + { + "epoch": 0.6007383966244726, + "grad_norm": 0.5269743800163269, + "learning_rate": 0.0015, + "loss": 1.5742, + "step": 5695 + }, + { + "epoch": 0.60084388185654, + "grad_norm": 0.6673415899276733, + "learning_rate": 0.0015, + "loss": 1.6384, + "step": 5696 + }, + { + "epoch": 0.6009493670886076, + 
"grad_norm": 0.6590900421142578, + "learning_rate": 0.0015, + "loss": 1.627, + "step": 5697 + }, + { + "epoch": 0.6010548523206751, + "grad_norm": 0.5728099942207336, + "learning_rate": 0.0015, + "loss": 1.6321, + "step": 5698 + }, + { + "epoch": 0.6011603375527426, + "grad_norm": 0.7162034511566162, + "learning_rate": 0.0015, + "loss": 1.6005, + "step": 5699 + }, + { + "epoch": 0.6012658227848101, + "grad_norm": 0.8475410342216492, + "learning_rate": 0.0015, + "loss": 1.6492, + "step": 5700 + }, + { + "epoch": 0.6013713080168777, + "grad_norm": 0.5202561616897583, + "learning_rate": 0.0015, + "loss": 1.5997, + "step": 5701 + }, + { + "epoch": 0.6014767932489451, + "grad_norm": 0.7218925356864929, + "learning_rate": 0.0015, + "loss": 1.6295, + "step": 5702 + }, + { + "epoch": 0.6015822784810126, + "grad_norm": 0.8395099639892578, + "learning_rate": 0.0015, + "loss": 1.6446, + "step": 5703 + }, + { + "epoch": 0.6016877637130802, + "grad_norm": 0.5557817220687866, + "learning_rate": 0.0015, + "loss": 1.6082, + "step": 5704 + }, + { + "epoch": 0.6017932489451476, + "grad_norm": 0.6226076483726501, + "learning_rate": 0.0015, + "loss": 1.6395, + "step": 5705 + }, + { + "epoch": 0.6018987341772152, + "grad_norm": 0.7292788028717041, + "learning_rate": 0.0015, + "loss": 1.5953, + "step": 5706 + }, + { + "epoch": 0.6020042194092827, + "grad_norm": 0.5449414253234863, + "learning_rate": 0.0015, + "loss": 1.5922, + "step": 5707 + }, + { + "epoch": 0.6021097046413502, + "grad_norm": 0.6932792663574219, + "learning_rate": 0.0015, + "loss": 1.6076, + "step": 5708 + }, + { + "epoch": 0.6022151898734177, + "grad_norm": 0.5870957374572754, + "learning_rate": 0.0015, + "loss": 1.6195, + "step": 5709 + }, + { + "epoch": 0.6023206751054853, + "grad_norm": 0.7367919087409973, + "learning_rate": 0.0015, + "loss": 1.6621, + "step": 5710 + }, + { + "epoch": 0.6024261603375527, + "grad_norm": 0.5735125541687012, + "learning_rate": 0.0015, + "loss": 1.6422, + "step": 5711 + }, + { + 
"epoch": 0.6025316455696202, + "grad_norm": 0.590947687625885, + "learning_rate": 0.0015, + "loss": 1.6146, + "step": 5712 + }, + { + "epoch": 0.6026371308016878, + "grad_norm": 0.515593945980072, + "learning_rate": 0.0015, + "loss": 1.6127, + "step": 5713 + }, + { + "epoch": 0.6027426160337552, + "grad_norm": 0.7016730308532715, + "learning_rate": 0.0015, + "loss": 1.6089, + "step": 5714 + }, + { + "epoch": 0.6028481012658228, + "grad_norm": 0.47112807631492615, + "learning_rate": 0.0015, + "loss": 1.5973, + "step": 5715 + }, + { + "epoch": 0.6029535864978903, + "grad_norm": 0.5997288227081299, + "learning_rate": 0.0015, + "loss": 1.653, + "step": 5716 + }, + { + "epoch": 0.6030590717299578, + "grad_norm": 0.5407869815826416, + "learning_rate": 0.0015, + "loss": 1.6433, + "step": 5717 + }, + { + "epoch": 0.6031645569620253, + "grad_norm": 0.527229905128479, + "learning_rate": 0.0015, + "loss": 1.6204, + "step": 5718 + }, + { + "epoch": 0.6032700421940929, + "grad_norm": 0.5542311072349548, + "learning_rate": 0.0015, + "loss": 1.618, + "step": 5719 + }, + { + "epoch": 0.6033755274261603, + "grad_norm": 0.5649443864822388, + "learning_rate": 0.0015, + "loss": 1.6146, + "step": 5720 + }, + { + "epoch": 0.6034810126582278, + "grad_norm": 0.4718017280101776, + "learning_rate": 0.0015, + "loss": 1.6105, + "step": 5721 + }, + { + "epoch": 0.6035864978902954, + "grad_norm": 0.5023795366287231, + "learning_rate": 0.0015, + "loss": 1.6215, + "step": 5722 + }, + { + "epoch": 0.6036919831223628, + "grad_norm": 0.5101316571235657, + "learning_rate": 0.0015, + "loss": 1.6153, + "step": 5723 + }, + { + "epoch": 0.6037974683544304, + "grad_norm": 0.4487985372543335, + "learning_rate": 0.0015, + "loss": 1.6342, + "step": 5724 + }, + { + "epoch": 0.6039029535864979, + "grad_norm": 0.5671184062957764, + "learning_rate": 0.0015, + "loss": 1.6086, + "step": 5725 + }, + { + "epoch": 0.6040084388185654, + "grad_norm": 0.538108229637146, + "learning_rate": 0.0015, + "loss": 1.6256, + 
"step": 5726 + }, + { + "epoch": 0.6041139240506329, + "grad_norm": 0.5328302979469299, + "learning_rate": 0.0015, + "loss": 1.6043, + "step": 5727 + }, + { + "epoch": 0.6042194092827005, + "grad_norm": 0.6336845755577087, + "learning_rate": 0.0015, + "loss": 1.6115, + "step": 5728 + }, + { + "epoch": 0.6043248945147679, + "grad_norm": 0.6453084349632263, + "learning_rate": 0.0015, + "loss": 1.6251, + "step": 5729 + }, + { + "epoch": 0.6044303797468354, + "grad_norm": 0.5204219818115234, + "learning_rate": 0.0015, + "loss": 1.6296, + "step": 5730 + }, + { + "epoch": 0.604535864978903, + "grad_norm": 0.4975261390209198, + "learning_rate": 0.0015, + "loss": 1.6248, + "step": 5731 + }, + { + "epoch": 0.6046413502109704, + "grad_norm": 0.6070615649223328, + "learning_rate": 0.0015, + "loss": 1.6001, + "step": 5732 + }, + { + "epoch": 0.604746835443038, + "grad_norm": 0.47148597240448, + "learning_rate": 0.0015, + "loss": 1.6211, + "step": 5733 + }, + { + "epoch": 0.6048523206751055, + "grad_norm": 0.7071391940116882, + "learning_rate": 0.0015, + "loss": 1.603, + "step": 5734 + }, + { + "epoch": 0.604957805907173, + "grad_norm": 0.6131590604782104, + "learning_rate": 0.0015, + "loss": 1.6079, + "step": 5735 + }, + { + "epoch": 0.6050632911392405, + "grad_norm": 0.5092592835426331, + "learning_rate": 0.0015, + "loss": 1.6459, + "step": 5736 + }, + { + "epoch": 0.6051687763713081, + "grad_norm": 0.667983889579773, + "learning_rate": 0.0015, + "loss": 1.6173, + "step": 5737 + }, + { + "epoch": 0.6052742616033755, + "grad_norm": 0.5349023342132568, + "learning_rate": 0.0015, + "loss": 1.6159, + "step": 5738 + }, + { + "epoch": 0.605379746835443, + "grad_norm": 0.5172088742256165, + "learning_rate": 0.0015, + "loss": 1.6661, + "step": 5739 + }, + { + "epoch": 0.6054852320675106, + "grad_norm": 0.5283232927322388, + "learning_rate": 0.0015, + "loss": 1.6037, + "step": 5740 + }, + { + "epoch": 0.605590717299578, + "grad_norm": 0.47503188252449036, + "learning_rate": 0.0015, + 
"loss": 1.5955, + "step": 5741 + }, + { + "epoch": 0.6056962025316456, + "grad_norm": 0.5385279655456543, + "learning_rate": 0.0015, + "loss": 1.6232, + "step": 5742 + }, + { + "epoch": 0.6058016877637131, + "grad_norm": 0.5128896832466125, + "learning_rate": 0.0015, + "loss": 1.5765, + "step": 5743 + }, + { + "epoch": 0.6059071729957806, + "grad_norm": 0.5275681614875793, + "learning_rate": 0.0015, + "loss": 1.592, + "step": 5744 + }, + { + "epoch": 0.6060126582278481, + "grad_norm": 0.5630440711975098, + "learning_rate": 0.0015, + "loss": 1.5831, + "step": 5745 + }, + { + "epoch": 0.6061181434599157, + "grad_norm": 0.5439804792404175, + "learning_rate": 0.0015, + "loss": 1.6014, + "step": 5746 + }, + { + "epoch": 0.6062236286919831, + "grad_norm": 0.641860842704773, + "learning_rate": 0.0015, + "loss": 1.5944, + "step": 5747 + }, + { + "epoch": 0.6063291139240506, + "grad_norm": 0.724949836730957, + "learning_rate": 0.0015, + "loss": 1.6154, + "step": 5748 + }, + { + "epoch": 0.6064345991561182, + "grad_norm": 0.5219384431838989, + "learning_rate": 0.0015, + "loss": 1.6187, + "step": 5749 + }, + { + "epoch": 0.6065400843881856, + "grad_norm": 0.9701561331748962, + "learning_rate": 0.0015, + "loss": 1.5626, + "step": 5750 + }, + { + "epoch": 0.6066455696202532, + "grad_norm": 0.7944713830947876, + "learning_rate": 0.0015, + "loss": 1.5801, + "step": 5751 + }, + { + "epoch": 0.6067510548523207, + "grad_norm": 0.5765522718429565, + "learning_rate": 0.0015, + "loss": 1.5814, + "step": 5752 + }, + { + "epoch": 0.6068565400843882, + "grad_norm": 0.655381441116333, + "learning_rate": 0.0015, + "loss": 1.6227, + "step": 5753 + }, + { + "epoch": 0.6069620253164557, + "grad_norm": 0.6935734748840332, + "learning_rate": 0.0015, + "loss": 1.6107, + "step": 5754 + }, + { + "epoch": 0.6070675105485233, + "grad_norm": 0.5127397775650024, + "learning_rate": 0.0015, + "loss": 1.6313, + "step": 5755 + }, + { + "epoch": 0.6071729957805907, + "grad_norm": 0.6720975637435913, + 
"learning_rate": 0.0015, + "loss": 1.5996, + "step": 5756 + }, + { + "epoch": 0.6072784810126582, + "grad_norm": 0.7431419491767883, + "learning_rate": 0.0015, + "loss": 1.5775, + "step": 5757 + }, + { + "epoch": 0.6073839662447258, + "grad_norm": 0.47397467494010925, + "learning_rate": 0.0015, + "loss": 1.6319, + "step": 5758 + }, + { + "epoch": 0.6074894514767932, + "grad_norm": 0.5727673172950745, + "learning_rate": 0.0015, + "loss": 1.6192, + "step": 5759 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 0.5239375829696655, + "learning_rate": 0.0015, + "loss": 1.6203, + "step": 5760 + }, + { + "epoch": 0.6077004219409282, + "grad_norm": 0.5244763493537903, + "learning_rate": 0.0015, + "loss": 1.5693, + "step": 5761 + }, + { + "epoch": 0.6078059071729958, + "grad_norm": 0.5762230157852173, + "learning_rate": 0.0015, + "loss": 1.6229, + "step": 5762 + }, + { + "epoch": 0.6079113924050633, + "grad_norm": 0.5437536239624023, + "learning_rate": 0.0015, + "loss": 1.5972, + "step": 5763 + }, + { + "epoch": 0.6080168776371307, + "grad_norm": 0.5512168407440186, + "learning_rate": 0.0015, + "loss": 1.6059, + "step": 5764 + }, + { + "epoch": 0.6081223628691983, + "grad_norm": 0.7002081871032715, + "learning_rate": 0.0015, + "loss": 1.604, + "step": 5765 + }, + { + "epoch": 0.6082278481012658, + "grad_norm": 0.45743414759635925, + "learning_rate": 0.0015, + "loss": 1.6318, + "step": 5766 + }, + { + "epoch": 0.6083333333333333, + "grad_norm": 0.6875436902046204, + "learning_rate": 0.0015, + "loss": 1.6227, + "step": 5767 + }, + { + "epoch": 0.6084388185654008, + "grad_norm": 0.6525773406028748, + "learning_rate": 0.0015, + "loss": 1.5984, + "step": 5768 + }, + { + "epoch": 0.6085443037974684, + "grad_norm": 0.7001569867134094, + "learning_rate": 0.0015, + "loss": 1.6079, + "step": 5769 + }, + { + "epoch": 0.6086497890295358, + "grad_norm": 0.6135374903678894, + "learning_rate": 0.0015, + "loss": 1.6293, + "step": 5770 + }, + { + "epoch": 0.6087552742616034, + 
"grad_norm": 0.9716602563858032, + "learning_rate": 0.0015, + "loss": 1.5995, + "step": 5771 + }, + { + "epoch": 0.6088607594936709, + "grad_norm": 0.8283466696739197, + "learning_rate": 0.0015, + "loss": 1.6205, + "step": 5772 + }, + { + "epoch": 0.6089662447257383, + "grad_norm": 0.5932331085205078, + "learning_rate": 0.0015, + "loss": 1.6275, + "step": 5773 + }, + { + "epoch": 0.6090717299578059, + "grad_norm": 0.8104361295700073, + "learning_rate": 0.0015, + "loss": 1.6086, + "step": 5774 + }, + { + "epoch": 0.6091772151898734, + "grad_norm": 0.5436210632324219, + "learning_rate": 0.0015, + "loss": 1.6024, + "step": 5775 + }, + { + "epoch": 0.6092827004219409, + "grad_norm": 0.7614966034889221, + "learning_rate": 0.0015, + "loss": 1.6373, + "step": 5776 + }, + { + "epoch": 0.6093881856540084, + "grad_norm": 0.6954220533370972, + "learning_rate": 0.0015, + "loss": 1.6088, + "step": 5777 + }, + { + "epoch": 0.609493670886076, + "grad_norm": 0.557464063167572, + "learning_rate": 0.0015, + "loss": 1.634, + "step": 5778 + }, + { + "epoch": 0.6095991561181434, + "grad_norm": 0.5181831121444702, + "learning_rate": 0.0015, + "loss": 1.614, + "step": 5779 + }, + { + "epoch": 0.609704641350211, + "grad_norm": 0.544055700302124, + "learning_rate": 0.0015, + "loss": 1.6268, + "step": 5780 + }, + { + "epoch": 0.6098101265822785, + "grad_norm": 0.4917396008968353, + "learning_rate": 0.0015, + "loss": 1.6202, + "step": 5781 + }, + { + "epoch": 0.609915611814346, + "grad_norm": 0.5461342930793762, + "learning_rate": 0.0015, + "loss": 1.6204, + "step": 5782 + }, + { + "epoch": 0.6100210970464135, + "grad_norm": 0.5721676349639893, + "learning_rate": 0.0015, + "loss": 1.6446, + "step": 5783 + }, + { + "epoch": 0.610126582278481, + "grad_norm": 0.5346076488494873, + "learning_rate": 0.0015, + "loss": 1.6545, + "step": 5784 + }, + { + "epoch": 0.6102320675105485, + "grad_norm": 0.5412622690200806, + "learning_rate": 0.0015, + "loss": 1.6004, + "step": 5785 + }, + { + "epoch": 
0.610337552742616, + "grad_norm": 0.5639292001724243, + "learning_rate": 0.0015, + "loss": 1.588, + "step": 5786 + }, + { + "epoch": 0.6104430379746836, + "grad_norm": 0.4506779611110687, + "learning_rate": 0.0015, + "loss": 1.5823, + "step": 5787 + }, + { + "epoch": 0.610548523206751, + "grad_norm": 0.6533520221710205, + "learning_rate": 0.0015, + "loss": 1.6031, + "step": 5788 + }, + { + "epoch": 0.6106540084388186, + "grad_norm": 0.6267601251602173, + "learning_rate": 0.0015, + "loss": 1.6281, + "step": 5789 + }, + { + "epoch": 0.6107594936708861, + "grad_norm": 0.446891725063324, + "learning_rate": 0.0015, + "loss": 1.5948, + "step": 5790 + }, + { + "epoch": 0.6108649789029535, + "grad_norm": 0.6829448938369751, + "learning_rate": 0.0015, + "loss": 1.6464, + "step": 5791 + }, + { + "epoch": 0.6109704641350211, + "grad_norm": 0.6592504382133484, + "learning_rate": 0.0015, + "loss": 1.6136, + "step": 5792 + }, + { + "epoch": 0.6110759493670886, + "grad_norm": 0.5578280687332153, + "learning_rate": 0.0015, + "loss": 1.6358, + "step": 5793 + }, + { + "epoch": 0.6111814345991561, + "grad_norm": 0.5601876378059387, + "learning_rate": 0.0015, + "loss": 1.6245, + "step": 5794 + }, + { + "epoch": 0.6112869198312236, + "grad_norm": 0.5430405735969543, + "learning_rate": 0.0015, + "loss": 1.6, + "step": 5795 + }, + { + "epoch": 0.6113924050632912, + "grad_norm": 0.5691314935684204, + "learning_rate": 0.0015, + "loss": 1.6085, + "step": 5796 + }, + { + "epoch": 0.6114978902953586, + "grad_norm": 0.7052751183509827, + "learning_rate": 0.0015, + "loss": 1.6144, + "step": 5797 + }, + { + "epoch": 0.6116033755274262, + "grad_norm": 0.6696596145629883, + "learning_rate": 0.0015, + "loss": 1.5813, + "step": 5798 + }, + { + "epoch": 0.6117088607594937, + "grad_norm": 0.49856799840927124, + "learning_rate": 0.0015, + "loss": 1.6123, + "step": 5799 + }, + { + "epoch": 0.6118143459915611, + "grad_norm": 0.602507472038269, + "learning_rate": 0.0015, + "loss": 1.625, + "step": 5800 + 
}, + { + "epoch": 0.6119198312236287, + "grad_norm": 0.5198401808738708, + "learning_rate": 0.0015, + "loss": 1.623, + "step": 5801 + }, + { + "epoch": 0.6120253164556962, + "grad_norm": 0.49069100618362427, + "learning_rate": 0.0015, + "loss": 1.6083, + "step": 5802 + }, + { + "epoch": 0.6121308016877637, + "grad_norm": 0.645224392414093, + "learning_rate": 0.0015, + "loss": 1.5971, + "step": 5803 + }, + { + "epoch": 0.6122362869198312, + "grad_norm": 0.4484640657901764, + "learning_rate": 0.0015, + "loss": 1.618, + "step": 5804 + }, + { + "epoch": 0.6123417721518988, + "grad_norm": 0.47885531187057495, + "learning_rate": 0.0015, + "loss": 1.5912, + "step": 5805 + }, + { + "epoch": 0.6124472573839662, + "grad_norm": 0.49233120679855347, + "learning_rate": 0.0015, + "loss": 1.6376, + "step": 5806 + }, + { + "epoch": 0.6125527426160338, + "grad_norm": 0.47025009989738464, + "learning_rate": 0.0015, + "loss": 1.632, + "step": 5807 + }, + { + "epoch": 0.6126582278481013, + "grad_norm": 0.5202906131744385, + "learning_rate": 0.0015, + "loss": 1.6325, + "step": 5808 + }, + { + "epoch": 0.6127637130801687, + "grad_norm": 0.55734783411026, + "learning_rate": 0.0015, + "loss": 1.6377, + "step": 5809 + }, + { + "epoch": 0.6128691983122363, + "grad_norm": 0.5348408818244934, + "learning_rate": 0.0015, + "loss": 1.6151, + "step": 5810 + }, + { + "epoch": 0.6129746835443038, + "grad_norm": 0.5444215536117554, + "learning_rate": 0.0015, + "loss": 1.6495, + "step": 5811 + }, + { + "epoch": 0.6130801687763713, + "grad_norm": 0.5886389017105103, + "learning_rate": 0.0015, + "loss": 1.6202, + "step": 5812 + }, + { + "epoch": 0.6131856540084388, + "grad_norm": 0.49230071902275085, + "learning_rate": 0.0015, + "loss": 1.6004, + "step": 5813 + }, + { + "epoch": 0.6132911392405064, + "grad_norm": 0.5946803092956543, + "learning_rate": 0.0015, + "loss": 1.6136, + "step": 5814 + }, + { + "epoch": 0.6133966244725738, + "grad_norm": 0.5703949928283691, + "learning_rate": 0.0015, + "loss": 
1.6559, + "step": 5815 + }, + { + "epoch": 0.6135021097046414, + "grad_norm": 0.5334026217460632, + "learning_rate": 0.0015, + "loss": 1.6435, + "step": 5816 + }, + { + "epoch": 0.6136075949367089, + "grad_norm": 0.5981934666633606, + "learning_rate": 0.0015, + "loss": 1.6332, + "step": 5817 + }, + { + "epoch": 0.6137130801687763, + "grad_norm": 0.5421572327613831, + "learning_rate": 0.0015, + "loss": 1.6466, + "step": 5818 + }, + { + "epoch": 0.6138185654008439, + "grad_norm": 0.4682169258594513, + "learning_rate": 0.0015, + "loss": 1.6214, + "step": 5819 + }, + { + "epoch": 0.6139240506329114, + "grad_norm": 0.5654427409172058, + "learning_rate": 0.0015, + "loss": 1.634, + "step": 5820 + }, + { + "epoch": 0.6140295358649789, + "grad_norm": 0.5109453201293945, + "learning_rate": 0.0015, + "loss": 1.6458, + "step": 5821 + }, + { + "epoch": 0.6141350210970464, + "grad_norm": 0.5720937848091125, + "learning_rate": 0.0015, + "loss": 1.6071, + "step": 5822 + }, + { + "epoch": 0.614240506329114, + "grad_norm": 0.5675110220909119, + "learning_rate": 0.0015, + "loss": 1.6038, + "step": 5823 + }, + { + "epoch": 0.6143459915611814, + "grad_norm": 0.4660292863845825, + "learning_rate": 0.0015, + "loss": 1.6226, + "step": 5824 + }, + { + "epoch": 0.614451476793249, + "grad_norm": 0.5449029207229614, + "learning_rate": 0.0015, + "loss": 1.6177, + "step": 5825 + }, + { + "epoch": 0.6145569620253165, + "grad_norm": 0.462939590215683, + "learning_rate": 0.0015, + "loss": 1.6128, + "step": 5826 + }, + { + "epoch": 0.614662447257384, + "grad_norm": 0.575222909450531, + "learning_rate": 0.0015, + "loss": 1.6242, + "step": 5827 + }, + { + "epoch": 0.6147679324894515, + "grad_norm": 0.5540435910224915, + "learning_rate": 0.0015, + "loss": 1.5909, + "step": 5828 + }, + { + "epoch": 0.6148734177215189, + "grad_norm": 0.45478394627571106, + "learning_rate": 0.0015, + "loss": 1.6245, + "step": 5829 + }, + { + "epoch": 0.6149789029535865, + "grad_norm": 0.5058201551437378, + 
"learning_rate": 0.0015, + "loss": 1.5887, + "step": 5830 + }, + { + "epoch": 0.615084388185654, + "grad_norm": 0.6100703477859497, + "learning_rate": 0.0015, + "loss": 1.6205, + "step": 5831 + }, + { + "epoch": 0.6151898734177215, + "grad_norm": 0.4519583284854889, + "learning_rate": 0.0015, + "loss": 1.6471, + "step": 5832 + }, + { + "epoch": 0.615295358649789, + "grad_norm": 0.5411374568939209, + "learning_rate": 0.0015, + "loss": 1.5915, + "step": 5833 + }, + { + "epoch": 0.6154008438818566, + "grad_norm": 0.44370853900909424, + "learning_rate": 0.0015, + "loss": 1.6349, + "step": 5834 + }, + { + "epoch": 0.615506329113924, + "grad_norm": 0.5247477889060974, + "learning_rate": 0.0015, + "loss": 1.6121, + "step": 5835 + }, + { + "epoch": 0.6156118143459915, + "grad_norm": 0.5240331292152405, + "learning_rate": 0.0015, + "loss": 1.6613, + "step": 5836 + }, + { + "epoch": 0.6157172995780591, + "grad_norm": 0.4902103841304779, + "learning_rate": 0.0015, + "loss": 1.5931, + "step": 5837 + }, + { + "epoch": 0.6158227848101265, + "grad_norm": 0.6526527404785156, + "learning_rate": 0.0015, + "loss": 1.6518, + "step": 5838 + }, + { + "epoch": 0.6159282700421941, + "grad_norm": 0.4508664011955261, + "learning_rate": 0.0015, + "loss": 1.6285, + "step": 5839 + }, + { + "epoch": 0.6160337552742616, + "grad_norm": 0.6527950167655945, + "learning_rate": 0.0015, + "loss": 1.6554, + "step": 5840 + }, + { + "epoch": 0.6161392405063291, + "grad_norm": 0.6767658591270447, + "learning_rate": 0.0015, + "loss": 1.6492, + "step": 5841 + }, + { + "epoch": 0.6162447257383966, + "grad_norm": 0.49983566999435425, + "learning_rate": 0.0015, + "loss": 1.6543, + "step": 5842 + }, + { + "epoch": 0.6163502109704642, + "grad_norm": 0.6607310175895691, + "learning_rate": 0.0015, + "loss": 1.5831, + "step": 5843 + }, + { + "epoch": 0.6164556962025316, + "grad_norm": 0.679019570350647, + "learning_rate": 0.0015, + "loss": 1.6287, + "step": 5844 + }, + { + "epoch": 0.6165611814345991, + 
"grad_norm": 0.5855091214179993, + "learning_rate": 0.0015, + "loss": 1.6463, + "step": 5845 + }, + { + "epoch": 0.6166666666666667, + "grad_norm": 0.4927886426448822, + "learning_rate": 0.0015, + "loss": 1.6072, + "step": 5846 + }, + { + "epoch": 0.6167721518987341, + "grad_norm": 0.5097840428352356, + "learning_rate": 0.0015, + "loss": 1.6259, + "step": 5847 + }, + { + "epoch": 0.6168776371308017, + "grad_norm": 0.4735028147697449, + "learning_rate": 0.0015, + "loss": 1.6517, + "step": 5848 + }, + { + "epoch": 0.6169831223628692, + "grad_norm": 0.5716160535812378, + "learning_rate": 0.0015, + "loss": 1.6735, + "step": 5849 + }, + { + "epoch": 0.6170886075949367, + "grad_norm": 0.5068883299827576, + "learning_rate": 0.0015, + "loss": 1.6179, + "step": 5850 + }, + { + "epoch": 0.6171940928270042, + "grad_norm": 0.6885242462158203, + "learning_rate": 0.0015, + "loss": 1.6302, + "step": 5851 + }, + { + "epoch": 0.6172995780590718, + "grad_norm": 0.5677760243415833, + "learning_rate": 0.0015, + "loss": 1.6092, + "step": 5852 + }, + { + "epoch": 0.6174050632911392, + "grad_norm": 0.5608734488487244, + "learning_rate": 0.0015, + "loss": 1.6143, + "step": 5853 + }, + { + "epoch": 0.6175105485232067, + "grad_norm": 0.5561981797218323, + "learning_rate": 0.0015, + "loss": 1.5826, + "step": 5854 + }, + { + "epoch": 0.6176160337552743, + "grad_norm": 0.6651297211647034, + "learning_rate": 0.0015, + "loss": 1.6783, + "step": 5855 + }, + { + "epoch": 0.6177215189873417, + "grad_norm": 0.5847839117050171, + "learning_rate": 0.0015, + "loss": 1.6284, + "step": 5856 + }, + { + "epoch": 0.6178270042194093, + "grad_norm": 0.7147279381752014, + "learning_rate": 0.0015, + "loss": 1.597, + "step": 5857 + }, + { + "epoch": 0.6179324894514768, + "grad_norm": 0.6902298927307129, + "learning_rate": 0.0015, + "loss": 1.6324, + "step": 5858 + }, + { + "epoch": 0.6180379746835443, + "grad_norm": 0.6074957847595215, + "learning_rate": 0.0015, + "loss": 1.6391, + "step": 5859 + }, + { + 
"epoch": 0.6181434599156118, + "grad_norm": 0.6798166632652283, + "learning_rate": 0.0015, + "loss": 1.6323, + "step": 5860 + }, + { + "epoch": 0.6182489451476794, + "grad_norm": 0.6154612302780151, + "learning_rate": 0.0015, + "loss": 1.6487, + "step": 5861 + }, + { + "epoch": 0.6183544303797468, + "grad_norm": 0.6916834115982056, + "learning_rate": 0.0015, + "loss": 1.6737, + "step": 5862 + }, + { + "epoch": 0.6184599156118143, + "grad_norm": 0.9251354336738586, + "learning_rate": 0.0015, + "loss": 1.5949, + "step": 5863 + }, + { + "epoch": 0.6185654008438819, + "grad_norm": 0.7731589674949646, + "learning_rate": 0.0015, + "loss": 1.5898, + "step": 5864 + }, + { + "epoch": 0.6186708860759493, + "grad_norm": 0.5317090749740601, + "learning_rate": 0.0015, + "loss": 1.6159, + "step": 5865 + }, + { + "epoch": 0.6187763713080169, + "grad_norm": 0.7302789092063904, + "learning_rate": 0.0015, + "loss": 1.6303, + "step": 5866 + }, + { + "epoch": 0.6188818565400844, + "grad_norm": 0.6556691527366638, + "learning_rate": 0.0015, + "loss": 1.5668, + "step": 5867 + }, + { + "epoch": 0.6189873417721519, + "grad_norm": 0.5228979587554932, + "learning_rate": 0.0015, + "loss": 1.5929, + "step": 5868 + }, + { + "epoch": 0.6190928270042194, + "grad_norm": 0.6936362981796265, + "learning_rate": 0.0015, + "loss": 1.6343, + "step": 5869 + }, + { + "epoch": 0.619198312236287, + "grad_norm": 0.5519838929176331, + "learning_rate": 0.0015, + "loss": 1.6058, + "step": 5870 + }, + { + "epoch": 0.6193037974683544, + "grad_norm": 0.5528889298439026, + "learning_rate": 0.0015, + "loss": 1.6553, + "step": 5871 + }, + { + "epoch": 0.619409282700422, + "grad_norm": 0.5852952003479004, + "learning_rate": 0.0015, + "loss": 1.6176, + "step": 5872 + }, + { + "epoch": 0.6195147679324895, + "grad_norm": 0.5672625303268433, + "learning_rate": 0.0015, + "loss": 1.6019, + "step": 5873 + }, + { + "epoch": 0.6196202531645569, + "grad_norm": 0.5782454013824463, + "learning_rate": 0.0015, + "loss": 1.6073, + 
"step": 5874 + }, + { + "epoch": 0.6197257383966245, + "grad_norm": 0.6658385992050171, + "learning_rate": 0.0015, + "loss": 1.6785, + "step": 5875 + }, + { + "epoch": 0.619831223628692, + "grad_norm": 0.5504497289657593, + "learning_rate": 0.0015, + "loss": 1.5839, + "step": 5876 + }, + { + "epoch": 0.6199367088607595, + "grad_norm": 0.6169859170913696, + "learning_rate": 0.0015, + "loss": 1.6356, + "step": 5877 + }, + { + "epoch": 0.620042194092827, + "grad_norm": 0.7992807030677795, + "learning_rate": 0.0015, + "loss": 1.6373, + "step": 5878 + }, + { + "epoch": 0.6201476793248946, + "grad_norm": 0.5499757528305054, + "learning_rate": 0.0015, + "loss": 1.6084, + "step": 5879 + }, + { + "epoch": 0.620253164556962, + "grad_norm": 0.6638899445533752, + "learning_rate": 0.0015, + "loss": 1.618, + "step": 5880 + }, + { + "epoch": 0.6203586497890295, + "grad_norm": 0.7057284116744995, + "learning_rate": 0.0015, + "loss": 1.61, + "step": 5881 + }, + { + "epoch": 0.6204641350210971, + "grad_norm": 0.5079201459884644, + "learning_rate": 0.0015, + "loss": 1.6263, + "step": 5882 + }, + { + "epoch": 0.6205696202531645, + "grad_norm": 0.6595799922943115, + "learning_rate": 0.0015, + "loss": 1.5922, + "step": 5883 + }, + { + "epoch": 0.6206751054852321, + "grad_norm": 0.5134573578834534, + "learning_rate": 0.0015, + "loss": 1.588, + "step": 5884 + }, + { + "epoch": 0.6207805907172996, + "grad_norm": 0.6489550471305847, + "learning_rate": 0.0015, + "loss": 1.5965, + "step": 5885 + }, + { + "epoch": 0.6208860759493671, + "grad_norm": 0.5748170614242554, + "learning_rate": 0.0015, + "loss": 1.6496, + "step": 5886 + }, + { + "epoch": 0.6209915611814346, + "grad_norm": 0.5478618144989014, + "learning_rate": 0.0015, + "loss": 1.6332, + "step": 5887 + }, + { + "epoch": 0.6210970464135022, + "grad_norm": 0.5693939328193665, + "learning_rate": 0.0015, + "loss": 1.5937, + "step": 5888 + }, + { + "epoch": 0.6212025316455696, + "grad_norm": 0.5580261945724487, + "learning_rate": 0.0015, + 
"loss": 1.6273, + "step": 5889 + }, + { + "epoch": 0.6213080168776371, + "grad_norm": 0.6235843300819397, + "learning_rate": 0.0015, + "loss": 1.6454, + "step": 5890 + }, + { + "epoch": 0.6214135021097047, + "grad_norm": 0.6731160879135132, + "learning_rate": 0.0015, + "loss": 1.5879, + "step": 5891 + }, + { + "epoch": 0.6215189873417721, + "grad_norm": 0.47634485363960266, + "learning_rate": 0.0015, + "loss": 1.6291, + "step": 5892 + }, + { + "epoch": 0.6216244725738397, + "grad_norm": 0.6869895458221436, + "learning_rate": 0.0015, + "loss": 1.5821, + "step": 5893 + }, + { + "epoch": 0.6217299578059071, + "grad_norm": 0.5553609132766724, + "learning_rate": 0.0015, + "loss": 1.5941, + "step": 5894 + }, + { + "epoch": 0.6218354430379747, + "grad_norm": 0.6233806610107422, + "learning_rate": 0.0015, + "loss": 1.6253, + "step": 5895 + }, + { + "epoch": 0.6219409282700422, + "grad_norm": 0.7386407852172852, + "learning_rate": 0.0015, + "loss": 1.6253, + "step": 5896 + }, + { + "epoch": 0.6220464135021097, + "grad_norm": 0.6310761570930481, + "learning_rate": 0.0015, + "loss": 1.6501, + "step": 5897 + }, + { + "epoch": 0.6221518987341772, + "grad_norm": 0.5247330665588379, + "learning_rate": 0.0015, + "loss": 1.6417, + "step": 5898 + }, + { + "epoch": 0.6222573839662447, + "grad_norm": 0.644690215587616, + "learning_rate": 0.0015, + "loss": 1.5999, + "step": 5899 + }, + { + "epoch": 0.6223628691983122, + "grad_norm": 0.4776199758052826, + "learning_rate": 0.0015, + "loss": 1.6332, + "step": 5900 + }, + { + "epoch": 0.6224683544303797, + "grad_norm": 0.5957555770874023, + "learning_rate": 0.0015, + "loss": 1.6078, + "step": 5901 + }, + { + "epoch": 0.6225738396624473, + "grad_norm": 0.6283895373344421, + "learning_rate": 0.0015, + "loss": 1.6617, + "step": 5902 + }, + { + "epoch": 0.6226793248945147, + "grad_norm": 0.6419785618782043, + "learning_rate": 0.0015, + "loss": 1.5818, + "step": 5903 + }, + { + "epoch": 0.6227848101265823, + "grad_norm": 0.5279337763786316, + 
"learning_rate": 0.0015, + "loss": 1.6173, + "step": 5904 + }, + { + "epoch": 0.6228902953586498, + "grad_norm": 0.7001970410346985, + "learning_rate": 0.0015, + "loss": 1.5934, + "step": 5905 + }, + { + "epoch": 0.6229957805907173, + "grad_norm": 0.5482293367385864, + "learning_rate": 0.0015, + "loss": 1.6168, + "step": 5906 + }, + { + "epoch": 0.6231012658227848, + "grad_norm": 0.5112176537513733, + "learning_rate": 0.0015, + "loss": 1.5851, + "step": 5907 + }, + { + "epoch": 0.6232067510548523, + "grad_norm": 0.5309563279151917, + "learning_rate": 0.0015, + "loss": 1.6221, + "step": 5908 + }, + { + "epoch": 0.6233122362869198, + "grad_norm": 0.4815877377986908, + "learning_rate": 0.0015, + "loss": 1.621, + "step": 5909 + }, + { + "epoch": 0.6234177215189873, + "grad_norm": 0.5217291712760925, + "learning_rate": 0.0015, + "loss": 1.6123, + "step": 5910 + }, + { + "epoch": 0.6235232067510549, + "grad_norm": 0.503728985786438, + "learning_rate": 0.0015, + "loss": 1.5732, + "step": 5911 + }, + { + "epoch": 0.6236286919831223, + "grad_norm": 0.5668125152587891, + "learning_rate": 0.0015, + "loss": 1.6498, + "step": 5912 + }, + { + "epoch": 0.6237341772151899, + "grad_norm": 0.5331942439079285, + "learning_rate": 0.0015, + "loss": 1.5858, + "step": 5913 + }, + { + "epoch": 0.6238396624472574, + "grad_norm": 0.6289189457893372, + "learning_rate": 0.0015, + "loss": 1.5957, + "step": 5914 + }, + { + "epoch": 0.6239451476793249, + "grad_norm": 0.5665965676307678, + "learning_rate": 0.0015, + "loss": 1.5923, + "step": 5915 + }, + { + "epoch": 0.6240506329113924, + "grad_norm": 0.5158137083053589, + "learning_rate": 0.0015, + "loss": 1.6104, + "step": 5916 + }, + { + "epoch": 0.62415611814346, + "grad_norm": 0.5021527409553528, + "learning_rate": 0.0015, + "loss": 1.6349, + "step": 5917 + }, + { + "epoch": 0.6242616033755274, + "grad_norm": 0.5725732445716858, + "learning_rate": 0.0015, + "loss": 1.6131, + "step": 5918 + }, + { + "epoch": 0.6243670886075949, + "grad_norm": 
0.7081011533737183, + "learning_rate": 0.0015, + "loss": 1.6149, + "step": 5919 + }, + { + "epoch": 0.6244725738396625, + "grad_norm": 0.5116013884544373, + "learning_rate": 0.0015, + "loss": 1.6051, + "step": 5920 + }, + { + "epoch": 0.6245780590717299, + "grad_norm": 0.6848795413970947, + "learning_rate": 0.0015, + "loss": 1.6293, + "step": 5921 + }, + { + "epoch": 0.6246835443037975, + "grad_norm": 0.7860782742500305, + "learning_rate": 0.0015, + "loss": 1.5985, + "step": 5922 + }, + { + "epoch": 0.624789029535865, + "grad_norm": 0.44655081629753113, + "learning_rate": 0.0015, + "loss": 1.625, + "step": 5923 + }, + { + "epoch": 0.6248945147679325, + "grad_norm": 0.7083460688591003, + "learning_rate": 0.0015, + "loss": 1.6344, + "step": 5924 + }, + { + "epoch": 0.625, + "grad_norm": 0.7011964917182922, + "learning_rate": 0.0015, + "loss": 1.633, + "step": 5925 + }, + { + "epoch": 0.6251054852320675, + "grad_norm": 0.5411670804023743, + "learning_rate": 0.0015, + "loss": 1.5601, + "step": 5926 + }, + { + "epoch": 0.625210970464135, + "grad_norm": 0.8851147294044495, + "learning_rate": 0.0015, + "loss": 1.6247, + "step": 5927 + }, + { + "epoch": 0.6253164556962025, + "grad_norm": 0.8171315789222717, + "learning_rate": 0.0015, + "loss": 1.6091, + "step": 5928 + }, + { + "epoch": 0.6254219409282701, + "grad_norm": 0.6932423710823059, + "learning_rate": 0.0015, + "loss": 1.603, + "step": 5929 + }, + { + "epoch": 0.6255274261603375, + "grad_norm": 1.0889321565628052, + "learning_rate": 0.0015, + "loss": 1.6293, + "step": 5930 + }, + { + "epoch": 0.6256329113924051, + "grad_norm": 0.7974080443382263, + "learning_rate": 0.0015, + "loss": 1.6181, + "step": 5931 + }, + { + "epoch": 0.6257383966244726, + "grad_norm": 0.5604573488235474, + "learning_rate": 0.0015, + "loss": 1.6313, + "step": 5932 + }, + { + "epoch": 0.62584388185654, + "grad_norm": 0.8448460698127747, + "learning_rate": 0.0015, + "loss": 1.6522, + "step": 5933 + }, + { + "epoch": 0.6259493670886076, + 
"grad_norm": 0.5236601829528809, + "learning_rate": 0.0015, + "loss": 1.6332, + "step": 5934 + }, + { + "epoch": 0.6260548523206751, + "grad_norm": 0.7095980048179626, + "learning_rate": 0.0015, + "loss": 1.6279, + "step": 5935 + }, + { + "epoch": 0.6261603375527426, + "grad_norm": 0.628194272518158, + "learning_rate": 0.0015, + "loss": 1.6135, + "step": 5936 + }, + { + "epoch": 0.6262658227848101, + "grad_norm": 0.5797224640846252, + "learning_rate": 0.0015, + "loss": 1.6225, + "step": 5937 + }, + { + "epoch": 0.6263713080168777, + "grad_norm": 0.5181826949119568, + "learning_rate": 0.0015, + "loss": 1.5771, + "step": 5938 + }, + { + "epoch": 0.6264767932489451, + "grad_norm": 0.5678123235702515, + "learning_rate": 0.0015, + "loss": 1.5644, + "step": 5939 + }, + { + "epoch": 0.6265822784810127, + "grad_norm": 0.5376322269439697, + "learning_rate": 0.0015, + "loss": 1.6502, + "step": 5940 + }, + { + "epoch": 0.6266877637130802, + "grad_norm": 0.6702601909637451, + "learning_rate": 0.0015, + "loss": 1.6377, + "step": 5941 + }, + { + "epoch": 0.6267932489451477, + "grad_norm": 0.5268521904945374, + "learning_rate": 0.0015, + "loss": 1.6228, + "step": 5942 + }, + { + "epoch": 0.6268987341772152, + "grad_norm": 0.5045256614685059, + "learning_rate": 0.0015, + "loss": 1.5645, + "step": 5943 + }, + { + "epoch": 0.6270042194092827, + "grad_norm": 0.5835480690002441, + "learning_rate": 0.0015, + "loss": 1.6261, + "step": 5944 + }, + { + "epoch": 0.6271097046413502, + "grad_norm": 0.512340247631073, + "learning_rate": 0.0015, + "loss": 1.5797, + "step": 5945 + }, + { + "epoch": 0.6272151898734177, + "grad_norm": 0.4840671122074127, + "learning_rate": 0.0015, + "loss": 1.6288, + "step": 5946 + }, + { + "epoch": 0.6273206751054853, + "grad_norm": 0.5077359080314636, + "learning_rate": 0.0015, + "loss": 1.6037, + "step": 5947 + }, + { + "epoch": 0.6274261603375527, + "grad_norm": 0.5138733983039856, + "learning_rate": 0.0015, + "loss": 1.5926, + "step": 5948 + }, + { + 
"epoch": 0.6275316455696203, + "grad_norm": 0.5429332852363586, + "learning_rate": 0.0015, + "loss": 1.6139, + "step": 5949 + }, + { + "epoch": 0.6276371308016878, + "grad_norm": 0.5711822509765625, + "learning_rate": 0.0015, + "loss": 1.6051, + "step": 5950 + }, + { + "epoch": 0.6277426160337553, + "grad_norm": 0.5061172842979431, + "learning_rate": 0.0015, + "loss": 1.6585, + "step": 5951 + }, + { + "epoch": 0.6278481012658228, + "grad_norm": 0.572616696357727, + "learning_rate": 0.0015, + "loss": 1.6299, + "step": 5952 + }, + { + "epoch": 0.6279535864978903, + "grad_norm": 0.49323344230651855, + "learning_rate": 0.0015, + "loss": 1.6045, + "step": 5953 + }, + { + "epoch": 0.6280590717299578, + "grad_norm": 0.5708640217781067, + "learning_rate": 0.0015, + "loss": 1.5992, + "step": 5954 + }, + { + "epoch": 0.6281645569620253, + "grad_norm": 0.623921275138855, + "learning_rate": 0.0015, + "loss": 1.6263, + "step": 5955 + }, + { + "epoch": 0.6282700421940929, + "grad_norm": 0.6181391477584839, + "learning_rate": 0.0015, + "loss": 1.6326, + "step": 5956 + }, + { + "epoch": 0.6283755274261603, + "grad_norm": 0.6751028895378113, + "learning_rate": 0.0015, + "loss": 1.5871, + "step": 5957 + }, + { + "epoch": 0.6284810126582279, + "grad_norm": 0.5583983063697815, + "learning_rate": 0.0015, + "loss": 1.5975, + "step": 5958 + }, + { + "epoch": 0.6285864978902953, + "grad_norm": 0.6823524832725525, + "learning_rate": 0.0015, + "loss": 1.5976, + "step": 5959 + }, + { + "epoch": 0.6286919831223629, + "grad_norm": 0.7429535984992981, + "learning_rate": 0.0015, + "loss": 1.6315, + "step": 5960 + }, + { + "epoch": 0.6287974683544304, + "grad_norm": 0.5538409352302551, + "learning_rate": 0.0015, + "loss": 1.6202, + "step": 5961 + }, + { + "epoch": 0.6289029535864978, + "grad_norm": 0.9482542276382446, + "learning_rate": 0.0015, + "loss": 1.6132, + "step": 5962 + }, + { + "epoch": 0.6290084388185654, + "grad_norm": 0.7959316372871399, + "learning_rate": 0.0015, + "loss": 1.6312, + 
"step": 5963 + }, + { + "epoch": 0.6291139240506329, + "grad_norm": 0.6570045948028564, + "learning_rate": 0.0015, + "loss": 1.6305, + "step": 5964 + }, + { + "epoch": 0.6292194092827004, + "grad_norm": 0.6505491733551025, + "learning_rate": 0.0015, + "loss": 1.6524, + "step": 5965 + }, + { + "epoch": 0.6293248945147679, + "grad_norm": 0.5877590179443359, + "learning_rate": 0.0015, + "loss": 1.6133, + "step": 5966 + }, + { + "epoch": 0.6294303797468355, + "grad_norm": 0.5810253024101257, + "learning_rate": 0.0015, + "loss": 1.6161, + "step": 5967 + }, + { + "epoch": 0.6295358649789029, + "grad_norm": 0.571105420589447, + "learning_rate": 0.0015, + "loss": 1.5749, + "step": 5968 + }, + { + "epoch": 0.6296413502109705, + "grad_norm": 0.5433521270751953, + "learning_rate": 0.0015, + "loss": 1.588, + "step": 5969 + }, + { + "epoch": 0.629746835443038, + "grad_norm": 0.5073991417884827, + "learning_rate": 0.0015, + "loss": 1.6402, + "step": 5970 + }, + { + "epoch": 0.6298523206751054, + "grad_norm": 0.6024935245513916, + "learning_rate": 0.0015, + "loss": 1.6367, + "step": 5971 + }, + { + "epoch": 0.629957805907173, + "grad_norm": 0.4808545708656311, + "learning_rate": 0.0015, + "loss": 1.5751, + "step": 5972 + }, + { + "epoch": 0.6300632911392405, + "grad_norm": 0.5532571077346802, + "learning_rate": 0.0015, + "loss": 1.6197, + "step": 5973 + }, + { + "epoch": 0.630168776371308, + "grad_norm": 0.566626250743866, + "learning_rate": 0.0015, + "loss": 1.6278, + "step": 5974 + }, + { + "epoch": 0.6302742616033755, + "grad_norm": 0.6555110812187195, + "learning_rate": 0.0015, + "loss": 1.6046, + "step": 5975 + }, + { + "epoch": 0.6303797468354431, + "grad_norm": 0.6266216039657593, + "learning_rate": 0.0015, + "loss": 1.6078, + "step": 5976 + }, + { + "epoch": 0.6304852320675105, + "grad_norm": 0.5250189304351807, + "learning_rate": 0.0015, + "loss": 1.5797, + "step": 5977 + }, + { + "epoch": 0.630590717299578, + "grad_norm": 0.654657244682312, + "learning_rate": 0.0015, + 
"loss": 1.613, + "step": 5978 + }, + { + "epoch": 0.6306962025316456, + "grad_norm": 0.5831957459449768, + "learning_rate": 0.0015, + "loss": 1.5601, + "step": 5979 + }, + { + "epoch": 0.630801687763713, + "grad_norm": 0.5356829166412354, + "learning_rate": 0.0015, + "loss": 1.5873, + "step": 5980 + }, + { + "epoch": 0.6309071729957806, + "grad_norm": 0.6585840582847595, + "learning_rate": 0.0015, + "loss": 1.6178, + "step": 5981 + }, + { + "epoch": 0.6310126582278481, + "grad_norm": 0.5871908068656921, + "learning_rate": 0.0015, + "loss": 1.6202, + "step": 5982 + }, + { + "epoch": 0.6311181434599156, + "grad_norm": 0.6139295697212219, + "learning_rate": 0.0015, + "loss": 1.5875, + "step": 5983 + }, + { + "epoch": 0.6312236286919831, + "grad_norm": 0.5918750762939453, + "learning_rate": 0.0015, + "loss": 1.6258, + "step": 5984 + }, + { + "epoch": 0.6313291139240507, + "grad_norm": 0.600638210773468, + "learning_rate": 0.0015, + "loss": 1.6141, + "step": 5985 + }, + { + "epoch": 0.6314345991561181, + "grad_norm": 0.5739317536354065, + "learning_rate": 0.0015, + "loss": 1.6324, + "step": 5986 + }, + { + "epoch": 0.6315400843881857, + "grad_norm": 0.7033628225326538, + "learning_rate": 0.0015, + "loss": 1.6094, + "step": 5987 + }, + { + "epoch": 0.6316455696202532, + "grad_norm": 0.6276975274085999, + "learning_rate": 0.0015, + "loss": 1.6339, + "step": 5988 + }, + { + "epoch": 0.6317510548523206, + "grad_norm": 0.8046818375587463, + "learning_rate": 0.0015, + "loss": 1.654, + "step": 5989 + }, + { + "epoch": 0.6318565400843882, + "grad_norm": 0.5364254117012024, + "learning_rate": 0.0015, + "loss": 1.62, + "step": 5990 + }, + { + "epoch": 0.6319620253164557, + "grad_norm": 0.6985353231430054, + "learning_rate": 0.0015, + "loss": 1.6248, + "step": 5991 + }, + { + "epoch": 0.6320675105485232, + "grad_norm": 0.6330892443656921, + "learning_rate": 0.0015, + "loss": 1.6067, + "step": 5992 + }, + { + "epoch": 0.6321729957805907, + "grad_norm": 0.5060620307922363, + 
"learning_rate": 0.0015, + "loss": 1.6879, + "step": 5993 + }, + { + "epoch": 0.6322784810126583, + "grad_norm": 0.5791623592376709, + "learning_rate": 0.0015, + "loss": 1.6367, + "step": 5994 + }, + { + "epoch": 0.6323839662447257, + "grad_norm": 0.5025582313537598, + "learning_rate": 0.0015, + "loss": 1.5957, + "step": 5995 + }, + { + "epoch": 0.6324894514767933, + "grad_norm": 0.662771463394165, + "learning_rate": 0.0015, + "loss": 1.6256, + "step": 5996 + }, + { + "epoch": 0.6325949367088608, + "grad_norm": 0.6416019201278687, + "learning_rate": 0.0015, + "loss": 1.6356, + "step": 5997 + }, + { + "epoch": 0.6327004219409282, + "grad_norm": 0.5294812321662903, + "learning_rate": 0.0015, + "loss": 1.6018, + "step": 5998 + }, + { + "epoch": 0.6328059071729958, + "grad_norm": 0.6193150281906128, + "learning_rate": 0.0015, + "loss": 1.6267, + "step": 5999 + }, + { + "epoch": 0.6329113924050633, + "grad_norm": 0.6678804755210876, + "learning_rate": 0.0015, + "loss": 1.6405, + "step": 6000 + }, + { + "epoch": 0.6330168776371308, + "grad_norm": 0.46960487961769104, + "learning_rate": 0.0015, + "loss": 1.6028, + "step": 6001 + }, + { + "epoch": 0.6331223628691983, + "grad_norm": 0.6118060946464539, + "learning_rate": 0.0015, + "loss": 1.5838, + "step": 6002 + }, + { + "epoch": 0.6332278481012659, + "grad_norm": 0.6287263035774231, + "learning_rate": 0.0015, + "loss": 1.6326, + "step": 6003 + }, + { + "epoch": 0.6333333333333333, + "grad_norm": 0.5200289487838745, + "learning_rate": 0.0015, + "loss": 1.6523, + "step": 6004 + }, + { + "epoch": 0.6334388185654009, + "grad_norm": 0.49725160002708435, + "learning_rate": 0.0015, + "loss": 1.6041, + "step": 6005 + }, + { + "epoch": 0.6335443037974684, + "grad_norm": 0.49891433119773865, + "learning_rate": 0.0015, + "loss": 1.6142, + "step": 6006 + }, + { + "epoch": 0.6336497890295358, + "grad_norm": 0.6071246266365051, + "learning_rate": 0.0015, + "loss": 1.6106, + "step": 6007 + }, + { + "epoch": 0.6337552742616034, + 
"grad_norm": 0.4743974208831787, + "learning_rate": 0.0015, + "loss": 1.6182, + "step": 6008 + }, + { + "epoch": 0.6338607594936709, + "grad_norm": 0.6092520356178284, + "learning_rate": 0.0015, + "loss": 1.643, + "step": 6009 + }, + { + "epoch": 0.6339662447257384, + "grad_norm": 0.6248684525489807, + "learning_rate": 0.0015, + "loss": 1.5923, + "step": 6010 + }, + { + "epoch": 0.6340717299578059, + "grad_norm": 0.4810630679130554, + "learning_rate": 0.0015, + "loss": 1.5926, + "step": 6011 + }, + { + "epoch": 0.6341772151898735, + "grad_norm": 0.7272651195526123, + "learning_rate": 0.0015, + "loss": 1.6445, + "step": 6012 + }, + { + "epoch": 0.6342827004219409, + "grad_norm": 0.8365352749824524, + "learning_rate": 0.0015, + "loss": 1.6409, + "step": 6013 + }, + { + "epoch": 0.6343881856540085, + "grad_norm": 0.6300891637802124, + "learning_rate": 0.0015, + "loss": 1.6287, + "step": 6014 + }, + { + "epoch": 0.634493670886076, + "grad_norm": 0.5885586142539978, + "learning_rate": 0.0015, + "loss": 1.6237, + "step": 6015 + }, + { + "epoch": 0.6345991561181434, + "grad_norm": 0.6359388828277588, + "learning_rate": 0.0015, + "loss": 1.6061, + "step": 6016 + }, + { + "epoch": 0.634704641350211, + "grad_norm": 0.5030336380004883, + "learning_rate": 0.0015, + "loss": 1.6466, + "step": 6017 + }, + { + "epoch": 0.6348101265822785, + "grad_norm": 0.7077120542526245, + "learning_rate": 0.0015, + "loss": 1.5889, + "step": 6018 + }, + { + "epoch": 0.634915611814346, + "grad_norm": 0.5615753531455994, + "learning_rate": 0.0015, + "loss": 1.5846, + "step": 6019 + }, + { + "epoch": 0.6350210970464135, + "grad_norm": 0.5832058191299438, + "learning_rate": 0.0015, + "loss": 1.6358, + "step": 6020 + }, + { + "epoch": 0.6351265822784811, + "grad_norm": 0.7304758429527283, + "learning_rate": 0.0015, + "loss": 1.6031, + "step": 6021 + }, + { + "epoch": 0.6352320675105485, + "grad_norm": 0.4968035817146301, + "learning_rate": 0.0015, + "loss": 1.5802, + "step": 6022 + }, + { + "epoch": 
0.635337552742616, + "grad_norm": 0.5690223574638367, + "learning_rate": 0.0015, + "loss": 1.6213, + "step": 6023 + }, + { + "epoch": 0.6354430379746835, + "grad_norm": 0.5893293023109436, + "learning_rate": 0.0015, + "loss": 1.5834, + "step": 6024 + }, + { + "epoch": 0.635548523206751, + "grad_norm": 0.45773980021476746, + "learning_rate": 0.0015, + "loss": 1.6117, + "step": 6025 + }, + { + "epoch": 0.6356540084388186, + "grad_norm": 0.7095865607261658, + "learning_rate": 0.0015, + "loss": 1.5999, + "step": 6026 + }, + { + "epoch": 0.635759493670886, + "grad_norm": 0.6526431441307068, + "learning_rate": 0.0015, + "loss": 1.6153, + "step": 6027 + }, + { + "epoch": 0.6358649789029536, + "grad_norm": 0.4970572590827942, + "learning_rate": 0.0015, + "loss": 1.6235, + "step": 6028 + }, + { + "epoch": 0.6359704641350211, + "grad_norm": 0.5066674947738647, + "learning_rate": 0.0015, + "loss": 1.5961, + "step": 6029 + }, + { + "epoch": 0.6360759493670886, + "grad_norm": 0.5693353414535522, + "learning_rate": 0.0015, + "loss": 1.6099, + "step": 6030 + }, + { + "epoch": 0.6361814345991561, + "grad_norm": 0.48609164357185364, + "learning_rate": 0.0015, + "loss": 1.6379, + "step": 6031 + }, + { + "epoch": 0.6362869198312237, + "grad_norm": 0.5931279063224792, + "learning_rate": 0.0015, + "loss": 1.6172, + "step": 6032 + }, + { + "epoch": 0.6363924050632911, + "grad_norm": 0.5124304890632629, + "learning_rate": 0.0015, + "loss": 1.61, + "step": 6033 + }, + { + "epoch": 0.6364978902953586, + "grad_norm": 0.5319472551345825, + "learning_rate": 0.0015, + "loss": 1.6412, + "step": 6034 + }, + { + "epoch": 0.6366033755274262, + "grad_norm": 0.590932309627533, + "learning_rate": 0.0015, + "loss": 1.6152, + "step": 6035 + }, + { + "epoch": 0.6367088607594936, + "grad_norm": 0.5260143876075745, + "learning_rate": 0.0015, + "loss": 1.6013, + "step": 6036 + }, + { + "epoch": 0.6368143459915612, + "grad_norm": 0.5621371269226074, + "learning_rate": 0.0015, + "loss": 1.6234, + "step": 
6037 + }, + { + "epoch": 0.6369198312236287, + "grad_norm": 0.6405736207962036, + "learning_rate": 0.0015, + "loss": 1.616, + "step": 6038 + }, + { + "epoch": 0.6370253164556962, + "grad_norm": 0.5318799614906311, + "learning_rate": 0.0015, + "loss": 1.5909, + "step": 6039 + }, + { + "epoch": 0.6371308016877637, + "grad_norm": 0.79112708568573, + "learning_rate": 0.0015, + "loss": 1.6369, + "step": 6040 + }, + { + "epoch": 0.6372362869198313, + "grad_norm": 0.7743318676948547, + "learning_rate": 0.0015, + "loss": 1.5944, + "step": 6041 + }, + { + "epoch": 0.6373417721518987, + "grad_norm": 0.5720817446708679, + "learning_rate": 0.0015, + "loss": 1.6102, + "step": 6042 + }, + { + "epoch": 0.6374472573839662, + "grad_norm": 0.5739864706993103, + "learning_rate": 0.0015, + "loss": 1.6027, + "step": 6043 + }, + { + "epoch": 0.6375527426160338, + "grad_norm": 0.5801692605018616, + "learning_rate": 0.0015, + "loss": 1.5922, + "step": 6044 + }, + { + "epoch": 0.6376582278481012, + "grad_norm": 0.5075503587722778, + "learning_rate": 0.0015, + "loss": 1.5981, + "step": 6045 + }, + { + "epoch": 0.6377637130801688, + "grad_norm": 0.5207285284996033, + "learning_rate": 0.0015, + "loss": 1.5909, + "step": 6046 + }, + { + "epoch": 0.6378691983122363, + "grad_norm": 0.6117684245109558, + "learning_rate": 0.0015, + "loss": 1.6199, + "step": 6047 + }, + { + "epoch": 0.6379746835443038, + "grad_norm": 0.513938844203949, + "learning_rate": 0.0015, + "loss": 1.6304, + "step": 6048 + }, + { + "epoch": 0.6380801687763713, + "grad_norm": 0.7167081236839294, + "learning_rate": 0.0015, + "loss": 1.6011, + "step": 6049 + }, + { + "epoch": 0.6381856540084389, + "grad_norm": 0.5096848607063293, + "learning_rate": 0.0015, + "loss": 1.6114, + "step": 6050 + }, + { + "epoch": 0.6382911392405063, + "grad_norm": 0.7479782104492188, + "learning_rate": 0.0015, + "loss": 1.6132, + "step": 6051 + }, + { + "epoch": 0.6383966244725738, + "grad_norm": 0.6062121987342834, + "learning_rate": 0.0015, + 
"loss": 1.6583, + "step": 6052 + }, + { + "epoch": 0.6385021097046414, + "grad_norm": 0.5051358938217163, + "learning_rate": 0.0015, + "loss": 1.5881, + "step": 6053 + }, + { + "epoch": 0.6386075949367088, + "grad_norm": 0.5258079171180725, + "learning_rate": 0.0015, + "loss": 1.6374, + "step": 6054 + }, + { + "epoch": 0.6387130801687764, + "grad_norm": 0.48984280228614807, + "learning_rate": 0.0015, + "loss": 1.5783, + "step": 6055 + }, + { + "epoch": 0.6388185654008439, + "grad_norm": 0.49099859595298767, + "learning_rate": 0.0015, + "loss": 1.5901, + "step": 6056 + }, + { + "epoch": 0.6389240506329114, + "grad_norm": 0.47011294960975647, + "learning_rate": 0.0015, + "loss": 1.622, + "step": 6057 + }, + { + "epoch": 0.6390295358649789, + "grad_norm": 0.5314732789993286, + "learning_rate": 0.0015, + "loss": 1.6279, + "step": 6058 + }, + { + "epoch": 0.6391350210970465, + "grad_norm": 0.4974585771560669, + "learning_rate": 0.0015, + "loss": 1.63, + "step": 6059 + }, + { + "epoch": 0.6392405063291139, + "grad_norm": 0.5173642635345459, + "learning_rate": 0.0015, + "loss": 1.6379, + "step": 6060 + }, + { + "epoch": 0.6393459915611814, + "grad_norm": 0.5478307604789734, + "learning_rate": 0.0015, + "loss": 1.6224, + "step": 6061 + }, + { + "epoch": 0.639451476793249, + "grad_norm": 0.5353797078132629, + "learning_rate": 0.0015, + "loss": 1.5966, + "step": 6062 + }, + { + "epoch": 0.6395569620253164, + "grad_norm": 0.5645928978919983, + "learning_rate": 0.0015, + "loss": 1.6005, + "step": 6063 + }, + { + "epoch": 0.639662447257384, + "grad_norm": 0.5212071537971497, + "learning_rate": 0.0015, + "loss": 1.6645, + "step": 6064 + }, + { + "epoch": 0.6397679324894515, + "grad_norm": 0.5072381496429443, + "learning_rate": 0.0015, + "loss": 1.593, + "step": 6065 + }, + { + "epoch": 0.639873417721519, + "grad_norm": 0.5284490585327148, + "learning_rate": 0.0015, + "loss": 1.5814, + "step": 6066 + }, + { + "epoch": 0.6399789029535865, + "grad_norm": 0.5199257135391235, + 
"learning_rate": 0.0015, + "loss": 1.5931, + "step": 6067 + }, + { + "epoch": 0.640084388185654, + "grad_norm": 0.4942372143268585, + "learning_rate": 0.0015, + "loss": 1.5915, + "step": 6068 + }, + { + "epoch": 0.6401898734177215, + "grad_norm": 0.5032808780670166, + "learning_rate": 0.0015, + "loss": 1.6549, + "step": 6069 + }, + { + "epoch": 0.640295358649789, + "grad_norm": 0.610774040222168, + "learning_rate": 0.0015, + "loss": 1.6112, + "step": 6070 + }, + { + "epoch": 0.6404008438818566, + "grad_norm": 0.5463709235191345, + "learning_rate": 0.0015, + "loss": 1.6137, + "step": 6071 + }, + { + "epoch": 0.640506329113924, + "grad_norm": 0.48926249146461487, + "learning_rate": 0.0015, + "loss": 1.6076, + "step": 6072 + }, + { + "epoch": 0.6406118143459916, + "grad_norm": 0.491427481174469, + "learning_rate": 0.0015, + "loss": 1.6413, + "step": 6073 + }, + { + "epoch": 0.6407172995780591, + "grad_norm": 0.4824755787849426, + "learning_rate": 0.0015, + "loss": 1.6407, + "step": 6074 + }, + { + "epoch": 0.6408227848101266, + "grad_norm": 0.5169383883476257, + "learning_rate": 0.0015, + "loss": 1.5855, + "step": 6075 + }, + { + "epoch": 0.6409282700421941, + "grad_norm": 0.6159296631813049, + "learning_rate": 0.0015, + "loss": 1.5848, + "step": 6076 + }, + { + "epoch": 0.6410337552742617, + "grad_norm": 0.4957151710987091, + "learning_rate": 0.0015, + "loss": 1.6154, + "step": 6077 + }, + { + "epoch": 0.6411392405063291, + "grad_norm": 0.5977268815040588, + "learning_rate": 0.0015, + "loss": 1.6075, + "step": 6078 + }, + { + "epoch": 0.6412447257383966, + "grad_norm": 0.7418320178985596, + "learning_rate": 0.0015, + "loss": 1.6379, + "step": 6079 + }, + { + "epoch": 0.6413502109704642, + "grad_norm": 0.5622373223304749, + "learning_rate": 0.0015, + "loss": 1.5845, + "step": 6080 + }, + { + "epoch": 0.6414556962025316, + "grad_norm": 0.5169886946678162, + "learning_rate": 0.0015, + "loss": 1.5967, + "step": 6081 + }, + { + "epoch": 0.6415611814345992, + "grad_norm": 
0.5313584208488464, + "learning_rate": 0.0015, + "loss": 1.5757, + "step": 6082 + }, + { + "epoch": 0.6416666666666667, + "grad_norm": 0.6163620352745056, + "learning_rate": 0.0015, + "loss": 1.6558, + "step": 6083 + }, + { + "epoch": 0.6417721518987342, + "grad_norm": 0.4772838056087494, + "learning_rate": 0.0015, + "loss": 1.59, + "step": 6084 + }, + { + "epoch": 0.6418776371308017, + "grad_norm": 0.5533785223960876, + "learning_rate": 0.0015, + "loss": 1.5964, + "step": 6085 + }, + { + "epoch": 0.6419831223628693, + "grad_norm": 0.5664098262786865, + "learning_rate": 0.0015, + "loss": 1.6369, + "step": 6086 + }, + { + "epoch": 0.6420886075949367, + "grad_norm": 0.6539817452430725, + "learning_rate": 0.0015, + "loss": 1.6367, + "step": 6087 + }, + { + "epoch": 0.6421940928270042, + "grad_norm": 0.5820465087890625, + "learning_rate": 0.0015, + "loss": 1.5957, + "step": 6088 + }, + { + "epoch": 0.6422995780590718, + "grad_norm": 0.548473596572876, + "learning_rate": 0.0015, + "loss": 1.602, + "step": 6089 + }, + { + "epoch": 0.6424050632911392, + "grad_norm": 0.513701856136322, + "learning_rate": 0.0015, + "loss": 1.6172, + "step": 6090 + }, + { + "epoch": 0.6425105485232068, + "grad_norm": 0.5873857736587524, + "learning_rate": 0.0015, + "loss": 1.6358, + "step": 6091 + }, + { + "epoch": 0.6426160337552742, + "grad_norm": 0.5551254153251648, + "learning_rate": 0.0015, + "loss": 1.6041, + "step": 6092 + }, + { + "epoch": 0.6427215189873418, + "grad_norm": 0.5355796813964844, + "learning_rate": 0.0015, + "loss": 1.6182, + "step": 6093 + }, + { + "epoch": 0.6428270042194093, + "grad_norm": 0.6306756734848022, + "learning_rate": 0.0015, + "loss": 1.6073, + "step": 6094 + }, + { + "epoch": 0.6429324894514767, + "grad_norm": 0.7098020315170288, + "learning_rate": 0.0015, + "loss": 1.5638, + "step": 6095 + }, + { + "epoch": 0.6430379746835443, + "grad_norm": 0.6527588963508606, + "learning_rate": 0.0015, + "loss": 1.6019, + "step": 6096 + }, + { + "epoch": 
0.6431434599156118, + "grad_norm": 0.5478804111480713, + "learning_rate": 0.0015, + "loss": 1.5903, + "step": 6097 + }, + { + "epoch": 0.6432489451476793, + "grad_norm": 0.5948776006698608, + "learning_rate": 0.0015, + "loss": 1.6301, + "step": 6098 + }, + { + "epoch": 0.6433544303797468, + "grad_norm": 0.8403067588806152, + "learning_rate": 0.0015, + "loss": 1.5863, + "step": 6099 + }, + { + "epoch": 0.6434599156118144, + "grad_norm": 0.6364478468894958, + "learning_rate": 0.0015, + "loss": 1.6109, + "step": 6100 + }, + { + "epoch": 0.6435654008438818, + "grad_norm": 0.6228508949279785, + "learning_rate": 0.0015, + "loss": 1.6484, + "step": 6101 + }, + { + "epoch": 0.6436708860759494, + "grad_norm": 0.7421320676803589, + "learning_rate": 0.0015, + "loss": 1.5813, + "step": 6102 + }, + { + "epoch": 0.6437763713080169, + "grad_norm": 0.7950869202613831, + "learning_rate": 0.0015, + "loss": 1.5982, + "step": 6103 + }, + { + "epoch": 0.6438818565400843, + "grad_norm": 0.5360313653945923, + "learning_rate": 0.0015, + "loss": 1.6181, + "step": 6104 + }, + { + "epoch": 0.6439873417721519, + "grad_norm": 0.6788882613182068, + "learning_rate": 0.0015, + "loss": 1.6443, + "step": 6105 + }, + { + "epoch": 0.6440928270042194, + "grad_norm": 0.7845962047576904, + "learning_rate": 0.0015, + "loss": 1.6162, + "step": 6106 + }, + { + "epoch": 0.6441983122362869, + "grad_norm": 0.46688973903656006, + "learning_rate": 0.0015, + "loss": 1.6479, + "step": 6107 + }, + { + "epoch": 0.6443037974683544, + "grad_norm": 0.9376731514930725, + "learning_rate": 0.0015, + "loss": 1.6114, + "step": 6108 + }, + { + "epoch": 0.644409282700422, + "grad_norm": 0.9342179894447327, + "learning_rate": 0.0015, + "loss": 1.641, + "step": 6109 + }, + { + "epoch": 0.6445147679324894, + "grad_norm": 0.5573187470436096, + "learning_rate": 0.0015, + "loss": 1.6083, + "step": 6110 + }, + { + "epoch": 0.644620253164557, + "grad_norm": 1.011339545249939, + "learning_rate": 0.0015, + "loss": 1.621, + "step": 
6111 + }, + { + "epoch": 0.6447257383966245, + "grad_norm": 0.795906126499176, + "learning_rate": 0.0015, + "loss": 1.5891, + "step": 6112 + }, + { + "epoch": 0.6448312236286919, + "grad_norm": 0.6206145882606506, + "learning_rate": 0.0015, + "loss": 1.6364, + "step": 6113 + }, + { + "epoch": 0.6449367088607595, + "grad_norm": 0.7847033143043518, + "learning_rate": 0.0015, + "loss": 1.6097, + "step": 6114 + }, + { + "epoch": 0.645042194092827, + "grad_norm": 0.5453957915306091, + "learning_rate": 0.0015, + "loss": 1.6232, + "step": 6115 + }, + { + "epoch": 0.6451476793248945, + "grad_norm": 0.6635275483131409, + "learning_rate": 0.0015, + "loss": 1.645, + "step": 6116 + }, + { + "epoch": 0.645253164556962, + "grad_norm": 0.6184625625610352, + "learning_rate": 0.0015, + "loss": 1.5987, + "step": 6117 + }, + { + "epoch": 0.6453586497890296, + "grad_norm": 0.6471688151359558, + "learning_rate": 0.0015, + "loss": 1.6335, + "step": 6118 + }, + { + "epoch": 0.645464135021097, + "grad_norm": 0.49944692850112915, + "learning_rate": 0.0015, + "loss": 1.597, + "step": 6119 + }, + { + "epoch": 0.6455696202531646, + "grad_norm": 0.5704482197761536, + "learning_rate": 0.0015, + "loss": 1.5783, + "step": 6120 + }, + { + "epoch": 0.6456751054852321, + "grad_norm": 0.6205321550369263, + "learning_rate": 0.0015, + "loss": 1.6034, + "step": 6121 + }, + { + "epoch": 0.6457805907172995, + "grad_norm": 0.5599362850189209, + "learning_rate": 0.0015, + "loss": 1.6301, + "step": 6122 + }, + { + "epoch": 0.6458860759493671, + "grad_norm": 0.6305397152900696, + "learning_rate": 0.0015, + "loss": 1.6074, + "step": 6123 + }, + { + "epoch": 0.6459915611814346, + "grad_norm": 0.442531555891037, + "learning_rate": 0.0015, + "loss": 1.6088, + "step": 6124 + }, + { + "epoch": 0.6460970464135021, + "grad_norm": 0.5729601383209229, + "learning_rate": 0.0015, + "loss": 1.6051, + "step": 6125 + }, + { + "epoch": 0.6462025316455696, + "grad_norm": 0.5429379343986511, + "learning_rate": 0.0015, + 
"loss": 1.6025, + "step": 6126 + }, + { + "epoch": 0.6463080168776372, + "grad_norm": 0.5254256725311279, + "learning_rate": 0.0015, + "loss": 1.5951, + "step": 6127 + }, + { + "epoch": 0.6464135021097046, + "grad_norm": 0.5388456583023071, + "learning_rate": 0.0015, + "loss": 1.5848, + "step": 6128 + }, + { + "epoch": 0.6465189873417722, + "grad_norm": 0.5493997931480408, + "learning_rate": 0.0015, + "loss": 1.6044, + "step": 6129 + }, + { + "epoch": 0.6466244725738397, + "grad_norm": 0.45452308654785156, + "learning_rate": 0.0015, + "loss": 1.6456, + "step": 6130 + }, + { + "epoch": 0.6467299578059071, + "grad_norm": 0.5767956972122192, + "learning_rate": 0.0015, + "loss": 1.6493, + "step": 6131 + }, + { + "epoch": 0.6468354430379747, + "grad_norm": 0.5556018948554993, + "learning_rate": 0.0015, + "loss": 1.6296, + "step": 6132 + }, + { + "epoch": 0.6469409282700422, + "grad_norm": 0.49602171778678894, + "learning_rate": 0.0015, + "loss": 1.6, + "step": 6133 + }, + { + "epoch": 0.6470464135021097, + "grad_norm": 0.6708524823188782, + "learning_rate": 0.0015, + "loss": 1.6204, + "step": 6134 + }, + { + "epoch": 0.6471518987341772, + "grad_norm": 0.5771464705467224, + "learning_rate": 0.0015, + "loss": 1.5842, + "step": 6135 + }, + { + "epoch": 0.6472573839662448, + "grad_norm": 0.5093113780021667, + "learning_rate": 0.0015, + "loss": 1.6388, + "step": 6136 + }, + { + "epoch": 0.6473628691983122, + "grad_norm": 0.6123894453048706, + "learning_rate": 0.0015, + "loss": 1.6135, + "step": 6137 + }, + { + "epoch": 0.6474683544303798, + "grad_norm": 0.5309610962867737, + "learning_rate": 0.0015, + "loss": 1.62, + "step": 6138 + }, + { + "epoch": 0.6475738396624473, + "grad_norm": 0.5773170590400696, + "learning_rate": 0.0015, + "loss": 1.6176, + "step": 6139 + }, + { + "epoch": 0.6476793248945147, + "grad_norm": 0.48332715034484863, + "learning_rate": 0.0015, + "loss": 1.5468, + "step": 6140 + }, + { + "epoch": 0.6477848101265823, + "grad_norm": 0.6897808909416199, + 
"learning_rate": 0.0015, + "loss": 1.6216, + "step": 6141 + }, + { + "epoch": 0.6478902953586498, + "grad_norm": 0.7130773663520813, + "learning_rate": 0.0015, + "loss": 1.5861, + "step": 6142 + }, + { + "epoch": 0.6479957805907173, + "grad_norm": 0.5731837749481201, + "learning_rate": 0.0015, + "loss": 1.5846, + "step": 6143 + }, + { + "epoch": 0.6481012658227848, + "grad_norm": 0.6874064207077026, + "learning_rate": 0.0015, + "loss": 1.6256, + "step": 6144 + }, + { + "epoch": 0.6482067510548524, + "grad_norm": 0.7943158745765686, + "learning_rate": 0.0015, + "loss": 1.6472, + "step": 6145 + }, + { + "epoch": 0.6483122362869198, + "grad_norm": 0.5174962878227234, + "learning_rate": 0.0015, + "loss": 1.6209, + "step": 6146 + }, + { + "epoch": 0.6484177215189874, + "grad_norm": 0.781793475151062, + "learning_rate": 0.0015, + "loss": 1.6306, + "step": 6147 + }, + { + "epoch": 0.6485232067510549, + "grad_norm": 0.6787018179893494, + "learning_rate": 0.0015, + "loss": 1.6049, + "step": 6148 + }, + { + "epoch": 0.6486286919831223, + "grad_norm": 0.6252360343933105, + "learning_rate": 0.0015, + "loss": 1.6248, + "step": 6149 + }, + { + "epoch": 0.6487341772151899, + "grad_norm": 0.7669170498847961, + "learning_rate": 0.0015, + "loss": 1.6585, + "step": 6150 + }, + { + "epoch": 0.6488396624472574, + "grad_norm": 0.6429520845413208, + "learning_rate": 0.0015, + "loss": 1.6207, + "step": 6151 + }, + { + "epoch": 0.6489451476793249, + "grad_norm": 0.5717629790306091, + "learning_rate": 0.0015, + "loss": 1.641, + "step": 6152 + }, + { + "epoch": 0.6490506329113924, + "grad_norm": 0.6288654208183289, + "learning_rate": 0.0015, + "loss": 1.5998, + "step": 6153 + }, + { + "epoch": 0.64915611814346, + "grad_norm": 0.5095597505569458, + "learning_rate": 0.0015, + "loss": 1.595, + "step": 6154 + }, + { + "epoch": 0.6492616033755274, + "grad_norm": 0.6188340783119202, + "learning_rate": 0.0015, + "loss": 1.6095, + "step": 6155 + }, + { + "epoch": 0.649367088607595, + "grad_norm": 
0.6024876832962036, + "learning_rate": 0.0015, + "loss": 1.5919, + "step": 6156 + }, + { + "epoch": 0.6494725738396624, + "grad_norm": 0.5583590865135193, + "learning_rate": 0.0015, + "loss": 1.6147, + "step": 6157 + }, + { + "epoch": 0.6495780590717299, + "grad_norm": 0.4911785125732422, + "learning_rate": 0.0015, + "loss": 1.643, + "step": 6158 + }, + { + "epoch": 0.6496835443037975, + "grad_norm": 0.520928144454956, + "learning_rate": 0.0015, + "loss": 1.576, + "step": 6159 + }, + { + "epoch": 0.6497890295358649, + "grad_norm": 0.5773823261260986, + "learning_rate": 0.0015, + "loss": 1.6427, + "step": 6160 + }, + { + "epoch": 0.6498945147679325, + "grad_norm": 0.5181955695152283, + "learning_rate": 0.0015, + "loss": 1.6087, + "step": 6161 + }, + { + "epoch": 0.65, + "grad_norm": 0.5335739850997925, + "learning_rate": 0.0015, + "loss": 1.602, + "step": 6162 + }, + { + "epoch": 0.6501054852320675, + "grad_norm": 0.5832805037498474, + "learning_rate": 0.0014979195407665976, + "loss": 1.5409, + "step": 6163 + }, + { + "epoch": 0.650210970464135, + "grad_norm": 0.5168347358703613, + "learning_rate": 0.00149584196707361, + "loss": 1.575, + "step": 6164 + }, + { + "epoch": 0.6503164556962026, + "grad_norm": 0.5147069692611694, + "learning_rate": 0.0014937672749188704, + "loss": 1.5896, + "step": 6165 + }, + { + "epoch": 0.65042194092827, + "grad_norm": 0.5709660649299622, + "learning_rate": 0.0014916954603057643, + "loss": 1.599, + "step": 6166 + }, + { + "epoch": 0.6505274261603375, + "grad_norm": 0.49591460824012756, + "learning_rate": 0.0014896265192432194, + "loss": 1.5824, + "step": 6167 + }, + { + "epoch": 0.6506329113924051, + "grad_norm": 0.48474690318107605, + "learning_rate": 0.001487560447745699, + "loss": 1.5815, + "step": 6168 + }, + { + "epoch": 0.6507383966244725, + "grad_norm": 0.5037993788719177, + "learning_rate": 0.0014854972418331944, + "loss": 1.6377, + "step": 6169 + }, + { + "epoch": 0.6508438818565401, + "grad_norm": 0.4749622046947479, + 
"learning_rate": 0.0014834368975312174, + "loss": 1.5855, + "step": 6170 + }, + { + "epoch": 0.6509493670886076, + "grad_norm": 0.5159726738929749, + "learning_rate": 0.0014813794108707917, + "loss": 1.609, + "step": 6171 + }, + { + "epoch": 0.6510548523206751, + "grad_norm": 0.469255656003952, + "learning_rate": 0.0014793247778884461, + "loss": 1.5809, + "step": 6172 + }, + { + "epoch": 0.6511603375527426, + "grad_norm": 0.5568392276763916, + "learning_rate": 0.0014772729946262069, + "loss": 1.6286, + "step": 6173 + }, + { + "epoch": 0.6512658227848102, + "grad_norm": 0.6769633293151855, + "learning_rate": 0.0014752240571315894, + "loss": 1.6391, + "step": 6174 + }, + { + "epoch": 0.6513713080168776, + "grad_norm": 0.5625841021537781, + "learning_rate": 0.0014731779614575917, + "loss": 1.5954, + "step": 6175 + }, + { + "epoch": 0.6514767932489451, + "grad_norm": 0.6080664396286011, + "learning_rate": 0.0014711347036626854, + "loss": 1.6225, + "step": 6176 + }, + { + "epoch": 0.6515822784810127, + "grad_norm": 0.7148519158363342, + "learning_rate": 0.0014690942798108097, + "loss": 1.6261, + "step": 6177 + }, + { + "epoch": 0.6516877637130801, + "grad_norm": 0.5286668539047241, + "learning_rate": 0.0014670566859713624, + "loss": 1.6047, + "step": 6178 + }, + { + "epoch": 0.6517932489451477, + "grad_norm": 0.570422887802124, + "learning_rate": 0.0014650219182191931, + "loss": 1.6317, + "step": 6179 + }, + { + "epoch": 0.6518987341772152, + "grad_norm": 0.533623218536377, + "learning_rate": 0.0014629899726345957, + "loss": 1.6166, + "step": 6180 + }, + { + "epoch": 0.6520042194092827, + "grad_norm": 0.49662795662879944, + "learning_rate": 0.0014609608453033007, + "loss": 1.6382, + "step": 6181 + }, + { + "epoch": 0.6521097046413502, + "grad_norm": 0.5145726799964905, + "learning_rate": 0.001458934532316467, + "loss": 1.626, + "step": 6182 + }, + { + "epoch": 0.6522151898734178, + "grad_norm": 0.5761494636535645, + "learning_rate": 0.0014569110297706755, + "loss": 
1.6098, + "step": 6183 + }, + { + "epoch": 0.6523206751054852, + "grad_norm": 0.4725034832954407, + "learning_rate": 0.0014548903337679206, + "loss": 1.6107, + "step": 6184 + }, + { + "epoch": 0.6524261603375527, + "grad_norm": 0.5854225754737854, + "learning_rate": 0.0014528724404156037, + "loss": 1.6193, + "step": 6185 + }, + { + "epoch": 0.6525316455696203, + "grad_norm": 0.5390808582305908, + "learning_rate": 0.0014508573458265248, + "loss": 1.6286, + "step": 6186 + }, + { + "epoch": 0.6526371308016877, + "grad_norm": 0.6974532008171082, + "learning_rate": 0.0014488450461188752, + "loss": 1.5973, + "step": 6187 + }, + { + "epoch": 0.6527426160337553, + "grad_norm": 0.5741993188858032, + "learning_rate": 0.0014468355374162303, + "loss": 1.6076, + "step": 6188 + }, + { + "epoch": 0.6528481012658228, + "grad_norm": 0.5944492816925049, + "learning_rate": 0.001444828815847542, + "loss": 1.5961, + "step": 6189 + }, + { + "epoch": 0.6529535864978903, + "grad_norm": 0.6624055504798889, + "learning_rate": 0.0014428248775471316, + "loss": 1.6427, + "step": 6190 + }, + { + "epoch": 0.6530590717299578, + "grad_norm": 0.5096984505653381, + "learning_rate": 0.0014408237186546813, + "loss": 1.6364, + "step": 6191 + }, + { + "epoch": 0.6531645569620254, + "grad_norm": 0.6230143904685974, + "learning_rate": 0.0014388253353152278, + "loss": 1.5722, + "step": 6192 + }, + { + "epoch": 0.6532700421940928, + "grad_norm": 0.4900723695755005, + "learning_rate": 0.0014368297236791545, + "loss": 1.6226, + "step": 6193 + }, + { + "epoch": 0.6533755274261603, + "grad_norm": 0.5623832941055298, + "learning_rate": 0.0014348368799021844, + "loss": 1.5723, + "step": 6194 + }, + { + "epoch": 0.6534810126582279, + "grad_norm": 0.6694051623344421, + "learning_rate": 0.0014328468001453718, + "loss": 1.6187, + "step": 6195 + }, + { + "epoch": 0.6535864978902953, + "grad_norm": 0.4565059244632721, + "learning_rate": 0.001430859480575096, + "loss": 1.5848, + "step": 6196 + }, + { + "epoch": 
0.6536919831223629, + "grad_norm": 0.5141550898551941, + "learning_rate": 0.0014288749173630535, + "loss": 1.6061, + "step": 6197 + }, + { + "epoch": 0.6537974683544304, + "grad_norm": 0.46880844235420227, + "learning_rate": 0.0014268931066862504, + "loss": 1.6278, + "step": 6198 + }, + { + "epoch": 0.6539029535864979, + "grad_norm": 0.4959510564804077, + "learning_rate": 0.0014249140447269945, + "loss": 1.6452, + "step": 6199 + }, + { + "epoch": 0.6540084388185654, + "grad_norm": 0.4055125117301941, + "learning_rate": 0.00142293772767289, + "loss": 1.6074, + "step": 6200 + }, + { + "epoch": 0.654113924050633, + "grad_norm": 0.46506351232528687, + "learning_rate": 0.0014209641517168275, + "loss": 1.5739, + "step": 6201 + }, + { + "epoch": 0.6542194092827004, + "grad_norm": 0.4965575337409973, + "learning_rate": 0.001418993313056979, + "loss": 1.6063, + "step": 6202 + }, + { + "epoch": 0.6543248945147679, + "grad_norm": 0.4766387641429901, + "learning_rate": 0.0014170252078967885, + "loss": 1.5947, + "step": 6203 + }, + { + "epoch": 0.6544303797468355, + "grad_norm": 0.5006406307220459, + "learning_rate": 0.0014150598324449667, + "loss": 1.6039, + "step": 6204 + }, + { + "epoch": 0.6545358649789029, + "grad_norm": 0.5292978882789612, + "learning_rate": 0.001413097182915482, + "loss": 1.6045, + "step": 6205 + }, + { + "epoch": 0.6546413502109705, + "grad_norm": 0.5393363833427429, + "learning_rate": 0.0014111372555275542, + "loss": 1.6359, + "step": 6206 + }, + { + "epoch": 0.654746835443038, + "grad_norm": 0.5250663757324219, + "learning_rate": 0.0014091800465056473, + "loss": 1.631, + "step": 6207 + }, + { + "epoch": 0.6548523206751055, + "grad_norm": 0.6228719353675842, + "learning_rate": 0.0014072255520794614, + "loss": 1.6064, + "step": 6208 + }, + { + "epoch": 0.654957805907173, + "grad_norm": 0.6128799319267273, + "learning_rate": 0.0014052737684839257, + "loss": 1.5979, + "step": 6209 + }, + { + "epoch": 0.6550632911392406, + "grad_norm": 0.5618577599525452, 
+ "learning_rate": 0.001403324691959192, + "loss": 1.598, + "step": 6210 + }, + { + "epoch": 0.655168776371308, + "grad_norm": 0.5514854788780212, + "learning_rate": 0.0014013783187506268, + "loss": 1.618, + "step": 6211 + }, + { + "epoch": 0.6552742616033755, + "grad_norm": 0.5488616824150085, + "learning_rate": 0.0013994346451088036, + "loss": 1.6058, + "step": 6212 + }, + { + "epoch": 0.6553797468354431, + "grad_norm": 0.5754488706588745, + "learning_rate": 0.0013974936672894972, + "loss": 1.6188, + "step": 6213 + }, + { + "epoch": 0.6554852320675105, + "grad_norm": 0.5307894945144653, + "learning_rate": 0.0013955553815536747, + "loss": 1.5847, + "step": 6214 + }, + { + "epoch": 0.6555907172995781, + "grad_norm": 0.5454530119895935, + "learning_rate": 0.0013936197841674894, + "loss": 1.6157, + "step": 6215 + }, + { + "epoch": 0.6556962025316456, + "grad_norm": 0.505645215511322, + "learning_rate": 0.0013916868714022737, + "loss": 1.6416, + "step": 6216 + }, + { + "epoch": 0.6558016877637131, + "grad_norm": 0.5031726956367493, + "learning_rate": 0.0013897566395345313, + "loss": 1.6161, + "step": 6217 + }, + { + "epoch": 0.6559071729957806, + "grad_norm": 0.5135219097137451, + "learning_rate": 0.0013878290848459301, + "loss": 1.6263, + "step": 6218 + }, + { + "epoch": 0.6560126582278482, + "grad_norm": 0.5340127944946289, + "learning_rate": 0.0013859042036232954, + "loss": 1.5921, + "step": 6219 + }, + { + "epoch": 0.6561181434599156, + "grad_norm": 0.5894213914871216, + "learning_rate": 0.0013839819921586025, + "loss": 1.5843, + "step": 6220 + }, + { + "epoch": 0.6562236286919831, + "grad_norm": 0.48287490010261536, + "learning_rate": 0.00138206244674897, + "loss": 1.6115, + "step": 6221 + }, + { + "epoch": 0.6563291139240506, + "grad_norm": 0.6512756943702698, + "learning_rate": 0.0013801455636966516, + "loss": 1.568, + "step": 6222 + }, + { + "epoch": 0.6564345991561181, + "grad_norm": 0.5339322686195374, + "learning_rate": 0.0013782313393090303, + "loss": 
1.5928, + "step": 6223 + }, + { + "epoch": 0.6565400843881857, + "grad_norm": 0.5671067237854004, + "learning_rate": 0.0013763197698986101, + "loss": 1.5917, + "step": 6224 + }, + { + "epoch": 0.6566455696202531, + "grad_norm": 0.5248642563819885, + "learning_rate": 0.0013744108517830104, + "loss": 1.5972, + "step": 6225 + }, + { + "epoch": 0.6567510548523207, + "grad_norm": 0.4802647829055786, + "learning_rate": 0.0013725045812849569, + "loss": 1.6163, + "step": 6226 + }, + { + "epoch": 0.6568565400843882, + "grad_norm": 0.5078423619270325, + "learning_rate": 0.001370600954732276, + "loss": 1.5962, + "step": 6227 + }, + { + "epoch": 0.6569620253164556, + "grad_norm": 0.49496930837631226, + "learning_rate": 0.0013686999684578874, + "loss": 1.5901, + "step": 6228 + }, + { + "epoch": 0.6570675105485232, + "grad_norm": 0.6016704440116882, + "learning_rate": 0.001366801618799797, + "loss": 1.5926, + "step": 6229 + }, + { + "epoch": 0.6571729957805907, + "grad_norm": 0.5402101874351501, + "learning_rate": 0.0013649059021010894, + "loss": 1.6073, + "step": 6230 + }, + { + "epoch": 0.6572784810126582, + "grad_norm": 0.6456952691078186, + "learning_rate": 0.0013630128147099215, + "loss": 1.597, + "step": 6231 + }, + { + "epoch": 0.6573839662447257, + "grad_norm": 0.5368161797523499, + "learning_rate": 0.0013611223529795156, + "loss": 1.6236, + "step": 6232 + }, + { + "epoch": 0.6574894514767933, + "grad_norm": 0.5132485032081604, + "learning_rate": 0.001359234513268151, + "loss": 1.6261, + "step": 6233 + }, + { + "epoch": 0.6575949367088607, + "grad_norm": 0.5268896818161011, + "learning_rate": 0.0013573492919391594, + "loss": 1.6197, + "step": 6234 + }, + { + "epoch": 0.6577004219409283, + "grad_norm": 0.5224751234054565, + "learning_rate": 0.0013554666853609146, + "loss": 1.6148, + "step": 6235 + }, + { + "epoch": 0.6578059071729958, + "grad_norm": 0.5029194355010986, + "learning_rate": 0.001353586689906829, + "loss": 1.5926, + "step": 6236 + }, + { + "epoch": 
0.6579113924050632, + "grad_norm": 0.5476905703544617, + "learning_rate": 0.0013517093019553442, + "loss": 1.5828, + "step": 6237 + }, + { + "epoch": 0.6580168776371308, + "grad_norm": 0.47993701696395874, + "learning_rate": 0.001349834517889925, + "loss": 1.5973, + "step": 6238 + }, + { + "epoch": 0.6581223628691983, + "grad_norm": 0.4334630072116852, + "learning_rate": 0.001347962334099052, + "loss": 1.5764, + "step": 6239 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 0.5023558735847473, + "learning_rate": 0.0013460927469762154, + "loss": 1.5915, + "step": 6240 + }, + { + "epoch": 0.6583333333333333, + "grad_norm": 0.5679836869239807, + "learning_rate": 0.0013442257529199069, + "loss": 1.5917, + "step": 6241 + }, + { + "epoch": 0.6584388185654009, + "grad_norm": 0.4634576141834259, + "learning_rate": 0.0013423613483336142, + "loss": 1.596, + "step": 6242 + }, + { + "epoch": 0.6585443037974683, + "grad_norm": 0.5537039637565613, + "learning_rate": 0.001340499529625812, + "loss": 1.6381, + "step": 6243 + }, + { + "epoch": 0.6586497890295359, + "grad_norm": 0.48177632689476013, + "learning_rate": 0.0013386402932099575, + "loss": 1.6078, + "step": 6244 + }, + { + "epoch": 0.6587552742616034, + "grad_norm": 0.5461094379425049, + "learning_rate": 0.0013367836355044822, + "loss": 1.6046, + "step": 6245 + }, + { + "epoch": 0.6588607594936708, + "grad_norm": 0.5001852512359619, + "learning_rate": 0.0013349295529327845, + "loss": 1.57, + "step": 6246 + }, + { + "epoch": 0.6589662447257384, + "grad_norm": 0.5089578628540039, + "learning_rate": 0.0013330780419232241, + "loss": 1.6072, + "step": 6247 + }, + { + "epoch": 0.6590717299578059, + "grad_norm": 0.513290524482727, + "learning_rate": 0.001331229098909114, + "loss": 1.5841, + "step": 6248 + }, + { + "epoch": 0.6591772151898734, + "grad_norm": 0.6138255000114441, + "learning_rate": 0.0013293827203287143, + "loss": 1.6423, + "step": 6249 + }, + { + "epoch": 0.6592827004219409, + "grad_norm": 0.518519401550293, + 
"learning_rate": 0.0013275389026252255, + "loss": 1.6392, + "step": 6250 + }, + { + "epoch": 0.6593881856540085, + "grad_norm": 0.5507919192314148, + "learning_rate": 0.0013256976422467803, + "loss": 1.5877, + "step": 6251 + }, + { + "epoch": 0.6594936708860759, + "grad_norm": 0.5733009576797485, + "learning_rate": 0.001323858935646439, + "loss": 1.6122, + "step": 6252 + }, + { + "epoch": 0.6595991561181435, + "grad_norm": 0.5359091758728027, + "learning_rate": 0.0013220227792821804, + "loss": 1.5781, + "step": 6253 + }, + { + "epoch": 0.659704641350211, + "grad_norm": 0.6037209630012512, + "learning_rate": 0.0013201891696168965, + "loss": 1.6048, + "step": 6254 + }, + { + "epoch": 0.6598101265822784, + "grad_norm": 0.5205094814300537, + "learning_rate": 0.001318358103118385, + "loss": 1.6156, + "step": 6255 + }, + { + "epoch": 0.659915611814346, + "grad_norm": 0.5952802300453186, + "learning_rate": 0.0013165295762593426, + "loss": 1.6016, + "step": 6256 + }, + { + "epoch": 0.6600210970464135, + "grad_norm": 0.4881475865840912, + "learning_rate": 0.0013147035855173587, + "loss": 1.5574, + "step": 6257 + }, + { + "epoch": 0.660126582278481, + "grad_norm": 0.5686355233192444, + "learning_rate": 0.0013128801273749075, + "loss": 1.6119, + "step": 6258 + }, + { + "epoch": 0.6602320675105485, + "grad_norm": 0.5342782735824585, + "learning_rate": 0.0013110591983193423, + "loss": 1.6093, + "step": 6259 + }, + { + "epoch": 0.6603375527426161, + "grad_norm": 0.5252060294151306, + "learning_rate": 0.0013092407948428887, + "loss": 1.6088, + "step": 6260 + }, + { + "epoch": 0.6604430379746835, + "grad_norm": 0.5563986897468567, + "learning_rate": 0.0013074249134426368, + "loss": 1.6147, + "step": 6261 + }, + { + "epoch": 0.6605485232067511, + "grad_norm": 0.4771369397640228, + "learning_rate": 0.0013056115506205354, + "loss": 1.6167, + "step": 6262 + }, + { + "epoch": 0.6606540084388186, + "grad_norm": 0.5097554326057434, + "learning_rate": 0.0013038007028833853, + "loss": 
1.5617, + "step": 6263 + }, + { + "epoch": 0.660759493670886, + "grad_norm": 0.560704231262207, + "learning_rate": 0.001301992366742832, + "loss": 1.6432, + "step": 6264 + }, + { + "epoch": 0.6608649789029536, + "grad_norm": 0.5001439452171326, + "learning_rate": 0.0013001865387153588, + "loss": 1.6021, + "step": 6265 + }, + { + "epoch": 0.6609704641350211, + "grad_norm": 0.49058055877685547, + "learning_rate": 0.0012983832153222814, + "loss": 1.6046, + "step": 6266 + }, + { + "epoch": 0.6610759493670886, + "grad_norm": 0.48805370926856995, + "learning_rate": 0.0012965823930897401, + "loss": 1.6331, + "step": 6267 + }, + { + "epoch": 0.6611814345991561, + "grad_norm": 0.579522430896759, + "learning_rate": 0.0012947840685486932, + "loss": 1.5785, + "step": 6268 + }, + { + "epoch": 0.6612869198312237, + "grad_norm": 0.5598164200782776, + "learning_rate": 0.0012929882382349102, + "loss": 1.6291, + "step": 6269 + }, + { + "epoch": 0.6613924050632911, + "grad_norm": 0.5173453688621521, + "learning_rate": 0.001291194898688966, + "loss": 1.5746, + "step": 6270 + }, + { + "epoch": 0.6614978902953587, + "grad_norm": 0.5323518514633179, + "learning_rate": 0.001289404046456233, + "loss": 1.5848, + "step": 6271 + }, + { + "epoch": 0.6616033755274262, + "grad_norm": 0.5157877802848816, + "learning_rate": 0.0012876156780868755, + "loss": 1.6074, + "step": 6272 + }, + { + "epoch": 0.6617088607594936, + "grad_norm": 0.5611578822135925, + "learning_rate": 0.0012858297901358424, + "loss": 1.6519, + "step": 6273 + }, + { + "epoch": 0.6618143459915612, + "grad_norm": 0.5870733261108398, + "learning_rate": 0.001284046379162861, + "loss": 1.5992, + "step": 6274 + }, + { + "epoch": 0.6619198312236287, + "grad_norm": 0.5836647152900696, + "learning_rate": 0.0012822654417324305, + "loss": 1.6038, + "step": 6275 + }, + { + "epoch": 0.6620253164556962, + "grad_norm": 0.5240967869758606, + "learning_rate": 0.0012804869744138137, + "loss": 1.6002, + "step": 6276 + }, + { + "epoch": 
0.6621308016877637, + "grad_norm": 0.6198570728302002, + "learning_rate": 0.0012787109737810332, + "loss": 1.5796, + "step": 6277 + }, + { + "epoch": 0.6622362869198313, + "grad_norm": 0.49905139207839966, + "learning_rate": 0.0012769374364128628, + "loss": 1.6228, + "step": 6278 + }, + { + "epoch": 0.6623417721518987, + "grad_norm": 0.6339825391769409, + "learning_rate": 0.0012751663588928214, + "loss": 1.5957, + "step": 6279 + }, + { + "epoch": 0.6624472573839663, + "grad_norm": 0.49158141016960144, + "learning_rate": 0.001273397737809166, + "loss": 1.6048, + "step": 6280 + }, + { + "epoch": 0.6625527426160338, + "grad_norm": 0.5476797223091125, + "learning_rate": 0.001271631569754887, + "loss": 1.5466, + "step": 6281 + }, + { + "epoch": 0.6626582278481012, + "grad_norm": 0.6192284226417542, + "learning_rate": 0.0012698678513276987, + "loss": 1.5831, + "step": 6282 + }, + { + "epoch": 0.6627637130801688, + "grad_norm": 0.4934277832508087, + "learning_rate": 0.0012681065791300351, + "loss": 1.5746, + "step": 6283 + }, + { + "epoch": 0.6628691983122363, + "grad_norm": 0.6882602572441101, + "learning_rate": 0.0012663477497690421, + "loss": 1.6384, + "step": 6284 + }, + { + "epoch": 0.6629746835443038, + "grad_norm": 0.5981414914131165, + "learning_rate": 0.0012645913598565719, + "loss": 1.6155, + "step": 6285 + }, + { + "epoch": 0.6630801687763713, + "grad_norm": 0.6074311137199402, + "learning_rate": 0.0012628374060091757, + "loss": 1.5985, + "step": 6286 + }, + { + "epoch": 0.6631856540084389, + "grad_norm": 0.6158648133277893, + "learning_rate": 0.0012610858848480973, + "loss": 1.6191, + "step": 6287 + }, + { + "epoch": 0.6632911392405063, + "grad_norm": 0.5439046025276184, + "learning_rate": 0.0012593367929992667, + "loss": 1.6113, + "step": 6288 + }, + { + "epoch": 0.6633966244725739, + "grad_norm": 0.576534628868103, + "learning_rate": 0.0012575901270932943, + "loss": 1.6132, + "step": 6289 + }, + { + "epoch": 0.6635021097046413, + "grad_norm": 
0.6843683123588562, + "learning_rate": 0.001255845883765463, + "loss": 1.5776, + "step": 6290 + }, + { + "epoch": 0.6636075949367088, + "grad_norm": 0.4963736832141876, + "learning_rate": 0.0012541040596557229, + "loss": 1.5631, + "step": 6291 + }, + { + "epoch": 0.6637130801687764, + "grad_norm": 0.5085331201553345, + "learning_rate": 0.001252364651408684, + "loss": 1.6155, + "step": 6292 + }, + { + "epoch": 0.6638185654008438, + "grad_norm": 0.6039493680000305, + "learning_rate": 0.001250627655673611, + "loss": 1.5934, + "step": 6293 + }, + { + "epoch": 0.6639240506329114, + "grad_norm": 0.530308723449707, + "learning_rate": 0.0012488930691044145, + "loss": 1.6114, + "step": 6294 + }, + { + "epoch": 0.6640295358649789, + "grad_norm": 0.4819502532482147, + "learning_rate": 0.0012471608883596475, + "loss": 1.5915, + "step": 6295 + }, + { + "epoch": 0.6641350210970464, + "grad_norm": 0.5258892774581909, + "learning_rate": 0.0012454311101024967, + "loss": 1.5958, + "step": 6296 + }, + { + "epoch": 0.6642405063291139, + "grad_norm": 0.5121935606002808, + "learning_rate": 0.0012437037310007774, + "loss": 1.5899, + "step": 6297 + }, + { + "epoch": 0.6643459915611815, + "grad_norm": 0.4811895489692688, + "learning_rate": 0.0012419787477269257, + "loss": 1.6169, + "step": 6298 + }, + { + "epoch": 0.6644514767932489, + "grad_norm": 0.5116422176361084, + "learning_rate": 0.0012402561569579936, + "loss": 1.5668, + "step": 6299 + }, + { + "epoch": 0.6645569620253164, + "grad_norm": 0.4468708634376526, + "learning_rate": 0.001238535955375642, + "loss": 1.5695, + "step": 6300 + }, + { + "epoch": 0.664662447257384, + "grad_norm": 0.5267335772514343, + "learning_rate": 0.001236818139666134, + "loss": 1.5943, + "step": 6301 + }, + { + "epoch": 0.6647679324894514, + "grad_norm": 0.5234312415122986, + "learning_rate": 0.0012351027065203286, + "loss": 1.5761, + "step": 6302 + }, + { + "epoch": 0.664873417721519, + "grad_norm": 0.4723226726055145, + "learning_rate": 
0.001233389652633675, + "loss": 1.5605, + "step": 6303 + }, + { + "epoch": 0.6649789029535865, + "grad_norm": 0.4188882112503052, + "learning_rate": 0.001231678974706205, + "loss": 1.5742, + "step": 6304 + }, + { + "epoch": 0.665084388185654, + "grad_norm": 0.5146017074584961, + "learning_rate": 0.0012299706694425285, + "loss": 1.5992, + "step": 6305 + }, + { + "epoch": 0.6651898734177215, + "grad_norm": 0.45604467391967773, + "learning_rate": 0.0012282647335518245, + "loss": 1.5687, + "step": 6306 + }, + { + "epoch": 0.6652953586497891, + "grad_norm": 0.4664759933948517, + "learning_rate": 0.0012265611637478377, + "loss": 1.6354, + "step": 6307 + }, + { + "epoch": 0.6654008438818565, + "grad_norm": 0.4594112038612366, + "learning_rate": 0.0012248599567488698, + "loss": 1.601, + "step": 6308 + }, + { + "epoch": 0.665506329113924, + "grad_norm": 0.41812050342559814, + "learning_rate": 0.0012231611092777745, + "loss": 1.5699, + "step": 6309 + }, + { + "epoch": 0.6656118143459916, + "grad_norm": 0.5122634768486023, + "learning_rate": 0.0012214646180619506, + "loss": 1.6065, + "step": 6310 + }, + { + "epoch": 0.665717299578059, + "grad_norm": 0.4882543683052063, + "learning_rate": 0.0012197704798333365, + "loss": 1.6161, + "step": 6311 + }, + { + "epoch": 0.6658227848101266, + "grad_norm": 0.48394256830215454, + "learning_rate": 0.0012180786913284026, + "loss": 1.5558, + "step": 6312 + }, + { + "epoch": 0.6659282700421941, + "grad_norm": 0.545910656452179, + "learning_rate": 0.001216389249288146, + "loss": 1.5944, + "step": 6313 + }, + { + "epoch": 0.6660337552742616, + "grad_norm": 0.46415403485298157, + "learning_rate": 0.0012147021504580842, + "loss": 1.5633, + "step": 6314 + }, + { + "epoch": 0.6661392405063291, + "grad_norm": 0.5876246094703674, + "learning_rate": 0.0012130173915882478, + "loss": 1.57, + "step": 6315 + }, + { + "epoch": 0.6662447257383967, + "grad_norm": 0.5909762978553772, + "learning_rate": 0.0012113349694331762, + "loss": 1.5887, + "step": 6316 
+ }, + { + "epoch": 0.6663502109704641, + "grad_norm": 0.5569766163825989, + "learning_rate": 0.0012096548807519092, + "loss": 1.6191, + "step": 6317 + }, + { + "epoch": 0.6664556962025316, + "grad_norm": 0.6135898232460022, + "learning_rate": 0.0012079771223079822, + "loss": 1.5971, + "step": 6318 + }, + { + "epoch": 0.6665611814345992, + "grad_norm": 0.5652821660041809, + "learning_rate": 0.0012063016908694193, + "loss": 1.6093, + "step": 6319 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.5761544704437256, + "learning_rate": 0.001204628583208727, + "loss": 1.5671, + "step": 6320 + }, + { + "epoch": 0.6667721518987342, + "grad_norm": 0.6448853015899658, + "learning_rate": 0.0012029577961028893, + "loss": 1.603, + "step": 6321 + }, + { + "epoch": 0.6668776371308017, + "grad_norm": 0.6403090357780457, + "learning_rate": 0.0012012893263333587, + "loss": 1.5955, + "step": 6322 + }, + { + "epoch": 0.6669831223628692, + "grad_norm": 0.5241579413414001, + "learning_rate": 0.0011996231706860535, + "loss": 1.5721, + "step": 6323 + }, + { + "epoch": 0.6670886075949367, + "grad_norm": 0.6040076017379761, + "learning_rate": 0.0011979593259513486, + "loss": 1.6376, + "step": 6324 + }, + { + "epoch": 0.6671940928270043, + "grad_norm": 0.5078284740447998, + "learning_rate": 0.0011962977889240713, + "loss": 1.6035, + "step": 6325 + }, + { + "epoch": 0.6672995780590717, + "grad_norm": 0.5907483696937561, + "learning_rate": 0.001194638556403494, + "loss": 1.5757, + "step": 6326 + }, + { + "epoch": 0.6674050632911392, + "grad_norm": 0.5162091851234436, + "learning_rate": 0.0011929816251933286, + "loss": 1.5983, + "step": 6327 + }, + { + "epoch": 0.6675105485232068, + "grad_norm": 0.5676397085189819, + "learning_rate": 0.0011913269921017202, + "loss": 1.5361, + "step": 6328 + }, + { + "epoch": 0.6676160337552742, + "grad_norm": 0.4341263175010681, + "learning_rate": 0.0011896746539412405, + "loss": 1.5804, + "step": 6329 + }, + { + "epoch": 0.6677215189873418, + 
"grad_norm": 0.4882994592189789, + "learning_rate": 0.0011880246075288827, + "loss": 1.6195, + "step": 6330 + }, + { + "epoch": 0.6678270042194093, + "grad_norm": 0.4783797562122345, + "learning_rate": 0.001186376849686054, + "loss": 1.5526, + "step": 6331 + }, + { + "epoch": 0.6679324894514768, + "grad_norm": 0.5487940907478333, + "learning_rate": 0.0011847313772385714, + "loss": 1.6263, + "step": 6332 + }, + { + "epoch": 0.6680379746835443, + "grad_norm": 0.5111870169639587, + "learning_rate": 0.0011830881870166531, + "loss": 1.6047, + "step": 6333 + }, + { + "epoch": 0.6681434599156119, + "grad_norm": 0.5094480514526367, + "learning_rate": 0.0011814472758549144, + "loss": 1.5745, + "step": 6334 + }, + { + "epoch": 0.6682489451476793, + "grad_norm": 0.48153698444366455, + "learning_rate": 0.0011798086405923607, + "loss": 1.561, + "step": 6335 + }, + { + "epoch": 0.6683544303797468, + "grad_norm": 0.5202271342277527, + "learning_rate": 0.0011781722780723819, + "loss": 1.6094, + "step": 6336 + }, + { + "epoch": 0.6684599156118144, + "grad_norm": 0.6091830730438232, + "learning_rate": 0.0011765381851427457, + "loss": 1.5831, + "step": 6337 + }, + { + "epoch": 0.6685654008438818, + "grad_norm": 0.49341899156570435, + "learning_rate": 0.0011749063586555919, + "loss": 1.5811, + "step": 6338 + }, + { + "epoch": 0.6686708860759494, + "grad_norm": 0.617054283618927, + "learning_rate": 0.0011732767954674265, + "loss": 1.6074, + "step": 6339 + }, + { + "epoch": 0.6687763713080169, + "grad_norm": 0.48009762167930603, + "learning_rate": 0.001171649492439115, + "loss": 1.5744, + "step": 6340 + }, + { + "epoch": 0.6688818565400844, + "grad_norm": 0.5383430123329163, + "learning_rate": 0.0011700244464358776, + "loss": 1.5833, + "step": 6341 + }, + { + "epoch": 0.6689873417721519, + "grad_norm": 0.457782119512558, + "learning_rate": 0.0011684016543272815, + "loss": 1.6314, + "step": 6342 + }, + { + "epoch": 0.6690928270042195, + "grad_norm": 0.4396111071109772, + "learning_rate": 
0.0011667811129872365, + "loss": 1.5998, + "step": 6343 + }, + { + "epoch": 0.6691983122362869, + "grad_norm": 0.4802881181240082, + "learning_rate": 0.0011651628192939872, + "loss": 1.5782, + "step": 6344 + }, + { + "epoch": 0.6693037974683544, + "grad_norm": 0.4975625276565552, + "learning_rate": 0.001163546770130109, + "loss": 1.5992, + "step": 6345 + }, + { + "epoch": 0.669409282700422, + "grad_norm": 0.49034079909324646, + "learning_rate": 0.0011619329623825006, + "loss": 1.6007, + "step": 6346 + }, + { + "epoch": 0.6695147679324894, + "grad_norm": 0.5298082232475281, + "learning_rate": 0.0011603213929423785, + "loss": 1.5753, + "step": 6347 + }, + { + "epoch": 0.669620253164557, + "grad_norm": 0.519305944442749, + "learning_rate": 0.001158712058705271, + "loss": 1.5663, + "step": 6348 + }, + { + "epoch": 0.6697257383966245, + "grad_norm": 0.5319370627403259, + "learning_rate": 0.0011571049565710122, + "loss": 1.5674, + "step": 6349 + }, + { + "epoch": 0.669831223628692, + "grad_norm": 0.5216324925422668, + "learning_rate": 0.0011555000834437364, + "loss": 1.5919, + "step": 6350 + }, + { + "epoch": 0.6699367088607595, + "grad_norm": 0.5723080039024353, + "learning_rate": 0.0011538974362318712, + "loss": 1.5839, + "step": 6351 + }, + { + "epoch": 0.6700421940928271, + "grad_norm": 0.4261742830276489, + "learning_rate": 0.0011522970118481325, + "loss": 1.5743, + "step": 6352 + }, + { + "epoch": 0.6701476793248945, + "grad_norm": 0.5841290354728699, + "learning_rate": 0.0011506988072095183, + "loss": 1.5658, + "step": 6353 + }, + { + "epoch": 0.670253164556962, + "grad_norm": 0.5021260976791382, + "learning_rate": 0.0011491028192373023, + "loss": 1.628, + "step": 6354 + }, + { + "epoch": 0.6703586497890295, + "grad_norm": 0.639585018157959, + "learning_rate": 0.0011475090448570281, + "loss": 1.5459, + "step": 6355 + }, + { + "epoch": 0.670464135021097, + "grad_norm": 0.6755632162094116, + "learning_rate": 0.0011459174809985047, + "loss": 1.581, + "step": 6356 + 
}, + { + "epoch": 0.6705696202531646, + "grad_norm": 0.4771636426448822, + "learning_rate": 0.0011443281245957977, + "loss": 1.5929, + "step": 6357 + }, + { + "epoch": 0.670675105485232, + "grad_norm": 0.6079390645027161, + "learning_rate": 0.0011427409725872262, + "loss": 1.621, + "step": 6358 + }, + { + "epoch": 0.6707805907172996, + "grad_norm": 0.587040901184082, + "learning_rate": 0.001141156021915355, + "loss": 1.5891, + "step": 6359 + }, + { + "epoch": 0.6708860759493671, + "grad_norm": 0.6071764826774597, + "learning_rate": 0.0011395732695269908, + "loss": 1.6022, + "step": 6360 + }, + { + "epoch": 0.6709915611814345, + "grad_norm": 0.5442952513694763, + "learning_rate": 0.0011379927123731737, + "loss": 1.5671, + "step": 6361 + }, + { + "epoch": 0.6710970464135021, + "grad_norm": 0.5130666494369507, + "learning_rate": 0.0011364143474091727, + "loss": 1.5919, + "step": 6362 + }, + { + "epoch": 0.6712025316455696, + "grad_norm": 0.6559984087944031, + "learning_rate": 0.0011348381715944804, + "loss": 1.5961, + "step": 6363 + }, + { + "epoch": 0.6713080168776371, + "grad_norm": 0.5327663421630859, + "learning_rate": 0.0011332641818928063, + "loss": 1.5638, + "step": 6364 + }, + { + "epoch": 0.6714135021097046, + "grad_norm": 0.6795653104782104, + "learning_rate": 0.001131692375272071, + "loss": 1.5515, + "step": 6365 + }, + { + "epoch": 0.6715189873417722, + "grad_norm": 0.49332743883132935, + "learning_rate": 0.0011301227487044005, + "loss": 1.5454, + "step": 6366 + }, + { + "epoch": 0.6716244725738396, + "grad_norm": 0.6678374409675598, + "learning_rate": 0.0011285552991661203, + "loss": 1.5252, + "step": 6367 + }, + { + "epoch": 0.6717299578059072, + "grad_norm": 0.5235859155654907, + "learning_rate": 0.00112699002363775, + "loss": 1.5745, + "step": 6368 + }, + { + "epoch": 0.6718354430379747, + "grad_norm": 0.6558351516723633, + "learning_rate": 0.001125426919103997, + "loss": 1.5968, + "step": 6369 + }, + { + "epoch": 0.6719409282700421, + "grad_norm": 
0.5537667870521545, + "learning_rate": 0.0011238659825537505, + "loss": 1.5846, + "step": 6370 + }, + { + "epoch": 0.6720464135021097, + "grad_norm": 0.5901890397071838, + "learning_rate": 0.0011223072109800768, + "loss": 1.5461, + "step": 6371 + }, + { + "epoch": 0.6721518987341772, + "grad_norm": 0.6574958562850952, + "learning_rate": 0.0011207506013802117, + "loss": 1.5991, + "step": 6372 + }, + { + "epoch": 0.6722573839662447, + "grad_norm": 0.5097714066505432, + "learning_rate": 0.0011191961507555567, + "loss": 1.5982, + "step": 6373 + }, + { + "epoch": 0.6723628691983122, + "grad_norm": 0.6988516449928284, + "learning_rate": 0.0011176438561116713, + "loss": 1.5623, + "step": 6374 + }, + { + "epoch": 0.6724683544303798, + "grad_norm": 0.4968533217906952, + "learning_rate": 0.0011160937144582695, + "loss": 1.546, + "step": 6375 + }, + { + "epoch": 0.6725738396624472, + "grad_norm": 0.7256531715393066, + "learning_rate": 0.0011145457228092116, + "loss": 1.5632, + "step": 6376 + }, + { + "epoch": 0.6726793248945148, + "grad_norm": 0.5300629138946533, + "learning_rate": 0.0011129998781824997, + "loss": 1.5966, + "step": 6377 + }, + { + "epoch": 0.6727848101265823, + "grad_norm": 0.6823804974555969, + "learning_rate": 0.0011114561776002726, + "loss": 1.5763, + "step": 6378 + }, + { + "epoch": 0.6728902953586497, + "grad_norm": 0.7479099631309509, + "learning_rate": 0.001109914618088799, + "loss": 1.5844, + "step": 6379 + }, + { + "epoch": 0.6729957805907173, + "grad_norm": 0.4734697937965393, + "learning_rate": 0.0011083751966784717, + "loss": 1.5715, + "step": 6380 + }, + { + "epoch": 0.6731012658227848, + "grad_norm": 0.8295102715492249, + "learning_rate": 0.0011068379104038023, + "loss": 1.5896, + "step": 6381 + }, + { + "epoch": 0.6732067510548523, + "grad_norm": 0.4697086811065674, + "learning_rate": 0.0011053027563034162, + "loss": 1.5633, + "step": 6382 + }, + { + "epoch": 0.6733122362869198, + "grad_norm": 0.622559666633606, + "learning_rate": 
0.001103769731420045, + "loss": 1.6436, + "step": 6383 + }, + { + "epoch": 0.6734177215189874, + "grad_norm": 0.45600736141204834, + "learning_rate": 0.0011022388328005232, + "loss": 1.576, + "step": 6384 + }, + { + "epoch": 0.6735232067510548, + "grad_norm": 0.573523998260498, + "learning_rate": 0.0011007100574957802, + "loss": 1.6194, + "step": 6385 + }, + { + "epoch": 0.6736286919831224, + "grad_norm": 0.44580280780792236, + "learning_rate": 0.0010991834025608363, + "loss": 1.6139, + "step": 6386 + }, + { + "epoch": 0.6737341772151899, + "grad_norm": 0.6450295448303223, + "learning_rate": 0.001097658865054796, + "loss": 1.592, + "step": 6387 + }, + { + "epoch": 0.6738396624472573, + "grad_norm": 0.5917106866836548, + "learning_rate": 0.001096136442040843, + "loss": 1.5868, + "step": 6388 + }, + { + "epoch": 0.6739451476793249, + "grad_norm": 0.6550825238227844, + "learning_rate": 0.0010946161305862348, + "loss": 1.5981, + "step": 6389 + }, + { + "epoch": 0.6740506329113924, + "grad_norm": 0.8302057385444641, + "learning_rate": 0.0010930979277622953, + "loss": 1.5301, + "step": 6390 + }, + { + "epoch": 0.6741561181434599, + "grad_norm": 0.43817853927612305, + "learning_rate": 0.0010915818306444112, + "loss": 1.5929, + "step": 6391 + }, + { + "epoch": 0.6742616033755274, + "grad_norm": 0.5853847861289978, + "learning_rate": 0.0010900678363120256, + "loss": 1.6225, + "step": 6392 + }, + { + "epoch": 0.674367088607595, + "grad_norm": 0.4759158492088318, + "learning_rate": 0.001088555941848632, + "loss": 1.5829, + "step": 6393 + }, + { + "epoch": 0.6744725738396624, + "grad_norm": 0.5427954196929932, + "learning_rate": 0.0010870461443417694, + "loss": 1.5835, + "step": 6394 + }, + { + "epoch": 0.67457805907173, + "grad_norm": 0.45315021276474, + "learning_rate": 0.001085538440883016, + "loss": 1.574, + "step": 6395 + }, + { + "epoch": 0.6746835443037975, + "grad_norm": 0.5170419812202454, + "learning_rate": 0.0010840328285679837, + "loss": 1.5532, + "step": 6396 + }, 
+ { + "epoch": 0.674789029535865, + "grad_norm": 0.5563789010047913, + "learning_rate": 0.0010825293044963132, + "loss": 1.6181, + "step": 6397 + }, + { + "epoch": 0.6748945147679325, + "grad_norm": 0.5063940286636353, + "learning_rate": 0.001081027865771668, + "loss": 1.5592, + "step": 6398 + }, + { + "epoch": 0.675, + "grad_norm": 0.47441422939300537, + "learning_rate": 0.001079528509501728, + "loss": 1.5673, + "step": 6399 + }, + { + "epoch": 0.6751054852320675, + "grad_norm": 0.559227705001831, + "learning_rate": 0.0010780312327981854, + "loss": 1.6173, + "step": 6400 + }, + { + "epoch": 0.675210970464135, + "grad_norm": 0.5507892370223999, + "learning_rate": 0.001076536032776738, + "loss": 1.5578, + "step": 6401 + }, + { + "epoch": 0.6753164556962026, + "grad_norm": 0.4988034665584564, + "learning_rate": 0.0010750429065570842, + "loss": 1.5814, + "step": 6402 + }, + { + "epoch": 0.67542194092827, + "grad_norm": 0.5726801156997681, + "learning_rate": 0.0010735518512629172, + "loss": 1.6166, + "step": 6403 + }, + { + "epoch": 0.6755274261603376, + "grad_norm": 0.5148823261260986, + "learning_rate": 0.00107206286402192, + "loss": 1.6174, + "step": 6404 + }, + { + "epoch": 0.6756329113924051, + "grad_norm": 0.5496354699134827, + "learning_rate": 0.0010705759419657585, + "loss": 1.5994, + "step": 6405 + }, + { + "epoch": 0.6757383966244725, + "grad_norm": 0.5202333331108093, + "learning_rate": 0.0010690910822300777, + "loss": 1.5915, + "step": 6406 + }, + { + "epoch": 0.6758438818565401, + "grad_norm": 0.5268917679786682, + "learning_rate": 0.0010676082819544952, + "loss": 1.57, + "step": 6407 + }, + { + "epoch": 0.6759493670886076, + "grad_norm": 0.48678407073020935, + "learning_rate": 0.0010661275382825958, + "loss": 1.5684, + "step": 6408 + }, + { + "epoch": 0.6760548523206751, + "grad_norm": 0.5924727916717529, + "learning_rate": 0.0010646488483619261, + "loss": 1.5661, + "step": 6409 + }, + { + "epoch": 0.6761603375527426, + "grad_norm": 0.6849046349525452, + 
"learning_rate": 0.0010631722093439888, + "loss": 1.5561, + "step": 6410 + }, + { + "epoch": 0.6762658227848102, + "grad_norm": 0.5055809617042542, + "learning_rate": 0.0010616976183842378, + "loss": 1.5934, + "step": 6411 + }, + { + "epoch": 0.6763713080168776, + "grad_norm": 0.594955325126648, + "learning_rate": 0.001060225072642072, + "loss": 1.5516, + "step": 6412 + }, + { + "epoch": 0.6764767932489452, + "grad_norm": 0.4963608384132385, + "learning_rate": 0.0010587545692808302, + "loss": 1.5613, + "step": 6413 + }, + { + "epoch": 0.6765822784810127, + "grad_norm": 0.5594482421875, + "learning_rate": 0.0010572861054677853, + "loss": 1.5496, + "step": 6414 + }, + { + "epoch": 0.6766877637130801, + "grad_norm": 0.42274507880210876, + "learning_rate": 0.0010558196783741396, + "loss": 1.5883, + "step": 6415 + }, + { + "epoch": 0.6767932489451477, + "grad_norm": 0.572669267654419, + "learning_rate": 0.0010543552851750187, + "loss": 1.5471, + "step": 6416 + }, + { + "epoch": 0.6768987341772152, + "grad_norm": 0.4971157908439636, + "learning_rate": 0.001052892923049466, + "loss": 1.5903, + "step": 6417 + }, + { + "epoch": 0.6770042194092827, + "grad_norm": 0.7142729163169861, + "learning_rate": 0.0010514325891804379, + "loss": 1.5983, + "step": 6418 + }, + { + "epoch": 0.6771097046413502, + "grad_norm": 0.5082519054412842, + "learning_rate": 0.0010499742807547976, + "loss": 1.5506, + "step": 6419 + }, + { + "epoch": 0.6772151898734177, + "grad_norm": 0.645077645778656, + "learning_rate": 0.00104851799496331, + "loss": 1.5915, + "step": 6420 + }, + { + "epoch": 0.6773206751054852, + "grad_norm": 0.5200485587120056, + "learning_rate": 0.0010470637290006365, + "loss": 1.5817, + "step": 6421 + }, + { + "epoch": 0.6774261603375528, + "grad_norm": 0.6938650608062744, + "learning_rate": 0.00104561148006533, + "loss": 1.5685, + "step": 6422 + }, + { + "epoch": 0.6775316455696202, + "grad_norm": 0.5358496308326721, + "learning_rate": 0.0010441612453598276, + "loss": 1.5528, + 
"step": 6423 + }, + { + "epoch": 0.6776371308016877, + "grad_norm": 0.5471860766410828, + "learning_rate": 0.001042713022090448, + "loss": 1.6125, + "step": 6424 + }, + { + "epoch": 0.6777426160337553, + "grad_norm": 0.5146363973617554, + "learning_rate": 0.0010412668074673832, + "loss": 1.6071, + "step": 6425 + }, + { + "epoch": 0.6778481012658227, + "grad_norm": 0.5841184258460999, + "learning_rate": 0.0010398225987046958, + "loss": 1.5716, + "step": 6426 + }, + { + "epoch": 0.6779535864978903, + "grad_norm": 0.5139831900596619, + "learning_rate": 0.001038380393020312, + "loss": 1.5923, + "step": 6427 + }, + { + "epoch": 0.6780590717299578, + "grad_norm": 0.5693835020065308, + "learning_rate": 0.0010369401876360166, + "loss": 1.5781, + "step": 6428 + }, + { + "epoch": 0.6781645569620253, + "grad_norm": 0.4554748833179474, + "learning_rate": 0.0010355019797774478, + "loss": 1.5312, + "step": 6429 + }, + { + "epoch": 0.6782700421940928, + "grad_norm": 0.4807449281215668, + "learning_rate": 0.0010340657666740914, + "loss": 1.5943, + "step": 6430 + }, + { + "epoch": 0.6783755274261604, + "grad_norm": 0.4526333510875702, + "learning_rate": 0.0010326315455592766, + "loss": 1.5456, + "step": 6431 + }, + { + "epoch": 0.6784810126582278, + "grad_norm": 0.5008558034896851, + "learning_rate": 0.001031199313670169, + "loss": 1.5741, + "step": 6432 + }, + { + "epoch": 0.6785864978902953, + "grad_norm": 0.5095291137695312, + "learning_rate": 0.0010297690682477669, + "loss": 1.5431, + "step": 6433 + }, + { + "epoch": 0.6786919831223629, + "grad_norm": 0.4715951681137085, + "learning_rate": 0.0010283408065368948, + "loss": 1.5626, + "step": 6434 + }, + { + "epoch": 0.6787974683544303, + "grad_norm": 0.5276069045066833, + "learning_rate": 0.0010269145257861987, + "loss": 1.5411, + "step": 6435 + }, + { + "epoch": 0.6789029535864979, + "grad_norm": 0.4964304268360138, + "learning_rate": 0.0010254902232481407, + "loss": 1.5656, + "step": 6436 + }, + { + "epoch": 0.6790084388185654, 
+ "grad_norm": 0.574787437915802, + "learning_rate": 0.0010240678961789937, + "loss": 1.5609, + "step": 6437 + }, + { + "epoch": 0.6791139240506329, + "grad_norm": 0.6316975355148315, + "learning_rate": 0.001022647541838836, + "loss": 1.5743, + "step": 6438 + }, + { + "epoch": 0.6792194092827004, + "grad_norm": 0.5257666110992432, + "learning_rate": 0.001021229157491546, + "loss": 1.5655, + "step": 6439 + }, + { + "epoch": 0.679324894514768, + "grad_norm": 0.49049559235572815, + "learning_rate": 0.0010198127404047975, + "loss": 1.5776, + "step": 6440 + }, + { + "epoch": 0.6794303797468354, + "grad_norm": 0.46640104055404663, + "learning_rate": 0.001018398287850053, + "loss": 1.5996, + "step": 6441 + }, + { + "epoch": 0.679535864978903, + "grad_norm": 0.46320921182632446, + "learning_rate": 0.0010169857971025606, + "loss": 1.5284, + "step": 6442 + }, + { + "epoch": 0.6796413502109705, + "grad_norm": 0.5223776698112488, + "learning_rate": 0.0010155752654413468, + "loss": 1.5752, + "step": 6443 + }, + { + "epoch": 0.6797468354430379, + "grad_norm": 0.43690183758735657, + "learning_rate": 0.0010141666901492116, + "loss": 1.5857, + "step": 6444 + }, + { + "epoch": 0.6798523206751055, + "grad_norm": 0.5709619522094727, + "learning_rate": 0.0010127600685127247, + "loss": 1.5694, + "step": 6445 + }, + { + "epoch": 0.679957805907173, + "grad_norm": 0.42933282256126404, + "learning_rate": 0.0010113553978222192, + "loss": 1.5595, + "step": 6446 + }, + { + "epoch": 0.6800632911392405, + "grad_norm": 0.5185548663139343, + "learning_rate": 0.0010099526753717856, + "loss": 1.5642, + "step": 6447 + }, + { + "epoch": 0.680168776371308, + "grad_norm": 0.5778734087944031, + "learning_rate": 0.0010085518984592678, + "loss": 1.5457, + "step": 6448 + }, + { + "epoch": 0.6802742616033756, + "grad_norm": 0.5577317476272583, + "learning_rate": 0.0010071530643862578, + "loss": 1.5403, + "step": 6449 + }, + { + "epoch": 0.680379746835443, + "grad_norm": 0.6061927676200867, + "learning_rate": 
0.0010057561704580897, + "loss": 1.5824, + "step": 6450 + }, + { + "epoch": 0.6804852320675105, + "grad_norm": 0.5703639984130859, + "learning_rate": 0.001004361213983836, + "loss": 1.5662, + "step": 6451 + }, + { + "epoch": 0.6805907172995781, + "grad_norm": 0.49401646852493286, + "learning_rate": 0.0010029681922762998, + "loss": 1.6089, + "step": 6452 + }, + { + "epoch": 0.6806962025316455, + "grad_norm": 0.5242007374763489, + "learning_rate": 0.0010015771026520132, + "loss": 1.5994, + "step": 6453 + }, + { + "epoch": 0.6808016877637131, + "grad_norm": 0.47871851921081543, + "learning_rate": 0.0010001879424312286, + "loss": 1.5509, + "step": 6454 + }, + { + "epoch": 0.6809071729957806, + "grad_norm": 0.5109929442405701, + "learning_rate": 0.0009988007089379162, + "loss": 1.5669, + "step": 6455 + }, + { + "epoch": 0.6810126582278481, + "grad_norm": 0.4957762658596039, + "learning_rate": 0.000997415399499757, + "loss": 1.5608, + "step": 6456 + }, + { + "epoch": 0.6811181434599156, + "grad_norm": 0.4794304072856903, + "learning_rate": 0.000996032011448139, + "loss": 1.592, + "step": 6457 + }, + { + "epoch": 0.6812236286919832, + "grad_norm": 0.4826592803001404, + "learning_rate": 0.0009946505421181513, + "loss": 1.5718, + "step": 6458 + }, + { + "epoch": 0.6813291139240506, + "grad_norm": 0.46919921040534973, + "learning_rate": 0.000993270988848579, + "loss": 1.5934, + "step": 6459 + }, + { + "epoch": 0.6814345991561181, + "grad_norm": 0.5313397645950317, + "learning_rate": 0.0009918933489818985, + "loss": 1.5905, + "step": 6460 + }, + { + "epoch": 0.6815400843881857, + "grad_norm": 0.48957186937332153, + "learning_rate": 0.000990517619864272, + "loss": 1.5732, + "step": 6461 + }, + { + "epoch": 0.6816455696202531, + "grad_norm": 0.5460265278816223, + "learning_rate": 0.0009891437988455425, + "loss": 1.579, + "step": 6462 + }, + { + "epoch": 0.6817510548523207, + "grad_norm": 0.45310288667678833, + "learning_rate": 0.0009877718832792285, + "loss": 1.5617, + "step": 
6463 + }, + { + "epoch": 0.6818565400843882, + "grad_norm": 0.43776223063468933, + "learning_rate": 0.0009864018705225196, + "loss": 1.5731, + "step": 6464 + }, + { + "epoch": 0.6819620253164557, + "grad_norm": 0.504557728767395, + "learning_rate": 0.0009850337579362701, + "loss": 1.5676, + "step": 6465 + }, + { + "epoch": 0.6820675105485232, + "grad_norm": 0.48897743225097656, + "learning_rate": 0.000983667542884996, + "loss": 1.5462, + "step": 6466 + }, + { + "epoch": 0.6821729957805908, + "grad_norm": 0.5264372825622559, + "learning_rate": 0.000982303222736867, + "loss": 1.5713, + "step": 6467 + }, + { + "epoch": 0.6822784810126582, + "grad_norm": 0.45343002676963806, + "learning_rate": 0.0009809407948637044, + "loss": 1.589, + "step": 6468 + }, + { + "epoch": 0.6823839662447257, + "grad_norm": 0.5581714510917664, + "learning_rate": 0.0009795802566409742, + "loss": 1.5558, + "step": 6469 + }, + { + "epoch": 0.6824894514767933, + "grad_norm": 0.5224825739860535, + "learning_rate": 0.0009782216054477827, + "loss": 1.6053, + "step": 6470 + }, + { + "epoch": 0.6825949367088607, + "grad_norm": 0.4501763880252838, + "learning_rate": 0.000976864838666871, + "loss": 1.5557, + "step": 6471 + }, + { + "epoch": 0.6827004219409283, + "grad_norm": 0.4748144745826721, + "learning_rate": 0.0009755099536846105, + "loss": 1.6209, + "step": 6472 + }, + { + "epoch": 0.6828059071729958, + "grad_norm": 0.5101902484893799, + "learning_rate": 0.0009741569478909979, + "loss": 1.5864, + "step": 6473 + }, + { + "epoch": 0.6829113924050633, + "grad_norm": 0.4839603006839752, + "learning_rate": 0.0009728058186796492, + "loss": 1.556, + "step": 6474 + }, + { + "epoch": 0.6830168776371308, + "grad_norm": 0.47969716787338257, + "learning_rate": 0.0009714565634477962, + "loss": 1.5887, + "step": 6475 + }, + { + "epoch": 0.6831223628691984, + "grad_norm": 0.43456801772117615, + "learning_rate": 0.00097010917959628, + "loss": 1.6041, + "step": 6476 + }, + { + "epoch": 0.6832278481012658, + 
"grad_norm": 0.4625198543071747, + "learning_rate": 0.0009687636645295469, + "loss": 1.5682, + "step": 6477 + }, + { + "epoch": 0.6833333333333333, + "grad_norm": 0.5103845596313477, + "learning_rate": 0.0009674200156556436, + "loss": 1.5815, + "step": 6478 + }, + { + "epoch": 0.6834388185654009, + "grad_norm": 0.6014232635498047, + "learning_rate": 0.0009660782303862109, + "loss": 1.5554, + "step": 6479 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 0.539760172367096, + "learning_rate": 0.0009647383061364801, + "loss": 1.5845, + "step": 6480 + }, + { + "epoch": 0.6836497890295359, + "grad_norm": 0.6342958807945251, + "learning_rate": 0.0009634002403252678, + "loss": 1.5832, + "step": 6481 + }, + { + "epoch": 0.6837552742616034, + "grad_norm": 0.5717540979385376, + "learning_rate": 0.00096206403037497, + "loss": 1.5361, + "step": 6482 + }, + { + "epoch": 0.6838607594936709, + "grad_norm": 0.5399227142333984, + "learning_rate": 0.000960729673711558, + "loss": 1.5326, + "step": 6483 + }, + { + "epoch": 0.6839662447257384, + "grad_norm": 0.5392372012138367, + "learning_rate": 0.0009593971677645735, + "loss": 1.604, + "step": 6484 + }, + { + "epoch": 0.6840717299578059, + "grad_norm": 0.5298331379890442, + "learning_rate": 0.0009580665099671228, + "loss": 1.5759, + "step": 6485 + }, + { + "epoch": 0.6841772151898734, + "grad_norm": 0.4869494140148163, + "learning_rate": 0.000956737697755873, + "loss": 1.5695, + "step": 6486 + }, + { + "epoch": 0.684282700421941, + "grad_norm": 0.5326877236366272, + "learning_rate": 0.0009554107285710461, + "loss": 1.5787, + "step": 6487 + }, + { + "epoch": 0.6843881856540084, + "grad_norm": 0.5505571365356445, + "learning_rate": 0.0009540855998564147, + "loss": 1.607, + "step": 6488 + }, + { + "epoch": 0.6844936708860759, + "grad_norm": 0.5273861289024353, + "learning_rate": 0.0009527623090592963, + "loss": 1.5825, + "step": 6489 + }, + { + "epoch": 0.6845991561181435, + "grad_norm": 0.5127564072608948, + "learning_rate": 
0.0009514408536305495, + "loss": 1.5864, + "step": 6490 + }, + { + "epoch": 0.6847046413502109, + "grad_norm": 0.5200434327125549, + "learning_rate": 0.0009501212310245682, + "loss": 1.5992, + "step": 6491 + }, + { + "epoch": 0.6848101265822785, + "grad_norm": 0.5136992931365967, + "learning_rate": 0.0009488034386992771, + "loss": 1.6061, + "step": 6492 + }, + { + "epoch": 0.684915611814346, + "grad_norm": 0.4220972955226898, + "learning_rate": 0.0009474874741161266, + "loss": 1.5787, + "step": 6493 + }, + { + "epoch": 0.6850210970464135, + "grad_norm": 0.46680107712745667, + "learning_rate": 0.0009461733347400879, + "loss": 1.5614, + "step": 6494 + }, + { + "epoch": 0.685126582278481, + "grad_norm": 0.4657742381095886, + "learning_rate": 0.0009448610180396485, + "loss": 1.6051, + "step": 6495 + }, + { + "epoch": 0.6852320675105485, + "grad_norm": 0.4843524694442749, + "learning_rate": 0.0009435505214868068, + "loss": 1.582, + "step": 6496 + }, + { + "epoch": 0.685337552742616, + "grad_norm": 0.45350903272628784, + "learning_rate": 0.0009422418425570675, + "loss": 1.6047, + "step": 6497 + }, + { + "epoch": 0.6854430379746835, + "grad_norm": 0.47634002566337585, + "learning_rate": 0.000940934978729437, + "loss": 1.565, + "step": 6498 + }, + { + "epoch": 0.6855485232067511, + "grad_norm": 0.46629229187965393, + "learning_rate": 0.0009396299274864177, + "loss": 1.6145, + "step": 6499 + }, + { + "epoch": 0.6856540084388185, + "grad_norm": 0.5664366483688354, + "learning_rate": 0.0009383266863140042, + "loss": 1.5605, + "step": 6500 + }, + { + "epoch": 0.6857594936708861, + "grad_norm": 0.4939168691635132, + "learning_rate": 0.0009370252527016777, + "loss": 1.5816, + "step": 6501 + }, + { + "epoch": 0.6858649789029536, + "grad_norm": 0.5229809880256653, + "learning_rate": 0.0009357256241424013, + "loss": 1.5639, + "step": 6502 + }, + { + "epoch": 0.685970464135021, + "grad_norm": 0.49933311343193054, + "learning_rate": 0.0009344277981326158, + "loss": 1.6144, + "step": 
6503 + }, + { + "epoch": 0.6860759493670886, + "grad_norm": 0.5490395426750183, + "learning_rate": 0.0009331317721722339, + "loss": 1.6002, + "step": 6504 + }, + { + "epoch": 0.6861814345991561, + "grad_norm": 0.4353964626789093, + "learning_rate": 0.0009318375437646361, + "loss": 1.5893, + "step": 6505 + }, + { + "epoch": 0.6862869198312236, + "grad_norm": 0.5495443940162659, + "learning_rate": 0.0009305451104166652, + "loss": 1.6009, + "step": 6506 + }, + { + "epoch": 0.6863924050632911, + "grad_norm": 0.4636639952659607, + "learning_rate": 0.0009292544696386228, + "loss": 1.5774, + "step": 6507 + }, + { + "epoch": 0.6864978902953587, + "grad_norm": 0.6736634373664856, + "learning_rate": 0.0009279656189442628, + "loss": 1.5518, + "step": 6508 + }, + { + "epoch": 0.6866033755274261, + "grad_norm": 0.5159350633621216, + "learning_rate": 0.0009266785558507877, + "loss": 1.6017, + "step": 6509 + }, + { + "epoch": 0.6867088607594937, + "grad_norm": 0.501348614692688, + "learning_rate": 0.000925393277878844, + "loss": 1.5657, + "step": 6510 + }, + { + "epoch": 0.6868143459915612, + "grad_norm": 0.5065723061561584, + "learning_rate": 0.0009241097825525162, + "loss": 1.569, + "step": 6511 + }, + { + "epoch": 0.6869198312236287, + "grad_norm": 0.4902362525463104, + "learning_rate": 0.0009228280673993236, + "loss": 1.5852, + "step": 6512 + }, + { + "epoch": 0.6870253164556962, + "grad_norm": 0.46798020601272583, + "learning_rate": 0.0009215481299502144, + "loss": 1.5695, + "step": 6513 + }, + { + "epoch": 0.6871308016877637, + "grad_norm": 0.5281558036804199, + "learning_rate": 0.0009202699677395614, + "loss": 1.5829, + "step": 6514 + }, + { + "epoch": 0.6872362869198312, + "grad_norm": 0.5488610863685608, + "learning_rate": 0.000918993578305157, + "loss": 1.5587, + "step": 6515 + }, + { + "epoch": 0.6873417721518987, + "grad_norm": 0.5047544240951538, + "learning_rate": 0.0009177189591882088, + "loss": 1.5715, + "step": 6516 + }, + { + "epoch": 0.6874472573839663, + 
"grad_norm": 0.5690421462059021, + "learning_rate": 0.0009164461079333344, + "loss": 1.6138, + "step": 6517 + }, + { + "epoch": 0.6875527426160337, + "grad_norm": 0.5395167469978333, + "learning_rate": 0.0009151750220885573, + "loss": 1.5522, + "step": 6518 + }, + { + "epoch": 0.6876582278481013, + "grad_norm": 0.4937494695186615, + "learning_rate": 0.0009139056992053016, + "loss": 1.5303, + "step": 6519 + }, + { + "epoch": 0.6877637130801688, + "grad_norm": 0.6328351497650146, + "learning_rate": 0.0009126381368383879, + "loss": 1.5712, + "step": 6520 + }, + { + "epoch": 0.6878691983122363, + "grad_norm": 0.5024483799934387, + "learning_rate": 0.0009113723325460275, + "loss": 1.5727, + "step": 6521 + }, + { + "epoch": 0.6879746835443038, + "grad_norm": 0.44090771675109863, + "learning_rate": 0.000910108283889819, + "loss": 1.5399, + "step": 6522 + }, + { + "epoch": 0.6880801687763713, + "grad_norm": 0.5520035028457642, + "learning_rate": 0.0009088459884347425, + "loss": 1.5385, + "step": 6523 + }, + { + "epoch": 0.6881856540084388, + "grad_norm": 0.4542565643787384, + "learning_rate": 0.0009075854437491562, + "loss": 1.5719, + "step": 6524 + }, + { + "epoch": 0.6882911392405063, + "grad_norm": 0.51065993309021, + "learning_rate": 0.0009063266474047897, + "loss": 1.58, + "step": 6525 + }, + { + "epoch": 0.6883966244725739, + "grad_norm": 0.4812704920768738, + "learning_rate": 0.0009050695969767418, + "loss": 1.575, + "step": 6526 + }, + { + "epoch": 0.6885021097046413, + "grad_norm": 0.4445286989212036, + "learning_rate": 0.0009038142900434738, + "loss": 1.5785, + "step": 6527 + }, + { + "epoch": 0.6886075949367089, + "grad_norm": 0.4607788324356079, + "learning_rate": 0.0009025607241868057, + "loss": 1.5669, + "step": 6528 + }, + { + "epoch": 0.6887130801687764, + "grad_norm": 0.44700443744659424, + "learning_rate": 0.000901308896991912, + "loss": 1.5556, + "step": 6529 + }, + { + "epoch": 0.6888185654008439, + "grad_norm": 0.4597567021846771, + "learning_rate": 
0.0009000588060473156, + "loss": 1.5587, + "step": 6530 + }, + { + "epoch": 0.6889240506329114, + "grad_norm": 0.480972021818161, + "learning_rate": 0.0008988104489448847, + "loss": 1.5182, + "step": 6531 + }, + { + "epoch": 0.689029535864979, + "grad_norm": 0.44952064752578735, + "learning_rate": 0.0008975638232798275, + "loss": 1.5398, + "step": 6532 + }, + { + "epoch": 0.6891350210970464, + "grad_norm": 0.5170565247535706, + "learning_rate": 0.0008963189266506873, + "loss": 1.5435, + "step": 6533 + }, + { + "epoch": 0.6892405063291139, + "grad_norm": 0.4291788935661316, + "learning_rate": 0.000895075756659338, + "loss": 1.5614, + "step": 6534 + }, + { + "epoch": 0.6893459915611815, + "grad_norm": 0.571100115776062, + "learning_rate": 0.0008938343109109804, + "loss": 1.606, + "step": 6535 + }, + { + "epoch": 0.6894514767932489, + "grad_norm": 0.4427125155925751, + "learning_rate": 0.0008925945870141361, + "loss": 1.5812, + "step": 6536 + }, + { + "epoch": 0.6895569620253165, + "grad_norm": 0.4918278455734253, + "learning_rate": 0.0008913565825806437, + "loss": 1.593, + "step": 6537 + }, + { + "epoch": 0.689662447257384, + "grad_norm": 0.4837626516819, + "learning_rate": 0.0008901202952256545, + "loss": 1.5812, + "step": 6538 + }, + { + "epoch": 0.6897679324894515, + "grad_norm": 0.4777262210845947, + "learning_rate": 0.000888885722567627, + "loss": 1.6016, + "step": 6539 + }, + { + "epoch": 0.689873417721519, + "grad_norm": 0.4948436915874481, + "learning_rate": 0.0008876528622283235, + "loss": 1.5775, + "step": 6540 + }, + { + "epoch": 0.6899789029535865, + "grad_norm": 0.49163320660591125, + "learning_rate": 0.0008864217118328042, + "loss": 1.5581, + "step": 6541 + }, + { + "epoch": 0.690084388185654, + "grad_norm": 0.5227521657943726, + "learning_rate": 0.0008851922690094236, + "loss": 1.5612, + "step": 6542 + }, + { + "epoch": 0.6901898734177215, + "grad_norm": 0.5023651123046875, + "learning_rate": 0.0008839645313898255, + "loss": 1.5396, + "step": 6543 + }, 
+ { + "epoch": 0.6902953586497891, + "grad_norm": 0.5832700133323669, + "learning_rate": 0.0008827384966089386, + "loss": 1.5889, + "step": 6544 + }, + { + "epoch": 0.6904008438818565, + "grad_norm": 0.5308181047439575, + "learning_rate": 0.0008815141623049723, + "loss": 1.5654, + "step": 6545 + }, + { + "epoch": 0.6905063291139241, + "grad_norm": 0.5272881984710693, + "learning_rate": 0.0008802915261194108, + "loss": 1.5759, + "step": 6546 + }, + { + "epoch": 0.6906118143459916, + "grad_norm": 0.5909549593925476, + "learning_rate": 0.00087907058569701, + "loss": 1.5925, + "step": 6547 + }, + { + "epoch": 0.690717299578059, + "grad_norm": 0.4635099768638611, + "learning_rate": 0.0008778513386857928, + "loss": 1.5697, + "step": 6548 + }, + { + "epoch": 0.6908227848101266, + "grad_norm": 0.5166774988174438, + "learning_rate": 0.0008766337827370438, + "loss": 1.5476, + "step": 6549 + }, + { + "epoch": 0.6909282700421941, + "grad_norm": 0.4826642870903015, + "learning_rate": 0.0008754179155053053, + "loss": 1.5229, + "step": 6550 + }, + { + "epoch": 0.6910337552742616, + "grad_norm": 0.4578114449977875, + "learning_rate": 0.0008742037346483729, + "loss": 1.5979, + "step": 6551 + }, + { + "epoch": 0.6911392405063291, + "grad_norm": 0.5516717433929443, + "learning_rate": 0.00087299123782729, + "loss": 1.5412, + "step": 6552 + }, + { + "epoch": 0.6912447257383966, + "grad_norm": 0.44047558307647705, + "learning_rate": 0.0008717804227063454, + "loss": 1.5782, + "step": 6553 + }, + { + "epoch": 0.6913502109704641, + "grad_norm": 0.46936869621276855, + "learning_rate": 0.0008705712869530661, + "loss": 1.5913, + "step": 6554 + }, + { + "epoch": 0.6914556962025317, + "grad_norm": 0.4681449234485626, + "learning_rate": 0.0008693638282382152, + "loss": 1.5446, + "step": 6555 + }, + { + "epoch": 0.6915611814345991, + "grad_norm": 0.5338939428329468, + "learning_rate": 0.0008681580442357857, + "loss": 1.5814, + "step": 6556 + }, + { + "epoch": 0.6916666666666667, + "grad_norm": 
0.5230638384819031, + "learning_rate": 0.000866953932622997, + "loss": 1.5655, + "step": 6557 + }, + { + "epoch": 0.6917721518987342, + "grad_norm": 0.48675185441970825, + "learning_rate": 0.0008657514910802905, + "loss": 1.5576, + "step": 6558 + }, + { + "epoch": 0.6918776371308016, + "grad_norm": 0.5382813811302185, + "learning_rate": 0.000864550717291324, + "loss": 1.5421, + "step": 6559 + }, + { + "epoch": 0.6919831223628692, + "grad_norm": 0.5913761258125305, + "learning_rate": 0.0008633516089429683, + "loss": 1.5629, + "step": 6560 + }, + { + "epoch": 0.6920886075949367, + "grad_norm": 0.6232990622520447, + "learning_rate": 0.0008621541637253029, + "loss": 1.5848, + "step": 6561 + }, + { + "epoch": 0.6921940928270042, + "grad_norm": 0.4897494614124298, + "learning_rate": 0.0008609583793316104, + "loss": 1.5525, + "step": 6562 + }, + { + "epoch": 0.6922995780590717, + "grad_norm": 0.5203216075897217, + "learning_rate": 0.0008597642534583734, + "loss": 1.522, + "step": 6563 + }, + { + "epoch": 0.6924050632911393, + "grad_norm": 0.5382830500602722, + "learning_rate": 0.0008585717838052689, + "loss": 1.5407, + "step": 6564 + }, + { + "epoch": 0.6925105485232067, + "grad_norm": 0.5280312299728394, + "learning_rate": 0.0008573809680751646, + "loss": 1.5775, + "step": 6565 + }, + { + "epoch": 0.6926160337552743, + "grad_norm": 0.5594722628593445, + "learning_rate": 0.0008561918039741143, + "loss": 1.5636, + "step": 6566 + }, + { + "epoch": 0.6927215189873418, + "grad_norm": 0.4756142497062683, + "learning_rate": 0.0008550042892113534, + "loss": 1.5952, + "step": 6567 + }, + { + "epoch": 0.6928270042194092, + "grad_norm": 0.6093976497650146, + "learning_rate": 0.0008538184214992943, + "loss": 1.5593, + "step": 6568 + }, + { + "epoch": 0.6929324894514768, + "grad_norm": 0.46203407645225525, + "learning_rate": 0.0008526341985535229, + "loss": 1.5786, + "step": 6569 + }, + { + "epoch": 0.6930379746835443, + "grad_norm": 0.5525863766670227, + "learning_rate": 
0.0008514516180927928, + "loss": 1.5517, + "step": 6570 + }, + { + "epoch": 0.6931434599156118, + "grad_norm": 0.49556243419647217, + "learning_rate": 0.0008502706778390216, + "loss": 1.5781, + "step": 6571 + }, + { + "epoch": 0.6932489451476793, + "grad_norm": 0.5265051126480103, + "learning_rate": 0.0008490913755172875, + "loss": 1.5707, + "step": 6572 + }, + { + "epoch": 0.6933544303797469, + "grad_norm": 0.5411252379417419, + "learning_rate": 0.0008479137088558226, + "loss": 1.5391, + "step": 6573 + }, + { + "epoch": 0.6934599156118143, + "grad_norm": 0.47814619541168213, + "learning_rate": 0.0008467376755860108, + "loss": 1.5752, + "step": 6574 + }, + { + "epoch": 0.6935654008438819, + "grad_norm": 0.47830256819725037, + "learning_rate": 0.0008455632734423824, + "loss": 1.5643, + "step": 6575 + }, + { + "epoch": 0.6936708860759494, + "grad_norm": 0.607549250125885, + "learning_rate": 0.0008443905001626097, + "loss": 1.5168, + "step": 6576 + }, + { + "epoch": 0.6937763713080168, + "grad_norm": 0.45923298597335815, + "learning_rate": 0.0008432193534875027, + "loss": 1.5424, + "step": 6577 + }, + { + "epoch": 0.6938818565400844, + "grad_norm": 0.7608394026756287, + "learning_rate": 0.0008420498311610049, + "loss": 1.5573, + "step": 6578 + }, + { + "epoch": 0.6939873417721519, + "grad_norm": 0.6764566898345947, + "learning_rate": 0.0008408819309301891, + "loss": 1.5478, + "step": 6579 + }, + { + "epoch": 0.6940928270042194, + "grad_norm": 0.5462896227836609, + "learning_rate": 0.0008397156505452524, + "loss": 1.5959, + "step": 6580 + }, + { + "epoch": 0.6941983122362869, + "grad_norm": 0.6780955195426941, + "learning_rate": 0.0008385509877595129, + "loss": 1.5372, + "step": 6581 + }, + { + "epoch": 0.6943037974683545, + "grad_norm": 0.5146687626838684, + "learning_rate": 0.0008373879403294043, + "loss": 1.5595, + "step": 6582 + }, + { + "epoch": 0.6944092827004219, + "grad_norm": 0.6291818022727966, + "learning_rate": 0.0008362265060144721, + "loss": 1.5619, + 
"step": 6583 + }, + { + "epoch": 0.6945147679324895, + "grad_norm": 0.5552919507026672, + "learning_rate": 0.0008350666825773697, + "loss": 1.5476, + "step": 6584 + }, + { + "epoch": 0.694620253164557, + "grad_norm": 0.6432161927223206, + "learning_rate": 0.0008339084677838532, + "loss": 1.5743, + "step": 6585 + }, + { + "epoch": 0.6947257383966244, + "grad_norm": 0.49259400367736816, + "learning_rate": 0.0008327518594027778, + "loss": 1.5471, + "step": 6586 + }, + { + "epoch": 0.694831223628692, + "grad_norm": 0.6837813258171082, + "learning_rate": 0.0008315968552060928, + "loss": 1.5693, + "step": 6587 + }, + { + "epoch": 0.6949367088607595, + "grad_norm": 0.4736441671848297, + "learning_rate": 0.0008304434529688382, + "loss": 1.5642, + "step": 6588 + }, + { + "epoch": 0.695042194092827, + "grad_norm": 0.5561773180961609, + "learning_rate": 0.0008292916504691397, + "loss": 1.5588, + "step": 6589 + }, + { + "epoch": 0.6951476793248945, + "grad_norm": 0.4884398579597473, + "learning_rate": 0.0008281414454882051, + "loss": 1.6241, + "step": 6590 + }, + { + "epoch": 0.6952531645569621, + "grad_norm": 0.5623233318328857, + "learning_rate": 0.000826992835810319, + "loss": 1.5559, + "step": 6591 + }, + { + "epoch": 0.6953586497890295, + "grad_norm": 0.4664805829524994, + "learning_rate": 0.0008258458192228395, + "loss": 1.5807, + "step": 6592 + }, + { + "epoch": 0.695464135021097, + "grad_norm": 0.5534698367118835, + "learning_rate": 0.0008247003935161936, + "loss": 1.5873, + "step": 6593 + }, + { + "epoch": 0.6955696202531646, + "grad_norm": 0.4882756173610687, + "learning_rate": 0.0008235565564838727, + "loss": 1.5443, + "step": 6594 + }, + { + "epoch": 0.695675105485232, + "grad_norm": 0.5256763696670532, + "learning_rate": 0.0008224143059224287, + "loss": 1.5387, + "step": 6595 + }, + { + "epoch": 0.6957805907172996, + "grad_norm": 0.45350080728530884, + "learning_rate": 0.0008212736396314697, + "loss": 1.5834, + "step": 6596 + }, + { + "epoch": 0.6958860759493671, 
+ "grad_norm": 0.55854731798172, + "learning_rate": 0.0008201345554136556, + "loss": 1.5803, + "step": 6597 + }, + { + "epoch": 0.6959915611814346, + "grad_norm": 0.44322511553764343, + "learning_rate": 0.0008189970510746938, + "loss": 1.5674, + "step": 6598 + }, + { + "epoch": 0.6960970464135021, + "grad_norm": 0.47145506739616394, + "learning_rate": 0.0008178611244233354, + "loss": 1.5709, + "step": 6599 + }, + { + "epoch": 0.6962025316455697, + "grad_norm": 0.4970199763774872, + "learning_rate": 0.0008167267732713704, + "loss": 1.5835, + "step": 6600 + }, + { + "epoch": 0.6963080168776371, + "grad_norm": 0.5133510231971741, + "learning_rate": 0.0008155939954336242, + "loss": 1.5401, + "step": 6601 + }, + { + "epoch": 0.6964135021097047, + "grad_norm": 0.4554741680622101, + "learning_rate": 0.0008144627887279526, + "loss": 1.549, + "step": 6602 + }, + { + "epoch": 0.6965189873417722, + "grad_norm": 0.5472342371940613, + "learning_rate": 0.0008133331509752381, + "loss": 1.5807, + "step": 6603 + }, + { + "epoch": 0.6966244725738396, + "grad_norm": 0.5181341171264648, + "learning_rate": 0.0008122050799993857, + "loss": 1.5762, + "step": 6604 + }, + { + "epoch": 0.6967299578059072, + "grad_norm": 0.5634217858314514, + "learning_rate": 0.0008110785736273183, + "loss": 1.5564, + "step": 6605 + }, + { + "epoch": 0.6968354430379747, + "grad_norm": 0.5411979556083679, + "learning_rate": 0.0008099536296889731, + "loss": 1.5235, + "step": 6606 + }, + { + "epoch": 0.6969409282700422, + "grad_norm": 0.4497706890106201, + "learning_rate": 0.0008088302460172971, + "loss": 1.553, + "step": 6607 + }, + { + "epoch": 0.6970464135021097, + "grad_norm": 0.5087087154388428, + "learning_rate": 0.0008077084204482425, + "loss": 1.5056, + "step": 6608 + }, + { + "epoch": 0.6971518987341773, + "grad_norm": 0.5459806323051453, + "learning_rate": 0.0008065881508207637, + "loss": 1.5539, + "step": 6609 + }, + { + "epoch": 0.6972573839662447, + "grad_norm": 0.5198091864585876, + 
"learning_rate": 0.0008054694349768117, + "loss": 1.6082, + "step": 6610 + }, + { + "epoch": 0.6973628691983123, + "grad_norm": 0.5953304171562195, + "learning_rate": 0.000804352270761331, + "loss": 1.5139, + "step": 6611 + }, + { + "epoch": 0.6974683544303798, + "grad_norm": 0.577004611492157, + "learning_rate": 0.0008032366560222553, + "loss": 1.569, + "step": 6612 + }, + { + "epoch": 0.6975738396624472, + "grad_norm": 0.5869729518890381, + "learning_rate": 0.0008021225886105027, + "loss": 1.608, + "step": 6613 + }, + { + "epoch": 0.6976793248945148, + "grad_norm": 0.48819971084594727, + "learning_rate": 0.0008010100663799726, + "loss": 1.5605, + "step": 6614 + }, + { + "epoch": 0.6977848101265823, + "grad_norm": 0.5203465223312378, + "learning_rate": 0.0007998990871875402, + "loss": 1.5404, + "step": 6615 + }, + { + "epoch": 0.6978902953586498, + "grad_norm": 0.5282949209213257, + "learning_rate": 0.0007987896488930539, + "loss": 1.5809, + "step": 6616 + }, + { + "epoch": 0.6979957805907173, + "grad_norm": 0.48796385526657104, + "learning_rate": 0.0007976817493593302, + "loss": 1.5812, + "step": 6617 + }, + { + "epoch": 0.6981012658227848, + "grad_norm": 0.5409809350967407, + "learning_rate": 0.0007965753864521494, + "loss": 1.5467, + "step": 6618 + }, + { + "epoch": 0.6982067510548523, + "grad_norm": 0.4720846116542816, + "learning_rate": 0.0007954705580402523, + "loss": 1.5555, + "step": 6619 + }, + { + "epoch": 0.6983122362869199, + "grad_norm": 0.5541364550590515, + "learning_rate": 0.0007943672619953359, + "loss": 1.5407, + "step": 6620 + }, + { + "epoch": 0.6984177215189873, + "grad_norm": 0.4393485188484192, + "learning_rate": 0.0007932654961920488, + "loss": 1.5584, + "step": 6621 + }, + { + "epoch": 0.6985232067510548, + "grad_norm": 0.4939269721508026, + "learning_rate": 0.0007921652585079873, + "loss": 1.5851, + "step": 6622 + }, + { + "epoch": 0.6986286919831224, + "grad_norm": 0.5119544267654419, + "learning_rate": 0.0007910665468236916, + "loss": 
1.5439, + "step": 6623 + }, + { + "epoch": 0.6987341772151898, + "grad_norm": 0.47787803411483765, + "learning_rate": 0.0007899693590226415, + "loss": 1.5496, + "step": 6624 + }, + { + "epoch": 0.6988396624472574, + "grad_norm": 0.48977434635162354, + "learning_rate": 0.0007888736929912525, + "loss": 1.5652, + "step": 6625 + }, + { + "epoch": 0.6989451476793249, + "grad_norm": 0.505007803440094, + "learning_rate": 0.0007877795466188712, + "loss": 1.5323, + "step": 6626 + }, + { + "epoch": 0.6990506329113924, + "grad_norm": 0.48375818133354187, + "learning_rate": 0.0007866869177977721, + "loss": 1.5483, + "step": 6627 + }, + { + "epoch": 0.6991561181434599, + "grad_norm": 0.5514267683029175, + "learning_rate": 0.0007855958044231527, + "loss": 1.5648, + "step": 6628 + }, + { + "epoch": 0.6992616033755275, + "grad_norm": 0.5183826088905334, + "learning_rate": 0.0007845062043931298, + "loss": 1.5337, + "step": 6629 + }, + { + "epoch": 0.6993670886075949, + "grad_norm": 0.4456199109554291, + "learning_rate": 0.0007834181156087356, + "loss": 1.5538, + "step": 6630 + }, + { + "epoch": 0.6994725738396624, + "grad_norm": 0.7880083918571472, + "learning_rate": 0.0007823315359739137, + "loss": 1.5667, + "step": 6631 + }, + { + "epoch": 0.69957805907173, + "grad_norm": 0.48204830288887024, + "learning_rate": 0.0007812464633955144, + "loss": 1.5504, + "step": 6632 + }, + { + "epoch": 0.6996835443037974, + "grad_norm": 0.6027332544326782, + "learning_rate": 0.0007801628957832918, + "loss": 1.5507, + "step": 6633 + }, + { + "epoch": 0.699789029535865, + "grad_norm": 0.4713844060897827, + "learning_rate": 0.0007790808310498984, + "loss": 1.5784, + "step": 6634 + }, + { + "epoch": 0.6998945147679325, + "grad_norm": 0.6253005862236023, + "learning_rate": 0.0007780002671108819, + "loss": 1.554, + "step": 6635 + }, + { + "epoch": 0.7, + "grad_norm": 0.46921494603157043, + "learning_rate": 0.0007769212018846818, + "loss": 1.533, + "step": 6636 + }, + { + "epoch": 0.7001054852320675, + 
"grad_norm": 0.4672645926475525, + "learning_rate": 0.0007758436332926237, + "loss": 1.5921, + "step": 6637 + }, + { + "epoch": 0.700210970464135, + "grad_norm": 0.492880642414093, + "learning_rate": 0.000774767559258917, + "loss": 1.5715, + "step": 6638 + }, + { + "epoch": 0.7003164556962025, + "grad_norm": 0.5257558226585388, + "learning_rate": 0.0007736929777106497, + "loss": 1.544, + "step": 6639 + }, + { + "epoch": 0.70042194092827, + "grad_norm": 0.4666813611984253, + "learning_rate": 0.0007726198865777852, + "loss": 1.5434, + "step": 6640 + }, + { + "epoch": 0.7005274261603376, + "grad_norm": 0.4529757499694824, + "learning_rate": 0.000771548283793158, + "loss": 1.5697, + "step": 6641 + }, + { + "epoch": 0.700632911392405, + "grad_norm": 0.4499722123146057, + "learning_rate": 0.000770478167292469, + "loss": 1.5468, + "step": 6642 + }, + { + "epoch": 0.7007383966244726, + "grad_norm": 0.4351818859577179, + "learning_rate": 0.0007694095350142834, + "loss": 1.5656, + "step": 6643 + }, + { + "epoch": 0.7008438818565401, + "grad_norm": 0.4683161973953247, + "learning_rate": 0.0007683423849000246, + "loss": 1.5193, + "step": 6644 + }, + { + "epoch": 0.7009493670886076, + "grad_norm": 0.4370659589767456, + "learning_rate": 0.0007672767148939714, + "loss": 1.5203, + "step": 6645 + }, + { + "epoch": 0.7010548523206751, + "grad_norm": 0.4852113723754883, + "learning_rate": 0.0007662125229432543, + "loss": 1.5383, + "step": 6646 + }, + { + "epoch": 0.7011603375527427, + "grad_norm": 0.4382689297199249, + "learning_rate": 0.0007651498069978504, + "loss": 1.5637, + "step": 6647 + }, + { + "epoch": 0.7012658227848101, + "grad_norm": 0.47351962327957153, + "learning_rate": 0.0007640885650105806, + "loss": 1.5911, + "step": 6648 + }, + { + "epoch": 0.7013713080168776, + "grad_norm": 0.46821945905685425, + "learning_rate": 0.000763028794937105, + "loss": 1.5085, + "step": 6649 + }, + { + "epoch": 0.7014767932489452, + "grad_norm": 0.5545336604118347, + "learning_rate": 
0.0007619704947359191, + "loss": 1.5797, + "step": 6650 + }, + { + "epoch": 0.7015822784810126, + "grad_norm": 0.4496048092842102, + "learning_rate": 0.0007609136623683499, + "loss": 1.5488, + "step": 6651 + }, + { + "epoch": 0.7016877637130802, + "grad_norm": 0.5856061577796936, + "learning_rate": 0.0007598582957985525, + "loss": 1.5727, + "step": 6652 + }, + { + "epoch": 0.7017932489451477, + "grad_norm": 0.5694226026535034, + "learning_rate": 0.000758804392993505, + "loss": 1.5304, + "step": 6653 + }, + { + "epoch": 0.7018987341772152, + "grad_norm": 0.4599759578704834, + "learning_rate": 0.0007577519519230052, + "loss": 1.5606, + "step": 6654 + }, + { + "epoch": 0.7020042194092827, + "grad_norm": 0.48302337527275085, + "learning_rate": 0.0007567009705596673, + "loss": 1.5549, + "step": 6655 + }, + { + "epoch": 0.7021097046413503, + "grad_norm": 0.621795654296875, + "learning_rate": 0.0007556514468789169, + "loss": 1.5369, + "step": 6656 + }, + { + "epoch": 0.7022151898734177, + "grad_norm": 0.48464787006378174, + "learning_rate": 0.0007546033788589883, + "loss": 1.5485, + "step": 6657 + }, + { + "epoch": 0.7023206751054852, + "grad_norm": 0.5004467368125916, + "learning_rate": 0.0007535567644809191, + "loss": 1.5845, + "step": 6658 + }, + { + "epoch": 0.7024261603375528, + "grad_norm": 0.4963955581188202, + "learning_rate": 0.0007525116017285476, + "loss": 1.5577, + "step": 6659 + }, + { + "epoch": 0.7025316455696202, + "grad_norm": 0.5065571665763855, + "learning_rate": 0.0007514678885885087, + "loss": 1.5333, + "step": 6660 + }, + { + "epoch": 0.7026371308016878, + "grad_norm": 0.5023692846298218, + "learning_rate": 0.000750425623050229, + "loss": 1.525, + "step": 6661 + }, + { + "epoch": 0.7027426160337553, + "grad_norm": 0.5315647721290588, + "learning_rate": 0.0007493848031059247, + "loss": 1.5581, + "step": 6662 + }, + { + "epoch": 0.7028481012658228, + "grad_norm": 0.4617088735103607, + "learning_rate": 0.0007483454267505959, + "loss": 1.5489, + "step": 
6663 + }, + { + "epoch": 0.7029535864978903, + "grad_norm": 0.4618825614452362, + "learning_rate": 0.000747307491982024, + "loss": 1.5284, + "step": 6664 + }, + { + "epoch": 0.7030590717299579, + "grad_norm": 0.5144922137260437, + "learning_rate": 0.0007462709968007675, + "loss": 1.5541, + "step": 6665 + }, + { + "epoch": 0.7031645569620253, + "grad_norm": 0.5299652218818665, + "learning_rate": 0.0007452359392101578, + "loss": 1.557, + "step": 6666 + }, + { + "epoch": 0.7032700421940928, + "grad_norm": 0.5746594071388245, + "learning_rate": 0.0007442023172162958, + "loss": 1.5452, + "step": 6667 + }, + { + "epoch": 0.7033755274261604, + "grad_norm": 0.4928874969482422, + "learning_rate": 0.0007431701288280478, + "loss": 1.5495, + "step": 6668 + }, + { + "epoch": 0.7034810126582278, + "grad_norm": 0.6005679965019226, + "learning_rate": 0.0007421393720570417, + "loss": 1.5909, + "step": 6669 + }, + { + "epoch": 0.7035864978902954, + "grad_norm": 0.5231202244758606, + "learning_rate": 0.0007411100449176633, + "loss": 1.5639, + "step": 6670 + }, + { + "epoch": 0.7036919831223629, + "grad_norm": 0.5319112539291382, + "learning_rate": 0.0007400821454270525, + "loss": 1.5808, + "step": 6671 + }, + { + "epoch": 0.7037974683544304, + "grad_norm": 0.7475935220718384, + "learning_rate": 0.0007390556716050993, + "loss": 1.5525, + "step": 6672 + }, + { + "epoch": 0.7039029535864979, + "grad_norm": 0.5199599266052246, + "learning_rate": 0.0007380306214744398, + "loss": 1.5733, + "step": 6673 + }, + { + "epoch": 0.7040084388185655, + "grad_norm": 0.656639814376831, + "learning_rate": 0.000737006993060453, + "loss": 1.5876, + "step": 6674 + }, + { + "epoch": 0.7041139240506329, + "grad_norm": 0.48102647066116333, + "learning_rate": 0.0007359847843912564, + "loss": 1.5304, + "step": 6675 + }, + { + "epoch": 0.7042194092827004, + "grad_norm": 0.6165723204612732, + "learning_rate": 0.0007349639934977029, + "loss": 1.5622, + "step": 6676 + }, + { + "epoch": 0.704324894514768, + 
"grad_norm": 0.4821445047855377, + "learning_rate": 0.0007339446184133759, + "loss": 1.5614, + "step": 6677 + }, + { + "epoch": 0.7044303797468354, + "grad_norm": 0.5459588170051575, + "learning_rate": 0.0007329266571745864, + "loss": 1.569, + "step": 6678 + }, + { + "epoch": 0.704535864978903, + "grad_norm": 0.5332533717155457, + "learning_rate": 0.0007319101078203694, + "loss": 1.5115, + "step": 6679 + }, + { + "epoch": 0.7046413502109705, + "grad_norm": 0.4783789813518524, + "learning_rate": 0.0007308949683924791, + "loss": 1.533, + "step": 6680 + }, + { + "epoch": 0.704746835443038, + "grad_norm": 0.5693449378013611, + "learning_rate": 0.0007298812369353862, + "loss": 1.5556, + "step": 6681 + }, + { + "epoch": 0.7048523206751055, + "grad_norm": 0.4317052662372589, + "learning_rate": 0.0007288689114962731, + "loss": 1.5578, + "step": 6682 + }, + { + "epoch": 0.7049578059071729, + "grad_norm": 0.7175117135047913, + "learning_rate": 0.0007278579901250316, + "loss": 1.5323, + "step": 6683 + }, + { + "epoch": 0.7050632911392405, + "grad_norm": 0.43420615792274475, + "learning_rate": 0.0007268484708742574, + "loss": 1.5126, + "step": 6684 + }, + { + "epoch": 0.705168776371308, + "grad_norm": 0.5980640649795532, + "learning_rate": 0.0007258403517992476, + "loss": 1.5645, + "step": 6685 + }, + { + "epoch": 0.7052742616033755, + "grad_norm": 0.5155321955680847, + "learning_rate": 0.0007248336309579965, + "loss": 1.5639, + "step": 6686 + }, + { + "epoch": 0.705379746835443, + "grad_norm": 0.46816548705101013, + "learning_rate": 0.0007238283064111917, + "loss": 1.5626, + "step": 6687 + }, + { + "epoch": 0.7054852320675106, + "grad_norm": 0.5139795541763306, + "learning_rate": 0.0007228243762222109, + "loss": 1.521, + "step": 6688 + }, + { + "epoch": 0.705590717299578, + "grad_norm": 0.4960365891456604, + "learning_rate": 0.0007218218384571178, + "loss": 1.5857, + "step": 6689 + }, + { + "epoch": 0.7056962025316456, + "grad_norm": 0.540518581867218, + "learning_rate": 
0.000720820691184658, + "loss": 1.5441, + "step": 6690 + }, + { + "epoch": 0.7058016877637131, + "grad_norm": 0.6019273996353149, + "learning_rate": 0.0007198209324762563, + "loss": 1.5547, + "step": 6691 + }, + { + "epoch": 0.7059071729957805, + "grad_norm": 0.42266330122947693, + "learning_rate": 0.0007188225604060119, + "loss": 1.5589, + "step": 6692 + }, + { + "epoch": 0.7060126582278481, + "grad_norm": 0.4965789020061493, + "learning_rate": 0.0007178255730506955, + "loss": 1.5742, + "step": 6693 + }, + { + "epoch": 0.7061181434599156, + "grad_norm": 0.4750126004219055, + "learning_rate": 0.0007168299684897451, + "loss": 1.5545, + "step": 6694 + }, + { + "epoch": 0.7062236286919831, + "grad_norm": 0.46209055185317993, + "learning_rate": 0.0007158357448052624, + "loss": 1.5373, + "step": 6695 + }, + { + "epoch": 0.7063291139240506, + "grad_norm": 0.44234421849250793, + "learning_rate": 0.0007148429000820094, + "loss": 1.5492, + "step": 6696 + }, + { + "epoch": 0.7064345991561182, + "grad_norm": 0.45361265540122986, + "learning_rate": 0.0007138514324074042, + "loss": 1.5253, + "step": 6697 + }, + { + "epoch": 0.7065400843881856, + "grad_norm": 0.5187786221504211, + "learning_rate": 0.0007128613398715179, + "loss": 1.5796, + "step": 6698 + }, + { + "epoch": 0.7066455696202532, + "grad_norm": 0.4797686040401459, + "learning_rate": 0.0007118726205670703, + "loss": 1.501, + "step": 6699 + }, + { + "epoch": 0.7067510548523207, + "grad_norm": 0.4801947772502899, + "learning_rate": 0.0007108852725894269, + "loss": 1.544, + "step": 6700 + }, + { + "epoch": 0.7068565400843881, + "grad_norm": 0.5309736728668213, + "learning_rate": 0.0007098992940365947, + "loss": 1.5786, + "step": 6701 + }, + { + "epoch": 0.7069620253164557, + "grad_norm": 0.4449637532234192, + "learning_rate": 0.0007089146830092185, + "loss": 1.5686, + "step": 6702 + }, + { + "epoch": 0.7070675105485232, + "grad_norm": 0.5048536658287048, + "learning_rate": 0.0007079314376105778, + "loss": 1.5446, + 
"step": 6703 + }, + { + "epoch": 0.7071729957805907, + "grad_norm": 0.5020825266838074, + "learning_rate": 0.0007069495559465826, + "loss": 1.5345, + "step": 6704 + }, + { + "epoch": 0.7072784810126582, + "grad_norm": 0.45239654183387756, + "learning_rate": 0.0007059690361257701, + "loss": 1.564, + "step": 6705 + }, + { + "epoch": 0.7073839662447258, + "grad_norm": 0.4716244339942932, + "learning_rate": 0.0007049898762593007, + "loss": 1.5339, + "step": 6706 + }, + { + "epoch": 0.7074894514767932, + "grad_norm": 0.4415547251701355, + "learning_rate": 0.0007040120744609548, + "loss": 1.5754, + "step": 6707 + }, + { + "epoch": 0.7075949367088608, + "grad_norm": 0.5089999437332153, + "learning_rate": 0.0007030356288471288, + "loss": 1.533, + "step": 6708 + }, + { + "epoch": 0.7077004219409283, + "grad_norm": 0.5045871138572693, + "learning_rate": 0.0007020605375368316, + "loss": 1.5544, + "step": 6709 + }, + { + "epoch": 0.7078059071729957, + "grad_norm": 0.468222975730896, + "learning_rate": 0.000701086798651681, + "loss": 1.5268, + "step": 6710 + }, + { + "epoch": 0.7079113924050633, + "grad_norm": 0.43993693590164185, + "learning_rate": 0.0007001144103159, + "loss": 1.5239, + "step": 6711 + }, + { + "epoch": 0.7080168776371308, + "grad_norm": 0.4992451071739197, + "learning_rate": 0.0006991433706563135, + "loss": 1.5415, + "step": 6712 + }, + { + "epoch": 0.7081223628691983, + "grad_norm": 0.4695892035961151, + "learning_rate": 0.0006981736778023443, + "loss": 1.5456, + "step": 6713 + }, + { + "epoch": 0.7082278481012658, + "grad_norm": 0.46786677837371826, + "learning_rate": 0.0006972053298860092, + "loss": 1.5512, + "step": 6714 + }, + { + "epoch": 0.7083333333333334, + "grad_norm": 0.5553925037384033, + "learning_rate": 0.0006962383250419168, + "loss": 1.5008, + "step": 6715 + }, + { + "epoch": 0.7084388185654008, + "grad_norm": 0.4732776880264282, + "learning_rate": 0.0006952726614072621, + "loss": 1.5253, + "step": 6716 + }, + { + "epoch": 0.7085443037974684, 
+ "grad_norm": 0.7153058052062988, + "learning_rate": 0.0006943083371218242, + "loss": 1.575, + "step": 6717 + }, + { + "epoch": 0.7086497890295359, + "grad_norm": 0.4462694227695465, + "learning_rate": 0.0006933453503279619, + "loss": 1.5211, + "step": 6718 + }, + { + "epoch": 0.7087552742616033, + "grad_norm": 0.8087603449821472, + "learning_rate": 0.000692383699170611, + "loss": 1.5559, + "step": 6719 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 0.4940249025821686, + "learning_rate": 0.0006914233817972798, + "loss": 1.5628, + "step": 6720 + }, + { + "epoch": 0.7089662447257384, + "grad_norm": 0.6718744039535522, + "learning_rate": 0.0006904643963580462, + "loss": 1.5474, + "step": 6721 + }, + { + "epoch": 0.7090717299578059, + "grad_norm": 0.4637923240661621, + "learning_rate": 0.0006895067410055538, + "loss": 1.5666, + "step": 6722 + }, + { + "epoch": 0.7091772151898734, + "grad_norm": 0.6198895573616028, + "learning_rate": 0.0006885504138950082, + "loss": 1.5473, + "step": 6723 + }, + { + "epoch": 0.709282700421941, + "grad_norm": 0.49211519956588745, + "learning_rate": 0.0006875954131841743, + "loss": 1.5466, + "step": 6724 + }, + { + "epoch": 0.7093881856540084, + "grad_norm": 0.5213711857795715, + "learning_rate": 0.0006866417370333716, + "loss": 1.5409, + "step": 6725 + }, + { + "epoch": 0.709493670886076, + "grad_norm": 0.4944036602973938, + "learning_rate": 0.0006856893836054712, + "loss": 1.5572, + "step": 6726 + }, + { + "epoch": 0.7095991561181435, + "grad_norm": 0.4943283200263977, + "learning_rate": 0.0006847383510658927, + "loss": 1.5455, + "step": 6727 + }, + { + "epoch": 0.7097046413502109, + "grad_norm": 0.4628939628601074, + "learning_rate": 0.0006837886375825994, + "loss": 1.572, + "step": 6728 + }, + { + "epoch": 0.7098101265822785, + "grad_norm": 0.5107986927032471, + "learning_rate": 0.0006828402413260966, + "loss": 1.5497, + "step": 6729 + }, + { + "epoch": 0.709915611814346, + "grad_norm": 0.5592716336250305, + "learning_rate": 
0.0006818931604694261, + "loss": 1.538, + "step": 6730 + }, + { + "epoch": 0.7100210970464135, + "grad_norm": 0.43582016229629517, + "learning_rate": 0.0006809473931881645, + "loss": 1.5641, + "step": 6731 + }, + { + "epoch": 0.710126582278481, + "grad_norm": 0.5421050190925598, + "learning_rate": 0.0006800029376604181, + "loss": 1.5455, + "step": 6732 + }, + { + "epoch": 0.7102320675105486, + "grad_norm": 0.49958208203315735, + "learning_rate": 0.0006790597920668204, + "loss": 1.5634, + "step": 6733 + }, + { + "epoch": 0.710337552742616, + "grad_norm": 0.5179060101509094, + "learning_rate": 0.0006781179545905287, + "loss": 1.5224, + "step": 6734 + }, + { + "epoch": 0.7104430379746836, + "grad_norm": 0.461525559425354, + "learning_rate": 0.0006771774234172195, + "loss": 1.5529, + "step": 6735 + }, + { + "epoch": 0.7105485232067511, + "grad_norm": 0.48807981610298157, + "learning_rate": 0.0006762381967350861, + "loss": 1.5602, + "step": 6736 + }, + { + "epoch": 0.7106540084388185, + "grad_norm": 0.433828741312027, + "learning_rate": 0.0006753002727348349, + "loss": 1.5287, + "step": 6737 + }, + { + "epoch": 0.7107594936708861, + "grad_norm": 0.5105122923851013, + "learning_rate": 0.0006743636496096813, + "loss": 1.5554, + "step": 6738 + }, + { + "epoch": 0.7108649789029536, + "grad_norm": 0.5092545747756958, + "learning_rate": 0.0006734283255553471, + "loss": 1.5854, + "step": 6739 + }, + { + "epoch": 0.7109704641350211, + "grad_norm": 0.438706636428833, + "learning_rate": 0.0006724942987700563, + "loss": 1.539, + "step": 6740 + }, + { + "epoch": 0.7110759493670886, + "grad_norm": 0.5282023549079895, + "learning_rate": 0.0006715615674545319, + "loss": 1.5423, + "step": 6741 + }, + { + "epoch": 0.7111814345991562, + "grad_norm": 0.47605130076408386, + "learning_rate": 0.0006706301298119925, + "loss": 1.5498, + "step": 6742 + }, + { + "epoch": 0.7112869198312236, + "grad_norm": 0.4943104088306427, + "learning_rate": 0.0006696999840481491, + "loss": 1.5332, + "step": 
6743 + }, + { + "epoch": 0.7113924050632912, + "grad_norm": 0.4608191251754761, + "learning_rate": 0.0006687711283712009, + "loss": 1.5101, + "step": 6744 + }, + { + "epoch": 0.7114978902953587, + "grad_norm": 0.49573028087615967, + "learning_rate": 0.0006678435609918323, + "loss": 1.5847, + "step": 6745 + }, + { + "epoch": 0.7116033755274261, + "grad_norm": 0.5111983418464661, + "learning_rate": 0.0006669172801232098, + "loss": 1.5657, + "step": 6746 + }, + { + "epoch": 0.7117088607594937, + "grad_norm": 0.4408809244632721, + "learning_rate": 0.0006659922839809779, + "loss": 1.5509, + "step": 6747 + }, + { + "epoch": 0.7118143459915611, + "grad_norm": 0.48499247431755066, + "learning_rate": 0.0006650685707832559, + "loss": 1.5468, + "step": 6748 + }, + { + "epoch": 0.7119198312236287, + "grad_norm": 0.4597436487674713, + "learning_rate": 0.0006641461387506347, + "loss": 1.5816, + "step": 6749 + }, + { + "epoch": 0.7120253164556962, + "grad_norm": 0.5312247276306152, + "learning_rate": 0.0006632249861061732, + "loss": 1.542, + "step": 6750 + }, + { + "epoch": 0.7121308016877637, + "grad_norm": 0.41715967655181885, + "learning_rate": 0.0006623051110753947, + "loss": 1.5182, + "step": 6751 + }, + { + "epoch": 0.7122362869198312, + "grad_norm": 0.47443118691444397, + "learning_rate": 0.0006613865118862837, + "loss": 1.5406, + "step": 6752 + }, + { + "epoch": 0.7123417721518988, + "grad_norm": 0.5484854578971863, + "learning_rate": 0.0006604691867692828, + "loss": 1.5397, + "step": 6753 + }, + { + "epoch": 0.7124472573839662, + "grad_norm": 0.43628114461898804, + "learning_rate": 0.0006595531339572881, + "loss": 1.5931, + "step": 6754 + }, + { + "epoch": 0.7125527426160337, + "grad_norm": 0.5638396739959717, + "learning_rate": 0.0006586383516856475, + "loss": 1.5484, + "step": 6755 + }, + { + "epoch": 0.7126582278481013, + "grad_norm": 0.4444471597671509, + "learning_rate": 0.000657724838192156, + "loss": 1.5138, + "step": 6756 + }, + { + "epoch": 0.7127637130801687, + 
"grad_norm": 0.4277331531047821, + "learning_rate": 0.0006568125917170526, + "loss": 1.5556, + "step": 6757 + }, + { + "epoch": 0.7128691983122363, + "grad_norm": 0.5183447003364563, + "learning_rate": 0.0006559016105030176, + "loss": 1.56, + "step": 6758 + }, + { + "epoch": 0.7129746835443038, + "grad_norm": 0.5353155732154846, + "learning_rate": 0.0006549918927951678, + "loss": 1.5639, + "step": 6759 + }, + { + "epoch": 0.7130801687763713, + "grad_norm": 0.4568440914154053, + "learning_rate": 0.0006540834368410549, + "loss": 1.5383, + "step": 6760 + }, + { + "epoch": 0.7131856540084388, + "grad_norm": 0.5182054042816162, + "learning_rate": 0.0006531762408906606, + "loss": 1.5635, + "step": 6761 + }, + { + "epoch": 0.7132911392405064, + "grad_norm": 0.5307048559188843, + "learning_rate": 0.0006522703031963939, + "loss": 1.5344, + "step": 6762 + }, + { + "epoch": 0.7133966244725738, + "grad_norm": 0.4830094873905182, + "learning_rate": 0.0006513656220130878, + "loss": 1.5648, + "step": 6763 + }, + { + "epoch": 0.7135021097046413, + "grad_norm": 0.5077260732650757, + "learning_rate": 0.0006504621955979959, + "loss": 1.5575, + "step": 6764 + }, + { + "epoch": 0.7136075949367089, + "grad_norm": 0.5386867523193359, + "learning_rate": 0.0006495600222107885, + "loss": 1.5691, + "step": 6765 + }, + { + "epoch": 0.7137130801687763, + "grad_norm": 0.4486021399497986, + "learning_rate": 0.0006486591001135502, + "loss": 1.5083, + "step": 6766 + }, + { + "epoch": 0.7138185654008439, + "grad_norm": 0.5497661828994751, + "learning_rate": 0.0006477594275707758, + "loss": 1.5345, + "step": 6767 + }, + { + "epoch": 0.7139240506329114, + "grad_norm": 0.45292559266090393, + "learning_rate": 0.000646861002849367, + "loss": 1.5525, + "step": 6768 + }, + { + "epoch": 0.7140295358649789, + "grad_norm": 0.5148671269416809, + "learning_rate": 0.0006459638242186297, + "loss": 1.5703, + "step": 6769 + }, + { + "epoch": 0.7141350210970464, + "grad_norm": 0.5652586817741394, + "learning_rate": 
0.0006450678899502701, + "loss": 1.5541, + "step": 6770 + }, + { + "epoch": 0.714240506329114, + "grad_norm": 0.47230979800224304, + "learning_rate": 0.0006441731983183911, + "loss": 1.5991, + "step": 6771 + }, + { + "epoch": 0.7143459915611814, + "grad_norm": 0.4903254508972168, + "learning_rate": 0.0006432797475994899, + "loss": 1.5503, + "step": 6772 + }, + { + "epoch": 0.7144514767932489, + "grad_norm": 0.508822500705719, + "learning_rate": 0.0006423875360724538, + "loss": 1.57, + "step": 6773 + }, + { + "epoch": 0.7145569620253165, + "grad_norm": 0.48720335960388184, + "learning_rate": 0.0006414965620185574, + "loss": 1.51, + "step": 6774 + }, + { + "epoch": 0.7146624472573839, + "grad_norm": 0.5062872767448425, + "learning_rate": 0.0006406068237214593, + "loss": 1.534, + "step": 6775 + }, + { + "epoch": 0.7147679324894515, + "grad_norm": 0.5193167328834534, + "learning_rate": 0.000639718319467198, + "loss": 1.5091, + "step": 6776 + }, + { + "epoch": 0.714873417721519, + "grad_norm": 0.5054948925971985, + "learning_rate": 0.0006388310475441899, + "loss": 1.5396, + "step": 6777 + }, + { + "epoch": 0.7149789029535865, + "grad_norm": 0.538214921951294, + "learning_rate": 0.0006379450062432248, + "loss": 1.5514, + "step": 6778 + }, + { + "epoch": 0.715084388185654, + "grad_norm": 0.5200361013412476, + "learning_rate": 0.0006370601938574639, + "loss": 1.5492, + "step": 6779 + }, + { + "epoch": 0.7151898734177216, + "grad_norm": 0.5863214135169983, + "learning_rate": 0.0006361766086824345, + "loss": 1.5541, + "step": 6780 + }, + { + "epoch": 0.715295358649789, + "grad_norm": 0.5286057591438293, + "learning_rate": 0.0006352942490160293, + "loss": 1.5844, + "step": 6781 + }, + { + "epoch": 0.7154008438818565, + "grad_norm": 0.5346243977546692, + "learning_rate": 0.0006344131131585007, + "loss": 1.5468, + "step": 6782 + }, + { + "epoch": 0.7155063291139241, + "grad_norm": 0.4713151752948761, + "learning_rate": 0.0006335331994124592, + "loss": 1.5269, + "step": 6783 + 
}, + { + "epoch": 0.7156118143459915, + "grad_norm": 0.6031091809272766, + "learning_rate": 0.0006326545060828696, + "loss": 1.5856, + "step": 6784 + }, + { + "epoch": 0.7157172995780591, + "grad_norm": 0.504538357257843, + "learning_rate": 0.000631777031477047, + "loss": 1.5647, + "step": 6785 + }, + { + "epoch": 0.7158227848101266, + "grad_norm": 0.5394483804702759, + "learning_rate": 0.0006309007739046551, + "loss": 1.5109, + "step": 6786 + }, + { + "epoch": 0.7159282700421941, + "grad_norm": 0.4800265431404114, + "learning_rate": 0.0006300257316777014, + "loss": 1.5292, + "step": 6787 + }, + { + "epoch": 0.7160337552742616, + "grad_norm": 0.596564769744873, + "learning_rate": 0.0006291519031105347, + "loss": 1.5496, + "step": 6788 + }, + { + "epoch": 0.7161392405063292, + "grad_norm": 0.5047953724861145, + "learning_rate": 0.0006282792865198421, + "loss": 1.5348, + "step": 6789 + }, + { + "epoch": 0.7162447257383966, + "grad_norm": 0.5160011649131775, + "learning_rate": 0.000627407880224645, + "loss": 1.5847, + "step": 6790 + }, + { + "epoch": 0.7163502109704641, + "grad_norm": 0.43249937891960144, + "learning_rate": 0.0006265376825462964, + "loss": 1.541, + "step": 6791 + }, + { + "epoch": 0.7164556962025317, + "grad_norm": 0.4553639590740204, + "learning_rate": 0.0006256686918084777, + "loss": 1.5413, + "step": 6792 + }, + { + "epoch": 0.7165611814345991, + "grad_norm": 0.48277103900909424, + "learning_rate": 0.0006248009063371953, + "loss": 1.5554, + "step": 6793 + }, + { + "epoch": 0.7166666666666667, + "grad_norm": 0.4682178199291229, + "learning_rate": 0.0006239343244607771, + "loss": 1.5349, + "step": 6794 + }, + { + "epoch": 0.7167721518987342, + "grad_norm": 0.458538293838501, + "learning_rate": 0.0006230689445098697, + "loss": 1.5283, + "step": 6795 + }, + { + "epoch": 0.7168776371308017, + "grad_norm": 0.5126941204071045, + "learning_rate": 0.0006222047648174351, + "loss": 1.5005, + "step": 6796 + }, + { + "epoch": 0.7169831223628692, + "grad_norm": 
0.5203979015350342, + "learning_rate": 0.0006213417837187475, + "loss": 1.5298, + "step": 6797 + }, + { + "epoch": 0.7170886075949368, + "grad_norm": 0.47017431259155273, + "learning_rate": 0.00062047999955139, + "loss": 1.5489, + "step": 6798 + }, + { + "epoch": 0.7171940928270042, + "grad_norm": 0.5460437536239624, + "learning_rate": 0.0006196194106552512, + "loss": 1.5707, + "step": 6799 + }, + { + "epoch": 0.7172995780590717, + "grad_norm": 0.4368559718132019, + "learning_rate": 0.0006187600153725225, + "loss": 1.5052, + "step": 6800 + }, + { + "epoch": 0.7174050632911393, + "grad_norm": 0.44950324296951294, + "learning_rate": 0.0006179018120476945, + "loss": 1.5724, + "step": 6801 + }, + { + "epoch": 0.7175105485232067, + "grad_norm": 0.48503077030181885, + "learning_rate": 0.000617044799027554, + "loss": 1.5275, + "step": 6802 + }, + { + "epoch": 0.7176160337552743, + "grad_norm": 0.4647253751754761, + "learning_rate": 0.0006161889746611808, + "loss": 1.5534, + "step": 6803 + }, + { + "epoch": 0.7177215189873418, + "grad_norm": 0.43151435256004333, + "learning_rate": 0.0006153343372999444, + "loss": 1.5005, + "step": 6804 + }, + { + "epoch": 0.7178270042194093, + "grad_norm": 0.4642019271850586, + "learning_rate": 0.0006144808852975009, + "loss": 1.5747, + "step": 6805 + }, + { + "epoch": 0.7179324894514768, + "grad_norm": 0.48096582293510437, + "learning_rate": 0.00061362861700979, + "loss": 1.5541, + "step": 6806 + }, + { + "epoch": 0.7180379746835444, + "grad_norm": 0.4491911828517914, + "learning_rate": 0.0006127775307950314, + "loss": 1.5223, + "step": 6807 + }, + { + "epoch": 0.7181434599156118, + "grad_norm": 0.4846862554550171, + "learning_rate": 0.000611927625013722, + "loss": 1.5723, + "step": 6808 + }, + { + "epoch": 0.7182489451476793, + "grad_norm": 0.467145711183548, + "learning_rate": 0.0006110788980286328, + "loss": 1.5508, + "step": 6809 + }, + { + "epoch": 0.7183544303797469, + "grad_norm": 0.5330292582511902, + "learning_rate": 
0.0006102313482048055, + "loss": 1.4719, + "step": 6810 + }, + { + "epoch": 0.7184599156118143, + "grad_norm": 0.4568015933036804, + "learning_rate": 0.0006093849739095494, + "loss": 1.5626, + "step": 6811 + }, + { + "epoch": 0.7185654008438819, + "grad_norm": 0.4917406439781189, + "learning_rate": 0.0006085397735124382, + "loss": 1.5604, + "step": 6812 + }, + { + "epoch": 0.7186708860759494, + "grad_norm": 0.47678980231285095, + "learning_rate": 0.0006076957453853072, + "loss": 1.5474, + "step": 6813 + }, + { + "epoch": 0.7187763713080169, + "grad_norm": 0.5251429677009583, + "learning_rate": 0.0006068528879022496, + "loss": 1.5151, + "step": 6814 + }, + { + "epoch": 0.7188818565400844, + "grad_norm": 0.4720397889614105, + "learning_rate": 0.0006060111994396143, + "loss": 1.5487, + "step": 6815 + }, + { + "epoch": 0.7189873417721518, + "grad_norm": 0.49456480145454407, + "learning_rate": 0.0006051706783760013, + "loss": 1.5377, + "step": 6816 + }, + { + "epoch": 0.7190928270042194, + "grad_norm": 0.4655502736568451, + "learning_rate": 0.0006043313230922601, + "loss": 1.5338, + "step": 6817 + }, + { + "epoch": 0.7191983122362869, + "grad_norm": 0.44060802459716797, + "learning_rate": 0.0006034931319714858, + "loss": 1.5246, + "step": 6818 + }, + { + "epoch": 0.7193037974683544, + "grad_norm": 0.4712231755256653, + "learning_rate": 0.0006026561033990158, + "loss": 1.5401, + "step": 6819 + }, + { + "epoch": 0.7194092827004219, + "grad_norm": 0.48318424820899963, + "learning_rate": 0.0006018202357624274, + "loss": 1.5455, + "step": 6820 + }, + { + "epoch": 0.7195147679324895, + "grad_norm": 0.5258792042732239, + "learning_rate": 0.0006009855274515337, + "loss": 1.5436, + "step": 6821 + }, + { + "epoch": 0.7196202531645569, + "grad_norm": 0.4671013057231903, + "learning_rate": 0.0006001519768583819, + "loss": 1.5901, + "step": 6822 + }, + { + "epoch": 0.7197257383966245, + "grad_norm": 0.4964064061641693, + "learning_rate": 0.0005993195823772488, + "loss": 1.4981, + 
"step": 6823 + }, + { + "epoch": 0.719831223628692, + "grad_norm": 0.4290485382080078, + "learning_rate": 0.0005984883424046384, + "loss": 1.5417, + "step": 6824 + }, + { + "epoch": 0.7199367088607594, + "grad_norm": 0.48920175433158875, + "learning_rate": 0.0005976582553392788, + "loss": 1.5413, + "step": 6825 + }, + { + "epoch": 0.720042194092827, + "grad_norm": 0.4913826286792755, + "learning_rate": 0.000596829319582119, + "loss": 1.5136, + "step": 6826 + }, + { + "epoch": 0.7201476793248945, + "grad_norm": 0.4746927320957184, + "learning_rate": 0.0005960015335363258, + "loss": 1.5734, + "step": 6827 + }, + { + "epoch": 0.720253164556962, + "grad_norm": 0.4370986521244049, + "learning_rate": 0.0005951748956072806, + "loss": 1.5327, + "step": 6828 + }, + { + "epoch": 0.7203586497890295, + "grad_norm": 0.4876527488231659, + "learning_rate": 0.000594349404202577, + "loss": 1.5264, + "step": 6829 + }, + { + "epoch": 0.7204641350210971, + "grad_norm": 0.4784664809703827, + "learning_rate": 0.0005935250577320168, + "loss": 1.53, + "step": 6830 + }, + { + "epoch": 0.7205696202531645, + "grad_norm": 0.51405268907547, + "learning_rate": 0.0005927018546076072, + "loss": 1.5459, + "step": 6831 + }, + { + "epoch": 0.7206751054852321, + "grad_norm": 0.47052356600761414, + "learning_rate": 0.0005918797932435585, + "loss": 1.5165, + "step": 6832 + }, + { + "epoch": 0.7207805907172996, + "grad_norm": 0.5199061036109924, + "learning_rate": 0.0005910588720562799, + "loss": 1.5278, + "step": 6833 + }, + { + "epoch": 0.720886075949367, + "grad_norm": 0.46353626251220703, + "learning_rate": 0.0005902390894643773, + "loss": 1.5231, + "step": 6834 + }, + { + "epoch": 0.7209915611814346, + "grad_norm": 0.49449896812438965, + "learning_rate": 0.0005894204438886498, + "loss": 1.5408, + "step": 6835 + }, + { + "epoch": 0.7210970464135021, + "grad_norm": 0.48055365681648254, + "learning_rate": 0.0005886029337520871, + "loss": 1.5254, + "step": 6836 + }, + { + "epoch": 0.7212025316455696, + 
"grad_norm": 0.465086430311203, + "learning_rate": 0.0005877865574798655, + "loss": 1.5349, + "step": 6837 + }, + { + "epoch": 0.7213080168776371, + "grad_norm": 0.47648733854293823, + "learning_rate": 0.0005869713134993463, + "loss": 1.5483, + "step": 6838 + }, + { + "epoch": 0.7214135021097047, + "grad_norm": 0.4783395528793335, + "learning_rate": 0.0005861572002400716, + "loss": 1.5161, + "step": 6839 + }, + { + "epoch": 0.7215189873417721, + "grad_norm": 0.46843791007995605, + "learning_rate": 0.0005853442161337618, + "loss": 1.5241, + "step": 6840 + }, + { + "epoch": 0.7216244725738397, + "grad_norm": 0.44689735770225525, + "learning_rate": 0.0005845323596143124, + "loss": 1.5196, + "step": 6841 + }, + { + "epoch": 0.7217299578059072, + "grad_norm": 0.4600822627544403, + "learning_rate": 0.0005837216291177911, + "loss": 1.5461, + "step": 6842 + }, + { + "epoch": 0.7218354430379746, + "grad_norm": 0.4526119530200958, + "learning_rate": 0.0005829120230824345, + "loss": 1.5256, + "step": 6843 + }, + { + "epoch": 0.7219409282700422, + "grad_norm": 0.49139970541000366, + "learning_rate": 0.0005821035399486458, + "loss": 1.563, + "step": 6844 + }, + { + "epoch": 0.7220464135021097, + "grad_norm": 0.4772544801235199, + "learning_rate": 0.0005812961781589908, + "loss": 1.5491, + "step": 6845 + }, + { + "epoch": 0.7221518987341772, + "grad_norm": 0.4974606931209564, + "learning_rate": 0.000580489936158196, + "loss": 1.5521, + "step": 6846 + }, + { + "epoch": 0.7222573839662447, + "grad_norm": 0.4158450663089752, + "learning_rate": 0.0005796848123931443, + "loss": 1.5462, + "step": 6847 + }, + { + "epoch": 0.7223628691983123, + "grad_norm": 0.5039369463920593, + "learning_rate": 0.0005788808053128734, + "loss": 1.5191, + "step": 6848 + }, + { + "epoch": 0.7224683544303797, + "grad_norm": 0.48272404074668884, + "learning_rate": 0.0005780779133685717, + "loss": 1.553, + "step": 6849 + }, + { + "epoch": 0.7225738396624473, + "grad_norm": 0.47118261456489563, + 
"learning_rate": 0.0005772761350135759, + "loss": 1.5135, + "step": 6850 + }, + { + "epoch": 0.7226793248945148, + "grad_norm": 0.47816649079322815, + "learning_rate": 0.000576475468703368, + "loss": 1.5575, + "step": 6851 + }, + { + "epoch": 0.7227848101265822, + "grad_norm": 0.48343923687934875, + "learning_rate": 0.0005756759128955721, + "loss": 1.5462, + "step": 6852 + }, + { + "epoch": 0.7228902953586498, + "grad_norm": 0.5833190679550171, + "learning_rate": 0.0005748774660499515, + "loss": 1.5293, + "step": 6853 + }, + { + "epoch": 0.7229957805907173, + "grad_norm": 0.4382333755493164, + "learning_rate": 0.0005740801266284058, + "loss": 1.533, + "step": 6854 + }, + { + "epoch": 0.7231012658227848, + "grad_norm": 0.5338608622550964, + "learning_rate": 0.0005732838930949678, + "loss": 1.4874, + "step": 6855 + }, + { + "epoch": 0.7232067510548523, + "grad_norm": 0.5252207517623901, + "learning_rate": 0.000572488763915801, + "loss": 1.5237, + "step": 6856 + }, + { + "epoch": 0.7233122362869199, + "grad_norm": 0.532107412815094, + "learning_rate": 0.0005716947375591958, + "loss": 1.5034, + "step": 6857 + }, + { + "epoch": 0.7234177215189873, + "grad_norm": 0.5585890412330627, + "learning_rate": 0.0005709018124955674, + "loss": 1.5689, + "step": 6858 + }, + { + "epoch": 0.7235232067510549, + "grad_norm": 0.4830462336540222, + "learning_rate": 0.0005701099871974524, + "loss": 1.5637, + "step": 6859 + }, + { + "epoch": 0.7236286919831224, + "grad_norm": 0.5976365208625793, + "learning_rate": 0.0005693192601395058, + "loss": 1.5257, + "step": 6860 + }, + { + "epoch": 0.7237341772151898, + "grad_norm": 0.5155830383300781, + "learning_rate": 0.0005685296297984985, + "loss": 1.5014, + "step": 6861 + }, + { + "epoch": 0.7238396624472574, + "grad_norm": 0.4879033863544464, + "learning_rate": 0.0005677410946533138, + "loss": 1.5686, + "step": 6862 + }, + { + "epoch": 0.7239451476793249, + "grad_norm": 0.611248791217804, + "learning_rate": 0.0005669536531849449, + "loss": 
1.56, + "step": 6863 + }, + { + "epoch": 0.7240506329113924, + "grad_norm": 0.4879395663738251, + "learning_rate": 0.0005661673038764916, + "loss": 1.5521, + "step": 6864 + }, + { + "epoch": 0.7241561181434599, + "grad_norm": 0.5838509202003479, + "learning_rate": 0.000565382045213158, + "loss": 1.54, + "step": 6865 + }, + { + "epoch": 0.7242616033755275, + "grad_norm": 0.4879651963710785, + "learning_rate": 0.000564597875682249, + "loss": 1.5229, + "step": 6866 + }, + { + "epoch": 0.7243670886075949, + "grad_norm": 0.5476601719856262, + "learning_rate": 0.0005638147937731673, + "loss": 1.5195, + "step": 6867 + }, + { + "epoch": 0.7244725738396625, + "grad_norm": 0.5659372806549072, + "learning_rate": 0.0005630327979774111, + "loss": 1.5301, + "step": 6868 + }, + { + "epoch": 0.72457805907173, + "grad_norm": 0.5292827486991882, + "learning_rate": 0.0005622518867885708, + "loss": 1.5105, + "step": 6869 + }, + { + "epoch": 0.7246835443037974, + "grad_norm": 0.5626305341720581, + "learning_rate": 0.000561472058702326, + "loss": 1.5146, + "step": 6870 + }, + { + "epoch": 0.724789029535865, + "grad_norm": 0.5764346122741699, + "learning_rate": 0.0005606933122164428, + "loss": 1.5445, + "step": 6871 + }, + { + "epoch": 0.7248945147679325, + "grad_norm": 0.598465085029602, + "learning_rate": 0.000559915645830771, + "loss": 1.5468, + "step": 6872 + }, + { + "epoch": 0.725, + "grad_norm": 0.507113516330719, + "learning_rate": 0.0005591390580472411, + "loss": 1.5384, + "step": 6873 + }, + { + "epoch": 0.7251054852320675, + "grad_norm": 0.5054839849472046, + "learning_rate": 0.0005583635473698608, + "loss": 1.539, + "step": 6874 + }, + { + "epoch": 0.7252109704641351, + "grad_norm": 0.5646956562995911, + "learning_rate": 0.0005575891123047136, + "loss": 1.5366, + "step": 6875 + }, + { + "epoch": 0.7253164556962025, + "grad_norm": 0.5350216031074524, + "learning_rate": 0.0005568157513599543, + "loss": 1.5278, + "step": 6876 + }, + { + "epoch": 0.7254219409282701, + 
"grad_norm": 0.5685900449752808, + "learning_rate": 0.0005560434630458071, + "loss": 1.528, + "step": 6877 + }, + { + "epoch": 0.7255274261603376, + "grad_norm": 0.5420767068862915, + "learning_rate": 0.0005552722458745627, + "loss": 1.5287, + "step": 6878 + }, + { + "epoch": 0.725632911392405, + "grad_norm": 0.5841749906539917, + "learning_rate": 0.0005545020983605748, + "loss": 1.5748, + "step": 6879 + }, + { + "epoch": 0.7257383966244726, + "grad_norm": 0.5920206308364868, + "learning_rate": 0.000553733019020258, + "loss": 1.5368, + "step": 6880 + }, + { + "epoch": 0.72584388185654, + "grad_norm": 0.44279634952545166, + "learning_rate": 0.0005529650063720844, + "loss": 1.5332, + "step": 6881 + }, + { + "epoch": 0.7259493670886076, + "grad_norm": 0.660584032535553, + "learning_rate": 0.0005521980589365809, + "loss": 1.5267, + "step": 6882 + }, + { + "epoch": 0.7260548523206751, + "grad_norm": 0.572847843170166, + "learning_rate": 0.0005514321752363265, + "loss": 1.5362, + "step": 6883 + }, + { + "epoch": 0.7261603375527426, + "grad_norm": 0.47688448429107666, + "learning_rate": 0.0005506673537959495, + "loss": 1.5292, + "step": 6884 + }, + { + "epoch": 0.7262658227848101, + "grad_norm": 0.5942841172218323, + "learning_rate": 0.0005499035931421242, + "loss": 1.5507, + "step": 6885 + }, + { + "epoch": 0.7263713080168777, + "grad_norm": 0.5383564829826355, + "learning_rate": 0.0005491408918035683, + "loss": 1.5042, + "step": 6886 + }, + { + "epoch": 0.7264767932489451, + "grad_norm": 0.5472618937492371, + "learning_rate": 0.0005483792483110407, + "loss": 1.5407, + "step": 6887 + }, + { + "epoch": 0.7265822784810126, + "grad_norm": 0.573840320110321, + "learning_rate": 0.0005476186611973374, + "loss": 1.5409, + "step": 6888 + }, + { + "epoch": 0.7266877637130802, + "grad_norm": 0.5130054354667664, + "learning_rate": 0.0005468591289972898, + "loss": 1.5172, + "step": 6889 + }, + { + "epoch": 0.7267932489451476, + "grad_norm": 0.5248138308525085, + "learning_rate": 
0.0005461006502477612, + "loss": 1.5026, + "step": 6890 + }, + { + "epoch": 0.7268987341772152, + "grad_norm": 0.4802354574203491, + "learning_rate": 0.0005453432234876445, + "loss": 1.5522, + "step": 6891 + }, + { + "epoch": 0.7270042194092827, + "grad_norm": 0.47751444578170776, + "learning_rate": 0.000544586847257859, + "loss": 1.5515, + "step": 6892 + }, + { + "epoch": 0.7271097046413502, + "grad_norm": 0.5061583518981934, + "learning_rate": 0.0005438315201013477, + "loss": 1.5465, + "step": 6893 + }, + { + "epoch": 0.7272151898734177, + "grad_norm": 0.4960957467556, + "learning_rate": 0.0005430772405630743, + "loss": 1.5079, + "step": 6894 + }, + { + "epoch": 0.7273206751054853, + "grad_norm": 0.5025561451911926, + "learning_rate": 0.0005423240071900209, + "loss": 1.5009, + "step": 6895 + }, + { + "epoch": 0.7274261603375527, + "grad_norm": 0.5052862763404846, + "learning_rate": 0.0005415718185311847, + "loss": 1.5264, + "step": 6896 + }, + { + "epoch": 0.7275316455696202, + "grad_norm": 0.5196452140808105, + "learning_rate": 0.0005408206731375755, + "loss": 1.5369, + "step": 6897 + }, + { + "epoch": 0.7276371308016878, + "grad_norm": 0.5555493831634521, + "learning_rate": 0.000540070569562213, + "loss": 1.5405, + "step": 6898 + }, + { + "epoch": 0.7277426160337552, + "grad_norm": 0.4979768693447113, + "learning_rate": 0.0005393215063601232, + "loss": 1.5278, + "step": 6899 + }, + { + "epoch": 0.7278481012658228, + "grad_norm": 0.5646880865097046, + "learning_rate": 0.0005385734820883369, + "loss": 1.525, + "step": 6900 + }, + { + "epoch": 0.7279535864978903, + "grad_norm": 0.5847175121307373, + "learning_rate": 0.000537826495305886, + "loss": 1.562, + "step": 6901 + }, + { + "epoch": 0.7280590717299578, + "grad_norm": 0.5824342370033264, + "learning_rate": 0.000537080544573801, + "loss": 1.5441, + "step": 6902 + }, + { + "epoch": 0.7281645569620253, + "grad_norm": 0.49832651019096375, + "learning_rate": 0.000536335628455108, + "loss": 1.5617, + "step": 6903 + 
}, + { + "epoch": 0.7282700421940929, + "grad_norm": 0.5771176218986511, + "learning_rate": 0.0005355917455148267, + "loss": 1.5337, + "step": 6904 + }, + { + "epoch": 0.7283755274261603, + "grad_norm": 0.5652545690536499, + "learning_rate": 0.0005348488943199665, + "loss": 1.5294, + "step": 6905 + }, + { + "epoch": 0.7284810126582278, + "grad_norm": 0.6075310707092285, + "learning_rate": 0.0005341070734395245, + "loss": 1.5677, + "step": 6906 + }, + { + "epoch": 0.7285864978902954, + "grad_norm": 0.5299625992774963, + "learning_rate": 0.0005333662814444825, + "loss": 1.5506, + "step": 6907 + }, + { + "epoch": 0.7286919831223628, + "grad_norm": 0.5377302765846252, + "learning_rate": 0.0005326265169078048, + "loss": 1.5475, + "step": 6908 + }, + { + "epoch": 0.7287974683544304, + "grad_norm": 0.6283926367759705, + "learning_rate": 0.0005318877784044343, + "loss": 1.605, + "step": 6909 + }, + { + "epoch": 0.7289029535864979, + "grad_norm": 0.49732524156570435, + "learning_rate": 0.0005311500645112907, + "loss": 1.5416, + "step": 6910 + }, + { + "epoch": 0.7290084388185654, + "grad_norm": 0.593315839767456, + "learning_rate": 0.0005304133738072676, + "loss": 1.5539, + "step": 6911 + }, + { + "epoch": 0.7291139240506329, + "grad_norm": 0.5013739466667175, + "learning_rate": 0.0005296777048732292, + "loss": 1.4969, + "step": 6912 + }, + { + "epoch": 0.7292194092827005, + "grad_norm": 0.5769602656364441, + "learning_rate": 0.0005289430562920086, + "loss": 1.5487, + "step": 6913 + }, + { + "epoch": 0.7293248945147679, + "grad_norm": 0.6069418787956238, + "learning_rate": 0.0005282094266484041, + "loss": 1.519, + "step": 6914 + }, + { + "epoch": 0.7294303797468354, + "grad_norm": 0.6460288166999817, + "learning_rate": 0.0005274768145291769, + "loss": 1.5384, + "step": 6915 + }, + { + "epoch": 0.729535864978903, + "grad_norm": 0.5016931891441345, + "learning_rate": 0.0005267452185230483, + "loss": 1.5303, + "step": 6916 + }, + { + "epoch": 0.7296413502109704, + "grad_norm": 
0.557963490486145, + "learning_rate": 0.000526014637220697, + "loss": 1.5459, + "step": 6917 + }, + { + "epoch": 0.729746835443038, + "grad_norm": 0.4795740842819214, + "learning_rate": 0.0005252850692147567, + "loss": 1.5441, + "step": 6918 + }, + { + "epoch": 0.7298523206751055, + "grad_norm": 0.46857258677482605, + "learning_rate": 0.0005245565130998126, + "loss": 1.5326, + "step": 6919 + }, + { + "epoch": 0.729957805907173, + "grad_norm": 0.4902258813381195, + "learning_rate": 0.0005238289674723993, + "loss": 1.53, + "step": 6920 + }, + { + "epoch": 0.7300632911392405, + "grad_norm": 0.5052496790885925, + "learning_rate": 0.0005231024309309981, + "loss": 1.5458, + "step": 6921 + }, + { + "epoch": 0.7301687763713081, + "grad_norm": 0.4708039164543152, + "learning_rate": 0.0005223769020760345, + "loss": 1.5223, + "step": 6922 + }, + { + "epoch": 0.7302742616033755, + "grad_norm": 0.49621108174324036, + "learning_rate": 0.0005216523795098743, + "loss": 1.5444, + "step": 6923 + }, + { + "epoch": 0.730379746835443, + "grad_norm": 0.5251426100730896, + "learning_rate": 0.0005209288618368225, + "loss": 1.5455, + "step": 6924 + }, + { + "epoch": 0.7304852320675106, + "grad_norm": 0.45124509930610657, + "learning_rate": 0.0005202063476631198, + "loss": 1.5344, + "step": 6925 + }, + { + "epoch": 0.730590717299578, + "grad_norm": 0.5204045176506042, + "learning_rate": 0.0005194848355969396, + "loss": 1.5593, + "step": 6926 + }, + { + "epoch": 0.7306962025316456, + "grad_norm": 0.5032989382743835, + "learning_rate": 0.0005187643242483862, + "loss": 1.5308, + "step": 6927 + }, + { + "epoch": 0.7308016877637131, + "grad_norm": 0.4412859380245209, + "learning_rate": 0.0005180448122294913, + "loss": 1.5187, + "step": 6928 + }, + { + "epoch": 0.7309071729957806, + "grad_norm": 0.4908093214035034, + "learning_rate": 0.000517326298154212, + "loss": 1.5394, + "step": 6929 + }, + { + "epoch": 0.7310126582278481, + "grad_norm": 0.5381917953491211, + "learning_rate": 
0.0005166087806384274, + "loss": 1.5714, + "step": 6930 + }, + { + "epoch": 0.7311181434599157, + "grad_norm": 0.43251463770866394, + "learning_rate": 0.0005158922582999367, + "loss": 1.5488, + "step": 6931 + }, + { + "epoch": 0.7312236286919831, + "grad_norm": 0.5274459719657898, + "learning_rate": 0.0005151767297584562, + "loss": 1.5127, + "step": 6932 + }, + { + "epoch": 0.7313291139240506, + "grad_norm": 0.575049638748169, + "learning_rate": 0.0005144621936356161, + "loss": 1.5735, + "step": 6933 + }, + { + "epoch": 0.7314345991561182, + "grad_norm": 0.4349435865879059, + "learning_rate": 0.000513748648554959, + "loss": 1.5318, + "step": 6934 + }, + { + "epoch": 0.7315400843881856, + "grad_norm": 0.5491678714752197, + "learning_rate": 0.0005130360931419364, + "loss": 1.5873, + "step": 6935 + }, + { + "epoch": 0.7316455696202532, + "grad_norm": 0.5107643604278564, + "learning_rate": 0.0005123245260239057, + "loss": 1.4928, + "step": 6936 + }, + { + "epoch": 0.7317510548523207, + "grad_norm": 0.48784181475639343, + "learning_rate": 0.0005116139458301291, + "loss": 1.5172, + "step": 6937 + }, + { + "epoch": 0.7318565400843882, + "grad_norm": 0.5244420766830444, + "learning_rate": 0.0005109043511917693, + "loss": 1.5583, + "step": 6938 + }, + { + "epoch": 0.7319620253164557, + "grad_norm": 0.49956828355789185, + "learning_rate": 0.0005101957407418877, + "loss": 1.5531, + "step": 6939 + }, + { + "epoch": 0.7320675105485233, + "grad_norm": 0.5397307872772217, + "learning_rate": 0.0005094881131154418, + "loss": 1.5638, + "step": 6940 + }, + { + "epoch": 0.7321729957805907, + "grad_norm": 0.49124979972839355, + "learning_rate": 0.0005087814669492819, + "loss": 1.5554, + "step": 6941 + }, + { + "epoch": 0.7322784810126582, + "grad_norm": 0.5433766841888428, + "learning_rate": 0.0005080758008821495, + "loss": 1.5034, + "step": 6942 + }, + { + "epoch": 0.7323839662447258, + "grad_norm": 0.5484342575073242, + "learning_rate": 0.0005073711135546738, + "loss": 1.5219, + 
"step": 6943 + }, + { + "epoch": 0.7324894514767932, + "grad_norm": 0.4700629711151123, + "learning_rate": 0.0005066674036093695, + "loss": 1.5259, + "step": 6944 + }, + { + "epoch": 0.7325949367088608, + "grad_norm": 0.7126783728599548, + "learning_rate": 0.000505964669690634, + "loss": 1.5439, + "step": 6945 + }, + { + "epoch": 0.7327004219409282, + "grad_norm": 0.4676685929298401, + "learning_rate": 0.0005052629104447452, + "loss": 1.5116, + "step": 6946 + }, + { + "epoch": 0.7328059071729958, + "grad_norm": 0.6898078918457031, + "learning_rate": 0.0005045621245198582, + "loss": 1.542, + "step": 6947 + }, + { + "epoch": 0.7329113924050633, + "grad_norm": 0.4831641614437103, + "learning_rate": 0.0005038623105660032, + "loss": 1.5742, + "step": 6948 + }, + { + "epoch": 0.7330168776371307, + "grad_norm": 0.60582035779953, + "learning_rate": 0.0005031634672350829, + "loss": 1.5797, + "step": 6949 + }, + { + "epoch": 0.7331223628691983, + "grad_norm": 0.4761418402194977, + "learning_rate": 0.0005024655931808696, + "loss": 1.5374, + "step": 6950 + }, + { + "epoch": 0.7332278481012658, + "grad_norm": 0.47191429138183594, + "learning_rate": 0.0005017686870590029, + "loss": 1.5326, + "step": 6951 + }, + { + "epoch": 0.7333333333333333, + "grad_norm": 0.5319638252258301, + "learning_rate": 0.0005010727475269868, + "loss": 1.5304, + "step": 6952 + }, + { + "epoch": 0.7334388185654008, + "grad_norm": 0.4492718577384949, + "learning_rate": 0.0005003777732441875, + "loss": 1.5262, + "step": 6953 + }, + { + "epoch": 0.7335443037974684, + "grad_norm": 0.496799498796463, + "learning_rate": 0.0004996837628718307, + "loss": 1.5241, + "step": 6954 + }, + { + "epoch": 0.7336497890295358, + "grad_norm": 0.5978667736053467, + "learning_rate": 0.0004989907150729988, + "loss": 1.5351, + "step": 6955 + }, + { + "epoch": 0.7337552742616034, + "grad_norm": 0.45638683438301086, + "learning_rate": 0.0004982986285126283, + "loss": 1.5107, + "step": 6956 + }, + { + "epoch": 0.7338607594936709, 
+ "grad_norm": 0.5724701285362244, + "learning_rate": 0.0004976075018575078, + "loss": 1.5852, + "step": 6957 + }, + { + "epoch": 0.7339662447257383, + "grad_norm": 0.5027989745140076, + "learning_rate": 0.0004969173337762747, + "loss": 1.4972, + "step": 6958 + }, + { + "epoch": 0.7340717299578059, + "grad_norm": 0.4325365126132965, + "learning_rate": 0.0004962281229394129, + "loss": 1.4934, + "step": 6959 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 0.5468115210533142, + "learning_rate": 0.0004955398680192508, + "loss": 1.5455, + "step": 6960 + }, + { + "epoch": 0.7342827004219409, + "grad_norm": 0.49039095640182495, + "learning_rate": 0.0004948525676899577, + "loss": 1.5706, + "step": 6961 + }, + { + "epoch": 0.7343881856540084, + "grad_norm": 0.48494425415992737, + "learning_rate": 0.0004941662206275422, + "loss": 1.5111, + "step": 6962 + }, + { + "epoch": 0.734493670886076, + "grad_norm": 0.5570217967033386, + "learning_rate": 0.0004934808255098488, + "loss": 1.4724, + "step": 6963 + }, + { + "epoch": 0.7345991561181434, + "grad_norm": 0.47119924426078796, + "learning_rate": 0.000492796381016556, + "loss": 1.5405, + "step": 6964 + }, + { + "epoch": 0.734704641350211, + "grad_norm": 0.49364224076271057, + "learning_rate": 0.0004921128858291739, + "loss": 1.5082, + "step": 6965 + }, + { + "epoch": 0.7348101265822785, + "grad_norm": 0.439891517162323, + "learning_rate": 0.0004914303386310408, + "loss": 1.5345, + "step": 6966 + }, + { + "epoch": 0.734915611814346, + "grad_norm": 0.4645954668521881, + "learning_rate": 0.0004907487381073215, + "loss": 1.505, + "step": 6967 + }, + { + "epoch": 0.7350210970464135, + "grad_norm": 0.4426006078720093, + "learning_rate": 0.0004900680829450042, + "loss": 1.5281, + "step": 6968 + }, + { + "epoch": 0.735126582278481, + "grad_norm": 0.435122013092041, + "learning_rate": 0.0004893883718328983, + "loss": 1.5188, + "step": 6969 + }, + { + "epoch": 0.7352320675105485, + "grad_norm": 0.46116241812705994, + 
"learning_rate": 0.000488709603461632, + "loss": 1.5286, + "step": 6970 + }, + { + "epoch": 0.735337552742616, + "grad_norm": 0.4583013355731964, + "learning_rate": 0.00048803177652364935, + "loss": 1.5102, + "step": 6971 + }, + { + "epoch": 0.7354430379746836, + "grad_norm": 0.4829154908657074, + "learning_rate": 0.0004873548897132077, + "loss": 1.5188, + "step": 6972 + }, + { + "epoch": 0.735548523206751, + "grad_norm": 0.42616409063339233, + "learning_rate": 0.000486678941726376, + "loss": 1.5118, + "step": 6973 + }, + { + "epoch": 0.7356540084388186, + "grad_norm": 0.4721052348613739, + "learning_rate": 0.00048600393126103117, + "loss": 1.5222, + "step": 6974 + }, + { + "epoch": 0.7357594936708861, + "grad_norm": 0.47642770409584045, + "learning_rate": 0.0004853298570168566, + "loss": 1.5636, + "step": 6975 + }, + { + "epoch": 0.7358649789029535, + "grad_norm": 0.47210177779197693, + "learning_rate": 0.00048465671769533884, + "loss": 1.5015, + "step": 6976 + }, + { + "epoch": 0.7359704641350211, + "grad_norm": 0.4095679223537445, + "learning_rate": 0.00048398451199976574, + "loss": 1.5571, + "step": 6977 + }, + { + "epoch": 0.7360759493670886, + "grad_norm": 0.453061580657959, + "learning_rate": 0.0004833132386352233, + "loss": 1.5364, + "step": 6978 + }, + { + "epoch": 0.7361814345991561, + "grad_norm": 0.4312068223953247, + "learning_rate": 0.0004826428963085938, + "loss": 1.5308, + "step": 6979 + }, + { + "epoch": 0.7362869198312236, + "grad_norm": 0.4213617146015167, + "learning_rate": 0.000481973483728553, + "loss": 1.5181, + "step": 6980 + }, + { + "epoch": 0.7363924050632912, + "grad_norm": 0.46180662512779236, + "learning_rate": 0.0004813049996055675, + "loss": 1.5322, + "step": 6981 + }, + { + "epoch": 0.7364978902953586, + "grad_norm": 0.44460228085517883, + "learning_rate": 0.00048063744265189275, + "loss": 1.5111, + "step": 6982 + }, + { + "epoch": 0.7366033755274262, + "grad_norm": 0.4618487060070038, + "learning_rate": 0.0004799708115815701, + 
"loss": 1.4954, + "step": 6983 + }, + { + "epoch": 0.7367088607594937, + "grad_norm": 0.4412814676761627, + "learning_rate": 0.0004793051051104244, + "loss": 1.5431, + "step": 6984 + }, + { + "epoch": 0.7368143459915611, + "grad_norm": 0.5162926912307739, + "learning_rate": 0.0004786403219560618, + "loss": 1.5496, + "step": 6985 + }, + { + "epoch": 0.7369198312236287, + "grad_norm": 0.42973792552948, + "learning_rate": 0.000477976460837867, + "loss": 1.4997, + "step": 6986 + }, + { + "epoch": 0.7370253164556962, + "grad_norm": 0.47544416785240173, + "learning_rate": 0.00047731352047700095, + "loss": 1.492, + "step": 6987 + }, + { + "epoch": 0.7371308016877637, + "grad_norm": 0.467846155166626, + "learning_rate": 0.00047665149959639813, + "loss": 1.5516, + "step": 6988 + }, + { + "epoch": 0.7372362869198312, + "grad_norm": 0.478471040725708, + "learning_rate": 0.00047599039692076457, + "loss": 1.5659, + "step": 6989 + }, + { + "epoch": 0.7373417721518988, + "grad_norm": 0.47790589928627014, + "learning_rate": 0.0004753302111765748, + "loss": 1.5022, + "step": 6990 + }, + { + "epoch": 0.7374472573839662, + "grad_norm": 0.47267937660217285, + "learning_rate": 0.00047467094109206984, + "loss": 1.5571, + "step": 6991 + }, + { + "epoch": 0.7375527426160338, + "grad_norm": 0.4550853669643402, + "learning_rate": 0.0004740125853972546, + "loss": 1.5711, + "step": 6992 + }, + { + "epoch": 0.7376582278481013, + "grad_norm": 0.49018993973731995, + "learning_rate": 0.00047335514282389557, + "loss": 1.5107, + "step": 6993 + }, + { + "epoch": 0.7377637130801687, + "grad_norm": 0.4644816517829895, + "learning_rate": 0.0004726986121055179, + "loss": 1.5286, + "step": 6994 + }, + { + "epoch": 0.7378691983122363, + "grad_norm": 0.49155911803245544, + "learning_rate": 0.00047204299197740364, + "loss": 1.5393, + "step": 6995 + }, + { + "epoch": 0.7379746835443038, + "grad_norm": 0.3947535753250122, + "learning_rate": 0.0004713882811765889, + "loss": 1.5335, + "step": 6996 + }, + { + 
"epoch": 0.7380801687763713, + "grad_norm": 0.49089890718460083, + "learning_rate": 0.0004707344784418611, + "loss": 1.5233, + "step": 6997 + }, + { + "epoch": 0.7381856540084388, + "grad_norm": 0.45698991417884827, + "learning_rate": 0.0004700815825137577, + "loss": 1.5086, + "step": 6998 + }, + { + "epoch": 0.7382911392405064, + "grad_norm": 0.4739130437374115, + "learning_rate": 0.0004694295921345622, + "loss": 1.5711, + "step": 6999 + }, + { + "epoch": 0.7383966244725738, + "grad_norm": 0.4922913610935211, + "learning_rate": 0.0004687785060483032, + "loss": 1.5269, + "step": 7000 + }, + { + "epoch": 0.7385021097046414, + "grad_norm": 0.5071092844009399, + "learning_rate": 0.0004681283230007507, + "loss": 1.5113, + "step": 7001 + }, + { + "epoch": 0.7386075949367089, + "grad_norm": 0.484287828207016, + "learning_rate": 0.0004674790417394145, + "loss": 1.5328, + "step": 7002 + }, + { + "epoch": 0.7387130801687763, + "grad_norm": 0.5072734951972961, + "learning_rate": 0.00046683066101354197, + "loss": 1.5261, + "step": 7003 + }, + { + "epoch": 0.7388185654008439, + "grad_norm": 0.4238506555557251, + "learning_rate": 0.00046618317957411475, + "loss": 1.4935, + "step": 7004 + }, + { + "epoch": 0.7389240506329114, + "grad_norm": 0.45852428674697876, + "learning_rate": 0.00046553659617384684, + "loss": 1.4803, + "step": 7005 + }, + { + "epoch": 0.7390295358649789, + "grad_norm": 0.5891818404197693, + "learning_rate": 0.00046489090956718234, + "loss": 1.4994, + "step": 7006 + }, + { + "epoch": 0.7391350210970464, + "grad_norm": 0.5202815532684326, + "learning_rate": 0.00046424611851029313, + "loss": 1.5159, + "step": 7007 + }, + { + "epoch": 0.739240506329114, + "grad_norm": 0.547779381275177, + "learning_rate": 0.00046360222176107584, + "loss": 1.4958, + "step": 7008 + }, + { + "epoch": 0.7393459915611814, + "grad_norm": 0.4343096613883972, + "learning_rate": 0.00046295921807915015, + "loss": 1.5259, + "step": 7009 + }, + { + "epoch": 0.739451476793249, + "grad_norm": 
0.5305264592170715, + "learning_rate": 0.0004623171062258558, + "loss": 1.5139, + "step": 7010 + }, + { + "epoch": 0.7395569620253165, + "grad_norm": 0.5664477348327637, + "learning_rate": 0.00046167588496425074, + "loss": 1.541, + "step": 7011 + }, + { + "epoch": 0.739662447257384, + "grad_norm": 0.4491846263408661, + "learning_rate": 0.0004610355530591087, + "loss": 1.5137, + "step": 7012 + }, + { + "epoch": 0.7397679324894515, + "grad_norm": 0.5544329881668091, + "learning_rate": 0.0004603961092769163, + "loss": 1.5303, + "step": 7013 + }, + { + "epoch": 0.7398734177215189, + "grad_norm": 0.49536335468292236, + "learning_rate": 0.0004597575523858713, + "loss": 1.5008, + "step": 7014 + }, + { + "epoch": 0.7399789029535865, + "grad_norm": 0.631169855594635, + "learning_rate": 0.0004591198811558795, + "loss": 1.5377, + "step": 7015 + }, + { + "epoch": 0.740084388185654, + "grad_norm": 0.5485548377037048, + "learning_rate": 0.0004584830943585531, + "loss": 1.5411, + "step": 7016 + }, + { + "epoch": 0.7401898734177215, + "grad_norm": 0.5110917091369629, + "learning_rate": 0.0004578471907672084, + "loss": 1.514, + "step": 7017 + }, + { + "epoch": 0.740295358649789, + "grad_norm": 0.5429385304450989, + "learning_rate": 0.0004572121691568625, + "loss": 1.5476, + "step": 7018 + }, + { + "epoch": 0.7404008438818566, + "grad_norm": 0.4515678584575653, + "learning_rate": 0.00045657802830423164, + "loss": 1.4886, + "step": 7019 + }, + { + "epoch": 0.740506329113924, + "grad_norm": 0.5016543865203857, + "learning_rate": 0.0004559447669877288, + "loss": 1.54, + "step": 7020 + }, + { + "epoch": 0.7406118143459915, + "grad_norm": 0.4992453455924988, + "learning_rate": 0.00045531238398746133, + "loss": 1.5308, + "step": 7021 + }, + { + "epoch": 0.7407172995780591, + "grad_norm": 0.4468490481376648, + "learning_rate": 0.0004546808780852286, + "loss": 1.5373, + "step": 7022 + }, + { + "epoch": 0.7408227848101265, + "grad_norm": 0.4730173945426941, + "learning_rate": 
0.0004540502480645194, + "loss": 1.4811, + "step": 7023 + }, + { + "epoch": 0.7409282700421941, + "grad_norm": 0.5012477040290833, + "learning_rate": 0.0004534204927105097, + "loss": 1.5054, + "step": 7024 + }, + { + "epoch": 0.7410337552742616, + "grad_norm": 0.49354612827301025, + "learning_rate": 0.0004527916108100607, + "loss": 1.5146, + "step": 7025 + }, + { + "epoch": 0.7411392405063291, + "grad_norm": 0.45262235403060913, + "learning_rate": 0.00045216360115171613, + "loss": 1.5229, + "step": 7026 + }, + { + "epoch": 0.7412447257383966, + "grad_norm": 0.5362615585327148, + "learning_rate": 0.00045153646252569976, + "loss": 1.5665, + "step": 7027 + }, + { + "epoch": 0.7413502109704642, + "grad_norm": 0.5683720707893372, + "learning_rate": 0.00045091019372391354, + "loss": 1.5533, + "step": 7028 + }, + { + "epoch": 0.7414556962025316, + "grad_norm": 0.45193395018577576, + "learning_rate": 0.00045028479353993473, + "loss": 1.5484, + "step": 7029 + }, + { + "epoch": 0.7415611814345991, + "grad_norm": 0.5412737131118774, + "learning_rate": 0.00044966026076901413, + "loss": 1.5052, + "step": 7030 + }, + { + "epoch": 0.7416666666666667, + "grad_norm": 0.5181034803390503, + "learning_rate": 0.00044903659420807347, + "loss": 1.5274, + "step": 7031 + }, + { + "epoch": 0.7417721518987341, + "grad_norm": 0.46654418110847473, + "learning_rate": 0.000448413792655703, + "loss": 1.5547, + "step": 7032 + }, + { + "epoch": 0.7418776371308017, + "grad_norm": 0.5146116614341736, + "learning_rate": 0.0004477918549121593, + "loss": 1.5585, + "step": 7033 + }, + { + "epoch": 0.7419831223628692, + "grad_norm": 0.5036954283714294, + "learning_rate": 0.0004471707797793631, + "loss": 1.5209, + "step": 7034 + }, + { + "epoch": 0.7420886075949367, + "grad_norm": 0.4394099712371826, + "learning_rate": 0.00044655056606089655, + "loss": 1.5359, + "step": 7035 + }, + { + "epoch": 0.7421940928270042, + "grad_norm": 0.5671720504760742, + "learning_rate": 0.00044593121256200163, + "loss": 
1.5198, + "step": 7036 + }, + { + "epoch": 0.7422995780590718, + "grad_norm": 0.5348916053771973, + "learning_rate": 0.000445312718089577, + "loss": 1.527, + "step": 7037 + }, + { + "epoch": 0.7424050632911392, + "grad_norm": 0.43203607201576233, + "learning_rate": 0.0004446950814521764, + "loss": 1.5401, + "step": 7038 + }, + { + "epoch": 0.7425105485232067, + "grad_norm": 0.5797217488288879, + "learning_rate": 0.00044407830146000587, + "loss": 1.5604, + "step": 7039 + }, + { + "epoch": 0.7426160337552743, + "grad_norm": 0.47146809101104736, + "learning_rate": 0.00044346237692492177, + "loss": 1.5016, + "step": 7040 + }, + { + "epoch": 0.7427215189873417, + "grad_norm": 0.5115475058555603, + "learning_rate": 0.0004428473066604284, + "loss": 1.4883, + "step": 7041 + }, + { + "epoch": 0.7428270042194093, + "grad_norm": 0.48475152254104614, + "learning_rate": 0.0004422330894816757, + "loss": 1.5874, + "step": 7042 + }, + { + "epoch": 0.7429324894514768, + "grad_norm": 0.5007703900337219, + "learning_rate": 0.0004416197242054569, + "loss": 1.5528, + "step": 7043 + }, + { + "epoch": 0.7430379746835443, + "grad_norm": 0.48052096366882324, + "learning_rate": 0.0004410072096502064, + "loss": 1.4997, + "step": 7044 + }, + { + "epoch": 0.7431434599156118, + "grad_norm": 0.4661366045475006, + "learning_rate": 0.00044039554463599716, + "loss": 1.5311, + "step": 7045 + }, + { + "epoch": 0.7432489451476794, + "grad_norm": 0.4294412136077881, + "learning_rate": 0.00043978472798453895, + "loss": 1.533, + "step": 7046 + }, + { + "epoch": 0.7433544303797468, + "grad_norm": 0.4827269911766052, + "learning_rate": 0.0004391747585191759, + "loss": 1.4939, + "step": 7047 + }, + { + "epoch": 0.7434599156118143, + "grad_norm": 0.4447539746761322, + "learning_rate": 0.0004385656350648835, + "loss": 1.5213, + "step": 7048 + }, + { + "epoch": 0.7435654008438819, + "grad_norm": 0.46135860681533813, + "learning_rate": 0.0004379573564482676, + "loss": 1.5203, + "step": 7049 + }, + { + "epoch": 
0.7436708860759493, + "grad_norm": 0.45153218507766724, + "learning_rate": 0.0004373499214975615, + "loss": 1.4899, + "step": 7050 + }, + { + "epoch": 0.7437763713080169, + "grad_norm": 0.44256845116615295, + "learning_rate": 0.0004367433290426232, + "loss": 1.5204, + "step": 7051 + }, + { + "epoch": 0.7438818565400844, + "grad_norm": 0.45578858256340027, + "learning_rate": 0.0004361375779149342, + "loss": 1.4973, + "step": 7052 + }, + { + "epoch": 0.7439873417721519, + "grad_norm": 0.43108853697776794, + "learning_rate": 0.0004355326669475963, + "loss": 1.4828, + "step": 7053 + }, + { + "epoch": 0.7440928270042194, + "grad_norm": 0.563687264919281, + "learning_rate": 0.0004349285949753299, + "loss": 1.5206, + "step": 7054 + }, + { + "epoch": 0.744198312236287, + "grad_norm": 0.4552002549171448, + "learning_rate": 0.0004343253608344718, + "loss": 1.5621, + "step": 7055 + }, + { + "epoch": 0.7443037974683544, + "grad_norm": 0.4878755807876587, + "learning_rate": 0.0004337229633629726, + "loss": 1.5592, + "step": 7056 + }, + { + "epoch": 0.744409282700422, + "grad_norm": 0.49062827229499817, + "learning_rate": 0.0004331214014003945, + "loss": 1.5106, + "step": 7057 + }, + { + "epoch": 0.7445147679324895, + "grad_norm": 0.4523552358150482, + "learning_rate": 0.00043252067378790946, + "loss": 1.5539, + "step": 7058 + }, + { + "epoch": 0.7446202531645569, + "grad_norm": 0.4506322145462036, + "learning_rate": 0.0004319207793682963, + "loss": 1.5164, + "step": 7059 + }, + { + "epoch": 0.7447257383966245, + "grad_norm": 0.5151163935661316, + "learning_rate": 0.0004313217169859396, + "loss": 1.538, + "step": 7060 + }, + { + "epoch": 0.744831223628692, + "grad_norm": 0.49112361669540405, + "learning_rate": 0.0004307234854868261, + "loss": 1.5865, + "step": 7061 + }, + { + "epoch": 0.7449367088607595, + "grad_norm": 0.5302866101264954, + "learning_rate": 0.00043012608371854324, + "loss": 1.5167, + "step": 7062 + }, + { + "epoch": 0.745042194092827, + "grad_norm": 
0.4698333740234375, + "learning_rate": 0.00042952951053027696, + "loss": 1.5089, + "step": 7063 + }, + { + "epoch": 0.7451476793248946, + "grad_norm": 0.5170987844467163, + "learning_rate": 0.0004289337647728092, + "loss": 1.5267, + "step": 7064 + }, + { + "epoch": 0.745253164556962, + "grad_norm": 0.5629542469978333, + "learning_rate": 0.00042833884529851614, + "loss": 1.5204, + "step": 7065 + }, + { + "epoch": 0.7453586497890295, + "grad_norm": 0.46659767627716064, + "learning_rate": 0.0004277447509613654, + "loss": 1.5224, + "step": 7066 + }, + { + "epoch": 0.7454641350210971, + "grad_norm": 0.5514228940010071, + "learning_rate": 0.00042715148061691407, + "loss": 1.5248, + "step": 7067 + }, + { + "epoch": 0.7455696202531645, + "grad_norm": 0.5443357229232788, + "learning_rate": 0.00042655903312230673, + "loss": 1.5547, + "step": 7068 + }, + { + "epoch": 0.7456751054852321, + "grad_norm": 0.4754493236541748, + "learning_rate": 0.0004259674073362731, + "loss": 1.5481, + "step": 7069 + }, + { + "epoch": 0.7457805907172996, + "grad_norm": 0.4532555043697357, + "learning_rate": 0.0004253766021191256, + "loss": 1.5337, + "step": 7070 + }, + { + "epoch": 0.7458860759493671, + "grad_norm": 0.52836012840271, + "learning_rate": 0.0004247866163327576, + "loss": 1.5501, + "step": 7071 + }, + { + "epoch": 0.7459915611814346, + "grad_norm": 0.4554440975189209, + "learning_rate": 0.00042419744884064083, + "loss": 1.543, + "step": 7072 + }, + { + "epoch": 0.7460970464135022, + "grad_norm": 0.5174999833106995, + "learning_rate": 0.00042360909850782324, + "loss": 1.5271, + "step": 7073 + }, + { + "epoch": 0.7462025316455696, + "grad_norm": 0.4821670949459076, + "learning_rate": 0.0004230215642009273, + "loss": 1.5177, + "step": 7074 + }, + { + "epoch": 0.7463080168776371, + "grad_norm": 0.4322897493839264, + "learning_rate": 0.0004224348447881472, + "loss": 1.5072, + "step": 7075 + }, + { + "epoch": 0.7464135021097047, + "grad_norm": 0.4397633969783783, + "learning_rate": 
0.000421848939139247, + "loss": 1.5055, + "step": 7076 + }, + { + "epoch": 0.7465189873417721, + "grad_norm": 0.46378999948501587, + "learning_rate": 0.0004212638461255582, + "loss": 1.4972, + "step": 7077 + }, + { + "epoch": 0.7466244725738397, + "grad_norm": 0.42818915843963623, + "learning_rate": 0.0004206795646199778, + "loss": 1.5395, + "step": 7078 + }, + { + "epoch": 0.7467299578059071, + "grad_norm": 0.4345000088214874, + "learning_rate": 0.00042009609349696626, + "loss": 1.5503, + "step": 7079 + }, + { + "epoch": 0.7468354430379747, + "grad_norm": 0.46376457810401917, + "learning_rate": 0.00041951343163254497, + "loss": 1.5453, + "step": 7080 + }, + { + "epoch": 0.7469409282700422, + "grad_norm": 0.43095022439956665, + "learning_rate": 0.0004189315779042942, + "loss": 1.5034, + "step": 7081 + }, + { + "epoch": 0.7470464135021097, + "grad_norm": 0.47321557998657227, + "learning_rate": 0.00041835053119135095, + "loss": 1.554, + "step": 7082 + }, + { + "epoch": 0.7471518987341772, + "grad_norm": 0.43503642082214355, + "learning_rate": 0.00041777029037440695, + "loss": 1.5445, + "step": 7083 + }, + { + "epoch": 0.7472573839662447, + "grad_norm": 0.5028491020202637, + "learning_rate": 0.00041719085433570657, + "loss": 1.5054, + "step": 7084 + }, + { + "epoch": 0.7473628691983122, + "grad_norm": 0.49268436431884766, + "learning_rate": 0.0004166122219590441, + "loss": 1.5206, + "step": 7085 + }, + { + "epoch": 0.7474683544303797, + "grad_norm": 0.43791013956069946, + "learning_rate": 0.00041603439212976217, + "loss": 1.5056, + "step": 7086 + }, + { + "epoch": 0.7475738396624473, + "grad_norm": 0.5332858562469482, + "learning_rate": 0.00041545736373474934, + "loss": 1.5927, + "step": 7087 + }, + { + "epoch": 0.7476793248945147, + "grad_norm": 0.5037378072738647, + "learning_rate": 0.0004148811356624379, + "loss": 1.5337, + "step": 7088 + }, + { + "epoch": 0.7477848101265823, + "grad_norm": 0.48495596647262573, + "learning_rate": 0.00041430570680280233, + "loss": 
1.545, + "step": 7089 + }, + { + "epoch": 0.7478902953586498, + "grad_norm": 0.523064374923706, + "learning_rate": 0.00041373107604735626, + "loss": 1.4616, + "step": 7090 + }, + { + "epoch": 0.7479957805907173, + "grad_norm": 0.49792978167533875, + "learning_rate": 0.00041315724228915075, + "loss": 1.5022, + "step": 7091 + }, + { + "epoch": 0.7481012658227848, + "grad_norm": 0.4592820703983307, + "learning_rate": 0.00041258420442277235, + "loss": 1.4974, + "step": 7092 + }, + { + "epoch": 0.7482067510548523, + "grad_norm": 0.4402250349521637, + "learning_rate": 0.0004120119613443408, + "loss": 1.5278, + "step": 7093 + }, + { + "epoch": 0.7483122362869198, + "grad_norm": 0.4806707203388214, + "learning_rate": 0.00041144051195150685, + "loss": 1.5402, + "step": 7094 + }, + { + "epoch": 0.7484177215189873, + "grad_norm": 0.5291142463684082, + "learning_rate": 0.00041086985514345004, + "loss": 1.5437, + "step": 7095 + }, + { + "epoch": 0.7485232067510549, + "grad_norm": 0.4284331798553467, + "learning_rate": 0.0004102999898208767, + "loss": 1.5422, + "step": 7096 + }, + { + "epoch": 0.7486286919831223, + "grad_norm": 0.48729121685028076, + "learning_rate": 0.00040973091488601815, + "loss": 1.52, + "step": 7097 + }, + { + "epoch": 0.7487341772151899, + "grad_norm": 0.41935768723487854, + "learning_rate": 0.0004091626292426282, + "loss": 1.5477, + "step": 7098 + }, + { + "epoch": 0.7488396624472574, + "grad_norm": 0.4817926287651062, + "learning_rate": 0.0004085951317959809, + "loss": 1.5047, + "step": 7099 + }, + { + "epoch": 0.7489451476793249, + "grad_norm": 0.4516688585281372, + "learning_rate": 0.0004080284214528687, + "loss": 1.4943, + "step": 7100 + }, + { + "epoch": 0.7490506329113924, + "grad_norm": 0.5066152215003967, + "learning_rate": 0.00040746249712160065, + "loss": 1.5581, + "step": 7101 + }, + { + "epoch": 0.74915611814346, + "grad_norm": 0.4841637909412384, + "learning_rate": 0.0004068973577119993, + "loss": 1.5554, + "step": 7102 + }, + { + "epoch": 
0.7492616033755274, + "grad_norm": 0.4826940894126892, + "learning_rate": 0.0004063330021354, + "loss": 1.5217, + "step": 7103 + }, + { + "epoch": 0.7493670886075949, + "grad_norm": 0.48390793800354004, + "learning_rate": 0.0004057694293046476, + "loss": 1.4964, + "step": 7104 + }, + { + "epoch": 0.7494725738396625, + "grad_norm": 0.4363178014755249, + "learning_rate": 0.00040520663813409474, + "loss": 1.5488, + "step": 7105 + }, + { + "epoch": 0.7495780590717299, + "grad_norm": 0.46231809258461, + "learning_rate": 0.0004046446275396001, + "loss": 1.5577, + "step": 7106 + }, + { + "epoch": 0.7496835443037975, + "grad_norm": 0.5574545860290527, + "learning_rate": 0.00040408339643852574, + "loss": 1.5326, + "step": 7107 + }, + { + "epoch": 0.749789029535865, + "grad_norm": 0.43836510181427, + "learning_rate": 0.0004035229437497357, + "loss": 1.5138, + "step": 7108 + }, + { + "epoch": 0.7498945147679325, + "grad_norm": 0.5714179277420044, + "learning_rate": 0.00040296326839359315, + "loss": 1.5134, + "step": 7109 + }, + { + "epoch": 0.75, + "grad_norm": 0.5309195518493652, + "learning_rate": 0.0004024043692919589, + "loss": 1.5416, + "step": 7110 + }, + { + "epoch": 0.7501054852320675, + "grad_norm": 0.5411048531532288, + "learning_rate": 0.000401846245368189, + "loss": 1.5792, + "step": 7111 + }, + { + "epoch": 0.750210970464135, + "grad_norm": 0.5172308087348938, + "learning_rate": 0.00040128889554713273, + "loss": 1.5278, + "step": 7112 + }, + { + "epoch": 0.7503164556962025, + "grad_norm": 0.5390828847885132, + "learning_rate": 0.0004007323187551308, + "loss": 1.5863, + "step": 7113 + }, + { + "epoch": 0.7504219409282701, + "grad_norm": 0.48591747879981995, + "learning_rate": 0.0004001765139200129, + "loss": 1.51, + "step": 7114 + }, + { + "epoch": 0.7505274261603375, + "grad_norm": 0.4886431097984314, + "learning_rate": 0.00039962147997109587, + "loss": 1.5196, + "step": 7115 + }, + { + "epoch": 0.7506329113924051, + "grad_norm": 0.4695136249065399, + 
"learning_rate": 0.00039906721583918124, + "loss": 1.4883, + "step": 7116 + }, + { + "epoch": 0.7507383966244726, + "grad_norm": 0.47730475664138794, + "learning_rate": 0.0003985137204565541, + "loss": 1.5297, + "step": 7117 + }, + { + "epoch": 0.75084388185654, + "grad_norm": 0.44424834847450256, + "learning_rate": 0.00039796099275697986, + "loss": 1.4922, + "step": 7118 + }, + { + "epoch": 0.7509493670886076, + "grad_norm": 0.4683012068271637, + "learning_rate": 0.000397409031675703, + "loss": 1.4801, + "step": 7119 + }, + { + "epoch": 0.7510548523206751, + "grad_norm": 0.4301441013813019, + "learning_rate": 0.0003968578361494449, + "loss": 1.5608, + "step": 7120 + }, + { + "epoch": 0.7511603375527426, + "grad_norm": 0.47329288721084595, + "learning_rate": 0.0003963074051164014, + "loss": 1.5743, + "step": 7121 + }, + { + "epoch": 0.7512658227848101, + "grad_norm": 0.4397728741168976, + "learning_rate": 0.0003957577375162413, + "loss": 1.5339, + "step": 7122 + }, + { + "epoch": 0.7513713080168777, + "grad_norm": 0.4293994605541229, + "learning_rate": 0.0003952088322901039, + "loss": 1.531, + "step": 7123 + }, + { + "epoch": 0.7514767932489451, + "grad_norm": 0.45915931463241577, + "learning_rate": 0.0003946606883805972, + "loss": 1.5436, + "step": 7124 + }, + { + "epoch": 0.7515822784810127, + "grad_norm": 0.48117274045944214, + "learning_rate": 0.0003941133047317957, + "loss": 1.5483, + "step": 7125 + }, + { + "epoch": 0.7516877637130802, + "grad_norm": 0.5470882058143616, + "learning_rate": 0.0003935666802892382, + "loss": 1.5483, + "step": 7126 + }, + { + "epoch": 0.7517932489451477, + "grad_norm": 0.4597419202327728, + "learning_rate": 0.00039302081399992676, + "loss": 1.5455, + "step": 7127 + }, + { + "epoch": 0.7518987341772152, + "grad_norm": 0.5148063898086548, + "learning_rate": 0.0003924757048123232, + "loss": 1.5311, + "step": 7128 + }, + { + "epoch": 0.7520042194092827, + "grad_norm": 0.4838639199733734, + "learning_rate": 0.00039193135167634786, + 
"loss": 1.475, + "step": 7129 + }, + { + "epoch": 0.7521097046413502, + "grad_norm": 0.4336302578449249, + "learning_rate": 0.000391387753543378, + "loss": 1.526, + "step": 7130 + }, + { + "epoch": 0.7522151898734177, + "grad_norm": 0.4885268211364746, + "learning_rate": 0.0003908449093662446, + "loss": 1.5235, + "step": 7131 + }, + { + "epoch": 0.7523206751054853, + "grad_norm": 0.4380110502243042, + "learning_rate": 0.00039030281809923173, + "loss": 1.5032, + "step": 7132 + }, + { + "epoch": 0.7524261603375527, + "grad_norm": 0.40179675817489624, + "learning_rate": 0.00038976147869807345, + "loss": 1.5034, + "step": 7133 + }, + { + "epoch": 0.7525316455696203, + "grad_norm": 0.5261813402175903, + "learning_rate": 0.00038922089011995216, + "loss": 1.4981, + "step": 7134 + }, + { + "epoch": 0.7526371308016878, + "grad_norm": 0.448726624250412, + "learning_rate": 0.0003886810513234966, + "loss": 1.4806, + "step": 7135 + }, + { + "epoch": 0.7527426160337553, + "grad_norm": 0.45645344257354736, + "learning_rate": 0.0003881419612687803, + "loss": 1.5357, + "step": 7136 + }, + { + "epoch": 0.7528481012658228, + "grad_norm": 0.620891809463501, + "learning_rate": 0.0003876036189173186, + "loss": 1.5556, + "step": 7137 + }, + { + "epoch": 0.7529535864978903, + "grad_norm": 0.4438353478908539, + "learning_rate": 0.0003870660232320675, + "loss": 1.5235, + "step": 7138 + }, + { + "epoch": 0.7530590717299578, + "grad_norm": 0.4684808552265167, + "learning_rate": 0.00038652917317742123, + "loss": 1.5346, + "step": 7139 + }, + { + "epoch": 0.7531645569620253, + "grad_norm": 0.4858201742172241, + "learning_rate": 0.00038599306771921023, + "loss": 1.5245, + "step": 7140 + }, + { + "epoch": 0.7532700421940929, + "grad_norm": 0.43313518166542053, + "learning_rate": 0.00038545770582469976, + "loss": 1.5265, + "step": 7141 + }, + { + "epoch": 0.7533755274261603, + "grad_norm": 0.49534353613853455, + "learning_rate": 0.00038492308646258705, + "loss": 1.5201, + "step": 7142 + }, + { + 
"epoch": 0.7534810126582279, + "grad_norm": 0.450639545917511, + "learning_rate": 0.0003843892086029999, + "loss": 1.5105, + "step": 7143 + }, + { + "epoch": 0.7535864978902953, + "grad_norm": 0.47175243496894836, + "learning_rate": 0.0003838560712174944, + "loss": 1.5325, + "step": 7144 + }, + { + "epoch": 0.7536919831223629, + "grad_norm": 0.4821106195449829, + "learning_rate": 0.00038332367327905293, + "loss": 1.5106, + "step": 7145 + }, + { + "epoch": 0.7537974683544304, + "grad_norm": 0.49153727293014526, + "learning_rate": 0.00038279201376208285, + "loss": 1.5349, + "step": 7146 + }, + { + "epoch": 0.7539029535864978, + "grad_norm": 0.49545401334762573, + "learning_rate": 0.00038226109164241355, + "loss": 1.5253, + "step": 7147 + }, + { + "epoch": 0.7540084388185654, + "grad_norm": 0.4788646399974823, + "learning_rate": 0.000381730905897295, + "loss": 1.4928, + "step": 7148 + }, + { + "epoch": 0.7541139240506329, + "grad_norm": 0.5294283032417297, + "learning_rate": 0.0003812014555053956, + "loss": 1.4732, + "step": 7149 + }, + { + "epoch": 0.7542194092827004, + "grad_norm": 0.5039865374565125, + "learning_rate": 0.0003806727394468004, + "loss": 1.5323, + "step": 7150 + }, + { + "epoch": 0.7543248945147679, + "grad_norm": 0.473507821559906, + "learning_rate": 0.00038014475670300935, + "loss": 1.5052, + "step": 7151 + }, + { + "epoch": 0.7544303797468355, + "grad_norm": 0.4828464984893799, + "learning_rate": 0.0003796175062569345, + "loss": 1.5139, + "step": 7152 + }, + { + "epoch": 0.7545358649789029, + "grad_norm": 0.6287026405334473, + "learning_rate": 0.0003790909870928989, + "loss": 1.5082, + "step": 7153 + }, + { + "epoch": 0.7546413502109705, + "grad_norm": 0.4311036765575409, + "learning_rate": 0.0003785651981966342, + "loss": 1.489, + "step": 7154 + }, + { + "epoch": 0.754746835443038, + "grad_norm": 0.5604236721992493, + "learning_rate": 0.00037804013855527886, + "loss": 1.5698, + "step": 7155 + }, + { + "epoch": 0.7548523206751054, + "grad_norm": 
0.5759506821632385, + "learning_rate": 0.0003775158071573762, + "loss": 1.5462, + "step": 7156 + }, + { + "epoch": 0.754957805907173, + "grad_norm": 0.48802420496940613, + "learning_rate": 0.0003769922029928723, + "loss": 1.4814, + "step": 7157 + }, + { + "epoch": 0.7550632911392405, + "grad_norm": 0.5481204986572266, + "learning_rate": 0.0003764693250531141, + "loss": 1.5176, + "step": 7158 + }, + { + "epoch": 0.755168776371308, + "grad_norm": 0.6281586289405823, + "learning_rate": 0.00037594717233084774, + "loss": 1.5191, + "step": 7159 + }, + { + "epoch": 0.7552742616033755, + "grad_norm": 0.47377708554267883, + "learning_rate": 0.0003754257438202162, + "loss": 1.5617, + "step": 7160 + }, + { + "epoch": 0.7553797468354431, + "grad_norm": 0.5991808176040649, + "learning_rate": 0.0003749050385167578, + "loss": 1.5371, + "step": 7161 + }, + { + "epoch": 0.7554852320675105, + "grad_norm": 0.5201156139373779, + "learning_rate": 0.00037438505541740366, + "loss": 1.519, + "step": 7162 + }, + { + "epoch": 0.755590717299578, + "grad_norm": 0.520068883895874, + "learning_rate": 0.0003738657935204763, + "loss": 1.5107, + "step": 7163 + }, + { + "epoch": 0.7556962025316456, + "grad_norm": 0.6284279823303223, + "learning_rate": 0.00037334725182568764, + "loss": 1.5356, + "step": 7164 + }, + { + "epoch": 0.755801687763713, + "grad_norm": 0.45455965399742126, + "learning_rate": 0.00037282942933413685, + "loss": 1.5394, + "step": 7165 + }, + { + "epoch": 0.7559071729957806, + "grad_norm": 0.5479934215545654, + "learning_rate": 0.00037231232504830866, + "loss": 1.492, + "step": 7166 + }, + { + "epoch": 0.7560126582278481, + "grad_norm": 0.6013789772987366, + "learning_rate": 0.0003717959379720712, + "loss": 1.5226, + "step": 7167 + }, + { + "epoch": 0.7561181434599156, + "grad_norm": 0.46028947830200195, + "learning_rate": 0.0003712802671106742, + "loss": 1.5332, + "step": 7168 + }, + { + "epoch": 0.7562236286919831, + "grad_norm": 0.6012073755264282, + "learning_rate": 
0.0003707653114707471, + "loss": 1.5523, + "step": 7169 + }, + { + "epoch": 0.7563291139240507, + "grad_norm": 0.5725463032722473, + "learning_rate": 0.0003702510700602974, + "loss": 1.5618, + "step": 7170 + }, + { + "epoch": 0.7564345991561181, + "grad_norm": 0.4723758101463318, + "learning_rate": 0.00036973754188870803, + "loss": 1.513, + "step": 7171 + }, + { + "epoch": 0.7565400843881857, + "grad_norm": 0.612431526184082, + "learning_rate": 0.00036922472596673614, + "loss": 1.5214, + "step": 7172 + }, + { + "epoch": 0.7566455696202532, + "grad_norm": 0.5117740035057068, + "learning_rate": 0.0003687126213065109, + "loss": 1.5223, + "step": 7173 + }, + { + "epoch": 0.7567510548523206, + "grad_norm": 0.6062926054000854, + "learning_rate": 0.0003682012269215314, + "loss": 1.4937, + "step": 7174 + }, + { + "epoch": 0.7568565400843882, + "grad_norm": 0.5521675944328308, + "learning_rate": 0.0003676905418266654, + "loss": 1.5327, + "step": 7175 + }, + { + "epoch": 0.7569620253164557, + "grad_norm": 0.4586500823497772, + "learning_rate": 0.00036718056503814674, + "loss": 1.5036, + "step": 7176 + }, + { + "epoch": 0.7570675105485232, + "grad_norm": 0.5651522278785706, + "learning_rate": 0.00036667129557357375, + "loss": 1.525, + "step": 7177 + }, + { + "epoch": 0.7571729957805907, + "grad_norm": 0.5301135182380676, + "learning_rate": 0.0003661627324519073, + "loss": 1.4717, + "step": 7178 + }, + { + "epoch": 0.7572784810126583, + "grad_norm": 0.44709864258766174, + "learning_rate": 0.00036565487469346906, + "loss": 1.4948, + "step": 7179 + }, + { + "epoch": 0.7573839662447257, + "grad_norm": 0.6086869835853577, + "learning_rate": 0.0003651477213199393, + "loss": 1.5189, + "step": 7180 + }, + { + "epoch": 0.7574894514767933, + "grad_norm": 0.4575928747653961, + "learning_rate": 0.0003646412713543554, + "loss": 1.4973, + "step": 7181 + }, + { + "epoch": 0.7575949367088608, + "grad_norm": 0.46181899309158325, + "learning_rate": 0.0003641355238211096, + "loss": 1.5286, + 
"step": 7182 + }, + { + "epoch": 0.7577004219409282, + "grad_norm": 0.4870898127555847, + "learning_rate": 0.0003636304777459472, + "loss": 1.4782, + "step": 7183 + }, + { + "epoch": 0.7578059071729958, + "grad_norm": 0.46541211009025574, + "learning_rate": 0.0003631261321559652, + "loss": 1.53, + "step": 7184 + }, + { + "epoch": 0.7579113924050633, + "grad_norm": 0.5230966210365295, + "learning_rate": 0.0003626224860796095, + "loss": 1.5529, + "step": 7185 + }, + { + "epoch": 0.7580168776371308, + "grad_norm": 0.47401633858680725, + "learning_rate": 0.0003621195385466738, + "loss": 1.4692, + "step": 7186 + }, + { + "epoch": 0.7581223628691983, + "grad_norm": 0.5433784127235413, + "learning_rate": 0.0003616172885882972, + "loss": 1.541, + "step": 7187 + }, + { + "epoch": 0.7582278481012659, + "grad_norm": 0.4681251347064972, + "learning_rate": 0.0003611157352369628, + "loss": 1.4892, + "step": 7188 + }, + { + "epoch": 0.7583333333333333, + "grad_norm": 0.5100746750831604, + "learning_rate": 0.0003606148775264958, + "loss": 1.5083, + "step": 7189 + }, + { + "epoch": 0.7584388185654009, + "grad_norm": 0.4604494869709015, + "learning_rate": 0.000360114714492061, + "loss": 1.4956, + "step": 7190 + }, + { + "epoch": 0.7585443037974684, + "grad_norm": 0.4779258072376251, + "learning_rate": 0.0003596152451701616, + "loss": 1.5057, + "step": 7191 + }, + { + "epoch": 0.7586497890295358, + "grad_norm": 0.4830056428909302, + "learning_rate": 0.00035911646859863725, + "loss": 1.5108, + "step": 7192 + }, + { + "epoch": 0.7587552742616034, + "grad_norm": 0.4447198510169983, + "learning_rate": 0.00035861838381666194, + "loss": 1.5272, + "step": 7193 + }, + { + "epoch": 0.7588607594936709, + "grad_norm": 0.5201935172080994, + "learning_rate": 0.0003581209898647425, + "loss": 1.4899, + "step": 7194 + }, + { + "epoch": 0.7589662447257384, + "grad_norm": 0.45016908645629883, + "learning_rate": 0.0003576242857847163, + "loss": 1.5496, + "step": 7195 + }, + { + "epoch": 
0.7590717299578059, + "grad_norm": 0.5461925268173218, + "learning_rate": 0.0003571282706197498, + "loss": 1.5556, + "step": 7196 + }, + { + "epoch": 0.7591772151898735, + "grad_norm": 0.44843021035194397, + "learning_rate": 0.0003566329434143366, + "loss": 1.4923, + "step": 7197 + }, + { + "epoch": 0.7592827004219409, + "grad_norm": 0.5161300301551819, + "learning_rate": 0.00035613830321429534, + "loss": 1.5262, + "step": 7198 + }, + { + "epoch": 0.7593881856540085, + "grad_norm": 0.4729826748371124, + "learning_rate": 0.0003556443490667684, + "loss": 1.5095, + "step": 7199 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 0.45623841881752014, + "learning_rate": 0.0003551510800202195, + "loss": 1.497, + "step": 7200 + }, + { + "epoch": 0.7595991561181434, + "grad_norm": 0.5028412938117981, + "learning_rate": 0.0003546584951244323, + "loss": 1.5002, + "step": 7201 + }, + { + "epoch": 0.759704641350211, + "grad_norm": 0.4603593349456787, + "learning_rate": 0.00035416659343050807, + "loss": 1.508, + "step": 7202 + }, + { + "epoch": 0.7598101265822785, + "grad_norm": 0.48026788234710693, + "learning_rate": 0.0003536753739908646, + "loss": 1.5279, + "step": 7203 + }, + { + "epoch": 0.759915611814346, + "grad_norm": 0.43785303831100464, + "learning_rate": 0.0003531848358592338, + "loss": 1.5189, + "step": 7204 + }, + { + "epoch": 0.7600210970464135, + "grad_norm": 0.4649600088596344, + "learning_rate": 0.00035269497809065976, + "loss": 1.5537, + "step": 7205 + }, + { + "epoch": 0.7601265822784811, + "grad_norm": 0.4919481873512268, + "learning_rate": 0.00035220579974149755, + "loss": 1.5368, + "step": 7206 + }, + { + "epoch": 0.7602320675105485, + "grad_norm": 0.4992193281650543, + "learning_rate": 0.00035171729986941075, + "loss": 1.5118, + "step": 7207 + }, + { + "epoch": 0.760337552742616, + "grad_norm": 0.5554006099700928, + "learning_rate": 0.00035122947753337037, + "loss": 1.5393, + "step": 7208 + }, + { + "epoch": 0.7604430379746835, + "grad_norm": 
0.5136773586273193, + "learning_rate": 0.0003507423317936521, + "loss": 1.499, + "step": 7209 + }, + { + "epoch": 0.760548523206751, + "grad_norm": 0.4227517247200012, + "learning_rate": 0.0003502558617118353, + "loss": 1.5237, + "step": 7210 + }, + { + "epoch": 0.7606540084388186, + "grad_norm": 0.46549299359321594, + "learning_rate": 0.0003497700663508009, + "loss": 1.5517, + "step": 7211 + }, + { + "epoch": 0.760759493670886, + "grad_norm": 0.5477752089500427, + "learning_rate": 0.0003492849447747293, + "loss": 1.5116, + "step": 7212 + }, + { + "epoch": 0.7608649789029536, + "grad_norm": 0.5222634077072144, + "learning_rate": 0.00034880049604909933, + "loss": 1.5321, + "step": 7213 + }, + { + "epoch": 0.7609704641350211, + "grad_norm": 0.5421080589294434, + "learning_rate": 0.00034831671924068555, + "loss": 1.5408, + "step": 7214 + }, + { + "epoch": 0.7610759493670886, + "grad_norm": 0.5208792090415955, + "learning_rate": 0.00034783361341755707, + "loss": 1.5309, + "step": 7215 + }, + { + "epoch": 0.7611814345991561, + "grad_norm": 0.49873366951942444, + "learning_rate": 0.0003473511776490756, + "loss": 1.5203, + "step": 7216 + }, + { + "epoch": 0.7612869198312237, + "grad_norm": 0.5086596608161926, + "learning_rate": 0.00034686941100589336, + "loss": 1.5412, + "step": 7217 + }, + { + "epoch": 0.7613924050632911, + "grad_norm": 0.47228971123695374, + "learning_rate": 0.0003463883125599521, + "loss": 1.5427, + "step": 7218 + }, + { + "epoch": 0.7614978902953586, + "grad_norm": 0.4718664586544037, + "learning_rate": 0.00034590788138448006, + "loss": 1.5179, + "step": 7219 + }, + { + "epoch": 0.7616033755274262, + "grad_norm": 0.48186349868774414, + "learning_rate": 0.0003454281165539914, + "loss": 1.5043, + "step": 7220 + }, + { + "epoch": 0.7617088607594936, + "grad_norm": 0.4828650951385498, + "learning_rate": 0.00034494901714428365, + "loss": 1.536, + "step": 7221 + }, + { + "epoch": 0.7618143459915612, + "grad_norm": 0.49547502398490906, + "learning_rate": 
0.0003444705822324364, + "loss": 1.5191, + "step": 7222 + }, + { + "epoch": 0.7619198312236287, + "grad_norm": 0.45523950457572937, + "learning_rate": 0.0003439928108968091, + "loss": 1.5047, + "step": 7223 + }, + { + "epoch": 0.7620253164556962, + "grad_norm": 0.4441123306751251, + "learning_rate": 0.0003435157022170396, + "loss": 1.5123, + "step": 7224 + }, + { + "epoch": 0.7621308016877637, + "grad_norm": 0.50005704164505, + "learning_rate": 0.0003430392552740422, + "loss": 1.5472, + "step": 7225 + }, + { + "epoch": 0.7622362869198313, + "grad_norm": 0.4155592620372772, + "learning_rate": 0.0003425634691500059, + "loss": 1.527, + "step": 7226 + }, + { + "epoch": 0.7623417721518987, + "grad_norm": 0.47661009430885315, + "learning_rate": 0.0003420883429283929, + "loss": 1.4991, + "step": 7227 + }, + { + "epoch": 0.7624472573839662, + "grad_norm": 0.4774928689002991, + "learning_rate": 0.00034161387569393647, + "loss": 1.5245, + "step": 7228 + }, + { + "epoch": 0.7625527426160338, + "grad_norm": 0.5262956619262695, + "learning_rate": 0.0003411400665326393, + "loss": 1.5734, + "step": 7229 + }, + { + "epoch": 0.7626582278481012, + "grad_norm": 0.46142587065696716, + "learning_rate": 0.00034066691453177176, + "loss": 1.5297, + "step": 7230 + }, + { + "epoch": 0.7627637130801688, + "grad_norm": 0.49738645553588867, + "learning_rate": 0.00034019441877987015, + "loss": 1.5514, + "step": 7231 + }, + { + "epoch": 0.7628691983122363, + "grad_norm": 0.48326608538627625, + "learning_rate": 0.00033972257836673513, + "loss": 1.5221, + "step": 7232 + }, + { + "epoch": 0.7629746835443038, + "grad_norm": 0.46682408452033997, + "learning_rate": 0.00033925139238342956, + "loss": 1.5107, + "step": 7233 + }, + { + "epoch": 0.7630801687763713, + "grad_norm": 0.559844970703125, + "learning_rate": 0.0003387808599222771, + "loss": 1.5261, + "step": 7234 + }, + { + "epoch": 0.7631856540084389, + "grad_norm": 0.586301863193512, + "learning_rate": 0.0003383109800768603, + "loss": 1.541, + 
"step": 7235 + }, + { + "epoch": 0.7632911392405063, + "grad_norm": 0.4854789078235626, + "learning_rate": 0.0003378417519420187, + "loss": 1.5032, + "step": 7236 + }, + { + "epoch": 0.7633966244725738, + "grad_norm": 0.5131997466087341, + "learning_rate": 0.00033737317461384766, + "loss": 1.556, + "step": 7237 + }, + { + "epoch": 0.7635021097046414, + "grad_norm": 0.5481990575790405, + "learning_rate": 0.00033690524718969593, + "loss": 1.5492, + "step": 7238 + }, + { + "epoch": 0.7636075949367088, + "grad_norm": 0.48803794384002686, + "learning_rate": 0.00033643796876816424, + "loss": 1.5082, + "step": 7239 + }, + { + "epoch": 0.7637130801687764, + "grad_norm": 0.5214624404907227, + "learning_rate": 0.0003359713384491037, + "loss": 1.5122, + "step": 7240 + }, + { + "epoch": 0.7638185654008439, + "grad_norm": 0.5683119893074036, + "learning_rate": 0.00033550535533361366, + "loss": 1.4833, + "step": 7241 + }, + { + "epoch": 0.7639240506329114, + "grad_norm": 0.44384464621543884, + "learning_rate": 0.0003350400185240405, + "loss": 1.569, + "step": 7242 + }, + { + "epoch": 0.7640295358649789, + "grad_norm": 0.48305878043174744, + "learning_rate": 0.0003345753271239754, + "loss": 1.5355, + "step": 7243 + }, + { + "epoch": 0.7641350210970465, + "grad_norm": 0.565956711769104, + "learning_rate": 0.00033411128023825296, + "loss": 1.5162, + "step": 7244 + }, + { + "epoch": 0.7642405063291139, + "grad_norm": 0.4637809991836548, + "learning_rate": 0.0003336478769729492, + "loss": 1.4923, + "step": 7245 + }, + { + "epoch": 0.7643459915611814, + "grad_norm": 0.4785992205142975, + "learning_rate": 0.0003331851164353802, + "loss": 1.4947, + "step": 7246 + }, + { + "epoch": 0.764451476793249, + "grad_norm": 0.4922090470790863, + "learning_rate": 0.00033272299773410007, + "loss": 1.5279, + "step": 7247 + }, + { + "epoch": 0.7645569620253164, + "grad_norm": 0.43748775124549866, + "learning_rate": 0.0003322615199788993, + "loss": 1.494, + "step": 7248 + }, + { + "epoch": 
0.764662447257384, + "grad_norm": 0.46993544697761536, + "learning_rate": 0.000331800682280803, + "loss": 1.568, + "step": 7249 + }, + { + "epoch": 0.7647679324894515, + "grad_norm": 0.5029410719871521, + "learning_rate": 0.00033134048375206944, + "loss": 1.5134, + "step": 7250 + }, + { + "epoch": 0.764873417721519, + "grad_norm": 0.4553038775920868, + "learning_rate": 0.0003308809235061881, + "loss": 1.527, + "step": 7251 + }, + { + "epoch": 0.7649789029535865, + "grad_norm": 0.4251484274864197, + "learning_rate": 0.000330422000657878, + "loss": 1.489, + "step": 7252 + }, + { + "epoch": 0.765084388185654, + "grad_norm": 0.49786296486854553, + "learning_rate": 0.00032996371432308605, + "loss": 1.5262, + "step": 7253 + }, + { + "epoch": 0.7651898734177215, + "grad_norm": 0.576773464679718, + "learning_rate": 0.00032950606361898527, + "loss": 1.5277, + "step": 7254 + }, + { + "epoch": 0.765295358649789, + "grad_norm": 0.42370814085006714, + "learning_rate": 0.0003290490476639731, + "loss": 1.5282, + "step": 7255 + }, + { + "epoch": 0.7654008438818566, + "grad_norm": 0.5374912023544312, + "learning_rate": 0.00032859266557766996, + "loss": 1.5236, + "step": 7256 + }, + { + "epoch": 0.765506329113924, + "grad_norm": 0.627208948135376, + "learning_rate": 0.000328136916480917, + "loss": 1.5342, + "step": 7257 + }, + { + "epoch": 0.7656118143459916, + "grad_norm": 0.4333391487598419, + "learning_rate": 0.00032768179949577516, + "loss": 1.5022, + "step": 7258 + }, + { + "epoch": 0.7657172995780591, + "grad_norm": 0.5399424433708191, + "learning_rate": 0.0003272273137455226, + "loss": 1.5452, + "step": 7259 + }, + { + "epoch": 0.7658227848101266, + "grad_norm": 0.4923236072063446, + "learning_rate": 0.0003267734583546536, + "loss": 1.4685, + "step": 7260 + }, + { + "epoch": 0.7659282700421941, + "grad_norm": 0.4698296785354614, + "learning_rate": 0.0003263202324488771, + "loss": 1.4848, + "step": 7261 + }, + { + "epoch": 0.7660337552742617, + "grad_norm": 0.4685630202293396, 
+ "learning_rate": 0.0003258676351551143, + "loss": 1.4939, + "step": 7262 + }, + { + "epoch": 0.7661392405063291, + "grad_norm": 0.43930912017822266, + "learning_rate": 0.0003254156656014973, + "loss": 1.512, + "step": 7263 + }, + { + "epoch": 0.7662447257383966, + "grad_norm": 0.46170657873153687, + "learning_rate": 0.0003249643229173677, + "loss": 1.5429, + "step": 7264 + }, + { + "epoch": 0.7663502109704642, + "grad_norm": 0.5445401072502136, + "learning_rate": 0.0003245136062332745, + "loss": 1.4954, + "step": 7265 + }, + { + "epoch": 0.7664556962025316, + "grad_norm": 0.4706728458404541, + "learning_rate": 0.0003240635146809727, + "loss": 1.4842, + "step": 7266 + }, + { + "epoch": 0.7665611814345992, + "grad_norm": 0.465275377035141, + "learning_rate": 0.0003236140473934215, + "loss": 1.5023, + "step": 7267 + }, + { + "epoch": 0.7666666666666667, + "grad_norm": 0.4905329942703247, + "learning_rate": 0.0003231652035047826, + "loss": 1.5378, + "step": 7268 + }, + { + "epoch": 0.7667721518987342, + "grad_norm": 0.433250367641449, + "learning_rate": 0.00032271698215041863, + "loss": 1.5278, + "step": 7269 + }, + { + "epoch": 0.7668776371308017, + "grad_norm": 0.4492381811141968, + "learning_rate": 0.00032226938246689157, + "loss": 1.5558, + "step": 7270 + }, + { + "epoch": 0.7669831223628693, + "grad_norm": 0.4395883083343506, + "learning_rate": 0.00032182240359196083, + "loss": 1.5235, + "step": 7271 + }, + { + "epoch": 0.7670886075949367, + "grad_norm": 0.46264567971229553, + "learning_rate": 0.0003213760446645818, + "loss": 1.5227, + "step": 7272 + }, + { + "epoch": 0.7671940928270042, + "grad_norm": 0.5490449070930481, + "learning_rate": 0.00032093030482490396, + "loss": 1.5246, + "step": 7273 + }, + { + "epoch": 0.7672995780590718, + "grad_norm": 0.4366523325443268, + "learning_rate": 0.00032048518321426946, + "loss": 1.481, + "step": 7274 + }, + { + "epoch": 0.7674050632911392, + "grad_norm": 0.4229549169540405, + "learning_rate": 0.0003200406789752116, + 
"loss": 1.5131, + "step": 7275 + }, + { + "epoch": 0.7675105485232068, + "grad_norm": 0.514065146446228, + "learning_rate": 0.00031959679125145277, + "loss": 1.56, + "step": 7276 + }, + { + "epoch": 0.7676160337552742, + "grad_norm": 0.5058407187461853, + "learning_rate": 0.0003191535191879029, + "loss": 1.4931, + "step": 7277 + }, + { + "epoch": 0.7677215189873418, + "grad_norm": 0.4359470009803772, + "learning_rate": 0.000318710861930658, + "loss": 1.5412, + "step": 7278 + }, + { + "epoch": 0.7678270042194093, + "grad_norm": 0.46236300468444824, + "learning_rate": 0.0003182688186269984, + "loss": 1.5219, + "step": 7279 + }, + { + "epoch": 0.7679324894514767, + "grad_norm": 0.4985194206237793, + "learning_rate": 0.0003178273884253874, + "loss": 1.4942, + "step": 7280 + }, + { + "epoch": 0.7680379746835443, + "grad_norm": 0.466630220413208, + "learning_rate": 0.0003173865704754689, + "loss": 1.5575, + "step": 7281 + }, + { + "epoch": 0.7681434599156118, + "grad_norm": 0.5047515630722046, + "learning_rate": 0.0003169463639280665, + "loss": 1.5704, + "step": 7282 + }, + { + "epoch": 0.7682489451476793, + "grad_norm": 0.4358955919742584, + "learning_rate": 0.00031650676793518157, + "loss": 1.5231, + "step": 7283 + }, + { + "epoch": 0.7683544303797468, + "grad_norm": 0.4824666678905487, + "learning_rate": 0.00031606778164999143, + "loss": 1.5114, + "step": 7284 + }, + { + "epoch": 0.7684599156118144, + "grad_norm": 0.4649924337863922, + "learning_rate": 0.00031562940422684833, + "loss": 1.5346, + "step": 7285 + }, + { + "epoch": 0.7685654008438818, + "grad_norm": 0.5150036215782166, + "learning_rate": 0.00031519163482127696, + "loss": 1.5197, + "step": 7286 + }, + { + "epoch": 0.7686708860759494, + "grad_norm": 0.49742093682289124, + "learning_rate": 0.00031475447258997355, + "loss": 1.5136, + "step": 7287 + }, + { + "epoch": 0.7687763713080169, + "grad_norm": 0.48550671339035034, + "learning_rate": 0.0003143179166908038, + "loss": 1.5061, + "step": 7288 + }, + { + 
"epoch": 0.7688818565400843, + "grad_norm": 0.5942526459693909, + "learning_rate": 0.0003138819662828017, + "loss": 1.5384, + "step": 7289 + }, + { + "epoch": 0.7689873417721519, + "grad_norm": 0.463473916053772, + "learning_rate": 0.0003134466205261674, + "loss": 1.5046, + "step": 7290 + }, + { + "epoch": 0.7690928270042194, + "grad_norm": 0.4554780125617981, + "learning_rate": 0.0003130118785822658, + "loss": 1.5911, + "step": 7291 + }, + { + "epoch": 0.7691983122362869, + "grad_norm": 0.5002447366714478, + "learning_rate": 0.0003125777396136251, + "loss": 1.5398, + "step": 7292 + }, + { + "epoch": 0.7693037974683544, + "grad_norm": 0.4926147758960724, + "learning_rate": 0.00031214420278393487, + "loss": 1.4932, + "step": 7293 + }, + { + "epoch": 0.769409282700422, + "grad_norm": 0.49334704875946045, + "learning_rate": 0.00031171126725804496, + "loss": 1.4974, + "step": 7294 + }, + { + "epoch": 0.7695147679324894, + "grad_norm": 0.545994222164154, + "learning_rate": 0.0003112789322019633, + "loss": 1.5533, + "step": 7295 + }, + { + "epoch": 0.769620253164557, + "grad_norm": 0.5108402371406555, + "learning_rate": 0.0003108471967828545, + "loss": 1.5345, + "step": 7296 + }, + { + "epoch": 0.7697257383966245, + "grad_norm": 0.4606148898601532, + "learning_rate": 0.00031041606016903847, + "loss": 1.517, + "step": 7297 + }, + { + "epoch": 0.7698312236286919, + "grad_norm": 0.5326936841011047, + "learning_rate": 0.00030998552152998834, + "loss": 1.5076, + "step": 7298 + }, + { + "epoch": 0.7699367088607595, + "grad_norm": 0.4955883324146271, + "learning_rate": 0.00030955558003632966, + "loss": 1.521, + "step": 7299 + }, + { + "epoch": 0.770042194092827, + "grad_norm": 0.4963744878768921, + "learning_rate": 0.0003091262348598378, + "loss": 1.5626, + "step": 7300 + }, + { + "epoch": 0.7701476793248945, + "grad_norm": 0.45918890833854675, + "learning_rate": 0.00030869748517343705, + "loss": 1.5241, + "step": 7301 + }, + { + "epoch": 0.770253164556962, + "grad_norm": 
0.5075101852416992, + "learning_rate": 0.000308269330151199, + "loss": 1.474, + "step": 7302 + }, + { + "epoch": 0.7703586497890296, + "grad_norm": 0.48181527853012085, + "learning_rate": 0.00030784176896834033, + "loss": 1.5195, + "step": 7303 + }, + { + "epoch": 0.770464135021097, + "grad_norm": 0.5091479420661926, + "learning_rate": 0.0003074148008012223, + "loss": 1.5256, + "step": 7304 + }, + { + "epoch": 0.7705696202531646, + "grad_norm": 0.594851553440094, + "learning_rate": 0.00030698842482734806, + "loss": 1.4932, + "step": 7305 + }, + { + "epoch": 0.7706751054852321, + "grad_norm": 0.42567554116249084, + "learning_rate": 0.0003065626402253616, + "loss": 1.5179, + "step": 7306 + }, + { + "epoch": 0.7707805907172995, + "grad_norm": 0.4837222993373871, + "learning_rate": 0.00030613744617504624, + "loss": 1.5344, + "step": 7307 + }, + { + "epoch": 0.7708860759493671, + "grad_norm": 0.6072279810905457, + "learning_rate": 0.00030571284185732276, + "loss": 1.5176, + "step": 7308 + }, + { + "epoch": 0.7709915611814346, + "grad_norm": 0.48138466477394104, + "learning_rate": 0.0003052888264542483, + "loss": 1.5147, + "step": 7309 + }, + { + "epoch": 0.7710970464135021, + "grad_norm": 0.45973870158195496, + "learning_rate": 0.0003048653991490141, + "loss": 1.503, + "step": 7310 + }, + { + "epoch": 0.7712025316455696, + "grad_norm": 0.5439104437828064, + "learning_rate": 0.0003044425591259445, + "loss": 1.5084, + "step": 7311 + }, + { + "epoch": 0.7713080168776372, + "grad_norm": 0.48476725816726685, + "learning_rate": 0.0003040203055704949, + "loss": 1.5319, + "step": 7312 + }, + { + "epoch": 0.7714135021097046, + "grad_norm": 0.43819084763526917, + "learning_rate": 0.000303598637669251, + "loss": 1.5226, + "step": 7313 + }, + { + "epoch": 0.7715189873417722, + "grad_norm": 0.4905666410923004, + "learning_rate": 0.0003031775546099261, + "loss": 1.4973, + "step": 7314 + }, + { + "epoch": 0.7716244725738397, + "grad_norm": 0.44105851650238037, + "learning_rate": 
0.0003027570555813604, + "loss": 1.4925, + "step": 7315 + }, + { + "epoch": 0.7717299578059071, + "grad_norm": 0.5060027837753296, + "learning_rate": 0.00030233713977351906, + "loss": 1.5271, + "step": 7316 + }, + { + "epoch": 0.7718354430379747, + "grad_norm": 0.462352454662323, + "learning_rate": 0.00030191780637749084, + "loss": 1.479, + "step": 7317 + }, + { + "epoch": 0.7719409282700422, + "grad_norm": 0.4320354163646698, + "learning_rate": 0.0003014990545854864, + "loss": 1.4897, + "step": 7318 + }, + { + "epoch": 0.7720464135021097, + "grad_norm": 0.5208417773246765, + "learning_rate": 0.0003010808835908368, + "loss": 1.528, + "step": 7319 + }, + { + "epoch": 0.7721518987341772, + "grad_norm": 0.526212215423584, + "learning_rate": 0.00030066329258799187, + "loss": 1.5097, + "step": 7320 + }, + { + "epoch": 0.7722573839662448, + "grad_norm": 0.4905802309513092, + "learning_rate": 0.0003002462807725186, + "loss": 1.5602, + "step": 7321 + }, + { + "epoch": 0.7723628691983122, + "grad_norm": 0.5224194526672363, + "learning_rate": 0.00029982984734109995, + "loss": 1.4966, + "step": 7322 + }, + { + "epoch": 0.7724683544303798, + "grad_norm": 0.41340774297714233, + "learning_rate": 0.00029941399149153303, + "loss": 1.4991, + "step": 7323 + }, + { + "epoch": 0.7725738396624473, + "grad_norm": 0.45729508996009827, + "learning_rate": 0.00029899871242272745, + "loss": 1.5118, + "step": 7324 + }, + { + "epoch": 0.7726793248945147, + "grad_norm": 0.5007904767990112, + "learning_rate": 0.000298584009334704, + "loss": 1.5371, + "step": 7325 + }, + { + "epoch": 0.7727848101265823, + "grad_norm": 0.4312205910682678, + "learning_rate": 0.00029816988142859286, + "loss": 1.511, + "step": 7326 + }, + { + "epoch": 0.7728902953586498, + "grad_norm": 0.419544517993927, + "learning_rate": 0.0002977563279066324, + "loss": 1.4808, + "step": 7327 + }, + { + "epoch": 0.7729957805907173, + "grad_norm": 0.4282711148262024, + "learning_rate": 0.0002973433479721675, + "loss": 1.5094, + 
"step": 7328 + }, + { + "epoch": 0.7731012658227848, + "grad_norm": 0.48061439394950867, + "learning_rate": 0.00029693094082964785, + "loss": 1.4829, + "step": 7329 + }, + { + "epoch": 0.7732067510548524, + "grad_norm": 0.44154366850852966, + "learning_rate": 0.0002965191056846266, + "loss": 1.568, + "step": 7330 + }, + { + "epoch": 0.7733122362869198, + "grad_norm": 0.4813012182712555, + "learning_rate": 0.0002961078417437587, + "loss": 1.4933, + "step": 7331 + }, + { + "epoch": 0.7734177215189874, + "grad_norm": 0.480756551027298, + "learning_rate": 0.0002956971482147996, + "loss": 1.5202, + "step": 7332 + }, + { + "epoch": 0.7735232067510549, + "grad_norm": 0.48635730147361755, + "learning_rate": 0.0002952870243066035, + "loss": 1.5246, + "step": 7333 + }, + { + "epoch": 0.7736286919831223, + "grad_norm": 0.4382462799549103, + "learning_rate": 0.00029487746922912173, + "loss": 1.4973, + "step": 7334 + }, + { + "epoch": 0.7737341772151899, + "grad_norm": 0.44947320222854614, + "learning_rate": 0.00029446848219340173, + "loss": 1.542, + "step": 7335 + }, + { + "epoch": 0.7738396624472574, + "grad_norm": 0.4732278287410736, + "learning_rate": 0.00029406006241158487, + "loss": 1.4938, + "step": 7336 + }, + { + "epoch": 0.7739451476793249, + "grad_norm": 0.43985113501548767, + "learning_rate": 0.0002936522090969055, + "loss": 1.4811, + "step": 7337 + }, + { + "epoch": 0.7740506329113924, + "grad_norm": 0.4508075416088104, + "learning_rate": 0.00029324492146368906, + "loss": 1.5084, + "step": 7338 + }, + { + "epoch": 0.77415611814346, + "grad_norm": 0.45869970321655273, + "learning_rate": 0.0002928381987273508, + "loss": 1.5337, + "step": 7339 + }, + { + "epoch": 0.7742616033755274, + "grad_norm": 0.5038483738899231, + "learning_rate": 0.000292432040104394, + "loss": 1.505, + "step": 7340 + }, + { + "epoch": 0.774367088607595, + "grad_norm": 0.46021801233291626, + "learning_rate": 0.00029202644481240867, + "loss": 1.4973, + "step": 7341 + }, + { + "epoch": 
0.7744725738396624, + "grad_norm": 0.4308036267757416, + "learning_rate": 0.0002916214120700702, + "loss": 1.5141, + "step": 7342 + }, + { + "epoch": 0.7745780590717299, + "grad_norm": 0.533600926399231, + "learning_rate": 0.00029121694109713757, + "loss": 1.4802, + "step": 7343 + }, + { + "epoch": 0.7746835443037975, + "grad_norm": 0.5042786598205566, + "learning_rate": 0.0002908130311144518, + "loss": 1.5374, + "step": 7344 + }, + { + "epoch": 0.7747890295358649, + "grad_norm": 0.4555796980857849, + "learning_rate": 0.0002904096813439346, + "loss": 1.502, + "step": 7345 + }, + { + "epoch": 0.7748945147679325, + "grad_norm": 0.5745311975479126, + "learning_rate": 0.00029000689100858694, + "loss": 1.4566, + "step": 7346 + }, + { + "epoch": 0.775, + "grad_norm": 0.44467997550964355, + "learning_rate": 0.0002896046593324875, + "loss": 1.5242, + "step": 7347 + }, + { + "epoch": 0.7751054852320675, + "grad_norm": 0.41884422302246094, + "learning_rate": 0.00028920298554079113, + "loss": 1.536, + "step": 7348 + }, + { + "epoch": 0.775210970464135, + "grad_norm": 0.467204749584198, + "learning_rate": 0.0002888018688597272, + "loss": 1.5134, + "step": 7349 + }, + { + "epoch": 0.7753164556962026, + "grad_norm": 0.5112577080726624, + "learning_rate": 0.00028840130851659853, + "loss": 1.481, + "step": 7350 + }, + { + "epoch": 0.77542194092827, + "grad_norm": 0.4778899848461151, + "learning_rate": 0.00028800130373977934, + "loss": 1.4945, + "step": 7351 + }, + { + "epoch": 0.7755274261603375, + "grad_norm": 0.4700300991535187, + "learning_rate": 0.00028760185375871445, + "loss": 1.5319, + "step": 7352 + }, + { + "epoch": 0.7756329113924051, + "grad_norm": 0.49284234642982483, + "learning_rate": 0.0002872029578039172, + "loss": 1.5118, + "step": 7353 + }, + { + "epoch": 0.7757383966244725, + "grad_norm": 0.4522155523300171, + "learning_rate": 0.0002868046151069681, + "loss": 1.4854, + "step": 7354 + }, + { + "epoch": 0.7758438818565401, + "grad_norm": 0.4400084912776947, + 
"learning_rate": 0.0002864068249005136, + "loss": 1.5163, + "step": 7355 + }, + { + "epoch": 0.7759493670886076, + "grad_norm": 0.49960556626319885, + "learning_rate": 0.0002860095864182644, + "loss": 1.5449, + "step": 7356 + }, + { + "epoch": 0.7760548523206751, + "grad_norm": 0.46481797099113464, + "learning_rate": 0.00028561289889499417, + "loss": 1.519, + "step": 7357 + }, + { + "epoch": 0.7761603375527426, + "grad_norm": 0.45330649614334106, + "learning_rate": 0.00028521676156653756, + "loss": 1.5286, + "step": 7358 + }, + { + "epoch": 0.7762658227848102, + "grad_norm": 0.5569223761558533, + "learning_rate": 0.0002848211736697894, + "loss": 1.5152, + "step": 7359 + }, + { + "epoch": 0.7763713080168776, + "grad_norm": 0.4300593435764313, + "learning_rate": 0.0002844261344427028, + "loss": 1.5043, + "step": 7360 + }, + { + "epoch": 0.7764767932489451, + "grad_norm": 0.4349600076675415, + "learning_rate": 0.000284031643124288, + "loss": 1.5451, + "step": 7361 + }, + { + "epoch": 0.7765822784810127, + "grad_norm": 0.5500975251197815, + "learning_rate": 0.00028363769895461044, + "loss": 1.546, + "step": 7362 + }, + { + "epoch": 0.7766877637130801, + "grad_norm": 0.5433265566825867, + "learning_rate": 0.00028324430117478974, + "loss": 1.4898, + "step": 7363 + }, + { + "epoch": 0.7767932489451477, + "grad_norm": 0.4187507629394531, + "learning_rate": 0.0002828514490269979, + "loss": 1.4834, + "step": 7364 + }, + { + "epoch": 0.7768987341772152, + "grad_norm": 0.4652557075023651, + "learning_rate": 0.0002824591417544582, + "loss": 1.5001, + "step": 7365 + }, + { + "epoch": 0.7770042194092827, + "grad_norm": 0.46672526001930237, + "learning_rate": 0.0002820673786014436, + "loss": 1.5035, + "step": 7366 + }, + { + "epoch": 0.7771097046413502, + "grad_norm": 0.450577974319458, + "learning_rate": 0.00028167615881327494, + "loss": 1.5084, + "step": 7367 + }, + { + "epoch": 0.7772151898734178, + "grad_norm": 0.4311329424381256, + "learning_rate": 0.00028128548163632006, + 
"loss": 1.4888, + "step": 7368 + }, + { + "epoch": 0.7773206751054852, + "grad_norm": 0.45180225372314453, + "learning_rate": 0.00028089534631799183, + "loss": 1.5481, + "step": 7369 + }, + { + "epoch": 0.7774261603375527, + "grad_norm": 0.4694453775882721, + "learning_rate": 0.0002805057521067471, + "loss": 1.5533, + "step": 7370 + }, + { + "epoch": 0.7775316455696203, + "grad_norm": 0.49632954597473145, + "learning_rate": 0.0002801166982520851, + "loss": 1.5252, + "step": 7371 + }, + { + "epoch": 0.7776371308016877, + "grad_norm": 0.4226900041103363, + "learning_rate": 0.000279728184004546, + "loss": 1.5397, + "step": 7372 + }, + { + "epoch": 0.7777426160337553, + "grad_norm": 0.462501585483551, + "learning_rate": 0.0002793402086157093, + "loss": 1.4715, + "step": 7373 + }, + { + "epoch": 0.7778481012658228, + "grad_norm": 0.45349717140197754, + "learning_rate": 0.0002789527713381925, + "loss": 1.5208, + "step": 7374 + }, + { + "epoch": 0.7779535864978903, + "grad_norm": 0.4472450613975525, + "learning_rate": 0.00027856587142565005, + "loss": 1.5468, + "step": 7375 + }, + { + "epoch": 0.7780590717299578, + "grad_norm": 0.43740612268447876, + "learning_rate": 0.0002781795081327712, + "loss": 1.4809, + "step": 7376 + }, + { + "epoch": 0.7781645569620254, + "grad_norm": 0.4535679817199707, + "learning_rate": 0.0002777936807152791, + "loss": 1.49, + "step": 7377 + }, + { + "epoch": 0.7782700421940928, + "grad_norm": 0.4833335280418396, + "learning_rate": 0.0002774083884299292, + "loss": 1.4678, + "step": 7378 + }, + { + "epoch": 0.7783755274261603, + "grad_norm": 0.41335001587867737, + "learning_rate": 0.0002770236305345076, + "loss": 1.5071, + "step": 7379 + }, + { + "epoch": 0.7784810126582279, + "grad_norm": 0.472217321395874, + "learning_rate": 0.00027663940628783017, + "loss": 1.5059, + "step": 7380 + }, + { + "epoch": 0.7785864978902953, + "grad_norm": 0.4293840229511261, + "learning_rate": 0.0002762557149497405, + "loss": 1.5123, + "step": 7381 + }, + { + 
"epoch": 0.7786919831223629, + "grad_norm": 0.44424277544021606, + "learning_rate": 0.00027587255578110894, + "loss": 1.5928, + "step": 7382 + }, + { + "epoch": 0.7787974683544304, + "grad_norm": 0.4720429480075836, + "learning_rate": 0.0002754899280438309, + "loss": 1.5077, + "step": 7383 + }, + { + "epoch": 0.7789029535864979, + "grad_norm": 0.4808584749698639, + "learning_rate": 0.0002751078310008254, + "loss": 1.5046, + "step": 7384 + }, + { + "epoch": 0.7790084388185654, + "grad_norm": 0.5304023623466492, + "learning_rate": 0.0002747262639160341, + "loss": 1.5095, + "step": 7385 + }, + { + "epoch": 0.779113924050633, + "grad_norm": 0.4539065659046173, + "learning_rate": 0.0002743452260544193, + "loss": 1.4633, + "step": 7386 + }, + { + "epoch": 0.7792194092827004, + "grad_norm": 0.4765965938568115, + "learning_rate": 0.0002739647166819628, + "loss": 1.5318, + "step": 7387 + }, + { + "epoch": 0.7793248945147679, + "grad_norm": 0.4670567810535431, + "learning_rate": 0.0002735847350656645, + "loss": 1.4936, + "step": 7388 + }, + { + "epoch": 0.7794303797468355, + "grad_norm": 0.4970821738243103, + "learning_rate": 0.00027320528047354093, + "loss": 1.5027, + "step": 7389 + }, + { + "epoch": 0.7795358649789029, + "grad_norm": 0.45334967970848083, + "learning_rate": 0.00027282635217462393, + "loss": 1.5042, + "step": 7390 + }, + { + "epoch": 0.7796413502109705, + "grad_norm": 0.4550299644470215, + "learning_rate": 0.0002724479494389592, + "loss": 1.5229, + "step": 7391 + }, + { + "epoch": 0.779746835443038, + "grad_norm": 0.4761160612106323, + "learning_rate": 0.00027207007153760463, + "loss": 1.5415, + "step": 7392 + }, + { + "epoch": 0.7798523206751055, + "grad_norm": 0.45894306898117065, + "learning_rate": 0.0002716927177426294, + "loss": 1.5126, + "step": 7393 + }, + { + "epoch": 0.779957805907173, + "grad_norm": 0.4720706343650818, + "learning_rate": 0.0002713158873271122, + "loss": 1.5146, + "step": 7394 + }, + { + "epoch": 0.7800632911392406, + "grad_norm": 
0.4470229744911194, + "learning_rate": 0.00027093957956513985, + "loss": 1.501, + "step": 7395 + }, + { + "epoch": 0.780168776371308, + "grad_norm": 0.486998975276947, + "learning_rate": 0.0002705637937318062, + "loss": 1.5661, + "step": 7396 + }, + { + "epoch": 0.7802742616033755, + "grad_norm": 0.46125513315200806, + "learning_rate": 0.00027018852910321045, + "loss": 1.5188, + "step": 7397 + }, + { + "epoch": 0.7803797468354431, + "grad_norm": 0.4309431314468384, + "learning_rate": 0.0002698137849564556, + "loss": 1.4961, + "step": 7398 + }, + { + "epoch": 0.7804852320675105, + "grad_norm": 0.477863073348999, + "learning_rate": 0.00026943956056964773, + "loss": 1.5175, + "step": 7399 + }, + { + "epoch": 0.7805907172995781, + "grad_norm": 0.5207541584968567, + "learning_rate": 0.0002690658552218937, + "loss": 1.5303, + "step": 7400 + }, + { + "epoch": 0.7806962025316456, + "grad_norm": 0.48380061984062195, + "learning_rate": 0.0002686926681933006, + "loss": 1.469, + "step": 7401 + }, + { + "epoch": 0.7808016877637131, + "grad_norm": 0.5058482885360718, + "learning_rate": 0.00026831999876497376, + "loss": 1.5277, + "step": 7402 + }, + { + "epoch": 0.7809071729957806, + "grad_norm": 0.4312707483768463, + "learning_rate": 0.00026794784621901564, + "loss": 1.4871, + "step": 7403 + }, + { + "epoch": 0.7810126582278482, + "grad_norm": 0.4509865343570709, + "learning_rate": 0.0002675762098385246, + "loss": 1.5093, + "step": 7404 + }, + { + "epoch": 0.7811181434599156, + "grad_norm": 0.496855765581131, + "learning_rate": 0.000267205088907593, + "loss": 1.4943, + "step": 7405 + }, + { + "epoch": 0.7812236286919831, + "grad_norm": 0.45943617820739746, + "learning_rate": 0.00026683448271130645, + "loss": 1.4995, + "step": 7406 + }, + { + "epoch": 0.7813291139240506, + "grad_norm": 0.42863786220550537, + "learning_rate": 0.0002664643905357418, + "loss": 1.5013, + "step": 7407 + }, + { + "epoch": 0.7814345991561181, + "grad_norm": 0.4518774151802063, + "learning_rate": 
0.0002660948116679665, + "loss": 1.5079, + "step": 7408 + }, + { + "epoch": 0.7815400843881857, + "grad_norm": 0.4638252258300781, + "learning_rate": 0.0002657257453960364, + "loss": 1.5269, + "step": 7409 + }, + { + "epoch": 0.7816455696202531, + "grad_norm": 0.43134576082229614, + "learning_rate": 0.00026535719100899516, + "loss": 1.4836, + "step": 7410 + }, + { + "epoch": 0.7817510548523207, + "grad_norm": 0.4494491219520569, + "learning_rate": 0.00026498914779687227, + "loss": 1.5187, + "step": 7411 + }, + { + "epoch": 0.7818565400843882, + "grad_norm": 0.4336400330066681, + "learning_rate": 0.000264621615050682, + "loss": 1.5312, + "step": 7412 + }, + { + "epoch": 0.7819620253164556, + "grad_norm": 0.4288300573825836, + "learning_rate": 0.0002642545920624219, + "loss": 1.5182, + "step": 7413 + }, + { + "epoch": 0.7820675105485232, + "grad_norm": 0.4808462858200073, + "learning_rate": 0.0002638880781250718, + "loss": 1.5173, + "step": 7414 + }, + { + "epoch": 0.7821729957805907, + "grad_norm": 0.4376299977302551, + "learning_rate": 0.00026352207253259167, + "loss": 1.4805, + "step": 7415 + }, + { + "epoch": 0.7822784810126582, + "grad_norm": 0.471394807100296, + "learning_rate": 0.0002631565745799212, + "loss": 1.4662, + "step": 7416 + }, + { + "epoch": 0.7823839662447257, + "grad_norm": 0.4295235872268677, + "learning_rate": 0.0002627915835629777, + "loss": 1.4779, + "step": 7417 + }, + { + "epoch": 0.7824894514767933, + "grad_norm": 0.4546544551849365, + "learning_rate": 0.00026242709877865493, + "loss": 1.5356, + "step": 7418 + }, + { + "epoch": 0.7825949367088607, + "grad_norm": 0.46728840470314026, + "learning_rate": 0.0002620631195248222, + "loss": 1.5327, + "step": 7419 + }, + { + "epoch": 0.7827004219409283, + "grad_norm": 0.41823697090148926, + "learning_rate": 0.00026169964510032245, + "loss": 1.498, + "step": 7420 + }, + { + "epoch": 0.7828059071729958, + "grad_norm": 0.4348681569099426, + "learning_rate": 0.0002613366748049711, + "loss": 1.5058, + 
"step": 7421 + }, + { + "epoch": 0.7829113924050632, + "grad_norm": 0.5143720507621765, + "learning_rate": 0.0002609742079395546, + "loss": 1.4959, + "step": 7422 + }, + { + "epoch": 0.7830168776371308, + "grad_norm": 0.4234554171562195, + "learning_rate": 0.0002606122438058295, + "loss": 1.472, + "step": 7423 + }, + { + "epoch": 0.7831223628691983, + "grad_norm": 0.4421462118625641, + "learning_rate": 0.00026025078170652043, + "loss": 1.523, + "step": 7424 + }, + { + "epoch": 0.7832278481012658, + "grad_norm": 0.4718989431858063, + "learning_rate": 0.00025988982094531945, + "loss": 1.5049, + "step": 7425 + }, + { + "epoch": 0.7833333333333333, + "grad_norm": 0.47068876028060913, + "learning_rate": 0.00025952936082688415, + "loss": 1.4611, + "step": 7426 + }, + { + "epoch": 0.7834388185654009, + "grad_norm": 0.44434478878974915, + "learning_rate": 0.00025916940065683655, + "loss": 1.4884, + "step": 7427 + }, + { + "epoch": 0.7835443037974683, + "grad_norm": 0.4254003167152405, + "learning_rate": 0.00025880993974176204, + "loss": 1.4767, + "step": 7428 + }, + { + "epoch": 0.7836497890295359, + "grad_norm": 0.4874972403049469, + "learning_rate": 0.00025845097738920735, + "loss": 1.4991, + "step": 7429 + }, + { + "epoch": 0.7837552742616034, + "grad_norm": 0.4271732270717621, + "learning_rate": 0.00025809251290767984, + "loss": 1.5084, + "step": 7430 + }, + { + "epoch": 0.7838607594936708, + "grad_norm": 0.43641841411590576, + "learning_rate": 0.000257734545606646, + "loss": 1.4919, + "step": 7431 + }, + { + "epoch": 0.7839662447257384, + "grad_norm": 0.44413816928863525, + "learning_rate": 0.00025737707479652985, + "loss": 1.4937, + "step": 7432 + }, + { + "epoch": 0.7840717299578059, + "grad_norm": 0.4260719418525696, + "learning_rate": 0.0002570200997887122, + "loss": 1.4854, + "step": 7433 + }, + { + "epoch": 0.7841772151898734, + "grad_norm": 0.45341113209724426, + "learning_rate": 0.0002566636198955286, + "loss": 1.514, + "step": 7434 + }, + { + "epoch": 
0.7842827004219409, + "grad_norm": 0.49296027421951294, + "learning_rate": 0.0002563076344302685, + "loss": 1.4864, + "step": 7435 + }, + { + "epoch": 0.7843881856540085, + "grad_norm": 0.4983070194721222, + "learning_rate": 0.00025595214270717387, + "loss": 1.4561, + "step": 7436 + }, + { + "epoch": 0.7844936708860759, + "grad_norm": 0.45008277893066406, + "learning_rate": 0.00025559714404143767, + "loss": 1.5255, + "step": 7437 + }, + { + "epoch": 0.7845991561181435, + "grad_norm": 0.5047111511230469, + "learning_rate": 0.0002552426377492028, + "loss": 1.491, + "step": 7438 + }, + { + "epoch": 0.784704641350211, + "grad_norm": 0.5234826803207397, + "learning_rate": 0.0002548886231475606, + "loss": 1.5264, + "step": 7439 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 0.44678300619125366, + "learning_rate": 0.00025453509955454957, + "loss": 1.493, + "step": 7440 + }, + { + "epoch": 0.784915611814346, + "grad_norm": 0.4589504897594452, + "learning_rate": 0.00025418206628915406, + "loss": 1.5097, + "step": 7441 + }, + { + "epoch": 0.7850210970464135, + "grad_norm": 0.47607266902923584, + "learning_rate": 0.00025382952267130306, + "loss": 1.4914, + "step": 7442 + }, + { + "epoch": 0.785126582278481, + "grad_norm": 0.4706583619117737, + "learning_rate": 0.0002534774680218686, + "loss": 1.4746, + "step": 7443 + }, + { + "epoch": 0.7852320675105485, + "grad_norm": 0.4593980014324188, + "learning_rate": 0.00025312590166266493, + "loss": 1.4927, + "step": 7444 + }, + { + "epoch": 0.7853375527426161, + "grad_norm": 0.500469446182251, + "learning_rate": 0.00025277482291644667, + "loss": 1.5049, + "step": 7445 + }, + { + "epoch": 0.7854430379746835, + "grad_norm": 0.5348123908042908, + "learning_rate": 0.00025242423110690787, + "loss": 1.4939, + "step": 7446 + }, + { + "epoch": 0.7855485232067511, + "grad_norm": 0.44165948033332825, + "learning_rate": 0.0002520741255586806, + "loss": 1.5236, + "step": 7447 + }, + { + "epoch": 0.7856540084388186, + "grad_norm": 
0.507296621799469, + "learning_rate": 0.0002517245055973337, + "loss": 1.4897, + "step": 7448 + }, + { + "epoch": 0.785759493670886, + "grad_norm": 0.5057051777839661, + "learning_rate": 0.0002513753705493713, + "loss": 1.5185, + "step": 7449 + }, + { + "epoch": 0.7858649789029536, + "grad_norm": 0.45048296451568604, + "learning_rate": 0.00025102671974223175, + "loss": 1.5152, + "step": 7450 + }, + { + "epoch": 0.7859704641350211, + "grad_norm": 0.41650232672691345, + "learning_rate": 0.0002506785525042861, + "loss": 1.5169, + "step": 7451 + }, + { + "epoch": 0.7860759493670886, + "grad_norm": 0.4856704771518707, + "learning_rate": 0.0002503308681648371, + "loss": 1.5135, + "step": 7452 + }, + { + "epoch": 0.7861814345991561, + "grad_norm": 0.47491446137428284, + "learning_rate": 0.0002499836660541177, + "loss": 1.4863, + "step": 7453 + }, + { + "epoch": 0.7862869198312237, + "grad_norm": 0.44051873683929443, + "learning_rate": 0.00024963694550328967, + "loss": 1.5167, + "step": 7454 + }, + { + "epoch": 0.7863924050632911, + "grad_norm": 0.40391796827316284, + "learning_rate": 0.0002492907058444425, + "loss": 1.5129, + "step": 7455 + }, + { + "epoch": 0.7864978902953587, + "grad_norm": 0.4652332067489624, + "learning_rate": 0.00024894494641059217, + "loss": 1.5171, + "step": 7456 + }, + { + "epoch": 0.7866033755274262, + "grad_norm": 0.46692216396331787, + "learning_rate": 0.00024859966653567963, + "loss": 1.5061, + "step": 7457 + }, + { + "epoch": 0.7867088607594936, + "grad_norm": 0.4094427824020386, + "learning_rate": 0.00024825486555456975, + "loss": 1.5116, + "step": 7458 + }, + { + "epoch": 0.7868143459915612, + "grad_norm": 0.44194483757019043, + "learning_rate": 0.0002479105428030497, + "loss": 1.5216, + "step": 7459 + }, + { + "epoch": 0.7869198312236287, + "grad_norm": 0.4444366991519928, + "learning_rate": 0.00024756669761782815, + "loss": 1.5073, + "step": 7460 + }, + { + "epoch": 0.7870253164556962, + "grad_norm": 0.43794217705726624, + 
"learning_rate": 0.00024722332933653344, + "loss": 1.5121, + "step": 7461 + }, + { + "epoch": 0.7871308016877637, + "grad_norm": 0.4627552330493927, + "learning_rate": 0.000246880437297713, + "loss": 1.5001, + "step": 7462 + }, + { + "epoch": 0.7872362869198313, + "grad_norm": 0.4435422122478485, + "learning_rate": 0.0002465380208408314, + "loss": 1.5001, + "step": 7463 + }, + { + "epoch": 0.7873417721518987, + "grad_norm": 0.43845805525779724, + "learning_rate": 0.0002461960793062694, + "loss": 1.4777, + "step": 7464 + }, + { + "epoch": 0.7874472573839663, + "grad_norm": 0.43793636560440063, + "learning_rate": 0.00024585461203532254, + "loss": 1.5037, + "step": 7465 + }, + { + "epoch": 0.7875527426160338, + "grad_norm": 0.4776095747947693, + "learning_rate": 0.00024551361837020025, + "loss": 1.5231, + "step": 7466 + }, + { + "epoch": 0.7876582278481012, + "grad_norm": 0.5564210414886475, + "learning_rate": 0.0002451730976540241, + "loss": 1.4908, + "step": 7467 + }, + { + "epoch": 0.7877637130801688, + "grad_norm": 0.4627194404602051, + "learning_rate": 0.00024483304923082663, + "loss": 1.5115, + "step": 7468 + }, + { + "epoch": 0.7878691983122363, + "grad_norm": 0.5744985342025757, + "learning_rate": 0.00024449347244555043, + "loss": 1.476, + "step": 7469 + }, + { + "epoch": 0.7879746835443038, + "grad_norm": 0.5017590522766113, + "learning_rate": 0.0002441543666440464, + "loss": 1.4964, + "step": 7470 + }, + { + "epoch": 0.7880801687763713, + "grad_norm": 0.44695737957954407, + "learning_rate": 0.00024381573117307302, + "loss": 1.4929, + "step": 7471 + }, + { + "epoch": 0.7881856540084389, + "grad_norm": 0.5167922973632812, + "learning_rate": 0.00024347756538029453, + "loss": 1.4742, + "step": 7472 + }, + { + "epoch": 0.7882911392405063, + "grad_norm": 0.516939103603363, + "learning_rate": 0.00024313986861428, + "loss": 1.5389, + "step": 7473 + }, + { + "epoch": 0.7883966244725739, + "grad_norm": 0.4884980618953705, + "learning_rate": 0.00024280264022450215, + 
"loss": 1.4988, + "step": 7474 + }, + { + "epoch": 0.7885021097046413, + "grad_norm": 0.4783535599708557, + "learning_rate": 0.00024246587956133572, + "loss": 1.4991, + "step": 7475 + }, + { + "epoch": 0.7886075949367088, + "grad_norm": 0.5935856699943542, + "learning_rate": 0.0002421295859760568, + "loss": 1.52, + "step": 7476 + }, + { + "epoch": 0.7887130801687764, + "grad_norm": 0.4903963506221771, + "learning_rate": 0.00024179375882084098, + "loss": 1.5396, + "step": 7477 + }, + { + "epoch": 0.7888185654008438, + "grad_norm": 0.42041078209877014, + "learning_rate": 0.0002414583974487624, + "loss": 1.5074, + "step": 7478 + }, + { + "epoch": 0.7889240506329114, + "grad_norm": 0.46421265602111816, + "learning_rate": 0.00024112350121379254, + "loss": 1.465, + "step": 7479 + }, + { + "epoch": 0.7890295358649789, + "grad_norm": 0.5674994587898254, + "learning_rate": 0.00024078906947079878, + "loss": 1.5258, + "step": 7480 + }, + { + "epoch": 0.7891350210970464, + "grad_norm": 0.4770188629627228, + "learning_rate": 0.00024045510157554356, + "loss": 1.5213, + "step": 7481 + }, + { + "epoch": 0.7892405063291139, + "grad_norm": 0.4871588945388794, + "learning_rate": 0.00024012159688468254, + "loss": 1.5423, + "step": 7482 + }, + { + "epoch": 0.7893459915611815, + "grad_norm": 0.4368646442890167, + "learning_rate": 0.00023978855475576384, + "loss": 1.5012, + "step": 7483 + }, + { + "epoch": 0.7894514767932489, + "grad_norm": 0.47414425015449524, + "learning_rate": 0.00023945597454722657, + "loss": 1.5298, + "step": 7484 + }, + { + "epoch": 0.7895569620253164, + "grad_norm": 0.46037307381629944, + "learning_rate": 0.00023912385561839983, + "loss": 1.5251, + "step": 7485 + }, + { + "epoch": 0.789662447257384, + "grad_norm": 0.44129765033721924, + "learning_rate": 0.00023879219732950114, + "loss": 1.4795, + "step": 7486 + }, + { + "epoch": 0.7897679324894514, + "grad_norm": 0.4440038204193115, + "learning_rate": 0.0002384609990416354, + "loss": 1.5026, + "step": 7487 + }, + 
{ + "epoch": 0.789873417721519, + "grad_norm": 0.4725959002971649, + "learning_rate": 0.00023813026011679372, + "loss": 1.5209, + "step": 7488 + }, + { + "epoch": 0.7899789029535865, + "grad_norm": 0.45418548583984375, + "learning_rate": 0.000237799979917852, + "loss": 1.5176, + "step": 7489 + }, + { + "epoch": 0.790084388185654, + "grad_norm": 0.476900577545166, + "learning_rate": 0.00023747015780857005, + "loss": 1.5084, + "step": 7490 + }, + { + "epoch": 0.7901898734177215, + "grad_norm": 0.44245731830596924, + "learning_rate": 0.0002371407931535898, + "loss": 1.4933, + "step": 7491 + }, + { + "epoch": 0.7902953586497891, + "grad_norm": 0.5115064978599548, + "learning_rate": 0.00023681188531843469, + "loss": 1.5354, + "step": 7492 + }, + { + "epoch": 0.7904008438818565, + "grad_norm": 0.4417183995246887, + "learning_rate": 0.00023648343366950792, + "loss": 1.5229, + "step": 7493 + }, + { + "epoch": 0.790506329113924, + "grad_norm": 0.45005807280540466, + "learning_rate": 0.0002361554375740916, + "loss": 1.4984, + "step": 7494 + }, + { + "epoch": 0.7906118143459916, + "grad_norm": 0.43941158056259155, + "learning_rate": 0.00023582789640034545, + "loss": 1.5512, + "step": 7495 + }, + { + "epoch": 0.790717299578059, + "grad_norm": 0.42703139781951904, + "learning_rate": 0.0002355008095173055, + "loss": 1.4684, + "step": 7496 + }, + { + "epoch": 0.7908227848101266, + "grad_norm": 0.42848125100135803, + "learning_rate": 0.00023517417629488285, + "loss": 1.487, + "step": 7497 + }, + { + "epoch": 0.7909282700421941, + "grad_norm": 0.4281219244003296, + "learning_rate": 0.0002348479961038625, + "loss": 1.5075, + "step": 7498 + }, + { + "epoch": 0.7910337552742616, + "grad_norm": 0.4721951186656952, + "learning_rate": 0.00023452226831590227, + "loss": 1.483, + "step": 7499 + }, + { + "epoch": 0.7911392405063291, + "grad_norm": 0.42719757556915283, + "learning_rate": 0.00023419699230353144, + "loss": 1.5469, + "step": 7500 + }, + { + "epoch": 0.7912447257383967, + 
"grad_norm": 0.5172522068023682, + "learning_rate": 0.00023387216744014946, + "loss": 1.5574, + "step": 7501 + }, + { + "epoch": 0.7913502109704641, + "grad_norm": 0.4999002516269684, + "learning_rate": 0.00023354779310002504, + "loss": 1.5101, + "step": 7502 + }, + { + "epoch": 0.7914556962025316, + "grad_norm": 0.45073187351226807, + "learning_rate": 0.00023322386865829456, + "loss": 1.5085, + "step": 7503 + }, + { + "epoch": 0.7915611814345992, + "grad_norm": 0.46254685521125793, + "learning_rate": 0.00023290039349096122, + "loss": 1.5656, + "step": 7504 + }, + { + "epoch": 0.7916666666666666, + "grad_norm": 0.5351743102073669, + "learning_rate": 0.0002325773669748937, + "loss": 1.4905, + "step": 7505 + }, + { + "epoch": 0.7917721518987342, + "grad_norm": 0.5738751292228699, + "learning_rate": 0.00023225478848782483, + "loss": 1.5303, + "step": 7506 + }, + { + "epoch": 0.7918776371308017, + "grad_norm": 0.4692639708518982, + "learning_rate": 0.00023193265740835056, + "loss": 1.5452, + "step": 7507 + }, + { + "epoch": 0.7919831223628692, + "grad_norm": 0.688265323638916, + "learning_rate": 0.00023161097311592867, + "loss": 1.4936, + "step": 7508 + }, + { + "epoch": 0.7920886075949367, + "grad_norm": 0.4623848497867584, + "learning_rate": 0.00023128973499087779, + "loss": 1.4539, + "step": 7509 + }, + { + "epoch": 0.7921940928270043, + "grad_norm": 0.4545189142227173, + "learning_rate": 0.00023096894241437586, + "loss": 1.5028, + "step": 7510 + }, + { + "epoch": 0.7922995780590717, + "grad_norm": 0.5511388778686523, + "learning_rate": 0.00023064859476845908, + "loss": 1.4953, + "step": 7511 + }, + { + "epoch": 0.7924050632911392, + "grad_norm": 0.5001651644706726, + "learning_rate": 0.00023032869143602085, + "loss": 1.5024, + "step": 7512 + }, + { + "epoch": 0.7925105485232068, + "grad_norm": 0.41897332668304443, + "learning_rate": 0.00023000923180081047, + "loss": 1.4915, + "step": 7513 + }, + { + "epoch": 0.7926160337552742, + "grad_norm": 0.5125803351402283, + 
"learning_rate": 0.00022969021524743197, + "loss": 1.5093, + "step": 7514 + }, + { + "epoch": 0.7927215189873418, + "grad_norm": 0.5241802930831909, + "learning_rate": 0.00022937164116134282, + "loss": 1.4723, + "step": 7515 + }, + { + "epoch": 0.7928270042194093, + "grad_norm": 0.4525551199913025, + "learning_rate": 0.00022905350892885298, + "loss": 1.5446, + "step": 7516 + }, + { + "epoch": 0.7929324894514768, + "grad_norm": 0.5055121183395386, + "learning_rate": 0.0002287358179371235, + "loss": 1.4842, + "step": 7517 + }, + { + "epoch": 0.7930379746835443, + "grad_norm": 0.49243614077568054, + "learning_rate": 0.00022841856757416538, + "loss": 1.5157, + "step": 7518 + }, + { + "epoch": 0.7931434599156119, + "grad_norm": 0.531338632106781, + "learning_rate": 0.00022810175722883858, + "loss": 1.5024, + "step": 7519 + }, + { + "epoch": 0.7932489451476793, + "grad_norm": 0.5198273062705994, + "learning_rate": 0.00022778538629085056, + "loss": 1.486, + "step": 7520 + }, + { + "epoch": 0.7933544303797468, + "grad_norm": 0.5028226375579834, + "learning_rate": 0.0002274694541507553, + "loss": 1.483, + "step": 7521 + }, + { + "epoch": 0.7934599156118144, + "grad_norm": 0.4740949869155884, + "learning_rate": 0.00022715396019995203, + "loss": 1.5081, + "step": 7522 + }, + { + "epoch": 0.7935654008438818, + "grad_norm": 0.4856537878513336, + "learning_rate": 0.00022683890383068403, + "loss": 1.4973, + "step": 7523 + }, + { + "epoch": 0.7936708860759494, + "grad_norm": 0.5377978086471558, + "learning_rate": 0.00022652428443603774, + "loss": 1.5097, + "step": 7524 + }, + { + "epoch": 0.7937763713080169, + "grad_norm": 0.4413415193557739, + "learning_rate": 0.00022621010140994125, + "loss": 1.5301, + "step": 7525 + }, + { + "epoch": 0.7938818565400844, + "grad_norm": 0.4731031358242035, + "learning_rate": 0.0002258963541471631, + "loss": 1.4899, + "step": 7526 + }, + { + "epoch": 0.7939873417721519, + "grad_norm": 0.550666093826294, + "learning_rate": 0.00022558304204331147, + 
"loss": 1.5005, + "step": 7527 + }, + { + "epoch": 0.7940928270042195, + "grad_norm": 0.4401571750640869, + "learning_rate": 0.0002252701644948328, + "loss": 1.4955, + "step": 7528 + }, + { + "epoch": 0.7941983122362869, + "grad_norm": 0.4887373149394989, + "learning_rate": 0.0002249577208990106, + "loss": 1.5072, + "step": 7529 + }, + { + "epoch": 0.7943037974683544, + "grad_norm": 0.458577424287796, + "learning_rate": 0.00022464571065396427, + "loss": 1.5069, + "step": 7530 + }, + { + "epoch": 0.794409282700422, + "grad_norm": 0.5035319924354553, + "learning_rate": 0.0002243341331586481, + "loss": 1.5273, + "step": 7531 + }, + { + "epoch": 0.7945147679324894, + "grad_norm": 0.4222524166107178, + "learning_rate": 0.0002240229878128499, + "loss": 1.4949, + "step": 7532 + }, + { + "epoch": 0.794620253164557, + "grad_norm": 0.46664515137672424, + "learning_rate": 0.00022371227401719017, + "loss": 1.523, + "step": 7533 + }, + { + "epoch": 0.7947257383966245, + "grad_norm": 0.4458043575286865, + "learning_rate": 0.00022340199117312052, + "loss": 1.5176, + "step": 7534 + }, + { + "epoch": 0.794831223628692, + "grad_norm": 0.44229182600975037, + "learning_rate": 0.00022309213868292277, + "loss": 1.4647, + "step": 7535 + }, + { + "epoch": 0.7949367088607595, + "grad_norm": 0.4581355154514313, + "learning_rate": 0.0002227827159497079, + "loss": 1.4758, + "step": 7536 + }, + { + "epoch": 0.7950421940928271, + "grad_norm": 0.5061090588569641, + "learning_rate": 0.0002224737223774145, + "loss": 1.5041, + "step": 7537 + }, + { + "epoch": 0.7951476793248945, + "grad_norm": 0.43893036246299744, + "learning_rate": 0.00022216515737080817, + "loss": 1.5021, + "step": 7538 + }, + { + "epoch": 0.795253164556962, + "grad_norm": 0.4617495536804199, + "learning_rate": 0.00022185702033547996, + "loss": 1.4818, + "step": 7539 + }, + { + "epoch": 0.7953586497890295, + "grad_norm": 0.46008166670799255, + "learning_rate": 0.00022154931067784521, + "loss": 1.507, + "step": 7540 + }, + { + 
"epoch": 0.795464135021097, + "grad_norm": 0.4736064672470093, + "learning_rate": 0.0002212420278051428, + "loss": 1.4692, + "step": 7541 + }, + { + "epoch": 0.7955696202531646, + "grad_norm": 0.48402008414268494, + "learning_rate": 0.0002209351711254335, + "loss": 1.4942, + "step": 7542 + }, + { + "epoch": 0.795675105485232, + "grad_norm": 0.4922538697719574, + "learning_rate": 0.00022062874004759935, + "loss": 1.5029, + "step": 7543 + }, + { + "epoch": 0.7957805907172996, + "grad_norm": 0.4429817497730255, + "learning_rate": 0.00022032273398134208, + "loss": 1.4971, + "step": 7544 + }, + { + "epoch": 0.7958860759493671, + "grad_norm": 0.5070194602012634, + "learning_rate": 0.00022001715233718213, + "loss": 1.5149, + "step": 7545 + }, + { + "epoch": 0.7959915611814345, + "grad_norm": 0.47218137979507446, + "learning_rate": 0.0002197119945264576, + "loss": 1.5235, + "step": 7546 + }, + { + "epoch": 0.7960970464135021, + "grad_norm": 0.4475003778934479, + "learning_rate": 0.00021940725996132303, + "loss": 1.476, + "step": 7547 + }, + { + "epoch": 0.7962025316455696, + "grad_norm": 0.46315744519233704, + "learning_rate": 0.00021910294805474833, + "loss": 1.4808, + "step": 7548 + }, + { + "epoch": 0.7963080168776371, + "grad_norm": 0.4656524360179901, + "learning_rate": 0.00021879905822051756, + "loss": 1.5053, + "step": 7549 + }, + { + "epoch": 0.7964135021097046, + "grad_norm": 0.4421415328979492, + "learning_rate": 0.00021849558987322782, + "loss": 1.512, + "step": 7550 + }, + { + "epoch": 0.7965189873417722, + "grad_norm": 0.447036474943161, + "learning_rate": 0.00021819254242828816, + "loss": 1.4906, + "step": 7551 + }, + { + "epoch": 0.7966244725738396, + "grad_norm": 0.5037019848823547, + "learning_rate": 0.0002178899153019185, + "loss": 1.5291, + "step": 7552 + }, + { + "epoch": 0.7967299578059072, + "grad_norm": 0.44761893153190613, + "learning_rate": 0.00021758770791114845, + "loss": 1.525, + "step": 7553 + }, + { + "epoch": 0.7968354430379747, + 
"grad_norm": 0.49607813358306885, + "learning_rate": 0.00021728591967381606, + "loss": 1.4785, + "step": 7554 + }, + { + "epoch": 0.7969409282700421, + "grad_norm": 0.5385924577713013, + "learning_rate": 0.0002169845500085669, + "loss": 1.436, + "step": 7555 + }, + { + "epoch": 0.7970464135021097, + "grad_norm": 0.4618675708770752, + "learning_rate": 0.00021668359833485287, + "loss": 1.5542, + "step": 7556 + }, + { + "epoch": 0.7971518987341772, + "grad_norm": 0.4341605007648468, + "learning_rate": 0.00021638306407293116, + "loss": 1.4901, + "step": 7557 + }, + { + "epoch": 0.7972573839662447, + "grad_norm": 0.48466330766677856, + "learning_rate": 0.0002160829466438629, + "loss": 1.4762, + "step": 7558 + }, + { + "epoch": 0.7973628691983122, + "grad_norm": 0.4358654022216797, + "learning_rate": 0.00021578324546951222, + "loss": 1.4916, + "step": 7559 + }, + { + "epoch": 0.7974683544303798, + "grad_norm": 0.46213844418525696, + "learning_rate": 0.0002154839599725452, + "loss": 1.5033, + "step": 7560 + }, + { + "epoch": 0.7975738396624472, + "grad_norm": 0.44458338618278503, + "learning_rate": 0.0002151850895764285, + "loss": 1.4937, + "step": 7561 + }, + { + "epoch": 0.7976793248945148, + "grad_norm": 0.48640087246894836, + "learning_rate": 0.00021488663370542862, + "loss": 1.5282, + "step": 7562 + }, + { + "epoch": 0.7977848101265823, + "grad_norm": 0.48494136333465576, + "learning_rate": 0.00021458859178461048, + "loss": 1.5035, + "step": 7563 + }, + { + "epoch": 0.7978902953586497, + "grad_norm": 0.4744728207588196, + "learning_rate": 0.00021429096323983645, + "loss": 1.5076, + "step": 7564 + }, + { + "epoch": 0.7979957805907173, + "grad_norm": 0.511477530002594, + "learning_rate": 0.00021399374749776512, + "loss": 1.5255, + "step": 7565 + }, + { + "epoch": 0.7981012658227848, + "grad_norm": 0.4830210506916046, + "learning_rate": 0.00021369694398585033, + "loss": 1.4794, + "step": 7566 + }, + { + "epoch": 0.7982067510548523, + "grad_norm": 0.48343178629875183, + 
"learning_rate": 0.0002134005521323402, + "loss": 1.528, + "step": 7567 + }, + { + "epoch": 0.7983122362869198, + "grad_norm": 0.4899161756038666, + "learning_rate": 0.00021310457136627562, + "loss": 1.5103, + "step": 7568 + }, + { + "epoch": 0.7984177215189874, + "grad_norm": 0.5383829474449158, + "learning_rate": 0.00021280900111748948, + "loss": 1.5332, + "step": 7569 + }, + { + "epoch": 0.7985232067510548, + "grad_norm": 0.4811061918735504, + "learning_rate": 0.00021251384081660544, + "loss": 1.5007, + "step": 7570 + }, + { + "epoch": 0.7986286919831224, + "grad_norm": 0.45103538036346436, + "learning_rate": 0.00021221908989503698, + "loss": 1.4703, + "step": 7571 + }, + { + "epoch": 0.7987341772151899, + "grad_norm": 0.5614848732948303, + "learning_rate": 0.00021192474778498606, + "loss": 1.5151, + "step": 7572 + }, + { + "epoch": 0.7988396624472573, + "grad_norm": 0.433655709028244, + "learning_rate": 0.00021163081391944227, + "loss": 1.5056, + "step": 7573 + }, + { + "epoch": 0.7989451476793249, + "grad_norm": 0.4817821979522705, + "learning_rate": 0.00021133728773218148, + "loss": 1.524, + "step": 7574 + }, + { + "epoch": 0.7990506329113924, + "grad_norm": 0.5563144087791443, + "learning_rate": 0.00021104416865776502, + "loss": 1.4911, + "step": 7575 + }, + { + "epoch": 0.7991561181434599, + "grad_norm": 0.434097558259964, + "learning_rate": 0.00021075145613153853, + "loss": 1.5114, + "step": 7576 + }, + { + "epoch": 0.7992616033755274, + "grad_norm": 0.40812230110168457, + "learning_rate": 0.0002104591495896306, + "loss": 1.5087, + "step": 7577 + }, + { + "epoch": 0.799367088607595, + "grad_norm": 0.5394563674926758, + "learning_rate": 0.00021016724846895213, + "loss": 1.5171, + "step": 7578 + }, + { + "epoch": 0.7994725738396624, + "grad_norm": 0.45321643352508545, + "learning_rate": 0.00020987575220719483, + "loss": 1.5285, + "step": 7579 + }, + { + "epoch": 0.79957805907173, + "grad_norm": 0.4194830656051636, + "learning_rate": 0.0002095846602428303, + 
"loss": 1.4965, + "step": 7580 + }, + { + "epoch": 0.7996835443037975, + "grad_norm": 0.5129013061523438, + "learning_rate": 0.00020929397201510915, + "loss": 1.482, + "step": 7581 + }, + { + "epoch": 0.799789029535865, + "grad_norm": 0.45665669441223145, + "learning_rate": 0.00020900368696405964, + "loss": 1.5112, + "step": 7582 + }, + { + "epoch": 0.7998945147679325, + "grad_norm": 0.4345030188560486, + "learning_rate": 0.00020871380453048668, + "loss": 1.5262, + "step": 7583 + }, + { + "epoch": 0.8, + "grad_norm": 0.42984485626220703, + "learning_rate": 0.00020842432415597067, + "loss": 1.4959, + "step": 7584 + }, + { + "epoch": 0.8001054852320675, + "grad_norm": 0.4645373225212097, + "learning_rate": 0.0002081352452828667, + "loss": 1.5096, + "step": 7585 + }, + { + "epoch": 0.800210970464135, + "grad_norm": 0.44757378101348877, + "learning_rate": 0.0002078465673543032, + "loss": 1.4863, + "step": 7586 + }, + { + "epoch": 0.8003164556962026, + "grad_norm": 0.4787604510784149, + "learning_rate": 0.00020755828981418106, + "loss": 1.4866, + "step": 7587 + }, + { + "epoch": 0.80042194092827, + "grad_norm": 0.43615758419036865, + "learning_rate": 0.00020727041210717235, + "loss": 1.4999, + "step": 7588 + }, + { + "epoch": 0.8005274261603376, + "grad_norm": 0.4338921308517456, + "learning_rate": 0.00020698293367871933, + "loss": 1.5085, + "step": 7589 + }, + { + "epoch": 0.8006329113924051, + "grad_norm": 0.41479068994522095, + "learning_rate": 0.00020669585397503358, + "loss": 1.4724, + "step": 7590 + }, + { + "epoch": 0.8007383966244725, + "grad_norm": 0.46705856919288635, + "learning_rate": 0.0002064091724430947, + "loss": 1.5077, + "step": 7591 + }, + { + "epoch": 0.8008438818565401, + "grad_norm": 0.4474402070045471, + "learning_rate": 0.00020612288853064925, + "loss": 1.5107, + "step": 7592 + }, + { + "epoch": 0.8009493670886076, + "grad_norm": 0.43865668773651123, + "learning_rate": 0.00020583700168620985, + "loss": 1.5285, + "step": 7593 + }, + { + "epoch": 
0.8010548523206751, + "grad_norm": 0.45596176385879517, + "learning_rate": 0.00020555151135905384, + "loss": 1.4841, + "step": 7594 + }, + { + "epoch": 0.8011603375527426, + "grad_norm": 0.44505131244659424, + "learning_rate": 0.00020526641699922268, + "loss": 1.5454, + "step": 7595 + }, + { + "epoch": 0.8012658227848102, + "grad_norm": 0.5033265352249146, + "learning_rate": 0.00020498171805752037, + "loss": 1.474, + "step": 7596 + }, + { + "epoch": 0.8013713080168776, + "grad_norm": 0.4670073986053467, + "learning_rate": 0.00020469741398551278, + "loss": 1.4506, + "step": 7597 + }, + { + "epoch": 0.8014767932489452, + "grad_norm": 0.49587923288345337, + "learning_rate": 0.00020441350423552624, + "loss": 1.4867, + "step": 7598 + }, + { + "epoch": 0.8015822784810127, + "grad_norm": 0.47923460602760315, + "learning_rate": 0.00020412998826064692, + "loss": 1.5082, + "step": 7599 + }, + { + "epoch": 0.8016877637130801, + "grad_norm": 0.4406692385673523, + "learning_rate": 0.0002038468655147195, + "loss": 1.4801, + "step": 7600 + }, + { + "epoch": 0.8017932489451477, + "grad_norm": 0.4876795709133148, + "learning_rate": 0.00020356413545234603, + "loss": 1.5331, + "step": 7601 + }, + { + "epoch": 0.8018987341772152, + "grad_norm": 0.5542328357696533, + "learning_rate": 0.0002032817975288851, + "loss": 1.4935, + "step": 7602 + }, + { + "epoch": 0.8020042194092827, + "grad_norm": 0.45491883158683777, + "learning_rate": 0.00020299985120045069, + "loss": 1.5498, + "step": 7603 + }, + { + "epoch": 0.8021097046413502, + "grad_norm": 0.4663879871368408, + "learning_rate": 0.00020271829592391114, + "loss": 1.5282, + "step": 7604 + }, + { + "epoch": 0.8022151898734177, + "grad_norm": 0.4307539463043213, + "learning_rate": 0.0002024371311568882, + "loss": 1.4928, + "step": 7605 + }, + { + "epoch": 0.8023206751054852, + "grad_norm": 0.4613824188709259, + "learning_rate": 0.0002021563563577556, + "loss": 1.5147, + "step": 7606 + }, + { + "epoch": 0.8024261603375528, + "grad_norm": 
0.45845553278923035, + "learning_rate": 0.00020187597098563864, + "loss": 1.4852, + "step": 7607 + }, + { + "epoch": 0.8025316455696202, + "grad_norm": 0.45621466636657715, + "learning_rate": 0.00020159597450041257, + "loss": 1.4777, + "step": 7608 + }, + { + "epoch": 0.8026371308016877, + "grad_norm": 0.48014456033706665, + "learning_rate": 0.00020131636636270178, + "loss": 1.5213, + "step": 7609 + }, + { + "epoch": 0.8027426160337553, + "grad_norm": 0.5431879758834839, + "learning_rate": 0.00020103714603387894, + "loss": 1.5394, + "step": 7610 + }, + { + "epoch": 0.8028481012658227, + "grad_norm": 0.48309117555618286, + "learning_rate": 0.00020075831297606357, + "loss": 1.4741, + "step": 7611 + }, + { + "epoch": 0.8029535864978903, + "grad_norm": 0.46558430790901184, + "learning_rate": 0.00020047986665212137, + "loss": 1.5177, + "step": 7612 + }, + { + "epoch": 0.8030590717299578, + "grad_norm": 0.4218272268772125, + "learning_rate": 0.0002002018065256629, + "loss": 1.4803, + "step": 7613 + }, + { + "epoch": 0.8031645569620253, + "grad_norm": 0.45957159996032715, + "learning_rate": 0.00019992413206104277, + "loss": 1.4725, + "step": 7614 + }, + { + "epoch": 0.8032700421940928, + "grad_norm": 0.4866376221179962, + "learning_rate": 0.00019964684272335854, + "loss": 1.5096, + "step": 7615 + }, + { + "epoch": 0.8033755274261604, + "grad_norm": 0.420400470495224, + "learning_rate": 0.0001993699379784496, + "loss": 1.4884, + "step": 7616 + }, + { + "epoch": 0.8034810126582278, + "grad_norm": 0.4081100821495056, + "learning_rate": 0.0001990934172928962, + "loss": 1.4676, + "step": 7617 + }, + { + "epoch": 0.8035864978902953, + "grad_norm": 0.4189686179161072, + "learning_rate": 0.00019881728013401842, + "loss": 1.4769, + "step": 7618 + }, + { + "epoch": 0.8036919831223629, + "grad_norm": 0.4664131700992584, + "learning_rate": 0.00019854152596987523, + "loss": 1.5003, + "step": 7619 + }, + { + "epoch": 0.8037974683544303, + "grad_norm": 0.4068298935890198, + 
"learning_rate": 0.00019826615426926338, + "loss": 1.4765, + "step": 7620 + }, + { + "epoch": 0.8039029535864979, + "grad_norm": 0.4602712392807007, + "learning_rate": 0.00019799116450171624, + "loss": 1.568, + "step": 7621 + }, + { + "epoch": 0.8040084388185654, + "grad_norm": 0.46461352705955505, + "learning_rate": 0.00019771655613750317, + "loss": 1.4763, + "step": 7622 + }, + { + "epoch": 0.8041139240506329, + "grad_norm": 0.501839816570282, + "learning_rate": 0.0001974423286476279, + "loss": 1.493, + "step": 7623 + }, + { + "epoch": 0.8042194092827004, + "grad_norm": 0.49715879559516907, + "learning_rate": 0.0001971684815038283, + "loss": 1.5178, + "step": 7624 + }, + { + "epoch": 0.804324894514768, + "grad_norm": 0.5090494155883789, + "learning_rate": 0.00019689501417857462, + "loss": 1.4937, + "step": 7625 + }, + { + "epoch": 0.8044303797468354, + "grad_norm": 0.4455357491970062, + "learning_rate": 0.00019662192614506883, + "loss": 1.4912, + "step": 7626 + }, + { + "epoch": 0.804535864978903, + "grad_norm": 0.4066576063632965, + "learning_rate": 0.00019634921687724358, + "loss": 1.5072, + "step": 7627 + }, + { + "epoch": 0.8046413502109705, + "grad_norm": 0.43859365582466125, + "learning_rate": 0.00019607688584976116, + "loss": 1.5178, + "step": 7628 + }, + { + "epoch": 0.8047468354430379, + "grad_norm": 0.567074716091156, + "learning_rate": 0.00019580493253801255, + "loss": 1.5171, + "step": 7629 + }, + { + "epoch": 0.8048523206751055, + "grad_norm": 0.44157594442367554, + "learning_rate": 0.00019553335641811625, + "loss": 1.5401, + "step": 7630 + }, + { + "epoch": 0.804957805907173, + "grad_norm": 0.4535594880580902, + "learning_rate": 0.00019526215696691747, + "loss": 1.5383, + "step": 7631 + }, + { + "epoch": 0.8050632911392405, + "grad_norm": 0.4694213271141052, + "learning_rate": 0.00019499133366198686, + "loss": 1.4817, + "step": 7632 + }, + { + "epoch": 0.805168776371308, + "grad_norm": 0.48461052775382996, + "learning_rate": 0.00019472088598161984, 
+ "loss": 1.5058, + "step": 7633 + }, + { + "epoch": 0.8052742616033756, + "grad_norm": 0.43508461117744446, + "learning_rate": 0.00019445081340483536, + "loss": 1.5178, + "step": 7634 + }, + { + "epoch": 0.805379746835443, + "grad_norm": 0.46978190541267395, + "learning_rate": 0.0001941811154113749, + "loss": 1.5306, + "step": 7635 + }, + { + "epoch": 0.8054852320675105, + "grad_norm": 0.4854903519153595, + "learning_rate": 0.0001939117914817016, + "loss": 1.4822, + "step": 7636 + }, + { + "epoch": 0.8055907172995781, + "grad_norm": 0.4828737676143646, + "learning_rate": 0.0001936428410969991, + "loss": 1.4794, + "step": 7637 + }, + { + "epoch": 0.8056962025316455, + "grad_norm": 0.44565919041633606, + "learning_rate": 0.00019337426373917076, + "loss": 1.4901, + "step": 7638 + }, + { + "epoch": 0.8058016877637131, + "grad_norm": 0.45560306310653687, + "learning_rate": 0.00019310605889083838, + "loss": 1.5186, + "step": 7639 + }, + { + "epoch": 0.8059071729957806, + "grad_norm": 0.4514120817184448, + "learning_rate": 0.0001928382260353415, + "loss": 1.477, + "step": 7640 + }, + { + "epoch": 0.8060126582278481, + "grad_norm": 0.4615853726863861, + "learning_rate": 0.00019257076465673605, + "loss": 1.5186, + "step": 7641 + }, + { + "epoch": 0.8061181434599156, + "grad_norm": 0.44695156812667847, + "learning_rate": 0.00019230367423979372, + "loss": 1.5419, + "step": 7642 + }, + { + "epoch": 0.8062236286919832, + "grad_norm": 0.4508327841758728, + "learning_rate": 0.0001920369542700008, + "loss": 1.4762, + "step": 7643 + }, + { + "epoch": 0.8063291139240506, + "grad_norm": 0.4132992625236511, + "learning_rate": 0.00019177060423355714, + "loss": 1.5286, + "step": 7644 + }, + { + "epoch": 0.8064345991561181, + "grad_norm": 0.46180111169815063, + "learning_rate": 0.00019150462361737527, + "loss": 1.5159, + "step": 7645 + }, + { + "epoch": 0.8065400843881857, + "grad_norm": 0.42987874150276184, + "learning_rate": 0.00019123901190907928, + "loss": 1.5419, + "step": 7646 + 
}, + { + "epoch": 0.8066455696202531, + "grad_norm": 0.47122010588645935, + "learning_rate": 0.00019097376859700393, + "loss": 1.5015, + "step": 7647 + }, + { + "epoch": 0.8067510548523207, + "grad_norm": 0.437377005815506, + "learning_rate": 0.00019070889317019375, + "loss": 1.4865, + "step": 7648 + }, + { + "epoch": 0.8068565400843882, + "grad_norm": 0.4346117079257965, + "learning_rate": 0.0001904443851184018, + "loss": 1.5061, + "step": 7649 + }, + { + "epoch": 0.8069620253164557, + "grad_norm": 0.45330169796943665, + "learning_rate": 0.00019018024393208902, + "loss": 1.5374, + "step": 7650 + }, + { + "epoch": 0.8070675105485232, + "grad_norm": 0.4814563989639282, + "learning_rate": 0.00018991646910242288, + "loss": 1.5025, + "step": 7651 + }, + { + "epoch": 0.8071729957805908, + "grad_norm": 0.4183913469314575, + "learning_rate": 0.00018965306012127663, + "loss": 1.4314, + "step": 7652 + }, + { + "epoch": 0.8072784810126582, + "grad_norm": 0.4228047728538513, + "learning_rate": 0.00018939001648122844, + "loss": 1.5049, + "step": 7653 + }, + { + "epoch": 0.8073839662447257, + "grad_norm": 0.49773094058036804, + "learning_rate": 0.00018912733767556005, + "loss": 1.5389, + "step": 7654 + }, + { + "epoch": 0.8074894514767933, + "grad_norm": 0.4487854540348053, + "learning_rate": 0.00018886502319825612, + "loss": 1.4722, + "step": 7655 + }, + { + "epoch": 0.8075949367088607, + "grad_norm": 0.48282310366630554, + "learning_rate": 0.00018860307254400305, + "loss": 1.4709, + "step": 7656 + }, + { + "epoch": 0.8077004219409283, + "grad_norm": 0.4292629659175873, + "learning_rate": 0.0001883414852081882, + "loss": 1.5248, + "step": 7657 + }, + { + "epoch": 0.8078059071729958, + "grad_norm": 0.46244215965270996, + "learning_rate": 0.00018808026068689883, + "loss": 1.5144, + "step": 7658 + }, + { + "epoch": 0.8079113924050633, + "grad_norm": 0.4608635902404785, + "learning_rate": 0.00018781939847692096, + "loss": 1.4986, + "step": 7659 + }, + { + "epoch": 
0.8080168776371308, + "grad_norm": 0.4507381021976471, + "learning_rate": 0.00018755889807573872, + "loss": 1.4778, + "step": 7660 + }, + { + "epoch": 0.8081223628691984, + "grad_norm": 0.4129912555217743, + "learning_rate": 0.00018729875898153305, + "loss": 1.4702, + "step": 7661 + }, + { + "epoch": 0.8082278481012658, + "grad_norm": 0.4801464080810547, + "learning_rate": 0.00018703898069318112, + "loss": 1.5099, + "step": 7662 + }, + { + "epoch": 0.8083333333333333, + "grad_norm": 0.4517633616924286, + "learning_rate": 0.00018677956271025492, + "loss": 1.5218, + "step": 7663 + }, + { + "epoch": 0.8084388185654009, + "grad_norm": 0.49452513456344604, + "learning_rate": 0.00018652050453302066, + "loss": 1.4985, + "step": 7664 + }, + { + "epoch": 0.8085443037974683, + "grad_norm": 0.4616701602935791, + "learning_rate": 0.0001862618056624376, + "loss": 1.5108, + "step": 7665 + }, + { + "epoch": 0.8086497890295359, + "grad_norm": 0.4375028908252716, + "learning_rate": 0.00018600346560015716, + "loss": 1.455, + "step": 7666 + }, + { + "epoch": 0.8087552742616034, + "grad_norm": 0.4428229033946991, + "learning_rate": 0.00018574548384852206, + "loss": 1.4951, + "step": 7667 + }, + { + "epoch": 0.8088607594936709, + "grad_norm": 0.40699702501296997, + "learning_rate": 0.00018548785991056508, + "loss": 1.5383, + "step": 7668 + }, + { + "epoch": 0.8089662447257384, + "grad_norm": 0.458650678396225, + "learning_rate": 0.00018523059329000844, + "loss": 1.4976, + "step": 7669 + }, + { + "epoch": 0.8090717299578059, + "grad_norm": 0.43317022919654846, + "learning_rate": 0.00018497368349126262, + "loss": 1.5115, + "step": 7670 + }, + { + "epoch": 0.8091772151898734, + "grad_norm": 0.4401693046092987, + "learning_rate": 0.00018471713001942533, + "loss": 1.51, + "step": 7671 + }, + { + "epoch": 0.809282700421941, + "grad_norm": 0.42360663414001465, + "learning_rate": 0.000184460932380281, + "loss": 1.5366, + "step": 7672 + }, + { + "epoch": 0.8093881856540084, + "grad_norm": 
0.41593116521835327, + "learning_rate": 0.00018420509008029931, + "loss": 1.5379, + "step": 7673 + }, + { + "epoch": 0.8094936708860759, + "grad_norm": 0.445113867521286, + "learning_rate": 0.00018394960262663448, + "loss": 1.5271, + "step": 7674 + }, + { + "epoch": 0.8095991561181435, + "grad_norm": 0.45435038208961487, + "learning_rate": 0.0001836944695271243, + "loss": 1.472, + "step": 7675 + }, + { + "epoch": 0.8097046413502109, + "grad_norm": 0.4509572386741638, + "learning_rate": 0.00018343969029028915, + "loss": 1.5007, + "step": 7676 + }, + { + "epoch": 0.8098101265822785, + "grad_norm": 0.4851086139678955, + "learning_rate": 0.00018318526442533123, + "loss": 1.491, + "step": 7677 + }, + { + "epoch": 0.809915611814346, + "grad_norm": 0.4428175389766693, + "learning_rate": 0.00018293119144213328, + "loss": 1.5429, + "step": 7678 + }, + { + "epoch": 0.8100210970464135, + "grad_norm": 0.4549797475337982, + "learning_rate": 0.0001826774708512579, + "loss": 1.4834, + "step": 7679 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 0.4545847475528717, + "learning_rate": 0.00018242410216394648, + "loss": 1.5446, + "step": 7680 + }, + { + "epoch": 0.8102320675105485, + "grad_norm": 0.40937238931655884, + "learning_rate": 0.00018217108489211841, + "loss": 1.5011, + "step": 7681 + }, + { + "epoch": 0.810337552742616, + "grad_norm": 0.4805527329444885, + "learning_rate": 0.00018191841854836994, + "loss": 1.4952, + "step": 7682 + }, + { + "epoch": 0.8104430379746835, + "grad_norm": 0.41970404982566833, + "learning_rate": 0.00018166610264597332, + "loss": 1.4804, + "step": 7683 + }, + { + "epoch": 0.8105485232067511, + "grad_norm": 0.4368318021297455, + "learning_rate": 0.00018141413669887598, + "loss": 1.4863, + "step": 7684 + }, + { + "epoch": 0.8106540084388185, + "grad_norm": 0.45059943199157715, + "learning_rate": 0.00018116252022169936, + "loss": 1.5212, + "step": 7685 + }, + { + "epoch": 0.8107594936708861, + "grad_norm": 0.4365903437137604, + "learning_rate": 
0.00018091125272973825, + "loss": 1.5077, + "step": 7686 + }, + { + "epoch": 0.8108649789029536, + "grad_norm": 0.43383705615997314, + "learning_rate": 0.0001806603337389596, + "loss": 1.502, + "step": 7687 + }, + { + "epoch": 0.810970464135021, + "grad_norm": 0.4276760518550873, + "learning_rate": 0.00018040976276600176, + "loss": 1.4718, + "step": 7688 + }, + { + "epoch": 0.8110759493670886, + "grad_norm": 0.46179482340812683, + "learning_rate": 0.00018015953932817348, + "loss": 1.4835, + "step": 7689 + }, + { + "epoch": 0.8111814345991561, + "grad_norm": 0.44500598311424255, + "learning_rate": 0.0001799096629434529, + "loss": 1.4523, + "step": 7690 + }, + { + "epoch": 0.8112869198312236, + "grad_norm": 0.4377223253250122, + "learning_rate": 0.00017966013313048688, + "loss": 1.51, + "step": 7691 + }, + { + "epoch": 0.8113924050632911, + "grad_norm": 0.44538792967796326, + "learning_rate": 0.00017941094940858982, + "loss": 1.5185, + "step": 7692 + }, + { + "epoch": 0.8114978902953587, + "grad_norm": 0.4273275136947632, + "learning_rate": 0.00017916211129774277, + "loss": 1.4906, + "step": 7693 + }, + { + "epoch": 0.8116033755274261, + "grad_norm": 0.4187488853931427, + "learning_rate": 0.00017891361831859262, + "loss": 1.4839, + "step": 7694 + }, + { + "epoch": 0.8117088607594937, + "grad_norm": 0.4449957609176636, + "learning_rate": 0.00017866546999245102, + "loss": 1.4872, + "step": 7695 + }, + { + "epoch": 0.8118143459915612, + "grad_norm": 0.4378944933414459, + "learning_rate": 0.00017841766584129372, + "loss": 1.5277, + "step": 7696 + }, + { + "epoch": 0.8119198312236287, + "grad_norm": 0.46003258228302, + "learning_rate": 0.00017817020538775933, + "loss": 1.4837, + "step": 7697 + }, + { + "epoch": 0.8120253164556962, + "grad_norm": 0.44949719309806824, + "learning_rate": 0.00017792308815514854, + "loss": 1.4938, + "step": 7698 + }, + { + "epoch": 0.8121308016877637, + "grad_norm": 0.4335648715496063, + "learning_rate": 0.00017767631366742332, + "loss": 
1.5005, + "step": 7699 + }, + { + "epoch": 0.8122362869198312, + "grad_norm": 0.4382060766220093, + "learning_rate": 0.00017742988144920578, + "loss": 1.5221, + "step": 7700 + }, + { + "epoch": 0.8123417721518987, + "grad_norm": 0.48175734281539917, + "learning_rate": 0.00017718379102577746, + "loss": 1.4764, + "step": 7701 + }, + { + "epoch": 0.8124472573839663, + "grad_norm": 0.4919948875904083, + "learning_rate": 0.00017693804192307826, + "loss": 1.5199, + "step": 7702 + }, + { + "epoch": 0.8125527426160337, + "grad_norm": 0.4192477762699127, + "learning_rate": 0.0001766926336677056, + "loss": 1.4669, + "step": 7703 + }, + { + "epoch": 0.8126582278481013, + "grad_norm": 0.44176772236824036, + "learning_rate": 0.00017644756578691345, + "loss": 1.4892, + "step": 7704 + }, + { + "epoch": 0.8127637130801688, + "grad_norm": 0.4153943359851837, + "learning_rate": 0.0001762028378086116, + "loss": 1.4875, + "step": 7705 + }, + { + "epoch": 0.8128691983122363, + "grad_norm": 0.4569021165370941, + "learning_rate": 0.00017595844926136456, + "loss": 1.4811, + "step": 7706 + }, + { + "epoch": 0.8129746835443038, + "grad_norm": 0.4806217551231384, + "learning_rate": 0.0001757143996743906, + "loss": 1.5111, + "step": 7707 + }, + { + "epoch": 0.8130801687763713, + "grad_norm": 0.4286656379699707, + "learning_rate": 0.00017547068857756104, + "loss": 1.4941, + "step": 7708 + }, + { + "epoch": 0.8131856540084388, + "grad_norm": 0.4367987811565399, + "learning_rate": 0.00017522731550139922, + "loss": 1.5301, + "step": 7709 + }, + { + "epoch": 0.8132911392405063, + "grad_norm": 0.546692430973053, + "learning_rate": 0.00017498427997707976, + "loss": 1.4884, + "step": 7710 + }, + { + "epoch": 0.8133966244725739, + "grad_norm": 0.4957149028778076, + "learning_rate": 0.0001747415815364274, + "loss": 1.5226, + "step": 7711 + }, + { + "epoch": 0.8135021097046413, + "grad_norm": 0.4620266854763031, + "learning_rate": 0.00017449921971191622, + "loss": 1.4886, + "step": 7712 + }, + { + 
"epoch": 0.8136075949367089, + "grad_norm": 0.4796673655509949, + "learning_rate": 0.00017425719403666877, + "loss": 1.4918, + "step": 7713 + }, + { + "epoch": 0.8137130801687764, + "grad_norm": 0.5266776084899902, + "learning_rate": 0.00017401550404445515, + "loss": 1.5231, + "step": 7714 + }, + { + "epoch": 0.8138185654008439, + "grad_norm": 0.4618789851665497, + "learning_rate": 0.0001737741492696922, + "loss": 1.4943, + "step": 7715 + }, + { + "epoch": 0.8139240506329114, + "grad_norm": 0.4553345739841461, + "learning_rate": 0.00017353312924744236, + "loss": 1.5164, + "step": 7716 + }, + { + "epoch": 0.814029535864979, + "grad_norm": 0.4804789423942566, + "learning_rate": 0.000173292443513413, + "loss": 1.5474, + "step": 7717 + }, + { + "epoch": 0.8141350210970464, + "grad_norm": 0.4306790828704834, + "learning_rate": 0.00017305209160395547, + "loss": 1.4529, + "step": 7718 + }, + { + "epoch": 0.8142405063291139, + "grad_norm": 0.4695904552936554, + "learning_rate": 0.00017281207305606407, + "loss": 1.4703, + "step": 7719 + }, + { + "epoch": 0.8143459915611815, + "grad_norm": 0.49509137868881226, + "learning_rate": 0.00017257238740737548, + "loss": 1.499, + "step": 7720 + }, + { + "epoch": 0.8144514767932489, + "grad_norm": 0.4523547887802124, + "learning_rate": 0.0001723330341961675, + "loss": 1.52, + "step": 7721 + }, + { + "epoch": 0.8145569620253165, + "grad_norm": 0.4584343731403351, + "learning_rate": 0.0001720940129613584, + "loss": 1.4952, + "step": 7722 + }, + { + "epoch": 0.814662447257384, + "grad_norm": 0.46166375279426575, + "learning_rate": 0.0001718553232425059, + "loss": 1.5241, + "step": 7723 + }, + { + "epoch": 0.8147679324894515, + "grad_norm": 0.48565253615379333, + "learning_rate": 0.00017161696457980641, + "loss": 1.4841, + "step": 7724 + }, + { + "epoch": 0.814873417721519, + "grad_norm": 0.454905241727829, + "learning_rate": 0.00017137893651409406, + "loss": 1.5503, + "step": 7725 + }, + { + "epoch": 0.8149789029535865, + "grad_norm": 
0.43393051624298096, + "learning_rate": 0.0001711412385868398, + "loss": 1.5087, + "step": 7726 + }, + { + "epoch": 0.815084388185654, + "grad_norm": 0.4867404103279114, + "learning_rate": 0.00017090387034015054, + "loss": 1.488, + "step": 7727 + }, + { + "epoch": 0.8151898734177215, + "grad_norm": 0.5071909427642822, + "learning_rate": 0.00017066683131676825, + "loss": 1.494, + "step": 7728 + }, + { + "epoch": 0.8152953586497891, + "grad_norm": 0.43942975997924805, + "learning_rate": 0.00017043012106006926, + "loss": 1.4962, + "step": 7729 + }, + { + "epoch": 0.8154008438818565, + "grad_norm": 0.46662601828575134, + "learning_rate": 0.00017019373911406307, + "loss": 1.5157, + "step": 7730 + }, + { + "epoch": 0.8155063291139241, + "grad_norm": 0.43356215953826904, + "learning_rate": 0.00016995768502339165, + "loss": 1.4811, + "step": 7731 + }, + { + "epoch": 0.8156118143459916, + "grad_norm": 0.45512300729751587, + "learning_rate": 0.00016972195833332858, + "loss": 1.5206, + "step": 7732 + }, + { + "epoch": 0.815717299578059, + "grad_norm": 0.48683232069015503, + "learning_rate": 0.00016948655858977808, + "loss": 1.5083, + "step": 7733 + }, + { + "epoch": 0.8158227848101266, + "grad_norm": 0.44006553292274475, + "learning_rate": 0.00016925148533927429, + "loss": 1.4814, + "step": 7734 + }, + { + "epoch": 0.8159282700421941, + "grad_norm": 0.49457958340644836, + "learning_rate": 0.00016901673812898022, + "loss": 1.4982, + "step": 7735 + }, + { + "epoch": 0.8160337552742616, + "grad_norm": 0.4615689814090729, + "learning_rate": 0.0001687823165066869, + "loss": 1.5066, + "step": 7736 + }, + { + "epoch": 0.8161392405063291, + "grad_norm": 0.4143151044845581, + "learning_rate": 0.00016854822002081266, + "loss": 1.4655, + "step": 7737 + }, + { + "epoch": 0.8162447257383966, + "grad_norm": 0.48641398549079895, + "learning_rate": 0.00016831444822040207, + "loss": 1.5175, + "step": 7738 + }, + { + "epoch": 0.8163502109704641, + "grad_norm": 0.45126456022262573, + 
"learning_rate": 0.00016808100065512528, + "loss": 1.5488, + "step": 7739 + }, + { + "epoch": 0.8164556962025317, + "grad_norm": 0.42679527401924133, + "learning_rate": 0.000167847876875277, + "loss": 1.5194, + "step": 7740 + }, + { + "epoch": 0.8165611814345991, + "grad_norm": 0.44492048025131226, + "learning_rate": 0.00016761507643177557, + "loss": 1.4682, + "step": 7741 + }, + { + "epoch": 0.8166666666666667, + "grad_norm": 0.46783220767974854, + "learning_rate": 0.0001673825988761623, + "loss": 1.4597, + "step": 7742 + }, + { + "epoch": 0.8167721518987342, + "grad_norm": 0.4900056719779968, + "learning_rate": 0.00016715044376060042, + "loss": 1.5206, + "step": 7743 + }, + { + "epoch": 0.8168776371308016, + "grad_norm": 0.5590119361877441, + "learning_rate": 0.00016691861063787436, + "loss": 1.5112, + "step": 7744 + }, + { + "epoch": 0.8169831223628692, + "grad_norm": 0.49072030186653137, + "learning_rate": 0.0001666870990613889, + "loss": 1.4525, + "step": 7745 + }, + { + "epoch": 0.8170886075949367, + "grad_norm": 0.4988759458065033, + "learning_rate": 0.000166455908585168, + "loss": 1.4933, + "step": 7746 + }, + { + "epoch": 0.8171940928270042, + "grad_norm": 0.4702509045600891, + "learning_rate": 0.00016622503876385437, + "loss": 1.4759, + "step": 7747 + }, + { + "epoch": 0.8172995780590717, + "grad_norm": 0.5422484278678894, + "learning_rate": 0.00016599448915270845, + "loss": 1.4785, + "step": 7748 + }, + { + "epoch": 0.8174050632911393, + "grad_norm": 0.4553377330303192, + "learning_rate": 0.00016576425930760734, + "loss": 1.5389, + "step": 7749 + }, + { + "epoch": 0.8175105485232067, + "grad_norm": 0.43447399139404297, + "learning_rate": 0.00016553434878504428, + "loss": 1.4951, + "step": 7750 + }, + { + "epoch": 0.8176160337552743, + "grad_norm": 0.5603171586990356, + "learning_rate": 0.00016530475714212755, + "loss": 1.5108, + "step": 7751 + }, + { + "epoch": 0.8177215189873418, + "grad_norm": 0.5328144431114197, + "learning_rate": 
0.00016507548393657973, + "loss": 1.4536, + "step": 7752 + }, + { + "epoch": 0.8178270042194092, + "grad_norm": 0.48087170720100403, + "learning_rate": 0.00016484652872673694, + "loss": 1.4849, + "step": 7753 + }, + { + "epoch": 0.8179324894514768, + "grad_norm": 0.4372412860393524, + "learning_rate": 0.00016461789107154767, + "loss": 1.4906, + "step": 7754 + }, + { + "epoch": 0.8180379746835443, + "grad_norm": 0.5650653839111328, + "learning_rate": 0.00016438957053057236, + "loss": 1.5046, + "step": 7755 + }, + { + "epoch": 0.8181434599156118, + "grad_norm": 0.48459675908088684, + "learning_rate": 0.0001641615666639821, + "loss": 1.4975, + "step": 7756 + }, + { + "epoch": 0.8182489451476793, + "grad_norm": 0.4763208329677582, + "learning_rate": 0.00016393387903255815, + "loss": 1.4954, + "step": 7757 + }, + { + "epoch": 0.8183544303797469, + "grad_norm": 0.4780988097190857, + "learning_rate": 0.000163706507197691, + "loss": 1.4581, + "step": 7758 + }, + { + "epoch": 0.8184599156118143, + "grad_norm": 0.4592362642288208, + "learning_rate": 0.00016347945072137934, + "loss": 1.5242, + "step": 7759 + }, + { + "epoch": 0.8185654008438819, + "grad_norm": 0.47767072916030884, + "learning_rate": 0.00016325270916622947, + "loss": 1.5218, + "step": 7760 + }, + { + "epoch": 0.8186708860759494, + "grad_norm": 0.4654233753681183, + "learning_rate": 0.00016302628209545426, + "loss": 1.4811, + "step": 7761 + }, + { + "epoch": 0.8187763713080168, + "grad_norm": 0.47363775968551636, + "learning_rate": 0.00016280016907287237, + "loss": 1.468, + "step": 7762 + }, + { + "epoch": 0.8188818565400844, + "grad_norm": 0.45746245980262756, + "learning_rate": 0.00016257436966290764, + "loss": 1.5372, + "step": 7763 + }, + { + "epoch": 0.8189873417721519, + "grad_norm": 0.4786800444126129, + "learning_rate": 0.0001623488834305878, + "loss": 1.5259, + "step": 7764 + }, + { + "epoch": 0.8190928270042194, + "grad_norm": 0.4710049033164978, + "learning_rate": 0.000162123709941544, + "loss": 
1.4906, + "step": 7765 + }, + { + "epoch": 0.8191983122362869, + "grad_norm": 0.44269460439682007, + "learning_rate": 0.00016189884876200979, + "loss": 1.5263, + "step": 7766 + }, + { + "epoch": 0.8193037974683545, + "grad_norm": 0.5046398639678955, + "learning_rate": 0.00016167429945882031, + "loss": 1.5018, + "step": 7767 + }, + { + "epoch": 0.8194092827004219, + "grad_norm": 0.5724698901176453, + "learning_rate": 0.0001614500615994117, + "loss": 1.5016, + "step": 7768 + }, + { + "epoch": 0.8195147679324895, + "grad_norm": 0.44181177020072937, + "learning_rate": 0.00016122613475181977, + "loss": 1.5316, + "step": 7769 + }, + { + "epoch": 0.819620253164557, + "grad_norm": 0.4330645799636841, + "learning_rate": 0.00016100251848467966, + "loss": 1.4414, + "step": 7770 + }, + { + "epoch": 0.8197257383966244, + "grad_norm": 0.5350200533866882, + "learning_rate": 0.00016077921236722464, + "loss": 1.487, + "step": 7771 + }, + { + "epoch": 0.819831223628692, + "grad_norm": 0.5268557071685791, + "learning_rate": 0.00016055621596928563, + "loss": 1.4756, + "step": 7772 + }, + { + "epoch": 0.8199367088607595, + "grad_norm": 0.5045581459999084, + "learning_rate": 0.00016033352886129, + "loss": 1.4838, + "step": 7773 + }, + { + "epoch": 0.820042194092827, + "grad_norm": 0.4686928987503052, + "learning_rate": 0.00016011115061426103, + "loss": 1.4462, + "step": 7774 + }, + { + "epoch": 0.8201476793248945, + "grad_norm": 0.4847986698150635, + "learning_rate": 0.00015988908079981698, + "loss": 1.514, + "step": 7775 + }, + { + "epoch": 0.8202531645569621, + "grad_norm": 0.443629652261734, + "learning_rate": 0.00015966731899017014, + "loss": 1.5264, + "step": 7776 + }, + { + "epoch": 0.8203586497890295, + "grad_norm": 0.4528484046459198, + "learning_rate": 0.00015944586475812633, + "loss": 1.5244, + "step": 7777 + }, + { + "epoch": 0.820464135021097, + "grad_norm": 0.4824484884738922, + "learning_rate": 0.00015922471767708377, + "loss": 1.4958, + "step": 7778 + }, + { + "epoch": 
0.8205696202531646, + "grad_norm": 0.5121501088142395, + "learning_rate": 0.00015900387732103232, + "loss": 1.4616, + "step": 7779 + }, + { + "epoch": 0.820675105485232, + "grad_norm": 0.5574297308921814, + "learning_rate": 0.0001587833432645528, + "loss": 1.5236, + "step": 7780 + }, + { + "epoch": 0.8207805907172996, + "grad_norm": 0.4380872845649719, + "learning_rate": 0.00015856311508281594, + "loss": 1.5378, + "step": 7781 + }, + { + "epoch": 0.8208860759493671, + "grad_norm": 0.5012332797050476, + "learning_rate": 0.00015834319235158187, + "loss": 1.504, + "step": 7782 + }, + { + "epoch": 0.8209915611814346, + "grad_norm": 0.5149298310279846, + "learning_rate": 0.00015812357464719905, + "loss": 1.5073, + "step": 7783 + }, + { + "epoch": 0.8210970464135021, + "grad_norm": 0.4839160144329071, + "learning_rate": 0.0001579042615466035, + "loss": 1.527, + "step": 7784 + }, + { + "epoch": 0.8212025316455697, + "grad_norm": 0.43355682492256165, + "learning_rate": 0.00015768525262731804, + "loss": 1.492, + "step": 7785 + }, + { + "epoch": 0.8213080168776371, + "grad_norm": 0.44033581018447876, + "learning_rate": 0.0001574665474674514, + "loss": 1.5354, + "step": 7786 + }, + { + "epoch": 0.8214135021097047, + "grad_norm": 0.4705195128917694, + "learning_rate": 0.00015724814564569765, + "loss": 1.5337, + "step": 7787 + }, + { + "epoch": 0.8215189873417722, + "grad_norm": 0.4969990849494934, + "learning_rate": 0.00015703004674133498, + "loss": 1.4949, + "step": 7788 + }, + { + "epoch": 0.8216244725738396, + "grad_norm": 0.4627792537212372, + "learning_rate": 0.00015681225033422526, + "loss": 1.4784, + "step": 7789 + }, + { + "epoch": 0.8217299578059072, + "grad_norm": 0.41960787773132324, + "learning_rate": 0.00015659475600481292, + "loss": 1.4926, + "step": 7790 + }, + { + "epoch": 0.8218354430379747, + "grad_norm": 0.48010876774787903, + "learning_rate": 0.00015637756333412454, + "loss": 1.5055, + "step": 7791 + }, + { + "epoch": 0.8219409282700422, + "grad_norm": 
0.4434579312801361, + "learning_rate": 0.0001561606719037676, + "loss": 1.544, + "step": 7792 + }, + { + "epoch": 0.8220464135021097, + "grad_norm": 0.47622019052505493, + "learning_rate": 0.00015594408129592993, + "loss": 1.5105, + "step": 7793 + }, + { + "epoch": 0.8221518987341773, + "grad_norm": 0.511711061000824, + "learning_rate": 0.00015572779109337888, + "loss": 1.4681, + "step": 7794 + }, + { + "epoch": 0.8222573839662447, + "grad_norm": 0.43236175179481506, + "learning_rate": 0.00015551180087946046, + "loss": 1.4648, + "step": 7795 + }, + { + "epoch": 0.8223628691983123, + "grad_norm": 0.39645713567733765, + "learning_rate": 0.00015529611023809868, + "loss": 1.5043, + "step": 7796 + }, + { + "epoch": 0.8224683544303798, + "grad_norm": 0.45807018876075745, + "learning_rate": 0.00015508071875379448, + "loss": 1.4905, + "step": 7797 + }, + { + "epoch": 0.8225738396624472, + "grad_norm": 0.44831138849258423, + "learning_rate": 0.00015486562601162512, + "loss": 1.4911, + "step": 7798 + }, + { + "epoch": 0.8226793248945148, + "grad_norm": 0.45922088623046875, + "learning_rate": 0.00015465083159724345, + "loss": 1.5301, + "step": 7799 + }, + { + "epoch": 0.8227848101265823, + "grad_norm": 0.42836442589759827, + "learning_rate": 0.00015443633509687688, + "loss": 1.4681, + "step": 7800 + }, + { + "epoch": 0.8228902953586498, + "grad_norm": 0.45813196897506714, + "learning_rate": 0.0001542221360973268, + "loss": 1.5095, + "step": 7801 + }, + { + "epoch": 0.8229957805907173, + "grad_norm": 0.44777148962020874, + "learning_rate": 0.00015400823418596765, + "loss": 1.4879, + "step": 7802 + }, + { + "epoch": 0.8231012658227848, + "grad_norm": 0.48104265332221985, + "learning_rate": 0.0001537946289507462, + "loss": 1.4895, + "step": 7803 + }, + { + "epoch": 0.8232067510548523, + "grad_norm": 0.46618038415908813, + "learning_rate": 0.00015358131998018072, + "loss": 1.5082, + "step": 7804 + }, + { + "epoch": 0.8233122362869199, + "grad_norm": 0.42976656556129456, + 
"learning_rate": 0.00015336830686336008, + "loss": 1.466, + "step": 7805 + }, + { + "epoch": 0.8234177215189873, + "grad_norm": 0.44920340180397034, + "learning_rate": 0.00015315558918994331, + "loss": 1.5368, + "step": 7806 + }, + { + "epoch": 0.8235232067510548, + "grad_norm": 0.47994476556777954, + "learning_rate": 0.0001529431665501584, + "loss": 1.5114, + "step": 7807 + }, + { + "epoch": 0.8236286919831224, + "grad_norm": 0.43936634063720703, + "learning_rate": 0.0001527310385348017, + "loss": 1.4638, + "step": 7808 + }, + { + "epoch": 0.8237341772151898, + "grad_norm": 0.4491763412952423, + "learning_rate": 0.0001525192047352371, + "loss": 1.5098, + "step": 7809 + }, + { + "epoch": 0.8238396624472574, + "grad_norm": 0.43240979313850403, + "learning_rate": 0.00015230766474339536, + "loss": 1.4836, + "step": 7810 + }, + { + "epoch": 0.8239451476793249, + "grad_norm": 0.4447120428085327, + "learning_rate": 0.0001520964181517731, + "loss": 1.4735, + "step": 7811 + }, + { + "epoch": 0.8240506329113924, + "grad_norm": 0.44551825523376465, + "learning_rate": 0.00015188546455343228, + "loss": 1.5006, + "step": 7812 + }, + { + "epoch": 0.8241561181434599, + "grad_norm": 0.42466315627098083, + "learning_rate": 0.00015167480354199908, + "loss": 1.4889, + "step": 7813 + }, + { + "epoch": 0.8242616033755275, + "grad_norm": 0.47459515929222107, + "learning_rate": 0.00015146443471166345, + "loss": 1.5504, + "step": 7814 + }, + { + "epoch": 0.8243670886075949, + "grad_norm": 0.4613172113895416, + "learning_rate": 0.00015125435765717815, + "loss": 1.5016, + "step": 7815 + }, + { + "epoch": 0.8244725738396624, + "grad_norm": 0.43431389331817627, + "learning_rate": 0.000151044571973858, + "loss": 1.468, + "step": 7816 + }, + { + "epoch": 0.82457805907173, + "grad_norm": 0.44141438603401184, + "learning_rate": 0.00015083507725757912, + "loss": 1.4772, + "step": 7817 + }, + { + "epoch": 0.8246835443037974, + "grad_norm": 0.4615953862667084, + "learning_rate": 
0.00015062587310477813, + "loss": 1.5334, + "step": 7818 + }, + { + "epoch": 0.824789029535865, + "grad_norm": 0.413897305727005, + "learning_rate": 0.00015041695911245136, + "loss": 1.511, + "step": 7819 + }, + { + "epoch": 0.8248945147679325, + "grad_norm": 0.42081931233406067, + "learning_rate": 0.00015020833487815416, + "loss": 1.5277, + "step": 7820 + }, + { + "epoch": 0.825, + "grad_norm": 0.46726006269454956, + "learning_rate": 0.00015000000000000001, + "loss": 1.5148, + "step": 7821 + }, + { + "epoch": 0.8251054852320675, + "grad_norm": 0.4627990424633026, + "learning_rate": 0.00014979195407665975, + "loss": 1.4826, + "step": 7822 + }, + { + "epoch": 0.825210970464135, + "grad_norm": 0.4164053797721863, + "learning_rate": 0.000149584196707361, + "loss": 1.4979, + "step": 7823 + }, + { + "epoch": 0.8253164556962025, + "grad_norm": 0.4177986681461334, + "learning_rate": 0.00014937672749188704, + "loss": 1.4729, + "step": 7824 + }, + { + "epoch": 0.82542194092827, + "grad_norm": 0.4489454925060272, + "learning_rate": 0.0001491695460305765, + "loss": 1.4819, + "step": 7825 + }, + { + "epoch": 0.8255274261603376, + "grad_norm": 0.45009520649909973, + "learning_rate": 0.00014896265192432194, + "loss": 1.4989, + "step": 7826 + }, + { + "epoch": 0.825632911392405, + "grad_norm": 0.4520403742790222, + "learning_rate": 0.00014875604477456987, + "loss": 1.5242, + "step": 7827 + }, + { + "epoch": 0.8257383966244726, + "grad_norm": 0.4293029010295868, + "learning_rate": 0.00014854972418331948, + "loss": 1.508, + "step": 7828 + }, + { + "epoch": 0.8258438818565401, + "grad_norm": 0.4253372251987457, + "learning_rate": 0.00014834368975312172, + "loss": 1.5103, + "step": 7829 + }, + { + "epoch": 0.8259493670886076, + "grad_norm": 0.4367186725139618, + "learning_rate": 0.0001481379410870792, + "loss": 1.4541, + "step": 7830 + }, + { + "epoch": 0.8260548523206751, + "grad_norm": 0.4056040346622467, + "learning_rate": 0.00014793247778884463, + "loss": 1.5022, + "step": 7831 + 
}, + { + "epoch": 0.8261603375527427, + "grad_norm": 0.43779054284095764, + "learning_rate": 0.00014772729946262067, + "loss": 1.4817, + "step": 7832 + }, + { + "epoch": 0.8262658227848101, + "grad_norm": 0.4522009491920471, + "learning_rate": 0.00014752240571315898, + "loss": 1.4895, + "step": 7833 + }, + { + "epoch": 0.8263713080168776, + "grad_norm": 0.4671722650527954, + "learning_rate": 0.00014731779614575915, + "loss": 1.4807, + "step": 7834 + }, + { + "epoch": 0.8264767932489452, + "grad_norm": 0.45991694927215576, + "learning_rate": 0.00014711347036626857, + "loss": 1.4628, + "step": 7835 + }, + { + "epoch": 0.8265822784810126, + "grad_norm": 0.4185478985309601, + "learning_rate": 0.00014690942798108097, + "loss": 1.4468, + "step": 7836 + }, + { + "epoch": 0.8266877637130802, + "grad_norm": 0.4673352539539337, + "learning_rate": 0.00014670566859713622, + "loss": 1.5137, + "step": 7837 + }, + { + "epoch": 0.8267932489451477, + "grad_norm": 0.5092229843139648, + "learning_rate": 0.00014650219182191934, + "loss": 1.5215, + "step": 7838 + }, + { + "epoch": 0.8268987341772152, + "grad_norm": 0.4105064570903778, + "learning_rate": 0.00014629899726345958, + "loss": 1.4216, + "step": 7839 + }, + { + "epoch": 0.8270042194092827, + "grad_norm": 0.47240203619003296, + "learning_rate": 0.00014609608453033013, + "loss": 1.4881, + "step": 7840 + }, + { + "epoch": 0.8271097046413503, + "grad_norm": 0.4400210976600647, + "learning_rate": 0.0001458934532316467, + "loss": 1.5094, + "step": 7841 + }, + { + "epoch": 0.8272151898734177, + "grad_norm": 0.45975586771965027, + "learning_rate": 0.0001456911029770675, + "loss": 1.4988, + "step": 7842 + }, + { + "epoch": 0.8273206751054852, + "grad_norm": 0.4276962876319885, + "learning_rate": 0.00014548903337679208, + "loss": 1.5001, + "step": 7843 + }, + { + "epoch": 0.8274261603375528, + "grad_norm": 0.47978460788726807, + "learning_rate": 0.0001452872440415604, + "loss": 1.4588, + "step": 7844 + }, + { + "epoch": 
0.8275316455696202, + "grad_norm": 0.49116069078445435, + "learning_rate": 0.00014508573458265245, + "loss": 1.4796, + "step": 7845 + }, + { + "epoch": 0.8276371308016878, + "grad_norm": 0.4531501829624176, + "learning_rate": 0.00014488450461188753, + "loss": 1.496, + "step": 7846 + }, + { + "epoch": 0.8277426160337553, + "grad_norm": 0.47048357129096985, + "learning_rate": 0.000144683553741623, + "loss": 1.5074, + "step": 7847 + }, + { + "epoch": 0.8278481012658228, + "grad_norm": 0.5059932470321655, + "learning_rate": 0.00014448288158475423, + "loss": 1.5094, + "step": 7848 + }, + { + "epoch": 0.8279535864978903, + "grad_norm": 0.4376550614833832, + "learning_rate": 0.00014428248775471316, + "loss": 1.4607, + "step": 7849 + }, + { + "epoch": 0.8280590717299579, + "grad_norm": 0.4301762878894806, + "learning_rate": 0.00014408237186546807, + "loss": 1.4945, + "step": 7850 + }, + { + "epoch": 0.8281645569620253, + "grad_norm": 0.421731561422348, + "learning_rate": 0.00014388253353152277, + "loss": 1.5044, + "step": 7851 + }, + { + "epoch": 0.8282700421940928, + "grad_norm": 0.4436945915222168, + "learning_rate": 0.00014368297236791545, + "loss": 1.4603, + "step": 7852 + }, + { + "epoch": 0.8283755274261604, + "grad_norm": 0.5222391486167908, + "learning_rate": 0.00014348368799021845, + "loss": 1.4921, + "step": 7853 + }, + { + "epoch": 0.8284810126582278, + "grad_norm": 0.45426493883132935, + "learning_rate": 0.00014328468001453717, + "loss": 1.5011, + "step": 7854 + }, + { + "epoch": 0.8285864978902954, + "grad_norm": 0.49810025095939636, + "learning_rate": 0.00014308594805750958, + "loss": 1.4833, + "step": 7855 + }, + { + "epoch": 0.8286919831223629, + "grad_norm": 0.45834463834762573, + "learning_rate": 0.0001428874917363054, + "loss": 1.4696, + "step": 7856 + }, + { + "epoch": 0.8287974683544304, + "grad_norm": 0.43906545639038086, + "learning_rate": 0.000142689310668625, + "loss": 1.4697, + "step": 7857 + }, + { + "epoch": 0.8289029535864979, + "grad_norm": 
0.5387390851974487, + "learning_rate": 0.0001424914044726995, + "loss": 1.4665, + "step": 7858 + }, + { + "epoch": 0.8290084388185655, + "grad_norm": 0.45019060373306274, + "learning_rate": 0.000142293772767289, + "loss": 1.4622, + "step": 7859 + }, + { + "epoch": 0.8291139240506329, + "grad_norm": 0.4874425232410431, + "learning_rate": 0.00014209641517168273, + "loss": 1.4639, + "step": 7860 + }, + { + "epoch": 0.8292194092827004, + "grad_norm": 0.4442334473133087, + "learning_rate": 0.0001418993313056979, + "loss": 1.5061, + "step": 7861 + }, + { + "epoch": 0.829324894514768, + "grad_norm": 0.4124840199947357, + "learning_rate": 0.0001417025207896788, + "loss": 1.5127, + "step": 7862 + }, + { + "epoch": 0.8294303797468354, + "grad_norm": 0.4346408545970917, + "learning_rate": 0.00014150598324449667, + "loss": 1.4777, + "step": 7863 + }, + { + "epoch": 0.829535864978903, + "grad_norm": 0.44085627794265747, + "learning_rate": 0.0001413097182915482, + "loss": 1.5012, + "step": 7864 + }, + { + "epoch": 0.8296413502109705, + "grad_norm": 0.49189162254333496, + "learning_rate": 0.0001411137255527554, + "loss": 1.4575, + "step": 7865 + }, + { + "epoch": 0.829746835443038, + "grad_norm": 0.42766907811164856, + "learning_rate": 0.00014091800465056476, + "loss": 1.5303, + "step": 7866 + }, + { + "epoch": 0.8298523206751055, + "grad_norm": 0.4196573793888092, + "learning_rate": 0.00014072255520794613, + "loss": 1.4627, + "step": 7867 + }, + { + "epoch": 0.8299578059071729, + "grad_norm": 0.49347805976867676, + "learning_rate": 0.0001405273768483926, + "loss": 1.4547, + "step": 7868 + }, + { + "epoch": 0.8300632911392405, + "grad_norm": 0.4952002167701721, + "learning_rate": 0.00014033246919591922, + "loss": 1.5215, + "step": 7869 + }, + { + "epoch": 0.830168776371308, + "grad_norm": 0.46306660771369934, + "learning_rate": 0.00014013783187506265, + "loss": 1.497, + "step": 7870 + }, + { + "epoch": 0.8302742616033755, + "grad_norm": 0.5201023817062378, + "learning_rate": 
0.00013994346451088037, + "loss": 1.5562, + "step": 7871 + }, + { + "epoch": 0.830379746835443, + "grad_norm": 0.4886961579322815, + "learning_rate": 0.00013974936672894972, + "loss": 1.5095, + "step": 7872 + }, + { + "epoch": 0.8304852320675106, + "grad_norm": 0.44482308626174927, + "learning_rate": 0.0001395555381553675, + "loss": 1.4824, + "step": 7873 + }, + { + "epoch": 0.830590717299578, + "grad_norm": 0.43957915902137756, + "learning_rate": 0.00013936197841674894, + "loss": 1.5163, + "step": 7874 + }, + { + "epoch": 0.8306962025316456, + "grad_norm": 0.42717117071151733, + "learning_rate": 0.00013916868714022735, + "loss": 1.4975, + "step": 7875 + }, + { + "epoch": 0.8308016877637131, + "grad_norm": 0.4543043076992035, + "learning_rate": 0.00013897566395345315, + "loss": 1.5152, + "step": 7876 + }, + { + "epoch": 0.8309071729957805, + "grad_norm": 0.47359344363212585, + "learning_rate": 0.000138782908484593, + "loss": 1.5193, + "step": 7877 + }, + { + "epoch": 0.8310126582278481, + "grad_norm": 0.4322127401828766, + "learning_rate": 0.0001385904203623296, + "loss": 1.5129, + "step": 7878 + }, + { + "epoch": 0.8311181434599156, + "grad_norm": 0.4316220283508301, + "learning_rate": 0.00013839819921586025, + "loss": 1.5055, + "step": 7879 + }, + { + "epoch": 0.8312236286919831, + "grad_norm": 0.4473347067832947, + "learning_rate": 0.00013820624467489697, + "loss": 1.4959, + "step": 7880 + }, + { + "epoch": 0.8313291139240506, + "grad_norm": 0.47121867537498474, + "learning_rate": 0.00013801455636966518, + "loss": 1.4996, + "step": 7881 + }, + { + "epoch": 0.8314345991561182, + "grad_norm": 0.44001927971839905, + "learning_rate": 0.00013782313393090301, + "loss": 1.4859, + "step": 7882 + }, + { + "epoch": 0.8315400843881856, + "grad_norm": 0.46005189418792725, + "learning_rate": 0.00013763197698986107, + "loss": 1.4819, + "step": 7883 + }, + { + "epoch": 0.8316455696202532, + "grad_norm": 0.5089910626411438, + "learning_rate": 0.00013744108517830105, + "loss": 
1.548, + "step": 7884 + }, + { + "epoch": 0.8317510548523207, + "grad_norm": 0.4660554826259613, + "learning_rate": 0.00013725045812849567, + "loss": 1.5342, + "step": 7885 + }, + { + "epoch": 0.8318565400843881, + "grad_norm": 0.4434336721897125, + "learning_rate": 0.00013706009547322762, + "loss": 1.5392, + "step": 7886 + }, + { + "epoch": 0.8319620253164557, + "grad_norm": 0.4478782117366791, + "learning_rate": 0.00013686999684578871, + "loss": 1.5216, + "step": 7887 + }, + { + "epoch": 0.8320675105485232, + "grad_norm": 0.4423481523990631, + "learning_rate": 0.00013668016187997964, + "loss": 1.4767, + "step": 7888 + }, + { + "epoch": 0.8321729957805907, + "grad_norm": 0.4352787733078003, + "learning_rate": 0.00013649059021010894, + "loss": 1.4948, + "step": 7889 + }, + { + "epoch": 0.8322784810126582, + "grad_norm": 0.4673626720905304, + "learning_rate": 0.00013630128147099213, + "loss": 1.5057, + "step": 7890 + }, + { + "epoch": 0.8323839662447258, + "grad_norm": 0.42983680963516235, + "learning_rate": 0.00013611223529795156, + "loss": 1.4659, + "step": 7891 + }, + { + "epoch": 0.8324894514767932, + "grad_norm": 0.41277554631233215, + "learning_rate": 0.00013592345132681512, + "loss": 1.4549, + "step": 7892 + }, + { + "epoch": 0.8325949367088608, + "grad_norm": 0.41870588064193726, + "learning_rate": 0.0001357349291939159, + "loss": 1.4922, + "step": 7893 + }, + { + "epoch": 0.8327004219409283, + "grad_norm": 0.4255370497703552, + "learning_rate": 0.00013554666853609146, + "loss": 1.469, + "step": 7894 + }, + { + "epoch": 0.8328059071729957, + "grad_norm": 0.52479088306427, + "learning_rate": 0.0001353586689906829, + "loss": 1.5346, + "step": 7895 + }, + { + "epoch": 0.8329113924050633, + "grad_norm": 0.4748759865760803, + "learning_rate": 0.00013517093019553444, + "loss": 1.4955, + "step": 7896 + }, + { + "epoch": 0.8330168776371308, + "grad_norm": 0.44746047258377075, + "learning_rate": 0.00013498345178899248, + "loss": 1.4736, + "step": 7897 + }, + { + 
"epoch": 0.8331223628691983, + "grad_norm": 0.466625452041626, + "learning_rate": 0.0001347962334099052, + "loss": 1.5018, + "step": 7898 + }, + { + "epoch": 0.8332278481012658, + "grad_norm": 0.5671567320823669, + "learning_rate": 0.00013460927469762155, + "loss": 1.4827, + "step": 7899 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.42769524455070496, + "learning_rate": 0.00013442257529199068, + "loss": 1.5205, + "step": 7900 + }, + { + "epoch": 0.8334388185654008, + "grad_norm": 0.48645901679992676, + "learning_rate": 0.00013423613483336144, + "loss": 1.4899, + "step": 7901 + }, + { + "epoch": 0.8335443037974684, + "grad_norm": 0.544087827205658, + "learning_rate": 0.00013404995296258118, + "loss": 1.5081, + "step": 7902 + }, + { + "epoch": 0.8336497890295359, + "grad_norm": 0.5929046869277954, + "learning_rate": 0.00013386402932099572, + "loss": 1.456, + "step": 7903 + }, + { + "epoch": 0.8337552742616033, + "grad_norm": 0.4338153004646301, + "learning_rate": 0.0001336783635504482, + "loss": 1.4701, + "step": 7904 + }, + { + "epoch": 0.8338607594936709, + "grad_norm": 0.49112027883529663, + "learning_rate": 0.00013349295529327843, + "loss": 1.5106, + "step": 7905 + }, + { + "epoch": 0.8339662447257384, + "grad_norm": 0.5577452778816223, + "learning_rate": 0.00013330780419232245, + "loss": 1.5185, + "step": 7906 + }, + { + "epoch": 0.8340717299578059, + "grad_norm": 0.5064457654953003, + "learning_rate": 0.0001331229098909114, + "loss": 1.4973, + "step": 7907 + }, + { + "epoch": 0.8341772151898734, + "grad_norm": 0.5183221697807312, + "learning_rate": 0.00013293827203287141, + "loss": 1.5074, + "step": 7908 + }, + { + "epoch": 0.834282700421941, + "grad_norm": 0.5224149227142334, + "learning_rate": 0.00013275389026252255, + "loss": 1.4958, + "step": 7909 + }, + { + "epoch": 0.8343881856540084, + "grad_norm": 0.5373936891555786, + "learning_rate": 0.00013256976422467803, + "loss": 1.5091, + "step": 7910 + }, + { + "epoch": 0.834493670886076, + 
"grad_norm": 0.5740803480148315, + "learning_rate": 0.00013238589356464394, + "loss": 1.4995, + "step": 7911 + }, + { + "epoch": 0.8345991561181435, + "grad_norm": 0.45745906233787537, + "learning_rate": 0.00013220227792821806, + "loss": 1.4902, + "step": 7912 + }, + { + "epoch": 0.8347046413502109, + "grad_norm": 0.475676029920578, + "learning_rate": 0.00013201891696168963, + "loss": 1.5058, + "step": 7913 + }, + { + "epoch": 0.8348101265822785, + "grad_norm": 0.46853286027908325, + "learning_rate": 0.00013183581031183853, + "loss": 1.4598, + "step": 7914 + }, + { + "epoch": 0.834915611814346, + "grad_norm": 0.5400652289390564, + "learning_rate": 0.00013165295762593426, + "loss": 1.5235, + "step": 7915 + }, + { + "epoch": 0.8350210970464135, + "grad_norm": 0.4536226987838745, + "learning_rate": 0.0001314703585517359, + "loss": 1.4968, + "step": 7916 + }, + { + "epoch": 0.835126582278481, + "grad_norm": 0.42296475172042847, + "learning_rate": 0.00013128801273749074, + "loss": 1.4985, + "step": 7917 + }, + { + "epoch": 0.8352320675105486, + "grad_norm": 0.4564037024974823, + "learning_rate": 0.00013110591983193424, + "loss": 1.4992, + "step": 7918 + }, + { + "epoch": 0.835337552742616, + "grad_norm": 0.5482998490333557, + "learning_rate": 0.0001309240794842889, + "loss": 1.4901, + "step": 7919 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 0.5197405815124512, + "learning_rate": 0.00013074249134426366, + "loss": 1.5032, + "step": 7920 + }, + { + "epoch": 0.8355485232067511, + "grad_norm": 0.4468212127685547, + "learning_rate": 0.00013056115506205357, + "loss": 1.5176, + "step": 7921 + }, + { + "epoch": 0.8356540084388185, + "grad_norm": 0.6020689010620117, + "learning_rate": 0.00013038007028833853, + "loss": 1.508, + "step": 7922 + }, + { + "epoch": 0.8357594936708861, + "grad_norm": 0.5337532758712769, + "learning_rate": 0.00013019923667428315, + "loss": 1.5274, + "step": 7923 + }, + { + "epoch": 0.8358649789029536, + "grad_norm": 0.5087836980819702, + 
"learning_rate": 0.0001300186538715359, + "loss": 1.5086, + "step": 7924 + }, + { + "epoch": 0.8359704641350211, + "grad_norm": 0.4363085627555847, + "learning_rate": 0.00012983832153222816, + "loss": 1.4885, + "step": 7925 + }, + { + "epoch": 0.8360759493670886, + "grad_norm": 0.5247862339019775, + "learning_rate": 0.00012965823930897406, + "loss": 1.5262, + "step": 7926 + }, + { + "epoch": 0.8361814345991562, + "grad_norm": 0.4489036798477173, + "learning_rate": 0.00012947840685486933, + "loss": 1.4673, + "step": 7927 + }, + { + "epoch": 0.8362869198312236, + "grad_norm": 0.4503890573978424, + "learning_rate": 0.00012929882382349103, + "loss": 1.5023, + "step": 7928 + }, + { + "epoch": 0.8363924050632912, + "grad_norm": 0.4541943073272705, + "learning_rate": 0.00012911948986889664, + "loss": 1.5163, + "step": 7929 + }, + { + "epoch": 0.8364978902953587, + "grad_norm": 0.46518415212631226, + "learning_rate": 0.0001289404046456233, + "loss": 1.4874, + "step": 7930 + }, + { + "epoch": 0.8366033755274261, + "grad_norm": 0.5406955480575562, + "learning_rate": 0.00012876156780868752, + "loss": 1.5014, + "step": 7931 + }, + { + "epoch": 0.8367088607594937, + "grad_norm": 0.4911135733127594, + "learning_rate": 0.00012858297901358425, + "loss": 1.4872, + "step": 7932 + }, + { + "epoch": 0.8368143459915611, + "grad_norm": 0.44557490944862366, + "learning_rate": 0.0001284046379162861, + "loss": 1.5097, + "step": 7933 + }, + { + "epoch": 0.8369198312236287, + "grad_norm": 0.47949543595314026, + "learning_rate": 0.00012822654417324306, + "loss": 1.499, + "step": 7934 + }, + { + "epoch": 0.8370253164556962, + "grad_norm": 0.4387923777103424, + "learning_rate": 0.00012804869744138136, + "loss": 1.4771, + "step": 7935 + }, + { + "epoch": 0.8371308016877637, + "grad_norm": 0.43199142813682556, + "learning_rate": 0.0001278710973781033, + "loss": 1.4906, + "step": 7936 + }, + { + "epoch": 0.8372362869198312, + "grad_norm": 0.44868308305740356, + "learning_rate": 
0.00012769374364128628, + "loss": 1.4728, + "step": 7937 + }, + { + "epoch": 0.8373417721518988, + "grad_norm": 0.45661893486976624, + "learning_rate": 0.0001275166358892821, + "loss": 1.4951, + "step": 7938 + }, + { + "epoch": 0.8374472573839662, + "grad_norm": 0.4271854758262634, + "learning_rate": 0.00012733977378091664, + "loss": 1.5025, + "step": 7939 + }, + { + "epoch": 0.8375527426160337, + "grad_norm": 0.4594980478286743, + "learning_rate": 0.0001271631569754887, + "loss": 1.5093, + "step": 7940 + }, + { + "epoch": 0.8376582278481013, + "grad_norm": 0.4268159866333008, + "learning_rate": 0.00012698678513276985, + "loss": 1.5065, + "step": 7941 + }, + { + "epoch": 0.8377637130801687, + "grad_norm": 0.4283410608768463, + "learning_rate": 0.00012681065791300352, + "loss": 1.5137, + "step": 7942 + }, + { + "epoch": 0.8378691983122363, + "grad_norm": 0.4050031006336212, + "learning_rate": 0.00012663477497690422, + "loss": 1.4993, + "step": 7943 + }, + { + "epoch": 0.8379746835443038, + "grad_norm": 0.4224381446838379, + "learning_rate": 0.0001264591359856572, + "loss": 1.5073, + "step": 7944 + }, + { + "epoch": 0.8380801687763713, + "grad_norm": 0.4567984938621521, + "learning_rate": 0.00012628374060091757, + "loss": 1.4819, + "step": 7945 + }, + { + "epoch": 0.8381856540084388, + "grad_norm": 0.45892590284347534, + "learning_rate": 0.00012610858848480968, + "loss": 1.5071, + "step": 7946 + }, + { + "epoch": 0.8382911392405064, + "grad_norm": 0.41472315788269043, + "learning_rate": 0.00012593367929992667, + "loss": 1.5063, + "step": 7947 + }, + { + "epoch": 0.8383966244725738, + "grad_norm": 0.42245355248451233, + "learning_rate": 0.00012575901270932944, + "loss": 1.4663, + "step": 7948 + }, + { + "epoch": 0.8385021097046413, + "grad_norm": 0.4199659526348114, + "learning_rate": 0.00012558458837654633, + "loss": 1.497, + "step": 7949 + }, + { + "epoch": 0.8386075949367089, + "grad_norm": 0.43542659282684326, + "learning_rate": 0.0001254104059655723, + "loss": 
1.5093, + "step": 7950 + }, + { + "epoch": 0.8387130801687763, + "grad_norm": 0.4467887580394745, + "learning_rate": 0.0001252364651408684, + "loss": 1.5066, + "step": 7951 + }, + { + "epoch": 0.8388185654008439, + "grad_norm": 0.42111241817474365, + "learning_rate": 0.00012506276556736108, + "loss": 1.4532, + "step": 7952 + }, + { + "epoch": 0.8389240506329114, + "grad_norm": 0.42740586400032043, + "learning_rate": 0.00012488930691044144, + "loss": 1.4988, + "step": 7953 + }, + { + "epoch": 0.8390295358649789, + "grad_norm": 0.3941822350025177, + "learning_rate": 0.00012471608883596476, + "loss": 1.4486, + "step": 7954 + }, + { + "epoch": 0.8391350210970464, + "grad_norm": 0.41999900341033936, + "learning_rate": 0.00012454311101024967, + "loss": 1.466, + "step": 7955 + }, + { + "epoch": 0.839240506329114, + "grad_norm": 0.4354103207588196, + "learning_rate": 0.0001243703731000777, + "loss": 1.4809, + "step": 7956 + }, + { + "epoch": 0.8393459915611814, + "grad_norm": 0.4210897386074066, + "learning_rate": 0.00012419787477269256, + "loss": 1.4956, + "step": 7957 + }, + { + "epoch": 0.8394514767932489, + "grad_norm": 0.43319812417030334, + "learning_rate": 0.00012402561569579935, + "loss": 1.5169, + "step": 7958 + }, + { + "epoch": 0.8395569620253165, + "grad_norm": 0.42039015889167786, + "learning_rate": 0.00012385359553756422, + "loss": 1.5027, + "step": 7959 + }, + { + "epoch": 0.8396624472573839, + "grad_norm": 0.41190609335899353, + "learning_rate": 0.00012368181396661337, + "loss": 1.4953, + "step": 7960 + }, + { + "epoch": 0.8397679324894515, + "grad_norm": 0.45963868498802185, + "learning_rate": 0.00012351027065203284, + "loss": 1.459, + "step": 7961 + }, + { + "epoch": 0.839873417721519, + "grad_norm": 0.4377284646034241, + "learning_rate": 0.00012333896526336752, + "loss": 1.4867, + "step": 7962 + }, + { + "epoch": 0.8399789029535865, + "grad_norm": 0.4265303909778595, + "learning_rate": 0.0001231678974706205, + "loss": 1.4896, + "step": 7963 + }, + { + 
"epoch": 0.840084388185654, + "grad_norm": 0.42678651213645935, + "learning_rate": 0.00012299706694425286, + "loss": 1.5024, + "step": 7964 + }, + { + "epoch": 0.8401898734177216, + "grad_norm": 0.46492063999176025, + "learning_rate": 0.00012282647335518248, + "loss": 1.4917, + "step": 7965 + }, + { + "epoch": 0.840295358649789, + "grad_norm": 0.4300294518470764, + "learning_rate": 0.00012265611637478376, + "loss": 1.4924, + "step": 7966 + }, + { + "epoch": 0.8404008438818565, + "grad_norm": 0.4165033996105194, + "learning_rate": 0.00012248599567488697, + "loss": 1.4996, + "step": 7967 + }, + { + "epoch": 0.8405063291139241, + "grad_norm": 0.47845935821533203, + "learning_rate": 0.00012231611092777743, + "loss": 1.5015, + "step": 7968 + }, + { + "epoch": 0.8406118143459915, + "grad_norm": 0.414779931306839, + "learning_rate": 0.0001221464618061951, + "loss": 1.4643, + "step": 7969 + }, + { + "epoch": 0.8407172995780591, + "grad_norm": 0.4585050940513611, + "learning_rate": 0.00012197704798333364, + "loss": 1.4804, + "step": 7970 + }, + { + "epoch": 0.8408227848101266, + "grad_norm": 0.5141719579696655, + "learning_rate": 0.00012180786913284024, + "loss": 1.5097, + "step": 7971 + }, + { + "epoch": 0.8409282700421941, + "grad_norm": 0.4613543152809143, + "learning_rate": 0.00012163892492881463, + "loss": 1.5212, + "step": 7972 + }, + { + "epoch": 0.8410337552742616, + "grad_norm": 0.4344992935657501, + "learning_rate": 0.00012147021504580839, + "loss": 1.4745, + "step": 7973 + }, + { + "epoch": 0.8411392405063292, + "grad_norm": 0.47057846188545227, + "learning_rate": 0.00012130173915882475, + "loss": 1.4612, + "step": 7974 + }, + { + "epoch": 0.8412447257383966, + "grad_norm": 0.46060535311698914, + "learning_rate": 0.00012113349694331764, + "loss": 1.4989, + "step": 7975 + }, + { + "epoch": 0.8413502109704641, + "grad_norm": 0.4393010139465332, + "learning_rate": 0.0001209654880751909, + "loss": 1.4976, + "step": 7976 + }, + { + "epoch": 0.8414556962025317, + 
"grad_norm": 0.4202762246131897, + "learning_rate": 0.00012079771223079825, + "loss": 1.4643, + "step": 7977 + }, + { + "epoch": 0.8415611814345991, + "grad_norm": 0.3892786204814911, + "learning_rate": 0.00012063016908694192, + "loss": 1.4769, + "step": 7978 + }, + { + "epoch": 0.8416666666666667, + "grad_norm": 0.4344770908355713, + "learning_rate": 0.0001204628583208727, + "loss": 1.4901, + "step": 7979 + }, + { + "epoch": 0.8417721518987342, + "grad_norm": 0.4278308153152466, + "learning_rate": 0.00012029577961028894, + "loss": 1.4604, + "step": 7980 + }, + { + "epoch": 0.8418776371308017, + "grad_norm": 0.4394586980342865, + "learning_rate": 0.00012012893263333586, + "loss": 1.4804, + "step": 7981 + }, + { + "epoch": 0.8419831223628692, + "grad_norm": 0.44682395458221436, + "learning_rate": 0.00011996231706860537, + "loss": 1.4723, + "step": 7982 + }, + { + "epoch": 0.8420886075949368, + "grad_norm": 0.4192502498626709, + "learning_rate": 0.00011979593259513487, + "loss": 1.5238, + "step": 7983 + }, + { + "epoch": 0.8421940928270042, + "grad_norm": 0.4484304189682007, + "learning_rate": 0.00011962977889240712, + "loss": 1.4605, + "step": 7984 + }, + { + "epoch": 0.8422995780590717, + "grad_norm": 0.43339478969573975, + "learning_rate": 0.00011946385564034942, + "loss": 1.5264, + "step": 7985 + }, + { + "epoch": 0.8424050632911393, + "grad_norm": 0.4308030605316162, + "learning_rate": 0.00011929816251933285, + "loss": 1.5039, + "step": 7986 + }, + { + "epoch": 0.8425105485232067, + "grad_norm": 0.48961499333381653, + "learning_rate": 0.00011913269921017203, + "loss": 1.5141, + "step": 7987 + }, + { + "epoch": 0.8426160337552743, + "grad_norm": 0.46717384457588196, + "learning_rate": 0.00011896746539412405, + "loss": 1.5326, + "step": 7988 + }, + { + "epoch": 0.8427215189873418, + "grad_norm": 0.4560268521308899, + "learning_rate": 0.00011880246075288824, + "loss": 1.4736, + "step": 7989 + }, + { + "epoch": 0.8428270042194093, + "grad_norm": 0.4474226236343384, 
+ "learning_rate": 0.00011863768496860542, + "loss": 1.493, + "step": 7990 + }, + { + "epoch": 0.8429324894514768, + "grad_norm": 0.42042869329452515, + "learning_rate": 0.00011847313772385713, + "loss": 1.4915, + "step": 7991 + }, + { + "epoch": 0.8430379746835444, + "grad_norm": 0.434748113155365, + "learning_rate": 0.00011830881870166533, + "loss": 1.4714, + "step": 7992 + }, + { + "epoch": 0.8431434599156118, + "grad_norm": 0.4294203221797943, + "learning_rate": 0.00011814472758549143, + "loss": 1.5106, + "step": 7993 + }, + { + "epoch": 0.8432489451476793, + "grad_norm": 0.4985603094100952, + "learning_rate": 0.00011798086405923605, + "loss": 1.4789, + "step": 7994 + }, + { + "epoch": 0.8433544303797469, + "grad_norm": 0.4327056407928467, + "learning_rate": 0.0001178172278072382, + "loss": 1.4834, + "step": 7995 + }, + { + "epoch": 0.8434599156118143, + "grad_norm": 0.44394436478614807, + "learning_rate": 0.00011765381851427454, + "loss": 1.5142, + "step": 7996 + }, + { + "epoch": 0.8435654008438819, + "grad_norm": 0.4221540093421936, + "learning_rate": 0.00011749063586555921, + "loss": 1.4887, + "step": 7997 + }, + { + "epoch": 0.8436708860759494, + "grad_norm": 0.4521441161632538, + "learning_rate": 0.00011732767954674264, + "loss": 1.4921, + "step": 7998 + }, + { + "epoch": 0.8437763713080169, + "grad_norm": 0.48578810691833496, + "learning_rate": 0.00011716494924391148, + "loss": 1.4957, + "step": 7999 + }, + { + "epoch": 0.8438818565400844, + "grad_norm": 0.4415311813354492, + "learning_rate": 0.00011700244464358777, + "loss": 1.4783, + "step": 8000 + }, + { + "epoch": 0.8439873417721518, + "grad_norm": 0.4834960401058197, + "learning_rate": 0.00011684016543272816, + "loss": 1.5062, + "step": 8001 + }, + { + "epoch": 0.8440928270042194, + "grad_norm": 0.4440629482269287, + "learning_rate": 0.00011667811129872368, + "loss": 1.5352, + "step": 8002 + }, + { + "epoch": 0.8441983122362869, + "grad_norm": 0.43222010135650635, + "learning_rate": 
0.00011651628192939872, + "loss": 1.5149, + "step": 8003 + }, + { + "epoch": 0.8443037974683544, + "grad_norm": 0.5237811207771301, + "learning_rate": 0.0001163546770130109, + "loss": 1.4665, + "step": 8004 + }, + { + "epoch": 0.8444092827004219, + "grad_norm": 0.52041095495224, + "learning_rate": 0.00011619329623825008, + "loss": 1.4809, + "step": 8005 + }, + { + "epoch": 0.8445147679324895, + "grad_norm": 0.4715472459793091, + "learning_rate": 0.00011603213929423784, + "loss": 1.5065, + "step": 8006 + }, + { + "epoch": 0.8446202531645569, + "grad_norm": 0.4383639395236969, + "learning_rate": 0.0001158712058705271, + "loss": 1.5431, + "step": 8007 + }, + { + "epoch": 0.8447257383966245, + "grad_norm": 0.5159831047058105, + "learning_rate": 0.00011571049565710122, + "loss": 1.4806, + "step": 8008 + }, + { + "epoch": 0.844831223628692, + "grad_norm": 0.41650891304016113, + "learning_rate": 0.00011555000834437363, + "loss": 1.4621, + "step": 8009 + }, + { + "epoch": 0.8449367088607594, + "grad_norm": 0.43178558349609375, + "learning_rate": 0.00011538974362318715, + "loss": 1.4795, + "step": 8010 + }, + { + "epoch": 0.845042194092827, + "grad_norm": 0.40558838844299316, + "learning_rate": 0.00011522970118481326, + "loss": 1.4972, + "step": 8011 + }, + { + "epoch": 0.8451476793248945, + "grad_norm": 0.41141316294670105, + "learning_rate": 0.00011506988072095186, + "loss": 1.4808, + "step": 8012 + }, + { + "epoch": 0.845253164556962, + "grad_norm": 0.41347137093544006, + "learning_rate": 0.00011491028192373022, + "loss": 1.4857, + "step": 8013 + }, + { + "epoch": 0.8453586497890295, + "grad_norm": 0.4615418016910553, + "learning_rate": 0.00011475090448570282, + "loss": 1.4939, + "step": 8014 + }, + { + "epoch": 0.8454641350210971, + "grad_norm": 0.4312148988246918, + "learning_rate": 0.00011459174809985047, + "loss": 1.4876, + "step": 8015 + }, + { + "epoch": 0.8455696202531645, + "grad_norm": 0.41781219840049744, + "learning_rate": 0.00011443281245957975, + "loss": 
1.4842, + "step": 8016 + }, + { + "epoch": 0.8456751054852321, + "grad_norm": 0.39488255977630615, + "learning_rate": 0.00011427409725872264, + "loss": 1.4404, + "step": 8017 + }, + { + "epoch": 0.8457805907172996, + "grad_norm": 0.4170038402080536, + "learning_rate": 0.00011411560219153552, + "loss": 1.5243, + "step": 8018 + }, + { + "epoch": 0.845886075949367, + "grad_norm": 0.4558310806751251, + "learning_rate": 0.00011395732695269907, + "loss": 1.5108, + "step": 8019 + }, + { + "epoch": 0.8459915611814346, + "grad_norm": 0.4590003788471222, + "learning_rate": 0.00011379927123731737, + "loss": 1.4902, + "step": 8020 + }, + { + "epoch": 0.8460970464135021, + "grad_norm": 0.41422438621520996, + "learning_rate": 0.00011364143474091725, + "loss": 1.5009, + "step": 8021 + }, + { + "epoch": 0.8462025316455696, + "grad_norm": 0.41619187593460083, + "learning_rate": 0.00011348381715944802, + "loss": 1.4814, + "step": 8022 + }, + { + "epoch": 0.8463080168776371, + "grad_norm": 0.42624762654304504, + "learning_rate": 0.00011332641818928063, + "loss": 1.4605, + "step": 8023 + }, + { + "epoch": 0.8464135021097047, + "grad_norm": 0.4099412262439728, + "learning_rate": 0.00011316923752720708, + "loss": 1.5359, + "step": 8024 + }, + { + "epoch": 0.8465189873417721, + "grad_norm": 0.4438430368900299, + "learning_rate": 0.00011301227487044006, + "loss": 1.5129, + "step": 8025 + }, + { + "epoch": 0.8466244725738397, + "grad_norm": 0.4400119185447693, + "learning_rate": 0.00011285552991661202, + "loss": 1.4865, + "step": 8026 + }, + { + "epoch": 0.8467299578059072, + "grad_norm": 0.4254751205444336, + "learning_rate": 0.00011269900236377497, + "loss": 1.4742, + "step": 8027 + }, + { + "epoch": 0.8468354430379746, + "grad_norm": 0.453046590089798, + "learning_rate": 0.0001125426919103997, + "loss": 1.4691, + "step": 8028 + }, + { + "epoch": 0.8469409282700422, + "grad_norm": 0.4189947545528412, + "learning_rate": 0.00011238659825537507, + "loss": 1.5128, + "step": 8029 + }, + { + 
"epoch": 0.8470464135021097, + "grad_norm": 0.4589027464389801, + "learning_rate": 0.0001122307210980077, + "loss": 1.4782, + "step": 8030 + }, + { + "epoch": 0.8471518987341772, + "grad_norm": 0.4292374849319458, + "learning_rate": 0.00011207506013802118, + "loss": 1.5112, + "step": 8031 + }, + { + "epoch": 0.8472573839662447, + "grad_norm": 0.4513934254646301, + "learning_rate": 0.00011191961507555564, + "loss": 1.4644, + "step": 8032 + }, + { + "epoch": 0.8473628691983123, + "grad_norm": 0.42146608233451843, + "learning_rate": 0.00011176438561116717, + "loss": 1.5161, + "step": 8033 + }, + { + "epoch": 0.8474683544303797, + "grad_norm": 0.41826725006103516, + "learning_rate": 0.00011160937144582693, + "loss": 1.4671, + "step": 8034 + }, + { + "epoch": 0.8475738396624473, + "grad_norm": 0.5036130547523499, + "learning_rate": 0.00011145457228092119, + "loss": 1.504, + "step": 8035 + }, + { + "epoch": 0.8476793248945148, + "grad_norm": 0.43344515562057495, + "learning_rate": 0.00011129998781824997, + "loss": 1.5119, + "step": 8036 + }, + { + "epoch": 0.8477848101265822, + "grad_norm": 0.44761455059051514, + "learning_rate": 0.00011114561776002725, + "loss": 1.4994, + "step": 8037 + }, + { + "epoch": 0.8478902953586498, + "grad_norm": 0.44356489181518555, + "learning_rate": 0.00011099146180887992, + "loss": 1.5404, + "step": 8038 + }, + { + "epoch": 0.8479957805907173, + "grad_norm": 0.4488217830657959, + "learning_rate": 0.00011083751966784716, + "loss": 1.5245, + "step": 8039 + }, + { + "epoch": 0.8481012658227848, + "grad_norm": 0.40449029207229614, + "learning_rate": 0.00011068379104038026, + "loss": 1.5084, + "step": 8040 + }, + { + "epoch": 0.8482067510548523, + "grad_norm": 0.4726657569408417, + "learning_rate": 0.00011053027563034162, + "loss": 1.4816, + "step": 8041 + }, + { + "epoch": 0.8483122362869199, + "grad_norm": 0.45385703444480896, + "learning_rate": 0.0001103769731420045, + "loss": 1.4382, + "step": 8042 + }, + { + "epoch": 0.8484177215189873, + 
"grad_norm": 0.469437837600708, + "learning_rate": 0.00011022388328005234, + "loss": 1.5195, + "step": 8043 + }, + { + "epoch": 0.8485232067510549, + "grad_norm": 0.4331592619419098, + "learning_rate": 0.00011007100574957802, + "loss": 1.4807, + "step": 8044 + }, + { + "epoch": 0.8486286919831224, + "grad_norm": 0.44067615270614624, + "learning_rate": 0.00010991834025608366, + "loss": 1.4719, + "step": 8045 + }, + { + "epoch": 0.8487341772151898, + "grad_norm": 0.446806937456131, + "learning_rate": 0.00010976588650547961, + "loss": 1.4813, + "step": 8046 + }, + { + "epoch": 0.8488396624472574, + "grad_norm": 0.4759073853492737, + "learning_rate": 0.00010961364420408429, + "loss": 1.4596, + "step": 8047 + }, + { + "epoch": 0.8489451476793249, + "grad_norm": 0.4522542953491211, + "learning_rate": 0.0001094616130586235, + "loss": 1.4473, + "step": 8048 + }, + { + "epoch": 0.8490506329113924, + "grad_norm": 0.45733293890953064, + "learning_rate": 0.00010930979277622952, + "loss": 1.541, + "step": 8049 + }, + { + "epoch": 0.8491561181434599, + "grad_norm": 0.4479137361049652, + "learning_rate": 0.00010915818306444116, + "loss": 1.5032, + "step": 8050 + }, + { + "epoch": 0.8492616033755275, + "grad_norm": 0.48625561594963074, + "learning_rate": 0.00010900678363120256, + "loss": 1.5059, + "step": 8051 + }, + { + "epoch": 0.8493670886075949, + "grad_norm": 0.44374218583106995, + "learning_rate": 0.00010885559418486318, + "loss": 1.4674, + "step": 8052 + }, + { + "epoch": 0.8494725738396625, + "grad_norm": 0.4311735928058624, + "learning_rate": 0.00010870461443417695, + "loss": 1.5055, + "step": 8053 + }, + { + "epoch": 0.84957805907173, + "grad_norm": 0.42234835028648376, + "learning_rate": 0.00010855384408830159, + "loss": 1.4839, + "step": 8054 + }, + { + "epoch": 0.8496835443037974, + "grad_norm": 0.44683894515037537, + "learning_rate": 0.0001084032828567984, + "loss": 1.509, + "step": 8055 + }, + { + "epoch": 0.849789029535865, + "grad_norm": 0.4713289737701416, + 
"learning_rate": 0.00010825293044963134, + "loss": 1.4915, + "step": 8056 + }, + { + "epoch": 0.8498945147679325, + "grad_norm": 0.42036235332489014, + "learning_rate": 0.00010810278657716679, + "loss": 1.4966, + "step": 8057 + }, + { + "epoch": 0.85, + "grad_norm": 0.4391077756881714, + "learning_rate": 0.00010795285095017282, + "loss": 1.5194, + "step": 8058 + }, + { + "epoch": 0.8501054852320675, + "grad_norm": 0.4358140826225281, + "learning_rate": 0.00010780312327981853, + "loss": 1.4412, + "step": 8059 + }, + { + "epoch": 0.8502109704641351, + "grad_norm": 0.4823141098022461, + "learning_rate": 0.00010765360327767384, + "loss": 1.4924, + "step": 8060 + }, + { + "epoch": 0.8503164556962025, + "grad_norm": 0.45511600375175476, + "learning_rate": 0.00010750429065570842, + "loss": 1.4973, + "step": 8061 + }, + { + "epoch": 0.8504219409282701, + "grad_norm": 0.4123309552669525, + "learning_rate": 0.0001073551851262917, + "loss": 1.4953, + "step": 8062 + }, + { + "epoch": 0.8505274261603376, + "grad_norm": 0.46833568811416626, + "learning_rate": 0.000107206286402192, + "loss": 1.4532, + "step": 8063 + }, + { + "epoch": 0.850632911392405, + "grad_norm": 0.44981813430786133, + "learning_rate": 0.00010705759419657583, + "loss": 1.5022, + "step": 8064 + }, + { + "epoch": 0.8507383966244726, + "grad_norm": 0.45205560326576233, + "learning_rate": 0.00010690910822300775, + "loss": 1.5429, + "step": 8065 + }, + { + "epoch": 0.85084388185654, + "grad_norm": 0.4144775867462158, + "learning_rate": 0.00010676082819544952, + "loss": 1.4768, + "step": 8066 + }, + { + "epoch": 0.8509493670886076, + "grad_norm": 0.4440843462944031, + "learning_rate": 0.00010661275382825955, + "loss": 1.4812, + "step": 8067 + }, + { + "epoch": 0.8510548523206751, + "grad_norm": 0.4347103238105774, + "learning_rate": 0.00010646488483619263, + "loss": 1.5075, + "step": 8068 + }, + { + "epoch": 0.8511603375527426, + "grad_norm": 0.42615094780921936, + "learning_rate": 0.0001063172209343989, + "loss": 
1.5088, + "step": 8069 + }, + { + "epoch": 0.8512658227848101, + "grad_norm": 0.42323145270347595, + "learning_rate": 0.00010616976183842376, + "loss": 1.4937, + "step": 8070 + }, + { + "epoch": 0.8513713080168777, + "grad_norm": 0.4129416346549988, + "learning_rate": 0.00010602250726420722, + "loss": 1.4648, + "step": 8071 + }, + { + "epoch": 0.8514767932489451, + "grad_norm": 0.4267805218696594, + "learning_rate": 0.00010587545692808299, + "loss": 1.4962, + "step": 8072 + }, + { + "epoch": 0.8515822784810126, + "grad_norm": 0.44005367159843445, + "learning_rate": 0.00010572861054677855, + "loss": 1.4967, + "step": 8073 + }, + { + "epoch": 0.8516877637130802, + "grad_norm": 0.43603193759918213, + "learning_rate": 0.00010558196783741396, + "loss": 1.5079, + "step": 8074 + }, + { + "epoch": 0.8517932489451476, + "grad_norm": 0.42224976420402527, + "learning_rate": 0.00010543552851750185, + "loss": 1.5001, + "step": 8075 + }, + { + "epoch": 0.8518987341772152, + "grad_norm": 0.44029930233955383, + "learning_rate": 0.00010528929230494662, + "loss": 1.5462, + "step": 8076 + }, + { + "epoch": 0.8520042194092827, + "grad_norm": 0.4320455491542816, + "learning_rate": 0.00010514325891804378, + "loss": 1.4978, + "step": 8077 + }, + { + "epoch": 0.8521097046413502, + "grad_norm": 0.4057701826095581, + "learning_rate": 0.00010499742807547978, + "loss": 1.4619, + "step": 8078 + }, + { + "epoch": 0.8522151898734177, + "grad_norm": 0.4253707528114319, + "learning_rate": 0.000104851799496331, + "loss": 1.4994, + "step": 8079 + }, + { + "epoch": 0.8523206751054853, + "grad_norm": 0.450650155544281, + "learning_rate": 0.00010470637290006365, + "loss": 1.5115, + "step": 8080 + }, + { + "epoch": 0.8524261603375527, + "grad_norm": 0.42391568422317505, + "learning_rate": 0.000104561148006533, + "loss": 1.4405, + "step": 8081 + }, + { + "epoch": 0.8525316455696202, + "grad_norm": 0.4349210858345032, + "learning_rate": 0.00010441612453598276, + "loss": 1.4248, + "step": 8082 + }, + { + 
"epoch": 0.8526371308016878, + "grad_norm": 0.4500083327293396, + "learning_rate": 0.0001042713022090448, + "loss": 1.4697, + "step": 8083 + }, + { + "epoch": 0.8527426160337552, + "grad_norm": 0.4362918734550476, + "learning_rate": 0.00010412668074673832, + "loss": 1.483, + "step": 8084 + }, + { + "epoch": 0.8528481012658228, + "grad_norm": 0.4373531639575958, + "learning_rate": 0.00010398225987046957, + "loss": 1.4973, + "step": 8085 + }, + { + "epoch": 0.8529535864978903, + "grad_norm": 0.4534114599227905, + "learning_rate": 0.00010383803930203124, + "loss": 1.4944, + "step": 8086 + }, + { + "epoch": 0.8530590717299578, + "grad_norm": 0.43459632992744446, + "learning_rate": 0.00010369401876360168, + "loss": 1.4851, + "step": 8087 + }, + { + "epoch": 0.8531645569620253, + "grad_norm": 0.4869888424873352, + "learning_rate": 0.0001035501979777448, + "loss": 1.508, + "step": 8088 + }, + { + "epoch": 0.8532700421940929, + "grad_norm": 0.4642571806907654, + "learning_rate": 0.00010340657666740917, + "loss": 1.5368, + "step": 8089 + }, + { + "epoch": 0.8533755274261603, + "grad_norm": 0.4289516806602478, + "learning_rate": 0.00010326315455592764, + "loss": 1.5203, + "step": 8090 + }, + { + "epoch": 0.8534810126582278, + "grad_norm": 0.4206893742084503, + "learning_rate": 0.00010311993136701693, + "loss": 1.493, + "step": 8091 + }, + { + "epoch": 0.8535864978902954, + "grad_norm": 0.44617852568626404, + "learning_rate": 0.00010297690682477669, + "loss": 1.4847, + "step": 8092 + }, + { + "epoch": 0.8536919831223628, + "grad_norm": 0.4692865014076233, + "learning_rate": 0.00010283408065368951, + "loss": 1.4852, + "step": 8093 + }, + { + "epoch": 0.8537974683544304, + "grad_norm": 0.40832483768463135, + "learning_rate": 0.00010269145257861988, + "loss": 1.4798, + "step": 8094 + }, + { + "epoch": 0.8539029535864979, + "grad_norm": 0.42477303743362427, + "learning_rate": 0.00010254902232481406, + "loss": 1.4398, + "step": 8095 + }, + { + "epoch": 0.8540084388185654, + 
"grad_norm": 0.4500899910926819, + "learning_rate": 0.0001024067896178994, + "loss": 1.4706, + "step": 8096 + }, + { + "epoch": 0.8541139240506329, + "grad_norm": 0.42628321051597595, + "learning_rate": 0.0001022647541838836, + "loss": 1.5005, + "step": 8097 + }, + { + "epoch": 0.8542194092827005, + "grad_norm": 0.4567301273345947, + "learning_rate": 0.00010212291574915464, + "loss": 1.4651, + "step": 8098 + }, + { + "epoch": 0.8543248945147679, + "grad_norm": 0.466998428106308, + "learning_rate": 0.00010198127404047976, + "loss": 1.4847, + "step": 8099 + }, + { + "epoch": 0.8544303797468354, + "grad_norm": 0.4482998847961426, + "learning_rate": 0.0001018398287850053, + "loss": 1.498, + "step": 8100 + }, + { + "epoch": 0.854535864978903, + "grad_norm": 0.46892642974853516, + "learning_rate": 0.00010169857971025608, + "loss": 1.506, + "step": 8101 + }, + { + "epoch": 0.8546413502109704, + "grad_norm": 0.43885427713394165, + "learning_rate": 0.00010155752654413465, + "loss": 1.5372, + "step": 8102 + }, + { + "epoch": 0.854746835443038, + "grad_norm": 0.4367867112159729, + "learning_rate": 0.00010141666901492119, + "loss": 1.4863, + "step": 8103 + }, + { + "epoch": 0.8548523206751055, + "grad_norm": 0.45450180768966675, + "learning_rate": 0.00010127600685127249, + "loss": 1.5218, + "step": 8104 + }, + { + "epoch": 0.854957805907173, + "grad_norm": 0.40008068084716797, + "learning_rate": 0.0001011355397822219, + "loss": 1.5028, + "step": 8105 + }, + { + "epoch": 0.8550632911392405, + "grad_norm": 0.4291958808898926, + "learning_rate": 0.00010099526753717856, + "loss": 1.4834, + "step": 8106 + }, + { + "epoch": 0.8551687763713081, + "grad_norm": 0.43436047434806824, + "learning_rate": 0.00010085518984592678, + "loss": 1.4702, + "step": 8107 + }, + { + "epoch": 0.8552742616033755, + "grad_norm": 0.48040902614593506, + "learning_rate": 0.00010071530643862575, + "loss": 1.4593, + "step": 8108 + }, + { + "epoch": 0.855379746835443, + "grad_norm": 0.4501969516277313, + 
"learning_rate": 0.00010057561704580898, + "loss": 1.499, + "step": 8109 + }, + { + "epoch": 0.8554852320675106, + "grad_norm": 0.4556061029434204, + "learning_rate": 0.00010043612139838357, + "loss": 1.5299, + "step": 8110 + }, + { + "epoch": 0.855590717299578, + "grad_norm": 0.44409841299057007, + "learning_rate": 0.00010029681922763002, + "loss": 1.501, + "step": 8111 + }, + { + "epoch": 0.8556962025316456, + "grad_norm": 0.4314638674259186, + "learning_rate": 0.0001001577102652013, + "loss": 1.5157, + "step": 8112 + }, + { + "epoch": 0.8558016877637131, + "grad_norm": 0.45977160334587097, + "learning_rate": 0.00010001879424312283, + "loss": 1.445, + "step": 8113 + }, + { + "epoch": 0.8559071729957806, + "grad_norm": 0.4589596092700958, + "learning_rate": 9.988007089379161e-05, + "loss": 1.5187, + "step": 8114 + }, + { + "epoch": 0.8560126582278481, + "grad_norm": 0.42022794485092163, + "learning_rate": 9.974153994997569e-05, + "loss": 1.4916, + "step": 8115 + }, + { + "epoch": 0.8561181434599157, + "grad_norm": 0.4470304548740387, + "learning_rate": 9.960320114481391e-05, + "loss": 1.4911, + "step": 8116 + }, + { + "epoch": 0.8562236286919831, + "grad_norm": 0.4570592939853668, + "learning_rate": 9.946505421181513e-05, + "loss": 1.514, + "step": 8117 + }, + { + "epoch": 0.8563291139240506, + "grad_norm": 0.41842663288116455, + "learning_rate": 9.932709888485788e-05, + "loss": 1.4606, + "step": 8118 + }, + { + "epoch": 0.8564345991561182, + "grad_norm": 0.44179171323776245, + "learning_rate": 9.918933489818986e-05, + "loss": 1.4711, + "step": 8119 + }, + { + "epoch": 0.8565400843881856, + "grad_norm": 0.4474397599697113, + "learning_rate": 9.905176198642719e-05, + "loss": 1.4614, + "step": 8120 + }, + { + "epoch": 0.8566455696202532, + "grad_norm": 0.45716580748558044, + "learning_rate": 9.891437988455427e-05, + "loss": 1.4908, + "step": 8121 + }, + { + "epoch": 0.8567510548523207, + "grad_norm": 0.4659733474254608, + "learning_rate": 9.877718832792286e-05, + 
"loss": 1.484, + "step": 8122 + }, + { + "epoch": 0.8568565400843882, + "grad_norm": 0.4287756085395813, + "learning_rate": 9.864018705225193e-05, + "loss": 1.4818, + "step": 8123 + }, + { + "epoch": 0.8569620253164557, + "grad_norm": 0.4370841085910797, + "learning_rate": 9.850337579362703e-05, + "loss": 1.4859, + "step": 8124 + }, + { + "epoch": 0.8570675105485233, + "grad_norm": 0.43437787890434265, + "learning_rate": 9.836675428849958e-05, + "loss": 1.4864, + "step": 8125 + }, + { + "epoch": 0.8571729957805907, + "grad_norm": 0.4429851174354553, + "learning_rate": 9.823032227368671e-05, + "loss": 1.4941, + "step": 8126 + }, + { + "epoch": 0.8572784810126582, + "grad_norm": 0.4362345337867737, + "learning_rate": 9.809407948637043e-05, + "loss": 1.4925, + "step": 8127 + }, + { + "epoch": 0.8573839662447258, + "grad_norm": 0.43401530385017395, + "learning_rate": 9.79580256640974e-05, + "loss": 1.4923, + "step": 8128 + }, + { + "epoch": 0.8574894514767932, + "grad_norm": 0.4468132555484772, + "learning_rate": 9.782216054477828e-05, + "loss": 1.5269, + "step": 8129 + }, + { + "epoch": 0.8575949367088608, + "grad_norm": 0.4374653398990631, + "learning_rate": 9.76864838666871e-05, + "loss": 1.4567, + "step": 8130 + }, + { + "epoch": 0.8577004219409282, + "grad_norm": 0.4234811067581177, + "learning_rate": 9.755099536846107e-05, + "loss": 1.4524, + "step": 8131 + }, + { + "epoch": 0.8578059071729958, + "grad_norm": 0.4564134180545807, + "learning_rate": 9.741569478909979e-05, + "loss": 1.4862, + "step": 8132 + }, + { + "epoch": 0.8579113924050633, + "grad_norm": 0.44542422890663147, + "learning_rate": 9.728058186796492e-05, + "loss": 1.5007, + "step": 8133 + }, + { + "epoch": 0.8580168776371307, + "grad_norm": 0.42091959714889526, + "learning_rate": 9.714565634477964e-05, + "loss": 1.4717, + "step": 8134 + }, + { + "epoch": 0.8581223628691983, + "grad_norm": 0.4152686595916748, + "learning_rate": 9.7010917959628e-05, + "loss": 1.4923, + "step": 8135 + }, + { + "epoch": 
0.8582278481012658, + "grad_norm": 0.4614063799381256, + "learning_rate": 9.687636645295472e-05, + "loss": 1.5078, + "step": 8136 + }, + { + "epoch": 0.8583333333333333, + "grad_norm": 0.42790308594703674, + "learning_rate": 9.674200156556436e-05, + "loss": 1.4799, + "step": 8137 + }, + { + "epoch": 0.8584388185654008, + "grad_norm": 0.4314062297344208, + "learning_rate": 9.660782303862107e-05, + "loss": 1.4988, + "step": 8138 + }, + { + "epoch": 0.8585443037974684, + "grad_norm": 0.46413227915763855, + "learning_rate": 9.647383061364803e-05, + "loss": 1.4954, + "step": 8139 + }, + { + "epoch": 0.8586497890295358, + "grad_norm": 0.43041038513183594, + "learning_rate": 9.634002403252676e-05, + "loss": 1.4878, + "step": 8140 + }, + { + "epoch": 0.8587552742616034, + "grad_norm": 0.41232773661613464, + "learning_rate": 9.6206403037497e-05, + "loss": 1.4551, + "step": 8141 + }, + { + "epoch": 0.8588607594936709, + "grad_norm": 0.4198618233203888, + "learning_rate": 9.60729673711558e-05, + "loss": 1.4883, + "step": 8142 + }, + { + "epoch": 0.8589662447257383, + "grad_norm": 0.4003661274909973, + "learning_rate": 9.593971677645732e-05, + "loss": 1.5032, + "step": 8143 + }, + { + "epoch": 0.8590717299578059, + "grad_norm": 0.4171888530254364, + "learning_rate": 9.58066509967123e-05, + "loss": 1.47, + "step": 8144 + }, + { + "epoch": 0.8591772151898734, + "grad_norm": 0.4533592164516449, + "learning_rate": 9.56737697755873e-05, + "loss": 1.4715, + "step": 8145 + }, + { + "epoch": 0.8592827004219409, + "grad_norm": 0.4214538633823395, + "learning_rate": 9.554107285710464e-05, + "loss": 1.5216, + "step": 8146 + }, + { + "epoch": 0.8593881856540084, + "grad_norm": 0.4249246418476105, + "learning_rate": 9.540855998564147e-05, + "loss": 1.4804, + "step": 8147 + }, + { + "epoch": 0.859493670886076, + "grad_norm": 0.46606525778770447, + "learning_rate": 9.527623090592962e-05, + "loss": 1.4876, + "step": 8148 + }, + { + "epoch": 0.8595991561181434, + "grad_norm": 
0.4458865821361542, + "learning_rate": 9.514408536305497e-05, + "loss": 1.4607, + "step": 8149 + }, + { + "epoch": 0.859704641350211, + "grad_norm": 0.42472967505455017, + "learning_rate": 9.501212310245681e-05, + "loss": 1.5226, + "step": 8150 + }, + { + "epoch": 0.8598101265822785, + "grad_norm": 0.41407743096351624, + "learning_rate": 9.488034386992769e-05, + "loss": 1.4903, + "step": 8151 + }, + { + "epoch": 0.859915611814346, + "grad_norm": 0.4198327362537384, + "learning_rate": 9.474874741161267e-05, + "loss": 1.4715, + "step": 8152 + }, + { + "epoch": 0.8600210970464135, + "grad_norm": 0.4602448344230652, + "learning_rate": 9.461733347400877e-05, + "loss": 1.5297, + "step": 8153 + }, + { + "epoch": 0.860126582278481, + "grad_norm": 0.44824647903442383, + "learning_rate": 9.448610180396487e-05, + "loss": 1.5045, + "step": 8154 + }, + { + "epoch": 0.8602320675105485, + "grad_norm": 0.43564558029174805, + "learning_rate": 9.435505214868068e-05, + "loss": 1.5135, + "step": 8155 + }, + { + "epoch": 0.860337552742616, + "grad_norm": 0.4624049961566925, + "learning_rate": 9.422418425570673e-05, + "loss": 1.516, + "step": 8156 + }, + { + "epoch": 0.8604430379746836, + "grad_norm": 0.4127613604068756, + "learning_rate": 9.409349787294371e-05, + "loss": 1.4763, + "step": 8157 + }, + { + "epoch": 0.860548523206751, + "grad_norm": 0.43876078724861145, + "learning_rate": 9.396299274864176e-05, + "loss": 1.4847, + "step": 8158 + }, + { + "epoch": 0.8606540084388186, + "grad_norm": 0.40391695499420166, + "learning_rate": 9.383266863140043e-05, + "loss": 1.461, + "step": 8159 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 0.4061271846294403, + "learning_rate": 9.370252527016777e-05, + "loss": 1.4961, + "step": 8160 + }, + { + "epoch": 0.8608649789029535, + "grad_norm": 0.4176800847053528, + "learning_rate": 9.357256241424012e-05, + "loss": 1.4585, + "step": 8161 + }, + { + "epoch": 0.8609704641350211, + "grad_norm": 0.4413154125213623, + "learning_rate": 
9.34427798132616e-05, + "loss": 1.5116, + "step": 8162 + }, + { + "epoch": 0.8610759493670886, + "grad_norm": 0.46794503927230835, + "learning_rate": 9.331317721722338e-05, + "loss": 1.5133, + "step": 8163 + }, + { + "epoch": 0.8611814345991561, + "grad_norm": 0.4146137237548828, + "learning_rate": 9.318375437646363e-05, + "loss": 1.4636, + "step": 8164 + }, + { + "epoch": 0.8612869198312236, + "grad_norm": 0.4689750075340271, + "learning_rate": 9.305451104166652e-05, + "loss": 1.4809, + "step": 8165 + }, + { + "epoch": 0.8613924050632912, + "grad_norm": 0.3947790265083313, + "learning_rate": 9.292544696386227e-05, + "loss": 1.5177, + "step": 8166 + }, + { + "epoch": 0.8614978902953586, + "grad_norm": 0.4780673682689667, + "learning_rate": 9.279656189442629e-05, + "loss": 1.494, + "step": 8167 + }, + { + "epoch": 0.8616033755274262, + "grad_norm": 0.41865167021751404, + "learning_rate": 9.266785558507876e-05, + "loss": 1.5005, + "step": 8168 + }, + { + "epoch": 0.8617088607594937, + "grad_norm": 0.43959128856658936, + "learning_rate": 9.25393277878844e-05, + "loss": 1.4673, + "step": 8169 + }, + { + "epoch": 0.8618143459915611, + "grad_norm": 0.4763071537017822, + "learning_rate": 9.241097825525163e-05, + "loss": 1.4544, + "step": 8170 + }, + { + "epoch": 0.8619198312236287, + "grad_norm": 0.4068686068058014, + "learning_rate": 9.228280673993234e-05, + "loss": 1.4956, + "step": 8171 + }, + { + "epoch": 0.8620253164556962, + "grad_norm": 0.4224543273448944, + "learning_rate": 9.215481299502145e-05, + "loss": 1.4857, + "step": 8172 + }, + { + "epoch": 0.8621308016877637, + "grad_norm": 0.4506922662258148, + "learning_rate": 9.202699677395613e-05, + "loss": 1.5292, + "step": 8173 + }, + { + "epoch": 0.8622362869198312, + "grad_norm": 0.45534196496009827, + "learning_rate": 9.189935783051572e-05, + "loss": 1.4902, + "step": 8174 + }, + { + "epoch": 0.8623417721518988, + "grad_norm": 0.48427584767341614, + "learning_rate": 9.177189591882086e-05, + "loss": 1.4493, + 
"step": 8175 + }, + { + "epoch": 0.8624472573839662, + "grad_norm": 0.47470489144325256, + "learning_rate": 9.164461079333342e-05, + "loss": 1.4936, + "step": 8176 + }, + { + "epoch": 0.8625527426160338, + "grad_norm": 0.4574161469936371, + "learning_rate": 9.151750220885574e-05, + "loss": 1.5146, + "step": 8177 + }, + { + "epoch": 0.8626582278481013, + "grad_norm": 0.44946157932281494, + "learning_rate": 9.139056992053017e-05, + "loss": 1.4772, + "step": 8178 + }, + { + "epoch": 0.8627637130801687, + "grad_norm": 0.4752751588821411, + "learning_rate": 9.126381368383881e-05, + "loss": 1.4814, + "step": 8179 + }, + { + "epoch": 0.8628691983122363, + "grad_norm": 0.4545336067676544, + "learning_rate": 9.113723325460276e-05, + "loss": 1.5289, + "step": 8180 + }, + { + "epoch": 0.8629746835443038, + "grad_norm": 0.46342095732688904, + "learning_rate": 9.101082838898188e-05, + "loss": 1.4866, + "step": 8181 + }, + { + "epoch": 0.8630801687763713, + "grad_norm": 0.4117826223373413, + "learning_rate": 9.088459884347427e-05, + "loss": 1.4883, + "step": 8182 + }, + { + "epoch": 0.8631856540084388, + "grad_norm": 0.4462929666042328, + "learning_rate": 9.07585443749156e-05, + "loss": 1.4792, + "step": 8183 + }, + { + "epoch": 0.8632911392405064, + "grad_norm": 0.46005716919898987, + "learning_rate": 9.063266474047899e-05, + "loss": 1.5008, + "step": 8184 + }, + { + "epoch": 0.8633966244725738, + "grad_norm": 0.42831310629844666, + "learning_rate": 9.050695969767418e-05, + "loss": 1.4929, + "step": 8185 + }, + { + "epoch": 0.8635021097046414, + "grad_norm": 0.46223482489585876, + "learning_rate": 9.038142900434736e-05, + "loss": 1.4957, + "step": 8186 + }, + { + "epoch": 0.8636075949367089, + "grad_norm": 0.4762667119503021, + "learning_rate": 9.02560724186806e-05, + "loss": 1.4786, + "step": 8187 + }, + { + "epoch": 0.8637130801687763, + "grad_norm": 0.4461197853088379, + "learning_rate": 9.01308896991912e-05, + "loss": 1.4724, + "step": 8188 + }, + { + "epoch": 
0.8638185654008439, + "grad_norm": 0.4695635735988617, + "learning_rate": 9.000588060473158e-05, + "loss": 1.4956, + "step": 8189 + }, + { + "epoch": 0.8639240506329114, + "grad_norm": 0.4212546646595001, + "learning_rate": 8.988104489448849e-05, + "loss": 1.48, + "step": 8190 + }, + { + "epoch": 0.8640295358649789, + "grad_norm": 0.4889835715293884, + "learning_rate": 8.975638232798275e-05, + "loss": 1.4948, + "step": 8191 + }, + { + "epoch": 0.8641350210970464, + "grad_norm": 0.42373472452163696, + "learning_rate": 8.963189266506874e-05, + "loss": 1.4802, + "step": 8192 + }, + { + "epoch": 0.864240506329114, + "grad_norm": 0.43615368008613586, + "learning_rate": 8.950757566593381e-05, + "loss": 1.4774, + "step": 8193 + }, + { + "epoch": 0.8643459915611814, + "grad_norm": 0.45238667726516724, + "learning_rate": 8.938343109109802e-05, + "loss": 1.5119, + "step": 8194 + }, + { + "epoch": 0.864451476793249, + "grad_norm": 0.42367780208587646, + "learning_rate": 8.925945870141361e-05, + "loss": 1.4737, + "step": 8195 + }, + { + "epoch": 0.8645569620253165, + "grad_norm": 0.4457396864891052, + "learning_rate": 8.913565825806436e-05, + "loss": 1.5324, + "step": 8196 + }, + { + "epoch": 0.864662447257384, + "grad_norm": 0.4391903281211853, + "learning_rate": 8.901202952256546e-05, + "loss": 1.5069, + "step": 8197 + }, + { + "epoch": 0.8647679324894515, + "grad_norm": 0.42450007796287537, + "learning_rate": 8.88885722567627e-05, + "loss": 1.4933, + "step": 8198 + }, + { + "epoch": 0.8648734177215189, + "grad_norm": 0.40664932131767273, + "learning_rate": 8.876528622283232e-05, + "loss": 1.5231, + "step": 8199 + }, + { + "epoch": 0.8649789029535865, + "grad_norm": 0.4372657835483551, + "learning_rate": 8.864217118328042e-05, + "loss": 1.4769, + "step": 8200 + }, + { + "epoch": 0.865084388185654, + "grad_norm": 0.411454439163208, + "learning_rate": 8.851922690094234e-05, + "loss": 1.4916, + "step": 8201 + }, + { + "epoch": 0.8651898734177215, + "grad_norm": 
0.43713468313217163, + "learning_rate": 8.839645313898257e-05, + "loss": 1.5292, + "step": 8202 + }, + { + "epoch": 0.865295358649789, + "grad_norm": 0.4268801510334015, + "learning_rate": 8.827384966089387e-05, + "loss": 1.4786, + "step": 8203 + }, + { + "epoch": 0.8654008438818566, + "grad_norm": 0.45576122403144836, + "learning_rate": 8.81514162304972e-05, + "loss": 1.468, + "step": 8204 + }, + { + "epoch": 0.865506329113924, + "grad_norm": 0.49695488810539246, + "learning_rate": 8.802915261194108e-05, + "loss": 1.4838, + "step": 8205 + }, + { + "epoch": 0.8656118143459915, + "grad_norm": 0.45548149943351746, + "learning_rate": 8.7907058569701e-05, + "loss": 1.4867, + "step": 8206 + }, + { + "epoch": 0.8657172995780591, + "grad_norm": 0.5141830444335938, + "learning_rate": 8.778513386857931e-05, + "loss": 1.5063, + "step": 8207 + }, + { + "epoch": 0.8658227848101265, + "grad_norm": 0.4497213065624237, + "learning_rate": 8.766337827370438e-05, + "loss": 1.5157, + "step": 8208 + }, + { + "epoch": 0.8659282700421941, + "grad_norm": 0.40176472067832947, + "learning_rate": 8.754179155053052e-05, + "loss": 1.461, + "step": 8209 + }, + { + "epoch": 0.8660337552742616, + "grad_norm": 0.4418811500072479, + "learning_rate": 8.742037346483729e-05, + "loss": 1.477, + "step": 8210 + }, + { + "epoch": 0.8661392405063291, + "grad_norm": 0.5150174498558044, + "learning_rate": 8.7299123782729e-05, + "loss": 1.4924, + "step": 8211 + }, + { + "epoch": 0.8662447257383966, + "grad_norm": 0.45674073696136475, + "learning_rate": 8.717804227063455e-05, + "loss": 1.4728, + "step": 8212 + }, + { + "epoch": 0.8663502109704642, + "grad_norm": 0.4809952974319458, + "learning_rate": 8.705712869530661e-05, + "loss": 1.4933, + "step": 8213 + }, + { + "epoch": 0.8664556962025316, + "grad_norm": 0.4204850196838379, + "learning_rate": 8.69363828238215e-05, + "loss": 1.4935, + "step": 8214 + }, + { + "epoch": 0.8665611814345991, + "grad_norm": 0.44213756918907166, + "learning_rate": 
8.681580442357857e-05, + "loss": 1.5134, + "step": 8215 + }, + { + "epoch": 0.8666666666666667, + "grad_norm": 0.43290165066719055, + "learning_rate": 8.66953932622997e-05, + "loss": 1.4845, + "step": 8216 + }, + { + "epoch": 0.8667721518987341, + "grad_norm": 0.4767615497112274, + "learning_rate": 8.657514910802906e-05, + "loss": 1.5176, + "step": 8217 + }, + { + "epoch": 0.8668776371308017, + "grad_norm": 0.45503509044647217, + "learning_rate": 8.645507172913238e-05, + "loss": 1.4719, + "step": 8218 + }, + { + "epoch": 0.8669831223628692, + "grad_norm": 0.4896306097507477, + "learning_rate": 8.63351608942968e-05, + "loss": 1.5206, + "step": 8219 + }, + { + "epoch": 0.8670886075949367, + "grad_norm": 0.471882164478302, + "learning_rate": 8.62154163725303e-05, + "loss": 1.5162, + "step": 8220 + }, + { + "epoch": 0.8671940928270042, + "grad_norm": 0.44179245829582214, + "learning_rate": 8.609583793316103e-05, + "loss": 1.456, + "step": 8221 + }, + { + "epoch": 0.8672995780590718, + "grad_norm": 0.42603135108947754, + "learning_rate": 8.597642534583735e-05, + "loss": 1.4733, + "step": 8222 + }, + { + "epoch": 0.8674050632911392, + "grad_norm": 0.48473796248435974, + "learning_rate": 8.585717838052689e-05, + "loss": 1.5052, + "step": 8223 + }, + { + "epoch": 0.8675105485232067, + "grad_norm": 0.41506513953208923, + "learning_rate": 8.573809680751644e-05, + "loss": 1.4988, + "step": 8224 + }, + { + "epoch": 0.8676160337552743, + "grad_norm": 0.49285179376602173, + "learning_rate": 8.561918039741144e-05, + "loss": 1.524, + "step": 8225 + }, + { + "epoch": 0.8677215189873417, + "grad_norm": 0.5024579167366028, + "learning_rate": 8.550042892113534e-05, + "loss": 1.4928, + "step": 8226 + }, + { + "epoch": 0.8678270042194093, + "grad_norm": 0.4057333767414093, + "learning_rate": 8.538184214992946e-05, + "loss": 1.47, + "step": 8227 + }, + { + "epoch": 0.8679324894514768, + "grad_norm": 0.42193830013275146, + "learning_rate": 8.52634198553523e-05, + "loss": 1.5053, + "step": 
8228 + }, + { + "epoch": 0.8680379746835443, + "grad_norm": 0.4237918257713318, + "learning_rate": 8.514516180927926e-05, + "loss": 1.4862, + "step": 8229 + }, + { + "epoch": 0.8681434599156118, + "grad_norm": 0.45510023832321167, + "learning_rate": 8.502706778390219e-05, + "loss": 1.5171, + "step": 8230 + }, + { + "epoch": 0.8682489451476794, + "grad_norm": 0.4293450713157654, + "learning_rate": 8.490913755172874e-05, + "loss": 1.4994, + "step": 8231 + }, + { + "epoch": 0.8683544303797468, + "grad_norm": 0.42206162214279175, + "learning_rate": 8.479137088558228e-05, + "loss": 1.46, + "step": 8232 + }, + { + "epoch": 0.8684599156118143, + "grad_norm": 0.41428276896476746, + "learning_rate": 8.467376755860109e-05, + "loss": 1.5131, + "step": 8233 + }, + { + "epoch": 0.8685654008438819, + "grad_norm": 0.44789931178092957, + "learning_rate": 8.455632734423823e-05, + "loss": 1.4944, + "step": 8234 + }, + { + "epoch": 0.8686708860759493, + "grad_norm": 0.42949697375297546, + "learning_rate": 8.443905001626099e-05, + "loss": 1.4808, + "step": 8235 + }, + { + "epoch": 0.8687763713080169, + "grad_norm": 0.4153139293193817, + "learning_rate": 8.432193534875027e-05, + "loss": 1.4921, + "step": 8236 + }, + { + "epoch": 0.8688818565400844, + "grad_norm": 0.4378165602684021, + "learning_rate": 8.420498311610047e-05, + "loss": 1.5136, + "step": 8237 + }, + { + "epoch": 0.8689873417721519, + "grad_norm": 0.46940332651138306, + "learning_rate": 8.408819309301891e-05, + "loss": 1.5129, + "step": 8238 + }, + { + "epoch": 0.8690928270042194, + "grad_norm": 0.4306110441684723, + "learning_rate": 8.397156505452524e-05, + "loss": 1.4427, + "step": 8239 + }, + { + "epoch": 0.869198312236287, + "grad_norm": 0.4263593256473541, + "learning_rate": 8.38550987759513e-05, + "loss": 1.4869, + "step": 8240 + }, + { + "epoch": 0.8693037974683544, + "grad_norm": 0.42645734548568726, + "learning_rate": 8.373879403294042e-05, + "loss": 1.5074, + "step": 8241 + }, + { + "epoch": 0.869409282700422, + 
"grad_norm": 0.43293336033821106, + "learning_rate": 8.36226506014472e-05, + "loss": 1.4801, + "step": 8242 + }, + { + "epoch": 0.8695147679324895, + "grad_norm": 0.4358377754688263, + "learning_rate": 8.350666825773698e-05, + "loss": 1.5189, + "step": 8243 + }, + { + "epoch": 0.8696202531645569, + "grad_norm": 0.4333524703979492, + "learning_rate": 8.339084677838533e-05, + "loss": 1.4927, + "step": 8244 + }, + { + "epoch": 0.8697257383966245, + "grad_norm": 0.44201257824897766, + "learning_rate": 8.327518594027779e-05, + "loss": 1.4753, + "step": 8245 + }, + { + "epoch": 0.869831223628692, + "grad_norm": 0.39594489336013794, + "learning_rate": 8.315968552060927e-05, + "loss": 1.5101, + "step": 8246 + }, + { + "epoch": 0.8699367088607595, + "grad_norm": 0.4450787305831909, + "learning_rate": 8.304434529688379e-05, + "loss": 1.5248, + "step": 8247 + }, + { + "epoch": 0.870042194092827, + "grad_norm": 0.4281655251979828, + "learning_rate": 8.292916504691398e-05, + "loss": 1.5298, + "step": 8248 + }, + { + "epoch": 0.8701476793248946, + "grad_norm": 0.4180588722229004, + "learning_rate": 8.28141445488205e-05, + "loss": 1.4985, + "step": 8249 + }, + { + "epoch": 0.870253164556962, + "grad_norm": 0.43443799018859863, + "learning_rate": 8.269928358103191e-05, + "loss": 1.5182, + "step": 8250 + }, + { + "epoch": 0.8703586497890295, + "grad_norm": 0.4392465353012085, + "learning_rate": 8.258458192228395e-05, + "loss": 1.5234, + "step": 8251 + }, + { + "epoch": 0.8704641350210971, + "grad_norm": 0.42838016152381897, + "learning_rate": 8.247003935161934e-05, + "loss": 1.4707, + "step": 8252 + }, + { + "epoch": 0.8705696202531645, + "grad_norm": 0.44824132323265076, + "learning_rate": 8.235565564838727e-05, + "loss": 1.4869, + "step": 8253 + }, + { + "epoch": 0.8706751054852321, + "grad_norm": 0.44147592782974243, + "learning_rate": 8.224143059224287e-05, + "loss": 1.4861, + "step": 8254 + }, + { + "epoch": 0.8707805907172996, + "grad_norm": 0.4234228730201721, + 
"learning_rate": 8.2127363963147e-05, + "loss": 1.5512, + "step": 8255 + }, + { + "epoch": 0.8708860759493671, + "grad_norm": 0.4180000424385071, + "learning_rate": 8.201345554136556e-05, + "loss": 1.5041, + "step": 8256 + }, + { + "epoch": 0.8709915611814346, + "grad_norm": 0.4617927074432373, + "learning_rate": 8.189970510746936e-05, + "loss": 1.4989, + "step": 8257 + }, + { + "epoch": 0.8710970464135022, + "grad_norm": 0.4595535397529602, + "learning_rate": 8.178611244233354e-05, + "loss": 1.4776, + "step": 8258 + }, + { + "epoch": 0.8712025316455696, + "grad_norm": 0.43817704916000366, + "learning_rate": 8.167267732713705e-05, + "loss": 1.4993, + "step": 8259 + }, + { + "epoch": 0.8713080168776371, + "grad_norm": 0.449745237827301, + "learning_rate": 8.155939954336243e-05, + "loss": 1.4664, + "step": 8260 + }, + { + "epoch": 0.8714135021097047, + "grad_norm": 0.417012482881546, + "learning_rate": 8.144627887279526e-05, + "loss": 1.4869, + "step": 8261 + }, + { + "epoch": 0.8715189873417721, + "grad_norm": 0.46489375829696655, + "learning_rate": 8.13333150975238e-05, + "loss": 1.4831, + "step": 8262 + }, + { + "epoch": 0.8716244725738397, + "grad_norm": 0.41472750902175903, + "learning_rate": 8.122050799993858e-05, + "loss": 1.5064, + "step": 8263 + }, + { + "epoch": 0.8717299578059071, + "grad_norm": 0.4289701581001282, + "learning_rate": 8.110785736273183e-05, + "loss": 1.467, + "step": 8264 + }, + { + "epoch": 0.8718354430379747, + "grad_norm": 0.4322354793548584, + "learning_rate": 8.099536296889734e-05, + "loss": 1.4674, + "step": 8265 + }, + { + "epoch": 0.8719409282700422, + "grad_norm": 0.4393194615840912, + "learning_rate": 8.08830246017297e-05, + "loss": 1.4951, + "step": 8266 + }, + { + "epoch": 0.8720464135021097, + "grad_norm": 0.4156050980091095, + "learning_rate": 8.077084204482424e-05, + "loss": 1.4897, + "step": 8267 + }, + { + "epoch": 0.8721518987341772, + "grad_norm": 0.509539783000946, + "learning_rate": 8.065881508207636e-05, + "loss": 
1.47, + "step": 8268 + }, + { + "epoch": 0.8722573839662447, + "grad_norm": 0.4308638572692871, + "learning_rate": 8.054694349768114e-05, + "loss": 1.5139, + "step": 8269 + }, + { + "epoch": 0.8723628691983122, + "grad_norm": 0.41226568818092346, + "learning_rate": 8.043522707613312e-05, + "loss": 1.5157, + "step": 8270 + }, + { + "epoch": 0.8724683544303797, + "grad_norm": 0.4736034572124481, + "learning_rate": 8.032366560222553e-05, + "loss": 1.5354, + "step": 8271 + }, + { + "epoch": 0.8725738396624473, + "grad_norm": 0.42093440890312195, + "learning_rate": 8.021225886105027e-05, + "loss": 1.4631, + "step": 8272 + }, + { + "epoch": 0.8726793248945147, + "grad_norm": 0.42206841707229614, + "learning_rate": 8.010100663799726e-05, + "loss": 1.4761, + "step": 8273 + }, + { + "epoch": 0.8727848101265823, + "grad_norm": 0.4045555591583252, + "learning_rate": 7.998990871875402e-05, + "loss": 1.475, + "step": 8274 + }, + { + "epoch": 0.8728902953586498, + "grad_norm": 0.46745604276657104, + "learning_rate": 7.987896488930541e-05, + "loss": 1.4565, + "step": 8275 + }, + { + "epoch": 0.8729957805907173, + "grad_norm": 0.4449433982372284, + "learning_rate": 7.976817493593301e-05, + "loss": 1.4939, + "step": 8276 + }, + { + "epoch": 0.8731012658227848, + "grad_norm": 0.410609632730484, + "learning_rate": 7.965753864521492e-05, + "loss": 1.4703, + "step": 8277 + }, + { + "epoch": 0.8732067510548523, + "grad_norm": 0.3928322494029999, + "learning_rate": 7.954705580402525e-05, + "loss": 1.4539, + "step": 8278 + }, + { + "epoch": 0.8733122362869198, + "grad_norm": 0.45473843812942505, + "learning_rate": 7.943672619953359e-05, + "loss": 1.4915, + "step": 8279 + }, + { + "epoch": 0.8734177215189873, + "grad_norm": 0.4489142596721649, + "learning_rate": 7.932654961920486e-05, + "loss": 1.445, + "step": 8280 + }, + { + "epoch": 0.8735232067510549, + "grad_norm": 0.4001033902168274, + "learning_rate": 7.921652585079873e-05, + "loss": 1.477, + "step": 8281 + }, + { + "epoch": 
0.8736286919831223, + "grad_norm": 0.4195241928100586, + "learning_rate": 7.910665468236916e-05, + "loss": 1.5, + "step": 8282 + }, + { + "epoch": 0.8737341772151899, + "grad_norm": 0.42359504103660583, + "learning_rate": 7.899693590226418e-05, + "loss": 1.494, + "step": 8283 + }, + { + "epoch": 0.8738396624472574, + "grad_norm": 0.4651046097278595, + "learning_rate": 7.888736929912525e-05, + "loss": 1.4526, + "step": 8284 + }, + { + "epoch": 0.8739451476793249, + "grad_norm": 0.4119933545589447, + "learning_rate": 7.877795466188711e-05, + "loss": 1.4588, + "step": 8285 + }, + { + "epoch": 0.8740506329113924, + "grad_norm": 0.4278581738471985, + "learning_rate": 7.866869177977722e-05, + "loss": 1.4648, + "step": 8286 + }, + { + "epoch": 0.87415611814346, + "grad_norm": 0.4105331599712372, + "learning_rate": 7.855958044231526e-05, + "loss": 1.5003, + "step": 8287 + }, + { + "epoch": 0.8742616033755274, + "grad_norm": 0.41706085205078125, + "learning_rate": 7.845062043931299e-05, + "loss": 1.4973, + "step": 8288 + }, + { + "epoch": 0.8743670886075949, + "grad_norm": 0.40420183539390564, + "learning_rate": 7.834181156087357e-05, + "loss": 1.4884, + "step": 8289 + }, + { + "epoch": 0.8744725738396625, + "grad_norm": 0.421173632144928, + "learning_rate": 7.823315359739135e-05, + "loss": 1.4451, + "step": 8290 + }, + { + "epoch": 0.8745780590717299, + "grad_norm": 0.4299216866493225, + "learning_rate": 7.812464633955146e-05, + "loss": 1.5048, + "step": 8291 + }, + { + "epoch": 0.8746835443037975, + "grad_norm": 0.48581114411354065, + "learning_rate": 7.801628957832916e-05, + "loss": 1.5211, + "step": 8292 + }, + { + "epoch": 0.874789029535865, + "grad_norm": 0.4901425242424011, + "learning_rate": 7.790808310498984e-05, + "loss": 1.4738, + "step": 8293 + }, + { + "epoch": 0.8748945147679325, + "grad_norm": 0.4534350633621216, + "learning_rate": 7.78000267110882e-05, + "loss": 1.4988, + "step": 8294 + }, + { + "epoch": 0.875, + "grad_norm": 0.4104546010494232, + 
"learning_rate": 7.769212018846815e-05, + "loss": 1.5203, + "step": 8295 + }, + { + "epoch": 0.8751054852320675, + "grad_norm": 0.3996504545211792, + "learning_rate": 7.758436332926238e-05, + "loss": 1.4781, + "step": 8296 + }, + { + "epoch": 0.875210970464135, + "grad_norm": 0.41142135858535767, + "learning_rate": 7.747675592589168e-05, + "loss": 1.516, + "step": 8297 + }, + { + "epoch": 0.8753164556962025, + "grad_norm": 0.4289790689945221, + "learning_rate": 7.736929777106499e-05, + "loss": 1.5019, + "step": 8298 + }, + { + "epoch": 0.8754219409282701, + "grad_norm": 0.42494845390319824, + "learning_rate": 7.726198865777852e-05, + "loss": 1.5232, + "step": 8299 + }, + { + "epoch": 0.8755274261603375, + "grad_norm": 0.40735816955566406, + "learning_rate": 7.715482837931577e-05, + "loss": 1.4852, + "step": 8300 + }, + { + "epoch": 0.8756329113924051, + "grad_norm": 0.41134464740753174, + "learning_rate": 7.704781672924692e-05, + "loss": 1.4973, + "step": 8301 + }, + { + "epoch": 0.8757383966244726, + "grad_norm": 0.46109405159950256, + "learning_rate": 7.694095350142833e-05, + "loss": 1.4769, + "step": 8302 + }, + { + "epoch": 0.87584388185654, + "grad_norm": 0.44002947211265564, + "learning_rate": 7.683423849000246e-05, + "loss": 1.4615, + "step": 8303 + }, + { + "epoch": 0.8759493670886076, + "grad_norm": 0.42934295535087585, + "learning_rate": 7.672767148939714e-05, + "loss": 1.4892, + "step": 8304 + }, + { + "epoch": 0.8760548523206751, + "grad_norm": 0.49174800515174866, + "learning_rate": 7.66212522943254e-05, + "loss": 1.4637, + "step": 8305 + }, + { + "epoch": 0.8761603375527426, + "grad_norm": 0.42006009817123413, + "learning_rate": 7.651498069978505e-05, + "loss": 1.4986, + "step": 8306 + }, + { + "epoch": 0.8762658227848101, + "grad_norm": 0.4598025977611542, + "learning_rate": 7.640885650105804e-05, + "loss": 1.4671, + "step": 8307 + }, + { + "epoch": 0.8763713080168777, + "grad_norm": 0.4459536373615265, + "learning_rate": 7.630287949371051e-05, + 
"loss": 1.5022, + "step": 8308 + }, + { + "epoch": 0.8764767932489451, + "grad_norm": 0.4235796630382538, + "learning_rate": 7.61970494735919e-05, + "loss": 1.4598, + "step": 8309 + }, + { + "epoch": 0.8765822784810127, + "grad_norm": 0.4373488128185272, + "learning_rate": 7.6091366236835e-05, + "loss": 1.4938, + "step": 8310 + }, + { + "epoch": 0.8766877637130802, + "grad_norm": 0.4334496557712555, + "learning_rate": 7.598582957985526e-05, + "loss": 1.5341, + "step": 8311 + }, + { + "epoch": 0.8767932489451477, + "grad_norm": 0.4387611448764801, + "learning_rate": 7.588043929935049e-05, + "loss": 1.5054, + "step": 8312 + }, + { + "epoch": 0.8768987341772152, + "grad_norm": 0.45723050832748413, + "learning_rate": 7.577519519230054e-05, + "loss": 1.5161, + "step": 8313 + }, + { + "epoch": 0.8770042194092827, + "grad_norm": 0.46606290340423584, + "learning_rate": 7.567009705596672e-05, + "loss": 1.4851, + "step": 8314 + }, + { + "epoch": 0.8771097046413502, + "grad_norm": 0.474518358707428, + "learning_rate": 7.556514468789169e-05, + "loss": 1.4996, + "step": 8315 + }, + { + "epoch": 0.8772151898734177, + "grad_norm": 0.4362832307815552, + "learning_rate": 7.546033788589884e-05, + "loss": 1.4971, + "step": 8316 + }, + { + "epoch": 0.8773206751054853, + "grad_norm": 0.45857396721839905, + "learning_rate": 7.53556764480919e-05, + "loss": 1.4835, + "step": 8317 + }, + { + "epoch": 0.8774261603375527, + "grad_norm": 0.39667078852653503, + "learning_rate": 7.525116017285479e-05, + "loss": 1.4921, + "step": 8318 + }, + { + "epoch": 0.8775316455696203, + "grad_norm": 0.418235719203949, + "learning_rate": 7.514678885885086e-05, + "loss": 1.491, + "step": 8319 + }, + { + "epoch": 0.8776371308016878, + "grad_norm": 0.4538114368915558, + "learning_rate": 7.504256230502289e-05, + "loss": 1.4799, + "step": 8320 + }, + { + "epoch": 0.8777426160337553, + "grad_norm": 0.409012109041214, + "learning_rate": 7.493848031059248e-05, + "loss": 1.4632, + "step": 8321 + }, + { + "epoch": 
0.8778481012658228, + "grad_norm": 0.45271778106689453, + "learning_rate": 7.483454267505959e-05, + "loss": 1.5076, + "step": 8322 + }, + { + "epoch": 0.8779535864978903, + "grad_norm": 0.4084085524082184, + "learning_rate": 7.473074919820243e-05, + "loss": 1.5085, + "step": 8323 + }, + { + "epoch": 0.8780590717299578, + "grad_norm": 0.4420888125896454, + "learning_rate": 7.462709968007676e-05, + "loss": 1.5149, + "step": 8324 + }, + { + "epoch": 0.8781645569620253, + "grad_norm": 0.4088989198207855, + "learning_rate": 7.452359392101578e-05, + "loss": 1.4962, + "step": 8325 + }, + { + "epoch": 0.8782700421940929, + "grad_norm": 0.43204084038734436, + "learning_rate": 7.442023172162959e-05, + "loss": 1.4997, + "step": 8326 + }, + { + "epoch": 0.8783755274261603, + "grad_norm": 0.4234655499458313, + "learning_rate": 7.431701288280477e-05, + "loss": 1.5129, + "step": 8327 + }, + { + "epoch": 0.8784810126582279, + "grad_norm": 0.4055323600769043, + "learning_rate": 7.421393720570416e-05, + "loss": 1.4775, + "step": 8328 + }, + { + "epoch": 0.8785864978902953, + "grad_norm": 0.4314819574356079, + "learning_rate": 7.411100449176634e-05, + "loss": 1.4602, + "step": 8329 + }, + { + "epoch": 0.8786919831223629, + "grad_norm": 0.43039757013320923, + "learning_rate": 7.400821454270524e-05, + "loss": 1.497, + "step": 8330 + }, + { + "epoch": 0.8787974683544304, + "grad_norm": 0.4349774420261383, + "learning_rate": 7.390556716050994e-05, + "loss": 1.5127, + "step": 8331 + }, + { + "epoch": 0.8789029535864978, + "grad_norm": 0.42883288860321045, + "learning_rate": 7.380306214744398e-05, + "loss": 1.5239, + "step": 8332 + }, + { + "epoch": 0.8790084388185654, + "grad_norm": 0.3942241370677948, + "learning_rate": 7.370069930604528e-05, + "loss": 1.4702, + "step": 8333 + }, + { + "epoch": 0.8791139240506329, + "grad_norm": 0.41544026136398315, + "learning_rate": 7.359847843912566e-05, + "loss": 1.4862, + "step": 8334 + }, + { + "epoch": 0.8792194092827004, + "grad_norm": 
0.4057771861553192, + "learning_rate": 7.349639934977028e-05, + "loss": 1.5055, + "step": 8335 + }, + { + "epoch": 0.8793248945147679, + "grad_norm": 0.40864747762680054, + "learning_rate": 7.33944618413376e-05, + "loss": 1.4702, + "step": 8336 + }, + { + "epoch": 0.8794303797468355, + "grad_norm": 0.4636353552341461, + "learning_rate": 7.329266571745865e-05, + "loss": 1.4882, + "step": 8337 + }, + { + "epoch": 0.8795358649789029, + "grad_norm": 0.4824613928794861, + "learning_rate": 7.319101078203692e-05, + "loss": 1.4888, + "step": 8338 + }, + { + "epoch": 0.8796413502109705, + "grad_norm": 0.4449959993362427, + "learning_rate": 7.308949683924792e-05, + "loss": 1.4091, + "step": 8339 + }, + { + "epoch": 0.879746835443038, + "grad_norm": 0.4504435658454895, + "learning_rate": 7.29881236935386e-05, + "loss": 1.4652, + "step": 8340 + }, + { + "epoch": 0.8798523206751054, + "grad_norm": 0.40106186270713806, + "learning_rate": 7.288689114962734e-05, + "loss": 1.4853, + "step": 8341 + }, + { + "epoch": 0.879957805907173, + "grad_norm": 0.41365739703178406, + "learning_rate": 7.278579901250316e-05, + "loss": 1.4698, + "step": 8342 + }, + { + "epoch": 0.8800632911392405, + "grad_norm": 0.40213650465011597, + "learning_rate": 7.268484708742574e-05, + "loss": 1.4445, + "step": 8343 + }, + { + "epoch": 0.880168776371308, + "grad_norm": 0.40378764271736145, + "learning_rate": 7.258403517992476e-05, + "loss": 1.4589, + "step": 8344 + }, + { + "epoch": 0.8802742616033755, + "grad_norm": 0.4202226400375366, + "learning_rate": 7.248336309579965e-05, + "loss": 1.4998, + "step": 8345 + }, + { + "epoch": 0.8803797468354431, + "grad_norm": 0.44546592235565186, + "learning_rate": 7.238283064111919e-05, + "loss": 1.4763, + "step": 8346 + }, + { + "epoch": 0.8804852320675105, + "grad_norm": 0.45787084102630615, + "learning_rate": 7.228243762222109e-05, + "loss": 1.5235, + "step": 8347 + }, + { + "epoch": 0.880590717299578, + "grad_norm": 0.46078041195869446, + "learning_rate": 
7.218218384571176e-05, + "loss": 1.4947, + "step": 8348 + }, + { + "epoch": 0.8806962025316456, + "grad_norm": 0.41745516657829285, + "learning_rate": 7.208206911846581e-05, + "loss": 1.4762, + "step": 8349 + }, + { + "epoch": 0.880801687763713, + "grad_norm": 0.46912646293640137, + "learning_rate": 7.198209324762562e-05, + "loss": 1.5113, + "step": 8350 + }, + { + "epoch": 0.8809071729957806, + "grad_norm": 0.41433611512184143, + "learning_rate": 7.188225604060121e-05, + "loss": 1.5035, + "step": 8351 + }, + { + "epoch": 0.8810126582278481, + "grad_norm": 0.42304790019989014, + "learning_rate": 7.178255730506956e-05, + "loss": 1.4617, + "step": 8352 + }, + { + "epoch": 0.8811181434599156, + "grad_norm": 0.41195395588874817, + "learning_rate": 7.16829968489745e-05, + "loss": 1.4362, + "step": 8353 + }, + { + "epoch": 0.8812236286919831, + "grad_norm": 0.4199661314487457, + "learning_rate": 7.158357448052624e-05, + "loss": 1.4761, + "step": 8354 + }, + { + "epoch": 0.8813291139240507, + "grad_norm": 0.4273452162742615, + "learning_rate": 7.148429000820093e-05, + "loss": 1.4561, + "step": 8355 + }, + { + "epoch": 0.8814345991561181, + "grad_norm": 0.415198415517807, + "learning_rate": 7.138514324074043e-05, + "loss": 1.4841, + "step": 8356 + }, + { + "epoch": 0.8815400843881857, + "grad_norm": 0.43732890486717224, + "learning_rate": 7.128613398715179e-05, + "loss": 1.5199, + "step": 8357 + }, + { + "epoch": 0.8816455696202532, + "grad_norm": 0.4501116871833801, + "learning_rate": 7.118726205670702e-05, + "loss": 1.4847, + "step": 8358 + }, + { + "epoch": 0.8817510548523206, + "grad_norm": 0.4412800669670105, + "learning_rate": 7.10885272589427e-05, + "loss": 1.5215, + "step": 8359 + }, + { + "epoch": 0.8818565400843882, + "grad_norm": 0.43446093797683716, + "learning_rate": 7.098992940365946e-05, + "loss": 1.4707, + "step": 8360 + }, + { + "epoch": 0.8819620253164557, + "grad_norm": 0.44392797350883484, + "learning_rate": 7.089146830092187e-05, + "loss": 1.4917, + 
"step": 8361 + }, + { + "epoch": 0.8820675105485232, + "grad_norm": 0.4270941913127899, + "learning_rate": 7.079314376105778e-05, + "loss": 1.5309, + "step": 8362 + }, + { + "epoch": 0.8821729957805907, + "grad_norm": 0.41732388734817505, + "learning_rate": 7.069495559465825e-05, + "loss": 1.4886, + "step": 8363 + }, + { + "epoch": 0.8822784810126583, + "grad_norm": 0.4092230200767517, + "learning_rate": 7.059690361257703e-05, + "loss": 1.5017, + "step": 8364 + }, + { + "epoch": 0.8823839662447257, + "grad_norm": 0.4176844656467438, + "learning_rate": 7.049898762593007e-05, + "loss": 1.5066, + "step": 8365 + }, + { + "epoch": 0.8824894514767933, + "grad_norm": 0.47050392627716064, + "learning_rate": 7.04012074460955e-05, + "loss": 1.4678, + "step": 8366 + }, + { + "epoch": 0.8825949367088608, + "grad_norm": 0.45269033312797546, + "learning_rate": 7.030356288471289e-05, + "loss": 1.4788, + "step": 8367 + }, + { + "epoch": 0.8827004219409282, + "grad_norm": 0.4728701412677765, + "learning_rate": 7.020605375368314e-05, + "loss": 1.49, + "step": 8368 + }, + { + "epoch": 0.8828059071729958, + "grad_norm": 0.4621485769748688, + "learning_rate": 7.010867986516811e-05, + "loss": 1.4744, + "step": 8369 + }, + { + "epoch": 0.8829113924050633, + "grad_norm": 0.4135444462299347, + "learning_rate": 7.001144103159e-05, + "loss": 1.4441, + "step": 8370 + }, + { + "epoch": 0.8830168776371308, + "grad_norm": 0.4157642424106598, + "learning_rate": 6.991433706563134e-05, + "loss": 1.4749, + "step": 8371 + }, + { + "epoch": 0.8831223628691983, + "grad_norm": 0.45952558517456055, + "learning_rate": 6.981736778023443e-05, + "loss": 1.5109, + "step": 8372 + }, + { + "epoch": 0.8832278481012659, + "grad_norm": 0.4747614562511444, + "learning_rate": 6.972053298860092e-05, + "loss": 1.4749, + "step": 8373 + }, + { + "epoch": 0.8833333333333333, + "grad_norm": 0.4265487790107727, + "learning_rate": 6.962383250419169e-05, + "loss": 1.4799, + "step": 8374 + }, + { + "epoch": 
0.8834388185654009, + "grad_norm": 0.46043136715888977, + "learning_rate": 6.952726614072621e-05, + "loss": 1.5161, + "step": 8375 + }, + { + "epoch": 0.8835443037974684, + "grad_norm": 0.44654718041419983, + "learning_rate": 6.94308337121824e-05, + "loss": 1.5292, + "step": 8376 + }, + { + "epoch": 0.8836497890295358, + "grad_norm": 0.44159990549087524, + "learning_rate": 6.93345350327962e-05, + "loss": 1.4887, + "step": 8377 + }, + { + "epoch": 0.8837552742616034, + "grad_norm": 0.44495388865470886, + "learning_rate": 6.923836991706108e-05, + "loss": 1.4829, + "step": 8378 + }, + { + "epoch": 0.8838607594936709, + "grad_norm": 0.45411524176597595, + "learning_rate": 6.914233817972799e-05, + "loss": 1.476, + "step": 8379 + }, + { + "epoch": 0.8839662447257384, + "grad_norm": 0.4120056927204132, + "learning_rate": 6.904643963580461e-05, + "loss": 1.5208, + "step": 8380 + }, + { + "epoch": 0.8840717299578059, + "grad_norm": 0.43202728033065796, + "learning_rate": 6.895067410055536e-05, + "loss": 1.4677, + "step": 8381 + }, + { + "epoch": 0.8841772151898735, + "grad_norm": 0.4222124516963959, + "learning_rate": 6.885504138950084e-05, + "loss": 1.5084, + "step": 8382 + }, + { + "epoch": 0.8842827004219409, + "grad_norm": 0.48025941848754883, + "learning_rate": 6.875954131841743e-05, + "loss": 1.483, + "step": 8383 + }, + { + "epoch": 0.8843881856540085, + "grad_norm": 0.4524250328540802, + "learning_rate": 6.866417370333717e-05, + "loss": 1.4878, + "step": 8384 + }, + { + "epoch": 0.884493670886076, + "grad_norm": 0.41338518261909485, + "learning_rate": 6.856893836054713e-05, + "loss": 1.5055, + "step": 8385 + }, + { + "epoch": 0.8845991561181434, + "grad_norm": 0.42971885204315186, + "learning_rate": 6.847383510658925e-05, + "loss": 1.4931, + "step": 8386 + }, + { + "epoch": 0.884704641350211, + "grad_norm": 0.48985370993614197, + "learning_rate": 6.837886375825995e-05, + "loss": 1.4849, + "step": 8387 + }, + { + "epoch": 0.8848101265822785, + "grad_norm": 
0.42879849672317505, + "learning_rate": 6.828402413260965e-05, + "loss": 1.4597, + "step": 8388 + }, + { + "epoch": 0.884915611814346, + "grad_norm": 0.39670246839523315, + "learning_rate": 6.818931604694264e-05, + "loss": 1.4705, + "step": 8389 + }, + { + "epoch": 0.8850210970464135, + "grad_norm": 0.4451974630355835, + "learning_rate": 6.809473931881644e-05, + "loss": 1.5007, + "step": 8390 + }, + { + "epoch": 0.8851265822784811, + "grad_norm": 0.4537569582462311, + "learning_rate": 6.800029376604179e-05, + "loss": 1.5034, + "step": 8391 + }, + { + "epoch": 0.8852320675105485, + "grad_norm": 0.4212094843387604, + "learning_rate": 6.790597920668206e-05, + "loss": 1.4657, + "step": 8392 + }, + { + "epoch": 0.885337552742616, + "grad_norm": 0.49739083647727966, + "learning_rate": 6.781179545905287e-05, + "loss": 1.4854, + "step": 8393 + }, + { + "epoch": 0.8854430379746835, + "grad_norm": 0.4821615517139435, + "learning_rate": 6.771774234172196e-05, + "loss": 1.4712, + "step": 8394 + }, + { + "epoch": 0.885548523206751, + "grad_norm": 0.42705926299095154, + "learning_rate": 6.762381967350862e-05, + "loss": 1.4806, + "step": 8395 + }, + { + "epoch": 0.8856540084388186, + "grad_norm": 0.4525908827781677, + "learning_rate": 6.753002727348348e-05, + "loss": 1.4928, + "step": 8396 + }, + { + "epoch": 0.885759493670886, + "grad_norm": 0.4396267235279083, + "learning_rate": 6.743636496096815e-05, + "loss": 1.4926, + "step": 8397 + }, + { + "epoch": 0.8858649789029536, + "grad_norm": 0.4494251310825348, + "learning_rate": 6.73428325555347e-05, + "loss": 1.4733, + "step": 8398 + }, + { + "epoch": 0.8859704641350211, + "grad_norm": 0.4896926283836365, + "learning_rate": 6.724942987700563e-05, + "loss": 1.5003, + "step": 8399 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 0.407871276140213, + "learning_rate": 6.71561567454532e-05, + "loss": 1.4802, + "step": 8400 + }, + { + "epoch": 0.8861814345991561, + "grad_norm": 0.4104156196117401, + "learning_rate": 
6.706301298119924e-05, + "loss": 1.4989, + "step": 8401 + }, + { + "epoch": 0.8862869198312237, + "grad_norm": 0.4375029504299164, + "learning_rate": 6.696999840481492e-05, + "loss": 1.4952, + "step": 8402 + }, + { + "epoch": 0.8863924050632911, + "grad_norm": 0.41010478138923645, + "learning_rate": 6.687711283712008e-05, + "loss": 1.5152, + "step": 8403 + }, + { + "epoch": 0.8864978902953586, + "grad_norm": 0.49851664900779724, + "learning_rate": 6.678435609918325e-05, + "loss": 1.5076, + "step": 8404 + }, + { + "epoch": 0.8866033755274262, + "grad_norm": 0.532653272151947, + "learning_rate": 6.669172801232099e-05, + "loss": 1.5003, + "step": 8405 + }, + { + "epoch": 0.8867088607594936, + "grad_norm": 0.4978204667568207, + "learning_rate": 6.659922839809777e-05, + "loss": 1.5218, + "step": 8406 + }, + { + "epoch": 0.8868143459915612, + "grad_norm": 0.43870192766189575, + "learning_rate": 6.65068570783256e-05, + "loss": 1.5256, + "step": 8407 + }, + { + "epoch": 0.8869198312236287, + "grad_norm": 0.45217710733413696, + "learning_rate": 6.641461387506347e-05, + "loss": 1.4964, + "step": 8408 + }, + { + "epoch": 0.8870253164556962, + "grad_norm": 0.4979802668094635, + "learning_rate": 6.632249861061733e-05, + "loss": 1.5278, + "step": 8409 + }, + { + "epoch": 0.8871308016877637, + "grad_norm": 0.5015259385108948, + "learning_rate": 6.623051110753948e-05, + "loss": 1.4785, + "step": 8410 + }, + { + "epoch": 0.8872362869198313, + "grad_norm": 0.42942774295806885, + "learning_rate": 6.613865118862837e-05, + "loss": 1.5149, + "step": 8411 + }, + { + "epoch": 0.8873417721518987, + "grad_norm": 0.433816522359848, + "learning_rate": 6.604691867692828e-05, + "loss": 1.4773, + "step": 8412 + }, + { + "epoch": 0.8874472573839662, + "grad_norm": 0.43319758772850037, + "learning_rate": 6.595531339572882e-05, + "loss": 1.5179, + "step": 8413 + }, + { + "epoch": 0.8875527426160338, + "grad_norm": 0.4250449538230896, + "learning_rate": 6.586383516856473e-05, + "loss": 1.4993, + 
"step": 8414 + }, + { + "epoch": 0.8876582278481012, + "grad_norm": 0.4036279320716858, + "learning_rate": 6.57724838192156e-05, + "loss": 1.5048, + "step": 8415 + }, + { + "epoch": 0.8877637130801688, + "grad_norm": 0.4795280992984772, + "learning_rate": 6.568125917170527e-05, + "loss": 1.455, + "step": 8416 + }, + { + "epoch": 0.8878691983122363, + "grad_norm": 0.4198485314846039, + "learning_rate": 6.559016105030177e-05, + "loss": 1.4741, + "step": 8417 + }, + { + "epoch": 0.8879746835443038, + "grad_norm": 0.42640748620033264, + "learning_rate": 6.549918927951679e-05, + "loss": 1.4831, + "step": 8418 + }, + { + "epoch": 0.8880801687763713, + "grad_norm": 0.45151999592781067, + "learning_rate": 6.540834368410549e-05, + "loss": 1.4818, + "step": 8419 + }, + { + "epoch": 0.8881856540084389, + "grad_norm": 0.42480534315109253, + "learning_rate": 6.531762408906607e-05, + "loss": 1.4962, + "step": 8420 + }, + { + "epoch": 0.8882911392405063, + "grad_norm": 0.4287979006767273, + "learning_rate": 6.522703031963938e-05, + "loss": 1.4934, + "step": 8421 + }, + { + "epoch": 0.8883966244725738, + "grad_norm": 0.4689832627773285, + "learning_rate": 6.513656220130879e-05, + "loss": 1.4902, + "step": 8422 + }, + { + "epoch": 0.8885021097046414, + "grad_norm": 0.514316201210022, + "learning_rate": 6.504621955979958e-05, + "loss": 1.5132, + "step": 8423 + }, + { + "epoch": 0.8886075949367088, + "grad_norm": 0.47200873494148254, + "learning_rate": 6.495600222107884e-05, + "loss": 1.456, + "step": 8424 + }, + { + "epoch": 0.8887130801687764, + "grad_norm": 0.4109126329421997, + "learning_rate": 6.486591001135503e-05, + "loss": 1.4728, + "step": 8425 + }, + { + "epoch": 0.8888185654008439, + "grad_norm": 0.41009843349456787, + "learning_rate": 6.477594275707757e-05, + "loss": 1.4522, + "step": 8426 + }, + { + "epoch": 0.8889240506329114, + "grad_norm": 0.423771470785141, + "learning_rate": 6.468610028493671e-05, + "loss": 1.5272, + "step": 8427 + }, + { + "epoch": 
0.8890295358649789, + "grad_norm": 0.46336105465888977, + "learning_rate": 6.459638242186298e-05, + "loss": 1.4836, + "step": 8428 + }, + { + "epoch": 0.8891350210970465, + "grad_norm": 0.4478496313095093, + "learning_rate": 6.4506788995027e-05, + "loss": 1.499, + "step": 8429 + }, + { + "epoch": 0.8892405063291139, + "grad_norm": 0.42656466364860535, + "learning_rate": 6.441731983183912e-05, + "loss": 1.4779, + "step": 8430 + }, + { + "epoch": 0.8893459915611814, + "grad_norm": 0.4402346909046173, + "learning_rate": 6.432797475994898e-05, + "loss": 1.4632, + "step": 8431 + }, + { + "epoch": 0.889451476793249, + "grad_norm": 0.41152986884117126, + "learning_rate": 6.42387536072454e-05, + "loss": 1.4622, + "step": 8432 + }, + { + "epoch": 0.8895569620253164, + "grad_norm": 0.4462791383266449, + "learning_rate": 6.414965620185575e-05, + "loss": 1.4554, + "step": 8433 + }, + { + "epoch": 0.889662447257384, + "grad_norm": 0.4228445291519165, + "learning_rate": 6.406068237214591e-05, + "loss": 1.4586, + "step": 8434 + }, + { + "epoch": 0.8897679324894515, + "grad_norm": 0.4302632510662079, + "learning_rate": 6.39718319467198e-05, + "loss": 1.5139, + "step": 8435 + }, + { + "epoch": 0.889873417721519, + "grad_norm": 0.4118238389492035, + "learning_rate": 6.388310475441898e-05, + "loss": 1.5033, + "step": 8436 + }, + { + "epoch": 0.8899789029535865, + "grad_norm": 0.4312010705471039, + "learning_rate": 6.379450062432251e-05, + "loss": 1.4702, + "step": 8437 + }, + { + "epoch": 0.890084388185654, + "grad_norm": 0.4147666394710541, + "learning_rate": 6.370601938574637e-05, + "loss": 1.5071, + "step": 8438 + }, + { + "epoch": 0.8901898734177215, + "grad_norm": 0.4334884583950043, + "learning_rate": 6.361766086824344e-05, + "loss": 1.4834, + "step": 8439 + }, + { + "epoch": 0.890295358649789, + "grad_norm": 0.471153199672699, + "learning_rate": 6.352942490160292e-05, + "loss": 1.5024, + "step": 8440 + }, + { + "epoch": 0.8904008438818566, + "grad_norm": 0.43951278924942017, + 
"learning_rate": 6.344131131585007e-05, + "loss": 1.4916, + "step": 8441 + }, + { + "epoch": 0.890506329113924, + "grad_norm": 0.4353131055831909, + "learning_rate": 6.335331994124594e-05, + "loss": 1.4756, + "step": 8442 + }, + { + "epoch": 0.8906118143459916, + "grad_norm": 0.4283924102783203, + "learning_rate": 6.326545060828696e-05, + "loss": 1.4942, + "step": 8443 + }, + { + "epoch": 0.8907172995780591, + "grad_norm": 0.45377227663993835, + "learning_rate": 6.31777031477047e-05, + "loss": 1.4732, + "step": 8444 + }, + { + "epoch": 0.8908227848101266, + "grad_norm": 0.4111506938934326, + "learning_rate": 6.309007739046552e-05, + "loss": 1.4686, + "step": 8445 + }, + { + "epoch": 0.8909282700421941, + "grad_norm": 0.4356135129928589, + "learning_rate": 6.300257316777014e-05, + "loss": 1.4944, + "step": 8446 + }, + { + "epoch": 0.8910337552742617, + "grad_norm": 0.4620656371116638, + "learning_rate": 6.291519031105349e-05, + "loss": 1.5234, + "step": 8447 + }, + { + "epoch": 0.8911392405063291, + "grad_norm": 0.40962132811546326, + "learning_rate": 6.282792865198421e-05, + "loss": 1.5097, + "step": 8448 + }, + { + "epoch": 0.8912447257383966, + "grad_norm": 0.44323867559432983, + "learning_rate": 6.274078802246449e-05, + "loss": 1.5173, + "step": 8449 + }, + { + "epoch": 0.8913502109704642, + "grad_norm": 0.42913782596588135, + "learning_rate": 6.265376825462966e-05, + "loss": 1.4881, + "step": 8450 + }, + { + "epoch": 0.8914556962025316, + "grad_norm": 0.4396832585334778, + "learning_rate": 6.256686918084778e-05, + "loss": 1.4764, + "step": 8451 + }, + { + "epoch": 0.8915611814345992, + "grad_norm": 0.44532984495162964, + "learning_rate": 6.248009063371955e-05, + "loss": 1.4959, + "step": 8452 + }, + { + "epoch": 0.8916666666666667, + "grad_norm": 0.41534358263015747, + "learning_rate": 6.239343244607771e-05, + "loss": 1.4803, + "step": 8453 + }, + { + "epoch": 0.8917721518987342, + "grad_norm": 0.469448983669281, + "learning_rate": 6.230689445098696e-05, + 
"loss": 1.5099, + "step": 8454 + }, + { + "epoch": 0.8918776371308017, + "grad_norm": 0.44022998213768005, + "learning_rate": 6.222047648174353e-05, + "loss": 1.451, + "step": 8455 + }, + { + "epoch": 0.8919831223628693, + "grad_norm": 0.4374964237213135, + "learning_rate": 6.213417837187475e-05, + "loss": 1.5057, + "step": 8456 + }, + { + "epoch": 0.8920886075949367, + "grad_norm": 0.41490861773490906, + "learning_rate": 6.204799995513898e-05, + "loss": 1.4671, + "step": 8457 + }, + { + "epoch": 0.8921940928270042, + "grad_norm": 0.4097163677215576, + "learning_rate": 6.196194106552512e-05, + "loss": 1.4868, + "step": 8458 + }, + { + "epoch": 0.8922995780590718, + "grad_norm": 0.4366304874420166, + "learning_rate": 6.187600153725223e-05, + "loss": 1.4949, + "step": 8459 + }, + { + "epoch": 0.8924050632911392, + "grad_norm": 0.42559051513671875, + "learning_rate": 6.179018120476945e-05, + "loss": 1.5068, + "step": 8460 + }, + { + "epoch": 0.8925105485232068, + "grad_norm": 0.526786208152771, + "learning_rate": 6.17044799027554e-05, + "loss": 1.5065, + "step": 8461 + }, + { + "epoch": 0.8926160337552742, + "grad_norm": 0.41393354535102844, + "learning_rate": 6.161889746611807e-05, + "loss": 1.4841, + "step": 8462 + }, + { + "epoch": 0.8927215189873418, + "grad_norm": 0.4025609493255615, + "learning_rate": 6.153343372999445e-05, + "loss": 1.4799, + "step": 8463 + }, + { + "epoch": 0.8928270042194093, + "grad_norm": 0.4296261668205261, + "learning_rate": 6.14480885297501e-05, + "loss": 1.4796, + "step": 8464 + }, + { + "epoch": 0.8929324894514767, + "grad_norm": 0.3971109092235565, + "learning_rate": 6.1362861700979e-05, + "loss": 1.4973, + "step": 8465 + }, + { + "epoch": 0.8930379746835443, + "grad_norm": 0.44303449988365173, + "learning_rate": 6.127775307950314e-05, + "loss": 1.4709, + "step": 8466 + }, + { + "epoch": 0.8931434599156118, + "grad_norm": 0.44857221841812134, + "learning_rate": 6.119276250137219e-05, + "loss": 1.4841, + "step": 8467 + }, + { + 
"epoch": 0.8932489451476793, + "grad_norm": 0.4154420495033264, + "learning_rate": 6.110788980286329e-05, + "loss": 1.4679, + "step": 8468 + }, + { + "epoch": 0.8933544303797468, + "grad_norm": 0.447130411863327, + "learning_rate": 6.1023134820480546e-05, + "loss": 1.4959, + "step": 8469 + }, + { + "epoch": 0.8934599156118144, + "grad_norm": 0.416172593832016, + "learning_rate": 6.0938497390954946e-05, + "loss": 1.5046, + "step": 8470 + }, + { + "epoch": 0.8935654008438818, + "grad_norm": 0.4664374887943268, + "learning_rate": 6.0853977351243815e-05, + "loss": 1.483, + "step": 8471 + }, + { + "epoch": 0.8936708860759494, + "grad_norm": 0.423372358083725, + "learning_rate": 6.0769574538530704e-05, + "loss": 1.4736, + "step": 8472 + }, + { + "epoch": 0.8937763713080169, + "grad_norm": 0.4114157259464264, + "learning_rate": 6.0685288790224975e-05, + "loss": 1.5107, + "step": 8473 + }, + { + "epoch": 0.8938818565400843, + "grad_norm": 0.4139949381351471, + "learning_rate": 6.0601119943961425e-05, + "loss": 1.4657, + "step": 8474 + }, + { + "epoch": 0.8939873417721519, + "grad_norm": 0.4176963269710541, + "learning_rate": 6.0517067837600144e-05, + "loss": 1.4962, + "step": 8475 + }, + { + "epoch": 0.8940928270042194, + "grad_norm": 0.39363715052604675, + "learning_rate": 6.0433132309226017e-05, + "loss": 1.4312, + "step": 8476 + }, + { + "epoch": 0.8941983122362869, + "grad_norm": 0.42532992362976074, + "learning_rate": 6.034931319714857e-05, + "loss": 1.4908, + "step": 8477 + }, + { + "epoch": 0.8943037974683544, + "grad_norm": 0.41083085536956787, + "learning_rate": 6.026561033990159e-05, + "loss": 1.4956, + "step": 8478 + }, + { + "epoch": 0.894409282700422, + "grad_norm": 0.4373190999031067, + "learning_rate": 6.0182023576242725e-05, + "loss": 1.4621, + "step": 8479 + }, + { + "epoch": 0.8945147679324894, + "grad_norm": 0.42408502101898193, + "learning_rate": 6.009855274515339e-05, + "loss": 1.4893, + "step": 8480 + }, + { + "epoch": 0.894620253164557, + 
"grad_norm": 0.42819565534591675, + "learning_rate": 6.001519768583819e-05, + "loss": 1.5027, + "step": 8481 + }, + { + "epoch": 0.8947257383966245, + "grad_norm": 0.40804967284202576, + "learning_rate": 5.993195823772487e-05, + "loss": 1.5159, + "step": 8482 + }, + { + "epoch": 0.8948312236286919, + "grad_norm": 0.41719532012939453, + "learning_rate": 5.9848834240463846e-05, + "loss": 1.4917, + "step": 8483 + }, + { + "epoch": 0.8949367088607595, + "grad_norm": 0.43063557147979736, + "learning_rate": 5.976582553392788e-05, + "loss": 1.4739, + "step": 8484 + }, + { + "epoch": 0.895042194092827, + "grad_norm": 0.4129980504512787, + "learning_rate": 5.968293195821191e-05, + "loss": 1.485, + "step": 8485 + }, + { + "epoch": 0.8951476793248945, + "grad_norm": 0.4312863051891327, + "learning_rate": 5.960015335363258e-05, + "loss": 1.4964, + "step": 8486 + }, + { + "epoch": 0.895253164556962, + "grad_norm": 0.44003424048423767, + "learning_rate": 5.9517489560728056e-05, + "loss": 1.5156, + "step": 8487 + }, + { + "epoch": 0.8953586497890296, + "grad_norm": 0.4447747766971588, + "learning_rate": 5.943494042025771e-05, + "loss": 1.4593, + "step": 8488 + }, + { + "epoch": 0.895464135021097, + "grad_norm": 0.4447772204875946, + "learning_rate": 5.9352505773201664e-05, + "loss": 1.4862, + "step": 8489 + }, + { + "epoch": 0.8955696202531646, + "grad_norm": 0.41685861349105835, + "learning_rate": 5.9270185460760735e-05, + "loss": 1.4635, + "step": 8490 + }, + { + "epoch": 0.8956751054852321, + "grad_norm": 0.4261850416660309, + "learning_rate": 5.918797932435585e-05, + "loss": 1.4678, + "step": 8491 + }, + { + "epoch": 0.8957805907172995, + "grad_norm": 0.4361521303653717, + "learning_rate": 5.9105887205627985e-05, + "loss": 1.4565, + "step": 8492 + }, + { + "epoch": 0.8958860759493671, + "grad_norm": 0.48076456785202026, + "learning_rate": 5.9023908946437736e-05, + "loss": 1.4374, + "step": 8493 + }, + { + "epoch": 0.8959915611814346, + "grad_norm": 0.49267327785491943, + 
"learning_rate": 5.894204438886499e-05, + "loss": 1.4508, + "step": 8494 + }, + { + "epoch": 0.8960970464135021, + "grad_norm": 0.41653257608413696, + "learning_rate": 5.886029337520872e-05, + "loss": 1.542, + "step": 8495 + }, + { + "epoch": 0.8962025316455696, + "grad_norm": 0.424049437046051, + "learning_rate": 5.877865574798656e-05, + "loss": 1.4884, + "step": 8496 + }, + { + "epoch": 0.8963080168776372, + "grad_norm": 0.4379669725894928, + "learning_rate": 5.869713134993462e-05, + "loss": 1.4796, + "step": 8497 + }, + { + "epoch": 0.8964135021097046, + "grad_norm": 0.4263968765735626, + "learning_rate": 5.8615720024007174e-05, + "loss": 1.4667, + "step": 8498 + }, + { + "epoch": 0.8965189873417722, + "grad_norm": 0.44898489117622375, + "learning_rate": 5.8534421613376175e-05, + "loss": 1.4899, + "step": 8499 + }, + { + "epoch": 0.8966244725738397, + "grad_norm": 0.4628767669200897, + "learning_rate": 5.8453235961431225e-05, + "loss": 1.4843, + "step": 8500 + }, + { + "epoch": 0.8967299578059071, + "grad_norm": 0.3954363465309143, + "learning_rate": 5.837216291177911e-05, + "loss": 1.496, + "step": 8501 + }, + { + "epoch": 0.8968354430379747, + "grad_norm": 0.4127616286277771, + "learning_rate": 5.829120230824344e-05, + "loss": 1.5091, + "step": 8502 + }, + { + "epoch": 0.8969409282700422, + "grad_norm": 0.42761513590812683, + "learning_rate": 5.821035399486458e-05, + "loss": 1.4671, + "step": 8503 + }, + { + "epoch": 0.8970464135021097, + "grad_norm": 0.4368777573108673, + "learning_rate": 5.8129617815899086e-05, + "loss": 1.4995, + "step": 8504 + }, + { + "epoch": 0.8971518987341772, + "grad_norm": 0.5013472437858582, + "learning_rate": 5.8048993615819584e-05, + "loss": 1.4686, + "step": 8505 + }, + { + "epoch": 0.8972573839662448, + "grad_norm": 0.43895700573921204, + "learning_rate": 5.7968481239314435e-05, + "loss": 1.4609, + "step": 8506 + }, + { + "epoch": 0.8973628691983122, + "grad_norm": 0.4145542085170746, + "learning_rate": 5.788808053128733e-05, + 
"loss": 1.4926, + "step": 8507 + }, + { + "epoch": 0.8974683544303798, + "grad_norm": 0.4039081931114197, + "learning_rate": 5.780779133685717e-05, + "loss": 1.5155, + "step": 8508 + }, + { + "epoch": 0.8975738396624473, + "grad_norm": 0.4022243022918701, + "learning_rate": 5.772761350135759e-05, + "loss": 1.505, + "step": 8509 + }, + { + "epoch": 0.8976793248945147, + "grad_norm": 0.4137919247150421, + "learning_rate": 5.764754687033678e-05, + "loss": 1.4678, + "step": 8510 + }, + { + "epoch": 0.8977848101265823, + "grad_norm": 0.42878103256225586, + "learning_rate": 5.756759128955722e-05, + "loss": 1.4717, + "step": 8511 + }, + { + "epoch": 0.8978902953586498, + "grad_norm": 0.4243728518486023, + "learning_rate": 5.748774660499514e-05, + "loss": 1.4851, + "step": 8512 + }, + { + "epoch": 0.8979957805907173, + "grad_norm": 0.43055763840675354, + "learning_rate": 5.740801266284059e-05, + "loss": 1.4735, + "step": 8513 + }, + { + "epoch": 0.8981012658227848, + "grad_norm": 0.41846656799316406, + "learning_rate": 5.732838930949679e-05, + "loss": 1.4505, + "step": 8514 + }, + { + "epoch": 0.8982067510548524, + "grad_norm": 0.44019097089767456, + "learning_rate": 5.724887639158008e-05, + "loss": 1.4933, + "step": 8515 + }, + { + "epoch": 0.8983122362869198, + "grad_norm": 0.43868181109428406, + "learning_rate": 5.716947375591959e-05, + "loss": 1.5146, + "step": 8516 + }, + { + "epoch": 0.8984177215189874, + "grad_norm": 0.42221599817276, + "learning_rate": 5.709018124955674e-05, + "loss": 1.4996, + "step": 8517 + }, + { + "epoch": 0.8985232067510549, + "grad_norm": 0.4055478572845459, + "learning_rate": 5.701099871974525e-05, + "loss": 1.4902, + "step": 8518 + }, + { + "epoch": 0.8986286919831223, + "grad_norm": 0.4379352629184723, + "learning_rate": 5.6931926013950586e-05, + "loss": 1.4605, + "step": 8519 + }, + { + "epoch": 0.8987341772151899, + "grad_norm": 0.4605368971824646, + "learning_rate": 5.6852962979849836e-05, + "loss": 1.5155, + "step": 8520 + }, + { + 
"epoch": 0.8988396624472574, + "grad_norm": 0.4232887923717499, + "learning_rate": 5.677410946533138e-05, + "loss": 1.4908, + "step": 8521 + }, + { + "epoch": 0.8989451476793249, + "grad_norm": 0.42216604948043823, + "learning_rate": 5.6695365318494475e-05, + "loss": 1.5028, + "step": 8522 + }, + { + "epoch": 0.8990506329113924, + "grad_norm": 0.4300329089164734, + "learning_rate": 5.6616730387649173e-05, + "loss": 1.5008, + "step": 8523 + }, + { + "epoch": 0.89915611814346, + "grad_norm": 0.44915470480918884, + "learning_rate": 5.6538204521315804e-05, + "loss": 1.4754, + "step": 8524 + }, + { + "epoch": 0.8992616033755274, + "grad_norm": 0.4172717332839966, + "learning_rate": 5.6459787568224886e-05, + "loss": 1.5211, + "step": 8525 + }, + { + "epoch": 0.899367088607595, + "grad_norm": 0.4495176076889038, + "learning_rate": 5.6381479377316726e-05, + "loss": 1.5289, + "step": 8526 + }, + { + "epoch": 0.8994725738396624, + "grad_norm": 0.39566850662231445, + "learning_rate": 5.630327979774111e-05, + "loss": 1.4936, + "step": 8527 + }, + { + "epoch": 0.8995780590717299, + "grad_norm": 0.5286000967025757, + "learning_rate": 5.6225188678857095e-05, + "loss": 1.4968, + "step": 8528 + }, + { + "epoch": 0.8996835443037975, + "grad_norm": 0.4285947382450104, + "learning_rate": 5.61472058702326e-05, + "loss": 1.4689, + "step": 8529 + }, + { + "epoch": 0.8997890295358649, + "grad_norm": 0.42458072304725647, + "learning_rate": 5.6069331221644284e-05, + "loss": 1.5222, + "step": 8530 + }, + { + "epoch": 0.8998945147679325, + "grad_norm": 0.44740793108940125, + "learning_rate": 5.599156458307712e-05, + "loss": 1.4906, + "step": 8531 + }, + { + "epoch": 0.9, + "grad_norm": 0.47773995995521545, + "learning_rate": 5.5913905804724106e-05, + "loss": 1.4882, + "step": 8532 + }, + { + "epoch": 0.9001054852320675, + "grad_norm": 0.4227980375289917, + "learning_rate": 5.58363547369861e-05, + "loss": 1.4687, + "step": 8533 + }, + { + "epoch": 0.900210970464135, + "grad_norm": 
0.45700153708457947, + "learning_rate": 5.575891123047136e-05, + "loss": 1.5177, + "step": 8534 + }, + { + "epoch": 0.9003164556962026, + "grad_norm": 0.412743479013443, + "learning_rate": 5.568157513599542e-05, + "loss": 1.4807, + "step": 8535 + }, + { + "epoch": 0.90042194092827, + "grad_norm": 0.4132397174835205, + "learning_rate": 5.5604346304580727e-05, + "loss": 1.5148, + "step": 8536 + }, + { + "epoch": 0.9005274261603375, + "grad_norm": 0.4457518756389618, + "learning_rate": 5.552722458745626e-05, + "loss": 1.5337, + "step": 8537 + }, + { + "epoch": 0.9006329113924051, + "grad_norm": 0.4229176342487335, + "learning_rate": 5.545020983605749e-05, + "loss": 1.4872, + "step": 8538 + }, + { + "epoch": 0.9007383966244725, + "grad_norm": 0.4493631422519684, + "learning_rate": 5.53733019020258e-05, + "loss": 1.4789, + "step": 8539 + }, + { + "epoch": 0.9008438818565401, + "grad_norm": 0.4571084976196289, + "learning_rate": 5.529650063720842e-05, + "loss": 1.5118, + "step": 8540 + }, + { + "epoch": 0.9009493670886076, + "grad_norm": 0.39605796337127686, + "learning_rate": 5.52198058936581e-05, + "loss": 1.4886, + "step": 8541 + }, + { + "epoch": 0.9010548523206751, + "grad_norm": 0.4505995810031891, + "learning_rate": 5.5143217523632655e-05, + "loss": 1.4948, + "step": 8542 + }, + { + "epoch": 0.9011603375527426, + "grad_norm": 0.41481825709342957, + "learning_rate": 5.5066735379594944e-05, + "loss": 1.4733, + "step": 8543 + }, + { + "epoch": 0.9012658227848102, + "grad_norm": 0.4302605092525482, + "learning_rate": 5.4990359314212424e-05, + "loss": 1.514, + "step": 8544 + }, + { + "epoch": 0.9013713080168776, + "grad_norm": 0.42191892862319946, + "learning_rate": 5.491408918035683e-05, + "loss": 1.4478, + "step": 8545 + }, + { + "epoch": 0.9014767932489451, + "grad_norm": 0.451427698135376, + "learning_rate": 5.483792483110408e-05, + "loss": 1.4911, + "step": 8546 + }, + { + "epoch": 0.9015822784810127, + "grad_norm": 0.43870601058006287, + "learning_rate": 
5.476186611973374e-05, + "loss": 1.4691, + "step": 8547 + }, + { + "epoch": 0.9016877637130801, + "grad_norm": 0.42102065682411194, + "learning_rate": 5.4685912899728965e-05, + "loss": 1.5076, + "step": 8548 + }, + { + "epoch": 0.9017932489451477, + "grad_norm": 0.4193611741065979, + "learning_rate": 5.4610065024776125e-05, + "loss": 1.4602, + "step": 8549 + }, + { + "epoch": 0.9018987341772152, + "grad_norm": 0.4292870759963989, + "learning_rate": 5.453432234876445e-05, + "loss": 1.5048, + "step": 8550 + }, + { + "epoch": 0.9020042194092827, + "grad_norm": 0.4297439157962799, + "learning_rate": 5.445868472578592e-05, + "loss": 1.4623, + "step": 8551 + }, + { + "epoch": 0.9021097046413502, + "grad_norm": 0.420657753944397, + "learning_rate": 5.438315201013476e-05, + "loss": 1.4664, + "step": 8552 + }, + { + "epoch": 0.9022151898734178, + "grad_norm": 0.43493518233299255, + "learning_rate": 5.430772405630742e-05, + "loss": 1.4871, + "step": 8553 + }, + { + "epoch": 0.9023206751054852, + "grad_norm": 0.42774251103401184, + "learning_rate": 5.423240071900209e-05, + "loss": 1.507, + "step": 8554 + }, + { + "epoch": 0.9024261603375527, + "grad_norm": 0.4181724190711975, + "learning_rate": 5.4157181853118464e-05, + "loss": 1.4872, + "step": 8555 + }, + { + "epoch": 0.9025316455696203, + "grad_norm": 0.4257463216781616, + "learning_rate": 5.408206731375757e-05, + "loss": 1.502, + "step": 8556 + }, + { + "epoch": 0.9026371308016877, + "grad_norm": 0.41938287019729614, + "learning_rate": 5.400705695622129e-05, + "loss": 1.4289, + "step": 8557 + }, + { + "epoch": 0.9027426160337553, + "grad_norm": 0.4359043836593628, + "learning_rate": 5.39321506360123e-05, + "loss": 1.4593, + "step": 8558 + }, + { + "epoch": 0.9028481012658228, + "grad_norm": 0.406623899936676, + "learning_rate": 5.38573482088337e-05, + "loss": 1.4932, + "step": 8559 + }, + { + "epoch": 0.9029535864978903, + "grad_norm": 0.430521696805954, + "learning_rate": 5.37826495305886e-05, + "loss": 1.4614, + "step": 
8560 + }, + { + "epoch": 0.9030590717299578, + "grad_norm": 0.4151771664619446, + "learning_rate": 5.370805445738011e-05, + "loss": 1.49, + "step": 8561 + }, + { + "epoch": 0.9031645569620254, + "grad_norm": 0.42271867394447327, + "learning_rate": 5.3633562845510806e-05, + "loss": 1.5204, + "step": 8562 + }, + { + "epoch": 0.9032700421940928, + "grad_norm": 0.45540252327919006, + "learning_rate": 5.3559174551482656e-05, + "loss": 1.4847, + "step": 8563 + }, + { + "epoch": 0.9033755274261603, + "grad_norm": 0.4780273735523224, + "learning_rate": 5.3484889431996646e-05, + "loss": 1.4652, + "step": 8564 + }, + { + "epoch": 0.9034810126582279, + "grad_norm": 0.4422934353351593, + "learning_rate": 5.341070734395244e-05, + "loss": 1.4931, + "step": 8565 + }, + { + "epoch": 0.9035864978902953, + "grad_norm": 0.43156108260154724, + "learning_rate": 5.3336628144448266e-05, + "loss": 1.5259, + "step": 8566 + }, + { + "epoch": 0.9036919831223629, + "grad_norm": 0.4315359890460968, + "learning_rate": 5.326265169078048e-05, + "loss": 1.498, + "step": 8567 + }, + { + "epoch": 0.9037974683544304, + "grad_norm": 0.40850797295570374, + "learning_rate": 5.318877784044342e-05, + "loss": 1.4639, + "step": 8568 + }, + { + "epoch": 0.9039029535864979, + "grad_norm": 0.42627614736557007, + "learning_rate": 5.3115006451129075e-05, + "loss": 1.4955, + "step": 8569 + }, + { + "epoch": 0.9040084388185654, + "grad_norm": 0.45638468861579895, + "learning_rate": 5.304133738072674e-05, + "loss": 1.5209, + "step": 8570 + }, + { + "epoch": 0.904113924050633, + "grad_norm": 0.44763416051864624, + "learning_rate": 5.296777048732293e-05, + "loss": 1.4884, + "step": 8571 + }, + { + "epoch": 0.9042194092827004, + "grad_norm": 0.42213135957717896, + "learning_rate": 5.289430562920086e-05, + "loss": 1.4612, + "step": 8572 + }, + { + "epoch": 0.9043248945147679, + "grad_norm": 0.41777390241622925, + "learning_rate": 5.2820942664840405e-05, + "loss": 1.4913, + "step": 8573 + }, + { + "epoch": 
0.9044303797468355, + "grad_norm": 0.4218050241470337, + "learning_rate": 5.2747681452917697e-05, + "loss": 1.5012, + "step": 8574 + }, + { + "epoch": 0.9045358649789029, + "grad_norm": 0.4458419978618622, + "learning_rate": 5.267452185230482e-05, + "loss": 1.4912, + "step": 8575 + }, + { + "epoch": 0.9046413502109705, + "grad_norm": 0.48722466826438904, + "learning_rate": 5.260146372206972e-05, + "loss": 1.4705, + "step": 8576 + }, + { + "epoch": 0.904746835443038, + "grad_norm": 0.41805747151374817, + "learning_rate": 5.2528506921475664e-05, + "loss": 1.51, + "step": 8577 + }, + { + "epoch": 0.9048523206751055, + "grad_norm": 0.43927815556526184, + "learning_rate": 5.245565130998124e-05, + "loss": 1.4641, + "step": 8578 + }, + { + "epoch": 0.904957805907173, + "grad_norm": 0.4201597273349762, + "learning_rate": 5.2382896747239935e-05, + "loss": 1.5162, + "step": 8579 + }, + { + "epoch": 0.9050632911392406, + "grad_norm": 0.4533957242965698, + "learning_rate": 5.2310243093099814e-05, + "loss": 1.4939, + "step": 8580 + }, + { + "epoch": 0.905168776371308, + "grad_norm": 0.4219772219657898, + "learning_rate": 5.223769020760346e-05, + "loss": 1.4705, + "step": 8581 + }, + { + "epoch": 0.9052742616033755, + "grad_norm": 0.42240914702415466, + "learning_rate": 5.216523795098743e-05, + "loss": 1.5021, + "step": 8582 + }, + { + "epoch": 0.9053797468354431, + "grad_norm": 0.45295533537864685, + "learning_rate": 5.209288618368225e-05, + "loss": 1.4939, + "step": 8583 + }, + { + "epoch": 0.9054852320675105, + "grad_norm": 0.3956073522567749, + "learning_rate": 5.202063476631199e-05, + "loss": 1.4733, + "step": 8584 + }, + { + "epoch": 0.9055907172995781, + "grad_norm": 0.42122554779052734, + "learning_rate": 5.194848355969396e-05, + "loss": 1.4857, + "step": 8585 + }, + { + "epoch": 0.9056962025316456, + "grad_norm": 0.4341195821762085, + "learning_rate": 5.18764324248386e-05, + "loss": 1.4678, + "step": 8586 + }, + { + "epoch": 0.9058016877637131, + "grad_norm": 
0.46259692311286926, + "learning_rate": 5.180448122294913e-05, + "loss": 1.496, + "step": 8587 + }, + { + "epoch": 0.9059071729957806, + "grad_norm": 0.4379418194293976, + "learning_rate": 5.173262981542119e-05, + "loss": 1.4672, + "step": 8588 + }, + { + "epoch": 0.9060126582278482, + "grad_norm": 0.4257470965385437, + "learning_rate": 5.166087806384275e-05, + "loss": 1.5093, + "step": 8589 + }, + { + "epoch": 0.9061181434599156, + "grad_norm": 0.44887909293174744, + "learning_rate": 5.158922582999368e-05, + "loss": 1.526, + "step": 8590 + }, + { + "epoch": 0.9062236286919831, + "grad_norm": 0.42257651686668396, + "learning_rate": 5.1517672975845604e-05, + "loss": 1.4833, + "step": 8591 + }, + { + "epoch": 0.9063291139240506, + "grad_norm": 0.4242796301841736, + "learning_rate": 5.144621936356162e-05, + "loss": 1.486, + "step": 8592 + }, + { + "epoch": 0.9064345991561181, + "grad_norm": 0.4501091539859772, + "learning_rate": 5.1374864855495894e-05, + "loss": 1.5073, + "step": 8593 + }, + { + "epoch": 0.9065400843881857, + "grad_norm": 0.4101604223251343, + "learning_rate": 5.130360931419364e-05, + "loss": 1.4529, + "step": 8594 + }, + { + "epoch": 0.9066455696202531, + "grad_norm": 0.4798033833503723, + "learning_rate": 5.123245260239058e-05, + "loss": 1.5441, + "step": 8595 + }, + { + "epoch": 0.9067510548523207, + "grad_norm": 0.46724724769592285, + "learning_rate": 5.1161394583012904e-05, + "loss": 1.4261, + "step": 8596 + }, + { + "epoch": 0.9068565400843882, + "grad_norm": 0.404538094997406, + "learning_rate": 5.109043511917694e-05, + "loss": 1.4516, + "step": 8597 + }, + { + "epoch": 0.9069620253164556, + "grad_norm": 0.46111708879470825, + "learning_rate": 5.101957407418877e-05, + "loss": 1.5065, + "step": 8598 + }, + { + "epoch": 0.9070675105485232, + "grad_norm": 0.43945568799972534, + "learning_rate": 5.0948811311544186e-05, + "loss": 1.4739, + "step": 8599 + }, + { + "epoch": 0.9071729957805907, + "grad_norm": 0.45501163601875305, + "learning_rate": 
5.087814669492819e-05, + "loss": 1.507, + "step": 8600 + }, + { + "epoch": 0.9072784810126582, + "grad_norm": 0.4359736442565918, + "learning_rate": 5.080758008821494e-05, + "loss": 1.5128, + "step": 8601 + }, + { + "epoch": 0.9073839662447257, + "grad_norm": 0.42234253883361816, + "learning_rate": 5.073711135546738e-05, + "loss": 1.4824, + "step": 8602 + }, + { + "epoch": 0.9074894514767933, + "grad_norm": 0.41872018575668335, + "learning_rate": 5.0666740360936944e-05, + "loss": 1.4724, + "step": 8603 + }, + { + "epoch": 0.9075949367088607, + "grad_norm": 0.4337361454963684, + "learning_rate": 5.0596466969063415e-05, + "loss": 1.5038, + "step": 8604 + }, + { + "epoch": 0.9077004219409283, + "grad_norm": 0.4256981611251831, + "learning_rate": 5.052629104447452e-05, + "loss": 1.4745, + "step": 8605 + }, + { + "epoch": 0.9078059071729958, + "grad_norm": 0.4462968707084656, + "learning_rate": 5.0456212451985806e-05, + "loss": 1.4767, + "step": 8606 + }, + { + "epoch": 0.9079113924050632, + "grad_norm": 0.45027652382850647, + "learning_rate": 5.038623105660033e-05, + "loss": 1.4734, + "step": 8607 + }, + { + "epoch": 0.9080168776371308, + "grad_norm": 0.47832170128822327, + "learning_rate": 5.0316346723508287e-05, + "loss": 1.4719, + "step": 8608 + }, + { + "epoch": 0.9081223628691983, + "grad_norm": 0.4299798905849457, + "learning_rate": 5.024655931808697e-05, + "loss": 1.5033, + "step": 8609 + }, + { + "epoch": 0.9082278481012658, + "grad_norm": 0.4214524030685425, + "learning_rate": 5.017686870590028e-05, + "loss": 1.4771, + "step": 8610 + }, + { + "epoch": 0.9083333333333333, + "grad_norm": 0.4417315125465393, + "learning_rate": 5.010727475269867e-05, + "loss": 1.4945, + "step": 8611 + }, + { + "epoch": 0.9084388185654009, + "grad_norm": 0.39711448550224304, + "learning_rate": 5.0037777324418756e-05, + "loss": 1.4856, + "step": 8612 + }, + { + "epoch": 0.9085443037974683, + "grad_norm": 0.44426894187927246, + "learning_rate": 4.9968376287183074e-05, + "loss": 
1.4691, + "step": 8613 + }, + { + "epoch": 0.9086497890295359, + "grad_norm": 0.43709734082221985, + "learning_rate": 4.989907150729989e-05, + "loss": 1.5143, + "step": 8614 + }, + { + "epoch": 0.9087552742616034, + "grad_norm": 0.44660085439682007, + "learning_rate": 4.9829862851262845e-05, + "loss": 1.4331, + "step": 8615 + }, + { + "epoch": 0.9088607594936708, + "grad_norm": 0.4466531574726105, + "learning_rate": 4.976075018575077e-05, + "loss": 1.5082, + "step": 8616 + }, + { + "epoch": 0.9089662447257384, + "grad_norm": 0.40991637110710144, + "learning_rate": 4.9691733377627475e-05, + "loss": 1.4561, + "step": 8617 + }, + { + "epoch": 0.9090717299578059, + "grad_norm": 0.4238305389881134, + "learning_rate": 4.962281229394129e-05, + "loss": 1.5013, + "step": 8618 + }, + { + "epoch": 0.9091772151898734, + "grad_norm": 0.45339417457580566, + "learning_rate": 4.955398680192509e-05, + "loss": 1.4504, + "step": 8619 + }, + { + "epoch": 0.9092827004219409, + "grad_norm": 0.42653295397758484, + "learning_rate": 4.948525676899577e-05, + "loss": 1.5011, + "step": 8620 + }, + { + "epoch": 0.9093881856540085, + "grad_norm": 0.42224910855293274, + "learning_rate": 4.9416622062754195e-05, + "loss": 1.4931, + "step": 8621 + }, + { + "epoch": 0.9094936708860759, + "grad_norm": 0.45977023243904114, + "learning_rate": 4.934808255098487e-05, + "loss": 1.4436, + "step": 8622 + }, + { + "epoch": 0.9095991561181435, + "grad_norm": 0.4222732484340668, + "learning_rate": 4.92796381016556e-05, + "loss": 1.4895, + "step": 8623 + }, + { + "epoch": 0.909704641350211, + "grad_norm": 0.39933010935783386, + "learning_rate": 4.9211288582917396e-05, + "loss": 1.4771, + "step": 8624 + }, + { + "epoch": 0.9098101265822784, + "grad_norm": 0.41332337260246277, + "learning_rate": 4.9143033863104094e-05, + "loss": 1.5038, + "step": 8625 + }, + { + "epoch": 0.909915611814346, + "grad_norm": 0.416359543800354, + "learning_rate": 4.907487381073214e-05, + "loss": 1.5047, + "step": 8626 + }, + { + 
"epoch": 0.9100210970464135, + "grad_norm": 0.43971553444862366, + "learning_rate": 4.900680829450043e-05, + "loss": 1.4957, + "step": 8627 + }, + { + "epoch": 0.910126582278481, + "grad_norm": 0.45200228691101074, + "learning_rate": 4.893883718328984e-05, + "loss": 1.446, + "step": 8628 + }, + { + "epoch": 0.9102320675105485, + "grad_norm": 0.4479728639125824, + "learning_rate": 4.887096034616319e-05, + "loss": 1.5115, + "step": 8629 + }, + { + "epoch": 0.9103375527426161, + "grad_norm": 0.41752520203590393, + "learning_rate": 4.880317765236493e-05, + "loss": 1.5308, + "step": 8630 + }, + { + "epoch": 0.9104430379746835, + "grad_norm": 0.4065489172935486, + "learning_rate": 4.873548897132076e-05, + "loss": 1.5043, + "step": 8631 + }, + { + "epoch": 0.9105485232067511, + "grad_norm": 0.4325360357761383, + "learning_rate": 4.8667894172637606e-05, + "loss": 1.4526, + "step": 8632 + }, + { + "epoch": 0.9106540084388186, + "grad_norm": 0.42594823241233826, + "learning_rate": 4.860039312610312e-05, + "loss": 1.4496, + "step": 8633 + }, + { + "epoch": 0.910759493670886, + "grad_norm": 0.4176139831542969, + "learning_rate": 4.8532985701685654e-05, + "loss": 1.4972, + "step": 8634 + }, + { + "epoch": 0.9108649789029536, + "grad_norm": 0.4402667284011841, + "learning_rate": 4.846567176953389e-05, + "loss": 1.4864, + "step": 8635 + }, + { + "epoch": 0.9109704641350211, + "grad_norm": 0.4351549446582794, + "learning_rate": 4.839845119997657e-05, + "loss": 1.4483, + "step": 8636 + }, + { + "epoch": 0.9110759493670886, + "grad_norm": 0.45790067315101624, + "learning_rate": 4.833132386352234e-05, + "loss": 1.5047, + "step": 8637 + }, + { + "epoch": 0.9111814345991561, + "grad_norm": 0.4096372425556183, + "learning_rate": 4.8264289630859386e-05, + "loss": 1.4668, + "step": 8638 + }, + { + "epoch": 0.9112869198312237, + "grad_norm": 0.41209712624549866, + "learning_rate": 4.819734837285529e-05, + "loss": 1.4655, + "step": 8639 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 
0.5742860436439514, + "learning_rate": 4.8130499960556755e-05, + "loss": 1.4536, + "step": 8640 + }, + { + "epoch": 0.9114978902953587, + "grad_norm": 0.4653583765029907, + "learning_rate": 4.806374426518927e-05, + "loss": 1.465, + "step": 8641 + }, + { + "epoch": 0.9116033755274262, + "grad_norm": 0.4464600682258606, + "learning_rate": 4.799708115815702e-05, + "loss": 1.4845, + "step": 8642 + }, + { + "epoch": 0.9117088607594936, + "grad_norm": 0.43407562375068665, + "learning_rate": 4.793051051104244e-05, + "loss": 1.4728, + "step": 8643 + }, + { + "epoch": 0.9118143459915612, + "grad_norm": 0.41169705986976624, + "learning_rate": 4.786403219560617e-05, + "loss": 1.4592, + "step": 8644 + }, + { + "epoch": 0.9119198312236287, + "grad_norm": 0.4779900312423706, + "learning_rate": 4.779764608378671e-05, + "loss": 1.4238, + "step": 8645 + }, + { + "epoch": 0.9120253164556962, + "grad_norm": 0.4812563955783844, + "learning_rate": 4.7731352047700095e-05, + "loss": 1.4934, + "step": 8646 + }, + { + "epoch": 0.9121308016877637, + "grad_norm": 0.43489304184913635, + "learning_rate": 4.7665149959639824e-05, + "loss": 1.4639, + "step": 8647 + }, + { + "epoch": 0.9122362869198313, + "grad_norm": 0.4822406470775604, + "learning_rate": 4.759903969207646e-05, + "loss": 1.5363, + "step": 8648 + }, + { + "epoch": 0.9123417721518987, + "grad_norm": 0.40874624252319336, + "learning_rate": 4.7533021117657475e-05, + "loss": 1.4827, + "step": 8649 + }, + { + "epoch": 0.9124472573839663, + "grad_norm": 0.41634196043014526, + "learning_rate": 4.746709410920699e-05, + "loss": 1.4977, + "step": 8650 + }, + { + "epoch": 0.9125527426160338, + "grad_norm": 0.42765235900878906, + "learning_rate": 4.740125853972546e-05, + "loss": 1.4933, + "step": 8651 + }, + { + "epoch": 0.9126582278481012, + "grad_norm": 0.4047209620475769, + "learning_rate": 4.733551428238957e-05, + "loss": 1.5106, + "step": 8652 + }, + { + "epoch": 0.9127637130801688, + "grad_norm": 0.4538222551345825, + "learning_rate": 
4.726986121055179e-05, + "loss": 1.486, + "step": 8653 + }, + { + "epoch": 0.9128691983122363, + "grad_norm": 0.42175963521003723, + "learning_rate": 4.720429919774036e-05, + "loss": 1.5079, + "step": 8654 + }, + { + "epoch": 0.9129746835443038, + "grad_norm": 0.4042593538761139, + "learning_rate": 4.713882811765889e-05, + "loss": 1.4582, + "step": 8655 + }, + { + "epoch": 0.9130801687763713, + "grad_norm": 0.429984450340271, + "learning_rate": 4.7073447844186114e-05, + "loss": 1.4635, + "step": 8656 + }, + { + "epoch": 0.9131856540084389, + "grad_norm": 0.42478910088539124, + "learning_rate": 4.700815825137578e-05, + "loss": 1.4412, + "step": 8657 + }, + { + "epoch": 0.9132911392405063, + "grad_norm": 0.4158935844898224, + "learning_rate": 4.694295921345623e-05, + "loss": 1.4753, + "step": 8658 + }, + { + "epoch": 0.9133966244725739, + "grad_norm": 0.43298426270484924, + "learning_rate": 4.687785060483031e-05, + "loss": 1.4619, + "step": 8659 + }, + { + "epoch": 0.9135021097046413, + "grad_norm": 0.5064635276794434, + "learning_rate": 4.681283230007507e-05, + "loss": 1.5002, + "step": 8660 + }, + { + "epoch": 0.9136075949367088, + "grad_norm": 0.42793628573417664, + "learning_rate": 4.674790417394145e-05, + "loss": 1.4471, + "step": 8661 + }, + { + "epoch": 0.9137130801687764, + "grad_norm": 0.43282264471054077, + "learning_rate": 4.6683066101354215e-05, + "loss": 1.4773, + "step": 8662 + }, + { + "epoch": 0.9138185654008438, + "grad_norm": 0.42674756050109863, + "learning_rate": 4.661831795741148e-05, + "loss": 1.5025, + "step": 8663 + }, + { + "epoch": 0.9139240506329114, + "grad_norm": 0.42112410068511963, + "learning_rate": 4.655365961738467e-05, + "loss": 1.504, + "step": 8664 + }, + { + "epoch": 0.9140295358649789, + "grad_norm": 0.4230802357196808, + "learning_rate": 4.648909095671825e-05, + "loss": 1.4939, + "step": 8665 + }, + { + "epoch": 0.9141350210970464, + "grad_norm": 0.4045351445674896, + "learning_rate": 4.6424611851029316e-05, + "loss": 1.4667, + 
"step": 8666 + }, + { + "epoch": 0.9142405063291139, + "grad_norm": 0.4341583847999573, + "learning_rate": 4.63602221761076e-05, + "loss": 1.4771, + "step": 8667 + }, + { + "epoch": 0.9143459915611815, + "grad_norm": 0.4149828255176544, + "learning_rate": 4.629592180791501e-05, + "loss": 1.4638, + "step": 8668 + }, + { + "epoch": 0.9144514767932489, + "grad_norm": 0.4330116808414459, + "learning_rate": 4.623171062258557e-05, + "loss": 1.4888, + "step": 8669 + }, + { + "epoch": 0.9145569620253164, + "grad_norm": 0.4128780961036682, + "learning_rate": 4.616758849642509e-05, + "loss": 1.5071, + "step": 8670 + }, + { + "epoch": 0.914662447257384, + "grad_norm": 0.4493173360824585, + "learning_rate": 4.610355530591087e-05, + "loss": 1.5305, + "step": 8671 + }, + { + "epoch": 0.9147679324894514, + "grad_norm": 0.4235368072986603, + "learning_rate": 4.6039610927691646e-05, + "loss": 1.5272, + "step": 8672 + }, + { + "epoch": 0.914873417721519, + "grad_norm": 0.38943812251091003, + "learning_rate": 4.597575523858712e-05, + "loss": 1.4731, + "step": 8673 + }, + { + "epoch": 0.9149789029535865, + "grad_norm": 0.38808494806289673, + "learning_rate": 4.5911988115587936e-05, + "loss": 1.4741, + "step": 8674 + }, + { + "epoch": 0.915084388185654, + "grad_norm": 0.4508820176124573, + "learning_rate": 4.584830943585533e-05, + "loss": 1.4989, + "step": 8675 + }, + { + "epoch": 0.9151898734177215, + "grad_norm": 0.42681100964546204, + "learning_rate": 4.5784719076720844e-05, + "loss": 1.4887, + "step": 8676 + }, + { + "epoch": 0.9152953586497891, + "grad_norm": 0.46018165349960327, + "learning_rate": 4.572121691568624e-05, + "loss": 1.475, + "step": 8677 + }, + { + "epoch": 0.9154008438818565, + "grad_norm": 0.38980650901794434, + "learning_rate": 4.565780283042316e-05, + "loss": 1.5026, + "step": 8678 + }, + { + "epoch": 0.915506329113924, + "grad_norm": 0.4348743259906769, + "learning_rate": 4.559447669877288e-05, + "loss": 1.5131, + "step": 8679 + }, + { + "epoch": 
0.9156118143459916, + "grad_norm": 0.4234529435634613, + "learning_rate": 4.553123839874615e-05, + "loss": 1.4619, + "step": 8680 + }, + { + "epoch": 0.915717299578059, + "grad_norm": 0.49747994542121887, + "learning_rate": 4.546808780852286e-05, + "loss": 1.525, + "step": 8681 + }, + { + "epoch": 0.9158227848101266, + "grad_norm": 0.4310455322265625, + "learning_rate": 4.5405024806451926e-05, + "loss": 1.4902, + "step": 8682 + }, + { + "epoch": 0.9159282700421941, + "grad_norm": 0.45674219727516174, + "learning_rate": 4.534204927105098e-05, + "loss": 1.4646, + "step": 8683 + }, + { + "epoch": 0.9160337552742616, + "grad_norm": 0.4182247817516327, + "learning_rate": 4.5279161081006076e-05, + "loss": 1.4861, + "step": 8684 + }, + { + "epoch": 0.9161392405063291, + "grad_norm": 0.44030308723449707, + "learning_rate": 4.521636011517162e-05, + "loss": 1.4806, + "step": 8685 + }, + { + "epoch": 0.9162447257383967, + "grad_norm": 0.4233447313308716, + "learning_rate": 4.515364625256998e-05, + "loss": 1.4938, + "step": 8686 + }, + { + "epoch": 0.9163502109704641, + "grad_norm": 0.4525858461856842, + "learning_rate": 4.5091019372391345e-05, + "loss": 1.4813, + "step": 8687 + }, + { + "epoch": 0.9164556962025316, + "grad_norm": 0.4176623225212097, + "learning_rate": 4.502847935399348e-05, + "loss": 1.4965, + "step": 8688 + }, + { + "epoch": 0.9165611814345992, + "grad_norm": 0.4262511134147644, + "learning_rate": 4.496602607690141e-05, + "loss": 1.5199, + "step": 8689 + }, + { + "epoch": 0.9166666666666666, + "grad_norm": 0.4176427721977234, + "learning_rate": 4.490365942080736e-05, + "loss": 1.4616, + "step": 8690 + }, + { + "epoch": 0.9167721518987342, + "grad_norm": 0.4170861542224884, + "learning_rate": 4.48413792655703e-05, + "loss": 1.5228, + "step": 8691 + }, + { + "epoch": 0.9168776371308017, + "grad_norm": 0.42994213104248047, + "learning_rate": 4.4779185491215926e-05, + "loss": 1.4898, + "step": 8692 + }, + { + "epoch": 0.9169831223628692, + "grad_norm": 
0.42551156878471375, + "learning_rate": 4.471707797793631e-05, + "loss": 1.4855, + "step": 8693 + }, + { + "epoch": 0.9170886075949367, + "grad_norm": 0.41395485401153564, + "learning_rate": 4.465505660608965e-05, + "loss": 1.5009, + "step": 8694 + }, + { + "epoch": 0.9171940928270043, + "grad_norm": 0.4158569574356079, + "learning_rate": 4.459312125620017e-05, + "loss": 1.4634, + "step": 8695 + }, + { + "epoch": 0.9172995780590717, + "grad_norm": 0.4420301616191864, + "learning_rate": 4.4531271808957704e-05, + "loss": 1.4883, + "step": 8696 + }, + { + "epoch": 0.9174050632911392, + "grad_norm": 0.44408783316612244, + "learning_rate": 4.4469508145217626e-05, + "loss": 1.473, + "step": 8697 + }, + { + "epoch": 0.9175105485232068, + "grad_norm": 0.44045335054397583, + "learning_rate": 4.440783014600059e-05, + "loss": 1.4707, + "step": 8698 + }, + { + "epoch": 0.9176160337552742, + "grad_norm": 0.4094163477420807, + "learning_rate": 4.434623769249217e-05, + "loss": 1.5064, + "step": 8699 + }, + { + "epoch": 0.9177215189873418, + "grad_norm": 0.4268554151058197, + "learning_rate": 4.428473066604285e-05, + "loss": 1.4774, + "step": 8700 + }, + { + "epoch": 0.9178270042194093, + "grad_norm": 0.45362943410873413, + "learning_rate": 4.422330894816757e-05, + "loss": 1.4788, + "step": 8701 + }, + { + "epoch": 0.9179324894514768, + "grad_norm": 0.4464951753616333, + "learning_rate": 4.4161972420545684e-05, + "loss": 1.4517, + "step": 8702 + }, + { + "epoch": 0.9180379746835443, + "grad_norm": 0.44934725761413574, + "learning_rate": 4.410072096502064e-05, + "loss": 1.5036, + "step": 8703 + }, + { + "epoch": 0.9181434599156119, + "grad_norm": 0.4298765957355499, + "learning_rate": 4.403955446359971e-05, + "loss": 1.4768, + "step": 8704 + }, + { + "epoch": 0.9182489451476793, + "grad_norm": 0.42397987842559814, + "learning_rate": 4.397847279845391e-05, + "loss": 1.5047, + "step": 8705 + }, + { + "epoch": 0.9183544303797468, + "grad_norm": 0.4378174841403961, + "learning_rate": 
4.391747585191758e-05, + "loss": 1.4753, + "step": 8706 + }, + { + "epoch": 0.9184599156118144, + "grad_norm": 0.4254377782344818, + "learning_rate": 4.385656350648834e-05, + "loss": 1.494, + "step": 8707 + }, + { + "epoch": 0.9185654008438818, + "grad_norm": 0.40909865498542786, + "learning_rate": 4.3795735644826776e-05, + "loss": 1.4708, + "step": 8708 + }, + { + "epoch": 0.9186708860759494, + "grad_norm": 0.41189640760421753, + "learning_rate": 4.373499214975615e-05, + "loss": 1.4893, + "step": 8709 + }, + { + "epoch": 0.9187763713080169, + "grad_norm": 0.4533466696739197, + "learning_rate": 4.367433290426233e-05, + "loss": 1.5144, + "step": 8710 + }, + { + "epoch": 0.9188818565400844, + "grad_norm": 0.49745699763298035, + "learning_rate": 4.361375779149342e-05, + "loss": 1.4996, + "step": 8711 + }, + { + "epoch": 0.9189873417721519, + "grad_norm": 0.44249609112739563, + "learning_rate": 4.3553266694759614e-05, + "loss": 1.4717, + "step": 8712 + }, + { + "epoch": 0.9190928270042195, + "grad_norm": 0.4108584523200989, + "learning_rate": 4.3492859497533e-05, + "loss": 1.4927, + "step": 8713 + }, + { + "epoch": 0.9191983122362869, + "grad_norm": 0.4718115031719208, + "learning_rate": 4.343253608344718e-05, + "loss": 1.4801, + "step": 8714 + }, + { + "epoch": 0.9193037974683544, + "grad_norm": 0.42481932044029236, + "learning_rate": 4.337229633629727e-05, + "loss": 1.4924, + "step": 8715 + }, + { + "epoch": 0.919409282700422, + "grad_norm": 0.45272520184516907, + "learning_rate": 4.3312140140039447e-05, + "loss": 1.4589, + "step": 8716 + }, + { + "epoch": 0.9195147679324894, + "grad_norm": 0.4103776514530182, + "learning_rate": 4.3252067378790934e-05, + "loss": 1.4301, + "step": 8717 + }, + { + "epoch": 0.919620253164557, + "grad_norm": 0.4505520761013031, + "learning_rate": 4.319207793682965e-05, + "loss": 1.5018, + "step": 8718 + }, + { + "epoch": 0.9197257383966245, + "grad_norm": 0.4533357322216034, + "learning_rate": 4.313217169859397e-05, + "loss": 1.4859, + 
"step": 8719 + }, + { + "epoch": 0.919831223628692, + "grad_norm": 0.4405952990055084, + "learning_rate": 4.3072348548682595e-05, + "loss": 1.4656, + "step": 8720 + }, + { + "epoch": 0.9199367088607595, + "grad_norm": 0.43280160427093506, + "learning_rate": 4.3012608371854326e-05, + "loss": 1.4976, + "step": 8721 + }, + { + "epoch": 0.9200421940928271, + "grad_norm": 0.4532991051673889, + "learning_rate": 4.2952951053027684e-05, + "loss": 1.4918, + "step": 8722 + }, + { + "epoch": 0.9201476793248945, + "grad_norm": 0.41342782974243164, + "learning_rate": 4.2893376477280934e-05, + "loss": 1.5023, + "step": 8723 + }, + { + "epoch": 0.920253164556962, + "grad_norm": 0.4268062710762024, + "learning_rate": 4.283388452985162e-05, + "loss": 1.5335, + "step": 8724 + }, + { + "epoch": 0.9203586497890295, + "grad_norm": 0.4370148181915283, + "learning_rate": 4.2774475096136525e-05, + "loss": 1.4778, + "step": 8725 + }, + { + "epoch": 0.920464135021097, + "grad_norm": 0.4215529263019562, + "learning_rate": 4.271514806169141e-05, + "loss": 1.442, + "step": 8726 + }, + { + "epoch": 0.9205696202531646, + "grad_norm": 0.43270763754844666, + "learning_rate": 4.265590331223067e-05, + "loss": 1.4888, + "step": 8727 + }, + { + "epoch": 0.920675105485232, + "grad_norm": 0.41159388422966003, + "learning_rate": 4.259674073362732e-05, + "loss": 1.4481, + "step": 8728 + }, + { + "epoch": 0.9207805907172996, + "grad_norm": 0.4133281111717224, + "learning_rate": 4.253766021191256e-05, + "loss": 1.54, + "step": 8729 + }, + { + "epoch": 0.9208860759493671, + "grad_norm": 0.42678168416023254, + "learning_rate": 4.247866163327575e-05, + "loss": 1.4846, + "step": 8730 + }, + { + "epoch": 0.9209915611814345, + "grad_norm": 0.41132545471191406, + "learning_rate": 4.241974488406408e-05, + "loss": 1.4704, + "step": 8731 + }, + { + "epoch": 0.9210970464135021, + "grad_norm": 0.41572389006614685, + "learning_rate": 4.236090985078232e-05, + "loss": 1.4853, + "step": 8732 + }, + { + "epoch": 
0.9212025316455696, + "grad_norm": 0.4238037168979645, + "learning_rate": 4.230215642009275e-05, + "loss": 1.5133, + "step": 8733 + }, + { + "epoch": 0.9213080168776371, + "grad_norm": 0.43294909596443176, + "learning_rate": 4.224348447881473e-05, + "loss": 1.4609, + "step": 8734 + }, + { + "epoch": 0.9214135021097046, + "grad_norm": 0.4385758638381958, + "learning_rate": 4.218489391392469e-05, + "loss": 1.466, + "step": 8735 + }, + { + "epoch": 0.9215189873417722, + "grad_norm": 0.4836699664592743, + "learning_rate": 4.212638461255582e-05, + "loss": 1.4657, + "step": 8736 + }, + { + "epoch": 0.9216244725738396, + "grad_norm": 0.45895034074783325, + "learning_rate": 4.206795646199778e-05, + "loss": 1.4668, + "step": 8737 + }, + { + "epoch": 0.9217299578059072, + "grad_norm": 0.4387611150741577, + "learning_rate": 4.200960934969664e-05, + "loss": 1.5008, + "step": 8738 + }, + { + "epoch": 0.9218354430379747, + "grad_norm": 0.3952721655368805, + "learning_rate": 4.19513431632545e-05, + "loss": 1.4963, + "step": 8739 + }, + { + "epoch": 0.9219409282700421, + "grad_norm": 0.44427233934402466, + "learning_rate": 4.1893157790429404e-05, + "loss": 1.4801, + "step": 8740 + }, + { + "epoch": 0.9220464135021097, + "grad_norm": 0.47141897678375244, + "learning_rate": 4.1835053119135096e-05, + "loss": 1.4709, + "step": 8741 + }, + { + "epoch": 0.9221518987341772, + "grad_norm": 0.40252095460891724, + "learning_rate": 4.17770290374407e-05, + "loss": 1.4659, + "step": 8742 + }, + { + "epoch": 0.9222573839662447, + "grad_norm": 0.42490851879119873, + "learning_rate": 4.171908543357067e-05, + "loss": 1.423, + "step": 8743 + }, + { + "epoch": 0.9223628691983122, + "grad_norm": 0.43121129274368286, + "learning_rate": 4.166122219590441e-05, + "loss": 1.4882, + "step": 8744 + }, + { + "epoch": 0.9224683544303798, + "grad_norm": 0.4186294972896576, + "learning_rate": 4.1603439212976205e-05, + "loss": 1.5205, + "step": 8745 + }, + { + "epoch": 0.9225738396624472, + "grad_norm": 
0.4393500089645386, + "learning_rate": 4.1545736373474935e-05, + "loss": 1.4874, + "step": 8746 + }, + { + "epoch": 0.9226793248945148, + "grad_norm": 0.4342838227748871, + "learning_rate": 4.148811356624379e-05, + "loss": 1.477, + "step": 8747 + }, + { + "epoch": 0.9227848101265823, + "grad_norm": 0.4205479323863983, + "learning_rate": 4.143057068028024e-05, + "loss": 1.4729, + "step": 8748 + }, + { + "epoch": 0.9228902953586497, + "grad_norm": 0.44601762294769287, + "learning_rate": 4.1373107604735626e-05, + "loss": 1.4914, + "step": 8749 + }, + { + "epoch": 0.9229957805907173, + "grad_norm": 0.4681358337402344, + "learning_rate": 4.1315724228915066e-05, + "loss": 1.5047, + "step": 8750 + }, + { + "epoch": 0.9231012658227848, + "grad_norm": 0.4017626643180847, + "learning_rate": 4.125842044227725e-05, + "loss": 1.4495, + "step": 8751 + }, + { + "epoch": 0.9232067510548523, + "grad_norm": 0.4187672436237335, + "learning_rate": 4.120119613443409e-05, + "loss": 1.5042, + "step": 8752 + }, + { + "epoch": 0.9233122362869198, + "grad_norm": 0.4254954159259796, + "learning_rate": 4.114405119515069e-05, + "loss": 1.4548, + "step": 8753 + }, + { + "epoch": 0.9234177215189874, + "grad_norm": 0.43452268838882446, + "learning_rate": 4.1086985514344996e-05, + "loss": 1.4782, + "step": 8754 + }, + { + "epoch": 0.9235232067510548, + "grad_norm": 0.4373135566711426, + "learning_rate": 4.102999898208766e-05, + "loss": 1.4828, + "step": 8755 + }, + { + "epoch": 0.9236286919831224, + "grad_norm": 0.40655672550201416, + "learning_rate": 4.0973091488601826e-05, + "loss": 1.4741, + "step": 8756 + }, + { + "epoch": 0.9237341772151899, + "grad_norm": 0.4433337450027466, + "learning_rate": 4.091626292426282e-05, + "loss": 1.4806, + "step": 8757 + }, + { + "epoch": 0.9238396624472573, + "grad_norm": 0.4077359735965729, + "learning_rate": 4.0859513179598096e-05, + "loss": 1.4127, + "step": 8758 + }, + { + "epoch": 0.9239451476793249, + "grad_norm": 0.4369218945503235, + "learning_rate": 
4.0802842145286876e-05, + "loss": 1.4951, + "step": 8759 + }, + { + "epoch": 0.9240506329113924, + "grad_norm": 0.45306119322776794, + "learning_rate": 4.074624971216005e-05, + "loss": 1.4595, + "step": 8760 + }, + { + "epoch": 0.9241561181434599, + "grad_norm": 0.43634605407714844, + "learning_rate": 4.0689735771199944e-05, + "loss": 1.4991, + "step": 8761 + }, + { + "epoch": 0.9242616033755274, + "grad_norm": 0.39773571491241455, + "learning_rate": 4.0633300213540004e-05, + "loss": 1.4482, + "step": 8762 + }, + { + "epoch": 0.924367088607595, + "grad_norm": 0.44319018721580505, + "learning_rate": 4.057694293046475e-05, + "loss": 1.5368, + "step": 8763 + }, + { + "epoch": 0.9244725738396624, + "grad_norm": 0.4130871593952179, + "learning_rate": 4.052066381340948e-05, + "loss": 1.4888, + "step": 8764 + }, + { + "epoch": 0.92457805907173, + "grad_norm": 0.43742549419403076, + "learning_rate": 4.0464462753960006e-05, + "loss": 1.4546, + "step": 8765 + }, + { + "epoch": 0.9246835443037975, + "grad_norm": 0.41864845156669617, + "learning_rate": 4.040833964385259e-05, + "loss": 1.4767, + "step": 8766 + }, + { + "epoch": 0.924789029535865, + "grad_norm": 0.4739699959754944, + "learning_rate": 4.035229437497357e-05, + "loss": 1.4836, + "step": 8767 + }, + { + "epoch": 0.9248945147679325, + "grad_norm": 0.40845295786857605, + "learning_rate": 4.02963268393593e-05, + "loss": 1.4727, + "step": 8768 + }, + { + "epoch": 0.925, + "grad_norm": 0.4212695360183716, + "learning_rate": 4.024043692919589e-05, + "loss": 1.4748, + "step": 8769 + }, + { + "epoch": 0.9251054852320675, + "grad_norm": 0.4217427372932434, + "learning_rate": 4.018462453681889e-05, + "loss": 1.4581, + "step": 8770 + }, + { + "epoch": 0.925210970464135, + "grad_norm": 0.40990760922431946, + "learning_rate": 4.0128889554713276e-05, + "loss": 1.5143, + "step": 8771 + }, + { + "epoch": 0.9253164556962026, + "grad_norm": 0.45956286787986755, + "learning_rate": 4.007323187551308e-05, + "loss": 1.4487, + "step": 
8772 + }, + { + "epoch": 0.92542194092827, + "grad_norm": 0.45178842544555664, + "learning_rate": 4.0017651392001285e-05, + "loss": 1.5012, + "step": 8773 + }, + { + "epoch": 0.9255274261603376, + "grad_norm": 0.42147213220596313, + "learning_rate": 3.9962147997109584e-05, + "loss": 1.4642, + "step": 8774 + }, + { + "epoch": 0.9256329113924051, + "grad_norm": 0.44957101345062256, + "learning_rate": 3.990672158391812e-05, + "loss": 1.497, + "step": 8775 + }, + { + "epoch": 0.9257383966244725, + "grad_norm": 0.4100128710269928, + "learning_rate": 3.9851372045655414e-05, + "loss": 1.4838, + "step": 8776 + }, + { + "epoch": 0.9258438818565401, + "grad_norm": 0.4358738660812378, + "learning_rate": 3.979609927569798e-05, + "loss": 1.4819, + "step": 8777 + }, + { + "epoch": 0.9259493670886076, + "grad_norm": 0.4237467646598816, + "learning_rate": 3.974090316757029e-05, + "loss": 1.4763, + "step": 8778 + }, + { + "epoch": 0.9260548523206751, + "grad_norm": 0.4317324757575989, + "learning_rate": 3.968578361494449e-05, + "loss": 1.5093, + "step": 8779 + }, + { + "epoch": 0.9261603375527426, + "grad_norm": 0.42482826113700867, + "learning_rate": 3.963074051164014e-05, + "loss": 1.4984, + "step": 8780 + }, + { + "epoch": 0.9262658227848102, + "grad_norm": 0.44853195548057556, + "learning_rate": 3.957577375162414e-05, + "loss": 1.4992, + "step": 8781 + }, + { + "epoch": 0.9263713080168776, + "grad_norm": 0.4298540949821472, + "learning_rate": 3.952088322901039e-05, + "loss": 1.521, + "step": 8782 + }, + { + "epoch": 0.9264767932489452, + "grad_norm": 0.4193238615989685, + "learning_rate": 3.946606883805971e-05, + "loss": 1.4506, + "step": 8783 + }, + { + "epoch": 0.9265822784810127, + "grad_norm": 0.432110995054245, + "learning_rate": 3.941133047317956e-05, + "loss": 1.4679, + "step": 8784 + }, + { + "epoch": 0.9266877637130801, + "grad_norm": 0.4807945191860199, + "learning_rate": 3.9356668028923825e-05, + "loss": 1.4871, + "step": 8785 + }, + { + "epoch": 0.9267932489451477, 
+ "grad_norm": 0.4376818835735321, + "learning_rate": 3.930208139999269e-05, + "loss": 1.4673, + "step": 8786 + }, + { + "epoch": 0.9268987341772152, + "grad_norm": 0.40703439712524414, + "learning_rate": 3.9247570481232314e-05, + "loss": 1.4521, + "step": 8787 + }, + { + "epoch": 0.9270042194092827, + "grad_norm": 0.41416510939598083, + "learning_rate": 3.919313516763478e-05, + "loss": 1.4776, + "step": 8788 + }, + { + "epoch": 0.9271097046413502, + "grad_norm": 0.40741485357284546, + "learning_rate": 3.91387753543378e-05, + "loss": 1.4904, + "step": 8789 + }, + { + "epoch": 0.9272151898734177, + "grad_norm": 0.4166701138019562, + "learning_rate": 3.908449093662446e-05, + "loss": 1.493, + "step": 8790 + }, + { + "epoch": 0.9273206751054852, + "grad_norm": 0.40980982780456543, + "learning_rate": 3.9030281809923186e-05, + "loss": 1.5011, + "step": 8791 + }, + { + "epoch": 0.9274261603375528, + "grad_norm": 0.4167083203792572, + "learning_rate": 3.897614786980734e-05, + "loss": 1.4579, + "step": 8792 + }, + { + "epoch": 0.9275316455696202, + "grad_norm": 0.4176901876926422, + "learning_rate": 3.892208901199521e-05, + "loss": 1.4854, + "step": 8793 + }, + { + "epoch": 0.9276371308016877, + "grad_norm": 0.41362348198890686, + "learning_rate": 3.886810513234967e-05, + "loss": 1.4591, + "step": 8794 + }, + { + "epoch": 0.9277426160337553, + "grad_norm": 0.42431047558784485, + "learning_rate": 3.881419612687803e-05, + "loss": 1.47, + "step": 8795 + }, + { + "epoch": 0.9278481012658227, + "grad_norm": 0.42078936100006104, + "learning_rate": 3.8760361891731874e-05, + "loss": 1.4951, + "step": 8796 + }, + { + "epoch": 0.9279535864978903, + "grad_norm": 0.43294990062713623, + "learning_rate": 3.870660232320675e-05, + "loss": 1.4807, + "step": 8797 + }, + { + "epoch": 0.9280590717299578, + "grad_norm": 0.4440297782421112, + "learning_rate": 3.8652917317742106e-05, + "loss": 1.4889, + "step": 8798 + }, + { + "epoch": 0.9281645569620253, + "grad_norm": 0.42233535647392273, + 
"learning_rate": 3.859930677192103e-05, + "loss": 1.4586, + "step": 8799 + }, + { + "epoch": 0.9282700421940928, + "grad_norm": 0.40886858105659485, + "learning_rate": 3.854577058246998e-05, + "loss": 1.4785, + "step": 8800 + }, + { + "epoch": 0.9283755274261604, + "grad_norm": 0.43115973472595215, + "learning_rate": 3.8492308646258714e-05, + "loss": 1.5125, + "step": 8801 + }, + { + "epoch": 0.9284810126582278, + "grad_norm": 0.44896188378334045, + "learning_rate": 3.843892086029999e-05, + "loss": 1.4777, + "step": 8802 + }, + { + "epoch": 0.9285864978902953, + "grad_norm": 0.42077797651290894, + "learning_rate": 3.8385607121749426e-05, + "loss": 1.4761, + "step": 8803 + }, + { + "epoch": 0.9286919831223629, + "grad_norm": 0.42118996381759644, + "learning_rate": 3.83323673279053e-05, + "loss": 1.4781, + "step": 8804 + }, + { + "epoch": 0.9287974683544303, + "grad_norm": 0.4214320480823517, + "learning_rate": 3.827920137620828e-05, + "loss": 1.483, + "step": 8805 + }, + { + "epoch": 0.9289029535864979, + "grad_norm": 0.4134511947631836, + "learning_rate": 3.822610916424134e-05, + "loss": 1.4853, + "step": 8806 + }, + { + "epoch": 0.9290084388185654, + "grad_norm": 0.42818230390548706, + "learning_rate": 3.81730905897295e-05, + "loss": 1.4904, + "step": 8807 + }, + { + "epoch": 0.9291139240506329, + "grad_norm": 0.44591575860977173, + "learning_rate": 3.812014555053955e-05, + "loss": 1.5103, + "step": 8808 + }, + { + "epoch": 0.9292194092827004, + "grad_norm": 0.4346730411052704, + "learning_rate": 3.806727394468005e-05, + "loss": 1.518, + "step": 8809 + }, + { + "epoch": 0.929324894514768, + "grad_norm": 0.4165668785572052, + "learning_rate": 3.801447567030094e-05, + "loss": 1.5227, + "step": 8810 + }, + { + "epoch": 0.9294303797468354, + "grad_norm": 0.4201684892177582, + "learning_rate": 3.796175062569344e-05, + "loss": 1.4928, + "step": 8811 + }, + { + "epoch": 0.929535864978903, + "grad_norm": 0.4057765305042267, + "learning_rate": 3.790909870928989e-05, + 
"loss": 1.4675, + "step": 8812 + }, + { + "epoch": 0.9296413502109705, + "grad_norm": 0.4186115562915802, + "learning_rate": 3.785651981966341e-05, + "loss": 1.4885, + "step": 8813 + }, + { + "epoch": 0.9297468354430379, + "grad_norm": 0.4474346339702606, + "learning_rate": 3.7804013855527896e-05, + "loss": 1.5056, + "step": 8814 + }, + { + "epoch": 0.9298523206751055, + "grad_norm": 0.3840869963169098, + "learning_rate": 3.7751580715737614e-05, + "loss": 1.4743, + "step": 8815 + }, + { + "epoch": 0.929957805907173, + "grad_norm": 0.4033017158508301, + "learning_rate": 3.7699220299287214e-05, + "loss": 1.4991, + "step": 8816 + }, + { + "epoch": 0.9300632911392405, + "grad_norm": 0.40306559205055237, + "learning_rate": 3.764693250531141e-05, + "loss": 1.4951, + "step": 8817 + }, + { + "epoch": 0.930168776371308, + "grad_norm": 0.42635712027549744, + "learning_rate": 3.759471723308477e-05, + "loss": 1.4819, + "step": 8818 + }, + { + "epoch": 0.9302742616033756, + "grad_norm": 0.4280884265899658, + "learning_rate": 3.7542574382021635e-05, + "loss": 1.4658, + "step": 8819 + }, + { + "epoch": 0.930379746835443, + "grad_norm": 0.4127960503101349, + "learning_rate": 3.7490503851675777e-05, + "loss": 1.4662, + "step": 8820 + }, + { + "epoch": 0.9304852320675105, + "grad_norm": 0.4023503065109253, + "learning_rate": 3.7438505541740356e-05, + "loss": 1.4964, + "step": 8821 + }, + { + "epoch": 0.9305907172995781, + "grad_norm": 0.41875559091567993, + "learning_rate": 3.738657935204763e-05, + "loss": 1.5565, + "step": 8822 + }, + { + "epoch": 0.9306962025316455, + "grad_norm": 0.4306015968322754, + "learning_rate": 3.733472518256876e-05, + "loss": 1.4737, + "step": 8823 + }, + { + "epoch": 0.9308016877637131, + "grad_norm": 0.41580015420913696, + "learning_rate": 3.7282942933413696e-05, + "loss": 1.5252, + "step": 8824 + }, + { + "epoch": 0.9309071729957806, + "grad_norm": 0.41895830631256104, + "learning_rate": 3.723123250483086e-05, + "loss": 1.4942, + "step": 8825 + }, + { 
+ "epoch": 0.9310126582278481, + "grad_norm": 0.4369781017303467, + "learning_rate": 3.717959379720711e-05, + "loss": 1.4983, + "step": 8826 + }, + { + "epoch": 0.9311181434599156, + "grad_norm": 0.3920938968658447, + "learning_rate": 3.712802671106742e-05, + "loss": 1.4917, + "step": 8827 + }, + { + "epoch": 0.9312236286919832, + "grad_norm": 0.42213407158851624, + "learning_rate": 3.707653114707471e-05, + "loss": 1.5117, + "step": 8828 + }, + { + "epoch": 0.9313291139240506, + "grad_norm": 0.458345502614975, + "learning_rate": 3.702510700602975e-05, + "loss": 1.4798, + "step": 8829 + }, + { + "epoch": 0.9314345991561181, + "grad_norm": 0.4231816232204437, + "learning_rate": 3.6973754188870806e-05, + "loss": 1.4565, + "step": 8830 + }, + { + "epoch": 0.9315400843881857, + "grad_norm": 0.40538638830184937, + "learning_rate": 3.692247259667361e-05, + "loss": 1.4443, + "step": 8831 + }, + { + "epoch": 0.9316455696202531, + "grad_norm": 0.3995458781719208, + "learning_rate": 3.687126213065109e-05, + "loss": 1.5157, + "step": 8832 + }, + { + "epoch": 0.9317510548523207, + "grad_norm": 0.4182884693145752, + "learning_rate": 3.682012269215314e-05, + "loss": 1.4712, + "step": 8833 + }, + { + "epoch": 0.9318565400843882, + "grad_norm": 0.40237295627593994, + "learning_rate": 3.676905418266655e-05, + "loss": 1.4699, + "step": 8834 + }, + { + "epoch": 0.9319620253164557, + "grad_norm": 0.4152362048625946, + "learning_rate": 3.671805650381468e-05, + "loss": 1.4907, + "step": 8835 + }, + { + "epoch": 0.9320675105485232, + "grad_norm": 0.44561782479286194, + "learning_rate": 3.666712955735737e-05, + "loss": 1.5013, + "step": 8836 + }, + { + "epoch": 0.9321729957805908, + "grad_norm": 0.4342091679573059, + "learning_rate": 3.661627324519074e-05, + "loss": 1.508, + "step": 8837 + }, + { + "epoch": 0.9322784810126582, + "grad_norm": 0.39658495783805847, + "learning_rate": 3.6565487469346904e-05, + "loss": 1.501, + "step": 8838 + }, + { + "epoch": 0.9323839662447257, + "grad_norm": 
0.4108422100543976, + "learning_rate": 3.651477213199394e-05, + "loss": 1.4541, + "step": 8839 + }, + { + "epoch": 0.9324894514767933, + "grad_norm": 0.4589267671108246, + "learning_rate": 3.6464127135435536e-05, + "loss": 1.4936, + "step": 8840 + }, + { + "epoch": 0.9325949367088607, + "grad_norm": 0.3822910189628601, + "learning_rate": 3.641355238211095e-05, + "loss": 1.462, + "step": 8841 + }, + { + "epoch": 0.9327004219409283, + "grad_norm": 0.41139668226242065, + "learning_rate": 3.6363047774594736e-05, + "loss": 1.5275, + "step": 8842 + }, + { + "epoch": 0.9328059071729958, + "grad_norm": 0.3933762311935425, + "learning_rate": 3.631261321559652e-05, + "loss": 1.4796, + "step": 8843 + }, + { + "epoch": 0.9329113924050633, + "grad_norm": 0.4006390869617462, + "learning_rate": 3.626224860796096e-05, + "loss": 1.4826, + "step": 8844 + }, + { + "epoch": 0.9330168776371308, + "grad_norm": 0.4192161560058594, + "learning_rate": 3.6211953854667373e-05, + "loss": 1.4709, + "step": 8845 + }, + { + "epoch": 0.9331223628691984, + "grad_norm": 0.4311361610889435, + "learning_rate": 3.616172885882972e-05, + "loss": 1.4792, + "step": 8846 + }, + { + "epoch": 0.9332278481012658, + "grad_norm": 0.3900858461856842, + "learning_rate": 3.6111573523696295e-05, + "loss": 1.436, + "step": 8847 + }, + { + "epoch": 0.9333333333333333, + "grad_norm": 0.40646398067474365, + "learning_rate": 3.606148775264958e-05, + "loss": 1.4986, + "step": 8848 + }, + { + "epoch": 0.9334388185654009, + "grad_norm": 0.4317643642425537, + "learning_rate": 3.601147144920609e-05, + "loss": 1.5078, + "step": 8849 + }, + { + "epoch": 0.9335443037974683, + "grad_norm": 0.46351680159568787, + "learning_rate": 3.596152451701616e-05, + "loss": 1.4602, + "step": 8850 + }, + { + "epoch": 0.9336497890295359, + "grad_norm": 0.45119708776474, + "learning_rate": 3.591164685986372e-05, + "loss": 1.4958, + "step": 8851 + }, + { + "epoch": 0.9337552742616034, + "grad_norm": 0.4067215323448181, + "learning_rate": 
3.58618383816662e-05, + "loss": 1.4883, + "step": 8852 + }, + { + "epoch": 0.9338607594936709, + "grad_norm": 0.4363328516483307, + "learning_rate": 3.581209898647425e-05, + "loss": 1.4718, + "step": 8853 + }, + { + "epoch": 0.9339662447257384, + "grad_norm": 0.4221624433994293, + "learning_rate": 3.576242857847162e-05, + "loss": 1.4761, + "step": 8854 + }, + { + "epoch": 0.9340717299578059, + "grad_norm": 0.4309861958026886, + "learning_rate": 3.5712827061974984e-05, + "loss": 1.5065, + "step": 8855 + }, + { + "epoch": 0.9341772151898734, + "grad_norm": 0.44510766863822937, + "learning_rate": 3.566329434143365e-05, + "loss": 1.4844, + "step": 8856 + }, + { + "epoch": 0.934282700421941, + "grad_norm": 0.411089688539505, + "learning_rate": 3.5613830321429545e-05, + "loss": 1.4546, + "step": 8857 + }, + { + "epoch": 0.9343881856540084, + "grad_norm": 0.41197821497917175, + "learning_rate": 3.5564434906676834e-05, + "loss": 1.5001, + "step": 8858 + }, + { + "epoch": 0.9344936708860759, + "grad_norm": 0.4134894013404846, + "learning_rate": 3.5515108002021946e-05, + "loss": 1.4967, + "step": 8859 + }, + { + "epoch": 0.9345991561181435, + "grad_norm": 0.4444829523563385, + "learning_rate": 3.5465849512443226e-05, + "loss": 1.4623, + "step": 8860 + }, + { + "epoch": 0.9347046413502109, + "grad_norm": 0.4189072251319885, + "learning_rate": 3.541665934305081e-05, + "loss": 1.4498, + "step": 8861 + }, + { + "epoch": 0.9348101265822785, + "grad_norm": 0.3960464894771576, + "learning_rate": 3.5367537399086476e-05, + "loss": 1.5137, + "step": 8862 + }, + { + "epoch": 0.934915611814346, + "grad_norm": 0.4477297067642212, + "learning_rate": 3.531848358592338e-05, + "loss": 1.4866, + "step": 8863 + }, + { + "epoch": 0.9350210970464135, + "grad_norm": 0.40495413541793823, + "learning_rate": 3.5269497809065966e-05, + "loss": 1.4682, + "step": 8864 + }, + { + "epoch": 0.935126582278481, + "grad_norm": 0.4138062298297882, + "learning_rate": 3.522057997414975e-05, + "loss": 1.5021, + 
"step": 8865 + }, + { + "epoch": 0.9352320675105485, + "grad_norm": 0.4065333306789398, + "learning_rate": 3.517172998694108e-05, + "loss": 1.4902, + "step": 8866 + }, + { + "epoch": 0.935337552742616, + "grad_norm": 0.42695364356040955, + "learning_rate": 3.512294775333705e-05, + "loss": 1.5104, + "step": 8867 + }, + { + "epoch": 0.9354430379746835, + "grad_norm": 0.41356247663497925, + "learning_rate": 3.507423317936521e-05, + "loss": 1.4824, + "step": 8868 + }, + { + "epoch": 0.9355485232067511, + "grad_norm": 0.44853776693344116, + "learning_rate": 3.502558617118352e-05, + "loss": 1.4828, + "step": 8869 + }, + { + "epoch": 0.9356540084388185, + "grad_norm": 0.4080412983894348, + "learning_rate": 3.4977006635080086e-05, + "loss": 1.448, + "step": 8870 + }, + { + "epoch": 0.9357594936708861, + "grad_norm": 0.40535593032836914, + "learning_rate": 3.4928494477472926e-05, + "loss": 1.5186, + "step": 8871 + }, + { + "epoch": 0.9358649789029536, + "grad_norm": 0.4081481099128723, + "learning_rate": 3.488004960490994e-05, + "loss": 1.4428, + "step": 8872 + }, + { + "epoch": 0.935970464135021, + "grad_norm": 0.4423657953739166, + "learning_rate": 3.4831671924068555e-05, + "loss": 1.5068, + "step": 8873 + }, + { + "epoch": 0.9360759493670886, + "grad_norm": 0.4110405147075653, + "learning_rate": 3.47833613417557e-05, + "loss": 1.4691, + "step": 8874 + }, + { + "epoch": 0.9361814345991561, + "grad_norm": 0.4406619966030121, + "learning_rate": 3.473511776490756e-05, + "loss": 1.4696, + "step": 8875 + }, + { + "epoch": 0.9362869198312236, + "grad_norm": 0.4179662764072418, + "learning_rate": 3.4686941100589344e-05, + "loss": 1.4982, + "step": 8876 + }, + { + "epoch": 0.9363924050632911, + "grad_norm": 0.3849756419658661, + "learning_rate": 3.463883125599521e-05, + "loss": 1.4852, + "step": 8877 + }, + { + "epoch": 0.9364978902953587, + "grad_norm": 0.43558844923973083, + "learning_rate": 3.4590788138448004e-05, + "loss": 1.5128, + "step": 8878 + }, + { + "epoch": 
0.9366033755274261, + "grad_norm": 0.4147240221500397, + "learning_rate": 3.454281165539913e-05, + "loss": 1.5067, + "step": 8879 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 0.4129604995250702, + "learning_rate": 3.449490171442838e-05, + "loss": 1.4887, + "step": 8880 + }, + { + "epoch": 0.9368143459915612, + "grad_norm": 0.4376642107963562, + "learning_rate": 3.444705822324364e-05, + "loss": 1.4898, + "step": 8881 + }, + { + "epoch": 0.9369198312236287, + "grad_norm": 0.4201657176017761, + "learning_rate": 3.4399281089680924e-05, + "loss": 1.5274, + "step": 8882 + }, + { + "epoch": 0.9370253164556962, + "grad_norm": 0.46530795097351074, + "learning_rate": 3.435157022170396e-05, + "loss": 1.4604, + "step": 8883 + }, + { + "epoch": 0.9371308016877637, + "grad_norm": 0.42180606722831726, + "learning_rate": 3.430392552740421e-05, + "loss": 1.4986, + "step": 8884 + }, + { + "epoch": 0.9372362869198312, + "grad_norm": 0.4004462659358978, + "learning_rate": 3.42563469150006e-05, + "loss": 1.4714, + "step": 8885 + }, + { + "epoch": 0.9373417721518987, + "grad_norm": 0.43905842304229736, + "learning_rate": 3.42088342928393e-05, + "loss": 1.5073, + "step": 8886 + }, + { + "epoch": 0.9374472573839663, + "grad_norm": 0.43236976861953735, + "learning_rate": 3.416138756939366e-05, + "loss": 1.4783, + "step": 8887 + }, + { + "epoch": 0.9375527426160337, + "grad_norm": 0.4080219864845276, + "learning_rate": 3.411400665326393e-05, + "loss": 1.4877, + "step": 8888 + }, + { + "epoch": 0.9376582278481013, + "grad_norm": 0.43416228890419006, + "learning_rate": 3.406669145317717e-05, + "loss": 1.4884, + "step": 8889 + }, + { + "epoch": 0.9377637130801688, + "grad_norm": 0.4268539547920227, + "learning_rate": 3.401944187798702e-05, + "loss": 1.4628, + "step": 8890 + }, + { + "epoch": 0.9378691983122363, + "grad_norm": 0.4611803889274597, + "learning_rate": 3.397225783667351e-05, + "loss": 1.4823, + "step": 8891 + }, + { + "epoch": 0.9379746835443038, + "grad_norm": 
0.42448803782463074, + "learning_rate": 3.3925139238342954e-05, + "loss": 1.4854, + "step": 8892 + }, + { + "epoch": 0.9380801687763713, + "grad_norm": 0.4257838726043701, + "learning_rate": 3.387808599222771e-05, + "loss": 1.4608, + "step": 8893 + }, + { + "epoch": 0.9381856540084388, + "grad_norm": 0.41752883791923523, + "learning_rate": 3.383109800768602e-05, + "loss": 1.4622, + "step": 8894 + }, + { + "epoch": 0.9382911392405063, + "grad_norm": 0.42213907837867737, + "learning_rate": 3.378417519420188e-05, + "loss": 1.4974, + "step": 8895 + }, + { + "epoch": 0.9383966244725739, + "grad_norm": 0.42600783705711365, + "learning_rate": 3.373731746138477e-05, + "loss": 1.4779, + "step": 8896 + }, + { + "epoch": 0.9385021097046413, + "grad_norm": 0.40594252943992615, + "learning_rate": 3.3690524718969586e-05, + "loss": 1.4885, + "step": 8897 + }, + { + "epoch": 0.9386075949367089, + "grad_norm": 0.43008822202682495, + "learning_rate": 3.364379687681642e-05, + "loss": 1.4475, + "step": 8898 + }, + { + "epoch": 0.9387130801687764, + "grad_norm": 0.4326474070549011, + "learning_rate": 3.359713384491036e-05, + "loss": 1.5037, + "step": 8899 + }, + { + "epoch": 0.9388185654008439, + "grad_norm": 0.4208463430404663, + "learning_rate": 3.355053553336137e-05, + "loss": 1.4792, + "step": 8900 + }, + { + "epoch": 0.9389240506329114, + "grad_norm": 0.40826287865638733, + "learning_rate": 3.350400185240404e-05, + "loss": 1.4914, + "step": 8901 + }, + { + "epoch": 0.939029535864979, + "grad_norm": 0.41179195046424866, + "learning_rate": 3.345753271239753e-05, + "loss": 1.46, + "step": 8902 + }, + { + "epoch": 0.9391350210970464, + "grad_norm": 0.40700075030326843, + "learning_rate": 3.3411128023825295e-05, + "loss": 1.4839, + "step": 8903 + }, + { + "epoch": 0.9392405063291139, + "grad_norm": 0.3948523700237274, + "learning_rate": 3.336478769729492e-05, + "loss": 1.4832, + "step": 8904 + }, + { + "epoch": 0.9393459915611815, + "grad_norm": 0.45551279187202454, + "learning_rate": 
3.331851164353803e-05, + "loss": 1.4862, + "step": 8905 + }, + { + "epoch": 0.9394514767932489, + "grad_norm": 0.4131926894187927, + "learning_rate": 3.327229977341001e-05, + "loss": 1.5137, + "step": 8906 + }, + { + "epoch": 0.9395569620253165, + "grad_norm": 0.4327172040939331, + "learning_rate": 3.322615199788992e-05, + "loss": 1.472, + "step": 8907 + }, + { + "epoch": 0.939662447257384, + "grad_norm": 0.4147562086582184, + "learning_rate": 3.31800682280803e-05, + "loss": 1.4664, + "step": 8908 + }, + { + "epoch": 0.9397679324894515, + "grad_norm": 0.4207040071487427, + "learning_rate": 3.313404837520694e-05, + "loss": 1.4617, + "step": 8909 + }, + { + "epoch": 0.939873417721519, + "grad_norm": 0.5038466453552246, + "learning_rate": 3.308809235061882e-05, + "loss": 1.4536, + "step": 8910 + }, + { + "epoch": 0.9399789029535865, + "grad_norm": 0.4140454828739166, + "learning_rate": 3.30422000657878e-05, + "loss": 1.4652, + "step": 8911 + }, + { + "epoch": 0.940084388185654, + "grad_norm": 0.3985019028186798, + "learning_rate": 3.2996371432308596e-05, + "loss": 1.5085, + "step": 8912 + }, + { + "epoch": 0.9401898734177215, + "grad_norm": 0.39875873923301697, + "learning_rate": 3.295060636189853e-05, + "loss": 1.4854, + "step": 8913 + }, + { + "epoch": 0.9402953586497891, + "grad_norm": 0.4082636535167694, + "learning_rate": 3.290490476639731e-05, + "loss": 1.4904, + "step": 8914 + }, + { + "epoch": 0.9404008438818565, + "grad_norm": 0.428849995136261, + "learning_rate": 3.2859266557767e-05, + "loss": 1.4726, + "step": 8915 + }, + { + "epoch": 0.9405063291139241, + "grad_norm": 0.4269358515739441, + "learning_rate": 3.2813691648091704e-05, + "loss": 1.5335, + "step": 8916 + }, + { + "epoch": 0.9406118143459916, + "grad_norm": 0.4042153060436249, + "learning_rate": 3.2768179949577505e-05, + "loss": 1.5043, + "step": 8917 + }, + { + "epoch": 0.940717299578059, + "grad_norm": 0.4402705430984497, + "learning_rate": 3.272273137455225e-05, + "loss": 1.4492, + "step": 8918 
+ }, + { + "epoch": 0.9408227848101266, + "grad_norm": 0.4324682950973511, + "learning_rate": 3.267734583546536e-05, + "loss": 1.4817, + "step": 8919 + }, + { + "epoch": 0.9409282700421941, + "grad_norm": 0.4331611394882202, + "learning_rate": 3.263202324488772e-05, + "loss": 1.4556, + "step": 8920 + }, + { + "epoch": 0.9410337552742616, + "grad_norm": 0.43215611577033997, + "learning_rate": 3.258676351551143e-05, + "loss": 1.4953, + "step": 8921 + }, + { + "epoch": 0.9411392405063291, + "grad_norm": 0.4027799069881439, + "learning_rate": 3.2541566560149726e-05, + "loss": 1.4736, + "step": 8922 + }, + { + "epoch": 0.9412447257383966, + "grad_norm": 0.4355621933937073, + "learning_rate": 3.249643229173678e-05, + "loss": 1.4826, + "step": 8923 + }, + { + "epoch": 0.9413502109704641, + "grad_norm": 0.40698695182800293, + "learning_rate": 3.245136062332745e-05, + "loss": 1.4803, + "step": 8924 + }, + { + "epoch": 0.9414556962025317, + "grad_norm": 0.4355679154396057, + "learning_rate": 3.240635146809728e-05, + "loss": 1.5119, + "step": 8925 + }, + { + "epoch": 0.9415611814345991, + "grad_norm": 0.40811678767204285, + "learning_rate": 3.236140473934215e-05, + "loss": 1.46, + "step": 8926 + }, + { + "epoch": 0.9416666666666667, + "grad_norm": 0.4243764281272888, + "learning_rate": 3.231652035047825e-05, + "loss": 1.5004, + "step": 8927 + }, + { + "epoch": 0.9417721518987342, + "grad_norm": 0.4042734205722809, + "learning_rate": 3.227169821504187e-05, + "loss": 1.4992, + "step": 8928 + }, + { + "epoch": 0.9418776371308016, + "grad_norm": 0.3997011184692383, + "learning_rate": 3.222693824668916e-05, + "loss": 1.4497, + "step": 8929 + }, + { + "epoch": 0.9419831223628692, + "grad_norm": 0.4170098900794983, + "learning_rate": 3.218224035919609e-05, + "loss": 1.4648, + "step": 8930 + }, + { + "epoch": 0.9420886075949367, + "grad_norm": 0.4268629252910614, + "learning_rate": 3.213760446645818e-05, + "loss": 1.4642, + "step": 8931 + }, + { + "epoch": 0.9421940928270042, + 
"grad_norm": 0.4398791193962097, + "learning_rate": 3.2093030482490385e-05, + "loss": 1.4695, + "step": 8932 + }, + { + "epoch": 0.9422995780590717, + "grad_norm": 0.38858598470687866, + "learning_rate": 3.204851832142696e-05, + "loss": 1.4788, + "step": 8933 + }, + { + "epoch": 0.9424050632911393, + "grad_norm": 0.4406694173812866, + "learning_rate": 3.200406789752116e-05, + "loss": 1.4962, + "step": 8934 + }, + { + "epoch": 0.9425105485232067, + "grad_norm": 0.4146156907081604, + "learning_rate": 3.195967912514527e-05, + "loss": 1.4952, + "step": 8935 + }, + { + "epoch": 0.9426160337552743, + "grad_norm": 0.41857171058654785, + "learning_rate": 3.191535191879029e-05, + "loss": 1.4725, + "step": 8936 + }, + { + "epoch": 0.9427215189873418, + "grad_norm": 0.4443456828594208, + "learning_rate": 3.1871086193065794e-05, + "loss": 1.4938, + "step": 8937 + }, + { + "epoch": 0.9428270042194092, + "grad_norm": 0.4142570495605469, + "learning_rate": 3.182688186269985e-05, + "loss": 1.4546, + "step": 8938 + }, + { + "epoch": 0.9429324894514768, + "grad_norm": 0.43589162826538086, + "learning_rate": 3.178273884253874e-05, + "loss": 1.5063, + "step": 8939 + }, + { + "epoch": 0.9430379746835443, + "grad_norm": 0.4269370436668396, + "learning_rate": 3.173865704754688e-05, + "loss": 1.4894, + "step": 8940 + }, + { + "epoch": 0.9431434599156118, + "grad_norm": 0.41896170377731323, + "learning_rate": 3.169463639280665e-05, + "loss": 1.4516, + "step": 8941 + }, + { + "epoch": 0.9432489451476793, + "grad_norm": 0.42555418610572815, + "learning_rate": 3.1650676793518144e-05, + "loss": 1.4785, + "step": 8942 + }, + { + "epoch": 0.9433544303797469, + "grad_norm": 0.4105265140533447, + "learning_rate": 3.1606778164999155e-05, + "loss": 1.4828, + "step": 8943 + }, + { + "epoch": 0.9434599156118143, + "grad_norm": 0.42442068457603455, + "learning_rate": 3.156294042268483e-05, + "loss": 1.5177, + "step": 8944 + }, + { + "epoch": 0.9435654008438819, + "grad_norm": 0.4226602613925934, + 
"learning_rate": 3.151916348212769e-05, + "loss": 1.4704, + "step": 8945 + }, + { + "epoch": 0.9436708860759494, + "grad_norm": 0.4202861785888672, + "learning_rate": 3.147544725899736e-05, + "loss": 1.4867, + "step": 8946 + }, + { + "epoch": 0.9437763713080168, + "grad_norm": 0.4213012456893921, + "learning_rate": 3.1431791669080386e-05, + "loss": 1.4731, + "step": 8947 + }, + { + "epoch": 0.9438818565400844, + "grad_norm": 0.41682150959968567, + "learning_rate": 3.138819662828018e-05, + "loss": 1.4751, + "step": 8948 + }, + { + "epoch": 0.9439873417721519, + "grad_norm": 0.4381452202796936, + "learning_rate": 3.134466205261674e-05, + "loss": 1.5183, + "step": 8949 + }, + { + "epoch": 0.9440928270042194, + "grad_norm": 0.4284520745277405, + "learning_rate": 3.130118785822657e-05, + "loss": 1.4833, + "step": 8950 + }, + { + "epoch": 0.9441983122362869, + "grad_norm": 0.40535178780555725, + "learning_rate": 3.125777396136251e-05, + "loss": 1.4758, + "step": 8951 + }, + { + "epoch": 0.9443037974683545, + "grad_norm": 0.4109024107456207, + "learning_rate": 3.121442027839349e-05, + "loss": 1.4847, + "step": 8952 + }, + { + "epoch": 0.9444092827004219, + "grad_norm": 0.4157865643501282, + "learning_rate": 3.117112672580451e-05, + "loss": 1.507, + "step": 8953 + }, + { + "epoch": 0.9445147679324895, + "grad_norm": 0.4004267156124115, + "learning_rate": 3.112789322019633e-05, + "loss": 1.4884, + "step": 8954 + }, + { + "epoch": 0.944620253164557, + "grad_norm": 0.41594013571739197, + "learning_rate": 3.108471967828545e-05, + "loss": 1.5182, + "step": 8955 + }, + { + "epoch": 0.9447257383966244, + "grad_norm": 0.40083497762680054, + "learning_rate": 3.1041606016903844e-05, + "loss": 1.4747, + "step": 8956 + }, + { + "epoch": 0.944831223628692, + "grad_norm": 0.4216148257255554, + "learning_rate": 3.0998552152998837e-05, + "loss": 1.5007, + "step": 8957 + }, + { + "epoch": 0.9449367088607595, + "grad_norm": 0.41635462641716003, + "learning_rate": 3.095555800363297e-05, + 
"loss": 1.4557, + "step": 8958 + }, + { + "epoch": 0.945042194092827, + "grad_norm": 0.39775124192237854, + "learning_rate": 3.0912623485983774e-05, + "loss": 1.4672, + "step": 8959 + }, + { + "epoch": 0.9451476793248945, + "grad_norm": 0.42075133323669434, + "learning_rate": 3.08697485173437e-05, + "loss": 1.498, + "step": 8960 + }, + { + "epoch": 0.9452531645569621, + "grad_norm": 0.40941378474235535, + "learning_rate": 3.0826933015119895e-05, + "loss": 1.4668, + "step": 8961 + }, + { + "epoch": 0.9453586497890295, + "grad_norm": 0.4217377305030823, + "learning_rate": 3.0784176896834036e-05, + "loss": 1.4781, + "step": 8962 + }, + { + "epoch": 0.945464135021097, + "grad_norm": 0.41346275806427, + "learning_rate": 3.074148008012224e-05, + "loss": 1.4892, + "step": 8963 + }, + { + "epoch": 0.9455696202531646, + "grad_norm": 0.4290733337402344, + "learning_rate": 3.06988424827348e-05, + "loss": 1.4734, + "step": 8964 + }, + { + "epoch": 0.945675105485232, + "grad_norm": 0.45879629254341125, + "learning_rate": 3.0656264022536146e-05, + "loss": 1.4996, + "step": 8965 + }, + { + "epoch": 0.9457805907172996, + "grad_norm": 0.41879481077194214, + "learning_rate": 3.0613744617504625e-05, + "loss": 1.5236, + "step": 8966 + }, + { + "epoch": 0.9458860759493671, + "grad_norm": 0.40473487973213196, + "learning_rate": 3.0571284185732275e-05, + "loss": 1.4391, + "step": 8967 + }, + { + "epoch": 0.9459915611814346, + "grad_norm": 0.44342854619026184, + "learning_rate": 3.052888264542484e-05, + "loss": 1.4955, + "step": 8968 + }, + { + "epoch": 0.9460970464135021, + "grad_norm": 0.43697628378868103, + "learning_rate": 3.048653991490141e-05, + "loss": 1.5029, + "step": 8969 + }, + { + "epoch": 0.9462025316455697, + "grad_norm": 0.4347820281982422, + "learning_rate": 3.0444255912594442e-05, + "loss": 1.4782, + "step": 8970 + }, + { + "epoch": 0.9463080168776371, + "grad_norm": 0.40996524691581726, + "learning_rate": 3.0402030557049503e-05, + "loss": 1.4923, + "step": 8971 + }, + { 
+ "epoch": 0.9464135021097047, + "grad_norm": 0.4250170886516571, + "learning_rate": 3.0359863766925097e-05, + "loss": 1.4683, + "step": 8972 + }, + { + "epoch": 0.9465189873417722, + "grad_norm": 0.3988959491252899, + "learning_rate": 3.0317755460992616e-05, + "loss": 1.4424, + "step": 8973 + }, + { + "epoch": 0.9466244725738396, + "grad_norm": 0.4415169954299927, + "learning_rate": 3.027570555813604e-05, + "loss": 1.4726, + "step": 8974 + }, + { + "epoch": 0.9467299578059072, + "grad_norm": 0.4174901843070984, + "learning_rate": 3.0233713977351904e-05, + "loss": 1.4975, + "step": 8975 + }, + { + "epoch": 0.9468354430379747, + "grad_norm": 0.42810869216918945, + "learning_rate": 3.0191780637749097e-05, + "loss": 1.5003, + "step": 8976 + }, + { + "epoch": 0.9469409282700422, + "grad_norm": 0.4096430242061615, + "learning_rate": 3.0149905458548646e-05, + "loss": 1.4803, + "step": 8977 + }, + { + "epoch": 0.9470464135021097, + "grad_norm": 0.41747745871543884, + "learning_rate": 3.0108088359083675e-05, + "loss": 1.5027, + "step": 8978 + }, + { + "epoch": 0.9471518987341773, + "grad_norm": 0.40123745799064636, + "learning_rate": 3.0066329258799184e-05, + "loss": 1.454, + "step": 8979 + }, + { + "epoch": 0.9472573839662447, + "grad_norm": 0.42217034101486206, + "learning_rate": 3.002462807725185e-05, + "loss": 1.4994, + "step": 8980 + }, + { + "epoch": 0.9473628691983123, + "grad_norm": 0.4400993883609772, + "learning_rate": 2.9982984734110005e-05, + "loss": 1.4885, + "step": 8981 + }, + { + "epoch": 0.9474683544303798, + "grad_norm": 0.4110012650489807, + "learning_rate": 2.9941399149153305e-05, + "loss": 1.4778, + "step": 8982 + }, + { + "epoch": 0.9475738396624472, + "grad_norm": 0.4226163923740387, + "learning_rate": 2.9899871242272736e-05, + "loss": 1.5008, + "step": 8983 + }, + { + "epoch": 0.9476793248945148, + "grad_norm": 0.39635103940963745, + "learning_rate": 2.9858400933470395e-05, + "loss": 1.506, + "step": 8984 + }, + { + "epoch": 0.9477848101265823, + 
"grad_norm": 0.42404916882514954, + "learning_rate": 2.9816988142859272e-05, + "loss": 1.4987, + "step": 8985 + }, + { + "epoch": 0.9478902953586498, + "grad_norm": 0.42395398020744324, + "learning_rate": 2.9775632790663244e-05, + "loss": 1.4889, + "step": 8986 + }, + { + "epoch": 0.9479957805907173, + "grad_norm": 0.4184955060482025, + "learning_rate": 2.973433479721675e-05, + "loss": 1.4723, + "step": 8987 + }, + { + "epoch": 0.9481012658227848, + "grad_norm": 0.42186737060546875, + "learning_rate": 2.9693094082964775e-05, + "loss": 1.4778, + "step": 8988 + }, + { + "epoch": 0.9482067510548523, + "grad_norm": 0.409239798784256, + "learning_rate": 2.965191056846266e-05, + "loss": 1.4953, + "step": 8989 + }, + { + "epoch": 0.9483122362869199, + "grad_norm": 0.4182921051979065, + "learning_rate": 2.9610784174375868e-05, + "loss": 1.4708, + "step": 8990 + }, + { + "epoch": 0.9484177215189873, + "grad_norm": 0.41539087891578674, + "learning_rate": 2.9569714821479966e-05, + "loss": 1.4463, + "step": 8991 + }, + { + "epoch": 0.9485232067510548, + "grad_norm": 0.4016892910003662, + "learning_rate": 2.9528702430660346e-05, + "loss": 1.4675, + "step": 8992 + }, + { + "epoch": 0.9486286919831224, + "grad_norm": 0.4415346682071686, + "learning_rate": 2.948774692291217e-05, + "loss": 1.4691, + "step": 8993 + }, + { + "epoch": 0.9487341772151898, + "grad_norm": 0.4102402329444885, + "learning_rate": 2.9446848219340173e-05, + "loss": 1.4967, + "step": 8994 + }, + { + "epoch": 0.9488396624472574, + "grad_norm": 0.4085753858089447, + "learning_rate": 2.9406006241158485e-05, + "loss": 1.5326, + "step": 8995 + }, + { + "epoch": 0.9489451476793249, + "grad_norm": 0.4016677439212799, + "learning_rate": 2.936522090969056e-05, + "loss": 1.4653, + "step": 8996 + }, + { + "epoch": 0.9490506329113924, + "grad_norm": 0.4078262150287628, + "learning_rate": 2.9324492146368908e-05, + "loss": 1.4843, + "step": 8997 + }, + { + "epoch": 0.9491561181434599, + "grad_norm": 0.40966683626174927, + 
"learning_rate": 2.928381987273507e-05, + "loss": 1.5045, + "step": 8998 + }, + { + "epoch": 0.9492616033755275, + "grad_norm": 0.43814384937286377, + "learning_rate": 2.9243204010439396e-05, + "loss": 1.4812, + "step": 8999 + }, + { + "epoch": 0.9493670886075949, + "grad_norm": 0.4208926260471344, + "learning_rate": 2.920264448124087e-05, + "loss": 1.4672, + "step": 9000 + }, + { + "epoch": 0.9494725738396624, + "grad_norm": 0.4082159101963043, + "learning_rate": 2.916214120700703e-05, + "loss": 1.4853, + "step": 9001 + }, + { + "epoch": 0.94957805907173, + "grad_norm": 0.4235600531101227, + "learning_rate": 2.9121694109713758e-05, + "loss": 1.4884, + "step": 9002 + }, + { + "epoch": 0.9496835443037974, + "grad_norm": 0.44142910838127136, + "learning_rate": 2.9081303111445172e-05, + "loss": 1.4962, + "step": 9003 + }, + { + "epoch": 0.949789029535865, + "grad_norm": 0.42570793628692627, + "learning_rate": 2.904096813439346e-05, + "loss": 1.4594, + "step": 9004 + }, + { + "epoch": 0.9498945147679325, + "grad_norm": 0.41383957862854004, + "learning_rate": 2.9000689100858695e-05, + "loss": 1.4459, + "step": 9005 + }, + { + "epoch": 0.95, + "grad_norm": 0.4112379252910614, + "learning_rate": 2.896046593324876e-05, + "loss": 1.4787, + "step": 9006 + }, + { + "epoch": 0.9501054852320675, + "grad_norm": 0.43490731716156006, + "learning_rate": 2.8920298554079114e-05, + "loss": 1.5016, + "step": 9007 + }, + { + "epoch": 0.950210970464135, + "grad_norm": 0.4120459258556366, + "learning_rate": 2.8880186885972716e-05, + "loss": 1.4997, + "step": 9008 + }, + { + "epoch": 0.9503164556962025, + "grad_norm": 0.4352714419364929, + "learning_rate": 2.8840130851659852e-05, + "loss": 1.4638, + "step": 9009 + }, + { + "epoch": 0.95042194092827, + "grad_norm": 0.4593077301979065, + "learning_rate": 2.8800130373977936e-05, + "loss": 1.476, + "step": 9010 + }, + { + "epoch": 0.9505274261603376, + "grad_norm": 0.4143889546394348, + "learning_rate": 2.876018537587146e-05, + "loss": 1.4503, 
+ "step": 9011 + }, + { + "epoch": 0.950632911392405, + "grad_norm": 0.4272405207157135, + "learning_rate": 2.8720295780391722e-05, + "loss": 1.4982, + "step": 9012 + }, + { + "epoch": 0.9507383966244726, + "grad_norm": 0.43704649806022644, + "learning_rate": 2.8680461510696802e-05, + "loss": 1.4944, + "step": 9013 + }, + { + "epoch": 0.9508438818565401, + "grad_norm": 0.43519794940948486, + "learning_rate": 2.8640682490051365e-05, + "loss": 1.488, + "step": 9014 + }, + { + "epoch": 0.9509493670886076, + "grad_norm": 0.4351371228694916, + "learning_rate": 2.8600958641826447e-05, + "loss": 1.4774, + "step": 9015 + }, + { + "epoch": 0.9510548523206751, + "grad_norm": 0.40209436416625977, + "learning_rate": 2.8561289889499422e-05, + "loss": 1.4337, + "step": 9016 + }, + { + "epoch": 0.9511603375527427, + "grad_norm": 0.4071151912212372, + "learning_rate": 2.8521676156653756e-05, + "loss": 1.459, + "step": 9017 + }, + { + "epoch": 0.9512658227848101, + "grad_norm": 0.42927947640419006, + "learning_rate": 2.8482117366978935e-05, + "loss": 1.4955, + "step": 9018 + }, + { + "epoch": 0.9513713080168776, + "grad_norm": 0.40375077724456787, + "learning_rate": 2.844261344427029e-05, + "loss": 1.4769, + "step": 9019 + }, + { + "epoch": 0.9514767932489452, + "grad_norm": 0.4263576567173004, + "learning_rate": 2.84031643124288e-05, + "loss": 1.5201, + "step": 9020 + }, + { + "epoch": 0.9515822784810126, + "grad_norm": 0.428631067276001, + "learning_rate": 2.8363769895461053e-05, + "loss": 1.4924, + "step": 9021 + }, + { + "epoch": 0.9516877637130802, + "grad_norm": 0.4250580370426178, + "learning_rate": 2.8324430117478972e-05, + "loss": 1.5346, + "step": 9022 + }, + { + "epoch": 0.9517932489451477, + "grad_norm": 0.42467814683914185, + "learning_rate": 2.8285144902699785e-05, + "loss": 1.5153, + "step": 9023 + }, + { + "epoch": 0.9518987341772152, + "grad_norm": 0.39394938945770264, + "learning_rate": 2.824591417544583e-05, + "loss": 1.4603, + "step": 9024 + }, + { + "epoch": 
0.9520042194092827, + "grad_norm": 0.4156087338924408, + "learning_rate": 2.820673786014436e-05, + "loss": 1.4977, + "step": 9025 + }, + { + "epoch": 0.9521097046413503, + "grad_norm": 0.42903465032577515, + "learning_rate": 2.816761588132749e-05, + "loss": 1.5351, + "step": 9026 + }, + { + "epoch": 0.9522151898734177, + "grad_norm": 0.4756835103034973, + "learning_rate": 2.8128548163632005e-05, + "loss": 1.4729, + "step": 9027 + }, + { + "epoch": 0.9523206751054852, + "grad_norm": 0.4078097343444824, + "learning_rate": 2.808953463179918e-05, + "loss": 1.49, + "step": 9028 + }, + { + "epoch": 0.9524261603375528, + "grad_norm": 0.466543585062027, + "learning_rate": 2.805057521067472e-05, + "loss": 1.5201, + "step": 9029 + }, + { + "epoch": 0.9525316455696202, + "grad_norm": 0.4056161046028137, + "learning_rate": 2.8011669825208517e-05, + "loss": 1.4754, + "step": 9030 + }, + { + "epoch": 0.9526371308016878, + "grad_norm": 0.4038311541080475, + "learning_rate": 2.7972818400454596e-05, + "loss": 1.4466, + "step": 9031 + }, + { + "epoch": 0.9527426160337553, + "grad_norm": 0.42786046862602234, + "learning_rate": 2.7934020861570928e-05, + "loss": 1.5102, + "step": 9032 + }, + { + "epoch": 0.9528481012658228, + "grad_norm": 0.44332414865493774, + "learning_rate": 2.789527713381925e-05, + "loss": 1.4857, + "step": 9033 + }, + { + "epoch": 0.9529535864978903, + "grad_norm": 0.4434492886066437, + "learning_rate": 2.7856587142565008e-05, + "loss": 1.518, + "step": 9034 + }, + { + "epoch": 0.9530590717299579, + "grad_norm": 0.4417775571346283, + "learning_rate": 2.781795081327712e-05, + "loss": 1.4519, + "step": 9035 + }, + { + "epoch": 0.9531645569620253, + "grad_norm": 0.41022711992263794, + "learning_rate": 2.7779368071527907e-05, + "loss": 1.4438, + "step": 9036 + }, + { + "epoch": 0.9532700421940928, + "grad_norm": 0.4475588798522949, + "learning_rate": 2.7740838842992916e-05, + "loss": 1.4384, + "step": 9037 + }, + { + "epoch": 0.9533755274261604, + "grad_norm": 
0.4204328656196594, + "learning_rate": 2.770236305345076e-05, + "loss": 1.4726, + "step": 9038 + }, + { + "epoch": 0.9534810126582278, + "grad_norm": 0.46305549144744873, + "learning_rate": 2.766394062878302e-05, + "loss": 1.4689, + "step": 9039 + }, + { + "epoch": 0.9535864978902954, + "grad_norm": 0.4167904257774353, + "learning_rate": 2.762557149497405e-05, + "loss": 1.4952, + "step": 9040 + }, + { + "epoch": 0.9536919831223629, + "grad_norm": 0.435484915971756, + "learning_rate": 2.758725557811089e-05, + "loss": 1.4985, + "step": 9041 + }, + { + "epoch": 0.9537974683544304, + "grad_norm": 0.40769943594932556, + "learning_rate": 2.754899280438309e-05, + "loss": 1.4905, + "step": 9042 + }, + { + "epoch": 0.9539029535864979, + "grad_norm": 0.4437706470489502, + "learning_rate": 2.7510783100082544e-05, + "loss": 1.4612, + "step": 9043 + }, + { + "epoch": 0.9540084388185655, + "grad_norm": 0.40064650774002075, + "learning_rate": 2.747262639160342e-05, + "loss": 1.4796, + "step": 9044 + }, + { + "epoch": 0.9541139240506329, + "grad_norm": 0.41875338554382324, + "learning_rate": 2.743452260544193e-05, + "loss": 1.4933, + "step": 9045 + }, + { + "epoch": 0.9542194092827004, + "grad_norm": 0.4265892803668976, + "learning_rate": 2.7396471668196274e-05, + "loss": 1.4777, + "step": 9046 + }, + { + "epoch": 0.954324894514768, + "grad_norm": 0.43435630202293396, + "learning_rate": 2.7358473506566453e-05, + "loss": 1.5325, + "step": 9047 + }, + { + "epoch": 0.9544303797468354, + "grad_norm": 0.41065502166748047, + "learning_rate": 2.732052804735409e-05, + "loss": 1.4744, + "step": 9048 + }, + { + "epoch": 0.954535864978903, + "grad_norm": 0.4011315405368805, + "learning_rate": 2.7282635217462405e-05, + "loss": 1.4712, + "step": 9049 + }, + { + "epoch": 0.9546413502109705, + "grad_norm": 0.41347742080688477, + "learning_rate": 2.724479494389592e-05, + "loss": 1.4808, + "step": 9050 + }, + { + "epoch": 0.954746835443038, + "grad_norm": 0.43332159519195557, + "learning_rate": 
2.7207007153760456e-05, + "loss": 1.5044, + "step": 9051 + }, + { + "epoch": 0.9548523206751055, + "grad_norm": 0.4061329662799835, + "learning_rate": 2.7169271774262942e-05, + "loss": 1.4843, + "step": 9052 + }, + { + "epoch": 0.9549578059071729, + "grad_norm": 0.41654402017593384, + "learning_rate": 2.7131588732711214e-05, + "loss": 1.4729, + "step": 9053 + }, + { + "epoch": 0.9550632911392405, + "grad_norm": 0.42718738317489624, + "learning_rate": 2.7093957956513993e-05, + "loss": 1.5102, + "step": 9054 + }, + { + "epoch": 0.955168776371308, + "grad_norm": 0.4063757658004761, + "learning_rate": 2.7056379373180626e-05, + "loss": 1.4599, + "step": 9055 + }, + { + "epoch": 0.9552742616033755, + "grad_norm": 0.40532445907592773, + "learning_rate": 2.701885291032104e-05, + "loss": 1.5087, + "step": 9056 + }, + { + "epoch": 0.955379746835443, + "grad_norm": 0.41733431816101074, + "learning_rate": 2.6981378495645566e-05, + "loss": 1.4763, + "step": 9057 + }, + { + "epoch": 0.9554852320675106, + "grad_norm": 0.4174603819847107, + "learning_rate": 2.6943956056964773e-05, + "loss": 1.5041, + "step": 9058 + }, + { + "epoch": 0.955590717299578, + "grad_norm": 0.4342232644557953, + "learning_rate": 2.6906585522189378e-05, + "loss": 1.4641, + "step": 9059 + }, + { + "epoch": 0.9556962025316456, + "grad_norm": 0.3987276256084442, + "learning_rate": 2.6869266819330058e-05, + "loss": 1.477, + "step": 9060 + }, + { + "epoch": 0.9558016877637131, + "grad_norm": 0.40920597314834595, + "learning_rate": 2.6831999876497372e-05, + "loss": 1.5008, + "step": 9061 + }, + { + "epoch": 0.9559071729957805, + "grad_norm": 0.4030047059059143, + "learning_rate": 2.6794784621901574e-05, + "loss": 1.5007, + "step": 9062 + }, + { + "epoch": 0.9560126582278481, + "grad_norm": 0.4014549255371094, + "learning_rate": 2.675762098385246e-05, + "loss": 1.4896, + "step": 9063 + }, + { + "epoch": 0.9561181434599156, + "grad_norm": 0.42015427350997925, + "learning_rate": 2.672050889075931e-05, + "loss": 
1.4725, + "step": 9064 + }, + { + "epoch": 0.9562236286919831, + "grad_norm": 0.41310104727745056, + "learning_rate": 2.6683448271130646e-05, + "loss": 1.5127, + "step": 9065 + }, + { + "epoch": 0.9563291139240506, + "grad_norm": 0.4185239374637604, + "learning_rate": 2.6646439053574176e-05, + "loss": 1.4699, + "step": 9066 + }, + { + "epoch": 0.9564345991561182, + "grad_norm": 0.4112013578414917, + "learning_rate": 2.6609481166796652e-05, + "loss": 1.5027, + "step": 9067 + }, + { + "epoch": 0.9565400843881856, + "grad_norm": 0.40110740065574646, + "learning_rate": 2.6572574539603643e-05, + "loss": 1.4796, + "step": 9068 + }, + { + "epoch": 0.9566455696202532, + "grad_norm": 0.43752819299697876, + "learning_rate": 2.653571910089951e-05, + "loss": 1.476, + "step": 9069 + }, + { + "epoch": 0.9567510548523207, + "grad_norm": 0.42355623841285706, + "learning_rate": 2.6498914779687228e-05, + "loss": 1.453, + "step": 9070 + }, + { + "epoch": 0.9568565400843881, + "grad_norm": 0.39908015727996826, + "learning_rate": 2.646216150506819e-05, + "loss": 1.5225, + "step": 9071 + }, + { + "epoch": 0.9569620253164557, + "grad_norm": 0.4024796783924103, + "learning_rate": 2.6425459206242196e-05, + "loss": 1.463, + "step": 9072 + }, + { + "epoch": 0.9570675105485232, + "grad_norm": 0.4407555162906647, + "learning_rate": 2.6388807812507172e-05, + "loss": 1.4992, + "step": 9073 + }, + { + "epoch": 0.9571729957805907, + "grad_norm": 0.4400232434272766, + "learning_rate": 2.6352207253259166e-05, + "loss": 1.5028, + "step": 9074 + }, + { + "epoch": 0.9572784810126582, + "grad_norm": 0.4058392643928528, + "learning_rate": 2.6315657457992123e-05, + "loss": 1.463, + "step": 9075 + }, + { + "epoch": 0.9573839662447258, + "grad_norm": 0.4155997335910797, + "learning_rate": 2.627915835629776e-05, + "loss": 1.4509, + "step": 9076 + }, + { + "epoch": 0.9574894514767932, + "grad_norm": 0.4103969931602478, + "learning_rate": 2.62427098778655e-05, + "loss": 1.5034, + "step": 9077 + }, + { + 
"epoch": 0.9575949367088608, + "grad_norm": 0.4225928485393524, + "learning_rate": 2.6206311952482224e-05, + "loss": 1.5109, + "step": 9078 + }, + { + "epoch": 0.9577004219409283, + "grad_norm": 0.4252573549747467, + "learning_rate": 2.6169964510032243e-05, + "loss": 1.4454, + "step": 9079 + }, + { + "epoch": 0.9578059071729957, + "grad_norm": 0.42723262310028076, + "learning_rate": 2.6133667480497115e-05, + "loss": 1.5067, + "step": 9080 + }, + { + "epoch": 0.9579113924050633, + "grad_norm": 0.42512160539627075, + "learning_rate": 2.6097420793955464e-05, + "loss": 1.5057, + "step": 9081 + }, + { + "epoch": 0.9580168776371308, + "grad_norm": 0.41484829783439636, + "learning_rate": 2.6061224380582955e-05, + "loss": 1.499, + "step": 9082 + }, + { + "epoch": 0.9581223628691983, + "grad_norm": 0.4189888536930084, + "learning_rate": 2.6025078170652045e-05, + "loss": 1.4917, + "step": 9083 + }, + { + "epoch": 0.9582278481012658, + "grad_norm": 0.42506399750709534, + "learning_rate": 2.5988982094531942e-05, + "loss": 1.4828, + "step": 9084 + }, + { + "epoch": 0.9583333333333334, + "grad_norm": 0.42952707409858704, + "learning_rate": 2.595293608268842e-05, + "loss": 1.4544, + "step": 9085 + }, + { + "epoch": 0.9584388185654008, + "grad_norm": 0.3887086510658264, + "learning_rate": 2.591694006568366e-05, + "loss": 1.4844, + "step": 9086 + }, + { + "epoch": 0.9585443037974684, + "grad_norm": 0.45235297083854675, + "learning_rate": 2.588099397417621e-05, + "loss": 1.47, + "step": 9087 + }, + { + "epoch": 0.9586497890295359, + "grad_norm": 0.44235000014305115, + "learning_rate": 2.584509773892073e-05, + "loss": 1.5028, + "step": 9088 + }, + { + "epoch": 0.9587552742616033, + "grad_norm": 0.4243945777416229, + "learning_rate": 2.580925129076798e-05, + "loss": 1.4854, + "step": 9089 + }, + { + "epoch": 0.9588607594936709, + "grad_norm": 0.43124672770500183, + "learning_rate": 2.5773454560664597e-05, + "loss": 1.5133, + "step": 9090 + }, + { + "epoch": 0.9589662447257384, + 
"grad_norm": 0.408216655254364, + "learning_rate": 2.5737707479652988e-05, + "loss": 1.4438, + "step": 9091 + }, + { + "epoch": 0.9590717299578059, + "grad_norm": 0.41597023606300354, + "learning_rate": 2.5702009978871223e-05, + "loss": 1.4667, + "step": 9092 + }, + { + "epoch": 0.9591772151898734, + "grad_norm": 0.4255770146846771, + "learning_rate": 2.566636198955286e-05, + "loss": 1.4763, + "step": 9093 + }, + { + "epoch": 0.959282700421941, + "grad_norm": 0.40157386660575867, + "learning_rate": 2.5630763443026845e-05, + "loss": 1.4559, + "step": 9094 + }, + { + "epoch": 0.9593881856540084, + "grad_norm": 0.40931519865989685, + "learning_rate": 2.5595214270717388e-05, + "loss": 1.463, + "step": 9095 + }, + { + "epoch": 0.959493670886076, + "grad_norm": 0.4324822425842285, + "learning_rate": 2.5559714404143766e-05, + "loss": 1.4965, + "step": 9096 + }, + { + "epoch": 0.9595991561181435, + "grad_norm": 0.42928382754325867, + "learning_rate": 2.5524263774920287e-05, + "loss": 1.5136, + "step": 9097 + }, + { + "epoch": 0.9597046413502109, + "grad_norm": 0.41698092222213745, + "learning_rate": 2.5488862314756066e-05, + "loss": 1.4392, + "step": 9098 + }, + { + "epoch": 0.9598101265822785, + "grad_norm": 0.4160037338733673, + "learning_rate": 2.5453509955454954e-05, + "loss": 1.4255, + "step": 9099 + }, + { + "epoch": 0.959915611814346, + "grad_norm": 0.3954348862171173, + "learning_rate": 2.541820662891541e-05, + "loss": 1.4704, + "step": 9100 + }, + { + "epoch": 0.9600210970464135, + "grad_norm": 0.4444388449192047, + "learning_rate": 2.53829522671303e-05, + "loss": 1.5075, + "step": 9101 + }, + { + "epoch": 0.960126582278481, + "grad_norm": 0.4075108468532562, + "learning_rate": 2.5347746802186866e-05, + "loss": 1.4699, + "step": 9102 + }, + { + "epoch": 0.9602320675105486, + "grad_norm": 0.41993606090545654, + "learning_rate": 2.531259016626649e-05, + "loss": 1.4717, + "step": 9103 + }, + { + "epoch": 0.960337552742616, + "grad_norm": 0.41762638092041016, + 
"learning_rate": 2.5277482291644662e-05, + "loss": 1.4883, + "step": 9104 + }, + { + "epoch": 0.9604430379746836, + "grad_norm": 0.4408147633075714, + "learning_rate": 2.524242311069079e-05, + "loss": 1.4797, + "step": 9105 + }, + { + "epoch": 0.9605485232067511, + "grad_norm": 0.42754238843917847, + "learning_rate": 2.520741255586806e-05, + "loss": 1.4971, + "step": 9106 + }, + { + "epoch": 0.9606540084388185, + "grad_norm": 0.40313130617141724, + "learning_rate": 2.5172450559733375e-05, + "loss": 1.4655, + "step": 9107 + }, + { + "epoch": 0.9607594936708861, + "grad_norm": 0.4181784689426422, + "learning_rate": 2.513753705493713e-05, + "loss": 1.4592, + "step": 9108 + }, + { + "epoch": 0.9608649789029536, + "grad_norm": 0.40294092893600464, + "learning_rate": 2.510267197422317e-05, + "loss": 1.5101, + "step": 9109 + }, + { + "epoch": 0.9609704641350211, + "grad_norm": 0.4241686165332794, + "learning_rate": 2.5067855250428616e-05, + "loss": 1.4662, + "step": 9110 + }, + { + "epoch": 0.9610759493670886, + "grad_norm": 0.43026596307754517, + "learning_rate": 2.5033086816483705e-05, + "loss": 1.4781, + "step": 9111 + }, + { + "epoch": 0.9611814345991562, + "grad_norm": 0.40692874789237976, + "learning_rate": 2.499836660541176e-05, + "loss": 1.4924, + "step": 9112 + }, + { + "epoch": 0.9612869198312236, + "grad_norm": 0.4048171043395996, + "learning_rate": 2.4963694550328964e-05, + "loss": 1.4767, + "step": 9113 + }, + { + "epoch": 0.9613924050632912, + "grad_norm": 0.40509337186813354, + "learning_rate": 2.492907058444425e-05, + "loss": 1.4785, + "step": 9114 + }, + { + "epoch": 0.9614978902953587, + "grad_norm": 0.39835599064826965, + "learning_rate": 2.489449464105922e-05, + "loss": 1.4829, + "step": 9115 + }, + { + "epoch": 0.9616033755274261, + "grad_norm": 0.4123097062110901, + "learning_rate": 2.4859966653567965e-05, + "loss": 1.5266, + "step": 9116 + }, + { + "epoch": 0.9617088607594937, + "grad_norm": 0.43954089283943176, + "learning_rate": 
2.482548655545697e-05, + "loss": 1.4887, + "step": 9117 + }, + { + "epoch": 0.9618143459915611, + "grad_norm": 0.4100317656993866, + "learning_rate": 2.4791054280304972e-05, + "loss": 1.5334, + "step": 9118 + }, + { + "epoch": 0.9619198312236287, + "grad_norm": 0.45626506209373474, + "learning_rate": 2.4756669761782806e-05, + "loss": 1.5027, + "step": 9119 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 0.41467225551605225, + "learning_rate": 2.472233293365335e-05, + "loss": 1.4324, + "step": 9120 + }, + { + "epoch": 0.9621308016877637, + "grad_norm": 0.4484040439128876, + "learning_rate": 2.4688043729771304e-05, + "loss": 1.4928, + "step": 9121 + }, + { + "epoch": 0.9622362869198312, + "grad_norm": 0.4317052662372589, + "learning_rate": 2.4653802084083134e-05, + "loss": 1.4589, + "step": 9122 + }, + { + "epoch": 0.9623417721518988, + "grad_norm": 0.4185953438282013, + "learning_rate": 2.4619607930626937e-05, + "loss": 1.4897, + "step": 9123 + }, + { + "epoch": 0.9624472573839662, + "grad_norm": 0.3883945941925049, + "learning_rate": 2.4585461203532253e-05, + "loss": 1.4608, + "step": 9124 + }, + { + "epoch": 0.9625527426160337, + "grad_norm": 0.39219507575035095, + "learning_rate": 2.4551361837020032e-05, + "loss": 1.4543, + "step": 9125 + }, + { + "epoch": 0.9626582278481013, + "grad_norm": 0.4329569339752197, + "learning_rate": 2.4517309765402408e-05, + "loss": 1.4565, + "step": 9126 + }, + { + "epoch": 0.9627637130801687, + "grad_norm": 0.4056680202484131, + "learning_rate": 2.448330492308266e-05, + "loss": 1.4935, + "step": 9127 + }, + { + "epoch": 0.9628691983122363, + "grad_norm": 0.3911163806915283, + "learning_rate": 2.4449347244555043e-05, + "loss": 1.4487, + "step": 9128 + }, + { + "epoch": 0.9629746835443038, + "grad_norm": 0.4146103262901306, + "learning_rate": 2.4415436664404643e-05, + "loss": 1.4401, + "step": 9129 + }, + { + "epoch": 0.9630801687763713, + "grad_norm": 0.44063320755958557, + "learning_rate": 2.4381573117307307e-05, + "loss": 
1.5436, + "step": 9130 + }, + { + "epoch": 0.9631856540084388, + "grad_norm": 0.40029656887054443, + "learning_rate": 2.4347756538029454e-05, + "loss": 1.501, + "step": 9131 + }, + { + "epoch": 0.9632911392405064, + "grad_norm": 0.41834521293640137, + "learning_rate": 2.4313986861427997e-05, + "loss": 1.5183, + "step": 9132 + }, + { + "epoch": 0.9633966244725738, + "grad_norm": 0.437883198261261, + "learning_rate": 2.4280264022450212e-05, + "loss": 1.5414, + "step": 9133 + }, + { + "epoch": 0.9635021097046413, + "grad_norm": 0.4124026894569397, + "learning_rate": 2.4246587956133573e-05, + "loss": 1.4557, + "step": 9134 + }, + { + "epoch": 0.9636075949367089, + "grad_norm": 0.435684472322464, + "learning_rate": 2.421295859760569e-05, + "loss": 1.4734, + "step": 9135 + }, + { + "epoch": 0.9637130801687763, + "grad_norm": 0.4155442416667938, + "learning_rate": 2.4179375882084098e-05, + "loss": 1.4934, + "step": 9136 + }, + { + "epoch": 0.9638185654008439, + "grad_norm": 0.39959219098091125, + "learning_rate": 2.4145839744876232e-05, + "loss": 1.4883, + "step": 9137 + }, + { + "epoch": 0.9639240506329114, + "grad_norm": 0.43970921635627747, + "learning_rate": 2.4112350121379255e-05, + "loss": 1.494, + "step": 9138 + }, + { + "epoch": 0.9640295358649789, + "grad_norm": 0.42591482400894165, + "learning_rate": 2.4078906947079882e-05, + "loss": 1.4931, + "step": 9139 + }, + { + "epoch": 0.9641350210970464, + "grad_norm": 0.4545920193195343, + "learning_rate": 2.4045510157554362e-05, + "loss": 1.4886, + "step": 9140 + }, + { + "epoch": 0.964240506329114, + "grad_norm": 0.45448213815689087, + "learning_rate": 2.4012159688468252e-05, + "loss": 1.4651, + "step": 9141 + }, + { + "epoch": 0.9643459915611814, + "grad_norm": 0.42389097809791565, + "learning_rate": 2.397885547557638e-05, + "loss": 1.4738, + "step": 9142 + }, + { + "epoch": 0.9644514767932489, + "grad_norm": 0.45470306277275085, + "learning_rate": 2.3945597454722663e-05, + "loss": 1.4846, + "step": 9143 + }, + { + 
"epoch": 0.9645569620253165, + "grad_norm": 0.38892287015914917, + "learning_rate": 2.3912385561839984e-05, + "loss": 1.4856, + "step": 9144 + }, + { + "epoch": 0.9646624472573839, + "grad_norm": 0.41130340099334717, + "learning_rate": 2.3879219732950117e-05, + "loss": 1.4719, + "step": 9145 + }, + { + "epoch": 0.9647679324894515, + "grad_norm": 0.40683552622795105, + "learning_rate": 2.384609990416354e-05, + "loss": 1.5036, + "step": 9146 + }, + { + "epoch": 0.964873417721519, + "grad_norm": 0.4692808985710144, + "learning_rate": 2.3813026011679366e-05, + "loss": 1.4981, + "step": 9147 + }, + { + "epoch": 0.9649789029535865, + "grad_norm": 0.4820142686367035, + "learning_rate": 2.3779997991785207e-05, + "loss": 1.4675, + "step": 9148 + }, + { + "epoch": 0.965084388185654, + "grad_norm": 0.4010757505893707, + "learning_rate": 2.3747015780857007e-05, + "loss": 1.4877, + "step": 9149 + }, + { + "epoch": 0.9651898734177216, + "grad_norm": 0.4162488579750061, + "learning_rate": 2.3714079315358985e-05, + "loss": 1.4712, + "step": 9150 + }, + { + "epoch": 0.965295358649789, + "grad_norm": 0.42586034536361694, + "learning_rate": 2.3681188531843466e-05, + "loss": 1.4496, + "step": 9151 + }, + { + "epoch": 0.9654008438818565, + "grad_norm": 0.4219142198562622, + "learning_rate": 2.3648343366950783e-05, + "loss": 1.5054, + "step": 9152 + }, + { + "epoch": 0.9655063291139241, + "grad_norm": 0.4600399136543274, + "learning_rate": 2.3615543757409166e-05, + "loss": 1.5034, + "step": 9153 + }, + { + "epoch": 0.9656118143459915, + "grad_norm": 0.408123642206192, + "learning_rate": 2.3582789640034548e-05, + "loss": 1.4917, + "step": 9154 + }, + { + "epoch": 0.9657172995780591, + "grad_norm": 0.3981526792049408, + "learning_rate": 2.3550080951730548e-05, + "loss": 1.4988, + "step": 9155 + }, + { + "epoch": 0.9658227848101266, + "grad_norm": 0.43337729573249817, + "learning_rate": 2.3517417629488286e-05, + "loss": 1.4862, + "step": 9156 + }, + { + "epoch": 0.9659282700421941, + 
"grad_norm": 0.39067745208740234, + "learning_rate": 2.3484799610386246e-05, + "loss": 1.4731, + "step": 9157 + }, + { + "epoch": 0.9660337552742616, + "grad_norm": 0.4386807680130005, + "learning_rate": 2.3452226831590232e-05, + "loss": 1.5109, + "step": 9158 + }, + { + "epoch": 0.9661392405063292, + "grad_norm": 0.485840767621994, + "learning_rate": 2.3419699230353144e-05, + "loss": 1.4642, + "step": 9159 + }, + { + "epoch": 0.9662447257383966, + "grad_norm": 0.4368363618850708, + "learning_rate": 2.338721674401494e-05, + "loss": 1.5036, + "step": 9160 + }, + { + "epoch": 0.9663502109704641, + "grad_norm": 0.4158051609992981, + "learning_rate": 2.3354779310002503e-05, + "loss": 1.4417, + "step": 9161 + }, + { + "epoch": 0.9664556962025317, + "grad_norm": 0.4055517017841339, + "learning_rate": 2.3322386865829453e-05, + "loss": 1.476, + "step": 9162 + }, + { + "epoch": 0.9665611814345991, + "grad_norm": 0.41120645403862, + "learning_rate": 2.3290039349096127e-05, + "loss": 1.4507, + "step": 9163 + }, + { + "epoch": 0.9666666666666667, + "grad_norm": 0.4314862787723541, + "learning_rate": 2.325773669748937e-05, + "loss": 1.5242, + "step": 9164 + }, + { + "epoch": 0.9667721518987342, + "grad_norm": 0.4585617184638977, + "learning_rate": 2.3225478848782476e-05, + "loss": 1.5275, + "step": 9165 + }, + { + "epoch": 0.9668776371308017, + "grad_norm": 0.4475068747997284, + "learning_rate": 2.3193265740835058e-05, + "loss": 1.5003, + "step": 9166 + }, + { + "epoch": 0.9669831223628692, + "grad_norm": 0.416963666677475, + "learning_rate": 2.3161097311592866e-05, + "loss": 1.4908, + "step": 9167 + }, + { + "epoch": 0.9670886075949368, + "grad_norm": 0.42864346504211426, + "learning_rate": 2.3128973499087785e-05, + "loss": 1.4747, + "step": 9168 + }, + { + "epoch": 0.9671940928270042, + "grad_norm": 0.45106953382492065, + "learning_rate": 2.3096894241437583e-05, + "loss": 1.5217, + "step": 9169 + }, + { + "epoch": 0.9672995780590717, + "grad_norm": 0.47146642208099365, + 
"learning_rate": 2.30648594768459e-05, + "loss": 1.4461, + "step": 9170 + }, + { + "epoch": 0.9674050632911393, + "grad_norm": 0.40260064601898193, + "learning_rate": 2.3032869143602086e-05, + "loss": 1.4385, + "step": 9171 + }, + { + "epoch": 0.9675105485232067, + "grad_norm": 0.43034082651138306, + "learning_rate": 2.3000923180081046e-05, + "loss": 1.5207, + "step": 9172 + }, + { + "epoch": 0.9676160337552743, + "grad_norm": 0.41302481293678284, + "learning_rate": 2.29690215247432e-05, + "loss": 1.5081, + "step": 9173 + }, + { + "epoch": 0.9677215189873418, + "grad_norm": 0.429710328578949, + "learning_rate": 2.293716411613428e-05, + "loss": 1.4861, + "step": 9174 + }, + { + "epoch": 0.9678270042194093, + "grad_norm": 0.42314034700393677, + "learning_rate": 2.2905350892885293e-05, + "loss": 1.4748, + "step": 9175 + }, + { + "epoch": 0.9679324894514768, + "grad_norm": 0.44195985794067383, + "learning_rate": 2.287358179371235e-05, + "loss": 1.4618, + "step": 9176 + }, + { + "epoch": 0.9680379746835444, + "grad_norm": 0.4341367781162262, + "learning_rate": 2.2841856757416538e-05, + "loss": 1.481, + "step": 9177 + }, + { + "epoch": 0.9681434599156118, + "grad_norm": 0.4109688699245453, + "learning_rate": 2.2810175722883866e-05, + "loss": 1.5089, + "step": 9178 + }, + { + "epoch": 0.9682489451476793, + "grad_norm": 0.41590961813926697, + "learning_rate": 2.2778538629085057e-05, + "loss": 1.4911, + "step": 9179 + }, + { + "epoch": 0.9683544303797469, + "grad_norm": 0.4081091284751892, + "learning_rate": 2.2746945415075523e-05, + "loss": 1.4514, + "step": 9180 + }, + { + "epoch": 0.9684599156118143, + "grad_norm": 0.4389456510543823, + "learning_rate": 2.27153960199952e-05, + "loss": 1.4929, + "step": 9181 + }, + { + "epoch": 0.9685654008438819, + "grad_norm": 0.4213710129261017, + "learning_rate": 2.26838903830684e-05, + "loss": 1.4786, + "step": 9182 + }, + { + "epoch": 0.9686708860759494, + "grad_norm": 0.41330716013908386, + "learning_rate": 2.2652428443603782e-05, 
+ "loss": 1.4048, + "step": 9183 + }, + { + "epoch": 0.9687763713080169, + "grad_norm": 0.43445727229118347, + "learning_rate": 2.2621010140994126e-05, + "loss": 1.4949, + "step": 9184 + }, + { + "epoch": 0.9688818565400844, + "grad_norm": 0.41455334424972534, + "learning_rate": 2.2589635414716307e-05, + "loss": 1.4997, + "step": 9185 + }, + { + "epoch": 0.9689873417721518, + "grad_norm": 0.41250765323638916, + "learning_rate": 2.2558304204331152e-05, + "loss": 1.478, + "step": 9186 + }, + { + "epoch": 0.9690928270042194, + "grad_norm": 0.46336549520492554, + "learning_rate": 2.2527016449483282e-05, + "loss": 1.4975, + "step": 9187 + }, + { + "epoch": 0.9691983122362869, + "grad_norm": 0.4073362648487091, + "learning_rate": 2.2495772089901067e-05, + "loss": 1.4822, + "step": 9188 + }, + { + "epoch": 0.9693037974683544, + "grad_norm": 0.45580729842185974, + "learning_rate": 2.2464571065396428e-05, + "loss": 1.5042, + "step": 9189 + }, + { + "epoch": 0.9694092827004219, + "grad_norm": 0.4042230546474457, + "learning_rate": 2.2433413315864803e-05, + "loss": 1.458, + "step": 9190 + }, + { + "epoch": 0.9695147679324895, + "grad_norm": 0.4477214813232422, + "learning_rate": 2.2402298781284995e-05, + "loss": 1.5238, + "step": 9191 + }, + { + "epoch": 0.9696202531645569, + "grad_norm": 0.38857710361480713, + "learning_rate": 2.2371227401719017e-05, + "loss": 1.4755, + "step": 9192 + }, + { + "epoch": 0.9697257383966245, + "grad_norm": 0.39348363876342773, + "learning_rate": 2.2340199117312058e-05, + "loss": 1.4535, + "step": 9193 + }, + { + "epoch": 0.969831223628692, + "grad_norm": 0.4211290776729584, + "learning_rate": 2.2309213868292277e-05, + "loss": 1.4945, + "step": 9194 + }, + { + "epoch": 0.9699367088607594, + "grad_norm": 0.4405970573425293, + "learning_rate": 2.2278271594970783e-05, + "loss": 1.4782, + "step": 9195 + }, + { + "epoch": 0.970042194092827, + "grad_norm": 0.4098755419254303, + "learning_rate": 2.2247372237741457e-05, + "loss": 1.4882, + "step": 9196 
+ }, + { + "epoch": 0.9701476793248945, + "grad_norm": 0.4063092768192291, + "learning_rate": 2.2216515737080818e-05, + "loss": 1.4987, + "step": 9197 + }, + { + "epoch": 0.970253164556962, + "grad_norm": 0.4036597013473511, + "learning_rate": 2.218570203354799e-05, + "loss": 1.4521, + "step": 9198 + }, + { + "epoch": 0.9703586497890295, + "grad_norm": 0.4226246476173401, + "learning_rate": 2.2154931067784525e-05, + "loss": 1.517, + "step": 9199 + }, + { + "epoch": 0.9704641350210971, + "grad_norm": 0.4177335500717163, + "learning_rate": 2.2124202780514277e-05, + "loss": 1.4491, + "step": 9200 + }, + { + "epoch": 0.9705696202531645, + "grad_norm": 0.42933499813079834, + "learning_rate": 2.2093517112543358e-05, + "loss": 1.4871, + "step": 9201 + }, + { + "epoch": 0.9706751054852321, + "grad_norm": 0.4181468188762665, + "learning_rate": 2.2062874004759936e-05, + "loss": 1.497, + "step": 9202 + }, + { + "epoch": 0.9707805907172996, + "grad_norm": 0.43489357829093933, + "learning_rate": 2.20322733981342e-05, + "loss": 1.4567, + "step": 9203 + }, + { + "epoch": 0.970886075949367, + "grad_norm": 0.4201034903526306, + "learning_rate": 2.200171523371821e-05, + "loss": 1.4985, + "step": 9204 + }, + { + "epoch": 0.9709915611814346, + "grad_norm": 0.42879852652549744, + "learning_rate": 2.197119945264576e-05, + "loss": 1.5035, + "step": 9205 + }, + { + "epoch": 0.9710970464135021, + "grad_norm": 0.4183104634284973, + "learning_rate": 2.1940725996132308e-05, + "loss": 1.4483, + "step": 9206 + }, + { + "epoch": 0.9712025316455696, + "grad_norm": 0.47237175703048706, + "learning_rate": 2.1910294805474834e-05, + "loss": 1.5052, + "step": 9207 + }, + { + "epoch": 0.9713080168776371, + "grad_norm": 0.4298136532306671, + "learning_rate": 2.187990582205175e-05, + "loss": 1.4729, + "step": 9208 + }, + { + "epoch": 0.9714135021097047, + "grad_norm": 0.3972509801387787, + "learning_rate": 2.1849558987322783e-05, + "loss": 1.4684, + "step": 9209 + }, + { + "epoch": 0.9715189873417721, + 
"grad_norm": 0.4220639765262604, + "learning_rate": 2.1819254242828815e-05, + "loss": 1.4796, + "step": 9210 + }, + { + "epoch": 0.9716244725738397, + "grad_norm": 0.4244382679462433, + "learning_rate": 2.1788991530191857e-05, + "loss": 1.4894, + "step": 9211 + }, + { + "epoch": 0.9717299578059072, + "grad_norm": 0.4105396866798401, + "learning_rate": 2.1758770791114845e-05, + "loss": 1.5095, + "step": 9212 + }, + { + "epoch": 0.9718354430379746, + "grad_norm": 0.4359987676143646, + "learning_rate": 2.17285919673816e-05, + "loss": 1.5202, + "step": 9213 + }, + { + "epoch": 0.9719409282700422, + "grad_norm": 0.44183844327926636, + "learning_rate": 2.1698455000856692e-05, + "loss": 1.4652, + "step": 9214 + }, + { + "epoch": 0.9720464135021097, + "grad_norm": 0.4069547951221466, + "learning_rate": 2.1668359833485287e-05, + "loss": 1.4815, + "step": 9215 + }, + { + "epoch": 0.9721518987341772, + "grad_norm": 0.431631863117218, + "learning_rate": 2.1638306407293122e-05, + "loss": 1.4504, + "step": 9216 + }, + { + "epoch": 0.9722573839662447, + "grad_norm": 0.43690598011016846, + "learning_rate": 2.160829466438629e-05, + "loss": 1.4899, + "step": 9217 + }, + { + "epoch": 0.9723628691983123, + "grad_norm": 0.4441690444946289, + "learning_rate": 2.157832454695122e-05, + "loss": 1.4484, + "step": 9218 + }, + { + "epoch": 0.9724683544303797, + "grad_norm": 0.45397254824638367, + "learning_rate": 2.1548395997254516e-05, + "loss": 1.4878, + "step": 9219 + }, + { + "epoch": 0.9725738396624473, + "grad_norm": 0.4372881054878235, + "learning_rate": 2.151850895764285e-05, + "loss": 1.4812, + "step": 9220 + }, + { + "epoch": 0.9726793248945148, + "grad_norm": 0.4144532084465027, + "learning_rate": 2.148866337054287e-05, + "loss": 1.495, + "step": 9221 + }, + { + "epoch": 0.9727848101265822, + "grad_norm": 0.41240862011909485, + "learning_rate": 2.145885917846105e-05, + "loss": 1.448, + "step": 9222 + }, + { + "epoch": 0.9728902953586498, + "grad_norm": 0.4212949275970459, + 
"learning_rate": 2.1429096323983638e-05, + "loss": 1.4764, + "step": 9223 + }, + { + "epoch": 0.9729957805907173, + "grad_norm": 0.4755083918571472, + "learning_rate": 2.1399374749776512e-05, + "loss": 1.4609, + "step": 9224 + }, + { + "epoch": 0.9731012658227848, + "grad_norm": 0.40488022565841675, + "learning_rate": 2.1369694398585035e-05, + "loss": 1.4677, + "step": 9225 + }, + { + "epoch": 0.9732067510548523, + "grad_norm": 0.41448238492012024, + "learning_rate": 2.1340055213234025e-05, + "loss": 1.4872, + "step": 9226 + }, + { + "epoch": 0.9733122362869199, + "grad_norm": 0.4279082715511322, + "learning_rate": 2.131045713662756e-05, + "loss": 1.4441, + "step": 9227 + }, + { + "epoch": 0.9734177215189873, + "grad_norm": 0.40274637937545776, + "learning_rate": 2.1280900111748943e-05, + "loss": 1.4822, + "step": 9228 + }, + { + "epoch": 0.9735232067510549, + "grad_norm": 0.40205472707748413, + "learning_rate": 2.1251384081660546e-05, + "loss": 1.4564, + "step": 9229 + }, + { + "epoch": 0.9736286919831224, + "grad_norm": 0.40804171562194824, + "learning_rate": 2.12219089895037e-05, + "loss": 1.4977, + "step": 9230 + }, + { + "epoch": 0.9737341772151898, + "grad_norm": 0.41353917121887207, + "learning_rate": 2.1192474778498613e-05, + "loss": 1.4942, + "step": 9231 + }, + { + "epoch": 0.9738396624472574, + "grad_norm": 0.41624319553375244, + "learning_rate": 2.1163081391944224e-05, + "loss": 1.4342, + "step": 9232 + }, + { + "epoch": 0.9739451476793249, + "grad_norm": 0.4061141908168793, + "learning_rate": 2.1133728773218143e-05, + "loss": 1.4569, + "step": 9233 + }, + { + "epoch": 0.9740506329113924, + "grad_norm": 0.39513930678367615, + "learning_rate": 2.1104416865776507e-05, + "loss": 1.4791, + "step": 9234 + }, + { + "epoch": 0.9741561181434599, + "grad_norm": 0.43303805589675903, + "learning_rate": 2.1075145613153853e-05, + "loss": 1.4755, + "step": 9235 + }, + { + "epoch": 0.9742616033755275, + "grad_norm": 0.40141043066978455, + "learning_rate": 
2.104591495896307e-05, + "loss": 1.4624, + "step": 9236 + }, + { + "epoch": 0.9743670886075949, + "grad_norm": 0.4169008731842041, + "learning_rate": 2.1016724846895213e-05, + "loss": 1.4707, + "step": 9237 + }, + { + "epoch": 0.9744725738396625, + "grad_norm": 0.40253081917762756, + "learning_rate": 2.0987575220719476e-05, + "loss": 1.4834, + "step": 9238 + }, + { + "epoch": 0.97457805907173, + "grad_norm": 0.4180942475795746, + "learning_rate": 2.0958466024283035e-05, + "loss": 1.4337, + "step": 9239 + }, + { + "epoch": 0.9746835443037974, + "grad_norm": 0.3958808481693268, + "learning_rate": 2.092939720151092e-05, + "loss": 1.4722, + "step": 9240 + }, + { + "epoch": 0.974789029535865, + "grad_norm": 0.4027206003665924, + "learning_rate": 2.090036869640596e-05, + "loss": 1.4484, + "step": 9241 + }, + { + "epoch": 0.9748945147679325, + "grad_norm": 0.39618241786956787, + "learning_rate": 2.0871380453048667e-05, + "loss": 1.5062, + "step": 9242 + }, + { + "epoch": 0.975, + "grad_norm": 0.3900398015975952, + "learning_rate": 2.0842432415597064e-05, + "loss": 1.4529, + "step": 9243 + }, + { + "epoch": 0.9751054852320675, + "grad_norm": 0.4288707673549652, + "learning_rate": 2.0813524528286672e-05, + "loss": 1.499, + "step": 9244 + }, + { + "epoch": 0.9752109704641351, + "grad_norm": 0.42837774753570557, + "learning_rate": 2.0784656735430323e-05, + "loss": 1.4378, + "step": 9245 + }, + { + "epoch": 0.9753164556962025, + "grad_norm": 0.42409801483154297, + "learning_rate": 2.07558289814181e-05, + "loss": 1.4858, + "step": 9246 + }, + { + "epoch": 0.9754219409282701, + "grad_norm": 0.4169222414493561, + "learning_rate": 2.0727041210717232e-05, + "loss": 1.5086, + "step": 9247 + }, + { + "epoch": 0.9755274261603376, + "grad_norm": 0.40484586358070374, + "learning_rate": 2.069829336787193e-05, + "loss": 1.4701, + "step": 9248 + }, + { + "epoch": 0.975632911392405, + "grad_norm": 0.40863052010536194, + "learning_rate": 2.0669585397503362e-05, + "loss": 1.504, + "step": 
9249 + }, + { + "epoch": 0.9757383966244726, + "grad_norm": 0.4081283509731293, + "learning_rate": 2.064091724430947e-05, + "loss": 1.4696, + "step": 9250 + }, + { + "epoch": 0.97584388185654, + "grad_norm": 0.4262232482433319, + "learning_rate": 2.061228885306492e-05, + "loss": 1.4609, + "step": 9251 + }, + { + "epoch": 0.9759493670886076, + "grad_norm": 0.4494875967502594, + "learning_rate": 2.0583700168620984e-05, + "loss": 1.4834, + "step": 9252 + }, + { + "epoch": 0.9760548523206751, + "grad_norm": 0.4169006645679474, + "learning_rate": 2.055515113590538e-05, + "loss": 1.4814, + "step": 9253 + }, + { + "epoch": 0.9761603375527426, + "grad_norm": 0.4058118760585785, + "learning_rate": 2.0526641699922274e-05, + "loss": 1.4778, + "step": 9254 + }, + { + "epoch": 0.9762658227848101, + "grad_norm": 0.440136581659317, + "learning_rate": 2.0498171805752038e-05, + "loss": 1.4669, + "step": 9255 + }, + { + "epoch": 0.9763713080168777, + "grad_norm": 0.3998717665672302, + "learning_rate": 2.0469741398551272e-05, + "loss": 1.4589, + "step": 9256 + }, + { + "epoch": 0.9764767932489451, + "grad_norm": 0.41716086864471436, + "learning_rate": 2.0441350423552625e-05, + "loss": 1.5096, + "step": 9257 + }, + { + "epoch": 0.9765822784810126, + "grad_norm": 0.43359634280204773, + "learning_rate": 2.0412998826064695e-05, + "loss": 1.4654, + "step": 9258 + }, + { + "epoch": 0.9766877637130802, + "grad_norm": 0.4217475354671478, + "learning_rate": 2.0384686551471954e-05, + "loss": 1.4759, + "step": 9259 + }, + { + "epoch": 0.9767932489451476, + "grad_norm": 0.41475754976272583, + "learning_rate": 2.0356413545234603e-05, + "loss": 1.4628, + "step": 9260 + }, + { + "epoch": 0.9768987341772152, + "grad_norm": 0.4203028380870819, + "learning_rate": 2.0328179752888504e-05, + "loss": 1.4866, + "step": 9261 + }, + { + "epoch": 0.9770042194092827, + "grad_norm": 0.41151440143585205, + "learning_rate": 2.029998512004507e-05, + "loss": 1.4797, + "step": 9262 + }, + { + "epoch": 
0.9771097046413502, + "grad_norm": 0.4004734456539154, + "learning_rate": 2.0271829592391113e-05, + "loss": 1.4902, + "step": 9263 + }, + { + "epoch": 0.9772151898734177, + "grad_norm": 0.4614029824733734, + "learning_rate": 2.0243713115688823e-05, + "loss": 1.5083, + "step": 9264 + }, + { + "epoch": 0.9773206751054853, + "grad_norm": 0.43761423230171204, + "learning_rate": 2.021563563577556e-05, + "loss": 1.4815, + "step": 9265 + }, + { + "epoch": 0.9774261603375527, + "grad_norm": 0.426094114780426, + "learning_rate": 2.0187597098563862e-05, + "loss": 1.5057, + "step": 9266 + }, + { + "epoch": 0.9775316455696202, + "grad_norm": 0.3993452489376068, + "learning_rate": 2.0159597450041257e-05, + "loss": 1.4761, + "step": 9267 + }, + { + "epoch": 0.9776371308016878, + "grad_norm": 0.43500107526779175, + "learning_rate": 2.0131636636270178e-05, + "loss": 1.477, + "step": 9268 + }, + { + "epoch": 0.9777426160337552, + "grad_norm": 0.45615869760513306, + "learning_rate": 2.0103714603387898e-05, + "loss": 1.4863, + "step": 9269 + }, + { + "epoch": 0.9778481012658228, + "grad_norm": 0.4383704662322998, + "learning_rate": 2.0075831297606357e-05, + "loss": 1.4893, + "step": 9270 + }, + { + "epoch": 0.9779535864978903, + "grad_norm": 0.41047871112823486, + "learning_rate": 2.004798666521213e-05, + "loss": 1.4418, + "step": 9271 + }, + { + "epoch": 0.9780590717299578, + "grad_norm": 0.41966596245765686, + "learning_rate": 2.0020180652566292e-05, + "loss": 1.4769, + "step": 9272 + }, + { + "epoch": 0.9781645569620253, + "grad_norm": 0.4164266884326935, + "learning_rate": 1.999241320610428e-05, + "loss": 1.4869, + "step": 9273 + }, + { + "epoch": 0.9782700421940929, + "grad_norm": 0.3983272314071655, + "learning_rate": 1.996468427233586e-05, + "loss": 1.4991, + "step": 9274 + }, + { + "epoch": 0.9783755274261603, + "grad_norm": 0.45602473616600037, + "learning_rate": 1.9936993797844958e-05, + "loss": 1.4895, + "step": 9275 + }, + { + "epoch": 0.9784810126582278, + "grad_norm": 
0.4188278019428253, + "learning_rate": 1.9909341729289613e-05, + "loss": 1.4588, + "step": 9276 + }, + { + "epoch": 0.9785864978902954, + "grad_norm": 0.3984006643295288, + "learning_rate": 1.9881728013401843e-05, + "loss": 1.465, + "step": 9277 + }, + { + "epoch": 0.9786919831223628, + "grad_norm": 0.41702425479888916, + "learning_rate": 1.9854152596987523e-05, + "loss": 1.4988, + "step": 9278 + }, + { + "epoch": 0.9787974683544304, + "grad_norm": 0.40520593523979187, + "learning_rate": 1.9826615426926342e-05, + "loss": 1.4731, + "step": 9279 + }, + { + "epoch": 0.9789029535864979, + "grad_norm": 0.4267204701900482, + "learning_rate": 1.9799116450171627e-05, + "loss": 1.436, + "step": 9280 + }, + { + "epoch": 0.9790084388185654, + "grad_norm": 0.4072263836860657, + "learning_rate": 1.9771655613750312e-05, + "loss": 1.4747, + "step": 9281 + }, + { + "epoch": 0.9791139240506329, + "grad_norm": 0.40923407673835754, + "learning_rate": 1.9744232864762798e-05, + "loss": 1.4754, + "step": 9282 + }, + { + "epoch": 0.9792194092827005, + "grad_norm": 0.406464159488678, + "learning_rate": 1.971684815038283e-05, + "loss": 1.4756, + "step": 9283 + }, + { + "epoch": 0.9793248945147679, + "grad_norm": 0.41171348094940186, + "learning_rate": 1.9689501417857458e-05, + "loss": 1.4873, + "step": 9284 + }, + { + "epoch": 0.9794303797468354, + "grad_norm": 0.4129951000213623, + "learning_rate": 1.9662192614506883e-05, + "loss": 1.4979, + "step": 9285 + }, + { + "epoch": 0.979535864978903, + "grad_norm": 0.4049890637397766, + "learning_rate": 1.9634921687724354e-05, + "loss": 1.4657, + "step": 9286 + }, + { + "epoch": 0.9796413502109704, + "grad_norm": 0.423679381608963, + "learning_rate": 1.960768858497612e-05, + "loss": 1.4898, + "step": 9287 + }, + { + "epoch": 0.979746835443038, + "grad_norm": 0.41526612639427185, + "learning_rate": 1.9580493253801253e-05, + "loss": 1.4776, + "step": 9288 + }, + { + "epoch": 0.9798523206751055, + "grad_norm": 0.4138219356536865, + "learning_rate": 
1.9553335641811623e-05, + "loss": 1.4729, + "step": 9289 + }, + { + "epoch": 0.979957805907173, + "grad_norm": 0.4148155748844147, + "learning_rate": 1.952621569669175e-05, + "loss": 1.4926, + "step": 9290 + }, + { + "epoch": 0.9800632911392405, + "grad_norm": 0.3987088203430176, + "learning_rate": 1.9499133366198684e-05, + "loss": 1.4524, + "step": 9291 + }, + { + "epoch": 0.9801687763713081, + "grad_norm": 0.42366352677345276, + "learning_rate": 1.947208859816199e-05, + "loss": 1.4562, + "step": 9292 + }, + { + "epoch": 0.9802742616033755, + "grad_norm": 0.4108063578605652, + "learning_rate": 1.9445081340483534e-05, + "loss": 1.4961, + "step": 9293 + }, + { + "epoch": 0.980379746835443, + "grad_norm": 0.4230404794216156, + "learning_rate": 1.9418111541137484e-05, + "loss": 1.502, + "step": 9294 + }, + { + "epoch": 0.9804852320675106, + "grad_norm": 0.4407208263874054, + "learning_rate": 1.939117914817016e-05, + "loss": 1.4918, + "step": 9295 + }, + { + "epoch": 0.980590717299578, + "grad_norm": 0.4105750620365143, + "learning_rate": 1.936428410969991e-05, + "loss": 1.4586, + "step": 9296 + }, + { + "epoch": 0.9806962025316456, + "grad_norm": 0.42926931381225586, + "learning_rate": 1.933742637391708e-05, + "loss": 1.4716, + "step": 9297 + }, + { + "epoch": 0.9808016877637131, + "grad_norm": 0.42408859729766846, + "learning_rate": 1.9310605889083842e-05, + "loss": 1.5031, + "step": 9298 + }, + { + "epoch": 0.9809071729957806, + "grad_norm": 0.4241902232170105, + "learning_rate": 1.9283822603534143e-05, + "loss": 1.4835, + "step": 9299 + }, + { + "epoch": 0.9810126582278481, + "grad_norm": 0.40558022260665894, + "learning_rate": 1.9257076465673605e-05, + "loss": 1.4956, + "step": 9300 + }, + { + "epoch": 0.9811181434599157, + "grad_norm": 0.4021380543708801, + "learning_rate": 1.923036742397937e-05, + "loss": 1.4691, + "step": 9301 + }, + { + "epoch": 0.9812236286919831, + "grad_norm": 0.41294413805007935, + "learning_rate": 1.9203695427000086e-05, + "loss": 1.4787, 
+ "step": 9302 + }, + { + "epoch": 0.9813291139240506, + "grad_norm": 0.41563257575035095, + "learning_rate": 1.9177060423355717e-05, + "loss": 1.4989, + "step": 9303 + }, + { + "epoch": 0.9814345991561182, + "grad_norm": 0.41813787817955017, + "learning_rate": 1.9150462361737524e-05, + "loss": 1.4605, + "step": 9304 + }, + { + "epoch": 0.9815400843881856, + "grad_norm": 0.44080349802970886, + "learning_rate": 1.912390119090793e-05, + "loss": 1.4871, + "step": 9305 + }, + { + "epoch": 0.9816455696202532, + "grad_norm": 0.4605334401130676, + "learning_rate": 1.909737685970039e-05, + "loss": 1.487, + "step": 9306 + }, + { + "epoch": 0.9817510548523207, + "grad_norm": 0.40913528203964233, + "learning_rate": 1.9070889317019377e-05, + "loss": 1.5157, + "step": 9307 + }, + { + "epoch": 0.9818565400843882, + "grad_norm": 0.4434133768081665, + "learning_rate": 1.904443851184018e-05, + "loss": 1.494, + "step": 9308 + }, + { + "epoch": 0.9819620253164557, + "grad_norm": 0.41206395626068115, + "learning_rate": 1.90180243932089e-05, + "loss": 1.4345, + "step": 9309 + }, + { + "epoch": 0.9820675105485233, + "grad_norm": 0.4508298635482788, + "learning_rate": 1.899164691024229e-05, + "loss": 1.4925, + "step": 9310 + }, + { + "epoch": 0.9821729957805907, + "grad_norm": 0.43016839027404785, + "learning_rate": 1.8965306012127665e-05, + "loss": 1.464, + "step": 9311 + }, + { + "epoch": 0.9822784810126582, + "grad_norm": 0.3965884745121002, + "learning_rate": 1.8939001648122847e-05, + "loss": 1.4872, + "step": 9312 + }, + { + "epoch": 0.9823839662447258, + "grad_norm": 0.41789162158966064, + "learning_rate": 1.8912733767556005e-05, + "loss": 1.4708, + "step": 9313 + }, + { + "epoch": 0.9824894514767932, + "grad_norm": 0.4155394732952118, + "learning_rate": 1.8886502319825606e-05, + "loss": 1.5208, + "step": 9314 + }, + { + "epoch": 0.9825949367088608, + "grad_norm": 0.40481072664260864, + "learning_rate": 1.8860307254400307e-05, + "loss": 1.4554, + "step": 9315 + }, + { + "epoch": 
0.9827004219409282, + "grad_norm": 0.40905100107192993, + "learning_rate": 1.883414852081882e-05, + "loss": 1.5075, + "step": 9316 + }, + { + "epoch": 0.9828059071729958, + "grad_norm": 0.42430728673934937, + "learning_rate": 1.8808026068689887e-05, + "loss": 1.4601, + "step": 9317 + }, + { + "epoch": 0.9829113924050633, + "grad_norm": 0.4392472207546234, + "learning_rate": 1.87819398476921e-05, + "loss": 1.4612, + "step": 9318 + }, + { + "epoch": 0.9830168776371307, + "grad_norm": 0.4241482615470886, + "learning_rate": 1.8755889807573868e-05, + "loss": 1.4666, + "step": 9319 + }, + { + "epoch": 0.9831223628691983, + "grad_norm": 0.4253840446472168, + "learning_rate": 1.872987589815331e-05, + "loss": 1.4576, + "step": 9320 + }, + { + "epoch": 0.9832278481012658, + "grad_norm": 0.3936906158924103, + "learning_rate": 1.870389806931811e-05, + "loss": 1.5007, + "step": 9321 + }, + { + "epoch": 0.9833333333333333, + "grad_norm": 0.45343899726867676, + "learning_rate": 1.8677956271025497e-05, + "loss": 1.4946, + "step": 9322 + }, + { + "epoch": 0.9834388185654008, + "grad_norm": 0.39700406789779663, + "learning_rate": 1.865205045330207e-05, + "loss": 1.546, + "step": 9323 + }, + { + "epoch": 0.9835443037974684, + "grad_norm": 0.4086047112941742, + "learning_rate": 1.8626180566243758e-05, + "loss": 1.4917, + "step": 9324 + }, + { + "epoch": 0.9836497890295358, + "grad_norm": 0.4209315776824951, + "learning_rate": 1.8600346560015723e-05, + "loss": 1.4773, + "step": 9325 + }, + { + "epoch": 0.9837552742616034, + "grad_norm": 0.44885584712028503, + "learning_rate": 1.8574548384852206e-05, + "loss": 1.4954, + "step": 9326 + }, + { + "epoch": 0.9838607594936709, + "grad_norm": 0.4245237708091736, + "learning_rate": 1.8548785991056514e-05, + "loss": 1.4984, + "step": 9327 + }, + { + "epoch": 0.9839662447257383, + "grad_norm": 0.42421185970306396, + "learning_rate": 1.8523059329000848e-05, + "loss": 1.4559, + "step": 9328 + }, + { + "epoch": 0.9840717299578059, + "grad_norm": 
0.4430307149887085, + "learning_rate": 1.8497368349126255e-05, + "loss": 1.4642, + "step": 9329 + }, + { + "epoch": 0.9841772151898734, + "grad_norm": 0.4064572751522064, + "learning_rate": 1.8471713001942538e-05, + "loss": 1.4581, + "step": 9330 + }, + { + "epoch": 0.9842827004219409, + "grad_norm": 0.42986389994621277, + "learning_rate": 1.84460932380281e-05, + "loss": 1.4979, + "step": 9331 + }, + { + "epoch": 0.9843881856540084, + "grad_norm": 0.4210165739059448, + "learning_rate": 1.842050900802993e-05, + "loss": 1.465, + "step": 9332 + }, + { + "epoch": 0.984493670886076, + "grad_norm": 0.4063452184200287, + "learning_rate": 1.8394960262663446e-05, + "loss": 1.5021, + "step": 9333 + }, + { + "epoch": 0.9845991561181434, + "grad_norm": 0.41390594840049744, + "learning_rate": 1.8369446952712427e-05, + "loss": 1.5027, + "step": 9334 + }, + { + "epoch": 0.984704641350211, + "grad_norm": 0.4068928062915802, + "learning_rate": 1.834396902902892e-05, + "loss": 1.4561, + "step": 9335 + }, + { + "epoch": 0.9848101265822785, + "grad_norm": 0.42157015204429626, + "learning_rate": 1.8318526442533124e-05, + "loss": 1.4861, + "step": 9336 + }, + { + "epoch": 0.984915611814346, + "grad_norm": 0.3931799829006195, + "learning_rate": 1.8293119144213324e-05, + "loss": 1.4875, + "step": 9337 + }, + { + "epoch": 0.9850210970464135, + "grad_norm": 0.39976540207862854, + "learning_rate": 1.826774708512579e-05, + "loss": 1.4696, + "step": 9338 + }, + { + "epoch": 0.985126582278481, + "grad_norm": 0.40549996495246887, + "learning_rate": 1.824241021639465e-05, + "loss": 1.4769, + "step": 9339 + }, + { + "epoch": 0.9852320675105485, + "grad_norm": 0.39983344078063965, + "learning_rate": 1.8217108489211845e-05, + "loss": 1.5113, + "step": 9340 + }, + { + "epoch": 0.985337552742616, + "grad_norm": 0.44870525598526, + "learning_rate": 1.8191841854836994e-05, + "loss": 1.508, + "step": 9341 + }, + { + "epoch": 0.9854430379746836, + "grad_norm": 0.4412899315357208, + "learning_rate": 
1.8166610264597328e-05, + "loss": 1.5076, + "step": 9342 + }, + { + "epoch": 0.985548523206751, + "grad_norm": 0.42450806498527527, + "learning_rate": 1.8141413669887598e-05, + "loss": 1.5215, + "step": 9343 + }, + { + "epoch": 0.9856540084388186, + "grad_norm": 0.4241693913936615, + "learning_rate": 1.8116252022169935e-05, + "loss": 1.4754, + "step": 9344 + }, + { + "epoch": 0.9857594936708861, + "grad_norm": 0.4270302355289459, + "learning_rate": 1.809112527297383e-05, + "loss": 1.4897, + "step": 9345 + }, + { + "epoch": 0.9858649789029535, + "grad_norm": 0.3991768956184387, + "learning_rate": 1.8066033373895962e-05, + "loss": 1.5017, + "step": 9346 + }, + { + "epoch": 0.9859704641350211, + "grad_norm": 0.4303628206253052, + "learning_rate": 1.804097627660017e-05, + "loss": 1.5028, + "step": 9347 + }, + { + "epoch": 0.9860759493670886, + "grad_norm": 0.40191367268562317, + "learning_rate": 1.8015953932817347e-05, + "loss": 1.5174, + "step": 9348 + }, + { + "epoch": 0.9861814345991561, + "grad_norm": 0.414073646068573, + "learning_rate": 1.799096629434529e-05, + "loss": 1.4858, + "step": 9349 + }, + { + "epoch": 0.9862869198312236, + "grad_norm": 0.3921477496623993, + "learning_rate": 1.7966013313048696e-05, + "loss": 1.4696, + "step": 9350 + }, + { + "epoch": 0.9863924050632912, + "grad_norm": 0.4376528561115265, + "learning_rate": 1.794109494085898e-05, + "loss": 1.5231, + "step": 9351 + }, + { + "epoch": 0.9864978902953586, + "grad_norm": 0.4133533537387848, + "learning_rate": 1.7916211129774273e-05, + "loss": 1.4892, + "step": 9352 + }, + { + "epoch": 0.9866033755274262, + "grad_norm": 0.47707170248031616, + "learning_rate": 1.7891361831859263e-05, + "loss": 1.5059, + "step": 9353 + }, + { + "epoch": 0.9867088607594937, + "grad_norm": 0.4317123293876648, + "learning_rate": 1.78665469992451e-05, + "loss": 1.4695, + "step": 9354 + }, + { + "epoch": 0.9868143459915611, + "grad_norm": 0.4074588418006897, + "learning_rate": 1.7841766584129377e-05, + "loss": 1.4988, 
+ "step": 9355 + }, + { + "epoch": 0.9869198312236287, + "grad_norm": 0.43971219658851624, + "learning_rate": 1.7817020538775933e-05, + "loss": 1.479, + "step": 9356 + }, + { + "epoch": 0.9870253164556962, + "grad_norm": 0.4198453724384308, + "learning_rate": 1.779230881551485e-05, + "loss": 1.4675, + "step": 9357 + }, + { + "epoch": 0.9871308016877637, + "grad_norm": 0.4048851728439331, + "learning_rate": 1.7767631366742332e-05, + "loss": 1.4631, + "step": 9358 + }, + { + "epoch": 0.9872362869198312, + "grad_norm": 0.4280070662498474, + "learning_rate": 1.7742988144920578e-05, + "loss": 1.5103, + "step": 9359 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 0.40196070075035095, + "learning_rate": 1.7718379102577752e-05, + "loss": 1.4855, + "step": 9360 + }, + { + "epoch": 0.9874472573839662, + "grad_norm": 0.4261288046836853, + "learning_rate": 1.7693804192307827e-05, + "loss": 1.4829, + "step": 9361 + }, + { + "epoch": 0.9875527426160338, + "grad_norm": 0.40299469232559204, + "learning_rate": 1.7669263366770554e-05, + "loss": 1.4925, + "step": 9362 + }, + { + "epoch": 0.9876582278481013, + "grad_norm": 0.43445515632629395, + "learning_rate": 1.7644756578691348e-05, + "loss": 1.4827, + "step": 9363 + }, + { + "epoch": 0.9877637130801687, + "grad_norm": 0.43816348910331726, + "learning_rate": 1.7620283780861163e-05, + "loss": 1.4904, + "step": 9364 + }, + { + "epoch": 0.9878691983122363, + "grad_norm": 0.4279087781906128, + "learning_rate": 1.759584492613646e-05, + "loss": 1.4878, + "step": 9365 + }, + { + "epoch": 0.9879746835443038, + "grad_norm": 0.4332057237625122, + "learning_rate": 1.757143996743906e-05, + "loss": 1.5004, + "step": 9366 + }, + { + "epoch": 0.9880801687763713, + "grad_norm": 0.4087739884853363, + "learning_rate": 1.75470688577561e-05, + "loss": 1.4455, + "step": 9367 + }, + { + "epoch": 0.9881856540084388, + "grad_norm": 0.4181956648826599, + "learning_rate": 1.7522731550139926e-05, + "loss": 1.4949, + "step": 9368 + }, + { + "epoch": 
0.9882911392405064, + "grad_norm": 0.42697709798812866, + "learning_rate": 1.7498427997707978e-05, + "loss": 1.4721, + "step": 9369 + }, + { + "epoch": 0.9883966244725738, + "grad_norm": 0.4501078724861145, + "learning_rate": 1.7474158153642745e-05, + "loss": 1.5217, + "step": 9370 + }, + { + "epoch": 0.9885021097046414, + "grad_norm": 0.4283146560192108, + "learning_rate": 1.744992197119162e-05, + "loss": 1.4699, + "step": 9371 + }, + { + "epoch": 0.9886075949367089, + "grad_norm": 0.395073801279068, + "learning_rate": 1.7425719403666873e-05, + "loss": 1.4714, + "step": 9372 + }, + { + "epoch": 0.9887130801687763, + "grad_norm": 0.40190500020980835, + "learning_rate": 1.7401550404445523e-05, + "loss": 1.4857, + "step": 9373 + }, + { + "epoch": 0.9888185654008439, + "grad_norm": 0.41541337966918945, + "learning_rate": 1.737741492696922e-05, + "loss": 1.4331, + "step": 9374 + }, + { + "epoch": 0.9889240506329114, + "grad_norm": 0.40836068987846375, + "learning_rate": 1.735331292474423e-05, + "loss": 1.5041, + "step": 9375 + }, + { + "epoch": 0.9890295358649789, + "grad_norm": 0.3903765082359314, + "learning_rate": 1.73292443513413e-05, + "loss": 1.4428, + "step": 9376 + }, + { + "epoch": 0.9891350210970464, + "grad_norm": 0.41661468148231506, + "learning_rate": 1.730520916039554e-05, + "loss": 1.4502, + "step": 9377 + }, + { + "epoch": 0.989240506329114, + "grad_norm": 0.4188127815723419, + "learning_rate": 1.728120730560641e-05, + "loss": 1.5105, + "step": 9378 + }, + { + "epoch": 0.9893459915611814, + "grad_norm": 0.4256414771080017, + "learning_rate": 1.7257238740737548e-05, + "loss": 1.4926, + "step": 9379 + }, + { + "epoch": 0.989451476793249, + "grad_norm": 0.40655890107154846, + "learning_rate": 1.7233303419616745e-05, + "loss": 1.4738, + "step": 9380 + }, + { + "epoch": 0.9895569620253165, + "grad_norm": 0.4350295662879944, + "learning_rate": 1.720940129613584e-05, + "loss": 1.4408, + "step": 9381 + }, + { + "epoch": 0.989662447257384, + "grad_norm": 
0.4115702509880066, + "learning_rate": 1.718553232425059e-05, + "loss": 1.4874, + "step": 9382 + }, + { + "epoch": 0.9897679324894515, + "grad_norm": 0.43595340847969055, + "learning_rate": 1.7161696457980646e-05, + "loss": 1.4917, + "step": 9383 + }, + { + "epoch": 0.9898734177215189, + "grad_norm": 0.42772090435028076, + "learning_rate": 1.7137893651409406e-05, + "loss": 1.4736, + "step": 9384 + }, + { + "epoch": 0.9899789029535865, + "grad_norm": 0.4015375077724457, + "learning_rate": 1.7114123858683976e-05, + "loss": 1.4666, + "step": 9385 + }, + { + "epoch": 0.990084388185654, + "grad_norm": 0.4261699616909027, + "learning_rate": 1.7090387034015054e-05, + "loss": 1.4738, + "step": 9386 + }, + { + "epoch": 0.9901898734177215, + "grad_norm": 0.42017146944999695, + "learning_rate": 1.7066683131676825e-05, + "loss": 1.4937, + "step": 9387 + }, + { + "epoch": 0.990295358649789, + "grad_norm": 0.4136383831501007, + "learning_rate": 1.704301210600693e-05, + "loss": 1.4649, + "step": 9388 + }, + { + "epoch": 0.9904008438818566, + "grad_norm": 0.42785435914993286, + "learning_rate": 1.7019373911406307e-05, + "loss": 1.4825, + "step": 9389 + }, + { + "epoch": 0.990506329113924, + "grad_norm": 0.4480273127555847, + "learning_rate": 1.699576850233916e-05, + "loss": 1.4794, + "step": 9390 + }, + { + "epoch": 0.9906118143459915, + "grad_norm": 0.4351575970649719, + "learning_rate": 1.697219583333286e-05, + "loss": 1.4851, + "step": 9391 + }, + { + "epoch": 0.9907172995780591, + "grad_norm": 0.3972213864326477, + "learning_rate": 1.694865585897781e-05, + "loss": 1.4876, + "step": 9392 + }, + { + "epoch": 0.9908227848101265, + "grad_norm": 0.4147304594516754, + "learning_rate": 1.6925148533927435e-05, + "loss": 1.4884, + "step": 9393 + }, + { + "epoch": 0.9909282700421941, + "grad_norm": 0.4648360013961792, + "learning_rate": 1.690167381289802e-05, + "loss": 1.4689, + "step": 9394 + }, + { + "epoch": 0.9910337552742616, + "grad_norm": 0.4486113488674164, + "learning_rate": 
1.6878231650668686e-05, + "loss": 1.5359, + "step": 9395 + }, + { + "epoch": 0.9911392405063291, + "grad_norm": 0.41870036721229553, + "learning_rate": 1.6854822002081265e-05, + "loss": 1.4622, + "step": 9396 + }, + { + "epoch": 0.9912447257383966, + "grad_norm": 0.40832987427711487, + "learning_rate": 1.6831444822040207e-05, + "loss": 1.4846, + "step": 9397 + }, + { + "epoch": 0.9913502109704642, + "grad_norm": 0.39077070355415344, + "learning_rate": 1.6808100065512536e-05, + "loss": 1.4909, + "step": 9398 + }, + { + "epoch": 0.9914556962025316, + "grad_norm": 0.41672998666763306, + "learning_rate": 1.67847876875277e-05, + "loss": 1.5076, + "step": 9399 + }, + { + "epoch": 0.9915611814345991, + "grad_norm": 0.42821717262268066, + "learning_rate": 1.6761507643177553e-05, + "loss": 1.4917, + "step": 9400 + }, + { + "epoch": 0.9916666666666667, + "grad_norm": 0.42270588874816895, + "learning_rate": 1.673825988761623e-05, + "loss": 1.4375, + "step": 9401 + }, + { + "epoch": 0.9917721518987341, + "grad_norm": 0.43380117416381836, + "learning_rate": 1.671504437606004e-05, + "loss": 1.4832, + "step": 9402 + }, + { + "epoch": 0.9918776371308017, + "grad_norm": 0.48586305975914, + "learning_rate": 1.6691861063787444e-05, + "loss": 1.4342, + "step": 9403 + }, + { + "epoch": 0.9919831223628692, + "grad_norm": 0.4231918156147003, + "learning_rate": 1.666870990613889e-05, + "loss": 1.4704, + "step": 9404 + }, + { + "epoch": 0.9920886075949367, + "grad_norm": 0.4204140305519104, + "learning_rate": 1.6645590858516798e-05, + "loss": 1.4609, + "step": 9405 + }, + { + "epoch": 0.9921940928270042, + "grad_norm": 0.4043950140476227, + "learning_rate": 1.662250387638544e-05, + "loss": 1.4548, + "step": 9406 + }, + { + "epoch": 0.9922995780590718, + "grad_norm": 0.40539121627807617, + "learning_rate": 1.6599448915270843e-05, + "loss": 1.4638, + "step": 9407 + }, + { + "epoch": 0.9924050632911392, + "grad_norm": 0.4334234893321991, + "learning_rate": 1.657642593076074e-05, + "loss": 
1.4846, + "step": 9408 + }, + { + "epoch": 0.9925105485232067, + "grad_norm": 0.40530937910079956, + "learning_rate": 1.655343487850443e-05, + "loss": 1.4879, + "step": 9409 + }, + { + "epoch": 0.9926160337552743, + "grad_norm": 0.4055024981498718, + "learning_rate": 1.6530475714212752e-05, + "loss": 1.4713, + "step": 9410 + }, + { + "epoch": 0.9927215189873417, + "grad_norm": 0.42495599389076233, + "learning_rate": 1.6507548393657978e-05, + "loss": 1.5232, + "step": 9411 + }, + { + "epoch": 0.9928270042194093, + "grad_norm": 0.4067240357398987, + "learning_rate": 1.6484652872673692e-05, + "loss": 1.4391, + "step": 9412 + }, + { + "epoch": 0.9929324894514768, + "grad_norm": 0.4060860574245453, + "learning_rate": 1.6461789107154772e-05, + "loss": 1.4521, + "step": 9413 + }, + { + "epoch": 0.9930379746835443, + "grad_norm": 0.4327999949455261, + "learning_rate": 1.6438957053057234e-05, + "loss": 1.5112, + "step": 9414 + }, + { + "epoch": 0.9931434599156118, + "grad_norm": 0.47486430406570435, + "learning_rate": 1.6416156666398208e-05, + "loss": 1.5041, + "step": 9415 + }, + { + "epoch": 0.9932489451476794, + "grad_norm": 0.41256389021873474, + "learning_rate": 1.6393387903255822e-05, + "loss": 1.4746, + "step": 9416 + }, + { + "epoch": 0.9933544303797468, + "grad_norm": 0.4513871669769287, + "learning_rate": 1.63706507197691e-05, + "loss": 1.5179, + "step": 9417 + }, + { + "epoch": 0.9934599156118143, + "grad_norm": 0.4124189615249634, + "learning_rate": 1.634794507213793e-05, + "loss": 1.4735, + "step": 9418 + }, + { + "epoch": 0.9935654008438819, + "grad_norm": 0.4506292939186096, + "learning_rate": 1.6325270916622947e-05, + "loss": 1.506, + "step": 9419 + }, + { + "epoch": 0.9936708860759493, + "grad_norm": 0.41866230964660645, + "learning_rate": 1.6302628209545423e-05, + "loss": 1.4476, + "step": 9420 + }, + { + "epoch": 0.9937763713080169, + "grad_norm": 0.4115118384361267, + "learning_rate": 1.6280016907287243e-05, + "loss": 1.5209, + "step": 9421 + }, + { + 
"epoch": 0.9938818565400844, + "grad_norm": 0.4035124182701111, + "learning_rate": 1.6257436966290764e-05, + "loss": 1.5037, + "step": 9422 + }, + { + "epoch": 0.9939873417721519, + "grad_norm": 0.42556723952293396, + "learning_rate": 1.623488834305878e-05, + "loss": 1.4688, + "step": 9423 + }, + { + "epoch": 0.9940928270042194, + "grad_norm": 0.40388932824134827, + "learning_rate": 1.62123709941544e-05, + "loss": 1.4537, + "step": 9424 + }, + { + "epoch": 0.994198312236287, + "grad_norm": 0.40017464756965637, + "learning_rate": 1.6189884876200976e-05, + "loss": 1.4763, + "step": 9425 + }, + { + "epoch": 0.9943037974683544, + "grad_norm": 0.4098859131336212, + "learning_rate": 1.6167429945882038e-05, + "loss": 1.514, + "step": 9426 + }, + { + "epoch": 0.994409282700422, + "grad_norm": 0.4903082847595215, + "learning_rate": 1.6145006159941168e-05, + "loss": 1.5052, + "step": 9427 + }, + { + "epoch": 0.9945147679324895, + "grad_norm": 0.448175847530365, + "learning_rate": 1.6122613475181976e-05, + "loss": 1.462, + "step": 9428 + }, + { + "epoch": 0.9946202531645569, + "grad_norm": 0.4094811975955963, + "learning_rate": 1.610025184846797e-05, + "loss": 1.5111, + "step": 9429 + }, + { + "epoch": 0.9947257383966245, + "grad_norm": 0.43230900168418884, + "learning_rate": 1.6077921236722464e-05, + "loss": 1.4393, + "step": 9430 + }, + { + "epoch": 0.994831223628692, + "grad_norm": 0.4265371859073639, + "learning_rate": 1.6055621596928567e-05, + "loss": 1.4648, + "step": 9431 + }, + { + "epoch": 0.9949367088607595, + "grad_norm": 0.4402812421321869, + "learning_rate": 1.6033352886129e-05, + "loss": 1.4972, + "step": 9432 + }, + { + "epoch": 0.995042194092827, + "grad_norm": 0.38128983974456787, + "learning_rate": 1.60111150614261e-05, + "loss": 1.4803, + "step": 9433 + }, + { + "epoch": 0.9951476793248946, + "grad_norm": 0.40581005811691284, + "learning_rate": 1.5988908079981696e-05, + "loss": 1.4758, + "step": 9434 + }, + { + "epoch": 0.995253164556962, + "grad_norm": 
0.42678025364875793, + "learning_rate": 1.5966731899017015e-05, + "loss": 1.478, + "step": 9435 + }, + { + "epoch": 0.9953586497890295, + "grad_norm": 0.44256579875946045, + "learning_rate": 1.5944586475812638e-05, + "loss": 1.4658, + "step": 9436 + }, + { + "epoch": 0.9954641350210971, + "grad_norm": 0.4607081115245819, + "learning_rate": 1.592247176770838e-05, + "loss": 1.4687, + "step": 9437 + }, + { + "epoch": 0.9955696202531645, + "grad_norm": 0.43047481775283813, + "learning_rate": 1.590038773210323e-05, + "loss": 1.4775, + "step": 9438 + }, + { + "epoch": 0.9956751054852321, + "grad_norm": 0.43549367785453796, + "learning_rate": 1.587833432645528e-05, + "loss": 1.5243, + "step": 9439 + }, + { + "epoch": 0.9957805907172996, + "grad_norm": 0.4098023772239685, + "learning_rate": 1.5856311508281594e-05, + "loss": 1.4736, + "step": 9440 + }, + { + "epoch": 0.9958860759493671, + "grad_norm": 0.41330060362815857, + "learning_rate": 1.5834319235158193e-05, + "loss": 1.4905, + "step": 9441 + }, + { + "epoch": 0.9959915611814346, + "grad_norm": 0.4074678122997284, + "learning_rate": 1.5812357464719904e-05, + "loss": 1.5066, + "step": 9442 + }, + { + "epoch": 0.9960970464135022, + "grad_norm": 0.4042770564556122, + "learning_rate": 1.5790426154660347e-05, + "loss": 1.4628, + "step": 9443 + }, + { + "epoch": 0.9962025316455696, + "grad_norm": 0.43630751967430115, + "learning_rate": 1.5768525262731804e-05, + "loss": 1.4804, + "step": 9444 + }, + { + "epoch": 0.9963080168776371, + "grad_norm": 0.40686318278312683, + "learning_rate": 1.574665474674514e-05, + "loss": 1.4822, + "step": 9445 + }, + { + "epoch": 0.9964135021097047, + "grad_norm": 0.4143632650375366, + "learning_rate": 1.5724814564569767e-05, + "loss": 1.4769, + "step": 9446 + }, + { + "epoch": 0.9965189873417721, + "grad_norm": 0.4255259335041046, + "learning_rate": 1.57030046741335e-05, + "loss": 1.4876, + "step": 9447 + }, + { + "epoch": 0.9966244725738397, + "grad_norm": 0.38538506627082825, + 
"learning_rate": 1.568122503342252e-05, + "loss": 1.4923, + "step": 9448 + }, + { + "epoch": 0.9967299578059071, + "grad_norm": 0.4252411723136902, + "learning_rate": 1.5659475600481297e-05, + "loss": 1.5136, + "step": 9449 + }, + { + "epoch": 0.9968354430379747, + "grad_norm": 0.4337424337863922, + "learning_rate": 1.5637756333412454e-05, + "loss": 1.5092, + "step": 9450 + }, + { + "epoch": 0.9969409282700422, + "grad_norm": 0.42196324467658997, + "learning_rate": 1.5616067190376765e-05, + "loss": 1.478, + "step": 9451 + }, + { + "epoch": 0.9970464135021097, + "grad_norm": 0.4065301716327667, + "learning_rate": 1.559440812959299e-05, + "loss": 1.5299, + "step": 9452 + }, + { + "epoch": 0.9971518987341772, + "grad_norm": 0.40432223677635193, + "learning_rate": 1.5572779109337886e-05, + "loss": 1.4636, + "step": 9453 + }, + { + "epoch": 0.9972573839662447, + "grad_norm": 0.4223288893699646, + "learning_rate": 1.555118008794605e-05, + "loss": 1.4642, + "step": 9454 + }, + { + "epoch": 0.9973628691983122, + "grad_norm": 0.4262867271900177, + "learning_rate": 1.552961102380987e-05, + "loss": 1.462, + "step": 9455 + }, + { + "epoch": 0.9974683544303797, + "grad_norm": 0.41093143820762634, + "learning_rate": 1.550807187537945e-05, + "loss": 1.4688, + "step": 9456 + }, + { + "epoch": 0.9975738396624473, + "grad_norm": 0.40972617268562317, + "learning_rate": 1.5486562601162513e-05, + "loss": 1.4575, + "step": 9457 + }, + { + "epoch": 0.9976793248945147, + "grad_norm": 0.4389844536781311, + "learning_rate": 1.5465083159724344e-05, + "loss": 1.4596, + "step": 9458 + }, + { + "epoch": 0.9977848101265823, + "grad_norm": 0.41310906410217285, + "learning_rate": 1.544363350968769e-05, + "loss": 1.4525, + "step": 9459 + }, + { + "epoch": 0.9978902953586498, + "grad_norm": 0.4174353778362274, + "learning_rate": 1.542221360973268e-05, + "loss": 1.4883, + "step": 9460 + }, + { + "epoch": 0.9979957805907173, + "grad_norm": 0.41183555126190186, + "learning_rate": 
1.5400823418596764e-05, + "loss": 1.4778, + "step": 9461 + }, + { + "epoch": 0.9981012658227848, + "grad_norm": 0.3995887339115143, + "learning_rate": 1.537946289507462e-05, + "loss": 1.4755, + "step": 9462 + }, + { + "epoch": 0.9982067510548523, + "grad_norm": 0.45345938205718994, + "learning_rate": 1.5358131998018067e-05, + "loss": 1.4989, + "step": 9463 + }, + { + "epoch": 0.9983122362869198, + "grad_norm": 0.4081273078918457, + "learning_rate": 1.5336830686336012e-05, + "loss": 1.499, + "step": 9464 + }, + { + "epoch": 0.9984177215189873, + "grad_norm": 0.39568179845809937, + "learning_rate": 1.5315558918994333e-05, + "loss": 1.5205, + "step": 9465 + }, + { + "epoch": 0.9985232067510549, + "grad_norm": 0.43256375193595886, + "learning_rate": 1.5294316655015837e-05, + "loss": 1.4661, + "step": 9466 + }, + { + "epoch": 0.9986286919831223, + "grad_norm": 0.3998488783836365, + "learning_rate": 1.527310385348017e-05, + "loss": 1.474, + "step": 9467 + }, + { + "epoch": 0.9987341772151899, + "grad_norm": 0.4185790419578552, + "learning_rate": 1.5251920473523708e-05, + "loss": 1.4758, + "step": 9468 + }, + { + "epoch": 0.9988396624472574, + "grad_norm": 0.43349960446357727, + "learning_rate": 1.523076647433954e-05, + "loss": 1.4707, + "step": 9469 + }, + { + "epoch": 0.9989451476793249, + "grad_norm": 0.4138621389865875, + "learning_rate": 1.5209641815177312e-05, + "loss": 1.4934, + "step": 9470 + }, + { + "epoch": 0.9990506329113924, + "grad_norm": 0.4318915009498596, + "learning_rate": 1.5188546455343223e-05, + "loss": 1.4917, + "step": 9471 + }, + { + "epoch": 0.99915611814346, + "grad_norm": 0.42510899901390076, + "learning_rate": 1.5167480354199909e-05, + "loss": 1.4478, + "step": 9472 + }, + { + "epoch": 0.9992616033755274, + "grad_norm": 0.39650705456733704, + "learning_rate": 1.5146443471166345e-05, + "loss": 1.4837, + "step": 9473 + }, + { + "epoch": 0.9993670886075949, + "grad_norm": 0.4121500551700592, + "learning_rate": 1.5125435765717816e-05, + "loss": 
1.5017, + "step": 9474 + }, + { + "epoch": 0.9994725738396625, + "grad_norm": 0.3991440236568451, + "learning_rate": 1.5104457197385799e-05, + "loss": 1.4771, + "step": 9475 + }, + { + "epoch": 0.9995780590717299, + "grad_norm": 0.41974425315856934, + "learning_rate": 1.508350772575791e-05, + "loss": 1.4759, + "step": 9476 + }, + { + "epoch": 0.9996835443037975, + "grad_norm": 0.4257811903953552, + "learning_rate": 1.5062587310477816e-05, + "loss": 1.469, + "step": 9477 + }, + { + "epoch": 0.999789029535865, + "grad_norm": 0.3897549510002136, + "learning_rate": 1.5041695911245136e-05, + "loss": 1.5022, + "step": 9478 + }, + { + "epoch": 0.9998945147679325, + "grad_norm": 0.4248228967189789, + "learning_rate": 1.5020833487815421e-05, + "loss": 1.4829, + "step": 9479 + }, + { + "epoch": 1.0, + "grad_norm": 1.152031421661377, + "learning_rate": 1.5e-05, + "loss": 1.4378, + "step": 9480 + } + ], + "logging_steps": 1, + "max_steps": 9480, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.6871337555631488e+16, + "train_batch_size": 1024, + "trial_name": null, + "trial_params": null +} diff --git a/saves-phi/checkpoint-9480/training_args.bin b/saves-phi/checkpoint-9480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..be8370cebf2ebe3c234938cd285184806b0d49c2 --- /dev/null +++ b/saves-phi/checkpoint-9480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0204bbceb9327b2ca997655bcece8da9d1f6f1457941840c5d8d4ede0f6275c6 +size 5112 diff --git a/saves-phi/config.json b/saves-phi/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3fb5342bea6ae91c8f86ba3e8c07161371845c2a --- /dev/null +++ 
b/saves-phi/config.json @@ -0,0 +1,29 @@ +{ + "architectures": [ + "PhiForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 1, + "embd_pdrop": 0.0, + "eos_token_id": 2, + "hidden_act": "gelu", + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 1024, + "layer_norm_eps": 1e-05, + "max_position_embeddings": 2048, + "model_type": "phi", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "partial_rotary_factor": 0.5, + "qk_layernorm": false, + "resid_pdrop": 0.0, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-phi/generation_config.json b/saves-phi/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b2fc224da8a3685f78c733a0ef85e67242c17b5a --- /dev/null +++ b/saves-phi/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.42.4" +} diff --git a/saves-phi/model.safetensors b/saves-phi/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..69f04e8e7486a18258f458ce9367d8820406425a --- /dev/null +++ b/saves-phi/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf90c00665a3e7557aa7e8faf3dd2d1d9d3466763fbf635fbd74532d2000842b +size 7848944 diff --git a/saves-phi/result.log b/saves-phi/result.log new file mode 100644 index 0000000000000000000000000000000000000000..885137522a289c8c1de7ffe751e212de48414c2f --- /dev/null +++ b/saves-phi/result.log @@ -0,0 +1 @@ +{'train_runtime': 1640.145, 'train_samples_per_second': 5918.138, 'train_steps_per_second': 5.78, 'train_loss': 1.7524697405502263, 'epoch': 1.0} \ No newline at end of file diff --git a/saves-phi/special_tokens_map.json b/saves-phi/special_tokens_map.json new file mode 100644 index 
0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-phi/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-phi/tokenizer.json b/saves-phi/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-phi/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + 
"trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + 
"ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 
304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 
453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 
599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + 
"éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + 
"ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 
1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 
1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, 
+ "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + 
"çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 
1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + 
"Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + 
"产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 
1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-phi/tokenizer_config.json b/saves-phi/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-phi/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "model_max_length": 4096, + 
"pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-qwen2-bf16/checkpoint-9480/config.json b/saves-qwen2-bf16/checkpoint-9480/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e33d1998b689b761506a91592eb7dca0e68f762d --- /dev/null +++ b/saves-qwen2-bf16/checkpoint-9480/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 768, + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen2", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "rms_norm_eps": 1e-06, + "rope_theta": 10000.0, + "sliding_window": 4096, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.42.0", + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 2000 +} diff --git a/saves-qwen2-bf16/checkpoint-9480/generation_config.json b/saves-qwen2-bf16/checkpoint-9480/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..589084e0e774668a082e0ca2290729cd99323199 --- /dev/null +++ b/saves-qwen2-bf16/checkpoint-9480/generation_config.json @@ -0,0 +1,4 @@ +{ + "_from_model_config": true, + "transformers_version": "4.42.0" +} diff --git a/saves-qwen2-bf16/checkpoint-9480/model.safetensors b/saves-qwen2-bf16/checkpoint-9480/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..df481c812590997300b0ca047b956469cd422cf7 --- /dev/null +++ b/saves-qwen2-bf16/checkpoint-9480/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb44588318a94c53b132008a197bda33538e1c9d29ae0b62576ed6676dd9f79f +size 8351424 diff --git a/saves-qwen2-bf16/checkpoint-9480/optimizer.pt b/saves-qwen2-bf16/checkpoint-9480/optimizer.pt new file mode 100644 index 
0000000000000000000000000000000000000000..0f7b01b18a375cc7910346ef78c3a200e12e83d7 --- /dev/null +++ b/saves-qwen2-bf16/checkpoint-9480/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:500529de803ceabf46792e522a2e7dd573bddbdb9aae3ace7f333c695cb4662b +size 16719504 diff --git a/saves-qwen2-bf16/checkpoint-9480/rng_state.pth b/saves-qwen2-bf16/checkpoint-9480/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f8291cd6ce87668b786a72f3e93d072fbe54902 --- /dev/null +++ b/saves-qwen2-bf16/checkpoint-9480/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c917636c7a58af68a29056522a757e9f9b99005b776641aa157c536967817d +size 14244 diff --git a/saves-qwen2-bf16/checkpoint-9480/scheduler.pt b/saves-qwen2-bf16/checkpoint-9480/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..63473f23a031ab0f869bb406d5cf89839262f03d --- /dev/null +++ b/saves-qwen2-bf16/checkpoint-9480/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbb2bea2f7536b844ad9bb1bf6c3877fce0b1eb4d96764e140560dbf207ce6aa +size 1064 diff --git a/saves-qwen2-bf16/checkpoint-9480/special_tokens_map.json b/saves-qwen2-bf16/checkpoint-9480/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-qwen2-bf16/checkpoint-9480/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-qwen2-bf16/checkpoint-9480/tokenizer.json 
b/saves-qwen2-bf16/checkpoint-9480/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-qwen2-bf16/checkpoint-9480/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, 
+ "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 
196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 
347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 
494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, 
+ "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 
785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 
926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + 
"人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + 
"ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 
1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 
1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 
1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + 
"Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + 
"Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, 
+ "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-qwen2-bf16/checkpoint-9480/tokenizer_config.json b/saves-qwen2-bf16/checkpoint-9480/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-qwen2-bf16/checkpoint-9480/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": 
"<|im_end|>", + "errors": "replace", + "model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-qwen2-bf16/checkpoint-9480/trainer_state.json b/saves-qwen2-bf16/checkpoint-9480/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..59277ee6987143d2f7155fb28629561d7bc8c5ad --- /dev/null +++ b/saves-qwen2-bf16/checkpoint-9480/trainer_state.json @@ -0,0 +1,6669 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 9480, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0010548523206751054, + "grad_norm": 1.287057638168335, + "learning_rate": 0.00015822784810126583, + "loss": 7.493, + "step": 10 + }, + { + "epoch": 0.002109704641350211, + "grad_norm": 1.1503698825836182, + "learning_rate": 0.00031645569620253165, + "loss": 6.8817, + "step": 20 + }, + { + "epoch": 0.0031645569620253164, + "grad_norm": 0.8247020244598389, + "learning_rate": 0.00047468354430379745, + "loss": 6.2305, + "step": 30 + }, + { + "epoch": 0.004219409282700422, + "grad_norm": 1.1445114612579346, + "learning_rate": 0.0006329113924050633, + "loss": 5.7539, + "step": 40 + }, + { + "epoch": 0.005274261603375527, + "grad_norm": 1.1456326246261597, + "learning_rate": 0.0007911392405063291, + "loss": 5.2901, + "step": 50 + }, + { + "epoch": 0.006329113924050633, + "grad_norm": 1.277427315711975, + "learning_rate": 0.0009493670886075949, + "loss": 4.7825, + "step": 60 + }, + { + "epoch": 0.007383966244725738, + "grad_norm": 0.8188556432723999, + "learning_rate": 0.0011075949367088608, + "loss": 4.3843, + "step": 70 + }, + { + "epoch": 0.008438818565400843, + "grad_norm": 1.1751933097839355, + "learning_rate": 0.0012658227848101266, + "loss": 4.1334, + "step": 80 + }, + { + "epoch": 0.00949367088607595, + "grad_norm": 
1.001449465751648, + "learning_rate": 0.0014240506329113926, + "loss": 3.9229, + "step": 90 + }, + { + "epoch": 0.010548523206751054, + "grad_norm": 1.0194555521011353, + "learning_rate": 0.0015, + "loss": 3.7921, + "step": 100 + }, + { + "epoch": 0.011603375527426161, + "grad_norm": 0.8997014760971069, + "learning_rate": 0.0015, + "loss": 3.6375, + "step": 110 + }, + { + "epoch": 0.012658227848101266, + "grad_norm": 0.8892844915390015, + "learning_rate": 0.0015, + "loss": 3.5209, + "step": 120 + }, + { + "epoch": 0.013713080168776372, + "grad_norm": 0.8641502261161804, + "learning_rate": 0.0015, + "loss": 3.4067, + "step": 130 + }, + { + "epoch": 0.014767932489451477, + "grad_norm": 0.6706660985946655, + "learning_rate": 0.0015, + "loss": 3.303, + "step": 140 + }, + { + "epoch": 0.015822784810126583, + "grad_norm": 1.0603797435760498, + "learning_rate": 0.0015, + "loss": 3.2231, + "step": 150 + }, + { + "epoch": 0.016877637130801686, + "grad_norm": 0.6161502003669739, + "learning_rate": 0.0015, + "loss": 3.1518, + "step": 160 + }, + { + "epoch": 0.017932489451476793, + "grad_norm": 0.8447583913803101, + "learning_rate": 0.0015, + "loss": 3.077, + "step": 170 + }, + { + "epoch": 0.0189873417721519, + "grad_norm": 0.9560238122940063, + "learning_rate": 0.0015, + "loss": 3.032, + "step": 180 + }, + { + "epoch": 0.020042194092827006, + "grad_norm": 0.7279493808746338, + "learning_rate": 0.0015, + "loss": 2.9698, + "step": 190 + }, + { + "epoch": 0.02109704641350211, + "grad_norm": 0.7134253978729248, + "learning_rate": 0.0015, + "loss": 2.9251, + "step": 200 + }, + { + "epoch": 0.022151898734177215, + "grad_norm": 0.8752930760383606, + "learning_rate": 0.0015, + "loss": 2.8832, + "step": 210 + }, + { + "epoch": 0.023206751054852322, + "grad_norm": 0.9525153636932373, + "learning_rate": 0.0015, + "loss": 2.8475, + "step": 220 + }, + { + "epoch": 0.024261603375527425, + "grad_norm": 0.7820525765419006, + "learning_rate": 0.0015, + "loss": 2.7967, + "step": 230 + }, + { 
+ "epoch": 0.02531645569620253, + "grad_norm": 1.0114189386367798, + "learning_rate": 0.0015, + "loss": 2.7558, + "step": 240 + }, + { + "epoch": 0.026371308016877638, + "grad_norm": 0.8659444451332092, + "learning_rate": 0.0015, + "loss": 2.7227, + "step": 250 + }, + { + "epoch": 0.027426160337552744, + "grad_norm": 0.7615576982498169, + "learning_rate": 0.0015, + "loss": 2.6843, + "step": 260 + }, + { + "epoch": 0.028481012658227847, + "grad_norm": 0.966295599937439, + "learning_rate": 0.0015, + "loss": 2.6542, + "step": 270 + }, + { + "epoch": 0.029535864978902954, + "grad_norm": 0.6935604810714722, + "learning_rate": 0.0015, + "loss": 2.6189, + "step": 280 + }, + { + "epoch": 0.03059071729957806, + "grad_norm": 0.9959357976913452, + "learning_rate": 0.0015, + "loss": 2.5925, + "step": 290 + }, + { + "epoch": 0.03164556962025317, + "grad_norm": 0.9867107272148132, + "learning_rate": 0.0015, + "loss": 2.5665, + "step": 300 + }, + { + "epoch": 0.03270042194092827, + "grad_norm": 1.0502711534500122, + "learning_rate": 0.0015, + "loss": 2.543, + "step": 310 + }, + { + "epoch": 0.03375527426160337, + "grad_norm": 1.0446499586105347, + "learning_rate": 0.0015, + "loss": 2.5109, + "step": 320 + }, + { + "epoch": 0.03481012658227848, + "grad_norm": 1.0539124011993408, + "learning_rate": 0.0015, + "loss": 2.5022, + "step": 330 + }, + { + "epoch": 0.035864978902953586, + "grad_norm": 0.9436084628105164, + "learning_rate": 0.0015, + "loss": 2.4776, + "step": 340 + }, + { + "epoch": 0.03691983122362869, + "grad_norm": 1.2166537046432495, + "learning_rate": 0.0015, + "loss": 2.4373, + "step": 350 + }, + { + "epoch": 0.0379746835443038, + "grad_norm": 0.8346443772315979, + "learning_rate": 0.0015, + "loss": 2.4208, + "step": 360 + }, + { + "epoch": 0.039029535864978905, + "grad_norm": 1.111228346824646, + "learning_rate": 0.0015, + "loss": 2.4084, + "step": 370 + }, + { + "epoch": 0.04008438818565401, + "grad_norm": 0.9931886196136475, + "learning_rate": 0.0015, + "loss": 
2.3971, + "step": 380 + }, + { + "epoch": 0.04113924050632911, + "grad_norm": 0.8816179037094116, + "learning_rate": 0.0015, + "loss": 2.3766, + "step": 390 + }, + { + "epoch": 0.04219409282700422, + "grad_norm": 1.3231067657470703, + "learning_rate": 0.0015, + "loss": 2.3473, + "step": 400 + }, + { + "epoch": 0.043248945147679324, + "grad_norm": 1.3582069873809814, + "learning_rate": 0.0015, + "loss": 2.3382, + "step": 410 + }, + { + "epoch": 0.04430379746835443, + "grad_norm": 0.9265342950820923, + "learning_rate": 0.0015, + "loss": 2.3211, + "step": 420 + }, + { + "epoch": 0.04535864978902954, + "grad_norm": 0.8295995593070984, + "learning_rate": 0.0015, + "loss": 2.2953, + "step": 430 + }, + { + "epoch": 0.046413502109704644, + "grad_norm": 0.7636935710906982, + "learning_rate": 0.0015, + "loss": 2.2797, + "step": 440 + }, + { + "epoch": 0.04746835443037975, + "grad_norm": 0.9759564399719238, + "learning_rate": 0.0015, + "loss": 2.2836, + "step": 450 + }, + { + "epoch": 0.04852320675105485, + "grad_norm": 0.954789936542511, + "learning_rate": 0.0015, + "loss": 2.2589, + "step": 460 + }, + { + "epoch": 0.049578059071729956, + "grad_norm": 0.9172025322914124, + "learning_rate": 0.0015, + "loss": 2.2413, + "step": 470 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 1.5287584066390991, + "learning_rate": 0.0015, + "loss": 2.2391, + "step": 480 + }, + { + "epoch": 0.05168776371308017, + "grad_norm": 0.9626224637031555, + "learning_rate": 0.0015, + "loss": 2.2156, + "step": 490 + }, + { + "epoch": 0.052742616033755275, + "grad_norm": 1.1066144704818726, + "learning_rate": 0.0015, + "loss": 2.204, + "step": 500 + }, + { + "epoch": 0.05379746835443038, + "grad_norm": 1.016810655593872, + "learning_rate": 0.0015, + "loss": 2.1937, + "step": 510 + }, + { + "epoch": 0.05485232067510549, + "grad_norm": 1.048801302909851, + "learning_rate": 0.0015, + "loss": 2.1789, + "step": 520 + }, + { + "epoch": 0.05590717299578059, + "grad_norm": 0.7893497347831726, + 
"learning_rate": 0.0015, + "loss": 2.1647, + "step": 530 + }, + { + "epoch": 0.056962025316455694, + "grad_norm": 0.7989654541015625, + "learning_rate": 0.0015, + "loss": 2.1446, + "step": 540 + }, + { + "epoch": 0.0580168776371308, + "grad_norm": 0.6843968033790588, + "learning_rate": 0.0015, + "loss": 2.1471, + "step": 550 + }, + { + "epoch": 0.05907172995780591, + "grad_norm": 0.6927700042724609, + "learning_rate": 0.0015, + "loss": 2.1136, + "step": 560 + }, + { + "epoch": 0.060126582278481014, + "grad_norm": 0.7592052221298218, + "learning_rate": 0.0015, + "loss": 2.1168, + "step": 570 + }, + { + "epoch": 0.06118143459915612, + "grad_norm": 0.8444345593452454, + "learning_rate": 0.0015, + "loss": 2.1171, + "step": 580 + }, + { + "epoch": 0.06223628691983123, + "grad_norm": 0.8080502152442932, + "learning_rate": 0.0015, + "loss": 2.0922, + "step": 590 + }, + { + "epoch": 0.06329113924050633, + "grad_norm": 0.9479168057441711, + "learning_rate": 0.0015, + "loss": 2.0748, + "step": 600 + }, + { + "epoch": 0.06434599156118144, + "grad_norm": 0.8750613331794739, + "learning_rate": 0.0015, + "loss": 2.0789, + "step": 610 + }, + { + "epoch": 0.06540084388185655, + "grad_norm": 0.7039671540260315, + "learning_rate": 0.0015, + "loss": 2.0664, + "step": 620 + }, + { + "epoch": 0.06645569620253164, + "grad_norm": 1.0618560314178467, + "learning_rate": 0.0015, + "loss": 2.0536, + "step": 630 + }, + { + "epoch": 0.06751054852320675, + "grad_norm": 0.769457221031189, + "learning_rate": 0.0015, + "loss": 2.0597, + "step": 640 + }, + { + "epoch": 0.06856540084388185, + "grad_norm": 1.1503156423568726, + "learning_rate": 0.0015, + "loss": 2.0516, + "step": 650 + }, + { + "epoch": 0.06962025316455696, + "grad_norm": 0.7246300578117371, + "learning_rate": 0.0015, + "loss": 2.0387, + "step": 660 + }, + { + "epoch": 0.07067510548523206, + "grad_norm": 0.9715001583099365, + "learning_rate": 0.0015, + "loss": 2.0169, + "step": 670 + }, + { + "epoch": 0.07172995780590717, + 
"grad_norm": 0.9007167816162109, + "learning_rate": 0.0015, + "loss": 2.014, + "step": 680 + }, + { + "epoch": 0.07278481012658228, + "grad_norm": 0.6881843209266663, + "learning_rate": 0.0015, + "loss": 2.0238, + "step": 690 + }, + { + "epoch": 0.07383966244725738, + "grad_norm": 0.91755211353302, + "learning_rate": 0.0015, + "loss": 2.0044, + "step": 700 + }, + { + "epoch": 0.07489451476793249, + "grad_norm": 0.9991128444671631, + "learning_rate": 0.0015, + "loss": 1.9964, + "step": 710 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 1.063872218132019, + "learning_rate": 0.0015, + "loss": 1.9769, + "step": 720 + }, + { + "epoch": 0.0770042194092827, + "grad_norm": 0.8963314890861511, + "learning_rate": 0.0015, + "loss": 1.9727, + "step": 730 + }, + { + "epoch": 0.07805907172995781, + "grad_norm": 0.8157698512077332, + "learning_rate": 0.0015, + "loss": 1.9798, + "step": 740 + }, + { + "epoch": 0.07911392405063292, + "grad_norm": 0.9056829214096069, + "learning_rate": 0.0015, + "loss": 1.9594, + "step": 750 + }, + { + "epoch": 0.08016877637130802, + "grad_norm": 2.2598671913146973, + "learning_rate": 0.0015, + "loss": 1.966, + "step": 760 + }, + { + "epoch": 0.08122362869198312, + "grad_norm": 1.7556381225585938, + "learning_rate": 0.0015, + "loss": 1.9694, + "step": 770 + }, + { + "epoch": 0.08227848101265822, + "grad_norm": 0.961376965045929, + "learning_rate": 0.0015, + "loss": 1.9446, + "step": 780 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 0.6460594534873962, + "learning_rate": 0.0015, + "loss": 1.9423, + "step": 790 + }, + { + "epoch": 0.08438818565400844, + "grad_norm": 0.685418426990509, + "learning_rate": 0.0015, + "loss": 1.9244, + "step": 800 + }, + { + "epoch": 0.08544303797468354, + "grad_norm": 0.7907118797302246, + "learning_rate": 0.0015, + "loss": 1.9153, + "step": 810 + }, + { + "epoch": 0.08649789029535865, + "grad_norm": 1.173556923866272, + "learning_rate": 0.0015, + "loss": 1.9268, + "step": 820 + }, + { + "epoch": 
0.08755274261603375, + "grad_norm": 0.8807255029678345, + "learning_rate": 0.0015, + "loss": 1.9277, + "step": 830 + }, + { + "epoch": 0.08860759493670886, + "grad_norm": 0.8901069760322571, + "learning_rate": 0.0015, + "loss": 1.906, + "step": 840 + }, + { + "epoch": 0.08966244725738397, + "grad_norm": 0.6625086665153503, + "learning_rate": 0.0015, + "loss": 1.9081, + "step": 850 + }, + { + "epoch": 0.09071729957805907, + "grad_norm": 0.8131834268569946, + "learning_rate": 0.0015, + "loss": 1.9053, + "step": 860 + }, + { + "epoch": 0.09177215189873418, + "grad_norm": 0.7555605173110962, + "learning_rate": 0.0015, + "loss": 1.902, + "step": 870 + }, + { + "epoch": 0.09282700421940929, + "grad_norm": 1.0310981273651123, + "learning_rate": 0.0015, + "loss": 1.8961, + "step": 880 + }, + { + "epoch": 0.0938818565400844, + "grad_norm": 0.8803991079330444, + "learning_rate": 0.0015, + "loss": 1.8881, + "step": 890 + }, + { + "epoch": 0.0949367088607595, + "grad_norm": 1.0702396631240845, + "learning_rate": 0.0015, + "loss": 1.8865, + "step": 900 + }, + { + "epoch": 0.09599156118143459, + "grad_norm": 0.9415758848190308, + "learning_rate": 0.0015, + "loss": 1.8874, + "step": 910 + }, + { + "epoch": 0.0970464135021097, + "grad_norm": 0.6488352417945862, + "learning_rate": 0.0015, + "loss": 1.8798, + "step": 920 + }, + { + "epoch": 0.0981012658227848, + "grad_norm": 0.7208075523376465, + "learning_rate": 0.0015, + "loss": 1.8594, + "step": 930 + }, + { + "epoch": 0.09915611814345991, + "grad_norm": 0.7142037749290466, + "learning_rate": 0.0015, + "loss": 1.8703, + "step": 940 + }, + { + "epoch": 0.10021097046413502, + "grad_norm": 0.9566423892974854, + "learning_rate": 0.0015, + "loss": 1.8532, + "step": 950 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 0.7550307512283325, + "learning_rate": 0.0015, + "loss": 1.8603, + "step": 960 + }, + { + "epoch": 0.10232067510548523, + "grad_norm": 1.5818613767623901, + "learning_rate": 0.0015, + "loss": 1.8627, + "step": 970 
+ }, + { + "epoch": 0.10337552742616034, + "grad_norm": 0.8488968014717102, + "learning_rate": 0.0015, + "loss": 1.8433, + "step": 980 + }, + { + "epoch": 0.10443037974683544, + "grad_norm": 0.6491256356239319, + "learning_rate": 0.0015, + "loss": 1.8331, + "step": 990 + }, + { + "epoch": 0.10548523206751055, + "grad_norm": 1.0147662162780762, + "learning_rate": 0.0015, + "loss": 1.8486, + "step": 1000 + }, + { + "epoch": 0.10654008438818566, + "grad_norm": 0.7335547804832458, + "learning_rate": 0.0015, + "loss": 1.8418, + "step": 1010 + }, + { + "epoch": 0.10759493670886076, + "grad_norm": 0.9607897996902466, + "learning_rate": 0.0015, + "loss": 1.8293, + "step": 1020 + }, + { + "epoch": 0.10864978902953587, + "grad_norm": 0.8564672470092773, + "learning_rate": 0.0015, + "loss": 1.8439, + "step": 1030 + }, + { + "epoch": 0.10970464135021098, + "grad_norm": 0.733195424079895, + "learning_rate": 0.0015, + "loss": 1.8199, + "step": 1040 + }, + { + "epoch": 0.11075949367088607, + "grad_norm": 1.1174650192260742, + "learning_rate": 0.0015, + "loss": 1.8218, + "step": 1050 + }, + { + "epoch": 0.11181434599156118, + "grad_norm": 0.6878591179847717, + "learning_rate": 0.0015, + "loss": 1.8116, + "step": 1060 + }, + { + "epoch": 0.11286919831223628, + "grad_norm": 0.6646955013275146, + "learning_rate": 0.0015, + "loss": 1.8037, + "step": 1070 + }, + { + "epoch": 0.11392405063291139, + "grad_norm": 0.9651567339897156, + "learning_rate": 0.0015, + "loss": 1.8073, + "step": 1080 + }, + { + "epoch": 0.1149789029535865, + "grad_norm": 0.684890627861023, + "learning_rate": 0.0015, + "loss": 1.816, + "step": 1090 + }, + { + "epoch": 0.1160337552742616, + "grad_norm": 0.7184986472129822, + "learning_rate": 0.0015, + "loss": 1.8012, + "step": 1100 + }, + { + "epoch": 0.11708860759493671, + "grad_norm": 0.7160841822624207, + "learning_rate": 0.0015, + "loss": 1.8065, + "step": 1110 + }, + { + "epoch": 0.11814345991561181, + "grad_norm": 0.6141655445098877, + "learning_rate": 0.0015, 
+ "loss": 1.8002, + "step": 1120 + }, + { + "epoch": 0.11919831223628692, + "grad_norm": 1.2699943780899048, + "learning_rate": 0.0015, + "loss": 1.7895, + "step": 1130 + }, + { + "epoch": 0.12025316455696203, + "grad_norm": 0.6769657731056213, + "learning_rate": 0.0015, + "loss": 1.8034, + "step": 1140 + }, + { + "epoch": 0.12130801687763713, + "grad_norm": 1.2477600574493408, + "learning_rate": 0.0015, + "loss": 1.7845, + "step": 1150 + }, + { + "epoch": 0.12236286919831224, + "grad_norm": 0.64894038438797, + "learning_rate": 0.0015, + "loss": 1.7927, + "step": 1160 + }, + { + "epoch": 0.12341772151898735, + "grad_norm": 1.0948996543884277, + "learning_rate": 0.0015, + "loss": 1.7806, + "step": 1170 + }, + { + "epoch": 0.12447257383966245, + "grad_norm": 0.7002706527709961, + "learning_rate": 0.0015, + "loss": 1.7644, + "step": 1180 + }, + { + "epoch": 0.12552742616033755, + "grad_norm": 0.7476921081542969, + "learning_rate": 0.0015, + "loss": 1.7703, + "step": 1190 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 0.6751740574836731, + "learning_rate": 0.0015, + "loss": 1.781, + "step": 1200 + }, + { + "epoch": 0.12763713080168776, + "grad_norm": 0.6483523845672607, + "learning_rate": 0.0015, + "loss": 1.7759, + "step": 1210 + }, + { + "epoch": 0.12869198312236288, + "grad_norm": 0.6249744892120361, + "learning_rate": 0.0015, + "loss": 1.76, + "step": 1220 + }, + { + "epoch": 0.12974683544303797, + "grad_norm": 0.947671115398407, + "learning_rate": 0.0015, + "loss": 1.749, + "step": 1230 + }, + { + "epoch": 0.1308016877637131, + "grad_norm": 0.8034379482269287, + "learning_rate": 0.0015, + "loss": 1.77, + "step": 1240 + }, + { + "epoch": 0.13185654008438819, + "grad_norm": 0.7275480628013611, + "learning_rate": 0.0015, + "loss": 1.7711, + "step": 1250 + }, + { + "epoch": 0.13291139240506328, + "grad_norm": 0.6855311989784241, + "learning_rate": 0.0015, + "loss": 1.7571, + "step": 1260 + }, + { + "epoch": 0.1339662447257384, + "grad_norm": 
0.6835574507713318, + "learning_rate": 0.0015, + "loss": 1.7524, + "step": 1270 + }, + { + "epoch": 0.1350210970464135, + "grad_norm": 0.7019007802009583, + "learning_rate": 0.0015, + "loss": 1.7617, + "step": 1280 + }, + { + "epoch": 0.1360759493670886, + "grad_norm": 0.7281892895698547, + "learning_rate": 0.0015, + "loss": 1.7492, + "step": 1290 + }, + { + "epoch": 0.1371308016877637, + "grad_norm": 0.7096613645553589, + "learning_rate": 0.0015, + "loss": 1.736, + "step": 1300 + }, + { + "epoch": 0.13818565400843882, + "grad_norm": 0.6526144742965698, + "learning_rate": 0.0015, + "loss": 1.7432, + "step": 1310 + }, + { + "epoch": 0.13924050632911392, + "grad_norm": 1.017628788948059, + "learning_rate": 0.0015, + "loss": 1.749, + "step": 1320 + }, + { + "epoch": 0.14029535864978904, + "grad_norm": 0.7124719619750977, + "learning_rate": 0.0015, + "loss": 1.735, + "step": 1330 + }, + { + "epoch": 0.14135021097046413, + "grad_norm": 0.6463326215744019, + "learning_rate": 0.0015, + "loss": 1.7382, + "step": 1340 + }, + { + "epoch": 0.14240506329113925, + "grad_norm": 0.6644458770751953, + "learning_rate": 0.0015, + "loss": 1.7433, + "step": 1350 + }, + { + "epoch": 0.14345991561181434, + "grad_norm": 0.6406483054161072, + "learning_rate": 0.0015, + "loss": 1.7316, + "step": 1360 + }, + { + "epoch": 0.14451476793248946, + "grad_norm": 0.7143809199333191, + "learning_rate": 0.0015, + "loss": 1.729, + "step": 1370 + }, + { + "epoch": 0.14556962025316456, + "grad_norm": 0.7517651319503784, + "learning_rate": 0.0015, + "loss": 1.7217, + "step": 1380 + }, + { + "epoch": 0.14662447257383968, + "grad_norm": 0.6045621037483215, + "learning_rate": 0.0015, + "loss": 1.7301, + "step": 1390 + }, + { + "epoch": 0.14767932489451477, + "grad_norm": 0.7317776679992676, + "learning_rate": 0.0015, + "loss": 1.7126, + "step": 1400 + }, + { + "epoch": 0.14873417721518986, + "grad_norm": 0.6872985363006592, + "learning_rate": 0.0015, + "loss": 1.7179, + "step": 1410 + }, + { + "epoch": 
0.14978902953586498, + "grad_norm": 1.0588369369506836, + "learning_rate": 0.0015, + "loss": 1.726, + "step": 1420 + }, + { + "epoch": 0.15084388185654007, + "grad_norm": 0.721889317035675, + "learning_rate": 0.0015, + "loss": 1.7155, + "step": 1430 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 0.6461079716682434, + "learning_rate": 0.0015, + "loss": 1.71, + "step": 1440 + }, + { + "epoch": 0.1529535864978903, + "grad_norm": 0.6328428983688354, + "learning_rate": 0.0015, + "loss": 1.7119, + "step": 1450 + }, + { + "epoch": 0.1540084388185654, + "grad_norm": 0.7252086400985718, + "learning_rate": 0.0015, + "loss": 1.7071, + "step": 1460 + }, + { + "epoch": 0.1550632911392405, + "grad_norm": 0.762008786201477, + "learning_rate": 0.0015, + "loss": 1.7101, + "step": 1470 + }, + { + "epoch": 0.15611814345991562, + "grad_norm": 0.6981770992279053, + "learning_rate": 0.0015, + "loss": 1.7072, + "step": 1480 + }, + { + "epoch": 0.1571729957805907, + "grad_norm": 0.6552959680557251, + "learning_rate": 0.0015, + "loss": 1.705, + "step": 1490 + }, + { + "epoch": 0.15822784810126583, + "grad_norm": 0.6281026601791382, + "learning_rate": 0.0015, + "loss": 1.7046, + "step": 1500 + }, + { + "epoch": 0.15928270042194093, + "grad_norm": 0.7670705914497375, + "learning_rate": 0.0015, + "loss": 1.7056, + "step": 1510 + }, + { + "epoch": 0.16033755274261605, + "grad_norm": 0.6418710350990295, + "learning_rate": 0.0015, + "loss": 1.708, + "step": 1520 + }, + { + "epoch": 0.16139240506329114, + "grad_norm": 0.6629166603088379, + "learning_rate": 0.0015, + "loss": 1.6885, + "step": 1530 + }, + { + "epoch": 0.16244725738396623, + "grad_norm": 0.6065171360969543, + "learning_rate": 0.0015, + "loss": 1.6795, + "step": 1540 + }, + { + "epoch": 0.16350210970464135, + "grad_norm": 0.7254469990730286, + "learning_rate": 0.0015, + "loss": 1.6805, + "step": 1550 + }, + { + "epoch": 0.16455696202531644, + "grad_norm": 0.7255260944366455, + "learning_rate": 0.0015, + "loss": 1.684, + 
"step": 1560 + }, + { + "epoch": 0.16561181434599156, + "grad_norm": 0.7033355832099915, + "learning_rate": 0.0015, + "loss": 1.6746, + "step": 1570 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 0.717984139919281, + "learning_rate": 0.0015, + "loss": 1.6859, + "step": 1580 + }, + { + "epoch": 0.16772151898734178, + "grad_norm": 1.111241102218628, + "learning_rate": 0.0015, + "loss": 1.6855, + "step": 1590 + }, + { + "epoch": 0.16877637130801687, + "grad_norm": 0.6471601128578186, + "learning_rate": 0.0015, + "loss": 1.6874, + "step": 1600 + }, + { + "epoch": 0.169831223628692, + "grad_norm": 0.7955036163330078, + "learning_rate": 0.0015, + "loss": 1.6845, + "step": 1610 + }, + { + "epoch": 0.17088607594936708, + "grad_norm": 0.6496426463127136, + "learning_rate": 0.0015, + "loss": 1.6521, + "step": 1620 + }, + { + "epoch": 0.1719409282700422, + "grad_norm": 0.6558811664581299, + "learning_rate": 0.0015, + "loss": 1.6693, + "step": 1630 + }, + { + "epoch": 0.1729957805907173, + "grad_norm": 0.6988083720207214, + "learning_rate": 0.0015, + "loss": 1.6729, + "step": 1640 + }, + { + "epoch": 0.17405063291139242, + "grad_norm": 1.0070551633834839, + "learning_rate": 0.0015, + "loss": 1.6632, + "step": 1650 + }, + { + "epoch": 0.1751054852320675, + "grad_norm": 0.6134887337684631, + "learning_rate": 0.0015, + "loss": 1.6628, + "step": 1660 + }, + { + "epoch": 0.17616033755274263, + "grad_norm": 0.9757758378982544, + "learning_rate": 0.0015, + "loss": 1.6783, + "step": 1670 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 1.0612666606903076, + "learning_rate": 0.0015, + "loss": 1.6654, + "step": 1680 + }, + { + "epoch": 0.17827004219409281, + "grad_norm": 0.880418062210083, + "learning_rate": 0.0015, + "loss": 1.6585, + "step": 1690 + }, + { + "epoch": 0.17932489451476794, + "grad_norm": 0.795171856880188, + "learning_rate": 0.0015, + "loss": 1.669, + "step": 1700 + }, + { + "epoch": 0.18037974683544303, + "grad_norm": 0.7361051440238953, + 
"learning_rate": 0.0015, + "loss": 1.6457, + "step": 1710 + }, + { + "epoch": 0.18143459915611815, + "grad_norm": 0.6480020880699158, + "learning_rate": 0.0015, + "loss": 1.6483, + "step": 1720 + }, + { + "epoch": 0.18248945147679324, + "grad_norm": 0.6146023869514465, + "learning_rate": 0.0015, + "loss": 1.6529, + "step": 1730 + }, + { + "epoch": 0.18354430379746836, + "grad_norm": 0.8712873458862305, + "learning_rate": 0.0015, + "loss": 1.6534, + "step": 1740 + }, + { + "epoch": 0.18459915611814345, + "grad_norm": 0.6795451045036316, + "learning_rate": 0.0015, + "loss": 1.677, + "step": 1750 + }, + { + "epoch": 0.18565400843881857, + "grad_norm": 0.6434656381607056, + "learning_rate": 0.0015, + "loss": 1.6534, + "step": 1760 + }, + { + "epoch": 0.18670886075949367, + "grad_norm": 0.5913978815078735, + "learning_rate": 0.0015, + "loss": 1.6559, + "step": 1770 + }, + { + "epoch": 0.1877637130801688, + "grad_norm": 0.7321423888206482, + "learning_rate": 0.0015, + "loss": 1.655, + "step": 1780 + }, + { + "epoch": 0.18881856540084388, + "grad_norm": 0.8003367781639099, + "learning_rate": 0.0015, + "loss": 1.6598, + "step": 1790 + }, + { + "epoch": 0.189873417721519, + "grad_norm": 0.6629058718681335, + "learning_rate": 0.0015, + "loss": 1.6509, + "step": 1800 + }, + { + "epoch": 0.1909282700421941, + "grad_norm": 0.7159475088119507, + "learning_rate": 0.0015, + "loss": 1.6404, + "step": 1810 + }, + { + "epoch": 0.19198312236286919, + "grad_norm": 0.7505967020988464, + "learning_rate": 0.0015, + "loss": 1.6398, + "step": 1820 + }, + { + "epoch": 0.1930379746835443, + "grad_norm": 0.7928625345230103, + "learning_rate": 0.0015, + "loss": 1.6411, + "step": 1830 + }, + { + "epoch": 0.1940928270042194, + "grad_norm": 0.6108897924423218, + "learning_rate": 0.0015, + "loss": 1.6375, + "step": 1840 + }, + { + "epoch": 0.19514767932489452, + "grad_norm": 0.5757226943969727, + "learning_rate": 0.0015, + "loss": 1.641, + "step": 1850 + }, + { + "epoch": 0.1962025316455696, + 
"grad_norm": 0.6521726250648499, + "learning_rate": 0.0015, + "loss": 1.6436, + "step": 1860 + }, + { + "epoch": 0.19725738396624473, + "grad_norm": 0.6217014193534851, + "learning_rate": 0.0015, + "loss": 1.6433, + "step": 1870 + }, + { + "epoch": 0.19831223628691982, + "grad_norm": 0.5712935328483582, + "learning_rate": 0.0015, + "loss": 1.6323, + "step": 1880 + }, + { + "epoch": 0.19936708860759494, + "grad_norm": 0.5738839507102966, + "learning_rate": 0.0015, + "loss": 1.6389, + "step": 1890 + }, + { + "epoch": 0.20042194092827004, + "grad_norm": 1.0032706260681152, + "learning_rate": 0.0015, + "loss": 1.6353, + "step": 1900 + }, + { + "epoch": 0.20147679324894516, + "grad_norm": 0.9717868566513062, + "learning_rate": 0.0015, + "loss": 1.6325, + "step": 1910 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 0.6785035729408264, + "learning_rate": 0.0015, + "loss": 1.63, + "step": 1920 + }, + { + "epoch": 0.20358649789029537, + "grad_norm": 0.9020541906356812, + "learning_rate": 0.0015, + "loss": 1.6334, + "step": 1930 + }, + { + "epoch": 0.20464135021097046, + "grad_norm": 0.6292390823364258, + "learning_rate": 0.0015, + "loss": 1.6229, + "step": 1940 + }, + { + "epoch": 0.20569620253164558, + "grad_norm": 0.5943929553031921, + "learning_rate": 0.0015, + "loss": 1.6215, + "step": 1950 + }, + { + "epoch": 0.20675105485232068, + "grad_norm": 0.8829261064529419, + "learning_rate": 0.0015, + "loss": 1.6241, + "step": 1960 + }, + { + "epoch": 0.20780590717299577, + "grad_norm": 0.588376522064209, + "learning_rate": 0.0015, + "loss": 1.6362, + "step": 1970 + }, + { + "epoch": 0.2088607594936709, + "grad_norm": 0.6464346647262573, + "learning_rate": 0.0015, + "loss": 1.6171, + "step": 1980 + }, + { + "epoch": 0.20991561181434598, + "grad_norm": 0.9764966368675232, + "learning_rate": 0.0015, + "loss": 1.6159, + "step": 1990 + }, + { + "epoch": 0.2109704641350211, + "grad_norm": 0.6633862853050232, + "learning_rate": 0.0015, + "loss": 1.6163, + "step": 2000 + }, + 
{ + "epoch": 0.2120253164556962, + "grad_norm": 0.5747328996658325, + "learning_rate": 0.0015, + "loss": 1.6201, + "step": 2010 + }, + { + "epoch": 0.21308016877637131, + "grad_norm": 0.72202467918396, + "learning_rate": 0.0015, + "loss": 1.6308, + "step": 2020 + }, + { + "epoch": 0.2141350210970464, + "grad_norm": 0.6208357810974121, + "learning_rate": 0.0015, + "loss": 1.6182, + "step": 2030 + }, + { + "epoch": 0.21518987341772153, + "grad_norm": 0.5996553301811218, + "learning_rate": 0.0015, + "loss": 1.616, + "step": 2040 + }, + { + "epoch": 0.21624472573839662, + "grad_norm": 0.6539575457572937, + "learning_rate": 0.0015, + "loss": 1.6152, + "step": 2050 + }, + { + "epoch": 0.21729957805907174, + "grad_norm": 0.6510769724845886, + "learning_rate": 0.0015, + "loss": 1.6067, + "step": 2060 + }, + { + "epoch": 0.21835443037974683, + "grad_norm": 0.9695384502410889, + "learning_rate": 0.0015, + "loss": 1.6117, + "step": 2070 + }, + { + "epoch": 0.21940928270042195, + "grad_norm": 0.7847108840942383, + "learning_rate": 0.0015, + "loss": 1.6173, + "step": 2080 + }, + { + "epoch": 0.22046413502109705, + "grad_norm": 0.9148688316345215, + "learning_rate": 0.0015, + "loss": 1.6027, + "step": 2090 + }, + { + "epoch": 0.22151898734177214, + "grad_norm": 1.5051568746566772, + "learning_rate": 0.0015, + "loss": 1.6142, + "step": 2100 + }, + { + "epoch": 0.22257383966244726, + "grad_norm": 1.2215869426727295, + "learning_rate": 0.0015, + "loss": 1.6157, + "step": 2110 + }, + { + "epoch": 0.22362869198312235, + "grad_norm": 0.6897009611129761, + "learning_rate": 0.0015, + "loss": 1.5938, + "step": 2120 + }, + { + "epoch": 0.22468354430379747, + "grad_norm": 0.6504973769187927, + "learning_rate": 0.0015, + "loss": 1.5992, + "step": 2130 + }, + { + "epoch": 0.22573839662447256, + "grad_norm": 0.6026249527931213, + "learning_rate": 0.0015, + "loss": 1.5935, + "step": 2140 + }, + { + "epoch": 0.22679324894514769, + "grad_norm": 0.8656939268112183, + "learning_rate": 0.0015, + 
"loss": 1.6027, + "step": 2150 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 0.7233303189277649, + "learning_rate": 0.0015, + "loss": 1.6188, + "step": 2160 + }, + { + "epoch": 0.2289029535864979, + "grad_norm": 0.5548332929611206, + "learning_rate": 0.0015, + "loss": 1.6011, + "step": 2170 + }, + { + "epoch": 0.229957805907173, + "grad_norm": 0.5613821148872375, + "learning_rate": 0.0015, + "loss": 1.5798, + "step": 2180 + }, + { + "epoch": 0.2310126582278481, + "grad_norm": 0.6372296810150146, + "learning_rate": 0.0015, + "loss": 1.6007, + "step": 2190 + }, + { + "epoch": 0.2320675105485232, + "grad_norm": 0.6220502853393555, + "learning_rate": 0.0015, + "loss": 1.6027, + "step": 2200 + }, + { + "epoch": 0.23312236286919832, + "grad_norm": 0.6468200087547302, + "learning_rate": 0.0015, + "loss": 1.586, + "step": 2210 + }, + { + "epoch": 0.23417721518987342, + "grad_norm": 0.5483685731887817, + "learning_rate": 0.0015, + "loss": 1.5976, + "step": 2220 + }, + { + "epoch": 0.23523206751054854, + "grad_norm": 0.6158987283706665, + "learning_rate": 0.0015, + "loss": 1.5956, + "step": 2230 + }, + { + "epoch": 0.23628691983122363, + "grad_norm": 0.7173906564712524, + "learning_rate": 0.0015, + "loss": 1.591, + "step": 2240 + }, + { + "epoch": 0.23734177215189872, + "grad_norm": 0.8330214023590088, + "learning_rate": 0.0015, + "loss": 1.5947, + "step": 2250 + }, + { + "epoch": 0.23839662447257384, + "grad_norm": 0.9982806444168091, + "learning_rate": 0.0015, + "loss": 1.5824, + "step": 2260 + }, + { + "epoch": 0.23945147679324894, + "grad_norm": 0.7386844754219055, + "learning_rate": 0.0015, + "loss": 1.6135, + "step": 2270 + }, + { + "epoch": 0.24050632911392406, + "grad_norm": 0.7794486284255981, + "learning_rate": 0.0015, + "loss": 1.5971, + "step": 2280 + }, + { + "epoch": 0.24156118143459915, + "grad_norm": 0.6920127868652344, + "learning_rate": 0.0015, + "loss": 1.5859, + "step": 2290 + }, + { + "epoch": 0.24261603375527427, + "grad_norm": 
0.6448893547058105, + "learning_rate": 0.0015, + "loss": 1.5838, + "step": 2300 + }, + { + "epoch": 0.24367088607594936, + "grad_norm": 0.6755775809288025, + "learning_rate": 0.0015, + "loss": 1.5821, + "step": 2310 + }, + { + "epoch": 0.24472573839662448, + "grad_norm": 0.6389298439025879, + "learning_rate": 0.0015, + "loss": 1.594, + "step": 2320 + }, + { + "epoch": 0.24578059071729957, + "grad_norm": 0.7176726460456848, + "learning_rate": 0.0015, + "loss": 1.5784, + "step": 2330 + }, + { + "epoch": 0.2468354430379747, + "grad_norm": 0.900411069393158, + "learning_rate": 0.0015, + "loss": 1.5744, + "step": 2340 + }, + { + "epoch": 0.2478902953586498, + "grad_norm": 0.6456978917121887, + "learning_rate": 0.0015, + "loss": 1.5759, + "step": 2350 + }, + { + "epoch": 0.2489451476793249, + "grad_norm": 0.6599090695381165, + "learning_rate": 0.0015, + "loss": 1.5817, + "step": 2360 + }, + { + "epoch": 0.25, + "grad_norm": 0.8039332032203674, + "learning_rate": 0.0015, + "loss": 1.5822, + "step": 2370 + }, + { + "epoch": 0.2510548523206751, + "grad_norm": 0.5929813385009766, + "learning_rate": 0.0015, + "loss": 1.5788, + "step": 2380 + }, + { + "epoch": 0.2521097046413502, + "grad_norm": 0.5901683568954468, + "learning_rate": 0.0015, + "loss": 1.5762, + "step": 2390 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 0.5641725659370422, + "learning_rate": 0.0015, + "loss": 1.5796, + "step": 2400 + }, + { + "epoch": 0.2542194092827004, + "grad_norm": 0.8158353567123413, + "learning_rate": 0.0015, + "loss": 1.5798, + "step": 2410 + }, + { + "epoch": 0.2552742616033755, + "grad_norm": 0.6062980890274048, + "learning_rate": 0.0015, + "loss": 1.5776, + "step": 2420 + }, + { + "epoch": 0.2563291139240506, + "grad_norm": 0.6236847639083862, + "learning_rate": 0.0015, + "loss": 1.5721, + "step": 2430 + }, + { + "epoch": 0.25738396624472576, + "grad_norm": 0.8233560919761658, + "learning_rate": 0.0015, + "loss": 1.5822, + "step": 2440 + }, + { + "epoch": 0.25843881856540085, 
+ "grad_norm": 0.5583575367927551, + "learning_rate": 0.0015, + "loss": 1.5671, + "step": 2450 + }, + { + "epoch": 0.25949367088607594, + "grad_norm": 0.624183177947998, + "learning_rate": 0.0015, + "loss": 1.5848, + "step": 2460 + }, + { + "epoch": 0.26054852320675104, + "grad_norm": 0.9696111679077148, + "learning_rate": 0.0015, + "loss": 1.5736, + "step": 2470 + }, + { + "epoch": 0.2616033755274262, + "grad_norm": 0.7765077352523804, + "learning_rate": 0.0015, + "loss": 1.5799, + "step": 2480 + }, + { + "epoch": 0.2626582278481013, + "grad_norm": 0.5858256816864014, + "learning_rate": 0.0015, + "loss": 1.5623, + "step": 2490 + }, + { + "epoch": 0.26371308016877637, + "grad_norm": 0.7113674283027649, + "learning_rate": 0.0015, + "loss": 1.5721, + "step": 2500 + }, + { + "epoch": 0.26476793248945146, + "grad_norm": 0.6105078458786011, + "learning_rate": 0.0015, + "loss": 1.5703, + "step": 2510 + }, + { + "epoch": 0.26582278481012656, + "grad_norm": 0.5850993990898132, + "learning_rate": 0.0015, + "loss": 1.571, + "step": 2520 + }, + { + "epoch": 0.2668776371308017, + "grad_norm": 0.5758876800537109, + "learning_rate": 0.0015, + "loss": 1.5656, + "step": 2530 + }, + { + "epoch": 0.2679324894514768, + "grad_norm": 0.937073290348053, + "learning_rate": 0.0015, + "loss": 1.5667, + "step": 2540 + }, + { + "epoch": 0.2689873417721519, + "grad_norm": 0.8201426863670349, + "learning_rate": 0.0015, + "loss": 1.5633, + "step": 2550 + }, + { + "epoch": 0.270042194092827, + "grad_norm": 0.7046356797218323, + "learning_rate": 0.0015, + "loss": 1.5732, + "step": 2560 + }, + { + "epoch": 0.27109704641350213, + "grad_norm": 0.5730412602424622, + "learning_rate": 0.0015, + "loss": 1.5643, + "step": 2570 + }, + { + "epoch": 0.2721518987341772, + "grad_norm": 0.6409282684326172, + "learning_rate": 0.0015, + "loss": 1.5635, + "step": 2580 + }, + { + "epoch": 0.2732067510548523, + "grad_norm": 0.6453495025634766, + "learning_rate": 0.0015, + "loss": 1.5704, + "step": 2590 + }, + { + 
"epoch": 0.2742616033755274, + "grad_norm": 0.583181619644165, + "learning_rate": 0.0015, + "loss": 1.5686, + "step": 2600 + }, + { + "epoch": 0.27531645569620256, + "grad_norm": 0.591073215007782, + "learning_rate": 0.0015, + "loss": 1.5657, + "step": 2610 + }, + { + "epoch": 0.27637130801687765, + "grad_norm": 0.7396721243858337, + "learning_rate": 0.0015, + "loss": 1.5672, + "step": 2620 + }, + { + "epoch": 0.27742616033755274, + "grad_norm": 0.6038991212844849, + "learning_rate": 0.0015, + "loss": 1.5564, + "step": 2630 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 0.5844075679779053, + "learning_rate": 0.0015, + "loss": 1.5652, + "step": 2640 + }, + { + "epoch": 0.2795358649789029, + "grad_norm": 0.608181357383728, + "learning_rate": 0.0015, + "loss": 1.5589, + "step": 2650 + }, + { + "epoch": 0.2805907172995781, + "grad_norm": 0.6012187600135803, + "learning_rate": 0.0015, + "loss": 1.5528, + "step": 2660 + }, + { + "epoch": 0.28164556962025317, + "grad_norm": 0.5790621638298035, + "learning_rate": 0.0015, + "loss": 1.5533, + "step": 2670 + }, + { + "epoch": 0.28270042194092826, + "grad_norm": 0.588222324848175, + "learning_rate": 0.0015, + "loss": 1.5533, + "step": 2680 + }, + { + "epoch": 0.28375527426160335, + "grad_norm": 0.6873937249183655, + "learning_rate": 0.0015, + "loss": 1.5546, + "step": 2690 + }, + { + "epoch": 0.2848101265822785, + "grad_norm": 0.6786103248596191, + "learning_rate": 0.0015, + "loss": 1.5508, + "step": 2700 + }, + { + "epoch": 0.2858649789029536, + "grad_norm": 0.6643319725990295, + "learning_rate": 0.0015, + "loss": 1.5603, + "step": 2710 + }, + { + "epoch": 0.2869198312236287, + "grad_norm": 0.6390370726585388, + "learning_rate": 0.0015, + "loss": 1.5569, + "step": 2720 + }, + { + "epoch": 0.2879746835443038, + "grad_norm": 0.6126376390457153, + "learning_rate": 0.0015, + "loss": 1.5368, + "step": 2730 + }, + { + "epoch": 0.2890295358649789, + "grad_norm": 0.9072438478469849, + "learning_rate": 0.0015, + "loss": 
1.5517, + "step": 2740 + }, + { + "epoch": 0.290084388185654, + "grad_norm": 0.5998278856277466, + "learning_rate": 0.0015, + "loss": 1.5553, + "step": 2750 + }, + { + "epoch": 0.2911392405063291, + "grad_norm": 0.7560575008392334, + "learning_rate": 0.0015, + "loss": 1.5626, + "step": 2760 + }, + { + "epoch": 0.2921940928270042, + "grad_norm": 0.6209825277328491, + "learning_rate": 0.0015, + "loss": 1.5596, + "step": 2770 + }, + { + "epoch": 0.29324894514767935, + "grad_norm": 0.5998792052268982, + "learning_rate": 0.0015, + "loss": 1.5538, + "step": 2780 + }, + { + "epoch": 0.29430379746835444, + "grad_norm": 0.5925680994987488, + "learning_rate": 0.0015, + "loss": 1.5511, + "step": 2790 + }, + { + "epoch": 0.29535864978902954, + "grad_norm": 0.6163322925567627, + "learning_rate": 0.0015, + "loss": 1.5482, + "step": 2800 + }, + { + "epoch": 0.29641350210970463, + "grad_norm": 0.5704933404922485, + "learning_rate": 0.0015, + "loss": 1.5517, + "step": 2810 + }, + { + "epoch": 0.2974683544303797, + "grad_norm": 0.7040290236473083, + "learning_rate": 0.0015, + "loss": 1.5578, + "step": 2820 + }, + { + "epoch": 0.29852320675105487, + "grad_norm": 1.134084701538086, + "learning_rate": 0.0015, + "loss": 1.5333, + "step": 2830 + }, + { + "epoch": 0.29957805907172996, + "grad_norm": 0.8534347414970398, + "learning_rate": 0.0015, + "loss": 1.536, + "step": 2840 + }, + { + "epoch": 0.30063291139240506, + "grad_norm": 0.6510926485061646, + "learning_rate": 0.0015, + "loss": 1.5402, + "step": 2850 + }, + { + "epoch": 0.30168776371308015, + "grad_norm": 0.6544208526611328, + "learning_rate": 0.0015, + "loss": 1.5485, + "step": 2860 + }, + { + "epoch": 0.3027426160337553, + "grad_norm": 0.8508293628692627, + "learning_rate": 0.0015, + "loss": 1.5484, + "step": 2870 + }, + { + "epoch": 0.3037974683544304, + "grad_norm": 0.727138102054596, + "learning_rate": 0.0015, + "loss": 1.5327, + "step": 2880 + }, + { + "epoch": 0.3048523206751055, + "grad_norm": 0.6009069085121155, + 
"learning_rate": 0.0015, + "loss": 1.5375, + "step": 2890 + }, + { + "epoch": 0.3059071729957806, + "grad_norm": 0.655423104763031, + "learning_rate": 0.0015, + "loss": 1.5445, + "step": 2900 + }, + { + "epoch": 0.3069620253164557, + "grad_norm": 0.6968240141868591, + "learning_rate": 0.0015, + "loss": 1.5421, + "step": 2910 + }, + { + "epoch": 0.3080168776371308, + "grad_norm": 0.6441303491592407, + "learning_rate": 0.0015, + "loss": 1.538, + "step": 2920 + }, + { + "epoch": 0.3090717299578059, + "grad_norm": 0.5854324102401733, + "learning_rate": 0.0015, + "loss": 1.5354, + "step": 2930 + }, + { + "epoch": 0.310126582278481, + "grad_norm": 0.764230489730835, + "learning_rate": 0.0015, + "loss": 1.5394, + "step": 2940 + }, + { + "epoch": 0.3111814345991561, + "grad_norm": 0.5712303519248962, + "learning_rate": 0.0015, + "loss": 1.5532, + "step": 2950 + }, + { + "epoch": 0.31223628691983124, + "grad_norm": 0.7080296277999878, + "learning_rate": 0.0015, + "loss": 1.5318, + "step": 2960 + }, + { + "epoch": 0.31329113924050633, + "grad_norm": 0.8196799159049988, + "learning_rate": 0.0015, + "loss": 1.522, + "step": 2970 + }, + { + "epoch": 0.3143459915611814, + "grad_norm": 0.5653908252716064, + "learning_rate": 0.0015, + "loss": 1.5286, + "step": 2980 + }, + { + "epoch": 0.3154008438818565, + "grad_norm": 0.5506159067153931, + "learning_rate": 0.0015, + "loss": 1.5425, + "step": 2990 + }, + { + "epoch": 0.31645569620253167, + "grad_norm": 0.5746380686759949, + "learning_rate": 0.0015, + "loss": 1.5282, + "step": 3000 + }, + { + "epoch": 0.31751054852320676, + "grad_norm": 0.6572897434234619, + "learning_rate": 0.0015, + "loss": 1.5325, + "step": 3010 + }, + { + "epoch": 0.31856540084388185, + "grad_norm": 0.8061972856521606, + "learning_rate": 0.0015, + "loss": 1.5413, + "step": 3020 + }, + { + "epoch": 0.31962025316455694, + "grad_norm": 0.6429711580276489, + "learning_rate": 0.0015, + "loss": 1.5306, + "step": 3030 + }, + { + "epoch": 0.3206751054852321, + 
"grad_norm": 0.7164819240570068, + "learning_rate": 0.0015, + "loss": 1.5265, + "step": 3040 + }, + { + "epoch": 0.3217299578059072, + "grad_norm": 0.6932748556137085, + "learning_rate": 0.0015, + "loss": 1.5249, + "step": 3050 + }, + { + "epoch": 0.3227848101265823, + "grad_norm": 0.5477355718612671, + "learning_rate": 0.0015, + "loss": 1.537, + "step": 3060 + }, + { + "epoch": 0.32383966244725737, + "grad_norm": 0.7214810848236084, + "learning_rate": 0.0015, + "loss": 1.5272, + "step": 3070 + }, + { + "epoch": 0.32489451476793246, + "grad_norm": 0.552919328212738, + "learning_rate": 0.0015, + "loss": 1.5181, + "step": 3080 + }, + { + "epoch": 0.3259493670886076, + "grad_norm": 0.5526236295700073, + "learning_rate": 0.0015, + "loss": 1.5351, + "step": 3090 + }, + { + "epoch": 0.3270042194092827, + "grad_norm": 0.654093861579895, + "learning_rate": 0.0015, + "loss": 1.5216, + "step": 3100 + }, + { + "epoch": 0.3280590717299578, + "grad_norm": 0.5659670233726501, + "learning_rate": 0.0015, + "loss": 1.5279, + "step": 3110 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 0.6855720281600952, + "learning_rate": 0.0015, + "loss": 1.5274, + "step": 3120 + }, + { + "epoch": 0.33016877637130804, + "grad_norm": 0.6791499257087708, + "learning_rate": 0.0015, + "loss": 1.5403, + "step": 3130 + }, + { + "epoch": 0.33122362869198313, + "grad_norm": 0.5190626382827759, + "learning_rate": 0.0015, + "loss": 1.5176, + "step": 3140 + }, + { + "epoch": 0.3322784810126582, + "grad_norm": 0.6249583959579468, + "learning_rate": 0.0015, + "loss": 1.5276, + "step": 3150 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.5765541195869446, + "learning_rate": 0.0015, + "loss": 1.5267, + "step": 3160 + }, + { + "epoch": 0.33438818565400846, + "grad_norm": 0.5980014801025391, + "learning_rate": 0.0015, + "loss": 1.5295, + "step": 3170 + }, + { + "epoch": 0.33544303797468356, + "grad_norm": 0.7137629985809326, + "learning_rate": 0.0015, + "loss": 1.5256, + "step": 3180 + }, + { + 
"epoch": 0.33649789029535865, + "grad_norm": 0.7207750082015991, + "learning_rate": 0.0015, + "loss": 1.5235, + "step": 3190 + }, + { + "epoch": 0.33755274261603374, + "grad_norm": 0.6043395400047302, + "learning_rate": 0.0015, + "loss": 1.5185, + "step": 3200 + }, + { + "epoch": 0.33860759493670883, + "grad_norm": 0.6252288818359375, + "learning_rate": 0.0015, + "loss": 1.5332, + "step": 3210 + }, + { + "epoch": 0.339662447257384, + "grad_norm": 0.671881377696991, + "learning_rate": 0.0015, + "loss": 1.5218, + "step": 3220 + }, + { + "epoch": 0.3407172995780591, + "grad_norm": 0.5771369338035583, + "learning_rate": 0.0015, + "loss": 1.5155, + "step": 3230 + }, + { + "epoch": 0.34177215189873417, + "grad_norm": 0.5985080003738403, + "learning_rate": 0.0015, + "loss": 1.497, + "step": 3240 + }, + { + "epoch": 0.34282700421940926, + "grad_norm": 0.5753167867660522, + "learning_rate": 0.0015, + "loss": 1.5201, + "step": 3250 + }, + { + "epoch": 0.3438818565400844, + "grad_norm": 0.6402337551116943, + "learning_rate": 0.0015, + "loss": 1.5114, + "step": 3260 + }, + { + "epoch": 0.3449367088607595, + "grad_norm": 0.5531371831893921, + "learning_rate": 0.0015, + "loss": 1.5299, + "step": 3270 + }, + { + "epoch": 0.3459915611814346, + "grad_norm": 0.7978616952896118, + "learning_rate": 0.0015, + "loss": 1.5292, + "step": 3280 + }, + { + "epoch": 0.3470464135021097, + "grad_norm": 1.0608034133911133, + "learning_rate": 0.0015, + "loss": 1.5115, + "step": 3290 + }, + { + "epoch": 0.34810126582278483, + "grad_norm": 0.5867448449134827, + "learning_rate": 0.0015, + "loss": 1.5211, + "step": 3300 + }, + { + "epoch": 0.3491561181434599, + "grad_norm": 0.650109052658081, + "learning_rate": 0.0015, + "loss": 1.5204, + "step": 3310 + }, + { + "epoch": 0.350210970464135, + "grad_norm": 0.5901459455490112, + "learning_rate": 0.0015, + "loss": 1.5275, + "step": 3320 + }, + { + "epoch": 0.3512658227848101, + "grad_norm": 0.564175546169281, + "learning_rate": 0.0015, + "loss": 1.5155, 
+ "step": 3330 + }, + { + "epoch": 0.35232067510548526, + "grad_norm": 0.612292468547821, + "learning_rate": 0.0015, + "loss": 1.5163, + "step": 3340 + }, + { + "epoch": 0.35337552742616035, + "grad_norm": 0.5642638802528381, + "learning_rate": 0.0015, + "loss": 1.5093, + "step": 3350 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 0.6836768984794617, + "learning_rate": 0.0015, + "loss": 1.5109, + "step": 3360 + }, + { + "epoch": 0.35548523206751054, + "grad_norm": 0.5777236819267273, + "learning_rate": 0.0015, + "loss": 1.5191, + "step": 3370 + }, + { + "epoch": 0.35654008438818563, + "grad_norm": 0.5549036264419556, + "learning_rate": 0.0015, + "loss": 1.5119, + "step": 3380 + }, + { + "epoch": 0.3575949367088608, + "grad_norm": 0.5068067908287048, + "learning_rate": 0.0015, + "loss": 1.5091, + "step": 3390 + }, + { + "epoch": 0.35864978902953587, + "grad_norm": 1.0121623277664185, + "learning_rate": 0.0015, + "loss": 1.5237, + "step": 3400 + }, + { + "epoch": 0.35970464135021096, + "grad_norm": 0.8498140573501587, + "learning_rate": 0.0015, + "loss": 1.5158, + "step": 3410 + }, + { + "epoch": 0.36075949367088606, + "grad_norm": 0.5821650624275208, + "learning_rate": 0.0015, + "loss": 1.4927, + "step": 3420 + }, + { + "epoch": 0.3618143459915612, + "grad_norm": 0.5692257285118103, + "learning_rate": 0.0015, + "loss": 1.5085, + "step": 3430 + }, + { + "epoch": 0.3628691983122363, + "grad_norm": 0.5395897626876831, + "learning_rate": 0.0015, + "loss": 1.5076, + "step": 3440 + }, + { + "epoch": 0.3639240506329114, + "grad_norm": 0.6442540287971497, + "learning_rate": 0.0015, + "loss": 1.5048, + "step": 3450 + }, + { + "epoch": 0.3649789029535865, + "grad_norm": 0.757856547832489, + "learning_rate": 0.0015, + "loss": 1.4975, + "step": 3460 + }, + { + "epoch": 0.36603375527426163, + "grad_norm": 0.8908534646034241, + "learning_rate": 0.0015, + "loss": 1.5149, + "step": 3470 + }, + { + "epoch": 0.3670886075949367, + "grad_norm": 0.7035073041915894, + 
"learning_rate": 0.0015, + "loss": 1.5206, + "step": 3480 + }, + { + "epoch": 0.3681434599156118, + "grad_norm": 0.6767370104789734, + "learning_rate": 0.0015, + "loss": 1.5121, + "step": 3490 + }, + { + "epoch": 0.3691983122362869, + "grad_norm": 0.7631784081459045, + "learning_rate": 0.0015, + "loss": 1.4937, + "step": 3500 + }, + { + "epoch": 0.370253164556962, + "grad_norm": 0.7743115425109863, + "learning_rate": 0.0015, + "loss": 1.5156, + "step": 3510 + }, + { + "epoch": 0.37130801687763715, + "grad_norm": 0.5623247623443604, + "learning_rate": 0.0015, + "loss": 1.5102, + "step": 3520 + }, + { + "epoch": 0.37236286919831224, + "grad_norm": 0.5539934635162354, + "learning_rate": 0.0015, + "loss": 1.52, + "step": 3530 + }, + { + "epoch": 0.37341772151898733, + "grad_norm": 0.8669719696044922, + "learning_rate": 0.0015, + "loss": 1.5121, + "step": 3540 + }, + { + "epoch": 0.3744725738396624, + "grad_norm": 0.7609106302261353, + "learning_rate": 0.0015, + "loss": 1.5022, + "step": 3550 + }, + { + "epoch": 0.3755274261603376, + "grad_norm": 0.5223315358161926, + "learning_rate": 0.0015, + "loss": 1.5022, + "step": 3560 + }, + { + "epoch": 0.37658227848101267, + "grad_norm": 0.6668152213096619, + "learning_rate": 0.0015, + "loss": 1.4971, + "step": 3570 + }, + { + "epoch": 0.37763713080168776, + "grad_norm": 0.6139928698539734, + "learning_rate": 0.0015, + "loss": 1.5007, + "step": 3580 + }, + { + "epoch": 0.37869198312236285, + "grad_norm": 0.5609003305435181, + "learning_rate": 0.0015, + "loss": 1.5118, + "step": 3590 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 0.6565743088722229, + "learning_rate": 0.0015, + "loss": 1.4974, + "step": 3600 + }, + { + "epoch": 0.3808016877637131, + "grad_norm": 0.59303879737854, + "learning_rate": 0.0015, + "loss": 1.5003, + "step": 3610 + }, + { + "epoch": 0.3818565400843882, + "grad_norm": 0.5740610361099243, + "learning_rate": 0.0015, + "loss": 1.4995, + "step": 3620 + }, + { + "epoch": 0.3829113924050633, + 
"grad_norm": 0.6683691740036011, + "learning_rate": 0.0015, + "loss": 1.5021, + "step": 3630 + }, + { + "epoch": 0.38396624472573837, + "grad_norm": 0.6576066613197327, + "learning_rate": 0.0015, + "loss": 1.5002, + "step": 3640 + }, + { + "epoch": 0.3850210970464135, + "grad_norm": 0.6867544651031494, + "learning_rate": 0.0015, + "loss": 1.5098, + "step": 3650 + }, + { + "epoch": 0.3860759493670886, + "grad_norm": 0.6396692395210266, + "learning_rate": 0.0015, + "loss": 1.5042, + "step": 3660 + }, + { + "epoch": 0.3871308016877637, + "grad_norm": 0.7358971834182739, + "learning_rate": 0.0015, + "loss": 1.5005, + "step": 3670 + }, + { + "epoch": 0.3881856540084388, + "grad_norm": 0.7496529221534729, + "learning_rate": 0.0015, + "loss": 1.4978, + "step": 3680 + }, + { + "epoch": 0.38924050632911394, + "grad_norm": 0.7263187766075134, + "learning_rate": 0.0015, + "loss": 1.5092, + "step": 3690 + }, + { + "epoch": 0.39029535864978904, + "grad_norm": 0.6615262031555176, + "learning_rate": 0.0015, + "loss": 1.5096, + "step": 3700 + }, + { + "epoch": 0.39135021097046413, + "grad_norm": 0.5842890739440918, + "learning_rate": 0.0015, + "loss": 1.4985, + "step": 3710 + }, + { + "epoch": 0.3924050632911392, + "grad_norm": 0.5979508757591248, + "learning_rate": 0.0015, + "loss": 1.4786, + "step": 3720 + }, + { + "epoch": 0.39345991561181437, + "grad_norm": 0.6373541951179504, + "learning_rate": 0.0015, + "loss": 1.4962, + "step": 3730 + }, + { + "epoch": 0.39451476793248946, + "grad_norm": 0.5276340246200562, + "learning_rate": 0.0015, + "loss": 1.4974, + "step": 3740 + }, + { + "epoch": 0.39556962025316456, + "grad_norm": 0.6625521183013916, + "learning_rate": 0.0015, + "loss": 1.5011, + "step": 3750 + }, + { + "epoch": 0.39662447257383965, + "grad_norm": 0.9527451395988464, + "learning_rate": 0.0015, + "loss": 1.4941, + "step": 3760 + }, + { + "epoch": 0.39767932489451474, + "grad_norm": 0.577045738697052, + "learning_rate": 0.0015, + "loss": 1.5092, + "step": 3770 + }, + { 
+ "epoch": 0.3987341772151899, + "grad_norm": 0.5803396105766296, + "learning_rate": 0.0015, + "loss": 1.4919, + "step": 3780 + }, + { + "epoch": 0.399789029535865, + "grad_norm": 0.8232485055923462, + "learning_rate": 0.0015, + "loss": 1.4985, + "step": 3790 + }, + { + "epoch": 0.4008438818565401, + "grad_norm": 0.7094146609306335, + "learning_rate": 0.0015, + "loss": 1.5032, + "step": 3800 + }, + { + "epoch": 0.40189873417721517, + "grad_norm": 0.5717623233795166, + "learning_rate": 0.0015, + "loss": 1.4909, + "step": 3810 + }, + { + "epoch": 0.4029535864978903, + "grad_norm": 0.5873256921768188, + "learning_rate": 0.0015, + "loss": 1.4807, + "step": 3820 + }, + { + "epoch": 0.4040084388185654, + "grad_norm": 0.5121378898620605, + "learning_rate": 0.0015, + "loss": 1.4857, + "step": 3830 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 0.6827294826507568, + "learning_rate": 0.0015, + "loss": 1.5033, + "step": 3840 + }, + { + "epoch": 0.4061181434599156, + "grad_norm": 0.757856011390686, + "learning_rate": 0.0015, + "loss": 1.4858, + "step": 3850 + }, + { + "epoch": 0.40717299578059074, + "grad_norm": 0.7232317924499512, + "learning_rate": 0.0015, + "loss": 1.4996, + "step": 3860 + }, + { + "epoch": 0.40822784810126583, + "grad_norm": 0.5782884955406189, + "learning_rate": 0.0015, + "loss": 1.4885, + "step": 3870 + }, + { + "epoch": 0.4092827004219409, + "grad_norm": 0.5484322309494019, + "learning_rate": 0.0015, + "loss": 1.4819, + "step": 3880 + }, + { + "epoch": 0.410337552742616, + "grad_norm": 0.5790377855300903, + "learning_rate": 0.0015, + "loss": 1.4803, + "step": 3890 + }, + { + "epoch": 0.41139240506329117, + "grad_norm": 0.9478213787078857, + "learning_rate": 0.0015, + "loss": 1.4817, + "step": 3900 + }, + { + "epoch": 0.41244725738396626, + "grad_norm": 0.6198221445083618, + "learning_rate": 0.0015, + "loss": 1.4852, + "step": 3910 + }, + { + "epoch": 0.41350210970464135, + "grad_norm": 0.5510705709457397, + "learning_rate": 0.0015, + "loss": 
1.4816, + "step": 3920 + }, + { + "epoch": 0.41455696202531644, + "grad_norm": 0.6039211750030518, + "learning_rate": 0.0015, + "loss": 1.4861, + "step": 3930 + }, + { + "epoch": 0.41561181434599154, + "grad_norm": 0.6572819352149963, + "learning_rate": 0.0015, + "loss": 1.482, + "step": 3940 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 0.6410369277000427, + "learning_rate": 0.0015, + "loss": 1.487, + "step": 3950 + }, + { + "epoch": 0.4177215189873418, + "grad_norm": 0.5450556874275208, + "learning_rate": 0.0015, + "loss": 1.4878, + "step": 3960 + }, + { + "epoch": 0.41877637130801687, + "grad_norm": 0.5562349557876587, + "learning_rate": 0.0015, + "loss": 1.4784, + "step": 3970 + }, + { + "epoch": 0.41983122362869196, + "grad_norm": 0.5236467719078064, + "learning_rate": 0.0015, + "loss": 1.486, + "step": 3980 + }, + { + "epoch": 0.4208860759493671, + "grad_norm": 0.5885394215583801, + "learning_rate": 0.0015, + "loss": 1.4787, + "step": 3990 + }, + { + "epoch": 0.4219409282700422, + "grad_norm": 0.5541431903839111, + "learning_rate": 0.0015, + "loss": 1.4913, + "step": 4000 + }, + { + "epoch": 0.4229957805907173, + "grad_norm": 0.5044839978218079, + "learning_rate": 0.0015, + "loss": 1.4815, + "step": 4010 + }, + { + "epoch": 0.4240506329113924, + "grad_norm": 0.5214411020278931, + "learning_rate": 0.0015, + "loss": 1.4853, + "step": 4020 + }, + { + "epoch": 0.42510548523206754, + "grad_norm": 0.5292476415634155, + "learning_rate": 0.0015, + "loss": 1.4768, + "step": 4030 + }, + { + "epoch": 0.42616033755274263, + "grad_norm": 0.5479418635368347, + "learning_rate": 0.0015, + "loss": 1.4874, + "step": 4040 + }, + { + "epoch": 0.4272151898734177, + "grad_norm": 0.5096611976623535, + "learning_rate": 0.0015, + "loss": 1.485, + "step": 4050 + }, + { + "epoch": 0.4282700421940928, + "grad_norm": 0.7310690879821777, + "learning_rate": 0.0015, + "loss": 1.4901, + "step": 4060 + }, + { + "epoch": 0.4293248945147679, + "grad_norm": 0.7201768755912781, + 
"learning_rate": 0.0015, + "loss": 1.4856, + "step": 4070 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 0.6559078097343445, + "learning_rate": 0.0015, + "loss": 1.4914, + "step": 4080 + }, + { + "epoch": 0.43143459915611815, + "grad_norm": 0.5437326431274414, + "learning_rate": 0.0015, + "loss": 1.4879, + "step": 4090 + }, + { + "epoch": 0.43248945147679324, + "grad_norm": 0.6245301365852356, + "learning_rate": 0.0015, + "loss": 1.48, + "step": 4100 + }, + { + "epoch": 0.43354430379746833, + "grad_norm": 0.76190584897995, + "learning_rate": 0.0015, + "loss": 1.4751, + "step": 4110 + }, + { + "epoch": 0.4345991561181435, + "grad_norm": 0.6346284747123718, + "learning_rate": 0.0015, + "loss": 1.4807, + "step": 4120 + }, + { + "epoch": 0.4356540084388186, + "grad_norm": 0.5197469592094421, + "learning_rate": 0.0015, + "loss": 1.4746, + "step": 4130 + }, + { + "epoch": 0.43670886075949367, + "grad_norm": 0.5903440117835999, + "learning_rate": 0.0015, + "loss": 1.476, + "step": 4140 + }, + { + "epoch": 0.43776371308016876, + "grad_norm": 0.5719578862190247, + "learning_rate": 0.0015, + "loss": 1.4832, + "step": 4150 + }, + { + "epoch": 0.4388185654008439, + "grad_norm": 0.5282468795776367, + "learning_rate": 0.0015, + "loss": 1.4761, + "step": 4160 + }, + { + "epoch": 0.439873417721519, + "grad_norm": 0.634348452091217, + "learning_rate": 0.0015, + "loss": 1.4761, + "step": 4170 + }, + { + "epoch": 0.4409282700421941, + "grad_norm": 0.5045709609985352, + "learning_rate": 0.0015, + "loss": 1.4732, + "step": 4180 + }, + { + "epoch": 0.4419831223628692, + "grad_norm": 0.5601181387901306, + "learning_rate": 0.0015, + "loss": 1.4782, + "step": 4190 + }, + { + "epoch": 0.4430379746835443, + "grad_norm": 0.6216740608215332, + "learning_rate": 0.0015, + "loss": 1.4799, + "step": 4200 + }, + { + "epoch": 0.4440928270042194, + "grad_norm": 0.5911687612533569, + "learning_rate": 0.0015, + "loss": 1.4684, + "step": 4210 + }, + { + "epoch": 0.4451476793248945, + 
"grad_norm": 0.6351373791694641, + "learning_rate": 0.0015, + "loss": 1.4785, + "step": 4220 + }, + { + "epoch": 0.4462025316455696, + "grad_norm": 0.8020367622375488, + "learning_rate": 0.0015, + "loss": 1.4766, + "step": 4230 + }, + { + "epoch": 0.4472573839662447, + "grad_norm": 0.7376629114151001, + "learning_rate": 0.0015, + "loss": 1.4748, + "step": 4240 + }, + { + "epoch": 0.44831223628691985, + "grad_norm": 0.6472592949867249, + "learning_rate": 0.0015, + "loss": 1.4762, + "step": 4250 + }, + { + "epoch": 0.44936708860759494, + "grad_norm": 0.6546268463134766, + "learning_rate": 0.0015, + "loss": 1.483, + "step": 4260 + }, + { + "epoch": 0.45042194092827004, + "grad_norm": 0.522511899471283, + "learning_rate": 0.0015, + "loss": 1.4714, + "step": 4270 + }, + { + "epoch": 0.45147679324894513, + "grad_norm": 0.8491767048835754, + "learning_rate": 0.0015, + "loss": 1.4682, + "step": 4280 + }, + { + "epoch": 0.4525316455696203, + "grad_norm": 0.5159468650817871, + "learning_rate": 0.0015, + "loss": 1.4784, + "step": 4290 + }, + { + "epoch": 0.45358649789029537, + "grad_norm": 0.8218620419502258, + "learning_rate": 0.0015, + "loss": 1.4759, + "step": 4300 + }, + { + "epoch": 0.45464135021097046, + "grad_norm": 0.5754440426826477, + "learning_rate": 0.0015, + "loss": 1.4731, + "step": 4310 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 0.7513793110847473, + "learning_rate": 0.0015, + "loss": 1.4716, + "step": 4320 + }, + { + "epoch": 0.45675105485232065, + "grad_norm": 0.5255351066589355, + "learning_rate": 0.0015, + "loss": 1.4759, + "step": 4330 + }, + { + "epoch": 0.4578059071729958, + "grad_norm": 0.6012159585952759, + "learning_rate": 0.0015, + "loss": 1.4681, + "step": 4340 + }, + { + "epoch": 0.4588607594936709, + "grad_norm": 0.7910928726196289, + "learning_rate": 0.0015, + "loss": 1.4714, + "step": 4350 + }, + { + "epoch": 0.459915611814346, + "grad_norm": 0.5239707827568054, + "learning_rate": 0.0015, + "loss": 1.4874, + "step": 4360 + }, + { + 
"epoch": 0.4609704641350211, + "grad_norm": 0.5172773003578186, + "learning_rate": 0.0015, + "loss": 1.4628, + "step": 4370 + }, + { + "epoch": 0.4620253164556962, + "grad_norm": 0.6210146546363831, + "learning_rate": 0.0015, + "loss": 1.4697, + "step": 4380 + }, + { + "epoch": 0.4630801687763713, + "grad_norm": 0.5867434740066528, + "learning_rate": 0.0015, + "loss": 1.4665, + "step": 4390 + }, + { + "epoch": 0.4641350210970464, + "grad_norm": 0.6506653428077698, + "learning_rate": 0.0015, + "loss": 1.4698, + "step": 4400 + }, + { + "epoch": 0.4651898734177215, + "grad_norm": 0.5838228464126587, + "learning_rate": 0.0015, + "loss": 1.4849, + "step": 4410 + }, + { + "epoch": 0.46624472573839665, + "grad_norm": 0.7401859164237976, + "learning_rate": 0.0015, + "loss": 1.4671, + "step": 4420 + }, + { + "epoch": 0.46729957805907174, + "grad_norm": 0.5894936323165894, + "learning_rate": 0.0015, + "loss": 1.4717, + "step": 4430 + }, + { + "epoch": 0.46835443037974683, + "grad_norm": 0.7206613421440125, + "learning_rate": 0.0015, + "loss": 1.4685, + "step": 4440 + }, + { + "epoch": 0.4694092827004219, + "grad_norm": 0.672594428062439, + "learning_rate": 0.0015, + "loss": 1.4757, + "step": 4450 + }, + { + "epoch": 0.4704641350210971, + "grad_norm": 0.49490922689437866, + "learning_rate": 0.0015, + "loss": 1.4603, + "step": 4460 + }, + { + "epoch": 0.47151898734177217, + "grad_norm": 0.5114975571632385, + "learning_rate": 0.0015, + "loss": 1.471, + "step": 4470 + }, + { + "epoch": 0.47257383966244726, + "grad_norm": 0.596366286277771, + "learning_rate": 0.0015, + "loss": 1.4771, + "step": 4480 + }, + { + "epoch": 0.47362869198312235, + "grad_norm": 0.5820817351341248, + "learning_rate": 0.0015, + "loss": 1.4641, + "step": 4490 + }, + { + "epoch": 0.47468354430379744, + "grad_norm": 0.5126115083694458, + "learning_rate": 0.0015, + "loss": 1.4645, + "step": 4500 + }, + { + "epoch": 0.4757383966244726, + "grad_norm": 0.5245903134346008, + "learning_rate": 0.0015, + "loss": 
1.4679, + "step": 4510 + }, + { + "epoch": 0.4767932489451477, + "grad_norm": 0.5689454674720764, + "learning_rate": 0.0015, + "loss": 1.4523, + "step": 4520 + }, + { + "epoch": 0.4778481012658228, + "grad_norm": 0.5443896055221558, + "learning_rate": 0.0015, + "loss": 1.4732, + "step": 4530 + }, + { + "epoch": 0.47890295358649787, + "grad_norm": 0.5211290717124939, + "learning_rate": 0.0015, + "loss": 1.4715, + "step": 4540 + }, + { + "epoch": 0.479957805907173, + "grad_norm": 0.5262769460678101, + "learning_rate": 0.0015, + "loss": 1.4684, + "step": 4550 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 0.5398851037025452, + "learning_rate": 0.0015, + "loss": 1.4587, + "step": 4560 + }, + { + "epoch": 0.4820675105485232, + "grad_norm": 0.7674636840820312, + "learning_rate": 0.0015, + "loss": 1.4421, + "step": 4570 + }, + { + "epoch": 0.4831223628691983, + "grad_norm": 0.6799883842468262, + "learning_rate": 0.0015, + "loss": 1.4659, + "step": 4580 + }, + { + "epoch": 0.48417721518987344, + "grad_norm": 0.6476281881332397, + "learning_rate": 0.0015, + "loss": 1.4518, + "step": 4590 + }, + { + "epoch": 0.48523206751054854, + "grad_norm": 0.5203509330749512, + "learning_rate": 0.0015, + "loss": 1.473, + "step": 4600 + }, + { + "epoch": 0.48628691983122363, + "grad_norm": 0.8790157437324524, + "learning_rate": 0.0015, + "loss": 1.4751, + "step": 4610 + }, + { + "epoch": 0.4873417721518987, + "grad_norm": 0.5221405029296875, + "learning_rate": 0.0015, + "loss": 1.4508, + "step": 4620 + }, + { + "epoch": 0.4883966244725738, + "grad_norm": 0.5264320969581604, + "learning_rate": 0.0015, + "loss": 1.4686, + "step": 4630 + }, + { + "epoch": 0.48945147679324896, + "grad_norm": 0.5238685607910156, + "learning_rate": 0.0015, + "loss": 1.469, + "step": 4640 + }, + { + "epoch": 0.49050632911392406, + "grad_norm": 0.6246622204780579, + "learning_rate": 0.0015, + "loss": 1.4701, + "step": 4650 + }, + { + "epoch": 0.49156118143459915, + "grad_norm": 0.5449501872062683, + 
"learning_rate": 0.0015, + "loss": 1.4587, + "step": 4660 + }, + { + "epoch": 0.49261603375527424, + "grad_norm": 0.6455876231193542, + "learning_rate": 0.0015, + "loss": 1.4572, + "step": 4670 + }, + { + "epoch": 0.4936708860759494, + "grad_norm": 0.5345314741134644, + "learning_rate": 0.0015, + "loss": 1.4561, + "step": 4680 + }, + { + "epoch": 0.4947257383966245, + "grad_norm": 0.682424783706665, + "learning_rate": 0.0015, + "loss": 1.4515, + "step": 4690 + }, + { + "epoch": 0.4957805907172996, + "grad_norm": 0.574175238609314, + "learning_rate": 0.0015, + "loss": 1.4604, + "step": 4700 + }, + { + "epoch": 0.49683544303797467, + "grad_norm": 0.593390166759491, + "learning_rate": 0.0015, + "loss": 1.462, + "step": 4710 + }, + { + "epoch": 0.4978902953586498, + "grad_norm": 0.6850962042808533, + "learning_rate": 0.0015, + "loss": 1.4572, + "step": 4720 + }, + { + "epoch": 0.4989451476793249, + "grad_norm": 0.585054874420166, + "learning_rate": 0.0015, + "loss": 1.4569, + "step": 4730 + }, + { + "epoch": 0.5, + "grad_norm": 0.5914109349250793, + "learning_rate": 0.0015, + "loss": 1.4559, + "step": 4740 + }, + { + "epoch": 0.5010548523206751, + "grad_norm": 0.8646875023841858, + "learning_rate": 0.0015, + "loss": 1.4754, + "step": 4750 + }, + { + "epoch": 0.5021097046413502, + "grad_norm": 0.5117290616035461, + "learning_rate": 0.0015, + "loss": 1.459, + "step": 4760 + }, + { + "epoch": 0.5031645569620253, + "grad_norm": 0.47725585103034973, + "learning_rate": 0.0015, + "loss": 1.4595, + "step": 4770 + }, + { + "epoch": 0.5042194092827004, + "grad_norm": 0.4821884334087372, + "learning_rate": 0.0015, + "loss": 1.4676, + "step": 4780 + }, + { + "epoch": 0.5052742616033755, + "grad_norm": 0.7134470343589783, + "learning_rate": 0.0015, + "loss": 1.4519, + "step": 4790 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 0.5524974465370178, + "learning_rate": 0.0015, + "loss": 1.4559, + "step": 4800 + }, + { + "epoch": 0.5073839662447257, + "grad_norm": 
0.5358974933624268, + "learning_rate": 0.0015, + "loss": 1.4546, + "step": 4810 + }, + { + "epoch": 0.5084388185654009, + "grad_norm": 0.7514449954032898, + "learning_rate": 0.0015, + "loss": 1.474, + "step": 4820 + }, + { + "epoch": 0.509493670886076, + "grad_norm": 0.5801400542259216, + "learning_rate": 0.0015, + "loss": 1.4584, + "step": 4830 + }, + { + "epoch": 0.510548523206751, + "grad_norm": 0.5107520818710327, + "learning_rate": 0.0015, + "loss": 1.4498, + "step": 4840 + }, + { + "epoch": 0.5116033755274262, + "grad_norm": 0.503389298915863, + "learning_rate": 0.0015, + "loss": 1.4641, + "step": 4850 + }, + { + "epoch": 0.5126582278481012, + "grad_norm": 0.6112860441207886, + "learning_rate": 0.0015, + "loss": 1.4503, + "step": 4860 + }, + { + "epoch": 0.5137130801687764, + "grad_norm": 0.7044478058815002, + "learning_rate": 0.0015, + "loss": 1.444, + "step": 4870 + }, + { + "epoch": 0.5147679324894515, + "grad_norm": 0.9283525943756104, + "learning_rate": 0.0015, + "loss": 1.4676, + "step": 4880 + }, + { + "epoch": 0.5158227848101266, + "grad_norm": 0.5343573093414307, + "learning_rate": 0.0015, + "loss": 1.4547, + "step": 4890 + }, + { + "epoch": 0.5168776371308017, + "grad_norm": 0.5741806030273438, + "learning_rate": 0.0015, + "loss": 1.4633, + "step": 4900 + }, + { + "epoch": 0.5179324894514767, + "grad_norm": 0.49245160818099976, + "learning_rate": 0.0015, + "loss": 1.4589, + "step": 4910 + }, + { + "epoch": 0.5189873417721519, + "grad_norm": 1.0656530857086182, + "learning_rate": 0.0015, + "loss": 1.4482, + "step": 4920 + }, + { + "epoch": 0.520042194092827, + "grad_norm": 0.8917819261550903, + "learning_rate": 0.0015, + "loss": 1.4562, + "step": 4930 + }, + { + "epoch": 0.5210970464135021, + "grad_norm": 0.5806669592857361, + "learning_rate": 0.0015, + "loss": 1.4565, + "step": 4940 + }, + { + "epoch": 0.5221518987341772, + "grad_norm": 0.6887030005455017, + "learning_rate": 0.0015, + "loss": 1.4625, + "step": 4950 + }, + { + "epoch": 
0.5232067510548524, + "grad_norm": 0.6455850005149841, + "learning_rate": 0.0015, + "loss": 1.443, + "step": 4960 + }, + { + "epoch": 0.5242616033755274, + "grad_norm": 0.6216683983802795, + "learning_rate": 0.0015, + "loss": 1.454, + "step": 4970 + }, + { + "epoch": 0.5253164556962026, + "grad_norm": 0.5712945461273193, + "learning_rate": 0.0015, + "loss": 1.4471, + "step": 4980 + }, + { + "epoch": 0.5263713080168776, + "grad_norm": 0.4948515295982361, + "learning_rate": 0.0015, + "loss": 1.4468, + "step": 4990 + }, + { + "epoch": 0.5274261603375527, + "grad_norm": 0.5773718357086182, + "learning_rate": 0.0015, + "loss": 1.4607, + "step": 5000 + }, + { + "epoch": 0.5284810126582279, + "grad_norm": 0.5605436563491821, + "learning_rate": 0.0015, + "loss": 1.451, + "step": 5010 + }, + { + "epoch": 0.5295358649789029, + "grad_norm": 0.5638588666915894, + "learning_rate": 0.0015, + "loss": 1.4471, + "step": 5020 + }, + { + "epoch": 0.5305907172995781, + "grad_norm": 0.4892915189266205, + "learning_rate": 0.0015, + "loss": 1.4307, + "step": 5030 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 0.5163865089416504, + "learning_rate": 0.0015, + "loss": 1.4585, + "step": 5040 + }, + { + "epoch": 0.5327004219409283, + "grad_norm": 0.5103743076324463, + "learning_rate": 0.0015, + "loss": 1.4489, + "step": 5050 + }, + { + "epoch": 0.5337552742616034, + "grad_norm": 0.6309067606925964, + "learning_rate": 0.0015, + "loss": 1.4599, + "step": 5060 + }, + { + "epoch": 0.5348101265822784, + "grad_norm": 0.6969106197357178, + "learning_rate": 0.0015, + "loss": 1.4496, + "step": 5070 + }, + { + "epoch": 0.5358649789029536, + "grad_norm": 0.6979928016662598, + "learning_rate": 0.0015, + "loss": 1.4631, + "step": 5080 + }, + { + "epoch": 0.5369198312236287, + "grad_norm": 0.5774954557418823, + "learning_rate": 0.0015, + "loss": 1.4531, + "step": 5090 + }, + { + "epoch": 0.5379746835443038, + "grad_norm": 0.5764656662940979, + "learning_rate": 0.0015, + "loss": 1.4368, + "step": 
5100 + }, + { + "epoch": 0.5390295358649789, + "grad_norm": 0.6822845935821533, + "learning_rate": 0.0015, + "loss": 1.4515, + "step": 5110 + }, + { + "epoch": 0.540084388185654, + "grad_norm": 0.6386136412620544, + "learning_rate": 0.0015, + "loss": 1.4481, + "step": 5120 + }, + { + "epoch": 0.5411392405063291, + "grad_norm": 0.5767738223075867, + "learning_rate": 0.0015, + "loss": 1.4451, + "step": 5130 + }, + { + "epoch": 0.5421940928270043, + "grad_norm": 0.6771249771118164, + "learning_rate": 0.0015, + "loss": 1.46, + "step": 5140 + }, + { + "epoch": 0.5432489451476793, + "grad_norm": 0.5186454057693481, + "learning_rate": 0.0015, + "loss": 1.455, + "step": 5150 + }, + { + "epoch": 0.5443037974683544, + "grad_norm": 0.5029733777046204, + "learning_rate": 0.0015, + "loss": 1.4483, + "step": 5160 + }, + { + "epoch": 0.5453586497890295, + "grad_norm": 0.5092059373855591, + "learning_rate": 0.0015, + "loss": 1.448, + "step": 5170 + }, + { + "epoch": 0.5464135021097046, + "grad_norm": 0.6321627497673035, + "learning_rate": 0.0015, + "loss": 1.4585, + "step": 5180 + }, + { + "epoch": 0.5474683544303798, + "grad_norm": 0.5579228401184082, + "learning_rate": 0.0015, + "loss": 1.4437, + "step": 5190 + }, + { + "epoch": 0.5485232067510548, + "grad_norm": 0.6607871651649475, + "learning_rate": 0.0015, + "loss": 1.4559, + "step": 5200 + }, + { + "epoch": 0.54957805907173, + "grad_norm": 0.8128629326820374, + "learning_rate": 0.0015, + "loss": 1.4422, + "step": 5210 + }, + { + "epoch": 0.5506329113924051, + "grad_norm": 0.5744751691818237, + "learning_rate": 0.0015, + "loss": 1.4501, + "step": 5220 + }, + { + "epoch": 0.5516877637130801, + "grad_norm": 0.49478304386138916, + "learning_rate": 0.0015, + "loss": 1.4443, + "step": 5230 + }, + { + "epoch": 0.5527426160337553, + "grad_norm": 0.5468266010284424, + "learning_rate": 0.0015, + "loss": 1.4515, + "step": 5240 + }, + { + "epoch": 0.5537974683544303, + "grad_norm": 0.8689888715744019, + "learning_rate": 0.0015, + 
"loss": 1.4417, + "step": 5250 + }, + { + "epoch": 0.5548523206751055, + "grad_norm": 0.5586831569671631, + "learning_rate": 0.0015, + "loss": 1.449, + "step": 5260 + }, + { + "epoch": 0.5559071729957806, + "grad_norm": 0.49822619557380676, + "learning_rate": 0.0015, + "loss": 1.4535, + "step": 5270 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 0.779325008392334, + "learning_rate": 0.0015, + "loss": 1.4429, + "step": 5280 + }, + { + "epoch": 0.5580168776371308, + "grad_norm": 0.525687038898468, + "learning_rate": 0.0015, + "loss": 1.445, + "step": 5290 + }, + { + "epoch": 0.5590717299578059, + "grad_norm": 0.7627896070480347, + "learning_rate": 0.0015, + "loss": 1.4522, + "step": 5300 + }, + { + "epoch": 0.560126582278481, + "grad_norm": 0.7745131254196167, + "learning_rate": 0.0015, + "loss": 1.4402, + "step": 5310 + }, + { + "epoch": 0.5611814345991561, + "grad_norm": 0.7163421511650085, + "learning_rate": 0.0015, + "loss": 1.4404, + "step": 5320 + }, + { + "epoch": 0.5622362869198312, + "grad_norm": 0.4869208335876465, + "learning_rate": 0.0015, + "loss": 1.4489, + "step": 5330 + }, + { + "epoch": 0.5632911392405063, + "grad_norm": 0.5700405240058899, + "learning_rate": 0.0015, + "loss": 1.4406, + "step": 5340 + }, + { + "epoch": 0.5643459915611815, + "grad_norm": 0.4926275908946991, + "learning_rate": 0.0015, + "loss": 1.4348, + "step": 5350 + }, + { + "epoch": 0.5654008438818565, + "grad_norm": 0.6980654001235962, + "learning_rate": 0.0015, + "loss": 1.4462, + "step": 5360 + }, + { + "epoch": 0.5664556962025317, + "grad_norm": 0.4659419655799866, + "learning_rate": 0.0015, + "loss": 1.4437, + "step": 5370 + }, + { + "epoch": 0.5675105485232067, + "grad_norm": 0.5911593437194824, + "learning_rate": 0.0015, + "loss": 1.4395, + "step": 5380 + }, + { + "epoch": 0.5685654008438819, + "grad_norm": 0.6820240616798401, + "learning_rate": 0.0015, + "loss": 1.4387, + "step": 5390 + }, + { + "epoch": 0.569620253164557, + "grad_norm": 0.5553368926048279, + 
"learning_rate": 0.0015, + "loss": 1.439, + "step": 5400 + }, + { + "epoch": 0.570675105485232, + "grad_norm": 0.5284712910652161, + "learning_rate": 0.0015, + "loss": 1.4402, + "step": 5410 + }, + { + "epoch": 0.5717299578059072, + "grad_norm": 0.4866778552532196, + "learning_rate": 0.0015, + "loss": 1.4353, + "step": 5420 + }, + { + "epoch": 0.5727848101265823, + "grad_norm": 0.6628943085670471, + "learning_rate": 0.0015, + "loss": 1.4335, + "step": 5430 + }, + { + "epoch": 0.5738396624472574, + "grad_norm": 0.5917623043060303, + "learning_rate": 0.0015, + "loss": 1.4297, + "step": 5440 + }, + { + "epoch": 0.5748945147679325, + "grad_norm": 0.5535296201705933, + "learning_rate": 0.0015, + "loss": 1.4389, + "step": 5450 + }, + { + "epoch": 0.5759493670886076, + "grad_norm": 0.7727319598197937, + "learning_rate": 0.0015, + "loss": 1.4402, + "step": 5460 + }, + { + "epoch": 0.5770042194092827, + "grad_norm": 0.7488126754760742, + "learning_rate": 0.0015, + "loss": 1.441, + "step": 5470 + }, + { + "epoch": 0.5780590717299579, + "grad_norm": 0.49722662568092346, + "learning_rate": 0.0015, + "loss": 1.4451, + "step": 5480 + }, + { + "epoch": 0.5791139240506329, + "grad_norm": 0.5322569012641907, + "learning_rate": 0.0015, + "loss": 1.4477, + "step": 5490 + }, + { + "epoch": 0.580168776371308, + "grad_norm": 0.5261563062667847, + "learning_rate": 0.0015, + "loss": 1.4374, + "step": 5500 + }, + { + "epoch": 0.5812236286919831, + "grad_norm": 0.7318642735481262, + "learning_rate": 0.0015, + "loss": 1.444, + "step": 5510 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 0.5357314944267273, + "learning_rate": 0.0015, + "loss": 1.4344, + "step": 5520 + }, + { + "epoch": 0.5833333333333334, + "grad_norm": 0.8815802931785583, + "learning_rate": 0.0015, + "loss": 1.4308, + "step": 5530 + }, + { + "epoch": 0.5843881856540084, + "grad_norm": 0.7622281908988953, + "learning_rate": 0.0015, + "loss": 1.4353, + "step": 5540 + }, + { + "epoch": 0.5854430379746836, + "grad_norm": 
1.0153924226760864, + "learning_rate": 0.0015, + "loss": 1.4541, + "step": 5550 + }, + { + "epoch": 0.5864978902953587, + "grad_norm": 0.4632505178451538, + "learning_rate": 0.0015, + "loss": 1.445, + "step": 5560 + }, + { + "epoch": 0.5875527426160337, + "grad_norm": 0.4806533753871918, + "learning_rate": 0.0015, + "loss": 1.4308, + "step": 5570 + }, + { + "epoch": 0.5886075949367089, + "grad_norm": 0.5095115900039673, + "learning_rate": 0.0015, + "loss": 1.4322, + "step": 5580 + }, + { + "epoch": 0.5896624472573839, + "grad_norm": 0.5210410952568054, + "learning_rate": 0.0015, + "loss": 1.4353, + "step": 5590 + }, + { + "epoch": 0.5907172995780591, + "grad_norm": 0.916880190372467, + "learning_rate": 0.0015, + "loss": 1.4326, + "step": 5600 + }, + { + "epoch": 0.5917721518987342, + "grad_norm": 0.5186699032783508, + "learning_rate": 0.0015, + "loss": 1.4402, + "step": 5610 + }, + { + "epoch": 0.5928270042194093, + "grad_norm": 0.5431554913520813, + "learning_rate": 0.0015, + "loss": 1.4485, + "step": 5620 + }, + { + "epoch": 0.5938818565400844, + "grad_norm": 0.795992910861969, + "learning_rate": 0.0015, + "loss": 1.4392, + "step": 5630 + }, + { + "epoch": 0.5949367088607594, + "grad_norm": 0.6337339282035828, + "learning_rate": 0.0015, + "loss": 1.4493, + "step": 5640 + }, + { + "epoch": 0.5959915611814346, + "grad_norm": 0.5427796244621277, + "learning_rate": 0.0015, + "loss": 1.4438, + "step": 5650 + }, + { + "epoch": 0.5970464135021097, + "grad_norm": 0.8991336226463318, + "learning_rate": 0.0015, + "loss": 1.4417, + "step": 5660 + }, + { + "epoch": 0.5981012658227848, + "grad_norm": 0.46260586380958557, + "learning_rate": 0.0015, + "loss": 1.4432, + "step": 5670 + }, + { + "epoch": 0.5991561181434599, + "grad_norm": 0.5012056827545166, + "learning_rate": 0.0015, + "loss": 1.4457, + "step": 5680 + }, + { + "epoch": 0.6002109704641351, + "grad_norm": 0.8590911030769348, + "learning_rate": 0.0015, + "loss": 1.4259, + "step": 5690 + }, + { + "epoch": 
0.6012658227848101, + "grad_norm": 0.6026596426963806, + "learning_rate": 0.0015, + "loss": 1.4315, + "step": 5700 + }, + { + "epoch": 0.6023206751054853, + "grad_norm": 0.500627338886261, + "learning_rate": 0.0015, + "loss": 1.4334, + "step": 5710 + }, + { + "epoch": 0.6033755274261603, + "grad_norm": 0.5152232050895691, + "learning_rate": 0.0015, + "loss": 1.4378, + "step": 5720 + }, + { + "epoch": 0.6044303797468354, + "grad_norm": 0.640267014503479, + "learning_rate": 0.0015, + "loss": 1.4349, + "step": 5730 + }, + { + "epoch": 0.6054852320675106, + "grad_norm": 0.4892360270023346, + "learning_rate": 0.0015, + "loss": 1.4318, + "step": 5740 + }, + { + "epoch": 0.6065400843881856, + "grad_norm": 0.5285003781318665, + "learning_rate": 0.0015, + "loss": 1.408, + "step": 5750 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 0.6576545238494873, + "learning_rate": 0.0015, + "loss": 1.4222, + "step": 5760 + }, + { + "epoch": 0.6086497890295358, + "grad_norm": 0.5402347445487976, + "learning_rate": 0.0015, + "loss": 1.4228, + "step": 5770 + }, + { + "epoch": 0.609704641350211, + "grad_norm": 0.5078423619270325, + "learning_rate": 0.0015, + "loss": 1.4292, + "step": 5780 + }, + { + "epoch": 0.6107594936708861, + "grad_norm": 0.5238718390464783, + "learning_rate": 0.0015, + "loss": 1.4272, + "step": 5790 + }, + { + "epoch": 0.6118143459915611, + "grad_norm": 0.5361505150794983, + "learning_rate": 0.0015, + "loss": 1.4318, + "step": 5800 + }, + { + "epoch": 0.6128691983122363, + "grad_norm": 0.518470823764801, + "learning_rate": 0.0015, + "loss": 1.4362, + "step": 5810 + }, + { + "epoch": 0.6139240506329114, + "grad_norm": 0.5448722839355469, + "learning_rate": 0.0015, + "loss": 1.4482, + "step": 5820 + }, + { + "epoch": 0.6149789029535865, + "grad_norm": 0.6580650806427002, + "learning_rate": 0.0015, + "loss": 1.4313, + "step": 5830 + }, + { + "epoch": 0.6160337552742616, + "grad_norm": 0.5401043891906738, + "learning_rate": 0.0015, + "loss": 1.4419, + "step": 5840 
+ }, + { + "epoch": 0.6170886075949367, + "grad_norm": 0.5187148451805115, + "learning_rate": 0.0015, + "loss": 1.4435, + "step": 5850 + }, + { + "epoch": 0.6181434599156118, + "grad_norm": 0.5287416577339172, + "learning_rate": 0.0015, + "loss": 1.433, + "step": 5860 + }, + { + "epoch": 0.619198312236287, + "grad_norm": 0.47637939453125, + "learning_rate": 0.0015, + "loss": 1.4278, + "step": 5870 + }, + { + "epoch": 0.620253164556962, + "grad_norm": 0.5101406574249268, + "learning_rate": 0.0015, + "loss": 1.4351, + "step": 5880 + }, + { + "epoch": 0.6213080168776371, + "grad_norm": 0.5212313532829285, + "learning_rate": 0.0015, + "loss": 1.4282, + "step": 5890 + }, + { + "epoch": 0.6223628691983122, + "grad_norm": 0.49211445450782776, + "learning_rate": 0.0015, + "loss": 1.4304, + "step": 5900 + }, + { + "epoch": 0.6234177215189873, + "grad_norm": 0.71826171875, + "learning_rate": 0.0015, + "loss": 1.4259, + "step": 5910 + }, + { + "epoch": 0.6244725738396625, + "grad_norm": 0.7862614393234253, + "learning_rate": 0.0015, + "loss": 1.4205, + "step": 5920 + }, + { + "epoch": 0.6255274261603375, + "grad_norm": 0.6212243437767029, + "learning_rate": 0.0015, + "loss": 1.4265, + "step": 5930 + }, + { + "epoch": 0.6265822784810127, + "grad_norm": 0.5889667868614197, + "learning_rate": 0.0015, + "loss": 1.4336, + "step": 5940 + }, + { + "epoch": 0.6276371308016878, + "grad_norm": 0.5171526670455933, + "learning_rate": 0.0015, + "loss": 1.4217, + "step": 5950 + }, + { + "epoch": 0.6286919831223629, + "grad_norm": 0.4913538098335266, + "learning_rate": 0.0015, + "loss": 1.4293, + "step": 5960 + }, + { + "epoch": 0.629746835443038, + "grad_norm": 0.5577138066291809, + "learning_rate": 0.0015, + "loss": 1.4321, + "step": 5970 + }, + { + "epoch": 0.630801687763713, + "grad_norm": 0.5826645493507385, + "learning_rate": 0.0015, + "loss": 1.417, + "step": 5980 + }, + { + "epoch": 0.6318565400843882, + "grad_norm": 0.7300624251365662, + "learning_rate": 0.0015, + "loss": 1.4326, + 
"step": 5990 + }, + { + "epoch": 0.6329113924050633, + "grad_norm": 0.5512855648994446, + "learning_rate": 0.0015, + "loss": 1.4392, + "step": 6000 + }, + { + "epoch": 0.6339662447257384, + "grad_norm": 0.4720965325832367, + "learning_rate": 0.0015, + "loss": 1.4295, + "step": 6010 + }, + { + "epoch": 0.6350210970464135, + "grad_norm": 0.7857840657234192, + "learning_rate": 0.0015, + "loss": 1.4315, + "step": 6020 + }, + { + "epoch": 0.6360759493670886, + "grad_norm": 0.5015669465065002, + "learning_rate": 0.0015, + "loss": 1.4127, + "step": 6030 + }, + { + "epoch": 0.6371308016877637, + "grad_norm": 0.5542659759521484, + "learning_rate": 0.0015, + "loss": 1.433, + "step": 6040 + }, + { + "epoch": 0.6381856540084389, + "grad_norm": 0.5255542993545532, + "learning_rate": 0.0015, + "loss": 1.4176, + "step": 6050 + }, + { + "epoch": 0.6392405063291139, + "grad_norm": 0.5467193126678467, + "learning_rate": 0.0015, + "loss": 1.4318, + "step": 6060 + }, + { + "epoch": 0.640295358649789, + "grad_norm": 0.6633917093276978, + "learning_rate": 0.0015, + "loss": 1.4249, + "step": 6070 + }, + { + "epoch": 0.6413502109704642, + "grad_norm": 0.4869944155216217, + "learning_rate": 0.0015, + "loss": 1.4248, + "step": 6080 + }, + { + "epoch": 0.6424050632911392, + "grad_norm": 0.6345862150192261, + "learning_rate": 0.0015, + "loss": 1.4247, + "step": 6090 + }, + { + "epoch": 0.6434599156118144, + "grad_norm": 0.5846347808837891, + "learning_rate": 0.0015, + "loss": 1.4155, + "step": 6100 + }, + { + "epoch": 0.6445147679324894, + "grad_norm": 0.5600190758705139, + "learning_rate": 0.0015, + "loss": 1.4285, + "step": 6110 + }, + { + "epoch": 0.6455696202531646, + "grad_norm": 0.5569438338279724, + "learning_rate": 0.0015, + "loss": 1.4262, + "step": 6120 + }, + { + "epoch": 0.6466244725738397, + "grad_norm": 0.5091149806976318, + "learning_rate": 0.0015, + "loss": 1.4236, + "step": 6130 + }, + { + "epoch": 0.6476793248945147, + "grad_norm": 0.9297945499420166, + "learning_rate": 
0.0015, + "loss": 1.4242, + "step": 6140 + }, + { + "epoch": 0.6487341772151899, + "grad_norm": 0.5270900130271912, + "learning_rate": 0.0015, + "loss": 1.435, + "step": 6150 + }, + { + "epoch": 0.6497890295358649, + "grad_norm": 0.5317861437797546, + "learning_rate": 0.0015, + "loss": 1.4305, + "step": 6160 + }, + { + "epoch": 0.6508438818565401, + "grad_norm": 0.5058794021606445, + "learning_rate": 0.0014834368975312174, + "loss": 1.4039, + "step": 6170 + }, + { + "epoch": 0.6518987341772152, + "grad_norm": 0.5960896611213684, + "learning_rate": 0.0014629899726345957, + "loss": 1.4285, + "step": 6180 + }, + { + "epoch": 0.6529535864978903, + "grad_norm": 0.5103433728218079, + "learning_rate": 0.0014428248775471316, + "loss": 1.4316, + "step": 6190 + }, + { + "epoch": 0.6540084388185654, + "grad_norm": 0.7470860481262207, + "learning_rate": 0.00142293772767289, + "loss": 1.4222, + "step": 6200 + }, + { + "epoch": 0.6550632911392406, + "grad_norm": 0.5231701731681824, + "learning_rate": 0.001403324691959192, + "loss": 1.4167, + "step": 6210 + }, + { + "epoch": 0.6561181434599156, + "grad_norm": 0.47347888350486755, + "learning_rate": 0.0013839819921586025, + "loss": 1.4235, + "step": 6220 + }, + { + "epoch": 0.6571729957805907, + "grad_norm": 0.6941648125648499, + "learning_rate": 0.0013649059021010894, + "loss": 1.4119, + "step": 6230 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 0.5365994572639465, + "learning_rate": 0.0013460927469762154, + "loss": 1.4132, + "step": 6240 + }, + { + "epoch": 0.6592827004219409, + "grad_norm": 0.506763219833374, + "learning_rate": 0.0013275389026252255, + "loss": 1.4223, + "step": 6250 + }, + { + "epoch": 0.6603375527426161, + "grad_norm": 0.511383593082428, + "learning_rate": 0.0013092407948428887, + "loss": 1.4082, + "step": 6260 + }, + { + "epoch": 0.6613924050632911, + "grad_norm": 0.7219920754432678, + "learning_rate": 0.001291194898688966, + "loss": 1.4188, + "step": 6270 + }, + { + "epoch": 0.6624472573839663, + 
"grad_norm": 0.5096638798713684, + "learning_rate": 0.001273397737809166, + "loss": 1.414, + "step": 6280 + }, + { + "epoch": 0.6635021097046413, + "grad_norm": 0.4544900059700012, + "learning_rate": 0.001255845883765463, + "loss": 1.4105, + "step": 6290 + }, + { + "epoch": 0.6645569620253164, + "grad_norm": 0.6520730257034302, + "learning_rate": 0.001238535955375642, + "loss": 1.4045, + "step": 6300 + }, + { + "epoch": 0.6656118143459916, + "grad_norm": 0.5283231735229492, + "learning_rate": 0.0012214646180619506, + "loss": 1.4023, + "step": 6310 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.48720583319664, + "learning_rate": 0.001204628583208727, + "loss": 1.3973, + "step": 6320 + }, + { + "epoch": 0.6677215189873418, + "grad_norm": 0.5155741572380066, + "learning_rate": 0.0011880246075288827, + "loss": 1.4046, + "step": 6330 + }, + { + "epoch": 0.6687763713080169, + "grad_norm": 0.5268201231956482, + "learning_rate": 0.001171649492439115, + "loss": 1.3973, + "step": 6340 + }, + { + "epoch": 0.669831223628692, + "grad_norm": 0.49016422033309937, + "learning_rate": 0.0011555000834437364, + "loss": 1.4011, + "step": 6350 + }, + { + "epoch": 0.6708860759493671, + "grad_norm": 0.5848821401596069, + "learning_rate": 0.0011395732695269908, + "loss": 1.4003, + "step": 6360 + }, + { + "epoch": 0.6719409282700421, + "grad_norm": 0.5101801753044128, + "learning_rate": 0.0011238659825537505, + "loss": 1.3787, + "step": 6370 + }, + { + "epoch": 0.6729957805907173, + "grad_norm": 0.5121952295303345, + "learning_rate": 0.0011083751966784717, + "loss": 1.3821, + "step": 6380 + }, + { + "epoch": 0.6740506329113924, + "grad_norm": 1.0398744344711304, + "learning_rate": 0.0010930979277622953, + "loss": 1.3971, + "step": 6390 + }, + { + "epoch": 0.6751054852320675, + "grad_norm": 0.5912628769874573, + "learning_rate": 0.0010780312327981854, + "loss": 1.3963, + "step": 6400 + }, + { + "epoch": 0.6761603375527426, + "grad_norm": 0.47290322184562683, + "learning_rate": 
0.0010631722093439888, + "loss": 1.3885, + "step": 6410 + }, + { + "epoch": 0.6772151898734177, + "grad_norm": 0.4813903272151947, + "learning_rate": 0.00104851799496331, + "loss": 1.3794, + "step": 6420 + }, + { + "epoch": 0.6782700421940928, + "grad_norm": 0.4812224209308624, + "learning_rate": 0.0010340657666740914, + "loss": 1.3879, + "step": 6430 + }, + { + "epoch": 0.679324894514768, + "grad_norm": 0.6159099340438843, + "learning_rate": 0.0010198127404047975, + "loss": 1.3715, + "step": 6440 + }, + { + "epoch": 0.680379746835443, + "grad_norm": 0.528146505355835, + "learning_rate": 0.0010057561704580897, + "loss": 1.3755, + "step": 6450 + }, + { + "epoch": 0.6814345991561181, + "grad_norm": 0.5106189846992493, + "learning_rate": 0.0009918933489818985, + "loss": 1.3873, + "step": 6460 + }, + { + "epoch": 0.6824894514767933, + "grad_norm": 0.46552208065986633, + "learning_rate": 0.0009782216054477827, + "loss": 1.3795, + "step": 6470 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 0.6433939337730408, + "learning_rate": 0.0009647383061364801, + "loss": 1.386, + "step": 6480 + }, + { + "epoch": 0.6845991561181435, + "grad_norm": 0.45110300183296204, + "learning_rate": 0.0009514408536305495, + "loss": 1.3803, + "step": 6490 + }, + { + "epoch": 0.6856540084388185, + "grad_norm": 0.47798672318458557, + "learning_rate": 0.0009383266863140042, + "loss": 1.3944, + "step": 6500 + }, + { + "epoch": 0.6867088607594937, + "grad_norm": 0.484041690826416, + "learning_rate": 0.000925393277878844, + "loss": 1.3933, + "step": 6510 + }, + { + "epoch": 0.6877637130801688, + "grad_norm": 0.4432898163795471, + "learning_rate": 0.0009126381368383879, + "loss": 1.3769, + "step": 6520 + }, + { + "epoch": 0.6888185654008439, + "grad_norm": 0.5166557431221008, + "learning_rate": 0.0009000588060473156, + "loss": 1.3702, + "step": 6530 + }, + { + "epoch": 0.689873417721519, + "grad_norm": 0.554893434047699, + "learning_rate": 0.0008876528622283235, + "loss": 1.3785, + "step": 6540 
+ }, + { + "epoch": 0.6909282700421941, + "grad_norm": 0.4747559130191803, + "learning_rate": 0.0008754179155053053, + "loss": 1.3695, + "step": 6550 + }, + { + "epoch": 0.6919831223628692, + "grad_norm": 0.5023005604743958, + "learning_rate": 0.0008633516089429683, + "loss": 1.3726, + "step": 6560 + }, + { + "epoch": 0.6930379746835443, + "grad_norm": 0.4733406901359558, + "learning_rate": 0.0008514516180927928, + "loss": 1.3682, + "step": 6570 + }, + { + "epoch": 0.6940928270042194, + "grad_norm": 0.5170013904571533, + "learning_rate": 0.0008397156505452524, + "loss": 1.365, + "step": 6580 + }, + { + "epoch": 0.6951476793248945, + "grad_norm": 0.5229912400245667, + "learning_rate": 0.0008281414454882051, + "loss": 1.3693, + "step": 6590 + }, + { + "epoch": 0.6962025316455697, + "grad_norm": 0.5185620188713074, + "learning_rate": 0.0008167267732713704, + "loss": 1.3725, + "step": 6600 + }, + { + "epoch": 0.6972573839662447, + "grad_norm": 0.44782182574272156, + "learning_rate": 0.0008054694349768117, + "loss": 1.3589, + "step": 6610 + }, + { + "epoch": 0.6983122362869199, + "grad_norm": 0.4861155152320862, + "learning_rate": 0.0007943672619953359, + "loss": 1.3664, + "step": 6620 + }, + { + "epoch": 0.6993670886075949, + "grad_norm": 0.5309790372848511, + "learning_rate": 0.0007834181156087356, + "loss": 1.3607, + "step": 6630 + }, + { + "epoch": 0.70042194092827, + "grad_norm": 0.583339512348175, + "learning_rate": 0.0007726198865777852, + "loss": 1.3637, + "step": 6640 + }, + { + "epoch": 0.7014767932489452, + "grad_norm": 0.4820302426815033, + "learning_rate": 0.0007619704947359191, + "loss": 1.3567, + "step": 6650 + }, + { + "epoch": 0.7025316455696202, + "grad_norm": 0.4411340653896332, + "learning_rate": 0.0007514678885885087, + "loss": 1.3555, + "step": 6660 + }, + { + "epoch": 0.7035864978902954, + "grad_norm": 0.5487621426582336, + "learning_rate": 0.0007411100449176633, + "loss": 1.3593, + "step": 6670 + }, + { + "epoch": 0.7046413502109705, + 
"grad_norm": 0.45803865790367126, + "learning_rate": 0.0007308949683924791, + "loss": 1.361, + "step": 6680 + }, + { + "epoch": 0.7056962025316456, + "grad_norm": 0.5200080871582031, + "learning_rate": 0.000720820691184658, + "loss": 1.3527, + "step": 6690 + }, + { + "epoch": 0.7067510548523207, + "grad_norm": 0.5422410368919373, + "learning_rate": 0.0007108852725894269, + "loss": 1.355, + "step": 6700 + }, + { + "epoch": 0.7078059071729957, + "grad_norm": 0.549124538898468, + "learning_rate": 0.000701086798651681, + "loss": 1.3548, + "step": 6710 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 0.6717361807823181, + "learning_rate": 0.0006914233817972798, + "loss": 1.3441, + "step": 6720 + }, + { + "epoch": 0.709915611814346, + "grad_norm": 0.47141385078430176, + "learning_rate": 0.0006818931604694261, + "loss": 1.3531, + "step": 6730 + }, + { + "epoch": 0.7109704641350211, + "grad_norm": 0.5401121377944946, + "learning_rate": 0.0006724942987700563, + "loss": 1.3554, + "step": 6740 + }, + { + "epoch": 0.7120253164556962, + "grad_norm": 0.45940276980400085, + "learning_rate": 0.0006632249861061732, + "loss": 1.3587, + "step": 6750 + }, + { + "epoch": 0.7130801687763713, + "grad_norm": 0.6030657291412354, + "learning_rate": 0.0006540834368410549, + "loss": 1.3511, + "step": 6760 + }, + { + "epoch": 0.7141350210970464, + "grad_norm": 0.5427432060241699, + "learning_rate": 0.0006450678899502701, + "loss": 1.3525, + "step": 6770 + }, + { + "epoch": 0.7151898734177216, + "grad_norm": 0.47614455223083496, + "learning_rate": 0.0006361766086824345, + "loss": 1.3521, + "step": 6780 + }, + { + "epoch": 0.7162447257383966, + "grad_norm": 0.521483302116394, + "learning_rate": 0.000627407880224645, + "loss": 1.3564, + "step": 6790 + }, + { + "epoch": 0.7172995780590717, + "grad_norm": 0.4478306174278259, + "learning_rate": 0.0006187600153725225, + "loss": 1.3411, + "step": 6800 + }, + { + "epoch": 0.7183544303797469, + "grad_norm": 0.7660099864006042, + "learning_rate": 
0.0006102313482048055, + "loss": 1.3471, + "step": 6810 + }, + { + "epoch": 0.7194092827004219, + "grad_norm": 0.5013904571533203, + "learning_rate": 0.0006018202357624274, + "loss": 1.3473, + "step": 6820 + }, + { + "epoch": 0.7204641350210971, + "grad_norm": 0.5144420266151428, + "learning_rate": 0.0005935250577320168, + "loss": 1.3428, + "step": 6830 + }, + { + "epoch": 0.7215189873417721, + "grad_norm": 0.5867617726325989, + "learning_rate": 0.0005853442161337618, + "loss": 1.3347, + "step": 6840 + }, + { + "epoch": 0.7225738396624473, + "grad_norm": 0.4562567472457886, + "learning_rate": 0.0005772761350135759, + "loss": 1.3429, + "step": 6850 + }, + { + "epoch": 0.7236286919831224, + "grad_norm": 0.5033745765686035, + "learning_rate": 0.0005693192601395058, + "loss": 1.3374, + "step": 6860 + }, + { + "epoch": 0.7246835443037974, + "grad_norm": 0.458080530166626, + "learning_rate": 0.000561472058702326, + "loss": 1.3334, + "step": 6870 + }, + { + "epoch": 0.7257383966244726, + "grad_norm": 0.5429106950759888, + "learning_rate": 0.000553733019020258, + "loss": 1.3422, + "step": 6880 + }, + { + "epoch": 0.7267932489451476, + "grad_norm": 0.4628061056137085, + "learning_rate": 0.0005461006502477612, + "loss": 1.3305, + "step": 6890 + }, + { + "epoch": 0.7278481012658228, + "grad_norm": 0.48667672276496887, + "learning_rate": 0.0005385734820883369, + "loss": 1.3325, + "step": 6900 + }, + { + "epoch": 0.7289029535864979, + "grad_norm": 0.48949795961380005, + "learning_rate": 0.0005311500645112907, + "loss": 1.3571, + "step": 6910 + }, + { + "epoch": 0.729957805907173, + "grad_norm": 0.5208607316017151, + "learning_rate": 0.0005238289674723993, + "loss": 1.3346, + "step": 6920 + }, + { + "epoch": 0.7310126582278481, + "grad_norm": 0.44086530804634094, + "learning_rate": 0.0005166087806384274, + "loss": 1.3396, + "step": 6930 + }, + { + "epoch": 0.7320675105485233, + "grad_norm": 0.49967527389526367, + "learning_rate": 0.0005094881131154418, + "loss": 1.3422, + 
"step": 6940 + }, + { + "epoch": 0.7331223628691983, + "grad_norm": 0.46255460381507874, + "learning_rate": 0.0005024655931808696, + "loss": 1.339, + "step": 6950 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 0.458722323179245, + "learning_rate": 0.0004955398680192508, + "loss": 1.3294, + "step": 6960 + }, + { + "epoch": 0.7352320675105485, + "grad_norm": 0.45329201221466064, + "learning_rate": 0.000488709603461632, + "loss": 1.3266, + "step": 6970 + }, + { + "epoch": 0.7362869198312236, + "grad_norm": 0.48035183548927307, + "learning_rate": 0.000481973483728553, + "loss": 1.3301, + "step": 6980 + }, + { + "epoch": 0.7373417721518988, + "grad_norm": 0.4558931291103363, + "learning_rate": 0.0004753302111765748, + "loss": 1.3267, + "step": 6990 + }, + { + "epoch": 0.7383966244725738, + "grad_norm": 0.48222649097442627, + "learning_rate": 0.0004687785060483032, + "loss": 1.3419, + "step": 7000 + }, + { + "epoch": 0.739451476793249, + "grad_norm": 0.4744454324245453, + "learning_rate": 0.0004623171062258558, + "loss": 1.3102, + "step": 7010 + }, + { + "epoch": 0.740506329113924, + "grad_norm": 0.6059104204177856, + "learning_rate": 0.0004559447669877288, + "loss": 1.3238, + "step": 7020 + }, + { + "epoch": 0.7415611814345991, + "grad_norm": 0.483710914850235, + "learning_rate": 0.00044966026076901413, + "loss": 1.3307, + "step": 7030 + }, + { + "epoch": 0.7426160337552743, + "grad_norm": 0.7889571785926819, + "learning_rate": 0.00044346237692492177, + "loss": 1.3351, + "step": 7040 + }, + { + "epoch": 0.7436708860759493, + "grad_norm": 0.4923046827316284, + "learning_rate": 0.0004373499214975615, + "loss": 1.3228, + "step": 7050 + }, + { + "epoch": 0.7447257383966245, + "grad_norm": 0.5098389387130737, + "learning_rate": 0.0004313217169859396, + "loss": 1.3265, + "step": 7060 + }, + { + "epoch": 0.7457805907172996, + "grad_norm": 0.525782585144043, + "learning_rate": 0.0004253766021191256, + "loss": 1.3322, + "step": 7070 + }, + { + "epoch": 
0.7468354430379747, + "grad_norm": 0.45359766483306885, + "learning_rate": 0.00041951343163254497, + "loss": 1.3282, + "step": 7080 + }, + { + "epoch": 0.7478902953586498, + "grad_norm": 0.5485321283340454, + "learning_rate": 0.00041373107604735626, + "loss": 1.326, + "step": 7090 + }, + { + "epoch": 0.7489451476793249, + "grad_norm": 0.45252397656440735, + "learning_rate": 0.0004080284214528687, + "loss": 1.3231, + "step": 7100 + }, + { + "epoch": 0.75, + "grad_norm": 0.46911129355430603, + "learning_rate": 0.0004024043692919589, + "loss": 1.3321, + "step": 7110 + }, + { + "epoch": 0.7510548523206751, + "grad_norm": 0.6181014776229858, + "learning_rate": 0.0003968578361494449, + "loss": 1.3265, + "step": 7120 + }, + { + "epoch": 0.7521097046413502, + "grad_norm": 0.4632599353790283, + "learning_rate": 0.000391387753543378, + "loss": 1.3333, + "step": 7130 + }, + { + "epoch": 0.7531645569620253, + "grad_norm": 0.47347453236579895, + "learning_rate": 0.00038599306771921023, + "loss": 1.3179, + "step": 7140 + }, + { + "epoch": 0.7542194092827004, + "grad_norm": 0.5055296421051025, + "learning_rate": 0.0003806727394468004, + "loss": 1.3138, + "step": 7150 + }, + { + "epoch": 0.7552742616033755, + "grad_norm": 0.49436309933662415, + "learning_rate": 0.0003754257438202162, + "loss": 1.3251, + "step": 7160 + }, + { + "epoch": 0.7563291139240507, + "grad_norm": 0.5135729312896729, + "learning_rate": 0.0003702510700602974, + "loss": 1.3313, + "step": 7170 + }, + { + "epoch": 0.7573839662447257, + "grad_norm": 0.474187433719635, + "learning_rate": 0.0003651477213199393, + "loss": 1.3079, + "step": 7180 + }, + { + "epoch": 0.7584388185654009, + "grad_norm": 0.45640918612480164, + "learning_rate": 0.000360114714492061, + "loss": 1.3116, + "step": 7190 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 0.48335134983062744, + "learning_rate": 0.0003551510800202195, + "loss": 1.3161, + "step": 7200 + }, + { + "epoch": 0.760548523206751, + "grad_norm": 0.6336145401000977, + 
"learning_rate": 0.0003502558617118353, + "loss": 1.3199, + "step": 7210 + }, + { + "epoch": 0.7616033755274262, + "grad_norm": 0.45102179050445557, + "learning_rate": 0.0003454281165539914, + "loss": 1.3327, + "step": 7220 + }, + { + "epoch": 0.7626582278481012, + "grad_norm": 0.4698895215988159, + "learning_rate": 0.00034066691453177176, + "loss": 1.3291, + "step": 7230 + }, + { + "epoch": 0.7637130801687764, + "grad_norm": 0.48220446705818176, + "learning_rate": 0.0003359713384491037, + "loss": 1.3238, + "step": 7240 + }, + { + "epoch": 0.7647679324894515, + "grad_norm": 0.45443829894065857, + "learning_rate": 0.00033134048375206944, + "loss": 1.3214, + "step": 7250 + }, + { + "epoch": 0.7658227848101266, + "grad_norm": 0.47124338150024414, + "learning_rate": 0.0003267734583546536, + "loss": 1.3166, + "step": 7260 + }, + { + "epoch": 0.7668776371308017, + "grad_norm": 0.451641321182251, + "learning_rate": 0.00032226938246689157, + "loss": 1.3144, + "step": 7270 + }, + { + "epoch": 0.7679324894514767, + "grad_norm": 0.45359188318252563, + "learning_rate": 0.0003178273884253874, + "loss": 1.3152, + "step": 7280 + }, + { + "epoch": 0.7689873417721519, + "grad_norm": 0.5116612315177917, + "learning_rate": 0.0003134466205261674, + "loss": 1.3269, + "step": 7290 + }, + { + "epoch": 0.770042194092827, + "grad_norm": 0.48015034198760986, + "learning_rate": 0.0003091262348598378, + "loss": 1.3301, + "step": 7300 + }, + { + "epoch": 0.7710970464135021, + "grad_norm": 0.45035305619239807, + "learning_rate": 0.0003048653991490141, + "loss": 1.3112, + "step": 7310 + }, + { + "epoch": 0.7721518987341772, + "grad_norm": 0.4732269048690796, + "learning_rate": 0.00030066329258799187, + "loss": 1.3067, + "step": 7320 + }, + { + "epoch": 0.7732067510548524, + "grad_norm": 0.5562229156494141, + "learning_rate": 0.0002965191056846266, + "loss": 1.3113, + "step": 7330 + }, + { + "epoch": 0.7742616033755274, + "grad_norm": 0.5415487885475159, + "learning_rate": 0.000292432040104394, + 
"loss": 1.3091, + "step": 7340 + }, + { + "epoch": 0.7753164556962026, + "grad_norm": 0.4745350778102875, + "learning_rate": 0.00028840130851659853, + "loss": 1.3042, + "step": 7350 + }, + { + "epoch": 0.7763713080168776, + "grad_norm": 0.4584297239780426, + "learning_rate": 0.0002844261344427028, + "loss": 1.3122, + "step": 7360 + }, + { + "epoch": 0.7774261603375527, + "grad_norm": 0.5179589986801147, + "learning_rate": 0.0002805057521067471, + "loss": 1.3135, + "step": 7370 + }, + { + "epoch": 0.7784810126582279, + "grad_norm": 0.4719301760196686, + "learning_rate": 0.00027663940628783017, + "loss": 1.3013, + "step": 7380 + }, + { + "epoch": 0.7795358649789029, + "grad_norm": 0.45558246970176697, + "learning_rate": 0.00027282635217462393, + "loss": 1.3106, + "step": 7390 + }, + { + "epoch": 0.7805907172995781, + "grad_norm": 0.47958579659461975, + "learning_rate": 0.0002690658552218937, + "loss": 1.3187, + "step": 7400 + }, + { + "epoch": 0.7816455696202531, + "grad_norm": 0.44435548782348633, + "learning_rate": 0.00026535719100899516, + "loss": 1.2973, + "step": 7410 + }, + { + "epoch": 0.7827004219409283, + "grad_norm": 0.4782116115093231, + "learning_rate": 0.00026169964510032245, + "loss": 1.3045, + "step": 7420 + }, + { + "epoch": 0.7837552742616034, + "grad_norm": 0.4518148601055145, + "learning_rate": 0.00025809251290767984, + "loss": 1.2897, + "step": 7430 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 0.4496789872646332, + "learning_rate": 0.00025453509955454957, + "loss": 1.2964, + "step": 7440 + }, + { + "epoch": 0.7858649789029536, + "grad_norm": 0.44662004709243774, + "learning_rate": 0.00025102671974223175, + "loss": 1.2985, + "step": 7450 + }, + { + "epoch": 0.7869198312236287, + "grad_norm": 0.45507729053497314, + "learning_rate": 0.00024756669761782815, + "loss": 1.3085, + "step": 7460 + }, + { + "epoch": 0.7879746835443038, + "grad_norm": 0.4831513464450836, + "learning_rate": 0.0002441543666440464, + "loss": 1.3018, + "step": 7470 + }, 
+ { + "epoch": 0.7890295358649789, + "grad_norm": 0.4986232817173004, + "learning_rate": 0.00024078906947079878, + "loss": 1.3051, + "step": 7480 + }, + { + "epoch": 0.790084388185654, + "grad_norm": 0.4647130072116852, + "learning_rate": 0.00023747015780857005, + "loss": 1.3119, + "step": 7490 + }, + { + "epoch": 0.7911392405063291, + "grad_norm": 0.4940994679927826, + "learning_rate": 0.00023419699230353144, + "loss": 1.3066, + "step": 7500 + }, + { + "epoch": 0.7921940928270043, + "grad_norm": 0.5053297877311707, + "learning_rate": 0.00023096894241437586, + "loss": 1.3132, + "step": 7510 + }, + { + "epoch": 0.7932489451476793, + "grad_norm": 0.43774738907814026, + "learning_rate": 0.00022778538629085056, + "loss": 1.2967, + "step": 7520 + }, + { + "epoch": 0.7943037974683544, + "grad_norm": 0.4893394410610199, + "learning_rate": 0.00022464571065396427, + "loss": 1.2988, + "step": 7530 + }, + { + "epoch": 0.7953586497890295, + "grad_norm": 0.4648034870624542, + "learning_rate": 0.00022154931067784521, + "loss": 1.2975, + "step": 7540 + }, + { + "epoch": 0.7964135021097046, + "grad_norm": 0.4623718857765198, + "learning_rate": 0.00021849558987322782, + "loss": 1.2934, + "step": 7550 + }, + { + "epoch": 0.7974683544303798, + "grad_norm": 0.49164512753486633, + "learning_rate": 0.0002154839599725452, + "loss": 1.297, + "step": 7560 + }, + { + "epoch": 0.7985232067510548, + "grad_norm": 0.4767402410507202, + "learning_rate": 0.00021251384081660544, + "loss": 1.3072, + "step": 7570 + }, + { + "epoch": 0.79957805907173, + "grad_norm": 0.45326516032218933, + "learning_rate": 0.0002095846602428303, + "loss": 1.3039, + "step": 7580 + }, + { + "epoch": 0.8006329113924051, + "grad_norm": 0.4626515805721283, + "learning_rate": 0.00020669585397503358, + "loss": 1.2943, + "step": 7590 + }, + { + "epoch": 0.8016877637130801, + "grad_norm": 0.5333366990089417, + "learning_rate": 0.0002038468655147195, + "loss": 1.2978, + "step": 7600 + }, + { + "epoch": 0.8027426160337553, + 
"grad_norm": 0.46550512313842773, + "learning_rate": 0.00020103714603387894, + "loss": 1.3115, + "step": 7610 + }, + { + "epoch": 0.8037974683544303, + "grad_norm": 0.471002995967865, + "learning_rate": 0.00019826615426926338, + "loss": 1.2836, + "step": 7620 + }, + { + "epoch": 0.8048523206751055, + "grad_norm": 0.4654116630554199, + "learning_rate": 0.00019553335641811625, + "loss": 1.3087, + "step": 7630 + }, + { + "epoch": 0.8059071729957806, + "grad_norm": 0.4869087040424347, + "learning_rate": 0.0001928382260353415, + "loss": 1.2974, + "step": 7640 + }, + { + "epoch": 0.8069620253164557, + "grad_norm": 0.45881882309913635, + "learning_rate": 0.00019018024393208902, + "loss": 1.3155, + "step": 7650 + }, + { + "epoch": 0.8080168776371308, + "grad_norm": 0.46567991375923157, + "learning_rate": 0.00018755889807573872, + "loss": 1.2918, + "step": 7660 + }, + { + "epoch": 0.8090717299578059, + "grad_norm": 0.45518314838409424, + "learning_rate": 0.00018497368349126262, + "loss": 1.2962, + "step": 7670 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 0.5840961933135986, + "learning_rate": 0.00018242410216394648, + "loss": 1.3106, + "step": 7680 + }, + { + "epoch": 0.8111814345991561, + "grad_norm": 0.5052498579025269, + "learning_rate": 0.0001799096629434529, + "loss": 1.2875, + "step": 7690 + }, + { + "epoch": 0.8122362869198312, + "grad_norm": 0.4697405695915222, + "learning_rate": 0.00017742988144920578, + "loss": 1.2954, + "step": 7700 + }, + { + "epoch": 0.8132911392405063, + "grad_norm": 0.6029151678085327, + "learning_rate": 0.00017498427997707976, + "loss": 1.2945, + "step": 7710 + }, + { + "epoch": 0.8143459915611815, + "grad_norm": 0.45356860756874084, + "learning_rate": 0.00017257238740737548, + "loss": 1.2984, + "step": 7720 + }, + { + "epoch": 0.8154008438818565, + "grad_norm": 0.4896215796470642, + "learning_rate": 0.00017019373911406307, + "loss": 1.3037, + "step": 7730 + }, + { + "epoch": 0.8164556962025317, + "grad_norm": 0.4377332329750061, + 
"learning_rate": 0.000167847876875277, + "loss": 1.304, + "step": 7740 + }, + { + "epoch": 0.8175105485232067, + "grad_norm": 0.49951452016830444, + "learning_rate": 0.00016553434878504428, + "loss": 1.2869, + "step": 7750 + }, + { + "epoch": 0.8185654008438819, + "grad_norm": 0.5342950820922852, + "learning_rate": 0.00016325270916622947, + "loss": 1.2892, + "step": 7760 + }, + { + "epoch": 0.819620253164557, + "grad_norm": 0.47930166125297546, + "learning_rate": 0.00016100251848467966, + "loss": 1.2981, + "step": 7770 + }, + { + "epoch": 0.820675105485232, + "grad_norm": 0.5480371117591858, + "learning_rate": 0.0001587833432645528, + "loss": 1.2891, + "step": 7780 + }, + { + "epoch": 0.8217299578059072, + "grad_norm": 0.5335478186607361, + "learning_rate": 0.00015659475600481292, + "loss": 1.3058, + "step": 7790 + }, + { + "epoch": 0.8227848101265823, + "grad_norm": 0.4627496898174286, + "learning_rate": 0.00015443633509687688, + "loss": 1.2934, + "step": 7800 + }, + { + "epoch": 0.8238396624472574, + "grad_norm": 0.4837168753147125, + "learning_rate": 0.00015230766474339536, + "loss": 1.2941, + "step": 7810 + }, + { + "epoch": 0.8248945147679325, + "grad_norm": 0.4632161557674408, + "learning_rate": 0.00015020833487815416, + "loss": 1.3028, + "step": 7820 + }, + { + "epoch": 0.8259493670886076, + "grad_norm": 0.462186723947525, + "learning_rate": 0.0001481379410870792, + "loss": 1.2894, + "step": 7830 + }, + { + "epoch": 0.8270042194092827, + "grad_norm": 0.45506542921066284, + "learning_rate": 0.00014609608453033013, + "loss": 1.2765, + "step": 7840 + }, + { + "epoch": 0.8280590717299579, + "grad_norm": 0.46044138073921204, + "learning_rate": 0.00014408237186546807, + "loss": 1.2864, + "step": 7850 + }, + { + "epoch": 0.8291139240506329, + "grad_norm": 0.4451313018798828, + "learning_rate": 0.00014209641517168273, + "loss": 1.2744, + "step": 7860 + }, + { + "epoch": 0.830168776371308, + "grad_norm": 0.5132520198822021, + "learning_rate": 0.00014013783187506265, 
+ "loss": 1.2894, + "step": 7870 + }, + { + "epoch": 0.8312236286919831, + "grad_norm": 0.48073527216911316, + "learning_rate": 0.00013820624467489697, + "loss": 1.3076, + "step": 7880 + }, + { + "epoch": 0.8322784810126582, + "grad_norm": 0.4535788297653198, + "learning_rate": 0.00013630128147099213, + "loss": 1.303, + "step": 7890 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.4657829701900482, + "learning_rate": 0.00013442257529199068, + "loss": 1.2821, + "step": 7900 + }, + { + "epoch": 0.8343881856540084, + "grad_norm": 0.4929972290992737, + "learning_rate": 0.00013256976422467803, + "loss": 1.2925, + "step": 7910 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 0.49591654539108276, + "learning_rate": 0.00013074249134426366, + "loss": 1.2919, + "step": 7920 + }, + { + "epoch": 0.8364978902953587, + "grad_norm": 0.47268256545066833, + "learning_rate": 0.0001289404046456233, + "loss": 1.3015, + "step": 7930 + }, + { + "epoch": 0.8375527426160337, + "grad_norm": 0.480153888463974, + "learning_rate": 0.0001271631569754887, + "loss": 1.2945, + "step": 7940 + }, + { + "epoch": 0.8386075949367089, + "grad_norm": 0.4759581685066223, + "learning_rate": 0.0001254104059655723, + "loss": 1.2985, + "step": 7950 + }, + { + "epoch": 0.8396624472573839, + "grad_norm": 0.42623016238212585, + "learning_rate": 0.00012368181396661337, + "loss": 1.2832, + "step": 7960 + }, + { + "epoch": 0.8407172995780591, + "grad_norm": 0.46416598558425903, + "learning_rate": 0.00012197704798333364, + "loss": 1.2832, + "step": 7970 + }, + { + "epoch": 0.8417721518987342, + "grad_norm": 0.4567941129207611, + "learning_rate": 0.00012029577961028894, + "loss": 1.2829, + "step": 7980 + }, + { + "epoch": 0.8428270042194093, + "grad_norm": 0.570604145526886, + "learning_rate": 0.00011863768496860542, + "loss": 1.2949, + "step": 7990 + }, + { + "epoch": 0.8438818565400844, + "grad_norm": 0.45113933086395264, + "learning_rate": 0.00011700244464358777, + "loss": 1.2881, + "step": 8000 + 
}, + { + "epoch": 0.8449367088607594, + "grad_norm": 0.49941763281822205, + "learning_rate": 0.00011538974362318715, + "loss": 1.2957, + "step": 8010 + }, + { + "epoch": 0.8459915611814346, + "grad_norm": 0.4934632182121277, + "learning_rate": 0.00011379927123731737, + "loss": 1.2884, + "step": 8020 + }, + { + "epoch": 0.8470464135021097, + "grad_norm": 0.4868738651275635, + "learning_rate": 0.0001122307210980077, + "loss": 1.2876, + "step": 8030 + }, + { + "epoch": 0.8481012658227848, + "grad_norm": 0.4697803258895874, + "learning_rate": 0.00011068379104038026, + "loss": 1.2961, + "step": 8040 + }, + { + "epoch": 0.8491561181434599, + "grad_norm": 0.44700032472610474, + "learning_rate": 0.00010915818306444116, + "loss": 1.2803, + "step": 8050 + }, + { + "epoch": 0.8502109704641351, + "grad_norm": 0.44790133833885193, + "learning_rate": 0.00010765360327767384, + "loss": 1.2904, + "step": 8060 + }, + { + "epoch": 0.8512658227848101, + "grad_norm": 0.4414917528629303, + "learning_rate": 0.00010616976183842376, + "loss": 1.2914, + "step": 8070 + }, + { + "epoch": 0.8523206751054853, + "grad_norm": 0.4861648976802826, + "learning_rate": 0.00010470637290006365, + "loss": 1.2938, + "step": 8080 + }, + { + "epoch": 0.8533755274261603, + "grad_norm": 0.46201643347740173, + "learning_rate": 0.00010326315455592764, + "loss": 1.284, + "step": 8090 + }, + { + "epoch": 0.8544303797468354, + "grad_norm": 0.5016726851463318, + "learning_rate": 0.0001018398287850053, + "loss": 1.2758, + "step": 8100 + }, + { + "epoch": 0.8554852320675106, + "grad_norm": 0.4872339963912964, + "learning_rate": 0.00010043612139838357, + "loss": 1.2959, + "step": 8110 + }, + { + "epoch": 0.8565400843881856, + "grad_norm": 0.4454995095729828, + "learning_rate": 9.905176198642719e-05, + "loss": 1.2849, + "step": 8120 + }, + { + "epoch": 0.8575949367088608, + "grad_norm": 0.45504704117774963, + "learning_rate": 9.76864838666871e-05, + "loss": 1.2873, + "step": 8130 + }, + { + "epoch": 0.8586497890295358, 
+ "grad_norm": 0.4604946970939636, + "learning_rate": 9.634002403252676e-05, + "loss": 1.2822, + "step": 8140 + }, + { + "epoch": 0.859704641350211, + "grad_norm": 0.43320751190185547, + "learning_rate": 9.501212310245681e-05, + "loss": 1.2803, + "step": 8150 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 0.4747781753540039, + "learning_rate": 9.370252527016777e-05, + "loss": 1.292, + "step": 8160 + }, + { + "epoch": 0.8618143459915611, + "grad_norm": 0.4684956669807434, + "learning_rate": 9.241097825525163e-05, + "loss": 1.2824, + "step": 8170 + }, + { + "epoch": 0.8628691983122363, + "grad_norm": 0.4697885513305664, + "learning_rate": 9.113723325460276e-05, + "loss": 1.289, + "step": 8180 + }, + { + "epoch": 0.8639240506329114, + "grad_norm": 0.57652747631073, + "learning_rate": 8.988104489448849e-05, + "loss": 1.2845, + "step": 8190 + }, + { + "epoch": 0.8649789029535865, + "grad_norm": 0.46039921045303345, + "learning_rate": 8.864217118328042e-05, + "loss": 1.2937, + "step": 8200 + }, + { + "epoch": 0.8660337552742616, + "grad_norm": 0.45358946919441223, + "learning_rate": 8.742037346483729e-05, + "loss": 1.2864, + "step": 8210 + }, + { + "epoch": 0.8670886075949367, + "grad_norm": 0.46776285767555237, + "learning_rate": 8.62154163725303e-05, + "loss": 1.2926, + "step": 8220 + }, + { + "epoch": 0.8681434599156118, + "grad_norm": 0.4323977828025818, + "learning_rate": 8.502706778390219e-05, + "loss": 1.2901, + "step": 8230 + }, + { + "epoch": 0.869198312236287, + "grad_norm": 0.4356761574745178, + "learning_rate": 8.38550987759513e-05, + "loss": 1.2866, + "step": 8240 + }, + { + "epoch": 0.870253164556962, + "grad_norm": 0.45652660727500916, + "learning_rate": 8.269928358103191e-05, + "loss": 1.3016, + "step": 8250 + }, + { + "epoch": 0.8713080168776371, + "grad_norm": 0.4988255798816681, + "learning_rate": 8.155939954336243e-05, + "loss": 1.2945, + "step": 8260 + }, + { + "epoch": 0.8723628691983122, + "grad_norm": 0.4944089949131012, + "learning_rate": 
8.043522707613312e-05, + "loss": 1.2897, + "step": 8270 + }, + { + "epoch": 0.8734177215189873, + "grad_norm": 0.44518062472343445, + "learning_rate": 7.932654961920486e-05, + "loss": 1.2724, + "step": 8280 + }, + { + "epoch": 0.8744725738396625, + "grad_norm": 0.49174994230270386, + "learning_rate": 7.823315359739135e-05, + "loss": 1.2732, + "step": 8290 + }, + { + "epoch": 0.8755274261603375, + "grad_norm": 0.4520399272441864, + "learning_rate": 7.715482837931577e-05, + "loss": 1.2987, + "step": 8300 + }, + { + "epoch": 0.8765822784810127, + "grad_norm": 0.4695223271846771, + "learning_rate": 7.6091366236835e-05, + "loss": 1.2764, + "step": 8310 + }, + { + "epoch": 0.8776371308016878, + "grad_norm": 0.4494788944721222, + "learning_rate": 7.504256230502289e-05, + "loss": 1.2933, + "step": 8320 + }, + { + "epoch": 0.8786919831223629, + "grad_norm": 0.4499630630016327, + "learning_rate": 7.400821454270524e-05, + "loss": 1.2884, + "step": 8330 + }, + { + "epoch": 0.879746835443038, + "grad_norm": 0.4443351924419403, + "learning_rate": 7.29881236935386e-05, + "loss": 1.2758, + "step": 8340 + }, + { + "epoch": 0.880801687763713, + "grad_norm": 0.45193472504615784, + "learning_rate": 7.198209324762562e-05, + "loss": 1.2798, + "step": 8350 + }, + { + "epoch": 0.8818565400843882, + "grad_norm": 0.4622740149497986, + "learning_rate": 7.098992940365946e-05, + "loss": 1.2762, + "step": 8360 + }, + { + "epoch": 0.8829113924050633, + "grad_norm": 0.442023366689682, + "learning_rate": 7.001144103159e-05, + "loss": 1.2863, + "step": 8370 + }, + { + "epoch": 0.8839662447257384, + "grad_norm": 0.45359861850738525, + "learning_rate": 6.904643963580461e-05, + "loss": 1.2924, + "step": 8380 + }, + { + "epoch": 0.8850210970464135, + "grad_norm": 0.44865256547927856, + "learning_rate": 6.809473931881644e-05, + "loss": 1.2812, + "step": 8390 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 0.43512946367263794, + "learning_rate": 6.71561567454532e-05, + "loss": 1.2812, + "step": 
8400 + }, + { + "epoch": 0.8871308016877637, + "grad_norm": 0.44648268818855286, + "learning_rate": 6.623051110753948e-05, + "loss": 1.3014, + "step": 8410 + }, + { + "epoch": 0.8881856540084389, + "grad_norm": 0.4547604024410248, + "learning_rate": 6.531762408906607e-05, + "loss": 1.2877, + "step": 8420 + }, + { + "epoch": 0.8892405063291139, + "grad_norm": 0.4438287615776062, + "learning_rate": 6.441731983183912e-05, + "loss": 1.2848, + "step": 8430 + }, + { + "epoch": 0.890295358649789, + "grad_norm": 0.44475919008255005, + "learning_rate": 6.352942490160292e-05, + "loss": 1.2769, + "step": 8440 + }, + { + "epoch": 0.8913502109704642, + "grad_norm": 0.49799758195877075, + "learning_rate": 6.265376825462966e-05, + "loss": 1.2861, + "step": 8450 + }, + { + "epoch": 0.8924050632911392, + "grad_norm": 0.45425400137901306, + "learning_rate": 6.179018120476945e-05, + "loss": 1.2815, + "step": 8460 + }, + { + "epoch": 0.8934599156118144, + "grad_norm": 0.4422910809516907, + "learning_rate": 6.0938497390954946e-05, + "loss": 1.2835, + "step": 8470 + }, + { + "epoch": 0.8945147679324894, + "grad_norm": 0.4422453045845032, + "learning_rate": 6.009855274515339e-05, + "loss": 1.2766, + "step": 8480 + }, + { + "epoch": 0.8955696202531646, + "grad_norm": 0.4605245292186737, + "learning_rate": 5.9270185460760735e-05, + "loss": 1.2832, + "step": 8490 + }, + { + "epoch": 0.8966244725738397, + "grad_norm": 0.43407219648361206, + "learning_rate": 5.8453235961431225e-05, + "loss": 1.2733, + "step": 8500 + }, + { + "epoch": 0.8976793248945147, + "grad_norm": 0.48641687631607056, + "learning_rate": 5.764754687033678e-05, + "loss": 1.2842, + "step": 8510 + }, + { + "epoch": 0.8987341772151899, + "grad_norm": 0.4426994025707245, + "learning_rate": 5.6852962979849836e-05, + "loss": 1.2791, + "step": 8520 + }, + { + "epoch": 0.8997890295358649, + "grad_norm": 0.4594253599643707, + "learning_rate": 5.6069331221644284e-05, + "loss": 1.2937, + "step": 8530 + }, + { + "epoch": 
0.9008438818565401, + "grad_norm": 0.4511406719684601, + "learning_rate": 5.529650063720842e-05, + "loss": 1.2897, + "step": 8540 + }, + { + "epoch": 0.9018987341772152, + "grad_norm": 0.48013877868652344, + "learning_rate": 5.453432234876445e-05, + "loss": 1.2818, + "step": 8550 + }, + { + "epoch": 0.9029535864978903, + "grad_norm": 0.4629244804382324, + "learning_rate": 5.37826495305886e-05, + "loss": 1.2728, + "step": 8560 + }, + { + "epoch": 0.9040084388185654, + "grad_norm": 0.5308079123497009, + "learning_rate": 5.304133738072674e-05, + "loss": 1.2912, + "step": 8570 + }, + { + "epoch": 0.9050632911392406, + "grad_norm": 0.4697956144809723, + "learning_rate": 5.2310243093099814e-05, + "loss": 1.284, + "step": 8580 + }, + { + "epoch": 0.9061181434599156, + "grad_norm": 0.49656587839126587, + "learning_rate": 5.158922582999368e-05, + "loss": 1.2875, + "step": 8590 + }, + { + "epoch": 0.9071729957805907, + "grad_norm": 0.4452250599861145, + "learning_rate": 5.087814669492819e-05, + "loss": 1.2792, + "step": 8600 + }, + { + "epoch": 0.9082278481012658, + "grad_norm": 0.46281588077545166, + "learning_rate": 5.017686870590028e-05, + "loss": 1.281, + "step": 8610 + }, + { + "epoch": 0.9092827004219409, + "grad_norm": 0.4981115758419037, + "learning_rate": 4.948525676899577e-05, + "loss": 1.2771, + "step": 8620 + }, + { + "epoch": 0.9103375527426161, + "grad_norm": 0.4706834852695465, + "learning_rate": 4.880317765236493e-05, + "loss": 1.2874, + "step": 8630 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 0.5250294804573059, + "learning_rate": 4.8130499960556755e-05, + "loss": 1.2672, + "step": 8640 + }, + { + "epoch": 0.9124472573839663, + "grad_norm": 0.4542900323867798, + "learning_rate": 4.746709410920699e-05, + "loss": 1.2737, + "step": 8650 + }, + { + "epoch": 0.9135021097046413, + "grad_norm": 0.5390377044677734, + "learning_rate": 4.681283230007507e-05, + "loss": 1.2745, + "step": 8660 + }, + { + "epoch": 0.9145569620253164, + "grad_norm": 
0.4557817578315735, + "learning_rate": 4.616758849642509e-05, + "loss": 1.2836, + "step": 8670 + }, + { + "epoch": 0.9156118143459916, + "grad_norm": 0.446895033121109, + "learning_rate": 4.553123839874615e-05, + "loss": 1.2922, + "step": 8680 + }, + { + "epoch": 0.9166666666666666, + "grad_norm": 0.4644860625267029, + "learning_rate": 4.490365942080736e-05, + "loss": 1.2832, + "step": 8690 + }, + { + "epoch": 0.9177215189873418, + "grad_norm": 0.46043819189071655, + "learning_rate": 4.428473066604285e-05, + "loss": 1.2813, + "step": 8700 + }, + { + "epoch": 0.9187763713080169, + "grad_norm": 0.4642040729522705, + "learning_rate": 4.367433290426233e-05, + "loss": 1.281, + "step": 8710 + }, + { + "epoch": 0.919831223628692, + "grad_norm": 0.4453197419643402, + "learning_rate": 4.3072348548682595e-05, + "loss": 1.2764, + "step": 8720 + }, + { + "epoch": 0.9208860759493671, + "grad_norm": 0.45742228627204895, + "learning_rate": 4.247866163327575e-05, + "loss": 1.2878, + "step": 8730 + }, + { + "epoch": 0.9219409282700421, + "grad_norm": 0.44467175006866455, + "learning_rate": 4.1893157790429404e-05, + "loss": 1.2782, + "step": 8740 + }, + { + "epoch": 0.9229957805907173, + "grad_norm": 0.44879403710365295, + "learning_rate": 4.1315724228915066e-05, + "loss": 1.2744, + "step": 8750 + }, + { + "epoch": 0.9240506329113924, + "grad_norm": 0.5003914833068848, + "learning_rate": 4.074624971216005e-05, + "loss": 1.2633, + "step": 8760 + }, + { + "epoch": 0.9251054852320675, + "grad_norm": 0.43093347549438477, + "learning_rate": 4.018462453681889e-05, + "loss": 1.2738, + "step": 8770 + }, + { + "epoch": 0.9261603375527426, + "grad_norm": 0.45481476187705994, + "learning_rate": 3.963074051164014e-05, + "loss": 1.2806, + "step": 8780 + }, + { + "epoch": 0.9272151898734177, + "grad_norm": 0.44279158115386963, + "learning_rate": 3.908449093662446e-05, + "loss": 1.2753, + "step": 8790 + }, + { + "epoch": 0.9282700421940928, + "grad_norm": 0.4584684371948242, + "learning_rate": 
3.854577058246998e-05, + "loss": 1.272, + "step": 8800 + }, + { + "epoch": 0.929324894514768, + "grad_norm": 0.44226986169815063, + "learning_rate": 3.801447567030094e-05, + "loss": 1.2926, + "step": 8810 + }, + { + "epoch": 0.930379746835443, + "grad_norm": 0.43229052424430847, + "learning_rate": 3.7490503851675777e-05, + "loss": 1.2785, + "step": 8820 + }, + { + "epoch": 0.9314345991561181, + "grad_norm": 0.44430503249168396, + "learning_rate": 3.6973754188870806e-05, + "loss": 1.2925, + "step": 8830 + }, + { + "epoch": 0.9324894514767933, + "grad_norm": 0.439351350069046, + "learning_rate": 3.6464127135435536e-05, + "loss": 1.2797, + "step": 8840 + }, + { + "epoch": 0.9335443037974683, + "grad_norm": 0.5136128664016724, + "learning_rate": 3.596152451701616e-05, + "loss": 1.2785, + "step": 8850 + }, + { + "epoch": 0.9345991561181435, + "grad_norm": 0.5367308259010315, + "learning_rate": 3.5465849512443226e-05, + "loss": 1.279, + "step": 8860 + }, + { + "epoch": 0.9356540084388185, + "grad_norm": 0.4388138949871063, + "learning_rate": 3.4977006635080086e-05, + "loss": 1.2818, + "step": 8870 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 0.46991193294525146, + "learning_rate": 3.449490171442838e-05, + "loss": 1.285, + "step": 8880 + }, + { + "epoch": 0.9377637130801688, + "grad_norm": 0.4393314719200134, + "learning_rate": 3.401944187798702e-05, + "loss": 1.2844, + "step": 8890 + }, + { + "epoch": 0.9388185654008439, + "grad_norm": 0.4360581934452057, + "learning_rate": 3.355053553336137e-05, + "loss": 1.2736, + "step": 8900 + }, + { + "epoch": 0.939873417721519, + "grad_norm": 0.46192970871925354, + "learning_rate": 3.308809235061882e-05, + "loss": 1.2738, + "step": 8910 + }, + { + "epoch": 0.9409282700421941, + "grad_norm": 0.45846766233444214, + "learning_rate": 3.263202324488772e-05, + "loss": 1.2816, + "step": 8920 + }, + { + "epoch": 0.9419831223628692, + "grad_norm": 0.45226266980171204, + "learning_rate": 3.218224035919609e-05, + "loss": 1.276, + 
"step": 8930 + }, + { + "epoch": 0.9430379746835443, + "grad_norm": 0.44694724678993225, + "learning_rate": 3.173865704754688e-05, + "loss": 1.2776, + "step": 8940 + }, + { + "epoch": 0.9440928270042194, + "grad_norm": 0.44097021222114563, + "learning_rate": 3.130118785822657e-05, + "loss": 1.2783, + "step": 8950 + }, + { + "epoch": 0.9451476793248945, + "grad_norm": 0.4459550380706787, + "learning_rate": 3.08697485173437e-05, + "loss": 1.2839, + "step": 8960 + }, + { + "epoch": 0.9462025316455697, + "grad_norm": 0.5135442614555359, + "learning_rate": 3.0444255912594442e-05, + "loss": 1.2796, + "step": 8970 + }, + { + "epoch": 0.9472573839662447, + "grad_norm": 0.5376752018928528, + "learning_rate": 3.002462807725185e-05, + "loss": 1.2766, + "step": 8980 + }, + { + "epoch": 0.9483122362869199, + "grad_norm": 0.45783647894859314, + "learning_rate": 2.9610784174375868e-05, + "loss": 1.2825, + "step": 8990 + }, + { + "epoch": 0.9493670886075949, + "grad_norm": 0.44302478432655334, + "learning_rate": 2.920264448124087e-05, + "loss": 1.2746, + "step": 9000 + }, + { + "epoch": 0.95042194092827, + "grad_norm": 0.45025965571403503, + "learning_rate": 2.8800130373977936e-05, + "loss": 1.2745, + "step": 9010 + }, + { + "epoch": 0.9514767932489452, + "grad_norm": 0.4385267496109009, + "learning_rate": 2.84031643124288e-05, + "loss": 1.275, + "step": 9020 + }, + { + "epoch": 0.9525316455696202, + "grad_norm": 0.5064539313316345, + "learning_rate": 2.8011669825208517e-05, + "loss": 1.2921, + "step": 9030 + }, + { + "epoch": 0.9535864978902954, + "grad_norm": 0.45993825793266296, + "learning_rate": 2.762557149497405e-05, + "loss": 1.271, + "step": 9040 + }, + { + "epoch": 0.9546413502109705, + "grad_norm": 0.43950140476226807, + "learning_rate": 2.724479494389592e-05, + "loss": 1.2812, + "step": 9050 + }, + { + "epoch": 0.9556962025316456, + "grad_norm": 0.44806602597236633, + "learning_rate": 2.6869266819330058e-05, + "loss": 1.2812, + "step": 9060 + }, + { + "epoch": 
0.9567510548523207, + "grad_norm": 0.44085240364074707, + "learning_rate": 2.6498914779687228e-05, + "loss": 1.2822, + "step": 9070 + }, + { + "epoch": 0.9578059071729957, + "grad_norm": 0.4372364580631256, + "learning_rate": 2.6133667480497115e-05, + "loss": 1.2841, + "step": 9080 + }, + { + "epoch": 0.9588607594936709, + "grad_norm": 0.4652843475341797, + "learning_rate": 2.5773454560664597e-05, + "loss": 1.2842, + "step": 9090 + }, + { + "epoch": 0.959915611814346, + "grad_norm": 0.43991342186927795, + "learning_rate": 2.541820662891541e-05, + "loss": 1.2643, + "step": 9100 + }, + { + "epoch": 0.9609704641350211, + "grad_norm": 0.43201401829719543, + "learning_rate": 2.5067855250428616e-05, + "loss": 1.2751, + "step": 9110 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 0.45959627628326416, + "learning_rate": 2.472233293365335e-05, + "loss": 1.2856, + "step": 9120 + }, + { + "epoch": 0.9630801687763713, + "grad_norm": 0.4455472528934479, + "learning_rate": 2.4381573117307307e-05, + "loss": 1.2697, + "step": 9130 + }, + { + "epoch": 0.9641350210970464, + "grad_norm": 0.4947901666164398, + "learning_rate": 2.4045510157554362e-05, + "loss": 1.2891, + "step": 9140 + }, + { + "epoch": 0.9651898734177216, + "grad_norm": 0.46208250522613525, + "learning_rate": 2.3714079315358985e-05, + "loss": 1.2769, + "step": 9150 + }, + { + "epoch": 0.9662447257383966, + "grad_norm": 0.4391043484210968, + "learning_rate": 2.338721674401494e-05, + "loss": 1.2841, + "step": 9160 + }, + { + "epoch": 0.9672995780590717, + "grad_norm": 0.44544529914855957, + "learning_rate": 2.30648594768459e-05, + "loss": 1.2806, + "step": 9170 + }, + { + "epoch": 0.9683544303797469, + "grad_norm": 0.4403120279312134, + "learning_rate": 2.2746945415075523e-05, + "loss": 1.2772, + "step": 9180 + }, + { + "epoch": 0.9694092827004219, + "grad_norm": 0.4416530132293701, + "learning_rate": 2.2433413315864803e-05, + "loss": 1.273, + "step": 9190 + }, + { + "epoch": 0.9704641350210971, + "grad_norm": 
0.48513439297676086, + "learning_rate": 2.2124202780514277e-05, + "loss": 1.2764, + "step": 9200 + }, + { + "epoch": 0.9715189873417721, + "grad_norm": 0.45357707142829895, + "learning_rate": 2.1819254242828815e-05, + "loss": 1.2783, + "step": 9210 + }, + { + "epoch": 0.9725738396624473, + "grad_norm": 0.44976767897605896, + "learning_rate": 2.151850895764285e-05, + "loss": 1.2753, + "step": 9220 + }, + { + "epoch": 0.9736286919831224, + "grad_norm": 0.42965638637542725, + "learning_rate": 2.12219089895037e-05, + "loss": 1.2672, + "step": 9230 + }, + { + "epoch": 0.9746835443037974, + "grad_norm": 0.463337242603302, + "learning_rate": 2.092939720151092e-05, + "loss": 1.2605, + "step": 9240 + }, + { + "epoch": 0.9757383966244726, + "grad_norm": 0.43859803676605225, + "learning_rate": 2.064091724430947e-05, + "loss": 1.274, + "step": 9250 + }, + { + "epoch": 0.9767932489451476, + "grad_norm": 0.45552858710289, + "learning_rate": 2.0356413545234603e-05, + "loss": 1.2699, + "step": 9260 + }, + { + "epoch": 0.9778481012658228, + "grad_norm": 0.44371435046195984, + "learning_rate": 2.0075831297606357e-05, + "loss": 1.2815, + "step": 9270 + }, + { + "epoch": 0.9789029535864979, + "grad_norm": 0.44908347725868225, + "learning_rate": 1.9799116450171627e-05, + "loss": 1.2692, + "step": 9280 + }, + { + "epoch": 0.979957805907173, + "grad_norm": 0.4544650614261627, + "learning_rate": 1.952621569669175e-05, + "loss": 1.2748, + "step": 9290 + }, + { + "epoch": 0.9810126582278481, + "grad_norm": 0.447017639875412, + "learning_rate": 1.9257076465673605e-05, + "loss": 1.2767, + "step": 9300 + }, + { + "epoch": 0.9820675105485233, + "grad_norm": 0.5013083815574646, + "learning_rate": 1.899164691024229e-05, + "loss": 1.2767, + "step": 9310 + }, + { + "epoch": 0.9831223628691983, + "grad_norm": 0.4764220118522644, + "learning_rate": 1.872987589815331e-05, + "loss": 1.2719, + "step": 9320 + }, + { + "epoch": 0.9841772151898734, + "grad_norm": 0.4521596133708954, + "learning_rate": 
1.8471713001942538e-05, + "loss": 1.2836, + "step": 9330 + }, + { + "epoch": 0.9852320675105485, + "grad_norm": 0.4511817693710327, + "learning_rate": 1.8217108489211845e-05, + "loss": 1.2813, + "step": 9340 + }, + { + "epoch": 0.9862869198312236, + "grad_norm": 0.45081964135169983, + "learning_rate": 1.7966013313048696e-05, + "loss": 1.2914, + "step": 9350 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 0.4383713901042938, + "learning_rate": 1.7718379102577752e-05, + "loss": 1.2836, + "step": 9360 + }, + { + "epoch": 0.9883966244725738, + "grad_norm": 0.44428008794784546, + "learning_rate": 1.7474158153642745e-05, + "loss": 1.283, + "step": 9370 + }, + { + "epoch": 0.989451476793249, + "grad_norm": 0.43199053406715393, + "learning_rate": 1.7233303419616745e-05, + "loss": 1.2668, + "step": 9380 + }, + { + "epoch": 0.990506329113924, + "grad_norm": 0.4628349244594574, + "learning_rate": 1.699576850233916e-05, + "loss": 1.2725, + "step": 9390 + }, + { + "epoch": 0.9915611814345991, + "grad_norm": 0.45799240469932556, + "learning_rate": 1.6761507643177553e-05, + "loss": 1.2869, + "step": 9400 + }, + { + "epoch": 0.9926160337552743, + "grad_norm": 0.454426109790802, + "learning_rate": 1.6530475714212752e-05, + "loss": 1.2621, + "step": 9410 + }, + { + "epoch": 0.9936708860759493, + "grad_norm": 0.43662726879119873, + "learning_rate": 1.6302628209545423e-05, + "loss": 1.28, + "step": 9420 + }, + { + "epoch": 0.9947257383966245, + "grad_norm": 0.46751806139945984, + "learning_rate": 1.6077921236722464e-05, + "loss": 1.2791, + "step": 9430 + }, + { + "epoch": 0.9957805907172996, + "grad_norm": 0.44598811864852905, + "learning_rate": 1.5856311508281594e-05, + "loss": 1.2755, + "step": 9440 + }, + { + "epoch": 0.9968354430379747, + "grad_norm": 0.47813931107521057, + "learning_rate": 1.5637756333412454e-05, + "loss": 1.2843, + "step": 9450 + }, + { + "epoch": 0.9978902953586498, + "grad_norm": 0.44852492213249207, + "learning_rate": 1.542221360973268e-05, + "loss": 
1.2692, + "step": 9460 + }, + { + "epoch": 0.9989451476793249, + "grad_norm": 0.44214802980422974, + "learning_rate": 1.5209641815177312e-05, + "loss": 1.2839, + "step": 9470 + }, + { + "epoch": 1.0, + "grad_norm": 1.3379602432250977, + "learning_rate": 1.5e-05, + "loss": 1.2704, + "step": 9480 + } + ], + "logging_steps": 10, + "max_steps": 9480, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.8335001376290816e+16, + "train_batch_size": 1024, + "trial_name": null, + "trial_params": null +} diff --git a/saves-qwen2-bf16/checkpoint-9480/training_args.bin b/saves-qwen2-bf16/checkpoint-9480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..22e06656789a6d294f211bc8fbdf55908f7ae6cc --- /dev/null +++ b/saves-qwen2-bf16/checkpoint-9480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c9b0d30aa4879a82351071fc4734ee032f6480c83add315b137d2912fa636d9 +size 5112 diff --git a/saves-qwen2-bf16/config.json b/saves-qwen2-bf16/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e33d1998b689b761506a91592eb7dca0e68f762d --- /dev/null +++ b/saves-qwen2-bf16/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 768, + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen2", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "rms_norm_eps": 1e-06, + "rope_theta": 10000.0, + "sliding_window": 4096, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.42.0", + 
"use_cache": true, + "use_sliding_window": false, + "vocab_size": 2000 +} diff --git a/saves-qwen2-bf16/generation_config.json b/saves-qwen2-bf16/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..589084e0e774668a082e0ca2290729cd99323199 --- /dev/null +++ b/saves-qwen2-bf16/generation_config.json @@ -0,0 +1,4 @@ +{ + "_from_model_config": true, + "transformers_version": "4.42.0" +} diff --git a/saves-qwen2-bf16/model.safetensors b/saves-qwen2-bf16/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..df481c812590997300b0ca047b956469cd422cf7 --- /dev/null +++ b/saves-qwen2-bf16/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb44588318a94c53b132008a197bda33538e1c9d29ae0b62576ed6676dd9f79f +size 8351424 diff --git a/saves-qwen2-bf16/result.log b/saves-qwen2-bf16/result.log new file mode 100644 index 0000000000000000000000000000000000000000..229c8b3041a0ac33a89aac1bce7ea717159f4374 --- /dev/null +++ b/saves-qwen2-bf16/result.log @@ -0,0 +1 @@ +{'train_runtime': 5645.2094, 'train_samples_per_second': 1719.441, 'train_steps_per_second': 1.679, 'train_loss': 1.5675457151630257, 'epoch': 1.0} \ No newline at end of file diff --git a/saves-qwen2-bf16/special_tokens_map.json b/saves-qwen2-bf16/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-qwen2-bf16/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-qwen2-bf16/tokenizer.json 
b/saves-qwen2-bf16/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-qwen2-bf16/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, 
+ ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + 
"er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + 
"å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + 
"el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + 
"å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 
787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 
928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, 
+ "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 
1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 
1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 
1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + 
"çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, 
+ "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 
1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + 
"çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-qwen2-bf16/tokenizer_config.json b/saves-qwen2-bf16/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-qwen2-bf16/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + 
"model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-qwen2-cosine/checkpoint-9480/config.json b/saves-qwen2-cosine/checkpoint-9480/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a785d3506e02f39ed9708141afd519ae6edaa6ea --- /dev/null +++ b/saves-qwen2-cosine/checkpoint-9480/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 768, + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen2", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "rms_norm_eps": 1e-06, + "rope_theta": 10000.0, + "sliding_window": 4096, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 2000 +} diff --git a/saves-qwen2-cosine/checkpoint-9480/generation_config.json b/saves-qwen2-cosine/checkpoint-9480/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ca31b478470766d67155dda554564aa41d8444b --- /dev/null +++ b/saves-qwen2-cosine/checkpoint-9480/generation_config.json @@ -0,0 +1,4 @@ +{ + "_from_model_config": true, + "transformers_version": "4.42.4" +} diff --git a/saves-qwen2-cosine/checkpoint-9480/model.safetensors b/saves-qwen2-cosine/checkpoint-9480/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cf66ca0e98f3e825a615ad050eda1f52b048eb31 --- /dev/null +++ b/saves-qwen2-cosine/checkpoint-9480/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:828b0a3e09b0a35f4892c520947d7d864a12a61fe79a4d5b1b7ae9ff7df3e75b +size 8351424 diff --git a/saves-qwen2-cosine/checkpoint-9480/optimizer.pt 
b/saves-qwen2-cosine/checkpoint-9480/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ff4da847efc93a3d4283d016365d3425b1845c22 --- /dev/null +++ b/saves-qwen2-cosine/checkpoint-9480/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17c830e613888ef6723b929d61bf6ad5d3165bc512b67aef4c3a5ee40a579393 +size 16719632 diff --git a/saves-qwen2-cosine/checkpoint-9480/rng_state.pth b/saves-qwen2-cosine/checkpoint-9480/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f8291cd6ce87668b786a72f3e93d072fbe54902 --- /dev/null +++ b/saves-qwen2-cosine/checkpoint-9480/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c917636c7a58af68a29056522a757e9f9b99005b776641aa157c536967817d +size 14244 diff --git a/saves-qwen2-cosine/checkpoint-9480/scheduler.pt b/saves-qwen2-cosine/checkpoint-9480/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..74b04fc48c12ecef4ed191c0e0bab93fab8eb99a --- /dev/null +++ b/saves-qwen2-cosine/checkpoint-9480/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cec59c7f60583f92116a8c17261df6f5e6643e0df2f9a66b3c4ae6ce50b33704 +size 1064 diff --git a/saves-qwen2-cosine/checkpoint-9480/special_tokens_map.json b/saves-qwen2-cosine/checkpoint-9480/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-qwen2-cosine/checkpoint-9480/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false 
+ } +} diff --git a/saves-qwen2-cosine/checkpoint-9480/tokenizer.json b/saves-qwen2-cosine/checkpoint-9480/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-qwen2-cosine/checkpoint-9480/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + 
"0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + 
"Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + 
"Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + 
"æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, 
+ "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, 
+ "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 
921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + 
"è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 
1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + 
"nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, 
+ "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 
1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + 
"Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 
1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + 
"Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-qwen2-cosine/checkpoint-9480/tokenizer_config.json b/saves-qwen2-cosine/checkpoint-9480/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-qwen2-cosine/checkpoint-9480/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + 
"eos_token": "<|im_end|>", + "errors": "replace", + "model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-qwen2-cosine/checkpoint-9480/trainer_state.json b/saves-qwen2-cosine/checkpoint-9480/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f215a3b358e7c22976745fc0b2294e149320befd --- /dev/null +++ b/saves-qwen2-cosine/checkpoint-9480/trainer_state.json @@ -0,0 +1,6669 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 9480, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0010548523206751054, + "grad_norm": 1.2915687561035156, + "learning_rate": 0.00015789473684210527, + "loss": 7.5147, + "step": 10 + }, + { + "epoch": 0.002109704641350211, + "grad_norm": 1.150234341621399, + "learning_rate": 0.00031578947368421053, + "loss": 6.9043, + "step": 20 + }, + { + "epoch": 0.0031645569620253164, + "grad_norm": 0.8262501955032349, + "learning_rate": 0.00047368421052631577, + "loss": 6.2531, + "step": 30 + }, + { + "epoch": 0.004219409282700422, + "grad_norm": 1.0096925497055054, + "learning_rate": 0.0006315789473684211, + "loss": 5.7511, + "step": 40 + }, + { + "epoch": 0.005274261603375527, + "grad_norm": 0.921198844909668, + "learning_rate": 0.0007894736842105263, + "loss": 5.2799, + "step": 50 + }, + { + "epoch": 0.006329113924050633, + "grad_norm": 0.6396533846855164, + "learning_rate": 0.0009473684210526315, + "loss": 4.7623, + "step": 60 + }, + { + "epoch": 0.007383966244725738, + "grad_norm": 1.1035351753234863, + "learning_rate": 0.0011052631578947368, + "loss": 4.3618, + "step": 70 + }, + { + "epoch": 0.008438818565400843, + "grad_norm": 1.5946455001831055, + "learning_rate": 0.0012631578947368421, + "loss": 4.1172, + "step": 80 + }, + { + "epoch": 0.00949367088607595, + 
"grad_norm": 0.8093262910842896, + "learning_rate": 0.0014210526315789472, + "loss": 3.9136, + "step": 90 + }, + { + "epoch": 0.010548523206751054, + "grad_norm": 1.3977932929992676, + "learning_rate": 0.0014999989494847376, + "loss": 3.7642, + "step": 100 + }, + { + "epoch": 0.011603375527426161, + "grad_norm": 0.6807733178138733, + "learning_rate": 0.0014999905453802946, + "loss": 3.6047, + "step": 110 + }, + { + "epoch": 0.012658227848101266, + "grad_norm": 1.0931824445724487, + "learning_rate": 0.0014999737372655805, + "loss": 3.4906, + "step": 120 + }, + { + "epoch": 0.013713080168776372, + "grad_norm": 0.8575250506401062, + "learning_rate": 0.0014999485253289388, + "loss": 3.3919, + "step": 130 + }, + { + "epoch": 0.014767932489451477, + "grad_norm": 1.239012598991394, + "learning_rate": 0.0014999149098528814, + "loss": 3.2891, + "step": 140 + }, + { + "epoch": 0.015822784810126583, + "grad_norm": 0.7162935733795166, + "learning_rate": 0.0014998728912140862, + "loss": 3.208, + "step": 150 + }, + { + "epoch": 0.016877637130801686, + "grad_norm": 0.8411149382591248, + "learning_rate": 0.0014998224698833922, + "loss": 3.1421, + "step": 160 + }, + { + "epoch": 0.017932489451476793, + "grad_norm": 1.107505202293396, + "learning_rate": 0.0014997636464257956, + "loss": 3.0662, + "step": 170 + }, + { + "epoch": 0.0189873417721519, + "grad_norm": 0.8071396350860596, + "learning_rate": 0.0014996964215004416, + "loss": 3.0318, + "step": 180 + }, + { + "epoch": 0.020042194092827006, + "grad_norm": 0.8110716938972473, + "learning_rate": 0.0014996207958606182, + "loss": 2.9656, + "step": 190 + }, + { + "epoch": 0.02109704641350211, + "grad_norm": 0.6659255623817444, + "learning_rate": 0.001499536770353748, + "loss": 2.9161, + "step": 200 + }, + { + "epoch": 0.022151898734177215, + "grad_norm": 0.9761127233505249, + "learning_rate": 0.0014994443459213774, + "loss": 2.873, + "step": 210 + }, + { + "epoch": 0.023206751054852322, + "grad_norm": 0.816493570804596, + 
"learning_rate": 0.001499343523599168, + "loss": 2.8434, + "step": 220 + }, + { + "epoch": 0.024261603375527425, + "grad_norm": 0.8392878174781799, + "learning_rate": 0.0014992343045168823, + "loss": 2.7875, + "step": 230 + }, + { + "epoch": 0.02531645569620253, + "grad_norm": 1.1818407773971558, + "learning_rate": 0.0014991166898983739, + "loss": 2.7482, + "step": 240 + }, + { + "epoch": 0.026371308016877638, + "grad_norm": 1.0544509887695312, + "learning_rate": 0.001498990681061572, + "loss": 2.7166, + "step": 250 + }, + { + "epoch": 0.027426160337552744, + "grad_norm": 0.7872912883758545, + "learning_rate": 0.001498856279418467, + "loss": 2.678, + "step": 260 + }, + { + "epoch": 0.028481012658227847, + "grad_norm": 0.8795846104621887, + "learning_rate": 0.0014987134864750948, + "loss": 2.651, + "step": 270 + }, + { + "epoch": 0.029535864978902954, + "grad_norm": 1.0243735313415527, + "learning_rate": 0.0014985623038315206, + "loss": 2.6152, + "step": 280 + }, + { + "epoch": 0.03059071729957806, + "grad_norm": 0.9576835632324219, + "learning_rate": 0.0014984027331818193, + "loss": 2.5882, + "step": 290 + }, + { + "epoch": 0.03164556962025317, + "grad_norm": 1.0083080530166626, + "learning_rate": 0.0014982347763140584, + "loss": 2.5662, + "step": 300 + }, + { + "epoch": 0.03270042194092827, + "grad_norm": 0.889911413192749, + "learning_rate": 0.0014980584351102762, + "loss": 2.5398, + "step": 310 + }, + { + "epoch": 0.03375527426160337, + "grad_norm": 0.8950763940811157, + "learning_rate": 0.001497873711546462, + "loss": 2.5095, + "step": 320 + }, + { + "epoch": 0.03481012658227848, + "grad_norm": 0.7593084573745728, + "learning_rate": 0.0014976806076925334, + "loss": 2.5009, + "step": 330 + }, + { + "epoch": 0.035864978902953586, + "grad_norm": 0.7483264803886414, + "learning_rate": 0.0014974791257123137, + "loss": 2.474, + "step": 340 + }, + { + "epoch": 0.03691983122362869, + "grad_norm": 0.8509273529052734, + "learning_rate": 0.001497269267863507, + "loss": 
2.4372, + "step": 350 + }, + { + "epoch": 0.0379746835443038, + "grad_norm": 0.9738366603851318, + "learning_rate": 0.0014970510364976724, + "loss": 2.4162, + "step": 360 + }, + { + "epoch": 0.039029535864978905, + "grad_norm": 1.254429817199707, + "learning_rate": 0.0014968244340601996, + "loss": 2.4239, + "step": 370 + }, + { + "epoch": 0.04008438818565401, + "grad_norm": 1.0971674919128418, + "learning_rate": 0.001496589463090279, + "loss": 2.4026, + "step": 380 + }, + { + "epoch": 0.04113924050632911, + "grad_norm": 1.3947056531906128, + "learning_rate": 0.001496346126220875, + "loss": 2.3737, + "step": 390 + }, + { + "epoch": 0.04219409282700422, + "grad_norm": 1.1339510679244995, + "learning_rate": 0.0014960944261786966, + "loss": 2.3481, + "step": 400 + }, + { + "epoch": 0.043248945147679324, + "grad_norm": 0.8295434713363647, + "learning_rate": 0.0014958343657841655, + "loss": 2.3407, + "step": 410 + }, + { + "epoch": 0.04430379746835443, + "grad_norm": 0.8295255899429321, + "learning_rate": 0.001495565947951385, + "loss": 2.3213, + "step": 420 + }, + { + "epoch": 0.04535864978902954, + "grad_norm": 0.7937383651733398, + "learning_rate": 0.0014952891756881085, + "loss": 2.2979, + "step": 430 + }, + { + "epoch": 0.046413502109704644, + "grad_norm": 0.7477364540100098, + "learning_rate": 0.0014950040520957037, + "loss": 2.2796, + "step": 440 + }, + { + "epoch": 0.04746835443037975, + "grad_norm": 0.781963050365448, + "learning_rate": 0.0014947105803691204, + "loss": 2.2812, + "step": 450 + }, + { + "epoch": 0.04852320675105485, + "grad_norm": 0.7430346608161926, + "learning_rate": 0.0014944087637968522, + "loss": 2.2619, + "step": 460 + }, + { + "epoch": 0.049578059071729956, + "grad_norm": 0.7792104482650757, + "learning_rate": 0.0014940986057609012, + "loss": 2.2408, + "step": 470 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 0.9940721988677979, + "learning_rate": 0.0014937801097367396, + "loss": 2.2322, + "step": 480 + }, + { + "epoch": 
0.05168776371308017, + "grad_norm": 0.7907801270484924, + "learning_rate": 0.001493453279293271, + "loss": 2.2076, + "step": 490 + }, + { + "epoch": 0.052742616033755275, + "grad_norm": 0.8319170475006104, + "learning_rate": 0.0014931181180927902, + "loss": 2.2024, + "step": 500 + }, + { + "epoch": 0.05379746835443038, + "grad_norm": 0.8538334369659424, + "learning_rate": 0.001492774629890942, + "loss": 2.1935, + "step": 510 + }, + { + "epoch": 0.05485232067510549, + "grad_norm": 0.9568904638290405, + "learning_rate": 0.001492422818536679, + "loss": 2.1807, + "step": 520 + }, + { + "epoch": 0.05590717299578059, + "grad_norm": 0.9535566568374634, + "learning_rate": 0.00149206268797222, + "loss": 2.167, + "step": 530 + }, + { + "epoch": 0.056962025316455694, + "grad_norm": 0.7389276623725891, + "learning_rate": 0.0014916942422330032, + "loss": 2.1466, + "step": 540 + }, + { + "epoch": 0.0580168776371308, + "grad_norm": 0.9615774154663086, + "learning_rate": 0.001491317485447643, + "loss": 2.1504, + "step": 550 + }, + { + "epoch": 0.05907172995780591, + "grad_norm": 0.7810622453689575, + "learning_rate": 0.0014909324218378838, + "loss": 2.1124, + "step": 560 + }, + { + "epoch": 0.060126582278481014, + "grad_norm": 0.948574423789978, + "learning_rate": 0.0014905390557185508, + "loss": 2.1168, + "step": 570 + }, + { + "epoch": 0.06118143459915612, + "grad_norm": 1.016525387763977, + "learning_rate": 0.0014901373914975036, + "loss": 2.122, + "step": 580 + }, + { + "epoch": 0.06223628691983123, + "grad_norm": 1.1645904779434204, + "learning_rate": 0.0014897274336755856, + "loss": 2.1001, + "step": 590 + }, + { + "epoch": 0.06329113924050633, + "grad_norm": 0.6901299953460693, + "learning_rate": 0.001489309186846575, + "loss": 2.0772, + "step": 600 + }, + { + "epoch": 0.06434599156118144, + "grad_norm": 1.0734965801239014, + "learning_rate": 0.0014888826556971313, + "loss": 2.0813, + "step": 610 + }, + { + "epoch": 0.06540084388185655, + "grad_norm": 0.6970210075378418, + 
"learning_rate": 0.0014884478450067444, + "loss": 2.0713, + "step": 620 + }, + { + "epoch": 0.06645569620253164, + "grad_norm": 0.7480238676071167, + "learning_rate": 0.0014880047596476807, + "loss": 2.0517, + "step": 630 + }, + { + "epoch": 0.06751054852320675, + "grad_norm": 1.0822690725326538, + "learning_rate": 0.0014875534045849274, + "loss": 2.0661, + "step": 640 + }, + { + "epoch": 0.06856540084388185, + "grad_norm": 1.0357532501220703, + "learning_rate": 0.0014870937848761388, + "loss": 2.0532, + "step": 650 + }, + { + "epoch": 0.06962025316455696, + "grad_norm": 0.8150244355201721, + "learning_rate": 0.001486625905671578, + "loss": 2.0424, + "step": 660 + }, + { + "epoch": 0.07067510548523206, + "grad_norm": 0.7702606916427612, + "learning_rate": 0.00148614977221406, + "loss": 2.0266, + "step": 670 + }, + { + "epoch": 0.07172995780590717, + "grad_norm": 0.6838421821594238, + "learning_rate": 0.0014856653898388927, + "loss": 2.0221, + "step": 680 + }, + { + "epoch": 0.07278481012658228, + "grad_norm": 0.6826958656311035, + "learning_rate": 0.001485172763973817, + "loss": 2.0291, + "step": 690 + }, + { + "epoch": 0.07383966244725738, + "grad_norm": 0.7765556573867798, + "learning_rate": 0.0014846719001389466, + "loss": 2.0132, + "step": 700 + }, + { + "epoch": 0.07489451476793249, + "grad_norm": 0.7373443245887756, + "learning_rate": 0.001484162803946705, + "loss": 2.0016, + "step": 710 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 0.7777355313301086, + "learning_rate": 0.0014836454811017635, + "loss": 1.9894, + "step": 720 + }, + { + "epoch": 0.0770042194092827, + "grad_norm": 0.8153260350227356, + "learning_rate": 0.0014831199374009778, + "loss": 1.9798, + "step": 730 + }, + { + "epoch": 0.07805907172995781, + "grad_norm": 0.7664873600006104, + "learning_rate": 0.0014825861787333208, + "loss": 1.986, + "step": 740 + }, + { + "epoch": 0.07911392405063292, + "grad_norm": 0.7785695195198059, + "learning_rate": 0.0014820442110798197, + "loss": 1.965, 
+ "step": 750 + }, + { + "epoch": 0.08016877637130802, + "grad_norm": 0.718776524066925, + "learning_rate": 0.0014814940405134865, + "loss": 1.9671, + "step": 760 + }, + { + "epoch": 0.08122362869198312, + "grad_norm": 0.873417317867279, + "learning_rate": 0.001480935673199251, + "loss": 1.9575, + "step": 770 + }, + { + "epoch": 0.08227848101265822, + "grad_norm": 0.7487492561340332, + "learning_rate": 0.0014803691153938915, + "loss": 1.9593, + "step": 780 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 0.7315303683280945, + "learning_rate": 0.0014797943734459653, + "loss": 1.9597, + "step": 790 + }, + { + "epoch": 0.08438818565400844, + "grad_norm": 0.8763788938522339, + "learning_rate": 0.001479211453795736, + "loss": 1.9342, + "step": 800 + }, + { + "epoch": 0.08544303797468354, + "grad_norm": 1.1143440008163452, + "learning_rate": 0.0014786203629751033, + "loss": 1.9231, + "step": 810 + }, + { + "epoch": 0.08649789029535865, + "grad_norm": 0.7945889830589294, + "learning_rate": 0.0014780211076075279, + "loss": 1.9314, + "step": 820 + }, + { + "epoch": 0.08755274261603375, + "grad_norm": 0.7053716778755188, + "learning_rate": 0.0014774136944079594, + "loss": 1.9365, + "step": 830 + }, + { + "epoch": 0.08860759493670886, + "grad_norm": 0.9266723394393921, + "learning_rate": 0.0014767981301827592, + "loss": 1.9121, + "step": 840 + }, + { + "epoch": 0.08966244725738397, + "grad_norm": 0.769698441028595, + "learning_rate": 0.0014761744218296249, + "loss": 1.9152, + "step": 850 + }, + { + "epoch": 0.09071729957805907, + "grad_norm": 0.682551383972168, + "learning_rate": 0.001475542576337513, + "loss": 1.9122, + "step": 860 + }, + { + "epoch": 0.09177215189873418, + "grad_norm": 0.8614901304244995, + "learning_rate": 0.001474902600786561, + "loss": 1.9066, + "step": 870 + }, + { + "epoch": 0.09282700421940929, + "grad_norm": 0.8131840825080872, + "learning_rate": 0.0014742545023480075, + "loss": 1.9004, + "step": 880 + }, + { + "epoch": 0.0938818565400844, + 
"grad_norm": 1.1395957469940186, + "learning_rate": 0.0014735982882841117, + "loss": 1.8947, + "step": 890 + }, + { + "epoch": 0.0949367088607595, + "grad_norm": 0.8723341226577759, + "learning_rate": 0.0014729339659480727, + "loss": 1.8909, + "step": 900 + }, + { + "epoch": 0.09599156118143459, + "grad_norm": 0.8527464270591736, + "learning_rate": 0.0014722615427839468, + "loss": 1.8964, + "step": 910 + }, + { + "epoch": 0.0970464135021097, + "grad_norm": 0.7265653014183044, + "learning_rate": 0.0014715810263265633, + "loss": 1.8845, + "step": 920 + }, + { + "epoch": 0.0981012658227848, + "grad_norm": 0.8338080644607544, + "learning_rate": 0.0014708924242014423, + "loss": 1.8691, + "step": 930 + }, + { + "epoch": 0.09915611814345991, + "grad_norm": 0.7501885294914246, + "learning_rate": 0.0014701957441247064, + "loss": 1.8772, + "step": 940 + }, + { + "epoch": 0.10021097046413502, + "grad_norm": 0.7609304189682007, + "learning_rate": 0.0014694909939029959, + "loss": 1.8587, + "step": 950 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 1.0206756591796875, + "learning_rate": 0.0014687781814333814, + "loss": 1.8648, + "step": 960 + }, + { + "epoch": 0.10232067510548523, + "grad_norm": 1.2987595796585083, + "learning_rate": 0.0014680573147032746, + "loss": 1.8702, + "step": 970 + }, + { + "epoch": 0.10337552742616034, + "grad_norm": 1.9214754104614258, + "learning_rate": 0.0014673284017903392, + "loss": 1.8545, + "step": 980 + }, + { + "epoch": 0.10443037974683544, + "grad_norm": 1.080756664276123, + "learning_rate": 0.0014665914508624, + "loss": 1.8525, + "step": 990 + }, + { + "epoch": 0.10548523206751055, + "grad_norm": 0.6982481479644775, + "learning_rate": 0.0014658464701773526, + "loss": 1.8497, + "step": 1000 + }, + { + "epoch": 0.10654008438818566, + "grad_norm": 0.7574365735054016, + "learning_rate": 0.0014650934680830688, + "loss": 1.8414, + "step": 1010 + }, + { + "epoch": 0.10759493670886076, + "grad_norm": 0.8112171292304993, + "learning_rate": 
0.0014643324530173051, + "loss": 1.8392, + "step": 1020 + }, + { + "epoch": 0.10864978902953587, + "grad_norm": 1.0897507667541504, + "learning_rate": 0.0014635634335076067, + "loss": 1.8421, + "step": 1030 + }, + { + "epoch": 0.10970464135021098, + "grad_norm": 0.7258804440498352, + "learning_rate": 0.001462786418171213, + "loss": 1.8293, + "step": 1040 + }, + { + "epoch": 0.11075949367088607, + "grad_norm": 1.0777992010116577, + "learning_rate": 0.0014620014157149597, + "loss": 1.8316, + "step": 1050 + }, + { + "epoch": 0.11181434599156118, + "grad_norm": 0.730086624622345, + "learning_rate": 0.001461208434935183, + "loss": 1.8224, + "step": 1060 + }, + { + "epoch": 0.11286919831223628, + "grad_norm": 0.6664548516273499, + "learning_rate": 0.0014604074847176197, + "loss": 1.8123, + "step": 1070 + }, + { + "epoch": 0.11392405063291139, + "grad_norm": 0.900333821773529, + "learning_rate": 0.0014595985740373082, + "loss": 1.8147, + "step": 1080 + }, + { + "epoch": 0.1149789029535865, + "grad_norm": 0.7085007429122925, + "learning_rate": 0.0014587817119584873, + "loss": 1.8238, + "step": 1090 + }, + { + "epoch": 0.1160337552742616, + "grad_norm": 0.7526223659515381, + "learning_rate": 0.001457956907634496, + "loss": 1.8101, + "step": 1100 + }, + { + "epoch": 0.11708860759493671, + "grad_norm": 0.7018643021583557, + "learning_rate": 0.0014571241703076692, + "loss": 1.8137, + "step": 1110 + }, + { + "epoch": 0.11814345991561181, + "grad_norm": 0.8205339312553406, + "learning_rate": 0.0014562835093092348, + "loss": 1.8092, + "step": 1120 + }, + { + "epoch": 0.11919831223628692, + "grad_norm": 0.7558243870735168, + "learning_rate": 0.0014554349340592104, + "loss": 1.7966, + "step": 1130 + }, + { + "epoch": 0.12025316455696203, + "grad_norm": 0.9665574431419373, + "learning_rate": 0.001454578454066296, + "loss": 1.8085, + "step": 1140 + }, + { + "epoch": 0.12130801687763713, + "grad_norm": 1.4096592664718628, + "learning_rate": 0.0014537140789277678, + "loss": 1.7937, + 
"step": 1150 + }, + { + "epoch": 0.12236286919831224, + "grad_norm": 1.3244471549987793, + "learning_rate": 0.0014528418183293716, + "loss": 1.806, + "step": 1160 + }, + { + "epoch": 0.12341772151898735, + "grad_norm": 1.023123860359192, + "learning_rate": 0.001451961682045213, + "loss": 1.7904, + "step": 1170 + }, + { + "epoch": 0.12447257383966245, + "grad_norm": 0.7269262671470642, + "learning_rate": 0.001451073679937649, + "loss": 1.7672, + "step": 1180 + }, + { + "epoch": 0.12552742616033755, + "grad_norm": 0.6785959005355835, + "learning_rate": 0.0014501778219571766, + "loss": 1.7722, + "step": 1190 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 0.859392523765564, + "learning_rate": 0.0014492741181423225, + "loss": 1.7872, + "step": 1200 + }, + { + "epoch": 0.12763713080168776, + "grad_norm": 0.7017579078674316, + "learning_rate": 0.0014483625786195285, + "loss": 1.7818, + "step": 1210 + }, + { + "epoch": 0.12869198312236288, + "grad_norm": 0.6827753186225891, + "learning_rate": 0.0014474432136030405, + "loss": 1.7682, + "step": 1220 + }, + { + "epoch": 0.12974683544303797, + "grad_norm": 0.9476015567779541, + "learning_rate": 0.0014465160333947923, + "loss": 1.7576, + "step": 1230 + }, + { + "epoch": 0.1308016877637131, + "grad_norm": 0.7696287035942078, + "learning_rate": 0.0014455810483842908, + "loss": 1.7739, + "step": 1240 + }, + { + "epoch": 0.13185654008438819, + "grad_norm": 0.6830731630325317, + "learning_rate": 0.0014446382690484997, + "loss": 1.7752, + "step": 1250 + }, + { + "epoch": 0.13291139240506328, + "grad_norm": 0.907674252986908, + "learning_rate": 0.0014436877059517215, + "loss": 1.7579, + "step": 1260 + }, + { + "epoch": 0.1339662447257384, + "grad_norm": 0.7155635356903076, + "learning_rate": 0.0014427293697454803, + "loss": 1.755, + "step": 1270 + }, + { + "epoch": 0.1350210970464135, + "grad_norm": 0.7013174295425415, + "learning_rate": 0.001441763271168401, + "loss": 1.7703, + "step": 1280 + }, + { + "epoch": 
0.1360759493670886, + "grad_norm": 0.6876915693283081, + "learning_rate": 0.00144078942104609, + "loss": 1.7592, + "step": 1290 + }, + { + "epoch": 0.1371308016877637, + "grad_norm": 0.7317845821380615, + "learning_rate": 0.001439807830291013, + "loss": 1.746, + "step": 1300 + }, + { + "epoch": 0.13818565400843882, + "grad_norm": 0.6359264254570007, + "learning_rate": 0.0014388185099023744, + "loss": 1.7526, + "step": 1310 + }, + { + "epoch": 0.13924050632911392, + "grad_norm": 0.6847758293151855, + "learning_rate": 0.0014378214709659916, + "loss": 1.7508, + "step": 1320 + }, + { + "epoch": 0.14029535864978904, + "grad_norm": 1.0915815830230713, + "learning_rate": 0.0014368167246541733, + "loss": 1.7379, + "step": 1330 + }, + { + "epoch": 0.14135021097046413, + "grad_norm": 0.8375098705291748, + "learning_rate": 0.0014358042822255918, + "loss": 1.7442, + "step": 1340 + }, + { + "epoch": 0.14240506329113925, + "grad_norm": 0.7108977437019348, + "learning_rate": 0.0014347841550251597, + "loss": 1.7447, + "step": 1350 + }, + { + "epoch": 0.14345991561181434, + "grad_norm": 0.7159097194671631, + "learning_rate": 0.0014337563544838997, + "loss": 1.7387, + "step": 1360 + }, + { + "epoch": 0.14451476793248946, + "grad_norm": 0.8628783822059631, + "learning_rate": 0.001432720892118819, + "loss": 1.7371, + "step": 1370 + }, + { + "epoch": 0.14556962025316456, + "grad_norm": 0.6720645427703857, + "learning_rate": 0.0014316777795327794, + "loss": 1.7272, + "step": 1380 + }, + { + "epoch": 0.14662447257383968, + "grad_norm": 0.8924336433410645, + "learning_rate": 0.001430627028414366, + "loss": 1.7382, + "step": 1390 + }, + { + "epoch": 0.14767932489451477, + "grad_norm": 1.1842200756072998, + "learning_rate": 0.0014295686505377586, + "loss": 1.7177, + "step": 1400 + }, + { + "epoch": 0.14873417721518986, + "grad_norm": 0.8452802300453186, + "learning_rate": 0.0014285026577625982, + "loss": 1.7262, + "step": 1410 + }, + { + "epoch": 0.14978902953586498, + "grad_norm": 
0.7226518988609314, + "learning_rate": 0.0014274290620338542, + "loss": 1.7294, + "step": 1420 + }, + { + "epoch": 0.15084388185654007, + "grad_norm": 0.8550010323524475, + "learning_rate": 0.0014263478753816906, + "loss": 1.7252, + "step": 1430 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 0.6901659369468689, + "learning_rate": 0.0014252591099213326, + "loss": 1.715, + "step": 1440 + }, + { + "epoch": 0.1529535864978903, + "grad_norm": 0.6522703170776367, + "learning_rate": 0.001424162777852928, + "loss": 1.7155, + "step": 1450 + }, + { + "epoch": 0.1540084388185654, + "grad_norm": 0.7451887130737305, + "learning_rate": 0.0014230588914614134, + "loss": 1.715, + "step": 1460 + }, + { + "epoch": 0.1550632911392405, + "grad_norm": 0.7578398585319519, + "learning_rate": 0.0014219474631163745, + "loss": 1.7118, + "step": 1470 + }, + { + "epoch": 0.15611814345991562, + "grad_norm": 0.9531257748603821, + "learning_rate": 0.001420828505271909, + "loss": 1.7162, + "step": 1480 + }, + { + "epoch": 0.1571729957805907, + "grad_norm": 0.9767435193061829, + "learning_rate": 0.0014197020304664856, + "loss": 1.7145, + "step": 1490 + }, + { + "epoch": 0.15822784810126583, + "grad_norm": 0.6117169260978699, + "learning_rate": 0.0014185680513228048, + "loss": 1.7114, + "step": 1500 + }, + { + "epoch": 0.15928270042194093, + "grad_norm": 0.6931943893432617, + "learning_rate": 0.0014174265805476564, + "loss": 1.7119, + "step": 1510 + }, + { + "epoch": 0.16033755274261605, + "grad_norm": 0.8043635487556458, + "learning_rate": 0.0014162776309317778, + "loss": 1.7101, + "step": 1520 + }, + { + "epoch": 0.16139240506329114, + "grad_norm": 0.629835844039917, + "learning_rate": 0.0014151212153497108, + "loss": 1.6905, + "step": 1530 + }, + { + "epoch": 0.16244725738396623, + "grad_norm": 0.6673084497451782, + "learning_rate": 0.0014139573467596561, + "loss": 1.6848, + "step": 1540 + }, + { + "epoch": 0.16350210970464135, + "grad_norm": 1.0766055583953857, + "learning_rate": 
0.00141278603820333, + "loss": 1.687, + "step": 1550 + }, + { + "epoch": 0.16455696202531644, + "grad_norm": 1.1556854248046875, + "learning_rate": 0.0014116073028058165, + "loss": 1.6969, + "step": 1560 + }, + { + "epoch": 0.16561181434599156, + "grad_norm": 0.6995120644569397, + "learning_rate": 0.0014104211537754217, + "loss": 1.6843, + "step": 1570 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 0.8594464063644409, + "learning_rate": 0.001409227604403524, + "loss": 1.6882, + "step": 1580 + }, + { + "epoch": 0.16772151898734178, + "grad_norm": 0.8815358281135559, + "learning_rate": 0.0014080266680644277, + "loss": 1.6841, + "step": 1590 + }, + { + "epoch": 0.16877637130801687, + "grad_norm": 0.6067768931388855, + "learning_rate": 0.0014068183582152103, + "loss": 1.6891, + "step": 1600 + }, + { + "epoch": 0.169831223628692, + "grad_norm": 0.6464521884918213, + "learning_rate": 0.001405602688395574, + "loss": 1.6884, + "step": 1610 + }, + { + "epoch": 0.17088607594936708, + "grad_norm": 0.6827923059463501, + "learning_rate": 0.0014043796722276924, + "loss": 1.6574, + "step": 1620 + }, + { + "epoch": 0.1719409282700422, + "grad_norm": 0.7020195722579956, + "learning_rate": 0.0014031493234160591, + "loss": 1.6733, + "step": 1630 + }, + { + "epoch": 0.1729957805907173, + "grad_norm": 0.6551419496536255, + "learning_rate": 0.0014019116557473332, + "loss": 1.6752, + "step": 1640 + }, + { + "epoch": 0.17405063291139242, + "grad_norm": 0.8313617706298828, + "learning_rate": 0.0014006666830901854, + "loss": 1.6657, + "step": 1650 + }, + { + "epoch": 0.1751054852320675, + "grad_norm": 0.6255762577056885, + "learning_rate": 0.001399414419395142, + "loss": 1.6651, + "step": 1660 + }, + { + "epoch": 0.17616033755274263, + "grad_norm": 0.7334127426147461, + "learning_rate": 0.0013981548786944293, + "loss": 1.6781, + "step": 1670 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 0.8487969636917114, + "learning_rate": 0.0013968880751018158, + "loss": 1.6643, + 
"step": 1680 + }, + { + "epoch": 0.17827004219409281, + "grad_norm": 0.8578473925590515, + "learning_rate": 0.0013956140228124545, + "loss": 1.6571, + "step": 1690 + }, + { + "epoch": 0.17932489451476794, + "grad_norm": 0.8169883489608765, + "learning_rate": 0.0013943327361027231, + "loss": 1.6741, + "step": 1700 + }, + { + "epoch": 0.18037974683544303, + "grad_norm": 0.6358838081359863, + "learning_rate": 0.0013930442293300649, + "loss": 1.6531, + "step": 1710 + }, + { + "epoch": 0.18143459915611815, + "grad_norm": 0.661726713180542, + "learning_rate": 0.0013917485169328279, + "loss": 1.6503, + "step": 1720 + }, + { + "epoch": 0.18248945147679324, + "grad_norm": 0.6508963704109192, + "learning_rate": 0.0013904456134301016, + "loss": 1.6539, + "step": 1730 + }, + { + "epoch": 0.18354430379746836, + "grad_norm": 0.7921327948570251, + "learning_rate": 0.0013891355334215562, + "loss": 1.6556, + "step": 1740 + }, + { + "epoch": 0.18459915611814345, + "grad_norm": 0.6540847420692444, + "learning_rate": 0.0013878182915872776, + "loss": 1.677, + "step": 1750 + }, + { + "epoch": 0.18565400843881857, + "grad_norm": 0.6192095875740051, + "learning_rate": 0.001386493902687604, + "loss": 1.6529, + "step": 1760 + }, + { + "epoch": 0.18670886075949367, + "grad_norm": 0.6428976058959961, + "learning_rate": 0.00138516238156296, + "loss": 1.6557, + "step": 1770 + }, + { + "epoch": 0.1877637130801688, + "grad_norm": 0.6307291388511658, + "learning_rate": 0.0013838237431336895, + "loss": 1.6592, + "step": 1780 + }, + { + "epoch": 0.18881856540084388, + "grad_norm": 0.6049667596817017, + "learning_rate": 0.0013824780023998899, + "loss": 1.6564, + "step": 1790 + }, + { + "epoch": 0.189873417721519, + "grad_norm": 0.732274055480957, + "learning_rate": 0.0013811251744412431, + "loss": 1.6558, + "step": 1800 + }, + { + "epoch": 0.1909282700421941, + "grad_norm": 0.7281314730644226, + "learning_rate": 0.0013797652744168473, + "loss": 1.6437, + "step": 1810 + }, + { + "epoch": 
0.19198312236286919, + "grad_norm": 0.651372492313385, + "learning_rate": 0.0013783983175650457, + "loss": 1.6422, + "step": 1820 + }, + { + "epoch": 0.1930379746835443, + "grad_norm": 0.6432363390922546, + "learning_rate": 0.0013770243192032581, + "loss": 1.6394, + "step": 1830 + }, + { + "epoch": 0.1940928270042194, + "grad_norm": 0.7011226415634155, + "learning_rate": 0.0013756432947278064, + "loss": 1.6397, + "step": 1840 + }, + { + "epoch": 0.19514767932489452, + "grad_norm": 0.5884276628494263, + "learning_rate": 0.0013742552596137444, + "loss": 1.6398, + "step": 1850 + }, + { + "epoch": 0.1962025316455696, + "grad_norm": 0.6046627759933472, + "learning_rate": 0.0013728602294146833, + "loss": 1.6454, + "step": 1860 + }, + { + "epoch": 0.19725738396624473, + "grad_norm": 0.6389930844306946, + "learning_rate": 0.0013714582197626175, + "loss": 1.6427, + "step": 1870 + }, + { + "epoch": 0.19831223628691982, + "grad_norm": 0.82037752866745, + "learning_rate": 0.0013700492463677501, + "loss": 1.632, + "step": 1880 + }, + { + "epoch": 0.19936708860759494, + "grad_norm": 0.8241061568260193, + "learning_rate": 0.0013686333250183154, + "loss": 1.639, + "step": 1890 + }, + { + "epoch": 0.20042194092827004, + "grad_norm": 0.6699489951133728, + "learning_rate": 0.001367210471580404, + "loss": 1.6345, + "step": 1900 + }, + { + "epoch": 0.20147679324894516, + "grad_norm": 0.6850857734680176, + "learning_rate": 0.0013657807019977835, + "loss": 1.6356, + "step": 1910 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 0.6504431962966919, + "learning_rate": 0.0013643440322917198, + "loss": 1.633, + "step": 1920 + }, + { + "epoch": 0.20358649789029537, + "grad_norm": 0.6054185628890991, + "learning_rate": 0.0013629004785607989, + "loss": 1.6308, + "step": 1930 + }, + { + "epoch": 0.20464135021097046, + "grad_norm": 0.7195996046066284, + "learning_rate": 0.0013614500569807445, + "loss": 1.6238, + "step": 1940 + }, + { + "epoch": 0.20569620253164558, + "grad_norm": 
0.6041688919067383, + "learning_rate": 0.0013599927838042394, + "loss": 1.6231, + "step": 1950 + }, + { + "epoch": 0.20675105485232068, + "grad_norm": 0.6145355701446533, + "learning_rate": 0.0013585286753607408, + "loss": 1.6258, + "step": 1960 + }, + { + "epoch": 0.20780590717299577, + "grad_norm": 0.5808355808258057, + "learning_rate": 0.0013570577480562986, + "loss": 1.6365, + "step": 1970 + }, + { + "epoch": 0.2088607594936709, + "grad_norm": 0.5985338687896729, + "learning_rate": 0.0013555800183733717, + "loss": 1.6152, + "step": 1980 + }, + { + "epoch": 0.20991561181434598, + "grad_norm": 0.6158758997917175, + "learning_rate": 0.0013540955028706425, + "loss": 1.6172, + "step": 1990 + }, + { + "epoch": 0.2109704641350211, + "grad_norm": 0.6747137904167175, + "learning_rate": 0.0013526042181828324, + "loss": 1.6155, + "step": 2000 + }, + { + "epoch": 0.2120253164556962, + "grad_norm": 0.5812236666679382, + "learning_rate": 0.0013511061810205143, + "loss": 1.6188, + "step": 2010 + }, + { + "epoch": 0.21308016877637131, + "grad_norm": 0.9612152576446533, + "learning_rate": 0.001349601408169926, + "loss": 1.6319, + "step": 2020 + }, + { + "epoch": 0.2141350210970464, + "grad_norm": 0.9364573359489441, + "learning_rate": 0.0013480899164927823, + "loss": 1.6177, + "step": 2030 + }, + { + "epoch": 0.21518987341772153, + "grad_norm": 0.660479724407196, + "learning_rate": 0.0013465717229260853, + "loss": 1.6164, + "step": 2040 + }, + { + "epoch": 0.21624472573839662, + "grad_norm": 0.6479663848876953, + "learning_rate": 0.001345046844481935, + "loss": 1.6171, + "step": 2050 + }, + { + "epoch": 0.21729957805907174, + "grad_norm": 0.6411908864974976, + "learning_rate": 0.0013435152982473396, + "loss": 1.6062, + "step": 2060 + }, + { + "epoch": 0.21835443037974683, + "grad_norm": 0.7929542660713196, + "learning_rate": 0.0013419771013840217, + "loss": 1.6114, + "step": 2070 + }, + { + "epoch": 0.21940928270042195, + "grad_norm": 0.7732736468315125, + "learning_rate": 
0.001340432271128229, + "loss": 1.613, + "step": 2080 + }, + { + "epoch": 0.22046413502109705, + "grad_norm": 0.5926094651222229, + "learning_rate": 0.0013388808247905381, + "loss": 1.6049, + "step": 2090 + }, + { + "epoch": 0.22151898734177214, + "grad_norm": 0.9917119145393372, + "learning_rate": 0.0013373227797556634, + "loss": 1.6118, + "step": 2100 + }, + { + "epoch": 0.22257383966244726, + "grad_norm": 0.6367053389549255, + "learning_rate": 0.00133575815348226, + "loss": 1.6018, + "step": 2110 + }, + { + "epoch": 0.22362869198312235, + "grad_norm": 0.6140070557594299, + "learning_rate": 0.0013341869635027292, + "loss": 1.6008, + "step": 2120 + }, + { + "epoch": 0.22468354430379747, + "grad_norm": 0.6564701795578003, + "learning_rate": 0.001332609227423022, + "loss": 1.6015, + "step": 2130 + }, + { + "epoch": 0.22573839662447256, + "grad_norm": 0.5905231833457947, + "learning_rate": 0.0013310249629224417, + "loss": 1.5974, + "step": 2140 + }, + { + "epoch": 0.22679324894514769, + "grad_norm": 0.9076852798461914, + "learning_rate": 0.0013294341877534454, + "loss": 1.6058, + "step": 2150 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 0.8390849828720093, + "learning_rate": 0.0013278369197414458, + "loss": 1.6208, + "step": 2160 + }, + { + "epoch": 0.2289029535864979, + "grad_norm": 0.6586330533027649, + "learning_rate": 0.0013262331767846104, + "loss": 1.6015, + "step": 2170 + }, + { + "epoch": 0.229957805907173, + "grad_norm": 0.5857061743736267, + "learning_rate": 0.0013246229768536628, + "loss": 1.5836, + "step": 2180 + }, + { + "epoch": 0.2310126582278481, + "grad_norm": 0.6810007691383362, + "learning_rate": 0.001323006337991679, + "loss": 1.599, + "step": 2190 + }, + { + "epoch": 0.2320675105485232, + "grad_norm": 0.6509931087493896, + "learning_rate": 0.0013213832783138873, + "loss": 1.5995, + "step": 2200 + }, + { + "epoch": 0.23312236286919832, + "grad_norm": 0.6927504539489746, + "learning_rate": 0.0013197538160074633, + "loss": 1.5864, + 
"step": 2210 + }, + { + "epoch": 0.23417721518987342, + "grad_norm": 0.5739829540252686, + "learning_rate": 0.0013181179693313283, + "loss": 1.5989, + "step": 2220 + }, + { + "epoch": 0.23523206751054854, + "grad_norm": 0.6222516894340515, + "learning_rate": 0.0013164757566159428, + "loss": 1.5928, + "step": 2230 + }, + { + "epoch": 0.23628691983122363, + "grad_norm": 0.5849514603614807, + "learning_rate": 0.001314827196263102, + "loss": 1.5911, + "step": 2240 + }, + { + "epoch": 0.23734177215189872, + "grad_norm": 0.7482506632804871, + "learning_rate": 0.0013131723067457302, + "loss": 1.587, + "step": 2250 + }, + { + "epoch": 0.23839662447257384, + "grad_norm": 0.7725493311882019, + "learning_rate": 0.0013115111066076721, + "loss": 1.5782, + "step": 2260 + }, + { + "epoch": 0.23945147679324894, + "grad_norm": 0.6317006945610046, + "learning_rate": 0.0013098436144634862, + "loss": 1.6099, + "step": 2270 + }, + { + "epoch": 0.24050632911392406, + "grad_norm": 0.589344322681427, + "learning_rate": 0.0013081698489982364, + "loss": 1.5982, + "step": 2280 + }, + { + "epoch": 0.24156118143459915, + "grad_norm": 0.6428040266036987, + "learning_rate": 0.001306489828967282, + "loss": 1.5824, + "step": 2290 + }, + { + "epoch": 0.24261603375527427, + "grad_norm": 0.6076661348342896, + "learning_rate": 0.0013048035731960679, + "loss": 1.5799, + "step": 2300 + }, + { + "epoch": 0.24367088607594936, + "grad_norm": 0.5798097848892212, + "learning_rate": 0.0013031111005799133, + "loss": 1.5808, + "step": 2310 + }, + { + "epoch": 0.24472573839662448, + "grad_norm": 0.6156730055809021, + "learning_rate": 0.0013014124300838004, + "loss": 1.5919, + "step": 2320 + }, + { + "epoch": 0.24578059071729957, + "grad_norm": 0.6144553422927856, + "learning_rate": 0.0012997075807421612, + "loss": 1.5734, + "step": 2330 + }, + { + "epoch": 0.2468354430379747, + "grad_norm": 0.8700230121612549, + "learning_rate": 0.0012979965716586653, + "loss": 1.5688, + "step": 2340 + }, + { + "epoch": 
0.2478902953586498, + "grad_norm": 0.6609969735145569, + "learning_rate": 0.0012962794220060048, + "loss": 1.5666, + "step": 2350 + }, + { + "epoch": 0.2489451476793249, + "grad_norm": 0.7983867526054382, + "learning_rate": 0.0012945561510256801, + "loss": 1.5813, + "step": 2360 + }, + { + "epoch": 0.25, + "grad_norm": 0.6822909712791443, + "learning_rate": 0.001292826778027784, + "loss": 1.5795, + "step": 2370 + }, + { + "epoch": 0.2510548523206751, + "grad_norm": 0.6678816080093384, + "learning_rate": 0.0012910913223907856, + "loss": 1.5745, + "step": 2380 + }, + { + "epoch": 0.2521097046413502, + "grad_norm": 0.7211169600486755, + "learning_rate": 0.0012893498035613123, + "loss": 1.5735, + "step": 2390 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 0.6313039660453796, + "learning_rate": 0.001287602241053933, + "loss": 1.5745, + "step": 2400 + }, + { + "epoch": 0.2542194092827004, + "grad_norm": 0.6936741471290588, + "learning_rate": 0.0012858486544509392, + "loss": 1.5778, + "step": 2410 + }, + { + "epoch": 0.2552742616033755, + "grad_norm": 0.6482908725738525, + "learning_rate": 0.0012840890634021249, + "loss": 1.5715, + "step": 2420 + }, + { + "epoch": 0.2563291139240506, + "grad_norm": 0.5932878255844116, + "learning_rate": 0.0012823234876245667, + "loss": 1.568, + "step": 2430 + }, + { + "epoch": 0.25738396624472576, + "grad_norm": 0.6601869463920593, + "learning_rate": 0.0012805519469024035, + "loss": 1.5777, + "step": 2440 + }, + { + "epoch": 0.25843881856540085, + "grad_norm": 0.7646185755729675, + "learning_rate": 0.0012787744610866143, + "loss": 1.5623, + "step": 2450 + }, + { + "epoch": 0.25949367088607594, + "grad_norm": 0.5993582010269165, + "learning_rate": 0.0012769910500947954, + "loss": 1.5767, + "step": 2460 + }, + { + "epoch": 0.26054852320675104, + "grad_norm": 0.6157863140106201, + "learning_rate": 0.0012752017339109376, + "loss": 1.568, + "step": 2470 + }, + { + "epoch": 0.2616033755274262, + "grad_norm": 0.787861704826355, + 
"learning_rate": 0.0012734065325852029, + "loss": 1.5706, + "step": 2480 + }, + { + "epoch": 0.2626582278481013, + "grad_norm": 0.6114040017127991, + "learning_rate": 0.0012716054662336987, + "loss": 1.5579, + "step": 2490 + }, + { + "epoch": 0.26371308016877637, + "grad_norm": 0.7357029914855957, + "learning_rate": 0.001269798555038252, + "loss": 1.5651, + "step": 2500 + }, + { + "epoch": 0.26476793248945146, + "grad_norm": 0.6027712821960449, + "learning_rate": 0.0012679858192461864, + "loss": 1.5621, + "step": 2510 + }, + { + "epoch": 0.26582278481012656, + "grad_norm": 0.7697759866714478, + "learning_rate": 0.0012661672791700906, + "loss": 1.5658, + "step": 2520 + }, + { + "epoch": 0.2668776371308017, + "grad_norm": 0.6694803237915039, + "learning_rate": 0.0012643429551875945, + "loss": 1.5591, + "step": 2530 + }, + { + "epoch": 0.2679324894514768, + "grad_norm": 0.7663103342056274, + "learning_rate": 0.0012625128677411388, + "loss": 1.5616, + "step": 2540 + }, + { + "epoch": 0.2689873417721519, + "grad_norm": 0.5824065804481506, + "learning_rate": 0.0012606770373377475, + "loss": 1.5555, + "step": 2550 + }, + { + "epoch": 0.270042194092827, + "grad_norm": 0.8711327314376831, + "learning_rate": 0.0012588354845487959, + "loss": 1.5662, + "step": 2560 + }, + { + "epoch": 0.27109704641350213, + "grad_norm": 0.717494785785675, + "learning_rate": 0.001256988230009783, + "loss": 1.5597, + "step": 2570 + }, + { + "epoch": 0.2721518987341772, + "grad_norm": 0.6158130168914795, + "learning_rate": 0.0012551352944200976, + "loss": 1.5575, + "step": 2580 + }, + { + "epoch": 0.2732067510548523, + "grad_norm": 0.6116865873336792, + "learning_rate": 0.0012532766985427874, + "loss": 1.5649, + "step": 2590 + }, + { + "epoch": 0.2742616033755274, + "grad_norm": 0.6094300746917725, + "learning_rate": 0.0012514124632043272, + "loss": 1.5644, + "step": 2600 + }, + { + "epoch": 0.27531645569620256, + "grad_norm": 0.6006965637207031, + "learning_rate": 0.0012495426092943842, + 
"loss": 1.5596, + "step": 2610 + }, + { + "epoch": 0.27637130801687765, + "grad_norm": 0.6760967373847961, + "learning_rate": 0.0012476671577655845, + "loss": 1.5596, + "step": 2620 + }, + { + "epoch": 0.27742616033755274, + "grad_norm": 0.6141475439071655, + "learning_rate": 0.0012457861296332774, + "loss": 1.545, + "step": 2630 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 0.8365676403045654, + "learning_rate": 0.001243899545975303, + "loss": 1.5577, + "step": 2640 + }, + { + "epoch": 0.2795358649789029, + "grad_norm": 0.6943111419677734, + "learning_rate": 0.0012420074279317515, + "loss": 1.5512, + "step": 2650 + }, + { + "epoch": 0.2805907172995781, + "grad_norm": 0.6891856789588928, + "learning_rate": 0.0012401097967047298, + "loss": 1.5462, + "step": 2660 + }, + { + "epoch": 0.28164556962025317, + "grad_norm": 0.6379979848861694, + "learning_rate": 0.001238206673558122, + "loss": 1.5478, + "step": 2670 + }, + { + "epoch": 0.28270042194092826, + "grad_norm": 0.5563579797744751, + "learning_rate": 0.0012362980798173526, + "loss": 1.5451, + "step": 2680 + }, + { + "epoch": 0.28375527426160335, + "grad_norm": 0.5765627026557922, + "learning_rate": 0.0012343840368691462, + "loss": 1.5453, + "step": 2690 + }, + { + "epoch": 0.2848101265822785, + "grad_norm": 0.63944011926651, + "learning_rate": 0.0012324645661612886, + "loss": 1.5406, + "step": 2700 + }, + { + "epoch": 0.2858649789029536, + "grad_norm": 0.5873876810073853, + "learning_rate": 0.0012305396892023867, + "loss": 1.5485, + "step": 2710 + }, + { + "epoch": 0.2869198312236287, + "grad_norm": 0.6467324495315552, + "learning_rate": 0.0012286094275616264, + "loss": 1.5481, + "step": 2720 + }, + { + "epoch": 0.2879746835443038, + "grad_norm": 0.6518478989601135, + "learning_rate": 0.0012266738028685318, + "loss": 1.5303, + "step": 2730 + }, + { + "epoch": 0.2890295358649789, + "grad_norm": 0.5957792401313782, + "learning_rate": 0.001224732836812723, + "loss": 1.5416, + "step": 2740 + }, + { + 
"epoch": 0.290084388185654, + "grad_norm": 0.6633485555648804, + "learning_rate": 0.0012227865511436724, + "loss": 1.5466, + "step": 2750 + }, + { + "epoch": 0.2911392405063291, + "grad_norm": 0.8054096698760986, + "learning_rate": 0.001220834967670461, + "loss": 1.5531, + "step": 2760 + }, + { + "epoch": 0.2921940928270042, + "grad_norm": 0.5939935445785522, + "learning_rate": 0.0012188781082615346, + "loss": 1.5489, + "step": 2770 + }, + { + "epoch": 0.29324894514767935, + "grad_norm": 0.7417927980422974, + "learning_rate": 0.0012169159948444588, + "loss": 1.5466, + "step": 2780 + }, + { + "epoch": 0.29430379746835444, + "grad_norm": 0.5846324563026428, + "learning_rate": 0.001214948649405672, + "loss": 1.5444, + "step": 2790 + }, + { + "epoch": 0.29535864978902954, + "grad_norm": 0.5466947555541992, + "learning_rate": 0.0012129760939902407, + "loss": 1.5388, + "step": 2800 + }, + { + "epoch": 0.29641350210970463, + "grad_norm": 0.637851357460022, + "learning_rate": 0.0012109983507016114, + "loss": 1.5412, + "step": 2810 + }, + { + "epoch": 0.2974683544303797, + "grad_norm": 0.6293614506721497, + "learning_rate": 0.0012090154417013636, + "loss": 1.5475, + "step": 2820 + }, + { + "epoch": 0.29852320675105487, + "grad_norm": 0.6581830382347107, + "learning_rate": 0.0012070273892089605, + "loss": 1.5204, + "step": 2830 + }, + { + "epoch": 0.29957805907172996, + "grad_norm": 0.5989952087402344, + "learning_rate": 0.0012050342155015012, + "loss": 1.5228, + "step": 2840 + }, + { + "epoch": 0.30063291139240506, + "grad_norm": 0.6208792924880981, + "learning_rate": 0.0012030359429134707, + "loss": 1.5284, + "step": 2850 + }, + { + "epoch": 0.30168776371308015, + "grad_norm": 0.6334675550460815, + "learning_rate": 0.0012010325938364883, + "loss": 1.5377, + "step": 2860 + }, + { + "epoch": 0.3027426160337553, + "grad_norm": 0.7662240862846375, + "learning_rate": 0.0011990241907190592, + "loss": 1.5369, + "step": 2870 + }, + { + "epoch": 0.3037974683544304, + "grad_norm": 
0.6226905584335327, + "learning_rate": 0.001197010756066321, + "loss": 1.5227, + "step": 2880 + }, + { + "epoch": 0.3048523206751055, + "grad_norm": 0.5965680480003357, + "learning_rate": 0.0011949923124397917, + "loss": 1.526, + "step": 2890 + }, + { + "epoch": 0.3059071729957806, + "grad_norm": 0.6389878988265991, + "learning_rate": 0.001192968882457118, + "loss": 1.5317, + "step": 2900 + }, + { + "epoch": 0.3069620253164557, + "grad_norm": 0.5802280306816101, + "learning_rate": 0.001190940488791821, + "loss": 1.531, + "step": 2910 + }, + { + "epoch": 0.3080168776371308, + "grad_norm": 0.6262878775596619, + "learning_rate": 0.0011889071541730419, + "loss": 1.5291, + "step": 2920 + }, + { + "epoch": 0.3090717299578059, + "grad_norm": 0.600366473197937, + "learning_rate": 0.001186868901385288, + "loss": 1.523, + "step": 2930 + }, + { + "epoch": 0.310126582278481, + "grad_norm": 0.6489313840866089, + "learning_rate": 0.001184825753268177, + "loss": 1.5302, + "step": 2940 + }, + { + "epoch": 0.3111814345991561, + "grad_norm": 0.6296570301055908, + "learning_rate": 0.0011827777327161814, + "loss": 1.539, + "step": 2950 + }, + { + "epoch": 0.31223628691983124, + "grad_norm": 0.8344880938529968, + "learning_rate": 0.0011807248626783714, + "loss": 1.5179, + "step": 2960 + }, + { + "epoch": 0.31329113924050633, + "grad_norm": 0.5954292416572571, + "learning_rate": 0.0011786671661581584, + "loss": 1.5112, + "step": 2970 + }, + { + "epoch": 0.3143459915611814, + "grad_norm": 0.6386845111846924, + "learning_rate": 0.001176604666213036, + "loss": 1.5197, + "step": 2980 + }, + { + "epoch": 0.3154008438818565, + "grad_norm": 0.561570942401886, + "learning_rate": 0.0011745373859543236, + "loss": 1.5329, + "step": 2990 + }, + { + "epoch": 0.31645569620253167, + "grad_norm": 0.5886351466178894, + "learning_rate": 0.0011724653485469063, + "loss": 1.5143, + "step": 3000 + }, + { + "epoch": 0.31751054852320676, + "grad_norm": 0.5922853946685791, + "learning_rate": 
0.0011703885772089743, + "loss": 1.5225, + "step": 3010 + }, + { + "epoch": 0.31856540084388185, + "grad_norm": 0.6738707423210144, + "learning_rate": 0.0011683070952117646, + "loss": 1.5222, + "step": 3020 + }, + { + "epoch": 0.31962025316455694, + "grad_norm": 0.6255045533180237, + "learning_rate": 0.0011662209258792998, + "loss": 1.509, + "step": 3030 + }, + { + "epoch": 0.3206751054852321, + "grad_norm": 0.8234590888023376, + "learning_rate": 0.0011641300925881257, + "loss": 1.5201, + "step": 3040 + }, + { + "epoch": 0.3217299578059072, + "grad_norm": 0.5764292478561401, + "learning_rate": 0.0011620346187670501, + "loss": 1.5131, + "step": 3050 + }, + { + "epoch": 0.3227848101265823, + "grad_norm": 0.6660158038139343, + "learning_rate": 0.0011599345278968806, + "loss": 1.5262, + "step": 3060 + }, + { + "epoch": 0.32383966244725737, + "grad_norm": 0.5639556646347046, + "learning_rate": 0.0011578298435101604, + "loss": 1.5147, + "step": 3070 + }, + { + "epoch": 0.32489451476793246, + "grad_norm": 0.5862758159637451, + "learning_rate": 0.0011557205891909062, + "loss": 1.5035, + "step": 3080 + }, + { + "epoch": 0.3259493670886076, + "grad_norm": 0.6001995801925659, + "learning_rate": 0.0011536067885743423, + "loss": 1.521, + "step": 3090 + }, + { + "epoch": 0.3270042194092827, + "grad_norm": 0.6851192712783813, + "learning_rate": 0.001151488465346637, + "loss": 1.5078, + "step": 3100 + }, + { + "epoch": 0.3280590717299578, + "grad_norm": 0.6347275376319885, + "learning_rate": 0.0011493656432446362, + "loss": 1.5145, + "step": 3110 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 0.6555883884429932, + "learning_rate": 0.0011472383460555983, + "loss": 1.5162, + "step": 3120 + }, + { + "epoch": 0.33016877637130804, + "grad_norm": 0.6473186016082764, + "learning_rate": 0.001145106597616927, + "loss": 1.5258, + "step": 3130 + }, + { + "epoch": 0.33122362869198313, + "grad_norm": 0.6326327323913574, + "learning_rate": 0.001142970421815904, + "loss": 1.5018, + 
"step": 3140 + }, + { + "epoch": 0.3322784810126582, + "grad_norm": 0.575519859790802, + "learning_rate": 0.0011408298425894226, + "loss": 1.5105, + "step": 3150 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.6293684244155884, + "learning_rate": 0.0011386848839237186, + "loss": 1.5118, + "step": 3160 + }, + { + "epoch": 0.33438818565400846, + "grad_norm": 0.7003412842750549, + "learning_rate": 0.0011365355698541005, + "loss": 1.5153, + "step": 3170 + }, + { + "epoch": 0.33544303797468356, + "grad_norm": 0.7129136323928833, + "learning_rate": 0.0011343819244646824, + "loss": 1.5107, + "step": 3180 + }, + { + "epoch": 0.33649789029535865, + "grad_norm": 0.7145129442214966, + "learning_rate": 0.001132223971888112, + "loss": 1.509, + "step": 3190 + }, + { + "epoch": 0.33755274261603374, + "grad_norm": 0.5614217519760132, + "learning_rate": 0.0011300617363053024, + "loss": 1.5052, + "step": 3200 + }, + { + "epoch": 0.33860759493670883, + "grad_norm": 0.6394768953323364, + "learning_rate": 0.0011278952419451586, + "loss": 1.5152, + "step": 3210 + }, + { + "epoch": 0.339662447257384, + "grad_norm": 0.6327441334724426, + "learning_rate": 0.0011257245130843077, + "loss": 1.5055, + "step": 3220 + }, + { + "epoch": 0.3407172995780591, + "grad_norm": 0.6421175599098206, + "learning_rate": 0.0011235495740468265, + "loss": 1.4991, + "step": 3230 + }, + { + "epoch": 0.34177215189873417, + "grad_norm": 0.63056880235672, + "learning_rate": 0.0011213704492039694, + "loss": 1.4816, + "step": 3240 + }, + { + "epoch": 0.34282700421940926, + "grad_norm": 0.6368049383163452, + "learning_rate": 0.001119187162973894, + "loss": 1.5055, + "step": 3250 + }, + { + "epoch": 0.3438818565400844, + "grad_norm": 0.7214128971099854, + "learning_rate": 0.001116999739821388, + "loss": 1.4941, + "step": 3260 + }, + { + "epoch": 0.3449367088607595, + "grad_norm": 0.6327651739120483, + "learning_rate": 0.0011148082042575968, + "loss": 1.5157, + "step": 3270 + }, + { + "epoch": 
0.3459915611814346, + "grad_norm": 0.5672954320907593, + "learning_rate": 0.0011126125808397461, + "loss": 1.5116, + "step": 3280 + }, + { + "epoch": 0.3470464135021097, + "grad_norm": 0.7400348782539368, + "learning_rate": 0.0011104128941708683, + "loss": 1.4945, + "step": 3290 + }, + { + "epoch": 0.34810126582278483, + "grad_norm": 0.5492693185806274, + "learning_rate": 0.001108209168899527, + "loss": 1.5022, + "step": 3300 + }, + { + "epoch": 0.3491561181434599, + "grad_norm": 0.6273090243339539, + "learning_rate": 0.0011060014297195396, + "loss": 1.5046, + "step": 3310 + }, + { + "epoch": 0.350210970464135, + "grad_norm": 0.6279908418655396, + "learning_rate": 0.0011037897013697015, + "loss": 1.5077, + "step": 3320 + }, + { + "epoch": 0.3512658227848101, + "grad_norm": 0.6551921367645264, + "learning_rate": 0.0011015740086335092, + "loss": 1.4971, + "step": 3330 + }, + { + "epoch": 0.35232067510548526, + "grad_norm": 0.56218022108078, + "learning_rate": 0.0010993543763388814, + "loss": 1.4978, + "step": 3340 + }, + { + "epoch": 0.35337552742616035, + "grad_norm": 0.6214286684989929, + "learning_rate": 0.0010971308293578814, + "loss": 1.494, + "step": 3350 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 0.641476035118103, + "learning_rate": 0.0010949033926064397, + "loss": 1.4904, + "step": 3360 + }, + { + "epoch": 0.35548523206751054, + "grad_norm": 0.7065024971961975, + "learning_rate": 0.0010926720910440725, + "loss": 1.5007, + "step": 3370 + }, + { + "epoch": 0.35654008438818563, + "grad_norm": 0.6150898933410645, + "learning_rate": 0.001090436949673603, + "loss": 1.4943, + "step": 3380 + }, + { + "epoch": 0.3575949367088608, + "grad_norm": 0.6640717387199402, + "learning_rate": 0.0010881979935408815, + "loss": 1.4906, + "step": 3390 + }, + { + "epoch": 0.35864978902953587, + "grad_norm": 0.5973417162895203, + "learning_rate": 0.0010859552477345052, + "loss": 1.4967, + "step": 3400 + }, + { + "epoch": 0.35970464135021096, + "grad_norm": 
0.6140454411506653, + "learning_rate": 0.001083708737385536, + "loss": 1.4956, + "step": 3410 + }, + { + "epoch": 0.36075949367088606, + "grad_norm": 0.5711833834648132, + "learning_rate": 0.0010814584876672187, + "loss": 1.4765, + "step": 3420 + }, + { + "epoch": 0.3618143459915612, + "grad_norm": 0.5786462426185608, + "learning_rate": 0.0010792045237947008, + "loss": 1.4933, + "step": 3430 + }, + { + "epoch": 0.3628691983122363, + "grad_norm": 0.613167405128479, + "learning_rate": 0.0010769468710247478, + "loss": 1.4887, + "step": 3440 + }, + { + "epoch": 0.3639240506329114, + "grad_norm": 0.5962417721748352, + "learning_rate": 0.0010746855546554612, + "loss": 1.4865, + "step": 3450 + }, + { + "epoch": 0.3649789029535865, + "grad_norm": 0.6069571375846863, + "learning_rate": 0.0010724206000259954, + "loss": 1.4811, + "step": 3460 + }, + { + "epoch": 0.36603375527426163, + "grad_norm": 0.6359487771987915, + "learning_rate": 0.0010701520325162727, + "loss": 1.4939, + "step": 3470 + }, + { + "epoch": 0.3670886075949367, + "grad_norm": 0.6059635877609253, + "learning_rate": 0.0010678798775467001, + "loss": 1.5005, + "step": 3480 + }, + { + "epoch": 0.3681434599156118, + "grad_norm": 0.551054060459137, + "learning_rate": 0.0010656041605778832, + "loss": 1.4921, + "step": 3490 + }, + { + "epoch": 0.3691983122362869, + "grad_norm": 0.6179496049880981, + "learning_rate": 0.001063324907110342, + "loss": 1.4742, + "step": 3500 + }, + { + "epoch": 0.370253164556962, + "grad_norm": 0.6031610369682312, + "learning_rate": 0.0010610421426842241, + "loss": 1.4948, + "step": 3510 + }, + { + "epoch": 0.37130801687763715, + "grad_norm": 0.6608986854553223, + "learning_rate": 0.00105875589287902, + "loss": 1.4931, + "step": 3520 + }, + { + "epoch": 0.37236286919831224, + "grad_norm": 0.6621637344360352, + "learning_rate": 0.0010564661833132752, + "loss": 1.5008, + "step": 3530 + }, + { + "epoch": 0.37341772151898733, + "grad_norm": 0.632276177406311, + "learning_rate": 
0.001054173039644303, + "loss": 1.4938, + "step": 3540 + }, + { + "epoch": 0.3744725738396624, + "grad_norm": 0.5957940220832825, + "learning_rate": 0.0010518764875678981, + "loss": 1.4814, + "step": 3550 + }, + { + "epoch": 0.3755274261603376, + "grad_norm": 0.5904439091682434, + "learning_rate": 0.001049576552818048, + "loss": 1.4808, + "step": 3560 + }, + { + "epoch": 0.37658227848101267, + "grad_norm": 0.6586924195289612, + "learning_rate": 0.0010472732611666448, + "loss": 1.4769, + "step": 3570 + }, + { + "epoch": 0.37763713080168776, + "grad_norm": 0.6911676526069641, + "learning_rate": 0.0010449666384231954, + "loss": 1.4796, + "step": 3580 + }, + { + "epoch": 0.37869198312236285, + "grad_norm": 0.5835255980491638, + "learning_rate": 0.0010426567104345346, + "loss": 1.4909, + "step": 3590 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 0.6622236967086792, + "learning_rate": 0.0010403435030845332, + "loss": 1.4755, + "step": 3600 + }, + { + "epoch": 0.3808016877637131, + "grad_norm": 0.6872473359107971, + "learning_rate": 0.0010380270422938093, + "loss": 1.4752, + "step": 3610 + }, + { + "epoch": 0.3818565400843882, + "grad_norm": 0.7359724044799805, + "learning_rate": 0.0010357073540194362, + "loss": 1.477, + "step": 3620 + }, + { + "epoch": 0.3829113924050633, + "grad_norm": 0.6550962924957275, + "learning_rate": 0.001033384464254655, + "loss": 1.4794, + "step": 3630 + }, + { + "epoch": 0.38396624472573837, + "grad_norm": 0.6337891817092896, + "learning_rate": 0.001031058399028579, + "loss": 1.4793, + "step": 3640 + }, + { + "epoch": 0.3850210970464135, + "grad_norm": 0.5658158659934998, + "learning_rate": 0.001028729184405905, + "loss": 1.486, + "step": 3650 + }, + { + "epoch": 0.3860759493670886, + "grad_norm": 0.6056289076805115, + "learning_rate": 0.0010263968464866201, + "loss": 1.4803, + "step": 3660 + }, + { + "epoch": 0.3871308016877637, + "grad_norm": 0.7544041275978088, + "learning_rate": 0.0010240614114057098, + "loss": 1.4709, + "step": 
3670 + }, + { + "epoch": 0.3881856540084388, + "grad_norm": 0.7441290020942688, + "learning_rate": 0.001021722905332864, + "loss": 1.4731, + "step": 3680 + }, + { + "epoch": 0.38924050632911394, + "grad_norm": 0.6342928409576416, + "learning_rate": 0.0010193813544721855, + "loss": 1.4826, + "step": 3690 + }, + { + "epoch": 0.39029535864978904, + "grad_norm": 0.6773887276649475, + "learning_rate": 0.001017036785061895, + "loss": 1.4872, + "step": 3700 + }, + { + "epoch": 0.39135021097046413, + "grad_norm": 0.6571568250656128, + "learning_rate": 0.0010146892233740376, + "loss": 1.4771, + "step": 3710 + }, + { + "epoch": 0.3924050632911392, + "grad_norm": 0.5731022953987122, + "learning_rate": 0.0010123386957141883, + "loss": 1.4544, + "step": 3720 + }, + { + "epoch": 0.39345991561181437, + "grad_norm": 0.6896867752075195, + "learning_rate": 0.0010099852284211573, + "loss": 1.4758, + "step": 3730 + }, + { + "epoch": 0.39451476793248946, + "grad_norm": 0.5666677355766296, + "learning_rate": 0.0010076288478666944, + "loss": 1.4735, + "step": 3740 + }, + { + "epoch": 0.39556962025316456, + "grad_norm": 0.6820061802864075, + "learning_rate": 0.0010052695804551946, + "loss": 1.4771, + "step": 3750 + }, + { + "epoch": 0.39662447257383965, + "grad_norm": 0.6579675674438477, + "learning_rate": 0.0010029074526234014, + "loss": 1.4641, + "step": 3760 + }, + { + "epoch": 0.39767932489451474, + "grad_norm": 0.5246252417564392, + "learning_rate": 0.0010005424908401104, + "loss": 1.4833, + "step": 3770 + }, + { + "epoch": 0.3987341772151899, + "grad_norm": 0.5764563679695129, + "learning_rate": 0.0009981747216058728, + "loss": 1.4678, + "step": 3780 + }, + { + "epoch": 0.399789029535865, + "grad_norm": 0.6743926405906677, + "learning_rate": 0.0009958041714526998, + "loss": 1.4719, + "step": 3790 + }, + { + "epoch": 0.4008438818565401, + "grad_norm": 0.7351746559143066, + "learning_rate": 0.0009934308669437627, + "loss": 1.4776, + "step": 3800 + }, + { + "epoch": 
0.40189873417721517, + "grad_norm": 0.7997708320617676, + "learning_rate": 0.0009910548346730972, + "loss": 1.4652, + "step": 3810 + }, + { + "epoch": 0.4029535864978903, + "grad_norm": 0.6010289192199707, + "learning_rate": 0.0009886761012653062, + "loss": 1.459, + "step": 3820 + }, + { + "epoch": 0.4040084388185654, + "grad_norm": 0.7269366383552551, + "learning_rate": 0.000986294693375258, + "loss": 1.4629, + "step": 3830 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 0.5862137675285339, + "learning_rate": 0.000983910637687791, + "loss": 1.4782, + "step": 3840 + }, + { + "epoch": 0.4061181434599156, + "grad_norm": 0.5845527648925781, + "learning_rate": 0.0009815239609174138, + "loss": 1.4601, + "step": 3850 + }, + { + "epoch": 0.40717299578059074, + "grad_norm": 0.5732600092887878, + "learning_rate": 0.0009791346898080043, + "loss": 1.4733, + "step": 3860 + }, + { + "epoch": 0.40822784810126583, + "grad_norm": 0.664968729019165, + "learning_rate": 0.0009767428511325122, + "loss": 1.4638, + "step": 3870 + }, + { + "epoch": 0.4092827004219409, + "grad_norm": 0.6303493976593018, + "learning_rate": 0.0009743484716926576, + "loss": 1.4525, + "step": 3880 + }, + { + "epoch": 0.410337552742616, + "grad_norm": 0.6190375685691833, + "learning_rate": 0.0009719515783186319, + "loss": 1.4531, + "step": 3890 + }, + { + "epoch": 0.41139240506329117, + "grad_norm": 0.6889409422874451, + "learning_rate": 0.0009695521978687951, + "loss": 1.4546, + "step": 3900 + }, + { + "epoch": 0.41244725738396626, + "grad_norm": 0.6062855124473572, + "learning_rate": 0.0009671503572293767, + "loss": 1.4567, + "step": 3910 + }, + { + "epoch": 0.41350210970464135, + "grad_norm": 0.5428312420845032, + "learning_rate": 0.0009647460833141742, + "loss": 1.452, + "step": 3920 + }, + { + "epoch": 0.41455696202531644, + "grad_norm": 0.6007335782051086, + "learning_rate": 0.0009623394030642507, + "loss": 1.4601, + "step": 3930 + }, + { + "epoch": 0.41561181434599154, + "grad_norm": 
0.6294459104537964, + "learning_rate": 0.0009599303434476334, + "loss": 1.4544, + "step": 3940 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 0.6380193829536438, + "learning_rate": 0.0009575189314590118, + "loss": 1.4607, + "step": 3950 + }, + { + "epoch": 0.4177215189873418, + "grad_norm": 0.585292398929596, + "learning_rate": 0.0009551051941194346, + "loss": 1.4591, + "step": 3960 + }, + { + "epoch": 0.41877637130801687, + "grad_norm": 0.6717321276664734, + "learning_rate": 0.0009526891584760071, + "loss": 1.4509, + "step": 3970 + }, + { + "epoch": 0.41983122362869196, + "grad_norm": 0.6088680624961853, + "learning_rate": 0.0009502708516015889, + "loss": 1.4638, + "step": 3980 + }, + { + "epoch": 0.4208860759493671, + "grad_norm": 0.5960768461227417, + "learning_rate": 0.0009478503005944888, + "loss": 1.4554, + "step": 3990 + }, + { + "epoch": 0.4219409282700422, + "grad_norm": 0.9329041242599487, + "learning_rate": 0.0009454275325781632, + "loss": 1.4684, + "step": 4000 + }, + { + "epoch": 0.4229957805907173, + "grad_norm": 0.7009010314941406, + "learning_rate": 0.0009430025747009104, + "loss": 1.4517, + "step": 4010 + }, + { + "epoch": 0.4240506329113924, + "grad_norm": 0.6686537861824036, + "learning_rate": 0.0009405754541355677, + "loss": 1.4571, + "step": 4020 + }, + { + "epoch": 0.42510548523206754, + "grad_norm": 0.5881796479225159, + "learning_rate": 0.0009381461980792061, + "loss": 1.4479, + "step": 4030 + }, + { + "epoch": 0.42616033755274263, + "grad_norm": 0.6210187077522278, + "learning_rate": 0.0009357148337528256, + "loss": 1.458, + "step": 4040 + }, + { + "epoch": 0.4272151898734177, + "grad_norm": 0.7937957048416138, + "learning_rate": 0.0009332813884010511, + "loss": 1.4593, + "step": 4050 + }, + { + "epoch": 0.4282700421940928, + "grad_norm": 0.5672842860221863, + "learning_rate": 0.0009308458892918259, + "loss": 1.4629, + "step": 4060 + }, + { + "epoch": 0.4293248945147679, + "grad_norm": 0.6146813035011292, + "learning_rate": 
0.0009284083637161064, + "loss": 1.4568, + "step": 4070 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 0.7801781296730042, + "learning_rate": 0.0009259688389875574, + "loss": 1.4609, + "step": 4080 + }, + { + "epoch": 0.43143459915611815, + "grad_norm": 0.7081396579742432, + "learning_rate": 0.0009235273424422442, + "loss": 1.455, + "step": 4090 + }, + { + "epoch": 0.43248945147679324, + "grad_norm": 0.6945393681526184, + "learning_rate": 0.0009210839014383282, + "loss": 1.4535, + "step": 4100 + }, + { + "epoch": 0.43354430379746833, + "grad_norm": 0.592399001121521, + "learning_rate": 0.0009186385433557584, + "loss": 1.4466, + "step": 4110 + }, + { + "epoch": 0.4345991561181435, + "grad_norm": 0.7348710298538208, + "learning_rate": 0.0009161912955959668, + "loss": 1.451, + "step": 4120 + }, + { + "epoch": 0.4356540084388186, + "grad_norm": 0.5794745683670044, + "learning_rate": 0.000913742185581559, + "loss": 1.4452, + "step": 4130 + }, + { + "epoch": 0.43670886075949367, + "grad_norm": 0.5987210869789124, + "learning_rate": 0.0009112912407560086, + "loss": 1.4455, + "step": 4140 + }, + { + "epoch": 0.43776371308016876, + "grad_norm": 0.5650314092636108, + "learning_rate": 0.0009088384885833495, + "loss": 1.4509, + "step": 4150 + }, + { + "epoch": 0.4388185654008439, + "grad_norm": 0.5825815796852112, + "learning_rate": 0.000906383956547867, + "loss": 1.4448, + "step": 4160 + }, + { + "epoch": 0.439873417721519, + "grad_norm": 0.6351372599601746, + "learning_rate": 0.0009039276721537915, + "loss": 1.4458, + "step": 4170 + }, + { + "epoch": 0.4409282700421941, + "grad_norm": 0.5926342606544495, + "learning_rate": 0.0009014696629249886, + "loss": 1.441, + "step": 4180 + }, + { + "epoch": 0.4419831223628692, + "grad_norm": 0.6246255040168762, + "learning_rate": 0.0008990099564046522, + "loss": 1.4478, + "step": 4190 + }, + { + "epoch": 0.4430379746835443, + "grad_norm": 0.6055649518966675, + "learning_rate": 0.0008965485801549946, + "loss": 1.4474, + "step": 
4200 + }, + { + "epoch": 0.4440928270042194, + "grad_norm": 0.6279090642929077, + "learning_rate": 0.000894085561756939, + "loss": 1.4378, + "step": 4210 + }, + { + "epoch": 0.4451476793248945, + "grad_norm": 0.6265918612480164, + "learning_rate": 0.0008916209288098088, + "loss": 1.4481, + "step": 4220 + }, + { + "epoch": 0.4462025316455696, + "grad_norm": 0.6063783764839172, + "learning_rate": 0.0008891547089310198, + "loss": 1.4428, + "step": 4230 + }, + { + "epoch": 0.4472573839662447, + "grad_norm": 0.5740835666656494, + "learning_rate": 0.0008866869297557699, + "loss": 1.4424, + "step": 4240 + }, + { + "epoch": 0.44831223628691985, + "grad_norm": 0.6003664135932922, + "learning_rate": 0.0008842176189367299, + "loss": 1.4453, + "step": 4250 + }, + { + "epoch": 0.44936708860759494, + "grad_norm": 0.6865047812461853, + "learning_rate": 0.0008817468041437329, + "loss": 1.4511, + "step": 4260 + }, + { + "epoch": 0.45042194092827004, + "grad_norm": 0.624553918838501, + "learning_rate": 0.0008792745130634654, + "loss": 1.4408, + "step": 4270 + }, + { + "epoch": 0.45147679324894513, + "grad_norm": 0.5670589208602905, + "learning_rate": 0.0008768007733991561, + "loss": 1.4364, + "step": 4280 + }, + { + "epoch": 0.4525316455696203, + "grad_norm": 0.532307505607605, + "learning_rate": 0.0008743256128702658, + "loss": 1.4489, + "step": 4290 + }, + { + "epoch": 0.45358649789029537, + "grad_norm": 0.6228511929512024, + "learning_rate": 0.0008718490592121768, + "loss": 1.4451, + "step": 4300 + }, + { + "epoch": 0.45464135021097046, + "grad_norm": 0.5872250199317932, + "learning_rate": 0.0008693711401758822, + "loss": 1.4389, + "step": 4310 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 0.6121786236763, + "learning_rate": 0.0008668918835276747, + "loss": 1.4386, + "step": 4320 + }, + { + "epoch": 0.45675105485232065, + "grad_norm": 0.6411699652671814, + "learning_rate": 0.0008644113170488355, + "loss": 1.4453, + "step": 4330 + }, + { + "epoch": 0.4578059071729958, + 
"grad_norm": 0.6698163151741028, + "learning_rate": 0.0008619294685353235, + "loss": 1.4354, + "step": 4340 + }, + { + "epoch": 0.4588607594936709, + "grad_norm": 0.6251499056816101, + "learning_rate": 0.0008594463657974627, + "loss": 1.4358, + "step": 4350 + }, + { + "epoch": 0.459915611814346, + "grad_norm": 0.5319044589996338, + "learning_rate": 0.0008569620366596322, + "loss": 1.4526, + "step": 4360 + }, + { + "epoch": 0.4609704641350211, + "grad_norm": 0.5559026002883911, + "learning_rate": 0.000854476508959953, + "loss": 1.4289, + "step": 4370 + }, + { + "epoch": 0.4620253164556962, + "grad_norm": 0.6209953427314758, + "learning_rate": 0.0008519898105499762, + "loss": 1.4356, + "step": 4380 + }, + { + "epoch": 0.4630801687763713, + "grad_norm": 0.6708519458770752, + "learning_rate": 0.0008495019692943721, + "loss": 1.4303, + "step": 4390 + }, + { + "epoch": 0.4641350210970464, + "grad_norm": 0.6308231949806213, + "learning_rate": 0.0008470130130706166, + "loss": 1.4371, + "step": 4400 + }, + { + "epoch": 0.4651898734177215, + "grad_norm": 0.5832933187484741, + "learning_rate": 0.0008445229697686795, + "loss": 1.4487, + "step": 4410 + }, + { + "epoch": 0.46624472573839665, + "grad_norm": 0.5599019527435303, + "learning_rate": 0.0008420318672907119, + "loss": 1.4354, + "step": 4420 + }, + { + "epoch": 0.46729957805907174, + "grad_norm": 0.6720965504646301, + "learning_rate": 0.0008395397335507334, + "loss": 1.4378, + "step": 4430 + }, + { + "epoch": 0.46835443037974683, + "grad_norm": 0.6007701754570007, + "learning_rate": 0.0008370465964743196, + "loss": 1.4298, + "step": 4440 + }, + { + "epoch": 0.4694092827004219, + "grad_norm": 0.6820139288902283, + "learning_rate": 0.0008345524839982886, + "loss": 1.443, + "step": 4450 + }, + { + "epoch": 0.4704641350210971, + "grad_norm": 0.5810084939002991, + "learning_rate": 0.0008320574240703886, + "loss": 1.4262, + "step": 4460 + }, + { + "epoch": 0.47151898734177217, + "grad_norm": 0.5858860015869141, + 
"learning_rate": 0.0008295614446489842, + "loss": 1.435, + "step": 4470 + }, + { + "epoch": 0.47257383966244726, + "grad_norm": 0.6018589735031128, + "learning_rate": 0.0008270645737027441, + "loss": 1.4417, + "step": 4480 + }, + { + "epoch": 0.47362869198312235, + "grad_norm": 0.6977600455284119, + "learning_rate": 0.0008245668392103259, + "loss": 1.4292, + "step": 4490 + }, + { + "epoch": 0.47468354430379744, + "grad_norm": 0.5679542422294617, + "learning_rate": 0.0008220682691600645, + "loss": 1.4289, + "step": 4500 + }, + { + "epoch": 0.4757383966244726, + "grad_norm": 0.5886189937591553, + "learning_rate": 0.0008195688915496571, + "loss": 1.4323, + "step": 4510 + }, + { + "epoch": 0.4767932489451477, + "grad_norm": 0.5459170937538147, + "learning_rate": 0.0008170687343858506, + "loss": 1.416, + "step": 4520 + }, + { + "epoch": 0.4778481012658228, + "grad_norm": 0.8906171917915344, + "learning_rate": 0.0008145678256841265, + "loss": 1.4359, + "step": 4530 + }, + { + "epoch": 0.47890295358649787, + "grad_norm": 0.6278981566429138, + "learning_rate": 0.0008120661934683879, + "loss": 1.436, + "step": 4540 + }, + { + "epoch": 0.479957805907173, + "grad_norm": 0.5852528214454651, + "learning_rate": 0.0008095638657706456, + "loss": 1.4316, + "step": 4550 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 0.593532145023346, + "learning_rate": 0.000807060870630703, + "loss": 1.4227, + "step": 4560 + }, + { + "epoch": 0.4820675105485232, + "grad_norm": 0.6235283613204956, + "learning_rate": 0.000804557236095843, + "loss": 1.4037, + "step": 4570 + }, + { + "epoch": 0.4831223628691983, + "grad_norm": 0.7002753615379333, + "learning_rate": 0.0008020529902205129, + "loss": 1.4298, + "step": 4580 + }, + { + "epoch": 0.48417721518987344, + "grad_norm": 0.5629510879516602, + "learning_rate": 0.0007995481610660108, + "loss": 1.4128, + "step": 4590 + }, + { + "epoch": 0.48523206751054854, + "grad_norm": 0.761268138885498, + "learning_rate": 0.0007970427767001702, + "loss": 
1.4323, + "step": 4600 + }, + { + "epoch": 0.48628691983122363, + "grad_norm": 0.5685955286026001, + "learning_rate": 0.0007945368651970464, + "loss": 1.4347, + "step": 4610 + }, + { + "epoch": 0.4873417721518987, + "grad_norm": 0.5835021734237671, + "learning_rate": 0.0007920304546366013, + "loss": 1.411, + "step": 4620 + }, + { + "epoch": 0.4883966244725738, + "grad_norm": 0.6290881633758545, + "learning_rate": 0.000789523573104389, + "loss": 1.4296, + "step": 4630 + }, + { + "epoch": 0.48945147679324896, + "grad_norm": 0.6260693669319153, + "learning_rate": 0.0007870162486912414, + "loss": 1.4304, + "step": 4640 + }, + { + "epoch": 0.49050632911392406, + "grad_norm": 0.6415714621543884, + "learning_rate": 0.0007845085094929527, + "loss": 1.4322, + "step": 4650 + }, + { + "epoch": 0.49156118143459915, + "grad_norm": 0.6698681116104126, + "learning_rate": 0.0007820003836099649, + "loss": 1.4212, + "step": 4660 + }, + { + "epoch": 0.49261603375527424, + "grad_norm": 0.634046733379364, + "learning_rate": 0.0007794918991470537, + "loss": 1.4208, + "step": 4670 + }, + { + "epoch": 0.4936708860759494, + "grad_norm": 0.5765496492385864, + "learning_rate": 0.0007769830842130119, + "loss": 1.4169, + "step": 4680 + }, + { + "epoch": 0.4947257383966245, + "grad_norm": 0.567621648311615, + "learning_rate": 0.0007744739669203361, + "loss": 1.4142, + "step": 4690 + }, + { + "epoch": 0.4957805907172996, + "grad_norm": 0.7992517352104187, + "learning_rate": 0.0007719645753849108, + "loss": 1.4234, + "step": 4700 + }, + { + "epoch": 0.49683544303797467, + "grad_norm": 0.5466929078102112, + "learning_rate": 0.0007694549377256932, + "loss": 1.4238, + "step": 4710 + }, + { + "epoch": 0.4978902953586498, + "grad_norm": 0.6625158786773682, + "learning_rate": 0.0007669450820643987, + "loss": 1.421, + "step": 4720 + }, + { + "epoch": 0.4989451476793249, + "grad_norm": 0.5856000185012817, + "learning_rate": 0.0007644350365251855, + "loss": 1.4158, + "step": 4730 + }, + { + "epoch": 0.5, 
+ "grad_norm": 0.6091915965080261, + "learning_rate": 0.0007619248292343399, + "loss": 1.4168, + "step": 4740 + }, + { + "epoch": 0.5010548523206751, + "grad_norm": 0.5884057283401489, + "learning_rate": 0.0007594144883199599, + "loss": 1.434, + "step": 4750 + }, + { + "epoch": 0.5021097046413502, + "grad_norm": 0.5624389052391052, + "learning_rate": 0.0007569040419116413, + "loss": 1.4182, + "step": 4760 + }, + { + "epoch": 0.5031645569620253, + "grad_norm": 0.6008890271186829, + "learning_rate": 0.000754393518140162, + "loss": 1.4153, + "step": 4770 + }, + { + "epoch": 0.5042194092827004, + "grad_norm": 0.5704989433288574, + "learning_rate": 0.0007518829451371665, + "loss": 1.4277, + "step": 4780 + }, + { + "epoch": 0.5052742616033755, + "grad_norm": 0.5493587851524353, + "learning_rate": 0.0007493723510348516, + "loss": 1.4091, + "step": 4790 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 0.6183404326438904, + "learning_rate": 0.0007468617639656496, + "loss": 1.4147, + "step": 4800 + }, + { + "epoch": 0.5073839662447257, + "grad_norm": 0.5674797296524048, + "learning_rate": 0.0007443512120619144, + "loss": 1.4135, + "step": 4810 + }, + { + "epoch": 0.5084388185654009, + "grad_norm": 0.5839575529098511, + "learning_rate": 0.0007418407234556067, + "loss": 1.4334, + "step": 4820 + }, + { + "epoch": 0.509493670886076, + "grad_norm": 0.6846141219139099, + "learning_rate": 0.0007393303262779767, + "loss": 1.4163, + "step": 4830 + }, + { + "epoch": 0.510548523206751, + "grad_norm": 0.6386917233467102, + "learning_rate": 0.0007368200486592507, + "loss": 1.4099, + "step": 4840 + }, + { + "epoch": 0.5116033755274262, + "grad_norm": 0.6039613485336304, + "learning_rate": 0.0007343099187283149, + "loss": 1.4232, + "step": 4850 + }, + { + "epoch": 0.5126582278481012, + "grad_norm": 0.6636369824409485, + "learning_rate": 0.0007317999646124011, + "loss": 1.411, + "step": 4860 + }, + { + "epoch": 0.5137130801687764, + "grad_norm": 0.62837815284729, + "learning_rate": 
0.0007292902144367704, + "loss": 1.4029, + "step": 4870 + }, + { + "epoch": 0.5147679324894515, + "grad_norm": 0.5656679272651672, + "learning_rate": 0.0007267806963243995, + "loss": 1.4235, + "step": 4880 + }, + { + "epoch": 0.5158227848101266, + "grad_norm": 0.5949025750160217, + "learning_rate": 0.0007242714383956639, + "loss": 1.4148, + "step": 4890 + }, + { + "epoch": 0.5168776371308017, + "grad_norm": 0.6017699241638184, + "learning_rate": 0.000721762468768024, + "loss": 1.4221, + "step": 4900 + }, + { + "epoch": 0.5179324894514767, + "grad_norm": 0.6424805521965027, + "learning_rate": 0.0007192538155557094, + "loss": 1.4156, + "step": 4910 + }, + { + "epoch": 0.5189873417721519, + "grad_norm": 0.6976399421691895, + "learning_rate": 0.0007167455068694046, + "loss": 1.4042, + "step": 4920 + }, + { + "epoch": 0.520042194092827, + "grad_norm": 0.6909814476966858, + "learning_rate": 0.000714237570815933, + "loss": 1.4125, + "step": 4930 + }, + { + "epoch": 0.5210970464135021, + "grad_norm": 0.5892294645309448, + "learning_rate": 0.0007117300354979423, + "loss": 1.4113, + "step": 4940 + }, + { + "epoch": 0.5221518987341772, + "grad_norm": 0.6162419319152832, + "learning_rate": 0.000709222929013591, + "loss": 1.4198, + "step": 4950 + }, + { + "epoch": 0.5232067510548524, + "grad_norm": 0.6535078883171082, + "learning_rate": 0.0007067162794562309, + "loss": 1.4016, + "step": 4960 + }, + { + "epoch": 0.5242616033755274, + "grad_norm": 0.7089105844497681, + "learning_rate": 0.0007042101149140943, + "loss": 1.4072, + "step": 4970 + }, + { + "epoch": 0.5253164556962026, + "grad_norm": 0.6197832822799683, + "learning_rate": 0.0007017044634699787, + "loss": 1.4011, + "step": 4980 + }, + { + "epoch": 0.5263713080168776, + "grad_norm": 0.5824099779129028, + "learning_rate": 0.0006991993532009319, + "loss": 1.4044, + "step": 4990 + }, + { + "epoch": 0.5274261603375527, + "grad_norm": 0.5494335889816284, + "learning_rate": 0.0006966948121779378, + "loss": 1.4155, + "step": 
5000 + }, + { + "epoch": 0.5284810126582279, + "grad_norm": 0.5640929937362671, + "learning_rate": 0.000694190868465601, + "loss": 1.4071, + "step": 5010 + }, + { + "epoch": 0.5295358649789029, + "grad_norm": 0.5955397486686707, + "learning_rate": 0.0006916875501218343, + "loss": 1.4007, + "step": 5020 + }, + { + "epoch": 0.5305907172995781, + "grad_norm": 0.5995765924453735, + "learning_rate": 0.0006891848851975416, + "loss": 1.3888, + "step": 5030 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 0.5937900543212891, + "learning_rate": 0.0006866829017363054, + "loss": 1.4133, + "step": 5040 + }, + { + "epoch": 0.5327004219409283, + "grad_norm": 0.5794184803962708, + "learning_rate": 0.0006841816277740722, + "loss": 1.4021, + "step": 5050 + }, + { + "epoch": 0.5337552742616034, + "grad_norm": 0.7464228868484497, + "learning_rate": 0.0006816810913388379, + "loss": 1.4146, + "step": 5060 + }, + { + "epoch": 0.5348101265822784, + "grad_norm": 0.5913816690444946, + "learning_rate": 0.0006791813204503342, + "loss": 1.4018, + "step": 5070 + }, + { + "epoch": 0.5358649789029536, + "grad_norm": 0.6194383502006531, + "learning_rate": 0.0006766823431197147, + "loss": 1.4152, + "step": 5080 + }, + { + "epoch": 0.5369198312236287, + "grad_norm": 0.5733729004859924, + "learning_rate": 0.0006741841873492406, + "loss": 1.4072, + "step": 5090 + }, + { + "epoch": 0.5379746835443038, + "grad_norm": 0.5485455989837646, + "learning_rate": 0.0006716868811319671, + "loss": 1.3896, + "step": 5100 + }, + { + "epoch": 0.5390295358649789, + "grad_norm": 0.6225220561027527, + "learning_rate": 0.0006691904524514297, + "loss": 1.4046, + "step": 5110 + }, + { + "epoch": 0.540084388185654, + "grad_norm": 0.6376716494560242, + "learning_rate": 0.0006666949292813306, + "loss": 1.4011, + "step": 5120 + }, + { + "epoch": 0.5411392405063291, + "grad_norm": 0.5947856903076172, + "learning_rate": 0.0006642003395852258, + "loss": 1.3964, + "step": 5130 + }, + { + "epoch": 0.5421940928270043, + 
"grad_norm": 0.5466342568397522, + "learning_rate": 0.0006617067113162103, + "loss": 1.4148, + "step": 5140 + }, + { + "epoch": 0.5432489451476793, + "grad_norm": 0.6072582006454468, + "learning_rate": 0.0006592140724166073, + "loss": 1.4095, + "step": 5150 + }, + { + "epoch": 0.5443037974683544, + "grad_norm": 0.6281409859657288, + "learning_rate": 0.0006567224508176523, + "loss": 1.4042, + "step": 5160 + }, + { + "epoch": 0.5453586497890295, + "grad_norm": 0.5820067524909973, + "learning_rate": 0.0006542318744391821, + "loss": 1.4039, + "step": 5170 + }, + { + "epoch": 0.5464135021097046, + "grad_norm": 0.6584967970848083, + "learning_rate": 0.0006517423711893209, + "loss": 1.4104, + "step": 5180 + }, + { + "epoch": 0.5474683544303798, + "grad_norm": 0.6093493103981018, + "learning_rate": 0.0006492539689641685, + "loss": 1.3949, + "step": 5190 + }, + { + "epoch": 0.5485232067510548, + "grad_norm": 0.5570839643478394, + "learning_rate": 0.0006467666956474865, + "loss": 1.406, + "step": 5200 + }, + { + "epoch": 0.54957805907173, + "grad_norm": 0.6009849309921265, + "learning_rate": 0.0006442805791103873, + "loss": 1.3907, + "step": 5210 + }, + { + "epoch": 0.5506329113924051, + "grad_norm": 0.608741819858551, + "learning_rate": 0.0006417956472110205, + "loss": 1.4008, + "step": 5220 + }, + { + "epoch": 0.5516877637130801, + "grad_norm": 0.6685856580734253, + "learning_rate": 0.0006393119277942614, + "loss": 1.3941, + "step": 5230 + }, + { + "epoch": 0.5527426160337553, + "grad_norm": 0.6943889260292053, + "learning_rate": 0.0006368294486913987, + "loss": 1.4018, + "step": 5240 + }, + { + "epoch": 0.5537974683544303, + "grad_norm": 0.5557668209075928, + "learning_rate": 0.0006343482377198232, + "loss": 1.3931, + "step": 5250 + }, + { + "epoch": 0.5548523206751055, + "grad_norm": 0.6426723599433899, + "learning_rate": 0.0006318683226827151, + "loss": 1.3992, + "step": 5260 + }, + { + "epoch": 0.5559071729957806, + "grad_norm": 0.6642425656318665, + "learning_rate": 
0.0006293897313687331, + "loss": 1.406, + "step": 5270 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 0.6035251617431641, + "learning_rate": 0.0006269124915517037, + "loss": 1.3955, + "step": 5280 + }, + { + "epoch": 0.5580168776371308, + "grad_norm": 0.6521209478378296, + "learning_rate": 0.0006244366309903084, + "loss": 1.3976, + "step": 5290 + }, + { + "epoch": 0.5590717299578059, + "grad_norm": 0.5901432037353516, + "learning_rate": 0.0006219621774277737, + "loss": 1.3998, + "step": 5300 + }, + { + "epoch": 0.560126582278481, + "grad_norm": 0.6010416150093079, + "learning_rate": 0.00061948915859156, + "loss": 1.3899, + "step": 5310 + }, + { + "epoch": 0.5611814345991561, + "grad_norm": 0.5760391354560852, + "learning_rate": 0.0006170176021930509, + "loss": 1.3878, + "step": 5320 + }, + { + "epoch": 0.5622362869198312, + "grad_norm": 0.5761234760284424, + "learning_rate": 0.0006145475359272424, + "loss": 1.4008, + "step": 5330 + }, + { + "epoch": 0.5632911392405063, + "grad_norm": 0.621872067451477, + "learning_rate": 0.0006120789874724336, + "loss": 1.391, + "step": 5340 + }, + { + "epoch": 0.5643459915611815, + "grad_norm": 0.5928296446800232, + "learning_rate": 0.0006096119844899151, + "loss": 1.3867, + "step": 5350 + }, + { + "epoch": 0.5654008438818565, + "grad_norm": 0.5362096428871155, + "learning_rate": 0.0006071465546236601, + "loss": 1.3942, + "step": 5360 + }, + { + "epoch": 0.5664556962025317, + "grad_norm": 0.7099489569664001, + "learning_rate": 0.0006046827255000135, + "loss": 1.3924, + "step": 5370 + }, + { + "epoch": 0.5675105485232067, + "grad_norm": 0.6403958797454834, + "learning_rate": 0.0006022205247273845, + "loss": 1.387, + "step": 5380 + }, + { + "epoch": 0.5685654008438819, + "grad_norm": 0.6576436161994934, + "learning_rate": 0.0005997599798959343, + "loss": 1.3886, + "step": 5390 + }, + { + "epoch": 0.569620253164557, + "grad_norm": 0.6673580408096313, + "learning_rate": 0.0005973011185772694, + "loss": 1.3886, + "step": 5400 + 
}, + { + "epoch": 0.570675105485232, + "grad_norm": 0.6391315460205078, + "learning_rate": 0.0005948439683241318, + "loss": 1.3906, + "step": 5410 + }, + { + "epoch": 0.5717299578059072, + "grad_norm": 0.6208692193031311, + "learning_rate": 0.0005923885566700896, + "loss": 1.3852, + "step": 5420 + }, + { + "epoch": 0.5727848101265823, + "grad_norm": 0.5703340172767639, + "learning_rate": 0.0005899349111292293, + "loss": 1.3806, + "step": 5430 + }, + { + "epoch": 0.5738396624472574, + "grad_norm": 0.6887893080711365, + "learning_rate": 0.0005874830591958474, + "loss": 1.379, + "step": 5440 + }, + { + "epoch": 0.5748945147679325, + "grad_norm": 0.6070958375930786, + "learning_rate": 0.000585033028344142, + "loss": 1.3867, + "step": 5450 + }, + { + "epoch": 0.5759493670886076, + "grad_norm": 0.6386216878890991, + "learning_rate": 0.0005825848460279048, + "loss": 1.3898, + "step": 5460 + }, + { + "epoch": 0.5770042194092827, + "grad_norm": 0.5764682292938232, + "learning_rate": 0.0005801385396802146, + "loss": 1.3868, + "step": 5470 + }, + { + "epoch": 0.5780590717299579, + "grad_norm": 0.6286607384681702, + "learning_rate": 0.0005776941367131282, + "loss": 1.39, + "step": 5480 + }, + { + "epoch": 0.5791139240506329, + "grad_norm": 0.7298633456230164, + "learning_rate": 0.0005752516645173745, + "loss": 1.3933, + "step": 5490 + }, + { + "epoch": 0.580168776371308, + "grad_norm": 0.6224152445793152, + "learning_rate": 0.0005728111504620472, + "loss": 1.3836, + "step": 5500 + }, + { + "epoch": 0.5812236286919831, + "grad_norm": 0.6824096441268921, + "learning_rate": 0.0005703726218942976, + "loss": 1.39, + "step": 5510 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 0.6303598284721375, + "learning_rate": 0.0005679361061390295, + "loss": 1.3832, + "step": 5520 + }, + { + "epoch": 0.5833333333333334, + "grad_norm": 0.6258459091186523, + "learning_rate": 0.0005655016304985908, + "loss": 1.3771, + "step": 5530 + }, + { + "epoch": 0.5843881856540084, + "grad_norm": 
0.624071478843689, + "learning_rate": 0.0005630692222524709, + "loss": 1.3768, + "step": 5540 + }, + { + "epoch": 0.5854430379746836, + "grad_norm": 0.6074104905128479, + "learning_rate": 0.0005606389086569911, + "loss": 1.3922, + "step": 5550 + }, + { + "epoch": 0.5864978902953587, + "grad_norm": 0.6143749952316284, + "learning_rate": 0.0005582107169450023, + "loss": 1.3971, + "step": 5560 + }, + { + "epoch": 0.5875527426160337, + "grad_norm": 0.5675649046897888, + "learning_rate": 0.0005557846743255783, + "loss": 1.3807, + "step": 5570 + }, + { + "epoch": 0.5886075949367089, + "grad_norm": 0.6075764894485474, + "learning_rate": 0.0005533608079837109, + "loss": 1.3801, + "step": 5580 + }, + { + "epoch": 0.5896624472573839, + "grad_norm": 0.5620693564414978, + "learning_rate": 0.0005509391450800061, + "loss": 1.3836, + "step": 5590 + }, + { + "epoch": 0.5907172995780591, + "grad_norm": 0.6026065945625305, + "learning_rate": 0.0005485197127503795, + "loss": 1.3775, + "step": 5600 + }, + { + "epoch": 0.5917721518987342, + "grad_norm": 0.5391370058059692, + "learning_rate": 0.0005461025381057516, + "loss": 1.38, + "step": 5610 + }, + { + "epoch": 0.5928270042194093, + "grad_norm": 0.6115685105323792, + "learning_rate": 0.0005436876482317444, + "loss": 1.3923, + "step": 5620 + }, + { + "epoch": 0.5938818565400844, + "grad_norm": 0.6195604801177979, + "learning_rate": 0.0005412750701883782, + "loss": 1.3855, + "step": 5630 + }, + { + "epoch": 0.5949367088607594, + "grad_norm": 0.712783932685852, + "learning_rate": 0.0005388648310097682, + "loss": 1.3931, + "step": 5640 + }, + { + "epoch": 0.5959915611814346, + "grad_norm": 0.646199107170105, + "learning_rate": 0.000536456957703821, + "loss": 1.3868, + "step": 5650 + }, + { + "epoch": 0.5970464135021097, + "grad_norm": 0.6280226111412048, + "learning_rate": 0.0005340514772519324, + "loss": 1.3831, + "step": 5660 + }, + { + "epoch": 0.5981012658227848, + "grad_norm": 0.6073784232139587, + "learning_rate": 
0.0005316484166086863, + "loss": 1.3886, + "step": 5670 + }, + { + "epoch": 0.5991561181434599, + "grad_norm": 0.7679116725921631, + "learning_rate": 0.00052924780270155, + "loss": 1.3895, + "step": 5680 + }, + { + "epoch": 0.6002109704641351, + "grad_norm": 0.650935709476471, + "learning_rate": 0.0005268496624305747, + "loss": 1.3706, + "step": 5690 + }, + { + "epoch": 0.6012658227848101, + "grad_norm": 0.5862375497817993, + "learning_rate": 0.0005244540226680931, + "loss": 1.3714, + "step": 5700 + }, + { + "epoch": 0.6023206751054853, + "grad_norm": 0.5742993950843811, + "learning_rate": 0.0005220609102584185, + "loss": 1.3776, + "step": 5710 + }, + { + "epoch": 0.6033755274261603, + "grad_norm": 0.6022215485572815, + "learning_rate": 0.0005196703520175437, + "loss": 1.3809, + "step": 5720 + }, + { + "epoch": 0.6044303797468354, + "grad_norm": 0.6646663546562195, + "learning_rate": 0.0005172823747328415, + "loss": 1.3797, + "step": 5730 + }, + { + "epoch": 0.6054852320675106, + "grad_norm": 0.6221502423286438, + "learning_rate": 0.0005148970051627632, + "loss": 1.376, + "step": 5740 + }, + { + "epoch": 0.6065400843881856, + "grad_norm": 0.5920573472976685, + "learning_rate": 0.0005125142700365394, + "loss": 1.3514, + "step": 5750 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 0.568248987197876, + "learning_rate": 0.000510134196053881, + "loss": 1.3623, + "step": 5760 + }, + { + "epoch": 0.6086497890295358, + "grad_norm": 0.7080852389335632, + "learning_rate": 0.0005077568098846789, + "loss": 1.3649, + "step": 5770 + }, + { + "epoch": 0.609704641350211, + "grad_norm": 0.6030312776565552, + "learning_rate": 0.000505382138168706, + "loss": 1.3724, + "step": 5780 + }, + { + "epoch": 0.6107594936708861, + "grad_norm": 0.6298080086708069, + "learning_rate": 0.0005030102075153181, + "loss": 1.3706, + "step": 5790 + }, + { + "epoch": 0.6118143459915611, + "grad_norm": 0.5907483100891113, + "learning_rate": 0.0005006410445031569, + "loss": 1.3716, + "step": 5800 + 
}, + { + "epoch": 0.6128691983122363, + "grad_norm": 0.5847482085227966, + "learning_rate": 0.0004982746756798507, + "loss": 1.3769, + "step": 5810 + }, + { + "epoch": 0.6139240506329114, + "grad_norm": 0.594397783279419, + "learning_rate": 0.0004959111275617174, + "loss": 1.3906, + "step": 5820 + }, + { + "epoch": 0.6149789029535865, + "grad_norm": 0.5636789202690125, + "learning_rate": 0.0004935504266334677, + "loss": 1.3719, + "step": 5830 + }, + { + "epoch": 0.6160337552742616, + "grad_norm": 0.5728419423103333, + "learning_rate": 0.0004911925993479085, + "loss": 1.3835, + "step": 5840 + }, + { + "epoch": 0.6170886075949367, + "grad_norm": 0.5640576481819153, + "learning_rate": 0.0004888376721256456, + "loss": 1.3857, + "step": 5850 + }, + { + "epoch": 0.6181434599156118, + "grad_norm": 0.5926122069358826, + "learning_rate": 0.00048648567135478805, + "loss": 1.3749, + "step": 5860 + }, + { + "epoch": 0.619198312236287, + "grad_norm": 0.5798872113227844, + "learning_rate": 0.0004841366233906538, + "loss": 1.367, + "step": 5870 + }, + { + "epoch": 0.620253164556962, + "grad_norm": 0.5878182053565979, + "learning_rate": 0.0004817905545554717, + "loss": 1.3781, + "step": 5880 + }, + { + "epoch": 0.6213080168776371, + "grad_norm": 0.6136149168014526, + "learning_rate": 0.00047944749113808884, + "loss": 1.3691, + "step": 5890 + }, + { + "epoch": 0.6223628691983122, + "grad_norm": 0.5998097658157349, + "learning_rate": 0.00047710745939367474, + "loss": 1.3703, + "step": 5900 + }, + { + "epoch": 0.6234177215189873, + "grad_norm": 0.6143873929977417, + "learning_rate": 0.0004747704855434278, + "loss": 1.364, + "step": 5910 + }, + { + "epoch": 0.6244725738396625, + "grad_norm": 0.5997321605682373, + "learning_rate": 0.0004724365957742809, + "loss": 1.3594, + "step": 5920 + }, + { + "epoch": 0.6255274261603375, + "grad_norm": 0.5753656029701233, + "learning_rate": 0.00047010581623860883, + "loss": 1.3664, + "step": 5930 + }, + { + "epoch": 0.6265822784810127, + 
"grad_norm": 0.5773864984512329, + "learning_rate": 0.0004677781730539342, + "loss": 1.3738, + "step": 5940 + }, + { + "epoch": 0.6276371308016878, + "grad_norm": 0.6707257032394409, + "learning_rate": 0.0004654536923026356, + "loss": 1.3618, + "step": 5950 + }, + { + "epoch": 0.6286919831223629, + "grad_norm": 0.582830548286438, + "learning_rate": 0.00046313240003165466, + "loss": 1.3699, + "step": 5960 + }, + { + "epoch": 0.629746835443038, + "grad_norm": 0.6294563412666321, + "learning_rate": 0.0004608143222522048, + "loss": 1.3691, + "step": 5970 + }, + { + "epoch": 0.630801687763713, + "grad_norm": 0.6379166841506958, + "learning_rate": 0.0004584994849394795, + "loss": 1.3532, + "step": 5980 + }, + { + "epoch": 0.6318565400843882, + "grad_norm": 0.6422489285469055, + "learning_rate": 0.0004561879140323607, + "loss": 1.3737, + "step": 5990 + }, + { + "epoch": 0.6329113924050633, + "grad_norm": 0.6311469078063965, + "learning_rate": 0.0004538796354331298, + "loss": 1.3775, + "step": 6000 + }, + { + "epoch": 0.6339662447257384, + "grad_norm": 0.6179303526878357, + "learning_rate": 0.0004515746750071754, + "loss": 1.3698, + "step": 6010 + }, + { + "epoch": 0.6350210970464135, + "grad_norm": 0.6508504748344421, + "learning_rate": 0.0004492730585827046, + "loss": 1.3676, + "step": 6020 + }, + { + "epoch": 0.6360759493670886, + "grad_norm": 0.6842401623725891, + "learning_rate": 0.0004469748119504529, + "loss": 1.3489, + "step": 6030 + }, + { + "epoch": 0.6371308016877637, + "grad_norm": 0.5755738615989685, + "learning_rate": 0.0004446799608633964, + "loss": 1.3692, + "step": 6040 + }, + { + "epoch": 0.6381856540084389, + "grad_norm": 0.6145057082176208, + "learning_rate": 0.00044238853103646154, + "loss": 1.3533, + "step": 6050 + }, + { + "epoch": 0.6392405063291139, + "grad_norm": 0.6916087865829468, + "learning_rate": 0.00044010054814623925, + "loss": 1.3713, + "step": 6060 + }, + { + "epoch": 0.640295358649789, + "grad_norm": 0.6032170653343201, + 
"learning_rate": 0.0004378160378306944, + "loss": 1.3623, + "step": 6070 + }, + { + "epoch": 0.6413502109704642, + "grad_norm": 0.5934379696846008, + "learning_rate": 0.00043553502568888095, + "loss": 1.3609, + "step": 6080 + }, + { + "epoch": 0.6424050632911392, + "grad_norm": 0.6031732559204102, + "learning_rate": 0.0004332575372806534, + "loss": 1.3601, + "step": 6090 + }, + { + "epoch": 0.6434599156118144, + "grad_norm": 0.6373910903930664, + "learning_rate": 0.00043098359812638145, + "loss": 1.3563, + "step": 6100 + }, + { + "epoch": 0.6445147679324894, + "grad_norm": 0.70721435546875, + "learning_rate": 0.00042871323370666383, + "loss": 1.3659, + "step": 6110 + }, + { + "epoch": 0.6455696202531646, + "grad_norm": 0.596162736415863, + "learning_rate": 0.0004264464694620421, + "loss": 1.3631, + "step": 6120 + }, + { + "epoch": 0.6466244725738397, + "grad_norm": 0.5988572239875793, + "learning_rate": 0.000424183330792717, + "loss": 1.3603, + "step": 6130 + }, + { + "epoch": 0.6476793248945147, + "grad_norm": 0.8230482935905457, + "learning_rate": 0.0004219238430582621, + "loss": 1.3602, + "step": 6140 + }, + { + "epoch": 0.6487341772151899, + "grad_norm": 0.6497411131858826, + "learning_rate": 0.0004196680315773408, + "loss": 1.3706, + "step": 6150 + }, + { + "epoch": 0.6497890295358649, + "grad_norm": 0.5734637379646301, + "learning_rate": 0.00041741592162742214, + "loss": 1.3641, + "step": 6160 + }, + { + "epoch": 0.6508438818565401, + "grad_norm": 0.5776511430740356, + "learning_rate": 0.0004151675384444978, + "loss": 1.3417, + "step": 6170 + }, + { + "epoch": 0.6518987341772152, + "grad_norm": 0.6540325284004211, + "learning_rate": 0.00041292290722279914, + "loss": 1.3655, + "step": 6180 + }, + { + "epoch": 0.6529535864978903, + "grad_norm": 0.6068879961967468, + "learning_rate": 0.00041068205311451517, + "loss": 1.3647, + "step": 6190 + }, + { + "epoch": 0.6540084388185654, + "grad_norm": 0.5669481754302979, + "learning_rate": 0.00040844500122951026, + 
"loss": 1.3597, + "step": 6200 + }, + { + "epoch": 0.6550632911392406, + "grad_norm": 0.6072540283203125, + "learning_rate": 0.00040621177663504313, + "loss": 1.3569, + "step": 6210 + }, + { + "epoch": 0.6561181434599156, + "grad_norm": 0.6753110885620117, + "learning_rate": 0.00040398240435548583, + "loss": 1.3658, + "step": 6220 + }, + { + "epoch": 0.6571729957805907, + "grad_norm": 0.5967310667037964, + "learning_rate": 0.00040175690937204324, + "loss": 1.3537, + "step": 6230 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 0.6347169280052185, + "learning_rate": 0.00039953531662247343, + "loss": 1.3561, + "step": 6240 + }, + { + "epoch": 0.6592827004219409, + "grad_norm": 0.649140477180481, + "learning_rate": 0.0003973176510008075, + "loss": 1.3656, + "step": 6250 + }, + { + "epoch": 0.6603375527426161, + "grad_norm": 0.6360961198806763, + "learning_rate": 0.00039532513017512694, + "loss": 1.3571, + "step": 6260 + }, + { + "epoch": 0.6613924050632911, + "grad_norm": 0.5700550675392151, + "learning_rate": 0.00039311499452170665, + "loss": 1.3655, + "step": 6270 + }, + { + "epoch": 0.6624472573839663, + "grad_norm": 0.6357924342155457, + "learning_rate": 0.0003909088579390347, + "loss": 1.3649, + "step": 6280 + }, + { + "epoch": 0.6635021097046413, + "grad_norm": 0.7913262248039246, + "learning_rate": 0.00038870674514794877, + "loss": 1.3586, + "step": 6290 + }, + { + "epoch": 0.6645569620253164, + "grad_norm": 0.6092245578765869, + "learning_rate": 0.0003865086808241979, + "loss": 1.3519, + "step": 6300 + }, + { + "epoch": 0.6656118143459916, + "grad_norm": 0.5676957368850708, + "learning_rate": 0.0003843146895981661, + "loss": 1.3542, + "step": 6310 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.5927744507789612, + "learning_rate": 0.00038212479605459617, + "loss": 1.3499, + "step": 6320 + }, + { + "epoch": 0.6677215189873418, + "grad_norm": 0.6527295112609863, + "learning_rate": 0.000379939024732315, + "loss": 1.3565, + "step": 6330 + }, + { + 
"epoch": 0.6687763713080169, + "grad_norm": 0.6072148680686951, + "learning_rate": 0.0003777574001239573, + "loss": 1.3495, + "step": 6340 + }, + { + "epoch": 0.669831223628692, + "grad_norm": 0.5665411949157715, + "learning_rate": 0.00037557994667569217, + "loss": 1.3561, + "step": 6350 + }, + { + "epoch": 0.6708860759493671, + "grad_norm": 0.5967687964439392, + "learning_rate": 0.0003734066887869485, + "loss": 1.3553, + "step": 6360 + }, + { + "epoch": 0.6719409282700421, + "grad_norm": 0.6137552857398987, + "learning_rate": 0.0003712376508101424, + "loss": 1.3369, + "step": 6370 + }, + { + "epoch": 0.6729957805907173, + "grad_norm": 0.5536494851112366, + "learning_rate": 0.0003690728570504032, + "loss": 1.3407, + "step": 6380 + }, + { + "epoch": 0.6740506329113924, + "grad_norm": 0.7315941452980042, + "learning_rate": 0.00036691233176530197, + "loss": 1.3561, + "step": 6390 + }, + { + "epoch": 0.6751054852320675, + "grad_norm": 0.6040874123573303, + "learning_rate": 0.00036475609916457996, + "loss": 1.3568, + "step": 6400 + }, + { + "epoch": 0.6761603375527426, + "grad_norm": 0.6501857042312622, + "learning_rate": 0.000362604183409876, + "loss": 1.3529, + "step": 6410 + }, + { + "epoch": 0.6772151898734177, + "grad_norm": 0.5707489252090454, + "learning_rate": 0.00036045660861445684, + "loss": 1.3439, + "step": 6420 + }, + { + "epoch": 0.6782700421940928, + "grad_norm": 0.6650882363319397, + "learning_rate": 0.0003583133988429468, + "loss": 1.352, + "step": 6430 + }, + { + "epoch": 0.679324894514768, + "grad_norm": 0.5824476480484009, + "learning_rate": 0.0003561745781110579, + "loss": 1.3384, + "step": 6440 + }, + { + "epoch": 0.680379746835443, + "grad_norm": 0.6014812588691711, + "learning_rate": 0.00035404017038532045, + "loss": 1.3415, + "step": 6450 + }, + { + "epoch": 0.6814345991561181, + "grad_norm": 0.7181161642074585, + "learning_rate": 0.00035191019958281575, + "loss": 1.353, + "step": 6460 + }, + { + "epoch": 0.6824894514767933, + "grad_norm": 
0.6522383689880371, + "learning_rate": 0.00034978468957090635, + "loss": 1.3484, + "step": 6470 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 0.6620539426803589, + "learning_rate": 0.0003476636641669699, + "loss": 1.3564, + "step": 6480 + }, + { + "epoch": 0.6845991561181435, + "grad_norm": 0.63149493932724, + "learning_rate": 0.0003455471471381318, + "loss": 1.3504, + "step": 6490 + }, + { + "epoch": 0.6856540084388185, + "grad_norm": 0.5890700221061707, + "learning_rate": 0.0003434351622009985, + "loss": 1.3648, + "step": 6500 + }, + { + "epoch": 0.6867088607594937, + "grad_norm": 0.6058970093727112, + "learning_rate": 0.0003413277330213928, + "loss": 1.3645, + "step": 6510 + }, + { + "epoch": 0.6877637130801688, + "grad_norm": 0.5623061060905457, + "learning_rate": 0.0003392248832140876, + "loss": 1.3491, + "step": 6520 + }, + { + "epoch": 0.6888185654008439, + "grad_norm": 0.5655862092971802, + "learning_rate": 0.00033712663634254163, + "loss": 1.342, + "step": 6530 + }, + { + "epoch": 0.689873417721519, + "grad_norm": 0.6322851777076721, + "learning_rate": 0.00033503301591863586, + "loss": 1.3515, + "step": 6540 + }, + { + "epoch": 0.6909282700421941, + "grad_norm": 0.8220563530921936, + "learning_rate": 0.0003329440454024092, + "loss": 1.3454, + "step": 6550 + }, + { + "epoch": 0.6919831223628692, + "grad_norm": 0.6130277514457703, + "learning_rate": 0.0003308597482017965, + "loss": 1.3475, + "step": 6560 + }, + { + "epoch": 0.6930379746835443, + "grad_norm": 0.6431471109390259, + "learning_rate": 0.0003287801476723656, + "loss": 1.3436, + "step": 6570 + }, + { + "epoch": 0.6940928270042194, + "grad_norm": 0.6429498195648193, + "learning_rate": 0.00032670526711705536, + "loss": 1.3417, + "step": 6580 + }, + { + "epoch": 0.6951476793248945, + "grad_norm": 0.5852764248847961, + "learning_rate": 0.0003246351297859164, + "loss": 1.3476, + "step": 6590 + }, + { + "epoch": 0.6962025316455697, + "grad_norm": 0.5901224613189697, + "learning_rate": 
0.00032256975887584783, + "loss": 1.3528, + "step": 6600 + }, + { + "epoch": 0.6972573839662447, + "grad_norm": 0.7467357516288757, + "learning_rate": 0.00032050917753033935, + "loss": 1.3369, + "step": 6610 + }, + { + "epoch": 0.6983122362869199, + "grad_norm": 0.5989060401916504, + "learning_rate": 0.000318453408839211, + "loss": 1.3487, + "step": 6620 + }, + { + "epoch": 0.6993670886075949, + "grad_norm": 0.684201180934906, + "learning_rate": 0.0003164024758383548, + "loss": 1.34, + "step": 6630 + }, + { + "epoch": 0.70042194092827, + "grad_norm": 0.6392472982406616, + "learning_rate": 0.00031435640150947645, + "loss": 1.3443, + "step": 6640 + }, + { + "epoch": 0.7014767932489452, + "grad_norm": 0.7154044508934021, + "learning_rate": 0.0003123152087798376, + "loss": 1.3395, + "step": 6650 + }, + { + "epoch": 0.7025316455696202, + "grad_norm": 0.6322211623191833, + "learning_rate": 0.00031027892052200003, + "loss": 1.3409, + "step": 6660 + }, + { + "epoch": 0.7035864978902954, + "grad_norm": 0.5588740706443787, + "learning_rate": 0.0003082475595535677, + "loss": 1.3421, + "step": 6670 + }, + { + "epoch": 0.7046413502109705, + "grad_norm": 0.5747382640838623, + "learning_rate": 0.00030622114863693205, + "loss": 1.3429, + "step": 6680 + }, + { + "epoch": 0.7056962025316456, + "grad_norm": 0.5672285556793213, + "learning_rate": 0.00030419971047901704, + "loss": 1.3361, + "step": 6690 + }, + { + "epoch": 0.7067510548523207, + "grad_norm": 0.5690937042236328, + "learning_rate": 0.00030218326773102407, + "loss": 1.3402, + "step": 6700 + }, + { + "epoch": 0.7078059071729957, + "grad_norm": 0.5620949268341064, + "learning_rate": 0.00030017184298817873, + "loss": 1.3399, + "step": 6710 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 0.5401585102081299, + "learning_rate": 0.00029816545878947763, + "loss": 1.3314, + "step": 6720 + }, + { + "epoch": 0.709915611814346, + "grad_norm": 0.6108183264732361, + "learning_rate": 0.00029616413761743537, + "loss": 1.3394, + 
"step": 6730 + }, + { + "epoch": 0.7109704641350211, + "grad_norm": 0.6697292327880859, + "learning_rate": 0.00029416790189783286, + "loss": 1.3438, + "step": 6740 + }, + { + "epoch": 0.7120253164556962, + "grad_norm": 0.5676804780960083, + "learning_rate": 0.000292176773999466, + "loss": 1.345, + "step": 6750 + }, + { + "epoch": 0.7130801687763713, + "grad_norm": 0.6287891864776611, + "learning_rate": 0.0002901907762338952, + "loss": 1.3413, + "step": 6760 + }, + { + "epoch": 0.7141350210970464, + "grad_norm": 0.5678309202194214, + "learning_rate": 0.0002882099308551951, + "loss": 1.3419, + "step": 6770 + }, + { + "epoch": 0.7151898734177216, + "grad_norm": 0.5844984650611877, + "learning_rate": 0.00028623426005970517, + "loss": 1.341, + "step": 6780 + }, + { + "epoch": 0.7162447257383966, + "grad_norm": 0.6462956666946411, + "learning_rate": 0.00028426378598578187, + "loss": 1.3458, + "step": 6790 + }, + { + "epoch": 0.7172995780590717, + "grad_norm": 0.5956695079803467, + "learning_rate": 0.0002822985307135491, + "loss": 1.3319, + "step": 6800 + }, + { + "epoch": 0.7183544303797469, + "grad_norm": 0.5724600553512573, + "learning_rate": 0.0002803385162646518, + "loss": 1.3364, + "step": 6810 + }, + { + "epoch": 0.7194092827004219, + "grad_norm": 0.5979747772216797, + "learning_rate": 0.0002783837646020089, + "loss": 1.3384, + "step": 6820 + }, + { + "epoch": 0.7204641350210971, + "grad_norm": 0.6499559879302979, + "learning_rate": 0.0002764342976295673, + "loss": 1.3349, + "step": 6830 + }, + { + "epoch": 0.7215189873417721, + "grad_norm": 0.5975880026817322, + "learning_rate": 0.00027449013719205623, + "loss": 1.3277, + "step": 6840 + }, + { + "epoch": 0.7225738396624473, + "grad_norm": 0.5659971833229065, + "learning_rate": 0.00027255130507474276, + "loss": 1.3343, + "step": 6850 + }, + { + "epoch": 0.7236286919831224, + "grad_norm": 0.6004643440246582, + "learning_rate": 0.00027061782300318726, + "loss": 1.3302, + "step": 6860 + }, + { + "epoch": 
0.7246835443037974, + "grad_norm": 0.6204254031181335, + "learning_rate": 0.0002686897126430009, + "loss": 1.3287, + "step": 6870 + }, + { + "epoch": 0.7257383966244726, + "grad_norm": 0.5833061933517456, + "learning_rate": 0.00026676699559960145, + "loss": 1.3357, + "step": 6880 + }, + { + "epoch": 0.7267932489451476, + "grad_norm": 0.5982642769813538, + "learning_rate": 0.00026484969341797224, + "loss": 1.3265, + "step": 6890 + }, + { + "epoch": 0.7278481012658228, + "grad_norm": 0.67094886302948, + "learning_rate": 0.0002629378275824204, + "loss": 1.3292, + "step": 6900 + }, + { + "epoch": 0.7289029535864979, + "grad_norm": 0.6300591230392456, + "learning_rate": 0.00026103141951633617, + "loss": 1.3508, + "step": 6910 + }, + { + "epoch": 0.729957805907173, + "grad_norm": 0.5601769685745239, + "learning_rate": 0.00025913049058195277, + "loss": 1.3316, + "step": 6920 + }, + { + "epoch": 0.7310126582278481, + "grad_norm": 0.5781593918800354, + "learning_rate": 0.0002572350620801072, + "loss": 1.3374, + "step": 6930 + }, + { + "epoch": 0.7320675105485233, + "grad_norm": 0.5719791054725647, + "learning_rate": 0.0002553451552500012, + "loss": 1.3409, + "step": 6940 + }, + { + "epoch": 0.7331223628691983, + "grad_norm": 0.630037784576416, + "learning_rate": 0.0002534607912689637, + "loss": 1.3375, + "step": 6950 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 0.6389343738555908, + "learning_rate": 0.00025158199125221325, + "loss": 1.3303, + "step": 6960 + }, + { + "epoch": 0.7352320675105485, + "grad_norm": 0.6358131170272827, + "learning_rate": 0.0002497087762526211, + "loss": 1.3241, + "step": 6970 + }, + { + "epoch": 0.7362869198312236, + "grad_norm": 0.5579487681388855, + "learning_rate": 0.0002478411672604766, + "loss": 1.3294, + "step": 6980 + }, + { + "epoch": 0.7373417721518988, + "grad_norm": 0.6271383166313171, + "learning_rate": 0.00024597918520324994, + "loss": 1.3316, + "step": 6990 + }, + { + "epoch": 0.7383966244725738, + "grad_norm": 
0.6242262125015259, + "learning_rate": 0.00024412285094535952, + "loss": 1.3405, + "step": 7000 + }, + { + "epoch": 0.739451476793249, + "grad_norm": 0.5699384808540344, + "learning_rate": 0.00024227218528793696, + "loss": 1.3086, + "step": 7010 + }, + { + "epoch": 0.740506329113924, + "grad_norm": 0.5961085557937622, + "learning_rate": 0.00024042720896859471, + "loss": 1.3262, + "step": 7020 + }, + { + "epoch": 0.7415611814345991, + "grad_norm": 0.5666136145591736, + "learning_rate": 0.00023858794266119323, + "loss": 1.3315, + "step": 7030 + }, + { + "epoch": 0.7426160337552743, + "grad_norm": 0.6562379598617554, + "learning_rate": 0.00023675440697560943, + "loss": 1.3367, + "step": 7040 + }, + { + "epoch": 0.7436708860759493, + "grad_norm": 0.5832165479660034, + "learning_rate": 0.0002349266224575063, + "loss": 1.3239, + "step": 7050 + }, + { + "epoch": 0.7447257383966245, + "grad_norm": 0.592017650604248, + "learning_rate": 0.0002331046095881017, + "loss": 1.3293, + "step": 7060 + }, + { + "epoch": 0.7457805907172996, + "grad_norm": 0.6305391788482666, + "learning_rate": 0.00023128838878393946, + "loss": 1.3348, + "step": 7070 + }, + { + "epoch": 0.7468354430379747, + "grad_norm": 0.5972729325294495, + "learning_rate": 0.00022947798039666051, + "loss": 1.3313, + "step": 7080 + }, + { + "epoch": 0.7478902953586498, + "grad_norm": 0.5541612505912781, + "learning_rate": 0.00022767340471277492, + "loss": 1.3316, + "step": 7090 + }, + { + "epoch": 0.7489451476793249, + "grad_norm": 0.5696297883987427, + "learning_rate": 0.00022587468195343436, + "loss": 1.3271, + "step": 7100 + }, + { + "epoch": 0.75, + "grad_norm": 0.6019385457038879, + "learning_rate": 0.00022408183227420528, + "loss": 1.3369, + "step": 7110 + }, + { + "epoch": 0.7510548523206751, + "grad_norm": 0.6632696390151978, + "learning_rate": 0.0002222948757648443, + "loss": 1.3309, + "step": 7120 + }, + { + "epoch": 0.7521097046413502, + "grad_norm": 0.6223411560058594, + "learning_rate": 
0.00022051383244907143, + "loss": 1.3382, + "step": 7130 + }, + { + "epoch": 0.7531645569620253, + "grad_norm": 0.605879545211792, + "learning_rate": 0.0002187387222843467, + "loss": 1.3237, + "step": 7140 + }, + { + "epoch": 0.7542194092827004, + "grad_norm": 0.646635890007019, + "learning_rate": 0.0002169695651616463, + "loss": 1.3201, + "step": 7150 + }, + { + "epoch": 0.7552742616033755, + "grad_norm": 0.5785254836082458, + "learning_rate": 0.00021520638090523955, + "loss": 1.3299, + "step": 7160 + }, + { + "epoch": 0.7563291139240507, + "grad_norm": 0.5867812037467957, + "learning_rate": 0.00021344918927246678, + "loss": 1.3357, + "step": 7170 + }, + { + "epoch": 0.7573839662447257, + "grad_norm": 0.6338456273078918, + "learning_rate": 0.00021169800995351874, + "loss": 1.3147, + "step": 7180 + }, + { + "epoch": 0.7584388185654009, + "grad_norm": 0.6108083724975586, + "learning_rate": 0.00020995286257121453, + "loss": 1.3179, + "step": 7190 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 0.6565657258033752, + "learning_rate": 0.00020821376668078264, + "loss": 1.323, + "step": 7200 + }, + { + "epoch": 0.760548523206751, + "grad_norm": 0.7382002472877502, + "learning_rate": 0.00020648074176964182, + "loss": 1.3284, + "step": 7210 + }, + { + "epoch": 0.7616033755274262, + "grad_norm": 0.5952702164649963, + "learning_rate": 0.00020475380725718228, + "loss": 1.3419, + "step": 7220 + }, + { + "epoch": 0.7626582278481012, + "grad_norm": 0.6728077530860901, + "learning_rate": 0.00020303298249454857, + "loss": 1.3355, + "step": 7230 + }, + { + "epoch": 0.7637130801687764, + "grad_norm": 0.5764117240905762, + "learning_rate": 0.00020131828676442237, + "loss": 1.3349, + "step": 7240 + }, + { + "epoch": 0.7647679324894515, + "grad_norm": 0.6966121792793274, + "learning_rate": 0.00019960973928080666, + "loss": 1.3272, + "step": 7250 + }, + { + "epoch": 0.7658227848101266, + "grad_norm": 0.6339370608329773, + "learning_rate": 0.0001979073591888101, + "loss": 1.3255, + 
"step": 7260 + }, + { + "epoch": 0.7668776371308017, + "grad_norm": 0.6080300211906433, + "learning_rate": 0.000196211165564433, + "loss": 1.323, + "step": 7270 + }, + { + "epoch": 0.7679324894514767, + "grad_norm": 0.6445187330245972, + "learning_rate": 0.00019452117741435314, + "loss": 1.3264, + "step": 7280 + }, + { + "epoch": 0.7689873417721519, + "grad_norm": 0.6663262844085693, + "learning_rate": 0.00019283741367571294, + "loss": 1.3348, + "step": 7290 + }, + { + "epoch": 0.770042194092827, + "grad_norm": 0.6093352437019348, + "learning_rate": 0.00019115989321590694, + "loss": 1.3416, + "step": 7300 + }, + { + "epoch": 0.7710970464135021, + "grad_norm": 0.5845469236373901, + "learning_rate": 0.00018948863483237154, + "loss": 1.3225, + "step": 7310 + }, + { + "epoch": 0.7721518987341772, + "grad_norm": 0.6263412833213806, + "learning_rate": 0.00018782365725237272, + "loss": 1.3158, + "step": 7320 + }, + { + "epoch": 0.7732067510548524, + "grad_norm": 0.620195746421814, + "learning_rate": 0.00018616497913279728, + "loss": 1.3222, + "step": 7330 + }, + { + "epoch": 0.7742616033755274, + "grad_norm": 0.575092613697052, + "learning_rate": 0.0001845126190599434, + "loss": 1.3201, + "step": 7340 + }, + { + "epoch": 0.7753164556962026, + "grad_norm": 0.5514320731163025, + "learning_rate": 0.00018286659554931254, + "loss": 1.3174, + "step": 7350 + }, + { + "epoch": 0.7763713080168776, + "grad_norm": 0.5683166980743408, + "learning_rate": 0.00018122692704540194, + "loss": 1.324, + "step": 7360 + }, + { + "epoch": 0.7774261603375527, + "grad_norm": 0.645595908164978, + "learning_rate": 0.00017959363192149752, + "loss": 1.3245, + "step": 7370 + }, + { + "epoch": 0.7784810126582279, + "grad_norm": 0.5672932267189026, + "learning_rate": 0.00017796672847946905, + "loss": 1.314, + "step": 7380 + }, + { + "epoch": 0.7795358649789029, + "grad_norm": 0.5594964027404785, + "learning_rate": 0.0001763462349495639, + "loss": 1.3221, + "step": 7390 + }, + { + "epoch": 
0.7805907172995781, + "grad_norm": 0.5857065916061401, + "learning_rate": 0.00017473216949020326, + "loss": 1.3293, + "step": 7400 + }, + { + "epoch": 0.7816455696202531, + "grad_norm": 0.5674099326133728, + "learning_rate": 0.0001731245501877787, + "loss": 1.313, + "step": 7410 + }, + { + "epoch": 0.7827004219409283, + "grad_norm": 0.6285955309867859, + "learning_rate": 0.00017152339505644963, + "loss": 1.3163, + "step": 7420 + }, + { + "epoch": 0.7837552742616034, + "grad_norm": 0.5605405569076538, + "learning_rate": 0.0001699287220379407, + "loss": 1.3034, + "step": 7430 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 0.594635546207428, + "learning_rate": 0.00016834054900134228, + "loss": 1.3073, + "step": 7440 + }, + { + "epoch": 0.7858649789029536, + "grad_norm": 0.5646359324455261, + "learning_rate": 0.00016675889374290852, + "loss": 1.3104, + "step": 7450 + }, + { + "epoch": 0.7869198312236287, + "grad_norm": 0.6040270924568176, + "learning_rate": 0.0001651837739858589, + "loss": 1.3199, + "step": 7460 + }, + { + "epoch": 0.7879746835443038, + "grad_norm": 0.6090522408485413, + "learning_rate": 0.00016361520738017934, + "loss": 1.3121, + "step": 7470 + }, + { + "epoch": 0.7890295358649789, + "grad_norm": 0.5741193294525146, + "learning_rate": 0.00016205321150242454, + "loss": 1.3192, + "step": 7480 + }, + { + "epoch": 0.790084388185654, + "grad_norm": 0.5636429190635681, + "learning_rate": 0.00016049780385552113, + "loss": 1.326, + "step": 7490 + }, + { + "epoch": 0.7911392405063291, + "grad_norm": 0.6021049618721008, + "learning_rate": 0.00015894900186857105, + "loss": 1.3206, + "step": 7500 + }, + { + "epoch": 0.7921940928270043, + "grad_norm": 0.592934250831604, + "learning_rate": 0.00015740682289665714, + "loss": 1.3272, + "step": 7510 + }, + { + "epoch": 0.7932489451476793, + "grad_norm": 0.5478881597518921, + "learning_rate": 0.0001558712842206477, + "loss": 1.3118, + "step": 7520 + }, + { + "epoch": 0.7943037974683544, + "grad_norm": 
0.5807749032974243, + "learning_rate": 0.00015434240304700332, + "loss": 1.3145, + "step": 7530 + }, + { + "epoch": 0.7953586497890295, + "grad_norm": 0.6597018837928772, + "learning_rate": 0.0001528201965075841, + "loss": 1.3103, + "step": 7540 + }, + { + "epoch": 0.7964135021097046, + "grad_norm": 0.619184672832489, + "learning_rate": 0.0001513046816594575, + "loss": 1.3109, + "step": 7550 + }, + { + "epoch": 0.7974683544303798, + "grad_norm": 0.5903917551040649, + "learning_rate": 0.0001497958754847076, + "loss": 1.309, + "step": 7560 + }, + { + "epoch": 0.7985232067510548, + "grad_norm": 0.5717349648475647, + "learning_rate": 0.00014829379489024415, + "loss": 1.3202, + "step": 7570 + }, + { + "epoch": 0.79957805907173, + "grad_norm": 0.6212629675865173, + "learning_rate": 0.0001467984567076137, + "loss": 1.3181, + "step": 7580 + }, + { + "epoch": 0.8006329113924051, + "grad_norm": 0.6061719655990601, + "learning_rate": 0.00014530987769281075, + "loss": 1.3103, + "step": 7590 + }, + { + "epoch": 0.8016877637130801, + "grad_norm": 0.5794782638549805, + "learning_rate": 0.00014382807452609003, + "loss": 1.312, + "step": 7600 + }, + { + "epoch": 0.8027426160337553, + "grad_norm": 0.6121061444282532, + "learning_rate": 0.00014235306381177952, + "loss": 1.3245, + "step": 7610 + }, + { + "epoch": 0.8037974683544303, + "grad_norm": 0.5613883137702942, + "learning_rate": 0.00014088486207809449, + "loss": 1.3012, + "step": 7620 + }, + { + "epoch": 0.8048523206751055, + "grad_norm": 0.5766070485115051, + "learning_rate": 0.0001394234857769521, + "loss": 1.3236, + "step": 7630 + }, + { + "epoch": 0.8059071729957806, + "grad_norm": 0.6340107321739197, + "learning_rate": 0.0001379689512837878, + "loss": 1.3139, + "step": 7640 + }, + { + "epoch": 0.8069620253164557, + "grad_norm": 0.5758427977561951, + "learning_rate": 0.00013652127489737067, + "loss": 1.3295, + "step": 7650 + }, + { + "epoch": 0.8080168776371308, + "grad_norm": 0.5671823024749756, + "learning_rate": 
0.00013508047283962137, + "loss": 1.3069, + "step": 7660 + }, + { + "epoch": 0.8090717299578059, + "grad_norm": 0.5820562243461609, + "learning_rate": 0.00013364656125543044, + "loss": 1.3152, + "step": 7670 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 0.6163503527641296, + "learning_rate": 0.00013221955621247749, + "loss": 1.3282, + "step": 7680 + }, + { + "epoch": 0.8111814345991561, + "grad_norm": 0.6215647459030151, + "learning_rate": 0.00013079947370105057, + "loss": 1.3014, + "step": 7690 + }, + { + "epoch": 0.8122362869198312, + "grad_norm": 0.5766898989677429, + "learning_rate": 0.00012938632963386808, + "loss": 1.3108, + "step": 7700 + }, + { + "epoch": 0.8132911392405063, + "grad_norm": 0.5947498679161072, + "learning_rate": 0.00012798013984589894, + "loss": 1.3093, + "step": 7710 + }, + { + "epoch": 0.8143459915611815, + "grad_norm": 0.5990689992904663, + "learning_rate": 0.00012658092009418652, + "loss": 1.3125, + "step": 7720 + }, + { + "epoch": 0.8154008438818565, + "grad_norm": 0.6105595231056213, + "learning_rate": 0.00012518868605767118, + "loss": 1.3186, + "step": 7730 + }, + { + "epoch": 0.8164556962025317, + "grad_norm": 0.5685436725616455, + "learning_rate": 0.0001238034533370153, + "loss": 1.3177, + "step": 7740 + }, + { + "epoch": 0.8175105485232067, + "grad_norm": 0.598069965839386, + "learning_rate": 0.0001224252374544278, + "loss": 1.3059, + "step": 7750 + }, + { + "epoch": 0.8185654008438819, + "grad_norm": 0.6107468008995056, + "learning_rate": 0.00012105405385349047, + "loss": 1.3038, + "step": 7760 + }, + { + "epoch": 0.819620253164557, + "grad_norm": 0.6076748967170715, + "learning_rate": 0.00011968991789898533, + "loss": 1.3119, + "step": 7770 + }, + { + "epoch": 0.820675105485232, + "grad_norm": 0.6020756959915161, + "learning_rate": 0.00011833284487672185, + "loss": 1.3058, + "step": 7780 + }, + { + "epoch": 0.8217299578059072, + "grad_norm": 0.5894330739974976, + "learning_rate": 0.00011698284999336578, + "loss": 1.3231, 
+ "step": 7790 + }, + { + "epoch": 0.8227848101265823, + "grad_norm": 0.6068540215492249, + "learning_rate": 0.00011563994837626898, + "loss": 1.3127, + "step": 7800 + }, + { + "epoch": 0.8238396624472574, + "grad_norm": 0.5670442581176758, + "learning_rate": 0.00011430415507329975, + "loss": 1.3107, + "step": 7810 + }, + { + "epoch": 0.8248945147679325, + "grad_norm": 0.57861328125, + "learning_rate": 0.00011297548505267424, + "loss": 1.3182, + "step": 7820 + }, + { + "epoch": 0.8259493670886076, + "grad_norm": 0.5925154089927673, + "learning_rate": 0.00011165395320278898, + "loss": 1.3072, + "step": 7830 + }, + { + "epoch": 0.8270042194092827, + "grad_norm": 0.5760761499404907, + "learning_rate": 0.00011033957433205364, + "loss": 1.2944, + "step": 7840 + }, + { + "epoch": 0.8280590717299579, + "grad_norm": 0.5584725737571716, + "learning_rate": 0.00010903236316872514, + "loss": 1.3038, + "step": 7850 + }, + { + "epoch": 0.8291139240506329, + "grad_norm": 0.5495910048484802, + "learning_rate": 0.00010773233436074287, + "loss": 1.292, + "step": 7860 + }, + { + "epoch": 0.830168776371308, + "grad_norm": 0.6482455730438232, + "learning_rate": 0.00010643950247556447, + "loss": 1.3041, + "step": 7870 + }, + { + "epoch": 0.8312236286919831, + "grad_norm": 0.5980952978134155, + "learning_rate": 0.00010515388200000245, + "loss": 1.3239, + "step": 7880 + }, + { + "epoch": 0.8322784810126582, + "grad_norm": 0.5795890092849731, + "learning_rate": 0.00010387548734006195, + "loss": 1.321, + "step": 7890 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.5872337222099304, + "learning_rate": 0.00010260433282077944, + "loss": 1.3005, + "step": 7900 + }, + { + "epoch": 0.8343881856540084, + "grad_norm": 0.5804054141044617, + "learning_rate": 0.00010134043268606191, + "loss": 1.3111, + "step": 7910 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 0.6046608686447144, + "learning_rate": 0.00010008380109852752, + "loss": 1.3096, + "step": 7920 + }, + { + "epoch": 
0.8364978902953587, + "grad_norm": 0.5552932620048523, + "learning_rate": 9.883445213934675e-05, + "loss": 1.3181, + "step": 7930 + }, + { + "epoch": 0.8375527426160337, + "grad_norm": 0.586566686630249, + "learning_rate": 9.759239980808494e-05, + "loss": 1.3117, + "step": 7940 + }, + { + "epoch": 0.8386075949367089, + "grad_norm": 0.603042483329773, + "learning_rate": 9.635765802254482e-05, + "loss": 1.3141, + "step": 7950 + }, + { + "epoch": 0.8396624472573839, + "grad_norm": 0.5557070970535278, + "learning_rate": 9.5130240618611e-05, + "loss": 1.3013, + "step": 7960 + }, + { + "epoch": 0.8407172995780591, + "grad_norm": 0.5837118029594421, + "learning_rate": 9.391016135009484e-05, + "loss": 1.3, + "step": 7970 + }, + { + "epoch": 0.8417721518987342, + "grad_norm": 0.5799198746681213, + "learning_rate": 9.269743388858019e-05, + "loss": 1.2987, + "step": 7980 + }, + { + "epoch": 0.8428270042194093, + "grad_norm": 0.659946858882904, + "learning_rate": 9.149207182327054e-05, + "loss": 1.3105, + "step": 7990 + }, + { + "epoch": 0.8438818565400844, + "grad_norm": 0.5449208617210388, + "learning_rate": 9.029408866083638e-05, + "loss": 1.306, + "step": 8000 + }, + { + "epoch": 0.8449367088607594, + "grad_norm": 0.6184920072555542, + "learning_rate": 8.910349782526394e-05, + "loss": 1.3137, + "step": 8010 + }, + { + "epoch": 0.8459915611814346, + "grad_norm": 0.5873808860778809, + "learning_rate": 8.792031265770475e-05, + "loss": 1.3061, + "step": 8020 + }, + { + "epoch": 0.8470464135021097, + "grad_norm": 0.6238742470741272, + "learning_rate": 8.67445464163267e-05, + "loss": 1.304, + "step": 8030 + }, + { + "epoch": 0.8481012658227848, + "grad_norm": 0.5751802325248718, + "learning_rate": 8.557621227616444e-05, + "loss": 1.3167, + "step": 8040 + }, + { + "epoch": 0.8491561181434599, + "grad_norm": 0.590232253074646, + "learning_rate": 8.441532332897248e-05, + "loss": 1.2975, + "step": 8050 + }, + { + "epoch": 0.8502109704641351, + "grad_norm": 0.5637131333351135, + 
"learning_rate": 8.326189258307832e-05, + "loss": 1.3049, + "step": 8060 + }, + { + "epoch": 0.8512658227848101, + "grad_norm": 0.5756136775016785, + "learning_rate": 8.211593296323672e-05, + "loss": 1.3102, + "step": 8070 + }, + { + "epoch": 0.8523206751054853, + "grad_norm": 0.5913081169128418, + "learning_rate": 8.097745731048475e-05, + "loss": 1.3109, + "step": 8080 + }, + { + "epoch": 0.8533755274261603, + "grad_norm": 0.5604097843170166, + "learning_rate": 7.984647838199773e-05, + "loss": 1.3015, + "step": 8090 + }, + { + "epoch": 0.8544303797468354, + "grad_norm": 0.5691359639167786, + "learning_rate": 7.872300885094736e-05, + "loss": 1.2939, + "step": 8100 + }, + { + "epoch": 0.8554852320675106, + "grad_norm": 0.5882915258407593, + "learning_rate": 7.760706130635792e-05, + "loss": 1.3123, + "step": 8110 + }, + { + "epoch": 0.8565400843881856, + "grad_norm": 0.5634912252426147, + "learning_rate": 7.649864825296669e-05, + "loss": 1.3017, + "step": 8120 + }, + { + "epoch": 0.8575949367088608, + "grad_norm": 0.6093071103096008, + "learning_rate": 7.539778211108309e-05, + "loss": 1.3019, + "step": 8130 + }, + { + "epoch": 0.8586497890295358, + "grad_norm": 0.5688495635986328, + "learning_rate": 7.430447521644973e-05, + "loss": 1.3007, + "step": 8140 + }, + { + "epoch": 0.859704641350211, + "grad_norm": 0.5709282159805298, + "learning_rate": 7.321873982010422e-05, + "loss": 1.2975, + "step": 8150 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 0.5879061818122864, + "learning_rate": 7.214058808824192e-05, + "loss": 1.3082, + "step": 8160 + }, + { + "epoch": 0.8618143459915611, + "grad_norm": 0.5791057348251343, + "learning_rate": 7.107003210207947e-05, + "loss": 1.3013, + "step": 8170 + }, + { + "epoch": 0.8628691983122363, + "grad_norm": 0.5582627058029175, + "learning_rate": 7.000708385771928e-05, + "loss": 1.3089, + "step": 8180 + }, + { + "epoch": 0.8639240506329114, + "grad_norm": 0.640566349029541, + "learning_rate": 6.89517552660156e-05, + "loss": 
1.2999, + "step": 8190 + }, + { + "epoch": 0.8649789029535865, + "grad_norm": 0.5824441909790039, + "learning_rate": 6.790405815244044e-05, + "loss": 1.3121, + "step": 8200 + }, + { + "epoch": 0.8660337552742616, + "grad_norm": 0.5699599385261536, + "learning_rate": 6.686400425695171e-05, + "loss": 1.3043, + "step": 8210 + }, + { + "epoch": 0.8670886075949367, + "grad_norm": 0.5442389249801636, + "learning_rate": 6.583160523386086e-05, + "loss": 1.3104, + "step": 8220 + }, + { + "epoch": 0.8681434599156118, + "grad_norm": 0.5581492185592651, + "learning_rate": 6.480687265170342e-05, + "loss": 1.3083, + "step": 8230 + }, + { + "epoch": 0.869198312236287, + "grad_norm": 0.5486487746238708, + "learning_rate": 6.37898179931081e-05, + "loss": 1.3045, + "step": 8240 + }, + { + "epoch": 0.870253164556962, + "grad_norm": 0.5841355323791504, + "learning_rate": 6.278045265466911e-05, + "loss": 1.3185, + "step": 8250 + }, + { + "epoch": 0.8713080168776371, + "grad_norm": 0.5917774438858032, + "learning_rate": 6.177878794681782e-05, + "loss": 1.3088, + "step": 8260 + }, + { + "epoch": 0.8723628691983122, + "grad_norm": 0.5629198551177979, + "learning_rate": 6.078483509369642e-05, + "loss": 1.3047, + "step": 8270 + }, + { + "epoch": 0.8734177215189873, + "grad_norm": 0.5627507567405701, + "learning_rate": 5.9798605233031904e-05, + "loss": 1.29, + "step": 8280 + }, + { + "epoch": 0.8744725738396625, + "grad_norm": 0.5794485807418823, + "learning_rate": 5.8820109416011485e-05, + "loss": 1.2896, + "step": 8290 + }, + { + "epoch": 0.8755274261603375, + "grad_norm": 0.5691803097724915, + "learning_rate": 5.784935860715862e-05, + "loss": 1.3153, + "step": 8300 + }, + { + "epoch": 0.8765822784810127, + "grad_norm": 0.5831617712974548, + "learning_rate": 5.6886363684210016e-05, + "loss": 1.2924, + "step": 8310 + }, + { + "epoch": 0.8776371308016878, + "grad_norm": 0.5539091229438782, + "learning_rate": 5.5931135437993994e-05, + "loss": 1.3122, + "step": 8320 + }, + { + "epoch": 
0.8786919831223629, + "grad_norm": 0.543411135673523, + "learning_rate": 5.498368457230965e-05, + "loss": 1.3069, + "step": 8330 + }, + { + "epoch": 0.879746835443038, + "grad_norm": 0.5651594996452332, + "learning_rate": 5.4044021703806375e-05, + "loss": 1.292, + "step": 8340 + }, + { + "epoch": 0.880801687763713, + "grad_norm": 0.5593839287757874, + "learning_rate": 5.311215736186536e-05, + "loss": 1.2971, + "step": 8350 + }, + { + "epoch": 0.8818565400843882, + "grad_norm": 0.5657485127449036, + "learning_rate": 5.21881019884819e-05, + "loss": 1.2937, + "step": 8360 + }, + { + "epoch": 0.8829113924050633, + "grad_norm": 0.5551851987838745, + "learning_rate": 5.127186593814748e-05, + "loss": 1.3017, + "step": 8370 + }, + { + "epoch": 0.8839662447257384, + "grad_norm": 0.5931308269500732, + "learning_rate": 5.0363459477734464e-05, + "loss": 1.3101, + "step": 8380 + }, + { + "epoch": 0.8850210970464135, + "grad_norm": 0.6110640168190002, + "learning_rate": 4.946289278638064e-05, + "loss": 1.2983, + "step": 8390 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 0.565481424331665, + "learning_rate": 4.8570175955375715e-05, + "loss": 1.2971, + "step": 8400 + }, + { + "epoch": 0.8871308016877637, + "grad_norm": 0.5643355846405029, + "learning_rate": 4.768531898804754e-05, + "loss": 1.3176, + "step": 8410 + }, + { + "epoch": 0.8881856540084389, + "grad_norm": 0.5691997408866882, + "learning_rate": 4.680833179965063e-05, + "loss": 1.3048, + "step": 8420 + }, + { + "epoch": 0.8892405063291139, + "grad_norm": 0.5752100944519043, + "learning_rate": 4.5939224217254574e-05, + "loss": 1.3031, + "step": 8430 + }, + { + "epoch": 0.890295358649789, + "grad_norm": 0.5415009260177612, + "learning_rate": 4.507800597963424e-05, + "loss": 1.2942, + "step": 8440 + }, + { + "epoch": 0.8913502109704642, + "grad_norm": 0.6092227101325989, + "learning_rate": 4.422468673716054e-05, + "loss": 1.3031, + "step": 8450 + }, + { + "epoch": 0.8924050632911392, + "grad_norm": 
0.5794483423233032, + "learning_rate": 4.337927605169212e-05, + "loss": 1.2991, + "step": 8460 + }, + { + "epoch": 0.8934599156118144, + "grad_norm": 0.5633105039596558, + "learning_rate": 4.2541783396468584e-05, + "loss": 1.3013, + "step": 8470 + }, + { + "epoch": 0.8945147679324894, + "grad_norm": 0.5504446029663086, + "learning_rate": 4.1712218156004014e-05, + "loss": 1.2974, + "step": 8480 + }, + { + "epoch": 0.8955696202531646, + "grad_norm": 0.5712250471115112, + "learning_rate": 4.089058962598213e-05, + "loss": 1.301, + "step": 8490 + }, + { + "epoch": 0.8966244725738397, + "grad_norm": 0.5549702048301697, + "learning_rate": 4.0076907013151726e-05, + "loss": 1.2914, + "step": 8500 + }, + { + "epoch": 0.8976793248945147, + "grad_norm": 0.5792744755744934, + "learning_rate": 3.927117943522379e-05, + "loss": 1.3026, + "step": 8510 + }, + { + "epoch": 0.8987341772151899, + "grad_norm": 0.558269202709198, + "learning_rate": 3.8473415920769304e-05, + "loss": 1.298, + "step": 8520 + }, + { + "epoch": 0.8997890295358649, + "grad_norm": 0.5533990263938904, + "learning_rate": 3.768362540911788e-05, + "loss": 1.3106, + "step": 8530 + }, + { + "epoch": 0.9008438818565401, + "grad_norm": 0.5806937217712402, + "learning_rate": 3.690181675025775e-05, + "loss": 1.3088, + "step": 8540 + }, + { + "epoch": 0.9018987341772152, + "grad_norm": 0.5757838487625122, + "learning_rate": 3.612799870473696e-05, + "loss": 1.2995, + "step": 8550 + }, + { + "epoch": 0.9029535864978903, + "grad_norm": 0.579710841178894, + "learning_rate": 3.5362179943564496e-05, + "loss": 1.2905, + "step": 8560 + }, + { + "epoch": 0.9040084388185654, + "grad_norm": 0.5804997682571411, + "learning_rate": 3.46043690481134e-05, + "loss": 1.3084, + "step": 8570 + }, + { + "epoch": 0.9050632911392406, + "grad_norm": 0.5598936676979065, + "learning_rate": 3.38545745100248e-05, + "loss": 1.3, + "step": 8580 + }, + { + "epoch": 0.9061181434599156, + "grad_norm": 0.5837939977645874, + "learning_rate": 
3.31128047311127e-05, + "loss": 1.3063, + "step": 8590 + }, + { + "epoch": 0.9071729957805907, + "grad_norm": 0.5428255796432495, + "learning_rate": 3.237906802326951e-05, + "loss": 1.2985, + "step": 8600 + }, + { + "epoch": 0.9082278481012658, + "grad_norm": 0.5702002644538879, + "learning_rate": 3.165337260837351e-05, + "loss": 1.2977, + "step": 8610 + }, + { + "epoch": 0.9092827004219409, + "grad_norm": 0.585951030254364, + "learning_rate": 3.093572661819602e-05, + "loss": 1.2961, + "step": 8620 + }, + { + "epoch": 0.9103375527426161, + "grad_norm": 0.5561054348945618, + "learning_rate": 3.022613809431088e-05, + "loss": 1.3064, + "step": 8630 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 0.6306205987930298, + "learning_rate": 2.952461498800388e-05, + "loss": 1.2857, + "step": 8640 + }, + { + "epoch": 0.9124472573839663, + "grad_norm": 0.5532601475715637, + "learning_rate": 2.8831165160184024e-05, + "loss": 1.2907, + "step": 8650 + }, + { + "epoch": 0.9135021097046413, + "grad_norm": 0.5695055723190308, + "learning_rate": 2.8145796381295276e-05, + "loss": 1.2924, + "step": 8660 + }, + { + "epoch": 0.9145569620253164, + "grad_norm": 0.5833194851875305, + "learning_rate": 2.7468516331229432e-05, + "loss": 1.2994, + "step": 8670 + }, + { + "epoch": 0.9156118143459916, + "grad_norm": 0.5433433055877686, + "learning_rate": 2.6799332599239974e-05, + "loss": 1.3074, + "step": 8680 + }, + { + "epoch": 0.9166666666666666, + "grad_norm": 0.5729235410690308, + "learning_rate": 2.6138252683857693e-05, + "loss": 1.3018, + "step": 8690 + }, + { + "epoch": 0.9177215189873418, + "grad_norm": 0.5436496734619141, + "learning_rate": 2.5485283992805615e-05, + "loss": 1.3017, + "step": 8700 + }, + { + "epoch": 0.9187763713080169, + "grad_norm": 0.5690861940383911, + "learning_rate": 2.4840433842916872e-05, + "loss": 1.2991, + "step": 8710 + }, + { + "epoch": 0.919831223628692, + "grad_norm": 0.5662671327590942, + "learning_rate": 2.4203709460052292e-05, + "loss": 1.2918, + 
"step": 8720 + }, + { + "epoch": 0.9208860759493671, + "grad_norm": 0.5606457591056824, + "learning_rate": 2.357511797901929e-05, + "loss": 1.3047, + "step": 8730 + }, + { + "epoch": 0.9219409282700421, + "grad_norm": 0.5727195739746094, + "learning_rate": 2.2954666443492505e-05, + "loss": 1.2935, + "step": 8740 + }, + { + "epoch": 0.9229957805907173, + "grad_norm": 0.579071581363678, + "learning_rate": 2.2342361805934297e-05, + "loss": 1.2919, + "step": 8750 + }, + { + "epoch": 0.9240506329113924, + "grad_norm": 0.5587451457977295, + "learning_rate": 2.173821092751721e-05, + "loss": 1.2806, + "step": 8760 + }, + { + "epoch": 0.9251054852320675, + "grad_norm": 0.5462833046913147, + "learning_rate": 2.1142220578046712e-05, + "loss": 1.2912, + "step": 8770 + }, + { + "epoch": 0.9261603375527426, + "grad_norm": 0.56135493516922, + "learning_rate": 2.0554397435885746e-05, + "loss": 1.2976, + "step": 8780 + }, + { + "epoch": 0.9272151898734177, + "grad_norm": 0.5459587574005127, + "learning_rate": 1.9974748087879636e-05, + "loss": 1.2936, + "step": 8790 + }, + { + "epoch": 0.9282700421940928, + "grad_norm": 0.5576261281967163, + "learning_rate": 1.9403279029282376e-05, + "loss": 1.2913, + "step": 8800 + }, + { + "epoch": 0.929324894514768, + "grad_norm": 0.5497505068778992, + "learning_rate": 1.8839996663683635e-05, + "loss": 1.3108, + "step": 8810 + }, + { + "epoch": 0.930379746835443, + "grad_norm": 0.5431159138679504, + "learning_rate": 1.8284907302937608e-05, + "loss": 1.2942, + "step": 8820 + }, + { + "epoch": 0.9314345991561181, + "grad_norm": 0.5710598230361938, + "learning_rate": 1.773801716709153e-05, + "loss": 1.3122, + "step": 8830 + }, + { + "epoch": 0.9324894514767933, + "grad_norm": 0.5452271699905396, + "learning_rate": 1.719933238431645e-05, + "loss": 1.2983, + "step": 8840 + }, + { + "epoch": 0.9335443037974683, + "grad_norm": 0.5779011845588684, + "learning_rate": 1.666885899083831e-05, + "loss": 1.2962, + "step": 8850 + }, + { + "epoch": 
0.9345991561181435, + "grad_norm": 0.5630154609680176, + "learning_rate": 1.614660293087056e-05, + "loss": 1.296, + "step": 8860 + }, + { + "epoch": 0.9356540084388185, + "grad_norm": 0.5631548166275024, + "learning_rate": 1.5632570056547308e-05, + "loss": 1.2989, + "step": 8870 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 0.5896672606468201, + "learning_rate": 1.512676612785796e-05, + "loss": 1.3003, + "step": 8880 + }, + { + "epoch": 0.9377637130801688, + "grad_norm": 0.5557523369789124, + "learning_rate": 1.4629196812582513e-05, + "loss": 1.3026, + "step": 8890 + }, + { + "epoch": 0.9388185654008439, + "grad_norm": 0.5593646168708801, + "learning_rate": 1.4139867686228102e-05, + "loss": 1.2918, + "step": 8900 + }, + { + "epoch": 0.939873417721519, + "grad_norm": 0.5900135636329651, + "learning_rate": 1.3658784231966481e-05, + "loss": 1.2907, + "step": 8910 + }, + { + "epoch": 0.9409282700421941, + "grad_norm": 0.5584891438484192, + "learning_rate": 1.3185951840572723e-05, + "loss": 1.299, + "step": 8920 + }, + { + "epoch": 0.9419831223628692, + "grad_norm": 0.5664926767349243, + "learning_rate": 1.2721375810364616e-05, + "loss": 1.2939, + "step": 8930 + }, + { + "epoch": 0.9430379746835443, + "grad_norm": 0.5543514490127563, + "learning_rate": 1.2265061347143447e-05, + "loss": 1.2962, + "step": 8940 + }, + { + "epoch": 0.9440928270042194, + "grad_norm": 0.5429769158363342, + "learning_rate": 1.1817013564135475e-05, + "loss": 1.2978, + "step": 8950 + }, + { + "epoch": 0.9451476793248945, + "grad_norm": 0.5650258660316467, + "learning_rate": 1.137723748193506e-05, + "loss": 1.3011, + "step": 8960 + }, + { + "epoch": 0.9462025316455697, + "grad_norm": 0.5684301257133484, + "learning_rate": 1.0945738028447783e-05, + "loss": 1.2982, + "step": 8970 + }, + { + "epoch": 0.9472573839662447, + "grad_norm": 0.5940498113632202, + "learning_rate": 1.0522520038835831e-05, + "loss": 1.2946, + "step": 8980 + }, + { + "epoch": 0.9483122362869199, + "grad_norm": 
0.5583057999610901, + "learning_rate": 1.0107588255463373e-05, + "loss": 1.2995, + "step": 8990 + }, + { + "epoch": 0.9493670886075949, + "grad_norm": 0.5749169588088989, + "learning_rate": 9.700947327843685e-06, + "loss": 1.2946, + "step": 9000 + }, + { + "epoch": 0.95042194092827, + "grad_norm": 0.5841549634933472, + "learning_rate": 9.302601812586852e-06, + "loss": 1.2889, + "step": 9010 + }, + { + "epoch": 0.9514767932489452, + "grad_norm": 0.5559630990028381, + "learning_rate": 8.912556173348907e-06, + "loss": 1.292, + "step": 9020 + }, + { + "epoch": 0.9525316455696202, + "grad_norm": 0.5571407675743103, + "learning_rate": 8.53081478078177e-06, + "loss": 1.3129, + "step": 9030 + }, + { + "epoch": 0.9535864978902954, + "grad_norm": 0.5615086555480957, + "learning_rate": 8.157381912484053e-06, + "loss": 1.2887, + "step": 9040 + }, + { + "epoch": 0.9546413502109705, + "grad_norm": 0.5664913654327393, + "learning_rate": 7.792261752953333e-06, + "loss": 1.2984, + "step": 9050 + }, + { + "epoch": 0.9556962025316456, + "grad_norm": 0.5425529479980469, + "learning_rate": 7.435458393539457e-06, + "loss": 1.2994, + "step": 9060 + }, + { + "epoch": 0.9567510548523207, + "grad_norm": 0.5745935440063477, + "learning_rate": 7.086975832398146e-06, + "loss": 1.3013, + "step": 9070 + }, + { + "epoch": 0.9578059071729957, + "grad_norm": 0.5569350123405457, + "learning_rate": 6.746817974446706e-06, + "loss": 1.301, + "step": 9080 + }, + { + "epoch": 0.9588607594936709, + "grad_norm": 0.5810770988464355, + "learning_rate": 6.414988631320062e-06, + "loss": 1.3014, + "step": 9090 + }, + { + "epoch": 0.959915611814346, + "grad_norm": 0.5547894239425659, + "learning_rate": 6.091491521327958e-06, + "loss": 1.2809, + "step": 9100 + }, + { + "epoch": 0.9609704641350211, + "grad_norm": 0.5757883787155151, + "learning_rate": 5.776330269413488e-06, + "loss": 1.2926, + "step": 9110 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 0.553692102432251, + "learning_rate": 
5.469508407112467e-06, + "loss": 1.3016, + "step": 9120 + }, + { + "epoch": 0.9630801687763713, + "grad_norm": 0.5613481998443604, + "learning_rate": 5.171029372513458e-06, + "loss": 1.2878, + "step": 9130 + }, + { + "epoch": 0.9641350210970464, + "grad_norm": 0.5582309365272522, + "learning_rate": 4.880896510220056e-06, + "loss": 1.3057, + "step": 9140 + }, + { + "epoch": 0.9651898734177216, + "grad_norm": 0.5564761161804199, + "learning_rate": 4.5991130713124995e-06, + "loss": 1.2942, + "step": 9150 + }, + { + "epoch": 0.9662447257383966, + "grad_norm": 0.5449462532997131, + "learning_rate": 4.325682213311782e-06, + "loss": 1.3007, + "step": 9160 + }, + { + "epoch": 0.9672995780590717, + "grad_norm": 0.5774485468864441, + "learning_rate": 4.060607000144351e-06, + "loss": 1.2992, + "step": 9170 + }, + { + "epoch": 0.9683544303797469, + "grad_norm": 0.5418827533721924, + "learning_rate": 3.803890402107213e-06, + "loss": 1.2967, + "step": 9180 + }, + { + "epoch": 0.9694092827004219, + "grad_norm": 0.5563338398933411, + "learning_rate": 3.555535295835216e-06, + "loss": 1.2929, + "step": 9190 + }, + { + "epoch": 0.9704641350210971, + "grad_norm": 0.5867204666137695, + "learning_rate": 3.3155444642687384e-06, + "loss": 1.2974, + "step": 9200 + }, + { + "epoch": 0.9715189873417721, + "grad_norm": 0.5570861101150513, + "learning_rate": 3.0839205966220474e-06, + "loss": 1.2942, + "step": 9210 + }, + { + "epoch": 0.9725738396624473, + "grad_norm": 0.5846670269966125, + "learning_rate": 2.8606662883539082e-06, + "loss": 1.2942, + "step": 9220 + }, + { + "epoch": 0.9736286919831224, + "grad_norm": 0.5587746500968933, + "learning_rate": 2.6457840411376888e-06, + "loss": 1.2852, + "step": 9230 + }, + { + "epoch": 0.9746835443037974, + "grad_norm": 0.5595304369926453, + "learning_rate": 2.4392762628338838e-06, + "loss": 1.2801, + "step": 9240 + }, + { + "epoch": 0.9757383966244726, + "grad_norm": 0.5355478525161743, + "learning_rate": 2.2411452674630517e-06, + "loss": 1.292, + 
"step": 9250 + }, + { + "epoch": 0.9767932489451476, + "grad_norm": 0.5358405709266663, + "learning_rate": 2.0513932751796695e-06, + "loss": 1.2881, + "step": 9260 + }, + { + "epoch": 0.9778481012658228, + "grad_norm": 0.5516250729560852, + "learning_rate": 1.8700224122475683e-06, + "loss": 1.2981, + "step": 9270 + }, + { + "epoch": 0.9789029535864979, + "grad_norm": 0.5519884824752808, + "learning_rate": 1.6970347110157879e-06, + "loss": 1.2866, + "step": 9280 + }, + { + "epoch": 0.979957805907173, + "grad_norm": 0.5483550429344177, + "learning_rate": 1.532432109895926e-06, + "loss": 1.2949, + "step": 9290 + }, + { + "epoch": 0.9810126582278481, + "grad_norm": 0.5939180850982666, + "learning_rate": 1.376216453340573e-06, + "loss": 1.2931, + "step": 9300 + }, + { + "epoch": 0.9820675105485233, + "grad_norm": 0.5513581037521362, + "learning_rate": 1.2283894918224125e-06, + "loss": 1.2932, + "step": 9310 + }, + { + "epoch": 0.9831223628691983, + "grad_norm": 0.561181902885437, + "learning_rate": 1.0889528818147366e-06, + "loss": 1.2906, + "step": 9320 + }, + { + "epoch": 0.9841772151898734, + "grad_norm": 0.5493670701980591, + "learning_rate": 9.579081857728766e-07, + "loss": 1.3019, + "step": 9330 + }, + { + "epoch": 0.9852320675105485, + "grad_norm": 0.5611178874969482, + "learning_rate": 8.352568721165521e-07, + "loss": 1.2997, + "step": 9340 + }, + { + "epoch": 0.9862869198312236, + "grad_norm": 0.5325267314910889, + "learning_rate": 7.210003152136324e-07, + "loss": 1.3091, + "step": 9350 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 0.5422888994216919, + "learning_rate": 6.151397953647331e-07, + "loss": 1.3011, + "step": 9360 + }, + { + "epoch": 0.9883966244725738, + "grad_norm": 0.570167064666748, + "learning_rate": 5.176764987885607e-07, + "loss": 1.3017, + "step": 9370 + }, + { + "epoch": 0.989451476793249, + "grad_norm": 0.5586701035499573, + "learning_rate": 4.2861151760900665e-07, + "loss": 1.2845, + "step": 9380 + }, + { + "epoch": 
0.990506329113924, + "grad_norm": 0.5682274699211121, + "learning_rate": 3.479458498426569e-07, + "loss": 1.2889, + "step": 9390 + }, + { + "epoch": 0.9915611814345991, + "grad_norm": 0.5616357922554016, + "learning_rate": 2.756803993877177e-07, + "loss": 1.3036, + "step": 9400 + }, + { + "epoch": 0.9926160337552743, + "grad_norm": 0.5785879492759705, + "learning_rate": 2.1181597601385716e-07, + "loss": 1.279, + "step": 9410 + }, + { + "epoch": 0.9936708860759493, + "grad_norm": 0.5519407987594604, + "learning_rate": 1.5635329535304554e-07, + "loss": 1.2976, + "step": 9420 + }, + { + "epoch": 0.9947257383966245, + "grad_norm": 0.5431817173957825, + "learning_rate": 1.0929297889172852e-07, + "loss": 1.296, + "step": 9430 + }, + { + "epoch": 0.9957805907172996, + "grad_norm": 0.55473393201828, + "learning_rate": 7.063555396383259e-08, + "loss": 1.2941, + "step": 9440 + }, + { + "epoch": 0.9968354430379747, + "grad_norm": 0.5389657616615295, + "learning_rate": 4.038145374460345e-08, + "loss": 1.3043, + "step": 9450 + }, + { + "epoch": 0.9978902953586498, + "grad_norm": 0.5544410943984985, + "learning_rate": 1.8531017245942972e-08, + "loss": 1.2865, + "step": 9460 + }, + { + "epoch": 0.9989451476793249, + "grad_norm": 0.5414763689041138, + "learning_rate": 5.084489312745521e-09, + "loss": 1.3005, + "step": 9470 + }, + { + "epoch": 1.0, + "grad_norm": 1.6001386642456055, + "learning_rate": 4.202061990032924e-11, + "loss": 1.2929, + "step": 9480 + } + ], + "logging_steps": 10, + "max_steps": 9480, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.8335001376290816e+16, + "train_batch_size": 1024, + "trial_name": null, + "trial_params": null +} diff --git 
a/saves-qwen2-cosine/checkpoint-9480/training_args.bin b/saves-qwen2-cosine/checkpoint-9480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..1ff450350e5986218f73a4e50ce2ce0244bd11c4 --- /dev/null +++ b/saves-qwen2-cosine/checkpoint-9480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44af0175c017fb00bfdde9dffe54a16ed1f107819fa90e76bcecb24068d6ff04 +size 5176 diff --git a/saves-qwen2-cosine/config.json b/saves-qwen2-cosine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a785d3506e02f39ed9708141afd519ae6edaa6ea --- /dev/null +++ b/saves-qwen2-cosine/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 768, + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen2", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "rms_norm_eps": 1e-06, + "rope_theta": 10000.0, + "sliding_window": 4096, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 2000 +} diff --git a/saves-qwen2-cosine/generation_config.json b/saves-qwen2-cosine/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ca31b478470766d67155dda554564aa41d8444b --- /dev/null +++ b/saves-qwen2-cosine/generation_config.json @@ -0,0 +1,4 @@ +{ + "_from_model_config": true, + "transformers_version": "4.42.4" +} diff --git a/saves-qwen2-cosine/model.safetensors b/saves-qwen2-cosine/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cf66ca0e98f3e825a615ad050eda1f52b048eb31 --- /dev/null +++ b/saves-qwen2-cosine/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:828b0a3e09b0a35f4892c520947d7d864a12a61fe79a4d5b1b7ae9ff7df3e75b +size 8351424 diff --git a/saves-qwen2-cosine/result.log b/saves-qwen2-cosine/result.log new file mode 100644 index 0000000000000000000000000000000000000000..ed3dd2cb0c6ef255100021b6997ff6591ed59945 --- /dev/null +++ b/saves-qwen2-cosine/result.log @@ -0,0 +1 @@ +{'train_runtime': 2194.4682, 'train_samples_per_second': 4423.215, 'train_steps_per_second': 4.32, 'train_loss': 1.5565085962351868, 'epoch': 1.0} \ No newline at end of file diff --git a/saves-qwen2-cosine/special_tokens_map.json b/saves-qwen2-cosine/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-qwen2-cosine/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-qwen2-cosine/tokenizer.json b/saves-qwen2-cosine/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-qwen2-cosine/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + 
"|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + 
"æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + 
"è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + 
"um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + 
"Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + 
"du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + 
"ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, 
+ "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 
1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 
1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 
1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 
1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 
1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + 
"Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + 
"Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ 
ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", + "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-qwen2-cosine/tokenizer_config.json b/saves-qwen2-cosine/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-qwen2-cosine/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + 
"model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-qwen2/checkpoint-9480/config.json b/saves-qwen2/checkpoint-9480/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a785d3506e02f39ed9708141afd519ae6edaa6ea --- /dev/null +++ b/saves-qwen2/checkpoint-9480/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 768, + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen2", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "rms_norm_eps": 1e-06, + "rope_theta": 10000.0, + "sliding_window": 4096, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 2000 +} diff --git a/saves-qwen2/checkpoint-9480/generation_config.json b/saves-qwen2/checkpoint-9480/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ca31b478470766d67155dda554564aa41d8444b --- /dev/null +++ b/saves-qwen2/checkpoint-9480/generation_config.json @@ -0,0 +1,4 @@ +{ + "_from_model_config": true, + "transformers_version": "4.42.4" +} diff --git a/saves-qwen2/checkpoint-9480/model.safetensors b/saves-qwen2/checkpoint-9480/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b8dc8738d5a38d0975e9c9ea3430293d096f5f9c --- /dev/null +++ b/saves-qwen2/checkpoint-9480/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0550995c0bba2a4ac014b832d38ebfcbaf367e0b3c1342b68d8ca1f35a60ea1b +size 8351424 diff --git a/saves-qwen2/checkpoint-9480/optimizer.pt b/saves-qwen2/checkpoint-9480/optimizer.pt new file mode 100644 index 
0000000000000000000000000000000000000000..5ed607be695ed68b1bcc24882dc60c7202eb1d6c --- /dev/null +++ b/saves-qwen2/checkpoint-9480/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76184f96f4ea8c40ba4f5225dac13b367cd79386d97e96dfea3bfee4c349a787 +size 16719504 diff --git a/saves-qwen2/checkpoint-9480/rng_state.pth b/saves-qwen2/checkpoint-9480/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f8291cd6ce87668b786a72f3e93d072fbe54902 --- /dev/null +++ b/saves-qwen2/checkpoint-9480/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c917636c7a58af68a29056522a757e9f9b99005b776641aa157c536967817d +size 14244 diff --git a/saves-qwen2/checkpoint-9480/scheduler.pt b/saves-qwen2/checkpoint-9480/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..75fc58e9b05cc951a82cac092de91cd65804440d --- /dev/null +++ b/saves-qwen2/checkpoint-9480/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0a00f09d701c4f602dd021702c8cfc44bc37c286d3a858d845780a823871eb9 +size 1064 diff --git a/saves-qwen2/checkpoint-9480/special_tokens_map.json b/saves-qwen2/checkpoint-9480/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-qwen2/checkpoint-9480/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-qwen2/checkpoint-9480/tokenizer.json b/saves-qwen2/checkpoint-9480/tokenizer.json new file mode 100644 index 
0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-qwen2/checkpoint-9480/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + 
">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 
202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 
353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 
500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 
646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + 
"åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + 
"Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + 
"con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 
1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 
1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 
1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + 
"Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + 
"èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, 
+ "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 
1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-qwen2/checkpoint-9480/tokenizer_config.json b/saves-qwen2/checkpoint-9480/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-qwen2/checkpoint-9480/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", 
+ "errors": "replace", + "model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-qwen2/checkpoint-9480/trainer_state.json b/saves-qwen2/checkpoint-9480/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a3eef7a75dcb537672aaaaead389216bffef1e97 --- /dev/null +++ b/saves-qwen2/checkpoint-9480/trainer_state.json @@ -0,0 +1,6669 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 9480, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0010548523206751054, + "grad_norm": 1.2806342840194702, + "learning_rate": 0.00015822784810126583, + "loss": 7.5215, + "step": 10 + }, + { + "epoch": 0.002109704641350211, + "grad_norm": 1.1760597229003906, + "learning_rate": 0.00031645569620253165, + "loss": 6.9094, + "step": 20 + }, + { + "epoch": 0.0031645569620253164, + "grad_norm": 0.8538024425506592, + "learning_rate": 0.00047468354430379745, + "loss": 6.2502, + "step": 30 + }, + { + "epoch": 0.004219409282700422, + "grad_norm": 0.9497146606445312, + "learning_rate": 0.0006329113924050633, + "loss": 5.7609, + "step": 40 + }, + { + "epoch": 0.005274261603375527, + "grad_norm": 1.1998716592788696, + "learning_rate": 0.0007911392405063291, + "loss": 5.31, + "step": 50 + }, + { + "epoch": 0.006329113924050633, + "grad_norm": 1.0191789865493774, + "learning_rate": 0.0009493670886075949, + "loss": 4.7995, + "step": 60 + }, + { + "epoch": 0.007383966244725738, + "grad_norm": 1.4629141092300415, + "learning_rate": 0.0011075949367088608, + "loss": 4.3866, + "step": 70 + }, + { + "epoch": 0.008438818565400843, + "grad_norm": 0.9979167580604553, + "learning_rate": 0.0012658227848101266, + "loss": 4.1273, + "step": 80 + }, + { + "epoch": 0.00949367088607595, + "grad_norm": 1.1925581693649292, + 
"learning_rate": 0.0014240506329113926, + "loss": 3.9193, + "step": 90 + }, + { + "epoch": 0.010548523206751054, + "grad_norm": 0.8199864625930786, + "learning_rate": 0.0015, + "loss": 3.7846, + "step": 100 + }, + { + "epoch": 0.011603375527426161, + "grad_norm": 0.6942883133888245, + "learning_rate": 0.0015, + "loss": 3.632, + "step": 110 + }, + { + "epoch": 0.012658227848101266, + "grad_norm": 0.971352756023407, + "learning_rate": 0.0015, + "loss": 3.5183, + "step": 120 + }, + { + "epoch": 0.013713080168776372, + "grad_norm": 0.9205856323242188, + "learning_rate": 0.0015, + "loss": 3.4123, + "step": 130 + }, + { + "epoch": 0.014767932489451477, + "grad_norm": 1.195121169090271, + "learning_rate": 0.0015, + "loss": 3.3153, + "step": 140 + }, + { + "epoch": 0.015822784810126583, + "grad_norm": 0.9240769743919373, + "learning_rate": 0.0015, + "loss": 3.2401, + "step": 150 + }, + { + "epoch": 0.016877637130801686, + "grad_norm": 0.9060587882995605, + "learning_rate": 0.0015, + "loss": 3.165, + "step": 160 + }, + { + "epoch": 0.017932489451476793, + "grad_norm": 1.4201149940490723, + "learning_rate": 0.0015, + "loss": 3.0976, + "step": 170 + }, + { + "epoch": 0.0189873417721519, + "grad_norm": 0.7570425271987915, + "learning_rate": 0.0015, + "loss": 3.0547, + "step": 180 + }, + { + "epoch": 0.020042194092827006, + "grad_norm": 0.7830666899681091, + "learning_rate": 0.0015, + "loss": 2.9899, + "step": 190 + }, + { + "epoch": 0.02109704641350211, + "grad_norm": 0.9701388478279114, + "learning_rate": 0.0015, + "loss": 2.9465, + "step": 200 + }, + { + "epoch": 0.022151898734177215, + "grad_norm": 0.8196717500686646, + "learning_rate": 0.0015, + "loss": 2.9101, + "step": 210 + }, + { + "epoch": 0.023206751054852322, + "grad_norm": 0.8286615610122681, + "learning_rate": 0.0015, + "loss": 2.8724, + "step": 220 + }, + { + "epoch": 0.024261603375527425, + "grad_norm": 1.096436858177185, + "learning_rate": 0.0015, + "loss": 2.8215, + "step": 230 + }, + { + "epoch": 
0.02531645569620253, + "grad_norm": 0.8319011330604553, + "learning_rate": 0.0015, + "loss": 2.7821, + "step": 240 + }, + { + "epoch": 0.026371308016877638, + "grad_norm": 1.0509744882583618, + "learning_rate": 0.0015, + "loss": 2.7461, + "step": 250 + }, + { + "epoch": 0.027426160337552744, + "grad_norm": 0.9317680597305298, + "learning_rate": 0.0015, + "loss": 2.7163, + "step": 260 + }, + { + "epoch": 0.028481012658227847, + "grad_norm": 1.1785050630569458, + "learning_rate": 0.0015, + "loss": 2.6796, + "step": 270 + }, + { + "epoch": 0.029535864978902954, + "grad_norm": 0.9368894100189209, + "learning_rate": 0.0015, + "loss": 2.6475, + "step": 280 + }, + { + "epoch": 0.03059071729957806, + "grad_norm": 1.3169634342193604, + "learning_rate": 0.0015, + "loss": 2.6201, + "step": 290 + }, + { + "epoch": 0.03164556962025317, + "grad_norm": 0.8132220506668091, + "learning_rate": 0.0015, + "loss": 2.598, + "step": 300 + }, + { + "epoch": 0.03270042194092827, + "grad_norm": 0.7414838671684265, + "learning_rate": 0.0015, + "loss": 2.567, + "step": 310 + }, + { + "epoch": 0.03375527426160337, + "grad_norm": 1.1422909498214722, + "learning_rate": 0.0015, + "loss": 2.5433, + "step": 320 + }, + { + "epoch": 0.03481012658227848, + "grad_norm": 1.0550543069839478, + "learning_rate": 0.0015, + "loss": 2.529, + "step": 330 + }, + { + "epoch": 0.035864978902953586, + "grad_norm": 1.1679412126541138, + "learning_rate": 0.0015, + "loss": 2.5035, + "step": 340 + }, + { + "epoch": 0.03691983122362869, + "grad_norm": 0.7540435194969177, + "learning_rate": 0.0015, + "loss": 2.4617, + "step": 350 + }, + { + "epoch": 0.0379746835443038, + "grad_norm": 1.3122093677520752, + "learning_rate": 0.0015, + "loss": 2.4557, + "step": 360 + }, + { + "epoch": 0.039029535864978905, + "grad_norm": 0.8378611207008362, + "learning_rate": 0.0015, + "loss": 2.4313, + "step": 370 + }, + { + "epoch": 0.04008438818565401, + "grad_norm": 1.0392382144927979, + "learning_rate": 0.0015, + "loss": 2.4266, + 
"step": 380 + }, + { + "epoch": 0.04113924050632911, + "grad_norm": 1.125244379043579, + "learning_rate": 0.0015, + "loss": 2.4273, + "step": 390 + }, + { + "epoch": 0.04219409282700422, + "grad_norm": 1.113390564918518, + "learning_rate": 0.0015, + "loss": 2.3835, + "step": 400 + }, + { + "epoch": 0.043248945147679324, + "grad_norm": 0.8564067482948303, + "learning_rate": 0.0015, + "loss": 2.3699, + "step": 410 + }, + { + "epoch": 0.04430379746835443, + "grad_norm": 0.7774996757507324, + "learning_rate": 0.0015, + "loss": 2.348, + "step": 420 + }, + { + "epoch": 0.04535864978902954, + "grad_norm": 0.8310631513595581, + "learning_rate": 0.0015, + "loss": 2.3277, + "step": 430 + }, + { + "epoch": 0.046413502109704644, + "grad_norm": 1.1351619958877563, + "learning_rate": 0.0015, + "loss": 2.3135, + "step": 440 + }, + { + "epoch": 0.04746835443037975, + "grad_norm": 1.0552862882614136, + "learning_rate": 0.0015, + "loss": 2.3128, + "step": 450 + }, + { + "epoch": 0.04852320675105485, + "grad_norm": 1.1699281930923462, + "learning_rate": 0.0015, + "loss": 2.2951, + "step": 460 + }, + { + "epoch": 0.049578059071729956, + "grad_norm": 0.7804955244064331, + "learning_rate": 0.0015, + "loss": 2.2699, + "step": 470 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 0.7691866755485535, + "learning_rate": 0.0015, + "loss": 2.2613, + "step": 480 + }, + { + "epoch": 0.05168776371308017, + "grad_norm": 0.7474718689918518, + "learning_rate": 0.0015, + "loss": 2.237, + "step": 490 + }, + { + "epoch": 0.052742616033755275, + "grad_norm": 1.5697402954101562, + "learning_rate": 0.0015, + "loss": 2.2442, + "step": 500 + }, + { + "epoch": 0.05379746835443038, + "grad_norm": 1.3457592725753784, + "learning_rate": 0.0015, + "loss": 2.2274, + "step": 510 + }, + { + "epoch": 0.05485232067510549, + "grad_norm": 1.226845145225525, + "learning_rate": 0.0015, + "loss": 2.2029, + "step": 520 + }, + { + "epoch": 0.05590717299578059, + "grad_norm": 0.950566291809082, + "learning_rate": 
0.0015, + "loss": 2.1893, + "step": 530 + }, + { + "epoch": 0.056962025316455694, + "grad_norm": 0.8612412810325623, + "learning_rate": 0.0015, + "loss": 2.1691, + "step": 540 + }, + { + "epoch": 0.0580168776371308, + "grad_norm": 0.7417976260185242, + "learning_rate": 0.0015, + "loss": 2.1768, + "step": 550 + }, + { + "epoch": 0.05907172995780591, + "grad_norm": 0.672450065612793, + "learning_rate": 0.0015, + "loss": 2.1389, + "step": 560 + }, + { + "epoch": 0.060126582278481014, + "grad_norm": 0.8100742697715759, + "learning_rate": 0.0015, + "loss": 2.1478, + "step": 570 + }, + { + "epoch": 0.06118143459915612, + "grad_norm": 1.0130438804626465, + "learning_rate": 0.0015, + "loss": 2.1491, + "step": 580 + }, + { + "epoch": 0.06223628691983123, + "grad_norm": 0.9498407244682312, + "learning_rate": 0.0015, + "loss": 2.1258, + "step": 590 + }, + { + "epoch": 0.06329113924050633, + "grad_norm": 1.2642830610275269, + "learning_rate": 0.0015, + "loss": 2.1046, + "step": 600 + }, + { + "epoch": 0.06434599156118144, + "grad_norm": 0.8031076192855835, + "learning_rate": 0.0015, + "loss": 2.1057, + "step": 610 + }, + { + "epoch": 0.06540084388185655, + "grad_norm": 0.695659339427948, + "learning_rate": 0.0015, + "loss": 2.0904, + "step": 620 + }, + { + "epoch": 0.06645569620253164, + "grad_norm": 0.9128444194793701, + "learning_rate": 0.0015, + "loss": 2.0747, + "step": 630 + }, + { + "epoch": 0.06751054852320675, + "grad_norm": 0.8383111953735352, + "learning_rate": 0.0015, + "loss": 2.0877, + "step": 640 + }, + { + "epoch": 0.06856540084388185, + "grad_norm": 0.7684810757637024, + "learning_rate": 0.0015, + "loss": 2.0799, + "step": 650 + }, + { + "epoch": 0.06962025316455696, + "grad_norm": 0.7186339497566223, + "learning_rate": 0.0015, + "loss": 2.066, + "step": 660 + }, + { + "epoch": 0.07067510548523206, + "grad_norm": 0.8268847465515137, + "learning_rate": 0.0015, + "loss": 2.0449, + "step": 670 + }, + { + "epoch": 0.07172995780590717, + "grad_norm": 
0.876383364200592, + "learning_rate": 0.0015, + "loss": 2.0385, + "step": 680 + }, + { + "epoch": 0.07278481012658228, + "grad_norm": 0.911506712436676, + "learning_rate": 0.0015, + "loss": 2.0492, + "step": 690 + }, + { + "epoch": 0.07383966244725738, + "grad_norm": 0.766332745552063, + "learning_rate": 0.0015, + "loss": 2.0314, + "step": 700 + }, + { + "epoch": 0.07489451476793249, + "grad_norm": 0.8816922307014465, + "learning_rate": 0.0015, + "loss": 2.0177, + "step": 710 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 0.8093923926353455, + "learning_rate": 0.0015, + "loss": 2.0037, + "step": 720 + }, + { + "epoch": 0.0770042194092827, + "grad_norm": 0.8848650455474854, + "learning_rate": 0.0015, + "loss": 2.001, + "step": 730 + }, + { + "epoch": 0.07805907172995781, + "grad_norm": 0.7209733724594116, + "learning_rate": 0.0015, + "loss": 2.0055, + "step": 740 + }, + { + "epoch": 0.07911392405063292, + "grad_norm": 0.8340552449226379, + "learning_rate": 0.0015, + "loss": 1.9856, + "step": 750 + }, + { + "epoch": 0.08016877637130802, + "grad_norm": 2.06022572517395, + "learning_rate": 0.0015, + "loss": 1.9857, + "step": 760 + }, + { + "epoch": 0.08122362869198312, + "grad_norm": 1.4183058738708496, + "learning_rate": 0.0015, + "loss": 1.9881, + "step": 770 + }, + { + "epoch": 0.08227848101265822, + "grad_norm": 0.950039803981781, + "learning_rate": 0.0015, + "loss": 1.9714, + "step": 780 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 0.8031538128852844, + "learning_rate": 0.0015, + "loss": 1.9676, + "step": 790 + }, + { + "epoch": 0.08438818565400844, + "grad_norm": 0.8551667332649231, + "learning_rate": 0.0015, + "loss": 1.9491, + "step": 800 + }, + { + "epoch": 0.08544303797468354, + "grad_norm": 0.9857729077339172, + "learning_rate": 0.0015, + "loss": 1.937, + "step": 810 + }, + { + "epoch": 0.08649789029535865, + "grad_norm": 0.6813510060310364, + "learning_rate": 0.0015, + "loss": 1.9439, + "step": 820 + }, + { + "epoch": 0.08755274261603375, 
+ "grad_norm": 0.8941097259521484, + "learning_rate": 0.0015, + "loss": 1.9488, + "step": 830 + }, + { + "epoch": 0.08860759493670886, + "grad_norm": 0.7286725044250488, + "learning_rate": 0.0015, + "loss": 1.9277, + "step": 840 + }, + { + "epoch": 0.08966244725738397, + "grad_norm": 1.0762876272201538, + "learning_rate": 0.0015, + "loss": 1.9292, + "step": 850 + }, + { + "epoch": 0.09071729957805907, + "grad_norm": 0.7775930166244507, + "learning_rate": 0.0015, + "loss": 1.9282, + "step": 860 + }, + { + "epoch": 0.09177215189873418, + "grad_norm": 0.9856833219528198, + "learning_rate": 0.0015, + "loss": 1.9235, + "step": 870 + }, + { + "epoch": 0.09282700421940929, + "grad_norm": 0.8709974884986877, + "learning_rate": 0.0015, + "loss": 1.9191, + "step": 880 + }, + { + "epoch": 0.0938818565400844, + "grad_norm": 1.1382310390472412, + "learning_rate": 0.0015, + "loss": 1.9083, + "step": 890 + }, + { + "epoch": 0.0949367088607595, + "grad_norm": 0.7559429407119751, + "learning_rate": 0.0015, + "loss": 1.9057, + "step": 900 + }, + { + "epoch": 0.09599156118143459, + "grad_norm": 0.9810446500778198, + "learning_rate": 0.0015, + "loss": 1.9241, + "step": 910 + }, + { + "epoch": 0.0970464135021097, + "grad_norm": 0.8326524496078491, + "learning_rate": 0.0015, + "loss": 1.8949, + "step": 920 + }, + { + "epoch": 0.0981012658227848, + "grad_norm": 1.1991360187530518, + "learning_rate": 0.0015, + "loss": 1.8739, + "step": 930 + }, + { + "epoch": 0.09915611814345991, + "grad_norm": 0.7341702580451965, + "learning_rate": 0.0015, + "loss": 1.8885, + "step": 940 + }, + { + "epoch": 0.10021097046413502, + "grad_norm": 0.8404623866081238, + "learning_rate": 0.0015, + "loss": 1.8712, + "step": 950 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 0.8448720574378967, + "learning_rate": 0.0015, + "loss": 1.8794, + "step": 960 + }, + { + "epoch": 0.10232067510548523, + "grad_norm": 0.7987045645713806, + "learning_rate": 0.0015, + "loss": 1.883, + "step": 970 + }, + { + "epoch": 
0.10337552742616034, + "grad_norm": 0.6390947103500366, + "learning_rate": 0.0015, + "loss": 1.8609, + "step": 980 + }, + { + "epoch": 0.10443037974683544, + "grad_norm": 0.830974280834198, + "learning_rate": 0.0015, + "loss": 1.8608, + "step": 990 + }, + { + "epoch": 0.10548523206751055, + "grad_norm": 0.850219190120697, + "learning_rate": 0.0015, + "loss": 1.8717, + "step": 1000 + }, + { + "epoch": 0.10654008438818566, + "grad_norm": 0.713367223739624, + "learning_rate": 0.0015, + "loss": 1.8622, + "step": 1010 + }, + { + "epoch": 0.10759493670886076, + "grad_norm": 1.5319929122924805, + "learning_rate": 0.0015, + "loss": 1.8517, + "step": 1020 + }, + { + "epoch": 0.10864978902953587, + "grad_norm": 1.1368683576583862, + "learning_rate": 0.0015, + "loss": 1.8642, + "step": 1030 + }, + { + "epoch": 0.10970464135021098, + "grad_norm": 0.7220431566238403, + "learning_rate": 0.0015, + "loss": 1.8436, + "step": 1040 + }, + { + "epoch": 0.11075949367088607, + "grad_norm": 0.7265199422836304, + "learning_rate": 0.0015, + "loss": 1.8453, + "step": 1050 + }, + { + "epoch": 0.11181434599156118, + "grad_norm": 0.7204943299293518, + "learning_rate": 0.0015, + "loss": 1.8352, + "step": 1060 + }, + { + "epoch": 0.11286919831223628, + "grad_norm": 0.7295821905136108, + "learning_rate": 0.0015, + "loss": 1.8258, + "step": 1070 + }, + { + "epoch": 0.11392405063291139, + "grad_norm": 0.9046159386634827, + "learning_rate": 0.0015, + "loss": 1.8269, + "step": 1080 + }, + { + "epoch": 0.1149789029535865, + "grad_norm": 0.857608437538147, + "learning_rate": 0.0015, + "loss": 1.837, + "step": 1090 + }, + { + "epoch": 0.1160337552742616, + "grad_norm": 0.6817290186882019, + "learning_rate": 0.0015, + "loss": 1.8231, + "step": 1100 + }, + { + "epoch": 0.11708860759493671, + "grad_norm": 0.6669089794158936, + "learning_rate": 0.0015, + "loss": 1.8263, + "step": 1110 + }, + { + "epoch": 0.11814345991561181, + "grad_norm": 1.4850902557373047, + "learning_rate": 0.0015, + "loss": 1.8197, + 
"step": 1120 + }, + { + "epoch": 0.11919831223628692, + "grad_norm": 1.0368800163269043, + "learning_rate": 0.0015, + "loss": 1.8213, + "step": 1130 + }, + { + "epoch": 0.12025316455696203, + "grad_norm": 1.33066725730896, + "learning_rate": 0.0015, + "loss": 1.8192, + "step": 1140 + }, + { + "epoch": 0.12130801687763713, + "grad_norm": 0.9209538698196411, + "learning_rate": 0.0015, + "loss": 1.8017, + "step": 1150 + }, + { + "epoch": 0.12236286919831224, + "grad_norm": 0.770684003829956, + "learning_rate": 0.0015, + "loss": 1.812, + "step": 1160 + }, + { + "epoch": 0.12341772151898735, + "grad_norm": 0.6091917753219604, + "learning_rate": 0.0015, + "loss": 1.7942, + "step": 1170 + }, + { + "epoch": 0.12447257383966245, + "grad_norm": 0.6978843212127686, + "learning_rate": 0.0015, + "loss": 1.7826, + "step": 1180 + }, + { + "epoch": 0.12552742616033755, + "grad_norm": 0.6720295548439026, + "learning_rate": 0.0015, + "loss": 1.7883, + "step": 1190 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 0.809714138507843, + "learning_rate": 0.0015, + "loss": 1.8018, + "step": 1200 + }, + { + "epoch": 0.12763713080168776, + "grad_norm": 0.7365082502365112, + "learning_rate": 0.0015, + "loss": 1.797, + "step": 1210 + }, + { + "epoch": 0.12869198312236288, + "grad_norm": 0.6678728461265564, + "learning_rate": 0.0015, + "loss": 1.7804, + "step": 1220 + }, + { + "epoch": 0.12974683544303797, + "grad_norm": 0.7388237118721008, + "learning_rate": 0.0015, + "loss": 1.7697, + "step": 1230 + }, + { + "epoch": 0.1308016877637131, + "grad_norm": 0.8108173608779907, + "learning_rate": 0.0015, + "loss": 1.7869, + "step": 1240 + }, + { + "epoch": 0.13185654008438819, + "grad_norm": 0.8109773397445679, + "learning_rate": 0.0015, + "loss": 1.7884, + "step": 1250 + }, + { + "epoch": 0.13291139240506328, + "grad_norm": 0.6703519225120544, + "learning_rate": 0.0015, + "loss": 1.7704, + "step": 1260 + }, + { + "epoch": 0.1339662447257384, + "grad_norm": 0.7193007469177246, + 
"learning_rate": 0.0015, + "loss": 1.7677, + "step": 1270 + }, + { + "epoch": 0.1350210970464135, + "grad_norm": 0.645974338054657, + "learning_rate": 0.0015, + "loss": 1.7834, + "step": 1280 + }, + { + "epoch": 0.1360759493670886, + "grad_norm": 0.6644572019577026, + "learning_rate": 0.0015, + "loss": 1.7688, + "step": 1290 + }, + { + "epoch": 0.1371308016877637, + "grad_norm": 0.7001248598098755, + "learning_rate": 0.0015, + "loss": 1.7559, + "step": 1300 + }, + { + "epoch": 0.13818565400843882, + "grad_norm": 0.7161771059036255, + "learning_rate": 0.0015, + "loss": 1.7625, + "step": 1310 + }, + { + "epoch": 0.13924050632911392, + "grad_norm": 0.8758841156959534, + "learning_rate": 0.0015, + "loss": 1.7648, + "step": 1320 + }, + { + "epoch": 0.14029535864978904, + "grad_norm": 1.0882781744003296, + "learning_rate": 0.0015, + "loss": 1.7545, + "step": 1330 + }, + { + "epoch": 0.14135021097046413, + "grad_norm": 0.7270399928092957, + "learning_rate": 0.0015, + "loss": 1.7552, + "step": 1340 + }, + { + "epoch": 0.14240506329113925, + "grad_norm": 0.6997400522232056, + "learning_rate": 0.0015, + "loss": 1.7578, + "step": 1350 + }, + { + "epoch": 0.14345991561181434, + "grad_norm": 0.6069106459617615, + "learning_rate": 0.0015, + "loss": 1.7474, + "step": 1360 + }, + { + "epoch": 0.14451476793248946, + "grad_norm": 0.7863442301750183, + "learning_rate": 0.0015, + "loss": 1.7474, + "step": 1370 + }, + { + "epoch": 0.14556962025316456, + "grad_norm": 0.7006204724311829, + "learning_rate": 0.0015, + "loss": 1.7371, + "step": 1380 + }, + { + "epoch": 0.14662447257383968, + "grad_norm": 0.6867172122001648, + "learning_rate": 0.0015, + "loss": 1.746, + "step": 1390 + }, + { + "epoch": 0.14767932489451477, + "grad_norm": 0.7964699268341064, + "learning_rate": 0.0015, + "loss": 1.7314, + "step": 1400 + }, + { + "epoch": 0.14873417721518986, + "grad_norm": 0.6391115188598633, + "learning_rate": 0.0015, + "loss": 1.7328, + "step": 1410 + }, + { + "epoch": 0.14978902953586498, + 
"grad_norm": 0.6883602142333984, + "learning_rate": 0.0015, + "loss": 1.7389, + "step": 1420 + }, + { + "epoch": 0.15084388185654007, + "grad_norm": 0.6849308609962463, + "learning_rate": 0.0015, + "loss": 1.7298, + "step": 1430 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 0.9294276237487793, + "learning_rate": 0.0015, + "loss": 1.7282, + "step": 1440 + }, + { + "epoch": 0.1529535864978903, + "grad_norm": 0.6881417632102966, + "learning_rate": 0.0015, + "loss": 1.7304, + "step": 1450 + }, + { + "epoch": 0.1540084388185654, + "grad_norm": 0.7681412696838379, + "learning_rate": 0.0015, + "loss": 1.7229, + "step": 1460 + }, + { + "epoch": 0.1550632911392405, + "grad_norm": 0.7433717846870422, + "learning_rate": 0.0015, + "loss": 1.7199, + "step": 1470 + }, + { + "epoch": 0.15611814345991562, + "grad_norm": 1.03323495388031, + "learning_rate": 0.0015, + "loss": 1.7277, + "step": 1480 + }, + { + "epoch": 0.1571729957805907, + "grad_norm": 0.7045835256576538, + "learning_rate": 0.0015, + "loss": 1.7212, + "step": 1490 + }, + { + "epoch": 0.15822784810126583, + "grad_norm": 0.8270373940467834, + "learning_rate": 0.0015, + "loss": 1.7211, + "step": 1500 + }, + { + "epoch": 0.15928270042194093, + "grad_norm": 0.8151040077209473, + "learning_rate": 0.0015, + "loss": 1.7233, + "step": 1510 + }, + { + "epoch": 0.16033755274261605, + "grad_norm": 0.6330779790878296, + "learning_rate": 0.0015, + "loss": 1.7187, + "step": 1520 + }, + { + "epoch": 0.16139240506329114, + "grad_norm": 0.6375407576560974, + "learning_rate": 0.0015, + "loss": 1.7011, + "step": 1530 + }, + { + "epoch": 0.16244725738396623, + "grad_norm": 0.5960315465927124, + "learning_rate": 0.0015, + "loss": 1.6919, + "step": 1540 + }, + { + "epoch": 0.16350210970464135, + "grad_norm": 1.1120203733444214, + "learning_rate": 0.0015, + "loss": 1.6986, + "step": 1550 + }, + { + "epoch": 0.16455696202531644, + "grad_norm": 0.6751340627670288, + "learning_rate": 0.0015, + "loss": 1.6984, + "step": 1560 + }, + { 
+ "epoch": 0.16561181434599156, + "grad_norm": 0.7697312831878662, + "learning_rate": 0.0015, + "loss": 1.6884, + "step": 1570 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 0.63634192943573, + "learning_rate": 0.0015, + "loss": 1.7005, + "step": 1580 + }, + { + "epoch": 0.16772151898734178, + "grad_norm": 0.9557291865348816, + "learning_rate": 0.0015, + "loss": 1.6978, + "step": 1590 + }, + { + "epoch": 0.16877637130801687, + "grad_norm": 0.6814422607421875, + "learning_rate": 0.0015, + "loss": 1.7021, + "step": 1600 + }, + { + "epoch": 0.169831223628692, + "grad_norm": 0.6844719052314758, + "learning_rate": 0.0015, + "loss": 1.6995, + "step": 1610 + }, + { + "epoch": 0.17088607594936708, + "grad_norm": 0.7683190703392029, + "learning_rate": 0.0015, + "loss": 1.6692, + "step": 1620 + }, + { + "epoch": 0.1719409282700422, + "grad_norm": 0.7743059396743774, + "learning_rate": 0.0015, + "loss": 1.6829, + "step": 1630 + }, + { + "epoch": 0.1729957805907173, + "grad_norm": 0.650581955909729, + "learning_rate": 0.0015, + "loss": 1.6871, + "step": 1640 + }, + { + "epoch": 0.17405063291139242, + "grad_norm": 0.8814953565597534, + "learning_rate": 0.0015, + "loss": 1.677, + "step": 1650 + }, + { + "epoch": 0.1751054852320675, + "grad_norm": 0.6455058455467224, + "learning_rate": 0.0015, + "loss": 1.6756, + "step": 1660 + }, + { + "epoch": 0.17616033755274263, + "grad_norm": 0.8890596032142639, + "learning_rate": 0.0015, + "loss": 1.6854, + "step": 1670 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 1.4528861045837402, + "learning_rate": 0.0015, + "loss": 1.6738, + "step": 1680 + }, + { + "epoch": 0.17827004219409281, + "grad_norm": 0.7478601932525635, + "learning_rate": 0.0015, + "loss": 1.6691, + "step": 1690 + }, + { + "epoch": 0.17932489451476794, + "grad_norm": 0.8809925317764282, + "learning_rate": 0.0015, + "loss": 1.6833, + "step": 1700 + }, + { + "epoch": 0.18037974683544303, + "grad_norm": 0.7994898557662964, + "learning_rate": 0.0015, + "loss": 
1.6622, + "step": 1710 + }, + { + "epoch": 0.18143459915611815, + "grad_norm": 0.6942868232727051, + "learning_rate": 0.0015, + "loss": 1.6623, + "step": 1720 + }, + { + "epoch": 0.18248945147679324, + "grad_norm": 0.6879727244377136, + "learning_rate": 0.0015, + "loss": 1.6664, + "step": 1730 + }, + { + "epoch": 0.18354430379746836, + "grad_norm": 0.6466996669769287, + "learning_rate": 0.0015, + "loss": 1.6686, + "step": 1740 + }, + { + "epoch": 0.18459915611814345, + "grad_norm": 0.5975399017333984, + "learning_rate": 0.0015, + "loss": 1.6886, + "step": 1750 + }, + { + "epoch": 0.18565400843881857, + "grad_norm": 0.6716293692588806, + "learning_rate": 0.0015, + "loss": 1.6665, + "step": 1760 + }, + { + "epoch": 0.18670886075949367, + "grad_norm": 0.7008141875267029, + "learning_rate": 0.0015, + "loss": 1.6698, + "step": 1770 + }, + { + "epoch": 0.1877637130801688, + "grad_norm": 0.95294189453125, + "learning_rate": 0.0015, + "loss": 1.669, + "step": 1780 + }, + { + "epoch": 0.18881856540084388, + "grad_norm": 0.8300499320030212, + "learning_rate": 0.0015, + "loss": 1.669, + "step": 1790 + }, + { + "epoch": 0.189873417721519, + "grad_norm": 0.58116215467453, + "learning_rate": 0.0015, + "loss": 1.6633, + "step": 1800 + }, + { + "epoch": 0.1909282700421941, + "grad_norm": 0.899095356464386, + "learning_rate": 0.0015, + "loss": 1.6506, + "step": 1810 + }, + { + "epoch": 0.19198312236286919, + "grad_norm": 0.6602675318717957, + "learning_rate": 0.0015, + "loss": 1.6469, + "step": 1820 + }, + { + "epoch": 0.1930379746835443, + "grad_norm": 0.7812621593475342, + "learning_rate": 0.0015, + "loss": 1.6477, + "step": 1830 + }, + { + "epoch": 0.1940928270042194, + "grad_norm": 0.6211776733398438, + "learning_rate": 0.0015, + "loss": 1.6485, + "step": 1840 + }, + { + "epoch": 0.19514767932489452, + "grad_norm": 0.6210444569587708, + "learning_rate": 0.0015, + "loss": 1.6531, + "step": 1850 + }, + { + "epoch": 0.1962025316455696, + "grad_norm": 0.7021569013595581, + 
"learning_rate": 0.0015, + "loss": 1.6557, + "step": 1860 + }, + { + "epoch": 0.19725738396624473, + "grad_norm": 0.7302125692367554, + "learning_rate": 0.0015, + "loss": 1.6566, + "step": 1870 + }, + { + "epoch": 0.19831223628691982, + "grad_norm": 0.7046329379081726, + "learning_rate": 0.0015, + "loss": 1.6423, + "step": 1880 + }, + { + "epoch": 0.19936708860759494, + "grad_norm": 0.7809279561042786, + "learning_rate": 0.0015, + "loss": 1.6498, + "step": 1890 + }, + { + "epoch": 0.20042194092827004, + "grad_norm": 1.0289695262908936, + "learning_rate": 0.0015, + "loss": 1.6518, + "step": 1900 + }, + { + "epoch": 0.20147679324894516, + "grad_norm": 0.8597456812858582, + "learning_rate": 0.0015, + "loss": 1.6368, + "step": 1910 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 0.6375691294670105, + "learning_rate": 0.0015, + "loss": 1.6372, + "step": 1920 + }, + { + "epoch": 0.20358649789029537, + "grad_norm": 0.7567825317382812, + "learning_rate": 0.0015, + "loss": 1.6435, + "step": 1930 + }, + { + "epoch": 0.20464135021097046, + "grad_norm": 0.6211913228034973, + "learning_rate": 0.0015, + "loss": 1.6314, + "step": 1940 + }, + { + "epoch": 0.20569620253164558, + "grad_norm": 0.6424610018730164, + "learning_rate": 0.0015, + "loss": 1.632, + "step": 1950 + }, + { + "epoch": 0.20675105485232068, + "grad_norm": 0.6862204670906067, + "learning_rate": 0.0015, + "loss": 1.6371, + "step": 1960 + }, + { + "epoch": 0.20780590717299577, + "grad_norm": 0.6278738975524902, + "learning_rate": 0.0015, + "loss": 1.6481, + "step": 1970 + }, + { + "epoch": 0.2088607594936709, + "grad_norm": 0.6842531561851501, + "learning_rate": 0.0015, + "loss": 1.6296, + "step": 1980 + }, + { + "epoch": 0.20991561181434598, + "grad_norm": 0.7904012799263, + "learning_rate": 0.0015, + "loss": 1.6275, + "step": 1990 + }, + { + "epoch": 0.2109704641350211, + "grad_norm": 1.007664680480957, + "learning_rate": 0.0015, + "loss": 1.6275, + "step": 2000 + }, + { + "epoch": 0.2120253164556962, + 
"grad_norm": 0.7655643820762634, + "learning_rate": 0.0015, + "loss": 1.6321, + "step": 2010 + }, + { + "epoch": 0.21308016877637131, + "grad_norm": 1.1572829484939575, + "learning_rate": 0.0015, + "loss": 1.644, + "step": 2020 + }, + { + "epoch": 0.2141350210970464, + "grad_norm": 0.7052775621414185, + "learning_rate": 0.0015, + "loss": 1.6289, + "step": 2030 + }, + { + "epoch": 0.21518987341772153, + "grad_norm": 0.6005434393882751, + "learning_rate": 0.0015, + "loss": 1.6274, + "step": 2040 + }, + { + "epoch": 0.21624472573839662, + "grad_norm": 0.6911314129829407, + "learning_rate": 0.0015, + "loss": 1.6259, + "step": 2050 + }, + { + "epoch": 0.21729957805907174, + "grad_norm": 0.8317500948905945, + "learning_rate": 0.0015, + "loss": 1.619, + "step": 2060 + }, + { + "epoch": 0.21835443037974683, + "grad_norm": 0.833850085735321, + "learning_rate": 0.0015, + "loss": 1.6216, + "step": 2070 + }, + { + "epoch": 0.21940928270042195, + "grad_norm": 0.6570703387260437, + "learning_rate": 0.0015, + "loss": 1.6263, + "step": 2080 + }, + { + "epoch": 0.22046413502109705, + "grad_norm": 0.988843560218811, + "learning_rate": 0.0015, + "loss": 1.6137, + "step": 2090 + }, + { + "epoch": 0.22151898734177214, + "grad_norm": 0.9754023551940918, + "learning_rate": 0.0015, + "loss": 1.6229, + "step": 2100 + }, + { + "epoch": 0.22257383966244726, + "grad_norm": 1.1097123622894287, + "learning_rate": 0.0015, + "loss": 1.6139, + "step": 2110 + }, + { + "epoch": 0.22362869198312235, + "grad_norm": 0.7060465216636658, + "learning_rate": 0.0015, + "loss": 1.6097, + "step": 2120 + }, + { + "epoch": 0.22468354430379747, + "grad_norm": 0.5796406865119934, + "learning_rate": 0.0015, + "loss": 1.6129, + "step": 2130 + }, + { + "epoch": 0.22573839662447256, + "grad_norm": 0.5976617336273193, + "learning_rate": 0.0015, + "loss": 1.6077, + "step": 2140 + }, + { + "epoch": 0.22679324894514769, + "grad_norm": 0.8853664398193359, + "learning_rate": 0.0015, + "loss": 1.6165, + "step": 2150 + }, + 
{ + "epoch": 0.22784810126582278, + "grad_norm": 0.9624297022819519, + "learning_rate": 0.0015, + "loss": 1.6276, + "step": 2160 + }, + { + "epoch": 0.2289029535864979, + "grad_norm": 0.8721666932106018, + "learning_rate": 0.0015, + "loss": 1.6127, + "step": 2170 + }, + { + "epoch": 0.229957805907173, + "grad_norm": 0.6377700567245483, + "learning_rate": 0.0015, + "loss": 1.5955, + "step": 2180 + }, + { + "epoch": 0.2310126582278481, + "grad_norm": 0.8996946215629578, + "learning_rate": 0.0015, + "loss": 1.613, + "step": 2190 + }, + { + "epoch": 0.2320675105485232, + "grad_norm": 0.6748794913291931, + "learning_rate": 0.0015, + "loss": 1.6173, + "step": 2200 + }, + { + "epoch": 0.23312236286919832, + "grad_norm": 0.6428014039993286, + "learning_rate": 0.0015, + "loss": 1.5968, + "step": 2210 + }, + { + "epoch": 0.23417721518987342, + "grad_norm": 0.5703840851783752, + "learning_rate": 0.0015, + "loss": 1.6088, + "step": 2220 + }, + { + "epoch": 0.23523206751054854, + "grad_norm": 0.6045317649841309, + "learning_rate": 0.0015, + "loss": 1.607, + "step": 2230 + }, + { + "epoch": 0.23628691983122363, + "grad_norm": 0.6860820055007935, + "learning_rate": 0.0015, + "loss": 1.5989, + "step": 2240 + }, + { + "epoch": 0.23734177215189872, + "grad_norm": 0.824304461479187, + "learning_rate": 0.0015, + "loss": 1.5969, + "step": 2250 + }, + { + "epoch": 0.23839662447257384, + "grad_norm": 0.7699740529060364, + "learning_rate": 0.0015, + "loss": 1.5881, + "step": 2260 + }, + { + "epoch": 0.23945147679324894, + "grad_norm": 0.9184193015098572, + "learning_rate": 0.0015, + "loss": 1.6235, + "step": 2270 + }, + { + "epoch": 0.24050632911392406, + "grad_norm": 1.0606364011764526, + "learning_rate": 0.0015, + "loss": 1.6099, + "step": 2280 + }, + { + "epoch": 0.24156118143459915, + "grad_norm": 0.8909003734588623, + "learning_rate": 0.0015, + "loss": 1.5979, + "step": 2290 + }, + { + "epoch": 0.24261603375527427, + "grad_norm": 0.6684753894805908, + "learning_rate": 0.0015, + 
"loss": 1.5894, + "step": 2300 + }, + { + "epoch": 0.24367088607594936, + "grad_norm": 0.5911417603492737, + "learning_rate": 0.0015, + "loss": 1.5916, + "step": 2310 + }, + { + "epoch": 0.24472573839662448, + "grad_norm": 0.6023501753807068, + "learning_rate": 0.0015, + "loss": 1.6015, + "step": 2320 + }, + { + "epoch": 0.24578059071729957, + "grad_norm": 0.7245410084724426, + "learning_rate": 0.0015, + "loss": 1.5855, + "step": 2330 + }, + { + "epoch": 0.2468354430379747, + "grad_norm": 0.8492023348808289, + "learning_rate": 0.0015, + "loss": 1.581, + "step": 2340 + }, + { + "epoch": 0.2478902953586498, + "grad_norm": 0.7202883958816528, + "learning_rate": 0.0015, + "loss": 1.5835, + "step": 2350 + }, + { + "epoch": 0.2489451476793249, + "grad_norm": 0.6428688764572144, + "learning_rate": 0.0015, + "loss": 1.5932, + "step": 2360 + }, + { + "epoch": 0.25, + "grad_norm": 0.6176289319992065, + "learning_rate": 0.0015, + "loss": 1.5895, + "step": 2370 + }, + { + "epoch": 0.2510548523206751, + "grad_norm": 0.58685702085495, + "learning_rate": 0.0015, + "loss": 1.5855, + "step": 2380 + }, + { + "epoch": 0.2521097046413502, + "grad_norm": 0.6196684241294861, + "learning_rate": 0.0015, + "loss": 1.5888, + "step": 2390 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 0.5961893796920776, + "learning_rate": 0.0015, + "loss": 1.587, + "step": 2400 + }, + { + "epoch": 0.2542194092827004, + "grad_norm": 0.7569496631622314, + "learning_rate": 0.0015, + "loss": 1.5893, + "step": 2410 + }, + { + "epoch": 0.2552742616033755, + "grad_norm": 0.6933283805847168, + "learning_rate": 0.0015, + "loss": 1.5838, + "step": 2420 + }, + { + "epoch": 0.2563291139240506, + "grad_norm": 0.5842498540878296, + "learning_rate": 0.0015, + "loss": 1.5811, + "step": 2430 + }, + { + "epoch": 0.25738396624472576, + "grad_norm": 0.8091155290603638, + "learning_rate": 0.0015, + "loss": 1.5908, + "step": 2440 + }, + { + "epoch": 0.25843881856540085, + "grad_norm": 0.6274420619010925, + 
"learning_rate": 0.0015, + "loss": 1.5744, + "step": 2450 + }, + { + "epoch": 0.25949367088607594, + "grad_norm": 0.7425103783607483, + "learning_rate": 0.0015, + "loss": 1.5901, + "step": 2460 + }, + { + "epoch": 0.26054852320675104, + "grad_norm": 1.0398924350738525, + "learning_rate": 0.0015, + "loss": 1.5823, + "step": 2470 + }, + { + "epoch": 0.2616033755274262, + "grad_norm": 0.6561729907989502, + "learning_rate": 0.0015, + "loss": 1.5872, + "step": 2480 + }, + { + "epoch": 0.2626582278481013, + "grad_norm": 0.5862956643104553, + "learning_rate": 0.0015, + "loss": 1.5706, + "step": 2490 + }, + { + "epoch": 0.26371308016877637, + "grad_norm": 0.672885000705719, + "learning_rate": 0.0015, + "loss": 1.5754, + "step": 2500 + }, + { + "epoch": 0.26476793248945146, + "grad_norm": 0.6303860545158386, + "learning_rate": 0.0015, + "loss": 1.5772, + "step": 2510 + }, + { + "epoch": 0.26582278481012656, + "grad_norm": 0.7278957366943359, + "learning_rate": 0.0015, + "loss": 1.5807, + "step": 2520 + }, + { + "epoch": 0.2668776371308017, + "grad_norm": 0.627494752407074, + "learning_rate": 0.0015, + "loss": 1.5768, + "step": 2530 + }, + { + "epoch": 0.2679324894514768, + "grad_norm": 0.9720372557640076, + "learning_rate": 0.0015, + "loss": 1.5779, + "step": 2540 + }, + { + "epoch": 0.2689873417721519, + "grad_norm": 0.7690454721450806, + "learning_rate": 0.0015, + "loss": 1.5742, + "step": 2550 + }, + { + "epoch": 0.270042194092827, + "grad_norm": 0.6532604694366455, + "learning_rate": 0.0015, + "loss": 1.5826, + "step": 2560 + }, + { + "epoch": 0.27109704641350213, + "grad_norm": 0.5873631834983826, + "learning_rate": 0.0015, + "loss": 1.5744, + "step": 2570 + }, + { + "epoch": 0.2721518987341772, + "grad_norm": 0.5726386904716492, + "learning_rate": 0.0015, + "loss": 1.5763, + "step": 2580 + }, + { + "epoch": 0.2732067510548523, + "grad_norm": 0.6167312860488892, + "learning_rate": 0.0015, + "loss": 1.5801, + "step": 2590 + }, + { + "epoch": 0.2742616033755274, + 
"grad_norm": 0.5835269689559937, + "learning_rate": 0.0015, + "loss": 1.5768, + "step": 2600 + }, + { + "epoch": 0.27531645569620256, + "grad_norm": 0.5940921902656555, + "learning_rate": 0.0015, + "loss": 1.5762, + "step": 2610 + }, + { + "epoch": 0.27637130801687765, + "grad_norm": 0.6355006098747253, + "learning_rate": 0.0015, + "loss": 1.5755, + "step": 2620 + }, + { + "epoch": 0.27742616033755274, + "grad_norm": 0.6540349721908569, + "learning_rate": 0.0015, + "loss": 1.5629, + "step": 2630 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 0.6579221487045288, + "learning_rate": 0.0015, + "loss": 1.5772, + "step": 2640 + }, + { + "epoch": 0.2795358649789029, + "grad_norm": 0.6749079823493958, + "learning_rate": 0.0015, + "loss": 1.5668, + "step": 2650 + }, + { + "epoch": 0.2805907172995781, + "grad_norm": 0.7748566269874573, + "learning_rate": 0.0015, + "loss": 1.5592, + "step": 2660 + }, + { + "epoch": 0.28164556962025317, + "grad_norm": 0.5989497303962708, + "learning_rate": 0.0015, + "loss": 1.5616, + "step": 2670 + }, + { + "epoch": 0.28270042194092826, + "grad_norm": 0.5915218591690063, + "learning_rate": 0.0015, + "loss": 1.5615, + "step": 2680 + }, + { + "epoch": 0.28375527426160335, + "grad_norm": 0.945878267288208, + "learning_rate": 0.0015, + "loss": 1.5648, + "step": 2690 + }, + { + "epoch": 0.2848101265822785, + "grad_norm": 0.7878134846687317, + "learning_rate": 0.0015, + "loss": 1.5566, + "step": 2700 + }, + { + "epoch": 0.2858649789029536, + "grad_norm": 0.7741822600364685, + "learning_rate": 0.0015, + "loss": 1.5653, + "step": 2710 + }, + { + "epoch": 0.2869198312236287, + "grad_norm": 0.6077671051025391, + "learning_rate": 0.0015, + "loss": 1.564, + "step": 2720 + }, + { + "epoch": 0.2879746835443038, + "grad_norm": 0.8397111892700195, + "learning_rate": 0.0015, + "loss": 1.544, + "step": 2730 + }, + { + "epoch": 0.2890295358649789, + "grad_norm": 0.6295060515403748, + "learning_rate": 0.0015, + "loss": 1.559, + "step": 2740 + }, + { + 
"epoch": 0.290084388185654, + "grad_norm": 0.5670669674873352, + "learning_rate": 0.0015, + "loss": 1.5621, + "step": 2750 + }, + { + "epoch": 0.2911392405063291, + "grad_norm": 0.7970763444900513, + "learning_rate": 0.0015, + "loss": 1.5711, + "step": 2760 + }, + { + "epoch": 0.2921940928270042, + "grad_norm": 0.6325806975364685, + "learning_rate": 0.0015, + "loss": 1.5639, + "step": 2770 + }, + { + "epoch": 0.29324894514767935, + "grad_norm": 0.5956951379776001, + "learning_rate": 0.0015, + "loss": 1.5605, + "step": 2780 + }, + { + "epoch": 0.29430379746835444, + "grad_norm": 0.5744134187698364, + "learning_rate": 0.0015, + "loss": 1.5598, + "step": 2790 + }, + { + "epoch": 0.29535864978902954, + "grad_norm": 0.6545819044113159, + "learning_rate": 0.0015, + "loss": 1.5574, + "step": 2800 + }, + { + "epoch": 0.29641350210970463, + "grad_norm": 0.5304187536239624, + "learning_rate": 0.0015, + "loss": 1.5567, + "step": 2810 + }, + { + "epoch": 0.2974683544303797, + "grad_norm": 0.682017982006073, + "learning_rate": 0.0015, + "loss": 1.5627, + "step": 2820 + }, + { + "epoch": 0.29852320675105487, + "grad_norm": 1.3158142566680908, + "learning_rate": 0.0015, + "loss": 1.5406, + "step": 2830 + }, + { + "epoch": 0.29957805907172996, + "grad_norm": 1.2576978206634521, + "learning_rate": 0.0015, + "loss": 1.5534, + "step": 2840 + }, + { + "epoch": 0.30063291139240506, + "grad_norm": 0.7173641920089722, + "learning_rate": 0.0015, + "loss": 1.5492, + "step": 2850 + }, + { + "epoch": 0.30168776371308015, + "grad_norm": 0.5971997380256653, + "learning_rate": 0.0015, + "loss": 1.5463, + "step": 2860 + }, + { + "epoch": 0.3027426160337553, + "grad_norm": 0.8635569214820862, + "learning_rate": 0.0015, + "loss": 1.5493, + "step": 2870 + }, + { + "epoch": 0.3037974683544304, + "grad_norm": 0.7581474184989929, + "learning_rate": 0.0015, + "loss": 1.5382, + "step": 2880 + }, + { + "epoch": 0.3048523206751055, + "grad_norm": 0.6210002899169922, + "learning_rate": 0.0015, + "loss": 
1.5437, + "step": 2890 + }, + { + "epoch": 0.3059071729957806, + "grad_norm": 0.8539509773254395, + "learning_rate": 0.0015, + "loss": 1.5493, + "step": 2900 + }, + { + "epoch": 0.3069620253164557, + "grad_norm": 0.6014382839202881, + "learning_rate": 0.0015, + "loss": 1.5524, + "step": 2910 + }, + { + "epoch": 0.3080168776371308, + "grad_norm": 0.7142052054405212, + "learning_rate": 0.0015, + "loss": 1.5429, + "step": 2920 + }, + { + "epoch": 0.3090717299578059, + "grad_norm": 0.5726633667945862, + "learning_rate": 0.0015, + "loss": 1.5401, + "step": 2930 + }, + { + "epoch": 0.310126582278481, + "grad_norm": 0.6365280747413635, + "learning_rate": 0.0015, + "loss": 1.5454, + "step": 2940 + }, + { + "epoch": 0.3111814345991561, + "grad_norm": 0.603902280330658, + "learning_rate": 0.0015, + "loss": 1.5593, + "step": 2950 + }, + { + "epoch": 0.31223628691983124, + "grad_norm": 1.0410481691360474, + "learning_rate": 0.0015, + "loss": 1.5395, + "step": 2960 + }, + { + "epoch": 0.31329113924050633, + "grad_norm": 0.7541502118110657, + "learning_rate": 0.0015, + "loss": 1.5279, + "step": 2970 + }, + { + "epoch": 0.3143459915611814, + "grad_norm": 0.6524734497070312, + "learning_rate": 0.0015, + "loss": 1.537, + "step": 2980 + }, + { + "epoch": 0.3154008438818565, + "grad_norm": 0.679301917552948, + "learning_rate": 0.0015, + "loss": 1.5511, + "step": 2990 + }, + { + "epoch": 0.31645569620253167, + "grad_norm": 0.8344805836677551, + "learning_rate": 0.0015, + "loss": 1.5325, + "step": 3000 + }, + { + "epoch": 0.31751054852320676, + "grad_norm": 0.6176565289497375, + "learning_rate": 0.0015, + "loss": 1.5403, + "step": 3010 + }, + { + "epoch": 0.31856540084388185, + "grad_norm": 0.7117000222206116, + "learning_rate": 0.0015, + "loss": 1.5427, + "step": 3020 + }, + { + "epoch": 0.31962025316455694, + "grad_norm": 0.6211459040641785, + "learning_rate": 0.0015, + "loss": 1.5283, + "step": 3030 + }, + { + "epoch": 0.3206751054852321, + "grad_norm": 0.6853720545768738, + 
"learning_rate": 0.0015, + "loss": 1.5366, + "step": 3040 + }, + { + "epoch": 0.3217299578059072, + "grad_norm": 0.578183650970459, + "learning_rate": 0.0015, + "loss": 1.5306, + "step": 3050 + }, + { + "epoch": 0.3227848101265823, + "grad_norm": 0.5567953586578369, + "learning_rate": 0.0015, + "loss": 1.5443, + "step": 3060 + }, + { + "epoch": 0.32383966244725737, + "grad_norm": 0.5239658355712891, + "learning_rate": 0.0015, + "loss": 1.5335, + "step": 3070 + }, + { + "epoch": 0.32489451476793246, + "grad_norm": 0.5808655023574829, + "learning_rate": 0.0015, + "loss": 1.5246, + "step": 3080 + }, + { + "epoch": 0.3259493670886076, + "grad_norm": 0.5554609298706055, + "learning_rate": 0.0015, + "loss": 1.5392, + "step": 3090 + }, + { + "epoch": 0.3270042194092827, + "grad_norm": 0.7155588269233704, + "learning_rate": 0.0015, + "loss": 1.5273, + "step": 3100 + }, + { + "epoch": 0.3280590717299578, + "grad_norm": 0.5930044651031494, + "learning_rate": 0.0015, + "loss": 1.5325, + "step": 3110 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 0.6310344934463501, + "learning_rate": 0.0015, + "loss": 1.5321, + "step": 3120 + }, + { + "epoch": 0.33016877637130804, + "grad_norm": 0.6500780582427979, + "learning_rate": 0.0015, + "loss": 1.5452, + "step": 3130 + }, + { + "epoch": 0.33122362869198313, + "grad_norm": 0.6284987926483154, + "learning_rate": 0.0015, + "loss": 1.5247, + "step": 3140 + }, + { + "epoch": 0.3322784810126582, + "grad_norm": 0.6179961562156677, + "learning_rate": 0.0015, + "loss": 1.5321, + "step": 3150 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.6366827487945557, + "learning_rate": 0.0015, + "loss": 1.5337, + "step": 3160 + }, + { + "epoch": 0.33438818565400846, + "grad_norm": 0.6704961061477661, + "learning_rate": 0.0015, + "loss": 1.5344, + "step": 3170 + }, + { + "epoch": 0.33544303797468356, + "grad_norm": 0.7158640623092651, + "learning_rate": 0.0015, + "loss": 1.5298, + "step": 3180 + }, + { + "epoch": 0.33649789029535865, + 
"grad_norm": 0.6761768460273743, + "learning_rate": 0.0015, + "loss": 1.5311, + "step": 3190 + }, + { + "epoch": 0.33755274261603374, + "grad_norm": 0.6904314160346985, + "learning_rate": 0.0015, + "loss": 1.5243, + "step": 3200 + }, + { + "epoch": 0.33860759493670883, + "grad_norm": 0.7411789894104004, + "learning_rate": 0.0015, + "loss": 1.5355, + "step": 3210 + }, + { + "epoch": 0.339662447257384, + "grad_norm": 0.6276416182518005, + "learning_rate": 0.0015, + "loss": 1.5281, + "step": 3220 + }, + { + "epoch": 0.3407172995780591, + "grad_norm": 0.9376272559165955, + "learning_rate": 0.0015, + "loss": 1.521, + "step": 3230 + }, + { + "epoch": 0.34177215189873417, + "grad_norm": 0.5644498467445374, + "learning_rate": 0.0015, + "loss": 1.5052, + "step": 3240 + }, + { + "epoch": 0.34282700421940926, + "grad_norm": 0.5602062940597534, + "learning_rate": 0.0015, + "loss": 1.526, + "step": 3250 + }, + { + "epoch": 0.3438818565400844, + "grad_norm": 0.5296338200569153, + "learning_rate": 0.0015, + "loss": 1.5147, + "step": 3260 + }, + { + "epoch": 0.3449367088607595, + "grad_norm": 0.777691662311554, + "learning_rate": 0.0015, + "loss": 1.5343, + "step": 3270 + }, + { + "epoch": 0.3459915611814346, + "grad_norm": 0.8887121081352234, + "learning_rate": 0.0015, + "loss": 1.5321, + "step": 3280 + }, + { + "epoch": 0.3470464135021097, + "grad_norm": 0.9320899844169617, + "learning_rate": 0.0015, + "loss": 1.5147, + "step": 3290 + }, + { + "epoch": 0.34810126582278483, + "grad_norm": 0.6108123660087585, + "learning_rate": 0.0015, + "loss": 1.5256, + "step": 3300 + }, + { + "epoch": 0.3491561181434599, + "grad_norm": 0.5436858534812927, + "learning_rate": 0.0015, + "loss": 1.527, + "step": 3310 + }, + { + "epoch": 0.350210970464135, + "grad_norm": 0.6518274545669556, + "learning_rate": 0.0015, + "loss": 1.5341, + "step": 3320 + }, + { + "epoch": 0.3512658227848101, + "grad_norm": 0.5777610540390015, + "learning_rate": 0.0015, + "loss": 1.5187, + "step": 3330 + }, + { + 
"epoch": 0.35232067510548526, + "grad_norm": 0.8288917541503906, + "learning_rate": 0.0015, + "loss": 1.521, + "step": 3340 + }, + { + "epoch": 0.35337552742616035, + "grad_norm": 0.5676469802856445, + "learning_rate": 0.0015, + "loss": 1.5147, + "step": 3350 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 0.8349984288215637, + "learning_rate": 0.0015, + "loss": 1.5141, + "step": 3360 + }, + { + "epoch": 0.35548523206751054, + "grad_norm": 0.6100229620933533, + "learning_rate": 0.0015, + "loss": 1.5237, + "step": 3370 + }, + { + "epoch": 0.35654008438818563, + "grad_norm": 0.6039717197418213, + "learning_rate": 0.0015, + "loss": 1.5167, + "step": 3380 + }, + { + "epoch": 0.3575949367088608, + "grad_norm": 0.5780033469200134, + "learning_rate": 0.0015, + "loss": 1.5103, + "step": 3390 + }, + { + "epoch": 0.35864978902953587, + "grad_norm": 0.6542384028434753, + "learning_rate": 0.0015, + "loss": 1.5208, + "step": 3400 + }, + { + "epoch": 0.35970464135021096, + "grad_norm": 0.6112498044967651, + "learning_rate": 0.0015, + "loss": 1.5184, + "step": 3410 + }, + { + "epoch": 0.36075949367088606, + "grad_norm": 0.8052992820739746, + "learning_rate": 0.0015, + "loss": 1.4979, + "step": 3420 + }, + { + "epoch": 0.3618143459915612, + "grad_norm": 0.5642687082290649, + "learning_rate": 0.0015, + "loss": 1.5152, + "step": 3430 + }, + { + "epoch": 0.3628691983122363, + "grad_norm": 0.5802172422409058, + "learning_rate": 0.0015, + "loss": 1.5125, + "step": 3440 + }, + { + "epoch": 0.3639240506329114, + "grad_norm": 0.6300274133682251, + "learning_rate": 0.0015, + "loss": 1.5083, + "step": 3450 + }, + { + "epoch": 0.3649789029535865, + "grad_norm": 0.9260715246200562, + "learning_rate": 0.0015, + "loss": 1.5008, + "step": 3460 + }, + { + "epoch": 0.36603375527426163, + "grad_norm": 0.5329908728599548, + "learning_rate": 0.0015, + "loss": 1.5174, + "step": 3470 + }, + { + "epoch": 0.3670886075949367, + "grad_norm": 0.583369255065918, + "learning_rate": 0.0015, + "loss": 
1.5246, + "step": 3480 + }, + { + "epoch": 0.3681434599156118, + "grad_norm": 0.5699556469917297, + "learning_rate": 0.0015, + "loss": 1.5158, + "step": 3490 + }, + { + "epoch": 0.3691983122362869, + "grad_norm": 0.6627075672149658, + "learning_rate": 0.0015, + "loss": 1.4985, + "step": 3500 + }, + { + "epoch": 0.370253164556962, + "grad_norm": 0.6062209010124207, + "learning_rate": 0.0015, + "loss": 1.5209, + "step": 3510 + }, + { + "epoch": 0.37130801687763715, + "grad_norm": 0.5941247344017029, + "learning_rate": 0.0015, + "loss": 1.5147, + "step": 3520 + }, + { + "epoch": 0.37236286919831224, + "grad_norm": 0.6869729161262512, + "learning_rate": 0.0015, + "loss": 1.5244, + "step": 3530 + }, + { + "epoch": 0.37341772151898733, + "grad_norm": 1.0854729413986206, + "learning_rate": 0.0015, + "loss": 1.5185, + "step": 3540 + }, + { + "epoch": 0.3744725738396624, + "grad_norm": 0.5772907137870789, + "learning_rate": 0.0015, + "loss": 1.5072, + "step": 3550 + }, + { + "epoch": 0.3755274261603376, + "grad_norm": 0.5391974449157715, + "learning_rate": 0.0015, + "loss": 1.5086, + "step": 3560 + }, + { + "epoch": 0.37658227848101267, + "grad_norm": 0.9309356212615967, + "learning_rate": 0.0015, + "loss": 1.5032, + "step": 3570 + }, + { + "epoch": 0.37763713080168776, + "grad_norm": 0.6355001330375671, + "learning_rate": 0.0015, + "loss": 1.5061, + "step": 3580 + }, + { + "epoch": 0.37869198312236285, + "grad_norm": 0.6061651706695557, + "learning_rate": 0.0015, + "loss": 1.516, + "step": 3590 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 0.7389352917671204, + "learning_rate": 0.0015, + "loss": 1.4994, + "step": 3600 + }, + { + "epoch": 0.3808016877637131, + "grad_norm": 0.5810843706130981, + "learning_rate": 0.0015, + "loss": 1.5027, + "step": 3610 + }, + { + "epoch": 0.3818565400843882, + "grad_norm": 0.642076849937439, + "learning_rate": 0.0015, + "loss": 1.5019, + "step": 3620 + }, + { + "epoch": 0.3829113924050633, + "grad_norm": 0.5598652362823486, + 
"learning_rate": 0.0015, + "loss": 1.5035, + "step": 3630 + }, + { + "epoch": 0.38396624472573837, + "grad_norm": 0.7780454754829407, + "learning_rate": 0.0015, + "loss": 1.5061, + "step": 3640 + }, + { + "epoch": 0.3850210970464135, + "grad_norm": 0.812456488609314, + "learning_rate": 0.0015, + "loss": 1.5132, + "step": 3650 + }, + { + "epoch": 0.3860759493670886, + "grad_norm": 0.7260994911193848, + "learning_rate": 0.0015, + "loss": 1.5075, + "step": 3660 + }, + { + "epoch": 0.3871308016877637, + "grad_norm": 0.6154223084449768, + "learning_rate": 0.0015, + "loss": 1.5002, + "step": 3670 + }, + { + "epoch": 0.3881856540084388, + "grad_norm": 0.7957944273948669, + "learning_rate": 0.0015, + "loss": 1.4996, + "step": 3680 + }, + { + "epoch": 0.38924050632911394, + "grad_norm": 0.6166795492172241, + "learning_rate": 0.0015, + "loss": 1.5133, + "step": 3690 + }, + { + "epoch": 0.39029535864978904, + "grad_norm": 0.8378509283065796, + "learning_rate": 0.0015, + "loss": 1.5157, + "step": 3700 + }, + { + "epoch": 0.39135021097046413, + "grad_norm": 0.7237342000007629, + "learning_rate": 0.0015, + "loss": 1.5031, + "step": 3710 + }, + { + "epoch": 0.3924050632911392, + "grad_norm": 0.541682243347168, + "learning_rate": 0.0015, + "loss": 1.4812, + "step": 3720 + }, + { + "epoch": 0.39345991561181437, + "grad_norm": 0.5357847809791565, + "learning_rate": 0.0015, + "loss": 1.4995, + "step": 3730 + }, + { + "epoch": 0.39451476793248946, + "grad_norm": 0.5915465354919434, + "learning_rate": 0.0015, + "loss": 1.5014, + "step": 3740 + }, + { + "epoch": 0.39556962025316456, + "grad_norm": 0.6309832334518433, + "learning_rate": 0.0015, + "loss": 1.5039, + "step": 3750 + }, + { + "epoch": 0.39662447257383965, + "grad_norm": 0.958678126335144, + "learning_rate": 0.0015, + "loss": 1.4967, + "step": 3760 + }, + { + "epoch": 0.39767932489451474, + "grad_norm": 0.839051365852356, + "learning_rate": 0.0015, + "loss": 1.5099, + "step": 3770 + }, + { + "epoch": 0.3987341772151899, + 
"grad_norm": 0.5315883755683899, + "learning_rate": 0.0015, + "loss": 1.494, + "step": 3780 + }, + { + "epoch": 0.399789029535865, + "grad_norm": 0.8885833621025085, + "learning_rate": 0.0015, + "loss": 1.5005, + "step": 3790 + }, + { + "epoch": 0.4008438818565401, + "grad_norm": 0.8959977030754089, + "learning_rate": 0.0015, + "loss": 1.5104, + "step": 3800 + }, + { + "epoch": 0.40189873417721517, + "grad_norm": 0.550995409488678, + "learning_rate": 0.0015, + "loss": 1.4925, + "step": 3810 + }, + { + "epoch": 0.4029535864978903, + "grad_norm": 0.5550857782363892, + "learning_rate": 0.0015, + "loss": 1.4858, + "step": 3820 + }, + { + "epoch": 0.4040084388185654, + "grad_norm": 0.5689728260040283, + "learning_rate": 0.0015, + "loss": 1.4866, + "step": 3830 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 0.5753951072692871, + "learning_rate": 0.0015, + "loss": 1.5071, + "step": 3840 + }, + { + "epoch": 0.4061181434599156, + "grad_norm": 0.7183377742767334, + "learning_rate": 0.0015, + "loss": 1.4895, + "step": 3850 + }, + { + "epoch": 0.40717299578059074, + "grad_norm": 0.811546802520752, + "learning_rate": 0.0015, + "loss": 1.5033, + "step": 3860 + }, + { + "epoch": 0.40822784810126583, + "grad_norm": 0.5546271204948425, + "learning_rate": 0.0015, + "loss": 1.4924, + "step": 3870 + }, + { + "epoch": 0.4092827004219409, + "grad_norm": 0.5506630539894104, + "learning_rate": 0.0015, + "loss": 1.4846, + "step": 3880 + }, + { + "epoch": 0.410337552742616, + "grad_norm": 0.7391477227210999, + "learning_rate": 0.0015, + "loss": 1.483, + "step": 3890 + }, + { + "epoch": 0.41139240506329117, + "grad_norm": 0.7728936672210693, + "learning_rate": 0.0015, + "loss": 1.4844, + "step": 3900 + }, + { + "epoch": 0.41244725738396626, + "grad_norm": 0.5560399293899536, + "learning_rate": 0.0015, + "loss": 1.4876, + "step": 3910 + }, + { + "epoch": 0.41350210970464135, + "grad_norm": 0.571331799030304, + "learning_rate": 0.0015, + "loss": 1.4847, + "step": 3920 + }, + { + 
"epoch": 0.41455696202531644, + "grad_norm": 0.5048092603683472, + "learning_rate": 0.0015, + "loss": 1.4893, + "step": 3930 + }, + { + "epoch": 0.41561181434599154, + "grad_norm": 0.6338497996330261, + "learning_rate": 0.0015, + "loss": 1.484, + "step": 3940 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 0.7444871068000793, + "learning_rate": 0.0015, + "loss": 1.4897, + "step": 3950 + }, + { + "epoch": 0.4177215189873418, + "grad_norm": 0.618892252445221, + "learning_rate": 0.0015, + "loss": 1.4922, + "step": 3960 + }, + { + "epoch": 0.41877637130801687, + "grad_norm": 0.9204555153846741, + "learning_rate": 0.0015, + "loss": 1.4817, + "step": 3970 + }, + { + "epoch": 0.41983122362869196, + "grad_norm": 0.5919119119644165, + "learning_rate": 0.0015, + "loss": 1.4927, + "step": 3980 + }, + { + "epoch": 0.4208860759493671, + "grad_norm": 0.5490244626998901, + "learning_rate": 0.0015, + "loss": 1.483, + "step": 3990 + }, + { + "epoch": 0.4219409282700422, + "grad_norm": 0.6510594487190247, + "learning_rate": 0.0015, + "loss": 1.4966, + "step": 4000 + }, + { + "epoch": 0.4229957805907173, + "grad_norm": 0.712866485118866, + "learning_rate": 0.0015, + "loss": 1.4814, + "step": 4010 + }, + { + "epoch": 0.4240506329113924, + "grad_norm": 0.5949064493179321, + "learning_rate": 0.0015, + "loss": 1.4881, + "step": 4020 + }, + { + "epoch": 0.42510548523206754, + "grad_norm": 0.5261542797088623, + "learning_rate": 0.0015, + "loss": 1.4768, + "step": 4030 + }, + { + "epoch": 0.42616033755274263, + "grad_norm": 0.5530981421470642, + "learning_rate": 0.0015, + "loss": 1.49, + "step": 4040 + }, + { + "epoch": 0.4272151898734177, + "grad_norm": 0.4799722135066986, + "learning_rate": 0.0015, + "loss": 1.4873, + "step": 4050 + }, + { + "epoch": 0.4282700421940928, + "grad_norm": 0.8407880067825317, + "learning_rate": 0.0015, + "loss": 1.4934, + "step": 4060 + }, + { + "epoch": 0.4293248945147679, + "grad_norm": 0.5783693790435791, + "learning_rate": 0.0015, + "loss": 1.4871, 
+ "step": 4070 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 1.1772202253341675, + "learning_rate": 0.0015, + "loss": 1.4931, + "step": 4080 + }, + { + "epoch": 0.43143459915611815, + "grad_norm": 0.6160233616828918, + "learning_rate": 0.0015, + "loss": 1.4861, + "step": 4090 + }, + { + "epoch": 0.43248945147679324, + "grad_norm": 0.5388474464416504, + "learning_rate": 0.0015, + "loss": 1.4827, + "step": 4100 + }, + { + "epoch": 0.43354430379746833, + "grad_norm": 0.7346330285072327, + "learning_rate": 0.0015, + "loss": 1.4792, + "step": 4110 + }, + { + "epoch": 0.4345991561181435, + "grad_norm": 0.8873032927513123, + "learning_rate": 0.0015, + "loss": 1.4848, + "step": 4120 + }, + { + "epoch": 0.4356540084388186, + "grad_norm": 0.6161367297172546, + "learning_rate": 0.0015, + "loss": 1.4764, + "step": 4130 + }, + { + "epoch": 0.43670886075949367, + "grad_norm": 0.690104067325592, + "learning_rate": 0.0015, + "loss": 1.4783, + "step": 4140 + }, + { + "epoch": 0.43776371308016876, + "grad_norm": 0.6172806620597839, + "learning_rate": 0.0015, + "loss": 1.4829, + "step": 4150 + }, + { + "epoch": 0.4388185654008439, + "grad_norm": 0.8052006363868713, + "learning_rate": 0.0015, + "loss": 1.4764, + "step": 4160 + }, + { + "epoch": 0.439873417721519, + "grad_norm": 0.5306074619293213, + "learning_rate": 0.0015, + "loss": 1.4767, + "step": 4170 + }, + { + "epoch": 0.4409282700421941, + "grad_norm": 0.5288421511650085, + "learning_rate": 0.0015, + "loss": 1.4716, + "step": 4180 + }, + { + "epoch": 0.4419831223628692, + "grad_norm": 0.6664320230484009, + "learning_rate": 0.0015, + "loss": 1.4808, + "step": 4190 + }, + { + "epoch": 0.4430379746835443, + "grad_norm": 0.7073042988777161, + "learning_rate": 0.0015, + "loss": 1.4819, + "step": 4200 + }, + { + "epoch": 0.4440928270042194, + "grad_norm": 0.5821166038513184, + "learning_rate": 0.0015, + "loss": 1.4733, + "step": 4210 + }, + { + "epoch": 0.4451476793248945, + "grad_norm": 0.6406621932983398, + 
"learning_rate": 0.0015, + "loss": 1.4804, + "step": 4220 + }, + { + "epoch": 0.4462025316455696, + "grad_norm": 1.0056570768356323, + "learning_rate": 0.0015, + "loss": 1.4773, + "step": 4230 + }, + { + "epoch": 0.4472573839662447, + "grad_norm": 0.5506517887115479, + "learning_rate": 0.0015, + "loss": 1.4754, + "step": 4240 + }, + { + "epoch": 0.44831223628691985, + "grad_norm": 0.649193286895752, + "learning_rate": 0.0015, + "loss": 1.4804, + "step": 4250 + }, + { + "epoch": 0.44936708860759494, + "grad_norm": 0.7297791838645935, + "learning_rate": 0.0015, + "loss": 1.4855, + "step": 4260 + }, + { + "epoch": 0.45042194092827004, + "grad_norm": 0.532581627368927, + "learning_rate": 0.0015, + "loss": 1.4734, + "step": 4270 + }, + { + "epoch": 0.45147679324894513, + "grad_norm": 0.6852807402610779, + "learning_rate": 0.0015, + "loss": 1.4696, + "step": 4280 + }, + { + "epoch": 0.4525316455696203, + "grad_norm": 0.8694332838058472, + "learning_rate": 0.0015, + "loss": 1.4798, + "step": 4290 + }, + { + "epoch": 0.45358649789029537, + "grad_norm": 0.8203960657119751, + "learning_rate": 0.0015, + "loss": 1.48, + "step": 4300 + }, + { + "epoch": 0.45464135021097046, + "grad_norm": 0.6243816614151001, + "learning_rate": 0.0015, + "loss": 1.4777, + "step": 4310 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 0.8832417726516724, + "learning_rate": 0.0015, + "loss": 1.4736, + "step": 4320 + }, + { + "epoch": 0.45675105485232065, + "grad_norm": 0.5411526560783386, + "learning_rate": 0.0015, + "loss": 1.4801, + "step": 4330 + }, + { + "epoch": 0.4578059071729958, + "grad_norm": 0.6407099366188049, + "learning_rate": 0.0015, + "loss": 1.4733, + "step": 4340 + }, + { + "epoch": 0.4588607594936709, + "grad_norm": 0.9877379536628723, + "learning_rate": 0.0015, + "loss": 1.4721, + "step": 4350 + }, + { + "epoch": 0.459915611814346, + "grad_norm": 0.5923700928688049, + "learning_rate": 0.0015, + "loss": 1.4911, + "step": 4360 + }, + { + "epoch": 0.4609704641350211, + 
"grad_norm": 0.564254105091095, + "learning_rate": 0.0015, + "loss": 1.4669, + "step": 4370 + }, + { + "epoch": 0.4620253164556962, + "grad_norm": 0.5808565020561218, + "learning_rate": 0.0015, + "loss": 1.473, + "step": 4380 + }, + { + "epoch": 0.4630801687763713, + "grad_norm": 0.5405387282371521, + "learning_rate": 0.0015, + "loss": 1.468, + "step": 4390 + }, + { + "epoch": 0.4641350210970464, + "grad_norm": 0.7100303173065186, + "learning_rate": 0.0015, + "loss": 1.472, + "step": 4400 + }, + { + "epoch": 0.4651898734177215, + "grad_norm": 0.6284264922142029, + "learning_rate": 0.0015, + "loss": 1.4851, + "step": 4410 + }, + { + "epoch": 0.46624472573839665, + "grad_norm": 0.6218607425689697, + "learning_rate": 0.0015, + "loss": 1.4712, + "step": 4420 + }, + { + "epoch": 0.46729957805907174, + "grad_norm": 0.6870793700218201, + "learning_rate": 0.0015, + "loss": 1.4748, + "step": 4430 + }, + { + "epoch": 0.46835443037974683, + "grad_norm": 0.8388074040412903, + "learning_rate": 0.0015, + "loss": 1.4694, + "step": 4440 + }, + { + "epoch": 0.4694092827004219, + "grad_norm": 0.6786519885063171, + "learning_rate": 0.0015, + "loss": 1.48, + "step": 4450 + }, + { + "epoch": 0.4704641350210971, + "grad_norm": 0.5861133337020874, + "learning_rate": 0.0015, + "loss": 1.4622, + "step": 4460 + }, + { + "epoch": 0.47151898734177217, + "grad_norm": 0.585155189037323, + "learning_rate": 0.0015, + "loss": 1.4728, + "step": 4470 + }, + { + "epoch": 0.47257383966244726, + "grad_norm": 0.5707352161407471, + "learning_rate": 0.0015, + "loss": 1.4803, + "step": 4480 + }, + { + "epoch": 0.47362869198312235, + "grad_norm": 0.6790894865989685, + "learning_rate": 0.0015, + "loss": 1.4672, + "step": 4490 + }, + { + "epoch": 0.47468354430379744, + "grad_norm": 0.6349377036094666, + "learning_rate": 0.0015, + "loss": 1.4657, + "step": 4500 + }, + { + "epoch": 0.4757383966244726, + "grad_norm": 0.519831120967865, + "learning_rate": 0.0015, + "loss": 1.4694, + "step": 4510 + }, + { + 
"epoch": 0.4767932489451477, + "grad_norm": 0.6475289463996887, + "learning_rate": 0.0015, + "loss": 1.4536, + "step": 4520 + }, + { + "epoch": 0.4778481012658228, + "grad_norm": 0.8085472583770752, + "learning_rate": 0.0015, + "loss": 1.473, + "step": 4530 + }, + { + "epoch": 0.47890295358649787, + "grad_norm": 0.556867241859436, + "learning_rate": 0.0015, + "loss": 1.4745, + "step": 4540 + }, + { + "epoch": 0.479957805907173, + "grad_norm": 0.9104698896408081, + "learning_rate": 0.0015, + "loss": 1.471, + "step": 4550 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 0.6034371256828308, + "learning_rate": 0.0015, + "loss": 1.4619, + "step": 4560 + }, + { + "epoch": 0.4820675105485232, + "grad_norm": 0.5832459926605225, + "learning_rate": 0.0015, + "loss": 1.445, + "step": 4570 + }, + { + "epoch": 0.4831223628691983, + "grad_norm": 0.6280292272567749, + "learning_rate": 0.0015, + "loss": 1.4676, + "step": 4580 + }, + { + "epoch": 0.48417721518987344, + "grad_norm": 0.6227408647537231, + "learning_rate": 0.0015, + "loss": 1.4532, + "step": 4590 + }, + { + "epoch": 0.48523206751054854, + "grad_norm": 0.5720923542976379, + "learning_rate": 0.0015, + "loss": 1.4735, + "step": 4600 + }, + { + "epoch": 0.48628691983122363, + "grad_norm": 0.9153615832328796, + "learning_rate": 0.0015, + "loss": 1.4749, + "step": 4610 + }, + { + "epoch": 0.4873417721518987, + "grad_norm": 0.5959382057189941, + "learning_rate": 0.0015, + "loss": 1.4528, + "step": 4620 + }, + { + "epoch": 0.4883966244725738, + "grad_norm": 0.6096983551979065, + "learning_rate": 0.0015, + "loss": 1.4694, + "step": 4630 + }, + { + "epoch": 0.48945147679324896, + "grad_norm": 0.6150070428848267, + "learning_rate": 0.0015, + "loss": 1.4708, + "step": 4640 + }, + { + "epoch": 0.49050632911392406, + "grad_norm": 0.6063257455825806, + "learning_rate": 0.0015, + "loss": 1.4729, + "step": 4650 + }, + { + "epoch": 0.49156118143459915, + "grad_norm": 0.5810568928718567, + "learning_rate": 0.0015, + "loss": 1.461, 
+ "step": 4660 + }, + { + "epoch": 0.49261603375527424, + "grad_norm": 0.8787219524383545, + "learning_rate": 0.0015, + "loss": 1.4599, + "step": 4670 + }, + { + "epoch": 0.4936708860759494, + "grad_norm": 0.5396552681922913, + "learning_rate": 0.0015, + "loss": 1.4572, + "step": 4680 + }, + { + "epoch": 0.4947257383966245, + "grad_norm": 0.7619385719299316, + "learning_rate": 0.0015, + "loss": 1.4503, + "step": 4690 + }, + { + "epoch": 0.4957805907172996, + "grad_norm": 0.5797851085662842, + "learning_rate": 0.0015, + "loss": 1.4641, + "step": 4700 + }, + { + "epoch": 0.49683544303797467, + "grad_norm": 0.5601385831832886, + "learning_rate": 0.0015, + "loss": 1.4659, + "step": 4710 + }, + { + "epoch": 0.4978902953586498, + "grad_norm": 0.7970922589302063, + "learning_rate": 0.0015, + "loss": 1.4619, + "step": 4720 + }, + { + "epoch": 0.4989451476793249, + "grad_norm": 0.6073303818702698, + "learning_rate": 0.0015, + "loss": 1.4589, + "step": 4730 + }, + { + "epoch": 0.5, + "grad_norm": 0.6987572312355042, + "learning_rate": 0.0015, + "loss": 1.4575, + "step": 4740 + }, + { + "epoch": 0.5010548523206751, + "grad_norm": 0.7734752893447876, + "learning_rate": 0.0015, + "loss": 1.4764, + "step": 4750 + }, + { + "epoch": 0.5021097046413502, + "grad_norm": 0.5538147687911987, + "learning_rate": 0.0015, + "loss": 1.4599, + "step": 4760 + }, + { + "epoch": 0.5031645569620253, + "grad_norm": 0.6562402248382568, + "learning_rate": 0.0015, + "loss": 1.4576, + "step": 4770 + }, + { + "epoch": 0.5042194092827004, + "grad_norm": 0.6011418700218201, + "learning_rate": 0.0015, + "loss": 1.468, + "step": 4780 + }, + { + "epoch": 0.5052742616033755, + "grad_norm": 0.6132929921150208, + "learning_rate": 0.0015, + "loss": 1.4508, + "step": 4790 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 0.555518388748169, + "learning_rate": 0.0015, + "loss": 1.4573, + "step": 4800 + }, + { + "epoch": 0.5073839662447257, + "grad_norm": 0.5309966206550598, + "learning_rate": 0.0015, + 
"loss": 1.4576, + "step": 4810 + }, + { + "epoch": 0.5084388185654009, + "grad_norm": 0.5885314345359802, + "learning_rate": 0.0015, + "loss": 1.4749, + "step": 4820 + }, + { + "epoch": 0.509493670886076, + "grad_norm": 0.7609729766845703, + "learning_rate": 0.0015, + "loss": 1.4584, + "step": 4830 + }, + { + "epoch": 0.510548523206751, + "grad_norm": 0.6413836479187012, + "learning_rate": 0.0015, + "loss": 1.4547, + "step": 4840 + }, + { + "epoch": 0.5116033755274262, + "grad_norm": 0.5320971608161926, + "learning_rate": 0.0015, + "loss": 1.4671, + "step": 4850 + }, + { + "epoch": 0.5126582278481012, + "grad_norm": 0.5723182559013367, + "learning_rate": 0.0015, + "loss": 1.4552, + "step": 4860 + }, + { + "epoch": 0.5137130801687764, + "grad_norm": 0.7459675669670105, + "learning_rate": 0.0015, + "loss": 1.4461, + "step": 4870 + }, + { + "epoch": 0.5147679324894515, + "grad_norm": 0.7086186408996582, + "learning_rate": 0.0015, + "loss": 1.4676, + "step": 4880 + }, + { + "epoch": 0.5158227848101266, + "grad_norm": 0.5106179118156433, + "learning_rate": 0.0015, + "loss": 1.4554, + "step": 4890 + }, + { + "epoch": 0.5168776371308017, + "grad_norm": 0.5558242797851562, + "learning_rate": 0.0015, + "loss": 1.4657, + "step": 4900 + }, + { + "epoch": 0.5179324894514767, + "grad_norm": 0.67197185754776, + "learning_rate": 0.0015, + "loss": 1.4603, + "step": 4910 + }, + { + "epoch": 0.5189873417721519, + "grad_norm": 0.5699183344841003, + "learning_rate": 0.0015, + "loss": 1.4492, + "step": 4920 + }, + { + "epoch": 0.520042194092827, + "grad_norm": 0.6840021014213562, + "learning_rate": 0.0015, + "loss": 1.4589, + "step": 4930 + }, + { + "epoch": 0.5210970464135021, + "grad_norm": 0.5706806778907776, + "learning_rate": 0.0015, + "loss": 1.4576, + "step": 4940 + }, + { + "epoch": 0.5221518987341772, + "grad_norm": 1.0218054056167603, + "learning_rate": 0.0015, + "loss": 1.4653, + "step": 4950 + }, + { + "epoch": 0.5232067510548524, + "grad_norm": 0.6607853770256042, + 
"learning_rate": 0.0015, + "loss": 1.4489, + "step": 4960 + }, + { + "epoch": 0.5242616033755274, + "grad_norm": 0.5810460448265076, + "learning_rate": 0.0015, + "loss": 1.4552, + "step": 4970 + }, + { + "epoch": 0.5253164556962026, + "grad_norm": 0.5207366347312927, + "learning_rate": 0.0015, + "loss": 1.4477, + "step": 4980 + }, + { + "epoch": 0.5263713080168776, + "grad_norm": 0.6496361494064331, + "learning_rate": 0.0015, + "loss": 1.4479, + "step": 4990 + }, + { + "epoch": 0.5274261603375527, + "grad_norm": 0.6957794427871704, + "learning_rate": 0.0015, + "loss": 1.4638, + "step": 5000 + }, + { + "epoch": 0.5284810126582279, + "grad_norm": 0.676959216594696, + "learning_rate": 0.0015, + "loss": 1.4553, + "step": 5010 + }, + { + "epoch": 0.5295358649789029, + "grad_norm": 0.5800243020057678, + "learning_rate": 0.0015, + "loss": 1.4478, + "step": 5020 + }, + { + "epoch": 0.5305907172995781, + "grad_norm": 0.503159761428833, + "learning_rate": 0.0015, + "loss": 1.4335, + "step": 5030 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 0.5524765253067017, + "learning_rate": 0.0015, + "loss": 1.4602, + "step": 5040 + }, + { + "epoch": 0.5327004219409283, + "grad_norm": 0.6221948266029358, + "learning_rate": 0.0015, + "loss": 1.4484, + "step": 5050 + }, + { + "epoch": 0.5337552742616034, + "grad_norm": 0.4997245967388153, + "learning_rate": 0.0015, + "loss": 1.4607, + "step": 5060 + }, + { + "epoch": 0.5348101265822784, + "grad_norm": 0.7629484534263611, + "learning_rate": 0.0015, + "loss": 1.4467, + "step": 5070 + }, + { + "epoch": 0.5358649789029536, + "grad_norm": 0.8345067501068115, + "learning_rate": 0.0015, + "loss": 1.4649, + "step": 5080 + }, + { + "epoch": 0.5369198312236287, + "grad_norm": 0.5711155533790588, + "learning_rate": 0.0015, + "loss": 1.4567, + "step": 5090 + }, + { + "epoch": 0.5379746835443038, + "grad_norm": 0.6207240223884583, + "learning_rate": 0.0015, + "loss": 1.4371, + "step": 5100 + }, + { + "epoch": 0.5390295358649789, + 
"grad_norm": 0.7142829895019531, + "learning_rate": 0.0015, + "loss": 1.4506, + "step": 5110 + }, + { + "epoch": 0.540084388185654, + "grad_norm": 0.7609028220176697, + "learning_rate": 0.0015, + "loss": 1.4478, + "step": 5120 + }, + { + "epoch": 0.5411392405063291, + "grad_norm": 0.5903275012969971, + "learning_rate": 0.0015, + "loss": 1.4457, + "step": 5130 + }, + { + "epoch": 0.5421940928270043, + "grad_norm": 0.7226777076721191, + "learning_rate": 0.0015, + "loss": 1.461, + "step": 5140 + }, + { + "epoch": 0.5432489451476793, + "grad_norm": 0.588779091835022, + "learning_rate": 0.0015, + "loss": 1.4561, + "step": 5150 + }, + { + "epoch": 0.5443037974683544, + "grad_norm": 0.5290944576263428, + "learning_rate": 0.0015, + "loss": 1.4487, + "step": 5160 + }, + { + "epoch": 0.5453586497890295, + "grad_norm": 0.5501238703727722, + "learning_rate": 0.0015, + "loss": 1.4495, + "step": 5170 + }, + { + "epoch": 0.5464135021097046, + "grad_norm": 0.6539481282234192, + "learning_rate": 0.0015, + "loss": 1.4611, + "step": 5180 + }, + { + "epoch": 0.5474683544303798, + "grad_norm": 0.6416603922843933, + "learning_rate": 0.0015, + "loss": 1.4458, + "step": 5190 + }, + { + "epoch": 0.5485232067510548, + "grad_norm": 0.6550890803337097, + "learning_rate": 0.0015, + "loss": 1.4544, + "step": 5200 + }, + { + "epoch": 0.54957805907173, + "grad_norm": 0.7331244349479675, + "learning_rate": 0.0015, + "loss": 1.4406, + "step": 5210 + }, + { + "epoch": 0.5506329113924051, + "grad_norm": 0.5937452912330627, + "learning_rate": 0.0015, + "loss": 1.4513, + "step": 5220 + }, + { + "epoch": 0.5516877637130801, + "grad_norm": 0.6458474397659302, + "learning_rate": 0.0015, + "loss": 1.4454, + "step": 5230 + }, + { + "epoch": 0.5527426160337553, + "grad_norm": 0.6012911200523376, + "learning_rate": 0.0015, + "loss": 1.452, + "step": 5240 + }, + { + "epoch": 0.5537974683544303, + "grad_norm": 0.7571543455123901, + "learning_rate": 0.0015, + "loss": 1.4433, + "step": 5250 + }, + { + "epoch": 
0.5548523206751055, + "grad_norm": 0.5183377265930176, + "learning_rate": 0.0015, + "loss": 1.4493, + "step": 5260 + }, + { + "epoch": 0.5559071729957806, + "grad_norm": 0.7201360464096069, + "learning_rate": 0.0015, + "loss": 1.4571, + "step": 5270 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 0.8829711079597473, + "learning_rate": 0.0015, + "loss": 1.4469, + "step": 5280 + }, + { + "epoch": 0.5580168776371308, + "grad_norm": 0.5061177015304565, + "learning_rate": 0.0015, + "loss": 1.4444, + "step": 5290 + }, + { + "epoch": 0.5590717299578059, + "grad_norm": 0.5218251347541809, + "learning_rate": 0.0015, + "loss": 1.4509, + "step": 5300 + }, + { + "epoch": 0.560126582278481, + "grad_norm": 0.7922905087471008, + "learning_rate": 0.0015, + "loss": 1.4395, + "step": 5310 + }, + { + "epoch": 0.5611814345991561, + "grad_norm": 0.567891538143158, + "learning_rate": 0.0015, + "loss": 1.4404, + "step": 5320 + }, + { + "epoch": 0.5622362869198312, + "grad_norm": 0.5370602011680603, + "learning_rate": 0.0015, + "loss": 1.4498, + "step": 5330 + }, + { + "epoch": 0.5632911392405063, + "grad_norm": 0.5814762711524963, + "learning_rate": 0.0015, + "loss": 1.4406, + "step": 5340 + }, + { + "epoch": 0.5643459915611815, + "grad_norm": 0.5381694436073303, + "learning_rate": 0.0015, + "loss": 1.4371, + "step": 5350 + }, + { + "epoch": 0.5654008438818565, + "grad_norm": 0.6336235404014587, + "learning_rate": 0.0015, + "loss": 1.446, + "step": 5360 + }, + { + "epoch": 0.5664556962025317, + "grad_norm": 0.5424358248710632, + "learning_rate": 0.0015, + "loss": 1.4464, + "step": 5370 + }, + { + "epoch": 0.5675105485232067, + "grad_norm": 0.5514902472496033, + "learning_rate": 0.0015, + "loss": 1.4383, + "step": 5380 + }, + { + "epoch": 0.5685654008438819, + "grad_norm": 0.7581789493560791, + "learning_rate": 0.0015, + "loss": 1.4401, + "step": 5390 + }, + { + "epoch": 0.569620253164557, + "grad_norm": 0.6078736782073975, + "learning_rate": 0.0015, + "loss": 1.4409, + "step": 
5400 + }, + { + "epoch": 0.570675105485232, + "grad_norm": 0.5329536199569702, + "learning_rate": 0.0015, + "loss": 1.4429, + "step": 5410 + }, + { + "epoch": 0.5717299578059072, + "grad_norm": 0.5062630772590637, + "learning_rate": 0.0015, + "loss": 1.4362, + "step": 5420 + }, + { + "epoch": 0.5727848101265823, + "grad_norm": 0.5976478457450867, + "learning_rate": 0.0015, + "loss": 1.4353, + "step": 5430 + }, + { + "epoch": 0.5738396624472574, + "grad_norm": 0.507644772529602, + "learning_rate": 0.0015, + "loss": 1.4324, + "step": 5440 + }, + { + "epoch": 0.5748945147679325, + "grad_norm": 0.5964802503585815, + "learning_rate": 0.0015, + "loss": 1.4404, + "step": 5450 + }, + { + "epoch": 0.5759493670886076, + "grad_norm": 0.5266268253326416, + "learning_rate": 0.0015, + "loss": 1.4418, + "step": 5460 + }, + { + "epoch": 0.5770042194092827, + "grad_norm": 0.7458702921867371, + "learning_rate": 0.0015, + "loss": 1.4419, + "step": 5470 + }, + { + "epoch": 0.5780590717299579, + "grad_norm": 0.7108213305473328, + "learning_rate": 0.0015, + "loss": 1.4444, + "step": 5480 + }, + { + "epoch": 0.5791139240506329, + "grad_norm": 0.5516433119773865, + "learning_rate": 0.0015, + "loss": 1.449, + "step": 5490 + }, + { + "epoch": 0.580168776371308, + "grad_norm": 0.6754648089408875, + "learning_rate": 0.0015, + "loss": 1.4397, + "step": 5500 + }, + { + "epoch": 0.5812236286919831, + "grad_norm": 0.8560307025909424, + "learning_rate": 0.0015, + "loss": 1.447, + "step": 5510 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 0.51972895860672, + "learning_rate": 0.0015, + "loss": 1.4365, + "step": 5520 + }, + { + "epoch": 0.5833333333333334, + "grad_norm": 0.5647733807563782, + "learning_rate": 0.0015, + "loss": 1.4294, + "step": 5530 + }, + { + "epoch": 0.5843881856540084, + "grad_norm": 0.5745040774345398, + "learning_rate": 0.0015, + "loss": 1.429, + "step": 5540 + }, + { + "epoch": 0.5854430379746836, + "grad_norm": 0.5790467262268066, + "learning_rate": 0.0015, + "loss": 
1.4486, + "step": 5550 + }, + { + "epoch": 0.5864978902953587, + "grad_norm": 0.4903717637062073, + "learning_rate": 0.0015, + "loss": 1.4513, + "step": 5560 + }, + { + "epoch": 0.5875527426160337, + "grad_norm": 0.49134671688079834, + "learning_rate": 0.0015, + "loss": 1.4365, + "step": 5570 + }, + { + "epoch": 0.5886075949367089, + "grad_norm": 0.5762231349945068, + "learning_rate": 0.0015, + "loss": 1.4361, + "step": 5580 + }, + { + "epoch": 0.5896624472573839, + "grad_norm": 0.6097369194030762, + "learning_rate": 0.0015, + "loss": 1.4402, + "step": 5590 + }, + { + "epoch": 0.5907172995780591, + "grad_norm": 0.5275094509124756, + "learning_rate": 0.0015, + "loss": 1.4334, + "step": 5600 + }, + { + "epoch": 0.5917721518987342, + "grad_norm": 0.4936537444591522, + "learning_rate": 0.0015, + "loss": 1.4393, + "step": 5610 + }, + { + "epoch": 0.5928270042194093, + "grad_norm": 0.5518372654914856, + "learning_rate": 0.0015, + "loss": 1.4457, + "step": 5620 + }, + { + "epoch": 0.5938818565400844, + "grad_norm": 0.9003373384475708, + "learning_rate": 0.0015, + "loss": 1.4389, + "step": 5630 + }, + { + "epoch": 0.5949367088607594, + "grad_norm": 0.5239720344543457, + "learning_rate": 0.0015, + "loss": 1.4505, + "step": 5640 + }, + { + "epoch": 0.5959915611814346, + "grad_norm": 0.6822075843811035, + "learning_rate": 0.0015, + "loss": 1.4457, + "step": 5650 + }, + { + "epoch": 0.5970464135021097, + "grad_norm": 0.8161158561706543, + "learning_rate": 0.0015, + "loss": 1.438, + "step": 5660 + }, + { + "epoch": 0.5981012658227848, + "grad_norm": 0.6059395670890808, + "learning_rate": 0.0015, + "loss": 1.4471, + "step": 5670 + }, + { + "epoch": 0.5991561181434599, + "grad_norm": 0.6067211627960205, + "learning_rate": 0.0015, + "loss": 1.4476, + "step": 5680 + }, + { + "epoch": 0.6002109704641351, + "grad_norm": 0.8239142894744873, + "learning_rate": 0.0015, + "loss": 1.4259, + "step": 5690 + }, + { + "epoch": 0.6012658227848101, + "grad_norm": 0.5309951305389404, + 
"learning_rate": 0.0015, + "loss": 1.4326, + "step": 5700 + }, + { + "epoch": 0.6023206751054853, + "grad_norm": 0.641196608543396, + "learning_rate": 0.0015, + "loss": 1.436, + "step": 5710 + }, + { + "epoch": 0.6033755274261603, + "grad_norm": 0.5817484855651855, + "learning_rate": 0.0015, + "loss": 1.4384, + "step": 5720 + }, + { + "epoch": 0.6044303797468354, + "grad_norm": 0.6535966396331787, + "learning_rate": 0.0015, + "loss": 1.4369, + "step": 5730 + }, + { + "epoch": 0.6054852320675106, + "grad_norm": 0.5039991140365601, + "learning_rate": 0.0015, + "loss": 1.434, + "step": 5740 + }, + { + "epoch": 0.6065400843881856, + "grad_norm": 0.6101067662239075, + "learning_rate": 0.0015, + "loss": 1.4112, + "step": 5750 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 0.5119408369064331, + "learning_rate": 0.0015, + "loss": 1.4201, + "step": 5760 + }, + { + "epoch": 0.6086497890295358, + "grad_norm": 0.6057926416397095, + "learning_rate": 0.0015, + "loss": 1.4208, + "step": 5770 + }, + { + "epoch": 0.609704641350211, + "grad_norm": 0.5651533603668213, + "learning_rate": 0.0015, + "loss": 1.4322, + "step": 5780 + }, + { + "epoch": 0.6107594936708861, + "grad_norm": 0.6814081072807312, + "learning_rate": 0.0015, + "loss": 1.4294, + "step": 5790 + }, + { + "epoch": 0.6118143459915611, + "grad_norm": 0.6172468662261963, + "learning_rate": 0.0015, + "loss": 1.4322, + "step": 5800 + }, + { + "epoch": 0.6128691983122363, + "grad_norm": 0.6167010068893433, + "learning_rate": 0.0015, + "loss": 1.4386, + "step": 5810 + }, + { + "epoch": 0.6139240506329114, + "grad_norm": 0.5427538752555847, + "learning_rate": 0.0015, + "loss": 1.45, + "step": 5820 + }, + { + "epoch": 0.6149789029535865, + "grad_norm": 0.5122411251068115, + "learning_rate": 0.0015, + "loss": 1.433, + "step": 5830 + }, + { + "epoch": 0.6160337552742616, + "grad_norm": 0.5474509596824646, + "learning_rate": 0.0015, + "loss": 1.445, + "step": 5840 + }, + { + "epoch": 0.6170886075949367, + "grad_norm": 
0.5342562198638916, + "learning_rate": 0.0015, + "loss": 1.4495, + "step": 5850 + }, + { + "epoch": 0.6181434599156118, + "grad_norm": 0.5069245100021362, + "learning_rate": 0.0015, + "loss": 1.4343, + "step": 5860 + }, + { + "epoch": 0.619198312236287, + "grad_norm": 0.5667345523834229, + "learning_rate": 0.0015, + "loss": 1.4271, + "step": 5870 + }, + { + "epoch": 0.620253164556962, + "grad_norm": 0.5197315216064453, + "learning_rate": 0.0015, + "loss": 1.4396, + "step": 5880 + }, + { + "epoch": 0.6213080168776371, + "grad_norm": 0.6950623393058777, + "learning_rate": 0.0015, + "loss": 1.4281, + "step": 5890 + }, + { + "epoch": 0.6223628691983122, + "grad_norm": 0.5150302052497864, + "learning_rate": 0.0015, + "loss": 1.4327, + "step": 5900 + }, + { + "epoch": 0.6234177215189873, + "grad_norm": 0.6885280609130859, + "learning_rate": 0.0015, + "loss": 1.4258, + "step": 5910 + }, + { + "epoch": 0.6244725738396625, + "grad_norm": 0.5735525488853455, + "learning_rate": 0.0015, + "loss": 1.4205, + "step": 5920 + }, + { + "epoch": 0.6255274261603375, + "grad_norm": 1.095780372619629, + "learning_rate": 0.0015, + "loss": 1.4288, + "step": 5930 + }, + { + "epoch": 0.6265822784810127, + "grad_norm": 0.6270949840545654, + "learning_rate": 0.0015, + "loss": 1.4371, + "step": 5940 + }, + { + "epoch": 0.6276371308016878, + "grad_norm": 0.5307191610336304, + "learning_rate": 0.0015, + "loss": 1.4216, + "step": 5950 + }, + { + "epoch": 0.6286919831223629, + "grad_norm": 0.6046649813652039, + "learning_rate": 0.0015, + "loss": 1.4316, + "step": 5960 + }, + { + "epoch": 0.629746835443038, + "grad_norm": 0.564121425151825, + "learning_rate": 0.0015, + "loss": 1.4342, + "step": 5970 + }, + { + "epoch": 0.630801687763713, + "grad_norm": 0.5525282621383667, + "learning_rate": 0.0015, + "loss": 1.4172, + "step": 5980 + }, + { + "epoch": 0.6318565400843882, + "grad_norm": 0.6527619361877441, + "learning_rate": 0.0015, + "loss": 1.4387, + "step": 5990 + }, + { + "epoch": 
0.6329113924050633, + "grad_norm": 0.4984775185585022, + "learning_rate": 0.0015, + "loss": 1.4436, + "step": 6000 + }, + { + "epoch": 0.6339662447257384, + "grad_norm": 0.8228127360343933, + "learning_rate": 0.0015, + "loss": 1.431, + "step": 6010 + }, + { + "epoch": 0.6350210970464135, + "grad_norm": 0.6635581851005554, + "learning_rate": 0.0015, + "loss": 1.4314, + "step": 6020 + }, + { + "epoch": 0.6360759493670886, + "grad_norm": 0.5862621068954468, + "learning_rate": 0.0015, + "loss": 1.4139, + "step": 6030 + }, + { + "epoch": 0.6371308016877637, + "grad_norm": 0.5322771072387695, + "learning_rate": 0.0015, + "loss": 1.4347, + "step": 6040 + }, + { + "epoch": 0.6381856540084389, + "grad_norm": 0.54828941822052, + "learning_rate": 0.0015, + "loss": 1.4188, + "step": 6050 + }, + { + "epoch": 0.6392405063291139, + "grad_norm": 0.5668056607246399, + "learning_rate": 0.0015, + "loss": 1.4336, + "step": 6060 + }, + { + "epoch": 0.640295358649789, + "grad_norm": 0.6533656120300293, + "learning_rate": 0.0015, + "loss": 1.427, + "step": 6070 + }, + { + "epoch": 0.6413502109704642, + "grad_norm": 0.555002748966217, + "learning_rate": 0.0015, + "loss": 1.4231, + "step": 6080 + }, + { + "epoch": 0.6424050632911392, + "grad_norm": 0.703021228313446, + "learning_rate": 0.0015, + "loss": 1.4261, + "step": 6090 + }, + { + "epoch": 0.6434599156118144, + "grad_norm": 0.49974727630615234, + "learning_rate": 0.0015, + "loss": 1.4166, + "step": 6100 + }, + { + "epoch": 0.6445147679324894, + "grad_norm": 0.5857552289962769, + "learning_rate": 0.0015, + "loss": 1.4297, + "step": 6110 + }, + { + "epoch": 0.6455696202531646, + "grad_norm": 0.514546811580658, + "learning_rate": 0.0015, + "loss": 1.428, + "step": 6120 + }, + { + "epoch": 0.6466244725738397, + "grad_norm": 0.5859714150428772, + "learning_rate": 0.0015, + "loss": 1.4249, + "step": 6130 + }, + { + "epoch": 0.6476793248945147, + "grad_norm": 0.7818124294281006, + "learning_rate": 0.0015, + "loss": 1.4261, + "step": 6140 + 
}, + { + "epoch": 0.6487341772151899, + "grad_norm": 0.5644360184669495, + "learning_rate": 0.0015, + "loss": 1.4361, + "step": 6150 + }, + { + "epoch": 0.6497890295358649, + "grad_norm": 0.530302882194519, + "learning_rate": 0.0015, + "loss": 1.4293, + "step": 6160 + }, + { + "epoch": 0.6508438818565401, + "grad_norm": 0.5373342037200928, + "learning_rate": 0.001487560447745699, + "loss": 1.4089, + "step": 6170 + }, + { + "epoch": 0.6518987341772152, + "grad_norm": 0.6444567441940308, + "learning_rate": 0.0014670566859713624, + "loss": 1.4289, + "step": 6180 + }, + { + "epoch": 0.6529535864978903, + "grad_norm": 0.5322826504707336, + "learning_rate": 0.0014468355374162303, + "loss": 1.434, + "step": 6190 + }, + { + "epoch": 0.6540084388185654, + "grad_norm": 0.5358772277832031, + "learning_rate": 0.0014268931066862504, + "loss": 1.4238, + "step": 6200 + }, + { + "epoch": 0.6550632911392406, + "grad_norm": 0.5704453587532043, + "learning_rate": 0.0014072255520794614, + "loss": 1.4203, + "step": 6210 + }, + { + "epoch": 0.6561181434599156, + "grad_norm": 0.4918915927410126, + "learning_rate": 0.0013878290848459301, + "loss": 1.4249, + "step": 6220 + }, + { + "epoch": 0.6571729957805907, + "grad_norm": 0.7140066027641296, + "learning_rate": 0.0013686999684578874, + "loss": 1.4117, + "step": 6230 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 0.6190429329872131, + "learning_rate": 0.001349834517889925, + "loss": 1.4147, + "step": 6240 + }, + { + "epoch": 0.6592827004219409, + "grad_norm": 0.5167576670646667, + "learning_rate": 0.001331229098909114, + "loss": 1.4209, + "step": 6250 + }, + { + "epoch": 0.6603375527426161, + "grad_norm": 0.5087065696716309, + "learning_rate": 0.0013128801273749075, + "loss": 1.4108, + "step": 6260 + }, + { + "epoch": 0.6613924050632911, + "grad_norm": 0.7848103642463684, + "learning_rate": 0.0012947840685486932, + "loss": 1.4214, + "step": 6270 + }, + { + "epoch": 0.6624472573839663, + "grad_norm": 0.5387411117553711, + 
"learning_rate": 0.0012769374364128628, + "loss": 1.4162, + "step": 6280 + }, + { + "epoch": 0.6635021097046413, + "grad_norm": 0.5020235776901245, + "learning_rate": 0.0012593367929992667, + "loss": 1.4117, + "step": 6290 + }, + { + "epoch": 0.6645569620253164, + "grad_norm": 0.5899006128311157, + "learning_rate": 0.0012419787477269257, + "loss": 1.4035, + "step": 6300 + }, + { + "epoch": 0.6656118143459916, + "grad_norm": 0.6065137982368469, + "learning_rate": 0.0012248599567488698, + "loss": 1.4023, + "step": 6310 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.4920519292354584, + "learning_rate": 0.0012079771223079822, + "loss": 1.3994, + "step": 6320 + }, + { + "epoch": 0.6677215189873418, + "grad_norm": 0.5466246008872986, + "learning_rate": 0.0011913269921017202, + "loss": 1.4045, + "step": 6330 + }, + { + "epoch": 0.6687763713080169, + "grad_norm": 0.6251968741416931, + "learning_rate": 0.0011749063586555919, + "loss": 1.3969, + "step": 6340 + }, + { + "epoch": 0.669831223628692, + "grad_norm": 0.5658255815505981, + "learning_rate": 0.001158712058705271, + "loss": 1.4009, + "step": 6350 + }, + { + "epoch": 0.6708860759493671, + "grad_norm": 0.7017289400100708, + "learning_rate": 0.0011427409725872262, + "loss": 1.4007, + "step": 6360 + }, + { + "epoch": 0.6719409282700421, + "grad_norm": 0.48242494463920593, + "learning_rate": 0.00112699002363775, + "loss": 1.3827, + "step": 6370 + }, + { + "epoch": 0.6729957805907173, + "grad_norm": 0.4930438697338104, + "learning_rate": 0.0011114561776002726, + "loss": 1.3845, + "step": 6380 + }, + { + "epoch": 0.6740506329113924, + "grad_norm": 0.8275454640388489, + "learning_rate": 0.001096136442040843, + "loss": 1.3992, + "step": 6390 + }, + { + "epoch": 0.6751054852320675, + "grad_norm": 0.7757958769798279, + "learning_rate": 0.001081027865771668, + "loss": 1.3968, + "step": 6400 + }, + { + "epoch": 0.6761603375527426, + "grad_norm": 0.6227315068244934, + "learning_rate": 0.0010661275382825958, + "loss": 
1.3917, + "step": 6410 + }, + { + "epoch": 0.6772151898734177, + "grad_norm": 0.5329786539077759, + "learning_rate": 0.0010514325891804379, + "loss": 1.3801, + "step": 6420 + }, + { + "epoch": 0.6782700421940928, + "grad_norm": 0.5015116930007935, + "learning_rate": 0.0010369401876360166, + "loss": 1.3899, + "step": 6430 + }, + { + "epoch": 0.679324894514768, + "grad_norm": 0.7147842645645142, + "learning_rate": 0.001022647541838836, + "loss": 1.3738, + "step": 6440 + }, + { + "epoch": 0.680379746835443, + "grad_norm": 0.5241576433181763, + "learning_rate": 0.0010085518984592678, + "loss": 1.3745, + "step": 6450 + }, + { + "epoch": 0.6814345991561181, + "grad_norm": 0.5033132433891296, + "learning_rate": 0.0009946505421181513, + "loss": 1.3897, + "step": 6460 + }, + { + "epoch": 0.6824894514767933, + "grad_norm": 0.5533695220947266, + "learning_rate": 0.0009809407948637044, + "loss": 1.3832, + "step": 6470 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 0.6914740800857544, + "learning_rate": 0.0009674200156556436, + "loss": 1.3875, + "step": 6480 + }, + { + "epoch": 0.6845991561181435, + "grad_norm": 0.4672880172729492, + "learning_rate": 0.0009540855998564147, + "loss": 1.38, + "step": 6490 + }, + { + "epoch": 0.6856540084388185, + "grad_norm": 0.5120308995246887, + "learning_rate": 0.000940934978729437, + "loss": 1.396, + "step": 6500 + }, + { + "epoch": 0.6867088607594937, + "grad_norm": 0.4719778597354889, + "learning_rate": 0.0009279656189442628, + "loss": 1.3942, + "step": 6510 + }, + { + "epoch": 0.6877637130801688, + "grad_norm": 0.4703037142753601, + "learning_rate": 0.0009151750220885573, + "loss": 1.3788, + "step": 6520 + }, + { + "epoch": 0.6888185654008439, + "grad_norm": 0.5431548357009888, + "learning_rate": 0.0009025607241868057, + "loss": 1.3698, + "step": 6530 + }, + { + "epoch": 0.689873417721519, + "grad_norm": 0.6896421909332275, + "learning_rate": 0.0008901202952256545, + "loss": 1.3793, + "step": 6540 + }, + { + "epoch": 
0.6909282700421941, + "grad_norm": 0.5038896799087524, + "learning_rate": 0.0008778513386857928, + "loss": 1.373, + "step": 6550 + }, + { + "epoch": 0.6919831223628692, + "grad_norm": 0.6642139554023743, + "learning_rate": 0.0008657514910802905, + "loss": 1.3722, + "step": 6560 + }, + { + "epoch": 0.6930379746835443, + "grad_norm": 0.5895769000053406, + "learning_rate": 0.0008538184214992943, + "loss": 1.3694, + "step": 6570 + }, + { + "epoch": 0.6940928270042194, + "grad_norm": 0.4995010793209076, + "learning_rate": 0.0008420498311610049, + "loss": 1.3646, + "step": 6580 + }, + { + "epoch": 0.6951476793248945, + "grad_norm": 0.5526382327079773, + "learning_rate": 0.0008304434529688382, + "loss": 1.3697, + "step": 6590 + }, + { + "epoch": 0.6962025316455697, + "grad_norm": 0.5263835787773132, + "learning_rate": 0.0008189970510746938, + "loss": 1.3734, + "step": 6600 + }, + { + "epoch": 0.6972573839662447, + "grad_norm": 0.48899415135383606, + "learning_rate": 0.0008077084204482425, + "loss": 1.3596, + "step": 6610 + }, + { + "epoch": 0.6983122362869199, + "grad_norm": 0.49490150809288025, + "learning_rate": 0.0007965753864521494, + "loss": 1.368, + "step": 6620 + }, + { + "epoch": 0.6993670886075949, + "grad_norm": 0.5191816091537476, + "learning_rate": 0.0007855958044231527, + "loss": 1.3615, + "step": 6630 + }, + { + "epoch": 0.70042194092827, + "grad_norm": 0.6512613296508789, + "learning_rate": 0.000774767559258917, + "loss": 1.3635, + "step": 6640 + }, + { + "epoch": 0.7014767932489452, + "grad_norm": 0.5101611614227295, + "learning_rate": 0.0007640885650105806, + "loss": 1.3568, + "step": 6650 + }, + { + "epoch": 0.7025316455696202, + "grad_norm": 0.5285460948944092, + "learning_rate": 0.0007535567644809191, + "loss": 1.3575, + "step": 6660 + }, + { + "epoch": 0.7035864978902954, + "grad_norm": 0.4990067183971405, + "learning_rate": 0.0007431701288280478, + "loss": 1.3597, + "step": 6670 + }, + { + "epoch": 0.7046413502109705, + "grad_norm": 
0.4479198753833771, + "learning_rate": 0.0007329266571745864, + "loss": 1.36, + "step": 6680 + }, + { + "epoch": 0.7056962025316456, + "grad_norm": 0.4862308204174042, + "learning_rate": 0.0007228243762222109, + "loss": 1.3536, + "step": 6690 + }, + { + "epoch": 0.7067510548523207, + "grad_norm": 0.4721124470233917, + "learning_rate": 0.0007128613398715179, + "loss": 1.3555, + "step": 6700 + }, + { + "epoch": 0.7078059071729957, + "grad_norm": 0.5222131609916687, + "learning_rate": 0.0007030356288471288, + "loss": 1.3554, + "step": 6710 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 0.604771077632904, + "learning_rate": 0.0006933453503279619, + "loss": 1.3455, + "step": 6720 + }, + { + "epoch": 0.709915611814346, + "grad_norm": 0.5300591588020325, + "learning_rate": 0.0006837886375825994, + "loss": 1.3516, + "step": 6730 + }, + { + "epoch": 0.7109704641350211, + "grad_norm": 0.49359503388404846, + "learning_rate": 0.0006743636496096813, + "loss": 1.3553, + "step": 6740 + }, + { + "epoch": 0.7120253164556962, + "grad_norm": 0.48862412571907043, + "learning_rate": 0.0006650685707832559, + "loss": 1.3579, + "step": 6750 + }, + { + "epoch": 0.7130801687763713, + "grad_norm": 0.5212936401367188, + "learning_rate": 0.0006559016105030176, + "loss": 1.3532, + "step": 6760 + }, + { + "epoch": 0.7141350210970464, + "grad_norm": 0.5354087352752686, + "learning_rate": 0.000646861002849367, + "loss": 1.356, + "step": 6770 + }, + { + "epoch": 0.7151898734177216, + "grad_norm": 0.5177834630012512, + "learning_rate": 0.0006379450062432248, + "loss": 1.3514, + "step": 6780 + }, + { + "epoch": 0.7162447257383966, + "grad_norm": 0.47574642300605774, + "learning_rate": 0.0006291519031105347, + "loss": 1.3577, + "step": 6790 + }, + { + "epoch": 0.7172995780590717, + "grad_norm": 0.4653569161891937, + "learning_rate": 0.00062047999955139, + "loss": 1.3408, + "step": 6800 + }, + { + "epoch": 0.7183544303797469, + "grad_norm": 0.6710003614425659, + "learning_rate": 
0.000611927625013722, + "loss": 1.3451, + "step": 6810 + }, + { + "epoch": 0.7194092827004219, + "grad_norm": 0.5257753729820251, + "learning_rate": 0.0006034931319714858, + "loss": 1.3482, + "step": 6820 + }, + { + "epoch": 0.7204641350210971, + "grad_norm": 0.6125897169113159, + "learning_rate": 0.0005951748956072806, + "loss": 1.3438, + "step": 6830 + }, + { + "epoch": 0.7215189873417721, + "grad_norm": 0.5772028565406799, + "learning_rate": 0.0005869713134993463, + "loss": 1.3338, + "step": 6840 + }, + { + "epoch": 0.7225738396624473, + "grad_norm": 0.4690326452255249, + "learning_rate": 0.0005788808053128734, + "loss": 1.3416, + "step": 6850 + }, + { + "epoch": 0.7236286919831224, + "grad_norm": 0.5361828207969666, + "learning_rate": 0.0005709018124955674, + "loss": 1.3375, + "step": 6860 + }, + { + "epoch": 0.7246835443037974, + "grad_norm": 0.4756416082382202, + "learning_rate": 0.0005630327979774111, + "loss": 1.3334, + "step": 6870 + }, + { + "epoch": 0.7257383966244726, + "grad_norm": 0.5803757905960083, + "learning_rate": 0.0005552722458745627, + "loss": 1.3416, + "step": 6880 + }, + { + "epoch": 0.7267932489451476, + "grad_norm": 0.5282648801803589, + "learning_rate": 0.0005476186611973374, + "loss": 1.3315, + "step": 6890 + }, + { + "epoch": 0.7278481012658228, + "grad_norm": 0.5026461482048035, + "learning_rate": 0.000540070569562213, + "loss": 1.3339, + "step": 6900 + }, + { + "epoch": 0.7289029535864979, + "grad_norm": 0.48198971152305603, + "learning_rate": 0.0005326265169078048, + "loss": 1.3557, + "step": 6910 + }, + { + "epoch": 0.729957805907173, + "grad_norm": 0.526611328125, + "learning_rate": 0.0005252850692147567, + "loss": 1.3353, + "step": 6920 + }, + { + "epoch": 0.7310126582278481, + "grad_norm": 0.4811290204524994, + "learning_rate": 0.0005180448122294913, + "loss": 1.3401, + "step": 6930 + }, + { + "epoch": 0.7320675105485233, + "grad_norm": 0.5054536461830139, + "learning_rate": 0.0005109043511917693, + "loss": 1.3441, + "step": 6940 
+ }, + { + "epoch": 0.7331223628691983, + "grad_norm": 0.46921008825302124, + "learning_rate": 0.0005038623105660032, + "loss": 1.3401, + "step": 6950 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 0.477500319480896, + "learning_rate": 0.0004969173337762747, + "loss": 1.3297, + "step": 6960 + }, + { + "epoch": 0.7352320675105485, + "grad_norm": 0.5194097757339478, + "learning_rate": 0.0004900680829450042, + "loss": 1.3246, + "step": 6970 + }, + { + "epoch": 0.7362869198312236, + "grad_norm": 0.494023859500885, + "learning_rate": 0.0004833132386352233, + "loss": 1.3289, + "step": 6980 + }, + { + "epoch": 0.7373417721518988, + "grad_norm": 0.49618563055992126, + "learning_rate": 0.00047665149959639813, + "loss": 1.33, + "step": 6990 + }, + { + "epoch": 0.7383966244725738, + "grad_norm": 0.5184317827224731, + "learning_rate": 0.0004700815825137577, + "loss": 1.3413, + "step": 7000 + }, + { + "epoch": 0.739451476793249, + "grad_norm": 0.4838079512119293, + "learning_rate": 0.00046360222176107584, + "loss": 1.3108, + "step": 7010 + }, + { + "epoch": 0.740506329113924, + "grad_norm": 0.6089664101600647, + "learning_rate": 0.0004572121691568625, + "loss": 1.3242, + "step": 7020 + }, + { + "epoch": 0.7415611814345991, + "grad_norm": 0.5210044980049133, + "learning_rate": 0.00045091019372391354, + "loss": 1.3302, + "step": 7030 + }, + { + "epoch": 0.7426160337552743, + "grad_norm": 0.6672475337982178, + "learning_rate": 0.0004446950814521764, + "loss": 1.3348, + "step": 7040 + }, + { + "epoch": 0.7436708860759493, + "grad_norm": 0.5345211029052734, + "learning_rate": 0.0004385656350648835, + "loss": 1.3237, + "step": 7050 + }, + { + "epoch": 0.7447257383966245, + "grad_norm": 0.5275806188583374, + "learning_rate": 0.00043252067378790946, + "loss": 1.3268, + "step": 7060 + }, + { + "epoch": 0.7457805907172996, + "grad_norm": 0.494088739156723, + "learning_rate": 0.00042655903312230673, + "loss": 1.3322, + "step": 7070 + }, + { + "epoch": 0.7468354430379747, + 
"grad_norm": 0.4805038571357727, + "learning_rate": 0.0004206795646199778, + "loss": 1.329, + "step": 7080 + }, + { + "epoch": 0.7478902953586498, + "grad_norm": 0.5606774687767029, + "learning_rate": 0.0004148811356624379, + "loss": 1.3266, + "step": 7090 + }, + { + "epoch": 0.7489451476793249, + "grad_norm": 0.48633837699890137, + "learning_rate": 0.0004091626292426282, + "loss": 1.3229, + "step": 7100 + }, + { + "epoch": 0.75, + "grad_norm": 0.5181665420532227, + "learning_rate": 0.0004035229437497357, + "loss": 1.3321, + "step": 7110 + }, + { + "epoch": 0.7510548523206751, + "grad_norm": 0.6352419853210449, + "learning_rate": 0.00039796099275697986, + "loss": 1.3256, + "step": 7120 + }, + { + "epoch": 0.7521097046413502, + "grad_norm": 0.4542626738548279, + "learning_rate": 0.0003924757048123232, + "loss": 1.3341, + "step": 7130 + }, + { + "epoch": 0.7531645569620253, + "grad_norm": 0.5226581692695618, + "learning_rate": 0.0003870660232320675, + "loss": 1.3178, + "step": 7140 + }, + { + "epoch": 0.7542194092827004, + "grad_norm": 0.5094959139823914, + "learning_rate": 0.000381730905897295, + "loss": 1.3154, + "step": 7150 + }, + { + "epoch": 0.7552742616033755, + "grad_norm": 0.48237210512161255, + "learning_rate": 0.0003764693250531141, + "loss": 1.3239, + "step": 7160 + }, + { + "epoch": 0.7563291139240507, + "grad_norm": 0.5723801851272583, + "learning_rate": 0.0003712802671106742, + "loss": 1.3328, + "step": 7170 + }, + { + "epoch": 0.7573839662447257, + "grad_norm": 0.5506534576416016, + "learning_rate": 0.0003661627324519073, + "loss": 1.3111, + "step": 7180 + }, + { + "epoch": 0.7584388185654009, + "grad_norm": 0.5179747343063354, + "learning_rate": 0.0003611157352369628, + "loss": 1.3137, + "step": 7190 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 0.4826015532016754, + "learning_rate": 0.00035613830321429534, + "loss": 1.3182, + "step": 7200 + }, + { + "epoch": 0.760548523206751, + "grad_norm": 0.641946017742157, + "learning_rate": 
0.00035122947753337037, + "loss": 1.32, + "step": 7210 + }, + { + "epoch": 0.7616033755274262, + "grad_norm": 0.5081714391708374, + "learning_rate": 0.0003463883125599521, + "loss": 1.3349, + "step": 7220 + }, + { + "epoch": 0.7626582278481012, + "grad_norm": 0.5510414242744446, + "learning_rate": 0.00034161387569393647, + "loss": 1.3296, + "step": 7230 + }, + { + "epoch": 0.7637130801687764, + "grad_norm": 0.5122982263565063, + "learning_rate": 0.00033690524718969593, + "loss": 1.3247, + "step": 7240 + }, + { + "epoch": 0.7647679324894515, + "grad_norm": 0.46928784251213074, + "learning_rate": 0.0003322615199788993, + "loss": 1.3213, + "step": 7250 + }, + { + "epoch": 0.7658227848101266, + "grad_norm": 0.46525609493255615, + "learning_rate": 0.00032768179949577516, + "loss": 1.317, + "step": 7260 + }, + { + "epoch": 0.7668776371308017, + "grad_norm": 0.48168259859085083, + "learning_rate": 0.0003231652035047826, + "loss": 1.3144, + "step": 7270 + }, + { + "epoch": 0.7679324894514767, + "grad_norm": 0.4579719305038452, + "learning_rate": 0.000318710861930658, + "loss": 1.3179, + "step": 7280 + }, + { + "epoch": 0.7689873417721519, + "grad_norm": 0.5255410075187683, + "learning_rate": 0.0003143179166908038, + "loss": 1.3263, + "step": 7290 + }, + { + "epoch": 0.770042194092827, + "grad_norm": 0.5361279845237732, + "learning_rate": 0.00030998552152998834, + "loss": 1.3311, + "step": 7300 + }, + { + "epoch": 0.7710970464135021, + "grad_norm": 0.445333868265152, + "learning_rate": 0.00030571284185732276, + "loss": 1.3125, + "step": 7310 + }, + { + "epoch": 0.7721518987341772, + "grad_norm": 0.5655251145362854, + "learning_rate": 0.0003014990545854864, + "loss": 1.3077, + "step": 7320 + }, + { + "epoch": 0.7732067510548524, + "grad_norm": 0.4569258391857147, + "learning_rate": 0.0002973433479721675, + "loss": 1.3129, + "step": 7330 + }, + { + "epoch": 0.7742616033755274, + "grad_norm": 0.5192192196846008, + "learning_rate": 0.00029324492146368906, + "loss": 1.3105, + 
"step": 7340 + }, + { + "epoch": 0.7753164556962026, + "grad_norm": 0.4713016748428345, + "learning_rate": 0.00028920298554079113, + "loss": 1.3056, + "step": 7350 + }, + { + "epoch": 0.7763713080168776, + "grad_norm": 0.4577460289001465, + "learning_rate": 0.00028521676156653756, + "loss": 1.3145, + "step": 7360 + }, + { + "epoch": 0.7774261603375527, + "grad_norm": 0.5324769020080566, + "learning_rate": 0.00028128548163632006, + "loss": 1.314, + "step": 7370 + }, + { + "epoch": 0.7784810126582279, + "grad_norm": 0.4924927353858948, + "learning_rate": 0.0002774083884299292, + "loss": 1.3036, + "step": 7380 + }, + { + "epoch": 0.7795358649789029, + "grad_norm": 0.4569038450717926, + "learning_rate": 0.0002735847350656645, + "loss": 1.3138, + "step": 7390 + }, + { + "epoch": 0.7805907172995781, + "grad_norm": 0.5252165794372559, + "learning_rate": 0.0002698137849564556, + "loss": 1.3195, + "step": 7400 + }, + { + "epoch": 0.7816455696202531, + "grad_norm": 0.47857287526130676, + "learning_rate": 0.0002660948116679665, + "loss": 1.3006, + "step": 7410 + }, + { + "epoch": 0.7827004219409283, + "grad_norm": 0.520463764667511, + "learning_rate": 0.00026242709877865493, + "loss": 1.3055, + "step": 7420 + }, + { + "epoch": 0.7837552742616034, + "grad_norm": 0.46325406432151794, + "learning_rate": 0.00025880993974176204, + "loss": 1.2909, + "step": 7430 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 0.46916434168815613, + "learning_rate": 0.0002552426377492028, + "loss": 1.2968, + "step": 7440 + }, + { + "epoch": 0.7858649789029536, + "grad_norm": 0.4678511917591095, + "learning_rate": 0.0002517245055973337, + "loss": 1.2996, + "step": 7450 + }, + { + "epoch": 0.7869198312236287, + "grad_norm": 0.5233687162399292, + "learning_rate": 0.00024825486555456975, + "loss": 1.3083, + "step": 7460 + }, + { + "epoch": 0.7879746835443038, + "grad_norm": 0.4865364730358124, + "learning_rate": 0.00024483304923082663, + "loss": 1.3028, + "step": 7470 + }, + { + "epoch": 
0.7890295358649789, + "grad_norm": 0.4645150899887085, + "learning_rate": 0.0002414583974487624, + "loss": 1.3067, + "step": 7480 + }, + { + "epoch": 0.790084388185654, + "grad_norm": 0.4486704468727112, + "learning_rate": 0.00023813026011679372, + "loss": 1.3126, + "step": 7490 + }, + { + "epoch": 0.7911392405063291, + "grad_norm": 0.4984494149684906, + "learning_rate": 0.0002348479961038625, + "loss": 1.3077, + "step": 7500 + }, + { + "epoch": 0.7921940928270043, + "grad_norm": 0.49964821338653564, + "learning_rate": 0.00023161097311592867, + "loss": 1.3131, + "step": 7510 + }, + { + "epoch": 0.7932489451476793, + "grad_norm": 0.47146445512771606, + "learning_rate": 0.00022841856757416538, + "loss": 1.2975, + "step": 7520 + }, + { + "epoch": 0.7943037974683544, + "grad_norm": 0.4717871844768524, + "learning_rate": 0.0002252701644948328, + "loss": 1.2999, + "step": 7530 + }, + { + "epoch": 0.7953586497890295, + "grad_norm": 0.5229275226593018, + "learning_rate": 0.00022216515737080817, + "loss": 1.2984, + "step": 7540 + }, + { + "epoch": 0.7964135021097046, + "grad_norm": 0.5039898157119751, + "learning_rate": 0.00021910294805474833, + "loss": 1.2955, + "step": 7550 + }, + { + "epoch": 0.7974683544303798, + "grad_norm": 0.5792648792266846, + "learning_rate": 0.0002160829466438629, + "loss": 1.2969, + "step": 7560 + }, + { + "epoch": 0.7985232067510548, + "grad_norm": 0.45985865592956543, + "learning_rate": 0.00021310457136627562, + "loss": 1.3082, + "step": 7570 + }, + { + "epoch": 0.79957805907173, + "grad_norm": 0.4425286650657654, + "learning_rate": 0.00021016724846895213, + "loss": 1.3044, + "step": 7580 + }, + { + "epoch": 0.8006329113924051, + "grad_norm": 0.49352750182151794, + "learning_rate": 0.00020727041210717235, + "loss": 1.2947, + "step": 7590 + }, + { + "epoch": 0.8016877637130801, + "grad_norm": 0.4980930984020233, + "learning_rate": 0.00020441350423552624, + "loss": 1.298, + "step": 7600 + }, + { + "epoch": 0.8027426160337553, + "grad_norm": 
0.47221896052360535, + "learning_rate": 0.00020159597450041257, + "loss": 1.3101, + "step": 7610 + }, + { + "epoch": 0.8037974683544303, + "grad_norm": 0.4941273033618927, + "learning_rate": 0.00019881728013401842, + "loss": 1.2848, + "step": 7620 + }, + { + "epoch": 0.8048523206751055, + "grad_norm": 0.5175561308860779, + "learning_rate": 0.00019607688584976116, + "loss": 1.3082, + "step": 7630 + }, + { + "epoch": 0.8059071729957806, + "grad_norm": 0.5500489473342896, + "learning_rate": 0.00019337426373917076, + "loss": 1.2986, + "step": 7640 + }, + { + "epoch": 0.8069620253164557, + "grad_norm": 0.47605860233306885, + "learning_rate": 0.00019070889317019375, + "loss": 1.316, + "step": 7650 + }, + { + "epoch": 0.8080168776371308, + "grad_norm": 0.47969958186149597, + "learning_rate": 0.00018808026068689883, + "loss": 1.2918, + "step": 7660 + }, + { + "epoch": 0.8090717299578059, + "grad_norm": 0.44478949904441833, + "learning_rate": 0.00018548785991056508, + "loss": 1.299, + "step": 7670 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 0.5349645614624023, + "learning_rate": 0.00018293119144213328, + "loss": 1.3128, + "step": 7680 + }, + { + "epoch": 0.8111814345991561, + "grad_norm": 0.48608171939849854, + "learning_rate": 0.00018040976276600176, + "loss": 1.2866, + "step": 7690 + }, + { + "epoch": 0.8122362869198312, + "grad_norm": 0.4845902621746063, + "learning_rate": 0.00017792308815514854, + "loss": 1.2961, + "step": 7700 + }, + { + "epoch": 0.8132911392405063, + "grad_norm": 0.5377745628356934, + "learning_rate": 0.00017547068857756104, + "loss": 1.2942, + "step": 7710 + }, + { + "epoch": 0.8143459915611815, + "grad_norm": 0.4662752151489258, + "learning_rate": 0.00017305209160395547, + "loss": 1.2982, + "step": 7720 + }, + { + "epoch": 0.8154008438818565, + "grad_norm": 0.5359383821487427, + "learning_rate": 0.00017066683131676825, + "loss": 1.304, + "step": 7730 + }, + { + "epoch": 0.8164556962025317, + "grad_norm": 0.4550548791885376, + 
"learning_rate": 0.00016831444822040207, + "loss": 1.3024, + "step": 7740 + }, + { + "epoch": 0.8175105485232067, + "grad_norm": 0.5258935689926147, + "learning_rate": 0.00016599448915270845, + "loss": 1.2895, + "step": 7750 + }, + { + "epoch": 0.8185654008438819, + "grad_norm": 0.588305652141571, + "learning_rate": 0.000163706507197691, + "loss": 1.29, + "step": 7760 + }, + { + "epoch": 0.819620253164557, + "grad_norm": 0.47811877727508545, + "learning_rate": 0.0001614500615994117, + "loss": 1.2968, + "step": 7770 + }, + { + "epoch": 0.820675105485232, + "grad_norm": 0.513242781162262, + "learning_rate": 0.00015922471767708377, + "loss": 1.2896, + "step": 7780 + }, + { + "epoch": 0.8217299578059072, + "grad_norm": 0.5355343222618103, + "learning_rate": 0.00015703004674133498, + "loss": 1.3078, + "step": 7790 + }, + { + "epoch": 0.8227848101265823, + "grad_norm": 0.46494224667549133, + "learning_rate": 0.00015486562601162512, + "loss": 1.2975, + "step": 7800 + }, + { + "epoch": 0.8238396624472574, + "grad_norm": 0.4649417996406555, + "learning_rate": 0.0001527310385348017, + "loss": 1.2938, + "step": 7810 + }, + { + "epoch": 0.8248945147679325, + "grad_norm": 0.49366864562034607, + "learning_rate": 0.00015062587310477813, + "loss": 1.3024, + "step": 7820 + }, + { + "epoch": 0.8259493670886076, + "grad_norm": 0.488582581281662, + "learning_rate": 0.00014854972418331948, + "loss": 1.2927, + "step": 7830 + }, + { + "epoch": 0.8270042194092827, + "grad_norm": 0.48359256982803345, + "learning_rate": 0.00014650219182191934, + "loss": 1.2765, + "step": 7840 + }, + { + "epoch": 0.8280590717299579, + "grad_norm": 0.5472168326377869, + "learning_rate": 0.00014448288158475423, + "loss": 1.2875, + "step": 7850 + }, + { + "epoch": 0.8291139240506329, + "grad_norm": 0.44724100828170776, + "learning_rate": 0.0001424914044726995, + "loss": 1.2759, + "step": 7860 + }, + { + "epoch": 0.830168776371308, + "grad_norm": 0.5006916522979736, + "learning_rate": 0.0001405273768483926, + 
"loss": 1.2895, + "step": 7870 + }, + { + "epoch": 0.8312236286919831, + "grad_norm": 0.4745523929595947, + "learning_rate": 0.0001385904203623296, + "loss": 1.3084, + "step": 7880 + }, + { + "epoch": 0.8322784810126582, + "grad_norm": 0.473987877368927, + "learning_rate": 0.00013668016187997964, + "loss": 1.3034, + "step": 7890 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.5028970241546631, + "learning_rate": 0.0001347962334099052, + "loss": 1.2847, + "step": 7900 + }, + { + "epoch": 0.8343881856540084, + "grad_norm": 0.45214754343032837, + "learning_rate": 0.00013293827203287141, + "loss": 1.2947, + "step": 7910 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 0.4775562286376953, + "learning_rate": 0.00013110591983193424, + "loss": 1.292, + "step": 7920 + }, + { + "epoch": 0.8364978902953587, + "grad_norm": 0.46456408500671387, + "learning_rate": 0.00012929882382349103, + "loss": 1.3015, + "step": 7930 + }, + { + "epoch": 0.8375527426160337, + "grad_norm": 0.46624886989593506, + "learning_rate": 0.0001275166358892821, + "loss": 1.294, + "step": 7940 + }, + { + "epoch": 0.8386075949367089, + "grad_norm": 0.495012491941452, + "learning_rate": 0.00012575901270932944, + "loss": 1.2962, + "step": 7950 + }, + { + "epoch": 0.8396624472573839, + "grad_norm": 0.46296992897987366, + "learning_rate": 0.00012402561569579935, + "loss": 1.2831, + "step": 7960 + }, + { + "epoch": 0.8407172995780591, + "grad_norm": 0.4812520742416382, + "learning_rate": 0.00012231611092777743, + "loss": 1.2857, + "step": 7970 + }, + { + "epoch": 0.8417721518987342, + "grad_norm": 0.46543583273887634, + "learning_rate": 0.00012063016908694192, + "loss": 1.2839, + "step": 7980 + }, + { + "epoch": 0.8428270042194093, + "grad_norm": 0.6159443259239197, + "learning_rate": 0.00011896746539412405, + "loss": 1.2942, + "step": 7990 + }, + { + "epoch": 0.8438818565400844, + "grad_norm": 0.5004511475563049, + "learning_rate": 0.00011732767954674264, + "loss": 1.2882, + "step": 8000 + }, + 
{ + "epoch": 0.8449367088607594, + "grad_norm": 0.4842396676540375, + "learning_rate": 0.00011571049565710122, + "loss": 1.2963, + "step": 8010 + }, + { + "epoch": 0.8459915611814346, + "grad_norm": 0.47212883830070496, + "learning_rate": 0.00011411560219153552, + "loss": 1.2914, + "step": 8020 + }, + { + "epoch": 0.8470464135021097, + "grad_norm": 0.47493845224380493, + "learning_rate": 0.0001125426919103997, + "loss": 1.2884, + "step": 8030 + }, + { + "epoch": 0.8481012658227848, + "grad_norm": 0.501886248588562, + "learning_rate": 0.00011099146180887992, + "loss": 1.2979, + "step": 8040 + }, + { + "epoch": 0.8491561181434599, + "grad_norm": 0.4947645664215088, + "learning_rate": 0.0001094616130586235, + "loss": 1.2805, + "step": 8050 + }, + { + "epoch": 0.8502109704641351, + "grad_norm": 0.4746250808238983, + "learning_rate": 0.00010795285095017282, + "loss": 1.2891, + "step": 8060 + }, + { + "epoch": 0.8512658227848101, + "grad_norm": 0.44147855043411255, + "learning_rate": 0.00010646488483619263, + "loss": 1.2907, + "step": 8070 + }, + { + "epoch": 0.8523206751054853, + "grad_norm": 0.48036879301071167, + "learning_rate": 0.00010499742807547978, + "loss": 1.2952, + "step": 8080 + }, + { + "epoch": 0.8533755274261603, + "grad_norm": 0.4607331156730652, + "learning_rate": 0.0001035501979777448, + "loss": 1.2837, + "step": 8090 + }, + { + "epoch": 0.8544303797468354, + "grad_norm": 0.48059582710266113, + "learning_rate": 0.00010212291574915464, + "loss": 1.2781, + "step": 8100 + }, + { + "epoch": 0.8554852320675106, + "grad_norm": 0.5390638113021851, + "learning_rate": 0.00010071530643862575, + "loss": 1.2956, + "step": 8110 + }, + { + "epoch": 0.8565400843881856, + "grad_norm": 0.46891868114471436, + "learning_rate": 9.932709888485788e-05, + "loss": 1.285, + "step": 8120 + }, + { + "epoch": 0.8575949367088608, + "grad_norm": 0.5059880018234253, + "learning_rate": 9.79580256640974e-05, + "loss": 1.2873, + "step": 8130 + }, + { + "epoch": 0.8586497890295358, + 
"grad_norm": 0.4536162316799164, + "learning_rate": 9.660782303862107e-05, + "loss": 1.284, + "step": 8140 + }, + { + "epoch": 0.859704641350211, + "grad_norm": 0.4523012042045593, + "learning_rate": 9.527623090592962e-05, + "loss": 1.2795, + "step": 8150 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 0.4588717520236969, + "learning_rate": 9.396299274864176e-05, + "loss": 1.2917, + "step": 8160 + }, + { + "epoch": 0.8618143459915611, + "grad_norm": 0.4557141363620758, + "learning_rate": 9.266785558507876e-05, + "loss": 1.2841, + "step": 8170 + }, + { + "epoch": 0.8628691983122363, + "grad_norm": 0.4628904461860657, + "learning_rate": 9.139056992053017e-05, + "loss": 1.2878, + "step": 8180 + }, + { + "epoch": 0.8639240506329114, + "grad_norm": 0.5263970494270325, + "learning_rate": 9.01308896991912e-05, + "loss": 1.2831, + "step": 8190 + }, + { + "epoch": 0.8649789029535865, + "grad_norm": 0.4881422519683838, + "learning_rate": 8.88885722567627e-05, + "loss": 1.293, + "step": 8200 + }, + { + "epoch": 0.8660337552742616, + "grad_norm": 0.45603007078170776, + "learning_rate": 8.766337827370438e-05, + "loss": 1.2878, + "step": 8210 + }, + { + "epoch": 0.8670886075949367, + "grad_norm": 0.48692816495895386, + "learning_rate": 8.645507172913238e-05, + "loss": 1.2943, + "step": 8220 + }, + { + "epoch": 0.8681434599156118, + "grad_norm": 0.46994394063949585, + "learning_rate": 8.52634198553523e-05, + "loss": 1.2908, + "step": 8230 + }, + { + "epoch": 0.869198312236287, + "grad_norm": 0.46883875131607056, + "learning_rate": 8.408819309301891e-05, + "loss": 1.2876, + "step": 8240 + }, + { + "epoch": 0.870253164556962, + "grad_norm": 0.4687853753566742, + "learning_rate": 8.292916504691398e-05, + "loss": 1.3017, + "step": 8250 + }, + { + "epoch": 0.8713080168776371, + "grad_norm": 0.49034225940704346, + "learning_rate": 8.178611244233354e-05, + "loss": 1.2924, + "step": 8260 + }, + { + "epoch": 0.8723628691983122, + "grad_norm": 0.45703232288360596, + "learning_rate": 
8.065881508207636e-05, + "loss": 1.2903, + "step": 8270 + }, + { + "epoch": 0.8734177215189873, + "grad_norm": 0.4571961462497711, + "learning_rate": 7.954705580402525e-05, + "loss": 1.2714, + "step": 8280 + }, + { + "epoch": 0.8744725738396625, + "grad_norm": 0.4844617545604706, + "learning_rate": 7.845062043931299e-05, + "loss": 1.2728, + "step": 8290 + }, + { + "epoch": 0.8755274261603375, + "grad_norm": 0.43997877836227417, + "learning_rate": 7.736929777106499e-05, + "loss": 1.2998, + "step": 8300 + }, + { + "epoch": 0.8765822784810127, + "grad_norm": 0.5061412453651428, + "learning_rate": 7.630287949371051e-05, + "loss": 1.2768, + "step": 8310 + }, + { + "epoch": 0.8776371308016878, + "grad_norm": 0.4763779640197754, + "learning_rate": 7.525116017285479e-05, + "loss": 1.2959, + "step": 8320 + }, + { + "epoch": 0.8786919831223629, + "grad_norm": 0.4762308895587921, + "learning_rate": 7.421393720570416e-05, + "loss": 1.2891, + "step": 8330 + }, + { + "epoch": 0.879746835443038, + "grad_norm": 0.47142669558525085, + "learning_rate": 7.319101078203692e-05, + "loss": 1.2772, + "step": 8340 + }, + { + "epoch": 0.880801687763713, + "grad_norm": 0.4615560472011566, + "learning_rate": 7.218218384571176e-05, + "loss": 1.2788, + "step": 8350 + }, + { + "epoch": 0.8818565400843882, + "grad_norm": 0.46845415234565735, + "learning_rate": 7.118726205670702e-05, + "loss": 1.2778, + "step": 8360 + }, + { + "epoch": 0.8829113924050633, + "grad_norm": 0.4686196744441986, + "learning_rate": 7.020605375368314e-05, + "loss": 1.2844, + "step": 8370 + }, + { + "epoch": 0.8839662447257384, + "grad_norm": 0.4607507288455963, + "learning_rate": 6.923836991706108e-05, + "loss": 1.2901, + "step": 8380 + }, + { + "epoch": 0.8850210970464135, + "grad_norm": 0.48103025555610657, + "learning_rate": 6.828402413260965e-05, + "loss": 1.2807, + "step": 8390 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 0.4515518546104431, + "learning_rate": 6.73428325555347e-05, + "loss": 1.2826, + 
"step": 8400 + }, + { + "epoch": 0.8871308016877637, + "grad_norm": 0.4447067081928253, + "learning_rate": 6.641461387506347e-05, + "loss": 1.301, + "step": 8410 + }, + { + "epoch": 0.8881856540084389, + "grad_norm": 0.44366255402565, + "learning_rate": 6.549918927951679e-05, + "loss": 1.2872, + "step": 8420 + }, + { + "epoch": 0.8892405063291139, + "grad_norm": 0.47630569338798523, + "learning_rate": 6.459638242186298e-05, + "loss": 1.2849, + "step": 8430 + }, + { + "epoch": 0.890295358649789, + "grad_norm": 0.4725378751754761, + "learning_rate": 6.370601938574637e-05, + "loss": 1.2787, + "step": 8440 + }, + { + "epoch": 0.8913502109704642, + "grad_norm": 0.5515309572219849, + "learning_rate": 6.282792865198421e-05, + "loss": 1.2867, + "step": 8450 + }, + { + "epoch": 0.8924050632911392, + "grad_norm": 0.5004201531410217, + "learning_rate": 6.196194106552512e-05, + "loss": 1.2817, + "step": 8460 + }, + { + "epoch": 0.8934599156118144, + "grad_norm": 0.4434049725532532, + "learning_rate": 6.110788980286329e-05, + "loss": 1.2846, + "step": 8470 + }, + { + "epoch": 0.8945147679324894, + "grad_norm": 0.4570600688457489, + "learning_rate": 6.026561033990159e-05, + "loss": 1.278, + "step": 8480 + }, + { + "epoch": 0.8955696202531646, + "grad_norm": 0.5288087129592896, + "learning_rate": 5.943494042025771e-05, + "loss": 1.2829, + "step": 8490 + }, + { + "epoch": 0.8966244725738397, + "grad_norm": 0.45638394355773926, + "learning_rate": 5.8615720024007174e-05, + "loss": 1.2749, + "step": 8500 + }, + { + "epoch": 0.8976793248945147, + "grad_norm": 0.4415667653083801, + "learning_rate": 5.780779133685717e-05, + "loss": 1.2858, + "step": 8510 + }, + { + "epoch": 0.8987341772151899, + "grad_norm": 0.4950889050960541, + "learning_rate": 5.701099871974525e-05, + "loss": 1.2817, + "step": 8520 + }, + { + "epoch": 0.8997890295358649, + "grad_norm": 0.46818482875823975, + "learning_rate": 5.6225188678857095e-05, + "loss": 1.2943, + "step": 8530 + }, + { + "epoch": 
0.9008438818565401, + "grad_norm": 0.4480957090854645, + "learning_rate": 5.545020983605749e-05, + "loss": 1.2903, + "step": 8540 + }, + { + "epoch": 0.9018987341772152, + "grad_norm": 0.48208552598953247, + "learning_rate": 5.4685912899728965e-05, + "loss": 1.2822, + "step": 8550 + }, + { + "epoch": 0.9029535864978903, + "grad_norm": 0.4542950987815857, + "learning_rate": 5.39321506360123e-05, + "loss": 1.274, + "step": 8560 + }, + { + "epoch": 0.9040084388185654, + "grad_norm": 0.5314316153526306, + "learning_rate": 5.318877784044342e-05, + "loss": 1.2908, + "step": 8570 + }, + { + "epoch": 0.9050632911392406, + "grad_norm": 0.45914578437805176, + "learning_rate": 5.245565130998124e-05, + "loss": 1.2821, + "step": 8580 + }, + { + "epoch": 0.9061181434599156, + "grad_norm": 0.5519391894340515, + "learning_rate": 5.173262981542119e-05, + "loss": 1.2875, + "step": 8590 + }, + { + "epoch": 0.9071729957805907, + "grad_norm": 0.4828607141971588, + "learning_rate": 5.101957407418877e-05, + "loss": 1.2801, + "step": 8600 + }, + { + "epoch": 0.9082278481012658, + "grad_norm": 0.49433600902557373, + "learning_rate": 5.0316346723508287e-05, + "loss": 1.282, + "step": 8610 + }, + { + "epoch": 0.9092827004219409, + "grad_norm": 0.4978744089603424, + "learning_rate": 4.962281229394129e-05, + "loss": 1.2787, + "step": 8620 + }, + { + "epoch": 0.9103375527426161, + "grad_norm": 0.47791722416877747, + "learning_rate": 4.893883718328984e-05, + "loss": 1.2897, + "step": 8630 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 0.5092481970787048, + "learning_rate": 4.8264289630859386e-05, + "loss": 1.2698, + "step": 8640 + }, + { + "epoch": 0.9124472573839663, + "grad_norm": 0.48861607909202576, + "learning_rate": 4.759903969207646e-05, + "loss": 1.2742, + "step": 8650 + }, + { + "epoch": 0.9135021097046413, + "grad_norm": 0.5034738183021545, + "learning_rate": 4.694295921345623e-05, + "loss": 1.2738, + "step": 8660 + }, + { + "epoch": 0.9145569620253164, + "grad_norm": 
0.4502802789211273, + "learning_rate": 4.629592180791501e-05, + "loss": 1.2835, + "step": 8670 + }, + { + "epoch": 0.9156118143459916, + "grad_norm": 0.4390706717967987, + "learning_rate": 4.565780283042316e-05, + "loss": 1.2941, + "step": 8680 + }, + { + "epoch": 0.9166666666666666, + "grad_norm": 0.47274646162986755, + "learning_rate": 4.502847935399348e-05, + "loss": 1.2843, + "step": 8690 + }, + { + "epoch": 0.9177215189873418, + "grad_norm": 0.453307569026947, + "learning_rate": 4.440783014600059e-05, + "loss": 1.2832, + "step": 8700 + }, + { + "epoch": 0.9187763713080169, + "grad_norm": 0.46348509192466736, + "learning_rate": 4.3795735644826776e-05, + "loss": 1.2813, + "step": 8710 + }, + { + "epoch": 0.919831223628692, + "grad_norm": 0.46553558111190796, + "learning_rate": 4.319207793682965e-05, + "loss": 1.2763, + "step": 8720 + }, + { + "epoch": 0.9208860759493671, + "grad_norm": 0.4694517254829407, + "learning_rate": 4.259674073362732e-05, + "loss": 1.288, + "step": 8730 + }, + { + "epoch": 0.9219409282700421, + "grad_norm": 0.4665028750896454, + "learning_rate": 4.200960934969664e-05, + "loss": 1.2776, + "step": 8740 + }, + { + "epoch": 0.9229957805907173, + "grad_norm": 0.4893859922885895, + "learning_rate": 4.143057068028024e-05, + "loss": 1.2746, + "step": 8750 + }, + { + "epoch": 0.9240506329113924, + "grad_norm": 0.5004509687423706, + "learning_rate": 4.0859513179598096e-05, + "loss": 1.2626, + "step": 8760 + }, + { + "epoch": 0.9251054852320675, + "grad_norm": 0.4732063114643097, + "learning_rate": 4.02963268393593e-05, + "loss": 1.2746, + "step": 8770 + }, + { + "epoch": 0.9261603375527426, + "grad_norm": 0.47840139269828796, + "learning_rate": 3.974090316757029e-05, + "loss": 1.2802, + "step": 8780 + }, + { + "epoch": 0.9272151898734177, + "grad_norm": 0.46703195571899414, + "learning_rate": 3.919313516763478e-05, + "loss": 1.2757, + "step": 8790 + }, + { + "epoch": 0.9282700421940928, + "grad_norm": 0.4635733664035797, + "learning_rate": 
3.8652917317742106e-05, + "loss": 1.2735, + "step": 8800 + }, + { + "epoch": 0.929324894514768, + "grad_norm": 0.447155237197876, + "learning_rate": 3.812014555053955e-05, + "loss": 1.2928, + "step": 8810 + }, + { + "epoch": 0.930379746835443, + "grad_norm": 0.4499013423919678, + "learning_rate": 3.759471723308477e-05, + "loss": 1.2777, + "step": 8820 + }, + { + "epoch": 0.9314345991561181, + "grad_norm": 0.4745861887931824, + "learning_rate": 3.707653114707471e-05, + "loss": 1.2952, + "step": 8830 + }, + { + "epoch": 0.9324894514767933, + "grad_norm": 0.45845767855644226, + "learning_rate": 3.6565487469346904e-05, + "loss": 1.2805, + "step": 8840 + }, + { + "epoch": 0.9335443037974683, + "grad_norm": 0.4781351089477539, + "learning_rate": 3.606148775264958e-05, + "loss": 1.2802, + "step": 8850 + }, + { + "epoch": 0.9345991561181435, + "grad_norm": 0.5536774396896362, + "learning_rate": 3.5564434906676834e-05, + "loss": 1.2789, + "step": 8860 + }, + { + "epoch": 0.9356540084388185, + "grad_norm": 0.45513197779655457, + "learning_rate": 3.507423317936521e-05, + "loss": 1.283, + "step": 8870 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 0.4706946015357971, + "learning_rate": 3.4590788138448004e-05, + "loss": 1.286, + "step": 8880 + }, + { + "epoch": 0.9377637130801688, + "grad_norm": 0.4533765912055969, + "learning_rate": 3.411400665326393e-05, + "loss": 1.2835, + "step": 8890 + }, + { + "epoch": 0.9388185654008439, + "grad_norm": 0.45024073123931885, + "learning_rate": 3.364379687681642e-05, + "loss": 1.2734, + "step": 8900 + }, + { + "epoch": 0.939873417721519, + "grad_norm": 0.4632062315940857, + "learning_rate": 3.31800682280803e-05, + "loss": 1.2742, + "step": 8910 + }, + { + "epoch": 0.9409282700421941, + "grad_norm": 0.440384179353714, + "learning_rate": 3.272273137455225e-05, + "loss": 1.2829, + "step": 8920 + }, + { + "epoch": 0.9419831223628692, + "grad_norm": 0.4651128053665161, + "learning_rate": 3.227169821504187e-05, + "loss": 1.277, + "step": 
8930 + }, + { + "epoch": 0.9430379746835443, + "grad_norm": 0.4590139091014862, + "learning_rate": 3.182688186269985e-05, + "loss": 1.277, + "step": 8940 + }, + { + "epoch": 0.9440928270042194, + "grad_norm": 0.46052318811416626, + "learning_rate": 3.138819662828018e-05, + "loss": 1.2807, + "step": 8950 + }, + { + "epoch": 0.9451476793248945, + "grad_norm": 0.4494156539440155, + "learning_rate": 3.095555800363297e-05, + "loss": 1.2835, + "step": 8960 + }, + { + "epoch": 0.9462025316455697, + "grad_norm": 0.5149436593055725, + "learning_rate": 3.052888264542484e-05, + "loss": 1.2802, + "step": 8970 + }, + { + "epoch": 0.9472573839662447, + "grad_norm": 0.5089802145957947, + "learning_rate": 3.0108088359083675e-05, + "loss": 1.2779, + "step": 8980 + }, + { + "epoch": 0.9483122362869199, + "grad_norm": 0.44702383875846863, + "learning_rate": 2.9693094082964775e-05, + "loss": 1.2834, + "step": 8990 + }, + { + "epoch": 0.9493670886075949, + "grad_norm": 0.45987626910209656, + "learning_rate": 2.928381987273507e-05, + "loss": 1.2767, + "step": 9000 + }, + { + "epoch": 0.95042194092827, + "grad_norm": 0.47154590487480164, + "learning_rate": 2.8880186885972716e-05, + "loss": 1.2753, + "step": 9010 + }, + { + "epoch": 0.9514767932489452, + "grad_norm": 0.4788615107536316, + "learning_rate": 2.8482117366978935e-05, + "loss": 1.2744, + "step": 9020 + }, + { + "epoch": 0.9525316455696202, + "grad_norm": 0.5002933144569397, + "learning_rate": 2.808953463179918e-05, + "loss": 1.2961, + "step": 9030 + }, + { + "epoch": 0.9535864978902954, + "grad_norm": 0.4627452492713928, + "learning_rate": 2.770236305345076e-05, + "loss": 1.2722, + "step": 9040 + }, + { + "epoch": 0.9546413502109705, + "grad_norm": 0.44955623149871826, + "learning_rate": 2.732052804735409e-05, + "loss": 1.2822, + "step": 9050 + }, + { + "epoch": 0.9556962025316456, + "grad_norm": 0.4532535672187805, + "learning_rate": 2.6943956056964773e-05, + "loss": 1.2822, + "step": 9060 + }, + { + "epoch": 
0.9567510548523207, + "grad_norm": 0.4534349739551544, + "learning_rate": 2.6572574539603643e-05, + "loss": 1.2837, + "step": 9070 + }, + { + "epoch": 0.9578059071729957, + "grad_norm": 0.4556085467338562, + "learning_rate": 2.6206311952482224e-05, + "loss": 1.285, + "step": 9080 + }, + { + "epoch": 0.9588607594936709, + "grad_norm": 0.4649789333343506, + "learning_rate": 2.584509773892073e-05, + "loss": 1.2854, + "step": 9090 + }, + { + "epoch": 0.959915611814346, + "grad_norm": 0.4726730287075043, + "learning_rate": 2.5488862314756066e-05, + "loss": 1.2645, + "step": 9100 + }, + { + "epoch": 0.9609704641350211, + "grad_norm": 0.46748146414756775, + "learning_rate": 2.513753705493713e-05, + "loss": 1.2753, + "step": 9110 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 0.4547409117221832, + "learning_rate": 2.4791054280304972e-05, + "loss": 1.2856, + "step": 9120 + }, + { + "epoch": 0.9630801687763713, + "grad_norm": 0.48537737131118774, + "learning_rate": 2.4449347244555043e-05, + "loss": 1.2696, + "step": 9130 + }, + { + "epoch": 0.9641350210970464, + "grad_norm": 0.571936845779419, + "learning_rate": 2.4112350121379255e-05, + "loss": 1.29, + "step": 9140 + }, + { + "epoch": 0.9651898734177216, + "grad_norm": 0.4952663779258728, + "learning_rate": 2.3779997991785207e-05, + "loss": 1.2768, + "step": 9150 + }, + { + "epoch": 0.9662447257383966, + "grad_norm": 0.4477560520172119, + "learning_rate": 2.3452226831590232e-05, + "loss": 1.2839, + "step": 9160 + }, + { + "epoch": 0.9672995780590717, + "grad_norm": 0.4672028124332428, + "learning_rate": 2.3128973499087785e-05, + "loss": 1.283, + "step": 9170 + }, + { + "epoch": 0.9683544303797469, + "grad_norm": 0.4587574899196625, + "learning_rate": 2.2810175722883866e-05, + "loss": 1.2772, + "step": 9180 + }, + { + "epoch": 0.9694092827004219, + "grad_norm": 0.4619159996509552, + "learning_rate": 2.2495772089901067e-05, + "loss": 1.2743, + "step": 9190 + }, + { + "epoch": 0.9704641350210971, + "grad_norm": 
0.5093298554420471, + "learning_rate": 2.218570203354799e-05, + "loss": 1.2778, + "step": 9200 + }, + { + "epoch": 0.9715189873417721, + "grad_norm": 0.46323612332344055, + "learning_rate": 2.187990582205175e-05, + "loss": 1.2786, + "step": 9210 + }, + { + "epoch": 0.9725738396624473, + "grad_norm": 0.4710935652256012, + "learning_rate": 2.157832454695122e-05, + "loss": 1.2775, + "step": 9220 + }, + { + "epoch": 0.9736286919831224, + "grad_norm": 0.44593656063079834, + "learning_rate": 2.1280900111748943e-05, + "loss": 1.2686, + "step": 9230 + }, + { + "epoch": 0.9746835443037974, + "grad_norm": 0.4625449776649475, + "learning_rate": 2.0987575220719476e-05, + "loss": 1.2622, + "step": 9240 + }, + { + "epoch": 0.9757383966244726, + "grad_norm": 0.44963812828063965, + "learning_rate": 2.069829336787193e-05, + "loss": 1.2732, + "step": 9250 + }, + { + "epoch": 0.9767932489451476, + "grad_norm": 0.4785225987434387, + "learning_rate": 2.0412998826064695e-05, + "loss": 1.2694, + "step": 9260 + }, + { + "epoch": 0.9778481012658228, + "grad_norm": 0.4495578706264496, + "learning_rate": 2.0131636636270178e-05, + "loss": 1.2814, + "step": 9270 + }, + { + "epoch": 0.9789029535864979, + "grad_norm": 0.4522004723548889, + "learning_rate": 1.9854152596987523e-05, + "loss": 1.2688, + "step": 9280 + }, + { + "epoch": 0.979957805907173, + "grad_norm": 0.4472133219242096, + "learning_rate": 1.9580493253801253e-05, + "loss": 1.2759, + "step": 9290 + }, + { + "epoch": 0.9810126582278481, + "grad_norm": 0.46734291315078735, + "learning_rate": 1.9310605889083842e-05, + "loss": 1.2756, + "step": 9300 + }, + { + "epoch": 0.9820675105485233, + "grad_norm": 0.4891567826271057, + "learning_rate": 1.904443851184018e-05, + "loss": 1.2751, + "step": 9310 + }, + { + "epoch": 0.9831223628691983, + "grad_norm": 0.4750478267669678, + "learning_rate": 1.87819398476921e-05, + "loss": 1.2718, + "step": 9320 + }, + { + "epoch": 0.9841772151898734, + "grad_norm": 0.4508619010448456, + "learning_rate": 
1.8523059329000848e-05, + "loss": 1.2826, + "step": 9330 + }, + { + "epoch": 0.9852320675105485, + "grad_norm": 0.4747237265110016, + "learning_rate": 1.826774708512579e-05, + "loss": 1.283, + "step": 9340 + }, + { + "epoch": 0.9862869198312236, + "grad_norm": 0.4569663405418396, + "learning_rate": 1.8015953932817347e-05, + "loss": 1.2904, + "step": 9350 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 0.4401055574417114, + "learning_rate": 1.7767631366742332e-05, + "loss": 1.2843, + "step": 9360 + }, + { + "epoch": 0.9883966244725738, + "grad_norm": 0.4622843861579895, + "learning_rate": 1.7522731550139926e-05, + "loss": 1.28, + "step": 9370 + }, + { + "epoch": 0.989451476793249, + "grad_norm": 0.4621565341949463, + "learning_rate": 1.728120730560641e-05, + "loss": 1.2671, + "step": 9380 + }, + { + "epoch": 0.990506329113924, + "grad_norm": 0.4712284207344055, + "learning_rate": 1.704301210600693e-05, + "loss": 1.2721, + "step": 9390 + }, + { + "epoch": 0.9915611814345991, + "grad_norm": 0.44979676604270935, + "learning_rate": 1.6808100065512536e-05, + "loss": 1.2863, + "step": 9400 + }, + { + "epoch": 0.9926160337552743, + "grad_norm": 0.4668086767196655, + "learning_rate": 1.657642593076074e-05, + "loss": 1.2601, + "step": 9410 + }, + { + "epoch": 0.9936708860759493, + "grad_norm": 0.4591745138168335, + "learning_rate": 1.634794507213793e-05, + "loss": 1.2788, + "step": 9420 + }, + { + "epoch": 0.9947257383966245, + "grad_norm": 0.4544673562049866, + "learning_rate": 1.6122613475181976e-05, + "loss": 1.279, + "step": 9430 + }, + { + "epoch": 0.9957805907172996, + "grad_norm": 0.43978649377822876, + "learning_rate": 1.590038773210323e-05, + "loss": 1.2751, + "step": 9440 + }, + { + "epoch": 0.9968354430379747, + "grad_norm": 0.4516960084438324, + "learning_rate": 1.568122503342252e-05, + "loss": 1.2841, + "step": 9450 + }, + { + "epoch": 0.9978902953586498, + "grad_norm": 0.4651743471622467, + "learning_rate": 1.5465083159724344e-05, + "loss": 1.2691, + 
"step": 9460 + }, + { + "epoch": 0.9989451476793249, + "grad_norm": 0.43556979298591614, + "learning_rate": 1.5251920473523708e-05, + "loss": 1.2826, + "step": 9470 + }, + { + "epoch": 1.0, + "grad_norm": 1.3617901802062988, + "learning_rate": 1.5041695911245136e-05, + "loss": 1.2731, + "step": 9480 + } + ], + "logging_steps": 10, + "max_steps": 9480, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.8335001376290816e+16, + "train_batch_size": 1024, + "trial_name": null, + "trial_params": null +} diff --git a/saves-qwen2/checkpoint-9480/training_args.bin b/saves-qwen2/checkpoint-9480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..5e62989db3138fa04345cd8727f32437026b2cee --- /dev/null +++ b/saves-qwen2/checkpoint-9480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77e331b333a2cc321ba889139a24869ba644726fa158ec29ebf5dcfd1ebf1be9 +size 5112 diff --git a/saves-qwen2/config.json b/saves-qwen2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a785d3506e02f39ed9708141afd519ae6edaa6ea --- /dev/null +++ b/saves-qwen2/config.json @@ -0,0 +1,25 @@ +{ + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 768, + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen2", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "rms_norm_eps": 1e-06, + "rope_theta": 10000.0, + "sliding_window": 4096, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + 
"use_sliding_window": false, + "vocab_size": 2000 +} diff --git a/saves-qwen2/generation_config.json b/saves-qwen2/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ca31b478470766d67155dda554564aa41d8444b --- /dev/null +++ b/saves-qwen2/generation_config.json @@ -0,0 +1,4 @@ +{ + "_from_model_config": true, + "transformers_version": "4.42.4" +} diff --git a/saves-qwen2/model.safetensors b/saves-qwen2/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b8dc8738d5a38d0975e9c9ea3430293d096f5f9c --- /dev/null +++ b/saves-qwen2/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0550995c0bba2a4ac014b832d38ebfcbaf367e0b3c1342b68d8ca1f35a60ea1b +size 8351424 diff --git a/saves-qwen2/result.log b/saves-qwen2/result.log new file mode 100644 index 0000000000000000000000000000000000000000..e890d275a0146c9c706373c4e5c64ac0068fd6a4 --- /dev/null +++ b/saves-qwen2/result.log @@ -0,0 +1 @@ +{'train_runtime': 2149.5137, 'train_samples_per_second': 4515.721, 'train_steps_per_second': 4.41, 'train_loss': 1.5735396882149741, 'epoch': 1.0} \ No newline at end of file diff --git a/saves-qwen2/special_tokens_map.json b/saves-qwen2/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-qwen2/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-qwen2/tokenizer.json b/saves-qwen2/tokenizer.json new file mode 100644 index 
0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-qwen2/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, 
+ "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + 
"Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + 
"ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + 
"家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + 
"åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 
792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 
933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 
1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + 
"ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + 
"è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + 
"roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + 
"æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 
1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + 
"ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, 
+ "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-qwen2/tokenizer_config.json b/saves-qwen2/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-qwen2/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "model_max_length": 
4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-rwkv-cosine/checkpoint-9480/config.json b/saves-rwkv-cosine/checkpoint-9480/config.json new file mode 100644 index 0000000000000000000000000000000000000000..fba72b626605f3d995236b195dd7881fa619390d --- /dev/null +++ b/saves-rwkv-cosine/checkpoint-9480/config.json @@ -0,0 +1,22 @@ +{ + "architectures": [ + "RwkvForCausalLM" + ], + "attention_hidden_size": 256, + "bos_token_id": 0, + "context_length": 1024, + "eos_token_id": 0, + "hidden_size": 256, + "intermediate_size": 1024, + "layer_norm_epsilon": 1e-05, + "model_type": "rwkv", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "rescale_every": 6, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-rwkv-cosine/checkpoint-9480/generation_config.json b/saves-rwkv-cosine/checkpoint-9480/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..14e4f03d0d73dc2707d488ac8f586bd62ef72a7e --- /dev/null +++ b/saves-rwkv-cosine/checkpoint-9480/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 0, + "eos_token_id": 0, + "transformers_version": "4.42.4" +} diff --git a/saves-rwkv-cosine/checkpoint-9480/model.safetensors b/saves-rwkv-cosine/checkpoint-9480/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5ca51955a22fb2dec6846962dba6ff879c628ac2 --- /dev/null +++ b/saves-rwkv-cosine/checkpoint-9480/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54a81441cd9133223a3db4ef8b8448fde4c5c8e2c41c75b36e115c428aa38707 +size 8894568 diff --git a/saves-rwkv-cosine/checkpoint-9480/optimizer.pt b/saves-rwkv-cosine/checkpoint-9480/optimizer.pt new file mode 100644 index 
0000000000000000000000000000000000000000..146866ff1586b5424b67d53a9cc4bb4a76d97273 --- /dev/null +++ b/saves-rwkv-cosine/checkpoint-9480/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c41d2be281b17560cf8c1f9c033261ade3c47c9f326c429ea8082457ec5da3b +size 17815610 diff --git a/saves-rwkv-cosine/checkpoint-9480/rng_state.pth b/saves-rwkv-cosine/checkpoint-9480/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f8291cd6ce87668b786a72f3e93d072fbe54902 --- /dev/null +++ b/saves-rwkv-cosine/checkpoint-9480/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c917636c7a58af68a29056522a757e9f9b99005b776641aa157c536967817d +size 14244 diff --git a/saves-rwkv-cosine/checkpoint-9480/scheduler.pt b/saves-rwkv-cosine/checkpoint-9480/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..47ca193b702fc31e51e3ee0689a4054b394880b6 --- /dev/null +++ b/saves-rwkv-cosine/checkpoint-9480/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97f195640e66bde784a0961679ecd73c2a561c5a12962a7316325d731f304936 +size 1064 diff --git a/saves-rwkv-cosine/checkpoint-9480/special_tokens_map.json b/saves-rwkv-cosine/checkpoint-9480/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-rwkv-cosine/checkpoint-9480/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-rwkv-cosine/checkpoint-9480/tokenizer.json 
b/saves-rwkv-cosine/checkpoint-9480/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-rwkv-cosine/checkpoint-9480/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 
24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 
196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 
347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 
494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, 
+ "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 
785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 
926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + 
"人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + 
"ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 
1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 
1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 
1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + 
"Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + 
"Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, 
+ "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-rwkv-cosine/checkpoint-9480/tokenizer_config.json b/saves-rwkv-cosine/checkpoint-9480/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-rwkv-cosine/checkpoint-9480/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + 
"eos_token": "<|im_end|>", + "errors": "replace", + "model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-rwkv-cosine/checkpoint-9480/trainer_state.json b/saves-rwkv-cosine/checkpoint-9480/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..471dbd7267ff090ec3d651219a36cff764fb751e --- /dev/null +++ b/saves-rwkv-cosine/checkpoint-9480/trainer_state.json @@ -0,0 +1,6669 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 9480, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0010548523206751054, + "grad_norm": 18.722108840942383, + "learning_rate": 0.00015789473684210527, + "loss": 250.2608, + "step": 10 + }, + { + "epoch": 0.002109704641350211, + "grad_norm": 45.634803771972656, + "learning_rate": 0.00031578947368421053, + "loss": 232.0468, + "step": 20 + }, + { + "epoch": 0.0031645569620253164, + "grad_norm": 96.65724182128906, + "learning_rate": 0.0004421052631578947, + "loss": 115.5184, + "step": 30 + }, + { + "epoch": 0.004219409282700422, + "grad_norm": 23.42929458618164, + "learning_rate": 0.0006000000000000001, + "loss": 45.9972, + "step": 40 + }, + { + "epoch": 0.005274261603375527, + "grad_norm": 9.147716522216797, + "learning_rate": 0.0007578947368421053, + "loss": 28.6208, + "step": 50 + }, + { + "epoch": 0.006329113924050633, + "grad_norm": 5.247542381286621, + "learning_rate": 0.0009157894736842105, + "loss": 20.7648, + "step": 60 + }, + { + "epoch": 0.007383966244725738, + "grad_norm": 3.367560386657715, + "learning_rate": 0.0010736842105263159, + "loss": 16.0778, + "step": 70 + }, + { + "epoch": 0.008438818565400843, + "grad_norm": 2.61910343170166, + "learning_rate": 0.001231578947368421, + "loss": 12.4211, + "step": 80 + }, + { + "epoch": 0.00949367088607595, + 
"grad_norm": 1.9960277080535889, + "learning_rate": 0.0013894736842105264, + "loss": 9.6581, + "step": 90 + }, + { + "epoch": 0.010548523206751054, + "grad_norm": 2.2125940322875977, + "learning_rate": 0.001499999621814449, + "loss": 7.7822, + "step": 100 + }, + { + "epoch": 0.011603375527426161, + "grad_norm": 2.806112766265869, + "learning_rate": 0.0014999928985263743, + "loss": 6.5887, + "step": 110 + }, + { + "epoch": 0.012658227848101266, + "grad_norm": 1.7982168197631836, + "learning_rate": 0.0014999777712016607, + "loss": 5.917, + "step": 120 + }, + { + "epoch": 0.013713080168776372, + "grad_norm": 4.552407264709473, + "learning_rate": 0.0014999542400098169, + "loss": 5.4641, + "step": 130 + }, + { + "epoch": 0.014767932489451477, + "grad_norm": 1.6131575107574463, + "learning_rate": 0.0014999223052145215, + "loss": 5.1688, + "step": 140 + }, + { + "epoch": 0.015822784810126583, + "grad_norm": 1.692301869392395, + "learning_rate": 0.0014998819671736198, + "loss": 4.986, + "step": 150 + }, + { + "epoch": 0.016877637130801686, + "grad_norm": 3.1329479217529297, + "learning_rate": 0.0014998332263391192, + "loss": 4.8732, + "step": 160 + }, + { + "epoch": 0.017932489451476793, + "grad_norm": 3.0287599563598633, + "learning_rate": 0.0014997760832571839, + "loss": 4.7395, + "step": 170 + }, + { + "epoch": 0.0189873417721519, + "grad_norm": 3.261260986328125, + "learning_rate": 0.0014997105385681306, + "loss": 4.6606, + "step": 180 + }, + { + "epoch": 0.020042194092827006, + "grad_norm": 2.0352933406829834, + "learning_rate": 0.0014996365930064197, + "loss": 4.5638, + "step": 190 + }, + { + "epoch": 0.02109704641350211, + "grad_norm": 1.9503201246261597, + "learning_rate": 0.001499554247400647, + "loss": 4.485, + "step": 200 + }, + { + "epoch": 0.022151898734177215, + "grad_norm": 1.7879048585891724, + "learning_rate": 0.001499463502673535, + "loss": 4.4099, + "step": 210 + }, + { + "epoch": 0.023206751054852322, + "grad_norm": 2.812309741973877, + "learning_rate": 
0.0014993643598419234, + "loss": 4.3488, + "step": 220 + }, + { + "epoch": 0.024261603375527425, + "grad_norm": 1.129703402519226, + "learning_rate": 0.001499256820016755, + "loss": 4.2425, + "step": 230 + }, + { + "epoch": 0.02531645569620253, + "grad_norm": 1.2690770626068115, + "learning_rate": 0.0014991408844030672, + "loss": 4.1608, + "step": 240 + }, + { + "epoch": 0.026371308016877638, + "grad_norm": 1.8461711406707764, + "learning_rate": 0.0014990165542999746, + "loss": 4.0789, + "step": 250 + }, + { + "epoch": 0.027426160337552744, + "grad_norm": 1.1240200996398926, + "learning_rate": 0.0014988838311006565, + "loss": 4.0108, + "step": 260 + }, + { + "epoch": 0.028481012658227847, + "grad_norm": 1.4990788698196411, + "learning_rate": 0.0014987427162923416, + "loss": 3.9503, + "step": 270 + }, + { + "epoch": 0.029535864978902954, + "grad_norm": 3.3958492279052734, + "learning_rate": 0.0014985932114562896, + "loss": 3.9145, + "step": 280 + }, + { + "epoch": 0.03059071729957806, + "grad_norm": 2.517460823059082, + "learning_rate": 0.0014984353182677759, + "loss": 3.9125, + "step": 290 + }, + { + "epoch": 0.03164556962025317, + "grad_norm": 1.1665889024734497, + "learning_rate": 0.0014982690384960705, + "loss": 3.8308, + "step": 300 + }, + { + "epoch": 0.03270042194092827, + "grad_norm": 1.1971615552902222, + "learning_rate": 0.0014980943740044196, + "loss": 3.7881, + "step": 310 + }, + { + "epoch": 0.03375527426160337, + "grad_norm": 1.3058909177780151, + "learning_rate": 0.0014979113267500235, + "loss": 3.7542, + "step": 320 + }, + { + "epoch": 0.03481012658227848, + "grad_norm": 1.5051803588867188, + "learning_rate": 0.0014977198987840168, + "loss": 3.7379, + "step": 330 + }, + { + "epoch": 0.035864978902953586, + "grad_norm": 1.3812662363052368, + "learning_rate": 0.0014975200922514428, + "loss": 3.6975, + "step": 340 + }, + { + "epoch": 0.03691983122362869, + "grad_norm": 1.181865930557251, + "learning_rate": 0.0014973119093912317, + "loss": 3.6456, + 
"step": 350 + }, + { + "epoch": 0.0379746835443038, + "grad_norm": 1.8039225339889526, + "learning_rate": 0.0014970953525361738, + "loss": 3.6184, + "step": 360 + }, + { + "epoch": 0.039029535864978905, + "grad_norm": 0.578270673751831, + "learning_rate": 0.0014968704241128947, + "loss": 3.5951, + "step": 370 + }, + { + "epoch": 0.04008438818565401, + "grad_norm": 1.3508307933807373, + "learning_rate": 0.0014966371266418267, + "loss": 3.5884, + "step": 380 + }, + { + "epoch": 0.04113924050632911, + "grad_norm": 1.1480920314788818, + "learning_rate": 0.0014963954627371823, + "loss": 3.5642, + "step": 390 + }, + { + "epoch": 0.04219409282700422, + "grad_norm": 1.7993396520614624, + "learning_rate": 0.0014961454351069233, + "loss": 3.5283, + "step": 400 + }, + { + "epoch": 0.043248945147679324, + "grad_norm": 1.3833714723587036, + "learning_rate": 0.0014958870465527317, + "loss": 3.5122, + "step": 410 + }, + { + "epoch": 0.04430379746835443, + "grad_norm": 1.0338820219039917, + "learning_rate": 0.0014956202999699773, + "loss": 3.4873, + "step": 420 + }, + { + "epoch": 0.04535864978902954, + "grad_norm": 0.71613609790802, + "learning_rate": 0.0014953451983476854, + "loss": 3.4505, + "step": 430 + }, + { + "epoch": 0.046413502109704644, + "grad_norm": 1.5218381881713867, + "learning_rate": 0.0014950617447685047, + "loss": 3.4329, + "step": 440 + }, + { + "epoch": 0.04746835443037975, + "grad_norm": 1.8887343406677246, + "learning_rate": 0.0014947699424086704, + "loss": 3.4354, + "step": 450 + }, + { + "epoch": 0.04852320675105485, + "grad_norm": 1.257785439491272, + "learning_rate": 0.0014944697945379708, + "loss": 3.4184, + "step": 460 + }, + { + "epoch": 0.049578059071729956, + "grad_norm": 1.844351887702942, + "learning_rate": 0.001494161304519709, + "loss": 3.3885, + "step": 470 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 1.202177882194519, + "learning_rate": 0.0014938444758106665, + "loss": 3.3886, + "step": 480 + }, + { + "epoch": 0.05168776371308017, 
+ "grad_norm": 1.5565966367721558, + "learning_rate": 0.0014935193119610638, + "loss": 3.3534, + "step": 490 + }, + { + "epoch": 0.052742616033755275, + "grad_norm": 1.0104739665985107, + "learning_rate": 0.0014931858166145203, + "loss": 3.3435, + "step": 500 + }, + { + "epoch": 0.05379746835443038, + "grad_norm": 1.067876935005188, + "learning_rate": 0.0014928439935080143, + "loss": 3.3297, + "step": 510 + }, + { + "epoch": 0.05485232067510549, + "grad_norm": 0.5259475708007812, + "learning_rate": 0.001492493846471841, + "loss": 3.2978, + "step": 520 + }, + { + "epoch": 0.05590717299578059, + "grad_norm": 0.5997281074523926, + "learning_rate": 0.0014921353794295684, + "loss": 3.2827, + "step": 530 + }, + { + "epoch": 0.056962025316455694, + "grad_norm": 0.7401397824287415, + "learning_rate": 0.0014917685963979949, + "loss": 3.2662, + "step": 540 + }, + { + "epoch": 0.0580168776371308, + "grad_norm": 1.1510061025619507, + "learning_rate": 0.0014913935014871035, + "loss": 3.272, + "step": 550 + }, + { + "epoch": 0.05907172995780591, + "grad_norm": 0.47159454226493835, + "learning_rate": 0.0014910100989000159, + "loss": 3.2197, + "step": 560 + }, + { + "epoch": 0.060126582278481014, + "grad_norm": 1.1835722923278809, + "learning_rate": 0.0014906183929329455, + "loss": 3.222, + "step": 570 + }, + { + "epoch": 0.06118143459915612, + "grad_norm": 0.9146947860717773, + "learning_rate": 0.0014902183879751483, + "loss": 3.2406, + "step": 580 + }, + { + "epoch": 0.06223628691983123, + "grad_norm": 0.858796238899231, + "learning_rate": 0.0014898100885088754, + "loss": 3.2066, + "step": 590 + }, + { + "epoch": 0.06329113924050633, + "grad_norm": 0.8963838815689087, + "learning_rate": 0.0014893934991093221, + "loss": 3.1846, + "step": 600 + }, + { + "epoch": 0.06434599156118144, + "grad_norm": 0.6229057312011719, + "learning_rate": 0.0014889686244445755, + "loss": 3.1745, + "step": 610 + }, + { + "epoch": 0.06540084388185655, + "grad_norm": 0.7667859792709351, + 
"learning_rate": 0.0014885354692755642, + "loss": 3.1698, + "step": 620 + }, + { + "epoch": 0.06645569620253164, + "grad_norm": 0.7055838108062744, + "learning_rate": 0.0014880940384560028, + "loss": 3.1488, + "step": 630 + }, + { + "epoch": 0.06751054852320675, + "grad_norm": 0.7363503575325012, + "learning_rate": 0.0014876443369323397, + "loss": 3.1544, + "step": 640 + }, + { + "epoch": 0.06856540084388185, + "grad_norm": 0.8960818648338318, + "learning_rate": 0.0014871863697436998, + "loss": 3.1446, + "step": 650 + }, + { + "epoch": 0.06962025316455696, + "grad_norm": 0.8842419385910034, + "learning_rate": 0.0014867201420218292, + "loss": 3.1299, + "step": 660 + }, + { + "epoch": 0.07067510548523206, + "grad_norm": 0.9691068530082703, + "learning_rate": 0.0014862456589910368, + "loss": 3.1157, + "step": 670 + }, + { + "epoch": 0.07172995780590717, + "grad_norm": 1.1290061473846436, + "learning_rate": 0.001485762925968137, + "loss": 3.1027, + "step": 680 + }, + { + "epoch": 0.07278481012658228, + "grad_norm": 0.723625123500824, + "learning_rate": 0.0014852719483623893, + "loss": 3.1158, + "step": 690 + }, + { + "epoch": 0.07383966244725738, + "grad_norm": 0.8670489192008972, + "learning_rate": 0.0014847727316754367, + "loss": 3.0894, + "step": 700 + }, + { + "epoch": 0.07489451476793249, + "grad_norm": 0.6375906467437744, + "learning_rate": 0.0014842652815012466, + "loss": 3.0809, + "step": 710 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 0.5091776251792908, + "learning_rate": 0.0014837496035260457, + "loss": 3.0608, + "step": 720 + }, + { + "epoch": 0.0770042194092827, + "grad_norm": 0.5687161087989807, + "learning_rate": 0.0014832257035282577, + "loss": 3.0499, + "step": 730 + }, + { + "epoch": 0.07805907172995781, + "grad_norm": 0.6067019701004028, + "learning_rate": 0.0014826935873784378, + "loss": 3.0612, + "step": 740 + }, + { + "epoch": 0.07911392405063292, + "grad_norm": 0.5617740750312805, + "learning_rate": 0.001482153261039207, + "loss": 
3.0396, + "step": 750 + }, + { + "epoch": 0.08016877637130802, + "grad_norm": 1.2799808979034424, + "learning_rate": 0.0014816047305651863, + "loss": 3.0417, + "step": 760 + }, + { + "epoch": 0.08122362869198312, + "grad_norm": 0.9442919492721558, + "learning_rate": 0.001481048002102927, + "loss": 3.0303, + "step": 770 + }, + { + "epoch": 0.08227848101265822, + "grad_norm": 0.8312678933143616, + "learning_rate": 0.0014804830818908438, + "loss": 3.0274, + "step": 780 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 0.5618844628334045, + "learning_rate": 0.0014799099762591434, + "loss": 3.0304, + "step": 790 + }, + { + "epoch": 0.08438818565400844, + "grad_norm": 0.8328171968460083, + "learning_rate": 0.001479328691629754, + "loss": 3.0064, + "step": 800 + }, + { + "epoch": 0.08544303797468354, + "grad_norm": 0.8458903431892395, + "learning_rate": 0.0014787392345162538, + "loss": 2.9981, + "step": 810 + }, + { + "epoch": 0.08649789029535865, + "grad_norm": 0.7068672180175781, + "learning_rate": 0.0014781416115237976, + "loss": 3.0036, + "step": 820 + }, + { + "epoch": 0.08755274261603375, + "grad_norm": 0.5836540460586548, + "learning_rate": 0.001477535829349043, + "loss": 3.0002, + "step": 830 + }, + { + "epoch": 0.08860759493670886, + "grad_norm": 0.47853389382362366, + "learning_rate": 0.0014769218947800749, + "loss": 2.9726, + "step": 840 + }, + { + "epoch": 0.08966244725738397, + "grad_norm": 0.5822595357894897, + "learning_rate": 0.00147629981469633, + "loss": 2.9776, + "step": 850 + }, + { + "epoch": 0.09071729957805907, + "grad_norm": 0.5078749060630798, + "learning_rate": 0.0014756695960685194, + "loss": 2.9745, + "step": 860 + }, + { + "epoch": 0.09177215189873418, + "grad_norm": 0.49856433272361755, + "learning_rate": 0.0014750312459585505, + "loss": 2.9699, + "step": 870 + }, + { + "epoch": 0.09282700421940929, + "grad_norm": 0.509080171585083, + "learning_rate": 0.001474384771519448, + "loss": 2.9672, + "step": 880 + }, + { + "epoch": 
0.0938818565400844, + "grad_norm": 0.6919294595718384, + "learning_rate": 0.0014737301799952734, + "loss": 2.9658, + "step": 890 + }, + { + "epoch": 0.0949367088607595, + "grad_norm": 0.8457950353622437, + "learning_rate": 0.0014730674787210448, + "loss": 2.9596, + "step": 900 + }, + { + "epoch": 0.09599156118143459, + "grad_norm": 0.6739800572395325, + "learning_rate": 0.0014723966751226535, + "loss": 2.955, + "step": 910 + }, + { + "epoch": 0.0970464135021097, + "grad_norm": 0.5299966931343079, + "learning_rate": 0.0014717177767167812, + "loss": 2.9466, + "step": 920 + }, + { + "epoch": 0.0981012658227848, + "grad_norm": 0.5009211301803589, + "learning_rate": 0.0014710307911108159, + "loss": 2.9299, + "step": 930 + }, + { + "epoch": 0.09915611814345991, + "grad_norm": 0.5819326043128967, + "learning_rate": 0.0014703357260027667, + "loss": 2.9301, + "step": 940 + }, + { + "epoch": 0.10021097046413502, + "grad_norm": 0.9864362478256226, + "learning_rate": 0.001469632589181178, + "loss": 2.9159, + "step": 950 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 0.7671509981155396, + "learning_rate": 0.0014689213885250411, + "loss": 2.9248, + "step": 960 + }, + { + "epoch": 0.10232067510548523, + "grad_norm": 0.9565985798835754, + "learning_rate": 0.0014682021320037064, + "loss": 2.9256, + "step": 970 + }, + { + "epoch": 0.10337552742616034, + "grad_norm": 0.6852691769599915, + "learning_rate": 0.0014674748276767944, + "loss": 2.9027, + "step": 980 + }, + { + "epoch": 0.10443037974683544, + "grad_norm": 0.7652585506439209, + "learning_rate": 0.0014667394836941055, + "loss": 2.9037, + "step": 990 + }, + { + "epoch": 0.10548523206751055, + "grad_norm": 0.595356822013855, + "learning_rate": 0.0014659961082955277, + "loss": 2.9052, + "step": 1000 + }, + { + "epoch": 0.10654008438818566, + "grad_norm": 0.7724465131759644, + "learning_rate": 0.0014652447098109458, + "loss": 2.9051, + "step": 1010 + }, + { + "epoch": 0.10759493670886076, + "grad_norm": 
0.48912882804870605, + "learning_rate": 0.0014644852966601463, + "loss": 2.8938, + "step": 1020 + }, + { + "epoch": 0.10864978902953587, + "grad_norm": 0.6455403566360474, + "learning_rate": 0.0014637178773527246, + "loss": 2.8856, + "step": 1030 + }, + { + "epoch": 0.10970464135021098, + "grad_norm": 0.799552857875824, + "learning_rate": 0.0014629424604879885, + "loss": 2.882, + "step": 1040 + }, + { + "epoch": 0.11075949367088607, + "grad_norm": 0.651103675365448, + "learning_rate": 0.001462159054754863, + "loss": 2.8923, + "step": 1050 + }, + { + "epoch": 0.11181434599156118, + "grad_norm": 0.6565688848495483, + "learning_rate": 0.0014613676689317916, + "loss": 2.8762, + "step": 1060 + }, + { + "epoch": 0.11286919831223628, + "grad_norm": 0.5928733348846436, + "learning_rate": 0.0014605683118866387, + "loss": 2.8683, + "step": 1070 + }, + { + "epoch": 0.11392405063291139, + "grad_norm": 0.4926123023033142, + "learning_rate": 0.0014597609925765906, + "loss": 2.8673, + "step": 1080 + }, + { + "epoch": 0.1149789029535865, + "grad_norm": 0.5748282670974731, + "learning_rate": 0.0014589457200480543, + "loss": 2.8632, + "step": 1090 + }, + { + "epoch": 0.1160337552742616, + "grad_norm": 0.5291131734848022, + "learning_rate": 0.0014581225034365564, + "loss": 2.8556, + "step": 1100 + }, + { + "epoch": 0.11708860759493671, + "grad_norm": 0.4873255491256714, + "learning_rate": 0.0014572913519666417, + "loss": 2.8616, + "step": 1110 + }, + { + "epoch": 0.11814345991561181, + "grad_norm": 0.6300792694091797, + "learning_rate": 0.001456452274951767, + "loss": 2.8538, + "step": 1120 + }, + { + "epoch": 0.11919831223628692, + "grad_norm": 0.562624990940094, + "learning_rate": 0.0014556052817942013, + "loss": 2.8401, + "step": 1130 + }, + { + "epoch": 0.12025316455696203, + "grad_norm": 0.5021618604660034, + "learning_rate": 0.0014547503819849154, + "loss": 2.8561, + "step": 1140 + }, + { + "epoch": 0.12130801687763713, + "grad_norm": 0.5232928395271301, + "learning_rate": 
0.0014538875851034798, + "loss": 2.8326, + "step": 1150 + }, + { + "epoch": 0.12236286919831224, + "grad_norm": 0.37592458724975586, + "learning_rate": 0.0014530169008179546, + "loss": 2.8466, + "step": 1160 + }, + { + "epoch": 0.12341772151898735, + "grad_norm": 0.5733973979949951, + "learning_rate": 0.0014521383388847824, + "loss": 2.8269, + "step": 1170 + }, + { + "epoch": 0.12447257383966245, + "grad_norm": 0.8537278175354004, + "learning_rate": 0.0014512519091486786, + "loss": 2.8214, + "step": 1180 + }, + { + "epoch": 0.12552742616033755, + "grad_norm": 0.8803452849388123, + "learning_rate": 0.0014503576215425212, + "loss": 2.8337, + "step": 1190 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 0.6182850003242493, + "learning_rate": 0.0014494554860872398, + "loss": 2.8342, + "step": 1200 + }, + { + "epoch": 0.12763713080168776, + "grad_norm": 0.6760320663452148, + "learning_rate": 0.001448545512891702, + "loss": 2.8313, + "step": 1210 + }, + { + "epoch": 0.12869198312236288, + "grad_norm": 0.75913006067276, + "learning_rate": 0.0014476277121526027, + "loss": 2.8183, + "step": 1220 + }, + { + "epoch": 0.12974683544303797, + "grad_norm": 0.5579585433006287, + "learning_rate": 0.0014467020941543464, + "loss": 2.8064, + "step": 1230 + }, + { + "epoch": 0.1308016877637131, + "grad_norm": 0.4450322091579437, + "learning_rate": 0.0014457686692689355, + "loss": 2.8159, + "step": 1240 + }, + { + "epoch": 0.13185654008438819, + "grad_norm": 0.4494950771331787, + "learning_rate": 0.0014448274479558513, + "loss": 2.8112, + "step": 1250 + }, + { + "epoch": 0.13291139240506328, + "grad_norm": 0.5273593664169312, + "learning_rate": 0.001443878440761938, + "loss": 2.8012, + "step": 1260 + }, + { + "epoch": 0.1339662447257384, + "grad_norm": 0.545518696308136, + "learning_rate": 0.001442921658321285, + "loss": 2.7987, + "step": 1270 + }, + { + "epoch": 0.1350210970464135, + "grad_norm": 0.6408368349075317, + "learning_rate": 0.0014419571113551063, + "loss": 2.8113, + 
"step": 1280 + }, + { + "epoch": 0.1360759493670886, + "grad_norm": 0.7538550496101379, + "learning_rate": 0.001440984810671622, + "loss": 2.7978, + "step": 1290 + }, + { + "epoch": 0.1371308016877637, + "grad_norm": 0.49550363421440125, + "learning_rate": 0.001440004767165936, + "loss": 2.7801, + "step": 1300 + }, + { + "epoch": 0.13818565400843882, + "grad_norm": 0.45245295763015747, + "learning_rate": 0.001439016991819914, + "loss": 2.7844, + "step": 1310 + }, + { + "epoch": 0.13924050632911392, + "grad_norm": 0.7467061281204224, + "learning_rate": 0.0014380214957020613, + "loss": 2.7886, + "step": 1320 + }, + { + "epoch": 0.14029535864978904, + "grad_norm": 0.5311141610145569, + "learning_rate": 0.0014370182899673982, + "loss": 2.78, + "step": 1330 + }, + { + "epoch": 0.14135021097046413, + "grad_norm": 0.5393499732017517, + "learning_rate": 0.0014360073858573341, + "loss": 2.7859, + "step": 1340 + }, + { + "epoch": 0.14240506329113925, + "grad_norm": 0.4717414081096649, + "learning_rate": 0.0014349887946995441, + "loss": 2.787, + "step": 1350 + }, + { + "epoch": 0.14345991561181434, + "grad_norm": 0.6122130751609802, + "learning_rate": 0.0014339625279078388, + "loss": 2.7703, + "step": 1360 + }, + { + "epoch": 0.14451476793248946, + "grad_norm": 0.6104434132575989, + "learning_rate": 0.0014329285969820389, + "loss": 2.7759, + "step": 1370 + }, + { + "epoch": 0.14556962025316456, + "grad_norm": 0.617061972618103, + "learning_rate": 0.0014318870135078452, + "loss": 2.7664, + "step": 1380 + }, + { + "epoch": 0.14662447257383968, + "grad_norm": 0.4779430031776428, + "learning_rate": 0.0014308377891567095, + "loss": 2.7704, + "step": 1390 + }, + { + "epoch": 0.14767932489451477, + "grad_norm": 0.7619314193725586, + "learning_rate": 0.0014297809356857026, + "loss": 2.7582, + "step": 1400 + }, + { + "epoch": 0.14873417721518986, + "grad_norm": 0.6303430795669556, + "learning_rate": 0.0014287164649373837, + "loss": 2.7609, + "step": 1410 + }, + { + "epoch": 
0.14978902953586498, + "grad_norm": 0.7697232961654663, + "learning_rate": 0.0014276443888396675, + "loss": 2.7634, + "step": 1420 + }, + { + "epoch": 0.15084388185654007, + "grad_norm": 0.6786683201789856, + "learning_rate": 0.00142656471940569, + "loss": 2.7572, + "step": 1430 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 0.509779155254364, + "learning_rate": 0.0014254774687336744, + "loss": 2.7483, + "step": 1440 + }, + { + "epoch": 0.1529535864978903, + "grad_norm": 0.47729647159576416, + "learning_rate": 0.0014243826490067954, + "loss": 2.7462, + "step": 1450 + }, + { + "epoch": 0.1540084388185654, + "grad_norm": 0.7989035844802856, + "learning_rate": 0.0014232802724930427, + "loss": 2.746, + "step": 1460 + }, + { + "epoch": 0.1550632911392405, + "grad_norm": 0.8849595785140991, + "learning_rate": 0.0014221703515450834, + "loss": 2.7424, + "step": 1470 + }, + { + "epoch": 0.15611814345991562, + "grad_norm": 0.6138654947280884, + "learning_rate": 0.0014210528986001237, + "loss": 2.744, + "step": 1480 + }, + { + "epoch": 0.1571729957805907, + "grad_norm": 0.4273254871368408, + "learning_rate": 0.0014199279261797692, + "loss": 2.7382, + "step": 1490 + }, + { + "epoch": 0.15822784810126583, + "grad_norm": 0.49582597613334656, + "learning_rate": 0.0014187954468898854, + "loss": 2.7391, + "step": 1500 + }, + { + "epoch": 0.15928270042194093, + "grad_norm": 0.8283353447914124, + "learning_rate": 0.0014176554734204557, + "loss": 2.7403, + "step": 1510 + }, + { + "epoch": 0.16033755274261605, + "grad_norm": 0.6724714636802673, + "learning_rate": 0.0014165080185454396, + "loss": 2.7439, + "step": 1520 + }, + { + "epoch": 0.16139240506329114, + "grad_norm": 0.5909962058067322, + "learning_rate": 0.001415353095122629, + "loss": 2.7205, + "step": 1530 + }, + { + "epoch": 0.16244725738396623, + "grad_norm": 0.44444817304611206, + "learning_rate": 0.001414190716093505, + "loss": 2.7094, + "step": 1540 + }, + { + "epoch": 0.16350210970464135, + "grad_norm": 
0.7448601126670837, + "learning_rate": 0.0014130208944830923, + "loss": 2.7138, + "step": 1550 + }, + { + "epoch": 0.16455696202531644, + "grad_norm": 0.47710293531417847, + "learning_rate": 0.0014118436433998127, + "loss": 2.7169, + "step": 1560 + }, + { + "epoch": 0.16561181434599156, + "grad_norm": 0.42166373133659363, + "learning_rate": 0.00141065897603534, + "loss": 2.7063, + "step": 1570 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 0.7401782274246216, + "learning_rate": 0.0014094669056644502, + "loss": 2.7127, + "step": 1580 + }, + { + "epoch": 0.16772151898734178, + "grad_norm": 0.7469227910041809, + "learning_rate": 0.0014082674456448738, + "loss": 2.719, + "step": 1590 + }, + { + "epoch": 0.16877637130801687, + "grad_norm": 0.7091121673583984, + "learning_rate": 0.0014070606094171464, + "loss": 2.7143, + "step": 1600 + }, + { + "epoch": 0.169831223628692, + "grad_norm": 0.44061294198036194, + "learning_rate": 0.0014058464105044567, + "loss": 2.7112, + "step": 1610 + }, + { + "epoch": 0.17088607594936708, + "grad_norm": 0.48778235912323, + "learning_rate": 0.001404624862512497, + "loss": 2.6848, + "step": 1620 + }, + { + "epoch": 0.1719409282700422, + "grad_norm": 0.5753692388534546, + "learning_rate": 0.0014033959791293082, + "loss": 2.6968, + "step": 1630 + }, + { + "epoch": 0.1729957805907173, + "grad_norm": 0.4225934147834778, + "learning_rate": 0.0014021597741251295, + "loss": 2.6944, + "step": 1640 + }, + { + "epoch": 0.17405063291139242, + "grad_norm": 0.9755870699882507, + "learning_rate": 0.001400916261352241, + "loss": 2.691, + "step": 1650 + }, + { + "epoch": 0.1751054852320675, + "grad_norm": 0.6635668277740479, + "learning_rate": 0.0013996654547448106, + "loss": 2.7012, + "step": 1660 + }, + { + "epoch": 0.17616033755274263, + "grad_norm": 0.6195884943008423, + "learning_rate": 0.0013984073683187374, + "loss": 2.6978, + "step": 1670 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 0.5282768607139587, + "learning_rate": 
0.001397142016171494, + "loss": 2.6839, + "step": 1680 + }, + { + "epoch": 0.17827004219409281, + "grad_norm": 0.5947687029838562, + "learning_rate": 0.0013958694124819688, + "loss": 2.6742, + "step": 1690 + }, + { + "epoch": 0.17932489451476794, + "grad_norm": 0.5252786874771118, + "learning_rate": 0.0013945895715103077, + "loss": 2.6801, + "step": 1700 + }, + { + "epoch": 0.18037974683544303, + "grad_norm": 0.44888177514076233, + "learning_rate": 0.0013933025075977539, + "loss": 2.6707, + "step": 1710 + }, + { + "epoch": 0.18143459915611815, + "grad_norm": 0.44331827759742737, + "learning_rate": 0.0013920082351664867, + "loss": 2.6633, + "step": 1720 + }, + { + "epoch": 0.18248945147679324, + "grad_norm": 0.39956197142601013, + "learning_rate": 0.0013907067687194607, + "loss": 2.6645, + "step": 1730 + }, + { + "epoch": 0.18354430379746836, + "grad_norm": 0.4587402939796448, + "learning_rate": 0.001389398122840243, + "loss": 2.6694, + "step": 1740 + }, + { + "epoch": 0.18459915611814345, + "grad_norm": 0.5803056955337524, + "learning_rate": 0.0013880823121928498, + "loss": 2.6838, + "step": 1750 + }, + { + "epoch": 0.18565400843881857, + "grad_norm": 1.516646146774292, + "learning_rate": 0.001386759351521582, + "loss": 2.6703, + "step": 1760 + }, + { + "epoch": 0.18670886075949367, + "grad_norm": 0.860973060131073, + "learning_rate": 0.0013854292556508593, + "loss": 2.6763, + "step": 1770 + }, + { + "epoch": 0.1877637130801688, + "grad_norm": 0.558430016040802, + "learning_rate": 0.001384092039485056, + "loss": 2.6675, + "step": 1780 + }, + { + "epoch": 0.18881856540084388, + "grad_norm": 0.508121907711029, + "learning_rate": 0.001382747718008332, + "loss": 2.6582, + "step": 1790 + }, + { + "epoch": 0.189873417721519, + "grad_norm": 0.49521535634994507, + "learning_rate": 0.001381396306284466, + "loss": 2.6617, + "step": 1800 + }, + { + "epoch": 0.1909282700421941, + "grad_norm": 0.4005928337574005, + "learning_rate": 0.0013800378194566856, + "loss": 2.6481, + 
"step": 1810 + }, + { + "epoch": 0.19198312236286919, + "grad_norm": 0.4798736274242401, + "learning_rate": 0.0013786722727474998, + "loss": 2.641, + "step": 1820 + }, + { + "epoch": 0.1930379746835443, + "grad_norm": 0.4158448874950409, + "learning_rate": 0.0013772996814585261, + "loss": 2.6342, + "step": 1830 + }, + { + "epoch": 0.1940928270042194, + "grad_norm": 0.4519105553627014, + "learning_rate": 0.0013759200609703196, + "loss": 2.6385, + "step": 1840 + }, + { + "epoch": 0.19514767932489452, + "grad_norm": 0.44247961044311523, + "learning_rate": 0.001374533426742202, + "loss": 2.6457, + "step": 1850 + }, + { + "epoch": 0.1962025316455696, + "grad_norm": 0.9962069392204285, + "learning_rate": 0.0013731397943120868, + "loss": 2.647, + "step": 1860 + }, + { + "epoch": 0.19725738396624473, + "grad_norm": 0.780658483505249, + "learning_rate": 0.0013717391792963062, + "loss": 2.6434, + "step": 1870 + }, + { + "epoch": 0.19831223628691982, + "grad_norm": 0.642623245716095, + "learning_rate": 0.0013703315973894346, + "loss": 2.6352, + "step": 1880 + }, + { + "epoch": 0.19936708860759494, + "grad_norm": 0.533208966255188, + "learning_rate": 0.001368917064364115, + "loss": 2.6246, + "step": 1890 + }, + { + "epoch": 0.20042194092827004, + "grad_norm": 0.47372129559516907, + "learning_rate": 0.0013674955960708808, + "loss": 2.6288, + "step": 1900 + }, + { + "epoch": 0.20147679324894516, + "grad_norm": 0.5056690573692322, + "learning_rate": 0.0013660672084379781, + "loss": 2.6271, + "step": 1910 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 0.5467988848686218, + "learning_rate": 0.0013646319174711878, + "loss": 2.6243, + "step": 1920 + }, + { + "epoch": 0.20358649789029537, + "grad_norm": 0.5231145024299622, + "learning_rate": 0.0013631897392536463, + "loss": 2.6228, + "step": 1930 + }, + { + "epoch": 0.20464135021097046, + "grad_norm": 0.5767257809638977, + "learning_rate": 0.001361740689945664, + "loss": 2.6136, + "step": 1940 + }, + { + "epoch": 
0.20569620253164558, + "grad_norm": 0.45038655400276184, + "learning_rate": 0.0013602847857845466, + "loss": 2.6098, + "step": 1950 + }, + { + "epoch": 0.20675105485232068, + "grad_norm": 0.776145875453949, + "learning_rate": 0.001358822043084411, + "loss": 2.6184, + "step": 1960 + }, + { + "epoch": 0.20780590717299577, + "grad_norm": 0.676280677318573, + "learning_rate": 0.0013573524782360034, + "loss": 2.622, + "step": 1970 + }, + { + "epoch": 0.2088607594936709, + "grad_norm": 0.6665925979614258, + "learning_rate": 0.0013558761077065154, + "loss": 2.6042, + "step": 1980 + }, + { + "epoch": 0.20991561181434598, + "grad_norm": 0.5586668848991394, + "learning_rate": 0.0013543929480393994, + "loss": 2.6043, + "step": 1990 + }, + { + "epoch": 0.2109704641350211, + "grad_norm": 0.5482926368713379, + "learning_rate": 0.0013529030158541842, + "loss": 2.601, + "step": 2000 + }, + { + "epoch": 0.2120253164556962, + "grad_norm": 0.42713695764541626, + "learning_rate": 0.001351406327846287, + "loss": 2.6056, + "step": 2010 + }, + { + "epoch": 0.21308016877637131, + "grad_norm": 0.3917507827281952, + "learning_rate": 0.0013499029007868284, + "loss": 2.6081, + "step": 2020 + }, + { + "epoch": 0.2141350210970464, + "grad_norm": 0.45374611020088196, + "learning_rate": 0.0013483927515224418, + "loss": 2.597, + "step": 2030 + }, + { + "epoch": 0.21518987341772153, + "grad_norm": 0.4193324148654938, + "learning_rate": 0.001346875896975088, + "loss": 2.5902, + "step": 2040 + }, + { + "epoch": 0.21624472573839662, + "grad_norm": 0.7912830114364624, + "learning_rate": 0.0013453523541418623, + "loss": 2.5924, + "step": 2050 + }, + { + "epoch": 0.21729957805907174, + "grad_norm": 0.5670852065086365, + "learning_rate": 0.001343822140094806, + "loss": 2.5825, + "step": 2060 + }, + { + "epoch": 0.21835443037974683, + "grad_norm": 0.8157739043235779, + "learning_rate": 0.001342285271980715, + "loss": 2.5957, + "step": 2070 + }, + { + "epoch": 0.21940928270042195, + "grad_norm": 
0.5087153911590576, + "learning_rate": 0.0013407417670209467, + "loss": 2.5935, + "step": 2080 + }, + { + "epoch": 0.22046413502109705, + "grad_norm": 0.42263007164001465, + "learning_rate": 0.001339191642511228, + "loss": 2.5776, + "step": 2090 + }, + { + "epoch": 0.22151898734177214, + "grad_norm": 0.6196709275245667, + "learning_rate": 0.0013376349158214609, + "loss": 2.5818, + "step": 2100 + }, + { + "epoch": 0.22257383966244726, + "grad_norm": 0.5099305510520935, + "learning_rate": 0.001336071604395528, + "loss": 2.5718, + "step": 2110 + }, + { + "epoch": 0.22362869198312235, + "grad_norm": 0.548096776008606, + "learning_rate": 0.0013345017257510975, + "loss": 2.5651, + "step": 2120 + }, + { + "epoch": 0.22468354430379747, + "grad_norm": 0.7013881206512451, + "learning_rate": 0.0013329252974794256, + "loss": 2.5704, + "step": 2130 + }, + { + "epoch": 0.22573839662447256, + "grad_norm": 0.7780211567878723, + "learning_rate": 0.0013313423372451614, + "loss": 2.5704, + "step": 2140 + }, + { + "epoch": 0.22679324894514769, + "grad_norm": 0.4621606469154358, + "learning_rate": 0.001329752862786147, + "loss": 2.5638, + "step": 2150 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 0.5396859645843506, + "learning_rate": 0.0013281568919132198, + "loss": 2.575, + "step": 2160 + }, + { + "epoch": 0.2289029535864979, + "grad_norm": 0.4384825527667999, + "learning_rate": 0.0013265544425100128, + "loss": 2.552, + "step": 2170 + }, + { + "epoch": 0.229957805907173, + "grad_norm": 0.7992722988128662, + "learning_rate": 0.001324945532532754, + "loss": 2.5482, + "step": 2180 + }, + { + "epoch": 0.2310126582278481, + "grad_norm": 0.7087538838386536, + "learning_rate": 0.0013233301800100652, + "loss": 2.5681, + "step": 2190 + }, + { + "epoch": 0.2320675105485232, + "grad_norm": 0.5795441269874573, + "learning_rate": 0.0013217084030427604, + "loss": 2.5581, + "step": 2200 + }, + { + "epoch": 0.23312236286919832, + "grad_norm": 0.4453881084918976, + "learning_rate": 
0.001320080219803642, + "loss": 2.5398, + "step": 2210 + }, + { + "epoch": 0.23417721518987342, + "grad_norm": 0.45181000232696533, + "learning_rate": 0.0013184456485372986, + "loss": 2.5489, + "step": 2220 + }, + { + "epoch": 0.23523206751054854, + "grad_norm": 0.526557207107544, + "learning_rate": 0.0013168047075598993, + "loss": 2.5359, + "step": 2230 + }, + { + "epoch": 0.23628691983122363, + "grad_norm": 0.42824089527130127, + "learning_rate": 0.0013151574152589888, + "loss": 2.5325, + "step": 2240 + }, + { + "epoch": 0.23734177215189872, + "grad_norm": 0.42745351791381836, + "learning_rate": 0.0013135037900932822, + "loss": 2.5352, + "step": 2250 + }, + { + "epoch": 0.23839662447257384, + "grad_norm": 0.5970489978790283, + "learning_rate": 0.0013118438505924563, + "loss": 2.5287, + "step": 2260 + }, + { + "epoch": 0.23945147679324894, + "grad_norm": 0.5591424703598022, + "learning_rate": 0.001310177615356944, + "loss": 2.5415, + "step": 2270 + }, + { + "epoch": 0.24050632911392406, + "grad_norm": 0.49273398518562317, + "learning_rate": 0.0013085051030577246, + "loss": 2.5364, + "step": 2280 + }, + { + "epoch": 0.24156118143459915, + "grad_norm": 0.5714364647865295, + "learning_rate": 0.0013068263324361156, + "loss": 2.5252, + "step": 2290 + }, + { + "epoch": 0.24261603375527427, + "grad_norm": 0.5370242595672607, + "learning_rate": 0.0013051413223035607, + "loss": 2.5201, + "step": 2300 + }, + { + "epoch": 0.24367088607594936, + "grad_norm": 0.466673344373703, + "learning_rate": 0.0013034500915414218, + "loss": 2.5249, + "step": 2310 + }, + { + "epoch": 0.24472573839662448, + "grad_norm": 0.46676868200302124, + "learning_rate": 0.001301752659100765, + "loss": 2.524, + "step": 2320 + }, + { + "epoch": 0.24578059071729957, + "grad_norm": 0.5667905211448669, + "learning_rate": 0.0013000490440021502, + "loss": 2.5134, + "step": 2330 + }, + { + "epoch": 0.2468354430379747, + "grad_norm": 0.5045638084411621, + "learning_rate": 0.001298339265335416, + "loss": 
2.5055, + "step": 2340 + }, + { + "epoch": 0.2478902953586498, + "grad_norm": 0.4429648816585541, + "learning_rate": 0.001296623342259467, + "loss": 2.499, + "step": 2350 + }, + { + "epoch": 0.2489451476793249, + "grad_norm": 0.6394183039665222, + "learning_rate": 0.0012949012940020599, + "loss": 2.5088, + "step": 2360 + }, + { + "epoch": 0.25, + "grad_norm": 0.627779483795166, + "learning_rate": 0.0012931731398595854, + "loss": 2.5108, + "step": 2370 + }, + { + "epoch": 0.2510548523206751, + "grad_norm": 0.4193165898323059, + "learning_rate": 0.001291438899196855, + "loss": 2.5044, + "step": 2380 + }, + { + "epoch": 0.2521097046413502, + "grad_norm": 0.4456976056098938, + "learning_rate": 0.001289698591446882, + "loss": 2.4964, + "step": 2390 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 0.43345141410827637, + "learning_rate": 0.0012879522361106646, + "loss": 2.4958, + "step": 2400 + }, + { + "epoch": 0.2542194092827004, + "grad_norm": 0.5602006316184998, + "learning_rate": 0.001286199852756967, + "loss": 2.4974, + "step": 2410 + }, + { + "epoch": 0.2552742616033755, + "grad_norm": 0.552234947681427, + "learning_rate": 0.0012844414610221006, + "loss": 2.4905, + "step": 2420 + }, + { + "epoch": 0.2563291139240506, + "grad_norm": 0.45673105120658875, + "learning_rate": 0.001282677080609703, + "loss": 2.4874, + "step": 2430 + }, + { + "epoch": 0.25738396624472576, + "grad_norm": 0.4601892828941345, + "learning_rate": 0.0012809067312905182, + "loss": 2.4883, + "step": 2440 + }, + { + "epoch": 0.25843881856540085, + "grad_norm": 0.40713736414909363, + "learning_rate": 0.0012791304329021751, + "loss": 2.4729, + "step": 2450 + }, + { + "epoch": 0.25949367088607594, + "grad_norm": 0.3938315212726593, + "learning_rate": 0.0012773482053489642, + "loss": 2.4892, + "step": 2460 + }, + { + "epoch": 0.26054852320675104, + "grad_norm": 0.5014756321907043, + "learning_rate": 0.0012755600686016155, + "loss": 2.4756, + "step": 2470 + }, + { + "epoch": 0.2616033755274262, 
+ "grad_norm": 0.5351433753967285, + "learning_rate": 0.0012737660426970748, + "loss": 2.4776, + "step": 2480 + }, + { + "epoch": 0.2626582278481013, + "grad_norm": 0.4729582369327545, + "learning_rate": 0.0012719661477382778, + "loss": 2.4675, + "step": 2490 + }, + { + "epoch": 0.26371308016877637, + "grad_norm": 0.5881186127662659, + "learning_rate": 0.0012701604038939268, + "loss": 2.4741, + "step": 2500 + }, + { + "epoch": 0.26476793248945146, + "grad_norm": 0.40729084610939026, + "learning_rate": 0.0012683488313982628, + "loss": 2.4602, + "step": 2510 + }, + { + "epoch": 0.26582278481012656, + "grad_norm": 0.43295612931251526, + "learning_rate": 0.0012665314505508406, + "loss": 2.4631, + "step": 2520 + }, + { + "epoch": 0.2668776371308017, + "grad_norm": 0.4432455003261566, + "learning_rate": 0.0012647082817162998, + "loss": 2.4582, + "step": 2530 + }, + { + "epoch": 0.2679324894514768, + "grad_norm": 0.41734954714775085, + "learning_rate": 0.0012628793453241377, + "loss": 2.4614, + "step": 2540 + }, + { + "epoch": 0.2689873417721519, + "grad_norm": 0.477624773979187, + "learning_rate": 0.0012610446618684793, + "loss": 2.4514, + "step": 2550 + }, + { + "epoch": 0.270042194092827, + "grad_norm": 0.4708152115345001, + "learning_rate": 0.0012592042519078486, + "loss": 2.4579, + "step": 2560 + }, + { + "epoch": 0.27109704641350213, + "grad_norm": 0.8564942479133606, + "learning_rate": 0.001257358136064938, + "loss": 2.4608, + "step": 2570 + }, + { + "epoch": 0.2721518987341772, + "grad_norm": 0.5224834084510803, + "learning_rate": 0.0012555063350263768, + "loss": 2.4603, + "step": 2580 + }, + { + "epoch": 0.2732067510548523, + "grad_norm": 0.4171922504901886, + "learning_rate": 0.0012536488695425003, + "loss": 2.4573, + "step": 2590 + }, + { + "epoch": 0.2742616033755274, + "grad_norm": 0.36475786566734314, + "learning_rate": 0.0012517857604271156, + "loss": 2.4519, + "step": 2600 + }, + { + "epoch": 0.27531645569620256, + "grad_norm": 0.4186481833457947, + 
"learning_rate": 0.0012499170285572702, + "loss": 2.4501, + "step": 2610 + }, + { + "epoch": 0.27637130801687765, + "grad_norm": 0.4235629439353943, + "learning_rate": 0.0012480426948730174, + "loss": 2.4474, + "step": 2620 + }, + { + "epoch": 0.27742616033755274, + "grad_norm": 0.7054567933082581, + "learning_rate": 0.0012461627803771812, + "loss": 2.4354, + "step": 2630 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 0.6245128512382507, + "learning_rate": 0.0012442773061351216, + "loss": 2.4496, + "step": 2640 + }, + { + "epoch": 0.2795358649789029, + "grad_norm": 0.5391449332237244, + "learning_rate": 0.001242386293274498, + "loss": 2.4356, + "step": 2650 + }, + { + "epoch": 0.2805907172995781, + "grad_norm": 0.40505120158195496, + "learning_rate": 0.001240489762985033, + "loss": 2.4307, + "step": 2660 + }, + { + "epoch": 0.28164556962025317, + "grad_norm": 0.40098732709884644, + "learning_rate": 0.0012385877365182743, + "loss": 2.4305, + "step": 2670 + }, + { + "epoch": 0.28270042194092826, + "grad_norm": 0.427351176738739, + "learning_rate": 0.0012366802351873574, + "loss": 2.4313, + "step": 2680 + }, + { + "epoch": 0.28375527426160335, + "grad_norm": 0.46775996685028076, + "learning_rate": 0.0012347672803667662, + "loss": 2.4166, + "step": 2690 + }, + { + "epoch": 0.2848101265822785, + "grad_norm": 0.424709677696228, + "learning_rate": 0.0012328488934920932, + "loss": 2.4224, + "step": 2700 + }, + { + "epoch": 0.2858649789029536, + "grad_norm": 0.717592179775238, + "learning_rate": 0.0012309250960598, + "loss": 2.4292, + "step": 2710 + }, + { + "epoch": 0.2869198312236287, + "grad_norm": 0.4441307485103607, + "learning_rate": 0.0012289959096269767, + "loss": 2.4257, + "step": 2720 + }, + { + "epoch": 0.2879746835443038, + "grad_norm": 0.4283949136734009, + "learning_rate": 0.0012270613558110993, + "loss": 2.4061, + "step": 2730 + }, + { + "epoch": 0.2890295358649789, + "grad_norm": 0.5827478170394897, + "learning_rate": 0.0012251214562897872, + 
"loss": 2.4204, + "step": 2740 + }, + { + "epoch": 0.290084388185654, + "grad_norm": 0.4936122000217438, + "learning_rate": 0.0012231762328005623, + "loss": 2.4165, + "step": 2750 + }, + { + "epoch": 0.2911392405063291, + "grad_norm": 0.5221071839332581, + "learning_rate": 0.0012212257071406037, + "loss": 2.4187, + "step": 2760 + }, + { + "epoch": 0.2921940928270042, + "grad_norm": 0.4400736093521118, + "learning_rate": 0.0012192699011665034, + "loss": 2.4159, + "step": 2770 + }, + { + "epoch": 0.29324894514767935, + "grad_norm": 0.4372219443321228, + "learning_rate": 0.0012173088367940228, + "loss": 2.4162, + "step": 2780 + }, + { + "epoch": 0.29430379746835444, + "grad_norm": 0.49315884709358215, + "learning_rate": 0.0012153425359978452, + "loss": 2.4093, + "step": 2790 + }, + { + "epoch": 0.29535864978902954, + "grad_norm": 0.47952815890312195, + "learning_rate": 0.0012133710208113318, + "loss": 2.3991, + "step": 2800 + }, + { + "epoch": 0.29641350210970463, + "grad_norm": 0.6401770710945129, + "learning_rate": 0.0012113943133262722, + "loss": 2.4039, + "step": 2810 + }, + { + "epoch": 0.2974683544303797, + "grad_norm": 0.4167127311229706, + "learning_rate": 0.0012094124356926397, + "loss": 2.4064, + "step": 2820 + }, + { + "epoch": 0.29852320675105487, + "grad_norm": 0.9363223910331726, + "learning_rate": 0.0012074254101183408, + "loss": 2.3924, + "step": 2830 + }, + { + "epoch": 0.29957805907172996, + "grad_norm": 0.6671643257141113, + "learning_rate": 0.0012054332588689667, + "loss": 2.3839, + "step": 2840 + }, + { + "epoch": 0.30063291139240506, + "grad_norm": 0.4426012933254242, + "learning_rate": 0.0012034360042675453, + "loss": 2.3935, + "step": 2850 + }, + { + "epoch": 0.30168776371308015, + "grad_norm": 0.46842101216316223, + "learning_rate": 0.0012014336686942898, + "loss": 2.3883, + "step": 2860 + }, + { + "epoch": 0.3027426160337553, + "grad_norm": 0.4300665557384491, + "learning_rate": 0.0011994262745863478, + "loss": 2.3896, + "step": 2870 + }, + { 
+ "epoch": 0.3037974683544304, + "grad_norm": 0.46797651052474976, + "learning_rate": 0.00119741384443755, + "loss": 2.3752, + "step": 2880 + }, + { + "epoch": 0.3048523206751055, + "grad_norm": 0.41439276933670044, + "learning_rate": 0.0011953964007981601, + "loss": 2.3762, + "step": 2890 + }, + { + "epoch": 0.3059071729957806, + "grad_norm": 0.45518720149993896, + "learning_rate": 0.0011933739662746178, + "loss": 2.3855, + "step": 2900 + }, + { + "epoch": 0.3069620253164557, + "grad_norm": 0.4685756266117096, + "learning_rate": 0.0011913465635292903, + "loss": 2.3802, + "step": 2910 + }, + { + "epoch": 0.3080168776371308, + "grad_norm": 0.4205142557621002, + "learning_rate": 0.0011893142152802152, + "loss": 2.3782, + "step": 2920 + }, + { + "epoch": 0.3090717299578059, + "grad_norm": 0.42350858449935913, + "learning_rate": 0.0011872769443008466, + "loss": 2.3663, + "step": 2930 + }, + { + "epoch": 0.310126582278481, + "grad_norm": 0.3990439474582672, + "learning_rate": 0.001185234773419801, + "loss": 2.3785, + "step": 2940 + }, + { + "epoch": 0.3111814345991561, + "grad_norm": 0.4046371281147003, + "learning_rate": 0.0011831877255206002, + "loss": 2.3809, + "step": 2950 + }, + { + "epoch": 0.31223628691983124, + "grad_norm": 0.41716068983078003, + "learning_rate": 0.0011811358235414154, + "loss": 2.3679, + "step": 2960 + }, + { + "epoch": 0.31329113924050633, + "grad_norm": 0.3807591497898102, + "learning_rate": 0.0011790790904748103, + "loss": 2.3547, + "step": 2970 + }, + { + "epoch": 0.3143459915611814, + "grad_norm": 0.4075852632522583, + "learning_rate": 0.0011770175493674827, + "loss": 2.3634, + "step": 2980 + }, + { + "epoch": 0.3154008438818565, + "grad_norm": 0.4196593463420868, + "learning_rate": 0.0011749512233200081, + "loss": 2.3753, + "step": 2990 + }, + { + "epoch": 0.31645569620253167, + "grad_norm": 0.4466218948364258, + "learning_rate": 0.0011728801354865786, + "loss": 2.3565, + "step": 3000 + }, + { + "epoch": 0.31751054852320676, + 
"grad_norm": 0.38949665427207947, + "learning_rate": 0.0011708043090747442, + "loss": 2.3629, + "step": 3010 + }, + { + "epoch": 0.31856540084388185, + "grad_norm": 0.6327291131019592, + "learning_rate": 0.0011687237673451538, + "loss": 2.3568, + "step": 3020 + }, + { + "epoch": 0.31962025316455694, + "grad_norm": 0.4857200086116791, + "learning_rate": 0.0011666385336112934, + "loss": 2.3568, + "step": 3030 + }, + { + "epoch": 0.3206751054852321, + "grad_norm": 0.4315875768661499, + "learning_rate": 0.001164548631239225, + "loss": 2.359, + "step": 3040 + }, + { + "epoch": 0.3217299578059072, + "grad_norm": 0.4319855570793152, + "learning_rate": 0.0011624540836473252, + "loss": 2.3497, + "step": 3050 + }, + { + "epoch": 0.3227848101265823, + "grad_norm": 0.4156710207462311, + "learning_rate": 0.0011603549143060225, + "loss": 2.3582, + "step": 3060 + }, + { + "epoch": 0.32383966244725737, + "grad_norm": 0.41173264384269714, + "learning_rate": 0.0011582511467375346, + "loss": 2.344, + "step": 3070 + }, + { + "epoch": 0.32489451476793246, + "grad_norm": 0.49878790974617004, + "learning_rate": 0.0011561428045156043, + "loss": 2.3371, + "step": 3080 + }, + { + "epoch": 0.3259493670886076, + "grad_norm": 0.41194814443588257, + "learning_rate": 0.0011540299112652351, + "loss": 2.3437, + "step": 3090 + }, + { + "epoch": 0.3270042194092827, + "grad_norm": 0.4208649694919586, + "learning_rate": 0.0011519124906624284, + "loss": 2.3314, + "step": 3100 + }, + { + "epoch": 0.3280590717299578, + "grad_norm": 0.4466190040111542, + "learning_rate": 0.0011497905664339153, + "loss": 2.3373, + "step": 3110 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 0.41847074031829834, + "learning_rate": 0.0011476641623568934, + "loss": 2.3375, + "step": 3120 + }, + { + "epoch": 0.33016877637130804, + "grad_norm": 0.5314446091651917, + "learning_rate": 0.0011455333022587582, + "loss": 2.3468, + "step": 3130 + }, + { + "epoch": 0.33122362869198313, + "grad_norm": 0.43514466285705566, + 
"learning_rate": 0.0011433980100168382, + "loss": 2.3268, + "step": 3140 + }, + { + "epoch": 0.3322784810126582, + "grad_norm": 0.42210546135902405, + "learning_rate": 0.0011412583095581248, + "loss": 2.3321, + "step": 3150 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.4617511034011841, + "learning_rate": 0.001139114224859007, + "loss": 2.3365, + "step": 3160 + }, + { + "epoch": 0.33438818565400846, + "grad_norm": 0.4712236523628235, + "learning_rate": 0.0011369657799450005, + "loss": 2.3367, + "step": 3170 + }, + { + "epoch": 0.33544303797468356, + "grad_norm": 0.46658217906951904, + "learning_rate": 0.0011348129988904797, + "loss": 2.3314, + "step": 3180 + }, + { + "epoch": 0.33649789029535865, + "grad_norm": 0.42238283157348633, + "learning_rate": 0.0011326559058184075, + "loss": 2.324, + "step": 3190 + }, + { + "epoch": 0.33755274261603374, + "grad_norm": 0.40795573592185974, + "learning_rate": 0.001130494524900065, + "loss": 2.3244, + "step": 3200 + }, + { + "epoch": 0.33860759493670883, + "grad_norm": 0.44495320320129395, + "learning_rate": 0.0011283288803547809, + "loss": 2.3319, + "step": 3210 + }, + { + "epoch": 0.339662447257384, + "grad_norm": 0.4128229320049286, + "learning_rate": 0.0011261589964496597, + "loss": 2.3141, + "step": 3220 + }, + { + "epoch": 0.3407172995780591, + "grad_norm": 0.425805926322937, + "learning_rate": 0.0011239848974993103, + "loss": 2.3142, + "step": 3230 + }, + { + "epoch": 0.34177215189873417, + "grad_norm": 0.41521772742271423, + "learning_rate": 0.0011218066078655725, + "loss": 2.3, + "step": 3240 + }, + { + "epoch": 0.34282700421940926, + "grad_norm": 0.4224522113800049, + "learning_rate": 0.0011196241519572457, + "loss": 2.3164, + "step": 3250 + }, + { + "epoch": 0.3438818565400844, + "grad_norm": 0.4201813340187073, + "learning_rate": 0.0011174375542298142, + "loss": 2.3096, + "step": 3260 + }, + { + "epoch": 0.3449367088607595, + "grad_norm": 0.390402227640152, + "learning_rate": 0.0011152468391851724, + 
"loss": 2.3219, + "step": 3270 + }, + { + "epoch": 0.3459915611814346, + "grad_norm": 0.422137588262558, + "learning_rate": 0.0011130520313713528, + "loss": 2.3167, + "step": 3280 + }, + { + "epoch": 0.3470464135021097, + "grad_norm": 0.42554888129234314, + "learning_rate": 0.0011108531553822485, + "loss": 2.3065, + "step": 3290 + }, + { + "epoch": 0.34810126582278483, + "grad_norm": 0.4574882686138153, + "learning_rate": 0.0011086502358573387, + "loss": 2.3123, + "step": 3300 + }, + { + "epoch": 0.3491561181434599, + "grad_norm": 0.5798978805541992, + "learning_rate": 0.001106443297481412, + "loss": 2.3202, + "step": 3310 + }, + { + "epoch": 0.350210970464135, + "grad_norm": 0.4273766279220581, + "learning_rate": 0.001104232364984291, + "loss": 2.3147, + "step": 3320 + }, + { + "epoch": 0.3512658227848101, + "grad_norm": 0.428865522146225, + "learning_rate": 0.0011020174631405533, + "loss": 2.3001, + "step": 3330 + }, + { + "epoch": 0.35232067510548526, + "grad_norm": 0.4030214548110962, + "learning_rate": 0.001099798616769256, + "loss": 2.3048, + "step": 3340 + }, + { + "epoch": 0.35337552742616035, + "grad_norm": 0.45166194438934326, + "learning_rate": 0.001097575850733656, + "loss": 2.297, + "step": 3350 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 0.4285772740840912, + "learning_rate": 0.0010953491899409321, + "loss": 2.2878, + "step": 3360 + }, + { + "epoch": 0.35548523206751054, + "grad_norm": 0.45250067114830017, + "learning_rate": 0.0010931186593419059, + "loss": 2.298, + "step": 3370 + }, + { + "epoch": 0.35654008438818563, + "grad_norm": 0.512752115726471, + "learning_rate": 0.0010908842839307614, + "loss": 2.3001, + "step": 3380 + }, + { + "epoch": 0.3575949367088608, + "grad_norm": 0.5028396248817444, + "learning_rate": 0.0010886460887447667, + "loss": 2.2968, + "step": 3390 + }, + { + "epoch": 0.35864978902953587, + "grad_norm": 0.4306299686431885, + "learning_rate": 0.0010864040988639912, + "loss": 2.3039, + "step": 3400 + }, + { + 
"epoch": 0.35970464135021096, + "grad_norm": 0.47806107997894287, + "learning_rate": 0.0010841583394110266, + "loss": 2.2905, + "step": 3410 + }, + { + "epoch": 0.36075949367088606, + "grad_norm": 0.6504234075546265, + "learning_rate": 0.0010819088355507043, + "loss": 2.2849, + "step": 3420 + }, + { + "epoch": 0.3618143459915612, + "grad_norm": 0.4498230516910553, + "learning_rate": 0.0010796556124898127, + "loss": 2.2874, + "step": 3430 + }, + { + "epoch": 0.3628691983122363, + "grad_norm": 0.4281620681285858, + "learning_rate": 0.0010773986954768172, + "loss": 2.28, + "step": 3440 + }, + { + "epoch": 0.3639240506329114, + "grad_norm": 0.45419439673423767, + "learning_rate": 0.0010751381098015747, + "loss": 2.2857, + "step": 3450 + }, + { + "epoch": 0.3649789029535865, + "grad_norm": 0.5198122262954712, + "learning_rate": 0.0010728738807950515, + "loss": 2.2779, + "step": 3460 + }, + { + "epoch": 0.36603375527426163, + "grad_norm": 0.39555108547210693, + "learning_rate": 0.001070606033829039, + "loss": 2.2811, + "step": 3470 + }, + { + "epoch": 0.3670886075949367, + "grad_norm": 0.4088149666786194, + "learning_rate": 0.00106833459431587, + "loss": 2.2885, + "step": 3480 + }, + { + "epoch": 0.3681434599156118, + "grad_norm": 0.4838487505912781, + "learning_rate": 0.0010660595877081335, + "loss": 2.2813, + "step": 3490 + }, + { + "epoch": 0.3691983122362869, + "grad_norm": 0.5017942190170288, + "learning_rate": 0.0010637810394983893, + "loss": 2.2677, + "step": 3500 + }, + { + "epoch": 0.370253164556962, + "grad_norm": 0.4555388391017914, + "learning_rate": 0.0010614989752188823, + "loss": 2.2788, + "step": 3510 + }, + { + "epoch": 0.37130801687763715, + "grad_norm": 0.4430163502693176, + "learning_rate": 0.0010592134204412578, + "loss": 2.2668, + "step": 3520 + }, + { + "epoch": 0.37236286919831224, + "grad_norm": 0.5356369614601135, + "learning_rate": 0.0010569244007762723, + "loss": 2.2848, + "step": 3530 + }, + { + "epoch": 0.37341772151898733, + "grad_norm": 
0.49036598205566406, + "learning_rate": 0.0010546319418735094, + "loss": 2.2857, + "step": 3540 + }, + { + "epoch": 0.3744725738396624, + "grad_norm": 0.42299696803092957, + "learning_rate": 0.00105233606942109, + "loss": 2.2655, + "step": 3550 + }, + { + "epoch": 0.3755274261603376, + "grad_norm": 0.4656181037425995, + "learning_rate": 0.0010500368091453864, + "loss": 2.2624, + "step": 3560 + }, + { + "epoch": 0.37658227848101267, + "grad_norm": 0.46805286407470703, + "learning_rate": 0.0010477341868107327, + "loss": 2.2649, + "step": 3570 + }, + { + "epoch": 0.37763713080168776, + "grad_norm": 0.3904317617416382, + "learning_rate": 0.0010454282282191362, + "loss": 2.2644, + "step": 3580 + }, + { + "epoch": 0.37869198312236285, + "grad_norm": 0.4970293641090393, + "learning_rate": 0.001043118959209989, + "loss": 2.2681, + "step": 3590 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 0.3930929899215698, + "learning_rate": 0.001040806405659778, + "loss": 2.2532, + "step": 3600 + }, + { + "epoch": 0.3808016877637131, + "grad_norm": 0.4014798402786255, + "learning_rate": 0.001038490593481795, + "loss": 2.2544, + "step": 3610 + }, + { + "epoch": 0.3818565400843882, + "grad_norm": 0.49569305777549744, + "learning_rate": 0.001036171548625846, + "loss": 2.2641, + "step": 3620 + }, + { + "epoch": 0.3829113924050633, + "grad_norm": 0.4134742319583893, + "learning_rate": 0.0010338492970779606, + "loss": 2.26, + "step": 3630 + }, + { + "epoch": 0.38396624472573837, + "grad_norm": 0.40861770510673523, + "learning_rate": 0.0010315238648601025, + "loss": 2.2565, + "step": 3640 + }, + { + "epoch": 0.3850210970464135, + "grad_norm": 0.4085552394390106, + "learning_rate": 0.001029195278029874, + "loss": 2.2623, + "step": 3650 + }, + { + "epoch": 0.3860759493670886, + "grad_norm": 0.4927079379558563, + "learning_rate": 0.0010268635626802282, + "loss": 2.2573, + "step": 3660 + }, + { + "epoch": 0.3871308016877637, + "grad_norm": 0.4324089288711548, + "learning_rate": 
0.001024528744939174, + "loss": 2.2457, + "step": 3670 + }, + { + "epoch": 0.3881856540084388, + "grad_norm": 0.5047134160995483, + "learning_rate": 0.0010221908509694842, + "loss": 2.2454, + "step": 3680 + }, + { + "epoch": 0.38924050632911394, + "grad_norm": 0.5293835401535034, + "learning_rate": 0.0010198499069684023, + "loss": 2.253, + "step": 3690 + }, + { + "epoch": 0.39029535864978904, + "grad_norm": 0.48898184299468994, + "learning_rate": 0.0010175059391673486, + "loss": 2.2604, + "step": 3700 + }, + { + "epoch": 0.39135021097046413, + "grad_norm": 0.5082082748413086, + "learning_rate": 0.0010151589738316275, + "loss": 2.2461, + "step": 3710 + }, + { + "epoch": 0.3924050632911392, + "grad_norm": 0.43640613555908203, + "learning_rate": 0.001012809037260131, + "loss": 2.2309, + "step": 3720 + }, + { + "epoch": 0.39345991561181437, + "grad_norm": 0.47532567381858826, + "learning_rate": 0.0010104561557850457, + "loss": 2.2454, + "step": 3730 + }, + { + "epoch": 0.39451476793248946, + "grad_norm": 0.4522501528263092, + "learning_rate": 0.0010081003557715583, + "loss": 2.2434, + "step": 3740 + }, + { + "epoch": 0.39556962025316456, + "grad_norm": 0.46055611968040466, + "learning_rate": 0.0010057416636175575, + "loss": 2.2437, + "step": 3750 + }, + { + "epoch": 0.39662447257383965, + "grad_norm": 0.4741487205028534, + "learning_rate": 0.0010033801057533404, + "loss": 2.2416, + "step": 3760 + }, + { + "epoch": 0.39767932489451474, + "grad_norm": 0.4095255434513092, + "learning_rate": 0.0010010157086413167, + "loss": 2.2471, + "step": 3770 + }, + { + "epoch": 0.3987341772151899, + "grad_norm": 0.39936351776123047, + "learning_rate": 0.0009986484987757102, + "loss": 2.2322, + "step": 3780 + }, + { + "epoch": 0.399789029535865, + "grad_norm": 0.4228968918323517, + "learning_rate": 0.0009962785026822632, + "loss": 2.2409, + "step": 3790 + }, + { + "epoch": 0.4008438818565401, + "grad_norm": 0.43429166078567505, + "learning_rate": 0.0009939057469179394, + "loss": 
2.2431, + "step": 3800 + }, + { + "epoch": 0.40189873417721517, + "grad_norm": 0.457498699426651, + "learning_rate": 0.0009915302580706256, + "loss": 2.2325, + "step": 3810 + }, + { + "epoch": 0.4029535864978903, + "grad_norm": 0.3893581032752991, + "learning_rate": 0.0009891520627588342, + "loss": 2.225, + "step": 3820 + }, + { + "epoch": 0.4040084388185654, + "grad_norm": 0.4595489799976349, + "learning_rate": 0.0009867711876314052, + "loss": 2.2202, + "step": 3830 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 0.44134455919265747, + "learning_rate": 0.0009843876593672064, + "loss": 2.2384, + "step": 3840 + }, + { + "epoch": 0.4061181434599156, + "grad_norm": 0.5476853251457214, + "learning_rate": 0.0009820015046748366, + "loss": 2.2259, + "step": 3850 + }, + { + "epoch": 0.40717299578059074, + "grad_norm": 0.4314012825489044, + "learning_rate": 0.0009796127502923232, + "loss": 2.2391, + "step": 3860 + }, + { + "epoch": 0.40822784810126583, + "grad_norm": 0.4098243713378906, + "learning_rate": 0.0009772214229868265, + "loss": 2.2215, + "step": 3870 + }, + { + "epoch": 0.4092827004219409, + "grad_norm": 0.4241116940975189, + "learning_rate": 0.000974827549554336, + "loss": 2.2149, + "step": 3880 + }, + { + "epoch": 0.410337552742616, + "grad_norm": 0.391810804605484, + "learning_rate": 0.0009724311568193726, + "loss": 2.2136, + "step": 3890 + }, + { + "epoch": 0.41139240506329117, + "grad_norm": 0.37744298577308655, + "learning_rate": 0.000970032271634687, + "loss": 2.2164, + "step": 3900 + }, + { + "epoch": 0.41244725738396626, + "grad_norm": 0.4127761125564575, + "learning_rate": 0.0009676309208809592, + "loss": 2.2128, + "step": 3910 + }, + { + "epoch": 0.41350210970464135, + "grad_norm": 0.4060923159122467, + "learning_rate": 0.0009652271314664966, + "loss": 2.2148, + "step": 3920 + }, + { + "epoch": 0.41455696202531644, + "grad_norm": 0.4290322959423065, + "learning_rate": 0.0009628209303269335, + "loss": 2.2178, + "step": 3930 + }, + { + "epoch": 
0.41561181434599154, + "grad_norm": 0.41268229484558105, + "learning_rate": 0.0009604123444249288, + "loss": 2.2114, + "step": 3940 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 0.3722761869430542, + "learning_rate": 0.0009580014007498634, + "loss": 2.2169, + "step": 3950 + }, + { + "epoch": 0.4177215189873418, + "grad_norm": 0.3922075033187866, + "learning_rate": 0.0009555881263175381, + "loss": 2.2154, + "step": 3960 + }, + { + "epoch": 0.41877637130801687, + "grad_norm": 0.45886096358299255, + "learning_rate": 0.0009531725481698719, + "loss": 2.2047, + "step": 3970 + }, + { + "epoch": 0.41983122362869196, + "grad_norm": 0.4347380995750427, + "learning_rate": 0.000950754693374597, + "loss": 2.212, + "step": 3980 + }, + { + "epoch": 0.4208860759493671, + "grad_norm": 0.40444234013557434, + "learning_rate": 0.0009483345890249571, + "loss": 2.205, + "step": 3990 + }, + { + "epoch": 0.4219409282700422, + "grad_norm": 0.4722711145877838, + "learning_rate": 0.0009459122622394033, + "loss": 2.2164, + "step": 4000 + }, + { + "epoch": 0.4229957805907173, + "grad_norm": 0.404418408870697, + "learning_rate": 0.0009434877401612898, + "loss": 2.2042, + "step": 4010 + }, + { + "epoch": 0.4240506329113924, + "grad_norm": 0.508589506149292, + "learning_rate": 0.0009410610499585705, + "loss": 2.2065, + "step": 4020 + }, + { + "epoch": 0.42510548523206754, + "grad_norm": 0.43359309434890747, + "learning_rate": 0.0009386322188234941, + "loss": 2.2004, + "step": 4030 + }, + { + "epoch": 0.42616033755274263, + "grad_norm": 0.42548608779907227, + "learning_rate": 0.000936201273972299, + "loss": 2.2055, + "step": 4040 + }, + { + "epoch": 0.4272151898734177, + "grad_norm": 0.39856281876564026, + "learning_rate": 0.0009337682426449097, + "loss": 2.2071, + "step": 4050 + }, + { + "epoch": 0.4282700421940928, + "grad_norm": 0.39244934916496277, + "learning_rate": 0.0009313331521046299, + "loss": 2.2081, + "step": 4060 + }, + { + "epoch": 0.4293248945147679, + "grad_norm": 
0.6449292302131653, + "learning_rate": 0.0009288960296378386, + "loss": 2.2058, + "step": 4070 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 0.5066844820976257, + "learning_rate": 0.0009264569025536825, + "loss": 2.2029, + "step": 4080 + }, + { + "epoch": 0.43143459915611815, + "grad_norm": 0.42528676986694336, + "learning_rate": 0.000924015798183772, + "loss": 2.1984, + "step": 4090 + }, + { + "epoch": 0.43248945147679324, + "grad_norm": 0.41071704030036926, + "learning_rate": 0.0009215727438818733, + "loss": 2.1926, + "step": 4100 + }, + { + "epoch": 0.43354430379746833, + "grad_norm": 0.44346991181373596, + "learning_rate": 0.000919127767023603, + "loss": 2.1924, + "step": 4110 + }, + { + "epoch": 0.4345991561181435, + "grad_norm": 0.4017937183380127, + "learning_rate": 0.0009166808950061202, + "loss": 2.1975, + "step": 4120 + }, + { + "epoch": 0.4356540084388186, + "grad_norm": 0.3833090364933014, + "learning_rate": 0.000914232155247821, + "loss": 2.1898, + "step": 4130 + }, + { + "epoch": 0.43670886075949367, + "grad_norm": 0.47410884499549866, + "learning_rate": 0.0009117815751880301, + "loss": 2.1877, + "step": 4140 + }, + { + "epoch": 0.43776371308016876, + "grad_norm": 0.41743582487106323, + "learning_rate": 0.0009093291822866933, + "loss": 2.2011, + "step": 4150 + }, + { + "epoch": 0.4388185654008439, + "grad_norm": 0.4676332473754883, + "learning_rate": 0.0009068750040240709, + "loss": 2.1915, + "step": 4160 + }, + { + "epoch": 0.439873417721519, + "grad_norm": 0.4097175598144531, + "learning_rate": 0.0009044190679004286, + "loss": 2.187, + "step": 4170 + }, + { + "epoch": 0.4409282700421941, + "grad_norm": 0.4006868898868561, + "learning_rate": 0.0009019614014357298, + "loss": 2.1821, + "step": 4180 + }, + { + "epoch": 0.4419831223628692, + "grad_norm": 0.38009005784988403, + "learning_rate": 0.0008995020321693274, + "loss": 2.1884, + "step": 4190 + }, + { + "epoch": 0.4430379746835443, + "grad_norm": 0.4209294021129608, + "learning_rate": 
0.0008970409876596545, + "loss": 2.1847, + "step": 4200 + }, + { + "epoch": 0.4440928270042194, + "grad_norm": 0.44231361150741577, + "learning_rate": 0.000894578295483917, + "loss": 2.1771, + "step": 4210 + }, + { + "epoch": 0.4451476793248945, + "grad_norm": 0.40467679500579834, + "learning_rate": 0.0008921139832377829, + "loss": 2.1866, + "step": 4220 + }, + { + "epoch": 0.4462025316455696, + "grad_norm": 0.41321098804473877, + "learning_rate": 0.0008896480785350743, + "loss": 2.1826, + "step": 4230 + }, + { + "epoch": 0.4472573839662447, + "grad_norm": 0.44940176606178284, + "learning_rate": 0.0008871806090074577, + "loss": 2.1851, + "step": 4240 + }, + { + "epoch": 0.44831223628691985, + "grad_norm": 0.46205848455429077, + "learning_rate": 0.0008847116023041336, + "loss": 2.1844, + "step": 4250 + }, + { + "epoch": 0.44936708860759494, + "grad_norm": 0.4308578372001648, + "learning_rate": 0.0008822410860915281, + "loss": 2.1887, + "step": 4260 + }, + { + "epoch": 0.45042194092827004, + "grad_norm": 0.4286726415157318, + "learning_rate": 0.0008797690880529813, + "loss": 2.1792, + "step": 4270 + }, + { + "epoch": 0.45147679324894513, + "grad_norm": 0.43327629566192627, + "learning_rate": 0.0008772956358884383, + "loss": 2.1752, + "step": 4280 + }, + { + "epoch": 0.4525316455696203, + "grad_norm": 0.41824856400489807, + "learning_rate": 0.0008748207573141388, + "loss": 2.1795, + "step": 4290 + }, + { + "epoch": 0.45358649789029537, + "grad_norm": 0.39699214696884155, + "learning_rate": 0.0008723444800623053, + "loss": 2.1811, + "step": 4300 + }, + { + "epoch": 0.45464135021097046, + "grad_norm": 0.4050501585006714, + "learning_rate": 0.0008698668318808334, + "loss": 2.1742, + "step": 4310 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 0.422493040561676, + "learning_rate": 0.000867387840532981, + "loss": 2.1746, + "step": 4320 + }, + { + "epoch": 0.45675105485232065, + "grad_norm": 0.42847445607185364, + "learning_rate": 0.0008649075337970567, + "loss": 
2.1765, + "step": 4330 + }, + { + "epoch": 0.4578059071729958, + "grad_norm": 0.3992847204208374, + "learning_rate": 0.0008624259394661085, + "loss": 2.169, + "step": 4340 + }, + { + "epoch": 0.4588607594936709, + "grad_norm": 0.45211532711982727, + "learning_rate": 0.0008599430853476126, + "loss": 2.1696, + "step": 4350 + }, + { + "epoch": 0.459915611814346, + "grad_norm": 0.5124645233154297, + "learning_rate": 0.0008574589992631617, + "loss": 2.1839, + "step": 4360 + }, + { + "epoch": 0.4609704641350211, + "grad_norm": 0.49876466393470764, + "learning_rate": 0.0008549737090481532, + "loss": 2.1654, + "step": 4370 + }, + { + "epoch": 0.4620253164556962, + "grad_norm": 0.4245542585849762, + "learning_rate": 0.0008524872425514775, + "loss": 2.1674, + "step": 4380 + }, + { + "epoch": 0.4630801687763713, + "grad_norm": 0.4410756230354309, + "learning_rate": 0.0008499996276352061, + "loss": 2.1617, + "step": 4390 + }, + { + "epoch": 0.4641350210970464, + "grad_norm": 0.45472452044487, + "learning_rate": 0.0008475108921742787, + "loss": 2.1693, + "step": 4400 + }, + { + "epoch": 0.4651898734177215, + "grad_norm": 0.4176275432109833, + "learning_rate": 0.0008450210640561912, + "loss": 2.1739, + "step": 4410 + }, + { + "epoch": 0.46624472573839665, + "grad_norm": 0.43107298016548157, + "learning_rate": 0.0008425301711806833, + "loss": 2.1649, + "step": 4420 + }, + { + "epoch": 0.46729957805907174, + "grad_norm": 0.39568591117858887, + "learning_rate": 0.0008400382414594263, + "loss": 2.1684, + "step": 4430 + }, + { + "epoch": 0.46835443037974683, + "grad_norm": 0.46640804409980774, + "learning_rate": 0.0008375453028157093, + "loss": 2.1625, + "step": 4440 + }, + { + "epoch": 0.4694092827004219, + "grad_norm": 0.4027540981769562, + "learning_rate": 0.0008350513831841271, + "loss": 2.1707, + "step": 4450 + }, + { + "epoch": 0.4704641350210971, + "grad_norm": 0.5162912607192993, + "learning_rate": 0.000832556510510267, + "loss": 2.1572, + "step": 4460 + }, + { + "epoch": 
0.47151898734177217, + "grad_norm": 0.4208495020866394, + "learning_rate": 0.0008300607127503952, + "loss": 2.1649, + "step": 4470 + }, + { + "epoch": 0.47257383966244726, + "grad_norm": 0.4132097661495209, + "learning_rate": 0.0008275640178711447, + "loss": 2.1664, + "step": 4480 + }, + { + "epoch": 0.47362869198312235, + "grad_norm": 0.42272451519966125, + "learning_rate": 0.0008250664538492006, + "loss": 2.1508, + "step": 4490 + }, + { + "epoch": 0.47468354430379744, + "grad_norm": 0.4160158038139343, + "learning_rate": 0.0008225680486709871, + "loss": 2.155, + "step": 4500 + }, + { + "epoch": 0.4757383966244726, + "grad_norm": 0.39632904529571533, + "learning_rate": 0.0008200688303323542, + "loss": 2.1576, + "step": 4510 + }, + { + "epoch": 0.4767932489451477, + "grad_norm": 0.37165600061416626, + "learning_rate": 0.0008175688268382639, + "loss": 2.1393, + "step": 4520 + }, + { + "epoch": 0.4778481012658228, + "grad_norm": 0.40933850407600403, + "learning_rate": 0.0008150680662024761, + "loss": 2.1528, + "step": 4530 + }, + { + "epoch": 0.47890295358649787, + "grad_norm": 0.39269039034843445, + "learning_rate": 0.0008125665764472345, + "loss": 2.1615, + "step": 4540 + }, + { + "epoch": 0.479957805907173, + "grad_norm": 0.4397440552711487, + "learning_rate": 0.0008100643856029534, + "loss": 2.1602, + "step": 4550 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 0.43031734228134155, + "learning_rate": 0.000807561521707903, + "loss": 2.1446, + "step": 4560 + }, + { + "epoch": 0.4820675105485232, + "grad_norm": 0.4234278202056885, + "learning_rate": 0.0008050580128078957, + "loss": 2.1335, + "step": 4570 + }, + { + "epoch": 0.4831223628691983, + "grad_norm": 0.43014636635780334, + "learning_rate": 0.0008025538869559703, + "loss": 2.1552, + "step": 4580 + }, + { + "epoch": 0.48417721518987344, + "grad_norm": 0.4468829929828644, + "learning_rate": 0.0008000491722120806, + "loss": 2.1341, + "step": 4590 + }, + { + "epoch": 0.48523206751054854, + "grad_norm": 
0.4165453016757965, + "learning_rate": 0.0007975438966427778, + "loss": 2.1516, + "step": 4600 + }, + { + "epoch": 0.48628691983122363, + "grad_norm": 0.4040280878543854, + "learning_rate": 0.0007950380883208981, + "loss": 2.1516, + "step": 4610 + }, + { + "epoch": 0.4873417721518987, + "grad_norm": 0.41979843378067017, + "learning_rate": 0.0007925317753252473, + "loss": 2.1327, + "step": 4620 + }, + { + "epoch": 0.4883966244725738, + "grad_norm": 0.5332528948783875, + "learning_rate": 0.0007900249857402863, + "loss": 2.1494, + "step": 4630 + }, + { + "epoch": 0.48945147679324896, + "grad_norm": 0.44417786598205566, + "learning_rate": 0.0007875177476558165, + "loss": 2.1514, + "step": 4640 + }, + { + "epoch": 0.49050632911392406, + "grad_norm": 0.4315895438194275, + "learning_rate": 0.0007850100891666648, + "loss": 2.1486, + "step": 4650 + }, + { + "epoch": 0.49156118143459915, + "grad_norm": 0.40638604760169983, + "learning_rate": 0.0007825020383723692, + "loss": 2.1411, + "step": 4660 + }, + { + "epoch": 0.49261603375527424, + "grad_norm": 0.49242982268333435, + "learning_rate": 0.0007799936233768632, + "loss": 2.1401, + "step": 4670 + }, + { + "epoch": 0.4936708860759494, + "grad_norm": 0.45747435092926025, + "learning_rate": 0.0007774848722881616, + "loss": 2.1363, + "step": 4680 + }, + { + "epoch": 0.4947257383966245, + "grad_norm": 0.46447691321372986, + "learning_rate": 0.0007749758132180459, + "loss": 2.1269, + "step": 4690 + }, + { + "epoch": 0.4957805907172996, + "grad_norm": 0.48377740383148193, + "learning_rate": 0.0007724664742817475, + "loss": 2.1416, + "step": 4700 + }, + { + "epoch": 0.49683544303797467, + "grad_norm": 0.41989490389823914, + "learning_rate": 0.0007699568835976348, + "loss": 2.1417, + "step": 4710 + }, + { + "epoch": 0.4978902953586498, + "grad_norm": 0.4614754617214203, + "learning_rate": 0.0007674470692868967, + "loss": 2.1367, + "step": 4720 + }, + { + "epoch": 0.4989451476793249, + "grad_norm": 0.4183652102947235, + 
"learning_rate": 0.0007649370594732282, + "loss": 2.1359, + "step": 4730 + }, + { + "epoch": 0.5, + "grad_norm": 0.4590315520763397, + "learning_rate": 0.0007624268822825145, + "loss": 2.1356, + "step": 4740 + }, + { + "epoch": 0.5010548523206751, + "grad_norm": 0.4808858036994934, + "learning_rate": 0.000759916565842517, + "loss": 2.1491, + "step": 4750 + }, + { + "epoch": 0.5021097046413502, + "grad_norm": 0.4539988040924072, + "learning_rate": 0.0007574061382825572, + "loss": 2.1355, + "step": 4760 + }, + { + "epoch": 0.5031645569620253, + "grad_norm": 0.37826961278915405, + "learning_rate": 0.0007548956277332016, + "loss": 2.1256, + "step": 4770 + }, + { + "epoch": 0.5042194092827004, + "grad_norm": 0.43078774213790894, + "learning_rate": 0.0007523850623259469, + "loss": 2.1382, + "step": 4780 + }, + { + "epoch": 0.5052742616033755, + "grad_norm": 0.4312061071395874, + "learning_rate": 0.0007498744701929041, + "loss": 2.123, + "step": 4790 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 0.45597848296165466, + "learning_rate": 0.0007473638794664841, + "loss": 2.1342, + "step": 4800 + }, + { + "epoch": 0.5073839662447257, + "grad_norm": 0.41749200224876404, + "learning_rate": 0.000744853318279082, + "loss": 2.1313, + "step": 4810 + }, + { + "epoch": 0.5084388185654009, + "grad_norm": 0.40577471256256104, + "learning_rate": 0.0007423428147627613, + "loss": 2.1404, + "step": 4820 + }, + { + "epoch": 0.509493670886076, + "grad_norm": 0.39981526136398315, + "learning_rate": 0.0007398323970489402, + "loss": 2.1243, + "step": 4830 + }, + { + "epoch": 0.510548523206751, + "grad_norm": 0.41416123509407043, + "learning_rate": 0.0007373220932680751, + "loss": 2.1204, + "step": 4840 + }, + { + "epoch": 0.5116033755274262, + "grad_norm": 0.39555513858795166, + "learning_rate": 0.0007348119315493453, + "loss": 2.1362, + "step": 4850 + }, + { + "epoch": 0.5126582278481012, + "grad_norm": 0.44723576307296753, + "learning_rate": 0.0007323019400203386, + "loss": 2.1177, + 
"step": 4860 + }, + { + "epoch": 0.5137130801687764, + "grad_norm": 0.4077317416667938, + "learning_rate": 0.0007297921468067357, + "loss": 2.1113, + "step": 4870 + }, + { + "epoch": 0.5147679324894515, + "grad_norm": 0.4205627739429474, + "learning_rate": 0.000727282580031995, + "loss": 2.1326, + "step": 4880 + }, + { + "epoch": 0.5158227848101266, + "grad_norm": 0.5090042948722839, + "learning_rate": 0.0007247732678170375, + "loss": 2.1248, + "step": 4890 + }, + { + "epoch": 0.5168776371308017, + "grad_norm": 0.4085707366466522, + "learning_rate": 0.0007222642382799322, + "loss": 2.1285, + "step": 4900 + }, + { + "epoch": 0.5179324894514767, + "grad_norm": 0.473204106092453, + "learning_rate": 0.0007197555195355799, + "loss": 2.1247, + "step": 4910 + }, + { + "epoch": 0.5189873417721519, + "grad_norm": 0.41196292638778687, + "learning_rate": 0.0007172471396953991, + "loss": 2.1183, + "step": 4920 + }, + { + "epoch": 0.520042194092827, + "grad_norm": 0.4053807854652405, + "learning_rate": 0.0007147391268670109, + "loss": 2.122, + "step": 4930 + }, + { + "epoch": 0.5210970464135021, + "grad_norm": 0.4309380352497101, + "learning_rate": 0.0007122315091539234, + "loss": 2.116, + "step": 4940 + }, + { + "epoch": 0.5221518987341772, + "grad_norm": 0.4071693420410156, + "learning_rate": 0.0007097243146552175, + "loss": 2.1275, + "step": 4950 + }, + { + "epoch": 0.5232067510548524, + "grad_norm": 0.42244482040405273, + "learning_rate": 0.0007072175714652321, + "loss": 2.1086, + "step": 4960 + }, + { + "epoch": 0.5242616033755274, + "grad_norm": 0.4667263627052307, + "learning_rate": 0.0007047113076732485, + "loss": 2.1236, + "step": 4970 + }, + { + "epoch": 0.5253164556962026, + "grad_norm": 0.44247594475746155, + "learning_rate": 0.0007022055513631764, + "loss": 2.1117, + "step": 4980 + }, + { + "epoch": 0.5263713080168776, + "grad_norm": 0.41164082288742065, + "learning_rate": 0.0006997003306132386, + "loss": 2.1093, + "step": 4990 + }, + { + "epoch": 
0.5274261603375527, + "grad_norm": 0.4272598326206207, + "learning_rate": 0.0006971956734956569, + "loss": 2.12, + "step": 5000 + }, + { + "epoch": 0.5284810126582279, + "grad_norm": 0.40883365273475647, + "learning_rate": 0.0006946916080763373, + "loss": 2.1094, + "step": 5010 + }, + { + "epoch": 0.5295358649789029, + "grad_norm": 0.46363192796707153, + "learning_rate": 0.0006921881624145554, + "loss": 2.1068, + "step": 5020 + }, + { + "epoch": 0.5305907172995781, + "grad_norm": 0.38344845175743103, + "learning_rate": 0.0006896853645626424, + "loss": 2.0972, + "step": 5030 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 0.4073318839073181, + "learning_rate": 0.0006871832425656702, + "loss": 2.1147, + "step": 5040 + }, + { + "epoch": 0.5327004219409283, + "grad_norm": 0.4208357334136963, + "learning_rate": 0.0006846818244611376, + "loss": 2.1029, + "step": 5050 + }, + { + "epoch": 0.5337552742616034, + "grad_norm": 0.3977333903312683, + "learning_rate": 0.0006821811382786561, + "loss": 2.1205, + "step": 5060 + }, + { + "epoch": 0.5348101265822784, + "grad_norm": 0.4147312343120575, + "learning_rate": 0.0006796812120396351, + "loss": 2.1072, + "step": 5070 + }, + { + "epoch": 0.5358649789029536, + "grad_norm": 0.4664631187915802, + "learning_rate": 0.0006771820737569689, + "loss": 2.1214, + "step": 5080 + }, + { + "epoch": 0.5369198312236287, + "grad_norm": 0.41525933146476746, + "learning_rate": 0.000674683751434722, + "loss": 2.1112, + "step": 5090 + }, + { + "epoch": 0.5379746835443038, + "grad_norm": 0.4228687584400177, + "learning_rate": 0.0006721862730678164, + "loss": 2.0993, + "step": 5100 + }, + { + "epoch": 0.5390295358649789, + "grad_norm": 0.40616574883461, + "learning_rate": 0.0006696896666417163, + "loss": 2.1061, + "step": 5110 + }, + { + "epoch": 0.540084388185654, + "grad_norm": 0.4019353687763214, + "learning_rate": 0.000667193960132116, + "loss": 2.1024, + "step": 5120 + }, + { + "epoch": 0.5411392405063291, + "grad_norm": 
0.3973188102245331, + "learning_rate": 0.0006646991815046254, + "loss": 2.0974, + "step": 5130 + }, + { + "epoch": 0.5421940928270043, + "grad_norm": 0.40654581785202026, + "learning_rate": 0.0006622053587144572, + "loss": 2.1146, + "step": 5140 + }, + { + "epoch": 0.5432489451476793, + "grad_norm": 0.39715680480003357, + "learning_rate": 0.0006597125197061133, + "loss": 2.1113, + "step": 5150 + }, + { + "epoch": 0.5443037974683544, + "grad_norm": 0.38343438506126404, + "learning_rate": 0.0006572206924130725, + "loss": 2.0994, + "step": 5160 + }, + { + "epoch": 0.5453586497890295, + "grad_norm": 0.3871074914932251, + "learning_rate": 0.0006547299047574761, + "loss": 2.1028, + "step": 5170 + }, + { + "epoch": 0.5464135021097046, + "grad_norm": 0.4398958384990692, + "learning_rate": 0.000652240184649816, + "loss": 2.1074, + "step": 5180 + }, + { + "epoch": 0.5474683544303798, + "grad_norm": 0.40567082166671753, + "learning_rate": 0.0006497515599886214, + "loss": 2.0958, + "step": 5190 + }, + { + "epoch": 0.5485232067510548, + "grad_norm": 0.4051438570022583, + "learning_rate": 0.0006472640586601472, + "loss": 2.103, + "step": 5200 + }, + { + "epoch": 0.54957805907173, + "grad_norm": 0.41590529680252075, + "learning_rate": 0.0006447777085380603, + "loss": 2.0908, + "step": 5210 + }, + { + "epoch": 0.5506329113924051, + "grad_norm": 0.43218934535980225, + "learning_rate": 0.0006422925374831275, + "loss": 2.0995, + "step": 5220 + }, + { + "epoch": 0.5516877637130801, + "grad_norm": 0.42082661390304565, + "learning_rate": 0.0006398085733429045, + "loss": 2.0957, + "step": 5230 + }, + { + "epoch": 0.5527426160337553, + "grad_norm": 0.44035834074020386, + "learning_rate": 0.0006373258439514221, + "loss": 2.0985, + "step": 5240 + }, + { + "epoch": 0.5537974683544303, + "grad_norm": 0.46288391947746277, + "learning_rate": 0.0006348443771288755, + "loss": 2.0923, + "step": 5250 + }, + { + "epoch": 0.5548523206751055, + "grad_norm": 0.4198758602142334, + "learning_rate": 
0.000632364200681312, + "loss": 2.1003, + "step": 5260 + }, + { + "epoch": 0.5559071729957806, + "grad_norm": 0.438401460647583, + "learning_rate": 0.0006298853424003199, + "loss": 2.1021, + "step": 5270 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 0.4995393753051758, + "learning_rate": 0.000627407830062716, + "loss": 2.0954, + "step": 5280 + }, + { + "epoch": 0.5580168776371308, + "grad_norm": 0.4367104172706604, + "learning_rate": 0.0006249316914302368, + "loss": 2.0943, + "step": 5290 + }, + { + "epoch": 0.5590717299578059, + "grad_norm": 0.43427273631095886, + "learning_rate": 0.0006224569542492241, + "loss": 2.0973, + "step": 5300 + }, + { + "epoch": 0.560126582278481, + "grad_norm": 0.40362513065338135, + "learning_rate": 0.0006199836462503166, + "loss": 2.0882, + "step": 5310 + }, + { + "epoch": 0.5611814345991561, + "grad_norm": 0.38409361243247986, + "learning_rate": 0.000617511795148138, + "loss": 2.0868, + "step": 5320 + }, + { + "epoch": 0.5622362869198312, + "grad_norm": 0.42920365929603577, + "learning_rate": 0.0006150414286409869, + "loss": 2.0956, + "step": 5330 + }, + { + "epoch": 0.5632911392405063, + "grad_norm": 0.3968173861503601, + "learning_rate": 0.0006125725744105263, + "loss": 2.0866, + "step": 5340 + }, + { + "epoch": 0.5643459915611815, + "grad_norm": 0.4031311273574829, + "learning_rate": 0.000610105260121473, + "loss": 2.0782, + "step": 5350 + }, + { + "epoch": 0.5654008438818565, + "grad_norm": 0.42098668217658997, + "learning_rate": 0.000607639513421289, + "loss": 2.0871, + "step": 5360 + }, + { + "epoch": 0.5664556962025317, + "grad_norm": 0.4605480432510376, + "learning_rate": 0.0006051753619398697, + "loss": 2.0927, + "step": 5370 + }, + { + "epoch": 0.5675105485232067, + "grad_norm": 0.41338497400283813, + "learning_rate": 0.0006027128332892358, + "loss": 2.0832, + "step": 5380 + }, + { + "epoch": 0.5685654008438819, + "grad_norm": 0.46238163113594055, + "learning_rate": 0.0006002519550632232, + "loss": 2.08, + "step": 
5390 + }, + { + "epoch": 0.569620253164557, + "grad_norm": 0.44284114241600037, + "learning_rate": 0.000597792754837174, + "loss": 2.0844, + "step": 5400 + }, + { + "epoch": 0.570675105485232, + "grad_norm": 0.44104424118995667, + "learning_rate": 0.0005953352601676272, + "loss": 2.0792, + "step": 5410 + }, + { + "epoch": 0.5717299578059072, + "grad_norm": 0.40266528725624084, + "learning_rate": 0.000592879498592011, + "loss": 2.0762, + "step": 5420 + }, + { + "epoch": 0.5727848101265823, + "grad_norm": 0.4076545238494873, + "learning_rate": 0.0005904254976283331, + "loss": 2.0775, + "step": 5430 + }, + { + "epoch": 0.5738396624472574, + "grad_norm": 0.40915238857269287, + "learning_rate": 0.0005879732847748721, + "loss": 2.0736, + "step": 5440 + }, + { + "epoch": 0.5748945147679325, + "grad_norm": 0.395208477973938, + "learning_rate": 0.0005855228875098706, + "loss": 2.0818, + "step": 5450 + }, + { + "epoch": 0.5759493670886076, + "grad_norm": 0.40539273619651794, + "learning_rate": 0.0005830743332912264, + "loss": 2.0793, + "step": 5460 + }, + { + "epoch": 0.5770042194092827, + "grad_norm": 0.432981014251709, + "learning_rate": 0.0005806276495561852, + "loss": 2.0824, + "step": 5470 + }, + { + "epoch": 0.5780590717299579, + "grad_norm": 0.4845345616340637, + "learning_rate": 0.0005781828637210325, + "loss": 2.0847, + "step": 5480 + }, + { + "epoch": 0.5791139240506329, + "grad_norm": 0.4534595310688019, + "learning_rate": 0.0005757400031807881, + "loss": 2.08, + "step": 5490 + }, + { + "epoch": 0.580168776371308, + "grad_norm": 0.4447915256023407, + "learning_rate": 0.0005732990953088968, + "loss": 2.0774, + "step": 5500 + }, + { + "epoch": 0.5812236286919831, + "grad_norm": 0.40797609090805054, + "learning_rate": 0.0005708601674569232, + "loss": 2.0821, + "step": 5510 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 0.419791042804718, + "learning_rate": 0.0005684232469542446, + "loss": 2.0747, + "step": 5520 + }, + { + "epoch": 0.5833333333333334, + 
"grad_norm": 0.4456191062927246, + "learning_rate": 0.0005659883611077453, + "loss": 2.0703, + "step": 5530 + }, + { + "epoch": 0.5843881856540084, + "grad_norm": 0.44808104634284973, + "learning_rate": 0.0005635555372015099, + "loss": 2.0713, + "step": 5540 + }, + { + "epoch": 0.5854430379746836, + "grad_norm": 0.40683653950691223, + "learning_rate": 0.0005611248024965186, + "loss": 2.0794, + "step": 5550 + }, + { + "epoch": 0.5864978902953587, + "grad_norm": 0.44421523809432983, + "learning_rate": 0.0005586961842303405, + "loss": 2.0805, + "step": 5560 + }, + { + "epoch": 0.5875527426160337, + "grad_norm": 0.3788703978061676, + "learning_rate": 0.0005562697096168289, + "loss": 2.0693, + "step": 5570 + }, + { + "epoch": 0.5886075949367089, + "grad_norm": 0.4097938537597656, + "learning_rate": 0.0005538454058458171, + "loss": 2.0659, + "step": 5580 + }, + { + "epoch": 0.5896624472573839, + "grad_norm": 0.4189620018005371, + "learning_rate": 0.0005514233000828121, + "loss": 2.0717, + "step": 5590 + }, + { + "epoch": 0.5907172995780591, + "grad_norm": 0.45838916301727295, + "learning_rate": 0.000549003419468692, + "loss": 2.0663, + "step": 5600 + }, + { + "epoch": 0.5917721518987342, + "grad_norm": 0.41338080167770386, + "learning_rate": 0.0005465857911194006, + "loss": 2.0692, + "step": 5610 + }, + { + "epoch": 0.5928270042194093, + "grad_norm": 0.4163554608821869, + "learning_rate": 0.000544170442125644, + "loss": 2.0811, + "step": 5620 + }, + { + "epoch": 0.5938818565400844, + "grad_norm": 0.45799753069877625, + "learning_rate": 0.0005417573995525871, + "loss": 2.0725, + "step": 5630 + }, + { + "epoch": 0.5949367088607594, + "grad_norm": 0.4094287157058716, + "learning_rate": 0.0005393466904395503, + "loss": 2.0794, + "step": 5640 + }, + { + "epoch": 0.5959915611814346, + "grad_norm": 0.40283438563346863, + "learning_rate": 0.000536938341799706, + "loss": 2.0765, + "step": 5650 + }, + { + "epoch": 0.5970464135021097, + "grad_norm": 0.4225766956806183, + 
"learning_rate": 0.0005345323806197771, + "loss": 2.0643, + "step": 5660 + }, + { + "epoch": 0.5981012658227848, + "grad_norm": 0.4280329942703247, + "learning_rate": 0.0005321288338597327, + "loss": 2.0778, + "step": 5670 + }, + { + "epoch": 0.5991561181434599, + "grad_norm": 0.4239858388900757, + "learning_rate": 0.0005297277284524888, + "loss": 2.0715, + "step": 5680 + }, + { + "epoch": 0.6002109704641351, + "grad_norm": 0.4156089127063751, + "learning_rate": 0.0005273290913036033, + "loss": 2.059, + "step": 5690 + }, + { + "epoch": 0.6012658227848101, + "grad_norm": 0.4095378518104553, + "learning_rate": 0.000524932949290977, + "loss": 2.064, + "step": 5700 + }, + { + "epoch": 0.6023206751054853, + "grad_norm": 0.4149723947048187, + "learning_rate": 0.0005225393292645509, + "loss": 2.0625, + "step": 5710 + }, + { + "epoch": 0.6033755274261603, + "grad_norm": 0.44716504216194153, + "learning_rate": 0.0005201482580460063, + "loss": 2.0635, + "step": 5720 + }, + { + "epoch": 0.6044303797468354, + "grad_norm": 0.42696863412857056, + "learning_rate": 0.0005177597624284637, + "loss": 2.0618, + "step": 5730 + }, + { + "epoch": 0.6054852320675106, + "grad_norm": 0.4218822121620178, + "learning_rate": 0.0005153738691761826, + "loss": 2.0664, + "step": 5740 + }, + { + "epoch": 0.6065400843881856, + "grad_norm": 0.43501466512680054, + "learning_rate": 0.0005129906050242622, + "loss": 2.0416, + "step": 5750 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 0.41891658306121826, + "learning_rate": 0.0005106099966783409, + "loss": 2.0571, + "step": 5760 + }, + { + "epoch": 0.6086497890295358, + "grad_norm": 0.4566188156604767, + "learning_rate": 0.0005082320708142975, + "loss": 2.0522, + "step": 5770 + }, + { + "epoch": 0.609704641350211, + "grad_norm": 0.3990718424320221, + "learning_rate": 0.0005058568540779526, + "loss": 2.0594, + "step": 5780 + }, + { + "epoch": 0.6107594936708861, + "grad_norm": 0.45364299416542053, + "learning_rate": 0.0005034843730847696, + 
"loss": 2.0559, + "step": 5790 + }, + { + "epoch": 0.6118143459915611, + "grad_norm": 0.4347531795501709, + "learning_rate": 0.0005011146544195559, + "loss": 2.06, + "step": 5800 + }, + { + "epoch": 0.6128691983122363, + "grad_norm": 0.4552476704120636, + "learning_rate": 0.000498747724636167, + "loss": 2.0608, + "step": 5810 + }, + { + "epoch": 0.6139240506329114, + "grad_norm": 0.44876980781555176, + "learning_rate": 0.0004963836102572065, + "loss": 2.0699, + "step": 5820 + }, + { + "epoch": 0.6149789029535865, + "grad_norm": 0.447122722864151, + "learning_rate": 0.0004940223377737304, + "loss": 2.0573, + "step": 5830 + }, + { + "epoch": 0.6160337552742616, + "grad_norm": 0.40661096572875977, + "learning_rate": 0.0004916639336449499, + "loss": 2.0672, + "step": 5840 + }, + { + "epoch": 0.6170886075949367, + "grad_norm": 0.39093858003616333, + "learning_rate": 0.0004893084242979348, + "loss": 2.0716, + "step": 5850 + }, + { + "epoch": 0.6181434599156118, + "grad_norm": 0.41336727142333984, + "learning_rate": 0.0004869558361273175, + "loss": 2.0586, + "step": 5860 + }, + { + "epoch": 0.619198312236287, + "grad_norm": 0.43309593200683594, + "learning_rate": 0.000484606195494997, + "loss": 2.0486, + "step": 5870 + }, + { + "epoch": 0.620253164556962, + "grad_norm": 0.4084915518760681, + "learning_rate": 0.0004822595287298442, + "loss": 2.0643, + "step": 5880 + }, + { + "epoch": 0.6213080168776371, + "grad_norm": 0.4720529317855835, + "learning_rate": 0.000479915862127406, + "loss": 2.0546, + "step": 5890 + }, + { + "epoch": 0.6223628691983122, + "grad_norm": 0.44278010725975037, + "learning_rate": 0.0004775752219496109, + "loss": 2.0568, + "step": 5900 + }, + { + "epoch": 0.6234177215189873, + "grad_norm": 0.42724645137786865, + "learning_rate": 0.0004752376344244752, + "loss": 2.0513, + "step": 5910 + }, + { + "epoch": 0.6244725738396625, + "grad_norm": 0.4635745584964752, + "learning_rate": 0.00047290312574580835, + "loss": 2.0476, + "step": 5920 + }, + { + 
"epoch": 0.6255274261603375, + "grad_norm": 0.4230916202068329, + "learning_rate": 0.00047057172207292004, + "loss": 2.0534, + "step": 5930 + }, + { + "epoch": 0.6265822784810127, + "grad_norm": 0.4273993968963623, + "learning_rate": 0.0004682434495303267, + "loss": 2.0602, + "step": 5940 + }, + { + "epoch": 0.6276371308016878, + "grad_norm": 0.4643547832965851, + "learning_rate": 0.0004659183342074594, + "loss": 2.0466, + "step": 5950 + }, + { + "epoch": 0.6286919831223629, + "grad_norm": 0.43346771597862244, + "learning_rate": 0.0004635964021583703, + "loss": 2.0489, + "step": 5960 + }, + { + "epoch": 0.629746835443038, + "grad_norm": 0.41942155361175537, + "learning_rate": 0.0004612776794014419, + "loss": 2.0468, + "step": 5970 + }, + { + "epoch": 0.630801687763713, + "grad_norm": 0.39621153473854065, + "learning_rate": 0.00045896219191909486, + "loss": 2.0367, + "step": 5980 + }, + { + "epoch": 0.6318565400843882, + "grad_norm": 0.4197236895561218, + "learning_rate": 0.00045664996565749716, + "loss": 2.0533, + "step": 5990 + }, + { + "epoch": 0.6329113924050633, + "grad_norm": 0.4198110103607178, + "learning_rate": 0.0004543410265262727, + "loss": 2.056, + "step": 6000 + }, + { + "epoch": 0.6339662447257384, + "grad_norm": 0.4319012463092804, + "learning_rate": 0.0004520354003982125, + "loss": 2.0557, + "step": 6010 + }, + { + "epoch": 0.6350210970464135, + "grad_norm": 0.42678725719451904, + "learning_rate": 0.00044973311310898275, + "loss": 2.0532, + "step": 6020 + }, + { + "epoch": 0.6360759493670886, + "grad_norm": 0.4581203758716583, + "learning_rate": 0.00044743419045683674, + "loss": 2.0415, + "step": 6030 + }, + { + "epoch": 0.6371308016877637, + "grad_norm": 0.4381617605686188, + "learning_rate": 0.00044513865820232525, + "loss": 2.0507, + "step": 6040 + }, + { + "epoch": 0.6381856540084389, + "grad_norm": 0.4007517099380493, + "learning_rate": 0.00044284654206800826, + "loss": 2.0398, + "step": 6050 + }, + { + "epoch": 0.6392405063291139, + 
"grad_norm": 0.39860790967941284, + "learning_rate": 0.0004405578677381661, + "loss": 2.0495, + "step": 6060 + }, + { + "epoch": 0.640295358649789, + "grad_norm": 0.4133116602897644, + "learning_rate": 0.00043827266085851203, + "loss": 2.042, + "step": 6070 + }, + { + "epoch": 0.6413502109704642, + "grad_norm": 0.42411765456199646, + "learning_rate": 0.00043599094703590524, + "loss": 2.0408, + "step": 6080 + }, + { + "epoch": 0.6424050632911392, + "grad_norm": 0.42060187458992004, + "learning_rate": 0.0004337127518380632, + "loss": 2.0419, + "step": 6090 + }, + { + "epoch": 0.6434599156118144, + "grad_norm": 0.422609806060791, + "learning_rate": 0.0004314381007932756, + "loss": 2.0388, + "step": 6100 + }, + { + "epoch": 0.6445147679324894, + "grad_norm": 0.4257788360118866, + "learning_rate": 0.00042916701939011787, + "loss": 2.0484, + "step": 6110 + }, + { + "epoch": 0.6455696202531646, + "grad_norm": 0.42463093996047974, + "learning_rate": 0.0004268995330771661, + "loss": 2.0453, + "step": 6120 + }, + { + "epoch": 0.6466244725738397, + "grad_norm": 0.42739665508270264, + "learning_rate": 0.00042463566726271137, + "loss": 2.0442, + "step": 6130 + }, + { + "epoch": 0.6476793248945147, + "grad_norm": 0.39006122946739197, + "learning_rate": 0.00042237544731447616, + "loss": 2.0435, + "step": 6140 + }, + { + "epoch": 0.6487341772151899, + "grad_norm": 0.4013984799385071, + "learning_rate": 0.0004201188985593283, + "loss": 2.0457, + "step": 6150 + }, + { + "epoch": 0.6497890295358649, + "grad_norm": 0.46564167737960815, + "learning_rate": 0.00041786604628299846, + "loss": 2.0452, + "step": 6160 + }, + { + "epoch": 0.6508438818565401, + "grad_norm": 0.4192144274711609, + "learning_rate": 0.00041561691572979624, + "loss": 2.0274, + "step": 6170 + }, + { + "epoch": 0.6518987341772152, + "grad_norm": 0.425772100687027, + "learning_rate": 0.000413371532102328, + "loss": 2.0401, + "step": 6180 + }, + { + "epoch": 0.6529535864978903, + "grad_norm": 0.39904317259788513, + 
"learning_rate": 0.0004111299205612135, + "loss": 2.0446, + "step": 6190 + }, + { + "epoch": 0.6540084388185654, + "grad_norm": 0.4872260093688965, + "learning_rate": 0.00040889210622480467, + "loss": 2.0369, + "step": 6200 + }, + { + "epoch": 0.6550632911392406, + "grad_norm": 0.42429015040397644, + "learning_rate": 0.0004066581141689038, + "loss": 2.036, + "step": 6210 + }, + { + "epoch": 0.6561181434599156, + "grad_norm": 0.44552692770957947, + "learning_rate": 0.00040442796942648273, + "loss": 2.0445, + "step": 6220 + }, + { + "epoch": 0.6571729957805907, + "grad_norm": 0.4957484006881714, + "learning_rate": 0.0004022016969874023, + "loss": 2.0327, + "step": 6230 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 0.3897000253200531, + "learning_rate": 0.00039997932179813205, + "loss": 2.0343, + "step": 6240 + }, + { + "epoch": 0.6592827004219409, + "grad_norm": 0.40000972151756287, + "learning_rate": 0.00039776086876147133, + "loss": 2.0468, + "step": 6250 + }, + { + "epoch": 0.6603375527426161, + "grad_norm": 0.41824623942375183, + "learning_rate": 0.0003955463627362694, + "loss": 2.0403, + "step": 6260 + }, + { + "epoch": 0.6613924050632911, + "grad_norm": 0.39792174100875854, + "learning_rate": 0.00039333582853714793, + "loss": 2.0357, + "step": 6270 + }, + { + "epoch": 0.6624472573839663, + "grad_norm": 0.42067378759384155, + "learning_rate": 0.00039112929093422185, + "loss": 2.0406, + "step": 6280 + }, + { + "epoch": 0.6635021097046413, + "grad_norm": 0.39623352885246277, + "learning_rate": 0.0003889267746528225, + "loss": 2.0327, + "step": 6290 + }, + { + "epoch": 0.6645569620253164, + "grad_norm": 0.4079453647136688, + "learning_rate": 0.00038672830437322007, + "loss": 2.0253, + "step": 6300 + }, + { + "epoch": 0.6656118143459916, + "grad_norm": 0.3968707025051117, + "learning_rate": 0.0003845339047303477, + "loss": 2.0265, + "step": 6310 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.39924386143684387, + "learning_rate": 
0.00038234360031352485, + "loss": 2.0237, + "step": 6320 + }, + { + "epoch": 0.6677215189873418, + "grad_norm": 0.4304143190383911, + "learning_rate": 0.0003801574156661817, + "loss": 2.0371, + "step": 6330 + }, + { + "epoch": 0.6687763713080169, + "grad_norm": 0.4250577986240387, + "learning_rate": 0.0003779753752855853, + "loss": 2.0349, + "step": 6340 + }, + { + "epoch": 0.669831223628692, + "grad_norm": 0.46291443705558777, + "learning_rate": 0.0003757975036225632, + "loss": 2.029, + "step": 6350 + }, + { + "epoch": 0.6708860759493671, + "grad_norm": 0.4757707715034485, + "learning_rate": 0.0003736238250812308, + "loss": 2.0323, + "step": 6360 + }, + { + "epoch": 0.6719409282700421, + "grad_norm": 0.45264002680778503, + "learning_rate": 0.0003714543640187177, + "loss": 2.0168, + "step": 6370 + }, + { + "epoch": 0.6729957805907173, + "grad_norm": 0.3988111913204193, + "learning_rate": 0.0003692891447448943, + "loss": 2.0184, + "step": 6380 + }, + { + "epoch": 0.6740506329113924, + "grad_norm": 0.4482176601886749, + "learning_rate": 0.00036712819152209954, + "loss": 2.0286, + "step": 6390 + }, + { + "epoch": 0.6751054852320675, + "grad_norm": 0.4128490090370178, + "learning_rate": 0.0003649715285648701, + "loss": 2.0331, + "step": 6400 + }, + { + "epoch": 0.6761603375527426, + "grad_norm": 0.42828041315078735, + "learning_rate": 0.000362819180039667, + "loss": 2.028, + "step": 6410 + }, + { + "epoch": 0.6772151898734177, + "grad_norm": 0.40934526920318604, + "learning_rate": 0.0003606711700646067, + "loss": 2.0236, + "step": 6420 + }, + { + "epoch": 0.6782700421940928, + "grad_norm": 0.41190746426582336, + "learning_rate": 0.00035852752270918955, + "loss": 2.024, + "step": 6430 + }, + { + "epoch": 0.679324894514768, + "grad_norm": 0.4268088638782501, + "learning_rate": 0.00035638826199403103, + "loss": 2.016, + "step": 6440 + }, + { + "epoch": 0.680379746835443, + "grad_norm": 0.44792720675468445, + "learning_rate": 0.000354253411890592, + "loss": 2.0151, + 
"step": 6450 + }, + { + "epoch": 0.6814345991561181, + "grad_norm": 0.44301944971084595, + "learning_rate": 0.00035212299632090996, + "loss": 2.0311, + "step": 6460 + }, + { + "epoch": 0.6824894514767933, + "grad_norm": 0.4218709468841553, + "learning_rate": 0.0003499970391573322, + "loss": 2.0229, + "step": 6470 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 0.42842522263526917, + "learning_rate": 0.0003478755642222466, + "loss": 2.0348, + "step": 6480 + }, + { + "epoch": 0.6845991561181435, + "grad_norm": 0.43303772807121277, + "learning_rate": 0.0003457585952878156, + "loss": 2.0195, + "step": 6490 + }, + { + "epoch": 0.6856540084388185, + "grad_norm": 0.442860871553421, + "learning_rate": 0.00034364615607570994, + "loss": 2.0423, + "step": 6500 + }, + { + "epoch": 0.6867088607594937, + "grad_norm": 0.4097784161567688, + "learning_rate": 0.00034153827025684225, + "loss": 2.034, + "step": 6510 + }, + { + "epoch": 0.6877637130801688, + "grad_norm": 0.39457035064697266, + "learning_rate": 0.0003394349614511026, + "loss": 2.0262, + "step": 6520 + }, + { + "epoch": 0.6888185654008439, + "grad_norm": 0.42623841762542725, + "learning_rate": 0.000337336253227093, + "loss": 2.014, + "step": 6530 + }, + { + "epoch": 0.689873417721519, + "grad_norm": 0.42138513922691345, + "learning_rate": 0.00033524216910186394, + "loss": 2.0205, + "step": 6540 + }, + { + "epoch": 0.6909282700421941, + "grad_norm": 0.4012819826602936, + "learning_rate": 0.0003331527325406506, + "loss": 2.02, + "step": 6550 + }, + { + "epoch": 0.6919831223628692, + "grad_norm": 0.4361356496810913, + "learning_rate": 0.00033106796695660983, + "loss": 2.0188, + "step": 6560 + }, + { + "epoch": 0.6930379746835443, + "grad_norm": 0.43680787086486816, + "learning_rate": 0.00032898789571055796, + "loss": 2.0174, + "step": 6570 + }, + { + "epoch": 0.6940928270042194, + "grad_norm": 0.409736305475235, + "learning_rate": 0.0003269125421107091, + "loss": 2.0131, + "step": 6580 + }, + { + "epoch": 
0.6951476793248945, + "grad_norm": 0.44094401597976685, + "learning_rate": 0.00032484192941241316, + "loss": 2.0248, + "step": 6590 + }, + { + "epoch": 0.6962025316455697, + "grad_norm": 0.44629523158073425, + "learning_rate": 0.0003227760808178973, + "loss": 2.0261, + "step": 6600 + }, + { + "epoch": 0.6972573839662447, + "grad_norm": 0.4376542568206787, + "learning_rate": 0.00032071501947600334, + "loss": 2.0189, + "step": 6610 + }, + { + "epoch": 0.6983122362869199, + "grad_norm": 0.43051040172576904, + "learning_rate": 0.00031865876848192993, + "loss": 2.0191, + "step": 6620 + }, + { + "epoch": 0.6993670886075949, + "grad_norm": 0.4309435188770294, + "learning_rate": 0.00031660735087697363, + "loss": 2.0132, + "step": 6630 + }, + { + "epoch": 0.70042194092827, + "grad_norm": 0.43572279810905457, + "learning_rate": 0.0003145607896482704, + "loss": 2.0223, + "step": 6640 + }, + { + "epoch": 0.7014767932489452, + "grad_norm": 0.4560849666595459, + "learning_rate": 0.000312519107728538, + "loss": 2.0164, + "step": 6650 + }, + { + "epoch": 0.7025316455696202, + "grad_norm": 0.4057128131389618, + "learning_rate": 0.0003104823279958191, + "loss": 2.0173, + "step": 6660 + }, + { + "epoch": 0.7035864978902954, + "grad_norm": 0.45008811354637146, + "learning_rate": 0.00030845047327322556, + "loss": 2.0079, + "step": 6670 + }, + { + "epoch": 0.7046413502109705, + "grad_norm": 0.4316631555557251, + "learning_rate": 0.0003064235663286815, + "loss": 2.024, + "step": 6680 + }, + { + "epoch": 0.7056962025316456, + "grad_norm": 0.43289369344711304, + "learning_rate": 0.00030440162987466896, + "loss": 2.0091, + "step": 6690 + }, + { + "epoch": 0.7067510548523207, + "grad_norm": 0.39821943640708923, + "learning_rate": 0.0003023846865679731, + "loss": 2.0097, + "step": 6700 + }, + { + "epoch": 0.7078059071729957, + "grad_norm": 0.40832656621932983, + "learning_rate": 0.0003003727590094285, + "loss": 2.0094, + "step": 6710 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 
0.4410429000854492, + "learning_rate": 0.00029836586974366574, + "loss": 2.0072, + "step": 6720 + }, + { + "epoch": 0.709915611814346, + "grad_norm": 0.43210917711257935, + "learning_rate": 0.00029636404125885936, + "loss": 2.0172, + "step": 6730 + }, + { + "epoch": 0.7109704641350211, + "grad_norm": 0.422147274017334, + "learning_rate": 0.00029436729598647483, + "loss": 2.0102, + "step": 6740 + }, + { + "epoch": 0.7120253164556962, + "grad_norm": 0.4023926854133606, + "learning_rate": 0.0002923756563010179, + "loss": 2.0233, + "step": 6750 + }, + { + "epoch": 0.7130801687763713, + "grad_norm": 0.40471142530441284, + "learning_rate": 0.0002903891445197836, + "loss": 2.0144, + "step": 6760 + }, + { + "epoch": 0.7141350210970464, + "grad_norm": 0.4113386571407318, + "learning_rate": 0.0002884077829026066, + "loss": 2.0159, + "step": 6770 + }, + { + "epoch": 0.7151898734177216, + "grad_norm": 0.42899373173713684, + "learning_rate": 0.00028643159365161113, + "loss": 2.0091, + "step": 6780 + }, + { + "epoch": 0.7162447257383966, + "grad_norm": 0.42821699380874634, + "learning_rate": 0.00028446059891096265, + "loss": 2.0195, + "step": 6790 + }, + { + "epoch": 0.7172995780590717, + "grad_norm": 0.39951053261756897, + "learning_rate": 0.0002824948207666199, + "loss": 2.0086, + "step": 6800 + }, + { + "epoch": 0.7183544303797469, + "grad_norm": 0.40438732504844666, + "learning_rate": 0.00028053428124608684, + "loss": 2.0085, + "step": 6810 + }, + { + "epoch": 0.7194092827004219, + "grad_norm": 0.42672163248062134, + "learning_rate": 0.00027857900231816594, + "loss": 2.0125, + "step": 6820 + }, + { + "epoch": 0.7204641350210971, + "grad_norm": 0.47871389985084534, + "learning_rate": 0.0002766290058927123, + "loss": 2.0112, + "step": 6830 + }, + { + "epoch": 0.7215189873417721, + "grad_norm": 0.4250808656215668, + "learning_rate": 0.00027468431382038816, + "loss": 1.9994, + "step": 6840 + }, + { + "epoch": 0.7225738396624473, + "grad_norm": 0.435791015625, + "learning_rate": 
0.00027274494789241766, + "loss": 2.0071, + "step": 6850 + }, + { + "epoch": 0.7236286919831224, + "grad_norm": 0.42433029413223267, + "learning_rate": 0.00027081092984034303, + "loss": 2.0039, + "step": 6860 + }, + { + "epoch": 0.7246835443037974, + "grad_norm": 0.43372049927711487, + "learning_rate": 0.00026888228133578086, + "loss": 2.003, + "step": 6870 + }, + { + "epoch": 0.7257383966244726, + "grad_norm": 0.4100553095340729, + "learning_rate": 0.00026695902399017935, + "loss": 2.0102, + "step": 6880 + }, + { + "epoch": 0.7267932489451476, + "grad_norm": 0.4229019582271576, + "learning_rate": 0.0002650411793545763, + "loss": 1.9973, + "step": 6890 + }, + { + "epoch": 0.7278481012658228, + "grad_norm": 0.42594701051712036, + "learning_rate": 0.0002631287689193571, + "loss": 2.0031, + "step": 6900 + }, + { + "epoch": 0.7289029535864979, + "grad_norm": 0.4173966944217682, + "learning_rate": 0.00026122181411401444, + "loss": 2.0234, + "step": 6910 + }, + { + "epoch": 0.729957805907173, + "grad_norm": 0.41529059410095215, + "learning_rate": 0.0002593203363069084, + "loss": 2.0076, + "step": 6920 + }, + { + "epoch": 0.7310126582278481, + "grad_norm": 0.40560516715049744, + "learning_rate": 0.0002574243568050261, + "loss": 2.0125, + "step": 6930 + }, + { + "epoch": 0.7320675105485233, + "grad_norm": 0.4458000063896179, + "learning_rate": 0.0002555338968537436, + "loss": 2.0145, + "step": 6940 + }, + { + "epoch": 0.7331223628691983, + "grad_norm": 0.4204067885875702, + "learning_rate": 0.00025364897763658777, + "loss": 2.0128, + "step": 6950 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 0.4047413170337677, + "learning_rate": 0.0002517696202749988, + "loss": 1.9997, + "step": 6960 + }, + { + "epoch": 0.7352320675105485, + "grad_norm": 0.4366636574268341, + "learning_rate": 0.0002498958458280936, + "loss": 2.0023, + "step": 6970 + }, + { + "epoch": 0.7362869198312236, + "grad_norm": 0.4308086633682251, + "learning_rate": 0.0002480276752924295, + "loss": 1.999, 
+ "step": 6980 + }, + { + "epoch": 0.7373417721518988, + "grad_norm": 0.47375115752220154, + "learning_rate": 0.00024616512960177014, + "loss": 2.0031, + "step": 6990 + }, + { + "epoch": 0.7383966244725738, + "grad_norm": 0.4351649582386017, + "learning_rate": 0.00024430822962684905, + "loss": 2.0127, + "step": 7000 + }, + { + "epoch": 0.739451476793249, + "grad_norm": 0.4361037313938141, + "learning_rate": 0.00024245699617513733, + "loss": 1.984, + "step": 7010 + }, + { + "epoch": 0.740506329113924, + "grad_norm": 0.41323596239089966, + "learning_rate": 0.00024061144999060956, + "loss": 1.9987, + "step": 7020 + }, + { + "epoch": 0.7415611814345991, + "grad_norm": 0.4562653601169586, + "learning_rate": 0.00023877161175351206, + "loss": 2.0027, + "step": 7030 + }, + { + "epoch": 0.7426160337552743, + "grad_norm": 0.4161659777164459, + "learning_rate": 0.00023693750208013045, + "loss": 2.0027, + "step": 7040 + }, + { + "epoch": 0.7436708860759493, + "grad_norm": 0.4228169322013855, + "learning_rate": 0.0002351091415225591, + "loss": 1.9969, + "step": 7050 + }, + { + "epoch": 0.7447257383966245, + "grad_norm": 0.4377746880054474, + "learning_rate": 0.00023328655056847124, + "loss": 1.9992, + "step": 7060 + }, + { + "epoch": 0.7457805907172996, + "grad_norm": 0.3960226774215698, + "learning_rate": 0.00023146974964088825, + "loss": 2.0016, + "step": 7070 + }, + { + "epoch": 0.7468354430379747, + "grad_norm": 0.4060283303260803, + "learning_rate": 0.00022965875909795164, + "loss": 2.0026, + "step": 7080 + }, + { + "epoch": 0.7478902953586498, + "grad_norm": 0.43912994861602783, + "learning_rate": 0.0002278535992326947, + "loss": 2.0001, + "step": 7090 + }, + { + "epoch": 0.7489451476793249, + "grad_norm": 0.41928860545158386, + "learning_rate": 0.0002260542902728151, + "loss": 1.9978, + "step": 7100 + }, + { + "epoch": 0.75, + "grad_norm": 0.4305204153060913, + "learning_rate": 0.00022426085238044823, + "loss": 2.005, + "step": 7110 + }, + { + "epoch": 
0.7510548523206751, + "grad_norm": 0.4599253535270691, + "learning_rate": 0.00022247330565194171, + "loss": 2.0033, + "step": 7120 + }, + { + "epoch": 0.7521097046413502, + "grad_norm": 0.41744956374168396, + "learning_rate": 0.0002206916701176293, + "loss": 2.0185, + "step": 7130 + }, + { + "epoch": 0.7531645569620253, + "grad_norm": 0.42286521196365356, + "learning_rate": 0.00021891596574160715, + "loss": 1.9918, + "step": 7140 + }, + { + "epoch": 0.7542194092827004, + "grad_norm": 0.4187549352645874, + "learning_rate": 0.00021714621242150973, + "loss": 1.989, + "step": 7150 + }, + { + "epoch": 0.7552742616033755, + "grad_norm": 0.42223358154296875, + "learning_rate": 0.0002153824299882872, + "loss": 1.9967, + "step": 7160 + }, + { + "epoch": 0.7563291139240507, + "grad_norm": 0.41517671942710876, + "learning_rate": 0.00021362463820598297, + "loss": 2.0108, + "step": 7170 + }, + { + "epoch": 0.7573839662447257, + "grad_norm": 0.4170437753200531, + "learning_rate": 0.00021187285677151205, + "loss": 1.9915, + "step": 7180 + }, + { + "epoch": 0.7584388185654009, + "grad_norm": 0.4304288625717163, + "learning_rate": 0.00021012710531444112, + "loss": 1.9922, + "step": 7190 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 0.43260830640792847, + "learning_rate": 0.00020838740339676763, + "loss": 1.9993, + "step": 7200 + }, + { + "epoch": 0.760548523206751, + "grad_norm": 0.43122145533561707, + "learning_rate": 0.00020665377051270095, + "loss": 1.9992, + "step": 7210 + }, + { + "epoch": 0.7616033755274262, + "grad_norm": 0.4274400770664215, + "learning_rate": 0.0002049262260884441, + "loss": 2.004, + "step": 7220 + }, + { + "epoch": 0.7626582278481012, + "grad_norm": 0.43203145265579224, + "learning_rate": 0.0002032047894819758, + "loss": 2.0058, + "step": 7230 + }, + { + "epoch": 0.7637130801687764, + "grad_norm": 0.41178610920906067, + "learning_rate": 0.00020148947998283381, + "loss": 1.998, + "step": 7240 + }, + { + "epoch": 0.7647679324894515, + "grad_norm": 
0.3972046375274658, + "learning_rate": 0.00019978031681189864, + "loss": 2.0, + "step": 7250 + }, + { + "epoch": 0.7658227848101266, + "grad_norm": 0.3840601444244385, + "learning_rate": 0.00019807731912117828, + "loss": 1.9901, + "step": 7260 + }, + { + "epoch": 0.7668776371308017, + "grad_norm": 0.4307912588119507, + "learning_rate": 0.00019638050599359326, + "loss": 1.9938, + "step": 7270 + }, + { + "epoch": 0.7679324894514767, + "grad_norm": 0.4113411009311676, + "learning_rate": 0.0001946898964427633, + "loss": 1.9927, + "step": 7280 + }, + { + "epoch": 0.7689873417721519, + "grad_norm": 0.4045986831188202, + "learning_rate": 0.0001930055094127938, + "loss": 2.0054, + "step": 7290 + }, + { + "epoch": 0.770042194092827, + "grad_norm": 0.41823092103004456, + "learning_rate": 0.00019132736377806394, + "loss": 2.0053, + "step": 7300 + }, + { + "epoch": 0.7710970464135021, + "grad_norm": 0.4122791290283203, + "learning_rate": 0.0001896554783430149, + "loss": 1.9881, + "step": 7310 + }, + { + "epoch": 0.7721518987341772, + "grad_norm": 0.4283318519592285, + "learning_rate": 0.00018798987184193963, + "loss": 1.9887, + "step": 7320 + }, + { + "epoch": 0.7732067510548524, + "grad_norm": 0.43920645117759705, + "learning_rate": 0.00018633056293877203, + "loss": 1.9874, + "step": 7330 + }, + { + "epoch": 0.7742616033755274, + "grad_norm": 0.4231961667537689, + "learning_rate": 0.00018467757022687864, + "loss": 1.9927, + "step": 7340 + }, + { + "epoch": 0.7753164556962026, + "grad_norm": 0.3960602581501007, + "learning_rate": 0.00018303091222884998, + "loss": 1.9885, + "step": 7350 + }, + { + "epoch": 0.7763713080168776, + "grad_norm": 0.42977961897850037, + "learning_rate": 0.00018139060739629287, + "loss": 1.9959, + "step": 7360 + }, + { + "epoch": 0.7774261603375527, + "grad_norm": 0.4063451290130615, + "learning_rate": 0.00017975667410962366, + "loss": 1.9925, + "step": 7370 + }, + { + "epoch": 0.7784810126582279, + "grad_norm": 0.4116809666156769, + "learning_rate": 
0.00017812913067786313, + "loss": 1.9916, + "step": 7380 + }, + { + "epoch": 0.7795358649789029, + "grad_norm": 0.4191359281539917, + "learning_rate": 0.00017650799533842996, + "loss": 1.9931, + "step": 7390 + }, + { + "epoch": 0.7805907172995781, + "grad_norm": 0.4071071743965149, + "learning_rate": 0.00017489328625693715, + "loss": 1.9965, + "step": 7400 + }, + { + "epoch": 0.7816455696202531, + "grad_norm": 0.39670810103416443, + "learning_rate": 0.0001732850215269885, + "loss": 1.9851, + "step": 7410 + }, + { + "epoch": 0.7827004219409283, + "grad_norm": 0.42304039001464844, + "learning_rate": 0.00017168321916997547, + "loss": 1.9913, + "step": 7420 + }, + { + "epoch": 0.7837552742616034, + "grad_norm": 0.41014552116394043, + "learning_rate": 0.00017008789713487558, + "loss": 1.9822, + "step": 7430 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 0.41000765562057495, + "learning_rate": 0.00016849907329805118, + "loss": 1.9791, + "step": 7440 + }, + { + "epoch": 0.7858649789029536, + "grad_norm": 0.41291967034339905, + "learning_rate": 0.00016691676546304936, + "loss": 1.9842, + "step": 7450 + }, + { + "epoch": 0.7869198312236287, + "grad_norm": 0.43991175293922424, + "learning_rate": 0.00016534099136040207, + "loss": 1.9885, + "step": 7460 + }, + { + "epoch": 0.7879746835443038, + "grad_norm": 0.4192153811454773, + "learning_rate": 0.00016377176864742734, + "loss": 1.9883, + "step": 7470 + }, + { + "epoch": 0.7890295358649789, + "grad_norm": 0.41214990615844727, + "learning_rate": 0.00016220911490803206, + "loss": 1.9862, + "step": 7480 + }, + { + "epoch": 0.790084388185654, + "grad_norm": 0.4211282730102539, + "learning_rate": 0.00016065304765251423, + "loss": 1.9891, + "step": 7490 + }, + { + "epoch": 0.7911392405063291, + "grad_norm": 0.39386671781539917, + "learning_rate": 0.00015910358431736745, + "loss": 1.989, + "step": 7500 + }, + { + "epoch": 0.7921940928270043, + "grad_norm": 0.4106917977333069, + "learning_rate": 0.0001575607422650846, + 
"loss": 1.9951, + "step": 7510 + }, + { + "epoch": 0.7932489451476793, + "grad_norm": 0.4198332130908966, + "learning_rate": 0.00015602453878396479, + "loss": 1.9814, + "step": 7520 + }, + { + "epoch": 0.7943037974683544, + "grad_norm": 0.3960287272930145, + "learning_rate": 0.0001544949910879177, + "loss": 1.9875, + "step": 7530 + }, + { + "epoch": 0.7953586497890295, + "grad_norm": 0.4111611843109131, + "learning_rate": 0.00015297211631627234, + "loss": 1.9892, + "step": 7540 + }, + { + "epoch": 0.7964135021097046, + "grad_norm": 0.4152844548225403, + "learning_rate": 0.00015145593153358412, + "loss": 1.9785, + "step": 7550 + }, + { + "epoch": 0.7974683544303798, + "grad_norm": 0.40522298216819763, + "learning_rate": 0.00014994645372944367, + "loss": 1.983, + "step": 7560 + }, + { + "epoch": 0.7985232067510548, + "grad_norm": 0.42012640833854675, + "learning_rate": 0.00014844369981828698, + "loss": 1.9874, + "step": 7570 + }, + { + "epoch": 0.79957805907173, + "grad_norm": 0.41362667083740234, + "learning_rate": 0.00014694768663920537, + "loss": 1.9874, + "step": 7580 + }, + { + "epoch": 0.8006329113924051, + "grad_norm": 0.4217645525932312, + "learning_rate": 0.00014545843095575709, + "loss": 1.9828, + "step": 7590 + }, + { + "epoch": 0.8016877637130801, + "grad_norm": 0.40708214044570923, + "learning_rate": 0.00014397594945577912, + "loss": 1.9818, + "step": 7600 + }, + { + "epoch": 0.8027426160337553, + "grad_norm": 0.4276680052280426, + "learning_rate": 0.0001425002587512005, + "loss": 1.993, + "step": 7610 + }, + { + "epoch": 0.8037974683544303, + "grad_norm": 0.3931733965873718, + "learning_rate": 0.00014103137537785633, + "loss": 1.9661, + "step": 7620 + }, + { + "epoch": 0.8048523206751055, + "grad_norm": 0.4122258126735687, + "learning_rate": 0.00013956931579530194, + "loss": 1.9899, + "step": 7630 + }, + { + "epoch": 0.8059071729957806, + "grad_norm": 0.4040488302707672, + "learning_rate": 0.00013811409638662858, + "loss": 1.9765, + "step": 7640 + }, + 
{ + "epoch": 0.8069620253164557, + "grad_norm": 0.4091789722442627, + "learning_rate": 0.00013666573345828083, + "loss": 1.9897, + "step": 7650 + }, + { + "epoch": 0.8080168776371308, + "grad_norm": 0.42233946919441223, + "learning_rate": 0.0001352242432398723, + "loss": 1.9776, + "step": 7660 + }, + { + "epoch": 0.8090717299578059, + "grad_norm": 0.42547842860221863, + "learning_rate": 0.00013378964188400457, + "loss": 1.9822, + "step": 7670 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 0.40196487307548523, + "learning_rate": 0.00013236194546608645, + "loss": 1.9974, + "step": 7680 + }, + { + "epoch": 0.8111814345991561, + "grad_norm": 0.41302430629730225, + "learning_rate": 0.00013094116998415358, + "loss": 1.9736, + "step": 7690 + }, + { + "epoch": 0.8122362869198312, + "grad_norm": 0.3987058997154236, + "learning_rate": 0.0001295273313586885, + "loss": 1.9734, + "step": 7700 + }, + { + "epoch": 0.8132911392405063, + "grad_norm": 0.40168219804763794, + "learning_rate": 0.00012812044543244395, + "loss": 1.9795, + "step": 7710 + }, + { + "epoch": 0.8143459915611815, + "grad_norm": 0.4119061827659607, + "learning_rate": 0.00012672052797026344, + "loss": 1.9787, + "step": 7720 + }, + { + "epoch": 0.8154008438818565, + "grad_norm": 0.4514375329017639, + "learning_rate": 0.00012532759465890567, + "loss": 1.9881, + "step": 7730 + }, + { + "epoch": 0.8164556962025317, + "grad_norm": 0.40260615944862366, + "learning_rate": 0.00012394166110686857, + "loss": 1.9843, + "step": 7740 + }, + { + "epoch": 0.8175105485232067, + "grad_norm": 0.4041300117969513, + "learning_rate": 0.0001225627428442143, + "loss": 1.9721, + "step": 7750 + }, + { + "epoch": 0.8185654008438819, + "grad_norm": 0.4261089265346527, + "learning_rate": 0.0001211908553223954, + "loss": 1.9764, + "step": 7760 + }, + { + "epoch": 0.819620253164557, + "grad_norm": 0.3963260054588318, + "learning_rate": 0.00011982601391408115, + "loss": 1.9873, + "step": 7770 + }, + { + "epoch": 0.820675105485232, + 
"grad_norm": 0.4119138717651367, + "learning_rate": 0.00011846823391298628, + "loss": 1.9707, + "step": 7780 + }, + { + "epoch": 0.8217299578059072, + "grad_norm": 0.4397176206111908, + "learning_rate": 0.00011711753053369861, + "loss": 1.9975, + "step": 7790 + }, + { + "epoch": 0.8227848101265823, + "grad_norm": 0.39763253927230835, + "learning_rate": 0.00011577391891150901, + "loss": 1.9816, + "step": 7800 + }, + { + "epoch": 0.8238396624472574, + "grad_norm": 0.4036940038204193, + "learning_rate": 0.00011443741410224173, + "loss": 1.9775, + "step": 7810 + }, + { + "epoch": 0.8248945147679325, + "grad_norm": 0.3965287506580353, + "learning_rate": 0.00011310803108208581, + "loss": 1.9901, + "step": 7820 + }, + { + "epoch": 0.8259493670886076, + "grad_norm": 0.43402767181396484, + "learning_rate": 0.00011178578474742687, + "loss": 1.9766, + "step": 7830 + }, + { + "epoch": 0.8270042194092827, + "grad_norm": 0.4054641127586365, + "learning_rate": 0.00011047068991468118, + "loss": 1.9621, + "step": 7840 + }, + { + "epoch": 0.8280590717299579, + "grad_norm": 0.4033876359462738, + "learning_rate": 0.00010916276132012818, + "loss": 1.972, + "step": 7850 + }, + { + "epoch": 0.8291139240506329, + "grad_norm": 0.3926706612110138, + "learning_rate": 0.00010786201361974646, + "loss": 1.9642, + "step": 7860 + }, + { + "epoch": 0.830168776371308, + "grad_norm": 0.40310266613960266, + "learning_rate": 0.00010656846138904916, + "loss": 1.9755, + "step": 7870 + }, + { + "epoch": 0.8312236286919831, + "grad_norm": 0.4143635332584381, + "learning_rate": 0.00010528211912292066, + "loss": 1.9865, + "step": 7880 + }, + { + "epoch": 0.8322784810126582, + "grad_norm": 0.415465772151947, + "learning_rate": 0.0001040030012354542, + "loss": 1.989, + "step": 7890 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.39793363213539124, + "learning_rate": 0.00010273112205979012, + "loss": 1.9699, + "step": 7900 + }, + { + "epoch": 0.8343881856540084, + "grad_norm": 0.39508792757987976, + 
"learning_rate": 0.00010146649584795575, + "loss": 1.9762, + "step": 7910 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 0.38980454206466675, + "learning_rate": 0.0001002091367707053, + "loss": 1.9781, + "step": 7920 + }, + { + "epoch": 0.8364978902953587, + "grad_norm": 0.4160945415496826, + "learning_rate": 9.895905891736118e-05, + "loss": 1.984, + "step": 7930 + }, + { + "epoch": 0.8375527426160337, + "grad_norm": 0.40413954854011536, + "learning_rate": 9.771627629565599e-05, + "loss": 1.981, + "step": 7940 + }, + { + "epoch": 0.8386075949367089, + "grad_norm": 0.4090052843093872, + "learning_rate": 9.648080283157604e-05, + "loss": 1.982, + "step": 7950 + }, + { + "epoch": 0.8396624472573839, + "grad_norm": 0.3922998905181885, + "learning_rate": 9.525265236920452e-05, + "loss": 1.9656, + "step": 7960 + }, + { + "epoch": 0.8407172995780591, + "grad_norm": 0.39917078614234924, + "learning_rate": 9.40318386705673e-05, + "loss": 1.9712, + "step": 7970 + }, + { + "epoch": 0.8417721518987342, + "grad_norm": 0.40210479497909546, + "learning_rate": 9.281837541547791e-05, + "loss": 1.9667, + "step": 7980 + }, + { + "epoch": 0.8428270042194093, + "grad_norm": 0.40558287501335144, + "learning_rate": 9.161227620138468e-05, + "loss": 1.9834, + "step": 7990 + }, + { + "epoch": 0.8438818565400844, + "grad_norm": 0.40320611000061035, + "learning_rate": 9.041355454321803e-05, + "loss": 1.9715, + "step": 8000 + }, + { + "epoch": 0.8449367088607594, + "grad_norm": 0.39627528190612793, + "learning_rate": 8.92222238732397e-05, + "loss": 1.979, + "step": 8010 + }, + { + "epoch": 0.8459915611814346, + "grad_norm": 0.3898746073246002, + "learning_rate": 8.803829754089138e-05, + "loss": 1.9719, + "step": 8020 + }, + { + "epoch": 0.8470464135021097, + "grad_norm": 0.40841376781463623, + "learning_rate": 8.686178881264568e-05, + "loss": 1.9646, + "step": 8030 + }, + { + "epoch": 0.8481012658227848, + "grad_norm": 0.40895649790763855, + "learning_rate": 8.569271087185756e-05, + 
"loss": 1.9865, + "step": 8040 + }, + { + "epoch": 0.8491561181434599, + "grad_norm": 0.4127788841724396, + "learning_rate": 8.453107681861616e-05, + "loss": 1.9648, + "step": 8050 + }, + { + "epoch": 0.8502109704641351, + "grad_norm": 0.3967271149158478, + "learning_rate": 8.337689966959819e-05, + "loss": 1.9737, + "step": 8060 + }, + { + "epoch": 0.8512658227848101, + "grad_norm": 0.4001125693321228, + "learning_rate": 8.223019235792214e-05, + "loss": 1.9792, + "step": 8070 + }, + { + "epoch": 0.8523206751054853, + "grad_norm": 0.39851799607276917, + "learning_rate": 8.109096773300348e-05, + "loss": 1.9838, + "step": 8080 + }, + { + "epoch": 0.8533755274261603, + "grad_norm": 0.3754591643810272, + "learning_rate": 7.995923856041013e-05, + "loss": 1.9753, + "step": 8090 + }, + { + "epoch": 0.8544303797468354, + "grad_norm": 0.40135321021080017, + "learning_rate": 7.883501752172038e-05, + "loss": 1.9653, + "step": 8100 + }, + { + "epoch": 0.8554852320675106, + "grad_norm": 0.4163255989551544, + "learning_rate": 7.771831721437989e-05, + "loss": 1.9778, + "step": 8110 + }, + { + "epoch": 0.8565400843881856, + "grad_norm": 0.39435306191444397, + "learning_rate": 7.660915015156067e-05, + "loss": 1.9697, + "step": 8120 + }, + { + "epoch": 0.8575949367088608, + "grad_norm": 0.392154723405838, + "learning_rate": 7.55075287620215e-05, + "loss": 1.9751, + "step": 8130 + }, + { + "epoch": 0.8586497890295358, + "grad_norm": 0.39433789253234863, + "learning_rate": 7.441346538996769e-05, + "loss": 1.9719, + "step": 8140 + }, + { + "epoch": 0.859704641350211, + "grad_norm": 0.3905210793018341, + "learning_rate": 7.332697229491373e-05, + "loss": 1.9677, + "step": 8150 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 0.3925706744194031, + "learning_rate": 7.224806165154504e-05, + "loss": 1.9804, + "step": 8160 + }, + { + "epoch": 0.8618143459915611, + "grad_norm": 0.42540591955184937, + "learning_rate": 7.117674554958253e-05, + "loss": 1.9651, + "step": 8170 + }, + { + 
"epoch": 0.8628691983122363, + "grad_norm": 0.4071645438671112, + "learning_rate": 7.011303599364608e-05, + "loss": 1.9777, + "step": 8180 + }, + { + "epoch": 0.8639240506329114, + "grad_norm": 0.39975661039352417, + "learning_rate": 6.905694490312064e-05, + "loss": 1.9733, + "step": 8190 + }, + { + "epoch": 0.8649789029535865, + "grad_norm": 0.39922386407852173, + "learning_rate": 6.80084841120226e-05, + "loss": 1.9807, + "step": 8200 + }, + { + "epoch": 0.8660337552742616, + "grad_norm": 0.39386624097824097, + "learning_rate": 6.696766536886692e-05, + "loss": 1.9687, + "step": 8210 + }, + { + "epoch": 0.8670886075949367, + "grad_norm": 0.3941761255264282, + "learning_rate": 6.593450033653586e-05, + "loss": 1.9743, + "step": 8220 + }, + { + "epoch": 0.8681434599156118, + "grad_norm": 0.3986801207065582, + "learning_rate": 6.490900059214836e-05, + "loss": 1.9724, + "step": 8230 + }, + { + "epoch": 0.869198312236287, + "grad_norm": 0.40113934874534607, + "learning_rate": 6.389117762692952e-05, + "loss": 1.9749, + "step": 8240 + }, + { + "epoch": 0.870253164556962, + "grad_norm": 0.410634845495224, + "learning_rate": 6.288104284608284e-05, + "loss": 1.9841, + "step": 8250 + }, + { + "epoch": 0.8713080168776371, + "grad_norm": 0.3994336724281311, + "learning_rate": 6.187860756866157e-05, + "loss": 1.976, + "step": 8260 + }, + { + "epoch": 0.8723628691983122, + "grad_norm": 0.3887355923652649, + "learning_rate": 6.088388302744266e-05, + "loss": 1.9677, + "step": 8270 + }, + { + "epoch": 0.8734177215189873, + "grad_norm": 0.3923799693584442, + "learning_rate": 5.9896880368800115e-05, + "loss": 1.9628, + "step": 8280 + }, + { + "epoch": 0.8744725738396625, + "grad_norm": 0.39411938190460205, + "learning_rate": 5.891761065258089e-05, + "loss": 1.9586, + "step": 8290 + }, + { + "epoch": 0.8755274261603375, + "grad_norm": 0.4026041328907013, + "learning_rate": 5.794608485198008e-05, + "loss": 1.9854, + "step": 8300 + }, + { + "epoch": 0.8765822784810127, + "grad_norm": 
0.4119473993778229, + "learning_rate": 5.698231385341887e-05, + "loss": 1.9611, + "step": 8310 + }, + { + "epoch": 0.8776371308016878, + "grad_norm": 0.4250369369983673, + "learning_rate": 5.60263084564217e-05, + "loss": 1.9696, + "step": 8320 + }, + { + "epoch": 0.8786919831223629, + "grad_norm": 0.4059505760669708, + "learning_rate": 5.507807937349604e-05, + "loss": 1.9727, + "step": 8330 + }, + { + "epoch": 0.879746835443038, + "grad_norm": 0.39970889687538147, + "learning_rate": 5.413763723001164e-05, + "loss": 1.9598, + "step": 8340 + }, + { + "epoch": 0.880801687763713, + "grad_norm": 0.3886041045188904, + "learning_rate": 5.320499256408204e-05, + "loss": 1.9641, + "step": 8350 + }, + { + "epoch": 0.8818565400843882, + "grad_norm": 0.397043377161026, + "learning_rate": 5.228015582644585e-05, + "loss": 1.9621, + "step": 8360 + }, + { + "epoch": 0.8829113924050633, + "grad_norm": 0.39508455991744995, + "learning_rate": 5.136313738035059e-05, + "loss": 1.9714, + "step": 8370 + }, + { + "epoch": 0.8839662447257384, + "grad_norm": 0.3957898020744324, + "learning_rate": 5.045394750143567e-05, + "loss": 1.9791, + "step": 8380 + }, + { + "epoch": 0.8850210970464135, + "grad_norm": 0.41096875071525574, + "learning_rate": 4.955259637761761e-05, + "loss": 1.9678, + "step": 8390 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 0.3961681127548218, + "learning_rate": 4.865909410897576e-05, + "loss": 1.9705, + "step": 8400 + }, + { + "epoch": 0.8871308016877637, + "grad_norm": 0.39333462715148926, + "learning_rate": 4.7773450707639414e-05, + "loss": 1.9822, + "step": 8410 + }, + { + "epoch": 0.8881856540084389, + "grad_norm": 0.3869766592979431, + "learning_rate": 4.6895676097675225e-05, + "loss": 1.9697, + "step": 8420 + }, + { + "epoch": 0.8892405063291139, + "grad_norm": 0.4025413691997528, + "learning_rate": 4.6025780114976545e-05, + "loss": 1.9723, + "step": 8430 + }, + { + "epoch": 0.890295358649789, + "grad_norm": 0.3929356336593628, + "learning_rate": 
4.5163772507152425e-05, + "loss": 1.9612, + "step": 8440 + }, + { + "epoch": 0.8913502109704642, + "grad_norm": 0.403622031211853, + "learning_rate": 4.430966293341912e-05, + "loss": 1.9741, + "step": 8450 + }, + { + "epoch": 0.8924050632911392, + "grad_norm": 0.3930189907550812, + "learning_rate": 4.346346096449136e-05, + "loss": 1.967, + "step": 8460 + }, + { + "epoch": 0.8934599156118144, + "grad_norm": 0.40248000621795654, + "learning_rate": 4.26251760824754e-05, + "loss": 1.9684, + "step": 8470 + }, + { + "epoch": 0.8945147679324894, + "grad_norm": 0.38733232021331787, + "learning_rate": 4.179481768076274e-05, + "loss": 1.9634, + "step": 8480 + }, + { + "epoch": 0.8955696202531646, + "grad_norm": 0.39052367210388184, + "learning_rate": 4.0972395063924554e-05, + "loss": 1.9655, + "step": 8490 + }, + { + "epoch": 0.8966244725738397, + "grad_norm": 0.3884296417236328, + "learning_rate": 4.015791744760811e-05, + "loss": 1.9594, + "step": 8500 + }, + { + "epoch": 0.8976793248945147, + "grad_norm": 0.40075260400772095, + "learning_rate": 3.93513939584326e-05, + "loss": 1.9711, + "step": 8510 + }, + { + "epoch": 0.8987341772151899, + "grad_norm": 0.3949390649795532, + "learning_rate": 3.855283363388762e-05, + "loss": 1.9637, + "step": 8520 + }, + { + "epoch": 0.8997890295358649, + "grad_norm": 0.38954097032546997, + "learning_rate": 3.7762245422231476e-05, + "loss": 1.9749, + "step": 8530 + }, + { + "epoch": 0.9008438818565401, + "grad_norm": 0.39936041831970215, + "learning_rate": 3.697963818239117e-05, + "loss": 1.9772, + "step": 8540 + }, + { + "epoch": 0.9018987341772152, + "grad_norm": 0.3984736502170563, + "learning_rate": 3.6205020683862836e-05, + "loss": 1.9696, + "step": 8550 + }, + { + "epoch": 0.9029535864978903, + "grad_norm": 0.4023587107658386, + "learning_rate": 3.543840160661396e-05, + "loss": 1.954, + "step": 8560 + }, + { + "epoch": 0.9040084388185654, + "grad_norm": 0.39339399337768555, + "learning_rate": 3.467978954098549e-05, + "loss": 1.9723, + 
"step": 8570 + }, + { + "epoch": 0.9050632911392406, + "grad_norm": 0.3851708471775055, + "learning_rate": 3.392919298759623e-05, + "loss": 1.966, + "step": 8580 + }, + { + "epoch": 0.9061181434599156, + "grad_norm": 0.39568132162094116, + "learning_rate": 3.318662035724679e-05, + "loss": 1.9722, + "step": 8590 + }, + { + "epoch": 0.9071729957805907, + "grad_norm": 0.3886665403842926, + "learning_rate": 3.2452079970826335e-05, + "loss": 1.9652, + "step": 8600 + }, + { + "epoch": 0.9082278481012658, + "grad_norm": 0.3863869309425354, + "learning_rate": 3.172558005921841e-05, + "loss": 1.9663, + "step": 8610 + }, + { + "epoch": 0.9092827004219409, + "grad_norm": 0.38743042945861816, + "learning_rate": 3.100712876320924e-05, + "loss": 1.9629, + "step": 8620 + }, + { + "epoch": 0.9103375527426161, + "grad_norm": 0.4073689877986908, + "learning_rate": 3.029673413339651e-05, + "loss": 1.9754, + "step": 8630 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 0.41656437516212463, + "learning_rate": 2.959440413009895e-05, + "loss": 1.9542, + "step": 8640 + }, + { + "epoch": 0.9124472573839663, + "grad_norm": 0.4011094868183136, + "learning_rate": 2.890014662326701e-05, + "loss": 1.965, + "step": 8650 + }, + { + "epoch": 0.9135021097046413, + "grad_norm": 0.4070962369441986, + "learning_rate": 2.8213969392395233e-05, + "loss": 1.9644, + "step": 8660 + }, + { + "epoch": 0.9145569620253164, + "grad_norm": 0.38844817876815796, + "learning_rate": 2.7535880126434433e-05, + "loss": 1.966, + "step": 8670 + }, + { + "epoch": 0.9156118143459916, + "grad_norm": 0.39364027976989746, + "learning_rate": 2.686588642370591e-05, + "loss": 1.9768, + "step": 8680 + }, + { + "epoch": 0.9166666666666666, + "grad_norm": 0.4274598956108093, + "learning_rate": 2.6203995791816372e-05, + "loss": 1.9729, + "step": 8690 + }, + { + "epoch": 0.9177215189873418, + "grad_norm": 0.3864382207393646, + "learning_rate": 2.5550215647573482e-05, + "loss": 1.969, + "step": 8700 + }, + { + "epoch": 
0.9187763713080169, + "grad_norm": 0.39018940925598145, + "learning_rate": 2.490455331690303e-05, + "loss": 1.9732, + "step": 8710 + }, + { + "epoch": 0.919831223628692, + "grad_norm": 0.39040523767471313, + "learning_rate": 2.4267016034766637e-05, + "loss": 1.958, + "step": 8720 + }, + { + "epoch": 0.9208860759493671, + "grad_norm": 0.3905726671218872, + "learning_rate": 2.363761094508085e-05, + "loss": 1.9745, + "step": 8730 + }, + { + "epoch": 0.9219409282700421, + "grad_norm": 0.39570626616477966, + "learning_rate": 2.301634510063702e-05, + "loss": 1.9581, + "step": 8740 + }, + { + "epoch": 0.9229957805907173, + "grad_norm": 0.38429251313209534, + "learning_rate": 2.2403225463022288e-05, + "loss": 1.9648, + "step": 8750 + }, + { + "epoch": 0.9240506329113924, + "grad_norm": 0.37935736775398254, + "learning_rate": 2.1798258902541723e-05, + "loss": 1.9501, + "step": 8760 + }, + { + "epoch": 0.9251054852320675, + "grad_norm": 0.37167224287986755, + "learning_rate": 2.120145219814082e-05, + "loss": 1.9617, + "step": 8770 + }, + { + "epoch": 0.9261603375527426, + "grad_norm": 0.3779657185077667, + "learning_rate": 2.0612812037330202e-05, + "loss": 1.9676, + "step": 8780 + }, + { + "epoch": 0.9272151898734177, + "grad_norm": 0.3890843093395233, + "learning_rate": 2.003234501611037e-05, + "loss": 1.9635, + "step": 8790 + }, + { + "epoch": 0.9282700421940928, + "grad_norm": 0.39056921005249023, + "learning_rate": 1.9460057638897578e-05, + "loss": 1.9597, + "step": 8800 + }, + { + "epoch": 0.929324894514768, + "grad_norm": 0.39952948689460754, + "learning_rate": 1.8895956318451398e-05, + "loss": 1.9796, + "step": 8810 + }, + { + "epoch": 0.930379746835443, + "grad_norm": 0.4020664095878601, + "learning_rate": 1.8340047375802693e-05, + "loss": 1.9657, + "step": 8820 + }, + { + "epoch": 0.9314345991561181, + "grad_norm": 0.39704957604408264, + "learning_rate": 1.7792337040182434e-05, + "loss": 1.9783, + "step": 8830 + }, + { + "epoch": 0.9324894514767933, + "grad_norm": 
0.4094266891479492, + "learning_rate": 1.72528314489524e-05, + "loss": 1.9672, + "step": 8840 + }, + { + "epoch": 0.9335443037974683, + "grad_norm": 0.3981424868106842, + "learning_rate": 1.6721536647536255e-05, + "loss": 1.9652, + "step": 8850 + }, + { + "epoch": 0.9345991561181435, + "grad_norm": 0.37875208258628845, + "learning_rate": 1.6198458589351595e-05, + "loss": 1.9647, + "step": 8860 + }, + { + "epoch": 0.9356540084388185, + "grad_norm": 0.3954843282699585, + "learning_rate": 1.568360313574349e-05, + "loss": 1.9694, + "step": 8870 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 0.39149436354637146, + "learning_rate": 1.517697605591864e-05, + "loss": 1.9685, + "step": 8880 + }, + { + "epoch": 0.9377637130801688, + "grad_norm": 0.39722591638565063, + "learning_rate": 1.4678583026880993e-05, + "loss": 1.9668, + "step": 8890 + }, + { + "epoch": 0.9388185654008439, + "grad_norm": 0.39836880564689636, + "learning_rate": 1.4188429633367721e-05, + "loss": 1.956, + "step": 8900 + }, + { + "epoch": 0.939873417721519, + "grad_norm": 0.3957841098308563, + "learning_rate": 1.370652136778694e-05, + "loss": 1.9566, + "step": 8910 + }, + { + "epoch": 0.9409282700421941, + "grad_norm": 0.38802453875541687, + "learning_rate": 1.3232863630156077e-05, + "loss": 1.9631, + "step": 8920 + }, + { + "epoch": 0.9419831223628692, + "grad_norm": 0.3933161795139313, + "learning_rate": 1.2767461728041357e-05, + "loss": 1.9601, + "step": 8930 + }, + { + "epoch": 0.9430379746835443, + "grad_norm": 0.3894447088241577, + "learning_rate": 1.2310320876498333e-05, + "loss": 1.9692, + "step": 8940 + }, + { + "epoch": 0.9440928270042194, + "grad_norm": 0.3871317505836487, + "learning_rate": 1.186144619801352e-05, + "loss": 1.9618, + "step": 8950 + }, + { + "epoch": 0.9451476793248945, + "grad_norm": 0.39421528577804565, + "learning_rate": 1.14208427224467e-05, + "loss": 1.9697, + "step": 8960 + }, + { + "epoch": 0.9462025316455697, + "grad_norm": 0.3862394094467163, + "learning_rate": 
1.0988515386975206e-05, + "loss": 1.9705, + "step": 8970 + }, + { + "epoch": 0.9472573839662447, + "grad_norm": 0.3839489221572876, + "learning_rate": 1.0564469036037722e-05, + "loss": 1.9668, + "step": 8980 + }, + { + "epoch": 0.9483122362869199, + "grad_norm": 0.38525593280792236, + "learning_rate": 1.0148708421280822e-05, + "loss": 1.9654, + "step": 8990 + }, + { + "epoch": 0.9493670886075949, + "grad_norm": 0.38716089725494385, + "learning_rate": 9.74123820150502e-06, + "loss": 1.9624, + "step": 9000 + }, + { + "epoch": 0.95042194092827, + "grad_norm": 0.38509002327919006, + "learning_rate": 9.342062942613222e-06, + "loss": 1.9617, + "step": 9010 + }, + { + "epoch": 0.9514767932489452, + "grad_norm": 0.3848653733730316, + "learning_rate": 8.9511871175591e-06, + "loss": 1.9606, + "step": 9020 + }, + { + "epoch": 0.9525316455696202, + "grad_norm": 0.3866124749183655, + "learning_rate": 8.568615106297223e-06, + "loss": 1.9823, + "step": 9030 + }, + { + "epoch": 0.9535864978902954, + "grad_norm": 0.3887442946434021, + "learning_rate": 8.194351195733585e-06, + "loss": 1.9623, + "step": 9040 + }, + { + "epoch": 0.9546413502109705, + "grad_norm": 0.39260828495025635, + "learning_rate": 7.828399579678153e-06, + "loss": 1.967, + "step": 9050 + }, + { + "epoch": 0.9556962025316456, + "grad_norm": 0.3878902792930603, + "learning_rate": 7.470764358797566e-06, + "loss": 1.9766, + "step": 9060 + }, + { + "epoch": 0.9567510548523207, + "grad_norm": 0.402909517288208, + "learning_rate": 7.121449540568842e-06, + "loss": 1.9721, + "step": 9070 + }, + { + "epoch": 0.9578059071729957, + "grad_norm": 0.3900144100189209, + "learning_rate": 6.780459039235409e-06, + "loss": 1.9639, + "step": 9080 + }, + { + "epoch": 0.9588607594936709, + "grad_norm": 0.39032885432243347, + "learning_rate": 6.447796675762146e-06, + "loss": 1.9755, + "step": 9090 + }, + { + "epoch": 0.959915611814346, + "grad_norm": 0.3828786015510559, + "learning_rate": 6.123466177793247e-06, + "loss": 1.9541, + 
"step": 9100 + }, + { + "epoch": 0.9609704641350211, + "grad_norm": 0.3875948190689087, + "learning_rate": 5.807471179610418e-06, + "loss": 1.9623, + "step": 9110 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 0.3894394636154175, + "learning_rate": 5.499815222091836e-06, + "loss": 1.9675, + "step": 9120 + }, + { + "epoch": 0.9630801687763713, + "grad_norm": 0.38778528571128845, + "learning_rate": 5.200501752672754e-06, + "loss": 1.9544, + "step": 9130 + }, + { + "epoch": 0.9641350210970464, + "grad_norm": 0.39072951674461365, + "learning_rate": 4.909534125306702e-06, + "loss": 1.9747, + "step": 9140 + }, + { + "epoch": 0.9651898734177216, + "grad_norm": 0.39563342928886414, + "learning_rate": 4.626915600428105e-06, + "loss": 1.9681, + "step": 9150 + }, + { + "epoch": 0.9662447257383966, + "grad_norm": 0.3839572072029114, + "learning_rate": 4.352649344915471e-06, + "loss": 1.9661, + "step": 9160 + }, + { + "epoch": 0.9672995780590717, + "grad_norm": 0.3794493079185486, + "learning_rate": 4.086738432056092e-06, + "loss": 1.9649, + "step": 9170 + }, + { + "epoch": 0.9683544303797469, + "grad_norm": 0.3925245702266693, + "learning_rate": 3.8291858415117344e-06, + "loss": 1.9679, + "step": 9180 + }, + { + "epoch": 0.9694092827004219, + "grad_norm": 0.39005380868911743, + "learning_rate": 3.579994459284752e-06, + "loss": 1.9632, + "step": 9190 + }, + { + "epoch": 0.9704641350210971, + "grad_norm": 0.3798932731151581, + "learning_rate": 3.339167077686278e-06, + "loss": 1.9615, + "step": 9200 + }, + { + "epoch": 0.9715189873417721, + "grad_norm": 0.38514089584350586, + "learning_rate": 3.1067063953048313e-06, + "loss": 1.9646, + "step": 9210 + }, + { + "epoch": 0.9725738396624473, + "grad_norm": 0.39145952463150024, + "learning_rate": 2.8826150169758425e-06, + "loss": 1.9636, + "step": 9220 + }, + { + "epoch": 0.9736286919831224, + "grad_norm": 0.3868762254714966, + "learning_rate": 2.66689545375251e-06, + "loss": 1.9564, + "step": 9230 + }, + { + "epoch": 
0.9746835443037974, + "grad_norm": 0.3975444436073303, + "learning_rate": 2.4595501228779906e-06, + "loss": 1.952, + "step": 9240 + }, + { + "epoch": 0.9757383966244726, + "grad_norm": 0.37457162141799927, + "learning_rate": 2.2605813477579172e-06, + "loss": 1.9617, + "step": 9250 + }, + { + "epoch": 0.9767932489451476, + "grad_norm": 0.39098936319351196, + "learning_rate": 2.069991357934592e-06, + "loss": 1.9591, + "step": 9260 + }, + { + "epoch": 0.9778481012658228, + "grad_norm": 0.3781387507915497, + "learning_rate": 1.8877822890618346e-06, + "loss": 1.9716, + "step": 9270 + }, + { + "epoch": 0.9789029535864979, + "grad_norm": 0.38485920429229736, + "learning_rate": 1.7139561828813377e-06, + "loss": 1.962, + "step": 9280 + }, + { + "epoch": 0.979957805907173, + "grad_norm": 0.3896081745624542, + "learning_rate": 1.5485149871995175e-06, + "loss": 1.9606, + "step": 9290 + }, + { + "epoch": 0.9810126582278481, + "grad_norm": 0.3861989974975586, + "learning_rate": 1.3914605558656146e-06, + "loss": 1.9598, + "step": 9300 + }, + { + "epoch": 0.9820675105485233, + "grad_norm": 0.3821124732494354, + "learning_rate": 1.2427946487512941e-06, + "loss": 1.963, + "step": 9310 + }, + { + "epoch": 0.9831223628691983, + "grad_norm": 0.39667096734046936, + "learning_rate": 1.1025189317305784e-06, + "loss": 1.9582, + "step": 9320 + }, + { + "epoch": 0.9841772151898734, + "grad_norm": 0.3883521258831024, + "learning_rate": 9.706349766615275e-07, + "loss": 1.9622, + "step": 9330 + }, + { + "epoch": 0.9852320675105485, + "grad_norm": 0.38799479603767395, + "learning_rate": 8.47144261368088e-07, + "loss": 1.9642, + "step": 9340 + }, + { + "epoch": 0.9862869198312236, + "grad_norm": 0.3777782618999481, + "learning_rate": 7.320481696241887e-07, + "loss": 1.9778, + "step": 9350 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 0.3823244273662567, + "learning_rate": 6.253479911375037e-07, + "loss": 1.9688, + "step": 9360 + }, + { + "epoch": 0.9883966244725738, + "grad_norm": 
0.38999292254447937, + "learning_rate": 5.270449215358797e-07, + "loss": 1.9682, + "step": 9370 + }, + { + "epoch": 0.989451476793249, + "grad_norm": 0.3977888822555542, + "learning_rate": 4.371400623530142e-07, + "loss": 1.9528, + "step": 9380 + }, + { + "epoch": 0.990506329113924, + "grad_norm": 0.38712644577026367, + "learning_rate": 3.5563442101696486e-07, + "loss": 1.9598, + "step": 9390 + }, + { + "epoch": 0.9915611814345991, + "grad_norm": 0.3826579749584198, + "learning_rate": 2.825289108379925e-07, + "loss": 1.9698, + "step": 9400 + }, + { + "epoch": 0.9926160337552743, + "grad_norm": 0.3845973610877991, + "learning_rate": 2.1782435099923503e-07, + "loss": 1.9463, + "step": 9410 + }, + { + "epoch": 0.9936708860759493, + "grad_norm": 0.38658881187438965, + "learning_rate": 1.6152146654671573e-07, + "loss": 1.9664, + "step": 9420 + }, + { + "epoch": 0.9947257383966245, + "grad_norm": 0.38687312602996826, + "learning_rate": 1.1362088838193229e-07, + "loss": 1.9707, + "step": 9430 + }, + { + "epoch": 0.9957805907172996, + "grad_norm": 0.3780154585838318, + "learning_rate": 7.412315325411312e-08, + "loss": 1.9594, + "step": 9440 + }, + { + "epoch": 0.9968354430379747, + "grad_norm": 0.3811144232749939, + "learning_rate": 4.302870375472168e-08, + "loss": 1.9675, + "step": 9450 + }, + { + "epoch": 0.9978902953586498, + "grad_norm": 0.3939819633960724, + "learning_rate": 2.0337888312210727e-08, + "loss": 1.9594, + "step": 9460 + }, + { + "epoch": 0.9989451476793249, + "grad_norm": 0.38289374113082886, + "learning_rate": 6.050961188358573e-09, + "loss": 1.9674, + "step": 9470 + }, + { + "epoch": 1.0, + "grad_norm": 1.1091125011444092, + "learning_rate": 1.6808247493838026e-10, + "loss": 1.9609, + "step": 9480 + } + ], + "logging_steps": 10, + "max_steps": 9480, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + 
"should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.9911340678754304e+16, + "train_batch_size": 1024, + "trial_name": null, + "trial_params": null +} diff --git a/saves-rwkv-cosine/checkpoint-9480/training_args.bin b/saves-rwkv-cosine/checkpoint-9480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..813eaf6ec0b42bf6e6a4c5ad6542842d6176ca0d --- /dev/null +++ b/saves-rwkv-cosine/checkpoint-9480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5da622a56dffba2b96ef66a9c7600744aa08d5e68f47283693fecc23fb98acf +size 5176 diff --git a/saves-rwkv-cosine/config.json b/saves-rwkv-cosine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..fba72b626605f3d995236b195dd7881fa619390d --- /dev/null +++ b/saves-rwkv-cosine/config.json @@ -0,0 +1,22 @@ +{ + "architectures": [ + "RwkvForCausalLM" + ], + "attention_hidden_size": 256, + "bos_token_id": 0, + "context_length": 1024, + "eos_token_id": 0, + "hidden_size": 256, + "intermediate_size": 1024, + "layer_norm_epsilon": 1e-05, + "model_type": "rwkv", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "rescale_every": 6, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-rwkv-cosine/generation_config.json b/saves-rwkv-cosine/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..14e4f03d0d73dc2707d488ac8f586bd62ef72a7e --- /dev/null +++ b/saves-rwkv-cosine/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 0, + "eos_token_id": 0, + "transformers_version": "4.42.4" +} diff --git a/saves-rwkv-cosine/model.safetensors b/saves-rwkv-cosine/model.safetensors new file mode 100644 index 
0000000000000000000000000000000000000000..5ca51955a22fb2dec6846962dba6ff879c628ac2 --- /dev/null +++ b/saves-rwkv-cosine/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54a81441cd9133223a3db4ef8b8448fde4c5c8e2c41c75b36e115c428aa38707 +size 8894568 diff --git a/saves-rwkv-cosine/result.log b/saves-rwkv-cosine/result.log new file mode 100644 index 0000000000000000000000000000000000000000..38efc1f4ce2230dd48cac66b9133bb8e7194a348 --- /dev/null +++ b/saves-rwkv-cosine/result.log @@ -0,0 +1 @@ +{'train_runtime': 13033.0456, 'train_samples_per_second': 744.769, 'train_steps_per_second': 0.727, 'train_loss': 3.0810169242102385, 'epoch': 1.0} \ No newline at end of file diff --git a/saves-rwkv-cosine/special_tokens_map.json b/saves-rwkv-cosine/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-rwkv-cosine/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-rwkv-cosine/tokenizer.json b/saves-rwkv-cosine/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-rwkv-cosine/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, 
+ "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + 
"le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + 
"Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 
536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + 
"åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + 
"cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + 
"åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, 
+ "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 
1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, 
+ "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + 
"æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + 
"çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + 
"ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, 
+ "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + 
"é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç 
ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", + "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-rwkv-cosine/tokenizer_config.json b/saves-rwkv-cosine/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-rwkv-cosine/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + 
"model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-rwkv/checkpoint-9480/config.json b/saves-rwkv/checkpoint-9480/config.json new file mode 100644 index 0000000000000000000000000000000000000000..fba72b626605f3d995236b195dd7881fa619390d --- /dev/null +++ b/saves-rwkv/checkpoint-9480/config.json @@ -0,0 +1,22 @@ +{ + "architectures": [ + "RwkvForCausalLM" + ], + "attention_hidden_size": 256, + "bos_token_id": 0, + "context_length": 1024, + "eos_token_id": 0, + "hidden_size": 256, + "intermediate_size": 1024, + "layer_norm_epsilon": 1e-05, + "model_type": "rwkv", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "rescale_every": 6, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-rwkv/checkpoint-9480/generation_config.json b/saves-rwkv/checkpoint-9480/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..14e4f03d0d73dc2707d488ac8f586bd62ef72a7e --- /dev/null +++ b/saves-rwkv/checkpoint-9480/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 0, + "eos_token_id": 0, + "transformers_version": "4.42.4" +} diff --git a/saves-rwkv/checkpoint-9480/model.safetensors b/saves-rwkv/checkpoint-9480/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8ecf29ae079005556035aa74d1f06e51347d43d1 --- /dev/null +++ b/saves-rwkv/checkpoint-9480/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ba962d913f6c1b9a7456214da0cdd86502672a90974aa3f746673733543ffe9 +size 8894568 diff --git a/saves-rwkv/checkpoint-9480/optimizer.pt b/saves-rwkv/checkpoint-9480/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b504d71210f4f2df0edb8ab011743ea6033590b0 --- /dev/null +++ 
b/saves-rwkv/checkpoint-9480/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59a1aef759797b86b025f63d485618502d2186baa1b040f4f974dbe60b98edfa +size 17815482 diff --git a/saves-rwkv/checkpoint-9480/rng_state.pth b/saves-rwkv/checkpoint-9480/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f8291cd6ce87668b786a72f3e93d072fbe54902 --- /dev/null +++ b/saves-rwkv/checkpoint-9480/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c917636c7a58af68a29056522a757e9f9b99005b776641aa157c536967817d +size 14244 diff --git a/saves-rwkv/checkpoint-9480/scheduler.pt b/saves-rwkv/checkpoint-9480/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d4e146fb9369424bca1e920276a86162b00d56fd --- /dev/null +++ b/saves-rwkv/checkpoint-9480/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c33e6451a8a4598628b3479890d40774857cdcb0d8604c19f1bee5bdefe1e2f9 +size 1064 diff --git a/saves-rwkv/checkpoint-9480/special_tokens_map.json b/saves-rwkv/checkpoint-9480/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-rwkv/checkpoint-9480/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-rwkv/checkpoint-9480/tokenizer.json b/saves-rwkv/checkpoint-9480/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ 
b/saves-rwkv/checkpoint-9480/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 
41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 
210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + 
"èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 
508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + 
"æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 
798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 
939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + 
"mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 
1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + 
"åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 
1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 
1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + 
"Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, 
+ "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, 
+ "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-rwkv/checkpoint-9480/tokenizer_config.json b/saves-rwkv/checkpoint-9480/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-rwkv/checkpoint-9480/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + 
"errors": "replace", + "model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-rwkv/checkpoint-9480/trainer_state.json b/saves-rwkv/checkpoint-9480/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b1b526b190c41bc38f3a456cf389575c358ebfcb --- /dev/null +++ b/saves-rwkv/checkpoint-9480/trainer_state.json @@ -0,0 +1,66393 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 9480, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00010548523206751055, + "grad_norm": 17.572973251342773, + "learning_rate": 1.5822784810126583e-05, + "loss": 251.0501, + "step": 1 + }, + { + "epoch": 0.0002109704641350211, + "grad_norm": 17.53040313720703, + "learning_rate": 3.1645569620253167e-05, + "loss": 250.9482, + "step": 2 + }, + { + "epoch": 0.00031645569620253165, + "grad_norm": 17.557844161987305, + "learning_rate": 4.746835443037975e-05, + "loss": 250.9948, + "step": 3 + }, + { + "epoch": 0.0004219409282700422, + "grad_norm": 17.640644073486328, + "learning_rate": 6.329113924050633e-05, + "loss": 250.7585, + "step": 4 + }, + { + "epoch": 0.0005274261603375527, + "grad_norm": 17.775373458862305, + "learning_rate": 7.911392405063291e-05, + "loss": 250.5413, + "step": 5 + }, + { + "epoch": 0.0006329113924050633, + "grad_norm": 17.761709213256836, + "learning_rate": 9.49367088607595e-05, + "loss": 250.3037, + "step": 6 + }, + { + "epoch": 0.0007383966244725738, + "grad_norm": 17.920230865478516, + "learning_rate": 0.00011075949367088609, + "loss": 249.7064, + "step": 7 + }, + { + "epoch": 0.0008438818565400844, + "grad_norm": 18.03495216369629, + "learning_rate": 0.00012658227848101267, + "loss": 248.6979, + "step": 8 + }, + { + "epoch": 0.0009493670886075949, + "grad_norm": 18.370254516601562, + 
"learning_rate": 0.00014240506329113925, + "loss": 248.0219, + "step": 9 + }, + { + "epoch": 0.0010548523206751054, + "grad_norm": 18.604124069213867, + "learning_rate": 0.00015822784810126583, + "loss": 247.2785, + "step": 10 + }, + { + "epoch": 0.001160337552742616, + "grad_norm": 19.150192260742188, + "learning_rate": 0.0001740506329113924, + "loss": 246.0427, + "step": 11 + }, + { + "epoch": 0.0012658227848101266, + "grad_norm": 19.681184768676758, + "learning_rate": 0.000189873417721519, + "loss": 244.4001, + "step": 12 + }, + { + "epoch": 0.0013713080168776372, + "grad_norm": 20.390687942504883, + "learning_rate": 0.00020569620253164557, + "loss": 242.7969, + "step": 13 + }, + { + "epoch": 0.0014767932489451476, + "grad_norm": 21.39630889892578, + "learning_rate": 0.00022151898734177217, + "loss": 240.4147, + "step": 14 + }, + { + "epoch": 0.0015822784810126582, + "grad_norm": 22.64838409423828, + "learning_rate": 0.00023734177215189873, + "loss": 237.8657, + "step": 15 + }, + { + "epoch": 0.0016877637130801688, + "grad_norm": 24.30998992919922, + "learning_rate": 0.00025316455696202533, + "loss": 234.817, + "step": 16 + }, + { + "epoch": 0.0017932489451476794, + "grad_norm": 26.623497009277344, + "learning_rate": 0.0002689873417721519, + "loss": 230.5802, + "step": 17 + }, + { + "epoch": 0.0018987341772151898, + "grad_norm": 29.72104835510254, + "learning_rate": 0.0002848101265822785, + "loss": 225.1458, + "step": 18 + }, + { + "epoch": 0.0020042194092827004, + "grad_norm": 34.049400329589844, + "learning_rate": 0.00030063291139240507, + "loss": 218.3528, + "step": 19 + }, + { + "epoch": 0.002109704641350211, + "grad_norm": 39.678436279296875, + "learning_rate": 0.00031645569620253165, + "loss": 210.0924, + "step": 20 + }, + { + "epoch": 0.0022151898734177216, + "grad_norm": 47.04718017578125, + "learning_rate": 0.00033227848101265823, + "loss": 199.882, + "step": 21 + }, + { + "epoch": 0.002320675105485232, + "grad_norm": 56.78538131713867, + 
"learning_rate": 0.0003481012658227848, + "loss": 186.1237, + "step": 22 + }, + { + "epoch": 0.002426160337552743, + "grad_norm": 67.4872055053711, + "learning_rate": 0.00036392405063291145, + "loss": 168.4388, + "step": 23 + }, + { + "epoch": 0.002531645569620253, + "grad_norm": 77.31148529052734, + "learning_rate": 0.000379746835443038, + "loss": 147.1716, + "step": 24 + }, + { + "epoch": 0.0026371308016877636, + "grad_norm": 81.88947296142578, + "learning_rate": 0.00039556962025316455, + "loss": 122.7151, + "step": 25 + }, + { + "epoch": 0.0027426160337552744, + "grad_norm": 77.55339050292969, + "learning_rate": 0.00041139240506329113, + "loss": 96.8208, + "step": 26 + }, + { + "epoch": 0.002848101265822785, + "grad_norm": 59.03907012939453, + "learning_rate": 0.00042721518987341776, + "loss": 74.1823, + "step": 27 + }, + { + "epoch": 0.002953586497890295, + "grad_norm": Infinity, + "learning_rate": 0.00042721518987341776, + "loss": 60.0571, + "step": 28 + }, + { + "epoch": 0.003059071729957806, + "grad_norm": 47.43195343017578, + "learning_rate": 0.00044303797468354434, + "loss": 59.9601, + "step": 29 + }, + { + "epoch": 0.0031645569620253164, + "grad_norm": 50.84574508666992, + "learning_rate": 0.0004588607594936709, + "loss": 53.8831, + "step": 30 + }, + { + "epoch": 0.003270042194092827, + "grad_norm": 48.61702346801758, + "learning_rate": 0.00047468354430379745, + "loss": 49.944, + "step": 31 + }, + { + "epoch": 0.0033755274261603376, + "grad_norm": 59.33281707763672, + "learning_rate": 0.0004905063291139241, + "loss": 46.9571, + "step": 32 + }, + { + "epoch": 0.003481012658227848, + "grad_norm": 49.43178176879883, + "learning_rate": 0.0005063291139240507, + "loss": 43.5588, + "step": 33 + }, + { + "epoch": 0.003586497890295359, + "grad_norm": 40.51719665527344, + "learning_rate": 0.0005221518987341772, + "loss": 40.6373, + "step": 34 + }, + { + "epoch": 0.003691983122362869, + "grad_norm": 42.22645568847656, + "learning_rate": 0.0005379746835443038, + 
"loss": 38.0224, + "step": 35 + }, + { + "epoch": 0.0037974683544303796, + "grad_norm": 34.44194412231445, + "learning_rate": 0.0005537974683544304, + "loss": 35.6362, + "step": 36 + }, + { + "epoch": 0.0039029535864978904, + "grad_norm": 27.75926399230957, + "learning_rate": 0.000569620253164557, + "loss": 33.3136, + "step": 37 + }, + { + "epoch": 0.004008438818565401, + "grad_norm": 23.498273849487305, + "learning_rate": 0.0005854430379746836, + "loss": 31.8025, + "step": 38 + }, + { + "epoch": 0.004113924050632912, + "grad_norm": 19.771770477294922, + "learning_rate": 0.0006012658227848101, + "loss": 30.4242, + "step": 39 + }, + { + "epoch": 0.004219409282700422, + "grad_norm": 18.668527603149414, + "learning_rate": 0.0006170886075949367, + "loss": 29.5101, + "step": 40 + }, + { + "epoch": 0.004324894514767932, + "grad_norm": 15.621329307556152, + "learning_rate": 0.0006329113924050633, + "loss": 28.5796, + "step": 41 + }, + { + "epoch": 0.004430379746835443, + "grad_norm": 14.31041431427002, + "learning_rate": 0.0006487341772151899, + "loss": 27.7443, + "step": 42 + }, + { + "epoch": 0.004535864978902953, + "grad_norm": 15.416545867919922, + "learning_rate": 0.0006645569620253165, + "loss": 26.8687, + "step": 43 + }, + { + "epoch": 0.004641350210970464, + "grad_norm": 9.796165466308594, + "learning_rate": 0.000680379746835443, + "loss": 26.1326, + "step": 44 + }, + { + "epoch": 0.004746835443037975, + "grad_norm": 9.708291053771973, + "learning_rate": 0.0006962025316455696, + "loss": 25.0893, + "step": 45 + }, + { + "epoch": 0.004852320675105486, + "grad_norm": 9.979117393493652, + "learning_rate": 0.0007120253164556963, + "loss": 24.4682, + "step": 46 + }, + { + "epoch": 0.004957805907172996, + "grad_norm": 8.970297813415527, + "learning_rate": 0.0007278481012658229, + "loss": 23.8333, + "step": 47 + }, + { + "epoch": 0.005063291139240506, + "grad_norm": 8.589787483215332, + "learning_rate": 0.0007436708860759495, + "loss": 23.122, + "step": 48 + }, + { + 
"epoch": 0.005168776371308017, + "grad_norm": 9.56105899810791, + "learning_rate": 0.000759493670886076, + "loss": 22.3168, + "step": 49 + }, + { + "epoch": 0.005274261603375527, + "grad_norm": 7.6871209144592285, + "learning_rate": 0.0007753164556962025, + "loss": 21.5088, + "step": 50 + }, + { + "epoch": 0.005379746835443038, + "grad_norm": 6.878053188323975, + "learning_rate": 0.0007911392405063291, + "loss": 21.0196, + "step": 51 + }, + { + "epoch": 0.005485232067510549, + "grad_norm": 5.893653869628906, + "learning_rate": 0.0008069620253164557, + "loss": 20.2726, + "step": 52 + }, + { + "epoch": 0.005590717299578059, + "grad_norm": 5.694672584533691, + "learning_rate": 0.0008227848101265823, + "loss": 19.7271, + "step": 53 + }, + { + "epoch": 0.00569620253164557, + "grad_norm": 5.476981163024902, + "learning_rate": 0.000838607594936709, + "loss": 18.9299, + "step": 54 + }, + { + "epoch": 0.0058016877637130804, + "grad_norm": 5.156509876251221, + "learning_rate": 0.0008544303797468355, + "loss": 18.5478, + "step": 55 + }, + { + "epoch": 0.00590717299578059, + "grad_norm": 5.055478096008301, + "learning_rate": 0.0008702531645569621, + "loss": 17.8173, + "step": 56 + }, + { + "epoch": 0.006012658227848101, + "grad_norm": 5.305833339691162, + "learning_rate": 0.0008860759493670887, + "loss": 17.5576, + "step": 57 + }, + { + "epoch": 0.006118143459915612, + "grad_norm": 4.440700531005859, + "learning_rate": 0.0009018987341772153, + "loss": 16.9979, + "step": 58 + }, + { + "epoch": 0.006223628691983122, + "grad_norm": 3.9629855155944824, + "learning_rate": 0.0009177215189873418, + "loss": 16.6182, + "step": 59 + }, + { + "epoch": 0.006329113924050633, + "grad_norm": 3.873786687850952, + "learning_rate": 0.0009335443037974683, + "loss": 16.0762, + "step": 60 + }, + { + "epoch": 0.006434599156118144, + "grad_norm": 3.7512636184692383, + "learning_rate": 0.0009493670886075949, + "loss": 15.7247, + "step": 61 + }, + { + "epoch": 0.006540084388185654, + "grad_norm": 
3.734560966491699, + "learning_rate": 0.0009651898734177215, + "loss": 15.2673, + "step": 62 + }, + { + "epoch": 0.006645569620253164, + "grad_norm": 3.18464994430542, + "learning_rate": 0.0009810126582278482, + "loss": 14.989, + "step": 63 + }, + { + "epoch": 0.006751054852320675, + "grad_norm": 3.2864668369293213, + "learning_rate": 0.0009968354430379747, + "loss": 14.518, + "step": 64 + }, + { + "epoch": 0.006856540084388186, + "grad_norm": 3.429194450378418, + "learning_rate": 0.0010126582278481013, + "loss": 14.2016, + "step": 65 + }, + { + "epoch": 0.006962025316455696, + "grad_norm": 2.9148104190826416, + "learning_rate": 0.001028481012658228, + "loss": 13.8699, + "step": 66 + }, + { + "epoch": 0.007067510548523207, + "grad_norm": 2.837296485900879, + "learning_rate": 0.0010443037974683545, + "loss": 13.5786, + "step": 67 + }, + { + "epoch": 0.007172995780590718, + "grad_norm": 2.608259439468384, + "learning_rate": 0.001060126582278481, + "loss": 13.1931, + "step": 68 + }, + { + "epoch": 0.007278481012658228, + "grad_norm": 2.525197744369507, + "learning_rate": 0.0010759493670886076, + "loss": 12.8808, + "step": 69 + }, + { + "epoch": 0.007383966244725738, + "grad_norm": 2.4360272884368896, + "learning_rate": 0.0010917721518987342, + "loss": 12.4889, + "step": 70 + }, + { + "epoch": 0.007489451476793249, + "grad_norm": 2.4229767322540283, + "learning_rate": 0.0011075949367088608, + "loss": 12.2596, + "step": 71 + }, + { + "epoch": 0.007594936708860759, + "grad_norm": 2.1703333854675293, + "learning_rate": 0.0011234177215189874, + "loss": 11.888, + "step": 72 + }, + { + "epoch": 0.00770042194092827, + "grad_norm": 2.122751474380493, + "learning_rate": 0.001139240506329114, + "loss": 11.7009, + "step": 73 + }, + { + "epoch": 0.007805907172995781, + "grad_norm": 2.0113112926483154, + "learning_rate": 0.0011550632911392405, + "loss": 11.3932, + "step": 74 + }, + { + "epoch": 0.007911392405063292, + "grad_norm": 1.7697006464004517, + "learning_rate": 
0.0011708860759493671, + "loss": 11.1465, + "step": 75 + }, + { + "epoch": 0.008016877637130802, + "grad_norm": 1.9372621774673462, + "learning_rate": 0.0011867088607594937, + "loss": 10.8474, + "step": 76 + }, + { + "epoch": 0.008122362869198312, + "grad_norm": 1.8286631107330322, + "learning_rate": 0.0012025316455696203, + "loss": 10.4471, + "step": 77 + }, + { + "epoch": 0.008227848101265823, + "grad_norm": 2.0615549087524414, + "learning_rate": 0.0012183544303797469, + "loss": 10.2116, + "step": 78 + }, + { + "epoch": 0.008333333333333333, + "grad_norm": 1.6820805072784424, + "learning_rate": 0.0012341772151898734, + "loss": 9.912, + "step": 79 + }, + { + "epoch": 0.008438818565400843, + "grad_norm": 1.7878479957580566, + "learning_rate": 0.00125, + "loss": 9.7902, + "step": 80 + }, + { + "epoch": 0.008544303797468355, + "grad_norm": 1.5089387893676758, + "learning_rate": 0.0012658227848101266, + "loss": 9.489, + "step": 81 + }, + { + "epoch": 0.008649789029535865, + "grad_norm": 1.7321656942367554, + "learning_rate": 0.0012816455696202532, + "loss": 9.22, + "step": 82 + }, + { + "epoch": 0.008755274261603375, + "grad_norm": 2.3092963695526123, + "learning_rate": 0.0012974683544303798, + "loss": 9.0164, + "step": 83 + }, + { + "epoch": 0.008860759493670886, + "grad_norm": 1.8824485540390015, + "learning_rate": 0.0013132911392405063, + "loss": 8.7961, + "step": 84 + }, + { + "epoch": 0.008966244725738396, + "grad_norm": 1.6138931512832642, + "learning_rate": 0.001329113924050633, + "loss": 8.5849, + "step": 85 + }, + { + "epoch": 0.009071729957805906, + "grad_norm": 2.6879501342773438, + "learning_rate": 0.0013449367088607595, + "loss": 8.423, + "step": 86 + }, + { + "epoch": 0.009177215189873418, + "grad_norm": 1.8025970458984375, + "learning_rate": 0.001360759493670886, + "loss": 8.2328, + "step": 87 + }, + { + "epoch": 0.009282700421940928, + "grad_norm": 1.5726999044418335, + "learning_rate": 0.0013765822784810127, + "loss": 8.0188, + "step": 88 + }, + { + 
"epoch": 0.009388185654008438, + "grad_norm": 4.1710205078125, + "learning_rate": 0.0013924050632911392, + "loss": 7.9223, + "step": 89 + }, + { + "epoch": 0.00949367088607595, + "grad_norm": 1.6504859924316406, + "learning_rate": 0.001408227848101266, + "loss": 7.8135, + "step": 90 + }, + { + "epoch": 0.00959915611814346, + "grad_norm": 2.0915162563323975, + "learning_rate": 0.0014240506329113926, + "loss": 7.5641, + "step": 91 + }, + { + "epoch": 0.009704641350210971, + "grad_norm": 1.5370910167694092, + "learning_rate": 0.0014398734177215192, + "loss": 7.4497, + "step": 92 + }, + { + "epoch": 0.009810126582278481, + "grad_norm": 4.767701148986816, + "learning_rate": 0.0014556962025316458, + "loss": 7.4169, + "step": 93 + }, + { + "epoch": 0.009915611814345991, + "grad_norm": 2.054208755493164, + "learning_rate": 0.0014715189873417724, + "loss": 7.2951, + "step": 94 + }, + { + "epoch": 0.010021097046413503, + "grad_norm": 2.3305563926696777, + "learning_rate": 0.001487341772151899, + "loss": 7.1743, + "step": 95 + }, + { + "epoch": 0.010126582278481013, + "grad_norm": 2.3305232524871826, + "learning_rate": 0.0015, + "loss": 7.0715, + "step": 96 + }, + { + "epoch": 0.010232067510548523, + "grad_norm": 2.597180128097534, + "learning_rate": 0.0015, + "loss": 6.9373, + "step": 97 + }, + { + "epoch": 0.010337552742616034, + "grad_norm": 2.3387911319732666, + "learning_rate": 0.0015, + "loss": 6.8612, + "step": 98 + }, + { + "epoch": 0.010443037974683544, + "grad_norm": 1.793911337852478, + "learning_rate": 0.0015, + "loss": 6.7812, + "step": 99 + }, + { + "epoch": 0.010548523206751054, + "grad_norm": 1.8205612897872925, + "learning_rate": 0.0015, + "loss": 6.6143, + "step": 100 + }, + { + "epoch": 0.010654008438818566, + "grad_norm": 1.3155461549758911, + "learning_rate": 0.0015, + "loss": 6.5102, + "step": 101 + }, + { + "epoch": 0.010759493670886076, + "grad_norm": 3.74252986907959, + "learning_rate": 0.0015, + "loss": 6.4708, + "step": 102 + }, + { + "epoch": 
0.010864978902953586, + "grad_norm": 1.8994747400283813, + "learning_rate": 0.0015, + "loss": 6.3777, + "step": 103 + }, + { + "epoch": 0.010970464135021098, + "grad_norm": 3.6241838932037354, + "learning_rate": 0.0015, + "loss": 6.3383, + "step": 104 + }, + { + "epoch": 0.011075949367088608, + "grad_norm": 2.11909818649292, + "learning_rate": 0.0015, + "loss": 6.3114, + "step": 105 + }, + { + "epoch": 0.011181434599156118, + "grad_norm": 1.9279237985610962, + "learning_rate": 0.0015, + "loss": 6.2014, + "step": 106 + }, + { + "epoch": 0.01128691983122363, + "grad_norm": 4.4135026931762695, + "learning_rate": 0.0015, + "loss": 6.1904, + "step": 107 + }, + { + "epoch": 0.01139240506329114, + "grad_norm": 2.8534224033355713, + "learning_rate": 0.0015, + "loss": 6.0916, + "step": 108 + }, + { + "epoch": 0.01149789029535865, + "grad_norm": 1.6974148750305176, + "learning_rate": 0.0015, + "loss": 6.075, + "step": 109 + }, + { + "epoch": 0.011603375527426161, + "grad_norm": 1.5966553688049316, + "learning_rate": 0.0015, + "loss": 5.9691, + "step": 110 + }, + { + "epoch": 0.01170886075949367, + "grad_norm": 2.3013463020324707, + "learning_rate": 0.0015, + "loss": 5.9254, + "step": 111 + }, + { + "epoch": 0.01181434599156118, + "grad_norm": 1.6460784673690796, + "learning_rate": 0.0015, + "loss": 5.8863, + "step": 112 + }, + { + "epoch": 0.011919831223628692, + "grad_norm": 1.7964836359024048, + "learning_rate": 0.0015, + "loss": 5.8337, + "step": 113 + }, + { + "epoch": 0.012025316455696202, + "grad_norm": 1.4708895683288574, + "learning_rate": 0.0015, + "loss": 5.7767, + "step": 114 + }, + { + "epoch": 0.012130801687763712, + "grad_norm": 2.4011635780334473, + "learning_rate": 0.0015, + "loss": 5.7451, + "step": 115 + }, + { + "epoch": 0.012236286919831224, + "grad_norm": 2.6402406692504883, + "learning_rate": 0.0015, + "loss": 5.711, + "step": 116 + }, + { + "epoch": 0.012341772151898734, + "grad_norm": 1.5417157411575317, + "learning_rate": 0.0015, + "loss": 5.6874, + 
"step": 117 + }, + { + "epoch": 0.012447257383966244, + "grad_norm": 1.573742151260376, + "learning_rate": 0.0015, + "loss": 5.6221, + "step": 118 + }, + { + "epoch": 0.012552742616033756, + "grad_norm": 1.0425119400024414, + "learning_rate": 0.0015, + "loss": 5.4991, + "step": 119 + }, + { + "epoch": 0.012658227848101266, + "grad_norm": 2.8544440269470215, + "learning_rate": 0.0015, + "loss": 5.5429, + "step": 120 + }, + { + "epoch": 0.012763713080168776, + "grad_norm": 1.8981497287750244, + "learning_rate": 0.0015, + "loss": 5.5391, + "step": 121 + }, + { + "epoch": 0.012869198312236287, + "grad_norm": 1.6323693990707397, + "learning_rate": 0.0015, + "loss": 5.4735, + "step": 122 + }, + { + "epoch": 0.012974683544303797, + "grad_norm": 1.1948492527008057, + "learning_rate": 0.0015, + "loss": 5.3688, + "step": 123 + }, + { + "epoch": 0.013080168776371307, + "grad_norm": 2.444502115249634, + "learning_rate": 0.0015, + "loss": 5.3517, + "step": 124 + }, + { + "epoch": 0.013185654008438819, + "grad_norm": 1.6461869478225708, + "learning_rate": 0.0015, + "loss": 5.3725, + "step": 125 + }, + { + "epoch": 0.013291139240506329, + "grad_norm": 1.5192331075668335, + "learning_rate": 0.0015, + "loss": 5.3198, + "step": 126 + }, + { + "epoch": 0.01339662447257384, + "grad_norm": 1.276066780090332, + "learning_rate": 0.0015, + "loss": 5.3323, + "step": 127 + }, + { + "epoch": 0.01350210970464135, + "grad_norm": 2.0355000495910645, + "learning_rate": 0.0015, + "loss": 5.3056, + "step": 128 + }, + { + "epoch": 0.01360759493670886, + "grad_norm": 1.2561554908752441, + "learning_rate": 0.0015, + "loss": 5.2065, + "step": 129 + }, + { + "epoch": 0.013713080168776372, + "grad_norm": 1.8382107019424438, + "learning_rate": 0.0015, + "loss": 5.2395, + "step": 130 + }, + { + "epoch": 0.013818565400843882, + "grad_norm": 1.5140044689178467, + "learning_rate": 0.0015, + "loss": 5.1583, + "step": 131 + }, + { + "epoch": 0.013924050632911392, + "grad_norm": 1.35004723072052, + 
"learning_rate": 0.0015, + "loss": 5.2067, + "step": 132 + }, + { + "epoch": 0.014029535864978904, + "grad_norm": 1.3346604108810425, + "learning_rate": 0.0015, + "loss": 5.1407, + "step": 133 + }, + { + "epoch": 0.014135021097046414, + "grad_norm": 1.463438630104065, + "learning_rate": 0.0015, + "loss": 5.0815, + "step": 134 + }, + { + "epoch": 0.014240506329113924, + "grad_norm": 1.1700129508972168, + "learning_rate": 0.0015, + "loss": 5.0648, + "step": 135 + }, + { + "epoch": 0.014345991561181435, + "grad_norm": 2.377117395401001, + "learning_rate": 0.0015, + "loss": 5.0772, + "step": 136 + }, + { + "epoch": 0.014451476793248945, + "grad_norm": 2.7960591316223145, + "learning_rate": 0.0015, + "loss": 5.094, + "step": 137 + }, + { + "epoch": 0.014556962025316455, + "grad_norm": 1.3638007640838623, + "learning_rate": 0.0015, + "loss": 5.0096, + "step": 138 + }, + { + "epoch": 0.014662447257383967, + "grad_norm": 2.133676767349243, + "learning_rate": 0.0015, + "loss": 4.9985, + "step": 139 + }, + { + "epoch": 0.014767932489451477, + "grad_norm": 1.3584480285644531, + "learning_rate": 0.0015, + "loss": 4.9763, + "step": 140 + }, + { + "epoch": 0.014873417721518987, + "grad_norm": 1.4077796936035156, + "learning_rate": 0.0015, + "loss": 4.9583, + "step": 141 + }, + { + "epoch": 0.014978902953586498, + "grad_norm": 1.5043329000473022, + "learning_rate": 0.0015, + "loss": 4.9342, + "step": 142 + }, + { + "epoch": 0.015084388185654008, + "grad_norm": 1.4470824003219604, + "learning_rate": 0.0015, + "loss": 4.9471, + "step": 143 + }, + { + "epoch": 0.015189873417721518, + "grad_norm": 1.8876885175704956, + "learning_rate": 0.0015, + "loss": 4.9509, + "step": 144 + }, + { + "epoch": 0.01529535864978903, + "grad_norm": 1.4684858322143555, + "learning_rate": 0.0015, + "loss": 4.9169, + "step": 145 + }, + { + "epoch": 0.01540084388185654, + "grad_norm": 2.090817928314209, + "learning_rate": 0.0015, + "loss": 4.8608, + "step": 146 + }, + { + "epoch": 0.01550632911392405, + 
"grad_norm": 1.936529517173767, + "learning_rate": 0.0015, + "loss": 4.9383, + "step": 147 + }, + { + "epoch": 0.015611814345991562, + "grad_norm": 1.3230787515640259, + "learning_rate": 0.0015, + "loss": 4.8021, + "step": 148 + }, + { + "epoch": 0.015717299578059073, + "grad_norm": 1.5674653053283691, + "learning_rate": 0.0015, + "loss": 4.8407, + "step": 149 + }, + { + "epoch": 0.015822784810126583, + "grad_norm": 1.6310316324234009, + "learning_rate": 0.0015, + "loss": 4.8283, + "step": 150 + }, + { + "epoch": 0.015928270042194093, + "grad_norm": 2.4742841720581055, + "learning_rate": 0.0015, + "loss": 4.8693, + "step": 151 + }, + { + "epoch": 0.016033755274261603, + "grad_norm": 1.9962555170059204, + "learning_rate": 0.0015, + "loss": 4.8067, + "step": 152 + }, + { + "epoch": 0.016139240506329113, + "grad_norm": 1.3192980289459229, + "learning_rate": 0.0015, + "loss": 4.7982, + "step": 153 + }, + { + "epoch": 0.016244725738396623, + "grad_norm": 1.1292109489440918, + "learning_rate": 0.0015, + "loss": 4.7559, + "step": 154 + }, + { + "epoch": 0.016350210970464137, + "grad_norm": 2.0209500789642334, + "learning_rate": 0.0015, + "loss": 4.7768, + "step": 155 + }, + { + "epoch": 0.016455696202531647, + "grad_norm": 1.229773998260498, + "learning_rate": 0.0015, + "loss": 4.727, + "step": 156 + }, + { + "epoch": 0.016561181434599156, + "grad_norm": 1.6710070371627808, + "learning_rate": 0.0015, + "loss": 4.7555, + "step": 157 + }, + { + "epoch": 0.016666666666666666, + "grad_norm": 1.1893903017044067, + "learning_rate": 0.0015, + "loss": 4.699, + "step": 158 + }, + { + "epoch": 0.016772151898734176, + "grad_norm": 1.4513366222381592, + "learning_rate": 0.0015, + "loss": 4.7205, + "step": 159 + }, + { + "epoch": 0.016877637130801686, + "grad_norm": 1.1809194087982178, + "learning_rate": 0.0015, + "loss": 4.7117, + "step": 160 + }, + { + "epoch": 0.0169831223628692, + "grad_norm": 1.2902393341064453, + "learning_rate": 0.0015, + "loss": 4.6507, + "step": 161 + }, + { 
+ "epoch": 0.01708860759493671, + "grad_norm": 1.6813087463378906, + "learning_rate": 0.0015, + "loss": 4.7321, + "step": 162 + }, + { + "epoch": 0.01719409282700422, + "grad_norm": 1.2281099557876587, + "learning_rate": 0.0015, + "loss": 4.6949, + "step": 163 + }, + { + "epoch": 0.01729957805907173, + "grad_norm": 1.2403007745742798, + "learning_rate": 0.0015, + "loss": 4.6419, + "step": 164 + }, + { + "epoch": 0.01740506329113924, + "grad_norm": 1.6633118391036987, + "learning_rate": 0.0015, + "loss": 4.6809, + "step": 165 + }, + { + "epoch": 0.01751054852320675, + "grad_norm": 0.963398277759552, + "learning_rate": 0.0015, + "loss": 4.6299, + "step": 166 + }, + { + "epoch": 0.017616033755274263, + "grad_norm": 1.378148078918457, + "learning_rate": 0.0015, + "loss": 4.5943, + "step": 167 + }, + { + "epoch": 0.017721518987341773, + "grad_norm": 1.522545337677002, + "learning_rate": 0.0015, + "loss": 4.5927, + "step": 168 + }, + { + "epoch": 0.017827004219409283, + "grad_norm": 1.1972851753234863, + "learning_rate": 0.0015, + "loss": 4.567, + "step": 169 + }, + { + "epoch": 0.017932489451476793, + "grad_norm": 1.2328261137008667, + "learning_rate": 0.0015, + "loss": 4.5952, + "step": 170 + }, + { + "epoch": 0.018037974683544303, + "grad_norm": 1.7029229402542114, + "learning_rate": 0.0015, + "loss": 4.5582, + "step": 171 + }, + { + "epoch": 0.018143459915611813, + "grad_norm": 1.3853172063827515, + "learning_rate": 0.0015, + "loss": 4.5896, + "step": 172 + }, + { + "epoch": 0.018248945147679326, + "grad_norm": 0.8850933313369751, + "learning_rate": 0.0015, + "loss": 4.5539, + "step": 173 + }, + { + "epoch": 0.018354430379746836, + "grad_norm": 3.482740879058838, + "learning_rate": 0.0015, + "loss": 4.5616, + "step": 174 + }, + { + "epoch": 0.018459915611814346, + "grad_norm": 1.552054524421692, + "learning_rate": 0.0015, + "loss": 4.6109, + "step": 175 + }, + { + "epoch": 0.018565400843881856, + "grad_norm": 1.9429446458816528, + "learning_rate": 0.0015, + "loss": 
4.6011, + "step": 176 + }, + { + "epoch": 0.018670886075949366, + "grad_norm": 1.9528660774230957, + "learning_rate": 0.0015, + "loss": 4.604, + "step": 177 + }, + { + "epoch": 0.018776371308016876, + "grad_norm": 0.9907557368278503, + "learning_rate": 0.0015, + "loss": 4.5421, + "step": 178 + }, + { + "epoch": 0.01888185654008439, + "grad_norm": 7.398935794830322, + "learning_rate": 0.0015, + "loss": 4.7135, + "step": 179 + }, + { + "epoch": 0.0189873417721519, + "grad_norm": 6.9721760749816895, + "learning_rate": 0.0015, + "loss": 4.7222, + "step": 180 + }, + { + "epoch": 0.01909282700421941, + "grad_norm": 2.4383673667907715, + "learning_rate": 0.0015, + "loss": 4.5824, + "step": 181 + }, + { + "epoch": 0.01919831223628692, + "grad_norm": 1.6616612672805786, + "learning_rate": 0.0015, + "loss": 4.5654, + "step": 182 + }, + { + "epoch": 0.01930379746835443, + "grad_norm": 1.6923820972442627, + "learning_rate": 0.0015, + "loss": 4.5161, + "step": 183 + }, + { + "epoch": 0.019409282700421943, + "grad_norm": 1.8274204730987549, + "learning_rate": 0.0015, + "loss": 4.5226, + "step": 184 + }, + { + "epoch": 0.019514767932489453, + "grad_norm": 2.7481629848480225, + "learning_rate": 0.0015, + "loss": 4.4966, + "step": 185 + }, + { + "epoch": 0.019620253164556962, + "grad_norm": 2.1743571758270264, + "learning_rate": 0.0015, + "loss": 4.4764, + "step": 186 + }, + { + "epoch": 0.019725738396624472, + "grad_norm": 1.679015874862671, + "learning_rate": 0.0015, + "loss": 4.4432, + "step": 187 + }, + { + "epoch": 0.019831223628691982, + "grad_norm": 1.3686461448669434, + "learning_rate": 0.0015, + "loss": 4.4345, + "step": 188 + }, + { + "epoch": 0.019936708860759492, + "grad_norm": 1.4201180934906006, + "learning_rate": 0.0015, + "loss": 4.4451, + "step": 189 + }, + { + "epoch": 0.020042194092827006, + "grad_norm": 2.456158399581909, + "learning_rate": 0.0015, + "loss": 4.4883, + "step": 190 + }, + { + "epoch": 0.020147679324894516, + "grad_norm": 1.4764904975891113, + 
"learning_rate": 0.0015, + "loss": 4.4263, + "step": 191 + }, + { + "epoch": 0.020253164556962026, + "grad_norm": 1.9297494888305664, + "learning_rate": 0.0015, + "loss": 4.4883, + "step": 192 + }, + { + "epoch": 0.020358649789029536, + "grad_norm": 1.7313969135284424, + "learning_rate": 0.0015, + "loss": 4.4124, + "step": 193 + }, + { + "epoch": 0.020464135021097046, + "grad_norm": 1.5488179922103882, + "learning_rate": 0.0015, + "loss": 4.4192, + "step": 194 + }, + { + "epoch": 0.020569620253164556, + "grad_norm": 1.4109926223754883, + "learning_rate": 0.0015, + "loss": 4.3901, + "step": 195 + }, + { + "epoch": 0.02067510548523207, + "grad_norm": 2.1155948638916016, + "learning_rate": 0.0015, + "loss": 4.404, + "step": 196 + }, + { + "epoch": 0.02078059071729958, + "grad_norm": 1.2960556745529175, + "learning_rate": 0.0015, + "loss": 4.3769, + "step": 197 + }, + { + "epoch": 0.02088607594936709, + "grad_norm": 1.6195720434188843, + "learning_rate": 0.0015, + "loss": 4.3928, + "step": 198 + }, + { + "epoch": 0.0209915611814346, + "grad_norm": 1.4014438390731812, + "learning_rate": 0.0015, + "loss": 4.3943, + "step": 199 + }, + { + "epoch": 0.02109704641350211, + "grad_norm": 1.9307334423065186, + "learning_rate": 0.0015, + "loss": 4.3421, + "step": 200 + }, + { + "epoch": 0.02120253164556962, + "grad_norm": 1.4786574840545654, + "learning_rate": 0.0015, + "loss": 4.3774, + "step": 201 + }, + { + "epoch": 0.021308016877637132, + "grad_norm": 1.668962836265564, + "learning_rate": 0.0015, + "loss": 4.3703, + "step": 202 + }, + { + "epoch": 0.021413502109704642, + "grad_norm": 2.0469655990600586, + "learning_rate": 0.0015, + "loss": 4.351, + "step": 203 + }, + { + "epoch": 0.021518987341772152, + "grad_norm": 1.077816128730774, + "learning_rate": 0.0015, + "loss": 4.315, + "step": 204 + }, + { + "epoch": 0.021624472573839662, + "grad_norm": 0.8175413608551025, + "learning_rate": 0.0015, + "loss": 4.2716, + "step": 205 + }, + { + "epoch": 0.021729957805907172, + 
"grad_norm": 1.214090347290039, + "learning_rate": 0.0015, + "loss": 4.2746, + "step": 206 + }, + { + "epoch": 0.021835443037974682, + "grad_norm": 0.8224858641624451, + "learning_rate": 0.0015, + "loss": 4.3121, + "step": 207 + }, + { + "epoch": 0.021940928270042195, + "grad_norm": 0.9095271229743958, + "learning_rate": 0.0015, + "loss": 4.2562, + "step": 208 + }, + { + "epoch": 0.022046413502109705, + "grad_norm": 0.879135251045227, + "learning_rate": 0.0015, + "loss": 4.2788, + "step": 209 + }, + { + "epoch": 0.022151898734177215, + "grad_norm": 0.9262049794197083, + "learning_rate": 0.0015, + "loss": 4.3034, + "step": 210 + }, + { + "epoch": 0.022257383966244725, + "grad_norm": 0.9237472414970398, + "learning_rate": 0.0015, + "loss": 4.2607, + "step": 211 + }, + { + "epoch": 0.022362869198312235, + "grad_norm": 0.856684684753418, + "learning_rate": 0.0015, + "loss": 4.2278, + "step": 212 + }, + { + "epoch": 0.022468354430379745, + "grad_norm": 1.3599907159805298, + "learning_rate": 0.0015, + "loss": 4.2822, + "step": 213 + }, + { + "epoch": 0.02257383966244726, + "grad_norm": 1.089147925376892, + "learning_rate": 0.0015, + "loss": 4.2345, + "step": 214 + }, + { + "epoch": 0.02267932489451477, + "grad_norm": 1.279386281967163, + "learning_rate": 0.0015, + "loss": 4.2058, + "step": 215 + }, + { + "epoch": 0.02278481012658228, + "grad_norm": 1.1269630193710327, + "learning_rate": 0.0015, + "loss": 4.2239, + "step": 216 + }, + { + "epoch": 0.02289029535864979, + "grad_norm": 1.7589246034622192, + "learning_rate": 0.0015, + "loss": 4.2115, + "step": 217 + }, + { + "epoch": 0.0229957805907173, + "grad_norm": 0.9989317655563354, + "learning_rate": 0.0015, + "loss": 4.2189, + "step": 218 + }, + { + "epoch": 0.023101265822784812, + "grad_norm": 1.8037734031677246, + "learning_rate": 0.0015, + "loss": 4.2149, + "step": 219 + }, + { + "epoch": 0.023206751054852322, + "grad_norm": 0.9036387205123901, + "learning_rate": 0.0015, + "loss": 4.1779, + "step": 220 + }, + { + 
"epoch": 0.02331223628691983, + "grad_norm": 1.3706892728805542, + "learning_rate": 0.0015, + "loss": 4.1796, + "step": 221 + }, + { + "epoch": 0.02341772151898734, + "grad_norm": 1.0497188568115234, + "learning_rate": 0.0015, + "loss": 4.1433, + "step": 222 + }, + { + "epoch": 0.02352320675105485, + "grad_norm": 1.9405304193496704, + "learning_rate": 0.0015, + "loss": 4.1594, + "step": 223 + }, + { + "epoch": 0.02362869198312236, + "grad_norm": 1.3048264980316162, + "learning_rate": 0.0015, + "loss": 4.1455, + "step": 224 + }, + { + "epoch": 0.023734177215189875, + "grad_norm": 1.0403116941452026, + "learning_rate": 0.0015, + "loss": 4.1181, + "step": 225 + }, + { + "epoch": 0.023839662447257385, + "grad_norm": 1.0528110265731812, + "learning_rate": 0.0015, + "loss": 4.1724, + "step": 226 + }, + { + "epoch": 0.023945147679324895, + "grad_norm": 1.0902891159057617, + "learning_rate": 0.0015, + "loss": 4.1209, + "step": 227 + }, + { + "epoch": 0.024050632911392405, + "grad_norm": 1.3682677745819092, + "learning_rate": 0.0015, + "loss": 4.1494, + "step": 228 + }, + { + "epoch": 0.024156118143459915, + "grad_norm": 0.8521319031715393, + "learning_rate": 0.0015, + "loss": 4.085, + "step": 229 + }, + { + "epoch": 0.024261603375527425, + "grad_norm": 1.6877565383911133, + "learning_rate": 0.0015, + "loss": 4.1338, + "step": 230 + }, + { + "epoch": 0.024367088607594938, + "grad_norm": 1.059357762336731, + "learning_rate": 0.0015, + "loss": 4.1076, + "step": 231 + }, + { + "epoch": 0.024472573839662448, + "grad_norm": 1.1576213836669922, + "learning_rate": 0.0015, + "loss": 4.1245, + "step": 232 + }, + { + "epoch": 0.024578059071729958, + "grad_norm": 1.2020525932312012, + "learning_rate": 0.0015, + "loss": 4.0713, + "step": 233 + }, + { + "epoch": 0.024683544303797468, + "grad_norm": 1.595170259475708, + "learning_rate": 0.0015, + "loss": 4.1301, + "step": 234 + }, + { + "epoch": 0.024789029535864978, + "grad_norm": 1.1874459981918335, + "learning_rate": 0.0015, + "loss": 
4.0573, + "step": 235 + }, + { + "epoch": 0.024894514767932488, + "grad_norm": 1.1711580753326416, + "learning_rate": 0.0015, + "loss": 4.0641, + "step": 236 + }, + { + "epoch": 0.025, + "grad_norm": 0.8554295897483826, + "learning_rate": 0.0015, + "loss": 4.0314, + "step": 237 + }, + { + "epoch": 0.02510548523206751, + "grad_norm": 1.223017930984497, + "learning_rate": 0.0015, + "loss": 4.0473, + "step": 238 + }, + { + "epoch": 0.02521097046413502, + "grad_norm": 0.755368709564209, + "learning_rate": 0.0015, + "loss": 4.0511, + "step": 239 + }, + { + "epoch": 0.02531645569620253, + "grad_norm": 1.6311113834381104, + "learning_rate": 0.0015, + "loss": 4.0787, + "step": 240 + }, + { + "epoch": 0.02542194092827004, + "grad_norm": 0.6820424795150757, + "learning_rate": 0.0015, + "loss": 3.9967, + "step": 241 + }, + { + "epoch": 0.02552742616033755, + "grad_norm": 1.1365361213684082, + "learning_rate": 0.0015, + "loss": 4.037, + "step": 242 + }, + { + "epoch": 0.025632911392405065, + "grad_norm": 0.7740467190742493, + "learning_rate": 0.0015, + "loss": 4.038, + "step": 243 + }, + { + "epoch": 0.025738396624472575, + "grad_norm": 1.7688915729522705, + "learning_rate": 0.0015, + "loss": 4.0466, + "step": 244 + }, + { + "epoch": 0.025843881856540084, + "grad_norm": 1.1134791374206543, + "learning_rate": 0.0015, + "loss": 3.9961, + "step": 245 + }, + { + "epoch": 0.025949367088607594, + "grad_norm": 0.9898406267166138, + "learning_rate": 0.0015, + "loss": 3.9946, + "step": 246 + }, + { + "epoch": 0.026054852320675104, + "grad_norm": 0.9530130624771118, + "learning_rate": 0.0015, + "loss": 3.9802, + "step": 247 + }, + { + "epoch": 0.026160337552742614, + "grad_norm": 0.8578491806983948, + "learning_rate": 0.0015, + "loss": 4.0179, + "step": 248 + }, + { + "epoch": 0.026265822784810128, + "grad_norm": 0.9737657904624939, + "learning_rate": 0.0015, + "loss": 3.9777, + "step": 249 + }, + { + "epoch": 0.026371308016877638, + "grad_norm": 0.9034956097602844, + "learning_rate": 
0.0015, + "loss": 3.9434, + "step": 250 + }, + { + "epoch": 0.026476793248945148, + "grad_norm": 1.289752721786499, + "learning_rate": 0.0015, + "loss": 4.0161, + "step": 251 + }, + { + "epoch": 0.026582278481012658, + "grad_norm": 1.1200958490371704, + "learning_rate": 0.0015, + "loss": 3.9408, + "step": 252 + }, + { + "epoch": 0.026687763713080168, + "grad_norm": 0.9046483039855957, + "learning_rate": 0.0015, + "loss": 3.9634, + "step": 253 + }, + { + "epoch": 0.02679324894514768, + "grad_norm": 1.3035368919372559, + "learning_rate": 0.0015, + "loss": 3.9632, + "step": 254 + }, + { + "epoch": 0.02689873417721519, + "grad_norm": 0.8343423008918762, + "learning_rate": 0.0015, + "loss": 3.9573, + "step": 255 + }, + { + "epoch": 0.0270042194092827, + "grad_norm": 1.9525237083435059, + "learning_rate": 0.0015, + "loss": 3.9246, + "step": 256 + }, + { + "epoch": 0.02710970464135021, + "grad_norm": 1.2027548551559448, + "learning_rate": 0.0015, + "loss": 3.9619, + "step": 257 + }, + { + "epoch": 0.02721518987341772, + "grad_norm": 1.1350517272949219, + "learning_rate": 0.0015, + "loss": 3.9598, + "step": 258 + }, + { + "epoch": 0.02732067510548523, + "grad_norm": 0.9410971403121948, + "learning_rate": 0.0015, + "loss": 3.9695, + "step": 259 + }, + { + "epoch": 0.027426160337552744, + "grad_norm": 1.7766046524047852, + "learning_rate": 0.0015, + "loss": 3.9019, + "step": 260 + }, + { + "epoch": 0.027531645569620254, + "grad_norm": 1.0737040042877197, + "learning_rate": 0.0015, + "loss": 3.9274, + "step": 261 + }, + { + "epoch": 0.027637130801687764, + "grad_norm": 1.334532380104065, + "learning_rate": 0.0015, + "loss": 3.9337, + "step": 262 + }, + { + "epoch": 0.027742616033755274, + "grad_norm": 1.174573302268982, + "learning_rate": 0.0015, + "loss": 3.9302, + "step": 263 + }, + { + "epoch": 0.027848101265822784, + "grad_norm": 1.272551417350769, + "learning_rate": 0.0015, + "loss": 3.9239, + "step": 264 + }, + { + "epoch": 0.027953586497890294, + "grad_norm": 
1.1538094282150269, + "learning_rate": 0.0015, + "loss": 3.8966, + "step": 265 + }, + { + "epoch": 0.028059071729957807, + "grad_norm": 1.131325602531433, + "learning_rate": 0.0015, + "loss": 3.9072, + "step": 266 + }, + { + "epoch": 0.028164556962025317, + "grad_norm": 1.0059494972229004, + "learning_rate": 0.0015, + "loss": 3.8864, + "step": 267 + }, + { + "epoch": 0.028270042194092827, + "grad_norm": 1.8131972551345825, + "learning_rate": 0.0015, + "loss": 3.8771, + "step": 268 + }, + { + "epoch": 0.028375527426160337, + "grad_norm": 1.057883858680725, + "learning_rate": 0.0015, + "loss": 3.9026, + "step": 269 + }, + { + "epoch": 0.028481012658227847, + "grad_norm": 1.124192476272583, + "learning_rate": 0.0015, + "loss": 3.9195, + "step": 270 + }, + { + "epoch": 0.028586497890295357, + "grad_norm": 1.139320969581604, + "learning_rate": 0.0015, + "loss": 3.8847, + "step": 271 + }, + { + "epoch": 0.02869198312236287, + "grad_norm": 1.347235918045044, + "learning_rate": 0.0015, + "loss": 3.8877, + "step": 272 + }, + { + "epoch": 0.02879746835443038, + "grad_norm": 0.9774066805839539, + "learning_rate": 0.0015, + "loss": 3.8518, + "step": 273 + }, + { + "epoch": 0.02890295358649789, + "grad_norm": 0.9926044344902039, + "learning_rate": 0.0015, + "loss": 3.8555, + "step": 274 + }, + { + "epoch": 0.0290084388185654, + "grad_norm": 0.9755109548568726, + "learning_rate": 0.0015, + "loss": 3.8654, + "step": 275 + }, + { + "epoch": 0.02911392405063291, + "grad_norm": 1.744920253753662, + "learning_rate": 0.0015, + "loss": 3.8329, + "step": 276 + }, + { + "epoch": 0.02921940928270042, + "grad_norm": 0.971073567867279, + "learning_rate": 0.0015, + "loss": 3.8584, + "step": 277 + }, + { + "epoch": 0.029324894514767934, + "grad_norm": 1.18274986743927, + "learning_rate": 0.0015, + "loss": 3.7912, + "step": 278 + }, + { + "epoch": 0.029430379746835444, + "grad_norm": 1.0123088359832764, + "learning_rate": 0.0015, + "loss": 3.8484, + "step": 279 + }, + { + "epoch": 
0.029535864978902954, + "grad_norm": 1.4069111347198486, + "learning_rate": 0.0015, + "loss": 3.8369, + "step": 280 + }, + { + "epoch": 0.029641350210970464, + "grad_norm": 1.2339911460876465, + "learning_rate": 0.0015, + "loss": 3.8105, + "step": 281 + }, + { + "epoch": 0.029746835443037974, + "grad_norm": 1.1302036046981812, + "learning_rate": 0.0015, + "loss": 3.8712, + "step": 282 + }, + { + "epoch": 0.029852320675105484, + "grad_norm": 1.0892914533615112, + "learning_rate": 0.0015, + "loss": 3.8036, + "step": 283 + }, + { + "epoch": 0.029957805907172997, + "grad_norm": 0.9638130068778992, + "learning_rate": 0.0015, + "loss": 3.7985, + "step": 284 + }, + { + "epoch": 0.030063291139240507, + "grad_norm": 0.8913674354553223, + "learning_rate": 0.0015, + "loss": 3.841, + "step": 285 + }, + { + "epoch": 0.030168776371308017, + "grad_norm": 1.0390292406082153, + "learning_rate": 0.0015, + "loss": 3.8221, + "step": 286 + }, + { + "epoch": 0.030274261603375527, + "grad_norm": 0.7766991853713989, + "learning_rate": 0.0015, + "loss": 3.8069, + "step": 287 + }, + { + "epoch": 0.030379746835443037, + "grad_norm": 1.190450668334961, + "learning_rate": 0.0015, + "loss": 3.7996, + "step": 288 + }, + { + "epoch": 0.03048523206751055, + "grad_norm": 0.7481834292411804, + "learning_rate": 0.0015, + "loss": 3.7964, + "step": 289 + }, + { + "epoch": 0.03059071729957806, + "grad_norm": 0.9152899384498596, + "learning_rate": 0.0015, + "loss": 3.7901, + "step": 290 + }, + { + "epoch": 0.03069620253164557, + "grad_norm": 0.5574901103973389, + "learning_rate": 0.0015, + "loss": 3.7862, + "step": 291 + }, + { + "epoch": 0.03080168776371308, + "grad_norm": 0.9546592235565186, + "learning_rate": 0.0015, + "loss": 3.8157, + "step": 292 + }, + { + "epoch": 0.03090717299578059, + "grad_norm": 0.7918180823326111, + "learning_rate": 0.0015, + "loss": 3.7862, + "step": 293 + }, + { + "epoch": 0.0310126582278481, + "grad_norm": 0.6883128881454468, + "learning_rate": 0.0015, + "loss": 3.7274, + 
"step": 294 + }, + { + "epoch": 0.031118143459915613, + "grad_norm": 0.6574051976203918, + "learning_rate": 0.0015, + "loss": 3.7342, + "step": 295 + }, + { + "epoch": 0.031223628691983123, + "grad_norm": 0.5315104722976685, + "learning_rate": 0.0015, + "loss": 3.7749, + "step": 296 + }, + { + "epoch": 0.03132911392405063, + "grad_norm": 0.6974501609802246, + "learning_rate": 0.0015, + "loss": 3.7828, + "step": 297 + }, + { + "epoch": 0.03143459915611815, + "grad_norm": 0.4303453266620636, + "learning_rate": 0.0015, + "loss": 3.7717, + "step": 298 + }, + { + "epoch": 0.03154008438818565, + "grad_norm": 0.6408708095550537, + "learning_rate": 0.0015, + "loss": 3.7301, + "step": 299 + }, + { + "epoch": 0.03164556962025317, + "grad_norm": 0.6948697566986084, + "learning_rate": 0.0015, + "loss": 3.7377, + "step": 300 + }, + { + "epoch": 0.03175105485232067, + "grad_norm": 0.8092914819717407, + "learning_rate": 0.0015, + "loss": 3.7239, + "step": 301 + }, + { + "epoch": 0.03185654008438819, + "grad_norm": 0.5960732102394104, + "learning_rate": 0.0015, + "loss": 3.7093, + "step": 302 + }, + { + "epoch": 0.03196202531645569, + "grad_norm": 0.7611662149429321, + "learning_rate": 0.0015, + "loss": 3.7177, + "step": 303 + }, + { + "epoch": 0.032067510548523206, + "grad_norm": 1.0412101745605469, + "learning_rate": 0.0015, + "loss": 3.7437, + "step": 304 + }, + { + "epoch": 0.03217299578059072, + "grad_norm": 0.5257025361061096, + "learning_rate": 0.0015, + "loss": 3.7395, + "step": 305 + }, + { + "epoch": 0.032278481012658226, + "grad_norm": 0.9643297791481018, + "learning_rate": 0.0015, + "loss": 3.7345, + "step": 306 + }, + { + "epoch": 0.03238396624472574, + "grad_norm": 1.0180543661117554, + "learning_rate": 0.0015, + "loss": 3.7389, + "step": 307 + }, + { + "epoch": 0.032489451476793246, + "grad_norm": 0.6072368621826172, + "learning_rate": 0.0015, + "loss": 3.6994, + "step": 308 + }, + { + "epoch": 0.03259493670886076, + "grad_norm": 1.355405330657959, + 
"learning_rate": 0.0015, + "loss": 3.7389, + "step": 309 + }, + { + "epoch": 0.03270042194092827, + "grad_norm": 1.4211293458938599, + "learning_rate": 0.0015, + "loss": 3.7452, + "step": 310 + }, + { + "epoch": 0.03280590717299578, + "grad_norm": 0.9843271374702454, + "learning_rate": 0.0015, + "loss": 3.6977, + "step": 311 + }, + { + "epoch": 0.03291139240506329, + "grad_norm": 2.023285150527954, + "learning_rate": 0.0015, + "loss": 3.7314, + "step": 312 + }, + { + "epoch": 0.0330168776371308, + "grad_norm": 0.7431539297103882, + "learning_rate": 0.0015, + "loss": 3.6881, + "step": 313 + }, + { + "epoch": 0.03312236286919831, + "grad_norm": 1.8234211206436157, + "learning_rate": 0.0015, + "loss": 3.7164, + "step": 314 + }, + { + "epoch": 0.03322784810126582, + "grad_norm": 1.303822636604309, + "learning_rate": 0.0015, + "loss": 3.7225, + "step": 315 + }, + { + "epoch": 0.03333333333333333, + "grad_norm": 1.2829095125198364, + "learning_rate": 0.0015, + "loss": 3.6723, + "step": 316 + }, + { + "epoch": 0.033438818565400846, + "grad_norm": 1.2429672479629517, + "learning_rate": 0.0015, + "loss": 3.6982, + "step": 317 + }, + { + "epoch": 0.03354430379746835, + "grad_norm": 2.0541017055511475, + "learning_rate": 0.0015, + "loss": 3.7401, + "step": 318 + }, + { + "epoch": 0.033649789029535866, + "grad_norm": 1.5766971111297607, + "learning_rate": 0.0015, + "loss": 3.7086, + "step": 319 + }, + { + "epoch": 0.03375527426160337, + "grad_norm": 0.8509153723716736, + "learning_rate": 0.0015, + "loss": 3.701, + "step": 320 + }, + { + "epoch": 0.033860759493670886, + "grad_norm": 2.485198736190796, + "learning_rate": 0.0015, + "loss": 3.7268, + "step": 321 + }, + { + "epoch": 0.0339662447257384, + "grad_norm": 1.134730339050293, + "learning_rate": 0.0015, + "loss": 3.7029, + "step": 322 + }, + { + "epoch": 0.034071729957805906, + "grad_norm": 1.1209100484848022, + "learning_rate": 0.0015, + "loss": 3.7136, + "step": 323 + }, + { + "epoch": 0.03417721518987342, + "grad_norm": 
1.0050996541976929, + "learning_rate": 0.0015, + "loss": 3.735, + "step": 324 + }, + { + "epoch": 0.034282700421940926, + "grad_norm": 2.056427478790283, + "learning_rate": 0.0015, + "loss": 3.69, + "step": 325 + }, + { + "epoch": 0.03438818565400844, + "grad_norm": 1.5127931833267212, + "learning_rate": 0.0015, + "loss": 3.7166, + "step": 326 + }, + { + "epoch": 0.03449367088607595, + "grad_norm": 1.0613232851028442, + "learning_rate": 0.0015, + "loss": 3.716, + "step": 327 + }, + { + "epoch": 0.03459915611814346, + "grad_norm": 1.1210507154464722, + "learning_rate": 0.0015, + "loss": 3.6645, + "step": 328 + }, + { + "epoch": 0.03470464135021097, + "grad_norm": 1.6259101629257202, + "learning_rate": 0.0015, + "loss": 3.6486, + "step": 329 + }, + { + "epoch": 0.03481012658227848, + "grad_norm": 1.148025393486023, + "learning_rate": 0.0015, + "loss": 3.6691, + "step": 330 + }, + { + "epoch": 0.03491561181434599, + "grad_norm": 1.0762622356414795, + "learning_rate": 0.0015, + "loss": 3.623, + "step": 331 + }, + { + "epoch": 0.0350210970464135, + "grad_norm": 0.8812925219535828, + "learning_rate": 0.0015, + "loss": 3.6428, + "step": 332 + }, + { + "epoch": 0.03512658227848101, + "grad_norm": 0.9755251407623291, + "learning_rate": 0.0015, + "loss": 3.6695, + "step": 333 + }, + { + "epoch": 0.035232067510548526, + "grad_norm": 1.1907674074172974, + "learning_rate": 0.0015, + "loss": 3.6643, + "step": 334 + }, + { + "epoch": 0.03533755274261603, + "grad_norm": 0.8855742812156677, + "learning_rate": 0.0015, + "loss": 3.6391, + "step": 335 + }, + { + "epoch": 0.035443037974683546, + "grad_norm": 1.316272258758545, + "learning_rate": 0.0015, + "loss": 3.6401, + "step": 336 + }, + { + "epoch": 0.03554852320675105, + "grad_norm": 0.9464013576507568, + "learning_rate": 0.0015, + "loss": 3.6475, + "step": 337 + }, + { + "epoch": 0.035654008438818566, + "grad_norm": 0.7438543438911438, + "learning_rate": 0.0015, + "loss": 3.6766, + "step": 338 + }, + { + "epoch": 
0.03575949367088608, + "grad_norm": 0.7739266157150269, + "learning_rate": 0.0015, + "loss": 3.6478, + "step": 339 + }, + { + "epoch": 0.035864978902953586, + "grad_norm": 0.6543859243392944, + "learning_rate": 0.0015, + "loss": 3.6284, + "step": 340 + }, + { + "epoch": 0.0359704641350211, + "grad_norm": 0.7955721020698547, + "learning_rate": 0.0015, + "loss": 3.6147, + "step": 341 + }, + { + "epoch": 0.036075949367088606, + "grad_norm": 0.6843664646148682, + "learning_rate": 0.0015, + "loss": 3.5515, + "step": 342 + }, + { + "epoch": 0.03618143459915612, + "grad_norm": 0.8965316414833069, + "learning_rate": 0.0015, + "loss": 3.6351, + "step": 343 + }, + { + "epoch": 0.036286919831223625, + "grad_norm": 0.7478509545326233, + "learning_rate": 0.0015, + "loss": 3.594, + "step": 344 + }, + { + "epoch": 0.03639240506329114, + "grad_norm": 0.6486328840255737, + "learning_rate": 0.0015, + "loss": 3.6041, + "step": 345 + }, + { + "epoch": 0.03649789029535865, + "grad_norm": 1.2601206302642822, + "learning_rate": 0.0015, + "loss": 3.5977, + "step": 346 + }, + { + "epoch": 0.03660337552742616, + "grad_norm": 0.7157853245735168, + "learning_rate": 0.0015, + "loss": 3.6098, + "step": 347 + }, + { + "epoch": 0.03670886075949367, + "grad_norm": 0.7331436276435852, + "learning_rate": 0.0015, + "loss": 3.6008, + "step": 348 + }, + { + "epoch": 0.03681434599156118, + "grad_norm": 0.881771981716156, + "learning_rate": 0.0015, + "loss": 3.5664, + "step": 349 + }, + { + "epoch": 0.03691983122362869, + "grad_norm": 0.5379813313484192, + "learning_rate": 0.0015, + "loss": 3.5583, + "step": 350 + }, + { + "epoch": 0.037025316455696206, + "grad_norm": 0.6771138310432434, + "learning_rate": 0.0015, + "loss": 3.5623, + "step": 351 + }, + { + "epoch": 0.03713080168776371, + "grad_norm": 0.8033321499824524, + "learning_rate": 0.0015, + "loss": 3.5892, + "step": 352 + }, + { + "epoch": 0.037236286919831225, + "grad_norm": 0.6115626096725464, + "learning_rate": 0.0015, + "loss": 3.5906, + 
"step": 353 + }, + { + "epoch": 0.03734177215189873, + "grad_norm": 0.5267861485481262, + "learning_rate": 0.0015, + "loss": 3.5399, + "step": 354 + }, + { + "epoch": 0.037447257383966245, + "grad_norm": 0.8427168130874634, + "learning_rate": 0.0015, + "loss": 3.5679, + "step": 355 + }, + { + "epoch": 0.03755274261603375, + "grad_norm": 0.5752675533294678, + "learning_rate": 0.0015, + "loss": 3.5913, + "step": 356 + }, + { + "epoch": 0.037658227848101265, + "grad_norm": 0.5359956622123718, + "learning_rate": 0.0015, + "loss": 3.543, + "step": 357 + }, + { + "epoch": 0.03776371308016878, + "grad_norm": 0.8076744675636292, + "learning_rate": 0.0015, + "loss": 3.5791, + "step": 358 + }, + { + "epoch": 0.037869198312236285, + "grad_norm": 0.6877927184104919, + "learning_rate": 0.0015, + "loss": 3.535, + "step": 359 + }, + { + "epoch": 0.0379746835443038, + "grad_norm": 0.5323793888092041, + "learning_rate": 0.0015, + "loss": 3.5794, + "step": 360 + }, + { + "epoch": 0.038080168776371305, + "grad_norm": 0.8313357830047607, + "learning_rate": 0.0015, + "loss": 3.5672, + "step": 361 + }, + { + "epoch": 0.03818565400843882, + "grad_norm": 0.8978613615036011, + "learning_rate": 0.0015, + "loss": 3.5792, + "step": 362 + }, + { + "epoch": 0.03829113924050633, + "grad_norm": 0.7147537469863892, + "learning_rate": 0.0015, + "loss": 3.5355, + "step": 363 + }, + { + "epoch": 0.03839662447257384, + "grad_norm": 0.9650090336799622, + "learning_rate": 0.0015, + "loss": 3.5463, + "step": 364 + }, + { + "epoch": 0.03850210970464135, + "grad_norm": 1.3532636165618896, + "learning_rate": 0.0015, + "loss": 3.5763, + "step": 365 + }, + { + "epoch": 0.03860759493670886, + "grad_norm": 0.8418766856193542, + "learning_rate": 0.0015, + "loss": 3.5576, + "step": 366 + }, + { + "epoch": 0.03871308016877637, + "grad_norm": 0.9084733128547668, + "learning_rate": 0.0015, + "loss": 3.5416, + "step": 367 + }, + { + "epoch": 0.038818565400843885, + "grad_norm": 1.1795523166656494, + "learning_rate": 
0.0015, + "loss": 3.5511, + "step": 368 + }, + { + "epoch": 0.03892405063291139, + "grad_norm": 0.538345992565155, + "learning_rate": 0.0015, + "loss": 3.5513, + "step": 369 + }, + { + "epoch": 0.039029535864978905, + "grad_norm": 0.6773996949195862, + "learning_rate": 0.0015, + "loss": 3.5197, + "step": 370 + }, + { + "epoch": 0.03913502109704641, + "grad_norm": 0.8091341257095337, + "learning_rate": 0.0015, + "loss": 3.5296, + "step": 371 + }, + { + "epoch": 0.039240506329113925, + "grad_norm": 0.5321422219276428, + "learning_rate": 0.0015, + "loss": 3.497, + "step": 372 + }, + { + "epoch": 0.03934599156118143, + "grad_norm": 0.5103781223297119, + "learning_rate": 0.0015, + "loss": 3.538, + "step": 373 + }, + { + "epoch": 0.039451476793248945, + "grad_norm": 0.6154399514198303, + "learning_rate": 0.0015, + "loss": 3.5814, + "step": 374 + }, + { + "epoch": 0.03955696202531646, + "grad_norm": 0.592639684677124, + "learning_rate": 0.0015, + "loss": 3.5076, + "step": 375 + }, + { + "epoch": 0.039662447257383965, + "grad_norm": 0.5350256562232971, + "learning_rate": 0.0015, + "loss": 3.5204, + "step": 376 + }, + { + "epoch": 0.03976793248945148, + "grad_norm": 0.6853824853897095, + "learning_rate": 0.0015, + "loss": 3.5282, + "step": 377 + }, + { + "epoch": 0.039873417721518985, + "grad_norm": 0.7554806470870972, + "learning_rate": 0.0015, + "loss": 3.5199, + "step": 378 + }, + { + "epoch": 0.0399789029535865, + "grad_norm": 0.705016016960144, + "learning_rate": 0.0015, + "loss": 3.5059, + "step": 379 + }, + { + "epoch": 0.04008438818565401, + "grad_norm": 0.5440376996994019, + "learning_rate": 0.0015, + "loss": 3.5268, + "step": 380 + }, + { + "epoch": 0.04018987341772152, + "grad_norm": 1.1280972957611084, + "learning_rate": 0.0015, + "loss": 3.5193, + "step": 381 + }, + { + "epoch": 0.04029535864978903, + "grad_norm": 0.8029055595397949, + "learning_rate": 0.0015, + "loss": 3.5497, + "step": 382 + }, + { + "epoch": 0.04040084388185654, + "grad_norm": 
0.7061363458633423, + "learning_rate": 0.0015, + "loss": 3.5044, + "step": 383 + }, + { + "epoch": 0.04050632911392405, + "grad_norm": 1.1606122255325317, + "learning_rate": 0.0015, + "loss": 3.515, + "step": 384 + }, + { + "epoch": 0.04061181434599156, + "grad_norm": 0.88869309425354, + "learning_rate": 0.0015, + "loss": 3.4932, + "step": 385 + }, + { + "epoch": 0.04071729957805907, + "grad_norm": 0.7289702892303467, + "learning_rate": 0.0015, + "loss": 3.4963, + "step": 386 + }, + { + "epoch": 0.040822784810126585, + "grad_norm": 0.726488471031189, + "learning_rate": 0.0015, + "loss": 3.5095, + "step": 387 + }, + { + "epoch": 0.04092827004219409, + "grad_norm": 1.2118362188339233, + "learning_rate": 0.0015, + "loss": 3.5149, + "step": 388 + }, + { + "epoch": 0.041033755274261605, + "grad_norm": 0.697326123714447, + "learning_rate": 0.0015, + "loss": 3.5075, + "step": 389 + }, + { + "epoch": 0.04113924050632911, + "grad_norm": 0.7146037220954895, + "learning_rate": 0.0015, + "loss": 3.4964, + "step": 390 + }, + { + "epoch": 0.041244725738396625, + "grad_norm": 1.112696886062622, + "learning_rate": 0.0015, + "loss": 3.4924, + "step": 391 + }, + { + "epoch": 0.04135021097046414, + "grad_norm": 0.7572354078292847, + "learning_rate": 0.0015, + "loss": 3.479, + "step": 392 + }, + { + "epoch": 0.041455696202531644, + "grad_norm": 0.6712968349456787, + "learning_rate": 0.0015, + "loss": 3.4545, + "step": 393 + }, + { + "epoch": 0.04156118143459916, + "grad_norm": 1.0475510358810425, + "learning_rate": 0.0015, + "loss": 3.4633, + "step": 394 + }, + { + "epoch": 0.041666666666666664, + "grad_norm": 0.8445262312889099, + "learning_rate": 0.0015, + "loss": 3.4566, + "step": 395 + }, + { + "epoch": 0.04177215189873418, + "grad_norm": 0.7380586266517639, + "learning_rate": 0.0015, + "loss": 3.4652, + "step": 396 + }, + { + "epoch": 0.04187763713080169, + "grad_norm": 1.1317059993743896, + "learning_rate": 0.0015, + "loss": 3.4471, + "step": 397 + }, + { + "epoch": 
0.0419831223628692, + "grad_norm": 0.9440023899078369, + "learning_rate": 0.0015, + "loss": 3.469, + "step": 398 + }, + { + "epoch": 0.04208860759493671, + "grad_norm": 0.7039048075675964, + "learning_rate": 0.0015, + "loss": 3.482, + "step": 399 + }, + { + "epoch": 0.04219409282700422, + "grad_norm": 1.0976388454437256, + "learning_rate": 0.0015, + "loss": 3.4748, + "step": 400 + }, + { + "epoch": 0.04229957805907173, + "grad_norm": 1.0696287155151367, + "learning_rate": 0.0015, + "loss": 3.4696, + "step": 401 + }, + { + "epoch": 0.04240506329113924, + "grad_norm": 0.7420902848243713, + "learning_rate": 0.0015, + "loss": 3.4302, + "step": 402 + }, + { + "epoch": 0.04251054852320675, + "grad_norm": 1.112281322479248, + "learning_rate": 0.0015, + "loss": 3.4884, + "step": 403 + }, + { + "epoch": 0.042616033755274264, + "grad_norm": 1.0400307178497314, + "learning_rate": 0.0015, + "loss": 3.4833, + "step": 404 + }, + { + "epoch": 0.04272151898734177, + "grad_norm": 0.7042508125305176, + "learning_rate": 0.0015, + "loss": 3.4332, + "step": 405 + }, + { + "epoch": 0.042827004219409284, + "grad_norm": 1.154610276222229, + "learning_rate": 0.0015, + "loss": 3.4723, + "step": 406 + }, + { + "epoch": 0.04293248945147679, + "grad_norm": 0.9252082705497742, + "learning_rate": 0.0015, + "loss": 3.4984, + "step": 407 + }, + { + "epoch": 0.043037974683544304, + "grad_norm": 0.694697916507721, + "learning_rate": 0.0015, + "loss": 3.4384, + "step": 408 + }, + { + "epoch": 0.04314345991561182, + "grad_norm": 1.0477066040039062, + "learning_rate": 0.0015, + "loss": 3.4589, + "step": 409 + }, + { + "epoch": 0.043248945147679324, + "grad_norm": 0.8460642099380493, + "learning_rate": 0.0015, + "loss": 3.4476, + "step": 410 + }, + { + "epoch": 0.04335443037974684, + "grad_norm": 0.49504128098487854, + "learning_rate": 0.0015, + "loss": 3.412, + "step": 411 + }, + { + "epoch": 0.043459915611814344, + "grad_norm": 0.8905629515647888, + "learning_rate": 0.0015, + "loss": 3.4397, + "step": 
412 + }, + { + "epoch": 0.04356540084388186, + "grad_norm": 0.8196352124214172, + "learning_rate": 0.0015, + "loss": 3.429, + "step": 413 + }, + { + "epoch": 0.043670886075949364, + "grad_norm": 0.5023813247680664, + "learning_rate": 0.0015, + "loss": 3.3905, + "step": 414 + }, + { + "epoch": 0.04377637130801688, + "grad_norm": 1.1454191207885742, + "learning_rate": 0.0015, + "loss": 3.4695, + "step": 415 + }, + { + "epoch": 0.04388185654008439, + "grad_norm": 0.8253992199897766, + "learning_rate": 0.0015, + "loss": 3.42, + "step": 416 + }, + { + "epoch": 0.0439873417721519, + "grad_norm": 0.5960416793823242, + "learning_rate": 0.0015, + "loss": 3.4351, + "step": 417 + }, + { + "epoch": 0.04409282700421941, + "grad_norm": 1.0252472162246704, + "learning_rate": 0.0015, + "loss": 3.4446, + "step": 418 + }, + { + "epoch": 0.04419831223628692, + "grad_norm": 0.8405422568321228, + "learning_rate": 0.0015, + "loss": 3.4451, + "step": 419 + }, + { + "epoch": 0.04430379746835443, + "grad_norm": 0.570079505443573, + "learning_rate": 0.0015, + "loss": 3.3993, + "step": 420 + }, + { + "epoch": 0.044409282700421944, + "grad_norm": 1.1116654872894287, + "learning_rate": 0.0015, + "loss": 3.4372, + "step": 421 + }, + { + "epoch": 0.04451476793248945, + "grad_norm": 0.7882356643676758, + "learning_rate": 0.0015, + "loss": 3.406, + "step": 422 + }, + { + "epoch": 0.044620253164556964, + "grad_norm": 0.6612509489059448, + "learning_rate": 0.0015, + "loss": 3.3972, + "step": 423 + }, + { + "epoch": 0.04472573839662447, + "grad_norm": 1.0393998622894287, + "learning_rate": 0.0015, + "loss": 3.4172, + "step": 424 + }, + { + "epoch": 0.044831223628691984, + "grad_norm": 1.0395925045013428, + "learning_rate": 0.0015, + "loss": 3.3846, + "step": 425 + }, + { + "epoch": 0.04493670886075949, + "grad_norm": 0.8121010065078735, + "learning_rate": 0.0015, + "loss": 3.4203, + "step": 426 + }, + { + "epoch": 0.045042194092827004, + "grad_norm": 1.0141360759735107, + "learning_rate": 0.0015, + 
"loss": 3.4033, + "step": 427 + }, + { + "epoch": 0.04514767932489452, + "grad_norm": 1.1913188695907593, + "learning_rate": 0.0015, + "loss": 3.4217, + "step": 428 + }, + { + "epoch": 0.045253164556962024, + "grad_norm": 0.824227511882782, + "learning_rate": 0.0015, + "loss": 3.3969, + "step": 429 + }, + { + "epoch": 0.04535864978902954, + "grad_norm": 1.114970088005066, + "learning_rate": 0.0015, + "loss": 3.3835, + "step": 430 + }, + { + "epoch": 0.045464135021097044, + "grad_norm": 0.9091669917106628, + "learning_rate": 0.0015, + "loss": 3.3828, + "step": 431 + }, + { + "epoch": 0.04556962025316456, + "grad_norm": 0.6076409816741943, + "learning_rate": 0.0015, + "loss": 3.3886, + "step": 432 + }, + { + "epoch": 0.04567510548523207, + "grad_norm": 0.6892703175544739, + "learning_rate": 0.0015, + "loss": 3.367, + "step": 433 + }, + { + "epoch": 0.04578059071729958, + "grad_norm": 0.7865586280822754, + "learning_rate": 0.0015, + "loss": 3.3913, + "step": 434 + }, + { + "epoch": 0.04588607594936709, + "grad_norm": 0.6033107042312622, + "learning_rate": 0.0015, + "loss": 3.3785, + "step": 435 + }, + { + "epoch": 0.0459915611814346, + "grad_norm": 0.6410105228424072, + "learning_rate": 0.0015, + "loss": 3.3758, + "step": 436 + }, + { + "epoch": 0.04609704641350211, + "grad_norm": 0.9925895929336548, + "learning_rate": 0.0015, + "loss": 3.3821, + "step": 437 + }, + { + "epoch": 0.046202531645569624, + "grad_norm": 0.8203887343406677, + "learning_rate": 0.0015, + "loss": 3.4164, + "step": 438 + }, + { + "epoch": 0.04630801687763713, + "grad_norm": 0.7326500415802002, + "learning_rate": 0.0015, + "loss": 3.3653, + "step": 439 + }, + { + "epoch": 0.046413502109704644, + "grad_norm": 1.2598742246627808, + "learning_rate": 0.0015, + "loss": 3.3868, + "step": 440 + }, + { + "epoch": 0.04651898734177215, + "grad_norm": 0.6901750564575195, + "learning_rate": 0.0015, + "loss": 3.3786, + "step": 441 + }, + { + "epoch": 0.04662447257383966, + "grad_norm": 0.6992195844650269, + 
"learning_rate": 0.0015, + "loss": 3.3807, + "step": 442 + }, + { + "epoch": 0.04672995780590717, + "grad_norm": 0.6946080923080444, + "learning_rate": 0.0015, + "loss": 3.3452, + "step": 443 + }, + { + "epoch": 0.04683544303797468, + "grad_norm": 0.6416733860969543, + "learning_rate": 0.0015, + "loss": 3.3839, + "step": 444 + }, + { + "epoch": 0.0469409282700422, + "grad_norm": 0.7935798168182373, + "learning_rate": 0.0015, + "loss": 3.3847, + "step": 445 + }, + { + "epoch": 0.0470464135021097, + "grad_norm": 0.542780876159668, + "learning_rate": 0.0015, + "loss": 3.3858, + "step": 446 + }, + { + "epoch": 0.04715189873417722, + "grad_norm": 1.1476181745529175, + "learning_rate": 0.0015, + "loss": 3.3848, + "step": 447 + }, + { + "epoch": 0.04725738396624472, + "grad_norm": 0.6796368956565857, + "learning_rate": 0.0015, + "loss": 3.3793, + "step": 448 + }, + { + "epoch": 0.04736286919831224, + "grad_norm": 0.7184032201766968, + "learning_rate": 0.0015, + "loss": 3.3975, + "step": 449 + }, + { + "epoch": 0.04746835443037975, + "grad_norm": 0.7406795024871826, + "learning_rate": 0.0015, + "loss": 3.3571, + "step": 450 + }, + { + "epoch": 0.047573839662447256, + "grad_norm": 0.5455744862556458, + "learning_rate": 0.0015, + "loss": 3.3736, + "step": 451 + }, + { + "epoch": 0.04767932489451477, + "grad_norm": 0.6663585901260376, + "learning_rate": 0.0015, + "loss": 3.3434, + "step": 452 + }, + { + "epoch": 0.047784810126582276, + "grad_norm": 0.4740994870662689, + "learning_rate": 0.0015, + "loss": 3.3966, + "step": 453 + }, + { + "epoch": 0.04789029535864979, + "grad_norm": 0.7141053676605225, + "learning_rate": 0.0015, + "loss": 3.3837, + "step": 454 + }, + { + "epoch": 0.047995780590717296, + "grad_norm": 0.649634063243866, + "learning_rate": 0.0015, + "loss": 3.3429, + "step": 455 + }, + { + "epoch": 0.04810126582278481, + "grad_norm": 0.5861679315567017, + "learning_rate": 0.0015, + "loss": 3.3306, + "step": 456 + }, + { + "epoch": 0.04820675105485232, + 
"grad_norm": 0.966793954372406, + "learning_rate": 0.0015, + "loss": 3.3434, + "step": 457 + }, + { + "epoch": 0.04831223628691983, + "grad_norm": 0.686845064163208, + "learning_rate": 0.0015, + "loss": 3.3382, + "step": 458 + }, + { + "epoch": 0.04841772151898734, + "grad_norm": 0.6551228165626526, + "learning_rate": 0.0015, + "loss": 3.3335, + "step": 459 + }, + { + "epoch": 0.04852320675105485, + "grad_norm": 1.0122923851013184, + "learning_rate": 0.0015, + "loss": 3.3768, + "step": 460 + }, + { + "epoch": 0.04862869198312236, + "grad_norm": 0.7260514497756958, + "learning_rate": 0.0015, + "loss": 3.3418, + "step": 461 + }, + { + "epoch": 0.048734177215189876, + "grad_norm": 0.596616268157959, + "learning_rate": 0.0015, + "loss": 3.3495, + "step": 462 + }, + { + "epoch": 0.04883966244725738, + "grad_norm": 0.7178287506103516, + "learning_rate": 0.0015, + "loss": 3.3277, + "step": 463 + }, + { + "epoch": 0.048945147679324896, + "grad_norm": 0.6255931258201599, + "learning_rate": 0.0015, + "loss": 3.3353, + "step": 464 + }, + { + "epoch": 0.0490506329113924, + "grad_norm": 0.6549305319786072, + "learning_rate": 0.0015, + "loss": 3.3255, + "step": 465 + }, + { + "epoch": 0.049156118143459916, + "grad_norm": 0.699002206325531, + "learning_rate": 0.0015, + "loss": 3.3144, + "step": 466 + }, + { + "epoch": 0.04926160337552743, + "grad_norm": 0.5931620001792908, + "learning_rate": 0.0015, + "loss": 3.3382, + "step": 467 + }, + { + "epoch": 0.049367088607594936, + "grad_norm": 0.6257835030555725, + "learning_rate": 0.0015, + "loss": 3.3297, + "step": 468 + }, + { + "epoch": 0.04947257383966245, + "grad_norm": 0.8185201287269592, + "learning_rate": 0.0015, + "loss": 3.3084, + "step": 469 + }, + { + "epoch": 0.049578059071729956, + "grad_norm": 0.6005059480667114, + "learning_rate": 0.0015, + "loss": 3.3243, + "step": 470 + }, + { + "epoch": 0.04968354430379747, + "grad_norm": 0.5609045028686523, + "learning_rate": 0.0015, + "loss": 3.2994, + "step": 471 + }, + { + 
"epoch": 0.049789029535864976, + "grad_norm": 0.6800411939620972, + "learning_rate": 0.0015, + "loss": 3.3198, + "step": 472 + }, + { + "epoch": 0.04989451476793249, + "grad_norm": 0.5176652073860168, + "learning_rate": 0.0015, + "loss": 3.3455, + "step": 473 + }, + { + "epoch": 0.05, + "grad_norm": 0.4832858145236969, + "learning_rate": 0.0015, + "loss": 3.3448, + "step": 474 + }, + { + "epoch": 0.05010548523206751, + "grad_norm": 0.6302670240402222, + "learning_rate": 0.0015, + "loss": 3.3405, + "step": 475 + }, + { + "epoch": 0.05021097046413502, + "grad_norm": 0.487811803817749, + "learning_rate": 0.0015, + "loss": 3.3255, + "step": 476 + }, + { + "epoch": 0.05031645569620253, + "grad_norm": 0.47167783975601196, + "learning_rate": 0.0015, + "loss": 3.2834, + "step": 477 + }, + { + "epoch": 0.05042194092827004, + "grad_norm": 0.5664178133010864, + "learning_rate": 0.0015, + "loss": 3.3507, + "step": 478 + }, + { + "epoch": 0.050527426160337556, + "grad_norm": 0.5259492993354797, + "learning_rate": 0.0015, + "loss": 3.2879, + "step": 479 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 0.5188157558441162, + "learning_rate": 0.0015, + "loss": 3.3126, + "step": 480 + }, + { + "epoch": 0.050738396624472576, + "grad_norm": 0.672374963760376, + "learning_rate": 0.0015, + "loss": 3.2638, + "step": 481 + }, + { + "epoch": 0.05084388185654008, + "grad_norm": 0.5739619135856628, + "learning_rate": 0.0015, + "loss": 3.3091, + "step": 482 + }, + { + "epoch": 0.050949367088607596, + "grad_norm": 0.505463719367981, + "learning_rate": 0.0015, + "loss": 3.3338, + "step": 483 + }, + { + "epoch": 0.0510548523206751, + "grad_norm": 0.7676723599433899, + "learning_rate": 0.0015, + "loss": 3.3105, + "step": 484 + }, + { + "epoch": 0.051160337552742616, + "grad_norm": 0.582621693611145, + "learning_rate": 0.0015, + "loss": 3.2536, + "step": 485 + }, + { + "epoch": 0.05126582278481013, + "grad_norm": 0.5612682700157166, + "learning_rate": 0.0015, + "loss": 3.2441, + "step": 486 
+ }, + { + "epoch": 0.051371308016877636, + "grad_norm": 0.7770277261734009, + "learning_rate": 0.0015, + "loss": 3.3269, + "step": 487 + }, + { + "epoch": 0.05147679324894515, + "grad_norm": 0.4848266541957855, + "learning_rate": 0.0015, + "loss": 3.2842, + "step": 488 + }, + { + "epoch": 0.051582278481012656, + "grad_norm": 0.5776078104972839, + "learning_rate": 0.0015, + "loss": 3.2678, + "step": 489 + }, + { + "epoch": 0.05168776371308017, + "grad_norm": 0.6414769291877747, + "learning_rate": 0.0015, + "loss": 3.3015, + "step": 490 + }, + { + "epoch": 0.05179324894514768, + "grad_norm": 0.5491862297058105, + "learning_rate": 0.0015, + "loss": 3.2944, + "step": 491 + }, + { + "epoch": 0.05189873417721519, + "grad_norm": 0.5949646234512329, + "learning_rate": 0.0015, + "loss": 3.28, + "step": 492 + }, + { + "epoch": 0.0520042194092827, + "grad_norm": 0.5341890454292297, + "learning_rate": 0.0015, + "loss": 3.2855, + "step": 493 + }, + { + "epoch": 0.05210970464135021, + "grad_norm": 0.6858221888542175, + "learning_rate": 0.0015, + "loss": 3.2751, + "step": 494 + }, + { + "epoch": 0.05221518987341772, + "grad_norm": 0.7016404271125793, + "learning_rate": 0.0015, + "loss": 3.2713, + "step": 495 + }, + { + "epoch": 0.05232067510548523, + "grad_norm": 0.46430981159210205, + "learning_rate": 0.0015, + "loss": 3.264, + "step": 496 + }, + { + "epoch": 0.05242616033755274, + "grad_norm": 0.7281006574630737, + "learning_rate": 0.0015, + "loss": 3.2912, + "step": 497 + }, + { + "epoch": 0.052531645569620256, + "grad_norm": 0.710918664932251, + "learning_rate": 0.0015, + "loss": 3.2683, + "step": 498 + }, + { + "epoch": 0.05263713080168776, + "grad_norm": 0.4440087080001831, + "learning_rate": 0.0015, + "loss": 3.27, + "step": 499 + }, + { + "epoch": 0.052742616033755275, + "grad_norm": 0.7855714559555054, + "learning_rate": 0.0015, + "loss": 3.2714, + "step": 500 + }, + { + "epoch": 0.05284810126582278, + "grad_norm": 0.6666215062141418, + "learning_rate": 0.0015, + 
"loss": 3.2683, + "step": 501 + }, + { + "epoch": 0.052953586497890295, + "grad_norm": 0.4987586736679077, + "learning_rate": 0.0015, + "loss": 3.3008, + "step": 502 + }, + { + "epoch": 0.05305907172995781, + "grad_norm": 1.054502248764038, + "learning_rate": 0.0015, + "loss": 3.2466, + "step": 503 + }, + { + "epoch": 0.053164556962025315, + "grad_norm": 0.9316742420196533, + "learning_rate": 0.0015, + "loss": 3.2713, + "step": 504 + }, + { + "epoch": 0.05327004219409283, + "grad_norm": 0.7625137567520142, + "learning_rate": 0.0015, + "loss": 3.2841, + "step": 505 + }, + { + "epoch": 0.053375527426160335, + "grad_norm": 1.4953062534332275, + "learning_rate": 0.0015, + "loss": 3.315, + "step": 506 + }, + { + "epoch": 0.05348101265822785, + "grad_norm": 0.882449209690094, + "learning_rate": 0.0015, + "loss": 3.3224, + "step": 507 + }, + { + "epoch": 0.05358649789029536, + "grad_norm": 0.9902450442314148, + "learning_rate": 0.0015, + "loss": 3.2861, + "step": 508 + }, + { + "epoch": 0.05369198312236287, + "grad_norm": 0.5175333619117737, + "learning_rate": 0.0015, + "loss": 3.2269, + "step": 509 + }, + { + "epoch": 0.05379746835443038, + "grad_norm": 1.1526293754577637, + "learning_rate": 0.0015, + "loss": 3.3091, + "step": 510 + }, + { + "epoch": 0.05390295358649789, + "grad_norm": 0.7523472309112549, + "learning_rate": 0.0015, + "loss": 3.2498, + "step": 511 + }, + { + "epoch": 0.0540084388185654, + "grad_norm": 0.7373870611190796, + "learning_rate": 0.0015, + "loss": 3.2743, + "step": 512 + }, + { + "epoch": 0.05411392405063291, + "grad_norm": 0.949036717414856, + "learning_rate": 0.0015, + "loss": 3.2592, + "step": 513 + }, + { + "epoch": 0.05421940928270042, + "grad_norm": 0.5273122191429138, + "learning_rate": 0.0015, + "loss": 3.2534, + "step": 514 + }, + { + "epoch": 0.054324894514767935, + "grad_norm": 0.7010025978088379, + "learning_rate": 0.0015, + "loss": 3.2454, + "step": 515 + }, + { + "epoch": 0.05443037974683544, + "grad_norm": 0.5511760115623474, + 
"learning_rate": 0.0015, + "loss": 3.2558, + "step": 516 + }, + { + "epoch": 0.054535864978902955, + "grad_norm": 0.614859938621521, + "learning_rate": 0.0015, + "loss": 3.2439, + "step": 517 + }, + { + "epoch": 0.05464135021097046, + "grad_norm": 0.5797116756439209, + "learning_rate": 0.0015, + "loss": 3.228, + "step": 518 + }, + { + "epoch": 0.054746835443037975, + "grad_norm": 0.5249442458152771, + "learning_rate": 0.0015, + "loss": 3.2537, + "step": 519 + }, + { + "epoch": 0.05485232067510549, + "grad_norm": 0.7572943568229675, + "learning_rate": 0.0015, + "loss": 3.2569, + "step": 520 + }, + { + "epoch": 0.054957805907172995, + "grad_norm": 0.574742317199707, + "learning_rate": 0.0015, + "loss": 3.2495, + "step": 521 + }, + { + "epoch": 0.05506329113924051, + "grad_norm": 0.6520138382911682, + "learning_rate": 0.0015, + "loss": 3.2062, + "step": 522 + }, + { + "epoch": 0.055168776371308015, + "grad_norm": 0.8512445688247681, + "learning_rate": 0.0015, + "loss": 3.2402, + "step": 523 + }, + { + "epoch": 0.05527426160337553, + "grad_norm": 0.7800602912902832, + "learning_rate": 0.0015, + "loss": 3.2554, + "step": 524 + }, + { + "epoch": 0.055379746835443035, + "grad_norm": 0.6584099531173706, + "learning_rate": 0.0015, + "loss": 3.2336, + "step": 525 + }, + { + "epoch": 0.05548523206751055, + "grad_norm": 0.9304000735282898, + "learning_rate": 0.0015, + "loss": 3.2325, + "step": 526 + }, + { + "epoch": 0.05559071729957806, + "grad_norm": 1.1555660963058472, + "learning_rate": 0.0015, + "loss": 3.2176, + "step": 527 + }, + { + "epoch": 0.05569620253164557, + "grad_norm": 0.7664372324943542, + "learning_rate": 0.0015, + "loss": 3.2399, + "step": 528 + }, + { + "epoch": 0.05580168776371308, + "grad_norm": 1.1681687831878662, + "learning_rate": 0.0015, + "loss": 3.2427, + "step": 529 + }, + { + "epoch": 0.05590717299578059, + "grad_norm": 1.130263328552246, + "learning_rate": 0.0015, + "loss": 3.2535, + "step": 530 + }, + { + "epoch": 0.0560126582278481, + 
"grad_norm": 0.7328198552131653, + "learning_rate": 0.0015, + "loss": 3.2319, + "step": 531 + }, + { + "epoch": 0.056118143459915615, + "grad_norm": 0.9135684967041016, + "learning_rate": 0.0015, + "loss": 3.2046, + "step": 532 + }, + { + "epoch": 0.05622362869198312, + "grad_norm": 1.1031025648117065, + "learning_rate": 0.0015, + "loss": 3.2248, + "step": 533 + }, + { + "epoch": 0.056329113924050635, + "grad_norm": 0.6610575318336487, + "learning_rate": 0.0015, + "loss": 3.1976, + "step": 534 + }, + { + "epoch": 0.05643459915611814, + "grad_norm": 0.7743790149688721, + "learning_rate": 0.0015, + "loss": 3.2349, + "step": 535 + }, + { + "epoch": 0.056540084388185655, + "grad_norm": 0.8870817422866821, + "learning_rate": 0.0015, + "loss": 3.2239, + "step": 536 + }, + { + "epoch": 0.05664556962025316, + "grad_norm": 0.5295478105545044, + "learning_rate": 0.0015, + "loss": 3.1924, + "step": 537 + }, + { + "epoch": 0.056751054852320675, + "grad_norm": 0.5034154653549194, + "learning_rate": 0.0015, + "loss": 3.2336, + "step": 538 + }, + { + "epoch": 0.05685654008438819, + "grad_norm": 0.6550244092941284, + "learning_rate": 0.0015, + "loss": 3.2182, + "step": 539 + }, + { + "epoch": 0.056962025316455694, + "grad_norm": 0.5512349009513855, + "learning_rate": 0.0015, + "loss": 3.2254, + "step": 540 + }, + { + "epoch": 0.05706751054852321, + "grad_norm": 0.5507791042327881, + "learning_rate": 0.0015, + "loss": 3.2632, + "step": 541 + }, + { + "epoch": 0.057172995780590714, + "grad_norm": 0.487902969121933, + "learning_rate": 0.0015, + "loss": 3.2199, + "step": 542 + }, + { + "epoch": 0.05727848101265823, + "grad_norm": 0.5236054062843323, + "learning_rate": 0.0015, + "loss": 3.1988, + "step": 543 + }, + { + "epoch": 0.05738396624472574, + "grad_norm": 0.5172393321990967, + "learning_rate": 0.0015, + "loss": 3.2091, + "step": 544 + }, + { + "epoch": 0.05748945147679325, + "grad_norm": 0.5030975341796875, + "learning_rate": 0.0015, + "loss": 3.222, + "step": 545 + }, + { + 
"epoch": 0.05759493670886076, + "grad_norm": 0.6034338474273682, + "learning_rate": 0.0015, + "loss": 3.2011, + "step": 546 + }, + { + "epoch": 0.05770042194092827, + "grad_norm": 0.5257682204246521, + "learning_rate": 0.0015, + "loss": 3.2147, + "step": 547 + }, + { + "epoch": 0.05780590717299578, + "grad_norm": 0.6029785871505737, + "learning_rate": 0.0015, + "loss": 3.2218, + "step": 548 + }, + { + "epoch": 0.057911392405063294, + "grad_norm": 0.4461432695388794, + "learning_rate": 0.0015, + "loss": 3.2115, + "step": 549 + }, + { + "epoch": 0.0580168776371308, + "grad_norm": 0.5864081978797913, + "learning_rate": 0.0015, + "loss": 3.1829, + "step": 550 + }, + { + "epoch": 0.058122362869198314, + "grad_norm": 0.5822709202766418, + "learning_rate": 0.0015, + "loss": 3.1604, + "step": 551 + }, + { + "epoch": 0.05822784810126582, + "grad_norm": 0.49668949842453003, + "learning_rate": 0.0015, + "loss": 3.1963, + "step": 552 + }, + { + "epoch": 0.058333333333333334, + "grad_norm": 0.5484792590141296, + "learning_rate": 0.0015, + "loss": 3.2187, + "step": 553 + }, + { + "epoch": 0.05843881856540084, + "grad_norm": 0.511347234249115, + "learning_rate": 0.0015, + "loss": 3.1645, + "step": 554 + }, + { + "epoch": 0.058544303797468354, + "grad_norm": 0.5356816053390503, + "learning_rate": 0.0015, + "loss": 3.1537, + "step": 555 + }, + { + "epoch": 0.05864978902953587, + "grad_norm": 0.5142372250556946, + "learning_rate": 0.0015, + "loss": 3.1859, + "step": 556 + }, + { + "epoch": 0.058755274261603374, + "grad_norm": 0.5563471913337708, + "learning_rate": 0.0015, + "loss": 3.2072, + "step": 557 + }, + { + "epoch": 0.05886075949367089, + "grad_norm": 0.508149266242981, + "learning_rate": 0.0015, + "loss": 3.1627, + "step": 558 + }, + { + "epoch": 0.058966244725738394, + "grad_norm": 0.46678608655929565, + "learning_rate": 0.0015, + "loss": 3.1511, + "step": 559 + }, + { + "epoch": 0.05907172995780591, + "grad_norm": 0.6474284529685974, + "learning_rate": 0.0015, + "loss": 
3.1691, + "step": 560 + }, + { + "epoch": 0.05917721518987342, + "grad_norm": 0.6137100458145142, + "learning_rate": 0.0015, + "loss": 3.171, + "step": 561 + }, + { + "epoch": 0.05928270042194093, + "grad_norm": 0.5073055028915405, + "learning_rate": 0.0015, + "loss": 3.1679, + "step": 562 + }, + { + "epoch": 0.05938818565400844, + "grad_norm": 0.5824939608573914, + "learning_rate": 0.0015, + "loss": 3.2039, + "step": 563 + }, + { + "epoch": 0.05949367088607595, + "grad_norm": 0.6161354780197144, + "learning_rate": 0.0015, + "loss": 3.1718, + "step": 564 + }, + { + "epoch": 0.05959915611814346, + "grad_norm": 0.5175783634185791, + "learning_rate": 0.0015, + "loss": 3.1962, + "step": 565 + }, + { + "epoch": 0.05970464135021097, + "grad_norm": 0.5944175124168396, + "learning_rate": 0.0015, + "loss": 3.1896, + "step": 566 + }, + { + "epoch": 0.05981012658227848, + "grad_norm": 0.59116131067276, + "learning_rate": 0.0015, + "loss": 3.1871, + "step": 567 + }, + { + "epoch": 0.059915611814345994, + "grad_norm": 0.5140199661254883, + "learning_rate": 0.0015, + "loss": 3.1564, + "step": 568 + }, + { + "epoch": 0.0600210970464135, + "grad_norm": 0.6444306373596191, + "learning_rate": 0.0015, + "loss": 3.1475, + "step": 569 + }, + { + "epoch": 0.060126582278481014, + "grad_norm": 0.759310245513916, + "learning_rate": 0.0015, + "loss": 3.2124, + "step": 570 + }, + { + "epoch": 0.06023206751054852, + "grad_norm": 0.6156466007232666, + "learning_rate": 0.0015, + "loss": 3.2025, + "step": 571 + }, + { + "epoch": 0.060337552742616034, + "grad_norm": 0.7697983384132385, + "learning_rate": 0.0015, + "loss": 3.183, + "step": 572 + }, + { + "epoch": 0.06044303797468355, + "grad_norm": 0.9740505218505859, + "learning_rate": 0.0015, + "loss": 3.206, + "step": 573 + }, + { + "epoch": 0.060548523206751054, + "grad_norm": 0.7364935874938965, + "learning_rate": 0.0015, + "loss": 3.1537, + "step": 574 + }, + { + "epoch": 0.06065400843881857, + "grad_norm": 1.174822449684143, + 
"learning_rate": 0.0015, + "loss": 3.2311, + "step": 575 + }, + { + "epoch": 0.060759493670886074, + "grad_norm": 1.4780375957489014, + "learning_rate": 0.0015, + "loss": 3.199, + "step": 576 + }, + { + "epoch": 0.06086497890295359, + "grad_norm": 0.9758797287940979, + "learning_rate": 0.0015, + "loss": 3.1833, + "step": 577 + }, + { + "epoch": 0.0609704641350211, + "grad_norm": 1.5799816846847534, + "learning_rate": 0.0015, + "loss": 3.1906, + "step": 578 + }, + { + "epoch": 0.06107594936708861, + "grad_norm": 0.6482163071632385, + "learning_rate": 0.0015, + "loss": 3.1772, + "step": 579 + }, + { + "epoch": 0.06118143459915612, + "grad_norm": 1.325924038887024, + "learning_rate": 0.0015, + "loss": 3.1989, + "step": 580 + }, + { + "epoch": 0.06128691983122363, + "grad_norm": 0.86517333984375, + "learning_rate": 0.0015, + "loss": 3.1777, + "step": 581 + }, + { + "epoch": 0.06139240506329114, + "grad_norm": 1.9185975790023804, + "learning_rate": 0.0015, + "loss": 3.1665, + "step": 582 + }, + { + "epoch": 0.06149789029535865, + "grad_norm": 0.9382328391075134, + "learning_rate": 0.0015, + "loss": 3.1817, + "step": 583 + }, + { + "epoch": 0.06160337552742616, + "grad_norm": 1.5296608209609985, + "learning_rate": 0.0015, + "loss": 3.1883, + "step": 584 + }, + { + "epoch": 0.061708860759493674, + "grad_norm": 1.3789610862731934, + "learning_rate": 0.0015, + "loss": 3.2288, + "step": 585 + }, + { + "epoch": 0.06181434599156118, + "grad_norm": 0.931191623210907, + "learning_rate": 0.0015, + "loss": 3.1753, + "step": 586 + }, + { + "epoch": 0.061919831223628694, + "grad_norm": 1.4776453971862793, + "learning_rate": 0.0015, + "loss": 3.1537, + "step": 587 + }, + { + "epoch": 0.0620253164556962, + "grad_norm": 1.239021897315979, + "learning_rate": 0.0015, + "loss": 3.1476, + "step": 588 + }, + { + "epoch": 0.06213080168776371, + "grad_norm": 1.0757865905761719, + "learning_rate": 0.0015, + "loss": 3.1959, + "step": 589 + }, + { + "epoch": 0.06223628691983123, + "grad_norm": 
1.0297361612319946, + "learning_rate": 0.0015, + "loss": 3.1238, + "step": 590 + }, + { + "epoch": 0.06234177215189873, + "grad_norm": 0.9390610456466675, + "learning_rate": 0.0015, + "loss": 3.1389, + "step": 591 + }, + { + "epoch": 0.06244725738396625, + "grad_norm": 1.0065451860427856, + "learning_rate": 0.0015, + "loss": 3.137, + "step": 592 + }, + { + "epoch": 0.06255274261603376, + "grad_norm": 0.9784303903579712, + "learning_rate": 0.0015, + "loss": 3.1812, + "step": 593 + }, + { + "epoch": 0.06265822784810127, + "grad_norm": 1.1172298192977905, + "learning_rate": 0.0015, + "loss": 3.1385, + "step": 594 + }, + { + "epoch": 0.06276371308016877, + "grad_norm": 1.4299421310424805, + "learning_rate": 0.0015, + "loss": 3.1642, + "step": 595 + }, + { + "epoch": 0.0628691983122363, + "grad_norm": 0.7640246152877808, + "learning_rate": 0.0015, + "loss": 3.1119, + "step": 596 + }, + { + "epoch": 0.0629746835443038, + "grad_norm": 1.1053696870803833, + "learning_rate": 0.0015, + "loss": 3.1585, + "step": 597 + }, + { + "epoch": 0.0630801687763713, + "grad_norm": 0.6866852045059204, + "learning_rate": 0.0015, + "loss": 3.1671, + "step": 598 + }, + { + "epoch": 0.06318565400843881, + "grad_norm": 1.3460876941680908, + "learning_rate": 0.0015, + "loss": 3.1317, + "step": 599 + }, + { + "epoch": 0.06329113924050633, + "grad_norm": 0.6371838450431824, + "learning_rate": 0.0015, + "loss": 3.1508, + "step": 600 + }, + { + "epoch": 0.06339662447257384, + "grad_norm": 0.8368716239929199, + "learning_rate": 0.0015, + "loss": 3.1281, + "step": 601 + }, + { + "epoch": 0.06350210970464135, + "grad_norm": 0.7810425758361816, + "learning_rate": 0.0015, + "loss": 3.1408, + "step": 602 + }, + { + "epoch": 0.06360759493670887, + "grad_norm": 0.5206443071365356, + "learning_rate": 0.0015, + "loss": 3.1675, + "step": 603 + }, + { + "epoch": 0.06371308016877637, + "grad_norm": 0.881493091583252, + "learning_rate": 0.0015, + "loss": 3.1124, + "step": 604 + }, + { + "epoch": 
0.06381856540084388, + "grad_norm": 0.6039766669273376, + "learning_rate": 0.0015, + "loss": 3.1263, + "step": 605 + }, + { + "epoch": 0.06392405063291139, + "grad_norm": 0.6369601488113403, + "learning_rate": 0.0015, + "loss": 3.1162, + "step": 606 + }, + { + "epoch": 0.0640295358649789, + "grad_norm": 0.878419041633606, + "learning_rate": 0.0015, + "loss": 3.1465, + "step": 607 + }, + { + "epoch": 0.06413502109704641, + "grad_norm": 0.5873783230781555, + "learning_rate": 0.0015, + "loss": 3.1635, + "step": 608 + }, + { + "epoch": 0.06424050632911392, + "grad_norm": 0.6876022815704346, + "learning_rate": 0.0015, + "loss": 3.15, + "step": 609 + }, + { + "epoch": 0.06434599156118144, + "grad_norm": 0.8638718128204346, + "learning_rate": 0.0015, + "loss": 3.119, + "step": 610 + }, + { + "epoch": 0.06445147679324895, + "grad_norm": 0.5753387212753296, + "learning_rate": 0.0015, + "loss": 3.1378, + "step": 611 + }, + { + "epoch": 0.06455696202531645, + "grad_norm": 0.6115850210189819, + "learning_rate": 0.0015, + "loss": 3.1164, + "step": 612 + }, + { + "epoch": 0.06466244725738397, + "grad_norm": 0.6145036220550537, + "learning_rate": 0.0015, + "loss": 3.1142, + "step": 613 + }, + { + "epoch": 0.06476793248945148, + "grad_norm": 0.5647176504135132, + "learning_rate": 0.0015, + "loss": 3.0912, + "step": 614 + }, + { + "epoch": 0.06487341772151899, + "grad_norm": 0.5785834789276123, + "learning_rate": 0.0015, + "loss": 3.1369, + "step": 615 + }, + { + "epoch": 0.06497890295358649, + "grad_norm": 0.43457841873168945, + "learning_rate": 0.0015, + "loss": 3.108, + "step": 616 + }, + { + "epoch": 0.06508438818565401, + "grad_norm": 0.5103294253349304, + "learning_rate": 0.0015, + "loss": 3.1256, + "step": 617 + }, + { + "epoch": 0.06518987341772152, + "grad_norm": 0.47751566767692566, + "learning_rate": 0.0015, + "loss": 3.1355, + "step": 618 + }, + { + "epoch": 0.06529535864978903, + "grad_norm": 0.4460393190383911, + "learning_rate": 0.0015, + "loss": 3.147, + "step": 619 
+ }, + { + "epoch": 0.06540084388185655, + "grad_norm": 0.44253718852996826, + "learning_rate": 0.0015, + "loss": 3.1193, + "step": 620 + }, + { + "epoch": 0.06550632911392405, + "grad_norm": 0.42924782633781433, + "learning_rate": 0.0015, + "loss": 3.1167, + "step": 621 + }, + { + "epoch": 0.06561181434599156, + "grad_norm": 0.5237078070640564, + "learning_rate": 0.0015, + "loss": 3.1393, + "step": 622 + }, + { + "epoch": 0.06571729957805907, + "grad_norm": 0.41758623719215393, + "learning_rate": 0.0015, + "loss": 3.1058, + "step": 623 + }, + { + "epoch": 0.06582278481012659, + "grad_norm": 0.5520530343055725, + "learning_rate": 0.0015, + "loss": 3.095, + "step": 624 + }, + { + "epoch": 0.06592827004219409, + "grad_norm": 0.49183741211891174, + "learning_rate": 0.0015, + "loss": 3.1055, + "step": 625 + }, + { + "epoch": 0.0660337552742616, + "grad_norm": 0.5270841121673584, + "learning_rate": 0.0015, + "loss": 3.086, + "step": 626 + }, + { + "epoch": 0.06613924050632912, + "grad_norm": 0.5913766026496887, + "learning_rate": 0.0015, + "loss": 3.1304, + "step": 627 + }, + { + "epoch": 0.06624472573839663, + "grad_norm": 0.45158347487449646, + "learning_rate": 0.0015, + "loss": 3.114, + "step": 628 + }, + { + "epoch": 0.06635021097046413, + "grad_norm": 0.4512562155723572, + "learning_rate": 0.0015, + "loss": 3.1203, + "step": 629 + }, + { + "epoch": 0.06645569620253164, + "grad_norm": 0.48179155588150024, + "learning_rate": 0.0015, + "loss": 3.0673, + "step": 630 + }, + { + "epoch": 0.06656118143459916, + "grad_norm": 0.5022712349891663, + "learning_rate": 0.0015, + "loss": 3.0845, + "step": 631 + }, + { + "epoch": 0.06666666666666667, + "grad_norm": 0.4585723578929901, + "learning_rate": 0.0015, + "loss": 3.1226, + "step": 632 + }, + { + "epoch": 0.06677215189873417, + "grad_norm": 0.43004077672958374, + "learning_rate": 0.0015, + "loss": 3.1093, + "step": 633 + }, + { + "epoch": 0.06687763713080169, + "grad_norm": 0.5047146677970886, + "learning_rate": 0.0015, + 
"loss": 3.0756, + "step": 634 + }, + { + "epoch": 0.0669831223628692, + "grad_norm": 0.3901086449623108, + "learning_rate": 0.0015, + "loss": 3.1357, + "step": 635 + }, + { + "epoch": 0.0670886075949367, + "grad_norm": 0.4737209379673004, + "learning_rate": 0.0015, + "loss": 3.1053, + "step": 636 + }, + { + "epoch": 0.06719409282700423, + "grad_norm": 0.4106265902519226, + "learning_rate": 0.0015, + "loss": 3.1399, + "step": 637 + }, + { + "epoch": 0.06729957805907173, + "grad_norm": 0.5158549547195435, + "learning_rate": 0.0015, + "loss": 3.1215, + "step": 638 + }, + { + "epoch": 0.06740506329113924, + "grad_norm": 0.5201126337051392, + "learning_rate": 0.0015, + "loss": 3.1057, + "step": 639 + }, + { + "epoch": 0.06751054852320675, + "grad_norm": 0.49978145956993103, + "learning_rate": 0.0015, + "loss": 3.1163, + "step": 640 + }, + { + "epoch": 0.06761603375527427, + "grad_norm": 0.6185200214385986, + "learning_rate": 0.0015, + "loss": 3.1323, + "step": 641 + }, + { + "epoch": 0.06772151898734177, + "grad_norm": 0.5139747858047485, + "learning_rate": 0.0015, + "loss": 3.1059, + "step": 642 + }, + { + "epoch": 0.06782700421940928, + "grad_norm": 0.4814824163913727, + "learning_rate": 0.0015, + "loss": 3.1213, + "step": 643 + }, + { + "epoch": 0.0679324894514768, + "grad_norm": 0.5252774953842163, + "learning_rate": 0.0015, + "loss": 3.0598, + "step": 644 + }, + { + "epoch": 0.0680379746835443, + "grad_norm": 0.56795734167099, + "learning_rate": 0.0015, + "loss": 3.0776, + "step": 645 + }, + { + "epoch": 0.06814345991561181, + "grad_norm": 0.5285614728927612, + "learning_rate": 0.0015, + "loss": 3.0606, + "step": 646 + }, + { + "epoch": 0.06824894514767932, + "grad_norm": 0.484030544757843, + "learning_rate": 0.0015, + "loss": 3.1222, + "step": 647 + }, + { + "epoch": 0.06835443037974684, + "grad_norm": 0.5548883676528931, + "learning_rate": 0.0015, + "loss": 3.1413, + "step": 648 + }, + { + "epoch": 0.06845991561181435, + "grad_norm": 0.5328906178474426, + 
"learning_rate": 0.0015, + "loss": 3.0925, + "step": 649 + }, + { + "epoch": 0.06856540084388185, + "grad_norm": 0.46222105622291565, + "learning_rate": 0.0015, + "loss": 3.1246, + "step": 650 + }, + { + "epoch": 0.06867088607594937, + "grad_norm": 0.4432888627052307, + "learning_rate": 0.0015, + "loss": 3.0734, + "step": 651 + }, + { + "epoch": 0.06877637130801688, + "grad_norm": 0.5511232018470764, + "learning_rate": 0.0015, + "loss": 3.0934, + "step": 652 + }, + { + "epoch": 0.06888185654008439, + "grad_norm": 0.5209508538246155, + "learning_rate": 0.0015, + "loss": 3.1037, + "step": 653 + }, + { + "epoch": 0.0689873417721519, + "grad_norm": 0.4218731224536896, + "learning_rate": 0.0015, + "loss": 3.1123, + "step": 654 + }, + { + "epoch": 0.06909282700421941, + "grad_norm": 0.4828660190105438, + "learning_rate": 0.0015, + "loss": 3.0746, + "step": 655 + }, + { + "epoch": 0.06919831223628692, + "grad_norm": 0.5086220502853394, + "learning_rate": 0.0015, + "loss": 3.1077, + "step": 656 + }, + { + "epoch": 0.06930379746835443, + "grad_norm": 0.4445730149745941, + "learning_rate": 0.0015, + "loss": 3.0658, + "step": 657 + }, + { + "epoch": 0.06940928270042195, + "grad_norm": 0.44237861037254333, + "learning_rate": 0.0015, + "loss": 3.0746, + "step": 658 + }, + { + "epoch": 0.06951476793248945, + "grad_norm": 0.46931931376457214, + "learning_rate": 0.0015, + "loss": 3.1025, + "step": 659 + }, + { + "epoch": 0.06962025316455696, + "grad_norm": 0.41710901260375977, + "learning_rate": 0.0015, + "loss": 3.0789, + "step": 660 + }, + { + "epoch": 0.06972573839662448, + "grad_norm": 0.46843045949935913, + "learning_rate": 0.0015, + "loss": 3.0693, + "step": 661 + }, + { + "epoch": 0.06983122362869199, + "grad_norm": 0.46493035554885864, + "learning_rate": 0.0015, + "loss": 3.0554, + "step": 662 + }, + { + "epoch": 0.06993670886075949, + "grad_norm": 0.41282370686531067, + "learning_rate": 0.0015, + "loss": 3.0336, + "step": 663 + }, + { + "epoch": 0.070042194092827, + 
"grad_norm": 0.46115541458129883, + "learning_rate": 0.0015, + "loss": 3.0934, + "step": 664 + }, + { + "epoch": 0.07014767932489452, + "grad_norm": 0.4591176211833954, + "learning_rate": 0.0015, + "loss": 3.0752, + "step": 665 + }, + { + "epoch": 0.07025316455696203, + "grad_norm": 0.4208686947822571, + "learning_rate": 0.0015, + "loss": 3.0396, + "step": 666 + }, + { + "epoch": 0.07035864978902953, + "grad_norm": 0.47774219512939453, + "learning_rate": 0.0015, + "loss": 3.0979, + "step": 667 + }, + { + "epoch": 0.07046413502109705, + "grad_norm": 0.43898165225982666, + "learning_rate": 0.0015, + "loss": 3.0565, + "step": 668 + }, + { + "epoch": 0.07056962025316456, + "grad_norm": 0.5001561045646667, + "learning_rate": 0.0015, + "loss": 3.0954, + "step": 669 + }, + { + "epoch": 0.07067510548523206, + "grad_norm": 0.5637317895889282, + "learning_rate": 0.0015, + "loss": 3.107, + "step": 670 + }, + { + "epoch": 0.07078059071729957, + "grad_norm": 0.5220135450363159, + "learning_rate": 0.0015, + "loss": 3.1026, + "step": 671 + }, + { + "epoch": 0.07088607594936709, + "grad_norm": 0.5539985299110413, + "learning_rate": 0.0015, + "loss": 3.0624, + "step": 672 + }, + { + "epoch": 0.0709915611814346, + "grad_norm": 0.5476277470588684, + "learning_rate": 0.0015, + "loss": 3.0556, + "step": 673 + }, + { + "epoch": 0.0710970464135021, + "grad_norm": 0.5018784403800964, + "learning_rate": 0.0015, + "loss": 3.0297, + "step": 674 + }, + { + "epoch": 0.07120253164556962, + "grad_norm": 0.5817114114761353, + "learning_rate": 0.0015, + "loss": 3.0724, + "step": 675 + }, + { + "epoch": 0.07130801687763713, + "grad_norm": 0.4553978145122528, + "learning_rate": 0.0015, + "loss": 3.0854, + "step": 676 + }, + { + "epoch": 0.07141350210970464, + "grad_norm": 0.522916853427887, + "learning_rate": 0.0015, + "loss": 3.0549, + "step": 677 + }, + { + "epoch": 0.07151898734177216, + "grad_norm": 0.49535462260246277, + "learning_rate": 0.0015, + "loss": 3.0656, + "step": 678 + }, + { + 
"epoch": 0.07162447257383966, + "grad_norm": 0.4972098469734192, + "learning_rate": 0.0015, + "loss": 3.0547, + "step": 679 + }, + { + "epoch": 0.07172995780590717, + "grad_norm": 0.47824743390083313, + "learning_rate": 0.0015, + "loss": 3.0469, + "step": 680 + }, + { + "epoch": 0.07183544303797468, + "grad_norm": 0.5616192817687988, + "learning_rate": 0.0015, + "loss": 3.0535, + "step": 681 + }, + { + "epoch": 0.0719409282700422, + "grad_norm": 0.5125538110733032, + "learning_rate": 0.0015, + "loss": 3.1093, + "step": 682 + }, + { + "epoch": 0.0720464135021097, + "grad_norm": 0.4394545555114746, + "learning_rate": 0.0015, + "loss": 3.0693, + "step": 683 + }, + { + "epoch": 0.07215189873417721, + "grad_norm": 0.6071129441261292, + "learning_rate": 0.0015, + "loss": 3.0709, + "step": 684 + }, + { + "epoch": 0.07225738396624473, + "grad_norm": 0.5002769827842712, + "learning_rate": 0.0015, + "loss": 3.0741, + "step": 685 + }, + { + "epoch": 0.07236286919831224, + "grad_norm": 0.47487396001815796, + "learning_rate": 0.0015, + "loss": 3.0734, + "step": 686 + }, + { + "epoch": 0.07246835443037974, + "grad_norm": 0.5490345358848572, + "learning_rate": 0.0015, + "loss": 3.0644, + "step": 687 + }, + { + "epoch": 0.07257383966244725, + "grad_norm": 0.5156499147415161, + "learning_rate": 0.0015, + "loss": 3.1006, + "step": 688 + }, + { + "epoch": 0.07267932489451477, + "grad_norm": 0.4547760486602783, + "learning_rate": 0.0015, + "loss": 3.0231, + "step": 689 + }, + { + "epoch": 0.07278481012658228, + "grad_norm": 0.562214732170105, + "learning_rate": 0.0015, + "loss": 3.0767, + "step": 690 + }, + { + "epoch": 0.07289029535864978, + "grad_norm": 0.4598149061203003, + "learning_rate": 0.0015, + "loss": 3.0627, + "step": 691 + }, + { + "epoch": 0.0729957805907173, + "grad_norm": 0.4721287488937378, + "learning_rate": 0.0015, + "loss": 3.0346, + "step": 692 + }, + { + "epoch": 0.07310126582278481, + "grad_norm": 0.49867546558380127, + "learning_rate": 0.0015, + "loss": 3.0574, 
+ "step": 693 + }, + { + "epoch": 0.07320675105485232, + "grad_norm": 0.5229291319847107, + "learning_rate": 0.0015, + "loss": 3.1, + "step": 694 + }, + { + "epoch": 0.07331223628691984, + "grad_norm": 0.5544513463973999, + "learning_rate": 0.0015, + "loss": 3.0767, + "step": 695 + }, + { + "epoch": 0.07341772151898734, + "grad_norm": 0.4474407732486725, + "learning_rate": 0.0015, + "loss": 3.0306, + "step": 696 + }, + { + "epoch": 0.07352320675105485, + "grad_norm": 0.5820875763893127, + "learning_rate": 0.0015, + "loss": 3.0572, + "step": 697 + }, + { + "epoch": 0.07362869198312236, + "grad_norm": 0.5545796751976013, + "learning_rate": 0.0015, + "loss": 3.0421, + "step": 698 + }, + { + "epoch": 0.07373417721518988, + "grad_norm": 0.4231954514980316, + "learning_rate": 0.0015, + "loss": 3.0225, + "step": 699 + }, + { + "epoch": 0.07383966244725738, + "grad_norm": 0.630807638168335, + "learning_rate": 0.0015, + "loss": 3.0242, + "step": 700 + }, + { + "epoch": 0.07394514767932489, + "grad_norm": 0.6516137719154358, + "learning_rate": 0.0015, + "loss": 3.0246, + "step": 701 + }, + { + "epoch": 0.07405063291139241, + "grad_norm": 0.45636406540870667, + "learning_rate": 0.0015, + "loss": 3.023, + "step": 702 + }, + { + "epoch": 0.07415611814345992, + "grad_norm": 0.6516038179397583, + "learning_rate": 0.0015, + "loss": 3.0837, + "step": 703 + }, + { + "epoch": 0.07426160337552742, + "grad_norm": 0.6241170167922974, + "learning_rate": 0.0015, + "loss": 3.0506, + "step": 704 + }, + { + "epoch": 0.07436708860759493, + "grad_norm": 0.4966233968734741, + "learning_rate": 0.0015, + "loss": 3.027, + "step": 705 + }, + { + "epoch": 0.07447257383966245, + "grad_norm": 0.828010618686676, + "learning_rate": 0.0015, + "loss": 3.0748, + "step": 706 + }, + { + "epoch": 0.07457805907172996, + "grad_norm": 0.8169804215431213, + "learning_rate": 0.0015, + "loss": 3.0267, + "step": 707 + }, + { + "epoch": 0.07468354430379746, + "grad_norm": 0.6743940711021423, + "learning_rate": 
0.0015, + "loss": 3.0515, + "step": 708 + }, + { + "epoch": 0.07478902953586498, + "grad_norm": 0.9747117757797241, + "learning_rate": 0.0015, + "loss": 3.0688, + "step": 709 + }, + { + "epoch": 0.07489451476793249, + "grad_norm": 1.169893503189087, + "learning_rate": 0.0015, + "loss": 3.0843, + "step": 710 + }, + { + "epoch": 0.075, + "grad_norm": 0.8380711078643799, + "learning_rate": 0.0015, + "loss": 3.0594, + "step": 711 + }, + { + "epoch": 0.0751054852320675, + "grad_norm": 1.1844730377197266, + "learning_rate": 0.0015, + "loss": 3.0849, + "step": 712 + }, + { + "epoch": 0.07521097046413502, + "grad_norm": 1.168959379196167, + "learning_rate": 0.0015, + "loss": 3.0559, + "step": 713 + }, + { + "epoch": 0.07531645569620253, + "grad_norm": 0.8586068749427795, + "learning_rate": 0.0015, + "loss": 3.0124, + "step": 714 + }, + { + "epoch": 0.07542194092827004, + "grad_norm": 0.9942464232444763, + "learning_rate": 0.0015, + "loss": 3.0606, + "step": 715 + }, + { + "epoch": 0.07552742616033756, + "grad_norm": 1.3676265478134155, + "learning_rate": 0.0015, + "loss": 3.0427, + "step": 716 + }, + { + "epoch": 0.07563291139240506, + "grad_norm": 0.6815967559814453, + "learning_rate": 0.0015, + "loss": 3.0242, + "step": 717 + }, + { + "epoch": 0.07573839662447257, + "grad_norm": 1.1302754878997803, + "learning_rate": 0.0015, + "loss": 3.0149, + "step": 718 + }, + { + "epoch": 0.07584388185654009, + "grad_norm": 0.5320830345153809, + "learning_rate": 0.0015, + "loss": 3.0299, + "step": 719 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 0.8621982336044312, + "learning_rate": 0.0015, + "loss": 3.0248, + "step": 720 + }, + { + "epoch": 0.0760548523206751, + "grad_norm": 0.832206130027771, + "learning_rate": 0.0015, + "loss": 3.023, + "step": 721 + }, + { + "epoch": 0.07616033755274261, + "grad_norm": 0.5479657053947449, + "learning_rate": 0.0015, + "loss": 3.0218, + "step": 722 + }, + { + "epoch": 0.07626582278481013, + "grad_norm": 1.1381653547286987, + 
"learning_rate": 0.0015, + "loss": 3.037, + "step": 723 + }, + { + "epoch": 0.07637130801687764, + "grad_norm": 0.7673267126083374, + "learning_rate": 0.0015, + "loss": 3.0137, + "step": 724 + }, + { + "epoch": 0.07647679324894514, + "grad_norm": 0.7518500685691833, + "learning_rate": 0.0015, + "loss": 3.0301, + "step": 725 + }, + { + "epoch": 0.07658227848101266, + "grad_norm": 1.0449618101119995, + "learning_rate": 0.0015, + "loss": 3.0159, + "step": 726 + }, + { + "epoch": 0.07668776371308017, + "grad_norm": 1.2969492673873901, + "learning_rate": 0.0015, + "loss": 3.0257, + "step": 727 + }, + { + "epoch": 0.07679324894514768, + "grad_norm": 0.7651019096374512, + "learning_rate": 0.0015, + "loss": 3.0396, + "step": 728 + }, + { + "epoch": 0.07689873417721518, + "grad_norm": 1.0376302003860474, + "learning_rate": 0.0015, + "loss": 3.0301, + "step": 729 + }, + { + "epoch": 0.0770042194092827, + "grad_norm": 0.843839704990387, + "learning_rate": 0.0015, + "loss": 3.0219, + "step": 730 + }, + { + "epoch": 0.07710970464135021, + "grad_norm": 0.5256530046463013, + "learning_rate": 0.0015, + "loss": 3.0121, + "step": 731 + }, + { + "epoch": 0.07721518987341772, + "grad_norm": 0.6752529740333557, + "learning_rate": 0.0015, + "loss": 3.0253, + "step": 732 + }, + { + "epoch": 0.07732067510548524, + "grad_norm": 0.569753885269165, + "learning_rate": 0.0015, + "loss": 3.0441, + "step": 733 + }, + { + "epoch": 0.07742616033755274, + "grad_norm": 0.5648934245109558, + "learning_rate": 0.0015, + "loss": 3.0618, + "step": 734 + }, + { + "epoch": 0.07753164556962025, + "grad_norm": 0.7011926174163818, + "learning_rate": 0.0015, + "loss": 3.0409, + "step": 735 + }, + { + "epoch": 0.07763713080168777, + "grad_norm": 0.5351795554161072, + "learning_rate": 0.0015, + "loss": 3.0126, + "step": 736 + }, + { + "epoch": 0.07774261603375528, + "grad_norm": 0.6133047938346863, + "learning_rate": 0.0015, + "loss": 3.0409, + "step": 737 + }, + { + "epoch": 0.07784810126582278, + "grad_norm": 
0.6023247838020325, + "learning_rate": 0.0015, + "loss": 3.0166, + "step": 738 + }, + { + "epoch": 0.07795358649789029, + "grad_norm": 0.5131109952926636, + "learning_rate": 0.0015, + "loss": 3.0406, + "step": 739 + }, + { + "epoch": 0.07805907172995781, + "grad_norm": 0.6745800971984863, + "learning_rate": 0.0015, + "loss": 2.9958, + "step": 740 + }, + { + "epoch": 0.07816455696202532, + "grad_norm": 0.6143474578857422, + "learning_rate": 0.0015, + "loss": 3.0039, + "step": 741 + }, + { + "epoch": 0.07827004219409282, + "grad_norm": 0.4929799735546112, + "learning_rate": 0.0015, + "loss": 3.0353, + "step": 742 + }, + { + "epoch": 0.07837552742616034, + "grad_norm": 0.6372641324996948, + "learning_rate": 0.0015, + "loss": 3.0084, + "step": 743 + }, + { + "epoch": 0.07848101265822785, + "grad_norm": 0.5492898225784302, + "learning_rate": 0.0015, + "loss": 3.0221, + "step": 744 + }, + { + "epoch": 0.07858649789029536, + "grad_norm": 0.5363095998764038, + "learning_rate": 0.0015, + "loss": 3.0263, + "step": 745 + }, + { + "epoch": 0.07869198312236286, + "grad_norm": 0.4884260892868042, + "learning_rate": 0.0015, + "loss": 3.02, + "step": 746 + }, + { + "epoch": 0.07879746835443038, + "grad_norm": 0.49694332480430603, + "learning_rate": 0.0015, + "loss": 2.9926, + "step": 747 + }, + { + "epoch": 0.07890295358649789, + "grad_norm": 0.48911386728286743, + "learning_rate": 0.0015, + "loss": 2.9919, + "step": 748 + }, + { + "epoch": 0.0790084388185654, + "grad_norm": 0.419879674911499, + "learning_rate": 0.0015, + "loss": 3.0113, + "step": 749 + }, + { + "epoch": 0.07911392405063292, + "grad_norm": 0.450794517993927, + "learning_rate": 0.0015, + "loss": 2.9902, + "step": 750 + }, + { + "epoch": 0.07921940928270042, + "grad_norm": 0.5566920638084412, + "learning_rate": 0.0015, + "loss": 3.0041, + "step": 751 + }, + { + "epoch": 0.07932489451476793, + "grad_norm": 0.4014185667037964, + "learning_rate": 0.0015, + "loss": 3.0176, + "step": 752 + }, + { + "epoch": 
0.07943037974683544, + "grad_norm": 0.44727712869644165, + "learning_rate": 0.0015, + "loss": 3.0004, + "step": 753 + }, + { + "epoch": 0.07953586497890296, + "grad_norm": 0.4687018394470215, + "learning_rate": 0.0015, + "loss": 3.0129, + "step": 754 + }, + { + "epoch": 0.07964135021097046, + "grad_norm": 0.4373103082180023, + "learning_rate": 0.0015, + "loss": 3.0351, + "step": 755 + }, + { + "epoch": 0.07974683544303797, + "grad_norm": 0.4262562096118927, + "learning_rate": 0.0015, + "loss": 3.0074, + "step": 756 + }, + { + "epoch": 0.07985232067510549, + "grad_norm": 0.4350946843624115, + "learning_rate": 0.0015, + "loss": 2.98, + "step": 757 + }, + { + "epoch": 0.079957805907173, + "grad_norm": 0.41331973671913147, + "learning_rate": 0.0015, + "loss": 3.0107, + "step": 758 + }, + { + "epoch": 0.0800632911392405, + "grad_norm": 0.5090905427932739, + "learning_rate": 0.0015, + "loss": 2.9644, + "step": 759 + }, + { + "epoch": 0.08016877637130802, + "grad_norm": 0.4196911156177521, + "learning_rate": 0.0015, + "loss": 2.9808, + "step": 760 + }, + { + "epoch": 0.08027426160337553, + "grad_norm": 0.5121602416038513, + "learning_rate": 0.0015, + "loss": 3.0505, + "step": 761 + }, + { + "epoch": 0.08037974683544304, + "grad_norm": 0.4797646105289459, + "learning_rate": 0.0015, + "loss": 3.0004, + "step": 762 + }, + { + "epoch": 0.08048523206751054, + "grad_norm": 0.4792652130126953, + "learning_rate": 0.0015, + "loss": 2.9906, + "step": 763 + }, + { + "epoch": 0.08059071729957806, + "grad_norm": 0.4771900177001953, + "learning_rate": 0.0015, + "loss": 3.0007, + "step": 764 + }, + { + "epoch": 0.08069620253164557, + "grad_norm": 0.473699688911438, + "learning_rate": 0.0015, + "loss": 2.9805, + "step": 765 + }, + { + "epoch": 0.08080168776371308, + "grad_norm": 0.45518332719802856, + "learning_rate": 0.0015, + "loss": 2.9733, + "step": 766 + }, + { + "epoch": 0.0809071729957806, + "grad_norm": 0.5595031380653381, + "learning_rate": 0.0015, + "loss": 2.9817, + "step": 
767 + }, + { + "epoch": 0.0810126582278481, + "grad_norm": 0.46510258316993713, + "learning_rate": 0.0015, + "loss": 2.9943, + "step": 768 + }, + { + "epoch": 0.08111814345991561, + "grad_norm": 0.49714407324790955, + "learning_rate": 0.0015, + "loss": 3.0123, + "step": 769 + }, + { + "epoch": 0.08122362869198312, + "grad_norm": 0.530739963054657, + "learning_rate": 0.0015, + "loss": 2.9733, + "step": 770 + }, + { + "epoch": 0.08132911392405064, + "grad_norm": 0.5226621627807617, + "learning_rate": 0.0015, + "loss": 2.959, + "step": 771 + }, + { + "epoch": 0.08143459915611814, + "grad_norm": 0.4975307285785675, + "learning_rate": 0.0015, + "loss": 3.0416, + "step": 772 + }, + { + "epoch": 0.08154008438818565, + "grad_norm": 0.5275937914848328, + "learning_rate": 0.0015, + "loss": 3.0006, + "step": 773 + }, + { + "epoch": 0.08164556962025317, + "grad_norm": 0.5809838771820068, + "learning_rate": 0.0015, + "loss": 2.9862, + "step": 774 + }, + { + "epoch": 0.08175105485232068, + "grad_norm": 0.49886810779571533, + "learning_rate": 0.0015, + "loss": 2.9618, + "step": 775 + }, + { + "epoch": 0.08185654008438818, + "grad_norm": 0.6355060935020447, + "learning_rate": 0.0015, + "loss": 2.9517, + "step": 776 + }, + { + "epoch": 0.0819620253164557, + "grad_norm": 0.627461850643158, + "learning_rate": 0.0015, + "loss": 2.9822, + "step": 777 + }, + { + "epoch": 0.08206751054852321, + "grad_norm": 0.6164768934249878, + "learning_rate": 0.0015, + "loss": 3.01, + "step": 778 + }, + { + "epoch": 0.08217299578059072, + "grad_norm": 0.6753571629524231, + "learning_rate": 0.0015, + "loss": 3.023, + "step": 779 + }, + { + "epoch": 0.08227848101265822, + "grad_norm": 0.9624497890472412, + "learning_rate": 0.0015, + "loss": 3.0218, + "step": 780 + }, + { + "epoch": 0.08238396624472574, + "grad_norm": 0.7728069424629211, + "learning_rate": 0.0015, + "loss": 2.993, + "step": 781 + }, + { + "epoch": 0.08248945147679325, + "grad_norm": 0.8021749258041382, + "learning_rate": 0.0015, + 
"loss": 2.9851, + "step": 782 + }, + { + "epoch": 0.08259493670886076, + "grad_norm": 1.4636996984481812, + "learning_rate": 0.0015, + "loss": 3.0222, + "step": 783 + }, + { + "epoch": 0.08270042194092828, + "grad_norm": 0.6051256060600281, + "learning_rate": 0.0015, + "loss": 3.007, + "step": 784 + }, + { + "epoch": 0.08280590717299578, + "grad_norm": 1.0736114978790283, + "learning_rate": 0.0015, + "loss": 3.0333, + "step": 785 + }, + { + "epoch": 0.08291139240506329, + "grad_norm": 0.6293458342552185, + "learning_rate": 0.0015, + "loss": 2.9805, + "step": 786 + }, + { + "epoch": 0.0830168776371308, + "grad_norm": 0.8743335604667664, + "learning_rate": 0.0015, + "loss": 3.033, + "step": 787 + }, + { + "epoch": 0.08312236286919832, + "grad_norm": 1.0874354839324951, + "learning_rate": 0.0015, + "loss": 3.0435, + "step": 788 + }, + { + "epoch": 0.08322784810126582, + "grad_norm": 0.6234689354896545, + "learning_rate": 0.0015, + "loss": 3.0, + "step": 789 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 1.4459065198898315, + "learning_rate": 0.0015, + "loss": 3.0374, + "step": 790 + }, + { + "epoch": 0.08343881856540085, + "grad_norm": 0.5264022350311279, + "learning_rate": 0.0015, + "loss": 3.0007, + "step": 791 + }, + { + "epoch": 0.08354430379746836, + "grad_norm": 0.9182020425796509, + "learning_rate": 0.0015, + "loss": 3.0174, + "step": 792 + }, + { + "epoch": 0.08364978902953586, + "grad_norm": 0.5987018346786499, + "learning_rate": 0.0015, + "loss": 2.9813, + "step": 793 + }, + { + "epoch": 0.08375527426160338, + "grad_norm": 0.9652836322784424, + "learning_rate": 0.0015, + "loss": 3.0046, + "step": 794 + }, + { + "epoch": 0.08386075949367089, + "grad_norm": 1.0641913414001465, + "learning_rate": 0.0015, + "loss": 2.9724, + "step": 795 + }, + { + "epoch": 0.0839662447257384, + "grad_norm": 0.8325082659721375, + "learning_rate": 0.0015, + "loss": 3.0046, + "step": 796 + }, + { + "epoch": 0.0840717299578059, + "grad_norm": 1.2881215810775757, + 
"learning_rate": 0.0015, + "loss": 3.0179, + "step": 797 + }, + { + "epoch": 0.08417721518987342, + "grad_norm": 0.8323678374290466, + "learning_rate": 0.0015, + "loss": 2.9696, + "step": 798 + }, + { + "epoch": 0.08428270042194093, + "grad_norm": 0.7636913061141968, + "learning_rate": 0.0015, + "loss": 2.9806, + "step": 799 + }, + { + "epoch": 0.08438818565400844, + "grad_norm": 0.6060933470726013, + "learning_rate": 0.0015, + "loss": 2.9323, + "step": 800 + }, + { + "epoch": 0.08449367088607596, + "grad_norm": 0.6221925616264343, + "learning_rate": 0.0015, + "loss": 2.9553, + "step": 801 + }, + { + "epoch": 0.08459915611814346, + "grad_norm": 0.6081045866012573, + "learning_rate": 0.0015, + "loss": 2.9739, + "step": 802 + }, + { + "epoch": 0.08470464135021097, + "grad_norm": 0.5571026802062988, + "learning_rate": 0.0015, + "loss": 2.9455, + "step": 803 + }, + { + "epoch": 0.08481012658227848, + "grad_norm": 0.5787800550460815, + "learning_rate": 0.0015, + "loss": 2.9325, + "step": 804 + }, + { + "epoch": 0.084915611814346, + "grad_norm": 0.6988334655761719, + "learning_rate": 0.0015, + "loss": 2.9859, + "step": 805 + }, + { + "epoch": 0.0850210970464135, + "grad_norm": 0.5071077346801758, + "learning_rate": 0.0015, + "loss": 3.0022, + "step": 806 + }, + { + "epoch": 0.08512658227848101, + "grad_norm": 0.682289183139801, + "learning_rate": 0.0015, + "loss": 2.975, + "step": 807 + }, + { + "epoch": 0.08523206751054853, + "grad_norm": 0.7403824925422668, + "learning_rate": 0.0015, + "loss": 2.9706, + "step": 808 + }, + { + "epoch": 0.08533755274261604, + "grad_norm": 0.4382854402065277, + "learning_rate": 0.0015, + "loss": 2.9478, + "step": 809 + }, + { + "epoch": 0.08544303797468354, + "grad_norm": 0.6631742715835571, + "learning_rate": 0.0015, + "loss": 2.9573, + "step": 810 + }, + { + "epoch": 0.08554852320675105, + "grad_norm": 0.5227664113044739, + "learning_rate": 0.0015, + "loss": 2.9812, + "step": 811 + }, + { + "epoch": 0.08565400843881857, + "grad_norm": 
0.4954914152622223, + "learning_rate": 0.0015, + "loss": 2.9574, + "step": 812 + }, + { + "epoch": 0.08575949367088608, + "grad_norm": 0.6132584810256958, + "learning_rate": 0.0015, + "loss": 3.0187, + "step": 813 + }, + { + "epoch": 0.08586497890295358, + "grad_norm": 0.5018720030784607, + "learning_rate": 0.0015, + "loss": 2.9817, + "step": 814 + }, + { + "epoch": 0.0859704641350211, + "grad_norm": 0.5767136216163635, + "learning_rate": 0.0015, + "loss": 2.9649, + "step": 815 + }, + { + "epoch": 0.08607594936708861, + "grad_norm": 0.6246969699859619, + "learning_rate": 0.0015, + "loss": 2.9611, + "step": 816 + }, + { + "epoch": 0.08618143459915611, + "grad_norm": 0.42162755131721497, + "learning_rate": 0.0015, + "loss": 2.9761, + "step": 817 + }, + { + "epoch": 0.08628691983122364, + "grad_norm": 0.5638765692710876, + "learning_rate": 0.0015, + "loss": 2.9609, + "step": 818 + }, + { + "epoch": 0.08639240506329114, + "grad_norm": 0.5347734689712524, + "learning_rate": 0.0015, + "loss": 2.9654, + "step": 819 + }, + { + "epoch": 0.08649789029535865, + "grad_norm": 0.47347506880760193, + "learning_rate": 0.0015, + "loss": 2.9694, + "step": 820 + }, + { + "epoch": 0.08660337552742615, + "grad_norm": 0.5972186326980591, + "learning_rate": 0.0015, + "loss": 2.9603, + "step": 821 + }, + { + "epoch": 0.08670886075949367, + "grad_norm": 0.5494314432144165, + "learning_rate": 0.0015, + "loss": 2.9842, + "step": 822 + }, + { + "epoch": 0.08681434599156118, + "grad_norm": 0.4612133800983429, + "learning_rate": 0.0015, + "loss": 2.9877, + "step": 823 + }, + { + "epoch": 0.08691983122362869, + "grad_norm": 0.6885764002799988, + "learning_rate": 0.0015, + "loss": 2.993, + "step": 824 + }, + { + "epoch": 0.08702531645569621, + "grad_norm": 0.49941450357437134, + "learning_rate": 0.0015, + "loss": 2.9453, + "step": 825 + }, + { + "epoch": 0.08713080168776371, + "grad_norm": 0.5059396624565125, + "learning_rate": 0.0015, + "loss": 2.9942, + "step": 826 + }, + { + "epoch": 
0.08723628691983122, + "grad_norm": 0.6841118335723877, + "learning_rate": 0.0015, + "loss": 2.9908, + "step": 827 + }, + { + "epoch": 0.08734177215189873, + "grad_norm": 0.483401358127594, + "learning_rate": 0.0015, + "loss": 2.9432, + "step": 828 + }, + { + "epoch": 0.08744725738396625, + "grad_norm": 0.587744414806366, + "learning_rate": 0.0015, + "loss": 2.9468, + "step": 829 + }, + { + "epoch": 0.08755274261603375, + "grad_norm": 0.760633647441864, + "learning_rate": 0.0015, + "loss": 2.9735, + "step": 830 + }, + { + "epoch": 0.08765822784810126, + "grad_norm": 0.5023952126502991, + "learning_rate": 0.0015, + "loss": 2.9315, + "step": 831 + }, + { + "epoch": 0.08776371308016878, + "grad_norm": 0.6293758153915405, + "learning_rate": 0.0015, + "loss": 2.9275, + "step": 832 + }, + { + "epoch": 0.08786919831223629, + "grad_norm": 0.7692920565605164, + "learning_rate": 0.0015, + "loss": 2.9628, + "step": 833 + }, + { + "epoch": 0.0879746835443038, + "grad_norm": 0.5072264671325684, + "learning_rate": 0.0015, + "loss": 2.9216, + "step": 834 + }, + { + "epoch": 0.08808016877637131, + "grad_norm": 0.5579574108123779, + "learning_rate": 0.0015, + "loss": 2.9693, + "step": 835 + }, + { + "epoch": 0.08818565400843882, + "grad_norm": 0.7007578015327454, + "learning_rate": 0.0015, + "loss": 2.9424, + "step": 836 + }, + { + "epoch": 0.08829113924050633, + "grad_norm": 0.4414587616920471, + "learning_rate": 0.0015, + "loss": 2.9698, + "step": 837 + }, + { + "epoch": 0.08839662447257383, + "grad_norm": 0.527445375919342, + "learning_rate": 0.0015, + "loss": 2.9882, + "step": 838 + }, + { + "epoch": 0.08850210970464135, + "grad_norm": 0.6269580721855164, + "learning_rate": 0.0015, + "loss": 2.9392, + "step": 839 + }, + { + "epoch": 0.08860759493670886, + "grad_norm": 0.4592359662055969, + "learning_rate": 0.0015, + "loss": 2.9551, + "step": 840 + }, + { + "epoch": 0.08871308016877637, + "grad_norm": 0.4743657410144806, + "learning_rate": 0.0015, + "loss": 2.965, + "step": 841 
+ }, + { + "epoch": 0.08881856540084389, + "grad_norm": 0.4750756621360779, + "learning_rate": 0.0015, + "loss": 2.931, + "step": 842 + }, + { + "epoch": 0.0889240506329114, + "grad_norm": 0.4536568522453308, + "learning_rate": 0.0015, + "loss": 2.9503, + "step": 843 + }, + { + "epoch": 0.0890295358649789, + "grad_norm": 0.4943138360977173, + "learning_rate": 0.0015, + "loss": 2.9371, + "step": 844 + }, + { + "epoch": 0.08913502109704641, + "grad_norm": 0.4668751657009125, + "learning_rate": 0.0015, + "loss": 2.9589, + "step": 845 + }, + { + "epoch": 0.08924050632911393, + "grad_norm": 0.4779433310031891, + "learning_rate": 0.0015, + "loss": 2.9874, + "step": 846 + }, + { + "epoch": 0.08934599156118143, + "grad_norm": 0.5255986452102661, + "learning_rate": 0.0015, + "loss": 2.9598, + "step": 847 + }, + { + "epoch": 0.08945147679324894, + "grad_norm": 0.47420164942741394, + "learning_rate": 0.0015, + "loss": 2.9608, + "step": 848 + }, + { + "epoch": 0.08955696202531646, + "grad_norm": 0.5533533096313477, + "learning_rate": 0.0015, + "loss": 2.9398, + "step": 849 + }, + { + "epoch": 0.08966244725738397, + "grad_norm": 0.5280824303627014, + "learning_rate": 0.0015, + "loss": 2.9258, + "step": 850 + }, + { + "epoch": 0.08976793248945147, + "grad_norm": 0.40862008929252625, + "learning_rate": 0.0015, + "loss": 2.9608, + "step": 851 + }, + { + "epoch": 0.08987341772151898, + "grad_norm": 0.6544182300567627, + "learning_rate": 0.0015, + "loss": 2.971, + "step": 852 + }, + { + "epoch": 0.0899789029535865, + "grad_norm": 0.6455007791519165, + "learning_rate": 0.0015, + "loss": 2.9505, + "step": 853 + }, + { + "epoch": 0.09008438818565401, + "grad_norm": 0.4623076617717743, + "learning_rate": 0.0015, + "loss": 2.9871, + "step": 854 + }, + { + "epoch": 0.09018987341772151, + "grad_norm": 0.7972654104232788, + "learning_rate": 0.0015, + "loss": 2.9444, + "step": 855 + }, + { + "epoch": 0.09029535864978903, + "grad_norm": 0.6785705089569092, + "learning_rate": 0.0015, + "loss": 
2.9628, + "step": 856 + }, + { + "epoch": 0.09040084388185654, + "grad_norm": 0.6565016508102417, + "learning_rate": 0.0015, + "loss": 2.9393, + "step": 857 + }, + { + "epoch": 0.09050632911392405, + "grad_norm": 0.8708064556121826, + "learning_rate": 0.0015, + "loss": 2.9675, + "step": 858 + }, + { + "epoch": 0.09061181434599157, + "grad_norm": 0.588962197303772, + "learning_rate": 0.0015, + "loss": 2.934, + "step": 859 + }, + { + "epoch": 0.09071729957805907, + "grad_norm": 0.8808808326721191, + "learning_rate": 0.0015, + "loss": 2.9116, + "step": 860 + }, + { + "epoch": 0.09082278481012658, + "grad_norm": 0.9529517292976379, + "learning_rate": 0.0015, + "loss": 2.9706, + "step": 861 + }, + { + "epoch": 0.09092827004219409, + "grad_norm": 0.7462377548217773, + "learning_rate": 0.0015, + "loss": 2.9386, + "step": 862 + }, + { + "epoch": 0.09103375527426161, + "grad_norm": 0.9578644633293152, + "learning_rate": 0.0015, + "loss": 2.9656, + "step": 863 + }, + { + "epoch": 0.09113924050632911, + "grad_norm": 0.8777565360069275, + "learning_rate": 0.0015, + "loss": 2.9491, + "step": 864 + }, + { + "epoch": 0.09124472573839662, + "grad_norm": 0.7169105410575867, + "learning_rate": 0.0015, + "loss": 2.9837, + "step": 865 + }, + { + "epoch": 0.09135021097046414, + "grad_norm": 0.9519692659378052, + "learning_rate": 0.0015, + "loss": 2.9173, + "step": 866 + }, + { + "epoch": 0.09145569620253165, + "grad_norm": 0.8175250887870789, + "learning_rate": 0.0015, + "loss": 2.9389, + "step": 867 + }, + { + "epoch": 0.09156118143459915, + "grad_norm": 0.7335312366485596, + "learning_rate": 0.0015, + "loss": 2.9715, + "step": 868 + }, + { + "epoch": 0.09166666666666666, + "grad_norm": 0.7991451025009155, + "learning_rate": 0.0015, + "loss": 2.927, + "step": 869 + }, + { + "epoch": 0.09177215189873418, + "grad_norm": 0.5210023522377014, + "learning_rate": 0.0015, + "loss": 2.963, + "step": 870 + }, + { + "epoch": 0.09187763713080169, + "grad_norm": 0.5822790861129761, + 
"learning_rate": 0.0015, + "loss": 2.9572, + "step": 871 + }, + { + "epoch": 0.0919831223628692, + "grad_norm": 0.5056639313697815, + "learning_rate": 0.0015, + "loss": 2.9249, + "step": 872 + }, + { + "epoch": 0.09208860759493671, + "grad_norm": 0.6594166159629822, + "learning_rate": 0.0015, + "loss": 2.9573, + "step": 873 + }, + { + "epoch": 0.09219409282700422, + "grad_norm": 0.9099550247192383, + "learning_rate": 0.0015, + "loss": 2.9581, + "step": 874 + }, + { + "epoch": 0.09229957805907173, + "grad_norm": 0.6542021036148071, + "learning_rate": 0.0015, + "loss": 2.9322, + "step": 875 + }, + { + "epoch": 0.09240506329113925, + "grad_norm": 0.8091128468513489, + "learning_rate": 0.0015, + "loss": 2.9617, + "step": 876 + }, + { + "epoch": 0.09251054852320675, + "grad_norm": 0.8922035098075867, + "learning_rate": 0.0015, + "loss": 2.9482, + "step": 877 + }, + { + "epoch": 0.09261603375527426, + "grad_norm": 0.6337334513664246, + "learning_rate": 0.0015, + "loss": 2.9268, + "step": 878 + }, + { + "epoch": 0.09272151898734177, + "grad_norm": 0.9616796374320984, + "learning_rate": 0.0015, + "loss": 2.9791, + "step": 879 + }, + { + "epoch": 0.09282700421940929, + "grad_norm": 0.856194794178009, + "learning_rate": 0.0015, + "loss": 2.9382, + "step": 880 + }, + { + "epoch": 0.0929324894514768, + "grad_norm": 0.6638111472129822, + "learning_rate": 0.0015, + "loss": 2.9579, + "step": 881 + }, + { + "epoch": 0.0930379746835443, + "grad_norm": 1.1599043607711792, + "learning_rate": 0.0015, + "loss": 2.9249, + "step": 882 + }, + { + "epoch": 0.09314345991561182, + "grad_norm": 0.6337917447090149, + "learning_rate": 0.0015, + "loss": 2.9003, + "step": 883 + }, + { + "epoch": 0.09324894514767933, + "grad_norm": 0.7314314246177673, + "learning_rate": 0.0015, + "loss": 2.9245, + "step": 884 + }, + { + "epoch": 0.09335443037974683, + "grad_norm": 0.786834180355072, + "learning_rate": 0.0015, + "loss": 2.9907, + "step": 885 + }, + { + "epoch": 0.09345991561181434, + "grad_norm": 
0.4712405204772949, + "learning_rate": 0.0015, + "loss": 2.9118, + "step": 886 + }, + { + "epoch": 0.09356540084388186, + "grad_norm": 0.6266620755195618, + "learning_rate": 0.0015, + "loss": 2.9223, + "step": 887 + }, + { + "epoch": 0.09367088607594937, + "grad_norm": 0.6381389498710632, + "learning_rate": 0.0015, + "loss": 2.9721, + "step": 888 + }, + { + "epoch": 0.09377637130801687, + "grad_norm": 0.5161409974098206, + "learning_rate": 0.0015, + "loss": 2.9234, + "step": 889 + }, + { + "epoch": 0.0938818565400844, + "grad_norm": 0.523490846157074, + "learning_rate": 0.0015, + "loss": 2.9482, + "step": 890 + }, + { + "epoch": 0.0939873417721519, + "grad_norm": 0.4777434170246124, + "learning_rate": 0.0015, + "loss": 2.965, + "step": 891 + }, + { + "epoch": 0.0940928270042194, + "grad_norm": 0.49906080961227417, + "learning_rate": 0.0015, + "loss": 2.9382, + "step": 892 + }, + { + "epoch": 0.09419831223628691, + "grad_norm": 0.40105658769607544, + "learning_rate": 0.0015, + "loss": 2.9099, + "step": 893 + }, + { + "epoch": 0.09430379746835443, + "grad_norm": 0.5296884775161743, + "learning_rate": 0.0015, + "loss": 2.9547, + "step": 894 + }, + { + "epoch": 0.09440928270042194, + "grad_norm": 0.48172852396965027, + "learning_rate": 0.0015, + "loss": 2.9206, + "step": 895 + }, + { + "epoch": 0.09451476793248945, + "grad_norm": 0.44204649329185486, + "learning_rate": 0.0015, + "loss": 2.9241, + "step": 896 + }, + { + "epoch": 0.09462025316455697, + "grad_norm": 0.575105607509613, + "learning_rate": 0.0015, + "loss": 2.9155, + "step": 897 + }, + { + "epoch": 0.09472573839662447, + "grad_norm": 0.407831609249115, + "learning_rate": 0.0015, + "loss": 2.9188, + "step": 898 + }, + { + "epoch": 0.09483122362869198, + "grad_norm": 0.5273159742355347, + "learning_rate": 0.0015, + "loss": 2.9122, + "step": 899 + }, + { + "epoch": 0.0949367088607595, + "grad_norm": 0.501473069190979, + "learning_rate": 0.0015, + "loss": 2.9209, + "step": 900 + }, + { + "epoch": 
0.095042194092827, + "grad_norm": 0.4200875461101532, + "learning_rate": 0.0015, + "loss": 2.9259, + "step": 901 + }, + { + "epoch": 0.09514767932489451, + "grad_norm": 0.5698111057281494, + "learning_rate": 0.0015, + "loss": 2.9326, + "step": 902 + }, + { + "epoch": 0.09525316455696202, + "grad_norm": 0.4641309678554535, + "learning_rate": 0.0015, + "loss": 2.918, + "step": 903 + }, + { + "epoch": 0.09535864978902954, + "grad_norm": 0.5188429951667786, + "learning_rate": 0.0015, + "loss": 2.9277, + "step": 904 + }, + { + "epoch": 0.09546413502109705, + "grad_norm": 0.5630412697792053, + "learning_rate": 0.0015, + "loss": 2.9159, + "step": 905 + }, + { + "epoch": 0.09556962025316455, + "grad_norm": 0.4843507409095764, + "learning_rate": 0.0015, + "loss": 2.939, + "step": 906 + }, + { + "epoch": 0.09567510548523207, + "grad_norm": 0.684741735458374, + "learning_rate": 0.0015, + "loss": 2.9333, + "step": 907 + }, + { + "epoch": 0.09578059071729958, + "grad_norm": 0.6220889687538147, + "learning_rate": 0.0015, + "loss": 2.9079, + "step": 908 + }, + { + "epoch": 0.09588607594936709, + "grad_norm": 0.5400215983390808, + "learning_rate": 0.0015, + "loss": 2.935, + "step": 909 + }, + { + "epoch": 0.09599156118143459, + "grad_norm": 0.9717395901679993, + "learning_rate": 0.0015, + "loss": 2.9628, + "step": 910 + }, + { + "epoch": 0.09609704641350211, + "grad_norm": 0.597578763961792, + "learning_rate": 0.0015, + "loss": 2.9311, + "step": 911 + }, + { + "epoch": 0.09620253164556962, + "grad_norm": 0.6230468153953552, + "learning_rate": 0.0015, + "loss": 2.9144, + "step": 912 + }, + { + "epoch": 0.09630801687763713, + "grad_norm": 0.7090437412261963, + "learning_rate": 0.0015, + "loss": 2.9426, + "step": 913 + }, + { + "epoch": 0.09641350210970465, + "grad_norm": 0.4818892180919647, + "learning_rate": 0.0015, + "loss": 2.9244, + "step": 914 + }, + { + "epoch": 0.09651898734177215, + "grad_norm": 0.6427623629570007, + "learning_rate": 0.0015, + "loss": 2.9466, + "step": 915 + 
}, + { + "epoch": 0.09662447257383966, + "grad_norm": 0.48251768946647644, + "learning_rate": 0.0015, + "loss": 2.9327, + "step": 916 + }, + { + "epoch": 0.09672995780590718, + "grad_norm": 0.7022861242294312, + "learning_rate": 0.0015, + "loss": 2.8909, + "step": 917 + }, + { + "epoch": 0.09683544303797469, + "grad_norm": 0.627341628074646, + "learning_rate": 0.0015, + "loss": 2.9111, + "step": 918 + }, + { + "epoch": 0.09694092827004219, + "grad_norm": 0.6335179805755615, + "learning_rate": 0.0015, + "loss": 2.9413, + "step": 919 + }, + { + "epoch": 0.0970464135021097, + "grad_norm": 0.7952059507369995, + "learning_rate": 0.0015, + "loss": 2.8946, + "step": 920 + }, + { + "epoch": 0.09715189873417722, + "grad_norm": 0.5575441718101501, + "learning_rate": 0.0015, + "loss": 2.941, + "step": 921 + }, + { + "epoch": 0.09725738396624473, + "grad_norm": 0.7226571440696716, + "learning_rate": 0.0015, + "loss": 2.9198, + "step": 922 + }, + { + "epoch": 0.09736286919831223, + "grad_norm": 0.5605053901672363, + "learning_rate": 0.0015, + "loss": 2.8847, + "step": 923 + }, + { + "epoch": 0.09746835443037975, + "grad_norm": 0.5676932334899902, + "learning_rate": 0.0015, + "loss": 2.9348, + "step": 924 + }, + { + "epoch": 0.09757383966244726, + "grad_norm": 0.6304050087928772, + "learning_rate": 0.0015, + "loss": 2.914, + "step": 925 + }, + { + "epoch": 0.09767932489451477, + "grad_norm": 0.4344809055328369, + "learning_rate": 0.0015, + "loss": 2.9003, + "step": 926 + }, + { + "epoch": 0.09778481012658227, + "grad_norm": 0.654318630695343, + "learning_rate": 0.0015, + "loss": 2.9332, + "step": 927 + }, + { + "epoch": 0.09789029535864979, + "grad_norm": 0.5133137702941895, + "learning_rate": 0.0015, + "loss": 2.8707, + "step": 928 + }, + { + "epoch": 0.0979957805907173, + "grad_norm": 0.5493823885917664, + "learning_rate": 0.0015, + "loss": 2.8982, + "step": 929 + }, + { + "epoch": 0.0981012658227848, + "grad_norm": 0.5590183734893799, + "learning_rate": 0.0015, + "loss": 
2.9157, + "step": 930 + }, + { + "epoch": 0.09820675105485233, + "grad_norm": 0.5329678058624268, + "learning_rate": 0.0015, + "loss": 2.886, + "step": 931 + }, + { + "epoch": 0.09831223628691983, + "grad_norm": 0.6902244091033936, + "learning_rate": 0.0015, + "loss": 2.9334, + "step": 932 + }, + { + "epoch": 0.09841772151898734, + "grad_norm": 0.6206153035163879, + "learning_rate": 0.0015, + "loss": 2.9249, + "step": 933 + }, + { + "epoch": 0.09852320675105486, + "grad_norm": 0.5196724534034729, + "learning_rate": 0.0015, + "loss": 2.9077, + "step": 934 + }, + { + "epoch": 0.09862869198312237, + "grad_norm": 0.5990458130836487, + "learning_rate": 0.0015, + "loss": 2.9, + "step": 935 + }, + { + "epoch": 0.09873417721518987, + "grad_norm": 0.5999747514724731, + "learning_rate": 0.0015, + "loss": 2.9033, + "step": 936 + }, + { + "epoch": 0.09883966244725738, + "grad_norm": 0.4821512997150421, + "learning_rate": 0.0015, + "loss": 2.9341, + "step": 937 + }, + { + "epoch": 0.0989451476793249, + "grad_norm": 0.5723498463630676, + "learning_rate": 0.0015, + "loss": 2.9264, + "step": 938 + }, + { + "epoch": 0.0990506329113924, + "grad_norm": 0.6894911527633667, + "learning_rate": 0.0015, + "loss": 2.8989, + "step": 939 + }, + { + "epoch": 0.09915611814345991, + "grad_norm": 0.5078843235969543, + "learning_rate": 0.0015, + "loss": 2.9, + "step": 940 + }, + { + "epoch": 0.09926160337552743, + "grad_norm": 0.5586221814155579, + "learning_rate": 0.0015, + "loss": 2.8883, + "step": 941 + }, + { + "epoch": 0.09936708860759494, + "grad_norm": 0.6653425097465515, + "learning_rate": 0.0015, + "loss": 2.9263, + "step": 942 + }, + { + "epoch": 0.09947257383966245, + "grad_norm": 0.4263627231121063, + "learning_rate": 0.0015, + "loss": 2.8899, + "step": 943 + }, + { + "epoch": 0.09957805907172995, + "grad_norm": 0.7270148992538452, + "learning_rate": 0.0015, + "loss": 2.8823, + "step": 944 + }, + { + "epoch": 0.09968354430379747, + "grad_norm": 0.5990016460418701, + "learning_rate": 
0.0015, + "loss": 2.9002, + "step": 945 + }, + { + "epoch": 0.09978902953586498, + "grad_norm": 0.4989110231399536, + "learning_rate": 0.0015, + "loss": 2.9009, + "step": 946 + }, + { + "epoch": 0.09989451476793249, + "grad_norm": 0.7857534885406494, + "learning_rate": 0.0015, + "loss": 2.8649, + "step": 947 + }, + { + "epoch": 0.1, + "grad_norm": 0.5032544136047363, + "learning_rate": 0.0015, + "loss": 2.9132, + "step": 948 + }, + { + "epoch": 0.10010548523206751, + "grad_norm": 0.552017867565155, + "learning_rate": 0.0015, + "loss": 2.8862, + "step": 949 + }, + { + "epoch": 0.10021097046413502, + "grad_norm": 0.7245096564292908, + "learning_rate": 0.0015, + "loss": 2.9079, + "step": 950 + }, + { + "epoch": 0.10031645569620253, + "grad_norm": 0.4902867078781128, + "learning_rate": 0.0015, + "loss": 2.9133, + "step": 951 + }, + { + "epoch": 0.10042194092827005, + "grad_norm": 0.7002772688865662, + "learning_rate": 0.0015, + "loss": 2.8963, + "step": 952 + }, + { + "epoch": 0.10052742616033755, + "grad_norm": 0.6898626089096069, + "learning_rate": 0.0015, + "loss": 2.9273, + "step": 953 + }, + { + "epoch": 0.10063291139240506, + "grad_norm": 0.5476285815238953, + "learning_rate": 0.0015, + "loss": 2.9123, + "step": 954 + }, + { + "epoch": 0.10073839662447258, + "grad_norm": 0.7755641937255859, + "learning_rate": 0.0015, + "loss": 2.8941, + "step": 955 + }, + { + "epoch": 0.10084388185654009, + "grad_norm": 0.628899872303009, + "learning_rate": 0.0015, + "loss": 2.8832, + "step": 956 + }, + { + "epoch": 0.10094936708860759, + "grad_norm": 0.5683516263961792, + "learning_rate": 0.0015, + "loss": 2.8777, + "step": 957 + }, + { + "epoch": 0.10105485232067511, + "grad_norm": 0.6631934642791748, + "learning_rate": 0.0015, + "loss": 2.9216, + "step": 958 + }, + { + "epoch": 0.10116033755274262, + "grad_norm": 0.5724136233329773, + "learning_rate": 0.0015, + "loss": 2.8618, + "step": 959 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 0.5282420516014099, + 
"learning_rate": 0.0015, + "loss": 2.8716, + "step": 960 + }, + { + "epoch": 0.10137130801687763, + "grad_norm": 0.420282244682312, + "learning_rate": 0.0015, + "loss": 2.8816, + "step": 961 + }, + { + "epoch": 0.10147679324894515, + "grad_norm": 0.48751765489578247, + "learning_rate": 0.0015, + "loss": 2.8801, + "step": 962 + }, + { + "epoch": 0.10158227848101266, + "grad_norm": 0.3800605535507202, + "learning_rate": 0.0015, + "loss": 2.9232, + "step": 963 + }, + { + "epoch": 0.10168776371308016, + "grad_norm": 0.4721890687942505, + "learning_rate": 0.0015, + "loss": 2.8716, + "step": 964 + }, + { + "epoch": 0.10179324894514769, + "grad_norm": 0.46780484914779663, + "learning_rate": 0.0015, + "loss": 2.9489, + "step": 965 + }, + { + "epoch": 0.10189873417721519, + "grad_norm": 0.4162719249725342, + "learning_rate": 0.0015, + "loss": 2.8679, + "step": 966 + }, + { + "epoch": 0.1020042194092827, + "grad_norm": 0.39395347237586975, + "learning_rate": 0.0015, + "loss": 2.8802, + "step": 967 + }, + { + "epoch": 0.1021097046413502, + "grad_norm": 0.4530376195907593, + "learning_rate": 0.0015, + "loss": 2.9116, + "step": 968 + }, + { + "epoch": 0.10221518987341772, + "grad_norm": 0.4676388204097748, + "learning_rate": 0.0015, + "loss": 2.8807, + "step": 969 + }, + { + "epoch": 0.10232067510548523, + "grad_norm": 0.41765955090522766, + "learning_rate": 0.0015, + "loss": 2.921, + "step": 970 + }, + { + "epoch": 0.10242616033755274, + "grad_norm": 0.46632620692253113, + "learning_rate": 0.0015, + "loss": 2.8687, + "step": 971 + }, + { + "epoch": 0.10253164556962026, + "grad_norm": 0.4703198969364166, + "learning_rate": 0.0015, + "loss": 2.8717, + "step": 972 + }, + { + "epoch": 0.10263713080168776, + "grad_norm": 0.42713093757629395, + "learning_rate": 0.0015, + "loss": 2.8785, + "step": 973 + }, + { + "epoch": 0.10274261603375527, + "grad_norm": 0.46193772554397583, + "learning_rate": 0.0015, + "loss": 2.8932, + "step": 974 + }, + { + "epoch": 0.10284810126582279, + 
"grad_norm": 0.4856361746788025, + "learning_rate": 0.0015, + "loss": 2.8647, + "step": 975 + }, + { + "epoch": 0.1029535864978903, + "grad_norm": 0.46683937311172485, + "learning_rate": 0.0015, + "loss": 2.8786, + "step": 976 + }, + { + "epoch": 0.1030590717299578, + "grad_norm": 0.49519339203834534, + "learning_rate": 0.0015, + "loss": 2.8841, + "step": 977 + }, + { + "epoch": 0.10316455696202531, + "grad_norm": 0.5264483094215393, + "learning_rate": 0.0015, + "loss": 2.879, + "step": 978 + }, + { + "epoch": 0.10327004219409283, + "grad_norm": 0.4646764397621155, + "learning_rate": 0.0015, + "loss": 2.861, + "step": 979 + }, + { + "epoch": 0.10337552742616034, + "grad_norm": 0.6260772943496704, + "learning_rate": 0.0015, + "loss": 2.893, + "step": 980 + }, + { + "epoch": 0.10348101265822784, + "grad_norm": 0.6089738607406616, + "learning_rate": 0.0015, + "loss": 2.8892, + "step": 981 + }, + { + "epoch": 0.10358649789029536, + "grad_norm": 0.46166014671325684, + "learning_rate": 0.0015, + "loss": 2.9151, + "step": 982 + }, + { + "epoch": 0.10369198312236287, + "grad_norm": 0.6577445268630981, + "learning_rate": 0.0015, + "loss": 2.8729, + "step": 983 + }, + { + "epoch": 0.10379746835443038, + "grad_norm": 0.5543642044067383, + "learning_rate": 0.0015, + "loss": 2.8593, + "step": 984 + }, + { + "epoch": 0.10390295358649788, + "grad_norm": 0.5284603238105774, + "learning_rate": 0.0015, + "loss": 2.8769, + "step": 985 + }, + { + "epoch": 0.1040084388185654, + "grad_norm": 0.7198099493980408, + "learning_rate": 0.0015, + "loss": 2.8913, + "step": 986 + }, + { + "epoch": 0.10411392405063291, + "grad_norm": 0.5935599207878113, + "learning_rate": 0.0015, + "loss": 2.9107, + "step": 987 + }, + { + "epoch": 0.10421940928270042, + "grad_norm": 0.5482720732688904, + "learning_rate": 0.0015, + "loss": 2.87, + "step": 988 + }, + { + "epoch": 0.10432489451476794, + "grad_norm": 0.692064642906189, + "learning_rate": 0.0015, + "loss": 2.8846, + "step": 989 + }, + { + "epoch": 
0.10443037974683544, + "grad_norm": 0.7239595651626587, + "learning_rate": 0.0015, + "loss": 2.8809, + "step": 990 + }, + { + "epoch": 0.10453586497890295, + "grad_norm": 0.5092764496803284, + "learning_rate": 0.0015, + "loss": 2.9333, + "step": 991 + }, + { + "epoch": 0.10464135021097046, + "grad_norm": 0.6064428091049194, + "learning_rate": 0.0015, + "loss": 2.8618, + "step": 992 + }, + { + "epoch": 0.10474683544303798, + "grad_norm": 0.7136892080307007, + "learning_rate": 0.0015, + "loss": 2.8588, + "step": 993 + }, + { + "epoch": 0.10485232067510548, + "grad_norm": 0.531305193901062, + "learning_rate": 0.0015, + "loss": 2.8943, + "step": 994 + }, + { + "epoch": 0.10495780590717299, + "grad_norm": 0.49219855666160583, + "learning_rate": 0.0015, + "loss": 2.8578, + "step": 995 + }, + { + "epoch": 0.10506329113924051, + "grad_norm": 0.5564682483673096, + "learning_rate": 0.0015, + "loss": 2.8795, + "step": 996 + }, + { + "epoch": 0.10516877637130802, + "grad_norm": 0.4454730451107025, + "learning_rate": 0.0015, + "loss": 2.8639, + "step": 997 + }, + { + "epoch": 0.10527426160337552, + "grad_norm": 0.5121499300003052, + "learning_rate": 0.0015, + "loss": 2.9023, + "step": 998 + }, + { + "epoch": 0.10537974683544304, + "grad_norm": 0.5052222609519958, + "learning_rate": 0.0015, + "loss": 2.8884, + "step": 999 + }, + { + "epoch": 0.10548523206751055, + "grad_norm": 0.4403526186943054, + "learning_rate": 0.0015, + "loss": 2.902, + "step": 1000 + }, + { + "epoch": 0.10559071729957806, + "grad_norm": 0.45347860455513, + "learning_rate": 0.0015, + "loss": 2.8679, + "step": 1001 + }, + { + "epoch": 0.10569620253164556, + "grad_norm": 0.4991253912448883, + "learning_rate": 0.0015, + "loss": 2.8828, + "step": 1002 + }, + { + "epoch": 0.10580168776371308, + "grad_norm": 0.4763750433921814, + "learning_rate": 0.0015, + "loss": 2.8983, + "step": 1003 + }, + { + "epoch": 0.10590717299578059, + "grad_norm": 0.4128043055534363, + "learning_rate": 0.0015, + "loss": 2.8603, + 
"step": 1004 + }, + { + "epoch": 0.1060126582278481, + "grad_norm": 0.5347931981086731, + "learning_rate": 0.0015, + "loss": 2.864, + "step": 1005 + }, + { + "epoch": 0.10611814345991562, + "grad_norm": 0.5492053031921387, + "learning_rate": 0.0015, + "loss": 2.8736, + "step": 1006 + }, + { + "epoch": 0.10622362869198312, + "grad_norm": 0.6203314661979675, + "learning_rate": 0.0015, + "loss": 2.906, + "step": 1007 + }, + { + "epoch": 0.10632911392405063, + "grad_norm": 0.5637145042419434, + "learning_rate": 0.0015, + "loss": 2.8793, + "step": 1008 + }, + { + "epoch": 0.10643459915611814, + "grad_norm": 0.5920233130455017, + "learning_rate": 0.0015, + "loss": 2.888, + "step": 1009 + }, + { + "epoch": 0.10654008438818566, + "grad_norm": 0.6050498485565186, + "learning_rate": 0.0015, + "loss": 2.8863, + "step": 1010 + }, + { + "epoch": 0.10664556962025316, + "grad_norm": 0.6644845604896545, + "learning_rate": 0.0015, + "loss": 2.8699, + "step": 1011 + }, + { + "epoch": 0.10675105485232067, + "grad_norm": 0.49428752064704895, + "learning_rate": 0.0015, + "loss": 2.8638, + "step": 1012 + }, + { + "epoch": 0.10685654008438819, + "grad_norm": 0.620546281337738, + "learning_rate": 0.0015, + "loss": 2.846, + "step": 1013 + }, + { + "epoch": 0.1069620253164557, + "grad_norm": 0.6927658915519714, + "learning_rate": 0.0015, + "loss": 2.8823, + "step": 1014 + }, + { + "epoch": 0.1070675105485232, + "grad_norm": 0.5742068886756897, + "learning_rate": 0.0015, + "loss": 2.8654, + "step": 1015 + }, + { + "epoch": 0.10717299578059072, + "grad_norm": 0.6160545349121094, + "learning_rate": 0.0015, + "loss": 2.8357, + "step": 1016 + }, + { + "epoch": 0.10727848101265823, + "grad_norm": 0.8451694846153259, + "learning_rate": 0.0015, + "loss": 2.9048, + "step": 1017 + }, + { + "epoch": 0.10738396624472574, + "grad_norm": 0.637244701385498, + "learning_rate": 0.0015, + "loss": 2.8918, + "step": 1018 + }, + { + "epoch": 0.10748945147679324, + "grad_norm": 0.587176501750946, + 
"learning_rate": 0.0015, + "loss": 2.9355, + "step": 1019 + }, + { + "epoch": 0.10759493670886076, + "grad_norm": 0.8754919171333313, + "learning_rate": 0.0015, + "loss": 2.8774, + "step": 1020 + }, + { + "epoch": 0.10770042194092827, + "grad_norm": 0.5445207357406616, + "learning_rate": 0.0015, + "loss": 2.855, + "step": 1021 + }, + { + "epoch": 0.10780590717299578, + "grad_norm": 0.6932568550109863, + "learning_rate": 0.0015, + "loss": 2.8733, + "step": 1022 + }, + { + "epoch": 0.1079113924050633, + "grad_norm": 0.6348987817764282, + "learning_rate": 0.0015, + "loss": 2.8492, + "step": 1023 + }, + { + "epoch": 0.1080168776371308, + "grad_norm": 0.6121981143951416, + "learning_rate": 0.0015, + "loss": 2.8685, + "step": 1024 + }, + { + "epoch": 0.10812236286919831, + "grad_norm": 0.4719456434249878, + "learning_rate": 0.0015, + "loss": 2.9102, + "step": 1025 + }, + { + "epoch": 0.10822784810126582, + "grad_norm": 0.6142137050628662, + "learning_rate": 0.0015, + "loss": 2.8588, + "step": 1026 + }, + { + "epoch": 0.10833333333333334, + "grad_norm": 0.5082595944404602, + "learning_rate": 0.0015, + "loss": 2.8776, + "step": 1027 + }, + { + "epoch": 0.10843881856540084, + "grad_norm": 0.4885730743408203, + "learning_rate": 0.0015, + "loss": 2.8421, + "step": 1028 + }, + { + "epoch": 0.10854430379746835, + "grad_norm": 0.5368741154670715, + "learning_rate": 0.0015, + "loss": 2.8992, + "step": 1029 + }, + { + "epoch": 0.10864978902953587, + "grad_norm": 0.5948108434677124, + "learning_rate": 0.0015, + "loss": 2.8646, + "step": 1030 + }, + { + "epoch": 0.10875527426160338, + "grad_norm": 0.4723275303840637, + "learning_rate": 0.0015, + "loss": 2.8373, + "step": 1031 + }, + { + "epoch": 0.10886075949367088, + "grad_norm": 0.4531865119934082, + "learning_rate": 0.0015, + "loss": 2.827, + "step": 1032 + }, + { + "epoch": 0.10896624472573839, + "grad_norm": 0.5663767457008362, + "learning_rate": 0.0015, + "loss": 2.8651, + "step": 1033 + }, + { + "epoch": 0.10907172995780591, 
+ "grad_norm": 0.46339669823646545, + "learning_rate": 0.0015, + "loss": 2.8884, + "step": 1034 + }, + { + "epoch": 0.10917721518987342, + "grad_norm": 0.46554034948349, + "learning_rate": 0.0015, + "loss": 2.8724, + "step": 1035 + }, + { + "epoch": 0.10928270042194092, + "grad_norm": 0.5545483827590942, + "learning_rate": 0.0015, + "loss": 2.8299, + "step": 1036 + }, + { + "epoch": 0.10938818565400844, + "grad_norm": 0.44137853384017944, + "learning_rate": 0.0015, + "loss": 2.8632, + "step": 1037 + }, + { + "epoch": 0.10949367088607595, + "grad_norm": 0.48164239525794983, + "learning_rate": 0.0015, + "loss": 2.8887, + "step": 1038 + }, + { + "epoch": 0.10959915611814346, + "grad_norm": 0.46908658742904663, + "learning_rate": 0.0015, + "loss": 2.8606, + "step": 1039 + }, + { + "epoch": 0.10970464135021098, + "grad_norm": 0.4266047477722168, + "learning_rate": 0.0015, + "loss": 2.8651, + "step": 1040 + }, + { + "epoch": 0.10981012658227848, + "grad_norm": 0.4386937618255615, + "learning_rate": 0.0015, + "loss": 2.8773, + "step": 1041 + }, + { + "epoch": 0.10991561181434599, + "grad_norm": 0.5100957155227661, + "learning_rate": 0.0015, + "loss": 2.8773, + "step": 1042 + }, + { + "epoch": 0.1100210970464135, + "grad_norm": 0.4115121364593506, + "learning_rate": 0.0015, + "loss": 2.8685, + "step": 1043 + }, + { + "epoch": 0.11012658227848102, + "grad_norm": 0.40965408086776733, + "learning_rate": 0.0015, + "loss": 2.8599, + "step": 1044 + }, + { + "epoch": 0.11023206751054852, + "grad_norm": 0.44422417879104614, + "learning_rate": 0.0015, + "loss": 2.8852, + "step": 1045 + }, + { + "epoch": 0.11033755274261603, + "grad_norm": 0.4233503043651581, + "learning_rate": 0.0015, + "loss": 2.8776, + "step": 1046 + }, + { + "epoch": 0.11044303797468355, + "grad_norm": 0.4791722595691681, + "learning_rate": 0.0015, + "loss": 2.8581, + "step": 1047 + }, + { + "epoch": 0.11054852320675106, + "grad_norm": 0.4469628632068634, + "learning_rate": 0.0015, + "loss": 2.8483, + "step": 
1048 + }, + { + "epoch": 0.11065400843881856, + "grad_norm": 0.4507243037223816, + "learning_rate": 0.0015, + "loss": 2.8422, + "step": 1049 + }, + { + "epoch": 0.11075949367088607, + "grad_norm": 0.581147313117981, + "learning_rate": 0.0015, + "loss": 2.8803, + "step": 1050 + }, + { + "epoch": 0.11086497890295359, + "grad_norm": 0.550684928894043, + "learning_rate": 0.0015, + "loss": 2.8627, + "step": 1051 + }, + { + "epoch": 0.1109704641350211, + "grad_norm": 0.6346836090087891, + "learning_rate": 0.0015, + "loss": 2.8597, + "step": 1052 + }, + { + "epoch": 0.1110759493670886, + "grad_norm": 0.6131190657615662, + "learning_rate": 0.0015, + "loss": 2.8811, + "step": 1053 + }, + { + "epoch": 0.11118143459915612, + "grad_norm": 0.49163487553596497, + "learning_rate": 0.0015, + "loss": 2.8702, + "step": 1054 + }, + { + "epoch": 0.11128691983122363, + "grad_norm": 0.589361310005188, + "learning_rate": 0.0015, + "loss": 2.8409, + "step": 1055 + }, + { + "epoch": 0.11139240506329114, + "grad_norm": 0.517943263053894, + "learning_rate": 0.0015, + "loss": 2.8434, + "step": 1056 + }, + { + "epoch": 0.11149789029535866, + "grad_norm": 0.505882203578949, + "learning_rate": 0.0015, + "loss": 2.8648, + "step": 1057 + }, + { + "epoch": 0.11160337552742616, + "grad_norm": 0.6418063640594482, + "learning_rate": 0.0015, + "loss": 2.8404, + "step": 1058 + }, + { + "epoch": 0.11170886075949367, + "grad_norm": 0.5202580690383911, + "learning_rate": 0.0015, + "loss": 2.8427, + "step": 1059 + }, + { + "epoch": 0.11181434599156118, + "grad_norm": 0.5091648101806641, + "learning_rate": 0.0015, + "loss": 2.8411, + "step": 1060 + }, + { + "epoch": 0.1119198312236287, + "grad_norm": 0.5300213694572449, + "learning_rate": 0.0015, + "loss": 2.8813, + "step": 1061 + }, + { + "epoch": 0.1120253164556962, + "grad_norm": 0.4600299894809723, + "learning_rate": 0.0015, + "loss": 2.839, + "step": 1062 + }, + { + "epoch": 0.11213080168776371, + "grad_norm": 0.5196762681007385, + "learning_rate": 
0.0015, + "loss": 2.8586, + "step": 1063 + }, + { + "epoch": 0.11223628691983123, + "grad_norm": 0.4784868061542511, + "learning_rate": 0.0015, + "loss": 2.8403, + "step": 1064 + }, + { + "epoch": 0.11234177215189874, + "grad_norm": 0.6557206511497498, + "learning_rate": 0.0015, + "loss": 2.8328, + "step": 1065 + }, + { + "epoch": 0.11244725738396624, + "grad_norm": 0.7235183715820312, + "learning_rate": 0.0015, + "loss": 2.8456, + "step": 1066 + }, + { + "epoch": 0.11255274261603375, + "grad_norm": 0.3905777037143707, + "learning_rate": 0.0015, + "loss": 2.8525, + "step": 1067 + }, + { + "epoch": 0.11265822784810127, + "grad_norm": 0.8277051448822021, + "learning_rate": 0.0015, + "loss": 2.8443, + "step": 1068 + }, + { + "epoch": 0.11276371308016878, + "grad_norm": 0.6809840798377991, + "learning_rate": 0.0015, + "loss": 2.8508, + "step": 1069 + }, + { + "epoch": 0.11286919831223628, + "grad_norm": 0.539665162563324, + "learning_rate": 0.0015, + "loss": 2.8601, + "step": 1070 + }, + { + "epoch": 0.1129746835443038, + "grad_norm": 0.8827725052833557, + "learning_rate": 0.0015, + "loss": 2.842, + "step": 1071 + }, + { + "epoch": 0.11308016877637131, + "grad_norm": 0.711921751499176, + "learning_rate": 0.0015, + "loss": 2.8778, + "step": 1072 + }, + { + "epoch": 0.11318565400843882, + "grad_norm": 0.5437031984329224, + "learning_rate": 0.0015, + "loss": 2.8468, + "step": 1073 + }, + { + "epoch": 0.11329113924050632, + "grad_norm": 0.7619648575782776, + "learning_rate": 0.0015, + "loss": 2.852, + "step": 1074 + }, + { + "epoch": 0.11339662447257384, + "grad_norm": 0.5963110327720642, + "learning_rate": 0.0015, + "loss": 2.8686, + "step": 1075 + }, + { + "epoch": 0.11350210970464135, + "grad_norm": 0.4900503158569336, + "learning_rate": 0.0015, + "loss": 2.8655, + "step": 1076 + }, + { + "epoch": 0.11360759493670886, + "grad_norm": 0.7489104866981506, + "learning_rate": 0.0015, + "loss": 2.8707, + "step": 1077 + }, + { + "epoch": 0.11371308016877638, + "grad_norm": 
0.5800290703773499, + "learning_rate": 0.0015, + "loss": 2.805, + "step": 1078 + }, + { + "epoch": 0.11381856540084388, + "grad_norm": 0.8196313381195068, + "learning_rate": 0.0015, + "loss": 2.86, + "step": 1079 + }, + { + "epoch": 0.11392405063291139, + "grad_norm": 0.718258261680603, + "learning_rate": 0.0015, + "loss": 2.8509, + "step": 1080 + }, + { + "epoch": 0.11402953586497891, + "grad_norm": 0.5795497894287109, + "learning_rate": 0.0015, + "loss": 2.8465, + "step": 1081 + }, + { + "epoch": 0.11413502109704642, + "grad_norm": 0.9426462650299072, + "learning_rate": 0.0015, + "loss": 2.8615, + "step": 1082 + }, + { + "epoch": 0.11424050632911392, + "grad_norm": 0.6713368892669678, + "learning_rate": 0.0015, + "loss": 2.8385, + "step": 1083 + }, + { + "epoch": 0.11434599156118143, + "grad_norm": 0.629662275314331, + "learning_rate": 0.0015, + "loss": 2.8721, + "step": 1084 + }, + { + "epoch": 0.11445147679324895, + "grad_norm": 0.730789840221405, + "learning_rate": 0.0015, + "loss": 2.8055, + "step": 1085 + }, + { + "epoch": 0.11455696202531646, + "grad_norm": 0.5912795662879944, + "learning_rate": 0.0015, + "loss": 2.8246, + "step": 1086 + }, + { + "epoch": 0.11466244725738396, + "grad_norm": 0.7066127061843872, + "learning_rate": 0.0015, + "loss": 2.8859, + "step": 1087 + }, + { + "epoch": 0.11476793248945148, + "grad_norm": 0.5333911776542664, + "learning_rate": 0.0015, + "loss": 2.852, + "step": 1088 + }, + { + "epoch": 0.11487341772151899, + "grad_norm": 0.5594263076782227, + "learning_rate": 0.0015, + "loss": 2.8846, + "step": 1089 + }, + { + "epoch": 0.1149789029535865, + "grad_norm": 0.6386085152626038, + "learning_rate": 0.0015, + "loss": 2.8477, + "step": 1090 + }, + { + "epoch": 0.115084388185654, + "grad_norm": 0.4957248270511627, + "learning_rate": 0.0015, + "loss": 2.8076, + "step": 1091 + }, + { + "epoch": 0.11518987341772152, + "grad_norm": 0.5157161951065063, + "learning_rate": 0.0015, + "loss": 2.8124, + "step": 1092 + }, + { + "epoch": 
0.11529535864978903, + "grad_norm": 0.5403198599815369, + "learning_rate": 0.0015, + "loss": 2.8279, + "step": 1093 + }, + { + "epoch": 0.11540084388185654, + "grad_norm": 0.4824497103691101, + "learning_rate": 0.0015, + "loss": 2.8658, + "step": 1094 + }, + { + "epoch": 0.11550632911392406, + "grad_norm": 0.5625380873680115, + "learning_rate": 0.0015, + "loss": 2.8394, + "step": 1095 + }, + { + "epoch": 0.11561181434599156, + "grad_norm": 0.4321615695953369, + "learning_rate": 0.0015, + "loss": 2.8487, + "step": 1096 + }, + { + "epoch": 0.11571729957805907, + "grad_norm": 0.5608127117156982, + "learning_rate": 0.0015, + "loss": 2.8508, + "step": 1097 + }, + { + "epoch": 0.11582278481012659, + "grad_norm": 0.48991286754608154, + "learning_rate": 0.0015, + "loss": 2.863, + "step": 1098 + }, + { + "epoch": 0.1159282700421941, + "grad_norm": 0.5135901570320129, + "learning_rate": 0.0015, + "loss": 2.8243, + "step": 1099 + }, + { + "epoch": 0.1160337552742616, + "grad_norm": 0.7596499919891357, + "learning_rate": 0.0015, + "loss": 2.836, + "step": 1100 + }, + { + "epoch": 0.11613924050632911, + "grad_norm": 0.6442409157752991, + "learning_rate": 0.0015, + "loss": 2.8549, + "step": 1101 + }, + { + "epoch": 0.11624472573839663, + "grad_norm": 0.42111778259277344, + "learning_rate": 0.0015, + "loss": 2.8097, + "step": 1102 + }, + { + "epoch": 0.11635021097046414, + "grad_norm": 0.7737486362457275, + "learning_rate": 0.0015, + "loss": 2.8432, + "step": 1103 + }, + { + "epoch": 0.11645569620253164, + "grad_norm": 0.5781596899032593, + "learning_rate": 0.0015, + "loss": 2.8473, + "step": 1104 + }, + { + "epoch": 0.11656118143459916, + "grad_norm": 0.5334538221359253, + "learning_rate": 0.0015, + "loss": 2.8639, + "step": 1105 + }, + { + "epoch": 0.11666666666666667, + "grad_norm": 0.8447474837303162, + "learning_rate": 0.0015, + "loss": 2.8383, + "step": 1106 + }, + { + "epoch": 0.11677215189873418, + "grad_norm": 0.4802440106868744, + "learning_rate": 0.0015, + "loss": 
2.845, + "step": 1107 + }, + { + "epoch": 0.11687763713080168, + "grad_norm": 0.5542643666267395, + "learning_rate": 0.0015, + "loss": 2.8341, + "step": 1108 + }, + { + "epoch": 0.1169831223628692, + "grad_norm": 0.6110594272613525, + "learning_rate": 0.0015, + "loss": 2.8598, + "step": 1109 + }, + { + "epoch": 0.11708860759493671, + "grad_norm": 0.4318973422050476, + "learning_rate": 0.0015, + "loss": 2.809, + "step": 1110 + }, + { + "epoch": 0.11719409282700421, + "grad_norm": 0.5356847047805786, + "learning_rate": 0.0015, + "loss": 2.8545, + "step": 1111 + }, + { + "epoch": 0.11729957805907174, + "grad_norm": 0.4645156264305115, + "learning_rate": 0.0015, + "loss": 2.8057, + "step": 1112 + }, + { + "epoch": 0.11740506329113924, + "grad_norm": 0.4336893856525421, + "learning_rate": 0.0015, + "loss": 2.8522, + "step": 1113 + }, + { + "epoch": 0.11751054852320675, + "grad_norm": 0.44382184743881226, + "learning_rate": 0.0015, + "loss": 2.8309, + "step": 1114 + }, + { + "epoch": 0.11761603375527427, + "grad_norm": 0.4469183683395386, + "learning_rate": 0.0015, + "loss": 2.877, + "step": 1115 + }, + { + "epoch": 0.11772151898734177, + "grad_norm": 0.4374777376651764, + "learning_rate": 0.0015, + "loss": 2.8222, + "step": 1116 + }, + { + "epoch": 0.11782700421940928, + "grad_norm": 0.4206322133541107, + "learning_rate": 0.0015, + "loss": 2.8169, + "step": 1117 + }, + { + "epoch": 0.11793248945147679, + "grad_norm": 0.471677303314209, + "learning_rate": 0.0015, + "loss": 2.8616, + "step": 1118 + }, + { + "epoch": 0.11803797468354431, + "grad_norm": 0.5530781149864197, + "learning_rate": 0.0015, + "loss": 2.8263, + "step": 1119 + }, + { + "epoch": 0.11814345991561181, + "grad_norm": 0.5236142873764038, + "learning_rate": 0.0015, + "loss": 2.8141, + "step": 1120 + }, + { + "epoch": 0.11824894514767932, + "grad_norm": 0.42675769329071045, + "learning_rate": 0.0015, + "loss": 2.787, + "step": 1121 + }, + { + "epoch": 0.11835443037974684, + "grad_norm": 0.525407075881958, + 
"learning_rate": 0.0015, + "loss": 2.8244, + "step": 1122 + }, + { + "epoch": 0.11845991561181435, + "grad_norm": 0.5346665978431702, + "learning_rate": 0.0015, + "loss": 2.8374, + "step": 1123 + }, + { + "epoch": 0.11856540084388185, + "grad_norm": 0.4540368616580963, + "learning_rate": 0.0015, + "loss": 2.8158, + "step": 1124 + }, + { + "epoch": 0.11867088607594936, + "grad_norm": 0.4864867925643921, + "learning_rate": 0.0015, + "loss": 2.8356, + "step": 1125 + }, + { + "epoch": 0.11877637130801688, + "grad_norm": 0.44257092475891113, + "learning_rate": 0.0015, + "loss": 2.8133, + "step": 1126 + }, + { + "epoch": 0.11888185654008439, + "grad_norm": 0.4477391541004181, + "learning_rate": 0.0015, + "loss": 2.818, + "step": 1127 + }, + { + "epoch": 0.1189873417721519, + "grad_norm": 0.5049386620521545, + "learning_rate": 0.0015, + "loss": 2.808, + "step": 1128 + }, + { + "epoch": 0.11909282700421941, + "grad_norm": 0.3925820291042328, + "learning_rate": 0.0015, + "loss": 2.8434, + "step": 1129 + }, + { + "epoch": 0.11919831223628692, + "grad_norm": 0.5452298521995544, + "learning_rate": 0.0015, + "loss": 2.8202, + "step": 1130 + }, + { + "epoch": 0.11930379746835443, + "grad_norm": 0.43454137444496155, + "learning_rate": 0.0015, + "loss": 2.8833, + "step": 1131 + }, + { + "epoch": 0.11940928270042193, + "grad_norm": 0.47034525871276855, + "learning_rate": 0.0015, + "loss": 2.8253, + "step": 1132 + }, + { + "epoch": 0.11951476793248945, + "grad_norm": 0.585041344165802, + "learning_rate": 0.0015, + "loss": 2.8516, + "step": 1133 + }, + { + "epoch": 0.11962025316455696, + "grad_norm": 0.5661970376968384, + "learning_rate": 0.0015, + "loss": 2.8441, + "step": 1134 + }, + { + "epoch": 0.11972573839662447, + "grad_norm": 0.40993785858154297, + "learning_rate": 0.0015, + "loss": 2.8111, + "step": 1135 + }, + { + "epoch": 0.11983122362869199, + "grad_norm": 0.4586499333381653, + "learning_rate": 0.0015, + "loss": 2.8044, + "step": 1136 + }, + { + "epoch": 
0.1199367088607595, + "grad_norm": 0.48985588550567627, + "learning_rate": 0.0015, + "loss": 2.8493, + "step": 1137 + }, + { + "epoch": 0.120042194092827, + "grad_norm": 0.3706333339214325, + "learning_rate": 0.0015, + "loss": 2.8469, + "step": 1138 + }, + { + "epoch": 0.12014767932489452, + "grad_norm": 0.5028852820396423, + "learning_rate": 0.0015, + "loss": 2.8277, + "step": 1139 + }, + { + "epoch": 0.12025316455696203, + "grad_norm": 0.5102935433387756, + "learning_rate": 0.0015, + "loss": 2.8449, + "step": 1140 + }, + { + "epoch": 0.12035864978902953, + "grad_norm": 0.4387604892253876, + "learning_rate": 0.0015, + "loss": 2.7828, + "step": 1141 + }, + { + "epoch": 0.12046413502109704, + "grad_norm": 0.5170074105262756, + "learning_rate": 0.0015, + "loss": 2.8203, + "step": 1142 + }, + { + "epoch": 0.12056962025316456, + "grad_norm": 0.547421932220459, + "learning_rate": 0.0015, + "loss": 2.8457, + "step": 1143 + }, + { + "epoch": 0.12067510548523207, + "grad_norm": 0.5224800109863281, + "learning_rate": 0.0015, + "loss": 2.7818, + "step": 1144 + }, + { + "epoch": 0.12078059071729957, + "grad_norm": 0.4704647660255432, + "learning_rate": 0.0015, + "loss": 2.8144, + "step": 1145 + }, + { + "epoch": 0.1208860759493671, + "grad_norm": 0.4835972487926483, + "learning_rate": 0.0015, + "loss": 2.8238, + "step": 1146 + }, + { + "epoch": 0.1209915611814346, + "grad_norm": 0.44974279403686523, + "learning_rate": 0.0015, + "loss": 2.8115, + "step": 1147 + }, + { + "epoch": 0.12109704641350211, + "grad_norm": 0.5564624071121216, + "learning_rate": 0.0015, + "loss": 2.8396, + "step": 1148 + }, + { + "epoch": 0.12120253164556961, + "grad_norm": 0.4398743808269501, + "learning_rate": 0.0015, + "loss": 2.8364, + "step": 1149 + }, + { + "epoch": 0.12130801687763713, + "grad_norm": 0.4690723717212677, + "learning_rate": 0.0015, + "loss": 2.8265, + "step": 1150 + }, + { + "epoch": 0.12141350210970464, + "grad_norm": 0.5606921911239624, + "learning_rate": 0.0015, + "loss": 
2.8131, + "step": 1151 + }, + { + "epoch": 0.12151898734177215, + "grad_norm": 0.4372590482234955, + "learning_rate": 0.0015, + "loss": 2.858, + "step": 1152 + }, + { + "epoch": 0.12162447257383967, + "grad_norm": 0.49062854051589966, + "learning_rate": 0.0015, + "loss": 2.8308, + "step": 1153 + }, + { + "epoch": 0.12172995780590717, + "grad_norm": 0.4974188804626465, + "learning_rate": 0.0015, + "loss": 2.8325, + "step": 1154 + }, + { + "epoch": 0.12183544303797468, + "grad_norm": 0.42898672819137573, + "learning_rate": 0.0015, + "loss": 2.8352, + "step": 1155 + }, + { + "epoch": 0.1219409282700422, + "grad_norm": 0.4497793912887573, + "learning_rate": 0.0015, + "loss": 2.8539, + "step": 1156 + }, + { + "epoch": 0.12204641350210971, + "grad_norm": 0.47027528285980225, + "learning_rate": 0.0015, + "loss": 2.8201, + "step": 1157 + }, + { + "epoch": 0.12215189873417721, + "grad_norm": 0.44897574186325073, + "learning_rate": 0.0015, + "loss": 2.8326, + "step": 1158 + }, + { + "epoch": 0.12225738396624472, + "grad_norm": 0.46971821784973145, + "learning_rate": 0.0015, + "loss": 2.8186, + "step": 1159 + }, + { + "epoch": 0.12236286919831224, + "grad_norm": 0.41399139165878296, + "learning_rate": 0.0015, + "loss": 2.8065, + "step": 1160 + }, + { + "epoch": 0.12246835443037975, + "grad_norm": 0.42769643664360046, + "learning_rate": 0.0015, + "loss": 2.8317, + "step": 1161 + }, + { + "epoch": 0.12257383966244725, + "grad_norm": 0.4470098316669464, + "learning_rate": 0.0015, + "loss": 2.8338, + "step": 1162 + }, + { + "epoch": 0.12267932489451477, + "grad_norm": 0.37840354442596436, + "learning_rate": 0.0015, + "loss": 2.804, + "step": 1163 + }, + { + "epoch": 0.12278481012658228, + "grad_norm": 0.40888485312461853, + "learning_rate": 0.0015, + "loss": 2.824, + "step": 1164 + }, + { + "epoch": 0.12289029535864979, + "grad_norm": 0.44438010454177856, + "learning_rate": 0.0015, + "loss": 2.7923, + "step": 1165 + }, + { + "epoch": 0.1229957805907173, + "grad_norm": 
0.4080260694026947, + "learning_rate": 0.0015, + "loss": 2.7943, + "step": 1166 + }, + { + "epoch": 0.12310126582278481, + "grad_norm": 0.450685977935791, + "learning_rate": 0.0015, + "loss": 2.8343, + "step": 1167 + }, + { + "epoch": 0.12320675105485232, + "grad_norm": 0.33700844645500183, + "learning_rate": 0.0015, + "loss": 2.7775, + "step": 1168 + }, + { + "epoch": 0.12331223628691983, + "grad_norm": 0.4067819118499756, + "learning_rate": 0.0015, + "loss": 2.8081, + "step": 1169 + }, + { + "epoch": 0.12341772151898735, + "grad_norm": 0.4017290472984314, + "learning_rate": 0.0015, + "loss": 2.7853, + "step": 1170 + }, + { + "epoch": 0.12352320675105485, + "grad_norm": 0.4019535779953003, + "learning_rate": 0.0015, + "loss": 2.806, + "step": 1171 + }, + { + "epoch": 0.12362869198312236, + "grad_norm": 0.41790613532066345, + "learning_rate": 0.0015, + "loss": 2.7963, + "step": 1172 + }, + { + "epoch": 0.12373417721518987, + "grad_norm": 0.4144482910633087, + "learning_rate": 0.0015, + "loss": 2.772, + "step": 1173 + }, + { + "epoch": 0.12383966244725739, + "grad_norm": 0.4183686673641205, + "learning_rate": 0.0015, + "loss": 2.8119, + "step": 1174 + }, + { + "epoch": 0.1239451476793249, + "grad_norm": 0.4100803732872009, + "learning_rate": 0.0015, + "loss": 2.7767, + "step": 1175 + }, + { + "epoch": 0.1240506329113924, + "grad_norm": 0.393638551235199, + "learning_rate": 0.0015, + "loss": 2.7552, + "step": 1176 + }, + { + "epoch": 0.12415611814345992, + "grad_norm": 0.4172229766845703, + "learning_rate": 0.0015, + "loss": 2.7675, + "step": 1177 + }, + { + "epoch": 0.12426160337552743, + "grad_norm": 0.39418843388557434, + "learning_rate": 0.0015, + "loss": 2.8324, + "step": 1178 + }, + { + "epoch": 0.12436708860759493, + "grad_norm": 0.37790799140930176, + "learning_rate": 0.0015, + "loss": 2.8238, + "step": 1179 + }, + { + "epoch": 0.12447257383966245, + "grad_norm": 0.40273961424827576, + "learning_rate": 0.0015, + "loss": 2.8182, + "step": 1180 + }, + { + 
"epoch": 0.12457805907172996, + "grad_norm": 0.45938295125961304, + "learning_rate": 0.0015, + "loss": 2.7966, + "step": 1181 + }, + { + "epoch": 0.12468354430379747, + "grad_norm": 0.38280555605888367, + "learning_rate": 0.0015, + "loss": 2.7994, + "step": 1182 + }, + { + "epoch": 0.12478902953586497, + "grad_norm": 0.43699488043785095, + "learning_rate": 0.0015, + "loss": 2.8144, + "step": 1183 + }, + { + "epoch": 0.1248945147679325, + "grad_norm": 0.46912962198257446, + "learning_rate": 0.0015, + "loss": 2.8151, + "step": 1184 + }, + { + "epoch": 0.125, + "grad_norm": 0.36381271481513977, + "learning_rate": 0.0015, + "loss": 2.8268, + "step": 1185 + }, + { + "epoch": 0.12510548523206752, + "grad_norm": 0.49130430817604065, + "learning_rate": 0.0015, + "loss": 2.8077, + "step": 1186 + }, + { + "epoch": 0.125210970464135, + "grad_norm": 0.5208228826522827, + "learning_rate": 0.0015, + "loss": 2.8137, + "step": 1187 + }, + { + "epoch": 0.12531645569620253, + "grad_norm": 0.44456350803375244, + "learning_rate": 0.0015, + "loss": 2.8219, + "step": 1188 + }, + { + "epoch": 0.12542194092827005, + "grad_norm": 0.5972092151641846, + "learning_rate": 0.0015, + "loss": 2.8023, + "step": 1189 + }, + { + "epoch": 0.12552742616033755, + "grad_norm": 0.5356767177581787, + "learning_rate": 0.0015, + "loss": 2.7861, + "step": 1190 + }, + { + "epoch": 0.12563291139240507, + "grad_norm": 0.5163045525550842, + "learning_rate": 0.0015, + "loss": 2.8094, + "step": 1191 + }, + { + "epoch": 0.1257383966244726, + "grad_norm": 0.8831205368041992, + "learning_rate": 0.0015, + "loss": 2.8162, + "step": 1192 + }, + { + "epoch": 0.12584388185654008, + "grad_norm": 0.7643307447433472, + "learning_rate": 0.0015, + "loss": 2.8446, + "step": 1193 + }, + { + "epoch": 0.1259493670886076, + "grad_norm": 0.604121208190918, + "learning_rate": 0.0015, + "loss": 2.8542, + "step": 1194 + }, + { + "epoch": 0.1260548523206751, + "grad_norm": 1.4376014471054077, + "learning_rate": 0.0015, + "loss": 2.8258, 
+ "step": 1195 + }, + { + "epoch": 0.1261603375527426, + "grad_norm": 0.542466402053833, + "learning_rate": 0.0015, + "loss": 2.7914, + "step": 1196 + }, + { + "epoch": 0.12626582278481013, + "grad_norm": 0.7157185673713684, + "learning_rate": 0.0015, + "loss": 2.8319, + "step": 1197 + }, + { + "epoch": 0.12637130801687763, + "grad_norm": 0.6073369383811951, + "learning_rate": 0.0015, + "loss": 2.8219, + "step": 1198 + }, + { + "epoch": 0.12647679324894515, + "grad_norm": 0.7943068742752075, + "learning_rate": 0.0015, + "loss": 2.8319, + "step": 1199 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 0.7464427947998047, + "learning_rate": 0.0015, + "loss": 2.8232, + "step": 1200 + }, + { + "epoch": 0.12668776371308016, + "grad_norm": 0.8229198455810547, + "learning_rate": 0.0015, + "loss": 2.8214, + "step": 1201 + }, + { + "epoch": 0.12679324894514768, + "grad_norm": 1.2434725761413574, + "learning_rate": 0.0015, + "loss": 2.8356, + "step": 1202 + }, + { + "epoch": 0.1268987341772152, + "grad_norm": 0.706492006778717, + "learning_rate": 0.0015, + "loss": 2.8145, + "step": 1203 + }, + { + "epoch": 0.1270042194092827, + "grad_norm": 0.8572579622268677, + "learning_rate": 0.0015, + "loss": 2.8206, + "step": 1204 + }, + { + "epoch": 0.1271097046413502, + "grad_norm": 0.5313473343849182, + "learning_rate": 0.0015, + "loss": 2.841, + "step": 1205 + }, + { + "epoch": 0.12721518987341773, + "grad_norm": 0.7148097157478333, + "learning_rate": 0.0015, + "loss": 2.8213, + "step": 1206 + }, + { + "epoch": 0.12732067510548523, + "grad_norm": 0.5838662981987, + "learning_rate": 0.0015, + "loss": 2.8036, + "step": 1207 + }, + { + "epoch": 0.12742616033755275, + "grad_norm": 0.5362080335617065, + "learning_rate": 0.0015, + "loss": 2.8185, + "step": 1208 + }, + { + "epoch": 0.12753164556962027, + "grad_norm": 0.6269358396530151, + "learning_rate": 0.0015, + "loss": 2.812, + "step": 1209 + }, + { + "epoch": 0.12763713080168776, + "grad_norm": 0.5914692282676697, + 
"learning_rate": 0.0015, + "loss": 2.8346, + "step": 1210 + }, + { + "epoch": 0.12774261603375528, + "grad_norm": 0.6348531246185303, + "learning_rate": 0.0015, + "loss": 2.8196, + "step": 1211 + }, + { + "epoch": 0.12784810126582277, + "grad_norm": 0.5353901982307434, + "learning_rate": 0.0015, + "loss": 2.7688, + "step": 1212 + }, + { + "epoch": 0.1279535864978903, + "grad_norm": 0.7804548144340515, + "learning_rate": 0.0015, + "loss": 2.8352, + "step": 1213 + }, + { + "epoch": 0.1280590717299578, + "grad_norm": 0.8086503148078918, + "learning_rate": 0.0015, + "loss": 2.8037, + "step": 1214 + }, + { + "epoch": 0.1281645569620253, + "grad_norm": 0.7420225143432617, + "learning_rate": 0.0015, + "loss": 2.7881, + "step": 1215 + }, + { + "epoch": 0.12827004219409283, + "grad_norm": 1.1631051301956177, + "learning_rate": 0.0015, + "loss": 2.819, + "step": 1216 + }, + { + "epoch": 0.12837552742616035, + "grad_norm": 0.7904253602027893, + "learning_rate": 0.0015, + "loss": 2.8058, + "step": 1217 + }, + { + "epoch": 0.12848101265822784, + "grad_norm": 0.9564961791038513, + "learning_rate": 0.0015, + "loss": 2.8509, + "step": 1218 + }, + { + "epoch": 0.12858649789029536, + "grad_norm": 0.8065664172172546, + "learning_rate": 0.0015, + "loss": 2.7801, + "step": 1219 + }, + { + "epoch": 0.12869198312236288, + "grad_norm": 0.6455373167991638, + "learning_rate": 0.0015, + "loss": 2.7871, + "step": 1220 + }, + { + "epoch": 0.12879746835443037, + "grad_norm": 0.5756880044937134, + "learning_rate": 0.0015, + "loss": 2.8064, + "step": 1221 + }, + { + "epoch": 0.1289029535864979, + "grad_norm": 0.5650623440742493, + "learning_rate": 0.0015, + "loss": 2.8146, + "step": 1222 + }, + { + "epoch": 0.1290084388185654, + "grad_norm": 0.505327582359314, + "learning_rate": 0.0015, + "loss": 2.7904, + "step": 1223 + }, + { + "epoch": 0.1291139240506329, + "grad_norm": 0.7151728868484497, + "learning_rate": 0.0015, + "loss": 2.8166, + "step": 1224 + }, + { + "epoch": 0.12921940928270043, + 
"grad_norm": 0.4122193157672882, + "learning_rate": 0.0015, + "loss": 2.7701, + "step": 1225 + }, + { + "epoch": 0.12932489451476795, + "grad_norm": 0.5976579189300537, + "learning_rate": 0.0015, + "loss": 2.767, + "step": 1226 + }, + { + "epoch": 0.12943037974683544, + "grad_norm": 0.49117985367774963, + "learning_rate": 0.0015, + "loss": 2.7718, + "step": 1227 + }, + { + "epoch": 0.12953586497890296, + "grad_norm": 0.5079287886619568, + "learning_rate": 0.0015, + "loss": 2.7796, + "step": 1228 + }, + { + "epoch": 0.12964135021097045, + "grad_norm": 0.4631164073944092, + "learning_rate": 0.0015, + "loss": 2.8083, + "step": 1229 + }, + { + "epoch": 0.12974683544303797, + "grad_norm": 0.4257996380329132, + "learning_rate": 0.0015, + "loss": 2.7553, + "step": 1230 + }, + { + "epoch": 0.1298523206751055, + "grad_norm": 0.4839034378528595, + "learning_rate": 0.0015, + "loss": 2.7978, + "step": 1231 + }, + { + "epoch": 0.12995780590717299, + "grad_norm": 0.40911799669265747, + "learning_rate": 0.0015, + "loss": 2.8206, + "step": 1232 + }, + { + "epoch": 0.1300632911392405, + "grad_norm": 0.47710496187210083, + "learning_rate": 0.0015, + "loss": 2.7849, + "step": 1233 + }, + { + "epoch": 0.13016877637130803, + "grad_norm": 0.4517521858215332, + "learning_rate": 0.0015, + "loss": 2.8202, + "step": 1234 + }, + { + "epoch": 0.13027426160337552, + "grad_norm": 0.5095358490943909, + "learning_rate": 0.0015, + "loss": 2.8119, + "step": 1235 + }, + { + "epoch": 0.13037974683544304, + "grad_norm": 0.48434245586395264, + "learning_rate": 0.0015, + "loss": 2.7917, + "step": 1236 + }, + { + "epoch": 0.13048523206751056, + "grad_norm": 0.42411941289901733, + "learning_rate": 0.0015, + "loss": 2.7905, + "step": 1237 + }, + { + "epoch": 0.13059071729957805, + "grad_norm": 0.5286077857017517, + "learning_rate": 0.0015, + "loss": 2.7992, + "step": 1238 + }, + { + "epoch": 0.13069620253164557, + "grad_norm": 0.4496643841266632, + "learning_rate": 0.0015, + "loss": 2.7932, + "step": 1239 
+ }, + { + "epoch": 0.1308016877637131, + "grad_norm": 0.5154181122779846, + "learning_rate": 0.0015, + "loss": 2.7889, + "step": 1240 + }, + { + "epoch": 0.13090717299578059, + "grad_norm": 0.4064103066921234, + "learning_rate": 0.0015, + "loss": 2.7793, + "step": 1241 + }, + { + "epoch": 0.1310126582278481, + "grad_norm": 0.4910350739955902, + "learning_rate": 0.0015, + "loss": 2.8112, + "step": 1242 + }, + { + "epoch": 0.1311181434599156, + "grad_norm": 0.4678806662559509, + "learning_rate": 0.0015, + "loss": 2.7927, + "step": 1243 + }, + { + "epoch": 0.13122362869198312, + "grad_norm": 0.4455227553844452, + "learning_rate": 0.0015, + "loss": 2.7879, + "step": 1244 + }, + { + "epoch": 0.13132911392405064, + "grad_norm": 0.5290395021438599, + "learning_rate": 0.0015, + "loss": 2.8126, + "step": 1245 + }, + { + "epoch": 0.13143459915611813, + "grad_norm": 0.4528297781944275, + "learning_rate": 0.0015, + "loss": 2.8243, + "step": 1246 + }, + { + "epoch": 0.13154008438818565, + "grad_norm": 0.5001853108406067, + "learning_rate": 0.0015, + "loss": 2.7731, + "step": 1247 + }, + { + "epoch": 0.13164556962025317, + "grad_norm": 0.53053879737854, + "learning_rate": 0.0015, + "loss": 2.801, + "step": 1248 + }, + { + "epoch": 0.13175105485232066, + "grad_norm": 0.5342417359352112, + "learning_rate": 0.0015, + "loss": 2.8045, + "step": 1249 + }, + { + "epoch": 0.13185654008438819, + "grad_norm": 0.5084194540977478, + "learning_rate": 0.0015, + "loss": 2.7742, + "step": 1250 + }, + { + "epoch": 0.1319620253164557, + "grad_norm": 0.4754837155342102, + "learning_rate": 0.0015, + "loss": 2.8071, + "step": 1251 + }, + { + "epoch": 0.1320675105485232, + "grad_norm": 0.49347323179244995, + "learning_rate": 0.0015, + "loss": 2.7835, + "step": 1252 + }, + { + "epoch": 0.13217299578059072, + "grad_norm": 0.4102420508861542, + "learning_rate": 0.0015, + "loss": 2.7638, + "step": 1253 + }, + { + "epoch": 0.13227848101265824, + "grad_norm": 0.5136781334877014, + "learning_rate": 0.0015, 
+ "loss": 2.7715, + "step": 1254 + }, + { + "epoch": 0.13238396624472573, + "grad_norm": 0.42347702383995056, + "learning_rate": 0.0015, + "loss": 2.7549, + "step": 1255 + }, + { + "epoch": 0.13248945147679325, + "grad_norm": 0.4647488594055176, + "learning_rate": 0.0015, + "loss": 2.787, + "step": 1256 + }, + { + "epoch": 0.13259493670886077, + "grad_norm": 0.49475446343421936, + "learning_rate": 0.0015, + "loss": 2.8079, + "step": 1257 + }, + { + "epoch": 0.13270042194092826, + "grad_norm": 0.37510862946510315, + "learning_rate": 0.0015, + "loss": 2.787, + "step": 1258 + }, + { + "epoch": 0.13280590717299579, + "grad_norm": 0.5195196270942688, + "learning_rate": 0.0015, + "loss": 2.798, + "step": 1259 + }, + { + "epoch": 0.13291139240506328, + "grad_norm": 0.4970589578151703, + "learning_rate": 0.0015, + "loss": 2.7932, + "step": 1260 + }, + { + "epoch": 0.1330168776371308, + "grad_norm": 0.43608129024505615, + "learning_rate": 0.0015, + "loss": 2.7954, + "step": 1261 + }, + { + "epoch": 0.13312236286919832, + "grad_norm": 0.41890910267829895, + "learning_rate": 0.0015, + "loss": 2.7629, + "step": 1262 + }, + { + "epoch": 0.1332278481012658, + "grad_norm": 0.3963739573955536, + "learning_rate": 0.0015, + "loss": 2.7537, + "step": 1263 + }, + { + "epoch": 0.13333333333333333, + "grad_norm": 0.4376368224620819, + "learning_rate": 0.0015, + "loss": 2.7598, + "step": 1264 + }, + { + "epoch": 0.13343881856540085, + "grad_norm": 0.4040215015411377, + "learning_rate": 0.0015, + "loss": 2.7806, + "step": 1265 + }, + { + "epoch": 0.13354430379746834, + "grad_norm": 0.41687384247779846, + "learning_rate": 0.0015, + "loss": 2.771, + "step": 1266 + }, + { + "epoch": 0.13364978902953586, + "grad_norm": 0.4483718276023865, + "learning_rate": 0.0015, + "loss": 2.8046, + "step": 1267 + }, + { + "epoch": 0.13375527426160339, + "grad_norm": 0.3724249303340912, + "learning_rate": 0.0015, + "loss": 2.755, + "step": 1268 + }, + { + "epoch": 0.13386075949367088, + "grad_norm": 
0.4535517692565918, + "learning_rate": 0.0015, + "loss": 2.8163, + "step": 1269 + }, + { + "epoch": 0.1339662447257384, + "grad_norm": 0.3636702597141266, + "learning_rate": 0.0015, + "loss": 2.7712, + "step": 1270 + }, + { + "epoch": 0.13407172995780592, + "grad_norm": 0.4589552581310272, + "learning_rate": 0.0015, + "loss": 2.8164, + "step": 1271 + }, + { + "epoch": 0.1341772151898734, + "grad_norm": 0.3886342942714691, + "learning_rate": 0.0015, + "loss": 2.8375, + "step": 1272 + }, + { + "epoch": 0.13428270042194093, + "grad_norm": 0.41857093572616577, + "learning_rate": 0.0015, + "loss": 2.7808, + "step": 1273 + }, + { + "epoch": 0.13438818565400845, + "grad_norm": 0.4222840964794159, + "learning_rate": 0.0015, + "loss": 2.8093, + "step": 1274 + }, + { + "epoch": 0.13449367088607594, + "grad_norm": 0.41402825713157654, + "learning_rate": 0.0015, + "loss": 2.7822, + "step": 1275 + }, + { + "epoch": 0.13459915611814346, + "grad_norm": 0.3795495331287384, + "learning_rate": 0.0015, + "loss": 2.7734, + "step": 1276 + }, + { + "epoch": 0.13470464135021096, + "grad_norm": 0.4229728877544403, + "learning_rate": 0.0015, + "loss": 2.8051, + "step": 1277 + }, + { + "epoch": 0.13481012658227848, + "grad_norm": 0.4090929329395294, + "learning_rate": 0.0015, + "loss": 2.7656, + "step": 1278 + }, + { + "epoch": 0.134915611814346, + "grad_norm": 0.5262954831123352, + "learning_rate": 0.0015, + "loss": 2.7912, + "step": 1279 + }, + { + "epoch": 0.1350210970464135, + "grad_norm": 0.5005655288696289, + "learning_rate": 0.0015, + "loss": 2.748, + "step": 1280 + }, + { + "epoch": 0.135126582278481, + "grad_norm": 0.37675994634628296, + "learning_rate": 0.0015, + "loss": 2.7759, + "step": 1281 + }, + { + "epoch": 0.13523206751054853, + "grad_norm": 0.41920313239097595, + "learning_rate": 0.0015, + "loss": 2.7417, + "step": 1282 + }, + { + "epoch": 0.13533755274261602, + "grad_norm": 0.41676291823387146, + "learning_rate": 0.0015, + "loss": 2.7862, + "step": 1283 + }, + { + 
"epoch": 0.13544303797468354, + "grad_norm": 0.4282758831977844, + "learning_rate": 0.0015, + "loss": 2.7611, + "step": 1284 + }, + { + "epoch": 0.13554852320675106, + "grad_norm": 0.4829050898551941, + "learning_rate": 0.0015, + "loss": 2.7662, + "step": 1285 + }, + { + "epoch": 0.13565400843881856, + "grad_norm": 0.4634682238101959, + "learning_rate": 0.0015, + "loss": 2.7568, + "step": 1286 + }, + { + "epoch": 0.13575949367088608, + "grad_norm": 0.5314525365829468, + "learning_rate": 0.0015, + "loss": 2.7599, + "step": 1287 + }, + { + "epoch": 0.1358649789029536, + "grad_norm": 0.5217589139938354, + "learning_rate": 0.0015, + "loss": 2.7881, + "step": 1288 + }, + { + "epoch": 0.1359704641350211, + "grad_norm": 0.489427387714386, + "learning_rate": 0.0015, + "loss": 2.8315, + "step": 1289 + }, + { + "epoch": 0.1360759493670886, + "grad_norm": 0.8143189549446106, + "learning_rate": 0.0015, + "loss": 2.7883, + "step": 1290 + }, + { + "epoch": 0.13618143459915613, + "grad_norm": 0.6907418966293335, + "learning_rate": 0.0015, + "loss": 2.7837, + "step": 1291 + }, + { + "epoch": 0.13628691983122362, + "grad_norm": 0.4975929856300354, + "learning_rate": 0.0015, + "loss": 2.7659, + "step": 1292 + }, + { + "epoch": 0.13639240506329114, + "grad_norm": 0.7800167798995972, + "learning_rate": 0.0015, + "loss": 2.7824, + "step": 1293 + }, + { + "epoch": 0.13649789029535864, + "grad_norm": 0.7115898132324219, + "learning_rate": 0.0015, + "loss": 2.8005, + "step": 1294 + }, + { + "epoch": 0.13660337552742616, + "grad_norm": 0.6820878386497498, + "learning_rate": 0.0015, + "loss": 2.7277, + "step": 1295 + }, + { + "epoch": 0.13670886075949368, + "grad_norm": 0.8066052794456482, + "learning_rate": 0.0015, + "loss": 2.778, + "step": 1296 + }, + { + "epoch": 0.13681434599156117, + "grad_norm": 0.739962100982666, + "learning_rate": 0.0015, + "loss": 2.7898, + "step": 1297 + }, + { + "epoch": 0.1369198312236287, + "grad_norm": 0.7868672609329224, + "learning_rate": 0.0015, + "loss": 
2.7314, + "step": 1298 + }, + { + "epoch": 0.1370253164556962, + "grad_norm": 0.6627511382102966, + "learning_rate": 0.0015, + "loss": 2.7606, + "step": 1299 + }, + { + "epoch": 0.1371308016877637, + "grad_norm": 0.7786152958869934, + "learning_rate": 0.0015, + "loss": 2.7835, + "step": 1300 + }, + { + "epoch": 0.13723628691983122, + "grad_norm": 0.68060302734375, + "learning_rate": 0.0015, + "loss": 2.7922, + "step": 1301 + }, + { + "epoch": 0.13734177215189874, + "grad_norm": 0.4978809058666229, + "learning_rate": 0.0015, + "loss": 2.7913, + "step": 1302 + }, + { + "epoch": 0.13744725738396624, + "grad_norm": 0.571426272392273, + "learning_rate": 0.0015, + "loss": 2.7739, + "step": 1303 + }, + { + "epoch": 0.13755274261603376, + "grad_norm": 0.47984063625335693, + "learning_rate": 0.0015, + "loss": 2.7299, + "step": 1304 + }, + { + "epoch": 0.13765822784810128, + "grad_norm": 0.4890850782394409, + "learning_rate": 0.0015, + "loss": 2.788, + "step": 1305 + }, + { + "epoch": 0.13776371308016877, + "grad_norm": 0.4210553467273712, + "learning_rate": 0.0015, + "loss": 2.7534, + "step": 1306 + }, + { + "epoch": 0.1378691983122363, + "grad_norm": 0.5724374055862427, + "learning_rate": 0.0015, + "loss": 2.771, + "step": 1307 + }, + { + "epoch": 0.1379746835443038, + "grad_norm": 0.47498148679733276, + "learning_rate": 0.0015, + "loss": 2.7937, + "step": 1308 + }, + { + "epoch": 0.1380801687763713, + "grad_norm": 0.4936680197715759, + "learning_rate": 0.0015, + "loss": 2.7469, + "step": 1309 + }, + { + "epoch": 0.13818565400843882, + "grad_norm": 0.6462669968605042, + "learning_rate": 0.0015, + "loss": 2.7812, + "step": 1310 + }, + { + "epoch": 0.13829113924050632, + "grad_norm": 0.48340898752212524, + "learning_rate": 0.0015, + "loss": 2.7854, + "step": 1311 + }, + { + "epoch": 0.13839662447257384, + "grad_norm": 0.5280654430389404, + "learning_rate": 0.0015, + "loss": 2.7736, + "step": 1312 + }, + { + "epoch": 0.13850210970464136, + "grad_norm": 0.6895527243614197, + 
"learning_rate": 0.0015, + "loss": 2.7898, + "step": 1313 + }, + { + "epoch": 0.13860759493670885, + "grad_norm": 0.8488339781761169, + "learning_rate": 0.0015, + "loss": 2.809, + "step": 1314 + }, + { + "epoch": 0.13871308016877637, + "grad_norm": 0.5635599493980408, + "learning_rate": 0.0015, + "loss": 2.7891, + "step": 1315 + }, + { + "epoch": 0.1388185654008439, + "grad_norm": 0.7376696467399597, + "learning_rate": 0.0015, + "loss": 2.7439, + "step": 1316 + }, + { + "epoch": 0.13892405063291138, + "grad_norm": 0.9510250687599182, + "learning_rate": 0.0015, + "loss": 2.7796, + "step": 1317 + }, + { + "epoch": 0.1390295358649789, + "grad_norm": 0.5468886494636536, + "learning_rate": 0.0015, + "loss": 2.7711, + "step": 1318 + }, + { + "epoch": 0.13913502109704642, + "grad_norm": 0.9575920104980469, + "learning_rate": 0.0015, + "loss": 2.7874, + "step": 1319 + }, + { + "epoch": 0.13924050632911392, + "grad_norm": 0.9347231388092041, + "learning_rate": 0.0015, + "loss": 2.7883, + "step": 1320 + }, + { + "epoch": 0.13934599156118144, + "grad_norm": 0.5686253309249878, + "learning_rate": 0.0015, + "loss": 2.7567, + "step": 1321 + }, + { + "epoch": 0.13945147679324896, + "grad_norm": 1.2832822799682617, + "learning_rate": 0.0015, + "loss": 2.7855, + "step": 1322 + }, + { + "epoch": 0.13955696202531645, + "grad_norm": 0.5757610201835632, + "learning_rate": 0.0015, + "loss": 2.7607, + "step": 1323 + }, + { + "epoch": 0.13966244725738397, + "grad_norm": 0.8989657759666443, + "learning_rate": 0.0015, + "loss": 2.7936, + "step": 1324 + }, + { + "epoch": 0.13976793248945146, + "grad_norm": 0.7939106822013855, + "learning_rate": 0.0015, + "loss": 2.774, + "step": 1325 + }, + { + "epoch": 0.13987341772151898, + "grad_norm": 0.5022808313369751, + "learning_rate": 0.0015, + "loss": 2.7837, + "step": 1326 + }, + { + "epoch": 0.1399789029535865, + "grad_norm": 0.6745582222938538, + "learning_rate": 0.0015, + "loss": 2.7639, + "step": 1327 + }, + { + "epoch": 0.140084388185654, + 
"grad_norm": 0.45836877822875977, + "learning_rate": 0.0015, + "loss": 2.7473, + "step": 1328 + }, + { + "epoch": 0.14018987341772152, + "grad_norm": 0.5717602968215942, + "learning_rate": 0.0015, + "loss": 2.7536, + "step": 1329 + }, + { + "epoch": 0.14029535864978904, + "grad_norm": 0.52215576171875, + "learning_rate": 0.0015, + "loss": 2.7917, + "step": 1330 + }, + { + "epoch": 0.14040084388185653, + "grad_norm": 0.4936349391937256, + "learning_rate": 0.0015, + "loss": 2.784, + "step": 1331 + }, + { + "epoch": 0.14050632911392405, + "grad_norm": 0.624698281288147, + "learning_rate": 0.0015, + "loss": 2.7726, + "step": 1332 + }, + { + "epoch": 0.14061181434599157, + "grad_norm": 0.40858766436576843, + "learning_rate": 0.0015, + "loss": 2.7503, + "step": 1333 + }, + { + "epoch": 0.14071729957805906, + "grad_norm": 0.4911259710788727, + "learning_rate": 0.0015, + "loss": 2.7644, + "step": 1334 + }, + { + "epoch": 0.14082278481012658, + "grad_norm": 0.44764450192451477, + "learning_rate": 0.0015, + "loss": 2.7576, + "step": 1335 + }, + { + "epoch": 0.1409282700421941, + "grad_norm": 0.4538794457912445, + "learning_rate": 0.0015, + "loss": 2.7686, + "step": 1336 + }, + { + "epoch": 0.1410337552742616, + "grad_norm": 0.4494558572769165, + "learning_rate": 0.0015, + "loss": 2.7679, + "step": 1337 + }, + { + "epoch": 0.14113924050632912, + "grad_norm": 0.4341670572757721, + "learning_rate": 0.0015, + "loss": 2.776, + "step": 1338 + }, + { + "epoch": 0.14124472573839664, + "grad_norm": 0.5193992853164673, + "learning_rate": 0.0015, + "loss": 2.7835, + "step": 1339 + }, + { + "epoch": 0.14135021097046413, + "grad_norm": 0.4005005359649658, + "learning_rate": 0.0015, + "loss": 2.7629, + "step": 1340 + }, + { + "epoch": 0.14145569620253165, + "grad_norm": 0.5381936430931091, + "learning_rate": 0.0015, + "loss": 2.7908, + "step": 1341 + }, + { + "epoch": 0.14156118143459914, + "grad_norm": 0.4718489944934845, + "learning_rate": 0.0015, + "loss": 2.801, + "step": 1342 + }, + 
{ + "epoch": 0.14166666666666666, + "grad_norm": 0.4231264889240265, + "learning_rate": 0.0015, + "loss": 2.761, + "step": 1343 + }, + { + "epoch": 0.14177215189873418, + "grad_norm": 0.5411812663078308, + "learning_rate": 0.0015, + "loss": 2.7792, + "step": 1344 + }, + { + "epoch": 0.14187763713080168, + "grad_norm": 0.4356623888015747, + "learning_rate": 0.0015, + "loss": 2.7594, + "step": 1345 + }, + { + "epoch": 0.1419831223628692, + "grad_norm": 0.4575501084327698, + "learning_rate": 0.0015, + "loss": 2.7621, + "step": 1346 + }, + { + "epoch": 0.14208860759493672, + "grad_norm": 0.5200966000556946, + "learning_rate": 0.0015, + "loss": 2.7651, + "step": 1347 + }, + { + "epoch": 0.1421940928270042, + "grad_norm": 0.3994283676147461, + "learning_rate": 0.0015, + "loss": 2.7483, + "step": 1348 + }, + { + "epoch": 0.14229957805907173, + "grad_norm": 0.45608288049697876, + "learning_rate": 0.0015, + "loss": 2.7731, + "step": 1349 + }, + { + "epoch": 0.14240506329113925, + "grad_norm": 0.4300677478313446, + "learning_rate": 0.0015, + "loss": 2.7787, + "step": 1350 + }, + { + "epoch": 0.14251054852320674, + "grad_norm": 0.45228683948516846, + "learning_rate": 0.0015, + "loss": 2.7676, + "step": 1351 + }, + { + "epoch": 0.14261603375527426, + "grad_norm": 0.5377377867698669, + "learning_rate": 0.0015, + "loss": 2.7564, + "step": 1352 + }, + { + "epoch": 0.14272151898734178, + "grad_norm": 0.39975935220718384, + "learning_rate": 0.0015, + "loss": 2.7661, + "step": 1353 + }, + { + "epoch": 0.14282700421940928, + "grad_norm": 0.43251386284828186, + "learning_rate": 0.0015, + "loss": 2.7719, + "step": 1354 + }, + { + "epoch": 0.1429324894514768, + "grad_norm": 0.5352707505226135, + "learning_rate": 0.0015, + "loss": 2.737, + "step": 1355 + }, + { + "epoch": 0.14303797468354432, + "grad_norm": 0.3885495960712433, + "learning_rate": 0.0015, + "loss": 2.7414, + "step": 1356 + }, + { + "epoch": 0.1431434599156118, + "grad_norm": 0.44647231698036194, + "learning_rate": 0.0015, 
+ "loss": 2.7674, + "step": 1357 + }, + { + "epoch": 0.14324894514767933, + "grad_norm": 0.505479633808136, + "learning_rate": 0.0015, + "loss": 2.7486, + "step": 1358 + }, + { + "epoch": 0.14335443037974682, + "grad_norm": 0.41953039169311523, + "learning_rate": 0.0015, + "loss": 2.7355, + "step": 1359 + }, + { + "epoch": 0.14345991561181434, + "grad_norm": 0.4313969016075134, + "learning_rate": 0.0015, + "loss": 2.7604, + "step": 1360 + }, + { + "epoch": 0.14356540084388186, + "grad_norm": 0.3910362422466278, + "learning_rate": 0.0015, + "loss": 2.7631, + "step": 1361 + }, + { + "epoch": 0.14367088607594936, + "grad_norm": 0.3847757577896118, + "learning_rate": 0.0015, + "loss": 2.7667, + "step": 1362 + }, + { + "epoch": 0.14377637130801688, + "grad_norm": 0.48768988251686096, + "learning_rate": 0.0015, + "loss": 2.7363, + "step": 1363 + }, + { + "epoch": 0.1438818565400844, + "grad_norm": 0.41084912419319153, + "learning_rate": 0.0015, + "loss": 2.7504, + "step": 1364 + }, + { + "epoch": 0.1439873417721519, + "grad_norm": 0.3830481469631195, + "learning_rate": 0.0015, + "loss": 2.7636, + "step": 1365 + }, + { + "epoch": 0.1440928270042194, + "grad_norm": 0.4857964515686035, + "learning_rate": 0.0015, + "loss": 2.7703, + "step": 1366 + }, + { + "epoch": 0.14419831223628693, + "grad_norm": 0.42920953035354614, + "learning_rate": 0.0015, + "loss": 2.7569, + "step": 1367 + }, + { + "epoch": 0.14430379746835442, + "grad_norm": 0.403693288564682, + "learning_rate": 0.0015, + "loss": 2.7601, + "step": 1368 + }, + { + "epoch": 0.14440928270042194, + "grad_norm": 0.4533381164073944, + "learning_rate": 0.0015, + "loss": 2.7583, + "step": 1369 + }, + { + "epoch": 0.14451476793248946, + "grad_norm": 0.42771074175834656, + "learning_rate": 0.0015, + "loss": 2.7422, + "step": 1370 + }, + { + "epoch": 0.14462025316455696, + "grad_norm": 0.43944650888442993, + "learning_rate": 0.0015, + "loss": 2.755, + "step": 1371 + }, + { + "epoch": 0.14472573839662448, + "grad_norm": 
0.42012882232666016, + "learning_rate": 0.0015, + "loss": 2.7533, + "step": 1372 + }, + { + "epoch": 0.144831223628692, + "grad_norm": 0.45953890681266785, + "learning_rate": 0.0015, + "loss": 2.7469, + "step": 1373 + }, + { + "epoch": 0.1449367088607595, + "grad_norm": 0.4251090884208679, + "learning_rate": 0.0015, + "loss": 2.7189, + "step": 1374 + }, + { + "epoch": 0.145042194092827, + "grad_norm": 0.39889657497406006, + "learning_rate": 0.0015, + "loss": 2.7555, + "step": 1375 + }, + { + "epoch": 0.1451476793248945, + "grad_norm": 0.44467055797576904, + "learning_rate": 0.0015, + "loss": 2.7411, + "step": 1376 + }, + { + "epoch": 0.14525316455696202, + "grad_norm": 0.5577545762062073, + "learning_rate": 0.0015, + "loss": 2.7324, + "step": 1377 + }, + { + "epoch": 0.14535864978902954, + "grad_norm": 0.4556241035461426, + "learning_rate": 0.0015, + "loss": 2.7297, + "step": 1378 + }, + { + "epoch": 0.14546413502109704, + "grad_norm": 0.40799570083618164, + "learning_rate": 0.0015, + "loss": 2.7615, + "step": 1379 + }, + { + "epoch": 0.14556962025316456, + "grad_norm": 0.4562379717826843, + "learning_rate": 0.0015, + "loss": 2.7379, + "step": 1380 + }, + { + "epoch": 0.14567510548523208, + "grad_norm": 0.501980721950531, + "learning_rate": 0.0015, + "loss": 2.777, + "step": 1381 + }, + { + "epoch": 0.14578059071729957, + "grad_norm": 0.378348708152771, + "learning_rate": 0.0015, + "loss": 2.7561, + "step": 1382 + }, + { + "epoch": 0.1458860759493671, + "grad_norm": 0.44007155299186707, + "learning_rate": 0.0015, + "loss": 2.7409, + "step": 1383 + }, + { + "epoch": 0.1459915611814346, + "grad_norm": 0.4473247826099396, + "learning_rate": 0.0015, + "loss": 2.7528, + "step": 1384 + }, + { + "epoch": 0.1460970464135021, + "grad_norm": 0.3980141878128052, + "learning_rate": 0.0015, + "loss": 2.7689, + "step": 1385 + }, + { + "epoch": 0.14620253164556962, + "grad_norm": 0.4616040587425232, + "learning_rate": 0.0015, + "loss": 2.7389, + "step": 1386 + }, + { + "epoch": 
0.14630801687763714, + "grad_norm": 0.5126967430114746, + "learning_rate": 0.0015, + "loss": 2.741, + "step": 1387 + }, + { + "epoch": 0.14641350210970464, + "grad_norm": 0.3777044117450714, + "learning_rate": 0.0015, + "loss": 2.7454, + "step": 1388 + }, + { + "epoch": 0.14651898734177216, + "grad_norm": 0.46159660816192627, + "learning_rate": 0.0015, + "loss": 2.7467, + "step": 1389 + }, + { + "epoch": 0.14662447257383968, + "grad_norm": 0.4507737457752228, + "learning_rate": 0.0015, + "loss": 2.7627, + "step": 1390 + }, + { + "epoch": 0.14672995780590717, + "grad_norm": 0.47503969073295593, + "learning_rate": 0.0015, + "loss": 2.7673, + "step": 1391 + }, + { + "epoch": 0.1468354430379747, + "grad_norm": 0.44566622376441956, + "learning_rate": 0.0015, + "loss": 2.7317, + "step": 1392 + }, + { + "epoch": 0.14694092827004218, + "grad_norm": 0.5465106964111328, + "learning_rate": 0.0015, + "loss": 2.7336, + "step": 1393 + }, + { + "epoch": 0.1470464135021097, + "grad_norm": 0.5260345935821533, + "learning_rate": 0.0015, + "loss": 2.7556, + "step": 1394 + }, + { + "epoch": 0.14715189873417722, + "grad_norm": 0.43616899847984314, + "learning_rate": 0.0015, + "loss": 2.7659, + "step": 1395 + }, + { + "epoch": 0.14725738396624471, + "grad_norm": 0.6206331253051758, + "learning_rate": 0.0015, + "loss": 2.7592, + "step": 1396 + }, + { + "epoch": 0.14736286919831224, + "grad_norm": 0.5321158170700073, + "learning_rate": 0.0015, + "loss": 2.7137, + "step": 1397 + }, + { + "epoch": 0.14746835443037976, + "grad_norm": 0.45111674070358276, + "learning_rate": 0.0015, + "loss": 2.6893, + "step": 1398 + }, + { + "epoch": 0.14757383966244725, + "grad_norm": 0.5734481811523438, + "learning_rate": 0.0015, + "loss": 2.7585, + "step": 1399 + }, + { + "epoch": 0.14767932489451477, + "grad_norm": 0.5147172212600708, + "learning_rate": 0.0015, + "loss": 2.7266, + "step": 1400 + }, + { + "epoch": 0.1477848101265823, + "grad_norm": 0.48464512825012207, + "learning_rate": 0.0015, + "loss": 
2.7599, + "step": 1401 + }, + { + "epoch": 0.14789029535864978, + "grad_norm": 0.5902037620544434, + "learning_rate": 0.0015, + "loss": 2.7513, + "step": 1402 + }, + { + "epoch": 0.1479957805907173, + "grad_norm": 0.4255846440792084, + "learning_rate": 0.0015, + "loss": 2.7644, + "step": 1403 + }, + { + "epoch": 0.14810126582278482, + "grad_norm": 0.5917258858680725, + "learning_rate": 0.0015, + "loss": 2.7422, + "step": 1404 + }, + { + "epoch": 0.14820675105485231, + "grad_norm": 0.5758952498435974, + "learning_rate": 0.0015, + "loss": 2.7309, + "step": 1405 + }, + { + "epoch": 0.14831223628691984, + "grad_norm": 0.4550987482070923, + "learning_rate": 0.0015, + "loss": 2.733, + "step": 1406 + }, + { + "epoch": 0.14841772151898736, + "grad_norm": 0.8086510896682739, + "learning_rate": 0.0015, + "loss": 2.7038, + "step": 1407 + }, + { + "epoch": 0.14852320675105485, + "grad_norm": 0.7425371408462524, + "learning_rate": 0.0015, + "loss": 2.7526, + "step": 1408 + }, + { + "epoch": 0.14862869198312237, + "grad_norm": 0.542976975440979, + "learning_rate": 0.0015, + "loss": 2.7725, + "step": 1409 + }, + { + "epoch": 0.14873417721518986, + "grad_norm": 1.1368170976638794, + "learning_rate": 0.0015, + "loss": 2.7696, + "step": 1410 + }, + { + "epoch": 0.14883966244725738, + "grad_norm": 0.6707020998001099, + "learning_rate": 0.0015, + "loss": 2.7708, + "step": 1411 + }, + { + "epoch": 0.1489451476793249, + "grad_norm": 0.7109842300415039, + "learning_rate": 0.0015, + "loss": 2.7465, + "step": 1412 + }, + { + "epoch": 0.1490506329113924, + "grad_norm": 1.1766566038131714, + "learning_rate": 0.0015, + "loss": 2.7649, + "step": 1413 + }, + { + "epoch": 0.14915611814345991, + "grad_norm": 0.5334993600845337, + "learning_rate": 0.0015, + "loss": 2.7629, + "step": 1414 + }, + { + "epoch": 0.14926160337552744, + "grad_norm": 0.705773115158081, + "learning_rate": 0.0015, + "loss": 2.7249, + "step": 1415 + }, + { + "epoch": 0.14936708860759493, + "grad_norm": 0.5396654009819031, + 
"learning_rate": 0.0015, + "loss": 2.7668, + "step": 1416 + }, + { + "epoch": 0.14947257383966245, + "grad_norm": 0.5358840823173523, + "learning_rate": 0.0015, + "loss": 2.7376, + "step": 1417 + }, + { + "epoch": 0.14957805907172997, + "grad_norm": 0.5978990197181702, + "learning_rate": 0.0015, + "loss": 2.7368, + "step": 1418 + }, + { + "epoch": 0.14968354430379746, + "grad_norm": 0.40801018476486206, + "learning_rate": 0.0015, + "loss": 2.754, + "step": 1419 + }, + { + "epoch": 0.14978902953586498, + "grad_norm": 0.5140160322189331, + "learning_rate": 0.0015, + "loss": 2.7388, + "step": 1420 + }, + { + "epoch": 0.1498945147679325, + "grad_norm": 0.4281938672065735, + "learning_rate": 0.0015, + "loss": 2.7603, + "step": 1421 + }, + { + "epoch": 0.15, + "grad_norm": 0.4290008246898651, + "learning_rate": 0.0015, + "loss": 2.7316, + "step": 1422 + }, + { + "epoch": 0.15010548523206751, + "grad_norm": 0.4608164131641388, + "learning_rate": 0.0015, + "loss": 2.7405, + "step": 1423 + }, + { + "epoch": 0.150210970464135, + "grad_norm": 0.4408256709575653, + "learning_rate": 0.0015, + "loss": 2.7412, + "step": 1424 + }, + { + "epoch": 0.15031645569620253, + "grad_norm": 0.43383872509002686, + "learning_rate": 0.0015, + "loss": 2.7335, + "step": 1425 + }, + { + "epoch": 0.15042194092827005, + "grad_norm": 0.4543629288673401, + "learning_rate": 0.0015, + "loss": 2.7531, + "step": 1426 + }, + { + "epoch": 0.15052742616033754, + "grad_norm": 0.4168683886528015, + "learning_rate": 0.0015, + "loss": 2.7645, + "step": 1427 + }, + { + "epoch": 0.15063291139240506, + "grad_norm": 0.46285122632980347, + "learning_rate": 0.0015, + "loss": 2.7423, + "step": 1428 + }, + { + "epoch": 0.15073839662447258, + "grad_norm": 0.43665531277656555, + "learning_rate": 0.0015, + "loss": 2.7193, + "step": 1429 + }, + { + "epoch": 0.15084388185654007, + "grad_norm": 0.44647476077079773, + "learning_rate": 0.0015, + "loss": 2.7342, + "step": 1430 + }, + { + "epoch": 0.1509493670886076, + 
"grad_norm": 0.45548927783966064, + "learning_rate": 0.0015, + "loss": 2.749, + "step": 1431 + }, + { + "epoch": 0.15105485232067511, + "grad_norm": 0.4477362036705017, + "learning_rate": 0.0015, + "loss": 2.7437, + "step": 1432 + }, + { + "epoch": 0.1511603375527426, + "grad_norm": 0.4201948344707489, + "learning_rate": 0.0015, + "loss": 2.7008, + "step": 1433 + }, + { + "epoch": 0.15126582278481013, + "grad_norm": 0.4154314696788788, + "learning_rate": 0.0015, + "loss": 2.7324, + "step": 1434 + }, + { + "epoch": 0.15137130801687765, + "grad_norm": 0.48551565408706665, + "learning_rate": 0.0015, + "loss": 2.7619, + "step": 1435 + }, + { + "epoch": 0.15147679324894514, + "grad_norm": 0.3917662501335144, + "learning_rate": 0.0015, + "loss": 2.7241, + "step": 1436 + }, + { + "epoch": 0.15158227848101266, + "grad_norm": 0.46196818351745605, + "learning_rate": 0.0015, + "loss": 2.7497, + "step": 1437 + }, + { + "epoch": 0.15168776371308018, + "grad_norm": 0.4083606004714966, + "learning_rate": 0.0015, + "loss": 2.7032, + "step": 1438 + }, + { + "epoch": 0.15179324894514767, + "grad_norm": 0.49610573053359985, + "learning_rate": 0.0015, + "loss": 2.728, + "step": 1439 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 0.5978586673736572, + "learning_rate": 0.0015, + "loss": 2.73, + "step": 1440 + }, + { + "epoch": 0.1520042194092827, + "grad_norm": 0.4742000699043274, + "learning_rate": 0.0015, + "loss": 2.7006, + "step": 1441 + }, + { + "epoch": 0.1521097046413502, + "grad_norm": 0.6701253652572632, + "learning_rate": 0.0015, + "loss": 2.7423, + "step": 1442 + }, + { + "epoch": 0.15221518987341773, + "grad_norm": 0.6949282884597778, + "learning_rate": 0.0015, + "loss": 2.7312, + "step": 1443 + }, + { + "epoch": 0.15232067510548522, + "grad_norm": 0.4704185724258423, + "learning_rate": 0.0015, + "loss": 2.7367, + "step": 1444 + }, + { + "epoch": 0.15242616033755274, + "grad_norm": 0.775181233882904, + "learning_rate": 0.0015, + "loss": 2.7354, + "step": 1445 + }, + 
{ + "epoch": 0.15253164556962026, + "grad_norm": 0.71864253282547, + "learning_rate": 0.0015, + "loss": 2.751, + "step": 1446 + }, + { + "epoch": 0.15263713080168775, + "grad_norm": 0.6914138197898865, + "learning_rate": 0.0015, + "loss": 2.7558, + "step": 1447 + }, + { + "epoch": 0.15274261603375527, + "grad_norm": 0.7028330564498901, + "learning_rate": 0.0015, + "loss": 2.7191, + "step": 1448 + }, + { + "epoch": 0.1528481012658228, + "grad_norm": 0.5699003338813782, + "learning_rate": 0.0015, + "loss": 2.7586, + "step": 1449 + }, + { + "epoch": 0.1529535864978903, + "grad_norm": 0.665145218372345, + "learning_rate": 0.0015, + "loss": 2.7437, + "step": 1450 + }, + { + "epoch": 0.1530590717299578, + "grad_norm": 0.42447224259376526, + "learning_rate": 0.0015, + "loss": 2.731, + "step": 1451 + }, + { + "epoch": 0.15316455696202533, + "grad_norm": 0.5718211531639099, + "learning_rate": 0.0015, + "loss": 2.7372, + "step": 1452 + }, + { + "epoch": 0.15327004219409282, + "grad_norm": 0.46767526865005493, + "learning_rate": 0.0015, + "loss": 2.7095, + "step": 1453 + }, + { + "epoch": 0.15337552742616034, + "grad_norm": 0.5185298323631287, + "learning_rate": 0.0015, + "loss": 2.7176, + "step": 1454 + }, + { + "epoch": 0.15348101265822786, + "grad_norm": 0.5544909834861755, + "learning_rate": 0.0015, + "loss": 2.7176, + "step": 1455 + }, + { + "epoch": 0.15358649789029535, + "grad_norm": 0.45130449533462524, + "learning_rate": 0.0015, + "loss": 2.7602, + "step": 1456 + }, + { + "epoch": 0.15369198312236287, + "grad_norm": 0.5703650712966919, + "learning_rate": 0.0015, + "loss": 2.7171, + "step": 1457 + }, + { + "epoch": 0.15379746835443037, + "grad_norm": 0.41374439001083374, + "learning_rate": 0.0015, + "loss": 2.7415, + "step": 1458 + }, + { + "epoch": 0.1539029535864979, + "grad_norm": 0.5500354170799255, + "learning_rate": 0.0015, + "loss": 2.7428, + "step": 1459 + }, + { + "epoch": 0.1540084388185654, + "grad_norm": 0.3661830723285675, + "learning_rate": 0.0015, + 
"loss": 2.753, + "step": 1460 + }, + { + "epoch": 0.1541139240506329, + "grad_norm": 0.4670107066631317, + "learning_rate": 0.0015, + "loss": 2.7436, + "step": 1461 + }, + { + "epoch": 0.15421940928270042, + "grad_norm": 0.4150688052177429, + "learning_rate": 0.0015, + "loss": 2.6831, + "step": 1462 + }, + { + "epoch": 0.15432489451476794, + "grad_norm": 0.417240709066391, + "learning_rate": 0.0015, + "loss": 2.7118, + "step": 1463 + }, + { + "epoch": 0.15443037974683543, + "grad_norm": 0.5359029173851013, + "learning_rate": 0.0015, + "loss": 2.7238, + "step": 1464 + }, + { + "epoch": 0.15453586497890295, + "grad_norm": 0.43162423372268677, + "learning_rate": 0.0015, + "loss": 2.7188, + "step": 1465 + }, + { + "epoch": 0.15464135021097047, + "grad_norm": 0.5503636002540588, + "learning_rate": 0.0015, + "loss": 2.7129, + "step": 1466 + }, + { + "epoch": 0.15474683544303797, + "grad_norm": 0.5036489367485046, + "learning_rate": 0.0015, + "loss": 2.7715, + "step": 1467 + }, + { + "epoch": 0.1548523206751055, + "grad_norm": 0.519731879234314, + "learning_rate": 0.0015, + "loss": 2.7407, + "step": 1468 + }, + { + "epoch": 0.154957805907173, + "grad_norm": 0.573375940322876, + "learning_rate": 0.0015, + "loss": 2.7313, + "step": 1469 + }, + { + "epoch": 0.1550632911392405, + "grad_norm": 0.5914857983589172, + "learning_rate": 0.0015, + "loss": 2.7303, + "step": 1470 + }, + { + "epoch": 0.15516877637130802, + "grad_norm": 0.5116535425186157, + "learning_rate": 0.0015, + "loss": 2.7261, + "step": 1471 + }, + { + "epoch": 0.15527426160337554, + "grad_norm": 0.4899287819862366, + "learning_rate": 0.0015, + "loss": 2.7227, + "step": 1472 + }, + { + "epoch": 0.15537974683544303, + "grad_norm": 0.5329233407974243, + "learning_rate": 0.0015, + "loss": 2.7327, + "step": 1473 + }, + { + "epoch": 0.15548523206751055, + "grad_norm": 0.37282443046569824, + "learning_rate": 0.0015, + "loss": 2.704, + "step": 1474 + }, + { + "epoch": 0.15559071729957805, + "grad_norm": 
0.48982855677604675, + "learning_rate": 0.0015, + "loss": 2.7279, + "step": 1475 + }, + { + "epoch": 0.15569620253164557, + "grad_norm": 0.4571092128753662, + "learning_rate": 0.0015, + "loss": 2.7349, + "step": 1476 + }, + { + "epoch": 0.1558016877637131, + "grad_norm": 0.4274985194206238, + "learning_rate": 0.0015, + "loss": 2.7323, + "step": 1477 + }, + { + "epoch": 0.15590717299578058, + "grad_norm": 0.46729883551597595, + "learning_rate": 0.0015, + "loss": 2.7329, + "step": 1478 + }, + { + "epoch": 0.1560126582278481, + "grad_norm": 0.4234873056411743, + "learning_rate": 0.0015, + "loss": 2.7515, + "step": 1479 + }, + { + "epoch": 0.15611814345991562, + "grad_norm": 0.5081548094749451, + "learning_rate": 0.0015, + "loss": 2.6837, + "step": 1480 + }, + { + "epoch": 0.1562236286919831, + "grad_norm": 0.4749017059803009, + "learning_rate": 0.0015, + "loss": 2.7312, + "step": 1481 + }, + { + "epoch": 0.15632911392405063, + "grad_norm": 0.5300248265266418, + "learning_rate": 0.0015, + "loss": 2.7338, + "step": 1482 + }, + { + "epoch": 0.15643459915611815, + "grad_norm": 0.472893089056015, + "learning_rate": 0.0015, + "loss": 2.7242, + "step": 1483 + }, + { + "epoch": 0.15654008438818565, + "grad_norm": 0.5770610570907593, + "learning_rate": 0.0015, + "loss": 2.7382, + "step": 1484 + }, + { + "epoch": 0.15664556962025317, + "grad_norm": 0.57819664478302, + "learning_rate": 0.0015, + "loss": 2.7204, + "step": 1485 + }, + { + "epoch": 0.1567510548523207, + "grad_norm": 0.5219398736953735, + "learning_rate": 0.0015, + "loss": 2.7406, + "step": 1486 + }, + { + "epoch": 0.15685654008438818, + "grad_norm": 0.4309442639350891, + "learning_rate": 0.0015, + "loss": 2.6832, + "step": 1487 + }, + { + "epoch": 0.1569620253164557, + "grad_norm": 0.4405316114425659, + "learning_rate": 0.0015, + "loss": 2.7294, + "step": 1488 + }, + { + "epoch": 0.15706751054852322, + "grad_norm": 0.45265907049179077, + "learning_rate": 0.0015, + "loss": 2.7043, + "step": 1489 + }, + { + "epoch": 
0.1571729957805907, + "grad_norm": 0.5205717086791992, + "learning_rate": 0.0015, + "loss": 2.7173, + "step": 1490 + }, + { + "epoch": 0.15727848101265823, + "grad_norm": 0.4210909307003021, + "learning_rate": 0.0015, + "loss": 2.7124, + "step": 1491 + }, + { + "epoch": 0.15738396624472573, + "grad_norm": 0.45223885774612427, + "learning_rate": 0.0015, + "loss": 2.7217, + "step": 1492 + }, + { + "epoch": 0.15748945147679325, + "grad_norm": 0.45459744334220886, + "learning_rate": 0.0015, + "loss": 2.7152, + "step": 1493 + }, + { + "epoch": 0.15759493670886077, + "grad_norm": 0.49764934182167053, + "learning_rate": 0.0015, + "loss": 2.7149, + "step": 1494 + }, + { + "epoch": 0.15770042194092826, + "grad_norm": 0.4169052541255951, + "learning_rate": 0.0015, + "loss": 2.6775, + "step": 1495 + }, + { + "epoch": 0.15780590717299578, + "grad_norm": 0.524732768535614, + "learning_rate": 0.0015, + "loss": 2.7562, + "step": 1496 + }, + { + "epoch": 0.1579113924050633, + "grad_norm": 0.4388597011566162, + "learning_rate": 0.0015, + "loss": 2.7384, + "step": 1497 + }, + { + "epoch": 0.1580168776371308, + "grad_norm": 0.5187046527862549, + "learning_rate": 0.0015, + "loss": 2.7305, + "step": 1498 + }, + { + "epoch": 0.1581223628691983, + "grad_norm": 0.4088917374610901, + "learning_rate": 0.0015, + "loss": 2.7593, + "step": 1499 + }, + { + "epoch": 0.15822784810126583, + "grad_norm": 0.4559454917907715, + "learning_rate": 0.0015, + "loss": 2.7357, + "step": 1500 + }, + { + "epoch": 0.15833333333333333, + "grad_norm": 0.42568960785865784, + "learning_rate": 0.0015, + "loss": 2.7285, + "step": 1501 + }, + { + "epoch": 0.15843881856540085, + "grad_norm": 0.4221448004245758, + "learning_rate": 0.0015, + "loss": 2.7064, + "step": 1502 + }, + { + "epoch": 0.15854430379746837, + "grad_norm": 0.3667161166667938, + "learning_rate": 0.0015, + "loss": 2.7083, + "step": 1503 + }, + { + "epoch": 0.15864978902953586, + "grad_norm": 0.49551084637641907, + "learning_rate": 0.0015, + "loss": 
2.7504, + "step": 1504 + }, + { + "epoch": 0.15875527426160338, + "grad_norm": 0.48652705550193787, + "learning_rate": 0.0015, + "loss": 2.7037, + "step": 1505 + }, + { + "epoch": 0.15886075949367087, + "grad_norm": 0.48890116810798645, + "learning_rate": 0.0015, + "loss": 2.7206, + "step": 1506 + }, + { + "epoch": 0.1589662447257384, + "grad_norm": 0.4936200976371765, + "learning_rate": 0.0015, + "loss": 2.7558, + "step": 1507 + }, + { + "epoch": 0.1590717299578059, + "grad_norm": 0.4912688136100769, + "learning_rate": 0.0015, + "loss": 2.7046, + "step": 1508 + }, + { + "epoch": 0.1591772151898734, + "grad_norm": 0.5503796339035034, + "learning_rate": 0.0015, + "loss": 2.7192, + "step": 1509 + }, + { + "epoch": 0.15928270042194093, + "grad_norm": 0.507095217704773, + "learning_rate": 0.0015, + "loss": 2.7151, + "step": 1510 + }, + { + "epoch": 0.15938818565400845, + "grad_norm": 0.43129271268844604, + "learning_rate": 0.0015, + "loss": 2.7103, + "step": 1511 + }, + { + "epoch": 0.15949367088607594, + "grad_norm": 0.5424764752388, + "learning_rate": 0.0015, + "loss": 2.7366, + "step": 1512 + }, + { + "epoch": 0.15959915611814346, + "grad_norm": 0.5641195178031921, + "learning_rate": 0.0015, + "loss": 2.7468, + "step": 1513 + }, + { + "epoch": 0.15970464135021098, + "grad_norm": 0.42085540294647217, + "learning_rate": 0.0015, + "loss": 2.6989, + "step": 1514 + }, + { + "epoch": 0.15981012658227847, + "grad_norm": 0.5431761741638184, + "learning_rate": 0.0015, + "loss": 2.7136, + "step": 1515 + }, + { + "epoch": 0.159915611814346, + "grad_norm": 0.702133297920227, + "learning_rate": 0.0015, + "loss": 2.7297, + "step": 1516 + }, + { + "epoch": 0.1600210970464135, + "grad_norm": 0.5589029788970947, + "learning_rate": 0.0015, + "loss": 2.7177, + "step": 1517 + }, + { + "epoch": 0.160126582278481, + "grad_norm": 0.5911984443664551, + "learning_rate": 0.0015, + "loss": 2.709, + "step": 1518 + }, + { + "epoch": 0.16023206751054853, + "grad_norm": 0.6510465741157532, + 
"learning_rate": 0.0015, + "loss": 2.7325, + "step": 1519 + }, + { + "epoch": 0.16033755274261605, + "grad_norm": 0.46753764152526855, + "learning_rate": 0.0015, + "loss": 2.712, + "step": 1520 + }, + { + "epoch": 0.16044303797468354, + "grad_norm": 0.5507661700248718, + "learning_rate": 0.0015, + "loss": 2.7358, + "step": 1521 + }, + { + "epoch": 0.16054852320675106, + "grad_norm": 0.6265037059783936, + "learning_rate": 0.0015, + "loss": 2.7122, + "step": 1522 + }, + { + "epoch": 0.16065400843881855, + "grad_norm": 0.40747615694999695, + "learning_rate": 0.0015, + "loss": 2.6866, + "step": 1523 + }, + { + "epoch": 0.16075949367088607, + "grad_norm": 0.5122784376144409, + "learning_rate": 0.0015, + "loss": 2.7238, + "step": 1524 + }, + { + "epoch": 0.1608649789029536, + "grad_norm": 0.552236020565033, + "learning_rate": 0.0015, + "loss": 2.7001, + "step": 1525 + }, + { + "epoch": 0.16097046413502109, + "grad_norm": 0.4184838533401489, + "learning_rate": 0.0015, + "loss": 2.6489, + "step": 1526 + }, + { + "epoch": 0.1610759493670886, + "grad_norm": 0.489470899105072, + "learning_rate": 0.0015, + "loss": 2.7264, + "step": 1527 + }, + { + "epoch": 0.16118143459915613, + "grad_norm": 0.5090662240982056, + "learning_rate": 0.0015, + "loss": 2.6693, + "step": 1528 + }, + { + "epoch": 0.16128691983122362, + "grad_norm": 0.5724756121635437, + "learning_rate": 0.0015, + "loss": 2.7285, + "step": 1529 + }, + { + "epoch": 0.16139240506329114, + "grad_norm": 0.5564016103744507, + "learning_rate": 0.0015, + "loss": 2.6917, + "step": 1530 + }, + { + "epoch": 0.16149789029535866, + "grad_norm": 0.4978943467140198, + "learning_rate": 0.0015, + "loss": 2.6792, + "step": 1531 + }, + { + "epoch": 0.16160337552742615, + "grad_norm": 0.655291736125946, + "learning_rate": 0.0015, + "loss": 2.6859, + "step": 1532 + }, + { + "epoch": 0.16170886075949367, + "grad_norm": 0.6296129822731018, + "learning_rate": 0.0015, + "loss": 2.7084, + "step": 1533 + }, + { + "epoch": 0.1618143459915612, + 
"grad_norm": 0.5535954833030701, + "learning_rate": 0.0015, + "loss": 2.6862, + "step": 1534 + }, + { + "epoch": 0.16191983122362869, + "grad_norm": 1.0580259561538696, + "learning_rate": 0.0015, + "loss": 2.7421, + "step": 1535 + }, + { + "epoch": 0.1620253164556962, + "grad_norm": 0.8065771460533142, + "learning_rate": 0.0015, + "loss": 2.684, + "step": 1536 + }, + { + "epoch": 0.16213080168776373, + "grad_norm": 0.5887819528579712, + "learning_rate": 0.0015, + "loss": 2.6814, + "step": 1537 + }, + { + "epoch": 0.16223628691983122, + "grad_norm": 1.2025620937347412, + "learning_rate": 0.0015, + "loss": 2.7166, + "step": 1538 + }, + { + "epoch": 0.16234177215189874, + "grad_norm": 0.4360479414463043, + "learning_rate": 0.0015, + "loss": 2.7134, + "step": 1539 + }, + { + "epoch": 0.16244725738396623, + "grad_norm": 0.6372429132461548, + "learning_rate": 0.0015, + "loss": 2.7227, + "step": 1540 + }, + { + "epoch": 0.16255274261603375, + "grad_norm": 0.48028409481048584, + "learning_rate": 0.0015, + "loss": 2.703, + "step": 1541 + }, + { + "epoch": 0.16265822784810127, + "grad_norm": 0.5997057557106018, + "learning_rate": 0.0015, + "loss": 2.6806, + "step": 1542 + }, + { + "epoch": 0.16276371308016876, + "grad_norm": 0.6320812106132507, + "learning_rate": 0.0015, + "loss": 2.7133, + "step": 1543 + }, + { + "epoch": 0.16286919831223629, + "grad_norm": 0.523780882358551, + "learning_rate": 0.0015, + "loss": 2.7191, + "step": 1544 + }, + { + "epoch": 0.1629746835443038, + "grad_norm": 0.766128659248352, + "learning_rate": 0.0015, + "loss": 2.701, + "step": 1545 + }, + { + "epoch": 0.1630801687763713, + "grad_norm": 0.4976692795753479, + "learning_rate": 0.0015, + "loss": 2.6842, + "step": 1546 + }, + { + "epoch": 0.16318565400843882, + "grad_norm": 0.506844162940979, + "learning_rate": 0.0015, + "loss": 2.6921, + "step": 1547 + }, + { + "epoch": 0.16329113924050634, + "grad_norm": 0.5263179540634155, + "learning_rate": 0.0015, + "loss": 2.7342, + "step": 1548 + }, + { + 
"epoch": 0.16339662447257383, + "grad_norm": 0.48113420605659485, + "learning_rate": 0.0015, + "loss": 2.7169, + "step": 1549 + }, + { + "epoch": 0.16350210970464135, + "grad_norm": 0.48666515946388245, + "learning_rate": 0.0015, + "loss": 2.6701, + "step": 1550 + }, + { + "epoch": 0.16360759493670887, + "grad_norm": 0.42556771636009216, + "learning_rate": 0.0015, + "loss": 2.7085, + "step": 1551 + }, + { + "epoch": 0.16371308016877636, + "grad_norm": 0.5118756890296936, + "learning_rate": 0.0015, + "loss": 2.7138, + "step": 1552 + }, + { + "epoch": 0.16381856540084389, + "grad_norm": 0.45778968930244446, + "learning_rate": 0.0015, + "loss": 2.7605, + "step": 1553 + }, + { + "epoch": 0.1639240506329114, + "grad_norm": 0.4645363390445709, + "learning_rate": 0.0015, + "loss": 2.6737, + "step": 1554 + }, + { + "epoch": 0.1640295358649789, + "grad_norm": 0.4798252582550049, + "learning_rate": 0.0015, + "loss": 2.7053, + "step": 1555 + }, + { + "epoch": 0.16413502109704642, + "grad_norm": 0.46691465377807617, + "learning_rate": 0.0015, + "loss": 2.698, + "step": 1556 + }, + { + "epoch": 0.1642405063291139, + "grad_norm": 0.5108565092086792, + "learning_rate": 0.0015, + "loss": 2.7344, + "step": 1557 + }, + { + "epoch": 0.16434599156118143, + "grad_norm": 0.6091669201850891, + "learning_rate": 0.0015, + "loss": 2.6827, + "step": 1558 + }, + { + "epoch": 0.16445147679324895, + "grad_norm": 0.4376429617404938, + "learning_rate": 0.0015, + "loss": 2.6687, + "step": 1559 + }, + { + "epoch": 0.16455696202531644, + "grad_norm": 0.478695809841156, + "learning_rate": 0.0015, + "loss": 2.6913, + "step": 1560 + }, + { + "epoch": 0.16466244725738396, + "grad_norm": 0.47615325450897217, + "learning_rate": 0.0015, + "loss": 2.6529, + "step": 1561 + }, + { + "epoch": 0.16476793248945149, + "grad_norm": 0.4306524991989136, + "learning_rate": 0.0015, + "loss": 2.7266, + "step": 1562 + }, + { + "epoch": 0.16487341772151898, + "grad_norm": 0.44654878973960876, + "learning_rate": 0.0015, + 
"loss": 2.6944, + "step": 1563 + }, + { + "epoch": 0.1649789029535865, + "grad_norm": 0.44473880529403687, + "learning_rate": 0.0015, + "loss": 2.6794, + "step": 1564 + }, + { + "epoch": 0.16508438818565402, + "grad_norm": 0.5647839307785034, + "learning_rate": 0.0015, + "loss": 2.6579, + "step": 1565 + }, + { + "epoch": 0.1651898734177215, + "grad_norm": 0.4367271065711975, + "learning_rate": 0.0015, + "loss": 2.7015, + "step": 1566 + }, + { + "epoch": 0.16529535864978903, + "grad_norm": 0.4928111433982849, + "learning_rate": 0.0015, + "loss": 2.6995, + "step": 1567 + }, + { + "epoch": 0.16540084388185655, + "grad_norm": 0.526547908782959, + "learning_rate": 0.0015, + "loss": 2.75, + "step": 1568 + }, + { + "epoch": 0.16550632911392404, + "grad_norm": 0.49563658237457275, + "learning_rate": 0.0015, + "loss": 2.6756, + "step": 1569 + }, + { + "epoch": 0.16561181434599156, + "grad_norm": 0.44986793398857117, + "learning_rate": 0.0015, + "loss": 2.6952, + "step": 1570 + }, + { + "epoch": 0.16571729957805909, + "grad_norm": 0.5243343710899353, + "learning_rate": 0.0015, + "loss": 2.6978, + "step": 1571 + }, + { + "epoch": 0.16582278481012658, + "grad_norm": 0.4728015661239624, + "learning_rate": 0.0015, + "loss": 2.7023, + "step": 1572 + }, + { + "epoch": 0.1659282700421941, + "grad_norm": 0.5276522636413574, + "learning_rate": 0.0015, + "loss": 2.7059, + "step": 1573 + }, + { + "epoch": 0.1660337552742616, + "grad_norm": 0.6432439684867859, + "learning_rate": 0.0015, + "loss": 2.661, + "step": 1574 + }, + { + "epoch": 0.1661392405063291, + "grad_norm": 0.45691221952438354, + "learning_rate": 0.0015, + "loss": 2.6965, + "step": 1575 + }, + { + "epoch": 0.16624472573839663, + "grad_norm": 0.5476827621459961, + "learning_rate": 0.0015, + "loss": 2.7142, + "step": 1576 + }, + { + "epoch": 0.16635021097046412, + "grad_norm": 0.5086904764175415, + "learning_rate": 0.0015, + "loss": 2.7113, + "step": 1577 + }, + { + "epoch": 0.16645569620253164, + "grad_norm": 
0.47672203183174133, + "learning_rate": 0.0015, + "loss": 2.6859, + "step": 1578 + }, + { + "epoch": 0.16656118143459916, + "grad_norm": 0.6001810431480408, + "learning_rate": 0.0015, + "loss": 2.7062, + "step": 1579 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 0.4118253290653229, + "learning_rate": 0.0015, + "loss": 2.7049, + "step": 1580 + }, + { + "epoch": 0.16677215189873418, + "grad_norm": 0.6295565962791443, + "learning_rate": 0.0015, + "loss": 2.717, + "step": 1581 + }, + { + "epoch": 0.1668776371308017, + "grad_norm": 0.45562487840652466, + "learning_rate": 0.0015, + "loss": 2.7008, + "step": 1582 + }, + { + "epoch": 0.1669831223628692, + "grad_norm": 0.5487342476844788, + "learning_rate": 0.0015, + "loss": 2.6839, + "step": 1583 + }, + { + "epoch": 0.1670886075949367, + "grad_norm": 0.5414087772369385, + "learning_rate": 0.0015, + "loss": 2.6559, + "step": 1584 + }, + { + "epoch": 0.16719409282700423, + "grad_norm": 0.44960981607437134, + "learning_rate": 0.0015, + "loss": 2.7286, + "step": 1585 + }, + { + "epoch": 0.16729957805907172, + "grad_norm": 0.7316034436225891, + "learning_rate": 0.0015, + "loss": 2.7174, + "step": 1586 + }, + { + "epoch": 0.16740506329113924, + "grad_norm": 0.6532284617424011, + "learning_rate": 0.0015, + "loss": 2.6814, + "step": 1587 + }, + { + "epoch": 0.16751054852320676, + "grad_norm": 0.5324395895004272, + "learning_rate": 0.0015, + "loss": 2.6849, + "step": 1588 + }, + { + "epoch": 0.16761603375527426, + "grad_norm": 0.7528373003005981, + "learning_rate": 0.0015, + "loss": 2.7134, + "step": 1589 + }, + { + "epoch": 0.16772151898734178, + "grad_norm": 0.6701654195785522, + "learning_rate": 0.0015, + "loss": 2.6773, + "step": 1590 + }, + { + "epoch": 0.16782700421940927, + "grad_norm": 0.6753311157226562, + "learning_rate": 0.0015, + "loss": 2.6884, + "step": 1591 + }, + { + "epoch": 0.1679324894514768, + "grad_norm": 0.6951602101325989, + "learning_rate": 0.0015, + "loss": 2.6638, + "step": 1592 + }, + { + 
"epoch": 0.1680379746835443, + "grad_norm": 0.5746335387229919, + "learning_rate": 0.0015, + "loss": 2.7162, + "step": 1593 + }, + { + "epoch": 0.1681434599156118, + "grad_norm": 0.6366451382637024, + "learning_rate": 0.0015, + "loss": 2.6787, + "step": 1594 + }, + { + "epoch": 0.16824894514767932, + "grad_norm": 0.7181317806243896, + "learning_rate": 0.0015, + "loss": 2.7244, + "step": 1595 + }, + { + "epoch": 0.16835443037974684, + "grad_norm": 0.6119691729545593, + "learning_rate": 0.0015, + "loss": 2.7059, + "step": 1596 + }, + { + "epoch": 0.16845991561181434, + "grad_norm": 0.5841014385223389, + "learning_rate": 0.0015, + "loss": 2.6946, + "step": 1597 + }, + { + "epoch": 0.16856540084388186, + "grad_norm": 0.824434220790863, + "learning_rate": 0.0015, + "loss": 2.7111, + "step": 1598 + }, + { + "epoch": 0.16867088607594938, + "grad_norm": 0.6831082105636597, + "learning_rate": 0.0015, + "loss": 2.6991, + "step": 1599 + }, + { + "epoch": 0.16877637130801687, + "grad_norm": 0.5571727156639099, + "learning_rate": 0.0015, + "loss": 2.73, + "step": 1600 + }, + { + "epoch": 0.1688818565400844, + "grad_norm": 0.8566171526908875, + "learning_rate": 0.0015, + "loss": 2.697, + "step": 1601 + }, + { + "epoch": 0.1689873417721519, + "grad_norm": 0.5415863394737244, + "learning_rate": 0.0015, + "loss": 2.7064, + "step": 1602 + }, + { + "epoch": 0.1690928270042194, + "grad_norm": 0.5697672367095947, + "learning_rate": 0.0015, + "loss": 2.7145, + "step": 1603 + }, + { + "epoch": 0.16919831223628692, + "grad_norm": 0.6877610087394714, + "learning_rate": 0.0015, + "loss": 2.6689, + "step": 1604 + }, + { + "epoch": 0.16930379746835442, + "grad_norm": 0.47764232754707336, + "learning_rate": 0.0015, + "loss": 2.6622, + "step": 1605 + }, + { + "epoch": 0.16940928270042194, + "grad_norm": 0.5416072010993958, + "learning_rate": 0.0015, + "loss": 2.7005, + "step": 1606 + }, + { + "epoch": 0.16951476793248946, + "grad_norm": 0.46019652485847473, + "learning_rate": 0.0015, + "loss": 
2.7211, + "step": 1607 + }, + { + "epoch": 0.16962025316455695, + "grad_norm": 0.46187588572502136, + "learning_rate": 0.0015, + "loss": 2.6707, + "step": 1608 + }, + { + "epoch": 0.16972573839662447, + "grad_norm": 0.44410428404808044, + "learning_rate": 0.0015, + "loss": 2.6954, + "step": 1609 + }, + { + "epoch": 0.169831223628692, + "grad_norm": 0.3880448043346405, + "learning_rate": 0.0015, + "loss": 2.7075, + "step": 1610 + }, + { + "epoch": 0.16993670886075948, + "grad_norm": 0.5183799266815186, + "learning_rate": 0.0015, + "loss": 2.6604, + "step": 1611 + }, + { + "epoch": 0.170042194092827, + "grad_norm": 0.3754582107067108, + "learning_rate": 0.0015, + "loss": 2.6434, + "step": 1612 + }, + { + "epoch": 0.17014767932489452, + "grad_norm": 0.44736260175704956, + "learning_rate": 0.0015, + "loss": 2.6579, + "step": 1613 + }, + { + "epoch": 0.17025316455696202, + "grad_norm": 0.4734921455383301, + "learning_rate": 0.0015, + "loss": 2.6896, + "step": 1614 + }, + { + "epoch": 0.17035864978902954, + "grad_norm": 0.43359291553497314, + "learning_rate": 0.0015, + "loss": 2.6728, + "step": 1615 + }, + { + "epoch": 0.17046413502109706, + "grad_norm": 0.4890199899673462, + "learning_rate": 0.0015, + "loss": 2.6828, + "step": 1616 + }, + { + "epoch": 0.17056962025316455, + "grad_norm": 0.4323119819164276, + "learning_rate": 0.0015, + "loss": 2.6629, + "step": 1617 + }, + { + "epoch": 0.17067510548523207, + "grad_norm": 0.48427316546440125, + "learning_rate": 0.0015, + "loss": 2.6694, + "step": 1618 + }, + { + "epoch": 0.1707805907172996, + "grad_norm": 0.4372445046901703, + "learning_rate": 0.0015, + "loss": 2.6903, + "step": 1619 + }, + { + "epoch": 0.17088607594936708, + "grad_norm": 0.4480740427970886, + "learning_rate": 0.0015, + "loss": 2.6902, + "step": 1620 + }, + { + "epoch": 0.1709915611814346, + "grad_norm": 0.47956758737564087, + "learning_rate": 0.0015, + "loss": 2.7026, + "step": 1621 + }, + { + "epoch": 0.1710970464135021, + "grad_norm": 
0.4706164300441742, + "learning_rate": 0.0015, + "loss": 2.7089, + "step": 1622 + }, + { + "epoch": 0.17120253164556962, + "grad_norm": 0.40603166818618774, + "learning_rate": 0.0015, + "loss": 2.6963, + "step": 1623 + }, + { + "epoch": 0.17130801687763714, + "grad_norm": 0.4787512421607971, + "learning_rate": 0.0015, + "loss": 2.6596, + "step": 1624 + }, + { + "epoch": 0.17141350210970463, + "grad_norm": 0.4685289263725281, + "learning_rate": 0.0015, + "loss": 2.6918, + "step": 1625 + }, + { + "epoch": 0.17151898734177215, + "grad_norm": 0.47233474254608154, + "learning_rate": 0.0015, + "loss": 2.6659, + "step": 1626 + }, + { + "epoch": 0.17162447257383967, + "grad_norm": 0.4361104965209961, + "learning_rate": 0.0015, + "loss": 2.6559, + "step": 1627 + }, + { + "epoch": 0.17172995780590716, + "grad_norm": 0.4988097846508026, + "learning_rate": 0.0015, + "loss": 2.6752, + "step": 1628 + }, + { + "epoch": 0.17183544303797468, + "grad_norm": 0.5087558031082153, + "learning_rate": 0.0015, + "loss": 2.6898, + "step": 1629 + }, + { + "epoch": 0.1719409282700422, + "grad_norm": 0.37765514850616455, + "learning_rate": 0.0015, + "loss": 2.6677, + "step": 1630 + }, + { + "epoch": 0.1720464135021097, + "grad_norm": 0.5341050624847412, + "learning_rate": 0.0015, + "loss": 2.6829, + "step": 1631 + }, + { + "epoch": 0.17215189873417722, + "grad_norm": 0.46676284074783325, + "learning_rate": 0.0015, + "loss": 2.6922, + "step": 1632 + }, + { + "epoch": 0.17225738396624474, + "grad_norm": 0.41728633642196655, + "learning_rate": 0.0015, + "loss": 2.6854, + "step": 1633 + }, + { + "epoch": 0.17236286919831223, + "grad_norm": 0.4679127335548401, + "learning_rate": 0.0015, + "loss": 2.6918, + "step": 1634 + }, + { + "epoch": 0.17246835443037975, + "grad_norm": 0.42945048213005066, + "learning_rate": 0.0015, + "loss": 2.6741, + "step": 1635 + }, + { + "epoch": 0.17257383966244727, + "grad_norm": 0.42323508858680725, + "learning_rate": 0.0015, + "loss": 2.6723, + "step": 1636 + }, + { + 
"epoch": 0.17267932489451476, + "grad_norm": 0.46580034494400024, + "learning_rate": 0.0015, + "loss": 2.6749, + "step": 1637 + }, + { + "epoch": 0.17278481012658228, + "grad_norm": 0.4224132299423218, + "learning_rate": 0.0015, + "loss": 2.6813, + "step": 1638 + }, + { + "epoch": 0.17289029535864978, + "grad_norm": 0.4943835735321045, + "learning_rate": 0.0015, + "loss": 2.692, + "step": 1639 + }, + { + "epoch": 0.1729957805907173, + "grad_norm": 0.4891497492790222, + "learning_rate": 0.0015, + "loss": 2.6461, + "step": 1640 + }, + { + "epoch": 0.17310126582278482, + "grad_norm": 0.44449499249458313, + "learning_rate": 0.0015, + "loss": 2.6746, + "step": 1641 + }, + { + "epoch": 0.1732067510548523, + "grad_norm": 0.5422544479370117, + "learning_rate": 0.0015, + "loss": 2.6558, + "step": 1642 + }, + { + "epoch": 0.17331223628691983, + "grad_norm": 0.45045775175094604, + "learning_rate": 0.0015, + "loss": 2.6802, + "step": 1643 + }, + { + "epoch": 0.17341772151898735, + "grad_norm": 0.4124016761779785, + "learning_rate": 0.0015, + "loss": 2.6683, + "step": 1644 + }, + { + "epoch": 0.17352320675105484, + "grad_norm": 0.5098683834075928, + "learning_rate": 0.0015, + "loss": 2.6785, + "step": 1645 + }, + { + "epoch": 0.17362869198312236, + "grad_norm": 0.3945719003677368, + "learning_rate": 0.0015, + "loss": 2.6656, + "step": 1646 + }, + { + "epoch": 0.17373417721518988, + "grad_norm": 0.47477173805236816, + "learning_rate": 0.0015, + "loss": 2.6616, + "step": 1647 + }, + { + "epoch": 0.17383966244725738, + "grad_norm": 0.4771926701068878, + "learning_rate": 0.0015, + "loss": 2.6691, + "step": 1648 + }, + { + "epoch": 0.1739451476793249, + "grad_norm": 0.38090401887893677, + "learning_rate": 0.0015, + "loss": 2.6877, + "step": 1649 + }, + { + "epoch": 0.17405063291139242, + "grad_norm": 0.5173758268356323, + "learning_rate": 0.0015, + "loss": 2.6838, + "step": 1650 + }, + { + "epoch": 0.1741561181434599, + "grad_norm": 0.44456955790519714, + "learning_rate": 0.0015, + 
"loss": 2.6751, + "step": 1651 + }, + { + "epoch": 0.17426160337552743, + "grad_norm": 0.4908968210220337, + "learning_rate": 0.0015, + "loss": 2.6459, + "step": 1652 + }, + { + "epoch": 0.17436708860759495, + "grad_norm": 0.7334900498390198, + "learning_rate": 0.0015, + "loss": 2.6362, + "step": 1653 + }, + { + "epoch": 0.17447257383966244, + "grad_norm": 0.6145528554916382, + "learning_rate": 0.0015, + "loss": 2.6919, + "step": 1654 + }, + { + "epoch": 0.17457805907172996, + "grad_norm": 0.579958438873291, + "learning_rate": 0.0015, + "loss": 2.6659, + "step": 1655 + }, + { + "epoch": 0.17468354430379746, + "grad_norm": 0.9881561398506165, + "learning_rate": 0.0015, + "loss": 2.7013, + "step": 1656 + }, + { + "epoch": 0.17478902953586498, + "grad_norm": 0.6457716822624207, + "learning_rate": 0.0015, + "loss": 2.6962, + "step": 1657 + }, + { + "epoch": 0.1748945147679325, + "grad_norm": 0.6905468106269836, + "learning_rate": 0.0015, + "loss": 2.6863, + "step": 1658 + }, + { + "epoch": 0.175, + "grad_norm": 0.7087091207504272, + "learning_rate": 0.0015, + "loss": 2.6927, + "step": 1659 + }, + { + "epoch": 0.1751054852320675, + "grad_norm": 0.5394203662872314, + "learning_rate": 0.0015, + "loss": 2.6565, + "step": 1660 + }, + { + "epoch": 0.17521097046413503, + "grad_norm": 0.6894462704658508, + "learning_rate": 0.0015, + "loss": 2.7195, + "step": 1661 + }, + { + "epoch": 0.17531645569620252, + "grad_norm": 0.4492405652999878, + "learning_rate": 0.0015, + "loss": 2.6745, + "step": 1662 + }, + { + "epoch": 0.17542194092827004, + "grad_norm": 0.6275168061256409, + "learning_rate": 0.0015, + "loss": 2.677, + "step": 1663 + }, + { + "epoch": 0.17552742616033756, + "grad_norm": 0.45525550842285156, + "learning_rate": 0.0015, + "loss": 2.6631, + "step": 1664 + }, + { + "epoch": 0.17563291139240506, + "grad_norm": 0.5102692246437073, + "learning_rate": 0.0015, + "loss": 2.683, + "step": 1665 + }, + { + "epoch": 0.17573839662447258, + "grad_norm": 0.4354912042617798, + 
"learning_rate": 0.0015, + "loss": 2.7046, + "step": 1666 + }, + { + "epoch": 0.1758438818565401, + "grad_norm": 0.5065811276435852, + "learning_rate": 0.0015, + "loss": 2.6864, + "step": 1667 + }, + { + "epoch": 0.1759493670886076, + "grad_norm": 0.4788636267185211, + "learning_rate": 0.0015, + "loss": 2.6686, + "step": 1668 + }, + { + "epoch": 0.1760548523206751, + "grad_norm": 0.5696634650230408, + "learning_rate": 0.0015, + "loss": 2.6487, + "step": 1669 + }, + { + "epoch": 0.17616033755274263, + "grad_norm": 0.5338993072509766, + "learning_rate": 0.0015, + "loss": 2.6459, + "step": 1670 + }, + { + "epoch": 0.17626582278481012, + "grad_norm": 0.43971899151802063, + "learning_rate": 0.0015, + "loss": 2.6319, + "step": 1671 + }, + { + "epoch": 0.17637130801687764, + "grad_norm": 0.5882403254508972, + "learning_rate": 0.0015, + "loss": 2.6665, + "step": 1672 + }, + { + "epoch": 0.17647679324894514, + "grad_norm": 0.3795609176158905, + "learning_rate": 0.0015, + "loss": 2.6934, + "step": 1673 + }, + { + "epoch": 0.17658227848101266, + "grad_norm": 0.4887135326862335, + "learning_rate": 0.0015, + "loss": 2.6823, + "step": 1674 + }, + { + "epoch": 0.17668776371308018, + "grad_norm": 0.4657441973686218, + "learning_rate": 0.0015, + "loss": 2.6599, + "step": 1675 + }, + { + "epoch": 0.17679324894514767, + "grad_norm": 0.4498196244239807, + "learning_rate": 0.0015, + "loss": 2.6725, + "step": 1676 + }, + { + "epoch": 0.1768987341772152, + "grad_norm": 0.5465762615203857, + "learning_rate": 0.0015, + "loss": 2.6396, + "step": 1677 + }, + { + "epoch": 0.1770042194092827, + "grad_norm": 0.44330522418022156, + "learning_rate": 0.0015, + "loss": 2.698, + "step": 1678 + }, + { + "epoch": 0.1771097046413502, + "grad_norm": 0.5805647969245911, + "learning_rate": 0.0015, + "loss": 2.6351, + "step": 1679 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 0.6048887372016907, + "learning_rate": 0.0015, + "loss": 2.6936, + "step": 1680 + }, + { + "epoch": 0.17732067510548524, + 
"grad_norm": 0.5321792960166931, + "learning_rate": 0.0015, + "loss": 2.6729, + "step": 1681 + }, + { + "epoch": 0.17742616033755274, + "grad_norm": 0.5125168561935425, + "learning_rate": 0.0015, + "loss": 2.665, + "step": 1682 + }, + { + "epoch": 0.17753164556962026, + "grad_norm": 0.7131422162055969, + "learning_rate": 0.0015, + "loss": 2.7, + "step": 1683 + }, + { + "epoch": 0.17763713080168778, + "grad_norm": 0.4981690049171448, + "learning_rate": 0.0015, + "loss": 2.6699, + "step": 1684 + }, + { + "epoch": 0.17774261603375527, + "grad_norm": 0.5056214332580566, + "learning_rate": 0.0015, + "loss": 2.6375, + "step": 1685 + }, + { + "epoch": 0.1778481012658228, + "grad_norm": 0.5562819242477417, + "learning_rate": 0.0015, + "loss": 2.6228, + "step": 1686 + }, + { + "epoch": 0.17795358649789028, + "grad_norm": 0.450074166059494, + "learning_rate": 0.0015, + "loss": 2.665, + "step": 1687 + }, + { + "epoch": 0.1780590717299578, + "grad_norm": 0.47937852144241333, + "learning_rate": 0.0015, + "loss": 2.6274, + "step": 1688 + }, + { + "epoch": 0.17816455696202532, + "grad_norm": 0.4065687954425812, + "learning_rate": 0.0015, + "loss": 2.6721, + "step": 1689 + }, + { + "epoch": 0.17827004219409281, + "grad_norm": 0.5151090025901794, + "learning_rate": 0.0015, + "loss": 2.656, + "step": 1690 + }, + { + "epoch": 0.17837552742616034, + "grad_norm": 0.39196884632110596, + "learning_rate": 0.0015, + "loss": 2.6891, + "step": 1691 + }, + { + "epoch": 0.17848101265822786, + "grad_norm": 0.4143036901950836, + "learning_rate": 0.0015, + "loss": 2.668, + "step": 1692 + }, + { + "epoch": 0.17858649789029535, + "grad_norm": 0.4394056499004364, + "learning_rate": 0.0015, + "loss": 2.6417, + "step": 1693 + }, + { + "epoch": 0.17869198312236287, + "grad_norm": 0.3988606631755829, + "learning_rate": 0.0015, + "loss": 2.6641, + "step": 1694 + }, + { + "epoch": 0.1787974683544304, + "grad_norm": 0.4357118308544159, + "learning_rate": 0.0015, + "loss": 2.6458, + "step": 1695 + }, + { + 
"epoch": 0.17890295358649788, + "grad_norm": 0.36867472529411316, + "learning_rate": 0.0015, + "loss": 2.6661, + "step": 1696 + }, + { + "epoch": 0.1790084388185654, + "grad_norm": 0.4144207239151001, + "learning_rate": 0.0015, + "loss": 2.6409, + "step": 1697 + }, + { + "epoch": 0.17911392405063292, + "grad_norm": 0.42774444818496704, + "learning_rate": 0.0015, + "loss": 2.6512, + "step": 1698 + }, + { + "epoch": 0.17921940928270041, + "grad_norm": 0.3681671619415283, + "learning_rate": 0.0015, + "loss": 2.6903, + "step": 1699 + }, + { + "epoch": 0.17932489451476794, + "grad_norm": 0.4302609860897064, + "learning_rate": 0.0015, + "loss": 2.675, + "step": 1700 + }, + { + "epoch": 0.17943037974683546, + "grad_norm": 0.47765466570854187, + "learning_rate": 0.0015, + "loss": 2.6625, + "step": 1701 + }, + { + "epoch": 0.17953586497890295, + "grad_norm": 0.3620653450489044, + "learning_rate": 0.0015, + "loss": 2.6547, + "step": 1702 + }, + { + "epoch": 0.17964135021097047, + "grad_norm": 0.4399275481700897, + "learning_rate": 0.0015, + "loss": 2.6202, + "step": 1703 + }, + { + "epoch": 0.17974683544303796, + "grad_norm": 0.44350284337997437, + "learning_rate": 0.0015, + "loss": 2.6577, + "step": 1704 + }, + { + "epoch": 0.17985232067510548, + "grad_norm": 0.39279282093048096, + "learning_rate": 0.0015, + "loss": 2.6487, + "step": 1705 + }, + { + "epoch": 0.179957805907173, + "grad_norm": 0.3959294259548187, + "learning_rate": 0.0015, + "loss": 2.6429, + "step": 1706 + }, + { + "epoch": 0.1800632911392405, + "grad_norm": 0.4091836214065552, + "learning_rate": 0.0015, + "loss": 2.6641, + "step": 1707 + }, + { + "epoch": 0.18016877637130801, + "grad_norm": 0.46646758913993835, + "learning_rate": 0.0015, + "loss": 2.6626, + "step": 1708 + }, + { + "epoch": 0.18027426160337554, + "grad_norm": 0.454160213470459, + "learning_rate": 0.0015, + "loss": 2.6289, + "step": 1709 + }, + { + "epoch": 0.18037974683544303, + "grad_norm": 0.422782301902771, + "learning_rate": 0.0015, + 
"loss": 2.6847, + "step": 1710 + }, + { + "epoch": 0.18048523206751055, + "grad_norm": 0.45958152413368225, + "learning_rate": 0.0015, + "loss": 2.6634, + "step": 1711 + }, + { + "epoch": 0.18059071729957807, + "grad_norm": 0.46928876638412476, + "learning_rate": 0.0015, + "loss": 2.6684, + "step": 1712 + }, + { + "epoch": 0.18069620253164556, + "grad_norm": 0.4914240837097168, + "learning_rate": 0.0015, + "loss": 2.6428, + "step": 1713 + }, + { + "epoch": 0.18080168776371308, + "grad_norm": 0.4752049148082733, + "learning_rate": 0.0015, + "loss": 2.6468, + "step": 1714 + }, + { + "epoch": 0.1809071729957806, + "grad_norm": 0.49168485403060913, + "learning_rate": 0.0015, + "loss": 2.6265, + "step": 1715 + }, + { + "epoch": 0.1810126582278481, + "grad_norm": 0.4285229742527008, + "learning_rate": 0.0015, + "loss": 2.6469, + "step": 1716 + }, + { + "epoch": 0.18111814345991561, + "grad_norm": 0.46837037801742554, + "learning_rate": 0.0015, + "loss": 2.6371, + "step": 1717 + }, + { + "epoch": 0.18122362869198314, + "grad_norm": 0.41020655632019043, + "learning_rate": 0.0015, + "loss": 2.642, + "step": 1718 + }, + { + "epoch": 0.18132911392405063, + "grad_norm": 0.37446555495262146, + "learning_rate": 0.0015, + "loss": 2.6404, + "step": 1719 + }, + { + "epoch": 0.18143459915611815, + "grad_norm": 0.37812599539756775, + "learning_rate": 0.0015, + "loss": 2.673, + "step": 1720 + }, + { + "epoch": 0.18154008438818564, + "grad_norm": 0.45243731141090393, + "learning_rate": 0.0015, + "loss": 2.6393, + "step": 1721 + }, + { + "epoch": 0.18164556962025316, + "grad_norm": 0.47596850991249084, + "learning_rate": 0.0015, + "loss": 2.6162, + "step": 1722 + }, + { + "epoch": 0.18175105485232068, + "grad_norm": 0.4330315887928009, + "learning_rate": 0.0015, + "loss": 2.6447, + "step": 1723 + }, + { + "epoch": 0.18185654008438817, + "grad_norm": 0.4147796034812927, + "learning_rate": 0.0015, + "loss": 2.6392, + "step": 1724 + }, + { + "epoch": 0.1819620253164557, + "grad_norm": 
0.45582693815231323, + "learning_rate": 0.0015, + "loss": 2.6334, + "step": 1725 + }, + { + "epoch": 0.18206751054852321, + "grad_norm": 0.3955390751361847, + "learning_rate": 0.0015, + "loss": 2.6591, + "step": 1726 + }, + { + "epoch": 0.1821729957805907, + "grad_norm": 0.4543634057044983, + "learning_rate": 0.0015, + "loss": 2.6719, + "step": 1727 + }, + { + "epoch": 0.18227848101265823, + "grad_norm": 0.4109266698360443, + "learning_rate": 0.0015, + "loss": 2.6426, + "step": 1728 + }, + { + "epoch": 0.18238396624472575, + "grad_norm": 0.4265183210372925, + "learning_rate": 0.0015, + "loss": 2.6556, + "step": 1729 + }, + { + "epoch": 0.18248945147679324, + "grad_norm": 0.41550973057746887, + "learning_rate": 0.0015, + "loss": 2.6618, + "step": 1730 + }, + { + "epoch": 0.18259493670886076, + "grad_norm": 0.3854300379753113, + "learning_rate": 0.0015, + "loss": 2.6432, + "step": 1731 + }, + { + "epoch": 0.18270042194092828, + "grad_norm": 0.3768821358680725, + "learning_rate": 0.0015, + "loss": 2.6422, + "step": 1732 + }, + { + "epoch": 0.18280590717299577, + "grad_norm": 0.3881377577781677, + "learning_rate": 0.0015, + "loss": 2.6287, + "step": 1733 + }, + { + "epoch": 0.1829113924050633, + "grad_norm": 0.3650208115577698, + "learning_rate": 0.0015, + "loss": 2.6348, + "step": 1734 + }, + { + "epoch": 0.18301687763713081, + "grad_norm": 0.39299750328063965, + "learning_rate": 0.0015, + "loss": 2.6597, + "step": 1735 + }, + { + "epoch": 0.1831223628691983, + "grad_norm": 0.4385109841823578, + "learning_rate": 0.0015, + "loss": 2.66, + "step": 1736 + }, + { + "epoch": 0.18322784810126583, + "grad_norm": 0.34645166993141174, + "learning_rate": 0.0015, + "loss": 2.6344, + "step": 1737 + }, + { + "epoch": 0.18333333333333332, + "grad_norm": 0.5122538805007935, + "learning_rate": 0.0015, + "loss": 2.6367, + "step": 1738 + }, + { + "epoch": 0.18343881856540084, + "grad_norm": 0.4806522727012634, + "learning_rate": 0.0015, + "loss": 2.6836, + "step": 1739 + }, + { + 
"epoch": 0.18354430379746836, + "grad_norm": 0.36930906772613525, + "learning_rate": 0.0015, + "loss": 2.6702, + "step": 1740 + }, + { + "epoch": 0.18364978902953585, + "grad_norm": 0.6573727130889893, + "learning_rate": 0.0015, + "loss": 2.6663, + "step": 1741 + }, + { + "epoch": 0.18375527426160337, + "grad_norm": 0.6350655555725098, + "learning_rate": 0.0015, + "loss": 2.6754, + "step": 1742 + }, + { + "epoch": 0.1838607594936709, + "grad_norm": 0.3637509047985077, + "learning_rate": 0.0015, + "loss": 2.6468, + "step": 1743 + }, + { + "epoch": 0.1839662447257384, + "grad_norm": 0.5940505266189575, + "learning_rate": 0.0015, + "loss": 2.6774, + "step": 1744 + }, + { + "epoch": 0.1840717299578059, + "grad_norm": 0.42591384053230286, + "learning_rate": 0.0015, + "loss": 2.6491, + "step": 1745 + }, + { + "epoch": 0.18417721518987343, + "grad_norm": 0.43996667861938477, + "learning_rate": 0.0015, + "loss": 2.686, + "step": 1746 + }, + { + "epoch": 0.18428270042194092, + "grad_norm": 0.5234412550926208, + "learning_rate": 0.0015, + "loss": 2.6833, + "step": 1747 + }, + { + "epoch": 0.18438818565400844, + "grad_norm": 0.47828978300094604, + "learning_rate": 0.0015, + "loss": 2.6564, + "step": 1748 + }, + { + "epoch": 0.18449367088607596, + "grad_norm": 0.4260103106498718, + "learning_rate": 0.0015, + "loss": 2.6726, + "step": 1749 + }, + { + "epoch": 0.18459915611814345, + "grad_norm": 0.46488526463508606, + "learning_rate": 0.0015, + "loss": 2.6636, + "step": 1750 + }, + { + "epoch": 0.18470464135021097, + "grad_norm": 0.48951253294944763, + "learning_rate": 0.0015, + "loss": 2.647, + "step": 1751 + }, + { + "epoch": 0.1848101265822785, + "grad_norm": 0.3754575550556183, + "learning_rate": 0.0015, + "loss": 2.6507, + "step": 1752 + }, + { + "epoch": 0.184915611814346, + "grad_norm": 0.43996915221214294, + "learning_rate": 0.0015, + "loss": 2.6435, + "step": 1753 + }, + { + "epoch": 0.1850210970464135, + "grad_norm": 0.36103877425193787, + "learning_rate": 0.0015, + 
"loss": 2.6437, + "step": 1754 + }, + { + "epoch": 0.185126582278481, + "grad_norm": 0.4442853331565857, + "learning_rate": 0.0015, + "loss": 2.6537, + "step": 1755 + }, + { + "epoch": 0.18523206751054852, + "grad_norm": 0.4577201008796692, + "learning_rate": 0.0015, + "loss": 2.6234, + "step": 1756 + }, + { + "epoch": 0.18533755274261604, + "grad_norm": 0.4048396050930023, + "learning_rate": 0.0015, + "loss": 2.6713, + "step": 1757 + }, + { + "epoch": 0.18544303797468353, + "grad_norm": 0.45493417978286743, + "learning_rate": 0.0015, + "loss": 2.6588, + "step": 1758 + }, + { + "epoch": 0.18554852320675105, + "grad_norm": 0.5380886793136597, + "learning_rate": 0.0015, + "loss": 2.6509, + "step": 1759 + }, + { + "epoch": 0.18565400843881857, + "grad_norm": 0.4839567542076111, + "learning_rate": 0.0015, + "loss": 2.6236, + "step": 1760 + }, + { + "epoch": 0.18575949367088607, + "grad_norm": 0.3915058374404907, + "learning_rate": 0.0015, + "loss": 2.6419, + "step": 1761 + }, + { + "epoch": 0.1858649789029536, + "grad_norm": 0.40922579169273376, + "learning_rate": 0.0015, + "loss": 2.661, + "step": 1762 + }, + { + "epoch": 0.1859704641350211, + "grad_norm": 0.4944421350955963, + "learning_rate": 0.0015, + "loss": 2.6379, + "step": 1763 + }, + { + "epoch": 0.1860759493670886, + "grad_norm": 0.5242069959640503, + "learning_rate": 0.0015, + "loss": 2.6391, + "step": 1764 + }, + { + "epoch": 0.18618143459915612, + "grad_norm": 0.38007134199142456, + "learning_rate": 0.0015, + "loss": 2.66, + "step": 1765 + }, + { + "epoch": 0.18628691983122364, + "grad_norm": 0.3998432159423828, + "learning_rate": 0.0015, + "loss": 2.6286, + "step": 1766 + }, + { + "epoch": 0.18639240506329113, + "grad_norm": 0.5454912781715393, + "learning_rate": 0.0015, + "loss": 2.662, + "step": 1767 + }, + { + "epoch": 0.18649789029535865, + "grad_norm": 0.47067034244537354, + "learning_rate": 0.0015, + "loss": 2.6429, + "step": 1768 + }, + { + "epoch": 0.18660337552742617, + "grad_norm": 
0.47926080226898193, + "learning_rate": 0.0015, + "loss": 2.6554, + "step": 1769 + }, + { + "epoch": 0.18670886075949367, + "grad_norm": 0.5984610319137573, + "learning_rate": 0.0015, + "loss": 2.6566, + "step": 1770 + }, + { + "epoch": 0.1868143459915612, + "grad_norm": 0.5955515503883362, + "learning_rate": 0.0015, + "loss": 2.6601, + "step": 1771 + }, + { + "epoch": 0.18691983122362868, + "grad_norm": 0.4215623438358307, + "learning_rate": 0.0015, + "loss": 2.6529, + "step": 1772 + }, + { + "epoch": 0.1870253164556962, + "grad_norm": 0.7792022824287415, + "learning_rate": 0.0015, + "loss": 2.6207, + "step": 1773 + }, + { + "epoch": 0.18713080168776372, + "grad_norm": 0.6396433711051941, + "learning_rate": 0.0015, + "loss": 2.6864, + "step": 1774 + }, + { + "epoch": 0.1872362869198312, + "grad_norm": 0.5176938772201538, + "learning_rate": 0.0015, + "loss": 2.6877, + "step": 1775 + }, + { + "epoch": 0.18734177215189873, + "grad_norm": 0.9113414883613586, + "learning_rate": 0.0015, + "loss": 2.6689, + "step": 1776 + }, + { + "epoch": 0.18744725738396625, + "grad_norm": 0.6943142414093018, + "learning_rate": 0.0015, + "loss": 2.6523, + "step": 1777 + }, + { + "epoch": 0.18755274261603375, + "grad_norm": 0.6021560430526733, + "learning_rate": 0.0015, + "loss": 2.6108, + "step": 1778 + }, + { + "epoch": 0.18765822784810127, + "grad_norm": 0.8015873432159424, + "learning_rate": 0.0015, + "loss": 2.6436, + "step": 1779 + }, + { + "epoch": 0.1877637130801688, + "grad_norm": 0.5421191453933716, + "learning_rate": 0.0015, + "loss": 2.6511, + "step": 1780 + }, + { + "epoch": 0.18786919831223628, + "grad_norm": 0.5187629461288452, + "learning_rate": 0.0015, + "loss": 2.6413, + "step": 1781 + }, + { + "epoch": 0.1879746835443038, + "grad_norm": 0.5701574087142944, + "learning_rate": 0.0015, + "loss": 2.6959, + "step": 1782 + }, + { + "epoch": 0.18808016877637132, + "grad_norm": 0.5268704295158386, + "learning_rate": 0.0015, + "loss": 2.6464, + "step": 1783 + }, + { + "epoch": 
0.1881856540084388, + "grad_norm": 0.4775199592113495, + "learning_rate": 0.0015, + "loss": 2.6443, + "step": 1784 + }, + { + "epoch": 0.18829113924050633, + "grad_norm": 0.6284894943237305, + "learning_rate": 0.0015, + "loss": 2.6295, + "step": 1785 + }, + { + "epoch": 0.18839662447257383, + "grad_norm": 0.5539766550064087, + "learning_rate": 0.0015, + "loss": 2.628, + "step": 1786 + }, + { + "epoch": 0.18850210970464135, + "grad_norm": 0.4498697519302368, + "learning_rate": 0.0015, + "loss": 2.6362, + "step": 1787 + }, + { + "epoch": 0.18860759493670887, + "grad_norm": 0.6480385065078735, + "learning_rate": 0.0015, + "loss": 2.6521, + "step": 1788 + }, + { + "epoch": 0.18871308016877636, + "grad_norm": 0.5737924575805664, + "learning_rate": 0.0015, + "loss": 2.6397, + "step": 1789 + }, + { + "epoch": 0.18881856540084388, + "grad_norm": 0.5107496976852417, + "learning_rate": 0.0015, + "loss": 2.6377, + "step": 1790 + }, + { + "epoch": 0.1889240506329114, + "grad_norm": 0.5114548802375793, + "learning_rate": 0.0015, + "loss": 2.6065, + "step": 1791 + }, + { + "epoch": 0.1890295358649789, + "grad_norm": 0.5089956521987915, + "learning_rate": 0.0015, + "loss": 2.6403, + "step": 1792 + }, + { + "epoch": 0.1891350210970464, + "grad_norm": 0.5401355624198914, + "learning_rate": 0.0015, + "loss": 2.6712, + "step": 1793 + }, + { + "epoch": 0.18924050632911393, + "grad_norm": 0.47141584753990173, + "learning_rate": 0.0015, + "loss": 2.7097, + "step": 1794 + }, + { + "epoch": 0.18934599156118143, + "grad_norm": 0.479478120803833, + "learning_rate": 0.0015, + "loss": 2.6425, + "step": 1795 + }, + { + "epoch": 0.18945147679324895, + "grad_norm": 0.42200803756713867, + "learning_rate": 0.0015, + "loss": 2.6526, + "step": 1796 + }, + { + "epoch": 0.18955696202531647, + "grad_norm": 0.5233957767486572, + "learning_rate": 0.0015, + "loss": 2.6462, + "step": 1797 + }, + { + "epoch": 0.18966244725738396, + "grad_norm": 0.4030999541282654, + "learning_rate": 0.0015, + "loss": 
2.6547, + "step": 1798 + }, + { + "epoch": 0.18976793248945148, + "grad_norm": 0.4576791524887085, + "learning_rate": 0.0015, + "loss": 2.6485, + "step": 1799 + }, + { + "epoch": 0.189873417721519, + "grad_norm": 0.41674160957336426, + "learning_rate": 0.0015, + "loss": 2.634, + "step": 1800 + }, + { + "epoch": 0.1899789029535865, + "grad_norm": 0.46542367339134216, + "learning_rate": 0.0015, + "loss": 2.603, + "step": 1801 + }, + { + "epoch": 0.190084388185654, + "grad_norm": 0.5270007252693176, + "learning_rate": 0.0015, + "loss": 2.6307, + "step": 1802 + }, + { + "epoch": 0.1901898734177215, + "grad_norm": 0.38396015763282776, + "learning_rate": 0.0015, + "loss": 2.6301, + "step": 1803 + }, + { + "epoch": 0.19029535864978903, + "grad_norm": 0.5024202466011047, + "learning_rate": 0.0015, + "loss": 2.6191, + "step": 1804 + }, + { + "epoch": 0.19040084388185655, + "grad_norm": 0.3507899343967438, + "learning_rate": 0.0015, + "loss": 2.6598, + "step": 1805 + }, + { + "epoch": 0.19050632911392404, + "grad_norm": 0.5364141464233398, + "learning_rate": 0.0015, + "loss": 2.6432, + "step": 1806 + }, + { + "epoch": 0.19061181434599156, + "grad_norm": 0.4091859757900238, + "learning_rate": 0.0015, + "loss": 2.6394, + "step": 1807 + }, + { + "epoch": 0.19071729957805908, + "grad_norm": 0.4593219459056854, + "learning_rate": 0.0015, + "loss": 2.6313, + "step": 1808 + }, + { + "epoch": 0.19082278481012657, + "grad_norm": 0.36020755767822266, + "learning_rate": 0.0015, + "loss": 2.6763, + "step": 1809 + }, + { + "epoch": 0.1909282700421941, + "grad_norm": 0.4429502487182617, + "learning_rate": 0.0015, + "loss": 2.6051, + "step": 1810 + }, + { + "epoch": 0.1910337552742616, + "grad_norm": 0.36146795749664307, + "learning_rate": 0.0015, + "loss": 2.5997, + "step": 1811 + }, + { + "epoch": 0.1911392405063291, + "grad_norm": 0.4242223799228668, + "learning_rate": 0.0015, + "loss": 2.6304, + "step": 1812 + }, + { + "epoch": 0.19124472573839663, + "grad_norm": 0.4527395963668823, + 
"learning_rate": 0.0015, + "loss": 2.6217, + "step": 1813 + }, + { + "epoch": 0.19135021097046415, + "grad_norm": 0.35330453515052795, + "learning_rate": 0.0015, + "loss": 2.6091, + "step": 1814 + }, + { + "epoch": 0.19145569620253164, + "grad_norm": 0.43358156085014343, + "learning_rate": 0.0015, + "loss": 2.6253, + "step": 1815 + }, + { + "epoch": 0.19156118143459916, + "grad_norm": 0.42647784948349, + "learning_rate": 0.0015, + "loss": 2.6497, + "step": 1816 + }, + { + "epoch": 0.19166666666666668, + "grad_norm": 0.42635348439216614, + "learning_rate": 0.0015, + "loss": 2.6043, + "step": 1817 + }, + { + "epoch": 0.19177215189873417, + "grad_norm": 0.41173309087753296, + "learning_rate": 0.0015, + "loss": 2.6001, + "step": 1818 + }, + { + "epoch": 0.1918776371308017, + "grad_norm": 0.43988049030303955, + "learning_rate": 0.0015, + "loss": 2.6387, + "step": 1819 + }, + { + "epoch": 0.19198312236286919, + "grad_norm": 0.41004425287246704, + "learning_rate": 0.0015, + "loss": 2.6721, + "step": 1820 + }, + { + "epoch": 0.1920886075949367, + "grad_norm": 0.44661998748779297, + "learning_rate": 0.0015, + "loss": 2.5992, + "step": 1821 + }, + { + "epoch": 0.19219409282700423, + "grad_norm": 0.4231852889060974, + "learning_rate": 0.0015, + "loss": 2.6332, + "step": 1822 + }, + { + "epoch": 0.19229957805907172, + "grad_norm": 0.47741881012916565, + "learning_rate": 0.0015, + "loss": 2.6293, + "step": 1823 + }, + { + "epoch": 0.19240506329113924, + "grad_norm": 0.3641420304775238, + "learning_rate": 0.0015, + "loss": 2.6441, + "step": 1824 + }, + { + "epoch": 0.19251054852320676, + "grad_norm": 0.41938456892967224, + "learning_rate": 0.0015, + "loss": 2.6378, + "step": 1825 + }, + { + "epoch": 0.19261603375527425, + "grad_norm": 0.3667297065258026, + "learning_rate": 0.0015, + "loss": 2.6247, + "step": 1826 + }, + { + "epoch": 0.19272151898734177, + "grad_norm": 0.35756704211235046, + "learning_rate": 0.0015, + "loss": 2.6089, + "step": 1827 + }, + { + "epoch": 
0.1928270042194093, + "grad_norm": 0.37722069025039673, + "learning_rate": 0.0015, + "loss": 2.5666, + "step": 1828 + }, + { + "epoch": 0.19293248945147679, + "grad_norm": 0.4125153124332428, + "learning_rate": 0.0015, + "loss": 2.614, + "step": 1829 + }, + { + "epoch": 0.1930379746835443, + "grad_norm": 0.3949940800666809, + "learning_rate": 0.0015, + "loss": 2.5989, + "step": 1830 + }, + { + "epoch": 0.19314345991561183, + "grad_norm": 0.42427489161491394, + "learning_rate": 0.0015, + "loss": 2.6177, + "step": 1831 + }, + { + "epoch": 0.19324894514767932, + "grad_norm": 0.37355947494506836, + "learning_rate": 0.0015, + "loss": 2.6341, + "step": 1832 + }, + { + "epoch": 0.19335443037974684, + "grad_norm": 0.3869922459125519, + "learning_rate": 0.0015, + "loss": 2.6535, + "step": 1833 + }, + { + "epoch": 0.19345991561181436, + "grad_norm": 0.3926865756511688, + "learning_rate": 0.0015, + "loss": 2.6438, + "step": 1834 + }, + { + "epoch": 0.19356540084388185, + "grad_norm": 0.4055884778499603, + "learning_rate": 0.0015, + "loss": 2.599, + "step": 1835 + }, + { + "epoch": 0.19367088607594937, + "grad_norm": 0.36052796244621277, + "learning_rate": 0.0015, + "loss": 2.6222, + "step": 1836 + }, + { + "epoch": 0.19377637130801686, + "grad_norm": 0.39562997221946716, + "learning_rate": 0.0015, + "loss": 2.5976, + "step": 1837 + }, + { + "epoch": 0.19388185654008439, + "grad_norm": 0.4271751940250397, + "learning_rate": 0.0015, + "loss": 2.6015, + "step": 1838 + }, + { + "epoch": 0.1939873417721519, + "grad_norm": 0.39201706647872925, + "learning_rate": 0.0015, + "loss": 2.6238, + "step": 1839 + }, + { + "epoch": 0.1940928270042194, + "grad_norm": 0.3508588373661041, + "learning_rate": 0.0015, + "loss": 2.6023, + "step": 1840 + }, + { + "epoch": 0.19419831223628692, + "grad_norm": 0.40121006965637207, + "learning_rate": 0.0015, + "loss": 2.6441, + "step": 1841 + }, + { + "epoch": 0.19430379746835444, + "grad_norm": 0.4151688814163208, + "learning_rate": 0.0015, + "loss": 
2.6261, + "step": 1842 + }, + { + "epoch": 0.19440928270042193, + "grad_norm": 0.3882525861263275, + "learning_rate": 0.0015, + "loss": 2.6265, + "step": 1843 + }, + { + "epoch": 0.19451476793248945, + "grad_norm": 0.4311433434486389, + "learning_rate": 0.0015, + "loss": 2.6423, + "step": 1844 + }, + { + "epoch": 0.19462025316455697, + "grad_norm": 0.5190882682800293, + "learning_rate": 0.0015, + "loss": 2.6191, + "step": 1845 + }, + { + "epoch": 0.19472573839662446, + "grad_norm": 0.39242708683013916, + "learning_rate": 0.0015, + "loss": 2.6265, + "step": 1846 + }, + { + "epoch": 0.19483122362869199, + "grad_norm": 0.4750228226184845, + "learning_rate": 0.0015, + "loss": 2.6421, + "step": 1847 + }, + { + "epoch": 0.1949367088607595, + "grad_norm": 0.5547316074371338, + "learning_rate": 0.0015, + "loss": 2.6018, + "step": 1848 + }, + { + "epoch": 0.195042194092827, + "grad_norm": 0.40657439827919006, + "learning_rate": 0.0015, + "loss": 2.577, + "step": 1849 + }, + { + "epoch": 0.19514767932489452, + "grad_norm": 0.4860747754573822, + "learning_rate": 0.0015, + "loss": 2.6509, + "step": 1850 + }, + { + "epoch": 0.19525316455696204, + "grad_norm": 0.5451474785804749, + "learning_rate": 0.0015, + "loss": 2.6078, + "step": 1851 + }, + { + "epoch": 0.19535864978902953, + "grad_norm": 0.45310351252555847, + "learning_rate": 0.0015, + "loss": 2.6334, + "step": 1852 + }, + { + "epoch": 0.19546413502109705, + "grad_norm": 0.43949946761131287, + "learning_rate": 0.0015, + "loss": 2.6004, + "step": 1853 + }, + { + "epoch": 0.19556962025316454, + "grad_norm": 0.5123153328895569, + "learning_rate": 0.0015, + "loss": 2.6404, + "step": 1854 + }, + { + "epoch": 0.19567510548523206, + "grad_norm": 0.3963913023471832, + "learning_rate": 0.0015, + "loss": 2.611, + "step": 1855 + }, + { + "epoch": 0.19578059071729959, + "grad_norm": 0.5090537071228027, + "learning_rate": 0.0015, + "loss": 2.6241, + "step": 1856 + }, + { + "epoch": 0.19588607594936708, + "grad_norm": 
0.5399793982505798, + "learning_rate": 0.0015, + "loss": 2.6268, + "step": 1857 + }, + { + "epoch": 0.1959915611814346, + "grad_norm": 0.46462303400039673, + "learning_rate": 0.0015, + "loss": 2.6318, + "step": 1858 + }, + { + "epoch": 0.19609704641350212, + "grad_norm": 0.5340750813484192, + "learning_rate": 0.0015, + "loss": 2.6273, + "step": 1859 + }, + { + "epoch": 0.1962025316455696, + "grad_norm": 0.44384080171585083, + "learning_rate": 0.0015, + "loss": 2.6736, + "step": 1860 + }, + { + "epoch": 0.19630801687763713, + "grad_norm": 0.44626039266586304, + "learning_rate": 0.0015, + "loss": 2.6303, + "step": 1861 + }, + { + "epoch": 0.19641350210970465, + "grad_norm": 0.42490091919898987, + "learning_rate": 0.0015, + "loss": 2.6129, + "step": 1862 + }, + { + "epoch": 0.19651898734177214, + "grad_norm": 0.5230178833007812, + "learning_rate": 0.0015, + "loss": 2.6876, + "step": 1863 + }, + { + "epoch": 0.19662447257383966, + "grad_norm": 0.5862250328063965, + "learning_rate": 0.0015, + "loss": 2.6148, + "step": 1864 + }, + { + "epoch": 0.19672995780590719, + "grad_norm": 0.390642911195755, + "learning_rate": 0.0015, + "loss": 2.6058, + "step": 1865 + }, + { + "epoch": 0.19683544303797468, + "grad_norm": 0.5948270559310913, + "learning_rate": 0.0015, + "loss": 2.6068, + "step": 1866 + }, + { + "epoch": 0.1969409282700422, + "grad_norm": 0.569868803024292, + "learning_rate": 0.0015, + "loss": 2.6282, + "step": 1867 + }, + { + "epoch": 0.19704641350210972, + "grad_norm": 0.5777739882469177, + "learning_rate": 0.0015, + "loss": 2.5714, + "step": 1868 + }, + { + "epoch": 0.1971518987341772, + "grad_norm": 0.6943336129188538, + "learning_rate": 0.0015, + "loss": 2.6479, + "step": 1869 + }, + { + "epoch": 0.19725738396624473, + "grad_norm": 0.5434960722923279, + "learning_rate": 0.0015, + "loss": 2.6249, + "step": 1870 + }, + { + "epoch": 0.19736286919831222, + "grad_norm": 0.6565618515014648, + "learning_rate": 0.0015, + "loss": 2.5991, + "step": 1871 + }, + { + 
"epoch": 0.19746835443037974, + "grad_norm": 0.5706204771995544, + "learning_rate": 0.0015, + "loss": 2.6043, + "step": 1872 + }, + { + "epoch": 0.19757383966244726, + "grad_norm": 0.48199185729026794, + "learning_rate": 0.0015, + "loss": 2.5819, + "step": 1873 + }, + { + "epoch": 0.19767932489451476, + "grad_norm": 0.6568314433097839, + "learning_rate": 0.0015, + "loss": 2.6165, + "step": 1874 + }, + { + "epoch": 0.19778481012658228, + "grad_norm": 0.46016719937324524, + "learning_rate": 0.0015, + "loss": 2.622, + "step": 1875 + }, + { + "epoch": 0.1978902953586498, + "grad_norm": 0.4956842064857483, + "learning_rate": 0.0015, + "loss": 2.6549, + "step": 1876 + }, + { + "epoch": 0.1979957805907173, + "grad_norm": 0.5722628235816956, + "learning_rate": 0.0015, + "loss": 2.6688, + "step": 1877 + }, + { + "epoch": 0.1981012658227848, + "grad_norm": 0.47711923718452454, + "learning_rate": 0.0015, + "loss": 2.6126, + "step": 1878 + }, + { + "epoch": 0.19820675105485233, + "grad_norm": 0.4961562752723694, + "learning_rate": 0.0015, + "loss": 2.5957, + "step": 1879 + }, + { + "epoch": 0.19831223628691982, + "grad_norm": 0.4957275092601776, + "learning_rate": 0.0015, + "loss": 2.646, + "step": 1880 + }, + { + "epoch": 0.19841772151898734, + "grad_norm": 0.4470681846141815, + "learning_rate": 0.0015, + "loss": 2.6209, + "step": 1881 + }, + { + "epoch": 0.19852320675105486, + "grad_norm": 0.5001372694969177, + "learning_rate": 0.0015, + "loss": 2.5955, + "step": 1882 + }, + { + "epoch": 0.19862869198312236, + "grad_norm": 0.37293684482574463, + "learning_rate": 0.0015, + "loss": 2.6023, + "step": 1883 + }, + { + "epoch": 0.19873417721518988, + "grad_norm": 0.4559798240661621, + "learning_rate": 0.0015, + "loss": 2.6194, + "step": 1884 + }, + { + "epoch": 0.19883966244725737, + "grad_norm": 0.4511626958847046, + "learning_rate": 0.0015, + "loss": 2.5939, + "step": 1885 + }, + { + "epoch": 0.1989451476793249, + "grad_norm": 0.44044777750968933, + "learning_rate": 0.0015, + 
"loss": 2.634, + "step": 1886 + }, + { + "epoch": 0.1990506329113924, + "grad_norm": 0.44606903195381165, + "learning_rate": 0.0015, + "loss": 2.613, + "step": 1887 + }, + { + "epoch": 0.1991561181434599, + "grad_norm": 0.5108492374420166, + "learning_rate": 0.0015, + "loss": 2.6205, + "step": 1888 + }, + { + "epoch": 0.19926160337552742, + "grad_norm": 0.5252469182014465, + "learning_rate": 0.0015, + "loss": 2.63, + "step": 1889 + }, + { + "epoch": 0.19936708860759494, + "grad_norm": 0.39880281686782837, + "learning_rate": 0.0015, + "loss": 2.5889, + "step": 1890 + }, + { + "epoch": 0.19947257383966244, + "grad_norm": 0.6042729616165161, + "learning_rate": 0.0015, + "loss": 2.6354, + "step": 1891 + }, + { + "epoch": 0.19957805907172996, + "grad_norm": 0.5175554156303406, + "learning_rate": 0.0015, + "loss": 2.5795, + "step": 1892 + }, + { + "epoch": 0.19968354430379748, + "grad_norm": 0.43800583481788635, + "learning_rate": 0.0015, + "loss": 2.5855, + "step": 1893 + }, + { + "epoch": 0.19978902953586497, + "grad_norm": 0.7872032523155212, + "learning_rate": 0.0015, + "loss": 2.6205, + "step": 1894 + }, + { + "epoch": 0.1998945147679325, + "grad_norm": 0.6513321399688721, + "learning_rate": 0.0015, + "loss": 2.652, + "step": 1895 + }, + { + "epoch": 0.2, + "grad_norm": 0.5694076418876648, + "learning_rate": 0.0015, + "loss": 2.6128, + "step": 1896 + }, + { + "epoch": 0.2001054852320675, + "grad_norm": 0.95693039894104, + "learning_rate": 0.0015, + "loss": 2.6597, + "step": 1897 + }, + { + "epoch": 0.20021097046413502, + "grad_norm": 0.7018605470657349, + "learning_rate": 0.0015, + "loss": 2.6324, + "step": 1898 + }, + { + "epoch": 0.20031645569620254, + "grad_norm": 0.5766605734825134, + "learning_rate": 0.0015, + "loss": 2.6217, + "step": 1899 + }, + { + "epoch": 0.20042194092827004, + "grad_norm": 0.8260414600372314, + "learning_rate": 0.0015, + "loss": 2.6395, + "step": 1900 + }, + { + "epoch": 0.20052742616033756, + "grad_norm": 0.7133459448814392, + 
"learning_rate": 0.0015, + "loss": 2.628, + "step": 1901 + }, + { + "epoch": 0.20063291139240505, + "grad_norm": 0.5956565737724304, + "learning_rate": 0.0015, + "loss": 2.5951, + "step": 1902 + }, + { + "epoch": 0.20073839662447257, + "grad_norm": 0.9376426339149475, + "learning_rate": 0.0015, + "loss": 2.6496, + "step": 1903 + }, + { + "epoch": 0.2008438818565401, + "grad_norm": 0.8223220109939575, + "learning_rate": 0.0015, + "loss": 2.6313, + "step": 1904 + }, + { + "epoch": 0.20094936708860758, + "grad_norm": 0.5997200608253479, + "learning_rate": 0.0015, + "loss": 2.5976, + "step": 1905 + }, + { + "epoch": 0.2010548523206751, + "grad_norm": 0.8039960265159607, + "learning_rate": 0.0015, + "loss": 2.6486, + "step": 1906 + }, + { + "epoch": 0.20116033755274262, + "grad_norm": 0.6800093650817871, + "learning_rate": 0.0015, + "loss": 2.6211, + "step": 1907 + }, + { + "epoch": 0.20126582278481012, + "grad_norm": 0.5921891331672668, + "learning_rate": 0.0015, + "loss": 2.6207, + "step": 1908 + }, + { + "epoch": 0.20137130801687764, + "grad_norm": 0.5600051879882812, + "learning_rate": 0.0015, + "loss": 2.6444, + "step": 1909 + }, + { + "epoch": 0.20147679324894516, + "grad_norm": 0.5620603561401367, + "learning_rate": 0.0015, + "loss": 2.6185, + "step": 1910 + }, + { + "epoch": 0.20158227848101265, + "grad_norm": 0.44269052147865295, + "learning_rate": 0.0015, + "loss": 2.6335, + "step": 1911 + }, + { + "epoch": 0.20168776371308017, + "grad_norm": 0.5061421990394592, + "learning_rate": 0.0015, + "loss": 2.6102, + "step": 1912 + }, + { + "epoch": 0.2017932489451477, + "grad_norm": 0.4005381762981415, + "learning_rate": 0.0015, + "loss": 2.5897, + "step": 1913 + }, + { + "epoch": 0.20189873417721518, + "grad_norm": 0.4429921507835388, + "learning_rate": 0.0015, + "loss": 2.6144, + "step": 1914 + }, + { + "epoch": 0.2020042194092827, + "grad_norm": 0.4594302177429199, + "learning_rate": 0.0015, + "loss": 2.6021, + "step": 1915 + }, + { + "epoch": 0.20210970464135022, 
+ "grad_norm": 0.4356192350387573, + "learning_rate": 0.0015, + "loss": 2.6062, + "step": 1916 + }, + { + "epoch": 0.20221518987341772, + "grad_norm": 0.4497615396976471, + "learning_rate": 0.0015, + "loss": 2.5936, + "step": 1917 + }, + { + "epoch": 0.20232067510548524, + "grad_norm": 0.46152204275131226, + "learning_rate": 0.0015, + "loss": 2.6228, + "step": 1918 + }, + { + "epoch": 0.20242616033755273, + "grad_norm": 0.45384255051612854, + "learning_rate": 0.0015, + "loss": 2.6531, + "step": 1919 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 0.4038790762424469, + "learning_rate": 0.0015, + "loss": 2.5823, + "step": 1920 + }, + { + "epoch": 0.20263713080168777, + "grad_norm": 0.4992937743663788, + "learning_rate": 0.0015, + "loss": 2.6035, + "step": 1921 + }, + { + "epoch": 0.20274261603375526, + "grad_norm": 0.3839251697063446, + "learning_rate": 0.0015, + "loss": 2.5915, + "step": 1922 + }, + { + "epoch": 0.20284810126582278, + "grad_norm": 0.44896161556243896, + "learning_rate": 0.0015, + "loss": 2.622, + "step": 1923 + }, + { + "epoch": 0.2029535864978903, + "grad_norm": 0.38991615176200867, + "learning_rate": 0.0015, + "loss": 2.5688, + "step": 1924 + }, + { + "epoch": 0.2030590717299578, + "grad_norm": 0.4253697991371155, + "learning_rate": 0.0015, + "loss": 2.6328, + "step": 1925 + }, + { + "epoch": 0.20316455696202532, + "grad_norm": 0.4887075424194336, + "learning_rate": 0.0015, + "loss": 2.6033, + "step": 1926 + }, + { + "epoch": 0.20327004219409284, + "grad_norm": 0.3510946035385132, + "learning_rate": 0.0015, + "loss": 2.6211, + "step": 1927 + }, + { + "epoch": 0.20337552742616033, + "grad_norm": 0.540607213973999, + "learning_rate": 0.0015, + "loss": 2.6419, + "step": 1928 + }, + { + "epoch": 0.20348101265822785, + "grad_norm": 0.4679645001888275, + "learning_rate": 0.0015, + "loss": 2.5783, + "step": 1929 + }, + { + "epoch": 0.20358649789029537, + "grad_norm": 0.4489510655403137, + "learning_rate": 0.0015, + "loss": 2.6284, + "step": 1930 
+ }, + { + "epoch": 0.20369198312236286, + "grad_norm": 0.5243116021156311, + "learning_rate": 0.0015, + "loss": 2.5576, + "step": 1931 + }, + { + "epoch": 0.20379746835443038, + "grad_norm": 0.37414848804473877, + "learning_rate": 0.0015, + "loss": 2.5924, + "step": 1932 + }, + { + "epoch": 0.2039029535864979, + "grad_norm": 0.43537968397140503, + "learning_rate": 0.0015, + "loss": 2.5995, + "step": 1933 + }, + { + "epoch": 0.2040084388185654, + "grad_norm": 0.4572514593601227, + "learning_rate": 0.0015, + "loss": 2.6205, + "step": 1934 + }, + { + "epoch": 0.20411392405063292, + "grad_norm": 0.36321672797203064, + "learning_rate": 0.0015, + "loss": 2.5868, + "step": 1935 + }, + { + "epoch": 0.2042194092827004, + "grad_norm": 0.4521811306476593, + "learning_rate": 0.0015, + "loss": 2.6112, + "step": 1936 + }, + { + "epoch": 0.20432489451476793, + "grad_norm": 0.39938241243362427, + "learning_rate": 0.0015, + "loss": 2.6026, + "step": 1937 + }, + { + "epoch": 0.20443037974683545, + "grad_norm": 0.47030773758888245, + "learning_rate": 0.0015, + "loss": 2.5939, + "step": 1938 + }, + { + "epoch": 0.20453586497890294, + "grad_norm": 0.4599655270576477, + "learning_rate": 0.0015, + "loss": 2.601, + "step": 1939 + }, + { + "epoch": 0.20464135021097046, + "grad_norm": 0.3830592632293701, + "learning_rate": 0.0015, + "loss": 2.573, + "step": 1940 + }, + { + "epoch": 0.20474683544303798, + "grad_norm": 0.4363763928413391, + "learning_rate": 0.0015, + "loss": 2.6251, + "step": 1941 + }, + { + "epoch": 0.20485232067510548, + "grad_norm": 0.42555129528045654, + "learning_rate": 0.0015, + "loss": 2.6249, + "step": 1942 + }, + { + "epoch": 0.204957805907173, + "grad_norm": 0.4869263768196106, + "learning_rate": 0.0015, + "loss": 2.5847, + "step": 1943 + }, + { + "epoch": 0.20506329113924052, + "grad_norm": 0.5114881992340088, + "learning_rate": 0.0015, + "loss": 2.5795, + "step": 1944 + }, + { + "epoch": 0.205168776371308, + "grad_norm": 0.4430542290210724, + "learning_rate": 
0.0015, + "loss": 2.633, + "step": 1945 + }, + { + "epoch": 0.20527426160337553, + "grad_norm": 0.46150848269462585, + "learning_rate": 0.0015, + "loss": 2.5573, + "step": 1946 + }, + { + "epoch": 0.20537974683544305, + "grad_norm": 0.4808204770088196, + "learning_rate": 0.0015, + "loss": 2.6264, + "step": 1947 + }, + { + "epoch": 0.20548523206751054, + "grad_norm": 0.4018331468105316, + "learning_rate": 0.0015, + "loss": 2.5724, + "step": 1948 + }, + { + "epoch": 0.20559071729957806, + "grad_norm": 0.48080340027809143, + "learning_rate": 0.0015, + "loss": 2.5736, + "step": 1949 + }, + { + "epoch": 0.20569620253164558, + "grad_norm": 0.43017280101776123, + "learning_rate": 0.0015, + "loss": 2.5784, + "step": 1950 + }, + { + "epoch": 0.20580168776371308, + "grad_norm": 0.40713632106781006, + "learning_rate": 0.0015, + "loss": 2.636, + "step": 1951 + }, + { + "epoch": 0.2059071729957806, + "grad_norm": 0.5057435631752014, + "learning_rate": 0.0015, + "loss": 2.6119, + "step": 1952 + }, + { + "epoch": 0.2060126582278481, + "grad_norm": 0.4160159230232239, + "learning_rate": 0.0015, + "loss": 2.6224, + "step": 1953 + }, + { + "epoch": 0.2061181434599156, + "grad_norm": 0.4864172339439392, + "learning_rate": 0.0015, + "loss": 2.5694, + "step": 1954 + }, + { + "epoch": 0.20622362869198313, + "grad_norm": 0.4461929202079773, + "learning_rate": 0.0015, + "loss": 2.6053, + "step": 1955 + }, + { + "epoch": 0.20632911392405062, + "grad_norm": 0.41023552417755127, + "learning_rate": 0.0015, + "loss": 2.5803, + "step": 1956 + }, + { + "epoch": 0.20643459915611814, + "grad_norm": 0.36789509654045105, + "learning_rate": 0.0015, + "loss": 2.581, + "step": 1957 + }, + { + "epoch": 0.20654008438818566, + "grad_norm": 0.44003576040267944, + "learning_rate": 0.0015, + "loss": 2.6025, + "step": 1958 + }, + { + "epoch": 0.20664556962025316, + "grad_norm": 0.3749352693557739, + "learning_rate": 0.0015, + "loss": 2.6206, + "step": 1959 + }, + { + "epoch": 0.20675105485232068, + 
"grad_norm": 0.4279178977012634, + "learning_rate": 0.0015, + "loss": 2.5939, + "step": 1960 + }, + { + "epoch": 0.2068565400843882, + "grad_norm": 0.3645850121974945, + "learning_rate": 0.0015, + "loss": 2.6063, + "step": 1961 + }, + { + "epoch": 0.2069620253164557, + "grad_norm": 0.40675342082977295, + "learning_rate": 0.0015, + "loss": 2.6085, + "step": 1962 + }, + { + "epoch": 0.2070675105485232, + "grad_norm": 0.39544811844825745, + "learning_rate": 0.0015, + "loss": 2.6231, + "step": 1963 + }, + { + "epoch": 0.20717299578059073, + "grad_norm": 0.4138580858707428, + "learning_rate": 0.0015, + "loss": 2.5773, + "step": 1964 + }, + { + "epoch": 0.20727848101265822, + "grad_norm": 0.3775704801082611, + "learning_rate": 0.0015, + "loss": 2.5951, + "step": 1965 + }, + { + "epoch": 0.20738396624472574, + "grad_norm": 0.3976186513900757, + "learning_rate": 0.0015, + "loss": 2.5739, + "step": 1966 + }, + { + "epoch": 0.20748945147679324, + "grad_norm": 0.4299464523792267, + "learning_rate": 0.0015, + "loss": 2.6168, + "step": 1967 + }, + { + "epoch": 0.20759493670886076, + "grad_norm": 0.3546941578388214, + "learning_rate": 0.0015, + "loss": 2.5847, + "step": 1968 + }, + { + "epoch": 0.20770042194092828, + "grad_norm": 0.43423500657081604, + "learning_rate": 0.0015, + "loss": 2.5904, + "step": 1969 + }, + { + "epoch": 0.20780590717299577, + "grad_norm": 0.3818896412849426, + "learning_rate": 0.0015, + "loss": 2.5933, + "step": 1970 + }, + { + "epoch": 0.2079113924050633, + "grad_norm": 0.4535094201564789, + "learning_rate": 0.0015, + "loss": 2.5942, + "step": 1971 + }, + { + "epoch": 0.2080168776371308, + "grad_norm": 0.41401755809783936, + "learning_rate": 0.0015, + "loss": 2.5608, + "step": 1972 + }, + { + "epoch": 0.2081223628691983, + "grad_norm": 0.4796600639820099, + "learning_rate": 0.0015, + "loss": 2.6061, + "step": 1973 + }, + { + "epoch": 0.20822784810126582, + "grad_norm": 0.42547065019607544, + "learning_rate": 0.0015, + "loss": 2.5789, + "step": 1974 + 
}, + { + "epoch": 0.20833333333333334, + "grad_norm": 0.3967663049697876, + "learning_rate": 0.0015, + "loss": 2.5883, + "step": 1975 + }, + { + "epoch": 0.20843881856540084, + "grad_norm": 0.4374746084213257, + "learning_rate": 0.0015, + "loss": 2.5869, + "step": 1976 + }, + { + "epoch": 0.20854430379746836, + "grad_norm": 0.43292778730392456, + "learning_rate": 0.0015, + "loss": 2.588, + "step": 1977 + }, + { + "epoch": 0.20864978902953588, + "grad_norm": 0.3559218943119049, + "learning_rate": 0.0015, + "loss": 2.5839, + "step": 1978 + }, + { + "epoch": 0.20875527426160337, + "grad_norm": 0.4528844356536865, + "learning_rate": 0.0015, + "loss": 2.5726, + "step": 1979 + }, + { + "epoch": 0.2088607594936709, + "grad_norm": 0.3550187945365906, + "learning_rate": 0.0015, + "loss": 2.5855, + "step": 1980 + }, + { + "epoch": 0.2089662447257384, + "grad_norm": 0.3901500999927521, + "learning_rate": 0.0015, + "loss": 2.5696, + "step": 1981 + }, + { + "epoch": 0.2090717299578059, + "grad_norm": 0.46282249689102173, + "learning_rate": 0.0015, + "loss": 2.5475, + "step": 1982 + }, + { + "epoch": 0.20917721518987342, + "grad_norm": 0.4129039943218231, + "learning_rate": 0.0015, + "loss": 2.5794, + "step": 1983 + }, + { + "epoch": 0.20928270042194091, + "grad_norm": 0.37987250089645386, + "learning_rate": 0.0015, + "loss": 2.6066, + "step": 1984 + }, + { + "epoch": 0.20938818565400844, + "grad_norm": 0.4127688407897949, + "learning_rate": 0.0015, + "loss": 2.6076, + "step": 1985 + }, + { + "epoch": 0.20949367088607596, + "grad_norm": 0.4043574035167694, + "learning_rate": 0.0015, + "loss": 2.5706, + "step": 1986 + }, + { + "epoch": 0.20959915611814345, + "grad_norm": 0.4606841802597046, + "learning_rate": 0.0015, + "loss": 2.5727, + "step": 1987 + }, + { + "epoch": 0.20970464135021097, + "grad_norm": 0.44200852513313293, + "learning_rate": 0.0015, + "loss": 2.5932, + "step": 1988 + }, + { + "epoch": 0.2098101265822785, + "grad_norm": 0.3856538236141205, + "learning_rate": 
0.0015, + "loss": 2.5948, + "step": 1989 + }, + { + "epoch": 0.20991561181434598, + "grad_norm": 0.4725104868412018, + "learning_rate": 0.0015, + "loss": 2.6187, + "step": 1990 + }, + { + "epoch": 0.2100210970464135, + "grad_norm": 0.44667285680770874, + "learning_rate": 0.0015, + "loss": 2.5901, + "step": 1991 + }, + { + "epoch": 0.21012658227848102, + "grad_norm": 0.4643024504184723, + "learning_rate": 0.0015, + "loss": 2.5847, + "step": 1992 + }, + { + "epoch": 0.21023206751054851, + "grad_norm": 0.5167530179023743, + "learning_rate": 0.0015, + "loss": 2.6043, + "step": 1993 + }, + { + "epoch": 0.21033755274261604, + "grad_norm": 0.4730245769023895, + "learning_rate": 0.0015, + "loss": 2.5552, + "step": 1994 + }, + { + "epoch": 0.21044303797468356, + "grad_norm": 0.4156949818134308, + "learning_rate": 0.0015, + "loss": 2.6153, + "step": 1995 + }, + { + "epoch": 0.21054852320675105, + "grad_norm": 0.4206639230251312, + "learning_rate": 0.0015, + "loss": 2.5597, + "step": 1996 + }, + { + "epoch": 0.21065400843881857, + "grad_norm": 0.40730899572372437, + "learning_rate": 0.0015, + "loss": 2.5647, + "step": 1997 + }, + { + "epoch": 0.2107594936708861, + "grad_norm": 0.3746362328529358, + "learning_rate": 0.0015, + "loss": 2.5523, + "step": 1998 + }, + { + "epoch": 0.21086497890295358, + "grad_norm": 0.37700769305229187, + "learning_rate": 0.0015, + "loss": 2.5587, + "step": 1999 + }, + { + "epoch": 0.2109704641350211, + "grad_norm": 0.3864564001560211, + "learning_rate": 0.0015, + "loss": 2.582, + "step": 2000 + }, + { + "epoch": 0.2110759493670886, + "grad_norm": 0.39877402782440186, + "learning_rate": 0.0015, + "loss": 2.61, + "step": 2001 + }, + { + "epoch": 0.21118143459915611, + "grad_norm": 0.39525729417800903, + "learning_rate": 0.0015, + "loss": 2.5818, + "step": 2002 + }, + { + "epoch": 0.21128691983122364, + "grad_norm": 0.39100736379623413, + "learning_rate": 0.0015, + "loss": 2.5396, + "step": 2003 + }, + { + "epoch": 0.21139240506329113, + "grad_norm": 
0.3661050796508789, + "learning_rate": 0.0015, + "loss": 2.5826, + "step": 2004 + }, + { + "epoch": 0.21149789029535865, + "grad_norm": 0.3696499764919281, + "learning_rate": 0.0015, + "loss": 2.6098, + "step": 2005 + }, + { + "epoch": 0.21160337552742617, + "grad_norm": 0.39480412006378174, + "learning_rate": 0.0015, + "loss": 2.5971, + "step": 2006 + }, + { + "epoch": 0.21170886075949366, + "grad_norm": 0.47629988193511963, + "learning_rate": 0.0015, + "loss": 2.6157, + "step": 2007 + }, + { + "epoch": 0.21181434599156118, + "grad_norm": 0.41158491373062134, + "learning_rate": 0.0015, + "loss": 2.6072, + "step": 2008 + }, + { + "epoch": 0.2119198312236287, + "grad_norm": 0.4281735420227051, + "learning_rate": 0.0015, + "loss": 2.5667, + "step": 2009 + }, + { + "epoch": 0.2120253164556962, + "grad_norm": 0.4346429109573364, + "learning_rate": 0.0015, + "loss": 2.5701, + "step": 2010 + }, + { + "epoch": 0.21213080168776371, + "grad_norm": 0.3752724230289459, + "learning_rate": 0.0015, + "loss": 2.581, + "step": 2011 + }, + { + "epoch": 0.21223628691983124, + "grad_norm": 0.4398358464241028, + "learning_rate": 0.0015, + "loss": 2.5905, + "step": 2012 + }, + { + "epoch": 0.21234177215189873, + "grad_norm": 0.47354641556739807, + "learning_rate": 0.0015, + "loss": 2.6002, + "step": 2013 + }, + { + "epoch": 0.21244725738396625, + "grad_norm": 0.4315589666366577, + "learning_rate": 0.0015, + "loss": 2.5928, + "step": 2014 + }, + { + "epoch": 0.21255274261603377, + "grad_norm": 0.4485074579715729, + "learning_rate": 0.0015, + "loss": 2.5822, + "step": 2015 + }, + { + "epoch": 0.21265822784810126, + "grad_norm": 0.41797807812690735, + "learning_rate": 0.0015, + "loss": 2.5961, + "step": 2016 + }, + { + "epoch": 0.21276371308016878, + "grad_norm": 0.4421110451221466, + "learning_rate": 0.0015, + "loss": 2.5964, + "step": 2017 + }, + { + "epoch": 0.21286919831223627, + "grad_norm": 0.48968812823295593, + "learning_rate": 0.0015, + "loss": 2.5925, + "step": 2018 + }, + { + 
"epoch": 0.2129746835443038, + "grad_norm": 0.41556698083877563, + "learning_rate": 0.0015, + "loss": 2.5991, + "step": 2019 + }, + { + "epoch": 0.21308016877637131, + "grad_norm": 0.42174214124679565, + "learning_rate": 0.0015, + "loss": 2.5729, + "step": 2020 + }, + { + "epoch": 0.2131856540084388, + "grad_norm": 0.5064943432807922, + "learning_rate": 0.0015, + "loss": 2.5832, + "step": 2021 + }, + { + "epoch": 0.21329113924050633, + "grad_norm": 0.5232344269752502, + "learning_rate": 0.0015, + "loss": 2.5717, + "step": 2022 + }, + { + "epoch": 0.21339662447257385, + "grad_norm": 0.3616650700569153, + "learning_rate": 0.0015, + "loss": 2.5957, + "step": 2023 + }, + { + "epoch": 0.21350210970464134, + "grad_norm": 0.4624003767967224, + "learning_rate": 0.0015, + "loss": 2.617, + "step": 2024 + }, + { + "epoch": 0.21360759493670886, + "grad_norm": 0.42172515392303467, + "learning_rate": 0.0015, + "loss": 2.5458, + "step": 2025 + }, + { + "epoch": 0.21371308016877638, + "grad_norm": 0.45679497718811035, + "learning_rate": 0.0015, + "loss": 2.5618, + "step": 2026 + }, + { + "epoch": 0.21381856540084387, + "grad_norm": 0.4484020471572876, + "learning_rate": 0.0015, + "loss": 2.5795, + "step": 2027 + }, + { + "epoch": 0.2139240506329114, + "grad_norm": 0.45178529620170593, + "learning_rate": 0.0015, + "loss": 2.5997, + "step": 2028 + }, + { + "epoch": 0.21402953586497891, + "grad_norm": 0.4195375442504883, + "learning_rate": 0.0015, + "loss": 2.6018, + "step": 2029 + }, + { + "epoch": 0.2141350210970464, + "grad_norm": 0.42188143730163574, + "learning_rate": 0.0015, + "loss": 2.5742, + "step": 2030 + }, + { + "epoch": 0.21424050632911393, + "grad_norm": 0.39536044001579285, + "learning_rate": 0.0015, + "loss": 2.5809, + "step": 2031 + }, + { + "epoch": 0.21434599156118145, + "grad_norm": 0.39114049077033997, + "learning_rate": 0.0015, + "loss": 2.5601, + "step": 2032 + }, + { + "epoch": 0.21445147679324894, + "grad_norm": 0.37014633417129517, + "learning_rate": 0.0015, 
+ "loss": 2.58, + "step": 2033 + }, + { + "epoch": 0.21455696202531646, + "grad_norm": 0.39694949984550476, + "learning_rate": 0.0015, + "loss": 2.5725, + "step": 2034 + }, + { + "epoch": 0.21466244725738395, + "grad_norm": 0.3949100077152252, + "learning_rate": 0.0015, + "loss": 2.5719, + "step": 2035 + }, + { + "epoch": 0.21476793248945147, + "grad_norm": 0.3549565076828003, + "learning_rate": 0.0015, + "loss": 2.5862, + "step": 2036 + }, + { + "epoch": 0.214873417721519, + "grad_norm": 0.41476717591285706, + "learning_rate": 0.0015, + "loss": 2.5755, + "step": 2037 + }, + { + "epoch": 0.2149789029535865, + "grad_norm": 0.41238805651664734, + "learning_rate": 0.0015, + "loss": 2.5667, + "step": 2038 + }, + { + "epoch": 0.215084388185654, + "grad_norm": 0.4483872354030609, + "learning_rate": 0.0015, + "loss": 2.5955, + "step": 2039 + }, + { + "epoch": 0.21518987341772153, + "grad_norm": 0.4063268005847931, + "learning_rate": 0.0015, + "loss": 2.5895, + "step": 2040 + }, + { + "epoch": 0.21529535864978902, + "grad_norm": 0.44289880990982056, + "learning_rate": 0.0015, + "loss": 2.5878, + "step": 2041 + }, + { + "epoch": 0.21540084388185654, + "grad_norm": 0.45221516489982605, + "learning_rate": 0.0015, + "loss": 2.5608, + "step": 2042 + }, + { + "epoch": 0.21550632911392406, + "grad_norm": 0.48345947265625, + "learning_rate": 0.0015, + "loss": 2.6036, + "step": 2043 + }, + { + "epoch": 0.21561181434599155, + "grad_norm": 0.4146663248538971, + "learning_rate": 0.0015, + "loss": 2.6095, + "step": 2044 + }, + { + "epoch": 0.21571729957805907, + "grad_norm": 0.42423126101493835, + "learning_rate": 0.0015, + "loss": 2.5701, + "step": 2045 + }, + { + "epoch": 0.2158227848101266, + "grad_norm": 0.48801735043525696, + "learning_rate": 0.0015, + "loss": 2.5772, + "step": 2046 + }, + { + "epoch": 0.2159282700421941, + "grad_norm": 0.5302500128746033, + "learning_rate": 0.0015, + "loss": 2.5529, + "step": 2047 + }, + { + "epoch": 0.2160337552742616, + "grad_norm": 
0.3909532427787781, + "learning_rate": 0.0015, + "loss": 2.574, + "step": 2048 + }, + { + "epoch": 0.21613924050632913, + "grad_norm": 0.44731560349464417, + "learning_rate": 0.0015, + "loss": 2.5946, + "step": 2049 + }, + { + "epoch": 0.21624472573839662, + "grad_norm": 0.4968436658382416, + "learning_rate": 0.0015, + "loss": 2.5635, + "step": 2050 + }, + { + "epoch": 0.21635021097046414, + "grad_norm": 0.4080902636051178, + "learning_rate": 0.0015, + "loss": 2.5715, + "step": 2051 + }, + { + "epoch": 0.21645569620253163, + "grad_norm": 0.42851001024246216, + "learning_rate": 0.0015, + "loss": 2.5899, + "step": 2052 + }, + { + "epoch": 0.21656118143459915, + "grad_norm": 0.45848536491394043, + "learning_rate": 0.0015, + "loss": 2.5629, + "step": 2053 + }, + { + "epoch": 0.21666666666666667, + "grad_norm": 0.4482588469982147, + "learning_rate": 0.0015, + "loss": 2.572, + "step": 2054 + }, + { + "epoch": 0.21677215189873417, + "grad_norm": 0.3822450041770935, + "learning_rate": 0.0015, + "loss": 2.5652, + "step": 2055 + }, + { + "epoch": 0.2168776371308017, + "grad_norm": 0.43190038204193115, + "learning_rate": 0.0015, + "loss": 2.5758, + "step": 2056 + }, + { + "epoch": 0.2169831223628692, + "grad_norm": 0.39374032616615295, + "learning_rate": 0.0015, + "loss": 2.5679, + "step": 2057 + }, + { + "epoch": 0.2170886075949367, + "grad_norm": 0.4300993084907532, + "learning_rate": 0.0015, + "loss": 2.5412, + "step": 2058 + }, + { + "epoch": 0.21719409282700422, + "grad_norm": 0.47064125537872314, + "learning_rate": 0.0015, + "loss": 2.5408, + "step": 2059 + }, + { + "epoch": 0.21729957805907174, + "grad_norm": 0.43430107831954956, + "learning_rate": 0.0015, + "loss": 2.5495, + "step": 2060 + }, + { + "epoch": 0.21740506329113923, + "grad_norm": 0.5584786534309387, + "learning_rate": 0.0015, + "loss": 2.5437, + "step": 2061 + }, + { + "epoch": 0.21751054852320675, + "grad_norm": 0.5246803760528564, + "learning_rate": 0.0015, + "loss": 2.5957, + "step": 2062 + }, + { + 
"epoch": 0.21761603375527427, + "grad_norm": 0.41062089800834656, + "learning_rate": 0.0015, + "loss": 2.5545, + "step": 2063 + }, + { + "epoch": 0.21772151898734177, + "grad_norm": 0.488696813583374, + "learning_rate": 0.0015, + "loss": 2.5517, + "step": 2064 + }, + { + "epoch": 0.2178270042194093, + "grad_norm": 0.5345264673233032, + "learning_rate": 0.0015, + "loss": 2.5581, + "step": 2065 + }, + { + "epoch": 0.21793248945147678, + "grad_norm": 0.41732004284858704, + "learning_rate": 0.0015, + "loss": 2.5827, + "step": 2066 + }, + { + "epoch": 0.2180379746835443, + "grad_norm": 0.5045616030693054, + "learning_rate": 0.0015, + "loss": 2.5653, + "step": 2067 + }, + { + "epoch": 0.21814345991561182, + "grad_norm": 0.48464053869247437, + "learning_rate": 0.0015, + "loss": 2.5958, + "step": 2068 + }, + { + "epoch": 0.2182489451476793, + "grad_norm": 0.3878413736820221, + "learning_rate": 0.0015, + "loss": 2.5573, + "step": 2069 + }, + { + "epoch": 0.21835443037974683, + "grad_norm": 0.47974270582199097, + "learning_rate": 0.0015, + "loss": 2.5653, + "step": 2070 + }, + { + "epoch": 0.21845991561181435, + "grad_norm": 0.4800712764263153, + "learning_rate": 0.0015, + "loss": 2.5952, + "step": 2071 + }, + { + "epoch": 0.21856540084388185, + "grad_norm": 0.37429577112197876, + "learning_rate": 0.0015, + "loss": 2.5638, + "step": 2072 + }, + { + "epoch": 0.21867088607594937, + "grad_norm": 0.5214272141456604, + "learning_rate": 0.0015, + "loss": 2.5805, + "step": 2073 + }, + { + "epoch": 0.2187763713080169, + "grad_norm": 0.46908825635910034, + "learning_rate": 0.0015, + "loss": 2.5484, + "step": 2074 + }, + { + "epoch": 0.21888185654008438, + "grad_norm": 0.4827989637851715, + "learning_rate": 0.0015, + "loss": 2.582, + "step": 2075 + }, + { + "epoch": 0.2189873417721519, + "grad_norm": 0.5751751065254211, + "learning_rate": 0.0015, + "loss": 2.5505, + "step": 2076 + }, + { + "epoch": 0.21909282700421942, + "grad_norm": 0.49084389209747314, + "learning_rate": 0.0015, + 
"loss": 2.5738, + "step": 2077 + }, + { + "epoch": 0.2191983122362869, + "grad_norm": 0.3533143699169159, + "learning_rate": 0.0015, + "loss": 2.5663, + "step": 2078 + }, + { + "epoch": 0.21930379746835443, + "grad_norm": 0.40551501512527466, + "learning_rate": 0.0015, + "loss": 2.5769, + "step": 2079 + }, + { + "epoch": 0.21940928270042195, + "grad_norm": 0.36977726221084595, + "learning_rate": 0.0015, + "loss": 2.5968, + "step": 2080 + }, + { + "epoch": 0.21951476793248945, + "grad_norm": 0.4644162058830261, + "learning_rate": 0.0015, + "loss": 2.5759, + "step": 2081 + }, + { + "epoch": 0.21962025316455697, + "grad_norm": 0.47240740060806274, + "learning_rate": 0.0015, + "loss": 2.5631, + "step": 2082 + }, + { + "epoch": 0.21972573839662446, + "grad_norm": 0.40127697587013245, + "learning_rate": 0.0015, + "loss": 2.5427, + "step": 2083 + }, + { + "epoch": 0.21983122362869198, + "grad_norm": 0.37068140506744385, + "learning_rate": 0.0015, + "loss": 2.5718, + "step": 2084 + }, + { + "epoch": 0.2199367088607595, + "grad_norm": 0.35981467366218567, + "learning_rate": 0.0015, + "loss": 2.5643, + "step": 2085 + }, + { + "epoch": 0.220042194092827, + "grad_norm": 0.38794225454330444, + "learning_rate": 0.0015, + "loss": 2.5213, + "step": 2086 + }, + { + "epoch": 0.2201476793248945, + "grad_norm": 0.3861929178237915, + "learning_rate": 0.0015, + "loss": 2.605, + "step": 2087 + }, + { + "epoch": 0.22025316455696203, + "grad_norm": 0.3891788125038147, + "learning_rate": 0.0015, + "loss": 2.5374, + "step": 2088 + }, + { + "epoch": 0.22035864978902953, + "grad_norm": 0.49424684047698975, + "learning_rate": 0.0015, + "loss": 2.576, + "step": 2089 + }, + { + "epoch": 0.22046413502109705, + "grad_norm": 0.5416702628135681, + "learning_rate": 0.0015, + "loss": 2.5669, + "step": 2090 + }, + { + "epoch": 0.22056962025316457, + "grad_norm": 0.4282401204109192, + "learning_rate": 0.0015, + "loss": 2.5856, + "step": 2091 + }, + { + "epoch": 0.22067510548523206, + "grad_norm": 
0.3825411796569824, + "learning_rate": 0.0015, + "loss": 2.5603, + "step": 2092 + }, + { + "epoch": 0.22078059071729958, + "grad_norm": 0.3893886208534241, + "learning_rate": 0.0015, + "loss": 2.5501, + "step": 2093 + }, + { + "epoch": 0.2208860759493671, + "grad_norm": 0.4478728771209717, + "learning_rate": 0.0015, + "loss": 2.5661, + "step": 2094 + }, + { + "epoch": 0.2209915611814346, + "grad_norm": 0.4365382194519043, + "learning_rate": 0.0015, + "loss": 2.5226, + "step": 2095 + }, + { + "epoch": 0.2210970464135021, + "grad_norm": 0.3527461588382721, + "learning_rate": 0.0015, + "loss": 2.5573, + "step": 2096 + }, + { + "epoch": 0.22120253164556963, + "grad_norm": 0.4105367064476013, + "learning_rate": 0.0015, + "loss": 2.5497, + "step": 2097 + }, + { + "epoch": 0.22130801687763713, + "grad_norm": 0.46730178594589233, + "learning_rate": 0.0015, + "loss": 2.5796, + "step": 2098 + }, + { + "epoch": 0.22141350210970465, + "grad_norm": 0.36951684951782227, + "learning_rate": 0.0015, + "loss": 2.6203, + "step": 2099 + }, + { + "epoch": 0.22151898734177214, + "grad_norm": 0.41266706585884094, + "learning_rate": 0.0015, + "loss": 2.5419, + "step": 2100 + }, + { + "epoch": 0.22162447257383966, + "grad_norm": 0.5672953724861145, + "learning_rate": 0.0015, + "loss": 2.5742, + "step": 2101 + }, + { + "epoch": 0.22172995780590718, + "grad_norm": 0.47271299362182617, + "learning_rate": 0.0015, + "loss": 2.5933, + "step": 2102 + }, + { + "epoch": 0.22183544303797467, + "grad_norm": 0.4156918525695801, + "learning_rate": 0.0015, + "loss": 2.5189, + "step": 2103 + }, + { + "epoch": 0.2219409282700422, + "grad_norm": 0.45790931582450867, + "learning_rate": 0.0015, + "loss": 2.5695, + "step": 2104 + }, + { + "epoch": 0.2220464135021097, + "grad_norm": 0.3820226192474365, + "learning_rate": 0.0015, + "loss": 2.5724, + "step": 2105 + }, + { + "epoch": 0.2221518987341772, + "grad_norm": 0.4665733575820923, + "learning_rate": 0.0015, + "loss": 2.5444, + "step": 2106 + }, + { + 
"epoch": 0.22225738396624473, + "grad_norm": 0.47079429030418396, + "learning_rate": 0.0015, + "loss": 2.5474, + "step": 2107 + }, + { + "epoch": 0.22236286919831225, + "grad_norm": 0.3766719400882721, + "learning_rate": 0.0015, + "loss": 2.5362, + "step": 2108 + }, + { + "epoch": 0.22246835443037974, + "grad_norm": 0.41901862621307373, + "learning_rate": 0.0015, + "loss": 2.5467, + "step": 2109 + }, + { + "epoch": 0.22257383966244726, + "grad_norm": 0.4736518859863281, + "learning_rate": 0.0015, + "loss": 2.56, + "step": 2110 + }, + { + "epoch": 0.22267932489451478, + "grad_norm": 0.42605161666870117, + "learning_rate": 0.0015, + "loss": 2.5292, + "step": 2111 + }, + { + "epoch": 0.22278481012658227, + "grad_norm": 0.4015689194202423, + "learning_rate": 0.0015, + "loss": 2.5556, + "step": 2112 + }, + { + "epoch": 0.2228902953586498, + "grad_norm": 0.40324053168296814, + "learning_rate": 0.0015, + "loss": 2.5766, + "step": 2113 + }, + { + "epoch": 0.2229957805907173, + "grad_norm": 0.48004233837127686, + "learning_rate": 0.0015, + "loss": 2.5513, + "step": 2114 + }, + { + "epoch": 0.2231012658227848, + "grad_norm": 0.39179298281669617, + "learning_rate": 0.0015, + "loss": 2.5588, + "step": 2115 + }, + { + "epoch": 0.22320675105485233, + "grad_norm": 0.4402623176574707, + "learning_rate": 0.0015, + "loss": 2.5372, + "step": 2116 + }, + { + "epoch": 0.22331223628691982, + "grad_norm": 0.45569366216659546, + "learning_rate": 0.0015, + "loss": 2.5608, + "step": 2117 + }, + { + "epoch": 0.22341772151898734, + "grad_norm": 0.385938823223114, + "learning_rate": 0.0015, + "loss": 2.5393, + "step": 2118 + }, + { + "epoch": 0.22352320675105486, + "grad_norm": 0.4196917712688446, + "learning_rate": 0.0015, + "loss": 2.5642, + "step": 2119 + }, + { + "epoch": 0.22362869198312235, + "grad_norm": 0.45284003019332886, + "learning_rate": 0.0015, + "loss": 2.542, + "step": 2120 + }, + { + "epoch": 0.22373417721518987, + "grad_norm": 0.48221102356910706, + "learning_rate": 0.0015, + 
"loss": 2.5645, + "step": 2121 + }, + { + "epoch": 0.2238396624472574, + "grad_norm": 0.5328884124755859, + "learning_rate": 0.0015, + "loss": 2.5361, + "step": 2122 + }, + { + "epoch": 0.22394514767932489, + "grad_norm": 0.3711521029472351, + "learning_rate": 0.0015, + "loss": 2.5628, + "step": 2123 + }, + { + "epoch": 0.2240506329113924, + "grad_norm": 0.5117353796958923, + "learning_rate": 0.0015, + "loss": 2.5378, + "step": 2124 + }, + { + "epoch": 0.22415611814345993, + "grad_norm": 0.6003317832946777, + "learning_rate": 0.0015, + "loss": 2.5525, + "step": 2125 + }, + { + "epoch": 0.22426160337552742, + "grad_norm": 0.5133798718452454, + "learning_rate": 0.0015, + "loss": 2.5619, + "step": 2126 + }, + { + "epoch": 0.22436708860759494, + "grad_norm": 0.4689140319824219, + "learning_rate": 0.0015, + "loss": 2.5792, + "step": 2127 + }, + { + "epoch": 0.22447257383966246, + "grad_norm": 0.40861329436302185, + "learning_rate": 0.0015, + "loss": 2.5466, + "step": 2128 + }, + { + "epoch": 0.22457805907172995, + "grad_norm": 0.4845236539840698, + "learning_rate": 0.0015, + "loss": 2.5688, + "step": 2129 + }, + { + "epoch": 0.22468354430379747, + "grad_norm": 0.5673550963401794, + "learning_rate": 0.0015, + "loss": 2.5687, + "step": 2130 + }, + { + "epoch": 0.224789029535865, + "grad_norm": 0.3671455383300781, + "learning_rate": 0.0015, + "loss": 2.5231, + "step": 2131 + }, + { + "epoch": 0.22489451476793249, + "grad_norm": 0.523500382900238, + "learning_rate": 0.0015, + "loss": 2.514, + "step": 2132 + }, + { + "epoch": 0.225, + "grad_norm": 0.46512094140052795, + "learning_rate": 0.0015, + "loss": 2.5133, + "step": 2133 + }, + { + "epoch": 0.2251054852320675, + "grad_norm": 0.4700636565685272, + "learning_rate": 0.0015, + "loss": 2.5682, + "step": 2134 + }, + { + "epoch": 0.22521097046413502, + "grad_norm": 0.37558072805404663, + "learning_rate": 0.0015, + "loss": 2.5462, + "step": 2135 + }, + { + "epoch": 0.22531645569620254, + "grad_norm": 0.433097243309021, + 
"learning_rate": 0.0015, + "loss": 2.5769, + "step": 2136 + }, + { + "epoch": 0.22542194092827003, + "grad_norm": 0.38083508610725403, + "learning_rate": 0.0015, + "loss": 2.5376, + "step": 2137 + }, + { + "epoch": 0.22552742616033755, + "grad_norm": 0.42277997732162476, + "learning_rate": 0.0015, + "loss": 2.5455, + "step": 2138 + }, + { + "epoch": 0.22563291139240507, + "grad_norm": 0.4890255331993103, + "learning_rate": 0.0015, + "loss": 2.5883, + "step": 2139 + }, + { + "epoch": 0.22573839662447256, + "grad_norm": 0.42207977175712585, + "learning_rate": 0.0015, + "loss": 2.5595, + "step": 2140 + }, + { + "epoch": 0.22584388185654009, + "grad_norm": 0.44935256242752075, + "learning_rate": 0.0015, + "loss": 2.5255, + "step": 2141 + }, + { + "epoch": 0.2259493670886076, + "grad_norm": 0.48910030722618103, + "learning_rate": 0.0015, + "loss": 2.5206, + "step": 2142 + }, + { + "epoch": 0.2260548523206751, + "grad_norm": 0.4770548939704895, + "learning_rate": 0.0015, + "loss": 2.5302, + "step": 2143 + }, + { + "epoch": 0.22616033755274262, + "grad_norm": 0.4247133433818817, + "learning_rate": 0.0015, + "loss": 2.5684, + "step": 2144 + }, + { + "epoch": 0.22626582278481014, + "grad_norm": 0.48735299706459045, + "learning_rate": 0.0015, + "loss": 2.5727, + "step": 2145 + }, + { + "epoch": 0.22637130801687763, + "grad_norm": 0.44292375445365906, + "learning_rate": 0.0015, + "loss": 2.5407, + "step": 2146 + }, + { + "epoch": 0.22647679324894515, + "grad_norm": 0.47562581300735474, + "learning_rate": 0.0015, + "loss": 2.5719, + "step": 2147 + }, + { + "epoch": 0.22658227848101264, + "grad_norm": 0.48726651072502136, + "learning_rate": 0.0015, + "loss": 2.5923, + "step": 2148 + }, + { + "epoch": 0.22668776371308016, + "grad_norm": 0.3614077568054199, + "learning_rate": 0.0015, + "loss": 2.5402, + "step": 2149 + }, + { + "epoch": 0.22679324894514769, + "grad_norm": 0.47714948654174805, + "learning_rate": 0.0015, + "loss": 2.5276, + "step": 2150 + }, + { + "epoch": 
0.22689873417721518, + "grad_norm": 0.39746999740600586, + "learning_rate": 0.0015, + "loss": 2.5925, + "step": 2151 + }, + { + "epoch": 0.2270042194092827, + "grad_norm": 0.4026148021221161, + "learning_rate": 0.0015, + "loss": 2.6063, + "step": 2152 + }, + { + "epoch": 0.22710970464135022, + "grad_norm": 0.3808761537075043, + "learning_rate": 0.0015, + "loss": 2.5273, + "step": 2153 + }, + { + "epoch": 0.2272151898734177, + "grad_norm": 0.4074101746082306, + "learning_rate": 0.0015, + "loss": 2.5712, + "step": 2154 + }, + { + "epoch": 0.22732067510548523, + "grad_norm": 0.43758124113082886, + "learning_rate": 0.0015, + "loss": 2.5697, + "step": 2155 + }, + { + "epoch": 0.22742616033755275, + "grad_norm": 0.38032859563827515, + "learning_rate": 0.0015, + "loss": 2.5226, + "step": 2156 + }, + { + "epoch": 0.22753164556962024, + "grad_norm": 0.48575323820114136, + "learning_rate": 0.0015, + "loss": 2.548, + "step": 2157 + }, + { + "epoch": 0.22763713080168776, + "grad_norm": 0.5355602502822876, + "learning_rate": 0.0015, + "loss": 2.5293, + "step": 2158 + }, + { + "epoch": 0.22774261603375529, + "grad_norm": 0.42533162236213684, + "learning_rate": 0.0015, + "loss": 2.5887, + "step": 2159 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 0.5095113515853882, + "learning_rate": 0.0015, + "loss": 2.5662, + "step": 2160 + }, + { + "epoch": 0.2279535864978903, + "grad_norm": 0.41199827194213867, + "learning_rate": 0.0015, + "loss": 2.5291, + "step": 2161 + }, + { + "epoch": 0.22805907172995782, + "grad_norm": 0.45272931456565857, + "learning_rate": 0.0015, + "loss": 2.5298, + "step": 2162 + }, + { + "epoch": 0.2281645569620253, + "grad_norm": 0.46712347865104675, + "learning_rate": 0.0015, + "loss": 2.5288, + "step": 2163 + }, + { + "epoch": 0.22827004219409283, + "grad_norm": 0.5288627743721008, + "learning_rate": 0.0015, + "loss": 2.5369, + "step": 2164 + }, + { + "epoch": 0.22837552742616032, + "grad_norm": 0.43102461099624634, + "learning_rate": 0.0015, + 
"loss": 2.5189, + "step": 2165 + }, + { + "epoch": 0.22848101265822784, + "grad_norm": 0.4397103190422058, + "learning_rate": 0.0015, + "loss": 2.5719, + "step": 2166 + }, + { + "epoch": 0.22858649789029536, + "grad_norm": 0.4748649597167969, + "learning_rate": 0.0015, + "loss": 2.5711, + "step": 2167 + }, + { + "epoch": 0.22869198312236286, + "grad_norm": 0.3559859097003937, + "learning_rate": 0.0015, + "loss": 2.5562, + "step": 2168 + }, + { + "epoch": 0.22879746835443038, + "grad_norm": 0.4675925672054291, + "learning_rate": 0.0015, + "loss": 2.5344, + "step": 2169 + }, + { + "epoch": 0.2289029535864979, + "grad_norm": 0.511146068572998, + "learning_rate": 0.0015, + "loss": 2.5614, + "step": 2170 + }, + { + "epoch": 0.2290084388185654, + "grad_norm": 0.40332338213920593, + "learning_rate": 0.0015, + "loss": 2.5231, + "step": 2171 + }, + { + "epoch": 0.2291139240506329, + "grad_norm": 0.5226577520370483, + "learning_rate": 0.0015, + "loss": 2.5334, + "step": 2172 + }, + { + "epoch": 0.22921940928270043, + "grad_norm": 0.5002861618995667, + "learning_rate": 0.0015, + "loss": 2.5224, + "step": 2173 + }, + { + "epoch": 0.22932489451476792, + "grad_norm": 0.4230425953865051, + "learning_rate": 0.0015, + "loss": 2.5334, + "step": 2174 + }, + { + "epoch": 0.22943037974683544, + "grad_norm": 0.6185898780822754, + "learning_rate": 0.0015, + "loss": 2.5313, + "step": 2175 + }, + { + "epoch": 0.22953586497890296, + "grad_norm": 0.5030134916305542, + "learning_rate": 0.0015, + "loss": 2.5463, + "step": 2176 + }, + { + "epoch": 0.22964135021097046, + "grad_norm": 0.5691347122192383, + "learning_rate": 0.0015, + "loss": 2.5427, + "step": 2177 + }, + { + "epoch": 0.22974683544303798, + "grad_norm": 0.6596776843070984, + "learning_rate": 0.0015, + "loss": 2.5503, + "step": 2178 + }, + { + "epoch": 0.2298523206751055, + "grad_norm": 0.49750030040740967, + "learning_rate": 0.0015, + "loss": 2.5651, + "step": 2179 + }, + { + "epoch": 0.229957805907173, + "grad_norm": 
0.7051473259925842, + "learning_rate": 0.0015, + "loss": 2.5357, + "step": 2180 + }, + { + "epoch": 0.2300632911392405, + "grad_norm": 0.6256813406944275, + "learning_rate": 0.0015, + "loss": 2.5754, + "step": 2181 + }, + { + "epoch": 0.230168776371308, + "grad_norm": 0.5641756057739258, + "learning_rate": 0.0015, + "loss": 2.5675, + "step": 2182 + }, + { + "epoch": 0.23027426160337552, + "grad_norm": 0.7070826292037964, + "learning_rate": 0.0015, + "loss": 2.5651, + "step": 2183 + }, + { + "epoch": 0.23037974683544304, + "grad_norm": 0.8000948429107666, + "learning_rate": 0.0015, + "loss": 2.5651, + "step": 2184 + }, + { + "epoch": 0.23048523206751054, + "grad_norm": 0.5387428402900696, + "learning_rate": 0.0015, + "loss": 2.5496, + "step": 2185 + }, + { + "epoch": 0.23059071729957806, + "grad_norm": 0.6052926778793335, + "learning_rate": 0.0015, + "loss": 2.5621, + "step": 2186 + }, + { + "epoch": 0.23069620253164558, + "grad_norm": 0.6352716088294983, + "learning_rate": 0.0015, + "loss": 2.5494, + "step": 2187 + }, + { + "epoch": 0.23080168776371307, + "grad_norm": 0.5052357912063599, + "learning_rate": 0.0015, + "loss": 2.556, + "step": 2188 + }, + { + "epoch": 0.2309071729957806, + "grad_norm": 0.5400410294532776, + "learning_rate": 0.0015, + "loss": 2.5332, + "step": 2189 + }, + { + "epoch": 0.2310126582278481, + "grad_norm": 0.44049903750419617, + "learning_rate": 0.0015, + "loss": 2.5643, + "step": 2190 + }, + { + "epoch": 0.2311181434599156, + "grad_norm": 0.43918684124946594, + "learning_rate": 0.0015, + "loss": 2.55, + "step": 2191 + }, + { + "epoch": 0.23122362869198312, + "grad_norm": 0.46745994687080383, + "learning_rate": 0.0015, + "loss": 2.5345, + "step": 2192 + }, + { + "epoch": 0.23132911392405064, + "grad_norm": 0.38615384697914124, + "learning_rate": 0.0015, + "loss": 2.5874, + "step": 2193 + }, + { + "epoch": 0.23143459915611814, + "grad_norm": 0.4963182806968689, + "learning_rate": 0.0015, + "loss": 2.5745, + "step": 2194 + }, + { + "epoch": 
0.23154008438818566, + "grad_norm": 0.3908597528934479, + "learning_rate": 0.0015, + "loss": 2.5433, + "step": 2195 + }, + { + "epoch": 0.23164556962025318, + "grad_norm": 0.47162026166915894, + "learning_rate": 0.0015, + "loss": 2.5184, + "step": 2196 + }, + { + "epoch": 0.23175105485232067, + "grad_norm": 0.4046810269355774, + "learning_rate": 0.0015, + "loss": 2.5385, + "step": 2197 + }, + { + "epoch": 0.2318565400843882, + "grad_norm": 0.5121220350265503, + "learning_rate": 0.0015, + "loss": 2.512, + "step": 2198 + }, + { + "epoch": 0.23196202531645568, + "grad_norm": 0.5585511922836304, + "learning_rate": 0.0015, + "loss": 2.5276, + "step": 2199 + }, + { + "epoch": 0.2320675105485232, + "grad_norm": 0.4000113308429718, + "learning_rate": 0.0015, + "loss": 2.5739, + "step": 2200 + }, + { + "epoch": 0.23217299578059072, + "grad_norm": 0.5217675566673279, + "learning_rate": 0.0015, + "loss": 2.5445, + "step": 2201 + }, + { + "epoch": 0.23227848101265822, + "grad_norm": 0.4602759778499603, + "learning_rate": 0.0015, + "loss": 2.5282, + "step": 2202 + }, + { + "epoch": 0.23238396624472574, + "grad_norm": 0.4055638313293457, + "learning_rate": 0.0015, + "loss": 2.5218, + "step": 2203 + }, + { + "epoch": 0.23248945147679326, + "grad_norm": 0.46049341559410095, + "learning_rate": 0.0015, + "loss": 2.5336, + "step": 2204 + }, + { + "epoch": 0.23259493670886075, + "grad_norm": 0.4094389081001282, + "learning_rate": 0.0015, + "loss": 2.4806, + "step": 2205 + }, + { + "epoch": 0.23270042194092827, + "grad_norm": 0.4343118667602539, + "learning_rate": 0.0015, + "loss": 2.5798, + "step": 2206 + }, + { + "epoch": 0.2328059071729958, + "grad_norm": 0.42354804277420044, + "learning_rate": 0.0015, + "loss": 2.5193, + "step": 2207 + }, + { + "epoch": 0.23291139240506328, + "grad_norm": 0.4063993990421295, + "learning_rate": 0.0015, + "loss": 2.5206, + "step": 2208 + }, + { + "epoch": 0.2330168776371308, + "grad_norm": 0.43064072728157043, + "learning_rate": 0.0015, + "loss": 
2.5164, + "step": 2209 + }, + { + "epoch": 0.23312236286919832, + "grad_norm": 0.37034761905670166, + "learning_rate": 0.0015, + "loss": 2.5622, + "step": 2210 + }, + { + "epoch": 0.23322784810126582, + "grad_norm": 0.45121946930885315, + "learning_rate": 0.0015, + "loss": 2.5231, + "step": 2211 + }, + { + "epoch": 0.23333333333333334, + "grad_norm": 0.4374580979347229, + "learning_rate": 0.0015, + "loss": 2.5441, + "step": 2212 + }, + { + "epoch": 0.23343881856540086, + "grad_norm": 0.413290411233902, + "learning_rate": 0.0015, + "loss": 2.5128, + "step": 2213 + }, + { + "epoch": 0.23354430379746835, + "grad_norm": 0.47446879744529724, + "learning_rate": 0.0015, + "loss": 2.5183, + "step": 2214 + }, + { + "epoch": 0.23364978902953587, + "grad_norm": 0.38226792216300964, + "learning_rate": 0.0015, + "loss": 2.5705, + "step": 2215 + }, + { + "epoch": 0.23375527426160336, + "grad_norm": 0.41188785433769226, + "learning_rate": 0.0015, + "loss": 2.5442, + "step": 2216 + }, + { + "epoch": 0.23386075949367088, + "grad_norm": 0.35999953746795654, + "learning_rate": 0.0015, + "loss": 2.5378, + "step": 2217 + }, + { + "epoch": 0.2339662447257384, + "grad_norm": 0.39946216344833374, + "learning_rate": 0.0015, + "loss": 2.5264, + "step": 2218 + }, + { + "epoch": 0.2340717299578059, + "grad_norm": 0.37992748618125916, + "learning_rate": 0.0015, + "loss": 2.5363, + "step": 2219 + }, + { + "epoch": 0.23417721518987342, + "grad_norm": 0.3665207326412201, + "learning_rate": 0.0015, + "loss": 2.542, + "step": 2220 + }, + { + "epoch": 0.23428270042194094, + "grad_norm": 0.35841625928878784, + "learning_rate": 0.0015, + "loss": 2.5226, + "step": 2221 + }, + { + "epoch": 0.23438818565400843, + "grad_norm": 0.42835691571235657, + "learning_rate": 0.0015, + "loss": 2.487, + "step": 2222 + }, + { + "epoch": 0.23449367088607595, + "grad_norm": 0.41972318291664124, + "learning_rate": 0.0015, + "loss": 2.5337, + "step": 2223 + }, + { + "epoch": 0.23459915611814347, + "grad_norm": 
0.4241740107536316, + "learning_rate": 0.0015, + "loss": 2.5399, + "step": 2224 + }, + { + "epoch": 0.23470464135021096, + "grad_norm": 0.4100624918937683, + "learning_rate": 0.0015, + "loss": 2.5168, + "step": 2225 + }, + { + "epoch": 0.23481012658227848, + "grad_norm": 0.40058135986328125, + "learning_rate": 0.0015, + "loss": 2.5413, + "step": 2226 + }, + { + "epoch": 0.234915611814346, + "grad_norm": 0.4589844346046448, + "learning_rate": 0.0015, + "loss": 2.5185, + "step": 2227 + }, + { + "epoch": 0.2350210970464135, + "grad_norm": 0.39440929889678955, + "learning_rate": 0.0015, + "loss": 2.5305, + "step": 2228 + }, + { + "epoch": 0.23512658227848102, + "grad_norm": 0.38786476850509644, + "learning_rate": 0.0015, + "loss": 2.5466, + "step": 2229 + }, + { + "epoch": 0.23523206751054854, + "grad_norm": 0.44065630435943604, + "learning_rate": 0.0015, + "loss": 2.4997, + "step": 2230 + }, + { + "epoch": 0.23533755274261603, + "grad_norm": 0.39689183235168457, + "learning_rate": 0.0015, + "loss": 2.5212, + "step": 2231 + }, + { + "epoch": 0.23544303797468355, + "grad_norm": 0.4081087112426758, + "learning_rate": 0.0015, + "loss": 2.5398, + "step": 2232 + }, + { + "epoch": 0.23554852320675104, + "grad_norm": 0.43143436312675476, + "learning_rate": 0.0015, + "loss": 2.5349, + "step": 2233 + }, + { + "epoch": 0.23565400843881856, + "grad_norm": 0.44519278407096863, + "learning_rate": 0.0015, + "loss": 2.5219, + "step": 2234 + }, + { + "epoch": 0.23575949367088608, + "grad_norm": 0.54099041223526, + "learning_rate": 0.0015, + "loss": 2.5062, + "step": 2235 + }, + { + "epoch": 0.23586497890295358, + "grad_norm": 0.4051193594932556, + "learning_rate": 0.0015, + "loss": 2.5063, + "step": 2236 + }, + { + "epoch": 0.2359704641350211, + "grad_norm": 0.4313126504421234, + "learning_rate": 0.0015, + "loss": 2.4917, + "step": 2237 + }, + { + "epoch": 0.23607594936708862, + "grad_norm": 0.49297720193862915, + "learning_rate": 0.0015, + "loss": 2.5691, + "step": 2238 + }, + { + 
"epoch": 0.2361814345991561, + "grad_norm": 0.49202534556388855, + "learning_rate": 0.0015, + "loss": 2.5332, + "step": 2239 + }, + { + "epoch": 0.23628691983122363, + "grad_norm": 0.46540993452072144, + "learning_rate": 0.0015, + "loss": 2.5139, + "step": 2240 + }, + { + "epoch": 0.23639240506329115, + "grad_norm": 0.4612337350845337, + "learning_rate": 0.0015, + "loss": 2.5259, + "step": 2241 + }, + { + "epoch": 0.23649789029535864, + "grad_norm": 0.4359237253665924, + "learning_rate": 0.0015, + "loss": 2.5028, + "step": 2242 + }, + { + "epoch": 0.23660337552742616, + "grad_norm": 0.48532572388648987, + "learning_rate": 0.0015, + "loss": 2.5107, + "step": 2243 + }, + { + "epoch": 0.23670886075949368, + "grad_norm": 0.4884185791015625, + "learning_rate": 0.0015, + "loss": 2.5321, + "step": 2244 + }, + { + "epoch": 0.23681434599156118, + "grad_norm": 0.45878520607948303, + "learning_rate": 0.0015, + "loss": 2.5238, + "step": 2245 + }, + { + "epoch": 0.2369198312236287, + "grad_norm": 0.5449734926223755, + "learning_rate": 0.0015, + "loss": 2.5646, + "step": 2246 + }, + { + "epoch": 0.2370253164556962, + "grad_norm": 0.5645663142204285, + "learning_rate": 0.0015, + "loss": 2.509, + "step": 2247 + }, + { + "epoch": 0.2371308016877637, + "grad_norm": 0.380401074886322, + "learning_rate": 0.0015, + "loss": 2.5308, + "step": 2248 + }, + { + "epoch": 0.23723628691983123, + "grad_norm": 0.5746028423309326, + "learning_rate": 0.0015, + "loss": 2.5557, + "step": 2249 + }, + { + "epoch": 0.23734177215189872, + "grad_norm": 0.5399616956710815, + "learning_rate": 0.0015, + "loss": 2.5095, + "step": 2250 + }, + { + "epoch": 0.23744725738396624, + "grad_norm": 0.4502820372581482, + "learning_rate": 0.0015, + "loss": 2.5075, + "step": 2251 + }, + { + "epoch": 0.23755274261603376, + "grad_norm": 0.5821545720100403, + "learning_rate": 0.0015, + "loss": 2.5232, + "step": 2252 + }, + { + "epoch": 0.23765822784810126, + "grad_norm": 0.5515171885490417, + "learning_rate": 0.0015, + 
"loss": 2.5318, + "step": 2253 + }, + { + "epoch": 0.23776371308016878, + "grad_norm": 0.47724348306655884, + "learning_rate": 0.0015, + "loss": 2.4983, + "step": 2254 + }, + { + "epoch": 0.2378691983122363, + "grad_norm": 0.4754248559474945, + "learning_rate": 0.0015, + "loss": 2.506, + "step": 2255 + }, + { + "epoch": 0.2379746835443038, + "grad_norm": 0.5328887104988098, + "learning_rate": 0.0015, + "loss": 2.5269, + "step": 2256 + }, + { + "epoch": 0.2380801687763713, + "grad_norm": 0.4551731050014496, + "learning_rate": 0.0015, + "loss": 2.4926, + "step": 2257 + }, + { + "epoch": 0.23818565400843883, + "grad_norm": 0.45196548104286194, + "learning_rate": 0.0015, + "loss": 2.5229, + "step": 2258 + }, + { + "epoch": 0.23829113924050632, + "grad_norm": 0.4988562762737274, + "learning_rate": 0.0015, + "loss": 2.5555, + "step": 2259 + }, + { + "epoch": 0.23839662447257384, + "grad_norm": 0.42193326354026794, + "learning_rate": 0.0015, + "loss": 2.5454, + "step": 2260 + }, + { + "epoch": 0.23850210970464136, + "grad_norm": 0.4158322215080261, + "learning_rate": 0.0015, + "loss": 2.526, + "step": 2261 + }, + { + "epoch": 0.23860759493670886, + "grad_norm": 0.4743303656578064, + "learning_rate": 0.0015, + "loss": 2.4936, + "step": 2262 + }, + { + "epoch": 0.23871308016877638, + "grad_norm": 0.38717207312583923, + "learning_rate": 0.0015, + "loss": 2.5247, + "step": 2263 + }, + { + "epoch": 0.23881856540084387, + "grad_norm": 0.4337318539619446, + "learning_rate": 0.0015, + "loss": 2.5613, + "step": 2264 + }, + { + "epoch": 0.2389240506329114, + "grad_norm": 0.4994328022003174, + "learning_rate": 0.0015, + "loss": 2.5514, + "step": 2265 + }, + { + "epoch": 0.2390295358649789, + "grad_norm": 0.3936751186847687, + "learning_rate": 0.0015, + "loss": 2.5293, + "step": 2266 + }, + { + "epoch": 0.2391350210970464, + "grad_norm": 0.4827611744403839, + "learning_rate": 0.0015, + "loss": 2.5603, + "step": 2267 + }, + { + "epoch": 0.23924050632911392, + "grad_norm": 
0.4766075313091278, + "learning_rate": 0.0015, + "loss": 2.5347, + "step": 2268 + }, + { + "epoch": 0.23934599156118144, + "grad_norm": 0.39400529861450195, + "learning_rate": 0.0015, + "loss": 2.5098, + "step": 2269 + }, + { + "epoch": 0.23945147679324894, + "grad_norm": 0.4866008460521698, + "learning_rate": 0.0015, + "loss": 2.5343, + "step": 2270 + }, + { + "epoch": 0.23955696202531646, + "grad_norm": 0.4162564277648926, + "learning_rate": 0.0015, + "loss": 2.5123, + "step": 2271 + }, + { + "epoch": 0.23966244725738398, + "grad_norm": 0.4027601182460785, + "learning_rate": 0.0015, + "loss": 2.4935, + "step": 2272 + }, + { + "epoch": 0.23976793248945147, + "grad_norm": 0.4716659188270569, + "learning_rate": 0.0015, + "loss": 2.5141, + "step": 2273 + }, + { + "epoch": 0.239873417721519, + "grad_norm": 0.42601415514945984, + "learning_rate": 0.0015, + "loss": 2.533, + "step": 2274 + }, + { + "epoch": 0.2399789029535865, + "grad_norm": 0.4054941236972809, + "learning_rate": 0.0015, + "loss": 2.5042, + "step": 2275 + }, + { + "epoch": 0.240084388185654, + "grad_norm": 0.5366954803466797, + "learning_rate": 0.0015, + "loss": 2.5286, + "step": 2276 + }, + { + "epoch": 0.24018987341772152, + "grad_norm": 0.5468725562095642, + "learning_rate": 0.0015, + "loss": 2.5268, + "step": 2277 + }, + { + "epoch": 0.24029535864978904, + "grad_norm": 0.37918031215667725, + "learning_rate": 0.0015, + "loss": 2.522, + "step": 2278 + }, + { + "epoch": 0.24040084388185654, + "grad_norm": 0.4943424463272095, + "learning_rate": 0.0015, + "loss": 2.5414, + "step": 2279 + }, + { + "epoch": 0.24050632911392406, + "grad_norm": 0.4957751929759979, + "learning_rate": 0.0015, + "loss": 2.5317, + "step": 2280 + }, + { + "epoch": 0.24061181434599155, + "grad_norm": 0.4346116781234741, + "learning_rate": 0.0015, + "loss": 2.5106, + "step": 2281 + }, + { + "epoch": 0.24071729957805907, + "grad_norm": 0.472502201795578, + "learning_rate": 0.0015, + "loss": 2.4999, + "step": 2282 + }, + { + "epoch": 
0.2408227848101266, + "grad_norm": 0.5759377479553223, + "learning_rate": 0.0015, + "loss": 2.5089, + "step": 2283 + }, + { + "epoch": 0.24092827004219408, + "grad_norm": 0.41717249155044556, + "learning_rate": 0.0015, + "loss": 2.4802, + "step": 2284 + }, + { + "epoch": 0.2410337552742616, + "grad_norm": 0.5134925246238708, + "learning_rate": 0.0015, + "loss": 2.5229, + "step": 2285 + }, + { + "epoch": 0.24113924050632912, + "grad_norm": 0.4806291162967682, + "learning_rate": 0.0015, + "loss": 2.5501, + "step": 2286 + }, + { + "epoch": 0.24124472573839661, + "grad_norm": 0.4252064824104309, + "learning_rate": 0.0015, + "loss": 2.5221, + "step": 2287 + }, + { + "epoch": 0.24135021097046414, + "grad_norm": 0.5157855153083801, + "learning_rate": 0.0015, + "loss": 2.5132, + "step": 2288 + }, + { + "epoch": 0.24145569620253166, + "grad_norm": 0.4990512728691101, + "learning_rate": 0.0015, + "loss": 2.5078, + "step": 2289 + }, + { + "epoch": 0.24156118143459915, + "grad_norm": 0.4440288543701172, + "learning_rate": 0.0015, + "loss": 2.54, + "step": 2290 + }, + { + "epoch": 0.24166666666666667, + "grad_norm": 0.3885575234889984, + "learning_rate": 0.0015, + "loss": 2.5053, + "step": 2291 + }, + { + "epoch": 0.2417721518987342, + "grad_norm": 0.5008754730224609, + "learning_rate": 0.0015, + "loss": 2.4802, + "step": 2292 + }, + { + "epoch": 0.24187763713080168, + "grad_norm": 0.40832144021987915, + "learning_rate": 0.0015, + "loss": 2.5032, + "step": 2293 + }, + { + "epoch": 0.2419831223628692, + "grad_norm": 0.4844641089439392, + "learning_rate": 0.0015, + "loss": 2.5275, + "step": 2294 + }, + { + "epoch": 0.24208860759493672, + "grad_norm": 0.46654337644577026, + "learning_rate": 0.0015, + "loss": 2.5056, + "step": 2295 + }, + { + "epoch": 0.24219409282700421, + "grad_norm": 0.39009085297584534, + "learning_rate": 0.0015, + "loss": 2.5034, + "step": 2296 + }, + { + "epoch": 0.24229957805907174, + "grad_norm": 0.45757678151130676, + "learning_rate": 0.0015, + "loss": 
2.5627, + "step": 2297 + }, + { + "epoch": 0.24240506329113923, + "grad_norm": 0.5127370357513428, + "learning_rate": 0.0015, + "loss": 2.4913, + "step": 2298 + }, + { + "epoch": 0.24251054852320675, + "grad_norm": 0.38129231333732605, + "learning_rate": 0.0015, + "loss": 2.513, + "step": 2299 + }, + { + "epoch": 0.24261603375527427, + "grad_norm": 0.4601955711841583, + "learning_rate": 0.0015, + "loss": 2.473, + "step": 2300 + }, + { + "epoch": 0.24272151898734176, + "grad_norm": 0.4805368185043335, + "learning_rate": 0.0015, + "loss": 2.5357, + "step": 2301 + }, + { + "epoch": 0.24282700421940928, + "grad_norm": 0.404734343290329, + "learning_rate": 0.0015, + "loss": 2.4983, + "step": 2302 + }, + { + "epoch": 0.2429324894514768, + "grad_norm": 0.5793467164039612, + "learning_rate": 0.0015, + "loss": 2.5379, + "step": 2303 + }, + { + "epoch": 0.2430379746835443, + "grad_norm": 0.7046868801116943, + "learning_rate": 0.0015, + "loss": 2.526, + "step": 2304 + }, + { + "epoch": 0.24314345991561181, + "grad_norm": 0.44611427187919617, + "learning_rate": 0.0015, + "loss": 2.4779, + "step": 2305 + }, + { + "epoch": 0.24324894514767934, + "grad_norm": 0.6458744406700134, + "learning_rate": 0.0015, + "loss": 2.5391, + "step": 2306 + }, + { + "epoch": 0.24335443037974683, + "grad_norm": 0.6441597938537598, + "learning_rate": 0.0015, + "loss": 2.5037, + "step": 2307 + }, + { + "epoch": 0.24345991561181435, + "grad_norm": 0.4945175051689148, + "learning_rate": 0.0015, + "loss": 2.4913, + "step": 2308 + }, + { + "epoch": 0.24356540084388187, + "grad_norm": 0.8494849801063538, + "learning_rate": 0.0015, + "loss": 2.5529, + "step": 2309 + }, + { + "epoch": 0.24367088607594936, + "grad_norm": 0.5763487219810486, + "learning_rate": 0.0015, + "loss": 2.5441, + "step": 2310 + }, + { + "epoch": 0.24377637130801688, + "grad_norm": 0.5762405395507812, + "learning_rate": 0.0015, + "loss": 2.502, + "step": 2311 + }, + { + "epoch": 0.2438818565400844, + "grad_norm": 0.5729879140853882, + 
"learning_rate": 0.0015, + "loss": 2.5408, + "step": 2312 + }, + { + "epoch": 0.2439873417721519, + "grad_norm": 0.4753135144710541, + "learning_rate": 0.0015, + "loss": 2.5569, + "step": 2313 + }, + { + "epoch": 0.24409282700421941, + "grad_norm": 0.5614451766014099, + "learning_rate": 0.0015, + "loss": 2.5117, + "step": 2314 + }, + { + "epoch": 0.2441983122362869, + "grad_norm": 0.4939771294593811, + "learning_rate": 0.0015, + "loss": 2.5249, + "step": 2315 + }, + { + "epoch": 0.24430379746835443, + "grad_norm": 0.7476122975349426, + "learning_rate": 0.0015, + "loss": 2.5487, + "step": 2316 + }, + { + "epoch": 0.24440928270042195, + "grad_norm": 0.44980067014694214, + "learning_rate": 0.0015, + "loss": 2.4929, + "step": 2317 + }, + { + "epoch": 0.24451476793248944, + "grad_norm": 0.587297797203064, + "learning_rate": 0.0015, + "loss": 2.4995, + "step": 2318 + }, + { + "epoch": 0.24462025316455696, + "grad_norm": 0.5404728651046753, + "learning_rate": 0.0015, + "loss": 2.5451, + "step": 2319 + }, + { + "epoch": 0.24472573839662448, + "grad_norm": 0.42276760935783386, + "learning_rate": 0.0015, + "loss": 2.4893, + "step": 2320 + }, + { + "epoch": 0.24483122362869197, + "grad_norm": 0.5517118573188782, + "learning_rate": 0.0015, + "loss": 2.5148, + "step": 2321 + }, + { + "epoch": 0.2449367088607595, + "grad_norm": 0.4073682129383087, + "learning_rate": 0.0015, + "loss": 2.5296, + "step": 2322 + }, + { + "epoch": 0.24504219409282701, + "grad_norm": 0.5313929319381714, + "learning_rate": 0.0015, + "loss": 2.4987, + "step": 2323 + }, + { + "epoch": 0.2451476793248945, + "grad_norm": 0.45905977487564087, + "learning_rate": 0.0015, + "loss": 2.5127, + "step": 2324 + }, + { + "epoch": 0.24525316455696203, + "grad_norm": 0.5163031220436096, + "learning_rate": 0.0015, + "loss": 2.4996, + "step": 2325 + }, + { + "epoch": 0.24535864978902955, + "grad_norm": 0.48720043897628784, + "learning_rate": 0.0015, + "loss": 2.5118, + "step": 2326 + }, + { + "epoch": 
0.24546413502109704, + "grad_norm": 0.36923107504844666, + "learning_rate": 0.0015, + "loss": 2.4911, + "step": 2327 + }, + { + "epoch": 0.24556962025316456, + "grad_norm": 0.4320174753665924, + "learning_rate": 0.0015, + "loss": 2.5104, + "step": 2328 + }, + { + "epoch": 0.24567510548523205, + "grad_norm": 0.36719226837158203, + "learning_rate": 0.0015, + "loss": 2.4908, + "step": 2329 + }, + { + "epoch": 0.24578059071729957, + "grad_norm": 0.3904745578765869, + "learning_rate": 0.0015, + "loss": 2.4906, + "step": 2330 + }, + { + "epoch": 0.2458860759493671, + "grad_norm": 0.36669033765792847, + "learning_rate": 0.0015, + "loss": 2.4679, + "step": 2331 + }, + { + "epoch": 0.2459915611814346, + "grad_norm": 0.38059914112091064, + "learning_rate": 0.0015, + "loss": 2.5145, + "step": 2332 + }, + { + "epoch": 0.2460970464135021, + "grad_norm": 0.3712816536426544, + "learning_rate": 0.0015, + "loss": 2.5027, + "step": 2333 + }, + { + "epoch": 0.24620253164556963, + "grad_norm": 0.3778960108757019, + "learning_rate": 0.0015, + "loss": 2.5012, + "step": 2334 + }, + { + "epoch": 0.24630801687763712, + "grad_norm": 0.3625120222568512, + "learning_rate": 0.0015, + "loss": 2.5098, + "step": 2335 + }, + { + "epoch": 0.24641350210970464, + "grad_norm": 0.4402901232242584, + "learning_rate": 0.0015, + "loss": 2.5318, + "step": 2336 + }, + { + "epoch": 0.24651898734177216, + "grad_norm": 0.4168432652950287, + "learning_rate": 0.0015, + "loss": 2.4431, + "step": 2337 + }, + { + "epoch": 0.24662447257383965, + "grad_norm": 0.39726895093917847, + "learning_rate": 0.0015, + "loss": 2.4917, + "step": 2338 + }, + { + "epoch": 0.24672995780590717, + "grad_norm": 0.4571531414985657, + "learning_rate": 0.0015, + "loss": 2.4891, + "step": 2339 + }, + { + "epoch": 0.2468354430379747, + "grad_norm": 0.4050256609916687, + "learning_rate": 0.0015, + "loss": 2.4979, + "step": 2340 + }, + { + "epoch": 0.2469409282700422, + "grad_norm": 0.34094446897506714, + "learning_rate": 0.0015, + "loss": 
2.5205, + "step": 2341 + }, + { + "epoch": 0.2470464135021097, + "grad_norm": 0.47350695729255676, + "learning_rate": 0.0015, + "loss": 2.5008, + "step": 2342 + }, + { + "epoch": 0.24715189873417723, + "grad_norm": 0.40698036551475525, + "learning_rate": 0.0015, + "loss": 2.4927, + "step": 2343 + }, + { + "epoch": 0.24725738396624472, + "grad_norm": 0.41465631127357483, + "learning_rate": 0.0015, + "loss": 2.517, + "step": 2344 + }, + { + "epoch": 0.24736286919831224, + "grad_norm": 0.4548913240432739, + "learning_rate": 0.0015, + "loss": 2.4753, + "step": 2345 + }, + { + "epoch": 0.24746835443037973, + "grad_norm": 0.4131084680557251, + "learning_rate": 0.0015, + "loss": 2.498, + "step": 2346 + }, + { + "epoch": 0.24757383966244725, + "grad_norm": 0.3931244909763336, + "learning_rate": 0.0015, + "loss": 2.4675, + "step": 2347 + }, + { + "epoch": 0.24767932489451477, + "grad_norm": 0.41909387707710266, + "learning_rate": 0.0015, + "loss": 2.4659, + "step": 2348 + }, + { + "epoch": 0.24778481012658227, + "grad_norm": 0.3494201898574829, + "learning_rate": 0.0015, + "loss": 2.485, + "step": 2349 + }, + { + "epoch": 0.2478902953586498, + "grad_norm": 0.4469168484210968, + "learning_rate": 0.0015, + "loss": 2.5184, + "step": 2350 + }, + { + "epoch": 0.2479957805907173, + "grad_norm": 0.38097211718559265, + "learning_rate": 0.0015, + "loss": 2.4758, + "step": 2351 + }, + { + "epoch": 0.2481012658227848, + "grad_norm": 0.4478161334991455, + "learning_rate": 0.0015, + "loss": 2.5187, + "step": 2352 + }, + { + "epoch": 0.24820675105485232, + "grad_norm": 0.4042988717556, + "learning_rate": 0.0015, + "loss": 2.4741, + "step": 2353 + }, + { + "epoch": 0.24831223628691984, + "grad_norm": 0.36697301268577576, + "learning_rate": 0.0015, + "loss": 2.5246, + "step": 2354 + }, + { + "epoch": 0.24841772151898733, + "grad_norm": 0.37469860911369324, + "learning_rate": 0.0015, + "loss": 2.5043, + "step": 2355 + }, + { + "epoch": 0.24852320675105485, + "grad_norm": 0.3844299912452698, 
+ "learning_rate": 0.0015, + "loss": 2.4744, + "step": 2356 + }, + { + "epoch": 0.24862869198312237, + "grad_norm": 0.47963449358940125, + "learning_rate": 0.0015, + "loss": 2.5074, + "step": 2357 + }, + { + "epoch": 0.24873417721518987, + "grad_norm": 0.43908587098121643, + "learning_rate": 0.0015, + "loss": 2.5186, + "step": 2358 + }, + { + "epoch": 0.2488396624472574, + "grad_norm": 0.4097568094730377, + "learning_rate": 0.0015, + "loss": 2.498, + "step": 2359 + }, + { + "epoch": 0.2489451476793249, + "grad_norm": 0.3947514593601227, + "learning_rate": 0.0015, + "loss": 2.4852, + "step": 2360 + }, + { + "epoch": 0.2490506329113924, + "grad_norm": 0.38210707902908325, + "learning_rate": 0.0015, + "loss": 2.4905, + "step": 2361 + }, + { + "epoch": 0.24915611814345992, + "grad_norm": 0.3849688172340393, + "learning_rate": 0.0015, + "loss": 2.503, + "step": 2362 + }, + { + "epoch": 0.2492616033755274, + "grad_norm": 0.3863074779510498, + "learning_rate": 0.0015, + "loss": 2.5116, + "step": 2363 + }, + { + "epoch": 0.24936708860759493, + "grad_norm": 0.39212411642074585, + "learning_rate": 0.0015, + "loss": 2.4756, + "step": 2364 + }, + { + "epoch": 0.24947257383966245, + "grad_norm": 0.34919172525405884, + "learning_rate": 0.0015, + "loss": 2.4797, + "step": 2365 + }, + { + "epoch": 0.24957805907172995, + "grad_norm": 0.4101690649986267, + "learning_rate": 0.0015, + "loss": 2.4699, + "step": 2366 + }, + { + "epoch": 0.24968354430379747, + "grad_norm": 0.41372621059417725, + "learning_rate": 0.0015, + "loss": 2.5114, + "step": 2367 + }, + { + "epoch": 0.249789029535865, + "grad_norm": 0.37746965885162354, + "learning_rate": 0.0015, + "loss": 2.5169, + "step": 2368 + }, + { + "epoch": 0.24989451476793248, + "grad_norm": 0.4595271348953247, + "learning_rate": 0.0015, + "loss": 2.513, + "step": 2369 + }, + { + "epoch": 0.25, + "grad_norm": 0.5409361720085144, + "learning_rate": 0.0015, + "loss": 2.498, + "step": 2370 + }, + { + "epoch": 0.2501054852320675, + 
"grad_norm": 0.39690086245536804, + "learning_rate": 0.0015, + "loss": 2.4999, + "step": 2371 + }, + { + "epoch": 0.25021097046413504, + "grad_norm": 0.4176957607269287, + "learning_rate": 0.0015, + "loss": 2.4727, + "step": 2372 + }, + { + "epoch": 0.25031645569620253, + "grad_norm": 0.48397889733314514, + "learning_rate": 0.0015, + "loss": 2.484, + "step": 2373 + }, + { + "epoch": 0.25042194092827, + "grad_norm": 0.4503740966320038, + "learning_rate": 0.0015, + "loss": 2.4982, + "step": 2374 + }, + { + "epoch": 0.2505274261603376, + "grad_norm": 0.46063604950904846, + "learning_rate": 0.0015, + "loss": 2.5001, + "step": 2375 + }, + { + "epoch": 0.25063291139240507, + "grad_norm": 0.4397942125797272, + "learning_rate": 0.0015, + "loss": 2.5254, + "step": 2376 + }, + { + "epoch": 0.25073839662447256, + "grad_norm": 0.5056261420249939, + "learning_rate": 0.0015, + "loss": 2.4726, + "step": 2377 + }, + { + "epoch": 0.2508438818565401, + "grad_norm": 0.44094541668891907, + "learning_rate": 0.0015, + "loss": 2.4994, + "step": 2378 + }, + { + "epoch": 0.2509493670886076, + "grad_norm": 0.40925779938697815, + "learning_rate": 0.0015, + "loss": 2.5102, + "step": 2379 + }, + { + "epoch": 0.2510548523206751, + "grad_norm": 0.4627247154712677, + "learning_rate": 0.0015, + "loss": 2.4925, + "step": 2380 + }, + { + "epoch": 0.25116033755274264, + "grad_norm": 0.4023042321205139, + "learning_rate": 0.0015, + "loss": 2.52, + "step": 2381 + }, + { + "epoch": 0.25126582278481013, + "grad_norm": 0.40755319595336914, + "learning_rate": 0.0015, + "loss": 2.4674, + "step": 2382 + }, + { + "epoch": 0.2513713080168776, + "grad_norm": 0.36364802718162537, + "learning_rate": 0.0015, + "loss": 2.4959, + "step": 2383 + }, + { + "epoch": 0.2514767932489452, + "grad_norm": 0.37130939960479736, + "learning_rate": 0.0015, + "loss": 2.4433, + "step": 2384 + }, + { + "epoch": 0.25158227848101267, + "grad_norm": 0.36010631918907166, + "learning_rate": 0.0015, + "loss": 2.5132, + "step": 2385 + }, 
+ { + "epoch": 0.25168776371308016, + "grad_norm": 0.4242200553417206, + "learning_rate": 0.0015, + "loss": 2.4776, + "step": 2386 + }, + { + "epoch": 0.25179324894514765, + "grad_norm": 0.43281492590904236, + "learning_rate": 0.0015, + "loss": 2.5345, + "step": 2387 + }, + { + "epoch": 0.2518987341772152, + "grad_norm": 0.3726690411567688, + "learning_rate": 0.0015, + "loss": 2.4556, + "step": 2388 + }, + { + "epoch": 0.2520042194092827, + "grad_norm": 0.3700355291366577, + "learning_rate": 0.0015, + "loss": 2.4558, + "step": 2389 + }, + { + "epoch": 0.2521097046413502, + "grad_norm": 0.41882815957069397, + "learning_rate": 0.0015, + "loss": 2.4869, + "step": 2390 + }, + { + "epoch": 0.25221518987341773, + "grad_norm": 0.35976943373680115, + "learning_rate": 0.0015, + "loss": 2.4703, + "step": 2391 + }, + { + "epoch": 0.2523206751054852, + "grad_norm": 0.38617590069770813, + "learning_rate": 0.0015, + "loss": 2.4639, + "step": 2392 + }, + { + "epoch": 0.2524261603375527, + "grad_norm": 0.43850570917129517, + "learning_rate": 0.0015, + "loss": 2.4598, + "step": 2393 + }, + { + "epoch": 0.25253164556962027, + "grad_norm": 0.4428946375846863, + "learning_rate": 0.0015, + "loss": 2.4929, + "step": 2394 + }, + { + "epoch": 0.25263713080168776, + "grad_norm": 0.36452969908714294, + "learning_rate": 0.0015, + "loss": 2.4537, + "step": 2395 + }, + { + "epoch": 0.25274261603375525, + "grad_norm": 0.3951815962791443, + "learning_rate": 0.0015, + "loss": 2.501, + "step": 2396 + }, + { + "epoch": 0.2528481012658228, + "grad_norm": 0.397956520318985, + "learning_rate": 0.0015, + "loss": 2.5328, + "step": 2397 + }, + { + "epoch": 0.2529535864978903, + "grad_norm": 0.4026815891265869, + "learning_rate": 0.0015, + "loss": 2.4946, + "step": 2398 + }, + { + "epoch": 0.2530590717299578, + "grad_norm": 0.4413904845714569, + "learning_rate": 0.0015, + "loss": 2.4986, + "step": 2399 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 0.40433740615844727, + "learning_rate": 0.0015, 
+ "loss": 2.5025, + "step": 2400 + }, + { + "epoch": 0.2532700421940928, + "grad_norm": 0.4245668649673462, + "learning_rate": 0.0015, + "loss": 2.4629, + "step": 2401 + }, + { + "epoch": 0.2533755274261603, + "grad_norm": 0.4825747311115265, + "learning_rate": 0.0015, + "loss": 2.5027, + "step": 2402 + }, + { + "epoch": 0.25348101265822787, + "grad_norm": 0.4318208396434784, + "learning_rate": 0.0015, + "loss": 2.5142, + "step": 2403 + }, + { + "epoch": 0.25358649789029536, + "grad_norm": 0.4000312387943268, + "learning_rate": 0.0015, + "loss": 2.4483, + "step": 2404 + }, + { + "epoch": 0.25369198312236285, + "grad_norm": 0.5037655830383301, + "learning_rate": 0.0015, + "loss": 2.5294, + "step": 2405 + }, + { + "epoch": 0.2537974683544304, + "grad_norm": 0.5065974593162537, + "learning_rate": 0.0015, + "loss": 2.4968, + "step": 2406 + }, + { + "epoch": 0.2539029535864979, + "grad_norm": 0.4090733528137207, + "learning_rate": 0.0015, + "loss": 2.5094, + "step": 2407 + }, + { + "epoch": 0.2540084388185654, + "grad_norm": 0.4350174367427826, + "learning_rate": 0.0015, + "loss": 2.4593, + "step": 2408 + }, + { + "epoch": 0.25411392405063293, + "grad_norm": 0.4286699593067169, + "learning_rate": 0.0015, + "loss": 2.4461, + "step": 2409 + }, + { + "epoch": 0.2542194092827004, + "grad_norm": 0.3501421809196472, + "learning_rate": 0.0015, + "loss": 2.5138, + "step": 2410 + }, + { + "epoch": 0.2543248945147679, + "grad_norm": 0.3732633590698242, + "learning_rate": 0.0015, + "loss": 2.4946, + "step": 2411 + }, + { + "epoch": 0.25443037974683547, + "grad_norm": 0.4336225390434265, + "learning_rate": 0.0015, + "loss": 2.4837, + "step": 2412 + }, + { + "epoch": 0.25453586497890296, + "grad_norm": 0.3570307791233063, + "learning_rate": 0.0015, + "loss": 2.4769, + "step": 2413 + }, + { + "epoch": 0.25464135021097045, + "grad_norm": 0.41874486207962036, + "learning_rate": 0.0015, + "loss": 2.4624, + "step": 2414 + }, + { + "epoch": 0.254746835443038, + "grad_norm": 
0.38182884454727173, + "learning_rate": 0.0015, + "loss": 2.4766, + "step": 2415 + }, + { + "epoch": 0.2548523206751055, + "grad_norm": 0.41855818033218384, + "learning_rate": 0.0015, + "loss": 2.4865, + "step": 2416 + }, + { + "epoch": 0.254957805907173, + "grad_norm": 0.4646444022655487, + "learning_rate": 0.0015, + "loss": 2.453, + "step": 2417 + }, + { + "epoch": 0.25506329113924053, + "grad_norm": 0.3802734315395355, + "learning_rate": 0.0015, + "loss": 2.4932, + "step": 2418 + }, + { + "epoch": 0.255168776371308, + "grad_norm": 0.40814530849456787, + "learning_rate": 0.0015, + "loss": 2.4547, + "step": 2419 + }, + { + "epoch": 0.2552742616033755, + "grad_norm": 0.48615145683288574, + "learning_rate": 0.0015, + "loss": 2.4968, + "step": 2420 + }, + { + "epoch": 0.255379746835443, + "grad_norm": 0.38677769899368286, + "learning_rate": 0.0015, + "loss": 2.4482, + "step": 2421 + }, + { + "epoch": 0.25548523206751056, + "grad_norm": 0.43495064973831177, + "learning_rate": 0.0015, + "loss": 2.5424, + "step": 2422 + }, + { + "epoch": 0.25559071729957805, + "grad_norm": 0.5029717087745667, + "learning_rate": 0.0015, + "loss": 2.5284, + "step": 2423 + }, + { + "epoch": 0.25569620253164554, + "grad_norm": 0.47635766863822937, + "learning_rate": 0.0015, + "loss": 2.467, + "step": 2424 + }, + { + "epoch": 0.2558016877637131, + "grad_norm": 0.4483652710914612, + "learning_rate": 0.0015, + "loss": 2.4502, + "step": 2425 + }, + { + "epoch": 0.2559071729957806, + "grad_norm": 0.4118058383464813, + "learning_rate": 0.0015, + "loss": 2.4768, + "step": 2426 + }, + { + "epoch": 0.2560126582278481, + "grad_norm": 0.48467281460762024, + "learning_rate": 0.0015, + "loss": 2.5059, + "step": 2427 + }, + { + "epoch": 0.2561181434599156, + "grad_norm": 0.4171231985092163, + "learning_rate": 0.0015, + "loss": 2.4485, + "step": 2428 + }, + { + "epoch": 0.2562236286919831, + "grad_norm": 0.5478450655937195, + "learning_rate": 0.0015, + "loss": 2.45, + "step": 2429 + }, + { + "epoch": 
0.2563291139240506, + "grad_norm": 0.3873675465583801, + "learning_rate": 0.0015, + "loss": 2.4522, + "step": 2430 + }, + { + "epoch": 0.25643459915611816, + "grad_norm": 0.4452742040157318, + "learning_rate": 0.0015, + "loss": 2.4883, + "step": 2431 + }, + { + "epoch": 0.25654008438818565, + "grad_norm": 0.5229427218437195, + "learning_rate": 0.0015, + "loss": 2.4927, + "step": 2432 + }, + { + "epoch": 0.25664556962025314, + "grad_norm": 0.48670485615730286, + "learning_rate": 0.0015, + "loss": 2.503, + "step": 2433 + }, + { + "epoch": 0.2567510548523207, + "grad_norm": 0.45003989338874817, + "learning_rate": 0.0015, + "loss": 2.441, + "step": 2434 + }, + { + "epoch": 0.2568565400843882, + "grad_norm": 0.4135567843914032, + "learning_rate": 0.0015, + "loss": 2.4635, + "step": 2435 + }, + { + "epoch": 0.2569620253164557, + "grad_norm": 0.4418054521083832, + "learning_rate": 0.0015, + "loss": 2.4663, + "step": 2436 + }, + { + "epoch": 0.2570675105485232, + "grad_norm": 0.3641505837440491, + "learning_rate": 0.0015, + "loss": 2.4942, + "step": 2437 + }, + { + "epoch": 0.2571729957805907, + "grad_norm": 0.4356619417667389, + "learning_rate": 0.0015, + "loss": 2.4913, + "step": 2438 + }, + { + "epoch": 0.2572784810126582, + "grad_norm": 0.3911343216896057, + "learning_rate": 0.0015, + "loss": 2.476, + "step": 2439 + }, + { + "epoch": 0.25738396624472576, + "grad_norm": 0.41461166739463806, + "learning_rate": 0.0015, + "loss": 2.5222, + "step": 2440 + }, + { + "epoch": 0.25748945147679325, + "grad_norm": 0.46661216020584106, + "learning_rate": 0.0015, + "loss": 2.4698, + "step": 2441 + }, + { + "epoch": 0.25759493670886074, + "grad_norm": 0.4152948558330536, + "learning_rate": 0.0015, + "loss": 2.4565, + "step": 2442 + }, + { + "epoch": 0.2577004219409283, + "grad_norm": 0.4052931070327759, + "learning_rate": 0.0015, + "loss": 2.4795, + "step": 2443 + }, + { + "epoch": 0.2578059071729958, + "grad_norm": 0.3972504734992981, + "learning_rate": 0.0015, + "loss": 2.481, + 
"step": 2444 + }, + { + "epoch": 0.2579113924050633, + "grad_norm": 0.42381924390792847, + "learning_rate": 0.0015, + "loss": 2.4993, + "step": 2445 + }, + { + "epoch": 0.2580168776371308, + "grad_norm": 0.3841414749622345, + "learning_rate": 0.0015, + "loss": 2.456, + "step": 2446 + }, + { + "epoch": 0.2581223628691983, + "grad_norm": 0.4060139060020447, + "learning_rate": 0.0015, + "loss": 2.4607, + "step": 2447 + }, + { + "epoch": 0.2582278481012658, + "grad_norm": 0.4268529415130615, + "learning_rate": 0.0015, + "loss": 2.4648, + "step": 2448 + }, + { + "epoch": 0.25833333333333336, + "grad_norm": 0.44977816939353943, + "learning_rate": 0.0015, + "loss": 2.4495, + "step": 2449 + }, + { + "epoch": 0.25843881856540085, + "grad_norm": 0.44888898730278015, + "learning_rate": 0.0015, + "loss": 2.4466, + "step": 2450 + }, + { + "epoch": 0.25854430379746834, + "grad_norm": 0.36805856227874756, + "learning_rate": 0.0015, + "loss": 2.4581, + "step": 2451 + }, + { + "epoch": 0.2586497890295359, + "grad_norm": 0.4653225839138031, + "learning_rate": 0.0015, + "loss": 2.496, + "step": 2452 + }, + { + "epoch": 0.2587552742616034, + "grad_norm": 0.46608567237854004, + "learning_rate": 0.0015, + "loss": 2.4791, + "step": 2453 + }, + { + "epoch": 0.2588607594936709, + "grad_norm": 0.4017769992351532, + "learning_rate": 0.0015, + "loss": 2.5017, + "step": 2454 + }, + { + "epoch": 0.25896624472573837, + "grad_norm": 0.42488664388656616, + "learning_rate": 0.0015, + "loss": 2.4427, + "step": 2455 + }, + { + "epoch": 0.2590717299578059, + "grad_norm": 0.4483640491962433, + "learning_rate": 0.0015, + "loss": 2.4938, + "step": 2456 + }, + { + "epoch": 0.2591772151898734, + "grad_norm": 0.45927178859710693, + "learning_rate": 0.0015, + "loss": 2.5111, + "step": 2457 + }, + { + "epoch": 0.2592827004219409, + "grad_norm": 0.4034360945224762, + "learning_rate": 0.0015, + "loss": 2.4973, + "step": 2458 + }, + { + "epoch": 0.25938818565400845, + "grad_norm": 0.4848073720932007, + 
"learning_rate": 0.0015, + "loss": 2.4927, + "step": 2459 + }, + { + "epoch": 0.25949367088607594, + "grad_norm": 0.453267365694046, + "learning_rate": 0.0015, + "loss": 2.4775, + "step": 2460 + }, + { + "epoch": 0.25959915611814344, + "grad_norm": 0.41534286737442017, + "learning_rate": 0.0015, + "loss": 2.4551, + "step": 2461 + }, + { + "epoch": 0.259704641350211, + "grad_norm": 0.4075576663017273, + "learning_rate": 0.0015, + "loss": 2.4588, + "step": 2462 + }, + { + "epoch": 0.2598101265822785, + "grad_norm": 0.4858362376689911, + "learning_rate": 0.0015, + "loss": 2.4696, + "step": 2463 + }, + { + "epoch": 0.25991561181434597, + "grad_norm": 0.486209511756897, + "learning_rate": 0.0015, + "loss": 2.4603, + "step": 2464 + }, + { + "epoch": 0.2600210970464135, + "grad_norm": 0.37515634298324585, + "learning_rate": 0.0015, + "loss": 2.4984, + "step": 2465 + }, + { + "epoch": 0.260126582278481, + "grad_norm": 0.43924298882484436, + "learning_rate": 0.0015, + "loss": 2.4619, + "step": 2466 + }, + { + "epoch": 0.2602320675105485, + "grad_norm": 0.40010562539100647, + "learning_rate": 0.0015, + "loss": 2.4659, + "step": 2467 + }, + { + "epoch": 0.26033755274261605, + "grad_norm": 0.4446922540664673, + "learning_rate": 0.0015, + "loss": 2.4777, + "step": 2468 + }, + { + "epoch": 0.26044303797468354, + "grad_norm": 0.454057514667511, + "learning_rate": 0.0015, + "loss": 2.4613, + "step": 2469 + }, + { + "epoch": 0.26054852320675104, + "grad_norm": 0.42017611861228943, + "learning_rate": 0.0015, + "loss": 2.4772, + "step": 2470 + }, + { + "epoch": 0.2606540084388186, + "grad_norm": 0.4381926953792572, + "learning_rate": 0.0015, + "loss": 2.501, + "step": 2471 + }, + { + "epoch": 0.2607594936708861, + "grad_norm": 0.34640634059906006, + "learning_rate": 0.0015, + "loss": 2.4587, + "step": 2472 + }, + { + "epoch": 0.26086497890295357, + "grad_norm": 0.46491727232933044, + "learning_rate": 0.0015, + "loss": 2.4698, + "step": 2473 + }, + { + "epoch": 0.2609704641350211, + 
"grad_norm": 0.47137582302093506, + "learning_rate": 0.0015, + "loss": 2.4612, + "step": 2474 + }, + { + "epoch": 0.2610759493670886, + "grad_norm": 0.38769084215164185, + "learning_rate": 0.0015, + "loss": 2.4768, + "step": 2475 + }, + { + "epoch": 0.2611814345991561, + "grad_norm": 0.49050173163414, + "learning_rate": 0.0015, + "loss": 2.4487, + "step": 2476 + }, + { + "epoch": 0.26128691983122365, + "grad_norm": 0.44861963391304016, + "learning_rate": 0.0015, + "loss": 2.4792, + "step": 2477 + }, + { + "epoch": 0.26139240506329114, + "grad_norm": 0.4416331350803375, + "learning_rate": 0.0015, + "loss": 2.4544, + "step": 2478 + }, + { + "epoch": 0.26149789029535864, + "grad_norm": 0.6002423763275146, + "learning_rate": 0.0015, + "loss": 2.4915, + "step": 2479 + }, + { + "epoch": 0.2616033755274262, + "grad_norm": 0.5066868662834167, + "learning_rate": 0.0015, + "loss": 2.4758, + "step": 2480 + }, + { + "epoch": 0.2617088607594937, + "grad_norm": 0.5343090295791626, + "learning_rate": 0.0015, + "loss": 2.4504, + "step": 2481 + }, + { + "epoch": 0.26181434599156117, + "grad_norm": 0.5569011569023132, + "learning_rate": 0.0015, + "loss": 2.4567, + "step": 2482 + }, + { + "epoch": 0.2619198312236287, + "grad_norm": 0.5295875668525696, + "learning_rate": 0.0015, + "loss": 2.4758, + "step": 2483 + }, + { + "epoch": 0.2620253164556962, + "grad_norm": 0.61580890417099, + "learning_rate": 0.0015, + "loss": 2.4481, + "step": 2484 + }, + { + "epoch": 0.2621308016877637, + "grad_norm": 0.44092991948127747, + "learning_rate": 0.0015, + "loss": 2.4786, + "step": 2485 + }, + { + "epoch": 0.2622362869198312, + "grad_norm": 0.47669804096221924, + "learning_rate": 0.0015, + "loss": 2.4651, + "step": 2486 + }, + { + "epoch": 0.26234177215189874, + "grad_norm": 0.5263328552246094, + "learning_rate": 0.0015, + "loss": 2.4806, + "step": 2487 + }, + { + "epoch": 0.26244725738396624, + "grad_norm": 0.5220320820808411, + "learning_rate": 0.0015, + "loss": 2.4881, + "step": 2488 + }, + { 
+ "epoch": 0.26255274261603373, + "grad_norm": 0.5408862829208374, + "learning_rate": 0.0015, + "loss": 2.4219, + "step": 2489 + }, + { + "epoch": 0.2626582278481013, + "grad_norm": 0.4479482173919678, + "learning_rate": 0.0015, + "loss": 2.4528, + "step": 2490 + }, + { + "epoch": 0.26276371308016877, + "grad_norm": 0.4832651913166046, + "learning_rate": 0.0015, + "loss": 2.457, + "step": 2491 + }, + { + "epoch": 0.26286919831223626, + "grad_norm": 0.4597185254096985, + "learning_rate": 0.0015, + "loss": 2.4767, + "step": 2492 + }, + { + "epoch": 0.2629746835443038, + "grad_norm": 0.43852555751800537, + "learning_rate": 0.0015, + "loss": 2.4977, + "step": 2493 + }, + { + "epoch": 0.2630801687763713, + "grad_norm": 0.5210453271865845, + "learning_rate": 0.0015, + "loss": 2.459, + "step": 2494 + }, + { + "epoch": 0.2631856540084388, + "grad_norm": 0.4597354233264923, + "learning_rate": 0.0015, + "loss": 2.4622, + "step": 2495 + }, + { + "epoch": 0.26329113924050634, + "grad_norm": 0.4515848159790039, + "learning_rate": 0.0015, + "loss": 2.4429, + "step": 2496 + }, + { + "epoch": 0.26339662447257384, + "grad_norm": 0.47084861993789673, + "learning_rate": 0.0015, + "loss": 2.4841, + "step": 2497 + }, + { + "epoch": 0.26350210970464133, + "grad_norm": 0.4295664131641388, + "learning_rate": 0.0015, + "loss": 2.4711, + "step": 2498 + }, + { + "epoch": 0.2636075949367089, + "grad_norm": 0.502130389213562, + "learning_rate": 0.0015, + "loss": 2.4823, + "step": 2499 + }, + { + "epoch": 0.26371308016877637, + "grad_norm": 0.48684754967689514, + "learning_rate": 0.0015, + "loss": 2.4504, + "step": 2500 + }, + { + "epoch": 0.26381856540084386, + "grad_norm": 0.45722299814224243, + "learning_rate": 0.0015, + "loss": 2.4387, + "step": 2501 + }, + { + "epoch": 0.2639240506329114, + "grad_norm": 0.375613808631897, + "learning_rate": 0.0015, + "loss": 2.4828, + "step": 2502 + }, + { + "epoch": 0.2640295358649789, + "grad_norm": 0.381069153547287, + "learning_rate": 0.0015, + "loss": 
2.4514, + "step": 2503 + }, + { + "epoch": 0.2641350210970464, + "grad_norm": 0.41698312759399414, + "learning_rate": 0.0015, + "loss": 2.4356, + "step": 2504 + }, + { + "epoch": 0.26424050632911394, + "grad_norm": 0.3620361089706421, + "learning_rate": 0.0015, + "loss": 2.4914, + "step": 2505 + }, + { + "epoch": 0.26434599156118144, + "grad_norm": 0.48045584559440613, + "learning_rate": 0.0015, + "loss": 2.4364, + "step": 2506 + }, + { + "epoch": 0.26445147679324893, + "grad_norm": 0.5088294148445129, + "learning_rate": 0.0015, + "loss": 2.4576, + "step": 2507 + }, + { + "epoch": 0.2645569620253165, + "grad_norm": 0.39225587248802185, + "learning_rate": 0.0015, + "loss": 2.4526, + "step": 2508 + }, + { + "epoch": 0.26466244725738397, + "grad_norm": 0.4407937824726105, + "learning_rate": 0.0015, + "loss": 2.451, + "step": 2509 + }, + { + "epoch": 0.26476793248945146, + "grad_norm": 0.41679513454437256, + "learning_rate": 0.0015, + "loss": 2.4717, + "step": 2510 + }, + { + "epoch": 0.264873417721519, + "grad_norm": 0.4177749454975128, + "learning_rate": 0.0015, + "loss": 2.479, + "step": 2511 + }, + { + "epoch": 0.2649789029535865, + "grad_norm": 0.4077315032482147, + "learning_rate": 0.0015, + "loss": 2.4622, + "step": 2512 + }, + { + "epoch": 0.265084388185654, + "grad_norm": 0.48201340436935425, + "learning_rate": 0.0015, + "loss": 2.4269, + "step": 2513 + }, + { + "epoch": 0.26518987341772154, + "grad_norm": 0.3601517081260681, + "learning_rate": 0.0015, + "loss": 2.4552, + "step": 2514 + }, + { + "epoch": 0.26529535864978904, + "grad_norm": 0.4240029454231262, + "learning_rate": 0.0015, + "loss": 2.4391, + "step": 2515 + }, + { + "epoch": 0.26540084388185653, + "grad_norm": 0.4402133822441101, + "learning_rate": 0.0015, + "loss": 2.4716, + "step": 2516 + }, + { + "epoch": 0.2655063291139241, + "grad_norm": 0.35527268052101135, + "learning_rate": 0.0015, + "loss": 2.4578, + "step": 2517 + }, + { + "epoch": 0.26561181434599157, + "grad_norm": 0.42087411880493164, 
+ "learning_rate": 0.0015, + "loss": 2.473, + "step": 2518 + }, + { + "epoch": 0.26571729957805906, + "grad_norm": 0.41496503353118896, + "learning_rate": 0.0015, + "loss": 2.4522, + "step": 2519 + }, + { + "epoch": 0.26582278481012656, + "grad_norm": 0.3882802426815033, + "learning_rate": 0.0015, + "loss": 2.454, + "step": 2520 + }, + { + "epoch": 0.2659282700421941, + "grad_norm": 0.3650716245174408, + "learning_rate": 0.0015, + "loss": 2.4609, + "step": 2521 + }, + { + "epoch": 0.2660337552742616, + "grad_norm": 0.4080566167831421, + "learning_rate": 0.0015, + "loss": 2.4573, + "step": 2522 + }, + { + "epoch": 0.2661392405063291, + "grad_norm": 0.3326928913593292, + "learning_rate": 0.0015, + "loss": 2.4415, + "step": 2523 + }, + { + "epoch": 0.26624472573839664, + "grad_norm": 0.38840362429618835, + "learning_rate": 0.0015, + "loss": 2.4227, + "step": 2524 + }, + { + "epoch": 0.26635021097046413, + "grad_norm": 0.34778985381126404, + "learning_rate": 0.0015, + "loss": 2.4335, + "step": 2525 + }, + { + "epoch": 0.2664556962025316, + "grad_norm": 0.38465145230293274, + "learning_rate": 0.0015, + "loss": 2.4311, + "step": 2526 + }, + { + "epoch": 0.26656118143459917, + "grad_norm": 0.3434242606163025, + "learning_rate": 0.0015, + "loss": 2.4412, + "step": 2527 + }, + { + "epoch": 0.26666666666666666, + "grad_norm": 0.4310702085494995, + "learning_rate": 0.0015, + "loss": 2.466, + "step": 2528 + }, + { + "epoch": 0.26677215189873416, + "grad_norm": 0.4205184876918793, + "learning_rate": 0.0015, + "loss": 2.5221, + "step": 2529 + }, + { + "epoch": 0.2668776371308017, + "grad_norm": 0.3702510595321655, + "learning_rate": 0.0015, + "loss": 2.4434, + "step": 2530 + }, + { + "epoch": 0.2669831223628692, + "grad_norm": 0.47050541639328003, + "learning_rate": 0.0015, + "loss": 2.4619, + "step": 2531 + }, + { + "epoch": 0.2670886075949367, + "grad_norm": 0.42509016394615173, + "learning_rate": 0.0015, + "loss": 2.4358, + "step": 2532 + }, + { + "epoch": 
0.26719409282700424, + "grad_norm": 0.4329249858856201, + "learning_rate": 0.0015, + "loss": 2.4097, + "step": 2533 + }, + { + "epoch": 0.26729957805907173, + "grad_norm": 0.3905951976776123, + "learning_rate": 0.0015, + "loss": 2.466, + "step": 2534 + }, + { + "epoch": 0.2674050632911392, + "grad_norm": 0.42269107699394226, + "learning_rate": 0.0015, + "loss": 2.4796, + "step": 2535 + }, + { + "epoch": 0.26751054852320677, + "grad_norm": 0.4046883285045624, + "learning_rate": 0.0015, + "loss": 2.4722, + "step": 2536 + }, + { + "epoch": 0.26761603375527426, + "grad_norm": 0.37225234508514404, + "learning_rate": 0.0015, + "loss": 2.4362, + "step": 2537 + }, + { + "epoch": 0.26772151898734176, + "grad_norm": 0.41454043984413147, + "learning_rate": 0.0015, + "loss": 2.4532, + "step": 2538 + }, + { + "epoch": 0.2678270042194093, + "grad_norm": 0.3868294954299927, + "learning_rate": 0.0015, + "loss": 2.4715, + "step": 2539 + }, + { + "epoch": 0.2679324894514768, + "grad_norm": 0.355835497379303, + "learning_rate": 0.0015, + "loss": 2.4549, + "step": 2540 + }, + { + "epoch": 0.2680379746835443, + "grad_norm": 0.3843865990638733, + "learning_rate": 0.0015, + "loss": 2.4395, + "step": 2541 + }, + { + "epoch": 0.26814345991561184, + "grad_norm": 0.39358657598495483, + "learning_rate": 0.0015, + "loss": 2.4633, + "step": 2542 + }, + { + "epoch": 0.26824894514767933, + "grad_norm": 0.35991138219833374, + "learning_rate": 0.0015, + "loss": 2.4293, + "step": 2543 + }, + { + "epoch": 0.2683544303797468, + "grad_norm": 0.4702073931694031, + "learning_rate": 0.0015, + "loss": 2.416, + "step": 2544 + }, + { + "epoch": 0.26845991561181437, + "grad_norm": 0.4806153476238251, + "learning_rate": 0.0015, + "loss": 2.435, + "step": 2545 + }, + { + "epoch": 0.26856540084388186, + "grad_norm": 0.3969864845275879, + "learning_rate": 0.0015, + "loss": 2.4567, + "step": 2546 + }, + { + "epoch": 0.26867088607594936, + "grad_norm": 0.4008203148841858, + "learning_rate": 0.0015, + "loss": 
2.4375, + "step": 2547 + }, + { + "epoch": 0.2687763713080169, + "grad_norm": 0.39770376682281494, + "learning_rate": 0.0015, + "loss": 2.4316, + "step": 2548 + }, + { + "epoch": 0.2688818565400844, + "grad_norm": 0.37636643648147583, + "learning_rate": 0.0015, + "loss": 2.4722, + "step": 2549 + }, + { + "epoch": 0.2689873417721519, + "grad_norm": 0.4241926074028015, + "learning_rate": 0.0015, + "loss": 2.4441, + "step": 2550 + }, + { + "epoch": 0.26909282700421944, + "grad_norm": 0.3508222997188568, + "learning_rate": 0.0015, + "loss": 2.4532, + "step": 2551 + }, + { + "epoch": 0.26919831223628693, + "grad_norm": 0.42968466877937317, + "learning_rate": 0.0015, + "loss": 2.4607, + "step": 2552 + }, + { + "epoch": 0.2693037974683544, + "grad_norm": 0.44135570526123047, + "learning_rate": 0.0015, + "loss": 2.4734, + "step": 2553 + }, + { + "epoch": 0.2694092827004219, + "grad_norm": 0.44174015522003174, + "learning_rate": 0.0015, + "loss": 2.4445, + "step": 2554 + }, + { + "epoch": 0.26951476793248946, + "grad_norm": 0.4345652461051941, + "learning_rate": 0.0015, + "loss": 2.4544, + "step": 2555 + }, + { + "epoch": 0.26962025316455696, + "grad_norm": 0.3872082233428955, + "learning_rate": 0.0015, + "loss": 2.4512, + "step": 2556 + }, + { + "epoch": 0.26972573839662445, + "grad_norm": 0.3527587950229645, + "learning_rate": 0.0015, + "loss": 2.4322, + "step": 2557 + }, + { + "epoch": 0.269831223628692, + "grad_norm": 0.3703422546386719, + "learning_rate": 0.0015, + "loss": 2.4575, + "step": 2558 + }, + { + "epoch": 0.2699367088607595, + "grad_norm": 0.36153414845466614, + "learning_rate": 0.0015, + "loss": 2.4305, + "step": 2559 + }, + { + "epoch": 0.270042194092827, + "grad_norm": 0.3674074113368988, + "learning_rate": 0.0015, + "loss": 2.4264, + "step": 2560 + }, + { + "epoch": 0.27014767932489453, + "grad_norm": 0.3918725550174713, + "learning_rate": 0.0015, + "loss": 2.4384, + "step": 2561 + }, + { + "epoch": 0.270253164556962, + "grad_norm": 0.36156710982322693, + 
"learning_rate": 0.0015, + "loss": 2.446, + "step": 2562 + }, + { + "epoch": 0.2703586497890295, + "grad_norm": 0.3759543299674988, + "learning_rate": 0.0015, + "loss": 2.4742, + "step": 2563 + }, + { + "epoch": 0.27046413502109706, + "grad_norm": 0.4149315655231476, + "learning_rate": 0.0015, + "loss": 2.452, + "step": 2564 + }, + { + "epoch": 0.27056962025316456, + "grad_norm": 0.3886055648326874, + "learning_rate": 0.0015, + "loss": 2.4212, + "step": 2565 + }, + { + "epoch": 0.27067510548523205, + "grad_norm": 0.3882490396499634, + "learning_rate": 0.0015, + "loss": 2.4492, + "step": 2566 + }, + { + "epoch": 0.2707805907172996, + "grad_norm": 0.3444245755672455, + "learning_rate": 0.0015, + "loss": 2.447, + "step": 2567 + }, + { + "epoch": 0.2708860759493671, + "grad_norm": 0.38279154896736145, + "learning_rate": 0.0015, + "loss": 2.4486, + "step": 2568 + }, + { + "epoch": 0.2709915611814346, + "grad_norm": 0.37464961409568787, + "learning_rate": 0.0015, + "loss": 2.419, + "step": 2569 + }, + { + "epoch": 0.27109704641350213, + "grad_norm": 0.4020625054836273, + "learning_rate": 0.0015, + "loss": 2.4727, + "step": 2570 + }, + { + "epoch": 0.2712025316455696, + "grad_norm": 0.448788583278656, + "learning_rate": 0.0015, + "loss": 2.421, + "step": 2571 + }, + { + "epoch": 0.2713080168776371, + "grad_norm": 0.399604856967926, + "learning_rate": 0.0015, + "loss": 2.4394, + "step": 2572 + }, + { + "epoch": 0.27141350210970466, + "grad_norm": 0.4599316716194153, + "learning_rate": 0.0015, + "loss": 2.4544, + "step": 2573 + }, + { + "epoch": 0.27151898734177216, + "grad_norm": 0.39430272579193115, + "learning_rate": 0.0015, + "loss": 2.4542, + "step": 2574 + }, + { + "epoch": 0.27162447257383965, + "grad_norm": 0.5033408999443054, + "learning_rate": 0.0015, + "loss": 2.4553, + "step": 2575 + }, + { + "epoch": 0.2717299578059072, + "grad_norm": 0.4905431270599365, + "learning_rate": 0.0015, + "loss": 2.4404, + "step": 2576 + }, + { + "epoch": 0.2718354430379747, + 
"grad_norm": 0.39347508549690247, + "learning_rate": 0.0015, + "loss": 2.4727, + "step": 2577 + }, + { + "epoch": 0.2719409282700422, + "grad_norm": 0.42803266644477844, + "learning_rate": 0.0015, + "loss": 2.4802, + "step": 2578 + }, + { + "epoch": 0.27204641350210973, + "grad_norm": 0.4874904453754425, + "learning_rate": 0.0015, + "loss": 2.4393, + "step": 2579 + }, + { + "epoch": 0.2721518987341772, + "grad_norm": 0.4507482349872589, + "learning_rate": 0.0015, + "loss": 2.4501, + "step": 2580 + }, + { + "epoch": 0.2722573839662447, + "grad_norm": 0.3987235724925995, + "learning_rate": 0.0015, + "loss": 2.4345, + "step": 2581 + }, + { + "epoch": 0.27236286919831226, + "grad_norm": 0.4321129024028778, + "learning_rate": 0.0015, + "loss": 2.4448, + "step": 2582 + }, + { + "epoch": 0.27246835443037976, + "grad_norm": 0.40418675541877747, + "learning_rate": 0.0015, + "loss": 2.448, + "step": 2583 + }, + { + "epoch": 0.27257383966244725, + "grad_norm": 0.4954458475112915, + "learning_rate": 0.0015, + "loss": 2.4439, + "step": 2584 + }, + { + "epoch": 0.27267932489451474, + "grad_norm": 0.4334389269351959, + "learning_rate": 0.0015, + "loss": 2.4605, + "step": 2585 + }, + { + "epoch": 0.2727848101265823, + "grad_norm": 0.44160589575767517, + "learning_rate": 0.0015, + "loss": 2.448, + "step": 2586 + }, + { + "epoch": 0.2728902953586498, + "grad_norm": 0.402527391910553, + "learning_rate": 0.0015, + "loss": 2.4647, + "step": 2587 + }, + { + "epoch": 0.2729957805907173, + "grad_norm": 0.46725520491600037, + "learning_rate": 0.0015, + "loss": 2.4544, + "step": 2588 + }, + { + "epoch": 0.2731012658227848, + "grad_norm": 0.4857245981693268, + "learning_rate": 0.0015, + "loss": 2.4495, + "step": 2589 + }, + { + "epoch": 0.2732067510548523, + "grad_norm": 0.4610395133495331, + "learning_rate": 0.0015, + "loss": 2.435, + "step": 2590 + }, + { + "epoch": 0.2733122362869198, + "grad_norm": 0.482776015996933, + "learning_rate": 0.0015, + "loss": 2.4337, + "step": 2591 + }, + { + 
"epoch": 0.27341772151898736, + "grad_norm": 0.49218422174453735, + "learning_rate": 0.0015, + "loss": 2.4646, + "step": 2592 + }, + { + "epoch": 0.27352320675105485, + "grad_norm": 0.5062264800071716, + "learning_rate": 0.0015, + "loss": 2.4523, + "step": 2593 + }, + { + "epoch": 0.27362869198312234, + "grad_norm": 0.5528573393821716, + "learning_rate": 0.0015, + "loss": 2.4437, + "step": 2594 + }, + { + "epoch": 0.2737341772151899, + "grad_norm": 0.5217202305793762, + "learning_rate": 0.0015, + "loss": 2.4677, + "step": 2595 + }, + { + "epoch": 0.2738396624472574, + "grad_norm": 0.4054888188838959, + "learning_rate": 0.0015, + "loss": 2.4502, + "step": 2596 + }, + { + "epoch": 0.2739451476793249, + "grad_norm": 0.4592357575893402, + "learning_rate": 0.0015, + "loss": 2.4315, + "step": 2597 + }, + { + "epoch": 0.2740506329113924, + "grad_norm": 0.4401114583015442, + "learning_rate": 0.0015, + "loss": 2.4159, + "step": 2598 + }, + { + "epoch": 0.2741561181434599, + "grad_norm": 0.515995442867279, + "learning_rate": 0.0015, + "loss": 2.4259, + "step": 2599 + }, + { + "epoch": 0.2742616033755274, + "grad_norm": 0.5377696752548218, + "learning_rate": 0.0015, + "loss": 2.4941, + "step": 2600 + }, + { + "epoch": 0.27436708860759496, + "grad_norm": 0.3450435698032379, + "learning_rate": 0.0015, + "loss": 2.4352, + "step": 2601 + }, + { + "epoch": 0.27447257383966245, + "grad_norm": 0.5074176788330078, + "learning_rate": 0.0015, + "loss": 2.4138, + "step": 2602 + }, + { + "epoch": 0.27457805907172994, + "grad_norm": 0.521711528301239, + "learning_rate": 0.0015, + "loss": 2.3994, + "step": 2603 + }, + { + "epoch": 0.2746835443037975, + "grad_norm": 0.44454509019851685, + "learning_rate": 0.0015, + "loss": 2.4624, + "step": 2604 + }, + { + "epoch": 0.274789029535865, + "grad_norm": 0.45018765330314636, + "learning_rate": 0.0015, + "loss": 2.4678, + "step": 2605 + }, + { + "epoch": 0.2748945147679325, + "grad_norm": 0.5103918313980103, + "learning_rate": 0.0015, + "loss": 
2.4693, + "step": 2606 + }, + { + "epoch": 0.275, + "grad_norm": 0.4957732558250427, + "learning_rate": 0.0015, + "loss": 2.4646, + "step": 2607 + }, + { + "epoch": 0.2751054852320675, + "grad_norm": 0.3813236355781555, + "learning_rate": 0.0015, + "loss": 2.4332, + "step": 2608 + }, + { + "epoch": 0.275210970464135, + "grad_norm": 0.40212327241897583, + "learning_rate": 0.0015, + "loss": 2.461, + "step": 2609 + }, + { + "epoch": 0.27531645569620256, + "grad_norm": 0.4075862169265747, + "learning_rate": 0.0015, + "loss": 2.4557, + "step": 2610 + }, + { + "epoch": 0.27542194092827005, + "grad_norm": 0.4228242039680481, + "learning_rate": 0.0015, + "loss": 2.4621, + "step": 2611 + }, + { + "epoch": 0.27552742616033754, + "grad_norm": 0.3909834027290344, + "learning_rate": 0.0015, + "loss": 2.4601, + "step": 2612 + }, + { + "epoch": 0.2756329113924051, + "grad_norm": 0.43872159719467163, + "learning_rate": 0.0015, + "loss": 2.4591, + "step": 2613 + }, + { + "epoch": 0.2757383966244726, + "grad_norm": 0.4132627844810486, + "learning_rate": 0.0015, + "loss": 2.4321, + "step": 2614 + }, + { + "epoch": 0.2758438818565401, + "grad_norm": 0.410988450050354, + "learning_rate": 0.0015, + "loss": 2.4614, + "step": 2615 + }, + { + "epoch": 0.2759493670886076, + "grad_norm": 0.4363310933113098, + "learning_rate": 0.0015, + "loss": 2.4278, + "step": 2616 + }, + { + "epoch": 0.2760548523206751, + "grad_norm": 0.35787534713745117, + "learning_rate": 0.0015, + "loss": 2.4151, + "step": 2617 + }, + { + "epoch": 0.2761603375527426, + "grad_norm": 0.5052613615989685, + "learning_rate": 0.0015, + "loss": 2.4141, + "step": 2618 + }, + { + "epoch": 0.2762658227848101, + "grad_norm": 0.4317731261253357, + "learning_rate": 0.0015, + "loss": 2.4443, + "step": 2619 + }, + { + "epoch": 0.27637130801687765, + "grad_norm": 0.3955075442790985, + "learning_rate": 0.0015, + "loss": 2.4442, + "step": 2620 + }, + { + "epoch": 0.27647679324894514, + "grad_norm": 0.3945864737033844, + "learning_rate": 
0.0015, + "loss": 2.4214, + "step": 2621 + }, + { + "epoch": 0.27658227848101263, + "grad_norm": 0.3363588750362396, + "learning_rate": 0.0015, + "loss": 2.3979, + "step": 2622 + }, + { + "epoch": 0.2766877637130802, + "grad_norm": 0.4719012677669525, + "learning_rate": 0.0015, + "loss": 2.4152, + "step": 2623 + }, + { + "epoch": 0.2767932489451477, + "grad_norm": 0.3925080895423889, + "learning_rate": 0.0015, + "loss": 2.4519, + "step": 2624 + }, + { + "epoch": 0.27689873417721517, + "grad_norm": 0.39716506004333496, + "learning_rate": 0.0015, + "loss": 2.4245, + "step": 2625 + }, + { + "epoch": 0.2770042194092827, + "grad_norm": 0.4802018702030182, + "learning_rate": 0.0015, + "loss": 2.4168, + "step": 2626 + }, + { + "epoch": 0.2771097046413502, + "grad_norm": 0.5356960892677307, + "learning_rate": 0.0015, + "loss": 2.4333, + "step": 2627 + }, + { + "epoch": 0.2772151898734177, + "grad_norm": 0.44550374150276184, + "learning_rate": 0.0015, + "loss": 2.4382, + "step": 2628 + }, + { + "epoch": 0.27732067510548525, + "grad_norm": 0.45259058475494385, + "learning_rate": 0.0015, + "loss": 2.4302, + "step": 2629 + }, + { + "epoch": 0.27742616033755274, + "grad_norm": 0.4016267955303192, + "learning_rate": 0.0015, + "loss": 2.4681, + "step": 2630 + }, + { + "epoch": 0.27753164556962023, + "grad_norm": 0.44585487246513367, + "learning_rate": 0.0015, + "loss": 2.4503, + "step": 2631 + }, + { + "epoch": 0.2776371308016878, + "grad_norm": 0.46381455659866333, + "learning_rate": 0.0015, + "loss": 2.4363, + "step": 2632 + }, + { + "epoch": 0.2777426160337553, + "grad_norm": 0.44911983609199524, + "learning_rate": 0.0015, + "loss": 2.4202, + "step": 2633 + }, + { + "epoch": 0.27784810126582277, + "grad_norm": 0.4195655286312103, + "learning_rate": 0.0015, + "loss": 2.4489, + "step": 2634 + }, + { + "epoch": 0.2779535864978903, + "grad_norm": 0.5265651345252991, + "learning_rate": 0.0015, + "loss": 2.4843, + "step": 2635 + }, + { + "epoch": 0.2780590717299578, + "grad_norm": 
0.4550987482070923, + "learning_rate": 0.0015, + "loss": 2.4057, + "step": 2636 + }, + { + "epoch": 0.2781645569620253, + "grad_norm": 0.5069044828414917, + "learning_rate": 0.0015, + "loss": 2.4558, + "step": 2637 + }, + { + "epoch": 0.27827004219409285, + "grad_norm": 0.4980974495410919, + "learning_rate": 0.0015, + "loss": 2.4295, + "step": 2638 + }, + { + "epoch": 0.27837552742616034, + "grad_norm": 0.43595626950263977, + "learning_rate": 0.0015, + "loss": 2.4168, + "step": 2639 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 0.49514374136924744, + "learning_rate": 0.0015, + "loss": 2.4285, + "step": 2640 + }, + { + "epoch": 0.2785864978902954, + "grad_norm": 0.4413341283798218, + "learning_rate": 0.0015, + "loss": 2.4494, + "step": 2641 + }, + { + "epoch": 0.2786919831223629, + "grad_norm": 0.49037644267082214, + "learning_rate": 0.0015, + "loss": 2.4115, + "step": 2642 + }, + { + "epoch": 0.27879746835443037, + "grad_norm": 0.3868851363658905, + "learning_rate": 0.0015, + "loss": 2.43, + "step": 2643 + }, + { + "epoch": 0.2789029535864979, + "grad_norm": 0.4782319962978363, + "learning_rate": 0.0015, + "loss": 2.39, + "step": 2644 + }, + { + "epoch": 0.2790084388185654, + "grad_norm": 0.3848482072353363, + "learning_rate": 0.0015, + "loss": 2.4623, + "step": 2645 + }, + { + "epoch": 0.2791139240506329, + "grad_norm": 0.4417005777359009, + "learning_rate": 0.0015, + "loss": 2.4312, + "step": 2646 + }, + { + "epoch": 0.27921940928270045, + "grad_norm": 0.3891116678714752, + "learning_rate": 0.0015, + "loss": 2.4284, + "step": 2647 + }, + { + "epoch": 0.27932489451476794, + "grad_norm": 0.4140455722808838, + "learning_rate": 0.0015, + "loss": 2.443, + "step": 2648 + }, + { + "epoch": 0.27943037974683543, + "grad_norm": 0.43879640102386475, + "learning_rate": 0.0015, + "loss": 2.4134, + "step": 2649 + }, + { + "epoch": 0.2795358649789029, + "grad_norm": 0.40763500332832336, + "learning_rate": 0.0015, + "loss": 2.413, + "step": 2650 + }, + { + "epoch": 
0.2796413502109705, + "grad_norm": 0.3749247193336487, + "learning_rate": 0.0015, + "loss": 2.4164, + "step": 2651 + }, + { + "epoch": 0.27974683544303797, + "grad_norm": 0.46869727969169617, + "learning_rate": 0.0015, + "loss": 2.4535, + "step": 2652 + }, + { + "epoch": 0.27985232067510546, + "grad_norm": 0.4432630240917206, + "learning_rate": 0.0015, + "loss": 2.4532, + "step": 2653 + }, + { + "epoch": 0.279957805907173, + "grad_norm": 0.3923470377922058, + "learning_rate": 0.0015, + "loss": 2.4408, + "step": 2654 + }, + { + "epoch": 0.2800632911392405, + "grad_norm": 0.4126985967159271, + "learning_rate": 0.0015, + "loss": 2.4244, + "step": 2655 + }, + { + "epoch": 0.280168776371308, + "grad_norm": 0.3841682970523834, + "learning_rate": 0.0015, + "loss": 2.4218, + "step": 2656 + }, + { + "epoch": 0.28027426160337554, + "grad_norm": 0.3892408013343811, + "learning_rate": 0.0015, + "loss": 2.4091, + "step": 2657 + }, + { + "epoch": 0.28037974683544303, + "grad_norm": 0.35383790731430054, + "learning_rate": 0.0015, + "loss": 2.3828, + "step": 2658 + }, + { + "epoch": 0.2804852320675105, + "grad_norm": 0.4032454192638397, + "learning_rate": 0.0015, + "loss": 2.4362, + "step": 2659 + }, + { + "epoch": 0.2805907172995781, + "grad_norm": 0.38401898741722107, + "learning_rate": 0.0015, + "loss": 2.4246, + "step": 2660 + }, + { + "epoch": 0.28069620253164557, + "grad_norm": 0.3763688802719116, + "learning_rate": 0.0015, + "loss": 2.4537, + "step": 2661 + }, + { + "epoch": 0.28080168776371306, + "grad_norm": 0.4531441032886505, + "learning_rate": 0.0015, + "loss": 2.423, + "step": 2662 + }, + { + "epoch": 0.2809071729957806, + "grad_norm": 0.3790852129459381, + "learning_rate": 0.0015, + "loss": 2.4242, + "step": 2663 + }, + { + "epoch": 0.2810126582278481, + "grad_norm": 0.4300045371055603, + "learning_rate": 0.0015, + "loss": 2.4286, + "step": 2664 + }, + { + "epoch": 0.2811181434599156, + "grad_norm": 0.5249015688896179, + "learning_rate": 0.0015, + "loss": 2.4058, + 
"step": 2665 + }, + { + "epoch": 0.28122362869198314, + "grad_norm": 0.4780619144439697, + "learning_rate": 0.0015, + "loss": 2.4219, + "step": 2666 + }, + { + "epoch": 0.28132911392405063, + "grad_norm": 0.39965859055519104, + "learning_rate": 0.0015, + "loss": 2.4202, + "step": 2667 + }, + { + "epoch": 0.2814345991561181, + "grad_norm": 0.46764466166496277, + "learning_rate": 0.0015, + "loss": 2.4173, + "step": 2668 + }, + { + "epoch": 0.2815400843881857, + "grad_norm": 0.417925089597702, + "learning_rate": 0.0015, + "loss": 2.4297, + "step": 2669 + }, + { + "epoch": 0.28164556962025317, + "grad_norm": 0.4362967610359192, + "learning_rate": 0.0015, + "loss": 2.4315, + "step": 2670 + }, + { + "epoch": 0.28175105485232066, + "grad_norm": 0.4653046131134033, + "learning_rate": 0.0015, + "loss": 2.4652, + "step": 2671 + }, + { + "epoch": 0.2818565400843882, + "grad_norm": 0.3681575059890747, + "learning_rate": 0.0015, + "loss": 2.4419, + "step": 2672 + }, + { + "epoch": 0.2819620253164557, + "grad_norm": 0.40231913328170776, + "learning_rate": 0.0015, + "loss": 2.4698, + "step": 2673 + }, + { + "epoch": 0.2820675105485232, + "grad_norm": 0.4264129102230072, + "learning_rate": 0.0015, + "loss": 2.3935, + "step": 2674 + }, + { + "epoch": 0.28217299578059074, + "grad_norm": 0.34283265471458435, + "learning_rate": 0.0015, + "loss": 2.4297, + "step": 2675 + }, + { + "epoch": 0.28227848101265823, + "grad_norm": 0.42373785376548767, + "learning_rate": 0.0015, + "loss": 2.4346, + "step": 2676 + }, + { + "epoch": 0.2823839662447257, + "grad_norm": 0.4277489483356476, + "learning_rate": 0.0015, + "loss": 2.3905, + "step": 2677 + }, + { + "epoch": 0.2824894514767933, + "grad_norm": 0.43266600370407104, + "learning_rate": 0.0015, + "loss": 2.4002, + "step": 2678 + }, + { + "epoch": 0.28259493670886077, + "grad_norm": 0.4038338363170624, + "learning_rate": 0.0015, + "loss": 2.4196, + "step": 2679 + }, + { + "epoch": 0.28270042194092826, + "grad_norm": 0.36826997995376587, + 
"learning_rate": 0.0015, + "loss": 2.3932, + "step": 2680 + }, + { + "epoch": 0.2828059071729958, + "grad_norm": 0.4532821476459503, + "learning_rate": 0.0015, + "loss": 2.3962, + "step": 2681 + }, + { + "epoch": 0.2829113924050633, + "grad_norm": 0.5599679946899414, + "learning_rate": 0.0015, + "loss": 2.3867, + "step": 2682 + }, + { + "epoch": 0.2830168776371308, + "grad_norm": 0.5079588294029236, + "learning_rate": 0.0015, + "loss": 2.4311, + "step": 2683 + }, + { + "epoch": 0.2831223628691983, + "grad_norm": 0.37661492824554443, + "learning_rate": 0.0015, + "loss": 2.4177, + "step": 2684 + }, + { + "epoch": 0.28322784810126583, + "grad_norm": 0.3885810077190399, + "learning_rate": 0.0015, + "loss": 2.3839, + "step": 2685 + }, + { + "epoch": 0.2833333333333333, + "grad_norm": 0.3881590962409973, + "learning_rate": 0.0015, + "loss": 2.4086, + "step": 2686 + }, + { + "epoch": 0.2834388185654008, + "grad_norm": 0.4369927644729614, + "learning_rate": 0.0015, + "loss": 2.4075, + "step": 2687 + }, + { + "epoch": 0.28354430379746837, + "grad_norm": 0.4158192276954651, + "learning_rate": 0.0015, + "loss": 2.4012, + "step": 2688 + }, + { + "epoch": 0.28364978902953586, + "grad_norm": 0.48053765296936035, + "learning_rate": 0.0015, + "loss": 2.4181, + "step": 2689 + }, + { + "epoch": 0.28375527426160335, + "grad_norm": 0.3355393707752228, + "learning_rate": 0.0015, + "loss": 2.4299, + "step": 2690 + }, + { + "epoch": 0.2838607594936709, + "grad_norm": 0.45023325085639954, + "learning_rate": 0.0015, + "loss": 2.4266, + "step": 2691 + }, + { + "epoch": 0.2839662447257384, + "grad_norm": 0.41959577798843384, + "learning_rate": 0.0015, + "loss": 2.4033, + "step": 2692 + }, + { + "epoch": 0.2840717299578059, + "grad_norm": 0.4009360373020172, + "learning_rate": 0.0015, + "loss": 2.4451, + "step": 2693 + }, + { + "epoch": 0.28417721518987343, + "grad_norm": 0.39836740493774414, + "learning_rate": 0.0015, + "loss": 2.4045, + "step": 2694 + }, + { + "epoch": 0.2842827004219409, + 
"grad_norm": 0.37957674264907837, + "learning_rate": 0.0015, + "loss": 2.4223, + "step": 2695 + }, + { + "epoch": 0.2843881856540084, + "grad_norm": 0.4865906834602356, + "learning_rate": 0.0015, + "loss": 2.4262, + "step": 2696 + }, + { + "epoch": 0.28449367088607597, + "grad_norm": 0.48795461654663086, + "learning_rate": 0.0015, + "loss": 2.4005, + "step": 2697 + }, + { + "epoch": 0.28459915611814346, + "grad_norm": 0.5489049553871155, + "learning_rate": 0.0015, + "loss": 2.4318, + "step": 2698 + }, + { + "epoch": 0.28470464135021095, + "grad_norm": 0.5865594744682312, + "learning_rate": 0.0015, + "loss": 2.408, + "step": 2699 + }, + { + "epoch": 0.2848101265822785, + "grad_norm": 0.36475953459739685, + "learning_rate": 0.0015, + "loss": 2.3925, + "step": 2700 + }, + { + "epoch": 0.284915611814346, + "grad_norm": 0.5356622338294983, + "learning_rate": 0.0015, + "loss": 2.4336, + "step": 2701 + }, + { + "epoch": 0.2850210970464135, + "grad_norm": 0.6625538468360901, + "learning_rate": 0.0015, + "loss": 2.4665, + "step": 2702 + }, + { + "epoch": 0.28512658227848103, + "grad_norm": 0.4178686738014221, + "learning_rate": 0.0015, + "loss": 2.426, + "step": 2703 + }, + { + "epoch": 0.2852320675105485, + "grad_norm": 0.5744515657424927, + "learning_rate": 0.0015, + "loss": 2.4126, + "step": 2704 + }, + { + "epoch": 0.285337552742616, + "grad_norm": 0.6385703086853027, + "learning_rate": 0.0015, + "loss": 2.4279, + "step": 2705 + }, + { + "epoch": 0.28544303797468357, + "grad_norm": 0.3941457271575928, + "learning_rate": 0.0015, + "loss": 2.4116, + "step": 2706 + }, + { + "epoch": 0.28554852320675106, + "grad_norm": 0.7798815369606018, + "learning_rate": 0.0015, + "loss": 2.4176, + "step": 2707 + }, + { + "epoch": 0.28565400843881855, + "grad_norm": 0.6254736185073853, + "learning_rate": 0.0015, + "loss": 2.4222, + "step": 2708 + }, + { + "epoch": 0.2857594936708861, + "grad_norm": 0.4590211510658264, + "learning_rate": 0.0015, + "loss": 2.3963, + "step": 2709 + }, + { + 
"epoch": 0.2858649789029536, + "grad_norm": 0.7303619384765625, + "learning_rate": 0.0015, + "loss": 2.4188, + "step": 2710 + }, + { + "epoch": 0.2859704641350211, + "grad_norm": 0.5554119348526001, + "learning_rate": 0.0015, + "loss": 2.4125, + "step": 2711 + }, + { + "epoch": 0.28607594936708863, + "grad_norm": 0.589583158493042, + "learning_rate": 0.0015, + "loss": 2.4437, + "step": 2712 + }, + { + "epoch": 0.2861814345991561, + "grad_norm": 0.5159534811973572, + "learning_rate": 0.0015, + "loss": 2.427, + "step": 2713 + }, + { + "epoch": 0.2862869198312236, + "grad_norm": 0.4568880498409271, + "learning_rate": 0.0015, + "loss": 2.4335, + "step": 2714 + }, + { + "epoch": 0.28639240506329117, + "grad_norm": 0.4641784429550171, + "learning_rate": 0.0015, + "loss": 2.4375, + "step": 2715 + }, + { + "epoch": 0.28649789029535866, + "grad_norm": 0.438240110874176, + "learning_rate": 0.0015, + "loss": 2.4148, + "step": 2716 + }, + { + "epoch": 0.28660337552742615, + "grad_norm": 0.4451633095741272, + "learning_rate": 0.0015, + "loss": 2.4471, + "step": 2717 + }, + { + "epoch": 0.28670886075949364, + "grad_norm": 0.4865260720252991, + "learning_rate": 0.0015, + "loss": 2.388, + "step": 2718 + }, + { + "epoch": 0.2868143459915612, + "grad_norm": 0.41030004620552063, + "learning_rate": 0.0015, + "loss": 2.3786, + "step": 2719 + }, + { + "epoch": 0.2869198312236287, + "grad_norm": 0.4228115975856781, + "learning_rate": 0.0015, + "loss": 2.3924, + "step": 2720 + }, + { + "epoch": 0.2870253164556962, + "grad_norm": 0.3876941502094269, + "learning_rate": 0.0015, + "loss": 2.3934, + "step": 2721 + }, + { + "epoch": 0.2871308016877637, + "grad_norm": 0.3726053833961487, + "learning_rate": 0.0015, + "loss": 2.3927, + "step": 2722 + }, + { + "epoch": 0.2872362869198312, + "grad_norm": 0.3710802495479584, + "learning_rate": 0.0015, + "loss": 2.4194, + "step": 2723 + }, + { + "epoch": 0.2873417721518987, + "grad_norm": 0.3406830132007599, + "learning_rate": 0.0015, + "loss": 
2.3717, + "step": 2724 + }, + { + "epoch": 0.28744725738396626, + "grad_norm": 0.3577049970626831, + "learning_rate": 0.0015, + "loss": 2.3897, + "step": 2725 + }, + { + "epoch": 0.28755274261603375, + "grad_norm": 0.34568721055984497, + "learning_rate": 0.0015, + "loss": 2.4368, + "step": 2726 + }, + { + "epoch": 0.28765822784810124, + "grad_norm": 0.3907458186149597, + "learning_rate": 0.0015, + "loss": 2.3956, + "step": 2727 + }, + { + "epoch": 0.2877637130801688, + "grad_norm": 0.3762935400009155, + "learning_rate": 0.0015, + "loss": 2.3793, + "step": 2728 + }, + { + "epoch": 0.2878691983122363, + "grad_norm": 0.4240577220916748, + "learning_rate": 0.0015, + "loss": 2.4198, + "step": 2729 + }, + { + "epoch": 0.2879746835443038, + "grad_norm": 0.3890191614627838, + "learning_rate": 0.0015, + "loss": 2.395, + "step": 2730 + }, + { + "epoch": 0.2880801687763713, + "grad_norm": 0.4119868278503418, + "learning_rate": 0.0015, + "loss": 2.4099, + "step": 2731 + }, + { + "epoch": 0.2881856540084388, + "grad_norm": 0.4571007490158081, + "learning_rate": 0.0015, + "loss": 2.4224, + "step": 2732 + }, + { + "epoch": 0.2882911392405063, + "grad_norm": 0.38360095024108887, + "learning_rate": 0.0015, + "loss": 2.3964, + "step": 2733 + }, + { + "epoch": 0.28839662447257386, + "grad_norm": 0.4217468202114105, + "learning_rate": 0.0015, + "loss": 2.4024, + "step": 2734 + }, + { + "epoch": 0.28850210970464135, + "grad_norm": 0.5377964973449707, + "learning_rate": 0.0015, + "loss": 2.4007, + "step": 2735 + }, + { + "epoch": 0.28860759493670884, + "grad_norm": 0.5319667458534241, + "learning_rate": 0.0015, + "loss": 2.4019, + "step": 2736 + }, + { + "epoch": 0.2887130801687764, + "grad_norm": 0.3855518400669098, + "learning_rate": 0.0015, + "loss": 2.41, + "step": 2737 + }, + { + "epoch": 0.2888185654008439, + "grad_norm": 0.3776300847530365, + "learning_rate": 0.0015, + "loss": 2.4214, + "step": 2738 + }, + { + "epoch": 0.2889240506329114, + "grad_norm": 0.4586566090583801, + 
"learning_rate": 0.0015, + "loss": 2.4326, + "step": 2739 + }, + { + "epoch": 0.2890295358649789, + "grad_norm": 0.39681509137153625, + "learning_rate": 0.0015, + "loss": 2.4386, + "step": 2740 + }, + { + "epoch": 0.2891350210970464, + "grad_norm": 0.38987693190574646, + "learning_rate": 0.0015, + "loss": 2.4074, + "step": 2741 + }, + { + "epoch": 0.2892405063291139, + "grad_norm": 0.5575326085090637, + "learning_rate": 0.0015, + "loss": 2.4216, + "step": 2742 + }, + { + "epoch": 0.28934599156118146, + "grad_norm": 0.5320124626159668, + "learning_rate": 0.0015, + "loss": 2.3993, + "step": 2743 + }, + { + "epoch": 0.28945147679324895, + "grad_norm": 0.3861165940761566, + "learning_rate": 0.0015, + "loss": 2.4107, + "step": 2744 + }, + { + "epoch": 0.28955696202531644, + "grad_norm": 0.5643187165260315, + "learning_rate": 0.0015, + "loss": 2.4111, + "step": 2745 + }, + { + "epoch": 0.289662447257384, + "grad_norm": 0.5785874128341675, + "learning_rate": 0.0015, + "loss": 2.4234, + "step": 2746 + }, + { + "epoch": 0.2897679324894515, + "grad_norm": 0.40231919288635254, + "learning_rate": 0.0015, + "loss": 2.4106, + "step": 2747 + }, + { + "epoch": 0.289873417721519, + "grad_norm": 0.5778517127037048, + "learning_rate": 0.0015, + "loss": 2.435, + "step": 2748 + }, + { + "epoch": 0.28997890295358647, + "grad_norm": 0.4534483253955841, + "learning_rate": 0.0015, + "loss": 2.4184, + "step": 2749 + }, + { + "epoch": 0.290084388185654, + "grad_norm": 0.45446616411209106, + "learning_rate": 0.0015, + "loss": 2.3837, + "step": 2750 + }, + { + "epoch": 0.2901898734177215, + "grad_norm": 0.5534870028495789, + "learning_rate": 0.0015, + "loss": 2.3981, + "step": 2751 + }, + { + "epoch": 0.290295358649789, + "grad_norm": 0.4050524830818176, + "learning_rate": 0.0015, + "loss": 2.4204, + "step": 2752 + }, + { + "epoch": 0.29040084388185655, + "grad_norm": 0.42153045535087585, + "learning_rate": 0.0015, + "loss": 2.3839, + "step": 2753 + }, + { + "epoch": 0.29050632911392404, + 
"grad_norm": 0.4709899425506592, + "learning_rate": 0.0015, + "loss": 2.4124, + "step": 2754 + }, + { + "epoch": 0.29061181434599154, + "grad_norm": 0.4657578468322754, + "learning_rate": 0.0015, + "loss": 2.4237, + "step": 2755 + }, + { + "epoch": 0.2907172995780591, + "grad_norm": 0.42559748888015747, + "learning_rate": 0.0015, + "loss": 2.4064, + "step": 2756 + }, + { + "epoch": 0.2908227848101266, + "grad_norm": 0.4883682429790497, + "learning_rate": 0.0015, + "loss": 2.3991, + "step": 2757 + }, + { + "epoch": 0.29092827004219407, + "grad_norm": 0.4892362356185913, + "learning_rate": 0.0015, + "loss": 2.4078, + "step": 2758 + }, + { + "epoch": 0.2910337552742616, + "grad_norm": 0.3885108530521393, + "learning_rate": 0.0015, + "loss": 2.451, + "step": 2759 + }, + { + "epoch": 0.2911392405063291, + "grad_norm": 0.5161545872688293, + "learning_rate": 0.0015, + "loss": 2.3973, + "step": 2760 + }, + { + "epoch": 0.2912447257383966, + "grad_norm": 0.49955281615257263, + "learning_rate": 0.0015, + "loss": 2.4303, + "step": 2761 + }, + { + "epoch": 0.29135021097046415, + "grad_norm": 0.43027350306510925, + "learning_rate": 0.0015, + "loss": 2.4318, + "step": 2762 + }, + { + "epoch": 0.29145569620253164, + "grad_norm": 0.5154582262039185, + "learning_rate": 0.0015, + "loss": 2.4181, + "step": 2763 + }, + { + "epoch": 0.29156118143459914, + "grad_norm": 0.4321637749671936, + "learning_rate": 0.0015, + "loss": 2.3813, + "step": 2764 + }, + { + "epoch": 0.2916666666666667, + "grad_norm": 0.45488303899765015, + "learning_rate": 0.0015, + "loss": 2.4047, + "step": 2765 + }, + { + "epoch": 0.2917721518987342, + "grad_norm": 0.4413897395133972, + "learning_rate": 0.0015, + "loss": 2.3849, + "step": 2766 + }, + { + "epoch": 0.29187763713080167, + "grad_norm": 0.3659706115722656, + "learning_rate": 0.0015, + "loss": 2.3973, + "step": 2767 + }, + { + "epoch": 0.2919831223628692, + "grad_norm": 0.46425533294677734, + "learning_rate": 0.0015, + "loss": 2.3957, + "step": 2768 + }, + 
{ + "epoch": 0.2920886075949367, + "grad_norm": 0.3904953896999359, + "learning_rate": 0.0015, + "loss": 2.4054, + "step": 2769 + }, + { + "epoch": 0.2921940928270042, + "grad_norm": 0.44406917691230774, + "learning_rate": 0.0015, + "loss": 2.4123, + "step": 2770 + }, + { + "epoch": 0.29229957805907175, + "grad_norm": 0.40390080213546753, + "learning_rate": 0.0015, + "loss": 2.4103, + "step": 2771 + }, + { + "epoch": 0.29240506329113924, + "grad_norm": 0.36141130328178406, + "learning_rate": 0.0015, + "loss": 2.4166, + "step": 2772 + }, + { + "epoch": 0.29251054852320674, + "grad_norm": 0.3874060809612274, + "learning_rate": 0.0015, + "loss": 2.4087, + "step": 2773 + }, + { + "epoch": 0.2926160337552743, + "grad_norm": 0.37182292342185974, + "learning_rate": 0.0015, + "loss": 2.3848, + "step": 2774 + }, + { + "epoch": 0.2927215189873418, + "grad_norm": 0.41688376665115356, + "learning_rate": 0.0015, + "loss": 2.4092, + "step": 2775 + }, + { + "epoch": 0.29282700421940927, + "grad_norm": 0.37813958525657654, + "learning_rate": 0.0015, + "loss": 2.3893, + "step": 2776 + }, + { + "epoch": 0.2929324894514768, + "grad_norm": 0.3771017789840698, + "learning_rate": 0.0015, + "loss": 2.4274, + "step": 2777 + }, + { + "epoch": 0.2930379746835443, + "grad_norm": 0.39569640159606934, + "learning_rate": 0.0015, + "loss": 2.3968, + "step": 2778 + }, + { + "epoch": 0.2931434599156118, + "grad_norm": 0.3330463469028473, + "learning_rate": 0.0015, + "loss": 2.4103, + "step": 2779 + }, + { + "epoch": 0.29324894514767935, + "grad_norm": 0.4170871675014496, + "learning_rate": 0.0015, + "loss": 2.4015, + "step": 2780 + }, + { + "epoch": 0.29335443037974684, + "grad_norm": 0.3451877236366272, + "learning_rate": 0.0015, + "loss": 2.4136, + "step": 2781 + }, + { + "epoch": 0.29345991561181434, + "grad_norm": 0.38500112295150757, + "learning_rate": 0.0015, + "loss": 2.375, + "step": 2782 + }, + { + "epoch": 0.29356540084388183, + "grad_norm": 0.33936458826065063, + "learning_rate": 
0.0015, + "loss": 2.3886, + "step": 2783 + }, + { + "epoch": 0.2936708860759494, + "grad_norm": 0.3586674928665161, + "learning_rate": 0.0015, + "loss": 2.4088, + "step": 2784 + }, + { + "epoch": 0.29377637130801687, + "grad_norm": 0.387324720621109, + "learning_rate": 0.0015, + "loss": 2.3676, + "step": 2785 + }, + { + "epoch": 0.29388185654008436, + "grad_norm": 0.33738940954208374, + "learning_rate": 0.0015, + "loss": 2.3725, + "step": 2786 + }, + { + "epoch": 0.2939873417721519, + "grad_norm": 0.4170359671115875, + "learning_rate": 0.0015, + "loss": 2.3964, + "step": 2787 + }, + { + "epoch": 0.2940928270042194, + "grad_norm": 0.4191721975803375, + "learning_rate": 0.0015, + "loss": 2.414, + "step": 2788 + }, + { + "epoch": 0.2941983122362869, + "grad_norm": 0.4268380403518677, + "learning_rate": 0.0015, + "loss": 2.4339, + "step": 2789 + }, + { + "epoch": 0.29430379746835444, + "grad_norm": 0.40556374192237854, + "learning_rate": 0.0015, + "loss": 2.3995, + "step": 2790 + }, + { + "epoch": 0.29440928270042194, + "grad_norm": 0.4313223659992218, + "learning_rate": 0.0015, + "loss": 2.4065, + "step": 2791 + }, + { + "epoch": 0.29451476793248943, + "grad_norm": 0.4725036025047302, + "learning_rate": 0.0015, + "loss": 2.3562, + "step": 2792 + }, + { + "epoch": 0.294620253164557, + "grad_norm": 0.5019059181213379, + "learning_rate": 0.0015, + "loss": 2.4036, + "step": 2793 + }, + { + "epoch": 0.29472573839662447, + "grad_norm": 0.3900093138217926, + "learning_rate": 0.0015, + "loss": 2.4032, + "step": 2794 + }, + { + "epoch": 0.29483122362869196, + "grad_norm": 0.44297218322753906, + "learning_rate": 0.0015, + "loss": 2.3924, + "step": 2795 + }, + { + "epoch": 0.2949367088607595, + "grad_norm": 0.45383402705192566, + "learning_rate": 0.0015, + "loss": 2.3935, + "step": 2796 + }, + { + "epoch": 0.295042194092827, + "grad_norm": 0.449659138917923, + "learning_rate": 0.0015, + "loss": 2.4041, + "step": 2797 + }, + { + "epoch": 0.2951476793248945, + "grad_norm": 
0.5053336024284363, + "learning_rate": 0.0015, + "loss": 2.4098, + "step": 2798 + }, + { + "epoch": 0.29525316455696204, + "grad_norm": 0.4857564866542816, + "learning_rate": 0.0015, + "loss": 2.408, + "step": 2799 + }, + { + "epoch": 0.29535864978902954, + "grad_norm": 0.5894517302513123, + "learning_rate": 0.0015, + "loss": 2.3825, + "step": 2800 + }, + { + "epoch": 0.29546413502109703, + "grad_norm": 0.4534296691417694, + "learning_rate": 0.0015, + "loss": 2.4142, + "step": 2801 + }, + { + "epoch": 0.2955696202531646, + "grad_norm": 0.4144063889980316, + "learning_rate": 0.0015, + "loss": 2.3707, + "step": 2802 + }, + { + "epoch": 0.29567510548523207, + "grad_norm": 0.5179435610771179, + "learning_rate": 0.0015, + "loss": 2.4183, + "step": 2803 + }, + { + "epoch": 0.29578059071729956, + "grad_norm": 0.4608791470527649, + "learning_rate": 0.0015, + "loss": 2.4194, + "step": 2804 + }, + { + "epoch": 0.2958860759493671, + "grad_norm": 0.44288238883018494, + "learning_rate": 0.0015, + "loss": 2.3685, + "step": 2805 + }, + { + "epoch": 0.2959915611814346, + "grad_norm": 0.42615482211112976, + "learning_rate": 0.0015, + "loss": 2.392, + "step": 2806 + }, + { + "epoch": 0.2960970464135021, + "grad_norm": 0.45283544063568115, + "learning_rate": 0.0015, + "loss": 2.3786, + "step": 2807 + }, + { + "epoch": 0.29620253164556964, + "grad_norm": 0.4785565435886383, + "learning_rate": 0.0015, + "loss": 2.397, + "step": 2808 + }, + { + "epoch": 0.29630801687763714, + "grad_norm": 0.4039643108844757, + "learning_rate": 0.0015, + "loss": 2.4211, + "step": 2809 + }, + { + "epoch": 0.29641350210970463, + "grad_norm": 0.4733390212059021, + "learning_rate": 0.0015, + "loss": 2.3825, + "step": 2810 + }, + { + "epoch": 0.2965189873417722, + "grad_norm": 0.5396400690078735, + "learning_rate": 0.0015, + "loss": 2.3972, + "step": 2811 + }, + { + "epoch": 0.29662447257383967, + "grad_norm": 0.41574546694755554, + "learning_rate": 0.0015, + "loss": 2.378, + "step": 2812 + }, + { + "epoch": 
0.29672995780590716, + "grad_norm": 0.34130921959877014, + "learning_rate": 0.0015, + "loss": 2.3943, + "step": 2813 + }, + { + "epoch": 0.2968354430379747, + "grad_norm": 0.512970507144928, + "learning_rate": 0.0015, + "loss": 2.4306, + "step": 2814 + }, + { + "epoch": 0.2969409282700422, + "grad_norm": 0.5649516582489014, + "learning_rate": 0.0015, + "loss": 2.4098, + "step": 2815 + }, + { + "epoch": 0.2970464135021097, + "grad_norm": 0.35082197189331055, + "learning_rate": 0.0015, + "loss": 2.36, + "step": 2816 + }, + { + "epoch": 0.2971518987341772, + "grad_norm": 0.5128579139709473, + "learning_rate": 0.0015, + "loss": 2.3776, + "step": 2817 + }, + { + "epoch": 0.29725738396624474, + "grad_norm": 0.4643462598323822, + "learning_rate": 0.0015, + "loss": 2.4344, + "step": 2818 + }, + { + "epoch": 0.29736286919831223, + "grad_norm": 0.39130333065986633, + "learning_rate": 0.0015, + "loss": 2.4036, + "step": 2819 + }, + { + "epoch": 0.2974683544303797, + "grad_norm": 0.5477041602134705, + "learning_rate": 0.0015, + "loss": 2.4072, + "step": 2820 + }, + { + "epoch": 0.29757383966244727, + "grad_norm": 0.4206145107746124, + "learning_rate": 0.0015, + "loss": 2.3809, + "step": 2821 + }, + { + "epoch": 0.29767932489451476, + "grad_norm": 0.377450168132782, + "learning_rate": 0.0015, + "loss": 2.38, + "step": 2822 + }, + { + "epoch": 0.29778481012658226, + "grad_norm": 0.49586498737335205, + "learning_rate": 0.0015, + "loss": 2.4064, + "step": 2823 + }, + { + "epoch": 0.2978902953586498, + "grad_norm": 0.41729938983917236, + "learning_rate": 0.0015, + "loss": 2.3631, + "step": 2824 + }, + { + "epoch": 0.2979957805907173, + "grad_norm": 0.4061506688594818, + "learning_rate": 0.0015, + "loss": 2.3759, + "step": 2825 + }, + { + "epoch": 0.2981012658227848, + "grad_norm": 0.4209458827972412, + "learning_rate": 0.0015, + "loss": 2.3881, + "step": 2826 + }, + { + "epoch": 0.29820675105485234, + "grad_norm": 0.42353326082229614, + "learning_rate": 0.0015, + "loss": 2.3467, + 
"step": 2827 + }, + { + "epoch": 0.29831223628691983, + "grad_norm": 0.4299774765968323, + "learning_rate": 0.0015, + "loss": 2.3834, + "step": 2828 + }, + { + "epoch": 0.2984177215189873, + "grad_norm": 0.4348611533641815, + "learning_rate": 0.0015, + "loss": 2.3971, + "step": 2829 + }, + { + "epoch": 0.29852320675105487, + "grad_norm": 0.4220806658267975, + "learning_rate": 0.0015, + "loss": 2.3897, + "step": 2830 + }, + { + "epoch": 0.29862869198312236, + "grad_norm": 0.4729754626750946, + "learning_rate": 0.0015, + "loss": 2.3629, + "step": 2831 + }, + { + "epoch": 0.29873417721518986, + "grad_norm": 0.39992618560791016, + "learning_rate": 0.0015, + "loss": 2.4106, + "step": 2832 + }, + { + "epoch": 0.2988396624472574, + "grad_norm": 0.4482656717300415, + "learning_rate": 0.0015, + "loss": 2.357, + "step": 2833 + }, + { + "epoch": 0.2989451476793249, + "grad_norm": 0.4347230792045593, + "learning_rate": 0.0015, + "loss": 2.3888, + "step": 2834 + }, + { + "epoch": 0.2990506329113924, + "grad_norm": 0.4497527480125427, + "learning_rate": 0.0015, + "loss": 2.3362, + "step": 2835 + }, + { + "epoch": 0.29915611814345994, + "grad_norm": 0.4323689937591553, + "learning_rate": 0.0015, + "loss": 2.3616, + "step": 2836 + }, + { + "epoch": 0.29926160337552743, + "grad_norm": 0.43943849205970764, + "learning_rate": 0.0015, + "loss": 2.3876, + "step": 2837 + }, + { + "epoch": 0.2993670886075949, + "grad_norm": 0.4326552450656891, + "learning_rate": 0.0015, + "loss": 2.367, + "step": 2838 + }, + { + "epoch": 0.29947257383966247, + "grad_norm": 0.3847145736217499, + "learning_rate": 0.0015, + "loss": 2.3514, + "step": 2839 + }, + { + "epoch": 0.29957805907172996, + "grad_norm": 0.3929300904273987, + "learning_rate": 0.0015, + "loss": 2.3947, + "step": 2840 + }, + { + "epoch": 0.29968354430379746, + "grad_norm": 0.4088045358657837, + "learning_rate": 0.0015, + "loss": 2.3788, + "step": 2841 + }, + { + "epoch": 0.299789029535865, + "grad_norm": 0.36684438586235046, + 
"learning_rate": 0.0015, + "loss": 2.3893, + "step": 2842 + }, + { + "epoch": 0.2998945147679325, + "grad_norm": 0.3817586898803711, + "learning_rate": 0.0015, + "loss": 2.369, + "step": 2843 + }, + { + "epoch": 0.3, + "grad_norm": 0.40417569875717163, + "learning_rate": 0.0015, + "loss": 2.3986, + "step": 2844 + }, + { + "epoch": 0.30010548523206754, + "grad_norm": 0.3699309229850769, + "learning_rate": 0.0015, + "loss": 2.3921, + "step": 2845 + }, + { + "epoch": 0.30021097046413503, + "grad_norm": 0.38915735483169556, + "learning_rate": 0.0015, + "loss": 2.4096, + "step": 2846 + }, + { + "epoch": 0.3003164556962025, + "grad_norm": 0.42253926396369934, + "learning_rate": 0.0015, + "loss": 2.3938, + "step": 2847 + }, + { + "epoch": 0.30042194092827, + "grad_norm": 0.37027114629745483, + "learning_rate": 0.0015, + "loss": 2.3502, + "step": 2848 + }, + { + "epoch": 0.30052742616033756, + "grad_norm": 0.3699391484260559, + "learning_rate": 0.0015, + "loss": 2.3779, + "step": 2849 + }, + { + "epoch": 0.30063291139240506, + "grad_norm": 0.44685012102127075, + "learning_rate": 0.0015, + "loss": 2.3886, + "step": 2850 + }, + { + "epoch": 0.30073839662447255, + "grad_norm": 0.3721829056739807, + "learning_rate": 0.0015, + "loss": 2.3455, + "step": 2851 + }, + { + "epoch": 0.3008438818565401, + "grad_norm": 0.4437352120876312, + "learning_rate": 0.0015, + "loss": 2.393, + "step": 2852 + }, + { + "epoch": 0.3009493670886076, + "grad_norm": 0.5091273784637451, + "learning_rate": 0.0015, + "loss": 2.3823, + "step": 2853 + }, + { + "epoch": 0.3010548523206751, + "grad_norm": 0.3529938757419586, + "learning_rate": 0.0015, + "loss": 2.3922, + "step": 2854 + }, + { + "epoch": 0.30116033755274263, + "grad_norm": 0.4367988705635071, + "learning_rate": 0.0015, + "loss": 2.4255, + "step": 2855 + }, + { + "epoch": 0.3012658227848101, + "grad_norm": 0.42455586791038513, + "learning_rate": 0.0015, + "loss": 2.3789, + "step": 2856 + }, + { + "epoch": 0.3013713080168776, + "grad_norm": 
0.38922008872032166, + "learning_rate": 0.0015, + "loss": 2.3705, + "step": 2857 + }, + { + "epoch": 0.30147679324894516, + "grad_norm": 0.4382610321044922, + "learning_rate": 0.0015, + "loss": 2.3759, + "step": 2858 + }, + { + "epoch": 0.30158227848101266, + "grad_norm": 0.4015330672264099, + "learning_rate": 0.0015, + "loss": 2.3548, + "step": 2859 + }, + { + "epoch": 0.30168776371308015, + "grad_norm": 0.4405531585216522, + "learning_rate": 0.0015, + "loss": 2.3762, + "step": 2860 + }, + { + "epoch": 0.3017932489451477, + "grad_norm": 0.34485623240470886, + "learning_rate": 0.0015, + "loss": 2.3879, + "step": 2861 + }, + { + "epoch": 0.3018987341772152, + "grad_norm": 0.4100823402404785, + "learning_rate": 0.0015, + "loss": 2.3688, + "step": 2862 + }, + { + "epoch": 0.3020042194092827, + "grad_norm": 0.4297555685043335, + "learning_rate": 0.0015, + "loss": 2.371, + "step": 2863 + }, + { + "epoch": 0.30210970464135023, + "grad_norm": 0.32991835474967957, + "learning_rate": 0.0015, + "loss": 2.3487, + "step": 2864 + }, + { + "epoch": 0.3022151898734177, + "grad_norm": 0.40665143728256226, + "learning_rate": 0.0015, + "loss": 2.3949, + "step": 2865 + }, + { + "epoch": 0.3023206751054852, + "grad_norm": 0.352870374917984, + "learning_rate": 0.0015, + "loss": 2.4142, + "step": 2866 + }, + { + "epoch": 0.30242616033755276, + "grad_norm": 0.3814266324043274, + "learning_rate": 0.0015, + "loss": 2.3787, + "step": 2867 + }, + { + "epoch": 0.30253164556962026, + "grad_norm": 0.3791300654411316, + "learning_rate": 0.0015, + "loss": 2.3947, + "step": 2868 + }, + { + "epoch": 0.30263713080168775, + "grad_norm": 0.3677670657634735, + "learning_rate": 0.0015, + "loss": 2.3218, + "step": 2869 + }, + { + "epoch": 0.3027426160337553, + "grad_norm": 0.45813512802124023, + "learning_rate": 0.0015, + "loss": 2.4222, + "step": 2870 + }, + { + "epoch": 0.3028481012658228, + "grad_norm": 0.42967039346694946, + "learning_rate": 0.0015, + "loss": 2.3873, + "step": 2871 + }, + { + 
"epoch": 0.3029535864978903, + "grad_norm": 0.43081292510032654, + "learning_rate": 0.0015, + "loss": 2.3657, + "step": 2872 + }, + { + "epoch": 0.30305907172995783, + "grad_norm": 0.39393100142478943, + "learning_rate": 0.0015, + "loss": 2.4063, + "step": 2873 + }, + { + "epoch": 0.3031645569620253, + "grad_norm": 0.41922736167907715, + "learning_rate": 0.0015, + "loss": 2.3296, + "step": 2874 + }, + { + "epoch": 0.3032700421940928, + "grad_norm": 0.50397789478302, + "learning_rate": 0.0015, + "loss": 2.372, + "step": 2875 + }, + { + "epoch": 0.30337552742616036, + "grad_norm": 0.36101946234703064, + "learning_rate": 0.0015, + "loss": 2.3548, + "step": 2876 + }, + { + "epoch": 0.30348101265822786, + "grad_norm": 0.4128842353820801, + "learning_rate": 0.0015, + "loss": 2.3728, + "step": 2877 + }, + { + "epoch": 0.30358649789029535, + "grad_norm": 0.48648518323898315, + "learning_rate": 0.0015, + "loss": 2.3858, + "step": 2878 + }, + { + "epoch": 0.3036919831223629, + "grad_norm": 0.4173881709575653, + "learning_rate": 0.0015, + "loss": 2.3475, + "step": 2879 + }, + { + "epoch": 0.3037974683544304, + "grad_norm": 0.35626208782196045, + "learning_rate": 0.0015, + "loss": 2.368, + "step": 2880 + }, + { + "epoch": 0.3039029535864979, + "grad_norm": 0.4991363286972046, + "learning_rate": 0.0015, + "loss": 2.3738, + "step": 2881 + }, + { + "epoch": 0.3040084388185654, + "grad_norm": 0.5091440081596375, + "learning_rate": 0.0015, + "loss": 2.3945, + "step": 2882 + }, + { + "epoch": 0.3041139240506329, + "grad_norm": 0.39526936411857605, + "learning_rate": 0.0015, + "loss": 2.3343, + "step": 2883 + }, + { + "epoch": 0.3042194092827004, + "grad_norm": 0.42277494072914124, + "learning_rate": 0.0015, + "loss": 2.3629, + "step": 2884 + }, + { + "epoch": 0.3043248945147679, + "grad_norm": 0.4656774699687958, + "learning_rate": 0.0015, + "loss": 2.3668, + "step": 2885 + }, + { + "epoch": 0.30443037974683546, + "grad_norm": 0.40140995383262634, + "learning_rate": 0.0015, + 
"loss": 2.3739, + "step": 2886 + }, + { + "epoch": 0.30453586497890295, + "grad_norm": 0.41971147060394287, + "learning_rate": 0.0015, + "loss": 2.3723, + "step": 2887 + }, + { + "epoch": 0.30464135021097044, + "grad_norm": 0.47477081418037415, + "learning_rate": 0.0015, + "loss": 2.34, + "step": 2888 + }, + { + "epoch": 0.304746835443038, + "grad_norm": 0.39442548155784607, + "learning_rate": 0.0015, + "loss": 2.4056, + "step": 2889 + }, + { + "epoch": 0.3048523206751055, + "grad_norm": 0.3555215299129486, + "learning_rate": 0.0015, + "loss": 2.3812, + "step": 2890 + }, + { + "epoch": 0.304957805907173, + "grad_norm": 0.40653154253959656, + "learning_rate": 0.0015, + "loss": 2.3892, + "step": 2891 + }, + { + "epoch": 0.3050632911392405, + "grad_norm": 0.3601367175579071, + "learning_rate": 0.0015, + "loss": 2.3781, + "step": 2892 + }, + { + "epoch": 0.305168776371308, + "grad_norm": 0.4350811243057251, + "learning_rate": 0.0015, + "loss": 2.372, + "step": 2893 + }, + { + "epoch": 0.3052742616033755, + "grad_norm": 0.3579307198524475, + "learning_rate": 0.0015, + "loss": 2.3783, + "step": 2894 + }, + { + "epoch": 0.30537974683544306, + "grad_norm": 0.4776327610015869, + "learning_rate": 0.0015, + "loss": 2.3607, + "step": 2895 + }, + { + "epoch": 0.30548523206751055, + "grad_norm": 0.4114859104156494, + "learning_rate": 0.0015, + "loss": 2.3917, + "step": 2896 + }, + { + "epoch": 0.30559071729957804, + "grad_norm": 0.4537831246852875, + "learning_rate": 0.0015, + "loss": 2.4035, + "step": 2897 + }, + { + "epoch": 0.3056962025316456, + "grad_norm": 0.4289948046207428, + "learning_rate": 0.0015, + "loss": 2.3864, + "step": 2898 + }, + { + "epoch": 0.3058016877637131, + "grad_norm": 0.43372994661331177, + "learning_rate": 0.0015, + "loss": 2.372, + "step": 2899 + }, + { + "epoch": 0.3059071729957806, + "grad_norm": 0.3946252763271332, + "learning_rate": 0.0015, + "loss": 2.3434, + "step": 2900 + }, + { + "epoch": 0.3060126582278481, + "grad_norm": 0.40343499183654785, 
+ "learning_rate": 0.0015, + "loss": 2.3779, + "step": 2901 + }, + { + "epoch": 0.3061181434599156, + "grad_norm": 0.40466970205307007, + "learning_rate": 0.0015, + "loss": 2.3868, + "step": 2902 + }, + { + "epoch": 0.3062236286919831, + "grad_norm": 0.42477983236312866, + "learning_rate": 0.0015, + "loss": 2.3619, + "step": 2903 + }, + { + "epoch": 0.30632911392405066, + "grad_norm": 0.35511893033981323, + "learning_rate": 0.0015, + "loss": 2.3952, + "step": 2904 + }, + { + "epoch": 0.30643459915611815, + "grad_norm": 0.45248979330062866, + "learning_rate": 0.0015, + "loss": 2.3973, + "step": 2905 + }, + { + "epoch": 0.30654008438818564, + "grad_norm": 0.3856984078884125, + "learning_rate": 0.0015, + "loss": 2.3852, + "step": 2906 + }, + { + "epoch": 0.3066455696202532, + "grad_norm": 0.40479758381843567, + "learning_rate": 0.0015, + "loss": 2.3392, + "step": 2907 + }, + { + "epoch": 0.3067510548523207, + "grad_norm": 0.3639548122882843, + "learning_rate": 0.0015, + "loss": 2.3767, + "step": 2908 + }, + { + "epoch": 0.3068565400843882, + "grad_norm": 0.4264355003833771, + "learning_rate": 0.0015, + "loss": 2.3491, + "step": 2909 + }, + { + "epoch": 0.3069620253164557, + "grad_norm": 0.3524112105369568, + "learning_rate": 0.0015, + "loss": 2.3636, + "step": 2910 + }, + { + "epoch": 0.3070675105485232, + "grad_norm": 0.4174242317676544, + "learning_rate": 0.0015, + "loss": 2.3395, + "step": 2911 + }, + { + "epoch": 0.3071729957805907, + "grad_norm": 0.4641706347465515, + "learning_rate": 0.0015, + "loss": 2.3741, + "step": 2912 + }, + { + "epoch": 0.30727848101265826, + "grad_norm": 0.49629199504852295, + "learning_rate": 0.0015, + "loss": 2.3671, + "step": 2913 + }, + { + "epoch": 0.30738396624472575, + "grad_norm": 0.36893609166145325, + "learning_rate": 0.0015, + "loss": 2.3888, + "step": 2914 + }, + { + "epoch": 0.30748945147679324, + "grad_norm": 0.42724674940109253, + "learning_rate": 0.0015, + "loss": 2.3924, + "step": 2915 + }, + { + "epoch": 
0.30759493670886073, + "grad_norm": 0.40661418437957764, + "learning_rate": 0.0015, + "loss": 2.3773, + "step": 2916 + }, + { + "epoch": 0.3077004219409283, + "grad_norm": 0.37002474069595337, + "learning_rate": 0.0015, + "loss": 2.354, + "step": 2917 + }, + { + "epoch": 0.3078059071729958, + "grad_norm": 0.40333306789398193, + "learning_rate": 0.0015, + "loss": 2.339, + "step": 2918 + }, + { + "epoch": 0.30791139240506327, + "grad_norm": 0.47983217239379883, + "learning_rate": 0.0015, + "loss": 2.3981, + "step": 2919 + }, + { + "epoch": 0.3080168776371308, + "grad_norm": 0.3569065034389496, + "learning_rate": 0.0015, + "loss": 2.357, + "step": 2920 + }, + { + "epoch": 0.3081223628691983, + "grad_norm": 0.38966959714889526, + "learning_rate": 0.0015, + "loss": 2.3178, + "step": 2921 + }, + { + "epoch": 0.3082278481012658, + "grad_norm": 0.42317578196525574, + "learning_rate": 0.0015, + "loss": 2.3821, + "step": 2922 + }, + { + "epoch": 0.30833333333333335, + "grad_norm": 0.3711182177066803, + "learning_rate": 0.0015, + "loss": 2.3643, + "step": 2923 + }, + { + "epoch": 0.30843881856540084, + "grad_norm": 0.45307838916778564, + "learning_rate": 0.0015, + "loss": 2.3561, + "step": 2924 + }, + { + "epoch": 0.30854430379746833, + "grad_norm": 0.4667065143585205, + "learning_rate": 0.0015, + "loss": 2.3706, + "step": 2925 + }, + { + "epoch": 0.3086497890295359, + "grad_norm": 0.3405604362487793, + "learning_rate": 0.0015, + "loss": 2.3579, + "step": 2926 + }, + { + "epoch": 0.3087552742616034, + "grad_norm": 0.43673914670944214, + "learning_rate": 0.0015, + "loss": 2.3777, + "step": 2927 + }, + { + "epoch": 0.30886075949367087, + "grad_norm": 0.4031408727169037, + "learning_rate": 0.0015, + "loss": 2.3807, + "step": 2928 + }, + { + "epoch": 0.3089662447257384, + "grad_norm": 0.3738328814506531, + "learning_rate": 0.0015, + "loss": 2.3242, + "step": 2929 + }, + { + "epoch": 0.3090717299578059, + "grad_norm": 0.47400718927383423, + "learning_rate": 0.0015, + "loss": 
2.351, + "step": 2930 + }, + { + "epoch": 0.3091772151898734, + "grad_norm": 0.4981643259525299, + "learning_rate": 0.0015, + "loss": 2.3796, + "step": 2931 + }, + { + "epoch": 0.30928270042194095, + "grad_norm": 0.4099635183811188, + "learning_rate": 0.0015, + "loss": 2.3773, + "step": 2932 + }, + { + "epoch": 0.30938818565400844, + "grad_norm": 0.4320606589317322, + "learning_rate": 0.0015, + "loss": 2.3666, + "step": 2933 + }, + { + "epoch": 0.30949367088607593, + "grad_norm": 0.43424561619758606, + "learning_rate": 0.0015, + "loss": 2.3861, + "step": 2934 + }, + { + "epoch": 0.3095991561181435, + "grad_norm": 0.4722565710544586, + "learning_rate": 0.0015, + "loss": 2.3713, + "step": 2935 + }, + { + "epoch": 0.309704641350211, + "grad_norm": 0.4310441017150879, + "learning_rate": 0.0015, + "loss": 2.4079, + "step": 2936 + }, + { + "epoch": 0.30981012658227847, + "grad_norm": 0.3581497073173523, + "learning_rate": 0.0015, + "loss": 2.366, + "step": 2937 + }, + { + "epoch": 0.309915611814346, + "grad_norm": 0.5037932395935059, + "learning_rate": 0.0015, + "loss": 2.3424, + "step": 2938 + }, + { + "epoch": 0.3100210970464135, + "grad_norm": 0.48014014959335327, + "learning_rate": 0.0015, + "loss": 2.3238, + "step": 2939 + }, + { + "epoch": 0.310126582278481, + "grad_norm": 0.4266440272331238, + "learning_rate": 0.0015, + "loss": 2.3643, + "step": 2940 + }, + { + "epoch": 0.31023206751054855, + "grad_norm": 0.42506009340286255, + "learning_rate": 0.0015, + "loss": 2.3714, + "step": 2941 + }, + { + "epoch": 0.31033755274261604, + "grad_norm": 0.3877812623977661, + "learning_rate": 0.0015, + "loss": 2.351, + "step": 2942 + }, + { + "epoch": 0.31044303797468353, + "grad_norm": 0.46639490127563477, + "learning_rate": 0.0015, + "loss": 2.3813, + "step": 2943 + }, + { + "epoch": 0.3105485232067511, + "grad_norm": 0.44640782475471497, + "learning_rate": 0.0015, + "loss": 2.3911, + "step": 2944 + }, + { + "epoch": 0.3106540084388186, + "grad_norm": 0.3805692195892334, + 
"learning_rate": 0.0015, + "loss": 2.3934, + "step": 2945 + }, + { + "epoch": 0.31075949367088607, + "grad_norm": 0.5094541907310486, + "learning_rate": 0.0015, + "loss": 2.3601, + "step": 2946 + }, + { + "epoch": 0.31086497890295356, + "grad_norm": 0.40930309891700745, + "learning_rate": 0.0015, + "loss": 2.3739, + "step": 2947 + }, + { + "epoch": 0.3109704641350211, + "grad_norm": 0.4280940294265747, + "learning_rate": 0.0015, + "loss": 2.3825, + "step": 2948 + }, + { + "epoch": 0.3110759493670886, + "grad_norm": 0.4612037241458893, + "learning_rate": 0.0015, + "loss": 2.3798, + "step": 2949 + }, + { + "epoch": 0.3111814345991561, + "grad_norm": 0.44752681255340576, + "learning_rate": 0.0015, + "loss": 2.3406, + "step": 2950 + }, + { + "epoch": 0.31128691983122364, + "grad_norm": 0.4910968840122223, + "learning_rate": 0.0015, + "loss": 2.354, + "step": 2951 + }, + { + "epoch": 0.31139240506329113, + "grad_norm": 0.5026883482933044, + "learning_rate": 0.0015, + "loss": 2.349, + "step": 2952 + }, + { + "epoch": 0.3114978902953586, + "grad_norm": 0.4166754484176636, + "learning_rate": 0.0015, + "loss": 2.3772, + "step": 2953 + }, + { + "epoch": 0.3116033755274262, + "grad_norm": 0.44065535068511963, + "learning_rate": 0.0015, + "loss": 2.3474, + "step": 2954 + }, + { + "epoch": 0.31170886075949367, + "grad_norm": 0.41469132900238037, + "learning_rate": 0.0015, + "loss": 2.3666, + "step": 2955 + }, + { + "epoch": 0.31181434599156116, + "grad_norm": 0.3492048382759094, + "learning_rate": 0.0015, + "loss": 2.363, + "step": 2956 + }, + { + "epoch": 0.3119198312236287, + "grad_norm": 0.45489248633384705, + "learning_rate": 0.0015, + "loss": 2.372, + "step": 2957 + }, + { + "epoch": 0.3120253164556962, + "grad_norm": 0.4153963625431061, + "learning_rate": 0.0015, + "loss": 2.3302, + "step": 2958 + }, + { + "epoch": 0.3121308016877637, + "grad_norm": 0.4598146080970764, + "learning_rate": 0.0015, + "loss": 2.3779, + "step": 2959 + }, + { + "epoch": 0.31223628691983124, + 
"grad_norm": 0.47791972756385803, + "learning_rate": 0.0015, + "loss": 2.3553, + "step": 2960 + }, + { + "epoch": 0.31234177215189873, + "grad_norm": 0.37645891308784485, + "learning_rate": 0.0015, + "loss": 2.3245, + "step": 2961 + }, + { + "epoch": 0.3124472573839662, + "grad_norm": 0.5233430862426758, + "learning_rate": 0.0015, + "loss": 2.3554, + "step": 2962 + }, + { + "epoch": 0.3125527426160338, + "grad_norm": 0.5249641537666321, + "learning_rate": 0.0015, + "loss": 2.3521, + "step": 2963 + }, + { + "epoch": 0.31265822784810127, + "grad_norm": 0.48366987705230713, + "learning_rate": 0.0015, + "loss": 2.3153, + "step": 2964 + }, + { + "epoch": 0.31276371308016876, + "grad_norm": 0.4431849718093872, + "learning_rate": 0.0015, + "loss": 2.3411, + "step": 2965 + }, + { + "epoch": 0.3128691983122363, + "grad_norm": 0.5086143612861633, + "learning_rate": 0.0015, + "loss": 2.3556, + "step": 2966 + }, + { + "epoch": 0.3129746835443038, + "grad_norm": 0.48720696568489075, + "learning_rate": 0.0015, + "loss": 2.3706, + "step": 2967 + }, + { + "epoch": 0.3130801687763713, + "grad_norm": 0.41640543937683105, + "learning_rate": 0.0015, + "loss": 2.383, + "step": 2968 + }, + { + "epoch": 0.31318565400843884, + "grad_norm": 0.5122390389442444, + "learning_rate": 0.0015, + "loss": 2.3545, + "step": 2969 + }, + { + "epoch": 0.31329113924050633, + "grad_norm": 0.5144877433776855, + "learning_rate": 0.0015, + "loss": 2.3584, + "step": 2970 + }, + { + "epoch": 0.3133966244725738, + "grad_norm": 0.35709837079048157, + "learning_rate": 0.0015, + "loss": 2.3264, + "step": 2971 + }, + { + "epoch": 0.3135021097046414, + "grad_norm": 0.5002418756484985, + "learning_rate": 0.0015, + "loss": 2.3343, + "step": 2972 + }, + { + "epoch": 0.31360759493670887, + "grad_norm": 0.41554227471351624, + "learning_rate": 0.0015, + "loss": 2.3661, + "step": 2973 + }, + { + "epoch": 0.31371308016877636, + "grad_norm": 0.41495636105537415, + "learning_rate": 0.0015, + "loss": 2.37, + "step": 2974 + }, 
+ { + "epoch": 0.3138185654008439, + "grad_norm": 0.38574692606925964, + "learning_rate": 0.0015, + "loss": 2.3502, + "step": 2975 + }, + { + "epoch": 0.3139240506329114, + "grad_norm": 0.473649799823761, + "learning_rate": 0.0015, + "loss": 2.3482, + "step": 2976 + }, + { + "epoch": 0.3140295358649789, + "grad_norm": 0.3985459804534912, + "learning_rate": 0.0015, + "loss": 2.3564, + "step": 2977 + }, + { + "epoch": 0.31413502109704644, + "grad_norm": 0.407502144575119, + "learning_rate": 0.0015, + "loss": 2.3609, + "step": 2978 + }, + { + "epoch": 0.31424050632911393, + "grad_norm": 0.4793303608894348, + "learning_rate": 0.0015, + "loss": 2.3554, + "step": 2979 + }, + { + "epoch": 0.3143459915611814, + "grad_norm": 0.4678103029727936, + "learning_rate": 0.0015, + "loss": 2.3848, + "step": 2980 + }, + { + "epoch": 0.3144514767932489, + "grad_norm": 0.36374086141586304, + "learning_rate": 0.0015, + "loss": 2.3726, + "step": 2981 + }, + { + "epoch": 0.31455696202531647, + "grad_norm": 0.4100324809551239, + "learning_rate": 0.0015, + "loss": 2.3399, + "step": 2982 + }, + { + "epoch": 0.31466244725738396, + "grad_norm": 0.3543737530708313, + "learning_rate": 0.0015, + "loss": 2.3611, + "step": 2983 + }, + { + "epoch": 0.31476793248945145, + "grad_norm": 0.38561511039733887, + "learning_rate": 0.0015, + "loss": 2.3608, + "step": 2984 + }, + { + "epoch": 0.314873417721519, + "grad_norm": 0.35633018612861633, + "learning_rate": 0.0015, + "loss": 2.3525, + "step": 2985 + }, + { + "epoch": 0.3149789029535865, + "grad_norm": 0.3694033920764923, + "learning_rate": 0.0015, + "loss": 2.3713, + "step": 2986 + }, + { + "epoch": 0.315084388185654, + "grad_norm": 0.3286552131175995, + "learning_rate": 0.0015, + "loss": 2.3501, + "step": 2987 + }, + { + "epoch": 0.31518987341772153, + "grad_norm": 0.4469401240348816, + "learning_rate": 0.0015, + "loss": 2.3794, + "step": 2988 + }, + { + "epoch": 0.315295358649789, + "grad_norm": 0.3352842330932617, + "learning_rate": 0.0015, + 
"loss": 2.3877, + "step": 2989 + }, + { + "epoch": 0.3154008438818565, + "grad_norm": 0.39739930629730225, + "learning_rate": 0.0015, + "loss": 2.3637, + "step": 2990 + }, + { + "epoch": 0.31550632911392407, + "grad_norm": 0.3386562764644623, + "learning_rate": 0.0015, + "loss": 2.3306, + "step": 2991 + }, + { + "epoch": 0.31561181434599156, + "grad_norm": 0.403971403837204, + "learning_rate": 0.0015, + "loss": 2.3393, + "step": 2992 + }, + { + "epoch": 0.31571729957805905, + "grad_norm": 0.36144909262657166, + "learning_rate": 0.0015, + "loss": 2.3044, + "step": 2993 + }, + { + "epoch": 0.3158227848101266, + "grad_norm": 0.41723892092704773, + "learning_rate": 0.0015, + "loss": 2.3786, + "step": 2994 + }, + { + "epoch": 0.3159282700421941, + "grad_norm": 0.41876810789108276, + "learning_rate": 0.0015, + "loss": 2.3899, + "step": 2995 + }, + { + "epoch": 0.3160337552742616, + "grad_norm": 0.4246068596839905, + "learning_rate": 0.0015, + "loss": 2.3244, + "step": 2996 + }, + { + "epoch": 0.31613924050632913, + "grad_norm": 0.4164517819881439, + "learning_rate": 0.0015, + "loss": 2.346, + "step": 2997 + }, + { + "epoch": 0.3162447257383966, + "grad_norm": 0.44160196185112, + "learning_rate": 0.0015, + "loss": 2.345, + "step": 2998 + }, + { + "epoch": 0.3163502109704641, + "grad_norm": 0.37589696049690247, + "learning_rate": 0.0015, + "loss": 2.3532, + "step": 2999 + }, + { + "epoch": 0.31645569620253167, + "grad_norm": 0.44378140568733215, + "learning_rate": 0.0015, + "loss": 2.3524, + "step": 3000 + }, + { + "epoch": 0.31656118143459916, + "grad_norm": 0.3712596893310547, + "learning_rate": 0.0015, + "loss": 2.3549, + "step": 3001 + }, + { + "epoch": 0.31666666666666665, + "grad_norm": 0.3702702224254608, + "learning_rate": 0.0015, + "loss": 2.3508, + "step": 3002 + }, + { + "epoch": 0.3167721518987342, + "grad_norm": 0.3780547082424164, + "learning_rate": 0.0015, + "loss": 2.3072, + "step": 3003 + }, + { + "epoch": 0.3168776371308017, + "grad_norm": 
0.36802971363067627, + "learning_rate": 0.0015, + "loss": 2.3999, + "step": 3004 + }, + { + "epoch": 0.3169831223628692, + "grad_norm": 0.4305969476699829, + "learning_rate": 0.0015, + "loss": 2.3738, + "step": 3005 + }, + { + "epoch": 0.31708860759493673, + "grad_norm": 0.41926833987236023, + "learning_rate": 0.0015, + "loss": 2.3544, + "step": 3006 + }, + { + "epoch": 0.3171940928270042, + "grad_norm": 0.4112190306186676, + "learning_rate": 0.0015, + "loss": 2.3614, + "step": 3007 + }, + { + "epoch": 0.3172995780590717, + "grad_norm": 0.43936145305633545, + "learning_rate": 0.0015, + "loss": 2.3434, + "step": 3008 + }, + { + "epoch": 0.31740506329113927, + "grad_norm": 0.5297348499298096, + "learning_rate": 0.0015, + "loss": 2.3491, + "step": 3009 + }, + { + "epoch": 0.31751054852320676, + "grad_norm": 0.4405179023742676, + "learning_rate": 0.0015, + "loss": 2.3452, + "step": 3010 + }, + { + "epoch": 0.31761603375527425, + "grad_norm": 0.37449413537979126, + "learning_rate": 0.0015, + "loss": 2.339, + "step": 3011 + }, + { + "epoch": 0.31772151898734174, + "grad_norm": 0.44163307547569275, + "learning_rate": 0.0015, + "loss": 2.3506, + "step": 3012 + }, + { + "epoch": 0.3178270042194093, + "grad_norm": 0.5431195497512817, + "learning_rate": 0.0015, + "loss": 2.3267, + "step": 3013 + }, + { + "epoch": 0.3179324894514768, + "grad_norm": 0.5347753167152405, + "learning_rate": 0.0015, + "loss": 2.3449, + "step": 3014 + }, + { + "epoch": 0.3180379746835443, + "grad_norm": 0.4215836524963379, + "learning_rate": 0.0015, + "loss": 2.3604, + "step": 3015 + }, + { + "epoch": 0.3181434599156118, + "grad_norm": 0.3731619417667389, + "learning_rate": 0.0015, + "loss": 2.345, + "step": 3016 + }, + { + "epoch": 0.3182489451476793, + "grad_norm": 0.5204503536224365, + "learning_rate": 0.0015, + "loss": 2.3815, + "step": 3017 + }, + { + "epoch": 0.3183544303797468, + "grad_norm": 0.3830946981906891, + "learning_rate": 0.0015, + "loss": 2.3448, + "step": 3018 + }, + { + "epoch": 
0.31845991561181436, + "grad_norm": 0.47006240487098694, + "learning_rate": 0.0015, + "loss": 2.343, + "step": 3019 + }, + { + "epoch": 0.31856540084388185, + "grad_norm": 0.5281461477279663, + "learning_rate": 0.0015, + "loss": 2.3468, + "step": 3020 + }, + { + "epoch": 0.31867088607594934, + "grad_norm": 0.45958009362220764, + "learning_rate": 0.0015, + "loss": 2.3102, + "step": 3021 + }, + { + "epoch": 0.3187763713080169, + "grad_norm": 0.4461733102798462, + "learning_rate": 0.0015, + "loss": 2.3747, + "step": 3022 + }, + { + "epoch": 0.3188818565400844, + "grad_norm": 0.49410584568977356, + "learning_rate": 0.0015, + "loss": 2.3235, + "step": 3023 + }, + { + "epoch": 0.3189873417721519, + "grad_norm": 0.4257568120956421, + "learning_rate": 0.0015, + "loss": 2.3401, + "step": 3024 + }, + { + "epoch": 0.3190928270042194, + "grad_norm": 0.40064528584480286, + "learning_rate": 0.0015, + "loss": 2.3133, + "step": 3025 + }, + { + "epoch": 0.3191983122362869, + "grad_norm": 0.46230655908584595, + "learning_rate": 0.0015, + "loss": 2.3586, + "step": 3026 + }, + { + "epoch": 0.3193037974683544, + "grad_norm": 0.4658045470714569, + "learning_rate": 0.0015, + "loss": 2.3517, + "step": 3027 + }, + { + "epoch": 0.31940928270042196, + "grad_norm": 0.3831866383552551, + "learning_rate": 0.0015, + "loss": 2.3438, + "step": 3028 + }, + { + "epoch": 0.31951476793248945, + "grad_norm": 0.419153094291687, + "learning_rate": 0.0015, + "loss": 2.3233, + "step": 3029 + }, + { + "epoch": 0.31962025316455694, + "grad_norm": 0.4613170325756073, + "learning_rate": 0.0015, + "loss": 2.3955, + "step": 3030 + }, + { + "epoch": 0.3197257383966245, + "grad_norm": 0.38516390323638916, + "learning_rate": 0.0015, + "loss": 2.3386, + "step": 3031 + }, + { + "epoch": 0.319831223628692, + "grad_norm": 0.4181787669658661, + "learning_rate": 0.0015, + "loss": 2.3372, + "step": 3032 + }, + { + "epoch": 0.3199367088607595, + "grad_norm": 0.42860904335975647, + "learning_rate": 0.0015, + "loss": 2.3327, 
+ "step": 3033 + }, + { + "epoch": 0.320042194092827, + "grad_norm": 0.39201417565345764, + "learning_rate": 0.0015, + "loss": 2.378, + "step": 3034 + }, + { + "epoch": 0.3201476793248945, + "grad_norm": 0.42141643166542053, + "learning_rate": 0.0015, + "loss": 2.3545, + "step": 3035 + }, + { + "epoch": 0.320253164556962, + "grad_norm": 0.3673221468925476, + "learning_rate": 0.0015, + "loss": 2.3833, + "step": 3036 + }, + { + "epoch": 0.32035864978902956, + "grad_norm": 0.43436405062675476, + "learning_rate": 0.0015, + "loss": 2.3673, + "step": 3037 + }, + { + "epoch": 0.32046413502109705, + "grad_norm": 0.44362249970436096, + "learning_rate": 0.0015, + "loss": 2.3251, + "step": 3038 + }, + { + "epoch": 0.32056962025316454, + "grad_norm": 0.4313865303993225, + "learning_rate": 0.0015, + "loss": 2.3332, + "step": 3039 + }, + { + "epoch": 0.3206751054852321, + "grad_norm": 0.4907679259777069, + "learning_rate": 0.0015, + "loss": 2.3143, + "step": 3040 + }, + { + "epoch": 0.3207805907172996, + "grad_norm": 0.4211444556713104, + "learning_rate": 0.0015, + "loss": 2.3335, + "step": 3041 + }, + { + "epoch": 0.3208860759493671, + "grad_norm": 0.43560078740119934, + "learning_rate": 0.0015, + "loss": 2.3437, + "step": 3042 + }, + { + "epoch": 0.3209915611814346, + "grad_norm": 0.3968098759651184, + "learning_rate": 0.0015, + "loss": 2.3783, + "step": 3043 + }, + { + "epoch": 0.3210970464135021, + "grad_norm": 0.4172217845916748, + "learning_rate": 0.0015, + "loss": 2.3387, + "step": 3044 + }, + { + "epoch": 0.3212025316455696, + "grad_norm": 0.47198691964149475, + "learning_rate": 0.0015, + "loss": 2.338, + "step": 3045 + }, + { + "epoch": 0.3213080168776371, + "grad_norm": 0.39610743522644043, + "learning_rate": 0.0015, + "loss": 2.3646, + "step": 3046 + }, + { + "epoch": 0.32141350210970465, + "grad_norm": 0.46966275572776794, + "learning_rate": 0.0015, + "loss": 2.3285, + "step": 3047 + }, + { + "epoch": 0.32151898734177214, + "grad_norm": 0.39159396290779114, + 
"learning_rate": 0.0015, + "loss": 2.3284, + "step": 3048 + }, + { + "epoch": 0.32162447257383964, + "grad_norm": 0.43263450264930725, + "learning_rate": 0.0015, + "loss": 2.3227, + "step": 3049 + }, + { + "epoch": 0.3217299578059072, + "grad_norm": 0.44122421741485596, + "learning_rate": 0.0015, + "loss": 2.3478, + "step": 3050 + }, + { + "epoch": 0.3218354430379747, + "grad_norm": 0.4518132507801056, + "learning_rate": 0.0015, + "loss": 2.3379, + "step": 3051 + }, + { + "epoch": 0.32194092827004217, + "grad_norm": 0.4318621754646301, + "learning_rate": 0.0015, + "loss": 2.326, + "step": 3052 + }, + { + "epoch": 0.3220464135021097, + "grad_norm": 0.4539203345775604, + "learning_rate": 0.0015, + "loss": 2.3324, + "step": 3053 + }, + { + "epoch": 0.3221518987341772, + "grad_norm": 0.44322171807289124, + "learning_rate": 0.0015, + "loss": 2.3457, + "step": 3054 + }, + { + "epoch": 0.3222573839662447, + "grad_norm": 0.4235475957393646, + "learning_rate": 0.0015, + "loss": 2.3682, + "step": 3055 + }, + { + "epoch": 0.32236286919831225, + "grad_norm": 0.4082552194595337, + "learning_rate": 0.0015, + "loss": 2.3596, + "step": 3056 + }, + { + "epoch": 0.32246835443037974, + "grad_norm": 0.4590214788913727, + "learning_rate": 0.0015, + "loss": 2.3468, + "step": 3057 + }, + { + "epoch": 0.32257383966244724, + "grad_norm": 0.3992818593978882, + "learning_rate": 0.0015, + "loss": 2.3364, + "step": 3058 + }, + { + "epoch": 0.3226793248945148, + "grad_norm": 0.44476285576820374, + "learning_rate": 0.0015, + "loss": 2.3652, + "step": 3059 + }, + { + "epoch": 0.3227848101265823, + "grad_norm": 0.4027647078037262, + "learning_rate": 0.0015, + "loss": 2.3433, + "step": 3060 + }, + { + "epoch": 0.32289029535864977, + "grad_norm": 0.39875540137290955, + "learning_rate": 0.0015, + "loss": 2.346, + "step": 3061 + }, + { + "epoch": 0.3229957805907173, + "grad_norm": 0.46490490436553955, + "learning_rate": 0.0015, + "loss": 2.3243, + "step": 3062 + }, + { + "epoch": 0.3231012658227848, + 
"grad_norm": 0.35596251487731934, + "learning_rate": 0.0015, + "loss": 2.338, + "step": 3063 + }, + { + "epoch": 0.3232067510548523, + "grad_norm": 0.48178592324256897, + "learning_rate": 0.0015, + "loss": 2.3379, + "step": 3064 + }, + { + "epoch": 0.32331223628691985, + "grad_norm": 0.4949420392513275, + "learning_rate": 0.0015, + "loss": 2.3166, + "step": 3065 + }, + { + "epoch": 0.32341772151898734, + "grad_norm": 0.4207111597061157, + "learning_rate": 0.0015, + "loss": 2.3257, + "step": 3066 + }, + { + "epoch": 0.32352320675105484, + "grad_norm": 0.5137282013893127, + "learning_rate": 0.0015, + "loss": 2.3429, + "step": 3067 + }, + { + "epoch": 0.3236286919831224, + "grad_norm": 0.5780287981033325, + "learning_rate": 0.0015, + "loss": 2.3314, + "step": 3068 + }, + { + "epoch": 0.3237341772151899, + "grad_norm": 0.4541032910346985, + "learning_rate": 0.0015, + "loss": 2.3434, + "step": 3069 + }, + { + "epoch": 0.32383966244725737, + "grad_norm": 0.44292110204696655, + "learning_rate": 0.0015, + "loss": 2.365, + "step": 3070 + }, + { + "epoch": 0.3239451476793249, + "grad_norm": 0.5540505647659302, + "learning_rate": 0.0015, + "loss": 2.3661, + "step": 3071 + }, + { + "epoch": 0.3240506329113924, + "grad_norm": 0.46432697772979736, + "learning_rate": 0.0015, + "loss": 2.283, + "step": 3072 + }, + { + "epoch": 0.3241561181434599, + "grad_norm": 0.37047287821769714, + "learning_rate": 0.0015, + "loss": 2.3307, + "step": 3073 + }, + { + "epoch": 0.32426160337552745, + "grad_norm": 0.39605897665023804, + "learning_rate": 0.0015, + "loss": 2.3486, + "step": 3074 + }, + { + "epoch": 0.32436708860759494, + "grad_norm": 0.39015042781829834, + "learning_rate": 0.0015, + "loss": 2.3153, + "step": 3075 + }, + { + "epoch": 0.32447257383966244, + "grad_norm": 0.38822078704833984, + "learning_rate": 0.0015, + "loss": 2.3065, + "step": 3076 + }, + { + "epoch": 0.32457805907173, + "grad_norm": 0.4377744793891907, + "learning_rate": 0.0015, + "loss": 2.2984, + "step": 3077 + }, + 
{ + "epoch": 0.3246835443037975, + "grad_norm": 0.35922062397003174, + "learning_rate": 0.0015, + "loss": 2.2887, + "step": 3078 + }, + { + "epoch": 0.32478902953586497, + "grad_norm": 0.5549656748771667, + "learning_rate": 0.0015, + "loss": 2.3263, + "step": 3079 + }, + { + "epoch": 0.32489451476793246, + "grad_norm": 0.5398890972137451, + "learning_rate": 0.0015, + "loss": 2.38, + "step": 3080 + }, + { + "epoch": 0.325, + "grad_norm": 0.44960156083106995, + "learning_rate": 0.0015, + "loss": 2.2963, + "step": 3081 + }, + { + "epoch": 0.3251054852320675, + "grad_norm": 0.46962299942970276, + "learning_rate": 0.0015, + "loss": 2.3516, + "step": 3082 + }, + { + "epoch": 0.325210970464135, + "grad_norm": 0.46924519538879395, + "learning_rate": 0.0015, + "loss": 2.3265, + "step": 3083 + }, + { + "epoch": 0.32531645569620254, + "grad_norm": 0.38555777072906494, + "learning_rate": 0.0015, + "loss": 2.3343, + "step": 3084 + }, + { + "epoch": 0.32542194092827004, + "grad_norm": 0.442982017993927, + "learning_rate": 0.0015, + "loss": 2.3379, + "step": 3085 + }, + { + "epoch": 0.32552742616033753, + "grad_norm": 0.49080535769462585, + "learning_rate": 0.0015, + "loss": 2.3262, + "step": 3086 + }, + { + "epoch": 0.3256329113924051, + "grad_norm": 0.41607972979545593, + "learning_rate": 0.0015, + "loss": 2.3724, + "step": 3087 + }, + { + "epoch": 0.32573839662447257, + "grad_norm": 0.42181795835494995, + "learning_rate": 0.0015, + "loss": 2.3239, + "step": 3088 + }, + { + "epoch": 0.32584388185654006, + "grad_norm": 0.4559599459171295, + "learning_rate": 0.0015, + "loss": 2.3359, + "step": 3089 + }, + { + "epoch": 0.3259493670886076, + "grad_norm": 0.4377025365829468, + "learning_rate": 0.0015, + "loss": 2.3438, + "step": 3090 + }, + { + "epoch": 0.3260548523206751, + "grad_norm": 0.48531374335289, + "learning_rate": 0.0015, + "loss": 2.3578, + "step": 3091 + }, + { + "epoch": 0.3261603375527426, + "grad_norm": 0.4105640649795532, + "learning_rate": 0.0015, + "loss": 2.3207, 
+ "step": 3092 + }, + { + "epoch": 0.32626582278481014, + "grad_norm": 0.5469980239868164, + "learning_rate": 0.0015, + "loss": 2.3293, + "step": 3093 + }, + { + "epoch": 0.32637130801687764, + "grad_norm": 0.39946457743644714, + "learning_rate": 0.0015, + "loss": 2.3324, + "step": 3094 + }, + { + "epoch": 0.32647679324894513, + "grad_norm": 0.4480961859226227, + "learning_rate": 0.0015, + "loss": 2.3124, + "step": 3095 + }, + { + "epoch": 0.3265822784810127, + "grad_norm": 0.44019827246665955, + "learning_rate": 0.0015, + "loss": 2.3272, + "step": 3096 + }, + { + "epoch": 0.32668776371308017, + "grad_norm": 0.39859485626220703, + "learning_rate": 0.0015, + "loss": 2.299, + "step": 3097 + }, + { + "epoch": 0.32679324894514766, + "grad_norm": 0.40375789999961853, + "learning_rate": 0.0015, + "loss": 2.3369, + "step": 3098 + }, + { + "epoch": 0.3268987341772152, + "grad_norm": 0.3783697187900543, + "learning_rate": 0.0015, + "loss": 2.308, + "step": 3099 + }, + { + "epoch": 0.3270042194092827, + "grad_norm": 0.38902950286865234, + "learning_rate": 0.0015, + "loss": 2.3279, + "step": 3100 + }, + { + "epoch": 0.3271097046413502, + "grad_norm": 0.33765000104904175, + "learning_rate": 0.0015, + "loss": 2.3285, + "step": 3101 + }, + { + "epoch": 0.32721518987341774, + "grad_norm": 0.39003169536590576, + "learning_rate": 0.0015, + "loss": 2.3396, + "step": 3102 + }, + { + "epoch": 0.32732067510548524, + "grad_norm": 0.33897876739501953, + "learning_rate": 0.0015, + "loss": 2.3612, + "step": 3103 + }, + { + "epoch": 0.32742616033755273, + "grad_norm": 0.38490694761276245, + "learning_rate": 0.0015, + "loss": 2.3225, + "step": 3104 + }, + { + "epoch": 0.3275316455696203, + "grad_norm": 0.35184550285339355, + "learning_rate": 0.0015, + "loss": 2.3133, + "step": 3105 + }, + { + "epoch": 0.32763713080168777, + "grad_norm": 0.39834725856781006, + "learning_rate": 0.0015, + "loss": 2.3364, + "step": 3106 + }, + { + "epoch": 0.32774261603375526, + "grad_norm": 0.4205032289028168, 
+ "learning_rate": 0.0015, + "loss": 2.3037, + "step": 3107 + }, + { + "epoch": 0.3278481012658228, + "grad_norm": 0.3877774178981781, + "learning_rate": 0.0015, + "loss": 2.3002, + "step": 3108 + }, + { + "epoch": 0.3279535864978903, + "grad_norm": 0.3867678940296173, + "learning_rate": 0.0015, + "loss": 2.3239, + "step": 3109 + }, + { + "epoch": 0.3280590717299578, + "grad_norm": 0.4225291609764099, + "learning_rate": 0.0015, + "loss": 2.3266, + "step": 3110 + }, + { + "epoch": 0.3281645569620253, + "grad_norm": 0.408651739358902, + "learning_rate": 0.0015, + "loss": 2.3393, + "step": 3111 + }, + { + "epoch": 0.32827004219409284, + "grad_norm": 0.34592387080192566, + "learning_rate": 0.0015, + "loss": 2.3291, + "step": 3112 + }, + { + "epoch": 0.32837552742616033, + "grad_norm": 0.37444937229156494, + "learning_rate": 0.0015, + "loss": 2.3295, + "step": 3113 + }, + { + "epoch": 0.3284810126582278, + "grad_norm": 0.40215373039245605, + "learning_rate": 0.0015, + "loss": 2.3289, + "step": 3114 + }, + { + "epoch": 0.32858649789029537, + "grad_norm": 0.3787889778614044, + "learning_rate": 0.0015, + "loss": 2.3166, + "step": 3115 + }, + { + "epoch": 0.32869198312236286, + "grad_norm": 0.36656346917152405, + "learning_rate": 0.0015, + "loss": 2.3044, + "step": 3116 + }, + { + "epoch": 0.32879746835443036, + "grad_norm": 0.4932112991809845, + "learning_rate": 0.0015, + "loss": 2.3113, + "step": 3117 + }, + { + "epoch": 0.3289029535864979, + "grad_norm": 0.5116373896598816, + "learning_rate": 0.0015, + "loss": 2.3111, + "step": 3118 + }, + { + "epoch": 0.3290084388185654, + "grad_norm": 0.3638853132724762, + "learning_rate": 0.0015, + "loss": 2.3289, + "step": 3119 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 0.4394342005252838, + "learning_rate": 0.0015, + "loss": 2.3614, + "step": 3120 + }, + { + "epoch": 0.32921940928270044, + "grad_norm": 0.4938507080078125, + "learning_rate": 0.0015, + "loss": 2.3241, + "step": 3121 + }, + { + "epoch": 0.32932489451476793, 
+ "grad_norm": 0.42015597224235535, + "learning_rate": 0.0015, + "loss": 2.3077, + "step": 3122 + }, + { + "epoch": 0.3294303797468354, + "grad_norm": 0.47932884097099304, + "learning_rate": 0.0015, + "loss": 2.3712, + "step": 3123 + }, + { + "epoch": 0.32953586497890297, + "grad_norm": 0.49079254269599915, + "learning_rate": 0.0015, + "loss": 2.3179, + "step": 3124 + }, + { + "epoch": 0.32964135021097046, + "grad_norm": 0.42855289578437805, + "learning_rate": 0.0015, + "loss": 2.356, + "step": 3125 + }, + { + "epoch": 0.32974683544303796, + "grad_norm": 0.4163735508918762, + "learning_rate": 0.0015, + "loss": 2.3582, + "step": 3126 + }, + { + "epoch": 0.3298523206751055, + "grad_norm": 0.4802001714706421, + "learning_rate": 0.0015, + "loss": 2.3316, + "step": 3127 + }, + { + "epoch": 0.329957805907173, + "grad_norm": 0.42130047082901, + "learning_rate": 0.0015, + "loss": 2.3382, + "step": 3128 + }, + { + "epoch": 0.3300632911392405, + "grad_norm": 0.4281897246837616, + "learning_rate": 0.0015, + "loss": 2.3466, + "step": 3129 + }, + { + "epoch": 0.33016877637130804, + "grad_norm": 0.47074222564697266, + "learning_rate": 0.0015, + "loss": 2.3433, + "step": 3130 + }, + { + "epoch": 0.33027426160337553, + "grad_norm": 0.43685483932495117, + "learning_rate": 0.0015, + "loss": 2.3043, + "step": 3131 + }, + { + "epoch": 0.330379746835443, + "grad_norm": 0.48543551564216614, + "learning_rate": 0.0015, + "loss": 2.334, + "step": 3132 + }, + { + "epoch": 0.33048523206751057, + "grad_norm": 0.5141048431396484, + "learning_rate": 0.0015, + "loss": 2.2891, + "step": 3133 + }, + { + "epoch": 0.33059071729957806, + "grad_norm": 0.48435327410697937, + "learning_rate": 0.0015, + "loss": 2.3655, + "step": 3134 + }, + { + "epoch": 0.33069620253164556, + "grad_norm": 0.40695974230766296, + "learning_rate": 0.0015, + "loss": 2.2989, + "step": 3135 + }, + { + "epoch": 0.3308016877637131, + "grad_norm": 0.4420720934867859, + "learning_rate": 0.0015, + "loss": 2.3222, + "step": 3136 + 
}, + { + "epoch": 0.3309071729957806, + "grad_norm": 0.3421173393726349, + "learning_rate": 0.0015, + "loss": 2.2909, + "step": 3137 + }, + { + "epoch": 0.3310126582278481, + "grad_norm": 0.4157831370830536, + "learning_rate": 0.0015, + "loss": 2.3464, + "step": 3138 + }, + { + "epoch": 0.33111814345991564, + "grad_norm": 0.5435379147529602, + "learning_rate": 0.0015, + "loss": 2.3277, + "step": 3139 + }, + { + "epoch": 0.33122362869198313, + "grad_norm": 0.47755900025367737, + "learning_rate": 0.0015, + "loss": 2.3055, + "step": 3140 + }, + { + "epoch": 0.3313291139240506, + "grad_norm": 0.3802216351032257, + "learning_rate": 0.0015, + "loss": 2.3353, + "step": 3141 + }, + { + "epoch": 0.33143459915611817, + "grad_norm": 0.6070103645324707, + "learning_rate": 0.0015, + "loss": 2.3223, + "step": 3142 + }, + { + "epoch": 0.33154008438818566, + "grad_norm": 0.46253374218940735, + "learning_rate": 0.0015, + "loss": 2.3367, + "step": 3143 + }, + { + "epoch": 0.33164556962025316, + "grad_norm": 0.4125393033027649, + "learning_rate": 0.0015, + "loss": 2.3608, + "step": 3144 + }, + { + "epoch": 0.33175105485232065, + "grad_norm": 0.4812757074832916, + "learning_rate": 0.0015, + "loss": 2.3304, + "step": 3145 + }, + { + "epoch": 0.3318565400843882, + "grad_norm": 0.35807496309280396, + "learning_rate": 0.0015, + "loss": 2.3127, + "step": 3146 + }, + { + "epoch": 0.3319620253164557, + "grad_norm": 0.48202434182167053, + "learning_rate": 0.0015, + "loss": 2.319, + "step": 3147 + }, + { + "epoch": 0.3320675105485232, + "grad_norm": 0.430908739566803, + "learning_rate": 0.0015, + "loss": 2.3159, + "step": 3148 + }, + { + "epoch": 0.33217299578059073, + "grad_norm": 0.3729066550731659, + "learning_rate": 0.0015, + "loss": 2.3113, + "step": 3149 + }, + { + "epoch": 0.3322784810126582, + "grad_norm": 0.46815577149391174, + "learning_rate": 0.0015, + "loss": 2.3095, + "step": 3150 + }, + { + "epoch": 0.3323839662447257, + "grad_norm": 0.4611875116825104, + "learning_rate": 0.0015, 
+ "loss": 2.3194, + "step": 3151 + }, + { + "epoch": 0.33248945147679326, + "grad_norm": 0.3913194239139557, + "learning_rate": 0.0015, + "loss": 2.3153, + "step": 3152 + }, + { + "epoch": 0.33259493670886076, + "grad_norm": 0.44758081436157227, + "learning_rate": 0.0015, + "loss": 2.3475, + "step": 3153 + }, + { + "epoch": 0.33270042194092825, + "grad_norm": 0.40514373779296875, + "learning_rate": 0.0015, + "loss": 2.3181, + "step": 3154 + }, + { + "epoch": 0.3328059071729958, + "grad_norm": 0.40096473693847656, + "learning_rate": 0.0015, + "loss": 2.3243, + "step": 3155 + }, + { + "epoch": 0.3329113924050633, + "grad_norm": 0.4078724980354309, + "learning_rate": 0.0015, + "loss": 2.3116, + "step": 3156 + }, + { + "epoch": 0.3330168776371308, + "grad_norm": 0.38583052158355713, + "learning_rate": 0.0015, + "loss": 2.3124, + "step": 3157 + }, + { + "epoch": 0.33312236286919833, + "grad_norm": 0.3852410316467285, + "learning_rate": 0.0015, + "loss": 2.3179, + "step": 3158 + }, + { + "epoch": 0.3332278481012658, + "grad_norm": 0.3609490990638733, + "learning_rate": 0.0015, + "loss": 2.3457, + "step": 3159 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.4108952581882477, + "learning_rate": 0.0015, + "loss": 2.3325, + "step": 3160 + }, + { + "epoch": 0.33343881856540086, + "grad_norm": 0.3791021704673767, + "learning_rate": 0.0015, + "loss": 2.3303, + "step": 3161 + }, + { + "epoch": 0.33354430379746836, + "grad_norm": 0.39888128638267517, + "learning_rate": 0.0015, + "loss": 2.3284, + "step": 3162 + }, + { + "epoch": 0.33364978902953585, + "grad_norm": 0.4001410901546478, + "learning_rate": 0.0015, + "loss": 2.3067, + "step": 3163 + }, + { + "epoch": 0.3337552742616034, + "grad_norm": 0.40565910935401917, + "learning_rate": 0.0015, + "loss": 2.3303, + "step": 3164 + }, + { + "epoch": 0.3338607594936709, + "grad_norm": 0.4474175274372101, + "learning_rate": 0.0015, + "loss": 2.3194, + "step": 3165 + }, + { + "epoch": 0.3339662447257384, + "grad_norm": 
0.4199009835720062, + "learning_rate": 0.0015, + "loss": 2.3468, + "step": 3166 + }, + { + "epoch": 0.33407172995780593, + "grad_norm": 0.4099526107311249, + "learning_rate": 0.0015, + "loss": 2.305, + "step": 3167 + }, + { + "epoch": 0.3341772151898734, + "grad_norm": 0.40184837579727173, + "learning_rate": 0.0015, + "loss": 2.3331, + "step": 3168 + }, + { + "epoch": 0.3342827004219409, + "grad_norm": 0.4898586869239807, + "learning_rate": 0.0015, + "loss": 2.3415, + "step": 3169 + }, + { + "epoch": 0.33438818565400846, + "grad_norm": 0.40862807631492615, + "learning_rate": 0.0015, + "loss": 2.2869, + "step": 3170 + }, + { + "epoch": 0.33449367088607596, + "grad_norm": 0.38056135177612305, + "learning_rate": 0.0015, + "loss": 2.3343, + "step": 3171 + }, + { + "epoch": 0.33459915611814345, + "grad_norm": 0.4096638858318329, + "learning_rate": 0.0015, + "loss": 2.2893, + "step": 3172 + }, + { + "epoch": 0.334704641350211, + "grad_norm": 0.394092321395874, + "learning_rate": 0.0015, + "loss": 2.3315, + "step": 3173 + }, + { + "epoch": 0.3348101265822785, + "grad_norm": 0.39373302459716797, + "learning_rate": 0.0015, + "loss": 2.3374, + "step": 3174 + }, + { + "epoch": 0.334915611814346, + "grad_norm": 0.4229651987552643, + "learning_rate": 0.0015, + "loss": 2.317, + "step": 3175 + }, + { + "epoch": 0.33502109704641353, + "grad_norm": 0.4484521448612213, + "learning_rate": 0.0015, + "loss": 2.3118, + "step": 3176 + }, + { + "epoch": 0.335126582278481, + "grad_norm": 0.412743479013443, + "learning_rate": 0.0015, + "loss": 2.3353, + "step": 3177 + }, + { + "epoch": 0.3352320675105485, + "grad_norm": 0.3866250813007355, + "learning_rate": 0.0015, + "loss": 2.318, + "step": 3178 + }, + { + "epoch": 0.335337552742616, + "grad_norm": 0.4189819395542145, + "learning_rate": 0.0015, + "loss": 2.2814, + "step": 3179 + }, + { + "epoch": 0.33544303797468356, + "grad_norm": 0.37317413091659546, + "learning_rate": 0.0015, + "loss": 2.3483, + "step": 3180 + }, + { + "epoch": 
0.33554852320675105, + "grad_norm": 0.38140514492988586, + "learning_rate": 0.0015, + "loss": 2.2774, + "step": 3181 + }, + { + "epoch": 0.33565400843881854, + "grad_norm": 0.36204057931900024, + "learning_rate": 0.0015, + "loss": 2.2916, + "step": 3182 + }, + { + "epoch": 0.3357594936708861, + "grad_norm": 0.36663228273391724, + "learning_rate": 0.0015, + "loss": 2.3176, + "step": 3183 + }, + { + "epoch": 0.3358649789029536, + "grad_norm": 0.4048280715942383, + "learning_rate": 0.0015, + "loss": 2.3503, + "step": 3184 + }, + { + "epoch": 0.3359704641350211, + "grad_norm": 0.38067594170570374, + "learning_rate": 0.0015, + "loss": 2.3155, + "step": 3185 + }, + { + "epoch": 0.3360759493670886, + "grad_norm": 0.36979299783706665, + "learning_rate": 0.0015, + "loss": 2.2924, + "step": 3186 + }, + { + "epoch": 0.3361814345991561, + "grad_norm": 0.4038313627243042, + "learning_rate": 0.0015, + "loss": 2.3353, + "step": 3187 + }, + { + "epoch": 0.3362869198312236, + "grad_norm": 0.36493536829948425, + "learning_rate": 0.0015, + "loss": 2.3166, + "step": 3188 + }, + { + "epoch": 0.33639240506329116, + "grad_norm": 0.4535239636898041, + "learning_rate": 0.0015, + "loss": 2.282, + "step": 3189 + }, + { + "epoch": 0.33649789029535865, + "grad_norm": 0.3959764838218689, + "learning_rate": 0.0015, + "loss": 2.353, + "step": 3190 + }, + { + "epoch": 0.33660337552742614, + "grad_norm": 0.379930704832077, + "learning_rate": 0.0015, + "loss": 2.3133, + "step": 3191 + }, + { + "epoch": 0.3367088607594937, + "grad_norm": 0.4072665870189667, + "learning_rate": 0.0015, + "loss": 2.2987, + "step": 3192 + }, + { + "epoch": 0.3368143459915612, + "grad_norm": 0.4153999388217926, + "learning_rate": 0.0015, + "loss": 2.3525, + "step": 3193 + }, + { + "epoch": 0.3369198312236287, + "grad_norm": 0.5175278782844543, + "learning_rate": 0.0015, + "loss": 2.3435, + "step": 3194 + }, + { + "epoch": 0.3370253164556962, + "grad_norm": 0.42729589343070984, + "learning_rate": 0.0015, + "loss": 2.2964, 
+ "step": 3195 + }, + { + "epoch": 0.3371308016877637, + "grad_norm": 0.4056847393512726, + "learning_rate": 0.0015, + "loss": 2.3402, + "step": 3196 + }, + { + "epoch": 0.3372362869198312, + "grad_norm": 0.48186591267585754, + "learning_rate": 0.0015, + "loss": 2.2668, + "step": 3197 + }, + { + "epoch": 0.33734177215189876, + "grad_norm": 0.4288673996925354, + "learning_rate": 0.0015, + "loss": 2.3157, + "step": 3198 + }, + { + "epoch": 0.33744725738396625, + "grad_norm": 0.3702959418296814, + "learning_rate": 0.0015, + "loss": 2.2917, + "step": 3199 + }, + { + "epoch": 0.33755274261603374, + "grad_norm": 0.48229750990867615, + "learning_rate": 0.0015, + "loss": 2.3205, + "step": 3200 + }, + { + "epoch": 0.3376582278481013, + "grad_norm": 0.42526277899742126, + "learning_rate": 0.0015, + "loss": 2.3161, + "step": 3201 + }, + { + "epoch": 0.3377637130801688, + "grad_norm": 0.36951306462287903, + "learning_rate": 0.0015, + "loss": 2.3357, + "step": 3202 + }, + { + "epoch": 0.3378691983122363, + "grad_norm": 0.43272194266319275, + "learning_rate": 0.0015, + "loss": 2.3292, + "step": 3203 + }, + { + "epoch": 0.3379746835443038, + "grad_norm": 0.40316054224967957, + "learning_rate": 0.0015, + "loss": 2.3211, + "step": 3204 + }, + { + "epoch": 0.3380801687763713, + "grad_norm": 0.39637067914009094, + "learning_rate": 0.0015, + "loss": 2.3191, + "step": 3205 + }, + { + "epoch": 0.3381856540084388, + "grad_norm": 0.4459872543811798, + "learning_rate": 0.0015, + "loss": 2.3349, + "step": 3206 + }, + { + "epoch": 0.33829113924050636, + "grad_norm": 0.3512580096721649, + "learning_rate": 0.0015, + "loss": 2.3355, + "step": 3207 + }, + { + "epoch": 0.33839662447257385, + "grad_norm": 0.4265896677970886, + "learning_rate": 0.0015, + "loss": 2.2951, + "step": 3208 + }, + { + "epoch": 0.33850210970464134, + "grad_norm": 0.36360251903533936, + "learning_rate": 0.0015, + "loss": 2.2916, + "step": 3209 + }, + { + "epoch": 0.33860759493670883, + "grad_norm": 0.4049573540687561, + 
"learning_rate": 0.0015, + "loss": 2.3217, + "step": 3210 + }, + { + "epoch": 0.3387130801687764, + "grad_norm": 0.4918650686740875, + "learning_rate": 0.0015, + "loss": 2.3089, + "step": 3211 + }, + { + "epoch": 0.3388185654008439, + "grad_norm": 0.5247867703437805, + "learning_rate": 0.0015, + "loss": 2.2828, + "step": 3212 + }, + { + "epoch": 0.33892405063291137, + "grad_norm": 0.4646093249320984, + "learning_rate": 0.0015, + "loss": 2.2761, + "step": 3213 + }, + { + "epoch": 0.3390295358649789, + "grad_norm": 0.46797502040863037, + "learning_rate": 0.0015, + "loss": 2.3297, + "step": 3214 + }, + { + "epoch": 0.3391350210970464, + "grad_norm": 0.4903671145439148, + "learning_rate": 0.0015, + "loss": 2.3169, + "step": 3215 + }, + { + "epoch": 0.3392405063291139, + "grad_norm": 0.4265798330307007, + "learning_rate": 0.0015, + "loss": 2.3286, + "step": 3216 + }, + { + "epoch": 0.33934599156118145, + "grad_norm": 0.44953176379203796, + "learning_rate": 0.0015, + "loss": 2.2964, + "step": 3217 + }, + { + "epoch": 0.33945147679324894, + "grad_norm": 0.47324898838996887, + "learning_rate": 0.0015, + "loss": 2.3179, + "step": 3218 + }, + { + "epoch": 0.33955696202531643, + "grad_norm": 0.4370453357696533, + "learning_rate": 0.0015, + "loss": 2.3045, + "step": 3219 + }, + { + "epoch": 0.339662447257384, + "grad_norm": 0.4053555727005005, + "learning_rate": 0.0015, + "loss": 2.3223, + "step": 3220 + }, + { + "epoch": 0.3397679324894515, + "grad_norm": 0.47018560767173767, + "learning_rate": 0.0015, + "loss": 2.3205, + "step": 3221 + }, + { + "epoch": 0.33987341772151897, + "grad_norm": 0.37496957182884216, + "learning_rate": 0.0015, + "loss": 2.3094, + "step": 3222 + }, + { + "epoch": 0.3399789029535865, + "grad_norm": 0.4181782901287079, + "learning_rate": 0.0015, + "loss": 2.2971, + "step": 3223 + }, + { + "epoch": 0.340084388185654, + "grad_norm": 0.4483899176120758, + "learning_rate": 0.0015, + "loss": 2.2926, + "step": 3224 + }, + { + "epoch": 0.3401898734177215, + 
"grad_norm": 0.41218283772468567, + "learning_rate": 0.0015, + "loss": 2.2993, + "step": 3225 + }, + { + "epoch": 0.34029535864978905, + "grad_norm": 0.4184455871582031, + "learning_rate": 0.0015, + "loss": 2.3001, + "step": 3226 + }, + { + "epoch": 0.34040084388185654, + "grad_norm": 0.4167124927043915, + "learning_rate": 0.0015, + "loss": 2.2839, + "step": 3227 + }, + { + "epoch": 0.34050632911392403, + "grad_norm": 0.39146339893341064, + "learning_rate": 0.0015, + "loss": 2.2881, + "step": 3228 + }, + { + "epoch": 0.3406118143459916, + "grad_norm": 0.45810753107070923, + "learning_rate": 0.0015, + "loss": 2.3371, + "step": 3229 + }, + { + "epoch": 0.3407172995780591, + "grad_norm": 0.4581602215766907, + "learning_rate": 0.0015, + "loss": 2.3252, + "step": 3230 + }, + { + "epoch": 0.34082278481012657, + "grad_norm": 0.4544985294342041, + "learning_rate": 0.0015, + "loss": 2.2939, + "step": 3231 + }, + { + "epoch": 0.3409282700421941, + "grad_norm": 0.36762481927871704, + "learning_rate": 0.0015, + "loss": 2.2872, + "step": 3232 + }, + { + "epoch": 0.3410337552742616, + "grad_norm": 0.430843710899353, + "learning_rate": 0.0015, + "loss": 2.3049, + "step": 3233 + }, + { + "epoch": 0.3411392405063291, + "grad_norm": 0.3773117959499359, + "learning_rate": 0.0015, + "loss": 2.2924, + "step": 3234 + }, + { + "epoch": 0.34124472573839665, + "grad_norm": 0.37823227047920227, + "learning_rate": 0.0015, + "loss": 2.2777, + "step": 3235 + }, + { + "epoch": 0.34135021097046414, + "grad_norm": 0.4004173278808594, + "learning_rate": 0.0015, + "loss": 2.3172, + "step": 3236 + }, + { + "epoch": 0.34145569620253163, + "grad_norm": 0.34695589542388916, + "learning_rate": 0.0015, + "loss": 2.2706, + "step": 3237 + }, + { + "epoch": 0.3415611814345992, + "grad_norm": 0.3738159239292145, + "learning_rate": 0.0015, + "loss": 2.2704, + "step": 3238 + }, + { + "epoch": 0.3416666666666667, + "grad_norm": 0.3541819453239441, + "learning_rate": 0.0015, + "loss": 2.2756, + "step": 3239 + }, 
+ { + "epoch": 0.34177215189873417, + "grad_norm": 0.37642887234687805, + "learning_rate": 0.0015, + "loss": 2.2911, + "step": 3240 + }, + { + "epoch": 0.3418776371308017, + "grad_norm": 0.3608163297176361, + "learning_rate": 0.0015, + "loss": 2.3061, + "step": 3241 + }, + { + "epoch": 0.3419831223628692, + "grad_norm": 0.40459224581718445, + "learning_rate": 0.0015, + "loss": 2.3155, + "step": 3242 + }, + { + "epoch": 0.3420886075949367, + "grad_norm": 0.3277207612991333, + "learning_rate": 0.0015, + "loss": 2.2753, + "step": 3243 + }, + { + "epoch": 0.3421940928270042, + "grad_norm": 0.33582091331481934, + "learning_rate": 0.0015, + "loss": 2.2966, + "step": 3244 + }, + { + "epoch": 0.34229957805907174, + "grad_norm": 0.35052087903022766, + "learning_rate": 0.0015, + "loss": 2.2886, + "step": 3245 + }, + { + "epoch": 0.34240506329113923, + "grad_norm": 0.38568297028541565, + "learning_rate": 0.0015, + "loss": 2.2983, + "step": 3246 + }, + { + "epoch": 0.3425105485232067, + "grad_norm": 0.3629501760005951, + "learning_rate": 0.0015, + "loss": 2.318, + "step": 3247 + }, + { + "epoch": 0.3426160337552743, + "grad_norm": 0.4077763259410858, + "learning_rate": 0.0015, + "loss": 2.3211, + "step": 3248 + }, + { + "epoch": 0.34272151898734177, + "grad_norm": 0.4437202513217926, + "learning_rate": 0.0015, + "loss": 2.2936, + "step": 3249 + }, + { + "epoch": 0.34282700421940926, + "grad_norm": 0.45059433579444885, + "learning_rate": 0.0015, + "loss": 2.3266, + "step": 3250 + }, + { + "epoch": 0.3429324894514768, + "grad_norm": 0.3827095031738281, + "learning_rate": 0.0015, + "loss": 2.2681, + "step": 3251 + }, + { + "epoch": 0.3430379746835443, + "grad_norm": 0.4865387976169586, + "learning_rate": 0.0015, + "loss": 2.3305, + "step": 3252 + }, + { + "epoch": 0.3431434599156118, + "grad_norm": 0.43029025197029114, + "learning_rate": 0.0015, + "loss": 2.3284, + "step": 3253 + }, + { + "epoch": 0.34324894514767934, + "grad_norm": 0.3943480849266052, + "learning_rate": 0.0015, 
+ "loss": 2.294, + "step": 3254 + }, + { + "epoch": 0.34335443037974683, + "grad_norm": 0.5119949579238892, + "learning_rate": 0.0015, + "loss": 2.2804, + "step": 3255 + }, + { + "epoch": 0.3434599156118143, + "grad_norm": 0.5075473189353943, + "learning_rate": 0.0015, + "loss": 2.2916, + "step": 3256 + }, + { + "epoch": 0.3435654008438819, + "grad_norm": 0.4397566616535187, + "learning_rate": 0.0015, + "loss": 2.3301, + "step": 3257 + }, + { + "epoch": 0.34367088607594937, + "grad_norm": 0.3716385066509247, + "learning_rate": 0.0015, + "loss": 2.2759, + "step": 3258 + }, + { + "epoch": 0.34377637130801686, + "grad_norm": 0.41902434825897217, + "learning_rate": 0.0015, + "loss": 2.2812, + "step": 3259 + }, + { + "epoch": 0.3438818565400844, + "grad_norm": 0.3802131116390228, + "learning_rate": 0.0015, + "loss": 2.2814, + "step": 3260 + }, + { + "epoch": 0.3439873417721519, + "grad_norm": 0.3618800640106201, + "learning_rate": 0.0015, + "loss": 2.3283, + "step": 3261 + }, + { + "epoch": 0.3440928270042194, + "grad_norm": 0.35178911685943604, + "learning_rate": 0.0015, + "loss": 2.3155, + "step": 3262 + }, + { + "epoch": 0.34419831223628694, + "grad_norm": 0.39684808254241943, + "learning_rate": 0.0015, + "loss": 2.3139, + "step": 3263 + }, + { + "epoch": 0.34430379746835443, + "grad_norm": 0.38200387358665466, + "learning_rate": 0.0015, + "loss": 2.2503, + "step": 3264 + }, + { + "epoch": 0.3444092827004219, + "grad_norm": 0.3833887577056885, + "learning_rate": 0.0015, + "loss": 2.2915, + "step": 3265 + }, + { + "epoch": 0.3445147679324895, + "grad_norm": 0.38842448592185974, + "learning_rate": 0.0015, + "loss": 2.3275, + "step": 3266 + }, + { + "epoch": 0.34462025316455697, + "grad_norm": 0.39213165640830994, + "learning_rate": 0.0015, + "loss": 2.3092, + "step": 3267 + }, + { + "epoch": 0.34472573839662446, + "grad_norm": 0.4151538014411926, + "learning_rate": 0.0015, + "loss": 2.3167, + "step": 3268 + }, + { + "epoch": 0.344831223628692, + "grad_norm": 
0.37662607431411743, + "learning_rate": 0.0015, + "loss": 2.3102, + "step": 3269 + }, + { + "epoch": 0.3449367088607595, + "grad_norm": 0.34117892384529114, + "learning_rate": 0.0015, + "loss": 2.3037, + "step": 3270 + }, + { + "epoch": 0.345042194092827, + "grad_norm": 0.4083194434642792, + "learning_rate": 0.0015, + "loss": 2.2692, + "step": 3271 + }, + { + "epoch": 0.34514767932489454, + "grad_norm": 0.3940260410308838, + "learning_rate": 0.0015, + "loss": 2.3445, + "step": 3272 + }, + { + "epoch": 0.34525316455696203, + "grad_norm": 0.3627338111400604, + "learning_rate": 0.0015, + "loss": 2.3011, + "step": 3273 + }, + { + "epoch": 0.3453586497890295, + "grad_norm": 0.4405283033847809, + "learning_rate": 0.0015, + "loss": 2.3078, + "step": 3274 + }, + { + "epoch": 0.3454641350210971, + "grad_norm": 0.4329347014427185, + "learning_rate": 0.0015, + "loss": 2.3388, + "step": 3275 + }, + { + "epoch": 0.34556962025316457, + "grad_norm": 0.3807879388332367, + "learning_rate": 0.0015, + "loss": 2.324, + "step": 3276 + }, + { + "epoch": 0.34567510548523206, + "grad_norm": 0.4660666286945343, + "learning_rate": 0.0015, + "loss": 2.3046, + "step": 3277 + }, + { + "epoch": 0.34578059071729955, + "grad_norm": 0.4544469118118286, + "learning_rate": 0.0015, + "loss": 2.2729, + "step": 3278 + }, + { + "epoch": 0.3458860759493671, + "grad_norm": 0.4125431180000305, + "learning_rate": 0.0015, + "loss": 2.2952, + "step": 3279 + }, + { + "epoch": 0.3459915611814346, + "grad_norm": 0.3809702396392822, + "learning_rate": 0.0015, + "loss": 2.3033, + "step": 3280 + }, + { + "epoch": 0.3460970464135021, + "grad_norm": 0.36588215827941895, + "learning_rate": 0.0015, + "loss": 2.3015, + "step": 3281 + }, + { + "epoch": 0.34620253164556963, + "grad_norm": 0.3948478102684021, + "learning_rate": 0.0015, + "loss": 2.3054, + "step": 3282 + }, + { + "epoch": 0.3463080168776371, + "grad_norm": 0.4156758785247803, + "learning_rate": 0.0015, + "loss": 2.2741, + "step": 3283 + }, + { + "epoch": 
0.3464135021097046, + "grad_norm": 0.35298672318458557, + "learning_rate": 0.0015, + "loss": 2.3284, + "step": 3284 + }, + { + "epoch": 0.34651898734177217, + "grad_norm": 0.40928012132644653, + "learning_rate": 0.0015, + "loss": 2.3141, + "step": 3285 + }, + { + "epoch": 0.34662447257383966, + "grad_norm": 0.3381887972354889, + "learning_rate": 0.0015, + "loss": 2.3149, + "step": 3286 + }, + { + "epoch": 0.34672995780590715, + "grad_norm": 0.378593772649765, + "learning_rate": 0.0015, + "loss": 2.3412, + "step": 3287 + }, + { + "epoch": 0.3468354430379747, + "grad_norm": 0.3445456326007843, + "learning_rate": 0.0015, + "loss": 2.2566, + "step": 3288 + }, + { + "epoch": 0.3469409282700422, + "grad_norm": 0.3872128129005432, + "learning_rate": 0.0015, + "loss": 2.2736, + "step": 3289 + }, + { + "epoch": 0.3470464135021097, + "grad_norm": 0.3967720568180084, + "learning_rate": 0.0015, + "loss": 2.2429, + "step": 3290 + }, + { + "epoch": 0.34715189873417723, + "grad_norm": 0.4044495224952698, + "learning_rate": 0.0015, + "loss": 2.282, + "step": 3291 + }, + { + "epoch": 0.3472573839662447, + "grad_norm": 0.34882718324661255, + "learning_rate": 0.0015, + "loss": 2.2862, + "step": 3292 + }, + { + "epoch": 0.3473628691983122, + "grad_norm": 0.41744017601013184, + "learning_rate": 0.0015, + "loss": 2.3025, + "step": 3293 + }, + { + "epoch": 0.34746835443037977, + "grad_norm": 0.42246437072753906, + "learning_rate": 0.0015, + "loss": 2.2928, + "step": 3294 + }, + { + "epoch": 0.34757383966244726, + "grad_norm": 0.4129502475261688, + "learning_rate": 0.0015, + "loss": 2.2674, + "step": 3295 + }, + { + "epoch": 0.34767932489451475, + "grad_norm": 0.3745570480823517, + "learning_rate": 0.0015, + "loss": 2.327, + "step": 3296 + }, + { + "epoch": 0.3477848101265823, + "grad_norm": 0.34848853945732117, + "learning_rate": 0.0015, + "loss": 2.3019, + "step": 3297 + }, + { + "epoch": 0.3478902953586498, + "grad_norm": 0.35983091592788696, + "learning_rate": 0.0015, + "loss": 
2.2908, + "step": 3298 + }, + { + "epoch": 0.3479957805907173, + "grad_norm": 0.35406893491744995, + "learning_rate": 0.0015, + "loss": 2.3124, + "step": 3299 + }, + { + "epoch": 0.34810126582278483, + "grad_norm": 0.3797648549079895, + "learning_rate": 0.0015, + "loss": 2.3182, + "step": 3300 + }, + { + "epoch": 0.3482067510548523, + "grad_norm": 0.36634954810142517, + "learning_rate": 0.0015, + "loss": 2.308, + "step": 3301 + }, + { + "epoch": 0.3483122362869198, + "grad_norm": 0.39548125863075256, + "learning_rate": 0.0015, + "loss": 2.2987, + "step": 3302 + }, + { + "epoch": 0.34841772151898737, + "grad_norm": 0.42232152819633484, + "learning_rate": 0.0015, + "loss": 2.342, + "step": 3303 + }, + { + "epoch": 0.34852320675105486, + "grad_norm": 0.4177054464817047, + "learning_rate": 0.0015, + "loss": 2.3001, + "step": 3304 + }, + { + "epoch": 0.34862869198312235, + "grad_norm": 0.3751761317253113, + "learning_rate": 0.0015, + "loss": 2.3126, + "step": 3305 + }, + { + "epoch": 0.3487341772151899, + "grad_norm": 0.411359965801239, + "learning_rate": 0.0015, + "loss": 2.2912, + "step": 3306 + }, + { + "epoch": 0.3488396624472574, + "grad_norm": 0.373325914144516, + "learning_rate": 0.0015, + "loss": 2.315, + "step": 3307 + }, + { + "epoch": 0.3489451476793249, + "grad_norm": 0.3777744770050049, + "learning_rate": 0.0015, + "loss": 2.3182, + "step": 3308 + }, + { + "epoch": 0.3490506329113924, + "grad_norm": 0.38512152433395386, + "learning_rate": 0.0015, + "loss": 2.3139, + "step": 3309 + }, + { + "epoch": 0.3491561181434599, + "grad_norm": 0.37209025025367737, + "learning_rate": 0.0015, + "loss": 2.2706, + "step": 3310 + }, + { + "epoch": 0.3492616033755274, + "grad_norm": 0.3692111074924469, + "learning_rate": 0.0015, + "loss": 2.2843, + "step": 3311 + }, + { + "epoch": 0.3493670886075949, + "grad_norm": 0.36051997542381287, + "learning_rate": 0.0015, + "loss": 2.3266, + "step": 3312 + }, + { + "epoch": 0.34947257383966246, + "grad_norm": 0.3868694305419922, + 
"learning_rate": 0.0015, + "loss": 2.2493, + "step": 3313 + }, + { + "epoch": 0.34957805907172995, + "grad_norm": 0.40492308139801025, + "learning_rate": 0.0015, + "loss": 2.3068, + "step": 3314 + }, + { + "epoch": 0.34968354430379744, + "grad_norm": 0.3489324152469635, + "learning_rate": 0.0015, + "loss": 2.2835, + "step": 3315 + }, + { + "epoch": 0.349789029535865, + "grad_norm": 0.3482295274734497, + "learning_rate": 0.0015, + "loss": 2.2758, + "step": 3316 + }, + { + "epoch": 0.3498945147679325, + "grad_norm": 0.39414456486701965, + "learning_rate": 0.0015, + "loss": 2.3442, + "step": 3317 + }, + { + "epoch": 0.35, + "grad_norm": 0.37623047828674316, + "learning_rate": 0.0015, + "loss": 2.3228, + "step": 3318 + }, + { + "epoch": 0.3501054852320675, + "grad_norm": 0.40859872102737427, + "learning_rate": 0.0015, + "loss": 2.2994, + "step": 3319 + }, + { + "epoch": 0.350210970464135, + "grad_norm": 0.4000176787376404, + "learning_rate": 0.0015, + "loss": 2.3102, + "step": 3320 + }, + { + "epoch": 0.3503164556962025, + "grad_norm": 0.37057802081108093, + "learning_rate": 0.0015, + "loss": 2.2514, + "step": 3321 + }, + { + "epoch": 0.35042194092827006, + "grad_norm": 0.3657781779766083, + "learning_rate": 0.0015, + "loss": 2.285, + "step": 3322 + }, + { + "epoch": 0.35052742616033755, + "grad_norm": 0.3888745903968811, + "learning_rate": 0.0015, + "loss": 2.31, + "step": 3323 + }, + { + "epoch": 0.35063291139240504, + "grad_norm": 0.36025530099868774, + "learning_rate": 0.0015, + "loss": 2.2702, + "step": 3324 + }, + { + "epoch": 0.3507383966244726, + "grad_norm": 0.3696732223033905, + "learning_rate": 0.0015, + "loss": 2.3124, + "step": 3325 + }, + { + "epoch": 0.3508438818565401, + "grad_norm": 0.3522142171859741, + "learning_rate": 0.0015, + "loss": 2.2689, + "step": 3326 + }, + { + "epoch": 0.3509493670886076, + "grad_norm": 0.35130035877227783, + "learning_rate": 0.0015, + "loss": 2.3137, + "step": 3327 + }, + { + "epoch": 0.3510548523206751, + "grad_norm": 
0.36405807733535767, + "learning_rate": 0.0015, + "loss": 2.3098, + "step": 3328 + }, + { + "epoch": 0.3511603375527426, + "grad_norm": 0.3673488199710846, + "learning_rate": 0.0015, + "loss": 2.2944, + "step": 3329 + }, + { + "epoch": 0.3512658227848101, + "grad_norm": 0.3402048349380493, + "learning_rate": 0.0015, + "loss": 2.2652, + "step": 3330 + }, + { + "epoch": 0.35137130801687766, + "grad_norm": 0.36169904470443726, + "learning_rate": 0.0015, + "loss": 2.292, + "step": 3331 + }, + { + "epoch": 0.35147679324894515, + "grad_norm": 0.36311957240104675, + "learning_rate": 0.0015, + "loss": 2.2731, + "step": 3332 + }, + { + "epoch": 0.35158227848101264, + "grad_norm": 0.3563656806945801, + "learning_rate": 0.0015, + "loss": 2.3361, + "step": 3333 + }, + { + "epoch": 0.3516877637130802, + "grad_norm": 0.44335561990737915, + "learning_rate": 0.0015, + "loss": 2.2693, + "step": 3334 + }, + { + "epoch": 0.3517932489451477, + "grad_norm": 0.4030573070049286, + "learning_rate": 0.0015, + "loss": 2.2674, + "step": 3335 + }, + { + "epoch": 0.3518987341772152, + "grad_norm": 0.42510172724723816, + "learning_rate": 0.0015, + "loss": 2.2948, + "step": 3336 + }, + { + "epoch": 0.3520042194092827, + "grad_norm": 0.34668222069740295, + "learning_rate": 0.0015, + "loss": 2.2913, + "step": 3337 + }, + { + "epoch": 0.3521097046413502, + "grad_norm": 0.4564485251903534, + "learning_rate": 0.0015, + "loss": 2.2952, + "step": 3338 + }, + { + "epoch": 0.3522151898734177, + "grad_norm": 0.5305027365684509, + "learning_rate": 0.0015, + "loss": 2.3079, + "step": 3339 + }, + { + "epoch": 0.35232067510548526, + "grad_norm": 0.5031300187110901, + "learning_rate": 0.0015, + "loss": 2.2589, + "step": 3340 + }, + { + "epoch": 0.35242616033755275, + "grad_norm": 0.364876389503479, + "learning_rate": 0.0015, + "loss": 2.2686, + "step": 3341 + }, + { + "epoch": 0.35253164556962024, + "grad_norm": 0.4505840241909027, + "learning_rate": 0.0015, + "loss": 2.303, + "step": 3342 + }, + { + "epoch": 
0.35263713080168774, + "grad_norm": 0.4729323089122772, + "learning_rate": 0.0015, + "loss": 2.2671, + "step": 3343 + }, + { + "epoch": 0.3527426160337553, + "grad_norm": 0.44357338547706604, + "learning_rate": 0.0015, + "loss": 2.2992, + "step": 3344 + }, + { + "epoch": 0.3528481012658228, + "grad_norm": 0.36547452211380005, + "learning_rate": 0.0015, + "loss": 2.32, + "step": 3345 + }, + { + "epoch": 0.35295358649789027, + "grad_norm": 0.388144314289093, + "learning_rate": 0.0015, + "loss": 2.2874, + "step": 3346 + }, + { + "epoch": 0.3530590717299578, + "grad_norm": 0.3654184937477112, + "learning_rate": 0.0015, + "loss": 2.2936, + "step": 3347 + }, + { + "epoch": 0.3531645569620253, + "grad_norm": 0.37702086567878723, + "learning_rate": 0.0015, + "loss": 2.2816, + "step": 3348 + }, + { + "epoch": 0.3532700421940928, + "grad_norm": 0.39131584763526917, + "learning_rate": 0.0015, + "loss": 2.257, + "step": 3349 + }, + { + "epoch": 0.35337552742616035, + "grad_norm": 0.33040979504585266, + "learning_rate": 0.0015, + "loss": 2.2634, + "step": 3350 + }, + { + "epoch": 0.35348101265822784, + "grad_norm": 0.40049681067466736, + "learning_rate": 0.0015, + "loss": 2.2678, + "step": 3351 + }, + { + "epoch": 0.35358649789029534, + "grad_norm": 0.4034467339515686, + "learning_rate": 0.0015, + "loss": 2.3123, + "step": 3352 + }, + { + "epoch": 0.3536919831223629, + "grad_norm": 0.3596336841583252, + "learning_rate": 0.0015, + "loss": 2.279, + "step": 3353 + }, + { + "epoch": 0.3537974683544304, + "grad_norm": 0.4258441925048828, + "learning_rate": 0.0015, + "loss": 2.251, + "step": 3354 + }, + { + "epoch": 0.35390295358649787, + "grad_norm": 0.425508052110672, + "learning_rate": 0.0015, + "loss": 2.295, + "step": 3355 + }, + { + "epoch": 0.3540084388185654, + "grad_norm": 0.3733656704425812, + "learning_rate": 0.0015, + "loss": 2.2493, + "step": 3356 + }, + { + "epoch": 0.3541139240506329, + "grad_norm": 0.48628735542297363, + "learning_rate": 0.0015, + "loss": 2.2752, + 
"step": 3357 + }, + { + "epoch": 0.3542194092827004, + "grad_norm": 0.44300636649131775, + "learning_rate": 0.0015, + "loss": 2.2529, + "step": 3358 + }, + { + "epoch": 0.35432489451476795, + "grad_norm": 0.41034385561943054, + "learning_rate": 0.0015, + "loss": 2.2588, + "step": 3359 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 0.4015595316886902, + "learning_rate": 0.0015, + "loss": 2.3001, + "step": 3360 + }, + { + "epoch": 0.35453586497890294, + "grad_norm": 0.3931252360343933, + "learning_rate": 0.0015, + "loss": 2.2896, + "step": 3361 + }, + { + "epoch": 0.3546413502109705, + "grad_norm": 0.3953695595264435, + "learning_rate": 0.0015, + "loss": 2.3034, + "step": 3362 + }, + { + "epoch": 0.354746835443038, + "grad_norm": 0.36592379212379456, + "learning_rate": 0.0015, + "loss": 2.2919, + "step": 3363 + }, + { + "epoch": 0.35485232067510547, + "grad_norm": 0.39643123745918274, + "learning_rate": 0.0015, + "loss": 2.286, + "step": 3364 + }, + { + "epoch": 0.354957805907173, + "grad_norm": 0.4507441818714142, + "learning_rate": 0.0015, + "loss": 2.2476, + "step": 3365 + }, + { + "epoch": 0.3550632911392405, + "grad_norm": 0.4617077112197876, + "learning_rate": 0.0015, + "loss": 2.2859, + "step": 3366 + }, + { + "epoch": 0.355168776371308, + "grad_norm": 0.39295417070388794, + "learning_rate": 0.0015, + "loss": 2.2947, + "step": 3367 + }, + { + "epoch": 0.35527426160337555, + "grad_norm": 0.37985971570014954, + "learning_rate": 0.0015, + "loss": 2.2881, + "step": 3368 + }, + { + "epoch": 0.35537974683544304, + "grad_norm": 0.37860268354415894, + "learning_rate": 0.0015, + "loss": 2.2984, + "step": 3369 + }, + { + "epoch": 0.35548523206751054, + "grad_norm": 0.3898333013057709, + "learning_rate": 0.0015, + "loss": 2.2736, + "step": 3370 + }, + { + "epoch": 0.3555907172995781, + "grad_norm": 0.3895968794822693, + "learning_rate": 0.0015, + "loss": 2.2901, + "step": 3371 + }, + { + "epoch": 0.3556962025316456, + "grad_norm": 0.38689035177230835, + 
"learning_rate": 0.0015, + "loss": 2.2634, + "step": 3372 + }, + { + "epoch": 0.35580168776371307, + "grad_norm": 0.4919207990169525, + "learning_rate": 0.0015, + "loss": 2.2832, + "step": 3373 + }, + { + "epoch": 0.35590717299578056, + "grad_norm": 0.5486292839050293, + "learning_rate": 0.0015, + "loss": 2.2832, + "step": 3374 + }, + { + "epoch": 0.3560126582278481, + "grad_norm": 0.4743271470069885, + "learning_rate": 0.0015, + "loss": 2.2959, + "step": 3375 + }, + { + "epoch": 0.3561181434599156, + "grad_norm": 0.37073639035224915, + "learning_rate": 0.0015, + "loss": 2.2977, + "step": 3376 + }, + { + "epoch": 0.3562236286919831, + "grad_norm": 0.5609073042869568, + "learning_rate": 0.0015, + "loss": 2.2774, + "step": 3377 + }, + { + "epoch": 0.35632911392405064, + "grad_norm": 0.5233321785926819, + "learning_rate": 0.0015, + "loss": 2.2764, + "step": 3378 + }, + { + "epoch": 0.35643459915611814, + "grad_norm": 0.39955732226371765, + "learning_rate": 0.0015, + "loss": 2.2936, + "step": 3379 + }, + { + "epoch": 0.35654008438818563, + "grad_norm": 0.5914952754974365, + "learning_rate": 0.0015, + "loss": 2.3025, + "step": 3380 + }, + { + "epoch": 0.3566455696202532, + "grad_norm": 0.6039817929267883, + "learning_rate": 0.0015, + "loss": 2.2679, + "step": 3381 + }, + { + "epoch": 0.35675105485232067, + "grad_norm": 0.4786936938762665, + "learning_rate": 0.0015, + "loss": 2.2274, + "step": 3382 + }, + { + "epoch": 0.35685654008438816, + "grad_norm": 0.47789517045021057, + "learning_rate": 0.0015, + "loss": 2.2972, + "step": 3383 + }, + { + "epoch": 0.3569620253164557, + "grad_norm": 0.6196950674057007, + "learning_rate": 0.0015, + "loss": 2.2983, + "step": 3384 + }, + { + "epoch": 0.3570675105485232, + "grad_norm": 0.4179438352584839, + "learning_rate": 0.0015, + "loss": 2.236, + "step": 3385 + }, + { + "epoch": 0.3571729957805907, + "grad_norm": 0.44847598671913147, + "learning_rate": 0.0015, + "loss": 2.2685, + "step": 3386 + }, + { + "epoch": 0.35727848101265824, 
+ "grad_norm": 0.5196597576141357, + "learning_rate": 0.0015, + "loss": 2.3284, + "step": 3387 + }, + { + "epoch": 0.35738396624472574, + "grad_norm": 0.37340375781059265, + "learning_rate": 0.0015, + "loss": 2.29, + "step": 3388 + }, + { + "epoch": 0.35748945147679323, + "grad_norm": 0.5688936114311218, + "learning_rate": 0.0015, + "loss": 2.3174, + "step": 3389 + }, + { + "epoch": 0.3575949367088608, + "grad_norm": 0.5915834903717041, + "learning_rate": 0.0015, + "loss": 2.2694, + "step": 3390 + }, + { + "epoch": 0.35770042194092827, + "grad_norm": 0.3927062153816223, + "learning_rate": 0.0015, + "loss": 2.2772, + "step": 3391 + }, + { + "epoch": 0.35780590717299576, + "grad_norm": 0.45102861523628235, + "learning_rate": 0.0015, + "loss": 2.2786, + "step": 3392 + }, + { + "epoch": 0.3579113924050633, + "grad_norm": 0.4191957414150238, + "learning_rate": 0.0015, + "loss": 2.2763, + "step": 3393 + }, + { + "epoch": 0.3580168776371308, + "grad_norm": 0.3596945106983185, + "learning_rate": 0.0015, + "loss": 2.2671, + "step": 3394 + }, + { + "epoch": 0.3581223628691983, + "grad_norm": 0.43102210760116577, + "learning_rate": 0.0015, + "loss": 2.2875, + "step": 3395 + }, + { + "epoch": 0.35822784810126584, + "grad_norm": 0.40735378861427307, + "learning_rate": 0.0015, + "loss": 2.2872, + "step": 3396 + }, + { + "epoch": 0.35833333333333334, + "grad_norm": 0.4312075674533844, + "learning_rate": 0.0015, + "loss": 2.2627, + "step": 3397 + }, + { + "epoch": 0.35843881856540083, + "grad_norm": 0.49566900730133057, + "learning_rate": 0.0015, + "loss": 2.2656, + "step": 3398 + }, + { + "epoch": 0.3585443037974684, + "grad_norm": 0.428244948387146, + "learning_rate": 0.0015, + "loss": 2.3245, + "step": 3399 + }, + { + "epoch": 0.35864978902953587, + "grad_norm": 0.35436978936195374, + "learning_rate": 0.0015, + "loss": 2.322, + "step": 3400 + }, + { + "epoch": 0.35875527426160336, + "grad_norm": 0.5054922103881836, + "learning_rate": 0.0015, + "loss": 2.2896, + "step": 3401 + 
}, + { + "epoch": 0.3588607594936709, + "grad_norm": 0.5432666540145874, + "learning_rate": 0.0015, + "loss": 2.2763, + "step": 3402 + }, + { + "epoch": 0.3589662447257384, + "grad_norm": 0.46555671095848083, + "learning_rate": 0.0015, + "loss": 2.3119, + "step": 3403 + }, + { + "epoch": 0.3590717299578059, + "grad_norm": 0.4232676923274994, + "learning_rate": 0.0015, + "loss": 2.2684, + "step": 3404 + }, + { + "epoch": 0.35917721518987344, + "grad_norm": 0.5543973445892334, + "learning_rate": 0.0015, + "loss": 2.2687, + "step": 3405 + }, + { + "epoch": 0.35928270042194094, + "grad_norm": 0.4982137382030487, + "learning_rate": 0.0015, + "loss": 2.3003, + "step": 3406 + }, + { + "epoch": 0.35938818565400843, + "grad_norm": 0.4761847257614136, + "learning_rate": 0.0015, + "loss": 2.2507, + "step": 3407 + }, + { + "epoch": 0.3594936708860759, + "grad_norm": 0.46559062600135803, + "learning_rate": 0.0015, + "loss": 2.2846, + "step": 3408 + }, + { + "epoch": 0.35959915611814347, + "grad_norm": 0.43900594115257263, + "learning_rate": 0.0015, + "loss": 2.2843, + "step": 3409 + }, + { + "epoch": 0.35970464135021096, + "grad_norm": 0.47311916947364807, + "learning_rate": 0.0015, + "loss": 2.2491, + "step": 3410 + }, + { + "epoch": 0.35981012658227846, + "grad_norm": 0.5051692128181458, + "learning_rate": 0.0015, + "loss": 2.2963, + "step": 3411 + }, + { + "epoch": 0.359915611814346, + "grad_norm": 0.4623546600341797, + "learning_rate": 0.0015, + "loss": 2.282, + "step": 3412 + }, + { + "epoch": 0.3600210970464135, + "grad_norm": 0.4427788555622101, + "learning_rate": 0.0015, + "loss": 2.2626, + "step": 3413 + }, + { + "epoch": 0.360126582278481, + "grad_norm": 0.4681359529495239, + "learning_rate": 0.0015, + "loss": 2.2432, + "step": 3414 + }, + { + "epoch": 0.36023206751054854, + "grad_norm": 0.5229707360267639, + "learning_rate": 0.0015, + "loss": 2.2675, + "step": 3415 + }, + { + "epoch": 0.36033755274261603, + "grad_norm": 0.3909667730331421, + "learning_rate": 0.0015, 
+ "loss": 2.2813, + "step": 3416 + }, + { + "epoch": 0.3604430379746835, + "grad_norm": 0.5658714175224304, + "learning_rate": 0.0015, + "loss": 2.2907, + "step": 3417 + }, + { + "epoch": 0.36054852320675107, + "grad_norm": 0.5053144693374634, + "learning_rate": 0.0015, + "loss": 2.2681, + "step": 3418 + }, + { + "epoch": 0.36065400843881856, + "grad_norm": 0.4941186010837555, + "learning_rate": 0.0015, + "loss": 2.2662, + "step": 3419 + }, + { + "epoch": 0.36075949367088606, + "grad_norm": 0.5311992168426514, + "learning_rate": 0.0015, + "loss": 2.2914, + "step": 3420 + }, + { + "epoch": 0.3608649789029536, + "grad_norm": 0.4804255962371826, + "learning_rate": 0.0015, + "loss": 2.2919, + "step": 3421 + }, + { + "epoch": 0.3609704641350211, + "grad_norm": 0.4719026982784271, + "learning_rate": 0.0015, + "loss": 2.275, + "step": 3422 + }, + { + "epoch": 0.3610759493670886, + "grad_norm": 0.45860564708709717, + "learning_rate": 0.0015, + "loss": 2.2653, + "step": 3423 + }, + { + "epoch": 0.36118143459915614, + "grad_norm": 0.4562648832798004, + "learning_rate": 0.0015, + "loss": 2.2934, + "step": 3424 + }, + { + "epoch": 0.36128691983122363, + "grad_norm": 0.4304810166358948, + "learning_rate": 0.0015, + "loss": 2.289, + "step": 3425 + }, + { + "epoch": 0.3613924050632911, + "grad_norm": 0.4578821063041687, + "learning_rate": 0.0015, + "loss": 2.2602, + "step": 3426 + }, + { + "epoch": 0.36149789029535867, + "grad_norm": 0.38880595564842224, + "learning_rate": 0.0015, + "loss": 2.2823, + "step": 3427 + }, + { + "epoch": 0.36160337552742616, + "grad_norm": 0.4366943836212158, + "learning_rate": 0.0015, + "loss": 2.2569, + "step": 3428 + }, + { + "epoch": 0.36170886075949366, + "grad_norm": 0.3930543065071106, + "learning_rate": 0.0015, + "loss": 2.2542, + "step": 3429 + }, + { + "epoch": 0.3618143459915612, + "grad_norm": 0.36626121401786804, + "learning_rate": 0.0015, + "loss": 2.2684, + "step": 3430 + }, + { + "epoch": 0.3619198312236287, + "grad_norm": 
0.41073423624038696, + "learning_rate": 0.0015, + "loss": 2.2591, + "step": 3431 + }, + { + "epoch": 0.3620253164556962, + "grad_norm": 0.38751325011253357, + "learning_rate": 0.0015, + "loss": 2.2507, + "step": 3432 + }, + { + "epoch": 0.36213080168776374, + "grad_norm": 0.4647718071937561, + "learning_rate": 0.0015, + "loss": 2.2759, + "step": 3433 + }, + { + "epoch": 0.36223628691983123, + "grad_norm": 0.43405020236968994, + "learning_rate": 0.0015, + "loss": 2.2268, + "step": 3434 + }, + { + "epoch": 0.3623417721518987, + "grad_norm": 0.44544124603271484, + "learning_rate": 0.0015, + "loss": 2.2691, + "step": 3435 + }, + { + "epoch": 0.36244725738396627, + "grad_norm": 0.4101932644844055, + "learning_rate": 0.0015, + "loss": 2.254, + "step": 3436 + }, + { + "epoch": 0.36255274261603376, + "grad_norm": 0.37541115283966064, + "learning_rate": 0.0015, + "loss": 2.3097, + "step": 3437 + }, + { + "epoch": 0.36265822784810126, + "grad_norm": 0.4375191926956177, + "learning_rate": 0.0015, + "loss": 2.2873, + "step": 3438 + }, + { + "epoch": 0.3627637130801688, + "grad_norm": 0.3971523344516754, + "learning_rate": 0.0015, + "loss": 2.255, + "step": 3439 + }, + { + "epoch": 0.3628691983122363, + "grad_norm": 0.3918175995349884, + "learning_rate": 0.0015, + "loss": 2.2506, + "step": 3440 + }, + { + "epoch": 0.3629746835443038, + "grad_norm": 0.39432263374328613, + "learning_rate": 0.0015, + "loss": 2.2472, + "step": 3441 + }, + { + "epoch": 0.3630801687763713, + "grad_norm": 0.39756596088409424, + "learning_rate": 0.0015, + "loss": 2.2779, + "step": 3442 + }, + { + "epoch": 0.36318565400843883, + "grad_norm": 0.45297160744667053, + "learning_rate": 0.0015, + "loss": 2.298, + "step": 3443 + }, + { + "epoch": 0.3632911392405063, + "grad_norm": 0.4166678190231323, + "learning_rate": 0.0015, + "loss": 2.2485, + "step": 3444 + }, + { + "epoch": 0.3633966244725738, + "grad_norm": 0.4179295599460602, + "learning_rate": 0.0015, + "loss": 2.2873, + "step": 3445 + }, + { + 
"epoch": 0.36350210970464136, + "grad_norm": 0.4963417947292328, + "learning_rate": 0.0015, + "loss": 2.2985, + "step": 3446 + }, + { + "epoch": 0.36360759493670886, + "grad_norm": 0.39269188046455383, + "learning_rate": 0.0015, + "loss": 2.2598, + "step": 3447 + }, + { + "epoch": 0.36371308016877635, + "grad_norm": 0.3814387619495392, + "learning_rate": 0.0015, + "loss": 2.275, + "step": 3448 + }, + { + "epoch": 0.3638185654008439, + "grad_norm": 0.39486488699913025, + "learning_rate": 0.0015, + "loss": 2.2684, + "step": 3449 + }, + { + "epoch": 0.3639240506329114, + "grad_norm": 0.38020527362823486, + "learning_rate": 0.0015, + "loss": 2.2559, + "step": 3450 + }, + { + "epoch": 0.3640295358649789, + "grad_norm": 0.3437806963920593, + "learning_rate": 0.0015, + "loss": 2.2615, + "step": 3451 + }, + { + "epoch": 0.36413502109704643, + "grad_norm": 0.4403918385505676, + "learning_rate": 0.0015, + "loss": 2.2309, + "step": 3452 + }, + { + "epoch": 0.3642405063291139, + "grad_norm": 0.377186119556427, + "learning_rate": 0.0015, + "loss": 2.2656, + "step": 3453 + }, + { + "epoch": 0.3643459915611814, + "grad_norm": 0.4298524856567383, + "learning_rate": 0.0015, + "loss": 2.2529, + "step": 3454 + }, + { + "epoch": 0.36445147679324896, + "grad_norm": 0.3623707890510559, + "learning_rate": 0.0015, + "loss": 2.2869, + "step": 3455 + }, + { + "epoch": 0.36455696202531646, + "grad_norm": 0.4141888916492462, + "learning_rate": 0.0015, + "loss": 2.259, + "step": 3456 + }, + { + "epoch": 0.36466244725738395, + "grad_norm": 0.46748897433280945, + "learning_rate": 0.0015, + "loss": 2.2805, + "step": 3457 + }, + { + "epoch": 0.3647679324894515, + "grad_norm": 0.4316830635070801, + "learning_rate": 0.0015, + "loss": 2.2734, + "step": 3458 + }, + { + "epoch": 0.364873417721519, + "grad_norm": 0.40387091040611267, + "learning_rate": 0.0015, + "loss": 2.2184, + "step": 3459 + }, + { + "epoch": 0.3649789029535865, + "grad_norm": 0.45868733525276184, + "learning_rate": 0.0015, + "loss": 
2.2835, + "step": 3460 + }, + { + "epoch": 0.36508438818565403, + "grad_norm": 0.37106725573539734, + "learning_rate": 0.0015, + "loss": 2.2885, + "step": 3461 + }, + { + "epoch": 0.3651898734177215, + "grad_norm": 0.44843071699142456, + "learning_rate": 0.0015, + "loss": 2.2411, + "step": 3462 + }, + { + "epoch": 0.365295358649789, + "grad_norm": 0.39545220136642456, + "learning_rate": 0.0015, + "loss": 2.2639, + "step": 3463 + }, + { + "epoch": 0.36540084388185656, + "grad_norm": 0.41260504722595215, + "learning_rate": 0.0015, + "loss": 2.291, + "step": 3464 + }, + { + "epoch": 0.36550632911392406, + "grad_norm": 0.4916541576385498, + "learning_rate": 0.0015, + "loss": 2.2631, + "step": 3465 + }, + { + "epoch": 0.36561181434599155, + "grad_norm": 0.40944963693618774, + "learning_rate": 0.0015, + "loss": 2.2385, + "step": 3466 + }, + { + "epoch": 0.3657172995780591, + "grad_norm": 0.43343207240104675, + "learning_rate": 0.0015, + "loss": 2.284, + "step": 3467 + }, + { + "epoch": 0.3658227848101266, + "grad_norm": 0.459460973739624, + "learning_rate": 0.0015, + "loss": 2.2191, + "step": 3468 + }, + { + "epoch": 0.3659282700421941, + "grad_norm": 0.3652094602584839, + "learning_rate": 0.0015, + "loss": 2.2833, + "step": 3469 + }, + { + "epoch": 0.36603375527426163, + "grad_norm": 0.38742780685424805, + "learning_rate": 0.0015, + "loss": 2.286, + "step": 3470 + }, + { + "epoch": 0.3661392405063291, + "grad_norm": 0.34479793906211853, + "learning_rate": 0.0015, + "loss": 2.278, + "step": 3471 + }, + { + "epoch": 0.3662447257383966, + "grad_norm": 0.4328395128250122, + "learning_rate": 0.0015, + "loss": 2.2817, + "step": 3472 + }, + { + "epoch": 0.3663502109704641, + "grad_norm": 0.39692679047584534, + "learning_rate": 0.0015, + "loss": 2.3062, + "step": 3473 + }, + { + "epoch": 0.36645569620253166, + "grad_norm": 0.3364737927913666, + "learning_rate": 0.0015, + "loss": 2.2972, + "step": 3474 + }, + { + "epoch": 0.36656118143459915, + "grad_norm": 0.41231557726860046, 
+ "learning_rate": 0.0015, + "loss": 2.3084, + "step": 3475 + }, + { + "epoch": 0.36666666666666664, + "grad_norm": 0.39488232135772705, + "learning_rate": 0.0015, + "loss": 2.2637, + "step": 3476 + }, + { + "epoch": 0.3667721518987342, + "grad_norm": 0.3859366774559021, + "learning_rate": 0.0015, + "loss": 2.2546, + "step": 3477 + }, + { + "epoch": 0.3668776371308017, + "grad_norm": 0.37818753719329834, + "learning_rate": 0.0015, + "loss": 2.2301, + "step": 3478 + }, + { + "epoch": 0.3669831223628692, + "grad_norm": 0.4877581000328064, + "learning_rate": 0.0015, + "loss": 2.2699, + "step": 3479 + }, + { + "epoch": 0.3670886075949367, + "grad_norm": 0.4090389907360077, + "learning_rate": 0.0015, + "loss": 2.2625, + "step": 3480 + }, + { + "epoch": 0.3671940928270042, + "grad_norm": 0.38346266746520996, + "learning_rate": 0.0015, + "loss": 2.2669, + "step": 3481 + }, + { + "epoch": 0.3672995780590717, + "grad_norm": 0.5014040470123291, + "learning_rate": 0.0015, + "loss": 2.2871, + "step": 3482 + }, + { + "epoch": 0.36740506329113926, + "grad_norm": 0.4785529375076294, + "learning_rate": 0.0015, + "loss": 2.2492, + "step": 3483 + }, + { + "epoch": 0.36751054852320675, + "grad_norm": 0.45892155170440674, + "learning_rate": 0.0015, + "loss": 2.3005, + "step": 3484 + }, + { + "epoch": 0.36761603375527424, + "grad_norm": 0.3638256788253784, + "learning_rate": 0.0015, + "loss": 2.2796, + "step": 3485 + }, + { + "epoch": 0.3677215189873418, + "grad_norm": 0.37487807869911194, + "learning_rate": 0.0015, + "loss": 2.261, + "step": 3486 + }, + { + "epoch": 0.3678270042194093, + "grad_norm": 0.391204833984375, + "learning_rate": 0.0015, + "loss": 2.2606, + "step": 3487 + }, + { + "epoch": 0.3679324894514768, + "grad_norm": 0.40982311964035034, + "learning_rate": 0.0015, + "loss": 2.2626, + "step": 3488 + }, + { + "epoch": 0.3680379746835443, + "grad_norm": 0.3575272262096405, + "learning_rate": 0.0015, + "loss": 2.251, + "step": 3489 + }, + { + "epoch": 0.3681434599156118, + 
"grad_norm": 0.3853103816509247, + "learning_rate": 0.0015, + "loss": 2.2691, + "step": 3490 + }, + { + "epoch": 0.3682489451476793, + "grad_norm": 0.42500990629196167, + "learning_rate": 0.0015, + "loss": 2.257, + "step": 3491 + }, + { + "epoch": 0.36835443037974686, + "grad_norm": 0.43740683794021606, + "learning_rate": 0.0015, + "loss": 2.2585, + "step": 3492 + }, + { + "epoch": 0.36845991561181435, + "grad_norm": 0.3650633990764618, + "learning_rate": 0.0015, + "loss": 2.2455, + "step": 3493 + }, + { + "epoch": 0.36856540084388184, + "grad_norm": 0.4825221002101898, + "learning_rate": 0.0015, + "loss": 2.3126, + "step": 3494 + }, + { + "epoch": 0.3686708860759494, + "grad_norm": 0.4173322021961212, + "learning_rate": 0.0015, + "loss": 2.2675, + "step": 3495 + }, + { + "epoch": 0.3687763713080169, + "grad_norm": 0.40567660331726074, + "learning_rate": 0.0015, + "loss": 2.2285, + "step": 3496 + }, + { + "epoch": 0.3688818565400844, + "grad_norm": 0.41087594628334045, + "learning_rate": 0.0015, + "loss": 2.2674, + "step": 3497 + }, + { + "epoch": 0.3689873417721519, + "grad_norm": 0.3905235528945923, + "learning_rate": 0.0015, + "loss": 2.2291, + "step": 3498 + }, + { + "epoch": 0.3690928270042194, + "grad_norm": 0.4448557496070862, + "learning_rate": 0.0015, + "loss": 2.2539, + "step": 3499 + }, + { + "epoch": 0.3691983122362869, + "grad_norm": 0.4598749279975891, + "learning_rate": 0.0015, + "loss": 2.2368, + "step": 3500 + }, + { + "epoch": 0.36930379746835446, + "grad_norm": 0.4825080633163452, + "learning_rate": 0.0015, + "loss": 2.2409, + "step": 3501 + }, + { + "epoch": 0.36940928270042195, + "grad_norm": 0.3937256634235382, + "learning_rate": 0.0015, + "loss": 2.2631, + "step": 3502 + }, + { + "epoch": 0.36951476793248944, + "grad_norm": 0.41078734397888184, + "learning_rate": 0.0015, + "loss": 2.2868, + "step": 3503 + }, + { + "epoch": 0.369620253164557, + "grad_norm": 0.4125535488128662, + "learning_rate": 0.0015, + "loss": 2.3, + "step": 3504 + }, + { + 
"epoch": 0.3697257383966245, + "grad_norm": 0.41993582248687744, + "learning_rate": 0.0015, + "loss": 2.2477, + "step": 3505 + }, + { + "epoch": 0.369831223628692, + "grad_norm": 0.41882503032684326, + "learning_rate": 0.0015, + "loss": 2.269, + "step": 3506 + }, + { + "epoch": 0.36993670886075947, + "grad_norm": 0.3986961543560028, + "learning_rate": 0.0015, + "loss": 2.2652, + "step": 3507 + }, + { + "epoch": 0.370042194092827, + "grad_norm": 0.3747757375240326, + "learning_rate": 0.0015, + "loss": 2.2375, + "step": 3508 + }, + { + "epoch": 0.3701476793248945, + "grad_norm": 0.39686039090156555, + "learning_rate": 0.0015, + "loss": 2.2647, + "step": 3509 + }, + { + "epoch": 0.370253164556962, + "grad_norm": 0.37833961844444275, + "learning_rate": 0.0015, + "loss": 2.2614, + "step": 3510 + }, + { + "epoch": 0.37035864978902955, + "grad_norm": 0.41705888509750366, + "learning_rate": 0.0015, + "loss": 2.2609, + "step": 3511 + }, + { + "epoch": 0.37046413502109704, + "grad_norm": 0.37973976135253906, + "learning_rate": 0.0015, + "loss": 2.2482, + "step": 3512 + }, + { + "epoch": 0.37056962025316453, + "grad_norm": 0.4108153283596039, + "learning_rate": 0.0015, + "loss": 2.2412, + "step": 3513 + }, + { + "epoch": 0.3706751054852321, + "grad_norm": 0.40535369515419006, + "learning_rate": 0.0015, + "loss": 2.2622, + "step": 3514 + }, + { + "epoch": 0.3707805907172996, + "grad_norm": 0.3937755823135376, + "learning_rate": 0.0015, + "loss": 2.2591, + "step": 3515 + }, + { + "epoch": 0.37088607594936707, + "grad_norm": 0.38850218057632446, + "learning_rate": 0.0015, + "loss": 2.2615, + "step": 3516 + }, + { + "epoch": 0.3709915611814346, + "grad_norm": 0.38661524653434753, + "learning_rate": 0.0015, + "loss": 2.2485, + "step": 3517 + }, + { + "epoch": 0.3710970464135021, + "grad_norm": 0.4243146479129791, + "learning_rate": 0.0015, + "loss": 2.2501, + "step": 3518 + }, + { + "epoch": 0.3712025316455696, + "grad_norm": 0.33869868516921997, + "learning_rate": 0.0015, + 
"loss": 2.2686, + "step": 3519 + }, + { + "epoch": 0.37130801687763715, + "grad_norm": 0.45538070797920227, + "learning_rate": 0.0015, + "loss": 2.2552, + "step": 3520 + }, + { + "epoch": 0.37141350210970464, + "grad_norm": 0.593413770198822, + "learning_rate": 0.0015, + "loss": 2.2867, + "step": 3521 + }, + { + "epoch": 0.37151898734177213, + "grad_norm": 0.6876039505004883, + "learning_rate": 0.0015, + "loss": 2.2664, + "step": 3522 + }, + { + "epoch": 0.3716244725738397, + "grad_norm": 0.46357762813568115, + "learning_rate": 0.0015, + "loss": 2.2466, + "step": 3523 + }, + { + "epoch": 0.3717299578059072, + "grad_norm": 0.4596219062805176, + "learning_rate": 0.0015, + "loss": 2.2142, + "step": 3524 + }, + { + "epoch": 0.37183544303797467, + "grad_norm": 0.5580347180366516, + "learning_rate": 0.0015, + "loss": 2.3197, + "step": 3525 + }, + { + "epoch": 0.3719409282700422, + "grad_norm": 0.3539455235004425, + "learning_rate": 0.0015, + "loss": 2.268, + "step": 3526 + }, + { + "epoch": 0.3720464135021097, + "grad_norm": 0.5424857139587402, + "learning_rate": 0.0015, + "loss": 2.2516, + "step": 3527 + }, + { + "epoch": 0.3721518987341772, + "grad_norm": 0.5148676633834839, + "learning_rate": 0.0015, + "loss": 2.2953, + "step": 3528 + }, + { + "epoch": 0.37225738396624475, + "grad_norm": 0.3836022913455963, + "learning_rate": 0.0015, + "loss": 2.249, + "step": 3529 + }, + { + "epoch": 0.37236286919831224, + "grad_norm": 0.567938506603241, + "learning_rate": 0.0015, + "loss": 2.308, + "step": 3530 + }, + { + "epoch": 0.37246835443037973, + "grad_norm": 0.42352715134620667, + "learning_rate": 0.0015, + "loss": 2.2679, + "step": 3531 + }, + { + "epoch": 0.3725738396624473, + "grad_norm": 0.41488462686538696, + "learning_rate": 0.0015, + "loss": 2.3025, + "step": 3532 + }, + { + "epoch": 0.3726793248945148, + "grad_norm": 0.6243921518325806, + "learning_rate": 0.0015, + "loss": 2.2471, + "step": 3533 + }, + { + "epoch": 0.37278481012658227, + "grad_norm": 
0.4481073021888733, + "learning_rate": 0.0015, + "loss": 2.2899, + "step": 3534 + }, + { + "epoch": 0.3728902953586498, + "grad_norm": 0.4651174247264862, + "learning_rate": 0.0015, + "loss": 2.2922, + "step": 3535 + }, + { + "epoch": 0.3729957805907173, + "grad_norm": 0.5341436862945557, + "learning_rate": 0.0015, + "loss": 2.2499, + "step": 3536 + }, + { + "epoch": 0.3731012658227848, + "grad_norm": 0.465154230594635, + "learning_rate": 0.0015, + "loss": 2.2582, + "step": 3537 + }, + { + "epoch": 0.37320675105485235, + "grad_norm": 0.4165588617324829, + "learning_rate": 0.0015, + "loss": 2.2877, + "step": 3538 + }, + { + "epoch": 0.37331223628691984, + "grad_norm": 0.43693408370018005, + "learning_rate": 0.0015, + "loss": 2.2646, + "step": 3539 + }, + { + "epoch": 0.37341772151898733, + "grad_norm": 0.41572749614715576, + "learning_rate": 0.0015, + "loss": 2.2618, + "step": 3540 + }, + { + "epoch": 0.3735232067510548, + "grad_norm": 0.399691641330719, + "learning_rate": 0.0015, + "loss": 2.2421, + "step": 3541 + }, + { + "epoch": 0.3736286919831224, + "grad_norm": 0.41363272070884705, + "learning_rate": 0.0015, + "loss": 2.2805, + "step": 3542 + }, + { + "epoch": 0.37373417721518987, + "grad_norm": 0.38568785786628723, + "learning_rate": 0.0015, + "loss": 2.2689, + "step": 3543 + }, + { + "epoch": 0.37383966244725736, + "grad_norm": 0.3710682690143585, + "learning_rate": 0.0015, + "loss": 2.2403, + "step": 3544 + }, + { + "epoch": 0.3739451476793249, + "grad_norm": 0.3587670922279358, + "learning_rate": 0.0015, + "loss": 2.2754, + "step": 3545 + }, + { + "epoch": 0.3740506329113924, + "grad_norm": 0.39388230443000793, + "learning_rate": 0.0015, + "loss": 2.2393, + "step": 3546 + }, + { + "epoch": 0.3741561181434599, + "grad_norm": 0.34844475984573364, + "learning_rate": 0.0015, + "loss": 2.2261, + "step": 3547 + }, + { + "epoch": 0.37426160337552744, + "grad_norm": 0.3800446391105652, + "learning_rate": 0.0015, + "loss": 2.2331, + "step": 3548 + }, + { + "epoch": 
0.37436708860759493, + "grad_norm": 0.37848302721977234, + "learning_rate": 0.0015, + "loss": 2.2447, + "step": 3549 + }, + { + "epoch": 0.3744725738396624, + "grad_norm": 0.3766256272792816, + "learning_rate": 0.0015, + "loss": 2.2893, + "step": 3550 + }, + { + "epoch": 0.37457805907173, + "grad_norm": 0.39022189378738403, + "learning_rate": 0.0015, + "loss": 2.2332, + "step": 3551 + }, + { + "epoch": 0.37468354430379747, + "grad_norm": 0.38862210512161255, + "learning_rate": 0.0015, + "loss": 2.2379, + "step": 3552 + }, + { + "epoch": 0.37478902953586496, + "grad_norm": 0.37477564811706543, + "learning_rate": 0.0015, + "loss": 2.2583, + "step": 3553 + }, + { + "epoch": 0.3748945147679325, + "grad_norm": 0.3886670172214508, + "learning_rate": 0.0015, + "loss": 2.2644, + "step": 3554 + }, + { + "epoch": 0.375, + "grad_norm": 0.38992029428482056, + "learning_rate": 0.0015, + "loss": 2.2393, + "step": 3555 + }, + { + "epoch": 0.3751054852320675, + "grad_norm": 0.35113394260406494, + "learning_rate": 0.0015, + "loss": 2.2619, + "step": 3556 + }, + { + "epoch": 0.37521097046413504, + "grad_norm": 0.37939906120300293, + "learning_rate": 0.0015, + "loss": 2.2443, + "step": 3557 + }, + { + "epoch": 0.37531645569620253, + "grad_norm": 0.35269537568092346, + "learning_rate": 0.0015, + "loss": 2.2197, + "step": 3558 + }, + { + "epoch": 0.37542194092827, + "grad_norm": 0.42203015089035034, + "learning_rate": 0.0015, + "loss": 2.2644, + "step": 3559 + }, + { + "epoch": 0.3755274261603376, + "grad_norm": 0.37625226378440857, + "learning_rate": 0.0015, + "loss": 2.2679, + "step": 3560 + }, + { + "epoch": 0.37563291139240507, + "grad_norm": 0.4006570875644684, + "learning_rate": 0.0015, + "loss": 2.2688, + "step": 3561 + }, + { + "epoch": 0.37573839662447256, + "grad_norm": 0.4120563566684723, + "learning_rate": 0.0015, + "loss": 2.2274, + "step": 3562 + }, + { + "epoch": 0.3758438818565401, + "grad_norm": 0.3742722272872925, + "learning_rate": 0.0015, + "loss": 2.246, + "step": 
3563 + }, + { + "epoch": 0.3759493670886076, + "grad_norm": 0.37228646874427795, + "learning_rate": 0.0015, + "loss": 2.2323, + "step": 3564 + }, + { + "epoch": 0.3760548523206751, + "grad_norm": 0.37009796500205994, + "learning_rate": 0.0015, + "loss": 2.2627, + "step": 3565 + }, + { + "epoch": 0.37616033755274264, + "grad_norm": 0.36259591579437256, + "learning_rate": 0.0015, + "loss": 2.2166, + "step": 3566 + }, + { + "epoch": 0.37626582278481013, + "grad_norm": 0.3526706397533417, + "learning_rate": 0.0015, + "loss": 2.2562, + "step": 3567 + }, + { + "epoch": 0.3763713080168776, + "grad_norm": 0.397365927696228, + "learning_rate": 0.0015, + "loss": 2.2928, + "step": 3568 + }, + { + "epoch": 0.3764767932489452, + "grad_norm": 0.35818442702293396, + "learning_rate": 0.0015, + "loss": 2.2381, + "step": 3569 + }, + { + "epoch": 0.37658227848101267, + "grad_norm": 0.4118390679359436, + "learning_rate": 0.0015, + "loss": 2.2515, + "step": 3570 + }, + { + "epoch": 0.37668776371308016, + "grad_norm": 0.3585085868835449, + "learning_rate": 0.0015, + "loss": 2.2349, + "step": 3571 + }, + { + "epoch": 0.37679324894514765, + "grad_norm": 0.4117731750011444, + "learning_rate": 0.0015, + "loss": 2.2466, + "step": 3572 + }, + { + "epoch": 0.3768987341772152, + "grad_norm": 0.3831518292427063, + "learning_rate": 0.0015, + "loss": 2.2486, + "step": 3573 + }, + { + "epoch": 0.3770042194092827, + "grad_norm": 0.37224990129470825, + "learning_rate": 0.0015, + "loss": 2.271, + "step": 3574 + }, + { + "epoch": 0.3771097046413502, + "grad_norm": 0.4251154363155365, + "learning_rate": 0.0015, + "loss": 2.2533, + "step": 3575 + }, + { + "epoch": 0.37721518987341773, + "grad_norm": 0.40590840578079224, + "learning_rate": 0.0015, + "loss": 2.2604, + "step": 3576 + }, + { + "epoch": 0.3773206751054852, + "grad_norm": 0.3487290143966675, + "learning_rate": 0.0015, + "loss": 2.2436, + "step": 3577 + }, + { + "epoch": 0.3774261603375527, + "grad_norm": 0.43828338384628296, + "learning_rate": 
0.0015, + "loss": 2.2595, + "step": 3578 + }, + { + "epoch": 0.37753164556962027, + "grad_norm": 0.42772480845451355, + "learning_rate": 0.0015, + "loss": 2.2445, + "step": 3579 + }, + { + "epoch": 0.37763713080168776, + "grad_norm": 0.3948749303817749, + "learning_rate": 0.0015, + "loss": 2.2583, + "step": 3580 + }, + { + "epoch": 0.37774261603375525, + "grad_norm": 0.4319857060909271, + "learning_rate": 0.0015, + "loss": 2.2521, + "step": 3581 + }, + { + "epoch": 0.3778481012658228, + "grad_norm": 0.3502231538295746, + "learning_rate": 0.0015, + "loss": 2.2314, + "step": 3582 + }, + { + "epoch": 0.3779535864978903, + "grad_norm": 0.4018506109714508, + "learning_rate": 0.0015, + "loss": 2.2596, + "step": 3583 + }, + { + "epoch": 0.3780590717299578, + "grad_norm": 0.3695201277732849, + "learning_rate": 0.0015, + "loss": 2.2315, + "step": 3584 + }, + { + "epoch": 0.37816455696202533, + "grad_norm": 0.4156683087348938, + "learning_rate": 0.0015, + "loss": 2.2898, + "step": 3585 + }, + { + "epoch": 0.3782700421940928, + "grad_norm": 0.3720765709877014, + "learning_rate": 0.0015, + "loss": 2.2693, + "step": 3586 + }, + { + "epoch": 0.3783755274261603, + "grad_norm": 0.3841859698295593, + "learning_rate": 0.0015, + "loss": 2.2414, + "step": 3587 + }, + { + "epoch": 0.37848101265822787, + "grad_norm": 0.42237675189971924, + "learning_rate": 0.0015, + "loss": 2.2623, + "step": 3588 + }, + { + "epoch": 0.37858649789029536, + "grad_norm": 0.37989145517349243, + "learning_rate": 0.0015, + "loss": 2.2721, + "step": 3589 + }, + { + "epoch": 0.37869198312236285, + "grad_norm": 0.41645678877830505, + "learning_rate": 0.0015, + "loss": 2.2419, + "step": 3590 + }, + { + "epoch": 0.3787974683544304, + "grad_norm": 0.3765563368797302, + "learning_rate": 0.0015, + "loss": 2.2138, + "step": 3591 + }, + { + "epoch": 0.3789029535864979, + "grad_norm": 0.3639881908893585, + "learning_rate": 0.0015, + "loss": 2.2531, + "step": 3592 + }, + { + "epoch": 0.3790084388185654, + "grad_norm": 
0.43341460824012756, + "learning_rate": 0.0015, + "loss": 2.2847, + "step": 3593 + }, + { + "epoch": 0.37911392405063293, + "grad_norm": 0.47840026021003723, + "learning_rate": 0.0015, + "loss": 2.2496, + "step": 3594 + }, + { + "epoch": 0.3792194092827004, + "grad_norm": 0.4074309170246124, + "learning_rate": 0.0015, + "loss": 2.2623, + "step": 3595 + }, + { + "epoch": 0.3793248945147679, + "grad_norm": 0.5444462895393372, + "learning_rate": 0.0015, + "loss": 2.2175, + "step": 3596 + }, + { + "epoch": 0.37943037974683547, + "grad_norm": 0.5184288620948792, + "learning_rate": 0.0015, + "loss": 2.2626, + "step": 3597 + }, + { + "epoch": 0.37953586497890296, + "grad_norm": 0.378292053937912, + "learning_rate": 0.0015, + "loss": 2.2422, + "step": 3598 + }, + { + "epoch": 0.37964135021097045, + "grad_norm": 0.5090464353561401, + "learning_rate": 0.0015, + "loss": 2.2295, + "step": 3599 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 0.4340112507343292, + "learning_rate": 0.0015, + "loss": 2.2319, + "step": 3600 + }, + { + "epoch": 0.3798523206751055, + "grad_norm": 0.4298442006111145, + "learning_rate": 0.0015, + "loss": 2.2556, + "step": 3601 + }, + { + "epoch": 0.379957805907173, + "grad_norm": 0.48323896527290344, + "learning_rate": 0.0015, + "loss": 2.251, + "step": 3602 + }, + { + "epoch": 0.38006329113924053, + "grad_norm": 0.38664084672927856, + "learning_rate": 0.0015, + "loss": 2.2394, + "step": 3603 + }, + { + "epoch": 0.380168776371308, + "grad_norm": 0.44738930463790894, + "learning_rate": 0.0015, + "loss": 2.2067, + "step": 3604 + }, + { + "epoch": 0.3802742616033755, + "grad_norm": 0.38903120160102844, + "learning_rate": 0.0015, + "loss": 2.2747, + "step": 3605 + }, + { + "epoch": 0.380379746835443, + "grad_norm": 0.4782590866088867, + "learning_rate": 0.0015, + "loss": 2.238, + "step": 3606 + }, + { + "epoch": 0.38048523206751056, + "grad_norm": 0.39938098192214966, + "learning_rate": 0.0015, + "loss": 2.2089, + "step": 3607 + }, + { + "epoch": 
0.38059071729957805, + "grad_norm": 0.398523211479187, + "learning_rate": 0.0015, + "loss": 2.2631, + "step": 3608 + }, + { + "epoch": 0.38069620253164554, + "grad_norm": 0.4473811089992523, + "learning_rate": 0.0015, + "loss": 2.2569, + "step": 3609 + }, + { + "epoch": 0.3808016877637131, + "grad_norm": 0.40535876154899597, + "learning_rate": 0.0015, + "loss": 2.2136, + "step": 3610 + }, + { + "epoch": 0.3809071729957806, + "grad_norm": 0.4407278895378113, + "learning_rate": 0.0015, + "loss": 2.2641, + "step": 3611 + }, + { + "epoch": 0.3810126582278481, + "grad_norm": 0.4150991439819336, + "learning_rate": 0.0015, + "loss": 2.2411, + "step": 3612 + }, + { + "epoch": 0.3811181434599156, + "grad_norm": 0.5932329297065735, + "learning_rate": 0.0015, + "loss": 2.258, + "step": 3613 + }, + { + "epoch": 0.3812236286919831, + "grad_norm": 0.48109471797943115, + "learning_rate": 0.0015, + "loss": 2.2135, + "step": 3614 + }, + { + "epoch": 0.3813291139240506, + "grad_norm": 0.40406379103660583, + "learning_rate": 0.0015, + "loss": 2.2314, + "step": 3615 + }, + { + "epoch": 0.38143459915611816, + "grad_norm": 0.4942953288555145, + "learning_rate": 0.0015, + "loss": 2.2416, + "step": 3616 + }, + { + "epoch": 0.38154008438818565, + "grad_norm": 0.4078257083892822, + "learning_rate": 0.0015, + "loss": 2.2819, + "step": 3617 + }, + { + "epoch": 0.38164556962025314, + "grad_norm": 0.4502702057361603, + "learning_rate": 0.0015, + "loss": 2.2702, + "step": 3618 + }, + { + "epoch": 0.3817510548523207, + "grad_norm": 0.46147873997688293, + "learning_rate": 0.0015, + "loss": 2.2706, + "step": 3619 + }, + { + "epoch": 0.3818565400843882, + "grad_norm": 0.42090722918510437, + "learning_rate": 0.0015, + "loss": 2.2224, + "step": 3620 + }, + { + "epoch": 0.3819620253164557, + "grad_norm": 0.40782907605171204, + "learning_rate": 0.0015, + "loss": 2.2769, + "step": 3621 + }, + { + "epoch": 0.3820675105485232, + "grad_norm": 0.3698021471500397, + "learning_rate": 0.0015, + "loss": 2.1997, 
+ "step": 3622 + }, + { + "epoch": 0.3821729957805907, + "grad_norm": 0.3779067099094391, + "learning_rate": 0.0015, + "loss": 2.2663, + "step": 3623 + }, + { + "epoch": 0.3822784810126582, + "grad_norm": 0.37905415892601013, + "learning_rate": 0.0015, + "loss": 2.2528, + "step": 3624 + }, + { + "epoch": 0.38238396624472576, + "grad_norm": 0.36711588501930237, + "learning_rate": 0.0015, + "loss": 2.2413, + "step": 3625 + }, + { + "epoch": 0.38248945147679325, + "grad_norm": 0.3954015374183655, + "learning_rate": 0.0015, + "loss": 2.2319, + "step": 3626 + }, + { + "epoch": 0.38259493670886074, + "grad_norm": 0.3745918273925781, + "learning_rate": 0.0015, + "loss": 2.2396, + "step": 3627 + }, + { + "epoch": 0.3827004219409283, + "grad_norm": 0.3725420832633972, + "learning_rate": 0.0015, + "loss": 2.2553, + "step": 3628 + }, + { + "epoch": 0.3828059071729958, + "grad_norm": 0.38307201862335205, + "learning_rate": 0.0015, + "loss": 2.2287, + "step": 3629 + }, + { + "epoch": 0.3829113924050633, + "grad_norm": 0.369858980178833, + "learning_rate": 0.0015, + "loss": 2.2454, + "step": 3630 + }, + { + "epoch": 0.3830168776371308, + "grad_norm": 0.43379244208335876, + "learning_rate": 0.0015, + "loss": 2.256, + "step": 3631 + }, + { + "epoch": 0.3831223628691983, + "grad_norm": 0.37179887294769287, + "learning_rate": 0.0015, + "loss": 2.2296, + "step": 3632 + }, + { + "epoch": 0.3832278481012658, + "grad_norm": 0.3985079228878021, + "learning_rate": 0.0015, + "loss": 2.246, + "step": 3633 + }, + { + "epoch": 0.38333333333333336, + "grad_norm": 0.4416363835334778, + "learning_rate": 0.0015, + "loss": 2.2067, + "step": 3634 + }, + { + "epoch": 0.38343881856540085, + "grad_norm": 0.3296619653701782, + "learning_rate": 0.0015, + "loss": 2.2432, + "step": 3635 + }, + { + "epoch": 0.38354430379746834, + "grad_norm": 0.39497607946395874, + "learning_rate": 0.0015, + "loss": 2.2179, + "step": 3636 + }, + { + "epoch": 0.3836497890295359, + "grad_norm": 0.35021400451660156, + 
"learning_rate": 0.0015, + "loss": 2.3063, + "step": 3637 + }, + { + "epoch": 0.3837552742616034, + "grad_norm": 0.3690424859523773, + "learning_rate": 0.0015, + "loss": 2.2369, + "step": 3638 + }, + { + "epoch": 0.3838607594936709, + "grad_norm": 0.39422184228897095, + "learning_rate": 0.0015, + "loss": 2.2231, + "step": 3639 + }, + { + "epoch": 0.38396624472573837, + "grad_norm": 0.40510380268096924, + "learning_rate": 0.0015, + "loss": 2.2233, + "step": 3640 + }, + { + "epoch": 0.3840717299578059, + "grad_norm": 0.3617255389690399, + "learning_rate": 0.0015, + "loss": 2.2254, + "step": 3641 + }, + { + "epoch": 0.3841772151898734, + "grad_norm": 0.3897545039653778, + "learning_rate": 0.0015, + "loss": 2.2695, + "step": 3642 + }, + { + "epoch": 0.3842827004219409, + "grad_norm": 0.41921916604042053, + "learning_rate": 0.0015, + "loss": 2.2473, + "step": 3643 + }, + { + "epoch": 0.38438818565400845, + "grad_norm": 0.4164195656776428, + "learning_rate": 0.0015, + "loss": 2.2221, + "step": 3644 + }, + { + "epoch": 0.38449367088607594, + "grad_norm": 0.339032918214798, + "learning_rate": 0.0015, + "loss": 2.243, + "step": 3645 + }, + { + "epoch": 0.38459915611814344, + "grad_norm": 0.5070293545722961, + "learning_rate": 0.0015, + "loss": 2.2232, + "step": 3646 + }, + { + "epoch": 0.384704641350211, + "grad_norm": 0.4455765187740326, + "learning_rate": 0.0015, + "loss": 2.2382, + "step": 3647 + }, + { + "epoch": 0.3848101265822785, + "grad_norm": 0.3905869126319885, + "learning_rate": 0.0015, + "loss": 2.2644, + "step": 3648 + }, + { + "epoch": 0.38491561181434597, + "grad_norm": 0.3444572985172272, + "learning_rate": 0.0015, + "loss": 2.2609, + "step": 3649 + }, + { + "epoch": 0.3850210970464135, + "grad_norm": 0.4056805372238159, + "learning_rate": 0.0015, + "loss": 2.2453, + "step": 3650 + }, + { + "epoch": 0.385126582278481, + "grad_norm": 0.39660197496414185, + "learning_rate": 0.0015, + "loss": 2.2809, + "step": 3651 + }, + { + "epoch": 0.3852320675105485, + 
"grad_norm": 0.3517308235168457, + "learning_rate": 0.0015, + "loss": 2.2335, + "step": 3652 + }, + { + "epoch": 0.38533755274261605, + "grad_norm": 0.5414156913757324, + "learning_rate": 0.0015, + "loss": 2.2337, + "step": 3653 + }, + { + "epoch": 0.38544303797468354, + "grad_norm": 0.6521390676498413, + "learning_rate": 0.0015, + "loss": 2.2761, + "step": 3654 + }, + { + "epoch": 0.38554852320675104, + "grad_norm": 0.493118017911911, + "learning_rate": 0.0015, + "loss": 2.2482, + "step": 3655 + }, + { + "epoch": 0.3856540084388186, + "grad_norm": 0.40390047430992126, + "learning_rate": 0.0015, + "loss": 2.2223, + "step": 3656 + }, + { + "epoch": 0.3857594936708861, + "grad_norm": 0.5269008278846741, + "learning_rate": 0.0015, + "loss": 2.2065, + "step": 3657 + }, + { + "epoch": 0.38586497890295357, + "grad_norm": 0.4880521297454834, + "learning_rate": 0.0015, + "loss": 2.2207, + "step": 3658 + }, + { + "epoch": 0.3859704641350211, + "grad_norm": 0.34227681159973145, + "learning_rate": 0.0015, + "loss": 2.2455, + "step": 3659 + }, + { + "epoch": 0.3860759493670886, + "grad_norm": 0.42245253920555115, + "learning_rate": 0.0015, + "loss": 2.2264, + "step": 3660 + }, + { + "epoch": 0.3861814345991561, + "grad_norm": 0.5005381107330322, + "learning_rate": 0.0015, + "loss": 2.2411, + "step": 3661 + }, + { + "epoch": 0.38628691983122365, + "grad_norm": 0.3887198865413666, + "learning_rate": 0.0015, + "loss": 2.2181, + "step": 3662 + }, + { + "epoch": 0.38639240506329114, + "grad_norm": 0.3955639600753784, + "learning_rate": 0.0015, + "loss": 2.2307, + "step": 3663 + }, + { + "epoch": 0.38649789029535864, + "grad_norm": 0.4228692650794983, + "learning_rate": 0.0015, + "loss": 2.2147, + "step": 3664 + }, + { + "epoch": 0.3866033755274262, + "grad_norm": 0.4092079699039459, + "learning_rate": 0.0015, + "loss": 2.217, + "step": 3665 + }, + { + "epoch": 0.3867088607594937, + "grad_norm": 0.372229665517807, + "learning_rate": 0.0015, + "loss": 2.2292, + "step": 3666 + }, + { 
+ "epoch": 0.38681434599156117, + "grad_norm": 0.4081123471260071, + "learning_rate": 0.0015, + "loss": 2.2577, + "step": 3667 + }, + { + "epoch": 0.3869198312236287, + "grad_norm": 0.38480016589164734, + "learning_rate": 0.0015, + "loss": 2.2658, + "step": 3668 + }, + { + "epoch": 0.3870253164556962, + "grad_norm": 0.3823778033256531, + "learning_rate": 0.0015, + "loss": 2.2149, + "step": 3669 + }, + { + "epoch": 0.3871308016877637, + "grad_norm": 0.4007967412471771, + "learning_rate": 0.0015, + "loss": 2.2083, + "step": 3670 + }, + { + "epoch": 0.3872362869198312, + "grad_norm": 0.38028469681739807, + "learning_rate": 0.0015, + "loss": 2.2356, + "step": 3671 + }, + { + "epoch": 0.38734177215189874, + "grad_norm": 0.3634367883205414, + "learning_rate": 0.0015, + "loss": 2.2321, + "step": 3672 + }, + { + "epoch": 0.38744725738396624, + "grad_norm": 0.3856334388256073, + "learning_rate": 0.0015, + "loss": 2.2119, + "step": 3673 + }, + { + "epoch": 0.38755274261603373, + "grad_norm": 0.3912821412086487, + "learning_rate": 0.0015, + "loss": 2.2229, + "step": 3674 + }, + { + "epoch": 0.3876582278481013, + "grad_norm": 0.38605982065200806, + "learning_rate": 0.0015, + "loss": 2.2125, + "step": 3675 + }, + { + "epoch": 0.38776371308016877, + "grad_norm": 0.3976181745529175, + "learning_rate": 0.0015, + "loss": 2.2204, + "step": 3676 + }, + { + "epoch": 0.38786919831223626, + "grad_norm": 0.3820282816886902, + "learning_rate": 0.0015, + "loss": 2.2605, + "step": 3677 + }, + { + "epoch": 0.3879746835443038, + "grad_norm": 0.3669843375682831, + "learning_rate": 0.0015, + "loss": 2.2535, + "step": 3678 + }, + { + "epoch": 0.3880801687763713, + "grad_norm": 0.36120450496673584, + "learning_rate": 0.0015, + "loss": 2.2546, + "step": 3679 + }, + { + "epoch": 0.3881856540084388, + "grad_norm": 0.3502272665500641, + "learning_rate": 0.0015, + "loss": 2.2017, + "step": 3680 + }, + { + "epoch": 0.38829113924050634, + "grad_norm": 0.4180753529071808, + "learning_rate": 0.0015, + 
"loss": 2.259, + "step": 3681 + }, + { + "epoch": 0.38839662447257384, + "grad_norm": 0.40814080834388733, + "learning_rate": 0.0015, + "loss": 2.229, + "step": 3682 + }, + { + "epoch": 0.38850210970464133, + "grad_norm": 0.4355412721633911, + "learning_rate": 0.0015, + "loss": 2.2444, + "step": 3683 + }, + { + "epoch": 0.3886075949367089, + "grad_norm": 0.36337175965309143, + "learning_rate": 0.0015, + "loss": 2.2425, + "step": 3684 + }, + { + "epoch": 0.38871308016877637, + "grad_norm": 0.4451058804988861, + "learning_rate": 0.0015, + "loss": 2.2484, + "step": 3685 + }, + { + "epoch": 0.38881856540084386, + "grad_norm": 0.3951578140258789, + "learning_rate": 0.0015, + "loss": 2.2204, + "step": 3686 + }, + { + "epoch": 0.3889240506329114, + "grad_norm": 0.44703930616378784, + "learning_rate": 0.0015, + "loss": 2.2323, + "step": 3687 + }, + { + "epoch": 0.3890295358649789, + "grad_norm": 0.48147764801979065, + "learning_rate": 0.0015, + "loss": 2.2279, + "step": 3688 + }, + { + "epoch": 0.3891350210970464, + "grad_norm": 0.370686411857605, + "learning_rate": 0.0015, + "loss": 2.258, + "step": 3689 + }, + { + "epoch": 0.38924050632911394, + "grad_norm": 0.45103368163108826, + "learning_rate": 0.0015, + "loss": 2.2334, + "step": 3690 + }, + { + "epoch": 0.38934599156118144, + "grad_norm": 0.4531734883785248, + "learning_rate": 0.0015, + "loss": 2.287, + "step": 3691 + }, + { + "epoch": 0.38945147679324893, + "grad_norm": 0.47291260957717896, + "learning_rate": 0.0015, + "loss": 2.2516, + "step": 3692 + }, + { + "epoch": 0.3895569620253165, + "grad_norm": 0.502051591873169, + "learning_rate": 0.0015, + "loss": 2.2109, + "step": 3693 + }, + { + "epoch": 0.38966244725738397, + "grad_norm": 0.36059385538101196, + "learning_rate": 0.0015, + "loss": 2.2454, + "step": 3694 + }, + { + "epoch": 0.38976793248945146, + "grad_norm": 0.5719208121299744, + "learning_rate": 0.0015, + "loss": 2.2496, + "step": 3695 + }, + { + "epoch": 0.389873417721519, + "grad_norm": 
0.47917360067367554, + "learning_rate": 0.0015, + "loss": 2.2284, + "step": 3696 + }, + { + "epoch": 0.3899789029535865, + "grad_norm": 0.4594693183898926, + "learning_rate": 0.0015, + "loss": 2.246, + "step": 3697 + }, + { + "epoch": 0.390084388185654, + "grad_norm": 0.5052757263183594, + "learning_rate": 0.0015, + "loss": 2.2833, + "step": 3698 + }, + { + "epoch": 0.39018987341772154, + "grad_norm": 0.37332743406295776, + "learning_rate": 0.0015, + "loss": 2.1985, + "step": 3699 + }, + { + "epoch": 0.39029535864978904, + "grad_norm": 0.5083350539207458, + "learning_rate": 0.0015, + "loss": 2.2957, + "step": 3700 + }, + { + "epoch": 0.39040084388185653, + "grad_norm": 0.4298132061958313, + "learning_rate": 0.0015, + "loss": 2.2392, + "step": 3701 + }, + { + "epoch": 0.3905063291139241, + "grad_norm": 0.4535556137561798, + "learning_rate": 0.0015, + "loss": 2.228, + "step": 3702 + }, + { + "epoch": 0.39061181434599157, + "grad_norm": 0.46336594223976135, + "learning_rate": 0.0015, + "loss": 2.2076, + "step": 3703 + }, + { + "epoch": 0.39071729957805906, + "grad_norm": 0.43353772163391113, + "learning_rate": 0.0015, + "loss": 2.2508, + "step": 3704 + }, + { + "epoch": 0.39082278481012656, + "grad_norm": 0.4861854910850525, + "learning_rate": 0.0015, + "loss": 2.2183, + "step": 3705 + }, + { + "epoch": 0.3909282700421941, + "grad_norm": 0.4857858121395111, + "learning_rate": 0.0015, + "loss": 2.2551, + "step": 3706 + }, + { + "epoch": 0.3910337552742616, + "grad_norm": 0.48157593607902527, + "learning_rate": 0.0015, + "loss": 2.286, + "step": 3707 + }, + { + "epoch": 0.3911392405063291, + "grad_norm": 0.42506319284439087, + "learning_rate": 0.0015, + "loss": 2.2252, + "step": 3708 + }, + { + "epoch": 0.39124472573839664, + "grad_norm": 0.48369866609573364, + "learning_rate": 0.0015, + "loss": 2.2046, + "step": 3709 + }, + { + "epoch": 0.39135021097046413, + "grad_norm": 0.5299753546714783, + "learning_rate": 0.0015, + "loss": 2.1821, + "step": 3710 + }, + { + 
"epoch": 0.3914556962025316, + "grad_norm": 0.3720400333404541, + "learning_rate": 0.0015, + "loss": 2.1964, + "step": 3711 + }, + { + "epoch": 0.39156118143459917, + "grad_norm": 0.4106769561767578, + "learning_rate": 0.0015, + "loss": 2.1928, + "step": 3712 + }, + { + "epoch": 0.39166666666666666, + "grad_norm": 0.3845234513282776, + "learning_rate": 0.0015, + "loss": 2.1973, + "step": 3713 + }, + { + "epoch": 0.39177215189873416, + "grad_norm": 0.4278480112552643, + "learning_rate": 0.0015, + "loss": 2.2317, + "step": 3714 + }, + { + "epoch": 0.3918776371308017, + "grad_norm": 0.3629172742366791, + "learning_rate": 0.0015, + "loss": 2.2062, + "step": 3715 + }, + { + "epoch": 0.3919831223628692, + "grad_norm": 0.47276198863983154, + "learning_rate": 0.0015, + "loss": 2.2078, + "step": 3716 + }, + { + "epoch": 0.3920886075949367, + "grad_norm": 0.4685583710670471, + "learning_rate": 0.0015, + "loss": 2.2582, + "step": 3717 + }, + { + "epoch": 0.39219409282700424, + "grad_norm": 0.3718152642250061, + "learning_rate": 0.0015, + "loss": 2.1961, + "step": 3718 + }, + { + "epoch": 0.39229957805907173, + "grad_norm": 0.6273871064186096, + "learning_rate": 0.0015, + "loss": 2.2507, + "step": 3719 + }, + { + "epoch": 0.3924050632911392, + "grad_norm": 0.5691996216773987, + "learning_rate": 0.0015, + "loss": 2.2256, + "step": 3720 + }, + { + "epoch": 0.39251054852320677, + "grad_norm": 0.502264678478241, + "learning_rate": 0.0015, + "loss": 2.2268, + "step": 3721 + }, + { + "epoch": 0.39261603375527426, + "grad_norm": 0.5184580087661743, + "learning_rate": 0.0015, + "loss": 2.223, + "step": 3722 + }, + { + "epoch": 0.39272151898734176, + "grad_norm": 0.6465486884117126, + "learning_rate": 0.0015, + "loss": 2.2249, + "step": 3723 + }, + { + "epoch": 0.3928270042194093, + "grad_norm": 0.4363558292388916, + "learning_rate": 0.0015, + "loss": 2.2588, + "step": 3724 + }, + { + "epoch": 0.3929324894514768, + "grad_norm": 0.457606703042984, + "learning_rate": 0.0015, + "loss": 
2.2133, + "step": 3725 + }, + { + "epoch": 0.3930379746835443, + "grad_norm": 0.49587783217430115, + "learning_rate": 0.0015, + "loss": 2.2385, + "step": 3726 + }, + { + "epoch": 0.39314345991561184, + "grad_norm": 0.3737594187259674, + "learning_rate": 0.0015, + "loss": 2.2269, + "step": 3727 + }, + { + "epoch": 0.39324894514767933, + "grad_norm": 0.5079815983772278, + "learning_rate": 0.0015, + "loss": 2.2364, + "step": 3728 + }, + { + "epoch": 0.3933544303797468, + "grad_norm": 0.43904855847358704, + "learning_rate": 0.0015, + "loss": 2.2475, + "step": 3729 + }, + { + "epoch": 0.39345991561181437, + "grad_norm": 0.41565096378326416, + "learning_rate": 0.0015, + "loss": 2.225, + "step": 3730 + }, + { + "epoch": 0.39356540084388186, + "grad_norm": 0.5234997272491455, + "learning_rate": 0.0015, + "loss": 2.2324, + "step": 3731 + }, + { + "epoch": 0.39367088607594936, + "grad_norm": 0.4390241801738739, + "learning_rate": 0.0015, + "loss": 2.2589, + "step": 3732 + }, + { + "epoch": 0.3937763713080169, + "grad_norm": 0.403001606464386, + "learning_rate": 0.0015, + "loss": 2.2354, + "step": 3733 + }, + { + "epoch": 0.3938818565400844, + "grad_norm": 0.4310881495475769, + "learning_rate": 0.0015, + "loss": 2.2294, + "step": 3734 + }, + { + "epoch": 0.3939873417721519, + "grad_norm": 0.4560384452342987, + "learning_rate": 0.0015, + "loss": 2.2496, + "step": 3735 + }, + { + "epoch": 0.39409282700421944, + "grad_norm": 0.44698700308799744, + "learning_rate": 0.0015, + "loss": 2.2379, + "step": 3736 + }, + { + "epoch": 0.39419831223628693, + "grad_norm": 0.4883176386356354, + "learning_rate": 0.0015, + "loss": 2.2073, + "step": 3737 + }, + { + "epoch": 0.3943037974683544, + "grad_norm": 0.48100632429122925, + "learning_rate": 0.0015, + "loss": 2.2084, + "step": 3738 + }, + { + "epoch": 0.3944092827004219, + "grad_norm": 0.3731934726238251, + "learning_rate": 0.0015, + "loss": 2.1999, + "step": 3739 + }, + { + "epoch": 0.39451476793248946, + "grad_norm": 0.5244914293289185, 
+ "learning_rate": 0.0015, + "loss": 2.2309, + "step": 3740 + }, + { + "epoch": 0.39462025316455696, + "grad_norm": 0.48470228910446167, + "learning_rate": 0.0015, + "loss": 2.2478, + "step": 3741 + }, + { + "epoch": 0.39472573839662445, + "grad_norm": 0.3829101622104645, + "learning_rate": 0.0015, + "loss": 2.2298, + "step": 3742 + }, + { + "epoch": 0.394831223628692, + "grad_norm": 0.41840583086013794, + "learning_rate": 0.0015, + "loss": 2.226, + "step": 3743 + }, + { + "epoch": 0.3949367088607595, + "grad_norm": 0.3842753767967224, + "learning_rate": 0.0015, + "loss": 2.2897, + "step": 3744 + }, + { + "epoch": 0.395042194092827, + "grad_norm": 0.4292815029621124, + "learning_rate": 0.0015, + "loss": 2.2072, + "step": 3745 + }, + { + "epoch": 0.39514767932489453, + "grad_norm": 0.5336382985115051, + "learning_rate": 0.0015, + "loss": 2.2387, + "step": 3746 + }, + { + "epoch": 0.395253164556962, + "grad_norm": 0.3524492681026459, + "learning_rate": 0.0015, + "loss": 2.2497, + "step": 3747 + }, + { + "epoch": 0.3953586497890295, + "grad_norm": 0.4694857895374298, + "learning_rate": 0.0015, + "loss": 2.2211, + "step": 3748 + }, + { + "epoch": 0.39546413502109706, + "grad_norm": 0.42999398708343506, + "learning_rate": 0.0015, + "loss": 2.1864, + "step": 3749 + }, + { + "epoch": 0.39556962025316456, + "grad_norm": 0.38292622566223145, + "learning_rate": 0.0015, + "loss": 2.2113, + "step": 3750 + }, + { + "epoch": 0.39567510548523205, + "grad_norm": 0.49523741006851196, + "learning_rate": 0.0015, + "loss": 2.2323, + "step": 3751 + }, + { + "epoch": 0.3957805907172996, + "grad_norm": 0.4767902195453644, + "learning_rate": 0.0015, + "loss": 2.2396, + "step": 3752 + }, + { + "epoch": 0.3958860759493671, + "grad_norm": 0.39093178510665894, + "learning_rate": 0.0015, + "loss": 2.2349, + "step": 3753 + }, + { + "epoch": 0.3959915611814346, + "grad_norm": 0.5042339563369751, + "learning_rate": 0.0015, + "loss": 2.2355, + "step": 3754 + }, + { + "epoch": 0.39609704641350213, 
+ "grad_norm": 0.40422695875167847, + "learning_rate": 0.0015, + "loss": 2.215, + "step": 3755 + }, + { + "epoch": 0.3962025316455696, + "grad_norm": 0.42472904920578003, + "learning_rate": 0.0015, + "loss": 2.2342, + "step": 3756 + }, + { + "epoch": 0.3963080168776371, + "grad_norm": 0.38626599311828613, + "learning_rate": 0.0015, + "loss": 2.2362, + "step": 3757 + }, + { + "epoch": 0.39641350210970466, + "grad_norm": 0.3602786660194397, + "learning_rate": 0.0015, + "loss": 2.2197, + "step": 3758 + }, + { + "epoch": 0.39651898734177216, + "grad_norm": 0.3964826166629791, + "learning_rate": 0.0015, + "loss": 2.1989, + "step": 3759 + }, + { + "epoch": 0.39662447257383965, + "grad_norm": 0.3751980662345886, + "learning_rate": 0.0015, + "loss": 2.2214, + "step": 3760 + }, + { + "epoch": 0.3967299578059072, + "grad_norm": 0.36858147382736206, + "learning_rate": 0.0015, + "loss": 2.2445, + "step": 3761 + }, + { + "epoch": 0.3968354430379747, + "grad_norm": 0.37675338983535767, + "learning_rate": 0.0015, + "loss": 2.2146, + "step": 3762 + }, + { + "epoch": 0.3969409282700422, + "grad_norm": 0.4288838803768158, + "learning_rate": 0.0015, + "loss": 2.2326, + "step": 3763 + }, + { + "epoch": 0.39704641350210973, + "grad_norm": 0.37312623858451843, + "learning_rate": 0.0015, + "loss": 2.1959, + "step": 3764 + }, + { + "epoch": 0.3971518987341772, + "grad_norm": 0.38050201535224915, + "learning_rate": 0.0015, + "loss": 2.2264, + "step": 3765 + }, + { + "epoch": 0.3972573839662447, + "grad_norm": 0.3667964041233063, + "learning_rate": 0.0015, + "loss": 2.222, + "step": 3766 + }, + { + "epoch": 0.39736286919831226, + "grad_norm": 0.3766205310821533, + "learning_rate": 0.0015, + "loss": 2.2608, + "step": 3767 + }, + { + "epoch": 0.39746835443037976, + "grad_norm": 0.35654884576797485, + "learning_rate": 0.0015, + "loss": 2.2637, + "step": 3768 + }, + { + "epoch": 0.39757383966244725, + "grad_norm": 0.3699779212474823, + "learning_rate": 0.0015, + "loss": 2.2244, + "step": 3769 + 
}, + { + "epoch": 0.39767932489451474, + "grad_norm": 0.3579014837741852, + "learning_rate": 0.0015, + "loss": 2.2221, + "step": 3770 + }, + { + "epoch": 0.3977848101265823, + "grad_norm": 0.3575526475906372, + "learning_rate": 0.0015, + "loss": 2.2073, + "step": 3771 + }, + { + "epoch": 0.3978902953586498, + "grad_norm": 0.36731207370758057, + "learning_rate": 0.0015, + "loss": 2.2178, + "step": 3772 + }, + { + "epoch": 0.3979957805907173, + "grad_norm": 0.3816739618778229, + "learning_rate": 0.0015, + "loss": 2.2266, + "step": 3773 + }, + { + "epoch": 0.3981012658227848, + "grad_norm": 0.3786596357822418, + "learning_rate": 0.0015, + "loss": 2.2379, + "step": 3774 + }, + { + "epoch": 0.3982067510548523, + "grad_norm": 0.4580654203891754, + "learning_rate": 0.0015, + "loss": 2.2075, + "step": 3775 + }, + { + "epoch": 0.3983122362869198, + "grad_norm": 0.5100055932998657, + "learning_rate": 0.0015, + "loss": 2.2147, + "step": 3776 + }, + { + "epoch": 0.39841772151898736, + "grad_norm": 0.4485255777835846, + "learning_rate": 0.0015, + "loss": 2.2017, + "step": 3777 + }, + { + "epoch": 0.39852320675105485, + "grad_norm": 0.4088060259819031, + "learning_rate": 0.0015, + "loss": 2.2455, + "step": 3778 + }, + { + "epoch": 0.39862869198312234, + "grad_norm": 0.583339273929596, + "learning_rate": 0.0015, + "loss": 2.2268, + "step": 3779 + }, + { + "epoch": 0.3987341772151899, + "grad_norm": 0.47858431935310364, + "learning_rate": 0.0015, + "loss": 2.1977, + "step": 3780 + }, + { + "epoch": 0.3988396624472574, + "grad_norm": 0.4496332108974457, + "learning_rate": 0.0015, + "loss": 2.2045, + "step": 3781 + }, + { + "epoch": 0.3989451476793249, + "grad_norm": 0.5671302676200867, + "learning_rate": 0.0015, + "loss": 2.2229, + "step": 3782 + }, + { + "epoch": 0.3990506329113924, + "grad_norm": 0.3979887068271637, + "learning_rate": 0.0015, + "loss": 2.23, + "step": 3783 + }, + { + "epoch": 0.3991561181434599, + "grad_norm": 0.42422643303871155, + "learning_rate": 0.0015, + 
"loss": 2.2116, + "step": 3784 + }, + { + "epoch": 0.3992616033755274, + "grad_norm": 0.4017632305622101, + "learning_rate": 0.0015, + "loss": 2.2618, + "step": 3785 + }, + { + "epoch": 0.39936708860759496, + "grad_norm": 0.3812304437160492, + "learning_rate": 0.0015, + "loss": 2.2557, + "step": 3786 + }, + { + "epoch": 0.39947257383966245, + "grad_norm": 0.3994193375110626, + "learning_rate": 0.0015, + "loss": 2.1925, + "step": 3787 + }, + { + "epoch": 0.39957805907172994, + "grad_norm": 0.40743017196655273, + "learning_rate": 0.0015, + "loss": 2.239, + "step": 3788 + }, + { + "epoch": 0.3996835443037975, + "grad_norm": 0.42304110527038574, + "learning_rate": 0.0015, + "loss": 2.209, + "step": 3789 + }, + { + "epoch": 0.399789029535865, + "grad_norm": 0.34740471839904785, + "learning_rate": 0.0015, + "loss": 2.2083, + "step": 3790 + }, + { + "epoch": 0.3998945147679325, + "grad_norm": 0.40840253233909607, + "learning_rate": 0.0015, + "loss": 2.2115, + "step": 3791 + }, + { + "epoch": 0.4, + "grad_norm": 0.3351449966430664, + "learning_rate": 0.0015, + "loss": 2.1981, + "step": 3792 + }, + { + "epoch": 0.4001054852320675, + "grad_norm": 0.39813634753227234, + "learning_rate": 0.0015, + "loss": 2.2525, + "step": 3793 + }, + { + "epoch": 0.400210970464135, + "grad_norm": 0.35810524225234985, + "learning_rate": 0.0015, + "loss": 2.229, + "step": 3794 + }, + { + "epoch": 0.40031645569620256, + "grad_norm": 0.3880290985107422, + "learning_rate": 0.0015, + "loss": 2.2363, + "step": 3795 + }, + { + "epoch": 0.40042194092827005, + "grad_norm": 0.3651406466960907, + "learning_rate": 0.0015, + "loss": 2.2645, + "step": 3796 + }, + { + "epoch": 0.40052742616033754, + "grad_norm": 0.35373181104660034, + "learning_rate": 0.0015, + "loss": 2.211, + "step": 3797 + }, + { + "epoch": 0.4006329113924051, + "grad_norm": 0.40690967440605164, + "learning_rate": 0.0015, + "loss": 2.2464, + "step": 3798 + }, + { + "epoch": 0.4007383966244726, + "grad_norm": 0.406516969203949, + 
"learning_rate": 0.0015, + "loss": 2.1762, + "step": 3799 + }, + { + "epoch": 0.4008438818565401, + "grad_norm": 0.39239391684532166, + "learning_rate": 0.0015, + "loss": 2.2137, + "step": 3800 + }, + { + "epoch": 0.4009493670886076, + "grad_norm": 0.4261245131492615, + "learning_rate": 0.0015, + "loss": 2.1849, + "step": 3801 + }, + { + "epoch": 0.4010548523206751, + "grad_norm": 0.4522721767425537, + "learning_rate": 0.0015, + "loss": 2.1941, + "step": 3802 + }, + { + "epoch": 0.4011603375527426, + "grad_norm": 0.42487195134162903, + "learning_rate": 0.0015, + "loss": 2.2591, + "step": 3803 + }, + { + "epoch": 0.4012658227848101, + "grad_norm": 0.3584560453891754, + "learning_rate": 0.0015, + "loss": 2.2417, + "step": 3804 + }, + { + "epoch": 0.40137130801687765, + "grad_norm": 0.36932942271232605, + "learning_rate": 0.0015, + "loss": 2.1891, + "step": 3805 + }, + { + "epoch": 0.40147679324894514, + "grad_norm": 0.4123816192150116, + "learning_rate": 0.0015, + "loss": 2.1743, + "step": 3806 + }, + { + "epoch": 0.40158227848101263, + "grad_norm": 0.3215305507183075, + "learning_rate": 0.0015, + "loss": 2.1963, + "step": 3807 + }, + { + "epoch": 0.4016877637130802, + "grad_norm": 0.42769691348075867, + "learning_rate": 0.0015, + "loss": 2.2163, + "step": 3808 + }, + { + "epoch": 0.4017932489451477, + "grad_norm": 0.3901493549346924, + "learning_rate": 0.0015, + "loss": 2.2309, + "step": 3809 + }, + { + "epoch": 0.40189873417721517, + "grad_norm": 0.42822906374931335, + "learning_rate": 0.0015, + "loss": 2.2402, + "step": 3810 + }, + { + "epoch": 0.4020042194092827, + "grad_norm": 0.5289933085441589, + "learning_rate": 0.0015, + "loss": 2.2041, + "step": 3811 + }, + { + "epoch": 0.4021097046413502, + "grad_norm": 0.4178166091442108, + "learning_rate": 0.0015, + "loss": 2.2146, + "step": 3812 + }, + { + "epoch": 0.4022151898734177, + "grad_norm": 0.39929911494255066, + "learning_rate": 0.0015, + "loss": 2.1931, + "step": 3813 + }, + { + "epoch": 0.40232067510548525, 
+ "grad_norm": 0.480050653219223, + "learning_rate": 0.0015, + "loss": 2.2018, + "step": 3814 + }, + { + "epoch": 0.40242616033755274, + "grad_norm": 0.3818921446800232, + "learning_rate": 0.0015, + "loss": 2.224, + "step": 3815 + }, + { + "epoch": 0.40253164556962023, + "grad_norm": 0.4308498501777649, + "learning_rate": 0.0015, + "loss": 2.2091, + "step": 3816 + }, + { + "epoch": 0.4026371308016878, + "grad_norm": 0.49613556265830994, + "learning_rate": 0.0015, + "loss": 2.1917, + "step": 3817 + }, + { + "epoch": 0.4027426160337553, + "grad_norm": 0.3991309702396393, + "learning_rate": 0.0015, + "loss": 2.1921, + "step": 3818 + }, + { + "epoch": 0.40284810126582277, + "grad_norm": 0.3875136375427246, + "learning_rate": 0.0015, + "loss": 2.2373, + "step": 3819 + }, + { + "epoch": 0.4029535864978903, + "grad_norm": 0.4296235740184784, + "learning_rate": 0.0015, + "loss": 2.1766, + "step": 3820 + }, + { + "epoch": 0.4030590717299578, + "grad_norm": 0.4313364028930664, + "learning_rate": 0.0015, + "loss": 2.2122, + "step": 3821 + }, + { + "epoch": 0.4031645569620253, + "grad_norm": 0.44212251901626587, + "learning_rate": 0.0015, + "loss": 2.166, + "step": 3822 + }, + { + "epoch": 0.40327004219409285, + "grad_norm": 0.5144267678260803, + "learning_rate": 0.0015, + "loss": 2.2153, + "step": 3823 + }, + { + "epoch": 0.40337552742616034, + "grad_norm": 0.43220630288124084, + "learning_rate": 0.0015, + "loss": 2.2167, + "step": 3824 + }, + { + "epoch": 0.40348101265822783, + "grad_norm": 0.5008938312530518, + "learning_rate": 0.0015, + "loss": 2.2196, + "step": 3825 + }, + { + "epoch": 0.4035864978902954, + "grad_norm": 0.5035905838012695, + "learning_rate": 0.0015, + "loss": 2.2283, + "step": 3826 + }, + { + "epoch": 0.4036919831223629, + "grad_norm": 0.35392504930496216, + "learning_rate": 0.0015, + "loss": 2.2112, + "step": 3827 + }, + { + "epoch": 0.40379746835443037, + "grad_norm": 0.5169402360916138, + "learning_rate": 0.0015, + "loss": 2.2022, + "step": 3828 + }, + 
{ + "epoch": 0.4039029535864979, + "grad_norm": 0.44294649362564087, + "learning_rate": 0.0015, + "loss": 2.1861, + "step": 3829 + }, + { + "epoch": 0.4040084388185654, + "grad_norm": 0.3918903172016144, + "learning_rate": 0.0015, + "loss": 2.1913, + "step": 3830 + }, + { + "epoch": 0.4041139240506329, + "grad_norm": 0.4181306064128876, + "learning_rate": 0.0015, + "loss": 2.2085, + "step": 3831 + }, + { + "epoch": 0.40421940928270045, + "grad_norm": 0.4784049391746521, + "learning_rate": 0.0015, + "loss": 2.2268, + "step": 3832 + }, + { + "epoch": 0.40432489451476794, + "grad_norm": 0.3494509160518646, + "learning_rate": 0.0015, + "loss": 2.1975, + "step": 3833 + }, + { + "epoch": 0.40443037974683543, + "grad_norm": 0.4354662299156189, + "learning_rate": 0.0015, + "loss": 2.2032, + "step": 3834 + }, + { + "epoch": 0.4045358649789029, + "grad_norm": 0.3652125895023346, + "learning_rate": 0.0015, + "loss": 2.2057, + "step": 3835 + }, + { + "epoch": 0.4046413502109705, + "grad_norm": 0.41151949763298035, + "learning_rate": 0.0015, + "loss": 2.2254, + "step": 3836 + }, + { + "epoch": 0.40474683544303797, + "grad_norm": 0.43047064542770386, + "learning_rate": 0.0015, + "loss": 2.2081, + "step": 3837 + }, + { + "epoch": 0.40485232067510546, + "grad_norm": 0.35809412598609924, + "learning_rate": 0.0015, + "loss": 2.2268, + "step": 3838 + }, + { + "epoch": 0.404957805907173, + "grad_norm": 0.4212457835674286, + "learning_rate": 0.0015, + "loss": 2.2606, + "step": 3839 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 0.3913457691669464, + "learning_rate": 0.0015, + "loss": 2.2352, + "step": 3840 + }, + { + "epoch": 0.405168776371308, + "grad_norm": 0.41688042879104614, + "learning_rate": 0.0015, + "loss": 2.2254, + "step": 3841 + }, + { + "epoch": 0.40527426160337554, + "grad_norm": 0.4290103316307068, + "learning_rate": 0.0015, + "loss": 2.2036, + "step": 3842 + }, + { + "epoch": 0.40537974683544303, + "grad_norm": 0.4017077088356018, + "learning_rate": 0.0015, + 
"loss": 2.2064, + "step": 3843 + }, + { + "epoch": 0.4054852320675105, + "grad_norm": 0.39683783054351807, + "learning_rate": 0.0015, + "loss": 2.2048, + "step": 3844 + }, + { + "epoch": 0.4055907172995781, + "grad_norm": 0.3884183466434479, + "learning_rate": 0.0015, + "loss": 2.2358, + "step": 3845 + }, + { + "epoch": 0.40569620253164557, + "grad_norm": 0.3879995346069336, + "learning_rate": 0.0015, + "loss": 2.1951, + "step": 3846 + }, + { + "epoch": 0.40580168776371306, + "grad_norm": 0.3773990273475647, + "learning_rate": 0.0015, + "loss": 2.18, + "step": 3847 + }, + { + "epoch": 0.4059071729957806, + "grad_norm": 0.3869315981864929, + "learning_rate": 0.0015, + "loss": 2.2117, + "step": 3848 + }, + { + "epoch": 0.4060126582278481, + "grad_norm": 0.37321239709854126, + "learning_rate": 0.0015, + "loss": 2.1983, + "step": 3849 + }, + { + "epoch": 0.4061181434599156, + "grad_norm": 0.39391371607780457, + "learning_rate": 0.0015, + "loss": 2.1967, + "step": 3850 + }, + { + "epoch": 0.40622362869198314, + "grad_norm": 0.38085445761680603, + "learning_rate": 0.0015, + "loss": 2.2577, + "step": 3851 + }, + { + "epoch": 0.40632911392405063, + "grad_norm": 0.39738813042640686, + "learning_rate": 0.0015, + "loss": 2.2264, + "step": 3852 + }, + { + "epoch": 0.4064345991561181, + "grad_norm": 0.40257975459098816, + "learning_rate": 0.0015, + "loss": 2.2345, + "step": 3853 + }, + { + "epoch": 0.4065400843881857, + "grad_norm": 0.364960253238678, + "learning_rate": 0.0015, + "loss": 2.2248, + "step": 3854 + }, + { + "epoch": 0.40664556962025317, + "grad_norm": 0.4040592908859253, + "learning_rate": 0.0015, + "loss": 2.238, + "step": 3855 + }, + { + "epoch": 0.40675105485232066, + "grad_norm": 0.4107151925563812, + "learning_rate": 0.0015, + "loss": 2.2536, + "step": 3856 + }, + { + "epoch": 0.4068565400843882, + "grad_norm": 0.3970092833042145, + "learning_rate": 0.0015, + "loss": 2.2024, + "step": 3857 + }, + { + "epoch": 0.4069620253164557, + "grad_norm": 
0.38208889961242676, + "learning_rate": 0.0015, + "loss": 2.2206, + "step": 3858 + }, + { + "epoch": 0.4070675105485232, + "grad_norm": 0.3948633372783661, + "learning_rate": 0.0015, + "loss": 2.2039, + "step": 3859 + }, + { + "epoch": 0.40717299578059074, + "grad_norm": 0.4279940724372864, + "learning_rate": 0.0015, + "loss": 2.1896, + "step": 3860 + }, + { + "epoch": 0.40727848101265823, + "grad_norm": 0.4384349286556244, + "learning_rate": 0.0015, + "loss": 2.198, + "step": 3861 + }, + { + "epoch": 0.4073839662447257, + "grad_norm": 0.3675940930843353, + "learning_rate": 0.0015, + "loss": 2.2018, + "step": 3862 + }, + { + "epoch": 0.4074894514767933, + "grad_norm": 0.3540358543395996, + "learning_rate": 0.0015, + "loss": 2.2248, + "step": 3863 + }, + { + "epoch": 0.40759493670886077, + "grad_norm": 0.3667376935482025, + "learning_rate": 0.0015, + "loss": 2.1817, + "step": 3864 + }, + { + "epoch": 0.40770042194092826, + "grad_norm": 0.3719305992126465, + "learning_rate": 0.0015, + "loss": 2.2028, + "step": 3865 + }, + { + "epoch": 0.4078059071729958, + "grad_norm": 0.4065187871456146, + "learning_rate": 0.0015, + "loss": 2.2238, + "step": 3866 + }, + { + "epoch": 0.4079113924050633, + "grad_norm": 0.4817030429840088, + "learning_rate": 0.0015, + "loss": 2.211, + "step": 3867 + }, + { + "epoch": 0.4080168776371308, + "grad_norm": 0.4229491949081421, + "learning_rate": 0.0015, + "loss": 2.1813, + "step": 3868 + }, + { + "epoch": 0.4081223628691983, + "grad_norm": 0.4566723108291626, + "learning_rate": 0.0015, + "loss": 2.2271, + "step": 3869 + }, + { + "epoch": 0.40822784810126583, + "grad_norm": 0.4807823896408081, + "learning_rate": 0.0015, + "loss": 2.2194, + "step": 3870 + }, + { + "epoch": 0.4083333333333333, + "grad_norm": 0.4109219014644623, + "learning_rate": 0.0015, + "loss": 2.1828, + "step": 3871 + }, + { + "epoch": 0.4084388185654008, + "grad_norm": 0.4462801218032837, + "learning_rate": 0.0015, + "loss": 2.2596, + "step": 3872 + }, + { + "epoch": 
0.40854430379746837, + "grad_norm": 0.5492852926254272, + "learning_rate": 0.0015, + "loss": 2.2167, + "step": 3873 + }, + { + "epoch": 0.40864978902953586, + "grad_norm": 0.4629358649253845, + "learning_rate": 0.0015, + "loss": 2.218, + "step": 3874 + }, + { + "epoch": 0.40875527426160335, + "grad_norm": 0.40620023012161255, + "learning_rate": 0.0015, + "loss": 2.2013, + "step": 3875 + }, + { + "epoch": 0.4088607594936709, + "grad_norm": 0.488741934299469, + "learning_rate": 0.0015, + "loss": 2.189, + "step": 3876 + }, + { + "epoch": 0.4089662447257384, + "grad_norm": 0.3957139551639557, + "learning_rate": 0.0015, + "loss": 2.1812, + "step": 3877 + }, + { + "epoch": 0.4090717299578059, + "grad_norm": 0.3874252736568451, + "learning_rate": 0.0015, + "loss": 2.1631, + "step": 3878 + }, + { + "epoch": 0.40917721518987343, + "grad_norm": 0.4932963252067566, + "learning_rate": 0.0015, + "loss": 2.2104, + "step": 3879 + }, + { + "epoch": 0.4092827004219409, + "grad_norm": 0.3693149983882904, + "learning_rate": 0.0015, + "loss": 2.1606, + "step": 3880 + }, + { + "epoch": 0.4093881856540084, + "grad_norm": 0.4175870716571808, + "learning_rate": 0.0015, + "loss": 2.2149, + "step": 3881 + }, + { + "epoch": 0.40949367088607597, + "grad_norm": 0.366446852684021, + "learning_rate": 0.0015, + "loss": 2.2036, + "step": 3882 + }, + { + "epoch": 0.40959915611814346, + "grad_norm": 0.4204014241695404, + "learning_rate": 0.0015, + "loss": 2.1895, + "step": 3883 + }, + { + "epoch": 0.40970464135021095, + "grad_norm": 0.44405174255371094, + "learning_rate": 0.0015, + "loss": 2.1885, + "step": 3884 + }, + { + "epoch": 0.4098101265822785, + "grad_norm": 0.39356327056884766, + "learning_rate": 0.0015, + "loss": 2.1847, + "step": 3885 + }, + { + "epoch": 0.409915611814346, + "grad_norm": 0.4243465065956116, + "learning_rate": 0.0015, + "loss": 2.2002, + "step": 3886 + }, + { + "epoch": 0.4100210970464135, + "grad_norm": 0.3558335304260254, + "learning_rate": 0.0015, + "loss": 2.2153, + 
"step": 3887 + }, + { + "epoch": 0.41012658227848103, + "grad_norm": 0.386326402425766, + "learning_rate": 0.0015, + "loss": 2.2079, + "step": 3888 + }, + { + "epoch": 0.4102320675105485, + "grad_norm": 0.38505589962005615, + "learning_rate": 0.0015, + "loss": 2.1808, + "step": 3889 + }, + { + "epoch": 0.410337552742616, + "grad_norm": 0.3998761773109436, + "learning_rate": 0.0015, + "loss": 2.1833, + "step": 3890 + }, + { + "epoch": 0.41044303797468357, + "grad_norm": 0.38191860914230347, + "learning_rate": 0.0015, + "loss": 2.1693, + "step": 3891 + }, + { + "epoch": 0.41054852320675106, + "grad_norm": 0.3897309899330139, + "learning_rate": 0.0015, + "loss": 2.2261, + "step": 3892 + }, + { + "epoch": 0.41065400843881855, + "grad_norm": 0.4735731780529022, + "learning_rate": 0.0015, + "loss": 2.2282, + "step": 3893 + }, + { + "epoch": 0.4107594936708861, + "grad_norm": 0.45027872920036316, + "learning_rate": 0.0015, + "loss": 2.2171, + "step": 3894 + }, + { + "epoch": 0.4108649789029536, + "grad_norm": 0.3872361183166504, + "learning_rate": 0.0015, + "loss": 2.2063, + "step": 3895 + }, + { + "epoch": 0.4109704641350211, + "grad_norm": 0.44448792934417725, + "learning_rate": 0.0015, + "loss": 2.1732, + "step": 3896 + }, + { + "epoch": 0.41107594936708863, + "grad_norm": 0.3881934881210327, + "learning_rate": 0.0015, + "loss": 2.188, + "step": 3897 + }, + { + "epoch": 0.4111814345991561, + "grad_norm": 0.43452608585357666, + "learning_rate": 0.0015, + "loss": 2.1798, + "step": 3898 + }, + { + "epoch": 0.4112869198312236, + "grad_norm": 0.3892233669757843, + "learning_rate": 0.0015, + "loss": 2.1711, + "step": 3899 + }, + { + "epoch": 0.41139240506329117, + "grad_norm": 0.370259553194046, + "learning_rate": 0.0015, + "loss": 2.2218, + "step": 3900 + }, + { + "epoch": 0.41149789029535866, + "grad_norm": 0.4103665053844452, + "learning_rate": 0.0015, + "loss": 2.184, + "step": 3901 + }, + { + "epoch": 0.41160337552742615, + "grad_norm": 0.36754587292671204, + 
"learning_rate": 0.0015, + "loss": 2.2325, + "step": 3902 + }, + { + "epoch": 0.41170886075949364, + "grad_norm": 0.3499051034450531, + "learning_rate": 0.0015, + "loss": 2.2001, + "step": 3903 + }, + { + "epoch": 0.4118143459915612, + "grad_norm": 0.4144093990325928, + "learning_rate": 0.0015, + "loss": 2.1882, + "step": 3904 + }, + { + "epoch": 0.4119198312236287, + "grad_norm": 0.33557072281837463, + "learning_rate": 0.0015, + "loss": 2.1883, + "step": 3905 + }, + { + "epoch": 0.4120253164556962, + "grad_norm": 0.38060659170150757, + "learning_rate": 0.0015, + "loss": 2.201, + "step": 3906 + }, + { + "epoch": 0.4121308016877637, + "grad_norm": 0.3384730815887451, + "learning_rate": 0.0015, + "loss": 2.1551, + "step": 3907 + }, + { + "epoch": 0.4122362869198312, + "grad_norm": 0.45019224286079407, + "learning_rate": 0.0015, + "loss": 2.1946, + "step": 3908 + }, + { + "epoch": 0.4123417721518987, + "grad_norm": 0.43915703892707825, + "learning_rate": 0.0015, + "loss": 2.1708, + "step": 3909 + }, + { + "epoch": 0.41244725738396626, + "grad_norm": 0.41329091787338257, + "learning_rate": 0.0015, + "loss": 2.2428, + "step": 3910 + }, + { + "epoch": 0.41255274261603375, + "grad_norm": 0.4476642608642578, + "learning_rate": 0.0015, + "loss": 2.2193, + "step": 3911 + }, + { + "epoch": 0.41265822784810124, + "grad_norm": 0.37301838397979736, + "learning_rate": 0.0015, + "loss": 2.2258, + "step": 3912 + }, + { + "epoch": 0.4127637130801688, + "grad_norm": 0.4124680757522583, + "learning_rate": 0.0015, + "loss": 2.2212, + "step": 3913 + }, + { + "epoch": 0.4128691983122363, + "grad_norm": 0.4643934667110443, + "learning_rate": 0.0015, + "loss": 2.1473, + "step": 3914 + }, + { + "epoch": 0.4129746835443038, + "grad_norm": 0.4440596103668213, + "learning_rate": 0.0015, + "loss": 2.1963, + "step": 3915 + }, + { + "epoch": 0.4130801687763713, + "grad_norm": 0.4357234835624695, + "learning_rate": 0.0015, + "loss": 2.1879, + "step": 3916 + }, + { + "epoch": 0.4131856540084388, + 
"grad_norm": 0.4586533308029175, + "learning_rate": 0.0015, + "loss": 2.1737, + "step": 3917 + }, + { + "epoch": 0.4132911392405063, + "grad_norm": 0.46765103936195374, + "learning_rate": 0.0015, + "loss": 2.2085, + "step": 3918 + }, + { + "epoch": 0.41339662447257386, + "grad_norm": 0.39478179812431335, + "learning_rate": 0.0015, + "loss": 2.184, + "step": 3919 + }, + { + "epoch": 0.41350210970464135, + "grad_norm": 0.4935877323150635, + "learning_rate": 0.0015, + "loss": 2.2214, + "step": 3920 + }, + { + "epoch": 0.41360759493670884, + "grad_norm": 0.44667690992355347, + "learning_rate": 0.0015, + "loss": 2.1952, + "step": 3921 + }, + { + "epoch": 0.4137130801687764, + "grad_norm": 0.41009122133255005, + "learning_rate": 0.0015, + "loss": 2.2235, + "step": 3922 + }, + { + "epoch": 0.4138185654008439, + "grad_norm": 0.48189112544059753, + "learning_rate": 0.0015, + "loss": 2.2018, + "step": 3923 + }, + { + "epoch": 0.4139240506329114, + "grad_norm": 0.3616495728492737, + "learning_rate": 0.0015, + "loss": 2.1912, + "step": 3924 + }, + { + "epoch": 0.4140295358649789, + "grad_norm": 0.438299298286438, + "learning_rate": 0.0015, + "loss": 2.1486, + "step": 3925 + }, + { + "epoch": 0.4141350210970464, + "grad_norm": 0.3888595700263977, + "learning_rate": 0.0015, + "loss": 2.1995, + "step": 3926 + }, + { + "epoch": 0.4142405063291139, + "grad_norm": 0.36905616521835327, + "learning_rate": 0.0015, + "loss": 2.1982, + "step": 3927 + }, + { + "epoch": 0.41434599156118146, + "grad_norm": 0.39376381039619446, + "learning_rate": 0.0015, + "loss": 2.2115, + "step": 3928 + }, + { + "epoch": 0.41445147679324895, + "grad_norm": 0.37106770277023315, + "learning_rate": 0.0015, + "loss": 2.2059, + "step": 3929 + }, + { + "epoch": 0.41455696202531644, + "grad_norm": 0.3912833631038666, + "learning_rate": 0.0015, + "loss": 2.2035, + "step": 3930 + }, + { + "epoch": 0.414662447257384, + "grad_norm": 0.3921981453895569, + "learning_rate": 0.0015, + "loss": 2.1561, + "step": 3931 + }, 
+ { + "epoch": 0.4147679324894515, + "grad_norm": 0.3936716318130493, + "learning_rate": 0.0015, + "loss": 2.1917, + "step": 3932 + }, + { + "epoch": 0.414873417721519, + "grad_norm": 0.38278448581695557, + "learning_rate": 0.0015, + "loss": 2.1976, + "step": 3933 + }, + { + "epoch": 0.41497890295358647, + "grad_norm": 0.36213359236717224, + "learning_rate": 0.0015, + "loss": 2.1937, + "step": 3934 + }, + { + "epoch": 0.415084388185654, + "grad_norm": 0.4165155589580536, + "learning_rate": 0.0015, + "loss": 2.2086, + "step": 3935 + }, + { + "epoch": 0.4151898734177215, + "grad_norm": 0.3701380789279938, + "learning_rate": 0.0015, + "loss": 2.1835, + "step": 3936 + }, + { + "epoch": 0.415295358649789, + "grad_norm": 0.41745173931121826, + "learning_rate": 0.0015, + "loss": 2.2116, + "step": 3937 + }, + { + "epoch": 0.41540084388185655, + "grad_norm": 0.36424514651298523, + "learning_rate": 0.0015, + "loss": 2.1605, + "step": 3938 + }, + { + "epoch": 0.41550632911392404, + "grad_norm": 0.4006170332431793, + "learning_rate": 0.0015, + "loss": 2.2146, + "step": 3939 + }, + { + "epoch": 0.41561181434599154, + "grad_norm": 0.4263356029987335, + "learning_rate": 0.0015, + "loss": 2.2084, + "step": 3940 + }, + { + "epoch": 0.4157172995780591, + "grad_norm": 0.3593056797981262, + "learning_rate": 0.0015, + "loss": 2.1975, + "step": 3941 + }, + { + "epoch": 0.4158227848101266, + "grad_norm": 0.36087146401405334, + "learning_rate": 0.0015, + "loss": 2.2125, + "step": 3942 + }, + { + "epoch": 0.41592827004219407, + "grad_norm": 0.3814224898815155, + "learning_rate": 0.0015, + "loss": 2.1961, + "step": 3943 + }, + { + "epoch": 0.4160337552742616, + "grad_norm": 0.33020615577697754, + "learning_rate": 0.0015, + "loss": 2.2196, + "step": 3944 + }, + { + "epoch": 0.4161392405063291, + "grad_norm": 0.39744871854782104, + "learning_rate": 0.0015, + "loss": 2.1869, + "step": 3945 + }, + { + "epoch": 0.4162447257383966, + "grad_norm": 0.34757885336875916, + "learning_rate": 0.0015, + 
"loss": 2.1816, + "step": 3946 + }, + { + "epoch": 0.41635021097046415, + "grad_norm": 0.33559584617614746, + "learning_rate": 0.0015, + "loss": 2.1986, + "step": 3947 + }, + { + "epoch": 0.41645569620253164, + "grad_norm": 0.3315998911857605, + "learning_rate": 0.0015, + "loss": 2.2013, + "step": 3948 + }, + { + "epoch": 0.41656118143459914, + "grad_norm": 0.3618162274360657, + "learning_rate": 0.0015, + "loss": 2.1833, + "step": 3949 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 0.3283132314682007, + "learning_rate": 0.0015, + "loss": 2.1867, + "step": 3950 + }, + { + "epoch": 0.4167721518987342, + "grad_norm": 0.3774600028991699, + "learning_rate": 0.0015, + "loss": 2.2056, + "step": 3951 + }, + { + "epoch": 0.41687763713080167, + "grad_norm": 0.34798070788383484, + "learning_rate": 0.0015, + "loss": 2.1962, + "step": 3952 + }, + { + "epoch": 0.4169831223628692, + "grad_norm": 0.386250764131546, + "learning_rate": 0.0015, + "loss": 2.2149, + "step": 3953 + }, + { + "epoch": 0.4170886075949367, + "grad_norm": 0.3519758880138397, + "learning_rate": 0.0015, + "loss": 2.2012, + "step": 3954 + }, + { + "epoch": 0.4171940928270042, + "grad_norm": 0.3738254904747009, + "learning_rate": 0.0015, + "loss": 2.1922, + "step": 3955 + }, + { + "epoch": 0.41729957805907175, + "grad_norm": 0.41641995310783386, + "learning_rate": 0.0015, + "loss": 2.1816, + "step": 3956 + }, + { + "epoch": 0.41740506329113924, + "grad_norm": 0.3560936152935028, + "learning_rate": 0.0015, + "loss": 2.1984, + "step": 3957 + }, + { + "epoch": 0.41751054852320674, + "grad_norm": 0.38599705696105957, + "learning_rate": 0.0015, + "loss": 2.2224, + "step": 3958 + }, + { + "epoch": 0.4176160337552743, + "grad_norm": 0.38618093729019165, + "learning_rate": 0.0015, + "loss": 2.1581, + "step": 3959 + }, + { + "epoch": 0.4177215189873418, + "grad_norm": 0.37357139587402344, + "learning_rate": 0.0015, + "loss": 2.2057, + "step": 3960 + }, + { + "epoch": 0.41782700421940927, + "grad_norm": 
0.404980331659317, + "learning_rate": 0.0015, + "loss": 2.1704, + "step": 3961 + }, + { + "epoch": 0.4179324894514768, + "grad_norm": 0.4297816753387451, + "learning_rate": 0.0015, + "loss": 2.1848, + "step": 3962 + }, + { + "epoch": 0.4180379746835443, + "grad_norm": 0.3677355647087097, + "learning_rate": 0.0015, + "loss": 2.1781, + "step": 3963 + }, + { + "epoch": 0.4181434599156118, + "grad_norm": 0.38341450691223145, + "learning_rate": 0.0015, + "loss": 2.2031, + "step": 3964 + }, + { + "epoch": 0.41824894514767935, + "grad_norm": 0.43679922819137573, + "learning_rate": 0.0015, + "loss": 2.2124, + "step": 3965 + }, + { + "epoch": 0.41835443037974684, + "grad_norm": 0.4077657461166382, + "learning_rate": 0.0015, + "loss": 2.1689, + "step": 3966 + }, + { + "epoch": 0.41845991561181434, + "grad_norm": 0.3903958201408386, + "learning_rate": 0.0015, + "loss": 2.194, + "step": 3967 + }, + { + "epoch": 0.41856540084388183, + "grad_norm": 0.378981351852417, + "learning_rate": 0.0015, + "loss": 2.1949, + "step": 3968 + }, + { + "epoch": 0.4186708860759494, + "grad_norm": 0.39318740367889404, + "learning_rate": 0.0015, + "loss": 2.1982, + "step": 3969 + }, + { + "epoch": 0.41877637130801687, + "grad_norm": 0.44557270407676697, + "learning_rate": 0.0015, + "loss": 2.1767, + "step": 3970 + }, + { + "epoch": 0.41888185654008436, + "grad_norm": 0.38401105999946594, + "learning_rate": 0.0015, + "loss": 2.1996, + "step": 3971 + }, + { + "epoch": 0.4189873417721519, + "grad_norm": 0.40039366483688354, + "learning_rate": 0.0015, + "loss": 2.1859, + "step": 3972 + }, + { + "epoch": 0.4190928270042194, + "grad_norm": 0.48922812938690186, + "learning_rate": 0.0015, + "loss": 2.218, + "step": 3973 + }, + { + "epoch": 0.4191983122362869, + "grad_norm": 0.4407954514026642, + "learning_rate": 0.0015, + "loss": 2.1905, + "step": 3974 + }, + { + "epoch": 0.41930379746835444, + "grad_norm": 0.3963122069835663, + "learning_rate": 0.0015, + "loss": 2.2006, + "step": 3975 + }, + { + "epoch": 
0.41940928270042194, + "grad_norm": 0.4414304196834564, + "learning_rate": 0.0015, + "loss": 2.1773, + "step": 3976 + }, + { + "epoch": 0.41951476793248943, + "grad_norm": 0.4413270056247711, + "learning_rate": 0.0015, + "loss": 2.2026, + "step": 3977 + }, + { + "epoch": 0.419620253164557, + "grad_norm": 0.3964082598686218, + "learning_rate": 0.0015, + "loss": 2.155, + "step": 3978 + }, + { + "epoch": 0.41972573839662447, + "grad_norm": 0.404108464717865, + "learning_rate": 0.0015, + "loss": 2.2011, + "step": 3979 + }, + { + "epoch": 0.41983122362869196, + "grad_norm": 0.4288543462753296, + "learning_rate": 0.0015, + "loss": 2.215, + "step": 3980 + }, + { + "epoch": 0.4199367088607595, + "grad_norm": 0.3606318533420563, + "learning_rate": 0.0015, + "loss": 2.1817, + "step": 3981 + }, + { + "epoch": 0.420042194092827, + "grad_norm": 0.38253888487815857, + "learning_rate": 0.0015, + "loss": 2.1955, + "step": 3982 + }, + { + "epoch": 0.4201476793248945, + "grad_norm": 0.3503696620464325, + "learning_rate": 0.0015, + "loss": 2.1953, + "step": 3983 + }, + { + "epoch": 0.42025316455696204, + "grad_norm": 0.37260469794273376, + "learning_rate": 0.0015, + "loss": 2.2065, + "step": 3984 + }, + { + "epoch": 0.42035864978902954, + "grad_norm": 0.40082836151123047, + "learning_rate": 0.0015, + "loss": 2.1674, + "step": 3985 + }, + { + "epoch": 0.42046413502109703, + "grad_norm": 0.39629340171813965, + "learning_rate": 0.0015, + "loss": 2.1658, + "step": 3986 + }, + { + "epoch": 0.4205696202531646, + "grad_norm": 0.40786290168762207, + "learning_rate": 0.0015, + "loss": 2.1631, + "step": 3987 + }, + { + "epoch": 0.42067510548523207, + "grad_norm": 0.35053402185440063, + "learning_rate": 0.0015, + "loss": 2.2103, + "step": 3988 + }, + { + "epoch": 0.42078059071729956, + "grad_norm": 0.3839280903339386, + "learning_rate": 0.0015, + "loss": 2.1919, + "step": 3989 + }, + { + "epoch": 0.4208860759493671, + "grad_norm": 0.35137900710105896, + "learning_rate": 0.0015, + "loss": 
2.1897, + "step": 3990 + }, + { + "epoch": 0.4209915611814346, + "grad_norm": 0.38802236318588257, + "learning_rate": 0.0015, + "loss": 2.1827, + "step": 3991 + }, + { + "epoch": 0.4210970464135021, + "grad_norm": 0.3792721629142761, + "learning_rate": 0.0015, + "loss": 2.2063, + "step": 3992 + }, + { + "epoch": 0.42120253164556964, + "grad_norm": 0.38886046409606934, + "learning_rate": 0.0015, + "loss": 2.1794, + "step": 3993 + }, + { + "epoch": 0.42130801687763714, + "grad_norm": 0.43524065613746643, + "learning_rate": 0.0015, + "loss": 2.2043, + "step": 3994 + }, + { + "epoch": 0.42141350210970463, + "grad_norm": 0.44078660011291504, + "learning_rate": 0.0015, + "loss": 2.2022, + "step": 3995 + }, + { + "epoch": 0.4215189873417722, + "grad_norm": 0.3817399740219116, + "learning_rate": 0.0015, + "loss": 2.1984, + "step": 3996 + }, + { + "epoch": 0.42162447257383967, + "grad_norm": 0.39368388056755066, + "learning_rate": 0.0015, + "loss": 2.2067, + "step": 3997 + }, + { + "epoch": 0.42172995780590716, + "grad_norm": 0.3554587960243225, + "learning_rate": 0.0015, + "loss": 2.1734, + "step": 3998 + }, + { + "epoch": 0.4218354430379747, + "grad_norm": 0.3697257339954376, + "learning_rate": 0.0015, + "loss": 2.2059, + "step": 3999 + }, + { + "epoch": 0.4219409282700422, + "grad_norm": 0.36136528849601746, + "learning_rate": 0.0015, + "loss": 2.1875, + "step": 4000 + }, + { + "epoch": 0.4220464135021097, + "grad_norm": 0.392017662525177, + "learning_rate": 0.0015, + "loss": 2.1869, + "step": 4001 + }, + { + "epoch": 0.4221518987341772, + "grad_norm": 0.37608206272125244, + "learning_rate": 0.0015, + "loss": 2.1872, + "step": 4002 + }, + { + "epoch": 0.42225738396624474, + "grad_norm": 0.3885951042175293, + "learning_rate": 0.0015, + "loss": 2.144, + "step": 4003 + }, + { + "epoch": 0.42236286919831223, + "grad_norm": 0.34449541568756104, + "learning_rate": 0.0015, + "loss": 2.1903, + "step": 4004 + }, + { + "epoch": 0.4224683544303797, + "grad_norm": 
0.4005509912967682, + "learning_rate": 0.0015, + "loss": 2.1658, + "step": 4005 + }, + { + "epoch": 0.42257383966244727, + "grad_norm": 0.353355348110199, + "learning_rate": 0.0015, + "loss": 2.2084, + "step": 4006 + }, + { + "epoch": 0.42267932489451476, + "grad_norm": 0.39567217230796814, + "learning_rate": 0.0015, + "loss": 2.192, + "step": 4007 + }, + { + "epoch": 0.42278481012658226, + "grad_norm": 0.3950141668319702, + "learning_rate": 0.0015, + "loss": 2.1757, + "step": 4008 + }, + { + "epoch": 0.4228902953586498, + "grad_norm": 0.421934574842453, + "learning_rate": 0.0015, + "loss": 2.1733, + "step": 4009 + }, + { + "epoch": 0.4229957805907173, + "grad_norm": 0.391949325799942, + "learning_rate": 0.0015, + "loss": 2.2081, + "step": 4010 + }, + { + "epoch": 0.4231012658227848, + "grad_norm": 0.44231101870536804, + "learning_rate": 0.0015, + "loss": 2.1724, + "step": 4011 + }, + { + "epoch": 0.42320675105485234, + "grad_norm": 0.4037548303604126, + "learning_rate": 0.0015, + "loss": 2.1591, + "step": 4012 + }, + { + "epoch": 0.42331223628691983, + "grad_norm": 0.4177047312259674, + "learning_rate": 0.0015, + "loss": 2.1871, + "step": 4013 + }, + { + "epoch": 0.4234177215189873, + "grad_norm": 0.40617844462394714, + "learning_rate": 0.0015, + "loss": 2.1664, + "step": 4014 + }, + { + "epoch": 0.42352320675105487, + "grad_norm": 0.42033886909484863, + "learning_rate": 0.0015, + "loss": 2.2027, + "step": 4015 + }, + { + "epoch": 0.42362869198312236, + "grad_norm": 0.48890817165374756, + "learning_rate": 0.0015, + "loss": 2.1924, + "step": 4016 + }, + { + "epoch": 0.42373417721518986, + "grad_norm": 0.4181896150112152, + "learning_rate": 0.0015, + "loss": 2.1815, + "step": 4017 + }, + { + "epoch": 0.4238396624472574, + "grad_norm": 0.3722687065601349, + "learning_rate": 0.0015, + "loss": 2.2089, + "step": 4018 + }, + { + "epoch": 0.4239451476793249, + "grad_norm": 0.45785650610923767, + "learning_rate": 0.0015, + "loss": 2.1823, + "step": 4019 + }, + { + "epoch": 
0.4240506329113924, + "grad_norm": 0.42453882098197937, + "learning_rate": 0.0015, + "loss": 2.2049, + "step": 4020 + }, + { + "epoch": 0.42415611814345994, + "grad_norm": 0.356078565120697, + "learning_rate": 0.0015, + "loss": 2.1493, + "step": 4021 + }, + { + "epoch": 0.42426160337552743, + "grad_norm": 0.43644633889198303, + "learning_rate": 0.0015, + "loss": 2.1938, + "step": 4022 + }, + { + "epoch": 0.4243670886075949, + "grad_norm": 0.4110719561576843, + "learning_rate": 0.0015, + "loss": 2.1698, + "step": 4023 + }, + { + "epoch": 0.42447257383966247, + "grad_norm": 0.4292996823787689, + "learning_rate": 0.0015, + "loss": 2.203, + "step": 4024 + }, + { + "epoch": 0.42457805907172996, + "grad_norm": 0.4265879690647125, + "learning_rate": 0.0015, + "loss": 2.21, + "step": 4025 + }, + { + "epoch": 0.42468354430379746, + "grad_norm": 0.3809967339038849, + "learning_rate": 0.0015, + "loss": 2.195, + "step": 4026 + }, + { + "epoch": 0.424789029535865, + "grad_norm": 0.4299618601799011, + "learning_rate": 0.0015, + "loss": 2.1432, + "step": 4027 + }, + { + "epoch": 0.4248945147679325, + "grad_norm": 0.38657522201538086, + "learning_rate": 0.0015, + "loss": 2.1824, + "step": 4028 + }, + { + "epoch": 0.425, + "grad_norm": 0.4583083391189575, + "learning_rate": 0.0015, + "loss": 2.1642, + "step": 4029 + }, + { + "epoch": 0.42510548523206754, + "grad_norm": 0.5558947920799255, + "learning_rate": 0.0015, + "loss": 2.1922, + "step": 4030 + }, + { + "epoch": 0.42521097046413503, + "grad_norm": 0.5034643411636353, + "learning_rate": 0.0015, + "loss": 2.1852, + "step": 4031 + }, + { + "epoch": 0.4253164556962025, + "grad_norm": 0.3832836449146271, + "learning_rate": 0.0015, + "loss": 2.2083, + "step": 4032 + }, + { + "epoch": 0.42542194092827, + "grad_norm": 0.5490725040435791, + "learning_rate": 0.0015, + "loss": 2.2035, + "step": 4033 + }, + { + "epoch": 0.42552742616033756, + "grad_norm": 0.5866567492485046, + "learning_rate": 0.0015, + "loss": 2.1781, + "step": 4034 + }, 
+ { + "epoch": 0.42563291139240506, + "grad_norm": 0.4354483187198639, + "learning_rate": 0.0015, + "loss": 2.1862, + "step": 4035 + }, + { + "epoch": 0.42573839662447255, + "grad_norm": 0.4967651665210724, + "learning_rate": 0.0015, + "loss": 2.1705, + "step": 4036 + }, + { + "epoch": 0.4258438818565401, + "grad_norm": 0.5377905368804932, + "learning_rate": 0.0015, + "loss": 2.1893, + "step": 4037 + }, + { + "epoch": 0.4259493670886076, + "grad_norm": 0.4044438600540161, + "learning_rate": 0.0015, + "loss": 2.2162, + "step": 4038 + }, + { + "epoch": 0.4260548523206751, + "grad_norm": 0.5197533965110779, + "learning_rate": 0.0015, + "loss": 2.1788, + "step": 4039 + }, + { + "epoch": 0.42616033755274263, + "grad_norm": 0.5697380900382996, + "learning_rate": 0.0015, + "loss": 2.1688, + "step": 4040 + }, + { + "epoch": 0.4262658227848101, + "grad_norm": 0.46668991446495056, + "learning_rate": 0.0015, + "loss": 2.1823, + "step": 4041 + }, + { + "epoch": 0.4263713080168776, + "grad_norm": 0.5434666275978088, + "learning_rate": 0.0015, + "loss": 2.2134, + "step": 4042 + }, + { + "epoch": 0.42647679324894516, + "grad_norm": 0.7120156288146973, + "learning_rate": 0.0015, + "loss": 2.1599, + "step": 4043 + }, + { + "epoch": 0.42658227848101266, + "grad_norm": 0.6580579280853271, + "learning_rate": 0.0015, + "loss": 2.1997, + "step": 4044 + }, + { + "epoch": 0.42668776371308015, + "grad_norm": 0.4506515860557556, + "learning_rate": 0.0015, + "loss": 2.2314, + "step": 4045 + }, + { + "epoch": 0.4267932489451477, + "grad_norm": 0.7132940888404846, + "learning_rate": 0.0015, + "loss": 2.1841, + "step": 4046 + }, + { + "epoch": 0.4268987341772152, + "grad_norm": 0.5096327066421509, + "learning_rate": 0.0015, + "loss": 2.1844, + "step": 4047 + }, + { + "epoch": 0.4270042194092827, + "grad_norm": 0.6203842759132385, + "learning_rate": 0.0015, + "loss": 2.1994, + "step": 4048 + }, + { + "epoch": 0.42710970464135023, + "grad_norm": 0.6585789918899536, + "learning_rate": 0.0015, + 
"loss": 2.1727, + "step": 4049 + }, + { + "epoch": 0.4272151898734177, + "grad_norm": 0.3788371682167053, + "learning_rate": 0.0015, + "loss": 2.1855, + "step": 4050 + }, + { + "epoch": 0.4273206751054852, + "grad_norm": 0.6665502190589905, + "learning_rate": 0.0015, + "loss": 2.2041, + "step": 4051 + }, + { + "epoch": 0.42742616033755276, + "grad_norm": 0.435242623090744, + "learning_rate": 0.0015, + "loss": 2.2056, + "step": 4052 + }, + { + "epoch": 0.42753164556962026, + "grad_norm": 0.5758107900619507, + "learning_rate": 0.0015, + "loss": 2.1971, + "step": 4053 + }, + { + "epoch": 0.42763713080168775, + "grad_norm": 0.5313095450401306, + "learning_rate": 0.0015, + "loss": 2.2029, + "step": 4054 + }, + { + "epoch": 0.4277426160337553, + "grad_norm": 0.5345791578292847, + "learning_rate": 0.0015, + "loss": 2.1867, + "step": 4055 + }, + { + "epoch": 0.4278481012658228, + "grad_norm": 0.4334048926830292, + "learning_rate": 0.0015, + "loss": 2.17, + "step": 4056 + }, + { + "epoch": 0.4279535864978903, + "grad_norm": 0.4776614308357239, + "learning_rate": 0.0015, + "loss": 2.1673, + "step": 4057 + }, + { + "epoch": 0.42805907172995783, + "grad_norm": 0.4558835029602051, + "learning_rate": 0.0015, + "loss": 2.207, + "step": 4058 + }, + { + "epoch": 0.4281645569620253, + "grad_norm": 0.421091228723526, + "learning_rate": 0.0015, + "loss": 2.2186, + "step": 4059 + }, + { + "epoch": 0.4282700421940928, + "grad_norm": 0.4331853985786438, + "learning_rate": 0.0015, + "loss": 2.1701, + "step": 4060 + }, + { + "epoch": 0.42837552742616036, + "grad_norm": 0.38193362951278687, + "learning_rate": 0.0015, + "loss": 2.193, + "step": 4061 + }, + { + "epoch": 0.42848101265822786, + "grad_norm": 0.47319790720939636, + "learning_rate": 0.0015, + "loss": 2.1692, + "step": 4062 + }, + { + "epoch": 0.42858649789029535, + "grad_norm": 0.42932620644569397, + "learning_rate": 0.0015, + "loss": 2.1393, + "step": 4063 + }, + { + "epoch": 0.4286919831223629, + "grad_norm": 0.4152498245239258, 
+ "learning_rate": 0.0015, + "loss": 2.1765, + "step": 4064 + }, + { + "epoch": 0.4287974683544304, + "grad_norm": 0.570166826248169, + "learning_rate": 0.0015, + "loss": 2.2204, + "step": 4065 + }, + { + "epoch": 0.4289029535864979, + "grad_norm": 0.4379292130470276, + "learning_rate": 0.0015, + "loss": 2.1777, + "step": 4066 + }, + { + "epoch": 0.4290084388185654, + "grad_norm": 0.5040015578269958, + "learning_rate": 0.0015, + "loss": 2.182, + "step": 4067 + }, + { + "epoch": 0.4291139240506329, + "grad_norm": 0.42154309153556824, + "learning_rate": 0.0015, + "loss": 2.1709, + "step": 4068 + }, + { + "epoch": 0.4292194092827004, + "grad_norm": 0.4516788423061371, + "learning_rate": 0.0015, + "loss": 2.2348, + "step": 4069 + }, + { + "epoch": 0.4293248945147679, + "grad_norm": 0.5125277638435364, + "learning_rate": 0.0015, + "loss": 2.2032, + "step": 4070 + }, + { + "epoch": 0.42943037974683546, + "grad_norm": 0.3752480447292328, + "learning_rate": 0.0015, + "loss": 2.2082, + "step": 4071 + }, + { + "epoch": 0.42953586497890295, + "grad_norm": 0.42821750044822693, + "learning_rate": 0.0015, + "loss": 2.175, + "step": 4072 + }, + { + "epoch": 0.42964135021097044, + "grad_norm": 0.47106024622917175, + "learning_rate": 0.0015, + "loss": 2.1871, + "step": 4073 + }, + { + "epoch": 0.429746835443038, + "grad_norm": 0.36860397458076477, + "learning_rate": 0.0015, + "loss": 2.1576, + "step": 4074 + }, + { + "epoch": 0.4298523206751055, + "grad_norm": 0.41805145144462585, + "learning_rate": 0.0015, + "loss": 2.2136, + "step": 4075 + }, + { + "epoch": 0.429957805907173, + "grad_norm": 0.39740756154060364, + "learning_rate": 0.0015, + "loss": 2.1809, + "step": 4076 + }, + { + "epoch": 0.4300632911392405, + "grad_norm": 0.3719877302646637, + "learning_rate": 0.0015, + "loss": 2.2034, + "step": 4077 + }, + { + "epoch": 0.430168776371308, + "grad_norm": 0.36100852489471436, + "learning_rate": 0.0015, + "loss": 2.1763, + "step": 4078 + }, + { + "epoch": 0.4302742616033755, + 
"grad_norm": 0.36234453320503235, + "learning_rate": 0.0015, + "loss": 2.1792, + "step": 4079 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 0.4118110239505768, + "learning_rate": 0.0015, + "loss": 2.1673, + "step": 4080 + }, + { + "epoch": 0.43048523206751055, + "grad_norm": 0.3651139438152313, + "learning_rate": 0.0015, + "loss": 2.176, + "step": 4081 + }, + { + "epoch": 0.43059071729957804, + "grad_norm": 0.41043582558631897, + "learning_rate": 0.0015, + "loss": 2.2051, + "step": 4082 + }, + { + "epoch": 0.4306962025316456, + "grad_norm": 0.3400387763977051, + "learning_rate": 0.0015, + "loss": 2.1909, + "step": 4083 + }, + { + "epoch": 0.4308016877637131, + "grad_norm": 0.37694069743156433, + "learning_rate": 0.0015, + "loss": 2.1505, + "step": 4084 + }, + { + "epoch": 0.4309071729957806, + "grad_norm": 0.36470794677734375, + "learning_rate": 0.0015, + "loss": 2.1712, + "step": 4085 + }, + { + "epoch": 0.4310126582278481, + "grad_norm": 0.39450252056121826, + "learning_rate": 0.0015, + "loss": 2.2116, + "step": 4086 + }, + { + "epoch": 0.4311181434599156, + "grad_norm": 0.3609990179538727, + "learning_rate": 0.0015, + "loss": 2.2065, + "step": 4087 + }, + { + "epoch": 0.4312236286919831, + "grad_norm": 0.40124520659446716, + "learning_rate": 0.0015, + "loss": 2.1552, + "step": 4088 + }, + { + "epoch": 0.43132911392405066, + "grad_norm": 0.41669073700904846, + "learning_rate": 0.0015, + "loss": 2.1542, + "step": 4089 + }, + { + "epoch": 0.43143459915611815, + "grad_norm": 0.40116629004478455, + "learning_rate": 0.0015, + "loss": 2.1562, + "step": 4090 + }, + { + "epoch": 0.43154008438818564, + "grad_norm": 0.3425223231315613, + "learning_rate": 0.0015, + "loss": 2.2165, + "step": 4091 + }, + { + "epoch": 0.4316455696202532, + "grad_norm": 0.3809245228767395, + "learning_rate": 0.0015, + "loss": 2.1706, + "step": 4092 + }, + { + "epoch": 0.4317510548523207, + "grad_norm": 0.35666346549987793, + "learning_rate": 0.0015, + "loss": 2.1748, + "step": 4093 + 
}, + { + "epoch": 0.4318565400843882, + "grad_norm": 0.40348687767982483, + "learning_rate": 0.0015, + "loss": 2.1854, + "step": 4094 + }, + { + "epoch": 0.4319620253164557, + "grad_norm": 0.40386494994163513, + "learning_rate": 0.0015, + "loss": 2.1731, + "step": 4095 + }, + { + "epoch": 0.4320675105485232, + "grad_norm": 0.37805289030075073, + "learning_rate": 0.0015, + "loss": 2.1664, + "step": 4096 + }, + { + "epoch": 0.4321729957805907, + "grad_norm": 0.3513408303260803, + "learning_rate": 0.0015, + "loss": 2.1648, + "step": 4097 + }, + { + "epoch": 0.43227848101265826, + "grad_norm": 0.4019124507904053, + "learning_rate": 0.0015, + "loss": 2.1943, + "step": 4098 + }, + { + "epoch": 0.43238396624472575, + "grad_norm": 0.4296889901161194, + "learning_rate": 0.0015, + "loss": 2.1328, + "step": 4099 + }, + { + "epoch": 0.43248945147679324, + "grad_norm": 0.3701075315475464, + "learning_rate": 0.0015, + "loss": 2.1442, + "step": 4100 + }, + { + "epoch": 0.43259493670886073, + "grad_norm": 0.4000721573829651, + "learning_rate": 0.0015, + "loss": 2.165, + "step": 4101 + }, + { + "epoch": 0.4327004219409283, + "grad_norm": 0.43859586119651794, + "learning_rate": 0.0015, + "loss": 2.1583, + "step": 4102 + }, + { + "epoch": 0.4328059071729958, + "grad_norm": 0.3807271122932434, + "learning_rate": 0.0015, + "loss": 2.1159, + "step": 4103 + }, + { + "epoch": 0.43291139240506327, + "grad_norm": 0.39134931564331055, + "learning_rate": 0.0015, + "loss": 2.1578, + "step": 4104 + }, + { + "epoch": 0.4330168776371308, + "grad_norm": 0.38830456137657166, + "learning_rate": 0.0015, + "loss": 2.1571, + "step": 4105 + }, + { + "epoch": 0.4331223628691983, + "grad_norm": 0.35006892681121826, + "learning_rate": 0.0015, + "loss": 2.2003, + "step": 4106 + }, + { + "epoch": 0.4332278481012658, + "grad_norm": 0.359717458486557, + "learning_rate": 0.0015, + "loss": 2.1786, + "step": 4107 + }, + { + "epoch": 0.43333333333333335, + "grad_norm": 0.329541951417923, + "learning_rate": 0.0015, 
+ "loss": 2.1813, + "step": 4108 + }, + { + "epoch": 0.43343881856540084, + "grad_norm": 0.36240822076797485, + "learning_rate": 0.0015, + "loss": 2.1896, + "step": 4109 + }, + { + "epoch": 0.43354430379746833, + "grad_norm": 0.3596387505531311, + "learning_rate": 0.0015, + "loss": 2.1883, + "step": 4110 + }, + { + "epoch": 0.4336497890295359, + "grad_norm": 0.35475149750709534, + "learning_rate": 0.0015, + "loss": 2.1717, + "step": 4111 + }, + { + "epoch": 0.4337552742616034, + "grad_norm": 0.418201744556427, + "learning_rate": 0.0015, + "loss": 2.1457, + "step": 4112 + }, + { + "epoch": 0.43386075949367087, + "grad_norm": 0.37636515498161316, + "learning_rate": 0.0015, + "loss": 2.1899, + "step": 4113 + }, + { + "epoch": 0.4339662447257384, + "grad_norm": 0.3351913094520569, + "learning_rate": 0.0015, + "loss": 2.1529, + "step": 4114 + }, + { + "epoch": 0.4340717299578059, + "grad_norm": 0.3650878965854645, + "learning_rate": 0.0015, + "loss": 2.176, + "step": 4115 + }, + { + "epoch": 0.4341772151898734, + "grad_norm": 0.4313245713710785, + "learning_rate": 0.0015, + "loss": 2.1677, + "step": 4116 + }, + { + "epoch": 0.43428270042194095, + "grad_norm": 0.42349401116371155, + "learning_rate": 0.0015, + "loss": 2.1916, + "step": 4117 + }, + { + "epoch": 0.43438818565400844, + "grad_norm": 0.35222291946411133, + "learning_rate": 0.0015, + "loss": 2.1701, + "step": 4118 + }, + { + "epoch": 0.43449367088607593, + "grad_norm": 0.4947056174278259, + "learning_rate": 0.0015, + "loss": 2.2095, + "step": 4119 + }, + { + "epoch": 0.4345991561181435, + "grad_norm": 0.3744601607322693, + "learning_rate": 0.0015, + "loss": 2.1911, + "step": 4120 + }, + { + "epoch": 0.434704641350211, + "grad_norm": 0.39133763313293457, + "learning_rate": 0.0015, + "loss": 2.178, + "step": 4121 + }, + { + "epoch": 0.43481012658227847, + "grad_norm": 0.4855934679508209, + "learning_rate": 0.0015, + "loss": 2.1764, + "step": 4122 + }, + { + "epoch": 0.434915611814346, + "grad_norm": 
0.434122234582901, + "learning_rate": 0.0015, + "loss": 2.134, + "step": 4123 + }, + { + "epoch": 0.4350210970464135, + "grad_norm": 0.39320725202560425, + "learning_rate": 0.0015, + "loss": 2.183, + "step": 4124 + }, + { + "epoch": 0.435126582278481, + "grad_norm": 0.40252718329429626, + "learning_rate": 0.0015, + "loss": 2.1428, + "step": 4125 + }, + { + "epoch": 0.43523206751054855, + "grad_norm": 0.4324040412902832, + "learning_rate": 0.0015, + "loss": 2.1696, + "step": 4126 + }, + { + "epoch": 0.43533755274261604, + "grad_norm": 0.38199201226234436, + "learning_rate": 0.0015, + "loss": 2.1553, + "step": 4127 + }, + { + "epoch": 0.43544303797468353, + "grad_norm": 0.39265361428260803, + "learning_rate": 0.0015, + "loss": 2.1781, + "step": 4128 + }, + { + "epoch": 0.4355485232067511, + "grad_norm": 0.34391024708747864, + "learning_rate": 0.0015, + "loss": 2.1779, + "step": 4129 + }, + { + "epoch": 0.4356540084388186, + "grad_norm": 0.37315499782562256, + "learning_rate": 0.0015, + "loss": 2.1784, + "step": 4130 + }, + { + "epoch": 0.43575949367088607, + "grad_norm": 0.33761024475097656, + "learning_rate": 0.0015, + "loss": 2.1678, + "step": 4131 + }, + { + "epoch": 0.43586497890295356, + "grad_norm": 0.36392152309417725, + "learning_rate": 0.0015, + "loss": 2.1482, + "step": 4132 + }, + { + "epoch": 0.4359704641350211, + "grad_norm": 0.3514942526817322, + "learning_rate": 0.0015, + "loss": 2.1835, + "step": 4133 + }, + { + "epoch": 0.4360759493670886, + "grad_norm": 0.3744677007198334, + "learning_rate": 0.0015, + "loss": 2.1808, + "step": 4134 + }, + { + "epoch": 0.4361814345991561, + "grad_norm": 0.3501388132572174, + "learning_rate": 0.0015, + "loss": 2.1431, + "step": 4135 + }, + { + "epoch": 0.43628691983122364, + "grad_norm": 0.4043705463409424, + "learning_rate": 0.0015, + "loss": 2.1573, + "step": 4136 + }, + { + "epoch": 0.43639240506329113, + "grad_norm": 0.3908718228340149, + "learning_rate": 0.0015, + "loss": 2.1751, + "step": 4137 + }, + { + 
"epoch": 0.4364978902953586, + "grad_norm": 0.42944103479385376, + "learning_rate": 0.0015, + "loss": 2.1823, + "step": 4138 + }, + { + "epoch": 0.4366033755274262, + "grad_norm": 0.37580424547195435, + "learning_rate": 0.0015, + "loss": 2.166, + "step": 4139 + }, + { + "epoch": 0.43670886075949367, + "grad_norm": 0.4216992259025574, + "learning_rate": 0.0015, + "loss": 2.1731, + "step": 4140 + }, + { + "epoch": 0.43681434599156116, + "grad_norm": 0.4908992648124695, + "learning_rate": 0.0015, + "loss": 2.1646, + "step": 4141 + }, + { + "epoch": 0.4369198312236287, + "grad_norm": 0.36054661870002747, + "learning_rate": 0.0015, + "loss": 2.1721, + "step": 4142 + }, + { + "epoch": 0.4370253164556962, + "grad_norm": 0.4131866991519928, + "learning_rate": 0.0015, + "loss": 2.1844, + "step": 4143 + }, + { + "epoch": 0.4371308016877637, + "grad_norm": 0.3731575310230255, + "learning_rate": 0.0015, + "loss": 2.1816, + "step": 4144 + }, + { + "epoch": 0.43723628691983124, + "grad_norm": 0.4449811279773712, + "learning_rate": 0.0015, + "loss": 2.1704, + "step": 4145 + }, + { + "epoch": 0.43734177215189873, + "grad_norm": 0.3926078677177429, + "learning_rate": 0.0015, + "loss": 2.1447, + "step": 4146 + }, + { + "epoch": 0.4374472573839662, + "grad_norm": 0.40212392807006836, + "learning_rate": 0.0015, + "loss": 2.1955, + "step": 4147 + }, + { + "epoch": 0.4375527426160338, + "grad_norm": 0.4078380763530731, + "learning_rate": 0.0015, + "loss": 2.2131, + "step": 4148 + }, + { + "epoch": 0.43765822784810127, + "grad_norm": 0.40613457560539246, + "learning_rate": 0.0015, + "loss": 2.1726, + "step": 4149 + }, + { + "epoch": 0.43776371308016876, + "grad_norm": 0.4193389415740967, + "learning_rate": 0.0015, + "loss": 2.1916, + "step": 4150 + }, + { + "epoch": 0.4378691983122363, + "grad_norm": 0.39105379581451416, + "learning_rate": 0.0015, + "loss": 2.1712, + "step": 4151 + }, + { + "epoch": 0.4379746835443038, + "grad_norm": 0.4216941297054291, + "learning_rate": 0.0015, + 
"loss": 2.2063, + "step": 4152 + }, + { + "epoch": 0.4380801687763713, + "grad_norm": 0.3991769850254059, + "learning_rate": 0.0015, + "loss": 2.1813, + "step": 4153 + }, + { + "epoch": 0.43818565400843884, + "grad_norm": 0.4538174271583557, + "learning_rate": 0.0015, + "loss": 2.1522, + "step": 4154 + }, + { + "epoch": 0.43829113924050633, + "grad_norm": 0.40062013268470764, + "learning_rate": 0.0015, + "loss": 2.1513, + "step": 4155 + }, + { + "epoch": 0.4383966244725738, + "grad_norm": 0.3897168040275574, + "learning_rate": 0.0015, + "loss": 2.1725, + "step": 4156 + }, + { + "epoch": 0.4385021097046414, + "grad_norm": 0.4070018231868744, + "learning_rate": 0.0015, + "loss": 2.1955, + "step": 4157 + }, + { + "epoch": 0.43860759493670887, + "grad_norm": 0.4279712438583374, + "learning_rate": 0.0015, + "loss": 2.1469, + "step": 4158 + }, + { + "epoch": 0.43871308016877636, + "grad_norm": 0.4178614020347595, + "learning_rate": 0.0015, + "loss": 2.1609, + "step": 4159 + }, + { + "epoch": 0.4388185654008439, + "grad_norm": 0.4292597770690918, + "learning_rate": 0.0015, + "loss": 2.1645, + "step": 4160 + }, + { + "epoch": 0.4389240506329114, + "grad_norm": 0.35632580518722534, + "learning_rate": 0.0015, + "loss": 2.1422, + "step": 4161 + }, + { + "epoch": 0.4390295358649789, + "grad_norm": 0.4013102650642395, + "learning_rate": 0.0015, + "loss": 2.1771, + "step": 4162 + }, + { + "epoch": 0.43913502109704644, + "grad_norm": 0.40805763006210327, + "learning_rate": 0.0015, + "loss": 2.1564, + "step": 4163 + }, + { + "epoch": 0.43924050632911393, + "grad_norm": 0.39425864815711975, + "learning_rate": 0.0015, + "loss": 2.1441, + "step": 4164 + }, + { + "epoch": 0.4393459915611814, + "grad_norm": 0.3734298646450043, + "learning_rate": 0.0015, + "loss": 2.1662, + "step": 4165 + }, + { + "epoch": 0.4394514767932489, + "grad_norm": 0.3805868327617645, + "learning_rate": 0.0015, + "loss": 2.1748, + "step": 4166 + }, + { + "epoch": 0.43955696202531647, + "grad_norm": 
0.398338258266449, + "learning_rate": 0.0015, + "loss": 2.1619, + "step": 4167 + }, + { + "epoch": 0.43966244725738396, + "grad_norm": 0.3809880316257477, + "learning_rate": 0.0015, + "loss": 2.1709, + "step": 4168 + }, + { + "epoch": 0.43976793248945145, + "grad_norm": 0.3935762941837311, + "learning_rate": 0.0015, + "loss": 2.1888, + "step": 4169 + }, + { + "epoch": 0.439873417721519, + "grad_norm": 0.36645984649658203, + "learning_rate": 0.0015, + "loss": 2.1674, + "step": 4170 + }, + { + "epoch": 0.4399789029535865, + "grad_norm": 0.38558074831962585, + "learning_rate": 0.0015, + "loss": 2.1688, + "step": 4171 + }, + { + "epoch": 0.440084388185654, + "grad_norm": 0.4213567078113556, + "learning_rate": 0.0015, + "loss": 2.1623, + "step": 4172 + }, + { + "epoch": 0.44018987341772153, + "grad_norm": 0.38595473766326904, + "learning_rate": 0.0015, + "loss": 2.1855, + "step": 4173 + }, + { + "epoch": 0.440295358649789, + "grad_norm": 0.3869059681892395, + "learning_rate": 0.0015, + "loss": 2.167, + "step": 4174 + }, + { + "epoch": 0.4404008438818565, + "grad_norm": 0.3713687062263489, + "learning_rate": 0.0015, + "loss": 2.1622, + "step": 4175 + }, + { + "epoch": 0.44050632911392407, + "grad_norm": 0.41329720616340637, + "learning_rate": 0.0015, + "loss": 2.1301, + "step": 4176 + }, + { + "epoch": 0.44061181434599156, + "grad_norm": 0.4507656991481781, + "learning_rate": 0.0015, + "loss": 2.1679, + "step": 4177 + }, + { + "epoch": 0.44071729957805905, + "grad_norm": 0.339520663022995, + "learning_rate": 0.0015, + "loss": 2.1416, + "step": 4178 + }, + { + "epoch": 0.4408227848101266, + "grad_norm": 0.4383125305175781, + "learning_rate": 0.0015, + "loss": 2.1804, + "step": 4179 + }, + { + "epoch": 0.4409282700421941, + "grad_norm": 0.4152977764606476, + "learning_rate": 0.0015, + "loss": 2.1526, + "step": 4180 + }, + { + "epoch": 0.4410337552742616, + "grad_norm": 0.40396660566329956, + "learning_rate": 0.0015, + "loss": 2.1776, + "step": 4181 + }, + { + "epoch": 
0.44113924050632913, + "grad_norm": 0.40534672141075134, + "learning_rate": 0.0015, + "loss": 2.1806, + "step": 4182 + }, + { + "epoch": 0.4412447257383966, + "grad_norm": 0.4196752905845642, + "learning_rate": 0.0015, + "loss": 2.1625, + "step": 4183 + }, + { + "epoch": 0.4413502109704641, + "grad_norm": 0.43805554509162903, + "learning_rate": 0.0015, + "loss": 2.1571, + "step": 4184 + }, + { + "epoch": 0.44145569620253167, + "grad_norm": 0.45618411898612976, + "learning_rate": 0.0015, + "loss": 2.1669, + "step": 4185 + }, + { + "epoch": 0.44156118143459916, + "grad_norm": 0.357536643743515, + "learning_rate": 0.0015, + "loss": 2.1438, + "step": 4186 + }, + { + "epoch": 0.44166666666666665, + "grad_norm": 0.4523632228374481, + "learning_rate": 0.0015, + "loss": 2.1811, + "step": 4187 + }, + { + "epoch": 0.4417721518987342, + "grad_norm": 0.4248042404651642, + "learning_rate": 0.0015, + "loss": 2.1759, + "step": 4188 + }, + { + "epoch": 0.4418776371308017, + "grad_norm": 0.3993827700614929, + "learning_rate": 0.0015, + "loss": 2.1678, + "step": 4189 + }, + { + "epoch": 0.4419831223628692, + "grad_norm": 0.4416627585887909, + "learning_rate": 0.0015, + "loss": 2.1641, + "step": 4190 + }, + { + "epoch": 0.44208860759493673, + "grad_norm": 0.464828222990036, + "learning_rate": 0.0015, + "loss": 2.1406, + "step": 4191 + }, + { + "epoch": 0.4421940928270042, + "grad_norm": 0.3977198004722595, + "learning_rate": 0.0015, + "loss": 2.1811, + "step": 4192 + }, + { + "epoch": 0.4422995780590717, + "grad_norm": 0.43209365010261536, + "learning_rate": 0.0015, + "loss": 2.1456, + "step": 4193 + }, + { + "epoch": 0.44240506329113927, + "grad_norm": 0.39321592450141907, + "learning_rate": 0.0015, + "loss": 2.1917, + "step": 4194 + }, + { + "epoch": 0.44251054852320676, + "grad_norm": 0.48815566301345825, + "learning_rate": 0.0015, + "loss": 2.1672, + "step": 4195 + }, + { + "epoch": 0.44261603375527425, + "grad_norm": 0.40885186195373535, + "learning_rate": 0.0015, + "loss": 
2.1464, + "step": 4196 + }, + { + "epoch": 0.44272151898734174, + "grad_norm": 0.4871405065059662, + "learning_rate": 0.0015, + "loss": 2.1675, + "step": 4197 + }, + { + "epoch": 0.4428270042194093, + "grad_norm": 0.4155302047729492, + "learning_rate": 0.0015, + "loss": 2.1884, + "step": 4198 + }, + { + "epoch": 0.4429324894514768, + "grad_norm": 0.3932490050792694, + "learning_rate": 0.0015, + "loss": 2.128, + "step": 4199 + }, + { + "epoch": 0.4430379746835443, + "grad_norm": 0.4337933361530304, + "learning_rate": 0.0015, + "loss": 2.2227, + "step": 4200 + }, + { + "epoch": 0.4431434599156118, + "grad_norm": 0.4488857388496399, + "learning_rate": 0.0015, + "loss": 2.1782, + "step": 4201 + }, + { + "epoch": 0.4432489451476793, + "grad_norm": 0.35520580410957336, + "learning_rate": 0.0015, + "loss": 2.1658, + "step": 4202 + }, + { + "epoch": 0.4433544303797468, + "grad_norm": 0.3913552165031433, + "learning_rate": 0.0015, + "loss": 2.1326, + "step": 4203 + }, + { + "epoch": 0.44345991561181436, + "grad_norm": 0.4282710552215576, + "learning_rate": 0.0015, + "loss": 2.1689, + "step": 4204 + }, + { + "epoch": 0.44356540084388185, + "grad_norm": 0.43546849489212036, + "learning_rate": 0.0015, + "loss": 2.1356, + "step": 4205 + }, + { + "epoch": 0.44367088607594934, + "grad_norm": 0.34222424030303955, + "learning_rate": 0.0015, + "loss": 2.1584, + "step": 4206 + }, + { + "epoch": 0.4437763713080169, + "grad_norm": 0.4150950312614441, + "learning_rate": 0.0015, + "loss": 2.1199, + "step": 4207 + }, + { + "epoch": 0.4438818565400844, + "grad_norm": 0.374062180519104, + "learning_rate": 0.0015, + "loss": 2.1557, + "step": 4208 + }, + { + "epoch": 0.4439873417721519, + "grad_norm": 0.4256983995437622, + "learning_rate": 0.0015, + "loss": 2.1682, + "step": 4209 + }, + { + "epoch": 0.4440928270042194, + "grad_norm": 0.3972112834453583, + "learning_rate": 0.0015, + "loss": 2.1611, + "step": 4210 + }, + { + "epoch": 0.4441983122362869, + "grad_norm": 0.3856118321418762, + 
"learning_rate": 0.0015, + "loss": 2.1868, + "step": 4211 + }, + { + "epoch": 0.4443037974683544, + "grad_norm": 0.4341582953929901, + "learning_rate": 0.0015, + "loss": 2.1558, + "step": 4212 + }, + { + "epoch": 0.44440928270042196, + "grad_norm": 0.43997451663017273, + "learning_rate": 0.0015, + "loss": 2.2034, + "step": 4213 + }, + { + "epoch": 0.44451476793248945, + "grad_norm": 0.3523603677749634, + "learning_rate": 0.0015, + "loss": 2.1459, + "step": 4214 + }, + { + "epoch": 0.44462025316455694, + "grad_norm": 0.49361440539360046, + "learning_rate": 0.0015, + "loss": 2.1559, + "step": 4215 + }, + { + "epoch": 0.4447257383966245, + "grad_norm": 0.41937071084976196, + "learning_rate": 0.0015, + "loss": 2.1855, + "step": 4216 + }, + { + "epoch": 0.444831223628692, + "grad_norm": 0.42646199464797974, + "learning_rate": 0.0015, + "loss": 2.1535, + "step": 4217 + }, + { + "epoch": 0.4449367088607595, + "grad_norm": 0.39234089851379395, + "learning_rate": 0.0015, + "loss": 2.1386, + "step": 4218 + }, + { + "epoch": 0.445042194092827, + "grad_norm": 0.4420657455921173, + "learning_rate": 0.0015, + "loss": 2.1725, + "step": 4219 + }, + { + "epoch": 0.4451476793248945, + "grad_norm": 0.4058372974395752, + "learning_rate": 0.0015, + "loss": 2.1632, + "step": 4220 + }, + { + "epoch": 0.445253164556962, + "grad_norm": 0.453978955745697, + "learning_rate": 0.0015, + "loss": 2.1672, + "step": 4221 + }, + { + "epoch": 0.44535864978902956, + "grad_norm": 0.466207891702652, + "learning_rate": 0.0015, + "loss": 2.1532, + "step": 4222 + }, + { + "epoch": 0.44546413502109705, + "grad_norm": 0.478267103433609, + "learning_rate": 0.0015, + "loss": 2.1987, + "step": 4223 + }, + { + "epoch": 0.44556962025316454, + "grad_norm": 0.39326420426368713, + "learning_rate": 0.0015, + "loss": 2.1493, + "step": 4224 + }, + { + "epoch": 0.4456751054852321, + "grad_norm": 0.4140819311141968, + "learning_rate": 0.0015, + "loss": 2.1742, + "step": 4225 + }, + { + "epoch": 0.4457805907172996, + 
"grad_norm": 0.42187973856925964, + "learning_rate": 0.0015, + "loss": 2.1577, + "step": 4226 + }, + { + "epoch": 0.4458860759493671, + "grad_norm": 0.3569812774658203, + "learning_rate": 0.0015, + "loss": 2.1546, + "step": 4227 + }, + { + "epoch": 0.4459915611814346, + "grad_norm": 0.38916000723838806, + "learning_rate": 0.0015, + "loss": 2.1801, + "step": 4228 + }, + { + "epoch": 0.4460970464135021, + "grad_norm": 0.3839969336986542, + "learning_rate": 0.0015, + "loss": 2.1532, + "step": 4229 + }, + { + "epoch": 0.4462025316455696, + "grad_norm": 0.3962419033050537, + "learning_rate": 0.0015, + "loss": 2.1518, + "step": 4230 + }, + { + "epoch": 0.4463080168776371, + "grad_norm": 0.4536250829696655, + "learning_rate": 0.0015, + "loss": 2.207, + "step": 4231 + }, + { + "epoch": 0.44641350210970465, + "grad_norm": 0.36513566970825195, + "learning_rate": 0.0015, + "loss": 2.1639, + "step": 4232 + }, + { + "epoch": 0.44651898734177214, + "grad_norm": 0.4331674575805664, + "learning_rate": 0.0015, + "loss": 2.1731, + "step": 4233 + }, + { + "epoch": 0.44662447257383964, + "grad_norm": 0.3953793942928314, + "learning_rate": 0.0015, + "loss": 2.1748, + "step": 4234 + }, + { + "epoch": 0.4467299578059072, + "grad_norm": 0.42545244097709656, + "learning_rate": 0.0015, + "loss": 2.1701, + "step": 4235 + }, + { + "epoch": 0.4468354430379747, + "grad_norm": 0.44016221165657043, + "learning_rate": 0.0015, + "loss": 2.1571, + "step": 4236 + }, + { + "epoch": 0.44694092827004217, + "grad_norm": 0.363809198141098, + "learning_rate": 0.0015, + "loss": 2.1939, + "step": 4237 + }, + { + "epoch": 0.4470464135021097, + "grad_norm": 0.4266525208950043, + "learning_rate": 0.0015, + "loss": 2.1291, + "step": 4238 + }, + { + "epoch": 0.4471518987341772, + "grad_norm": 0.4756101071834564, + "learning_rate": 0.0015, + "loss": 2.1361, + "step": 4239 + }, + { + "epoch": 0.4472573839662447, + "grad_norm": 0.47795847058296204, + "learning_rate": 0.0015, + "loss": 2.1487, + "step": 4240 + }, + { 
+ "epoch": 0.44736286919831225, + "grad_norm": 0.3994237184524536, + "learning_rate": 0.0015, + "loss": 2.1468, + "step": 4241 + }, + { + "epoch": 0.44746835443037974, + "grad_norm": 0.40365538001060486, + "learning_rate": 0.0015, + "loss": 2.1785, + "step": 4242 + }, + { + "epoch": 0.44757383966244724, + "grad_norm": 0.3879595696926117, + "learning_rate": 0.0015, + "loss": 2.1653, + "step": 4243 + }, + { + "epoch": 0.4476793248945148, + "grad_norm": 0.4268890917301178, + "learning_rate": 0.0015, + "loss": 2.1828, + "step": 4244 + }, + { + "epoch": 0.4477848101265823, + "grad_norm": 0.3796004056930542, + "learning_rate": 0.0015, + "loss": 2.1859, + "step": 4245 + }, + { + "epoch": 0.44789029535864977, + "grad_norm": 0.4086673855781555, + "learning_rate": 0.0015, + "loss": 2.1431, + "step": 4246 + }, + { + "epoch": 0.4479957805907173, + "grad_norm": 0.42927929759025574, + "learning_rate": 0.0015, + "loss": 2.1041, + "step": 4247 + }, + { + "epoch": 0.4481012658227848, + "grad_norm": 0.42626726627349854, + "learning_rate": 0.0015, + "loss": 2.1674, + "step": 4248 + }, + { + "epoch": 0.4482067510548523, + "grad_norm": 0.5595776438713074, + "learning_rate": 0.0015, + "loss": 2.1892, + "step": 4249 + }, + { + "epoch": 0.44831223628691985, + "grad_norm": 0.49288445711135864, + "learning_rate": 0.0015, + "loss": 2.1822, + "step": 4250 + }, + { + "epoch": 0.44841772151898734, + "grad_norm": 0.39576810598373413, + "learning_rate": 0.0015, + "loss": 2.1539, + "step": 4251 + }, + { + "epoch": 0.44852320675105484, + "grad_norm": 0.475742369890213, + "learning_rate": 0.0015, + "loss": 2.1632, + "step": 4252 + }, + { + "epoch": 0.4486286919831224, + "grad_norm": 0.4127964377403259, + "learning_rate": 0.0015, + "loss": 2.1644, + "step": 4253 + }, + { + "epoch": 0.4487341772151899, + "grad_norm": 0.38632577657699585, + "learning_rate": 0.0015, + "loss": 2.1632, + "step": 4254 + }, + { + "epoch": 0.44883966244725737, + "grad_norm": 0.42026227712631226, + "learning_rate": 0.0015, + 
"loss": 2.195, + "step": 4255 + }, + { + "epoch": 0.4489451476793249, + "grad_norm": 0.36742493510246277, + "learning_rate": 0.0015, + "loss": 2.1881, + "step": 4256 + }, + { + "epoch": 0.4490506329113924, + "grad_norm": 0.45282068848609924, + "learning_rate": 0.0015, + "loss": 2.132, + "step": 4257 + }, + { + "epoch": 0.4491561181434599, + "grad_norm": 0.33267685770988464, + "learning_rate": 0.0015, + "loss": 2.1537, + "step": 4258 + }, + { + "epoch": 0.44926160337552745, + "grad_norm": 0.4505932629108429, + "learning_rate": 0.0015, + "loss": 2.1797, + "step": 4259 + }, + { + "epoch": 0.44936708860759494, + "grad_norm": 0.40053117275238037, + "learning_rate": 0.0015, + "loss": 2.1729, + "step": 4260 + }, + { + "epoch": 0.44947257383966244, + "grad_norm": 0.3544174134731293, + "learning_rate": 0.0015, + "loss": 2.1789, + "step": 4261 + }, + { + "epoch": 0.44957805907173, + "grad_norm": 0.3996596038341522, + "learning_rate": 0.0015, + "loss": 2.1898, + "step": 4262 + }, + { + "epoch": 0.4496835443037975, + "grad_norm": 0.3681468367576599, + "learning_rate": 0.0015, + "loss": 2.1083, + "step": 4263 + }, + { + "epoch": 0.44978902953586497, + "grad_norm": 0.3941190540790558, + "learning_rate": 0.0015, + "loss": 2.1673, + "step": 4264 + }, + { + "epoch": 0.44989451476793246, + "grad_norm": 0.34624919295310974, + "learning_rate": 0.0015, + "loss": 2.1524, + "step": 4265 + }, + { + "epoch": 0.45, + "grad_norm": 0.36586055159568787, + "learning_rate": 0.0015, + "loss": 2.1564, + "step": 4266 + }, + { + "epoch": 0.4501054852320675, + "grad_norm": 0.3687277138233185, + "learning_rate": 0.0015, + "loss": 2.1279, + "step": 4267 + }, + { + "epoch": 0.450210970464135, + "grad_norm": 0.3486778140068054, + "learning_rate": 0.0015, + "loss": 2.1582, + "step": 4268 + }, + { + "epoch": 0.45031645569620254, + "grad_norm": 0.3611434996128082, + "learning_rate": 0.0015, + "loss": 2.1551, + "step": 4269 + }, + { + "epoch": 0.45042194092827004, + "grad_norm": 0.3852330446243286, + 
"learning_rate": 0.0015, + "loss": 2.1721, + "step": 4270 + }, + { + "epoch": 0.45052742616033753, + "grad_norm": 0.33593443036079407, + "learning_rate": 0.0015, + "loss": 2.1242, + "step": 4271 + }, + { + "epoch": 0.4506329113924051, + "grad_norm": 0.4158691465854645, + "learning_rate": 0.0015, + "loss": 2.1549, + "step": 4272 + }, + { + "epoch": 0.45073839662447257, + "grad_norm": 0.3616487979888916, + "learning_rate": 0.0015, + "loss": 2.1489, + "step": 4273 + }, + { + "epoch": 0.45084388185654006, + "grad_norm": 0.38693591952323914, + "learning_rate": 0.0015, + "loss": 2.1658, + "step": 4274 + }, + { + "epoch": 0.4509493670886076, + "grad_norm": 0.36164411902427673, + "learning_rate": 0.0015, + "loss": 2.1486, + "step": 4275 + }, + { + "epoch": 0.4510548523206751, + "grad_norm": 0.4238393306732178, + "learning_rate": 0.0015, + "loss": 2.1242, + "step": 4276 + }, + { + "epoch": 0.4511603375527426, + "grad_norm": 0.3609554171562195, + "learning_rate": 0.0015, + "loss": 2.1609, + "step": 4277 + }, + { + "epoch": 0.45126582278481014, + "grad_norm": 0.42604178190231323, + "learning_rate": 0.0015, + "loss": 2.1451, + "step": 4278 + }, + { + "epoch": 0.45137130801687764, + "grad_norm": 0.43915998935699463, + "learning_rate": 0.0015, + "loss": 2.1457, + "step": 4279 + }, + { + "epoch": 0.45147679324894513, + "grad_norm": 0.44457152485847473, + "learning_rate": 0.0015, + "loss": 2.1866, + "step": 4280 + }, + { + "epoch": 0.4515822784810127, + "grad_norm": 0.38006356358528137, + "learning_rate": 0.0015, + "loss": 2.1475, + "step": 4281 + }, + { + "epoch": 0.45168776371308017, + "grad_norm": 0.38411739468574524, + "learning_rate": 0.0015, + "loss": 2.1791, + "step": 4282 + }, + { + "epoch": 0.45179324894514766, + "grad_norm": 0.3917278051376343, + "learning_rate": 0.0015, + "loss": 2.1754, + "step": 4283 + }, + { + "epoch": 0.4518987341772152, + "grad_norm": 0.34901344776153564, + "learning_rate": 0.0015, + "loss": 2.1394, + "step": 4284 + }, + { + "epoch": 
0.4520042194092827, + "grad_norm": 0.3466821610927582, + "learning_rate": 0.0015, + "loss": 2.1418, + "step": 4285 + }, + { + "epoch": 0.4521097046413502, + "grad_norm": 0.39533674716949463, + "learning_rate": 0.0015, + "loss": 2.1316, + "step": 4286 + }, + { + "epoch": 0.45221518987341774, + "grad_norm": 0.35919493436813354, + "learning_rate": 0.0015, + "loss": 2.168, + "step": 4287 + }, + { + "epoch": 0.45232067510548524, + "grad_norm": 0.3537154793739319, + "learning_rate": 0.0015, + "loss": 2.177, + "step": 4288 + }, + { + "epoch": 0.45242616033755273, + "grad_norm": 0.33653756976127625, + "learning_rate": 0.0015, + "loss": 2.1888, + "step": 4289 + }, + { + "epoch": 0.4525316455696203, + "grad_norm": 0.3354164659976959, + "learning_rate": 0.0015, + "loss": 2.1331, + "step": 4290 + }, + { + "epoch": 0.45263713080168777, + "grad_norm": 0.3700128495693207, + "learning_rate": 0.0015, + "loss": 2.1465, + "step": 4291 + }, + { + "epoch": 0.45274261603375526, + "grad_norm": 0.3583616316318512, + "learning_rate": 0.0015, + "loss": 2.1508, + "step": 4292 + }, + { + "epoch": 0.4528481012658228, + "grad_norm": 0.38022351264953613, + "learning_rate": 0.0015, + "loss": 2.1785, + "step": 4293 + }, + { + "epoch": 0.4529535864978903, + "grad_norm": 0.37246111035346985, + "learning_rate": 0.0015, + "loss": 2.175, + "step": 4294 + }, + { + "epoch": 0.4530590717299578, + "grad_norm": 0.3529389798641205, + "learning_rate": 0.0015, + "loss": 2.1395, + "step": 4295 + }, + { + "epoch": 0.4531645569620253, + "grad_norm": 0.3672192692756653, + "learning_rate": 0.0015, + "loss": 2.1493, + "step": 4296 + }, + { + "epoch": 0.45327004219409284, + "grad_norm": 0.3622736632823944, + "learning_rate": 0.0015, + "loss": 2.1558, + "step": 4297 + }, + { + "epoch": 0.45337552742616033, + "grad_norm": 0.38357409834861755, + "learning_rate": 0.0015, + "loss": 2.1505, + "step": 4298 + }, + { + "epoch": 0.4534810126582278, + "grad_norm": 0.4193418622016907, + "learning_rate": 0.0015, + "loss": 2.1347, 
+ "step": 4299 + }, + { + "epoch": 0.45358649789029537, + "grad_norm": 0.5119828581809998, + "learning_rate": 0.0015, + "loss": 2.1699, + "step": 4300 + }, + { + "epoch": 0.45369198312236286, + "grad_norm": 0.4641883075237274, + "learning_rate": 0.0015, + "loss": 2.1798, + "step": 4301 + }, + { + "epoch": 0.45379746835443036, + "grad_norm": 0.37225380539894104, + "learning_rate": 0.0015, + "loss": 2.1613, + "step": 4302 + }, + { + "epoch": 0.4539029535864979, + "grad_norm": 0.42989224195480347, + "learning_rate": 0.0015, + "loss": 2.1637, + "step": 4303 + }, + { + "epoch": 0.4540084388185654, + "grad_norm": 0.4407418370246887, + "learning_rate": 0.0015, + "loss": 2.1379, + "step": 4304 + }, + { + "epoch": 0.4541139240506329, + "grad_norm": 0.4282253384590149, + "learning_rate": 0.0015, + "loss": 2.1102, + "step": 4305 + }, + { + "epoch": 0.45421940928270044, + "grad_norm": 0.3935062289237976, + "learning_rate": 0.0015, + "loss": 2.1639, + "step": 4306 + }, + { + "epoch": 0.45432489451476793, + "grad_norm": 0.4230659008026123, + "learning_rate": 0.0015, + "loss": 2.1555, + "step": 4307 + }, + { + "epoch": 0.4544303797468354, + "grad_norm": 0.4252847135066986, + "learning_rate": 0.0015, + "loss": 2.1452, + "step": 4308 + }, + { + "epoch": 0.45453586497890297, + "grad_norm": 0.4069507420063019, + "learning_rate": 0.0015, + "loss": 2.1659, + "step": 4309 + }, + { + "epoch": 0.45464135021097046, + "grad_norm": 0.4851328730583191, + "learning_rate": 0.0015, + "loss": 2.1193, + "step": 4310 + }, + { + "epoch": 0.45474683544303796, + "grad_norm": 0.4957869052886963, + "learning_rate": 0.0015, + "loss": 2.1495, + "step": 4311 + }, + { + "epoch": 0.4548523206751055, + "grad_norm": 0.36416611075401306, + "learning_rate": 0.0015, + "loss": 2.1416, + "step": 4312 + }, + { + "epoch": 0.454957805907173, + "grad_norm": 0.6154764890670776, + "learning_rate": 0.0015, + "loss": 2.1707, + "step": 4313 + }, + { + "epoch": 0.4550632911392405, + "grad_norm": 0.5722134113311768, + 
"learning_rate": 0.0015, + "loss": 2.1109, + "step": 4314 + }, + { + "epoch": 0.45516877637130804, + "grad_norm": 0.5274409055709839, + "learning_rate": 0.0015, + "loss": 2.1822, + "step": 4315 + }, + { + "epoch": 0.45527426160337553, + "grad_norm": 0.43689727783203125, + "learning_rate": 0.0015, + "loss": 2.1361, + "step": 4316 + }, + { + "epoch": 0.455379746835443, + "grad_norm": 0.4130375385284424, + "learning_rate": 0.0015, + "loss": 2.1735, + "step": 4317 + }, + { + "epoch": 0.45548523206751057, + "grad_norm": 0.4708172678947449, + "learning_rate": 0.0015, + "loss": 2.131, + "step": 4318 + }, + { + "epoch": 0.45559071729957806, + "grad_norm": 0.39309683442115784, + "learning_rate": 0.0015, + "loss": 2.1342, + "step": 4319 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 0.4296458959579468, + "learning_rate": 0.0015, + "loss": 2.2109, + "step": 4320 + }, + { + "epoch": 0.4558016877637131, + "grad_norm": 0.3842943608760834, + "learning_rate": 0.0015, + "loss": 2.1152, + "step": 4321 + }, + { + "epoch": 0.4559071729957806, + "grad_norm": 0.4752368927001953, + "learning_rate": 0.0015, + "loss": 2.1258, + "step": 4322 + }, + { + "epoch": 0.4560126582278481, + "grad_norm": 0.41520771384239197, + "learning_rate": 0.0015, + "loss": 2.1889, + "step": 4323 + }, + { + "epoch": 0.45611814345991564, + "grad_norm": 0.4782331883907318, + "learning_rate": 0.0015, + "loss": 2.153, + "step": 4324 + }, + { + "epoch": 0.45622362869198313, + "grad_norm": 0.5569310784339905, + "learning_rate": 0.0015, + "loss": 2.1758, + "step": 4325 + }, + { + "epoch": 0.4563291139240506, + "grad_norm": 0.47908321022987366, + "learning_rate": 0.0015, + "loss": 2.1245, + "step": 4326 + }, + { + "epoch": 0.45643459915611817, + "grad_norm": 0.41977474093437195, + "learning_rate": 0.0015, + "loss": 2.1827, + "step": 4327 + }, + { + "epoch": 0.45654008438818566, + "grad_norm": 0.4153732657432556, + "learning_rate": 0.0015, + "loss": 2.1472, + "step": 4328 + }, + { + "epoch": 0.45664556962025316, 
+ "grad_norm": 0.562528133392334, + "learning_rate": 0.0015, + "loss": 2.1843, + "step": 4329 + }, + { + "epoch": 0.45675105485232065, + "grad_norm": 0.4445885717868805, + "learning_rate": 0.0015, + "loss": 2.1481, + "step": 4330 + }, + { + "epoch": 0.4568565400843882, + "grad_norm": 0.4065922498703003, + "learning_rate": 0.0015, + "loss": 2.1294, + "step": 4331 + }, + { + "epoch": 0.4569620253164557, + "grad_norm": 0.41303902864456177, + "learning_rate": 0.0015, + "loss": 2.1606, + "step": 4332 + }, + { + "epoch": 0.4570675105485232, + "grad_norm": 0.4090600311756134, + "learning_rate": 0.0015, + "loss": 2.1352, + "step": 4333 + }, + { + "epoch": 0.45717299578059073, + "grad_norm": 0.4251597821712494, + "learning_rate": 0.0015, + "loss": 2.1701, + "step": 4334 + }, + { + "epoch": 0.4572784810126582, + "grad_norm": 0.43833863735198975, + "learning_rate": 0.0015, + "loss": 2.1467, + "step": 4335 + }, + { + "epoch": 0.4573839662447257, + "grad_norm": 0.43155723810195923, + "learning_rate": 0.0015, + "loss": 2.1719, + "step": 4336 + }, + { + "epoch": 0.45748945147679326, + "grad_norm": 0.3618454933166504, + "learning_rate": 0.0015, + "loss": 2.1298, + "step": 4337 + }, + { + "epoch": 0.45759493670886076, + "grad_norm": 0.46540650725364685, + "learning_rate": 0.0015, + "loss": 2.1223, + "step": 4338 + }, + { + "epoch": 0.45770042194092825, + "grad_norm": 0.38219913840293884, + "learning_rate": 0.0015, + "loss": 2.1539, + "step": 4339 + }, + { + "epoch": 0.4578059071729958, + "grad_norm": 0.4057912230491638, + "learning_rate": 0.0015, + "loss": 2.1398, + "step": 4340 + }, + { + "epoch": 0.4579113924050633, + "grad_norm": 0.3864065408706665, + "learning_rate": 0.0015, + "loss": 2.1497, + "step": 4341 + }, + { + "epoch": 0.4580168776371308, + "grad_norm": 0.41860219836235046, + "learning_rate": 0.0015, + "loss": 2.1138, + "step": 4342 + }, + { + "epoch": 0.45812236286919833, + "grad_norm": 0.448764443397522, + "learning_rate": 0.0015, + "loss": 2.1254, + "step": 4343 + }, 
+ { + "epoch": 0.4582278481012658, + "grad_norm": 0.42405983805656433, + "learning_rate": 0.0015, + "loss": 2.1617, + "step": 4344 + }, + { + "epoch": 0.4583333333333333, + "grad_norm": 0.3501245081424713, + "learning_rate": 0.0015, + "loss": 2.1491, + "step": 4345 + }, + { + "epoch": 0.45843881856540086, + "grad_norm": 0.44190043210983276, + "learning_rate": 0.0015, + "loss": 2.1822, + "step": 4346 + }, + { + "epoch": 0.45854430379746836, + "grad_norm": 0.42783239483833313, + "learning_rate": 0.0015, + "loss": 2.1384, + "step": 4347 + }, + { + "epoch": 0.45864978902953585, + "grad_norm": 0.39620667695999146, + "learning_rate": 0.0015, + "loss": 2.1514, + "step": 4348 + }, + { + "epoch": 0.4587552742616034, + "grad_norm": 0.34765273332595825, + "learning_rate": 0.0015, + "loss": 2.1714, + "step": 4349 + }, + { + "epoch": 0.4588607594936709, + "grad_norm": 0.37337571382522583, + "learning_rate": 0.0015, + "loss": 2.1449, + "step": 4350 + }, + { + "epoch": 0.4589662447257384, + "grad_norm": 0.4194610118865967, + "learning_rate": 0.0015, + "loss": 2.1826, + "step": 4351 + }, + { + "epoch": 0.45907172995780593, + "grad_norm": 0.40279343724250793, + "learning_rate": 0.0015, + "loss": 2.1952, + "step": 4352 + }, + { + "epoch": 0.4591772151898734, + "grad_norm": 0.3611183762550354, + "learning_rate": 0.0015, + "loss": 2.1525, + "step": 4353 + }, + { + "epoch": 0.4592827004219409, + "grad_norm": 0.4029195308685303, + "learning_rate": 0.0015, + "loss": 2.1608, + "step": 4354 + }, + { + "epoch": 0.45938818565400846, + "grad_norm": 0.3219645023345947, + "learning_rate": 0.0015, + "loss": 2.1472, + "step": 4355 + }, + { + "epoch": 0.45949367088607596, + "grad_norm": 0.4230107367038727, + "learning_rate": 0.0015, + "loss": 2.1354, + "step": 4356 + }, + { + "epoch": 0.45959915611814345, + "grad_norm": 0.40178802609443665, + "learning_rate": 0.0015, + "loss": 2.1673, + "step": 4357 + }, + { + "epoch": 0.459704641350211, + "grad_norm": 0.4051763117313385, + "learning_rate": 
0.0015, + "loss": 2.1679, + "step": 4358 + }, + { + "epoch": 0.4598101265822785, + "grad_norm": 0.38236573338508606, + "learning_rate": 0.0015, + "loss": 2.1551, + "step": 4359 + }, + { + "epoch": 0.459915611814346, + "grad_norm": 0.44868388772010803, + "learning_rate": 0.0015, + "loss": 2.1297, + "step": 4360 + }, + { + "epoch": 0.46002109704641353, + "grad_norm": 0.44350606203079224, + "learning_rate": 0.0015, + "loss": 2.1395, + "step": 4361 + }, + { + "epoch": 0.460126582278481, + "grad_norm": 0.43707889318466187, + "learning_rate": 0.0015, + "loss": 2.1345, + "step": 4362 + }, + { + "epoch": 0.4602320675105485, + "grad_norm": 0.407160222530365, + "learning_rate": 0.0015, + "loss": 2.1372, + "step": 4363 + }, + { + "epoch": 0.460337552742616, + "grad_norm": 0.5132961273193359, + "learning_rate": 0.0015, + "loss": 2.1639, + "step": 4364 + }, + { + "epoch": 0.46044303797468356, + "grad_norm": 0.4089624881744385, + "learning_rate": 0.0015, + "loss": 2.15, + "step": 4365 + }, + { + "epoch": 0.46054852320675105, + "grad_norm": 0.4111815094947815, + "learning_rate": 0.0015, + "loss": 2.125, + "step": 4366 + }, + { + "epoch": 0.46065400843881854, + "grad_norm": 0.47905924916267395, + "learning_rate": 0.0015, + "loss": 2.1465, + "step": 4367 + }, + { + "epoch": 0.4607594936708861, + "grad_norm": 0.40657544136047363, + "learning_rate": 0.0015, + "loss": 2.1293, + "step": 4368 + }, + { + "epoch": 0.4608649789029536, + "grad_norm": 0.4421742856502533, + "learning_rate": 0.0015, + "loss": 2.1407, + "step": 4369 + }, + { + "epoch": 0.4609704641350211, + "grad_norm": 0.4439545273780823, + "learning_rate": 0.0015, + "loss": 2.1482, + "step": 4370 + }, + { + "epoch": 0.4610759493670886, + "grad_norm": 0.47736266255378723, + "learning_rate": 0.0015, + "loss": 2.1096, + "step": 4371 + }, + { + "epoch": 0.4611814345991561, + "grad_norm": 0.3569861054420471, + "learning_rate": 0.0015, + "loss": 2.1491, + "step": 4372 + }, + { + "epoch": 0.4612869198312236, + "grad_norm": 
0.44926756620407104, + "learning_rate": 0.0015, + "loss": 2.1156, + "step": 4373 + }, + { + "epoch": 0.46139240506329116, + "grad_norm": 0.4570119082927704, + "learning_rate": 0.0015, + "loss": 2.1546, + "step": 4374 + }, + { + "epoch": 0.46149789029535865, + "grad_norm": 0.3828253149986267, + "learning_rate": 0.0015, + "loss": 2.1432, + "step": 4375 + }, + { + "epoch": 0.46160337552742614, + "grad_norm": 0.41572731733322144, + "learning_rate": 0.0015, + "loss": 2.1515, + "step": 4376 + }, + { + "epoch": 0.4617088607594937, + "grad_norm": 0.47938022017478943, + "learning_rate": 0.0015, + "loss": 2.1322, + "step": 4377 + }, + { + "epoch": 0.4618143459915612, + "grad_norm": 0.370201975107193, + "learning_rate": 0.0015, + "loss": 2.1529, + "step": 4378 + }, + { + "epoch": 0.4619198312236287, + "grad_norm": 0.39518478512763977, + "learning_rate": 0.0015, + "loss": 2.1676, + "step": 4379 + }, + { + "epoch": 0.4620253164556962, + "grad_norm": 0.4063190519809723, + "learning_rate": 0.0015, + "loss": 2.1584, + "step": 4380 + }, + { + "epoch": 0.4621308016877637, + "grad_norm": 0.5086203217506409, + "learning_rate": 0.0015, + "loss": 2.1775, + "step": 4381 + }, + { + "epoch": 0.4622362869198312, + "grad_norm": 0.3895716369152069, + "learning_rate": 0.0015, + "loss": 2.127, + "step": 4382 + }, + { + "epoch": 0.46234177215189876, + "grad_norm": 0.45404258370399475, + "learning_rate": 0.0015, + "loss": 2.1257, + "step": 4383 + }, + { + "epoch": 0.46244725738396625, + "grad_norm": 0.41899293661117554, + "learning_rate": 0.0015, + "loss": 2.1263, + "step": 4384 + }, + { + "epoch": 0.46255274261603374, + "grad_norm": 0.45937690138816833, + "learning_rate": 0.0015, + "loss": 2.1536, + "step": 4385 + }, + { + "epoch": 0.4626582278481013, + "grad_norm": 0.5787555575370789, + "learning_rate": 0.0015, + "loss": 2.1555, + "step": 4386 + }, + { + "epoch": 0.4627637130801688, + "grad_norm": 0.45703932642936707, + "learning_rate": 0.0015, + "loss": 2.1469, + "step": 4387 + }, + { + 
"epoch": 0.4628691983122363, + "grad_norm": 0.4314914643764496, + "learning_rate": 0.0015, + "loss": 2.1411, + "step": 4388 + }, + { + "epoch": 0.4629746835443038, + "grad_norm": 0.5410627722740173, + "learning_rate": 0.0015, + "loss": 2.1495, + "step": 4389 + }, + { + "epoch": 0.4630801687763713, + "grad_norm": 0.5534672141075134, + "learning_rate": 0.0015, + "loss": 2.1313, + "step": 4390 + }, + { + "epoch": 0.4631856540084388, + "grad_norm": 0.42480623722076416, + "learning_rate": 0.0015, + "loss": 2.1615, + "step": 4391 + }, + { + "epoch": 0.46329113924050636, + "grad_norm": 0.4242737293243408, + "learning_rate": 0.0015, + "loss": 2.1131, + "step": 4392 + }, + { + "epoch": 0.46339662447257385, + "grad_norm": 0.430671751499176, + "learning_rate": 0.0015, + "loss": 2.1426, + "step": 4393 + }, + { + "epoch": 0.46350210970464134, + "grad_norm": 0.4358769357204437, + "learning_rate": 0.0015, + "loss": 2.1464, + "step": 4394 + }, + { + "epoch": 0.46360759493670883, + "grad_norm": 0.4397463798522949, + "learning_rate": 0.0015, + "loss": 2.1721, + "step": 4395 + }, + { + "epoch": 0.4637130801687764, + "grad_norm": 0.44792088866233826, + "learning_rate": 0.0015, + "loss": 2.1246, + "step": 4396 + }, + { + "epoch": 0.4638185654008439, + "grad_norm": 0.38144564628601074, + "learning_rate": 0.0015, + "loss": 2.1441, + "step": 4397 + }, + { + "epoch": 0.46392405063291137, + "grad_norm": 0.4652213752269745, + "learning_rate": 0.0015, + "loss": 2.1649, + "step": 4398 + }, + { + "epoch": 0.4640295358649789, + "grad_norm": 0.5228257179260254, + "learning_rate": 0.0015, + "loss": 2.1203, + "step": 4399 + }, + { + "epoch": 0.4641350210970464, + "grad_norm": 0.37017494440078735, + "learning_rate": 0.0015, + "loss": 2.1819, + "step": 4400 + }, + { + "epoch": 0.4642405063291139, + "grad_norm": 0.3955875337123871, + "learning_rate": 0.0015, + "loss": 2.1362, + "step": 4401 + }, + { + "epoch": 0.46434599156118145, + "grad_norm": 0.43338778614997864, + "learning_rate": 0.0015, + 
"loss": 2.1542, + "step": 4402 + }, + { + "epoch": 0.46445147679324894, + "grad_norm": 0.47363898158073425, + "learning_rate": 0.0015, + "loss": 2.1774, + "step": 4403 + }, + { + "epoch": 0.46455696202531643, + "grad_norm": 0.4334666430950165, + "learning_rate": 0.0015, + "loss": 2.1671, + "step": 4404 + }, + { + "epoch": 0.464662447257384, + "grad_norm": 0.3417091369628906, + "learning_rate": 0.0015, + "loss": 2.1372, + "step": 4405 + }, + { + "epoch": 0.4647679324894515, + "grad_norm": 0.37285029888153076, + "learning_rate": 0.0015, + "loss": 2.1603, + "step": 4406 + }, + { + "epoch": 0.46487341772151897, + "grad_norm": 0.3570164442062378, + "learning_rate": 0.0015, + "loss": 2.1454, + "step": 4407 + }, + { + "epoch": 0.4649789029535865, + "grad_norm": 0.3833616077899933, + "learning_rate": 0.0015, + "loss": 2.151, + "step": 4408 + }, + { + "epoch": 0.465084388185654, + "grad_norm": 0.42945703864097595, + "learning_rate": 0.0015, + "loss": 2.1434, + "step": 4409 + }, + { + "epoch": 0.4651898734177215, + "grad_norm": 0.45817941427230835, + "learning_rate": 0.0015, + "loss": 2.1461, + "step": 4410 + }, + { + "epoch": 0.46529535864978905, + "grad_norm": 0.40012818574905396, + "learning_rate": 0.0015, + "loss": 2.1149, + "step": 4411 + }, + { + "epoch": 0.46540084388185654, + "grad_norm": 0.4315342903137207, + "learning_rate": 0.0015, + "loss": 2.1731, + "step": 4412 + }, + { + "epoch": 0.46550632911392403, + "grad_norm": 0.44470319151878357, + "learning_rate": 0.0015, + "loss": 2.1193, + "step": 4413 + }, + { + "epoch": 0.4656118143459916, + "grad_norm": 0.4701484441757202, + "learning_rate": 0.0015, + "loss": 2.122, + "step": 4414 + }, + { + "epoch": 0.4657172995780591, + "grad_norm": 0.3588160574436188, + "learning_rate": 0.0015, + "loss": 2.1403, + "step": 4415 + }, + { + "epoch": 0.46582278481012657, + "grad_norm": 0.3676307797431946, + "learning_rate": 0.0015, + "loss": 2.1623, + "step": 4416 + }, + { + "epoch": 0.4659282700421941, + "grad_norm": 
0.4195525646209717, + "learning_rate": 0.0015, + "loss": 2.1211, + "step": 4417 + }, + { + "epoch": 0.4660337552742616, + "grad_norm": 0.3841911554336548, + "learning_rate": 0.0015, + "loss": 2.152, + "step": 4418 + }, + { + "epoch": 0.4661392405063291, + "grad_norm": 0.400666743516922, + "learning_rate": 0.0015, + "loss": 2.1799, + "step": 4419 + }, + { + "epoch": 0.46624472573839665, + "grad_norm": 0.4350036084651947, + "learning_rate": 0.0015, + "loss": 2.1264, + "step": 4420 + }, + { + "epoch": 0.46635021097046414, + "grad_norm": 0.3816302716732025, + "learning_rate": 0.0015, + "loss": 2.1435, + "step": 4421 + }, + { + "epoch": 0.46645569620253163, + "grad_norm": 0.43761202692985535, + "learning_rate": 0.0015, + "loss": 2.1563, + "step": 4422 + }, + { + "epoch": 0.4665611814345992, + "grad_norm": 0.4921247065067291, + "learning_rate": 0.0015, + "loss": 2.1674, + "step": 4423 + }, + { + "epoch": 0.4666666666666667, + "grad_norm": 0.4246840476989746, + "learning_rate": 0.0015, + "loss": 2.1671, + "step": 4424 + }, + { + "epoch": 0.46677215189873417, + "grad_norm": 0.4151056408882141, + "learning_rate": 0.0015, + "loss": 2.1258, + "step": 4425 + }, + { + "epoch": 0.4668776371308017, + "grad_norm": 0.6184752583503723, + "learning_rate": 0.0015, + "loss": 2.1355, + "step": 4426 + }, + { + "epoch": 0.4669831223628692, + "grad_norm": 0.42498233914375305, + "learning_rate": 0.0015, + "loss": 2.1394, + "step": 4427 + }, + { + "epoch": 0.4670886075949367, + "grad_norm": 0.48781099915504456, + "learning_rate": 0.0015, + "loss": 2.1477, + "step": 4428 + }, + { + "epoch": 0.4671940928270042, + "grad_norm": 0.5822206735610962, + "learning_rate": 0.0015, + "loss": 2.1801, + "step": 4429 + }, + { + "epoch": 0.46729957805907174, + "grad_norm": 0.5129970908164978, + "learning_rate": 0.0015, + "loss": 2.1086, + "step": 4430 + }, + { + "epoch": 0.46740506329113923, + "grad_norm": 0.3537416458129883, + "learning_rate": 0.0015, + "loss": 2.1256, + "step": 4431 + }, + { + "epoch": 
0.4675105485232067, + "grad_norm": 0.5207294225692749, + "learning_rate": 0.0015, + "loss": 2.1581, + "step": 4432 + }, + { + "epoch": 0.4676160337552743, + "grad_norm": 0.4164009690284729, + "learning_rate": 0.0015, + "loss": 2.1418, + "step": 4433 + }, + { + "epoch": 0.46772151898734177, + "grad_norm": 0.3845253586769104, + "learning_rate": 0.0015, + "loss": 2.1593, + "step": 4434 + }, + { + "epoch": 0.46782700421940926, + "grad_norm": 0.4191027283668518, + "learning_rate": 0.0015, + "loss": 2.1157, + "step": 4435 + }, + { + "epoch": 0.4679324894514768, + "grad_norm": 0.38029664754867554, + "learning_rate": 0.0015, + "loss": 2.1147, + "step": 4436 + }, + { + "epoch": 0.4680379746835443, + "grad_norm": 0.3993150591850281, + "learning_rate": 0.0015, + "loss": 2.1252, + "step": 4437 + }, + { + "epoch": 0.4681434599156118, + "grad_norm": 0.4099550247192383, + "learning_rate": 0.0015, + "loss": 2.1589, + "step": 4438 + }, + { + "epoch": 0.46824894514767934, + "grad_norm": 0.42703694105148315, + "learning_rate": 0.0015, + "loss": 2.1209, + "step": 4439 + }, + { + "epoch": 0.46835443037974683, + "grad_norm": 0.37458935379981995, + "learning_rate": 0.0015, + "loss": 2.1615, + "step": 4440 + }, + { + "epoch": 0.4684599156118143, + "grad_norm": 0.43191251158714294, + "learning_rate": 0.0015, + "loss": 2.1308, + "step": 4441 + }, + { + "epoch": 0.4685654008438819, + "grad_norm": 0.42292484641075134, + "learning_rate": 0.0015, + "loss": 2.1509, + "step": 4442 + }, + { + "epoch": 0.46867088607594937, + "grad_norm": 0.3894800841808319, + "learning_rate": 0.0015, + "loss": 2.1572, + "step": 4443 + }, + { + "epoch": 0.46877637130801686, + "grad_norm": 0.40542325377464294, + "learning_rate": 0.0015, + "loss": 2.1294, + "step": 4444 + }, + { + "epoch": 0.4688818565400844, + "grad_norm": 0.42681625485420227, + "learning_rate": 0.0015, + "loss": 2.1543, + "step": 4445 + }, + { + "epoch": 0.4689873417721519, + "grad_norm": 0.4088082015514374, + "learning_rate": 0.0015, + "loss": 
2.1233, + "step": 4446 + }, + { + "epoch": 0.4690928270042194, + "grad_norm": 0.4060318171977997, + "learning_rate": 0.0015, + "loss": 2.1611, + "step": 4447 + }, + { + "epoch": 0.46919831223628694, + "grad_norm": 0.46903544664382935, + "learning_rate": 0.0015, + "loss": 2.1606, + "step": 4448 + }, + { + "epoch": 0.46930379746835443, + "grad_norm": 0.44681355357170105, + "learning_rate": 0.0015, + "loss": 2.1618, + "step": 4449 + }, + { + "epoch": 0.4694092827004219, + "grad_norm": 0.4293249547481537, + "learning_rate": 0.0015, + "loss": 2.1248, + "step": 4450 + }, + { + "epoch": 0.4695147679324895, + "grad_norm": 0.42330414056777954, + "learning_rate": 0.0015, + "loss": 2.1319, + "step": 4451 + }, + { + "epoch": 0.46962025316455697, + "grad_norm": 0.4294838309288025, + "learning_rate": 0.0015, + "loss": 2.1152, + "step": 4452 + }, + { + "epoch": 0.46972573839662446, + "grad_norm": 0.3676440119743347, + "learning_rate": 0.0015, + "loss": 2.1563, + "step": 4453 + }, + { + "epoch": 0.469831223628692, + "grad_norm": 0.45238786935806274, + "learning_rate": 0.0015, + "loss": 2.1083, + "step": 4454 + }, + { + "epoch": 0.4699367088607595, + "grad_norm": 0.4146382510662079, + "learning_rate": 0.0015, + "loss": 2.122, + "step": 4455 + }, + { + "epoch": 0.470042194092827, + "grad_norm": 0.5232771635055542, + "learning_rate": 0.0015, + "loss": 2.1189, + "step": 4456 + }, + { + "epoch": 0.47014767932489454, + "grad_norm": 0.4282921850681305, + "learning_rate": 0.0015, + "loss": 2.1233, + "step": 4457 + }, + { + "epoch": 0.47025316455696203, + "grad_norm": 0.43301934003829956, + "learning_rate": 0.0015, + "loss": 2.1132, + "step": 4458 + }, + { + "epoch": 0.4703586497890295, + "grad_norm": 0.521898090839386, + "learning_rate": 0.0015, + "loss": 2.1489, + "step": 4459 + }, + { + "epoch": 0.4704641350210971, + "grad_norm": 0.459868848323822, + "learning_rate": 0.0015, + "loss": 2.1716, + "step": 4460 + }, + { + "epoch": 0.47056962025316457, + "grad_norm": 0.3787025809288025, + 
"learning_rate": 0.0015, + "loss": 2.135, + "step": 4461 + }, + { + "epoch": 0.47067510548523206, + "grad_norm": 0.5067360401153564, + "learning_rate": 0.0015, + "loss": 2.1365, + "step": 4462 + }, + { + "epoch": 0.47078059071729955, + "grad_norm": 0.4492904841899872, + "learning_rate": 0.0015, + "loss": 2.123, + "step": 4463 + }, + { + "epoch": 0.4708860759493671, + "grad_norm": 0.3507622480392456, + "learning_rate": 0.0015, + "loss": 2.1316, + "step": 4464 + }, + { + "epoch": 0.4709915611814346, + "grad_norm": 0.45040178298950195, + "learning_rate": 0.0015, + "loss": 2.1354, + "step": 4465 + }, + { + "epoch": 0.4710970464135021, + "grad_norm": 0.42860016226768494, + "learning_rate": 0.0015, + "loss": 2.1495, + "step": 4466 + }, + { + "epoch": 0.47120253164556963, + "grad_norm": 0.40120846033096313, + "learning_rate": 0.0015, + "loss": 2.1268, + "step": 4467 + }, + { + "epoch": 0.4713080168776371, + "grad_norm": 0.39659884572029114, + "learning_rate": 0.0015, + "loss": 2.1368, + "step": 4468 + }, + { + "epoch": 0.4714135021097046, + "grad_norm": 0.45763394236564636, + "learning_rate": 0.0015, + "loss": 2.1659, + "step": 4469 + }, + { + "epoch": 0.47151898734177217, + "grad_norm": 0.36481595039367676, + "learning_rate": 0.0015, + "loss": 2.1583, + "step": 4470 + }, + { + "epoch": 0.47162447257383966, + "grad_norm": 0.45294299721717834, + "learning_rate": 0.0015, + "loss": 2.1209, + "step": 4471 + }, + { + "epoch": 0.47172995780590715, + "grad_norm": 0.4750426411628723, + "learning_rate": 0.0015, + "loss": 2.124, + "step": 4472 + }, + { + "epoch": 0.4718354430379747, + "grad_norm": 0.3778705894947052, + "learning_rate": 0.0015, + "loss": 2.1699, + "step": 4473 + }, + { + "epoch": 0.4719409282700422, + "grad_norm": 0.41777294874191284, + "learning_rate": 0.0015, + "loss": 2.1692, + "step": 4474 + }, + { + "epoch": 0.4720464135021097, + "grad_norm": 0.4060443341732025, + "learning_rate": 0.0015, + "loss": 2.1744, + "step": 4475 + }, + { + "epoch": 0.47215189873417723, 
+ "grad_norm": 0.3798860013484955, + "learning_rate": 0.0015, + "loss": 2.1206, + "step": 4476 + }, + { + "epoch": 0.4722573839662447, + "grad_norm": 0.39848294854164124, + "learning_rate": 0.0015, + "loss": 2.1468, + "step": 4477 + }, + { + "epoch": 0.4723628691983122, + "grad_norm": 0.41429847478866577, + "learning_rate": 0.0015, + "loss": 2.1608, + "step": 4478 + }, + { + "epoch": 0.47246835443037977, + "grad_norm": 0.3674967288970947, + "learning_rate": 0.0015, + "loss": 2.1253, + "step": 4479 + }, + { + "epoch": 0.47257383966244726, + "grad_norm": 0.3888319134712219, + "learning_rate": 0.0015, + "loss": 2.1293, + "step": 4480 + }, + { + "epoch": 0.47267932489451475, + "grad_norm": 0.389058381319046, + "learning_rate": 0.0015, + "loss": 2.1071, + "step": 4481 + }, + { + "epoch": 0.4727848101265823, + "grad_norm": 0.3779825270175934, + "learning_rate": 0.0015, + "loss": 2.1198, + "step": 4482 + }, + { + "epoch": 0.4728902953586498, + "grad_norm": 0.4275137186050415, + "learning_rate": 0.0015, + "loss": 2.1121, + "step": 4483 + }, + { + "epoch": 0.4729957805907173, + "grad_norm": 0.37659335136413574, + "learning_rate": 0.0015, + "loss": 2.1347, + "step": 4484 + }, + { + "epoch": 0.47310126582278483, + "grad_norm": 0.3684637248516083, + "learning_rate": 0.0015, + "loss": 2.1265, + "step": 4485 + }, + { + "epoch": 0.4732067510548523, + "grad_norm": 0.4403951168060303, + "learning_rate": 0.0015, + "loss": 2.1276, + "step": 4486 + }, + { + "epoch": 0.4733122362869198, + "grad_norm": 0.3765217959880829, + "learning_rate": 0.0015, + "loss": 2.1073, + "step": 4487 + }, + { + "epoch": 0.47341772151898737, + "grad_norm": 0.37279248237609863, + "learning_rate": 0.0015, + "loss": 2.1486, + "step": 4488 + }, + { + "epoch": 0.47352320675105486, + "grad_norm": 0.4244574308395386, + "learning_rate": 0.0015, + "loss": 2.1272, + "step": 4489 + }, + { + "epoch": 0.47362869198312235, + "grad_norm": 0.521838366985321, + "learning_rate": 0.0015, + "loss": 2.1677, + "step": 4490 + }, 
+ { + "epoch": 0.4737341772151899, + "grad_norm": 0.41214075684547424, + "learning_rate": 0.0015, + "loss": 2.1213, + "step": 4491 + }, + { + "epoch": 0.4738396624472574, + "grad_norm": 0.46184924244880676, + "learning_rate": 0.0015, + "loss": 2.1416, + "step": 4492 + }, + { + "epoch": 0.4739451476793249, + "grad_norm": 0.4662754237651825, + "learning_rate": 0.0015, + "loss": 2.0992, + "step": 4493 + }, + { + "epoch": 0.4740506329113924, + "grad_norm": 0.45586299896240234, + "learning_rate": 0.0015, + "loss": 2.114, + "step": 4494 + }, + { + "epoch": 0.4741561181434599, + "grad_norm": 0.4234987497329712, + "learning_rate": 0.0015, + "loss": 2.1684, + "step": 4495 + }, + { + "epoch": 0.4742616033755274, + "grad_norm": 0.4842904806137085, + "learning_rate": 0.0015, + "loss": 2.1476, + "step": 4496 + }, + { + "epoch": 0.4743670886075949, + "grad_norm": 0.43310460448265076, + "learning_rate": 0.0015, + "loss": 2.1257, + "step": 4497 + }, + { + "epoch": 0.47447257383966246, + "grad_norm": 0.47363394498825073, + "learning_rate": 0.0015, + "loss": 2.1329, + "step": 4498 + }, + { + "epoch": 0.47457805907172995, + "grad_norm": 0.4466949999332428, + "learning_rate": 0.0015, + "loss": 2.138, + "step": 4499 + }, + { + "epoch": 0.47468354430379744, + "grad_norm": 0.48626312613487244, + "learning_rate": 0.0015, + "loss": 2.1345, + "step": 4500 + }, + { + "epoch": 0.474789029535865, + "grad_norm": 0.3992825746536255, + "learning_rate": 0.0015, + "loss": 2.1475, + "step": 4501 + }, + { + "epoch": 0.4748945147679325, + "grad_norm": 0.420169860124588, + "learning_rate": 0.0015, + "loss": 2.1575, + "step": 4502 + }, + { + "epoch": 0.475, + "grad_norm": 0.4167858064174652, + "learning_rate": 0.0015, + "loss": 2.124, + "step": 4503 + }, + { + "epoch": 0.4751054852320675, + "grad_norm": 0.3821766972541809, + "learning_rate": 0.0015, + "loss": 2.132, + "step": 4504 + }, + { + "epoch": 0.475210970464135, + "grad_norm": 0.39751482009887695, + "learning_rate": 0.0015, + "loss": 2.1357, + 
"step": 4505 + }, + { + "epoch": 0.4753164556962025, + "grad_norm": 0.382872998714447, + "learning_rate": 0.0015, + "loss": 2.1134, + "step": 4506 + }, + { + "epoch": 0.47542194092827006, + "grad_norm": 0.4330842196941376, + "learning_rate": 0.0015, + "loss": 2.157, + "step": 4507 + }, + { + "epoch": 0.47552742616033755, + "grad_norm": 0.3957405388355255, + "learning_rate": 0.0015, + "loss": 2.1253, + "step": 4508 + }, + { + "epoch": 0.47563291139240504, + "grad_norm": 0.4118802845478058, + "learning_rate": 0.0015, + "loss": 2.1369, + "step": 4509 + }, + { + "epoch": 0.4757383966244726, + "grad_norm": 0.4010600447654724, + "learning_rate": 0.0015, + "loss": 2.0944, + "step": 4510 + }, + { + "epoch": 0.4758438818565401, + "grad_norm": 0.3591671288013458, + "learning_rate": 0.0015, + "loss": 2.1208, + "step": 4511 + }, + { + "epoch": 0.4759493670886076, + "grad_norm": 0.4428859353065491, + "learning_rate": 0.0015, + "loss": 2.1164, + "step": 4512 + }, + { + "epoch": 0.4760548523206751, + "grad_norm": 0.40479639172554016, + "learning_rate": 0.0015, + "loss": 2.1532, + "step": 4513 + }, + { + "epoch": 0.4761603375527426, + "grad_norm": 0.38171690702438354, + "learning_rate": 0.0015, + "loss": 2.1202, + "step": 4514 + }, + { + "epoch": 0.4762658227848101, + "grad_norm": 0.43925219774246216, + "learning_rate": 0.0015, + "loss": 2.1472, + "step": 4515 + }, + { + "epoch": 0.47637130801687766, + "grad_norm": 0.3399203419685364, + "learning_rate": 0.0015, + "loss": 2.1221, + "step": 4516 + }, + { + "epoch": 0.47647679324894515, + "grad_norm": 0.3962112069129944, + "learning_rate": 0.0015, + "loss": 2.1132, + "step": 4517 + }, + { + "epoch": 0.47658227848101264, + "grad_norm": 0.40336742997169495, + "learning_rate": 0.0015, + "loss": 2.0988, + "step": 4518 + }, + { + "epoch": 0.4766877637130802, + "grad_norm": 0.422545462846756, + "learning_rate": 0.0015, + "loss": 2.0832, + "step": 4519 + }, + { + "epoch": 0.4767932489451477, + "grad_norm": 0.45571500062942505, + 
"learning_rate": 0.0015, + "loss": 2.0998, + "step": 4520 + }, + { + "epoch": 0.4768987341772152, + "grad_norm": 0.3805270493030548, + "learning_rate": 0.0015, + "loss": 2.1334, + "step": 4521 + }, + { + "epoch": 0.4770042194092827, + "grad_norm": 0.3481237292289734, + "learning_rate": 0.0015, + "loss": 2.1494, + "step": 4522 + }, + { + "epoch": 0.4771097046413502, + "grad_norm": 0.4136608839035034, + "learning_rate": 0.0015, + "loss": 2.12, + "step": 4523 + }, + { + "epoch": 0.4772151898734177, + "grad_norm": 0.3403613865375519, + "learning_rate": 0.0015, + "loss": 2.1075, + "step": 4524 + }, + { + "epoch": 0.47732067510548526, + "grad_norm": 0.43155184388160706, + "learning_rate": 0.0015, + "loss": 2.1223, + "step": 4525 + }, + { + "epoch": 0.47742616033755275, + "grad_norm": 0.39738142490386963, + "learning_rate": 0.0015, + "loss": 2.1643, + "step": 4526 + }, + { + "epoch": 0.47753164556962024, + "grad_norm": 0.4443683922290802, + "learning_rate": 0.0015, + "loss": 2.1192, + "step": 4527 + }, + { + "epoch": 0.47763713080168774, + "grad_norm": 0.4055785834789276, + "learning_rate": 0.0015, + "loss": 2.1424, + "step": 4528 + }, + { + "epoch": 0.4777426160337553, + "grad_norm": 0.36083826422691345, + "learning_rate": 0.0015, + "loss": 2.119, + "step": 4529 + }, + { + "epoch": 0.4778481012658228, + "grad_norm": 0.5197532773017883, + "learning_rate": 0.0015, + "loss": 2.1299, + "step": 4530 + }, + { + "epoch": 0.47795358649789027, + "grad_norm": 0.4112592041492462, + "learning_rate": 0.0015, + "loss": 2.135, + "step": 4531 + }, + { + "epoch": 0.4780590717299578, + "grad_norm": 0.46575915813446045, + "learning_rate": 0.0015, + "loss": 2.1301, + "step": 4532 + }, + { + "epoch": 0.4781645569620253, + "grad_norm": 0.3637301027774811, + "learning_rate": 0.0015, + "loss": 2.1478, + "step": 4533 + }, + { + "epoch": 0.4782700421940928, + "grad_norm": 0.3983105719089508, + "learning_rate": 0.0015, + "loss": 2.1429, + "step": 4534 + }, + { + "epoch": 0.47837552742616035, + 
"grad_norm": 0.4245951771736145, + "learning_rate": 0.0015, + "loss": 2.1119, + "step": 4535 + }, + { + "epoch": 0.47848101265822784, + "grad_norm": 0.3980995714664459, + "learning_rate": 0.0015, + "loss": 2.1322, + "step": 4536 + }, + { + "epoch": 0.47858649789029534, + "grad_norm": 0.3734186589717865, + "learning_rate": 0.0015, + "loss": 2.1301, + "step": 4537 + }, + { + "epoch": 0.4786919831223629, + "grad_norm": 0.40586015582084656, + "learning_rate": 0.0015, + "loss": 2.1548, + "step": 4538 + }, + { + "epoch": 0.4787974683544304, + "grad_norm": 0.36586496233940125, + "learning_rate": 0.0015, + "loss": 2.1338, + "step": 4539 + }, + { + "epoch": 0.47890295358649787, + "grad_norm": 0.4750443398952484, + "learning_rate": 0.0015, + "loss": 2.1532, + "step": 4540 + }, + { + "epoch": 0.4790084388185654, + "grad_norm": 0.3998688757419586, + "learning_rate": 0.0015, + "loss": 2.1202, + "step": 4541 + }, + { + "epoch": 0.4791139240506329, + "grad_norm": 0.41658613085746765, + "learning_rate": 0.0015, + "loss": 2.1025, + "step": 4542 + }, + { + "epoch": 0.4792194092827004, + "grad_norm": 0.4608440399169922, + "learning_rate": 0.0015, + "loss": 2.1602, + "step": 4543 + }, + { + "epoch": 0.47932489451476795, + "grad_norm": 0.400722473859787, + "learning_rate": 0.0015, + "loss": 2.1464, + "step": 4544 + }, + { + "epoch": 0.47943037974683544, + "grad_norm": 0.44909730553627014, + "learning_rate": 0.0015, + "loss": 2.1421, + "step": 4545 + }, + { + "epoch": 0.47953586497890294, + "grad_norm": 0.4728160500526428, + "learning_rate": 0.0015, + "loss": 2.1353, + "step": 4546 + }, + { + "epoch": 0.4796413502109705, + "grad_norm": 0.3494606912136078, + "learning_rate": 0.0015, + "loss": 2.1423, + "step": 4547 + }, + { + "epoch": 0.479746835443038, + "grad_norm": 0.4962591826915741, + "learning_rate": 0.0015, + "loss": 2.1588, + "step": 4548 + }, + { + "epoch": 0.47985232067510547, + "grad_norm": 0.46616873145103455, + "learning_rate": 0.0015, + "loss": 2.1535, + "step": 4549 + }, + 
{ + "epoch": 0.479957805907173, + "grad_norm": 0.3610198497772217, + "learning_rate": 0.0015, + "loss": 2.0959, + "step": 4550 + }, + { + "epoch": 0.4800632911392405, + "grad_norm": 0.577853262424469, + "learning_rate": 0.0015, + "loss": 2.1161, + "step": 4551 + }, + { + "epoch": 0.480168776371308, + "grad_norm": 0.5892685651779175, + "learning_rate": 0.0015, + "loss": 2.1237, + "step": 4552 + }, + { + "epoch": 0.48027426160337555, + "grad_norm": 0.4363468885421753, + "learning_rate": 0.0015, + "loss": 2.1075, + "step": 4553 + }, + { + "epoch": 0.48037974683544304, + "grad_norm": 0.5506545305252075, + "learning_rate": 0.0015, + "loss": 2.1546, + "step": 4554 + }, + { + "epoch": 0.48048523206751054, + "grad_norm": 0.6101478338241577, + "learning_rate": 0.0015, + "loss": 2.1088, + "step": 4555 + }, + { + "epoch": 0.4805907172995781, + "grad_norm": 0.42150211334228516, + "learning_rate": 0.0015, + "loss": 2.1459, + "step": 4556 + }, + { + "epoch": 0.4806962025316456, + "grad_norm": 0.477932870388031, + "learning_rate": 0.0015, + "loss": 2.112, + "step": 4557 + }, + { + "epoch": 0.48080168776371307, + "grad_norm": 0.46473073959350586, + "learning_rate": 0.0015, + "loss": 2.1316, + "step": 4558 + }, + { + "epoch": 0.48090717299578056, + "grad_norm": 0.4322526454925537, + "learning_rate": 0.0015, + "loss": 2.1353, + "step": 4559 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 0.46533384919166565, + "learning_rate": 0.0015, + "loss": 2.122, + "step": 4560 + }, + { + "epoch": 0.4811181434599156, + "grad_norm": 0.5032854676246643, + "learning_rate": 0.0015, + "loss": 2.0899, + "step": 4561 + }, + { + "epoch": 0.4812236286919831, + "grad_norm": 0.3944512903690338, + "learning_rate": 0.0015, + "loss": 2.1055, + "step": 4562 + }, + { + "epoch": 0.48132911392405064, + "grad_norm": 0.36811283230781555, + "learning_rate": 0.0015, + "loss": 2.0994, + "step": 4563 + }, + { + "epoch": 0.48143459915611814, + "grad_norm": 0.4192855656147003, + "learning_rate": 0.0015, + "loss": 
2.1311, + "step": 4564 + }, + { + "epoch": 0.48154008438818563, + "grad_norm": 0.40523719787597656, + "learning_rate": 0.0015, + "loss": 2.1, + "step": 4565 + }, + { + "epoch": 0.4816455696202532, + "grad_norm": 0.4005654752254486, + "learning_rate": 0.0015, + "loss": 2.1359, + "step": 4566 + }, + { + "epoch": 0.48175105485232067, + "grad_norm": 0.3645026385784149, + "learning_rate": 0.0015, + "loss": 2.0941, + "step": 4567 + }, + { + "epoch": 0.48185654008438816, + "grad_norm": 0.42036864161491394, + "learning_rate": 0.0015, + "loss": 2.1166, + "step": 4568 + }, + { + "epoch": 0.4819620253164557, + "grad_norm": 0.3725198805332184, + "learning_rate": 0.0015, + "loss": 2.0924, + "step": 4569 + }, + { + "epoch": 0.4820675105485232, + "grad_norm": 0.4842985272407532, + "learning_rate": 0.0015, + "loss": 2.1384, + "step": 4570 + }, + { + "epoch": 0.4821729957805907, + "grad_norm": 0.3979546129703522, + "learning_rate": 0.0015, + "loss": 2.1204, + "step": 4571 + }, + { + "epoch": 0.48227848101265824, + "grad_norm": 0.41386884450912476, + "learning_rate": 0.0015, + "loss": 2.0919, + "step": 4572 + }, + { + "epoch": 0.48238396624472574, + "grad_norm": 0.410055935382843, + "learning_rate": 0.0015, + "loss": 2.1039, + "step": 4573 + }, + { + "epoch": 0.48248945147679323, + "grad_norm": 0.4205067455768585, + "learning_rate": 0.0015, + "loss": 2.1798, + "step": 4574 + }, + { + "epoch": 0.4825949367088608, + "grad_norm": 0.4388815462589264, + "learning_rate": 0.0015, + "loss": 2.1024, + "step": 4575 + }, + { + "epoch": 0.48270042194092827, + "grad_norm": 0.5788388252258301, + "learning_rate": 0.0015, + "loss": 2.161, + "step": 4576 + }, + { + "epoch": 0.48280590717299576, + "grad_norm": 0.42059752345085144, + "learning_rate": 0.0015, + "loss": 2.0999, + "step": 4577 + }, + { + "epoch": 0.4829113924050633, + "grad_norm": 0.41431087255477905, + "learning_rate": 0.0015, + "loss": 2.1328, + "step": 4578 + }, + { + "epoch": 0.4830168776371308, + "grad_norm": 0.44733285903930664, + 
"learning_rate": 0.0015, + "loss": 2.1797, + "step": 4579 + }, + { + "epoch": 0.4831223628691983, + "grad_norm": 0.3802553117275238, + "learning_rate": 0.0015, + "loss": 2.1307, + "step": 4580 + }, + { + "epoch": 0.48322784810126584, + "grad_norm": 0.4459879994392395, + "learning_rate": 0.0015, + "loss": 2.066, + "step": 4581 + }, + { + "epoch": 0.48333333333333334, + "grad_norm": 0.4367404282093048, + "learning_rate": 0.0015, + "loss": 2.0926, + "step": 4582 + }, + { + "epoch": 0.48343881856540083, + "grad_norm": 0.37750527262687683, + "learning_rate": 0.0015, + "loss": 2.1056, + "step": 4583 + }, + { + "epoch": 0.4835443037974684, + "grad_norm": 0.3946988582611084, + "learning_rate": 0.0015, + "loss": 2.1311, + "step": 4584 + }, + { + "epoch": 0.48364978902953587, + "grad_norm": 0.38590413331985474, + "learning_rate": 0.0015, + "loss": 2.1405, + "step": 4585 + }, + { + "epoch": 0.48375527426160336, + "grad_norm": 0.3740186095237732, + "learning_rate": 0.0015, + "loss": 2.1261, + "step": 4586 + }, + { + "epoch": 0.4838607594936709, + "grad_norm": 0.3884057402610779, + "learning_rate": 0.0015, + "loss": 2.1251, + "step": 4587 + }, + { + "epoch": 0.4839662447257384, + "grad_norm": 0.41016292572021484, + "learning_rate": 0.0015, + "loss": 2.0606, + "step": 4588 + }, + { + "epoch": 0.4840717299578059, + "grad_norm": 0.4307028353214264, + "learning_rate": 0.0015, + "loss": 2.1168, + "step": 4589 + }, + { + "epoch": 0.48417721518987344, + "grad_norm": 0.4195629060268402, + "learning_rate": 0.0015, + "loss": 2.1102, + "step": 4590 + }, + { + "epoch": 0.48428270042194094, + "grad_norm": 0.4085480272769928, + "learning_rate": 0.0015, + "loss": 2.1264, + "step": 4591 + }, + { + "epoch": 0.48438818565400843, + "grad_norm": 0.4713435769081116, + "learning_rate": 0.0015, + "loss": 2.1067, + "step": 4592 + }, + { + "epoch": 0.4844936708860759, + "grad_norm": 0.44562846422195435, + "learning_rate": 0.0015, + "loss": 2.1148, + "step": 4593 + }, + { + "epoch": 0.48459915611814347, 
+ "grad_norm": 0.418464332818985, + "learning_rate": 0.0015, + "loss": 2.0832, + "step": 4594 + }, + { + "epoch": 0.48470464135021096, + "grad_norm": 0.4620264172554016, + "learning_rate": 0.0015, + "loss": 2.1324, + "step": 4595 + }, + { + "epoch": 0.48481012658227846, + "grad_norm": 0.32329604029655457, + "learning_rate": 0.0015, + "loss": 2.1421, + "step": 4596 + }, + { + "epoch": 0.484915611814346, + "grad_norm": 0.4387017488479614, + "learning_rate": 0.0015, + "loss": 2.117, + "step": 4597 + }, + { + "epoch": 0.4850210970464135, + "grad_norm": 0.3887992799282074, + "learning_rate": 0.0015, + "loss": 2.1271, + "step": 4598 + }, + { + "epoch": 0.485126582278481, + "grad_norm": 0.3714919984340668, + "learning_rate": 0.0015, + "loss": 2.175, + "step": 4599 + }, + { + "epoch": 0.48523206751054854, + "grad_norm": 0.41034749150276184, + "learning_rate": 0.0015, + "loss": 2.1367, + "step": 4600 + }, + { + "epoch": 0.48533755274261603, + "grad_norm": 0.3998156189918518, + "learning_rate": 0.0015, + "loss": 2.1793, + "step": 4601 + }, + { + "epoch": 0.4854430379746835, + "grad_norm": 0.4124917984008789, + "learning_rate": 0.0015, + "loss": 2.1147, + "step": 4602 + }, + { + "epoch": 0.48554852320675107, + "grad_norm": 0.38656511902809143, + "learning_rate": 0.0015, + "loss": 2.078, + "step": 4603 + }, + { + "epoch": 0.48565400843881856, + "grad_norm": 0.4360000789165497, + "learning_rate": 0.0015, + "loss": 2.1056, + "step": 4604 + }, + { + "epoch": 0.48575949367088606, + "grad_norm": 0.40390515327453613, + "learning_rate": 0.0015, + "loss": 2.1522, + "step": 4605 + }, + { + "epoch": 0.4858649789029536, + "grad_norm": 0.39110586047172546, + "learning_rate": 0.0015, + "loss": 2.1664, + "step": 4606 + }, + { + "epoch": 0.4859704641350211, + "grad_norm": 0.4183284342288971, + "learning_rate": 0.0015, + "loss": 2.1126, + "step": 4607 + }, + { + "epoch": 0.4860759493670886, + "grad_norm": 0.41899940371513367, + "learning_rate": 0.0015, + "loss": 2.0999, + "step": 4608 + }, + 
{ + "epoch": 0.48618143459915614, + "grad_norm": 0.49305444955825806, + "learning_rate": 0.0015, + "loss": 2.1266, + "step": 4609 + }, + { + "epoch": 0.48628691983122363, + "grad_norm": 0.43450069427490234, + "learning_rate": 0.0015, + "loss": 2.1458, + "step": 4610 + }, + { + "epoch": 0.4863924050632911, + "grad_norm": 0.45175012946128845, + "learning_rate": 0.0015, + "loss": 2.0816, + "step": 4611 + }, + { + "epoch": 0.48649789029535867, + "grad_norm": 0.5347709059715271, + "learning_rate": 0.0015, + "loss": 2.1214, + "step": 4612 + }, + { + "epoch": 0.48660337552742616, + "grad_norm": 0.4532971680164337, + "learning_rate": 0.0015, + "loss": 2.1021, + "step": 4613 + }, + { + "epoch": 0.48670886075949366, + "grad_norm": 0.43240126967430115, + "learning_rate": 0.0015, + "loss": 2.1057, + "step": 4614 + }, + { + "epoch": 0.4868143459915612, + "grad_norm": 0.5196889638900757, + "learning_rate": 0.0015, + "loss": 2.1393, + "step": 4615 + }, + { + "epoch": 0.4869198312236287, + "grad_norm": 0.3776147961616516, + "learning_rate": 0.0015, + "loss": 2.1345, + "step": 4616 + }, + { + "epoch": 0.4870253164556962, + "grad_norm": 0.3997880220413208, + "learning_rate": 0.0015, + "loss": 2.1439, + "step": 4617 + }, + { + "epoch": 0.48713080168776374, + "grad_norm": 0.48128199577331543, + "learning_rate": 0.0015, + "loss": 2.0837, + "step": 4618 + }, + { + "epoch": 0.48723628691983123, + "grad_norm": 0.34499576687812805, + "learning_rate": 0.0015, + "loss": 2.0526, + "step": 4619 + }, + { + "epoch": 0.4873417721518987, + "grad_norm": 0.3909364938735962, + "learning_rate": 0.0015, + "loss": 2.0998, + "step": 4620 + }, + { + "epoch": 0.48744725738396627, + "grad_norm": 0.41276153922080994, + "learning_rate": 0.0015, + "loss": 2.1376, + "step": 4621 + }, + { + "epoch": 0.48755274261603376, + "grad_norm": 0.3936212956905365, + "learning_rate": 0.0015, + "loss": 2.0867, + "step": 4622 + }, + { + "epoch": 0.48765822784810126, + "grad_norm": 0.41149187088012695, + "learning_rate": 
0.0015, + "loss": 2.1016, + "step": 4623 + }, + { + "epoch": 0.4877637130801688, + "grad_norm": 0.38356494903564453, + "learning_rate": 0.0015, + "loss": 2.1241, + "step": 4624 + }, + { + "epoch": 0.4878691983122363, + "grad_norm": 0.3616800308227539, + "learning_rate": 0.0015, + "loss": 2.1097, + "step": 4625 + }, + { + "epoch": 0.4879746835443038, + "grad_norm": 0.3539635241031647, + "learning_rate": 0.0015, + "loss": 2.1548, + "step": 4626 + }, + { + "epoch": 0.4880801687763713, + "grad_norm": 0.3414154350757599, + "learning_rate": 0.0015, + "loss": 2.1417, + "step": 4627 + }, + { + "epoch": 0.48818565400843883, + "grad_norm": 0.39286351203918457, + "learning_rate": 0.0015, + "loss": 2.1106, + "step": 4628 + }, + { + "epoch": 0.4882911392405063, + "grad_norm": 0.33329659700393677, + "learning_rate": 0.0015, + "loss": 2.0994, + "step": 4629 + }, + { + "epoch": 0.4883966244725738, + "grad_norm": 0.4011025130748749, + "learning_rate": 0.0015, + "loss": 2.162, + "step": 4630 + }, + { + "epoch": 0.48850210970464136, + "grad_norm": 0.43274739384651184, + "learning_rate": 0.0015, + "loss": 2.1251, + "step": 4631 + }, + { + "epoch": 0.48860759493670886, + "grad_norm": 0.4052983522415161, + "learning_rate": 0.0015, + "loss": 2.1088, + "step": 4632 + }, + { + "epoch": 0.48871308016877635, + "grad_norm": 0.42022714018821716, + "learning_rate": 0.0015, + "loss": 2.1204, + "step": 4633 + }, + { + "epoch": 0.4888185654008439, + "grad_norm": 0.41749608516693115, + "learning_rate": 0.0015, + "loss": 2.1225, + "step": 4634 + }, + { + "epoch": 0.4889240506329114, + "grad_norm": 0.3990372121334076, + "learning_rate": 0.0015, + "loss": 2.112, + "step": 4635 + }, + { + "epoch": 0.4890295358649789, + "grad_norm": 0.4693222641944885, + "learning_rate": 0.0015, + "loss": 2.134, + "step": 4636 + }, + { + "epoch": 0.48913502109704643, + "grad_norm": 0.5061273574829102, + "learning_rate": 0.0015, + "loss": 2.1273, + "step": 4637 + }, + { + "epoch": 0.4892405063291139, + "grad_norm": 
0.35104021430015564, + "learning_rate": 0.0015, + "loss": 2.1121, + "step": 4638 + }, + { + "epoch": 0.4893459915611814, + "grad_norm": 0.413353830575943, + "learning_rate": 0.0015, + "loss": 2.15, + "step": 4639 + }, + { + "epoch": 0.48945147679324896, + "grad_norm": 0.38155487179756165, + "learning_rate": 0.0015, + "loss": 2.1494, + "step": 4640 + }, + { + "epoch": 0.48955696202531646, + "grad_norm": 0.3969692587852478, + "learning_rate": 0.0015, + "loss": 2.1478, + "step": 4641 + }, + { + "epoch": 0.48966244725738395, + "grad_norm": 0.4515286386013031, + "learning_rate": 0.0015, + "loss": 2.1109, + "step": 4642 + }, + { + "epoch": 0.4897679324894515, + "grad_norm": 0.3918905258178711, + "learning_rate": 0.0015, + "loss": 2.1223, + "step": 4643 + }, + { + "epoch": 0.489873417721519, + "grad_norm": 0.3949102461338043, + "learning_rate": 0.0015, + "loss": 2.1284, + "step": 4644 + }, + { + "epoch": 0.4899789029535865, + "grad_norm": 0.422773540019989, + "learning_rate": 0.0015, + "loss": 2.145, + "step": 4645 + }, + { + "epoch": 0.49008438818565403, + "grad_norm": 0.3786967694759369, + "learning_rate": 0.0015, + "loss": 2.112, + "step": 4646 + }, + { + "epoch": 0.4901898734177215, + "grad_norm": 0.418410062789917, + "learning_rate": 0.0015, + "loss": 2.1339, + "step": 4647 + }, + { + "epoch": 0.490295358649789, + "grad_norm": 0.35142844915390015, + "learning_rate": 0.0015, + "loss": 2.1318, + "step": 4648 + }, + { + "epoch": 0.49040084388185656, + "grad_norm": 0.4025801122188568, + "learning_rate": 0.0015, + "loss": 2.0993, + "step": 4649 + }, + { + "epoch": 0.49050632911392406, + "grad_norm": 0.42356109619140625, + "learning_rate": 0.0015, + "loss": 2.1213, + "step": 4650 + }, + { + "epoch": 0.49061181434599155, + "grad_norm": 0.47228875756263733, + "learning_rate": 0.0015, + "loss": 2.1247, + "step": 4651 + }, + { + "epoch": 0.4907172995780591, + "grad_norm": 0.45975130796432495, + "learning_rate": 0.0015, + "loss": 2.1172, + "step": 4652 + }, + { + "epoch": 
0.4908227848101266, + "grad_norm": 0.3723204433917999, + "learning_rate": 0.0015, + "loss": 2.1043, + "step": 4653 + }, + { + "epoch": 0.4909282700421941, + "grad_norm": 0.44053035974502563, + "learning_rate": 0.0015, + "loss": 2.138, + "step": 4654 + }, + { + "epoch": 0.49103375527426163, + "grad_norm": 0.4220074415206909, + "learning_rate": 0.0015, + "loss": 2.1078, + "step": 4655 + }, + { + "epoch": 0.4911392405063291, + "grad_norm": 0.3875063359737396, + "learning_rate": 0.0015, + "loss": 2.0922, + "step": 4656 + }, + { + "epoch": 0.4912447257383966, + "grad_norm": 0.3962198495864868, + "learning_rate": 0.0015, + "loss": 2.1163, + "step": 4657 + }, + { + "epoch": 0.4913502109704641, + "grad_norm": 0.3686353862285614, + "learning_rate": 0.0015, + "loss": 2.1197, + "step": 4658 + }, + { + "epoch": 0.49145569620253166, + "grad_norm": 0.37661898136138916, + "learning_rate": 0.0015, + "loss": 2.1094, + "step": 4659 + }, + { + "epoch": 0.49156118143459915, + "grad_norm": 0.41759610176086426, + "learning_rate": 0.0015, + "loss": 2.134, + "step": 4660 + }, + { + "epoch": 0.49166666666666664, + "grad_norm": 0.3597700595855713, + "learning_rate": 0.0015, + "loss": 2.1185, + "step": 4661 + }, + { + "epoch": 0.4917721518987342, + "grad_norm": 0.4064253866672516, + "learning_rate": 0.0015, + "loss": 2.0929, + "step": 4662 + }, + { + "epoch": 0.4918776371308017, + "grad_norm": 0.4097134470939636, + "learning_rate": 0.0015, + "loss": 2.0839, + "step": 4663 + }, + { + "epoch": 0.4919831223628692, + "grad_norm": 0.4017532467842102, + "learning_rate": 0.0015, + "loss": 2.1151, + "step": 4664 + }, + { + "epoch": 0.4920886075949367, + "grad_norm": 0.37544170022010803, + "learning_rate": 0.0015, + "loss": 2.1103, + "step": 4665 + }, + { + "epoch": 0.4921940928270042, + "grad_norm": 0.43717071413993835, + "learning_rate": 0.0015, + "loss": 2.1107, + "step": 4666 + }, + { + "epoch": 0.4922995780590717, + "grad_norm": 0.3414668142795563, + "learning_rate": 0.0015, + "loss": 2.1288, + 
"step": 4667 + }, + { + "epoch": 0.49240506329113926, + "grad_norm": 0.42861708998680115, + "learning_rate": 0.0015, + "loss": 2.1146, + "step": 4668 + }, + { + "epoch": 0.49251054852320675, + "grad_norm": 0.43411773443222046, + "learning_rate": 0.0015, + "loss": 2.1166, + "step": 4669 + }, + { + "epoch": 0.49261603375527424, + "grad_norm": 0.3561021387577057, + "learning_rate": 0.0015, + "loss": 2.1204, + "step": 4670 + }, + { + "epoch": 0.4927215189873418, + "grad_norm": 0.4697127938270569, + "learning_rate": 0.0015, + "loss": 2.1256, + "step": 4671 + }, + { + "epoch": 0.4928270042194093, + "grad_norm": 0.4485023319721222, + "learning_rate": 0.0015, + "loss": 2.1048, + "step": 4672 + }, + { + "epoch": 0.4929324894514768, + "grad_norm": 0.3609883189201355, + "learning_rate": 0.0015, + "loss": 2.1464, + "step": 4673 + }, + { + "epoch": 0.4930379746835443, + "grad_norm": 0.4128670394420624, + "learning_rate": 0.0015, + "loss": 2.1482, + "step": 4674 + }, + { + "epoch": 0.4931434599156118, + "grad_norm": 0.43786245584487915, + "learning_rate": 0.0015, + "loss": 2.0925, + "step": 4675 + }, + { + "epoch": 0.4932489451476793, + "grad_norm": 0.3756186366081238, + "learning_rate": 0.0015, + "loss": 2.0853, + "step": 4676 + }, + { + "epoch": 0.49335443037974686, + "grad_norm": 0.3746248483657837, + "learning_rate": 0.0015, + "loss": 2.0972, + "step": 4677 + }, + { + "epoch": 0.49345991561181435, + "grad_norm": 0.4354593753814697, + "learning_rate": 0.0015, + "loss": 2.0935, + "step": 4678 + }, + { + "epoch": 0.49356540084388184, + "grad_norm": 0.401054710149765, + "learning_rate": 0.0015, + "loss": 2.1387, + "step": 4679 + }, + { + "epoch": 0.4936708860759494, + "grad_norm": 0.4585861563682556, + "learning_rate": 0.0015, + "loss": 2.0806, + "step": 4680 + }, + { + "epoch": 0.4937763713080169, + "grad_norm": 0.4175602197647095, + "learning_rate": 0.0015, + "loss": 2.1226, + "step": 4681 + }, + { + "epoch": 0.4938818565400844, + "grad_norm": 0.41115841269493103, + 
"learning_rate": 0.0015, + "loss": 2.0867, + "step": 4682 + }, + { + "epoch": 0.4939873417721519, + "grad_norm": 0.4601125419139862, + "learning_rate": 0.0015, + "loss": 2.1119, + "step": 4683 + }, + { + "epoch": 0.4940928270042194, + "grad_norm": 0.39295217394828796, + "learning_rate": 0.0015, + "loss": 2.0933, + "step": 4684 + }, + { + "epoch": 0.4941983122362869, + "grad_norm": 0.3968357443809509, + "learning_rate": 0.0015, + "loss": 2.066, + "step": 4685 + }, + { + "epoch": 0.49430379746835446, + "grad_norm": 0.47142156958580017, + "learning_rate": 0.0015, + "loss": 2.0937, + "step": 4686 + }, + { + "epoch": 0.49440928270042195, + "grad_norm": 0.39157024025917053, + "learning_rate": 0.0015, + "loss": 2.1337, + "step": 4687 + }, + { + "epoch": 0.49451476793248944, + "grad_norm": 0.37167662382125854, + "learning_rate": 0.0015, + "loss": 2.0911, + "step": 4688 + }, + { + "epoch": 0.494620253164557, + "grad_norm": 0.3778229057788849, + "learning_rate": 0.0015, + "loss": 2.0807, + "step": 4689 + }, + { + "epoch": 0.4947257383966245, + "grad_norm": 0.42426180839538574, + "learning_rate": 0.0015, + "loss": 2.1427, + "step": 4690 + }, + { + "epoch": 0.494831223628692, + "grad_norm": 0.3843879699707031, + "learning_rate": 0.0015, + "loss": 2.1274, + "step": 4691 + }, + { + "epoch": 0.49493670886075947, + "grad_norm": 0.42804718017578125, + "learning_rate": 0.0015, + "loss": 2.1569, + "step": 4692 + }, + { + "epoch": 0.495042194092827, + "grad_norm": 0.48774853348731995, + "learning_rate": 0.0015, + "loss": 2.1202, + "step": 4693 + }, + { + "epoch": 0.4951476793248945, + "grad_norm": 0.3821241557598114, + "learning_rate": 0.0015, + "loss": 2.1246, + "step": 4694 + }, + { + "epoch": 0.495253164556962, + "grad_norm": 0.36568784713745117, + "learning_rate": 0.0015, + "loss": 2.0998, + "step": 4695 + }, + { + "epoch": 0.49535864978902955, + "grad_norm": 0.46596869826316833, + "learning_rate": 0.0015, + "loss": 2.1357, + "step": 4696 + }, + { + "epoch": 0.49546413502109704, + 
"grad_norm": 0.4583687484264374, + "learning_rate": 0.0015, + "loss": 2.1038, + "step": 4697 + }, + { + "epoch": 0.49556962025316453, + "grad_norm": 0.3796868324279785, + "learning_rate": 0.0015, + "loss": 2.0837, + "step": 4698 + }, + { + "epoch": 0.4956751054852321, + "grad_norm": 0.4734113812446594, + "learning_rate": 0.0015, + "loss": 2.1128, + "step": 4699 + }, + { + "epoch": 0.4957805907172996, + "grad_norm": 0.4604989290237427, + "learning_rate": 0.0015, + "loss": 2.1069, + "step": 4700 + }, + { + "epoch": 0.49588607594936707, + "grad_norm": 0.4277646541595459, + "learning_rate": 0.0015, + "loss": 2.1015, + "step": 4701 + }, + { + "epoch": 0.4959915611814346, + "grad_norm": 0.33987802267074585, + "learning_rate": 0.0015, + "loss": 2.1356, + "step": 4702 + }, + { + "epoch": 0.4960970464135021, + "grad_norm": 0.4079280495643616, + "learning_rate": 0.0015, + "loss": 2.0874, + "step": 4703 + }, + { + "epoch": 0.4962025316455696, + "grad_norm": 0.3514738380908966, + "learning_rate": 0.0015, + "loss": 2.1149, + "step": 4704 + }, + { + "epoch": 0.49630801687763715, + "grad_norm": 0.4014057219028473, + "learning_rate": 0.0015, + "loss": 2.0959, + "step": 4705 + }, + { + "epoch": 0.49641350210970464, + "grad_norm": 0.42156916856765747, + "learning_rate": 0.0015, + "loss": 2.098, + "step": 4706 + }, + { + "epoch": 0.49651898734177213, + "grad_norm": 0.39309918880462646, + "learning_rate": 0.0015, + "loss": 2.1282, + "step": 4707 + }, + { + "epoch": 0.4966244725738397, + "grad_norm": 0.3725399076938629, + "learning_rate": 0.0015, + "loss": 2.1394, + "step": 4708 + }, + { + "epoch": 0.4967299578059072, + "grad_norm": 0.43021610379219055, + "learning_rate": 0.0015, + "loss": 2.1053, + "step": 4709 + }, + { + "epoch": 0.49683544303797467, + "grad_norm": 0.3914791941642761, + "learning_rate": 0.0015, + "loss": 2.1193, + "step": 4710 + }, + { + "epoch": 0.4969409282700422, + "grad_norm": 0.4027750790119171, + "learning_rate": 0.0015, + "loss": 2.1162, + "step": 4711 + }, + 
{ + "epoch": 0.4970464135021097, + "grad_norm": 0.3999054431915283, + "learning_rate": 0.0015, + "loss": 2.1018, + "step": 4712 + }, + { + "epoch": 0.4971518987341772, + "grad_norm": 0.404785692691803, + "learning_rate": 0.0015, + "loss": 2.0883, + "step": 4713 + }, + { + "epoch": 0.49725738396624475, + "grad_norm": 0.41521987318992615, + "learning_rate": 0.0015, + "loss": 2.1405, + "step": 4714 + }, + { + "epoch": 0.49736286919831224, + "grad_norm": 0.3595982491970062, + "learning_rate": 0.0015, + "loss": 2.1084, + "step": 4715 + }, + { + "epoch": 0.49746835443037973, + "grad_norm": 0.47044986486434937, + "learning_rate": 0.0015, + "loss": 2.0982, + "step": 4716 + }, + { + "epoch": 0.4975738396624473, + "grad_norm": 0.3935399651527405, + "learning_rate": 0.0015, + "loss": 2.0945, + "step": 4717 + }, + { + "epoch": 0.4976793248945148, + "grad_norm": 0.4241366684436798, + "learning_rate": 0.0015, + "loss": 2.1637, + "step": 4718 + }, + { + "epoch": 0.49778481012658227, + "grad_norm": 0.5032041072845459, + "learning_rate": 0.0015, + "loss": 2.1221, + "step": 4719 + }, + { + "epoch": 0.4978902953586498, + "grad_norm": 0.4503293037414551, + "learning_rate": 0.0015, + "loss": 2.0957, + "step": 4720 + }, + { + "epoch": 0.4979957805907173, + "grad_norm": 0.3983801603317261, + "learning_rate": 0.0015, + "loss": 2.1231, + "step": 4721 + }, + { + "epoch": 0.4981012658227848, + "grad_norm": 0.39245718717575073, + "learning_rate": 0.0015, + "loss": 2.1096, + "step": 4722 + }, + { + "epoch": 0.49820675105485235, + "grad_norm": 0.4893776774406433, + "learning_rate": 0.0015, + "loss": 2.1235, + "step": 4723 + }, + { + "epoch": 0.49831223628691984, + "grad_norm": 0.3669581413269043, + "learning_rate": 0.0015, + "loss": 2.121, + "step": 4724 + }, + { + "epoch": 0.49841772151898733, + "grad_norm": 0.43021002411842346, + "learning_rate": 0.0015, + "loss": 2.1015, + "step": 4725 + }, + { + "epoch": 0.4985232067510548, + "grad_norm": 0.42949435114860535, + "learning_rate": 0.0015, + 
"loss": 2.119, + "step": 4726 + }, + { + "epoch": 0.4986286919831224, + "grad_norm": 0.40685656666755676, + "learning_rate": 0.0015, + "loss": 2.085, + "step": 4727 + }, + { + "epoch": 0.49873417721518987, + "grad_norm": 0.5373295545578003, + "learning_rate": 0.0015, + "loss": 2.1308, + "step": 4728 + }, + { + "epoch": 0.49883966244725736, + "grad_norm": 0.5754332542419434, + "learning_rate": 0.0015, + "loss": 2.0957, + "step": 4729 + }, + { + "epoch": 0.4989451476793249, + "grad_norm": 0.3756369948387146, + "learning_rate": 0.0015, + "loss": 2.0945, + "step": 4730 + }, + { + "epoch": 0.4990506329113924, + "grad_norm": 0.5273646116256714, + "learning_rate": 0.0015, + "loss": 2.1344, + "step": 4731 + }, + { + "epoch": 0.4991561181434599, + "grad_norm": 0.5503566861152649, + "learning_rate": 0.0015, + "loss": 2.1022, + "step": 4732 + }, + { + "epoch": 0.49926160337552744, + "grad_norm": 0.39387235045433044, + "learning_rate": 0.0015, + "loss": 2.1217, + "step": 4733 + }, + { + "epoch": 0.49936708860759493, + "grad_norm": 0.5460361242294312, + "learning_rate": 0.0015, + "loss": 2.1187, + "step": 4734 + }, + { + "epoch": 0.4994725738396624, + "grad_norm": 0.5333127379417419, + "learning_rate": 0.0015, + "loss": 2.146, + "step": 4735 + }, + { + "epoch": 0.49957805907173, + "grad_norm": 0.4165312349796295, + "learning_rate": 0.0015, + "loss": 2.11, + "step": 4736 + }, + { + "epoch": 0.49968354430379747, + "grad_norm": 0.5701345801353455, + "learning_rate": 0.0015, + "loss": 2.0931, + "step": 4737 + }, + { + "epoch": 0.49978902953586496, + "grad_norm": 0.4424220621585846, + "learning_rate": 0.0015, + "loss": 2.0692, + "step": 4738 + }, + { + "epoch": 0.4998945147679325, + "grad_norm": 0.49343594908714294, + "learning_rate": 0.0015, + "loss": 2.092, + "step": 4739 + }, + { + "epoch": 0.5, + "grad_norm": 0.42762991786003113, + "learning_rate": 0.0015, + "loss": 2.0965, + "step": 4740 + }, + { + "epoch": 0.5001054852320675, + "grad_norm": 0.4780443608760834, + 
"learning_rate": 0.0015, + "loss": 2.1375, + "step": 4741 + }, + { + "epoch": 0.500210970464135, + "grad_norm": 0.4702475070953369, + "learning_rate": 0.0015, + "loss": 2.1048, + "step": 4742 + }, + { + "epoch": 0.5003164556962025, + "grad_norm": 0.39582669734954834, + "learning_rate": 0.0015, + "loss": 2.0868, + "step": 4743 + }, + { + "epoch": 0.5004219409282701, + "grad_norm": 0.45954346656799316, + "learning_rate": 0.0015, + "loss": 2.1477, + "step": 4744 + }, + { + "epoch": 0.5005274261603375, + "grad_norm": 0.39784133434295654, + "learning_rate": 0.0015, + "loss": 2.1015, + "step": 4745 + }, + { + "epoch": 0.5006329113924051, + "grad_norm": 0.3633357882499695, + "learning_rate": 0.0015, + "loss": 2.117, + "step": 4746 + }, + { + "epoch": 0.5007383966244726, + "grad_norm": 0.37878596782684326, + "learning_rate": 0.0015, + "loss": 2.1169, + "step": 4747 + }, + { + "epoch": 0.50084388185654, + "grad_norm": 0.3702596127986908, + "learning_rate": 0.0015, + "loss": 2.148, + "step": 4748 + }, + { + "epoch": 0.5009493670886076, + "grad_norm": 0.41675499081611633, + "learning_rate": 0.0015, + "loss": 2.1494, + "step": 4749 + }, + { + "epoch": 0.5010548523206751, + "grad_norm": 0.41092628240585327, + "learning_rate": 0.0015, + "loss": 2.1329, + "step": 4750 + }, + { + "epoch": 0.5011603375527426, + "grad_norm": 0.37569400668144226, + "learning_rate": 0.0015, + "loss": 2.1273, + "step": 4751 + }, + { + "epoch": 0.5012658227848101, + "grad_norm": 0.4422067701816559, + "learning_rate": 0.0015, + "loss": 2.0952, + "step": 4752 + }, + { + "epoch": 0.5013713080168777, + "grad_norm": 0.43429428339004517, + "learning_rate": 0.0015, + "loss": 2.1202, + "step": 4753 + }, + { + "epoch": 0.5014767932489451, + "grad_norm": 0.3620462417602539, + "learning_rate": 0.0015, + "loss": 2.1084, + "step": 4754 + }, + { + "epoch": 0.5015822784810127, + "grad_norm": 0.37459614872932434, + "learning_rate": 0.0015, + "loss": 2.1014, + "step": 4755 + }, + { + "epoch": 0.5016877637130802, + 
"grad_norm": 0.40684306621551514, + "learning_rate": 0.0015, + "loss": 2.088, + "step": 4756 + }, + { + "epoch": 0.5017932489451477, + "grad_norm": 0.4428795874118805, + "learning_rate": 0.0015, + "loss": 2.1179, + "step": 4757 + }, + { + "epoch": 0.5018987341772152, + "grad_norm": 0.43519213795661926, + "learning_rate": 0.0015, + "loss": 2.089, + "step": 4758 + }, + { + "epoch": 0.5020042194092827, + "grad_norm": 0.41313791275024414, + "learning_rate": 0.0015, + "loss": 2.0955, + "step": 4759 + }, + { + "epoch": 0.5021097046413502, + "grad_norm": 0.4571726620197296, + "learning_rate": 0.0015, + "loss": 2.1225, + "step": 4760 + }, + { + "epoch": 0.5022151898734177, + "grad_norm": 0.3887038230895996, + "learning_rate": 0.0015, + "loss": 2.084, + "step": 4761 + }, + { + "epoch": 0.5023206751054853, + "grad_norm": 0.40406227111816406, + "learning_rate": 0.0015, + "loss": 2.1323, + "step": 4762 + }, + { + "epoch": 0.5024261603375527, + "grad_norm": 0.4124753475189209, + "learning_rate": 0.0015, + "loss": 2.0948, + "step": 4763 + }, + { + "epoch": 0.5025316455696203, + "grad_norm": 0.37521177530288696, + "learning_rate": 0.0015, + "loss": 2.112, + "step": 4764 + }, + { + "epoch": 0.5026371308016878, + "grad_norm": 0.4362151622772217, + "learning_rate": 0.0015, + "loss": 2.0808, + "step": 4765 + }, + { + "epoch": 0.5027426160337553, + "grad_norm": 0.42300713062286377, + "learning_rate": 0.0015, + "loss": 2.0781, + "step": 4766 + }, + { + "epoch": 0.5028481012658228, + "grad_norm": 0.371273010969162, + "learning_rate": 0.0015, + "loss": 2.1141, + "step": 4767 + }, + { + "epoch": 0.5029535864978903, + "grad_norm": 0.4629524350166321, + "learning_rate": 0.0015, + "loss": 2.0886, + "step": 4768 + }, + { + "epoch": 0.5030590717299578, + "grad_norm": 0.5317617058753967, + "learning_rate": 0.0015, + "loss": 2.1217, + "step": 4769 + }, + { + "epoch": 0.5031645569620253, + "grad_norm": 0.397170752286911, + "learning_rate": 0.0015, + "loss": 2.1141, + "step": 4770 + }, + { + 
"epoch": 0.5032700421940929, + "grad_norm": 0.4298158586025238, + "learning_rate": 0.0015, + "loss": 2.1125, + "step": 4771 + }, + { + "epoch": 0.5033755274261603, + "grad_norm": 0.533865213394165, + "learning_rate": 0.0015, + "loss": 2.1585, + "step": 4772 + }, + { + "epoch": 0.5034810126582279, + "grad_norm": 0.4867939352989197, + "learning_rate": 0.0015, + "loss": 2.1269, + "step": 4773 + }, + { + "epoch": 0.5035864978902953, + "grad_norm": 0.4146290719509125, + "learning_rate": 0.0015, + "loss": 2.0892, + "step": 4774 + }, + { + "epoch": 0.5036919831223629, + "grad_norm": 0.5489625930786133, + "learning_rate": 0.0015, + "loss": 2.1457, + "step": 4775 + }, + { + "epoch": 0.5037974683544304, + "grad_norm": 0.4553894102573395, + "learning_rate": 0.0015, + "loss": 2.1161, + "step": 4776 + }, + { + "epoch": 0.5039029535864978, + "grad_norm": 0.4428175091743469, + "learning_rate": 0.0015, + "loss": 2.0684, + "step": 4777 + }, + { + "epoch": 0.5040084388185654, + "grad_norm": 0.45797696709632874, + "learning_rate": 0.0015, + "loss": 2.1228, + "step": 4778 + }, + { + "epoch": 0.5041139240506329, + "grad_norm": 0.3935784101486206, + "learning_rate": 0.0015, + "loss": 2.094, + "step": 4779 + }, + { + "epoch": 0.5042194092827004, + "grad_norm": 0.4334127604961395, + "learning_rate": 0.0015, + "loss": 2.1058, + "step": 4780 + }, + { + "epoch": 0.5043248945147679, + "grad_norm": 0.4702568054199219, + "learning_rate": 0.0015, + "loss": 2.0942, + "step": 4781 + }, + { + "epoch": 0.5044303797468355, + "grad_norm": 0.47782015800476074, + "learning_rate": 0.0015, + "loss": 2.1355, + "step": 4782 + }, + { + "epoch": 0.5045358649789029, + "grad_norm": 0.5240076184272766, + "learning_rate": 0.0015, + "loss": 2.1206, + "step": 4783 + }, + { + "epoch": 0.5046413502109705, + "grad_norm": 0.4728156328201294, + "learning_rate": 0.0015, + "loss": 2.0846, + "step": 4784 + }, + { + "epoch": 0.504746835443038, + "grad_norm": 0.579682469367981, + "learning_rate": 0.0015, + "loss": 2.1056, + 
"step": 4785 + }, + { + "epoch": 0.5048523206751054, + "grad_norm": 0.3781294524669647, + "learning_rate": 0.0015, + "loss": 2.0894, + "step": 4786 + }, + { + "epoch": 0.504957805907173, + "grad_norm": 0.5493254661560059, + "learning_rate": 0.0015, + "loss": 2.1096, + "step": 4787 + }, + { + "epoch": 0.5050632911392405, + "grad_norm": 0.4073833227157593, + "learning_rate": 0.0015, + "loss": 2.089, + "step": 4788 + }, + { + "epoch": 0.505168776371308, + "grad_norm": 0.4643096923828125, + "learning_rate": 0.0015, + "loss": 2.0527, + "step": 4789 + }, + { + "epoch": 0.5052742616033755, + "grad_norm": 0.38135871291160583, + "learning_rate": 0.0015, + "loss": 2.1259, + "step": 4790 + }, + { + "epoch": 0.5053797468354431, + "grad_norm": 0.49053025245666504, + "learning_rate": 0.0015, + "loss": 2.14, + "step": 4791 + }, + { + "epoch": 0.5054852320675105, + "grad_norm": 0.380625456571579, + "learning_rate": 0.0015, + "loss": 2.0732, + "step": 4792 + }, + { + "epoch": 0.505590717299578, + "grad_norm": 0.5376477241516113, + "learning_rate": 0.0015, + "loss": 2.1311, + "step": 4793 + }, + { + "epoch": 0.5056962025316456, + "grad_norm": 0.44578829407691956, + "learning_rate": 0.0015, + "loss": 2.1746, + "step": 4794 + }, + { + "epoch": 0.505801687763713, + "grad_norm": 0.48295921087265015, + "learning_rate": 0.0015, + "loss": 2.1108, + "step": 4795 + }, + { + "epoch": 0.5059071729957806, + "grad_norm": 0.4691965878009796, + "learning_rate": 0.0015, + "loss": 2.0926, + "step": 4796 + }, + { + "epoch": 0.5060126582278481, + "grad_norm": 0.35916075110435486, + "learning_rate": 0.0015, + "loss": 2.0946, + "step": 4797 + }, + { + "epoch": 0.5061181434599156, + "grad_norm": 0.4516467750072479, + "learning_rate": 0.0015, + "loss": 2.0888, + "step": 4798 + }, + { + "epoch": 0.5062236286919831, + "grad_norm": 0.5511289834976196, + "learning_rate": 0.0015, + "loss": 2.0984, + "step": 4799 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 0.4124204218387604, + "learning_rate": 
0.0015, + "loss": 2.0862, + "step": 4800 + }, + { + "epoch": 0.5064345991561181, + "grad_norm": 0.4854283928871155, + "learning_rate": 0.0015, + "loss": 2.0856, + "step": 4801 + }, + { + "epoch": 0.5065400843881857, + "grad_norm": 0.4682022035121918, + "learning_rate": 0.0015, + "loss": 2.1215, + "step": 4802 + }, + { + "epoch": 0.5066455696202532, + "grad_norm": 0.41010382771492004, + "learning_rate": 0.0015, + "loss": 2.1047, + "step": 4803 + }, + { + "epoch": 0.5067510548523206, + "grad_norm": 0.4094730019569397, + "learning_rate": 0.0015, + "loss": 2.0871, + "step": 4804 + }, + { + "epoch": 0.5068565400843882, + "grad_norm": 0.4230351448059082, + "learning_rate": 0.0015, + "loss": 2.1133, + "step": 4805 + }, + { + "epoch": 0.5069620253164557, + "grad_norm": 0.41492971777915955, + "learning_rate": 0.0015, + "loss": 2.1046, + "step": 4806 + }, + { + "epoch": 0.5070675105485232, + "grad_norm": 0.38033372163772583, + "learning_rate": 0.0015, + "loss": 2.1295, + "step": 4807 + }, + { + "epoch": 0.5071729957805907, + "grad_norm": 0.4382295608520508, + "learning_rate": 0.0015, + "loss": 2.115, + "step": 4808 + }, + { + "epoch": 0.5072784810126583, + "grad_norm": 0.39440760016441345, + "learning_rate": 0.0015, + "loss": 2.116, + "step": 4809 + }, + { + "epoch": 0.5073839662447257, + "grad_norm": 0.397226482629776, + "learning_rate": 0.0015, + "loss": 2.0951, + "step": 4810 + }, + { + "epoch": 0.5074894514767933, + "grad_norm": 0.37146487832069397, + "learning_rate": 0.0015, + "loss": 2.0961, + "step": 4811 + }, + { + "epoch": 0.5075949367088608, + "grad_norm": 0.40587663650512695, + "learning_rate": 0.0015, + "loss": 2.0931, + "step": 4812 + }, + { + "epoch": 0.5077004219409282, + "grad_norm": 0.3605974614620209, + "learning_rate": 0.0015, + "loss": 2.1463, + "step": 4813 + }, + { + "epoch": 0.5078059071729958, + "grad_norm": 0.38365933299064636, + "learning_rate": 0.0015, + "loss": 2.1352, + "step": 4814 + }, + { + "epoch": 0.5079113924050633, + "grad_norm": 
0.38185739517211914, + "learning_rate": 0.0015, + "loss": 2.1197, + "step": 4815 + }, + { + "epoch": 0.5080168776371308, + "grad_norm": 0.4008060097694397, + "learning_rate": 0.0015, + "loss": 2.0857, + "step": 4816 + }, + { + "epoch": 0.5081223628691983, + "grad_norm": 0.4218250811100006, + "learning_rate": 0.0015, + "loss": 2.1024, + "step": 4817 + }, + { + "epoch": 0.5082278481012659, + "grad_norm": 0.39476722478866577, + "learning_rate": 0.0015, + "loss": 2.1433, + "step": 4818 + }, + { + "epoch": 0.5083333333333333, + "grad_norm": 0.36046281456947327, + "learning_rate": 0.0015, + "loss": 2.1265, + "step": 4819 + }, + { + "epoch": 0.5084388185654009, + "grad_norm": 0.3979828357696533, + "learning_rate": 0.0015, + "loss": 2.0974, + "step": 4820 + }, + { + "epoch": 0.5085443037974684, + "grad_norm": 0.36804965138435364, + "learning_rate": 0.0015, + "loss": 2.1288, + "step": 4821 + }, + { + "epoch": 0.5086497890295358, + "grad_norm": 0.4003746211528778, + "learning_rate": 0.0015, + "loss": 2.0816, + "step": 4822 + }, + { + "epoch": 0.5087552742616034, + "grad_norm": 0.3696097433567047, + "learning_rate": 0.0015, + "loss": 2.1153, + "step": 4823 + }, + { + "epoch": 0.5088607594936709, + "grad_norm": 0.3812231719493866, + "learning_rate": 0.0015, + "loss": 2.1201, + "step": 4824 + }, + { + "epoch": 0.5089662447257384, + "grad_norm": 0.38278841972351074, + "learning_rate": 0.0015, + "loss": 2.0899, + "step": 4825 + }, + { + "epoch": 0.5090717299578059, + "grad_norm": 0.40828344225883484, + "learning_rate": 0.0015, + "loss": 2.0678, + "step": 4826 + }, + { + "epoch": 0.5091772151898735, + "grad_norm": 0.39474207162857056, + "learning_rate": 0.0015, + "loss": 2.0854, + "step": 4827 + }, + { + "epoch": 0.5092827004219409, + "grad_norm": 0.4017040431499481, + "learning_rate": 0.0015, + "loss": 2.105, + "step": 4828 + }, + { + "epoch": 0.5093881856540085, + "grad_norm": 0.38097473978996277, + "learning_rate": 0.0015, + "loss": 2.0898, + "step": 4829 + }, + { + "epoch": 
0.509493670886076, + "grad_norm": 0.3523244261741638, + "learning_rate": 0.0015, + "loss": 2.0945, + "step": 4830 + }, + { + "epoch": 0.5095991561181434, + "grad_norm": 0.4102436602115631, + "learning_rate": 0.0015, + "loss": 2.114, + "step": 4831 + }, + { + "epoch": 0.509704641350211, + "grad_norm": 0.35818415880203247, + "learning_rate": 0.0015, + "loss": 2.1082, + "step": 4832 + }, + { + "epoch": 0.5098101265822785, + "grad_norm": 0.38440656661987305, + "learning_rate": 0.0015, + "loss": 2.0615, + "step": 4833 + }, + { + "epoch": 0.509915611814346, + "grad_norm": 0.45474475622177124, + "learning_rate": 0.0015, + "loss": 2.1018, + "step": 4834 + }, + { + "epoch": 0.5100210970464135, + "grad_norm": 0.41214945912361145, + "learning_rate": 0.0015, + "loss": 2.0873, + "step": 4835 + }, + { + "epoch": 0.5101265822784811, + "grad_norm": 0.4181593060493469, + "learning_rate": 0.0015, + "loss": 2.0694, + "step": 4836 + }, + { + "epoch": 0.5102320675105485, + "grad_norm": 0.4711325466632843, + "learning_rate": 0.0015, + "loss": 2.0676, + "step": 4837 + }, + { + "epoch": 0.510337552742616, + "grad_norm": 0.45928049087524414, + "learning_rate": 0.0015, + "loss": 2.1162, + "step": 4838 + }, + { + "epoch": 0.5104430379746835, + "grad_norm": 0.3828759789466858, + "learning_rate": 0.0015, + "loss": 2.0991, + "step": 4839 + }, + { + "epoch": 0.510548523206751, + "grad_norm": 0.5646106600761414, + "learning_rate": 0.0015, + "loss": 2.1292, + "step": 4840 + }, + { + "epoch": 0.5106540084388186, + "grad_norm": 0.48211225867271423, + "learning_rate": 0.0015, + "loss": 2.1072, + "step": 4841 + }, + { + "epoch": 0.510759493670886, + "grad_norm": 0.3986974358558655, + "learning_rate": 0.0015, + "loss": 2.1138, + "step": 4842 + }, + { + "epoch": 0.5108649789029536, + "grad_norm": 0.4330477714538574, + "learning_rate": 0.0015, + "loss": 2.1156, + "step": 4843 + }, + { + "epoch": 0.5109704641350211, + "grad_norm": 0.5307260751724243, + "learning_rate": 0.0015, + "loss": 2.1095, + "step": 
4844 + }, + { + "epoch": 0.5110759493670886, + "grad_norm": 0.4130432903766632, + "learning_rate": 0.0015, + "loss": 2.1125, + "step": 4845 + }, + { + "epoch": 0.5111814345991561, + "grad_norm": 0.3996891975402832, + "learning_rate": 0.0015, + "loss": 2.1176, + "step": 4846 + }, + { + "epoch": 0.5112869198312237, + "grad_norm": 0.41428399085998535, + "learning_rate": 0.0015, + "loss": 2.094, + "step": 4847 + }, + { + "epoch": 0.5113924050632911, + "grad_norm": 0.3948892056941986, + "learning_rate": 0.0015, + "loss": 2.1288, + "step": 4848 + }, + { + "epoch": 0.5114978902953586, + "grad_norm": 0.40019354224205017, + "learning_rate": 0.0015, + "loss": 2.0997, + "step": 4849 + }, + { + "epoch": 0.5116033755274262, + "grad_norm": 0.41669607162475586, + "learning_rate": 0.0015, + "loss": 2.1183, + "step": 4850 + }, + { + "epoch": 0.5117088607594936, + "grad_norm": 0.44210442900657654, + "learning_rate": 0.0015, + "loss": 2.1234, + "step": 4851 + }, + { + "epoch": 0.5118143459915612, + "grad_norm": 0.4331668019294739, + "learning_rate": 0.0015, + "loss": 2.1156, + "step": 4852 + }, + { + "epoch": 0.5119198312236287, + "grad_norm": 0.38395386934280396, + "learning_rate": 0.0015, + "loss": 2.0629, + "step": 4853 + }, + { + "epoch": 0.5120253164556962, + "grad_norm": 0.502906322479248, + "learning_rate": 0.0015, + "loss": 2.1298, + "step": 4854 + }, + { + "epoch": 0.5121308016877637, + "grad_norm": 0.44717705249786377, + "learning_rate": 0.0015, + "loss": 2.0996, + "step": 4855 + }, + { + "epoch": 0.5122362869198313, + "grad_norm": 0.5078164339065552, + "learning_rate": 0.0015, + "loss": 2.0762, + "step": 4856 + }, + { + "epoch": 0.5123417721518987, + "grad_norm": 0.4771537184715271, + "learning_rate": 0.0015, + "loss": 2.093, + "step": 4857 + }, + { + "epoch": 0.5124472573839662, + "grad_norm": 0.5211459994316101, + "learning_rate": 0.0015, + "loss": 2.0839, + "step": 4858 + }, + { + "epoch": 0.5125527426160338, + "grad_norm": 0.47580280900001526, + "learning_rate": 
0.0015, + "loss": 2.058, + "step": 4859 + }, + { + "epoch": 0.5126582278481012, + "grad_norm": 0.40446436405181885, + "learning_rate": 0.0015, + "loss": 2.1023, + "step": 4860 + }, + { + "epoch": 0.5127637130801688, + "grad_norm": 0.5625984072685242, + "learning_rate": 0.0015, + "loss": 2.101, + "step": 4861 + }, + { + "epoch": 0.5128691983122363, + "grad_norm": 0.43740561604499817, + "learning_rate": 0.0015, + "loss": 2.077, + "step": 4862 + }, + { + "epoch": 0.5129746835443038, + "grad_norm": 0.4265163540840149, + "learning_rate": 0.0015, + "loss": 2.0974, + "step": 4863 + }, + { + "epoch": 0.5130801687763713, + "grad_norm": 0.42696434259414673, + "learning_rate": 0.0015, + "loss": 2.1159, + "step": 4864 + }, + { + "epoch": 0.5131856540084389, + "grad_norm": 0.4199480712413788, + "learning_rate": 0.0015, + "loss": 2.0718, + "step": 4865 + }, + { + "epoch": 0.5132911392405063, + "grad_norm": 0.47570401430130005, + "learning_rate": 0.0015, + "loss": 2.0703, + "step": 4866 + }, + { + "epoch": 0.5133966244725738, + "grad_norm": 0.3404565155506134, + "learning_rate": 0.0015, + "loss": 2.0702, + "step": 4867 + }, + { + "epoch": 0.5135021097046414, + "grad_norm": 0.3899995684623718, + "learning_rate": 0.0015, + "loss": 2.0864, + "step": 4868 + }, + { + "epoch": 0.5136075949367088, + "grad_norm": 0.36822497844696045, + "learning_rate": 0.0015, + "loss": 2.0758, + "step": 4869 + }, + { + "epoch": 0.5137130801687764, + "grad_norm": 0.3895780146121979, + "learning_rate": 0.0015, + "loss": 2.0876, + "step": 4870 + }, + { + "epoch": 0.5138185654008439, + "grad_norm": 0.40487539768218994, + "learning_rate": 0.0015, + "loss": 2.1015, + "step": 4871 + }, + { + "epoch": 0.5139240506329114, + "grad_norm": 0.36285004019737244, + "learning_rate": 0.0015, + "loss": 2.0797, + "step": 4872 + }, + { + "epoch": 0.5140295358649789, + "grad_norm": 0.4284467399120331, + "learning_rate": 0.0015, + "loss": 2.1416, + "step": 4873 + }, + { + "epoch": 0.5141350210970465, + "grad_norm": 
0.4263356328010559, + "learning_rate": 0.0015, + "loss": 2.1129, + "step": 4874 + }, + { + "epoch": 0.5142405063291139, + "grad_norm": 0.35996654629707336, + "learning_rate": 0.0015, + "loss": 2.062, + "step": 4875 + }, + { + "epoch": 0.5143459915611814, + "grad_norm": 0.49340954422950745, + "learning_rate": 0.0015, + "loss": 2.1216, + "step": 4876 + }, + { + "epoch": 0.514451476793249, + "grad_norm": 0.36090087890625, + "learning_rate": 0.0015, + "loss": 2.0912, + "step": 4877 + }, + { + "epoch": 0.5145569620253164, + "grad_norm": 0.4596453607082367, + "learning_rate": 0.0015, + "loss": 2.1271, + "step": 4878 + }, + { + "epoch": 0.514662447257384, + "grad_norm": 0.4310050308704376, + "learning_rate": 0.0015, + "loss": 2.1288, + "step": 4879 + }, + { + "epoch": 0.5147679324894515, + "grad_norm": 0.43968668580055237, + "learning_rate": 0.0015, + "loss": 2.069, + "step": 4880 + }, + { + "epoch": 0.514873417721519, + "grad_norm": 0.49320167303085327, + "learning_rate": 0.0015, + "loss": 2.1081, + "step": 4881 + }, + { + "epoch": 0.5149789029535865, + "grad_norm": 0.38081127405166626, + "learning_rate": 0.0015, + "loss": 2.1056, + "step": 4882 + }, + { + "epoch": 0.515084388185654, + "grad_norm": 0.4832468330860138, + "learning_rate": 0.0015, + "loss": 2.113, + "step": 4883 + }, + { + "epoch": 0.5151898734177215, + "grad_norm": 0.3925957679748535, + "learning_rate": 0.0015, + "loss": 2.0984, + "step": 4884 + }, + { + "epoch": 0.515295358649789, + "grad_norm": 0.47304075956344604, + "learning_rate": 0.0015, + "loss": 2.0477, + "step": 4885 + }, + { + "epoch": 0.5154008438818566, + "grad_norm": 0.5317825078964233, + "learning_rate": 0.0015, + "loss": 2.1106, + "step": 4886 + }, + { + "epoch": 0.515506329113924, + "grad_norm": 0.4393816292285919, + "learning_rate": 0.0015, + "loss": 2.0659, + "step": 4887 + }, + { + "epoch": 0.5156118143459916, + "grad_norm": 0.4326223134994507, + "learning_rate": 0.0015, + "loss": 2.1387, + "step": 4888 + }, + { + "epoch": 
0.5157172995780591, + "grad_norm": 0.4062981903553009, + "learning_rate": 0.0015, + "loss": 2.0807, + "step": 4889 + }, + { + "epoch": 0.5158227848101266, + "grad_norm": 0.4125778079032898, + "learning_rate": 0.0015, + "loss": 2.0796, + "step": 4890 + }, + { + "epoch": 0.5159282700421941, + "grad_norm": 0.44272929430007935, + "learning_rate": 0.0015, + "loss": 2.0687, + "step": 4891 + }, + { + "epoch": 0.5160337552742617, + "grad_norm": 0.4491164982318878, + "learning_rate": 0.0015, + "loss": 2.1191, + "step": 4892 + }, + { + "epoch": 0.5161392405063291, + "grad_norm": 0.3818008005619049, + "learning_rate": 0.0015, + "loss": 2.1121, + "step": 4893 + }, + { + "epoch": 0.5162447257383966, + "grad_norm": 0.38002514839172363, + "learning_rate": 0.0015, + "loss": 2.1136, + "step": 4894 + }, + { + "epoch": 0.5163502109704642, + "grad_norm": 0.3874662220478058, + "learning_rate": 0.0015, + "loss": 2.0602, + "step": 4895 + }, + { + "epoch": 0.5164556962025316, + "grad_norm": 0.38681864738464355, + "learning_rate": 0.0015, + "loss": 2.1225, + "step": 4896 + }, + { + "epoch": 0.5165611814345992, + "grad_norm": 0.40136632323265076, + "learning_rate": 0.0015, + "loss": 2.0963, + "step": 4897 + }, + { + "epoch": 0.5166666666666667, + "grad_norm": 0.4293530285358429, + "learning_rate": 0.0015, + "loss": 2.1343, + "step": 4898 + }, + { + "epoch": 0.5167721518987342, + "grad_norm": 0.4060026705265045, + "learning_rate": 0.0015, + "loss": 2.0808, + "step": 4899 + }, + { + "epoch": 0.5168776371308017, + "grad_norm": 0.38310620188713074, + "learning_rate": 0.0015, + "loss": 2.1082, + "step": 4900 + }, + { + "epoch": 0.5169831223628693, + "grad_norm": 0.5624974966049194, + "learning_rate": 0.0015, + "loss": 2.1183, + "step": 4901 + }, + { + "epoch": 0.5170886075949367, + "grad_norm": 0.398515909910202, + "learning_rate": 0.0015, + "loss": 2.0732, + "step": 4902 + }, + { + "epoch": 0.5171940928270042, + "grad_norm": 0.4688109755516052, + "learning_rate": 0.0015, + "loss": 2.0902, + 
"step": 4903 + }, + { + "epoch": 0.5172995780590718, + "grad_norm": 0.3888874053955078, + "learning_rate": 0.0015, + "loss": 2.1109, + "step": 4904 + }, + { + "epoch": 0.5174050632911392, + "grad_norm": 0.4174090027809143, + "learning_rate": 0.0015, + "loss": 2.1177, + "step": 4905 + }, + { + "epoch": 0.5175105485232068, + "grad_norm": 0.4362810254096985, + "learning_rate": 0.0015, + "loss": 2.0799, + "step": 4906 + }, + { + "epoch": 0.5176160337552742, + "grad_norm": 0.4490891695022583, + "learning_rate": 0.0015, + "loss": 2.0653, + "step": 4907 + }, + { + "epoch": 0.5177215189873418, + "grad_norm": 0.38070616126060486, + "learning_rate": 0.0015, + "loss": 2.1218, + "step": 4908 + }, + { + "epoch": 0.5178270042194093, + "grad_norm": 0.456001341342926, + "learning_rate": 0.0015, + "loss": 2.0867, + "step": 4909 + }, + { + "epoch": 0.5179324894514767, + "grad_norm": 0.44573184847831726, + "learning_rate": 0.0015, + "loss": 2.1323, + "step": 4910 + }, + { + "epoch": 0.5180379746835443, + "grad_norm": 0.3721276521682739, + "learning_rate": 0.0015, + "loss": 2.1011, + "step": 4911 + }, + { + "epoch": 0.5181434599156118, + "grad_norm": 0.44168558716773987, + "learning_rate": 0.0015, + "loss": 2.0773, + "step": 4912 + }, + { + "epoch": 0.5182489451476793, + "grad_norm": 0.4256632924079895, + "learning_rate": 0.0015, + "loss": 2.0769, + "step": 4913 + }, + { + "epoch": 0.5183544303797468, + "grad_norm": 0.3887871503829956, + "learning_rate": 0.0015, + "loss": 2.0616, + "step": 4914 + }, + { + "epoch": 0.5184599156118144, + "grad_norm": 0.4918808937072754, + "learning_rate": 0.0015, + "loss": 2.0997, + "step": 4915 + }, + { + "epoch": 0.5185654008438818, + "grad_norm": 0.39549198746681213, + "learning_rate": 0.0015, + "loss": 2.1026, + "step": 4916 + }, + { + "epoch": 0.5186708860759494, + "grad_norm": 0.47487008571624756, + "learning_rate": 0.0015, + "loss": 2.1035, + "step": 4917 + }, + { + "epoch": 0.5187763713080169, + "grad_norm": 0.48352837562561035, + 
"learning_rate": 0.0015, + "loss": 2.0762, + "step": 4918 + }, + { + "epoch": 0.5188818565400843, + "grad_norm": 0.49861088395118713, + "learning_rate": 0.0015, + "loss": 2.0942, + "step": 4919 + }, + { + "epoch": 0.5189873417721519, + "grad_norm": 0.4281407594680786, + "learning_rate": 0.0015, + "loss": 2.1176, + "step": 4920 + }, + { + "epoch": 0.5190928270042194, + "grad_norm": 0.485360324382782, + "learning_rate": 0.0015, + "loss": 2.1184, + "step": 4921 + }, + { + "epoch": 0.5191983122362869, + "grad_norm": 0.430396169424057, + "learning_rate": 0.0015, + "loss": 2.1423, + "step": 4922 + }, + { + "epoch": 0.5193037974683544, + "grad_norm": 0.39980560541152954, + "learning_rate": 0.0015, + "loss": 2.0942, + "step": 4923 + }, + { + "epoch": 0.519409282700422, + "grad_norm": 0.4188731014728546, + "learning_rate": 0.0015, + "loss": 2.0591, + "step": 4924 + }, + { + "epoch": 0.5195147679324894, + "grad_norm": 0.36922821402549744, + "learning_rate": 0.0015, + "loss": 2.0886, + "step": 4925 + }, + { + "epoch": 0.519620253164557, + "grad_norm": 0.4626424014568329, + "learning_rate": 0.0015, + "loss": 2.1198, + "step": 4926 + }, + { + "epoch": 0.5197257383966245, + "grad_norm": 0.3673723340034485, + "learning_rate": 0.0015, + "loss": 2.0638, + "step": 4927 + }, + { + "epoch": 0.5198312236286919, + "grad_norm": 0.4568602442741394, + "learning_rate": 0.0015, + "loss": 2.0972, + "step": 4928 + }, + { + "epoch": 0.5199367088607595, + "grad_norm": 0.4002932906150818, + "learning_rate": 0.0015, + "loss": 2.1035, + "step": 4929 + }, + { + "epoch": 0.520042194092827, + "grad_norm": 0.40836066007614136, + "learning_rate": 0.0015, + "loss": 2.096, + "step": 4930 + }, + { + "epoch": 0.5201476793248945, + "grad_norm": 0.4515625536441803, + "learning_rate": 0.0015, + "loss": 2.0848, + "step": 4931 + }, + { + "epoch": 0.520253164556962, + "grad_norm": 0.3502800464630127, + "learning_rate": 0.0015, + "loss": 2.1017, + "step": 4932 + }, + { + "epoch": 0.5203586497890296, + "grad_norm": 
0.5028491616249084, + "learning_rate": 0.0015, + "loss": 2.1085, + "step": 4933 + }, + { + "epoch": 0.520464135021097, + "grad_norm": 0.521587073802948, + "learning_rate": 0.0015, + "loss": 2.0985, + "step": 4934 + }, + { + "epoch": 0.5205696202531646, + "grad_norm": 0.45293277502059937, + "learning_rate": 0.0015, + "loss": 2.0416, + "step": 4935 + }, + { + "epoch": 0.5206751054852321, + "grad_norm": 0.4305177330970764, + "learning_rate": 0.0015, + "loss": 2.1063, + "step": 4936 + }, + { + "epoch": 0.5207805907172995, + "grad_norm": 0.39992016553878784, + "learning_rate": 0.0015, + "loss": 2.0683, + "step": 4937 + }, + { + "epoch": 0.5208860759493671, + "grad_norm": 0.38112613558769226, + "learning_rate": 0.0015, + "loss": 2.0953, + "step": 4938 + }, + { + "epoch": 0.5209915611814346, + "grad_norm": 0.4043692648410797, + "learning_rate": 0.0015, + "loss": 2.1165, + "step": 4939 + }, + { + "epoch": 0.5210970464135021, + "grad_norm": 0.40614354610443115, + "learning_rate": 0.0015, + "loss": 2.0838, + "step": 4940 + }, + { + "epoch": 0.5212025316455696, + "grad_norm": 0.3374157249927521, + "learning_rate": 0.0015, + "loss": 2.1219, + "step": 4941 + }, + { + "epoch": 0.5213080168776372, + "grad_norm": 0.39858266711235046, + "learning_rate": 0.0015, + "loss": 2.1029, + "step": 4942 + }, + { + "epoch": 0.5214135021097046, + "grad_norm": 0.4249323606491089, + "learning_rate": 0.0015, + "loss": 2.1014, + "step": 4943 + }, + { + "epoch": 0.5215189873417722, + "grad_norm": 0.4277331829071045, + "learning_rate": 0.0015, + "loss": 2.0986, + "step": 4944 + }, + { + "epoch": 0.5216244725738397, + "grad_norm": 0.3876212537288666, + "learning_rate": 0.0015, + "loss": 2.0985, + "step": 4945 + }, + { + "epoch": 0.5217299578059071, + "grad_norm": 0.38793298602104187, + "learning_rate": 0.0015, + "loss": 2.0689, + "step": 4946 + }, + { + "epoch": 0.5218354430379747, + "grad_norm": 0.3734416663646698, + "learning_rate": 0.0015, + "loss": 2.1092, + "step": 4947 + }, + { + "epoch": 
0.5219409282700422, + "grad_norm": 0.37270084023475647, + "learning_rate": 0.0015, + "loss": 2.0909, + "step": 4948 + }, + { + "epoch": 0.5220464135021097, + "grad_norm": 0.44632580876350403, + "learning_rate": 0.0015, + "loss": 2.0933, + "step": 4949 + }, + { + "epoch": 0.5221518987341772, + "grad_norm": 0.4448528587818146, + "learning_rate": 0.0015, + "loss": 2.1116, + "step": 4950 + }, + { + "epoch": 0.5222573839662448, + "grad_norm": 0.39171430468559265, + "learning_rate": 0.0015, + "loss": 2.1033, + "step": 4951 + }, + { + "epoch": 0.5223628691983122, + "grad_norm": 0.42637890577316284, + "learning_rate": 0.0015, + "loss": 2.0833, + "step": 4952 + }, + { + "epoch": 0.5224683544303798, + "grad_norm": 0.346876859664917, + "learning_rate": 0.0015, + "loss": 2.047, + "step": 4953 + }, + { + "epoch": 0.5225738396624473, + "grad_norm": 0.37462544441223145, + "learning_rate": 0.0015, + "loss": 2.094, + "step": 4954 + }, + { + "epoch": 0.5226793248945147, + "grad_norm": 0.3776819109916687, + "learning_rate": 0.0015, + "loss": 2.0808, + "step": 4955 + }, + { + "epoch": 0.5227848101265823, + "grad_norm": 0.36002618074417114, + "learning_rate": 0.0015, + "loss": 2.1018, + "step": 4956 + }, + { + "epoch": 0.5228902953586498, + "grad_norm": 0.41827428340911865, + "learning_rate": 0.0015, + "loss": 2.1067, + "step": 4957 + }, + { + "epoch": 0.5229957805907173, + "grad_norm": 0.33838629722595215, + "learning_rate": 0.0015, + "loss": 2.0813, + "step": 4958 + }, + { + "epoch": 0.5231012658227848, + "grad_norm": 0.40573838353157043, + "learning_rate": 0.0015, + "loss": 2.0477, + "step": 4959 + }, + { + "epoch": 0.5232067510548524, + "grad_norm": 0.39303871989250183, + "learning_rate": 0.0015, + "loss": 2.0849, + "step": 4960 + }, + { + "epoch": 0.5233122362869198, + "grad_norm": 0.36242246627807617, + "learning_rate": 0.0015, + "loss": 2.0877, + "step": 4961 + }, + { + "epoch": 0.5234177215189874, + "grad_norm": 0.40373972058296204, + "learning_rate": 0.0015, + "loss": 2.0997, 
+ "step": 4962 + }, + { + "epoch": 0.5235232067510549, + "grad_norm": 0.4450739622116089, + "learning_rate": 0.0015, + "loss": 2.0999, + "step": 4963 + }, + { + "epoch": 0.5236286919831223, + "grad_norm": 0.4059443175792694, + "learning_rate": 0.0015, + "loss": 2.1072, + "step": 4964 + }, + { + "epoch": 0.5237341772151899, + "grad_norm": 0.33791476488113403, + "learning_rate": 0.0015, + "loss": 2.1637, + "step": 4965 + }, + { + "epoch": 0.5238396624472574, + "grad_norm": 0.4334389269351959, + "learning_rate": 0.0015, + "loss": 2.0898, + "step": 4966 + }, + { + "epoch": 0.5239451476793249, + "grad_norm": 0.4680154323577881, + "learning_rate": 0.0015, + "loss": 2.0822, + "step": 4967 + }, + { + "epoch": 0.5240506329113924, + "grad_norm": 0.3938485085964203, + "learning_rate": 0.0015, + "loss": 2.0344, + "step": 4968 + }, + { + "epoch": 0.52415611814346, + "grad_norm": 0.3719005584716797, + "learning_rate": 0.0015, + "loss": 2.0748, + "step": 4969 + }, + { + "epoch": 0.5242616033755274, + "grad_norm": 0.4333193004131317, + "learning_rate": 0.0015, + "loss": 2.1309, + "step": 4970 + }, + { + "epoch": 0.524367088607595, + "grad_norm": 0.3975122272968292, + "learning_rate": 0.0015, + "loss": 2.1446, + "step": 4971 + }, + { + "epoch": 0.5244725738396624, + "grad_norm": 0.3215698301792145, + "learning_rate": 0.0015, + "loss": 2.0317, + "step": 4972 + }, + { + "epoch": 0.5245780590717299, + "grad_norm": 0.41148436069488525, + "learning_rate": 0.0015, + "loss": 2.0755, + "step": 4973 + }, + { + "epoch": 0.5246835443037975, + "grad_norm": 0.4008338451385498, + "learning_rate": 0.0015, + "loss": 2.0757, + "step": 4974 + }, + { + "epoch": 0.5247890295358649, + "grad_norm": 0.3864641487598419, + "learning_rate": 0.0015, + "loss": 2.1152, + "step": 4975 + }, + { + "epoch": 0.5248945147679325, + "grad_norm": 0.4118689000606537, + "learning_rate": 0.0015, + "loss": 2.0833, + "step": 4976 + }, + { + "epoch": 0.525, + "grad_norm": 0.40383389592170715, + "learning_rate": 0.0015, + 
"loss": 2.1041, + "step": 4977 + }, + { + "epoch": 0.5251054852320675, + "grad_norm": 0.34992918372154236, + "learning_rate": 0.0015, + "loss": 2.0935, + "step": 4978 + }, + { + "epoch": 0.525210970464135, + "grad_norm": 0.3571225702762604, + "learning_rate": 0.0015, + "loss": 2.0657, + "step": 4979 + }, + { + "epoch": 0.5253164556962026, + "grad_norm": 0.3659745752811432, + "learning_rate": 0.0015, + "loss": 2.0493, + "step": 4980 + }, + { + "epoch": 0.52542194092827, + "grad_norm": 0.34581178426742554, + "learning_rate": 0.0015, + "loss": 2.0984, + "step": 4981 + }, + { + "epoch": 0.5255274261603375, + "grad_norm": 0.41777628660202026, + "learning_rate": 0.0015, + "loss": 2.0787, + "step": 4982 + }, + { + "epoch": 0.5256329113924051, + "grad_norm": 0.39917898178100586, + "learning_rate": 0.0015, + "loss": 2.0678, + "step": 4983 + }, + { + "epoch": 0.5257383966244725, + "grad_norm": 0.3658931851387024, + "learning_rate": 0.0015, + "loss": 2.0811, + "step": 4984 + }, + { + "epoch": 0.5258438818565401, + "grad_norm": 0.3714138865470886, + "learning_rate": 0.0015, + "loss": 2.0838, + "step": 4985 + }, + { + "epoch": 0.5259493670886076, + "grad_norm": 0.3980266749858856, + "learning_rate": 0.0015, + "loss": 2.0656, + "step": 4986 + }, + { + "epoch": 0.5260548523206751, + "grad_norm": 0.4134921431541443, + "learning_rate": 0.0015, + "loss": 2.0445, + "step": 4987 + }, + { + "epoch": 0.5261603375527426, + "grad_norm": 0.3771642744541168, + "learning_rate": 0.0015, + "loss": 2.093, + "step": 4988 + }, + { + "epoch": 0.5262658227848102, + "grad_norm": 0.48399588465690613, + "learning_rate": 0.0015, + "loss": 2.1215, + "step": 4989 + }, + { + "epoch": 0.5263713080168776, + "grad_norm": 0.4107123911380768, + "learning_rate": 0.0015, + "loss": 2.0905, + "step": 4990 + }, + { + "epoch": 0.5264767932489451, + "grad_norm": 0.4375760853290558, + "learning_rate": 0.0015, + "loss": 2.1118, + "step": 4991 + }, + { + "epoch": 0.5265822784810127, + "grad_norm": 0.362479031085968, + 
"learning_rate": 0.0015, + "loss": 2.0759, + "step": 4992 + }, + { + "epoch": 0.5266877637130801, + "grad_norm": 0.43497851490974426, + "learning_rate": 0.0015, + "loss": 2.0897, + "step": 4993 + }, + { + "epoch": 0.5267932489451477, + "grad_norm": 0.4233604967594147, + "learning_rate": 0.0015, + "loss": 2.1222, + "step": 4994 + }, + { + "epoch": 0.5268987341772152, + "grad_norm": 0.38639965653419495, + "learning_rate": 0.0015, + "loss": 2.1106, + "step": 4995 + }, + { + "epoch": 0.5270042194092827, + "grad_norm": 0.4023979604244232, + "learning_rate": 0.0015, + "loss": 2.1058, + "step": 4996 + }, + { + "epoch": 0.5271097046413502, + "grad_norm": 0.42065826058387756, + "learning_rate": 0.0015, + "loss": 2.0777, + "step": 4997 + }, + { + "epoch": 0.5272151898734178, + "grad_norm": 0.40315306186676025, + "learning_rate": 0.0015, + "loss": 2.0997, + "step": 4998 + }, + { + "epoch": 0.5273206751054852, + "grad_norm": 0.40293407440185547, + "learning_rate": 0.0015, + "loss": 2.1024, + "step": 4999 + }, + { + "epoch": 0.5274261603375527, + "grad_norm": 0.3636614978313446, + "learning_rate": 0.0015, + "loss": 2.0626, + "step": 5000 + }, + { + "epoch": 0.5275316455696203, + "grad_norm": 0.3712257146835327, + "learning_rate": 0.0015, + "loss": 2.1252, + "step": 5001 + }, + { + "epoch": 0.5276371308016877, + "grad_norm": 0.37443453073501587, + "learning_rate": 0.0015, + "loss": 2.1208, + "step": 5002 + }, + { + "epoch": 0.5277426160337553, + "grad_norm": 0.36476656794548035, + "learning_rate": 0.0015, + "loss": 2.0707, + "step": 5003 + }, + { + "epoch": 0.5278481012658228, + "grad_norm": 0.4024512469768524, + "learning_rate": 0.0015, + "loss": 2.0661, + "step": 5004 + }, + { + "epoch": 0.5279535864978903, + "grad_norm": 0.36505141854286194, + "learning_rate": 0.0015, + "loss": 2.0581, + "step": 5005 + }, + { + "epoch": 0.5280590717299578, + "grad_norm": 0.4182271361351013, + "learning_rate": 0.0015, + "loss": 2.0428, + "step": 5006 + }, + { + "epoch": 0.5281645569620254, + 
"grad_norm": 0.38016659021377563, + "learning_rate": 0.0015, + "loss": 2.0784, + "step": 5007 + }, + { + "epoch": 0.5282700421940928, + "grad_norm": 0.40903085470199585, + "learning_rate": 0.0015, + "loss": 2.1074, + "step": 5008 + }, + { + "epoch": 0.5283755274261603, + "grad_norm": 0.4831714630126953, + "learning_rate": 0.0015, + "loss": 2.074, + "step": 5009 + }, + { + "epoch": 0.5284810126582279, + "grad_norm": 0.46944841742515564, + "learning_rate": 0.0015, + "loss": 2.0968, + "step": 5010 + }, + { + "epoch": 0.5285864978902953, + "grad_norm": 0.3653191328048706, + "learning_rate": 0.0015, + "loss": 2.0627, + "step": 5011 + }, + { + "epoch": 0.5286919831223629, + "grad_norm": 0.4128586947917938, + "learning_rate": 0.0015, + "loss": 2.0975, + "step": 5012 + }, + { + "epoch": 0.5287974683544304, + "grad_norm": 0.33169057965278625, + "learning_rate": 0.0015, + "loss": 2.0837, + "step": 5013 + }, + { + "epoch": 0.5289029535864979, + "grad_norm": 0.39099571108818054, + "learning_rate": 0.0015, + "loss": 2.0649, + "step": 5014 + }, + { + "epoch": 0.5290084388185654, + "grad_norm": 0.342160165309906, + "learning_rate": 0.0015, + "loss": 2.0334, + "step": 5015 + }, + { + "epoch": 0.529113924050633, + "grad_norm": 0.3437144160270691, + "learning_rate": 0.0015, + "loss": 2.0957, + "step": 5016 + }, + { + "epoch": 0.5292194092827004, + "grad_norm": 0.382292240858078, + "learning_rate": 0.0015, + "loss": 2.108, + "step": 5017 + }, + { + "epoch": 0.5293248945147679, + "grad_norm": 0.3539496958255768, + "learning_rate": 0.0015, + "loss": 2.0656, + "step": 5018 + }, + { + "epoch": 0.5294303797468355, + "grad_norm": 0.3591260015964508, + "learning_rate": 0.0015, + "loss": 2.0737, + "step": 5019 + }, + { + "epoch": 0.5295358649789029, + "grad_norm": 0.38616710901260376, + "learning_rate": 0.0015, + "loss": 2.1059, + "step": 5020 + }, + { + "epoch": 0.5296413502109705, + "grad_norm": 0.37400415539741516, + "learning_rate": 0.0015, + "loss": 2.0704, + "step": 5021 + }, + { + 
"epoch": 0.529746835443038, + "grad_norm": 0.4768916070461273, + "learning_rate": 0.0015, + "loss": 2.0651, + "step": 5022 + }, + { + "epoch": 0.5298523206751055, + "grad_norm": 0.4248548746109009, + "learning_rate": 0.0015, + "loss": 2.0698, + "step": 5023 + }, + { + "epoch": 0.529957805907173, + "grad_norm": 0.4016299545764923, + "learning_rate": 0.0015, + "loss": 2.0619, + "step": 5024 + }, + { + "epoch": 0.5300632911392406, + "grad_norm": 0.42835310101509094, + "learning_rate": 0.0015, + "loss": 2.0498, + "step": 5025 + }, + { + "epoch": 0.530168776371308, + "grad_norm": 0.6222458481788635, + "learning_rate": 0.0015, + "loss": 2.0833, + "step": 5026 + }, + { + "epoch": 0.5302742616033755, + "grad_norm": 0.47393620014190674, + "learning_rate": 0.0015, + "loss": 2.104, + "step": 5027 + }, + { + "epoch": 0.5303797468354431, + "grad_norm": 0.38993293046951294, + "learning_rate": 0.0015, + "loss": 2.0614, + "step": 5028 + }, + { + "epoch": 0.5304852320675105, + "grad_norm": 0.49576085805892944, + "learning_rate": 0.0015, + "loss": 2.069, + "step": 5029 + }, + { + "epoch": 0.5305907172995781, + "grad_norm": 0.44600993394851685, + "learning_rate": 0.0015, + "loss": 2.0453, + "step": 5030 + }, + { + "epoch": 0.5306962025316456, + "grad_norm": 0.44353917241096497, + "learning_rate": 0.0015, + "loss": 2.0823, + "step": 5031 + }, + { + "epoch": 0.5308016877637131, + "grad_norm": 0.365750253200531, + "learning_rate": 0.0015, + "loss": 2.0956, + "step": 5032 + }, + { + "epoch": 0.5309071729957806, + "grad_norm": 0.47149452567100525, + "learning_rate": 0.0015, + "loss": 2.0954, + "step": 5033 + }, + { + "epoch": 0.5310126582278482, + "grad_norm": 0.34499457478523254, + "learning_rate": 0.0015, + "loss": 2.0771, + "step": 5034 + }, + { + "epoch": 0.5311181434599156, + "grad_norm": 0.4670141339302063, + "learning_rate": 0.0015, + "loss": 2.1007, + "step": 5035 + }, + { + "epoch": 0.5312236286919831, + "grad_norm": 0.4206826090812683, + "learning_rate": 0.0015, + "loss": 
2.0773, + "step": 5036 + }, + { + "epoch": 0.5313291139240506, + "grad_norm": 0.41883206367492676, + "learning_rate": 0.0015, + "loss": 2.0731, + "step": 5037 + }, + { + "epoch": 0.5314345991561181, + "grad_norm": 0.4471744894981384, + "learning_rate": 0.0015, + "loss": 2.1265, + "step": 5038 + }, + { + "epoch": 0.5315400843881857, + "grad_norm": 0.41390836238861084, + "learning_rate": 0.0015, + "loss": 2.0872, + "step": 5039 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 0.3858494162559509, + "learning_rate": 0.0015, + "loss": 2.0535, + "step": 5040 + }, + { + "epoch": 0.5317510548523207, + "grad_norm": 0.4197840988636017, + "learning_rate": 0.0015, + "loss": 2.0909, + "step": 5041 + }, + { + "epoch": 0.5318565400843882, + "grad_norm": 0.44953083992004395, + "learning_rate": 0.0015, + "loss": 2.0896, + "step": 5042 + }, + { + "epoch": 0.5319620253164556, + "grad_norm": 0.42545416951179504, + "learning_rate": 0.0015, + "loss": 2.0605, + "step": 5043 + }, + { + "epoch": 0.5320675105485232, + "grad_norm": 0.47806045413017273, + "learning_rate": 0.0015, + "loss": 2.0905, + "step": 5044 + }, + { + "epoch": 0.5321729957805907, + "grad_norm": 0.3779779374599457, + "learning_rate": 0.0015, + "loss": 2.0551, + "step": 5045 + }, + { + "epoch": 0.5322784810126582, + "grad_norm": 0.45260873436927795, + "learning_rate": 0.0015, + "loss": 2.122, + "step": 5046 + }, + { + "epoch": 0.5323839662447257, + "grad_norm": 0.40472540259361267, + "learning_rate": 0.0015, + "loss": 2.083, + "step": 5047 + }, + { + "epoch": 0.5324894514767933, + "grad_norm": 0.3857751190662384, + "learning_rate": 0.0015, + "loss": 2.0479, + "step": 5048 + }, + { + "epoch": 0.5325949367088607, + "grad_norm": 0.40075427293777466, + "learning_rate": 0.0015, + "loss": 2.0784, + "step": 5049 + }, + { + "epoch": 0.5327004219409283, + "grad_norm": 0.4033453166484833, + "learning_rate": 0.0015, + "loss": 2.0632, + "step": 5050 + }, + { + "epoch": 0.5328059071729958, + "grad_norm": 0.39700552821159363, + 
"learning_rate": 0.0015, + "loss": 2.1122, + "step": 5051 + }, + { + "epoch": 0.5329113924050632, + "grad_norm": 0.38965949416160583, + "learning_rate": 0.0015, + "loss": 2.0835, + "step": 5052 + }, + { + "epoch": 0.5330168776371308, + "grad_norm": 0.3924352526664734, + "learning_rate": 0.0015, + "loss": 2.1017, + "step": 5053 + }, + { + "epoch": 0.5331223628691983, + "grad_norm": 0.36168110370635986, + "learning_rate": 0.0015, + "loss": 2.095, + "step": 5054 + }, + { + "epoch": 0.5332278481012658, + "grad_norm": 0.389156699180603, + "learning_rate": 0.0015, + "loss": 2.1034, + "step": 5055 + }, + { + "epoch": 0.5333333333333333, + "grad_norm": 0.3682924509048462, + "learning_rate": 0.0015, + "loss": 2.0843, + "step": 5056 + }, + { + "epoch": 0.5334388185654009, + "grad_norm": 0.38966888189315796, + "learning_rate": 0.0015, + "loss": 2.1127, + "step": 5057 + }, + { + "epoch": 0.5335443037974683, + "grad_norm": 0.4079562723636627, + "learning_rate": 0.0015, + "loss": 2.0983, + "step": 5058 + }, + { + "epoch": 0.5336497890295359, + "grad_norm": 0.4310813248157501, + "learning_rate": 0.0015, + "loss": 2.0809, + "step": 5059 + }, + { + "epoch": 0.5337552742616034, + "grad_norm": 0.5130252242088318, + "learning_rate": 0.0015, + "loss": 2.0756, + "step": 5060 + }, + { + "epoch": 0.5338607594936708, + "grad_norm": 0.4514263868331909, + "learning_rate": 0.0015, + "loss": 2.1177, + "step": 5061 + }, + { + "epoch": 0.5339662447257384, + "grad_norm": 0.4267367124557495, + "learning_rate": 0.0015, + "loss": 2.0581, + "step": 5062 + }, + { + "epoch": 0.5340717299578059, + "grad_norm": 0.4090350866317749, + "learning_rate": 0.0015, + "loss": 2.0757, + "step": 5063 + }, + { + "epoch": 0.5341772151898734, + "grad_norm": 0.5755271315574646, + "learning_rate": 0.0015, + "loss": 2.1163, + "step": 5064 + }, + { + "epoch": 0.5342827004219409, + "grad_norm": 0.5400944948196411, + "learning_rate": 0.0015, + "loss": 2.0708, + "step": 5065 + }, + { + "epoch": 0.5343881856540085, + 
"grad_norm": 0.45348528027534485, + "learning_rate": 0.0015, + "loss": 2.0886, + "step": 5066 + }, + { + "epoch": 0.5344936708860759, + "grad_norm": 0.42287084460258484, + "learning_rate": 0.0015, + "loss": 2.0761, + "step": 5067 + }, + { + "epoch": 0.5345991561181435, + "grad_norm": 0.44966021180152893, + "learning_rate": 0.0015, + "loss": 2.1144, + "step": 5068 + }, + { + "epoch": 0.534704641350211, + "grad_norm": 0.37180885672569275, + "learning_rate": 0.0015, + "loss": 2.0712, + "step": 5069 + }, + { + "epoch": 0.5348101265822784, + "grad_norm": 0.4641990065574646, + "learning_rate": 0.0015, + "loss": 2.0372, + "step": 5070 + }, + { + "epoch": 0.534915611814346, + "grad_norm": 0.5013905763626099, + "learning_rate": 0.0015, + "loss": 2.0852, + "step": 5071 + }, + { + "epoch": 0.5350210970464135, + "grad_norm": 0.3887631595134735, + "learning_rate": 0.0015, + "loss": 2.0875, + "step": 5072 + }, + { + "epoch": 0.535126582278481, + "grad_norm": 0.3857819139957428, + "learning_rate": 0.0015, + "loss": 2.0594, + "step": 5073 + }, + { + "epoch": 0.5352320675105485, + "grad_norm": 0.400033563375473, + "learning_rate": 0.0015, + "loss": 2.133, + "step": 5074 + }, + { + "epoch": 0.5353375527426161, + "grad_norm": 0.34476661682128906, + "learning_rate": 0.0015, + "loss": 2.1204, + "step": 5075 + }, + { + "epoch": 0.5354430379746835, + "grad_norm": 0.368056058883667, + "learning_rate": 0.0015, + "loss": 2.0888, + "step": 5076 + }, + { + "epoch": 0.5355485232067511, + "grad_norm": 0.3581677973270416, + "learning_rate": 0.0015, + "loss": 2.0957, + "step": 5077 + }, + { + "epoch": 0.5356540084388186, + "grad_norm": 0.3680741488933563, + "learning_rate": 0.0015, + "loss": 2.1043, + "step": 5078 + }, + { + "epoch": 0.535759493670886, + "grad_norm": 0.357268750667572, + "learning_rate": 0.0015, + "loss": 2.0836, + "step": 5079 + }, + { + "epoch": 0.5358649789029536, + "grad_norm": 0.33007457852363586, + "learning_rate": 0.0015, + "loss": 2.0795, + "step": 5080 + }, + { + 
"epoch": 0.5359704641350211, + "grad_norm": 0.4155196249485016, + "learning_rate": 0.0015, + "loss": 2.0764, + "step": 5081 + }, + { + "epoch": 0.5360759493670886, + "grad_norm": 0.4158727526664734, + "learning_rate": 0.0015, + "loss": 2.0945, + "step": 5082 + }, + { + "epoch": 0.5361814345991561, + "grad_norm": 0.3682062029838562, + "learning_rate": 0.0015, + "loss": 2.0745, + "step": 5083 + }, + { + "epoch": 0.5362869198312237, + "grad_norm": 0.4095255434513092, + "learning_rate": 0.0015, + "loss": 2.1069, + "step": 5084 + }, + { + "epoch": 0.5363924050632911, + "grad_norm": 0.464287132024765, + "learning_rate": 0.0015, + "loss": 2.0822, + "step": 5085 + }, + { + "epoch": 0.5364978902953587, + "grad_norm": 0.44075775146484375, + "learning_rate": 0.0015, + "loss": 2.0609, + "step": 5086 + }, + { + "epoch": 0.5366033755274262, + "grad_norm": 0.37155240774154663, + "learning_rate": 0.0015, + "loss": 2.0303, + "step": 5087 + }, + { + "epoch": 0.5367088607594936, + "grad_norm": 0.3717164993286133, + "learning_rate": 0.0015, + "loss": 2.1348, + "step": 5088 + }, + { + "epoch": 0.5368143459915612, + "grad_norm": 0.36541539430618286, + "learning_rate": 0.0015, + "loss": 2.0692, + "step": 5089 + }, + { + "epoch": 0.5369198312236287, + "grad_norm": 0.38192033767700195, + "learning_rate": 0.0015, + "loss": 2.1196, + "step": 5090 + }, + { + "epoch": 0.5370253164556962, + "grad_norm": 0.3672389090061188, + "learning_rate": 0.0015, + "loss": 2.1165, + "step": 5091 + }, + { + "epoch": 0.5371308016877637, + "grad_norm": 0.384398490190506, + "learning_rate": 0.0015, + "loss": 2.0065, + "step": 5092 + }, + { + "epoch": 0.5372362869198313, + "grad_norm": 0.42110633850097656, + "learning_rate": 0.0015, + "loss": 2.0683, + "step": 5093 + }, + { + "epoch": 0.5373417721518987, + "grad_norm": 0.5054833292961121, + "learning_rate": 0.0015, + "loss": 2.0698, + "step": 5094 + }, + { + "epoch": 0.5374472573839663, + "grad_norm": 0.4341154396533966, + "learning_rate": 0.0015, + "loss": 
2.0804, + "step": 5095 + }, + { + "epoch": 0.5375527426160338, + "grad_norm": 0.3684111535549164, + "learning_rate": 0.0015, + "loss": 2.072, + "step": 5096 + }, + { + "epoch": 0.5376582278481012, + "grad_norm": 0.4337363541126251, + "learning_rate": 0.0015, + "loss": 2.0784, + "step": 5097 + }, + { + "epoch": 0.5377637130801688, + "grad_norm": 0.3944597840309143, + "learning_rate": 0.0015, + "loss": 2.0741, + "step": 5098 + }, + { + "epoch": 0.5378691983122363, + "grad_norm": 0.4456782639026642, + "learning_rate": 0.0015, + "loss": 2.0885, + "step": 5099 + }, + { + "epoch": 0.5379746835443038, + "grad_norm": 0.3884119391441345, + "learning_rate": 0.0015, + "loss": 2.0598, + "step": 5100 + }, + { + "epoch": 0.5380801687763713, + "grad_norm": 0.43456876277923584, + "learning_rate": 0.0015, + "loss": 2.1036, + "step": 5101 + }, + { + "epoch": 0.5381856540084389, + "grad_norm": 0.4083278477191925, + "learning_rate": 0.0015, + "loss": 2.0937, + "step": 5102 + }, + { + "epoch": 0.5382911392405063, + "grad_norm": 0.42757338285446167, + "learning_rate": 0.0015, + "loss": 2.0732, + "step": 5103 + }, + { + "epoch": 0.5383966244725739, + "grad_norm": 0.3992786407470703, + "learning_rate": 0.0015, + "loss": 2.0736, + "step": 5104 + }, + { + "epoch": 0.5385021097046413, + "grad_norm": 0.38133442401885986, + "learning_rate": 0.0015, + "loss": 2.0296, + "step": 5105 + }, + { + "epoch": 0.5386075949367088, + "grad_norm": 0.3716793656349182, + "learning_rate": 0.0015, + "loss": 2.0947, + "step": 5106 + }, + { + "epoch": 0.5387130801687764, + "grad_norm": 0.3793415129184723, + "learning_rate": 0.0015, + "loss": 2.1111, + "step": 5107 + }, + { + "epoch": 0.5388185654008438, + "grad_norm": 0.38801249861717224, + "learning_rate": 0.0015, + "loss": 2.0864, + "step": 5108 + }, + { + "epoch": 0.5389240506329114, + "grad_norm": 0.4420320689678192, + "learning_rate": 0.0015, + "loss": 2.0876, + "step": 5109 + }, + { + "epoch": 0.5390295358649789, + "grad_norm": 0.40212035179138184, + 
"learning_rate": 0.0015, + "loss": 2.0466, + "step": 5110 + }, + { + "epoch": 0.5391350210970464, + "grad_norm": 0.43986475467681885, + "learning_rate": 0.0015, + "loss": 2.0773, + "step": 5111 + }, + { + "epoch": 0.5392405063291139, + "grad_norm": 0.4097394347190857, + "learning_rate": 0.0015, + "loss": 2.0746, + "step": 5112 + }, + { + "epoch": 0.5393459915611815, + "grad_norm": 0.3981195092201233, + "learning_rate": 0.0015, + "loss": 2.1104, + "step": 5113 + }, + { + "epoch": 0.5394514767932489, + "grad_norm": 0.411871075630188, + "learning_rate": 0.0015, + "loss": 2.0636, + "step": 5114 + }, + { + "epoch": 0.5395569620253164, + "grad_norm": 0.47694364190101624, + "learning_rate": 0.0015, + "loss": 2.0505, + "step": 5115 + }, + { + "epoch": 0.539662447257384, + "grad_norm": 0.5024381875991821, + "learning_rate": 0.0015, + "loss": 2.0997, + "step": 5116 + }, + { + "epoch": 0.5397679324894514, + "grad_norm": 0.3986127972602844, + "learning_rate": 0.0015, + "loss": 2.0804, + "step": 5117 + }, + { + "epoch": 0.539873417721519, + "grad_norm": 0.40092194080352783, + "learning_rate": 0.0015, + "loss": 2.0827, + "step": 5118 + }, + { + "epoch": 0.5399789029535865, + "grad_norm": 0.4228558838367462, + "learning_rate": 0.0015, + "loss": 2.0526, + "step": 5119 + }, + { + "epoch": 0.540084388185654, + "grad_norm": 0.5246742367744446, + "learning_rate": 0.0015, + "loss": 2.0667, + "step": 5120 + }, + { + "epoch": 0.5401898734177215, + "grad_norm": 0.4049888253211975, + "learning_rate": 0.0015, + "loss": 2.0608, + "step": 5121 + }, + { + "epoch": 0.5402953586497891, + "grad_norm": 0.4357462227344513, + "learning_rate": 0.0015, + "loss": 2.0646, + "step": 5122 + }, + { + "epoch": 0.5404008438818565, + "grad_norm": 0.43963074684143066, + "learning_rate": 0.0015, + "loss": 2.0697, + "step": 5123 + }, + { + "epoch": 0.540506329113924, + "grad_norm": 0.4454375207424164, + "learning_rate": 0.0015, + "loss": 2.0702, + "step": 5124 + }, + { + "epoch": 0.5406118143459916, + 
"grad_norm": 0.3821699321269989, + "learning_rate": 0.0015, + "loss": 2.0873, + "step": 5125 + }, + { + "epoch": 0.540717299578059, + "grad_norm": 0.4702279567718506, + "learning_rate": 0.0015, + "loss": 2.0597, + "step": 5126 + }, + { + "epoch": 0.5408227848101266, + "grad_norm": 0.38880470395088196, + "learning_rate": 0.0015, + "loss": 2.0547, + "step": 5127 + }, + { + "epoch": 0.5409282700421941, + "grad_norm": 0.422706663608551, + "learning_rate": 0.0015, + "loss": 2.0621, + "step": 5128 + }, + { + "epoch": 0.5410337552742616, + "grad_norm": 0.5003085136413574, + "learning_rate": 0.0015, + "loss": 2.1061, + "step": 5129 + }, + { + "epoch": 0.5411392405063291, + "grad_norm": 0.46580615639686584, + "learning_rate": 0.0015, + "loss": 2.0687, + "step": 5130 + }, + { + "epoch": 0.5412447257383967, + "grad_norm": 0.38916346430778503, + "learning_rate": 0.0015, + "loss": 2.1196, + "step": 5131 + }, + { + "epoch": 0.5413502109704641, + "grad_norm": 0.4491898715496063, + "learning_rate": 0.0015, + "loss": 2.0936, + "step": 5132 + }, + { + "epoch": 0.5414556962025316, + "grad_norm": 0.4771844148635864, + "learning_rate": 0.0015, + "loss": 2.0693, + "step": 5133 + }, + { + "epoch": 0.5415611814345992, + "grad_norm": 0.37933459877967834, + "learning_rate": 0.0015, + "loss": 2.0825, + "step": 5134 + }, + { + "epoch": 0.5416666666666666, + "grad_norm": 0.4312952756881714, + "learning_rate": 0.0015, + "loss": 2.0565, + "step": 5135 + }, + { + "epoch": 0.5417721518987342, + "grad_norm": 0.48716795444488525, + "learning_rate": 0.0015, + "loss": 2.0939, + "step": 5136 + }, + { + "epoch": 0.5418776371308017, + "grad_norm": 0.37644460797309875, + "learning_rate": 0.0015, + "loss": 2.0878, + "step": 5137 + }, + { + "epoch": 0.5419831223628692, + "grad_norm": 0.44075721502304077, + "learning_rate": 0.0015, + "loss": 2.0913, + "step": 5138 + }, + { + "epoch": 0.5420886075949367, + "grad_norm": 0.4306235909461975, + "learning_rate": 0.0015, + "loss": 2.0989, + "step": 5139 + }, + { + 
"epoch": 0.5421940928270043, + "grad_norm": 0.4094460904598236, + "learning_rate": 0.0015, + "loss": 2.0886, + "step": 5140 + }, + { + "epoch": 0.5422995780590717, + "grad_norm": 0.4964393377304077, + "learning_rate": 0.0015, + "loss": 2.0756, + "step": 5141 + }, + { + "epoch": 0.5424050632911392, + "grad_norm": 0.3804299533367157, + "learning_rate": 0.0015, + "loss": 2.0955, + "step": 5142 + }, + { + "epoch": 0.5425105485232068, + "grad_norm": 0.4761493504047394, + "learning_rate": 0.0015, + "loss": 2.0955, + "step": 5143 + }, + { + "epoch": 0.5426160337552742, + "grad_norm": 0.4900030195713043, + "learning_rate": 0.0015, + "loss": 2.0761, + "step": 5144 + }, + { + "epoch": 0.5427215189873418, + "grad_norm": 0.4615054726600647, + "learning_rate": 0.0015, + "loss": 2.0889, + "step": 5145 + }, + { + "epoch": 0.5428270042194093, + "grad_norm": 0.4259750247001648, + "learning_rate": 0.0015, + "loss": 2.0515, + "step": 5146 + }, + { + "epoch": 0.5429324894514768, + "grad_norm": 0.4633713960647583, + "learning_rate": 0.0015, + "loss": 2.1081, + "step": 5147 + }, + { + "epoch": 0.5430379746835443, + "grad_norm": 0.47892045974731445, + "learning_rate": 0.0015, + "loss": 2.0455, + "step": 5148 + }, + { + "epoch": 0.5431434599156119, + "grad_norm": 0.5195466876029968, + "learning_rate": 0.0015, + "loss": 2.0902, + "step": 5149 + }, + { + "epoch": 0.5432489451476793, + "grad_norm": 0.4265201985836029, + "learning_rate": 0.0015, + "loss": 2.099, + "step": 5150 + }, + { + "epoch": 0.5433544303797468, + "grad_norm": 0.4635160267353058, + "learning_rate": 0.0015, + "loss": 2.0594, + "step": 5151 + }, + { + "epoch": 0.5434599156118144, + "grad_norm": 0.5664486289024353, + "learning_rate": 0.0015, + "loss": 2.1069, + "step": 5152 + }, + { + "epoch": 0.5435654008438818, + "grad_norm": 0.49194449186325073, + "learning_rate": 0.0015, + "loss": 2.0746, + "step": 5153 + }, + { + "epoch": 0.5436708860759494, + "grad_norm": 0.40384548902511597, + "learning_rate": 0.0015, + "loss": 
2.0621, + "step": 5154 + }, + { + "epoch": 0.5437763713080169, + "grad_norm": 0.5628482103347778, + "learning_rate": 0.0015, + "loss": 2.081, + "step": 5155 + }, + { + "epoch": 0.5438818565400844, + "grad_norm": 0.5904060006141663, + "learning_rate": 0.0015, + "loss": 2.06, + "step": 5156 + }, + { + "epoch": 0.5439873417721519, + "grad_norm": 0.43712249398231506, + "learning_rate": 0.0015, + "loss": 2.0801, + "step": 5157 + }, + { + "epoch": 0.5440928270042195, + "grad_norm": 0.6594637632369995, + "learning_rate": 0.0015, + "loss": 2.0595, + "step": 5158 + }, + { + "epoch": 0.5441983122362869, + "grad_norm": 0.5801767706871033, + "learning_rate": 0.0015, + "loss": 2.0898, + "step": 5159 + }, + { + "epoch": 0.5443037974683544, + "grad_norm": 0.48449453711509705, + "learning_rate": 0.0015, + "loss": 2.0686, + "step": 5160 + }, + { + "epoch": 0.544409282700422, + "grad_norm": 0.6259701251983643, + "learning_rate": 0.0015, + "loss": 2.117, + "step": 5161 + }, + { + "epoch": 0.5445147679324894, + "grad_norm": 0.4959178566932678, + "learning_rate": 0.0015, + "loss": 2.1148, + "step": 5162 + }, + { + "epoch": 0.544620253164557, + "grad_norm": 0.4372932016849518, + "learning_rate": 0.0015, + "loss": 2.0503, + "step": 5163 + }, + { + "epoch": 0.5447257383966245, + "grad_norm": 0.5475062131881714, + "learning_rate": 0.0015, + "loss": 2.0783, + "step": 5164 + }, + { + "epoch": 0.544831223628692, + "grad_norm": 0.5308472514152527, + "learning_rate": 0.0015, + "loss": 2.0957, + "step": 5165 + }, + { + "epoch": 0.5449367088607595, + "grad_norm": 0.4184712767601013, + "learning_rate": 0.0015, + "loss": 2.0381, + "step": 5166 + }, + { + "epoch": 0.5450421940928271, + "grad_norm": 0.5582336187362671, + "learning_rate": 0.0015, + "loss": 2.0747, + "step": 5167 + }, + { + "epoch": 0.5451476793248945, + "grad_norm": 0.4433176517486572, + "learning_rate": 0.0015, + "loss": 2.0772, + "step": 5168 + }, + { + "epoch": 0.545253164556962, + "grad_norm": 0.4344033896923065, + 
"learning_rate": 0.0015, + "loss": 2.0872, + "step": 5169 + }, + { + "epoch": 0.5453586497890295, + "grad_norm": 0.4791746735572815, + "learning_rate": 0.0015, + "loss": 2.0254, + "step": 5170 + }, + { + "epoch": 0.545464135021097, + "grad_norm": 0.34436845779418945, + "learning_rate": 0.0015, + "loss": 2.0565, + "step": 5171 + }, + { + "epoch": 0.5455696202531646, + "grad_norm": 0.451264888048172, + "learning_rate": 0.0015, + "loss": 2.0725, + "step": 5172 + }, + { + "epoch": 0.545675105485232, + "grad_norm": 0.351448118686676, + "learning_rate": 0.0015, + "loss": 2.0794, + "step": 5173 + }, + { + "epoch": 0.5457805907172996, + "grad_norm": 0.4693310260772705, + "learning_rate": 0.0015, + "loss": 2.1046, + "step": 5174 + }, + { + "epoch": 0.5458860759493671, + "grad_norm": 0.47555863857269287, + "learning_rate": 0.0015, + "loss": 2.0888, + "step": 5175 + }, + { + "epoch": 0.5459915611814345, + "grad_norm": 0.39987143874168396, + "learning_rate": 0.0015, + "loss": 2.0498, + "step": 5176 + }, + { + "epoch": 0.5460970464135021, + "grad_norm": 0.36017870903015137, + "learning_rate": 0.0015, + "loss": 2.0935, + "step": 5177 + }, + { + "epoch": 0.5462025316455696, + "grad_norm": 0.37940287590026855, + "learning_rate": 0.0015, + "loss": 2.0996, + "step": 5178 + }, + { + "epoch": 0.5463080168776371, + "grad_norm": 0.37218204140663147, + "learning_rate": 0.0015, + "loss": 2.0654, + "step": 5179 + }, + { + "epoch": 0.5464135021097046, + "grad_norm": 0.3986240327358246, + "learning_rate": 0.0015, + "loss": 2.0914, + "step": 5180 + }, + { + "epoch": 0.5465189873417722, + "grad_norm": 0.3876081705093384, + "learning_rate": 0.0015, + "loss": 2.0874, + "step": 5181 + }, + { + "epoch": 0.5466244725738396, + "grad_norm": 0.40417298674583435, + "learning_rate": 0.0015, + "loss": 2.0901, + "step": 5182 + }, + { + "epoch": 0.5467299578059072, + "grad_norm": 0.40287941694259644, + "learning_rate": 0.0015, + "loss": 2.0403, + "step": 5183 + }, + { + "epoch": 0.5468354430379747, + 
"grad_norm": 0.45426779985427856, + "learning_rate": 0.0015, + "loss": 2.0775, + "step": 5184 + }, + { + "epoch": 0.5469409282700421, + "grad_norm": 0.42458707094192505, + "learning_rate": 0.0015, + "loss": 2.061, + "step": 5185 + }, + { + "epoch": 0.5470464135021097, + "grad_norm": 0.5109040141105652, + "learning_rate": 0.0015, + "loss": 2.091, + "step": 5186 + }, + { + "epoch": 0.5471518987341772, + "grad_norm": 0.3558690547943115, + "learning_rate": 0.0015, + "loss": 2.0519, + "step": 5187 + }, + { + "epoch": 0.5472573839662447, + "grad_norm": 0.44371962547302246, + "learning_rate": 0.0015, + "loss": 2.0494, + "step": 5188 + }, + { + "epoch": 0.5473628691983122, + "grad_norm": 0.36271482706069946, + "learning_rate": 0.0015, + "loss": 2.0655, + "step": 5189 + }, + { + "epoch": 0.5474683544303798, + "grad_norm": 0.39818164706230164, + "learning_rate": 0.0015, + "loss": 2.062, + "step": 5190 + }, + { + "epoch": 0.5475738396624472, + "grad_norm": 0.40701112151145935, + "learning_rate": 0.0015, + "loss": 2.0638, + "step": 5191 + }, + { + "epoch": 0.5476793248945148, + "grad_norm": 0.3827551305294037, + "learning_rate": 0.0015, + "loss": 2.0802, + "step": 5192 + }, + { + "epoch": 0.5477848101265823, + "grad_norm": 0.35165467858314514, + "learning_rate": 0.0015, + "loss": 2.0721, + "step": 5193 + }, + { + "epoch": 0.5478902953586497, + "grad_norm": 0.4004387855529785, + "learning_rate": 0.0015, + "loss": 2.0871, + "step": 5194 + }, + { + "epoch": 0.5479957805907173, + "grad_norm": 0.3694967031478882, + "learning_rate": 0.0015, + "loss": 2.0707, + "step": 5195 + }, + { + "epoch": 0.5481012658227848, + "grad_norm": 0.40399953722953796, + "learning_rate": 0.0015, + "loss": 2.0843, + "step": 5196 + }, + { + "epoch": 0.5482067510548523, + "grad_norm": 0.36466264724731445, + "learning_rate": 0.0015, + "loss": 2.0837, + "step": 5197 + }, + { + "epoch": 0.5483122362869198, + "grad_norm": 0.4083206355571747, + "learning_rate": 0.0015, + "loss": 2.0646, + "step": 5198 + }, + { + 
"epoch": 0.5484177215189874, + "grad_norm": 0.3567611575126648, + "learning_rate": 0.0015, + "loss": 2.0953, + "step": 5199 + }, + { + "epoch": 0.5485232067510548, + "grad_norm": 0.4325101971626282, + "learning_rate": 0.0015, + "loss": 2.071, + "step": 5200 + }, + { + "epoch": 0.5486286919831224, + "grad_norm": 0.3671211302280426, + "learning_rate": 0.0015, + "loss": 2.0286, + "step": 5201 + }, + { + "epoch": 0.5487341772151899, + "grad_norm": 0.40272974967956543, + "learning_rate": 0.0015, + "loss": 2.0895, + "step": 5202 + }, + { + "epoch": 0.5488396624472573, + "grad_norm": 0.4118332862854004, + "learning_rate": 0.0015, + "loss": 2.0794, + "step": 5203 + }, + { + "epoch": 0.5489451476793249, + "grad_norm": 0.42445358633995056, + "learning_rate": 0.0015, + "loss": 2.0544, + "step": 5204 + }, + { + "epoch": 0.5490506329113924, + "grad_norm": 0.3625504970550537, + "learning_rate": 0.0015, + "loss": 2.0709, + "step": 5205 + }, + { + "epoch": 0.5491561181434599, + "grad_norm": 0.3882637321949005, + "learning_rate": 0.0015, + "loss": 2.0568, + "step": 5206 + }, + { + "epoch": 0.5492616033755274, + "grad_norm": 0.38766542077064514, + "learning_rate": 0.0015, + "loss": 2.0347, + "step": 5207 + }, + { + "epoch": 0.549367088607595, + "grad_norm": 0.3714372515678406, + "learning_rate": 0.0015, + "loss": 2.1037, + "step": 5208 + }, + { + "epoch": 0.5494725738396624, + "grad_norm": 0.34622395038604736, + "learning_rate": 0.0015, + "loss": 2.0704, + "step": 5209 + }, + { + "epoch": 0.54957805907173, + "grad_norm": 0.41933518648147583, + "learning_rate": 0.0015, + "loss": 2.0523, + "step": 5210 + }, + { + "epoch": 0.5496835443037975, + "grad_norm": 0.34808725118637085, + "learning_rate": 0.0015, + "loss": 2.074, + "step": 5211 + }, + { + "epoch": 0.549789029535865, + "grad_norm": 0.36907774209976196, + "learning_rate": 0.0015, + "loss": 2.0313, + "step": 5212 + }, + { + "epoch": 0.5498945147679325, + "grad_norm": 0.438463032245636, + "learning_rate": 0.0015, + "loss": 2.1022, 
+ "step": 5213 + }, + { + "epoch": 0.55, + "grad_norm": 0.38135987520217896, + "learning_rate": 0.0015, + "loss": 2.083, + "step": 5214 + }, + { + "epoch": 0.5501054852320675, + "grad_norm": 0.4124135971069336, + "learning_rate": 0.0015, + "loss": 2.0824, + "step": 5215 + }, + { + "epoch": 0.550210970464135, + "grad_norm": 0.38042253255844116, + "learning_rate": 0.0015, + "loss": 2.0718, + "step": 5216 + }, + { + "epoch": 0.5503164556962026, + "grad_norm": 0.4012112319469452, + "learning_rate": 0.0015, + "loss": 2.0622, + "step": 5217 + }, + { + "epoch": 0.55042194092827, + "grad_norm": 0.37617257237434387, + "learning_rate": 0.0015, + "loss": 2.0659, + "step": 5218 + }, + { + "epoch": 0.5505274261603376, + "grad_norm": 0.3589226007461548, + "learning_rate": 0.0015, + "loss": 2.0789, + "step": 5219 + }, + { + "epoch": 0.5506329113924051, + "grad_norm": 0.39242368936538696, + "learning_rate": 0.0015, + "loss": 2.0332, + "step": 5220 + }, + { + "epoch": 0.5507383966244725, + "grad_norm": 0.39510655403137207, + "learning_rate": 0.0015, + "loss": 2.0494, + "step": 5221 + }, + { + "epoch": 0.5508438818565401, + "grad_norm": 0.40191975235939026, + "learning_rate": 0.0015, + "loss": 2.0303, + "step": 5222 + }, + { + "epoch": 0.5509493670886076, + "grad_norm": 0.399222195148468, + "learning_rate": 0.0015, + "loss": 2.0528, + "step": 5223 + }, + { + "epoch": 0.5510548523206751, + "grad_norm": 0.4183252155780792, + "learning_rate": 0.0015, + "loss": 2.0788, + "step": 5224 + }, + { + "epoch": 0.5511603375527426, + "grad_norm": 0.47323161363601685, + "learning_rate": 0.0015, + "loss": 2.0741, + "step": 5225 + }, + { + "epoch": 0.5512658227848102, + "grad_norm": 0.41788119077682495, + "learning_rate": 0.0015, + "loss": 2.0715, + "step": 5226 + }, + { + "epoch": 0.5513713080168776, + "grad_norm": 0.4583255350589752, + "learning_rate": 0.0015, + "loss": 2.0484, + "step": 5227 + }, + { + "epoch": 0.5514767932489452, + "grad_norm": 0.43516817688941956, + "learning_rate": 0.0015, + 
"loss": 2.11, + "step": 5228 + }, + { + "epoch": 0.5515822784810127, + "grad_norm": 0.47897836565971375, + "learning_rate": 0.0015, + "loss": 2.0751, + "step": 5229 + }, + { + "epoch": 0.5516877637130801, + "grad_norm": 0.45981350541114807, + "learning_rate": 0.0015, + "loss": 2.0876, + "step": 5230 + }, + { + "epoch": 0.5517932489451477, + "grad_norm": 0.44203999638557434, + "learning_rate": 0.0015, + "loss": 2.0803, + "step": 5231 + }, + { + "epoch": 0.5518987341772152, + "grad_norm": 0.3978458344936371, + "learning_rate": 0.0015, + "loss": 2.0655, + "step": 5232 + }, + { + "epoch": 0.5520042194092827, + "grad_norm": 0.4299060106277466, + "learning_rate": 0.0015, + "loss": 2.0313, + "step": 5233 + }, + { + "epoch": 0.5521097046413502, + "grad_norm": 0.4793154299259186, + "learning_rate": 0.0015, + "loss": 2.0662, + "step": 5234 + }, + { + "epoch": 0.5522151898734177, + "grad_norm": 0.448578417301178, + "learning_rate": 0.0015, + "loss": 2.0733, + "step": 5235 + }, + { + "epoch": 0.5523206751054852, + "grad_norm": 0.47207698225975037, + "learning_rate": 0.0015, + "loss": 2.0831, + "step": 5236 + }, + { + "epoch": 0.5524261603375528, + "grad_norm": 0.4600767493247986, + "learning_rate": 0.0015, + "loss": 2.0781, + "step": 5237 + }, + { + "epoch": 0.5525316455696202, + "grad_norm": 0.49941521883010864, + "learning_rate": 0.0015, + "loss": 2.0589, + "step": 5238 + }, + { + "epoch": 0.5526371308016877, + "grad_norm": 0.4281676709651947, + "learning_rate": 0.0015, + "loss": 2.0749, + "step": 5239 + }, + { + "epoch": 0.5527426160337553, + "grad_norm": 0.4500032067298889, + "learning_rate": 0.0015, + "loss": 2.0922, + "step": 5240 + }, + { + "epoch": 0.5528481012658227, + "grad_norm": 0.4862096905708313, + "learning_rate": 0.0015, + "loss": 2.0758, + "step": 5241 + }, + { + "epoch": 0.5529535864978903, + "grad_norm": 0.388731986284256, + "learning_rate": 0.0015, + "loss": 2.0507, + "step": 5242 + }, + { + "epoch": 0.5530590717299578, + "grad_norm": 0.5148726105690002, + 
"learning_rate": 0.0015, + "loss": 2.0714, + "step": 5243 + }, + { + "epoch": 0.5531645569620253, + "grad_norm": 0.4203075170516968, + "learning_rate": 0.0015, + "loss": 2.0289, + "step": 5244 + }, + { + "epoch": 0.5532700421940928, + "grad_norm": 0.4435712993144989, + "learning_rate": 0.0015, + "loss": 2.0786, + "step": 5245 + }, + { + "epoch": 0.5533755274261604, + "grad_norm": 0.38415786623954773, + "learning_rate": 0.0015, + "loss": 2.0768, + "step": 5246 + }, + { + "epoch": 0.5534810126582278, + "grad_norm": 0.4030483663082123, + "learning_rate": 0.0015, + "loss": 2.0816, + "step": 5247 + }, + { + "epoch": 0.5535864978902953, + "grad_norm": 0.4537275731563568, + "learning_rate": 0.0015, + "loss": 2.0708, + "step": 5248 + }, + { + "epoch": 0.5536919831223629, + "grad_norm": 0.36114585399627686, + "learning_rate": 0.0015, + "loss": 2.0385, + "step": 5249 + }, + { + "epoch": 0.5537974683544303, + "grad_norm": 0.40549933910369873, + "learning_rate": 0.0015, + "loss": 2.0787, + "step": 5250 + }, + { + "epoch": 0.5539029535864979, + "grad_norm": 0.376192182302475, + "learning_rate": 0.0015, + "loss": 2.0767, + "step": 5251 + }, + { + "epoch": 0.5540084388185654, + "grad_norm": 0.515574038028717, + "learning_rate": 0.0015, + "loss": 2.0582, + "step": 5252 + }, + { + "epoch": 0.5541139240506329, + "grad_norm": 0.4138428568840027, + "learning_rate": 0.0015, + "loss": 2.0643, + "step": 5253 + }, + { + "epoch": 0.5542194092827004, + "grad_norm": 0.4135080873966217, + "learning_rate": 0.0015, + "loss": 2.0792, + "step": 5254 + }, + { + "epoch": 0.554324894514768, + "grad_norm": 0.4683692157268524, + "learning_rate": 0.0015, + "loss": 2.068, + "step": 5255 + }, + { + "epoch": 0.5544303797468354, + "grad_norm": 0.5090756416320801, + "learning_rate": 0.0015, + "loss": 2.0494, + "step": 5256 + }, + { + "epoch": 0.554535864978903, + "grad_norm": 0.43940040469169617, + "learning_rate": 0.0015, + "loss": 2.0797, + "step": 5257 + }, + { + "epoch": 0.5546413502109705, + 
"grad_norm": 0.4126850962638855, + "learning_rate": 0.0015, + "loss": 2.0818, + "step": 5258 + }, + { + "epoch": 0.5547468354430379, + "grad_norm": 0.3883168399333954, + "learning_rate": 0.0015, + "loss": 2.0904, + "step": 5259 + }, + { + "epoch": 0.5548523206751055, + "grad_norm": 0.39251986145973206, + "learning_rate": 0.0015, + "loss": 2.046, + "step": 5260 + }, + { + "epoch": 0.554957805907173, + "grad_norm": 0.34681808948516846, + "learning_rate": 0.0015, + "loss": 2.0669, + "step": 5261 + }, + { + "epoch": 0.5550632911392405, + "grad_norm": 0.468466579914093, + "learning_rate": 0.0015, + "loss": 2.0839, + "step": 5262 + }, + { + "epoch": 0.555168776371308, + "grad_norm": 0.4040527939796448, + "learning_rate": 0.0015, + "loss": 2.0713, + "step": 5263 + }, + { + "epoch": 0.5552742616033756, + "grad_norm": 0.37478771805763245, + "learning_rate": 0.0015, + "loss": 2.0751, + "step": 5264 + }, + { + "epoch": 0.555379746835443, + "grad_norm": 0.5053312182426453, + "learning_rate": 0.0015, + "loss": 2.0622, + "step": 5265 + }, + { + "epoch": 0.5554852320675105, + "grad_norm": 0.5198706984519958, + "learning_rate": 0.0015, + "loss": 2.0764, + "step": 5266 + }, + { + "epoch": 0.5555907172995781, + "grad_norm": 0.3805682063102722, + "learning_rate": 0.0015, + "loss": 2.093, + "step": 5267 + }, + { + "epoch": 0.5556962025316455, + "grad_norm": 0.5439099073410034, + "learning_rate": 0.0015, + "loss": 2.0933, + "step": 5268 + }, + { + "epoch": 0.5558016877637131, + "grad_norm": 0.4786386489868164, + "learning_rate": 0.0015, + "loss": 2.079, + "step": 5269 + }, + { + "epoch": 0.5559071729957806, + "grad_norm": 0.4507853388786316, + "learning_rate": 0.0015, + "loss": 2.0748, + "step": 5270 + }, + { + "epoch": 0.5560126582278481, + "grad_norm": 0.5688821077346802, + "learning_rate": 0.0015, + "loss": 2.077, + "step": 5271 + }, + { + "epoch": 0.5561181434599156, + "grad_norm": 0.5891649723052979, + "learning_rate": 0.0015, + "loss": 2.0874, + "step": 5272 + }, + { + "epoch": 
0.5562236286919832, + "grad_norm": 0.4781874120235443, + "learning_rate": 0.0015, + "loss": 2.0453, + "step": 5273 + }, + { + "epoch": 0.5563291139240506, + "grad_norm": 0.39596015214920044, + "learning_rate": 0.0015, + "loss": 2.0552, + "step": 5274 + }, + { + "epoch": 0.5564345991561181, + "grad_norm": 0.48366579413414, + "learning_rate": 0.0015, + "loss": 2.0486, + "step": 5275 + }, + { + "epoch": 0.5565400843881857, + "grad_norm": 0.4473855197429657, + "learning_rate": 0.0015, + "loss": 2.0689, + "step": 5276 + }, + { + "epoch": 0.5566455696202531, + "grad_norm": 0.39536887407302856, + "learning_rate": 0.0015, + "loss": 2.0892, + "step": 5277 + }, + { + "epoch": 0.5567510548523207, + "grad_norm": 0.44684502482414246, + "learning_rate": 0.0015, + "loss": 2.0396, + "step": 5278 + }, + { + "epoch": 0.5568565400843882, + "grad_norm": 0.4118596315383911, + "learning_rate": 0.0015, + "loss": 2.085, + "step": 5279 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 0.3897136449813843, + "learning_rate": 0.0015, + "loss": 2.0711, + "step": 5280 + }, + { + "epoch": 0.5570675105485232, + "grad_norm": 0.3639134466648102, + "learning_rate": 0.0015, + "loss": 2.0846, + "step": 5281 + }, + { + "epoch": 0.5571729957805908, + "grad_norm": 0.38456061482429504, + "learning_rate": 0.0015, + "loss": 2.046, + "step": 5282 + }, + { + "epoch": 0.5572784810126582, + "grad_norm": 0.35337021946907043, + "learning_rate": 0.0015, + "loss": 2.0369, + "step": 5283 + }, + { + "epoch": 0.5573839662447257, + "grad_norm": 0.37722447514533997, + "learning_rate": 0.0015, + "loss": 2.0717, + "step": 5284 + }, + { + "epoch": 0.5574894514767933, + "grad_norm": 0.38811615109443665, + "learning_rate": 0.0015, + "loss": 2.0556, + "step": 5285 + }, + { + "epoch": 0.5575949367088607, + "grad_norm": 0.3653903603553772, + "learning_rate": 0.0015, + "loss": 2.0536, + "step": 5286 + }, + { + "epoch": 0.5577004219409283, + "grad_norm": 0.4289991557598114, + "learning_rate": 0.0015, + "loss": 2.0448, + 
"step": 5287 + }, + { + "epoch": 0.5578059071729958, + "grad_norm": 0.34725943207740784, + "learning_rate": 0.0015, + "loss": 2.0772, + "step": 5288 + }, + { + "epoch": 0.5579113924050633, + "grad_norm": 0.3838806748390198, + "learning_rate": 0.0015, + "loss": 2.0921, + "step": 5289 + }, + { + "epoch": 0.5580168776371308, + "grad_norm": 0.35693055391311646, + "learning_rate": 0.0015, + "loss": 2.0926, + "step": 5290 + }, + { + "epoch": 0.5581223628691984, + "grad_norm": 0.40740370750427246, + "learning_rate": 0.0015, + "loss": 2.0739, + "step": 5291 + }, + { + "epoch": 0.5582278481012658, + "grad_norm": 0.3709173798561096, + "learning_rate": 0.0015, + "loss": 2.0355, + "step": 5292 + }, + { + "epoch": 0.5583333333333333, + "grad_norm": 0.4041983187198639, + "learning_rate": 0.0015, + "loss": 2.0751, + "step": 5293 + }, + { + "epoch": 0.5584388185654009, + "grad_norm": 0.45887935161590576, + "learning_rate": 0.0015, + "loss": 2.1006, + "step": 5294 + }, + { + "epoch": 0.5585443037974683, + "grad_norm": 0.37539204955101013, + "learning_rate": 0.0015, + "loss": 2.0495, + "step": 5295 + }, + { + "epoch": 0.5586497890295359, + "grad_norm": 0.42784103751182556, + "learning_rate": 0.0015, + "loss": 2.0712, + "step": 5296 + }, + { + "epoch": 0.5587552742616034, + "grad_norm": 0.4131256639957428, + "learning_rate": 0.0015, + "loss": 2.0372, + "step": 5297 + }, + { + "epoch": 0.5588607594936709, + "grad_norm": 0.43555888533592224, + "learning_rate": 0.0015, + "loss": 2.045, + "step": 5298 + }, + { + "epoch": 0.5589662447257384, + "grad_norm": 0.4058712422847748, + "learning_rate": 0.0015, + "loss": 2.0954, + "step": 5299 + }, + { + "epoch": 0.5590717299578059, + "grad_norm": 0.43535658717155457, + "learning_rate": 0.0015, + "loss": 2.0861, + "step": 5300 + }, + { + "epoch": 0.5591772151898734, + "grad_norm": 0.5165896415710449, + "learning_rate": 0.0015, + "loss": 2.0551, + "step": 5301 + }, + { + "epoch": 0.559282700421941, + "grad_norm": 0.3936571180820465, + 
"learning_rate": 0.0015, + "loss": 2.0808, + "step": 5302 + }, + { + "epoch": 0.5593881856540084, + "grad_norm": 0.5037376880645752, + "learning_rate": 0.0015, + "loss": 2.0801, + "step": 5303 + }, + { + "epoch": 0.5594936708860759, + "grad_norm": 0.527123749256134, + "learning_rate": 0.0015, + "loss": 2.0358, + "step": 5304 + }, + { + "epoch": 0.5595991561181435, + "grad_norm": 0.3896724581718445, + "learning_rate": 0.0015, + "loss": 2.0754, + "step": 5305 + }, + { + "epoch": 0.5597046413502109, + "grad_norm": 0.425620436668396, + "learning_rate": 0.0015, + "loss": 2.0781, + "step": 5306 + }, + { + "epoch": 0.5598101265822785, + "grad_norm": 0.5206269025802612, + "learning_rate": 0.0015, + "loss": 2.0694, + "step": 5307 + }, + { + "epoch": 0.559915611814346, + "grad_norm": 0.39874032139778137, + "learning_rate": 0.0015, + "loss": 2.0269, + "step": 5308 + }, + { + "epoch": 0.5600210970464135, + "grad_norm": 0.4288325309753418, + "learning_rate": 0.0015, + "loss": 2.0496, + "step": 5309 + }, + { + "epoch": 0.560126582278481, + "grad_norm": 0.45499375462532043, + "learning_rate": 0.0015, + "loss": 2.058, + "step": 5310 + }, + { + "epoch": 0.5602320675105485, + "grad_norm": 0.4145779609680176, + "learning_rate": 0.0015, + "loss": 2.057, + "step": 5311 + }, + { + "epoch": 0.560337552742616, + "grad_norm": 0.4528033435344696, + "learning_rate": 0.0015, + "loss": 2.0496, + "step": 5312 + }, + { + "epoch": 0.5604430379746835, + "grad_norm": 0.4080968201160431, + "learning_rate": 0.0015, + "loss": 2.0558, + "step": 5313 + }, + { + "epoch": 0.5605485232067511, + "grad_norm": 0.44135555624961853, + "learning_rate": 0.0015, + "loss": 2.0579, + "step": 5314 + }, + { + "epoch": 0.5606540084388185, + "grad_norm": 0.4038299322128296, + "learning_rate": 0.0015, + "loss": 2.0652, + "step": 5315 + }, + { + "epoch": 0.5607594936708861, + "grad_norm": 0.37837788462638855, + "learning_rate": 0.0015, + "loss": 2.0587, + "step": 5316 + }, + { + "epoch": 0.5608649789029536, + "grad_norm": 
0.3697032630443573, + "learning_rate": 0.0015, + "loss": 2.0319, + "step": 5317 + }, + { + "epoch": 0.560970464135021, + "grad_norm": 0.39593198895454407, + "learning_rate": 0.0015, + "loss": 2.0804, + "step": 5318 + }, + { + "epoch": 0.5610759493670886, + "grad_norm": 0.407795786857605, + "learning_rate": 0.0015, + "loss": 2.0651, + "step": 5319 + }, + { + "epoch": 0.5611814345991561, + "grad_norm": 0.3572378158569336, + "learning_rate": 0.0015, + "loss": 2.0479, + "step": 5320 + }, + { + "epoch": 0.5612869198312236, + "grad_norm": 0.3678308427333832, + "learning_rate": 0.0015, + "loss": 2.1041, + "step": 5321 + }, + { + "epoch": 0.5613924050632911, + "grad_norm": 0.3662284016609192, + "learning_rate": 0.0015, + "loss": 2.0808, + "step": 5322 + }, + { + "epoch": 0.5614978902953587, + "grad_norm": 0.3414258062839508, + "learning_rate": 0.0015, + "loss": 2.0756, + "step": 5323 + }, + { + "epoch": 0.5616033755274261, + "grad_norm": 0.3708357512950897, + "learning_rate": 0.0015, + "loss": 2.0723, + "step": 5324 + }, + { + "epoch": 0.5617088607594937, + "grad_norm": 0.3758963346481323, + "learning_rate": 0.0015, + "loss": 2.0624, + "step": 5325 + }, + { + "epoch": 0.5618143459915612, + "grad_norm": 0.536601722240448, + "learning_rate": 0.0015, + "loss": 2.0574, + "step": 5326 + }, + { + "epoch": 0.5619198312236287, + "grad_norm": 0.5524950623512268, + "learning_rate": 0.0015, + "loss": 2.0497, + "step": 5327 + }, + { + "epoch": 0.5620253164556962, + "grad_norm": 0.3933788239955902, + "learning_rate": 0.0015, + "loss": 2.0286, + "step": 5328 + }, + { + "epoch": 0.5621308016877637, + "grad_norm": 0.4368268549442291, + "learning_rate": 0.0015, + "loss": 2.0529, + "step": 5329 + }, + { + "epoch": 0.5622362869198312, + "grad_norm": 0.48142677545547485, + "learning_rate": 0.0015, + "loss": 2.0656, + "step": 5330 + }, + { + "epoch": 0.5623417721518987, + "grad_norm": 0.4068501889705658, + "learning_rate": 0.0015, + "loss": 2.0491, + "step": 5331 + }, + { + "epoch": 
0.5624472573839663, + "grad_norm": 0.39004355669021606, + "learning_rate": 0.0015, + "loss": 2.0673, + "step": 5332 + }, + { + "epoch": 0.5625527426160337, + "grad_norm": 0.42312464118003845, + "learning_rate": 0.0015, + "loss": 2.0332, + "step": 5333 + }, + { + "epoch": 0.5626582278481013, + "grad_norm": 0.3779086172580719, + "learning_rate": 0.0015, + "loss": 2.0542, + "step": 5334 + }, + { + "epoch": 0.5627637130801688, + "grad_norm": 0.3778851330280304, + "learning_rate": 0.0015, + "loss": 2.0686, + "step": 5335 + }, + { + "epoch": 0.5628691983122363, + "grad_norm": 0.4233105480670929, + "learning_rate": 0.0015, + "loss": 2.0685, + "step": 5336 + }, + { + "epoch": 0.5629746835443038, + "grad_norm": 0.38238391280174255, + "learning_rate": 0.0015, + "loss": 2.0379, + "step": 5337 + }, + { + "epoch": 0.5630801687763713, + "grad_norm": 0.34800782799720764, + "learning_rate": 0.0015, + "loss": 2.0438, + "step": 5338 + }, + { + "epoch": 0.5631856540084388, + "grad_norm": 0.40691545605659485, + "learning_rate": 0.0015, + "loss": 2.0692, + "step": 5339 + }, + { + "epoch": 0.5632911392405063, + "grad_norm": 0.46234267950057983, + "learning_rate": 0.0015, + "loss": 2.0687, + "step": 5340 + }, + { + "epoch": 0.5633966244725739, + "grad_norm": 0.38771510124206543, + "learning_rate": 0.0015, + "loss": 2.0894, + "step": 5341 + }, + { + "epoch": 0.5635021097046413, + "grad_norm": 0.48386654257774353, + "learning_rate": 0.0015, + "loss": 2.056, + "step": 5342 + }, + { + "epoch": 0.5636075949367089, + "grad_norm": 0.4163360893726349, + "learning_rate": 0.0015, + "loss": 2.046, + "step": 5343 + }, + { + "epoch": 0.5637130801687764, + "grad_norm": 0.3661503791809082, + "learning_rate": 0.0015, + "loss": 2.0175, + "step": 5344 + }, + { + "epoch": 0.5638185654008439, + "grad_norm": 0.4114222228527069, + "learning_rate": 0.0015, + "loss": 2.0612, + "step": 5345 + }, + { + "epoch": 0.5639240506329114, + "grad_norm": 0.4102807641029358, + "learning_rate": 0.0015, + "loss": 2.0518, + 
"step": 5346 + }, + { + "epoch": 0.564029535864979, + "grad_norm": 0.38806262612342834, + "learning_rate": 0.0015, + "loss": 2.0116, + "step": 5347 + }, + { + "epoch": 0.5641350210970464, + "grad_norm": 0.45722630620002747, + "learning_rate": 0.0015, + "loss": 2.0608, + "step": 5348 + }, + { + "epoch": 0.5642405063291139, + "grad_norm": 0.43557223677635193, + "learning_rate": 0.0015, + "loss": 2.0557, + "step": 5349 + }, + { + "epoch": 0.5643459915611815, + "grad_norm": 0.38934326171875, + "learning_rate": 0.0015, + "loss": 2.0301, + "step": 5350 + }, + { + "epoch": 0.5644514767932489, + "grad_norm": 0.3977692425251007, + "learning_rate": 0.0015, + "loss": 2.0377, + "step": 5351 + }, + { + "epoch": 0.5645569620253165, + "grad_norm": 0.417532742023468, + "learning_rate": 0.0015, + "loss": 2.0562, + "step": 5352 + }, + { + "epoch": 0.564662447257384, + "grad_norm": 0.5329822301864624, + "learning_rate": 0.0015, + "loss": 2.0646, + "step": 5353 + }, + { + "epoch": 0.5647679324894515, + "grad_norm": 0.4017999470233917, + "learning_rate": 0.0015, + "loss": 2.0498, + "step": 5354 + }, + { + "epoch": 0.564873417721519, + "grad_norm": 0.5488272905349731, + "learning_rate": 0.0015, + "loss": 2.0579, + "step": 5355 + }, + { + "epoch": 0.5649789029535865, + "grad_norm": 0.49898219108581543, + "learning_rate": 0.0015, + "loss": 2.0122, + "step": 5356 + }, + { + "epoch": 0.565084388185654, + "grad_norm": 0.4643761217594147, + "learning_rate": 0.0015, + "loss": 2.0536, + "step": 5357 + }, + { + "epoch": 0.5651898734177215, + "grad_norm": 0.4486091434955597, + "learning_rate": 0.0015, + "loss": 2.0444, + "step": 5358 + }, + { + "epoch": 0.5652953586497891, + "grad_norm": 0.4974011778831482, + "learning_rate": 0.0015, + "loss": 2.1001, + "step": 5359 + }, + { + "epoch": 0.5654008438818565, + "grad_norm": 0.4347749650478363, + "learning_rate": 0.0015, + "loss": 2.103, + "step": 5360 + }, + { + "epoch": 0.5655063291139241, + "grad_norm": 0.5491346716880798, + "learning_rate": 
0.0015, + "loss": 2.0654, + "step": 5361 + }, + { + "epoch": 0.5656118143459916, + "grad_norm": 0.5211014747619629, + "learning_rate": 0.0015, + "loss": 2.0748, + "step": 5362 + }, + { + "epoch": 0.565717299578059, + "grad_norm": 0.46464046835899353, + "learning_rate": 0.0015, + "loss": 2.1078, + "step": 5363 + }, + { + "epoch": 0.5658227848101266, + "grad_norm": 0.41091370582580566, + "learning_rate": 0.0015, + "loss": 2.0517, + "step": 5364 + }, + { + "epoch": 0.5659282700421941, + "grad_norm": 0.42585229873657227, + "learning_rate": 0.0015, + "loss": 2.0513, + "step": 5365 + }, + { + "epoch": 0.5660337552742616, + "grad_norm": 0.4452191889286041, + "learning_rate": 0.0015, + "loss": 2.0802, + "step": 5366 + }, + { + "epoch": 0.5661392405063291, + "grad_norm": 0.3814232647418976, + "learning_rate": 0.0015, + "loss": 2.0503, + "step": 5367 + }, + { + "epoch": 0.5662447257383966, + "grad_norm": 0.3972460627555847, + "learning_rate": 0.0015, + "loss": 2.0516, + "step": 5368 + }, + { + "epoch": 0.5663502109704641, + "grad_norm": 0.37361857295036316, + "learning_rate": 0.0015, + "loss": 2.0191, + "step": 5369 + }, + { + "epoch": 0.5664556962025317, + "grad_norm": 0.40817761421203613, + "learning_rate": 0.0015, + "loss": 2.0735, + "step": 5370 + }, + { + "epoch": 0.5665611814345991, + "grad_norm": 0.3636001646518707, + "learning_rate": 0.0015, + "loss": 2.0639, + "step": 5371 + }, + { + "epoch": 0.5666666666666667, + "grad_norm": 0.4093266725540161, + "learning_rate": 0.0015, + "loss": 2.0555, + "step": 5372 + }, + { + "epoch": 0.5667721518987342, + "grad_norm": 0.4085620939731598, + "learning_rate": 0.0015, + "loss": 2.0377, + "step": 5373 + }, + { + "epoch": 0.5668776371308016, + "grad_norm": 0.3743188679218292, + "learning_rate": 0.0015, + "loss": 2.0789, + "step": 5374 + }, + { + "epoch": 0.5669831223628692, + "grad_norm": 0.39882856607437134, + "learning_rate": 0.0015, + "loss": 2.0496, + "step": 5375 + }, + { + "epoch": 0.5670886075949367, + "grad_norm": 
0.4126757085323334, + "learning_rate": 0.0015, + "loss": 2.0363, + "step": 5376 + }, + { + "epoch": 0.5671940928270042, + "grad_norm": 0.3722619414329529, + "learning_rate": 0.0015, + "loss": 2.091, + "step": 5377 + }, + { + "epoch": 0.5672995780590717, + "grad_norm": 0.4633464217185974, + "learning_rate": 0.0015, + "loss": 2.0239, + "step": 5378 + }, + { + "epoch": 0.5674050632911393, + "grad_norm": 0.49151894450187683, + "learning_rate": 0.0015, + "loss": 2.0549, + "step": 5379 + }, + { + "epoch": 0.5675105485232067, + "grad_norm": 0.49586379528045654, + "learning_rate": 0.0015, + "loss": 2.0531, + "step": 5380 + }, + { + "epoch": 0.5676160337552743, + "grad_norm": 0.3721591532230377, + "learning_rate": 0.0015, + "loss": 2.0119, + "step": 5381 + }, + { + "epoch": 0.5677215189873418, + "grad_norm": 0.5680374503135681, + "learning_rate": 0.0015, + "loss": 2.013, + "step": 5382 + }, + { + "epoch": 0.5678270042194092, + "grad_norm": 0.44761767983436584, + "learning_rate": 0.0015, + "loss": 2.0564, + "step": 5383 + }, + { + "epoch": 0.5679324894514768, + "grad_norm": 0.507785975933075, + "learning_rate": 0.0015, + "loss": 2.0624, + "step": 5384 + }, + { + "epoch": 0.5680379746835443, + "grad_norm": 0.3976094424724579, + "learning_rate": 0.0015, + "loss": 2.0562, + "step": 5385 + }, + { + "epoch": 0.5681434599156118, + "grad_norm": 0.5804311037063599, + "learning_rate": 0.0015, + "loss": 2.0715, + "step": 5386 + }, + { + "epoch": 0.5682489451476793, + "grad_norm": 0.4269832968711853, + "learning_rate": 0.0015, + "loss": 2.0708, + "step": 5387 + }, + { + "epoch": 0.5683544303797469, + "grad_norm": 0.5104051828384399, + "learning_rate": 0.0015, + "loss": 2.0422, + "step": 5388 + }, + { + "epoch": 0.5684599156118143, + "grad_norm": 0.42477476596832275, + "learning_rate": 0.0015, + "loss": 2.0549, + "step": 5389 + }, + { + "epoch": 0.5685654008438819, + "grad_norm": 0.5200402736663818, + "learning_rate": 0.0015, + "loss": 2.0592, + "step": 5390 + }, + { + "epoch": 
0.5686708860759494, + "grad_norm": 0.36830031871795654, + "learning_rate": 0.0015, + "loss": 2.05, + "step": 5391 + }, + { + "epoch": 0.5687763713080168, + "grad_norm": 0.4431939423084259, + "learning_rate": 0.0015, + "loss": 2.0555, + "step": 5392 + }, + { + "epoch": 0.5688818565400844, + "grad_norm": 0.46084102988243103, + "learning_rate": 0.0015, + "loss": 2.0588, + "step": 5393 + }, + { + "epoch": 0.5689873417721519, + "grad_norm": 0.4230668246746063, + "learning_rate": 0.0015, + "loss": 2.0601, + "step": 5394 + }, + { + "epoch": 0.5690928270042194, + "grad_norm": 0.4092566967010498, + "learning_rate": 0.0015, + "loss": 2.0402, + "step": 5395 + }, + { + "epoch": 0.5691983122362869, + "grad_norm": 0.5310227274894714, + "learning_rate": 0.0015, + "loss": 2.0438, + "step": 5396 + }, + { + "epoch": 0.5693037974683545, + "grad_norm": 0.4846002757549286, + "learning_rate": 0.0015, + "loss": 2.0846, + "step": 5397 + }, + { + "epoch": 0.5694092827004219, + "grad_norm": 0.3446415960788727, + "learning_rate": 0.0015, + "loss": 2.0465, + "step": 5398 + }, + { + "epoch": 0.5695147679324895, + "grad_norm": 0.5764602422714233, + "learning_rate": 0.0015, + "loss": 2.0618, + "step": 5399 + }, + { + "epoch": 0.569620253164557, + "grad_norm": 0.4082357883453369, + "learning_rate": 0.0015, + "loss": 2.035, + "step": 5400 + }, + { + "epoch": 0.5697257383966244, + "grad_norm": 0.4126167893409729, + "learning_rate": 0.0015, + "loss": 2.0636, + "step": 5401 + }, + { + "epoch": 0.569831223628692, + "grad_norm": 0.4605918228626251, + "learning_rate": 0.0015, + "loss": 2.0589, + "step": 5402 + }, + { + "epoch": 0.5699367088607595, + "grad_norm": 0.4164193868637085, + "learning_rate": 0.0015, + "loss": 2.0617, + "step": 5403 + }, + { + "epoch": 0.570042194092827, + "grad_norm": 0.47513529658317566, + "learning_rate": 0.0015, + "loss": 2.0819, + "step": 5404 + }, + { + "epoch": 0.5701476793248945, + "grad_norm": 0.4241865575313568, + "learning_rate": 0.0015, + "loss": 2.0195, + "step": 
5405 + }, + { + "epoch": 0.5702531645569621, + "grad_norm": 0.34662917256355286, + "learning_rate": 0.0015, + "loss": 2.0269, + "step": 5406 + }, + { + "epoch": 0.5703586497890295, + "grad_norm": 0.4576583206653595, + "learning_rate": 0.0015, + "loss": 2.0432, + "step": 5407 + }, + { + "epoch": 0.570464135021097, + "grad_norm": 0.3996623456478119, + "learning_rate": 0.0015, + "loss": 2.0596, + "step": 5408 + }, + { + "epoch": 0.5705696202531646, + "grad_norm": 0.3497978448867798, + "learning_rate": 0.0015, + "loss": 2.0453, + "step": 5409 + }, + { + "epoch": 0.570675105485232, + "grad_norm": 0.4418317377567291, + "learning_rate": 0.0015, + "loss": 2.0226, + "step": 5410 + }, + { + "epoch": 0.5707805907172996, + "grad_norm": 0.39860761165618896, + "learning_rate": 0.0015, + "loss": 2.0554, + "step": 5411 + }, + { + "epoch": 0.5708860759493671, + "grad_norm": 0.3637651801109314, + "learning_rate": 0.0015, + "loss": 2.0523, + "step": 5412 + }, + { + "epoch": 0.5709915611814346, + "grad_norm": 0.43948185443878174, + "learning_rate": 0.0015, + "loss": 2.0598, + "step": 5413 + }, + { + "epoch": 0.5710970464135021, + "grad_norm": 0.3689970374107361, + "learning_rate": 0.0015, + "loss": 1.9964, + "step": 5414 + }, + { + "epoch": 0.5712025316455697, + "grad_norm": 0.38532108068466187, + "learning_rate": 0.0015, + "loss": 2.0788, + "step": 5415 + }, + { + "epoch": 0.5713080168776371, + "grad_norm": 0.42696812748908997, + "learning_rate": 0.0015, + "loss": 2.047, + "step": 5416 + }, + { + "epoch": 0.5714135021097047, + "grad_norm": 0.3871408998966217, + "learning_rate": 0.0015, + "loss": 2.0547, + "step": 5417 + }, + { + "epoch": 0.5715189873417722, + "grad_norm": 0.3921942114830017, + "learning_rate": 0.0015, + "loss": 2.0611, + "step": 5418 + }, + { + "epoch": 0.5716244725738396, + "grad_norm": 0.3899243474006653, + "learning_rate": 0.0015, + "loss": 2.0261, + "step": 5419 + }, + { + "epoch": 0.5717299578059072, + "grad_norm": 0.38679179549217224, + "learning_rate": 0.0015, 
+ "loss": 2.0258, + "step": 5420 + }, + { + "epoch": 0.5718354430379747, + "grad_norm": 0.4662359952926636, + "learning_rate": 0.0015, + "loss": 2.0552, + "step": 5421 + }, + { + "epoch": 0.5719409282700422, + "grad_norm": 0.4815455973148346, + "learning_rate": 0.0015, + "loss": 2.0372, + "step": 5422 + }, + { + "epoch": 0.5720464135021097, + "grad_norm": 0.3837960958480835, + "learning_rate": 0.0015, + "loss": 2.0151, + "step": 5423 + }, + { + "epoch": 0.5721518987341773, + "grad_norm": 0.45775318145751953, + "learning_rate": 0.0015, + "loss": 2.0096, + "step": 5424 + }, + { + "epoch": 0.5722573839662447, + "grad_norm": 0.4894465208053589, + "learning_rate": 0.0015, + "loss": 2.0544, + "step": 5425 + }, + { + "epoch": 0.5723628691983123, + "grad_norm": 0.40482980012893677, + "learning_rate": 0.0015, + "loss": 2.0653, + "step": 5426 + }, + { + "epoch": 0.5724683544303798, + "grad_norm": 0.5842303037643433, + "learning_rate": 0.0015, + "loss": 2.0645, + "step": 5427 + }, + { + "epoch": 0.5725738396624472, + "grad_norm": 0.5294345021247864, + "learning_rate": 0.0015, + "loss": 2.057, + "step": 5428 + }, + { + "epoch": 0.5726793248945148, + "grad_norm": 0.40281957387924194, + "learning_rate": 0.0015, + "loss": 2.0604, + "step": 5429 + }, + { + "epoch": 0.5727848101265823, + "grad_norm": 0.5406246185302734, + "learning_rate": 0.0015, + "loss": 2.0367, + "step": 5430 + }, + { + "epoch": 0.5728902953586498, + "grad_norm": 0.43023112416267395, + "learning_rate": 0.0015, + "loss": 2.0606, + "step": 5431 + }, + { + "epoch": 0.5729957805907173, + "grad_norm": 0.4147226810455322, + "learning_rate": 0.0015, + "loss": 2.0292, + "step": 5432 + }, + { + "epoch": 0.5731012658227848, + "grad_norm": 0.42885977029800415, + "learning_rate": 0.0015, + "loss": 2.0236, + "step": 5433 + }, + { + "epoch": 0.5732067510548523, + "grad_norm": 0.4306090772151947, + "learning_rate": 0.0015, + "loss": 2.0882, + "step": 5434 + }, + { + "epoch": 0.5733122362869199, + "grad_norm": 
0.41882646083831787, + "learning_rate": 0.0015, + "loss": 2.0307, + "step": 5435 + }, + { + "epoch": 0.5734177215189873, + "grad_norm": 0.4787736237049103, + "learning_rate": 0.0015, + "loss": 2.0314, + "step": 5436 + }, + { + "epoch": 0.5735232067510548, + "grad_norm": 0.4032834768295288, + "learning_rate": 0.0015, + "loss": 2.0222, + "step": 5437 + }, + { + "epoch": 0.5736286919831224, + "grad_norm": 0.44591236114501953, + "learning_rate": 0.0015, + "loss": 2.0476, + "step": 5438 + }, + { + "epoch": 0.5737341772151898, + "grad_norm": 0.4409850239753723, + "learning_rate": 0.0015, + "loss": 2.0594, + "step": 5439 + }, + { + "epoch": 0.5738396624472574, + "grad_norm": 0.46543994545936584, + "learning_rate": 0.0015, + "loss": 2.0333, + "step": 5440 + }, + { + "epoch": 0.5739451476793249, + "grad_norm": 0.45350298285484314, + "learning_rate": 0.0015, + "loss": 2.0555, + "step": 5441 + }, + { + "epoch": 0.5740506329113924, + "grad_norm": 0.5256867408752441, + "learning_rate": 0.0015, + "loss": 2.0487, + "step": 5442 + }, + { + "epoch": 0.5741561181434599, + "grad_norm": 0.44681718945503235, + "learning_rate": 0.0015, + "loss": 2.0537, + "step": 5443 + }, + { + "epoch": 0.5742616033755275, + "grad_norm": 0.485035240650177, + "learning_rate": 0.0015, + "loss": 2.0419, + "step": 5444 + }, + { + "epoch": 0.5743670886075949, + "grad_norm": 0.5564507842063904, + "learning_rate": 0.0015, + "loss": 2.0487, + "step": 5445 + }, + { + "epoch": 0.5744725738396624, + "grad_norm": 0.4058856666088104, + "learning_rate": 0.0015, + "loss": 2.064, + "step": 5446 + }, + { + "epoch": 0.57457805907173, + "grad_norm": 0.47207725048065186, + "learning_rate": 0.0015, + "loss": 2.0735, + "step": 5447 + }, + { + "epoch": 0.5746835443037974, + "grad_norm": 0.44115546345710754, + "learning_rate": 0.0015, + "loss": 2.057, + "step": 5448 + }, + { + "epoch": 0.574789029535865, + "grad_norm": 0.4927281141281128, + "learning_rate": 0.0015, + "loss": 2.0477, + "step": 5449 + }, + { + "epoch": 
0.5748945147679325, + "grad_norm": 0.42926687002182007, + "learning_rate": 0.0015, + "loss": 2.0504, + "step": 5450 + }, + { + "epoch": 0.575, + "grad_norm": 0.4816991686820984, + "learning_rate": 0.0015, + "loss": 2.0483, + "step": 5451 + }, + { + "epoch": 0.5751054852320675, + "grad_norm": 0.37037721276283264, + "learning_rate": 0.0015, + "loss": 2.0201, + "step": 5452 + }, + { + "epoch": 0.575210970464135, + "grad_norm": 0.4942268133163452, + "learning_rate": 0.0015, + "loss": 2.0851, + "step": 5453 + }, + { + "epoch": 0.5753164556962025, + "grad_norm": 0.40658608078956604, + "learning_rate": 0.0015, + "loss": 2.0424, + "step": 5454 + }, + { + "epoch": 0.57542194092827, + "grad_norm": 0.5078874230384827, + "learning_rate": 0.0015, + "loss": 2.0864, + "step": 5455 + }, + { + "epoch": 0.5755274261603376, + "grad_norm": 0.45148229598999023, + "learning_rate": 0.0015, + "loss": 2.0412, + "step": 5456 + }, + { + "epoch": 0.575632911392405, + "grad_norm": 0.47936519980430603, + "learning_rate": 0.0015, + "loss": 2.0544, + "step": 5457 + }, + { + "epoch": 0.5757383966244726, + "grad_norm": 0.3975873589515686, + "learning_rate": 0.0015, + "loss": 2.0421, + "step": 5458 + }, + { + "epoch": 0.5758438818565401, + "grad_norm": 0.4699929654598236, + "learning_rate": 0.0015, + "loss": 2.0443, + "step": 5459 + }, + { + "epoch": 0.5759493670886076, + "grad_norm": 0.3760715425014496, + "learning_rate": 0.0015, + "loss": 2.0422, + "step": 5460 + }, + { + "epoch": 0.5760548523206751, + "grad_norm": 0.4590809941291809, + "learning_rate": 0.0015, + "loss": 2.0227, + "step": 5461 + }, + { + "epoch": 0.5761603375527427, + "grad_norm": 0.3553166687488556, + "learning_rate": 0.0015, + "loss": 2.0277, + "step": 5462 + }, + { + "epoch": 0.5762658227848101, + "grad_norm": 0.45987892150878906, + "learning_rate": 0.0015, + "loss": 2.0541, + "step": 5463 + }, + { + "epoch": 0.5763713080168776, + "grad_norm": 0.40723326802253723, + "learning_rate": 0.0015, + "loss": 2.042, + "step": 5464 + }, 
+ { + "epoch": 0.5764767932489452, + "grad_norm": 0.4565507769584656, + "learning_rate": 0.0015, + "loss": 2.0866, + "step": 5465 + }, + { + "epoch": 0.5765822784810126, + "grad_norm": 0.41819727420806885, + "learning_rate": 0.0015, + "loss": 2.0548, + "step": 5466 + }, + { + "epoch": 0.5766877637130802, + "grad_norm": 0.452158659696579, + "learning_rate": 0.0015, + "loss": 2.0485, + "step": 5467 + }, + { + "epoch": 0.5767932489451477, + "grad_norm": 0.43328121304512024, + "learning_rate": 0.0015, + "loss": 2.0497, + "step": 5468 + }, + { + "epoch": 0.5768987341772152, + "grad_norm": 0.3649432957172394, + "learning_rate": 0.0015, + "loss": 2.0653, + "step": 5469 + }, + { + "epoch": 0.5770042194092827, + "grad_norm": 0.41044220328330994, + "learning_rate": 0.0015, + "loss": 2.0677, + "step": 5470 + }, + { + "epoch": 0.5771097046413503, + "grad_norm": 0.40024784207344055, + "learning_rate": 0.0015, + "loss": 2.0724, + "step": 5471 + }, + { + "epoch": 0.5772151898734177, + "grad_norm": 0.3615894019603729, + "learning_rate": 0.0015, + "loss": 2.0603, + "step": 5472 + }, + { + "epoch": 0.5773206751054852, + "grad_norm": 0.41444945335388184, + "learning_rate": 0.0015, + "loss": 2.0359, + "step": 5473 + }, + { + "epoch": 0.5774261603375528, + "grad_norm": 0.37813153862953186, + "learning_rate": 0.0015, + "loss": 2.0026, + "step": 5474 + }, + { + "epoch": 0.5775316455696202, + "grad_norm": 0.4552324414253235, + "learning_rate": 0.0015, + "loss": 2.0562, + "step": 5475 + }, + { + "epoch": 0.5776371308016878, + "grad_norm": 0.47065696120262146, + "learning_rate": 0.0015, + "loss": 2.0354, + "step": 5476 + }, + { + "epoch": 0.5777426160337553, + "grad_norm": 0.40533044934272766, + "learning_rate": 0.0015, + "loss": 2.0646, + "step": 5477 + }, + { + "epoch": 0.5778481012658228, + "grad_norm": 0.4581947922706604, + "learning_rate": 0.0015, + "loss": 2.0585, + "step": 5478 + }, + { + "epoch": 0.5779535864978903, + "grad_norm": 0.3921948969364166, + "learning_rate": 0.0015, + 
"loss": 2.059, + "step": 5479 + }, + { + "epoch": 0.5780590717299579, + "grad_norm": 0.5151875019073486, + "learning_rate": 0.0015, + "loss": 2.0891, + "step": 5480 + }, + { + "epoch": 0.5781645569620253, + "grad_norm": 0.45674076676368713, + "learning_rate": 0.0015, + "loss": 2.0045, + "step": 5481 + }, + { + "epoch": 0.5782700421940928, + "grad_norm": 0.3868505656719208, + "learning_rate": 0.0015, + "loss": 2.0651, + "step": 5482 + }, + { + "epoch": 0.5783755274261604, + "grad_norm": 0.4242245852947235, + "learning_rate": 0.0015, + "loss": 2.0482, + "step": 5483 + }, + { + "epoch": 0.5784810126582278, + "grad_norm": 0.3986978828907013, + "learning_rate": 0.0015, + "loss": 2.0691, + "step": 5484 + }, + { + "epoch": 0.5785864978902954, + "grad_norm": 0.4229939877986908, + "learning_rate": 0.0015, + "loss": 2.0539, + "step": 5485 + }, + { + "epoch": 0.5786919831223629, + "grad_norm": 0.4845801591873169, + "learning_rate": 0.0015, + "loss": 2.039, + "step": 5486 + }, + { + "epoch": 0.5787974683544304, + "grad_norm": 0.3685852885246277, + "learning_rate": 0.0015, + "loss": 2.0412, + "step": 5487 + }, + { + "epoch": 0.5789029535864979, + "grad_norm": 0.4217224419116974, + "learning_rate": 0.0015, + "loss": 2.0475, + "step": 5488 + }, + { + "epoch": 0.5790084388185655, + "grad_norm": 0.4470313787460327, + "learning_rate": 0.0015, + "loss": 2.0961, + "step": 5489 + }, + { + "epoch": 0.5791139240506329, + "grad_norm": 0.3639305830001831, + "learning_rate": 0.0015, + "loss": 2.0396, + "step": 5490 + }, + { + "epoch": 0.5792194092827004, + "grad_norm": 0.47840332984924316, + "learning_rate": 0.0015, + "loss": 2.0435, + "step": 5491 + }, + { + "epoch": 0.579324894514768, + "grad_norm": 0.36879920959472656, + "learning_rate": 0.0015, + "loss": 2.0489, + "step": 5492 + }, + { + "epoch": 0.5794303797468354, + "grad_norm": 0.49443933367729187, + "learning_rate": 0.0015, + "loss": 2.0779, + "step": 5493 + }, + { + "epoch": 0.579535864978903, + "grad_norm": 0.4046226441860199, + 
"learning_rate": 0.0015, + "loss": 2.0281, + "step": 5494 + }, + { + "epoch": 0.5796413502109705, + "grad_norm": 0.49987828731536865, + "learning_rate": 0.0015, + "loss": 2.069, + "step": 5495 + }, + { + "epoch": 0.579746835443038, + "grad_norm": 0.5830404162406921, + "learning_rate": 0.0015, + "loss": 2.0288, + "step": 5496 + }, + { + "epoch": 0.5798523206751055, + "grad_norm": 0.5870864987373352, + "learning_rate": 0.0015, + "loss": 2.038, + "step": 5497 + }, + { + "epoch": 0.5799578059071729, + "grad_norm": 0.4765099287033081, + "learning_rate": 0.0015, + "loss": 2.0374, + "step": 5498 + }, + { + "epoch": 0.5800632911392405, + "grad_norm": 0.38983041048049927, + "learning_rate": 0.0015, + "loss": 2.0734, + "step": 5499 + }, + { + "epoch": 0.580168776371308, + "grad_norm": 0.4464476406574249, + "learning_rate": 0.0015, + "loss": 2.0475, + "step": 5500 + }, + { + "epoch": 0.5802742616033755, + "grad_norm": 0.4041212499141693, + "learning_rate": 0.0015, + "loss": 2.0359, + "step": 5501 + }, + { + "epoch": 0.580379746835443, + "grad_norm": 0.3913680911064148, + "learning_rate": 0.0015, + "loss": 2.0596, + "step": 5502 + }, + { + "epoch": 0.5804852320675106, + "grad_norm": 0.43186938762664795, + "learning_rate": 0.0015, + "loss": 2.0629, + "step": 5503 + }, + { + "epoch": 0.580590717299578, + "grad_norm": 0.3858523368835449, + "learning_rate": 0.0015, + "loss": 2.0276, + "step": 5504 + }, + { + "epoch": 0.5806962025316456, + "grad_norm": 0.4293634295463562, + "learning_rate": 0.0015, + "loss": 2.0466, + "step": 5505 + }, + { + "epoch": 0.5808016877637131, + "grad_norm": 0.39333754777908325, + "learning_rate": 0.0015, + "loss": 2.0803, + "step": 5506 + }, + { + "epoch": 0.5809071729957805, + "grad_norm": 0.39828407764434814, + "learning_rate": 0.0015, + "loss": 2.0631, + "step": 5507 + }, + { + "epoch": 0.5810126582278481, + "grad_norm": 0.4165082573890686, + "learning_rate": 0.0015, + "loss": 2.03, + "step": 5508 + }, + { + "epoch": 0.5811181434599156, + "grad_norm": 
0.4489726424217224, + "learning_rate": 0.0015, + "loss": 2.0661, + "step": 5509 + }, + { + "epoch": 0.5812236286919831, + "grad_norm": 0.410009503364563, + "learning_rate": 0.0015, + "loss": 2.0398, + "step": 5510 + }, + { + "epoch": 0.5813291139240506, + "grad_norm": 0.41193878650665283, + "learning_rate": 0.0015, + "loss": 2.018, + "step": 5511 + }, + { + "epoch": 0.5814345991561182, + "grad_norm": 0.435441792011261, + "learning_rate": 0.0015, + "loss": 2.0502, + "step": 5512 + }, + { + "epoch": 0.5815400843881856, + "grad_norm": 0.386137992143631, + "learning_rate": 0.0015, + "loss": 2.0505, + "step": 5513 + }, + { + "epoch": 0.5816455696202532, + "grad_norm": 0.4072999656200409, + "learning_rate": 0.0015, + "loss": 2.0674, + "step": 5514 + }, + { + "epoch": 0.5817510548523207, + "grad_norm": 0.3949166536331177, + "learning_rate": 0.0015, + "loss": 2.0372, + "step": 5515 + }, + { + "epoch": 0.5818565400843881, + "grad_norm": 0.3963389992713928, + "learning_rate": 0.0015, + "loss": 2.0064, + "step": 5516 + }, + { + "epoch": 0.5819620253164557, + "grad_norm": 0.3466372787952423, + "learning_rate": 0.0015, + "loss": 2.0269, + "step": 5517 + }, + { + "epoch": 0.5820675105485232, + "grad_norm": 0.41131141781806946, + "learning_rate": 0.0015, + "loss": 2.0715, + "step": 5518 + }, + { + "epoch": 0.5821729957805907, + "grad_norm": 0.4090421199798584, + "learning_rate": 0.0015, + "loss": 2.0413, + "step": 5519 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 0.4085858464241028, + "learning_rate": 0.0015, + "loss": 2.0785, + "step": 5520 + }, + { + "epoch": 0.5823839662447258, + "grad_norm": 0.5715591907501221, + "learning_rate": 0.0015, + "loss": 2.0287, + "step": 5521 + }, + { + "epoch": 0.5824894514767932, + "grad_norm": 0.466688871383667, + "learning_rate": 0.0015, + "loss": 2.0357, + "step": 5522 + }, + { + "epoch": 0.5825949367088608, + "grad_norm": 0.4285401701927185, + "learning_rate": 0.0015, + "loss": 2.0211, + "step": 5523 + }, + { + "epoch": 
0.5827004219409283, + "grad_norm": 0.5093737244606018, + "learning_rate": 0.0015, + "loss": 2.0373, + "step": 5524 + }, + { + "epoch": 0.5828059071729957, + "grad_norm": 0.4205966889858246, + "learning_rate": 0.0015, + "loss": 2.0401, + "step": 5525 + }, + { + "epoch": 0.5829113924050633, + "grad_norm": 0.4851149916648865, + "learning_rate": 0.0015, + "loss": 2.0416, + "step": 5526 + }, + { + "epoch": 0.5830168776371308, + "grad_norm": 0.5053011178970337, + "learning_rate": 0.0015, + "loss": 2.0535, + "step": 5527 + }, + { + "epoch": 0.5831223628691983, + "grad_norm": 0.37869974970817566, + "learning_rate": 0.0015, + "loss": 2.0568, + "step": 5528 + }, + { + "epoch": 0.5832278481012658, + "grad_norm": 0.4506447911262512, + "learning_rate": 0.0015, + "loss": 2.0331, + "step": 5529 + }, + { + "epoch": 0.5833333333333334, + "grad_norm": 0.3610948920249939, + "learning_rate": 0.0015, + "loss": 2.0887, + "step": 5530 + }, + { + "epoch": 0.5834388185654008, + "grad_norm": 0.5073449015617371, + "learning_rate": 0.0015, + "loss": 2.0704, + "step": 5531 + }, + { + "epoch": 0.5835443037974684, + "grad_norm": 0.35118958353996277, + "learning_rate": 0.0015, + "loss": 2.0202, + "step": 5532 + }, + { + "epoch": 0.5836497890295359, + "grad_norm": 0.5009763836860657, + "learning_rate": 0.0015, + "loss": 2.0322, + "step": 5533 + }, + { + "epoch": 0.5837552742616033, + "grad_norm": 0.42076194286346436, + "learning_rate": 0.0015, + "loss": 2.0145, + "step": 5534 + }, + { + "epoch": 0.5838607594936709, + "grad_norm": 0.41570430994033813, + "learning_rate": 0.0015, + "loss": 2.0256, + "step": 5535 + }, + { + "epoch": 0.5839662447257384, + "grad_norm": 0.4592829644680023, + "learning_rate": 0.0015, + "loss": 2.02, + "step": 5536 + }, + { + "epoch": 0.5840717299578059, + "grad_norm": 0.40193837881088257, + "learning_rate": 0.0015, + "loss": 2.0308, + "step": 5537 + }, + { + "epoch": 0.5841772151898734, + "grad_norm": 0.5007033348083496, + "learning_rate": 0.0015, + "loss": 2.0846, + 
"step": 5538 + }, + { + "epoch": 0.584282700421941, + "grad_norm": 0.5651435256004333, + "learning_rate": 0.0015, + "loss": 2.0297, + "step": 5539 + }, + { + "epoch": 0.5843881856540084, + "grad_norm": 0.4311482906341553, + "learning_rate": 0.0015, + "loss": 2.0599, + "step": 5540 + }, + { + "epoch": 0.584493670886076, + "grad_norm": 0.49570441246032715, + "learning_rate": 0.0015, + "loss": 2.0626, + "step": 5541 + }, + { + "epoch": 0.5845991561181435, + "grad_norm": 0.5225094556808472, + "learning_rate": 0.0015, + "loss": 2.0419, + "step": 5542 + }, + { + "epoch": 0.5847046413502109, + "grad_norm": 0.4426600933074951, + "learning_rate": 0.0015, + "loss": 2.048, + "step": 5543 + }, + { + "epoch": 0.5848101265822785, + "grad_norm": 0.37098515033721924, + "learning_rate": 0.0015, + "loss": 2.0389, + "step": 5544 + }, + { + "epoch": 0.584915611814346, + "grad_norm": 0.43667879700660706, + "learning_rate": 0.0015, + "loss": 2.0479, + "step": 5545 + }, + { + "epoch": 0.5850210970464135, + "grad_norm": 0.3540003001689911, + "learning_rate": 0.0015, + "loss": 2.0308, + "step": 5546 + }, + { + "epoch": 0.585126582278481, + "grad_norm": 0.36643078923225403, + "learning_rate": 0.0015, + "loss": 2.0333, + "step": 5547 + }, + { + "epoch": 0.5852320675105486, + "grad_norm": 0.36744239926338196, + "learning_rate": 0.0015, + "loss": 2.0662, + "step": 5548 + }, + { + "epoch": 0.585337552742616, + "grad_norm": 0.38220900297164917, + "learning_rate": 0.0015, + "loss": 2.0489, + "step": 5549 + }, + { + "epoch": 0.5854430379746836, + "grad_norm": 0.39490681886672974, + "learning_rate": 0.0015, + "loss": 2.0734, + "step": 5550 + }, + { + "epoch": 0.5855485232067511, + "grad_norm": 0.38809242844581604, + "learning_rate": 0.0015, + "loss": 2.0828, + "step": 5551 + }, + { + "epoch": 0.5856540084388185, + "grad_norm": 0.3715904951095581, + "learning_rate": 0.0015, + "loss": 2.0867, + "step": 5552 + }, + { + "epoch": 0.5857594936708861, + "grad_norm": 0.33975401520729065, + "learning_rate": 
0.0015, + "loss": 2.0281, + "step": 5553 + }, + { + "epoch": 0.5858649789029536, + "grad_norm": 0.3479039669036865, + "learning_rate": 0.0015, + "loss": 2.0548, + "step": 5554 + }, + { + "epoch": 0.5859704641350211, + "grad_norm": 0.37504836916923523, + "learning_rate": 0.0015, + "loss": 2.0436, + "step": 5555 + }, + { + "epoch": 0.5860759493670886, + "grad_norm": 0.41222134232521057, + "learning_rate": 0.0015, + "loss": 2.0367, + "step": 5556 + }, + { + "epoch": 0.5861814345991562, + "grad_norm": 0.3547363579273224, + "learning_rate": 0.0015, + "loss": 2.035, + "step": 5557 + }, + { + "epoch": 0.5862869198312236, + "grad_norm": 0.4214532971382141, + "learning_rate": 0.0015, + "loss": 2.0683, + "step": 5558 + }, + { + "epoch": 0.5863924050632912, + "grad_norm": 0.40514421463012695, + "learning_rate": 0.0015, + "loss": 2.028, + "step": 5559 + }, + { + "epoch": 0.5864978902953587, + "grad_norm": 0.4207899570465088, + "learning_rate": 0.0015, + "loss": 2.0381, + "step": 5560 + }, + { + "epoch": 0.5866033755274261, + "grad_norm": 0.39712750911712646, + "learning_rate": 0.0015, + "loss": 2.0423, + "step": 5561 + }, + { + "epoch": 0.5867088607594937, + "grad_norm": 0.39221620559692383, + "learning_rate": 0.0015, + "loss": 2.0447, + "step": 5562 + }, + { + "epoch": 0.5868143459915611, + "grad_norm": 0.3702133297920227, + "learning_rate": 0.0015, + "loss": 2.044, + "step": 5563 + }, + { + "epoch": 0.5869198312236287, + "grad_norm": 0.41677799820899963, + "learning_rate": 0.0015, + "loss": 2.0519, + "step": 5564 + }, + { + "epoch": 0.5870253164556962, + "grad_norm": 0.4410463273525238, + "learning_rate": 0.0015, + "loss": 2.0448, + "step": 5565 + }, + { + "epoch": 0.5871308016877637, + "grad_norm": 0.4372425377368927, + "learning_rate": 0.0015, + "loss": 2.0546, + "step": 5566 + }, + { + "epoch": 0.5872362869198312, + "grad_norm": 0.4041476547718048, + "learning_rate": 0.0015, + "loss": 2.0715, + "step": 5567 + }, + { + "epoch": 0.5873417721518988, + "grad_norm": 
0.4111455976963043, + "learning_rate": 0.0015, + "loss": 2.0257, + "step": 5568 + }, + { + "epoch": 0.5874472573839662, + "grad_norm": 0.41737890243530273, + "learning_rate": 0.0015, + "loss": 2.0123, + "step": 5569 + }, + { + "epoch": 0.5875527426160337, + "grad_norm": 0.46476978063583374, + "learning_rate": 0.0015, + "loss": 2.0328, + "step": 5570 + }, + { + "epoch": 0.5876582278481013, + "grad_norm": 0.3990022540092468, + "learning_rate": 0.0015, + "loss": 2.051, + "step": 5571 + }, + { + "epoch": 0.5877637130801687, + "grad_norm": 0.48004150390625, + "learning_rate": 0.0015, + "loss": 2.0266, + "step": 5572 + }, + { + "epoch": 0.5878691983122363, + "grad_norm": 0.42924264073371887, + "learning_rate": 0.0015, + "loss": 2.0352, + "step": 5573 + }, + { + "epoch": 0.5879746835443038, + "grad_norm": 0.3996509611606598, + "learning_rate": 0.0015, + "loss": 2.027, + "step": 5574 + }, + { + "epoch": 0.5880801687763713, + "grad_norm": 0.45021533966064453, + "learning_rate": 0.0015, + "loss": 2.0572, + "step": 5575 + }, + { + "epoch": 0.5881856540084388, + "grad_norm": 0.3596097230911255, + "learning_rate": 0.0015, + "loss": 2.0345, + "step": 5576 + }, + { + "epoch": 0.5882911392405064, + "grad_norm": 0.4718737006187439, + "learning_rate": 0.0015, + "loss": 2.0566, + "step": 5577 + }, + { + "epoch": 0.5883966244725738, + "grad_norm": 0.3935296833515167, + "learning_rate": 0.0015, + "loss": 2.0186, + "step": 5578 + }, + { + "epoch": 0.5885021097046413, + "grad_norm": 0.477977454662323, + "learning_rate": 0.0015, + "loss": 2.0367, + "step": 5579 + }, + { + "epoch": 0.5886075949367089, + "grad_norm": 0.47294679284095764, + "learning_rate": 0.0015, + "loss": 2.0127, + "step": 5580 + }, + { + "epoch": 0.5887130801687763, + "grad_norm": 0.4550211429595947, + "learning_rate": 0.0015, + "loss": 2.0432, + "step": 5581 + }, + { + "epoch": 0.5888185654008439, + "grad_norm": 0.4549914300441742, + "learning_rate": 0.0015, + "loss": 2.0436, + "step": 5582 + }, + { + "epoch": 
0.5889240506329114, + "grad_norm": 0.4815577268600464, + "learning_rate": 0.0015, + "loss": 2.0179, + "step": 5583 + }, + { + "epoch": 0.5890295358649789, + "grad_norm": 0.45081645250320435, + "learning_rate": 0.0015, + "loss": 2.0367, + "step": 5584 + }, + { + "epoch": 0.5891350210970464, + "grad_norm": 0.40829506516456604, + "learning_rate": 0.0015, + "loss": 2.0527, + "step": 5585 + }, + { + "epoch": 0.589240506329114, + "grad_norm": 0.4548438787460327, + "learning_rate": 0.0015, + "loss": 2.0472, + "step": 5586 + }, + { + "epoch": 0.5893459915611814, + "grad_norm": 0.5260445475578308, + "learning_rate": 0.0015, + "loss": 2.0521, + "step": 5587 + }, + { + "epoch": 0.5894514767932489, + "grad_norm": 0.4012746512889862, + "learning_rate": 0.0015, + "loss": 2.0796, + "step": 5588 + }, + { + "epoch": 0.5895569620253165, + "grad_norm": 0.4997289180755615, + "learning_rate": 0.0015, + "loss": 2.0256, + "step": 5589 + }, + { + "epoch": 0.5896624472573839, + "grad_norm": 0.4876507520675659, + "learning_rate": 0.0015, + "loss": 2.0272, + "step": 5590 + }, + { + "epoch": 0.5897679324894515, + "grad_norm": 0.3865363597869873, + "learning_rate": 0.0015, + "loss": 2.0034, + "step": 5591 + }, + { + "epoch": 0.589873417721519, + "grad_norm": 0.5370485186576843, + "learning_rate": 0.0015, + "loss": 2.0594, + "step": 5592 + }, + { + "epoch": 0.5899789029535865, + "grad_norm": 0.47392037510871887, + "learning_rate": 0.0015, + "loss": 2.0303, + "step": 5593 + }, + { + "epoch": 0.590084388185654, + "grad_norm": 0.42652368545532227, + "learning_rate": 0.0015, + "loss": 2.0195, + "step": 5594 + }, + { + "epoch": 0.5901898734177216, + "grad_norm": 0.5128001570701599, + "learning_rate": 0.0015, + "loss": 2.0495, + "step": 5595 + }, + { + "epoch": 0.590295358649789, + "grad_norm": 0.4892159104347229, + "learning_rate": 0.0015, + "loss": 2.0215, + "step": 5596 + }, + { + "epoch": 0.5904008438818565, + "grad_norm": 0.4726697504520416, + "learning_rate": 0.0015, + "loss": 2.0182, + "step": 
5597 + }, + { + "epoch": 0.5905063291139241, + "grad_norm": 0.5399698615074158, + "learning_rate": 0.0015, + "loss": 2.053, + "step": 5598 + }, + { + "epoch": 0.5906118143459915, + "grad_norm": 0.5391969680786133, + "learning_rate": 0.0015, + "loss": 2.0648, + "step": 5599 + }, + { + "epoch": 0.5907172995780591, + "grad_norm": 0.3842872679233551, + "learning_rate": 0.0015, + "loss": 2.0596, + "step": 5600 + }, + { + "epoch": 0.5908227848101266, + "grad_norm": 0.512725293636322, + "learning_rate": 0.0015, + "loss": 2.0493, + "step": 5601 + }, + { + "epoch": 0.5909282700421941, + "grad_norm": 0.43375909328460693, + "learning_rate": 0.0015, + "loss": 2.0596, + "step": 5602 + }, + { + "epoch": 0.5910337552742616, + "grad_norm": 0.4138663709163666, + "learning_rate": 0.0015, + "loss": 2.0486, + "step": 5603 + }, + { + "epoch": 0.5911392405063292, + "grad_norm": 0.4286508858203888, + "learning_rate": 0.0015, + "loss": 2.0147, + "step": 5604 + }, + { + "epoch": 0.5912447257383966, + "grad_norm": 0.4294685125350952, + "learning_rate": 0.0015, + "loss": 2.013, + "step": 5605 + }, + { + "epoch": 0.5913502109704641, + "grad_norm": 0.40170907974243164, + "learning_rate": 0.0015, + "loss": 2.0328, + "step": 5606 + }, + { + "epoch": 0.5914556962025317, + "grad_norm": 0.4468423128128052, + "learning_rate": 0.0015, + "loss": 2.0448, + "step": 5607 + }, + { + "epoch": 0.5915611814345991, + "grad_norm": 0.3509105443954468, + "learning_rate": 0.0015, + "loss": 2.0292, + "step": 5608 + }, + { + "epoch": 0.5916666666666667, + "grad_norm": 0.3770703673362732, + "learning_rate": 0.0015, + "loss": 2.0287, + "step": 5609 + }, + { + "epoch": 0.5917721518987342, + "grad_norm": 0.48631349205970764, + "learning_rate": 0.0015, + "loss": 2.0683, + "step": 5610 + }, + { + "epoch": 0.5918776371308017, + "grad_norm": 0.44503679871559143, + "learning_rate": 0.0015, + "loss": 2.0396, + "step": 5611 + }, + { + "epoch": 0.5919831223628692, + "grad_norm": 0.41816407442092896, + "learning_rate": 0.0015, 
+ "loss": 2.055, + "step": 5612 + }, + { + "epoch": 0.5920886075949368, + "grad_norm": 0.47627994418144226, + "learning_rate": 0.0015, + "loss": 2.0428, + "step": 5613 + }, + { + "epoch": 0.5921940928270042, + "grad_norm": 0.4918190836906433, + "learning_rate": 0.0015, + "loss": 2.0531, + "step": 5614 + }, + { + "epoch": 0.5922995780590717, + "grad_norm": 0.4008800685405731, + "learning_rate": 0.0015, + "loss": 2.0604, + "step": 5615 + }, + { + "epoch": 0.5924050632911393, + "grad_norm": 0.4592895805835724, + "learning_rate": 0.0015, + "loss": 2.0429, + "step": 5616 + }, + { + "epoch": 0.5925105485232067, + "grad_norm": 0.3916281461715698, + "learning_rate": 0.0015, + "loss": 2.0374, + "step": 5617 + }, + { + "epoch": 0.5926160337552743, + "grad_norm": 0.40293148159980774, + "learning_rate": 0.0015, + "loss": 2.0509, + "step": 5618 + }, + { + "epoch": 0.5927215189873418, + "grad_norm": 0.4035172462463379, + "learning_rate": 0.0015, + "loss": 2.0599, + "step": 5619 + }, + { + "epoch": 0.5928270042194093, + "grad_norm": 0.3800513744354248, + "learning_rate": 0.0015, + "loss": 2.0529, + "step": 5620 + }, + { + "epoch": 0.5929324894514768, + "grad_norm": 0.3695085048675537, + "learning_rate": 0.0015, + "loss": 2.0367, + "step": 5621 + }, + { + "epoch": 0.5930379746835444, + "grad_norm": 0.41454657912254333, + "learning_rate": 0.0015, + "loss": 2.0546, + "step": 5622 + }, + { + "epoch": 0.5931434599156118, + "grad_norm": 0.3968769609928131, + "learning_rate": 0.0015, + "loss": 1.9971, + "step": 5623 + }, + { + "epoch": 0.5932489451476793, + "grad_norm": 0.42851752042770386, + "learning_rate": 0.0015, + "loss": 2.0639, + "step": 5624 + }, + { + "epoch": 0.5933544303797469, + "grad_norm": 0.49221065640449524, + "learning_rate": 0.0015, + "loss": 2.0701, + "step": 5625 + }, + { + "epoch": 0.5934599156118143, + "grad_norm": 0.4299011826515198, + "learning_rate": 0.0015, + "loss": 2.0287, + "step": 5626 + }, + { + "epoch": 0.5935654008438819, + "grad_norm": 
0.4303654730319977, + "learning_rate": 0.0015, + "loss": 2.0644, + "step": 5627 + }, + { + "epoch": 0.5936708860759494, + "grad_norm": 0.43324369192123413, + "learning_rate": 0.0015, + "loss": 2.0259, + "step": 5628 + }, + { + "epoch": 0.5937763713080169, + "grad_norm": 0.37996411323547363, + "learning_rate": 0.0015, + "loss": 2.0455, + "step": 5629 + }, + { + "epoch": 0.5938818565400844, + "grad_norm": 0.4318780303001404, + "learning_rate": 0.0015, + "loss": 2.0221, + "step": 5630 + }, + { + "epoch": 0.5939873417721518, + "grad_norm": 0.3793397545814514, + "learning_rate": 0.0015, + "loss": 2.0533, + "step": 5631 + }, + { + "epoch": 0.5940928270042194, + "grad_norm": 0.48560869693756104, + "learning_rate": 0.0015, + "loss": 2.0541, + "step": 5632 + }, + { + "epoch": 0.5941983122362869, + "grad_norm": 0.4874274432659149, + "learning_rate": 0.0015, + "loss": 2.0375, + "step": 5633 + }, + { + "epoch": 0.5943037974683544, + "grad_norm": 0.43591201305389404, + "learning_rate": 0.0015, + "loss": 2.0598, + "step": 5634 + }, + { + "epoch": 0.5944092827004219, + "grad_norm": 0.42714157700538635, + "learning_rate": 0.0015, + "loss": 2.0318, + "step": 5635 + }, + { + "epoch": 0.5945147679324895, + "grad_norm": 0.4399926960468292, + "learning_rate": 0.0015, + "loss": 2.0431, + "step": 5636 + }, + { + "epoch": 0.5946202531645569, + "grad_norm": 0.3764267563819885, + "learning_rate": 0.0015, + "loss": 2.0398, + "step": 5637 + }, + { + "epoch": 0.5947257383966245, + "grad_norm": 0.5426293015480042, + "learning_rate": 0.0015, + "loss": 2.1037, + "step": 5638 + }, + { + "epoch": 0.594831223628692, + "grad_norm": 0.4089203476905823, + "learning_rate": 0.0015, + "loss": 2.0432, + "step": 5639 + }, + { + "epoch": 0.5949367088607594, + "grad_norm": 0.47386080026626587, + "learning_rate": 0.0015, + "loss": 2.0616, + "step": 5640 + }, + { + "epoch": 0.595042194092827, + "grad_norm": 0.43693962693214417, + "learning_rate": 0.0015, + "loss": 2.0555, + "step": 5641 + }, + { + "epoch": 
0.5951476793248945, + "grad_norm": 0.43692445755004883, + "learning_rate": 0.0015, + "loss": 2.0367, + "step": 5642 + }, + { + "epoch": 0.595253164556962, + "grad_norm": 0.4681776165962219, + "learning_rate": 0.0015, + "loss": 2.0483, + "step": 5643 + }, + { + "epoch": 0.5953586497890295, + "grad_norm": 0.4462319016456604, + "learning_rate": 0.0015, + "loss": 2.0518, + "step": 5644 + }, + { + "epoch": 0.5954641350210971, + "grad_norm": 0.425896018743515, + "learning_rate": 0.0015, + "loss": 2.0406, + "step": 5645 + }, + { + "epoch": 0.5955696202531645, + "grad_norm": 0.40688392519950867, + "learning_rate": 0.0015, + "loss": 2.0329, + "step": 5646 + }, + { + "epoch": 0.5956751054852321, + "grad_norm": 0.433320552110672, + "learning_rate": 0.0015, + "loss": 2.042, + "step": 5647 + }, + { + "epoch": 0.5957805907172996, + "grad_norm": 0.41704338788986206, + "learning_rate": 0.0015, + "loss": 2.0497, + "step": 5648 + }, + { + "epoch": 0.595886075949367, + "grad_norm": 0.46559178829193115, + "learning_rate": 0.0015, + "loss": 2.0633, + "step": 5649 + }, + { + "epoch": 0.5959915611814346, + "grad_norm": 0.4270322024822235, + "learning_rate": 0.0015, + "loss": 2.0408, + "step": 5650 + }, + { + "epoch": 0.5960970464135021, + "grad_norm": 0.5272566080093384, + "learning_rate": 0.0015, + "loss": 2.0454, + "step": 5651 + }, + { + "epoch": 0.5962025316455696, + "grad_norm": 0.5304468274116516, + "learning_rate": 0.0015, + "loss": 2.039, + "step": 5652 + }, + { + "epoch": 0.5963080168776371, + "grad_norm": 0.4157659709453583, + "learning_rate": 0.0015, + "loss": 2.002, + "step": 5653 + }, + { + "epoch": 0.5964135021097047, + "grad_norm": 0.4766652584075928, + "learning_rate": 0.0015, + "loss": 2.0261, + "step": 5654 + }, + { + "epoch": 0.5965189873417721, + "grad_norm": 0.4558732211589813, + "learning_rate": 0.0015, + "loss": 2.0367, + "step": 5655 + }, + { + "epoch": 0.5966244725738397, + "grad_norm": 0.4170081913471222, + "learning_rate": 0.0015, + "loss": 2.0311, + "step": 
5656 + }, + { + "epoch": 0.5967299578059072, + "grad_norm": 0.41681674122810364, + "learning_rate": 0.0015, + "loss": 2.0701, + "step": 5657 + }, + { + "epoch": 0.5968354430379746, + "grad_norm": 0.4109189212322235, + "learning_rate": 0.0015, + "loss": 2.0742, + "step": 5658 + }, + { + "epoch": 0.5969409282700422, + "grad_norm": 0.3522888123989105, + "learning_rate": 0.0015, + "loss": 2.0017, + "step": 5659 + }, + { + "epoch": 0.5970464135021097, + "grad_norm": 0.4160808026790619, + "learning_rate": 0.0015, + "loss": 2.0168, + "step": 5660 + }, + { + "epoch": 0.5971518987341772, + "grad_norm": 0.3667967915534973, + "learning_rate": 0.0015, + "loss": 2.0805, + "step": 5661 + }, + { + "epoch": 0.5972573839662447, + "grad_norm": 0.4309985637664795, + "learning_rate": 0.0015, + "loss": 2.0412, + "step": 5662 + }, + { + "epoch": 0.5973628691983123, + "grad_norm": 0.4372972249984741, + "learning_rate": 0.0015, + "loss": 2.0453, + "step": 5663 + }, + { + "epoch": 0.5974683544303797, + "grad_norm": 0.39438700675964355, + "learning_rate": 0.0015, + "loss": 2.0451, + "step": 5664 + }, + { + "epoch": 0.5975738396624473, + "grad_norm": 0.4201143682003021, + "learning_rate": 0.0015, + "loss": 2.0367, + "step": 5665 + }, + { + "epoch": 0.5976793248945148, + "grad_norm": 0.38880833983421326, + "learning_rate": 0.0015, + "loss": 1.999, + "step": 5666 + }, + { + "epoch": 0.5977848101265822, + "grad_norm": 0.38845083117485046, + "learning_rate": 0.0015, + "loss": 2.0702, + "step": 5667 + }, + { + "epoch": 0.5978902953586498, + "grad_norm": 0.4989733397960663, + "learning_rate": 0.0015, + "loss": 2.0595, + "step": 5668 + }, + { + "epoch": 0.5979957805907173, + "grad_norm": 0.4622576832771301, + "learning_rate": 0.0015, + "loss": 2.049, + "step": 5669 + }, + { + "epoch": 0.5981012658227848, + "grad_norm": 0.38691720366477966, + "learning_rate": 0.0015, + "loss": 2.0239, + "step": 5670 + }, + { + "epoch": 0.5982067510548523, + "grad_norm": 0.48614954948425293, + "learning_rate": 
0.0015, + "loss": 2.0313, + "step": 5671 + }, + { + "epoch": 0.5983122362869199, + "grad_norm": 0.5109745264053345, + "learning_rate": 0.0015, + "loss": 2.0344, + "step": 5672 + }, + { + "epoch": 0.5984177215189873, + "grad_norm": 0.37403127551078796, + "learning_rate": 0.0015, + "loss": 2.0535, + "step": 5673 + }, + { + "epoch": 0.5985232067510549, + "grad_norm": 0.43536949157714844, + "learning_rate": 0.0015, + "loss": 2.0521, + "step": 5674 + }, + { + "epoch": 0.5986286919831224, + "grad_norm": 0.41906559467315674, + "learning_rate": 0.0015, + "loss": 2.0382, + "step": 5675 + }, + { + "epoch": 0.5987341772151898, + "grad_norm": 0.442566841840744, + "learning_rate": 0.0015, + "loss": 2.0597, + "step": 5676 + }, + { + "epoch": 0.5988396624472574, + "grad_norm": 0.3885989487171173, + "learning_rate": 0.0015, + "loss": 2.0101, + "step": 5677 + }, + { + "epoch": 0.5989451476793249, + "grad_norm": 0.4865771234035492, + "learning_rate": 0.0015, + "loss": 2.0625, + "step": 5678 + }, + { + "epoch": 0.5990506329113924, + "grad_norm": 0.4275457262992859, + "learning_rate": 0.0015, + "loss": 2.0167, + "step": 5679 + }, + { + "epoch": 0.5991561181434599, + "grad_norm": 0.40998727083206177, + "learning_rate": 0.0015, + "loss": 2.0399, + "step": 5680 + }, + { + "epoch": 0.5992616033755275, + "grad_norm": 0.4887460172176361, + "learning_rate": 0.0015, + "loss": 2.0405, + "step": 5681 + }, + { + "epoch": 0.5993670886075949, + "grad_norm": 0.4343344271183014, + "learning_rate": 0.0015, + "loss": 2.0178, + "step": 5682 + }, + { + "epoch": 0.5994725738396625, + "grad_norm": 0.3967795670032501, + "learning_rate": 0.0015, + "loss": 2.0193, + "step": 5683 + }, + { + "epoch": 0.59957805907173, + "grad_norm": 0.4038105010986328, + "learning_rate": 0.0015, + "loss": 2.0033, + "step": 5684 + }, + { + "epoch": 0.5996835443037974, + "grad_norm": 0.416652113199234, + "learning_rate": 0.0015, + "loss": 2.0414, + "step": 5685 + }, + { + "epoch": 0.599789029535865, + "grad_norm": 
0.3887330889701843, + "learning_rate": 0.0015, + "loss": 2.0619, + "step": 5686 + }, + { + "epoch": 0.5998945147679325, + "grad_norm": 0.434994637966156, + "learning_rate": 0.0015, + "loss": 2.0469, + "step": 5687 + }, + { + "epoch": 0.6, + "grad_norm": 0.39768511056900024, + "learning_rate": 0.0015, + "loss": 2.0068, + "step": 5688 + }, + { + "epoch": 0.6001054852320675, + "grad_norm": 0.4087360203266144, + "learning_rate": 0.0015, + "loss": 2.0208, + "step": 5689 + }, + { + "epoch": 0.6002109704641351, + "grad_norm": 0.3956158757209778, + "learning_rate": 0.0015, + "loss": 2.0453, + "step": 5690 + }, + { + "epoch": 0.6003164556962025, + "grad_norm": 0.41546961665153503, + "learning_rate": 0.0015, + "loss": 2.0604, + "step": 5691 + }, + { + "epoch": 0.6004219409282701, + "grad_norm": 0.3818667531013489, + "learning_rate": 0.0015, + "loss": 2.0016, + "step": 5692 + }, + { + "epoch": 0.6005274261603376, + "grad_norm": 0.4003254771232605, + "learning_rate": 0.0015, + "loss": 2.0506, + "step": 5693 + }, + { + "epoch": 0.600632911392405, + "grad_norm": 0.4333629608154297, + "learning_rate": 0.0015, + "loss": 2.0006, + "step": 5694 + }, + { + "epoch": 0.6007383966244726, + "grad_norm": 0.4359264075756073, + "learning_rate": 0.0015, + "loss": 1.9896, + "step": 5695 + }, + { + "epoch": 0.60084388185654, + "grad_norm": 0.42684704065322876, + "learning_rate": 0.0015, + "loss": 2.0427, + "step": 5696 + }, + { + "epoch": 0.6009493670886076, + "grad_norm": 0.37560728192329407, + "learning_rate": 0.0015, + "loss": 2.0461, + "step": 5697 + }, + { + "epoch": 0.6010548523206751, + "grad_norm": 0.3961176872253418, + "learning_rate": 0.0015, + "loss": 2.0488, + "step": 5698 + }, + { + "epoch": 0.6011603375527426, + "grad_norm": 0.41494736075401306, + "learning_rate": 0.0015, + "loss": 2.0192, + "step": 5699 + }, + { + "epoch": 0.6012658227848101, + "grad_norm": 0.4057249426841736, + "learning_rate": 0.0015, + "loss": 2.0574, + "step": 5700 + }, + { + "epoch": 0.6013713080168777, + 
"grad_norm": 0.45370393991470337, + "learning_rate": 0.0015, + "loss": 2.0092, + "step": 5701 + }, + { + "epoch": 0.6014767932489451, + "grad_norm": 0.35691362619400024, + "learning_rate": 0.0015, + "loss": 2.0477, + "step": 5702 + }, + { + "epoch": 0.6015822784810126, + "grad_norm": 0.4557761549949646, + "learning_rate": 0.0015, + "loss": 2.057, + "step": 5703 + }, + { + "epoch": 0.6016877637130802, + "grad_norm": 0.36018043756484985, + "learning_rate": 0.0015, + "loss": 2.0138, + "step": 5704 + }, + { + "epoch": 0.6017932489451476, + "grad_norm": 0.44469359517097473, + "learning_rate": 0.0015, + "loss": 2.031, + "step": 5705 + }, + { + "epoch": 0.6018987341772152, + "grad_norm": 0.396267831325531, + "learning_rate": 0.0015, + "loss": 2.0091, + "step": 5706 + }, + { + "epoch": 0.6020042194092827, + "grad_norm": 0.4645672142505646, + "learning_rate": 0.0015, + "loss": 2.0236, + "step": 5707 + }, + { + "epoch": 0.6021097046413502, + "grad_norm": 0.3724682033061981, + "learning_rate": 0.0015, + "loss": 2.0246, + "step": 5708 + }, + { + "epoch": 0.6022151898734177, + "grad_norm": 0.439416766166687, + "learning_rate": 0.0015, + "loss": 2.0249, + "step": 5709 + }, + { + "epoch": 0.6023206751054853, + "grad_norm": 0.38472023606300354, + "learning_rate": 0.0015, + "loss": 2.0667, + "step": 5710 + }, + { + "epoch": 0.6024261603375527, + "grad_norm": 0.4381917417049408, + "learning_rate": 0.0015, + "loss": 2.0515, + "step": 5711 + }, + { + "epoch": 0.6025316455696202, + "grad_norm": 0.35117271542549133, + "learning_rate": 0.0015, + "loss": 2.0115, + "step": 5712 + }, + { + "epoch": 0.6026371308016878, + "grad_norm": 0.4212937355041504, + "learning_rate": 0.0015, + "loss": 2.0276, + "step": 5713 + }, + { + "epoch": 0.6027426160337552, + "grad_norm": 0.3468867540359497, + "learning_rate": 0.0015, + "loss": 2.0146, + "step": 5714 + }, + { + "epoch": 0.6028481012658228, + "grad_norm": 0.36728593707084656, + "learning_rate": 0.0015, + "loss": 2.0139, + "step": 5715 + }, + { + 
"epoch": 0.6029535864978903, + "grad_norm": 0.37054869532585144, + "learning_rate": 0.0015, + "loss": 2.0378, + "step": 5716 + }, + { + "epoch": 0.6030590717299578, + "grad_norm": 0.39117538928985596, + "learning_rate": 0.0015, + "loss": 2.0614, + "step": 5717 + }, + { + "epoch": 0.6031645569620253, + "grad_norm": 0.3736092746257782, + "learning_rate": 0.0015, + "loss": 2.0373, + "step": 5718 + }, + { + "epoch": 0.6032700421940929, + "grad_norm": 0.4374220371246338, + "learning_rate": 0.0015, + "loss": 2.0358, + "step": 5719 + }, + { + "epoch": 0.6033755274261603, + "grad_norm": 0.37360644340515137, + "learning_rate": 0.0015, + "loss": 2.0244, + "step": 5720 + }, + { + "epoch": 0.6034810126582278, + "grad_norm": 0.43736276030540466, + "learning_rate": 0.0015, + "loss": 2.0226, + "step": 5721 + }, + { + "epoch": 0.6035864978902954, + "grad_norm": 0.41860154271125793, + "learning_rate": 0.0015, + "loss": 2.0479, + "step": 5722 + }, + { + "epoch": 0.6036919831223628, + "grad_norm": 0.4572315812110901, + "learning_rate": 0.0015, + "loss": 2.0357, + "step": 5723 + }, + { + "epoch": 0.6037974683544304, + "grad_norm": 0.4222533702850342, + "learning_rate": 0.0015, + "loss": 2.0304, + "step": 5724 + }, + { + "epoch": 0.6039029535864979, + "grad_norm": 0.515019953250885, + "learning_rate": 0.0015, + "loss": 2.0229, + "step": 5725 + }, + { + "epoch": 0.6040084388185654, + "grad_norm": 0.5089294910430908, + "learning_rate": 0.0015, + "loss": 2.056, + "step": 5726 + }, + { + "epoch": 0.6041139240506329, + "grad_norm": 0.5820645689964294, + "learning_rate": 0.0015, + "loss": 2.0229, + "step": 5727 + }, + { + "epoch": 0.6042194092827005, + "grad_norm": 0.389914870262146, + "learning_rate": 0.0015, + "loss": 2.0254, + "step": 5728 + }, + { + "epoch": 0.6043248945147679, + "grad_norm": 0.48906001448631287, + "learning_rate": 0.0015, + "loss": 2.0305, + "step": 5729 + }, + { + "epoch": 0.6044303797468354, + "grad_norm": 0.43169131875038147, + "learning_rate": 0.0015, + "loss": 
2.0377, + "step": 5730 + }, + { + "epoch": 0.604535864978903, + "grad_norm": 0.4496570825576782, + "learning_rate": 0.0015, + "loss": 2.0347, + "step": 5731 + }, + { + "epoch": 0.6046413502109704, + "grad_norm": 0.41676005721092224, + "learning_rate": 0.0015, + "loss": 2.0242, + "step": 5732 + }, + { + "epoch": 0.604746835443038, + "grad_norm": 0.4253094792366028, + "learning_rate": 0.0015, + "loss": 2.0415, + "step": 5733 + }, + { + "epoch": 0.6048523206751055, + "grad_norm": 0.36468884348869324, + "learning_rate": 0.0015, + "loss": 2.0023, + "step": 5734 + }, + { + "epoch": 0.604957805907173, + "grad_norm": 0.41535255312919617, + "learning_rate": 0.0015, + "loss": 2.0337, + "step": 5735 + }, + { + "epoch": 0.6050632911392405, + "grad_norm": 0.3676040470600128, + "learning_rate": 0.0015, + "loss": 2.0543, + "step": 5736 + }, + { + "epoch": 0.6051687763713081, + "grad_norm": 0.40479710698127747, + "learning_rate": 0.0015, + "loss": 2.0425, + "step": 5737 + }, + { + "epoch": 0.6052742616033755, + "grad_norm": 0.41566702723503113, + "learning_rate": 0.0015, + "loss": 2.0385, + "step": 5738 + }, + { + "epoch": 0.605379746835443, + "grad_norm": 0.4507865607738495, + "learning_rate": 0.0015, + "loss": 2.0626, + "step": 5739 + }, + { + "epoch": 0.6054852320675106, + "grad_norm": 0.3737233281135559, + "learning_rate": 0.0015, + "loss": 2.0178, + "step": 5740 + }, + { + "epoch": 0.605590717299578, + "grad_norm": 0.421364963054657, + "learning_rate": 0.0015, + "loss": 2.019, + "step": 5741 + }, + { + "epoch": 0.6056962025316456, + "grad_norm": 0.40618258714675903, + "learning_rate": 0.0015, + "loss": 2.037, + "step": 5742 + }, + { + "epoch": 0.6058016877637131, + "grad_norm": 0.36256539821624756, + "learning_rate": 0.0015, + "loss": 1.9971, + "step": 5743 + }, + { + "epoch": 0.6059071729957806, + "grad_norm": 0.4469764828681946, + "learning_rate": 0.0015, + "loss": 2.0077, + "step": 5744 + }, + { + "epoch": 0.6060126582278481, + "grad_norm": 0.4446444809436798, + 
"learning_rate": 0.0015, + "loss": 2.0063, + "step": 5745 + }, + { + "epoch": 0.6061181434599157, + "grad_norm": 0.39029738306999207, + "learning_rate": 0.0015, + "loss": 2.0083, + "step": 5746 + }, + { + "epoch": 0.6062236286919831, + "grad_norm": 0.45442304015159607, + "learning_rate": 0.0015, + "loss": 2.0072, + "step": 5747 + }, + { + "epoch": 0.6063291139240506, + "grad_norm": 0.5138792395591736, + "learning_rate": 0.0015, + "loss": 2.0174, + "step": 5748 + }, + { + "epoch": 0.6064345991561182, + "grad_norm": 0.35626500844955444, + "learning_rate": 0.0015, + "loss": 2.0259, + "step": 5749 + }, + { + "epoch": 0.6065400843881856, + "grad_norm": 0.4692634642124176, + "learning_rate": 0.0015, + "loss": 1.9857, + "step": 5750 + }, + { + "epoch": 0.6066455696202532, + "grad_norm": 0.4319239854812622, + "learning_rate": 0.0015, + "loss": 2.0064, + "step": 5751 + }, + { + "epoch": 0.6067510548523207, + "grad_norm": 0.38391590118408203, + "learning_rate": 0.0015, + "loss": 2.0053, + "step": 5752 + }, + { + "epoch": 0.6068565400843882, + "grad_norm": 0.4518257677555084, + "learning_rate": 0.0015, + "loss": 2.033, + "step": 5753 + }, + { + "epoch": 0.6069620253164557, + "grad_norm": 0.47067031264305115, + "learning_rate": 0.0015, + "loss": 2.0261, + "step": 5754 + }, + { + "epoch": 0.6070675105485233, + "grad_norm": 0.3764389455318451, + "learning_rate": 0.0015, + "loss": 2.0347, + "step": 5755 + }, + { + "epoch": 0.6071729957805907, + "grad_norm": 0.4523206949234009, + "learning_rate": 0.0015, + "loss": 2.0128, + "step": 5756 + }, + { + "epoch": 0.6072784810126582, + "grad_norm": 0.42482882738113403, + "learning_rate": 0.0015, + "loss": 2.0163, + "step": 5757 + }, + { + "epoch": 0.6073839662447258, + "grad_norm": 0.38369765877723694, + "learning_rate": 0.0015, + "loss": 2.0492, + "step": 5758 + }, + { + "epoch": 0.6074894514767932, + "grad_norm": 0.45390477776527405, + "learning_rate": 0.0015, + "loss": 2.0221, + "step": 5759 + }, + { + "epoch": 0.6075949367088608, + 
"grad_norm": 0.39259231090545654, + "learning_rate": 0.0015, + "loss": 2.0265, + "step": 5760 + }, + { + "epoch": 0.6077004219409282, + "grad_norm": 0.39065980911254883, + "learning_rate": 0.0015, + "loss": 1.9805, + "step": 5761 + }, + { + "epoch": 0.6078059071729958, + "grad_norm": 0.3911009430885315, + "learning_rate": 0.0015, + "loss": 2.0385, + "step": 5762 + }, + { + "epoch": 0.6079113924050633, + "grad_norm": 0.4149273633956909, + "learning_rate": 0.0015, + "loss": 1.9939, + "step": 5763 + }, + { + "epoch": 0.6080168776371307, + "grad_norm": 0.3869858682155609, + "learning_rate": 0.0015, + "loss": 2.0155, + "step": 5764 + }, + { + "epoch": 0.6081223628691983, + "grad_norm": 0.4480913281440735, + "learning_rate": 0.0015, + "loss": 2.0255, + "step": 5765 + }, + { + "epoch": 0.6082278481012658, + "grad_norm": 0.40908512473106384, + "learning_rate": 0.0015, + "loss": 2.0371, + "step": 5766 + }, + { + "epoch": 0.6083333333333333, + "grad_norm": 0.4367819130420685, + "learning_rate": 0.0015, + "loss": 2.041, + "step": 5767 + }, + { + "epoch": 0.6084388185654008, + "grad_norm": 0.39495110511779785, + "learning_rate": 0.0015, + "loss": 2.0025, + "step": 5768 + }, + { + "epoch": 0.6085443037974684, + "grad_norm": 0.3789714574813843, + "learning_rate": 0.0015, + "loss": 2.025, + "step": 5769 + }, + { + "epoch": 0.6086497890295358, + "grad_norm": 0.42063426971435547, + "learning_rate": 0.0015, + "loss": 2.0423, + "step": 5770 + }, + { + "epoch": 0.6087552742616034, + "grad_norm": 0.42066100239753723, + "learning_rate": 0.0015, + "loss": 2.009, + "step": 5771 + }, + { + "epoch": 0.6088607594936709, + "grad_norm": 0.35826894640922546, + "learning_rate": 0.0015, + "loss": 2.0214, + "step": 5772 + }, + { + "epoch": 0.6089662447257383, + "grad_norm": 0.4923109710216522, + "learning_rate": 0.0015, + "loss": 2.0426, + "step": 5773 + }, + { + "epoch": 0.6090717299578059, + "grad_norm": 0.4821831285953522, + "learning_rate": 0.0015, + "loss": 2.0336, + "step": 5774 + }, + { + 
"epoch": 0.6091772151898734, + "grad_norm": 0.4170941710472107, + "learning_rate": 0.0015, + "loss": 2.0051, + "step": 5775 + }, + { + "epoch": 0.6092827004219409, + "grad_norm": 0.4188048541545868, + "learning_rate": 0.0015, + "loss": 2.0422, + "step": 5776 + }, + { + "epoch": 0.6093881856540084, + "grad_norm": 0.4380027651786804, + "learning_rate": 0.0015, + "loss": 2.0304, + "step": 5777 + }, + { + "epoch": 0.609493670886076, + "grad_norm": 0.4250798225402832, + "learning_rate": 0.0015, + "loss": 2.0354, + "step": 5778 + }, + { + "epoch": 0.6095991561181434, + "grad_norm": 0.417670875787735, + "learning_rate": 0.0015, + "loss": 2.0248, + "step": 5779 + }, + { + "epoch": 0.609704641350211, + "grad_norm": 0.4232276976108551, + "learning_rate": 0.0015, + "loss": 2.0358, + "step": 5780 + }, + { + "epoch": 0.6098101265822785, + "grad_norm": 0.391531378030777, + "learning_rate": 0.0015, + "loss": 2.0223, + "step": 5781 + }, + { + "epoch": 0.609915611814346, + "grad_norm": 0.3871886730194092, + "learning_rate": 0.0015, + "loss": 2.0162, + "step": 5782 + }, + { + "epoch": 0.6100210970464135, + "grad_norm": 0.3657761514186859, + "learning_rate": 0.0015, + "loss": 2.0636, + "step": 5783 + }, + { + "epoch": 0.610126582278481, + "grad_norm": 0.4344370365142822, + "learning_rate": 0.0015, + "loss": 2.0581, + "step": 5784 + }, + { + "epoch": 0.6102320675105485, + "grad_norm": 0.38285359740257263, + "learning_rate": 0.0015, + "loss": 2.0255, + "step": 5785 + }, + { + "epoch": 0.610337552742616, + "grad_norm": 0.39095088839530945, + "learning_rate": 0.0015, + "loss": 1.9877, + "step": 5786 + }, + { + "epoch": 0.6104430379746836, + "grad_norm": 0.4492734670639038, + "learning_rate": 0.0015, + "loss": 2.0122, + "step": 5787 + }, + { + "epoch": 0.610548523206751, + "grad_norm": 0.381883442401886, + "learning_rate": 0.0015, + "loss": 2.0064, + "step": 5788 + }, + { + "epoch": 0.6106540084388186, + "grad_norm": 0.46091753244400024, + "learning_rate": 0.0015, + "loss": 2.0299, + 
"step": 5789 + }, + { + "epoch": 0.6107594936708861, + "grad_norm": 0.4418451189994812, + "learning_rate": 0.0015, + "loss": 1.9922, + "step": 5790 + }, + { + "epoch": 0.6108649789029535, + "grad_norm": 0.3980960547924042, + "learning_rate": 0.0015, + "loss": 2.0487, + "step": 5791 + }, + { + "epoch": 0.6109704641350211, + "grad_norm": 0.47650012373924255, + "learning_rate": 0.0015, + "loss": 2.0236, + "step": 5792 + }, + { + "epoch": 0.6110759493670886, + "grad_norm": 0.45873400568962097, + "learning_rate": 0.0015, + "loss": 2.0479, + "step": 5793 + }, + { + "epoch": 0.6111814345991561, + "grad_norm": 0.4432169795036316, + "learning_rate": 0.0015, + "loss": 2.0242, + "step": 5794 + }, + { + "epoch": 0.6112869198312236, + "grad_norm": 0.4673086404800415, + "learning_rate": 0.0015, + "loss": 2.0082, + "step": 5795 + }, + { + "epoch": 0.6113924050632912, + "grad_norm": 0.4281800389289856, + "learning_rate": 0.0015, + "loss": 2.0101, + "step": 5796 + }, + { + "epoch": 0.6114978902953586, + "grad_norm": 0.3884713053703308, + "learning_rate": 0.0015, + "loss": 2.0351, + "step": 5797 + }, + { + "epoch": 0.6116033755274262, + "grad_norm": 0.47366514801979065, + "learning_rate": 0.0015, + "loss": 2.011, + "step": 5798 + }, + { + "epoch": 0.6117088607594937, + "grad_norm": 0.4430273175239563, + "learning_rate": 0.0015, + "loss": 2.0222, + "step": 5799 + }, + { + "epoch": 0.6118143459915611, + "grad_norm": 0.3475148379802704, + "learning_rate": 0.0015, + "loss": 2.0393, + "step": 5800 + }, + { + "epoch": 0.6119198312236287, + "grad_norm": 0.40847310423851013, + "learning_rate": 0.0015, + "loss": 2.0321, + "step": 5801 + }, + { + "epoch": 0.6120253164556962, + "grad_norm": 0.39568084478378296, + "learning_rate": 0.0015, + "loss": 2.0096, + "step": 5802 + }, + { + "epoch": 0.6121308016877637, + "grad_norm": 0.3930375277996063, + "learning_rate": 0.0015, + "loss": 2.0161, + "step": 5803 + }, + { + "epoch": 0.6122362869198312, + "grad_norm": 0.39850175380706787, + 
"learning_rate": 0.0015, + "loss": 2.0366, + "step": 5804 + }, + { + "epoch": 0.6123417721518988, + "grad_norm": 0.41296881437301636, + "learning_rate": 0.0015, + "loss": 1.9912, + "step": 5805 + }, + { + "epoch": 0.6124472573839662, + "grad_norm": 0.4685303568840027, + "learning_rate": 0.0015, + "loss": 2.0398, + "step": 5806 + }, + { + "epoch": 0.6125527426160338, + "grad_norm": 0.3946267068386078, + "learning_rate": 0.0015, + "loss": 2.0344, + "step": 5807 + }, + { + "epoch": 0.6126582278481013, + "grad_norm": 0.44790494441986084, + "learning_rate": 0.0015, + "loss": 2.0481, + "step": 5808 + }, + { + "epoch": 0.6127637130801687, + "grad_norm": 0.42401060461997986, + "learning_rate": 0.0015, + "loss": 2.0497, + "step": 5809 + }, + { + "epoch": 0.6128691983122363, + "grad_norm": 0.38424232602119446, + "learning_rate": 0.0015, + "loss": 2.0267, + "step": 5810 + }, + { + "epoch": 0.6129746835443038, + "grad_norm": 0.3685046434402466, + "learning_rate": 0.0015, + "loss": 2.0567, + "step": 5811 + }, + { + "epoch": 0.6130801687763713, + "grad_norm": 0.43187177181243896, + "learning_rate": 0.0015, + "loss": 2.0314, + "step": 5812 + }, + { + "epoch": 0.6131856540084388, + "grad_norm": 0.3978344798088074, + "learning_rate": 0.0015, + "loss": 2.0051, + "step": 5813 + }, + { + "epoch": 0.6132911392405064, + "grad_norm": 0.4466049075126648, + "learning_rate": 0.0015, + "loss": 2.0104, + "step": 5814 + }, + { + "epoch": 0.6133966244725738, + "grad_norm": 0.5007913112640381, + "learning_rate": 0.0015, + "loss": 2.0772, + "step": 5815 + }, + { + "epoch": 0.6135021097046414, + "grad_norm": 0.4062153995037079, + "learning_rate": 0.0015, + "loss": 2.0532, + "step": 5816 + }, + { + "epoch": 0.6136075949367089, + "grad_norm": 0.40058428049087524, + "learning_rate": 0.0015, + "loss": 2.0311, + "step": 5817 + }, + { + "epoch": 0.6137130801687763, + "grad_norm": 0.41537487506866455, + "learning_rate": 0.0015, + "loss": 2.0492, + "step": 5818 + }, + { + "epoch": 0.6138185654008439, + 
"grad_norm": 0.4109538197517395, + "learning_rate": 0.0015, + "loss": 2.0233, + "step": 5819 + }, + { + "epoch": 0.6139240506329114, + "grad_norm": 0.3827114701271057, + "learning_rate": 0.0015, + "loss": 2.041, + "step": 5820 + }, + { + "epoch": 0.6140295358649789, + "grad_norm": 0.40461280941963196, + "learning_rate": 0.0015, + "loss": 2.0526, + "step": 5821 + }, + { + "epoch": 0.6141350210970464, + "grad_norm": 0.42570415139198303, + "learning_rate": 0.0015, + "loss": 2.0026, + "step": 5822 + }, + { + "epoch": 0.614240506329114, + "grad_norm": 0.4326819181442261, + "learning_rate": 0.0015, + "loss": 2.0344, + "step": 5823 + }, + { + "epoch": 0.6143459915611814, + "grad_norm": 0.47373056411743164, + "learning_rate": 0.0015, + "loss": 2.0207, + "step": 5824 + }, + { + "epoch": 0.614451476793249, + "grad_norm": 0.40178805589675903, + "learning_rate": 0.0015, + "loss": 2.0294, + "step": 5825 + }, + { + "epoch": 0.6145569620253165, + "grad_norm": 0.48931628465652466, + "learning_rate": 0.0015, + "loss": 2.0348, + "step": 5826 + }, + { + "epoch": 0.614662447257384, + "grad_norm": 0.526762068271637, + "learning_rate": 0.0015, + "loss": 2.0428, + "step": 5827 + }, + { + "epoch": 0.6147679324894515, + "grad_norm": 0.4469139277935028, + "learning_rate": 0.0015, + "loss": 1.9904, + "step": 5828 + }, + { + "epoch": 0.6148734177215189, + "grad_norm": 0.4196818172931671, + "learning_rate": 0.0015, + "loss": 2.0418, + "step": 5829 + }, + { + "epoch": 0.6149789029535865, + "grad_norm": 0.45218417048454285, + "learning_rate": 0.0015, + "loss": 1.9879, + "step": 5830 + }, + { + "epoch": 0.615084388185654, + "grad_norm": 0.43667104840278625, + "learning_rate": 0.0015, + "loss": 2.0297, + "step": 5831 + }, + { + "epoch": 0.6151898734177215, + "grad_norm": 0.405807763338089, + "learning_rate": 0.0015, + "loss": 2.0348, + "step": 5832 + }, + { + "epoch": 0.615295358649789, + "grad_norm": 0.49931663274765015, + "learning_rate": 0.0015, + "loss": 2.0154, + "step": 5833 + }, + { + 
"epoch": 0.6154008438818566, + "grad_norm": 0.39057716727256775, + "learning_rate": 0.0015, + "loss": 2.0566, + "step": 5834 + }, + { + "epoch": 0.615506329113924, + "grad_norm": 0.4531722664833069, + "learning_rate": 0.0015, + "loss": 2.0278, + "step": 5835 + }, + { + "epoch": 0.6156118143459915, + "grad_norm": 0.4236658811569214, + "learning_rate": 0.0015, + "loss": 2.0584, + "step": 5836 + }, + { + "epoch": 0.6157172995780591, + "grad_norm": 0.4404565691947937, + "learning_rate": 0.0015, + "loss": 1.9967, + "step": 5837 + }, + { + "epoch": 0.6158227848101265, + "grad_norm": 0.4798680543899536, + "learning_rate": 0.0015, + "loss": 2.0636, + "step": 5838 + }, + { + "epoch": 0.6159282700421941, + "grad_norm": 0.4114275276660919, + "learning_rate": 0.0015, + "loss": 2.0148, + "step": 5839 + }, + { + "epoch": 0.6160337552742616, + "grad_norm": 0.4477064609527588, + "learning_rate": 0.0015, + "loss": 2.044, + "step": 5840 + }, + { + "epoch": 0.6161392405063291, + "grad_norm": 0.4689315855503082, + "learning_rate": 0.0015, + "loss": 2.0478, + "step": 5841 + }, + { + "epoch": 0.6162447257383966, + "grad_norm": 0.42197760939598083, + "learning_rate": 0.0015, + "loss": 2.0456, + "step": 5842 + }, + { + "epoch": 0.6163502109704642, + "grad_norm": 0.39996543526649475, + "learning_rate": 0.0015, + "loss": 1.9827, + "step": 5843 + }, + { + "epoch": 0.6164556962025316, + "grad_norm": 0.43458861112594604, + "learning_rate": 0.0015, + "loss": 2.0579, + "step": 5844 + }, + { + "epoch": 0.6165611814345991, + "grad_norm": 0.3636605441570282, + "learning_rate": 0.0015, + "loss": 2.0234, + "step": 5845 + }, + { + "epoch": 0.6166666666666667, + "grad_norm": 0.4187416732311249, + "learning_rate": 0.0015, + "loss": 2.0253, + "step": 5846 + }, + { + "epoch": 0.6167721518987341, + "grad_norm": 0.4242717921733856, + "learning_rate": 0.0015, + "loss": 2.0368, + "step": 5847 + }, + { + "epoch": 0.6168776371308017, + "grad_norm": 0.3843770921230316, + "learning_rate": 0.0015, + "loss": 
2.0502, + "step": 5848 + }, + { + "epoch": 0.6169831223628692, + "grad_norm": 0.4727897644042969, + "learning_rate": 0.0015, + "loss": 2.074, + "step": 5849 + }, + { + "epoch": 0.6170886075949367, + "grad_norm": 0.44855499267578125, + "learning_rate": 0.0015, + "loss": 2.0328, + "step": 5850 + }, + { + "epoch": 0.6171940928270042, + "grad_norm": 0.3880438506603241, + "learning_rate": 0.0015, + "loss": 2.0272, + "step": 5851 + }, + { + "epoch": 0.6172995780590718, + "grad_norm": 0.4602985978126526, + "learning_rate": 0.0015, + "loss": 2.0231, + "step": 5852 + }, + { + "epoch": 0.6174050632911392, + "grad_norm": 0.3950703740119934, + "learning_rate": 0.0015, + "loss": 2.0153, + "step": 5853 + }, + { + "epoch": 0.6175105485232067, + "grad_norm": 0.49151477217674255, + "learning_rate": 0.0015, + "loss": 1.9898, + "step": 5854 + }, + { + "epoch": 0.6176160337552743, + "grad_norm": 0.3904816508293152, + "learning_rate": 0.0015, + "loss": 2.0473, + "step": 5855 + }, + { + "epoch": 0.6177215189873417, + "grad_norm": 0.4612749218940735, + "learning_rate": 0.0015, + "loss": 2.0331, + "step": 5856 + }, + { + "epoch": 0.6178270042194093, + "grad_norm": 0.47229066491127014, + "learning_rate": 0.0015, + "loss": 2.0244, + "step": 5857 + }, + { + "epoch": 0.6179324894514768, + "grad_norm": 0.3797162175178528, + "learning_rate": 0.0015, + "loss": 2.0405, + "step": 5858 + }, + { + "epoch": 0.6180379746835443, + "grad_norm": 0.5392406582832336, + "learning_rate": 0.0015, + "loss": 2.0397, + "step": 5859 + }, + { + "epoch": 0.6181434599156118, + "grad_norm": 0.5383774042129517, + "learning_rate": 0.0015, + "loss": 2.0347, + "step": 5860 + }, + { + "epoch": 0.6182489451476794, + "grad_norm": 0.405314564704895, + "learning_rate": 0.0015, + "loss": 2.04, + "step": 5861 + }, + { + "epoch": 0.6183544303797468, + "grad_norm": 0.5279885530471802, + "learning_rate": 0.0015, + "loss": 2.0706, + "step": 5862 + }, + { + "epoch": 0.6184599156118143, + "grad_norm": 0.4600779116153717, + 
"learning_rate": 0.0015, + "loss": 2.0041, + "step": 5863 + }, + { + "epoch": 0.6185654008438819, + "grad_norm": 0.5530429482460022, + "learning_rate": 0.0015, + "loss": 1.9921, + "step": 5864 + }, + { + "epoch": 0.6186708860759493, + "grad_norm": 0.4134112000465393, + "learning_rate": 0.0015, + "loss": 2.0118, + "step": 5865 + }, + { + "epoch": 0.6187763713080169, + "grad_norm": 0.5506809949874878, + "learning_rate": 0.0015, + "loss": 2.0291, + "step": 5866 + }, + { + "epoch": 0.6188818565400844, + "grad_norm": 0.47463059425354004, + "learning_rate": 0.0015, + "loss": 1.9698, + "step": 5867 + }, + { + "epoch": 0.6189873417721519, + "grad_norm": 0.4568637013435364, + "learning_rate": 0.0015, + "loss": 2.0068, + "step": 5868 + }, + { + "epoch": 0.6190928270042194, + "grad_norm": 0.4549083411693573, + "learning_rate": 0.0015, + "loss": 2.0267, + "step": 5869 + }, + { + "epoch": 0.619198312236287, + "grad_norm": 0.48197847604751587, + "learning_rate": 0.0015, + "loss": 2.0161, + "step": 5870 + }, + { + "epoch": 0.6193037974683544, + "grad_norm": 0.47238314151763916, + "learning_rate": 0.0015, + "loss": 2.0633, + "step": 5871 + }, + { + "epoch": 0.619409282700422, + "grad_norm": 0.5266290903091431, + "learning_rate": 0.0015, + "loss": 2.0354, + "step": 5872 + }, + { + "epoch": 0.6195147679324895, + "grad_norm": 0.45928138494491577, + "learning_rate": 0.0015, + "loss": 2.0057, + "step": 5873 + }, + { + "epoch": 0.6196202531645569, + "grad_norm": 0.5012140870094299, + "learning_rate": 0.0015, + "loss": 2.02, + "step": 5874 + }, + { + "epoch": 0.6197257383966245, + "grad_norm": 0.42334774136543274, + "learning_rate": 0.0015, + "loss": 2.0576, + "step": 5875 + }, + { + "epoch": 0.619831223628692, + "grad_norm": 0.49743208289146423, + "learning_rate": 0.0015, + "loss": 2.0305, + "step": 5876 + }, + { + "epoch": 0.6199367088607595, + "grad_norm": 0.49385595321655273, + "learning_rate": 0.0015, + "loss": 2.0364, + "step": 5877 + }, + { + "epoch": 0.620042194092827, + 
"grad_norm": 0.39411234855651855, + "learning_rate": 0.0015, + "loss": 2.0406, + "step": 5878 + }, + { + "epoch": 0.6201476793248946, + "grad_norm": 0.48487168550491333, + "learning_rate": 0.0015, + "loss": 2.0228, + "step": 5879 + }, + { + "epoch": 0.620253164556962, + "grad_norm": 0.478971928358078, + "learning_rate": 0.0015, + "loss": 2.0292, + "step": 5880 + }, + { + "epoch": 0.6203586497890295, + "grad_norm": 0.5064371228218079, + "learning_rate": 0.0015, + "loss": 2.0193, + "step": 5881 + }, + { + "epoch": 0.6204641350210971, + "grad_norm": 0.428057998418808, + "learning_rate": 0.0015, + "loss": 2.0372, + "step": 5882 + }, + { + "epoch": 0.6205696202531645, + "grad_norm": 0.6158875823020935, + "learning_rate": 0.0015, + "loss": 2.0141, + "step": 5883 + }, + { + "epoch": 0.6206751054852321, + "grad_norm": 0.5336908102035522, + "learning_rate": 0.0015, + "loss": 2.0005, + "step": 5884 + }, + { + "epoch": 0.6207805907172996, + "grad_norm": 0.44721174240112305, + "learning_rate": 0.0015, + "loss": 2.0154, + "step": 5885 + }, + { + "epoch": 0.6208860759493671, + "grad_norm": 0.4401302933692932, + "learning_rate": 0.0015, + "loss": 2.0608, + "step": 5886 + }, + { + "epoch": 0.6209915611814346, + "grad_norm": 0.38423025608062744, + "learning_rate": 0.0015, + "loss": 2.0465, + "step": 5887 + }, + { + "epoch": 0.6210970464135022, + "grad_norm": 0.3671785593032837, + "learning_rate": 0.0015, + "loss": 1.9973, + "step": 5888 + }, + { + "epoch": 0.6212025316455696, + "grad_norm": 0.43578246235847473, + "learning_rate": 0.0015, + "loss": 2.0204, + "step": 5889 + }, + { + "epoch": 0.6213080168776371, + "grad_norm": 0.514335036277771, + "learning_rate": 0.0015, + "loss": 2.0534, + "step": 5890 + }, + { + "epoch": 0.6214135021097047, + "grad_norm": 0.3555508255958557, + "learning_rate": 0.0015, + "loss": 1.9965, + "step": 5891 + }, + { + "epoch": 0.6215189873417721, + "grad_norm": 0.44933146238327026, + "learning_rate": 0.0015, + "loss": 2.038, + "step": 5892 + }, + { + 
"epoch": 0.6216244725738397, + "grad_norm": 0.45027756690979004, + "learning_rate": 0.0015, + "loss": 1.9988, + "step": 5893 + }, + { + "epoch": 0.6217299578059071, + "grad_norm": 0.35374099016189575, + "learning_rate": 0.0015, + "loss": 1.9894, + "step": 5894 + }, + { + "epoch": 0.6218354430379747, + "grad_norm": 0.41885942220687866, + "learning_rate": 0.0015, + "loss": 2.0359, + "step": 5895 + }, + { + "epoch": 0.6219409282700422, + "grad_norm": 0.40587443113327026, + "learning_rate": 0.0015, + "loss": 2.0271, + "step": 5896 + }, + { + "epoch": 0.6220464135021097, + "grad_norm": 0.4004913866519928, + "learning_rate": 0.0015, + "loss": 2.0392, + "step": 5897 + }, + { + "epoch": 0.6221518987341772, + "grad_norm": 0.34470900893211365, + "learning_rate": 0.0015, + "loss": 2.0363, + "step": 5898 + }, + { + "epoch": 0.6222573839662447, + "grad_norm": 0.4815920293331146, + "learning_rate": 0.0015, + "loss": 2.0172, + "step": 5899 + }, + { + "epoch": 0.6223628691983122, + "grad_norm": 0.42085161805152893, + "learning_rate": 0.0015, + "loss": 2.0366, + "step": 5900 + }, + { + "epoch": 0.6224683544303797, + "grad_norm": 0.5636794567108154, + "learning_rate": 0.0015, + "loss": 2.0188, + "step": 5901 + }, + { + "epoch": 0.6225738396624473, + "grad_norm": 0.40213701128959656, + "learning_rate": 0.0015, + "loss": 2.0516, + "step": 5902 + }, + { + "epoch": 0.6226793248945147, + "grad_norm": 0.49426335096359253, + "learning_rate": 0.0015, + "loss": 1.99, + "step": 5903 + }, + { + "epoch": 0.6227848101265823, + "grad_norm": 0.4158560335636139, + "learning_rate": 0.0015, + "loss": 2.047, + "step": 5904 + }, + { + "epoch": 0.6228902953586498, + "grad_norm": 0.5661061406135559, + "learning_rate": 0.0015, + "loss": 1.9902, + "step": 5905 + }, + { + "epoch": 0.6229957805907173, + "grad_norm": 0.4020337462425232, + "learning_rate": 0.0015, + "loss": 2.0188, + "step": 5906 + }, + { + "epoch": 0.6231012658227848, + "grad_norm": 0.48945096135139465, + "learning_rate": 0.0015, + "loss": 
1.9891, + "step": 5907 + }, + { + "epoch": 0.6232067510548523, + "grad_norm": 0.398019015789032, + "learning_rate": 0.0015, + "loss": 2.0229, + "step": 5908 + }, + { + "epoch": 0.6233122362869198, + "grad_norm": 0.394083172082901, + "learning_rate": 0.0015, + "loss": 2.0119, + "step": 5909 + }, + { + "epoch": 0.6234177215189873, + "grad_norm": 0.3993809223175049, + "learning_rate": 0.0015, + "loss": 2.0351, + "step": 5910 + }, + { + "epoch": 0.6235232067510549, + "grad_norm": 0.392826110124588, + "learning_rate": 0.0015, + "loss": 1.9822, + "step": 5911 + }, + { + "epoch": 0.6236286919831223, + "grad_norm": 0.4104556441307068, + "learning_rate": 0.0015, + "loss": 2.0345, + "step": 5912 + }, + { + "epoch": 0.6237341772151899, + "grad_norm": 0.4197947382926941, + "learning_rate": 0.0015, + "loss": 2.0095, + "step": 5913 + }, + { + "epoch": 0.6238396624472574, + "grad_norm": 0.4463631212711334, + "learning_rate": 0.0015, + "loss": 2.0049, + "step": 5914 + }, + { + "epoch": 0.6239451476793249, + "grad_norm": 0.4399239718914032, + "learning_rate": 0.0015, + "loss": 2.0029, + "step": 5915 + }, + { + "epoch": 0.6240506329113924, + "grad_norm": 0.43658894300460815, + "learning_rate": 0.0015, + "loss": 1.998, + "step": 5916 + }, + { + "epoch": 0.62415611814346, + "grad_norm": 0.4701091945171356, + "learning_rate": 0.0015, + "loss": 2.0518, + "step": 5917 + }, + { + "epoch": 0.6242616033755274, + "grad_norm": 0.47502824664115906, + "learning_rate": 0.0015, + "loss": 2.0242, + "step": 5918 + }, + { + "epoch": 0.6243670886075949, + "grad_norm": 0.3615197241306305, + "learning_rate": 0.0015, + "loss": 2.0247, + "step": 5919 + }, + { + "epoch": 0.6244725738396625, + "grad_norm": 0.4346238076686859, + "learning_rate": 0.0015, + "loss": 2.0095, + "step": 5920 + }, + { + "epoch": 0.6245780590717299, + "grad_norm": 0.4159817099571228, + "learning_rate": 0.0015, + "loss": 2.0439, + "step": 5921 + }, + { + "epoch": 0.6246835443037975, + "grad_norm": 0.4108104705810547, + 
"learning_rate": 0.0015, + "loss": 2.0099, + "step": 5922 + }, + { + "epoch": 0.624789029535865, + "grad_norm": 0.47243618965148926, + "learning_rate": 0.0015, + "loss": 2.025, + "step": 5923 + }, + { + "epoch": 0.6248945147679325, + "grad_norm": 0.4056318700313568, + "learning_rate": 0.0015, + "loss": 2.0362, + "step": 5924 + }, + { + "epoch": 0.625, + "grad_norm": 0.3639354705810547, + "learning_rate": 0.0015, + "loss": 2.0189, + "step": 5925 + }, + { + "epoch": 0.6251054852320675, + "grad_norm": 0.3951359689235687, + "learning_rate": 0.0015, + "loss": 1.9765, + "step": 5926 + }, + { + "epoch": 0.625210970464135, + "grad_norm": 0.3776671290397644, + "learning_rate": 0.0015, + "loss": 2.0404, + "step": 5927 + }, + { + "epoch": 0.6253164556962025, + "grad_norm": 0.49338358640670776, + "learning_rate": 0.0015, + "loss": 2.0087, + "step": 5928 + }, + { + "epoch": 0.6254219409282701, + "grad_norm": 0.4354079067707062, + "learning_rate": 0.0015, + "loss": 2.0163, + "step": 5929 + }, + { + "epoch": 0.6255274261603375, + "grad_norm": 0.38955095410346985, + "learning_rate": 0.0015, + "loss": 2.0215, + "step": 5930 + }, + { + "epoch": 0.6256329113924051, + "grad_norm": 0.43913063406944275, + "learning_rate": 0.0015, + "loss": 2.0198, + "step": 5931 + }, + { + "epoch": 0.6257383966244726, + "grad_norm": 0.4378643333911896, + "learning_rate": 0.0015, + "loss": 2.0449, + "step": 5932 + }, + { + "epoch": 0.62584388185654, + "grad_norm": 0.41498062014579773, + "learning_rate": 0.0015, + "loss": 2.0572, + "step": 5933 + }, + { + "epoch": 0.6259493670886076, + "grad_norm": 0.6591501832008362, + "learning_rate": 0.0015, + "loss": 2.0418, + "step": 5934 + }, + { + "epoch": 0.6260548523206751, + "grad_norm": 0.5118796229362488, + "learning_rate": 0.0015, + "loss": 2.0332, + "step": 5935 + }, + { + "epoch": 0.6261603375527426, + "grad_norm": 0.4615508019924164, + "learning_rate": 0.0015, + "loss": 2.0227, + "step": 5936 + }, + { + "epoch": 0.6262658227848101, + "grad_norm": 
0.6197395920753479, + "learning_rate": 0.0015, + "loss": 2.0492, + "step": 5937 + }, + { + "epoch": 0.6263713080168777, + "grad_norm": 0.4782489538192749, + "learning_rate": 0.0015, + "loss": 1.9804, + "step": 5938 + }, + { + "epoch": 0.6264767932489451, + "grad_norm": 0.42350322008132935, + "learning_rate": 0.0015, + "loss": 1.979, + "step": 5939 + }, + { + "epoch": 0.6265822784810127, + "grad_norm": 0.48295673727989197, + "learning_rate": 0.0015, + "loss": 2.0624, + "step": 5940 + }, + { + "epoch": 0.6266877637130802, + "grad_norm": 0.47139453887939453, + "learning_rate": 0.0015, + "loss": 2.0251, + "step": 5941 + }, + { + "epoch": 0.6267932489451477, + "grad_norm": 0.47714856266975403, + "learning_rate": 0.0015, + "loss": 2.0039, + "step": 5942 + }, + { + "epoch": 0.6268987341772152, + "grad_norm": 0.4843169152736664, + "learning_rate": 0.0015, + "loss": 1.9863, + "step": 5943 + }, + { + "epoch": 0.6270042194092827, + "grad_norm": 0.45902055501937866, + "learning_rate": 0.0015, + "loss": 2.0253, + "step": 5944 + }, + { + "epoch": 0.6271097046413502, + "grad_norm": 0.5356256365776062, + "learning_rate": 0.0015, + "loss": 2.0093, + "step": 5945 + }, + { + "epoch": 0.6272151898734177, + "grad_norm": 0.550555944442749, + "learning_rate": 0.0015, + "loss": 2.0417, + "step": 5946 + }, + { + "epoch": 0.6273206751054853, + "grad_norm": 0.4313272535800934, + "learning_rate": 0.0015, + "loss": 2.0016, + "step": 5947 + }, + { + "epoch": 0.6274261603375527, + "grad_norm": 0.5108353495597839, + "learning_rate": 0.0015, + "loss": 1.9977, + "step": 5948 + }, + { + "epoch": 0.6275316455696203, + "grad_norm": 0.3618285059928894, + "learning_rate": 0.0015, + "loss": 2.0225, + "step": 5949 + }, + { + "epoch": 0.6276371308016878, + "grad_norm": 0.43989458680152893, + "learning_rate": 0.0015, + "loss": 2.0329, + "step": 5950 + }, + { + "epoch": 0.6277426160337553, + "grad_norm": 0.41809138655662537, + "learning_rate": 0.0015, + "loss": 2.0409, + "step": 5951 + }, + { + "epoch": 
0.6278481012658228, + "grad_norm": 0.3602738380432129, + "learning_rate": 0.0015, + "loss": 2.0137, + "step": 5952 + }, + { + "epoch": 0.6279535864978903, + "grad_norm": 0.4487210810184479, + "learning_rate": 0.0015, + "loss": 2.0137, + "step": 5953 + }, + { + "epoch": 0.6280590717299578, + "grad_norm": 0.4093342125415802, + "learning_rate": 0.0015, + "loss": 2.0049, + "step": 5954 + }, + { + "epoch": 0.6281645569620253, + "grad_norm": 0.4097048044204712, + "learning_rate": 0.0015, + "loss": 2.0145, + "step": 5955 + }, + { + "epoch": 0.6282700421940929, + "grad_norm": 0.3524760901927948, + "learning_rate": 0.0015, + "loss": 2.0279, + "step": 5956 + }, + { + "epoch": 0.6283755274261603, + "grad_norm": 0.41196709871292114, + "learning_rate": 0.0015, + "loss": 2.0057, + "step": 5957 + }, + { + "epoch": 0.6284810126582279, + "grad_norm": 0.4447898864746094, + "learning_rate": 0.0015, + "loss": 1.9932, + "step": 5958 + }, + { + "epoch": 0.6285864978902953, + "grad_norm": 0.4480624496936798, + "learning_rate": 0.0015, + "loss": 2.0138, + "step": 5959 + }, + { + "epoch": 0.6286919831223629, + "grad_norm": 0.41188302636146545, + "learning_rate": 0.0015, + "loss": 2.0328, + "step": 5960 + }, + { + "epoch": 0.6287974683544304, + "grad_norm": 0.39568209648132324, + "learning_rate": 0.0015, + "loss": 2.0232, + "step": 5961 + }, + { + "epoch": 0.6289029535864978, + "grad_norm": 0.4427250623703003, + "learning_rate": 0.0015, + "loss": 2.0147, + "step": 5962 + }, + { + "epoch": 0.6290084388185654, + "grad_norm": 0.3938210904598236, + "learning_rate": 0.0015, + "loss": 2.0078, + "step": 5963 + }, + { + "epoch": 0.6291139240506329, + "grad_norm": 0.43604162335395813, + "learning_rate": 0.0015, + "loss": 2.0245, + "step": 5964 + }, + { + "epoch": 0.6292194092827004, + "grad_norm": 0.44147709012031555, + "learning_rate": 0.0015, + "loss": 2.0445, + "step": 5965 + }, + { + "epoch": 0.6293248945147679, + "grad_norm": 0.37140926718711853, + "learning_rate": 0.0015, + "loss": 2.0011, + 
"step": 5966 + }, + { + "epoch": 0.6294303797468355, + "grad_norm": 0.5560932755470276, + "learning_rate": 0.0015, + "loss": 2.0227, + "step": 5967 + }, + { + "epoch": 0.6295358649789029, + "grad_norm": 0.4814307987689972, + "learning_rate": 0.0015, + "loss": 1.9788, + "step": 5968 + }, + { + "epoch": 0.6296413502109705, + "grad_norm": 0.4624018371105194, + "learning_rate": 0.0015, + "loss": 1.9768, + "step": 5969 + }, + { + "epoch": 0.629746835443038, + "grad_norm": 0.5738750696182251, + "learning_rate": 0.0015, + "loss": 2.0395, + "step": 5970 + }, + { + "epoch": 0.6298523206751054, + "grad_norm": 0.36142316460609436, + "learning_rate": 0.0015, + "loss": 2.026, + "step": 5971 + }, + { + "epoch": 0.629957805907173, + "grad_norm": 0.603236973285675, + "learning_rate": 0.0015, + "loss": 2.0017, + "step": 5972 + }, + { + "epoch": 0.6300632911392405, + "grad_norm": 0.5192906856536865, + "learning_rate": 0.0015, + "loss": 2.0245, + "step": 5973 + }, + { + "epoch": 0.630168776371308, + "grad_norm": 0.4117656946182251, + "learning_rate": 0.0015, + "loss": 2.0232, + "step": 5974 + }, + { + "epoch": 0.6302742616033755, + "grad_norm": 0.4755204916000366, + "learning_rate": 0.0015, + "loss": 1.994, + "step": 5975 + }, + { + "epoch": 0.6303797468354431, + "grad_norm": 0.40970611572265625, + "learning_rate": 0.0015, + "loss": 2.0023, + "step": 5976 + }, + { + "epoch": 0.6304852320675105, + "grad_norm": 0.38528940081596375, + "learning_rate": 0.0015, + "loss": 1.9876, + "step": 5977 + }, + { + "epoch": 0.630590717299578, + "grad_norm": 0.39671453833580017, + "learning_rate": 0.0015, + "loss": 2.0152, + "step": 5978 + }, + { + "epoch": 0.6306962025316456, + "grad_norm": 0.4331350326538086, + "learning_rate": 0.0015, + "loss": 1.9691, + "step": 5979 + }, + { + "epoch": 0.630801687763713, + "grad_norm": 0.4372469186782837, + "learning_rate": 0.0015, + "loss": 1.9989, + "step": 5980 + }, + { + "epoch": 0.6309071729957806, + "grad_norm": 0.47980985045433044, + "learning_rate": 
0.0015, + "loss": 2.0367, + "step": 5981 + }, + { + "epoch": 0.6310126582278481, + "grad_norm": 0.43620285391807556, + "learning_rate": 0.0015, + "loss": 2.0089, + "step": 5982 + }, + { + "epoch": 0.6311181434599156, + "grad_norm": 0.4597388505935669, + "learning_rate": 0.0015, + "loss": 1.9742, + "step": 5983 + }, + { + "epoch": 0.6312236286919831, + "grad_norm": 0.4750670790672302, + "learning_rate": 0.0015, + "loss": 2.0269, + "step": 5984 + }, + { + "epoch": 0.6313291139240507, + "grad_norm": 0.43615275621414185, + "learning_rate": 0.0015, + "loss": 2.0224, + "step": 5985 + }, + { + "epoch": 0.6314345991561181, + "grad_norm": 0.40970584750175476, + "learning_rate": 0.0015, + "loss": 2.0163, + "step": 5986 + }, + { + "epoch": 0.6315400843881857, + "grad_norm": 0.41204121708869934, + "learning_rate": 0.0015, + "loss": 2.0045, + "step": 5987 + }, + { + "epoch": 0.6316455696202532, + "grad_norm": 0.417417973279953, + "learning_rate": 0.0015, + "loss": 2.0408, + "step": 5988 + }, + { + "epoch": 0.6317510548523206, + "grad_norm": 0.41146427392959595, + "learning_rate": 0.0015, + "loss": 2.0249, + "step": 5989 + }, + { + "epoch": 0.6318565400843882, + "grad_norm": 0.4423775374889374, + "learning_rate": 0.0015, + "loss": 2.0125, + "step": 5990 + }, + { + "epoch": 0.6319620253164557, + "grad_norm": 0.45304012298583984, + "learning_rate": 0.0015, + "loss": 2.0296, + "step": 5991 + }, + { + "epoch": 0.6320675105485232, + "grad_norm": 0.4659304618835449, + "learning_rate": 0.0015, + "loss": 2.003, + "step": 5992 + }, + { + "epoch": 0.6321729957805907, + "grad_norm": 0.3473758101463318, + "learning_rate": 0.0015, + "loss": 2.0576, + "step": 5993 + }, + { + "epoch": 0.6322784810126583, + "grad_norm": 0.4944387972354889, + "learning_rate": 0.0015, + "loss": 2.0209, + "step": 5994 + }, + { + "epoch": 0.6323839662447257, + "grad_norm": 0.5395835638046265, + "learning_rate": 0.0015, + "loss": 2.0109, + "step": 5995 + }, + { + "epoch": 0.6324894514767933, + "grad_norm": 
0.41002994775772095, + "learning_rate": 0.0015, + "loss": 2.0432, + "step": 5996 + }, + { + "epoch": 0.6325949367088608, + "grad_norm": 0.5051733255386353, + "learning_rate": 0.0015, + "loss": 2.0336, + "step": 5997 + }, + { + "epoch": 0.6327004219409282, + "grad_norm": 0.5952811241149902, + "learning_rate": 0.0015, + "loss": 2.0104, + "step": 5998 + }, + { + "epoch": 0.6328059071729958, + "grad_norm": 0.5297322273254395, + "learning_rate": 0.0015, + "loss": 2.0287, + "step": 5999 + }, + { + "epoch": 0.6329113924050633, + "grad_norm": 0.3999346196651459, + "learning_rate": 0.0015, + "loss": 2.0164, + "step": 6000 + }, + { + "epoch": 0.6330168776371308, + "grad_norm": 0.4709080159664154, + "learning_rate": 0.0015, + "loss": 1.9858, + "step": 6001 + }, + { + "epoch": 0.6331223628691983, + "grad_norm": 0.3564704358577728, + "learning_rate": 0.0015, + "loss": 1.9822, + "step": 6002 + }, + { + "epoch": 0.6332278481012659, + "grad_norm": 0.5051965117454529, + "learning_rate": 0.0015, + "loss": 2.0479, + "step": 6003 + }, + { + "epoch": 0.6333333333333333, + "grad_norm": 0.4070793390274048, + "learning_rate": 0.0015, + "loss": 2.0502, + "step": 6004 + }, + { + "epoch": 0.6334388185654009, + "grad_norm": 0.4348352253437042, + "learning_rate": 0.0015, + "loss": 2.0319, + "step": 6005 + }, + { + "epoch": 0.6335443037974684, + "grad_norm": 0.47261685132980347, + "learning_rate": 0.0015, + "loss": 2.0113, + "step": 6006 + }, + { + "epoch": 0.6336497890295358, + "grad_norm": 0.4829621911048889, + "learning_rate": 0.0015, + "loss": 2.0263, + "step": 6007 + }, + { + "epoch": 0.6337552742616034, + "grad_norm": 0.41176608204841614, + "learning_rate": 0.0015, + "loss": 2.0464, + "step": 6008 + }, + { + "epoch": 0.6338607594936709, + "grad_norm": 0.4416154623031616, + "learning_rate": 0.0015, + "loss": 2.0364, + "step": 6009 + }, + { + "epoch": 0.6339662447257384, + "grad_norm": 0.47480931878089905, + "learning_rate": 0.0015, + "loss": 1.9956, + "step": 6010 + }, + { + "epoch": 
0.6340717299578059, + "grad_norm": 0.48238709568977356, + "learning_rate": 0.0015, + "loss": 1.9902, + "step": 6011 + }, + { + "epoch": 0.6341772151898735, + "grad_norm": 0.6565226912498474, + "learning_rate": 0.0015, + "loss": 2.0383, + "step": 6012 + }, + { + "epoch": 0.6342827004219409, + "grad_norm": 0.5074025392532349, + "learning_rate": 0.0015, + "loss": 2.042, + "step": 6013 + }, + { + "epoch": 0.6343881856540085, + "grad_norm": 0.5033599734306335, + "learning_rate": 0.0015, + "loss": 2.0517, + "step": 6014 + }, + { + "epoch": 0.634493670886076, + "grad_norm": 0.4767807126045227, + "learning_rate": 0.0015, + "loss": 2.0197, + "step": 6015 + }, + { + "epoch": 0.6345991561181434, + "grad_norm": 0.4260616898536682, + "learning_rate": 0.0015, + "loss": 2.0051, + "step": 6016 + }, + { + "epoch": 0.634704641350211, + "grad_norm": 0.446330189704895, + "learning_rate": 0.0015, + "loss": 2.041, + "step": 6017 + }, + { + "epoch": 0.6348101265822785, + "grad_norm": 0.4597092568874359, + "learning_rate": 0.0015, + "loss": 2.0026, + "step": 6018 + }, + { + "epoch": 0.634915611814346, + "grad_norm": 0.47185298800468445, + "learning_rate": 0.0015, + "loss": 2.0086, + "step": 6019 + }, + { + "epoch": 0.6350210970464135, + "grad_norm": 0.47426339983940125, + "learning_rate": 0.0015, + "loss": 2.0301, + "step": 6020 + }, + { + "epoch": 0.6351265822784811, + "grad_norm": 0.47350361943244934, + "learning_rate": 0.0015, + "loss": 1.9974, + "step": 6021 + }, + { + "epoch": 0.6352320675105485, + "grad_norm": 0.40135180950164795, + "learning_rate": 0.0015, + "loss": 1.9866, + "step": 6022 + }, + { + "epoch": 0.635337552742616, + "grad_norm": 0.4399847984313965, + "learning_rate": 0.0015, + "loss": 2.0272, + "step": 6023 + }, + { + "epoch": 0.6354430379746835, + "grad_norm": 0.37440961599349976, + "learning_rate": 0.0015, + "loss": 2.0127, + "step": 6024 + }, + { + "epoch": 0.635548523206751, + "grad_norm": 0.42676010727882385, + "learning_rate": 0.0015, + "loss": 2.0055, + "step": 
6025 + }, + { + "epoch": 0.6356540084388186, + "grad_norm": 0.3734491467475891, + "learning_rate": 0.0015, + "loss": 2.0054, + "step": 6026 + }, + { + "epoch": 0.635759493670886, + "grad_norm": 0.4314201772212982, + "learning_rate": 0.0015, + "loss": 2.0042, + "step": 6027 + }, + { + "epoch": 0.6358649789029536, + "grad_norm": 0.38206344842910767, + "learning_rate": 0.0015, + "loss": 2.0197, + "step": 6028 + }, + { + "epoch": 0.6359704641350211, + "grad_norm": 0.4160510003566742, + "learning_rate": 0.0015, + "loss": 2.0024, + "step": 6029 + }, + { + "epoch": 0.6360759493670886, + "grad_norm": 0.41863808035850525, + "learning_rate": 0.0015, + "loss": 1.992, + "step": 6030 + }, + { + "epoch": 0.6361814345991561, + "grad_norm": 0.4465966820716858, + "learning_rate": 0.0015, + "loss": 2.0415, + "step": 6031 + }, + { + "epoch": 0.6362869198312237, + "grad_norm": 0.48269546031951904, + "learning_rate": 0.0015, + "loss": 2.0043, + "step": 6032 + }, + { + "epoch": 0.6363924050632911, + "grad_norm": 0.34539714455604553, + "learning_rate": 0.0015, + "loss": 1.9963, + "step": 6033 + }, + { + "epoch": 0.6364978902953586, + "grad_norm": 0.49054357409477234, + "learning_rate": 0.0015, + "loss": 2.0314, + "step": 6034 + }, + { + "epoch": 0.6366033755274262, + "grad_norm": 0.46951043605804443, + "learning_rate": 0.0015, + "loss": 2.025, + "step": 6035 + }, + { + "epoch": 0.6367088607594936, + "grad_norm": 0.4718550741672516, + "learning_rate": 0.0015, + "loss": 2.0146, + "step": 6036 + }, + { + "epoch": 0.6368143459915612, + "grad_norm": 0.38409286737442017, + "learning_rate": 0.0015, + "loss": 2.0273, + "step": 6037 + }, + { + "epoch": 0.6369198312236287, + "grad_norm": 0.49234163761138916, + "learning_rate": 0.0015, + "loss": 2.0066, + "step": 6038 + }, + { + "epoch": 0.6370253164556962, + "grad_norm": 0.34906163811683655, + "learning_rate": 0.0015, + "loss": 1.9975, + "step": 6039 + }, + { + "epoch": 0.6371308016877637, + "grad_norm": 0.43876883387565613, + "learning_rate": 
0.0015, + "loss": 2.0285, + "step": 6040 + }, + { + "epoch": 0.6372362869198313, + "grad_norm": 0.3747348189353943, + "learning_rate": 0.0015, + "loss": 1.9875, + "step": 6041 + }, + { + "epoch": 0.6373417721518987, + "grad_norm": 0.4504721164703369, + "learning_rate": 0.0015, + "loss": 2.0139, + "step": 6042 + }, + { + "epoch": 0.6374472573839662, + "grad_norm": 0.37424319982528687, + "learning_rate": 0.0015, + "loss": 1.9992, + "step": 6043 + }, + { + "epoch": 0.6375527426160338, + "grad_norm": 0.412316232919693, + "learning_rate": 0.0015, + "loss": 1.9802, + "step": 6044 + }, + { + "epoch": 0.6376582278481012, + "grad_norm": 0.38787293434143066, + "learning_rate": 0.0015, + "loss": 1.9995, + "step": 6045 + }, + { + "epoch": 0.6377637130801688, + "grad_norm": 0.41402268409729004, + "learning_rate": 0.0015, + "loss": 2.0059, + "step": 6046 + }, + { + "epoch": 0.6378691983122363, + "grad_norm": 0.3696643114089966, + "learning_rate": 0.0015, + "loss": 2.0178, + "step": 6047 + }, + { + "epoch": 0.6379746835443038, + "grad_norm": 0.48962855339050293, + "learning_rate": 0.0015, + "loss": 2.0369, + "step": 6048 + }, + { + "epoch": 0.6380801687763713, + "grad_norm": 0.42264655232429504, + "learning_rate": 0.0015, + "loss": 1.9997, + "step": 6049 + }, + { + "epoch": 0.6381856540084389, + "grad_norm": 0.42534396052360535, + "learning_rate": 0.0015, + "loss": 2.0098, + "step": 6050 + }, + { + "epoch": 0.6382911392405063, + "grad_norm": 0.38075557351112366, + "learning_rate": 0.0015, + "loss": 2.0141, + "step": 6051 + }, + { + "epoch": 0.6383966244725738, + "grad_norm": 0.36669406294822693, + "learning_rate": 0.0015, + "loss": 2.0474, + "step": 6052 + }, + { + "epoch": 0.6385021097046414, + "grad_norm": 0.4161529242992401, + "learning_rate": 0.0015, + "loss": 1.9952, + "step": 6053 + }, + { + "epoch": 0.6386075949367088, + "grad_norm": 0.3631613850593567, + "learning_rate": 0.0015, + "loss": 2.0218, + "step": 6054 + }, + { + "epoch": 0.6387130801687764, + "grad_norm": 
0.4018809497356415, + "learning_rate": 0.0015, + "loss": 1.9853, + "step": 6055 + }, + { + "epoch": 0.6388185654008439, + "grad_norm": 0.3639559745788574, + "learning_rate": 0.0015, + "loss": 1.9893, + "step": 6056 + }, + { + "epoch": 0.6389240506329114, + "grad_norm": 0.3890235722064972, + "learning_rate": 0.0015, + "loss": 2.0304, + "step": 6057 + }, + { + "epoch": 0.6390295358649789, + "grad_norm": 0.3789025545120239, + "learning_rate": 0.0015, + "loss": 2.0198, + "step": 6058 + }, + { + "epoch": 0.6391350210970465, + "grad_norm": 0.3728809058666229, + "learning_rate": 0.0015, + "loss": 2.0343, + "step": 6059 + }, + { + "epoch": 0.6392405063291139, + "grad_norm": 0.41067227721214294, + "learning_rate": 0.0015, + "loss": 2.033, + "step": 6060 + }, + { + "epoch": 0.6393459915611814, + "grad_norm": 0.3758329153060913, + "learning_rate": 0.0015, + "loss": 1.9779, + "step": 6061 + }, + { + "epoch": 0.639451476793249, + "grad_norm": 0.388310968875885, + "learning_rate": 0.0015, + "loss": 1.993, + "step": 6062 + }, + { + "epoch": 0.6395569620253164, + "grad_norm": 0.36558735370635986, + "learning_rate": 0.0015, + "loss": 2.0081, + "step": 6063 + }, + { + "epoch": 0.639662447257384, + "grad_norm": 0.3952011466026306, + "learning_rate": 0.0015, + "loss": 2.0478, + "step": 6064 + }, + { + "epoch": 0.6397679324894515, + "grad_norm": 0.39058929681777954, + "learning_rate": 0.0015, + "loss": 2.0116, + "step": 6065 + }, + { + "epoch": 0.639873417721519, + "grad_norm": 0.40953361988067627, + "learning_rate": 0.0015, + "loss": 1.9739, + "step": 6066 + }, + { + "epoch": 0.6399789029535865, + "grad_norm": 0.4046730697154999, + "learning_rate": 0.0015, + "loss": 2.0116, + "step": 6067 + }, + { + "epoch": 0.640084388185654, + "grad_norm": 0.395750492811203, + "learning_rate": 0.0015, + "loss": 1.9931, + "step": 6068 + }, + { + "epoch": 0.6401898734177215, + "grad_norm": 0.41864341497421265, + "learning_rate": 0.0015, + "loss": 2.0431, + "step": 6069 + }, + { + "epoch": 
0.640295358649789, + "grad_norm": 0.45863932371139526, + "learning_rate": 0.0015, + "loss": 2.0342, + "step": 6070 + }, + { + "epoch": 0.6404008438818566, + "grad_norm": 0.44946324825286865, + "learning_rate": 0.0015, + "loss": 1.9945, + "step": 6071 + }, + { + "epoch": 0.640506329113924, + "grad_norm": 0.4201285243034363, + "learning_rate": 0.0015, + "loss": 2.0207, + "step": 6072 + }, + { + "epoch": 0.6406118143459916, + "grad_norm": 0.39666807651519775, + "learning_rate": 0.0015, + "loss": 2.0299, + "step": 6073 + }, + { + "epoch": 0.6407172995780591, + "grad_norm": 0.4454169273376465, + "learning_rate": 0.0015, + "loss": 2.0418, + "step": 6074 + }, + { + "epoch": 0.6408227848101266, + "grad_norm": 0.35221415758132935, + "learning_rate": 0.0015, + "loss": 1.9821, + "step": 6075 + }, + { + "epoch": 0.6409282700421941, + "grad_norm": 0.39620232582092285, + "learning_rate": 0.0015, + "loss": 1.9814, + "step": 6076 + }, + { + "epoch": 0.6410337552742617, + "grad_norm": 0.4880402684211731, + "learning_rate": 0.0015, + "loss": 2.0015, + "step": 6077 + }, + { + "epoch": 0.6411392405063291, + "grad_norm": 0.3760055601596832, + "learning_rate": 0.0015, + "loss": 2.0134, + "step": 6078 + }, + { + "epoch": 0.6412447257383966, + "grad_norm": 0.4180763065814972, + "learning_rate": 0.0015, + "loss": 2.0275, + "step": 6079 + }, + { + "epoch": 0.6413502109704642, + "grad_norm": 0.4296525716781616, + "learning_rate": 0.0015, + "loss": 1.9864, + "step": 6080 + }, + { + "epoch": 0.6414556962025316, + "grad_norm": 0.39761248230934143, + "learning_rate": 0.0015, + "loss": 2.0105, + "step": 6081 + }, + { + "epoch": 0.6415611814345992, + "grad_norm": 0.4000162184238434, + "learning_rate": 0.0015, + "loss": 1.9833, + "step": 6082 + }, + { + "epoch": 0.6416666666666667, + "grad_norm": 0.41644829511642456, + "learning_rate": 0.0015, + "loss": 2.0362, + "step": 6083 + }, + { + "epoch": 0.6417721518987342, + "grad_norm": 0.4071304202079773, + "learning_rate": 0.0015, + "loss": 2.0155, + 
"step": 6084 + }, + { + "epoch": 0.6418776371308017, + "grad_norm": 0.4528487026691437, + "learning_rate": 0.0015, + "loss": 2.0022, + "step": 6085 + }, + { + "epoch": 0.6419831223628693, + "grad_norm": 0.4663274884223938, + "learning_rate": 0.0015, + "loss": 2.0414, + "step": 6086 + }, + { + "epoch": 0.6420886075949367, + "grad_norm": 0.4373714327812195, + "learning_rate": 0.0015, + "loss": 2.0174, + "step": 6087 + }, + { + "epoch": 0.6421940928270042, + "grad_norm": 0.4712250232696533, + "learning_rate": 0.0015, + "loss": 1.9875, + "step": 6088 + }, + { + "epoch": 0.6422995780590718, + "grad_norm": 0.427062451839447, + "learning_rate": 0.0015, + "loss": 1.991, + "step": 6089 + }, + { + "epoch": 0.6424050632911392, + "grad_norm": 0.45923277735710144, + "learning_rate": 0.0015, + "loss": 2.0254, + "step": 6090 + }, + { + "epoch": 0.6425105485232068, + "grad_norm": 0.4937707483768463, + "learning_rate": 0.0015, + "loss": 2.0148, + "step": 6091 + }, + { + "epoch": 0.6426160337552742, + "grad_norm": 0.5107033252716064, + "learning_rate": 0.0015, + "loss": 2.0153, + "step": 6092 + }, + { + "epoch": 0.6427215189873418, + "grad_norm": 0.45174720883369446, + "learning_rate": 0.0015, + "loss": 2.0159, + "step": 6093 + }, + { + "epoch": 0.6428270042194093, + "grad_norm": 0.4155990779399872, + "learning_rate": 0.0015, + "loss": 2.002, + "step": 6094 + }, + { + "epoch": 0.6429324894514767, + "grad_norm": 0.5074058771133423, + "learning_rate": 0.0015, + "loss": 1.9792, + "step": 6095 + }, + { + "epoch": 0.6430379746835443, + "grad_norm": 0.39387616515159607, + "learning_rate": 0.0015, + "loss": 1.9957, + "step": 6096 + }, + { + "epoch": 0.6431434599156118, + "grad_norm": 0.4583861529827118, + "learning_rate": 0.0015, + "loss": 2.0027, + "step": 6097 + }, + { + "epoch": 0.6432489451476793, + "grad_norm": 0.5504308342933655, + "learning_rate": 0.0015, + "loss": 2.0272, + "step": 6098 + }, + { + "epoch": 0.6433544303797468, + "grad_norm": 0.3785899877548218, + "learning_rate": 
0.0015, + "loss": 1.9912, + "step": 6099 + }, + { + "epoch": 0.6434599156118144, + "grad_norm": 0.5693714618682861, + "learning_rate": 0.0015, + "loss": 2.0055, + "step": 6100 + }, + { + "epoch": 0.6435654008438818, + "grad_norm": 0.44625818729400635, + "learning_rate": 0.0015, + "loss": 2.0434, + "step": 6101 + }, + { + "epoch": 0.6436708860759494, + "grad_norm": 0.504557192325592, + "learning_rate": 0.0015, + "loss": 1.9681, + "step": 6102 + }, + { + "epoch": 0.6437763713080169, + "grad_norm": 0.4946519136428833, + "learning_rate": 0.0015, + "loss": 1.9932, + "step": 6103 + }, + { + "epoch": 0.6438818565400843, + "grad_norm": 0.42842721939086914, + "learning_rate": 0.0015, + "loss": 2.0152, + "step": 6104 + }, + { + "epoch": 0.6439873417721519, + "grad_norm": 0.5675675272941589, + "learning_rate": 0.0015, + "loss": 2.0436, + "step": 6105 + }, + { + "epoch": 0.6440928270042194, + "grad_norm": 0.3644684851169586, + "learning_rate": 0.0015, + "loss": 2.0177, + "step": 6106 + }, + { + "epoch": 0.6441983122362869, + "grad_norm": 0.6040049195289612, + "learning_rate": 0.0015, + "loss": 2.0328, + "step": 6107 + }, + { + "epoch": 0.6443037974683544, + "grad_norm": 0.4285546839237213, + "learning_rate": 0.0015, + "loss": 2.0054, + "step": 6108 + }, + { + "epoch": 0.644409282700422, + "grad_norm": 0.49574002623558044, + "learning_rate": 0.0015, + "loss": 2.0418, + "step": 6109 + }, + { + "epoch": 0.6445147679324894, + "grad_norm": 0.49598202109336853, + "learning_rate": 0.0015, + "loss": 1.9987, + "step": 6110 + }, + { + "epoch": 0.644620253164557, + "grad_norm": 0.426870733499527, + "learning_rate": 0.0015, + "loss": 2.0022, + "step": 6111 + }, + { + "epoch": 0.6447257383966245, + "grad_norm": 0.45522063970565796, + "learning_rate": 0.0015, + "loss": 1.9918, + "step": 6112 + }, + { + "epoch": 0.6448312236286919, + "grad_norm": 0.4176657497882843, + "learning_rate": 0.0015, + "loss": 2.0381, + "step": 6113 + }, + { + "epoch": 0.6449367088607595, + "grad_norm": 
0.412543922662735, + "learning_rate": 0.0015, + "loss": 1.9972, + "step": 6114 + }, + { + "epoch": 0.645042194092827, + "grad_norm": 0.3741489350795746, + "learning_rate": 0.0015, + "loss": 2.0033, + "step": 6115 + }, + { + "epoch": 0.6451476793248945, + "grad_norm": 0.41432681679725647, + "learning_rate": 0.0015, + "loss": 2.0406, + "step": 6116 + }, + { + "epoch": 0.645253164556962, + "grad_norm": 0.4353832006454468, + "learning_rate": 0.0015, + "loss": 2.0048, + "step": 6117 + }, + { + "epoch": 0.6453586497890296, + "grad_norm": 0.41304129362106323, + "learning_rate": 0.0015, + "loss": 2.0265, + "step": 6118 + }, + { + "epoch": 0.645464135021097, + "grad_norm": 0.40259850025177, + "learning_rate": 0.0015, + "loss": 1.995, + "step": 6119 + }, + { + "epoch": 0.6455696202531646, + "grad_norm": 0.465008407831192, + "learning_rate": 0.0015, + "loss": 2.0067, + "step": 6120 + }, + { + "epoch": 0.6456751054852321, + "grad_norm": 0.414339154958725, + "learning_rate": 0.0015, + "loss": 2.0002, + "step": 6121 + }, + { + "epoch": 0.6457805907172995, + "grad_norm": 0.486176073551178, + "learning_rate": 0.0015, + "loss": 2.027, + "step": 6122 + }, + { + "epoch": 0.6458860759493671, + "grad_norm": 0.4036640524864197, + "learning_rate": 0.0015, + "loss": 2.0073, + "step": 6123 + }, + { + "epoch": 0.6459915611814346, + "grad_norm": 0.4871128797531128, + "learning_rate": 0.0015, + "loss": 2.0035, + "step": 6124 + }, + { + "epoch": 0.6460970464135021, + "grad_norm": 0.4146539270877838, + "learning_rate": 0.0015, + "loss": 2.0143, + "step": 6125 + }, + { + "epoch": 0.6462025316455696, + "grad_norm": 0.5447449684143066, + "learning_rate": 0.0015, + "loss": 2.0185, + "step": 6126 + }, + { + "epoch": 0.6463080168776372, + "grad_norm": 0.5662007331848145, + "learning_rate": 0.0015, + "loss": 1.9994, + "step": 6127 + }, + { + "epoch": 0.6464135021097046, + "grad_norm": 0.4052295386791229, + "learning_rate": 0.0015, + "loss": 1.9784, + "step": 6128 + }, + { + "epoch": 
0.6465189873417722, + "grad_norm": 0.47362953424453735, + "learning_rate": 0.0015, + "loss": 2.0009, + "step": 6129 + }, + { + "epoch": 0.6466244725738397, + "grad_norm": 0.4537448287010193, + "learning_rate": 0.0015, + "loss": 2.0342, + "step": 6130 + }, + { + "epoch": 0.6467299578059071, + "grad_norm": 0.42448070645332336, + "learning_rate": 0.0015, + "loss": 2.0489, + "step": 6131 + }, + { + "epoch": 0.6468354430379747, + "grad_norm": 0.4741910994052887, + "learning_rate": 0.0015, + "loss": 2.0463, + "step": 6132 + }, + { + "epoch": 0.6469409282700422, + "grad_norm": 0.408561110496521, + "learning_rate": 0.0015, + "loss": 1.9926, + "step": 6133 + }, + { + "epoch": 0.6470464135021097, + "grad_norm": 0.5560970902442932, + "learning_rate": 0.0015, + "loss": 2.0443, + "step": 6134 + }, + { + "epoch": 0.6471518987341772, + "grad_norm": 0.4507302939891815, + "learning_rate": 0.0015, + "loss": 1.9795, + "step": 6135 + }, + { + "epoch": 0.6472573839662448, + "grad_norm": 0.4441967308521271, + "learning_rate": 0.0015, + "loss": 2.0338, + "step": 6136 + }, + { + "epoch": 0.6473628691983122, + "grad_norm": 0.4645198881626129, + "learning_rate": 0.0015, + "loss": 2.0087, + "step": 6137 + }, + { + "epoch": 0.6474683544303798, + "grad_norm": 0.524660587310791, + "learning_rate": 0.0015, + "loss": 2.0019, + "step": 6138 + }, + { + "epoch": 0.6475738396624473, + "grad_norm": 0.42485085129737854, + "learning_rate": 0.0015, + "loss": 2.0112, + "step": 6139 + }, + { + "epoch": 0.6476793248945147, + "grad_norm": 0.5017753839492798, + "learning_rate": 0.0015, + "loss": 1.955, + "step": 6140 + }, + { + "epoch": 0.6477848101265823, + "grad_norm": 0.4965680241584778, + "learning_rate": 0.0015, + "loss": 2.0132, + "step": 6141 + }, + { + "epoch": 0.6478902953586498, + "grad_norm": 0.38419726490974426, + "learning_rate": 0.0015, + "loss": 1.9947, + "step": 6142 + }, + { + "epoch": 0.6479957805907173, + "grad_norm": 0.612550675868988, + "learning_rate": 0.0015, + "loss": 1.9893, + "step": 
6143 + }, + { + "epoch": 0.6481012658227848, + "grad_norm": 0.5364147424697876, + "learning_rate": 0.0015, + "loss": 1.9998, + "step": 6144 + }, + { + "epoch": 0.6482067510548524, + "grad_norm": 0.45609694719314575, + "learning_rate": 0.0015, + "loss": 2.0356, + "step": 6145 + }, + { + "epoch": 0.6483122362869198, + "grad_norm": 0.40231776237487793, + "learning_rate": 0.0015, + "loss": 2.0064, + "step": 6146 + }, + { + "epoch": 0.6484177215189874, + "grad_norm": 0.39764973521232605, + "learning_rate": 0.0015, + "loss": 2.0265, + "step": 6147 + }, + { + "epoch": 0.6485232067510549, + "grad_norm": 0.3883860409259796, + "learning_rate": 0.0015, + "loss": 1.9848, + "step": 6148 + }, + { + "epoch": 0.6486286919831223, + "grad_norm": 0.36958402395248413, + "learning_rate": 0.0015, + "loss": 2.0273, + "step": 6149 + }, + { + "epoch": 0.6487341772151899, + "grad_norm": 0.4773707985877991, + "learning_rate": 0.0015, + "loss": 2.0496, + "step": 6150 + }, + { + "epoch": 0.6488396624472574, + "grad_norm": 0.3897709846496582, + "learning_rate": 0.0015, + "loss": 2.0091, + "step": 6151 + }, + { + "epoch": 0.6489451476793249, + "grad_norm": 0.4106290638446808, + "learning_rate": 0.0015, + "loss": 2.0347, + "step": 6152 + }, + { + "epoch": 0.6490506329113924, + "grad_norm": 0.4400649070739746, + "learning_rate": 0.0015, + "loss": 2.0014, + "step": 6153 + }, + { + "epoch": 0.64915611814346, + "grad_norm": 0.3589843809604645, + "learning_rate": 0.0015, + "loss": 1.988, + "step": 6154 + }, + { + "epoch": 0.6492616033755274, + "grad_norm": 0.4842980206012726, + "learning_rate": 0.0015, + "loss": 2.0082, + "step": 6155 + }, + { + "epoch": 0.649367088607595, + "grad_norm": 0.4376246929168701, + "learning_rate": 0.0015, + "loss": 2.0057, + "step": 6156 + }, + { + "epoch": 0.6494725738396624, + "grad_norm": 0.39331701397895813, + "learning_rate": 0.0015, + "loss": 2.0074, + "step": 6157 + }, + { + "epoch": 0.6495780590717299, + "grad_norm": 0.44102054834365845, + "learning_rate": 0.0015, 
+ "loss": 2.042, + "step": 6158 + }, + { + "epoch": 0.6496835443037975, + "grad_norm": 0.39156854152679443, + "learning_rate": 0.0015, + "loss": 1.9698, + "step": 6159 + }, + { + "epoch": 0.6497890295358649, + "grad_norm": 0.44169601798057556, + "learning_rate": 0.0015, + "loss": 2.0293, + "step": 6160 + }, + { + "epoch": 0.6498945147679325, + "grad_norm": 0.42820584774017334, + "learning_rate": 0.0015, + "loss": 2.0146, + "step": 6161 + }, + { + "epoch": 0.65, + "grad_norm": 0.3560152053833008, + "learning_rate": 0.0015, + "loss": 1.9856, + "step": 6162 + }, + { + "epoch": 0.6501054852320675, + "grad_norm": 0.4561244547367096, + "learning_rate": 0.0015, + "loss": 1.9586, + "step": 6163 + }, + { + "epoch": 0.650210970464135, + "grad_norm": 0.4695797264575958, + "learning_rate": 0.0014979195407665976, + "loss": 1.977, + "step": 6164 + }, + { + "epoch": 0.6503164556962026, + "grad_norm": 0.37212568521499634, + "learning_rate": 0.00149584196707361, + "loss": 1.9837, + "step": 6165 + }, + { + "epoch": 0.65042194092827, + "grad_norm": 0.45452797412872314, + "learning_rate": 0.0014937672749188704, + "loss": 1.9792, + "step": 6166 + }, + { + "epoch": 0.6505274261603375, + "grad_norm": 0.4203944802284241, + "learning_rate": 0.0014916954603057643, + "loss": 1.9938, + "step": 6167 + }, + { + "epoch": 0.6506329113924051, + "grad_norm": 0.4461251199245453, + "learning_rate": 0.0014896265192432194, + "loss": 1.9857, + "step": 6168 + }, + { + "epoch": 0.6507383966244725, + "grad_norm": 0.4446987211704254, + "learning_rate": 0.001487560447745699, + "loss": 2.0356, + "step": 6169 + }, + { + "epoch": 0.6508438818565401, + "grad_norm": 0.4500492811203003, + "learning_rate": 0.0014854972418331944, + "loss": 1.9998, + "step": 6170 + }, + { + "epoch": 0.6509493670886076, + "grad_norm": 0.3463185429573059, + "learning_rate": 0.0014834368975312174, + "loss": 2.0054, + "step": 6171 + }, + { + "epoch": 0.6510548523206751, + "grad_norm": 0.43545740842819214, + "learning_rate": 
0.0014813794108707917, + "loss": 1.9766, + "step": 6172 + }, + { + "epoch": 0.6511603375527426, + "grad_norm": 0.3530226945877075, + "learning_rate": 0.0014793247778884461, + "loss": 2.0099, + "step": 6173 + }, + { + "epoch": 0.6512658227848102, + "grad_norm": 0.4953750669956207, + "learning_rate": 0.0014772729946262069, + "loss": 2.034, + "step": 6174 + }, + { + "epoch": 0.6513713080168776, + "grad_norm": 0.4214910864830017, + "learning_rate": 0.0014752240571315894, + "loss": 1.9944, + "step": 6175 + }, + { + "epoch": 0.6514767932489451, + "grad_norm": 0.4579445421695709, + "learning_rate": 0.0014731779614575917, + "loss": 2.0223, + "step": 6176 + }, + { + "epoch": 0.6515822784810127, + "grad_norm": 0.5364389419555664, + "learning_rate": 0.0014711347036626854, + "loss": 2.0181, + "step": 6177 + }, + { + "epoch": 0.6516877637130801, + "grad_norm": 0.41264569759368896, + "learning_rate": 0.0014690942798108097, + "loss": 2.0084, + "step": 6178 + }, + { + "epoch": 0.6517932489451477, + "grad_norm": 0.44581231474876404, + "learning_rate": 0.0014670566859713624, + "loss": 2.0112, + "step": 6179 + }, + { + "epoch": 0.6518987341772152, + "grad_norm": 0.4332638084888458, + "learning_rate": 0.0014650219182191931, + "loss": 1.996, + "step": 6180 + }, + { + "epoch": 0.6520042194092827, + "grad_norm": 0.43176066875457764, + "learning_rate": 0.0014629899726345957, + "loss": 2.0251, + "step": 6181 + }, + { + "epoch": 0.6521097046413502, + "grad_norm": 0.35564082860946655, + "learning_rate": 0.0014609608453033007, + "loss": 2.0184, + "step": 6182 + }, + { + "epoch": 0.6522151898734178, + "grad_norm": 0.4774470627307892, + "learning_rate": 0.001458934532316467, + "loss": 2.0103, + "step": 6183 + }, + { + "epoch": 0.6523206751054852, + "grad_norm": 0.4093763530254364, + "learning_rate": 0.0014569110297706755, + "loss": 2.018, + "step": 6184 + }, + { + "epoch": 0.6524261603375527, + "grad_norm": 0.41481560468673706, + "learning_rate": 0.0014548903337679206, + "loss": 2.0068, + 
"step": 6185 + }, + { + "epoch": 0.6525316455696203, + "grad_norm": 0.38833722472190857, + "learning_rate": 0.0014528724404156037, + "loss": 2.0218, + "step": 6186 + }, + { + "epoch": 0.6526371308016877, + "grad_norm": 0.4962322413921356, + "learning_rate": 0.0014508573458265248, + "loss": 1.978, + "step": 6187 + }, + { + "epoch": 0.6527426160337553, + "grad_norm": 0.400362104177475, + "learning_rate": 0.0014488450461188752, + "loss": 1.9827, + "step": 6188 + }, + { + "epoch": 0.6528481012658228, + "grad_norm": 0.5311994552612305, + "learning_rate": 0.0014468355374162303, + "loss": 1.9985, + "step": 6189 + }, + { + "epoch": 0.6529535864978903, + "grad_norm": 0.4971667230129242, + "learning_rate": 0.001444828815847542, + "loss": 2.0389, + "step": 6190 + }, + { + "epoch": 0.6530590717299578, + "grad_norm": 0.4182915985584259, + "learning_rate": 0.0014428248775471316, + "loss": 2.0153, + "step": 6191 + }, + { + "epoch": 0.6531645569620254, + "grad_norm": 0.3909006714820862, + "learning_rate": 0.0014408237186546813, + "loss": 1.9737, + "step": 6192 + }, + { + "epoch": 0.6532700421940928, + "grad_norm": 0.5163107514381409, + "learning_rate": 0.0014388253353152278, + "loss": 1.9993, + "step": 6193 + }, + { + "epoch": 0.6533755274261603, + "grad_norm": 0.39276039600372314, + "learning_rate": 0.0014368297236791545, + "loss": 1.9787, + "step": 6194 + }, + { + "epoch": 0.6534810126582279, + "grad_norm": 0.5092513561248779, + "learning_rate": 0.0014348368799021844, + "loss": 2.0146, + "step": 6195 + }, + { + "epoch": 0.6535864978902953, + "grad_norm": 0.4169251024723053, + "learning_rate": 0.0014328468001453718, + "loss": 1.9804, + "step": 6196 + }, + { + "epoch": 0.6536919831223629, + "grad_norm": 0.4174463152885437, + "learning_rate": 0.001430859480575096, + "loss": 1.9899, + "step": 6197 + }, + { + "epoch": 0.6537974683544304, + "grad_norm": 0.41544970870018005, + "learning_rate": 0.0014288749173630535, + "loss": 2.0057, + "step": 6198 + }, + { + "epoch": 
0.6539029535864979, + "grad_norm": 0.3690662086009979, + "learning_rate": 0.0014268931066862504, + "loss": 2.033, + "step": 6199 + }, + { + "epoch": 0.6540084388185654, + "grad_norm": 0.398246705532074, + "learning_rate": 0.0014249140447269945, + "loss": 2.0133, + "step": 6200 + }, + { + "epoch": 0.654113924050633, + "grad_norm": 0.4535138010978699, + "learning_rate": 0.00142293772767289, + "loss": 1.983, + "step": 6201 + }, + { + "epoch": 0.6542194092827004, + "grad_norm": 0.34308239817619324, + "learning_rate": 0.0014209641517168275, + "loss": 1.9964, + "step": 6202 + }, + { + "epoch": 0.6543248945147679, + "grad_norm": 0.40974161028862, + "learning_rate": 0.001418993313056979, + "loss": 1.9789, + "step": 6203 + }, + { + "epoch": 0.6544303797468355, + "grad_norm": 0.3641262650489807, + "learning_rate": 0.0014170252078967885, + "loss": 1.9961, + "step": 6204 + }, + { + "epoch": 0.6545358649789029, + "grad_norm": 0.4180217981338501, + "learning_rate": 0.0014150598324449667, + "loss": 1.9914, + "step": 6205 + }, + { + "epoch": 0.6546413502109705, + "grad_norm": 0.33713096380233765, + "learning_rate": 0.001413097182915482, + "loss": 2.0206, + "step": 6206 + }, + { + "epoch": 0.654746835443038, + "grad_norm": 0.4332951605319977, + "learning_rate": 0.0014111372555275542, + "loss": 2.0183, + "step": 6207 + }, + { + "epoch": 0.6548523206751055, + "grad_norm": 0.44042903184890747, + "learning_rate": 0.0014091800465056473, + "loss": 1.9943, + "step": 6208 + }, + { + "epoch": 0.654957805907173, + "grad_norm": 0.39445042610168457, + "learning_rate": 0.0014072255520794614, + "loss": 1.9945, + "step": 6209 + }, + { + "epoch": 0.6550632911392406, + "grad_norm": 0.4523070156574249, + "learning_rate": 0.0014052737684839257, + "loss": 1.9849, + "step": 6210 + }, + { + "epoch": 0.655168776371308, + "grad_norm": 0.40041399002075195, + "learning_rate": 0.001403324691959192, + "loss": 2.0031, + "step": 6211 + }, + { + "epoch": 0.6552742616033755, + "grad_norm": 0.5486627221107483, + 
"learning_rate": 0.0014013783187506268, + "loss": 2.021, + "step": 6212 + }, + { + "epoch": 0.6553797468354431, + "grad_norm": 0.4031299650669098, + "learning_rate": 0.0013994346451088036, + "loss": 2.0022, + "step": 6213 + }, + { + "epoch": 0.6554852320675105, + "grad_norm": 0.4493241608142853, + "learning_rate": 0.0013974936672894972, + "loss": 1.9825, + "step": 6214 + }, + { + "epoch": 0.6555907172995781, + "grad_norm": 0.4001169800758362, + "learning_rate": 0.0013955553815536747, + "loss": 2.0084, + "step": 6215 + }, + { + "epoch": 0.6556962025316456, + "grad_norm": 0.43979132175445557, + "learning_rate": 0.0013936197841674894, + "loss": 2.0303, + "step": 6216 + }, + { + "epoch": 0.6558016877637131, + "grad_norm": 0.42410704493522644, + "learning_rate": 0.0013916868714022737, + "loss": 1.9898, + "step": 6217 + }, + { + "epoch": 0.6559071729957806, + "grad_norm": 0.4128018617630005, + "learning_rate": 0.0013897566395345313, + "loss": 2.0166, + "step": 6218 + }, + { + "epoch": 0.6560126582278482, + "grad_norm": 0.4990490972995758, + "learning_rate": 0.0013878290848459301, + "loss": 2.0043, + "step": 6219 + }, + { + "epoch": 0.6561181434599156, + "grad_norm": 0.3533079922199249, + "learning_rate": 0.0013859042036232954, + "loss": 1.9861, + "step": 6220 + }, + { + "epoch": 0.6562236286919831, + "grad_norm": 0.4999893605709076, + "learning_rate": 0.0013839819921586025, + "loss": 2.0122, + "step": 6221 + }, + { + "epoch": 0.6563291139240506, + "grad_norm": 0.52134108543396, + "learning_rate": 0.00138206244674897, + "loss": 1.9865, + "step": 6222 + }, + { + "epoch": 0.6564345991561181, + "grad_norm": 0.4713158905506134, + "learning_rate": 0.0013801455636966516, + "loss": 1.9973, + "step": 6223 + }, + { + "epoch": 0.6565400843881857, + "grad_norm": 0.5214139223098755, + "learning_rate": 0.0013782313393090303, + "loss": 1.9835, + "step": 6224 + }, + { + "epoch": 0.6566455696202531, + "grad_norm": 0.5883769392967224, + "learning_rate": 0.0013763197698986101, + "loss": 
2.0036, + "step": 6225 + }, + { + "epoch": 0.6567510548523207, + "grad_norm": 0.44938382506370544, + "learning_rate": 0.0013744108517830104, + "loss": 2.0117, + "step": 6226 + }, + { + "epoch": 0.6568565400843882, + "grad_norm": 0.39982834458351135, + "learning_rate": 0.0013725045812849569, + "loss": 1.9869, + "step": 6227 + }, + { + "epoch": 0.6569620253164556, + "grad_norm": 0.5086151361465454, + "learning_rate": 0.001370600954732276, + "loss": 1.9862, + "step": 6228 + }, + { + "epoch": 0.6570675105485232, + "grad_norm": 0.39855504035949707, + "learning_rate": 0.0013686999684578874, + "loss": 1.9876, + "step": 6229 + }, + { + "epoch": 0.6571729957805907, + "grad_norm": 0.38264045119285583, + "learning_rate": 0.001366801618799797, + "loss": 2.0133, + "step": 6230 + }, + { + "epoch": 0.6572784810126582, + "grad_norm": 0.39269647002220154, + "learning_rate": 0.0013649059021010894, + "loss": 2.0081, + "step": 6231 + }, + { + "epoch": 0.6573839662447257, + "grad_norm": 0.38177722692489624, + "learning_rate": 0.0013630128147099215, + "loss": 1.9921, + "step": 6232 + }, + { + "epoch": 0.6574894514767933, + "grad_norm": 0.3814459443092346, + "learning_rate": 0.0013611223529795156, + "loss": 2.0307, + "step": 6233 + }, + { + "epoch": 0.6575949367088607, + "grad_norm": 0.43225282430648804, + "learning_rate": 0.001359234513268151, + "loss": 2.0183, + "step": 6234 + }, + { + "epoch": 0.6577004219409283, + "grad_norm": 0.35289061069488525, + "learning_rate": 0.0013573492919391594, + "loss": 2.0072, + "step": 6235 + }, + { + "epoch": 0.6578059071729958, + "grad_norm": 0.4448026716709137, + "learning_rate": 0.0013554666853609146, + "loss": 1.9891, + "step": 6236 + }, + { + "epoch": 0.6579113924050632, + "grad_norm": 0.4035649597644806, + "learning_rate": 0.001353586689906829, + "loss": 1.9662, + "step": 6237 + }, + { + "epoch": 0.6580168776371308, + "grad_norm": 0.44123443961143494, + "learning_rate": 0.0013517093019553442, + "loss": 1.9961, + "step": 6238 + }, + { + "epoch": 
0.6581223628691983, + "grad_norm": 0.4470311403274536, + "learning_rate": 0.001349834517889925, + "loss": 1.9552, + "step": 6239 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 0.4736505448818207, + "learning_rate": 0.001347962334099052, + "loss": 1.9781, + "step": 6240 + }, + { + "epoch": 0.6583333333333333, + "grad_norm": 0.3713541030883789, + "learning_rate": 0.0013460927469762154, + "loss": 2.0003, + "step": 6241 + }, + { + "epoch": 0.6584388185654009, + "grad_norm": 0.45848220586776733, + "learning_rate": 0.0013442257529199069, + "loss": 2.0057, + "step": 6242 + }, + { + "epoch": 0.6585443037974683, + "grad_norm": 0.45993876457214355, + "learning_rate": 0.0013423613483336142, + "loss": 2.0399, + "step": 6243 + }, + { + "epoch": 0.6586497890295359, + "grad_norm": 0.42340123653411865, + "learning_rate": 0.001340499529625812, + "loss": 2.0157, + "step": 6244 + }, + { + "epoch": 0.6587552742616034, + "grad_norm": 0.5182707905769348, + "learning_rate": 0.0013386402932099575, + "loss": 1.993, + "step": 6245 + }, + { + "epoch": 0.6588607594936708, + "grad_norm": 0.4052259922027588, + "learning_rate": 0.0013367836355044822, + "loss": 1.9779, + "step": 6246 + }, + { + "epoch": 0.6589662447257384, + "grad_norm": 0.3939439058303833, + "learning_rate": 0.0013349295529327845, + "loss": 1.9918, + "step": 6247 + }, + { + "epoch": 0.6590717299578059, + "grad_norm": 0.3971043825149536, + "learning_rate": 0.0013330780419232241, + "loss": 1.9702, + "step": 6248 + }, + { + "epoch": 0.6591772151898734, + "grad_norm": 0.4314514398574829, + "learning_rate": 0.001331229098909114, + "loss": 2.0247, + "step": 6249 + }, + { + "epoch": 0.6592827004219409, + "grad_norm": 0.37807610630989075, + "learning_rate": 0.0013293827203287143, + "loss": 2.0177, + "step": 6250 + }, + { + "epoch": 0.6593881856540085, + "grad_norm": 0.38560813665390015, + "learning_rate": 0.0013275389026252255, + "loss": 1.9908, + "step": 6251 + }, + { + "epoch": 0.6594936708860759, + "grad_norm": 
0.4129689037799835, + "learning_rate": 0.0013256976422467803, + "loss": 2.0109, + "step": 6252 + }, + { + "epoch": 0.6595991561181435, + "grad_norm": 0.3783426582813263, + "learning_rate": 0.001323858935646439, + "loss": 1.9936, + "step": 6253 + }, + { + "epoch": 0.659704641350211, + "grad_norm": 0.4526903033256531, + "learning_rate": 0.0013220227792821804, + "loss": 2.0058, + "step": 6254 + }, + { + "epoch": 0.6598101265822784, + "grad_norm": 0.3922278881072998, + "learning_rate": 0.0013201891696168965, + "loss": 2.0022, + "step": 6255 + }, + { + "epoch": 0.659915611814346, + "grad_norm": 0.3703257739543915, + "learning_rate": 0.001318358103118385, + "loss": 1.977, + "step": 6256 + }, + { + "epoch": 0.6600210970464135, + "grad_norm": 0.39104145765304565, + "learning_rate": 0.0013165295762593426, + "loss": 1.9737, + "step": 6257 + }, + { + "epoch": 0.660126582278481, + "grad_norm": 0.3535842299461365, + "learning_rate": 0.0013147035855173587, + "loss": 2.0114, + "step": 6258 + }, + { + "epoch": 0.6602320675105485, + "grad_norm": 0.39096301794052124, + "learning_rate": 0.0013128801273749075, + "loss": 1.9999, + "step": 6259 + }, + { + "epoch": 0.6603375527426161, + "grad_norm": 0.345536470413208, + "learning_rate": 0.0013110591983193423, + "loss": 2.0011, + "step": 6260 + }, + { + "epoch": 0.6604430379746835, + "grad_norm": 0.4379611313343048, + "learning_rate": 0.0013092407948428887, + "loss": 1.9981, + "step": 6261 + }, + { + "epoch": 0.6605485232067511, + "grad_norm": 0.41607466340065, + "learning_rate": 0.0013074249134426368, + "loss": 2.0359, + "step": 6262 + }, + { + "epoch": 0.6606540084388186, + "grad_norm": 0.41219013929367065, + "learning_rate": 0.0013056115506205354, + "loss": 1.9371, + "step": 6263 + }, + { + "epoch": 0.660759493670886, + "grad_norm": 0.44657742977142334, + "learning_rate": 0.0013038007028833853, + "loss": 2.0386, + "step": 6264 + }, + { + "epoch": 0.6608649789029536, + "grad_norm": 0.4639435410499573, + "learning_rate": 
0.001301992366742832, + "loss": 1.9883, + "step": 6265 + }, + { + "epoch": 0.6609704641350211, + "grad_norm": 0.42738333344459534, + "learning_rate": 0.0013001865387153588, + "loss": 2.0118, + "step": 6266 + }, + { + "epoch": 0.6610759493670886, + "grad_norm": 0.45000913739204407, + "learning_rate": 0.0012983832153222814, + "loss": 2.0155, + "step": 6267 + }, + { + "epoch": 0.6611814345991561, + "grad_norm": 0.4011460840702057, + "learning_rate": 0.0012965823930897401, + "loss": 1.9507, + "step": 6268 + }, + { + "epoch": 0.6612869198312237, + "grad_norm": 0.36897197365760803, + "learning_rate": 0.0012947840685486932, + "loss": 2.0066, + "step": 6269 + }, + { + "epoch": 0.6613924050632911, + "grad_norm": 0.3908213675022125, + "learning_rate": 0.0012929882382349102, + "loss": 1.9605, + "step": 6270 + }, + { + "epoch": 0.6614978902953587, + "grad_norm": 0.38120532035827637, + "learning_rate": 0.001291194898688966, + "loss": 1.9751, + "step": 6271 + }, + { + "epoch": 0.6616033755274262, + "grad_norm": 0.3967539668083191, + "learning_rate": 0.001289404046456233, + "loss": 2.0096, + "step": 6272 + }, + { + "epoch": 0.6617088607594936, + "grad_norm": 0.35375019907951355, + "learning_rate": 0.0012876156780868755, + "loss": 2.0486, + "step": 6273 + }, + { + "epoch": 0.6618143459915612, + "grad_norm": 0.41926050186157227, + "learning_rate": 0.0012858297901358424, + "loss": 1.9789, + "step": 6274 + }, + { + "epoch": 0.6619198312236287, + "grad_norm": 0.3685877025127411, + "learning_rate": 0.001284046379162861, + "loss": 2.0094, + "step": 6275 + }, + { + "epoch": 0.6620253164556962, + "grad_norm": 0.4357570707798004, + "learning_rate": 0.0012822654417324305, + "loss": 2.0001, + "step": 6276 + }, + { + "epoch": 0.6621308016877637, + "grad_norm": 0.36674752831459045, + "learning_rate": 0.0012804869744138137, + "loss": 1.959, + "step": 6277 + }, + { + "epoch": 0.6622362869198313, + "grad_norm": 0.391685426235199, + "learning_rate": 0.0012787109737810332, + "loss": 1.9893, + 
"step": 6278 + }, + { + "epoch": 0.6623417721518987, + "grad_norm": 0.3906556963920593, + "learning_rate": 0.0012769374364128628, + "loss": 2.0009, + "step": 6279 + }, + { + "epoch": 0.6624472573839663, + "grad_norm": 0.38061681389808655, + "learning_rate": 0.0012751663588928214, + "loss": 1.9879, + "step": 6280 + }, + { + "epoch": 0.6625527426160338, + "grad_norm": 0.42537084221839905, + "learning_rate": 0.001273397737809166, + "loss": 1.9523, + "step": 6281 + }, + { + "epoch": 0.6626582278481012, + "grad_norm": 0.4115712344646454, + "learning_rate": 0.001271631569754887, + "loss": 1.9919, + "step": 6282 + }, + { + "epoch": 0.6627637130801688, + "grad_norm": 0.4476662576198578, + "learning_rate": 0.0012698678513276987, + "loss": 1.9608, + "step": 6283 + }, + { + "epoch": 0.6628691983122363, + "grad_norm": 0.36357593536376953, + "learning_rate": 0.0012681065791300351, + "loss": 2.0179, + "step": 6284 + }, + { + "epoch": 0.6629746835443038, + "grad_norm": 0.4467647969722748, + "learning_rate": 0.0012663477497690421, + "loss": 1.9809, + "step": 6285 + }, + { + "epoch": 0.6630801687763713, + "grad_norm": 0.3447054624557495, + "learning_rate": 0.0012645913598565719, + "loss": 1.9945, + "step": 6286 + }, + { + "epoch": 0.6631856540084389, + "grad_norm": 0.45712217688560486, + "learning_rate": 0.0012628374060091757, + "loss": 2.008, + "step": 6287 + }, + { + "epoch": 0.6632911392405063, + "grad_norm": 0.4098168909549713, + "learning_rate": 0.0012610858848480973, + "loss": 1.9933, + "step": 6288 + }, + { + "epoch": 0.6633966244725739, + "grad_norm": 0.39726775884628296, + "learning_rate": 0.0012593367929992667, + "loss": 2.0089, + "step": 6289 + }, + { + "epoch": 0.6635021097046413, + "grad_norm": 0.4396860897541046, + "learning_rate": 0.0012575901270932943, + "loss": 1.9717, + "step": 6290 + }, + { + "epoch": 0.6636075949367088, + "grad_norm": 0.38738390803337097, + "learning_rate": 0.001255845883765463, + "loss": 1.9493, + "step": 6291 + }, + { + "epoch": 
0.6637130801687764, + "grad_norm": 0.44322770833969116, + "learning_rate": 0.0012541040596557229, + "loss": 2.0267, + "step": 6292 + }, + { + "epoch": 0.6638185654008438, + "grad_norm": 0.4061475992202759, + "learning_rate": 0.001252364651408684, + "loss": 1.9835, + "step": 6293 + }, + { + "epoch": 0.6639240506329114, + "grad_norm": 0.38645291328430176, + "learning_rate": 0.001250627655673611, + "loss": 1.9868, + "step": 6294 + }, + { + "epoch": 0.6640295358649789, + "grad_norm": 0.42740800976753235, + "learning_rate": 0.0012488930691044145, + "loss": 1.9813, + "step": 6295 + }, + { + "epoch": 0.6641350210970464, + "grad_norm": 0.4072602689266205, + "learning_rate": 0.0012471608883596475, + "loss": 1.9792, + "step": 6296 + }, + { + "epoch": 0.6642405063291139, + "grad_norm": 0.3686123788356781, + "learning_rate": 0.0012454311101024967, + "loss": 1.9592, + "step": 6297 + }, + { + "epoch": 0.6643459915611815, + "grad_norm": 0.4056437611579895, + "learning_rate": 0.0012437037310007774, + "loss": 1.9934, + "step": 6298 + }, + { + "epoch": 0.6644514767932489, + "grad_norm": 0.3838574290275574, + "learning_rate": 0.0012419787477269257, + "loss": 1.9637, + "step": 6299 + }, + { + "epoch": 0.6645569620253164, + "grad_norm": 0.3948359787464142, + "learning_rate": 0.0012402561569579936, + "loss": 1.9563, + "step": 6300 + }, + { + "epoch": 0.664662447257384, + "grad_norm": 0.37976375222206116, + "learning_rate": 0.001238535955375642, + "loss": 1.9757, + "step": 6301 + }, + { + "epoch": 0.6647679324894514, + "grad_norm": 0.389725923538208, + "learning_rate": 0.001236818139666134, + "loss": 1.9699, + "step": 6302 + }, + { + "epoch": 0.664873417721519, + "grad_norm": 0.35241055488586426, + "learning_rate": 0.0012351027065203286, + "loss": 1.9634, + "step": 6303 + }, + { + "epoch": 0.6649789029535865, + "grad_norm": 0.4377902150154114, + "learning_rate": 0.001233389652633675, + "loss": 1.9512, + "step": 6304 + }, + { + "epoch": 0.665084388185654, + "grad_norm": 
0.3973429501056671, + "learning_rate": 0.001231678974706205, + "loss": 1.9791, + "step": 6305 + }, + { + "epoch": 0.6651898734177215, + "grad_norm": 0.38975775241851807, + "learning_rate": 0.0012299706694425285, + "loss": 1.9699, + "step": 6306 + }, + { + "epoch": 0.6652953586497891, + "grad_norm": 0.42488881945610046, + "learning_rate": 0.0012282647335518245, + "loss": 2.0233, + "step": 6307 + }, + { + "epoch": 0.6654008438818565, + "grad_norm": 0.389030784368515, + "learning_rate": 0.0012265611637478377, + "loss": 1.9823, + "step": 6308 + }, + { + "epoch": 0.665506329113924, + "grad_norm": 0.46155330538749695, + "learning_rate": 0.0012248599567488698, + "loss": 1.9873, + "step": 6309 + }, + { + "epoch": 0.6656118143459916, + "grad_norm": 0.3743829131126404, + "learning_rate": 0.0012231611092777745, + "loss": 1.9955, + "step": 6310 + }, + { + "epoch": 0.665717299578059, + "grad_norm": 0.471852570772171, + "learning_rate": 0.0012214646180619506, + "loss": 1.9972, + "step": 6311 + }, + { + "epoch": 0.6658227848101266, + "grad_norm": 0.4110696613788605, + "learning_rate": 0.0012197704798333365, + "loss": 1.9506, + "step": 6312 + }, + { + "epoch": 0.6659282700421941, + "grad_norm": 0.5150596499443054, + "learning_rate": 0.0012180786913284026, + "loss": 1.9882, + "step": 6313 + }, + { + "epoch": 0.6660337552742616, + "grad_norm": 0.5053423643112183, + "learning_rate": 0.001216389249288146, + "loss": 1.9693, + "step": 6314 + }, + { + "epoch": 0.6661392405063291, + "grad_norm": 0.4201924502849579, + "learning_rate": 0.0012147021504580842, + "loss": 1.9679, + "step": 6315 + }, + { + "epoch": 0.6662447257383967, + "grad_norm": 0.4127747416496277, + "learning_rate": 0.0012130173915882478, + "loss": 1.9833, + "step": 6316 + }, + { + "epoch": 0.6663502109704641, + "grad_norm": 0.4396767020225525, + "learning_rate": 0.0012113349694331762, + "loss": 2.0141, + "step": 6317 + }, + { + "epoch": 0.6664556962025316, + "grad_norm": 0.4095035791397095, + "learning_rate": 
0.0012096548807519092, + "loss": 1.9648, + "step": 6318 + }, + { + "epoch": 0.6665611814345992, + "grad_norm": 0.5092646479606628, + "learning_rate": 0.0012079771223079822, + "loss": 1.9938, + "step": 6319 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.38493412733078003, + "learning_rate": 0.0012063016908694193, + "loss": 1.9586, + "step": 6320 + }, + { + "epoch": 0.6667721518987342, + "grad_norm": 0.5002780556678772, + "learning_rate": 0.001204628583208727, + "loss": 2.0086, + "step": 6321 + }, + { + "epoch": 0.6668776371308017, + "grad_norm": 0.4573986530303955, + "learning_rate": 0.0012029577961028893, + "loss": 1.9968, + "step": 6322 + }, + { + "epoch": 0.6669831223628692, + "grad_norm": 0.40531009435653687, + "learning_rate": 0.0012012893263333587, + "loss": 1.9821, + "step": 6323 + }, + { + "epoch": 0.6670886075949367, + "grad_norm": 0.4135817587375641, + "learning_rate": 0.0011996231706860535, + "loss": 2.0138, + "step": 6324 + }, + { + "epoch": 0.6671940928270043, + "grad_norm": 0.4575827121734619, + "learning_rate": 0.0011979593259513486, + "loss": 1.9896, + "step": 6325 + }, + { + "epoch": 0.6672995780590717, + "grad_norm": 0.4463598132133484, + "learning_rate": 0.0011962977889240713, + "loss": 1.9939, + "step": 6326 + }, + { + "epoch": 0.6674050632911392, + "grad_norm": 0.45714375376701355, + "learning_rate": 0.001194638556403494, + "loss": 1.9926, + "step": 6327 + }, + { + "epoch": 0.6675105485232068, + "grad_norm": 0.5328767895698547, + "learning_rate": 0.0011929816251933286, + "loss": 1.9245, + "step": 6328 + }, + { + "epoch": 0.6676160337552742, + "grad_norm": 0.43058258295059204, + "learning_rate": 0.0011913269921017202, + "loss": 1.9598, + "step": 6329 + }, + { + "epoch": 0.6677215189873418, + "grad_norm": 0.4758239984512329, + "learning_rate": 0.0011896746539412405, + "loss": 2.019, + "step": 6330 + }, + { + "epoch": 0.6678270042194093, + "grad_norm": 0.40336087346076965, + "learning_rate": 0.0011880246075288827, + "loss": 1.9465, + 
"step": 6331 + }, + { + "epoch": 0.6679324894514768, + "grad_norm": 0.4809800386428833, + "learning_rate": 0.001186376849686054, + "loss": 2.0159, + "step": 6332 + }, + { + "epoch": 0.6680379746835443, + "grad_norm": 0.39943379163742065, + "learning_rate": 0.0011847313772385714, + "loss": 2.0022, + "step": 6333 + }, + { + "epoch": 0.6681434599156119, + "grad_norm": 0.41162511706352234, + "learning_rate": 0.0011830881870166531, + "loss": 1.9647, + "step": 6334 + }, + { + "epoch": 0.6682489451476793, + "grad_norm": 0.3970550298690796, + "learning_rate": 0.0011814472758549144, + "loss": 1.9538, + "step": 6335 + }, + { + "epoch": 0.6683544303797468, + "grad_norm": 0.371096134185791, + "learning_rate": 0.0011798086405923607, + "loss": 2.0121, + "step": 6336 + }, + { + "epoch": 0.6684599156118144, + "grad_norm": 0.3574126660823822, + "learning_rate": 0.0011781722780723819, + "loss": 1.9828, + "step": 6337 + }, + { + "epoch": 0.6685654008438818, + "grad_norm": 0.39453479647636414, + "learning_rate": 0.0011765381851427457, + "loss": 1.9853, + "step": 6338 + }, + { + "epoch": 0.6686708860759494, + "grad_norm": 0.36178073287010193, + "learning_rate": 0.0011749063586555919, + "loss": 2.0014, + "step": 6339 + }, + { + "epoch": 0.6687763713080169, + "grad_norm": 0.35662415623664856, + "learning_rate": 0.0011732767954674265, + "loss": 1.9874, + "step": 6340 + }, + { + "epoch": 0.6688818565400844, + "grad_norm": 0.37337714433670044, + "learning_rate": 0.001171649492439115, + "loss": 1.9712, + "step": 6341 + }, + { + "epoch": 0.6689873417721519, + "grad_norm": 0.3478543758392334, + "learning_rate": 0.0011700244464358776, + "loss": 1.9982, + "step": 6342 + }, + { + "epoch": 0.6690928270042195, + "grad_norm": 0.35482534766197205, + "learning_rate": 0.0011684016543272815, + "loss": 1.9911, + "step": 6343 + }, + { + "epoch": 0.6691983122362869, + "grad_norm": 0.39000511169433594, + "learning_rate": 0.0011667811129872365, + "loss": 1.9667, + "step": 6344 + }, + { + "epoch": 
0.6693037974683544, + "grad_norm": 0.388772577047348, + "learning_rate": 0.0011651628192939872, + "loss": 1.9668, + "step": 6345 + }, + { + "epoch": 0.669409282700422, + "grad_norm": 0.4110354483127594, + "learning_rate": 0.001163546770130109, + "loss": 1.996, + "step": 6346 + }, + { + "epoch": 0.6695147679324894, + "grad_norm": 0.42598408460617065, + "learning_rate": 0.0011619329623825006, + "loss": 1.957, + "step": 6347 + }, + { + "epoch": 0.669620253164557, + "grad_norm": 0.3789190351963043, + "learning_rate": 0.0011603213929423785, + "loss": 1.9646, + "step": 6348 + }, + { + "epoch": 0.6697257383966245, + "grad_norm": 0.43157055974006653, + "learning_rate": 0.001158712058705271, + "loss": 1.9803, + "step": 6349 + }, + { + "epoch": 0.669831223628692, + "grad_norm": 0.394418329000473, + "learning_rate": 0.0011571049565710122, + "loss": 1.9779, + "step": 6350 + }, + { + "epoch": 0.6699367088607595, + "grad_norm": 0.38260671496391296, + "learning_rate": 0.0011555000834437364, + "loss": 1.9794, + "step": 6351 + }, + { + "epoch": 0.6700421940928271, + "grad_norm": 0.44286268949508667, + "learning_rate": 0.0011538974362318712, + "loss": 1.9636, + "step": 6352 + }, + { + "epoch": 0.6701476793248945, + "grad_norm": 0.37248238921165466, + "learning_rate": 0.0011522970118481325, + "loss": 1.9667, + "step": 6353 + }, + { + "epoch": 0.670253164556962, + "grad_norm": 0.3619086444377899, + "learning_rate": 0.0011506988072095183, + "loss": 2.0091, + "step": 6354 + }, + { + "epoch": 0.6703586497890295, + "grad_norm": 0.3744290769100189, + "learning_rate": 0.0011491028192373023, + "loss": 1.9375, + "step": 6355 + }, + { + "epoch": 0.670464135021097, + "grad_norm": 0.3835381269454956, + "learning_rate": 0.0011475090448570281, + "loss": 1.9793, + "step": 6356 + }, + { + "epoch": 0.6705696202531646, + "grad_norm": 0.3814859092235565, + "learning_rate": 0.0011459174809985047, + "loss": 1.969, + "step": 6357 + }, + { + "epoch": 0.670675105485232, + "grad_norm": 0.406585693359375, + 
"learning_rate": 0.0011443281245957977, + "loss": 2.0155, + "step": 6358 + }, + { + "epoch": 0.6707805907172996, + "grad_norm": 0.44129449129104614, + "learning_rate": 0.0011427409725872262, + "loss": 1.9588, + "step": 6359 + }, + { + "epoch": 0.6708860759493671, + "grad_norm": 0.4319005608558655, + "learning_rate": 0.001141156021915355, + "loss": 1.9903, + "step": 6360 + }, + { + "epoch": 0.6709915611814345, + "grad_norm": 0.39716827869415283, + "learning_rate": 0.0011395732695269908, + "loss": 1.964, + "step": 6361 + }, + { + "epoch": 0.6710970464135021, + "grad_norm": 0.41094738245010376, + "learning_rate": 0.0011379927123731737, + "loss": 1.9783, + "step": 6362 + }, + { + "epoch": 0.6712025316455696, + "grad_norm": 0.37508121132850647, + "learning_rate": 0.0011364143474091727, + "loss": 1.966, + "step": 6363 + }, + { + "epoch": 0.6713080168776371, + "grad_norm": 0.41093742847442627, + "learning_rate": 0.0011348381715944804, + "loss": 1.9602, + "step": 6364 + }, + { + "epoch": 0.6714135021097046, + "grad_norm": 0.3770209550857544, + "learning_rate": 0.0011332641818928063, + "loss": 1.9609, + "step": 6365 + }, + { + "epoch": 0.6715189873417722, + "grad_norm": 0.43630459904670715, + "learning_rate": 0.001131692375272071, + "loss": 1.9372, + "step": 6366 + }, + { + "epoch": 0.6716244725738396, + "grad_norm": 0.38477689027786255, + "learning_rate": 0.0011301227487044005, + "loss": 1.9142, + "step": 6367 + }, + { + "epoch": 0.6717299578059072, + "grad_norm": 0.48964396119117737, + "learning_rate": 0.0011285552991661203, + "loss": 1.9775, + "step": 6368 + }, + { + "epoch": 0.6718354430379747, + "grad_norm": 0.399784117937088, + "learning_rate": 0.00112699002363775, + "loss": 1.9808, + "step": 6369 + }, + { + "epoch": 0.6719409282700421, + "grad_norm": 0.4436160624027252, + "learning_rate": 0.001125426919103997, + "loss": 1.9817, + "step": 6370 + }, + { + "epoch": 0.6720464135021097, + "grad_norm": 0.42763087153434753, + "learning_rate": 0.0011238659825537505, + 
"loss": 1.923, + "step": 6371 + }, + { + "epoch": 0.6721518987341772, + "grad_norm": 0.4849787950515747, + "learning_rate": 0.0011223072109800768, + "loss": 1.9916, + "step": 6372 + }, + { + "epoch": 0.6722573839662447, + "grad_norm": 0.5425018668174744, + "learning_rate": 0.0011207506013802117, + "loss": 1.9777, + "step": 6373 + }, + { + "epoch": 0.6723628691983122, + "grad_norm": 0.4151686429977417, + "learning_rate": 0.0011191961507555567, + "loss": 1.9679, + "step": 6374 + }, + { + "epoch": 0.6724683544303798, + "grad_norm": 0.4909451901912689, + "learning_rate": 0.0011176438561116713, + "loss": 1.9557, + "step": 6375 + }, + { + "epoch": 0.6725738396624472, + "grad_norm": 0.385680615901947, + "learning_rate": 0.0011160937144582695, + "loss": 1.9538, + "step": 6376 + }, + { + "epoch": 0.6726793248945148, + "grad_norm": 0.5222047567367554, + "learning_rate": 0.0011145457228092116, + "loss": 1.9871, + "step": 6377 + }, + { + "epoch": 0.6727848101265823, + "grad_norm": 0.3738361895084381, + "learning_rate": 0.0011129998781824997, + "loss": 1.976, + "step": 6378 + }, + { + "epoch": 0.6728902953586497, + "grad_norm": 0.4303044080734253, + "learning_rate": 0.0011114561776002726, + "loss": 1.9762, + "step": 6379 + }, + { + "epoch": 0.6729957805907173, + "grad_norm": 0.41057488322257996, + "learning_rate": 0.001109914618088799, + "loss": 1.9393, + "step": 6380 + }, + { + "epoch": 0.6731012658227848, + "grad_norm": 0.37997040152549744, + "learning_rate": 0.0011083751966784717, + "loss": 1.9684, + "step": 6381 + }, + { + "epoch": 0.6732067510548523, + "grad_norm": 0.4444407820701599, + "learning_rate": 0.0011068379104038023, + "loss": 1.9615, + "step": 6382 + }, + { + "epoch": 0.6733122362869198, + "grad_norm": 0.37526994943618774, + "learning_rate": 0.0011053027563034162, + "loss": 2.0094, + "step": 6383 + }, + { + "epoch": 0.6734177215189874, + "grad_norm": 0.4467705488204956, + "learning_rate": 0.001103769731420045, + "loss": 1.9551, + "step": 6384 + }, + { + "epoch": 
0.6735232067510548, + "grad_norm": 0.36312514543533325, + "learning_rate": 0.0011022388328005232, + "loss": 1.9979, + "step": 6385 + }, + { + "epoch": 0.6736286919831224, + "grad_norm": 0.4619540274143219, + "learning_rate": 0.0011007100574957802, + "loss": 2.0126, + "step": 6386 + }, + { + "epoch": 0.6737341772151899, + "grad_norm": 0.37409764528274536, + "learning_rate": 0.0010991834025608363, + "loss": 1.9667, + "step": 6387 + }, + { + "epoch": 0.6738396624472573, + "grad_norm": 0.4713361859321594, + "learning_rate": 0.001097658865054796, + "loss": 1.9763, + "step": 6388 + }, + { + "epoch": 0.6739451476793249, + "grad_norm": 0.41869089007377625, + "learning_rate": 0.001096136442040843, + "loss": 1.9751, + "step": 6389 + }, + { + "epoch": 0.6740506329113924, + "grad_norm": 0.45337244868278503, + "learning_rate": 0.0010946161305862348, + "loss": 1.9374, + "step": 6390 + }, + { + "epoch": 0.6741561181434599, + "grad_norm": 0.3857814073562622, + "learning_rate": 0.0010930979277622953, + "loss": 1.9773, + "step": 6391 + }, + { + "epoch": 0.6742616033755274, + "grad_norm": 0.46130552887916565, + "learning_rate": 0.0010915818306444112, + "loss": 2.0288, + "step": 6392 + }, + { + "epoch": 0.674367088607595, + "grad_norm": 0.45872893929481506, + "learning_rate": 0.0010900678363120256, + "loss": 1.9678, + "step": 6393 + }, + { + "epoch": 0.6744725738396624, + "grad_norm": 0.3945707082748413, + "learning_rate": 0.001088555941848632, + "loss": 1.9764, + "step": 6394 + }, + { + "epoch": 0.67457805907173, + "grad_norm": 0.42842453718185425, + "learning_rate": 0.0010870461443417694, + "loss": 1.9518, + "step": 6395 + }, + { + "epoch": 0.6746835443037975, + "grad_norm": 0.4093819558620453, + "learning_rate": 0.001085538440883016, + "loss": 1.9716, + "step": 6396 + }, + { + "epoch": 0.674789029535865, + "grad_norm": 0.4150817394256592, + "learning_rate": 0.0010840328285679837, + "loss": 2.0193, + "step": 6397 + }, + { + "epoch": 0.6748945147679325, + "grad_norm": 
0.40860292315483093, + "learning_rate": 0.0010825293044963132, + "loss": 1.9329, + "step": 6398 + }, + { + "epoch": 0.675, + "grad_norm": 0.37127918004989624, + "learning_rate": 0.001081027865771668, + "loss": 1.9588, + "step": 6399 + }, + { + "epoch": 0.6751054852320675, + "grad_norm": 0.46588999032974243, + "learning_rate": 0.001079528509501728, + "loss": 1.989, + "step": 6400 + }, + { + "epoch": 0.675210970464135, + "grad_norm": 0.455322265625, + "learning_rate": 0.0010780312327981854, + "loss": 1.9396, + "step": 6401 + }, + { + "epoch": 0.6753164556962026, + "grad_norm": 0.4456724524497986, + "learning_rate": 0.001076536032776738, + "loss": 1.9815, + "step": 6402 + }, + { + "epoch": 0.67542194092827, + "grad_norm": 0.45064812898635864, + "learning_rate": 0.0010750429065570842, + "loss": 2.0027, + "step": 6403 + }, + { + "epoch": 0.6755274261603376, + "grad_norm": 0.43305712938308716, + "learning_rate": 0.0010735518512629172, + "loss": 2.008, + "step": 6404 + }, + { + "epoch": 0.6756329113924051, + "grad_norm": 0.4267774224281311, + "learning_rate": 0.00107206286402192, + "loss": 1.9793, + "step": 6405 + }, + { + "epoch": 0.6757383966244725, + "grad_norm": 0.38250502943992615, + "learning_rate": 0.0010705759419657585, + "loss": 1.9809, + "step": 6406 + }, + { + "epoch": 0.6758438818565401, + "grad_norm": 0.4110143780708313, + "learning_rate": 0.0010690910822300777, + "loss": 1.9501, + "step": 6407 + }, + { + "epoch": 0.6759493670886076, + "grad_norm": 0.4055734872817993, + "learning_rate": 0.0010676082819544952, + "loss": 1.9521, + "step": 6408 + }, + { + "epoch": 0.6760548523206751, + "grad_norm": 0.3829496204853058, + "learning_rate": 0.0010661275382825958, + "loss": 1.9651, + "step": 6409 + }, + { + "epoch": 0.6761603375527426, + "grad_norm": 0.35513240098953247, + "learning_rate": 0.0010646488483619261, + "loss": 1.9421, + "step": 6410 + }, + { + "epoch": 0.6762658227848102, + "grad_norm": 0.4839339256286621, + "learning_rate": 0.0010631722093439888, + 
"loss": 2.0015, + "step": 6411 + }, + { + "epoch": 0.6763713080168776, + "grad_norm": 0.3716129958629608, + "learning_rate": 0.0010616976183842378, + "loss": 1.9446, + "step": 6412 + }, + { + "epoch": 0.6764767932489452, + "grad_norm": 0.44385626912117004, + "learning_rate": 0.001060225072642072, + "loss": 1.9597, + "step": 6413 + }, + { + "epoch": 0.6765822784810127, + "grad_norm": 0.3726147413253784, + "learning_rate": 0.0010587545692808302, + "loss": 1.9616, + "step": 6414 + }, + { + "epoch": 0.6766877637130801, + "grad_norm": 0.4416101574897766, + "learning_rate": 0.0010572861054677853, + "loss": 1.979, + "step": 6415 + }, + { + "epoch": 0.6767932489451477, + "grad_norm": 0.3933641016483307, + "learning_rate": 0.0010558196783741396, + "loss": 1.9463, + "step": 6416 + }, + { + "epoch": 0.6768987341772152, + "grad_norm": 0.4110652208328247, + "learning_rate": 0.0010543552851750187, + "loss": 1.978, + "step": 6417 + }, + { + "epoch": 0.6770042194092827, + "grad_norm": 0.37758705019950867, + "learning_rate": 0.001052892923049466, + "loss": 1.9699, + "step": 6418 + }, + { + "epoch": 0.6771097046413502, + "grad_norm": 0.45993849635124207, + "learning_rate": 0.0010514325891804379, + "loss": 1.9516, + "step": 6419 + }, + { + "epoch": 0.6772151898734177, + "grad_norm": 0.36786118149757385, + "learning_rate": 0.0010499742807547976, + "loss": 1.9681, + "step": 6420 + }, + { + "epoch": 0.6773206751054852, + "grad_norm": 0.42147135734558105, + "learning_rate": 0.00104851799496331, + "loss": 1.9709, + "step": 6421 + }, + { + "epoch": 0.6774261603375528, + "grad_norm": 0.36147502064704895, + "learning_rate": 0.0010470637290006365, + "loss": 1.9573, + "step": 6422 + }, + { + "epoch": 0.6775316455696202, + "grad_norm": 0.38392817974090576, + "learning_rate": 0.00104561148006533, + "loss": 1.9659, + "step": 6423 + }, + { + "epoch": 0.6776371308016877, + "grad_norm": 0.4063061773777008, + "learning_rate": 0.0010441612453598276, + "loss": 1.9795, + "step": 6424 + }, + { + "epoch": 
0.6777426160337553, + "grad_norm": 0.4331367015838623, + "learning_rate": 0.001042713022090448, + "loss": 1.9942, + "step": 6425 + }, + { + "epoch": 0.6778481012658227, + "grad_norm": 0.41458892822265625, + "learning_rate": 0.0010412668074673832, + "loss": 1.9624, + "step": 6426 + }, + { + "epoch": 0.6779535864978903, + "grad_norm": 0.3947869539260864, + "learning_rate": 0.0010398225987046958, + "loss": 1.9661, + "step": 6427 + }, + { + "epoch": 0.6780590717299578, + "grad_norm": 0.37105709314346313, + "learning_rate": 0.001038380393020312, + "loss": 1.9706, + "step": 6428 + }, + { + "epoch": 0.6781645569620253, + "grad_norm": 0.36782917380332947, + "learning_rate": 0.0010369401876360166, + "loss": 1.9245, + "step": 6429 + }, + { + "epoch": 0.6782700421940928, + "grad_norm": 0.40512189269065857, + "learning_rate": 0.0010355019797774478, + "loss": 1.9766, + "step": 6430 + }, + { + "epoch": 0.6783755274261604, + "grad_norm": 0.36017942428588867, + "learning_rate": 0.0010340657666740914, + "loss": 1.9521, + "step": 6431 + }, + { + "epoch": 0.6784810126582278, + "grad_norm": 0.36830392479896545, + "learning_rate": 0.0010326315455592766, + "loss": 1.9732, + "step": 6432 + }, + { + "epoch": 0.6785864978902953, + "grad_norm": 0.37768039107322693, + "learning_rate": 0.001031199313670169, + "loss": 1.9357, + "step": 6433 + }, + { + "epoch": 0.6786919831223629, + "grad_norm": 0.38483500480651855, + "learning_rate": 0.0010297690682477669, + "loss": 1.9633, + "step": 6434 + }, + { + "epoch": 0.6787974683544303, + "grad_norm": 0.4016737937927246, + "learning_rate": 0.0010283408065368948, + "loss": 1.9423, + "step": 6435 + }, + { + "epoch": 0.6789029535864979, + "grad_norm": 0.37298184633255005, + "learning_rate": 0.0010269145257861987, + "loss": 1.9726, + "step": 6436 + }, + { + "epoch": 0.6790084388185654, + "grad_norm": 0.3488192558288574, + "learning_rate": 0.0010254902232481407, + "loss": 1.9614, + "step": 6437 + }, + { + "epoch": 0.6791139240506329, + "grad_norm": 
0.3742702007293701, + "learning_rate": 0.0010240678961789937, + "loss": 1.9489, + "step": 6438 + }, + { + "epoch": 0.6792194092827004, + "grad_norm": 0.362453430891037, + "learning_rate": 0.001022647541838836, + "loss": 1.9674, + "step": 6439 + }, + { + "epoch": 0.679324894514768, + "grad_norm": 0.37257200479507446, + "learning_rate": 0.001021229157491546, + "loss": 1.9571, + "step": 6440 + }, + { + "epoch": 0.6794303797468354, + "grad_norm": 0.35479891300201416, + "learning_rate": 0.0010198127404047975, + "loss": 1.985, + "step": 6441 + }, + { + "epoch": 0.679535864978903, + "grad_norm": 0.38368773460388184, + "learning_rate": 0.001018398287850053, + "loss": 1.9272, + "step": 6442 + }, + { + "epoch": 0.6796413502109705, + "grad_norm": 0.3908126652240753, + "learning_rate": 0.0010169857971025606, + "loss": 1.9774, + "step": 6443 + }, + { + "epoch": 0.6797468354430379, + "grad_norm": 0.40552589297294617, + "learning_rate": 0.0010155752654413468, + "loss": 1.9768, + "step": 6444 + }, + { + "epoch": 0.6798523206751055, + "grad_norm": 0.37590348720550537, + "learning_rate": 0.0010141666901492116, + "loss": 1.9634, + "step": 6445 + }, + { + "epoch": 0.679957805907173, + "grad_norm": 0.41578876972198486, + "learning_rate": 0.0010127600685127247, + "loss": 1.9413, + "step": 6446 + }, + { + "epoch": 0.6800632911392405, + "grad_norm": 0.4104231297969818, + "learning_rate": 0.0010113553978222192, + "loss": 1.9511, + "step": 6447 + }, + { + "epoch": 0.680168776371308, + "grad_norm": 0.42991772294044495, + "learning_rate": 0.0010099526753717856, + "loss": 1.9415, + "step": 6448 + }, + { + "epoch": 0.6802742616033756, + "grad_norm": 0.4048719108104706, + "learning_rate": 0.0010085518984592678, + "loss": 1.9265, + "step": 6449 + }, + { + "epoch": 0.680379746835443, + "grad_norm": 0.3823038339614868, + "learning_rate": 0.0010071530643862578, + "loss": 1.9569, + "step": 6450 + }, + { + "epoch": 0.6804852320675105, + "grad_norm": 0.40610557794570923, + "learning_rate": 
0.0010057561704580897, + "loss": 1.9465, + "step": 6451 + }, + { + "epoch": 0.6805907172995781, + "grad_norm": 0.43385443091392517, + "learning_rate": 0.001004361213983836, + "loss": 1.9949, + "step": 6452 + }, + { + "epoch": 0.6806962025316455, + "grad_norm": 0.38171622157096863, + "learning_rate": 0.0010029681922762998, + "loss": 1.975, + "step": 6453 + }, + { + "epoch": 0.6808016877637131, + "grad_norm": 0.42303895950317383, + "learning_rate": 0.0010015771026520132, + "loss": 1.9551, + "step": 6454 + }, + { + "epoch": 0.6809071729957806, + "grad_norm": 0.39765527844429016, + "learning_rate": 0.0010001879424312286, + "loss": 1.97, + "step": 6455 + }, + { + "epoch": 0.6810126582278481, + "grad_norm": 0.38662245869636536, + "learning_rate": 0.0009988007089379162, + "loss": 1.9356, + "step": 6456 + }, + { + "epoch": 0.6811181434599156, + "grad_norm": 0.3924289047718048, + "learning_rate": 0.000997415399499757, + "loss": 2.002, + "step": 6457 + }, + { + "epoch": 0.6812236286919832, + "grad_norm": 0.3846360743045807, + "learning_rate": 0.000996032011448139, + "loss": 1.969, + "step": 6458 + }, + { + "epoch": 0.6813291139240506, + "grad_norm": 0.3836338520050049, + "learning_rate": 0.0009946505421181513, + "loss": 1.9892, + "step": 6459 + }, + { + "epoch": 0.6814345991561181, + "grad_norm": 0.36940324306488037, + "learning_rate": 0.000993270988848579, + "loss": 1.9722, + "step": 6460 + }, + { + "epoch": 0.6815400843881857, + "grad_norm": 0.3667450547218323, + "learning_rate": 0.0009918933489818985, + "loss": 1.9646, + "step": 6461 + }, + { + "epoch": 0.6816455696202531, + "grad_norm": 0.4028165638446808, + "learning_rate": 0.000990517619864272, + "loss": 1.967, + "step": 6462 + }, + { + "epoch": 0.6817510548523207, + "grad_norm": 0.3679765462875366, + "learning_rate": 0.0009891437988455425, + "loss": 1.9345, + "step": 6463 + }, + { + "epoch": 0.6818565400843882, + "grad_norm": 0.37121206521987915, + "learning_rate": 0.0009877718832792285, + "loss": 1.9725, + "step": 
6464 + }, + { + "epoch": 0.6819620253164557, + "grad_norm": 0.37269943952560425, + "learning_rate": 0.0009864018705225196, + "loss": 1.9677, + "step": 6465 + }, + { + "epoch": 0.6820675105485232, + "grad_norm": 0.38364529609680176, + "learning_rate": 0.0009850337579362701, + "loss": 1.9423, + "step": 6466 + }, + { + "epoch": 0.6821729957805908, + "grad_norm": 0.3665481209754944, + "learning_rate": 0.000983667542884996, + "loss": 1.9448, + "step": 6467 + }, + { + "epoch": 0.6822784810126582, + "grad_norm": 0.40436163544654846, + "learning_rate": 0.000982303222736867, + "loss": 1.9567, + "step": 6468 + }, + { + "epoch": 0.6823839662447257, + "grad_norm": 0.4044664800167084, + "learning_rate": 0.0009809407948637044, + "loss": 1.9581, + "step": 6469 + }, + { + "epoch": 0.6824894514767933, + "grad_norm": 0.3559001684188843, + "learning_rate": 0.0009795802566409742, + "loss": 1.9827, + "step": 6470 + }, + { + "epoch": 0.6825949367088607, + "grad_norm": 0.43288248777389526, + "learning_rate": 0.0009782216054477827, + "loss": 1.9576, + "step": 6471 + }, + { + "epoch": 0.6827004219409283, + "grad_norm": 0.37948474287986755, + "learning_rate": 0.000976864838666871, + "loss": 2.0103, + "step": 6472 + }, + { + "epoch": 0.6828059071729958, + "grad_norm": 0.3313630521297455, + "learning_rate": 0.0009755099536846105, + "loss": 1.9647, + "step": 6473 + }, + { + "epoch": 0.6829113924050633, + "grad_norm": 0.42221692204475403, + "learning_rate": 0.0009741569478909979, + "loss": 1.9539, + "step": 6474 + }, + { + "epoch": 0.6830168776371308, + "grad_norm": 0.38674309849739075, + "learning_rate": 0.0009728058186796492, + "loss": 1.9623, + "step": 6475 + }, + { + "epoch": 0.6831223628691984, + "grad_norm": 0.37859687209129333, + "learning_rate": 0.0009714565634477962, + "loss": 2.0042, + "step": 6476 + }, + { + "epoch": 0.6832278481012658, + "grad_norm": 0.3658261299133301, + "learning_rate": 0.00097010917959628, + "loss": 1.9587, + "step": 6477 + }, + { + "epoch": 0.6833333333333333, + 
"grad_norm": 0.3749760091304779, + "learning_rate": 0.0009687636645295469, + "loss": 1.9754, + "step": 6478 + }, + { + "epoch": 0.6834388185654009, + "grad_norm": 0.3474961221218109, + "learning_rate": 0.0009674200156556436, + "loss": 1.9582, + "step": 6479 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 0.4581906795501709, + "learning_rate": 0.0009660782303862109, + "loss": 1.9794, + "step": 6480 + }, + { + "epoch": 0.6836497890295359, + "grad_norm": 0.37834590673446655, + "learning_rate": 0.0009647383061364801, + "loss": 1.9703, + "step": 6481 + }, + { + "epoch": 0.6837552742616034, + "grad_norm": 0.42496103048324585, + "learning_rate": 0.0009634002403252678, + "loss": 1.918, + "step": 6482 + }, + { + "epoch": 0.6838607594936709, + "grad_norm": 0.41952410340309143, + "learning_rate": 0.00096206403037497, + "loss": 1.9092, + "step": 6483 + }, + { + "epoch": 0.6839662447257384, + "grad_norm": 0.41441071033477783, + "learning_rate": 0.000960729673711558, + "loss": 1.9681, + "step": 6484 + }, + { + "epoch": 0.6840717299578059, + "grad_norm": 0.3812636435031891, + "learning_rate": 0.0009593971677645735, + "loss": 1.9545, + "step": 6485 + }, + { + "epoch": 0.6841772151898734, + "grad_norm": 0.36486637592315674, + "learning_rate": 0.0009580665099671228, + "loss": 1.9591, + "step": 6486 + }, + { + "epoch": 0.684282700421941, + "grad_norm": 0.4296589493751526, + "learning_rate": 0.000956737697755873, + "loss": 1.9628, + "step": 6487 + }, + { + "epoch": 0.6843881856540084, + "grad_norm": 0.3650546073913574, + "learning_rate": 0.0009554107285710461, + "loss": 1.9863, + "step": 6488 + }, + { + "epoch": 0.6844936708860759, + "grad_norm": 0.3640323877334595, + "learning_rate": 0.0009540855998564147, + "loss": 1.9699, + "step": 6489 + }, + { + "epoch": 0.6845991561181435, + "grad_norm": 0.38982731103897095, + "learning_rate": 0.0009527623090592963, + "loss": 1.9658, + "step": 6490 + }, + { + "epoch": 0.6847046413502109, + "grad_norm": 0.4158605635166168, + 
"learning_rate": 0.0009514408536305495, + "loss": 2.02, + "step": 6491 + }, + { + "epoch": 0.6848101265822785, + "grad_norm": 0.37686458230018616, + "learning_rate": 0.0009501212310245682, + "loss": 1.9758, + "step": 6492 + }, + { + "epoch": 0.684915611814346, + "grad_norm": 0.43459662795066833, + "learning_rate": 0.0009488034386992771, + "loss": 1.984, + "step": 6493 + }, + { + "epoch": 0.6850210970464135, + "grad_norm": 0.41662099957466125, + "learning_rate": 0.0009474874741161266, + "loss": 1.9611, + "step": 6494 + }, + { + "epoch": 0.685126582278481, + "grad_norm": 0.3552919924259186, + "learning_rate": 0.0009461733347400879, + "loss": 1.993, + "step": 6495 + }, + { + "epoch": 0.6852320675105485, + "grad_norm": 0.37386468052864075, + "learning_rate": 0.0009448610180396485, + "loss": 1.954, + "step": 6496 + }, + { + "epoch": 0.685337552742616, + "grad_norm": 0.38108670711517334, + "learning_rate": 0.0009435505214868068, + "loss": 1.9882, + "step": 6497 + }, + { + "epoch": 0.6854430379746835, + "grad_norm": 0.3593713045120239, + "learning_rate": 0.0009422418425570675, + "loss": 1.9633, + "step": 6498 + }, + { + "epoch": 0.6855485232067511, + "grad_norm": 0.38260388374328613, + "learning_rate": 0.000940934978729437, + "loss": 1.9847, + "step": 6499 + }, + { + "epoch": 0.6856540084388185, + "grad_norm": 0.38558030128479004, + "learning_rate": 0.0009396299274864177, + "loss": 1.9445, + "step": 6500 + }, + { + "epoch": 0.6857594936708861, + "grad_norm": 0.3933887481689453, + "learning_rate": 0.0009383266863140042, + "loss": 1.9844, + "step": 6501 + }, + { + "epoch": 0.6858649789029536, + "grad_norm": 0.4005863070487976, + "learning_rate": 0.0009370252527016777, + "loss": 1.9764, + "step": 6502 + }, + { + "epoch": 0.685970464135021, + "grad_norm": 0.3841397762298584, + "learning_rate": 0.0009357256241424013, + "loss": 1.9807, + "step": 6503 + }, + { + "epoch": 0.6860759493670886, + "grad_norm": 0.40643489360809326, + "learning_rate": 0.0009344277981326158, + "loss": 
1.9745, + "step": 6504 + }, + { + "epoch": 0.6861814345991561, + "grad_norm": 0.4190913736820221, + "learning_rate": 0.0009331317721722339, + "loss": 1.9726, + "step": 6505 + }, + { + "epoch": 0.6862869198312236, + "grad_norm": 0.36897730827331543, + "learning_rate": 0.0009318375437646361, + "loss": 1.9752, + "step": 6506 + }, + { + "epoch": 0.6863924050632911, + "grad_norm": 0.414628803730011, + "learning_rate": 0.0009305451104166652, + "loss": 1.9458, + "step": 6507 + }, + { + "epoch": 0.6864978902953587, + "grad_norm": 0.3786012828350067, + "learning_rate": 0.0009292544696386228, + "loss": 1.9496, + "step": 6508 + }, + { + "epoch": 0.6866033755274261, + "grad_norm": 0.35376158356666565, + "learning_rate": 0.0009279656189442628, + "loss": 1.9774, + "step": 6509 + }, + { + "epoch": 0.6867088607594937, + "grad_norm": 0.41608041524887085, + "learning_rate": 0.0009266785558507877, + "loss": 1.9537, + "step": 6510 + }, + { + "epoch": 0.6868143459915612, + "grad_norm": 0.37436601519584656, + "learning_rate": 0.000925393277878844, + "loss": 1.9561, + "step": 6511 + }, + { + "epoch": 0.6869198312236287, + "grad_norm": 0.4946730434894562, + "learning_rate": 0.0009241097825525162, + "loss": 1.9691, + "step": 6512 + }, + { + "epoch": 0.6870253164556962, + "grad_norm": 0.42039939761161804, + "learning_rate": 0.0009228280673993236, + "loss": 1.9623, + "step": 6513 + }, + { + "epoch": 0.6871308016877637, + "grad_norm": 0.44463205337524414, + "learning_rate": 0.0009215481299502144, + "loss": 1.9755, + "step": 6514 + }, + { + "epoch": 0.6872362869198312, + "grad_norm": 0.43403318524360657, + "learning_rate": 0.0009202699677395614, + "loss": 1.9521, + "step": 6515 + }, + { + "epoch": 0.6873417721518987, + "grad_norm": 0.4381048381328583, + "learning_rate": 0.000918993578305157, + "loss": 1.9462, + "step": 6516 + }, + { + "epoch": 0.6874472573839663, + "grad_norm": 0.4046580493450165, + "learning_rate": 0.0009177189591882088, + "loss": 1.9969, + "step": 6517 + }, + { + "epoch": 
0.6875527426160337, + "grad_norm": 0.43172892928123474, + "learning_rate": 0.0009164461079333344, + "loss": 1.9318, + "step": 6518 + }, + { + "epoch": 0.6876582278481013, + "grad_norm": 0.4435008764266968, + "learning_rate": 0.0009151750220885573, + "loss": 1.9374, + "step": 6519 + }, + { + "epoch": 0.6877637130801688, + "grad_norm": 0.4199291169643402, + "learning_rate": 0.0009139056992053016, + "loss": 1.9618, + "step": 6520 + }, + { + "epoch": 0.6878691983122363, + "grad_norm": 0.4413623511791229, + "learning_rate": 0.0009126381368383879, + "loss": 1.9456, + "step": 6521 + }, + { + "epoch": 0.6879746835443038, + "grad_norm": 0.3633035123348236, + "learning_rate": 0.0009113723325460275, + "loss": 1.9356, + "step": 6522 + }, + { + "epoch": 0.6880801687763713, + "grad_norm": 0.406601220369339, + "learning_rate": 0.000910108283889819, + "loss": 1.9112, + "step": 6523 + }, + { + "epoch": 0.6881856540084388, + "grad_norm": 0.4311261773109436, + "learning_rate": 0.0009088459884347425, + "loss": 1.9507, + "step": 6524 + }, + { + "epoch": 0.6882911392405063, + "grad_norm": 0.36875542998313904, + "learning_rate": 0.0009075854437491562, + "loss": 1.9653, + "step": 6525 + }, + { + "epoch": 0.6883966244725739, + "grad_norm": 0.3783077299594879, + "learning_rate": 0.0009063266474047897, + "loss": 1.9475, + "step": 6526 + }, + { + "epoch": 0.6885021097046413, + "grad_norm": 0.4230908751487732, + "learning_rate": 0.0009050695969767418, + "loss": 1.9672, + "step": 6527 + }, + { + "epoch": 0.6886075949367089, + "grad_norm": 0.38168418407440186, + "learning_rate": 0.0009038142900434738, + "loss": 1.9408, + "step": 6528 + }, + { + "epoch": 0.6887130801687764, + "grad_norm": 0.42376580834388733, + "learning_rate": 0.0009025607241868057, + "loss": 1.9466, + "step": 6529 + }, + { + "epoch": 0.6888185654008439, + "grad_norm": 0.4577712416648865, + "learning_rate": 0.000901308896991912, + "loss": 1.9492, + "step": 6530 + }, + { + "epoch": 0.6889240506329114, + "grad_norm": 
0.3871624767780304, + "learning_rate": 0.0009000588060473156, + "loss": 1.9153, + "step": 6531 + }, + { + "epoch": 0.689029535864979, + "grad_norm": 0.40937504172325134, + "learning_rate": 0.0008988104489448847, + "loss": 1.9317, + "step": 6532 + }, + { + "epoch": 0.6891350210970464, + "grad_norm": 0.3967224061489105, + "learning_rate": 0.0008975638232798275, + "loss": 1.9268, + "step": 6533 + }, + { + "epoch": 0.6892405063291139, + "grad_norm": 0.4940600097179413, + "learning_rate": 0.0008963189266506873, + "loss": 1.9761, + "step": 6534 + }, + { + "epoch": 0.6893459915611815, + "grad_norm": 0.3638090491294861, + "learning_rate": 0.000895075756659338, + "loss": 1.9899, + "step": 6535 + }, + { + "epoch": 0.6894514767932489, + "grad_norm": 0.48187920451164246, + "learning_rate": 0.0008938343109109804, + "loss": 1.9565, + "step": 6536 + }, + { + "epoch": 0.6895569620253165, + "grad_norm": 0.34743088483810425, + "learning_rate": 0.0008925945870141361, + "loss": 1.974, + "step": 6537 + }, + { + "epoch": 0.689662447257384, + "grad_norm": 0.5002012252807617, + "learning_rate": 0.0008913565825806437, + "loss": 1.9531, + "step": 6538 + }, + { + "epoch": 0.6897679324894515, + "grad_norm": 0.35288140177726746, + "learning_rate": 0.0008901202952256545, + "loss": 1.9745, + "step": 6539 + }, + { + "epoch": 0.689873417721519, + "grad_norm": 0.4642280638217926, + "learning_rate": 0.000888885722567627, + "loss": 1.9667, + "step": 6540 + }, + { + "epoch": 0.6899789029535865, + "grad_norm": 0.3862091302871704, + "learning_rate": 0.0008876528622283235, + "loss": 1.9523, + "step": 6541 + }, + { + "epoch": 0.690084388185654, + "grad_norm": 0.3980412185192108, + "learning_rate": 0.0008864217118328042, + "loss": 1.9666, + "step": 6542 + }, + { + "epoch": 0.6901898734177215, + "grad_norm": 0.3918437659740448, + "learning_rate": 0.0008851922690094236, + "loss": 1.9441, + "step": 6543 + }, + { + "epoch": 0.6902953586497891, + "grad_norm": 0.48750123381614685, + "learning_rate": 
0.0008839645313898255, + "loss": 1.9793, + "step": 6544 + }, + { + "epoch": 0.6904008438818565, + "grad_norm": 0.382351279258728, + "learning_rate": 0.0008827384966089386, + "loss": 1.9649, + "step": 6545 + }, + { + "epoch": 0.6905063291139241, + "grad_norm": 0.5040594935417175, + "learning_rate": 0.0008815141623049723, + "loss": 1.9768, + "step": 6546 + }, + { + "epoch": 0.6906118143459916, + "grad_norm": 0.37332606315612793, + "learning_rate": 0.0008802915261194108, + "loss": 1.973, + "step": 6547 + }, + { + "epoch": 0.690717299578059, + "grad_norm": 0.4704506993293762, + "learning_rate": 0.00087907058569701, + "loss": 1.958, + "step": 6548 + }, + { + "epoch": 0.6908227848101266, + "grad_norm": 0.3723013401031494, + "learning_rate": 0.0008778513386857928, + "loss": 1.9229, + "step": 6549 + }, + { + "epoch": 0.6909282700421941, + "grad_norm": 0.4172382056713104, + "learning_rate": 0.0008766337827370438, + "loss": 1.9258, + "step": 6550 + }, + { + "epoch": 0.6910337552742616, + "grad_norm": 0.41888147592544556, + "learning_rate": 0.0008754179155053053, + "loss": 1.9735, + "step": 6551 + }, + { + "epoch": 0.6911392405063291, + "grad_norm": 0.3575185537338257, + "learning_rate": 0.0008742037346483729, + "loss": 1.9235, + "step": 6552 + }, + { + "epoch": 0.6912447257383966, + "grad_norm": 0.48374301195144653, + "learning_rate": 0.00087299123782729, + "loss": 1.9521, + "step": 6553 + }, + { + "epoch": 0.6913502109704641, + "grad_norm": 0.36349597573280334, + "learning_rate": 0.0008717804227063454, + "loss": 1.9582, + "step": 6554 + }, + { + "epoch": 0.6914556962025317, + "grad_norm": 0.4689796566963196, + "learning_rate": 0.0008705712869530661, + "loss": 1.9537, + "step": 6555 + }, + { + "epoch": 0.6915611814345991, + "grad_norm": 0.3837205469608307, + "learning_rate": 0.0008693638282382152, + "loss": 1.9624, + "step": 6556 + }, + { + "epoch": 0.6916666666666667, + "grad_norm": 0.4836702346801758, + "learning_rate": 0.0008681580442357857, + "loss": 1.9555, + "step": 
6557 + }, + { + "epoch": 0.6917721518987342, + "grad_norm": 0.40580135583877563, + "learning_rate": 0.000866953932622997, + "loss": 1.953, + "step": 6558 + }, + { + "epoch": 0.6918776371308016, + "grad_norm": 0.4260753393173218, + "learning_rate": 0.0008657514910802905, + "loss": 1.9462, + "step": 6559 + }, + { + "epoch": 0.6919831223628692, + "grad_norm": 0.40578192472457886, + "learning_rate": 0.000864550717291324, + "loss": 1.9369, + "step": 6560 + }, + { + "epoch": 0.6920886075949367, + "grad_norm": 0.5059919357299805, + "learning_rate": 0.0008633516089429683, + "loss": 1.968, + "step": 6561 + }, + { + "epoch": 0.6921940928270042, + "grad_norm": 0.3835734724998474, + "learning_rate": 0.0008621541637253029, + "loss": 1.9275, + "step": 6562 + }, + { + "epoch": 0.6922995780590717, + "grad_norm": 0.4480023980140686, + "learning_rate": 0.0008609583793316104, + "loss": 1.9233, + "step": 6563 + }, + { + "epoch": 0.6924050632911393, + "grad_norm": 0.4164978563785553, + "learning_rate": 0.0008597642534583734, + "loss": 1.9383, + "step": 6564 + }, + { + "epoch": 0.6925105485232067, + "grad_norm": 0.4328394830226898, + "learning_rate": 0.0008585717838052689, + "loss": 1.961, + "step": 6565 + }, + { + "epoch": 0.6926160337552743, + "grad_norm": 0.4429185390472412, + "learning_rate": 0.0008573809680751646, + "loss": 1.9391, + "step": 6566 + }, + { + "epoch": 0.6927215189873418, + "grad_norm": 0.4230235815048218, + "learning_rate": 0.0008561918039741143, + "loss": 1.9741, + "step": 6567 + }, + { + "epoch": 0.6928270042194092, + "grad_norm": 0.40176889300346375, + "learning_rate": 0.0008550042892113534, + "loss": 1.9479, + "step": 6568 + }, + { + "epoch": 0.6929324894514768, + "grad_norm": 0.4032588005065918, + "learning_rate": 0.0008538184214992943, + "loss": 1.962, + "step": 6569 + }, + { + "epoch": 0.6930379746835443, + "grad_norm": 0.3775917589664459, + "learning_rate": 0.0008526341985535229, + "loss": 1.9522, + "step": 6570 + }, + { + "epoch": 0.6931434599156118, + 
"grad_norm": 0.45628902316093445, + "learning_rate": 0.0008514516180927928, + "loss": 1.9557, + "step": 6571 + }, + { + "epoch": 0.6932489451476793, + "grad_norm": 0.40616273880004883, + "learning_rate": 0.0008502706778390216, + "loss": 1.9509, + "step": 6572 + }, + { + "epoch": 0.6933544303797469, + "grad_norm": 0.46616142988204956, + "learning_rate": 0.0008490913755172875, + "loss": 1.9281, + "step": 6573 + }, + { + "epoch": 0.6934599156118143, + "grad_norm": 0.3596310615539551, + "learning_rate": 0.0008479137088558226, + "loss": 1.9555, + "step": 6574 + }, + { + "epoch": 0.6935654008438819, + "grad_norm": 0.40900617837905884, + "learning_rate": 0.0008467376755860108, + "loss": 1.952, + "step": 6575 + }, + { + "epoch": 0.6936708860759494, + "grad_norm": 0.3822267949581146, + "learning_rate": 0.0008455632734423824, + "loss": 1.8975, + "step": 6576 + }, + { + "epoch": 0.6937763713080168, + "grad_norm": 0.3650358021259308, + "learning_rate": 0.0008443905001626097, + "loss": 1.9417, + "step": 6577 + }, + { + "epoch": 0.6938818565400844, + "grad_norm": 0.4288491904735565, + "learning_rate": 0.0008432193534875027, + "loss": 1.9447, + "step": 6578 + }, + { + "epoch": 0.6939873417721519, + "grad_norm": 0.402157187461853, + "learning_rate": 0.0008420498311610049, + "loss": 1.9359, + "step": 6579 + }, + { + "epoch": 0.6940928270042194, + "grad_norm": 0.40396469831466675, + "learning_rate": 0.0008408819309301891, + "loss": 1.9685, + "step": 6580 + }, + { + "epoch": 0.6941983122362869, + "grad_norm": 0.3716714382171631, + "learning_rate": 0.0008397156505452524, + "loss": 1.9329, + "step": 6581 + }, + { + "epoch": 0.6943037974683545, + "grad_norm": 0.4082616865634918, + "learning_rate": 0.0008385509877595129, + "loss": 1.949, + "step": 6582 + }, + { + "epoch": 0.6944092827004219, + "grad_norm": 0.3977774679660797, + "learning_rate": 0.0008373879403294043, + "loss": 1.9469, + "step": 6583 + }, + { + "epoch": 0.6945147679324895, + "grad_norm": 0.39873984456062317, + 
"learning_rate": 0.0008362265060144721, + "loss": 1.9379, + "step": 6584 + }, + { + "epoch": 0.694620253164557, + "grad_norm": 0.39335939288139343, + "learning_rate": 0.0008350666825773697, + "loss": 1.9722, + "step": 6585 + }, + { + "epoch": 0.6947257383966244, + "grad_norm": 0.39924463629722595, + "learning_rate": 0.0008339084677838532, + "loss": 1.9285, + "step": 6586 + }, + { + "epoch": 0.694831223628692, + "grad_norm": 0.378814697265625, + "learning_rate": 0.0008327518594027778, + "loss": 1.9619, + "step": 6587 + }, + { + "epoch": 0.6949367088607595, + "grad_norm": 0.383346289396286, + "learning_rate": 0.0008315968552060928, + "loss": 1.9642, + "step": 6588 + }, + { + "epoch": 0.695042194092827, + "grad_norm": 0.3509616255760193, + "learning_rate": 0.0008304434529688382, + "loss": 1.9595, + "step": 6589 + }, + { + "epoch": 0.6951476793248945, + "grad_norm": 0.34668466448783875, + "learning_rate": 0.0008292916504691397, + "loss": 1.9885, + "step": 6590 + }, + { + "epoch": 0.6952531645569621, + "grad_norm": 0.3613523840904236, + "learning_rate": 0.0008281414454882051, + "loss": 1.9373, + "step": 6591 + }, + { + "epoch": 0.6953586497890295, + "grad_norm": 0.418025940656662, + "learning_rate": 0.000826992835810319, + "loss": 1.9731, + "step": 6592 + }, + { + "epoch": 0.695464135021097, + "grad_norm": 0.3715098798274994, + "learning_rate": 0.0008258458192228395, + "loss": 1.9663, + "step": 6593 + }, + { + "epoch": 0.6955696202531646, + "grad_norm": 0.38601991534233093, + "learning_rate": 0.0008247003935161936, + "loss": 1.9318, + "step": 6594 + }, + { + "epoch": 0.695675105485232, + "grad_norm": 0.4198477268218994, + "learning_rate": 0.0008235565564838727, + "loss": 1.9334, + "step": 6595 + }, + { + "epoch": 0.6957805907172996, + "grad_norm": 0.39717355370521545, + "learning_rate": 0.0008224143059224287, + "loss": 1.9779, + "step": 6596 + }, + { + "epoch": 0.6958860759493671, + "grad_norm": 0.39235326647758484, + "learning_rate": 0.0008212736396314697, + "loss": 
1.9757, + "step": 6597 + }, + { + "epoch": 0.6959915611814346, + "grad_norm": 0.352752685546875, + "learning_rate": 0.0008201345554136556, + "loss": 1.9505, + "step": 6598 + }, + { + "epoch": 0.6960970464135021, + "grad_norm": 0.367715984582901, + "learning_rate": 0.0008189970510746938, + "loss": 1.9579, + "step": 6599 + }, + { + "epoch": 0.6962025316455697, + "grad_norm": 0.3793562650680542, + "learning_rate": 0.0008178611244233354, + "loss": 1.9661, + "step": 6600 + }, + { + "epoch": 0.6963080168776371, + "grad_norm": 0.3596304953098297, + "learning_rate": 0.0008167267732713704, + "loss": 1.9474, + "step": 6601 + }, + { + "epoch": 0.6964135021097047, + "grad_norm": 0.4011725187301636, + "learning_rate": 0.0008155939954336242, + "loss": 1.9505, + "step": 6602 + }, + { + "epoch": 0.6965189873417722, + "grad_norm": 0.36155903339385986, + "learning_rate": 0.0008144627887279526, + "loss": 1.9558, + "step": 6603 + }, + { + "epoch": 0.6966244725738396, + "grad_norm": 0.4029024839401245, + "learning_rate": 0.0008133331509752381, + "loss": 1.9651, + "step": 6604 + }, + { + "epoch": 0.6967299578059072, + "grad_norm": 0.3965834677219391, + "learning_rate": 0.0008122050799993857, + "loss": 1.9591, + "step": 6605 + }, + { + "epoch": 0.6968354430379747, + "grad_norm": 0.3624008595943451, + "learning_rate": 0.0008110785736273183, + "loss": 1.9177, + "step": 6606 + }, + { + "epoch": 0.6969409282700422, + "grad_norm": 0.4326598346233368, + "learning_rate": 0.0008099536296889731, + "loss": 1.9562, + "step": 6607 + }, + { + "epoch": 0.6970464135021097, + "grad_norm": 0.36070647835731506, + "learning_rate": 0.0008088302460172971, + "loss": 1.9073, + "step": 6608 + }, + { + "epoch": 0.6971518987341773, + "grad_norm": 0.4294513761997223, + "learning_rate": 0.0008077084204482425, + "loss": 1.9352, + "step": 6609 + }, + { + "epoch": 0.6972573839662447, + "grad_norm": 0.4008948504924774, + "learning_rate": 0.0008065881508207637, + "loss": 1.9992, + "step": 6610 + }, + { + "epoch": 
0.6973628691983123, + "grad_norm": 0.38275575637817383, + "learning_rate": 0.0008054694349768117, + "loss": 1.9125, + "step": 6611 + }, + { + "epoch": 0.6974683544303798, + "grad_norm": 0.37402161955833435, + "learning_rate": 0.000804352270761331, + "loss": 1.9363, + "step": 6612 + }, + { + "epoch": 0.6975738396624472, + "grad_norm": 0.37463709712028503, + "learning_rate": 0.0008032366560222553, + "loss": 1.9995, + "step": 6613 + }, + { + "epoch": 0.6976793248945148, + "grad_norm": 0.439776748418808, + "learning_rate": 0.0008021225886105027, + "loss": 1.9435, + "step": 6614 + }, + { + "epoch": 0.6977848101265823, + "grad_norm": 0.34077197313308716, + "learning_rate": 0.0008010100663799726, + "loss": 1.9352, + "step": 6615 + }, + { + "epoch": 0.6978902953586498, + "grad_norm": 0.3924694061279297, + "learning_rate": 0.0007998990871875402, + "loss": 1.9622, + "step": 6616 + }, + { + "epoch": 0.6979957805907173, + "grad_norm": 0.34820157289505005, + "learning_rate": 0.0007987896488930539, + "loss": 1.9572, + "step": 6617 + }, + { + "epoch": 0.6981012658227848, + "grad_norm": 0.33892008662223816, + "learning_rate": 0.0007976817493593302, + "loss": 1.9484, + "step": 6618 + }, + { + "epoch": 0.6982067510548523, + "grad_norm": 0.3890441060066223, + "learning_rate": 0.0007965753864521494, + "loss": 1.9444, + "step": 6619 + }, + { + "epoch": 0.6983122362869199, + "grad_norm": 0.3504222631454468, + "learning_rate": 0.0007954705580402523, + "loss": 1.9381, + "step": 6620 + }, + { + "epoch": 0.6984177215189873, + "grad_norm": 0.35237330198287964, + "learning_rate": 0.0007943672619953359, + "loss": 1.9409, + "step": 6621 + }, + { + "epoch": 0.6985232067510548, + "grad_norm": 0.33688294887542725, + "learning_rate": 0.0007932654961920488, + "loss": 1.9424, + "step": 6622 + }, + { + "epoch": 0.6986286919831224, + "grad_norm": 0.3547237813472748, + "learning_rate": 0.0007921652585079873, + "loss": 1.9382, + "step": 6623 + }, + { + "epoch": 0.6987341772151898, + "grad_norm": 
0.36905285716056824, + "learning_rate": 0.0007910665468236916, + "loss": 1.9517, + "step": 6624 + }, + { + "epoch": 0.6988396624472574, + "grad_norm": 0.3564024269580841, + "learning_rate": 0.0007899693590226415, + "loss": 1.9555, + "step": 6625 + }, + { + "epoch": 0.6989451476793249, + "grad_norm": 0.36301353573799133, + "learning_rate": 0.0007888736929912525, + "loss": 1.9183, + "step": 6626 + }, + { + "epoch": 0.6990506329113924, + "grad_norm": 0.3822649121284485, + "learning_rate": 0.0007877795466188712, + "loss": 1.9351, + "step": 6627 + }, + { + "epoch": 0.6991561181434599, + "grad_norm": 0.32734474539756775, + "learning_rate": 0.0007866869177977721, + "loss": 1.9565, + "step": 6628 + }, + { + "epoch": 0.6992616033755275, + "grad_norm": 0.36776626110076904, + "learning_rate": 0.0007855958044231527, + "loss": 1.9243, + "step": 6629 + }, + { + "epoch": 0.6993670886075949, + "grad_norm": 0.3583080470561981, + "learning_rate": 0.0007845062043931298, + "loss": 1.9393, + "step": 6630 + }, + { + "epoch": 0.6994725738396624, + "grad_norm": 0.4565315842628479, + "learning_rate": 0.0007834181156087356, + "loss": 1.9553, + "step": 6631 + }, + { + "epoch": 0.69957805907173, + "grad_norm": 0.41604310274124146, + "learning_rate": 0.0007823315359739137, + "loss": 1.9381, + "step": 6632 + }, + { + "epoch": 0.6996835443037974, + "grad_norm": 0.3954734802246094, + "learning_rate": 0.0007812464633955144, + "loss": 1.9554, + "step": 6633 + }, + { + "epoch": 0.699789029535865, + "grad_norm": 0.35993361473083496, + "learning_rate": 0.0007801628957832918, + "loss": 1.971, + "step": 6634 + }, + { + "epoch": 0.6998945147679325, + "grad_norm": 0.37916892766952515, + "learning_rate": 0.0007790808310498984, + "loss": 1.9534, + "step": 6635 + }, + { + "epoch": 0.7, + "grad_norm": 0.3737298846244812, + "learning_rate": 0.0007780002671108819, + "loss": 1.9291, + "step": 6636 + }, + { + "epoch": 0.7001054852320675, + "grad_norm": 0.3523250222206116, + "learning_rate": 0.0007769212018846818, 
+ "loss": 1.9565, + "step": 6637 + }, + { + "epoch": 0.700210970464135, + "grad_norm": 0.41399484872817993, + "learning_rate": 0.0007758436332926237, + "loss": 1.9579, + "step": 6638 + }, + { + "epoch": 0.7003164556962025, + "grad_norm": 0.3734181225299835, + "learning_rate": 0.000774767559258917, + "loss": 1.9364, + "step": 6639 + }, + { + "epoch": 0.70042194092827, + "grad_norm": 0.3965725004673004, + "learning_rate": 0.0007736929777106497, + "loss": 1.9464, + "step": 6640 + }, + { + "epoch": 0.7005274261603376, + "grad_norm": 0.37034326791763306, + "learning_rate": 0.0007726198865777852, + "loss": 1.9456, + "step": 6641 + }, + { + "epoch": 0.700632911392405, + "grad_norm": 0.36579567193984985, + "learning_rate": 0.000771548283793158, + "loss": 1.9299, + "step": 6642 + }, + { + "epoch": 0.7007383966244726, + "grad_norm": 0.381375253200531, + "learning_rate": 0.000770478167292469, + "loss": 1.9575, + "step": 6643 + }, + { + "epoch": 0.7008438818565401, + "grad_norm": 0.3974554240703583, + "learning_rate": 0.0007694095350142834, + "loss": 1.9064, + "step": 6644 + }, + { + "epoch": 0.7009493670886076, + "grad_norm": 0.39895349740982056, + "learning_rate": 0.0007683423849000246, + "loss": 1.9232, + "step": 6645 + }, + { + "epoch": 0.7010548523206751, + "grad_norm": 0.3761448264122009, + "learning_rate": 0.0007672767148939714, + "loss": 1.9217, + "step": 6646 + }, + { + "epoch": 0.7011603375527427, + "grad_norm": 0.3924572467803955, + "learning_rate": 0.0007662125229432543, + "loss": 1.9527, + "step": 6647 + }, + { + "epoch": 0.7012658227848101, + "grad_norm": 0.37737181782722473, + "learning_rate": 0.0007651498069978504, + "loss": 1.9813, + "step": 6648 + }, + { + "epoch": 0.7013713080168776, + "grad_norm": 0.37848880887031555, + "learning_rate": 0.0007640885650105806, + "loss": 1.9192, + "step": 6649 + }, + { + "epoch": 0.7014767932489452, + "grad_norm": 0.3911510407924652, + "learning_rate": 0.000763028794937105, + "loss": 1.9569, + "step": 6650 + }, + { + "epoch": 
0.7015822784810126, + "grad_norm": 0.38837355375289917, + "learning_rate": 0.0007619704947359191, + "loss": 1.9268, + "step": 6651 + }, + { + "epoch": 0.7016877637130802, + "grad_norm": 0.3962518870830536, + "learning_rate": 0.0007609136623683499, + "loss": 1.9678, + "step": 6652 + }, + { + "epoch": 0.7017932489451477, + "grad_norm": 0.33892473578453064, + "learning_rate": 0.0007598582957985525, + "loss": 1.9353, + "step": 6653 + }, + { + "epoch": 0.7018987341772152, + "grad_norm": 0.37119802832603455, + "learning_rate": 0.000758804392993505, + "loss": 1.9406, + "step": 6654 + }, + { + "epoch": 0.7020042194092827, + "grad_norm": 0.3315490484237671, + "learning_rate": 0.0007577519519230052, + "loss": 1.9397, + "step": 6655 + }, + { + "epoch": 0.7021097046413503, + "grad_norm": 0.34352996945381165, + "learning_rate": 0.0007567009705596673, + "loss": 1.9197, + "step": 6656 + }, + { + "epoch": 0.7022151898734177, + "grad_norm": 0.39824652671813965, + "learning_rate": 0.0007556514468789169, + "loss": 1.9441, + "step": 6657 + }, + { + "epoch": 0.7023206751054852, + "grad_norm": 0.37810850143432617, + "learning_rate": 0.0007546033788589883, + "loss": 1.9769, + "step": 6658 + }, + { + "epoch": 0.7024261603375528, + "grad_norm": 0.38909125328063965, + "learning_rate": 0.0007535567644809191, + "loss": 1.9346, + "step": 6659 + }, + { + "epoch": 0.7025316455696202, + "grad_norm": 0.3749707341194153, + "learning_rate": 0.0007525116017285476, + "loss": 1.9289, + "step": 6660 + }, + { + "epoch": 0.7026371308016878, + "grad_norm": 0.41756850481033325, + "learning_rate": 0.0007514678885885087, + "loss": 1.9088, + "step": 6661 + }, + { + "epoch": 0.7027426160337553, + "grad_norm": 0.3796330988407135, + "learning_rate": 0.000750425623050229, + "loss": 1.9438, + "step": 6662 + }, + { + "epoch": 0.7028481012658228, + "grad_norm": 0.3892050087451935, + "learning_rate": 0.0007493848031059247, + "loss": 1.9324, + "step": 6663 + }, + { + "epoch": 0.7029535864978903, + "grad_norm": 
0.40998223423957825, + "learning_rate": 0.0007483454267505959, + "loss": 1.9149, + "step": 6664 + }, + { + "epoch": 0.7030590717299579, + "grad_norm": 0.4410995841026306, + "learning_rate": 0.000747307491982024, + "loss": 1.951, + "step": 6665 + }, + { + "epoch": 0.7031645569620253, + "grad_norm": 0.3676208257675171, + "learning_rate": 0.0007462709968007675, + "loss": 1.9186, + "step": 6666 + }, + { + "epoch": 0.7032700421940928, + "grad_norm": 0.38988247513771057, + "learning_rate": 0.0007452359392101578, + "loss": 1.934, + "step": 6667 + }, + { + "epoch": 0.7033755274261604, + "grad_norm": 0.4145175516605377, + "learning_rate": 0.0007442023172162958, + "loss": 1.9354, + "step": 6668 + }, + { + "epoch": 0.7034810126582278, + "grad_norm": 0.38567057251930237, + "learning_rate": 0.0007431701288280478, + "loss": 1.9561, + "step": 6669 + }, + { + "epoch": 0.7035864978902954, + "grad_norm": 0.4250597357749939, + "learning_rate": 0.0007421393720570417, + "loss": 1.9477, + "step": 6670 + }, + { + "epoch": 0.7036919831223629, + "grad_norm": 0.3758053183555603, + "learning_rate": 0.0007411100449176633, + "loss": 1.9556, + "step": 6671 + }, + { + "epoch": 0.7037974683544304, + "grad_norm": 0.3511640727519989, + "learning_rate": 0.0007400821454270525, + "loss": 1.9535, + "step": 6672 + }, + { + "epoch": 0.7039029535864979, + "grad_norm": 0.3859745264053345, + "learning_rate": 0.0007390556716050993, + "loss": 1.951, + "step": 6673 + }, + { + "epoch": 0.7040084388185655, + "grad_norm": 0.36620068550109863, + "learning_rate": 0.0007380306214744398, + "loss": 1.9778, + "step": 6674 + }, + { + "epoch": 0.7041139240506329, + "grad_norm": 0.3675137460231781, + "learning_rate": 0.000737006993060453, + "loss": 1.9227, + "step": 6675 + }, + { + "epoch": 0.7042194092827004, + "grad_norm": 0.38119766116142273, + "learning_rate": 0.0007359847843912564, + "loss": 1.9861, + "step": 6676 + }, + { + "epoch": 0.704324894514768, + "grad_norm": 0.3428940773010254, + "learning_rate": 
0.0007349639934977029, + "loss": 1.9303, + "step": 6677 + }, + { + "epoch": 0.7044303797468354, + "grad_norm": 0.41944044828414917, + "learning_rate": 0.0007339446184133759, + "loss": 1.9608, + "step": 6678 + }, + { + "epoch": 0.704535864978903, + "grad_norm": 0.3518962860107422, + "learning_rate": 0.0007329266571745864, + "loss": 1.915, + "step": 6679 + }, + { + "epoch": 0.7046413502109705, + "grad_norm": 0.37608057260513306, + "learning_rate": 0.0007319101078203694, + "loss": 1.9124, + "step": 6680 + }, + { + "epoch": 0.704746835443038, + "grad_norm": 0.3630862236022949, + "learning_rate": 0.0007308949683924791, + "loss": 1.9257, + "step": 6681 + }, + { + "epoch": 0.7048523206751055, + "grad_norm": 0.3465906083583832, + "learning_rate": 0.0007298812369353862, + "loss": 1.9204, + "step": 6682 + }, + { + "epoch": 0.7049578059071729, + "grad_norm": 0.42490002512931824, + "learning_rate": 0.0007288689114962731, + "loss": 1.9235, + "step": 6683 + }, + { + "epoch": 0.7050632911392405, + "grad_norm": 0.3451535105705261, + "learning_rate": 0.0007278579901250316, + "loss": 1.9065, + "step": 6684 + }, + { + "epoch": 0.705168776371308, + "grad_norm": 0.38303545117378235, + "learning_rate": 0.0007268484708742574, + "loss": 1.9483, + "step": 6685 + }, + { + "epoch": 0.7052742616033755, + "grad_norm": 0.35104256868362427, + "learning_rate": 0.0007258403517992476, + "loss": 1.947, + "step": 6686 + }, + { + "epoch": 0.705379746835443, + "grad_norm": 0.4041435420513153, + "learning_rate": 0.0007248336309579965, + "loss": 1.9396, + "step": 6687 + }, + { + "epoch": 0.7054852320675106, + "grad_norm": 0.3606736660003662, + "learning_rate": 0.0007238283064111917, + "loss": 1.9217, + "step": 6688 + }, + { + "epoch": 0.705590717299578, + "grad_norm": 0.34917086362838745, + "learning_rate": 0.0007228243762222109, + "loss": 1.9646, + "step": 6689 + }, + { + "epoch": 0.7056962025316456, + "grad_norm": 0.3752404749393463, + "learning_rate": 0.0007218218384571178, + "loss": 1.9232, + "step": 
6690 + }, + { + "epoch": 0.7058016877637131, + "grad_norm": 0.3757127523422241, + "learning_rate": 0.000720820691184658, + "loss": 1.9536, + "step": 6691 + }, + { + "epoch": 0.7059071729957805, + "grad_norm": 0.37221112847328186, + "learning_rate": 0.0007198209324762563, + "loss": 1.9449, + "step": 6692 + }, + { + "epoch": 0.7060126582278481, + "grad_norm": 0.3941373825073242, + "learning_rate": 0.0007188225604060119, + "loss": 1.9543, + "step": 6693 + }, + { + "epoch": 0.7061181434599156, + "grad_norm": 0.3477690517902374, + "learning_rate": 0.0007178255730506955, + "loss": 1.9439, + "step": 6694 + }, + { + "epoch": 0.7062236286919831, + "grad_norm": 0.35666608810424805, + "learning_rate": 0.0007168299684897451, + "loss": 1.9286, + "step": 6695 + }, + { + "epoch": 0.7063291139240506, + "grad_norm": 0.3602604866027832, + "learning_rate": 0.0007158357448052624, + "loss": 1.9385, + "step": 6696 + }, + { + "epoch": 0.7064345991561182, + "grad_norm": 0.37668660283088684, + "learning_rate": 0.0007148429000820094, + "loss": 1.9152, + "step": 6697 + }, + { + "epoch": 0.7065400843881856, + "grad_norm": 0.37081390619277954, + "learning_rate": 0.0007138514324074042, + "loss": 1.9415, + "step": 6698 + }, + { + "epoch": 0.7066455696202532, + "grad_norm": 0.346884548664093, + "learning_rate": 0.0007128613398715179, + "loss": 1.9034, + "step": 6699 + }, + { + "epoch": 0.7067510548523207, + "grad_norm": 0.34316352009773254, + "learning_rate": 0.0007118726205670703, + "loss": 1.9142, + "step": 6700 + }, + { + "epoch": 0.7068565400843881, + "grad_norm": 0.3471775949001312, + "learning_rate": 0.0007108852725894269, + "loss": 1.9667, + "step": 6701 + }, + { + "epoch": 0.7069620253164557, + "grad_norm": 0.35832661390304565, + "learning_rate": 0.0007098992940365947, + "loss": 1.9448, + "step": 6702 + }, + { + "epoch": 0.7070675105485232, + "grad_norm": 0.40340620279312134, + "learning_rate": 0.0007089146830092185, + "loss": 1.9412, + "step": 6703 + }, + { + "epoch": 0.7071729957805907, 
+ "grad_norm": 0.3653823733329773, + "learning_rate": 0.0007079314376105778, + "loss": 1.9192, + "step": 6704 + }, + { + "epoch": 0.7072784810126582, + "grad_norm": 0.44821128249168396, + "learning_rate": 0.0007069495559465826, + "loss": 1.9483, + "step": 6705 + }, + { + "epoch": 0.7073839662447258, + "grad_norm": 0.3491039276123047, + "learning_rate": 0.0007059690361257701, + "loss": 1.9351, + "step": 6706 + }, + { + "epoch": 0.7074894514767932, + "grad_norm": 0.3893064856529236, + "learning_rate": 0.0007049898762593007, + "loss": 1.9463, + "step": 6707 + }, + { + "epoch": 0.7075949367088608, + "grad_norm": 0.35464584827423096, + "learning_rate": 0.0007040120744609548, + "loss": 1.9116, + "step": 6708 + }, + { + "epoch": 0.7077004219409283, + "grad_norm": 0.34953901171684265, + "learning_rate": 0.0007030356288471288, + "loss": 1.9503, + "step": 6709 + }, + { + "epoch": 0.7078059071729957, + "grad_norm": 0.3695349097251892, + "learning_rate": 0.0007020605375368316, + "loss": 1.9046, + "step": 6710 + }, + { + "epoch": 0.7079113924050633, + "grad_norm": 0.3735077977180481, + "learning_rate": 0.000701086798651681, + "loss": 1.8977, + "step": 6711 + }, + { + "epoch": 0.7080168776371308, + "grad_norm": 0.40044769644737244, + "learning_rate": 0.0007001144103159, + "loss": 1.9319, + "step": 6712 + }, + { + "epoch": 0.7081223628691983, + "grad_norm": 0.3631187975406647, + "learning_rate": 0.0006991433706563135, + "loss": 1.9408, + "step": 6713 + }, + { + "epoch": 0.7082278481012658, + "grad_norm": 0.37548330426216125, + "learning_rate": 0.0006981736778023443, + "loss": 1.942, + "step": 6714 + }, + { + "epoch": 0.7083333333333334, + "grad_norm": 0.39465761184692383, + "learning_rate": 0.0006972053298860092, + "loss": 1.8815, + "step": 6715 + }, + { + "epoch": 0.7084388185654008, + "grad_norm": 0.4171413779258728, + "learning_rate": 0.0006962383250419168, + "loss": 1.9118, + "step": 6716 + }, + { + "epoch": 0.7085443037974684, + "grad_norm": 0.3886035978794098, + 
"learning_rate": 0.0006952726614072621, + "loss": 1.9656, + "step": 6717 + }, + { + "epoch": 0.7086497890295359, + "grad_norm": 0.3800443410873413, + "learning_rate": 0.0006943083371218242, + "loss": 1.9205, + "step": 6718 + }, + { + "epoch": 0.7087552742616033, + "grad_norm": 0.3891938328742981, + "learning_rate": 0.0006933453503279619, + "loss": 1.9418, + "step": 6719 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 0.3768191337585449, + "learning_rate": 0.000692383699170611, + "loss": 1.9457, + "step": 6720 + }, + { + "epoch": 0.7089662447257384, + "grad_norm": 0.404889851808548, + "learning_rate": 0.0006914233817972798, + "loss": 1.9463, + "step": 6721 + }, + { + "epoch": 0.7090717299578059, + "grad_norm": 0.3762191832065582, + "learning_rate": 0.0006904643963580462, + "loss": 1.9447, + "step": 6722 + }, + { + "epoch": 0.7091772151898734, + "grad_norm": 0.442758709192276, + "learning_rate": 0.0006895067410055538, + "loss": 1.933, + "step": 6723 + }, + { + "epoch": 0.709282700421941, + "grad_norm": 0.4380684494972229, + "learning_rate": 0.0006885504138950082, + "loss": 1.9579, + "step": 6724 + }, + { + "epoch": 0.7093881856540084, + "grad_norm": 0.46540001034736633, + "learning_rate": 0.0006875954131841743, + "loss": 1.9397, + "step": 6725 + }, + { + "epoch": 0.709493670886076, + "grad_norm": 0.4272436499595642, + "learning_rate": 0.0006866417370333716, + "loss": 1.9378, + "step": 6726 + }, + { + "epoch": 0.7095991561181435, + "grad_norm": 0.40064412355422974, + "learning_rate": 0.0006856893836054712, + "loss": 1.9423, + "step": 6727 + }, + { + "epoch": 0.7097046413502109, + "grad_norm": 0.3635590076446533, + "learning_rate": 0.0006847383510658927, + "loss": 1.9501, + "step": 6728 + }, + { + "epoch": 0.7098101265822785, + "grad_norm": 0.3532469868659973, + "learning_rate": 0.0006837886375825994, + "loss": 1.9166, + "step": 6729 + }, + { + "epoch": 0.709915611814346, + "grad_norm": 0.3467662036418915, + "learning_rate": 0.0006828402413260966, + "loss": 
1.9087, + "step": 6730 + }, + { + "epoch": 0.7100210970464135, + "grad_norm": 0.3520323932170868, + "learning_rate": 0.0006818931604694261, + "loss": 1.9433, + "step": 6731 + }, + { + "epoch": 0.710126582278481, + "grad_norm": 0.39186418056488037, + "learning_rate": 0.0006809473931881645, + "loss": 1.9336, + "step": 6732 + }, + { + "epoch": 0.7102320675105486, + "grad_norm": 0.33142638206481934, + "learning_rate": 0.0006800029376604181, + "loss": 1.9412, + "step": 6733 + }, + { + "epoch": 0.710337552742616, + "grad_norm": 0.34664425253868103, + "learning_rate": 0.0006790597920668204, + "loss": 1.918, + "step": 6734 + }, + { + "epoch": 0.7104430379746836, + "grad_norm": 0.3558906614780426, + "learning_rate": 0.0006781179545905287, + "loss": 1.9294, + "step": 6735 + }, + { + "epoch": 0.7105485232067511, + "grad_norm": 0.36291342973709106, + "learning_rate": 0.0006771774234172195, + "loss": 1.9487, + "step": 6736 + }, + { + "epoch": 0.7106540084388185, + "grad_norm": 0.3546764552593231, + "learning_rate": 0.0006762381967350861, + "loss": 1.9205, + "step": 6737 + }, + { + "epoch": 0.7107594936708861, + "grad_norm": 0.4029354751110077, + "learning_rate": 0.0006753002727348349, + "loss": 1.9264, + "step": 6738 + }, + { + "epoch": 0.7108649789029536, + "grad_norm": 0.3454131782054901, + "learning_rate": 0.0006743636496096813, + "loss": 1.9503, + "step": 6739 + }, + { + "epoch": 0.7109704641350211, + "grad_norm": 0.3800186216831207, + "learning_rate": 0.0006734283255553471, + "loss": 1.9215, + "step": 6740 + }, + { + "epoch": 0.7110759493670886, + "grad_norm": 0.40414509177207947, + "learning_rate": 0.0006724942987700563, + "loss": 1.9248, + "step": 6741 + }, + { + "epoch": 0.7111814345991562, + "grad_norm": 0.3850378692150116, + "learning_rate": 0.0006715615674545319, + "loss": 1.9479, + "step": 6742 + }, + { + "epoch": 0.7112869198312236, + "grad_norm": 0.43503767251968384, + "learning_rate": 0.0006706301298119925, + "loss": 1.9213, + "step": 6743 + }, + { + "epoch": 
0.7113924050632912, + "grad_norm": 0.3510148227214813, + "learning_rate": 0.0006696999840481491, + "loss": 1.9172, + "step": 6744 + }, + { + "epoch": 0.7114978902953587, + "grad_norm": 0.3646567463874817, + "learning_rate": 0.0006687711283712009, + "loss": 1.9693, + "step": 6745 + }, + { + "epoch": 0.7116033755274261, + "grad_norm": 0.3914799094200134, + "learning_rate": 0.0006678435609918323, + "loss": 1.9613, + "step": 6746 + }, + { + "epoch": 0.7117088607594937, + "grad_norm": 0.35108572244644165, + "learning_rate": 0.0006669172801232098, + "loss": 1.9557, + "step": 6747 + }, + { + "epoch": 0.7118143459915611, + "grad_norm": 0.38031086325645447, + "learning_rate": 0.0006659922839809779, + "loss": 1.9375, + "step": 6748 + }, + { + "epoch": 0.7119198312236287, + "grad_norm": 0.3619939386844635, + "learning_rate": 0.0006650685707832559, + "loss": 1.9677, + "step": 6749 + }, + { + "epoch": 0.7120253164556962, + "grad_norm": 0.3756372332572937, + "learning_rate": 0.0006641461387506347, + "loss": 1.9165, + "step": 6750 + }, + { + "epoch": 0.7121308016877637, + "grad_norm": 0.346809983253479, + "learning_rate": 0.0006632249861061732, + "loss": 1.9088, + "step": 6751 + }, + { + "epoch": 0.7122362869198312, + "grad_norm": 0.3643167316913605, + "learning_rate": 0.0006623051110753947, + "loss": 1.9182, + "step": 6752 + }, + { + "epoch": 0.7123417721518988, + "grad_norm": 0.36768513917922974, + "learning_rate": 0.0006613865118862837, + "loss": 1.9345, + "step": 6753 + }, + { + "epoch": 0.7124472573839662, + "grad_norm": 0.3619231879711151, + "learning_rate": 0.0006604691867692828, + "loss": 1.9655, + "step": 6754 + }, + { + "epoch": 0.7125527426160337, + "grad_norm": 0.3877890110015869, + "learning_rate": 0.0006595531339572881, + "loss": 1.9537, + "step": 6755 + }, + { + "epoch": 0.7126582278481013, + "grad_norm": 0.37325364351272583, + "learning_rate": 0.0006586383516856475, + "loss": 1.9065, + "step": 6756 + }, + { + "epoch": 0.7127637130801687, + "grad_norm": 
0.34923622012138367, + "learning_rate": 0.000657724838192156, + "loss": 1.9493, + "step": 6757 + }, + { + "epoch": 0.7128691983122363, + "grad_norm": 0.3738572597503662, + "learning_rate": 0.0006568125917170526, + "loss": 1.9439, + "step": 6758 + }, + { + "epoch": 0.7129746835443038, + "grad_norm": 0.36128002405166626, + "learning_rate": 0.0006559016105030176, + "loss": 1.9604, + "step": 6759 + }, + { + "epoch": 0.7130801687763713, + "grad_norm": 0.3703822195529938, + "learning_rate": 0.0006549918927951678, + "loss": 1.9308, + "step": 6760 + }, + { + "epoch": 0.7131856540084388, + "grad_norm": 0.4195560812950134, + "learning_rate": 0.0006540834368410549, + "loss": 1.943, + "step": 6761 + }, + { + "epoch": 0.7132911392405064, + "grad_norm": 0.36030709743499756, + "learning_rate": 0.0006531762408906606, + "loss": 1.9124, + "step": 6762 + }, + { + "epoch": 0.7133966244725738, + "grad_norm": 0.41570615768432617, + "learning_rate": 0.0006522703031963939, + "loss": 1.9489, + "step": 6763 + }, + { + "epoch": 0.7135021097046413, + "grad_norm": 0.43188947439193726, + "learning_rate": 0.0006513656220130878, + "loss": 1.9336, + "step": 6764 + }, + { + "epoch": 0.7136075949367089, + "grad_norm": 0.4027613401412964, + "learning_rate": 0.0006504621955979959, + "loss": 1.9471, + "step": 6765 + }, + { + "epoch": 0.7137130801687763, + "grad_norm": 0.4029931426048279, + "learning_rate": 0.0006495600222107885, + "loss": 1.8986, + "step": 6766 + }, + { + "epoch": 0.7138185654008439, + "grad_norm": 0.3740691542625427, + "learning_rate": 0.0006486591001135502, + "loss": 1.9229, + "step": 6767 + }, + { + "epoch": 0.7139240506329114, + "grad_norm": 0.3652665913105011, + "learning_rate": 0.0006477594275707758, + "loss": 1.945, + "step": 6768 + }, + { + "epoch": 0.7140295358649789, + "grad_norm": 0.3738614618778229, + "learning_rate": 0.000646861002849367, + "loss": 1.9452, + "step": 6769 + }, + { + "epoch": 0.7141350210970464, + "grad_norm": 0.36457401514053345, + "learning_rate": 
0.0006459638242186297, + "loss": 1.9385, + "step": 6770 + }, + { + "epoch": 0.714240506329114, + "grad_norm": 0.38356807827949524, + "learning_rate": 0.0006450678899502701, + "loss": 1.9604, + "step": 6771 + }, + { + "epoch": 0.7143459915611814, + "grad_norm": 0.3721197545528412, + "learning_rate": 0.0006441731983183911, + "loss": 1.9277, + "step": 6772 + }, + { + "epoch": 0.7144514767932489, + "grad_norm": 0.40183621644973755, + "learning_rate": 0.0006432797475994899, + "loss": 1.965, + "step": 6773 + }, + { + "epoch": 0.7145569620253165, + "grad_norm": 0.3471564054489136, + "learning_rate": 0.0006423875360724538, + "loss": 1.9041, + "step": 6774 + }, + { + "epoch": 0.7146624472573839, + "grad_norm": 0.3955729603767395, + "learning_rate": 0.0006414965620185574, + "loss": 1.9057, + "step": 6775 + }, + { + "epoch": 0.7147679324894515, + "grad_norm": 0.3709337115287781, + "learning_rate": 0.0006406068237214593, + "loss": 1.8774, + "step": 6776 + }, + { + "epoch": 0.714873417721519, + "grad_norm": 0.376568466424942, + "learning_rate": 0.000639718319467198, + "loss": 1.9404, + "step": 6777 + }, + { + "epoch": 0.7149789029535865, + "grad_norm": 0.38373637199401855, + "learning_rate": 0.0006388310475441899, + "loss": 1.949, + "step": 6778 + }, + { + "epoch": 0.715084388185654, + "grad_norm": 0.3621743321418762, + "learning_rate": 0.0006379450062432248, + "loss": 1.9289, + "step": 6779 + }, + { + "epoch": 0.7151898734177216, + "grad_norm": 0.4237421452999115, + "learning_rate": 0.0006370601938574639, + "loss": 1.9279, + "step": 6780 + }, + { + "epoch": 0.715295358649789, + "grad_norm": 0.3529890477657318, + "learning_rate": 0.0006361766086824345, + "loss": 1.9398, + "step": 6781 + }, + { + "epoch": 0.7154008438818565, + "grad_norm": 0.3585268259048462, + "learning_rate": 0.0006352942490160293, + "loss": 1.93, + "step": 6782 + }, + { + "epoch": 0.7155063291139241, + "grad_norm": 0.3892909586429596, + "learning_rate": 0.0006344131131585007, + "loss": 1.9202, + "step": 6783 
+ }, + { + "epoch": 0.7156118143459915, + "grad_norm": 0.4150315821170807, + "learning_rate": 0.0006335331994124592, + "loss": 1.9579, + "step": 6784 + }, + { + "epoch": 0.7157172995780591, + "grad_norm": 0.3888131380081177, + "learning_rate": 0.0006326545060828696, + "loss": 1.9659, + "step": 6785 + }, + { + "epoch": 0.7158227848101266, + "grad_norm": 0.39202845096588135, + "learning_rate": 0.000631777031477047, + "loss": 1.9077, + "step": 6786 + }, + { + "epoch": 0.7159282700421941, + "grad_norm": 0.3723297715187073, + "learning_rate": 0.0006309007739046551, + "loss": 1.9219, + "step": 6787 + }, + { + "epoch": 0.7160337552742616, + "grad_norm": 0.40929871797561646, + "learning_rate": 0.0006300257316777014, + "loss": 1.9274, + "step": 6788 + }, + { + "epoch": 0.7161392405063292, + "grad_norm": 0.4241711497306824, + "learning_rate": 0.0006291519031105347, + "loss": 1.9334, + "step": 6789 + }, + { + "epoch": 0.7162447257383966, + "grad_norm": 0.3551909625530243, + "learning_rate": 0.0006282792865198421, + "loss": 1.9898, + "step": 6790 + }, + { + "epoch": 0.7163502109704641, + "grad_norm": 0.3990418314933777, + "learning_rate": 0.000627407880224645, + "loss": 1.9373, + "step": 6791 + }, + { + "epoch": 0.7164556962025317, + "grad_norm": 0.34838661551475525, + "learning_rate": 0.0006265376825462964, + "loss": 1.9204, + "step": 6792 + }, + { + "epoch": 0.7165611814345991, + "grad_norm": 0.3888564109802246, + "learning_rate": 0.0006256686918084777, + "loss": 1.9531, + "step": 6793 + }, + { + "epoch": 0.7166666666666667, + "grad_norm": 0.3744974434375763, + "learning_rate": 0.0006248009063371953, + "loss": 1.9326, + "step": 6794 + }, + { + "epoch": 0.7167721518987342, + "grad_norm": 0.3634089231491089, + "learning_rate": 0.0006239343244607771, + "loss": 1.9334, + "step": 6795 + }, + { + "epoch": 0.7168776371308017, + "grad_norm": 0.39157843589782715, + "learning_rate": 0.0006230689445098697, + "loss": 1.8991, + "step": 6796 + }, + { + "epoch": 0.7169831223628692, + 
"grad_norm": 0.4090539813041687, + "learning_rate": 0.0006222047648174351, + "loss": 1.9195, + "step": 6797 + }, + { + "epoch": 0.7170886075949368, + "grad_norm": 0.37360429763793945, + "learning_rate": 0.0006213417837187475, + "loss": 1.9138, + "step": 6798 + }, + { + "epoch": 0.7171940928270042, + "grad_norm": 0.35909512639045715, + "learning_rate": 0.00062047999955139, + "loss": 1.9331, + "step": 6799 + }, + { + "epoch": 0.7172995780590717, + "grad_norm": 0.38091936707496643, + "learning_rate": 0.0006196194106552512, + "loss": 1.8945, + "step": 6800 + }, + { + "epoch": 0.7174050632911393, + "grad_norm": 0.3951355516910553, + "learning_rate": 0.0006187600153725225, + "loss": 1.9543, + "step": 6801 + }, + { + "epoch": 0.7175105485232067, + "grad_norm": 0.38169893622398376, + "learning_rate": 0.0006179018120476945, + "loss": 1.9123, + "step": 6802 + }, + { + "epoch": 0.7176160337552743, + "grad_norm": 0.37409159541130066, + "learning_rate": 0.000617044799027554, + "loss": 1.9426, + "step": 6803 + }, + { + "epoch": 0.7177215189873418, + "grad_norm": 0.33420583605766296, + "learning_rate": 0.0006161889746611808, + "loss": 1.8979, + "step": 6804 + }, + { + "epoch": 0.7178270042194093, + "grad_norm": 0.362277090549469, + "learning_rate": 0.0006153343372999444, + "loss": 1.9608, + "step": 6805 + }, + { + "epoch": 0.7179324894514768, + "grad_norm": 0.3573530912399292, + "learning_rate": 0.0006144808852975009, + "loss": 1.9556, + "step": 6806 + }, + { + "epoch": 0.7180379746835444, + "grad_norm": 0.3626542389392853, + "learning_rate": 0.00061362861700979, + "loss": 1.9106, + "step": 6807 + }, + { + "epoch": 0.7181434599156118, + "grad_norm": 0.3424411416053772, + "learning_rate": 0.0006127775307950314, + "loss": 1.9485, + "step": 6808 + }, + { + "epoch": 0.7182489451476793, + "grad_norm": 0.4065515100955963, + "learning_rate": 0.000611927625013722, + "loss": 1.9346, + "step": 6809 + }, + { + "epoch": 0.7183544303797469, + "grad_norm": 0.3940742313861847, + 
"learning_rate": 0.0006110788980286328, + "loss": 1.8588, + "step": 6810 + }, + { + "epoch": 0.7184599156118143, + "grad_norm": 0.3407335877418518, + "learning_rate": 0.0006102313482048055, + "loss": 1.9446, + "step": 6811 + }, + { + "epoch": 0.7185654008438819, + "grad_norm": 0.36697468161582947, + "learning_rate": 0.0006093849739095494, + "loss": 1.9504, + "step": 6812 + }, + { + "epoch": 0.7186708860759494, + "grad_norm": 0.3607521057128906, + "learning_rate": 0.0006085397735124382, + "loss": 1.9349, + "step": 6813 + }, + { + "epoch": 0.7187763713080169, + "grad_norm": 0.34717366099357605, + "learning_rate": 0.0006076957453853072, + "loss": 1.9006, + "step": 6814 + }, + { + "epoch": 0.7188818565400844, + "grad_norm": 0.3959454894065857, + "learning_rate": 0.0006068528879022496, + "loss": 1.9119, + "step": 6815 + }, + { + "epoch": 0.7189873417721518, + "grad_norm": 0.3723211884498596, + "learning_rate": 0.0006060111994396143, + "loss": 1.9133, + "step": 6816 + }, + { + "epoch": 0.7190928270042194, + "grad_norm": 0.3608042299747467, + "learning_rate": 0.0006051706783760013, + "loss": 1.9196, + "step": 6817 + }, + { + "epoch": 0.7191983122362869, + "grad_norm": 0.37934261560440063, + "learning_rate": 0.0006043313230922601, + "loss": 1.9201, + "step": 6818 + }, + { + "epoch": 0.7193037974683544, + "grad_norm": 0.3706204295158386, + "learning_rate": 0.0006034931319714858, + "loss": 1.9322, + "step": 6819 + }, + { + "epoch": 0.7194092827004219, + "grad_norm": 0.4043653905391693, + "learning_rate": 0.0006026561033990158, + "loss": 1.9327, + "step": 6820 + }, + { + "epoch": 0.7195147679324895, + "grad_norm": 0.35923415422439575, + "learning_rate": 0.0006018202357624274, + "loss": 1.9367, + "step": 6821 + }, + { + "epoch": 0.7196202531645569, + "grad_norm": 0.4087372422218323, + "learning_rate": 0.0006009855274515337, + "loss": 1.9801, + "step": 6822 + }, + { + "epoch": 0.7197257383966245, + "grad_norm": 0.42502477765083313, + "learning_rate": 0.0006001519768583819, + 
"loss": 1.9024, + "step": 6823 + }, + { + "epoch": 0.719831223628692, + "grad_norm": 0.40769100189208984, + "learning_rate": 0.0005993195823772488, + "loss": 1.919, + "step": 6824 + }, + { + "epoch": 0.7199367088607594, + "grad_norm": 0.4336490333080292, + "learning_rate": 0.0005984883424046384, + "loss": 1.926, + "step": 6825 + }, + { + "epoch": 0.720042194092827, + "grad_norm": 0.3857959806919098, + "learning_rate": 0.0005976582553392788, + "loss": 1.8988, + "step": 6826 + }, + { + "epoch": 0.7201476793248945, + "grad_norm": 0.3641507923603058, + "learning_rate": 0.000596829319582119, + "loss": 1.9406, + "step": 6827 + }, + { + "epoch": 0.720253164556962, + "grad_norm": 0.3967772126197815, + "learning_rate": 0.0005960015335363258, + "loss": 1.9327, + "step": 6828 + }, + { + "epoch": 0.7203586497890295, + "grad_norm": 0.36329904198646545, + "learning_rate": 0.0005951748956072806, + "loss": 1.9208, + "step": 6829 + }, + { + "epoch": 0.7204641350210971, + "grad_norm": 0.45044955611228943, + "learning_rate": 0.000594349404202577, + "loss": 1.9384, + "step": 6830 + }, + { + "epoch": 0.7205696202531645, + "grad_norm": 0.40231257677078247, + "learning_rate": 0.0005935250577320168, + "loss": 1.9256, + "step": 6831 + }, + { + "epoch": 0.7206751054852321, + "grad_norm": 0.4253661334514618, + "learning_rate": 0.0005927018546076072, + "loss": 1.9077, + "step": 6832 + }, + { + "epoch": 0.7207805907172996, + "grad_norm": 0.3957289755344391, + "learning_rate": 0.0005918797932435585, + "loss": 1.9218, + "step": 6833 + }, + { + "epoch": 0.720886075949367, + "grad_norm": 0.4088852107524872, + "learning_rate": 0.0005910588720562799, + "loss": 1.912, + "step": 6834 + }, + { + "epoch": 0.7209915611814346, + "grad_norm": 0.3822614550590515, + "learning_rate": 0.0005902390894643773, + "loss": 1.9319, + "step": 6835 + }, + { + "epoch": 0.7210970464135021, + "grad_norm": 0.4025323987007141, + "learning_rate": 0.0005894204438886498, + "loss": 1.9109, + "step": 6836 + }, + { + "epoch": 
0.7212025316455696, + "grad_norm": 0.41767454147338867, + "learning_rate": 0.0005886029337520871, + "loss": 1.9193, + "step": 6837 + }, + { + "epoch": 0.7213080168776371, + "grad_norm": 0.3777007460594177, + "learning_rate": 0.0005877865574798655, + "loss": 1.9296, + "step": 6838 + }, + { + "epoch": 0.7214135021097047, + "grad_norm": 0.4105435907840729, + "learning_rate": 0.0005869713134993463, + "loss": 1.912, + "step": 6839 + }, + { + "epoch": 0.7215189873417721, + "grad_norm": 0.38100188970565796, + "learning_rate": 0.0005861572002400716, + "loss": 1.902, + "step": 6840 + }, + { + "epoch": 0.7216244725738397, + "grad_norm": 0.3919289708137512, + "learning_rate": 0.0005853442161337618, + "loss": 1.9101, + "step": 6841 + }, + { + "epoch": 0.7217299578059072, + "grad_norm": 0.3871398866176605, + "learning_rate": 0.0005845323596143124, + "loss": 1.9282, + "step": 6842 + }, + { + "epoch": 0.7218354430379746, + "grad_norm": 0.35708606243133545, + "learning_rate": 0.0005837216291177911, + "loss": 1.9147, + "step": 6843 + }, + { + "epoch": 0.7219409282700422, + "grad_norm": 0.39336729049682617, + "learning_rate": 0.0005829120230824345, + "loss": 1.9348, + "step": 6844 + }, + { + "epoch": 0.7220464135021097, + "grad_norm": 0.3904447555541992, + "learning_rate": 0.0005821035399486458, + "loss": 1.9443, + "step": 6845 + }, + { + "epoch": 0.7221518987341772, + "grad_norm": 0.41273802518844604, + "learning_rate": 0.0005812961781589908, + "loss": 1.9352, + "step": 6846 + }, + { + "epoch": 0.7222573839662447, + "grad_norm": 0.398284912109375, + "learning_rate": 0.000580489936158196, + "loss": 1.9334, + "step": 6847 + }, + { + "epoch": 0.7223628691983123, + "grad_norm": 0.4149520993232727, + "learning_rate": 0.0005796848123931443, + "loss": 1.9134, + "step": 6848 + }, + { + "epoch": 0.7224683544303797, + "grad_norm": 0.4072887599468231, + "learning_rate": 0.0005788808053128734, + "loss": 1.9333, + "step": 6849 + }, + { + "epoch": 0.7225738396624473, + "grad_norm": 
0.3986971378326416, + "learning_rate": 0.0005780779133685717, + "loss": 1.8892, + "step": 6850 + }, + { + "epoch": 0.7226793248945148, + "grad_norm": 0.4240944981575012, + "learning_rate": 0.0005772761350135759, + "loss": 1.9249, + "step": 6851 + }, + { + "epoch": 0.7227848101265822, + "grad_norm": 0.3390337824821472, + "learning_rate": 0.000576475468703368, + "loss": 1.9231, + "step": 6852 + }, + { + "epoch": 0.7228902953586498, + "grad_norm": 0.44053879380226135, + "learning_rate": 0.0005756759128955721, + "loss": 1.9172, + "step": 6853 + }, + { + "epoch": 0.7229957805907173, + "grad_norm": 0.3785850405693054, + "learning_rate": 0.0005748774660499515, + "loss": 1.9058, + "step": 6854 + }, + { + "epoch": 0.7231012658227848, + "grad_norm": 0.3900478780269623, + "learning_rate": 0.0005740801266284058, + "loss": 1.8936, + "step": 6855 + }, + { + "epoch": 0.7232067510548523, + "grad_norm": 0.44979360699653625, + "learning_rate": 0.0005732838930949678, + "loss": 1.9306, + "step": 6856 + }, + { + "epoch": 0.7233122362869199, + "grad_norm": 0.400293231010437, + "learning_rate": 0.000572488763915801, + "loss": 1.8977, + "step": 6857 + }, + { + "epoch": 0.7234177215189873, + "grad_norm": 0.4295171797275543, + "learning_rate": 0.0005716947375591958, + "loss": 1.9499, + "step": 6858 + }, + { + "epoch": 0.7235232067510549, + "grad_norm": 0.3877260088920593, + "learning_rate": 0.0005709018124955674, + "loss": 1.9377, + "step": 6859 + }, + { + "epoch": 0.7236286919831224, + "grad_norm": 0.39585381746292114, + "learning_rate": 0.0005701099871974524, + "loss": 1.912, + "step": 6860 + }, + { + "epoch": 0.7237341772151898, + "grad_norm": 0.3537193536758423, + "learning_rate": 0.0005693192601395058, + "loss": 1.8944, + "step": 6861 + }, + { + "epoch": 0.7238396624472574, + "grad_norm": 0.3600601255893707, + "learning_rate": 0.0005685296297984985, + "loss": 1.9411, + "step": 6862 + }, + { + "epoch": 0.7239451476793249, + "grad_norm": 0.3912869393825531, + "learning_rate": 
0.0005677410946533138, + "loss": 1.9438, + "step": 6863 + }, + { + "epoch": 0.7240506329113924, + "grad_norm": 0.37615203857421875, + "learning_rate": 0.0005669536531849449, + "loss": 1.9481, + "step": 6864 + }, + { + "epoch": 0.7241561181434599, + "grad_norm": 0.41595661640167236, + "learning_rate": 0.0005661673038764916, + "loss": 1.9162, + "step": 6865 + }, + { + "epoch": 0.7242616033755275, + "grad_norm": 0.3624691963195801, + "learning_rate": 0.000565382045213158, + "loss": 1.9196, + "step": 6866 + }, + { + "epoch": 0.7243670886075949, + "grad_norm": 0.38756266236305237, + "learning_rate": 0.000564597875682249, + "loss": 1.9188, + "step": 6867 + }, + { + "epoch": 0.7244725738396625, + "grad_norm": 0.38969850540161133, + "learning_rate": 0.0005638147937731673, + "loss": 1.9234, + "step": 6868 + }, + { + "epoch": 0.72457805907173, + "grad_norm": 0.38592109084129333, + "learning_rate": 0.0005630327979774111, + "loss": 1.8897, + "step": 6869 + }, + { + "epoch": 0.7246835443037974, + "grad_norm": 0.455768883228302, + "learning_rate": 0.0005622518867885708, + "loss": 1.8849, + "step": 6870 + }, + { + "epoch": 0.724789029535865, + "grad_norm": 0.3855791985988617, + "learning_rate": 0.000561472058702326, + "loss": 1.9269, + "step": 6871 + }, + { + "epoch": 0.7248945147679325, + "grad_norm": 0.4342122972011566, + "learning_rate": 0.0005606933122164428, + "loss": 1.933, + "step": 6872 + }, + { + "epoch": 0.725, + "grad_norm": 0.37693437933921814, + "learning_rate": 0.000559915645830771, + "loss": 1.93, + "step": 6873 + }, + { + "epoch": 0.7251054852320675, + "grad_norm": 0.440798819065094, + "learning_rate": 0.0005591390580472411, + "loss": 1.928, + "step": 6874 + }, + { + "epoch": 0.7252109704641351, + "grad_norm": 0.3743338882923126, + "learning_rate": 0.0005583635473698608, + "loss": 1.9317, + "step": 6875 + }, + { + "epoch": 0.7253164556962025, + "grad_norm": 0.3577115535736084, + "learning_rate": 0.0005575891123047136, + "loss": 1.9195, + "step": 6876 + }, + { + 
"epoch": 0.7254219409282701, + "grad_norm": 0.44070571660995483, + "learning_rate": 0.0005568157513599543, + "loss": 1.9157, + "step": 6877 + }, + { + "epoch": 0.7255274261603376, + "grad_norm": 0.3443093001842499, + "learning_rate": 0.0005560434630458071, + "loss": 1.9096, + "step": 6878 + }, + { + "epoch": 0.725632911392405, + "grad_norm": 0.3890027105808258, + "learning_rate": 0.0005552722458745627, + "loss": 1.9482, + "step": 6879 + }, + { + "epoch": 0.7257383966244726, + "grad_norm": 0.33805111050605774, + "learning_rate": 0.0005545020983605748, + "loss": 1.9155, + "step": 6880 + }, + { + "epoch": 0.72584388185654, + "grad_norm": 0.362597793340683, + "learning_rate": 0.000553733019020258, + "loss": 1.9058, + "step": 6881 + }, + { + "epoch": 0.7259493670886076, + "grad_norm": 0.41540002822875977, + "learning_rate": 0.0005529650063720844, + "loss": 1.8914, + "step": 6882 + }, + { + "epoch": 0.7260548523206751, + "grad_norm": 0.36820125579833984, + "learning_rate": 0.0005521980589365809, + "loss": 1.9211, + "step": 6883 + }, + { + "epoch": 0.7261603375527426, + "grad_norm": 0.4529680609703064, + "learning_rate": 0.0005514321752363265, + "loss": 1.9102, + "step": 6884 + }, + { + "epoch": 0.7262658227848101, + "grad_norm": 0.34086447954177856, + "learning_rate": 0.0005506673537959495, + "loss": 1.9381, + "step": 6885 + }, + { + "epoch": 0.7263713080168777, + "grad_norm": 0.477765291929245, + "learning_rate": 0.0005499035931421242, + "loss": 1.8731, + "step": 6886 + }, + { + "epoch": 0.7264767932489451, + "grad_norm": 0.3341849446296692, + "learning_rate": 0.0005491408918035683, + "loss": 1.9258, + "step": 6887 + }, + { + "epoch": 0.7265822784810126, + "grad_norm": 0.3955651521682739, + "learning_rate": 0.0005483792483110407, + "loss": 1.9347, + "step": 6888 + }, + { + "epoch": 0.7266877637130802, + "grad_norm": 0.3600245416164398, + "learning_rate": 0.0005476186611973374, + "loss": 1.9168, + "step": 6889 + }, + { + "epoch": 0.7267932489451476, + "grad_norm": 
0.32960817217826843, + "learning_rate": 0.0005468591289972898, + "loss": 1.9043, + "step": 6890 + }, + { + "epoch": 0.7268987341772152, + "grad_norm": 0.3646381199359894, + "learning_rate": 0.0005461006502477612, + "loss": 1.9441, + "step": 6891 + }, + { + "epoch": 0.7270042194092827, + "grad_norm": 0.3528483510017395, + "learning_rate": 0.0005453432234876445, + "loss": 1.9398, + "step": 6892 + }, + { + "epoch": 0.7271097046413502, + "grad_norm": 0.38321539759635925, + "learning_rate": 0.000544586847257859, + "loss": 1.9313, + "step": 6893 + }, + { + "epoch": 0.7272151898734177, + "grad_norm": 0.34626635909080505, + "learning_rate": 0.0005438315201013477, + "loss": 1.8944, + "step": 6894 + }, + { + "epoch": 0.7273206751054853, + "grad_norm": 0.37977686524391174, + "learning_rate": 0.0005430772405630743, + "loss": 1.8764, + "step": 6895 + }, + { + "epoch": 0.7274261603375527, + "grad_norm": 0.40273892879486084, + "learning_rate": 0.0005423240071900209, + "loss": 1.92, + "step": 6896 + }, + { + "epoch": 0.7275316455696202, + "grad_norm": 0.3591029942035675, + "learning_rate": 0.0005415718185311847, + "loss": 1.9192, + "step": 6897 + }, + { + "epoch": 0.7276371308016878, + "grad_norm": 0.3676699101924896, + "learning_rate": 0.0005408206731375755, + "loss": 1.9206, + "step": 6898 + }, + { + "epoch": 0.7277426160337552, + "grad_norm": 0.3682975172996521, + "learning_rate": 0.000540070569562213, + "loss": 1.9286, + "step": 6899 + }, + { + "epoch": 0.7278481012658228, + "grad_norm": 0.39900967478752136, + "learning_rate": 0.0005393215063601232, + "loss": 1.8927, + "step": 6900 + }, + { + "epoch": 0.7279535864978903, + "grad_norm": 0.4150860905647278, + "learning_rate": 0.0005385734820883369, + "loss": 1.9349, + "step": 6901 + }, + { + "epoch": 0.7280590717299578, + "grad_norm": 0.3589065968990326, + "learning_rate": 0.000537826495305886, + "loss": 1.9276, + "step": 6902 + }, + { + "epoch": 0.7281645569620253, + "grad_norm": 0.3852872848510742, + "learning_rate": 
0.000537080544573801, + "loss": 1.953, + "step": 6903 + }, + { + "epoch": 0.7282700421940929, + "grad_norm": 0.38413313031196594, + "learning_rate": 0.000536335628455108, + "loss": 1.9108, + "step": 6904 + }, + { + "epoch": 0.7283755274261603, + "grad_norm": 0.35310956835746765, + "learning_rate": 0.0005355917455148267, + "loss": 1.9218, + "step": 6905 + }, + { + "epoch": 0.7284810126582278, + "grad_norm": 0.37403932213783264, + "learning_rate": 0.0005348488943199665, + "loss": 1.9391, + "step": 6906 + }, + { + "epoch": 0.7285864978902954, + "grad_norm": 0.34112676978111267, + "learning_rate": 0.0005341070734395245, + "loss": 1.9403, + "step": 6907 + }, + { + "epoch": 0.7286919831223628, + "grad_norm": 0.3875408172607422, + "learning_rate": 0.0005333662814444825, + "loss": 1.9387, + "step": 6908 + }, + { + "epoch": 0.7287974683544304, + "grad_norm": 0.34919819235801697, + "learning_rate": 0.0005326265169078048, + "loss": 1.9752, + "step": 6909 + }, + { + "epoch": 0.7289029535864979, + "grad_norm": 0.3418669104576111, + "learning_rate": 0.0005318877784044343, + "loss": 1.9401, + "step": 6910 + }, + { + "epoch": 0.7290084388185654, + "grad_norm": 0.38137495517730713, + "learning_rate": 0.0005311500645112907, + "loss": 1.9309, + "step": 6911 + }, + { + "epoch": 0.7291139240506329, + "grad_norm": 0.37904641032218933, + "learning_rate": 0.0005304133738072676, + "loss": 1.8839, + "step": 6912 + }, + { + "epoch": 0.7292194092827005, + "grad_norm": 0.384857714176178, + "learning_rate": 0.0005296777048732292, + "loss": 1.9372, + "step": 6913 + }, + { + "epoch": 0.7293248945147679, + "grad_norm": 0.4165876507759094, + "learning_rate": 0.0005289430562920086, + "loss": 1.9107, + "step": 6914 + }, + { + "epoch": 0.7294303797468354, + "grad_norm": 0.36003291606903076, + "learning_rate": 0.0005282094266484041, + "loss": 1.9272, + "step": 6915 + }, + { + "epoch": 0.729535864978903, + "grad_norm": 0.40258878469467163, + "learning_rate": 0.0005274768145291769, + "loss": 1.9147, + 
"step": 6916 + }, + { + "epoch": 0.7296413502109704, + "grad_norm": 0.41679683327674866, + "learning_rate": 0.0005267452185230483, + "loss": 1.9338, + "step": 6917 + }, + { + "epoch": 0.729746835443038, + "grad_norm": 0.3759574294090271, + "learning_rate": 0.000526014637220697, + "loss": 1.9356, + "step": 6918 + }, + { + "epoch": 0.7298523206751055, + "grad_norm": 0.40913063287734985, + "learning_rate": 0.0005252850692147567, + "loss": 1.9221, + "step": 6919 + }, + { + "epoch": 0.729957805907173, + "grad_norm": 0.40419331192970276, + "learning_rate": 0.0005245565130998126, + "loss": 1.9265, + "step": 6920 + }, + { + "epoch": 0.7300632911392405, + "grad_norm": 0.39622029662132263, + "learning_rate": 0.0005238289674723993, + "loss": 1.9272, + "step": 6921 + }, + { + "epoch": 0.7301687763713081, + "grad_norm": 0.36353862285614014, + "learning_rate": 0.0005231024309309981, + "loss": 1.9166, + "step": 6922 + }, + { + "epoch": 0.7302742616033755, + "grad_norm": 0.369181364774704, + "learning_rate": 0.0005223769020760345, + "loss": 1.9312, + "step": 6923 + }, + { + "epoch": 0.730379746835443, + "grad_norm": 0.45153477787971497, + "learning_rate": 0.0005216523795098743, + "loss": 1.9206, + "step": 6924 + }, + { + "epoch": 0.7304852320675106, + "grad_norm": 0.3454451262950897, + "learning_rate": 0.0005209288618368225, + "loss": 1.9412, + "step": 6925 + }, + { + "epoch": 0.730590717299578, + "grad_norm": 0.44680047035217285, + "learning_rate": 0.0005202063476631198, + "loss": 1.942, + "step": 6926 + }, + { + "epoch": 0.7306962025316456, + "grad_norm": 0.3934580087661743, + "learning_rate": 0.0005194848355969396, + "loss": 1.9189, + "step": 6927 + }, + { + "epoch": 0.7308016877637131, + "grad_norm": 0.34695473313331604, + "learning_rate": 0.0005187643242483862, + "loss": 1.9053, + "step": 6928 + }, + { + "epoch": 0.7309071729957806, + "grad_norm": 0.4089353382587433, + "learning_rate": 0.0005180448122294913, + "loss": 1.9185, + "step": 6929 + }, + { + "epoch": 
0.7310126582278481, + "grad_norm": 0.3782200515270233, + "learning_rate": 0.000517326298154212, + "loss": 1.9458, + "step": 6930 + }, + { + "epoch": 0.7311181434599157, + "grad_norm": 0.3344726264476776, + "learning_rate": 0.0005166087806384274, + "loss": 1.9407, + "step": 6931 + }, + { + "epoch": 0.7312236286919831, + "grad_norm": 0.40273237228393555, + "learning_rate": 0.0005158922582999367, + "loss": 1.9057, + "step": 6932 + }, + { + "epoch": 0.7313291139240506, + "grad_norm": 0.3702074885368347, + "learning_rate": 0.0005151767297584562, + "loss": 1.9536, + "step": 6933 + }, + { + "epoch": 0.7314345991561182, + "grad_norm": 0.37584012746810913, + "learning_rate": 0.0005144621936356161, + "loss": 1.909, + "step": 6934 + }, + { + "epoch": 0.7315400843881856, + "grad_norm": 0.3737192153930664, + "learning_rate": 0.000513748648554959, + "loss": 1.9622, + "step": 6935 + }, + { + "epoch": 0.7316455696202532, + "grad_norm": 0.3839816749095917, + "learning_rate": 0.0005130360931419364, + "loss": 1.8831, + "step": 6936 + }, + { + "epoch": 0.7317510548523207, + "grad_norm": 0.36439499258995056, + "learning_rate": 0.0005123245260239057, + "loss": 1.9161, + "step": 6937 + }, + { + "epoch": 0.7318565400843882, + "grad_norm": 0.3615054190158844, + "learning_rate": 0.0005116139458301291, + "loss": 1.9458, + "step": 6938 + }, + { + "epoch": 0.7319620253164557, + "grad_norm": 0.36644038558006287, + "learning_rate": 0.0005109043511917693, + "loss": 1.9369, + "step": 6939 + }, + { + "epoch": 0.7320675105485233, + "grad_norm": 0.4220210909843445, + "learning_rate": 0.0005101957407418877, + "loss": 1.9403, + "step": 6940 + }, + { + "epoch": 0.7321729957805907, + "grad_norm": 0.4090961515903473, + "learning_rate": 0.0005094881131154418, + "loss": 1.9546, + "step": 6941 + }, + { + "epoch": 0.7322784810126582, + "grad_norm": 0.4096966087818146, + "learning_rate": 0.0005087814669492819, + "loss": 1.8959, + "step": 6942 + }, + { + "epoch": 0.7323839662447258, + "grad_norm": 
0.3865748345851898, + "learning_rate": 0.0005080758008821495, + "loss": 1.8924, + "step": 6943 + }, + { + "epoch": 0.7324894514767932, + "grad_norm": 0.42584067583084106, + "learning_rate": 0.0005073711135546738, + "loss": 1.9197, + "step": 6944 + }, + { + "epoch": 0.7325949367088608, + "grad_norm": 0.4082348346710205, + "learning_rate": 0.0005066674036093695, + "loss": 1.9424, + "step": 6945 + }, + { + "epoch": 0.7327004219409282, + "grad_norm": 0.3704657554626465, + "learning_rate": 0.000505964669690634, + "loss": 1.906, + "step": 6946 + }, + { + "epoch": 0.7328059071729958, + "grad_norm": 0.4124796390533447, + "learning_rate": 0.0005052629104447452, + "loss": 1.9197, + "step": 6947 + }, + { + "epoch": 0.7329113924050633, + "grad_norm": 0.3593192994594574, + "learning_rate": 0.0005045621245198582, + "loss": 1.9573, + "step": 6948 + }, + { + "epoch": 0.7330168776371307, + "grad_norm": 0.4005964994430542, + "learning_rate": 0.0005038623105660032, + "loss": 1.9744, + "step": 6949 + }, + { + "epoch": 0.7331223628691983, + "grad_norm": 0.37814298272132874, + "learning_rate": 0.0005031634672350829, + "loss": 1.924, + "step": 6950 + }, + { + "epoch": 0.7332278481012658, + "grad_norm": 0.38860929012298584, + "learning_rate": 0.0005024655931808696, + "loss": 1.9401, + "step": 6951 + }, + { + "epoch": 0.7333333333333333, + "grad_norm": 0.3800739645957947, + "learning_rate": 0.0005017686870590029, + "loss": 1.9083, + "step": 6952 + }, + { + "epoch": 0.7334388185654008, + "grad_norm": 0.417728066444397, + "learning_rate": 0.0005010727475269868, + "loss": 1.9071, + "step": 6953 + }, + { + "epoch": 0.7335443037974684, + "grad_norm": 0.3886604607105255, + "learning_rate": 0.0005003777732441875, + "loss": 1.9075, + "step": 6954 + }, + { + "epoch": 0.7336497890295358, + "grad_norm": 0.36532893776893616, + "learning_rate": 0.0004996837628718307, + "loss": 1.923, + "step": 6955 + }, + { + "epoch": 0.7337552742616034, + "grad_norm": 0.3933946192264557, + "learning_rate": 
0.0004989907150729988, + "loss": 1.8887, + "step": 6956 + }, + { + "epoch": 0.7338607594936709, + "grad_norm": 0.37590041756629944, + "learning_rate": 0.0004982986285126283, + "loss": 1.9634, + "step": 6957 + }, + { + "epoch": 0.7339662447257383, + "grad_norm": 0.3601176142692566, + "learning_rate": 0.0004976075018575078, + "loss": 1.9003, + "step": 6958 + }, + { + "epoch": 0.7340717299578059, + "grad_norm": 0.3935020864009857, + "learning_rate": 0.0004969173337762747, + "loss": 1.8813, + "step": 6959 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 0.35613903403282166, + "learning_rate": 0.0004962281229394129, + "loss": 1.917, + "step": 6960 + }, + { + "epoch": 0.7342827004219409, + "grad_norm": 0.3877807855606079, + "learning_rate": 0.0004955398680192508, + "loss": 1.9465, + "step": 6961 + }, + { + "epoch": 0.7343881856540084, + "grad_norm": 0.35526272654533386, + "learning_rate": 0.0004948525676899577, + "loss": 1.8914, + "step": 6962 + }, + { + "epoch": 0.734493670886076, + "grad_norm": 0.34581905603408813, + "learning_rate": 0.0004941662206275422, + "loss": 1.8811, + "step": 6963 + }, + { + "epoch": 0.7345991561181434, + "grad_norm": 0.3611166179180145, + "learning_rate": 0.0004934808255098488, + "loss": 1.9234, + "step": 6964 + }, + { + "epoch": 0.734704641350211, + "grad_norm": 0.3468420207500458, + "learning_rate": 0.000492796381016556, + "loss": 1.9075, + "step": 6965 + }, + { + "epoch": 0.7348101265822785, + "grad_norm": 0.39805641770362854, + "learning_rate": 0.0004921128858291739, + "loss": 1.9205, + "step": 6966 + }, + { + "epoch": 0.734915611814346, + "grad_norm": 0.3483985662460327, + "learning_rate": 0.0004914303386310408, + "loss": 1.9106, + "step": 6967 + }, + { + "epoch": 0.7350210970464135, + "grad_norm": 0.37753117084503174, + "learning_rate": 0.0004907487381073215, + "loss": 1.9213, + "step": 6968 + }, + { + "epoch": 0.735126582278481, + "grad_norm": 0.3784444034099579, + "learning_rate": 0.0004900680829450042, + "loss": 1.92, + "step": 
6969 + }, + { + "epoch": 0.7352320675105485, + "grad_norm": 0.362934410572052, + "learning_rate": 0.0004893883718328983, + "loss": 1.899, + "step": 6970 + }, + { + "epoch": 0.735337552742616, + "grad_norm": 0.35920560359954834, + "learning_rate": 0.000488709603461632, + "loss": 1.9039, + "step": 6971 + }, + { + "epoch": 0.7354430379746836, + "grad_norm": 0.3621824383735657, + "learning_rate": 0.00048803177652364935, + "loss": 1.8986, + "step": 6972 + }, + { + "epoch": 0.735548523206751, + "grad_norm": 0.37738972902297974, + "learning_rate": 0.0004873548897132077, + "loss": 1.8888, + "step": 6973 + }, + { + "epoch": 0.7356540084388186, + "grad_norm": 0.4269740581512451, + "learning_rate": 0.000486678941726376, + "loss": 1.9236, + "step": 6974 + }, + { + "epoch": 0.7357594936708861, + "grad_norm": 0.3537449240684509, + "learning_rate": 0.00048600393126103117, + "loss": 1.9366, + "step": 6975 + }, + { + "epoch": 0.7358649789029535, + "grad_norm": 0.38083332777023315, + "learning_rate": 0.0004853298570168566, + "loss": 1.8898, + "step": 6976 + }, + { + "epoch": 0.7359704641350211, + "grad_norm": 0.3891746401786804, + "learning_rate": 0.00048465671769533884, + "loss": 1.9324, + "step": 6977 + }, + { + "epoch": 0.7360759493670886, + "grad_norm": 0.374962717294693, + "learning_rate": 0.00048398451199976574, + "loss": 1.9056, + "step": 6978 + }, + { + "epoch": 0.7361814345991561, + "grad_norm": 0.4186440706253052, + "learning_rate": 0.0004833132386352233, + "loss": 1.9224, + "step": 6979 + }, + { + "epoch": 0.7362869198312236, + "grad_norm": 0.3791608214378357, + "learning_rate": 0.0004826428963085938, + "loss": 1.9019, + "step": 6980 + }, + { + "epoch": 0.7363924050632912, + "grad_norm": 0.37731680274009705, + "learning_rate": 0.000481973483728553, + "loss": 1.9174, + "step": 6981 + }, + { + "epoch": 0.7364978902953586, + "grad_norm": 0.3545527458190918, + "learning_rate": 0.0004813049996055675, + "loss": 1.8966, + "step": 6982 + }, + { + "epoch": 0.7366033755274262, + 
"grad_norm": 0.3598862886428833, + "learning_rate": 0.00048063744265189275, + "loss": 1.881, + "step": 6983 + }, + { + "epoch": 0.7367088607594937, + "grad_norm": 0.39676910638809204, + "learning_rate": 0.0004799708115815701, + "loss": 1.9372, + "step": 6984 + }, + { + "epoch": 0.7368143459915611, + "grad_norm": 0.36016085743904114, + "learning_rate": 0.0004793051051104244, + "loss": 1.9224, + "step": 6985 + }, + { + "epoch": 0.7369198312236287, + "grad_norm": 0.36627820134162903, + "learning_rate": 0.0004786403219560618, + "loss": 1.8887, + "step": 6986 + }, + { + "epoch": 0.7370253164556962, + "grad_norm": 0.35537031292915344, + "learning_rate": 0.000477976460837867, + "loss": 1.882, + "step": 6987 + }, + { + "epoch": 0.7371308016877637, + "grad_norm": 0.3717958629131317, + "learning_rate": 0.00047731352047700095, + "loss": 1.9372, + "step": 6988 + }, + { + "epoch": 0.7372362869198312, + "grad_norm": 0.38649439811706543, + "learning_rate": 0.00047665149959639813, + "loss": 1.9594, + "step": 6989 + }, + { + "epoch": 0.7373417721518988, + "grad_norm": 0.36166420578956604, + "learning_rate": 0.00047599039692076457, + "loss": 1.9022, + "step": 6990 + }, + { + "epoch": 0.7374472573839662, + "grad_norm": 0.358929842710495, + "learning_rate": 0.0004753302111765748, + "loss": 1.9319, + "step": 6991 + }, + { + "epoch": 0.7375527426160338, + "grad_norm": 0.39948341250419617, + "learning_rate": 0.00047467094109206984, + "loss": 1.9421, + "step": 6992 + }, + { + "epoch": 0.7376582278481013, + "grad_norm": 0.33284103870391846, + "learning_rate": 0.0004740125853972546, + "loss": 1.9116, + "step": 6993 + }, + { + "epoch": 0.7377637130801687, + "grad_norm": 0.3848295211791992, + "learning_rate": 0.00047335514282389557, + "loss": 1.9243, + "step": 6994 + }, + { + "epoch": 0.7378691983122363, + "grad_norm": 0.35793349146842957, + "learning_rate": 0.0004726986121055179, + "loss": 1.9265, + "step": 6995 + }, + { + "epoch": 0.7379746835443038, + "grad_norm": 0.37075918912887573, + 
"learning_rate": 0.00047204299197740364, + "loss": 1.9218, + "step": 6996 + }, + { + "epoch": 0.7380801687763713, + "grad_norm": 0.3663555681705475, + "learning_rate": 0.0004713882811765889, + "loss": 1.9042, + "step": 6997 + }, + { + "epoch": 0.7381856540084388, + "grad_norm": 0.3246367275714874, + "learning_rate": 0.0004707344784418611, + "loss": 1.8866, + "step": 6998 + }, + { + "epoch": 0.7382911392405064, + "grad_norm": 0.37390920519828796, + "learning_rate": 0.0004700815825137577, + "loss": 1.9627, + "step": 6999 + }, + { + "epoch": 0.7383966244725738, + "grad_norm": 0.38411885499954224, + "learning_rate": 0.0004694295921345622, + "loss": 1.9041, + "step": 7000 + }, + { + "epoch": 0.7385021097046414, + "grad_norm": 0.4032052755355835, + "learning_rate": 0.0004687785060483032, + "loss": 1.8886, + "step": 7001 + }, + { + "epoch": 0.7386075949367089, + "grad_norm": 0.3617439866065979, + "learning_rate": 0.0004681283230007507, + "loss": 1.9026, + "step": 7002 + }, + { + "epoch": 0.7387130801687763, + "grad_norm": 0.4233638346195221, + "learning_rate": 0.0004674790417394145, + "loss": 1.9038, + "step": 7003 + }, + { + "epoch": 0.7388185654008439, + "grad_norm": 0.36491167545318604, + "learning_rate": 0.00046683066101354197, + "loss": 1.8703, + "step": 7004 + }, + { + "epoch": 0.7389240506329114, + "grad_norm": 0.3933751881122589, + "learning_rate": 0.00046618317957411475, + "loss": 1.8834, + "step": 7005 + }, + { + "epoch": 0.7390295358649789, + "grad_norm": 0.39514032006263733, + "learning_rate": 0.00046553659617384684, + "loss": 1.8863, + "step": 7006 + }, + { + "epoch": 0.7391350210970464, + "grad_norm": 0.3567812144756317, + "learning_rate": 0.00046489090956718234, + "loss": 1.906, + "step": 7007 + }, + { + "epoch": 0.739240506329114, + "grad_norm": 0.4003152847290039, + "learning_rate": 0.00046424611851029313, + "loss": 1.8673, + "step": 7008 + }, + { + "epoch": 0.7393459915611814, + "grad_norm": 0.40445640683174133, + "learning_rate": 0.00046360222176107584, 
+ "loss": 1.915, + "step": 7009 + }, + { + "epoch": 0.739451476793249, + "grad_norm": 0.3979579210281372, + "learning_rate": 0.00046295921807915015, + "loss": 1.9008, + "step": 7010 + }, + { + "epoch": 0.7395569620253165, + "grad_norm": 0.40258511900901794, + "learning_rate": 0.0004623171062258558, + "loss": 1.9458, + "step": 7011 + }, + { + "epoch": 0.739662447257384, + "grad_norm": 0.4149259924888611, + "learning_rate": 0.00046167588496425074, + "loss": 1.9088, + "step": 7012 + }, + { + "epoch": 0.7397679324894515, + "grad_norm": 0.36666741967201233, + "learning_rate": 0.0004610355530591087, + "loss": 1.9111, + "step": 7013 + }, + { + "epoch": 0.7398734177215189, + "grad_norm": 0.4221954643726349, + "learning_rate": 0.0004603961092769163, + "loss": 1.8942, + "step": 7014 + }, + { + "epoch": 0.7399789029535865, + "grad_norm": 0.4007534980773926, + "learning_rate": 0.0004597575523858713, + "loss": 1.9015, + "step": 7015 + }, + { + "epoch": 0.740084388185654, + "grad_norm": 0.43254899978637695, + "learning_rate": 0.0004591198811558795, + "loss": 1.9393, + "step": 7016 + }, + { + "epoch": 0.7401898734177215, + "grad_norm": 0.45798203349113464, + "learning_rate": 0.0004584830943585531, + "loss": 1.902, + "step": 7017 + }, + { + "epoch": 0.740295358649789, + "grad_norm": 0.39186665415763855, + "learning_rate": 0.0004578471907672084, + "loss": 1.9155, + "step": 7018 + }, + { + "epoch": 0.7404008438818566, + "grad_norm": 0.4148457646369934, + "learning_rate": 0.0004572121691568625, + "loss": 1.8774, + "step": 7019 + }, + { + "epoch": 0.740506329113924, + "grad_norm": 0.38780477643013, + "learning_rate": 0.00045657802830423164, + "loss": 1.9165, + "step": 7020 + }, + { + "epoch": 0.7406118143459915, + "grad_norm": 0.3670617938041687, + "learning_rate": 0.0004559447669877288, + "loss": 1.9129, + "step": 7021 + }, + { + "epoch": 0.7407172995780591, + "grad_norm": 0.38393741846084595, + "learning_rate": 0.00045531238398746133, + "loss": 1.9018, + "step": 7022 + }, + { + 
"epoch": 0.7408227848101265, + "grad_norm": 0.4314410984516144, + "learning_rate": 0.0004546808780852286, + "loss": 1.8812, + "step": 7023 + }, + { + "epoch": 0.7409282700421941, + "grad_norm": 0.3407090902328491, + "learning_rate": 0.0004540502480645194, + "loss": 1.8905, + "step": 7024 + }, + { + "epoch": 0.7410337552742616, + "grad_norm": 0.43238121271133423, + "learning_rate": 0.0004534204927105097, + "loss": 1.8993, + "step": 7025 + }, + { + "epoch": 0.7411392405063291, + "grad_norm": 0.33827492594718933, + "learning_rate": 0.0004527916108100607, + "loss": 1.8996, + "step": 7026 + }, + { + "epoch": 0.7412447257383966, + "grad_norm": 0.38072746992111206, + "learning_rate": 0.00045216360115171613, + "loss": 1.9356, + "step": 7027 + }, + { + "epoch": 0.7413502109704642, + "grad_norm": 0.34970512986183167, + "learning_rate": 0.00045153646252569976, + "loss": 1.9521, + "step": 7028 + }, + { + "epoch": 0.7414556962025316, + "grad_norm": 0.33470314741134644, + "learning_rate": 0.00045091019372391354, + "loss": 1.9335, + "step": 7029 + }, + { + "epoch": 0.7415611814345991, + "grad_norm": 0.33529531955718994, + "learning_rate": 0.00045028479353993473, + "loss": 1.8979, + "step": 7030 + }, + { + "epoch": 0.7416666666666667, + "grad_norm": 0.3532085716724396, + "learning_rate": 0.00044966026076901413, + "loss": 1.9084, + "step": 7031 + }, + { + "epoch": 0.7417721518987341, + "grad_norm": 0.33775195479393005, + "learning_rate": 0.00044903659420807347, + "loss": 1.9154, + "step": 7032 + }, + { + "epoch": 0.7418776371308017, + "grad_norm": 0.3483448028564453, + "learning_rate": 0.000448413792655703, + "loss": 1.9251, + "step": 7033 + }, + { + "epoch": 0.7419831223628692, + "grad_norm": 0.3540234863758087, + "learning_rate": 0.0004477918549121593, + "loss": 1.9024, + "step": 7034 + }, + { + "epoch": 0.7420886075949367, + "grad_norm": 0.3542759418487549, + "learning_rate": 0.0004471707797793631, + "loss": 1.9168, + "step": 7035 + }, + { + "epoch": 0.7421940928270042, + 
"grad_norm": 0.34796369075775146, + "learning_rate": 0.00044655056606089655, + "loss": 1.8983, + "step": 7036 + }, + { + "epoch": 0.7422995780590718, + "grad_norm": 0.3554125726222992, + "learning_rate": 0.00044593121256200163, + "loss": 1.9125, + "step": 7037 + }, + { + "epoch": 0.7424050632911392, + "grad_norm": 0.3342958092689514, + "learning_rate": 0.000445312718089577, + "loss": 1.9195, + "step": 7038 + }, + { + "epoch": 0.7425105485232067, + "grad_norm": 0.3322131931781769, + "learning_rate": 0.0004446950814521764, + "loss": 1.9439, + "step": 7039 + }, + { + "epoch": 0.7426160337552743, + "grad_norm": 0.3350513279438019, + "learning_rate": 0.00044407830146000587, + "loss": 1.8528, + "step": 7040 + }, + { + "epoch": 0.7427215189873417, + "grad_norm": 0.3762584924697876, + "learning_rate": 0.00044346237692492177, + "loss": 1.8777, + "step": 7041 + }, + { + "epoch": 0.7428270042194093, + "grad_norm": 0.33629825711250305, + "learning_rate": 0.0004428473066604284, + "loss": 1.9693, + "step": 7042 + }, + { + "epoch": 0.7429324894514768, + "grad_norm": 0.35530778765678406, + "learning_rate": 0.0004422330894816757, + "loss": 1.9376, + "step": 7043 + }, + { + "epoch": 0.7430379746835443, + "grad_norm": 0.3398701250553131, + "learning_rate": 0.0004416197242054569, + "loss": 1.8896, + "step": 7044 + }, + { + "epoch": 0.7431434599156118, + "grad_norm": 0.3625837564468384, + "learning_rate": 0.0004410072096502064, + "loss": 1.9244, + "step": 7045 + }, + { + "epoch": 0.7432489451476794, + "grad_norm": 0.386341392993927, + "learning_rate": 0.00044039554463599716, + "loss": 1.9157, + "step": 7046 + }, + { + "epoch": 0.7433544303797468, + "grad_norm": 0.3568350672721863, + "learning_rate": 0.00043978472798453895, + "loss": 1.8821, + "step": 7047 + }, + { + "epoch": 0.7434599156118143, + "grad_norm": 0.35845717787742615, + "learning_rate": 0.0004391747585191759, + "loss": 1.9055, + "step": 7048 + }, + { + "epoch": 0.7435654008438819, + "grad_norm": 0.34664830565452576, + 
"learning_rate": 0.0004385656350648835, + "loss": 1.8919, + "step": 7049 + }, + { + "epoch": 0.7436708860759493, + "grad_norm": 0.35776758193969727, + "learning_rate": 0.0004379573564482676, + "loss": 1.8728, + "step": 7050 + }, + { + "epoch": 0.7437763713080169, + "grad_norm": 0.37044569849967957, + "learning_rate": 0.0004373499214975615, + "loss": 1.9208, + "step": 7051 + }, + { + "epoch": 0.7438818565400844, + "grad_norm": 0.3854825794696808, + "learning_rate": 0.0004367433290426232, + "loss": 1.894, + "step": 7052 + }, + { + "epoch": 0.7439873417721519, + "grad_norm": 0.40100082755088806, + "learning_rate": 0.0004361375779149342, + "loss": 1.8741, + "step": 7053 + }, + { + "epoch": 0.7440928270042194, + "grad_norm": 0.3790895342826843, + "learning_rate": 0.0004355326669475963, + "loss": 1.901, + "step": 7054 + }, + { + "epoch": 0.744198312236287, + "grad_norm": 0.35633736848831177, + "learning_rate": 0.0004349285949753299, + "loss": 1.9309, + "step": 7055 + }, + { + "epoch": 0.7443037974683544, + "grad_norm": 0.35110795497894287, + "learning_rate": 0.0004343253608344718, + "loss": 1.9376, + "step": 7056 + }, + { + "epoch": 0.744409282700422, + "grad_norm": 0.35536065697669983, + "learning_rate": 0.0004337229633629726, + "loss": 1.8925, + "step": 7057 + }, + { + "epoch": 0.7445147679324895, + "grad_norm": 0.33565470576286316, + "learning_rate": 0.0004331214014003945, + "loss": 1.9372, + "step": 7058 + }, + { + "epoch": 0.7446202531645569, + "grad_norm": 0.3802632987499237, + "learning_rate": 0.00043252067378790946, + "loss": 1.91, + "step": 7059 + }, + { + "epoch": 0.7447257383966245, + "grad_norm": 0.3451473116874695, + "learning_rate": 0.0004319207793682963, + "loss": 1.8998, + "step": 7060 + }, + { + "epoch": 0.744831223628692, + "grad_norm": 0.3607955574989319, + "learning_rate": 0.0004313217169859396, + "loss": 1.9413, + "step": 7061 + }, + { + "epoch": 0.7449367088607595, + "grad_norm": 0.3509884476661682, + "learning_rate": 0.0004307234854868261, + 
"loss": 1.8862, + "step": 7062 + }, + { + "epoch": 0.745042194092827, + "grad_norm": 0.3608245253562927, + "learning_rate": 0.00043012608371854324, + "loss": 1.9049, + "step": 7063 + }, + { + "epoch": 0.7451476793248946, + "grad_norm": 0.43469008803367615, + "learning_rate": 0.00042952951053027696, + "loss": 1.9238, + "step": 7064 + }, + { + "epoch": 0.745253164556962, + "grad_norm": 0.36005014181137085, + "learning_rate": 0.0004289337647728092, + "loss": 1.8914, + "step": 7065 + }, + { + "epoch": 0.7453586497890295, + "grad_norm": 0.36650627851486206, + "learning_rate": 0.00042833884529851614, + "loss": 1.9099, + "step": 7066 + }, + { + "epoch": 0.7454641350210971, + "grad_norm": 0.40011563897132874, + "learning_rate": 0.0004277447509613654, + "loss": 1.8994, + "step": 7067 + }, + { + "epoch": 0.7455696202531645, + "grad_norm": 0.3668334186077118, + "learning_rate": 0.00042715148061691407, + "loss": 1.9274, + "step": 7068 + }, + { + "epoch": 0.7456751054852321, + "grad_norm": 0.42977166175842285, + "learning_rate": 0.00042655903312230673, + "loss": 1.9181, + "step": 7069 + }, + { + "epoch": 0.7457805907172996, + "grad_norm": 0.43027248978614807, + "learning_rate": 0.0004259674073362731, + "loss": 1.9075, + "step": 7070 + }, + { + "epoch": 0.7458860759493671, + "grad_norm": 0.3775312006473541, + "learning_rate": 0.0004253766021191256, + "loss": 1.9347, + "step": 7071 + }, + { + "epoch": 0.7459915611814346, + "grad_norm": 0.4384298026561737, + "learning_rate": 0.0004247866163327576, + "loss": 1.9276, + "step": 7072 + }, + { + "epoch": 0.7460970464135022, + "grad_norm": 0.41486087441444397, + "learning_rate": 0.00042419744884064083, + "loss": 1.9142, + "step": 7073 + }, + { + "epoch": 0.7462025316455696, + "grad_norm": 0.39280474185943604, + "learning_rate": 0.00042360909850782324, + "loss": 1.8875, + "step": 7074 + }, + { + "epoch": 0.7463080168776371, + "grad_norm": 0.39921557903289795, + "learning_rate": 0.0004230215642009273, + "loss": 1.8803, + "step": 7075 + }, 
+ { + "epoch": 0.7464135021097047, + "grad_norm": 0.3604714870452881, + "learning_rate": 0.0004224348447881472, + "loss": 1.9017, + "step": 7076 + }, + { + "epoch": 0.7465189873417721, + "grad_norm": 0.3941001892089844, + "learning_rate": 0.000421848939139247, + "loss": 1.8967, + "step": 7077 + }, + { + "epoch": 0.7466244725738397, + "grad_norm": 0.365188330411911, + "learning_rate": 0.0004212638461255582, + "loss": 1.9244, + "step": 7078 + }, + { + "epoch": 0.7467299578059071, + "grad_norm": 0.3381135165691376, + "learning_rate": 0.0004206795646199778, + "loss": 1.9145, + "step": 7079 + }, + { + "epoch": 0.7468354430379747, + "grad_norm": 0.4405667185783386, + "learning_rate": 0.00042009609349696626, + "loss": 1.9384, + "step": 7080 + }, + { + "epoch": 0.7469409282700422, + "grad_norm": 0.3814393877983093, + "learning_rate": 0.00041951343163254497, + "loss": 1.8902, + "step": 7081 + }, + { + "epoch": 0.7470464135021097, + "grad_norm": 0.37079092860221863, + "learning_rate": 0.0004189315779042942, + "loss": 1.9277, + "step": 7082 + }, + { + "epoch": 0.7471518987341772, + "grad_norm": 0.385856032371521, + "learning_rate": 0.00041835053119135095, + "loss": 1.9198, + "step": 7083 + }, + { + "epoch": 0.7472573839662447, + "grad_norm": 0.4035007953643799, + "learning_rate": 0.00041777029037440695, + "loss": 1.8963, + "step": 7084 + }, + { + "epoch": 0.7473628691983122, + "grad_norm": 0.4107097387313843, + "learning_rate": 0.00041719085433570657, + "loss": 1.9199, + "step": 7085 + }, + { + "epoch": 0.7474683544303797, + "grad_norm": 0.3480037450790405, + "learning_rate": 0.0004166122219590441, + "loss": 1.8952, + "step": 7086 + }, + { + "epoch": 0.7475738396624473, + "grad_norm": 0.3577502965927124, + "learning_rate": 0.00041603439212976217, + "loss": 1.948, + "step": 7087 + }, + { + "epoch": 0.7476793248945147, + "grad_norm": 0.3554288148880005, + "learning_rate": 0.00041545736373474934, + "loss": 1.9018, + "step": 7088 + }, + { + "epoch": 0.7477848101265823, + 
"grad_norm": 0.40808290243148804, + "learning_rate": 0.0004148811356624379, + "loss": 1.9285, + "step": 7089 + }, + { + "epoch": 0.7478902953586498, + "grad_norm": 0.3618568778038025, + "learning_rate": 0.00041430570680280233, + "loss": 1.8514, + "step": 7090 + }, + { + "epoch": 0.7479957805907173, + "grad_norm": 0.34585073590278625, + "learning_rate": 0.00041373107604735626, + "loss": 1.8892, + "step": 7091 + }, + { + "epoch": 0.7481012658227848, + "grad_norm": 0.3591282367706299, + "learning_rate": 0.00041315724228915075, + "loss": 1.8975, + "step": 7092 + }, + { + "epoch": 0.7482067510548523, + "grad_norm": 0.35558760166168213, + "learning_rate": 0.00041258420442277235, + "loss": 1.9042, + "step": 7093 + }, + { + "epoch": 0.7483122362869198, + "grad_norm": 0.37308335304260254, + "learning_rate": 0.0004120119613443408, + "loss": 1.9174, + "step": 7094 + }, + { + "epoch": 0.7484177215189873, + "grad_norm": 0.36754101514816284, + "learning_rate": 0.00041144051195150685, + "loss": 1.9108, + "step": 7095 + }, + { + "epoch": 0.7485232067510549, + "grad_norm": 0.3356092870235443, + "learning_rate": 0.00041086985514345004, + "loss": 1.9076, + "step": 7096 + }, + { + "epoch": 0.7486286919831223, + "grad_norm": 0.38330039381980896, + "learning_rate": 0.0004102999898208767, + "loss": 1.9144, + "step": 7097 + }, + { + "epoch": 0.7487341772151899, + "grad_norm": 0.3793129026889801, + "learning_rate": 0.00040973091488601815, + "loss": 1.9243, + "step": 7098 + }, + { + "epoch": 0.7488396624472574, + "grad_norm": 0.3287757933139801, + "learning_rate": 0.0004091626292426282, + "loss": 1.9091, + "step": 7099 + }, + { + "epoch": 0.7489451476793249, + "grad_norm": 0.3549636900424957, + "learning_rate": 0.0004085951317959809, + "loss": 1.8956, + "step": 7100 + }, + { + "epoch": 0.7490506329113924, + "grad_norm": 0.3931213319301605, + "learning_rate": 0.0004080284214528687, + "loss": 1.9496, + "step": 7101 + }, + { + "epoch": 0.74915611814346, + "grad_norm": 0.3510752320289612, + 
"learning_rate": 0.00040746249712160065, + "loss": 1.9385, + "step": 7102 + }, + { + "epoch": 0.7492616033755274, + "grad_norm": 0.3366512954235077, + "learning_rate": 0.0004068973577119993, + "loss": 1.9016, + "step": 7103 + }, + { + "epoch": 0.7493670886075949, + "grad_norm": 0.38492703437805176, + "learning_rate": 0.0004063330021354, + "loss": 1.8662, + "step": 7104 + }, + { + "epoch": 0.7494725738396625, + "grad_norm": 0.37224799394607544, + "learning_rate": 0.0004057694293046476, + "loss": 1.9159, + "step": 7105 + }, + { + "epoch": 0.7495780590717299, + "grad_norm": 0.3824895918369293, + "learning_rate": 0.00040520663813409474, + "loss": 1.9371, + "step": 7106 + }, + { + "epoch": 0.7496835443037975, + "grad_norm": 0.38862037658691406, + "learning_rate": 0.0004046446275396001, + "loss": 1.9106, + "step": 7107 + }, + { + "epoch": 0.749789029535865, + "grad_norm": 0.42576056718826294, + "learning_rate": 0.00040408339643852574, + "loss": 1.8918, + "step": 7108 + }, + { + "epoch": 0.7498945147679325, + "grad_norm": 0.39559778571128845, + "learning_rate": 0.0004035229437497357, + "loss": 1.9082, + "step": 7109 + }, + { + "epoch": 0.75, + "grad_norm": 0.43504875898361206, + "learning_rate": 0.00040296326839359315, + "loss": 1.9134, + "step": 7110 + }, + { + "epoch": 0.7501054852320675, + "grad_norm": 0.36316895484924316, + "learning_rate": 0.0004024043692919589, + "loss": 1.941, + "step": 7111 + }, + { + "epoch": 0.750210970464135, + "grad_norm": 0.4058597683906555, + "learning_rate": 0.000401846245368189, + "loss": 1.9006, + "step": 7112 + }, + { + "epoch": 0.7503164556962025, + "grad_norm": 0.3548565208911896, + "learning_rate": 0.00040128889554713273, + "loss": 1.9693, + "step": 7113 + }, + { + "epoch": 0.7504219409282701, + "grad_norm": 0.3398802578449249, + "learning_rate": 0.0004007323187551308, + "loss": 1.8783, + "step": 7114 + }, + { + "epoch": 0.7505274261603375, + "grad_norm": 0.4359436631202698, + "learning_rate": 0.0004001765139200129, + "loss": 1.9019, 
+ "step": 7115 + }, + { + "epoch": 0.7506329113924051, + "grad_norm": 0.41044312715530396, + "learning_rate": 0.00039962147997109587, + "loss": 1.8878, + "step": 7116 + }, + { + "epoch": 0.7507383966244726, + "grad_norm": 0.37728607654571533, + "learning_rate": 0.00039906721583918124, + "loss": 1.9195, + "step": 7117 + }, + { + "epoch": 0.75084388185654, + "grad_norm": 0.43445852398872375, + "learning_rate": 0.0003985137204565541, + "loss": 1.8841, + "step": 7118 + }, + { + "epoch": 0.7509493670886076, + "grad_norm": 0.3962329626083374, + "learning_rate": 0.00039796099275697986, + "loss": 1.8773, + "step": 7119 + }, + { + "epoch": 0.7510548523206751, + "grad_norm": 0.3470785617828369, + "learning_rate": 0.000397409031675703, + "loss": 1.9413, + "step": 7120 + }, + { + "epoch": 0.7511603375527426, + "grad_norm": 0.4452926516532898, + "learning_rate": 0.0003968578361494449, + "loss": 1.9667, + "step": 7121 + }, + { + "epoch": 0.7512658227848101, + "grad_norm": 0.3939453065395355, + "learning_rate": 0.0003963074051164014, + "loss": 1.9221, + "step": 7122 + }, + { + "epoch": 0.7513713080168777, + "grad_norm": 0.35836362838745117, + "learning_rate": 0.0003957577375162413, + "loss": 1.9077, + "step": 7123 + }, + { + "epoch": 0.7514767932489451, + "grad_norm": 0.4112444221973419, + "learning_rate": 0.0003952088322901039, + "loss": 1.917, + "step": 7124 + }, + { + "epoch": 0.7515822784810127, + "grad_norm": 0.3728596568107605, + "learning_rate": 0.0003946606883805972, + "loss": 1.9379, + "step": 7125 + }, + { + "epoch": 0.7516877637130802, + "grad_norm": 0.4057784676551819, + "learning_rate": 0.0003941133047317957, + "loss": 1.939, + "step": 7126 + }, + { + "epoch": 0.7517932489451477, + "grad_norm": 0.44061529636383057, + "learning_rate": 0.0003935666802892382, + "loss": 1.9283, + "step": 7127 + }, + { + "epoch": 0.7518987341772152, + "grad_norm": 0.3681333065032959, + "learning_rate": 0.00039302081399992676, + "loss": 1.9197, + "step": 7128 + }, + { + "epoch": 
0.7520042194092827, + "grad_norm": 0.3623766005039215, + "learning_rate": 0.0003924757048123232, + "loss": 1.8939, + "step": 7129 + }, + { + "epoch": 0.7521097046413502, + "grad_norm": 0.37397828698158264, + "learning_rate": 0.00039193135167634786, + "loss": 1.9215, + "step": 7130 + }, + { + "epoch": 0.7522151898734177, + "grad_norm": 0.40056878328323364, + "learning_rate": 0.000391387753543378, + "loss": 1.8912, + "step": 7131 + }, + { + "epoch": 0.7523206751054853, + "grad_norm": 0.3354543447494507, + "learning_rate": 0.0003908449093662446, + "loss": 1.8886, + "step": 7132 + }, + { + "epoch": 0.7524261603375527, + "grad_norm": 0.3981257975101471, + "learning_rate": 0.00039030281809923173, + "loss": 1.8779, + "step": 7133 + }, + { + "epoch": 0.7525316455696203, + "grad_norm": 0.38061079382896423, + "learning_rate": 0.00038976147869807345, + "loss": 1.8975, + "step": 7134 + }, + { + "epoch": 0.7526371308016878, + "grad_norm": 0.3679467439651489, + "learning_rate": 0.00038922089011995216, + "loss": 1.8785, + "step": 7135 + }, + { + "epoch": 0.7527426160337553, + "grad_norm": 0.35976773500442505, + "learning_rate": 0.0003886810513234966, + "loss": 1.9143, + "step": 7136 + }, + { + "epoch": 0.7528481012658228, + "grad_norm": 0.4080093502998352, + "learning_rate": 0.0003881419612687803, + "loss": 1.9253, + "step": 7137 + }, + { + "epoch": 0.7529535864978903, + "grad_norm": 0.35497763752937317, + "learning_rate": 0.0003876036189173186, + "loss": 1.9143, + "step": 7138 + }, + { + "epoch": 0.7530590717299578, + "grad_norm": 0.3749162554740906, + "learning_rate": 0.0003870660232320675, + "loss": 1.9081, + "step": 7139 + }, + { + "epoch": 0.7531645569620253, + "grad_norm": 0.4259759485721588, + "learning_rate": 0.00038652917317742123, + "loss": 1.8977, + "step": 7140 + }, + { + "epoch": 0.7532700421940929, + "grad_norm": 0.37746500968933105, + "learning_rate": 0.00038599306771921023, + "loss": 1.9124, + "step": 7141 + }, + { + "epoch": 0.7533755274261603, + "grad_norm": 
0.41207802295684814, + "learning_rate": 0.00038545770582469976, + "loss": 1.8852, + "step": 7142 + }, + { + "epoch": 0.7534810126582279, + "grad_norm": 0.34840938448905945, + "learning_rate": 0.00038492308646258705, + "loss": 1.8812, + "step": 7143 + }, + { + "epoch": 0.7535864978902953, + "grad_norm": 0.39048826694488525, + "learning_rate": 0.0003843892086029999, + "loss": 1.9027, + "step": 7144 + }, + { + "epoch": 0.7536919831223629, + "grad_norm": 0.3566725254058838, + "learning_rate": 0.0003838560712174944, + "loss": 1.8941, + "step": 7145 + }, + { + "epoch": 0.7537974683544304, + "grad_norm": 0.38058382272720337, + "learning_rate": 0.00038332367327905293, + "loss": 1.9157, + "step": 7146 + }, + { + "epoch": 0.7539029535864978, + "grad_norm": 0.3765145242214203, + "learning_rate": 0.00038279201376208285, + "loss": 1.9135, + "step": 7147 + }, + { + "epoch": 0.7540084388185654, + "grad_norm": 0.4008599817752838, + "learning_rate": 0.00038226109164241355, + "loss": 1.8726, + "step": 7148 + }, + { + "epoch": 0.7541139240506329, + "grad_norm": 0.42834246158599854, + "learning_rate": 0.000381730905897295, + "loss": 1.8593, + "step": 7149 + }, + { + "epoch": 0.7542194092827004, + "grad_norm": 0.3808562755584717, + "learning_rate": 0.0003812014555053956, + "loss": 1.9142, + "step": 7150 + }, + { + "epoch": 0.7543248945147679, + "grad_norm": 0.4102194011211395, + "learning_rate": 0.0003806727394468004, + "loss": 1.8889, + "step": 7151 + }, + { + "epoch": 0.7544303797468355, + "grad_norm": 0.41627296805381775, + "learning_rate": 0.00038014475670300935, + "loss": 1.8978, + "step": 7152 + }, + { + "epoch": 0.7545358649789029, + "grad_norm": 0.3410526514053345, + "learning_rate": 0.0003796175062569345, + "loss": 1.8963, + "step": 7153 + }, + { + "epoch": 0.7546413502109705, + "grad_norm": 0.4025893807411194, + "learning_rate": 0.0003790909870928989, + "loss": 1.887, + "step": 7154 + }, + { + "epoch": 0.754746835443038, + "grad_norm": 0.33603736758232117, + "learning_rate": 
0.0003785651981966342, + "loss": 1.9442, + "step": 7155 + }, + { + "epoch": 0.7548523206751054, + "grad_norm": 0.3718319237232208, + "learning_rate": 0.00037804013855527886, + "loss": 1.9167, + "step": 7156 + }, + { + "epoch": 0.754957805907173, + "grad_norm": 0.37419092655181885, + "learning_rate": 0.0003775158071573762, + "loss": 1.8613, + "step": 7157 + }, + { + "epoch": 0.7550632911392405, + "grad_norm": 0.38199713826179504, + "learning_rate": 0.0003769922029928723, + "loss": 1.9078, + "step": 7158 + }, + { + "epoch": 0.755168776371308, + "grad_norm": 0.39686518907546997, + "learning_rate": 0.0003764693250531141, + "loss": 1.8931, + "step": 7159 + }, + { + "epoch": 0.7552742616033755, + "grad_norm": 0.38563987612724304, + "learning_rate": 0.00037594717233084774, + "loss": 1.9506, + "step": 7160 + }, + { + "epoch": 0.7553797468354431, + "grad_norm": 0.42433395981788635, + "learning_rate": 0.0003754257438202162, + "loss": 1.9109, + "step": 7161 + }, + { + "epoch": 0.7554852320675105, + "grad_norm": 0.3654523491859436, + "learning_rate": 0.0003749050385167578, + "loss": 1.8881, + "step": 7162 + }, + { + "epoch": 0.755590717299578, + "grad_norm": 0.3795557916164398, + "learning_rate": 0.00037438505541740366, + "loss": 1.9014, + "step": 7163 + }, + { + "epoch": 0.7556962025316456, + "grad_norm": 0.39695340394973755, + "learning_rate": 0.0003738657935204763, + "loss": 1.9204, + "step": 7164 + }, + { + "epoch": 0.755801687763713, + "grad_norm": 0.38523250818252563, + "learning_rate": 0.00037334725182568764, + "loss": 1.9189, + "step": 7165 + }, + { + "epoch": 0.7559071729957806, + "grad_norm": 0.37618640065193176, + "learning_rate": 0.00037282942933413685, + "loss": 1.8964, + "step": 7166 + }, + { + "epoch": 0.7560126582278481, + "grad_norm": 0.45815929770469666, + "learning_rate": 0.00037231232504830866, + "loss": 1.8955, + "step": 7167 + }, + { + "epoch": 0.7561181434599156, + "grad_norm": 0.4128783345222473, + "learning_rate": 0.0003717959379720712, + "loss": 
1.923, + "step": 7168 + }, + { + "epoch": 0.7562236286919831, + "grad_norm": 0.3692014813423157, + "learning_rate": 0.0003712802671106742, + "loss": 1.9377, + "step": 7169 + }, + { + "epoch": 0.7563291139240507, + "grad_norm": 0.41133105754852295, + "learning_rate": 0.0003707653114707471, + "loss": 1.9553, + "step": 7170 + }, + { + "epoch": 0.7564345991561181, + "grad_norm": 0.36350828409194946, + "learning_rate": 0.0003702510700602974, + "loss": 1.8894, + "step": 7171 + }, + { + "epoch": 0.7565400843881857, + "grad_norm": 0.38837140798568726, + "learning_rate": 0.00036973754188870803, + "loss": 1.9151, + "step": 7172 + }, + { + "epoch": 0.7566455696202532, + "grad_norm": 0.3514285981655121, + "learning_rate": 0.00036922472596673614, + "loss": 1.9012, + "step": 7173 + }, + { + "epoch": 0.7567510548523206, + "grad_norm": 0.3561309278011322, + "learning_rate": 0.0003687126213065109, + "loss": 1.8745, + "step": 7174 + }, + { + "epoch": 0.7568565400843882, + "grad_norm": 0.38238632678985596, + "learning_rate": 0.0003682012269215314, + "loss": 1.922, + "step": 7175 + }, + { + "epoch": 0.7569620253164557, + "grad_norm": 0.3368440270423889, + "learning_rate": 0.0003676905418266654, + "loss": 1.8761, + "step": 7176 + }, + { + "epoch": 0.7570675105485232, + "grad_norm": 0.3645894527435303, + "learning_rate": 0.00036718056503814674, + "loss": 1.9252, + "step": 7177 + }, + { + "epoch": 0.7571729957805907, + "grad_norm": 0.37520894408226013, + "learning_rate": 0.00036667129557357375, + "loss": 1.8738, + "step": 7178 + }, + { + "epoch": 0.7572784810126583, + "grad_norm": 0.39077815413475037, + "learning_rate": 0.0003661627324519073, + "loss": 1.9041, + "step": 7179 + }, + { + "epoch": 0.7573839662447257, + "grad_norm": 0.37002071738243103, + "learning_rate": 0.00036565487469346906, + "loss": 1.8977, + "step": 7180 + }, + { + "epoch": 0.7574894514767933, + "grad_norm": 0.42324700951576233, + "learning_rate": 0.0003651477213199393, + "loss": 1.8861, + "step": 7181 + }, + { + 
"epoch": 0.7575949367088608, + "grad_norm": 0.3418649733066559, + "learning_rate": 0.0003646412713543554, + "loss": 1.8999, + "step": 7182 + }, + { + "epoch": 0.7577004219409282, + "grad_norm": 0.3862384557723999, + "learning_rate": 0.0003641355238211096, + "loss": 1.8671, + "step": 7183 + }, + { + "epoch": 0.7578059071729958, + "grad_norm": 0.387445330619812, + "learning_rate": 0.0003636304777459472, + "loss": 1.9234, + "step": 7184 + }, + { + "epoch": 0.7579113924050633, + "grad_norm": 0.3869968056678772, + "learning_rate": 0.0003631261321559652, + "loss": 1.9221, + "step": 7185 + }, + { + "epoch": 0.7580168776371308, + "grad_norm": 0.3993377089500427, + "learning_rate": 0.0003626224860796095, + "loss": 1.8601, + "step": 7186 + }, + { + "epoch": 0.7581223628691983, + "grad_norm": 0.33464545011520386, + "learning_rate": 0.0003621195385466738, + "loss": 1.9138, + "step": 7187 + }, + { + "epoch": 0.7582278481012659, + "grad_norm": 0.36003509163856506, + "learning_rate": 0.0003616172885882972, + "loss": 1.8682, + "step": 7188 + }, + { + "epoch": 0.7583333333333333, + "grad_norm": 0.3584383428096771, + "learning_rate": 0.0003611157352369628, + "loss": 1.9232, + "step": 7189 + }, + { + "epoch": 0.7584388185654009, + "grad_norm": 0.3510622978210449, + "learning_rate": 0.0003606148775264958, + "loss": 1.9035, + "step": 7190 + }, + { + "epoch": 0.7585443037974684, + "grad_norm": 0.37859827280044556, + "learning_rate": 0.000360114714492061, + "loss": 1.899, + "step": 7191 + }, + { + "epoch": 0.7586497890295358, + "grad_norm": 0.34307438135147095, + "learning_rate": 0.0003596152451701616, + "loss": 1.8956, + "step": 7192 + }, + { + "epoch": 0.7587552742616034, + "grad_norm": 0.3805692791938782, + "learning_rate": 0.00035911646859863725, + "loss": 1.9087, + "step": 7193 + }, + { + "epoch": 0.7588607594936709, + "grad_norm": 0.36112070083618164, + "learning_rate": 0.00035861838381666194, + "loss": 1.9058, + "step": 7194 + }, + { + "epoch": 0.7589662447257384, + "grad_norm": 
0.3741062581539154, + "learning_rate": 0.0003581209898647425, + "loss": 1.9439, + "step": 7195 + }, + { + "epoch": 0.7590717299578059, + "grad_norm": 0.38817036151885986, + "learning_rate": 0.0003576242857847163, + "loss": 1.9391, + "step": 7196 + }, + { + "epoch": 0.7591772151898735, + "grad_norm": 0.4081869125366211, + "learning_rate": 0.0003571282706197498, + "loss": 1.8791, + "step": 7197 + }, + { + "epoch": 0.7592827004219409, + "grad_norm": 0.3787235617637634, + "learning_rate": 0.0003566329434143366, + "loss": 1.9214, + "step": 7198 + }, + { + "epoch": 0.7593881856540085, + "grad_norm": 0.35406339168548584, + "learning_rate": 0.00035613830321429534, + "loss": 1.8863, + "step": 7199 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 0.4337584674358368, + "learning_rate": 0.0003556443490667684, + "loss": 1.8873, + "step": 7200 + }, + { + "epoch": 0.7595991561181434, + "grad_norm": 0.4150247275829315, + "learning_rate": 0.0003551510800202195, + "loss": 1.8816, + "step": 7201 + }, + { + "epoch": 0.759704641350211, + "grad_norm": 0.34472474455833435, + "learning_rate": 0.0003546584951244323, + "loss": 1.8858, + "step": 7202 + }, + { + "epoch": 0.7598101265822785, + "grad_norm": 0.3797995448112488, + "learning_rate": 0.00035416659343050807, + "loss": 1.9042, + "step": 7203 + }, + { + "epoch": 0.759915611814346, + "grad_norm": 0.3452579975128174, + "learning_rate": 0.0003536753739908646, + "loss": 1.9127, + "step": 7204 + }, + { + "epoch": 0.7600210970464135, + "grad_norm": 0.3564036786556244, + "learning_rate": 0.0003531848358592338, + "loss": 1.9321, + "step": 7205 + }, + { + "epoch": 0.7601265822784811, + "grad_norm": 0.36905407905578613, + "learning_rate": 0.00035269497809065976, + "loss": 1.9155, + "step": 7206 + }, + { + "epoch": 0.7602320675105485, + "grad_norm": 0.3611906170845032, + "learning_rate": 0.00035220579974149755, + "loss": 1.8973, + "step": 7207 + }, + { + "epoch": 0.760337552742616, + "grad_norm": 0.3700009882450104, + "learning_rate": 
0.00035171729986941075, + "loss": 1.9217, + "step": 7208 + }, + { + "epoch": 0.7604430379746835, + "grad_norm": 0.3691650331020355, + "learning_rate": 0.00035122947753337037, + "loss": 1.8864, + "step": 7209 + }, + { + "epoch": 0.760548523206751, + "grad_norm": 0.40713179111480713, + "learning_rate": 0.0003507423317936521, + "loss": 1.9119, + "step": 7210 + }, + { + "epoch": 0.7606540084388186, + "grad_norm": 0.3484981656074524, + "learning_rate": 0.0003502558617118353, + "loss": 1.9267, + "step": 7211 + }, + { + "epoch": 0.760759493670886, + "grad_norm": 0.3862965404987335, + "learning_rate": 0.0003497700663508009, + "loss": 1.8918, + "step": 7212 + }, + { + "epoch": 0.7608649789029536, + "grad_norm": 0.39419788122177124, + "learning_rate": 0.0003492849447747293, + "loss": 1.9034, + "step": 7213 + }, + { + "epoch": 0.7609704641350211, + "grad_norm": 0.36275503039360046, + "learning_rate": 0.00034880049604909933, + "loss": 1.9171, + "step": 7214 + }, + { + "epoch": 0.7610759493670886, + "grad_norm": 0.40584298968315125, + "learning_rate": 0.00034831671924068555, + "loss": 1.9025, + "step": 7215 + }, + { + "epoch": 0.7611814345991561, + "grad_norm": 0.38341689109802246, + "learning_rate": 0.00034783361341755707, + "loss": 1.9073, + "step": 7216 + }, + { + "epoch": 0.7612869198312237, + "grad_norm": 0.3408887982368469, + "learning_rate": 0.0003473511776490756, + "loss": 1.9267, + "step": 7217 + }, + { + "epoch": 0.7613924050632911, + "grad_norm": 0.3988606035709381, + "learning_rate": 0.00034686941100589336, + "loss": 1.937, + "step": 7218 + }, + { + "epoch": 0.7614978902953586, + "grad_norm": 0.4113406538963318, + "learning_rate": 0.0003463883125599521, + "loss": 1.8937, + "step": 7219 + }, + { + "epoch": 0.7616033755274262, + "grad_norm": 0.3340252935886383, + "learning_rate": 0.00034590788138448006, + "loss": 1.8955, + "step": 7220 + }, + { + "epoch": 0.7617088607594936, + "grad_norm": 0.3819078803062439, + "learning_rate": 0.0003454281165539914, + "loss": 1.9221, 
+ "step": 7221 + }, + { + "epoch": 0.7618143459915612, + "grad_norm": 0.3814660906791687, + "learning_rate": 0.00034494901714428365, + "loss": 1.9045, + "step": 7222 + }, + { + "epoch": 0.7619198312236287, + "grad_norm": 0.37712225317955017, + "learning_rate": 0.0003444705822324364, + "loss": 1.8894, + "step": 7223 + }, + { + "epoch": 0.7620253164556962, + "grad_norm": 0.3378586173057556, + "learning_rate": 0.0003439928108968091, + "loss": 1.9004, + "step": 7224 + }, + { + "epoch": 0.7621308016877637, + "grad_norm": 0.3854992687702179, + "learning_rate": 0.0003435157022170396, + "loss": 1.9272, + "step": 7225 + }, + { + "epoch": 0.7622362869198313, + "grad_norm": 0.35618939995765686, + "learning_rate": 0.0003430392552740422, + "loss": 1.899, + "step": 7226 + }, + { + "epoch": 0.7623417721518987, + "grad_norm": 0.3478589355945587, + "learning_rate": 0.0003425634691500059, + "loss": 1.8844, + "step": 7227 + }, + { + "epoch": 0.7624472573839662, + "grad_norm": 0.36291268467903137, + "learning_rate": 0.0003420883429283929, + "loss": 1.9118, + "step": 7228 + }, + { + "epoch": 0.7625527426160338, + "grad_norm": 0.36949604749679565, + "learning_rate": 0.00034161387569393647, + "loss": 1.9462, + "step": 7229 + }, + { + "epoch": 0.7626582278481012, + "grad_norm": 0.34631019830703735, + "learning_rate": 0.0003411400665326393, + "loss": 1.903, + "step": 7230 + }, + { + "epoch": 0.7627637130801688, + "grad_norm": 0.3376736342906952, + "learning_rate": 0.00034066691453177176, + "loss": 1.9066, + "step": 7231 + }, + { + "epoch": 0.7628691983122363, + "grad_norm": 0.3516145348548889, + "learning_rate": 0.00034019441877987015, + "loss": 1.8926, + "step": 7232 + }, + { + "epoch": 0.7629746835443038, + "grad_norm": 0.33224010467529297, + "learning_rate": 0.00033972257836673513, + "loss": 1.8812, + "step": 7233 + }, + { + "epoch": 0.7630801687763713, + "grad_norm": 0.3544415831565857, + "learning_rate": 0.00033925139238342956, + "loss": 1.9042, + "step": 7234 + }, + { + "epoch": 
0.7631856540084389, + "grad_norm": 0.3571839928627014, + "learning_rate": 0.0003387808599222771, + "loss": 1.9154, + "step": 7235 + }, + { + "epoch": 0.7632911392405063, + "grad_norm": 0.3694862425327301, + "learning_rate": 0.0003383109800768603, + "loss": 1.896, + "step": 7236 + }, + { + "epoch": 0.7633966244725738, + "grad_norm": 0.3491717278957367, + "learning_rate": 0.0003378417519420187, + "loss": 1.9475, + "step": 7237 + }, + { + "epoch": 0.7635021097046414, + "grad_norm": 0.3421505093574524, + "learning_rate": 0.00033737317461384766, + "loss": 1.9317, + "step": 7238 + }, + { + "epoch": 0.7636075949367088, + "grad_norm": 0.3725544810295105, + "learning_rate": 0.00033690524718969593, + "loss": 1.8886, + "step": 7239 + }, + { + "epoch": 0.7637130801687764, + "grad_norm": 0.3669561743736267, + "learning_rate": 0.00033643796876816424, + "loss": 1.8867, + "step": 7240 + }, + { + "epoch": 0.7638185654008439, + "grad_norm": 0.3486732542514801, + "learning_rate": 0.0003359713384491037, + "loss": 1.8862, + "step": 7241 + }, + { + "epoch": 0.7639240506329114, + "grad_norm": 0.3732537031173706, + "learning_rate": 0.00033550535533361366, + "loss": 1.9361, + "step": 7242 + }, + { + "epoch": 0.7640295358649789, + "grad_norm": 0.37564486265182495, + "learning_rate": 0.0003350400185240405, + "loss": 1.9151, + "step": 7243 + }, + { + "epoch": 0.7641350210970465, + "grad_norm": 0.35006192326545715, + "learning_rate": 0.0003345753271239754, + "loss": 1.9008, + "step": 7244 + }, + { + "epoch": 0.7642405063291139, + "grad_norm": 0.39273202419281006, + "learning_rate": 0.00033411128023825296, + "loss": 1.8997, + "step": 7245 + }, + { + "epoch": 0.7643459915611814, + "grad_norm": 0.37656912207603455, + "learning_rate": 0.0003336478769729492, + "loss": 1.8829, + "step": 7246 + }, + { + "epoch": 0.764451476793249, + "grad_norm": 0.40631556510925293, + "learning_rate": 0.0003331851164353802, + "loss": 1.9251, + "step": 7247 + }, + { + "epoch": 0.7645569620253164, + "grad_norm": 
0.4278174042701721, + "learning_rate": 0.00033272299773410007, + "loss": 1.8946, + "step": 7248 + }, + { + "epoch": 0.764662447257384, + "grad_norm": 0.3842338025569916, + "learning_rate": 0.0003322615199788993, + "loss": 1.936, + "step": 7249 + }, + { + "epoch": 0.7647679324894515, + "grad_norm": 0.38711124658584595, + "learning_rate": 0.000331800682280803, + "loss": 1.893, + "step": 7250 + }, + { + "epoch": 0.764873417721519, + "grad_norm": 0.3757840096950531, + "learning_rate": 0.00033134048375206944, + "loss": 1.9043, + "step": 7251 + }, + { + "epoch": 0.7649789029535865, + "grad_norm": 0.38113853335380554, + "learning_rate": 0.0003308809235061881, + "loss": 1.877, + "step": 7252 + }, + { + "epoch": 0.765084388185654, + "grad_norm": 0.4240224063396454, + "learning_rate": 0.000330422000657878, + "loss": 1.8939, + "step": 7253 + }, + { + "epoch": 0.7651898734177215, + "grad_norm": 0.3421115577220917, + "learning_rate": 0.00032996371432308605, + "loss": 1.9078, + "step": 7254 + }, + { + "epoch": 0.765295358649789, + "grad_norm": 0.4029121398925781, + "learning_rate": 0.00032950606361898527, + "loss": 1.8931, + "step": 7255 + }, + { + "epoch": 0.7654008438818566, + "grad_norm": 0.4123445749282837, + "learning_rate": 0.0003290490476639731, + "loss": 1.895, + "step": 7256 + }, + { + "epoch": 0.765506329113924, + "grad_norm": 0.35934826731681824, + "learning_rate": 0.00032859266557766996, + "loss": 1.9118, + "step": 7257 + }, + { + "epoch": 0.7656118143459916, + "grad_norm": 0.396348774433136, + "learning_rate": 0.000328136916480917, + "loss": 1.8816, + "step": 7258 + }, + { + "epoch": 0.7657172995780591, + "grad_norm": 0.35919371247291565, + "learning_rate": 0.00032768179949577516, + "loss": 1.9253, + "step": 7259 + }, + { + "epoch": 0.7658227848101266, + "grad_norm": 0.36618608236312866, + "learning_rate": 0.0003272273137455226, + "loss": 1.8671, + "step": 7260 + }, + { + "epoch": 0.7659282700421941, + "grad_norm": 0.37658214569091797, + "learning_rate": 
0.0003267734583546536, + "loss": 1.8669, + "step": 7261 + }, + { + "epoch": 0.7660337552742617, + "grad_norm": 0.3619188070297241, + "learning_rate": 0.0003263202324488771, + "loss": 1.8747, + "step": 7262 + }, + { + "epoch": 0.7661392405063291, + "grad_norm": 0.3453660011291504, + "learning_rate": 0.0003258676351551143, + "loss": 1.9016, + "step": 7263 + }, + { + "epoch": 0.7662447257383966, + "grad_norm": 0.38379040360450745, + "learning_rate": 0.0003254156656014973, + "loss": 1.9299, + "step": 7264 + }, + { + "epoch": 0.7663502109704642, + "grad_norm": 0.37390223145484924, + "learning_rate": 0.0003249643229173677, + "loss": 1.8901, + "step": 7265 + }, + { + "epoch": 0.7664556962025316, + "grad_norm": 0.3773546516895294, + "learning_rate": 0.0003245136062332745, + "loss": 1.8591, + "step": 7266 + }, + { + "epoch": 0.7665611814345992, + "grad_norm": 0.3567565083503723, + "learning_rate": 0.0003240635146809727, + "loss": 1.9085, + "step": 7267 + }, + { + "epoch": 0.7666666666666667, + "grad_norm": 0.3836156129837036, + "learning_rate": 0.0003236140473934215, + "loss": 1.9163, + "step": 7268 + }, + { + "epoch": 0.7667721518987342, + "grad_norm": 0.377393513917923, + "learning_rate": 0.0003231652035047826, + "loss": 1.9099, + "step": 7269 + }, + { + "epoch": 0.7668776371308017, + "grad_norm": 0.3833286464214325, + "learning_rate": 0.00032271698215041863, + "loss": 1.9258, + "step": 7270 + }, + { + "epoch": 0.7669831223628693, + "grad_norm": 0.3783455491065979, + "learning_rate": 0.00032226938246689157, + "loss": 1.9098, + "step": 7271 + }, + { + "epoch": 0.7670886075949367, + "grad_norm": 0.4104682207107544, + "learning_rate": 0.00032182240359196083, + "loss": 1.894, + "step": 7272 + }, + { + "epoch": 0.7671940928270042, + "grad_norm": 0.3487912714481354, + "learning_rate": 0.0003213760446645818, + "loss": 1.8895, + "step": 7273 + }, + { + "epoch": 0.7672995780590718, + "grad_norm": 0.37795189023017883, + "learning_rate": 0.00032093030482490396, + "loss": 1.8684, + 
"step": 7274 + }, + { + "epoch": 0.7674050632911392, + "grad_norm": 0.43773433566093445, + "learning_rate": 0.00032048518321426946, + "loss": 1.8971, + "step": 7275 + }, + { + "epoch": 0.7675105485232068, + "grad_norm": 0.3842078447341919, + "learning_rate": 0.0003200406789752116, + "loss": 1.9342, + "step": 7276 + }, + { + "epoch": 0.7676160337552742, + "grad_norm": 0.3955802023410797, + "learning_rate": 0.00031959679125145277, + "loss": 1.878, + "step": 7277 + }, + { + "epoch": 0.7677215189873418, + "grad_norm": 0.4182548522949219, + "learning_rate": 0.0003191535191879029, + "loss": 1.9145, + "step": 7278 + }, + { + "epoch": 0.7678270042194093, + "grad_norm": 0.4172656536102295, + "learning_rate": 0.000318710861930658, + "loss": 1.8902, + "step": 7279 + }, + { + "epoch": 0.7679324894514767, + "grad_norm": 0.4059489071369171, + "learning_rate": 0.0003182688186269984, + "loss": 1.879, + "step": 7280 + }, + { + "epoch": 0.7680379746835443, + "grad_norm": 0.48098650574684143, + "learning_rate": 0.0003178273884253874, + "loss": 1.936, + "step": 7281 + }, + { + "epoch": 0.7681434599156118, + "grad_norm": 0.36209845542907715, + "learning_rate": 0.0003173865704754689, + "loss": 1.9303, + "step": 7282 + }, + { + "epoch": 0.7682489451476793, + "grad_norm": 0.37829071283340454, + "learning_rate": 0.0003169463639280665, + "loss": 1.9046, + "step": 7283 + }, + { + "epoch": 0.7683544303797468, + "grad_norm": 0.46617162227630615, + "learning_rate": 0.00031650676793518157, + "loss": 1.8872, + "step": 7284 + }, + { + "epoch": 0.7684599156118144, + "grad_norm": 0.33938589692115784, + "learning_rate": 0.00031606778164999143, + "loss": 1.9085, + "step": 7285 + }, + { + "epoch": 0.7685654008438818, + "grad_norm": 0.36941564083099365, + "learning_rate": 0.00031562940422684833, + "loss": 1.899, + "step": 7286 + }, + { + "epoch": 0.7686708860759494, + "grad_norm": 0.42645418643951416, + "learning_rate": 0.00031519163482127696, + "loss": 1.9016, + "step": 7287 + }, + { + "epoch": 
0.7687763713080169, + "grad_norm": 0.40327543020248413, + "learning_rate": 0.00031475447258997355, + "loss": 1.8852, + "step": 7288 + }, + { + "epoch": 0.7688818565400843, + "grad_norm": 0.35218286514282227, + "learning_rate": 0.0003143179166908038, + "loss": 1.9334, + "step": 7289 + }, + { + "epoch": 0.7689873417721519, + "grad_norm": 0.38843417167663574, + "learning_rate": 0.0003138819662828017, + "loss": 1.8921, + "step": 7290 + }, + { + "epoch": 0.7690928270042194, + "grad_norm": 0.36622947454452515, + "learning_rate": 0.0003134466205261674, + "loss": 1.9517, + "step": 7291 + }, + { + "epoch": 0.7691983122362869, + "grad_norm": 0.3740665912628174, + "learning_rate": 0.0003130118785822658, + "loss": 1.9051, + "step": 7292 + }, + { + "epoch": 0.7693037974683544, + "grad_norm": 0.3730669617652893, + "learning_rate": 0.0003125777396136251, + "loss": 1.8821, + "step": 7293 + }, + { + "epoch": 0.769409282700422, + "grad_norm": 0.386014461517334, + "learning_rate": 0.00031214420278393487, + "loss": 1.8848, + "step": 7294 + }, + { + "epoch": 0.7695147679324894, + "grad_norm": 0.3818463385105133, + "learning_rate": 0.00031171126725804496, + "loss": 1.9468, + "step": 7295 + }, + { + "epoch": 0.769620253164557, + "grad_norm": 0.4206986725330353, + "learning_rate": 0.0003112789322019633, + "loss": 1.9057, + "step": 7296 + }, + { + "epoch": 0.7697257383966245, + "grad_norm": 0.39589425921440125, + "learning_rate": 0.0003108471967828545, + "loss": 1.9149, + "step": 7297 + }, + { + "epoch": 0.7698312236286919, + "grad_norm": 0.3523927927017212, + "learning_rate": 0.00031041606016903847, + "loss": 1.8775, + "step": 7298 + }, + { + "epoch": 0.7699367088607595, + "grad_norm": 0.37647902965545654, + "learning_rate": 0.00030998552152998834, + "loss": 1.8947, + "step": 7299 + }, + { + "epoch": 0.770042194092827, + "grad_norm": 0.43973803520202637, + "learning_rate": 0.00030955558003632966, + "loss": 1.9294, + "step": 7300 + }, + { + "epoch": 0.7701476793248945, + "grad_norm": 
0.37864989042282104, + "learning_rate": 0.0003091262348598378, + "loss": 1.9001, + "step": 7301 + }, + { + "epoch": 0.770253164556962, + "grad_norm": 0.37447690963745117, + "learning_rate": 0.00030869748517343705, + "loss": 1.858, + "step": 7302 + }, + { + "epoch": 0.7703586497890296, + "grad_norm": 0.4218522310256958, + "learning_rate": 0.000308269330151199, + "loss": 1.8941, + "step": 7303 + }, + { + "epoch": 0.770464135021097, + "grad_norm": 0.37844154238700867, + "learning_rate": 0.00030784176896834033, + "loss": 1.9012, + "step": 7304 + }, + { + "epoch": 0.7705696202531646, + "grad_norm": 0.3915519714355469, + "learning_rate": 0.0003074148008012223, + "loss": 1.8762, + "step": 7305 + }, + { + "epoch": 0.7706751054852321, + "grad_norm": 0.37242236733436584, + "learning_rate": 0.00030698842482734806, + "loss": 1.9138, + "step": 7306 + }, + { + "epoch": 0.7707805907172995, + "grad_norm": 0.3595430254936218, + "learning_rate": 0.0003065626402253616, + "loss": 1.8973, + "step": 7307 + }, + { + "epoch": 0.7708860759493671, + "grad_norm": 0.36156216263771057, + "learning_rate": 0.00030613744617504624, + "loss": 1.8975, + "step": 7308 + }, + { + "epoch": 0.7709915611814346, + "grad_norm": 0.3801517188549042, + "learning_rate": 0.00030571284185732276, + "loss": 1.9114, + "step": 7309 + }, + { + "epoch": 0.7710970464135021, + "grad_norm": 0.3840555250644684, + "learning_rate": 0.0003052888264542483, + "loss": 1.8738, + "step": 7310 + }, + { + "epoch": 0.7712025316455696, + "grad_norm": 0.3569415211677551, + "learning_rate": 0.0003048653991490141, + "loss": 1.8945, + "step": 7311 + }, + { + "epoch": 0.7713080168776372, + "grad_norm": 0.36957356333732605, + "learning_rate": 0.0003044425591259445, + "loss": 1.9247, + "step": 7312 + }, + { + "epoch": 0.7714135021097046, + "grad_norm": 0.3917827606201172, + "learning_rate": 0.0003040203055704949, + "loss": 1.9084, + "step": 7313 + }, + { + "epoch": 0.7715189873417722, + "grad_norm": 0.3612488806247711, + "learning_rate": 
0.000303598637669251, + "loss": 1.8826, + "step": 7314 + }, + { + "epoch": 0.7716244725738397, + "grad_norm": 0.35628941655158997, + "learning_rate": 0.0003031775546099261, + "loss": 1.8965, + "step": 7315 + }, + { + "epoch": 0.7717299578059071, + "grad_norm": 0.3759494721889496, + "learning_rate": 0.0003027570555813604, + "loss": 1.912, + "step": 7316 + }, + { + "epoch": 0.7718354430379747, + "grad_norm": 0.37511664628982544, + "learning_rate": 0.00030233713977351906, + "loss": 1.8542, + "step": 7317 + }, + { + "epoch": 0.7719409282700422, + "grad_norm": 0.35427024960517883, + "learning_rate": 0.00030191780637749084, + "loss": 1.8692, + "step": 7318 + }, + { + "epoch": 0.7720464135021097, + "grad_norm": 0.37146443128585815, + "learning_rate": 0.0003014990545854864, + "loss": 1.8917, + "step": 7319 + }, + { + "epoch": 0.7721518987341772, + "grad_norm": 0.36504805088043213, + "learning_rate": 0.0003010808835908368, + "loss": 1.8805, + "step": 7320 + }, + { + "epoch": 0.7722573839662448, + "grad_norm": 0.39247989654541016, + "learning_rate": 0.00030066329258799187, + "loss": 1.9347, + "step": 7321 + }, + { + "epoch": 0.7723628691983122, + "grad_norm": 0.405710369348526, + "learning_rate": 0.0003002462807725186, + "loss": 1.8804, + "step": 7322 + }, + { + "epoch": 0.7724683544303798, + "grad_norm": 0.38062670826911926, + "learning_rate": 0.00029982984734109995, + "loss": 1.8771, + "step": 7323 + }, + { + "epoch": 0.7725738396624473, + "grad_norm": 0.37446466088294983, + "learning_rate": 0.00029941399149153303, + "loss": 1.8863, + "step": 7324 + }, + { + "epoch": 0.7726793248945147, + "grad_norm": 0.3811671733856201, + "learning_rate": 0.00029899871242272745, + "loss": 1.9275, + "step": 7325 + }, + { + "epoch": 0.7727848101265823, + "grad_norm": 0.3827137351036072, + "learning_rate": 0.000298584009334704, + "loss": 1.8931, + "step": 7326 + }, + { + "epoch": 0.7728902953586498, + "grad_norm": 0.4069504141807556, + "learning_rate": 0.00029816988142859286, + "loss": 
1.8695, + "step": 7327 + }, + { + "epoch": 0.7729957805907173, + "grad_norm": 0.3745785653591156, + "learning_rate": 0.0002977563279066324, + "loss": 1.8679, + "step": 7328 + }, + { + "epoch": 0.7731012658227848, + "grad_norm": 0.36749833822250366, + "learning_rate": 0.0002973433479721675, + "loss": 1.8599, + "step": 7329 + }, + { + "epoch": 0.7732067510548524, + "grad_norm": 0.40950360894203186, + "learning_rate": 0.00029693094082964785, + "loss": 1.9329, + "step": 7330 + }, + { + "epoch": 0.7733122362869198, + "grad_norm": 0.3696414530277252, + "learning_rate": 0.0002965191056846266, + "loss": 1.876, + "step": 7331 + }, + { + "epoch": 0.7734177215189874, + "grad_norm": 0.4004625380039215, + "learning_rate": 0.0002961078417437587, + "loss": 1.9026, + "step": 7332 + }, + { + "epoch": 0.7735232067510549, + "grad_norm": 0.39905649423599243, + "learning_rate": 0.0002956971482147996, + "loss": 1.9137, + "step": 7333 + }, + { + "epoch": 0.7736286919831223, + "grad_norm": 0.36316177248954773, + "learning_rate": 0.0002952870243066035, + "loss": 1.8841, + "step": 7334 + }, + { + "epoch": 0.7737341772151899, + "grad_norm": 0.3827871084213257, + "learning_rate": 0.00029487746922912173, + "loss": 1.9306, + "step": 7335 + }, + { + "epoch": 0.7738396624472574, + "grad_norm": 0.37171733379364014, + "learning_rate": 0.00029446848219340173, + "loss": 1.873, + "step": 7336 + }, + { + "epoch": 0.7739451476793249, + "grad_norm": 0.32705092430114746, + "learning_rate": 0.00029406006241158487, + "loss": 1.8784, + "step": 7337 + }, + { + "epoch": 0.7740506329113924, + "grad_norm": 0.38370177149772644, + "learning_rate": 0.0002936522090969055, + "loss": 1.8935, + "step": 7338 + }, + { + "epoch": 0.77415611814346, + "grad_norm": 0.38411420583724976, + "learning_rate": 0.00029324492146368906, + "loss": 1.911, + "step": 7339 + }, + { + "epoch": 0.7742616033755274, + "grad_norm": 0.36384060978889465, + "learning_rate": 0.0002928381987273508, + "loss": 1.8901, + "step": 7340 + }, + { + 
"epoch": 0.774367088607595, + "grad_norm": 0.3661918044090271, + "learning_rate": 0.000292432040104394, + "loss": 1.8745, + "step": 7341 + }, + { + "epoch": 0.7744725738396624, + "grad_norm": 0.35422465205192566, + "learning_rate": 0.00029202644481240867, + "loss": 1.888, + "step": 7342 + }, + { + "epoch": 0.7745780590717299, + "grad_norm": 0.40527528524398804, + "learning_rate": 0.0002916214120700702, + "loss": 1.8722, + "step": 7343 + }, + { + "epoch": 0.7746835443037975, + "grad_norm": 0.3570922315120697, + "learning_rate": 0.00029121694109713757, + "loss": 1.9208, + "step": 7344 + }, + { + "epoch": 0.7747890295358649, + "grad_norm": 0.34584808349609375, + "learning_rate": 0.0002908130311144518, + "loss": 1.8831, + "step": 7345 + }, + { + "epoch": 0.7748945147679325, + "grad_norm": 0.38353896141052246, + "learning_rate": 0.0002904096813439346, + "loss": 1.8488, + "step": 7346 + }, + { + "epoch": 0.775, + "grad_norm": 0.3800409734249115, + "learning_rate": 0.00029000689100858694, + "loss": 1.9192, + "step": 7347 + }, + { + "epoch": 0.7751054852320675, + "grad_norm": 0.38521426916122437, + "learning_rate": 0.0002896046593324875, + "loss": 1.9207, + "step": 7348 + }, + { + "epoch": 0.775210970464135, + "grad_norm": 0.36362114548683167, + "learning_rate": 0.00028920298554079113, + "loss": 1.8976, + "step": 7349 + }, + { + "epoch": 0.7753164556962026, + "grad_norm": 0.3596227765083313, + "learning_rate": 0.0002888018688597272, + "loss": 1.8699, + "step": 7350 + }, + { + "epoch": 0.77542194092827, + "grad_norm": 0.3591630756855011, + "learning_rate": 0.00028840130851659853, + "loss": 1.8723, + "step": 7351 + }, + { + "epoch": 0.7755274261603375, + "grad_norm": 0.37629133462905884, + "learning_rate": 0.00028800130373977934, + "loss": 1.9205, + "step": 7352 + }, + { + "epoch": 0.7756329113924051, + "grad_norm": 0.3573421537876129, + "learning_rate": 0.00028760185375871445, + "loss": 1.9003, + "step": 7353 + }, + { + "epoch": 0.7757383966244725, + "grad_norm": 
0.3555057644844055, + "learning_rate": 0.0002872029578039172, + "loss": 1.8721, + "step": 7354 + }, + { + "epoch": 0.7758438818565401, + "grad_norm": 0.35910919308662415, + "learning_rate": 0.0002868046151069681, + "loss": 1.8969, + "step": 7355 + }, + { + "epoch": 0.7759493670886076, + "grad_norm": 0.38547998666763306, + "learning_rate": 0.0002864068249005136, + "loss": 1.9306, + "step": 7356 + }, + { + "epoch": 0.7760548523206751, + "grad_norm": 0.3538517951965332, + "learning_rate": 0.0002860095864182644, + "loss": 1.8992, + "step": 7357 + }, + { + "epoch": 0.7761603375527426, + "grad_norm": 0.3801458179950714, + "learning_rate": 0.00028561289889499417, + "loss": 1.898, + "step": 7358 + }, + { + "epoch": 0.7762658227848102, + "grad_norm": 0.3678768575191498, + "learning_rate": 0.00028521676156653756, + "loss": 1.9089, + "step": 7359 + }, + { + "epoch": 0.7763713080168776, + "grad_norm": 0.3613553047180176, + "learning_rate": 0.0002848211736697894, + "loss": 1.8813, + "step": 7360 + }, + { + "epoch": 0.7764767932489451, + "grad_norm": 0.3727304935455322, + "learning_rate": 0.0002844261344427028, + "loss": 1.9151, + "step": 7361 + }, + { + "epoch": 0.7765822784810127, + "grad_norm": 0.34561753273010254, + "learning_rate": 0.000284031643124288, + "loss": 1.9165, + "step": 7362 + }, + { + "epoch": 0.7766877637130801, + "grad_norm": 0.3717995882034302, + "learning_rate": 0.00028363769895461044, + "loss": 1.8668, + "step": 7363 + }, + { + "epoch": 0.7767932489451477, + "grad_norm": 0.3274199664592743, + "learning_rate": 0.00028324430117478974, + "loss": 1.8649, + "step": 7364 + }, + { + "epoch": 0.7768987341772152, + "grad_norm": 0.3312003016471863, + "learning_rate": 0.0002828514490269979, + "loss": 1.8901, + "step": 7365 + }, + { + "epoch": 0.7770042194092827, + "grad_norm": 0.3396450877189636, + "learning_rate": 0.0002824591417544582, + "loss": 1.8815, + "step": 7366 + }, + { + "epoch": 0.7771097046413502, + "grad_norm": 0.33892500400543213, + "learning_rate": 
0.0002820673786014436, + "loss": 1.8804, + "step": 7367 + }, + { + "epoch": 0.7772151898734178, + "grad_norm": 0.3540956377983093, + "learning_rate": 0.00028167615881327494, + "loss": 1.8569, + "step": 7368 + }, + { + "epoch": 0.7773206751054852, + "grad_norm": 0.37293460965156555, + "learning_rate": 0.00028128548163632006, + "loss": 1.9245, + "step": 7369 + }, + { + "epoch": 0.7774261603375527, + "grad_norm": 0.35530808568000793, + "learning_rate": 0.00028089534631799183, + "loss": 1.9357, + "step": 7370 + }, + { + "epoch": 0.7775316455696203, + "grad_norm": 0.3816775679588318, + "learning_rate": 0.0002805057521067471, + "loss": 1.9285, + "step": 7371 + }, + { + "epoch": 0.7776371308016877, + "grad_norm": 0.35138171911239624, + "learning_rate": 0.0002801166982520851, + "loss": 1.913, + "step": 7372 + }, + { + "epoch": 0.7777426160337553, + "grad_norm": 0.376354455947876, + "learning_rate": 0.000279728184004546, + "loss": 1.8604, + "step": 7373 + }, + { + "epoch": 0.7778481012658228, + "grad_norm": 0.3609488606452942, + "learning_rate": 0.0002793402086157093, + "loss": 1.9029, + "step": 7374 + }, + { + "epoch": 0.7779535864978903, + "grad_norm": 0.3610442876815796, + "learning_rate": 0.0002789527713381925, + "loss": 1.9175, + "step": 7375 + }, + { + "epoch": 0.7780590717299578, + "grad_norm": 0.35376977920532227, + "learning_rate": 0.00027856587142565005, + "loss": 1.8562, + "step": 7376 + }, + { + "epoch": 0.7781645569620254, + "grad_norm": 0.35858190059661865, + "learning_rate": 0.0002781795081327712, + "loss": 1.8738, + "step": 7377 + }, + { + "epoch": 0.7782700421940928, + "grad_norm": 0.3557851314544678, + "learning_rate": 0.0002777936807152791, + "loss": 1.868, + "step": 7378 + }, + { + "epoch": 0.7783755274261603, + "grad_norm": 0.3694247603416443, + "learning_rate": 0.0002774083884299292, + "loss": 1.9103, + "step": 7379 + }, + { + "epoch": 0.7784810126582279, + "grad_norm": 0.3781526982784271, + "learning_rate": 0.0002770236305345076, + "loss": 1.8835, + 
"step": 7380 + }, + { + "epoch": 0.7785864978902953, + "grad_norm": 0.35883623361587524, + "learning_rate": 0.00027663940628783017, + "loss": 1.8893, + "step": 7381 + }, + { + "epoch": 0.7786919831223629, + "grad_norm": 0.3467499017715454, + "learning_rate": 0.0002762557149497405, + "loss": 1.9604, + "step": 7382 + }, + { + "epoch": 0.7787974683544304, + "grad_norm": 0.359209805727005, + "learning_rate": 0.00027587255578110894, + "loss": 1.8961, + "step": 7383 + }, + { + "epoch": 0.7789029535864979, + "grad_norm": 0.34856072068214417, + "learning_rate": 0.0002754899280438309, + "loss": 1.8931, + "step": 7384 + }, + { + "epoch": 0.7790084388185654, + "grad_norm": 0.3592338562011719, + "learning_rate": 0.0002751078310008254, + "loss": 1.8966, + "step": 7385 + }, + { + "epoch": 0.779113924050633, + "grad_norm": 0.3296029567718506, + "learning_rate": 0.0002747262639160341, + "loss": 1.8363, + "step": 7386 + }, + { + "epoch": 0.7792194092827004, + "grad_norm": 0.359417200088501, + "learning_rate": 0.0002743452260544193, + "loss": 1.9072, + "step": 7387 + }, + { + "epoch": 0.7793248945147679, + "grad_norm": 0.3820835053920746, + "learning_rate": 0.0002739647166819628, + "loss": 1.8946, + "step": 7388 + }, + { + "epoch": 0.7794303797468355, + "grad_norm": 0.36948391795158386, + "learning_rate": 0.0002735847350656645, + "loss": 1.8946, + "step": 7389 + }, + { + "epoch": 0.7795358649789029, + "grad_norm": 0.34903794527053833, + "learning_rate": 0.00027320528047354093, + "loss": 1.8796, + "step": 7390 + }, + { + "epoch": 0.7796413502109705, + "grad_norm": 0.36495453119277954, + "learning_rate": 0.00027282635217462393, + "loss": 1.8778, + "step": 7391 + }, + { + "epoch": 0.779746835443038, + "grad_norm": 0.343237966299057, + "learning_rate": 0.0002724479494389592, + "loss": 1.9142, + "step": 7392 + }, + { + "epoch": 0.7798523206751055, + "grad_norm": 0.38693419098854065, + "learning_rate": 0.00027207007153760463, + "loss": 1.8778, + "step": 7393 + }, + { + "epoch": 
0.779957805907173, + "grad_norm": 0.33597153425216675, + "learning_rate": 0.0002716927177426294, + "loss": 1.8939, + "step": 7394 + }, + { + "epoch": 0.7800632911392406, + "grad_norm": 0.3777642548084259, + "learning_rate": 0.0002713158873271122, + "loss": 1.8724, + "step": 7395 + }, + { + "epoch": 0.780168776371308, + "grad_norm": 0.3485106825828552, + "learning_rate": 0.00027093957956513985, + "loss": 1.9438, + "step": 7396 + }, + { + "epoch": 0.7802742616033755, + "grad_norm": 0.3401412069797516, + "learning_rate": 0.0002705637937318062, + "loss": 1.9003, + "step": 7397 + }, + { + "epoch": 0.7803797468354431, + "grad_norm": 0.36115342378616333, + "learning_rate": 0.00027018852910321045, + "loss": 1.8845, + "step": 7398 + }, + { + "epoch": 0.7804852320675105, + "grad_norm": 0.37098297476768494, + "learning_rate": 0.0002698137849564556, + "loss": 1.9127, + "step": 7399 + }, + { + "epoch": 0.7805907172995781, + "grad_norm": 0.36476486921310425, + "learning_rate": 0.00026943956056964773, + "loss": 1.9122, + "step": 7400 + }, + { + "epoch": 0.7806962025316456, + "grad_norm": 0.3488933742046356, + "learning_rate": 0.0002690658552218937, + "loss": 1.8739, + "step": 7401 + }, + { + "epoch": 0.7808016877637131, + "grad_norm": 0.3388707935810089, + "learning_rate": 0.0002686926681933006, + "loss": 1.92, + "step": 7402 + }, + { + "epoch": 0.7809071729957806, + "grad_norm": 0.3565027415752411, + "learning_rate": 0.00026831999876497376, + "loss": 1.8835, + "step": 7403 + }, + { + "epoch": 0.7810126582278482, + "grad_norm": 0.326692670583725, + "learning_rate": 0.00026794784621901564, + "loss": 1.8964, + "step": 7404 + }, + { + "epoch": 0.7811181434599156, + "grad_norm": 0.33897024393081665, + "learning_rate": 0.0002675762098385246, + "loss": 1.8661, + "step": 7405 + }, + { + "epoch": 0.7812236286919831, + "grad_norm": 0.35949477553367615, + "learning_rate": 0.000267205088907593, + "loss": 1.8684, + "step": 7406 + }, + { + "epoch": 0.7813291139240506, + "grad_norm": 
0.34453141689300537, + "learning_rate": 0.00026683448271130645, + "loss": 1.8897, + "step": 7407 + }, + { + "epoch": 0.7814345991561181, + "grad_norm": 0.38825786113739014, + "learning_rate": 0.0002664643905357418, + "loss": 1.8857, + "step": 7408 + }, + { + "epoch": 0.7815400843881857, + "grad_norm": 0.3499142527580261, + "learning_rate": 0.0002660948116679665, + "loss": 1.908, + "step": 7409 + }, + { + "epoch": 0.7816455696202531, + "grad_norm": 0.33237674832344055, + "learning_rate": 0.0002657257453960364, + "loss": 1.8791, + "step": 7410 + }, + { + "epoch": 0.7817510548523207, + "grad_norm": 0.36553895473480225, + "learning_rate": 0.00026535719100899516, + "loss": 1.906, + "step": 7411 + }, + { + "epoch": 0.7818565400843882, + "grad_norm": 0.3761415481567383, + "learning_rate": 0.00026498914779687227, + "loss": 1.9031, + "step": 7412 + }, + { + "epoch": 0.7819620253164556, + "grad_norm": 0.37876343727111816, + "learning_rate": 0.000264621615050682, + "loss": 1.8987, + "step": 7413 + }, + { + "epoch": 0.7820675105485232, + "grad_norm": 0.3541346788406372, + "learning_rate": 0.0002642545920624219, + "loss": 1.908, + "step": 7414 + }, + { + "epoch": 0.7821729957805907, + "grad_norm": 0.39901018142700195, + "learning_rate": 0.0002638880781250718, + "loss": 1.8696, + "step": 7415 + }, + { + "epoch": 0.7822784810126582, + "grad_norm": 0.36474958062171936, + "learning_rate": 0.00026352207253259167, + "loss": 1.8791, + "step": 7416 + }, + { + "epoch": 0.7823839662447257, + "grad_norm": 0.36050930619239807, + "learning_rate": 0.0002631565745799212, + "loss": 1.8776, + "step": 7417 + }, + { + "epoch": 0.7824894514767933, + "grad_norm": 0.3677757978439331, + "learning_rate": 0.0002627915835629777, + "loss": 1.9202, + "step": 7418 + }, + { + "epoch": 0.7825949367088607, + "grad_norm": 0.3618911802768707, + "learning_rate": 0.00026242709877865493, + "loss": 1.8994, + "step": 7419 + }, + { + "epoch": 0.7827004219409283, + "grad_norm": 0.35926517844200134, + "learning_rate": 
0.0002620631195248222, + "loss": 1.8905, + "step": 7420 + }, + { + "epoch": 0.7828059071729958, + "grad_norm": 0.3403022587299347, + "learning_rate": 0.00026169964510032245, + "loss": 1.8891, + "step": 7421 + }, + { + "epoch": 0.7829113924050632, + "grad_norm": 0.35324254631996155, + "learning_rate": 0.0002613366748049711, + "loss": 1.9051, + "step": 7422 + }, + { + "epoch": 0.7830168776371308, + "grad_norm": 0.348141610622406, + "learning_rate": 0.0002609742079395546, + "loss": 1.8734, + "step": 7423 + }, + { + "epoch": 0.7831223628691983, + "grad_norm": 0.35899603366851807, + "learning_rate": 0.0002606122438058295, + "loss": 1.882, + "step": 7424 + }, + { + "epoch": 0.7832278481012658, + "grad_norm": 0.37481972575187683, + "learning_rate": 0.00026025078170652043, + "loss": 1.8919, + "step": 7425 + }, + { + "epoch": 0.7833333333333333, + "grad_norm": 0.3546105921268463, + "learning_rate": 0.00025988982094531945, + "loss": 1.8547, + "step": 7426 + }, + { + "epoch": 0.7834388185654009, + "grad_norm": 0.39484667778015137, + "learning_rate": 0.00025952936082688415, + "loss": 1.8711, + "step": 7427 + }, + { + "epoch": 0.7835443037974683, + "grad_norm": 0.3797547519207001, + "learning_rate": 0.00025916940065683655, + "loss": 1.8755, + "step": 7428 + }, + { + "epoch": 0.7836497890295359, + "grad_norm": 0.3954053521156311, + "learning_rate": 0.00025880993974176204, + "loss": 1.8864, + "step": 7429 + }, + { + "epoch": 0.7837552742616034, + "grad_norm": 0.37606558203697205, + "learning_rate": 0.00025845097738920735, + "loss": 1.8997, + "step": 7430 + }, + { + "epoch": 0.7838607594936708, + "grad_norm": 0.35304591059684753, + "learning_rate": 0.00025809251290767984, + "loss": 1.8761, + "step": 7431 + }, + { + "epoch": 0.7839662447257384, + "grad_norm": 0.3390900194644928, + "learning_rate": 0.000257734545606646, + "loss": 1.8693, + "step": 7432 + }, + { + "epoch": 0.7840717299578059, + "grad_norm": 0.38151660561561584, + "learning_rate": 0.00025737707479652985, + "loss": 
1.8724, + "step": 7433 + }, + { + "epoch": 0.7841772151898734, + "grad_norm": 0.4029979407787323, + "learning_rate": 0.0002570200997887122, + "loss": 1.902, + "step": 7434 + }, + { + "epoch": 0.7842827004219409, + "grad_norm": 0.34219661355018616, + "learning_rate": 0.0002566636198955286, + "loss": 1.8605, + "step": 7435 + }, + { + "epoch": 0.7843881856540085, + "grad_norm": 0.33440443873405457, + "learning_rate": 0.0002563076344302685, + "loss": 1.8416, + "step": 7436 + }, + { + "epoch": 0.7844936708860759, + "grad_norm": 0.3469255268573761, + "learning_rate": 0.00025595214270717387, + "loss": 1.9068, + "step": 7437 + }, + { + "epoch": 0.7845991561181435, + "grad_norm": 0.3719151020050049, + "learning_rate": 0.00025559714404143767, + "loss": 1.8651, + "step": 7438 + }, + { + "epoch": 0.784704641350211, + "grad_norm": 0.37709301710128784, + "learning_rate": 0.0002552426377492028, + "loss": 1.8964, + "step": 7439 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 0.3671756684780121, + "learning_rate": 0.0002548886231475606, + "loss": 1.8887, + "step": 7440 + }, + { + "epoch": 0.784915611814346, + "grad_norm": 0.3906153440475464, + "learning_rate": 0.00025453509955454957, + "loss": 1.8908, + "step": 7441 + }, + { + "epoch": 0.7850210970464135, + "grad_norm": 0.3450516164302826, + "learning_rate": 0.00025418206628915406, + "loss": 1.8931, + "step": 7442 + }, + { + "epoch": 0.785126582278481, + "grad_norm": 0.34839990735054016, + "learning_rate": 0.00025382952267130306, + "loss": 1.845, + "step": 7443 + }, + { + "epoch": 0.7852320675105485, + "grad_norm": 0.3775815963745117, + "learning_rate": 0.0002534774680218686, + "loss": 1.8728, + "step": 7444 + }, + { + "epoch": 0.7853375527426161, + "grad_norm": 0.35866740345954895, + "learning_rate": 0.00025312590166266493, + "loss": 1.896, + "step": 7445 + }, + { + "epoch": 0.7854430379746835, + "grad_norm": 0.36462244391441345, + "learning_rate": 0.00025277482291644667, + "loss": 1.8733, + "step": 7446 + }, + { + "epoch": 
0.7855485232067511, + "grad_norm": 0.34961751103401184, + "learning_rate": 0.00025242423110690787, + "loss": 1.9149, + "step": 7447 + }, + { + "epoch": 0.7856540084388186, + "grad_norm": 0.3413016200065613, + "learning_rate": 0.0002520741255586806, + "loss": 1.8704, + "step": 7448 + }, + { + "epoch": 0.785759493670886, + "grad_norm": 0.33875221014022827, + "learning_rate": 0.0002517245055973337, + "loss": 1.8915, + "step": 7449 + }, + { + "epoch": 0.7858649789029536, + "grad_norm": 0.408234566450119, + "learning_rate": 0.0002513753705493713, + "loss": 1.8965, + "step": 7450 + }, + { + "epoch": 0.7859704641350211, + "grad_norm": 0.36543309688568115, + "learning_rate": 0.00025102671974223175, + "loss": 1.9065, + "step": 7451 + }, + { + "epoch": 0.7860759493670886, + "grad_norm": 0.36437323689460754, + "learning_rate": 0.0002506785525042861, + "loss": 1.8967, + "step": 7452 + }, + { + "epoch": 0.7861814345991561, + "grad_norm": 0.37491074204444885, + "learning_rate": 0.0002503308681648371, + "loss": 1.872, + "step": 7453 + }, + { + "epoch": 0.7862869198312237, + "grad_norm": 0.3563857972621918, + "learning_rate": 0.0002499836660541177, + "loss": 1.89, + "step": 7454 + }, + { + "epoch": 0.7863924050632911, + "grad_norm": 0.35962414741516113, + "learning_rate": 0.00024963694550328967, + "loss": 1.8933, + "step": 7455 + }, + { + "epoch": 0.7864978902953587, + "grad_norm": 0.36568373441696167, + "learning_rate": 0.0002492907058444425, + "loss": 1.8997, + "step": 7456 + }, + { + "epoch": 0.7866033755274262, + "grad_norm": 0.3756716847419739, + "learning_rate": 0.00024894494641059217, + "loss": 1.8867, + "step": 7457 + }, + { + "epoch": 0.7867088607594936, + "grad_norm": 0.383032888174057, + "learning_rate": 0.00024859966653567963, + "loss": 1.8906, + "step": 7458 + }, + { + "epoch": 0.7868143459915612, + "grad_norm": 0.3982655107975006, + "learning_rate": 0.00024825486555456975, + "loss": 1.8841, + "step": 7459 + }, + { + "epoch": 0.7869198312236287, + "grad_norm": 
0.37815651297569275, + "learning_rate": 0.0002479105428030497, + "loss": 1.8699, + "step": 7460 + }, + { + "epoch": 0.7870253164556962, + "grad_norm": 0.38475319743156433, + "learning_rate": 0.00024756669761782815, + "loss": 1.9101, + "step": 7461 + }, + { + "epoch": 0.7871308016877637, + "grad_norm": 0.39297741651535034, + "learning_rate": 0.00024722332933653344, + "loss": 1.8948, + "step": 7462 + }, + { + "epoch": 0.7872362869198313, + "grad_norm": 0.3515428304672241, + "learning_rate": 0.000246880437297713, + "loss": 1.8823, + "step": 7463 + }, + { + "epoch": 0.7873417721518987, + "grad_norm": 0.35613515973091125, + "learning_rate": 0.0002465380208408314, + "loss": 1.8612, + "step": 7464 + }, + { + "epoch": 0.7874472573839663, + "grad_norm": 0.35608917474746704, + "learning_rate": 0.0002461960793062694, + "loss": 1.8916, + "step": 7465 + }, + { + "epoch": 0.7875527426160338, + "grad_norm": 0.34016159176826477, + "learning_rate": 0.00024585461203532254, + "loss": 1.9038, + "step": 7466 + }, + { + "epoch": 0.7876582278481012, + "grad_norm": 0.3818528652191162, + "learning_rate": 0.00024551361837020025, + "loss": 1.8948, + "step": 7467 + }, + { + "epoch": 0.7877637130801688, + "grad_norm": 0.36135798692703247, + "learning_rate": 0.0002451730976540241, + "loss": 1.8895, + "step": 7468 + }, + { + "epoch": 0.7878691983122363, + "grad_norm": 0.3589397668838501, + "learning_rate": 0.00024483304923082663, + "loss": 1.8771, + "step": 7469 + }, + { + "epoch": 0.7879746835443038, + "grad_norm": 0.3697337508201599, + "learning_rate": 0.00024449347244555043, + "loss": 1.9034, + "step": 7470 + }, + { + "epoch": 0.7880801687763713, + "grad_norm": 0.36743488907814026, + "learning_rate": 0.0002441543666440464, + "loss": 1.88, + "step": 7471 + }, + { + "epoch": 0.7881856540084389, + "grad_norm": 0.3415161669254303, + "learning_rate": 0.00024381573117307302, + "loss": 1.871, + "step": 7472 + }, + { + "epoch": 0.7882911392405063, + "grad_norm": 0.35705968737602234, + 
"learning_rate": 0.00024347756538029453, + "loss": 1.9253, + "step": 7473 + }, + { + "epoch": 0.7883966244725739, + "grad_norm": 0.420254111289978, + "learning_rate": 0.00024313986861428, + "loss": 1.8686, + "step": 7474 + }, + { + "epoch": 0.7885021097046413, + "grad_norm": 0.34936368465423584, + "learning_rate": 0.00024280264022450215, + "loss": 1.869, + "step": 7475 + }, + { + "epoch": 0.7886075949367088, + "grad_norm": 0.3602170944213867, + "learning_rate": 0.00024246587956133572, + "loss": 1.9164, + "step": 7476 + }, + { + "epoch": 0.7887130801687764, + "grad_norm": 0.34617555141448975, + "learning_rate": 0.0002421295859760568, + "loss": 1.9088, + "step": 7477 + }, + { + "epoch": 0.7888185654008438, + "grad_norm": 0.34704816341400146, + "learning_rate": 0.00024179375882084098, + "loss": 1.8888, + "step": 7478 + }, + { + "epoch": 0.7889240506329114, + "grad_norm": 0.34000423550605774, + "learning_rate": 0.0002414583974487624, + "loss": 1.8596, + "step": 7479 + }, + { + "epoch": 0.7890295358649789, + "grad_norm": 0.3718421161174774, + "learning_rate": 0.00024112350121379254, + "loss": 1.8881, + "step": 7480 + }, + { + "epoch": 0.7891350210970464, + "grad_norm": 0.3527815341949463, + "learning_rate": 0.00024078906947079878, + "loss": 1.9119, + "step": 7481 + }, + { + "epoch": 0.7892405063291139, + "grad_norm": 0.341316819190979, + "learning_rate": 0.00024045510157554356, + "loss": 1.9175, + "step": 7482 + }, + { + "epoch": 0.7893459915611815, + "grad_norm": 0.3634943664073944, + "learning_rate": 0.00024012159688468254, + "loss": 1.8549, + "step": 7483 + }, + { + "epoch": 0.7894514767932489, + "grad_norm": 0.3452189862728119, + "learning_rate": 0.00023978855475576384, + "loss": 1.9004, + "step": 7484 + }, + { + "epoch": 0.7895569620253164, + "grad_norm": 0.3538750112056732, + "learning_rate": 0.00023945597454722657, + "loss": 1.8965, + "step": 7485 + }, + { + "epoch": 0.789662447257384, + "grad_norm": 0.34278011322021484, + "learning_rate": 0.00023912385561839983, 
+ "loss": 1.8722, + "step": 7486 + }, + { + "epoch": 0.7897679324894514, + "grad_norm": 0.36972635984420776, + "learning_rate": 0.00023879219732950114, + "loss": 1.8837, + "step": 7487 + }, + { + "epoch": 0.789873417721519, + "grad_norm": 0.344277560710907, + "learning_rate": 0.0002384609990416354, + "loss": 1.8898, + "step": 7488 + }, + { + "epoch": 0.7899789029535865, + "grad_norm": 0.36826789379119873, + "learning_rate": 0.00023813026011679372, + "loss": 1.8748, + "step": 7489 + }, + { + "epoch": 0.790084388185654, + "grad_norm": 0.3699629306793213, + "learning_rate": 0.000237799979917852, + "loss": 1.8998, + "step": 7490 + }, + { + "epoch": 0.7901898734177215, + "grad_norm": 0.37366437911987305, + "learning_rate": 0.00023747015780857005, + "loss": 1.871, + "step": 7491 + }, + { + "epoch": 0.7902953586497891, + "grad_norm": 0.37498876452445984, + "learning_rate": 0.0002371407931535898, + "loss": 1.9079, + "step": 7492 + }, + { + "epoch": 0.7904008438818565, + "grad_norm": 0.3559134304523468, + "learning_rate": 0.00023681188531843469, + "loss": 1.9017, + "step": 7493 + }, + { + "epoch": 0.790506329113924, + "grad_norm": 0.38856232166290283, + "learning_rate": 0.00023648343366950792, + "loss": 1.8741, + "step": 7494 + }, + { + "epoch": 0.7906118143459916, + "grad_norm": 0.3816864490509033, + "learning_rate": 0.0002361554375740916, + "loss": 1.9316, + "step": 7495 + }, + { + "epoch": 0.790717299578059, + "grad_norm": 0.37773504853248596, + "learning_rate": 0.00023582789640034545, + "loss": 1.8604, + "step": 7496 + }, + { + "epoch": 0.7908227848101266, + "grad_norm": 0.39668846130371094, + "learning_rate": 0.0002355008095173055, + "loss": 1.8724, + "step": 7497 + }, + { + "epoch": 0.7909282700421941, + "grad_norm": 0.37538403272628784, + "learning_rate": 0.00023517417629488285, + "loss": 1.8984, + "step": 7498 + }, + { + "epoch": 0.7910337552742616, + "grad_norm": 0.37282654643058777, + "learning_rate": 0.0002348479961038625, + "loss": 1.8764, + "step": 7499 + }, + 
{ + "epoch": 0.7911392405063291, + "grad_norm": 0.40159812569618225, + "learning_rate": 0.00023452226831590227, + "loss": 1.9037, + "step": 7500 + }, + { + "epoch": 0.7912447257383967, + "grad_norm": 0.3736564815044403, + "learning_rate": 0.00023419699230353144, + "loss": 1.9316, + "step": 7501 + }, + { + "epoch": 0.7913502109704641, + "grad_norm": 0.3801629841327667, + "learning_rate": 0.00023387216744014946, + "loss": 1.9091, + "step": 7502 + }, + { + "epoch": 0.7914556962025316, + "grad_norm": 0.4063912034034729, + "learning_rate": 0.00023354779310002504, + "loss": 1.8935, + "step": 7503 + }, + { + "epoch": 0.7915611814345992, + "grad_norm": 0.38035038113594055, + "learning_rate": 0.00023322386865829456, + "loss": 1.9295, + "step": 7504 + }, + { + "epoch": 0.7916666666666666, + "grad_norm": 0.3612591326236725, + "learning_rate": 0.00023290039349096122, + "loss": 1.8956, + "step": 7505 + }, + { + "epoch": 0.7917721518987342, + "grad_norm": 0.3784867823123932, + "learning_rate": 0.0002325773669748937, + "loss": 1.909, + "step": 7506 + }, + { + "epoch": 0.7918776371308017, + "grad_norm": 0.354899138212204, + "learning_rate": 0.00023225478848782483, + "loss": 1.9227, + "step": 7507 + }, + { + "epoch": 0.7919831223628692, + "grad_norm": 0.35011428594589233, + "learning_rate": 0.00023193265740835056, + "loss": 1.8804, + "step": 7508 + }, + { + "epoch": 0.7920886075949367, + "grad_norm": 0.34930723905563354, + "learning_rate": 0.00023161097311592867, + "loss": 1.838, + "step": 7509 + }, + { + "epoch": 0.7921940928270043, + "grad_norm": 0.3713693618774414, + "learning_rate": 0.00023128973499087779, + "loss": 1.867, + "step": 7510 + }, + { + "epoch": 0.7922995780590717, + "grad_norm": 0.3631233274936676, + "learning_rate": 0.00023096894241437586, + "loss": 1.8808, + "step": 7511 + }, + { + "epoch": 0.7924050632911392, + "grad_norm": 0.3724093437194824, + "learning_rate": 0.00023064859476845908, + "loss": 1.8803, + "step": 7512 + }, + { + "epoch": 0.7925105485232068, + 
"grad_norm": 0.331993967294693, + "learning_rate": 0.00023032869143602085, + "loss": 1.8745, + "step": 7513 + }, + { + "epoch": 0.7926160337552742, + "grad_norm": 0.34614884853363037, + "learning_rate": 0.00023000923180081047, + "loss": 1.9016, + "step": 7514 + }, + { + "epoch": 0.7927215189873418, + "grad_norm": 0.33532676100730896, + "learning_rate": 0.00022969021524743197, + "loss": 1.8495, + "step": 7515 + }, + { + "epoch": 0.7928270042194093, + "grad_norm": 0.36969107389450073, + "learning_rate": 0.00022937164116134282, + "loss": 1.9065, + "step": 7516 + }, + { + "epoch": 0.7929324894514768, + "grad_norm": 0.34217047691345215, + "learning_rate": 0.00022905350892885298, + "loss": 1.8663, + "step": 7517 + }, + { + "epoch": 0.7930379746835443, + "grad_norm": 0.3791446387767792, + "learning_rate": 0.0002287358179371235, + "loss": 1.8749, + "step": 7518 + }, + { + "epoch": 0.7931434599156119, + "grad_norm": 0.3469908535480499, + "learning_rate": 0.00022841856757416538, + "loss": 1.9008, + "step": 7519 + }, + { + "epoch": 0.7932489451476793, + "grad_norm": 0.3499135971069336, + "learning_rate": 0.00022810175722883858, + "loss": 1.866, + "step": 7520 + }, + { + "epoch": 0.7933544303797468, + "grad_norm": 0.3481650948524475, + "learning_rate": 0.00022778538629085056, + "loss": 1.8757, + "step": 7521 + }, + { + "epoch": 0.7934599156118144, + "grad_norm": 0.3546859622001648, + "learning_rate": 0.0002274694541507553, + "loss": 1.8895, + "step": 7522 + }, + { + "epoch": 0.7935654008438818, + "grad_norm": 0.3549768030643463, + "learning_rate": 0.00022715396019995203, + "loss": 1.891, + "step": 7523 + }, + { + "epoch": 0.7936708860759494, + "grad_norm": 0.3391817510128021, + "learning_rate": 0.00022683890383068403, + "loss": 1.9057, + "step": 7524 + }, + { + "epoch": 0.7937763713080169, + "grad_norm": 0.35470137000083923, + "learning_rate": 0.00022652428443603774, + "loss": 1.8935, + "step": 7525 + }, + { + "epoch": 0.7938818565400844, + "grad_norm": 0.348499059677124, + 
"learning_rate": 0.00022621010140994125, + "loss": 1.8732, + "step": 7526 + }, + { + "epoch": 0.7939873417721519, + "grad_norm": 0.3574594259262085, + "learning_rate": 0.0002258963541471631, + "loss": 1.8832, + "step": 7527 + }, + { + "epoch": 0.7940928270042195, + "grad_norm": 0.37065309286117554, + "learning_rate": 0.00022558304204331147, + "loss": 1.8951, + "step": 7528 + }, + { + "epoch": 0.7941983122362869, + "grad_norm": 0.38190439343452454, + "learning_rate": 0.0002252701644948328, + "loss": 1.8773, + "step": 7529 + }, + { + "epoch": 0.7943037974683544, + "grad_norm": 0.33559003472328186, + "learning_rate": 0.0002249577208990106, + "loss": 1.8941, + "step": 7530 + }, + { + "epoch": 0.794409282700422, + "grad_norm": 0.40437644720077515, + "learning_rate": 0.00022464571065396427, + "loss": 1.9078, + "step": 7531 + }, + { + "epoch": 0.7945147679324894, + "grad_norm": 0.3816181421279907, + "learning_rate": 0.0002243341331586481, + "loss": 1.8858, + "step": 7532 + }, + { + "epoch": 0.794620253164557, + "grad_norm": 0.35445427894592285, + "learning_rate": 0.0002240229878128499, + "loss": 1.8996, + "step": 7533 + }, + { + "epoch": 0.7947257383966245, + "grad_norm": 0.35256245732307434, + "learning_rate": 0.00022371227401719017, + "loss": 1.9095, + "step": 7534 + }, + { + "epoch": 0.794831223628692, + "grad_norm": 0.360943078994751, + "learning_rate": 0.00022340199117312052, + "loss": 1.8426, + "step": 7535 + }, + { + "epoch": 0.7949367088607595, + "grad_norm": 0.36167195439338684, + "learning_rate": 0.00022309213868292277, + "loss": 1.8684, + "step": 7536 + }, + { + "epoch": 0.7950421940928271, + "grad_norm": 0.3811332583427429, + "learning_rate": 0.0002227827159497079, + "loss": 1.8831, + "step": 7537 + }, + { + "epoch": 0.7951476793248945, + "grad_norm": 0.3595214784145355, + "learning_rate": 0.0002224737223774145, + "loss": 1.8838, + "step": 7538 + }, + { + "epoch": 0.795253164556962, + "grad_norm": 0.3470074534416199, + "learning_rate": 0.00022216515737080817, 
+ "loss": 1.8732, + "step": 7539 + }, + { + "epoch": 0.7953586497890295, + "grad_norm": 0.3706475794315338, + "learning_rate": 0.00022185702033547996, + "loss": 1.8938, + "step": 7540 + }, + { + "epoch": 0.795464135021097, + "grad_norm": 0.3890976905822754, + "learning_rate": 0.00022154931067784521, + "loss": 1.8346, + "step": 7541 + }, + { + "epoch": 0.7955696202531646, + "grad_norm": 0.3662917912006378, + "learning_rate": 0.0002212420278051428, + "loss": 1.9014, + "step": 7542 + }, + { + "epoch": 0.795675105485232, + "grad_norm": 0.35041719675064087, + "learning_rate": 0.0002209351711254335, + "loss": 1.8786, + "step": 7543 + }, + { + "epoch": 0.7957805907172996, + "grad_norm": 0.3688686788082123, + "learning_rate": 0.00022062874004759935, + "loss": 1.8891, + "step": 7544 + }, + { + "epoch": 0.7958860759493671, + "grad_norm": 0.3694148659706116, + "learning_rate": 0.00022032273398134208, + "loss": 1.89, + "step": 7545 + }, + { + "epoch": 0.7959915611814345, + "grad_norm": 0.35447239875793457, + "learning_rate": 0.00022001715233718213, + "loss": 1.9153, + "step": 7546 + }, + { + "epoch": 0.7960970464135021, + "grad_norm": 0.36616966128349304, + "learning_rate": 0.0002197119945264576, + "loss": 1.8653, + "step": 7547 + }, + { + "epoch": 0.7962025316455696, + "grad_norm": 0.385937362909317, + "learning_rate": 0.00021940725996132303, + "loss": 1.8649, + "step": 7548 + }, + { + "epoch": 0.7963080168776371, + "grad_norm": 0.3594330847263336, + "learning_rate": 0.00021910294805474833, + "loss": 1.8839, + "step": 7549 + }, + { + "epoch": 0.7964135021097046, + "grad_norm": 0.4005342125892639, + "learning_rate": 0.00021879905822051756, + "loss": 1.8932, + "step": 7550 + }, + { + "epoch": 0.7965189873417722, + "grad_norm": 0.3489825129508972, + "learning_rate": 0.00021849558987322782, + "loss": 1.8807, + "step": 7551 + }, + { + "epoch": 0.7966244725738396, + "grad_norm": 0.37310123443603516, + "learning_rate": 0.00021819254242828816, + "loss": 1.9084, + "step": 7552 + }, + 
{ + "epoch": 0.7967299578059072, + "grad_norm": 0.36800217628479004, + "learning_rate": 0.0002178899153019185, + "loss": 1.9096, + "step": 7553 + }, + { + "epoch": 0.7968354430379747, + "grad_norm": 0.35106727480888367, + "learning_rate": 0.00021758770791114845, + "loss": 1.873, + "step": 7554 + }, + { + "epoch": 0.7969409282700421, + "grad_norm": 0.3608514070510864, + "learning_rate": 0.00021728591967381606, + "loss": 1.8388, + "step": 7555 + }, + { + "epoch": 0.7970464135021097, + "grad_norm": 0.3624444305896759, + "learning_rate": 0.0002169845500085669, + "loss": 1.9235, + "step": 7556 + }, + { + "epoch": 0.7971518987341772, + "grad_norm": 0.3357791602611542, + "learning_rate": 0.00021668359833485287, + "loss": 1.8759, + "step": 7557 + }, + { + "epoch": 0.7972573839662447, + "grad_norm": 0.3669358491897583, + "learning_rate": 0.00021638306407293116, + "loss": 1.8651, + "step": 7558 + }, + { + "epoch": 0.7973628691983122, + "grad_norm": 0.3601647913455963, + "learning_rate": 0.0002160829466438629, + "loss": 1.8744, + "step": 7559 + }, + { + "epoch": 0.7974683544303798, + "grad_norm": 0.34478262066841125, + "learning_rate": 0.00021578324546951222, + "loss": 1.8801, + "step": 7560 + }, + { + "epoch": 0.7975738396624472, + "grad_norm": 0.36891159415245056, + "learning_rate": 0.0002154839599725452, + "loss": 1.8609, + "step": 7561 + }, + { + "epoch": 0.7976793248945148, + "grad_norm": 0.3463604748249054, + "learning_rate": 0.0002151850895764285, + "loss": 1.9088, + "step": 7562 + }, + { + "epoch": 0.7977848101265823, + "grad_norm": 0.367169588804245, + "learning_rate": 0.00021488663370542862, + "loss": 1.8783, + "step": 7563 + }, + { + "epoch": 0.7978902953586497, + "grad_norm": 0.3680773675441742, + "learning_rate": 0.00021458859178461048, + "loss": 1.8993, + "step": 7564 + }, + { + "epoch": 0.7979957805907173, + "grad_norm": 0.3813187777996063, + "learning_rate": 0.00021429096323983645, + "loss": 1.8979, + "step": 7565 + }, + { + "epoch": 0.7981012658227848, + 
"grad_norm": 0.35231032967567444, + "learning_rate": 0.00021399374749776512, + "loss": 1.8562, + "step": 7566 + }, + { + "epoch": 0.7982067510548523, + "grad_norm": 0.3644540011882782, + "learning_rate": 0.00021369694398585033, + "loss": 1.9045, + "step": 7567 + }, + { + "epoch": 0.7983122362869198, + "grad_norm": 0.3911899924278259, + "learning_rate": 0.0002134005521323402, + "loss": 1.8866, + "step": 7568 + }, + { + "epoch": 0.7984177215189874, + "grad_norm": 0.3741798996925354, + "learning_rate": 0.00021310457136627562, + "loss": 1.9202, + "step": 7569 + }, + { + "epoch": 0.7985232067510548, + "grad_norm": 0.34572818875312805, + "learning_rate": 0.00021280900111748948, + "loss": 1.8798, + "step": 7570 + }, + { + "epoch": 0.7986286919831224, + "grad_norm": 0.3794964551925659, + "learning_rate": 0.00021251384081660544, + "loss": 1.8519, + "step": 7571 + }, + { + "epoch": 0.7987341772151899, + "grad_norm": 0.38156658411026, + "learning_rate": 0.00021221908989503698, + "loss": 1.8872, + "step": 7572 + }, + { + "epoch": 0.7988396624472573, + "grad_norm": 0.3695385754108429, + "learning_rate": 0.00021192474778498606, + "loss": 1.8788, + "step": 7573 + }, + { + "epoch": 0.7989451476793249, + "grad_norm": 0.3564225137233734, + "learning_rate": 0.00021163081391944227, + "loss": 1.8853, + "step": 7574 + }, + { + "epoch": 0.7990506329113924, + "grad_norm": 0.3460406959056854, + "learning_rate": 0.00021133728773218148, + "loss": 1.8725, + "step": 7575 + }, + { + "epoch": 0.7991561181434599, + "grad_norm": 0.3648071587085724, + "learning_rate": 0.00021104416865776502, + "loss": 1.9063, + "step": 7576 + }, + { + "epoch": 0.7992616033755274, + "grad_norm": 0.37515249848365784, + "learning_rate": 0.00021075145613153853, + "loss": 1.8923, + "step": 7577 + }, + { + "epoch": 0.799367088607595, + "grad_norm": 0.3669472634792328, + "learning_rate": 0.0002104591495896306, + "loss": 1.8901, + "step": 7578 + }, + { + "epoch": 0.7994725738396624, + "grad_norm": 0.36146271228790283, + 
"learning_rate": 0.00021016724846895213, + "loss": 1.9023, + "step": 7579 + }, + { + "epoch": 0.79957805907173, + "grad_norm": 0.35360732674598694, + "learning_rate": 0.00020987575220719483, + "loss": 1.8867, + "step": 7580 + }, + { + "epoch": 0.7996835443037975, + "grad_norm": 0.4337455928325653, + "learning_rate": 0.0002095846602428303, + "loss": 1.8673, + "step": 7581 + }, + { + "epoch": 0.799789029535865, + "grad_norm": 0.3702511787414551, + "learning_rate": 0.00020929397201510915, + "loss": 1.9095, + "step": 7582 + }, + { + "epoch": 0.7998945147679325, + "grad_norm": 0.3419906795024872, + "learning_rate": 0.00020900368696405964, + "loss": 1.9046, + "step": 7583 + }, + { + "epoch": 0.8, + "grad_norm": 0.3488527238368988, + "learning_rate": 0.00020871380453048668, + "loss": 1.8806, + "step": 7584 + }, + { + "epoch": 0.8001054852320675, + "grad_norm": 0.3692456781864166, + "learning_rate": 0.00020842432415597067, + "loss": 1.8999, + "step": 7585 + }, + { + "epoch": 0.800210970464135, + "grad_norm": 0.3602665066719055, + "learning_rate": 0.0002081352452828667, + "loss": 1.8709, + "step": 7586 + }, + { + "epoch": 0.8003164556962026, + "grad_norm": 0.3813033699989319, + "learning_rate": 0.0002078465673543032, + "loss": 1.8564, + "step": 7587 + }, + { + "epoch": 0.80042194092827, + "grad_norm": 0.35271260142326355, + "learning_rate": 0.00020755828981418106, + "loss": 1.8855, + "step": 7588 + }, + { + "epoch": 0.8005274261603376, + "grad_norm": 0.3713846802711487, + "learning_rate": 0.00020727041210717235, + "loss": 1.8837, + "step": 7589 + }, + { + "epoch": 0.8006329113924051, + "grad_norm": 0.3551430404186249, + "learning_rate": 0.00020698293367871933, + "loss": 1.8708, + "step": 7590 + }, + { + "epoch": 0.8007383966244725, + "grad_norm": 0.3549375534057617, + "learning_rate": 0.00020669585397503358, + "loss": 1.8904, + "step": 7591 + }, + { + "epoch": 0.8008438818565401, + "grad_norm": 0.35079964995384216, + "learning_rate": 0.0002064091724430947, + "loss": 1.8954, 
+ "step": 7592 + }, + { + "epoch": 0.8009493670886076, + "grad_norm": 0.381229430437088, + "learning_rate": 0.00020612288853064925, + "loss": 1.9184, + "step": 7593 + }, + { + "epoch": 0.8010548523206751, + "grad_norm": 0.3858181834220886, + "learning_rate": 0.00020583700168620985, + "loss": 1.8671, + "step": 7594 + }, + { + "epoch": 0.8011603375527426, + "grad_norm": 0.35383525490760803, + "learning_rate": 0.00020555151135905384, + "loss": 1.9257, + "step": 7595 + }, + { + "epoch": 0.8012658227848102, + "grad_norm": 0.32795262336730957, + "learning_rate": 0.00020526641699922268, + "loss": 1.869, + "step": 7596 + }, + { + "epoch": 0.8013713080168776, + "grad_norm": 0.3632115423679352, + "learning_rate": 0.00020498171805752037, + "loss": 1.8423, + "step": 7597 + }, + { + "epoch": 0.8014767932489452, + "grad_norm": 0.3594977855682373, + "learning_rate": 0.00020469741398551278, + "loss": 1.8572, + "step": 7598 + }, + { + "epoch": 0.8015822784810127, + "grad_norm": 0.37584471702575684, + "learning_rate": 0.00020441350423552624, + "loss": 1.8946, + "step": 7599 + }, + { + "epoch": 0.8016877637130801, + "grad_norm": 0.3304339051246643, + "learning_rate": 0.00020412998826064692, + "loss": 1.8707, + "step": 7600 + }, + { + "epoch": 0.8017932489451477, + "grad_norm": 0.35664987564086914, + "learning_rate": 0.0002038468655147195, + "loss": 1.916, + "step": 7601 + }, + { + "epoch": 0.8018987341772152, + "grad_norm": 0.35499632358551025, + "learning_rate": 0.00020356413545234603, + "loss": 1.882, + "step": 7602 + }, + { + "epoch": 0.8020042194092827, + "grad_norm": 0.3739197552204132, + "learning_rate": 0.0002032817975288851, + "loss": 1.937, + "step": 7603 + }, + { + "epoch": 0.8021097046413502, + "grad_norm": 0.34588050842285156, + "learning_rate": 0.00020299985120045069, + "loss": 1.8817, + "step": 7604 + }, + { + "epoch": 0.8022151898734177, + "grad_norm": 0.34551364183425903, + "learning_rate": 0.00020271829592391114, + "loss": 1.8832, + "step": 7605 + }, + { + "epoch": 
0.8023206751054852, + "grad_norm": 0.35894259810447693, + "learning_rate": 0.0002024371311568882, + "loss": 1.898, + "step": 7606 + }, + { + "epoch": 0.8024261603375528, + "grad_norm": 0.3494652807712555, + "learning_rate": 0.0002021563563577556, + "loss": 1.8723, + "step": 7607 + }, + { + "epoch": 0.8025316455696202, + "grad_norm": 0.3779989778995514, + "learning_rate": 0.00020187597098563864, + "loss": 1.8502, + "step": 7608 + }, + { + "epoch": 0.8026371308016877, + "grad_norm": 0.373457133769989, + "learning_rate": 0.00020159597450041257, + "loss": 1.8907, + "step": 7609 + }, + { + "epoch": 0.8027426160337553, + "grad_norm": 0.3857486844062805, + "learning_rate": 0.00020131636636270178, + "loss": 1.9209, + "step": 7610 + }, + { + "epoch": 0.8028481012658227, + "grad_norm": 0.3677959740161896, + "learning_rate": 0.00020103714603387894, + "loss": 1.8397, + "step": 7611 + }, + { + "epoch": 0.8029535864978903, + "grad_norm": 0.38777056336402893, + "learning_rate": 0.00020075831297606357, + "loss": 1.8985, + "step": 7612 + }, + { + "epoch": 0.8030590717299578, + "grad_norm": 0.3835591971874237, + "learning_rate": 0.00020047986665212137, + "loss": 1.8672, + "step": 7613 + }, + { + "epoch": 0.8031645569620253, + "grad_norm": 0.341354101896286, + "learning_rate": 0.0002002018065256629, + "loss": 1.8586, + "step": 7614 + }, + { + "epoch": 0.8032700421940928, + "grad_norm": 0.36918193101882935, + "learning_rate": 0.00019992413206104277, + "loss": 1.871, + "step": 7615 + }, + { + "epoch": 0.8033755274261604, + "grad_norm": 0.36799871921539307, + "learning_rate": 0.00019964684272335854, + "loss": 1.8655, + "step": 7616 + }, + { + "epoch": 0.8034810126582278, + "grad_norm": 0.34796521067619324, + "learning_rate": 0.0001993699379784496, + "loss": 1.8548, + "step": 7617 + }, + { + "epoch": 0.8035864978902953, + "grad_norm": 0.3645276129245758, + "learning_rate": 0.0001990934172928962, + "loss": 1.8498, + "step": 7618 + }, + { + "epoch": 0.8036919831223629, + "grad_norm": 
0.34482133388519287, + "learning_rate": 0.00019881728013401842, + "loss": 1.8852, + "step": 7619 + }, + { + "epoch": 0.8037974683544303, + "grad_norm": 0.35312360525131226, + "learning_rate": 0.00019854152596987523, + "loss": 1.8725, + "step": 7620 + }, + { + "epoch": 0.8039029535864979, + "grad_norm": 0.34672847390174866, + "learning_rate": 0.00019826615426926338, + "loss": 1.9497, + "step": 7621 + }, + { + "epoch": 0.8040084388185654, + "grad_norm": 0.3691411316394806, + "learning_rate": 0.00019799116450171624, + "loss": 1.8635, + "step": 7622 + }, + { + "epoch": 0.8041139240506329, + "grad_norm": 0.36870887875556946, + "learning_rate": 0.00019771655613750317, + "loss": 1.8755, + "step": 7623 + }, + { + "epoch": 0.8042194092827004, + "grad_norm": 0.3500407338142395, + "learning_rate": 0.0001974423286476279, + "loss": 1.8994, + "step": 7624 + }, + { + "epoch": 0.804324894514768, + "grad_norm": 0.3790668547153473, + "learning_rate": 0.0001971684815038283, + "loss": 1.8642, + "step": 7625 + }, + { + "epoch": 0.8044303797468354, + "grad_norm": 0.3527127802371979, + "learning_rate": 0.00019689501417857462, + "loss": 1.8774, + "step": 7626 + }, + { + "epoch": 0.804535864978903, + "grad_norm": 0.3615489602088928, + "learning_rate": 0.00019662192614506883, + "loss": 1.884, + "step": 7627 + }, + { + "epoch": 0.8046413502109705, + "grad_norm": 0.35341230034828186, + "learning_rate": 0.00019634921687724358, + "loss": 1.8901, + "step": 7628 + }, + { + "epoch": 0.8047468354430379, + "grad_norm": 0.3808216452598572, + "learning_rate": 0.00019607688584976116, + "loss": 1.8969, + "step": 7629 + }, + { + "epoch": 0.8048523206751055, + "grad_norm": 0.36089545488357544, + "learning_rate": 0.00019580493253801255, + "loss": 1.895, + "step": 7630 + }, + { + "epoch": 0.804957805907173, + "grad_norm": 0.35216617584228516, + "learning_rate": 0.00019553335641811625, + "loss": 1.9171, + "step": 7631 + }, + { + "epoch": 0.8050632911392405, + "grad_norm": 0.3635654151439667, + 
"learning_rate": 0.00019526215696691747, + "loss": 1.8608, + "step": 7632 + }, + { + "epoch": 0.805168776371308, + "grad_norm": 0.36205968260765076, + "learning_rate": 0.00019499133366198686, + "loss": 1.8708, + "step": 7633 + }, + { + "epoch": 0.8052742616033756, + "grad_norm": 0.36492955684661865, + "learning_rate": 0.00019472088598161984, + "loss": 1.8943, + "step": 7634 + }, + { + "epoch": 0.805379746835443, + "grad_norm": 0.4052882790565491, + "learning_rate": 0.00019445081340483536, + "loss": 1.9025, + "step": 7635 + }, + { + "epoch": 0.8054852320675105, + "grad_norm": 0.3670704662799835, + "learning_rate": 0.0001941811154113749, + "loss": 1.8393, + "step": 7636 + }, + { + "epoch": 0.8055907172995781, + "grad_norm": 0.3473335802555084, + "learning_rate": 0.0001939117914817016, + "loss": 1.8541, + "step": 7637 + }, + { + "epoch": 0.8056962025316455, + "grad_norm": 0.36063674092292786, + "learning_rate": 0.0001936428410969991, + "loss": 1.8791, + "step": 7638 + }, + { + "epoch": 0.8058016877637131, + "grad_norm": 0.35313475131988525, + "learning_rate": 0.00019337426373917076, + "loss": 1.8772, + "step": 7639 + }, + { + "epoch": 0.8059071729957806, + "grad_norm": 0.3637119233608246, + "learning_rate": 0.00019310605889083838, + "loss": 1.8651, + "step": 7640 + }, + { + "epoch": 0.8060126582278481, + "grad_norm": 0.34928563237190247, + "learning_rate": 0.0001928382260353415, + "loss": 1.8772, + "step": 7641 + }, + { + "epoch": 0.8061181434599156, + "grad_norm": 0.34794682264328003, + "learning_rate": 0.00019257076465673605, + "loss": 1.9254, + "step": 7642 + }, + { + "epoch": 0.8062236286919832, + "grad_norm": 0.3355150818824768, + "learning_rate": 0.00019230367423979372, + "loss": 1.8587, + "step": 7643 + }, + { + "epoch": 0.8063291139240506, + "grad_norm": 0.3653055429458618, + "learning_rate": 0.0001920369542700008, + "loss": 1.8941, + "step": 7644 + }, + { + "epoch": 0.8064345991561181, + "grad_norm": 0.3495890498161316, + "learning_rate": 
0.00019177060423355714, + "loss": 1.8879, + "step": 7645 + }, + { + "epoch": 0.8065400843881857, + "grad_norm": 0.3727804720401764, + "learning_rate": 0.00019150462361737527, + "loss": 1.9188, + "step": 7646 + }, + { + "epoch": 0.8066455696202531, + "grad_norm": 0.35868459939956665, + "learning_rate": 0.00019123901190907928, + "loss": 1.8783, + "step": 7647 + }, + { + "epoch": 0.8067510548523207, + "grad_norm": 0.3475462794303894, + "learning_rate": 0.00019097376859700393, + "loss": 1.861, + "step": 7648 + }, + { + "epoch": 0.8068565400843882, + "grad_norm": 0.3735373020172119, + "learning_rate": 0.00019070889317019375, + "loss": 1.8878, + "step": 7649 + }, + { + "epoch": 0.8069620253164557, + "grad_norm": 0.35352474451065063, + "learning_rate": 0.0001904443851184018, + "loss": 1.9117, + "step": 7650 + }, + { + "epoch": 0.8070675105485232, + "grad_norm": 0.35304465889930725, + "learning_rate": 0.00019018024393208902, + "loss": 1.8806, + "step": 7651 + }, + { + "epoch": 0.8071729957805908, + "grad_norm": 0.37552809715270996, + "learning_rate": 0.00018991646910242288, + "loss": 1.8129, + "step": 7652 + }, + { + "epoch": 0.8072784810126582, + "grad_norm": 0.3565998673439026, + "learning_rate": 0.00018965306012127663, + "loss": 1.8928, + "step": 7653 + }, + { + "epoch": 0.8073839662447257, + "grad_norm": 0.37883031368255615, + "learning_rate": 0.00018939001648122844, + "loss": 1.9073, + "step": 7654 + }, + { + "epoch": 0.8074894514767933, + "grad_norm": 0.36218923330307007, + "learning_rate": 0.00018912733767556005, + "loss": 1.8686, + "step": 7655 + }, + { + "epoch": 0.8075949367088607, + "grad_norm": 0.3607180118560791, + "learning_rate": 0.00018886502319825612, + "loss": 1.8678, + "step": 7656 + }, + { + "epoch": 0.8077004219409283, + "grad_norm": 0.3614151179790497, + "learning_rate": 0.00018860307254400305, + "loss": 1.9057, + "step": 7657 + }, + { + "epoch": 0.8078059071729958, + "grad_norm": 0.3630603551864624, + "learning_rate": 0.0001883414852081882, + "loss": 
1.9031, + "step": 7658 + }, + { + "epoch": 0.8079113924050633, + "grad_norm": 0.3687366843223572, + "learning_rate": 0.00018808026068689883, + "loss": 1.8798, + "step": 7659 + }, + { + "epoch": 0.8080168776371308, + "grad_norm": 0.37211158871650696, + "learning_rate": 0.00018781939847692096, + "loss": 1.8594, + "step": 7660 + }, + { + "epoch": 0.8081223628691984, + "grad_norm": 0.36587464809417725, + "learning_rate": 0.00018755889807573872, + "loss": 1.8531, + "step": 7661 + }, + { + "epoch": 0.8082278481012658, + "grad_norm": 0.34303274750709534, + "learning_rate": 0.00018729875898153305, + "loss": 1.8912, + "step": 7662 + }, + { + "epoch": 0.8083333333333333, + "grad_norm": 0.3461381196975708, + "learning_rate": 0.00018703898069318112, + "loss": 1.9018, + "step": 7663 + }, + { + "epoch": 0.8084388185654009, + "grad_norm": 0.37584081292152405, + "learning_rate": 0.00018677956271025492, + "loss": 1.8784, + "step": 7664 + }, + { + "epoch": 0.8085443037974683, + "grad_norm": 0.3921014964580536, + "learning_rate": 0.00018652050453302066, + "loss": 1.888, + "step": 7665 + }, + { + "epoch": 0.8086497890295359, + "grad_norm": 0.33546483516693115, + "learning_rate": 0.0001862618056624376, + "loss": 1.8388, + "step": 7666 + }, + { + "epoch": 0.8087552742616034, + "grad_norm": 0.3912770748138428, + "learning_rate": 0.00018600346560015716, + "loss": 1.8669, + "step": 7667 + }, + { + "epoch": 0.8088607594936709, + "grad_norm": 0.36727669835090637, + "learning_rate": 0.00018574548384852206, + "loss": 1.9161, + "step": 7668 + }, + { + "epoch": 0.8089662447257384, + "grad_norm": 0.37971407175064087, + "learning_rate": 0.00018548785991056508, + "loss": 1.8819, + "step": 7669 + }, + { + "epoch": 0.8090717299578059, + "grad_norm": 0.3697861433029175, + "learning_rate": 0.00018523059329000844, + "loss": 1.8824, + "step": 7670 + }, + { + "epoch": 0.8091772151898734, + "grad_norm": 0.39398789405822754, + "learning_rate": 0.00018497368349126262, + "loss": 1.9048, + "step": 7671 + }, + 
{ + "epoch": 0.809282700421941, + "grad_norm": 0.3692530393600464, + "learning_rate": 0.00018471713001942533, + "loss": 1.9224, + "step": 7672 + }, + { + "epoch": 0.8093881856540084, + "grad_norm": 0.35081765055656433, + "learning_rate": 0.000184460932380281, + "loss": 1.9162, + "step": 7673 + }, + { + "epoch": 0.8094936708860759, + "grad_norm": 0.37048614025115967, + "learning_rate": 0.00018420509008029931, + "loss": 1.8913, + "step": 7674 + }, + { + "epoch": 0.8095991561181435, + "grad_norm": 0.36643725633621216, + "learning_rate": 0.00018394960262663448, + "loss": 1.8502, + "step": 7675 + }, + { + "epoch": 0.8097046413502109, + "grad_norm": 0.35330212116241455, + "learning_rate": 0.0001836944695271243, + "loss": 1.9022, + "step": 7676 + }, + { + "epoch": 0.8098101265822785, + "grad_norm": 0.3581152558326721, + "learning_rate": 0.00018343969029028915, + "loss": 1.8814, + "step": 7677 + }, + { + "epoch": 0.809915611814346, + "grad_norm": 0.3562348186969757, + "learning_rate": 0.00018318526442533123, + "loss": 1.9239, + "step": 7678 + }, + { + "epoch": 0.8100210970464135, + "grad_norm": 0.4032309949398041, + "learning_rate": 0.00018293119144213328, + "loss": 1.8468, + "step": 7679 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 0.3878093957901001, + "learning_rate": 0.0001826774708512579, + "loss": 1.9132, + "step": 7680 + }, + { + "epoch": 0.8102320675105485, + "grad_norm": 0.35936275124549866, + "learning_rate": 0.00018242410216394648, + "loss": 1.8823, + "step": 7681 + }, + { + "epoch": 0.810337552742616, + "grad_norm": 0.3694087564945221, + "learning_rate": 0.00018217108489211841, + "loss": 1.8746, + "step": 7682 + }, + { + "epoch": 0.8104430379746835, + "grad_norm": 0.36460715532302856, + "learning_rate": 0.00018191841854836994, + "loss": 1.8699, + "step": 7683 + }, + { + "epoch": 0.8105485232067511, + "grad_norm": 0.34639909863471985, + "learning_rate": 0.00018166610264597332, + "loss": 1.857, + "step": 7684 + }, + { + "epoch": 0.8106540084388185, + 
"grad_norm": 0.3765473961830139, + "learning_rate": 0.00018141413669887598, + "loss": 1.8991, + "step": 7685 + }, + { + "epoch": 0.8107594936708861, + "grad_norm": 0.3621606230735779, + "learning_rate": 0.00018116252022169936, + "loss": 1.8838, + "step": 7686 + }, + { + "epoch": 0.8108649789029536, + "grad_norm": 0.33558934926986694, + "learning_rate": 0.00018091125272973825, + "loss": 1.8801, + "step": 7687 + }, + { + "epoch": 0.810970464135021, + "grad_norm": 0.3549710512161255, + "learning_rate": 0.0001806603337389596, + "loss": 1.8728, + "step": 7688 + }, + { + "epoch": 0.8110759493670886, + "grad_norm": 0.3433974087238312, + "learning_rate": 0.00018040976276600176, + "loss": 1.8778, + "step": 7689 + }, + { + "epoch": 0.8111814345991561, + "grad_norm": 0.3442663848400116, + "learning_rate": 0.00018015953932817348, + "loss": 1.8346, + "step": 7690 + }, + { + "epoch": 0.8112869198312236, + "grad_norm": 0.33886221051216125, + "learning_rate": 0.0001799096629434529, + "loss": 1.8852, + "step": 7691 + }, + { + "epoch": 0.8113924050632911, + "grad_norm": 0.36756014823913574, + "learning_rate": 0.00017966013313048688, + "loss": 1.8901, + "step": 7692 + }, + { + "epoch": 0.8114978902953587, + "grad_norm": 0.356926828622818, + "learning_rate": 0.00017941094940858982, + "loss": 1.869, + "step": 7693 + }, + { + "epoch": 0.8116033755274261, + "grad_norm": 0.35180431604385376, + "learning_rate": 0.00017916211129774277, + "loss": 1.8734, + "step": 7694 + }, + { + "epoch": 0.8117088607594937, + "grad_norm": 0.3536582291126251, + "learning_rate": 0.00017891361831859262, + "loss": 1.8582, + "step": 7695 + }, + { + "epoch": 0.8118143459915612, + "grad_norm": 0.3543791174888611, + "learning_rate": 0.00017866546999245102, + "loss": 1.8928, + "step": 7696 + }, + { + "epoch": 0.8119198312236287, + "grad_norm": 0.4214498996734619, + "learning_rate": 0.00017841766584129372, + "loss": 1.8534, + "step": 7697 + }, + { + "epoch": 0.8120253164556962, + "grad_norm": 0.35992106795310974, + 
"learning_rate": 0.00017817020538775933, + "loss": 1.8533, + "step": 7698 + }, + { + "epoch": 0.8121308016877637, + "grad_norm": 0.3385350704193115, + "learning_rate": 0.00017792308815514854, + "loss": 1.871, + "step": 7699 + }, + { + "epoch": 0.8122362869198312, + "grad_norm": 0.3424116373062134, + "learning_rate": 0.00017767631366742332, + "loss": 1.8898, + "step": 7700 + }, + { + "epoch": 0.8123417721518987, + "grad_norm": 0.3680153787136078, + "learning_rate": 0.00017742988144920578, + "loss": 1.8673, + "step": 7701 + }, + { + "epoch": 0.8124472573839663, + "grad_norm": 0.377890944480896, + "learning_rate": 0.00017718379102577746, + "loss": 1.8993, + "step": 7702 + }, + { + "epoch": 0.8125527426160337, + "grad_norm": 0.37058719992637634, + "learning_rate": 0.00017693804192307826, + "loss": 1.8564, + "step": 7703 + }, + { + "epoch": 0.8126582278481013, + "grad_norm": 0.34919047355651855, + "learning_rate": 0.0001766926336677056, + "loss": 1.8726, + "step": 7704 + }, + { + "epoch": 0.8127637130801688, + "grad_norm": 0.33476656675338745, + "learning_rate": 0.00017644756578691345, + "loss": 1.8604, + "step": 7705 + }, + { + "epoch": 0.8128691983122363, + "grad_norm": 0.3747885823249817, + "learning_rate": 0.0001762028378086116, + "loss": 1.8588, + "step": 7706 + }, + { + "epoch": 0.8129746835443038, + "grad_norm": 0.3928654193878174, + "learning_rate": 0.00017595844926136456, + "loss": 1.8918, + "step": 7707 + }, + { + "epoch": 0.8130801687763713, + "grad_norm": 0.35791927576065063, + "learning_rate": 0.0001757143996743906, + "loss": 1.8826, + "step": 7708 + }, + { + "epoch": 0.8131856540084388, + "grad_norm": 0.3564048707485199, + "learning_rate": 0.00017547068857756104, + "loss": 1.9077, + "step": 7709 + }, + { + "epoch": 0.8132911392405063, + "grad_norm": 0.3789852559566498, + "learning_rate": 0.00017522731550139922, + "loss": 1.8662, + "step": 7710 + }, + { + "epoch": 0.8133966244725739, + "grad_norm": 0.3580683171749115, + "learning_rate": 
0.00017498427997707976, + "loss": 1.9081, + "step": 7711 + }, + { + "epoch": 0.8135021097046413, + "grad_norm": 0.3465126156806946, + "learning_rate": 0.0001747415815364274, + "loss": 1.8764, + "step": 7712 + }, + { + "epoch": 0.8136075949367089, + "grad_norm": 0.3668536841869354, + "learning_rate": 0.00017449921971191622, + "loss": 1.8442, + "step": 7713 + }, + { + "epoch": 0.8137130801687764, + "grad_norm": 0.3531382977962494, + "learning_rate": 0.00017425719403666877, + "loss": 1.8946, + "step": 7714 + }, + { + "epoch": 0.8138185654008439, + "grad_norm": 0.3731708526611328, + "learning_rate": 0.00017401550404445515, + "loss": 1.8785, + "step": 7715 + }, + { + "epoch": 0.8139240506329114, + "grad_norm": 0.3893696367740631, + "learning_rate": 0.0001737741492696922, + "loss": 1.8775, + "step": 7716 + }, + { + "epoch": 0.814029535864979, + "grad_norm": 0.36404696106910706, + "learning_rate": 0.00017353312924744236, + "loss": 1.9219, + "step": 7717 + }, + { + "epoch": 0.8141350210970464, + "grad_norm": 0.3677983283996582, + "learning_rate": 0.000173292443513413, + "loss": 1.8248, + "step": 7718 + }, + { + "epoch": 0.8142405063291139, + "grad_norm": 0.38159534335136414, + "learning_rate": 0.00017305209160395547, + "loss": 1.8552, + "step": 7719 + }, + { + "epoch": 0.8143459915611815, + "grad_norm": 0.3553406596183777, + "learning_rate": 0.00017281207305606407, + "loss": 1.8881, + "step": 7720 + }, + { + "epoch": 0.8144514767932489, + "grad_norm": 0.34903040528297424, + "learning_rate": 0.00017257238740737548, + "loss": 1.9201, + "step": 7721 + }, + { + "epoch": 0.8145569620253165, + "grad_norm": 0.3577241003513336, + "learning_rate": 0.0001723330341961675, + "loss": 1.8709, + "step": 7722 + }, + { + "epoch": 0.814662447257384, + "grad_norm": 0.3443550765514374, + "learning_rate": 0.0001720940129613584, + "loss": 1.8961, + "step": 7723 + }, + { + "epoch": 0.8147679324894515, + "grad_norm": 0.3527786433696747, + "learning_rate": 0.0001718553232425059, + "loss": 1.8658, 
+ "step": 7724 + }, + { + "epoch": 0.814873417721519, + "grad_norm": 0.35814693570137024, + "learning_rate": 0.00017161696457980641, + "loss": 1.9138, + "step": 7725 + }, + { + "epoch": 0.8149789029535865, + "grad_norm": 0.3411121666431427, + "learning_rate": 0.00017137893651409406, + "loss": 1.87, + "step": 7726 + }, + { + "epoch": 0.815084388185654, + "grad_norm": 0.3520480692386627, + "learning_rate": 0.0001711412385868398, + "loss": 1.8872, + "step": 7727 + }, + { + "epoch": 0.8151898734177215, + "grad_norm": 0.35133466124534607, + "learning_rate": 0.00017090387034015054, + "loss": 1.8794, + "step": 7728 + }, + { + "epoch": 0.8152953586497891, + "grad_norm": 0.33634287118911743, + "learning_rate": 0.00017066683131676825, + "loss": 1.8729, + "step": 7729 + }, + { + "epoch": 0.8154008438818565, + "grad_norm": 0.3951643407344818, + "learning_rate": 0.00017043012106006926, + "loss": 1.8878, + "step": 7730 + }, + { + "epoch": 0.8155063291139241, + "grad_norm": 0.3472840189933777, + "learning_rate": 0.00017019373911406307, + "loss": 1.877, + "step": 7731 + }, + { + "epoch": 0.8156118143459916, + "grad_norm": 0.3532439172267914, + "learning_rate": 0.00016995768502339165, + "loss": 1.8934, + "step": 7732 + }, + { + "epoch": 0.815717299578059, + "grad_norm": 0.34944960474967957, + "learning_rate": 0.00016972195833332858, + "loss": 1.8793, + "step": 7733 + }, + { + "epoch": 0.8158227848101266, + "grad_norm": 0.3608245849609375, + "learning_rate": 0.00016948655858977808, + "loss": 1.8587, + "step": 7734 + }, + { + "epoch": 0.8159282700421941, + "grad_norm": 0.38403379917144775, + "learning_rate": 0.00016925148533927429, + "loss": 1.8833, + "step": 7735 + }, + { + "epoch": 0.8160337552742616, + "grad_norm": 0.34085553884506226, + "learning_rate": 0.00016901673812898022, + "loss": 1.8875, + "step": 7736 + }, + { + "epoch": 0.8161392405063291, + "grad_norm": 0.3488834798336029, + "learning_rate": 0.0001687823165066869, + "loss": 1.863, + "step": 7737 + }, + { + "epoch": 
0.8162447257383966, + "grad_norm": 0.36447665095329285, + "learning_rate": 0.00016854822002081266, + "loss": 1.8934, + "step": 7738 + }, + { + "epoch": 0.8163502109704641, + "grad_norm": 0.3776645362377167, + "learning_rate": 0.00016831444822040207, + "loss": 1.9102, + "step": 7739 + }, + { + "epoch": 0.8164556962025317, + "grad_norm": 0.34480372071266174, + "learning_rate": 0.00016808100065512528, + "loss": 1.9052, + "step": 7740 + }, + { + "epoch": 0.8165611814345991, + "grad_norm": 0.33600136637687683, + "learning_rate": 0.000167847876875277, + "loss": 1.8588, + "step": 7741 + }, + { + "epoch": 0.8166666666666667, + "grad_norm": 0.35687026381492615, + "learning_rate": 0.00016761507643177557, + "loss": 1.8494, + "step": 7742 + }, + { + "epoch": 0.8167721518987342, + "grad_norm": 0.3819723129272461, + "learning_rate": 0.0001673825988761623, + "loss": 1.9127, + "step": 7743 + }, + { + "epoch": 0.8168776371308016, + "grad_norm": 0.37689945101737976, + "learning_rate": 0.00016715044376060042, + "loss": 1.8932, + "step": 7744 + }, + { + "epoch": 0.8169831223628692, + "grad_norm": 0.34065017104148865, + "learning_rate": 0.00016691861063787436, + "loss": 1.8295, + "step": 7745 + }, + { + "epoch": 0.8170886075949367, + "grad_norm": 0.36190420389175415, + "learning_rate": 0.0001666870990613889, + "loss": 1.8813, + "step": 7746 + }, + { + "epoch": 0.8171940928270042, + "grad_norm": 0.4173731207847595, + "learning_rate": 0.000166455908585168, + "loss": 1.8491, + "step": 7747 + }, + { + "epoch": 0.8172995780590717, + "grad_norm": 0.3655277192592621, + "learning_rate": 0.00016622503876385437, + "loss": 1.8498, + "step": 7748 + }, + { + "epoch": 0.8174050632911393, + "grad_norm": 0.3391162157058716, + "learning_rate": 0.00016599448915270845, + "loss": 1.9057, + "step": 7749 + }, + { + "epoch": 0.8175105485232067, + "grad_norm": 0.35763290524482727, + "learning_rate": 0.00016576425930760734, + "loss": 1.8692, + "step": 7750 + }, + { + "epoch": 0.8176160337552743, + "grad_norm": 
0.4098583459854126, + "learning_rate": 0.00016553434878504428, + "loss": 1.8906, + "step": 7751 + }, + { + "epoch": 0.8177215189873418, + "grad_norm": 0.378612220287323, + "learning_rate": 0.00016530475714212755, + "loss": 1.8263, + "step": 7752 + }, + { + "epoch": 0.8178270042194092, + "grad_norm": 0.32473495602607727, + "learning_rate": 0.00016507548393657973, + "loss": 1.8639, + "step": 7753 + }, + { + "epoch": 0.8179324894514768, + "grad_norm": 0.38107526302337646, + "learning_rate": 0.00016484652872673694, + "loss": 1.8752, + "step": 7754 + }, + { + "epoch": 0.8180379746835443, + "grad_norm": 0.3835596442222595, + "learning_rate": 0.00016461789107154767, + "loss": 1.8943, + "step": 7755 + }, + { + "epoch": 0.8181434599156118, + "grad_norm": 0.33951449394226074, + "learning_rate": 0.00016438957053057236, + "loss": 1.874, + "step": 7756 + }, + { + "epoch": 0.8182489451476793, + "grad_norm": 0.33661535382270813, + "learning_rate": 0.0001641615666639821, + "loss": 1.8873, + "step": 7757 + }, + { + "epoch": 0.8183544303797469, + "grad_norm": 0.3598361909389496, + "learning_rate": 0.00016393387903255815, + "loss": 1.8607, + "step": 7758 + }, + { + "epoch": 0.8184599156118143, + "grad_norm": 0.3448057174682617, + "learning_rate": 0.000163706507197691, + "loss": 1.9063, + "step": 7759 + }, + { + "epoch": 0.8185654008438819, + "grad_norm": 0.3633960485458374, + "learning_rate": 0.00016347945072137934, + "loss": 1.8712, + "step": 7760 + }, + { + "epoch": 0.8186708860759494, + "grad_norm": 0.36276164650917053, + "learning_rate": 0.00016325270916622947, + "loss": 1.8698, + "step": 7761 + }, + { + "epoch": 0.8187763713080168, + "grad_norm": 0.3669593036174774, + "learning_rate": 0.00016302628209545426, + "loss": 1.8482, + "step": 7762 + }, + { + "epoch": 0.8188818565400844, + "grad_norm": 0.3616148829460144, + "learning_rate": 0.00016280016907287237, + "loss": 1.9132, + "step": 7763 + }, + { + "epoch": 0.8189873417721519, + "grad_norm": 0.3633652329444885, + 
"learning_rate": 0.00016257436966290764, + "loss": 1.9045, + "step": 7764 + }, + { + "epoch": 0.8190928270042194, + "grad_norm": 0.3758713901042938, + "learning_rate": 0.0001623488834305878, + "loss": 1.884, + "step": 7765 + }, + { + "epoch": 0.8191983122362869, + "grad_norm": 0.3754219710826874, + "learning_rate": 0.000162123709941544, + "loss": 1.9194, + "step": 7766 + }, + { + "epoch": 0.8193037974683545, + "grad_norm": 0.3580959141254425, + "learning_rate": 0.00016189884876200979, + "loss": 1.8891, + "step": 7767 + }, + { + "epoch": 0.8194092827004219, + "grad_norm": 0.33398178219795227, + "learning_rate": 0.00016167429945882031, + "loss": 1.8901, + "step": 7768 + }, + { + "epoch": 0.8195147679324895, + "grad_norm": 0.35681259632110596, + "learning_rate": 0.0001614500615994117, + "loss": 1.9082, + "step": 7769 + }, + { + "epoch": 0.819620253164557, + "grad_norm": 0.37025347352027893, + "learning_rate": 0.00016122613475181977, + "loss": 1.8363, + "step": 7770 + }, + { + "epoch": 0.8197257383966244, + "grad_norm": 0.35841241478919983, + "learning_rate": 0.00016100251848467966, + "loss": 1.8666, + "step": 7771 + }, + { + "epoch": 0.819831223628692, + "grad_norm": 0.3587653338909149, + "learning_rate": 0.00016077921236722464, + "loss": 1.8503, + "step": 7772 + }, + { + "epoch": 0.8199367088607595, + "grad_norm": 0.3508157432079315, + "learning_rate": 0.00016055621596928563, + "loss": 1.8409, + "step": 7773 + }, + { + "epoch": 0.820042194092827, + "grad_norm": 0.3584843575954437, + "learning_rate": 0.00016033352886129, + "loss": 1.8278, + "step": 7774 + }, + { + "epoch": 0.8201476793248945, + "grad_norm": 0.360687255859375, + "learning_rate": 0.00016011115061426103, + "loss": 1.8978, + "step": 7775 + }, + { + "epoch": 0.8202531645569621, + "grad_norm": 0.3500330150127411, + "learning_rate": 0.00015988908079981698, + "loss": 1.9159, + "step": 7776 + }, + { + "epoch": 0.8203586497890295, + "grad_norm": 0.34748128056526184, + "learning_rate": 0.00015966731899017014, + 
"loss": 1.9038, + "step": 7777 + }, + { + "epoch": 0.820464135021097, + "grad_norm": 0.35905954241752625, + "learning_rate": 0.00015944586475812633, + "loss": 1.8672, + "step": 7778 + }, + { + "epoch": 0.8205696202531646, + "grad_norm": 0.36465343832969666, + "learning_rate": 0.00015922471767708377, + "loss": 1.8562, + "step": 7779 + }, + { + "epoch": 0.820675105485232, + "grad_norm": 0.36022326350212097, + "learning_rate": 0.00015900387732103232, + "loss": 1.9071, + "step": 7780 + }, + { + "epoch": 0.8207805907172996, + "grad_norm": 0.3697722256183624, + "learning_rate": 0.0001587833432645528, + "loss": 1.9097, + "step": 7781 + }, + { + "epoch": 0.8208860759493671, + "grad_norm": 0.37139713764190674, + "learning_rate": 0.00015856311508281594, + "loss": 1.8916, + "step": 7782 + }, + { + "epoch": 0.8209915611814346, + "grad_norm": 0.3668661415576935, + "learning_rate": 0.00015834319235158187, + "loss": 1.8837, + "step": 7783 + }, + { + "epoch": 0.8210970464135021, + "grad_norm": 0.3635692000389099, + "learning_rate": 0.00015812357464719905, + "loss": 1.9023, + "step": 7784 + }, + { + "epoch": 0.8212025316455697, + "grad_norm": 0.35766446590423584, + "learning_rate": 0.0001579042615466035, + "loss": 1.8728, + "step": 7785 + }, + { + "epoch": 0.8213080168776371, + "grad_norm": 0.37199127674102783, + "learning_rate": 0.00015768525262731804, + "loss": 1.9178, + "step": 7786 + }, + { + "epoch": 0.8214135021097047, + "grad_norm": 0.3763056993484497, + "learning_rate": 0.0001574665474674514, + "loss": 1.9108, + "step": 7787 + }, + { + "epoch": 0.8215189873417722, + "grad_norm": 0.3797191083431244, + "learning_rate": 0.00015724814564569765, + "loss": 1.8903, + "step": 7788 + }, + { + "epoch": 0.8216244725738396, + "grad_norm": 0.36087697744369507, + "learning_rate": 0.00015703004674133498, + "loss": 1.8614, + "step": 7789 + }, + { + "epoch": 0.8217299578059072, + "grad_norm": 0.369269460439682, + "learning_rate": 0.00015681225033422526, + "loss": 1.8823, + "step": 7790 + }, 
+ { + "epoch": 0.8218354430379747, + "grad_norm": 0.40155819058418274, + "learning_rate": 0.00015659475600481292, + "loss": 1.8928, + "step": 7791 + }, + { + "epoch": 0.8219409282700422, + "grad_norm": 0.3575928509235382, + "learning_rate": 0.00015637756333412454, + "loss": 1.9341, + "step": 7792 + }, + { + "epoch": 0.8220464135021097, + "grad_norm": 0.35425490140914917, + "learning_rate": 0.0001561606719037676, + "loss": 1.8796, + "step": 7793 + }, + { + "epoch": 0.8221518987341773, + "grad_norm": 0.3997424244880676, + "learning_rate": 0.00015594408129592993, + "loss": 1.8588, + "step": 7794 + }, + { + "epoch": 0.8222573839662447, + "grad_norm": 0.34441235661506653, + "learning_rate": 0.00015572779109337888, + "loss": 1.8488, + "step": 7795 + }, + { + "epoch": 0.8223628691983123, + "grad_norm": 0.3577572703361511, + "learning_rate": 0.00015551180087946046, + "loss": 1.8773, + "step": 7796 + }, + { + "epoch": 0.8224683544303798, + "grad_norm": 0.35559332370758057, + "learning_rate": 0.00015529611023809868, + "loss": 1.8869, + "step": 7797 + }, + { + "epoch": 0.8225738396624472, + "grad_norm": 0.3525368869304657, + "learning_rate": 0.00015508071875379448, + "loss": 1.8611, + "step": 7798 + }, + { + "epoch": 0.8226793248945148, + "grad_norm": 0.3586418628692627, + "learning_rate": 0.00015486562601162512, + "loss": 1.9193, + "step": 7799 + }, + { + "epoch": 0.8227848101265823, + "grad_norm": 0.354084312915802, + "learning_rate": 0.00015465083159724345, + "loss": 1.848, + "step": 7800 + }, + { + "epoch": 0.8228902953586498, + "grad_norm": 0.3797892928123474, + "learning_rate": 0.00015443633509687688, + "loss": 1.8843, + "step": 7801 + }, + { + "epoch": 0.8229957805907173, + "grad_norm": 0.3519793748855591, + "learning_rate": 0.0001542221360973268, + "loss": 1.8789, + "step": 7802 + }, + { + "epoch": 0.8231012658227848, + "grad_norm": 0.3515004813671112, + "learning_rate": 0.00015400823418596765, + "loss": 1.88, + "step": 7803 + }, + { + "epoch": 0.8232067510548523, + 
"grad_norm": 0.362846314907074, + "learning_rate": 0.0001537946289507462, + "loss": 1.8807, + "step": 7804 + }, + { + "epoch": 0.8233122362869199, + "grad_norm": 0.35439223051071167, + "learning_rate": 0.00015358131998018072, + "loss": 1.8432, + "step": 7805 + }, + { + "epoch": 0.8234177215189873, + "grad_norm": 0.36783868074417114, + "learning_rate": 0.00015336830686336008, + "loss": 1.9111, + "step": 7806 + }, + { + "epoch": 0.8235232067510548, + "grad_norm": 0.40394771099090576, + "learning_rate": 0.00015315558918994331, + "loss": 1.8856, + "step": 7807 + }, + { + "epoch": 0.8236286919831224, + "grad_norm": 0.3518728017807007, + "learning_rate": 0.0001529431665501584, + "loss": 1.8394, + "step": 7808 + }, + { + "epoch": 0.8237341772151898, + "grad_norm": 0.35697776079177856, + "learning_rate": 0.0001527310385348017, + "loss": 1.8823, + "step": 7809 + }, + { + "epoch": 0.8238396624472574, + "grad_norm": 0.4036790132522583, + "learning_rate": 0.0001525192047352371, + "loss": 1.8721, + "step": 7810 + }, + { + "epoch": 0.8239451476793249, + "grad_norm": 0.36027613282203674, + "learning_rate": 0.00015230766474339536, + "loss": 1.8738, + "step": 7811 + }, + { + "epoch": 0.8240506329113924, + "grad_norm": 0.34700968861579895, + "learning_rate": 0.0001520964181517731, + "loss": 1.8691, + "step": 7812 + }, + { + "epoch": 0.8241561181434599, + "grad_norm": 0.3862028121948242, + "learning_rate": 0.00015188546455343228, + "loss": 1.8796, + "step": 7813 + }, + { + "epoch": 0.8242616033755275, + "grad_norm": 0.34905391931533813, + "learning_rate": 0.00015167480354199908, + "loss": 1.9143, + "step": 7814 + }, + { + "epoch": 0.8243670886075949, + "grad_norm": 0.34098464250564575, + "learning_rate": 0.00015146443471166345, + "loss": 1.8842, + "step": 7815 + }, + { + "epoch": 0.8244725738396624, + "grad_norm": 0.34201061725616455, + "learning_rate": 0.00015125435765717815, + "loss": 1.8669, + "step": 7816 + }, + { + "epoch": 0.82457805907173, + "grad_norm": 0.35357221961021423, + 
"learning_rate": 0.000151044571973858, + "loss": 1.8692, + "step": 7817 + }, + { + "epoch": 0.8246835443037974, + "grad_norm": 0.36016207933425903, + "learning_rate": 0.00015083507725757912, + "loss": 1.9192, + "step": 7818 + }, + { + "epoch": 0.824789029535865, + "grad_norm": 0.3429969549179077, + "learning_rate": 0.00015062587310477813, + "loss": 1.8811, + "step": 7819 + }, + { + "epoch": 0.8248945147679325, + "grad_norm": 0.35197800397872925, + "learning_rate": 0.00015041695911245136, + "loss": 1.9067, + "step": 7820 + }, + { + "epoch": 0.825, + "grad_norm": 0.35197684168815613, + "learning_rate": 0.00015020833487815416, + "loss": 1.8997, + "step": 7821 + }, + { + "epoch": 0.8251054852320675, + "grad_norm": 0.39108607172966003, + "learning_rate": 0.00015000000000000001, + "loss": 1.8793, + "step": 7822 + }, + { + "epoch": 0.825210970464135, + "grad_norm": 0.3870248794555664, + "learning_rate": 0.00014979195407665975, + "loss": 1.8699, + "step": 7823 + }, + { + "epoch": 0.8253164556962025, + "grad_norm": 0.3757625222206116, + "learning_rate": 0.000149584196707361, + "loss": 1.8531, + "step": 7824 + }, + { + "epoch": 0.82542194092827, + "grad_norm": 0.36276596784591675, + "learning_rate": 0.00014937672749188704, + "loss": 1.8746, + "step": 7825 + }, + { + "epoch": 0.8255274261603376, + "grad_norm": 0.3696567118167877, + "learning_rate": 0.0001491695460305765, + "loss": 1.8693, + "step": 7826 + }, + { + "epoch": 0.825632911392405, + "grad_norm": 0.36105209589004517, + "learning_rate": 0.00014896265192432194, + "loss": 1.8997, + "step": 7827 + }, + { + "epoch": 0.8257383966244726, + "grad_norm": 0.35225433111190796, + "learning_rate": 0.00014875604477456987, + "loss": 1.8781, + "step": 7828 + }, + { + "epoch": 0.8258438818565401, + "grad_norm": 0.35196974873542786, + "learning_rate": 0.00014854972418331948, + "loss": 1.8947, + "step": 7829 + }, + { + "epoch": 0.8259493670886076, + "grad_norm": 0.3464502990245819, + "learning_rate": 0.00014834368975312172, + "loss": 
1.8319, + "step": 7830 + }, + { + "epoch": 0.8260548523206751, + "grad_norm": 0.34048449993133545, + "learning_rate": 0.0001481379410870792, + "loss": 1.8728, + "step": 7831 + }, + { + "epoch": 0.8261603375527427, + "grad_norm": 0.33669713139533997, + "learning_rate": 0.00014793247778884463, + "loss": 1.8621, + "step": 7832 + }, + { + "epoch": 0.8262658227848101, + "grad_norm": 0.3416072726249695, + "learning_rate": 0.00014772729946262067, + "loss": 1.8756, + "step": 7833 + }, + { + "epoch": 0.8263713080168776, + "grad_norm": 0.33105167746543884, + "learning_rate": 0.00014752240571315898, + "loss": 1.8559, + "step": 7834 + }, + { + "epoch": 0.8264767932489452, + "grad_norm": 0.3375115394592285, + "learning_rate": 0.00014731779614575915, + "loss": 1.8509, + "step": 7835 + }, + { + "epoch": 0.8265822784810126, + "grad_norm": 0.3448018729686737, + "learning_rate": 0.00014711347036626857, + "loss": 1.8327, + "step": 7836 + }, + { + "epoch": 0.8266877637130802, + "grad_norm": 0.37081006169319153, + "learning_rate": 0.00014690942798108097, + "loss": 1.887, + "step": 7837 + }, + { + "epoch": 0.8267932489451477, + "grad_norm": 0.3406483829021454, + "learning_rate": 0.00014670566859713622, + "loss": 1.8921, + "step": 7838 + }, + { + "epoch": 0.8268987341772152, + "grad_norm": 0.3343172073364258, + "learning_rate": 0.00014650219182191934, + "loss": 1.8039, + "step": 7839 + }, + { + "epoch": 0.8270042194092827, + "grad_norm": 0.33596423268318176, + "learning_rate": 0.00014629899726345958, + "loss": 1.8718, + "step": 7840 + }, + { + "epoch": 0.8271097046413503, + "grad_norm": 0.3394632339477539, + "learning_rate": 0.00014609608453033013, + "loss": 1.8824, + "step": 7841 + }, + { + "epoch": 0.8272151898734177, + "grad_norm": 0.36653903126716614, + "learning_rate": 0.0001458934532316467, + "loss": 1.8706, + "step": 7842 + }, + { + "epoch": 0.8273206751054852, + "grad_norm": 0.3483956456184387, + "learning_rate": 0.0001456911029770675, + "loss": 1.871, + "step": 7843 + }, + { + 
"epoch": 0.8274261603375528, + "grad_norm": 0.3478300869464874, + "learning_rate": 0.00014548903337679208, + "loss": 1.8538, + "step": 7844 + }, + { + "epoch": 0.8275316455696202, + "grad_norm": 0.3558306396007538, + "learning_rate": 0.0001452872440415604, + "loss": 1.8642, + "step": 7845 + }, + { + "epoch": 0.8276371308016878, + "grad_norm": 0.340495228767395, + "learning_rate": 0.00014508573458265245, + "loss": 1.8725, + "step": 7846 + }, + { + "epoch": 0.8277426160337553, + "grad_norm": 0.3418813943862915, + "learning_rate": 0.00014488450461188753, + "loss": 1.8829, + "step": 7847 + }, + { + "epoch": 0.8278481012658228, + "grad_norm": 0.36263951659202576, + "learning_rate": 0.000144683553741623, + "loss": 1.8881, + "step": 7848 + }, + { + "epoch": 0.8279535864978903, + "grad_norm": 0.3484504818916321, + "learning_rate": 0.00014448288158475423, + "loss": 1.8384, + "step": 7849 + }, + { + "epoch": 0.8280590717299579, + "grad_norm": 0.37730321288108826, + "learning_rate": 0.00014428248775471316, + "loss": 1.8654, + "step": 7850 + }, + { + "epoch": 0.8281645569620253, + "grad_norm": 0.35493919253349304, + "learning_rate": 0.00014408237186546807, + "loss": 1.8836, + "step": 7851 + }, + { + "epoch": 0.8282700421940928, + "grad_norm": 0.3337674140930176, + "learning_rate": 0.00014388253353152277, + "loss": 1.8421, + "step": 7852 + }, + { + "epoch": 0.8283755274261604, + "grad_norm": 0.3451319932937622, + "learning_rate": 0.00014368297236791545, + "loss": 1.8765, + "step": 7853 + }, + { + "epoch": 0.8284810126582278, + "grad_norm": 0.34404394030570984, + "learning_rate": 0.00014348368799021845, + "loss": 1.8888, + "step": 7854 + }, + { + "epoch": 0.8285864978902954, + "grad_norm": 0.36555588245391846, + "learning_rate": 0.00014328468001453717, + "loss": 1.8858, + "step": 7855 + }, + { + "epoch": 0.8286919831223629, + "grad_norm": 0.35071757435798645, + "learning_rate": 0.00014308594805750958, + "loss": 1.8531, + "step": 7856 + }, + { + "epoch": 0.8287974683544304, + 
"grad_norm": 0.35112127661705017, + "learning_rate": 0.0001428874917363054, + "loss": 1.8427, + "step": 7857 + }, + { + "epoch": 0.8289029535864979, + "grad_norm": 0.35315969586372375, + "learning_rate": 0.000142689310668625, + "loss": 1.8411, + "step": 7858 + }, + { + "epoch": 0.8290084388185655, + "grad_norm": 0.36331790685653687, + "learning_rate": 0.0001424914044726995, + "loss": 1.8621, + "step": 7859 + }, + { + "epoch": 0.8291139240506329, + "grad_norm": 0.3731858432292938, + "learning_rate": 0.000142293772767289, + "loss": 1.8614, + "step": 7860 + }, + { + "epoch": 0.8292194092827004, + "grad_norm": 0.3894490897655487, + "learning_rate": 0.00014209641517168273, + "loss": 1.875, + "step": 7861 + }, + { + "epoch": 0.829324894514768, + "grad_norm": 0.36338773369789124, + "learning_rate": 0.0001418993313056979, + "loss": 1.897, + "step": 7862 + }, + { + "epoch": 0.8294303797468354, + "grad_norm": 0.33290377259254456, + "learning_rate": 0.0001417025207896788, + "loss": 1.8691, + "step": 7863 + }, + { + "epoch": 0.829535864978903, + "grad_norm": 0.34009480476379395, + "learning_rate": 0.00014150598324449667, + "loss": 1.8703, + "step": 7864 + }, + { + "epoch": 0.8296413502109705, + "grad_norm": 0.3564646542072296, + "learning_rate": 0.0001413097182915482, + "loss": 1.8637, + "step": 7865 + }, + { + "epoch": 0.829746835443038, + "grad_norm": 0.3401848077774048, + "learning_rate": 0.0001411137255527554, + "loss": 1.9051, + "step": 7866 + }, + { + "epoch": 0.8298523206751055, + "grad_norm": 0.34727057814598083, + "learning_rate": 0.00014091800465056476, + "loss": 1.8439, + "step": 7867 + }, + { + "epoch": 0.8299578059071729, + "grad_norm": 0.3502914309501648, + "learning_rate": 0.00014072255520794613, + "loss": 1.8407, + "step": 7868 + }, + { + "epoch": 0.8300632911392405, + "grad_norm": 0.3639891743659973, + "learning_rate": 0.0001405273768483926, + "loss": 1.9014, + "step": 7869 + }, + { + "epoch": 0.830168776371308, + "grad_norm": 0.37454864382743835, + 
"learning_rate": 0.00014033246919591922, + "loss": 1.8898, + "step": 7870 + }, + { + "epoch": 0.8302742616033755, + "grad_norm": 0.3529956340789795, + "learning_rate": 0.00014013783187506265, + "loss": 1.9235, + "step": 7871 + }, + { + "epoch": 0.830379746835443, + "grad_norm": 0.34587493538856506, + "learning_rate": 0.00013994346451088037, + "loss": 1.9122, + "step": 7872 + }, + { + "epoch": 0.8304852320675106, + "grad_norm": 0.3635154068470001, + "learning_rate": 0.00013974936672894972, + "loss": 1.8571, + "step": 7873 + }, + { + "epoch": 0.830590717299578, + "grad_norm": 0.36075064539909363, + "learning_rate": 0.0001395555381553675, + "loss": 1.8939, + "step": 7874 + }, + { + "epoch": 0.8306962025316456, + "grad_norm": 0.33687853813171387, + "learning_rate": 0.00013936197841674894, + "loss": 1.859, + "step": 7875 + }, + { + "epoch": 0.8308016877637131, + "grad_norm": 0.3414284884929657, + "learning_rate": 0.00013916868714022735, + "loss": 1.9027, + "step": 7876 + }, + { + "epoch": 0.8309071729957805, + "grad_norm": 0.38494688272476196, + "learning_rate": 0.00013897566395345315, + "loss": 1.9056, + "step": 7877 + }, + { + "epoch": 0.8310126582278481, + "grad_norm": 0.3710670471191406, + "learning_rate": 0.000138782908484593, + "loss": 1.8773, + "step": 7878 + }, + { + "epoch": 0.8311181434599156, + "grad_norm": 0.3442472517490387, + "learning_rate": 0.0001385904203623296, + "loss": 1.8815, + "step": 7879 + }, + { + "epoch": 0.8312236286919831, + "grad_norm": 0.36545494198799133, + "learning_rate": 0.00013839819921586025, + "loss": 1.865, + "step": 7880 + }, + { + "epoch": 0.8313291139240506, + "grad_norm": 0.345548152923584, + "learning_rate": 0.00013820624467489697, + "loss": 1.8898, + "step": 7881 + }, + { + "epoch": 0.8314345991561182, + "grad_norm": 0.35534870624542236, + "learning_rate": 0.00013801455636966518, + "loss": 1.8765, + "step": 7882 + }, + { + "epoch": 0.8315400843881856, + "grad_norm": 0.38141316175460815, + "learning_rate": 
0.00013782313393090301, + "loss": 1.8649, + "step": 7883 + }, + { + "epoch": 0.8316455696202532, + "grad_norm": 0.3762746751308441, + "learning_rate": 0.00013763197698986107, + "loss": 1.9082, + "step": 7884 + }, + { + "epoch": 0.8317510548523207, + "grad_norm": 0.36573168635368347, + "learning_rate": 0.00013744108517830105, + "loss": 1.9178, + "step": 7885 + }, + { + "epoch": 0.8318565400843881, + "grad_norm": 0.37172237038612366, + "learning_rate": 0.00013725045812849567, + "loss": 1.8983, + "step": 7886 + }, + { + "epoch": 0.8319620253164557, + "grad_norm": 0.37489837408065796, + "learning_rate": 0.00013706009547322762, + "loss": 1.9026, + "step": 7887 + }, + { + "epoch": 0.8320675105485232, + "grad_norm": 0.3456435203552246, + "learning_rate": 0.00013686999684578871, + "loss": 1.8702, + "step": 7888 + }, + { + "epoch": 0.8321729957805907, + "grad_norm": 0.34824132919311523, + "learning_rate": 0.00013668016187997964, + "loss": 1.8809, + "step": 7889 + }, + { + "epoch": 0.8322784810126582, + "grad_norm": 0.36513590812683105, + "learning_rate": 0.00013649059021010894, + "loss": 1.8815, + "step": 7890 + }, + { + "epoch": 0.8323839662447258, + "grad_norm": 0.3704710602760315, + "learning_rate": 0.00013630128147099213, + "loss": 1.843, + "step": 7891 + }, + { + "epoch": 0.8324894514767932, + "grad_norm": 0.3468650281429291, + "learning_rate": 0.00013611223529795156, + "loss": 1.8399, + "step": 7892 + }, + { + "epoch": 0.8325949367088608, + "grad_norm": 0.3804793059825897, + "learning_rate": 0.00013592345132681512, + "loss": 1.8763, + "step": 7893 + }, + { + "epoch": 0.8327004219409283, + "grad_norm": 0.40209996700286865, + "learning_rate": 0.0001357349291939159, + "loss": 1.8528, + "step": 7894 + }, + { + "epoch": 0.8328059071729957, + "grad_norm": 0.3598349094390869, + "learning_rate": 0.00013554666853609146, + "loss": 1.9199, + "step": 7895 + }, + { + "epoch": 0.8329113924050633, + "grad_norm": 0.3678196966648102, + "learning_rate": 0.0001353586689906829, + "loss": 
1.8735, + "step": 7896 + }, + { + "epoch": 0.8330168776371308, + "grad_norm": 0.38986560702323914, + "learning_rate": 0.00013517093019553444, + "loss": 1.857, + "step": 7897 + }, + { + "epoch": 0.8331223628691983, + "grad_norm": 0.35990601778030396, + "learning_rate": 0.00013498345178899248, + "loss": 1.8832, + "step": 7898 + }, + { + "epoch": 0.8332278481012658, + "grad_norm": 0.3496098518371582, + "learning_rate": 0.0001347962334099052, + "loss": 1.8505, + "step": 7899 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.37373051047325134, + "learning_rate": 0.00013460927469762155, + "loss": 1.89, + "step": 7900 + }, + { + "epoch": 0.8334388185654008, + "grad_norm": 0.3659755289554596, + "learning_rate": 0.00013442257529199068, + "loss": 1.8667, + "step": 7901 + }, + { + "epoch": 0.8335443037974684, + "grad_norm": 0.36068084836006165, + "learning_rate": 0.00013423613483336144, + "loss": 1.8789, + "step": 7902 + }, + { + "epoch": 0.8336497890295359, + "grad_norm": 0.3585476875305176, + "learning_rate": 0.00013404995296258118, + "loss": 1.843, + "step": 7903 + }, + { + "epoch": 0.8337552742616033, + "grad_norm": 0.34197989106178284, + "learning_rate": 0.00013386402932099572, + "loss": 1.863, + "step": 7904 + }, + { + "epoch": 0.8338607594936709, + "grad_norm": 0.37732434272766113, + "learning_rate": 0.0001336783635504482, + "loss": 1.8772, + "step": 7905 + }, + { + "epoch": 0.8339662447257384, + "grad_norm": 0.3447614014148712, + "learning_rate": 0.00013349295529327843, + "loss": 1.876, + "step": 7906 + }, + { + "epoch": 0.8340717299578059, + "grad_norm": 0.3487159013748169, + "learning_rate": 0.00013330780419232245, + "loss": 1.872, + "step": 7907 + }, + { + "epoch": 0.8341772151898734, + "grad_norm": 0.349739670753479, + "learning_rate": 0.0001331229098909114, + "loss": 1.8746, + "step": 7908 + }, + { + "epoch": 0.834282700421941, + "grad_norm": 0.367176353931427, + "learning_rate": 0.00013293827203287141, + "loss": 1.8953, + "step": 7909 + }, + { + "epoch": 
0.8343881856540084, + "grad_norm": 0.35620957612991333, + "learning_rate": 0.00013275389026252255, + "loss": 1.8906, + "step": 7910 + }, + { + "epoch": 0.834493670886076, + "grad_norm": 0.35120299458503723, + "learning_rate": 0.00013256976422467803, + "loss": 1.8727, + "step": 7911 + }, + { + "epoch": 0.8345991561181435, + "grad_norm": 0.35658127069473267, + "learning_rate": 0.00013238589356464394, + "loss": 1.8866, + "step": 7912 + }, + { + "epoch": 0.8347046413502109, + "grad_norm": 0.36923879384994507, + "learning_rate": 0.00013220227792821806, + "loss": 1.8787, + "step": 7913 + }, + { + "epoch": 0.8348101265822785, + "grad_norm": 0.34480026364326477, + "learning_rate": 0.00013201891696168963, + "loss": 1.8551, + "step": 7914 + }, + { + "epoch": 0.834915611814346, + "grad_norm": 0.36370328068733215, + "learning_rate": 0.00013183581031183853, + "loss": 1.9038, + "step": 7915 + }, + { + "epoch": 0.8350210970464135, + "grad_norm": 0.35988906025886536, + "learning_rate": 0.00013165295762593426, + "loss": 1.8821, + "step": 7916 + }, + { + "epoch": 0.835126582278481, + "grad_norm": 0.35658812522888184, + "learning_rate": 0.0001314703585517359, + "loss": 1.8803, + "step": 7917 + }, + { + "epoch": 0.8352320675105486, + "grad_norm": 0.34482595324516296, + "learning_rate": 0.00013128801273749074, + "loss": 1.883, + "step": 7918 + }, + { + "epoch": 0.835337552742616, + "grad_norm": 0.3705143332481384, + "learning_rate": 0.00013110591983193424, + "loss": 1.8677, + "step": 7919 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 0.3562207818031311, + "learning_rate": 0.0001309240794842889, + "loss": 1.8678, + "step": 7920 + }, + { + "epoch": 0.8355485232067511, + "grad_norm": 0.34411752223968506, + "learning_rate": 0.00013074249134426366, + "loss": 1.9048, + "step": 7921 + }, + { + "epoch": 0.8356540084388185, + "grad_norm": 0.4154362082481384, + "learning_rate": 0.00013056115506205357, + "loss": 1.8916, + "step": 7922 + }, + { + "epoch": 0.8357594936708861, + 
"grad_norm": 0.3512861132621765, + "learning_rate": 0.00013038007028833853, + "loss": 1.9019, + "step": 7923 + }, + { + "epoch": 0.8358649789029536, + "grad_norm": 0.3667321503162384, + "learning_rate": 0.00013019923667428315, + "loss": 1.8825, + "step": 7924 + }, + { + "epoch": 0.8359704641350211, + "grad_norm": 0.3680517077445984, + "learning_rate": 0.0001300186538715359, + "loss": 1.8794, + "step": 7925 + }, + { + "epoch": 0.8360759493670886, + "grad_norm": 0.37047892808914185, + "learning_rate": 0.00012983832153222816, + "loss": 1.8938, + "step": 7926 + }, + { + "epoch": 0.8361814345991562, + "grad_norm": 0.3510376811027527, + "learning_rate": 0.00012965823930897406, + "loss": 1.856, + "step": 7927 + }, + { + "epoch": 0.8362869198312236, + "grad_norm": 0.3440750241279602, + "learning_rate": 0.00012947840685486933, + "loss": 1.8839, + "step": 7928 + }, + { + "epoch": 0.8363924050632912, + "grad_norm": 0.3521532118320465, + "learning_rate": 0.00012929882382349103, + "loss": 1.8893, + "step": 7929 + }, + { + "epoch": 0.8364978902953587, + "grad_norm": 0.3542807996273041, + "learning_rate": 0.00012911948986889664, + "loss": 1.8677, + "step": 7930 + }, + { + "epoch": 0.8366033755274261, + "grad_norm": 0.35521551966667175, + "learning_rate": 0.0001289404046456233, + "loss": 1.8793, + "step": 7931 + }, + { + "epoch": 0.8367088607594937, + "grad_norm": 0.37079304456710815, + "learning_rate": 0.00012876156780868752, + "loss": 1.8641, + "step": 7932 + }, + { + "epoch": 0.8368143459915611, + "grad_norm": 0.3728228509426117, + "learning_rate": 0.00012858297901358425, + "loss": 1.8833, + "step": 7933 + }, + { + "epoch": 0.8369198312236287, + "grad_norm": 0.3425503969192505, + "learning_rate": 0.0001284046379162861, + "loss": 1.8919, + "step": 7934 + }, + { + "epoch": 0.8370253164556962, + "grad_norm": 0.3448263704776764, + "learning_rate": 0.00012822654417324306, + "loss": 1.8628, + "step": 7935 + }, + { + "epoch": 0.8371308016877637, + "grad_norm": 0.3734988868236542, + 
"learning_rate": 0.00012804869744138136, + "loss": 1.8957, + "step": 7936 + }, + { + "epoch": 0.8372362869198312, + "grad_norm": 0.3515889346599579, + "learning_rate": 0.0001278710973781033, + "loss": 1.8564, + "step": 7937 + }, + { + "epoch": 0.8373417721518988, + "grad_norm": 0.3405628800392151, + "learning_rate": 0.00012769374364128628, + "loss": 1.8918, + "step": 7938 + }, + { + "epoch": 0.8374472573839662, + "grad_norm": 0.34964719414711, + "learning_rate": 0.0001275166358892821, + "loss": 1.8906, + "step": 7939 + }, + { + "epoch": 0.8375527426160337, + "grad_norm": 0.33421334624290466, + "learning_rate": 0.00012733977378091664, + "loss": 1.8698, + "step": 7940 + }, + { + "epoch": 0.8376582278481013, + "grad_norm": 0.3690473139286041, + "learning_rate": 0.0001271631569754887, + "loss": 1.8805, + "step": 7941 + }, + { + "epoch": 0.8377637130801687, + "grad_norm": 0.33581656217575073, + "learning_rate": 0.00012698678513276985, + "loss": 1.8931, + "step": 7942 + }, + { + "epoch": 0.8378691983122363, + "grad_norm": 0.3319907784461975, + "learning_rate": 0.00012681065791300352, + "loss": 1.8823, + "step": 7943 + }, + { + "epoch": 0.8379746835443038, + "grad_norm": 0.36292174458503723, + "learning_rate": 0.00012663477497690422, + "loss": 1.8865, + "step": 7944 + }, + { + "epoch": 0.8380801687763713, + "grad_norm": 0.34691205620765686, + "learning_rate": 0.0001264591359856572, + "loss": 1.8497, + "step": 7945 + }, + { + "epoch": 0.8381856540084388, + "grad_norm": 0.32964256405830383, + "learning_rate": 0.00012628374060091757, + "loss": 1.8894, + "step": 7946 + }, + { + "epoch": 0.8382911392405064, + "grad_norm": 0.35333794355392456, + "learning_rate": 0.00012610858848480968, + "loss": 1.8885, + "step": 7947 + }, + { + "epoch": 0.8383966244725738, + "grad_norm": 0.36850014328956604, + "learning_rate": 0.00012593367929992667, + "loss": 1.8663, + "step": 7948 + }, + { + "epoch": 0.8385021097046413, + "grad_norm": 0.3596217632293701, + "learning_rate": 
0.00012575901270932944, + "loss": 1.8778, + "step": 7949 + }, + { + "epoch": 0.8386075949367089, + "grad_norm": 0.3403030335903168, + "learning_rate": 0.00012558458837654633, + "loss": 1.891, + "step": 7950 + }, + { + "epoch": 0.8387130801687763, + "grad_norm": 0.3408500850200653, + "learning_rate": 0.0001254104059655723, + "loss": 1.8692, + "step": 7951 + }, + { + "epoch": 0.8388185654008439, + "grad_norm": 0.3375507593154907, + "learning_rate": 0.0001252364651408684, + "loss": 1.8355, + "step": 7952 + }, + { + "epoch": 0.8389240506329114, + "grad_norm": 0.3568466305732727, + "learning_rate": 0.00012506276556736108, + "loss": 1.8777, + "step": 7953 + }, + { + "epoch": 0.8390295358649789, + "grad_norm": 0.35083162784576416, + "learning_rate": 0.00012488930691044144, + "loss": 1.8395, + "step": 7954 + }, + { + "epoch": 0.8391350210970464, + "grad_norm": 0.35609835386276245, + "learning_rate": 0.00012471608883596476, + "loss": 1.8435, + "step": 7955 + }, + { + "epoch": 0.839240506329114, + "grad_norm": 0.3440849483013153, + "learning_rate": 0.00012454311101024967, + "loss": 1.8733, + "step": 7956 + }, + { + "epoch": 0.8393459915611814, + "grad_norm": 0.34492844343185425, + "learning_rate": 0.0001243703731000777, + "loss": 1.8679, + "step": 7957 + }, + { + "epoch": 0.8394514767932489, + "grad_norm": 0.3746944069862366, + "learning_rate": 0.00012419787477269256, + "loss": 1.8837, + "step": 7958 + }, + { + "epoch": 0.8395569620253165, + "grad_norm": 0.3934699296951294, + "learning_rate": 0.00012402561569579935, + "loss": 1.8679, + "step": 7959 + }, + { + "epoch": 0.8396624472573839, + "grad_norm": 0.3663318455219269, + "learning_rate": 0.00012385359553756422, + "loss": 1.8597, + "step": 7960 + }, + { + "epoch": 0.8397679324894515, + "grad_norm": 0.34098124504089355, + "learning_rate": 0.00012368181396661337, + "loss": 1.8442, + "step": 7961 + }, + { + "epoch": 0.839873417721519, + "grad_norm": 0.3607950210571289, + "learning_rate": 0.00012351027065203284, + "loss": 
1.8606, + "step": 7962 + }, + { + "epoch": 0.8399789029535865, + "grad_norm": 0.3948383033275604, + "learning_rate": 0.00012333896526336752, + "loss": 1.8623, + "step": 7963 + }, + { + "epoch": 0.840084388185654, + "grad_norm": 0.3771279752254486, + "learning_rate": 0.0001231678974706205, + "loss": 1.8839, + "step": 7964 + }, + { + "epoch": 0.8401898734177216, + "grad_norm": 0.39571434259414673, + "learning_rate": 0.00012299706694425286, + "loss": 1.8714, + "step": 7965 + }, + { + "epoch": 0.840295358649789, + "grad_norm": 0.36764249205589294, + "learning_rate": 0.00012282647335518248, + "loss": 1.8863, + "step": 7966 + }, + { + "epoch": 0.8404008438818565, + "grad_norm": 0.34254395961761475, + "learning_rate": 0.00012265611637478376, + "loss": 1.868, + "step": 7967 + }, + { + "epoch": 0.8405063291139241, + "grad_norm": 0.3822396695613861, + "learning_rate": 0.00012248599567488697, + "loss": 1.8862, + "step": 7968 + }, + { + "epoch": 0.8406118143459915, + "grad_norm": 0.40979400277137756, + "learning_rate": 0.00012231611092777743, + "loss": 1.851, + "step": 7969 + }, + { + "epoch": 0.8407172995780591, + "grad_norm": 0.33472657203674316, + "learning_rate": 0.0001221464618061951, + "loss": 1.8687, + "step": 7970 + }, + { + "epoch": 0.8408227848101266, + "grad_norm": 0.3546699583530426, + "learning_rate": 0.00012197704798333364, + "loss": 1.8842, + "step": 7971 + }, + { + "epoch": 0.8409282700421941, + "grad_norm": 0.3548680543899536, + "learning_rate": 0.00012180786913284024, + "loss": 1.8852, + "step": 7972 + }, + { + "epoch": 0.8410337552742616, + "grad_norm": 0.3858281672000885, + "learning_rate": 0.00012163892492881463, + "loss": 1.8538, + "step": 7973 + }, + { + "epoch": 0.8411392405063292, + "grad_norm": 0.3738190829753876, + "learning_rate": 0.00012147021504580839, + "loss": 1.8451, + "step": 7974 + }, + { + "epoch": 0.8412447257383966, + "grad_norm": 0.3552870750427246, + "learning_rate": 0.00012130173915882475, + "loss": 1.8738, + "step": 7975 + }, + { + 
"epoch": 0.8413502109704641, + "grad_norm": 0.3586968779563904, + "learning_rate": 0.00012113349694331764, + "loss": 1.8955, + "step": 7976 + }, + { + "epoch": 0.8414556962025317, + "grad_norm": 0.4021929204463959, + "learning_rate": 0.0001209654880751909, + "loss": 1.825, + "step": 7977 + }, + { + "epoch": 0.8415611814345991, + "grad_norm": 0.38856229186058044, + "learning_rate": 0.00012079771223079825, + "loss": 1.8637, + "step": 7978 + }, + { + "epoch": 0.8416666666666667, + "grad_norm": 0.38104334473609924, + "learning_rate": 0.00012063016908694192, + "loss": 1.864, + "step": 7979 + }, + { + "epoch": 0.8417721518987342, + "grad_norm": 0.349486768245697, + "learning_rate": 0.0001204628583208727, + "loss": 1.8521, + "step": 7980 + }, + { + "epoch": 0.8418776371308017, + "grad_norm": 0.3704957365989685, + "learning_rate": 0.00012029577961028894, + "loss": 1.8627, + "step": 7981 + }, + { + "epoch": 0.8419831223628692, + "grad_norm": 0.36208829283714294, + "learning_rate": 0.00012012893263333586, + "loss": 1.8664, + "step": 7982 + }, + { + "epoch": 0.8420886075949368, + "grad_norm": 0.3722020387649536, + "learning_rate": 0.00011996231706860537, + "loss": 1.9051, + "step": 7983 + }, + { + "epoch": 0.8421940928270042, + "grad_norm": 0.37587693333625793, + "learning_rate": 0.00011979593259513487, + "loss": 1.8695, + "step": 7984 + }, + { + "epoch": 0.8422995780590717, + "grad_norm": 0.35364219546318054, + "learning_rate": 0.00011962977889240712, + "loss": 1.9077, + "step": 7985 + }, + { + "epoch": 0.8424050632911393, + "grad_norm": 0.37027788162231445, + "learning_rate": 0.00011946385564034942, + "loss": 1.8677, + "step": 7986 + }, + { + "epoch": 0.8425105485232067, + "grad_norm": 0.35375937819480896, + "learning_rate": 0.00011929816251933285, + "loss": 1.8838, + "step": 7987 + }, + { + "epoch": 0.8426160337552743, + "grad_norm": 0.3624197840690613, + "learning_rate": 0.00011913269921017203, + "loss": 1.9137, + "step": 7988 + }, + { + "epoch": 0.8427215189873418, + 
"grad_norm": 0.4291250705718994, + "learning_rate": 0.00011896746539412405, + "loss": 1.8645, + "step": 7989 + }, + { + "epoch": 0.8428270042194093, + "grad_norm": 0.36678966879844666, + "learning_rate": 0.00011880246075288824, + "loss": 1.8659, + "step": 7990 + }, + { + "epoch": 0.8429324894514768, + "grad_norm": 0.3731558620929718, + "learning_rate": 0.00011863768496860542, + "loss": 1.8655, + "step": 7991 + }, + { + "epoch": 0.8430379746835444, + "grad_norm": 0.3574913740158081, + "learning_rate": 0.00011847313772385713, + "loss": 1.8563, + "step": 7992 + }, + { + "epoch": 0.8431434599156118, + "grad_norm": 0.3670060634613037, + "learning_rate": 0.00011830881870166533, + "loss": 1.8761, + "step": 7993 + }, + { + "epoch": 0.8432489451476793, + "grad_norm": 0.39499524235725403, + "learning_rate": 0.00011814472758549143, + "loss": 1.8687, + "step": 7994 + }, + { + "epoch": 0.8433544303797469, + "grad_norm": 0.3636425733566284, + "learning_rate": 0.00011798086405923605, + "loss": 1.8702, + "step": 7995 + }, + { + "epoch": 0.8434599156118143, + "grad_norm": 0.3489941358566284, + "learning_rate": 0.0001178172278072382, + "loss": 1.8936, + "step": 7996 + }, + { + "epoch": 0.8435654008438819, + "grad_norm": 0.39699220657348633, + "learning_rate": 0.00011765381851427454, + "loss": 1.8714, + "step": 7997 + }, + { + "epoch": 0.8436708860759494, + "grad_norm": 0.35948216915130615, + "learning_rate": 0.00011749063586555921, + "loss": 1.8652, + "step": 7998 + }, + { + "epoch": 0.8437763713080169, + "grad_norm": 0.35557618737220764, + "learning_rate": 0.00011732767954674264, + "loss": 1.872, + "step": 7999 + }, + { + "epoch": 0.8438818565400844, + "grad_norm": 0.3551954925060272, + "learning_rate": 0.00011716494924391148, + "loss": 1.8552, + "step": 8000 + }, + { + "epoch": 0.8439873417721518, + "grad_norm": 0.36826545000076294, + "learning_rate": 0.00011700244464358777, + "loss": 1.8838, + "step": 8001 + }, + { + "epoch": 0.8440928270042194, + "grad_norm": 0.3444053530693054, 
+ "learning_rate": 0.00011684016543272816, + "loss": 1.8975, + "step": 8002 + }, + { + "epoch": 0.8441983122362869, + "grad_norm": 0.3589162528514862, + "learning_rate": 0.00011667811129872368, + "loss": 1.8964, + "step": 8003 + }, + { + "epoch": 0.8443037974683544, + "grad_norm": 0.33943426609039307, + "learning_rate": 0.00011651628192939872, + "loss": 1.8412, + "step": 8004 + }, + { + "epoch": 0.8444092827004219, + "grad_norm": 0.34680411219596863, + "learning_rate": 0.0001163546770130109, + "loss": 1.8747, + "step": 8005 + }, + { + "epoch": 0.8445147679324895, + "grad_norm": 0.354581356048584, + "learning_rate": 0.00011619329623825008, + "loss": 1.8752, + "step": 8006 + }, + { + "epoch": 0.8446202531645569, + "grad_norm": 0.3630285859107971, + "learning_rate": 0.00011603213929423784, + "loss": 1.9198, + "step": 8007 + }, + { + "epoch": 0.8447257383966245, + "grad_norm": 0.3755515217781067, + "learning_rate": 0.0001158712058705271, + "loss": 1.8746, + "step": 8008 + }, + { + "epoch": 0.844831223628692, + "grad_norm": 0.35671427845954895, + "learning_rate": 0.00011571049565710122, + "loss": 1.8461, + "step": 8009 + }, + { + "epoch": 0.8449367088607594, + "grad_norm": 0.3369694948196411, + "learning_rate": 0.00011555000834437363, + "loss": 1.8627, + "step": 8010 + }, + { + "epoch": 0.845042194092827, + "grad_norm": 0.34809190034866333, + "learning_rate": 0.00011538974362318715, + "loss": 1.8748, + "step": 8011 + }, + { + "epoch": 0.8451476793248945, + "grad_norm": 0.3673906624317169, + "learning_rate": 0.00011522970118481326, + "loss": 1.8695, + "step": 8012 + }, + { + "epoch": 0.845253164556962, + "grad_norm": 0.368400514125824, + "learning_rate": 0.00011506988072095186, + "loss": 1.8674, + "step": 8013 + }, + { + "epoch": 0.8453586497890295, + "grad_norm": 0.34446826577186584, + "learning_rate": 0.00011491028192373022, + "loss": 1.8718, + "step": 8014 + }, + { + "epoch": 0.8454641350210971, + "grad_norm": 0.33677229285240173, + "learning_rate": 
0.00011475090448570282, + "loss": 1.8517, + "step": 8015 + }, + { + "epoch": 0.8455696202531645, + "grad_norm": 0.34302619099617004, + "learning_rate": 0.00011459174809985047, + "loss": 1.8697, + "step": 8016 + }, + { + "epoch": 0.8456751054852321, + "grad_norm": 0.3526250123977661, + "learning_rate": 0.00011443281245957975, + "loss": 1.8276, + "step": 8017 + }, + { + "epoch": 0.8457805907172996, + "grad_norm": 0.37556737661361694, + "learning_rate": 0.00011427409725872264, + "loss": 1.896, + "step": 8018 + }, + { + "epoch": 0.845886075949367, + "grad_norm": 0.36254188418388367, + "learning_rate": 0.00011411560219153552, + "loss": 1.882, + "step": 8019 + }, + { + "epoch": 0.8459915611814346, + "grad_norm": 0.3562539219856262, + "learning_rate": 0.00011395732695269907, + "loss": 1.8806, + "step": 8020 + }, + { + "epoch": 0.8460970464135021, + "grad_norm": 0.3530427813529968, + "learning_rate": 0.00011379927123731737, + "loss": 1.8633, + "step": 8021 + }, + { + "epoch": 0.8462025316455696, + "grad_norm": 0.3852268159389496, + "learning_rate": 0.00011364143474091725, + "loss": 1.8503, + "step": 8022 + }, + { + "epoch": 0.8463080168776371, + "grad_norm": 0.34009042382240295, + "learning_rate": 0.00011348381715944802, + "loss": 1.8476, + "step": 8023 + }, + { + "epoch": 0.8464135021097047, + "grad_norm": 0.3484840989112854, + "learning_rate": 0.00011332641818928063, + "loss": 1.8963, + "step": 8024 + }, + { + "epoch": 0.8465189873417721, + "grad_norm": 0.35964176058769226, + "learning_rate": 0.00011316923752720708, + "loss": 1.8824, + "step": 8025 + }, + { + "epoch": 0.8466244725738397, + "grad_norm": 0.35860276222229004, + "learning_rate": 0.00011301227487044006, + "loss": 1.8603, + "step": 8026 + }, + { + "epoch": 0.8467299578059072, + "grad_norm": 0.3492109477519989, + "learning_rate": 0.00011285552991661202, + "loss": 1.8333, + "step": 8027 + }, + { + "epoch": 0.8468354430379746, + "grad_norm": 0.3483872413635254, + "learning_rate": 0.00011269900236377497, + "loss": 
1.8575, + "step": 8028 + }, + { + "epoch": 0.8469409282700422, + "grad_norm": 0.3649575710296631, + "learning_rate": 0.0001125426919103997, + "loss": 1.8921, + "step": 8029 + }, + { + "epoch": 0.8470464135021097, + "grad_norm": 0.34838780760765076, + "learning_rate": 0.00011238659825537507, + "loss": 1.8756, + "step": 8030 + }, + { + "epoch": 0.8471518987341772, + "grad_norm": 0.3401307761669159, + "learning_rate": 0.0001122307210980077, + "loss": 1.8918, + "step": 8031 + }, + { + "epoch": 0.8472573839662447, + "grad_norm": 0.35153818130493164, + "learning_rate": 0.00011207506013802118, + "loss": 1.8512, + "step": 8032 + }, + { + "epoch": 0.8473628691983123, + "grad_norm": 0.3447367548942566, + "learning_rate": 0.00011191961507555564, + "loss": 1.8979, + "step": 8033 + }, + { + "epoch": 0.8474683544303797, + "grad_norm": 0.34489089250564575, + "learning_rate": 0.00011176438561116717, + "loss": 1.8497, + "step": 8034 + }, + { + "epoch": 0.8475738396624473, + "grad_norm": 0.35988956689834595, + "learning_rate": 0.00011160937144582693, + "loss": 1.8822, + "step": 8035 + }, + { + "epoch": 0.8476793248945148, + "grad_norm": 0.3499513566493988, + "learning_rate": 0.00011145457228092119, + "loss": 1.8854, + "step": 8036 + }, + { + "epoch": 0.8477848101265822, + "grad_norm": 0.3290562331676483, + "learning_rate": 0.00011129998781824997, + "loss": 1.8753, + "step": 8037 + }, + { + "epoch": 0.8478902953586498, + "grad_norm": 0.35162946581840515, + "learning_rate": 0.00011114561776002725, + "loss": 1.916, + "step": 8038 + }, + { + "epoch": 0.8479957805907173, + "grad_norm": 0.34884876012802124, + "learning_rate": 0.00011099146180887992, + "loss": 1.8998, + "step": 8039 + }, + { + "epoch": 0.8481012658227848, + "grad_norm": 0.33156028389930725, + "learning_rate": 0.00011083751966784716, + "loss": 1.8881, + "step": 8040 + }, + { + "epoch": 0.8482067510548523, + "grad_norm": 0.3562105894088745, + "learning_rate": 0.00011068379104038026, + "loss": 1.8738, + "step": 8041 + }, + { 
+ "epoch": 0.8483122362869199, + "grad_norm": 0.36727699637413025, + "learning_rate": 0.00011053027563034162, + "loss": 1.825, + "step": 8042 + }, + { + "epoch": 0.8484177215189873, + "grad_norm": 0.3419550955295563, + "learning_rate": 0.0001103769731420045, + "loss": 1.8953, + "step": 8043 + }, + { + "epoch": 0.8485232067510549, + "grad_norm": 0.3582662045955658, + "learning_rate": 0.00011022388328005234, + "loss": 1.8626, + "step": 8044 + }, + { + "epoch": 0.8486286919831224, + "grad_norm": 0.3905150890350342, + "learning_rate": 0.00011007100574957802, + "loss": 1.863, + "step": 8045 + }, + { + "epoch": 0.8487341772151898, + "grad_norm": 0.3474056124687195, + "learning_rate": 0.00010991834025608366, + "loss": 1.8658, + "step": 8046 + }, + { + "epoch": 0.8488396624472574, + "grad_norm": 0.3630386292934418, + "learning_rate": 0.00010976588650547961, + "loss": 1.8416, + "step": 8047 + }, + { + "epoch": 0.8489451476793249, + "grad_norm": 0.3479885458946228, + "learning_rate": 0.00010961364420408429, + "loss": 1.819, + "step": 8048 + }, + { + "epoch": 0.8490506329113924, + "grad_norm": 0.364075630903244, + "learning_rate": 0.0001094616130586235, + "loss": 1.9063, + "step": 8049 + }, + { + "epoch": 0.8491561181434599, + "grad_norm": 0.33588168025016785, + "learning_rate": 0.00010930979277622952, + "loss": 1.8751, + "step": 8050 + }, + { + "epoch": 0.8492616033755275, + "grad_norm": 0.34287703037261963, + "learning_rate": 0.00010915818306444116, + "loss": 1.8831, + "step": 8051 + }, + { + "epoch": 0.8493670886075949, + "grad_norm": 0.34883439540863037, + "learning_rate": 0.00010900678363120256, + "loss": 1.8702, + "step": 8052 + }, + { + "epoch": 0.8494725738396625, + "grad_norm": 0.38462361693382263, + "learning_rate": 0.00010885559418486318, + "loss": 1.8966, + "step": 8053 + }, + { + "epoch": 0.84957805907173, + "grad_norm": 0.3398038148880005, + "learning_rate": 0.00010870461443417695, + "loss": 1.8532, + "step": 8054 + }, + { + "epoch": 0.8496835443037974, + 
"grad_norm": 0.34857457876205444, + "learning_rate": 0.00010855384408830159, + "loss": 1.873, + "step": 8055 + }, + { + "epoch": 0.849789029535865, + "grad_norm": 0.38537701964378357, + "learning_rate": 0.0001084032828567984, + "loss": 1.8465, + "step": 8056 + }, + { + "epoch": 0.8498945147679325, + "grad_norm": 0.3549545705318451, + "learning_rate": 0.00010825293044963134, + "loss": 1.8865, + "step": 8057 + }, + { + "epoch": 0.85, + "grad_norm": 0.3653614819049835, + "learning_rate": 0.00010810278657716679, + "loss": 1.8898, + "step": 8058 + }, + { + "epoch": 0.8501054852320675, + "grad_norm": 0.3474223017692566, + "learning_rate": 0.00010795285095017282, + "loss": 1.8264, + "step": 8059 + }, + { + "epoch": 0.8502109704641351, + "grad_norm": 0.3641330301761627, + "learning_rate": 0.00010780312327981853, + "loss": 1.867, + "step": 8060 + }, + { + "epoch": 0.8503164556962025, + "grad_norm": 0.3568892478942871, + "learning_rate": 0.00010765360327767384, + "loss": 1.8607, + "step": 8061 + }, + { + "epoch": 0.8504219409282701, + "grad_norm": 0.35188058018684387, + "learning_rate": 0.00010750429065570842, + "loss": 1.8861, + "step": 8062 + }, + { + "epoch": 0.8505274261603376, + "grad_norm": 0.3653988242149353, + "learning_rate": 0.0001073551851262917, + "loss": 1.8587, + "step": 8063 + }, + { + "epoch": 0.850632911392405, + "grad_norm": 0.349581778049469, + "learning_rate": 0.000107206286402192, + "loss": 1.8846, + "step": 8064 + }, + { + "epoch": 0.8507383966244726, + "grad_norm": 0.343741774559021, + "learning_rate": 0.00010705759419657583, + "loss": 1.9196, + "step": 8065 + }, + { + "epoch": 0.85084388185654, + "grad_norm": 0.34654897451400757, + "learning_rate": 0.00010690910822300775, + "loss": 1.8542, + "step": 8066 + }, + { + "epoch": 0.8509493670886076, + "grad_norm": 0.33765271306037903, + "learning_rate": 0.00010676082819544952, + "loss": 1.8735, + "step": 8067 + }, + { + "epoch": 0.8510548523206751, + "grad_norm": 0.3487968444824219, + "learning_rate": 
0.00010661275382825955, + "loss": 1.8791, + "step": 8068 + }, + { + "epoch": 0.8511603375527426, + "grad_norm": 0.35667726397514343, + "learning_rate": 0.00010646488483619263, + "loss": 1.8861, + "step": 8069 + }, + { + "epoch": 0.8512658227848101, + "grad_norm": 0.3717903792858124, + "learning_rate": 0.0001063172209343989, + "loss": 1.869, + "step": 8070 + }, + { + "epoch": 0.8513713080168777, + "grad_norm": 0.3534775972366333, + "learning_rate": 0.00010616976183842376, + "loss": 1.8428, + "step": 8071 + }, + { + "epoch": 0.8514767932489451, + "grad_norm": 0.3508715331554413, + "learning_rate": 0.00010602250726420722, + "loss": 1.8811, + "step": 8072 + }, + { + "epoch": 0.8515822784810126, + "grad_norm": 0.3327198922634125, + "learning_rate": 0.00010587545692808299, + "loss": 1.8795, + "step": 8073 + }, + { + "epoch": 0.8516877637130802, + "grad_norm": 0.3691020607948303, + "learning_rate": 0.00010572861054677855, + "loss": 1.881, + "step": 8074 + }, + { + "epoch": 0.8517932489451476, + "grad_norm": 0.3520808815956116, + "learning_rate": 0.00010558196783741396, + "loss": 1.907, + "step": 8075 + }, + { + "epoch": 0.8518987341772152, + "grad_norm": 0.34514424204826355, + "learning_rate": 0.00010543552851750185, + "loss": 1.9124, + "step": 8076 + }, + { + "epoch": 0.8520042194092827, + "grad_norm": 0.35514283180236816, + "learning_rate": 0.00010528929230494662, + "loss": 1.8879, + "step": 8077 + }, + { + "epoch": 0.8521097046413502, + "grad_norm": 0.3599276542663574, + "learning_rate": 0.00010514325891804378, + "loss": 1.8422, + "step": 8078 + }, + { + "epoch": 0.8522151898734177, + "grad_norm": 0.35263752937316895, + "learning_rate": 0.00010499742807547978, + "loss": 1.8779, + "step": 8079 + }, + { + "epoch": 0.8523206751054853, + "grad_norm": 0.3707065284252167, + "learning_rate": 0.000104851799496331, + "loss": 1.9079, + "step": 8080 + }, + { + "epoch": 0.8524261603375527, + "grad_norm": 0.3417995870113373, + "learning_rate": 0.00010470637290006365, + "loss": 
1.84, + "step": 8081 + }, + { + "epoch": 0.8525316455696202, + "grad_norm": 0.33554232120513916, + "learning_rate": 0.000104561148006533, + "loss": 1.8204, + "step": 8082 + }, + { + "epoch": 0.8526371308016878, + "grad_norm": 0.35931462049484253, + "learning_rate": 0.00010441612453598276, + "loss": 1.8612, + "step": 8083 + }, + { + "epoch": 0.8527426160337552, + "grad_norm": 0.36801332235336304, + "learning_rate": 0.0001042713022090448, + "loss": 1.8803, + "step": 8084 + }, + { + "epoch": 0.8528481012658228, + "grad_norm": 0.35659894347190857, + "learning_rate": 0.00010412668074673832, + "loss": 1.8831, + "step": 8085 + }, + { + "epoch": 0.8529535864978903, + "grad_norm": 0.35998913645744324, + "learning_rate": 0.00010398225987046957, + "loss": 1.8776, + "step": 8086 + }, + { + "epoch": 0.8530590717299578, + "grad_norm": 0.3353712856769562, + "learning_rate": 0.00010383803930203124, + "loss": 1.8686, + "step": 8087 + }, + { + "epoch": 0.8531645569620253, + "grad_norm": 0.345137357711792, + "learning_rate": 0.00010369401876360168, + "loss": 1.8731, + "step": 8088 + }, + { + "epoch": 0.8532700421940929, + "grad_norm": 0.36613667011260986, + "learning_rate": 0.0001035501979777448, + "loss": 1.9134, + "step": 8089 + }, + { + "epoch": 0.8533755274261603, + "grad_norm": 0.36462265253067017, + "learning_rate": 0.00010340657666740917, + "loss": 1.9131, + "step": 8090 + }, + { + "epoch": 0.8534810126582278, + "grad_norm": 0.33476540446281433, + "learning_rate": 0.00010326315455592764, + "loss": 1.8758, + "step": 8091 + }, + { + "epoch": 0.8535864978902954, + "grad_norm": 0.3425934612751007, + "learning_rate": 0.00010311993136701693, + "loss": 1.8632, + "step": 8092 + }, + { + "epoch": 0.8536919831223628, + "grad_norm": 0.3598099946975708, + "learning_rate": 0.00010297690682477669, + "loss": 1.8604, + "step": 8093 + }, + { + "epoch": 0.8537974683544304, + "grad_norm": 0.3559568524360657, + "learning_rate": 0.00010283408065368951, + "loss": 1.8627, + "step": 8094 + }, + { + 
"epoch": 0.8539029535864979, + "grad_norm": 0.34848713874816895, + "learning_rate": 0.00010269145257861988, + "loss": 1.8206, + "step": 8095 + }, + { + "epoch": 0.8540084388185654, + "grad_norm": 0.37242984771728516, + "learning_rate": 0.00010254902232481406, + "loss": 1.8538, + "step": 8096 + }, + { + "epoch": 0.8541139240506329, + "grad_norm": 0.3749469816684723, + "learning_rate": 0.0001024067896178994, + "loss": 1.8864, + "step": 8097 + }, + { + "epoch": 0.8542194092827005, + "grad_norm": 0.3685019910335541, + "learning_rate": 0.0001022647541838836, + "loss": 1.8482, + "step": 8098 + }, + { + "epoch": 0.8543248945147679, + "grad_norm": 0.34180039167404175, + "learning_rate": 0.00010212291574915464, + "loss": 1.8684, + "step": 8099 + }, + { + "epoch": 0.8544303797468354, + "grad_norm": 0.35213732719421387, + "learning_rate": 0.00010198127404047976, + "loss": 1.8877, + "step": 8100 + }, + { + "epoch": 0.854535864978903, + "grad_norm": 0.3790186941623688, + "learning_rate": 0.0001018398287850053, + "loss": 1.8944, + "step": 8101 + }, + { + "epoch": 0.8546413502109704, + "grad_norm": 0.3615725040435791, + "learning_rate": 0.00010169857971025608, + "loss": 1.9089, + "step": 8102 + }, + { + "epoch": 0.854746835443038, + "grad_norm": 0.34791702032089233, + "learning_rate": 0.00010155752654413465, + "loss": 1.8588, + "step": 8103 + }, + { + "epoch": 0.8548523206751055, + "grad_norm": 0.34374794363975525, + "learning_rate": 0.00010141666901492119, + "loss": 1.9033, + "step": 8104 + }, + { + "epoch": 0.854957805907173, + "grad_norm": 0.38206392526626587, + "learning_rate": 0.00010127600685127249, + "loss": 1.8755, + "step": 8105 + }, + { + "epoch": 0.8550632911392405, + "grad_norm": 0.36124926805496216, + "learning_rate": 0.0001011355397822219, + "loss": 1.8658, + "step": 8106 + }, + { + "epoch": 0.8551687763713081, + "grad_norm": 0.35344555974006653, + "learning_rate": 0.00010099526753717856, + "loss": 1.8569, + "step": 8107 + }, + { + "epoch": 0.8552742616033755, + 
"grad_norm": 0.3516816794872284, + "learning_rate": 0.00010085518984592678, + "loss": 1.8325, + "step": 8108 + }, + { + "epoch": 0.855379746835443, + "grad_norm": 0.3482087254524231, + "learning_rate": 0.00010071530643862575, + "loss": 1.877, + "step": 8109 + }, + { + "epoch": 0.8554852320675106, + "grad_norm": 0.3668799102306366, + "learning_rate": 0.00010057561704580898, + "loss": 1.8968, + "step": 8110 + }, + { + "epoch": 0.855590717299578, + "grad_norm": 0.3513518273830414, + "learning_rate": 0.00010043612139838357, + "loss": 1.8737, + "step": 8111 + }, + { + "epoch": 0.8556962025316456, + "grad_norm": 0.3852561116218567, + "learning_rate": 0.00010029681922763002, + "loss": 1.8947, + "step": 8112 + }, + { + "epoch": 0.8558016877637131, + "grad_norm": 0.37247905135154724, + "learning_rate": 0.0001001577102652013, + "loss": 1.8328, + "step": 8113 + }, + { + "epoch": 0.8559071729957806, + "grad_norm": 0.3603208661079407, + "learning_rate": 0.00010001879424312283, + "loss": 1.8961, + "step": 8114 + }, + { + "epoch": 0.8560126582278481, + "grad_norm": 0.3522854745388031, + "learning_rate": 9.988007089379161e-05, + "loss": 1.8778, + "step": 8115 + }, + { + "epoch": 0.8561181434599157, + "grad_norm": 0.3483900725841522, + "learning_rate": 9.974153994997569e-05, + "loss": 1.8884, + "step": 8116 + }, + { + "epoch": 0.8562236286919831, + "grad_norm": 0.368760883808136, + "learning_rate": 9.960320114481391e-05, + "loss": 1.9019, + "step": 8117 + }, + { + "epoch": 0.8563291139240506, + "grad_norm": 0.36969903111457825, + "learning_rate": 9.946505421181513e-05, + "loss": 1.844, + "step": 8118 + }, + { + "epoch": 0.8564345991561182, + "grad_norm": 0.352465957403183, + "learning_rate": 9.932709888485788e-05, + "loss": 1.8432, + "step": 8119 + }, + { + "epoch": 0.8565400843881856, + "grad_norm": 0.3816291093826294, + "learning_rate": 9.918933489818986e-05, + "loss": 1.8347, + "step": 8120 + }, + { + "epoch": 0.8566455696202532, + "grad_norm": 0.3474489748477936, + 
"learning_rate": 9.905176198642719e-05, + "loss": 1.8482, + "step": 8121 + }, + { + "epoch": 0.8567510548523207, + "grad_norm": 0.36846408247947693, + "learning_rate": 9.891437988455427e-05, + "loss": 1.8584, + "step": 8122 + }, + { + "epoch": 0.8568565400843882, + "grad_norm": 0.34499886631965637, + "learning_rate": 9.877718832792286e-05, + "loss": 1.8542, + "step": 8123 + }, + { + "epoch": 0.8569620253164557, + "grad_norm": 0.34545376896858215, + "learning_rate": 9.864018705225193e-05, + "loss": 1.8687, + "step": 8124 + }, + { + "epoch": 0.8570675105485233, + "grad_norm": 0.3509027063846588, + "learning_rate": 9.850337579362703e-05, + "loss": 1.8867, + "step": 8125 + }, + { + "epoch": 0.8571729957805907, + "grad_norm": 0.333566814661026, + "learning_rate": 9.836675428849958e-05, + "loss": 1.8795, + "step": 8126 + }, + { + "epoch": 0.8572784810126582, + "grad_norm": 0.33969762921333313, + "learning_rate": 9.823032227368671e-05, + "loss": 1.8843, + "step": 8127 + }, + { + "epoch": 0.8573839662447258, + "grad_norm": 0.34840378165245056, + "learning_rate": 9.809407948637043e-05, + "loss": 1.8731, + "step": 8128 + }, + { + "epoch": 0.8574894514767932, + "grad_norm": 0.3556610941886902, + "learning_rate": 9.79580256640974e-05, + "loss": 1.9006, + "step": 8129 + }, + { + "epoch": 0.8575949367088608, + "grad_norm": 0.3734126389026642, + "learning_rate": 9.782216054477828e-05, + "loss": 1.8558, + "step": 8130 + }, + { + "epoch": 0.8577004219409282, + "grad_norm": 0.34472590684890747, + "learning_rate": 9.76864838666871e-05, + "loss": 1.85, + "step": 8131 + }, + { + "epoch": 0.8578059071729958, + "grad_norm": 0.3478164076805115, + "learning_rate": 9.755099536846107e-05, + "loss": 1.8792, + "step": 8132 + }, + { + "epoch": 0.8579113924050633, + "grad_norm": 0.3664385676383972, + "learning_rate": 9.741569478909979e-05, + "loss": 1.8663, + "step": 8133 + }, + { + "epoch": 0.8580168776371307, + "grad_norm": 0.34184131026268005, + "learning_rate": 9.728058186796492e-05, + 
"loss": 1.8595, + "step": 8134 + }, + { + "epoch": 0.8581223628691983, + "grad_norm": 0.35168665647506714, + "learning_rate": 9.714565634477964e-05, + "loss": 1.8641, + "step": 8135 + }, + { + "epoch": 0.8582278481012658, + "grad_norm": 0.3542643189430237, + "learning_rate": 9.7010917959628e-05, + "loss": 1.8808, + "step": 8136 + }, + { + "epoch": 0.8583333333333333, + "grad_norm": 0.3496454060077667, + "learning_rate": 9.687636645295472e-05, + "loss": 1.8847, + "step": 8137 + }, + { + "epoch": 0.8584388185654008, + "grad_norm": 0.3519970774650574, + "learning_rate": 9.674200156556436e-05, + "loss": 1.8617, + "step": 8138 + }, + { + "epoch": 0.8585443037974684, + "grad_norm": 0.3646875321865082, + "learning_rate": 9.660782303862107e-05, + "loss": 1.861, + "step": 8139 + }, + { + "epoch": 0.8586497890295358, + "grad_norm": 0.3524155020713806, + "learning_rate": 9.647383061364803e-05, + "loss": 1.8671, + "step": 8140 + }, + { + "epoch": 0.8587552742616034, + "grad_norm": 0.33220136165618896, + "learning_rate": 9.634002403252676e-05, + "loss": 1.8372, + "step": 8141 + }, + { + "epoch": 0.8588607594936709, + "grad_norm": 0.33947640657424927, + "learning_rate": 9.6206403037497e-05, + "loss": 1.8653, + "step": 8142 + }, + { + "epoch": 0.8589662447257383, + "grad_norm": 0.35294198989868164, + "learning_rate": 9.60729673711558e-05, + "loss": 1.8748, + "step": 8143 + }, + { + "epoch": 0.8590717299578059, + "grad_norm": 0.36597323417663574, + "learning_rate": 9.593971677645732e-05, + "loss": 1.8547, + "step": 8144 + }, + { + "epoch": 0.8591772151898734, + "grad_norm": 0.361380934715271, + "learning_rate": 9.58066509967123e-05, + "loss": 1.8463, + "step": 8145 + }, + { + "epoch": 0.8592827004219409, + "grad_norm": 0.36234939098358154, + "learning_rate": 9.56737697755873e-05, + "loss": 1.909, + "step": 8146 + }, + { + "epoch": 0.8593881856540084, + "grad_norm": 0.34319740533828735, + "learning_rate": 9.554107285710464e-05, + "loss": 1.8747, + "step": 8147 + }, + { + "epoch": 
0.859493670886076, + "grad_norm": 0.36284321546554565, + "learning_rate": 9.540855998564147e-05, + "loss": 1.8552, + "step": 8148 + }, + { + "epoch": 0.8595991561181434, + "grad_norm": 0.3526425361633301, + "learning_rate": 9.527623090592962e-05, + "loss": 1.8493, + "step": 8149 + }, + { + "epoch": 0.859704641350211, + "grad_norm": 0.3682006001472473, + "learning_rate": 9.514408536305497e-05, + "loss": 1.8904, + "step": 8150 + }, + { + "epoch": 0.8598101265822785, + "grad_norm": 0.36461135745048523, + "learning_rate": 9.501212310245681e-05, + "loss": 1.8726, + "step": 8151 + }, + { + "epoch": 0.859915611814346, + "grad_norm": 0.3577125072479248, + "learning_rate": 9.488034386992769e-05, + "loss": 1.8594, + "step": 8152 + }, + { + "epoch": 0.8600210970464135, + "grad_norm": 0.3719693422317505, + "learning_rate": 9.474874741161267e-05, + "loss": 1.8958, + "step": 8153 + }, + { + "epoch": 0.860126582278481, + "grad_norm": 0.357893705368042, + "learning_rate": 9.461733347400877e-05, + "loss": 1.8871, + "step": 8154 + }, + { + "epoch": 0.8602320675105485, + "grad_norm": 0.34748411178588867, + "learning_rate": 9.448610180396487e-05, + "loss": 1.8826, + "step": 8155 + }, + { + "epoch": 0.860337552742616, + "grad_norm": 0.3943416476249695, + "learning_rate": 9.435505214868068e-05, + "loss": 1.8914, + "step": 8156 + }, + { + "epoch": 0.8604430379746836, + "grad_norm": 0.3442302644252777, + "learning_rate": 9.422418425570673e-05, + "loss": 1.879, + "step": 8157 + }, + { + "epoch": 0.860548523206751, + "grad_norm": 0.37039732933044434, + "learning_rate": 9.409349787294371e-05, + "loss": 1.8862, + "step": 8158 + }, + { + "epoch": 0.8606540084388186, + "grad_norm": 0.33635109663009644, + "learning_rate": 9.396299274864176e-05, + "loss": 1.8601, + "step": 8159 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 0.32605084776878357, + "learning_rate": 9.383266863140043e-05, + "loss": 1.8735, + "step": 8160 + }, + { + "epoch": 0.8608649789029535, + "grad_norm": 
0.3735434114933014, + "learning_rate": 9.370252527016777e-05, + "loss": 1.8423, + "step": 8161 + }, + { + "epoch": 0.8609704641350211, + "grad_norm": 0.4102347493171692, + "learning_rate": 9.357256241424012e-05, + "loss": 1.8828, + "step": 8162 + }, + { + "epoch": 0.8610759493670886, + "grad_norm": 0.363688588142395, + "learning_rate": 9.34427798132616e-05, + "loss": 1.8791, + "step": 8163 + }, + { + "epoch": 0.8611814345991561, + "grad_norm": 0.3553178906440735, + "learning_rate": 9.331317721722338e-05, + "loss": 1.8385, + "step": 8164 + }, + { + "epoch": 0.8612869198312236, + "grad_norm": 0.34431323409080505, + "learning_rate": 9.318375437646363e-05, + "loss": 1.8725, + "step": 8165 + }, + { + "epoch": 0.8613924050632912, + "grad_norm": 0.3514340817928314, + "learning_rate": 9.305451104166652e-05, + "loss": 1.8923, + "step": 8166 + }, + { + "epoch": 0.8614978902953586, + "grad_norm": 0.3470463454723358, + "learning_rate": 9.292544696386227e-05, + "loss": 1.891, + "step": 8167 + }, + { + "epoch": 0.8616033755274262, + "grad_norm": 0.3353218138217926, + "learning_rate": 9.279656189442629e-05, + "loss": 1.8728, + "step": 8168 + }, + { + "epoch": 0.8617088607594937, + "grad_norm": 0.34396201372146606, + "learning_rate": 9.266785558507876e-05, + "loss": 1.8535, + "step": 8169 + }, + { + "epoch": 0.8618143459915611, + "grad_norm": 0.3611760437488556, + "learning_rate": 9.25393277878844e-05, + "loss": 1.8369, + "step": 8170 + }, + { + "epoch": 0.8619198312236287, + "grad_norm": 0.3489040434360504, + "learning_rate": 9.241097825525163e-05, + "loss": 1.8696, + "step": 8171 + }, + { + "epoch": 0.8620253164556962, + "grad_norm": 0.3572551906108856, + "learning_rate": 9.228280673993234e-05, + "loss": 1.8556, + "step": 8172 + }, + { + "epoch": 0.8621308016877637, + "grad_norm": 0.34435760974884033, + "learning_rate": 9.215481299502145e-05, + "loss": 1.9004, + "step": 8173 + }, + { + "epoch": 0.8622362869198312, + "grad_norm": 0.36705026030540466, + "learning_rate": 
9.202699677395613e-05, + "loss": 1.8794, + "step": 8174 + }, + { + "epoch": 0.8623417721518988, + "grad_norm": 0.3689478039741516, + "learning_rate": 9.189935783051572e-05, + "loss": 1.8407, + "step": 8175 + }, + { + "epoch": 0.8624472573839662, + "grad_norm": 0.3641527593135834, + "learning_rate": 9.177189591882086e-05, + "loss": 1.8887, + "step": 8176 + }, + { + "epoch": 0.8625527426160338, + "grad_norm": 0.33640262484550476, + "learning_rate": 9.164461079333342e-05, + "loss": 1.8959, + "step": 8177 + }, + { + "epoch": 0.8626582278481013, + "grad_norm": 0.36184659600257874, + "learning_rate": 9.151750220885574e-05, + "loss": 1.851, + "step": 8178 + }, + { + "epoch": 0.8627637130801687, + "grad_norm": 0.3785282373428345, + "learning_rate": 9.139056992053017e-05, + "loss": 1.8646, + "step": 8179 + }, + { + "epoch": 0.8628691983122363, + "grad_norm": 0.34389597177505493, + "learning_rate": 9.126381368383881e-05, + "loss": 1.9001, + "step": 8180 + }, + { + "epoch": 0.8629746835443038, + "grad_norm": 0.33825522661209106, + "learning_rate": 9.113723325460276e-05, + "loss": 1.8512, + "step": 8181 + }, + { + "epoch": 0.8630801687763713, + "grad_norm": 0.34924671053886414, + "learning_rate": 9.101082838898188e-05, + "loss": 1.8828, + "step": 8182 + }, + { + "epoch": 0.8631856540084388, + "grad_norm": 0.34387052059173584, + "learning_rate": 9.088459884347427e-05, + "loss": 1.8728, + "step": 8183 + }, + { + "epoch": 0.8632911392405064, + "grad_norm": 0.3379242420196533, + "learning_rate": 9.07585443749156e-05, + "loss": 1.8839, + "step": 8184 + }, + { + "epoch": 0.8633966244725738, + "grad_norm": 0.35094019770622253, + "learning_rate": 9.063266474047899e-05, + "loss": 1.8716, + "step": 8185 + }, + { + "epoch": 0.8635021097046414, + "grad_norm": 0.3474300503730774, + "learning_rate": 9.050695969767418e-05, + "loss": 1.8676, + "step": 8186 + }, + { + "epoch": 0.8636075949367089, + "grad_norm": 0.37489309906959534, + "learning_rate": 9.038142900434736e-05, + "loss": 1.8673, + 
"step": 8187 + }, + { + "epoch": 0.8637130801687763, + "grad_norm": 0.32924535870552063, + "learning_rate": 9.02560724186806e-05, + "loss": 1.8636, + "step": 8188 + }, + { + "epoch": 0.8638185654008439, + "grad_norm": 0.3665507733821869, + "learning_rate": 9.01308896991912e-05, + "loss": 1.8754, + "step": 8189 + }, + { + "epoch": 0.8639240506329114, + "grad_norm": 0.3587382137775421, + "learning_rate": 9.000588060473158e-05, + "loss": 1.8571, + "step": 8190 + }, + { + "epoch": 0.8640295358649789, + "grad_norm": 0.3443779945373535, + "learning_rate": 8.988104489448849e-05, + "loss": 1.875, + "step": 8191 + }, + { + "epoch": 0.8641350210970464, + "grad_norm": 0.3415895104408264, + "learning_rate": 8.975638232798275e-05, + "loss": 1.87, + "step": 8192 + }, + { + "epoch": 0.864240506329114, + "grad_norm": 0.36854133009910583, + "learning_rate": 8.963189266506874e-05, + "loss": 1.8694, + "step": 8193 + }, + { + "epoch": 0.8643459915611814, + "grad_norm": 0.3579379618167877, + "learning_rate": 8.950757566593381e-05, + "loss": 1.8951, + "step": 8194 + }, + { + "epoch": 0.864451476793249, + "grad_norm": 0.36971795558929443, + "learning_rate": 8.938343109109802e-05, + "loss": 1.8403, + "step": 8195 + }, + { + "epoch": 0.8645569620253165, + "grad_norm": 0.34169673919677734, + "learning_rate": 8.925945870141361e-05, + "loss": 1.9224, + "step": 8196 + }, + { + "epoch": 0.864662447257384, + "grad_norm": 0.3378649950027466, + "learning_rate": 8.913565825806436e-05, + "loss": 1.8854, + "step": 8197 + }, + { + "epoch": 0.8647679324894515, + "grad_norm": 0.3729082942008972, + "learning_rate": 8.901202952256546e-05, + "loss": 1.881, + "step": 8198 + }, + { + "epoch": 0.8648734177215189, + "grad_norm": 0.36061546206474304, + "learning_rate": 8.88885722567627e-05, + "loss": 1.9036, + "step": 8199 + }, + { + "epoch": 0.8649789029535865, + "grad_norm": 0.35621970891952515, + "learning_rate": 8.876528622283232e-05, + "loss": 1.8494, + "step": 8200 + }, + { + "epoch": 0.865084388185654, + 
"grad_norm": 0.34787195920944214, + "learning_rate": 8.864217118328042e-05, + "loss": 1.8756, + "step": 8201 + }, + { + "epoch": 0.8651898734177215, + "grad_norm": 0.34799128770828247, + "learning_rate": 8.851922690094234e-05, + "loss": 1.909, + "step": 8202 + }, + { + "epoch": 0.865295358649789, + "grad_norm": 0.3387766480445862, + "learning_rate": 8.839645313898257e-05, + "loss": 1.848, + "step": 8203 + }, + { + "epoch": 0.8654008438818566, + "grad_norm": 0.33162811398506165, + "learning_rate": 8.827384966089387e-05, + "loss": 1.8601, + "step": 8204 + }, + { + "epoch": 0.865506329113924, + "grad_norm": 0.3383134603500366, + "learning_rate": 8.81514162304972e-05, + "loss": 1.872, + "step": 8205 + }, + { + "epoch": 0.8656118143459915, + "grad_norm": 0.3579888939857483, + "learning_rate": 8.802915261194108e-05, + "loss": 1.8623, + "step": 8206 + }, + { + "epoch": 0.8657172995780591, + "grad_norm": 0.37141481041908264, + "learning_rate": 8.7907058569701e-05, + "loss": 1.8834, + "step": 8207 + }, + { + "epoch": 0.8658227848101265, + "grad_norm": 0.3457212746143341, + "learning_rate": 8.778513386857931e-05, + "loss": 1.886, + "step": 8208 + }, + { + "epoch": 0.8659282700421941, + "grad_norm": 0.35277149081230164, + "learning_rate": 8.766337827370438e-05, + "loss": 1.8527, + "step": 8209 + }, + { + "epoch": 0.8660337552742616, + "grad_norm": 0.3618517518043518, + "learning_rate": 8.754179155053052e-05, + "loss": 1.8269, + "step": 8210 + }, + { + "epoch": 0.8661392405063291, + "grad_norm": 0.33169710636138916, + "learning_rate": 8.742037346483729e-05, + "loss": 1.8568, + "step": 8211 + }, + { + "epoch": 0.8662447257383966, + "grad_norm": 0.3316635489463806, + "learning_rate": 8.7299123782729e-05, + "loss": 1.8632, + "step": 8212 + }, + { + "epoch": 0.8663502109704642, + "grad_norm": 0.37564757466316223, + "learning_rate": 8.717804227063455e-05, + "loss": 1.8846, + "step": 8213 + }, + { + "epoch": 0.8664556962025316, + "grad_norm": 0.3404627740383148, + "learning_rate": 
8.705712869530661e-05, + "loss": 1.8681, + "step": 8214 + }, + { + "epoch": 0.8665611814345991, + "grad_norm": 0.3520433306694031, + "learning_rate": 8.69363828238215e-05, + "loss": 1.8932, + "step": 8215 + }, + { + "epoch": 0.8666666666666667, + "grad_norm": 0.3657194972038269, + "learning_rate": 8.681580442357857e-05, + "loss": 1.8604, + "step": 8216 + }, + { + "epoch": 0.8667721518987341, + "grad_norm": 0.3663824200630188, + "learning_rate": 8.66953932622997e-05, + "loss": 1.89, + "step": 8217 + }, + { + "epoch": 0.8668776371308017, + "grad_norm": 0.3632323443889618, + "learning_rate": 8.657514910802906e-05, + "loss": 1.8468, + "step": 8218 + }, + { + "epoch": 0.8669831223628692, + "grad_norm": 0.3299463093280792, + "learning_rate": 8.645507172913238e-05, + "loss": 1.881, + "step": 8219 + }, + { + "epoch": 0.8670886075949367, + "grad_norm": 0.3410460948944092, + "learning_rate": 8.63351608942968e-05, + "loss": 1.8949, + "step": 8220 + }, + { + "epoch": 0.8671940928270042, + "grad_norm": 0.33051586151123047, + "learning_rate": 8.62154163725303e-05, + "loss": 1.8519, + "step": 8221 + }, + { + "epoch": 0.8672995780590718, + "grad_norm": 0.3679036498069763, + "learning_rate": 8.609583793316103e-05, + "loss": 1.87, + "step": 8222 + }, + { + "epoch": 0.8674050632911392, + "grad_norm": 0.3499612510204315, + "learning_rate": 8.597642534583735e-05, + "loss": 1.872, + "step": 8223 + }, + { + "epoch": 0.8675105485232067, + "grad_norm": 0.3423275947570801, + "learning_rate": 8.585717838052689e-05, + "loss": 1.8716, + "step": 8224 + }, + { + "epoch": 0.8676160337552743, + "grad_norm": 0.35300007462501526, + "learning_rate": 8.573809680751644e-05, + "loss": 1.9119, + "step": 8225 + }, + { + "epoch": 0.8677215189873417, + "grad_norm": 0.34130263328552246, + "learning_rate": 8.561918039741144e-05, + "loss": 1.8764, + "step": 8226 + }, + { + "epoch": 0.8678270042194093, + "grad_norm": 0.35482853651046753, + "learning_rate": 8.550042892113534e-05, + "loss": 1.8447, + "step": 8227 
+ }, + { + "epoch": 0.8679324894514768, + "grad_norm": 0.34575462341308594, + "learning_rate": 8.538184214992946e-05, + "loss": 1.8761, + "step": 8228 + }, + { + "epoch": 0.8680379746835443, + "grad_norm": 0.3657102584838867, + "learning_rate": 8.52634198553523e-05, + "loss": 1.8616, + "step": 8229 + }, + { + "epoch": 0.8681434599156118, + "grad_norm": 0.3606880009174347, + "learning_rate": 8.514516180927926e-05, + "loss": 1.8912, + "step": 8230 + }, + { + "epoch": 0.8682489451476794, + "grad_norm": 0.35621559619903564, + "learning_rate": 8.502706778390219e-05, + "loss": 1.8791, + "step": 8231 + }, + { + "epoch": 0.8683544303797468, + "grad_norm": 0.37077847123146057, + "learning_rate": 8.490913755172874e-05, + "loss": 1.858, + "step": 8232 + }, + { + "epoch": 0.8684599156118143, + "grad_norm": 0.3789447546005249, + "learning_rate": 8.479137088558228e-05, + "loss": 1.877, + "step": 8233 + }, + { + "epoch": 0.8685654008438819, + "grad_norm": 0.37842002511024475, + "learning_rate": 8.467376755860109e-05, + "loss": 1.8901, + "step": 8234 + }, + { + "epoch": 0.8686708860759493, + "grad_norm": 0.36533665657043457, + "learning_rate": 8.455632734423823e-05, + "loss": 1.8613, + "step": 8235 + }, + { + "epoch": 0.8687763713080169, + "grad_norm": 0.3437618315219879, + "learning_rate": 8.443905001626099e-05, + "loss": 1.8696, + "step": 8236 + }, + { + "epoch": 0.8688818565400844, + "grad_norm": 0.3363083600997925, + "learning_rate": 8.432193534875027e-05, + "loss": 1.8849, + "step": 8237 + }, + { + "epoch": 0.8689873417721519, + "grad_norm": 0.37087351083755493, + "learning_rate": 8.420498311610047e-05, + "loss": 1.8987, + "step": 8238 + }, + { + "epoch": 0.8690928270042194, + "grad_norm": 0.37040218710899353, + "learning_rate": 8.408819309301891e-05, + "loss": 1.8359, + "step": 8239 + }, + { + "epoch": 0.869198312236287, + "grad_norm": 0.33824750781059265, + "learning_rate": 8.397156505452524e-05, + "loss": 1.8654, + "step": 8240 + }, + { + "epoch": 0.8693037974683544, + 
"grad_norm": 0.36621150374412537, + "learning_rate": 8.38550987759513e-05, + "loss": 1.8867, + "step": 8241 + }, + { + "epoch": 0.869409282700422, + "grad_norm": 0.338739812374115, + "learning_rate": 8.373879403294042e-05, + "loss": 1.8559, + "step": 8242 + }, + { + "epoch": 0.8695147679324895, + "grad_norm": 0.3708372712135315, + "learning_rate": 8.36226506014472e-05, + "loss": 1.9075, + "step": 8243 + }, + { + "epoch": 0.8696202531645569, + "grad_norm": 0.3461562395095825, + "learning_rate": 8.350666825773698e-05, + "loss": 1.8675, + "step": 8244 + }, + { + "epoch": 0.8697257383966245, + "grad_norm": 0.3579878509044647, + "learning_rate": 8.339084677838533e-05, + "loss": 1.8528, + "step": 8245 + }, + { + "epoch": 0.869831223628692, + "grad_norm": 0.35042932629585266, + "learning_rate": 8.327518594027779e-05, + "loss": 1.8933, + "step": 8246 + }, + { + "epoch": 0.8699367088607595, + "grad_norm": 0.3535434901714325, + "learning_rate": 8.315968552060927e-05, + "loss": 1.8855, + "step": 8247 + }, + { + "epoch": 0.870042194092827, + "grad_norm": 0.35271531343460083, + "learning_rate": 8.304434529688379e-05, + "loss": 1.8939, + "step": 8248 + }, + { + "epoch": 0.8701476793248946, + "grad_norm": 0.35916051268577576, + "learning_rate": 8.292916504691398e-05, + "loss": 1.8742, + "step": 8249 + }, + { + "epoch": 0.870253164556962, + "grad_norm": 0.3450419306755066, + "learning_rate": 8.28141445488205e-05, + "loss": 1.8991, + "step": 8250 + }, + { + "epoch": 0.8703586497890295, + "grad_norm": 0.374846488237381, + "learning_rate": 8.269928358103191e-05, + "loss": 1.9009, + "step": 8251 + }, + { + "epoch": 0.8704641350210971, + "grad_norm": 0.34916239976882935, + "learning_rate": 8.258458192228395e-05, + "loss": 1.8658, + "step": 8252 + }, + { + "epoch": 0.8705696202531645, + "grad_norm": 0.3321937024593353, + "learning_rate": 8.247003935161934e-05, + "loss": 1.8711, + "step": 8253 + }, + { + "epoch": 0.8706751054852321, + "grad_norm": 0.34533780813217163, + "learning_rate": 
8.235565564838727e-05, + "loss": 1.8549, + "step": 8254 + }, + { + "epoch": 0.8707805907172996, + "grad_norm": 0.34345221519470215, + "learning_rate": 8.224143059224287e-05, + "loss": 1.9107, + "step": 8255 + }, + { + "epoch": 0.8708860759493671, + "grad_norm": 0.3460540175437927, + "learning_rate": 8.2127363963147e-05, + "loss": 1.8739, + "step": 8256 + }, + { + "epoch": 0.8709915611814346, + "grad_norm": 0.3562089502811432, + "learning_rate": 8.201345554136556e-05, + "loss": 1.875, + "step": 8257 + }, + { + "epoch": 0.8710970464135022, + "grad_norm": 0.34499356150627136, + "learning_rate": 8.189970510746936e-05, + "loss": 1.8554, + "step": 8258 + }, + { + "epoch": 0.8712025316455696, + "grad_norm": 0.33976489305496216, + "learning_rate": 8.178611244233354e-05, + "loss": 1.8718, + "step": 8259 + }, + { + "epoch": 0.8713080168776371, + "grad_norm": 0.34552842378616333, + "learning_rate": 8.167267732713705e-05, + "loss": 1.8713, + "step": 8260 + }, + { + "epoch": 0.8714135021097047, + "grad_norm": 0.35453706979751587, + "learning_rate": 8.155939954336243e-05, + "loss": 1.8689, + "step": 8261 + }, + { + "epoch": 0.8715189873417721, + "grad_norm": 0.33668839931488037, + "learning_rate": 8.144627887279526e-05, + "loss": 1.8648, + "step": 8262 + }, + { + "epoch": 0.8716244725738397, + "grad_norm": 0.35321399569511414, + "learning_rate": 8.13333150975238e-05, + "loss": 1.8736, + "step": 8263 + }, + { + "epoch": 0.8717299578059071, + "grad_norm": 0.3391433656215668, + "learning_rate": 8.122050799993858e-05, + "loss": 1.8525, + "step": 8264 + }, + { + "epoch": 0.8718354430379747, + "grad_norm": 0.36559030413627625, + "learning_rate": 8.110785736273183e-05, + "loss": 1.8513, + "step": 8265 + }, + { + "epoch": 0.8719409282700422, + "grad_norm": 0.3452458679676056, + "learning_rate": 8.099536296889734e-05, + "loss": 1.8782, + "step": 8266 + }, + { + "epoch": 0.8720464135021097, + "grad_norm": 0.3500586748123169, + "learning_rate": 8.08830246017297e-05, + "loss": 1.8816, + 
"step": 8267 + }, + { + "epoch": 0.8721518987341772, + "grad_norm": 0.3620140850543976, + "learning_rate": 8.077084204482424e-05, + "loss": 1.8362, + "step": 8268 + }, + { + "epoch": 0.8722573839662447, + "grad_norm": 0.3527824580669403, + "learning_rate": 8.065881508207636e-05, + "loss": 1.895, + "step": 8269 + }, + { + "epoch": 0.8723628691983122, + "grad_norm": 0.3555591106414795, + "learning_rate": 8.054694349768114e-05, + "loss": 1.8731, + "step": 8270 + }, + { + "epoch": 0.8724683544303797, + "grad_norm": 0.34301143884658813, + "learning_rate": 8.043522707613312e-05, + "loss": 1.9031, + "step": 8271 + }, + { + "epoch": 0.8725738396624473, + "grad_norm": 0.35820192098617554, + "learning_rate": 8.032366560222553e-05, + "loss": 1.8558, + "step": 8272 + }, + { + "epoch": 0.8726793248945147, + "grad_norm": 0.35403239727020264, + "learning_rate": 8.021225886105027e-05, + "loss": 1.8619, + "step": 8273 + }, + { + "epoch": 0.8727848101265823, + "grad_norm": 0.36544397473335266, + "learning_rate": 8.010100663799726e-05, + "loss": 1.8623, + "step": 8274 + }, + { + "epoch": 0.8728902953586498, + "grad_norm": 0.36062127351760864, + "learning_rate": 7.998990871875402e-05, + "loss": 1.8662, + "step": 8275 + }, + { + "epoch": 0.8729957805907173, + "grad_norm": 0.38927122950553894, + "learning_rate": 7.987896488930541e-05, + "loss": 1.8765, + "step": 8276 + }, + { + "epoch": 0.8731012658227848, + "grad_norm": 0.33911359310150146, + "learning_rate": 7.976817493593301e-05, + "loss": 1.8536, + "step": 8277 + }, + { + "epoch": 0.8732067510548523, + "grad_norm": 0.36781683564186096, + "learning_rate": 7.965753864521492e-05, + "loss": 1.8363, + "step": 8278 + }, + { + "epoch": 0.8733122362869198, + "grad_norm": 0.3761378824710846, + "learning_rate": 7.954705580402525e-05, + "loss": 1.8664, + "step": 8279 + }, + { + "epoch": 0.8734177215189873, + "grad_norm": 0.3540000021457672, + "learning_rate": 7.943672619953359e-05, + "loss": 1.8281, + "step": 8280 + }, + { + "epoch": 
0.8735232067510549, + "grad_norm": 0.355813592672348, + "learning_rate": 7.932654961920486e-05, + "loss": 1.8575, + "step": 8281 + }, + { + "epoch": 0.8736286919831223, + "grad_norm": 0.35996973514556885, + "learning_rate": 7.921652585079873e-05, + "loss": 1.881, + "step": 8282 + }, + { + "epoch": 0.8737341772151899, + "grad_norm": 0.34887930750846863, + "learning_rate": 7.910665468236916e-05, + "loss": 1.8729, + "step": 8283 + }, + { + "epoch": 0.8738396624472574, + "grad_norm": 0.3309483230113983, + "learning_rate": 7.899693590226418e-05, + "loss": 1.8437, + "step": 8284 + }, + { + "epoch": 0.8739451476793249, + "grad_norm": 0.3534908890724182, + "learning_rate": 7.888736929912525e-05, + "loss": 1.8344, + "step": 8285 + }, + { + "epoch": 0.8740506329113924, + "grad_norm": 0.336465984582901, + "learning_rate": 7.877795466188711e-05, + "loss": 1.8279, + "step": 8286 + }, + { + "epoch": 0.87415611814346, + "grad_norm": 0.35318130254745483, + "learning_rate": 7.866869177977722e-05, + "loss": 1.8755, + "step": 8287 + }, + { + "epoch": 0.8742616033755274, + "grad_norm": 0.34221094846725464, + "learning_rate": 7.855958044231526e-05, + "loss": 1.8808, + "step": 8288 + }, + { + "epoch": 0.8743670886075949, + "grad_norm": 0.36512476205825806, + "learning_rate": 7.845062043931299e-05, + "loss": 1.8824, + "step": 8289 + }, + { + "epoch": 0.8744725738396625, + "grad_norm": 0.3338331878185272, + "learning_rate": 7.834181156087357e-05, + "loss": 1.8274, + "step": 8290 + }, + { + "epoch": 0.8745780590717299, + "grad_norm": 0.35658028721809387, + "learning_rate": 7.823315359739135e-05, + "loss": 1.8994, + "step": 8291 + }, + { + "epoch": 0.8746835443037975, + "grad_norm": 0.3490496873855591, + "learning_rate": 7.812464633955146e-05, + "loss": 1.8819, + "step": 8292 + }, + { + "epoch": 0.874789029535865, + "grad_norm": 0.35260000824928284, + "learning_rate": 7.801628957832916e-05, + "loss": 1.8461, + "step": 8293 + }, + { + "epoch": 0.8748945147679325, + "grad_norm": 
0.3501712679862976, + "learning_rate": 7.790808310498984e-05, + "loss": 1.8854, + "step": 8294 + }, + { + "epoch": 0.875, + "grad_norm": 0.354076087474823, + "learning_rate": 7.78000267110882e-05, + "loss": 1.901, + "step": 8295 + }, + { + "epoch": 0.8751054852320675, + "grad_norm": 0.3435710668563843, + "learning_rate": 7.769212018846815e-05, + "loss": 1.8876, + "step": 8296 + }, + { + "epoch": 0.875210970464135, + "grad_norm": 0.34309089183807373, + "learning_rate": 7.758436332926238e-05, + "loss": 1.8866, + "step": 8297 + }, + { + "epoch": 0.8753164556962025, + "grad_norm": 0.35187703371047974, + "learning_rate": 7.747675592589168e-05, + "loss": 1.8812, + "step": 8298 + }, + { + "epoch": 0.8754219409282701, + "grad_norm": 0.36885741353034973, + "learning_rate": 7.736929777106499e-05, + "loss": 1.8908, + "step": 8299 + }, + { + "epoch": 0.8755274261603375, + "grad_norm": 0.34501516819000244, + "learning_rate": 7.726198865777852e-05, + "loss": 1.8713, + "step": 8300 + }, + { + "epoch": 0.8756329113924051, + "grad_norm": 0.34739288687705994, + "learning_rate": 7.715482837931577e-05, + "loss": 1.8863, + "step": 8301 + }, + { + "epoch": 0.8757383966244726, + "grad_norm": 0.3651568591594696, + "learning_rate": 7.704781672924692e-05, + "loss": 1.8754, + "step": 8302 + }, + { + "epoch": 0.87584388185654, + "grad_norm": 0.34587305784225464, + "learning_rate": 7.694095350142833e-05, + "loss": 1.852, + "step": 8303 + }, + { + "epoch": 0.8759493670886076, + "grad_norm": 0.3717268109321594, + "learning_rate": 7.683423849000246e-05, + "loss": 1.8565, + "step": 8304 + }, + { + "epoch": 0.8760548523206751, + "grad_norm": 0.35759779810905457, + "learning_rate": 7.672767148939714e-05, + "loss": 1.8473, + "step": 8305 + }, + { + "epoch": 0.8761603375527426, + "grad_norm": 0.3427722454071045, + "learning_rate": 7.66212522943254e-05, + "loss": 1.8714, + "step": 8306 + }, + { + "epoch": 0.8762658227848101, + "grad_norm": 0.3796320855617523, + "learning_rate": 7.651498069978505e-05, + 
"loss": 1.8471, + "step": 8307 + }, + { + "epoch": 0.8763713080168777, + "grad_norm": 0.35944700241088867, + "learning_rate": 7.640885650105804e-05, + "loss": 1.865, + "step": 8308 + }, + { + "epoch": 0.8764767932489451, + "grad_norm": 0.36039409041404724, + "learning_rate": 7.630287949371051e-05, + "loss": 1.8268, + "step": 8309 + }, + { + "epoch": 0.8765822784810127, + "grad_norm": 0.3510807454586029, + "learning_rate": 7.61970494735919e-05, + "loss": 1.8702, + "step": 8310 + }, + { + "epoch": 0.8766877637130802, + "grad_norm": 0.3436391055583954, + "learning_rate": 7.6091366236835e-05, + "loss": 1.9006, + "step": 8311 + }, + { + "epoch": 0.8767932489451477, + "grad_norm": 0.36106207966804504, + "learning_rate": 7.598582957985526e-05, + "loss": 1.8772, + "step": 8312 + }, + { + "epoch": 0.8768987341772152, + "grad_norm": 0.41508376598358154, + "learning_rate": 7.588043929935049e-05, + "loss": 1.8863, + "step": 8313 + }, + { + "epoch": 0.8770042194092827, + "grad_norm": 0.37256669998168945, + "learning_rate": 7.577519519230054e-05, + "loss": 1.8852, + "step": 8314 + }, + { + "epoch": 0.8771097046413502, + "grad_norm": 0.35162782669067383, + "learning_rate": 7.567009705596672e-05, + "loss": 1.8722, + "step": 8315 + }, + { + "epoch": 0.8772151898734177, + "grad_norm": 0.3536425828933716, + "learning_rate": 7.556514468789169e-05, + "loss": 1.8558, + "step": 8316 + }, + { + "epoch": 0.8773206751054853, + "grad_norm": 0.3547583520412445, + "learning_rate": 7.546033788589884e-05, + "loss": 1.8489, + "step": 8317 + }, + { + "epoch": 0.8774261603375527, + "grad_norm": 0.34824997186660767, + "learning_rate": 7.53556764480919e-05, + "loss": 1.8578, + "step": 8318 + }, + { + "epoch": 0.8775316455696203, + "grad_norm": 0.363150030374527, + "learning_rate": 7.525116017285479e-05, + "loss": 1.8561, + "step": 8319 + }, + { + "epoch": 0.8776371308016878, + "grad_norm": 0.34551456570625305, + "learning_rate": 7.514678885885086e-05, + "loss": 1.8498, + "step": 8320 + }, + { + 
"epoch": 0.8777426160337553, + "grad_norm": 0.3385595381259918, + "learning_rate": 7.504256230502289e-05, + "loss": 1.8543, + "step": 8321 + }, + { + "epoch": 0.8778481012658228, + "grad_norm": 0.35400938987731934, + "learning_rate": 7.493848031059248e-05, + "loss": 1.8837, + "step": 8322 + }, + { + "epoch": 0.8779535864978903, + "grad_norm": 0.34276625514030457, + "learning_rate": 7.483454267505959e-05, + "loss": 1.8938, + "step": 8323 + }, + { + "epoch": 0.8780590717299578, + "grad_norm": 0.35162225365638733, + "learning_rate": 7.473074919820243e-05, + "loss": 1.8727, + "step": 8324 + }, + { + "epoch": 0.8781645569620253, + "grad_norm": 0.3722911477088928, + "learning_rate": 7.462709968007676e-05, + "loss": 1.8584, + "step": 8325 + }, + { + "epoch": 0.8782700421940929, + "grad_norm": 0.3458762764930725, + "learning_rate": 7.452359392101578e-05, + "loss": 1.8813, + "step": 8326 + }, + { + "epoch": 0.8783755274261603, + "grad_norm": 0.359882116317749, + "learning_rate": 7.442023172162959e-05, + "loss": 1.8836, + "step": 8327 + }, + { + "epoch": 0.8784810126582279, + "grad_norm": 0.389540433883667, + "learning_rate": 7.431701288280477e-05, + "loss": 1.8461, + "step": 8328 + }, + { + "epoch": 0.8785864978902953, + "grad_norm": 0.33912256360054016, + "learning_rate": 7.421393720570416e-05, + "loss": 1.8505, + "step": 8329 + }, + { + "epoch": 0.8786919831223629, + "grad_norm": 0.3470924496650696, + "learning_rate": 7.411100449176634e-05, + "loss": 1.8713, + "step": 8330 + }, + { + "epoch": 0.8787974683544304, + "grad_norm": 0.34017646312713623, + "learning_rate": 7.400821454270524e-05, + "loss": 1.8802, + "step": 8331 + }, + { + "epoch": 0.8789029535864978, + "grad_norm": 0.3741602599620819, + "learning_rate": 7.390556716050994e-05, + "loss": 1.9182, + "step": 8332 + }, + { + "epoch": 0.8790084388185654, + "grad_norm": 0.35028183460235596, + "learning_rate": 7.380306214744398e-05, + "loss": 1.845, + "step": 8333 + }, + { + "epoch": 0.8791139240506329, + "grad_norm": 
0.3459800183773041, + "learning_rate": 7.370069930604528e-05, + "loss": 1.876, + "step": 8334 + }, + { + "epoch": 0.8792194092827004, + "grad_norm": 0.3404410779476166, + "learning_rate": 7.359847843912566e-05, + "loss": 1.8925, + "step": 8335 + }, + { + "epoch": 0.8793248945147679, + "grad_norm": 0.35112297534942627, + "learning_rate": 7.349639934977028e-05, + "loss": 1.8566, + "step": 8336 + }, + { + "epoch": 0.8794303797468355, + "grad_norm": 0.3454596996307373, + "learning_rate": 7.33944618413376e-05, + "loss": 1.8477, + "step": 8337 + }, + { + "epoch": 0.8795358649789029, + "grad_norm": 0.34358397126197815, + "learning_rate": 7.329266571745865e-05, + "loss": 1.8474, + "step": 8338 + }, + { + "epoch": 0.8796413502109705, + "grad_norm": 0.3485214412212372, + "learning_rate": 7.319101078203692e-05, + "loss": 1.794, + "step": 8339 + }, + { + "epoch": 0.879746835443038, + "grad_norm": 0.33159974217414856, + "learning_rate": 7.308949683924792e-05, + "loss": 1.8342, + "step": 8340 + }, + { + "epoch": 0.8798523206751054, + "grad_norm": 0.3587484657764435, + "learning_rate": 7.29881236935386e-05, + "loss": 1.8465, + "step": 8341 + }, + { + "epoch": 0.879957805907173, + "grad_norm": 0.341315895318985, + "learning_rate": 7.288689114962734e-05, + "loss": 1.8469, + "step": 8342 + }, + { + "epoch": 0.8800632911392405, + "grad_norm": 0.35254621505737305, + "learning_rate": 7.278579901250316e-05, + "loss": 1.8362, + "step": 8343 + }, + { + "epoch": 0.880168776371308, + "grad_norm": 0.3408176004886627, + "learning_rate": 7.268484708742574e-05, + "loss": 1.8422, + "step": 8344 + }, + { + "epoch": 0.8802742616033755, + "grad_norm": 0.3324517011642456, + "learning_rate": 7.258403517992476e-05, + "loss": 1.8709, + "step": 8345 + }, + { + "epoch": 0.8803797468354431, + "grad_norm": 0.37970879673957825, + "learning_rate": 7.248336309579965e-05, + "loss": 1.8519, + "step": 8346 + }, + { + "epoch": 0.8804852320675105, + "grad_norm": 0.3476521372795105, + "learning_rate": 
7.238283064111919e-05, + "loss": 1.8956, + "step": 8347 + }, + { + "epoch": 0.880590717299578, + "grad_norm": 0.3260384500026703, + "learning_rate": 7.228243762222109e-05, + "loss": 1.8489, + "step": 8348 + }, + { + "epoch": 0.8806962025316456, + "grad_norm": 0.3470735251903534, + "learning_rate": 7.218218384571176e-05, + "loss": 1.8612, + "step": 8349 + }, + { + "epoch": 0.880801687763713, + "grad_norm": 0.3517494201660156, + "learning_rate": 7.208206911846581e-05, + "loss": 1.9032, + "step": 8350 + }, + { + "epoch": 0.8809071729957806, + "grad_norm": 0.35087302327156067, + "learning_rate": 7.198209324762562e-05, + "loss": 1.8828, + "step": 8351 + }, + { + "epoch": 0.8810126582278481, + "grad_norm": 0.35000863671302795, + "learning_rate": 7.188225604060121e-05, + "loss": 1.8339, + "step": 8352 + }, + { + "epoch": 0.8811181434599156, + "grad_norm": 0.3395305871963501, + "learning_rate": 7.178255730506956e-05, + "loss": 1.8188, + "step": 8353 + }, + { + "epoch": 0.8812236286919831, + "grad_norm": 0.35901543498039246, + "learning_rate": 7.16829968489745e-05, + "loss": 1.853, + "step": 8354 + }, + { + "epoch": 0.8813291139240507, + "grad_norm": 0.35142266750335693, + "learning_rate": 7.158357448052624e-05, + "loss": 1.8479, + "step": 8355 + }, + { + "epoch": 0.8814345991561181, + "grad_norm": 0.3564679026603699, + "learning_rate": 7.148429000820093e-05, + "loss": 1.8564, + "step": 8356 + }, + { + "epoch": 0.8815400843881857, + "grad_norm": 0.34653690457344055, + "learning_rate": 7.138514324074043e-05, + "loss": 1.9078, + "step": 8357 + }, + { + "epoch": 0.8816455696202532, + "grad_norm": 0.36560410261154175, + "learning_rate": 7.128613398715179e-05, + "loss": 1.8667, + "step": 8358 + }, + { + "epoch": 0.8817510548523206, + "grad_norm": 0.3369196355342865, + "learning_rate": 7.118726205670702e-05, + "loss": 1.8866, + "step": 8359 + }, + { + "epoch": 0.8818565400843882, + "grad_norm": 0.36694109439849854, + "learning_rate": 7.10885272589427e-05, + "loss": 1.8522, + 
"step": 8360 + }, + { + "epoch": 0.8819620253164557, + "grad_norm": 0.3503267168998718, + "learning_rate": 7.098992940365946e-05, + "loss": 1.8927, + "step": 8361 + }, + { + "epoch": 0.8820675105485232, + "grad_norm": 0.349261611700058, + "learning_rate": 7.089146830092187e-05, + "loss": 1.9003, + "step": 8362 + }, + { + "epoch": 0.8821729957805907, + "grad_norm": 0.34354516863822937, + "learning_rate": 7.079314376105778e-05, + "loss": 1.8757, + "step": 8363 + }, + { + "epoch": 0.8822784810126583, + "grad_norm": 0.3616570234298706, + "learning_rate": 7.069495559465825e-05, + "loss": 1.8914, + "step": 8364 + }, + { + "epoch": 0.8823839662447257, + "grad_norm": 0.37954577803611755, + "learning_rate": 7.059690361257703e-05, + "loss": 1.8879, + "step": 8365 + }, + { + "epoch": 0.8824894514767933, + "grad_norm": 0.35786017775535583, + "learning_rate": 7.049898762593007e-05, + "loss": 1.842, + "step": 8366 + }, + { + "epoch": 0.8825949367088608, + "grad_norm": 0.3441547751426697, + "learning_rate": 7.04012074460955e-05, + "loss": 1.8573, + "step": 8367 + }, + { + "epoch": 0.8827004219409282, + "grad_norm": 0.35996657609939575, + "learning_rate": 7.030356288471289e-05, + "loss": 1.878, + "step": 8368 + }, + { + "epoch": 0.8828059071729958, + "grad_norm": 0.3576320707798004, + "learning_rate": 7.020605375368314e-05, + "loss": 1.8466, + "step": 8369 + }, + { + "epoch": 0.8829113924050633, + "grad_norm": 0.3551853895187378, + "learning_rate": 7.010867986516811e-05, + "loss": 1.8227, + "step": 8370 + }, + { + "epoch": 0.8830168776371308, + "grad_norm": 0.34010180830955505, + "learning_rate": 7.001144103159e-05, + "loss": 1.857, + "step": 8371 + }, + { + "epoch": 0.8831223628691983, + "grad_norm": 0.33581653237342834, + "learning_rate": 6.991433706563134e-05, + "loss": 1.8905, + "step": 8372 + }, + { + "epoch": 0.8832278481012659, + "grad_norm": 0.33873897790908813, + "learning_rate": 6.981736778023443e-05, + "loss": 1.8792, + "step": 8373 + }, + { + "epoch": 
0.8833333333333333, + "grad_norm": 0.34525546431541443, + "learning_rate": 6.972053298860092e-05, + "loss": 1.8565, + "step": 8374 + }, + { + "epoch": 0.8834388185654009, + "grad_norm": 0.3515998423099518, + "learning_rate": 6.962383250419169e-05, + "loss": 1.9064, + "step": 8375 + }, + { + "epoch": 0.8835443037974684, + "grad_norm": 0.35742858052253723, + "learning_rate": 6.952726614072621e-05, + "loss": 1.9069, + "step": 8376 + }, + { + "epoch": 0.8836497890295358, + "grad_norm": 0.34120532870292664, + "learning_rate": 6.94308337121824e-05, + "loss": 1.8577, + "step": 8377 + }, + { + "epoch": 0.8837552742616034, + "grad_norm": 0.34810927510261536, + "learning_rate": 6.93345350327962e-05, + "loss": 1.8587, + "step": 8378 + }, + { + "epoch": 0.8838607594936709, + "grad_norm": 0.33997491002082825, + "learning_rate": 6.923836991706108e-05, + "loss": 1.8438, + "step": 8379 + }, + { + "epoch": 0.8839662447257384, + "grad_norm": 0.350077748298645, + "learning_rate": 6.914233817972799e-05, + "loss": 1.9095, + "step": 8380 + }, + { + "epoch": 0.8840717299578059, + "grad_norm": 0.33304211497306824, + "learning_rate": 6.904643963580461e-05, + "loss": 1.8648, + "step": 8381 + }, + { + "epoch": 0.8841772151898735, + "grad_norm": 0.3379049003124237, + "learning_rate": 6.895067410055536e-05, + "loss": 1.8993, + "step": 8382 + }, + { + "epoch": 0.8842827004219409, + "grad_norm": 0.3733586072921753, + "learning_rate": 6.885504138950084e-05, + "loss": 1.855, + "step": 8383 + }, + { + "epoch": 0.8843881856540085, + "grad_norm": 0.37332937121391296, + "learning_rate": 6.875954131841743e-05, + "loss": 1.8473, + "step": 8384 + }, + { + "epoch": 0.884493670886076, + "grad_norm": 0.3394981026649475, + "learning_rate": 6.866417370333717e-05, + "loss": 1.8793, + "step": 8385 + }, + { + "epoch": 0.8845991561181434, + "grad_norm": 0.33402952551841736, + "learning_rate": 6.856893836054713e-05, + "loss": 1.882, + "step": 8386 + }, + { + "epoch": 0.884704641350211, + "grad_norm": 
0.3428032100200653, + "learning_rate": 6.847383510658925e-05, + "loss": 1.8797, + "step": 8387 + }, + { + "epoch": 0.8848101265822785, + "grad_norm": 0.3358452320098877, + "learning_rate": 6.837886375825995e-05, + "loss": 1.8408, + "step": 8388 + }, + { + "epoch": 0.884915611814346, + "grad_norm": 0.3556680679321289, + "learning_rate": 6.828402413260965e-05, + "loss": 1.8688, + "step": 8389 + }, + { + "epoch": 0.8850210970464135, + "grad_norm": 0.35363489389419556, + "learning_rate": 6.818931604694264e-05, + "loss": 1.8728, + "step": 8390 + }, + { + "epoch": 0.8851265822784811, + "grad_norm": 0.3994188606739044, + "learning_rate": 6.809473931881644e-05, + "loss": 1.883, + "step": 8391 + }, + { + "epoch": 0.8852320675105485, + "grad_norm": 0.3381074070930481, + "learning_rate": 6.800029376604179e-05, + "loss": 1.8602, + "step": 8392 + }, + { + "epoch": 0.885337552742616, + "grad_norm": 0.3478979170322418, + "learning_rate": 6.790597920668206e-05, + "loss": 1.8496, + "step": 8393 + }, + { + "epoch": 0.8854430379746835, + "grad_norm": 0.3741663098335266, + "learning_rate": 6.781179545905287e-05, + "loss": 1.8703, + "step": 8394 + }, + { + "epoch": 0.885548523206751, + "grad_norm": 0.3823329508304596, + "learning_rate": 6.771774234172196e-05, + "loss": 1.8515, + "step": 8395 + }, + { + "epoch": 0.8856540084388186, + "grad_norm": 0.35275787115097046, + "learning_rate": 6.762381967350862e-05, + "loss": 1.8762, + "step": 8396 + }, + { + "epoch": 0.885759493670886, + "grad_norm": 0.3840509355068207, + "learning_rate": 6.753002727348348e-05, + "loss": 1.8654, + "step": 8397 + }, + { + "epoch": 0.8858649789029536, + "grad_norm": 0.33651450276374817, + "learning_rate": 6.743636496096815e-05, + "loss": 1.8536, + "step": 8398 + }, + { + "epoch": 0.8859704641350211, + "grad_norm": 0.3600196838378906, + "learning_rate": 6.73428325555347e-05, + "loss": 1.9043, + "step": 8399 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 0.3637711703777313, + "learning_rate": 
6.724942987700563e-05, + "loss": 1.8611, + "step": 8400 + }, + { + "epoch": 0.8861814345991561, + "grad_norm": 0.3379918336868286, + "learning_rate": 6.71561567454532e-05, + "loss": 1.8795, + "step": 8401 + }, + { + "epoch": 0.8862869198312237, + "grad_norm": 0.3338322341442108, + "learning_rate": 6.706301298119924e-05, + "loss": 1.8649, + "step": 8402 + }, + { + "epoch": 0.8863924050632911, + "grad_norm": 0.3347778916358948, + "learning_rate": 6.696999840481492e-05, + "loss": 1.8969, + "step": 8403 + }, + { + "epoch": 0.8864978902953586, + "grad_norm": 0.36954358220100403, + "learning_rate": 6.687711283712008e-05, + "loss": 1.8914, + "step": 8404 + }, + { + "epoch": 0.8866033755274262, + "grad_norm": 0.3464561402797699, + "learning_rate": 6.678435609918325e-05, + "loss": 1.8803, + "step": 8405 + }, + { + "epoch": 0.8867088607594936, + "grad_norm": 0.3820663094520569, + "learning_rate": 6.669172801232099e-05, + "loss": 1.8976, + "step": 8406 + }, + { + "epoch": 0.8868143459915612, + "grad_norm": 0.3348489999771118, + "learning_rate": 6.659922839809777e-05, + "loss": 1.8951, + "step": 8407 + }, + { + "epoch": 0.8869198312236287, + "grad_norm": 0.3423973023891449, + "learning_rate": 6.65068570783256e-05, + "loss": 1.867, + "step": 8408 + }, + { + "epoch": 0.8870253164556962, + "grad_norm": 0.3505173921585083, + "learning_rate": 6.641461387506347e-05, + "loss": 1.8939, + "step": 8409 + }, + { + "epoch": 0.8871308016877637, + "grad_norm": 0.37107759714126587, + "learning_rate": 6.632249861061733e-05, + "loss": 1.8398, + "step": 8410 + }, + { + "epoch": 0.8872362869198313, + "grad_norm": 0.3631918132305145, + "learning_rate": 6.623051110753948e-05, + "loss": 1.8914, + "step": 8411 + }, + { + "epoch": 0.8873417721518987, + "grad_norm": 0.33823728561401367, + "learning_rate": 6.613865118862837e-05, + "loss": 1.8714, + "step": 8412 + }, + { + "epoch": 0.8874472573839662, + "grad_norm": 0.3375466465950012, + "learning_rate": 6.604691867692828e-05, + "loss": 1.8832, + 
"step": 8413 + }, + { + "epoch": 0.8875527426160338, + "grad_norm": 0.35995420813560486, + "learning_rate": 6.595531339572882e-05, + "loss": 1.8676, + "step": 8414 + }, + { + "epoch": 0.8876582278481012, + "grad_norm": 0.33720219135284424, + "learning_rate": 6.586383516856473e-05, + "loss": 1.885, + "step": 8415 + }, + { + "epoch": 0.8877637130801688, + "grad_norm": 0.345517635345459, + "learning_rate": 6.57724838192156e-05, + "loss": 1.831, + "step": 8416 + }, + { + "epoch": 0.8878691983122363, + "grad_norm": 0.3387812077999115, + "learning_rate": 6.568125917170527e-05, + "loss": 1.8618, + "step": 8417 + }, + { + "epoch": 0.8879746835443038, + "grad_norm": 0.3610466420650482, + "learning_rate": 6.559016105030177e-05, + "loss": 1.88, + "step": 8418 + }, + { + "epoch": 0.8880801687763713, + "grad_norm": 0.336632639169693, + "learning_rate": 6.549918927951679e-05, + "loss": 1.8539, + "step": 8419 + }, + { + "epoch": 0.8881856540084389, + "grad_norm": 0.3670237064361572, + "learning_rate": 6.540834368410549e-05, + "loss": 1.8716, + "step": 8420 + }, + { + "epoch": 0.8882911392405063, + "grad_norm": 0.34951117634773254, + "learning_rate": 6.531762408906607e-05, + "loss": 1.8914, + "step": 8421 + }, + { + "epoch": 0.8883966244725738, + "grad_norm": 0.35980531573295593, + "learning_rate": 6.522703031963938e-05, + "loss": 1.874, + "step": 8422 + }, + { + "epoch": 0.8885021097046414, + "grad_norm": 0.3341461718082428, + "learning_rate": 6.513656220130879e-05, + "loss": 1.8882, + "step": 8423 + }, + { + "epoch": 0.8886075949367088, + "grad_norm": 0.33701375126838684, + "learning_rate": 6.504621955979958e-05, + "loss": 1.8598, + "step": 8424 + }, + { + "epoch": 0.8887130801687764, + "grad_norm": 0.34404054284095764, + "learning_rate": 6.495600222107884e-05, + "loss": 1.8528, + "step": 8425 + }, + { + "epoch": 0.8888185654008439, + "grad_norm": 0.38447362184524536, + "learning_rate": 6.486591001135503e-05, + "loss": 1.8453, + "step": 8426 + }, + { + "epoch": 
0.8889240506329114, + "grad_norm": 0.36997362971305847, + "learning_rate": 6.477594275707757e-05, + "loss": 1.8923, + "step": 8427 + }, + { + "epoch": 0.8890295358649789, + "grad_norm": 0.34307926893234253, + "learning_rate": 6.468610028493671e-05, + "loss": 1.8729, + "step": 8428 + }, + { + "epoch": 0.8891350210970465, + "grad_norm": 0.3402564823627472, + "learning_rate": 6.459638242186298e-05, + "loss": 1.8709, + "step": 8429 + }, + { + "epoch": 0.8892405063291139, + "grad_norm": 0.3397671580314636, + "learning_rate": 6.4506788995027e-05, + "loss": 1.8741, + "step": 8430 + }, + { + "epoch": 0.8893459915611814, + "grad_norm": 0.36264097690582275, + "learning_rate": 6.441731983183912e-05, + "loss": 1.8467, + "step": 8431 + }, + { + "epoch": 0.889451476793249, + "grad_norm": 0.3352421224117279, + "learning_rate": 6.432797475994898e-05, + "loss": 1.8314, + "step": 8432 + }, + { + "epoch": 0.8895569620253164, + "grad_norm": 0.3742561638355255, + "learning_rate": 6.42387536072454e-05, + "loss": 1.8383, + "step": 8433 + }, + { + "epoch": 0.889662447257384, + "grad_norm": 0.3447900116443634, + "learning_rate": 6.414965620185575e-05, + "loss": 1.8392, + "step": 8434 + }, + { + "epoch": 0.8897679324894515, + "grad_norm": 0.34255534410476685, + "learning_rate": 6.406068237214591e-05, + "loss": 1.8916, + "step": 8435 + }, + { + "epoch": 0.889873417721519, + "grad_norm": 0.3508528172969818, + "learning_rate": 6.39718319467198e-05, + "loss": 1.8767, + "step": 8436 + }, + { + "epoch": 0.8899789029535865, + "grad_norm": 0.36500030755996704, + "learning_rate": 6.388310475441898e-05, + "loss": 1.8501, + "step": 8437 + }, + { + "epoch": 0.890084388185654, + "grad_norm": 0.36895039677619934, + "learning_rate": 6.379450062432251e-05, + "loss": 1.8835, + "step": 8438 + }, + { + "epoch": 0.8901898734177215, + "grad_norm": 0.3450184464454651, + "learning_rate": 6.370601938574637e-05, + "loss": 1.863, + "step": 8439 + }, + { + "epoch": 0.890295358649789, + "grad_norm": 
0.34596967697143555, + "learning_rate": 6.361766086824344e-05, + "loss": 1.8662, + "step": 8440 + }, + { + "epoch": 0.8904008438818566, + "grad_norm": 0.33330878615379333, + "learning_rate": 6.352942490160292e-05, + "loss": 1.8626, + "step": 8441 + }, + { + "epoch": 0.890506329113924, + "grad_norm": 0.3446773290634155, + "learning_rate": 6.344131131585007e-05, + "loss": 1.8495, + "step": 8442 + }, + { + "epoch": 0.8906118143459916, + "grad_norm": 0.3413025140762329, + "learning_rate": 6.335331994124594e-05, + "loss": 1.8655, + "step": 8443 + }, + { + "epoch": 0.8907172995780591, + "grad_norm": 0.34136033058166504, + "learning_rate": 6.326545060828696e-05, + "loss": 1.8628, + "step": 8444 + }, + { + "epoch": 0.8908227848101266, + "grad_norm": 0.36001574993133545, + "learning_rate": 6.31777031477047e-05, + "loss": 1.8498, + "step": 8445 + }, + { + "epoch": 0.8909282700421941, + "grad_norm": 0.38652268052101135, + "learning_rate": 6.309007739046552e-05, + "loss": 1.8632, + "step": 8446 + }, + { + "epoch": 0.8910337552742617, + "grad_norm": 0.3489409387111664, + "learning_rate": 6.300257316777014e-05, + "loss": 1.8935, + "step": 8447 + }, + { + "epoch": 0.8911392405063291, + "grad_norm": 0.3446243405342102, + "learning_rate": 6.291519031105349e-05, + "loss": 1.8934, + "step": 8448 + }, + { + "epoch": 0.8912447257383966, + "grad_norm": 0.355072021484375, + "learning_rate": 6.282792865198421e-05, + "loss": 1.8933, + "step": 8449 + }, + { + "epoch": 0.8913502109704642, + "grad_norm": 0.3472936153411865, + "learning_rate": 6.274078802246449e-05, + "loss": 1.8545, + "step": 8450 + }, + { + "epoch": 0.8914556962025316, + "grad_norm": 0.33822306990623474, + "learning_rate": 6.265376825462966e-05, + "loss": 1.8399, + "step": 8451 + }, + { + "epoch": 0.8915611814345992, + "grad_norm": 0.3421364724636078, + "learning_rate": 6.256686918084778e-05, + "loss": 1.8709, + "step": 8452 + }, + { + "epoch": 0.8916666666666667, + "grad_norm": 0.34864047169685364, + "learning_rate": 
6.248009063371955e-05, + "loss": 1.8562, + "step": 8453 + }, + { + "epoch": 0.8917721518987342, + "grad_norm": 0.3483445942401886, + "learning_rate": 6.239343244607771e-05, + "loss": 1.88, + "step": 8454 + }, + { + "epoch": 0.8918776371308017, + "grad_norm": 0.3722592890262604, + "learning_rate": 6.230689445098696e-05, + "loss": 1.8315, + "step": 8455 + }, + { + "epoch": 0.8919831223628693, + "grad_norm": 0.36060553789138794, + "learning_rate": 6.222047648174353e-05, + "loss": 1.8807, + "step": 8456 + }, + { + "epoch": 0.8920886075949367, + "grad_norm": 0.34702596068382263, + "learning_rate": 6.213417837187475e-05, + "loss": 1.8529, + "step": 8457 + }, + { + "epoch": 0.8921940928270042, + "grad_norm": 0.35403698682785034, + "learning_rate": 6.204799995513898e-05, + "loss": 1.8602, + "step": 8458 + }, + { + "epoch": 0.8922995780590718, + "grad_norm": 0.342255175113678, + "learning_rate": 6.196194106552512e-05, + "loss": 1.8715, + "step": 8459 + }, + { + "epoch": 0.8924050632911392, + "grad_norm": 0.3428564965724945, + "learning_rate": 6.187600153725223e-05, + "loss": 1.8846, + "step": 8460 + }, + { + "epoch": 0.8925105485232068, + "grad_norm": 0.34811344742774963, + "learning_rate": 6.179018120476945e-05, + "loss": 1.8917, + "step": 8461 + }, + { + "epoch": 0.8926160337552742, + "grad_norm": 0.34493327140808105, + "learning_rate": 6.17044799027554e-05, + "loss": 1.8837, + "step": 8462 + }, + { + "epoch": 0.8927215189873418, + "grad_norm": 0.35282132029533386, + "learning_rate": 6.161889746611807e-05, + "loss": 1.85, + "step": 8463 + }, + { + "epoch": 0.8928270042194093, + "grad_norm": 0.36081555485725403, + "learning_rate": 6.153343372999445e-05, + "loss": 1.8553, + "step": 8464 + }, + { + "epoch": 0.8929324894514767, + "grad_norm": 0.35130664706230164, + "learning_rate": 6.14480885297501e-05, + "loss": 1.8816, + "step": 8465 + }, + { + "epoch": 0.8930379746835443, + "grad_norm": 0.35312581062316895, + "learning_rate": 6.1362861700979e-05, + "loss": 1.8423, + 
"step": 8466 + }, + { + "epoch": 0.8931434599156118, + "grad_norm": 0.3375323414802551, + "learning_rate": 6.127775307950314e-05, + "loss": 1.863, + "step": 8467 + }, + { + "epoch": 0.8932489451476793, + "grad_norm": 0.38447248935699463, + "learning_rate": 6.119276250137219e-05, + "loss": 1.8435, + "step": 8468 + }, + { + "epoch": 0.8933544303797468, + "grad_norm": 0.3567967712879181, + "learning_rate": 6.110788980286329e-05, + "loss": 1.8805, + "step": 8469 + }, + { + "epoch": 0.8934599156118144, + "grad_norm": 0.3383502662181854, + "learning_rate": 6.1023134820480546e-05, + "loss": 1.8851, + "step": 8470 + }, + { + "epoch": 0.8935654008438818, + "grad_norm": 0.34337282180786133, + "learning_rate": 6.0938497390954946e-05, + "loss": 1.875, + "step": 8471 + }, + { + "epoch": 0.8936708860759494, + "grad_norm": 0.33993256092071533, + "learning_rate": 6.0853977351243815e-05, + "loss": 1.8681, + "step": 8472 + }, + { + "epoch": 0.8937763713080169, + "grad_norm": 0.34588655829429626, + "learning_rate": 6.0769574538530704e-05, + "loss": 1.8683, + "step": 8473 + }, + { + "epoch": 0.8938818565400843, + "grad_norm": 0.34142011404037476, + "learning_rate": 6.0685288790224975e-05, + "loss": 1.8472, + "step": 8474 + }, + { + "epoch": 0.8939873417721519, + "grad_norm": 0.34643474221229553, + "learning_rate": 6.0601119943961425e-05, + "loss": 1.8906, + "step": 8475 + }, + { + "epoch": 0.8940928270042194, + "grad_norm": 0.3317405879497528, + "learning_rate": 6.0517067837600144e-05, + "loss": 1.8278, + "step": 8476 + }, + { + "epoch": 0.8941983122362869, + "grad_norm": 0.3597392141819, + "learning_rate": 6.0433132309226017e-05, + "loss": 1.8713, + "step": 8477 + }, + { + "epoch": 0.8943037974683544, + "grad_norm": 0.3507520854473114, + "learning_rate": 6.034931319714857e-05, + "loss": 1.8827, + "step": 8478 + }, + { + "epoch": 0.894409282700422, + "grad_norm": 0.3406740725040436, + "learning_rate": 6.026561033990159e-05, + "loss": 1.8532, + "step": 8479 + }, + { + "epoch": 
0.8945147679324894, + "grad_norm": 0.34266048669815063, + "learning_rate": 6.0182023576242725e-05, + "loss": 1.8522, + "step": 8480 + }, + { + "epoch": 0.894620253164557, + "grad_norm": 0.3659510314464569, + "learning_rate": 6.009855274515339e-05, + "loss": 1.8793, + "step": 8481 + }, + { + "epoch": 0.8947257383966245, + "grad_norm": 0.3439449071884155, + "learning_rate": 6.001519768583819e-05, + "loss": 1.905, + "step": 8482 + }, + { + "epoch": 0.8948312236286919, + "grad_norm": 0.342777818441391, + "learning_rate": 5.993195823772487e-05, + "loss": 1.8736, + "step": 8483 + }, + { + "epoch": 0.8949367088607595, + "grad_norm": 0.3583220839500427, + "learning_rate": 5.9848834240463846e-05, + "loss": 1.8532, + "step": 8484 + }, + { + "epoch": 0.895042194092827, + "grad_norm": 0.3462280035018921, + "learning_rate": 5.976582553392788e-05, + "loss": 1.8632, + "step": 8485 + }, + { + "epoch": 0.8951476793248945, + "grad_norm": 0.3414909541606903, + "learning_rate": 5.968293195821191e-05, + "loss": 1.8676, + "step": 8486 + }, + { + "epoch": 0.895253164556962, + "grad_norm": 0.36894136667251587, + "learning_rate": 5.960015335363258e-05, + "loss": 1.8747, + "step": 8487 + }, + { + "epoch": 0.8953586497890296, + "grad_norm": 0.34079936146736145, + "learning_rate": 5.9517489560728056e-05, + "loss": 1.8397, + "step": 8488 + }, + { + "epoch": 0.895464135021097, + "grad_norm": 0.3537771701812744, + "learning_rate": 5.943494042025771e-05, + "loss": 1.8456, + "step": 8489 + }, + { + "epoch": 0.8955696202531646, + "grad_norm": 0.3451216518878937, + "learning_rate": 5.9352505773201664e-05, + "loss": 1.8294, + "step": 8490 + }, + { + "epoch": 0.8956751054852321, + "grad_norm": 0.32621586322784424, + "learning_rate": 5.9270185460760735e-05, + "loss": 1.8578, + "step": 8491 + }, + { + "epoch": 0.8957805907172995, + "grad_norm": 0.36226093769073486, + "learning_rate": 5.918797932435585e-05, + "loss": 1.8414, + "step": 8492 + }, + { + "epoch": 0.8958860759493671, + "grad_norm": 
0.352947860956192, + "learning_rate": 5.9105887205627985e-05, + "loss": 1.8235, + "step": 8493 + }, + { + "epoch": 0.8959915611814346, + "grad_norm": 0.3788469135761261, + "learning_rate": 5.9023908946437736e-05, + "loss": 1.8453, + "step": 8494 + }, + { + "epoch": 0.8960970464135021, + "grad_norm": 0.36876538395881653, + "learning_rate": 5.894204438886499e-05, + "loss": 1.9069, + "step": 8495 + }, + { + "epoch": 0.8962025316455696, + "grad_norm": 0.3701067864894867, + "learning_rate": 5.886029337520872e-05, + "loss": 1.887, + "step": 8496 + }, + { + "epoch": 0.8963080168776372, + "grad_norm": 0.34789225459098816, + "learning_rate": 5.877865574798656e-05, + "loss": 1.8401, + "step": 8497 + }, + { + "epoch": 0.8964135021097046, + "grad_norm": 0.3281148374080658, + "learning_rate": 5.869713134993462e-05, + "loss": 1.8429, + "step": 8498 + }, + { + "epoch": 0.8965189873417722, + "grad_norm": 0.35306236147880554, + "learning_rate": 5.8615720024007174e-05, + "loss": 1.8524, + "step": 8499 + }, + { + "epoch": 0.8966244725738397, + "grad_norm": 0.3655345141887665, + "learning_rate": 5.8534421613376175e-05, + "loss": 1.8656, + "step": 8500 + }, + { + "epoch": 0.8967299578059071, + "grad_norm": 0.3326323926448822, + "learning_rate": 5.8453235961431225e-05, + "loss": 1.8514, + "step": 8501 + }, + { + "epoch": 0.8968354430379747, + "grad_norm": 0.34125253558158875, + "learning_rate": 5.837216291177911e-05, + "loss": 1.8947, + "step": 8502 + }, + { + "epoch": 0.8969409282700422, + "grad_norm": 0.3397912383079529, + "learning_rate": 5.829120230824344e-05, + "loss": 1.8566, + "step": 8503 + }, + { + "epoch": 0.8970464135021097, + "grad_norm": 0.3449687659740448, + "learning_rate": 5.821035399486458e-05, + "loss": 1.8826, + "step": 8504 + }, + { + "epoch": 0.8971518987341772, + "grad_norm": 0.3528134524822235, + "learning_rate": 5.8129617815899086e-05, + "loss": 1.8669, + "step": 8505 + }, + { + "epoch": 0.8972573839662448, + "grad_norm": 0.33610832691192627, + "learning_rate": 
5.8048993615819584e-05, + "loss": 1.8435, + "step": 8506 + }, + { + "epoch": 0.8973628691983122, + "grad_norm": 0.3400745391845703, + "learning_rate": 5.7968481239314435e-05, + "loss": 1.859, + "step": 8507 + }, + { + "epoch": 0.8974683544303798, + "grad_norm": 0.3282717168331146, + "learning_rate": 5.788808053128733e-05, + "loss": 1.8716, + "step": 8508 + }, + { + "epoch": 0.8975738396624473, + "grad_norm": 0.3642805516719818, + "learning_rate": 5.780779133685717e-05, + "loss": 1.8949, + "step": 8509 + }, + { + "epoch": 0.8976793248945147, + "grad_norm": 0.3733340799808502, + "learning_rate": 5.772761350135759e-05, + "loss": 1.8615, + "step": 8510 + }, + { + "epoch": 0.8977848101265823, + "grad_norm": 0.35529690980911255, + "learning_rate": 5.764754687033678e-05, + "loss": 1.8467, + "step": 8511 + }, + { + "epoch": 0.8978902953586498, + "grad_norm": 0.3427521586418152, + "learning_rate": 5.756759128955722e-05, + "loss": 1.853, + "step": 8512 + }, + { + "epoch": 0.8979957805907173, + "grad_norm": 0.35740596055984497, + "learning_rate": 5.748774660499514e-05, + "loss": 1.8589, + "step": 8513 + }, + { + "epoch": 0.8981012658227848, + "grad_norm": 0.34873703122138977, + "learning_rate": 5.740801266284059e-05, + "loss": 1.8431, + "step": 8514 + }, + { + "epoch": 0.8982067510548524, + "grad_norm": 0.34957021474838257, + "learning_rate": 5.732838930949679e-05, + "loss": 1.875, + "step": 8515 + }, + { + "epoch": 0.8983122362869198, + "grad_norm": 0.34089064598083496, + "learning_rate": 5.724887639158008e-05, + "loss": 1.8789, + "step": 8516 + }, + { + "epoch": 0.8984177215189874, + "grad_norm": 0.3520606458187103, + "learning_rate": 5.716947375591959e-05, + "loss": 1.8703, + "step": 8517 + }, + { + "epoch": 0.8985232067510549, + "grad_norm": 0.3620174527168274, + "learning_rate": 5.709018124955674e-05, + "loss": 1.8773, + "step": 8518 + }, + { + "epoch": 0.8986286919831223, + "grad_norm": 0.3540838956832886, + "learning_rate": 5.701099871974525e-05, + "loss": 1.8389, + 
"step": 8519 + }, + { + "epoch": 0.8987341772151899, + "grad_norm": 0.34198346734046936, + "learning_rate": 5.6931926013950586e-05, + "loss": 1.8843, + "step": 8520 + }, + { + "epoch": 0.8988396624472574, + "grad_norm": 0.35135334730148315, + "learning_rate": 5.6852962979849836e-05, + "loss": 1.8678, + "step": 8521 + }, + { + "epoch": 0.8989451476793249, + "grad_norm": 0.35605770349502563, + "learning_rate": 5.677410946533138e-05, + "loss": 1.8657, + "step": 8522 + }, + { + "epoch": 0.8990506329113924, + "grad_norm": 0.342822402715683, + "learning_rate": 5.6695365318494475e-05, + "loss": 1.8926, + "step": 8523 + }, + { + "epoch": 0.89915611814346, + "grad_norm": 0.35471394658088684, + "learning_rate": 5.6616730387649173e-05, + "loss": 1.8498, + "step": 8524 + }, + { + "epoch": 0.8992616033755274, + "grad_norm": 0.3731857240200043, + "learning_rate": 5.6538204521315804e-05, + "loss": 1.8892, + "step": 8525 + }, + { + "epoch": 0.899367088607595, + "grad_norm": 0.3589857816696167, + "learning_rate": 5.6459787568224886e-05, + "loss": 1.905, + "step": 8526 + }, + { + "epoch": 0.8994725738396624, + "grad_norm": 0.34355974197387695, + "learning_rate": 5.6381479377316726e-05, + "loss": 1.8607, + "step": 8527 + }, + { + "epoch": 0.8995780590717299, + "grad_norm": 0.3602466881275177, + "learning_rate": 5.630327979774111e-05, + "loss": 1.887, + "step": 8528 + }, + { + "epoch": 0.8996835443037975, + "grad_norm": 0.3473949730396271, + "learning_rate": 5.6225188678857095e-05, + "loss": 1.8408, + "step": 8529 + }, + { + "epoch": 0.8997890295358649, + "grad_norm": 0.3349258601665497, + "learning_rate": 5.61472058702326e-05, + "loss": 1.8791, + "step": 8530 + }, + { + "epoch": 0.8998945147679325, + "grad_norm": 0.3696322441101074, + "learning_rate": 5.6069331221644284e-05, + "loss": 1.8839, + "step": 8531 + }, + { + "epoch": 0.9, + "grad_norm": 0.3475489914417267, + "learning_rate": 5.599156458307712e-05, + "loss": 1.871, + "step": 8532 + }, + { + "epoch": 0.9001054852320675, + 
"grad_norm": 0.33253321051597595, + "learning_rate": 5.5913905804724106e-05, + "loss": 1.8479, + "step": 8533 + }, + { + "epoch": 0.900210970464135, + "grad_norm": 0.3749080300331116, + "learning_rate": 5.58363547369861e-05, + "loss": 1.9098, + "step": 8534 + }, + { + "epoch": 0.9003164556962026, + "grad_norm": 0.3603401780128479, + "learning_rate": 5.575891123047136e-05, + "loss": 1.859, + "step": 8535 + }, + { + "epoch": 0.90042194092827, + "grad_norm": 0.355351984500885, + "learning_rate": 5.568157513599542e-05, + "loss": 1.8875, + "step": 8536 + }, + { + "epoch": 0.9005274261603375, + "grad_norm": 0.34050822257995605, + "learning_rate": 5.5604346304580727e-05, + "loss": 1.8939, + "step": 8537 + }, + { + "epoch": 0.9006329113924051, + "grad_norm": 0.3363865911960602, + "learning_rate": 5.552722458745626e-05, + "loss": 1.8674, + "step": 8538 + }, + { + "epoch": 0.9007383966244725, + "grad_norm": 0.36293837428092957, + "learning_rate": 5.545020983605749e-05, + "loss": 1.8488, + "step": 8539 + }, + { + "epoch": 0.9008438818565401, + "grad_norm": 0.3514556288719177, + "learning_rate": 5.53733019020258e-05, + "loss": 1.8887, + "step": 8540 + }, + { + "epoch": 0.9009493670886076, + "grad_norm": 0.35932305455207825, + "learning_rate": 5.529650063720842e-05, + "loss": 1.8787, + "step": 8541 + }, + { + "epoch": 0.9010548523206751, + "grad_norm": 0.33526411652565, + "learning_rate": 5.52198058936581e-05, + "loss": 1.88, + "step": 8542 + }, + { + "epoch": 0.9011603375527426, + "grad_norm": 0.3509579300880432, + "learning_rate": 5.5143217523632655e-05, + "loss": 1.8486, + "step": 8543 + }, + { + "epoch": 0.9012658227848102, + "grad_norm": 0.35678336024284363, + "learning_rate": 5.5066735379594944e-05, + "loss": 1.891, + "step": 8544 + }, + { + "epoch": 0.9013713080168776, + "grad_norm": 0.3440455198287964, + "learning_rate": 5.4990359314212424e-05, + "loss": 1.8363, + "step": 8545 + }, + { + "epoch": 0.9014767932489451, + "grad_norm": 0.3518320620059967, + "learning_rate": 
5.491408918035683e-05, + "loss": 1.8838, + "step": 8546 + }, + { + "epoch": 0.9015822784810127, + "grad_norm": 0.3521043360233307, + "learning_rate": 5.483792483110408e-05, + "loss": 1.8439, + "step": 8547 + }, + { + "epoch": 0.9016877637130801, + "grad_norm": 0.3427799344062805, + "learning_rate": 5.476186611973374e-05, + "loss": 1.8812, + "step": 8548 + }, + { + "epoch": 0.9017932489451477, + "grad_norm": 0.35440245270729065, + "learning_rate": 5.4685912899728965e-05, + "loss": 1.8495, + "step": 8549 + }, + { + "epoch": 0.9018987341772152, + "grad_norm": 0.3558730483055115, + "learning_rate": 5.4610065024776125e-05, + "loss": 1.8916, + "step": 8550 + }, + { + "epoch": 0.9020042194092827, + "grad_norm": 0.3371177613735199, + "learning_rate": 5.453432234876445e-05, + "loss": 1.8511, + "step": 8551 + }, + { + "epoch": 0.9021097046413502, + "grad_norm": 0.3407037556171417, + "learning_rate": 5.445868472578592e-05, + "loss": 1.8255, + "step": 8552 + }, + { + "epoch": 0.9022151898734178, + "grad_norm": 0.34911638498306274, + "learning_rate": 5.438315201013476e-05, + "loss": 1.8637, + "step": 8553 + }, + { + "epoch": 0.9023206751054852, + "grad_norm": 0.3465733826160431, + "learning_rate": 5.430772405630742e-05, + "loss": 1.8877, + "step": 8554 + }, + { + "epoch": 0.9024261603375527, + "grad_norm": 0.356247216463089, + "learning_rate": 5.423240071900209e-05, + "loss": 1.8591, + "step": 8555 + }, + { + "epoch": 0.9025316455696203, + "grad_norm": 0.37166187167167664, + "learning_rate": 5.4157181853118464e-05, + "loss": 1.8593, + "step": 8556 + }, + { + "epoch": 0.9026371308016877, + "grad_norm": 0.3661405146121979, + "learning_rate": 5.408206731375757e-05, + "loss": 1.814, + "step": 8557 + }, + { + "epoch": 0.9027426160337553, + "grad_norm": 0.33805105090141296, + "learning_rate": 5.400705695622129e-05, + "loss": 1.8331, + "step": 8558 + }, + { + "epoch": 0.9028481012658228, + "grad_norm": 0.3617623448371887, + "learning_rate": 5.39321506360123e-05, + "loss": 1.8592, + 
"step": 8559 + }, + { + "epoch": 0.9029535864978903, + "grad_norm": 0.3706520199775696, + "learning_rate": 5.38573482088337e-05, + "loss": 1.8613, + "step": 8560 + }, + { + "epoch": 0.9030590717299578, + "grad_norm": 0.33694419264793396, + "learning_rate": 5.37826495305886e-05, + "loss": 1.8729, + "step": 8561 + }, + { + "epoch": 0.9031645569620254, + "grad_norm": 0.3439914584159851, + "learning_rate": 5.370805445738011e-05, + "loss": 1.8737, + "step": 8562 + }, + { + "epoch": 0.9032700421940928, + "grad_norm": 0.3720777928829193, + "learning_rate": 5.3633562845510806e-05, + "loss": 1.8565, + "step": 8563 + }, + { + "epoch": 0.9033755274261603, + "grad_norm": 0.3770931661128998, + "learning_rate": 5.3559174551482656e-05, + "loss": 1.8544, + "step": 8564 + }, + { + "epoch": 0.9034810126582279, + "grad_norm": 0.3748472332954407, + "learning_rate": 5.3484889431996646e-05, + "loss": 1.8538, + "step": 8565 + }, + { + "epoch": 0.9035864978902953, + "grad_norm": 0.34414103627204895, + "learning_rate": 5.341070734395244e-05, + "loss": 1.8884, + "step": 8566 + }, + { + "epoch": 0.9036919831223629, + "grad_norm": 0.36085575819015503, + "learning_rate": 5.3336628144448266e-05, + "loss": 1.86, + "step": 8567 + }, + { + "epoch": 0.9037974683544304, + "grad_norm": 0.38399025797843933, + "learning_rate": 5.326265169078048e-05, + "loss": 1.8664, + "step": 8568 + }, + { + "epoch": 0.9039029535864979, + "grad_norm": 0.38434699177742004, + "learning_rate": 5.318877784044342e-05, + "loss": 1.88, + "step": 8569 + }, + { + "epoch": 0.9040084388185654, + "grad_norm": 0.3522164821624756, + "learning_rate": 5.3115006451129075e-05, + "loss": 1.8561, + "step": 8570 + }, + { + "epoch": 0.904113924050633, + "grad_norm": 0.34315282106399536, + "learning_rate": 5.304133738072674e-05, + "loss": 1.8539, + "step": 8571 + }, + { + "epoch": 0.9042194092827004, + "grad_norm": 0.33191123604774475, + "learning_rate": 5.296777048732293e-05, + "loss": 1.8338, + "step": 8572 + }, + { + "epoch": 
0.9043248945147679, + "grad_norm": 0.3358452022075653, + "learning_rate": 5.289430562920086e-05, + "loss": 1.8742, + "step": 8573 + }, + { + "epoch": 0.9044303797468355, + "grad_norm": 0.34603258967399597, + "learning_rate": 5.2820942664840405e-05, + "loss": 1.8837, + "step": 8574 + }, + { + "epoch": 0.9045358649789029, + "grad_norm": 0.34117355942726135, + "learning_rate": 5.2747681452917697e-05, + "loss": 1.8678, + "step": 8575 + }, + { + "epoch": 0.9046413502109705, + "grad_norm": 0.36057236790657043, + "learning_rate": 5.267452185230482e-05, + "loss": 1.8533, + "step": 8576 + }, + { + "epoch": 0.904746835443038, + "grad_norm": 0.3623771369457245, + "learning_rate": 5.260146372206972e-05, + "loss": 1.8759, + "step": 8577 + }, + { + "epoch": 0.9048523206751055, + "grad_norm": 0.3469369411468506, + "learning_rate": 5.2528506921475664e-05, + "loss": 1.8424, + "step": 8578 + }, + { + "epoch": 0.904957805907173, + "grad_norm": 0.34074780344963074, + "learning_rate": 5.245565130998124e-05, + "loss": 1.8887, + "step": 8579 + }, + { + "epoch": 0.9050632911392406, + "grad_norm": 0.3248576819896698, + "learning_rate": 5.2382896747239935e-05, + "loss": 1.8697, + "step": 8580 + }, + { + "epoch": 0.905168776371308, + "grad_norm": 0.3376217782497406, + "learning_rate": 5.2310243093099814e-05, + "loss": 1.8461, + "step": 8581 + }, + { + "epoch": 0.9052742616033755, + "grad_norm": 0.3392743170261383, + "learning_rate": 5.223769020760346e-05, + "loss": 1.8881, + "step": 8582 + }, + { + "epoch": 0.9053797468354431, + "grad_norm": 0.3689427375793457, + "learning_rate": 5.216523795098743e-05, + "loss": 1.8752, + "step": 8583 + }, + { + "epoch": 0.9054852320675105, + "grad_norm": 0.3300771415233612, + "learning_rate": 5.209288618368225e-05, + "loss": 1.8598, + "step": 8584 + }, + { + "epoch": 0.9055907172995781, + "grad_norm": 0.35926470160484314, + "learning_rate": 5.202063476631199e-05, + "loss": 1.8603, + "step": 8585 + }, + { + "epoch": 0.9056962025316456, + "grad_norm": 
0.33220112323760986, + "learning_rate": 5.194848355969396e-05, + "loss": 1.8554, + "step": 8586 + }, + { + "epoch": 0.9058016877637131, + "grad_norm": 0.34394845366477966, + "learning_rate": 5.18764324248386e-05, + "loss": 1.8859, + "step": 8587 + }, + { + "epoch": 0.9059071729957806, + "grad_norm": 0.33274152874946594, + "learning_rate": 5.180448122294913e-05, + "loss": 1.8663, + "step": 8588 + }, + { + "epoch": 0.9060126582278482, + "grad_norm": 0.36067837476730347, + "learning_rate": 5.173262981542119e-05, + "loss": 1.8834, + "step": 8589 + }, + { + "epoch": 0.9061181434599156, + "grad_norm": 0.35261139273643494, + "learning_rate": 5.166087806384275e-05, + "loss": 1.8832, + "step": 8590 + }, + { + "epoch": 0.9062236286919831, + "grad_norm": 0.3357314169406891, + "learning_rate": 5.158922582999368e-05, + "loss": 1.857, + "step": 8591 + }, + { + "epoch": 0.9063291139240506, + "grad_norm": 0.36374348402023315, + "learning_rate": 5.1517672975845604e-05, + "loss": 1.8609, + "step": 8592 + }, + { + "epoch": 0.9064345991561181, + "grad_norm": 0.3569132685661316, + "learning_rate": 5.144621936356162e-05, + "loss": 1.8819, + "step": 8593 + }, + { + "epoch": 0.9065400843881857, + "grad_norm": 0.335563600063324, + "learning_rate": 5.1374864855495894e-05, + "loss": 1.8508, + "step": 8594 + }, + { + "epoch": 0.9066455696202531, + "grad_norm": 0.3496129512786865, + "learning_rate": 5.130360931419364e-05, + "loss": 1.9105, + "step": 8595 + }, + { + "epoch": 0.9067510548523207, + "grad_norm": 0.344830185174942, + "learning_rate": 5.123245260239058e-05, + "loss": 1.7826, + "step": 8596 + }, + { + "epoch": 0.9068565400843882, + "grad_norm": 0.34248965978622437, + "learning_rate": 5.1161394583012904e-05, + "loss": 1.845, + "step": 8597 + }, + { + "epoch": 0.9069620253164556, + "grad_norm": 0.3357292413711548, + "learning_rate": 5.109043511917694e-05, + "loss": 1.8777, + "step": 8598 + }, + { + "epoch": 0.9070675105485232, + "grad_norm": 0.3813318610191345, + "learning_rate": 
5.101957407418877e-05, + "loss": 1.8678, + "step": 8599 + }, + { + "epoch": 0.9071729957805907, + "grad_norm": 0.3608510196208954, + "learning_rate": 5.0948811311544186e-05, + "loss": 1.8942, + "step": 8600 + }, + { + "epoch": 0.9072784810126582, + "grad_norm": 0.36402684450149536, + "learning_rate": 5.087814669492819e-05, + "loss": 1.8821, + "step": 8601 + }, + { + "epoch": 0.9073839662447257, + "grad_norm": 0.33174028992652893, + "learning_rate": 5.080758008821494e-05, + "loss": 1.8716, + "step": 8602 + }, + { + "epoch": 0.9074894514767933, + "grad_norm": 0.33929863572120667, + "learning_rate": 5.073711135546738e-05, + "loss": 1.8656, + "step": 8603 + }, + { + "epoch": 0.9075949367088607, + "grad_norm": 0.3615324795246124, + "learning_rate": 5.0666740360936944e-05, + "loss": 1.8963, + "step": 8604 + }, + { + "epoch": 0.9077004219409283, + "grad_norm": 0.3441731631755829, + "learning_rate": 5.0596466969063415e-05, + "loss": 1.8525, + "step": 8605 + }, + { + "epoch": 0.9078059071729958, + "grad_norm": 0.33910563588142395, + "learning_rate": 5.052629104447452e-05, + "loss": 1.853, + "step": 8606 + }, + { + "epoch": 0.9079113924050632, + "grad_norm": 0.347074031829834, + "learning_rate": 5.0456212451985806e-05, + "loss": 1.8636, + "step": 8607 + }, + { + "epoch": 0.9080168776371308, + "grad_norm": 0.35596993565559387, + "learning_rate": 5.038623105660033e-05, + "loss": 1.8567, + "step": 8608 + }, + { + "epoch": 0.9081223628691983, + "grad_norm": 0.33683332800865173, + "learning_rate": 5.0316346723508287e-05, + "loss": 1.8818, + "step": 8609 + }, + { + "epoch": 0.9082278481012658, + "grad_norm": 0.3400604724884033, + "learning_rate": 5.024655931808697e-05, + "loss": 1.8502, + "step": 8610 + }, + { + "epoch": 0.9083333333333333, + "grad_norm": 0.3508000373840332, + "learning_rate": 5.017686870590028e-05, + "loss": 1.8732, + "step": 8611 + }, + { + "epoch": 0.9084388185654009, + "grad_norm": 0.3516567051410675, + "learning_rate": 5.010727475269867e-05, + "loss": 1.8785, 
+ "step": 8612 + }, + { + "epoch": 0.9085443037974683, + "grad_norm": 0.346305787563324, + "learning_rate": 5.0037777324418756e-05, + "loss": 1.8338, + "step": 8613 + }, + { + "epoch": 0.9086497890295359, + "grad_norm": 0.34959158301353455, + "learning_rate": 4.9968376287183074e-05, + "loss": 1.8818, + "step": 8614 + }, + { + "epoch": 0.9087552742616034, + "grad_norm": 0.3550531268119812, + "learning_rate": 4.989907150729989e-05, + "loss": 1.8214, + "step": 8615 + }, + { + "epoch": 0.9088607594936708, + "grad_norm": 0.3464187681674957, + "learning_rate": 4.9829862851262845e-05, + "loss": 1.8747, + "step": 8616 + }, + { + "epoch": 0.9089662447257384, + "grad_norm": 0.3465474545955658, + "learning_rate": 4.976075018575077e-05, + "loss": 1.8545, + "step": 8617 + }, + { + "epoch": 0.9090717299578059, + "grad_norm": 0.3460406959056854, + "learning_rate": 4.9691733377627475e-05, + "loss": 1.8852, + "step": 8618 + }, + { + "epoch": 0.9091772151898734, + "grad_norm": 0.34175044298171997, + "learning_rate": 4.962281229394129e-05, + "loss": 1.8332, + "step": 8619 + }, + { + "epoch": 0.9092827004219409, + "grad_norm": 0.3377409875392914, + "learning_rate": 4.955398680192509e-05, + "loss": 1.8703, + "step": 8620 + }, + { + "epoch": 0.9093881856540085, + "grad_norm": 0.338579922914505, + "learning_rate": 4.948525676899577e-05, + "loss": 1.8787, + "step": 8621 + }, + { + "epoch": 0.9094936708860759, + "grad_norm": 0.34821662306785583, + "learning_rate": 4.9416622062754195e-05, + "loss": 1.8428, + "step": 8622 + }, + { + "epoch": 0.9095991561181435, + "grad_norm": 0.3364161252975464, + "learning_rate": 4.934808255098487e-05, + "loss": 1.8659, + "step": 8623 + }, + { + "epoch": 0.909704641350211, + "grad_norm": 0.3369210362434387, + "learning_rate": 4.92796381016556e-05, + "loss": 1.8557, + "step": 8624 + }, + { + "epoch": 0.9098101265822784, + "grad_norm": 0.33793336153030396, + "learning_rate": 4.9211288582917396e-05, + "loss": 1.8686, + "step": 8625 + }, + { + "epoch": 
0.909915611814346, + "grad_norm": 0.3429228663444519, + "learning_rate": 4.9143033863104094e-05, + "loss": 1.8724, + "step": 8626 + }, + { + "epoch": 0.9100210970464135, + "grad_norm": 0.3622226119041443, + "learning_rate": 4.907487381073214e-05, + "loss": 1.8985, + "step": 8627 + }, + { + "epoch": 0.910126582278481, + "grad_norm": 0.334748238325119, + "learning_rate": 4.900680829450043e-05, + "loss": 1.8281, + "step": 8628 + }, + { + "epoch": 0.9102320675105485, + "grad_norm": 0.35108694434165955, + "learning_rate": 4.893883718328984e-05, + "loss": 1.8895, + "step": 8629 + }, + { + "epoch": 0.9103375527426161, + "grad_norm": 0.34718984365463257, + "learning_rate": 4.887096034616319e-05, + "loss": 1.9311, + "step": 8630 + }, + { + "epoch": 0.9104430379746835, + "grad_norm": 0.34555038809776306, + "learning_rate": 4.880317765236493e-05, + "loss": 1.8905, + "step": 8631 + }, + { + "epoch": 0.9105485232067511, + "grad_norm": 0.3516028821468353, + "learning_rate": 4.873548897132076e-05, + "loss": 1.8332, + "step": 8632 + }, + { + "epoch": 0.9106540084388186, + "grad_norm": 0.37018364667892456, + "learning_rate": 4.8667894172637606e-05, + "loss": 1.8438, + "step": 8633 + }, + { + "epoch": 0.910759493670886, + "grad_norm": 0.35490214824676514, + "learning_rate": 4.860039312610312e-05, + "loss": 1.882, + "step": 8634 + }, + { + "epoch": 0.9108649789029536, + "grad_norm": 0.354737251996994, + "learning_rate": 4.8532985701685654e-05, + "loss": 1.8511, + "step": 8635 + }, + { + "epoch": 0.9109704641350211, + "grad_norm": 0.35962435603141785, + "learning_rate": 4.846567176953389e-05, + "loss": 1.8106, + "step": 8636 + }, + { + "epoch": 0.9110759493670886, + "grad_norm": 0.3489286005496979, + "learning_rate": 4.839845119997657e-05, + "loss": 1.867, + "step": 8637 + }, + { + "epoch": 0.9111814345991561, + "grad_norm": 0.35514241456985474, + "learning_rate": 4.833132386352234e-05, + "loss": 1.8543, + "step": 8638 + }, + { + "epoch": 0.9112869198312237, + "grad_norm": 
0.3536839485168457, + "learning_rate": 4.8264289630859386e-05, + "loss": 1.8563, + "step": 8639 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 0.3588424026966095, + "learning_rate": 4.819734837285529e-05, + "loss": 1.8483, + "step": 8640 + }, + { + "epoch": 0.9114978902953587, + "grad_norm": 0.3515063226222992, + "learning_rate": 4.8130499960556755e-05, + "loss": 1.8604, + "step": 8641 + }, + { + "epoch": 0.9116033755274262, + "grad_norm": 0.3415443003177643, + "learning_rate": 4.806374426518927e-05, + "loss": 1.8695, + "step": 8642 + }, + { + "epoch": 0.9117088607594936, + "grad_norm": 0.3333544433116913, + "learning_rate": 4.799708115815702e-05, + "loss": 1.8544, + "step": 8643 + }, + { + "epoch": 0.9118143459915612, + "grad_norm": 0.3419317603111267, + "learning_rate": 4.793051051104244e-05, + "loss": 1.8348, + "step": 8644 + }, + { + "epoch": 0.9119198312236287, + "grad_norm": 0.355430006980896, + "learning_rate": 4.786403219560617e-05, + "loss": 1.8029, + "step": 8645 + }, + { + "epoch": 0.9120253164556962, + "grad_norm": 0.3498598337173462, + "learning_rate": 4.779764608378671e-05, + "loss": 1.8753, + "step": 8646 + }, + { + "epoch": 0.9121308016877637, + "grad_norm": 0.3567068576812744, + "learning_rate": 4.7731352047700095e-05, + "loss": 1.8654, + "step": 8647 + }, + { + "epoch": 0.9122362869198313, + "grad_norm": 0.36011219024658203, + "learning_rate": 4.7665149959639824e-05, + "loss": 1.9098, + "step": 8648 + }, + { + "epoch": 0.9123417721518987, + "grad_norm": 0.3411653935909271, + "learning_rate": 4.759903969207646e-05, + "loss": 1.8817, + "step": 8649 + }, + { + "epoch": 0.9124472573839663, + "grad_norm": 0.3497137129306793, + "learning_rate": 4.7533021117657475e-05, + "loss": 1.8834, + "step": 8650 + }, + { + "epoch": 0.9125527426160338, + "grad_norm": 0.3518518805503845, + "learning_rate": 4.746709410920699e-05, + "loss": 1.87, + "step": 8651 + }, + { + "epoch": 0.9126582278481012, + "grad_norm": 0.3522239327430725, + "learning_rate": 
4.740125853972546e-05, + "loss": 1.896, + "step": 8652 + }, + { + "epoch": 0.9127637130801688, + "grad_norm": 0.3485839366912842, + "learning_rate": 4.733551428238957e-05, + "loss": 1.857, + "step": 8653 + }, + { + "epoch": 0.9128691983122363, + "grad_norm": 0.3492329716682434, + "learning_rate": 4.726986121055179e-05, + "loss": 1.9021, + "step": 8654 + }, + { + "epoch": 0.9129746835443038, + "grad_norm": 0.3761976659297943, + "learning_rate": 4.720429919774036e-05, + "loss": 1.8605, + "step": 8655 + }, + { + "epoch": 0.9130801687763713, + "grad_norm": 0.3519754707813263, + "learning_rate": 4.713882811765889e-05, + "loss": 1.8474, + "step": 8656 + }, + { + "epoch": 0.9131856540084389, + "grad_norm": 0.3650324046611786, + "learning_rate": 4.7073447844186114e-05, + "loss": 1.8308, + "step": 8657 + }, + { + "epoch": 0.9132911392405063, + "grad_norm": 0.36544397473335266, + "learning_rate": 4.700815825137578e-05, + "loss": 1.8453, + "step": 8658 + }, + { + "epoch": 0.9133966244725739, + "grad_norm": 0.3346247673034668, + "learning_rate": 4.694295921345623e-05, + "loss": 1.8414, + "step": 8659 + }, + { + "epoch": 0.9135021097046413, + "grad_norm": 0.3963053226470947, + "learning_rate": 4.687785060483031e-05, + "loss": 1.8762, + "step": 8660 + }, + { + "epoch": 0.9136075949367088, + "grad_norm": 0.3263835310935974, + "learning_rate": 4.681283230007507e-05, + "loss": 1.8261, + "step": 8661 + }, + { + "epoch": 0.9137130801687764, + "grad_norm": 0.3539985120296478, + "learning_rate": 4.674790417394145e-05, + "loss": 1.8645, + "step": 8662 + }, + { + "epoch": 0.9138185654008438, + "grad_norm": 0.36061811447143555, + "learning_rate": 4.6683066101354215e-05, + "loss": 1.9089, + "step": 8663 + }, + { + "epoch": 0.9139240506329114, + "grad_norm": 0.34955164790153503, + "learning_rate": 4.661831795741148e-05, + "loss": 1.874, + "step": 8664 + }, + { + "epoch": 0.9140295358649789, + "grad_norm": 0.35233190655708313, + "learning_rate": 4.655365961738467e-05, + "loss": 1.8858, + 
"step": 8665 + }, + { + "epoch": 0.9141350210970464, + "grad_norm": 0.34019365906715393, + "learning_rate": 4.648909095671825e-05, + "loss": 1.856, + "step": 8666 + }, + { + "epoch": 0.9142405063291139, + "grad_norm": 0.34298643469810486, + "learning_rate": 4.6424611851029316e-05, + "loss": 1.8593, + "step": 8667 + }, + { + "epoch": 0.9143459915611815, + "grad_norm": 0.35041719675064087, + "learning_rate": 4.63602221761076e-05, + "loss": 1.8465, + "step": 8668 + }, + { + "epoch": 0.9144514767932489, + "grad_norm": 0.33823978900909424, + "learning_rate": 4.629592180791501e-05, + "loss": 1.8536, + "step": 8669 + }, + { + "epoch": 0.9145569620253164, + "grad_norm": 0.3414214849472046, + "learning_rate": 4.623171062258557e-05, + "loss": 1.8751, + "step": 8670 + }, + { + "epoch": 0.914662447257384, + "grad_norm": 0.3864794969558716, + "learning_rate": 4.616758849642509e-05, + "loss": 1.8945, + "step": 8671 + }, + { + "epoch": 0.9147679324894514, + "grad_norm": 0.3407982289791107, + "learning_rate": 4.610355530591087e-05, + "loss": 1.9114, + "step": 8672 + }, + { + "epoch": 0.914873417721519, + "grad_norm": 0.3391370475292206, + "learning_rate": 4.6039610927691646e-05, + "loss": 1.8479, + "step": 8673 + }, + { + "epoch": 0.9149789029535865, + "grad_norm": 0.33643922209739685, + "learning_rate": 4.597575523858712e-05, + "loss": 1.8568, + "step": 8674 + }, + { + "epoch": 0.915084388185654, + "grad_norm": 0.3629697859287262, + "learning_rate": 4.5911988115587936e-05, + "loss": 1.8899, + "step": 8675 + }, + { + "epoch": 0.9151898734177215, + "grad_norm": 0.35504913330078125, + "learning_rate": 4.584830943585533e-05, + "loss": 1.8562, + "step": 8676 + }, + { + "epoch": 0.9152953586497891, + "grad_norm": 0.3684694468975067, + "learning_rate": 4.5784719076720844e-05, + "loss": 1.8623, + "step": 8677 + }, + { + "epoch": 0.9154008438818565, + "grad_norm": 0.3431739807128906, + "learning_rate": 4.572121691568624e-05, + "loss": 1.881, + "step": 8678 + }, + { + "epoch": 
0.915506329113924, + "grad_norm": 0.34779807925224304, + "learning_rate": 4.565780283042316e-05, + "loss": 1.9014, + "step": 8679 + }, + { + "epoch": 0.9156118143459916, + "grad_norm": 0.32732445001602173, + "learning_rate": 4.559447669877288e-05, + "loss": 1.8348, + "step": 8680 + }, + { + "epoch": 0.915717299578059, + "grad_norm": 0.34319692850112915, + "learning_rate": 4.553123839874615e-05, + "loss": 1.8855, + "step": 8681 + }, + { + "epoch": 0.9158227848101266, + "grad_norm": 0.3459620475769043, + "learning_rate": 4.546808780852286e-05, + "loss": 1.8598, + "step": 8682 + }, + { + "epoch": 0.9159282700421941, + "grad_norm": 0.3485899269580841, + "learning_rate": 4.5405024806451926e-05, + "loss": 1.8534, + "step": 8683 + }, + { + "epoch": 0.9160337552742616, + "grad_norm": 0.37529218196868896, + "learning_rate": 4.534204927105098e-05, + "loss": 1.8722, + "step": 8684 + }, + { + "epoch": 0.9161392405063291, + "grad_norm": 0.3417487144470215, + "learning_rate": 4.5279161081006076e-05, + "loss": 1.8777, + "step": 8685 + }, + { + "epoch": 0.9162447257383967, + "grad_norm": 0.3540792167186737, + "learning_rate": 4.521636011517162e-05, + "loss": 1.8628, + "step": 8686 + }, + { + "epoch": 0.9163502109704641, + "grad_norm": 0.3471720814704895, + "learning_rate": 4.515364625256998e-05, + "loss": 1.8514, + "step": 8687 + }, + { + "epoch": 0.9164556962025316, + "grad_norm": 0.36922386288642883, + "learning_rate": 4.5091019372391345e-05, + "loss": 1.8851, + "step": 8688 + }, + { + "epoch": 0.9165611814345992, + "grad_norm": 0.3549286723136902, + "learning_rate": 4.502847935399348e-05, + "loss": 1.9005, + "step": 8689 + }, + { + "epoch": 0.9166666666666666, + "grad_norm": 0.36493319272994995, + "learning_rate": 4.496602607690141e-05, + "loss": 1.8357, + "step": 8690 + }, + { + "epoch": 0.9167721518987342, + "grad_norm": 0.3445006310939789, + "learning_rate": 4.490365942080736e-05, + "loss": 1.9005, + "step": 8691 + }, + { + "epoch": 0.9168776371308017, + "grad_norm": 
0.3500535190105438, + "learning_rate": 4.48413792655703e-05, + "loss": 1.8679, + "step": 8692 + }, + { + "epoch": 0.9169831223628692, + "grad_norm": 0.350360244512558, + "learning_rate": 4.4779185491215926e-05, + "loss": 1.8611, + "step": 8693 + }, + { + "epoch": 0.9170886075949367, + "grad_norm": 0.3478192389011383, + "learning_rate": 4.471707797793631e-05, + "loss": 1.8739, + "step": 8694 + }, + { + "epoch": 0.9171940928270043, + "grad_norm": 0.3516528606414795, + "learning_rate": 4.465505660608965e-05, + "loss": 1.8466, + "step": 8695 + }, + { + "epoch": 0.9172995780590717, + "grad_norm": 0.361005038022995, + "learning_rate": 4.459312125620017e-05, + "loss": 1.8593, + "step": 8696 + }, + { + "epoch": 0.9174050632911392, + "grad_norm": 0.36328795552253723, + "learning_rate": 4.4531271808957704e-05, + "loss": 1.8474, + "step": 8697 + }, + { + "epoch": 0.9175105485232068, + "grad_norm": 0.34877443313598633, + "learning_rate": 4.4469508145217626e-05, + "loss": 1.8663, + "step": 8698 + }, + { + "epoch": 0.9176160337552742, + "grad_norm": 0.3381713628768921, + "learning_rate": 4.440783014600059e-05, + "loss": 1.8942, + "step": 8699 + }, + { + "epoch": 0.9177215189873418, + "grad_norm": 0.3727704584598541, + "learning_rate": 4.434623769249217e-05, + "loss": 1.8593, + "step": 8700 + }, + { + "epoch": 0.9178270042194093, + "grad_norm": 0.34319955110549927, + "learning_rate": 4.428473066604285e-05, + "loss": 1.8625, + "step": 8701 + }, + { + "epoch": 0.9179324894514768, + "grad_norm": 0.34485653042793274, + "learning_rate": 4.422330894816757e-05, + "loss": 1.8635, + "step": 8702 + }, + { + "epoch": 0.9180379746835443, + "grad_norm": 0.36771059036254883, + "learning_rate": 4.4161972420545684e-05, + "loss": 1.8789, + "step": 8703 + }, + { + "epoch": 0.9181434599156119, + "grad_norm": 0.33913707733154297, + "learning_rate": 4.410072096502064e-05, + "loss": 1.8469, + "step": 8704 + }, + { + "epoch": 0.9182489451476793, + "grad_norm": 0.3621068596839905, + "learning_rate": 
4.403955446359971e-05, + "loss": 1.8784, + "step": 8705 + }, + { + "epoch": 0.9183544303797468, + "grad_norm": 0.3691217005252838, + "learning_rate": 4.397847279845391e-05, + "loss": 1.8752, + "step": 8706 + }, + { + "epoch": 0.9184599156118144, + "grad_norm": 0.3621848225593567, + "learning_rate": 4.391747585191758e-05, + "loss": 1.8924, + "step": 8707 + }, + { + "epoch": 0.9185654008438818, + "grad_norm": 0.3475242853164673, + "learning_rate": 4.385656350648834e-05, + "loss": 1.8716, + "step": 8708 + }, + { + "epoch": 0.9186708860759494, + "grad_norm": 0.36372843384742737, + "learning_rate": 4.3795735644826776e-05, + "loss": 1.8542, + "step": 8709 + }, + { + "epoch": 0.9187763713080169, + "grad_norm": 0.3508041203022003, + "learning_rate": 4.373499214975615e-05, + "loss": 1.8904, + "step": 8710 + }, + { + "epoch": 0.9188818565400844, + "grad_norm": 0.3510255217552185, + "learning_rate": 4.367433290426233e-05, + "loss": 1.8828, + "step": 8711 + }, + { + "epoch": 0.9189873417721519, + "grad_norm": 0.3396248519420624, + "learning_rate": 4.361375779149342e-05, + "loss": 1.8647, + "step": 8712 + }, + { + "epoch": 0.9190928270042195, + "grad_norm": 0.3256036341190338, + "learning_rate": 4.3553266694759614e-05, + "loss": 1.8762, + "step": 8713 + }, + { + "epoch": 0.9191983122362869, + "grad_norm": 0.32935503125190735, + "learning_rate": 4.3492859497533e-05, + "loss": 1.8487, + "step": 8714 + }, + { + "epoch": 0.9193037974683544, + "grad_norm": 0.34450045228004456, + "learning_rate": 4.343253608344718e-05, + "loss": 1.8677, + "step": 8715 + }, + { + "epoch": 0.919409282700422, + "grad_norm": 0.34929731488227844, + "learning_rate": 4.337229633629727e-05, + "loss": 1.8322, + "step": 8716 + }, + { + "epoch": 0.9195147679324894, + "grad_norm": 0.35431620478630066, + "learning_rate": 4.3312140140039447e-05, + "loss": 1.8098, + "step": 8717 + }, + { + "epoch": 0.919620253164557, + "grad_norm": 0.3434685170650482, + "learning_rate": 4.3252067378790934e-05, + "loss": 1.8716, + 
"step": 8718 + }, + { + "epoch": 0.9197257383966245, + "grad_norm": 0.3493737578392029, + "learning_rate": 4.319207793682965e-05, + "loss": 1.8657, + "step": 8719 + }, + { + "epoch": 0.919831223628692, + "grad_norm": 0.3416723608970642, + "learning_rate": 4.313217169859397e-05, + "loss": 1.8443, + "step": 8720 + }, + { + "epoch": 0.9199367088607595, + "grad_norm": 0.33953043818473816, + "learning_rate": 4.3072348548682595e-05, + "loss": 1.8918, + "step": 8721 + }, + { + "epoch": 0.9200421940928271, + "grad_norm": 0.3563733696937561, + "learning_rate": 4.3012608371854326e-05, + "loss": 1.8977, + "step": 8722 + }, + { + "epoch": 0.9201476793248945, + "grad_norm": 0.342715322971344, + "learning_rate": 4.2952951053027684e-05, + "loss": 1.8741, + "step": 8723 + }, + { + "epoch": 0.920253164556962, + "grad_norm": 0.35217320919036865, + "learning_rate": 4.2893376477280934e-05, + "loss": 1.9063, + "step": 8724 + }, + { + "epoch": 0.9203586497890295, + "grad_norm": 0.3363710939884186, + "learning_rate": 4.283388452985162e-05, + "loss": 1.8458, + "step": 8725 + }, + { + "epoch": 0.920464135021097, + "grad_norm": 0.32919296622276306, + "learning_rate": 4.2774475096136525e-05, + "loss": 1.8253, + "step": 8726 + }, + { + "epoch": 0.9205696202531646, + "grad_norm": 0.3647512197494507, + "learning_rate": 4.271514806169141e-05, + "loss": 1.8522, + "step": 8727 + }, + { + "epoch": 0.920675105485232, + "grad_norm": 0.35393911600112915, + "learning_rate": 4.265590331223067e-05, + "loss": 1.8339, + "step": 8728 + }, + { + "epoch": 0.9207805907172996, + "grad_norm": 0.34656718373298645, + "learning_rate": 4.259674073362732e-05, + "loss": 1.9219, + "step": 8729 + }, + { + "epoch": 0.9208860759493671, + "grad_norm": 0.35004255175590515, + "learning_rate": 4.253766021191256e-05, + "loss": 1.8561, + "step": 8730 + }, + { + "epoch": 0.9209915611814345, + "grad_norm": 0.3466360569000244, + "learning_rate": 4.247866163327575e-05, + "loss": 1.8533, + "step": 8731 + }, + { + "epoch": 
0.9210970464135021, + "grad_norm": 0.36627352237701416, + "learning_rate": 4.241974488406408e-05, + "loss": 1.8599, + "step": 8732 + }, + { + "epoch": 0.9212025316455696, + "grad_norm": 0.3504071831703186, + "learning_rate": 4.236090985078232e-05, + "loss": 1.8899, + "step": 8733 + }, + { + "epoch": 0.9213080168776371, + "grad_norm": 0.3377840220928192, + "learning_rate": 4.230215642009275e-05, + "loss": 1.8502, + "step": 8734 + }, + { + "epoch": 0.9214135021097046, + "grad_norm": 0.3551312983036041, + "learning_rate": 4.224348447881473e-05, + "loss": 1.8378, + "step": 8735 + }, + { + "epoch": 0.9215189873417722, + "grad_norm": 0.3498280644416809, + "learning_rate": 4.218489391392469e-05, + "loss": 1.8538, + "step": 8736 + }, + { + "epoch": 0.9216244725738396, + "grad_norm": 0.34575676918029785, + "learning_rate": 4.212638461255582e-05, + "loss": 1.8381, + "step": 8737 + }, + { + "epoch": 0.9217299578059072, + "grad_norm": 0.3426664471626282, + "learning_rate": 4.206795646199778e-05, + "loss": 1.8654, + "step": 8738 + }, + { + "epoch": 0.9218354430379747, + "grad_norm": 0.3367648124694824, + "learning_rate": 4.200960934969664e-05, + "loss": 1.8695, + "step": 8739 + }, + { + "epoch": 0.9219409282700421, + "grad_norm": 0.3611074984073639, + "learning_rate": 4.19513431632545e-05, + "loss": 1.8519, + "step": 8740 + }, + { + "epoch": 0.9220464135021097, + "grad_norm": 0.35397106409072876, + "learning_rate": 4.1893157790429404e-05, + "loss": 1.8645, + "step": 8741 + }, + { + "epoch": 0.9221518987341772, + "grad_norm": 0.3493363559246063, + "learning_rate": 4.1835053119135096e-05, + "loss": 1.8431, + "step": 8742 + }, + { + "epoch": 0.9222573839662447, + "grad_norm": 0.35510754585266113, + "learning_rate": 4.17770290374407e-05, + "loss": 1.8225, + "step": 8743 + }, + { + "epoch": 0.9223628691983122, + "grad_norm": 0.32793495059013367, + "learning_rate": 4.171908543357067e-05, + "loss": 1.8619, + "step": 8744 + }, + { + "epoch": 0.9224683544303798, + "grad_norm": 
0.33601614832878113, + "learning_rate": 4.166122219590441e-05, + "loss": 1.886, + "step": 8745 + }, + { + "epoch": 0.9225738396624472, + "grad_norm": 0.3440276086330414, + "learning_rate": 4.1603439212976205e-05, + "loss": 1.8863, + "step": 8746 + }, + { + "epoch": 0.9226793248945148, + "grad_norm": 0.3601619303226471, + "learning_rate": 4.1545736373474935e-05, + "loss": 1.8711, + "step": 8747 + }, + { + "epoch": 0.9227848101265823, + "grad_norm": 0.33332470059394836, + "learning_rate": 4.148811356624379e-05, + "loss": 1.8456, + "step": 8748 + }, + { + "epoch": 0.9228902953586497, + "grad_norm": 0.3776458203792572, + "learning_rate": 4.143057068028024e-05, + "loss": 1.8706, + "step": 8749 + }, + { + "epoch": 0.9229957805907173, + "grad_norm": 0.3431142568588257, + "learning_rate": 4.1373107604735626e-05, + "loss": 1.8798, + "step": 8750 + }, + { + "epoch": 0.9231012658227848, + "grad_norm": 0.3326657712459564, + "learning_rate": 4.1315724228915066e-05, + "loss": 1.8313, + "step": 8751 + }, + { + "epoch": 0.9232067510548523, + "grad_norm": 0.37708431482315063, + "learning_rate": 4.125842044227725e-05, + "loss": 1.8765, + "step": 8752 + }, + { + "epoch": 0.9233122362869198, + "grad_norm": 0.34694162011146545, + "learning_rate": 4.120119613443409e-05, + "loss": 1.8312, + "step": 8753 + }, + { + "epoch": 0.9234177215189874, + "grad_norm": 0.34711185097694397, + "learning_rate": 4.114405119515069e-05, + "loss": 1.8486, + "step": 8754 + }, + { + "epoch": 0.9235232067510548, + "grad_norm": 0.33898431062698364, + "learning_rate": 4.1086985514344996e-05, + "loss": 1.8621, + "step": 8755 + }, + { + "epoch": 0.9236286919831224, + "grad_norm": 0.3311728537082672, + "learning_rate": 4.102999898208766e-05, + "loss": 1.842, + "step": 8756 + }, + { + "epoch": 0.9237341772151899, + "grad_norm": 0.3274940252304077, + "learning_rate": 4.0973091488601826e-05, + "loss": 1.8752, + "step": 8757 + }, + { + "epoch": 0.9238396624472573, + "grad_norm": 0.35166993737220764, + "learning_rate": 
4.091626292426282e-05, + "loss": 1.7919, + "step": 8758 + }, + { + "epoch": 0.9239451476793249, + "grad_norm": 0.34898656606674194, + "learning_rate": 4.0859513179598096e-05, + "loss": 1.865, + "step": 8759 + }, + { + "epoch": 0.9240506329113924, + "grad_norm": 0.3483086824417114, + "learning_rate": 4.0802842145286876e-05, + "loss": 1.846, + "step": 8760 + }, + { + "epoch": 0.9241561181434599, + "grad_norm": 0.35850584506988525, + "learning_rate": 4.074624971216005e-05, + "loss": 1.8749, + "step": 8761 + }, + { + "epoch": 0.9242616033755274, + "grad_norm": 0.34625008702278137, + "learning_rate": 4.0689735771199944e-05, + "loss": 1.8288, + "step": 8762 + }, + { + "epoch": 0.924367088607595, + "grad_norm": 0.33492088317871094, + "learning_rate": 4.0633300213540004e-05, + "loss": 1.9046, + "step": 8763 + }, + { + "epoch": 0.9244725738396624, + "grad_norm": 0.34259846806526184, + "learning_rate": 4.057694293046475e-05, + "loss": 1.8636, + "step": 8764 + }, + { + "epoch": 0.92457805907173, + "grad_norm": 0.3445339798927307, + "learning_rate": 4.052066381340948e-05, + "loss": 1.849, + "step": 8765 + }, + { + "epoch": 0.9246835443037975, + "grad_norm": 0.36351242661476135, + "learning_rate": 4.0464462753960006e-05, + "loss": 1.8526, + "step": 8766 + }, + { + "epoch": 0.924789029535865, + "grad_norm": 0.34142330288887024, + "learning_rate": 4.040833964385259e-05, + "loss": 1.8839, + "step": 8767 + }, + { + "epoch": 0.9248945147679325, + "grad_norm": 0.3477879464626312, + "learning_rate": 4.035229437497357e-05, + "loss": 1.8592, + "step": 8768 + }, + { + "epoch": 0.925, + "grad_norm": 0.3548937439918518, + "learning_rate": 4.02963268393593e-05, + "loss": 1.8497, + "step": 8769 + }, + { + "epoch": 0.9251054852320675, + "grad_norm": 0.3506772220134735, + "learning_rate": 4.024043692919589e-05, + "loss": 1.8268, + "step": 8770 + }, + { + "epoch": 0.925210970464135, + "grad_norm": 0.33368635177612305, + "learning_rate": 4.018462453681889e-05, + "loss": 1.8808, + "step": 8771 + 
}, + { + "epoch": 0.9253164556962026, + "grad_norm": 0.3589679002761841, + "learning_rate": 4.0128889554713276e-05, + "loss": 1.8335, + "step": 8772 + }, + { + "epoch": 0.92542194092827, + "grad_norm": 0.35047242045402527, + "learning_rate": 4.007323187551308e-05, + "loss": 1.8688, + "step": 8773 + }, + { + "epoch": 0.9255274261603376, + "grad_norm": 0.3457986116409302, + "learning_rate": 4.0017651392001285e-05, + "loss": 1.8446, + "step": 8774 + }, + { + "epoch": 0.9256329113924051, + "grad_norm": 0.34780099987983704, + "learning_rate": 3.9962147997109584e-05, + "loss": 1.8699, + "step": 8775 + }, + { + "epoch": 0.9257383966244725, + "grad_norm": 0.35665157437324524, + "learning_rate": 3.990672158391812e-05, + "loss": 1.87, + "step": 8776 + }, + { + "epoch": 0.9258438818565401, + "grad_norm": 0.34194257855415344, + "learning_rate": 3.9851372045655414e-05, + "loss": 1.8599, + "step": 8777 + }, + { + "epoch": 0.9259493670886076, + "grad_norm": 0.3492591381072998, + "learning_rate": 3.979609927569798e-05, + "loss": 1.8659, + "step": 8778 + }, + { + "epoch": 0.9260548523206751, + "grad_norm": 0.3458264172077179, + "learning_rate": 3.974090316757029e-05, + "loss": 1.8685, + "step": 8779 + }, + { + "epoch": 0.9261603375527426, + "grad_norm": 0.32955795526504517, + "learning_rate": 3.968578361494449e-05, + "loss": 1.8852, + "step": 8780 + }, + { + "epoch": 0.9262658227848102, + "grad_norm": 0.35940083861351013, + "learning_rate": 3.963074051164014e-05, + "loss": 1.8565, + "step": 8781 + }, + { + "epoch": 0.9263713080168776, + "grad_norm": 0.33771049976348877, + "learning_rate": 3.957577375162414e-05, + "loss": 1.9001, + "step": 8782 + }, + { + "epoch": 0.9264767932489452, + "grad_norm": 0.33305585384368896, + "learning_rate": 3.952088322901039e-05, + "loss": 1.8336, + "step": 8783 + }, + { + "epoch": 0.9265822784810127, + "grad_norm": 0.3590802848339081, + "learning_rate": 3.946606883805971e-05, + "loss": 1.8488, + "step": 8784 + }, + { + "epoch": 0.9266877637130801, + 
"grad_norm": 0.36709064245224, + "learning_rate": 3.941133047317956e-05, + "loss": 1.8751, + "step": 8785 + }, + { + "epoch": 0.9267932489451477, + "grad_norm": 0.346365749835968, + "learning_rate": 3.9356668028923825e-05, + "loss": 1.8583, + "step": 8786 + }, + { + "epoch": 0.9268987341772152, + "grad_norm": 0.32914450764656067, + "learning_rate": 3.930208139999269e-05, + "loss": 1.841, + "step": 8787 + }, + { + "epoch": 0.9270042194092827, + "grad_norm": 0.3532019555568695, + "learning_rate": 3.9247570481232314e-05, + "loss": 1.8575, + "step": 8788 + }, + { + "epoch": 0.9271097046413502, + "grad_norm": 0.3479084074497223, + "learning_rate": 3.919313516763478e-05, + "loss": 1.8631, + "step": 8789 + }, + { + "epoch": 0.9272151898734177, + "grad_norm": 0.3477379083633423, + "learning_rate": 3.91387753543378e-05, + "loss": 1.8725, + "step": 8790 + }, + { + "epoch": 0.9273206751054852, + "grad_norm": 0.3422442674636841, + "learning_rate": 3.908449093662446e-05, + "loss": 1.8725, + "step": 8791 + }, + { + "epoch": 0.9274261603375528, + "grad_norm": 0.34919309616088867, + "learning_rate": 3.9030281809923186e-05, + "loss": 1.843, + "step": 8792 + }, + { + "epoch": 0.9275316455696202, + "grad_norm": 0.3344503939151764, + "learning_rate": 3.897614786980734e-05, + "loss": 1.8686, + "step": 8793 + }, + { + "epoch": 0.9276371308016877, + "grad_norm": 0.32207396626472473, + "learning_rate": 3.892208901199521e-05, + "loss": 1.8464, + "step": 8794 + }, + { + "epoch": 0.9277426160337553, + "grad_norm": 0.3368719220161438, + "learning_rate": 3.886810513234967e-05, + "loss": 1.8507, + "step": 8795 + }, + { + "epoch": 0.9278481012658227, + "grad_norm": 0.3336300253868103, + "learning_rate": 3.881419612687803e-05, + "loss": 1.882, + "step": 8796 + }, + { + "epoch": 0.9279535864978903, + "grad_norm": 0.3382047712802887, + "learning_rate": 3.8760361891731874e-05, + "loss": 1.8737, + "step": 8797 + }, + { + "epoch": 0.9280590717299578, + "grad_norm": 0.339801162481308, + 
"learning_rate": 3.870660232320675e-05, + "loss": 1.8553, + "step": 8798 + }, + { + "epoch": 0.9281645569620253, + "grad_norm": 0.36121228337287903, + "learning_rate": 3.8652917317742106e-05, + "loss": 1.8472, + "step": 8799 + }, + { + "epoch": 0.9282700421940928, + "grad_norm": 0.3579988479614258, + "learning_rate": 3.859930677192103e-05, + "loss": 1.8641, + "step": 8800 + }, + { + "epoch": 0.9283755274261604, + "grad_norm": 0.3309142291545868, + "learning_rate": 3.854577058246998e-05, + "loss": 1.8847, + "step": 8801 + }, + { + "epoch": 0.9284810126582278, + "grad_norm": 0.34092745184898376, + "learning_rate": 3.8492308646258714e-05, + "loss": 1.853, + "step": 8802 + }, + { + "epoch": 0.9285864978902953, + "grad_norm": 0.3526779115200043, + "learning_rate": 3.843892086029999e-05, + "loss": 1.8666, + "step": 8803 + }, + { + "epoch": 0.9286919831223629, + "grad_norm": 0.33929169178009033, + "learning_rate": 3.8385607121749426e-05, + "loss": 1.8565, + "step": 8804 + }, + { + "epoch": 0.9287974683544303, + "grad_norm": 0.342775821685791, + "learning_rate": 3.83323673279053e-05, + "loss": 1.8607, + "step": 8805 + }, + { + "epoch": 0.9289029535864979, + "grad_norm": 0.3389759361743927, + "learning_rate": 3.827920137620828e-05, + "loss": 1.8684, + "step": 8806 + }, + { + "epoch": 0.9290084388185654, + "grad_norm": 0.3613205850124359, + "learning_rate": 3.822610916424134e-05, + "loss": 1.8696, + "step": 8807 + }, + { + "epoch": 0.9291139240506329, + "grad_norm": 0.34732866287231445, + "learning_rate": 3.81730905897295e-05, + "loss": 1.8948, + "step": 8808 + }, + { + "epoch": 0.9292194092827004, + "grad_norm": 0.344350665807724, + "learning_rate": 3.812014555053955e-05, + "loss": 1.9068, + "step": 8809 + }, + { + "epoch": 0.929324894514768, + "grad_norm": 0.3476255536079407, + "learning_rate": 3.806727394468005e-05, + "loss": 1.9126, + "step": 8810 + }, + { + "epoch": 0.9294303797468354, + "grad_norm": 0.3343738615512848, + "learning_rate": 3.801447567030094e-05, + 
"loss": 1.8643, + "step": 8811 + }, + { + "epoch": 0.929535864978903, + "grad_norm": 0.35643598437309265, + "learning_rate": 3.796175062569344e-05, + "loss": 1.8428, + "step": 8812 + }, + { + "epoch": 0.9296413502109705, + "grad_norm": 0.3571321368217468, + "learning_rate": 3.790909870928989e-05, + "loss": 1.8566, + "step": 8813 + }, + { + "epoch": 0.9297468354430379, + "grad_norm": 0.35805797576904297, + "learning_rate": 3.785651981966341e-05, + "loss": 1.884, + "step": 8814 + }, + { + "epoch": 0.9298523206751055, + "grad_norm": 0.35342898964881897, + "learning_rate": 3.7804013855527896e-05, + "loss": 1.8414, + "step": 8815 + }, + { + "epoch": 0.929957805907173, + "grad_norm": 0.3403395414352417, + "learning_rate": 3.7751580715737614e-05, + "loss": 1.9033, + "step": 8816 + }, + { + "epoch": 0.9300632911392405, + "grad_norm": 0.3463051915168762, + "learning_rate": 3.7699220299287214e-05, + "loss": 1.8668, + "step": 8817 + }, + { + "epoch": 0.930168776371308, + "grad_norm": 0.3536339998245239, + "learning_rate": 3.764693250531141e-05, + "loss": 1.8522, + "step": 8818 + }, + { + "epoch": 0.9302742616033756, + "grad_norm": 0.34870290756225586, + "learning_rate": 3.759471723308477e-05, + "loss": 1.8561, + "step": 8819 + }, + { + "epoch": 0.930379746835443, + "grad_norm": 0.35086846351623535, + "learning_rate": 3.7542574382021635e-05, + "loss": 1.8568, + "step": 8820 + }, + { + "epoch": 0.9304852320675105, + "grad_norm": 0.34685343503952026, + "learning_rate": 3.7490503851675777e-05, + "loss": 1.8868, + "step": 8821 + }, + { + "epoch": 0.9305907172995781, + "grad_norm": 0.3579290211200714, + "learning_rate": 3.7438505541740356e-05, + "loss": 1.9428, + "step": 8822 + }, + { + "epoch": 0.9306962025316455, + "grad_norm": 0.3484593331813812, + "learning_rate": 3.738657935204763e-05, + "loss": 1.8654, + "step": 8823 + }, + { + "epoch": 0.9308016877637131, + "grad_norm": 0.36168235540390015, + "learning_rate": 3.733472518256876e-05, + "loss": 1.898, + "step": 8824 + }, + { + 
"epoch": 0.9309071729957806, + "grad_norm": 0.3427902162075043, + "learning_rate": 3.7282942933413696e-05, + "loss": 1.8644, + "step": 8825 + }, + { + "epoch": 0.9310126582278481, + "grad_norm": 0.35809555649757385, + "learning_rate": 3.723123250483086e-05, + "loss": 1.8676, + "step": 8826 + }, + { + "epoch": 0.9311181434599156, + "grad_norm": 0.3424351215362549, + "learning_rate": 3.717959379720711e-05, + "loss": 1.8535, + "step": 8827 + }, + { + "epoch": 0.9312236286919832, + "grad_norm": 0.3495258092880249, + "learning_rate": 3.712802671106742e-05, + "loss": 1.889, + "step": 8828 + }, + { + "epoch": 0.9313291139240506, + "grad_norm": 0.3606933057308197, + "learning_rate": 3.707653114707471e-05, + "loss": 1.8614, + "step": 8829 + }, + { + "epoch": 0.9314345991561181, + "grad_norm": 0.3529948890209198, + "learning_rate": 3.702510700602975e-05, + "loss": 1.8328, + "step": 8830 + }, + { + "epoch": 0.9315400843881857, + "grad_norm": 0.34618350863456726, + "learning_rate": 3.6973754188870806e-05, + "loss": 1.8199, + "step": 8831 + }, + { + "epoch": 0.9316455696202531, + "grad_norm": 0.3179613947868347, + "learning_rate": 3.692247259667361e-05, + "loss": 1.8744, + "step": 8832 + }, + { + "epoch": 0.9317510548523207, + "grad_norm": 0.3332843780517578, + "learning_rate": 3.687126213065109e-05, + "loss": 1.8539, + "step": 8833 + }, + { + "epoch": 0.9318565400843882, + "grad_norm": 0.36140134930610657, + "learning_rate": 3.682012269215314e-05, + "loss": 1.8577, + "step": 8834 + }, + { + "epoch": 0.9319620253164557, + "grad_norm": 0.33879533410072327, + "learning_rate": 3.676905418266655e-05, + "loss": 1.8706, + "step": 8835 + }, + { + "epoch": 0.9320675105485232, + "grad_norm": 0.3550735414028168, + "learning_rate": 3.671805650381468e-05, + "loss": 1.8836, + "step": 8836 + }, + { + "epoch": 0.9321729957805908, + "grad_norm": 0.3560705780982971, + "learning_rate": 3.666712955735737e-05, + "loss": 1.9054, + "step": 8837 + }, + { + "epoch": 0.9322784810126582, + "grad_norm": 
0.3724788427352905, + "learning_rate": 3.661627324519074e-05, + "loss": 1.8628, + "step": 8838 + }, + { + "epoch": 0.9323839662447257, + "grad_norm": 0.33549997210502625, + "learning_rate": 3.6565487469346904e-05, + "loss": 1.8531, + "step": 8839 + }, + { + "epoch": 0.9324894514767933, + "grad_norm": 0.36318647861480713, + "learning_rate": 3.651477213199394e-05, + "loss": 1.877, + "step": 8840 + }, + { + "epoch": 0.9325949367088607, + "grad_norm": 0.3485604524612427, + "learning_rate": 3.6464127135435536e-05, + "loss": 1.8452, + "step": 8841 + }, + { + "epoch": 0.9327004219409283, + "grad_norm": 0.37284278869628906, + "learning_rate": 3.641355238211095e-05, + "loss": 1.9147, + "step": 8842 + }, + { + "epoch": 0.9328059071729958, + "grad_norm": 0.34280630946159363, + "learning_rate": 3.6363047774594736e-05, + "loss": 1.851, + "step": 8843 + }, + { + "epoch": 0.9329113924050633, + "grad_norm": 0.3455776572227478, + "learning_rate": 3.631261321559652e-05, + "loss": 1.8758, + "step": 8844 + }, + { + "epoch": 0.9330168776371308, + "grad_norm": 0.3629375696182251, + "learning_rate": 3.626224860796096e-05, + "loss": 1.8322, + "step": 8845 + }, + { + "epoch": 0.9331223628691984, + "grad_norm": 0.34724119305610657, + "learning_rate": 3.6211953854667373e-05, + "loss": 1.8756, + "step": 8846 + }, + { + "epoch": 0.9332278481012658, + "grad_norm": 0.32945510745048523, + "learning_rate": 3.616172885882972e-05, + "loss": 1.8197, + "step": 8847 + }, + { + "epoch": 0.9333333333333333, + "grad_norm": 0.3492596447467804, + "learning_rate": 3.6111573523696295e-05, + "loss": 1.8779, + "step": 8848 + }, + { + "epoch": 0.9334388185654009, + "grad_norm": 0.3427576720714569, + "learning_rate": 3.606148775264958e-05, + "loss": 1.8808, + "step": 8849 + }, + { + "epoch": 0.9335443037974683, + "grad_norm": 0.33952680230140686, + "learning_rate": 3.601147144920609e-05, + "loss": 1.8586, + "step": 8850 + }, + { + "epoch": 0.9336497890295359, + "grad_norm": 0.3437604010105133, + "learning_rate": 
3.596152451701616e-05, + "loss": 1.8632, + "step": 8851 + }, + { + "epoch": 0.9337552742616034, + "grad_norm": 0.3414642810821533, + "learning_rate": 3.591164685986372e-05, + "loss": 1.8796, + "step": 8852 + }, + { + "epoch": 0.9338607594936709, + "grad_norm": 0.3367331624031067, + "learning_rate": 3.58618383816662e-05, + "loss": 1.8647, + "step": 8853 + }, + { + "epoch": 0.9339662447257384, + "grad_norm": 0.33187374472618103, + "learning_rate": 3.581209898647425e-05, + "loss": 1.8623, + "step": 8854 + }, + { + "epoch": 0.9340717299578059, + "grad_norm": 0.3355811536312103, + "learning_rate": 3.576242857847162e-05, + "loss": 1.8678, + "step": 8855 + }, + { + "epoch": 0.9341772151898734, + "grad_norm": 0.355213463306427, + "learning_rate": 3.5712827061974984e-05, + "loss": 1.8667, + "step": 8856 + }, + { + "epoch": 0.934282700421941, + "grad_norm": 0.34692129492759705, + "learning_rate": 3.566329434143365e-05, + "loss": 1.8438, + "step": 8857 + }, + { + "epoch": 0.9343881856540084, + "grad_norm": 0.3471428453922272, + "learning_rate": 3.5613830321429545e-05, + "loss": 1.8705, + "step": 8858 + }, + { + "epoch": 0.9344936708860759, + "grad_norm": 0.34408921003341675, + "learning_rate": 3.5564434906676834e-05, + "loss": 1.8842, + "step": 8859 + }, + { + "epoch": 0.9345991561181435, + "grad_norm": 0.34153273701667786, + "learning_rate": 3.5515108002021946e-05, + "loss": 1.8262, + "step": 8860 + }, + { + "epoch": 0.9347046413502109, + "grad_norm": 0.34371429681777954, + "learning_rate": 3.5465849512443226e-05, + "loss": 1.8418, + "step": 8861 + }, + { + "epoch": 0.9348101265822785, + "grad_norm": 0.3388127088546753, + "learning_rate": 3.541665934305081e-05, + "loss": 1.9034, + "step": 8862 + }, + { + "epoch": 0.934915611814346, + "grad_norm": 0.3308006823062897, + "learning_rate": 3.5367537399086476e-05, + "loss": 1.8745, + "step": 8863 + }, + { + "epoch": 0.9350210970464135, + "grad_norm": 0.35096070170402527, + "learning_rate": 3.531848358592338e-05, + "loss": 1.8407, 
+ "step": 8864 + }, + { + "epoch": 0.935126582278481, + "grad_norm": 0.34301456809043884, + "learning_rate": 3.5269497809065966e-05, + "loss": 1.8703, + "step": 8865 + }, + { + "epoch": 0.9352320675105485, + "grad_norm": 0.3549436628818512, + "learning_rate": 3.522057997414975e-05, + "loss": 1.8839, + "step": 8866 + }, + { + "epoch": 0.935337552742616, + "grad_norm": 0.3482239246368408, + "learning_rate": 3.517172998694108e-05, + "loss": 1.8899, + "step": 8867 + }, + { + "epoch": 0.9354430379746835, + "grad_norm": 0.34025055170059204, + "learning_rate": 3.512294775333705e-05, + "loss": 1.8824, + "step": 8868 + }, + { + "epoch": 0.9355485232067511, + "grad_norm": 0.3584999442100525, + "learning_rate": 3.507423317936521e-05, + "loss": 1.857, + "step": 8869 + }, + { + "epoch": 0.9356540084388185, + "grad_norm": 0.3353740870952606, + "learning_rate": 3.502558617118352e-05, + "loss": 1.8251, + "step": 8870 + }, + { + "epoch": 0.9357594936708861, + "grad_norm": 0.3415069282054901, + "learning_rate": 3.4977006635080086e-05, + "loss": 1.9012, + "step": 8871 + }, + { + "epoch": 0.9358649789029536, + "grad_norm": 0.3374742567539215, + "learning_rate": 3.4928494477472926e-05, + "loss": 1.8289, + "step": 8872 + }, + { + "epoch": 0.935970464135021, + "grad_norm": 0.338225394487381, + "learning_rate": 3.488004960490994e-05, + "loss": 1.8961, + "step": 8873 + }, + { + "epoch": 0.9360759493670886, + "grad_norm": 0.3365306556224823, + "learning_rate": 3.4831671924068555e-05, + "loss": 1.845, + "step": 8874 + }, + { + "epoch": 0.9361814345991561, + "grad_norm": 0.3419894278049469, + "learning_rate": 3.47833613417557e-05, + "loss": 1.8536, + "step": 8875 + }, + { + "epoch": 0.9362869198312236, + "grad_norm": 0.33445775508880615, + "learning_rate": 3.473511776490756e-05, + "loss": 1.8717, + "step": 8876 + }, + { + "epoch": 0.9363924050632911, + "grad_norm": 0.3408292829990387, + "learning_rate": 3.4686941100589344e-05, + "loss": 1.8617, + "step": 8877 + }, + { + "epoch": 
0.9364978902953587, + "grad_norm": 0.33491501212120056, + "learning_rate": 3.463883125599521e-05, + "loss": 1.8917, + "step": 8878 + }, + { + "epoch": 0.9366033755274261, + "grad_norm": 0.3462611734867096, + "learning_rate": 3.4590788138448004e-05, + "loss": 1.8703, + "step": 8879 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 0.3417584002017975, + "learning_rate": 3.454281165539913e-05, + "loss": 1.8664, + "step": 8880 + }, + { + "epoch": 0.9368143459915612, + "grad_norm": 0.3332637846469879, + "learning_rate": 3.449490171442838e-05, + "loss": 1.8577, + "step": 8881 + }, + { + "epoch": 0.9369198312236287, + "grad_norm": 0.3523288667201996, + "learning_rate": 3.444705822324364e-05, + "loss": 1.8975, + "step": 8882 + }, + { + "epoch": 0.9370253164556962, + "grad_norm": 0.3411533832550049, + "learning_rate": 3.4399281089680924e-05, + "loss": 1.8392, + "step": 8883 + }, + { + "epoch": 0.9371308016877637, + "grad_norm": 0.35390353202819824, + "learning_rate": 3.435157022170396e-05, + "loss": 1.8774, + "step": 8884 + }, + { + "epoch": 0.9372362869198312, + "grad_norm": 0.34395211935043335, + "learning_rate": 3.430392552740421e-05, + "loss": 1.8419, + "step": 8885 + }, + { + "epoch": 0.9373417721518987, + "grad_norm": 0.34414589405059814, + "learning_rate": 3.42563469150006e-05, + "loss": 1.8789, + "step": 8886 + }, + { + "epoch": 0.9374472573839663, + "grad_norm": 0.32595735788345337, + "learning_rate": 3.42088342928393e-05, + "loss": 1.8702, + "step": 8887 + }, + { + "epoch": 0.9375527426160337, + "grad_norm": 0.33470624685287476, + "learning_rate": 3.416138756939366e-05, + "loss": 1.8499, + "step": 8888 + }, + { + "epoch": 0.9376582278481013, + "grad_norm": 0.3363019526004791, + "learning_rate": 3.411400665326393e-05, + "loss": 1.8723, + "step": 8889 + }, + { + "epoch": 0.9377637130801688, + "grad_norm": 0.330068975687027, + "learning_rate": 3.406669145317717e-05, + "loss": 1.8424, + "step": 8890 + }, + { + "epoch": 0.9378691983122363, + "grad_norm": 
0.3550276756286621, + "learning_rate": 3.401944187798702e-05, + "loss": 1.8534, + "step": 8891 + }, + { + "epoch": 0.9379746835443038, + "grad_norm": 0.3480680286884308, + "learning_rate": 3.397225783667351e-05, + "loss": 1.8577, + "step": 8892 + }, + { + "epoch": 0.9380801687763713, + "grad_norm": 0.3454885482788086, + "learning_rate": 3.3925139238342954e-05, + "loss": 1.8456, + "step": 8893 + }, + { + "epoch": 0.9381856540084388, + "grad_norm": 0.3397480249404907, + "learning_rate": 3.387808599222771e-05, + "loss": 1.8631, + "step": 8894 + }, + { + "epoch": 0.9382911392405063, + "grad_norm": 0.3444482982158661, + "learning_rate": 3.383109800768602e-05, + "loss": 1.8665, + "step": 8895 + }, + { + "epoch": 0.9383966244725739, + "grad_norm": 0.34289535880088806, + "learning_rate": 3.378417519420188e-05, + "loss": 1.8712, + "step": 8896 + }, + { + "epoch": 0.9385021097046413, + "grad_norm": 0.3626118004322052, + "learning_rate": 3.373731746138477e-05, + "loss": 1.8584, + "step": 8897 + }, + { + "epoch": 0.9386075949367089, + "grad_norm": 0.3555525541305542, + "learning_rate": 3.3690524718969586e-05, + "loss": 1.8374, + "step": 8898 + }, + { + "epoch": 0.9387130801687764, + "grad_norm": 0.3493459224700928, + "learning_rate": 3.364379687681642e-05, + "loss": 1.8766, + "step": 8899 + }, + { + "epoch": 0.9388185654008439, + "grad_norm": 0.3540320098400116, + "learning_rate": 3.359713384491036e-05, + "loss": 1.8325, + "step": 8900 + }, + { + "epoch": 0.9389240506329114, + "grad_norm": 0.34160587191581726, + "learning_rate": 3.355053553336137e-05, + "loss": 1.8681, + "step": 8901 + }, + { + "epoch": 0.939029535864979, + "grad_norm": 0.3392672836780548, + "learning_rate": 3.350400185240404e-05, + "loss": 1.8537, + "step": 8902 + }, + { + "epoch": 0.9391350210970464, + "grad_norm": 0.34091684222221375, + "learning_rate": 3.345753271239753e-05, + "loss": 1.8631, + "step": 8903 + }, + { + "epoch": 0.9392405063291139, + "grad_norm": 0.3396502137184143, + "learning_rate": 
3.3411128023825295e-05, + "loss": 1.8733, + "step": 8904 + }, + { + "epoch": 0.9393459915611815, + "grad_norm": 0.3489725887775421, + "learning_rate": 3.336478769729492e-05, + "loss": 1.8678, + "step": 8905 + }, + { + "epoch": 0.9394514767932489, + "grad_norm": 0.3462705612182617, + "learning_rate": 3.331851164353803e-05, + "loss": 1.8906, + "step": 8906 + }, + { + "epoch": 0.9395569620253165, + "grad_norm": 0.3419593870639801, + "learning_rate": 3.327229977341001e-05, + "loss": 1.8433, + "step": 8907 + }, + { + "epoch": 0.939662447257384, + "grad_norm": 0.3648776113986969, + "learning_rate": 3.322615199788992e-05, + "loss": 1.8334, + "step": 8908 + }, + { + "epoch": 0.9397679324894515, + "grad_norm": 0.339978963136673, + "learning_rate": 3.31800682280803e-05, + "loss": 1.836, + "step": 8909 + }, + { + "epoch": 0.939873417721519, + "grad_norm": 0.3585304319858551, + "learning_rate": 3.313404837520694e-05, + "loss": 1.832, + "step": 8910 + }, + { + "epoch": 0.9399789029535865, + "grad_norm": 0.3548716604709625, + "learning_rate": 3.308809235061882e-05, + "loss": 1.849, + "step": 8911 + }, + { + "epoch": 0.940084388185654, + "grad_norm": 0.3494146764278412, + "learning_rate": 3.30422000657878e-05, + "loss": 1.874, + "step": 8912 + }, + { + "epoch": 0.9401898734177215, + "grad_norm": 0.3574804961681366, + "learning_rate": 3.2996371432308596e-05, + "loss": 1.8679, + "step": 8913 + }, + { + "epoch": 0.9402953586497891, + "grad_norm": 0.3603658080101013, + "learning_rate": 3.295060636189853e-05, + "loss": 1.8567, + "step": 8914 + }, + { + "epoch": 0.9404008438818565, + "grad_norm": 0.37080666422843933, + "learning_rate": 3.290490476639731e-05, + "loss": 1.8617, + "step": 8915 + }, + { + "epoch": 0.9405063291139241, + "grad_norm": 0.34719404578208923, + "learning_rate": 3.2859266557767e-05, + "loss": 1.8863, + "step": 8916 + }, + { + "epoch": 0.9406118143459916, + "grad_norm": 0.3432215452194214, + "learning_rate": 3.2813691648091704e-05, + "loss": 1.8845, + "step": 8917 
+ }, + { + "epoch": 0.940717299578059, + "grad_norm": 0.33624136447906494, + "learning_rate": 3.2768179949577505e-05, + "loss": 1.8359, + "step": 8918 + }, + { + "epoch": 0.9408227848101266, + "grad_norm": 0.3520926833152771, + "learning_rate": 3.272273137455225e-05, + "loss": 1.8636, + "step": 8919 + }, + { + "epoch": 0.9409282700421941, + "grad_norm": 0.3421134948730469, + "learning_rate": 3.267734583546536e-05, + "loss": 1.8389, + "step": 8920 + }, + { + "epoch": 0.9410337552742616, + "grad_norm": 0.3688114583492279, + "learning_rate": 3.263202324488772e-05, + "loss": 1.8776, + "step": 8921 + }, + { + "epoch": 0.9411392405063291, + "grad_norm": 0.3568557798862457, + "learning_rate": 3.258676351551143e-05, + "loss": 1.8422, + "step": 8922 + }, + { + "epoch": 0.9412447257383966, + "grad_norm": 0.3385866582393646, + "learning_rate": 3.2541566560149726e-05, + "loss": 1.8666, + "step": 8923 + }, + { + "epoch": 0.9413502109704641, + "grad_norm": 0.3494528532028198, + "learning_rate": 3.249643229173678e-05, + "loss": 1.852, + "step": 8924 + }, + { + "epoch": 0.9414556962025317, + "grad_norm": 0.33807653188705444, + "learning_rate": 3.245136062332745e-05, + "loss": 1.8772, + "step": 8925 + }, + { + "epoch": 0.9415611814345991, + "grad_norm": 0.3451527953147888, + "learning_rate": 3.240635146809728e-05, + "loss": 1.8443, + "step": 8926 + }, + { + "epoch": 0.9416666666666667, + "grad_norm": 0.3371361494064331, + "learning_rate": 3.236140473934215e-05, + "loss": 1.8726, + "step": 8927 + }, + { + "epoch": 0.9417721518987342, + "grad_norm": 0.36338818073272705, + "learning_rate": 3.231652035047825e-05, + "loss": 1.8698, + "step": 8928 + }, + { + "epoch": 0.9418776371308016, + "grad_norm": 0.3366001844406128, + "learning_rate": 3.227169821504187e-05, + "loss": 1.8436, + "step": 8929 + }, + { + "epoch": 0.9419831223628692, + "grad_norm": 0.3424869775772095, + "learning_rate": 3.222693824668916e-05, + "loss": 1.8496, + "step": 8930 + }, + { + "epoch": 0.9420886075949367, + 
"grad_norm": 0.35178959369659424, + "learning_rate": 3.218224035919609e-05, + "loss": 1.8516, + "step": 8931 + }, + { + "epoch": 0.9421940928270042, + "grad_norm": 0.3422624170780182, + "learning_rate": 3.213760446645818e-05, + "loss": 1.8586, + "step": 8932 + }, + { + "epoch": 0.9422995780590717, + "grad_norm": 0.34398967027664185, + "learning_rate": 3.2093030482490385e-05, + "loss": 1.8546, + "step": 8933 + }, + { + "epoch": 0.9424050632911393, + "grad_norm": 0.3436407744884491, + "learning_rate": 3.204851832142696e-05, + "loss": 1.8812, + "step": 8934 + }, + { + "epoch": 0.9425105485232067, + "grad_norm": 0.3495113253593445, + "learning_rate": 3.200406789752116e-05, + "loss": 1.87, + "step": 8935 + }, + { + "epoch": 0.9426160337552743, + "grad_norm": 0.33250728249549866, + "learning_rate": 3.195967912514527e-05, + "loss": 1.8614, + "step": 8936 + }, + { + "epoch": 0.9427215189873418, + "grad_norm": 0.3366357386112213, + "learning_rate": 3.191535191879029e-05, + "loss": 1.8617, + "step": 8937 + }, + { + "epoch": 0.9428270042194092, + "grad_norm": 0.3491470217704773, + "learning_rate": 3.1871086193065794e-05, + "loss": 1.8482, + "step": 8938 + }, + { + "epoch": 0.9429324894514768, + "grad_norm": 0.33516860008239746, + "learning_rate": 3.182688186269985e-05, + "loss": 1.8847, + "step": 8939 + }, + { + "epoch": 0.9430379746835443, + "grad_norm": 0.3382872939109802, + "learning_rate": 3.178273884253874e-05, + "loss": 1.8909, + "step": 8940 + }, + { + "epoch": 0.9431434599156118, + "grad_norm": 0.3364901840686798, + "learning_rate": 3.173865704754688e-05, + "loss": 1.8393, + "step": 8941 + }, + { + "epoch": 0.9432489451476793, + "grad_norm": 0.3320569097995758, + "learning_rate": 3.169463639280665e-05, + "loss": 1.8637, + "step": 8942 + }, + { + "epoch": 0.9433544303797469, + "grad_norm": 0.34197649359703064, + "learning_rate": 3.1650676793518144e-05, + "loss": 1.8598, + "step": 8943 + }, + { + "epoch": 0.9434599156118143, + "grad_norm": 0.37341412901878357, + 
"learning_rate": 3.1606778164999155e-05, + "loss": 1.8892, + "step": 8944 + }, + { + "epoch": 0.9435654008438819, + "grad_norm": 0.3383985757827759, + "learning_rate": 3.156294042268483e-05, + "loss": 1.8478, + "step": 8945 + }, + { + "epoch": 0.9436708860759494, + "grad_norm": 0.33608826994895935, + "learning_rate": 3.151916348212769e-05, + "loss": 1.8639, + "step": 8946 + }, + { + "epoch": 0.9437763713080168, + "grad_norm": 0.3600350022315979, + "learning_rate": 3.147544725899736e-05, + "loss": 1.8473, + "step": 8947 + }, + { + "epoch": 0.9438818565400844, + "grad_norm": 0.35380399227142334, + "learning_rate": 3.1431791669080386e-05, + "loss": 1.8629, + "step": 8948 + }, + { + "epoch": 0.9439873417721519, + "grad_norm": 0.34388530254364014, + "learning_rate": 3.138819662828018e-05, + "loss": 1.8734, + "step": 8949 + }, + { + "epoch": 0.9440928270042194, + "grad_norm": 0.34721723198890686, + "learning_rate": 3.134466205261674e-05, + "loss": 1.8613, + "step": 8950 + }, + { + "epoch": 0.9441983122362869, + "grad_norm": 0.3513150215148926, + "learning_rate": 3.130118785822657e-05, + "loss": 1.8555, + "step": 8951 + }, + { + "epoch": 0.9443037974683545, + "grad_norm": 0.3429173529148102, + "learning_rate": 3.125777396136251e-05, + "loss": 1.8801, + "step": 8952 + }, + { + "epoch": 0.9444092827004219, + "grad_norm": 0.3396470248699188, + "learning_rate": 3.121442027839349e-05, + "loss": 1.8826, + "step": 8953 + }, + { + "epoch": 0.9445147679324895, + "grad_norm": 0.35503315925598145, + "learning_rate": 3.117112672580451e-05, + "loss": 1.8744, + "step": 8954 + }, + { + "epoch": 0.944620253164557, + "grad_norm": 0.3405601680278778, + "learning_rate": 3.112789322019633e-05, + "loss": 1.8825, + "step": 8955 + }, + { + "epoch": 0.9447257383966244, + "grad_norm": 0.347352534532547, + "learning_rate": 3.108471967828545e-05, + "loss": 1.8457, + "step": 8956 + }, + { + "epoch": 0.944831223628692, + "grad_norm": 0.35179269313812256, + "learning_rate": 3.1041606016903844e-05, + 
"loss": 1.8757, + "step": 8957 + }, + { + "epoch": 0.9449367088607595, + "grad_norm": 0.3276670575141907, + "learning_rate": 3.0998552152998837e-05, + "loss": 1.8482, + "step": 8958 + }, + { + "epoch": 0.945042194092827, + "grad_norm": 0.3360580801963806, + "learning_rate": 3.095555800363297e-05, + "loss": 1.8398, + "step": 8959 + }, + { + "epoch": 0.9451476793248945, + "grad_norm": 0.34654152393341064, + "learning_rate": 3.0912623485983774e-05, + "loss": 1.8749, + "step": 8960 + }, + { + "epoch": 0.9452531645569621, + "grad_norm": 0.35118791460990906, + "learning_rate": 3.08697485173437e-05, + "loss": 1.8661, + "step": 8961 + }, + { + "epoch": 0.9453586497890295, + "grad_norm": 0.3318227529525757, + "learning_rate": 3.0826933015119895e-05, + "loss": 1.8602, + "step": 8962 + }, + { + "epoch": 0.945464135021097, + "grad_norm": 0.34245678782463074, + "learning_rate": 3.0784176896834036e-05, + "loss": 1.8786, + "step": 8963 + }, + { + "epoch": 0.9455696202531646, + "grad_norm": 0.3350088894367218, + "learning_rate": 3.074148008012224e-05, + "loss": 1.8552, + "step": 8964 + }, + { + "epoch": 0.945675105485232, + "grad_norm": 0.3639564514160156, + "learning_rate": 3.06988424827348e-05, + "loss": 1.869, + "step": 8965 + }, + { + "epoch": 0.9457805907172996, + "grad_norm": 0.35208335518836975, + "learning_rate": 3.0656264022536146e-05, + "loss": 1.9143, + "step": 8966 + }, + { + "epoch": 0.9458860759493671, + "grad_norm": 0.36192694306373596, + "learning_rate": 3.0613744617504625e-05, + "loss": 1.8269, + "step": 8967 + }, + { + "epoch": 0.9459915611814346, + "grad_norm": 0.3483559191226959, + "learning_rate": 3.0571284185732275e-05, + "loss": 1.8711, + "step": 8968 + }, + { + "epoch": 0.9460970464135021, + "grad_norm": 0.3400314748287201, + "learning_rate": 3.052888264542484e-05, + "loss": 1.8735, + "step": 8969 + }, + { + "epoch": 0.9462025316455697, + "grad_norm": 0.3364540934562683, + "learning_rate": 3.048653991490141e-05, + "loss": 1.8758, + "step": 8970 + }, + { + 
"epoch": 0.9463080168776371, + "grad_norm": 0.3497465252876282, + "learning_rate": 3.0444255912594442e-05, + "loss": 1.8647, + "step": 8971 + }, + { + "epoch": 0.9464135021097047, + "grad_norm": 0.3410831093788147, + "learning_rate": 3.0402030557049503e-05, + "loss": 1.8368, + "step": 8972 + }, + { + "epoch": 0.9465189873417722, + "grad_norm": 0.3408503830432892, + "learning_rate": 3.0359863766925097e-05, + "loss": 1.8178, + "step": 8973 + }, + { + "epoch": 0.9466244725738396, + "grad_norm": 0.355508416891098, + "learning_rate": 3.0317755460992616e-05, + "loss": 1.8563, + "step": 8974 + }, + { + "epoch": 0.9467299578059072, + "grad_norm": 0.34231144189834595, + "learning_rate": 3.027570555813604e-05, + "loss": 1.8838, + "step": 8975 + }, + { + "epoch": 0.9468354430379747, + "grad_norm": 0.35036709904670715, + "learning_rate": 3.0233713977351904e-05, + "loss": 1.8809, + "step": 8976 + }, + { + "epoch": 0.9469409282700422, + "grad_norm": 0.34362176060676575, + "learning_rate": 3.0191780637749097e-05, + "loss": 1.8736, + "step": 8977 + }, + { + "epoch": 0.9470464135021097, + "grad_norm": 0.335096538066864, + "learning_rate": 3.0149905458548646e-05, + "loss": 1.8743, + "step": 8978 + }, + { + "epoch": 0.9471518987341773, + "grad_norm": 0.34957602620124817, + "learning_rate": 3.0108088359083675e-05, + "loss": 1.8391, + "step": 8979 + }, + { + "epoch": 0.9472573839662447, + "grad_norm": 0.3594614565372467, + "learning_rate": 3.0066329258799184e-05, + "loss": 1.8946, + "step": 8980 + }, + { + "epoch": 0.9473628691983123, + "grad_norm": 0.35328349471092224, + "learning_rate": 3.002462807725185e-05, + "loss": 1.8583, + "step": 8981 + }, + { + "epoch": 0.9474683544303798, + "grad_norm": 0.3366435766220093, + "learning_rate": 2.9982984734110005e-05, + "loss": 1.8622, + "step": 8982 + }, + { + "epoch": 0.9475738396624472, + "grad_norm": 0.3446855843067169, + "learning_rate": 2.9941399149153305e-05, + "loss": 1.8767, + "step": 8983 + }, + { + "epoch": 0.9476793248945148, + 
"grad_norm": 0.35649505257606506, + "learning_rate": 2.9899871242272736e-05, + "loss": 1.8753, + "step": 8984 + }, + { + "epoch": 0.9477848101265823, + "grad_norm": 0.35532885789871216, + "learning_rate": 2.9858400933470395e-05, + "loss": 1.8721, + "step": 8985 + }, + { + "epoch": 0.9478902953586498, + "grad_norm": 0.34707367420196533, + "learning_rate": 2.9816988142859272e-05, + "loss": 1.8769, + "step": 8986 + }, + { + "epoch": 0.9479957805907173, + "grad_norm": 0.34185871481895447, + "learning_rate": 2.9775632790663244e-05, + "loss": 1.8435, + "step": 8987 + }, + { + "epoch": 0.9481012658227848, + "grad_norm": 0.3413589298725128, + "learning_rate": 2.973433479721675e-05, + "loss": 1.8704, + "step": 8988 + }, + { + "epoch": 0.9482067510548523, + "grad_norm": 0.3551003634929657, + "learning_rate": 2.9693094082964775e-05, + "loss": 1.8722, + "step": 8989 + }, + { + "epoch": 0.9483122362869199, + "grad_norm": 0.3356633186340332, + "learning_rate": 2.965191056846266e-05, + "loss": 1.8593, + "step": 8990 + }, + { + "epoch": 0.9484177215189873, + "grad_norm": 0.3338903486728668, + "learning_rate": 2.9610784174375868e-05, + "loss": 1.8399, + "step": 8991 + }, + { + "epoch": 0.9485232067510548, + "grad_norm": 0.3422680199146271, + "learning_rate": 2.9569714821479966e-05, + "loss": 1.8511, + "step": 8992 + }, + { + "epoch": 0.9486286919831224, + "grad_norm": 0.3426360487937927, + "learning_rate": 2.9528702430660346e-05, + "loss": 1.8402, + "step": 8993 + }, + { + "epoch": 0.9487341772151898, + "grad_norm": 0.34995877742767334, + "learning_rate": 2.948774692291217e-05, + "loss": 1.8755, + "step": 8994 + }, + { + "epoch": 0.9488396624472574, + "grad_norm": 0.3324069082736969, + "learning_rate": 2.9446848219340173e-05, + "loss": 1.9122, + "step": 8995 + }, + { + "epoch": 0.9489451476793249, + "grad_norm": 0.33078524470329285, + "learning_rate": 2.9406006241158485e-05, + "loss": 1.8475, + "step": 8996 + }, + { + "epoch": 0.9490506329113924, + "grad_norm": 0.35418397188186646, 
+ "learning_rate": 2.936522090969056e-05, + "loss": 1.8637, + "step": 8997 + }, + { + "epoch": 0.9491561181434599, + "grad_norm": 0.34748783707618713, + "learning_rate": 2.9324492146368908e-05, + "loss": 1.8734, + "step": 8998 + }, + { + "epoch": 0.9492616033755275, + "grad_norm": 0.32937973737716675, + "learning_rate": 2.928381987273507e-05, + "loss": 1.8656, + "step": 8999 + }, + { + "epoch": 0.9493670886075949, + "grad_norm": 0.3369988799095154, + "learning_rate": 2.9243204010439396e-05, + "loss": 1.8531, + "step": 9000 + }, + { + "epoch": 0.9494725738396624, + "grad_norm": 0.3433398902416229, + "learning_rate": 2.920264448124087e-05, + "loss": 1.8737, + "step": 9001 + }, + { + "epoch": 0.94957805907173, + "grad_norm": 0.3512086272239685, + "learning_rate": 2.916214120700703e-05, + "loss": 1.86, + "step": 9002 + }, + { + "epoch": 0.9496835443037974, + "grad_norm": 0.3412225842475891, + "learning_rate": 2.9121694109713758e-05, + "loss": 1.8684, + "step": 9003 + }, + { + "epoch": 0.949789029535865, + "grad_norm": 0.33262115716934204, + "learning_rate": 2.9081303111445172e-05, + "loss": 1.8404, + "step": 9004 + }, + { + "epoch": 0.9498945147679325, + "grad_norm": 0.3344259560108185, + "learning_rate": 2.904096813439346e-05, + "loss": 1.8243, + "step": 9005 + }, + { + "epoch": 0.95, + "grad_norm": 0.331933856010437, + "learning_rate": 2.9000689100858695e-05, + "loss": 1.8596, + "step": 9006 + }, + { + "epoch": 0.9501054852320675, + "grad_norm": 0.3450171649456024, + "learning_rate": 2.896046593324876e-05, + "loss": 1.8878, + "step": 9007 + }, + { + "epoch": 0.950210970464135, + "grad_norm": 0.3463997542858124, + "learning_rate": 2.8920298554079114e-05, + "loss": 1.8847, + "step": 9008 + }, + { + "epoch": 0.9503164556962025, + "grad_norm": 0.3550094664096832, + "learning_rate": 2.8880186885972716e-05, + "loss": 1.8285, + "step": 9009 + }, + { + "epoch": 0.95042194092827, + "grad_norm": 0.33891770243644714, + "learning_rate": 2.8840130851659852e-05, + "loss": 1.8645, 
+ "step": 9010 + }, + { + "epoch": 0.9505274261603376, + "grad_norm": 0.3470384180545807, + "learning_rate": 2.8800130373977936e-05, + "loss": 1.8437, + "step": 9011 + }, + { + "epoch": 0.950632911392405, + "grad_norm": 0.3390069305896759, + "learning_rate": 2.876018537587146e-05, + "loss": 1.8674, + "step": 9012 + }, + { + "epoch": 0.9507383966244726, + "grad_norm": 0.33470967411994934, + "learning_rate": 2.8720295780391722e-05, + "loss": 1.8794, + "step": 9013 + }, + { + "epoch": 0.9508438818565401, + "grad_norm": 0.3452693223953247, + "learning_rate": 2.8680461510696802e-05, + "loss": 1.8504, + "step": 9014 + }, + { + "epoch": 0.9509493670886076, + "grad_norm": 0.34875383973121643, + "learning_rate": 2.8640682490051365e-05, + "loss": 1.8479, + "step": 9015 + }, + { + "epoch": 0.9510548523206751, + "grad_norm": 0.3615492582321167, + "learning_rate": 2.8600958641826447e-05, + "loss": 1.8242, + "step": 9016 + }, + { + "epoch": 0.9511603375527427, + "grad_norm": 0.3539378345012665, + "learning_rate": 2.8561289889499422e-05, + "loss": 1.8399, + "step": 9017 + }, + { + "epoch": 0.9512658227848101, + "grad_norm": 0.3548135757446289, + "learning_rate": 2.8521676156653756e-05, + "loss": 1.8767, + "step": 9018 + }, + { + "epoch": 0.9513713080168776, + "grad_norm": 0.33210596442222595, + "learning_rate": 2.8482117366978935e-05, + "loss": 1.8655, + "step": 9019 + }, + { + "epoch": 0.9514767932489452, + "grad_norm": 0.35520637035369873, + "learning_rate": 2.844261344427029e-05, + "loss": 1.8995, + "step": 9020 + }, + { + "epoch": 0.9515822784810126, + "grad_norm": 0.32950177788734436, + "learning_rate": 2.84031643124288e-05, + "loss": 1.8868, + "step": 9021 + }, + { + "epoch": 0.9516877637130802, + "grad_norm": 0.3535826802253723, + "learning_rate": 2.8363769895461053e-05, + "loss": 1.9098, + "step": 9022 + }, + { + "epoch": 0.9517932489451477, + "grad_norm": 0.34632793068885803, + "learning_rate": 2.8324430117478972e-05, + "loss": 1.9024, + "step": 9023 + }, + { + "epoch": 
0.9518987341772152, + "grad_norm": 0.34752321243286133, + "learning_rate": 2.8285144902699785e-05, + "loss": 1.8514, + "step": 9024 + }, + { + "epoch": 0.9520042194092827, + "grad_norm": 0.3430077135562897, + "learning_rate": 2.824591417544583e-05, + "loss": 1.8657, + "step": 9025 + }, + { + "epoch": 0.9521097046413503, + "grad_norm": 0.3475254774093628, + "learning_rate": 2.820673786014436e-05, + "loss": 1.905, + "step": 9026 + }, + { + "epoch": 0.9522151898734177, + "grad_norm": 0.33829301595687866, + "learning_rate": 2.816761588132749e-05, + "loss": 1.8365, + "step": 9027 + }, + { + "epoch": 0.9523206751054852, + "grad_norm": 0.33635082840919495, + "learning_rate": 2.8128548163632005e-05, + "loss": 1.873, + "step": 9028 + }, + { + "epoch": 0.9524261603375528, + "grad_norm": 0.34177064895629883, + "learning_rate": 2.808953463179918e-05, + "loss": 1.8871, + "step": 9029 + }, + { + "epoch": 0.9525316455696202, + "grad_norm": 0.36139237880706787, + "learning_rate": 2.805057521067472e-05, + "loss": 1.8549, + "step": 9030 + }, + { + "epoch": 0.9526371308016878, + "grad_norm": 0.33965054154396057, + "learning_rate": 2.8011669825208517e-05, + "loss": 1.8263, + "step": 9031 + }, + { + "epoch": 0.9527426160337553, + "grad_norm": 0.33014997839927673, + "learning_rate": 2.7972818400454596e-05, + "loss": 1.8757, + "step": 9032 + }, + { + "epoch": 0.9528481012658228, + "grad_norm": 0.34983888268470764, + "learning_rate": 2.7934020861570928e-05, + "loss": 1.8695, + "step": 9033 + }, + { + "epoch": 0.9529535864978903, + "grad_norm": 0.33363577723503113, + "learning_rate": 2.789527713381925e-05, + "loss": 1.8937, + "step": 9034 + }, + { + "epoch": 0.9530590717299579, + "grad_norm": 0.3305141031742096, + "learning_rate": 2.7856587142565008e-05, + "loss": 1.8495, + "step": 9035 + }, + { + "epoch": 0.9531645569620253, + "grad_norm": 0.3330591917037964, + "learning_rate": 2.781795081327712e-05, + "loss": 1.8364, + "step": 9036 + }, + { + "epoch": 0.9532700421940928, + "grad_norm": 
0.35225772857666016, + "learning_rate": 2.7779368071527907e-05, + "loss": 1.835, + "step": 9037 + }, + { + "epoch": 0.9533755274261604, + "grad_norm": 0.34881219267845154, + "learning_rate": 2.7740838842992916e-05, + "loss": 1.8618, + "step": 9038 + }, + { + "epoch": 0.9534810126582278, + "grad_norm": 0.3432053327560425, + "learning_rate": 2.770236305345076e-05, + "loss": 1.8624, + "step": 9039 + }, + { + "epoch": 0.9535864978902954, + "grad_norm": 0.3451058566570282, + "learning_rate": 2.766394062878302e-05, + "loss": 1.875, + "step": 9040 + }, + { + "epoch": 0.9536919831223629, + "grad_norm": 0.352916955947876, + "learning_rate": 2.762557149497405e-05, + "loss": 1.8683, + "step": 9041 + }, + { + "epoch": 0.9537974683544304, + "grad_norm": 0.34575560688972473, + "learning_rate": 2.758725557811089e-05, + "loss": 1.8743, + "step": 9042 + }, + { + "epoch": 0.9539029535864979, + "grad_norm": 0.34177911281585693, + "learning_rate": 2.754899280438309e-05, + "loss": 1.8424, + "step": 9043 + }, + { + "epoch": 0.9540084388185655, + "grad_norm": 0.3497907221317291, + "learning_rate": 2.7510783100082544e-05, + "loss": 1.8711, + "step": 9044 + }, + { + "epoch": 0.9541139240506329, + "grad_norm": 0.33835569024086, + "learning_rate": 2.747262639160342e-05, + "loss": 1.8635, + "step": 9045 + }, + { + "epoch": 0.9542194092827004, + "grad_norm": 0.3436308205127716, + "learning_rate": 2.743452260544193e-05, + "loss": 1.8497, + "step": 9046 + }, + { + "epoch": 0.954324894514768, + "grad_norm": 0.3512459993362427, + "learning_rate": 2.7396471668196274e-05, + "loss": 1.8984, + "step": 9047 + }, + { + "epoch": 0.9544303797468354, + "grad_norm": 0.33008357882499695, + "learning_rate": 2.7358473506566453e-05, + "loss": 1.8585, + "step": 9048 + }, + { + "epoch": 0.954535864978903, + "grad_norm": 0.33699941635131836, + "learning_rate": 2.732052804735409e-05, + "loss": 1.8593, + "step": 9049 + }, + { + "epoch": 0.9546413502109705, + "grad_norm": 0.348916620016098, + "learning_rate": 
2.7282635217462405e-05, + "loss": 1.8545, + "step": 9050 + }, + { + "epoch": 0.954746835443038, + "grad_norm": 0.33936959505081177, + "learning_rate": 2.724479494389592e-05, + "loss": 1.8857, + "step": 9051 + }, + { + "epoch": 0.9548523206751055, + "grad_norm": 0.3692876398563385, + "learning_rate": 2.7207007153760456e-05, + "loss": 1.8907, + "step": 9052 + }, + { + "epoch": 0.9549578059071729, + "grad_norm": 0.33928054571151733, + "learning_rate": 2.7169271774262942e-05, + "loss": 1.8655, + "step": 9053 + }, + { + "epoch": 0.9550632911392405, + "grad_norm": 0.3413833677768707, + "learning_rate": 2.7131588732711214e-05, + "loss": 1.8915, + "step": 9054 + }, + { + "epoch": 0.955168776371308, + "grad_norm": 0.34857305884361267, + "learning_rate": 2.7093957956513993e-05, + "loss": 1.8671, + "step": 9055 + }, + { + "epoch": 0.9552742616033755, + "grad_norm": 0.3439025282859802, + "learning_rate": 2.7056379373180626e-05, + "loss": 1.8849, + "step": 9056 + }, + { + "epoch": 0.955379746835443, + "grad_norm": 0.3411286175251007, + "learning_rate": 2.701885291032104e-05, + "loss": 1.8635, + "step": 9057 + }, + { + "epoch": 0.9554852320675106, + "grad_norm": 0.35113656520843506, + "learning_rate": 2.6981378495645566e-05, + "loss": 1.895, + "step": 9058 + }, + { + "epoch": 0.955590717299578, + "grad_norm": 0.3386937975883484, + "learning_rate": 2.6943956056964773e-05, + "loss": 1.8485, + "step": 9059 + }, + { + "epoch": 0.9556962025316456, + "grad_norm": 0.33001965284347534, + "learning_rate": 2.6906585522189378e-05, + "loss": 1.8493, + "step": 9060 + }, + { + "epoch": 0.9558016877637131, + "grad_norm": 0.3425282835960388, + "learning_rate": 2.6869266819330058e-05, + "loss": 1.8833, + "step": 9061 + }, + { + "epoch": 0.9559071729957805, + "grad_norm": 0.3319646716117859, + "learning_rate": 2.6831999876497372e-05, + "loss": 1.901, + "step": 9062 + }, + { + "epoch": 0.9560126582278481, + "grad_norm": 0.33472833037376404, + "learning_rate": 2.6794784621901574e-05, + "loss": 
1.861, + "step": 9063 + }, + { + "epoch": 0.9561181434599156, + "grad_norm": 0.3478805422782898, + "learning_rate": 2.675762098385246e-05, + "loss": 1.8609, + "step": 9064 + }, + { + "epoch": 0.9562236286919831, + "grad_norm": 0.33709365129470825, + "learning_rate": 2.672050889075931e-05, + "loss": 1.8869, + "step": 9065 + }, + { + "epoch": 0.9563291139240506, + "grad_norm": 0.3410908281803131, + "learning_rate": 2.6683448271130646e-05, + "loss": 1.8469, + "step": 9066 + }, + { + "epoch": 0.9564345991561182, + "grad_norm": 0.3336600363254547, + "learning_rate": 2.6646439053574176e-05, + "loss": 1.8782, + "step": 9067 + }, + { + "epoch": 0.9565400843881856, + "grad_norm": 0.32627469301223755, + "learning_rate": 2.6609481166796652e-05, + "loss": 1.8618, + "step": 9068 + }, + { + "epoch": 0.9566455696202532, + "grad_norm": 0.3485521376132965, + "learning_rate": 2.6572574539603643e-05, + "loss": 1.8697, + "step": 9069 + }, + { + "epoch": 0.9567510548523207, + "grad_norm": 0.36774298548698425, + "learning_rate": 2.653571910089951e-05, + "loss": 1.8502, + "step": 9070 + }, + { + "epoch": 0.9568565400843881, + "grad_norm": 0.3323816955089569, + "learning_rate": 2.6498914779687228e-05, + "loss": 1.9, + "step": 9071 + }, + { + "epoch": 0.9569620253164557, + "grad_norm": 0.3464554250240326, + "learning_rate": 2.646216150506819e-05, + "loss": 1.832, + "step": 9072 + }, + { + "epoch": 0.9570675105485232, + "grad_norm": 0.3360196053981781, + "learning_rate": 2.6425459206242196e-05, + "loss": 1.8803, + "step": 9073 + }, + { + "epoch": 0.9571729957805907, + "grad_norm": 0.3642116189002991, + "learning_rate": 2.6388807812507172e-05, + "loss": 1.8738, + "step": 9074 + }, + { + "epoch": 0.9572784810126582, + "grad_norm": 0.33688992261886597, + "learning_rate": 2.6352207253259166e-05, + "loss": 1.8424, + "step": 9075 + }, + { + "epoch": 0.9573839662447258, + "grad_norm": 0.3565288782119751, + "learning_rate": 2.6315657457992123e-05, + "loss": 1.8162, + "step": 9076 + }, + { + 
"epoch": 0.9574894514767932, + "grad_norm": 0.34338709712028503, + "learning_rate": 2.627915835629776e-05, + "loss": 1.8874, + "step": 9077 + }, + { + "epoch": 0.9575949367088608, + "grad_norm": 0.34362173080444336, + "learning_rate": 2.62427098778655e-05, + "loss": 1.8677, + "step": 9078 + }, + { + "epoch": 0.9577004219409283, + "grad_norm": 0.3280572295188904, + "learning_rate": 2.6206311952482224e-05, + "loss": 1.8325, + "step": 9079 + }, + { + "epoch": 0.9578059071729957, + "grad_norm": 0.3532244563102722, + "learning_rate": 2.6169964510032243e-05, + "loss": 1.8704, + "step": 9080 + }, + { + "epoch": 0.9579113924050633, + "grad_norm": 0.33473944664001465, + "learning_rate": 2.6133667480497115e-05, + "loss": 1.8867, + "step": 9081 + }, + { + "epoch": 0.9580168776371308, + "grad_norm": 0.3387579619884491, + "learning_rate": 2.6097420793955464e-05, + "loss": 1.8716, + "step": 9082 + }, + { + "epoch": 0.9581223628691983, + "grad_norm": 0.33524489402770996, + "learning_rate": 2.6061224380582955e-05, + "loss": 1.8641, + "step": 9083 + }, + { + "epoch": 0.9582278481012658, + "grad_norm": 0.3422587215900421, + "learning_rate": 2.6025078170652045e-05, + "loss": 1.8715, + "step": 9084 + }, + { + "epoch": 0.9583333333333334, + "grad_norm": 0.3538360297679901, + "learning_rate": 2.5988982094531942e-05, + "loss": 1.8387, + "step": 9085 + }, + { + "epoch": 0.9584388185654008, + "grad_norm": 0.3437923789024353, + "learning_rate": 2.595293608268842e-05, + "loss": 1.8707, + "step": 9086 + }, + { + "epoch": 0.9585443037974684, + "grad_norm": 0.3409182131290436, + "learning_rate": 2.591694006568366e-05, + "loss": 1.8838, + "step": 9087 + }, + { + "epoch": 0.9586497890295359, + "grad_norm": 0.34241780638694763, + "learning_rate": 2.588099397417621e-05, + "loss": 1.8809, + "step": 9088 + }, + { + "epoch": 0.9587552742616033, + "grad_norm": 0.3560262620449066, + "learning_rate": 2.584509773892073e-05, + "loss": 1.8637, + "step": 9089 + }, + { + "epoch": 0.9588607594936709, + 
"grad_norm": 0.36829501390457153, + "learning_rate": 2.580925129076798e-05, + "loss": 1.8761, + "step": 9090 + }, + { + "epoch": 0.9589662447257384, + "grad_norm": 0.3376680612564087, + "learning_rate": 2.5773454560664597e-05, + "loss": 1.8322, + "step": 9091 + }, + { + "epoch": 0.9590717299578059, + "grad_norm": 0.3338610827922821, + "learning_rate": 2.5737707479652988e-05, + "loss": 1.8431, + "step": 9092 + }, + { + "epoch": 0.9591772151898734, + "grad_norm": 0.35562214255332947, + "learning_rate": 2.5702009978871223e-05, + "loss": 1.8743, + "step": 9093 + }, + { + "epoch": 0.959282700421941, + "grad_norm": 0.33485281467437744, + "learning_rate": 2.566636198955286e-05, + "loss": 1.8329, + "step": 9094 + }, + { + "epoch": 0.9593881856540084, + "grad_norm": 0.3434459865093231, + "learning_rate": 2.5630763443026845e-05, + "loss": 1.8509, + "step": 9095 + }, + { + "epoch": 0.959493670886076, + "grad_norm": 0.3427855670452118, + "learning_rate": 2.5595214270717388e-05, + "loss": 1.8751, + "step": 9096 + }, + { + "epoch": 0.9595991561181435, + "grad_norm": 0.349576473236084, + "learning_rate": 2.5559714404143766e-05, + "loss": 1.885, + "step": 9097 + }, + { + "epoch": 0.9597046413502109, + "grad_norm": 0.3534068167209625, + "learning_rate": 2.5524263774920287e-05, + "loss": 1.8378, + "step": 9098 + }, + { + "epoch": 0.9598101265822785, + "grad_norm": 0.36346325278282166, + "learning_rate": 2.5488862314756066e-05, + "loss": 1.8091, + "step": 9099 + }, + { + "epoch": 0.959915611814346, + "grad_norm": 0.3459490239620209, + "learning_rate": 2.5453509955454954e-05, + "loss": 1.8659, + "step": 9100 + }, + { + "epoch": 0.9600210970464135, + "grad_norm": 0.3409859240055084, + "learning_rate": 2.541820662891541e-05, + "loss": 1.8814, + "step": 9101 + }, + { + "epoch": 0.960126582278481, + "grad_norm": 0.34515029191970825, + "learning_rate": 2.53829522671303e-05, + "loss": 1.8541, + "step": 9102 + }, + { + "epoch": 0.9602320675105486, + "grad_norm": 0.3495963215827942, + 
"learning_rate": 2.5347746802186866e-05, + "loss": 1.8506, + "step": 9103 + }, + { + "epoch": 0.960337552742616, + "grad_norm": 0.34006714820861816, + "learning_rate": 2.531259016626649e-05, + "loss": 1.8814, + "step": 9104 + }, + { + "epoch": 0.9604430379746836, + "grad_norm": 0.34767863154411316, + "learning_rate": 2.5277482291644662e-05, + "loss": 1.8659, + "step": 9105 + }, + { + "epoch": 0.9605485232067511, + "grad_norm": 0.3476067781448364, + "learning_rate": 2.524242311069079e-05, + "loss": 1.8579, + "step": 9106 + }, + { + "epoch": 0.9606540084388185, + "grad_norm": 0.353841096162796, + "learning_rate": 2.520741255586806e-05, + "loss": 1.8636, + "step": 9107 + }, + { + "epoch": 0.9607594936708861, + "grad_norm": 0.36708372831344604, + "learning_rate": 2.5172450559733375e-05, + "loss": 1.837, + "step": 9108 + }, + { + "epoch": 0.9608649789029536, + "grad_norm": 0.33633121848106384, + "learning_rate": 2.513753705493713e-05, + "loss": 1.8862, + "step": 9109 + }, + { + "epoch": 0.9609704641350211, + "grad_norm": 0.3596188426017761, + "learning_rate": 2.510267197422317e-05, + "loss": 1.8429, + "step": 9110 + }, + { + "epoch": 0.9610759493670886, + "grad_norm": 0.35170429944992065, + "learning_rate": 2.5067855250428616e-05, + "loss": 1.8655, + "step": 9111 + }, + { + "epoch": 0.9611814345991562, + "grad_norm": 0.3580751121044159, + "learning_rate": 2.5033086816483705e-05, + "loss": 1.8497, + "step": 9112 + }, + { + "epoch": 0.9612869198312236, + "grad_norm": 0.34211254119873047, + "learning_rate": 2.499836660541176e-05, + "loss": 1.849, + "step": 9113 + }, + { + "epoch": 0.9613924050632912, + "grad_norm": 0.3528009355068207, + "learning_rate": 2.4963694550328964e-05, + "loss": 1.8589, + "step": 9114 + }, + { + "epoch": 0.9614978902953587, + "grad_norm": 0.3641049265861511, + "learning_rate": 2.492907058444425e-05, + "loss": 1.861, + "step": 9115 + }, + { + "epoch": 0.9616033755274261, + "grad_norm": 0.3485892117023468, + "learning_rate": 2.489449464105922e-05, + 
"loss": 1.9003, + "step": 9116 + }, + { + "epoch": 0.9617088607594937, + "grad_norm": 0.3446388244628906, + "learning_rate": 2.4859966653567965e-05, + "loss": 1.8832, + "step": 9117 + }, + { + "epoch": 0.9618143459915611, + "grad_norm": 0.355823814868927, + "learning_rate": 2.482548655545697e-05, + "loss": 1.8871, + "step": 9118 + }, + { + "epoch": 0.9619198312236287, + "grad_norm": 0.33890581130981445, + "learning_rate": 2.4791054280304972e-05, + "loss": 1.8827, + "step": 9119 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 0.3640995919704437, + "learning_rate": 2.4756669761782806e-05, + "loss": 1.8013, + "step": 9120 + }, + { + "epoch": 0.9621308016877637, + "grad_norm": 0.36305737495422363, + "learning_rate": 2.472233293365335e-05, + "loss": 1.8811, + "step": 9121 + }, + { + "epoch": 0.9622362869198312, + "grad_norm": 0.3474370241165161, + "learning_rate": 2.4688043729771304e-05, + "loss": 1.8358, + "step": 9122 + }, + { + "epoch": 0.9623417721518988, + "grad_norm": 0.3429102599620819, + "learning_rate": 2.4653802084083134e-05, + "loss": 1.8536, + "step": 9123 + }, + { + "epoch": 0.9624472573839662, + "grad_norm": 0.33830904960632324, + "learning_rate": 2.4619607930626937e-05, + "loss": 1.8392, + "step": 9124 + }, + { + "epoch": 0.9625527426160337, + "grad_norm": 0.33875733613967896, + "learning_rate": 2.4585461203532253e-05, + "loss": 1.8427, + "step": 9125 + }, + { + "epoch": 0.9626582278481013, + "grad_norm": 0.34580594301223755, + "learning_rate": 2.4551361837020032e-05, + "loss": 1.8404, + "step": 9126 + }, + { + "epoch": 0.9627637130801687, + "grad_norm": 0.348154217004776, + "learning_rate": 2.4517309765402408e-05, + "loss": 1.8712, + "step": 9127 + }, + { + "epoch": 0.9628691983122363, + "grad_norm": 0.3423191010951996, + "learning_rate": 2.448330492308266e-05, + "loss": 1.8377, + "step": 9128 + }, + { + "epoch": 0.9629746835443038, + "grad_norm": 0.33270683884620667, + "learning_rate": 2.4449347244555043e-05, + "loss": 1.8237, + "step": 9129 + }, 
+ { + "epoch": 0.9630801687763713, + "grad_norm": 0.34337905049324036, + "learning_rate": 2.4415436664404643e-05, + "loss": 1.9078, + "step": 9130 + }, + { + "epoch": 0.9631856540084388, + "grad_norm": 0.3423006236553192, + "learning_rate": 2.4381573117307307e-05, + "loss": 1.8575, + "step": 9131 + }, + { + "epoch": 0.9632911392405064, + "grad_norm": 0.3426886200904846, + "learning_rate": 2.4347756538029454e-05, + "loss": 1.8905, + "step": 9132 + }, + { + "epoch": 0.9633966244725738, + "grad_norm": 0.35320916771888733, + "learning_rate": 2.4313986861427997e-05, + "loss": 1.9001, + "step": 9133 + }, + { + "epoch": 0.9635021097046413, + "grad_norm": 0.3599960207939148, + "learning_rate": 2.4280264022450212e-05, + "loss": 1.824, + "step": 9134 + }, + { + "epoch": 0.9636075949367089, + "grad_norm": 0.34549397230148315, + "learning_rate": 2.4246587956133573e-05, + "loss": 1.8572, + "step": 9135 + }, + { + "epoch": 0.9637130801687763, + "grad_norm": 0.3292298913002014, + "learning_rate": 2.421295859760569e-05, + "loss": 1.8652, + "step": 9136 + }, + { + "epoch": 0.9638185654008439, + "grad_norm": 0.3554985225200653, + "learning_rate": 2.4179375882084098e-05, + "loss": 1.8718, + "step": 9137 + }, + { + "epoch": 0.9639240506329114, + "grad_norm": 0.3349110186100006, + "learning_rate": 2.4145839744876232e-05, + "loss": 1.8729, + "step": 9138 + }, + { + "epoch": 0.9640295358649789, + "grad_norm": 0.3621252775192261, + "learning_rate": 2.4112350121379255e-05, + "loss": 1.8704, + "step": 9139 + }, + { + "epoch": 0.9641350210970464, + "grad_norm": 0.3367616534233093, + "learning_rate": 2.4078906947079882e-05, + "loss": 1.8743, + "step": 9140 + }, + { + "epoch": 0.964240506329114, + "grad_norm": 0.3484412133693695, + "learning_rate": 2.4045510157554362e-05, + "loss": 1.8579, + "step": 9141 + }, + { + "epoch": 0.9643459915611814, + "grad_norm": 0.34034642577171326, + "learning_rate": 2.4012159688468252e-05, + "loss": 1.8351, + "step": 9142 + }, + { + "epoch": 0.9644514767932489, 
+ "grad_norm": 0.3480254113674164, + "learning_rate": 2.397885547557638e-05, + "loss": 1.8689, + "step": 9143 + }, + { + "epoch": 0.9645569620253165, + "grad_norm": 0.33528846502304077, + "learning_rate": 2.3945597454722663e-05, + "loss": 1.8651, + "step": 9144 + }, + { + "epoch": 0.9646624472573839, + "grad_norm": 0.34328392148017883, + "learning_rate": 2.3912385561839984e-05, + "loss": 1.8561, + "step": 9145 + }, + { + "epoch": 0.9647679324894515, + "grad_norm": 0.3355260193347931, + "learning_rate": 2.3879219732950117e-05, + "loss": 1.8824, + "step": 9146 + }, + { + "epoch": 0.964873417721519, + "grad_norm": 0.3458874821662903, + "learning_rate": 2.384609990416354e-05, + "loss": 1.8876, + "step": 9147 + }, + { + "epoch": 0.9649789029535865, + "grad_norm": 0.34256163239479065, + "learning_rate": 2.3813026011679366e-05, + "loss": 1.8509, + "step": 9148 + }, + { + "epoch": 0.965084388185654, + "grad_norm": 0.3460289537906647, + "learning_rate": 2.3779997991785207e-05, + "loss": 1.8646, + "step": 9149 + }, + { + "epoch": 0.9651898734177216, + "grad_norm": 0.3459146320819855, + "learning_rate": 2.3747015780857007e-05, + "loss": 1.8692, + "step": 9150 + }, + { + "epoch": 0.965295358649789, + "grad_norm": 0.3368473947048187, + "learning_rate": 2.3714079315358985e-05, + "loss": 1.8469, + "step": 9151 + }, + { + "epoch": 0.9654008438818565, + "grad_norm": 0.3410532772541046, + "learning_rate": 2.3681188531843466e-05, + "loss": 1.8818, + "step": 9152 + }, + { + "epoch": 0.9655063291139241, + "grad_norm": 0.347059428691864, + "learning_rate": 2.3648343366950783e-05, + "loss": 1.8612, + "step": 9153 + }, + { + "epoch": 0.9656118143459915, + "grad_norm": 0.3537541627883911, + "learning_rate": 2.3615543757409166e-05, + "loss": 1.8614, + "step": 9154 + }, + { + "epoch": 0.9657172995780591, + "grad_norm": 0.33753150701522827, + "learning_rate": 2.3582789640034548e-05, + "loss": 1.8882, + "step": 9155 + }, + { + "epoch": 0.9658227848101266, + "grad_norm": 0.33066874742507935, + 
"learning_rate": 2.3550080951730548e-05, + "loss": 1.8489, + "step": 9156 + }, + { + "epoch": 0.9659282700421941, + "grad_norm": 0.33307749032974243, + "learning_rate": 2.3517417629488286e-05, + "loss": 1.8515, + "step": 9157 + }, + { + "epoch": 0.9660337552742616, + "grad_norm": 0.34847232699394226, + "learning_rate": 2.3484799610386246e-05, + "loss": 1.8784, + "step": 9158 + }, + { + "epoch": 0.9661392405063292, + "grad_norm": 0.33694198727607727, + "learning_rate": 2.3452226831590232e-05, + "loss": 1.8404, + "step": 9159 + }, + { + "epoch": 0.9662447257383966, + "grad_norm": 0.33692997694015503, + "learning_rate": 2.3419699230353144e-05, + "loss": 1.8699, + "step": 9160 + }, + { + "epoch": 0.9663502109704641, + "grad_norm": 0.33596575260162354, + "learning_rate": 2.338721674401494e-05, + "loss": 1.8358, + "step": 9161 + }, + { + "epoch": 0.9664556962025317, + "grad_norm": 0.3483578562736511, + "learning_rate": 2.3354779310002503e-05, + "loss": 1.8498, + "step": 9162 + }, + { + "epoch": 0.9665611814345991, + "grad_norm": 0.351825475692749, + "learning_rate": 2.3322386865829453e-05, + "loss": 1.8271, + "step": 9163 + }, + { + "epoch": 0.9666666666666667, + "grad_norm": 0.3601164221763611, + "learning_rate": 2.3290039349096127e-05, + "loss": 1.8822, + "step": 9164 + }, + { + "epoch": 0.9667721518987342, + "grad_norm": 0.36409690976142883, + "learning_rate": 2.325773669748937e-05, + "loss": 1.9143, + "step": 9165 + }, + { + "epoch": 0.9668776371308017, + "grad_norm": 0.3398037552833557, + "learning_rate": 2.3225478848782476e-05, + "loss": 1.8686, + "step": 9166 + }, + { + "epoch": 0.9669831223628692, + "grad_norm": 0.34204214811325073, + "learning_rate": 2.3193265740835058e-05, + "loss": 1.8842, + "step": 9167 + }, + { + "epoch": 0.9670886075949368, + "grad_norm": 0.3649356961250305, + "learning_rate": 2.3161097311592866e-05, + "loss": 1.8487, + "step": 9168 + }, + { + "epoch": 0.9671940928270042, + "grad_norm": 0.3352561295032501, + "learning_rate": 
2.3128973499087785e-05, + "loss": 1.8868, + "step": 9169 + }, + { + "epoch": 0.9672995780590717, + "grad_norm": 0.35884878039360046, + "learning_rate": 2.3096894241437583e-05, + "loss": 1.8279, + "step": 9170 + }, + { + "epoch": 0.9674050632911393, + "grad_norm": 0.3521241545677185, + "learning_rate": 2.30648594768459e-05, + "loss": 1.8252, + "step": 9171 + }, + { + "epoch": 0.9675105485232067, + "grad_norm": 0.3493197560310364, + "learning_rate": 2.3032869143602086e-05, + "loss": 1.9078, + "step": 9172 + }, + { + "epoch": 0.9676160337552743, + "grad_norm": 0.36764538288116455, + "learning_rate": 2.3000923180081046e-05, + "loss": 1.878, + "step": 9173 + }, + { + "epoch": 0.9677215189873418, + "grad_norm": 0.34582391381263733, + "learning_rate": 2.29690215247432e-05, + "loss": 1.8739, + "step": 9174 + }, + { + "epoch": 0.9678270042194093, + "grad_norm": 0.3365190923213959, + "learning_rate": 2.293716411613428e-05, + "loss": 1.8458, + "step": 9175 + }, + { + "epoch": 0.9679324894514768, + "grad_norm": 0.35117581486701965, + "learning_rate": 2.2905350892885293e-05, + "loss": 1.8321, + "step": 9176 + }, + { + "epoch": 0.9680379746835444, + "grad_norm": 0.36640915274620056, + "learning_rate": 2.287358179371235e-05, + "loss": 1.8664, + "step": 9177 + }, + { + "epoch": 0.9681434599156118, + "grad_norm": 0.3518238067626953, + "learning_rate": 2.2841856757416538e-05, + "loss": 1.8811, + "step": 9178 + }, + { + "epoch": 0.9682489451476793, + "grad_norm": 0.3473042845726013, + "learning_rate": 2.2810175722883866e-05, + "loss": 1.8846, + "step": 9179 + }, + { + "epoch": 0.9683544303797469, + "grad_norm": 0.3670293688774109, + "learning_rate": 2.2778538629085057e-05, + "loss": 1.8476, + "step": 9180 + }, + { + "epoch": 0.9684599156118143, + "grad_norm": 0.3379354178905487, + "learning_rate": 2.2746945415075523e-05, + "loss": 1.8972, + "step": 9181 + }, + { + "epoch": 0.9685654008438819, + "grad_norm": 0.36218494176864624, + "learning_rate": 2.27153960199952e-05, + "loss": 
1.8511, + "step": 9182 + }, + { + "epoch": 0.9686708860759494, + "grad_norm": 0.342084676027298, + "learning_rate": 2.26838903830684e-05, + "loss": 1.8065, + "step": 9183 + }, + { + "epoch": 0.9687763713080169, + "grad_norm": 0.34484466910362244, + "learning_rate": 2.2652428443603782e-05, + "loss": 1.8707, + "step": 9184 + }, + { + "epoch": 0.9688818565400844, + "grad_norm": 0.36007702350616455, + "learning_rate": 2.2621010140994126e-05, + "loss": 1.8646, + "step": 9185 + }, + { + "epoch": 0.9689873417721518, + "grad_norm": 0.3421363830566406, + "learning_rate": 2.2589635414716307e-05, + "loss": 1.8456, + "step": 9186 + }, + { + "epoch": 0.9690928270042194, + "grad_norm": 0.33778712153434753, + "learning_rate": 2.2558304204331152e-05, + "loss": 1.8851, + "step": 9187 + }, + { + "epoch": 0.9691983122362869, + "grad_norm": 0.33210989832878113, + "learning_rate": 2.2527016449483282e-05, + "loss": 1.8675, + "step": 9188 + }, + { + "epoch": 0.9693037974683544, + "grad_norm": 0.3445073068141937, + "learning_rate": 2.2495772089901067e-05, + "loss": 1.8936, + "step": 9189 + }, + { + "epoch": 0.9694092827004219, + "grad_norm": 0.3434637486934662, + "learning_rate": 2.2464571065396428e-05, + "loss": 1.8551, + "step": 9190 + }, + { + "epoch": 0.9695147679324895, + "grad_norm": 0.3680356442928314, + "learning_rate": 2.2433413315864803e-05, + "loss": 1.9058, + "step": 9191 + }, + { + "epoch": 0.9696202531645569, + "grad_norm": 0.3402303159236908, + "learning_rate": 2.2402298781284995e-05, + "loss": 1.8516, + "step": 9192 + }, + { + "epoch": 0.9697257383966245, + "grad_norm": 0.3673950433731079, + "learning_rate": 2.2371227401719017e-05, + "loss": 1.8289, + "step": 9193 + }, + { + "epoch": 0.969831223628692, + "grad_norm": 0.34952113032341003, + "learning_rate": 2.2340199117312058e-05, + "loss": 1.8697, + "step": 9194 + }, + { + "epoch": 0.9699367088607594, + "grad_norm": 0.3520772457122803, + "learning_rate": 2.2309213868292277e-05, + "loss": 1.8639, + "step": 9195 + }, + { + 
"epoch": 0.970042194092827, + "grad_norm": 0.3529227674007416, + "learning_rate": 2.2278271594970783e-05, + "loss": 1.8502, + "step": 9196 + }, + { + "epoch": 0.9701476793248945, + "grad_norm": 0.35676705837249756, + "learning_rate": 2.2247372237741457e-05, + "loss": 1.8513, + "step": 9197 + }, + { + "epoch": 0.970253164556962, + "grad_norm": 0.3451744318008423, + "learning_rate": 2.2216515737080818e-05, + "loss": 1.8274, + "step": 9198 + }, + { + "epoch": 0.9703586497890295, + "grad_norm": 0.3666698634624481, + "learning_rate": 2.218570203354799e-05, + "loss": 1.8997, + "step": 9199 + }, + { + "epoch": 0.9704641350210971, + "grad_norm": 0.34826409816741943, + "learning_rate": 2.2154931067784525e-05, + "loss": 1.834, + "step": 9200 + }, + { + "epoch": 0.9705696202531645, + "grad_norm": 0.3622470498085022, + "learning_rate": 2.2124202780514277e-05, + "loss": 1.8729, + "step": 9201 + }, + { + "epoch": 0.9706751054852321, + "grad_norm": 0.34943243861198425, + "learning_rate": 2.2093517112543358e-05, + "loss": 1.8783, + "step": 9202 + }, + { + "epoch": 0.9707805907172996, + "grad_norm": 0.3403840959072113, + "learning_rate": 2.2062874004759936e-05, + "loss": 1.8207, + "step": 9203 + }, + { + "epoch": 0.970886075949367, + "grad_norm": 0.34276002645492554, + "learning_rate": 2.20322733981342e-05, + "loss": 1.877, + "step": 9204 + }, + { + "epoch": 0.9709915611814346, + "grad_norm": 0.33109724521636963, + "learning_rate": 2.200171523371821e-05, + "loss": 1.8756, + "step": 9205 + }, + { + "epoch": 0.9710970464135021, + "grad_norm": 0.3369627892971039, + "learning_rate": 2.197119945264576e-05, + "loss": 1.8364, + "step": 9206 + }, + { + "epoch": 0.9712025316455696, + "grad_norm": 0.34415751695632935, + "learning_rate": 2.1940725996132308e-05, + "loss": 1.8878, + "step": 9207 + }, + { + "epoch": 0.9713080168776371, + "grad_norm": 0.34583672881126404, + "learning_rate": 2.1910294805474834e-05, + "loss": 1.8423, + "step": 9208 + }, + { + "epoch": 0.9714135021097047, + 
"grad_norm": 0.34103649854660034, + "learning_rate": 2.187990582205175e-05, + "loss": 1.8498, + "step": 9209 + }, + { + "epoch": 0.9715189873417721, + "grad_norm": 0.3323943316936493, + "learning_rate": 2.1849558987322783e-05, + "loss": 1.8498, + "step": 9210 + }, + { + "epoch": 0.9716244725738397, + "grad_norm": 0.3529103696346283, + "learning_rate": 2.1819254242828815e-05, + "loss": 1.8792, + "step": 9211 + }, + { + "epoch": 0.9717299578059072, + "grad_norm": 0.33641302585601807, + "learning_rate": 2.1788991530191857e-05, + "loss": 1.8768, + "step": 9212 + }, + { + "epoch": 0.9718354430379746, + "grad_norm": 0.3476641774177551, + "learning_rate": 2.1758770791114845e-05, + "loss": 1.8837, + "step": 9213 + }, + { + "epoch": 0.9719409282700422, + "grad_norm": 0.34371325373649597, + "learning_rate": 2.17285919673816e-05, + "loss": 1.8561, + "step": 9214 + }, + { + "epoch": 0.9720464135021097, + "grad_norm": 0.3393658399581909, + "learning_rate": 2.1698455000856692e-05, + "loss": 1.8575, + "step": 9215 + }, + { + "epoch": 0.9721518987341772, + "grad_norm": 0.3372701108455658, + "learning_rate": 2.1668359833485287e-05, + "loss": 1.8509, + "step": 9216 + }, + { + "epoch": 0.9722573839662447, + "grad_norm": 0.3600403070449829, + "learning_rate": 2.1638306407293122e-05, + "loss": 1.842, + "step": 9217 + }, + { + "epoch": 0.9723628691983123, + "grad_norm": 0.33725452423095703, + "learning_rate": 2.160829466438629e-05, + "loss": 1.8276, + "step": 9218 + }, + { + "epoch": 0.9724683544303797, + "grad_norm": 0.3824012577533722, + "learning_rate": 2.157832454695122e-05, + "loss": 1.8531, + "step": 9219 + }, + { + "epoch": 0.9725738396624473, + "grad_norm": 0.3580176532268524, + "learning_rate": 2.1548395997254516e-05, + "loss": 1.8642, + "step": 9220 + }, + { + "epoch": 0.9726793248945148, + "grad_norm": 0.34384259581565857, + "learning_rate": 2.151850895764285e-05, + "loss": 1.8646, + "step": 9221 + }, + { + "epoch": 0.9727848101265822, + "grad_norm": 0.3495957553386688, + 
"learning_rate": 2.148866337054287e-05, + "loss": 1.8406, + "step": 9222 + }, + { + "epoch": 0.9728902953586498, + "grad_norm": 0.33934488892555237, + "learning_rate": 2.145885917846105e-05, + "loss": 1.8763, + "step": 9223 + }, + { + "epoch": 0.9729957805907173, + "grad_norm": 0.35417357087135315, + "learning_rate": 2.1429096323983638e-05, + "loss": 1.8476, + "step": 9224 + }, + { + "epoch": 0.9731012658227848, + "grad_norm": 0.32977744936943054, + "learning_rate": 2.1399374749776512e-05, + "loss": 1.8598, + "step": 9225 + }, + { + "epoch": 0.9732067510548523, + "grad_norm": 0.34297987818717957, + "learning_rate": 2.1369694398585035e-05, + "loss": 1.8543, + "step": 9226 + }, + { + "epoch": 0.9733122362869199, + "grad_norm": 0.33991631865501404, + "learning_rate": 2.1340055213234025e-05, + "loss": 1.8346, + "step": 9227 + }, + { + "epoch": 0.9734177215189873, + "grad_norm": 0.34152674674987793, + "learning_rate": 2.131045713662756e-05, + "loss": 1.8636, + "step": 9228 + }, + { + "epoch": 0.9735232067510549, + "grad_norm": 0.33495378494262695, + "learning_rate": 2.1280900111748943e-05, + "loss": 1.8296, + "step": 9229 + }, + { + "epoch": 0.9736286919831224, + "grad_norm": 0.33965790271759033, + "learning_rate": 2.1251384081660546e-05, + "loss": 1.8714, + "step": 9230 + }, + { + "epoch": 0.9737341772151898, + "grad_norm": 0.33765530586242676, + "learning_rate": 2.12219089895037e-05, + "loss": 1.8768, + "step": 9231 + }, + { + "epoch": 0.9738396624472574, + "grad_norm": 0.3515378534793854, + "learning_rate": 2.1192474778498613e-05, + "loss": 1.8174, + "step": 9232 + }, + { + "epoch": 0.9739451476793249, + "grad_norm": 0.333644300699234, + "learning_rate": 2.1163081391944224e-05, + "loss": 1.865, + "step": 9233 + }, + { + "epoch": 0.9740506329113924, + "grad_norm": 0.33684906363487244, + "learning_rate": 2.1133728773218143e-05, + "loss": 1.8483, + "step": 9234 + }, + { + "epoch": 0.9741561181434599, + "grad_norm": 0.36363327503204346, + "learning_rate": 
2.1104416865776507e-05, + "loss": 1.852, + "step": 9235 + }, + { + "epoch": 0.9742616033755275, + "grad_norm": 0.3428443670272827, + "learning_rate": 2.1075145613153853e-05, + "loss": 1.8417, + "step": 9236 + }, + { + "epoch": 0.9743670886075949, + "grad_norm": 0.3512275516986847, + "learning_rate": 2.104591495896307e-05, + "loss": 1.8583, + "step": 9237 + }, + { + "epoch": 0.9744725738396625, + "grad_norm": 0.3371615409851074, + "learning_rate": 2.1016724846895213e-05, + "loss": 1.8633, + "step": 9238 + }, + { + "epoch": 0.97457805907173, + "grad_norm": 0.3480895757675171, + "learning_rate": 2.0987575220719476e-05, + "loss": 1.7989, + "step": 9239 + }, + { + "epoch": 0.9746835443037974, + "grad_norm": 0.34177345037460327, + "learning_rate": 2.0958466024283035e-05, + "loss": 1.8495, + "step": 9240 + }, + { + "epoch": 0.974789029535865, + "grad_norm": 0.32777905464172363, + "learning_rate": 2.092939720151092e-05, + "loss": 1.8175, + "step": 9241 + }, + { + "epoch": 0.9748945147679325, + "grad_norm": 0.3448660373687744, + "learning_rate": 2.090036869640596e-05, + "loss": 1.8695, + "step": 9242 + }, + { + "epoch": 0.975, + "grad_norm": 0.33424440026283264, + "learning_rate": 2.0871380453048667e-05, + "loss": 1.85, + "step": 9243 + }, + { + "epoch": 0.9751054852320675, + "grad_norm": 0.3440498113632202, + "learning_rate": 2.0842432415597064e-05, + "loss": 1.8699, + "step": 9244 + }, + { + "epoch": 0.9752109704641351, + "grad_norm": 0.33712127804756165, + "learning_rate": 2.0813524528286672e-05, + "loss": 1.8219, + "step": 9245 + }, + { + "epoch": 0.9753164556962025, + "grad_norm": 0.35036420822143555, + "learning_rate": 2.0784656735430323e-05, + "loss": 1.8695, + "step": 9246 + }, + { + "epoch": 0.9754219409282701, + "grad_norm": 0.3414463400840759, + "learning_rate": 2.07558289814181e-05, + "loss": 1.9045, + "step": 9247 + }, + { + "epoch": 0.9755274261603376, + "grad_norm": 0.33413198590278625, + "learning_rate": 2.0727041210717232e-05, + "loss": 1.8572, + "step": 
9248 + }, + { + "epoch": 0.975632911392405, + "grad_norm": 0.338277667760849, + "learning_rate": 2.069829336787193e-05, + "loss": 1.875, + "step": 9249 + }, + { + "epoch": 0.9757383966244726, + "grad_norm": 0.3387382924556732, + "learning_rate": 2.0669585397503362e-05, + "loss": 1.8756, + "step": 9250 + }, + { + "epoch": 0.97584388185654, + "grad_norm": 0.3395342528820038, + "learning_rate": 2.064091724430947e-05, + "loss": 1.8473, + "step": 9251 + }, + { + "epoch": 0.9759493670886076, + "grad_norm": 0.33110806345939636, + "learning_rate": 2.061228885306492e-05, + "loss": 1.8643, + "step": 9252 + }, + { + "epoch": 0.9760548523206751, + "grad_norm": 0.35168755054473877, + "learning_rate": 2.0583700168620984e-05, + "loss": 1.8487, + "step": 9253 + }, + { + "epoch": 0.9761603375527426, + "grad_norm": 0.3380931317806244, + "learning_rate": 2.055515113590538e-05, + "loss": 1.878, + "step": 9254 + }, + { + "epoch": 0.9762658227848101, + "grad_norm": 0.34676098823547363, + "learning_rate": 2.0526641699922274e-05, + "loss": 1.8374, + "step": 9255 + }, + { + "epoch": 0.9763713080168777, + "grad_norm": 0.3332047164440155, + "learning_rate": 2.0498171805752038e-05, + "loss": 1.8399, + "step": 9256 + }, + { + "epoch": 0.9764767932489451, + "grad_norm": 0.343569815158844, + "learning_rate": 2.0469741398551272e-05, + "loss": 1.889, + "step": 9257 + }, + { + "epoch": 0.9765822784810126, + "grad_norm": 0.3371160328388214, + "learning_rate": 2.0441350423552625e-05, + "loss": 1.8545, + "step": 9258 + }, + { + "epoch": 0.9766877637130802, + "grad_norm": 0.34053486585617065, + "learning_rate": 2.0412998826064695e-05, + "loss": 1.866, + "step": 9259 + }, + { + "epoch": 0.9767932489451476, + "grad_norm": 0.3378836214542389, + "learning_rate": 2.0384686551471954e-05, + "loss": 1.8419, + "step": 9260 + }, + { + "epoch": 0.9768987341772152, + "grad_norm": 0.33850640058517456, + "learning_rate": 2.0356413545234603e-05, + "loss": 1.8684, + "step": 9261 + }, + { + "epoch": 0.9770042194092827, 
+ "grad_norm": 0.34430673718452454, + "learning_rate": 2.0328179752888504e-05, + "loss": 1.8755, + "step": 9262 + }, + { + "epoch": 0.9771097046413502, + "grad_norm": 0.34346941113471985, + "learning_rate": 2.029998512004507e-05, + "loss": 1.8741, + "step": 9263 + }, + { + "epoch": 0.9772151898734177, + "grad_norm": 0.32694578170776367, + "learning_rate": 2.0271829592391113e-05, + "loss": 1.8898, + "step": 9264 + }, + { + "epoch": 0.9773206751054853, + "grad_norm": 0.33796319365501404, + "learning_rate": 2.0243713115688823e-05, + "loss": 1.8378, + "step": 9265 + }, + { + "epoch": 0.9774261603375527, + "grad_norm": 0.34139397740364075, + "learning_rate": 2.021563563577556e-05, + "loss": 1.8777, + "step": 9266 + }, + { + "epoch": 0.9775316455696202, + "grad_norm": 0.348297655582428, + "learning_rate": 2.0187597098563862e-05, + "loss": 1.8502, + "step": 9267 + }, + { + "epoch": 0.9776371308016878, + "grad_norm": 0.35472896695137024, + "learning_rate": 2.0159597450041257e-05, + "loss": 1.8743, + "step": 9268 + }, + { + "epoch": 0.9777426160337552, + "grad_norm": 0.35070112347602844, + "learning_rate": 2.0131636636270178e-05, + "loss": 1.8604, + "step": 9269 + }, + { + "epoch": 0.9778481012658228, + "grad_norm": 0.3313450217247009, + "learning_rate": 2.0103714603387898e-05, + "loss": 1.858, + "step": 9270 + }, + { + "epoch": 0.9779535864978903, + "grad_norm": 0.3511632978916168, + "learning_rate": 2.0075831297606357e-05, + "loss": 1.8257, + "step": 9271 + }, + { + "epoch": 0.9780590717299578, + "grad_norm": 0.33973148465156555, + "learning_rate": 2.004798666521213e-05, + "loss": 1.8607, + "step": 9272 + }, + { + "epoch": 0.9781645569620253, + "grad_norm": 0.34097686409950256, + "learning_rate": 2.0020180652566292e-05, + "loss": 1.8788, + "step": 9273 + }, + { + "epoch": 0.9782700421940929, + "grad_norm": 0.3419921398162842, + "learning_rate": 1.999241320610428e-05, + "loss": 1.8712, + "step": 9274 + }, + { + "epoch": 0.9783755274261603, + "grad_norm": 
0.34352585673332214, + "learning_rate": 1.996468427233586e-05, + "loss": 1.8789, + "step": 9275 + }, + { + "epoch": 0.9784810126582278, + "grad_norm": 0.3554772734642029, + "learning_rate": 1.9936993797844958e-05, + "loss": 1.8498, + "step": 9276 + }, + { + "epoch": 0.9785864978902954, + "grad_norm": 0.33388552069664, + "learning_rate": 1.9909341729289613e-05, + "loss": 1.8295, + "step": 9277 + }, + { + "epoch": 0.9786919831223628, + "grad_norm": 0.3440462648868561, + "learning_rate": 1.9881728013401843e-05, + "loss": 1.8847, + "step": 9278 + }, + { + "epoch": 0.9787974683544304, + "grad_norm": 0.33917078375816345, + "learning_rate": 1.9854152596987523e-05, + "loss": 1.8847, + "step": 9279 + }, + { + "epoch": 0.9789029535864979, + "grad_norm": 0.36574819684028625, + "learning_rate": 1.9826615426926342e-05, + "loss": 1.855, + "step": 9280 + }, + { + "epoch": 0.9790084388185654, + "grad_norm": 0.3423081934452057, + "learning_rate": 1.9799116450171627e-05, + "loss": 1.8603, + "step": 9281 + }, + { + "epoch": 0.9791139240506329, + "grad_norm": 0.3349783718585968, + "learning_rate": 1.9771655613750312e-05, + "loss": 1.8537, + "step": 9282 + }, + { + "epoch": 0.9792194092827005, + "grad_norm": 0.33532091975212097, + "learning_rate": 1.9744232864762798e-05, + "loss": 1.8461, + "step": 9283 + }, + { + "epoch": 0.9793248945147679, + "grad_norm": 0.3299511671066284, + "learning_rate": 1.971684815038283e-05, + "loss": 1.8589, + "step": 9284 + }, + { + "epoch": 0.9794303797468354, + "grad_norm": 0.36666175723075867, + "learning_rate": 1.9689501417857458e-05, + "loss": 1.8732, + "step": 9285 + }, + { + "epoch": 0.979535864978903, + "grad_norm": 0.35109445452690125, + "learning_rate": 1.9662192614506883e-05, + "loss": 1.8515, + "step": 9286 + }, + { + "epoch": 0.9796413502109704, + "grad_norm": 0.3431878685951233, + "learning_rate": 1.9634921687724354e-05, + "loss": 1.8711, + "step": 9287 + }, + { + "epoch": 0.979746835443038, + "grad_norm": 0.3408137261867523, + 
"learning_rate": 1.960768858497612e-05, + "loss": 1.853, + "step": 9288 + }, + { + "epoch": 0.9798523206751055, + "grad_norm": 0.35432669520378113, + "learning_rate": 1.9580493253801253e-05, + "loss": 1.8454, + "step": 9289 + }, + { + "epoch": 0.979957805907173, + "grad_norm": 0.3379450738430023, + "learning_rate": 1.9553335641811623e-05, + "loss": 1.8726, + "step": 9290 + }, + { + "epoch": 0.9800632911392405, + "grad_norm": 0.3501506447792053, + "learning_rate": 1.952621569669175e-05, + "loss": 1.8312, + "step": 9291 + }, + { + "epoch": 0.9801687763713081, + "grad_norm": 0.3300964832305908, + "learning_rate": 1.9499133366198684e-05, + "loss": 1.8316, + "step": 9292 + }, + { + "epoch": 0.9802742616033755, + "grad_norm": 0.34597048163414, + "learning_rate": 1.947208859816199e-05, + "loss": 1.8677, + "step": 9293 + }, + { + "epoch": 0.980379746835443, + "grad_norm": 0.36292174458503723, + "learning_rate": 1.9445081340483534e-05, + "loss": 1.879, + "step": 9294 + }, + { + "epoch": 0.9804852320675106, + "grad_norm": 0.34402695298194885, + "learning_rate": 1.9418111541137484e-05, + "loss": 1.8721, + "step": 9295 + }, + { + "epoch": 0.980590717299578, + "grad_norm": 0.33737698197364807, + "learning_rate": 1.939117914817016e-05, + "loss": 1.8509, + "step": 9296 + }, + { + "epoch": 0.9806962025316456, + "grad_norm": 0.32821154594421387, + "learning_rate": 1.936428410969991e-05, + "loss": 1.8544, + "step": 9297 + }, + { + "epoch": 0.9808016877637131, + "grad_norm": 0.32792726159095764, + "learning_rate": 1.933742637391708e-05, + "loss": 1.8909, + "step": 9298 + }, + { + "epoch": 0.9809071729957806, + "grad_norm": 0.3300624191761017, + "learning_rate": 1.9310605889083842e-05, + "loss": 1.8546, + "step": 9299 + }, + { + "epoch": 0.9810126582278481, + "grad_norm": 0.3471886217594147, + "learning_rate": 1.9283822603534143e-05, + "loss": 1.8547, + "step": 9300 + }, + { + "epoch": 0.9811181434599157, + "grad_norm": 0.34817737340927124, + "learning_rate": 1.9257076465673605e-05, + 
"loss": 1.8404, + "step": 9301 + }, + { + "epoch": 0.9812236286919831, + "grad_norm": 0.3317491114139557, + "learning_rate": 1.923036742397937e-05, + "loss": 1.8663, + "step": 9302 + }, + { + "epoch": 0.9813291139240506, + "grad_norm": 0.33372050523757935, + "learning_rate": 1.9203695427000086e-05, + "loss": 1.8735, + "step": 9303 + }, + { + "epoch": 0.9814345991561182, + "grad_norm": 0.34196174144744873, + "learning_rate": 1.9177060423355717e-05, + "loss": 1.8379, + "step": 9304 + }, + { + "epoch": 0.9815400843881856, + "grad_norm": 0.34965047240257263, + "learning_rate": 1.9150462361737524e-05, + "loss": 1.878, + "step": 9305 + }, + { + "epoch": 0.9816455696202532, + "grad_norm": 0.33127862215042114, + "learning_rate": 1.912390119090793e-05, + "loss": 1.8557, + "step": 9306 + }, + { + "epoch": 0.9817510548523207, + "grad_norm": 0.33724671602249146, + "learning_rate": 1.909737685970039e-05, + "loss": 1.8867, + "step": 9307 + }, + { + "epoch": 0.9818565400843882, + "grad_norm": 0.3395964801311493, + "learning_rate": 1.9070889317019377e-05, + "loss": 1.869, + "step": 9308 + }, + { + "epoch": 0.9819620253164557, + "grad_norm": 0.3334942162036896, + "learning_rate": 1.904443851184018e-05, + "loss": 1.8213, + "step": 9309 + }, + { + "epoch": 0.9820675105485233, + "grad_norm": 0.33430004119873047, + "learning_rate": 1.90180243932089e-05, + "loss": 1.8701, + "step": 9310 + }, + { + "epoch": 0.9821729957805907, + "grad_norm": 0.33186525106430054, + "learning_rate": 1.899164691024229e-05, + "loss": 1.8514, + "step": 9311 + }, + { + "epoch": 0.9822784810126582, + "grad_norm": 0.33556532859802246, + "learning_rate": 1.8965306012127665e-05, + "loss": 1.8496, + "step": 9312 + }, + { + "epoch": 0.9823839662447258, + "grad_norm": 0.3295688033103943, + "learning_rate": 1.8939001648122847e-05, + "loss": 1.8482, + "step": 9313 + }, + { + "epoch": 0.9824894514767932, + "grad_norm": 0.33873438835144043, + "learning_rate": 1.8912733767556005e-05, + "loss": 1.8979, + "step": 9314 + }, 
+ { + "epoch": 0.9825949367088608, + "grad_norm": 0.33189865946769714, + "learning_rate": 1.8886502319825606e-05, + "loss": 1.8459, + "step": 9315 + }, + { + "epoch": 0.9827004219409282, + "grad_norm": 0.34250855445861816, + "learning_rate": 1.8860307254400307e-05, + "loss": 1.8854, + "step": 9316 + }, + { + "epoch": 0.9828059071729958, + "grad_norm": 0.334346741437912, + "learning_rate": 1.883414852081882e-05, + "loss": 1.8317, + "step": 9317 + }, + { + "epoch": 0.9829113924050633, + "grad_norm": 0.34735268354415894, + "learning_rate": 1.8808026068689887e-05, + "loss": 1.8484, + "step": 9318 + }, + { + "epoch": 0.9830168776371307, + "grad_norm": 0.35390427708625793, + "learning_rate": 1.87819398476921e-05, + "loss": 1.8472, + "step": 9319 + }, + { + "epoch": 0.9831223628691983, + "grad_norm": 0.3478325307369232, + "learning_rate": 1.8755889807573868e-05, + "loss": 1.8548, + "step": 9320 + }, + { + "epoch": 0.9832278481012658, + "grad_norm": 0.32304227352142334, + "learning_rate": 1.872987589815331e-05, + "loss": 1.8726, + "step": 9321 + }, + { + "epoch": 0.9833333333333333, + "grad_norm": 0.34797561168670654, + "learning_rate": 1.870389806931811e-05, + "loss": 1.871, + "step": 9322 + }, + { + "epoch": 0.9834388185654008, + "grad_norm": 0.33498889207839966, + "learning_rate": 1.8677956271025497e-05, + "loss": 1.8941, + "step": 9323 + }, + { + "epoch": 0.9835443037974684, + "grad_norm": 0.3283692002296448, + "learning_rate": 1.865205045330207e-05, + "loss": 1.8633, + "step": 9324 + }, + { + "epoch": 0.9836497890295358, + "grad_norm": 0.33523431420326233, + "learning_rate": 1.8626180566243758e-05, + "loss": 1.853, + "step": 9325 + }, + { + "epoch": 0.9837552742616034, + "grad_norm": 0.3430776298046112, + "learning_rate": 1.8600346560015723e-05, + "loss": 1.8688, + "step": 9326 + }, + { + "epoch": 0.9838607594936709, + "grad_norm": 0.33896490931510925, + "learning_rate": 1.8574548384852206e-05, + "loss": 1.8832, + "step": 9327 + }, + { + "epoch": 0.9839662447257383, + 
"grad_norm": 0.3627791404724121, + "learning_rate": 1.8548785991056514e-05, + "loss": 1.8327, + "step": 9328 + }, + { + "epoch": 0.9840717299578059, + "grad_norm": 0.3586136996746063, + "learning_rate": 1.8523059329000848e-05, + "loss": 1.8238, + "step": 9329 + }, + { + "epoch": 0.9841772151898734, + "grad_norm": 0.3476429879665375, + "learning_rate": 1.8497368349126255e-05, + "loss": 1.8363, + "step": 9330 + }, + { + "epoch": 0.9842827004219409, + "grad_norm": 0.3432973623275757, + "learning_rate": 1.8471713001942538e-05, + "loss": 1.8542, + "step": 9331 + }, + { + "epoch": 0.9843881856540084, + "grad_norm": 0.35546791553497314, + "learning_rate": 1.84460932380281e-05, + "loss": 1.8365, + "step": 9332 + }, + { + "epoch": 0.984493670886076, + "grad_norm": 0.34411630034446716, + "learning_rate": 1.842050900802993e-05, + "loss": 1.8764, + "step": 9333 + }, + { + "epoch": 0.9845991561181434, + "grad_norm": 0.3430240750312805, + "learning_rate": 1.8394960262663446e-05, + "loss": 1.8818, + "step": 9334 + }, + { + "epoch": 0.984704641350211, + "grad_norm": 0.3382391631603241, + "learning_rate": 1.8369446952712427e-05, + "loss": 1.8243, + "step": 9335 + }, + { + "epoch": 0.9848101265822785, + "grad_norm": 0.3380090892314911, + "learning_rate": 1.834396902902892e-05, + "loss": 1.8638, + "step": 9336 + }, + { + "epoch": 0.984915611814346, + "grad_norm": 0.34247472882270813, + "learning_rate": 1.8318526442533124e-05, + "loss": 1.8589, + "step": 9337 + }, + { + "epoch": 0.9850210970464135, + "grad_norm": 0.361458420753479, + "learning_rate": 1.8293119144213324e-05, + "loss": 1.863, + "step": 9338 + }, + { + "epoch": 0.985126582278481, + "grad_norm": 0.343313068151474, + "learning_rate": 1.826774708512579e-05, + "loss": 1.8667, + "step": 9339 + }, + { + "epoch": 0.9852320675105485, + "grad_norm": 0.33349061012268066, + "learning_rate": 1.824241021639465e-05, + "loss": 1.8804, + "step": 9340 + }, + { + "epoch": 0.985337552742616, + "grad_norm": 0.35401806235313416, + 
"learning_rate": 1.8217108489211845e-05, + "loss": 1.8904, + "step": 9341 + }, + { + "epoch": 0.9854430379746836, + "grad_norm": 0.3491126298904419, + "learning_rate": 1.8191841854836994e-05, + "loss": 1.8801, + "step": 9342 + }, + { + "epoch": 0.985548523206751, + "grad_norm": 0.3550468385219574, + "learning_rate": 1.8166610264597328e-05, + "loss": 1.9005, + "step": 9343 + }, + { + "epoch": 0.9856540084388186, + "grad_norm": 0.33815476298332214, + "learning_rate": 1.8141413669887598e-05, + "loss": 1.8487, + "step": 9344 + }, + { + "epoch": 0.9857594936708861, + "grad_norm": 0.34806492924690247, + "learning_rate": 1.8116252022169935e-05, + "loss": 1.8686, + "step": 9345 + }, + { + "epoch": 0.9858649789029535, + "grad_norm": 0.3504815399646759, + "learning_rate": 1.809112527297383e-05, + "loss": 1.8969, + "step": 9346 + }, + { + "epoch": 0.9859704641350211, + "grad_norm": 0.3479999899864197, + "learning_rate": 1.8066033373895962e-05, + "loss": 1.8592, + "step": 9347 + }, + { + "epoch": 0.9860759493670886, + "grad_norm": 0.3531075119972229, + "learning_rate": 1.804097627660017e-05, + "loss": 1.8843, + "step": 9348 + }, + { + "epoch": 0.9861814345991561, + "grad_norm": 0.3518960773944855, + "learning_rate": 1.8015953932817347e-05, + "loss": 1.8817, + "step": 9349 + }, + { + "epoch": 0.9862869198312236, + "grad_norm": 0.3409340977668762, + "learning_rate": 1.799096629434529e-05, + "loss": 1.8575, + "step": 9350 + }, + { + "epoch": 0.9863924050632912, + "grad_norm": 0.35102900862693787, + "learning_rate": 1.7966013313048696e-05, + "loss": 1.893, + "step": 9351 + }, + { + "epoch": 0.9864978902953586, + "grad_norm": 0.3568642735481262, + "learning_rate": 1.794109494085898e-05, + "loss": 1.8523, + "step": 9352 + }, + { + "epoch": 0.9866033755274262, + "grad_norm": 0.3641592562198639, + "learning_rate": 1.7916211129774273e-05, + "loss": 1.8826, + "step": 9353 + }, + { + "epoch": 0.9867088607594937, + "grad_norm": 0.3534960448741913, + "learning_rate": 
1.7891361831859263e-05, + "loss": 1.8452, + "step": 9354 + }, + { + "epoch": 0.9868143459915611, + "grad_norm": 0.33696672320365906, + "learning_rate": 1.78665469992451e-05, + "loss": 1.884, + "step": 9355 + }, + { + "epoch": 0.9869198312236287, + "grad_norm": 0.33279168605804443, + "learning_rate": 1.7841766584129377e-05, + "loss": 1.8483, + "step": 9356 + }, + { + "epoch": 0.9870253164556962, + "grad_norm": 0.3424586355686188, + "learning_rate": 1.7817020538775933e-05, + "loss": 1.8594, + "step": 9357 + }, + { + "epoch": 0.9871308016877637, + "grad_norm": 0.3284037411212921, + "learning_rate": 1.779230881551485e-05, + "loss": 1.8428, + "step": 9358 + }, + { + "epoch": 0.9872362869198312, + "grad_norm": 0.35381191968917847, + "learning_rate": 1.7767631366742332e-05, + "loss": 1.889, + "step": 9359 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 0.3572961688041687, + "learning_rate": 1.7742988144920578e-05, + "loss": 1.8679, + "step": 9360 + }, + { + "epoch": 0.9874472573839662, + "grad_norm": 0.3324790298938751, + "learning_rate": 1.7718379102577752e-05, + "loss": 1.8648, + "step": 9361 + }, + { + "epoch": 0.9875527426160338, + "grad_norm": 0.35882535576820374, + "learning_rate": 1.7693804192307827e-05, + "loss": 1.8745, + "step": 9362 + }, + { + "epoch": 0.9876582278481013, + "grad_norm": 0.3438187837600708, + "learning_rate": 1.7669263366770554e-05, + "loss": 1.8705, + "step": 9363 + }, + { + "epoch": 0.9877637130801687, + "grad_norm": 0.3615082800388336, + "learning_rate": 1.7644756578691348e-05, + "loss": 1.8458, + "step": 9364 + }, + { + "epoch": 0.9878691983122363, + "grad_norm": 0.3598630726337433, + "learning_rate": 1.7620283780861163e-05, + "loss": 1.8721, + "step": 9365 + }, + { + "epoch": 0.9879746835443038, + "grad_norm": 0.33856022357940674, + "learning_rate": 1.759584492613646e-05, + "loss": 1.864, + "step": 9366 + }, + { + "epoch": 0.9880801687763713, + "grad_norm": 0.34512022137641907, + "learning_rate": 1.757143996743906e-05, + "loss": 
1.8282, + "step": 9367 + }, + { + "epoch": 0.9881856540084388, + "grad_norm": 0.3401206433773041, + "learning_rate": 1.75470688577561e-05, + "loss": 1.8917, + "step": 9368 + }, + { + "epoch": 0.9882911392405064, + "grad_norm": 0.36084699630737305, + "learning_rate": 1.7522731550139926e-05, + "loss": 1.8575, + "step": 9369 + }, + { + "epoch": 0.9883966244725738, + "grad_norm": 0.3503566384315491, + "learning_rate": 1.7498427997707978e-05, + "loss": 1.887, + "step": 9370 + }, + { + "epoch": 0.9885021097046414, + "grad_norm": 0.3366674780845642, + "learning_rate": 1.7474158153642745e-05, + "loss": 1.8468, + "step": 9371 + }, + { + "epoch": 0.9886075949367089, + "grad_norm": 0.33488473296165466, + "learning_rate": 1.744992197119162e-05, + "loss": 1.8452, + "step": 9372 + }, + { + "epoch": 0.9887130801687763, + "grad_norm": 0.33648014068603516, + "learning_rate": 1.7425719403666873e-05, + "loss": 1.8647, + "step": 9373 + }, + { + "epoch": 0.9888185654008439, + "grad_norm": 0.3448355495929718, + "learning_rate": 1.7401550404445523e-05, + "loss": 1.8014, + "step": 9374 + }, + { + "epoch": 0.9889240506329114, + "grad_norm": 0.3537713885307312, + "learning_rate": 1.737741492696922e-05, + "loss": 1.8779, + "step": 9375 + }, + { + "epoch": 0.9890295358649789, + "grad_norm": 0.3461473882198334, + "learning_rate": 1.735331292474423e-05, + "loss": 1.8348, + "step": 9376 + }, + { + "epoch": 0.9891350210970464, + "grad_norm": 0.34537625312805176, + "learning_rate": 1.73292443513413e-05, + "loss": 1.8248, + "step": 9377 + }, + { + "epoch": 0.989240506329114, + "grad_norm": 0.33209657669067383, + "learning_rate": 1.730520916039554e-05, + "loss": 1.8858, + "step": 9378 + }, + { + "epoch": 0.9893459915611814, + "grad_norm": 0.33861079812049866, + "learning_rate": 1.728120730560641e-05, + "loss": 1.8709, + "step": 9379 + }, + { + "epoch": 0.989451476793249, + "grad_norm": 0.3344067931175232, + "learning_rate": 1.7257238740737548e-05, + "loss": 1.8476, + "step": 9380 + }, + { + "epoch": 
0.9895569620253165, + "grad_norm": 0.3357123136520386, + "learning_rate": 1.7233303419616745e-05, + "loss": 1.8222, + "step": 9381 + }, + { + "epoch": 0.989662447257384, + "grad_norm": 0.3379712998867035, + "learning_rate": 1.720940129613584e-05, + "loss": 1.8706, + "step": 9382 + }, + { + "epoch": 0.9897679324894515, + "grad_norm": 0.34633803367614746, + "learning_rate": 1.718553232425059e-05, + "loss": 1.8736, + "step": 9383 + }, + { + "epoch": 0.9898734177215189, + "grad_norm": 0.34637391567230225, + "learning_rate": 1.7161696457980646e-05, + "loss": 1.8566, + "step": 9384 + }, + { + "epoch": 0.9899789029535865, + "grad_norm": 0.33003056049346924, + "learning_rate": 1.7137893651409406e-05, + "loss": 1.8705, + "step": 9385 + }, + { + "epoch": 0.990084388185654, + "grad_norm": 0.3326970338821411, + "learning_rate": 1.7114123858683976e-05, + "loss": 1.8602, + "step": 9386 + }, + { + "epoch": 0.9901898734177215, + "grad_norm": 0.344705194234848, + "learning_rate": 1.7090387034015054e-05, + "loss": 1.8674, + "step": 9387 + }, + { + "epoch": 0.990295358649789, + "grad_norm": 0.3386549949645996, + "learning_rate": 1.7066683131676825e-05, + "loss": 1.8437, + "step": 9388 + }, + { + "epoch": 0.9904008438818566, + "grad_norm": 0.36277276277542114, + "learning_rate": 1.704301210600693e-05, + "loss": 1.8487, + "step": 9389 + }, + { + "epoch": 0.990506329113924, + "grad_norm": 0.36110928654670715, + "learning_rate": 1.7019373911406307e-05, + "loss": 1.8541, + "step": 9390 + }, + { + "epoch": 0.9906118143459915, + "grad_norm": 0.3304116725921631, + "learning_rate": 1.699576850233916e-05, + "loss": 1.8716, + "step": 9391 + }, + { + "epoch": 0.9907172995780591, + "grad_norm": 0.3346260190010071, + "learning_rate": 1.697219583333286e-05, + "loss": 1.8657, + "step": 9392 + }, + { + "epoch": 0.9908227848101265, + "grad_norm": 0.3336678445339203, + "learning_rate": 1.694865585897781e-05, + "loss": 1.841, + "step": 9393 + }, + { + "epoch": 0.9909282700421941, + "grad_norm": 
0.3523908257484436, + "learning_rate": 1.6925148533927435e-05, + "loss": 1.8381, + "step": 9394 + }, + { + "epoch": 0.9910337552742616, + "grad_norm": 0.34313276410102844, + "learning_rate": 1.690167381289802e-05, + "loss": 1.9001, + "step": 9395 + }, + { + "epoch": 0.9911392405063291, + "grad_norm": 0.32971101999282837, + "learning_rate": 1.6878231650668686e-05, + "loss": 1.8451, + "step": 9396 + }, + { + "epoch": 0.9912447257383966, + "grad_norm": 0.3400001525878906, + "learning_rate": 1.6854822002081265e-05, + "loss": 1.8655, + "step": 9397 + }, + { + "epoch": 0.9913502109704642, + "grad_norm": 0.3381459712982178, + "learning_rate": 1.6831444822040207e-05, + "loss": 1.8783, + "step": 9398 + }, + { + "epoch": 0.9914556962025316, + "grad_norm": 0.3333260715007782, + "learning_rate": 1.6808100065512536e-05, + "loss": 1.8868, + "step": 9399 + }, + { + "epoch": 0.9915611814345991, + "grad_norm": 0.3468720018863678, + "learning_rate": 1.67847876875277e-05, + "loss": 1.8572, + "step": 9400 + }, + { + "epoch": 0.9916666666666667, + "grad_norm": 0.35075756907463074, + "learning_rate": 1.6761507643177553e-05, + "loss": 1.8376, + "step": 9401 + }, + { + "epoch": 0.9917721518987341, + "grad_norm": 0.3500541150569916, + "learning_rate": 1.673825988761623e-05, + "loss": 1.8461, + "step": 9402 + }, + { + "epoch": 0.9918776371308017, + "grad_norm": 0.3636898994445801, + "learning_rate": 1.671504437606004e-05, + "loss": 1.8107, + "step": 9403 + }, + { + "epoch": 0.9919831223628692, + "grad_norm": 0.3473997414112091, + "learning_rate": 1.6691861063787444e-05, + "loss": 1.8358, + "step": 9404 + }, + { + "epoch": 0.9920886075949367, + "grad_norm": 0.3391052186489105, + "learning_rate": 1.666870990613889e-05, + "loss": 1.8421, + "step": 9405 + }, + { + "epoch": 0.9921940928270042, + "grad_norm": 0.3421878516674042, + "learning_rate": 1.6645590858516798e-05, + "loss": 1.8412, + "step": 9406 + }, + { + "epoch": 0.9922995780590718, + "grad_norm": 0.3367885649204254, + "learning_rate": 
1.662250387638544e-05, + "loss": 1.831, + "step": 9407 + }, + { + "epoch": 0.9924050632911392, + "grad_norm": 0.34096047282218933, + "learning_rate": 1.6599448915270843e-05, + "loss": 1.8543, + "step": 9408 + }, + { + "epoch": 0.9925105485232067, + "grad_norm": 0.34794676303863525, + "learning_rate": 1.657642593076074e-05, + "loss": 1.8566, + "step": 9409 + }, + { + "epoch": 0.9926160337552743, + "grad_norm": 0.3344206213951111, + "learning_rate": 1.655343487850443e-05, + "loss": 1.8485, + "step": 9410 + }, + { + "epoch": 0.9927215189873417, + "grad_norm": 0.35250037908554077, + "learning_rate": 1.6530475714212752e-05, + "loss": 1.8995, + "step": 9411 + }, + { + "epoch": 0.9928270042194093, + "grad_norm": 0.3309120833873749, + "learning_rate": 1.6507548393657978e-05, + "loss": 1.8296, + "step": 9412 + }, + { + "epoch": 0.9929324894514768, + "grad_norm": 0.3528839945793152, + "learning_rate": 1.6484652872673692e-05, + "loss": 1.8152, + "step": 9413 + }, + { + "epoch": 0.9930379746835443, + "grad_norm": 0.38137662410736084, + "learning_rate": 1.6461789107154772e-05, + "loss": 1.893, + "step": 9414 + }, + { + "epoch": 0.9931434599156118, + "grad_norm": 0.3427259922027588, + "learning_rate": 1.6438957053057234e-05, + "loss": 1.8785, + "step": 9415 + }, + { + "epoch": 0.9932489451476794, + "grad_norm": 0.3327445983886719, + "learning_rate": 1.6416156666398208e-05, + "loss": 1.8584, + "step": 9416 + }, + { + "epoch": 0.9933544303797468, + "grad_norm": 0.35106974840164185, + "learning_rate": 1.6393387903255822e-05, + "loss": 1.8828, + "step": 9417 + }, + { + "epoch": 0.9934599156118143, + "grad_norm": 0.34446069598197937, + "learning_rate": 1.63706507197691e-05, + "loss": 1.8543, + "step": 9418 + }, + { + "epoch": 0.9935654008438819, + "grad_norm": 0.34655502438545227, + "learning_rate": 1.634794507213793e-05, + "loss": 1.8992, + "step": 9419 + }, + { + "epoch": 0.9936708860759493, + "grad_norm": 0.3540962040424347, + "learning_rate": 1.6325270916622947e-05, + "loss": 
1.8264, + "step": 9420 + }, + { + "epoch": 0.9937763713080169, + "grad_norm": 0.35015714168548584, + "learning_rate": 1.6302628209545423e-05, + "loss": 1.8795, + "step": 9421 + }, + { + "epoch": 0.9938818565400844, + "grad_norm": 0.3453613817691803, + "learning_rate": 1.6280016907287243e-05, + "loss": 1.8751, + "step": 9422 + }, + { + "epoch": 0.9939873417721519, + "grad_norm": 0.34770557284355164, + "learning_rate": 1.6257436966290764e-05, + "loss": 1.879, + "step": 9423 + }, + { + "epoch": 0.9940928270042194, + "grad_norm": 0.3349344730377197, + "learning_rate": 1.623488834305878e-05, + "loss": 1.845, + "step": 9424 + }, + { + "epoch": 0.994198312236287, + "grad_norm": 0.33736729621887207, + "learning_rate": 1.62123709941544e-05, + "loss": 1.8517, + "step": 9425 + }, + { + "epoch": 0.9943037974683544, + "grad_norm": 0.330612450838089, + "learning_rate": 1.6189884876200976e-05, + "loss": 1.8882, + "step": 9426 + }, + { + "epoch": 0.994409282700422, + "grad_norm": 0.34668731689453125, + "learning_rate": 1.6167429945882038e-05, + "loss": 1.8776, + "step": 9427 + }, + { + "epoch": 0.9945147679324895, + "grad_norm": 0.34030604362487793, + "learning_rate": 1.6145006159941168e-05, + "loss": 1.8473, + "step": 9428 + }, + { + "epoch": 0.9946202531645569, + "grad_norm": 0.3430401384830475, + "learning_rate": 1.6122613475181976e-05, + "loss": 1.8965, + "step": 9429 + }, + { + "epoch": 0.9947257383966245, + "grad_norm": 0.3483499586582184, + "learning_rate": 1.610025184846797e-05, + "loss": 1.8496, + "step": 9430 + }, + { + "epoch": 0.994831223628692, + "grad_norm": 0.33400896191596985, + "learning_rate": 1.6077921236722464e-05, + "loss": 1.8298, + "step": 9431 + }, + { + "epoch": 0.9949367088607595, + "grad_norm": 0.3359280824661255, + "learning_rate": 1.6055621596928567e-05, + "loss": 1.8697, + "step": 9432 + }, + { + "epoch": 0.995042194092827, + "grad_norm": 0.3528861105442047, + "learning_rate": 1.6033352886129e-05, + "loss": 1.877, + "step": 9433 + }, + { + "epoch": 
0.9951476793248946, + "grad_norm": 0.34139692783355713, + "learning_rate": 1.60111150614261e-05, + "loss": 1.8604, + "step": 9434 + }, + { + "epoch": 0.995253164556962, + "grad_norm": 0.3375034034252167, + "learning_rate": 1.5988908079981696e-05, + "loss": 1.8555, + "step": 9435 + }, + { + "epoch": 0.9953586497890295, + "grad_norm": 0.34087395668029785, + "learning_rate": 1.5966731899017015e-05, + "loss": 1.8416, + "step": 9436 + }, + { + "epoch": 0.9954641350210971, + "grad_norm": 0.3390779197216034, + "learning_rate": 1.5944586475812638e-05, + "loss": 1.858, + "step": 9437 + }, + { + "epoch": 0.9955696202531645, + "grad_norm": 0.35079383850097656, + "learning_rate": 1.592247176770838e-05, + "loss": 1.8494, + "step": 9438 + }, + { + "epoch": 0.9956751054852321, + "grad_norm": 0.3585003614425659, + "learning_rate": 1.590038773210323e-05, + "loss": 1.8949, + "step": 9439 + }, + { + "epoch": 0.9957805907172996, + "grad_norm": 0.33613672852516174, + "learning_rate": 1.587833432645528e-05, + "loss": 1.8482, + "step": 9440 + }, + { + "epoch": 0.9958860759493671, + "grad_norm": 0.3301384747028351, + "learning_rate": 1.5856311508281594e-05, + "loss": 1.8777, + "step": 9441 + }, + { + "epoch": 0.9959915611814346, + "grad_norm": 0.34366053342819214, + "learning_rate": 1.5834319235158193e-05, + "loss": 1.8726, + "step": 9442 + }, + { + "epoch": 0.9960970464135022, + "grad_norm": 0.35774123668670654, + "learning_rate": 1.5812357464719904e-05, + "loss": 1.8269, + "step": 9443 + }, + { + "epoch": 0.9962025316455696, + "grad_norm": 0.338861346244812, + "learning_rate": 1.5790426154660347e-05, + "loss": 1.8676, + "step": 9444 + }, + { + "epoch": 0.9963080168776371, + "grad_norm": 0.3455209732055664, + "learning_rate": 1.5768525262731804e-05, + "loss": 1.8509, + "step": 9445 + }, + { + "epoch": 0.9964135021097047, + "grad_norm": 0.33705756068229675, + "learning_rate": 1.574665474674514e-05, + "loss": 1.8498, + "step": 9446 + }, + { + "epoch": 0.9965189873417721, + "grad_norm": 
0.3416200280189514, + "learning_rate": 1.5724814564569767e-05, + "loss": 1.8629, + "step": 9447 + }, + { + "epoch": 0.9966244725738397, + "grad_norm": 0.37029996514320374, + "learning_rate": 1.57030046741335e-05, + "loss": 1.8646, + "step": 9448 + }, + { + "epoch": 0.9967299578059071, + "grad_norm": 0.3486921191215515, + "learning_rate": 1.568122503342252e-05, + "loss": 1.882, + "step": 9449 + }, + { + "epoch": 0.9968354430379747, + "grad_norm": 0.32802242040634155, + "learning_rate": 1.5659475600481297e-05, + "loss": 1.876, + "step": 9450 + }, + { + "epoch": 0.9969409282700422, + "grad_norm": 0.3513413071632385, + "learning_rate": 1.5637756333412454e-05, + "loss": 1.872, + "step": 9451 + }, + { + "epoch": 0.9970464135021097, + "grad_norm": 0.34918612241744995, + "learning_rate": 1.5616067190376765e-05, + "loss": 1.8863, + "step": 9452 + }, + { + "epoch": 0.9971518987341772, + "grad_norm": 0.34069615602493286, + "learning_rate": 1.559440812959299e-05, + "loss": 1.8576, + "step": 9453 + }, + { + "epoch": 0.9972573839662447, + "grad_norm": 0.35237619280815125, + "learning_rate": 1.5572779109337886e-05, + "loss": 1.855, + "step": 9454 + }, + { + "epoch": 0.9973628691983122, + "grad_norm": 0.3378899395465851, + "learning_rate": 1.555118008794605e-05, + "loss": 1.8372, + "step": 9455 + }, + { + "epoch": 0.9974683544303797, + "grad_norm": 0.3367578089237213, + "learning_rate": 1.552961102380987e-05, + "loss": 1.8414, + "step": 9456 + }, + { + "epoch": 0.9975738396624473, + "grad_norm": 0.33842065930366516, + "learning_rate": 1.550807187537945e-05, + "loss": 1.832, + "step": 9457 + }, + { + "epoch": 0.9976793248945147, + "grad_norm": 0.35705694556236267, + "learning_rate": 1.5486562601162513e-05, + "loss": 1.8382, + "step": 9458 + }, + { + "epoch": 0.9977848101265823, + "grad_norm": 0.33653149008750916, + "learning_rate": 1.5465083159724344e-05, + "loss": 1.8494, + "step": 9459 + }, + { + "epoch": 0.9978902953586498, + "grad_norm": 0.3478158116340637, + "learning_rate": 
1.544363350968769e-05, + "loss": 1.8554, + "step": 9460 + }, + { + "epoch": 0.9979957805907173, + "grad_norm": 0.35773515701293945, + "learning_rate": 1.542221360973268e-05, + "loss": 1.8517, + "step": 9461 + }, + { + "epoch": 0.9981012658227848, + "grad_norm": 0.34391218423843384, + "learning_rate": 1.5400823418596764e-05, + "loss": 1.8523, + "step": 9462 + }, + { + "epoch": 0.9982067510548523, + "grad_norm": 0.3495359718799591, + "learning_rate": 1.537946289507462e-05, + "loss": 1.8815, + "step": 9463 + }, + { + "epoch": 0.9983122362869198, + "grad_norm": 0.35505107045173645, + "learning_rate": 1.5358131998018067e-05, + "loss": 1.8809, + "step": 9464 + }, + { + "epoch": 0.9984177215189873, + "grad_norm": 0.3453887701034546, + "learning_rate": 1.5336830686336012e-05, + "loss": 1.8859, + "step": 9465 + }, + { + "epoch": 0.9985232067510549, + "grad_norm": 0.35607844591140747, + "learning_rate": 1.5315558918994333e-05, + "loss": 1.8519, + "step": 9466 + }, + { + "epoch": 0.9986286919831223, + "grad_norm": 0.3333752453327179, + "learning_rate": 1.5294316655015837e-05, + "loss": 1.8352, + "step": 9467 + }, + { + "epoch": 0.9987341772151899, + "grad_norm": 0.33002176880836487, + "learning_rate": 1.527310385348017e-05, + "loss": 1.8633, + "step": 9468 + }, + { + "epoch": 0.9988396624472574, + "grad_norm": 0.3410337567329407, + "learning_rate": 1.5251920473523708e-05, + "loss": 1.8555, + "step": 9469 + }, + { + "epoch": 0.9989451476793249, + "grad_norm": 0.34303125739097595, + "learning_rate": 1.523076647433954e-05, + "loss": 1.8808, + "step": 9470 + }, + { + "epoch": 0.9990506329113924, + "grad_norm": 0.33618631958961487, + "learning_rate": 1.5209641815177312e-05, + "loss": 1.8751, + "step": 9471 + }, + { + "epoch": 0.99915611814346, + "grad_norm": 0.34084799885749817, + "learning_rate": 1.5188546455343223e-05, + "loss": 1.8112, + "step": 9472 + }, + { + "epoch": 0.9992616033755274, + "grad_norm": 0.33278071880340576, + "learning_rate": 1.5167480354199909e-05, + "loss": 
1.8567, + "step": 9473 + }, + { + "epoch": 0.9993670886075949, + "grad_norm": 0.3462759554386139, + "learning_rate": 1.5146443471166345e-05, + "loss": 1.8894, + "step": 9474 + }, + { + "epoch": 0.9994725738396625, + "grad_norm": 0.35432249307632446, + "learning_rate": 1.5125435765717816e-05, + "loss": 1.8471, + "step": 9475 + }, + { + "epoch": 0.9995780590717299, + "grad_norm": 0.3568649888038635, + "learning_rate": 1.5104457197385799e-05, + "loss": 1.868, + "step": 9476 + }, + { + "epoch": 0.9996835443037975, + "grad_norm": 0.34251806139945984, + "learning_rate": 1.508350772575791e-05, + "loss": 1.8477, + "step": 9477 + }, + { + "epoch": 0.999789029535865, + "grad_norm": 0.3536956012248993, + "learning_rate": 1.5062587310477816e-05, + "loss": 1.8797, + "step": 9478 + }, + { + "epoch": 0.9998945147679325, + "grad_norm": 0.3401359021663666, + "learning_rate": 1.5041695911245136e-05, + "loss": 1.8696, + "step": 9479 + }, + { + "epoch": 1.0, + "grad_norm": 0.9571086168289185, + "learning_rate": 1.5020833487815421e-05, + "loss": 1.8245, + "step": 9480 + } + ], + "logging_steps": 1, + "max_steps": 9480, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.9911340678754304e+16, + "train_batch_size": 1024, + "trial_name": null, + "trial_params": null +} diff --git a/saves-rwkv/checkpoint-9480/training_args.bin b/saves-rwkv/checkpoint-9480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4fb812f5ffda2d0cc53948bb1e8df38ca5d6bf1a --- /dev/null +++ b/saves-rwkv/checkpoint-9480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73cff432a9d5e5d9d52ef99a023c544d5b3ccb035be1f2efb0dcc83e4a7107d2 +size 5112 diff --git 
a/saves-rwkv/config.json b/saves-rwkv/config.json new file mode 100644 index 0000000000000000000000000000000000000000..fba72b626605f3d995236b195dd7881fa619390d --- /dev/null +++ b/saves-rwkv/config.json @@ -0,0 +1,22 @@ +{ + "architectures": [ + "RwkvForCausalLM" + ], + "attention_hidden_size": 256, + "bos_token_id": 0, + "context_length": 1024, + "eos_token_id": 0, + "hidden_size": 256, + "intermediate_size": 1024, + "layer_norm_epsilon": 1e-05, + "model_type": "rwkv", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "rescale_every": 6, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-rwkv/generation_config.json b/saves-rwkv/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..14e4f03d0d73dc2707d488ac8f586bd62ef72a7e --- /dev/null +++ b/saves-rwkv/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 0, + "eos_token_id": 0, + "transformers_version": "4.42.4" +} diff --git a/saves-rwkv/model.safetensors b/saves-rwkv/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8ecf29ae079005556035aa74d1f06e51347d43d1 --- /dev/null +++ b/saves-rwkv/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ba962d913f6c1b9a7456214da0cdd86502672a90974aa3f746673733543ffe9 +size 8894568 diff --git a/saves-rwkv/result.log b/saves-rwkv/result.log new file mode 100644 index 0000000000000000000000000000000000000000..d1c5eece17d0ab093ec8c5cbb2e5ba18fbb6c511 --- /dev/null +++ b/saves-rwkv/result.log @@ -0,0 +1 @@ +{'train_runtime': 13152.83, 'train_samples_per_second': 737.986, 'train_steps_per_second': 0.721, 'train_loss': 3.0150678825655066, 'epoch': 1.0} \ No newline at end of file diff --git a/saves-rwkv/special_tokens_map.json b/saves-rwkv/special_tokens_map.json new file mode 100644 index 
0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-rwkv/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-rwkv/tokenizer.json b/saves-rwkv/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-rwkv/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + 
"trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + 
"ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 
304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 
453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 
599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + 
"éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + 
"ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 
1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 
1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, 
+ "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + 
"çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 
1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + 
"Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + 
"产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 
1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-rwkv/tokenizer_config.json b/saves-rwkv/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-rwkv/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "model_max_length": 4096, 
+ "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-stablelm-cosine/checkpoint-9480/config.json b/saves-stablelm-cosine/checkpoint-9480/config.json new file mode 100644 index 0000000000000000000000000000000000000000..25a1ab486ad5c4cccf2de695a11e27be68a84045 --- /dev/null +++ b/saves-stablelm-cosine/checkpoint-9480/config.json @@ -0,0 +1,30 @@ +{ + "architectures": [ + "StableLmForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 0, + "eos_token_id": 0, + "hidden_act": "silu", + "hidden_dropout": 0.0, + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 768, + "layer_norm_eps": 1e-05, + "max_position_embeddings": 4096, + "model_type": "stablelm", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "partial_rotary_factor": 0.25, + "qk_layernorm": false, + "rope_scaling": null, + "rope_theta": 10000, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "use_parallel_residual": false, + "use_qkv_bias": false, + "vocab_size": 2000 +} diff --git a/saves-stablelm-cosine/checkpoint-9480/generation_config.json b/saves-stablelm-cosine/checkpoint-9480/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..14e4f03d0d73dc2707d488ac8f586bd62ef72a7e --- /dev/null +++ b/saves-stablelm-cosine/checkpoint-9480/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 0, + "eos_token_id": 0, + "transformers_version": "4.42.4" +} diff --git a/saves-stablelm-cosine/checkpoint-9480/model.safetensors b/saves-stablelm-cosine/checkpoint-9480/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e73ff745dff597362da5194291c0e5c13389d0f8 --- /dev/null +++ b/saves-stablelm-cosine/checkpoint-9480/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:5b388878961f2b1f6972dafddafe806d7d5f52868db7bfd0df7b7c8fda6b9916 +size 8352336 diff --git a/saves-stablelm-cosine/checkpoint-9480/optimizer.pt b/saves-stablelm-cosine/checkpoint-9480/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..62239e39f0adfb2c473d9750bd3438c3b5c04167 --- /dev/null +++ b/saves-stablelm-cosine/checkpoint-9480/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b737c60f0adbd02aa106219094632ab1b78083d3c16e91a41b82c077f58049e +size 16720851 diff --git a/saves-stablelm-cosine/checkpoint-9480/rng_state.pth b/saves-stablelm-cosine/checkpoint-9480/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f8291cd6ce87668b786a72f3e93d072fbe54902 --- /dev/null +++ b/saves-stablelm-cosine/checkpoint-9480/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c917636c7a58af68a29056522a757e9f9b99005b776641aa157c536967817d +size 14244 diff --git a/saves-stablelm-cosine/checkpoint-9480/scheduler.pt b/saves-stablelm-cosine/checkpoint-9480/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..47ca193b702fc31e51e3ee0689a4054b394880b6 --- /dev/null +++ b/saves-stablelm-cosine/checkpoint-9480/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97f195640e66bde784a0961679ecd73c2a561c5a12962a7316325d731f304936 +size 1064 diff --git a/saves-stablelm-cosine/checkpoint-9480/special_tokens_map.json b/saves-stablelm-cosine/checkpoint-9480/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-stablelm-cosine/checkpoint-9480/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-stablelm-cosine/checkpoint-9480/tokenizer.json b/saves-stablelm-cosine/checkpoint-9480/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-stablelm-cosine/checkpoint-9480/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + 
"<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 
174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + 
"ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + 
"Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, 
+ "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 
765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, 
+ "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + 
".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + 
"éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 
1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + 
"æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 
1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, 
+ "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + 
"Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 
1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-stablelm-cosine/checkpoint-9480/tokenizer_config.json b/saves-stablelm-cosine/checkpoint-9480/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-stablelm-cosine/checkpoint-9480/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, 
+ "eos_token": "<|im_end|>", + "errors": "replace", + "model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-stablelm-cosine/checkpoint-9480/trainer_state.json b/saves-stablelm-cosine/checkpoint-9480/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e6c27eec218aeeb5319414bf1bcf85ac2981e7e9 --- /dev/null +++ b/saves-stablelm-cosine/checkpoint-9480/trainer_state.json @@ -0,0 +1,6669 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 9480, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0010548523206751054, + "grad_norm": 1.2756527662277222, + "learning_rate": 0.00015789473684210527, + "loss": 7.5387, + "step": 10 + }, + { + "epoch": 0.002109704641350211, + "grad_norm": 1.1541600227355957, + "learning_rate": 0.00031578947368421053, + "loss": 6.9163, + "step": 20 + }, + { + "epoch": 0.0031645569620253164, + "grad_norm": 0.8401379585266113, + "learning_rate": 0.00047368421052631577, + "loss": 6.2534, + "step": 30 + }, + { + "epoch": 0.004219409282700422, + "grad_norm": 0.8081388473510742, + "learning_rate": 0.0006315789473684211, + "loss": 5.7519, + "step": 40 + }, + { + "epoch": 0.005274261603375527, + "grad_norm": 0.5945696830749512, + "learning_rate": 0.0007894736842105263, + "loss": 5.2835, + "step": 50 + }, + { + "epoch": 0.006329113924050633, + "grad_norm": 1.2485466003417969, + "learning_rate": 0.0009473684210526315, + "loss": 4.7731, + "step": 60 + }, + { + "epoch": 0.007383966244725738, + "grad_norm": 1.0089627504348755, + "learning_rate": 0.0011052631578947368, + "loss": 4.3676, + "step": 70 + }, + { + "epoch": 0.008438818565400843, + "grad_norm": 1.4536213874816895, + "learning_rate": 0.0012631578947368421, + "loss": 4.1446, + "step": 80 + }, + { + "epoch": 
0.00949367088607595, + "grad_norm": 1.0684837102890015, + "learning_rate": 0.0014210526315789472, + "loss": 3.9499, + "step": 90 + }, + { + "epoch": 0.010548523206751054, + "grad_norm": 0.8271965980529785, + "learning_rate": 0.0014999989494847376, + "loss": 3.813, + "step": 100 + }, + { + "epoch": 0.011603375527426161, + "grad_norm": 1.0353797674179077, + "learning_rate": 0.0014999905453802946, + "loss": 3.6641, + "step": 110 + }, + { + "epoch": 0.012658227848101266, + "grad_norm": 0.6420148015022278, + "learning_rate": 0.0014999737372655805, + "loss": 3.5646, + "step": 120 + }, + { + "epoch": 0.013713080168776372, + "grad_norm": 0.7069715261459351, + "learning_rate": 0.0014999485253289388, + "loss": 3.466, + "step": 130 + }, + { + "epoch": 0.014767932489451477, + "grad_norm": 0.6200314164161682, + "learning_rate": 0.0014999149098528814, + "loss": 3.3664, + "step": 140 + }, + { + "epoch": 0.015822784810126583, + "grad_norm": 0.9739541411399841, + "learning_rate": 0.0014998728912140862, + "loss": 3.2897, + "step": 150 + }, + { + "epoch": 0.016877637130801686, + "grad_norm": 0.72667396068573, + "learning_rate": 0.0014998224698833922, + "loss": 3.2309, + "step": 160 + }, + { + "epoch": 0.017932489451476793, + "grad_norm": 0.5952132940292358, + "learning_rate": 0.0014997636464257956, + "loss": 3.1614, + "step": 170 + }, + { + "epoch": 0.0189873417721519, + "grad_norm": 0.7342408895492554, + "learning_rate": 0.0014996964215004416, + "loss": 3.1155, + "step": 180 + }, + { + "epoch": 0.020042194092827006, + "grad_norm": 0.9151244163513184, + "learning_rate": 0.0014996207958606182, + "loss": 3.0555, + "step": 190 + }, + { + "epoch": 0.02109704641350211, + "grad_norm": 0.7636347413063049, + "learning_rate": 0.001499536770353748, + "loss": 3.0053, + "step": 200 + }, + { + "epoch": 0.022151898734177215, + "grad_norm": 0.7413684725761414, + "learning_rate": 0.0014994443459213774, + "loss": 2.9643, + "step": 210 + }, + { + "epoch": 0.023206751054852322, + "grad_norm": 
0.6428682804107666, + "learning_rate": 0.001499343523599168, + "loss": 2.9333, + "step": 220 + }, + { + "epoch": 0.024261603375527425, + "grad_norm": 0.6471658945083618, + "learning_rate": 0.0014992343045168823, + "loss": 2.8777, + "step": 230 + }, + { + "epoch": 0.02531645569620253, + "grad_norm": 0.8812265396118164, + "learning_rate": 0.0014991166898983739, + "loss": 2.8594, + "step": 240 + }, + { + "epoch": 0.026371308016877638, + "grad_norm": 0.6855578422546387, + "learning_rate": 0.001498990681061572, + "loss": 2.8228, + "step": 250 + }, + { + "epoch": 0.027426160337552744, + "grad_norm": 0.701411783695221, + "learning_rate": 0.001498856279418467, + "loss": 2.783, + "step": 260 + }, + { + "epoch": 0.028481012658227847, + "grad_norm": 0.8474719524383545, + "learning_rate": 0.0014987134864750948, + "loss": 2.7569, + "step": 270 + }, + { + "epoch": 0.029535864978902954, + "grad_norm": 0.8126780390739441, + "learning_rate": 0.0014985623038315206, + "loss": 2.7268, + "step": 280 + }, + { + "epoch": 0.03059071729957806, + "grad_norm": 0.832779049873352, + "learning_rate": 0.0014984027331818193, + "loss": 2.7039, + "step": 290 + }, + { + "epoch": 0.03164556962025317, + "grad_norm": 0.9304881691932678, + "learning_rate": 0.0014982347763140584, + "loss": 2.6857, + "step": 300 + }, + { + "epoch": 0.03270042194092827, + "grad_norm": 0.8015810251235962, + "learning_rate": 0.0014980584351102762, + "loss": 2.665, + "step": 310 + }, + { + "epoch": 0.03375527426160337, + "grad_norm": 0.692222535610199, + "learning_rate": 0.001497873711546462, + "loss": 2.6277, + "step": 320 + }, + { + "epoch": 0.03481012658227848, + "grad_norm": 1.1149343252182007, + "learning_rate": 0.0014976806076925334, + "loss": 2.6227, + "step": 330 + }, + { + "epoch": 0.035864978902953586, + "grad_norm": 0.6867502927780151, + "learning_rate": 0.0014974791257123137, + "loss": 2.5997, + "step": 340 + }, + { + "epoch": 0.03691983122362869, + "grad_norm": 0.9448521733283997, + "learning_rate": 
0.001497269267863507, + "loss": 2.5577, + "step": 350 + }, + { + "epoch": 0.0379746835443038, + "grad_norm": 0.7209936380386353, + "learning_rate": 0.0014970510364976724, + "loss": 2.5442, + "step": 360 + }, + { + "epoch": 0.039029535864978905, + "grad_norm": 1.0526411533355713, + "learning_rate": 0.0014968244340601996, + "loss": 2.5478, + "step": 370 + }, + { + "epoch": 0.04008438818565401, + "grad_norm": 0.7158535122871399, + "learning_rate": 0.001496589463090279, + "loss": 2.5229, + "step": 380 + }, + { + "epoch": 0.04113924050632911, + "grad_norm": 1.099924087524414, + "learning_rate": 0.001496346126220875, + "loss": 2.4965, + "step": 390 + }, + { + "epoch": 0.04219409282700422, + "grad_norm": 0.7273547649383545, + "learning_rate": 0.0014960944261786966, + "loss": 2.4753, + "step": 400 + }, + { + "epoch": 0.043248945147679324, + "grad_norm": 1.0178419351577759, + "learning_rate": 0.0014958343657841655, + "loss": 2.4609, + "step": 410 + }, + { + "epoch": 0.04430379746835443, + "grad_norm": 0.8677815794944763, + "learning_rate": 0.001495565947951385, + "loss": 2.4488, + "step": 420 + }, + { + "epoch": 0.04535864978902954, + "grad_norm": 0.7638417482376099, + "learning_rate": 0.0014952891756881085, + "loss": 2.4218, + "step": 430 + }, + { + "epoch": 0.046413502109704644, + "grad_norm": 0.8483127951622009, + "learning_rate": 0.0014950040520957037, + "loss": 2.4034, + "step": 440 + }, + { + "epoch": 0.04746835443037975, + "grad_norm": 0.9330300688743591, + "learning_rate": 0.0014947105803691204, + "loss": 2.4055, + "step": 450 + }, + { + "epoch": 0.04852320675105485, + "grad_norm": 0.8546178340911865, + "learning_rate": 0.0014944087637968522, + "loss": 2.3865, + "step": 460 + }, + { + "epoch": 0.049578059071729956, + "grad_norm": 0.7100884318351746, + "learning_rate": 0.0014940986057609012, + "loss": 2.3631, + "step": 470 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 1.214513897895813, + "learning_rate": 0.0014937801097367396, + "loss": 2.3612, + "step": 
480 + }, + { + "epoch": 0.05168776371308017, + "grad_norm": 0.7304611802101135, + "learning_rate": 0.001493453279293271, + "loss": 2.326, + "step": 490 + }, + { + "epoch": 0.052742616033755275, + "grad_norm": 0.7026907205581665, + "learning_rate": 0.0014931181180927902, + "loss": 2.3168, + "step": 500 + }, + { + "epoch": 0.05379746835443038, + "grad_norm": 0.9027733206748962, + "learning_rate": 0.001492774629890942, + "loss": 2.3112, + "step": 510 + }, + { + "epoch": 0.05485232067510549, + "grad_norm": 0.8832113742828369, + "learning_rate": 0.001492422818536679, + "loss": 2.2983, + "step": 520 + }, + { + "epoch": 0.05590717299578059, + "grad_norm": 0.7855344414710999, + "learning_rate": 0.00149206268797222, + "loss": 2.2812, + "step": 530 + }, + { + "epoch": 0.056962025316455694, + "grad_norm": 0.7895187735557556, + "learning_rate": 0.0014916942422330032, + "loss": 2.2638, + "step": 540 + }, + { + "epoch": 0.0580168776371308, + "grad_norm": 1.1167247295379639, + "learning_rate": 0.001491317485447643, + "loss": 2.2692, + "step": 550 + }, + { + "epoch": 0.05907172995780591, + "grad_norm": 0.8082314133644104, + "learning_rate": 0.0014909324218378838, + "loss": 2.2235, + "step": 560 + }, + { + "epoch": 0.060126582278481014, + "grad_norm": 0.8874272704124451, + "learning_rate": 0.0014905390557185508, + "loss": 2.2292, + "step": 570 + }, + { + "epoch": 0.06118143459915612, + "grad_norm": 0.7964592576026917, + "learning_rate": 0.0014901373914975036, + "loss": 2.2296, + "step": 580 + }, + { + "epoch": 0.06223628691983123, + "grad_norm": 0.9922918081283569, + "learning_rate": 0.0014897274336755856, + "loss": 2.2044, + "step": 590 + }, + { + "epoch": 0.06329113924050633, + "grad_norm": 1.030908465385437, + "learning_rate": 0.001489309186846575, + "loss": 2.1868, + "step": 600 + }, + { + "epoch": 0.06434599156118144, + "grad_norm": 1.3219823837280273, + "learning_rate": 0.0014888826556971313, + "loss": 2.19, + "step": 610 + }, + { + "epoch": 0.06540084388185655, + 
"grad_norm": 1.5440878868103027, + "learning_rate": 0.0014884478450067444, + "loss": 2.1786, + "step": 620 + }, + { + "epoch": 0.06645569620253164, + "grad_norm": 1.6214284896850586, + "learning_rate": 0.0014880047596476807, + "loss": 2.1604, + "step": 630 + }, + { + "epoch": 0.06751054852320675, + "grad_norm": 0.9427124857902527, + "learning_rate": 0.0014875534045849274, + "loss": 2.1709, + "step": 640 + }, + { + "epoch": 0.06856540084388185, + "grad_norm": 1.041245698928833, + "learning_rate": 0.0014870937848761388, + "loss": 2.1605, + "step": 650 + }, + { + "epoch": 0.06962025316455696, + "grad_norm": 0.7092825770378113, + "learning_rate": 0.001486625905671578, + "loss": 2.1469, + "step": 660 + }, + { + "epoch": 0.07067510548523206, + "grad_norm": 0.9053676724433899, + "learning_rate": 0.00148614977221406, + "loss": 2.135, + "step": 670 + }, + { + "epoch": 0.07172995780590717, + "grad_norm": 0.956945538520813, + "learning_rate": 0.0014856653898388927, + "loss": 2.1334, + "step": 680 + }, + { + "epoch": 0.07278481012658228, + "grad_norm": 0.7607901096343994, + "learning_rate": 0.001485172763973817, + "loss": 2.1271, + "step": 690 + }, + { + "epoch": 0.07383966244725738, + "grad_norm": 0.7718624472618103, + "learning_rate": 0.0014846719001389466, + "loss": 2.1138, + "step": 700 + }, + { + "epoch": 0.07489451476793249, + "grad_norm": 1.094922661781311, + "learning_rate": 0.001484162803946705, + "loss": 2.1058, + "step": 710 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 1.1661168336868286, + "learning_rate": 0.0014836454811017635, + "loss": 2.0914, + "step": 720 + }, + { + "epoch": 0.0770042194092827, + "grad_norm": 0.8227654695510864, + "learning_rate": 0.0014831199374009778, + "loss": 2.0811, + "step": 730 + }, + { + "epoch": 0.07805907172995781, + "grad_norm": 0.9518805742263794, + "learning_rate": 0.0014825861787333208, + "loss": 2.0855, + "step": 740 + }, + { + "epoch": 0.07911392405063292, + "grad_norm": 0.8974795937538147, + "learning_rate": 
0.0014820442110798197, + "loss": 2.072, + "step": 750 + }, + { + "epoch": 0.08016877637130802, + "grad_norm": 0.9827273488044739, + "learning_rate": 0.0014814940405134865, + "loss": 2.0643, + "step": 760 + }, + { + "epoch": 0.08122362869198312, + "grad_norm": 1.2071738243103027, + "learning_rate": 0.001480935673199251, + "loss": 2.0575, + "step": 770 + }, + { + "epoch": 0.08227848101265822, + "grad_norm": 1.1836011409759521, + "learning_rate": 0.0014803691153938915, + "loss": 2.0493, + "step": 780 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 0.7490857243537903, + "learning_rate": 0.0014797943734459653, + "loss": 2.0538, + "step": 790 + }, + { + "epoch": 0.08438818565400844, + "grad_norm": 0.8227195143699646, + "learning_rate": 0.001479211453795736, + "loss": 2.0322, + "step": 800 + }, + { + "epoch": 0.08544303797468354, + "grad_norm": 0.9381656646728516, + "learning_rate": 0.0014786203629751033, + "loss": 2.0161, + "step": 810 + }, + { + "epoch": 0.08649789029535865, + "grad_norm": 0.9963043332099915, + "learning_rate": 0.0014780211076075279, + "loss": 2.0232, + "step": 820 + }, + { + "epoch": 0.08755274261603375, + "grad_norm": 0.9443498253822327, + "learning_rate": 0.0014774136944079594, + "loss": 2.0315, + "step": 830 + }, + { + "epoch": 0.08860759493670886, + "grad_norm": 1.029593586921692, + "learning_rate": 0.0014767981301827592, + "loss": 2.0073, + "step": 840 + }, + { + "epoch": 0.08966244725738397, + "grad_norm": 0.786686360836029, + "learning_rate": 0.0014761744218296249, + "loss": 2.0046, + "step": 850 + }, + { + "epoch": 0.09071729957805907, + "grad_norm": 0.9562454223632812, + "learning_rate": 0.001475542576337513, + "loss": 2.0058, + "step": 860 + }, + { + "epoch": 0.09177215189873418, + "grad_norm": 0.7851126194000244, + "learning_rate": 0.001474902600786561, + "loss": 1.999, + "step": 870 + }, + { + "epoch": 0.09282700421940929, + "grad_norm": 0.7478358745574951, + "learning_rate": 0.0014742545023480075, + "loss": 1.9947, + "step": 880 + 
}, + { + "epoch": 0.0938818565400844, + "grad_norm": 0.9895840883255005, + "learning_rate": 0.0014735982882841117, + "loss": 1.9814, + "step": 890 + }, + { + "epoch": 0.0949367088607595, + "grad_norm": 0.842948317527771, + "learning_rate": 0.0014729339659480727, + "loss": 1.9763, + "step": 900 + }, + { + "epoch": 0.09599156118143459, + "grad_norm": 0.940549910068512, + "learning_rate": 0.0014722615427839468, + "loss": 1.9784, + "step": 910 + }, + { + "epoch": 0.0970464135021097, + "grad_norm": 0.9862644672393799, + "learning_rate": 0.0014715810263265633, + "loss": 1.9702, + "step": 920 + }, + { + "epoch": 0.0981012658227848, + "grad_norm": 0.803255558013916, + "learning_rate": 0.0014708924242014423, + "loss": 1.9506, + "step": 930 + }, + { + "epoch": 0.09915611814345991, + "grad_norm": 1.1073334217071533, + "learning_rate": 0.0014701957441247064, + "loss": 1.9604, + "step": 940 + }, + { + "epoch": 0.10021097046413502, + "grad_norm": 0.9371849894523621, + "learning_rate": 0.0014694909939029959, + "loss": 1.9417, + "step": 950 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 0.8160054683685303, + "learning_rate": 0.0014687781814333814, + "loss": 1.9469, + "step": 960 + }, + { + "epoch": 0.10232067510548523, + "grad_norm": 0.9736929535865784, + "learning_rate": 0.0014680573147032746, + "loss": 1.9526, + "step": 970 + }, + { + "epoch": 0.10337552742616034, + "grad_norm": 1.0032892227172852, + "learning_rate": 0.0014673284017903392, + "loss": 1.9261, + "step": 980 + }, + { + "epoch": 0.10443037974683544, + "grad_norm": 1.0404109954833984, + "learning_rate": 0.0014665914508624, + "loss": 1.9281, + "step": 990 + }, + { + "epoch": 0.10548523206751055, + "grad_norm": 1.166176438331604, + "learning_rate": 0.0014658464701773526, + "loss": 1.9616, + "step": 1000 + }, + { + "epoch": 0.10654008438818566, + "grad_norm": 1.1827714443206787, + "learning_rate": 0.0014650934680830688, + "loss": 1.9351, + "step": 1010 + }, + { + "epoch": 0.10759493670886076, + "grad_norm": 
1.1190147399902344, + "learning_rate": 0.0014643324530173051, + "loss": 1.9044, + "step": 1020 + }, + { + "epoch": 0.10864978902953587, + "grad_norm": 0.8263761401176453, + "learning_rate": 0.0014635634335076067, + "loss": 1.8974, + "step": 1030 + }, + { + "epoch": 0.10970464135021098, + "grad_norm": 0.9258265495300293, + "learning_rate": 0.001462786418171213, + "loss": 1.9, + "step": 1040 + }, + { + "epoch": 0.11075949367088607, + "grad_norm": 2.1446609497070312, + "learning_rate": 0.0014620014157149597, + "loss": 1.9161, + "step": 1050 + }, + { + "epoch": 0.11181434599156118, + "grad_norm": 0.8658919930458069, + "learning_rate": 0.001461208434935183, + "loss": 1.9123, + "step": 1060 + }, + { + "epoch": 0.11286919831223628, + "grad_norm": 0.9310222864151001, + "learning_rate": 0.0014604074847176197, + "loss": 1.8869, + "step": 1070 + }, + { + "epoch": 0.11392405063291139, + "grad_norm": 1.1793153285980225, + "learning_rate": 0.0014595985740373082, + "loss": 1.8974, + "step": 1080 + }, + { + "epoch": 0.1149789029535865, + "grad_norm": 1.285751461982727, + "learning_rate": 0.0014587817119584873, + "loss": 1.8972, + "step": 1090 + }, + { + "epoch": 0.1160337552742616, + "grad_norm": 1.0273982286453247, + "learning_rate": 0.001457956907634496, + "loss": 1.8878, + "step": 1100 + }, + { + "epoch": 0.11708860759493671, + "grad_norm": 0.9092646837234497, + "learning_rate": 0.0014571241703076692, + "loss": 1.8913, + "step": 1110 + }, + { + "epoch": 0.11814345991561181, + "grad_norm": 0.7965483069419861, + "learning_rate": 0.0014562835093092348, + "loss": 1.8807, + "step": 1120 + }, + { + "epoch": 0.11919831223628692, + "grad_norm": 1.469024419784546, + "learning_rate": 0.0014554349340592104, + "loss": 1.8737, + "step": 1130 + }, + { + "epoch": 0.12025316455696203, + "grad_norm": 0.8912035226821899, + "learning_rate": 0.001454578454066296, + "loss": 1.8856, + "step": 1140 + }, + { + "epoch": 0.12130801687763713, + "grad_norm": 1.7830798625946045, + "learning_rate": 
0.0014537140789277678, + "loss": 1.8717, + "step": 1150 + }, + { + "epoch": 0.12236286919831224, + "grad_norm": 1.1559503078460693, + "learning_rate": 0.0014528418183293716, + "loss": 1.8878, + "step": 1160 + }, + { + "epoch": 0.12341772151898735, + "grad_norm": 0.81947261095047, + "learning_rate": 0.001451961682045213, + "loss": 1.8572, + "step": 1170 + }, + { + "epoch": 0.12447257383966245, + "grad_norm": 2.734455108642578, + "learning_rate": 0.001451073679937649, + "loss": 1.8507, + "step": 1180 + }, + { + "epoch": 0.12552742616033755, + "grad_norm": 1.2583578824996948, + "learning_rate": 0.0014501778219571766, + "loss": 1.8637, + "step": 1190 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 1.1853755712509155, + "learning_rate": 0.0014492741181423225, + "loss": 1.8538, + "step": 1200 + }, + { + "epoch": 0.12763713080168776, + "grad_norm": 1.4969146251678467, + "learning_rate": 0.0014483625786195285, + "loss": 1.8424, + "step": 1210 + }, + { + "epoch": 0.12869198312236288, + "grad_norm": 0.9474691152572632, + "learning_rate": 0.0014474432136030405, + "loss": 1.8272, + "step": 1220 + }, + { + "epoch": 0.12974683544303797, + "grad_norm": 0.8918873071670532, + "learning_rate": 0.0014465160333947923, + "loss": 1.8236, + "step": 1230 + }, + { + "epoch": 0.1308016877637131, + "grad_norm": 0.8738095164299011, + "learning_rate": 0.0014455810483842908, + "loss": 1.8429, + "step": 1240 + }, + { + "epoch": 0.13185654008438819, + "grad_norm": 1.3271464109420776, + "learning_rate": 0.0014446382690484997, + "loss": 1.8491, + "step": 1250 + }, + { + "epoch": 0.13291139240506328, + "grad_norm": 1.3655813932418823, + "learning_rate": 0.0014436877059517215, + "loss": 1.8339, + "step": 1260 + }, + { + "epoch": 0.1339662447257384, + "grad_norm": 0.8837555646896362, + "learning_rate": 0.0014427293697454803, + "loss": 1.8273, + "step": 1270 + }, + { + "epoch": 0.1350210970464135, + "grad_norm": 0.9122318625450134, + "learning_rate": 0.001441763271168401, + "loss": 1.843, + 
"step": 1280 + }, + { + "epoch": 0.1360759493670886, + "grad_norm": 0.7891908288002014, + "learning_rate": 0.00144078942104609, + "loss": 1.832, + "step": 1290 + }, + { + "epoch": 0.1371308016877637, + "grad_norm": 0.9152756333351135, + "learning_rate": 0.001439807830291013, + "loss": 1.8185, + "step": 1300 + }, + { + "epoch": 0.13818565400843882, + "grad_norm": 1.1370059251785278, + "learning_rate": 0.0014388185099023744, + "loss": 1.8245, + "step": 1310 + }, + { + "epoch": 0.13924050632911392, + "grad_norm": 0.9323263168334961, + "learning_rate": 0.0014378214709659916, + "loss": 1.8252, + "step": 1320 + }, + { + "epoch": 0.14029535864978904, + "grad_norm": 1.3948912620544434, + "learning_rate": 0.0014368167246541733, + "loss": 1.8094, + "step": 1330 + }, + { + "epoch": 0.14135021097046413, + "grad_norm": 1.9676992893218994, + "learning_rate": 0.0014358042822255918, + "loss": 1.8151, + "step": 1340 + }, + { + "epoch": 0.14240506329113925, + "grad_norm": 1.3819154500961304, + "learning_rate": 0.0014347841550251597, + "loss": 1.819, + "step": 1350 + }, + { + "epoch": 0.14345991561181434, + "grad_norm": 1.0634726285934448, + "learning_rate": 0.0014337563544838997, + "loss": 1.8025, + "step": 1360 + }, + { + "epoch": 0.14451476793248946, + "grad_norm": 0.825230062007904, + "learning_rate": 0.001432720892118819, + "loss": 1.7987, + "step": 1370 + }, + { + "epoch": 0.14556962025316456, + "grad_norm": 1.3411486148834229, + "learning_rate": 0.0014316777795327794, + "loss": 1.7921, + "step": 1380 + }, + { + "epoch": 0.14662447257383968, + "grad_norm": 1.1883015632629395, + "learning_rate": 0.001430627028414366, + "loss": 1.8007, + "step": 1390 + }, + { + "epoch": 0.14767932489451477, + "grad_norm": 1.8188656568527222, + "learning_rate": 0.0014295686505377586, + "loss": 1.7811, + "step": 1400 + }, + { + "epoch": 0.14873417721518986, + "grad_norm": 1.4893032312393188, + "learning_rate": 0.0014285026577625982, + "loss": 1.7993, + "step": 1410 + }, + { + "epoch": 
0.14978902953586498, + "grad_norm": 1.1121314764022827, + "learning_rate": 0.0014274290620338542, + "loss": 1.7873, + "step": 1420 + }, + { + "epoch": 0.15084388185654007, + "grad_norm": 1.2530624866485596, + "learning_rate": 0.0014263478753816906, + "loss": 1.7817, + "step": 1430 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 0.9561247229576111, + "learning_rate": 0.0014252591099213326, + "loss": 1.7759, + "step": 1440 + }, + { + "epoch": 0.1529535864978903, + "grad_norm": 0.8421205282211304, + "learning_rate": 0.001424162777852928, + "loss": 1.7797, + "step": 1450 + }, + { + "epoch": 0.1540084388185654, + "grad_norm": 0.8445647358894348, + "learning_rate": 0.0014230588914614134, + "loss": 1.775, + "step": 1460 + }, + { + "epoch": 0.1550632911392405, + "grad_norm": 0.935764729976654, + "learning_rate": 0.0014219474631163745, + "loss": 1.7746, + "step": 1470 + }, + { + "epoch": 0.15611814345991562, + "grad_norm": 1.024232268333435, + "learning_rate": 0.001420828505271909, + "loss": 1.776, + "step": 1480 + }, + { + "epoch": 0.1571729957805907, + "grad_norm": 0.8377081155776978, + "learning_rate": 0.0014197020304664856, + "loss": 1.7738, + "step": 1490 + }, + { + "epoch": 0.15822784810126583, + "grad_norm": 0.9285411238670349, + "learning_rate": 0.0014185680513228048, + "loss": 1.7748, + "step": 1500 + }, + { + "epoch": 0.15928270042194093, + "grad_norm": 0.9150543212890625, + "learning_rate": 0.0014174265805476564, + "loss": 1.7695, + "step": 1510 + }, + { + "epoch": 0.16033755274261605, + "grad_norm": 2.08648419380188, + "learning_rate": 0.0014162776309317778, + "loss": 1.7755, + "step": 1520 + }, + { + "epoch": 0.16139240506329114, + "grad_norm": 1.7825872898101807, + "learning_rate": 0.0014151212153497108, + "loss": 1.7757, + "step": 1530 + }, + { + "epoch": 0.16244725738396623, + "grad_norm": 1.6050572395324707, + "learning_rate": 0.0014139573467596561, + "loss": 1.7406, + "step": 1540 + }, + { + "epoch": 0.16350210970464135, + "grad_norm": 
1.615334391593933, + "learning_rate": 0.00141278603820333, + "loss": 1.7304, + "step": 1550 + }, + { + "epoch": 0.16455696202531644, + "grad_norm": 1.5192193984985352, + "learning_rate": 0.0014116073028058165, + "loss": 1.7262, + "step": 1560 + }, + { + "epoch": 0.16561181434599156, + "grad_norm": 0.8589497804641724, + "learning_rate": 0.0014104211537754217, + "loss": 1.7159, + "step": 1570 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 0.9579806327819824, + "learning_rate": 0.001409227604403524, + "loss": 1.7368, + "step": 1580 + }, + { + "epoch": 0.16772151898734178, + "grad_norm": 0.8760929703712463, + "learning_rate": 0.0014080266680644277, + "loss": 1.7351, + "step": 1590 + }, + { + "epoch": 0.16877637130801687, + "grad_norm": 0.9583684206008911, + "learning_rate": 0.0014068183582152103, + "loss": 1.755, + "step": 1600 + }, + { + "epoch": 0.169831223628692, + "grad_norm": 1.0535222291946411, + "learning_rate": 0.001405602688395574, + "loss": 1.7488, + "step": 1610 + }, + { + "epoch": 0.17088607594936708, + "grad_norm": 0.930578887462616, + "learning_rate": 0.0014043796722276924, + "loss": 1.7226, + "step": 1620 + }, + { + "epoch": 0.1719409282700422, + "grad_norm": 1.6720802783966064, + "learning_rate": 0.0014031493234160591, + "loss": 1.7467, + "step": 1630 + }, + { + "epoch": 0.1729957805907173, + "grad_norm": 1.4819642305374146, + "learning_rate": 0.0014019116557473332, + "loss": 1.7301, + "step": 1640 + }, + { + "epoch": 0.17405063291139242, + "grad_norm": 1.6672861576080322, + "learning_rate": 0.0014006666830901854, + "loss": 1.7106, + "step": 1650 + }, + { + "epoch": 0.1751054852320675, + "grad_norm": 1.650740146636963, + "learning_rate": 0.001399414419395142, + "loss": 1.7045, + "step": 1660 + }, + { + "epoch": 0.17616033755274263, + "grad_norm": 1.6551830768585205, + "learning_rate": 0.0013981548786944293, + "loss": 1.7092, + "step": 1670 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 1.689831256866455, + "learning_rate": 
0.0013968880751018158, + "loss": 1.6995, + "step": 1680 + }, + { + "epoch": 0.17827004219409281, + "grad_norm": 1.2965401411056519, + "learning_rate": 0.0013956140228124545, + "loss": 1.7005, + "step": 1690 + }, + { + "epoch": 0.17932489451476794, + "grad_norm": 0.8493403792381287, + "learning_rate": 0.0013943327361027231, + "loss": 1.7066, + "step": 1700 + }, + { + "epoch": 0.18037974683544303, + "grad_norm": 0.856659471988678, + "learning_rate": 0.0013930442293300649, + "loss": 1.6961, + "step": 1710 + }, + { + "epoch": 0.18143459915611815, + "grad_norm": 0.7524833679199219, + "learning_rate": 0.0013917485169328279, + "loss": 1.7012, + "step": 1720 + }, + { + "epoch": 0.18248945147679324, + "grad_norm": 0.9674717783927917, + "learning_rate": 0.0013904456134301016, + "loss": 1.712, + "step": 1730 + }, + { + "epoch": 0.18354430379746836, + "grad_norm": 0.9238637685775757, + "learning_rate": 0.0013891355334215562, + "loss": 1.722, + "step": 1740 + }, + { + "epoch": 0.18459915611814345, + "grad_norm": 0.7928546667098999, + "learning_rate": 0.0013878182915872776, + "loss": 1.7387, + "step": 1750 + }, + { + "epoch": 0.18565400843881857, + "grad_norm": 0.9576042294502258, + "learning_rate": 0.001386493902687604, + "loss": 1.7198, + "step": 1760 + }, + { + "epoch": 0.18670886075949367, + "grad_norm": 1.4766720533370972, + "learning_rate": 0.00138516238156296, + "loss": 1.7265, + "step": 1770 + }, + { + "epoch": 0.1877637130801688, + "grad_norm": 1.9756706953048706, + "learning_rate": 0.0013838237431336895, + "loss": 1.7334, + "step": 1780 + }, + { + "epoch": 0.18881856540084388, + "grad_norm": 1.470369577407837, + "learning_rate": 0.0013824780023998899, + "loss": 1.7168, + "step": 1790 + }, + { + "epoch": 0.189873417721519, + "grad_norm": 1.5894272327423096, + "learning_rate": 0.0013811251744412431, + "loss": 1.6961, + "step": 1800 + }, + { + "epoch": 0.1909282700421941, + "grad_norm": 1.7183265686035156, + "learning_rate": 0.0013797652744168473, + "loss": 1.6776, + 
"step": 1810 + }, + { + "epoch": 0.19198312236286919, + "grad_norm": 1.5747398138046265, + "learning_rate": 0.0013783983175650457, + "loss": 1.6772, + "step": 1820 + }, + { + "epoch": 0.1930379746835443, + "grad_norm": 1.7128010988235474, + "learning_rate": 0.0013770243192032581, + "loss": 1.6799, + "step": 1830 + }, + { + "epoch": 0.1940928270042194, + "grad_norm": 1.136812686920166, + "learning_rate": 0.0013756432947278064, + "loss": 1.6745, + "step": 1840 + }, + { + "epoch": 0.19514767932489452, + "grad_norm": 0.9770800471305847, + "learning_rate": 0.0013742552596137444, + "loss": 1.6787, + "step": 1850 + }, + { + "epoch": 0.1962025316455696, + "grad_norm": 1.1060165166854858, + "learning_rate": 0.0013728602294146833, + "loss": 1.6908, + "step": 1860 + }, + { + "epoch": 0.19725738396624473, + "grad_norm": 1.2064745426177979, + "learning_rate": 0.0013714582197626175, + "loss": 1.7027, + "step": 1870 + }, + { + "epoch": 0.19831223628691982, + "grad_norm": 0.9761057496070862, + "learning_rate": 0.0013700492463677501, + "loss": 1.6934, + "step": 1880 + }, + { + "epoch": 0.19936708860759494, + "grad_norm": 0.9375060796737671, + "learning_rate": 0.0013686333250183154, + "loss": 1.6997, + "step": 1890 + }, + { + "epoch": 0.20042194092827004, + "grad_norm": 1.3932044506072998, + "learning_rate": 0.001367210471580404, + "loss": 1.6935, + "step": 1900 + }, + { + "epoch": 0.20147679324894516, + "grad_norm": 1.7959548234939575, + "learning_rate": 0.0013657807019977835, + "loss": 1.6955, + "step": 1910 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 1.1226527690887451, + "learning_rate": 0.0013643440322917198, + "loss": 1.6909, + "step": 1920 + }, + { + "epoch": 0.20358649789029537, + "grad_norm": 1.1206132173538208, + "learning_rate": 0.0013629004785607989, + "loss": 1.6909, + "step": 1930 + }, + { + "epoch": 0.20464135021097046, + "grad_norm": 1.1389427185058594, + "learning_rate": 0.0013614500569807445, + "loss": 1.6834, + "step": 1940 + }, + { + "epoch": 
0.20569620253164558, + "grad_norm": 1.0868134498596191, + "learning_rate": 0.0013599927838042394, + "loss": 1.6687, + "step": 1950 + }, + { + "epoch": 0.20675105485232068, + "grad_norm": 0.8616573810577393, + "learning_rate": 0.0013585286753607408, + "loss": 1.6643, + "step": 1960 + }, + { + "epoch": 0.20780590717299577, + "grad_norm": 0.9690150022506714, + "learning_rate": 0.0013570577480562986, + "loss": 1.681, + "step": 1970 + }, + { + "epoch": 0.2088607594936709, + "grad_norm": 1.2257400751113892, + "learning_rate": 0.0013555800183733717, + "loss": 1.6786, + "step": 1980 + }, + { + "epoch": 0.20991561181434598, + "grad_norm": 1.3563079833984375, + "learning_rate": 0.0013540955028706425, + "loss": 1.6708, + "step": 1990 + }, + { + "epoch": 0.2109704641350211, + "grad_norm": 1.0724034309387207, + "learning_rate": 0.0013526042181828324, + "loss": 1.6549, + "step": 2000 + }, + { + "epoch": 0.2120253164556962, + "grad_norm": 1.1370488405227661, + "learning_rate": 0.0013511061810205143, + "loss": 1.6672, + "step": 2010 + }, + { + "epoch": 0.21308016877637131, + "grad_norm": 1.3393785953521729, + "learning_rate": 0.001349601408169926, + "loss": 1.6858, + "step": 2020 + }, + { + "epoch": 0.2141350210970464, + "grad_norm": 1.0464202165603638, + "learning_rate": 0.0013480899164927823, + "loss": 1.6739, + "step": 2030 + }, + { + "epoch": 0.21518987341772153, + "grad_norm": 0.8456589579582214, + "learning_rate": 0.0013465717229260853, + "loss": 1.672, + "step": 2040 + }, + { + "epoch": 0.21624472573839662, + "grad_norm": 0.8528109788894653, + "learning_rate": 0.001345046844481935, + "loss": 1.6703, + "step": 2050 + }, + { + "epoch": 0.21729957805907174, + "grad_norm": 1.4631264209747314, + "learning_rate": 0.0013435152982473396, + "loss": 1.6703, + "step": 2060 + }, + { + "epoch": 0.21835443037974683, + "grad_norm": 2.1788136959075928, + "learning_rate": 0.0013419771013840217, + "loss": 1.6777, + "step": 2070 + }, + { + "epoch": 0.21940928270042195, + "grad_norm": 
1.3556214570999146, + "learning_rate": 0.001340432271128229, + "loss": 1.661, + "step": 2080 + }, + { + "epoch": 0.22046413502109705, + "grad_norm": 1.5335493087768555, + "learning_rate": 0.0013388808247905381, + "loss": 1.6426, + "step": 2090 + }, + { + "epoch": 0.22151898734177214, + "grad_norm": 0.9891492128372192, + "learning_rate": 0.0013373227797556634, + "loss": 1.6436, + "step": 2100 + }, + { + "epoch": 0.22257383966244726, + "grad_norm": 0.9255599975585938, + "learning_rate": 0.00133575815348226, + "loss": 1.6427, + "step": 2110 + }, + { + "epoch": 0.22362869198312235, + "grad_norm": 0.936639130115509, + "learning_rate": 0.0013341869635027292, + "loss": 1.6429, + "step": 2120 + }, + { + "epoch": 0.22468354430379747, + "grad_norm": 0.891482412815094, + "learning_rate": 0.001332609227423022, + "loss": 1.6527, + "step": 2130 + }, + { + "epoch": 0.22573839662447256, + "grad_norm": 0.9986829161643982, + "learning_rate": 0.0013310249629224417, + "loss": 1.654, + "step": 2140 + }, + { + "epoch": 0.22679324894514769, + "grad_norm": 0.760584831237793, + "learning_rate": 0.0013294341877534454, + "loss": 1.6535, + "step": 2150 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 1.2482274770736694, + "learning_rate": 0.0013278369197414458, + "loss": 1.6719, + "step": 2160 + }, + { + "epoch": 0.2289029535864979, + "grad_norm": 1.3534704446792603, + "learning_rate": 0.0013262331767846104, + "loss": 1.6539, + "step": 2170 + }, + { + "epoch": 0.229957805907173, + "grad_norm": 1.02821946144104, + "learning_rate": 0.0013246229768536628, + "loss": 1.6399, + "step": 2180 + }, + { + "epoch": 0.2310126582278481, + "grad_norm": 1.0258729457855225, + "learning_rate": 0.001323006337991679, + "loss": 1.6563, + "step": 2190 + }, + { + "epoch": 0.2320675105485232, + "grad_norm": 1.1591565608978271, + "learning_rate": 0.0013213832783138873, + "loss": 1.6599, + "step": 2200 + }, + { + "epoch": 0.23312236286919832, + "grad_norm": 1.6321678161621094, + "learning_rate": 
0.0013197538160074633, + "loss": 1.6534, + "step": 2210 + }, + { + "epoch": 0.23417721518987342, + "grad_norm": 0.9629566073417664, + "learning_rate": 0.0013181179693313283, + "loss": 1.645, + "step": 2220 + }, + { + "epoch": 0.23523206751054854, + "grad_norm": 1.3709475994110107, + "learning_rate": 0.0013164757566159428, + "loss": 1.6321, + "step": 2230 + }, + { + "epoch": 0.23628691983122363, + "grad_norm": 1.2793376445770264, + "learning_rate": 0.001314827196263102, + "loss": 1.6366, + "step": 2240 + }, + { + "epoch": 0.23734177215189872, + "grad_norm": 1.5623000860214233, + "learning_rate": 0.0013131723067457302, + "loss": 1.6295, + "step": 2250 + }, + { + "epoch": 0.23839662447257384, + "grad_norm": 1.4178276062011719, + "learning_rate": 0.0013115111066076721, + "loss": 1.6088, + "step": 2260 + }, + { + "epoch": 0.23945147679324894, + "grad_norm": 1.4362581968307495, + "learning_rate": 0.0013098436144634862, + "loss": 1.6557, + "step": 2270 + }, + { + "epoch": 0.24050632911392406, + "grad_norm": 1.246057391166687, + "learning_rate": 0.0013081698489982364, + "loss": 1.6364, + "step": 2280 + }, + { + "epoch": 0.24156118143459915, + "grad_norm": 1.4290248155593872, + "learning_rate": 0.001306489828967282, + "loss": 1.6201, + "step": 2290 + }, + { + "epoch": 0.24261603375527427, + "grad_norm": 0.9489884376525879, + "learning_rate": 0.0013048035731960679, + "loss": 1.6146, + "step": 2300 + }, + { + "epoch": 0.24367088607594936, + "grad_norm": 1.7333236932754517, + "learning_rate": 0.0013031111005799133, + "loss": 1.6303, + "step": 2310 + }, + { + "epoch": 0.24472573839662448, + "grad_norm": 1.205209732055664, + "learning_rate": 0.0013014124300838004, + "loss": 1.6424, + "step": 2320 + }, + { + "epoch": 0.24578059071729957, + "grad_norm": 1.2775635719299316, + "learning_rate": 0.0012997075807421612, + "loss": 1.6172, + "step": 2330 + }, + { + "epoch": 0.2468354430379747, + "grad_norm": 0.9178423285484314, + "learning_rate": 0.0012979965716586653, + "loss": 1.6072, + 
"step": 2340 + }, + { + "epoch": 0.2478902953586498, + "grad_norm": 1.173417091369629, + "learning_rate": 0.0012962794220060048, + "loss": 1.6139, + "step": 2350 + }, + { + "epoch": 0.2489451476793249, + "grad_norm": 1.099623441696167, + "learning_rate": 0.0012945561510256801, + "loss": 1.6281, + "step": 2360 + }, + { + "epoch": 0.25, + "grad_norm": 0.8599802255630493, + "learning_rate": 0.001292826778027784, + "loss": 1.6228, + "step": 2370 + }, + { + "epoch": 0.2510548523206751, + "grad_norm": 1.1425857543945312, + "learning_rate": 0.0012910913223907856, + "loss": 1.6233, + "step": 2380 + }, + { + "epoch": 0.2521097046413502, + "grad_norm": 1.2793357372283936, + "learning_rate": 0.0012893498035613123, + "loss": 1.6234, + "step": 2390 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 1.0961679220199585, + "learning_rate": 0.001287602241053933, + "loss": 1.6226, + "step": 2400 + }, + { + "epoch": 0.2542194092827004, + "grad_norm": 1.0977396965026855, + "learning_rate": 0.0012858486544509392, + "loss": 1.629, + "step": 2410 + }, + { + "epoch": 0.2552742616033755, + "grad_norm": 0.8137303590774536, + "learning_rate": 0.0012840890634021249, + "loss": 1.624, + "step": 2420 + }, + { + "epoch": 0.2563291139240506, + "grad_norm": 1.0447537899017334, + "learning_rate": 0.0012823234876245667, + "loss": 1.6266, + "step": 2430 + }, + { + "epoch": 0.25738396624472576, + "grad_norm": 1.3971070051193237, + "learning_rate": 0.0012805519469024035, + "loss": 1.6337, + "step": 2440 + }, + { + "epoch": 0.25843881856540085, + "grad_norm": 0.8034732937812805, + "learning_rate": 0.0012787744610866143, + "loss": 1.615, + "step": 2450 + }, + { + "epoch": 0.25949367088607594, + "grad_norm": 0.9638532996177673, + "learning_rate": 0.0012769910500947954, + "loss": 1.6296, + "step": 2460 + }, + { + "epoch": 0.26054852320675104, + "grad_norm": 0.9953581094741821, + "learning_rate": 0.0012752017339109376, + "loss": 1.6239, + "step": 2470 + }, + { + "epoch": 0.2616033755274262, + 
"grad_norm": 1.3530935049057007, + "learning_rate": 0.0012734065325852029, + "loss": 1.6275, + "step": 2480 + }, + { + "epoch": 0.2626582278481013, + "grad_norm": 0.9526589512825012, + "learning_rate": 0.0012716054662336987, + "loss": 1.6121, + "step": 2490 + }, + { + "epoch": 0.26371308016877637, + "grad_norm": 1.1126807928085327, + "learning_rate": 0.001269798555038252, + "loss": 1.6132, + "step": 2500 + }, + { + "epoch": 0.26476793248945146, + "grad_norm": 1.7150431871414185, + "learning_rate": 0.0012679858192461864, + "loss": 1.6183, + "step": 2510 + }, + { + "epoch": 0.26582278481012656, + "grad_norm": 1.181544303894043, + "learning_rate": 0.0012661672791700906, + "loss": 1.6176, + "step": 2520 + }, + { + "epoch": 0.2668776371308017, + "grad_norm": 1.051591157913208, + "learning_rate": 0.0012643429551875945, + "loss": 1.5936, + "step": 2530 + }, + { + "epoch": 0.2679324894514768, + "grad_norm": 1.1829042434692383, + "learning_rate": 0.0012625128677411388, + "loss": 1.5942, + "step": 2540 + }, + { + "epoch": 0.2689873417721519, + "grad_norm": 1.2094480991363525, + "learning_rate": 0.0012606770373377475, + "loss": 1.5924, + "step": 2550 + }, + { + "epoch": 0.270042194092827, + "grad_norm": 0.8718088269233704, + "learning_rate": 0.0012588354845487959, + "loss": 1.6037, + "step": 2560 + }, + { + "epoch": 0.27109704641350213, + "grad_norm": 0.8232197165489197, + "learning_rate": 0.001256988230009783, + "loss": 1.5982, + "step": 2570 + }, + { + "epoch": 0.2721518987341772, + "grad_norm": 0.7273486256599426, + "learning_rate": 0.0012551352944200976, + "loss": 1.6041, + "step": 2580 + }, + { + "epoch": 0.2732067510548523, + "grad_norm": 0.8456985354423523, + "learning_rate": 0.0012532766985427874, + "loss": 1.6133, + "step": 2590 + }, + { + "epoch": 0.2742616033755274, + "grad_norm": 0.9165290594100952, + "learning_rate": 0.0012514124632043272, + "loss": 1.6136, + "step": 2600 + }, + { + "epoch": 0.27531645569620256, + "grad_norm": 1.0359596014022827, + 
"learning_rate": 0.0012495426092943842, + "loss": 1.6093, + "step": 2610 + }, + { + "epoch": 0.27637130801687765, + "grad_norm": 0.8679115176200867, + "learning_rate": 0.0012476671577655845, + "loss": 1.6036, + "step": 2620 + }, + { + "epoch": 0.27742616033755274, + "grad_norm": 1.0881611108779907, + "learning_rate": 0.0012457861296332774, + "loss": 1.5939, + "step": 2630 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 1.2735915184020996, + "learning_rate": 0.001243899545975303, + "loss": 1.6095, + "step": 2640 + }, + { + "epoch": 0.2795358649789029, + "grad_norm": 0.8626992106437683, + "learning_rate": 0.0012420074279317515, + "loss": 1.5968, + "step": 2650 + }, + { + "epoch": 0.2805907172995781, + "grad_norm": 1.2875665426254272, + "learning_rate": 0.0012401097967047298, + "loss": 1.5774, + "step": 2660 + }, + { + "epoch": 0.28164556962025317, + "grad_norm": 1.0421370267868042, + "learning_rate": 0.001238206673558122, + "loss": 1.5844, + "step": 2670 + }, + { + "epoch": 0.28270042194092826, + "grad_norm": 1.0359898805618286, + "learning_rate": 0.0012362980798173526, + "loss": 1.5945, + "step": 2680 + }, + { + "epoch": 0.28375527426160335, + "grad_norm": 0.9459850192070007, + "learning_rate": 0.0012343840368691462, + "loss": 1.5852, + "step": 2690 + }, + { + "epoch": 0.2848101265822785, + "grad_norm": 0.9274162650108337, + "learning_rate": 0.0012324645661612886, + "loss": 1.5806, + "step": 2700 + }, + { + "epoch": 0.2858649789029536, + "grad_norm": 0.9620082974433899, + "learning_rate": 0.0012305396892023867, + "loss": 1.592, + "step": 2710 + }, + { + "epoch": 0.2869198312236287, + "grad_norm": 0.9705211520195007, + "learning_rate": 0.0012286094275616264, + "loss": 1.5929, + "step": 2720 + }, + { + "epoch": 0.2879746835443038, + "grad_norm": 1.2897881269454956, + "learning_rate": 0.0012266738028685318, + "loss": 1.5748, + "step": 2730 + }, + { + "epoch": 0.2890295358649789, + "grad_norm": 1.2897615432739258, + "learning_rate": 0.001224732836812723, + 
"loss": 1.5924, + "step": 2740 + }, + { + "epoch": 0.290084388185654, + "grad_norm": 1.49538254737854, + "learning_rate": 0.0012227865511436724, + "loss": 1.5967, + "step": 2750 + }, + { + "epoch": 0.2911392405063291, + "grad_norm": 1.6564406156539917, + "learning_rate": 0.001220834967670461, + "loss": 1.5915, + "step": 2760 + }, + { + "epoch": 0.2921940928270042, + "grad_norm": 1.867066502571106, + "learning_rate": 0.0012188781082615346, + "loss": 1.5799, + "step": 2770 + }, + { + "epoch": 0.29324894514767935, + "grad_norm": 1.0987522602081299, + "learning_rate": 0.0012169159948444588, + "loss": 1.574, + "step": 2780 + }, + { + "epoch": 0.29430379746835444, + "grad_norm": 1.3418217897415161, + "learning_rate": 0.001214948649405672, + "loss": 1.5771, + "step": 2790 + }, + { + "epoch": 0.29535864978902954, + "grad_norm": 0.7982292771339417, + "learning_rate": 0.0012129760939902407, + "loss": 1.564, + "step": 2800 + }, + { + "epoch": 0.29641350210970463, + "grad_norm": 1.0031260251998901, + "learning_rate": 0.0012109983507016114, + "loss": 1.5666, + "step": 2810 + }, + { + "epoch": 0.2974683544303797, + "grad_norm": 0.7942342758178711, + "learning_rate": 0.0012090154417013636, + "loss": 1.582, + "step": 2820 + }, + { + "epoch": 0.29852320675105487, + "grad_norm": 1.0223634243011475, + "learning_rate": 0.0012070273892089605, + "loss": 1.5634, + "step": 2830 + }, + { + "epoch": 0.29957805907172996, + "grad_norm": 1.802129864692688, + "learning_rate": 0.0012050342155015012, + "loss": 1.5689, + "step": 2840 + }, + { + "epoch": 0.30063291139240506, + "grad_norm": 0.9970457553863525, + "learning_rate": 0.0012030359429134707, + "loss": 1.583, + "step": 2850 + }, + { + "epoch": 0.30168776371308015, + "grad_norm": 1.3829203844070435, + "learning_rate": 0.0012010325938364883, + "loss": 1.5737, + "step": 2860 + }, + { + "epoch": 0.3027426160337553, + "grad_norm": 1.2699828147888184, + "learning_rate": 0.0011990241907190592, + "loss": 1.5678, + "step": 2870 + }, + { + "epoch": 
0.3037974683544304, + "grad_norm": 1.3639419078826904, + "learning_rate": 0.001197010756066321, + "loss": 1.5496, + "step": 2880 + }, + { + "epoch": 0.3048523206751055, + "grad_norm": 1.4653608798980713, + "learning_rate": 0.0011949923124397917, + "loss": 1.5517, + "step": 2890 + }, + { + "epoch": 0.3059071729957806, + "grad_norm": 1.397127389907837, + "learning_rate": 0.001192968882457118, + "loss": 1.5629, + "step": 2900 + }, + { + "epoch": 0.3069620253164557, + "grad_norm": 1.636437177658081, + "learning_rate": 0.001190940488791821, + "loss": 1.5604, + "step": 2910 + }, + { + "epoch": 0.3080168776371308, + "grad_norm": 1.1211754083633423, + "learning_rate": 0.0011889071541730419, + "loss": 1.5611, + "step": 2920 + }, + { + "epoch": 0.3090717299578059, + "grad_norm": 0.8533825278282166, + "learning_rate": 0.001186868901385288, + "loss": 1.5569, + "step": 2930 + }, + { + "epoch": 0.310126582278481, + "grad_norm": 0.9143281579017639, + "learning_rate": 0.001184825753268177, + "loss": 1.5699, + "step": 2940 + }, + { + "epoch": 0.3111814345991561, + "grad_norm": 1.4628593921661377, + "learning_rate": 0.0011827777327161814, + "loss": 1.5833, + "step": 2950 + }, + { + "epoch": 0.31223628691983124, + "grad_norm": 1.5831266641616821, + "learning_rate": 0.0011807248626783714, + "loss": 1.5709, + "step": 2960 + }, + { + "epoch": 0.31329113924050633, + "grad_norm": 1.5758095979690552, + "learning_rate": 0.0011786671661581584, + "loss": 1.5522, + "step": 2970 + }, + { + "epoch": 0.3143459915611814, + "grad_norm": 1.1197705268859863, + "learning_rate": 0.001176604666213036, + "loss": 1.5521, + "step": 2980 + }, + { + "epoch": 0.3154008438818565, + "grad_norm": 1.1750646829605103, + "learning_rate": 0.0011745373859543236, + "loss": 1.5629, + "step": 2990 + }, + { + "epoch": 0.31645569620253167, + "grad_norm": 1.3293509483337402, + "learning_rate": 0.0011724653485469063, + "loss": 1.5529, + "step": 3000 + }, + { + "epoch": 0.31751054852320676, + "grad_norm": 0.9493930339813232, 
+ "learning_rate": 0.0011703885772089743, + "loss": 1.5661, + "step": 3010 + }, + { + "epoch": 0.31856540084388185, + "grad_norm": 1.1234840154647827, + "learning_rate": 0.0011683070952117646, + "loss": 1.5653, + "step": 3020 + }, + { + "epoch": 0.31962025316455694, + "grad_norm": 0.9376155138015747, + "learning_rate": 0.0011662209258792998, + "loss": 1.5503, + "step": 3030 + }, + { + "epoch": 0.3206751054852321, + "grad_norm": 0.7863777875900269, + "learning_rate": 0.0011641300925881257, + "loss": 1.5607, + "step": 3040 + }, + { + "epoch": 0.3217299578059072, + "grad_norm": 2.1524288654327393, + "learning_rate": 0.0011620346187670501, + "loss": 1.5637, + "step": 3050 + }, + { + "epoch": 0.3227848101265823, + "grad_norm": 1.7093727588653564, + "learning_rate": 0.0011599345278968806, + "loss": 1.5686, + "step": 3060 + }, + { + "epoch": 0.32383966244725737, + "grad_norm": 1.3028459548950195, + "learning_rate": 0.0011578298435101604, + "loss": 1.5513, + "step": 3070 + }, + { + "epoch": 0.32489451476793246, + "grad_norm": 1.3798213005065918, + "learning_rate": 0.0011557205891909062, + "loss": 1.5401, + "step": 3080 + }, + { + "epoch": 0.3259493670886076, + "grad_norm": 1.0061970949172974, + "learning_rate": 0.0011536067885743423, + "loss": 1.5458, + "step": 3090 + }, + { + "epoch": 0.3270042194092827, + "grad_norm": 1.133569359779358, + "learning_rate": 0.001151488465346637, + "loss": 1.5363, + "step": 3100 + }, + { + "epoch": 0.3280590717299578, + "grad_norm": 1.0495191812515259, + "learning_rate": 0.0011493656432446362, + "loss": 1.5532, + "step": 3110 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 0.9396116733551025, + "learning_rate": 0.0011472383460555983, + "loss": 1.5488, + "step": 3120 + }, + { + "epoch": 0.33016877637130804, + "grad_norm": 0.858273446559906, + "learning_rate": 0.001145106597616927, + "loss": 1.5663, + "step": 3130 + }, + { + "epoch": 0.33122362869198313, + "grad_norm": 1.3865262269973755, + "learning_rate": 0.001142970421815904, + 
"loss": 1.5494, + "step": 3140 + }, + { + "epoch": 0.3322784810126582, + "grad_norm": 0.9085630178451538, + "learning_rate": 0.0011408298425894226, + "loss": 1.5553, + "step": 3150 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.9654815793037415, + "learning_rate": 0.0011386848839237186, + "loss": 1.5506, + "step": 3160 + }, + { + "epoch": 0.33438818565400846, + "grad_norm": 1.145520567893982, + "learning_rate": 0.0011365355698541005, + "loss": 1.5566, + "step": 3170 + }, + { + "epoch": 0.33544303797468356, + "grad_norm": 1.1294238567352295, + "learning_rate": 0.0011343819244646824, + "loss": 1.5531, + "step": 3180 + }, + { + "epoch": 0.33649789029535865, + "grad_norm": 1.190986156463623, + "learning_rate": 0.001132223971888112, + "loss": 1.5504, + "step": 3190 + }, + { + "epoch": 0.33755274261603374, + "grad_norm": 1.471362829208374, + "learning_rate": 0.0011300617363053024, + "loss": 1.5476, + "step": 3200 + }, + { + "epoch": 0.33860759493670883, + "grad_norm": 1.138670802116394, + "learning_rate": 0.0011278952419451586, + "loss": 1.5544, + "step": 3210 + }, + { + "epoch": 0.339662447257384, + "grad_norm": 1.0700798034667969, + "learning_rate": 0.0011257245130843077, + "loss": 1.5358, + "step": 3220 + }, + { + "epoch": 0.3407172995780591, + "grad_norm": 0.9018650054931641, + "learning_rate": 0.0011235495740468265, + "loss": 1.5273, + "step": 3230 + }, + { + "epoch": 0.34177215189873417, + "grad_norm": 1.001027226448059, + "learning_rate": 0.0011213704492039694, + "loss": 1.511, + "step": 3240 + }, + { + "epoch": 0.34282700421940926, + "grad_norm": 0.839265763759613, + "learning_rate": 0.001119187162973894, + "loss": 1.5353, + "step": 3250 + }, + { + "epoch": 0.3438818565400844, + "grad_norm": 1.2938200235366821, + "learning_rate": 0.001116999739821388, + "loss": 1.5342, + "step": 3260 + }, + { + "epoch": 0.3449367088607595, + "grad_norm": 1.5568938255310059, + "learning_rate": 0.0011148082042575968, + "loss": 1.5483, + "step": 3270 + }, + { + "epoch": 
0.3459915611814346, + "grad_norm": 1.0602411031723022, + "learning_rate": 0.0011126125808397461, + "loss": 1.5413, + "step": 3280 + }, + { + "epoch": 0.3470464135021097, + "grad_norm": 1.9033334255218506, + "learning_rate": 0.0011104128941708683, + "loss": 1.525, + "step": 3290 + }, + { + "epoch": 0.34810126582278483, + "grad_norm": 1.2254990339279175, + "learning_rate": 0.001108209168899527, + "loss": 1.5376, + "step": 3300 + }, + { + "epoch": 0.3491561181434599, + "grad_norm": 0.95367830991745, + "learning_rate": 0.0011060014297195396, + "loss": 1.5314, + "step": 3310 + }, + { + "epoch": 0.350210970464135, + "grad_norm": 0.9687831401824951, + "learning_rate": 0.0011037897013697015, + "loss": 1.5436, + "step": 3320 + }, + { + "epoch": 0.3512658227848101, + "grad_norm": 0.9362744688987732, + "learning_rate": 0.0011015740086335092, + "loss": 1.5352, + "step": 3330 + }, + { + "epoch": 0.35232067510548526, + "grad_norm": 0.9354070425033569, + "learning_rate": 0.0010993543763388814, + "loss": 1.5399, + "step": 3340 + }, + { + "epoch": 0.35337552742616035, + "grad_norm": 0.8967704772949219, + "learning_rate": 0.0010971308293578814, + "loss": 1.533, + "step": 3350 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 1.1824617385864258, + "learning_rate": 0.0010949033926064397, + "loss": 1.5359, + "step": 3360 + }, + { + "epoch": 0.35548523206751054, + "grad_norm": 0.7711478471755981, + "learning_rate": 0.0010926720910440725, + "loss": 1.5443, + "step": 3370 + }, + { + "epoch": 0.35654008438818563, + "grad_norm": 1.20838463306427, + "learning_rate": 0.001090436949673603, + "loss": 1.5403, + "step": 3380 + }, + { + "epoch": 0.3575949367088608, + "grad_norm": 0.9990103244781494, + "learning_rate": 0.0010881979935408815, + "loss": 1.5328, + "step": 3390 + }, + { + "epoch": 0.35864978902953587, + "grad_norm": 1.008127212524414, + "learning_rate": 0.0010859552477345052, + "loss": 1.5489, + "step": 3400 + }, + { + "epoch": 0.35970464135021096, + "grad_norm": 
1.0973767042160034, + "learning_rate": 0.001083708737385536, + "loss": 1.5393, + "step": 3410 + }, + { + "epoch": 0.36075949367088606, + "grad_norm": 1.385464072227478, + "learning_rate": 0.0010814584876672187, + "loss": 1.5086, + "step": 3420 + }, + { + "epoch": 0.3618143459915612, + "grad_norm": 1.0201750993728638, + "learning_rate": 0.0010792045237947008, + "loss": 1.5273, + "step": 3430 + }, + { + "epoch": 0.3628691983122363, + "grad_norm": 0.9866612553596497, + "learning_rate": 0.0010769468710247478, + "loss": 1.5253, + "step": 3440 + }, + { + "epoch": 0.3639240506329114, + "grad_norm": 1.6059131622314453, + "learning_rate": 0.0010746855546554612, + "loss": 1.5351, + "step": 3450 + }, + { + "epoch": 0.3649789029535865, + "grad_norm": 1.5470569133758545, + "learning_rate": 0.0010724206000259954, + "loss": 1.5195, + "step": 3460 + }, + { + "epoch": 0.36603375527426163, + "grad_norm": 1.6722826957702637, + "learning_rate": 0.0010701520325162727, + "loss": 1.5267, + "step": 3470 + }, + { + "epoch": 0.3670886075949367, + "grad_norm": 1.6640346050262451, + "learning_rate": 0.0010678798775467001, + "loss": 1.5307, + "step": 3480 + }, + { + "epoch": 0.3681434599156118, + "grad_norm": 1.129494547843933, + "learning_rate": 0.0010656041605778832, + "loss": 1.5189, + "step": 3490 + }, + { + "epoch": 0.3691983122362869, + "grad_norm": 1.501945972442627, + "learning_rate": 0.001063324907110342, + "loss": 1.5023, + "step": 3500 + }, + { + "epoch": 0.370253164556962, + "grad_norm": 1.1208363771438599, + "learning_rate": 0.0010610421426842241, + "loss": 1.5171, + "step": 3510 + }, + { + "epoch": 0.37130801687763715, + "grad_norm": 0.9138652682304382, + "learning_rate": 0.00105875589287902, + "loss": 1.5206, + "step": 3520 + }, + { + "epoch": 0.37236286919831224, + "grad_norm": 1.0991270542144775, + "learning_rate": 0.0010564661833132752, + "loss": 1.5375, + "step": 3530 + }, + { + "epoch": 0.37341772151898733, + "grad_norm": 1.0480202436447144, + "learning_rate": 
0.001054173039644303, + "loss": 1.5375, + "step": 3540 + }, + { + "epoch": 0.3744725738396624, + "grad_norm": 1.5541396141052246, + "learning_rate": 0.0010518764875678981, + "loss": 1.5311, + "step": 3550 + }, + { + "epoch": 0.3755274261603376, + "grad_norm": 1.0067538022994995, + "learning_rate": 0.001049576552818048, + "loss": 1.5178, + "step": 3560 + }, + { + "epoch": 0.37658227848101267, + "grad_norm": 1.288666009902954, + "learning_rate": 0.0010472732611666448, + "loss": 1.515, + "step": 3570 + }, + { + "epoch": 0.37763713080168776, + "grad_norm": 0.9280933737754822, + "learning_rate": 0.0010449666384231954, + "loss": 1.5083, + "step": 3580 + }, + { + "epoch": 0.37869198312236285, + "grad_norm": 1.145607590675354, + "learning_rate": 0.0010426567104345346, + "loss": 1.5288, + "step": 3590 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 1.2817634344100952, + "learning_rate": 0.0010403435030845332, + "loss": 1.5079, + "step": 3600 + }, + { + "epoch": 0.3808016877637131, + "grad_norm": 0.8887024521827698, + "learning_rate": 0.0010380270422938093, + "loss": 1.5066, + "step": 3610 + }, + { + "epoch": 0.3818565400843882, + "grad_norm": 1.238087773323059, + "learning_rate": 0.0010357073540194362, + "loss": 1.509, + "step": 3620 + }, + { + "epoch": 0.3829113924050633, + "grad_norm": 1.1466600894927979, + "learning_rate": 0.001033384464254655, + "loss": 1.5174, + "step": 3630 + }, + { + "epoch": 0.38396624472573837, + "grad_norm": 1.2554274797439575, + "learning_rate": 0.001031058399028579, + "loss": 1.5149, + "step": 3640 + }, + { + "epoch": 0.3850210970464135, + "grad_norm": 1.2648078203201294, + "learning_rate": 0.001028729184405905, + "loss": 1.5295, + "step": 3650 + }, + { + "epoch": 0.3860759493670886, + "grad_norm": 1.2610468864440918, + "learning_rate": 0.0010263968464866201, + "loss": 1.5193, + "step": 3660 + }, + { + "epoch": 0.3871308016877637, + "grad_norm": 1.0135608911514282, + "learning_rate": 0.0010240614114057098, + "loss": 1.4981, + "step": 3670 
+ }, + { + "epoch": 0.3881856540084388, + "grad_norm": 1.6226307153701782, + "learning_rate": 0.001021722905332864, + "loss": 1.5053, + "step": 3680 + }, + { + "epoch": 0.38924050632911394, + "grad_norm": 1.0088990926742554, + "learning_rate": 0.0010193813544721855, + "loss": 1.5199, + "step": 3690 + }, + { + "epoch": 0.39029535864978904, + "grad_norm": 0.8307347893714905, + "learning_rate": 0.001017036785061895, + "loss": 1.5172, + "step": 3700 + }, + { + "epoch": 0.39135021097046413, + "grad_norm": 0.9282181859016418, + "learning_rate": 0.0010146892233740376, + "loss": 1.5097, + "step": 3710 + }, + { + "epoch": 0.3924050632911392, + "grad_norm": 0.9127342700958252, + "learning_rate": 0.0010123386957141883, + "loss": 1.4944, + "step": 3720 + }, + { + "epoch": 0.39345991561181437, + "grad_norm": 1.2892626523971558, + "learning_rate": 0.0010099852284211573, + "loss": 1.5171, + "step": 3730 + }, + { + "epoch": 0.39451476793248946, + "grad_norm": 1.1507022380828857, + "learning_rate": 0.0010076288478666944, + "loss": 1.5123, + "step": 3740 + }, + { + "epoch": 0.39556962025316456, + "grad_norm": 1.2972285747528076, + "learning_rate": 0.0010052695804551946, + "loss": 1.5097, + "step": 3750 + }, + { + "epoch": 0.39662447257383965, + "grad_norm": 1.2276149988174438, + "learning_rate": 0.0010029074526234014, + "loss": 1.496, + "step": 3760 + }, + { + "epoch": 0.39767932489451474, + "grad_norm": 0.9434043169021606, + "learning_rate": 0.0010005424908401104, + "loss": 1.5064, + "step": 3770 + }, + { + "epoch": 0.3987341772151899, + "grad_norm": 1.066426396369934, + "learning_rate": 0.0009981747216058728, + "loss": 1.4981, + "step": 3780 + }, + { + "epoch": 0.399789029535865, + "grad_norm": 1.1268680095672607, + "learning_rate": 0.0009958041714526998, + "loss": 1.5092, + "step": 3790 + }, + { + "epoch": 0.4008438818565401, + "grad_norm": 1.0926895141601562, + "learning_rate": 0.0009934308669437627, + "loss": 1.5193, + "step": 3800 + }, + { + "epoch": 0.40189873417721517, + 
"grad_norm": 0.9493520855903625, + "learning_rate": 0.0009910548346730972, + "loss": 1.4971, + "step": 3810 + }, + { + "epoch": 0.4029535864978903, + "grad_norm": 0.9283155798912048, + "learning_rate": 0.0009886761012653062, + "loss": 1.4954, + "step": 3820 + }, + { + "epoch": 0.4040084388185654, + "grad_norm": 1.0267612934112549, + "learning_rate": 0.000986294693375258, + "loss": 1.4984, + "step": 3830 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 0.8910433650016785, + "learning_rate": 0.000983910637687791, + "loss": 1.5179, + "step": 3840 + }, + { + "epoch": 0.4061181434599156, + "grad_norm": 1.746009349822998, + "learning_rate": 0.0009815239609174138, + "loss": 1.506, + "step": 3850 + }, + { + "epoch": 0.40717299578059074, + "grad_norm": 1.2061247825622559, + "learning_rate": 0.0009791346898080043, + "loss": 1.5123, + "step": 3860 + }, + { + "epoch": 0.40822784810126583, + "grad_norm": 0.8675798177719116, + "learning_rate": 0.0009767428511325122, + "loss": 1.4973, + "step": 3870 + }, + { + "epoch": 0.4092827004219409, + "grad_norm": 0.9759266972541809, + "learning_rate": 0.0009743484716926576, + "loss": 1.4916, + "step": 3880 + }, + { + "epoch": 0.410337552742616, + "grad_norm": 0.924249529838562, + "learning_rate": 0.0009719515783186319, + "loss": 1.4913, + "step": 3890 + }, + { + "epoch": 0.41139240506329117, + "grad_norm": 1.4552154541015625, + "learning_rate": 0.0009695521978687951, + "loss": 1.4942, + "step": 3900 + }, + { + "epoch": 0.41244725738396626, + "grad_norm": 0.9275292158126831, + "learning_rate": 0.0009671503572293767, + "loss": 1.4972, + "step": 3910 + }, + { + "epoch": 0.41350210970464135, + "grad_norm": 0.8926223516464233, + "learning_rate": 0.0009647460833141742, + "loss": 1.4901, + "step": 3920 + }, + { + "epoch": 0.41455696202531644, + "grad_norm": 0.9083124399185181, + "learning_rate": 0.0009623394030642507, + "loss": 1.4959, + "step": 3930 + }, + { + "epoch": 0.41561181434599154, + "grad_norm": 1.2589706182479858, + 
"learning_rate": 0.0009599303434476334, + "loss": 1.494, + "step": 3940 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 1.0253772735595703, + "learning_rate": 0.0009575189314590118, + "loss": 1.5016, + "step": 3950 + }, + { + "epoch": 0.4177215189873418, + "grad_norm": 0.8012276291847229, + "learning_rate": 0.0009551051941194346, + "loss": 1.4988, + "step": 3960 + }, + { + "epoch": 0.41877637130801687, + "grad_norm": 1.7180521488189697, + "learning_rate": 0.0009526891584760071, + "loss": 1.4888, + "step": 3970 + }, + { + "epoch": 0.41983122362869196, + "grad_norm": 1.2081207036972046, + "learning_rate": 0.0009502708516015889, + "loss": 1.4999, + "step": 3980 + }, + { + "epoch": 0.4208860759493671, + "grad_norm": 0.9866225719451904, + "learning_rate": 0.0009478503005944888, + "loss": 1.4851, + "step": 3990 + }, + { + "epoch": 0.4219409282700422, + "grad_norm": 0.8849467039108276, + "learning_rate": 0.0009454275325781632, + "loss": 1.4924, + "step": 4000 + }, + { + "epoch": 0.4229957805907173, + "grad_norm": 0.9805383086204529, + "learning_rate": 0.0009430025747009104, + "loss": 1.4841, + "step": 4010 + }, + { + "epoch": 0.4240506329113924, + "grad_norm": 0.9877537488937378, + "learning_rate": 0.0009405754541355677, + "loss": 1.4901, + "step": 4020 + }, + { + "epoch": 0.42510548523206754, + "grad_norm": 1.038251519203186, + "learning_rate": 0.0009381461980792061, + "loss": 1.4799, + "step": 4030 + }, + { + "epoch": 0.42616033755274263, + "grad_norm": 0.8941358923912048, + "learning_rate": 0.0009357148337528256, + "loss": 1.4949, + "step": 4040 + }, + { + "epoch": 0.4272151898734177, + "grad_norm": 1.115275502204895, + "learning_rate": 0.0009332813884010511, + "loss": 1.4956, + "step": 4050 + }, + { + "epoch": 0.4282700421940928, + "grad_norm": 0.924994707107544, + "learning_rate": 0.0009308458892918259, + "loss": 1.4958, + "step": 4060 + }, + { + "epoch": 0.4293248945147679, + "grad_norm": 1.0559468269348145, + "learning_rate": 0.0009284083637161064, + "loss": 
1.4911, + "step": 4070 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 1.7392977476119995, + "learning_rate": 0.0009259688389875574, + "loss": 1.5037, + "step": 4080 + }, + { + "epoch": 0.43143459915611815, + "grad_norm": 1.3173929452896118, + "learning_rate": 0.0009235273424422442, + "loss": 1.4856, + "step": 4090 + }, + { + "epoch": 0.43248945147679324, + "grad_norm": 1.0727694034576416, + "learning_rate": 0.0009210839014383282, + "loss": 1.4808, + "step": 4100 + }, + { + "epoch": 0.43354430379746833, + "grad_norm": 1.8249340057373047, + "learning_rate": 0.000918883164651781, + "loss": 1.4702, + "step": 4110 + }, + { + "epoch": 0.4345991561181435, + "grad_norm": 1.1946078538894653, + "learning_rate": 0.0009164361046260412, + "loss": 1.4749, + "step": 4120 + }, + { + "epoch": 0.4356540084388186, + "grad_norm": 1.4862942695617676, + "learning_rate": 0.0009139871796024807, + "loss": 1.4618, + "step": 4130 + }, + { + "epoch": 0.43670886075949367, + "grad_norm": 1.4611212015151978, + "learning_rate": 0.0009115364170225, + "loss": 1.4707, + "step": 4140 + }, + { + "epoch": 0.43776371308016876, + "grad_norm": 0.9322112798690796, + "learning_rate": 0.0009090838443480903, + "loss": 1.4768, + "step": 4150 + }, + { + "epoch": 0.4388185654008439, + "grad_norm": 0.9898280501365662, + "learning_rate": 0.0009066294890615266, + "loss": 1.4659, + "step": 4160 + }, + { + "epoch": 0.439873417721519, + "grad_norm": 1.5917967557907104, + "learning_rate": 0.0009041733786650578, + "loss": 1.475, + "step": 4170 + }, + { + "epoch": 0.4409282700421941, + "grad_norm": 0.9226060509681702, + "learning_rate": 0.0009017155406806006, + "loss": 1.4665, + "step": 4180 + }, + { + "epoch": 0.4419831223628692, + "grad_norm": 0.8916873931884766, + "learning_rate": 0.0008992560026494294, + "loss": 1.4751, + "step": 4190 + }, + { + "epoch": 0.4430379746835443, + "grad_norm": 1.337715744972229, + "learning_rate": 0.0008967947921318689, + "loss": 1.4811, + "step": 4200 + }, + { + "epoch": 
0.4440928270042194, + "grad_norm": 1.0582878589630127, + "learning_rate": 0.0008943319367069844, + "loss": 1.473, + "step": 4210 + }, + { + "epoch": 0.4451476793248945, + "grad_norm": 0.9971445798873901, + "learning_rate": 0.0008918674639722742, + "loss": 1.4743, + "step": 4220 + }, + { + "epoch": 0.4462025316455696, + "grad_norm": 1.140392780303955, + "learning_rate": 0.0008894014015433582, + "loss": 1.4678, + "step": 4230 + }, + { + "epoch": 0.4472573839662447, + "grad_norm": 0.9928585886955261, + "learning_rate": 0.0008869337770536699, + "loss": 1.4704, + "step": 4240 + }, + { + "epoch": 0.44831223628691985, + "grad_norm": 1.1994088888168335, + "learning_rate": 0.0008844646181541472, + "loss": 1.4795, + "step": 4250 + }, + { + "epoch": 0.44936708860759494, + "grad_norm": 0.9302793145179749, + "learning_rate": 0.0008819939525129207, + "loss": 1.4838, + "step": 4260 + }, + { + "epoch": 0.45042194092827004, + "grad_norm": 0.9184184670448303, + "learning_rate": 0.0008795218078150056, + "loss": 1.4727, + "step": 4270 + }, + { + "epoch": 0.45147679324894513, + "grad_norm": 1.3533642292022705, + "learning_rate": 0.0008770482117619901, + "loss": 1.4702, + "step": 4280 + }, + { + "epoch": 0.4525316455696203, + "grad_norm": 0.8947915434837341, + "learning_rate": 0.000874573192071726, + "loss": 1.4812, + "step": 4290 + }, + { + "epoch": 0.45358649789029537, + "grad_norm": 0.9022558927536011, + "learning_rate": 0.0008720967764780173, + "loss": 1.4776, + "step": 4300 + }, + { + "epoch": 0.45464135021097046, + "grad_norm": 0.8097779750823975, + "learning_rate": 0.0008696189927303101, + "loss": 1.4726, + "step": 4310 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 1.6133254766464233, + "learning_rate": 0.0008671398685933811, + "loss": 1.4694, + "step": 4320 + }, + { + "epoch": 0.45675105485232065, + "grad_norm": 1.4822062253952026, + "learning_rate": 0.0008646594318470268, + "loss": 1.4805, + "step": 4330 + }, + { + "epoch": 0.4578059071729958, + "grad_norm": 
1.1711740493774414, + "learning_rate": 0.000862177710285752, + "loss": 1.4642, + "step": 4340 + }, + { + "epoch": 0.4588607594936709, + "grad_norm": 1.601130485534668, + "learning_rate": 0.0008596947317184585, + "loss": 1.468, + "step": 4350 + }, + { + "epoch": 0.459915611814346, + "grad_norm": 0.9446767568588257, + "learning_rate": 0.0008572105239681338, + "loss": 1.4888, + "step": 4360 + }, + { + "epoch": 0.4609704641350211, + "grad_norm": 0.7556731104850769, + "learning_rate": 0.0008547251148715386, + "loss": 1.4568, + "step": 4370 + }, + { + "epoch": 0.4620253164556962, + "grad_norm": 0.8460499048233032, + "learning_rate": 0.0008522385322788955, + "loss": 1.4593, + "step": 4380 + }, + { + "epoch": 0.4630801687763713, + "grad_norm": 0.8376351594924927, + "learning_rate": 0.0008497508040535766, + "loss": 1.4608, + "step": 4390 + }, + { + "epoch": 0.4641350210970464, + "grad_norm": 0.7853705883026123, + "learning_rate": 0.0008472619580717914, + "loss": 1.4668, + "step": 4400 + }, + { + "epoch": 0.4651898734177215, + "grad_norm": 0.8533798456192017, + "learning_rate": 0.000844772022222274, + "loss": 1.4823, + "step": 4410 + }, + { + "epoch": 0.46624472573839665, + "grad_norm": 0.9216268062591553, + "learning_rate": 0.0008422810244059721, + "loss": 1.4621, + "step": 4420 + }, + { + "epoch": 0.46729957805907174, + "grad_norm": 1.0749009847640991, + "learning_rate": 0.000839788992535732, + "loss": 1.467, + "step": 4430 + }, + { + "epoch": 0.46835443037974683, + "grad_norm": 1.0959140062332153, + "learning_rate": 0.000837295954535988, + "loss": 1.4645, + "step": 4440 + }, + { + "epoch": 0.4694092827004219, + "grad_norm": 1.3824844360351562, + "learning_rate": 0.0008348019383424479, + "loss": 1.4736, + "step": 4450 + }, + { + "epoch": 0.4704641350210971, + "grad_norm": 0.9010656476020813, + "learning_rate": 0.0008323069719017812, + "loss": 1.46, + "step": 4460 + }, + { + "epoch": 0.47151898734177217, + "grad_norm": 1.3230277299880981, + "learning_rate": 
0.0008298110831713047, + "loss": 1.4588, + "step": 4470 + }, + { + "epoch": 0.47257383966244726, + "grad_norm": 0.9296961426734924, + "learning_rate": 0.0008273143001186709, + "loss": 1.4698, + "step": 4480 + }, + { + "epoch": 0.47362869198312235, + "grad_norm": 1.2335829734802246, + "learning_rate": 0.0008248166507215526, + "loss": 1.4532, + "step": 4490 + }, + { + "epoch": 0.47468354430379744, + "grad_norm": 1.139744520187378, + "learning_rate": 0.0008223181629673312, + "loss": 1.4604, + "step": 4500 + }, + { + "epoch": 0.4757383966244726, + "grad_norm": 0.9167611002922058, + "learning_rate": 0.0008198188648527818, + "loss": 1.455, + "step": 4510 + }, + { + "epoch": 0.4767932489451477, + "grad_norm": 1.0506314039230347, + "learning_rate": 0.00081731878438376, + "loss": 1.4427, + "step": 4520 + }, + { + "epoch": 0.4778481012658228, + "grad_norm": 0.9885739684104919, + "learning_rate": 0.0008148179495748885, + "loss": 1.4645, + "step": 4530 + }, + { + "epoch": 0.47890295358649787, + "grad_norm": 0.8699249029159546, + "learning_rate": 0.0008123163884492422, + "loss": 1.4662, + "step": 4540 + }, + { + "epoch": 0.479957805907173, + "grad_norm": 1.2881697416305542, + "learning_rate": 0.0008098141290380353, + "loss": 1.464, + "step": 4550 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 0.9078066945075989, + "learning_rate": 0.000807311199380306, + "loss": 1.4545, + "step": 4560 + }, + { + "epoch": 0.4820675105485232, + "grad_norm": 0.8200896382331848, + "learning_rate": 0.0008048076275226032, + "loss": 1.4341, + "step": 4570 + }, + { + "epoch": 0.4831223628691983, + "grad_norm": 1.194288730621338, + "learning_rate": 0.0008023034415186725, + "loss": 1.4566, + "step": 4580 + }, + { + "epoch": 0.48417721518987344, + "grad_norm": 1.0817091464996338, + "learning_rate": 0.0007997986694291404, + "loss": 1.4462, + "step": 4590 + }, + { + "epoch": 0.48523206751054854, + "grad_norm": 0.9248474836349487, + "learning_rate": 0.0007972933393212012, + "loss": 1.4569, + "step": 
4600 + }, + { + "epoch": 0.48628691983122363, + "grad_norm": 1.3401905298233032, + "learning_rate": 0.0007947874792683025, + "loss": 1.4631, + "step": 4610 + }, + { + "epoch": 0.4873417721518987, + "grad_norm": 1.0691499710083008, + "learning_rate": 0.0007922811173498293, + "loss": 1.4354, + "step": 4620 + }, + { + "epoch": 0.4883966244725738, + "grad_norm": 0.847450852394104, + "learning_rate": 0.000789774281650791, + "loss": 1.4526, + "step": 4630 + }, + { + "epoch": 0.48945147679324896, + "grad_norm": 0.834292471408844, + "learning_rate": 0.0007872670002615056, + "loss": 1.456, + "step": 4640 + }, + { + "epoch": 0.49050632911392406, + "grad_norm": 0.8298832774162292, + "learning_rate": 0.0007847593012772852, + "loss": 1.4627, + "step": 4650 + }, + { + "epoch": 0.49156118143459915, + "grad_norm": 0.9535987973213196, + "learning_rate": 0.0007822512127981218, + "loss": 1.4504, + "step": 4660 + }, + { + "epoch": 0.49261603375527424, + "grad_norm": 1.2429676055908203, + "learning_rate": 0.0007797427629283708, + "loss": 1.453, + "step": 4670 + }, + { + "epoch": 0.4936708860759494, + "grad_norm": 1.0355405807495117, + "learning_rate": 0.0007772339797764385, + "loss": 1.4476, + "step": 4680 + }, + { + "epoch": 0.4947257383966245, + "grad_norm": 0.9865694046020508, + "learning_rate": 0.0007747248914544646, + "loss": 1.4397, + "step": 4690 + }, + { + "epoch": 0.4957805907172996, + "grad_norm": 1.784401774406433, + "learning_rate": 0.0007722155260780093, + "loss": 1.4513, + "step": 4700 + }, + { + "epoch": 0.49683544303797467, + "grad_norm": 1.1959586143493652, + "learning_rate": 0.0007697059117657368, + "loss": 1.4477, + "step": 4710 + }, + { + "epoch": 0.4978902953586498, + "grad_norm": 0.8725881576538086, + "learning_rate": 0.0007671960766391008, + "loss": 1.4382, + "step": 4720 + }, + { + "epoch": 0.4989451476793249, + "grad_norm": 0.7803207039833069, + "learning_rate": 0.0007646860488220293, + "loss": 1.4335, + "step": 4730 + }, + { + "epoch": 0.5, + "grad_norm": 
0.942233681678772, + "learning_rate": 0.00076217585644061, + "loss": 1.4375, + "step": 4740 + }, + { + "epoch": 0.5010548523206751, + "grad_norm": 1.2009118795394897, + "learning_rate": 0.0007596655276227739, + "loss": 1.4611, + "step": 4750 + }, + { + "epoch": 0.5021097046413502, + "grad_norm": 0.8131436705589294, + "learning_rate": 0.0007571550904979812, + "loss": 1.4448, + "step": 4760 + }, + { + "epoch": 0.5031645569620253, + "grad_norm": 0.8192455172538757, + "learning_rate": 0.0007546445731969056, + "loss": 1.4446, + "step": 4770 + }, + { + "epoch": 0.5042194092827004, + "grad_norm": 0.8240109086036682, + "learning_rate": 0.0007521340038511196, + "loss": 1.4559, + "step": 4780 + }, + { + "epoch": 0.5052742616033755, + "grad_norm": 0.9395986795425415, + "learning_rate": 0.0007496234105927785, + "loss": 1.4378, + "step": 4790 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 1.2303539514541626, + "learning_rate": 0.0007471128215543056, + "loss": 1.4439, + "step": 4800 + }, + { + "epoch": 0.5073839662447257, + "grad_norm": 1.11430823802948, + "learning_rate": 0.0007446022648680768, + "loss": 1.4454, + "step": 4810 + }, + { + "epoch": 0.5084388185654009, + "grad_norm": 1.0075438022613525, + "learning_rate": 0.0007420917686661055, + "loss": 1.4644, + "step": 4820 + }, + { + "epoch": 0.509493670886076, + "grad_norm": 1.0784398317337036, + "learning_rate": 0.0007395813610797283, + "loss": 1.4393, + "step": 4830 + }, + { + "epoch": 0.510548523206751, + "grad_norm": 0.994050145149231, + "learning_rate": 0.0007370710702392873, + "loss": 1.4315, + "step": 4840 + }, + { + "epoch": 0.5116033755274262, + "grad_norm": 0.871109664440155, + "learning_rate": 0.0007345609242738173, + "loss": 1.4477, + "step": 4850 + }, + { + "epoch": 0.5126582278481012, + "grad_norm": 1.2071360349655151, + "learning_rate": 0.0007320509513107296, + "loss": 1.4382, + "step": 4860 + }, + { + "epoch": 0.5137130801687764, + "grad_norm": 0.8437405228614807, + "learning_rate": 
0.0007295411794754967, + "loss": 1.4351, + "step": 4870 + }, + { + "epoch": 0.5147679324894515, + "grad_norm": 0.9953734874725342, + "learning_rate": 0.0007270316368913374, + "loss": 1.4516, + "step": 4880 + }, + { + "epoch": 0.5158227848101266, + "grad_norm": 1.2972866296768188, + "learning_rate": 0.0007245223516789019, + "loss": 1.4408, + "step": 4890 + }, + { + "epoch": 0.5168776371308017, + "grad_norm": 1.0412185192108154, + "learning_rate": 0.0007220133519559563, + "loss": 1.4431, + "step": 4900 + }, + { + "epoch": 0.5179324894514767, + "grad_norm": 0.8913591504096985, + "learning_rate": 0.0007195046658370675, + "loss": 1.4409, + "step": 4910 + }, + { + "epoch": 0.5189873417721519, + "grad_norm": 1.6948673725128174, + "learning_rate": 0.0007169963214332885, + "loss": 1.433, + "step": 4920 + }, + { + "epoch": 0.520042194092827, + "grad_norm": 0.8518940806388855, + "learning_rate": 0.000714488346851843, + "loss": 1.4423, + "step": 4930 + }, + { + "epoch": 0.5210970464135021, + "grad_norm": 0.9640561938285828, + "learning_rate": 0.000711980770195811, + "loss": 1.4365, + "step": 4940 + }, + { + "epoch": 0.5221518987341772, + "grad_norm": 1.0477244853973389, + "learning_rate": 0.0007094736195638128, + "loss": 1.4476, + "step": 4950 + }, + { + "epoch": 0.5232067510548524, + "grad_norm": 0.8721113801002502, + "learning_rate": 0.0007069669230496961, + "loss": 1.43, + "step": 4960 + }, + { + "epoch": 0.5242616033755274, + "grad_norm": 1.391849160194397, + "learning_rate": 0.0007044607087422191, + "loss": 1.4366, + "step": 4970 + }, + { + "epoch": 0.5253164556962026, + "grad_norm": 1.3078187704086304, + "learning_rate": 0.000701955004724737, + "loss": 1.4265, + "step": 4980 + }, + { + "epoch": 0.5263713080168776, + "grad_norm": 1.050345778465271, + "learning_rate": 0.0006994498390748865, + "loss": 1.4245, + "step": 4990 + }, + { + "epoch": 0.5274261603375527, + "grad_norm": 1.0833719968795776, + "learning_rate": 0.0006969452398642721, + "loss": 1.4387, + "step": 5000 + 
}, + { + "epoch": 0.5284810126582279, + "grad_norm": 0.9100883603096008, + "learning_rate": 0.0006944412351581506, + "loss": 1.4268, + "step": 5010 + }, + { + "epoch": 0.5295358649789029, + "grad_norm": 1.3380824327468872, + "learning_rate": 0.0006919378530151182, + "loss": 1.4272, + "step": 5020 + }, + { + "epoch": 0.5305907172995781, + "grad_norm": 1.17009437084198, + "learning_rate": 0.0006894351214867937, + "loss": 1.411, + "step": 5030 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 0.7711371779441833, + "learning_rate": 0.0006869330686175058, + "loss": 1.4372, + "step": 5040 + }, + { + "epoch": 0.5327004219409283, + "grad_norm": 1.3001494407653809, + "learning_rate": 0.0006844317224439788, + "loss": 1.4307, + "step": 5050 + }, + { + "epoch": 0.5337552742616034, + "grad_norm": 0.9509657025337219, + "learning_rate": 0.0006819311109950177, + "loss": 1.4384, + "step": 5060 + }, + { + "epoch": 0.5348101265822784, + "grad_norm": 1.4849536418914795, + "learning_rate": 0.0006794312622911953, + "loss": 1.4268, + "step": 5070 + }, + { + "epoch": 0.5358649789029536, + "grad_norm": 1.3324843645095825, + "learning_rate": 0.0006769322043445363, + "loss": 1.4454, + "step": 5080 + }, + { + "epoch": 0.5369198312236287, + "grad_norm": 1.501376748085022, + "learning_rate": 0.0006744339651582059, + "loss": 1.4265, + "step": 5090 + }, + { + "epoch": 0.5379746835443038, + "grad_norm": 0.9368513226509094, + "learning_rate": 0.0006719365727261935, + "loss": 1.4106, + "step": 5100 + }, + { + "epoch": 0.5390295358649789, + "grad_norm": 1.0235135555267334, + "learning_rate": 0.0006694400550330013, + "loss": 1.4251, + "step": 5110 + }, + { + "epoch": 0.540084388185654, + "grad_norm": 0.9516688585281372, + "learning_rate": 0.0006669444400533286, + "loss": 1.4234, + "step": 5120 + }, + { + "epoch": 0.5411392405063291, + "grad_norm": 0.8607965111732483, + "learning_rate": 0.0006644497557517599, + "loss": 1.4223, + "step": 5130 + }, + { + "epoch": 0.5421940928270043, + "grad_norm": 
1.041974425315857, + "learning_rate": 0.0006619560300824507, + "loss": 1.4423, + "step": 5140 + }, + { + "epoch": 0.5432489451476793, + "grad_norm": 1.0846997499465942, + "learning_rate": 0.0006594632909888154, + "loss": 1.4325, + "step": 5150 + }, + { + "epoch": 0.5443037974683544, + "grad_norm": 1.0283704996109009, + "learning_rate": 0.0006569715664032124, + "loss": 1.4252, + "step": 5160 + }, + { + "epoch": 0.5453586497890295, + "grad_norm": 0.9735073447227478, + "learning_rate": 0.0006544808842466324, + "loss": 1.4282, + "step": 5170 + }, + { + "epoch": 0.5464135021097046, + "grad_norm": 1.0649961233139038, + "learning_rate": 0.0006519912724283851, + "loss": 1.4369, + "step": 5180 + }, + { + "epoch": 0.5474683544303798, + "grad_norm": 1.0015232563018799, + "learning_rate": 0.0006495027588457864, + "loss": 1.416, + "step": 5190 + }, + { + "epoch": 0.5485232067510548, + "grad_norm": 1.247849941253662, + "learning_rate": 0.0006470153713838463, + "loss": 1.4301, + "step": 5200 + }, + { + "epoch": 0.54957805907173, + "grad_norm": 1.075993537902832, + "learning_rate": 0.0006445291379149556, + "loss": 1.4171, + "step": 5210 + }, + { + "epoch": 0.5506329113924051, + "grad_norm": 0.8853337168693542, + "learning_rate": 0.0006420440862985748, + "loss": 1.4272, + "step": 5220 + }, + { + "epoch": 0.5516877637130801, + "grad_norm": 1.6556155681610107, + "learning_rate": 0.0006395602443809203, + "loss": 1.4221, + "step": 5230 + }, + { + "epoch": 0.5527426160337553, + "grad_norm": 1.0757739543914795, + "learning_rate": 0.0006370776399946536, + "loss": 1.4253, + "step": 5240 + }, + { + "epoch": 0.5537974683544303, + "grad_norm": 1.0792763233184814, + "learning_rate": 0.0006345963009585694, + "loss": 1.4167, + "step": 5250 + }, + { + "epoch": 0.5548523206751055, + "grad_norm": 1.116723895072937, + "learning_rate": 0.0006321162550772829, + "loss": 1.4188, + "step": 5260 + }, + { + "epoch": 0.5559071729957806, + "grad_norm": 0.8563523888587952, + "learning_rate": 
0.0006296375301409187, + "loss": 1.4296, + "step": 5270 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 1.211867332458496, + "learning_rate": 0.0006271601539248012, + "loss": 1.4174, + "step": 5280 + }, + { + "epoch": 0.5580168776371308, + "grad_norm": 0.9517644047737122, + "learning_rate": 0.0006246841541891399, + "loss": 1.4184, + "step": 5290 + }, + { + "epoch": 0.5590717299578059, + "grad_norm": 1.1543703079223633, + "learning_rate": 0.0006222095586787208, + "loss": 1.4252, + "step": 5300 + }, + { + "epoch": 0.560126582278481, + "grad_norm": 1.085797905921936, + "learning_rate": 0.0006197363951225951, + "loss": 1.4134, + "step": 5310 + }, + { + "epoch": 0.5611814345991561, + "grad_norm": 0.993361234664917, + "learning_rate": 0.0006172646912337678, + "loss": 1.4109, + "step": 5320 + }, + { + "epoch": 0.5622362869198312, + "grad_norm": 0.8418601751327515, + "learning_rate": 0.0006147944747088881, + "loss": 1.4231, + "step": 5330 + }, + { + "epoch": 0.5632911392405063, + "grad_norm": 0.8986855745315552, + "learning_rate": 0.000612325773227938, + "loss": 1.4157, + "step": 5340 + }, + { + "epoch": 0.5643459915611815, + "grad_norm": 0.8898463845252991, + "learning_rate": 0.0006098586144539235, + "loss": 1.4104, + "step": 5350 + }, + { + "epoch": 0.5654008438818565, + "grad_norm": 1.000430703163147, + "learning_rate": 0.0006073930260325632, + "loss": 1.4186, + "step": 5360 + }, + { + "epoch": 0.5664556962025317, + "grad_norm": 0.8765115737915039, + "learning_rate": 0.0006049290355919792, + "loss": 1.4183, + "step": 5370 + }, + { + "epoch": 0.5675105485232067, + "grad_norm": 0.8545541763305664, + "learning_rate": 0.0006024666707423875, + "loss": 1.4122, + "step": 5380 + }, + { + "epoch": 0.5685654008438819, + "grad_norm": 1.0435597896575928, + "learning_rate": 0.0006000059590757886, + "loss": 1.4121, + "step": 5390 + }, + { + "epoch": 0.569620253164557, + "grad_norm": 1.3188209533691406, + "learning_rate": 0.0005975469281656581, + "loss": 1.4131, + "step": 5400 
+ }, + { + "epoch": 0.570675105485232, + "grad_norm": 0.9604945182800293, + "learning_rate": 0.0005950896055666384, + "loss": 1.4116, + "step": 5410 + }, + { + "epoch": 0.5717299578059072, + "grad_norm": 0.8399946689605713, + "learning_rate": 0.0005926340188142289, + "loss": 1.409, + "step": 5420 + }, + { + "epoch": 0.5727848101265823, + "grad_norm": 0.8280611634254456, + "learning_rate": 0.0005901801954244782, + "loss": 1.4046, + "step": 5430 + }, + { + "epoch": 0.5738396624472574, + "grad_norm": 0.9459635019302368, + "learning_rate": 0.0005877281628936756, + "loss": 1.403, + "step": 5440 + }, + { + "epoch": 0.5748945147679325, + "grad_norm": 1.022103190422058, + "learning_rate": 0.0005852779486980427, + "loss": 1.4086, + "step": 5450 + }, + { + "epoch": 0.5759493670886076, + "grad_norm": 0.8436564207077026, + "learning_rate": 0.0005828295802934263, + "loss": 1.4083, + "step": 5460 + }, + { + "epoch": 0.5770042194092827, + "grad_norm": 1.1897363662719727, + "learning_rate": 0.0005803830851149892, + "loss": 1.4119, + "step": 5470 + }, + { + "epoch": 0.5780590717299579, + "grad_norm": 0.9797780513763428, + "learning_rate": 0.0005779384905769053, + "loss": 1.4128, + "step": 5480 + }, + { + "epoch": 0.5791139240506329, + "grad_norm": 1.0760149955749512, + "learning_rate": 0.0005754958240720498, + "loss": 1.4134, + "step": 5490 + }, + { + "epoch": 0.580168776371308, + "grad_norm": 0.899355411529541, + "learning_rate": 0.0005730551129716936, + "loss": 1.4041, + "step": 5500 + }, + { + "epoch": 0.5812236286919831, + "grad_norm": 1.233454704284668, + "learning_rate": 0.0005706163846251961, + "loss": 1.4119, + "step": 5510 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 0.8687321543693542, + "learning_rate": 0.0005681796663596996, + "loss": 1.4042, + "step": 5520 + }, + { + "epoch": 0.5833333333333334, + "grad_norm": 1.105754017829895, + "learning_rate": 0.0005657449854798216, + "loss": 1.3968, + "step": 5530 + }, + { + "epoch": 0.5843881856540084, + "grad_norm": 
1.1164929866790771, + "learning_rate": 0.0005633123692673503, + "loss": 1.3936, + "step": 5540 + }, + { + "epoch": 0.5854430379746836, + "grad_norm": 1.0412369966506958, + "learning_rate": 0.0005608818449809387, + "loss": 1.4148, + "step": 5550 + }, + { + "epoch": 0.5864978902953587, + "grad_norm": 1.060502052307129, + "learning_rate": 0.0005584534398557977, + "loss": 1.4171, + "step": 5560 + }, + { + "epoch": 0.5875527426160337, + "grad_norm": 1.0371047258377075, + "learning_rate": 0.0005560271811033928, + "loss": 1.4015, + "step": 5570 + }, + { + "epoch": 0.5886075949367089, + "grad_norm": 1.0166165828704834, + "learning_rate": 0.0005536030959111377, + "loss": 1.4015, + "step": 5580 + }, + { + "epoch": 0.5896624472573839, + "grad_norm": 1.1408765316009521, + "learning_rate": 0.0005511812114420908, + "loss": 1.404, + "step": 5590 + }, + { + "epoch": 0.5907172995780591, + "grad_norm": 1.0308762788772583, + "learning_rate": 0.0005487615548346502, + "loss": 1.3994, + "step": 5600 + }, + { + "epoch": 0.5917721518987342, + "grad_norm": 0.8468049764633179, + "learning_rate": 0.0005463441532022495, + "loss": 1.402, + "step": 5610 + }, + { + "epoch": 0.5928270042194093, + "grad_norm": 0.931928277015686, + "learning_rate": 0.0005439290336330545, + "loss": 1.4117, + "step": 5620 + }, + { + "epoch": 0.5938818565400844, + "grad_norm": 1.0371968746185303, + "learning_rate": 0.0005415162231896593, + "loss": 1.4048, + "step": 5630 + }, + { + "epoch": 0.5949367088607594, + "grad_norm": 0.8968199491500854, + "learning_rate": 0.000539105748908783, + "loss": 1.4144, + "step": 5640 + }, + { + "epoch": 0.5959915611814346, + "grad_norm": 0.9804399013519287, + "learning_rate": 0.0005366976378009668, + "loss": 1.4048, + "step": 5650 + }, + { + "epoch": 0.5970464135021097, + "grad_norm": 0.9492760300636292, + "learning_rate": 0.0005342919168502717, + "loss": 1.4003, + "step": 5660 + }, + { + "epoch": 0.5981012658227848, + "grad_norm": 1.0558573007583618, + "learning_rate": 
0.0005318886130139753, + "loss": 1.4085, + "step": 5670 + }, + { + "epoch": 0.5991561181434599, + "grad_norm": 0.8577802777290344, + "learning_rate": 0.0005294877532222709, + "loss": 1.4085, + "step": 5680 + }, + { + "epoch": 0.6002109704641351, + "grad_norm": 1.1582485437393188, + "learning_rate": 0.0005270893643779649, + "loss": 1.3889, + "step": 5690 + }, + { + "epoch": 0.6012658227848101, + "grad_norm": 1.041375994682312, + "learning_rate": 0.0005246934733561751, + "loss": 1.3941, + "step": 5700 + }, + { + "epoch": 0.6023206751054853, + "grad_norm": 0.8611592054367065, + "learning_rate": 0.0005223001070040305, + "loss": 1.3989, + "step": 5710 + }, + { + "epoch": 0.6033755274261603, + "grad_norm": 1.1016923189163208, + "learning_rate": 0.0005199092921403696, + "loss": 1.406, + "step": 5720 + }, + { + "epoch": 0.6044303797468354, + "grad_norm": 0.9427316188812256, + "learning_rate": 0.00051752105555544, + "loss": 1.3989, + "step": 5730 + }, + { + "epoch": 0.6054852320675106, + "grad_norm": 0.8022031784057617, + "learning_rate": 0.0005151354240105994, + "loss": 1.3957, + "step": 5740 + }, + { + "epoch": 0.6065400843881856, + "grad_norm": 0.9037137627601624, + "learning_rate": 0.0005127524242380139, + "loss": 1.3723, + "step": 5750 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 0.8666563034057617, + "learning_rate": 0.0005103720829403594, + "loss": 1.3851, + "step": 5760 + }, + { + "epoch": 0.6086497890295358, + "grad_norm": 1.0490473508834839, + "learning_rate": 0.0005079944267905226, + "loss": 1.3855, + "step": 5770 + }, + { + "epoch": 0.609704641350211, + "grad_norm": 0.9020686745643616, + "learning_rate": 0.0005056194824313015, + "loss": 1.3921, + "step": 5780 + }, + { + "epoch": 0.6107594936708861, + "grad_norm": 0.8248091340065002, + "learning_rate": 0.0005032472764751074, + "loss": 1.3918, + "step": 5790 + }, + { + "epoch": 0.6118143459915611, + "grad_norm": 0.993508517742157, + "learning_rate": 0.000500877835503666, + "loss": 1.3925, + "step": 5800 
+ }, + { + "epoch": 0.6128691983122363, + "grad_norm": 1.329724907875061, + "learning_rate": 0.000498511186067721, + "loss": 1.4007, + "step": 5810 + }, + { + "epoch": 0.6139240506329114, + "grad_norm": 0.9227579832077026, + "learning_rate": 0.0004961473546867346, + "loss": 1.4084, + "step": 5820 + }, + { + "epoch": 0.6149789029535865, + "grad_norm": 1.1254634857177734, + "learning_rate": 0.0004937863678485915, + "loss": 1.3925, + "step": 5830 + }, + { + "epoch": 0.6160337552742616, + "grad_norm": 0.8597703576087952, + "learning_rate": 0.0004914282520093023, + "loss": 1.403, + "step": 5840 + }, + { + "epoch": 0.6170886075949367, + "grad_norm": 0.8589973449707031, + "learning_rate": 0.0004890730335927063, + "loss": 1.4061, + "step": 5850 + }, + { + "epoch": 0.6181434599156118, + "grad_norm": 1.016656517982483, + "learning_rate": 0.00048672073899017564, + "loss": 1.3948, + "step": 5860 + }, + { + "epoch": 0.619198312236287, + "grad_norm": 1.1069231033325195, + "learning_rate": 0.0004843713945603205, + "loss": 1.3886, + "step": 5870 + }, + { + "epoch": 0.620253164556962, + "grad_norm": 0.8865434527397156, + "learning_rate": 0.00048202502662869195, + "loss": 1.3982, + "step": 5880 + }, + { + "epoch": 0.6213080168776371, + "grad_norm": 1.2471851110458374, + "learning_rate": 0.0004796816614874885, + "loss": 1.3892, + "step": 5890 + }, + { + "epoch": 0.6223628691983122, + "grad_norm": 0.9006211161613464, + "learning_rate": 0.00047734132539526086, + "loss": 1.3887, + "step": 5900 + }, + { + "epoch": 0.6234177215189873, + "grad_norm": 1.0410528182983398, + "learning_rate": 0.00047500404457661747, + "loss": 1.3843, + "step": 5910 + }, + { + "epoch": 0.6244725738396625, + "grad_norm": 1.257081389427185, + "learning_rate": 0.00047266984522193134, + "loss": 1.3796, + "step": 5920 + }, + { + "epoch": 0.6255274261603375, + "grad_norm": 1.7313785552978516, + "learning_rate": 0.00047033875348704576, + "loss": 1.3881, + "step": 5930 + }, + { + "epoch": 0.6265822784810127, + 
"grad_norm": 1.175317406654358, + "learning_rate": 0.00046801079549298224, + "loss": 1.3934, + "step": 5940 + }, + { + "epoch": 0.6276371308016878, + "grad_norm": 0.9211337566375732, + "learning_rate": 0.0004656859973256466, + "loss": 1.3772, + "step": 5950 + }, + { + "epoch": 0.6286919831223629, + "grad_norm": 1.0640242099761963, + "learning_rate": 0.00046336438503553754, + "loss": 1.3854, + "step": 5960 + }, + { + "epoch": 0.629746835443038, + "grad_norm": 0.8318250775337219, + "learning_rate": 0.00046104598463745424, + "loss": 1.3875, + "step": 5970 + }, + { + "epoch": 0.630801687763713, + "grad_norm": 1.059361457824707, + "learning_rate": 0.0004587308221102053, + "loss": 1.3719, + "step": 5980 + }, + { + "epoch": 0.6318565400843882, + "grad_norm": 0.8778765201568604, + "learning_rate": 0.00045641892339631703, + "loss": 1.3908, + "step": 5990 + }, + { + "epoch": 0.6329113924050633, + "grad_norm": 0.9438395500183105, + "learning_rate": 0.000454110314401744, + "loss": 1.398, + "step": 6000 + }, + { + "epoch": 0.6339662447257384, + "grad_norm": 1.1765334606170654, + "learning_rate": 0.00045180502099557686, + "loss": 1.387, + "step": 6010 + }, + { + "epoch": 0.6350210970464135, + "grad_norm": 1.078106164932251, + "learning_rate": 0.00044950306900975377, + "loss": 1.3882, + "step": 6020 + }, + { + "epoch": 0.6360759493670886, + "grad_norm": 1.1579101085662842, + "learning_rate": 0.00044720448423877113, + "loss": 1.3702, + "step": 6030 + }, + { + "epoch": 0.6371308016877637, + "grad_norm": 1.45957612991333, + "learning_rate": 0.0004449092924393933, + "loss": 1.389, + "step": 6040 + }, + { + "epoch": 0.6381856540084389, + "grad_norm": 1.252821445465088, + "learning_rate": 0.00044261751933036525, + "loss": 1.3692, + "step": 6050 + }, + { + "epoch": 0.6392405063291139, + "grad_norm": 1.5366891622543335, + "learning_rate": 0.0004403291905921233, + "loss": 1.3876, + "step": 6060 + }, + { + "epoch": 0.640295358649789, + "grad_norm": 0.9114354252815247, + "learning_rate": 
0.00043804433186650916, + "loss": 1.3764, + "step": 6070 + }, + { + "epoch": 0.6413502109704642, + "grad_norm": 0.8389323353767395, + "learning_rate": 0.00043576296875647984, + "loss": 1.378, + "step": 6080 + }, + { + "epoch": 0.6424050632911392, + "grad_norm": 0.8668853044509888, + "learning_rate": 0.0004334851268258234, + "loss": 1.3763, + "step": 6090 + }, + { + "epoch": 0.6434599156118144, + "grad_norm": 1.1157852411270142, + "learning_rate": 0.00043121083159887056, + "loss": 1.3708, + "step": 6100 + }, + { + "epoch": 0.6445147679324894, + "grad_norm": 0.8253630995750427, + "learning_rate": 0.00042894010856020997, + "loss": 1.385, + "step": 6110 + }, + { + "epoch": 0.6455696202531646, + "grad_norm": 0.9189977645874023, + "learning_rate": 0.0004266729831544017, + "loss": 1.379, + "step": 6120 + }, + { + "epoch": 0.6466244725738397, + "grad_norm": 0.9487914443016052, + "learning_rate": 0.0004244094807856936, + "loss": 1.3795, + "step": 6130 + }, + { + "epoch": 0.6476793248945147, + "grad_norm": 1.0475350618362427, + "learning_rate": 0.00042214962681773457, + "loss": 1.3775, + "step": 6140 + }, + { + "epoch": 0.6487341772151899, + "grad_norm": 1.1389249563217163, + "learning_rate": 0.00041989344657329187, + "loss": 1.3903, + "step": 6150 + }, + { + "epoch": 0.6497890295358649, + "grad_norm": 1.4092427492141724, + "learning_rate": 0.00041764096533396667, + "loss": 1.3826, + "step": 6160 + }, + { + "epoch": 0.6508438818565401, + "grad_norm": 0.8769392371177673, + "learning_rate": 0.00041539220833991124, + "loss": 1.3601, + "step": 6170 + }, + { + "epoch": 0.6518987341772152, + "grad_norm": 0.943771243095398, + "learning_rate": 0.0004131472007895457, + "loss": 1.3824, + "step": 6180 + }, + { + "epoch": 0.6529535864978903, + "grad_norm": 0.9562660455703735, + "learning_rate": 0.00041090596783927583, + "loss": 1.3849, + "step": 6190 + }, + { + "epoch": 0.6540084388185654, + "grad_norm": 0.9747799634933472, + "learning_rate": 0.0004086685346032111, + "loss": 1.3764, + 
"step": 6200 + }, + { + "epoch": 0.6550632911392406, + "grad_norm": 0.9912639856338501, + "learning_rate": 0.00040643492615288367, + "loss": 1.3727, + "step": 6210 + }, + { + "epoch": 0.6561181434599156, + "grad_norm": 0.9368113875389099, + "learning_rate": 0.00040420516751696664, + "loss": 1.3798, + "step": 6220 + }, + { + "epoch": 0.6571729957805907, + "grad_norm": 0.9247981905937195, + "learning_rate": 0.00040197928368099445, + "loss": 1.3703, + "step": 6230 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 0.8152162432670593, + "learning_rate": 0.00039975729958708223, + "loss": 1.3741, + "step": 6240 + }, + { + "epoch": 0.6592827004219409, + "grad_norm": 0.9346628189086914, + "learning_rate": 0.0003975392401336468, + "loss": 1.3786, + "step": 6250 + }, + { + "epoch": 0.6603375527426161, + "grad_norm": 0.9680904150009155, + "learning_rate": 0.00039532513017512694, + "loss": 1.3728, + "step": 6260 + }, + { + "epoch": 0.6613924050632911, + "grad_norm": 0.8811993598937988, + "learning_rate": 0.00039311499452170665, + "loss": 1.3806, + "step": 6270 + }, + { + "epoch": 0.6624472573839663, + "grad_norm": 0.8844695687294006, + "learning_rate": 0.0003909088579390347, + "loss": 1.3791, + "step": 6280 + }, + { + "epoch": 0.6635021097046413, + "grad_norm": 1.2616381645202637, + "learning_rate": 0.00038870674514794877, + "loss": 1.3767, + "step": 6290 + }, + { + "epoch": 0.6645569620253164, + "grad_norm": 1.0400831699371338, + "learning_rate": 0.0003865086808241979, + "loss": 1.3722, + "step": 6300 + }, + { + "epoch": 0.6656118143459916, + "grad_norm": 0.8763938546180725, + "learning_rate": 0.0003843146895981661, + "loss": 1.3664, + "step": 6310 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 1.1762927770614624, + "learning_rate": 0.00038212479605459617, + "loss": 1.364, + "step": 6320 + }, + { + "epoch": 0.6677215189873418, + "grad_norm": 1.2252676486968994, + "learning_rate": 0.000379939024732315, + "loss": 1.3731, + "step": 6330 + }, + { + "epoch": 
0.6687763713080169, + "grad_norm": 0.8399419784545898, + "learning_rate": 0.0003777574001239573, + "loss": 1.3655, + "step": 6340 + }, + { + "epoch": 0.669831223628692, + "grad_norm": 1.159757375717163, + "learning_rate": 0.00037557994667569217, + "loss": 1.3733, + "step": 6350 + }, + { + "epoch": 0.6708860759493671, + "grad_norm": 0.7996174693107605, + "learning_rate": 0.0003734066887869485, + "loss": 1.3724, + "step": 6360 + }, + { + "epoch": 0.6719409282700421, + "grad_norm": 0.9313139915466309, + "learning_rate": 0.0003712376508101424, + "loss": 1.3532, + "step": 6370 + }, + { + "epoch": 0.6729957805907173, + "grad_norm": 0.827307939529419, + "learning_rate": 0.0003690728570504032, + "loss": 1.3587, + "step": 6380 + }, + { + "epoch": 0.6740506329113924, + "grad_norm": 0.9429633021354675, + "learning_rate": 0.00036691233176530197, + "loss": 1.3739, + "step": 6390 + }, + { + "epoch": 0.6751054852320675, + "grad_norm": 1.1498621702194214, + "learning_rate": 0.00036475609916457996, + "loss": 1.3733, + "step": 6400 + }, + { + "epoch": 0.6761603375527426, + "grad_norm": 0.7852573394775391, + "learning_rate": 0.000362604183409876, + "loss": 1.3687, + "step": 6410 + }, + { + "epoch": 0.6772151898734177, + "grad_norm": 0.9668509364128113, + "learning_rate": 0.00036045660861445684, + "loss": 1.3612, + "step": 6420 + }, + { + "epoch": 0.6782700421940928, + "grad_norm": 0.95549476146698, + "learning_rate": 0.0003583133988429468, + "loss": 1.3674, + "step": 6430 + }, + { + "epoch": 0.679324894514768, + "grad_norm": 1.141499400138855, + "learning_rate": 0.0003561745781110579, + "loss": 1.3537, + "step": 6440 + }, + { + "epoch": 0.680379746835443, + "grad_norm": 0.9368671774864197, + "learning_rate": 0.00035404017038532045, + "loss": 1.3571, + "step": 6450 + }, + { + "epoch": 0.6814345991561181, + "grad_norm": 1.0541714429855347, + "learning_rate": 0.00035191019958281575, + "loss": 1.3692, + "step": 6460 + }, + { + "epoch": 0.6824894514767933, + "grad_norm": 
1.0733474493026733, + "learning_rate": 0.00034978468957090635, + "loss": 1.3646, + "step": 6470 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 1.2284212112426758, + "learning_rate": 0.0003476636641669699, + "loss": 1.3714, + "step": 6480 + }, + { + "epoch": 0.6845991561181435, + "grad_norm": 0.968519926071167, + "learning_rate": 0.0003455471471381318, + "loss": 1.3662, + "step": 6490 + }, + { + "epoch": 0.6856540084388185, + "grad_norm": 1.1435002088546753, + "learning_rate": 0.0003434351622009985, + "loss": 1.3806, + "step": 6500 + }, + { + "epoch": 0.6867088607594937, + "grad_norm": 1.150015950202942, + "learning_rate": 0.0003413277330213928, + "loss": 1.3781, + "step": 6510 + }, + { + "epoch": 0.6877637130801688, + "grad_norm": 0.8712752461433411, + "learning_rate": 0.0003392248832140876, + "loss": 1.3645, + "step": 6520 + }, + { + "epoch": 0.6888185654008439, + "grad_norm": 0.9313428997993469, + "learning_rate": 0.00033712663634254163, + "loss": 1.3538, + "step": 6530 + }, + { + "epoch": 0.689873417721519, + "grad_norm": 0.8843823671340942, + "learning_rate": 0.00033503301591863586, + "loss": 1.3663, + "step": 6540 + }, + { + "epoch": 0.6909282700421941, + "grad_norm": 0.8311747312545776, + "learning_rate": 0.0003329440454024092, + "loss": 1.3609, + "step": 6550 + }, + { + "epoch": 0.6919831223628692, + "grad_norm": 1.1824254989624023, + "learning_rate": 0.0003308597482017965, + "loss": 1.3621, + "step": 6560 + }, + { + "epoch": 0.6930379746835443, + "grad_norm": 1.3377056121826172, + "learning_rate": 0.0003287801476723656, + "loss": 1.3599, + "step": 6570 + }, + { + "epoch": 0.6940928270042194, + "grad_norm": 0.8160244822502136, + "learning_rate": 0.00032670526711705536, + "loss": 1.3567, + "step": 6580 + }, + { + "epoch": 0.6951476793248945, + "grad_norm": 1.1316763162612915, + "learning_rate": 0.0003246351297859164, + "loss": 1.3606, + "step": 6590 + }, + { + "epoch": 0.6962025316455697, + "grad_norm": 1.1303960084915161, + "learning_rate": 
0.00032256975887584783, + "loss": 1.366, + "step": 6600 + }, + { + "epoch": 0.6972573839662447, + "grad_norm": 1.0347130298614502, + "learning_rate": 0.00032050917753033935, + "loss": 1.3505, + "step": 6610 + }, + { + "epoch": 0.6983122362869199, + "grad_norm": 0.8145801424980164, + "learning_rate": 0.000318453408839211, + "loss": 1.3627, + "step": 6620 + }, + { + "epoch": 0.6993670886075949, + "grad_norm": 0.9575753211975098, + "learning_rate": 0.0003164024758383548, + "loss": 1.3529, + "step": 6630 + }, + { + "epoch": 0.70042194092827, + "grad_norm": 0.9671070575714111, + "learning_rate": 0.00031435640150947645, + "loss": 1.3615, + "step": 6640 + }, + { + "epoch": 0.7014767932489452, + "grad_norm": 0.9095988869667053, + "learning_rate": 0.0003123152087798376, + "loss": 1.3519, + "step": 6650 + }, + { + "epoch": 0.7025316455696202, + "grad_norm": 1.1247655153274536, + "learning_rate": 0.00031027892052200003, + "loss": 1.3537, + "step": 6660 + }, + { + "epoch": 0.7035864978902954, + "grad_norm": 0.9407227635383606, + "learning_rate": 0.0003082475595535677, + "loss": 1.3569, + "step": 6670 + }, + { + "epoch": 0.7046413502109705, + "grad_norm": 0.8301571607589722, + "learning_rate": 0.00030622114863693205, + "loss": 1.3606, + "step": 6680 + }, + { + "epoch": 0.7056962025316456, + "grad_norm": 0.9853189587593079, + "learning_rate": 0.00030419971047901704, + "loss": 1.3508, + "step": 6690 + }, + { + "epoch": 0.7067510548523207, + "grad_norm": 1.1359410285949707, + "learning_rate": 0.00030218326773102407, + "loss": 1.3553, + "step": 6700 + }, + { + "epoch": 0.7078059071729957, + "grad_norm": 0.9238126873970032, + "learning_rate": 0.00030017184298817873, + "loss": 1.356, + "step": 6710 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 1.0436140298843384, + "learning_rate": 0.00029816545878947763, + "loss": 1.3473, + "step": 6720 + }, + { + "epoch": 0.709915611814346, + "grad_norm": 0.8441702723503113, + "learning_rate": 0.00029616413761743537, + "loss": 1.3527, + 
"step": 6730 + }, + { + "epoch": 0.7109704641350211, + "grad_norm": 0.8402972221374512, + "learning_rate": 0.00029416790189783286, + "loss": 1.3558, + "step": 6740 + }, + { + "epoch": 0.7120253164556962, + "grad_norm": 0.9062199592590332, + "learning_rate": 0.000292176773999466, + "loss": 1.3601, + "step": 6750 + }, + { + "epoch": 0.7130801687763713, + "grad_norm": 1.1165028810501099, + "learning_rate": 0.0002901907762338952, + "loss": 1.3552, + "step": 6760 + }, + { + "epoch": 0.7141350210970464, + "grad_norm": 1.046691656112671, + "learning_rate": 0.0002882099308551951, + "loss": 1.3583, + "step": 6770 + }, + { + "epoch": 0.7151898734177216, + "grad_norm": 0.9402929544448853, + "learning_rate": 0.00028623426005970517, + "loss": 1.3519, + "step": 6780 + }, + { + "epoch": 0.7162447257383966, + "grad_norm": 1.009200930595398, + "learning_rate": 0.00028426378598578187, + "loss": 1.3607, + "step": 6790 + }, + { + "epoch": 0.7172995780590717, + "grad_norm": 0.969684898853302, + "learning_rate": 0.0002822985307135491, + "loss": 1.3468, + "step": 6800 + }, + { + "epoch": 0.7183544303797469, + "grad_norm": 1.104597806930542, + "learning_rate": 0.0002803385162646518, + "loss": 1.349, + "step": 6810 + }, + { + "epoch": 0.7194092827004219, + "grad_norm": 0.9233344793319702, + "learning_rate": 0.0002783837646020089, + "loss": 1.3511, + "step": 6820 + }, + { + "epoch": 0.7204641350210971, + "grad_norm": 0.8974108099937439, + "learning_rate": 0.0002764342976295673, + "loss": 1.3495, + "step": 6830 + }, + { + "epoch": 0.7215189873417721, + "grad_norm": 0.9477070569992065, + "learning_rate": 0.00027449013719205623, + "loss": 1.3393, + "step": 6840 + }, + { + "epoch": 0.7225738396624473, + "grad_norm": 0.8763365149497986, + "learning_rate": 0.00027255130507474276, + "loss": 1.3481, + "step": 6850 + }, + { + "epoch": 0.7236286919831224, + "grad_norm": 0.9956598281860352, + "learning_rate": 0.00027061782300318726, + "loss": 1.3469, + "step": 6860 + }, + { + "epoch": 
0.7246835443037974, + "grad_norm": 1.155517339706421, + "learning_rate": 0.0002686897126430009, + "loss": 1.3413, + "step": 6870 + }, + { + "epoch": 0.7257383966244726, + "grad_norm": 1.0581501722335815, + "learning_rate": 0.00026676699559960145, + "loss": 1.3506, + "step": 6880 + }, + { + "epoch": 0.7267932489451476, + "grad_norm": 0.8615381121635437, + "learning_rate": 0.00026484969341797224, + "loss": 1.3382, + "step": 6890 + }, + { + "epoch": 0.7278481012658228, + "grad_norm": 0.8697940111160278, + "learning_rate": 0.0002629378275824204, + "loss": 1.3397, + "step": 6900 + }, + { + "epoch": 0.7289029535864979, + "grad_norm": 0.9290231466293335, + "learning_rate": 0.00026103141951633617, + "loss": 1.3639, + "step": 6910 + }, + { + "epoch": 0.729957805907173, + "grad_norm": 1.268671989440918, + "learning_rate": 0.00025913049058195277, + "loss": 1.3449, + "step": 6920 + }, + { + "epoch": 0.7310126582278481, + "grad_norm": 0.9922332167625427, + "learning_rate": 0.0002572350620801072, + "loss": 1.3503, + "step": 6930 + }, + { + "epoch": 0.7320675105485233, + "grad_norm": 0.9278436899185181, + "learning_rate": 0.0002553451552500012, + "loss": 1.3548, + "step": 6940 + }, + { + "epoch": 0.7331223628691983, + "grad_norm": 0.942987322807312, + "learning_rate": 0.0002534607912689637, + "loss": 1.351, + "step": 6950 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 1.0488636493682861, + "learning_rate": 0.00025158199125221325, + "loss": 1.3441, + "step": 6960 + }, + { + "epoch": 0.7352320675105485, + "grad_norm": 0.8735558390617371, + "learning_rate": 0.0002497087762526211, + "loss": 1.3374, + "step": 6970 + }, + { + "epoch": 0.7362869198312236, + "grad_norm": 0.866409957408905, + "learning_rate": 0.0002478411672604766, + "loss": 1.3411, + "step": 6980 + }, + { + "epoch": 0.7373417721518988, + "grad_norm": 0.9220066070556641, + "learning_rate": 0.00024597918520324994, + "loss": 1.3421, + "step": 6990 + }, + { + "epoch": 0.7383966244725738, + "grad_norm": 
0.9668800234794617, + "learning_rate": 0.00024412285094535952, + "loss": 1.3527, + "step": 7000 + }, + { + "epoch": 0.739451476793249, + "grad_norm": 0.920261025428772, + "learning_rate": 0.00024227218528793696, + "loss": 1.3234, + "step": 7010 + }, + { + "epoch": 0.740506329113924, + "grad_norm": 1.3615689277648926, + "learning_rate": 0.00024042720896859471, + "loss": 1.34, + "step": 7020 + }, + { + "epoch": 0.7415611814345991, + "grad_norm": 1.2785807847976685, + "learning_rate": 0.00023858794266119323, + "loss": 1.3435, + "step": 7030 + }, + { + "epoch": 0.7426160337552743, + "grad_norm": 1.5023430585861206, + "learning_rate": 0.00023675440697560943, + "loss": 1.3499, + "step": 7040 + }, + { + "epoch": 0.7436708860759493, + "grad_norm": 1.1168500185012817, + "learning_rate": 0.0002349266224575063, + "loss": 1.3351, + "step": 7050 + }, + { + "epoch": 0.7447257383966245, + "grad_norm": 0.857887864112854, + "learning_rate": 0.0002331046095881017, + "loss": 1.3416, + "step": 7060 + }, + { + "epoch": 0.7457805907172996, + "grad_norm": 0.9729728698730469, + "learning_rate": 0.00023128838878393946, + "loss": 1.3447, + "step": 7070 + }, + { + "epoch": 0.7468354430379747, + "grad_norm": 0.9850969910621643, + "learning_rate": 0.00022947798039666051, + "loss": 1.3429, + "step": 7080 + }, + { + "epoch": 0.7478902953586498, + "grad_norm": 0.8831260800361633, + "learning_rate": 0.00022767340471277492, + "loss": 1.343, + "step": 7090 + }, + { + "epoch": 0.7489451476793249, + "grad_norm": 0.856084406375885, + "learning_rate": 0.00022587468195343436, + "loss": 1.3379, + "step": 7100 + }, + { + "epoch": 0.75, + "grad_norm": 1.0066194534301758, + "learning_rate": 0.00022408183227420528, + "loss": 1.3492, + "step": 7110 + }, + { + "epoch": 0.7510548523206751, + "grad_norm": 1.1706987619400024, + "learning_rate": 0.0002222948757648443, + "loss": 1.3436, + "step": 7120 + }, + { + "epoch": 0.7521097046413502, + "grad_norm": 0.8579622507095337, + "learning_rate": 
0.00022051383244907143, + "loss": 1.3524, + "step": 7130 + }, + { + "epoch": 0.7531645569620253, + "grad_norm": 0.8967866897583008, + "learning_rate": 0.0002187387222843467, + "loss": 1.3344, + "step": 7140 + }, + { + "epoch": 0.7542194092827004, + "grad_norm": 1.1682286262512207, + "learning_rate": 0.0002169695651616463, + "loss": 1.3307, + "step": 7150 + }, + { + "epoch": 0.7552742616033755, + "grad_norm": 0.8951349258422852, + "learning_rate": 0.00021520638090523955, + "loss": 1.3419, + "step": 7160 + }, + { + "epoch": 0.7563291139240507, + "grad_norm": 1.4064162969589233, + "learning_rate": 0.00021344918927246678, + "loss": 1.3501, + "step": 7170 + }, + { + "epoch": 0.7573839662447257, + "grad_norm": 1.0532881021499634, + "learning_rate": 0.00021169800995351874, + "loss": 1.3252, + "step": 7180 + }, + { + "epoch": 0.7584388185654009, + "grad_norm": 0.9429325461387634, + "learning_rate": 0.00020995286257121453, + "loss": 1.3282, + "step": 7190 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 0.8618940114974976, + "learning_rate": 0.00020821376668078264, + "loss": 1.3351, + "step": 7200 + }, + { + "epoch": 0.760548523206751, + "grad_norm": 0.8398911356925964, + "learning_rate": 0.00020648074176964182, + "loss": 1.3392, + "step": 7210 + }, + { + "epoch": 0.7616033755274262, + "grad_norm": 0.9137090444564819, + "learning_rate": 0.00020475380725718228, + "loss": 1.3521, + "step": 7220 + }, + { + "epoch": 0.7626582278481012, + "grad_norm": 0.8889319896697998, + "learning_rate": 0.00020303298249454857, + "loss": 1.3472, + "step": 7230 + }, + { + "epoch": 0.7637130801687764, + "grad_norm": 0.8635417222976685, + "learning_rate": 0.00020131828676442237, + "loss": 1.3445, + "step": 7240 + }, + { + "epoch": 0.7647679324894515, + "grad_norm": 0.8107008934020996, + "learning_rate": 0.00019960973928080666, + "loss": 1.338, + "step": 7250 + }, + { + "epoch": 0.7658227848101266, + "grad_norm": 0.7742936015129089, + "learning_rate": 0.0001979073591888101, + "loss": 1.3368, 
+ "step": 7260 + }, + { + "epoch": 0.7668776371308017, + "grad_norm": 0.9148384928703308, + "learning_rate": 0.000196211165564433, + "loss": 1.3352, + "step": 7270 + }, + { + "epoch": 0.7679324894514767, + "grad_norm": 0.8345164656639099, + "learning_rate": 0.00019452117741435314, + "loss": 1.3361, + "step": 7280 + }, + { + "epoch": 0.7689873417721519, + "grad_norm": 0.9945336580276489, + "learning_rate": 0.00019283741367571294, + "loss": 1.3476, + "step": 7290 + }, + { + "epoch": 0.770042194092827, + "grad_norm": 1.0628036260604858, + "learning_rate": 0.00019115989321590694, + "loss": 1.3528, + "step": 7300 + }, + { + "epoch": 0.7710970464135021, + "grad_norm": 0.810931921005249, + "learning_rate": 0.00018948863483237154, + "loss": 1.3325, + "step": 7310 + }, + { + "epoch": 0.7721518987341772, + "grad_norm": 0.8863750100135803, + "learning_rate": 0.00018782365725237272, + "loss": 1.328, + "step": 7320 + }, + { + "epoch": 0.7732067510548524, + "grad_norm": 0.8980864882469177, + "learning_rate": 0.00018616497913279728, + "loss": 1.3342, + "step": 7330 + }, + { + "epoch": 0.7742616033755274, + "grad_norm": 1.2691972255706787, + "learning_rate": 0.0001845126190599434, + "loss": 1.3288, + "step": 7340 + }, + { + "epoch": 0.7753164556962026, + "grad_norm": 0.9011431932449341, + "learning_rate": 0.00018286659554931254, + "loss": 1.3253, + "step": 7350 + }, + { + "epoch": 0.7763713080168776, + "grad_norm": 0.9904370307922363, + "learning_rate": 0.00018122692704540194, + "loss": 1.3343, + "step": 7360 + }, + { + "epoch": 0.7774261603375527, + "grad_norm": 0.8374282717704773, + "learning_rate": 0.00017959363192149752, + "loss": 1.3364, + "step": 7370 + }, + { + "epoch": 0.7784810126582279, + "grad_norm": 0.933814525604248, + "learning_rate": 0.00017796672847946905, + "loss": 1.3224, + "step": 7380 + }, + { + "epoch": 0.7795358649789029, + "grad_norm": 0.8784878849983215, + "learning_rate": 0.0001763462349495639, + "loss": 1.3334, + "step": 7390 + }, + { + "epoch": 
0.7805907172995781, + "grad_norm": 1.019925594329834, + "learning_rate": 0.00017473216949020326, + "loss": 1.3407, + "step": 7400 + }, + { + "epoch": 0.7816455696202531, + "grad_norm": 0.8140546083450317, + "learning_rate": 0.0001731245501877787, + "loss": 1.3224, + "step": 7410 + }, + { + "epoch": 0.7827004219409283, + "grad_norm": 0.8241371512413025, + "learning_rate": 0.00017152339505644963, + "loss": 1.3295, + "step": 7420 + }, + { + "epoch": 0.7837552742616034, + "grad_norm": 0.8268043398857117, + "learning_rate": 0.0001699287220379407, + "loss": 1.3137, + "step": 7430 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 0.9069514274597168, + "learning_rate": 0.00016834054900134228, + "loss": 1.3193, + "step": 7440 + }, + { + "epoch": 0.7858649789029536, + "grad_norm": 0.8530046939849854, + "learning_rate": 0.00016675889374290852, + "loss": 1.321, + "step": 7450 + }, + { + "epoch": 0.7869198312236287, + "grad_norm": 0.8672659993171692, + "learning_rate": 0.0001651837739858589, + "loss": 1.3294, + "step": 7460 + }, + { + "epoch": 0.7879746835443038, + "grad_norm": 1.0813183784484863, + "learning_rate": 0.00016361520738017934, + "loss": 1.3233, + "step": 7470 + }, + { + "epoch": 0.7890295358649789, + "grad_norm": 1.2170803546905518, + "learning_rate": 0.00016205321150242454, + "loss": 1.3285, + "step": 7480 + }, + { + "epoch": 0.790084388185654, + "grad_norm": 0.9234071969985962, + "learning_rate": 0.00016049780385552113, + "loss": 1.3364, + "step": 7490 + }, + { + "epoch": 0.7911392405063291, + "grad_norm": 0.8898211121559143, + "learning_rate": 0.00015894900186857105, + "loss": 1.331, + "step": 7500 + }, + { + "epoch": 0.7921940928270043, + "grad_norm": 0.8925827145576477, + "learning_rate": 0.00015740682289665714, + "loss": 1.3373, + "step": 7510 + }, + { + "epoch": 0.7932489451476793, + "grad_norm": 0.833173930644989, + "learning_rate": 0.0001558712842206477, + "loss": 1.3219, + "step": 7520 + }, + { + "epoch": 0.7943037974683544, + "grad_norm": 
0.8198329210281372, + "learning_rate": 0.00015434240304700332, + "loss": 1.3215, + "step": 7530 + }, + { + "epoch": 0.7953586497890295, + "grad_norm": 0.8121780157089233, + "learning_rate": 0.0001528201965075841, + "loss": 1.3196, + "step": 7540 + }, + { + "epoch": 0.7964135021097046, + "grad_norm": 0.9158532023429871, + "learning_rate": 0.0001513046816594575, + "loss": 1.3162, + "step": 7550 + }, + { + "epoch": 0.7974683544303798, + "grad_norm": 0.8811294436454773, + "learning_rate": 0.0001497958754847076, + "loss": 1.3202, + "step": 7560 + }, + { + "epoch": 0.7985232067510548, + "grad_norm": 0.8862573504447937, + "learning_rate": 0.00014829379489024415, + "loss": 1.3306, + "step": 7570 + }, + { + "epoch": 0.79957805907173, + "grad_norm": 0.9224464893341064, + "learning_rate": 0.0001467984567076137, + "loss": 1.3272, + "step": 7580 + }, + { + "epoch": 0.8006329113924051, + "grad_norm": 0.8181999921798706, + "learning_rate": 0.00014530987769281075, + "loss": 1.3195, + "step": 7590 + }, + { + "epoch": 0.8016877637130801, + "grad_norm": 0.7921404242515564, + "learning_rate": 0.00014382807452609003, + "loss": 1.3216, + "step": 7600 + }, + { + "epoch": 0.8027426160337553, + "grad_norm": 0.9339359998703003, + "learning_rate": 0.00014235306381177952, + "loss": 1.337, + "step": 7610 + }, + { + "epoch": 0.8037974683544303, + "grad_norm": 0.8177172541618347, + "learning_rate": 0.00014088486207809449, + "loss": 1.3094, + "step": 7620 + }, + { + "epoch": 0.8048523206751055, + "grad_norm": 0.9177625775337219, + "learning_rate": 0.0001394234857769521, + "loss": 1.3311, + "step": 7630 + }, + { + "epoch": 0.8059071729957806, + "grad_norm": 0.8442224860191345, + "learning_rate": 0.0001379689512837878, + "loss": 1.3238, + "step": 7640 + }, + { + "epoch": 0.8069620253164557, + "grad_norm": 0.7487714290618896, + "learning_rate": 0.00013652127489737067, + "loss": 1.3385, + "step": 7650 + }, + { + "epoch": 0.8080168776371308, + "grad_norm": 1.0118433237075806, + "learning_rate": 
0.00013508047283962137, + "loss": 1.3162, + "step": 7660 + }, + { + "epoch": 0.8090717299578059, + "grad_norm": 0.832543671131134, + "learning_rate": 0.00013364656125543044, + "loss": 1.3223, + "step": 7670 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 0.8501887917518616, + "learning_rate": 0.00013221955621247749, + "loss": 1.3376, + "step": 7680 + }, + { + "epoch": 0.8111814345991561, + "grad_norm": 0.8732434511184692, + "learning_rate": 0.00013079947370105057, + "loss": 1.3098, + "step": 7690 + }, + { + "epoch": 0.8122362869198312, + "grad_norm": 0.8390721082687378, + "learning_rate": 0.00012938632963386808, + "loss": 1.3206, + "step": 7700 + }, + { + "epoch": 0.8132911392405063, + "grad_norm": 0.8412296175956726, + "learning_rate": 0.00012798013984589894, + "loss": 1.3181, + "step": 7710 + }, + { + "epoch": 0.8143459915611815, + "grad_norm": 1.029778242111206, + "learning_rate": 0.00012658092009418652, + "loss": 1.3209, + "step": 7720 + }, + { + "epoch": 0.8154008438818565, + "grad_norm": 1.079094409942627, + "learning_rate": 0.00012518868605767118, + "loss": 1.3298, + "step": 7730 + }, + { + "epoch": 0.8164556962025317, + "grad_norm": 0.8126010894775391, + "learning_rate": 0.0001238034533370153, + "loss": 1.3299, + "step": 7740 + }, + { + "epoch": 0.8175105485232067, + "grad_norm": 0.8354755640029907, + "learning_rate": 0.0001224252374544278, + "loss": 1.3137, + "step": 7750 + }, + { + "epoch": 0.8185654008438819, + "grad_norm": 0.9125696420669556, + "learning_rate": 0.00012105405385349047, + "loss": 1.3151, + "step": 7760 + }, + { + "epoch": 0.819620253164557, + "grad_norm": 0.8458261489868164, + "learning_rate": 0.00011968991789898533, + "loss": 1.3237, + "step": 7770 + }, + { + "epoch": 0.820675105485232, + "grad_norm": 1.2978262901306152, + "learning_rate": 0.00011833284487672185, + "loss": 1.3155, + "step": 7780 + }, + { + "epoch": 0.8217299578059072, + "grad_norm": 0.8373542428016663, + "learning_rate": 0.00011698284999336578, + "loss": 1.3314, + 
"step": 7790 + }, + { + "epoch": 0.8227848101265823, + "grad_norm": 0.985282301902771, + "learning_rate": 0.00011563994837626898, + "loss": 1.3201, + "step": 7800 + }, + { + "epoch": 0.8238396624472574, + "grad_norm": 0.82973712682724, + "learning_rate": 0.00011430415507329975, + "loss": 1.3175, + "step": 7810 + }, + { + "epoch": 0.8248945147679325, + "grad_norm": 0.9041714668273926, + "learning_rate": 0.00011297548505267424, + "loss": 1.3271, + "step": 7820 + }, + { + "epoch": 0.8259493670886076, + "grad_norm": 0.9055811762809753, + "learning_rate": 0.00011165395320278898, + "loss": 1.3168, + "step": 7830 + }, + { + "epoch": 0.8270042194092827, + "grad_norm": 0.8894053101539612, + "learning_rate": 0.00011033957433205364, + "loss": 1.3029, + "step": 7840 + }, + { + "epoch": 0.8280590717299579, + "grad_norm": 0.8363162875175476, + "learning_rate": 0.00010903236316872514, + "loss": 1.3113, + "step": 7850 + }, + { + "epoch": 0.8291139240506329, + "grad_norm": 0.9180843234062195, + "learning_rate": 0.00010773233436074287, + "loss": 1.3026, + "step": 7860 + }, + { + "epoch": 0.830168776371308, + "grad_norm": 1.1453430652618408, + "learning_rate": 0.00010643950247556447, + "loss": 1.3135, + "step": 7870 + }, + { + "epoch": 0.8312236286919831, + "grad_norm": 0.9637054800987244, + "learning_rate": 0.00010515388200000245, + "loss": 1.333, + "step": 7880 + }, + { + "epoch": 0.8322784810126582, + "grad_norm": 0.8407840132713318, + "learning_rate": 0.00010387548734006195, + "loss": 1.3284, + "step": 7890 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.8612921833992004, + "learning_rate": 0.00010260433282077944, + "loss": 1.3104, + "step": 7900 + }, + { + "epoch": 0.8343881856540084, + "grad_norm": 1.2952125072479248, + "learning_rate": 0.00010134043268606191, + "loss": 1.3186, + "step": 7910 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 0.9985410571098328, + "learning_rate": 0.00010008380109852752, + "loss": 1.3177, + "step": 7920 + }, + { + "epoch": 
0.8364978902953587, + "grad_norm": 0.9404343962669373, + "learning_rate": 9.883445213934675e-05, + "loss": 1.3289, + "step": 7930 + }, + { + "epoch": 0.8375527426160337, + "grad_norm": 0.9196553826332092, + "learning_rate": 9.759239980808494e-05, + "loss": 1.3186, + "step": 7940 + }, + { + "epoch": 0.8386075949367089, + "grad_norm": 0.8105236291885376, + "learning_rate": 9.635765802254482e-05, + "loss": 1.322, + "step": 7950 + }, + { + "epoch": 0.8396624472573839, + "grad_norm": 0.8465095162391663, + "learning_rate": 9.5130240618611e-05, + "loss": 1.3076, + "step": 7960 + }, + { + "epoch": 0.8407172995780591, + "grad_norm": 0.8782684206962585, + "learning_rate": 9.391016135009484e-05, + "loss": 1.3096, + "step": 7970 + }, + { + "epoch": 0.8417721518987342, + "grad_norm": 0.8183960914611816, + "learning_rate": 9.269743388858019e-05, + "loss": 1.3067, + "step": 7980 + }, + { + "epoch": 0.8428270042194093, + "grad_norm": 0.8307322859764099, + "learning_rate": 9.149207182327054e-05, + "loss": 1.32, + "step": 7990 + }, + { + "epoch": 0.8438818565400844, + "grad_norm": 0.8045220971107483, + "learning_rate": 9.029408866083638e-05, + "loss": 1.3127, + "step": 8000 + }, + { + "epoch": 0.8449367088607594, + "grad_norm": 0.830212414264679, + "learning_rate": 8.910349782526394e-05, + "loss": 1.3202, + "step": 8010 + }, + { + "epoch": 0.8459915611814346, + "grad_norm": 0.7290546894073486, + "learning_rate": 8.792031265770475e-05, + "loss": 1.3151, + "step": 8020 + }, + { + "epoch": 0.8470464135021097, + "grad_norm": 0.8761226534843445, + "learning_rate": 8.67445464163267e-05, + "loss": 1.3136, + "step": 8030 + }, + { + "epoch": 0.8481012658227848, + "grad_norm": 0.77815842628479, + "learning_rate": 8.557621227616444e-05, + "loss": 1.3227, + "step": 8040 + }, + { + "epoch": 0.8491561181434599, + "grad_norm": 0.8016933798789978, + "learning_rate": 8.441532332897248e-05, + "loss": 1.3048, + "step": 8050 + }, + { + "epoch": 0.8502109704641351, + "grad_norm": 0.7986785173416138, + 
"learning_rate": 8.326189258307832e-05, + "loss": 1.3142, + "step": 8060 + }, + { + "epoch": 0.8512658227848101, + "grad_norm": 0.827481746673584, + "learning_rate": 8.211593296323672e-05, + "loss": 1.3175, + "step": 8070 + }, + { + "epoch": 0.8523206751054853, + "grad_norm": 0.8289914131164551, + "learning_rate": 8.097745731048475e-05, + "loss": 1.3199, + "step": 8080 + }, + { + "epoch": 0.8533755274261603, + "grad_norm": 0.8566462397575378, + "learning_rate": 7.984647838199773e-05, + "loss": 1.3078, + "step": 8090 + }, + { + "epoch": 0.8544303797468354, + "grad_norm": 1.155052661895752, + "learning_rate": 7.872300885094736e-05, + "loss": 1.3029, + "step": 8100 + }, + { + "epoch": 0.8554852320675106, + "grad_norm": 0.7748734354972839, + "learning_rate": 7.760706130635792e-05, + "loss": 1.3211, + "step": 8110 + }, + { + "epoch": 0.8565400843881856, + "grad_norm": 0.8384186625480652, + "learning_rate": 7.649864825296669e-05, + "loss": 1.312, + "step": 8120 + }, + { + "epoch": 0.8575949367088608, + "grad_norm": 0.8143622875213623, + "learning_rate": 7.539778211108309e-05, + "loss": 1.3104, + "step": 8130 + }, + { + "epoch": 0.8586497890295358, + "grad_norm": 0.8443122506141663, + "learning_rate": 7.430447521644973e-05, + "loss": 1.3094, + "step": 8140 + }, + { + "epoch": 0.859704641350211, + "grad_norm": 0.7718736529350281, + "learning_rate": 7.321873982010422e-05, + "loss": 1.3066, + "step": 8150 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 0.7639861702919006, + "learning_rate": 7.214058808824192e-05, + "loss": 1.3143, + "step": 8160 + }, + { + "epoch": 0.8618143459915611, + "grad_norm": 0.8100250363349915, + "learning_rate": 7.107003210207947e-05, + "loss": 1.3065, + "step": 8170 + }, + { + "epoch": 0.8628691983122363, + "grad_norm": 0.9365565776824951, + "learning_rate": 7.000708385771928e-05, + "loss": 1.3159, + "step": 8180 + }, + { + "epoch": 0.8639240506329114, + "grad_norm": 0.8225710988044739, + "learning_rate": 6.905694490312064e-05, + "loss": 
1.3058, + "step": 8190 + }, + { + "epoch": 0.8649789029535865, + "grad_norm": 0.9176223874092102, + "learning_rate": 6.80084841120226e-05, + "loss": 1.3172, + "step": 8200 + }, + { + "epoch": 0.8660337552742616, + "grad_norm": 0.7667461037635803, + "learning_rate": 6.696766536886692e-05, + "loss": 1.3108, + "step": 8210 + }, + { + "epoch": 0.8670886075949367, + "grad_norm": 0.81577467918396, + "learning_rate": 6.593450033653586e-05, + "loss": 1.3204, + "step": 8220 + }, + { + "epoch": 0.8681434599156118, + "grad_norm": 0.922707736492157, + "learning_rate": 6.490900059214836e-05, + "loss": 1.313, + "step": 8230 + }, + { + "epoch": 0.869198312236287, + "grad_norm": 0.8253586292266846, + "learning_rate": 6.389117762692952e-05, + "loss": 1.3118, + "step": 8240 + }, + { + "epoch": 0.870253164556962, + "grad_norm": 0.8695806264877319, + "learning_rate": 6.288104284608284e-05, + "loss": 1.3253, + "step": 8250 + }, + { + "epoch": 0.8713080168776371, + "grad_norm": 0.9169137477874756, + "learning_rate": 6.187860756866157e-05, + "loss": 1.3164, + "step": 8260 + }, + { + "epoch": 0.8723628691983122, + "grad_norm": 0.9671026468276978, + "learning_rate": 6.088388302744266e-05, + "loss": 1.3141, + "step": 8270 + }, + { + "epoch": 0.8734177215189873, + "grad_norm": 0.7651844024658203, + "learning_rate": 5.9896880368800115e-05, + "loss": 1.2969, + "step": 8280 + }, + { + "epoch": 0.8744725738396625, + "grad_norm": 0.8291880488395691, + "learning_rate": 5.891761065258089e-05, + "loss": 1.2973, + "step": 8290 + }, + { + "epoch": 0.8755274261603375, + "grad_norm": 0.8059927821159363, + "learning_rate": 5.794608485198008e-05, + "loss": 1.3228, + "step": 8300 + }, + { + "epoch": 0.8765822784810127, + "grad_norm": 0.8814519047737122, + "learning_rate": 5.698231385341887e-05, + "loss": 1.3007, + "step": 8310 + }, + { + "epoch": 0.8776371308016878, + "grad_norm": 0.9179897904396057, + "learning_rate": 5.60263084564217e-05, + "loss": 1.3195, + "step": 8320 + }, + { + "epoch": 
0.8786919831223629, + "grad_norm": 0.8008880615234375, + "learning_rate": 5.507807937349604e-05, + "loss": 1.3143, + "step": 8330 + }, + { + "epoch": 0.879746835443038, + "grad_norm": 0.7599408030509949, + "learning_rate": 5.413763723001164e-05, + "loss": 1.3005, + "step": 8340 + }, + { + "epoch": 0.880801687763713, + "grad_norm": 0.8539789915084839, + "learning_rate": 5.320499256408204e-05, + "loss": 1.3032, + "step": 8350 + }, + { + "epoch": 0.8818565400843882, + "grad_norm": 0.7954179644584656, + "learning_rate": 5.228015582644585e-05, + "loss": 1.301, + "step": 8360 + }, + { + "epoch": 0.8829113924050633, + "grad_norm": 0.8177045583724976, + "learning_rate": 5.136313738035059e-05, + "loss": 1.3084, + "step": 8370 + }, + { + "epoch": 0.8839662447257384, + "grad_norm": 0.8889172077178955, + "learning_rate": 5.045394750143567e-05, + "loss": 1.3143, + "step": 8380 + }, + { + "epoch": 0.8850210970464135, + "grad_norm": 0.7663213014602661, + "learning_rate": 4.955259637761761e-05, + "loss": 1.3052, + "step": 8390 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 0.8146766424179077, + "learning_rate": 4.865909410897576e-05, + "loss": 1.3057, + "step": 8400 + }, + { + "epoch": 0.8871308016877637, + "grad_norm": 1.0107501745224, + "learning_rate": 4.7773450707639414e-05, + "loss": 1.3227, + "step": 8410 + }, + { + "epoch": 0.8881856540084389, + "grad_norm": 0.7708172798156738, + "learning_rate": 4.6895676097675225e-05, + "loss": 1.3125, + "step": 8420 + }, + { + "epoch": 0.8892405063291139, + "grad_norm": 0.7667234539985657, + "learning_rate": 4.6025780114976545e-05, + "loss": 1.3081, + "step": 8430 + }, + { + "epoch": 0.890295358649789, + "grad_norm": 0.7523211240768433, + "learning_rate": 4.5163772507152425e-05, + "loss": 1.3029, + "step": 8440 + }, + { + "epoch": 0.8913502109704642, + "grad_norm": 0.8292438387870789, + "learning_rate": 4.430966293341912e-05, + "loss": 1.31, + "step": 8450 + }, + { + "epoch": 0.8924050632911392, + "grad_norm": 0.8368623852729797, 
+ "learning_rate": 4.346346096449136e-05, + "loss": 1.3066, + "step": 8460 + }, + { + "epoch": 0.8934599156118144, + "grad_norm": 0.7637028098106384, + "learning_rate": 4.26251760824754e-05, + "loss": 1.3063, + "step": 8470 + }, + { + "epoch": 0.8945147679324894, + "grad_norm": 0.7906360626220703, + "learning_rate": 4.179481768076274e-05, + "loss": 1.3018, + "step": 8480 + }, + { + "epoch": 0.8955696202531646, + "grad_norm": 0.7924454808235168, + "learning_rate": 4.0972395063924554e-05, + "loss": 1.3073, + "step": 8490 + }, + { + "epoch": 0.8966244725738397, + "grad_norm": 0.7591859102249146, + "learning_rate": 4.015791744760811e-05, + "loss": 1.2965, + "step": 8500 + }, + { + "epoch": 0.8976793248945147, + "grad_norm": 0.8179988265037537, + "learning_rate": 3.93513939584326e-05, + "loss": 1.3079, + "step": 8510 + }, + { + "epoch": 0.8987341772151899, + "grad_norm": 0.7971022129058838, + "learning_rate": 3.855283363388762e-05, + "loss": 1.3056, + "step": 8520 + }, + { + "epoch": 0.8997890295358649, + "grad_norm": 0.8212478160858154, + "learning_rate": 3.7762245422231476e-05, + "loss": 1.3204, + "step": 8530 + }, + { + "epoch": 0.9008438818565401, + "grad_norm": 0.7564961910247803, + "learning_rate": 3.697963818239117e-05, + "loss": 1.3127, + "step": 8540 + }, + { + "epoch": 0.9018987341772152, + "grad_norm": 0.7462640404701233, + "learning_rate": 3.6205020683862836e-05, + "loss": 1.305, + "step": 8550 + }, + { + "epoch": 0.9029535864978903, + "grad_norm": 0.8003872036933899, + "learning_rate": 3.543840160661396e-05, + "loss": 1.2966, + "step": 8560 + }, + { + "epoch": 0.9040084388185654, + "grad_norm": 1.0412746667861938, + "learning_rate": 3.467978954098549e-05, + "loss": 1.3156, + "step": 8570 + }, + { + "epoch": 0.9050632911392406, + "grad_norm": 0.9014061689376831, + "learning_rate": 3.392919298759623e-05, + "loss": 1.3091, + "step": 8580 + }, + { + "epoch": 0.9061181434599156, + "grad_norm": 0.8375516533851624, + "learning_rate": 3.318662035724679e-05, + 
"loss": 1.3114, + "step": 8590 + }, + { + "epoch": 0.9071729957805907, + "grad_norm": 0.8745583891868591, + "learning_rate": 3.2452079970826335e-05, + "loss": 1.3054, + "step": 8600 + }, + { + "epoch": 0.9082278481012658, + "grad_norm": 0.8606129884719849, + "learning_rate": 3.172558005921841e-05, + "loss": 1.3045, + "step": 8610 + }, + { + "epoch": 0.9092827004219409, + "grad_norm": 0.8239139914512634, + "learning_rate": 3.100712876320924e-05, + "loss": 1.3025, + "step": 8620 + }, + { + "epoch": 0.9103375527426161, + "grad_norm": 0.8255100846290588, + "learning_rate": 3.029673413339651e-05, + "loss": 1.3124, + "step": 8630 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 0.8986062407493591, + "learning_rate": 2.959440413009895e-05, + "loss": 1.2935, + "step": 8640 + }, + { + "epoch": 0.9124472573839663, + "grad_norm": 0.7901493310928345, + "learning_rate": 2.890014662326701e-05, + "loss": 1.2983, + "step": 8650 + }, + { + "epoch": 0.9135021097046413, + "grad_norm": 0.8808306455612183, + "learning_rate": 2.8213969392395233e-05, + "loss": 1.2979, + "step": 8660 + }, + { + "epoch": 0.9145569620253164, + "grad_norm": 0.7528807520866394, + "learning_rate": 2.7535880126434433e-05, + "loss": 1.3052, + "step": 8670 + }, + { + "epoch": 0.9156118143459916, + "grad_norm": 0.7773553133010864, + "learning_rate": 2.686588642370591e-05, + "loss": 1.315, + "step": 8680 + }, + { + "epoch": 0.9166666666666666, + "grad_norm": 0.7798444032669067, + "learning_rate": 2.6203995791816372e-05, + "loss": 1.3074, + "step": 8690 + }, + { + "epoch": 0.9177215189873418, + "grad_norm": 0.783479630947113, + "learning_rate": 2.5550215647573482e-05, + "loss": 1.3054, + "step": 8700 + }, + { + "epoch": 0.9187763713080169, + "grad_norm": 0.8833469152450562, + "learning_rate": 2.490455331690303e-05, + "loss": 1.3041, + "step": 8710 + }, + { + "epoch": 0.919831223628692, + "grad_norm": 0.8234348893165588, + "learning_rate": 2.4267016034766637e-05, + "loss": 1.2999, + "step": 8720 + }, + { + 
"epoch": 0.9208860759493671, + "grad_norm": 0.7946054935455322, + "learning_rate": 2.363761094508085e-05, + "loss": 1.3105, + "step": 8730 + }, + { + "epoch": 0.9219409282700421, + "grad_norm": 0.8370343446731567, + "learning_rate": 2.301634510063702e-05, + "loss": 1.2996, + "step": 8740 + }, + { + "epoch": 0.9229957805907173, + "grad_norm": 0.7501527070999146, + "learning_rate": 2.2403225463022288e-05, + "loss": 1.2989, + "step": 8750 + }, + { + "epoch": 0.9240506329113924, + "grad_norm": 0.9547525644302368, + "learning_rate": 2.1798258902541723e-05, + "loss": 1.286, + "step": 8760 + }, + { + "epoch": 0.9251054852320675, + "grad_norm": 0.7142996191978455, + "learning_rate": 2.120145219814082e-05, + "loss": 1.2982, + "step": 8770 + }, + { + "epoch": 0.9261603375527426, + "grad_norm": 0.8606114983558655, + "learning_rate": 2.0612812037330202e-05, + "loss": 1.3054, + "step": 8780 + }, + { + "epoch": 0.9272151898734177, + "grad_norm": 0.7364078760147095, + "learning_rate": 2.003234501611037e-05, + "loss": 1.3003, + "step": 8790 + }, + { + "epoch": 0.9282700421940928, + "grad_norm": 0.74664705991745, + "learning_rate": 1.9460057638897578e-05, + "loss": 1.2967, + "step": 8800 + }, + { + "epoch": 0.929324894514768, + "grad_norm": 0.7664146423339844, + "learning_rate": 1.8895956318451398e-05, + "loss": 1.3155, + "step": 8810 + }, + { + "epoch": 0.930379746835443, + "grad_norm": 0.7673099040985107, + "learning_rate": 1.8340047375802693e-05, + "loss": 1.3037, + "step": 8820 + }, + { + "epoch": 0.9314345991561181, + "grad_norm": 0.7814343571662903, + "learning_rate": 1.7792337040182434e-05, + "loss": 1.3167, + "step": 8830 + }, + { + "epoch": 0.9324894514767933, + "grad_norm": 0.7312383651733398, + "learning_rate": 1.72528314489524e-05, + "loss": 1.3041, + "step": 8840 + }, + { + "epoch": 0.9335443037974683, + "grad_norm": 0.9524529576301575, + "learning_rate": 1.6721536647536255e-05, + "loss": 1.3011, + "step": 8850 + }, + { + "epoch": 0.9345991561181435, + "grad_norm": 
0.8338428735733032, + "learning_rate": 1.6198458589351595e-05, + "loss": 1.3009, + "step": 8860 + }, + { + "epoch": 0.9356540084388185, + "grad_norm": 0.7873789668083191, + "learning_rate": 1.568360313574349e-05, + "loss": 1.3048, + "step": 8870 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 0.7741798758506775, + "learning_rate": 1.517697605591864e-05, + "loss": 1.3086, + "step": 8880 + }, + { + "epoch": 0.9377637130801688, + "grad_norm": 0.7789709568023682, + "learning_rate": 1.4678583026880993e-05, + "loss": 1.3076, + "step": 8890 + }, + { + "epoch": 0.9388185654008439, + "grad_norm": 0.7503833770751953, + "learning_rate": 1.4188429633367721e-05, + "loss": 1.2985, + "step": 8900 + }, + { + "epoch": 0.939873417721519, + "grad_norm": 0.8001459240913391, + "learning_rate": 1.370652136778694e-05, + "loss": 1.2957, + "step": 8910 + }, + { + "epoch": 0.9409282700421941, + "grad_norm": 0.7391160726547241, + "learning_rate": 1.3232863630156077e-05, + "loss": 1.3023, + "step": 8920 + }, + { + "epoch": 0.9419831223628692, + "grad_norm": 0.7666647434234619, + "learning_rate": 1.2767461728041357e-05, + "loss": 1.302, + "step": 8930 + }, + { + "epoch": 0.9430379746835443, + "grad_norm": 0.7341530323028564, + "learning_rate": 1.2310320876498333e-05, + "loss": 1.3024, + "step": 8940 + }, + { + "epoch": 0.9440928270042194, + "grad_norm": 0.7655852437019348, + "learning_rate": 1.186144619801352e-05, + "loss": 1.3032, + "step": 8950 + }, + { + "epoch": 0.9451476793248945, + "grad_norm": 0.7553996443748474, + "learning_rate": 1.14208427224467e-05, + "loss": 1.3078, + "step": 8960 + }, + { + "epoch": 0.9462025316455697, + "grad_norm": 0.8105466365814209, + "learning_rate": 1.0988515386975206e-05, + "loss": 1.3016, + "step": 8970 + }, + { + "epoch": 0.9472573839662447, + "grad_norm": 0.8963633179664612, + "learning_rate": 1.0564469036037722e-05, + "loss": 1.3001, + "step": 8980 + }, + { + "epoch": 0.9483122362869199, + "grad_norm": 0.7956309914588928, + "learning_rate": 
1.0148708421280822e-05, + "loss": 1.3087, + "step": 8990 + }, + { + "epoch": 0.9493670886075949, + "grad_norm": 0.7527729868888855, + "learning_rate": 9.74123820150502e-06, + "loss": 1.3008, + "step": 9000 + }, + { + "epoch": 0.95042194092827, + "grad_norm": 0.8137521743774414, + "learning_rate": 9.342062942613222e-06, + "loss": 1.2962, + "step": 9010 + }, + { + "epoch": 0.9514767932489452, + "grad_norm": 0.7494956851005554, + "learning_rate": 8.9511871175591e-06, + "loss": 1.2966, + "step": 9020 + }, + { + "epoch": 0.9525316455696202, + "grad_norm": 0.7394681572914124, + "learning_rate": 8.568615106297223e-06, + "loss": 1.3186, + "step": 9030 + }, + { + "epoch": 0.9535864978902954, + "grad_norm": 0.779046356678009, + "learning_rate": 8.194351195733585e-06, + "loss": 1.2934, + "step": 9040 + }, + { + "epoch": 0.9546413502109705, + "grad_norm": 0.7495801448822021, + "learning_rate": 7.828399579678153e-06, + "loss": 1.3027, + "step": 9050 + }, + { + "epoch": 0.9556962025316456, + "grad_norm": 0.7527459263801575, + "learning_rate": 7.470764358797566e-06, + "loss": 1.306, + "step": 9060 + }, + { + "epoch": 0.9567510548523207, + "grad_norm": 0.7468723058700562, + "learning_rate": 7.121449540568842e-06, + "loss": 1.3084, + "step": 9070 + }, + { + "epoch": 0.9578059071729957, + "grad_norm": 0.7153322696685791, + "learning_rate": 6.780459039235409e-06, + "loss": 1.3073, + "step": 9080 + }, + { + "epoch": 0.9588607594936709, + "grad_norm": 0.7828732132911682, + "learning_rate": 6.447796675762146e-06, + "loss": 1.3089, + "step": 9090 + }, + { + "epoch": 0.959915611814346, + "grad_norm": 0.733468770980835, + "learning_rate": 6.123466177793247e-06, + "loss": 1.2893, + "step": 9100 + }, + { + "epoch": 0.9609704641350211, + "grad_norm": 0.7473047971725464, + "learning_rate": 5.807471179610418e-06, + "loss": 1.2975, + "step": 9110 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 0.8689956665039062, + "learning_rate": 5.499815222091836e-06, + "loss": 1.308, + "step": 9120 + 
}, + { + "epoch": 0.9630801687763713, + "grad_norm": 0.7832184433937073, + "learning_rate": 5.200501752672754e-06, + "loss": 1.2913, + "step": 9130 + }, + { + "epoch": 0.9641350210970464, + "grad_norm": 0.797156810760498, + "learning_rate": 4.909534125306702e-06, + "loss": 1.3118, + "step": 9140 + }, + { + "epoch": 0.9651898734177216, + "grad_norm": 0.8503409028053284, + "learning_rate": 4.626915600428105e-06, + "loss": 1.2998, + "step": 9150 + }, + { + "epoch": 0.9662447257383966, + "grad_norm": 0.7264394164085388, + "learning_rate": 4.352649344915471e-06, + "loss": 1.3079, + "step": 9160 + }, + { + "epoch": 0.9672995780590717, + "grad_norm": 0.7678040862083435, + "learning_rate": 4.086738432056092e-06, + "loss": 1.3049, + "step": 9170 + }, + { + "epoch": 0.9683544303797469, + "grad_norm": 0.7595843076705933, + "learning_rate": 3.8291858415117344e-06, + "loss": 1.3002, + "step": 9180 + }, + { + "epoch": 0.9694092827004219, + "grad_norm": 0.8076558113098145, + "learning_rate": 3.579994459284752e-06, + "loss": 1.2976, + "step": 9190 + }, + { + "epoch": 0.9704641350210971, + "grad_norm": 0.7638036012649536, + "learning_rate": 3.339167077686278e-06, + "loss": 1.3012, + "step": 9200 + }, + { + "epoch": 0.9715189873417721, + "grad_norm": 0.741239070892334, + "learning_rate": 3.1067063953048313e-06, + "loss": 1.3009, + "step": 9210 + }, + { + "epoch": 0.9725738396624473, + "grad_norm": 0.7953388690948486, + "learning_rate": 2.8826150169758425e-06, + "loss": 1.2992, + "step": 9220 + }, + { + "epoch": 0.9736286919831224, + "grad_norm": 0.7369252443313599, + "learning_rate": 2.66689545375251e-06, + "loss": 1.291, + "step": 9230 + }, + { + "epoch": 0.9746835443037974, + "grad_norm": 0.763466477394104, + "learning_rate": 2.4595501228779906e-06, + "loss": 1.2865, + "step": 9240 + }, + { + "epoch": 0.9757383966244726, + "grad_norm": 0.7321091294288635, + "learning_rate": 2.2605813477579172e-06, + "loss": 1.2978, + "step": 9250 + }, + { + "epoch": 0.9767932489451476, + 
"grad_norm": 0.7575995326042175, + "learning_rate": 2.069991357934592e-06, + "loss": 1.295, + "step": 9260 + }, + { + "epoch": 0.9778481012658228, + "grad_norm": 0.7364868521690369, + "learning_rate": 1.8877822890618346e-06, + "loss": 1.3047, + "step": 9270 + }, + { + "epoch": 0.9789029535864979, + "grad_norm": 0.7599325180053711, + "learning_rate": 1.7139561828813377e-06, + "loss": 1.295, + "step": 9280 + }, + { + "epoch": 0.979957805907173, + "grad_norm": 0.7361293435096741, + "learning_rate": 1.5485149871995175e-06, + "loss": 1.2996, + "step": 9290 + }, + { + "epoch": 0.9810126582278481, + "grad_norm": 0.7398934960365295, + "learning_rate": 1.3914605558656146e-06, + "loss": 1.2972, + "step": 9300 + }, + { + "epoch": 0.9820675105485233, + "grad_norm": 0.7641172409057617, + "learning_rate": 1.2427946487512941e-06, + "loss": 1.3003, + "step": 9310 + }, + { + "epoch": 0.9831223628691983, + "grad_norm": 0.7507689595222473, + "learning_rate": 1.1025189317305784e-06, + "loss": 1.2961, + "step": 9320 + }, + { + "epoch": 0.9841772151898734, + "grad_norm": 0.7651985883712769, + "learning_rate": 9.706349766615275e-07, + "loss": 1.3068, + "step": 9330 + }, + { + "epoch": 0.9852320675105485, + "grad_norm": 0.6986876726150513, + "learning_rate": 8.47144261368088e-07, + "loss": 1.3028, + "step": 9340 + }, + { + "epoch": 0.9862869198312236, + "grad_norm": 0.7264364957809448, + "learning_rate": 7.320481696241887e-07, + "loss": 1.3166, + "step": 9350 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 0.7372962236404419, + "learning_rate": 6.253479911375037e-07, + "loss": 1.3069, + "step": 9360 + }, + { + "epoch": 0.9883966244725738, + "grad_norm": 0.7441498637199402, + "learning_rate": 5.270449215358797e-07, + "loss": 1.3056, + "step": 9370 + }, + { + "epoch": 0.989451476793249, + "grad_norm": 0.7419185638427734, + "learning_rate": 4.371400623530142e-07, + "loss": 1.2896, + "step": 9380 + }, + { + "epoch": 0.990506329113924, + "grad_norm": 0.7932320237159729, + 
"learning_rate": 3.5563442101696486e-07, + "loss": 1.2952, + "step": 9390 + }, + { + "epoch": 0.9915611814345991, + "grad_norm": 0.7371935248374939, + "learning_rate": 2.825289108379925e-07, + "loss": 1.309, + "step": 9400 + }, + { + "epoch": 0.9926160337552743, + "grad_norm": 0.7584180235862732, + "learning_rate": 2.1782435099923503e-07, + "loss": 1.2839, + "step": 9410 + }, + { + "epoch": 0.9936708860759493, + "grad_norm": 0.7317600250244141, + "learning_rate": 1.6152146654671573e-07, + "loss": 1.304, + "step": 9420 + }, + { + "epoch": 0.9947257383966245, + "grad_norm": 0.7093532681465149, + "learning_rate": 1.1362088838193229e-07, + "loss": 1.3046, + "step": 9430 + }, + { + "epoch": 0.9957805907172996, + "grad_norm": 0.7737886309623718, + "learning_rate": 7.412315325411312e-08, + "loss": 1.3006, + "step": 9440 + }, + { + "epoch": 0.9968354430379747, + "grad_norm": 0.7694770097732544, + "learning_rate": 4.302870375472168e-08, + "loss": 1.3084, + "step": 9450 + }, + { + "epoch": 0.9978902953586498, + "grad_norm": 0.7325647473335266, + "learning_rate": 2.0337888312210727e-08, + "loss": 1.2925, + "step": 9460 + }, + { + "epoch": 0.9989451476793249, + "grad_norm": 0.7565366625785828, + "learning_rate": 6.050961188358573e-09, + "loss": 1.3036, + "step": 9470 + }, + { + "epoch": 1.0, + "grad_norm": 2.393306016921997, + "learning_rate": 1.6808247493838026e-10, + "loss": 1.2961, + "step": 9480 + } + ], + "logging_steps": 10, + "max_steps": 9480, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.833798122374349e+16, + "train_batch_size": 1024, + "trial_name": null, + "trial_params": null +} diff --git a/saves-stablelm-cosine/checkpoint-9480/training_args.bin 
b/saves-stablelm-cosine/checkpoint-9480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..9fbd825468d2494b17dd2f0e51f06360781e65c4 --- /dev/null +++ b/saves-stablelm-cosine/checkpoint-9480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25e0dc454f095d13ddb5ed55c8a2c4b54ca81e58fd808dd9e3a094cc2ebe62c1 +size 5176 diff --git a/saves-stablelm-cosine/config.json b/saves-stablelm-cosine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..25a1ab486ad5c4cccf2de695a11e27be68a84045 --- /dev/null +++ b/saves-stablelm-cosine/config.json @@ -0,0 +1,30 @@ +{ + "architectures": [ + "StableLmForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 0, + "eos_token_id": 0, + "hidden_act": "silu", + "hidden_dropout": 0.0, + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 768, + "layer_norm_eps": 1e-05, + "max_position_embeddings": 4096, + "model_type": "stablelm", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "partial_rotary_factor": 0.25, + "qk_layernorm": false, + "rope_scaling": null, + "rope_theta": 10000, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "use_parallel_residual": false, + "use_qkv_bias": false, + "vocab_size": 2000 +} diff --git a/saves-stablelm-cosine/generation_config.json b/saves-stablelm-cosine/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..14e4f03d0d73dc2707d488ac8f586bd62ef72a7e --- /dev/null +++ b/saves-stablelm-cosine/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 0, + "eos_token_id": 0, + "transformers_version": "4.42.4" +} diff --git a/saves-stablelm-cosine/model.safetensors b/saves-stablelm-cosine/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e73ff745dff597362da5194291c0e5c13389d0f8 
--- /dev/null +++ b/saves-stablelm-cosine/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b388878961f2b1f6972dafddafe806d7d5f52868db7bfd0df7b7c8fda6b9916 +size 8352336 diff --git a/saves-stablelm-cosine/result.log b/saves-stablelm-cosine/result.log new file mode 100644 index 0000000000000000000000000000000000000000..adaf099404fca0a45e7867f10b25389e33c8bcb1 --- /dev/null +++ b/saves-stablelm-cosine/result.log @@ -0,0 +1 @@ +{'train_runtime': 1869.8885, 'train_samples_per_second': 5191.007, 'train_steps_per_second': 5.07, 'train_loss': 1.5904806389587338, 'epoch': 1.0} \ No newline at end of file diff --git a/saves-stablelm-cosine/special_tokens_map.json b/saves-stablelm-cosine/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-stablelm-cosine/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-stablelm-cosine/tokenizer.json b/saves-stablelm-cosine/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-stablelm-cosine/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 
80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + 
"ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + 
"od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, 
+ "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 
685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, 
+ "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, 
+ "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + 
"ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, 
+ "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + 
"åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + 
"åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + 
"ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, 
+ "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 
1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + 
}, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + 
"Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", + "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-stablelm-cosine/tokenizer_config.json b/saves-stablelm-cosine/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-stablelm-cosine/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": 
"replace", + "model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-stablelm/checkpoint-9480/config.json b/saves-stablelm/checkpoint-9480/config.json new file mode 100644 index 0000000000000000000000000000000000000000..25a1ab486ad5c4cccf2de695a11e27be68a84045 --- /dev/null +++ b/saves-stablelm/checkpoint-9480/config.json @@ -0,0 +1,30 @@ +{ + "architectures": [ + "StableLmForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 0, + "eos_token_id": 0, + "hidden_act": "silu", + "hidden_dropout": 0.0, + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 768, + "layer_norm_eps": 1e-05, + "max_position_embeddings": 4096, + "model_type": "stablelm", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "partial_rotary_factor": 0.25, + "qk_layernorm": false, + "rope_scaling": null, + "rope_theta": 10000, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_cache": true, + "use_parallel_residual": false, + "use_qkv_bias": false, + "vocab_size": 2000 +} diff --git a/saves-stablelm/checkpoint-9480/generation_config.json b/saves-stablelm/checkpoint-9480/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..14e4f03d0d73dc2707d488ac8f586bd62ef72a7e --- /dev/null +++ b/saves-stablelm/checkpoint-9480/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 0, + "eos_token_id": 0, + "transformers_version": "4.42.4" +} diff --git a/saves-stablelm/checkpoint-9480/model.safetensors b/saves-stablelm/checkpoint-9480/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2e65c82d5ba9ce5d81a3b2ce5c2d9de5d6072757 --- /dev/null +++ b/saves-stablelm/checkpoint-9480/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:befa512227bb908b637e47f3aae315e20876a6f1e38cd11cf22ca3d4afdccc09 +size 8352336 diff --git a/saves-stablelm/checkpoint-9480/optimizer.pt b/saves-stablelm/checkpoint-9480/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ebd8c07cc77c5064e7f28a12d2b6b1721a5cad62 --- /dev/null +++ b/saves-stablelm/checkpoint-9480/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2b56134597910d0797060fffb32c8d4ea8ddb8a5ba5cf00029e72bfe133acd6 +size 16720723 diff --git a/saves-stablelm/checkpoint-9480/rng_state.pth b/saves-stablelm/checkpoint-9480/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f8291cd6ce87668b786a72f3e93d072fbe54902 --- /dev/null +++ b/saves-stablelm/checkpoint-9480/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c917636c7a58af68a29056522a757e9f9b99005b776641aa157c536967817d +size 14244 diff --git a/saves-stablelm/checkpoint-9480/scheduler.pt b/saves-stablelm/checkpoint-9480/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..75fc58e9b05cc951a82cac092de91cd65804440d --- /dev/null +++ b/saves-stablelm/checkpoint-9480/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0a00f09d701c4f602dd021702c8cfc44bc37c286d3a858d845780a823871eb9 +size 1064 diff --git a/saves-stablelm/checkpoint-9480/special_tokens_map.json b/saves-stablelm/checkpoint-9480/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-stablelm/checkpoint-9480/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": 
"<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-stablelm/checkpoint-9480/tokenizer.json b/saves-stablelm/checkpoint-9480/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-stablelm/checkpoint-9480/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + 
"(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, 
+ "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 
335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + 
"ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + 
"计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 
773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 
915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 
1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + 
"?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + 
"ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 
1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + 
"åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + 
"åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 
1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 
1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-stablelm/checkpoint-9480/tokenizer_config.json b/saves-stablelm/checkpoint-9480/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-stablelm/checkpoint-9480/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": 
"<|im_end|>", + "errors": "replace", + "model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-stablelm/checkpoint-9480/trainer_state.json b/saves-stablelm/checkpoint-9480/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..55c2d3af65af2da675fede9917d40d87f69bca8b --- /dev/null +++ b/saves-stablelm/checkpoint-9480/trainer_state.json @@ -0,0 +1,6669 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 9480, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0010548523206751054, + "grad_norm": 1.287693738937378, + "learning_rate": 0.00015822784810126583, + "loss": 7.504, + "step": 10 + }, + { + "epoch": 0.002109704641350211, + "grad_norm": 1.1752086877822876, + "learning_rate": 0.00031645569620253165, + "loss": 6.9083, + "step": 20 + }, + { + "epoch": 0.0031645569620253164, + "grad_norm": 0.8464803099632263, + "learning_rate": 0.00047468354430379745, + "loss": 6.2512, + "step": 30 + }, + { + "epoch": 0.004219409282700422, + "grad_norm": 0.832782506942749, + "learning_rate": 0.0006329113924050633, + "loss": 5.7563, + "step": 40 + }, + { + "epoch": 0.005274261603375527, + "grad_norm": 0.6848328709602356, + "learning_rate": 0.0007911392405063291, + "loss": 5.2785, + "step": 50 + }, + { + "epoch": 0.006329113924050633, + "grad_norm": 0.7377444505691528, + "learning_rate": 0.0009493670886075949, + "loss": 4.7649, + "step": 60 + }, + { + "epoch": 0.007383966244725738, + "grad_norm": 1.3218724727630615, + "learning_rate": 0.0011075949367088608, + "loss": 4.3897, + "step": 70 + }, + { + "epoch": 0.008438818565400843, + "grad_norm": 2.2047746181488037, + "learning_rate": 0.0012658227848101266, + "loss": 4.1424, + "step": 80 + }, + { + "epoch": 0.00949367088607595, + "grad_norm": 
1.4691811800003052, + "learning_rate": 0.0014240506329113926, + "loss": 3.9439, + "step": 90 + }, + { + "epoch": 0.010548523206751054, + "grad_norm": 1.1241525411605835, + "learning_rate": 0.0015, + "loss": 3.8046, + "step": 100 + }, + { + "epoch": 0.011603375527426161, + "grad_norm": 0.7155131697654724, + "learning_rate": 0.0015, + "loss": 3.6576, + "step": 110 + }, + { + "epoch": 0.012658227848101266, + "grad_norm": 0.8953099250793457, + "learning_rate": 0.0015, + "loss": 3.5474, + "step": 120 + }, + { + "epoch": 0.013713080168776372, + "grad_norm": 0.7484499216079712, + "learning_rate": 0.0015, + "loss": 3.4409, + "step": 130 + }, + { + "epoch": 0.014767932489451477, + "grad_norm": 0.9211852550506592, + "learning_rate": 0.0015, + "loss": 3.3453, + "step": 140 + }, + { + "epoch": 0.015822784810126583, + "grad_norm": 0.6819095611572266, + "learning_rate": 0.0015, + "loss": 3.2633, + "step": 150 + }, + { + "epoch": 0.016877637130801686, + "grad_norm": 1.4579458236694336, + "learning_rate": 0.0015, + "loss": 3.2016, + "step": 160 + }, + { + "epoch": 0.017932489451476793, + "grad_norm": 0.7378158569335938, + "learning_rate": 0.0015, + "loss": 3.1311, + "step": 170 + }, + { + "epoch": 0.0189873417721519, + "grad_norm": 0.7594382166862488, + "learning_rate": 0.0015, + "loss": 3.0853, + "step": 180 + }, + { + "epoch": 0.020042194092827006, + "grad_norm": 0.8648813366889954, + "learning_rate": 0.0015, + "loss": 3.0228, + "step": 190 + }, + { + "epoch": 0.02109704641350211, + "grad_norm": 0.6805261969566345, + "learning_rate": 0.0015, + "loss": 2.9792, + "step": 200 + }, + { + "epoch": 0.022151898734177215, + "grad_norm": 0.6806155443191528, + "learning_rate": 0.0015, + "loss": 2.9464, + "step": 210 + }, + { + "epoch": 0.023206751054852322, + "grad_norm": 0.7102679014205933, + "learning_rate": 0.0015, + "loss": 2.9139, + "step": 220 + }, + { + "epoch": 0.024261603375527425, + "grad_norm": 0.5942780375480652, + "learning_rate": 0.0015, + "loss": 2.8566, + "step": 230 + }, 
+ { + "epoch": 0.02531645569620253, + "grad_norm": 0.7879489660263062, + "learning_rate": 0.0015, + "loss": 2.8269, + "step": 240 + }, + { + "epoch": 0.026371308016877638, + "grad_norm": 0.7731751203536987, + "learning_rate": 0.0015, + "loss": 2.7951, + "step": 250 + }, + { + "epoch": 0.027426160337552744, + "grad_norm": 0.8008527755737305, + "learning_rate": 0.0015, + "loss": 2.7677, + "step": 260 + }, + { + "epoch": 0.028481012658227847, + "grad_norm": 0.7339242696762085, + "learning_rate": 0.0015, + "loss": 2.7373, + "step": 270 + }, + { + "epoch": 0.029535864978902954, + "grad_norm": 0.7911431789398193, + "learning_rate": 0.0015, + "loss": 2.7021, + "step": 280 + }, + { + "epoch": 0.03059071729957806, + "grad_norm": 0.7747300863265991, + "learning_rate": 0.0015, + "loss": 2.6853, + "step": 290 + }, + { + "epoch": 0.03164556962025317, + "grad_norm": 0.7996903657913208, + "learning_rate": 0.0015, + "loss": 2.6618, + "step": 300 + }, + { + "epoch": 0.03270042194092827, + "grad_norm": 1.1671041250228882, + "learning_rate": 0.0015, + "loss": 2.6367, + "step": 310 + }, + { + "epoch": 0.03375527426160337, + "grad_norm": 0.9307910799980164, + "learning_rate": 0.0015, + "loss": 2.6081, + "step": 320 + }, + { + "epoch": 0.03481012658227848, + "grad_norm": 0.8109868168830872, + "learning_rate": 0.0015, + "loss": 2.6046, + "step": 330 + }, + { + "epoch": 0.035864978902953586, + "grad_norm": 1.0915179252624512, + "learning_rate": 0.0015, + "loss": 2.5813, + "step": 340 + }, + { + "epoch": 0.03691983122362869, + "grad_norm": 1.4125624895095825, + "learning_rate": 0.0015, + "loss": 2.5477, + "step": 350 + }, + { + "epoch": 0.0379746835443038, + "grad_norm": 0.6731581687927246, + "learning_rate": 0.0015, + "loss": 2.5279, + "step": 360 + }, + { + "epoch": 0.039029535864978905, + "grad_norm": 0.8448395729064941, + "learning_rate": 0.0015, + "loss": 2.5148, + "step": 370 + }, + { + "epoch": 0.04008438818565401, + "grad_norm": 0.95484459400177, + "learning_rate": 0.0015, + 
"loss": 2.509, + "step": 380 + }, + { + "epoch": 0.04113924050632911, + "grad_norm": 0.7932597994804382, + "learning_rate": 0.0015, + "loss": 2.4825, + "step": 390 + }, + { + "epoch": 0.04219409282700422, + "grad_norm": 0.8613596558570862, + "learning_rate": 0.0015, + "loss": 2.4545, + "step": 400 + }, + { + "epoch": 0.043248945147679324, + "grad_norm": 1.0658255815505981, + "learning_rate": 0.0015, + "loss": 2.4485, + "step": 410 + }, + { + "epoch": 0.04430379746835443, + "grad_norm": 0.9920756220817566, + "learning_rate": 0.0015, + "loss": 2.4335, + "step": 420 + }, + { + "epoch": 0.04535864978902954, + "grad_norm": 1.2806950807571411, + "learning_rate": 0.0015, + "loss": 2.4053, + "step": 430 + }, + { + "epoch": 0.046413502109704644, + "grad_norm": 1.0018268823623657, + "learning_rate": 0.0015, + "loss": 2.3893, + "step": 440 + }, + { + "epoch": 0.04746835443037975, + "grad_norm": 0.8824273943901062, + "learning_rate": 0.0015, + "loss": 2.3913, + "step": 450 + }, + { + "epoch": 0.04852320675105485, + "grad_norm": 0.968868613243103, + "learning_rate": 0.0015, + "loss": 2.3682, + "step": 460 + }, + { + "epoch": 0.049578059071729956, + "grad_norm": 0.8527579307556152, + "learning_rate": 0.0015, + "loss": 2.3442, + "step": 470 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 1.0633043050765991, + "learning_rate": 0.0015, + "loss": 2.3463, + "step": 480 + }, + { + "epoch": 0.05168776371308017, + "grad_norm": 0.877286970615387, + "learning_rate": 0.0015, + "loss": 2.3183, + "step": 490 + }, + { + "epoch": 0.052742616033755275, + "grad_norm": 1.0159716606140137, + "learning_rate": 0.0015, + "loss": 2.3197, + "step": 500 + }, + { + "epoch": 0.05379746835443038, + "grad_norm": 0.7632014155387878, + "learning_rate": 0.0015, + "loss": 2.2983, + "step": 510 + }, + { + "epoch": 0.05485232067510549, + "grad_norm": 0.962928056716919, + "learning_rate": 0.0015, + "loss": 2.2887, + "step": 520 + }, + { + "epoch": 0.05590717299578059, + "grad_norm": 0.9052041172981262, + 
"learning_rate": 0.0015, + "loss": 2.2708, + "step": 530 + }, + { + "epoch": 0.056962025316455694, + "grad_norm": 0.8504638671875, + "learning_rate": 0.0015, + "loss": 2.2586, + "step": 540 + }, + { + "epoch": 0.0580168776371308, + "grad_norm": 0.8195692896842957, + "learning_rate": 0.0015, + "loss": 2.2539, + "step": 550 + }, + { + "epoch": 0.05907172995780591, + "grad_norm": 1.163393259048462, + "learning_rate": 0.0015, + "loss": 2.215, + "step": 560 + }, + { + "epoch": 0.060126582278481014, + "grad_norm": 1.3868380784988403, + "learning_rate": 0.0015, + "loss": 2.227, + "step": 570 + }, + { + "epoch": 0.06118143459915612, + "grad_norm": 2.0864264965057373, + "learning_rate": 0.0015, + "loss": 2.2404, + "step": 580 + }, + { + "epoch": 0.06223628691983123, + "grad_norm": 1.0416491031646729, + "learning_rate": 0.0015, + "loss": 2.21, + "step": 590 + }, + { + "epoch": 0.06329113924050633, + "grad_norm": 1.1740283966064453, + "learning_rate": 0.0015, + "loss": 2.174, + "step": 600 + }, + { + "epoch": 0.06434599156118144, + "grad_norm": 0.8408395648002625, + "learning_rate": 0.0015, + "loss": 2.1814, + "step": 610 + }, + { + "epoch": 0.06540084388185655, + "grad_norm": 1.127395749092102, + "learning_rate": 0.0015, + "loss": 2.1718, + "step": 620 + }, + { + "epoch": 0.06645569620253164, + "grad_norm": 1.178957223892212, + "learning_rate": 0.0015, + "loss": 2.1562, + "step": 630 + }, + { + "epoch": 0.06751054852320675, + "grad_norm": 0.8522863388061523, + "learning_rate": 0.0015, + "loss": 2.1705, + "step": 640 + }, + { + "epoch": 0.06856540084388185, + "grad_norm": 1.2730300426483154, + "learning_rate": 0.0015, + "loss": 2.1593, + "step": 650 + }, + { + "epoch": 0.06962025316455696, + "grad_norm": 0.7847221493721008, + "learning_rate": 0.0015, + "loss": 2.143, + "step": 660 + }, + { + "epoch": 0.07067510548523206, + "grad_norm": 1.0140389204025269, + "learning_rate": 0.0015, + "loss": 2.1233, + "step": 670 + }, + { + "epoch": 0.07172995780590717, + "grad_norm": 
1.4771313667297363, + "learning_rate": 0.0015, + "loss": 2.1218, + "step": 680 + }, + { + "epoch": 0.07278481012658228, + "grad_norm": 0.8238674998283386, + "learning_rate": 0.0015, + "loss": 2.1287, + "step": 690 + }, + { + "epoch": 0.07383966244725738, + "grad_norm": 1.2008603811264038, + "learning_rate": 0.0015, + "loss": 2.1098, + "step": 700 + }, + { + "epoch": 0.07489451476793249, + "grad_norm": 0.7931945323944092, + "learning_rate": 0.0015, + "loss": 2.0962, + "step": 710 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 0.9941354393959045, + "learning_rate": 0.0015, + "loss": 2.0764, + "step": 720 + }, + { + "epoch": 0.0770042194092827, + "grad_norm": 1.0509740114212036, + "learning_rate": 0.0015, + "loss": 2.0708, + "step": 730 + }, + { + "epoch": 0.07805907172995781, + "grad_norm": 0.9858490824699402, + "learning_rate": 0.0015, + "loss": 2.0811, + "step": 740 + }, + { + "epoch": 0.07911392405063292, + "grad_norm": 1.4704546928405762, + "learning_rate": 0.0015, + "loss": 2.0674, + "step": 750 + }, + { + "epoch": 0.08016877637130802, + "grad_norm": 0.8545016646385193, + "learning_rate": 0.0015, + "loss": 2.0626, + "step": 760 + }, + { + "epoch": 0.08122362869198312, + "grad_norm": 0.8317205905914307, + "learning_rate": 0.0015, + "loss": 2.0462, + "step": 770 + }, + { + "epoch": 0.08227848101265822, + "grad_norm": 0.723811149597168, + "learning_rate": 0.0015, + "loss": 2.0431, + "step": 780 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 2.26023268699646, + "learning_rate": 0.0015, + "loss": 2.0469, + "step": 790 + }, + { + "epoch": 0.08438818565400844, + "grad_norm": 0.8357682228088379, + "learning_rate": 0.0015, + "loss": 2.0368, + "step": 800 + }, + { + "epoch": 0.08544303797468354, + "grad_norm": 0.7203294634819031, + "learning_rate": 0.0015, + "loss": 2.0015, + "step": 810 + }, + { + "epoch": 0.08649789029535865, + "grad_norm": 1.1513116359710693, + "learning_rate": 0.0015, + "loss": 2.0131, + "step": 820 + }, + { + "epoch": 
0.08755274261603375, + "grad_norm": 0.7644749879837036, + "learning_rate": 0.0015, + "loss": 2.0207, + "step": 830 + }, + { + "epoch": 0.08860759493670886, + "grad_norm": 1.069175362586975, + "learning_rate": 0.0015, + "loss": 2.003, + "step": 840 + }, + { + "epoch": 0.08966244725738397, + "grad_norm": 0.8183401823043823, + "learning_rate": 0.0015, + "loss": 1.9935, + "step": 850 + }, + { + "epoch": 0.09071729957805907, + "grad_norm": 1.1448237895965576, + "learning_rate": 0.0015, + "loss": 1.9936, + "step": 860 + }, + { + "epoch": 0.09177215189873418, + "grad_norm": 0.9131275415420532, + "learning_rate": 0.0015, + "loss": 1.9889, + "step": 870 + }, + { + "epoch": 0.09282700421940929, + "grad_norm": 1.197825312614441, + "learning_rate": 0.0015, + "loss": 1.982, + "step": 880 + }, + { + "epoch": 0.0938818565400844, + "grad_norm": 2.0946359634399414, + "learning_rate": 0.0015, + "loss": 1.9936, + "step": 890 + }, + { + "epoch": 0.0949367088607595, + "grad_norm": 1.1563642024993896, + "learning_rate": 0.0015, + "loss": 1.9763, + "step": 900 + }, + { + "epoch": 0.09599156118143459, + "grad_norm": 1.3439797163009644, + "learning_rate": 0.0015, + "loss": 1.9751, + "step": 910 + }, + { + "epoch": 0.0970464135021097, + "grad_norm": 0.7975325584411621, + "learning_rate": 0.0015, + "loss": 1.9568, + "step": 920 + }, + { + "epoch": 0.0981012658227848, + "grad_norm": 0.7094942331314087, + "learning_rate": 0.0015, + "loss": 1.9418, + "step": 930 + }, + { + "epoch": 0.09915611814345991, + "grad_norm": 0.8757395148277283, + "learning_rate": 0.0015, + "loss": 1.9563, + "step": 940 + }, + { + "epoch": 0.10021097046413502, + "grad_norm": 1.3536033630371094, + "learning_rate": 0.0015, + "loss": 1.9469, + "step": 950 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 0.8518950939178467, + "learning_rate": 0.0015, + "loss": 1.9431, + "step": 960 + }, + { + "epoch": 0.10232067510548523, + "grad_norm": 1.2500149011611938, + "learning_rate": 0.0015, + "loss": 1.9401, + "step": 970 + 
}, + { + "epoch": 0.10337552742616034, + "grad_norm": 0.8824960589408875, + "learning_rate": 0.0015, + "loss": 1.9227, + "step": 980 + }, + { + "epoch": 0.10443037974683544, + "grad_norm": 1.3813319206237793, + "learning_rate": 0.0015, + "loss": 1.9241, + "step": 990 + }, + { + "epoch": 0.10548523206751055, + "grad_norm": 0.8368834257125854, + "learning_rate": 0.0015, + "loss": 1.9304, + "step": 1000 + }, + { + "epoch": 0.10654008438818566, + "grad_norm": 1.9546079635620117, + "learning_rate": 0.0015, + "loss": 1.9236, + "step": 1010 + }, + { + "epoch": 0.10759493670886076, + "grad_norm": 1.0075868368148804, + "learning_rate": 0.0015, + "loss": 1.9202, + "step": 1020 + }, + { + "epoch": 0.10864978902953587, + "grad_norm": 1.280001163482666, + "learning_rate": 0.0015, + "loss": 1.9301, + "step": 1030 + }, + { + "epoch": 0.10970464135021098, + "grad_norm": 0.977297306060791, + "learning_rate": 0.0015, + "loss": 1.8952, + "step": 1040 + }, + { + "epoch": 0.11075949367088607, + "grad_norm": 1.1419881582260132, + "learning_rate": 0.0015, + "loss": 1.8936, + "step": 1050 + }, + { + "epoch": 0.11181434599156118, + "grad_norm": 0.9668622612953186, + "learning_rate": 0.0015, + "loss": 1.8915, + "step": 1060 + }, + { + "epoch": 0.11286919831223628, + "grad_norm": 0.884096622467041, + "learning_rate": 0.0015, + "loss": 1.8785, + "step": 1070 + }, + { + "epoch": 0.11392405063291139, + "grad_norm": 1.0495129823684692, + "learning_rate": 0.0015, + "loss": 1.8813, + "step": 1080 + }, + { + "epoch": 0.1149789029535865, + "grad_norm": 1.1702920198440552, + "learning_rate": 0.0015, + "loss": 1.889, + "step": 1090 + }, + { + "epoch": 0.1160337552742616, + "grad_norm": 1.279408574104309, + "learning_rate": 0.0015, + "loss": 1.8796, + "step": 1100 + }, + { + "epoch": 0.11708860759493671, + "grad_norm": 0.7171920537948608, + "learning_rate": 0.0015, + "loss": 1.8823, + "step": 1110 + }, + { + "epoch": 0.11814345991561181, + "grad_norm": 1.0340485572814941, + "learning_rate": 0.0015, + 
"loss": 1.8758, + "step": 1120 + }, + { + "epoch": 0.11919831223628692, + "grad_norm": 0.9206237196922302, + "learning_rate": 0.0015, + "loss": 1.8593, + "step": 1130 + }, + { + "epoch": 0.12025316455696203, + "grad_norm": 1.2166917324066162, + "learning_rate": 0.0015, + "loss": 1.8713, + "step": 1140 + }, + { + "epoch": 0.12130801687763713, + "grad_norm": 1.209561824798584, + "learning_rate": 0.0015, + "loss": 1.8742, + "step": 1150 + }, + { + "epoch": 0.12236286919831224, + "grad_norm": 1.1506495475769043, + "learning_rate": 0.0015, + "loss": 1.8594, + "step": 1160 + }, + { + "epoch": 0.12341772151898735, + "grad_norm": 1.2940099239349365, + "learning_rate": 0.0015, + "loss": 1.8465, + "step": 1170 + }, + { + "epoch": 0.12447257383966245, + "grad_norm": 0.8348226547241211, + "learning_rate": 0.0015, + "loss": 1.8316, + "step": 1180 + }, + { + "epoch": 0.12552742616033755, + "grad_norm": 1.2827224731445312, + "learning_rate": 0.0015, + "loss": 1.8345, + "step": 1190 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 2.010375499725342, + "learning_rate": 0.0015, + "loss": 1.8534, + "step": 1200 + }, + { + "epoch": 0.12763713080168776, + "grad_norm": 1.8737359046936035, + "learning_rate": 0.0015, + "loss": 1.8596, + "step": 1210 + }, + { + "epoch": 0.12869198312236288, + "grad_norm": 0.8775668740272522, + "learning_rate": 0.0015, + "loss": 1.8251, + "step": 1220 + }, + { + "epoch": 0.12974683544303797, + "grad_norm": 0.9482736587524414, + "learning_rate": 0.0015, + "loss": 1.8065, + "step": 1230 + }, + { + "epoch": 0.1308016877637131, + "grad_norm": 1.0006464719772339, + "learning_rate": 0.0015, + "loss": 1.8276, + "step": 1240 + }, + { + "epoch": 0.13185654008438819, + "grad_norm": 1.3121036291122437, + "learning_rate": 0.0015, + "loss": 1.8324, + "step": 1250 + }, + { + "epoch": 0.13291139240506328, + "grad_norm": 1.3296419382095337, + "learning_rate": 0.0015, + "loss": 1.82, + "step": 1260 + }, + { + "epoch": 0.1339662447257384, + "grad_norm": 
1.179487943649292, + "learning_rate": 0.0015, + "loss": 1.8184, + "step": 1270 + }, + { + "epoch": 0.1350210970464135, + "grad_norm": 1.171682357788086, + "learning_rate": 0.0015, + "loss": 1.8295, + "step": 1280 + }, + { + "epoch": 0.1360759493670886, + "grad_norm": 0.7854935526847839, + "learning_rate": 0.0015, + "loss": 1.8158, + "step": 1290 + }, + { + "epoch": 0.1371308016877637, + "grad_norm": 0.8158584833145142, + "learning_rate": 0.0015, + "loss": 1.8008, + "step": 1300 + }, + { + "epoch": 0.13818565400843882, + "grad_norm": 1.1654446125030518, + "learning_rate": 0.0015, + "loss": 1.8039, + "step": 1310 + }, + { + "epoch": 0.13924050632911392, + "grad_norm": 0.9658716917037964, + "learning_rate": 0.0015, + "loss": 1.8111, + "step": 1320 + }, + { + "epoch": 0.14029535864978904, + "grad_norm": 0.854474663734436, + "learning_rate": 0.0015, + "loss": 1.8044, + "step": 1330 + }, + { + "epoch": 0.14135021097046413, + "grad_norm": 1.2906005382537842, + "learning_rate": 0.0015, + "loss": 1.8048, + "step": 1340 + }, + { + "epoch": 0.14240506329113925, + "grad_norm": 1.195814847946167, + "learning_rate": 0.0015, + "loss": 1.7995, + "step": 1350 + }, + { + "epoch": 0.14345991561181434, + "grad_norm": 1.1217211484909058, + "learning_rate": 0.0015, + "loss": 1.7885, + "step": 1360 + }, + { + "epoch": 0.14451476793248946, + "grad_norm": 0.8637157678604126, + "learning_rate": 0.0015, + "loss": 1.7863, + "step": 1370 + }, + { + "epoch": 0.14556962025316456, + "grad_norm": 0.8490321636199951, + "learning_rate": 0.0015, + "loss": 1.7789, + "step": 1380 + }, + { + "epoch": 0.14662447257383968, + "grad_norm": 0.8772439956665039, + "learning_rate": 0.0015, + "loss": 1.7884, + "step": 1390 + }, + { + "epoch": 0.14767932489451477, + "grad_norm": 1.184425950050354, + "learning_rate": 0.0015, + "loss": 1.7763, + "step": 1400 + }, + { + "epoch": 0.14873417721518986, + "grad_norm": 0.7618146538734436, + "learning_rate": 0.0015, + "loss": 1.7725, + "step": 1410 + }, + { + "epoch": 
0.14978902953586498, + "grad_norm": 1.08968186378479, + "learning_rate": 0.0015, + "loss": 1.7772, + "step": 1420 + }, + { + "epoch": 0.15084388185654007, + "grad_norm": 1.7729954719543457, + "learning_rate": 0.0015, + "loss": 1.7753, + "step": 1430 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 1.4365698099136353, + "learning_rate": 0.0015, + "loss": 1.7831, + "step": 1440 + }, + { + "epoch": 0.1529535864978903, + "grad_norm": 1.0357367992401123, + "learning_rate": 0.0015, + "loss": 1.7582, + "step": 1450 + }, + { + "epoch": 0.1540084388185654, + "grad_norm": 0.8200437426567078, + "learning_rate": 0.0015, + "loss": 1.7574, + "step": 1460 + }, + { + "epoch": 0.1550632911392405, + "grad_norm": 1.0429527759552002, + "learning_rate": 0.0015, + "loss": 1.7571, + "step": 1470 + }, + { + "epoch": 0.15611814345991562, + "grad_norm": 0.7643953561782837, + "learning_rate": 0.0015, + "loss": 1.7586, + "step": 1480 + }, + { + "epoch": 0.1571729957805907, + "grad_norm": 0.9563626646995544, + "learning_rate": 0.0015, + "loss": 1.7577, + "step": 1490 + }, + { + "epoch": 0.15822784810126583, + "grad_norm": 1.0259424448013306, + "learning_rate": 0.0015, + "loss": 1.7654, + "step": 1500 + }, + { + "epoch": 0.15928270042194093, + "grad_norm": 1.021310567855835, + "learning_rate": 0.0015, + "loss": 1.7555, + "step": 1510 + }, + { + "epoch": 0.16033755274261605, + "grad_norm": 1.4872322082519531, + "learning_rate": 0.0015, + "loss": 1.7602, + "step": 1520 + }, + { + "epoch": 0.16139240506329114, + "grad_norm": 0.7495704293251038, + "learning_rate": 0.0015, + "loss": 1.7379, + "step": 1530 + }, + { + "epoch": 0.16244725738396623, + "grad_norm": 0.8076335191726685, + "learning_rate": 0.0015, + "loss": 1.7288, + "step": 1540 + }, + { + "epoch": 0.16350210970464135, + "grad_norm": 0.9234035611152649, + "learning_rate": 0.0015, + "loss": 1.7287, + "step": 1550 + }, + { + "epoch": 0.16455696202531644, + "grad_norm": 1.1073510646820068, + "learning_rate": 0.0015, + "loss": 1.7342, + 
"step": 1560 + }, + { + "epoch": 0.16561181434599156, + "grad_norm": 1.3561965227127075, + "learning_rate": 0.0015, + "loss": 1.729, + "step": 1570 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 0.9569161534309387, + "learning_rate": 0.0015, + "loss": 1.7364, + "step": 1580 + }, + { + "epoch": 0.16772151898734178, + "grad_norm": 1.0910649299621582, + "learning_rate": 0.0015, + "loss": 1.7239, + "step": 1590 + }, + { + "epoch": 0.16877637130801687, + "grad_norm": 0.7667181491851807, + "learning_rate": 0.0015, + "loss": 1.7302, + "step": 1600 + }, + { + "epoch": 0.169831223628692, + "grad_norm": 0.9559243321418762, + "learning_rate": 0.0015, + "loss": 1.7327, + "step": 1610 + }, + { + "epoch": 0.17088607594936708, + "grad_norm": 2.180569648742676, + "learning_rate": 0.0015, + "loss": 1.7117, + "step": 1620 + }, + { + "epoch": 0.1719409282700422, + "grad_norm": 1.3768761157989502, + "learning_rate": 0.0015, + "loss": 1.7256, + "step": 1630 + }, + { + "epoch": 0.1729957805907173, + "grad_norm": 1.2568798065185547, + "learning_rate": 0.0015, + "loss": 1.7031, + "step": 1640 + }, + { + "epoch": 0.17405063291139242, + "grad_norm": 1.2443517446517944, + "learning_rate": 0.0015, + "loss": 1.6957, + "step": 1650 + }, + { + "epoch": 0.1751054852320675, + "grad_norm": 1.0823681354522705, + "learning_rate": 0.0015, + "loss": 1.6965, + "step": 1660 + }, + { + "epoch": 0.17616033755274263, + "grad_norm": 0.7143921256065369, + "learning_rate": 0.0015, + "loss": 1.7149, + "step": 1670 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 1.192913293838501, + "learning_rate": 0.0015, + "loss": 1.7023, + "step": 1680 + }, + { + "epoch": 0.17827004219409281, + "grad_norm": 0.7353207468986511, + "learning_rate": 0.0015, + "loss": 1.6926, + "step": 1690 + }, + { + "epoch": 0.17932489451476794, + "grad_norm": 1.989004135131836, + "learning_rate": 0.0015, + "loss": 1.7147, + "step": 1700 + }, + { + "epoch": 0.18037974683544303, + "grad_norm": 1.4087939262390137, + 
"learning_rate": 0.0015, + "loss": 1.7099, + "step": 1710 + }, + { + "epoch": 0.18143459915611815, + "grad_norm": 0.7485800385475159, + "learning_rate": 0.0015, + "loss": 1.6901, + "step": 1720 + }, + { + "epoch": 0.18248945147679324, + "grad_norm": 0.8378844857215881, + "learning_rate": 0.0015, + "loss": 1.6904, + "step": 1730 + }, + { + "epoch": 0.18354430379746836, + "grad_norm": 0.807313859462738, + "learning_rate": 0.0015, + "loss": 1.6947, + "step": 1740 + }, + { + "epoch": 0.18459915611814345, + "grad_norm": 0.7325294017791748, + "learning_rate": 0.0015, + "loss": 1.7173, + "step": 1750 + }, + { + "epoch": 0.18565400843881857, + "grad_norm": 1.0724302530288696, + "learning_rate": 0.0015, + "loss": 1.6982, + "step": 1760 + }, + { + "epoch": 0.18670886075949367, + "grad_norm": 0.8010008931159973, + "learning_rate": 0.0015, + "loss": 1.7002, + "step": 1770 + }, + { + "epoch": 0.1877637130801688, + "grad_norm": 1.2035045623779297, + "learning_rate": 0.0015, + "loss": 1.6989, + "step": 1780 + }, + { + "epoch": 0.18881856540084388, + "grad_norm": 2.1248722076416016, + "learning_rate": 0.0015, + "loss": 1.7064, + "step": 1790 + }, + { + "epoch": 0.189873417721519, + "grad_norm": 1.253764271736145, + "learning_rate": 0.0015, + "loss": 1.7084, + "step": 1800 + }, + { + "epoch": 0.1909282700421941, + "grad_norm": 1.0728580951690674, + "learning_rate": 0.0015, + "loss": 1.6754, + "step": 1810 + }, + { + "epoch": 0.19198312236286919, + "grad_norm": 1.0926449298858643, + "learning_rate": 0.0015, + "loss": 1.6803, + "step": 1820 + }, + { + "epoch": 0.1930379746835443, + "grad_norm": 0.8255897164344788, + "learning_rate": 0.0015, + "loss": 1.6782, + "step": 1830 + }, + { + "epoch": 0.1940928270042194, + "grad_norm": 0.9984588027000427, + "learning_rate": 0.0015, + "loss": 1.6742, + "step": 1840 + }, + { + "epoch": 0.19514767932489452, + "grad_norm": 0.8310997486114502, + "learning_rate": 0.0015, + "loss": 1.6804, + "step": 1850 + }, + { + "epoch": 0.1962025316455696, + 
"grad_norm": 0.8596823811531067, + "learning_rate": 0.0015, + "loss": 1.6801, + "step": 1860 + }, + { + "epoch": 0.19725738396624473, + "grad_norm": 1.032983422279358, + "learning_rate": 0.0015, + "loss": 1.6839, + "step": 1870 + }, + { + "epoch": 0.19831223628691982, + "grad_norm": 1.4641234874725342, + "learning_rate": 0.0015, + "loss": 1.6727, + "step": 1880 + }, + { + "epoch": 0.19936708860759494, + "grad_norm": 1.7239103317260742, + "learning_rate": 0.0015, + "loss": 1.687, + "step": 1890 + }, + { + "epoch": 0.20042194092827004, + "grad_norm": 1.1446754932403564, + "learning_rate": 0.0015, + "loss": 1.6716, + "step": 1900 + }, + { + "epoch": 0.20147679324894516, + "grad_norm": 0.7303375005722046, + "learning_rate": 0.0015, + "loss": 1.6672, + "step": 1910 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 0.8565747737884521, + "learning_rate": 0.0015, + "loss": 1.6676, + "step": 1920 + }, + { + "epoch": 0.20358649789029537, + "grad_norm": 1.4304906129837036, + "learning_rate": 0.0015, + "loss": 1.6703, + "step": 1930 + }, + { + "epoch": 0.20464135021097046, + "grad_norm": 0.7794588804244995, + "learning_rate": 0.0015, + "loss": 1.6617, + "step": 1940 + }, + { + "epoch": 0.20569620253164558, + "grad_norm": 1.0363630056381226, + "learning_rate": 0.0015, + "loss": 1.6622, + "step": 1950 + }, + { + "epoch": 0.20675105485232068, + "grad_norm": 0.8111469745635986, + "learning_rate": 0.0015, + "loss": 1.6624, + "step": 1960 + }, + { + "epoch": 0.20780590717299577, + "grad_norm": 0.9722016453742981, + "learning_rate": 0.0015, + "loss": 1.6744, + "step": 1970 + }, + { + "epoch": 0.2088607594936709, + "grad_norm": 1.5261015892028809, + "learning_rate": 0.0015, + "loss": 1.662, + "step": 1980 + }, + { + "epoch": 0.20991561181434598, + "grad_norm": 1.0485862493515015, + "learning_rate": 0.0015, + "loss": 1.6585, + "step": 1990 + }, + { + "epoch": 0.2109704641350211, + "grad_norm": 1.2649242877960205, + "learning_rate": 0.0015, + "loss": 1.6407, + "step": 2000 + }, + 
{ + "epoch": 0.2120253164556962, + "grad_norm": 1.0048673152923584, + "learning_rate": 0.0015, + "loss": 1.6444, + "step": 2010 + }, + { + "epoch": 0.21308016877637131, + "grad_norm": 1.2437852621078491, + "learning_rate": 0.0015, + "loss": 1.6516, + "step": 2020 + }, + { + "epoch": 0.2141350210970464, + "grad_norm": 0.9147947430610657, + "learning_rate": 0.0015, + "loss": 1.6441, + "step": 2030 + }, + { + "epoch": 0.21518987341772153, + "grad_norm": 0.9184978008270264, + "learning_rate": 0.0015, + "loss": 1.6469, + "step": 2040 + }, + { + "epoch": 0.21624472573839662, + "grad_norm": 0.7670009732246399, + "learning_rate": 0.0015, + "loss": 1.649, + "step": 2050 + }, + { + "epoch": 0.21729957805907174, + "grad_norm": 0.8591586351394653, + "learning_rate": 0.0015, + "loss": 1.6419, + "step": 2060 + }, + { + "epoch": 0.21835443037974683, + "grad_norm": 0.904686689376831, + "learning_rate": 0.0015, + "loss": 1.6418, + "step": 2070 + }, + { + "epoch": 0.21940928270042195, + "grad_norm": 0.8237951397895813, + "learning_rate": 0.0015, + "loss": 1.6498, + "step": 2080 + }, + { + "epoch": 0.22046413502109705, + "grad_norm": 0.8084820508956909, + "learning_rate": 0.0015, + "loss": 1.6415, + "step": 2090 + }, + { + "epoch": 0.22151898734177214, + "grad_norm": 0.7755509614944458, + "learning_rate": 0.0015, + "loss": 1.6475, + "step": 2100 + }, + { + "epoch": 0.22257383966244726, + "grad_norm": 1.1172354221343994, + "learning_rate": 0.0015, + "loss": 1.6389, + "step": 2110 + }, + { + "epoch": 0.22362869198312235, + "grad_norm": 0.6994699835777283, + "learning_rate": 0.0015, + "loss": 1.6352, + "step": 2120 + }, + { + "epoch": 0.22468354430379747, + "grad_norm": 0.6910060048103333, + "learning_rate": 0.0015, + "loss": 1.6389, + "step": 2130 + }, + { + "epoch": 0.22573839662447256, + "grad_norm": 0.7449250817298889, + "learning_rate": 0.0015, + "loss": 1.6363, + "step": 2140 + }, + { + "epoch": 0.22679324894514769, + "grad_norm": 1.6556404829025269, + "learning_rate": 0.0015, + 
"loss": 1.6385, + "step": 2150 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 1.477162480354309, + "learning_rate": 0.0015, + "loss": 1.6557, + "step": 2160 + }, + { + "epoch": 0.2289029535864979, + "grad_norm": 1.244597315788269, + "learning_rate": 0.0015, + "loss": 1.6325, + "step": 2170 + }, + { + "epoch": 0.229957805907173, + "grad_norm": 1.0506130456924438, + "learning_rate": 0.0015, + "loss": 1.6064, + "step": 2180 + }, + { + "epoch": 0.2310126582278481, + "grad_norm": 1.087409257888794, + "learning_rate": 0.0015, + "loss": 1.6176, + "step": 2190 + }, + { + "epoch": 0.2320675105485232, + "grad_norm": 1.3156177997589111, + "learning_rate": 0.0015, + "loss": 1.6195, + "step": 2200 + }, + { + "epoch": 0.23312236286919832, + "grad_norm": 0.8737175464630127, + "learning_rate": 0.0015, + "loss": 1.6052, + "step": 2210 + }, + { + "epoch": 0.23417721518987342, + "grad_norm": 0.9622544646263123, + "learning_rate": 0.0015, + "loss": 1.6276, + "step": 2220 + }, + { + "epoch": 0.23523206751054854, + "grad_norm": 0.8804680109024048, + "learning_rate": 0.0015, + "loss": 1.6253, + "step": 2230 + }, + { + "epoch": 0.23628691983122363, + "grad_norm": 1.0059597492218018, + "learning_rate": 0.0015, + "loss": 1.6286, + "step": 2240 + }, + { + "epoch": 0.23734177215189872, + "grad_norm": 0.9855912327766418, + "learning_rate": 0.0015, + "loss": 1.6157, + "step": 2250 + }, + { + "epoch": 0.23839662447257384, + "grad_norm": 0.7909422516822815, + "learning_rate": 0.0015, + "loss": 1.6077, + "step": 2260 + }, + { + "epoch": 0.23945147679324894, + "grad_norm": 0.7605032920837402, + "learning_rate": 0.0015, + "loss": 1.64, + "step": 2270 + }, + { + "epoch": 0.24050632911392406, + "grad_norm": 1.2274240255355835, + "learning_rate": 0.0015, + "loss": 1.6288, + "step": 2280 + }, + { + "epoch": 0.24156118143459915, + "grad_norm": 0.73419588804245, + "learning_rate": 0.0015, + "loss": 1.6229, + "step": 2290 + }, + { + "epoch": 0.24261603375527427, + "grad_norm": 0.8200790286064148, 
+ "learning_rate": 0.0015, + "loss": 1.6152, + "step": 2300 + }, + { + "epoch": 0.24367088607594936, + "grad_norm": 0.8523649573326111, + "learning_rate": 0.0015, + "loss": 1.6149, + "step": 2310 + }, + { + "epoch": 0.24472573839662448, + "grad_norm": 1.0392019748687744, + "learning_rate": 0.0015, + "loss": 1.6244, + "step": 2320 + }, + { + "epoch": 0.24578059071729957, + "grad_norm": 1.284392237663269, + "learning_rate": 0.0015, + "loss": 1.6087, + "step": 2330 + }, + { + "epoch": 0.2468354430379747, + "grad_norm": 0.8495599627494812, + "learning_rate": 0.0015, + "loss": 1.603, + "step": 2340 + }, + { + "epoch": 0.2478902953586498, + "grad_norm": 1.285441517829895, + "learning_rate": 0.0015, + "loss": 1.6029, + "step": 2350 + }, + { + "epoch": 0.2489451476793249, + "grad_norm": 0.7823737263679504, + "learning_rate": 0.0015, + "loss": 1.6148, + "step": 2360 + }, + { + "epoch": 0.25, + "grad_norm": 0.7628457546234131, + "learning_rate": 0.0015, + "loss": 1.6114, + "step": 2370 + }, + { + "epoch": 0.2510548523206751, + "grad_norm": 0.7867480516433716, + "learning_rate": 0.0015, + "loss": 1.6032, + "step": 2380 + }, + { + "epoch": 0.2521097046413502, + "grad_norm": 0.9032204747200012, + "learning_rate": 0.0015, + "loss": 1.6084, + "step": 2390 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 0.7454695105552673, + "learning_rate": 0.0015, + "loss": 1.6068, + "step": 2400 + }, + { + "epoch": 0.2542194092827004, + "grad_norm": 0.7464936971664429, + "learning_rate": 0.0015, + "loss": 1.6119, + "step": 2410 + }, + { + "epoch": 0.2552742616033755, + "grad_norm": 0.7432589530944824, + "learning_rate": 0.0015, + "loss": 1.5987, + "step": 2420 + }, + { + "epoch": 0.2563291139240506, + "grad_norm": 0.7591716051101685, + "learning_rate": 0.0015, + "loss": 1.6014, + "step": 2430 + }, + { + "epoch": 0.25738396624472576, + "grad_norm": 1.3022727966308594, + "learning_rate": 0.0015, + "loss": 1.6119, + "step": 2440 + }, + { + "epoch": 0.25843881856540085, + "grad_norm": 
0.8855205178260803, + "learning_rate": 0.0015, + "loss": 1.5972, + "step": 2450 + }, + { + "epoch": 0.25949367088607594, + "grad_norm": 0.8955703973770142, + "learning_rate": 0.0015, + "loss": 1.6099, + "step": 2460 + }, + { + "epoch": 0.26054852320675104, + "grad_norm": 0.7272588014602661, + "learning_rate": 0.0015, + "loss": 1.6009, + "step": 2470 + }, + { + "epoch": 0.2616033755274262, + "grad_norm": 0.7769203186035156, + "learning_rate": 0.0015, + "loss": 1.6004, + "step": 2480 + }, + { + "epoch": 0.2626582278481013, + "grad_norm": 0.8273778557777405, + "learning_rate": 0.0015, + "loss": 1.584, + "step": 2490 + }, + { + "epoch": 0.26371308016877637, + "grad_norm": 0.8707215189933777, + "learning_rate": 0.0015, + "loss": 1.588, + "step": 2500 + }, + { + "epoch": 0.26476793248945146, + "grad_norm": 0.7950523495674133, + "learning_rate": 0.0015, + "loss": 1.5958, + "step": 2510 + }, + { + "epoch": 0.26582278481012656, + "grad_norm": 1.4492233991622925, + "learning_rate": 0.0015, + "loss": 1.5986, + "step": 2520 + }, + { + "epoch": 0.2668776371308017, + "grad_norm": 1.3407398462295532, + "learning_rate": 0.0015, + "loss": 1.596, + "step": 2530 + }, + { + "epoch": 0.2679324894514768, + "grad_norm": 0.7252386808395386, + "learning_rate": 0.0015, + "loss": 1.5957, + "step": 2540 + }, + { + "epoch": 0.2689873417721519, + "grad_norm": 0.6952676177024841, + "learning_rate": 0.0015, + "loss": 1.5898, + "step": 2550 + }, + { + "epoch": 0.270042194092827, + "grad_norm": 0.7816562056541443, + "learning_rate": 0.0015, + "loss": 1.5978, + "step": 2560 + }, + { + "epoch": 0.27109704641350213, + "grad_norm": 0.742734968662262, + "learning_rate": 0.0015, + "loss": 1.5887, + "step": 2570 + }, + { + "epoch": 0.2721518987341772, + "grad_norm": 0.9567485451698303, + "learning_rate": 0.0015, + "loss": 1.5918, + "step": 2580 + }, + { + "epoch": 0.2732067510548523, + "grad_norm": 0.8258093595504761, + "learning_rate": 0.0015, + "loss": 1.5964, + "step": 2590 + }, + { + "epoch": 
0.2742616033755274, + "grad_norm": 0.7969068884849548, + "learning_rate": 0.0015, + "loss": 1.5943, + "step": 2600 + }, + { + "epoch": 0.27531645569620256, + "grad_norm": 0.9237832427024841, + "learning_rate": 0.0015, + "loss": 1.5935, + "step": 2610 + }, + { + "epoch": 0.27637130801687765, + "grad_norm": 1.0418105125427246, + "learning_rate": 0.0015, + "loss": 1.5872, + "step": 2620 + }, + { + "epoch": 0.27742616033755274, + "grad_norm": 0.8371959924697876, + "learning_rate": 0.0015, + "loss": 1.5801, + "step": 2630 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 0.8801147937774658, + "learning_rate": 0.0015, + "loss": 1.5895, + "step": 2640 + }, + { + "epoch": 0.2795358649789029, + "grad_norm": 0.7982711791992188, + "learning_rate": 0.0015, + "loss": 1.5801, + "step": 2650 + }, + { + "epoch": 0.2805907172995781, + "grad_norm": 0.7942262887954712, + "learning_rate": 0.0015, + "loss": 1.5765, + "step": 2660 + }, + { + "epoch": 0.28164556962025317, + "grad_norm": 0.7188666462898254, + "learning_rate": 0.0015, + "loss": 1.5767, + "step": 2670 + }, + { + "epoch": 0.28270042194092826, + "grad_norm": 0.7271984815597534, + "learning_rate": 0.0015, + "loss": 1.5767, + "step": 2680 + }, + { + "epoch": 0.28375527426160335, + "grad_norm": 0.8093920350074768, + "learning_rate": 0.0015, + "loss": 1.5753, + "step": 2690 + }, + { + "epoch": 0.2848101265822785, + "grad_norm": 0.8024405241012573, + "learning_rate": 0.0015, + "loss": 1.5705, + "step": 2700 + }, + { + "epoch": 0.2858649789029536, + "grad_norm": 0.9707393050193787, + "learning_rate": 0.0015, + "loss": 1.5792, + "step": 2710 + }, + { + "epoch": 0.2869198312236287, + "grad_norm": 0.7250108122825623, + "learning_rate": 0.0015, + "loss": 1.5772, + "step": 2720 + }, + { + "epoch": 0.2879746835443038, + "grad_norm": 0.904821515083313, + "learning_rate": 0.0015, + "loss": 1.5582, + "step": 2730 + }, + { + "epoch": 0.2890295358649789, + "grad_norm": 1.6974451541900635, + "learning_rate": 0.0015, + "loss": 1.572, + 
"step": 2740 + }, + { + "epoch": 0.290084388185654, + "grad_norm": 1.7962638139724731, + "learning_rate": 0.0015, + "loss": 1.5839, + "step": 2750 + }, + { + "epoch": 0.2911392405063291, + "grad_norm": 1.3701332807540894, + "learning_rate": 0.0015, + "loss": 1.581, + "step": 2760 + }, + { + "epoch": 0.2921940928270042, + "grad_norm": 1.520825743675232, + "learning_rate": 0.0015, + "loss": 1.5673, + "step": 2770 + }, + { + "epoch": 0.29324894514767935, + "grad_norm": 1.5979210138320923, + "learning_rate": 0.0015, + "loss": 1.5553, + "step": 2780 + }, + { + "epoch": 0.29430379746835444, + "grad_norm": 1.1109466552734375, + "learning_rate": 0.0015, + "loss": 1.5568, + "step": 2790 + }, + { + "epoch": 0.29535864978902954, + "grad_norm": 0.9549930095672607, + "learning_rate": 0.0015, + "loss": 1.5499, + "step": 2800 + }, + { + "epoch": 0.29641350210970463, + "grad_norm": 0.9649303555488586, + "learning_rate": 0.0015, + "loss": 1.559, + "step": 2810 + }, + { + "epoch": 0.2974683544303797, + "grad_norm": 0.8223366141319275, + "learning_rate": 0.0015, + "loss": 1.5729, + "step": 2820 + }, + { + "epoch": 0.29852320675105487, + "grad_norm": 0.9037811160087585, + "learning_rate": 0.0015, + "loss": 1.5474, + "step": 2830 + }, + { + "epoch": 0.29957805907172996, + "grad_norm": 1.0865650177001953, + "learning_rate": 0.0015, + "loss": 1.5611, + "step": 2840 + }, + { + "epoch": 0.30063291139240506, + "grad_norm": 0.9263425469398499, + "learning_rate": 0.0015, + "loss": 1.5584, + "step": 2850 + }, + { + "epoch": 0.30168776371308015, + "grad_norm": 0.8378676176071167, + "learning_rate": 0.0015, + "loss": 1.5608, + "step": 2860 + }, + { + "epoch": 0.3027426160337553, + "grad_norm": 0.7904060482978821, + "learning_rate": 0.0015, + "loss": 1.5631, + "step": 2870 + }, + { + "epoch": 0.3037974683544304, + "grad_norm": 1.1880890130996704, + "learning_rate": 0.0015, + "loss": 1.552, + "step": 2880 + }, + { + "epoch": 0.3048523206751055, + "grad_norm": 0.7788017392158508, + "learning_rate": 
0.0015, + "loss": 1.5571, + "step": 2890 + }, + { + "epoch": 0.3059071729957806, + "grad_norm": 0.7393240928649902, + "learning_rate": 0.0015, + "loss": 1.5638, + "step": 2900 + }, + { + "epoch": 0.3069620253164557, + "grad_norm": 1.5488377809524536, + "learning_rate": 0.0015, + "loss": 1.5655, + "step": 2910 + }, + { + "epoch": 0.3080168776371308, + "grad_norm": 1.727482557296753, + "learning_rate": 0.0015, + "loss": 1.565, + "step": 2920 + }, + { + "epoch": 0.3090717299578059, + "grad_norm": 1.0410548448562622, + "learning_rate": 0.0015, + "loss": 1.5485, + "step": 2930 + }, + { + "epoch": 0.310126582278481, + "grad_norm": 0.7249805331230164, + "learning_rate": 0.0015, + "loss": 1.5456, + "step": 2940 + }, + { + "epoch": 0.3111814345991561, + "grad_norm": 1.1945127248764038, + "learning_rate": 0.0015, + "loss": 1.5557, + "step": 2950 + }, + { + "epoch": 0.31223628691983124, + "grad_norm": 1.2108405828475952, + "learning_rate": 0.0015, + "loss": 1.5489, + "step": 2960 + }, + { + "epoch": 0.31329113924050633, + "grad_norm": 1.587646484375, + "learning_rate": 0.0015, + "loss": 1.5363, + "step": 2970 + }, + { + "epoch": 0.3143459915611814, + "grad_norm": 1.1653233766555786, + "learning_rate": 0.0015, + "loss": 1.5399, + "step": 2980 + }, + { + "epoch": 0.3154008438818565, + "grad_norm": 1.39346182346344, + "learning_rate": 0.0015, + "loss": 1.5552, + "step": 2990 + }, + { + "epoch": 0.31645569620253167, + "grad_norm": 1.3960925340652466, + "learning_rate": 0.0015, + "loss": 1.5389, + "step": 3000 + }, + { + "epoch": 0.31751054852320676, + "grad_norm": 1.1005711555480957, + "learning_rate": 0.0015, + "loss": 1.5472, + "step": 3010 + }, + { + "epoch": 0.31856540084388185, + "grad_norm": 0.9205166101455688, + "learning_rate": 0.0015, + "loss": 1.5524, + "step": 3020 + }, + { + "epoch": 0.31962025316455694, + "grad_norm": 0.665204644203186, + "learning_rate": 0.0015, + "loss": 1.5355, + "step": 3030 + }, + { + "epoch": 0.3206751054852321, + "grad_norm": 
0.8014669418334961, + "learning_rate": 0.0015, + "loss": 1.5488, + "step": 3040 + }, + { + "epoch": 0.3217299578059072, + "grad_norm": 1.0097742080688477, + "learning_rate": 0.0015, + "loss": 1.543, + "step": 3050 + }, + { + "epoch": 0.3227848101265823, + "grad_norm": 0.7205875515937805, + "learning_rate": 0.0015, + "loss": 1.5567, + "step": 3060 + }, + { + "epoch": 0.32383966244725737, + "grad_norm": 0.7192597985267639, + "learning_rate": 0.0015, + "loss": 1.5452, + "step": 3070 + }, + { + "epoch": 0.32489451476793246, + "grad_norm": 0.8210223317146301, + "learning_rate": 0.0015, + "loss": 1.5382, + "step": 3080 + }, + { + "epoch": 0.3259493670886076, + "grad_norm": 0.6948227286338806, + "learning_rate": 0.0015, + "loss": 1.5567, + "step": 3090 + }, + { + "epoch": 0.3270042194092827, + "grad_norm": 0.8766517043113708, + "learning_rate": 0.0015, + "loss": 1.5402, + "step": 3100 + }, + { + "epoch": 0.3280590717299578, + "grad_norm": 0.9443091154098511, + "learning_rate": 0.0015, + "loss": 1.5419, + "step": 3110 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 0.814469039440155, + "learning_rate": 0.0015, + "loss": 1.5461, + "step": 3120 + }, + { + "epoch": 0.33016877637130804, + "grad_norm": 0.8049113750457764, + "learning_rate": 0.0015, + "loss": 1.5577, + "step": 3130 + }, + { + "epoch": 0.33122362869198313, + "grad_norm": 1.1375205516815186, + "learning_rate": 0.0015, + "loss": 1.5354, + "step": 3140 + }, + { + "epoch": 0.3322784810126582, + "grad_norm": 0.723448634147644, + "learning_rate": 0.0015, + "loss": 1.5429, + "step": 3150 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.8229670524597168, + "learning_rate": 0.0015, + "loss": 1.5443, + "step": 3160 + }, + { + "epoch": 0.33438818565400846, + "grad_norm": 1.014961838722229, + "learning_rate": 0.0015, + "loss": 1.5459, + "step": 3170 + }, + { + "epoch": 0.33544303797468356, + "grad_norm": 0.858233630657196, + "learning_rate": 0.0015, + "loss": 1.5411, + "step": 3180 + }, + { + "epoch": 
0.33649789029535865, + "grad_norm": 0.9734646081924438, + "learning_rate": 0.0015, + "loss": 1.538, + "step": 3190 + }, + { + "epoch": 0.33755274261603374, + "grad_norm": 0.8653167486190796, + "learning_rate": 0.0015, + "loss": 1.5337, + "step": 3200 + }, + { + "epoch": 0.33860759493670883, + "grad_norm": 1.6927708387374878, + "learning_rate": 0.0015, + "loss": 1.5547, + "step": 3210 + }, + { + "epoch": 0.339662447257384, + "grad_norm": 1.0919712781906128, + "learning_rate": 0.0015, + "loss": 1.54, + "step": 3220 + }, + { + "epoch": 0.3407172995780591, + "grad_norm": 1.0060960054397583, + "learning_rate": 0.0015, + "loss": 1.5254, + "step": 3230 + }, + { + "epoch": 0.34177215189873417, + "grad_norm": 0.8458909392356873, + "learning_rate": 0.0015, + "loss": 1.5025, + "step": 3240 + }, + { + "epoch": 0.34282700421940926, + "grad_norm": 0.7706610560417175, + "learning_rate": 0.0015, + "loss": 1.5267, + "step": 3250 + }, + { + "epoch": 0.3438818565400844, + "grad_norm": 0.781003475189209, + "learning_rate": 0.0015, + "loss": 1.5205, + "step": 3260 + }, + { + "epoch": 0.3449367088607595, + "grad_norm": 0.7733911275863647, + "learning_rate": 0.0015, + "loss": 1.5439, + "step": 3270 + }, + { + "epoch": 0.3459915611814346, + "grad_norm": 1.0572519302368164, + "learning_rate": 0.0015, + "loss": 1.5428, + "step": 3280 + }, + { + "epoch": 0.3470464135021097, + "grad_norm": 1.2839736938476562, + "learning_rate": 0.0015, + "loss": 1.5341, + "step": 3290 + }, + { + "epoch": 0.34810126582278483, + "grad_norm": 1.434453010559082, + "learning_rate": 0.0015, + "loss": 1.536, + "step": 3300 + }, + { + "epoch": 0.3491561181434599, + "grad_norm": 0.8537628054618835, + "learning_rate": 0.0015, + "loss": 1.5227, + "step": 3310 + }, + { + "epoch": 0.350210970464135, + "grad_norm": 0.7342628240585327, + "learning_rate": 0.0015, + "loss": 1.5305, + "step": 3320 + }, + { + "epoch": 0.3512658227848101, + "grad_norm": 0.7750384211540222, + "learning_rate": 0.0015, + "loss": 1.5257, + "step": 
3330 + }, + { + "epoch": 0.35232067510548526, + "grad_norm": 1.0470014810562134, + "learning_rate": 0.0015, + "loss": 1.5335, + "step": 3340 + }, + { + "epoch": 0.35337552742616035, + "grad_norm": 0.7823499441146851, + "learning_rate": 0.0015, + "loss": 1.525, + "step": 3350 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 0.7475601434707642, + "learning_rate": 0.0015, + "loss": 1.5237, + "step": 3360 + }, + { + "epoch": 0.35548523206751054, + "grad_norm": 0.8280608057975769, + "learning_rate": 0.0015, + "loss": 1.5346, + "step": 3370 + }, + { + "epoch": 0.35654008438818563, + "grad_norm": 0.8254327178001404, + "learning_rate": 0.0015, + "loss": 1.5259, + "step": 3380 + }, + { + "epoch": 0.3575949367088608, + "grad_norm": 0.9501877427101135, + "learning_rate": 0.0015, + "loss": 1.5242, + "step": 3390 + }, + { + "epoch": 0.35864978902953587, + "grad_norm": 0.7832955121994019, + "learning_rate": 0.0015, + "loss": 1.5311, + "step": 3400 + }, + { + "epoch": 0.35970464135021096, + "grad_norm": 0.8145080208778381, + "learning_rate": 0.0015, + "loss": 1.5279, + "step": 3410 + }, + { + "epoch": 0.36075949367088606, + "grad_norm": 0.8909751176834106, + "learning_rate": 0.0015, + "loss": 1.5097, + "step": 3420 + }, + { + "epoch": 0.3618143459915612, + "grad_norm": 0.7514746785163879, + "learning_rate": 0.0015, + "loss": 1.5257, + "step": 3430 + }, + { + "epoch": 0.3628691983122363, + "grad_norm": 0.8049983382225037, + "learning_rate": 0.0015, + "loss": 1.5212, + "step": 3440 + }, + { + "epoch": 0.3639240506329114, + "grad_norm": 0.7919100522994995, + "learning_rate": 0.0015, + "loss": 1.5183, + "step": 3450 + }, + { + "epoch": 0.3649789029535865, + "grad_norm": 0.7895784378051758, + "learning_rate": 0.0015, + "loss": 1.5141, + "step": 3460 + }, + { + "epoch": 0.36603375527426163, + "grad_norm": 0.8803941011428833, + "learning_rate": 0.0015, + "loss": 1.5311, + "step": 3470 + }, + { + "epoch": 0.3670886075949367, + "grad_norm": 1.9049732685089111, + "learning_rate": 
0.0015, + "loss": 1.5382, + "step": 3480 + }, + { + "epoch": 0.3681434599156118, + "grad_norm": 1.9559845924377441, + "learning_rate": 0.0015, + "loss": 1.5307, + "step": 3490 + }, + { + "epoch": 0.3691983122362869, + "grad_norm": 1.4480795860290527, + "learning_rate": 0.0015, + "loss": 1.499, + "step": 3500 + }, + { + "epoch": 0.370253164556962, + "grad_norm": 1.0502501726150513, + "learning_rate": 0.0015, + "loss": 1.5163, + "step": 3510 + }, + { + "epoch": 0.37130801687763715, + "grad_norm": 0.7384530305862427, + "learning_rate": 0.0015, + "loss": 1.5109, + "step": 3520 + }, + { + "epoch": 0.37236286919831224, + "grad_norm": 0.9356790781021118, + "learning_rate": 0.0015, + "loss": 1.527, + "step": 3530 + }, + { + "epoch": 0.37341772151898733, + "grad_norm": 1.3657453060150146, + "learning_rate": 0.0015, + "loss": 1.5238, + "step": 3540 + }, + { + "epoch": 0.3744725738396624, + "grad_norm": 0.9036597609519958, + "learning_rate": 0.0015, + "loss": 1.5165, + "step": 3550 + }, + { + "epoch": 0.3755274261603376, + "grad_norm": 0.7337153553962708, + "learning_rate": 0.0015, + "loss": 1.5142, + "step": 3560 + }, + { + "epoch": 0.37658227848101267, + "grad_norm": 1.1770774126052856, + "learning_rate": 0.0015, + "loss": 1.5129, + "step": 3570 + }, + { + "epoch": 0.37763713080168776, + "grad_norm": 0.8776475191116333, + "learning_rate": 0.0015, + "loss": 1.513, + "step": 3580 + }, + { + "epoch": 0.37869198312236285, + "grad_norm": 1.6191794872283936, + "learning_rate": 0.0015, + "loss": 1.532, + "step": 3590 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 1.553029179573059, + "learning_rate": 0.0015, + "loss": 1.504, + "step": 3600 + }, + { + "epoch": 0.3808016877637131, + "grad_norm": 1.268593668937683, + "learning_rate": 0.0015, + "loss": 1.4975, + "step": 3610 + }, + { + "epoch": 0.3818565400843882, + "grad_norm": 0.9780778884887695, + "learning_rate": 0.0015, + "loss": 1.4979, + "step": 3620 + }, + { + "epoch": 0.3829113924050633, + "grad_norm": 
0.8036577105522156, + "learning_rate": 0.0015, + "loss": 1.5032, + "step": 3630 + }, + { + "epoch": 0.38396624472573837, + "grad_norm": 1.0447702407836914, + "learning_rate": 0.0015, + "loss": 1.5069, + "step": 3640 + }, + { + "epoch": 0.3850210970464135, + "grad_norm": 0.9029561281204224, + "learning_rate": 0.0015, + "loss": 1.5193, + "step": 3650 + }, + { + "epoch": 0.3860759493670886, + "grad_norm": 1.476319670677185, + "learning_rate": 0.0015, + "loss": 1.5142, + "step": 3660 + }, + { + "epoch": 0.3871308016877637, + "grad_norm": 1.5154517889022827, + "learning_rate": 0.0015, + "loss": 1.514, + "step": 3670 + }, + { + "epoch": 0.3881856540084388, + "grad_norm": 0.9895890355110168, + "learning_rate": 0.0015, + "loss": 1.51, + "step": 3680 + }, + { + "epoch": 0.38924050632911394, + "grad_norm": 1.0274276733398438, + "learning_rate": 0.0015, + "loss": 1.5088, + "step": 3690 + }, + { + "epoch": 0.39029535864978904, + "grad_norm": 0.7761687636375427, + "learning_rate": 0.0015, + "loss": 1.5168, + "step": 3700 + }, + { + "epoch": 0.39135021097046413, + "grad_norm": 0.7506299018859863, + "learning_rate": 0.0015, + "loss": 1.5089, + "step": 3710 + }, + { + "epoch": 0.3924050632911392, + "grad_norm": 0.67684006690979, + "learning_rate": 0.0015, + "loss": 1.4895, + "step": 3720 + }, + { + "epoch": 0.39345991561181437, + "grad_norm": 0.6793816089630127, + "learning_rate": 0.0015, + "loss": 1.5089, + "step": 3730 + }, + { + "epoch": 0.39451476793248946, + "grad_norm": 0.8591964244842529, + "learning_rate": 0.0015, + "loss": 1.5108, + "step": 3740 + }, + { + "epoch": 0.39556962025316456, + "grad_norm": 1.0348570346832275, + "learning_rate": 0.0015, + "loss": 1.514, + "step": 3750 + }, + { + "epoch": 0.39662447257383965, + "grad_norm": 0.9831239581108093, + "learning_rate": 0.0015, + "loss": 1.5015, + "step": 3760 + }, + { + "epoch": 0.39767932489451474, + "grad_norm": 0.817678689956665, + "learning_rate": 0.0015, + "loss": 1.5176, + "step": 3770 + }, + { + "epoch": 
0.3987341772151899, + "grad_norm": 0.7182941436767578, + "learning_rate": 0.0015, + "loss": 1.5031, + "step": 3780 + }, + { + "epoch": 0.399789029535865, + "grad_norm": 0.707720160484314, + "learning_rate": 0.0015, + "loss": 1.5045, + "step": 3790 + }, + { + "epoch": 0.4008438818565401, + "grad_norm": 0.7496322989463806, + "learning_rate": 0.0015, + "loss": 1.5103, + "step": 3800 + }, + { + "epoch": 0.40189873417721517, + "grad_norm": 1.032157063484192, + "learning_rate": 0.0015, + "loss": 1.5057, + "step": 3810 + }, + { + "epoch": 0.4029535864978903, + "grad_norm": 0.7606027722358704, + "learning_rate": 0.0015, + "loss": 1.4889, + "step": 3820 + }, + { + "epoch": 0.4040084388185654, + "grad_norm": 0.9294939041137695, + "learning_rate": 0.0015, + "loss": 1.4901, + "step": 3830 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 0.9041895866394043, + "learning_rate": 0.0015, + "loss": 1.5157, + "step": 3840 + }, + { + "epoch": 0.4061181434599156, + "grad_norm": 0.7366722226142883, + "learning_rate": 0.0015, + "loss": 1.4983, + "step": 3850 + }, + { + "epoch": 0.40717299578059074, + "grad_norm": 0.8771160840988159, + "learning_rate": 0.0015, + "loss": 1.5124, + "step": 3860 + }, + { + "epoch": 0.40822784810126583, + "grad_norm": 0.7493230104446411, + "learning_rate": 0.0015, + "loss": 1.5004, + "step": 3870 + }, + { + "epoch": 0.4092827004219409, + "grad_norm": 0.7156760692596436, + "learning_rate": 0.0015, + "loss": 1.4891, + "step": 3880 + }, + { + "epoch": 0.410337552742616, + "grad_norm": 1.3130086660385132, + "learning_rate": 0.0015, + "loss": 1.4914, + "step": 3890 + }, + { + "epoch": 0.41139240506329117, + "grad_norm": 0.8592076897621155, + "learning_rate": 0.0015, + "loss": 1.4951, + "step": 3900 + }, + { + "epoch": 0.41244725738396626, + "grad_norm": 0.9535844922065735, + "learning_rate": 0.0015, + "loss": 1.4942, + "step": 3910 + }, + { + "epoch": 0.41350210970464135, + "grad_norm": 0.8603590726852417, + "learning_rate": 0.0015, + "loss": 1.4904, + 
"step": 3920 + }, + { + "epoch": 0.41455696202531644, + "grad_norm": 1.1170023679733276, + "learning_rate": 0.0015, + "loss": 1.5004, + "step": 3930 + }, + { + "epoch": 0.41561181434599154, + "grad_norm": 0.7160546183586121, + "learning_rate": 0.0015, + "loss": 1.4831, + "step": 3940 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 1.1518510580062866, + "learning_rate": 0.0015, + "loss": 1.485, + "step": 3950 + }, + { + "epoch": 0.4177215189873418, + "grad_norm": 0.8579856157302856, + "learning_rate": 0.0015, + "loss": 1.4937, + "step": 3960 + }, + { + "epoch": 0.41877637130801687, + "grad_norm": 0.8090450763702393, + "learning_rate": 0.0015, + "loss": 1.4845, + "step": 3970 + }, + { + "epoch": 0.41983122362869196, + "grad_norm": 0.6617305874824524, + "learning_rate": 0.0015, + "loss": 1.4971, + "step": 3980 + }, + { + "epoch": 0.4208860759493671, + "grad_norm": 1.7243452072143555, + "learning_rate": 0.0015, + "loss": 1.4916, + "step": 3990 + }, + { + "epoch": 0.4219409282700422, + "grad_norm": 0.9017415046691895, + "learning_rate": 0.0015, + "loss": 1.5025, + "step": 4000 + }, + { + "epoch": 0.4229957805907173, + "grad_norm": 1.258110761642456, + "learning_rate": 0.0015, + "loss": 1.4919, + "step": 4010 + }, + { + "epoch": 0.4240506329113924, + "grad_norm": 0.8516669869422913, + "learning_rate": 0.0015, + "loss": 1.4918, + "step": 4020 + }, + { + "epoch": 0.42510548523206754, + "grad_norm": 0.8630198836326599, + "learning_rate": 0.0015, + "loss": 1.4723, + "step": 4030 + }, + { + "epoch": 0.42616033755274263, + "grad_norm": 0.7881191968917847, + "learning_rate": 0.0015, + "loss": 1.4876, + "step": 4040 + }, + { + "epoch": 0.4272151898734177, + "grad_norm": 0.6973815560340881, + "learning_rate": 0.0015, + "loss": 1.491, + "step": 4050 + }, + { + "epoch": 0.4282700421940928, + "grad_norm": 1.2573370933532715, + "learning_rate": 0.0015, + "loss": 1.496, + "step": 4060 + }, + { + "epoch": 0.4293248945147679, + "grad_norm": 0.7609820365905762, + "learning_rate": 
0.0015, + "loss": 1.4919, + "step": 4070 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 1.2620562314987183, + "learning_rate": 0.0015, + "loss": 1.5011, + "step": 4080 + }, + { + "epoch": 0.43143459915611815, + "grad_norm": 0.9185616970062256, + "learning_rate": 0.0015, + "loss": 1.4881, + "step": 4090 + }, + { + "epoch": 0.43248945147679324, + "grad_norm": 0.7457355260848999, + "learning_rate": 0.0015, + "loss": 1.4784, + "step": 4100 + }, + { + "epoch": 0.43354430379746833, + "grad_norm": 0.7375855445861816, + "learning_rate": 0.0015, + "loss": 1.4755, + "step": 4110 + }, + { + "epoch": 0.4345991561181435, + "grad_norm": 0.8157951831817627, + "learning_rate": 0.0015, + "loss": 1.489, + "step": 4120 + }, + { + "epoch": 0.4356540084388186, + "grad_norm": 0.9145938158035278, + "learning_rate": 0.0015, + "loss": 1.4793, + "step": 4130 + }, + { + "epoch": 0.43670886075949367, + "grad_norm": 1.1349347829818726, + "learning_rate": 0.0015, + "loss": 1.4889, + "step": 4140 + }, + { + "epoch": 0.43776371308016876, + "grad_norm": 1.352932095527649, + "learning_rate": 0.0015, + "loss": 1.4916, + "step": 4150 + }, + { + "epoch": 0.4388185654008439, + "grad_norm": 0.8317147493362427, + "learning_rate": 0.0015, + "loss": 1.4726, + "step": 4160 + }, + { + "epoch": 0.439873417721519, + "grad_norm": 0.9205922484397888, + "learning_rate": 0.0015, + "loss": 1.4702, + "step": 4170 + }, + { + "epoch": 0.4409282700421941, + "grad_norm": 0.8562262654304504, + "learning_rate": 0.0015, + "loss": 1.4711, + "step": 4180 + }, + { + "epoch": 0.4419831223628692, + "grad_norm": 1.0415457487106323, + "learning_rate": 0.0015, + "loss": 1.484, + "step": 4190 + }, + { + "epoch": 0.4430379746835443, + "grad_norm": 1.134642243385315, + "learning_rate": 0.0015, + "loss": 1.4894, + "step": 4200 + }, + { + "epoch": 0.4440928270042194, + "grad_norm": 0.8800190687179565, + "learning_rate": 0.0015, + "loss": 1.4733, + "step": 4210 + }, + { + "epoch": 0.4451476793248945, + "grad_norm": 
0.8477508425712585, + "learning_rate": 0.0015, + "loss": 1.4838, + "step": 4220 + }, + { + "epoch": 0.4462025316455696, + "grad_norm": 1.5659480094909668, + "learning_rate": 0.0015, + "loss": 1.4845, + "step": 4230 + }, + { + "epoch": 0.4472573839662447, + "grad_norm": 0.6970528960227966, + "learning_rate": 0.0015, + "loss": 1.4807, + "step": 4240 + }, + { + "epoch": 0.44831223628691985, + "grad_norm": 0.7401099801063538, + "learning_rate": 0.0015, + "loss": 1.4827, + "step": 4250 + }, + { + "epoch": 0.44936708860759494, + "grad_norm": 0.9524136185646057, + "learning_rate": 0.0015, + "loss": 1.4876, + "step": 4260 + }, + { + "epoch": 0.45042194092827004, + "grad_norm": 0.8094889521598816, + "learning_rate": 0.0015, + "loss": 1.4786, + "step": 4270 + }, + { + "epoch": 0.45147679324894513, + "grad_norm": 1.2130169868469238, + "learning_rate": 0.0015, + "loss": 1.472, + "step": 4280 + }, + { + "epoch": 0.4525316455696203, + "grad_norm": 1.67189621925354, + "learning_rate": 0.0015, + "loss": 1.486, + "step": 4290 + }, + { + "epoch": 0.45358649789029537, + "grad_norm": 0.9942037463188171, + "learning_rate": 0.0015, + "loss": 1.4826, + "step": 4300 + }, + { + "epoch": 0.45464135021097046, + "grad_norm": 0.8564034700393677, + "learning_rate": 0.0015, + "loss": 1.4796, + "step": 4310 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 1.0184824466705322, + "learning_rate": 0.0015, + "loss": 1.4799, + "step": 4320 + }, + { + "epoch": 0.45675105485232065, + "grad_norm": 0.8033286929130554, + "learning_rate": 0.0015, + "loss": 1.4826, + "step": 4330 + }, + { + "epoch": 0.4578059071729958, + "grad_norm": 0.8001899123191833, + "learning_rate": 0.0015, + "loss": 1.4763, + "step": 4340 + }, + { + "epoch": 0.4588607594936709, + "grad_norm": 0.6640867590904236, + "learning_rate": 0.0015, + "loss": 1.4768, + "step": 4350 + }, + { + "epoch": 0.459915611814346, + "grad_norm": 1.0242187976837158, + "learning_rate": 0.0015, + "loss": 1.4946, + "step": 4360 + }, + { + "epoch": 
0.4609704641350211, + "grad_norm": 0.7344139218330383, + "learning_rate": 0.0015, + "loss": 1.4723, + "step": 4370 + }, + { + "epoch": 0.4620253164556962, + "grad_norm": 0.8038732409477234, + "learning_rate": 0.0015, + "loss": 1.4751, + "step": 4380 + }, + { + "epoch": 0.4630801687763713, + "grad_norm": 0.7014099955558777, + "learning_rate": 0.0015, + "loss": 1.4705, + "step": 4390 + }, + { + "epoch": 0.4641350210970464, + "grad_norm": 1.0165364742279053, + "learning_rate": 0.0015, + "loss": 1.4791, + "step": 4400 + }, + { + "epoch": 0.4651898734177215, + "grad_norm": 1.0620061159133911, + "learning_rate": 0.0015, + "loss": 1.49, + "step": 4410 + }, + { + "epoch": 0.46624472573839665, + "grad_norm": 1.5204108953475952, + "learning_rate": 0.0015, + "loss": 1.4788, + "step": 4420 + }, + { + "epoch": 0.46729957805907174, + "grad_norm": 1.3763737678527832, + "learning_rate": 0.0015, + "loss": 1.4878, + "step": 4430 + }, + { + "epoch": 0.46835443037974683, + "grad_norm": 1.1369930505752563, + "learning_rate": 0.0015, + "loss": 1.4703, + "step": 4440 + }, + { + "epoch": 0.4694092827004219, + "grad_norm": 0.8767896294593811, + "learning_rate": 0.0015, + "loss": 1.4677, + "step": 4450 + }, + { + "epoch": 0.4704641350210971, + "grad_norm": 0.7189898490905762, + "learning_rate": 0.0015, + "loss": 1.4591, + "step": 4460 + }, + { + "epoch": 0.47151898734177217, + "grad_norm": 0.7376815676689148, + "learning_rate": 0.0015, + "loss": 1.4733, + "step": 4470 + }, + { + "epoch": 0.47257383966244726, + "grad_norm": 0.810576856136322, + "learning_rate": 0.0015, + "loss": 1.4814, + "step": 4480 + }, + { + "epoch": 0.47362869198312235, + "grad_norm": 0.992668092250824, + "learning_rate": 0.0015, + "loss": 1.4682, + "step": 4490 + }, + { + "epoch": 0.47468354430379744, + "grad_norm": 0.6415207386016846, + "learning_rate": 0.0015, + "loss": 1.4708, + "step": 4500 + }, + { + "epoch": 0.4757383966244726, + "grad_norm": 0.812940239906311, + "learning_rate": 0.0015, + "loss": 1.4751, + 
"step": 4510 + }, + { + "epoch": 0.4767932489451477, + "grad_norm": 0.779383659362793, + "learning_rate": 0.0015, + "loss": 1.457, + "step": 4520 + }, + { + "epoch": 0.4778481012658228, + "grad_norm": 1.0059748888015747, + "learning_rate": 0.0015, + "loss": 1.4774, + "step": 4530 + }, + { + "epoch": 0.47890295358649787, + "grad_norm": 1.1730549335479736, + "learning_rate": 0.0015, + "loss": 1.4784, + "step": 4540 + }, + { + "epoch": 0.479957805907173, + "grad_norm": 0.8351691365242004, + "learning_rate": 0.0015, + "loss": 1.4755, + "step": 4550 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 0.7783625721931458, + "learning_rate": 0.0015, + "loss": 1.469, + "step": 4560 + }, + { + "epoch": 0.4820675105485232, + "grad_norm": 0.677624523639679, + "learning_rate": 0.0015, + "loss": 1.4463, + "step": 4570 + }, + { + "epoch": 0.4831223628691983, + "grad_norm": 0.7732692956924438, + "learning_rate": 0.0015, + "loss": 1.4681, + "step": 4580 + }, + { + "epoch": 0.48417721518987344, + "grad_norm": 0.71184241771698, + "learning_rate": 0.0015, + "loss": 1.4551, + "step": 4590 + }, + { + "epoch": 0.48523206751054854, + "grad_norm": 0.7908509373664856, + "learning_rate": 0.0015, + "loss": 1.4742, + "step": 4600 + }, + { + "epoch": 0.48628691983122363, + "grad_norm": 0.756129264831543, + "learning_rate": 0.0015, + "loss": 1.4841, + "step": 4610 + }, + { + "epoch": 0.4873417721518987, + "grad_norm": 0.7894635796546936, + "learning_rate": 0.0015, + "loss": 1.4475, + "step": 4620 + }, + { + "epoch": 0.4883966244725738, + "grad_norm": 1.121127963066101, + "learning_rate": 0.0015, + "loss": 1.472, + "step": 4630 + }, + { + "epoch": 0.48945147679324896, + "grad_norm": 0.7341466546058655, + "learning_rate": 0.0015, + "loss": 1.4716, + "step": 4640 + }, + { + "epoch": 0.49050632911392406, + "grad_norm": 0.9443486928939819, + "learning_rate": 0.0015, + "loss": 1.4749, + "step": 4650 + }, + { + "epoch": 0.49156118143459915, + "grad_norm": 0.6546844244003296, + "learning_rate": 
0.0015, + "loss": 1.4633, + "step": 4660 + }, + { + "epoch": 0.49261603375527424, + "grad_norm": 1.1092888116836548, + "learning_rate": 0.0015, + "loss": 1.463, + "step": 4670 + }, + { + "epoch": 0.4936708860759494, + "grad_norm": 0.6215645670890808, + "learning_rate": 0.0015, + "loss": 1.4581, + "step": 4680 + }, + { + "epoch": 0.4947257383966245, + "grad_norm": 0.8538206815719604, + "learning_rate": 0.0015, + "loss": 1.4565, + "step": 4690 + }, + { + "epoch": 0.4957805907172996, + "grad_norm": 0.7629497051239014, + "learning_rate": 0.0015, + "loss": 1.4659, + "step": 4700 + }, + { + "epoch": 0.49683544303797467, + "grad_norm": 0.8160348534584045, + "learning_rate": 0.0015, + "loss": 1.4677, + "step": 4710 + }, + { + "epoch": 0.4978902953586498, + "grad_norm": 0.678744375705719, + "learning_rate": 0.0015, + "loss": 1.4656, + "step": 4720 + }, + { + "epoch": 0.4989451476793249, + "grad_norm": 0.8286721110343933, + "learning_rate": 0.0015, + "loss": 1.4574, + "step": 4730 + }, + { + "epoch": 0.5, + "grad_norm": 0.7032412886619568, + "learning_rate": 0.0015, + "loss": 1.4562, + "step": 4740 + }, + { + "epoch": 0.5010548523206751, + "grad_norm": 0.7853618264198303, + "learning_rate": 0.0015, + "loss": 1.4789, + "step": 4750 + }, + { + "epoch": 0.5021097046413502, + "grad_norm": 0.840776801109314, + "learning_rate": 0.0015, + "loss": 1.4631, + "step": 4760 + }, + { + "epoch": 0.5031645569620253, + "grad_norm": 0.7104450464248657, + "learning_rate": 0.0015, + "loss": 1.462, + "step": 4770 + }, + { + "epoch": 0.5042194092827004, + "grad_norm": 1.067781686782837, + "learning_rate": 0.0015, + "loss": 1.4736, + "step": 4780 + }, + { + "epoch": 0.5052742616033755, + "grad_norm": 0.9482417106628418, + "learning_rate": 0.0015, + "loss": 1.4554, + "step": 4790 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 1.0961337089538574, + "learning_rate": 0.0015, + "loss": 1.4622, + "step": 4800 + }, + { + "epoch": 0.5073839662447257, + "grad_norm": 0.6901832818984985, + 
"learning_rate": 0.0015, + "loss": 1.458, + "step": 4810 + }, + { + "epoch": 0.5084388185654009, + "grad_norm": 0.8511425256729126, + "learning_rate": 0.0015, + "loss": 1.4761, + "step": 4820 + }, + { + "epoch": 0.509493670886076, + "grad_norm": 1.4769139289855957, + "learning_rate": 0.0015, + "loss": 1.4587, + "step": 4830 + }, + { + "epoch": 0.510548523206751, + "grad_norm": 0.9670759439468384, + "learning_rate": 0.0015, + "loss": 1.4537, + "step": 4840 + }, + { + "epoch": 0.5116033755274262, + "grad_norm": 0.7199227809906006, + "learning_rate": 0.0015, + "loss": 1.4679, + "step": 4850 + }, + { + "epoch": 0.5126582278481012, + "grad_norm": 0.6534480452537537, + "learning_rate": 0.0015, + "loss": 1.4535, + "step": 4860 + }, + { + "epoch": 0.5137130801687764, + "grad_norm": 0.6710234880447388, + "learning_rate": 0.0015, + "loss": 1.4484, + "step": 4870 + }, + { + "epoch": 0.5147679324894515, + "grad_norm": 1.0456293821334839, + "learning_rate": 0.0015, + "loss": 1.4693, + "step": 4880 + }, + { + "epoch": 0.5158227848101266, + "grad_norm": 0.6936899423599243, + "learning_rate": 0.0015, + "loss": 1.4591, + "step": 4890 + }, + { + "epoch": 0.5168776371308017, + "grad_norm": 1.9122974872589111, + "learning_rate": 0.0015, + "loss": 1.4729, + "step": 4900 + }, + { + "epoch": 0.5179324894514767, + "grad_norm": 1.3576304912567139, + "learning_rate": 0.0015, + "loss": 1.4649, + "step": 4910 + }, + { + "epoch": 0.5189873417721519, + "grad_norm": 0.697835385799408, + "learning_rate": 0.0015, + "loss": 1.4393, + "step": 4920 + }, + { + "epoch": 0.520042194092827, + "grad_norm": 0.750251293182373, + "learning_rate": 0.0015, + "loss": 1.4498, + "step": 4930 + }, + { + "epoch": 0.5210970464135021, + "grad_norm": 0.9501820802688599, + "learning_rate": 0.0015, + "loss": 1.4539, + "step": 4940 + }, + { + "epoch": 0.5221518987341772, + "grad_norm": 0.7399319410324097, + "learning_rate": 0.0015, + "loss": 1.4646, + "step": 4950 + }, + { + "epoch": 0.5232067510548524, + "grad_norm": 
0.9583612084388733, + "learning_rate": 0.0015, + "loss": 1.4469, + "step": 4960 + }, + { + "epoch": 0.5242616033755274, + "grad_norm": 1.029189109802246, + "learning_rate": 0.0015, + "loss": 1.4547, + "step": 4970 + }, + { + "epoch": 0.5253164556962026, + "grad_norm": 0.8271332383155823, + "learning_rate": 0.0015, + "loss": 1.4496, + "step": 4980 + }, + { + "epoch": 0.5263713080168776, + "grad_norm": 0.7524606585502625, + "learning_rate": 0.0015, + "loss": 1.4508, + "step": 4990 + }, + { + "epoch": 0.5274261603375527, + "grad_norm": 0.7575464844703674, + "learning_rate": 0.0015, + "loss": 1.4618, + "step": 5000 + }, + { + "epoch": 0.5284810126582279, + "grad_norm": 0.7084495425224304, + "learning_rate": 0.0015, + "loss": 1.4555, + "step": 5010 + }, + { + "epoch": 0.5295358649789029, + "grad_norm": 0.734721839427948, + "learning_rate": 0.0015, + "loss": 1.4495, + "step": 5020 + }, + { + "epoch": 0.5305907172995781, + "grad_norm": 0.897038996219635, + "learning_rate": 0.0015, + "loss": 1.4327, + "step": 5030 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 0.7780396938323975, + "learning_rate": 0.0015, + "loss": 1.4611, + "step": 5040 + }, + { + "epoch": 0.5327004219409283, + "grad_norm": 0.7951447367668152, + "learning_rate": 0.0015, + "loss": 1.4492, + "step": 5050 + }, + { + "epoch": 0.5337552742616034, + "grad_norm": 0.7090691328048706, + "learning_rate": 0.0015, + "loss": 1.4627, + "step": 5060 + }, + { + "epoch": 0.5348101265822784, + "grad_norm": 1.0311530828475952, + "learning_rate": 0.0015, + "loss": 1.4498, + "step": 5070 + }, + { + "epoch": 0.5358649789029536, + "grad_norm": 0.9052611589431763, + "learning_rate": 0.0015, + "loss": 1.4674, + "step": 5080 + }, + { + "epoch": 0.5369198312236287, + "grad_norm": 0.8142776489257812, + "learning_rate": 0.0015, + "loss": 1.4524, + "step": 5090 + }, + { + "epoch": 0.5379746835443038, + "grad_norm": 0.7009817957878113, + "learning_rate": 0.0015, + "loss": 1.429, + "step": 5100 + }, + { + "epoch": 
0.5390295358649789, + "grad_norm": 1.483508825302124, + "learning_rate": 0.0015, + "loss": 1.449, + "step": 5110 + }, + { + "epoch": 0.540084388185654, + "grad_norm": 0.9793059825897217, + "learning_rate": 0.0015, + "loss": 1.4499, + "step": 5120 + }, + { + "epoch": 0.5411392405063291, + "grad_norm": 0.8050643801689148, + "learning_rate": 0.0015, + "loss": 1.4422, + "step": 5130 + }, + { + "epoch": 0.5421940928270043, + "grad_norm": 0.936424195766449, + "learning_rate": 0.0015, + "loss": 1.4587, + "step": 5140 + }, + { + "epoch": 0.5432489451476793, + "grad_norm": 1.0679045915603638, + "learning_rate": 0.0015, + "loss": 1.4556, + "step": 5150 + }, + { + "epoch": 0.5443037974683544, + "grad_norm": 0.8840908408164978, + "learning_rate": 0.0015, + "loss": 1.4524, + "step": 5160 + }, + { + "epoch": 0.5453586497890295, + "grad_norm": 0.6780058145523071, + "learning_rate": 0.0015, + "loss": 1.4527, + "step": 5170 + }, + { + "epoch": 0.5464135021097046, + "grad_norm": 0.9701220989227295, + "learning_rate": 0.0015, + "loss": 1.4592, + "step": 5180 + }, + { + "epoch": 0.5474683544303798, + "grad_norm": 0.7446921467781067, + "learning_rate": 0.0015, + "loss": 1.4415, + "step": 5190 + }, + { + "epoch": 0.5485232067510548, + "grad_norm": 1.5284725427627563, + "learning_rate": 0.0015, + "loss": 1.4563, + "step": 5200 + }, + { + "epoch": 0.54957805907173, + "grad_norm": 0.7322260141372681, + "learning_rate": 0.0015, + "loss": 1.4416, + "step": 5210 + }, + { + "epoch": 0.5506329113924051, + "grad_norm": 0.7711663246154785, + "learning_rate": 0.0015, + "loss": 1.4529, + "step": 5220 + }, + { + "epoch": 0.5516877637130801, + "grad_norm": 0.7658071517944336, + "learning_rate": 0.0015, + "loss": 1.4438, + "step": 5230 + }, + { + "epoch": 0.5527426160337553, + "grad_norm": 0.7830654978752136, + "learning_rate": 0.0015, + "loss": 1.4502, + "step": 5240 + }, + { + "epoch": 0.5537974683544303, + "grad_norm": 0.8025482296943665, + "learning_rate": 0.0015, + "loss": 1.4422, + "step": 5250 
+ }, + { + "epoch": 0.5548523206751055, + "grad_norm": 1.0064853429794312, + "learning_rate": 0.0015, + "loss": 1.4481, + "step": 5260 + }, + { + "epoch": 0.5559071729957806, + "grad_norm": 0.6973406672477722, + "learning_rate": 0.0015, + "loss": 1.4524, + "step": 5270 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 0.7741480469703674, + "learning_rate": 0.0015, + "loss": 1.4462, + "step": 5280 + }, + { + "epoch": 0.5580168776371308, + "grad_norm": 1.0004830360412598, + "learning_rate": 0.0015, + "loss": 1.4468, + "step": 5290 + }, + { + "epoch": 0.5590717299578059, + "grad_norm": 0.764845073223114, + "learning_rate": 0.0015, + "loss": 1.4542, + "step": 5300 + }, + { + "epoch": 0.560126582278481, + "grad_norm": 1.1237423419952393, + "learning_rate": 0.0015, + "loss": 1.4439, + "step": 5310 + }, + { + "epoch": 0.5611814345991561, + "grad_norm": 1.1748300790786743, + "learning_rate": 0.0015, + "loss": 1.4347, + "step": 5320 + }, + { + "epoch": 0.5622362869198312, + "grad_norm": 0.817560076713562, + "learning_rate": 0.0015, + "loss": 1.4449, + "step": 5330 + }, + { + "epoch": 0.5632911392405063, + "grad_norm": 0.7376773953437805, + "learning_rate": 0.0015, + "loss": 1.4387, + "step": 5340 + }, + { + "epoch": 0.5643459915611815, + "grad_norm": 0.7824847102165222, + "learning_rate": 0.0015, + "loss": 1.4342, + "step": 5350 + }, + { + "epoch": 0.5654008438818565, + "grad_norm": 1.1887530088424683, + "learning_rate": 0.0015, + "loss": 1.4441, + "step": 5360 + }, + { + "epoch": 0.5664556962025317, + "grad_norm": 1.041296124458313, + "learning_rate": 0.0015, + "loss": 1.4434, + "step": 5370 + }, + { + "epoch": 0.5675105485232067, + "grad_norm": 0.734176516532898, + "learning_rate": 0.0015, + "loss": 1.4369, + "step": 5380 + }, + { + "epoch": 0.5685654008438819, + "grad_norm": 1.1206353902816772, + "learning_rate": 0.0015, + "loss": 1.4438, + "step": 5390 + }, + { + "epoch": 0.569620253164557, + "grad_norm": 0.667698323726654, + "learning_rate": 0.0015, + "loss": 
1.4357, + "step": 5400 + }, + { + "epoch": 0.570675105485232, + "grad_norm": 0.9575449228286743, + "learning_rate": 0.0015, + "loss": 1.438, + "step": 5410 + }, + { + "epoch": 0.5717299578059072, + "grad_norm": 0.8410739302635193, + "learning_rate": 0.0015, + "loss": 1.4345, + "step": 5420 + }, + { + "epoch": 0.5727848101265823, + "grad_norm": 1.2623330354690552, + "learning_rate": 0.0015, + "loss": 1.4343, + "step": 5430 + }, + { + "epoch": 0.5738396624472574, + "grad_norm": 0.7597998976707458, + "learning_rate": 0.0015, + "loss": 1.4311, + "step": 5440 + }, + { + "epoch": 0.5748945147679325, + "grad_norm": 0.8629986643791199, + "learning_rate": 0.0015, + "loss": 1.4379, + "step": 5450 + }, + { + "epoch": 0.5759493670886076, + "grad_norm": 0.7643434405326843, + "learning_rate": 0.0015, + "loss": 1.4409, + "step": 5460 + }, + { + "epoch": 0.5770042194092827, + "grad_norm": 0.8496126532554626, + "learning_rate": 0.0015, + "loss": 1.4402, + "step": 5470 + }, + { + "epoch": 0.5780590717299579, + "grad_norm": 1.3537168502807617, + "learning_rate": 0.0015, + "loss": 1.4409, + "step": 5480 + }, + { + "epoch": 0.5791139240506329, + "grad_norm": 0.9861345291137695, + "learning_rate": 0.0015, + "loss": 1.4572, + "step": 5490 + }, + { + "epoch": 0.580168776371308, + "grad_norm": 1.2857369184494019, + "learning_rate": 0.0015, + "loss": 1.4374, + "step": 5500 + }, + { + "epoch": 0.5812236286919831, + "grad_norm": 0.931484580039978, + "learning_rate": 0.0015, + "loss": 1.4393, + "step": 5510 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 0.8116416335105896, + "learning_rate": 0.0015, + "loss": 1.4269, + "step": 5520 + }, + { + "epoch": 0.5833333333333334, + "grad_norm": 0.6553507447242737, + "learning_rate": 0.0015, + "loss": 1.4273, + "step": 5530 + }, + { + "epoch": 0.5843881856540084, + "grad_norm": 0.7439187169075012, + "learning_rate": 0.0015, + "loss": 1.4271, + "step": 5540 + }, + { + "epoch": 0.5854430379746836, + "grad_norm": 1.192237138748169, + 
"learning_rate": 0.0015, + "loss": 1.4459, + "step": 5550 + }, + { + "epoch": 0.5864978902953587, + "grad_norm": 0.7650824189186096, + "learning_rate": 0.0015, + "loss": 1.4523, + "step": 5560 + }, + { + "epoch": 0.5875527426160337, + "grad_norm": 0.7812345623970032, + "learning_rate": 0.0015, + "loss": 1.4349, + "step": 5570 + }, + { + "epoch": 0.5886075949367089, + "grad_norm": 0.780968427658081, + "learning_rate": 0.0015, + "loss": 1.4366, + "step": 5580 + }, + { + "epoch": 0.5896624472573839, + "grad_norm": 1.207230806350708, + "learning_rate": 0.0015, + "loss": 1.4436, + "step": 5590 + }, + { + "epoch": 0.5907172995780591, + "grad_norm": 0.596867024898529, + "learning_rate": 0.0015, + "loss": 1.4346, + "step": 5600 + }, + { + "epoch": 0.5917721518987342, + "grad_norm": 0.6693760752677917, + "learning_rate": 0.0015, + "loss": 1.4304, + "step": 5610 + }, + { + "epoch": 0.5928270042194093, + "grad_norm": 0.7231243848800659, + "learning_rate": 0.0015, + "loss": 1.4421, + "step": 5620 + }, + { + "epoch": 0.5938818565400844, + "grad_norm": 0.9922919273376465, + "learning_rate": 0.0015, + "loss": 1.438, + "step": 5630 + }, + { + "epoch": 0.5949367088607594, + "grad_norm": 0.720392644405365, + "learning_rate": 0.0015, + "loss": 1.4464, + "step": 5640 + }, + { + "epoch": 0.5959915611814346, + "grad_norm": 0.7540363073348999, + "learning_rate": 0.0015, + "loss": 1.4406, + "step": 5650 + }, + { + "epoch": 0.5970464135021097, + "grad_norm": 0.7336458563804626, + "learning_rate": 0.0015, + "loss": 1.439, + "step": 5660 + }, + { + "epoch": 0.5981012658227848, + "grad_norm": 0.6883314251899719, + "learning_rate": 0.0015, + "loss": 1.4436, + "step": 5670 + }, + { + "epoch": 0.5991561181434599, + "grad_norm": 0.7491645812988281, + "learning_rate": 0.0015, + "loss": 1.4466, + "step": 5680 + }, + { + "epoch": 0.6002109704641351, + "grad_norm": 1.5193525552749634, + "learning_rate": 0.0015, + "loss": 1.4264, + "step": 5690 + }, + { + "epoch": 0.6012658227848101, + "grad_norm": 
0.9386850595474243, + "learning_rate": 0.0015, + "loss": 1.4316, + "step": 5700 + }, + { + "epoch": 0.6023206751054853, + "grad_norm": 1.110142469406128, + "learning_rate": 0.0015, + "loss": 1.438, + "step": 5710 + }, + { + "epoch": 0.6033755274261603, + "grad_norm": 0.916222333908081, + "learning_rate": 0.0015, + "loss": 1.4333, + "step": 5720 + }, + { + "epoch": 0.6044303797468354, + "grad_norm": 1.0064270496368408, + "learning_rate": 0.0015, + "loss": 1.4297, + "step": 5730 + }, + { + "epoch": 0.6054852320675106, + "grad_norm": 0.7309591770172119, + "learning_rate": 0.0015, + "loss": 1.4322, + "step": 5740 + }, + { + "epoch": 0.6065400843881856, + "grad_norm": 0.9111922979354858, + "learning_rate": 0.0015, + "loss": 1.4063, + "step": 5750 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 0.7754584550857544, + "learning_rate": 0.0015, + "loss": 1.4201, + "step": 5760 + }, + { + "epoch": 0.6086497890295358, + "grad_norm": 1.2049202919006348, + "learning_rate": 0.0015, + "loss": 1.4247, + "step": 5770 + }, + { + "epoch": 0.609704641350211, + "grad_norm": 1.4835129976272583, + "learning_rate": 0.0015, + "loss": 1.433, + "step": 5780 + }, + { + "epoch": 0.6107594936708861, + "grad_norm": 0.7738795876502991, + "learning_rate": 0.0015, + "loss": 1.4181, + "step": 5790 + }, + { + "epoch": 0.6118143459915611, + "grad_norm": 0.6776912808418274, + "learning_rate": 0.0015, + "loss": 1.4222, + "step": 5800 + }, + { + "epoch": 0.6128691983122363, + "grad_norm": 0.6024855971336365, + "learning_rate": 0.0015, + "loss": 1.4292, + "step": 5810 + }, + { + "epoch": 0.6139240506329114, + "grad_norm": 0.9627833366394043, + "learning_rate": 0.0015, + "loss": 1.4467, + "step": 5820 + }, + { + "epoch": 0.6149789029535865, + "grad_norm": 0.7835455536842346, + "learning_rate": 0.0015, + "loss": 1.4301, + "step": 5830 + }, + { + "epoch": 0.6160337552742616, + "grad_norm": 0.7023641467094421, + "learning_rate": 0.0015, + "loss": 1.4427, + "step": 5840 + }, + { + "epoch": 
0.6170886075949367, + "grad_norm": 0.7147296667098999, + "learning_rate": 0.0015, + "loss": 1.4475, + "step": 5850 + }, + { + "epoch": 0.6181434599156118, + "grad_norm": 0.989305317401886, + "learning_rate": 0.0015, + "loss": 1.4336, + "step": 5860 + }, + { + "epoch": 0.619198312236287, + "grad_norm": 1.5840697288513184, + "learning_rate": 0.0015, + "loss": 1.432, + "step": 5870 + }, + { + "epoch": 0.620253164556962, + "grad_norm": 0.6862075328826904, + "learning_rate": 0.0015, + "loss": 1.434, + "step": 5880 + }, + { + "epoch": 0.6213080168776371, + "grad_norm": 1.2028660774230957, + "learning_rate": 0.0015, + "loss": 1.4278, + "step": 5890 + }, + { + "epoch": 0.6223628691983122, + "grad_norm": 0.6749687194824219, + "learning_rate": 0.0015, + "loss": 1.4275, + "step": 5900 + }, + { + "epoch": 0.6234177215189873, + "grad_norm": 0.9053614139556885, + "learning_rate": 0.0015, + "loss": 1.4236, + "step": 5910 + }, + { + "epoch": 0.6244725738396625, + "grad_norm": 0.8926159739494324, + "learning_rate": 0.0015, + "loss": 1.4194, + "step": 5920 + }, + { + "epoch": 0.6255274261603375, + "grad_norm": 1.16741943359375, + "learning_rate": 0.0015, + "loss": 1.4306, + "step": 5930 + }, + { + "epoch": 0.6265822784810127, + "grad_norm": 1.071181058883667, + "learning_rate": 0.0015, + "loss": 1.4293, + "step": 5940 + }, + { + "epoch": 0.6276371308016878, + "grad_norm": 1.2650463581085205, + "learning_rate": 0.0015, + "loss": 1.4137, + "step": 5950 + }, + { + "epoch": 0.6286919831223629, + "grad_norm": 0.7591111063957214, + "learning_rate": 0.0015, + "loss": 1.4193, + "step": 5960 + }, + { + "epoch": 0.629746835443038, + "grad_norm": 0.7614467740058899, + "learning_rate": 0.0015, + "loss": 1.4207, + "step": 5970 + }, + { + "epoch": 0.630801687763713, + "grad_norm": 0.7675385475158691, + "learning_rate": 0.0015, + "loss": 1.4087, + "step": 5980 + }, + { + "epoch": 0.6318565400843882, + "grad_norm": 1.0191330909729004, + "learning_rate": 0.0015, + "loss": 1.4334, + "step": 5990 + }, 
+ { + "epoch": 0.6329113924050633, + "grad_norm": 0.7505598068237305, + "learning_rate": 0.0015, + "loss": 1.4446, + "step": 6000 + }, + { + "epoch": 0.6339662447257384, + "grad_norm": 0.6819525957107544, + "learning_rate": 0.0015, + "loss": 1.4231, + "step": 6010 + }, + { + "epoch": 0.6350210970464135, + "grad_norm": 1.759894609451294, + "learning_rate": 0.0015, + "loss": 1.4292, + "step": 6020 + }, + { + "epoch": 0.6360759493670886, + "grad_norm": 1.2066490650177002, + "learning_rate": 0.0015, + "loss": 1.4143, + "step": 6030 + }, + { + "epoch": 0.6371308016877637, + "grad_norm": 1.2120089530944824, + "learning_rate": 0.0015, + "loss": 1.4254, + "step": 6040 + }, + { + "epoch": 0.6381856540084389, + "grad_norm": 1.3433258533477783, + "learning_rate": 0.0015, + "loss": 1.4081, + "step": 6050 + }, + { + "epoch": 0.6392405063291139, + "grad_norm": 1.247788667678833, + "learning_rate": 0.0015, + "loss": 1.4313, + "step": 6060 + }, + { + "epoch": 0.640295358649789, + "grad_norm": 0.9588020443916321, + "learning_rate": 0.0015, + "loss": 1.4182, + "step": 6070 + }, + { + "epoch": 0.6413502109704642, + "grad_norm": 0.6824244856834412, + "learning_rate": 0.0015, + "loss": 1.4125, + "step": 6080 + }, + { + "epoch": 0.6424050632911392, + "grad_norm": 0.8143693208694458, + "learning_rate": 0.0015, + "loss": 1.4156, + "step": 6090 + }, + { + "epoch": 0.6434599156118144, + "grad_norm": 0.6659235954284668, + "learning_rate": 0.0015, + "loss": 1.4174, + "step": 6100 + }, + { + "epoch": 0.6445147679324894, + "grad_norm": 1.0041710138320923, + "learning_rate": 0.0015, + "loss": 1.4283, + "step": 6110 + }, + { + "epoch": 0.6455696202531646, + "grad_norm": 0.7794994711875916, + "learning_rate": 0.0015, + "loss": 1.4287, + "step": 6120 + }, + { + "epoch": 0.6466244725738397, + "grad_norm": 1.0012164115905762, + "learning_rate": 0.0015, + "loss": 1.427, + "step": 6130 + }, + { + "epoch": 0.6476793248945147, + "grad_norm": 1.0531517267227173, + "learning_rate": 0.0015, + "loss": 1.424, 
+ "step": 6140 + }, + { + "epoch": 0.6487341772151899, + "grad_norm": 0.7130517363548279, + "learning_rate": 0.0015, + "loss": 1.4311, + "step": 6150 + }, + { + "epoch": 0.6497890295358649, + "grad_norm": 0.8131248354911804, + "learning_rate": 0.0015, + "loss": 1.427, + "step": 6160 + }, + { + "epoch": 0.6508438818565401, + "grad_norm": 0.7530655860900879, + "learning_rate": 0.001487560447745699, + "loss": 1.4067, + "step": 6170 + }, + { + "epoch": 0.6518987341772152, + "grad_norm": 0.7366441488265991, + "learning_rate": 0.0014670566859713624, + "loss": 1.4295, + "step": 6180 + }, + { + "epoch": 0.6529535864978903, + "grad_norm": 0.7664610743522644, + "learning_rate": 0.0014468355374162303, + "loss": 1.4332, + "step": 6190 + }, + { + "epoch": 0.6540084388185654, + "grad_norm": 0.680364727973938, + "learning_rate": 0.0014268931066862504, + "loss": 1.4223, + "step": 6200 + }, + { + "epoch": 0.6550632911392406, + "grad_norm": 0.7482441663742065, + "learning_rate": 0.0014072255520794614, + "loss": 1.4188, + "step": 6210 + }, + { + "epoch": 0.6561181434599156, + "grad_norm": 0.6117098927497864, + "learning_rate": 0.0013878290848459301, + "loss": 1.424, + "step": 6220 + }, + { + "epoch": 0.6571729957805907, + "grad_norm": 0.7364591956138611, + "learning_rate": 0.0013686999684578874, + "loss": 1.4096, + "step": 6230 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 0.8092082738876343, + "learning_rate": 0.001349834517889925, + "loss": 1.4141, + "step": 6240 + }, + { + "epoch": 0.6592827004219409, + "grad_norm": 0.7603645324707031, + "learning_rate": 0.001331229098909114, + "loss": 1.4168, + "step": 6250 + }, + { + "epoch": 0.6603375527426161, + "grad_norm": 1.070099115371704, + "learning_rate": 0.0013128801273749075, + "loss": 1.4107, + "step": 6260 + }, + { + "epoch": 0.6613924050632911, + "grad_norm": 0.9038859009742737, + "learning_rate": 0.0012947840685486932, + "loss": 1.4199, + "step": 6270 + }, + { + "epoch": 0.6624472573839663, + "grad_norm": 
0.8539178371429443, + "learning_rate": 0.0012769374364128628, + "loss": 1.4126, + "step": 6280 + }, + { + "epoch": 0.6635021097046413, + "grad_norm": 0.8272867202758789, + "learning_rate": 0.0012593367929992667, + "loss": 1.4096, + "step": 6290 + }, + { + "epoch": 0.6645569620253164, + "grad_norm": 0.847212553024292, + "learning_rate": 0.0012419787477269257, + "loss": 1.4044, + "step": 6300 + }, + { + "epoch": 0.6656118143459916, + "grad_norm": 0.6419264674186707, + "learning_rate": 0.0012248599567488698, + "loss": 1.4009, + "step": 6310 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.686825692653656, + "learning_rate": 0.0012079771223079822, + "loss": 1.3938, + "step": 6320 + }, + { + "epoch": 0.6677215189873418, + "grad_norm": 0.678505003452301, + "learning_rate": 0.0011913269921017202, + "loss": 1.3996, + "step": 6330 + }, + { + "epoch": 0.6687763713080169, + "grad_norm": 0.6446670889854431, + "learning_rate": 0.0011749063586555919, + "loss": 1.3945, + "step": 6340 + }, + { + "epoch": 0.669831223628692, + "grad_norm": 0.7400952577590942, + "learning_rate": 0.001158712058705271, + "loss": 1.3991, + "step": 6350 + }, + { + "epoch": 0.6708860759493671, + "grad_norm": 0.8822047114372253, + "learning_rate": 0.0011427409725872262, + "loss": 1.3961, + "step": 6360 + }, + { + "epoch": 0.6719409282700421, + "grad_norm": 0.7633283734321594, + "learning_rate": 0.00112699002363775, + "loss": 1.3775, + "step": 6370 + }, + { + "epoch": 0.6729957805907173, + "grad_norm": 1.3245850801467896, + "learning_rate": 0.0011114561776002726, + "loss": 1.3826, + "step": 6380 + }, + { + "epoch": 0.6740506329113924, + "grad_norm": 0.9352310299873352, + "learning_rate": 0.001096136442040843, + "loss": 1.395, + "step": 6390 + }, + { + "epoch": 0.6751054852320675, + "grad_norm": 0.6964030861854553, + "learning_rate": 0.001081027865771668, + "loss": 1.3926, + "step": 6400 + }, + { + "epoch": 0.6761603375527426, + "grad_norm": 0.7971494793891907, + "learning_rate": 
0.0010661275382825958, + "loss": 1.3864, + "step": 6410 + }, + { + "epoch": 0.6772151898734177, + "grad_norm": 0.8077700734138489, + "learning_rate": 0.0010514325891804379, + "loss": 1.3764, + "step": 6420 + }, + { + "epoch": 0.6782700421940928, + "grad_norm": 0.6210984587669373, + "learning_rate": 0.0010369401876360166, + "loss": 1.3827, + "step": 6430 + }, + { + "epoch": 0.679324894514768, + "grad_norm": 0.7208024263381958, + "learning_rate": 0.001022647541838836, + "loss": 1.3677, + "step": 6440 + }, + { + "epoch": 0.680379746835443, + "grad_norm": 0.7337672114372253, + "learning_rate": 0.0010085518984592678, + "loss": 1.3727, + "step": 6450 + }, + { + "epoch": 0.6814345991561181, + "grad_norm": 0.6331183314323425, + "learning_rate": 0.0009946505421181513, + "loss": 1.3857, + "step": 6460 + }, + { + "epoch": 0.6824894514767933, + "grad_norm": 0.6681822538375854, + "learning_rate": 0.0009809407948637044, + "loss": 1.3775, + "step": 6470 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 0.7302703261375427, + "learning_rate": 0.0009674200156556436, + "loss": 1.3832, + "step": 6480 + }, + { + "epoch": 0.6845991561181435, + "grad_norm": 0.7156602740287781, + "learning_rate": 0.0009540855998564147, + "loss": 1.3783, + "step": 6490 + }, + { + "epoch": 0.6856540084388185, + "grad_norm": 0.9306661486625671, + "learning_rate": 0.000940934978729437, + "loss": 1.3887, + "step": 6500 + }, + { + "epoch": 0.6867088607594937, + "grad_norm": 0.7251031398773193, + "learning_rate": 0.0009279656189442628, + "loss": 1.3919, + "step": 6510 + }, + { + "epoch": 0.6877637130801688, + "grad_norm": 0.6620749235153198, + "learning_rate": 0.0009151750220885573, + "loss": 1.376, + "step": 6520 + }, + { + "epoch": 0.6888185654008439, + "grad_norm": 0.9000309705734253, + "learning_rate": 0.0009025607241868057, + "loss": 1.3645, + "step": 6530 + }, + { + "epoch": 0.689873417721519, + "grad_norm": 1.1065431833267212, + "learning_rate": 0.0008901202952256545, + "loss": 1.3736, + "step": 6540 
+ }, + { + "epoch": 0.6909282700421941, + "grad_norm": 0.773053765296936, + "learning_rate": 0.0008778513386857928, + "loss": 1.3702, + "step": 6550 + }, + { + "epoch": 0.6919831223628692, + "grad_norm": 0.9815838932991028, + "learning_rate": 0.0008657514910802905, + "loss": 1.372, + "step": 6560 + }, + { + "epoch": 0.6930379746835443, + "grad_norm": 0.5923182964324951, + "learning_rate": 0.0008538184214992943, + "loss": 1.3631, + "step": 6570 + }, + { + "epoch": 0.6940928270042194, + "grad_norm": 0.6020275354385376, + "learning_rate": 0.0008420498311610049, + "loss": 1.3617, + "step": 6580 + }, + { + "epoch": 0.6951476793248945, + "grad_norm": 0.596274197101593, + "learning_rate": 0.0008304434529688382, + "loss": 1.3649, + "step": 6590 + }, + { + "epoch": 0.6962025316455697, + "grad_norm": 0.6799557209014893, + "learning_rate": 0.0008189970510746938, + "loss": 1.3701, + "step": 6600 + }, + { + "epoch": 0.6972573839662447, + "grad_norm": 0.6392850875854492, + "learning_rate": 0.0008077084204482425, + "loss": 1.3538, + "step": 6610 + }, + { + "epoch": 0.6983122362869199, + "grad_norm": 0.6304835081100464, + "learning_rate": 0.0007965753864521494, + "loss": 1.3617, + "step": 6620 + }, + { + "epoch": 0.6993670886075949, + "grad_norm": 0.7398068904876709, + "learning_rate": 0.0007855958044231527, + "loss": 1.3548, + "step": 6630 + }, + { + "epoch": 0.70042194092827, + "grad_norm": 0.6057365536689758, + "learning_rate": 0.000774767559258917, + "loss": 1.3587, + "step": 6640 + }, + { + "epoch": 0.7014767932489452, + "grad_norm": 0.6741600036621094, + "learning_rate": 0.0007640885650105806, + "loss": 1.3516, + "step": 6650 + }, + { + "epoch": 0.7025316455696202, + "grad_norm": 0.7560098767280579, + "learning_rate": 0.0007535567644809191, + "loss": 1.3521, + "step": 6660 + }, + { + "epoch": 0.7035864978902954, + "grad_norm": 0.5901792049407959, + "learning_rate": 0.0007431701288280478, + "loss": 1.357, + "step": 6670 + }, + { + "epoch": 0.7046413502109705, + "grad_norm": 
0.6631166934967041, + "learning_rate": 0.0007329266571745864, + "loss": 1.3554, + "step": 6680 + }, + { + "epoch": 0.7056962025316456, + "grad_norm": 0.7188959121704102, + "learning_rate": 0.0007228243762222109, + "loss": 1.3463, + "step": 6690 + }, + { + "epoch": 0.7067510548523207, + "grad_norm": 0.6191877126693726, + "learning_rate": 0.0007128613398715179, + "loss": 1.3501, + "step": 6700 + }, + { + "epoch": 0.7078059071729957, + "grad_norm": 0.564612090587616, + "learning_rate": 0.0007030356288471288, + "loss": 1.351, + "step": 6710 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 0.6952012777328491, + "learning_rate": 0.0006933453503279619, + "loss": 1.3399, + "step": 6720 + }, + { + "epoch": 0.709915611814346, + "grad_norm": 0.743454098701477, + "learning_rate": 0.0006837886375825994, + "loss": 1.3487, + "step": 6730 + }, + { + "epoch": 0.7109704641350211, + "grad_norm": 0.5751940608024597, + "learning_rate": 0.0006743636496096813, + "loss": 1.3492, + "step": 6740 + }, + { + "epoch": 0.7120253164556962, + "grad_norm": 0.5761560201644897, + "learning_rate": 0.0006650685707832559, + "loss": 1.3516, + "step": 6750 + }, + { + "epoch": 0.7130801687763713, + "grad_norm": 0.6535277962684631, + "learning_rate": 0.0006559016105030176, + "loss": 1.3445, + "step": 6760 + }, + { + "epoch": 0.7141350210970464, + "grad_norm": 0.620011568069458, + "learning_rate": 0.000646861002849367, + "loss": 1.3483, + "step": 6770 + }, + { + "epoch": 0.7151898734177216, + "grad_norm": 0.6828277707099915, + "learning_rate": 0.0006379450062432248, + "loss": 1.3434, + "step": 6780 + }, + { + "epoch": 0.7162447257383966, + "grad_norm": 0.7670395374298096, + "learning_rate": 0.0006291519031105347, + "loss": 1.3521, + "step": 6790 + }, + { + "epoch": 0.7172995780590717, + "grad_norm": 0.6052244305610657, + "learning_rate": 0.00062047999955139, + "loss": 1.3356, + "step": 6800 + }, + { + "epoch": 0.7183544303797469, + "grad_norm": 0.7439938187599182, + "learning_rate": 
0.000611927625013722, + "loss": 1.338, + "step": 6810 + }, + { + "epoch": 0.7194092827004219, + "grad_norm": 0.5995110273361206, + "learning_rate": 0.0006034931319714858, + "loss": 1.3398, + "step": 6820 + }, + { + "epoch": 0.7204641350210971, + "grad_norm": 0.7887725830078125, + "learning_rate": 0.0005951748956072806, + "loss": 1.3378, + "step": 6830 + }, + { + "epoch": 0.7215189873417721, + "grad_norm": 0.6389921307563782, + "learning_rate": 0.0005869713134993463, + "loss": 1.3298, + "step": 6840 + }, + { + "epoch": 0.7225738396624473, + "grad_norm": 0.5964858531951904, + "learning_rate": 0.0005788808053128734, + "loss": 1.3354, + "step": 6850 + }, + { + "epoch": 0.7236286919831224, + "grad_norm": 0.6833141446113586, + "learning_rate": 0.0005709018124955674, + "loss": 1.3315, + "step": 6860 + }, + { + "epoch": 0.7246835443037974, + "grad_norm": 0.7699258923530579, + "learning_rate": 0.0005630327979774111, + "loss": 1.3287, + "step": 6870 + }, + { + "epoch": 0.7257383966244726, + "grad_norm": 0.5962306261062622, + "learning_rate": 0.0005552722458745627, + "loss": 1.3347, + "step": 6880 + }, + { + "epoch": 0.7267932489451476, + "grad_norm": 0.6438961029052734, + "learning_rate": 0.0005476186611973374, + "loss": 1.3253, + "step": 6890 + }, + { + "epoch": 0.7278481012658228, + "grad_norm": 0.675112783908844, + "learning_rate": 0.000540070569562213, + "loss": 1.3245, + "step": 6900 + }, + { + "epoch": 0.7289029535864979, + "grad_norm": 0.8002157807350159, + "learning_rate": 0.0005326265169078048, + "loss": 1.3498, + "step": 6910 + }, + { + "epoch": 0.729957805907173, + "grad_norm": 0.5718674659729004, + "learning_rate": 0.0005252850692147567, + "loss": 1.3275, + "step": 6920 + }, + { + "epoch": 0.7310126582278481, + "grad_norm": 0.606947124004364, + "learning_rate": 0.0005180448122294913, + "loss": 1.3343, + "step": 6930 + }, + { + "epoch": 0.7320675105485233, + "grad_norm": 0.6586506366729736, + "learning_rate": 0.0005109043511917693, + "loss": 1.3376, + "step": 6940 
+ }, + { + "epoch": 0.7331223628691983, + "grad_norm": 0.6559349894523621, + "learning_rate": 0.0005038623105660032, + "loss": 1.3332, + "step": 6950 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 0.6336144804954529, + "learning_rate": 0.0004969173337762747, + "loss": 1.3237, + "step": 6960 + }, + { + "epoch": 0.7352320675105485, + "grad_norm": 0.6228824853897095, + "learning_rate": 0.0004900680829450042, + "loss": 1.3196, + "step": 6970 + }, + { + "epoch": 0.7362869198312236, + "grad_norm": 0.633075475692749, + "learning_rate": 0.0004833132386352233, + "loss": 1.3216, + "step": 6980 + }, + { + "epoch": 0.7373417721518988, + "grad_norm": 0.7837070822715759, + "learning_rate": 0.00047665149959639813, + "loss": 1.3242, + "step": 6990 + }, + { + "epoch": 0.7383966244725738, + "grad_norm": 0.6982854604721069, + "learning_rate": 0.0004700815825137577, + "loss": 1.3343, + "step": 7000 + }, + { + "epoch": 0.739451476793249, + "grad_norm": 0.6608773469924927, + "learning_rate": 0.00046360222176107584, + "loss": 1.3028, + "step": 7010 + }, + { + "epoch": 0.740506329113924, + "grad_norm": 0.694812536239624, + "learning_rate": 0.0004572121691568625, + "loss": 1.3186, + "step": 7020 + }, + { + "epoch": 0.7415611814345991, + "grad_norm": 0.6343438029289246, + "learning_rate": 0.00045091019372391354, + "loss": 1.3226, + "step": 7030 + }, + { + "epoch": 0.7426160337552743, + "grad_norm": 0.7682545185089111, + "learning_rate": 0.0004446950814521764, + "loss": 1.3274, + "step": 7040 + }, + { + "epoch": 0.7436708860759493, + "grad_norm": 0.626300036907196, + "learning_rate": 0.0004385656350648835, + "loss": 1.3133, + "step": 7050 + }, + { + "epoch": 0.7447257383966245, + "grad_norm": 0.6249107718467712, + "learning_rate": 0.00043252067378790946, + "loss": 1.3216, + "step": 7060 + }, + { + "epoch": 0.7457805907172996, + "grad_norm": 0.6103988289833069, + "learning_rate": 0.00042655903312230673, + "loss": 1.3258, + "step": 7070 + }, + { + "epoch": 0.7468354430379747, + 
"grad_norm": 0.6114507913589478, + "learning_rate": 0.0004206795646199778, + "loss": 1.3225, + "step": 7080 + }, + { + "epoch": 0.7478902953586498, + "grad_norm": 0.677329957485199, + "learning_rate": 0.0004148811356624379, + "loss": 1.3193, + "step": 7090 + }, + { + "epoch": 0.7489451476793249, + "grad_norm": 0.6117640137672424, + "learning_rate": 0.0004091626292426282, + "loss": 1.3159, + "step": 7100 + }, + { + "epoch": 0.75, + "grad_norm": 0.7301117777824402, + "learning_rate": 0.0004035229437497357, + "loss": 1.325, + "step": 7110 + }, + { + "epoch": 0.7510548523206751, + "grad_norm": 0.6520455479621887, + "learning_rate": 0.00039796099275697986, + "loss": 1.3173, + "step": 7120 + }, + { + "epoch": 0.7521097046413502, + "grad_norm": 0.6321918368339539, + "learning_rate": 0.0003924757048123232, + "loss": 1.3276, + "step": 7130 + }, + { + "epoch": 0.7531645569620253, + "grad_norm": 0.6355084776878357, + "learning_rate": 0.0003870660232320675, + "loss": 1.3103, + "step": 7140 + }, + { + "epoch": 0.7542194092827004, + "grad_norm": 0.725093424320221, + "learning_rate": 0.000381730905897295, + "loss": 1.3063, + "step": 7150 + }, + { + "epoch": 0.7552742616033755, + "grad_norm": 0.7786434888839722, + "learning_rate": 0.0003764693250531141, + "loss": 1.3156, + "step": 7160 + }, + { + "epoch": 0.7563291139240507, + "grad_norm": 0.9077960848808289, + "learning_rate": 0.0003712802671106742, + "loss": 1.3243, + "step": 7170 + }, + { + "epoch": 0.7573839662447257, + "grad_norm": 0.5941562056541443, + "learning_rate": 0.0003661627324519073, + "loss": 1.3041, + "step": 7180 + }, + { + "epoch": 0.7584388185654009, + "grad_norm": 0.6901363730430603, + "learning_rate": 0.0003611157352369628, + "loss": 1.3036, + "step": 7190 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 0.6014901399612427, + "learning_rate": 0.00035613830321429534, + "loss": 1.3088, + "step": 7200 + }, + { + "epoch": 0.760548523206751, + "grad_norm": 0.5553300380706787, + "learning_rate": 
0.00035122947753337037, + "loss": 1.312, + "step": 7210 + }, + { + "epoch": 0.7616033755274262, + "grad_norm": 0.6891167759895325, + "learning_rate": 0.0003463883125599521, + "loss": 1.3251, + "step": 7220 + }, + { + "epoch": 0.7626582278481012, + "grad_norm": 0.5523408651351929, + "learning_rate": 0.00034161387569393647, + "loss": 1.3206, + "step": 7230 + }, + { + "epoch": 0.7637130801687764, + "grad_norm": 0.6099393963813782, + "learning_rate": 0.00033690524718969593, + "loss": 1.3155, + "step": 7240 + }, + { + "epoch": 0.7647679324894515, + "grad_norm": 0.6410874724388123, + "learning_rate": 0.0003322615199788993, + "loss": 1.3124, + "step": 7250 + }, + { + "epoch": 0.7658227848101266, + "grad_norm": 0.5620908141136169, + "learning_rate": 0.00032768179949577516, + "loss": 1.3096, + "step": 7260 + }, + { + "epoch": 0.7668776371308017, + "grad_norm": 0.6075364351272583, + "learning_rate": 0.0003231652035047826, + "loss": 1.3066, + "step": 7270 + }, + { + "epoch": 0.7679324894514767, + "grad_norm": 0.6979631185531616, + "learning_rate": 0.000318710861930658, + "loss": 1.3087, + "step": 7280 + }, + { + "epoch": 0.7689873417721519, + "grad_norm": 0.6119896173477173, + "learning_rate": 0.0003143179166908038, + "loss": 1.3188, + "step": 7290 + }, + { + "epoch": 0.770042194092827, + "grad_norm": 0.7065871953964233, + "learning_rate": 0.00030998552152998834, + "loss": 1.3235, + "step": 7300 + }, + { + "epoch": 0.7710970464135021, + "grad_norm": 0.5684677958488464, + "learning_rate": 0.00030571284185732276, + "loss": 1.3038, + "step": 7310 + }, + { + "epoch": 0.7721518987341772, + "grad_norm": 0.5912297964096069, + "learning_rate": 0.0003014990545854864, + "loss": 1.299, + "step": 7320 + }, + { + "epoch": 0.7732067510548524, + "grad_norm": 0.5699722170829773, + "learning_rate": 0.0002973433479721675, + "loss": 1.3047, + "step": 7330 + }, + { + "epoch": 0.7742616033755274, + "grad_norm": 0.7533963918685913, + "learning_rate": 0.00029324492146368906, + "loss": 1.3031, + 
"step": 7340 + }, + { + "epoch": 0.7753164556962026, + "grad_norm": 0.6559065580368042, + "learning_rate": 0.00028920298554079113, + "loss": 1.2978, + "step": 7350 + }, + { + "epoch": 0.7763713080168776, + "grad_norm": 0.7107135653495789, + "learning_rate": 0.00028521676156653756, + "loss": 1.3039, + "step": 7360 + }, + { + "epoch": 0.7774261603375527, + "grad_norm": 0.6395411491394043, + "learning_rate": 0.00028128548163632006, + "loss": 1.3082, + "step": 7370 + }, + { + "epoch": 0.7784810126582279, + "grad_norm": 0.5706745982170105, + "learning_rate": 0.0002774083884299292, + "loss": 1.2935, + "step": 7380 + }, + { + "epoch": 0.7795358649789029, + "grad_norm": 0.5461851954460144, + "learning_rate": 0.0002735847350656645, + "loss": 1.3042, + "step": 7390 + }, + { + "epoch": 0.7805907172995781, + "grad_norm": 0.7464679479598999, + "learning_rate": 0.0002698137849564556, + "loss": 1.3117, + "step": 7400 + }, + { + "epoch": 0.7816455696202531, + "grad_norm": 0.613097071647644, + "learning_rate": 0.0002660948116679665, + "loss": 1.2924, + "step": 7410 + }, + { + "epoch": 0.7827004219409283, + "grad_norm": 0.6618145704269409, + "learning_rate": 0.00026242709877865493, + "loss": 1.2952, + "step": 7420 + }, + { + "epoch": 0.7837552742616034, + "grad_norm": 0.566565752029419, + "learning_rate": 0.00025880993974176204, + "loss": 1.282, + "step": 7430 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 0.6015925407409668, + "learning_rate": 0.0002552426377492028, + "loss": 1.2873, + "step": 7440 + }, + { + "epoch": 0.7858649789029536, + "grad_norm": 0.5233849287033081, + "learning_rate": 0.0002517245055973337, + "loss": 1.2901, + "step": 7450 + }, + { + "epoch": 0.7869198312236287, + "grad_norm": 0.5597347021102905, + "learning_rate": 0.00024825486555456975, + "loss": 1.3, + "step": 7460 + }, + { + "epoch": 0.7879746835443038, + "grad_norm": 0.6684860587120056, + "learning_rate": 0.00024483304923082663, + "loss": 1.2911, + "step": 7470 + }, + { + "epoch": 
0.7890295358649789, + "grad_norm": 0.8047541379928589, + "learning_rate": 0.0002414583974487624, + "loss": 1.2977, + "step": 7480 + }, + { + "epoch": 0.790084388185654, + "grad_norm": 0.5415153503417969, + "learning_rate": 0.00023813026011679372, + "loss": 1.3032, + "step": 7490 + }, + { + "epoch": 0.7911392405063291, + "grad_norm": 0.5976951718330383, + "learning_rate": 0.0002348479961038625, + "loss": 1.2989, + "step": 7500 + }, + { + "epoch": 0.7921940928270043, + "grad_norm": 0.5823932886123657, + "learning_rate": 0.00023161097311592867, + "loss": 1.3059, + "step": 7510 + }, + { + "epoch": 0.7932489451476793, + "grad_norm": 0.557056725025177, + "learning_rate": 0.00022841856757416538, + "loss": 1.2885, + "step": 7520 + }, + { + "epoch": 0.7943037974683544, + "grad_norm": 0.5866828560829163, + "learning_rate": 0.0002252701644948328, + "loss": 1.2923, + "step": 7530 + }, + { + "epoch": 0.7953586497890295, + "grad_norm": 0.6609965562820435, + "learning_rate": 0.00022216515737080817, + "loss": 1.2871, + "step": 7540 + }, + { + "epoch": 0.7964135021097046, + "grad_norm": 0.6180319786071777, + "learning_rate": 0.00021910294805474833, + "loss": 1.2854, + "step": 7550 + }, + { + "epoch": 0.7974683544303798, + "grad_norm": 0.5820198655128479, + "learning_rate": 0.0002160829466438629, + "loss": 1.2875, + "step": 7560 + }, + { + "epoch": 0.7985232067510548, + "grad_norm": 0.6657914519309998, + "learning_rate": 0.00021310457136627562, + "loss": 1.301, + "step": 7570 + }, + { + "epoch": 0.79957805907173, + "grad_norm": 0.573926568031311, + "learning_rate": 0.00021016724846895213, + "loss": 1.2951, + "step": 7580 + }, + { + "epoch": 0.8006329113924051, + "grad_norm": 0.5542241334915161, + "learning_rate": 0.00020727041210717235, + "loss": 1.2865, + "step": 7590 + }, + { + "epoch": 0.8016877637130801, + "grad_norm": 0.5828022956848145, + "learning_rate": 0.00020441350423552624, + "loss": 1.2903, + "step": 7600 + }, + { + "epoch": 0.8027426160337553, + "grad_norm": 
0.6762633919715881, + "learning_rate": 0.00020159597450041257, + "loss": 1.3027, + "step": 7610 + }, + { + "epoch": 0.8037974683544303, + "grad_norm": 0.5481005311012268, + "learning_rate": 0.00019881728013401842, + "loss": 1.2755, + "step": 7620 + }, + { + "epoch": 0.8048523206751055, + "grad_norm": 0.5577453374862671, + "learning_rate": 0.00019607688584976116, + "loss": 1.298, + "step": 7630 + }, + { + "epoch": 0.8059071729957806, + "grad_norm": 0.6402403712272644, + "learning_rate": 0.00019337426373917076, + "loss": 1.2894, + "step": 7640 + }, + { + "epoch": 0.8069620253164557, + "grad_norm": 0.5664169192314148, + "learning_rate": 0.00019070889317019375, + "loss": 1.3052, + "step": 7650 + }, + { + "epoch": 0.8080168776371308, + "grad_norm": 0.5562662482261658, + "learning_rate": 0.00018808026068689883, + "loss": 1.2844, + "step": 7660 + }, + { + "epoch": 0.8090717299578059, + "grad_norm": 0.541614294052124, + "learning_rate": 0.00018548785991056508, + "loss": 1.2918, + "step": 7670 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 0.6785740256309509, + "learning_rate": 0.00018293119144213328, + "loss": 1.3044, + "step": 7680 + }, + { + "epoch": 0.8111814345991561, + "grad_norm": 0.704336941242218, + "learning_rate": 0.00018040976276600176, + "loss": 1.2766, + "step": 7690 + }, + { + "epoch": 0.8122362869198312, + "grad_norm": 0.5556739568710327, + "learning_rate": 0.00017792308815514854, + "loss": 1.2899, + "step": 7700 + }, + { + "epoch": 0.8132911392405063, + "grad_norm": 0.6499749422073364, + "learning_rate": 0.00017547068857756104, + "loss": 1.2852, + "step": 7710 + }, + { + "epoch": 0.8143459915611815, + "grad_norm": 0.7175188064575195, + "learning_rate": 0.00017305209160395547, + "loss": 1.2885, + "step": 7720 + }, + { + "epoch": 0.8154008438818565, + "grad_norm": 0.6209173798561096, + "learning_rate": 0.00017066683131676825, + "loss": 1.2932, + "step": 7730 + }, + { + "epoch": 0.8164556962025317, + "grad_norm": 0.5800016522407532, + "learning_rate": 
0.00016831444822040207, + "loss": 1.2942, + "step": 7740 + }, + { + "epoch": 0.8175105485232067, + "grad_norm": 0.5366095304489136, + "learning_rate": 0.00016599448915270845, + "loss": 1.2805, + "step": 7750 + }, + { + "epoch": 0.8185654008438819, + "grad_norm": 0.6281787157058716, + "learning_rate": 0.000163706507197691, + "loss": 1.2801, + "step": 7760 + }, + { + "epoch": 0.819620253164557, + "grad_norm": 0.5832951068878174, + "learning_rate": 0.0001614500615994117, + "loss": 1.2898, + "step": 7770 + }, + { + "epoch": 0.820675105485232, + "grad_norm": 0.8327756524085999, + "learning_rate": 0.00015922471767708377, + "loss": 1.282, + "step": 7780 + }, + { + "epoch": 0.8217299578059072, + "grad_norm": 0.7514030337333679, + "learning_rate": 0.00015703004674133498, + "loss": 1.2966, + "step": 7790 + }, + { + "epoch": 0.8227848101265823, + "grad_norm": 0.5415205359458923, + "learning_rate": 0.00015486562601162512, + "loss": 1.2848, + "step": 7800 + }, + { + "epoch": 0.8238396624472574, + "grad_norm": 0.5393462777137756, + "learning_rate": 0.0001527310385348017, + "loss": 1.2858, + "step": 7810 + }, + { + "epoch": 0.8248945147679325, + "grad_norm": 0.5403496026992798, + "learning_rate": 0.00015062587310477813, + "loss": 1.2939, + "step": 7820 + }, + { + "epoch": 0.8259493670886076, + "grad_norm": 0.5638842582702637, + "learning_rate": 0.00014854972418331948, + "loss": 1.2838, + "step": 7830 + }, + { + "epoch": 0.8270042194092827, + "grad_norm": 0.5383678078651428, + "learning_rate": 0.00014650219182191934, + "loss": 1.2666, + "step": 7840 + }, + { + "epoch": 0.8280590717299579, + "grad_norm": 0.5516685247421265, + "learning_rate": 0.00014448288158475423, + "loss": 1.276, + "step": 7850 + }, + { + "epoch": 0.8291139240506329, + "grad_norm": 0.5326411724090576, + "learning_rate": 0.0001424914044726995, + "loss": 1.2659, + "step": 7860 + }, + { + "epoch": 0.830168776371308, + "grad_norm": 0.5708659887313843, + "learning_rate": 0.0001405273768483926, + "loss": 1.2811, + 
"step": 7870 + }, + { + "epoch": 0.8312236286919831, + "grad_norm": 0.7766915559768677, + "learning_rate": 0.0001385904203623296, + "loss": 1.3019, + "step": 7880 + }, + { + "epoch": 0.8322784810126582, + "grad_norm": 0.5668004751205444, + "learning_rate": 0.00013668016187997964, + "loss": 1.2943, + "step": 7890 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.5771456360816956, + "learning_rate": 0.0001347962334099052, + "loss": 1.2763, + "step": 7900 + }, + { + "epoch": 0.8343881856540084, + "grad_norm": 0.738175630569458, + "learning_rate": 0.00013293827203287141, + "loss": 1.2871, + "step": 7910 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 0.7563618421554565, + "learning_rate": 0.00013110591983193424, + "loss": 1.282, + "step": 7920 + }, + { + "epoch": 0.8364978902953587, + "grad_norm": 0.5457082986831665, + "learning_rate": 0.00012929882382349103, + "loss": 1.2936, + "step": 7930 + }, + { + "epoch": 0.8375527426160337, + "grad_norm": 0.5385299324989319, + "learning_rate": 0.0001275166358892821, + "loss": 1.2853, + "step": 7940 + }, + { + "epoch": 0.8386075949367089, + "grad_norm": 0.5696066617965698, + "learning_rate": 0.00012575901270932944, + "loss": 1.2901, + "step": 7950 + }, + { + "epoch": 0.8396624472573839, + "grad_norm": 0.5557435750961304, + "learning_rate": 0.00012402561569579935, + "loss": 1.2747, + "step": 7960 + }, + { + "epoch": 0.8407172995780591, + "grad_norm": 0.6374795436859131, + "learning_rate": 0.00012231611092777743, + "loss": 1.2748, + "step": 7970 + }, + { + "epoch": 0.8417721518987342, + "grad_norm": 0.5395510196685791, + "learning_rate": 0.00012063016908694192, + "loss": 1.2743, + "step": 7980 + }, + { + "epoch": 0.8428270042194093, + "grad_norm": 0.6053385138511658, + "learning_rate": 0.00011896746539412405, + "loss": 1.2858, + "step": 7990 + }, + { + "epoch": 0.8438818565400844, + "grad_norm": 0.6090646386146545, + "learning_rate": 0.00011732767954674264, + "loss": 1.2795, + "step": 8000 + }, + { + "epoch": 
0.8449367088607594, + "grad_norm": 0.5928127765655518, + "learning_rate": 0.00011571049565710122, + "loss": 1.2875, + "step": 8010 + }, + { + "epoch": 0.8459915611814346, + "grad_norm": 0.5803035497665405, + "learning_rate": 0.00011411560219153552, + "loss": 1.2801, + "step": 8020 + }, + { + "epoch": 0.8470464135021097, + "grad_norm": 0.5826364159584045, + "learning_rate": 0.0001125426919103997, + "loss": 1.2788, + "step": 8030 + }, + { + "epoch": 0.8481012658227848, + "grad_norm": 0.5570178031921387, + "learning_rate": 0.00011099146180887992, + "loss": 1.2908, + "step": 8040 + }, + { + "epoch": 0.8491561181434599, + "grad_norm": 0.5646833777427673, + "learning_rate": 0.0001094616130586235, + "loss": 1.269, + "step": 8050 + }, + { + "epoch": 0.8502109704641351, + "grad_norm": 0.6026574969291687, + "learning_rate": 0.00010795285095017282, + "loss": 1.279, + "step": 8060 + }, + { + "epoch": 0.8512658227848101, + "grad_norm": 0.5310707092285156, + "learning_rate": 0.00010646488483619263, + "loss": 1.2833, + "step": 8070 + }, + { + "epoch": 0.8523206751054853, + "grad_norm": 0.5395969152450562, + "learning_rate": 0.00010499742807547978, + "loss": 1.2851, + "step": 8080 + }, + { + "epoch": 0.8533755274261603, + "grad_norm": 0.5338074564933777, + "learning_rate": 0.0001035501979777448, + "loss": 1.2739, + "step": 8090 + }, + { + "epoch": 0.8544303797468354, + "grad_norm": 0.5860329866409302, + "learning_rate": 0.00010212291574915464, + "loss": 1.267, + "step": 8100 + }, + { + "epoch": 0.8554852320675106, + "grad_norm": 0.5978607535362244, + "learning_rate": 0.00010071530643862575, + "loss": 1.286, + "step": 8110 + }, + { + "epoch": 0.8565400843881856, + "grad_norm": 0.5724721550941467, + "learning_rate": 9.932709888485788e-05, + "loss": 1.2754, + "step": 8120 + }, + { + "epoch": 0.8575949367088608, + "grad_norm": 0.5468754768371582, + "learning_rate": 9.79580256640974e-05, + "loss": 1.2768, + "step": 8130 + }, + { + "epoch": 0.8586497890295358, + "grad_norm": 
0.552446186542511, + "learning_rate": 9.660782303862107e-05, + "loss": 1.2749, + "step": 8140 + }, + { + "epoch": 0.859704641350211, + "grad_norm": 0.5334843993186951, + "learning_rate": 9.527623090592962e-05, + "loss": 1.2706, + "step": 8150 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 0.545730471611023, + "learning_rate": 9.396299274864176e-05, + "loss": 1.2827, + "step": 8160 + }, + { + "epoch": 0.8618143459915611, + "grad_norm": 0.5525760650634766, + "learning_rate": 9.266785558507876e-05, + "loss": 1.2722, + "step": 8170 + }, + { + "epoch": 0.8628691983122363, + "grad_norm": 0.7040855288505554, + "learning_rate": 9.139056992053017e-05, + "loss": 1.2805, + "step": 8180 + }, + { + "epoch": 0.8639240506329114, + "grad_norm": 0.6228870153427124, + "learning_rate": 9.01308896991912e-05, + "loss": 1.2729, + "step": 8190 + }, + { + "epoch": 0.8649789029535865, + "grad_norm": 0.5916566252708435, + "learning_rate": 8.88885722567627e-05, + "loss": 1.2844, + "step": 8200 + }, + { + "epoch": 0.8660337552742616, + "grad_norm": 0.5285206437110901, + "learning_rate": 8.766337827370438e-05, + "loss": 1.2768, + "step": 8210 + }, + { + "epoch": 0.8670886075949367, + "grad_norm": 0.5946583151817322, + "learning_rate": 8.645507172913238e-05, + "loss": 1.2848, + "step": 8220 + }, + { + "epoch": 0.8681434599156118, + "grad_norm": 0.5718642473220825, + "learning_rate": 8.52634198553523e-05, + "loss": 1.2804, + "step": 8230 + }, + { + "epoch": 0.869198312236287, + "grad_norm": 0.5281186103820801, + "learning_rate": 8.408819309301891e-05, + "loss": 1.2764, + "step": 8240 + }, + { + "epoch": 0.870253164556962, + "grad_norm": 0.5469676852226257, + "learning_rate": 8.292916504691398e-05, + "loss": 1.2929, + "step": 8250 + }, + { + "epoch": 0.8713080168776371, + "grad_norm": 0.5332617163658142, + "learning_rate": 8.178611244233354e-05, + "loss": 1.2833, + "step": 8260 + }, + { + "epoch": 0.8723628691983122, + "grad_norm": 0.583291232585907, + "learning_rate": 
8.065881508207636e-05, + "loss": 1.2786, + "step": 8270 + }, + { + "epoch": 0.8734177215189873, + "grad_norm": 0.5411346554756165, + "learning_rate": 7.954705580402525e-05, + "loss": 1.2637, + "step": 8280 + }, + { + "epoch": 0.8744725738396625, + "grad_norm": 0.5563315749168396, + "learning_rate": 7.845062043931299e-05, + "loss": 1.264, + "step": 8290 + }, + { + "epoch": 0.8755274261603375, + "grad_norm": 0.52442467212677, + "learning_rate": 7.736929777106499e-05, + "loss": 1.2889, + "step": 8300 + }, + { + "epoch": 0.8765822784810127, + "grad_norm": 0.5574929714202881, + "learning_rate": 7.630287949371051e-05, + "loss": 1.2661, + "step": 8310 + }, + { + "epoch": 0.8776371308016878, + "grad_norm": 0.6341872811317444, + "learning_rate": 7.525116017285479e-05, + "loss": 1.2884, + "step": 8320 + }, + { + "epoch": 0.8786919831223629, + "grad_norm": 0.5448864102363586, + "learning_rate": 7.421393720570416e-05, + "loss": 1.2782, + "step": 8330 + }, + { + "epoch": 0.879746835443038, + "grad_norm": 0.5391655564308167, + "learning_rate": 7.319101078203692e-05, + "loss": 1.266, + "step": 8340 + }, + { + "epoch": 0.880801687763713, + "grad_norm": 0.5553624629974365, + "learning_rate": 7.218218384571176e-05, + "loss": 1.2685, + "step": 8350 + }, + { + "epoch": 0.8818565400843882, + "grad_norm": 0.567132294178009, + "learning_rate": 7.118726205670702e-05, + "loss": 1.2683, + "step": 8360 + }, + { + "epoch": 0.8829113924050633, + "grad_norm": 0.5360206365585327, + "learning_rate": 7.020605375368314e-05, + "loss": 1.2744, + "step": 8370 + }, + { + "epoch": 0.8839662447257384, + "grad_norm": 0.5772556066513062, + "learning_rate": 6.923836991706108e-05, + "loss": 1.2804, + "step": 8380 + }, + { + "epoch": 0.8850210970464135, + "grad_norm": 0.5516787171363831, + "learning_rate": 6.828402413260965e-05, + "loss": 1.2724, + "step": 8390 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 0.5543622374534607, + "learning_rate": 6.73428325555347e-05, + "loss": 1.2708, + "step": 8400 + 
}, + { + "epoch": 0.8871308016877637, + "grad_norm": 0.717890202999115, + "learning_rate": 6.641461387506347e-05, + "loss": 1.2918, + "step": 8410 + }, + { + "epoch": 0.8881856540084389, + "grad_norm": 0.5493847727775574, + "learning_rate": 6.549918927951679e-05, + "loss": 1.277, + "step": 8420 + }, + { + "epoch": 0.8892405063291139, + "grad_norm": 0.5648174285888672, + "learning_rate": 6.459638242186298e-05, + "loss": 1.2736, + "step": 8430 + }, + { + "epoch": 0.890295358649789, + "grad_norm": 0.5381418466567993, + "learning_rate": 6.370601938574637e-05, + "loss": 1.2699, + "step": 8440 + }, + { + "epoch": 0.8913502109704642, + "grad_norm": 0.6226041913032532, + "learning_rate": 6.282792865198421e-05, + "loss": 1.2765, + "step": 8450 + }, + { + "epoch": 0.8924050632911392, + "grad_norm": 0.5545132160186768, + "learning_rate": 6.196194106552512e-05, + "loss": 1.2736, + "step": 8460 + }, + { + "epoch": 0.8934599156118144, + "grad_norm": 0.556125819683075, + "learning_rate": 6.110788980286329e-05, + "loss": 1.2747, + "step": 8470 + }, + { + "epoch": 0.8945147679324894, + "grad_norm": 0.550099790096283, + "learning_rate": 6.026561033990159e-05, + "loss": 1.2679, + "step": 8480 + }, + { + "epoch": 0.8955696202531646, + "grad_norm": 0.5472410321235657, + "learning_rate": 5.943494042025771e-05, + "loss": 1.2744, + "step": 8490 + }, + { + "epoch": 0.8966244725738397, + "grad_norm": 0.5375562310218811, + "learning_rate": 5.8615720024007174e-05, + "loss": 1.2647, + "step": 8500 + }, + { + "epoch": 0.8976793248945147, + "grad_norm": 0.5676476955413818, + "learning_rate": 5.780779133685717e-05, + "loss": 1.2762, + "step": 8510 + }, + { + "epoch": 0.8987341772151899, + "grad_norm": 0.5716816782951355, + "learning_rate": 5.701099871974525e-05, + "loss": 1.2711, + "step": 8520 + }, + { + "epoch": 0.8997890295358649, + "grad_norm": 0.5577533841133118, + "learning_rate": 5.6225188678857095e-05, + "loss": 1.2848, + "step": 8530 + }, + { + "epoch": 0.9008438818565401, + "grad_norm": 
0.5745214223861694, + "learning_rate": 5.545020983605749e-05, + "loss": 1.2803, + "step": 8540 + }, + { + "epoch": 0.9018987341772152, + "grad_norm": 0.5501154065132141, + "learning_rate": 5.4685912899728965e-05, + "loss": 1.2721, + "step": 8550 + }, + { + "epoch": 0.9029535864978903, + "grad_norm": 0.5267707705497742, + "learning_rate": 5.39321506360123e-05, + "loss": 1.265, + "step": 8560 + }, + { + "epoch": 0.9040084388185654, + "grad_norm": 0.5977159738540649, + "learning_rate": 5.318877784044342e-05, + "loss": 1.283, + "step": 8570 + }, + { + "epoch": 0.9050632911392406, + "grad_norm": 0.6258057951927185, + "learning_rate": 5.245565130998124e-05, + "loss": 1.2764, + "step": 8580 + }, + { + "epoch": 0.9061181434599156, + "grad_norm": 0.5939884185791016, + "learning_rate": 5.173262981542119e-05, + "loss": 1.2799, + "step": 8590 + }, + { + "epoch": 0.9071729957805907, + "grad_norm": 0.65263432264328, + "learning_rate": 5.101957407418877e-05, + "loss": 1.2694, + "step": 8600 + }, + { + "epoch": 0.9082278481012658, + "grad_norm": 0.5583781599998474, + "learning_rate": 5.0316346723508287e-05, + "loss": 1.2709, + "step": 8610 + }, + { + "epoch": 0.9092827004219409, + "grad_norm": 0.5337013602256775, + "learning_rate": 4.962281229394129e-05, + "loss": 1.2689, + "step": 8620 + }, + { + "epoch": 0.9103375527426161, + "grad_norm": 0.590074896812439, + "learning_rate": 4.893883718328984e-05, + "loss": 1.278, + "step": 8630 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 0.6577619314193726, + "learning_rate": 4.8264289630859386e-05, + "loss": 1.2592, + "step": 8640 + }, + { + "epoch": 0.9124472573839663, + "grad_norm": 0.565948486328125, + "learning_rate": 4.759903969207646e-05, + "loss": 1.2651, + "step": 8650 + }, + { + "epoch": 0.9135021097046413, + "grad_norm": 0.7754303812980652, + "learning_rate": 4.694295921345623e-05, + "loss": 1.2657, + "step": 8660 + }, + { + "epoch": 0.9145569620253164, + "grad_norm": 0.568676233291626, + "learning_rate": 
4.629592180791501e-05, + "loss": 1.2749, + "step": 8670 + }, + { + "epoch": 0.9156118143459916, + "grad_norm": 0.5254570841789246, + "learning_rate": 4.565780283042316e-05, + "loss": 1.2812, + "step": 8680 + }, + { + "epoch": 0.9166666666666666, + "grad_norm": 0.5713823437690735, + "learning_rate": 4.502847935399348e-05, + "loss": 1.2751, + "step": 8690 + }, + { + "epoch": 0.9177215189873418, + "grad_norm": 0.585989773273468, + "learning_rate": 4.440783014600059e-05, + "loss": 1.2736, + "step": 8700 + }, + { + "epoch": 0.9187763713080169, + "grad_norm": 0.5710393190383911, + "learning_rate": 4.3795735644826776e-05, + "loss": 1.2702, + "step": 8710 + }, + { + "epoch": 0.919831223628692, + "grad_norm": 0.5540229678153992, + "learning_rate": 4.319207793682965e-05, + "loss": 1.2655, + "step": 8720 + }, + { + "epoch": 0.9208860759493671, + "grad_norm": 0.5305635929107666, + "learning_rate": 4.259674073362732e-05, + "loss": 1.2769, + "step": 8730 + }, + { + "epoch": 0.9219409282700421, + "grad_norm": 0.5160630345344543, + "learning_rate": 4.200960934969664e-05, + "loss": 1.2644, + "step": 8740 + }, + { + "epoch": 0.9229957805907173, + "grad_norm": 0.5420458912849426, + "learning_rate": 4.143057068028024e-05, + "loss": 1.2669, + "step": 8750 + }, + { + "epoch": 0.9240506329113924, + "grad_norm": 0.6328778862953186, + "learning_rate": 4.0859513179598096e-05, + "loss": 1.2519, + "step": 8760 + }, + { + "epoch": 0.9251054852320675, + "grad_norm": 0.5354390144348145, + "learning_rate": 4.02963268393593e-05, + "loss": 1.2638, + "step": 8770 + }, + { + "epoch": 0.9261603375527426, + "grad_norm": 0.5483447909355164, + "learning_rate": 3.974090316757029e-05, + "loss": 1.2739, + "step": 8780 + }, + { + "epoch": 0.9272151898734177, + "grad_norm": 0.5258885622024536, + "learning_rate": 3.919313516763478e-05, + "loss": 1.2674, + "step": 8790 + }, + { + "epoch": 0.9282700421940928, + "grad_norm": 0.5314297080039978, + "learning_rate": 3.8652917317742106e-05, + "loss": 1.2641, + 
"step": 8800 + }, + { + "epoch": 0.929324894514768, + "grad_norm": 0.5217207074165344, + "learning_rate": 3.812014555053955e-05, + "loss": 1.2833, + "step": 8810 + }, + { + "epoch": 0.930379746835443, + "grad_norm": 0.5018077492713928, + "learning_rate": 3.759471723308477e-05, + "loss": 1.2695, + "step": 8820 + }, + { + "epoch": 0.9314345991561181, + "grad_norm": 0.5357413291931152, + "learning_rate": 3.707653114707471e-05, + "loss": 1.2844, + "step": 8830 + }, + { + "epoch": 0.9324894514767933, + "grad_norm": 0.5380489826202393, + "learning_rate": 3.6565487469346904e-05, + "loss": 1.2716, + "step": 8840 + }, + { + "epoch": 0.9335443037974683, + "grad_norm": 0.5998616218566895, + "learning_rate": 3.606148775264958e-05, + "loss": 1.2688, + "step": 8850 + }, + { + "epoch": 0.9345991561181435, + "grad_norm": 0.573021650314331, + "learning_rate": 3.5564434906676834e-05, + "loss": 1.2694, + "step": 8860 + }, + { + "epoch": 0.9356540084388185, + "grad_norm": 0.522533655166626, + "learning_rate": 3.507423317936521e-05, + "loss": 1.2706, + "step": 8870 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 0.5812368988990784, + "learning_rate": 3.4590788138448004e-05, + "loss": 1.275, + "step": 8880 + }, + { + "epoch": 0.9377637130801688, + "grad_norm": 0.5710785984992981, + "learning_rate": 3.411400665326393e-05, + "loss": 1.2727, + "step": 8890 + }, + { + "epoch": 0.9388185654008439, + "grad_norm": 0.5232232213020325, + "learning_rate": 3.364379687681642e-05, + "loss": 1.2647, + "step": 8900 + }, + { + "epoch": 0.939873417721519, + "grad_norm": 0.5663409233093262, + "learning_rate": 3.31800682280803e-05, + "loss": 1.2637, + "step": 8910 + }, + { + "epoch": 0.9409282700421941, + "grad_norm": 0.4934537708759308, + "learning_rate": 3.272273137455225e-05, + "loss": 1.2724, + "step": 8920 + }, + { + "epoch": 0.9419831223628692, + "grad_norm": 0.5227131247520447, + "learning_rate": 3.227169821504187e-05, + "loss": 1.2686, + "step": 8930 + }, + { + "epoch": 0.9430379746835443, 
+ "grad_norm": 0.5141364932060242, + "learning_rate": 3.182688186269985e-05, + "loss": 1.2686, + "step": 8940 + }, + { + "epoch": 0.9440928270042194, + "grad_norm": 0.5139867663383484, + "learning_rate": 3.138819662828018e-05, + "loss": 1.2718, + "step": 8950 + }, + { + "epoch": 0.9451476793248945, + "grad_norm": 0.5554178357124329, + "learning_rate": 3.095555800363297e-05, + "loss": 1.2734, + "step": 8960 + }, + { + "epoch": 0.9462025316455697, + "grad_norm": 0.6278396248817444, + "learning_rate": 3.052888264542484e-05, + "loss": 1.272, + "step": 8970 + }, + { + "epoch": 0.9472573839662447, + "grad_norm": 0.6647807955741882, + "learning_rate": 3.0108088359083675e-05, + "loss": 1.2671, + "step": 8980 + }, + { + "epoch": 0.9483122362869199, + "grad_norm": 0.5185957551002502, + "learning_rate": 2.9693094082964775e-05, + "loss": 1.2735, + "step": 8990 + }, + { + "epoch": 0.9493670886075949, + "grad_norm": 0.5422529578208923, + "learning_rate": 2.928381987273507e-05, + "loss": 1.2691, + "step": 9000 + }, + { + "epoch": 0.95042194092827, + "grad_norm": 0.5782288312911987, + "learning_rate": 2.8880186885972716e-05, + "loss": 1.2637, + "step": 9010 + }, + { + "epoch": 0.9514767932489452, + "grad_norm": 0.5432119369506836, + "learning_rate": 2.8482117366978935e-05, + "loss": 1.2648, + "step": 9020 + }, + { + "epoch": 0.9525316455696202, + "grad_norm": 0.5608564615249634, + "learning_rate": 2.808953463179918e-05, + "loss": 1.2837, + "step": 9030 + }, + { + "epoch": 0.9535864978902954, + "grad_norm": 0.528923511505127, + "learning_rate": 2.770236305345076e-05, + "loss": 1.261, + "step": 9040 + }, + { + "epoch": 0.9546413502109705, + "grad_norm": 0.5418644547462463, + "learning_rate": 2.732052804735409e-05, + "loss": 1.271, + "step": 9050 + }, + { + "epoch": 0.9556962025316456, + "grad_norm": 0.5128931403160095, + "learning_rate": 2.6943956056964773e-05, + "loss": 1.2724, + "step": 9060 + }, + { + "epoch": 0.9567510548523207, + "grad_norm": 0.5198342800140381, + 
"learning_rate": 2.6572574539603643e-05, + "loss": 1.2733, + "step": 9070 + }, + { + "epoch": 0.9578059071729957, + "grad_norm": 0.5258825421333313, + "learning_rate": 2.6206311952482224e-05, + "loss": 1.2744, + "step": 9080 + }, + { + "epoch": 0.9588607594936709, + "grad_norm": 0.5528097748756409, + "learning_rate": 2.584509773892073e-05, + "loss": 1.2734, + "step": 9090 + }, + { + "epoch": 0.959915611814346, + "grad_norm": 0.5232042074203491, + "learning_rate": 2.5488862314756066e-05, + "loss": 1.2535, + "step": 9100 + }, + { + "epoch": 0.9609704641350211, + "grad_norm": 0.5420709252357483, + "learning_rate": 2.513753705493713e-05, + "loss": 1.266, + "step": 9110 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 0.6632100939750671, + "learning_rate": 2.4791054280304972e-05, + "loss": 1.2765, + "step": 9120 + }, + { + "epoch": 0.9630801687763713, + "grad_norm": 0.5401764512062073, + "learning_rate": 2.4449347244555043e-05, + "loss": 1.2587, + "step": 9130 + }, + { + "epoch": 0.9641350210970464, + "grad_norm": 0.6123665571212769, + "learning_rate": 2.4112350121379255e-05, + "loss": 1.2787, + "step": 9140 + }, + { + "epoch": 0.9651898734177216, + "grad_norm": 0.5475592613220215, + "learning_rate": 2.3779997991785207e-05, + "loss": 1.2648, + "step": 9150 + }, + { + "epoch": 0.9662447257383966, + "grad_norm": 0.5607396364212036, + "learning_rate": 2.3452226831590232e-05, + "loss": 1.2749, + "step": 9160 + }, + { + "epoch": 0.9672995780590717, + "grad_norm": 0.5257548093795776, + "learning_rate": 2.3128973499087785e-05, + "loss": 1.2713, + "step": 9170 + }, + { + "epoch": 0.9683544303797469, + "grad_norm": 0.5165111422538757, + "learning_rate": 2.2810175722883866e-05, + "loss": 1.2685, + "step": 9180 + }, + { + "epoch": 0.9694092827004219, + "grad_norm": 0.5612360835075378, + "learning_rate": 2.2495772089901067e-05, + "loss": 1.2648, + "step": 9190 + }, + { + "epoch": 0.9704641350210971, + "grad_norm": 0.5288776159286499, + "learning_rate": 2.218570203354799e-05, 
+ "loss": 1.2668, + "step": 9200 + }, + { + "epoch": 0.9715189873417721, + "grad_norm": 0.5250431895256042, + "learning_rate": 2.187990582205175e-05, + "loss": 1.267, + "step": 9210 + }, + { + "epoch": 0.9725738396624473, + "grad_norm": 0.541925311088562, + "learning_rate": 2.157832454695122e-05, + "loss": 1.2658, + "step": 9220 + }, + { + "epoch": 0.9736286919831224, + "grad_norm": 0.5204476118087769, + "learning_rate": 2.1280900111748943e-05, + "loss": 1.259, + "step": 9230 + }, + { + "epoch": 0.9746835443037974, + "grad_norm": 0.5493022203445435, + "learning_rate": 2.0987575220719476e-05, + "loss": 1.2526, + "step": 9240 + }, + { + "epoch": 0.9757383966244726, + "grad_norm": 0.5256417393684387, + "learning_rate": 2.069829336787193e-05, + "loss": 1.2641, + "step": 9250 + }, + { + "epoch": 0.9767932489451476, + "grad_norm": 0.5424153804779053, + "learning_rate": 2.0412998826064695e-05, + "loss": 1.2609, + "step": 9260 + }, + { + "epoch": 0.9778481012658228, + "grad_norm": 0.5490899682044983, + "learning_rate": 2.0131636636270178e-05, + "loss": 1.2723, + "step": 9270 + }, + { + "epoch": 0.9789029535864979, + "grad_norm": 0.5246019959449768, + "learning_rate": 1.9854152596987523e-05, + "loss": 1.259, + "step": 9280 + }, + { + "epoch": 0.979957805907173, + "grad_norm": 0.5395703315734863, + "learning_rate": 1.9580493253801253e-05, + "loss": 1.2658, + "step": 9290 + }, + { + "epoch": 0.9810126582278481, + "grad_norm": 0.5456987023353577, + "learning_rate": 1.9310605889083842e-05, + "loss": 1.2659, + "step": 9300 + }, + { + "epoch": 0.9820675105485233, + "grad_norm": 0.5712338089942932, + "learning_rate": 1.904443851184018e-05, + "loss": 1.2677, + "step": 9310 + }, + { + "epoch": 0.9831223628691983, + "grad_norm": 0.5761494040489197, + "learning_rate": 1.87819398476921e-05, + "loss": 1.2627, + "step": 9320 + }, + { + "epoch": 0.9841772151898734, + "grad_norm": 0.5261714458465576, + "learning_rate": 1.8523059329000848e-05, + "loss": 1.2737, + "step": 9330 + }, + { + 
"epoch": 0.9852320675105485, + "grad_norm": 0.5157657861709595, + "learning_rate": 1.826774708512579e-05, + "loss": 1.2715, + "step": 9340 + }, + { + "epoch": 0.9862869198312236, + "grad_norm": 0.5284278392791748, + "learning_rate": 1.8015953932817347e-05, + "loss": 1.2813, + "step": 9350 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 0.4970618784427643, + "learning_rate": 1.7767631366742332e-05, + "loss": 1.2735, + "step": 9360 + }, + { + "epoch": 0.9883966244725738, + "grad_norm": 0.570702850818634, + "learning_rate": 1.7522731550139926e-05, + "loss": 1.2721, + "step": 9370 + }, + { + "epoch": 0.989451476793249, + "grad_norm": 0.5712816715240479, + "learning_rate": 1.728120730560641e-05, + "loss": 1.2582, + "step": 9380 + }, + { + "epoch": 0.990506329113924, + "grad_norm": 0.5668426156044006, + "learning_rate": 1.704301210600693e-05, + "loss": 1.2626, + "step": 9390 + }, + { + "epoch": 0.9915611814345991, + "grad_norm": 0.49879419803619385, + "learning_rate": 1.6808100065512536e-05, + "loss": 1.2748, + "step": 9400 + }, + { + "epoch": 0.9926160337552743, + "grad_norm": 0.5390027761459351, + "learning_rate": 1.657642593076074e-05, + "loss": 1.2515, + "step": 9410 + }, + { + "epoch": 0.9936708860759493, + "grad_norm": 0.5791448950767517, + "learning_rate": 1.634794507213793e-05, + "loss": 1.2684, + "step": 9420 + }, + { + "epoch": 0.9947257383966245, + "grad_norm": 0.5216628909111023, + "learning_rate": 1.6122613475181976e-05, + "loss": 1.2693, + "step": 9430 + }, + { + "epoch": 0.9957805907172996, + "grad_norm": 0.5421786904335022, + "learning_rate": 1.590038773210323e-05, + "loss": 1.2661, + "step": 9440 + }, + { + "epoch": 0.9968354430379747, + "grad_norm": 0.5739501118659973, + "learning_rate": 1.568122503342252e-05, + "loss": 1.2751, + "step": 9450 + }, + { + "epoch": 0.9978902953586498, + "grad_norm": 0.5325318574905396, + "learning_rate": 1.5465083159724344e-05, + "loss": 1.2603, + "step": 9460 + }, + { + "epoch": 0.9989451476793249, + "grad_norm": 
0.5379754304885864, + "learning_rate": 1.5251920473523708e-05, + "loss": 1.2719, + "step": 9470 + }, + { + "epoch": 1.0, + "grad_norm": 1.5988247394561768, + "learning_rate": 1.5041695911245136e-05, + "loss": 1.2623, + "step": 9480 + } + ], + "logging_steps": 10, + "max_steps": 9480, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.833798122374349e+16, + "train_batch_size": 1024, + "trial_name": null, + "trial_params": null +} diff --git a/saves-stablelm/checkpoint-9480/training_args.bin b/saves-stablelm/checkpoint-9480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e95364c2a1a568a69a2a84e48ce2792d1e00a619 --- /dev/null +++ b/saves-stablelm/checkpoint-9480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e1d1b10443f7b2dd57edc3d42306b72f19978a82109fda5f02a9fa8d33df5aa +size 5112 diff --git a/saves-stablelm/config.json b/saves-stablelm/config.json new file mode 100644 index 0000000000000000000000000000000000000000..25a1ab486ad5c4cccf2de695a11e27be68a84045 --- /dev/null +++ b/saves-stablelm/config.json @@ -0,0 +1,30 @@ +{ + "architectures": [ + "StableLmForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 0, + "eos_token_id": 0, + "hidden_act": "silu", + "hidden_dropout": 0.0, + "hidden_size": 256, + "initializer_range": 0.02, + "intermediate_size": 768, + "layer_norm_eps": 1e-05, + "max_position_embeddings": 4096, + "model_type": "stablelm", + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "partial_rotary_factor": 0.25, + "qk_layernorm": false, + "rope_scaling": null, + "rope_theta": 10000, + "tie_word_embeddings": true, + "torch_dtype": "float32", + 
"transformers_version": "4.42.4", + "use_cache": true, + "use_parallel_residual": false, + "use_qkv_bias": false, + "vocab_size": 2000 +} diff --git a/saves-stablelm/generation_config.json b/saves-stablelm/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..14e4f03d0d73dc2707d488ac8f586bd62ef72a7e --- /dev/null +++ b/saves-stablelm/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 0, + "eos_token_id": 0, + "transformers_version": "4.42.4" +} diff --git a/saves-stablelm/model.safetensors b/saves-stablelm/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2e65c82d5ba9ce5d81a3b2ce5c2d9de5d6072757 --- /dev/null +++ b/saves-stablelm/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:befa512227bb908b637e47f3aae315e20876a6f1e38cd11cf22ca3d4afdccc09 +size 8352336 diff --git a/saves-stablelm/result.log b/saves-stablelm/result.log new file mode 100644 index 0000000000000000000000000000000000000000..7fb578da29cd3a7e41e8e2111bd19e8782ef9d28 --- /dev/null +++ b/saves-stablelm/result.log @@ -0,0 +1 @@ +{'train_runtime': 1874.714, 'train_samples_per_second': 5177.645, 'train_steps_per_second': 5.057, 'train_loss': 1.5834917402468653, 'epoch': 1.0} \ No newline at end of file diff --git a/saves-stablelm/special_tokens_map.json b/saves-stablelm/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-stablelm/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false + } +} diff --git a/saves-stablelm/tokenizer.json b/saves-stablelm/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-stablelm/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 
21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + 
"re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + 
"ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + 
"res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + 
"被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 
782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + 
"Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + 
"ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 
1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + 
"çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + 
"Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + 
"Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 
1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 
1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 
1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-stablelm/tokenizer_config.json b/saves-stablelm/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-stablelm/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + 
"model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-starcoder2-cosine/checkpoint-9480/config.json b/saves-starcoder2-cosine/checkpoint-9480/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7255245cb3ad011849e2e9471897dc5fbc33393a --- /dev/null +++ b/saves-starcoder2-cosine/checkpoint-9480/config.json @@ -0,0 +1,27 @@ +{ + "architectures": [ + "Starcoder2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 50256, + "embedding_dropout": 0.0, + "eos_token_id": 50256, + "hidden_act": "gelu_pytorch_tanh", + "hidden_size": 256, + "initializer_range": 0.018042, + "intermediate_size": 768, + "max_position_embeddings": 4096, + "model_type": "starcoder2", + "norm_epsilon": 1e-05, + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_dropout": 0.0, + "rope_theta": 10000.0, + "sliding_window": null, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_bias": true, + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-starcoder2-cosine/checkpoint-9480/generation_config.json b/saves-starcoder2-cosine/checkpoint-9480/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..fb2eba6018c75d5bca061373b0ddaa2abf0a1f68 --- /dev/null +++ b/saves-starcoder2-cosine/checkpoint-9480/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 50256, + "eos_token_id": 50256, + "transformers_version": "4.42.4" +} diff --git a/saves-starcoder2-cosine/checkpoint-9480/model.safetensors b/saves-starcoder2-cosine/checkpoint-9480/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..877fab80e0b7393112819f9f88d25582a2eaeb7c --- /dev/null +++ b/saves-starcoder2-cosine/checkpoint-9480/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:5fde609862b7431958ec806051005c455e4d64887e7d5c4555bf6c9b704a177f +size 6794784 diff --git a/saves-starcoder2-cosine/checkpoint-9480/optimizer.pt b/saves-starcoder2-cosine/checkpoint-9480/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..34b5b9c3610e370e8c7eddf4b664f5a127bd3494 --- /dev/null +++ b/saves-starcoder2-cosine/checkpoint-9480/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a776bd8d2f0a4484f4baa73767ba514f1dafb3c015ae75e6d70feae2d61fe9cb +size 13612026 diff --git a/saves-starcoder2-cosine/checkpoint-9480/rng_state.pth b/saves-starcoder2-cosine/checkpoint-9480/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f8291cd6ce87668b786a72f3e93d072fbe54902 --- /dev/null +++ b/saves-starcoder2-cosine/checkpoint-9480/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c917636c7a58af68a29056522a757e9f9b99005b776641aa157c536967817d +size 14244 diff --git a/saves-starcoder2-cosine/checkpoint-9480/scheduler.pt b/saves-starcoder2-cosine/checkpoint-9480/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..47ca193b702fc31e51e3ee0689a4054b394880b6 --- /dev/null +++ b/saves-starcoder2-cosine/checkpoint-9480/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97f195640e66bde784a0961679ecd73c2a561c5a12962a7316325d731f304936 +size 1064 diff --git a/saves-starcoder2-cosine/checkpoint-9480/special_tokens_map.json b/saves-starcoder2-cosine/checkpoint-9480/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-starcoder2-cosine/checkpoint-9480/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-starcoder2-cosine/checkpoint-9480/tokenizer.json b/saves-starcoder2-cosine/checkpoint-9480/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-starcoder2-cosine/checkpoint-9480/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + 
"<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, 
+ "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 
325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 
473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + 
"æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, 
+ "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + 
"ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 
1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, 
+ "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + 
"åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 
1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + 
"ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + 
"Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 
1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + 
"ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-starcoder2-cosine/checkpoint-9480/tokenizer_config.json b/saves-starcoder2-cosine/checkpoint-9480/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-starcoder2-cosine/checkpoint-9480/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": 
false, + "eos_token": "<|im_end|>", + "errors": "replace", + "model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-starcoder2-cosine/checkpoint-9480/trainer_state.json b/saves-starcoder2-cosine/checkpoint-9480/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a826dbe48fa983327c0457b2c865399e01be222c --- /dev/null +++ b/saves-starcoder2-cosine/checkpoint-9480/trainer_state.json @@ -0,0 +1,6669 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 9480, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0010548523206751054, + "grad_norm": 1.370417833328247, + "learning_rate": 0.00015789473684210527, + "loss": 7.4617, + "step": 10 + }, + { + "epoch": 0.002109704641350211, + "grad_norm": 1.150888442993164, + "learning_rate": 0.00031578947368421053, + "loss": 6.8502, + "step": 20 + }, + { + "epoch": 0.0031645569620253164, + "grad_norm": 0.8894761204719543, + "learning_rate": 0.00047368421052631577, + "loss": 6.2399, + "step": 30 + }, + { + "epoch": 0.004219409282700422, + "grad_norm": 1.3080759048461914, + "learning_rate": 0.0006315789473684211, + "loss": 5.7357, + "step": 40 + }, + { + "epoch": 0.005274261603375527, + "grad_norm": 0.7724598050117493, + "learning_rate": 0.0007894736842105263, + "loss": 5.2688, + "step": 50 + }, + { + "epoch": 0.006329113924050633, + "grad_norm": 1.4499249458312988, + "learning_rate": 0.0009473684210526315, + "loss": 4.7832, + "step": 60 + }, + { + "epoch": 0.007383966244725738, + "grad_norm": 1.5776304006576538, + "learning_rate": 0.0011052631578947368, + "loss": 4.4177, + "step": 70 + }, + { + "epoch": 0.008438818565400843, + "grad_norm": 1.8438329696655273, + "learning_rate": 0.0012631578947368421, + "loss": 4.1798, + "step": 80 + }, + { + "epoch": 
0.00949367088607595, + "grad_norm": 1.204397439956665, + "learning_rate": 0.0014210526315789472, + "loss": 3.9921, + "step": 90 + }, + { + "epoch": 0.010548523206751054, + "grad_norm": 1.1295990943908691, + "learning_rate": 0.0014999989494847376, + "loss": 3.8639, + "step": 100 + }, + { + "epoch": 0.011603375527426161, + "grad_norm": 1.0295943021774292, + "learning_rate": 0.0014999905453802946, + "loss": 3.7259, + "step": 110 + }, + { + "epoch": 0.012658227848101266, + "grad_norm": 0.7916091084480286, + "learning_rate": 0.0014999737372655805, + "loss": 3.63, + "step": 120 + }, + { + "epoch": 0.013713080168776372, + "grad_norm": 1.0191645622253418, + "learning_rate": 0.0014999485253289388, + "loss": 3.539, + "step": 130 + }, + { + "epoch": 0.014767932489451477, + "grad_norm": 0.9488696455955505, + "learning_rate": 0.0014999149098528814, + "loss": 3.4477, + "step": 140 + }, + { + "epoch": 0.015822784810126583, + "grad_norm": 1.490135669708252, + "learning_rate": 0.0014998728912140862, + "loss": 3.3752, + "step": 150 + }, + { + "epoch": 0.016877637130801686, + "grad_norm": 0.9727163910865784, + "learning_rate": 0.0014998224698833922, + "loss": 3.3145, + "step": 160 + }, + { + "epoch": 0.017932489451476793, + "grad_norm": 1.3109499216079712, + "learning_rate": 0.0014997636464257956, + "loss": 3.2541, + "step": 170 + }, + { + "epoch": 0.0189873417721519, + "grad_norm": 0.9598575234413147, + "learning_rate": 0.0014996964215004416, + "loss": 3.2149, + "step": 180 + }, + { + "epoch": 0.020042194092827006, + "grad_norm": 1.072351336479187, + "learning_rate": 0.0014996207958606182, + "loss": 3.149, + "step": 190 + }, + { + "epoch": 0.02109704641350211, + "grad_norm": 0.9921668767929077, + "learning_rate": 0.001499536770353748, + "loss": 3.1007, + "step": 200 + }, + { + "epoch": 0.022151898734177215, + "grad_norm": 0.9595531821250916, + "learning_rate": 0.0014994443459213774, + "loss": 3.0632, + "step": 210 + }, + { + "epoch": 0.023206751054852322, + "grad_norm": 
1.0567376613616943, + "learning_rate": 0.001499343523599168, + "loss": 3.0324, + "step": 220 + }, + { + "epoch": 0.024261603375527425, + "grad_norm": 0.8116419911384583, + "learning_rate": 0.0014992343045168823, + "loss": 2.9802, + "step": 230 + }, + { + "epoch": 0.02531645569620253, + "grad_norm": 1.0400077104568481, + "learning_rate": 0.0014991166898983739, + "loss": 2.9463, + "step": 240 + }, + { + "epoch": 0.026371308016877638, + "grad_norm": 0.94350266456604, + "learning_rate": 0.001498990681061572, + "loss": 2.9153, + "step": 250 + }, + { + "epoch": 0.027426160337552744, + "grad_norm": 1.1482031345367432, + "learning_rate": 0.001498856279418467, + "loss": 2.879, + "step": 260 + }, + { + "epoch": 0.028481012658227847, + "grad_norm": 1.0873770713806152, + "learning_rate": 0.0014987134864750948, + "loss": 2.8581, + "step": 270 + }, + { + "epoch": 0.029535864978902954, + "grad_norm": 1.0023763179779053, + "learning_rate": 0.0014985623038315206, + "loss": 2.815, + "step": 280 + }, + { + "epoch": 0.03059071729957806, + "grad_norm": 1.016857385635376, + "learning_rate": 0.0014984027331818193, + "loss": 2.7974, + "step": 290 + }, + { + "epoch": 0.03164556962025317, + "grad_norm": 1.1707167625427246, + "learning_rate": 0.0014982347763140584, + "loss": 2.7753, + "step": 300 + }, + { + "epoch": 0.03270042194092827, + "grad_norm": 0.9500437378883362, + "learning_rate": 0.0014980584351102762, + "loss": 2.7492, + "step": 310 + }, + { + "epoch": 0.03375527426160337, + "grad_norm": 1.1192020177841187, + "learning_rate": 0.001497873711546462, + "loss": 2.7212, + "step": 320 + }, + { + "epoch": 0.03481012658227848, + "grad_norm": 0.9918935894966125, + "learning_rate": 0.0014976806076925334, + "loss": 2.704, + "step": 330 + }, + { + "epoch": 0.035864978902953586, + "grad_norm": 0.8603395223617554, + "learning_rate": 0.0014974791257123137, + "loss": 2.6772, + "step": 340 + }, + { + "epoch": 0.03691983122362869, + "grad_norm": 0.9788806438446045, + "learning_rate": 
0.001497269267863507, + "loss": 2.6385, + "step": 350 + }, + { + "epoch": 0.0379746835443038, + "grad_norm": 1.098434567451477, + "learning_rate": 0.0014970510364976724, + "loss": 2.6153, + "step": 360 + }, + { + "epoch": 0.039029535864978905, + "grad_norm": 1.1004198789596558, + "learning_rate": 0.0014968244340601996, + "loss": 2.599, + "step": 370 + }, + { + "epoch": 0.04008438818565401, + "grad_norm": 1.6365076303482056, + "learning_rate": 0.001496589463090279, + "loss": 2.5897, + "step": 380 + }, + { + "epoch": 0.04113924050632911, + "grad_norm": 1.6185334920883179, + "learning_rate": 0.001496346126220875, + "loss": 2.5753, + "step": 390 + }, + { + "epoch": 0.04219409282700422, + "grad_norm": 1.0286613702774048, + "learning_rate": 0.0014960944261786966, + "loss": 2.5362, + "step": 400 + }, + { + "epoch": 0.043248945147679324, + "grad_norm": 1.5629587173461914, + "learning_rate": 0.0014958343657841655, + "loss": 2.531, + "step": 410 + }, + { + "epoch": 0.04430379746835443, + "grad_norm": 1.309227466583252, + "learning_rate": 0.001495565947951385, + "loss": 2.5132, + "step": 420 + }, + { + "epoch": 0.04535864978902954, + "grad_norm": 0.9111704230308533, + "learning_rate": 0.0014952891756881085, + "loss": 2.4799, + "step": 430 + }, + { + "epoch": 0.046413502109704644, + "grad_norm": 1.0822027921676636, + "learning_rate": 0.0014950040520957037, + "loss": 2.4658, + "step": 440 + }, + { + "epoch": 0.04746835443037975, + "grad_norm": 1.1960536241531372, + "learning_rate": 0.0014947105803691204, + "loss": 2.4643, + "step": 450 + }, + { + "epoch": 0.04852320675105485, + "grad_norm": 1.181658387184143, + "learning_rate": 0.0014944087637968522, + "loss": 2.4423, + "step": 460 + }, + { + "epoch": 0.049578059071729956, + "grad_norm": 0.9750946164131165, + "learning_rate": 0.0014940986057609012, + "loss": 2.4178, + "step": 470 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 1.2033475637435913, + "learning_rate": 0.0014937801097367396, + "loss": 2.4225, + "step": 480 
+ }, + { + "epoch": 0.05168776371308017, + "grad_norm": 1.2110592126846313, + "learning_rate": 0.001493453279293271, + "loss": 2.3987, + "step": 490 + }, + { + "epoch": 0.052742616033755275, + "grad_norm": 1.0759574174880981, + "learning_rate": 0.0014931181180927902, + "loss": 2.3765, + "step": 500 + }, + { + "epoch": 0.05379746835443038, + "grad_norm": 0.8695982098579407, + "learning_rate": 0.001492774629890942, + "loss": 2.3708, + "step": 510 + }, + { + "epoch": 0.05485232067510549, + "grad_norm": 0.9708008170127869, + "learning_rate": 0.001492422818536679, + "loss": 2.3544, + "step": 520 + }, + { + "epoch": 0.05590717299578059, + "grad_norm": 1.0902724266052246, + "learning_rate": 0.00149206268797222, + "loss": 2.3364, + "step": 530 + }, + { + "epoch": 0.056962025316455694, + "grad_norm": 0.9239563345909119, + "learning_rate": 0.0014916942422330032, + "loss": 2.3207, + "step": 540 + }, + { + "epoch": 0.0580168776371308, + "grad_norm": 1.0082863569259644, + "learning_rate": 0.001491317485447643, + "loss": 2.3237, + "step": 550 + }, + { + "epoch": 0.05907172995780591, + "grad_norm": 1.1742995977401733, + "learning_rate": 0.0014909324218378838, + "loss": 2.2784, + "step": 560 + }, + { + "epoch": 0.060126582278481014, + "grad_norm": 1.529625654220581, + "learning_rate": 0.0014905390557185508, + "loss": 2.2846, + "step": 570 + }, + { + "epoch": 0.06118143459915612, + "grad_norm": 1.066186785697937, + "learning_rate": 0.0014901373914975036, + "loss": 2.2833, + "step": 580 + }, + { + "epoch": 0.06223628691983123, + "grad_norm": 0.993272602558136, + "learning_rate": 0.0014897274336755856, + "loss": 2.2547, + "step": 590 + }, + { + "epoch": 0.06329113924050633, + "grad_norm": 0.871677815914154, + "learning_rate": 0.001489309186846575, + "loss": 2.2341, + "step": 600 + }, + { + "epoch": 0.06434599156118144, + "grad_norm": 1.4139857292175293, + "learning_rate": 0.0014888826556971313, + "loss": 2.2378, + "step": 610 + }, + { + "epoch": 0.06540084388185655, + "grad_norm": 
1.2311094999313354, + "learning_rate": 0.0014884478450067444, + "loss": 2.2228, + "step": 620 + }, + { + "epoch": 0.06645569620253164, + "grad_norm": 1.1235512495040894, + "learning_rate": 0.0014880047596476807, + "loss": 2.2049, + "step": 630 + }, + { + "epoch": 0.06751054852320675, + "grad_norm": 0.9908660650253296, + "learning_rate": 0.0014875534045849274, + "loss": 2.2165, + "step": 640 + }, + { + "epoch": 0.06856540084388185, + "grad_norm": 1.1578383445739746, + "learning_rate": 0.0014870937848761388, + "loss": 2.2071, + "step": 650 + }, + { + "epoch": 0.06962025316455696, + "grad_norm": 1.4566097259521484, + "learning_rate": 0.001486625905671578, + "loss": 2.1958, + "step": 660 + }, + { + "epoch": 0.07067510548523206, + "grad_norm": 1.0704221725463867, + "learning_rate": 0.00148614977221406, + "loss": 2.1725, + "step": 670 + }, + { + "epoch": 0.07172995780590717, + "grad_norm": 1.2500007152557373, + "learning_rate": 0.0014856653898388927, + "loss": 2.1591, + "step": 680 + }, + { + "epoch": 0.07278481012658228, + "grad_norm": 1.4496512413024902, + "learning_rate": 0.001485172763973817, + "loss": 2.1738, + "step": 690 + }, + { + "epoch": 0.07383966244725738, + "grad_norm": 1.8970293998718262, + "learning_rate": 0.0014846719001389466, + "loss": 2.1556, + "step": 700 + }, + { + "epoch": 0.07489451476793249, + "grad_norm": 1.0276812314987183, + "learning_rate": 0.001484162803946705, + "loss": 2.1425, + "step": 710 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 0.9425234198570251, + "learning_rate": 0.0014836454811017635, + "loss": 2.114, + "step": 720 + }, + { + "epoch": 0.0770042194092827, + "grad_norm": 0.9796899557113647, + "learning_rate": 0.0014831199374009778, + "loss": 2.1152, + "step": 730 + }, + { + "epoch": 0.07805907172995781, + "grad_norm": 1.1745048761367798, + "learning_rate": 0.0014825861787333208, + "loss": 2.1519, + "step": 740 + }, + { + "epoch": 0.07911392405063292, + "grad_norm": 1.0231369733810425, + "learning_rate": 
0.0014820442110798197, + "loss": 2.1041, + "step": 750 + }, + { + "epoch": 0.08016877637130802, + "grad_norm": 0.914484977722168, + "learning_rate": 0.0014814940405134865, + "loss": 2.0907, + "step": 760 + }, + { + "epoch": 0.08122362869198312, + "grad_norm": 1.166015863418579, + "learning_rate": 0.001480935673199251, + "loss": 2.0872, + "step": 770 + }, + { + "epoch": 0.08227848101265822, + "grad_norm": 1.2583591938018799, + "learning_rate": 0.0014803691153938915, + "loss": 2.0791, + "step": 780 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 1.1579957008361816, + "learning_rate": 0.0014797943734459653, + "loss": 2.091, + "step": 790 + }, + { + "epoch": 0.08438818565400844, + "grad_norm": 0.8116486668586731, + "learning_rate": 0.001479211453795736, + "loss": 2.067, + "step": 800 + }, + { + "epoch": 0.08544303797468354, + "grad_norm": 1.2728939056396484, + "learning_rate": 0.0014786203629751033, + "loss": 2.0574, + "step": 810 + }, + { + "epoch": 0.08649789029535865, + "grad_norm": 0.8399823307991028, + "learning_rate": 0.0014780211076075279, + "loss": 2.0599, + "step": 820 + }, + { + "epoch": 0.08755274261603375, + "grad_norm": 1.4281278848648071, + "learning_rate": 0.0014774136944079594, + "loss": 2.0691, + "step": 830 + }, + { + "epoch": 0.08860759493670886, + "grad_norm": 0.8689684271812439, + "learning_rate": 0.0014767981301827592, + "loss": 2.0406, + "step": 840 + }, + { + "epoch": 0.08966244725738397, + "grad_norm": 1.2786409854888916, + "learning_rate": 0.0014761744218296249, + "loss": 2.0422, + "step": 850 + }, + { + "epoch": 0.09071729957805907, + "grad_norm": 0.7808472514152527, + "learning_rate": 0.001475542576337513, + "loss": 2.0362, + "step": 860 + }, + { + "epoch": 0.09177215189873418, + "grad_norm": 1.3187841176986694, + "learning_rate": 0.001474902600786561, + "loss": 2.0358, + "step": 870 + }, + { + "epoch": 0.09282700421940929, + "grad_norm": 0.8596416115760803, + "learning_rate": 0.0014742545023480075, + "loss": 2.0275, + "step": 880 + 
}, + { + "epoch": 0.0938818565400844, + "grad_norm": 1.2138091325759888, + "learning_rate": 0.0014735982882841117, + "loss": 2.0203, + "step": 890 + }, + { + "epoch": 0.0949367088607595, + "grad_norm": 1.1678885221481323, + "learning_rate": 0.0014729339659480727, + "loss": 2.0149, + "step": 900 + }, + { + "epoch": 0.09599156118143459, + "grad_norm": 1.3737812042236328, + "learning_rate": 0.0014722615427839468, + "loss": 2.0189, + "step": 910 + }, + { + "epoch": 0.0970464135021097, + "grad_norm": 0.9194842576980591, + "learning_rate": 0.0014715810263265633, + "loss": 2.0063, + "step": 920 + }, + { + "epoch": 0.0981012658227848, + "grad_norm": 0.999172031879425, + "learning_rate": 0.0014708924242014423, + "loss": 1.9859, + "step": 930 + }, + { + "epoch": 0.09915611814345991, + "grad_norm": 0.8639631271362305, + "learning_rate": 0.0014701957441247064, + "loss": 1.9976, + "step": 940 + }, + { + "epoch": 0.10021097046413502, + "grad_norm": 1.2475820779800415, + "learning_rate": 0.0014694909939029959, + "loss": 1.977, + "step": 950 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 0.8125089406967163, + "learning_rate": 0.0014687781814333814, + "loss": 1.9822, + "step": 960 + }, + { + "epoch": 0.10232067510548523, + "grad_norm": 1.0938138961791992, + "learning_rate": 0.0014680573147032746, + "loss": 1.9844, + "step": 970 + }, + { + "epoch": 0.10337552742616034, + "grad_norm": 0.7794817686080933, + "learning_rate": 0.0014673284017903392, + "loss": 1.9569, + "step": 980 + }, + { + "epoch": 0.10443037974683544, + "grad_norm": 0.9741127490997314, + "learning_rate": 0.0014665914508624, + "loss": 1.9589, + "step": 990 + }, + { + "epoch": 0.10548523206751055, + "grad_norm": 0.8810638189315796, + "learning_rate": 0.0014658464701773526, + "loss": 1.9709, + "step": 1000 + }, + { + "epoch": 0.10654008438818566, + "grad_norm": 1.0419055223464966, + "learning_rate": 0.0014650934680830688, + "loss": 1.9603, + "step": 1010 + }, + { + "epoch": 0.10759493670886076, + "grad_norm": 
1.8411555290222168, + "learning_rate": 0.0014643324530173051, + "loss": 1.9553, + "step": 1020 + }, + { + "epoch": 0.10864978902953587, + "grad_norm": 0.9106413722038269, + "learning_rate": 0.0014635634335076067, + "loss": 1.9684, + "step": 1030 + }, + { + "epoch": 0.10970464135021098, + "grad_norm": 1.2079719305038452, + "learning_rate": 0.001462786418171213, + "loss": 1.9372, + "step": 1040 + }, + { + "epoch": 0.11075949367088607, + "grad_norm": 1.2383036613464355, + "learning_rate": 0.0014620014157149597, + "loss": 1.9469, + "step": 1050 + }, + { + "epoch": 0.11181434599156118, + "grad_norm": 0.9366504549980164, + "learning_rate": 0.001461208434935183, + "loss": 1.9338, + "step": 1060 + }, + { + "epoch": 0.11286919831223628, + "grad_norm": 0.8578380942344666, + "learning_rate": 0.0014604074847176197, + "loss": 1.9243, + "step": 1070 + }, + { + "epoch": 0.11392405063291139, + "grad_norm": 1.4467988014221191, + "learning_rate": 0.0014595985740373082, + "loss": 1.9228, + "step": 1080 + }, + { + "epoch": 0.1149789029535865, + "grad_norm": 0.9940866827964783, + "learning_rate": 0.0014587817119584873, + "loss": 1.932, + "step": 1090 + }, + { + "epoch": 0.1160337552742616, + "grad_norm": 1.0697187185287476, + "learning_rate": 0.001457956907634496, + "loss": 1.9216, + "step": 1100 + }, + { + "epoch": 0.11708860759493671, + "grad_norm": 0.7710791230201721, + "learning_rate": 0.0014571241703076692, + "loss": 1.9199, + "step": 1110 + }, + { + "epoch": 0.11814345991561181, + "grad_norm": 1.1157621145248413, + "learning_rate": 0.0014562835093092348, + "loss": 1.9169, + "step": 1120 + }, + { + "epoch": 0.11919831223628692, + "grad_norm": 1.1666948795318604, + "learning_rate": 0.0014554349340592104, + "loss": 1.9, + "step": 1130 + }, + { + "epoch": 0.12025316455696203, + "grad_norm": 1.1377625465393066, + "learning_rate": 0.001454578454066296, + "loss": 1.9166, + "step": 1140 + }, + { + "epoch": 0.12130801687763713, + "grad_norm": 1.5753127336502075, + "learning_rate": 
0.0014537140789277678, + "loss": 1.8979, + "step": 1150 + }, + { + "epoch": 0.12236286919831224, + "grad_norm": 1.0287224054336548, + "learning_rate": 0.0014528418183293716, + "loss": 1.9143, + "step": 1160 + }, + { + "epoch": 0.12341772151898735, + "grad_norm": 1.259722113609314, + "learning_rate": 0.001451961682045213, + "loss": 1.8906, + "step": 1170 + }, + { + "epoch": 0.12447257383966245, + "grad_norm": 0.876148521900177, + "learning_rate": 0.001451073679937649, + "loss": 1.8786, + "step": 1180 + }, + { + "epoch": 0.12552742616033755, + "grad_norm": 0.7682937383651733, + "learning_rate": 0.0014501778219571766, + "loss": 1.8809, + "step": 1190 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 0.8414434194564819, + "learning_rate": 0.0014492741181423225, + "loss": 1.8976, + "step": 1200 + }, + { + "epoch": 0.12763713080168776, + "grad_norm": 0.8955063223838806, + "learning_rate": 0.0014483625786195285, + "loss": 1.8855, + "step": 1210 + }, + { + "epoch": 0.12869198312236288, + "grad_norm": 0.8241366147994995, + "learning_rate": 0.0014474432136030405, + "loss": 1.8697, + "step": 1220 + }, + { + "epoch": 0.12974683544303797, + "grad_norm": 0.8855438232421875, + "learning_rate": 0.0014465160333947923, + "loss": 1.8624, + "step": 1230 + }, + { + "epoch": 0.1308016877637131, + "grad_norm": 1.0857152938842773, + "learning_rate": 0.0014455810483842908, + "loss": 1.8809, + "step": 1240 + }, + { + "epoch": 0.13185654008438819, + "grad_norm": 0.9938157200813293, + "learning_rate": 0.0014446382690484997, + "loss": 1.8802, + "step": 1250 + }, + { + "epoch": 0.13291139240506328, + "grad_norm": 1.202011227607727, + "learning_rate": 0.0014436877059517215, + "loss": 1.8673, + "step": 1260 + }, + { + "epoch": 0.1339662447257384, + "grad_norm": 1.3604882955551147, + "learning_rate": 0.0014427293697454803, + "loss": 1.8833, + "step": 1270 + }, + { + "epoch": 0.1350210970464135, + "grad_norm": 0.8333151936531067, + "learning_rate": 0.001441763271168401, + "loss": 1.8687, + 
"step": 1280 + }, + { + "epoch": 0.1360759493670886, + "grad_norm": 0.9510217308998108, + "learning_rate": 0.00144078942104609, + "loss": 1.8544, + "step": 1290 + }, + { + "epoch": 0.1371308016877637, + "grad_norm": 0.9509080648422241, + "learning_rate": 0.001439807830291013, + "loss": 1.8475, + "step": 1300 + }, + { + "epoch": 0.13818565400843882, + "grad_norm": 0.8813123106956482, + "learning_rate": 0.0014388185099023744, + "loss": 1.8519, + "step": 1310 + }, + { + "epoch": 0.13924050632911392, + "grad_norm": 1.007832407951355, + "learning_rate": 0.0014378214709659916, + "loss": 1.8558, + "step": 1320 + }, + { + "epoch": 0.14029535864978904, + "grad_norm": 1.4728190898895264, + "learning_rate": 0.0014368167246541733, + "loss": 1.8496, + "step": 1330 + }, + { + "epoch": 0.14135021097046413, + "grad_norm": 0.9498640894889832, + "learning_rate": 0.0014358042822255918, + "loss": 1.8538, + "step": 1340 + }, + { + "epoch": 0.14240506329113925, + "grad_norm": 0.846640944480896, + "learning_rate": 0.0014347841550251597, + "loss": 1.8488, + "step": 1350 + }, + { + "epoch": 0.14345991561181434, + "grad_norm": 0.8134399056434631, + "learning_rate": 0.0014337563544838997, + "loss": 1.8401, + "step": 1360 + }, + { + "epoch": 0.14451476793248946, + "grad_norm": 0.8588923811912537, + "learning_rate": 0.001432720892118819, + "loss": 1.8364, + "step": 1370 + }, + { + "epoch": 0.14556962025316456, + "grad_norm": 1.0944496393203735, + "learning_rate": 0.0014316777795327794, + "loss": 1.8288, + "step": 1380 + }, + { + "epoch": 0.14662447257383968, + "grad_norm": 0.8849280476570129, + "learning_rate": 0.001430627028414366, + "loss": 1.8403, + "step": 1390 + }, + { + "epoch": 0.14767932489451477, + "grad_norm": 1.0602022409439087, + "learning_rate": 0.0014295686505377586, + "loss": 1.8259, + "step": 1400 + }, + { + "epoch": 0.14873417721518986, + "grad_norm": 0.9489578008651733, + "learning_rate": 0.0014285026577625982, + "loss": 1.8254, + "step": 1410 + }, + { + "epoch": 
0.14978902953586498, + "grad_norm": 1.1067856550216675, + "learning_rate": 0.0014274290620338542, + "loss": 1.8352, + "step": 1420 + }, + { + "epoch": 0.15084388185654007, + "grad_norm": 0.9376221299171448, + "learning_rate": 0.0014263478753816906, + "loss": 1.8264, + "step": 1430 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 0.806212842464447, + "learning_rate": 0.0014252591099213326, + "loss": 1.8188, + "step": 1440 + }, + { + "epoch": 0.1529535864978903, + "grad_norm": 1.2669298648834229, + "learning_rate": 0.001424162777852928, + "loss": 1.8283, + "step": 1450 + }, + { + "epoch": 0.1540084388185654, + "grad_norm": 1.0821505784988403, + "learning_rate": 0.0014230588914614134, + "loss": 1.8111, + "step": 1460 + }, + { + "epoch": 0.1550632911392405, + "grad_norm": 1.2630783319473267, + "learning_rate": 0.0014219474631163745, + "loss": 1.8012, + "step": 1470 + }, + { + "epoch": 0.15611814345991562, + "grad_norm": 0.8795732259750366, + "learning_rate": 0.001420828505271909, + "loss": 1.808, + "step": 1480 + }, + { + "epoch": 0.1571729957805907, + "grad_norm": 0.8997399806976318, + "learning_rate": 0.0014197020304664856, + "loss": 1.7995, + "step": 1490 + }, + { + "epoch": 0.15822784810126583, + "grad_norm": 0.8293248414993286, + "learning_rate": 0.0014185680513228048, + "loss": 1.8017, + "step": 1500 + }, + { + "epoch": 0.15928270042194093, + "grad_norm": 1.0001919269561768, + "learning_rate": 0.0014174265805476564, + "loss": 1.8083, + "step": 1510 + }, + { + "epoch": 0.16033755274261605, + "grad_norm": 1.0639148950576782, + "learning_rate": 0.0014162776309317778, + "loss": 1.812, + "step": 1520 + }, + { + "epoch": 0.16139240506329114, + "grad_norm": 1.1502482891082764, + "learning_rate": 0.0014151212153497108, + "loss": 1.8026, + "step": 1530 + }, + { + "epoch": 0.16244725738396623, + "grad_norm": 1.2619913816452026, + "learning_rate": 0.0014139573467596561, + "loss": 1.7914, + "step": 1540 + }, + { + "epoch": 0.16350210970464135, + "grad_norm": 
1.1077357530593872, + "learning_rate": 0.00141278603820333, + "loss": 1.7799, + "step": 1550 + }, + { + "epoch": 0.16455696202531644, + "grad_norm": 0.8831108808517456, + "learning_rate": 0.0014116073028058165, + "loss": 1.7759, + "step": 1560 + }, + { + "epoch": 0.16561181434599156, + "grad_norm": 0.8525316119194031, + "learning_rate": 0.0014104211537754217, + "loss": 1.7693, + "step": 1570 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 0.8042332530021667, + "learning_rate": 0.001409227604403524, + "loss": 1.787, + "step": 1580 + }, + { + "epoch": 0.16772151898734178, + "grad_norm": 1.1101175546646118, + "learning_rate": 0.0014080266680644277, + "loss": 1.7871, + "step": 1590 + }, + { + "epoch": 0.16877637130801687, + "grad_norm": 0.8573349714279175, + "learning_rate": 0.0014068183582152103, + "loss": 1.7937, + "step": 1600 + }, + { + "epoch": 0.169831223628692, + "grad_norm": 0.7878482341766357, + "learning_rate": 0.001405602688395574, + "loss": 1.7925, + "step": 1610 + }, + { + "epoch": 0.17088607594936708, + "grad_norm": 0.798627495765686, + "learning_rate": 0.0014043796722276924, + "loss": 1.7622, + "step": 1620 + }, + { + "epoch": 0.1719409282700422, + "grad_norm": 1.0478029251098633, + "learning_rate": 0.0014031493234160591, + "loss": 1.778, + "step": 1630 + }, + { + "epoch": 0.1729957805907173, + "grad_norm": 0.889505922794342, + "learning_rate": 0.0014019116557473332, + "loss": 1.7768, + "step": 1640 + }, + { + "epoch": 0.17405063291139242, + "grad_norm": 1.0377753973007202, + "learning_rate": 0.0014006666830901854, + "loss": 1.7676, + "step": 1650 + }, + { + "epoch": 0.1751054852320675, + "grad_norm": 0.8745960593223572, + "learning_rate": 0.001399414419395142, + "loss": 1.7697, + "step": 1660 + }, + { + "epoch": 0.17616033755274263, + "grad_norm": 1.1971619129180908, + "learning_rate": 0.0013981548786944293, + "loss": 1.7876, + "step": 1670 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 1.355685830116272, + "learning_rate": 
0.0013968880751018158, + "loss": 1.7678, + "step": 1680 + }, + { + "epoch": 0.17827004219409281, + "grad_norm": 1.0085058212280273, + "learning_rate": 0.0013956140228124545, + "loss": 1.7507, + "step": 1690 + }, + { + "epoch": 0.17932489451476794, + "grad_norm": 0.9867112040519714, + "learning_rate": 0.0013943327361027231, + "loss": 1.7711, + "step": 1700 + }, + { + "epoch": 0.18037974683544303, + "grad_norm": 0.8053104281425476, + "learning_rate": 0.0013930442293300649, + "loss": 1.7492, + "step": 1710 + }, + { + "epoch": 0.18143459915611815, + "grad_norm": 0.7771016359329224, + "learning_rate": 0.0013917485169328279, + "loss": 1.7531, + "step": 1720 + }, + { + "epoch": 0.18248945147679324, + "grad_norm": 1.2333118915557861, + "learning_rate": 0.0013904456134301016, + "loss": 1.7575, + "step": 1730 + }, + { + "epoch": 0.18354430379746836, + "grad_norm": 0.778057873249054, + "learning_rate": 0.0013891355334215562, + "loss": 1.7619, + "step": 1740 + }, + { + "epoch": 0.18459915611814345, + "grad_norm": 0.8622661232948303, + "learning_rate": 0.0013878182915872776, + "loss": 1.7792, + "step": 1750 + }, + { + "epoch": 0.18565400843881857, + "grad_norm": 0.7761487364768982, + "learning_rate": 0.001386493902687604, + "loss": 1.755, + "step": 1760 + }, + { + "epoch": 0.18670886075949367, + "grad_norm": 0.8743688464164734, + "learning_rate": 0.00138516238156296, + "loss": 1.7619, + "step": 1770 + }, + { + "epoch": 0.1877637130801688, + "grad_norm": 0.8979333639144897, + "learning_rate": 0.0013838237431336895, + "loss": 1.7611, + "step": 1780 + }, + { + "epoch": 0.18881856540084388, + "grad_norm": 2.118516683578491, + "learning_rate": 0.0013824780023998899, + "loss": 1.7688, + "step": 1790 + }, + { + "epoch": 0.189873417721519, + "grad_norm": 0.7186918258666992, + "learning_rate": 0.0013811251744412431, + "loss": 1.7523, + "step": 1800 + }, + { + "epoch": 0.1909282700421941, + "grad_norm": 0.7687019109725952, + "learning_rate": 0.0013797652744168473, + "loss": 1.7344, + 
"step": 1810 + }, + { + "epoch": 0.19198312236286919, + "grad_norm": 0.7175707221031189, + "learning_rate": 0.0013783983175650457, + "loss": 1.7359, + "step": 1820 + }, + { + "epoch": 0.1930379746835443, + "grad_norm": 0.8784117698669434, + "learning_rate": 0.0013770243192032581, + "loss": 1.739, + "step": 1830 + }, + { + "epoch": 0.1940928270042194, + "grad_norm": 0.8043698668479919, + "learning_rate": 0.0013756432947278064, + "loss": 1.7386, + "step": 1840 + }, + { + "epoch": 0.19514767932489452, + "grad_norm": 0.7913126945495605, + "learning_rate": 0.0013742552596137444, + "loss": 1.7424, + "step": 1850 + }, + { + "epoch": 0.1962025316455696, + "grad_norm": 0.9553866386413574, + "learning_rate": 0.0013728602294146833, + "loss": 1.7452, + "step": 1860 + }, + { + "epoch": 0.19725738396624473, + "grad_norm": 0.9565508961677551, + "learning_rate": 0.0013714582197626175, + "loss": 1.7444, + "step": 1870 + }, + { + "epoch": 0.19831223628691982, + "grad_norm": 0.7277857661247253, + "learning_rate": 0.0013700492463677501, + "loss": 1.7355, + "step": 1880 + }, + { + "epoch": 0.19936708860759494, + "grad_norm": 0.7405062317848206, + "learning_rate": 0.0013686333250183154, + "loss": 1.7381, + "step": 1890 + }, + { + "epoch": 0.20042194092827004, + "grad_norm": 1.2782166004180908, + "learning_rate": 0.001367210471580404, + "loss": 1.7346, + "step": 1900 + }, + { + "epoch": 0.20147679324894516, + "grad_norm": 1.1269385814666748, + "learning_rate": 0.0013657807019977835, + "loss": 1.7337, + "step": 1910 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 0.8279767036437988, + "learning_rate": 0.0013643440322917198, + "loss": 1.726, + "step": 1920 + }, + { + "epoch": 0.20358649789029537, + "grad_norm": 1.377508521080017, + "learning_rate": 0.0013629004785607989, + "loss": 1.7282, + "step": 1930 + }, + { + "epoch": 0.20464135021097046, + "grad_norm": 0.7705981135368347, + "learning_rate": 0.0013614500569807445, + "loss": 1.7205, + "step": 1940 + }, + { + "epoch": 
0.20569620253164558, + "grad_norm": 0.8119442462921143, + "learning_rate": 0.0013599927838042394, + "loss": 1.7213, + "step": 1950 + }, + { + "epoch": 0.20675105485232068, + "grad_norm": 1.0496710538864136, + "learning_rate": 0.0013585286753607408, + "loss": 1.7221, + "step": 1960 + }, + { + "epoch": 0.20780590717299577, + "grad_norm": 0.874154269695282, + "learning_rate": 0.0013570577480562986, + "loss": 1.7347, + "step": 1970 + }, + { + "epoch": 0.2088607594936709, + "grad_norm": 1.0901719331741333, + "learning_rate": 0.0013555800183733717, + "loss": 1.7211, + "step": 1980 + }, + { + "epoch": 0.20991561181434598, + "grad_norm": 0.8325103521347046, + "learning_rate": 0.0013540955028706425, + "loss": 1.715, + "step": 1990 + }, + { + "epoch": 0.2109704641350211, + "grad_norm": 0.8242533206939697, + "learning_rate": 0.0013526042181828324, + "loss": 1.7087, + "step": 2000 + }, + { + "epoch": 0.2120253164556962, + "grad_norm": 1.0116493701934814, + "learning_rate": 0.0013511061810205143, + "loss": 1.7188, + "step": 2010 + }, + { + "epoch": 0.21308016877637131, + "grad_norm": 0.925283670425415, + "learning_rate": 0.001349601408169926, + "loss": 1.731, + "step": 2020 + }, + { + "epoch": 0.2141350210970464, + "grad_norm": 0.8995429873466492, + "learning_rate": 0.0013480899164927823, + "loss": 1.7157, + "step": 2030 + }, + { + "epoch": 0.21518987341772153, + "grad_norm": 0.7425591349601746, + "learning_rate": 0.0013465717229260853, + "loss": 1.7129, + "step": 2040 + }, + { + "epoch": 0.21624472573839662, + "grad_norm": 0.9348381757736206, + "learning_rate": 0.001345046844481935, + "loss": 1.7148, + "step": 2050 + }, + { + "epoch": 0.21729957805907174, + "grad_norm": 0.868652880191803, + "learning_rate": 0.0013435152982473396, + "loss": 1.7059, + "step": 2060 + }, + { + "epoch": 0.21835443037974683, + "grad_norm": 1.137388825416565, + "learning_rate": 0.0013419771013840217, + "loss": 1.7055, + "step": 2070 + }, + { + "epoch": 0.21940928270042195, + "grad_norm": 
1.1614747047424316, + "learning_rate": 0.001340432271128229, + "loss": 1.7196, + "step": 2080 + }, + { + "epoch": 0.22046413502109705, + "grad_norm": 0.8824067115783691, + "learning_rate": 0.0013388808247905381, + "loss": 1.6921, + "step": 2090 + }, + { + "epoch": 0.22151898734177214, + "grad_norm": 0.8711856603622437, + "learning_rate": 0.0013373227797556634, + "loss": 1.7032, + "step": 2100 + }, + { + "epoch": 0.22257383966244726, + "grad_norm": 0.9083946347236633, + "learning_rate": 0.00133575815348226, + "loss": 1.7014, + "step": 2110 + }, + { + "epoch": 0.22362869198312235, + "grad_norm": 1.0575937032699585, + "learning_rate": 0.0013341869635027292, + "loss": 1.6965, + "step": 2120 + }, + { + "epoch": 0.22468354430379747, + "grad_norm": 0.7907885909080505, + "learning_rate": 0.001332609227423022, + "loss": 1.6961, + "step": 2130 + }, + { + "epoch": 0.22573839662447256, + "grad_norm": 0.8461203575134277, + "learning_rate": 0.0013310249629224417, + "loss": 1.6907, + "step": 2140 + }, + { + "epoch": 0.22679324894514769, + "grad_norm": 0.9071108102798462, + "learning_rate": 0.0013294341877534454, + "loss": 1.7015, + "step": 2150 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 1.0483574867248535, + "learning_rate": 0.0013278369197414458, + "loss": 1.7168, + "step": 2160 + }, + { + "epoch": 0.2289029535864979, + "grad_norm": 0.7090762257575989, + "learning_rate": 0.0013262331767846104, + "loss": 1.6997, + "step": 2170 + }, + { + "epoch": 0.229957805907173, + "grad_norm": 0.9070972800254822, + "learning_rate": 0.0013246229768536628, + "loss": 1.6775, + "step": 2180 + }, + { + "epoch": 0.2310126582278481, + "grad_norm": 1.7173430919647217, + "learning_rate": 0.001323006337991679, + "loss": 1.7011, + "step": 2190 + }, + { + "epoch": 0.2320675105485232, + "grad_norm": 1.061678409576416, + "learning_rate": 0.0013213832783138873, + "loss": 1.7051, + "step": 2200 + }, + { + "epoch": 0.23312236286919832, + "grad_norm": 0.9055444598197937, + "learning_rate": 
0.0013197538160074633, + "loss": 1.6799, + "step": 2210 + }, + { + "epoch": 0.23417721518987342, + "grad_norm": 0.8044984936714172, + "learning_rate": 0.0013181179693313283, + "loss": 1.6936, + "step": 2220 + }, + { + "epoch": 0.23523206751054854, + "grad_norm": 1.0172468423843384, + "learning_rate": 0.0013164757566159428, + "loss": 1.6902, + "step": 2230 + }, + { + "epoch": 0.23628691983122363, + "grad_norm": 1.2488845586776733, + "learning_rate": 0.001314827196263102, + "loss": 1.6871, + "step": 2240 + }, + { + "epoch": 0.23734177215189872, + "grad_norm": 0.9155538082122803, + "learning_rate": 0.0013131723067457302, + "loss": 1.6825, + "step": 2250 + }, + { + "epoch": 0.23839662447257384, + "grad_norm": 0.8130294680595398, + "learning_rate": 0.0013115111066076721, + "loss": 1.6724, + "step": 2260 + }, + { + "epoch": 0.23945147679324894, + "grad_norm": 0.8489205837249756, + "learning_rate": 0.0013098436144634862, + "loss": 1.7093, + "step": 2270 + }, + { + "epoch": 0.24050632911392406, + "grad_norm": 0.9134144186973572, + "learning_rate": 0.0013081698489982364, + "loss": 1.6886, + "step": 2280 + }, + { + "epoch": 0.24156118143459915, + "grad_norm": 1.7627509832382202, + "learning_rate": 0.001306489828967282, + "loss": 1.6823, + "step": 2290 + }, + { + "epoch": 0.24261603375527427, + "grad_norm": 1.0341514348983765, + "learning_rate": 0.0013048035731960679, + "loss": 1.686, + "step": 2300 + }, + { + "epoch": 0.24367088607594936, + "grad_norm": 1.231884241104126, + "learning_rate": 0.0013031111005799133, + "loss": 1.6733, + "step": 2310 + }, + { + "epoch": 0.24472573839662448, + "grad_norm": 1.1539286375045776, + "learning_rate": 0.0013014124300838004, + "loss": 1.6761, + "step": 2320 + }, + { + "epoch": 0.24578059071729957, + "grad_norm": 0.8614572286605835, + "learning_rate": 0.0012997075807421612, + "loss": 1.662, + "step": 2330 + }, + { + "epoch": 0.2468354430379747, + "grad_norm": 0.8235961198806763, + "learning_rate": 0.0012979965716586653, + "loss": 1.6613, + 
"step": 2340 + }, + { + "epoch": 0.2478902953586498, + "grad_norm": 0.8476676344871521, + "learning_rate": 0.0012962794220060048, + "loss": 1.6666, + "step": 2350 + }, + { + "epoch": 0.2489451476793249, + "grad_norm": 0.778982400894165, + "learning_rate": 0.0012945561510256801, + "loss": 1.677, + "step": 2360 + }, + { + "epoch": 0.25, + "grad_norm": 0.7421671152114868, + "learning_rate": 0.001292826778027784, + "loss": 1.6715, + "step": 2370 + }, + { + "epoch": 0.2510548523206751, + "grad_norm": 1.008040428161621, + "learning_rate": 0.0012910913223907856, + "loss": 1.6708, + "step": 2380 + }, + { + "epoch": 0.2521097046413502, + "grad_norm": 0.7762741446495056, + "learning_rate": 0.0012893498035613123, + "loss": 1.67, + "step": 2390 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 0.7778167128562927, + "learning_rate": 0.001287602241053933, + "loss": 1.6702, + "step": 2400 + }, + { + "epoch": 0.2542194092827004, + "grad_norm": 0.8762797117233276, + "learning_rate": 0.0012858486544509392, + "loss": 1.672, + "step": 2410 + }, + { + "epoch": 0.2552742616033755, + "grad_norm": 1.156212568283081, + "learning_rate": 0.0012840890634021249, + "loss": 1.669, + "step": 2420 + }, + { + "epoch": 0.2563291139240506, + "grad_norm": 0.7746683359146118, + "learning_rate": 0.0012823234876245667, + "loss": 1.665, + "step": 2430 + }, + { + "epoch": 0.25738396624472576, + "grad_norm": 0.8059052228927612, + "learning_rate": 0.0012805519469024035, + "loss": 1.6743, + "step": 2440 + }, + { + "epoch": 0.25843881856540085, + "grad_norm": 0.829504132270813, + "learning_rate": 0.0012787744610866143, + "loss": 1.6577, + "step": 2450 + }, + { + "epoch": 0.25949367088607594, + "grad_norm": 0.917306125164032, + "learning_rate": 0.0012769910500947954, + "loss": 1.6736, + "step": 2460 + }, + { + "epoch": 0.26054852320675104, + "grad_norm": 1.1249607801437378, + "learning_rate": 0.0012752017339109376, + "loss": 1.6651, + "step": 2470 + }, + { + "epoch": 0.2616033755274262, + "grad_norm": 
0.837443470954895, + "learning_rate": 0.0012734065325852029, + "loss": 1.6673, + "step": 2480 + }, + { + "epoch": 0.2626582278481013, + "grad_norm": 0.9369168281555176, + "learning_rate": 0.0012716054662336987, + "loss": 1.6543, + "step": 2490 + }, + { + "epoch": 0.26371308016877637, + "grad_norm": 0.8164992332458496, + "learning_rate": 0.001269798555038252, + "loss": 1.6589, + "step": 2500 + }, + { + "epoch": 0.26476793248945146, + "grad_norm": 0.8499755263328552, + "learning_rate": 0.0012679858192461864, + "loss": 1.6574, + "step": 2510 + }, + { + "epoch": 0.26582278481012656, + "grad_norm": 1.0605430603027344, + "learning_rate": 0.0012661672791700906, + "loss": 1.6618, + "step": 2520 + }, + { + "epoch": 0.2668776371308017, + "grad_norm": 1.09691321849823, + "learning_rate": 0.0012643429551875945, + "loss": 1.6553, + "step": 2530 + }, + { + "epoch": 0.2679324894514768, + "grad_norm": 0.755530059337616, + "learning_rate": 0.0012625128677411388, + "loss": 1.6606, + "step": 2540 + }, + { + "epoch": 0.2689873417721519, + "grad_norm": 0.7017857432365417, + "learning_rate": 0.0012606770373377475, + "loss": 1.6508, + "step": 2550 + }, + { + "epoch": 0.270042194092827, + "grad_norm": 0.787723183631897, + "learning_rate": 0.0012588354845487959, + "loss": 1.6609, + "step": 2560 + }, + { + "epoch": 0.27109704641350213, + "grad_norm": 0.7666086554527283, + "learning_rate": 0.001256988230009783, + "loss": 1.6523, + "step": 2570 + }, + { + "epoch": 0.2721518987341772, + "grad_norm": 0.7913174033164978, + "learning_rate": 0.0012551352944200976, + "loss": 1.652, + "step": 2580 + }, + { + "epoch": 0.2732067510548523, + "grad_norm": 0.7828168869018555, + "learning_rate": 0.0012532766985427874, + "loss": 1.6597, + "step": 2590 + }, + { + "epoch": 0.2742616033755274, + "grad_norm": 0.7753764986991882, + "learning_rate": 0.0012514124632043272, + "loss": 1.657, + "step": 2600 + }, + { + "epoch": 0.27531645569620256, + "grad_norm": 0.8908626437187195, + "learning_rate": 
0.0012495426092943842, + "loss": 1.6567, + "step": 2610 + }, + { + "epoch": 0.27637130801687765, + "grad_norm": 1.2044034004211426, + "learning_rate": 0.0012476671577655845, + "loss": 1.6545, + "step": 2620 + }, + { + "epoch": 0.27742616033755274, + "grad_norm": 0.9810152053833008, + "learning_rate": 0.0012457861296332774, + "loss": 1.64, + "step": 2630 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 0.8495761752128601, + "learning_rate": 0.001243899545975303, + "loss": 1.6563, + "step": 2640 + }, + { + "epoch": 0.2795358649789029, + "grad_norm": 0.9891127347946167, + "learning_rate": 0.0012420074279317515, + "loss": 1.6414, + "step": 2650 + }, + { + "epoch": 0.2805907172995781, + "grad_norm": 0.7976694703102112, + "learning_rate": 0.0012401097967047298, + "loss": 1.6365, + "step": 2660 + }, + { + "epoch": 0.28164556962025317, + "grad_norm": 0.7217979431152344, + "learning_rate": 0.001238206673558122, + "loss": 1.6384, + "step": 2670 + }, + { + "epoch": 0.28270042194092826, + "grad_norm": 0.7441779971122742, + "learning_rate": 0.0012362980798173526, + "loss": 1.6374, + "step": 2680 + }, + { + "epoch": 0.28375527426160335, + "grad_norm": 1.0701630115509033, + "learning_rate": 0.0012343840368691462, + "loss": 1.6422, + "step": 2690 + }, + { + "epoch": 0.2848101265822785, + "grad_norm": 0.7323340177536011, + "learning_rate": 0.0012324645661612886, + "loss": 1.6362, + "step": 2700 + }, + { + "epoch": 0.2858649789029536, + "grad_norm": 1.0833120346069336, + "learning_rate": 0.0012305396892023867, + "loss": 1.6427, + "step": 2710 + }, + { + "epoch": 0.2869198312236287, + "grad_norm": 1.6579355001449585, + "learning_rate": 0.0012286094275616264, + "loss": 1.6464, + "step": 2720 + }, + { + "epoch": 0.2879746835443038, + "grad_norm": 1.5970370769500732, + "learning_rate": 0.0012266738028685318, + "loss": 1.6234, + "step": 2730 + }, + { + "epoch": 0.2890295358649789, + "grad_norm": 1.163870096206665, + "learning_rate": 0.001224732836812723, + "loss": 1.631, + "step": 
2740 + }, + { + "epoch": 0.290084388185654, + "grad_norm": 0.8926600813865662, + "learning_rate": 0.0012227865511436724, + "loss": 1.6311, + "step": 2750 + }, + { + "epoch": 0.2911392405063291, + "grad_norm": 1.1485803127288818, + "learning_rate": 0.001220834967670461, + "loss": 1.6434, + "step": 2760 + }, + { + "epoch": 0.2921940928270042, + "grad_norm": 0.8932816982269287, + "learning_rate": 0.0012188781082615346, + "loss": 1.6405, + "step": 2770 + }, + { + "epoch": 0.29324894514767935, + "grad_norm": 0.8555330634117126, + "learning_rate": 0.0012169159948444588, + "loss": 1.6376, + "step": 2780 + }, + { + "epoch": 0.29430379746835444, + "grad_norm": 0.8089908957481384, + "learning_rate": 0.001214948649405672, + "loss": 1.6356, + "step": 2790 + }, + { + "epoch": 0.29535864978902954, + "grad_norm": 0.8039626479148865, + "learning_rate": 0.0012129760939902407, + "loss": 1.6324, + "step": 2800 + }, + { + "epoch": 0.29641350210970463, + "grad_norm": 0.9363980293273926, + "learning_rate": 0.0012109983507016114, + "loss": 1.6382, + "step": 2810 + }, + { + "epoch": 0.2974683544303797, + "grad_norm": 0.8755062818527222, + "learning_rate": 0.0012090154417013636, + "loss": 1.6443, + "step": 2820 + }, + { + "epoch": 0.29852320675105487, + "grad_norm": 1.3032336235046387, + "learning_rate": 0.0012070273892089605, + "loss": 1.6164, + "step": 2830 + }, + { + "epoch": 0.29957805907172996, + "grad_norm": 0.9401730298995972, + "learning_rate": 0.0012050342155015012, + "loss": 1.6219, + "step": 2840 + }, + { + "epoch": 0.30063291139240506, + "grad_norm": 1.3471062183380127, + "learning_rate": 0.0012030359429134707, + "loss": 1.6255, + "step": 2850 + }, + { + "epoch": 0.30168776371308015, + "grad_norm": 0.8162136077880859, + "learning_rate": 0.0012010325938364883, + "loss": 1.6195, + "step": 2860 + }, + { + "epoch": 0.3027426160337553, + "grad_norm": 1.0004981756210327, + "learning_rate": 0.0011990241907190592, + "loss": 1.6214, + "step": 2870 + }, + { + "epoch": 0.3037974683544304, 
+ "grad_norm": 0.9056284427642822, + "learning_rate": 0.001197010756066321, + "loss": 1.6114, + "step": 2880 + }, + { + "epoch": 0.3048523206751055, + "grad_norm": 0.7228212952613831, + "learning_rate": 0.0011949923124397917, + "loss": 1.6193, + "step": 2890 + }, + { + "epoch": 0.3059071729957806, + "grad_norm": 0.9336810111999512, + "learning_rate": 0.001192968882457118, + "loss": 1.626, + "step": 2900 + }, + { + "epoch": 0.3069620253164557, + "grad_norm": 0.8797746300697327, + "learning_rate": 0.001190940488791821, + "loss": 1.627, + "step": 2910 + }, + { + "epoch": 0.3080168776371308, + "grad_norm": 0.8261645436286926, + "learning_rate": 0.0011889071541730419, + "loss": 1.6184, + "step": 2920 + }, + { + "epoch": 0.3090717299578059, + "grad_norm": 0.8117802143096924, + "learning_rate": 0.001186868901385288, + "loss": 1.6133, + "step": 2930 + }, + { + "epoch": 0.310126582278481, + "grad_norm": 0.7580009698867798, + "learning_rate": 0.001184825753268177, + "loss": 1.6218, + "step": 2940 + }, + { + "epoch": 0.3111814345991561, + "grad_norm": 1.5047001838684082, + "learning_rate": 0.0011827777327161814, + "loss": 1.6302, + "step": 2950 + }, + { + "epoch": 0.31223628691983124, + "grad_norm": 0.8300422430038452, + "learning_rate": 0.0011809303672538417, + "loss": 1.62, + "step": 2960 + }, + { + "epoch": 0.31329113924050633, + "grad_norm": 0.937896192073822, + "learning_rate": 0.0011788731523451718, + "loss": 1.5993, + "step": 2970 + }, + { + "epoch": 0.3143459915611814, + "grad_norm": 1.4205741882324219, + "learning_rate": 0.0011768111317034173, + "loss": 1.613, + "step": 2980 + }, + { + "epoch": 0.3154008438818565, + "grad_norm": 0.9500992298126221, + "learning_rate": 0.001174744328434526, + "loss": 1.6276, + "step": 2990 + }, + { + "epoch": 0.31645569620253167, + "grad_norm": 1.1347622871398926, + "learning_rate": 0.0011726727656980378, + "loss": 1.6021, + "step": 3000 + }, + { + "epoch": 0.31751054852320676, + "grad_norm": 0.7653483748435974, + "learning_rate": 
0.001170596466706825, + "loss": 1.6137, + "step": 3010 + }, + { + "epoch": 0.31856540084388185, + "grad_norm": 1.0032473802566528, + "learning_rate": 0.0011685154547268312, + "loss": 1.6149, + "step": 3020 + }, + { + "epoch": 0.31962025316455694, + "grad_norm": 0.7907644510269165, + "learning_rate": 0.0011664297530768117, + "loss": 1.6027, + "step": 3030 + }, + { + "epoch": 0.3206751054852321, + "grad_norm": 0.8439200520515442, + "learning_rate": 0.0011643393851280724, + "loss": 1.6115, + "step": 3040 + }, + { + "epoch": 0.3217299578059072, + "grad_norm": 0.9166921377182007, + "learning_rate": 0.0011622443743042065, + "loss": 1.608, + "step": 3050 + }, + { + "epoch": 0.3227848101265823, + "grad_norm": 0.7506648302078247, + "learning_rate": 0.0011601447440808335, + "loss": 1.6161, + "step": 3060 + }, + { + "epoch": 0.32383966244725737, + "grad_norm": 1.087438702583313, + "learning_rate": 0.001158040517985335, + "loss": 1.608, + "step": 3070 + }, + { + "epoch": 0.32489451476793246, + "grad_norm": 1.0308938026428223, + "learning_rate": 0.001155931719596592, + "loss": 1.5995, + "step": 3080 + }, + { + "epoch": 0.3259493670886076, + "grad_norm": 0.7822879552841187, + "learning_rate": 0.00115381837254472, + "loss": 1.6176, + "step": 3090 + }, + { + "epoch": 0.3270042194092827, + "grad_norm": 0.9514574408531189, + "learning_rate": 0.0011517005005108048, + "loss": 1.6001, + "step": 3100 + }, + { + "epoch": 0.3280590717299578, + "grad_norm": 0.7356628179550171, + "learning_rate": 0.0011495781272266366, + "loss": 1.6057, + "step": 3110 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 0.8729589581489563, + "learning_rate": 0.0011474512764744445, + "loss": 1.6091, + "step": 3120 + }, + { + "epoch": 0.33016877637130804, + "grad_norm": 0.8177212476730347, + "learning_rate": 0.0011453199720866296, + "loss": 1.6188, + "step": 3130 + }, + { + "epoch": 0.33122362869198313, + "grad_norm": 0.7609232068061829, + "learning_rate": 0.0011431842379454982, + "loss": 1.5955, + "step": 
3140 + }, + { + "epoch": 0.3322784810126582, + "grad_norm": 0.8037144541740417, + "learning_rate": 0.0011410440979829942, + "loss": 1.605, + "step": 3150 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.8781481981277466, + "learning_rate": 0.0011388995761804311, + "loss": 1.6039, + "step": 3160 + }, + { + "epoch": 0.33438818565400846, + "grad_norm": 0.7719651460647583, + "learning_rate": 0.0011367506965682225, + "loss": 1.6095, + "step": 3170 + }, + { + "epoch": 0.33544303797468356, + "grad_norm": 0.7966209650039673, + "learning_rate": 0.0011345974832256138, + "loss": 1.6009, + "step": 3180 + }, + { + "epoch": 0.33649789029535865, + "grad_norm": 1.0874760150909424, + "learning_rate": 0.001132439960280412, + "loss": 1.6016, + "step": 3190 + }, + { + "epoch": 0.33755274261603374, + "grad_norm": 0.975898802280426, + "learning_rate": 0.0011302781519087154, + "loss": 1.595, + "step": 3200 + }, + { + "epoch": 0.33860759493670883, + "grad_norm": 1.1608455181121826, + "learning_rate": 0.0011281120823346418, + "loss": 1.6094, + "step": 3210 + }, + { + "epoch": 0.339662447257384, + "grad_norm": 0.8411582708358765, + "learning_rate": 0.001125941775830059, + "loss": 1.5979, + "step": 3220 + }, + { + "epoch": 0.3407172995780591, + "grad_norm": 1.0604037046432495, + "learning_rate": 0.0011237672567143107, + "loss": 1.592, + "step": 3230 + }, + { + "epoch": 0.34177215189873417, + "grad_norm": 0.9034127593040466, + "learning_rate": 0.001121588549353946, + "loss": 1.5756, + "step": 3240 + }, + { + "epoch": 0.34282700421940926, + "grad_norm": 0.9037360548973083, + "learning_rate": 0.001119405678162444, + "loss": 1.5918, + "step": 3250 + }, + { + "epoch": 0.3438818565400844, + "grad_norm": 1.0131258964538574, + "learning_rate": 0.0011172186675999425, + "loss": 1.5879, + "step": 3260 + }, + { + "epoch": 0.3449367088607595, + "grad_norm": 0.8777729868888855, + "learning_rate": 0.0011150275421729628, + "loss": 1.6025, + "step": 3270 + }, + { + "epoch": 0.3459915611814346, + 
"grad_norm": 1.284121036529541, + "learning_rate": 0.0011128323264341352, + "loss": 1.6056, + "step": 3280 + }, + { + "epoch": 0.3470464135021097, + "grad_norm": 1.1002249717712402, + "learning_rate": 0.001110633044981924, + "loss": 1.589, + "step": 3290 + }, + { + "epoch": 0.34810126582278483, + "grad_norm": 0.785549521446228, + "learning_rate": 0.0011084297224603517, + "loss": 1.5918, + "step": 3300 + }, + { + "epoch": 0.3491561181434599, + "grad_norm": 0.7483548521995544, + "learning_rate": 0.001106222383558723, + "loss": 1.5924, + "step": 3310 + }, + { + "epoch": 0.350210970464135, + "grad_norm": 0.7502890825271606, + "learning_rate": 0.001104011053011348, + "loss": 1.6014, + "step": 3320 + }, + { + "epoch": 0.3512658227848101, + "grad_norm": 0.8134595155715942, + "learning_rate": 0.0011017957555972656, + "loss": 1.5899, + "step": 3330 + }, + { + "epoch": 0.35232067510548526, + "grad_norm": 0.8079041242599487, + "learning_rate": 0.0010995765161399646, + "loss": 1.5934, + "step": 3340 + }, + { + "epoch": 0.35337552742616035, + "grad_norm": 0.7559992671012878, + "learning_rate": 0.001097353359507107, + "loss": 1.5821, + "step": 3350 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 0.8005449175834656, + "learning_rate": 0.001095126310610248, + "loss": 1.5809, + "step": 3360 + }, + { + "epoch": 0.35548523206751054, + "grad_norm": 0.9777634739875793, + "learning_rate": 0.0010928953944045585, + "loss": 1.5891, + "step": 3370 + }, + { + "epoch": 0.35654008438818563, + "grad_norm": 0.8267648816108704, + "learning_rate": 0.0010906606358885437, + "loss": 1.5881, + "step": 3380 + }, + { + "epoch": 0.3575949367088608, + "grad_norm": 0.894895613193512, + "learning_rate": 0.0010884220601037637, + "loss": 1.5818, + "step": 3390 + }, + { + "epoch": 0.35864978902953587, + "grad_norm": 0.8712685704231262, + "learning_rate": 0.0010861796921345537, + "loss": 1.5903, + "step": 3400 + }, + { + "epoch": 0.35970464135021096, + "grad_norm": 0.7920988202095032, + "learning_rate": 
0.0010839335571077415, + "loss": 1.5888, + "step": 3410 + }, + { + "epoch": 0.36075949367088606, + "grad_norm": 0.9324398636817932, + "learning_rate": 0.0010816836801923666, + "loss": 1.5664, + "step": 3420 + }, + { + "epoch": 0.3618143459915612, + "grad_norm": 0.9261088371276855, + "learning_rate": 0.0010794300865993988, + "loss": 1.5867, + "step": 3430 + }, + { + "epoch": 0.3628691983122363, + "grad_norm": 0.7369474172592163, + "learning_rate": 0.0010771728015814544, + "loss": 1.5812, + "step": 3440 + }, + { + "epoch": 0.3639240506329114, + "grad_norm": 0.836535632610321, + "learning_rate": 0.0010749118504325146, + "loss": 1.5796, + "step": 3450 + }, + { + "epoch": 0.3649789029535865, + "grad_norm": 0.9330554008483887, + "learning_rate": 0.0010726472584876403, + "loss": 1.5736, + "step": 3460 + }, + { + "epoch": 0.36603375527426163, + "grad_norm": 1.007695198059082, + "learning_rate": 0.001070379051122691, + "loss": 1.5882, + "step": 3470 + }, + { + "epoch": 0.3670886075949367, + "grad_norm": 0.8466542959213257, + "learning_rate": 0.001068107253754037, + "loss": 1.5938, + "step": 3480 + }, + { + "epoch": 0.3681434599156118, + "grad_norm": 0.9405096173286438, + "learning_rate": 0.0010658318918382774, + "loss": 1.5855, + "step": 3490 + }, + { + "epoch": 0.3691983122362869, + "grad_norm": 0.7974971532821655, + "learning_rate": 0.0010635529908719537, + "loss": 1.5694, + "step": 3500 + }, + { + "epoch": 0.370253164556962, + "grad_norm": 0.9563730359077454, + "learning_rate": 0.0010612705763912635, + "loss": 1.5851, + "step": 3510 + }, + { + "epoch": 0.37130801687763715, + "grad_norm": 0.7959553003311157, + "learning_rate": 0.0010589846739717755, + "loss": 1.5808, + "step": 3520 + }, + { + "epoch": 0.37236286919831224, + "grad_norm": 0.9178664684295654, + "learning_rate": 0.0010566953092281432, + "loss": 1.5915, + "step": 3530 + }, + { + "epoch": 0.37341772151898733, + "grad_norm": 1.3304373025894165, + "learning_rate": 0.0010544025078138156, + "loss": 1.5878, + 
"step": 3540 + }, + { + "epoch": 0.3744725738396624, + "grad_norm": 1.0758113861083984, + "learning_rate": 0.0010521062954207527, + "loss": 1.5764, + "step": 3550 + }, + { + "epoch": 0.3755274261603376, + "grad_norm": 0.7357787489891052, + "learning_rate": 0.001049806697779135, + "loss": 1.5701, + "step": 3560 + }, + { + "epoch": 0.37658227848101267, + "grad_norm": 0.9464322924613953, + "learning_rate": 0.0010475037406570775, + "loss": 1.5687, + "step": 3570 + }, + { + "epoch": 0.37763713080168776, + "grad_norm": 0.7952796220779419, + "learning_rate": 0.001045197449860339, + "loss": 1.5704, + "step": 3580 + }, + { + "epoch": 0.37869198312236285, + "grad_norm": 0.9277865886688232, + "learning_rate": 0.001042887851232034, + "loss": 1.5833, + "step": 3590 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 1.4571284055709839, + "learning_rate": 0.0010405749706523428, + "loss": 1.5653, + "step": 3600 + }, + { + "epoch": 0.3808016877637131, + "grad_norm": 0.9622997045516968, + "learning_rate": 0.0010382588340382218, + "loss": 1.5677, + "step": 3610 + }, + { + "epoch": 0.3818565400843882, + "grad_norm": 0.7644709348678589, + "learning_rate": 0.0010359394673431126, + "loss": 1.5617, + "step": 3620 + }, + { + "epoch": 0.3829113924050633, + "grad_norm": 0.8672974109649658, + "learning_rate": 0.0010336168965566516, + "loss": 1.5653, + "step": 3630 + }, + { + "epoch": 0.38396624472573837, + "grad_norm": 0.8764004111289978, + "learning_rate": 0.0010312911477043784, + "loss": 1.5689, + "step": 3640 + }, + { + "epoch": 0.3850210970464135, + "grad_norm": 0.8729314208030701, + "learning_rate": 0.0010289622468474448, + "loss": 1.5763, + "step": 3650 + }, + { + "epoch": 0.3860759493670886, + "grad_norm": 0.9395121932029724, + "learning_rate": 0.001026630220082322, + "loss": 1.5685, + "step": 3660 + }, + { + "epoch": 0.3871308016877637, + "grad_norm": 1.7594925165176392, + "learning_rate": 0.0010242950935405084, + "loss": 1.5631, + "step": 3670 + }, + { + "epoch": 
0.3881856540084388, + "grad_norm": 0.7581227421760559, + "learning_rate": 0.0010219568933882372, + "loss": 1.5633, + "step": 3680 + }, + { + "epoch": 0.38924050632911394, + "grad_norm": 0.7861031293869019, + "learning_rate": 0.0010196156458261827, + "loss": 1.5715, + "step": 3690 + }, + { + "epoch": 0.39029535864978904, + "grad_norm": 0.7494408488273621, + "learning_rate": 0.0010172713770891673, + "loss": 1.5786, + "step": 3700 + }, + { + "epoch": 0.39135021097046413, + "grad_norm": 0.8371500372886658, + "learning_rate": 0.0010149241134458666, + "loss": 1.5686, + "step": 3710 + }, + { + "epoch": 0.3924050632911392, + "grad_norm": 0.8405299186706543, + "learning_rate": 0.001012573881198516, + "loss": 1.5456, + "step": 3720 + }, + { + "epoch": 0.39345991561181437, + "grad_norm": 1.461445927619934, + "learning_rate": 0.0010102207066826155, + "loss": 1.5664, + "step": 3730 + }, + { + "epoch": 0.39451476793248946, + "grad_norm": 0.873375415802002, + "learning_rate": 0.0010078646162666345, + "loss": 1.5635, + "step": 3740 + }, + { + "epoch": 0.39556962025316456, + "grad_norm": 0.9902838468551636, + "learning_rate": 0.0010055056363517162, + "loss": 1.5647, + "step": 3750 + }, + { + "epoch": 0.39662447257383965, + "grad_norm": 1.311172604560852, + "learning_rate": 0.001003143793371383, + "loss": 1.5564, + "step": 3760 + }, + { + "epoch": 0.39767932489451474, + "grad_norm": 0.9140952229499817, + "learning_rate": 0.0010007791137912386, + "loss": 1.5738, + "step": 3770 + }, + { + "epoch": 0.3987341772151899, + "grad_norm": 0.7611537575721741, + "learning_rate": 0.0009984116241086723, + "loss": 1.556, + "step": 3780 + }, + { + "epoch": 0.399789029535865, + "grad_norm": 1.3938380479812622, + "learning_rate": 0.0009960413508525617, + "loss": 1.5649, + "step": 3790 + }, + { + "epoch": 0.4008438818565401, + "grad_norm": 1.3314985036849976, + "learning_rate": 0.0009936683205829762, + "loss": 1.5665, + "step": 3800 + }, + { + "epoch": 0.40189873417721517, + "grad_norm": 
0.8216208815574646, + "learning_rate": 0.0009912925598908788, + "loss": 1.5523, + "step": 3810 + }, + { + "epoch": 0.4029535864978903, + "grad_norm": 0.9045887589454651, + "learning_rate": 0.000988914095397828, + "loss": 1.5441, + "step": 3820 + }, + { + "epoch": 0.4040084388185654, + "grad_norm": 0.79442298412323, + "learning_rate": 0.00098653295375568, + "loss": 1.5474, + "step": 3830 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 0.7749638557434082, + "learning_rate": 0.0009841491616462892, + "loss": 1.5683, + "step": 3840 + }, + { + "epoch": 0.4061181434599156, + "grad_norm": 0.8550074696540833, + "learning_rate": 0.0009817627457812106, + "loss": 1.5509, + "step": 3850 + }, + { + "epoch": 0.40717299578059074, + "grad_norm": 1.1499353647232056, + "learning_rate": 0.000979373732901399, + "loss": 1.5652, + "step": 3860 + }, + { + "epoch": 0.40822784810126583, + "grad_norm": 0.8334490656852722, + "learning_rate": 0.0009769821497769102, + "loss": 1.5514, + "step": 3870 + }, + { + "epoch": 0.4092827004219409, + "grad_norm": 0.79237300157547, + "learning_rate": 0.0009745880232066007, + "loss": 1.5451, + "step": 3880 + }, + { + "epoch": 0.410337552742616, + "grad_norm": 0.7715566754341125, + "learning_rate": 0.0009721913800178281, + "loss": 1.5452, + "step": 3890 + }, + { + "epoch": 0.41139240506329117, + "grad_norm": 1.1646877527236938, + "learning_rate": 0.0009697922470661497, + "loss": 1.5461, + "step": 3900 + }, + { + "epoch": 0.41244725738396626, + "grad_norm": 0.8519831895828247, + "learning_rate": 0.0009673906512350213, + "loss": 1.5502, + "step": 3910 + }, + { + "epoch": 0.41350210970464135, + "grad_norm": 0.8819095492362976, + "learning_rate": 0.0009649866194354967, + "loss": 1.5452, + "step": 3920 + }, + { + "epoch": 0.41455696202531644, + "grad_norm": 0.7870282530784607, + "learning_rate": 0.0009625801786059267, + "loss": 1.5524, + "step": 3930 + }, + { + "epoch": 0.41561181434599154, + "grad_norm": 0.9159582257270813, + "learning_rate": 
0.0009601713557116554, + "loss": 1.5469, + "step": 3940 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 0.8133012056350708, + "learning_rate": 0.0009577601777447194, + "loss": 1.5502, + "step": 3950 + }, + { + "epoch": 0.4177215189873418, + "grad_norm": 0.8157657980918884, + "learning_rate": 0.0009553466717235456, + "loss": 1.5519, + "step": 3960 + }, + { + "epoch": 0.41877637130801687, + "grad_norm": 1.3414241075515747, + "learning_rate": 0.0009529308646926473, + "loss": 1.5413, + "step": 3970 + }, + { + "epoch": 0.41983122362869196, + "grad_norm": 0.720044732093811, + "learning_rate": 0.0009505127837223215, + "loss": 1.5512, + "step": 3980 + }, + { + "epoch": 0.4208860759493671, + "grad_norm": 0.8758785128593445, + "learning_rate": 0.0009480924559083468, + "loss": 1.5404, + "step": 3990 + }, + { + "epoch": 0.4219409282700422, + "grad_norm": 0.8820964694023132, + "learning_rate": 0.0009456699083716777, + "loss": 1.5558, + "step": 4000 + }, + { + "epoch": 0.4229957805907173, + "grad_norm": 1.0177242755889893, + "learning_rate": 0.0009432451682581424, + "loss": 1.5406, + "step": 4010 + }, + { + "epoch": 0.4240506329113924, + "grad_norm": 0.7593818306922913, + "learning_rate": 0.000940818262738138, + "loss": 1.5455, + "step": 4020 + }, + { + "epoch": 0.42510548523206754, + "grad_norm": 0.8708838224411011, + "learning_rate": 0.0009383892190063256, + "loss": 1.5337, + "step": 4030 + }, + { + "epoch": 0.42616033755274263, + "grad_norm": 0.7521721720695496, + "learning_rate": 0.0009359580642813265, + "loss": 1.5487, + "step": 4040 + }, + { + "epoch": 0.4272151898734177, + "grad_norm": 0.800601601600647, + "learning_rate": 0.0009335248258054162, + "loss": 1.5478, + "step": 4050 + }, + { + "epoch": 0.4282700421940928, + "grad_norm": 0.9585070610046387, + "learning_rate": 0.0009310895308442202, + "loss": 1.5506, + "step": 4060 + }, + { + "epoch": 0.4293248945147679, + "grad_norm": 0.8830100893974304, + "learning_rate": 0.0009286522066864078, + "loss": 1.5482, + 
"step": 4070 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 0.9807027578353882, + "learning_rate": 0.0009262128806433858, + "loss": 1.5514, + "step": 4080 + }, + { + "epoch": 0.43143459915611815, + "grad_norm": 1.0073405504226685, + "learning_rate": 0.0009237715800489942, + "loss": 1.5477, + "step": 4090 + }, + { + "epoch": 0.43248945147679324, + "grad_norm": 0.8871352076530457, + "learning_rate": 0.0009213283322591977, + "loss": 1.5394, + "step": 4100 + }, + { + "epoch": 0.43354430379746833, + "grad_norm": 1.0316435098648071, + "learning_rate": 0.000918883164651781, + "loss": 1.5331, + "step": 4110 + }, + { + "epoch": 0.4345991561181435, + "grad_norm": 0.8299744129180908, + "learning_rate": 0.0009164361046260412, + "loss": 1.5414, + "step": 4120 + }, + { + "epoch": 0.4356540084388186, + "grad_norm": 0.9238260388374329, + "learning_rate": 0.0009139871796024807, + "loss": 1.5311, + "step": 4130 + }, + { + "epoch": 0.43670886075949367, + "grad_norm": 0.7925126552581787, + "learning_rate": 0.0009115364170225, + "loss": 1.5382, + "step": 4140 + }, + { + "epoch": 0.43776371308016876, + "grad_norm": 1.0686644315719604, + "learning_rate": 0.0009090838443480903, + "loss": 1.542, + "step": 4150 + }, + { + "epoch": 0.4388185654008439, + "grad_norm": 0.826414942741394, + "learning_rate": 0.0009066294890615266, + "loss": 1.5339, + "step": 4160 + }, + { + "epoch": 0.439873417721519, + "grad_norm": 0.775549590587616, + "learning_rate": 0.0009041733786650578, + "loss": 1.5345, + "step": 4170 + }, + { + "epoch": 0.4409282700421941, + "grad_norm": 0.7107667326927185, + "learning_rate": 0.0009017155406806006, + "loss": 1.5296, + "step": 4180 + }, + { + "epoch": 0.4419831223628692, + "grad_norm": 0.8288777470588684, + "learning_rate": 0.0008992560026494294, + "loss": 1.5399, + "step": 4190 + }, + { + "epoch": 0.4430379746835443, + "grad_norm": 0.89658123254776, + "learning_rate": 0.0008967947921318689, + "loss": 1.5414, + "step": 4200 + }, + { + "epoch": 0.4440928270042194, 
+ "grad_norm": 0.8326410055160522, + "learning_rate": 0.0008943319367069844, + "loss": 1.5302, + "step": 4210 + }, + { + "epoch": 0.4451476793248945, + "grad_norm": 0.8234502673149109, + "learning_rate": 0.0008918674639722742, + "loss": 1.536, + "step": 4220 + }, + { + "epoch": 0.4462025316455696, + "grad_norm": 1.1202415227890015, + "learning_rate": 0.0008894014015433582, + "loss": 1.5312, + "step": 4230 + }, + { + "epoch": 0.4472573839662447, + "grad_norm": 1.0334528684616089, + "learning_rate": 0.0008869337770536699, + "loss": 1.532, + "step": 4240 + }, + { + "epoch": 0.44831223628691985, + "grad_norm": 1.2864969968795776, + "learning_rate": 0.0008844646181541472, + "loss": 1.5343, + "step": 4250 + }, + { + "epoch": 0.44936708860759494, + "grad_norm": 0.9088866114616394, + "learning_rate": 0.0008819939525129207, + "loss": 1.5416, + "step": 4260 + }, + { + "epoch": 0.45042194092827004, + "grad_norm": 0.8951067924499512, + "learning_rate": 0.0008795218078150056, + "loss": 1.5231, + "step": 4270 + }, + { + "epoch": 0.45147679324894513, + "grad_norm": 0.844066858291626, + "learning_rate": 0.0008770482117619901, + "loss": 1.5214, + "step": 4280 + }, + { + "epoch": 0.4525316455696203, + "grad_norm": 0.7570001482963562, + "learning_rate": 0.000874573192071726, + "loss": 1.5344, + "step": 4290 + }, + { + "epoch": 0.45358649789029537, + "grad_norm": 1.155387043952942, + "learning_rate": 0.0008720967764780173, + "loss": 1.5336, + "step": 4300 + }, + { + "epoch": 0.45464135021097046, + "grad_norm": 0.827941358089447, + "learning_rate": 0.0008696189927303101, + "loss": 1.5311, + "step": 4310 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 0.9367187023162842, + "learning_rate": 0.0008671398685933811, + "loss": 1.5238, + "step": 4320 + }, + { + "epoch": 0.45675105485232065, + "grad_norm": 1.2789530754089355, + "learning_rate": 0.0008646594318470268, + "loss": 1.5331, + "step": 4330 + }, + { + "epoch": 0.4578059071729958, + "grad_norm": 0.8115516901016235, + 
"learning_rate": 0.000862177710285752, + "loss": 1.5235, + "step": 4340 + }, + { + "epoch": 0.4588607594936709, + "grad_norm": 0.7811355590820312, + "learning_rate": 0.0008596947317184585, + "loss": 1.5239, + "step": 4350 + }, + { + "epoch": 0.459915611814346, + "grad_norm": 0.9020124673843384, + "learning_rate": 0.0008572105239681338, + "loss": 1.5414, + "step": 4360 + }, + { + "epoch": 0.4609704641350211, + "grad_norm": 0.7724413871765137, + "learning_rate": 0.0008547251148715386, + "loss": 1.5163, + "step": 4370 + }, + { + "epoch": 0.4620253164556962, + "grad_norm": 1.045262336730957, + "learning_rate": 0.0008522385322788955, + "loss": 1.5234, + "step": 4380 + }, + { + "epoch": 0.4630801687763713, + "grad_norm": 0.9018635153770447, + "learning_rate": 0.0008497508040535766, + "loss": 1.5187, + "step": 4390 + }, + { + "epoch": 0.4641350210970464, + "grad_norm": 0.8781928420066833, + "learning_rate": 0.0008472619580717914, + "loss": 1.525, + "step": 4400 + }, + { + "epoch": 0.4651898734177215, + "grad_norm": 0.7768313884735107, + "learning_rate": 0.000844772022222274, + "loss": 1.538, + "step": 4410 + }, + { + "epoch": 0.46624472573839665, + "grad_norm": 0.7500082850456238, + "learning_rate": 0.0008422810244059721, + "loss": 1.5216, + "step": 4420 + }, + { + "epoch": 0.46729957805907174, + "grad_norm": 0.9787099361419678, + "learning_rate": 0.000839788992535732, + "loss": 1.5249, + "step": 4430 + }, + { + "epoch": 0.46835443037974683, + "grad_norm": 0.8730626702308655, + "learning_rate": 0.000837295954535988, + "loss": 1.5241, + "step": 4440 + }, + { + "epoch": 0.4694092827004219, + "grad_norm": 0.8407042622566223, + "learning_rate": 0.0008348019383424479, + "loss": 1.5271, + "step": 4450 + }, + { + "epoch": 0.4704641350210971, + "grad_norm": 0.7425684332847595, + "learning_rate": 0.0008323069719017812, + "loss": 1.5161, + "step": 4460 + }, + { + "epoch": 0.47151898734177217, + "grad_norm": 0.7992134094238281, + "learning_rate": 0.0008298110831713047, + "loss": 
1.524, + "step": 4470 + }, + { + "epoch": 0.47257383966244726, + "grad_norm": 0.8140209913253784, + "learning_rate": 0.0008273143001186709, + "loss": 1.5296, + "step": 4480 + }, + { + "epoch": 0.47362869198312235, + "grad_norm": 0.9075658917427063, + "learning_rate": 0.0008248166507215526, + "loss": 1.5143, + "step": 4490 + }, + { + "epoch": 0.47468354430379744, + "grad_norm": 0.8177787661552429, + "learning_rate": 0.0008223181629673312, + "loss": 1.517, + "step": 4500 + }, + { + "epoch": 0.4757383966244726, + "grad_norm": 0.8003637790679932, + "learning_rate": 0.0008198188648527818, + "loss": 1.5246, + "step": 4510 + }, + { + "epoch": 0.4767932489451477, + "grad_norm": 0.7751783728599548, + "learning_rate": 0.00081731878438376, + "loss": 1.5046, + "step": 4520 + }, + { + "epoch": 0.4778481012658228, + "grad_norm": 1.5463695526123047, + "learning_rate": 0.0008148179495748885, + "loss": 1.5242, + "step": 4530 + }, + { + "epoch": 0.47890295358649787, + "grad_norm": 0.7906073927879333, + "learning_rate": 0.0008123163884492422, + "loss": 1.5226, + "step": 4540 + }, + { + "epoch": 0.479957805907173, + "grad_norm": 0.9936162233352661, + "learning_rate": 0.0008098141290380353, + "loss": 1.5209, + "step": 4550 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 1.1033506393432617, + "learning_rate": 0.000807311199380306, + "loss": 1.51, + "step": 4560 + }, + { + "epoch": 0.4820675105485232, + "grad_norm": 0.8154590129852295, + "learning_rate": 0.0008048076275226032, + "loss": 1.4936, + "step": 4570 + }, + { + "epoch": 0.4831223628691983, + "grad_norm": 1.0632771253585815, + "learning_rate": 0.0008023034415186725, + "loss": 1.5133, + "step": 4580 + }, + { + "epoch": 0.48417721518987344, + "grad_norm": 0.862719714641571, + "learning_rate": 0.0007997986694291404, + "loss": 1.5028, + "step": 4590 + }, + { + "epoch": 0.48523206751054854, + "grad_norm": 0.7583125233650208, + "learning_rate": 0.0007972933393212012, + "loss": 1.5222, + "step": 4600 + }, + { + "epoch": 
0.48628691983122363, + "grad_norm": 0.9806751012802124, + "learning_rate": 0.0007947874792683025, + "loss": 1.5245, + "step": 4610 + }, + { + "epoch": 0.4873417721518987, + "grad_norm": 1.2766774892807007, + "learning_rate": 0.0007922811173498293, + "loss": 1.5011, + "step": 4620 + }, + { + "epoch": 0.4883966244725738, + "grad_norm": 0.8371054530143738, + "learning_rate": 0.000789774281650791, + "loss": 1.5153, + "step": 4630 + }, + { + "epoch": 0.48945147679324896, + "grad_norm": 0.7588473558425903, + "learning_rate": 0.0007872670002615056, + "loss": 1.5133, + "step": 4640 + }, + { + "epoch": 0.49050632911392406, + "grad_norm": 0.803425133228302, + "learning_rate": 0.0007847593012772852, + "loss": 1.5195, + "step": 4650 + }, + { + "epoch": 0.49156118143459915, + "grad_norm": 0.9654234647750854, + "learning_rate": 0.0007822512127981218, + "loss": 1.5087, + "step": 4660 + }, + { + "epoch": 0.49261603375527424, + "grad_norm": 0.8735927939414978, + "learning_rate": 0.0007797427629283708, + "loss": 1.5082, + "step": 4670 + }, + { + "epoch": 0.4936708860759494, + "grad_norm": 0.7938900589942932, + "learning_rate": 0.0007772339797764385, + "loss": 1.5054, + "step": 4680 + }, + { + "epoch": 0.4947257383966245, + "grad_norm": 1.0898908376693726, + "learning_rate": 0.0007747248914544646, + "loss": 1.5031, + "step": 4690 + }, + { + "epoch": 0.4957805907172996, + "grad_norm": 0.8071373701095581, + "learning_rate": 0.0007722155260780093, + "loss": 1.5106, + "step": 4700 + }, + { + "epoch": 0.49683544303797467, + "grad_norm": 1.043516993522644, + "learning_rate": 0.0007697059117657368, + "loss": 1.5118, + "step": 4710 + }, + { + "epoch": 0.4978902953586498, + "grad_norm": 0.8896366357803345, + "learning_rate": 0.0007671960766391008, + "loss": 1.5066, + "step": 4720 + }, + { + "epoch": 0.4989451476793249, + "grad_norm": 0.7969541549682617, + "learning_rate": 0.0007646860488220293, + "loss": 1.5016, + "step": 4730 + }, + { + "epoch": 0.5, + "grad_norm": 0.8427814245223999, + 
"learning_rate": 0.00076217585644061, + "loss": 1.5034, + "step": 4740 + }, + { + "epoch": 0.5010548523206751, + "grad_norm": 1.3816167116165161, + "learning_rate": 0.0007596655276227739, + "loss": 1.5222, + "step": 4750 + }, + { + "epoch": 0.5021097046413502, + "grad_norm": 0.8847688436508179, + "learning_rate": 0.0007571550904979812, + "loss": 1.5048, + "step": 4760 + }, + { + "epoch": 0.5031645569620253, + "grad_norm": 0.7680759429931641, + "learning_rate": 0.0007546445731969056, + "loss": 1.5034, + "step": 4770 + }, + { + "epoch": 0.5042194092827004, + "grad_norm": 0.7896491289138794, + "learning_rate": 0.0007521340038511196, + "loss": 1.5128, + "step": 4780 + }, + { + "epoch": 0.5052742616033755, + "grad_norm": 0.9814112782478333, + "learning_rate": 0.0007496234105927785, + "loss": 1.5002, + "step": 4790 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 0.8398501873016357, + "learning_rate": 0.0007471128215543056, + "loss": 1.5045, + "step": 4800 + }, + { + "epoch": 0.5073839662447257, + "grad_norm": 1.0425026416778564, + "learning_rate": 0.0007446022648680768, + "loss": 1.5029, + "step": 4810 + }, + { + "epoch": 0.5084388185654009, + "grad_norm": 0.7993313670158386, + "learning_rate": 0.0007420917686661055, + "loss": 1.5214, + "step": 4820 + }, + { + "epoch": 0.509493670886076, + "grad_norm": 0.7925863862037659, + "learning_rate": 0.0007395813610797283, + "loss": 1.5032, + "step": 4830 + }, + { + "epoch": 0.510548523206751, + "grad_norm": 1.2084681987762451, + "learning_rate": 0.0007370710702392873, + "loss": 1.497, + "step": 4840 + }, + { + "epoch": 0.5116033755274262, + "grad_norm": 1.0614187717437744, + "learning_rate": 0.0007345609242738173, + "loss": 1.5115, + "step": 4850 + }, + { + "epoch": 0.5126582278481012, + "grad_norm": 1.0857713222503662, + "learning_rate": 0.0007320509513107296, + "loss": 1.4942, + "step": 4860 + }, + { + "epoch": 0.5137130801687764, + "grad_norm": 0.7491085529327393, + "learning_rate": 0.0007295411794754967, + "loss": 
1.4899, + "step": 4870 + }, + { + "epoch": 0.5147679324894515, + "grad_norm": 0.8832652568817139, + "learning_rate": 0.0007270316368913374, + "loss": 1.513, + "step": 4880 + }, + { + "epoch": 0.5158227848101266, + "grad_norm": 0.8317632675170898, + "learning_rate": 0.0007245223516789019, + "loss": 1.501, + "step": 4890 + }, + { + "epoch": 0.5168776371308017, + "grad_norm": 0.9447041749954224, + "learning_rate": 0.0007220133519559563, + "loss": 1.5087, + "step": 4900 + }, + { + "epoch": 0.5179324894514767, + "grad_norm": 0.7509909868240356, + "learning_rate": 0.0007195046658370675, + "loss": 1.5011, + "step": 4910 + }, + { + "epoch": 0.5189873417721519, + "grad_norm": 1.209110140800476, + "learning_rate": 0.0007169963214332885, + "loss": 1.4921, + "step": 4920 + }, + { + "epoch": 0.520042194092827, + "grad_norm": 0.8436342477798462, + "learning_rate": 0.000714488346851843, + "loss": 1.5021, + "step": 4930 + }, + { + "epoch": 0.5210970464135021, + "grad_norm": 0.916789710521698, + "learning_rate": 0.000711980770195811, + "loss": 1.498, + "step": 4940 + }, + { + "epoch": 0.5221518987341772, + "grad_norm": 0.8583993911743164, + "learning_rate": 0.0007094736195638128, + "loss": 1.5057, + "step": 4950 + }, + { + "epoch": 0.5232067510548524, + "grad_norm": 0.8120072484016418, + "learning_rate": 0.0007069669230496961, + "loss": 1.4878, + "step": 4960 + }, + { + "epoch": 0.5242616033755274, + "grad_norm": 1.0954148769378662, + "learning_rate": 0.0007044607087422191, + "loss": 1.4939, + "step": 4970 + }, + { + "epoch": 0.5253164556962026, + "grad_norm": 0.8621455430984497, + "learning_rate": 0.000701955004724737, + "loss": 1.4892, + "step": 4980 + }, + { + "epoch": 0.5263713080168776, + "grad_norm": 0.811439573764801, + "learning_rate": 0.0006994498390748865, + "loss": 1.4918, + "step": 4990 + }, + { + "epoch": 0.5274261603375527, + "grad_norm": 1.0042996406555176, + "learning_rate": 0.0006969452398642721, + "loss": 1.5026, + "step": 5000 + }, + { + "epoch": 
0.5284810126582279, + "grad_norm": 0.7981130480766296, + "learning_rate": 0.0006944412351581506, + "loss": 1.4964, + "step": 5010 + }, + { + "epoch": 0.5295358649789029, + "grad_norm": 0.8449530005455017, + "learning_rate": 0.0006919378530151182, + "loss": 1.4892, + "step": 5020 + }, + { + "epoch": 0.5305907172995781, + "grad_norm": 0.8506574034690857, + "learning_rate": 0.0006894351214867937, + "loss": 1.4741, + "step": 5030 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 0.9452551007270813, + "learning_rate": 0.0006869330686175058, + "loss": 1.5029, + "step": 5040 + }, + { + "epoch": 0.5327004219409283, + "grad_norm": 0.7912987470626831, + "learning_rate": 0.0006844317224439788, + "loss": 1.4887, + "step": 5050 + }, + { + "epoch": 0.5337552742616034, + "grad_norm": 0.9540243744850159, + "learning_rate": 0.0006819311109950177, + "loss": 1.5005, + "step": 5060 + }, + { + "epoch": 0.5348101265822784, + "grad_norm": 1.3117432594299316, + "learning_rate": 0.0006794312622911953, + "loss": 1.4895, + "step": 5070 + }, + { + "epoch": 0.5358649789029536, + "grad_norm": 1.0674008131027222, + "learning_rate": 0.0006769322043445363, + "loss": 1.5026, + "step": 5080 + }, + { + "epoch": 0.5369198312236287, + "grad_norm": 0.852445662021637, + "learning_rate": 0.0006744339651582059, + "loss": 1.4909, + "step": 5090 + }, + { + "epoch": 0.5379746835443038, + "grad_norm": 0.8971731066703796, + "learning_rate": 0.0006719365727261935, + "loss": 1.4741, + "step": 5100 + }, + { + "epoch": 0.5390295358649789, + "grad_norm": 0.8166397213935852, + "learning_rate": 0.0006694400550330013, + "loss": 1.4882, + "step": 5110 + }, + { + "epoch": 0.540084388185654, + "grad_norm": 0.8435339331626892, + "learning_rate": 0.0006669444400533286, + "loss": 1.4876, + "step": 5120 + }, + { + "epoch": 0.5411392405063291, + "grad_norm": 0.7707935571670532, + "learning_rate": 0.0006644497557517599, + "loss": 1.4821, + "step": 5130 + }, + { + "epoch": 0.5421940928270043, + "grad_norm": 
0.9662792682647705, + "learning_rate": 0.0006619560300824507, + "loss": 1.5009, + "step": 5140 + }, + { + "epoch": 0.5432489451476793, + "grad_norm": 0.8456140756607056, + "learning_rate": 0.0006594632909888154, + "loss": 1.4946, + "step": 5150 + }, + { + "epoch": 0.5443037974683544, + "grad_norm": 0.8111633658409119, + "learning_rate": 0.0006569715664032124, + "loss": 1.4886, + "step": 5160 + }, + { + "epoch": 0.5453586497890295, + "grad_norm": 0.7303435206413269, + "learning_rate": 0.0006544808842466324, + "loss": 1.4893, + "step": 5170 + }, + { + "epoch": 0.5464135021097046, + "grad_norm": 1.0382319688796997, + "learning_rate": 0.0006519912724283851, + "loss": 1.4995, + "step": 5180 + }, + { + "epoch": 0.5474683544303798, + "grad_norm": 1.0878241062164307, + "learning_rate": 0.0006495027588457864, + "loss": 1.4808, + "step": 5190 + }, + { + "epoch": 0.5485232067510548, + "grad_norm": 0.9460204839706421, + "learning_rate": 0.0006470153713838463, + "loss": 1.4918, + "step": 5200 + }, + { + "epoch": 0.54957805907173, + "grad_norm": 0.9110044240951538, + "learning_rate": 0.0006445291379149556, + "loss": 1.4774, + "step": 5210 + }, + { + "epoch": 0.5506329113924051, + "grad_norm": 0.8861435055732727, + "learning_rate": 0.0006420440862985748, + "loss": 1.4882, + "step": 5220 + }, + { + "epoch": 0.5516877637130801, + "grad_norm": 1.056693196296692, + "learning_rate": 0.0006395602443809203, + "loss": 1.4814, + "step": 5230 + }, + { + "epoch": 0.5527426160337553, + "grad_norm": 0.8175979256629944, + "learning_rate": 0.0006370776399946536, + "loss": 1.4876, + "step": 5240 + }, + { + "epoch": 0.5537974683544303, + "grad_norm": 0.8069374561309814, + "learning_rate": 0.0006345963009585694, + "loss": 1.4808, + "step": 5250 + }, + { + "epoch": 0.5548523206751055, + "grad_norm": 0.9578755497932434, + "learning_rate": 0.0006321162550772829, + "loss": 1.486, + "step": 5260 + }, + { + "epoch": 0.5559071729957806, + "grad_norm": 0.8186458349227905, + "learning_rate": 
0.0006296375301409187, + "loss": 1.4905, + "step": 5270 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 0.8206368088722229, + "learning_rate": 0.0006271601539248012, + "loss": 1.4774, + "step": 5280 + }, + { + "epoch": 0.5580168776371308, + "grad_norm": 0.8607250452041626, + "learning_rate": 0.0006246841541891399, + "loss": 1.4827, + "step": 5290 + }, + { + "epoch": 0.5590717299578059, + "grad_norm": 1.1231073141098022, + "learning_rate": 0.0006222095586787208, + "loss": 1.4856, + "step": 5300 + }, + { + "epoch": 0.560126582278481, + "grad_norm": 0.7894901037216187, + "learning_rate": 0.0006197363951225951, + "loss": 1.4762, + "step": 5310 + }, + { + "epoch": 0.5611814345991561, + "grad_norm": 0.820431113243103, + "learning_rate": 0.0006172646912337678, + "loss": 1.4726, + "step": 5320 + }, + { + "epoch": 0.5622362869198312, + "grad_norm": 0.8754194378852844, + "learning_rate": 0.0006147944747088881, + "loss": 1.4848, + "step": 5330 + }, + { + "epoch": 0.5632911392405063, + "grad_norm": 0.8587943911552429, + "learning_rate": 0.000612325773227938, + "loss": 1.4784, + "step": 5340 + }, + { + "epoch": 0.5643459915611815, + "grad_norm": 1.04790198802948, + "learning_rate": 0.0006098586144539235, + "loss": 1.4723, + "step": 5350 + }, + { + "epoch": 0.5654008438818565, + "grad_norm": 0.9993575811386108, + "learning_rate": 0.0006073930260325632, + "loss": 1.4807, + "step": 5360 + }, + { + "epoch": 0.5664556962025317, + "grad_norm": 0.7557750940322876, + "learning_rate": 0.0006049290355919792, + "loss": 1.4817, + "step": 5370 + }, + { + "epoch": 0.5675105485232067, + "grad_norm": 0.7643726468086243, + "learning_rate": 0.0006024666707423875, + "loss": 1.474, + "step": 5380 + }, + { + "epoch": 0.5685654008438819, + "grad_norm": 0.738929808139801, + "learning_rate": 0.0006000059590757886, + "loss": 1.4732, + "step": 5390 + }, + { + "epoch": 0.569620253164557, + "grad_norm": 0.7679731249809265, + "learning_rate": 0.0005975469281656581, + "loss": 1.4719, + "step": 5400 + 
}, + { + "epoch": 0.570675105485232, + "grad_norm": 0.7685670256614685, + "learning_rate": 0.0005950896055666384, + "loss": 1.476, + "step": 5410 + }, + { + "epoch": 0.5717299578059072, + "grad_norm": 0.8041839599609375, + "learning_rate": 0.0005926340188142289, + "loss": 1.4701, + "step": 5420 + }, + { + "epoch": 0.5727848101265823, + "grad_norm": 0.9008498787879944, + "learning_rate": 0.0005901801954244782, + "loss": 1.467, + "step": 5430 + }, + { + "epoch": 0.5738396624472574, + "grad_norm": 0.6945821046829224, + "learning_rate": 0.0005877281628936756, + "loss": 1.4666, + "step": 5440 + }, + { + "epoch": 0.5748945147679325, + "grad_norm": 0.8363376259803772, + "learning_rate": 0.0005852779486980427, + "loss": 1.4722, + "step": 5450 + }, + { + "epoch": 0.5759493670886076, + "grad_norm": 0.7649796009063721, + "learning_rate": 0.0005828295802934263, + "loss": 1.4736, + "step": 5460 + }, + { + "epoch": 0.5770042194092827, + "grad_norm": 0.8728696703910828, + "learning_rate": 0.0005803830851149892, + "loss": 1.4741, + "step": 5470 + }, + { + "epoch": 0.5780590717299579, + "grad_norm": 0.9347139000892639, + "learning_rate": 0.0005779384905769053, + "loss": 1.4753, + "step": 5480 + }, + { + "epoch": 0.5791139240506329, + "grad_norm": 0.8734438419342041, + "learning_rate": 0.0005754958240720498, + "loss": 1.4788, + "step": 5490 + }, + { + "epoch": 0.580168776371308, + "grad_norm": 0.8313824534416199, + "learning_rate": 0.0005730551129716936, + "loss": 1.4686, + "step": 5500 + }, + { + "epoch": 0.5812236286919831, + "grad_norm": 0.9852312207221985, + "learning_rate": 0.0005706163846251961, + "loss": 1.4773, + "step": 5510 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 1.0101618766784668, + "learning_rate": 0.0005681796663596996, + "loss": 1.4677, + "step": 5520 + }, + { + "epoch": 0.5833333333333334, + "grad_norm": 0.7628858089447021, + "learning_rate": 0.0005657449854798216, + "loss": 1.4597, + "step": 5530 + }, + { + "epoch": 0.5843881856540084, + "grad_norm": 
0.7917925119400024, + "learning_rate": 0.0005633123692673503, + "loss": 1.4584, + "step": 5540 + }, + { + "epoch": 0.5854430379746836, + "grad_norm": 0.9232448935508728, + "learning_rate": 0.0005608818449809387, + "loss": 1.4752, + "step": 5550 + }, + { + "epoch": 0.5864978902953587, + "grad_norm": 0.742405116558075, + "learning_rate": 0.0005584534398557977, + "loss": 1.4823, + "step": 5560 + }, + { + "epoch": 0.5875527426160337, + "grad_norm": 0.7977108955383301, + "learning_rate": 0.0005560271811033928, + "loss": 1.4654, + "step": 5570 + }, + { + "epoch": 0.5886075949367089, + "grad_norm": 1.0988959074020386, + "learning_rate": 0.0005536030959111377, + "loss": 1.4632, + "step": 5580 + }, + { + "epoch": 0.5896624472573839, + "grad_norm": 0.8272600769996643, + "learning_rate": 0.0005511812114420908, + "loss": 1.4683, + "step": 5590 + }, + { + "epoch": 0.5907172995780591, + "grad_norm": 0.8356586694717407, + "learning_rate": 0.0005487615548346502, + "loss": 1.4623, + "step": 5600 + }, + { + "epoch": 0.5917721518987342, + "grad_norm": 0.9066822528839111, + "learning_rate": 0.0005463441532022495, + "loss": 1.4676, + "step": 5610 + }, + { + "epoch": 0.5928270042194093, + "grad_norm": 0.8626927733421326, + "learning_rate": 0.0005439290336330545, + "loss": 1.4769, + "step": 5620 + }, + { + "epoch": 0.5938818565400844, + "grad_norm": 0.8186584711074829, + "learning_rate": 0.0005415162231896593, + "loss": 1.4692, + "step": 5630 + }, + { + "epoch": 0.5949367088607594, + "grad_norm": 0.8036007881164551, + "learning_rate": 0.000539105748908783, + "loss": 1.4765, + "step": 5640 + }, + { + "epoch": 0.5959915611814346, + "grad_norm": 0.7557952404022217, + "learning_rate": 0.0005366976378009668, + "loss": 1.472, + "step": 5650 + }, + { + "epoch": 0.5970464135021097, + "grad_norm": 0.805500328540802, + "learning_rate": 0.0005342919168502717, + "loss": 1.4652, + "step": 5660 + }, + { + "epoch": 0.5981012658227848, + "grad_norm": 0.7832914590835571, + "learning_rate": 
0.0005318886130139753, + "loss": 1.4726, + "step": 5670 + }, + { + "epoch": 0.5991561181434599, + "grad_norm": 0.7593386769294739, + "learning_rate": 0.0005294877532222709, + "loss": 1.4721, + "step": 5680 + }, + { + "epoch": 0.6002109704641351, + "grad_norm": 0.8965731859207153, + "learning_rate": 0.0005270893643779649, + "loss": 1.454, + "step": 5690 + }, + { + "epoch": 0.6012658227848101, + "grad_norm": 1.1572834253311157, + "learning_rate": 0.0005246934733561751, + "loss": 1.4579, + "step": 5700 + }, + { + "epoch": 0.6023206751054853, + "grad_norm": 0.805120050907135, + "learning_rate": 0.0005223001070040305, + "loss": 1.465, + "step": 5710 + }, + { + "epoch": 0.6033755274261603, + "grad_norm": 0.7718102931976318, + "learning_rate": 0.0005199092921403696, + "loss": 1.4653, + "step": 5720 + }, + { + "epoch": 0.6044303797468354, + "grad_norm": 0.7578348517417908, + "learning_rate": 0.00051752105555544, + "loss": 1.4596, + "step": 5730 + }, + { + "epoch": 0.6054852320675106, + "grad_norm": 0.7246220707893372, + "learning_rate": 0.0005151354240105994, + "loss": 1.4591, + "step": 5740 + }, + { + "epoch": 0.6065400843881856, + "grad_norm": 0.930872917175293, + "learning_rate": 0.0005127524242380139, + "loss": 1.4355, + "step": 5750 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 0.794120192527771, + "learning_rate": 0.0005103720829403594, + "loss": 1.4457, + "step": 5760 + }, + { + "epoch": 0.6086497890295358, + "grad_norm": 0.870374858379364, + "learning_rate": 0.0005079944267905226, + "loss": 1.4475, + "step": 5770 + }, + { + "epoch": 0.609704641350211, + "grad_norm": 0.7071398496627808, + "learning_rate": 0.0005056194824313015, + "loss": 1.4559, + "step": 5780 + }, + { + "epoch": 0.6107594936708861, + "grad_norm": 0.6861531138420105, + "learning_rate": 0.0005032472764751074, + "loss": 1.4525, + "step": 5790 + }, + { + "epoch": 0.6118143459915611, + "grad_norm": 1.1245986223220825, + "learning_rate": 0.000500877835503666, + "loss": 1.4576, + "step": 5800 + 
}, + { + "epoch": 0.6128691983122363, + "grad_norm": 0.8311609625816345, + "learning_rate": 0.000498511186067721, + "loss": 1.4624, + "step": 5810 + }, + { + "epoch": 0.6139240506329114, + "grad_norm": 0.8746213316917419, + "learning_rate": 0.0004961473546867346, + "loss": 1.4741, + "step": 5820 + }, + { + "epoch": 0.6149789029535865, + "grad_norm": 0.9684680700302124, + "learning_rate": 0.0004937863678485915, + "loss": 1.4573, + "step": 5830 + }, + { + "epoch": 0.6160337552742616, + "grad_norm": 0.809626579284668, + "learning_rate": 0.0004914282520093023, + "loss": 1.4679, + "step": 5840 + }, + { + "epoch": 0.6170886075949367, + "grad_norm": 0.7640191912651062, + "learning_rate": 0.0004890730335927063, + "loss": 1.4717, + "step": 5850 + }, + { + "epoch": 0.6181434599156118, + "grad_norm": 0.9215764999389648, + "learning_rate": 0.00048672073899017564, + "loss": 1.4569, + "step": 5860 + }, + { + "epoch": 0.619198312236287, + "grad_norm": 0.7624779343605042, + "learning_rate": 0.0004843713945603205, + "loss": 1.4514, + "step": 5870 + }, + { + "epoch": 0.620253164556962, + "grad_norm": 0.7117841243743896, + "learning_rate": 0.00048202502662869195, + "loss": 1.4612, + "step": 5880 + }, + { + "epoch": 0.6213080168776371, + "grad_norm": 0.9278427958488464, + "learning_rate": 0.0004796816614874885, + "loss": 1.452, + "step": 5890 + }, + { + "epoch": 0.6223628691983122, + "grad_norm": 0.8216862082481384, + "learning_rate": 0.00047734132539526086, + "loss": 1.4553, + "step": 5900 + }, + { + "epoch": 0.6234177215189873, + "grad_norm": 1.1640589237213135, + "learning_rate": 0.00047500404457661747, + "loss": 1.4505, + "step": 5910 + }, + { + "epoch": 0.6244725738396625, + "grad_norm": 1.30487859249115, + "learning_rate": 0.00047266984522193134, + "loss": 1.4436, + "step": 5920 + }, + { + "epoch": 0.6255274261603375, + "grad_norm": 0.7403445839881897, + "learning_rate": 0.00047033875348704576, + "loss": 1.4508, + "step": 5930 + }, + { + "epoch": 0.6265822784810127, + 
"grad_norm": 0.8770729899406433, + "learning_rate": 0.00046801079549298224, + "loss": 1.4557, + "step": 5940 + }, + { + "epoch": 0.6276371308016878, + "grad_norm": 0.9731765389442444, + "learning_rate": 0.0004656859973256466, + "loss": 1.4452, + "step": 5950 + }, + { + "epoch": 0.6286919831223629, + "grad_norm": 0.8889421820640564, + "learning_rate": 0.00046336438503553754, + "loss": 1.4547, + "step": 5960 + }, + { + "epoch": 0.629746835443038, + "grad_norm": 0.8108775615692139, + "learning_rate": 0.00046104598463745424, + "loss": 1.4528, + "step": 5970 + }, + { + "epoch": 0.630801687763713, + "grad_norm": 1.1195112466812134, + "learning_rate": 0.0004587308221102053, + "loss": 1.4379, + "step": 5980 + }, + { + "epoch": 0.6318565400843882, + "grad_norm": 0.9770963191986084, + "learning_rate": 0.00045641892339631703, + "loss": 1.4566, + "step": 5990 + }, + { + "epoch": 0.6329113924050633, + "grad_norm": 0.779200553894043, + "learning_rate": 0.000454110314401744, + "loss": 1.4607, + "step": 6000 + }, + { + "epoch": 0.6339662447257384, + "grad_norm": 0.8143290877342224, + "learning_rate": 0.00045180502099557686, + "loss": 1.4499, + "step": 6010 + }, + { + "epoch": 0.6350210970464135, + "grad_norm": 0.9525236487388611, + "learning_rate": 0.00044950306900975377, + "loss": 1.4554, + "step": 6020 + }, + { + "epoch": 0.6360759493670886, + "grad_norm": 0.774480938911438, + "learning_rate": 0.00044720448423877113, + "loss": 1.4367, + "step": 6030 + }, + { + "epoch": 0.6371308016877637, + "grad_norm": 1.0294450521469116, + "learning_rate": 0.0004449092924393933, + "loss": 1.4553, + "step": 6040 + }, + { + "epoch": 0.6381856540084389, + "grad_norm": 0.9049546718597412, + "learning_rate": 0.00044261751933036525, + "loss": 1.4364, + "step": 6050 + }, + { + "epoch": 0.6392405063291139, + "grad_norm": 0.9582249522209167, + "learning_rate": 0.0004403291905921233, + "loss": 1.4539, + "step": 6060 + }, + { + "epoch": 0.640295358649789, + "grad_norm": 0.8585244417190552, + 
"learning_rate": 0.00043804433186650916, + "loss": 1.4466, + "step": 6070 + }, + { + "epoch": 0.6413502109704642, + "grad_norm": 0.8350580930709839, + "learning_rate": 0.00043576296875647984, + "loss": 1.444, + "step": 6080 + }, + { + "epoch": 0.6424050632911392, + "grad_norm": 0.8500372767448425, + "learning_rate": 0.0004334851268258234, + "loss": 1.4453, + "step": 6090 + }, + { + "epoch": 0.6434599156118144, + "grad_norm": 1.0709333419799805, + "learning_rate": 0.00043121083159887056, + "loss": 1.4371, + "step": 6100 + }, + { + "epoch": 0.6445147679324894, + "grad_norm": 0.8786770105361938, + "learning_rate": 0.00042894010856020997, + "loss": 1.4512, + "step": 6110 + }, + { + "epoch": 0.6455696202531646, + "grad_norm": 0.8004217147827148, + "learning_rate": 0.0004266729831544017, + "loss": 1.4465, + "step": 6120 + }, + { + "epoch": 0.6466244725738397, + "grad_norm": 0.929474949836731, + "learning_rate": 0.0004244094807856936, + "loss": 1.4445, + "step": 6130 + }, + { + "epoch": 0.6476793248945147, + "grad_norm": 0.9203303456306458, + "learning_rate": 0.00042214962681773457, + "loss": 1.4433, + "step": 6140 + }, + { + "epoch": 0.6487341772151899, + "grad_norm": 0.8021324276924133, + "learning_rate": 0.00041989344657329187, + "loss": 1.4558, + "step": 6150 + }, + { + "epoch": 0.6497890295358649, + "grad_norm": 0.7882404923439026, + "learning_rate": 0.00041764096533396667, + "loss": 1.4475, + "step": 6160 + }, + { + "epoch": 0.6508438818565401, + "grad_norm": 0.7939091324806213, + "learning_rate": 0.00041539220833991124, + "loss": 1.4226, + "step": 6170 + }, + { + "epoch": 0.6518987341772152, + "grad_norm": 0.808198094367981, + "learning_rate": 0.0004131472007895457, + "loss": 1.4479, + "step": 6180 + }, + { + "epoch": 0.6529535864978903, + "grad_norm": 0.8201770782470703, + "learning_rate": 0.00041090596783927583, + "loss": 1.4505, + "step": 6190 + }, + { + "epoch": 0.6540084388185654, + "grad_norm": 0.9328317046165466, + "learning_rate": 0.0004086685346032111, + 
"loss": 1.4428, + "step": 6200 + }, + { + "epoch": 0.6550632911392406, + "grad_norm": 0.777702808380127, + "learning_rate": 0.00040643492615288367, + "loss": 1.437, + "step": 6210 + }, + { + "epoch": 0.6561181434599156, + "grad_norm": 0.7963785529136658, + "learning_rate": 0.00040420516751696664, + "loss": 1.4462, + "step": 6220 + }, + { + "epoch": 0.6571729957805907, + "grad_norm": 0.7491010427474976, + "learning_rate": 0.00040197928368099445, + "loss": 1.4342, + "step": 6230 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 0.8153260350227356, + "learning_rate": 0.00039975729958708223, + "loss": 1.4416, + "step": 6240 + }, + { + "epoch": 0.6592827004219409, + "grad_norm": 0.999072253704071, + "learning_rate": 0.0003975392401336468, + "loss": 1.4477, + "step": 6250 + }, + { + "epoch": 0.6603375527426161, + "grad_norm": 0.8815893530845642, + "learning_rate": 0.00039532513017512694, + "loss": 1.4392, + "step": 6260 + }, + { + "epoch": 0.6613924050632911, + "grad_norm": 0.8566296696662903, + "learning_rate": 0.00039311499452170665, + "loss": 1.449, + "step": 6270 + }, + { + "epoch": 0.6624472573839663, + "grad_norm": 0.7206413745880127, + "learning_rate": 0.0003909088579390347, + "loss": 1.447, + "step": 6280 + }, + { + "epoch": 0.6635021097046413, + "grad_norm": 0.8595064878463745, + "learning_rate": 0.00038870674514794877, + "loss": 1.4424, + "step": 6290 + }, + { + "epoch": 0.6645569620253164, + "grad_norm": 0.8483584523200989, + "learning_rate": 0.0003865086808241979, + "loss": 1.4351, + "step": 6300 + }, + { + "epoch": 0.6656118143459916, + "grad_norm": 0.7777718901634216, + "learning_rate": 0.0003843146895981661, + "loss": 1.4381, + "step": 6310 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.9142327308654785, + "learning_rate": 0.00038212479605459617, + "loss": 1.4312, + "step": 6320 + }, + { + "epoch": 0.6677215189873418, + "grad_norm": 0.8369050621986389, + "learning_rate": 0.000379939024732315, + "loss": 1.4401, + "step": 6330 + }, + { + 
"epoch": 0.6687763713080169, + "grad_norm": 0.7825643420219421, + "learning_rate": 0.0003777574001239573, + "loss": 1.4322, + "step": 6340 + }, + { + "epoch": 0.669831223628692, + "grad_norm": 0.8955847024917603, + "learning_rate": 0.00037557994667569217, + "loss": 1.4398, + "step": 6350 + }, + { + "epoch": 0.6708860759493671, + "grad_norm": 0.7753504514694214, + "learning_rate": 0.0003734066887869485, + "loss": 1.4394, + "step": 6360 + }, + { + "epoch": 0.6719409282700421, + "grad_norm": 0.8527138233184814, + "learning_rate": 0.0003712376508101424, + "loss": 1.421, + "step": 6370 + }, + { + "epoch": 0.6729957805907173, + "grad_norm": 0.7981077432632446, + "learning_rate": 0.0003690728570504032, + "loss": 1.4234, + "step": 6380 + }, + { + "epoch": 0.6740506329113924, + "grad_norm": 0.8617408275604248, + "learning_rate": 0.00036691233176530197, + "loss": 1.4401, + "step": 6390 + }, + { + "epoch": 0.6751054852320675, + "grad_norm": 1.1736260652542114, + "learning_rate": 0.00036475609916457996, + "loss": 1.4401, + "step": 6400 + }, + { + "epoch": 0.6761603375527426, + "grad_norm": 0.9903077483177185, + "learning_rate": 0.000362604183409876, + "loss": 1.4351, + "step": 6410 + }, + { + "epoch": 0.6772151898734177, + "grad_norm": 0.8533949851989746, + "learning_rate": 0.00036045660861445684, + "loss": 1.4262, + "step": 6420 + }, + { + "epoch": 0.6782700421940928, + "grad_norm": 0.7817601561546326, + "learning_rate": 0.0003583133988429468, + "loss": 1.4336, + "step": 6430 + }, + { + "epoch": 0.679324894514768, + "grad_norm": 0.9680834412574768, + "learning_rate": 0.0003561745781110579, + "loss": 1.4198, + "step": 6440 + }, + { + "epoch": 0.680379746835443, + "grad_norm": 0.8849319219589233, + "learning_rate": 0.00035404017038532045, + "loss": 1.4231, + "step": 6450 + }, + { + "epoch": 0.6814345991561181, + "grad_norm": 1.0471911430358887, + "learning_rate": 0.00035191019958281575, + "loss": 1.4384, + "step": 6460 + }, + { + "epoch": 0.6824894514767933, + "grad_norm": 
0.8505642414093018, + "learning_rate": 0.00034978468957090635, + "loss": 1.4318, + "step": 6470 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 1.20522141456604, + "learning_rate": 0.0003476636641669699, + "loss": 1.4397, + "step": 6480 + }, + { + "epoch": 0.6845991561181435, + "grad_norm": 0.7050632834434509, + "learning_rate": 0.0003455471471381318, + "loss": 1.431, + "step": 6490 + }, + { + "epoch": 0.6856540084388185, + "grad_norm": 0.7978450655937195, + "learning_rate": 0.0003434351622009985, + "loss": 1.4481, + "step": 6500 + }, + { + "epoch": 0.6867088607594937, + "grad_norm": 0.8616747260093689, + "learning_rate": 0.0003413277330213928, + "loss": 1.4466, + "step": 6510 + }, + { + "epoch": 0.6877637130801688, + "grad_norm": 0.7226597666740417, + "learning_rate": 0.0003392248832140876, + "loss": 1.4313, + "step": 6520 + }, + { + "epoch": 0.6888185654008439, + "grad_norm": 0.7383049130439758, + "learning_rate": 0.00033712663634254163, + "loss": 1.424, + "step": 6530 + }, + { + "epoch": 0.689873417721519, + "grad_norm": 0.7875366806983948, + "learning_rate": 0.00033503301591863586, + "loss": 1.4343, + "step": 6540 + }, + { + "epoch": 0.6909282700421941, + "grad_norm": 0.7701209783554077, + "learning_rate": 0.0003329440454024092, + "loss": 1.4286, + "step": 6550 + }, + { + "epoch": 0.6919831223628692, + "grad_norm": 1.0878496170043945, + "learning_rate": 0.0003308597482017965, + "loss": 1.4269, + "step": 6560 + }, + { + "epoch": 0.6930379746835443, + "grad_norm": 0.858603298664093, + "learning_rate": 0.0003287801476723656, + "loss": 1.4266, + "step": 6570 + }, + { + "epoch": 0.6940928270042194, + "grad_norm": 0.8791925311088562, + "learning_rate": 0.00032670526711705536, + "loss": 1.4225, + "step": 6580 + }, + { + "epoch": 0.6951476793248945, + "grad_norm": 0.9509696960449219, + "learning_rate": 0.0003246351297859164, + "loss": 1.4264, + "step": 6590 + }, + { + "epoch": 0.6962025316455697, + "grad_norm": 0.868752658367157, + "learning_rate": 
0.00032256975887584783, + "loss": 1.4337, + "step": 6600 + }, + { + "epoch": 0.6972573839662447, + "grad_norm": 0.952176034450531, + "learning_rate": 0.00032050917753033935, + "loss": 1.4198, + "step": 6610 + }, + { + "epoch": 0.6983122362869199, + "grad_norm": 0.7968972325325012, + "learning_rate": 0.000318453408839211, + "loss": 1.4282, + "step": 6620 + }, + { + "epoch": 0.6993670886075949, + "grad_norm": 0.8150470852851868, + "learning_rate": 0.0003164024758383548, + "loss": 1.4226, + "step": 6630 + }, + { + "epoch": 0.70042194092827, + "grad_norm": 0.978992223739624, + "learning_rate": 0.00031435640150947645, + "loss": 1.4275, + "step": 6640 + }, + { + "epoch": 0.7014767932489452, + "grad_norm": 0.8182054758071899, + "learning_rate": 0.0003123152087798376, + "loss": 1.4196, + "step": 6650 + }, + { + "epoch": 0.7025316455696202, + "grad_norm": 0.7879346609115601, + "learning_rate": 0.00031027892052200003, + "loss": 1.4211, + "step": 6660 + }, + { + "epoch": 0.7035864978902954, + "grad_norm": 0.7786043286323547, + "learning_rate": 0.0003082475595535677, + "loss": 1.4233, + "step": 6670 + }, + { + "epoch": 0.7046413502109705, + "grad_norm": 0.730938196182251, + "learning_rate": 0.00030622114863693205, + "loss": 1.426, + "step": 6680 + }, + { + "epoch": 0.7056962025316456, + "grad_norm": 0.767034649848938, + "learning_rate": 0.00030419971047901704, + "loss": 1.4171, + "step": 6690 + }, + { + "epoch": 0.7067510548523207, + "grad_norm": 0.7468644380569458, + "learning_rate": 0.00030218326773102407, + "loss": 1.4212, + "step": 6700 + }, + { + "epoch": 0.7078059071729957, + "grad_norm": 0.8216288089752197, + "learning_rate": 0.00030017184298817873, + "loss": 1.4216, + "step": 6710 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 0.8369116187095642, + "learning_rate": 0.00029816545878947763, + "loss": 1.4134, + "step": 6720 + }, + { + "epoch": 0.709915611814346, + "grad_norm": 0.8663755655288696, + "learning_rate": 0.00029616413761743537, + "loss": 1.4218, + 
"step": 6730 + }, + { + "epoch": 0.7109704641350211, + "grad_norm": 0.9546319842338562, + "learning_rate": 0.00029416790189783286, + "loss": 1.4252, + "step": 6740 + }, + { + "epoch": 0.7120253164556962, + "grad_norm": 0.8872477412223816, + "learning_rate": 0.000292176773999466, + "loss": 1.4281, + "step": 6750 + }, + { + "epoch": 0.7130801687763713, + "grad_norm": 1.0228055715560913, + "learning_rate": 0.0002901907762338952, + "loss": 1.4211, + "step": 6760 + }, + { + "epoch": 0.7141350210970464, + "grad_norm": 0.7918773889541626, + "learning_rate": 0.0002882099308551951, + "loss": 1.427, + "step": 6770 + }, + { + "epoch": 0.7151898734177216, + "grad_norm": 0.7468551993370056, + "learning_rate": 0.00028623426005970517, + "loss": 1.4227, + "step": 6780 + }, + { + "epoch": 0.7162447257383966, + "grad_norm": 0.7687323689460754, + "learning_rate": 0.00028426378598578187, + "loss": 1.4265, + "step": 6790 + }, + { + "epoch": 0.7172995780590717, + "grad_norm": 0.7447020411491394, + "learning_rate": 0.0002822985307135491, + "loss": 1.4141, + "step": 6800 + }, + { + "epoch": 0.7183544303797469, + "grad_norm": 1.0718920230865479, + "learning_rate": 0.0002803385162646518, + "loss": 1.4173, + "step": 6810 + }, + { + "epoch": 0.7194092827004219, + "grad_norm": 1.0294510126113892, + "learning_rate": 0.0002783837646020089, + "loss": 1.4215, + "step": 6820 + }, + { + "epoch": 0.7204641350210971, + "grad_norm": 0.8822906613349915, + "learning_rate": 0.0002764342976295673, + "loss": 1.4166, + "step": 6830 + }, + { + "epoch": 0.7215189873417721, + "grad_norm": 0.9633239507675171, + "learning_rate": 0.00027449013719205623, + "loss": 1.4071, + "step": 6840 + }, + { + "epoch": 0.7225738396624473, + "grad_norm": 0.767971396446228, + "learning_rate": 0.00027255130507474276, + "loss": 1.4175, + "step": 6850 + }, + { + "epoch": 0.7236286919831224, + "grad_norm": 0.8593080639839172, + "learning_rate": 0.00027061782300318726, + "loss": 1.4116, + "step": 6860 + }, + { + "epoch": 
0.7246835443037974, + "grad_norm": 0.7351493239402771, + "learning_rate": 0.0002686897126430009, + "loss": 1.4099, + "step": 6870 + }, + { + "epoch": 0.7257383966244726, + "grad_norm": 0.8148220181465149, + "learning_rate": 0.00026676699559960145, + "loss": 1.4165, + "step": 6880 + }, + { + "epoch": 0.7267932489451476, + "grad_norm": 0.9154622554779053, + "learning_rate": 0.00026484969341797224, + "loss": 1.407, + "step": 6890 + }, + { + "epoch": 0.7278481012658228, + "grad_norm": 0.8635755777359009, + "learning_rate": 0.0002629378275824204, + "loss": 1.4104, + "step": 6900 + }, + { + "epoch": 0.7289029535864979, + "grad_norm": 0.788490355014801, + "learning_rate": 0.00026103141951633617, + "loss": 1.4326, + "step": 6910 + }, + { + "epoch": 0.729957805907173, + "grad_norm": 0.8148037195205688, + "learning_rate": 0.00025913049058195277, + "loss": 1.4123, + "step": 6920 + }, + { + "epoch": 0.7310126582278481, + "grad_norm": 0.7658647894859314, + "learning_rate": 0.0002572350620801072, + "loss": 1.4183, + "step": 6930 + }, + { + "epoch": 0.7320675105485233, + "grad_norm": 0.8661559224128723, + "learning_rate": 0.0002553451552500012, + "loss": 1.4214, + "step": 6940 + }, + { + "epoch": 0.7331223628691983, + "grad_norm": 0.7364020943641663, + "learning_rate": 0.0002534607912689637, + "loss": 1.4187, + "step": 6950 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 0.7841727137565613, + "learning_rate": 0.00025158199125221325, + "loss": 1.4109, + "step": 6960 + }, + { + "epoch": 0.7352320675105485, + "grad_norm": 0.880293071269989, + "learning_rate": 0.0002497087762526211, + "loss": 1.4064, + "step": 6970 + }, + { + "epoch": 0.7362869198312236, + "grad_norm": 0.7938927412033081, + "learning_rate": 0.0002478411672604766, + "loss": 1.4087, + "step": 6980 + }, + { + "epoch": 0.7373417721518988, + "grad_norm": 0.8287999033927917, + "learning_rate": 0.00024597918520324994, + "loss": 1.4108, + "step": 6990 + }, + { + "epoch": 0.7383966244725738, + "grad_norm": 
0.8764036893844604, + "learning_rate": 0.00024412285094535952, + "loss": 1.4225, + "step": 7000 + }, + { + "epoch": 0.739451476793249, + "grad_norm": 0.7647675275802612, + "learning_rate": 0.00024227218528793696, + "loss": 1.3896, + "step": 7010 + }, + { + "epoch": 0.740506329113924, + "grad_norm": 0.8155769109725952, + "learning_rate": 0.00024061144999060956, + "loss": 1.4089, + "step": 7020 + }, + { + "epoch": 0.7415611814345991, + "grad_norm": 0.8776196241378784, + "learning_rate": 0.00023877161175351206, + "loss": 1.4128, + "step": 7030 + }, + { + "epoch": 0.7426160337552743, + "grad_norm": 1.0906946659088135, + "learning_rate": 0.00023693750208013045, + "loss": 1.4159, + "step": 7040 + }, + { + "epoch": 0.7436708860759493, + "grad_norm": 0.9415591359138489, + "learning_rate": 0.0002351091415225591, + "loss": 1.4041, + "step": 7050 + }, + { + "epoch": 0.7447257383966245, + "grad_norm": 0.9977360367774963, + "learning_rate": 0.00023328655056847124, + "loss": 1.4084, + "step": 7060 + }, + { + "epoch": 0.7457805907172996, + "grad_norm": 1.0259097814559937, + "learning_rate": 0.00023146974964088825, + "loss": 1.4167, + "step": 7070 + }, + { + "epoch": 0.7468354430379747, + "grad_norm": 0.7665786147117615, + "learning_rate": 0.00022965875909795164, + "loss": 1.4115, + "step": 7080 + }, + { + "epoch": 0.7478902953586498, + "grad_norm": 0.8351196646690369, + "learning_rate": 0.0002278535992326947, + "loss": 1.409, + "step": 7090 + }, + { + "epoch": 0.7489451476793249, + "grad_norm": 0.8138718605041504, + "learning_rate": 0.0002260542902728151, + "loss": 1.4078, + "step": 7100 + }, + { + "epoch": 0.75, + "grad_norm": 0.8579652309417725, + "learning_rate": 0.00022426085238044823, + "loss": 1.4161, + "step": 7110 + }, + { + "epoch": 0.7510548523206751, + "grad_norm": 0.7810524106025696, + "learning_rate": 0.00022247330565194171, + "loss": 1.413, + "step": 7120 + }, + { + "epoch": 0.7521097046413502, + "grad_norm": 0.7943568825721741, + "learning_rate": 
0.0002206916701176293, + "loss": 1.4186, + "step": 7130 + }, + { + "epoch": 0.7531645569620253, + "grad_norm": 0.7505974173545837, + "learning_rate": 0.00021891596574160715, + "loss": 1.4048, + "step": 7140 + }, + { + "epoch": 0.7542194092827004, + "grad_norm": 0.8935466408729553, + "learning_rate": 0.00021714621242150973, + "loss": 1.4013, + "step": 7150 + }, + { + "epoch": 0.7552742616033755, + "grad_norm": 0.777111828327179, + "learning_rate": 0.0002153824299882872, + "loss": 1.4113, + "step": 7160 + }, + { + "epoch": 0.7563291139240507, + "grad_norm": 1.0382202863693237, + "learning_rate": 0.00021362463820598297, + "loss": 1.4161, + "step": 7170 + }, + { + "epoch": 0.7573839662447257, + "grad_norm": 0.7905944585800171, + "learning_rate": 0.00021187285677151205, + "loss": 1.3962, + "step": 7180 + }, + { + "epoch": 0.7584388185654009, + "grad_norm": 0.8056989312171936, + "learning_rate": 0.00021012710531444112, + "loss": 1.3987, + "step": 7190 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 0.7644554376602173, + "learning_rate": 0.00020838740339676763, + "loss": 1.4027, + "step": 7200 + }, + { + "epoch": 0.760548523206751, + "grad_norm": 0.9238404631614685, + "learning_rate": 0.00020665377051270095, + "loss": 1.4099, + "step": 7210 + }, + { + "epoch": 0.7616033755274262, + "grad_norm": 0.8496394157409668, + "learning_rate": 0.0002049262260884441, + "loss": 1.4204, + "step": 7220 + }, + { + "epoch": 0.7626582278481012, + "grad_norm": 0.8369856476783752, + "learning_rate": 0.0002032047894819758, + "loss": 1.4151, + "step": 7230 + }, + { + "epoch": 0.7637130801687764, + "grad_norm": 0.8093398809432983, + "learning_rate": 0.00020148947998283381, + "loss": 1.4114, + "step": 7240 + }, + { + "epoch": 0.7647679324894515, + "grad_norm": 0.9279979467391968, + "learning_rate": 0.00019978031681189864, + "loss": 1.4094, + "step": 7250 + }, + { + "epoch": 0.7658227848101266, + "grad_norm": 0.7290925979614258, + "learning_rate": 0.00019807731912117828, + "loss": 1.4063, + 
"step": 7260 + }, + { + "epoch": 0.7668776371308017, + "grad_norm": 0.8266335725784302, + "learning_rate": 0.00019638050599359326, + "loss": 1.4052, + "step": 7270 + }, + { + "epoch": 0.7679324894514767, + "grad_norm": 0.8397286534309387, + "learning_rate": 0.0001946898964427633, + "loss": 1.4057, + "step": 7280 + }, + { + "epoch": 0.7689873417721519, + "grad_norm": 0.7896254062652588, + "learning_rate": 0.0001930055094127938, + "loss": 1.4152, + "step": 7290 + }, + { + "epoch": 0.770042194092827, + "grad_norm": 0.8959488868713379, + "learning_rate": 0.00019132736377806394, + "loss": 1.4213, + "step": 7300 + }, + { + "epoch": 0.7710970464135021, + "grad_norm": 0.7991187572479248, + "learning_rate": 0.0001896554783430149, + "loss": 1.4031, + "step": 7310 + }, + { + "epoch": 0.7721518987341772, + "grad_norm": 0.8315223455429077, + "learning_rate": 0.00018798987184193963, + "loss": 1.3976, + "step": 7320 + }, + { + "epoch": 0.7732067510548524, + "grad_norm": 1.0662803649902344, + "learning_rate": 0.00018633056293877203, + "loss": 1.4027, + "step": 7330 + }, + { + "epoch": 0.7742616033755274, + "grad_norm": 0.9657235741615295, + "learning_rate": 0.00018467757022687864, + "loss": 1.4009, + "step": 7340 + }, + { + "epoch": 0.7753164556962026, + "grad_norm": 0.7975175380706787, + "learning_rate": 0.00018303091222884998, + "loss": 1.3971, + "step": 7350 + }, + { + "epoch": 0.7763713080168776, + "grad_norm": 0.7693151235580444, + "learning_rate": 0.00018139060739629287, + "loss": 1.4041, + "step": 7360 + }, + { + "epoch": 0.7774261603375527, + "grad_norm": 0.7824791669845581, + "learning_rate": 0.00017975667410962366, + "loss": 1.4052, + "step": 7370 + }, + { + "epoch": 0.7784810126582279, + "grad_norm": 0.8728565573692322, + "learning_rate": 0.00017812913067786313, + "loss": 1.3903, + "step": 7380 + }, + { + "epoch": 0.7795358649789029, + "grad_norm": 0.7611585855484009, + "learning_rate": 0.00017650799533842996, + "loss": 1.4011, + "step": 7390 + }, + { + "epoch": 
0.7805907172995781, + "grad_norm": 0.9808141589164734, + "learning_rate": 0.00017489328625693715, + "loss": 1.4111, + "step": 7400 + }, + { + "epoch": 0.7816455696202531, + "grad_norm": 0.8244407176971436, + "learning_rate": 0.0001732850215269885, + "loss": 1.3914, + "step": 7410 + }, + { + "epoch": 0.7827004219409283, + "grad_norm": 0.9074159264564514, + "learning_rate": 0.00017168321916997547, + "loss": 1.3965, + "step": 7420 + }, + { + "epoch": 0.7837552742616034, + "grad_norm": 0.794861376285553, + "learning_rate": 0.00017008789713487558, + "loss": 1.3828, + "step": 7430 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 0.723751962184906, + "learning_rate": 0.00016849907329805118, + "loss": 1.3889, + "step": 7440 + }, + { + "epoch": 0.7858649789029536, + "grad_norm": 0.8232799768447876, + "learning_rate": 0.00016691676546304936, + "loss": 1.3912, + "step": 7450 + }, + { + "epoch": 0.7869198312236287, + "grad_norm": 0.759524941444397, + "learning_rate": 0.00016534099136040207, + "loss": 1.4011, + "step": 7460 + }, + { + "epoch": 0.7879746835443038, + "grad_norm": 0.8017905354499817, + "learning_rate": 0.00016377176864742734, + "loss": 1.3918, + "step": 7470 + }, + { + "epoch": 0.7890295358649789, + "grad_norm": 0.9440204501152039, + "learning_rate": 0.00016220911490803206, + "loss": 1.3979, + "step": 7480 + }, + { + "epoch": 0.790084388185654, + "grad_norm": 0.7225520014762878, + "learning_rate": 0.00016065304765251423, + "loss": 1.406, + "step": 7490 + }, + { + "epoch": 0.7911392405063291, + "grad_norm": 0.7647609710693359, + "learning_rate": 0.00015910358431736745, + "loss": 1.3988, + "step": 7500 + }, + { + "epoch": 0.7921940928270043, + "grad_norm": 0.8175790309906006, + "learning_rate": 0.0001575607422650846, + "loss": 1.4085, + "step": 7510 + }, + { + "epoch": 0.7932489451476793, + "grad_norm": 0.726203978061676, + "learning_rate": 0.00015602453878396479, + "loss": 1.3903, + "step": 7520 + }, + { + "epoch": 0.7943037974683544, + "grad_norm": 
0.7473750710487366, + "learning_rate": 0.0001544949910879177, + "loss": 1.3912, + "step": 7530 + }, + { + "epoch": 0.7953586497890295, + "grad_norm": 0.7265754342079163, + "learning_rate": 0.00015297211631627234, + "loss": 1.3904, + "step": 7540 + }, + { + "epoch": 0.7964135021097046, + "grad_norm": 0.7196293473243713, + "learning_rate": 0.00015145593153358412, + "loss": 1.3876, + "step": 7550 + }, + { + "epoch": 0.7974683544303798, + "grad_norm": 0.7821575403213501, + "learning_rate": 0.00014994645372944367, + "loss": 1.3889, + "step": 7560 + }, + { + "epoch": 0.7985232067510548, + "grad_norm": 0.7767731547355652, + "learning_rate": 0.00014844369981828698, + "loss": 1.4018, + "step": 7570 + }, + { + "epoch": 0.79957805907173, + "grad_norm": 0.8259230256080627, + "learning_rate": 0.00014694768663920537, + "loss": 1.3968, + "step": 7580 + }, + { + "epoch": 0.8006329113924051, + "grad_norm": 0.7251445055007935, + "learning_rate": 0.00014545843095575709, + "loss": 1.3889, + "step": 7590 + }, + { + "epoch": 0.8016877637130801, + "grad_norm": 0.7013382911682129, + "learning_rate": 0.00014397594945577912, + "loss": 1.3915, + "step": 7600 + }, + { + "epoch": 0.8027426160337553, + "grad_norm": 0.7437251210212708, + "learning_rate": 0.0001425002587512005, + "loss": 1.406, + "step": 7610 + }, + { + "epoch": 0.8037974683544303, + "grad_norm": 0.7746645212173462, + "learning_rate": 0.00014103137537785633, + "loss": 1.3788, + "step": 7620 + }, + { + "epoch": 0.8048523206751055, + "grad_norm": 0.7491689920425415, + "learning_rate": 0.00013956931579530194, + "loss": 1.4024, + "step": 7630 + }, + { + "epoch": 0.8059071729957806, + "grad_norm": 0.7306551337242126, + "learning_rate": 0.00013811409638662858, + "loss": 1.3907, + "step": 7640 + }, + { + "epoch": 0.8069620253164557, + "grad_norm": 0.7300136685371399, + "learning_rate": 0.00013666573345828083, + "loss": 1.4093, + "step": 7650 + }, + { + "epoch": 0.8080168776371308, + "grad_norm": 0.727403461933136, + "learning_rate": 
0.0001352242432398723, + "loss": 1.3854, + "step": 7660 + }, + { + "epoch": 0.8090717299578059, + "grad_norm": 0.7489141821861267, + "learning_rate": 0.00013378964188400457, + "loss": 1.393, + "step": 7670 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 0.9241371750831604, + "learning_rate": 0.00013236194546608645, + "loss": 1.4079, + "step": 7680 + }, + { + "epoch": 0.8111814345991561, + "grad_norm": 0.837066113948822, + "learning_rate": 0.00013094116998415358, + "loss": 1.3818, + "step": 7690 + }, + { + "epoch": 0.8122362869198312, + "grad_norm": 0.758649468421936, + "learning_rate": 0.0001295273313586885, + "loss": 1.3922, + "step": 7700 + }, + { + "epoch": 0.8132911392405063, + "grad_norm": 0.8825933337211609, + "learning_rate": 0.00012812044543244395, + "loss": 1.388, + "step": 7710 + }, + { + "epoch": 0.8143459915611815, + "grad_norm": 1.0071312189102173, + "learning_rate": 0.00012672052797026344, + "loss": 1.3928, + "step": 7720 + }, + { + "epoch": 0.8154008438818565, + "grad_norm": 0.8833295106887817, + "learning_rate": 0.00012532759465890567, + "loss": 1.3991, + "step": 7730 + }, + { + "epoch": 0.8164556962025317, + "grad_norm": 0.7635900974273682, + "learning_rate": 0.00012394166110686857, + "loss": 1.396, + "step": 7740 + }, + { + "epoch": 0.8175105485232067, + "grad_norm": 0.7910550236701965, + "learning_rate": 0.0001225627428442143, + "loss": 1.3842, + "step": 7750 + }, + { + "epoch": 0.8185654008438819, + "grad_norm": 0.8160268068313599, + "learning_rate": 0.0001211908553223954, + "loss": 1.3837, + "step": 7760 + }, + { + "epoch": 0.819620253164557, + "grad_norm": 0.7550303936004639, + "learning_rate": 0.00011982601391408115, + "loss": 1.3949, + "step": 7770 + }, + { + "epoch": 0.820675105485232, + "grad_norm": 1.0125888586044312, + "learning_rate": 0.00011846823391298628, + "loss": 1.3847, + "step": 7780 + }, + { + "epoch": 0.8217299578059072, + "grad_norm": 0.81779545545578, + "learning_rate": 0.00011711753053369861, + "loss": 1.4034, + 
"step": 7790 + }, + { + "epoch": 0.8227848101265823, + "grad_norm": 0.7367184162139893, + "learning_rate": 0.00011577391891150901, + "loss": 1.3919, + "step": 7800 + }, + { + "epoch": 0.8238396624472574, + "grad_norm": 0.7033666968345642, + "learning_rate": 0.00011443741410224173, + "loss": 1.3886, + "step": 7810 + }, + { + "epoch": 0.8248945147679325, + "grad_norm": 0.7628786563873291, + "learning_rate": 0.00011310803108208581, + "loss": 1.3966, + "step": 7820 + }, + { + "epoch": 0.8259493670886076, + "grad_norm": 0.8438088893890381, + "learning_rate": 0.00011178578474742687, + "loss": 1.3863, + "step": 7830 + }, + { + "epoch": 0.8270042194092827, + "grad_norm": 0.8121126294136047, + "learning_rate": 0.00011047068991468118, + "loss": 1.3734, + "step": 7840 + }, + { + "epoch": 0.8280590717299579, + "grad_norm": 0.7043092250823975, + "learning_rate": 0.00010916276132012818, + "loss": 1.3809, + "step": 7850 + }, + { + "epoch": 0.8291139240506329, + "grad_norm": 0.8019934296607971, + "learning_rate": 0.00010786201361974646, + "loss": 1.3688, + "step": 7860 + }, + { + "epoch": 0.830168776371308, + "grad_norm": 0.8266105055809021, + "learning_rate": 0.00010656846138904916, + "loss": 1.384, + "step": 7870 + }, + { + "epoch": 0.8312236286919831, + "grad_norm": 0.8277871012687683, + "learning_rate": 0.00010528211912292066, + "loss": 1.4036, + "step": 7880 + }, + { + "epoch": 0.8322784810126582, + "grad_norm": 0.7325411438941956, + "learning_rate": 0.0001040030012354542, + "loss": 1.4008, + "step": 7890 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.7594698667526245, + "learning_rate": 0.00010273112205979012, + "loss": 1.38, + "step": 7900 + }, + { + "epoch": 0.8343881856540084, + "grad_norm": 1.0727715492248535, + "learning_rate": 0.00010146649584795575, + "loss": 1.3904, + "step": 7910 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 0.8332909941673279, + "learning_rate": 0.0001002091367707053, + "loss": 1.385, + "step": 7920 + }, + { + "epoch": 
0.8364978902953587, + "grad_norm": 0.7732603549957275, + "learning_rate": 9.895905891736118e-05, + "loss": 1.3961, + "step": 7930 + }, + { + "epoch": 0.8375527426160337, + "grad_norm": 0.7329246997833252, + "learning_rate": 9.771627629565599e-05, + "loss": 1.3899, + "step": 7940 + }, + { + "epoch": 0.8386075949367089, + "grad_norm": 0.7157188653945923, + "learning_rate": 9.648080283157604e-05, + "loss": 1.3945, + "step": 7950 + }, + { + "epoch": 0.8396624472573839, + "grad_norm": 0.7503757476806641, + "learning_rate": 9.525265236920452e-05, + "loss": 1.3781, + "step": 7960 + }, + { + "epoch": 0.8407172995780591, + "grad_norm": 0.8179202675819397, + "learning_rate": 9.40318386705673e-05, + "loss": 1.3796, + "step": 7970 + }, + { + "epoch": 0.8417721518987342, + "grad_norm": 0.773155152797699, + "learning_rate": 9.281837541547791e-05, + "loss": 1.3786, + "step": 7980 + }, + { + "epoch": 0.8428270042194093, + "grad_norm": 0.8069334626197815, + "learning_rate": 9.161227620138468e-05, + "loss": 1.3904, + "step": 7990 + }, + { + "epoch": 0.8438818565400844, + "grad_norm": 0.7726801037788391, + "learning_rate": 9.041355454321803e-05, + "loss": 1.3854, + "step": 8000 + }, + { + "epoch": 0.8449367088607594, + "grad_norm": 0.7428098320960999, + "learning_rate": 8.92222238732397e-05, + "loss": 1.392, + "step": 8010 + }, + { + "epoch": 0.8459915611814346, + "grad_norm": 0.904868483543396, + "learning_rate": 8.803829754089138e-05, + "loss": 1.3839, + "step": 8020 + }, + { + "epoch": 0.8470464135021097, + "grad_norm": 0.8663773536682129, + "learning_rate": 8.686178881264568e-05, + "loss": 1.3827, + "step": 8030 + }, + { + "epoch": 0.8481012658227848, + "grad_norm": 0.7745610475540161, + "learning_rate": 8.569271087185756e-05, + "loss": 1.3954, + "step": 8040 + }, + { + "epoch": 0.8491561181434599, + "grad_norm": 0.7131131887435913, + "learning_rate": 8.453107681861616e-05, + "loss": 1.3754, + "step": 8050 + }, + { + "epoch": 0.8502109704641351, + "grad_norm": 0.7543023228645325, 
+ "learning_rate": 8.337689966959819e-05, + "loss": 1.3829, + "step": 8060 + }, + { + "epoch": 0.8512658227848101, + "grad_norm": 0.7432689070701599, + "learning_rate": 8.223019235792214e-05, + "loss": 1.388, + "step": 8070 + }, + { + "epoch": 0.8523206751054853, + "grad_norm": 0.7472406625747681, + "learning_rate": 8.109096773300348e-05, + "loss": 1.3908, + "step": 8080 + }, + { + "epoch": 0.8533755274261603, + "grad_norm": 0.7438907623291016, + "learning_rate": 7.995923856041013e-05, + "loss": 1.3785, + "step": 8090 + }, + { + "epoch": 0.8544303797468354, + "grad_norm": 0.8140907287597656, + "learning_rate": 7.883501752172038e-05, + "loss": 1.3725, + "step": 8100 + }, + { + "epoch": 0.8554852320675106, + "grad_norm": 0.7933176159858704, + "learning_rate": 7.771831721437989e-05, + "loss": 1.3925, + "step": 8110 + }, + { + "epoch": 0.8565400843881856, + "grad_norm": 0.7360539436340332, + "learning_rate": 7.660915015156067e-05, + "loss": 1.3805, + "step": 8120 + }, + { + "epoch": 0.8575949367088608, + "grad_norm": 0.7351184487342834, + "learning_rate": 7.55075287620215e-05, + "loss": 1.3812, + "step": 8130 + }, + { + "epoch": 0.8586497890295358, + "grad_norm": 0.7421064972877502, + "learning_rate": 7.441346538996769e-05, + "loss": 1.3803, + "step": 8140 + }, + { + "epoch": 0.859704641350211, + "grad_norm": 0.7387655973434448, + "learning_rate": 7.332697229491373e-05, + "loss": 1.3739, + "step": 8150 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 0.731476902961731, + "learning_rate": 7.224806165154504e-05, + "loss": 1.386, + "step": 8160 + }, + { + "epoch": 0.8618143459915611, + "grad_norm": 0.7565726637840271, + "learning_rate": 7.117674554958253e-05, + "loss": 1.378, + "step": 8170 + }, + { + "epoch": 0.8628691983122363, + "grad_norm": 0.8654202222824097, + "learning_rate": 7.011303599364608e-05, + "loss": 1.3866, + "step": 8180 + }, + { + "epoch": 0.8639240506329114, + "grad_norm": 0.8074269890785217, + "learning_rate": 6.905694490312064e-05, + "loss": 
1.3776, + "step": 8190 + }, + { + "epoch": 0.8649789029535865, + "grad_norm": 0.7434083819389343, + "learning_rate": 6.80084841120226e-05, + "loss": 1.3907, + "step": 8200 + }, + { + "epoch": 0.8660337552742616, + "grad_norm": 0.7444329261779785, + "learning_rate": 6.696766536886692e-05, + "loss": 1.3841, + "step": 8210 + }, + { + "epoch": 0.8670886075949367, + "grad_norm": 0.7350063323974609, + "learning_rate": 6.593450033653586e-05, + "loss": 1.3894, + "step": 8220 + }, + { + "epoch": 0.8681434599156118, + "grad_norm": 0.7732083797454834, + "learning_rate": 6.490900059214836e-05, + "loss": 1.3853, + "step": 8230 + }, + { + "epoch": 0.869198312236287, + "grad_norm": 0.7255344390869141, + "learning_rate": 6.389117762692952e-05, + "loss": 1.3823, + "step": 8240 + }, + { + "epoch": 0.870253164556962, + "grad_norm": 0.7467983365058899, + "learning_rate": 6.288104284608284e-05, + "loss": 1.3986, + "step": 8250 + }, + { + "epoch": 0.8713080168776371, + "grad_norm": 0.7956937551498413, + "learning_rate": 6.187860756866157e-05, + "loss": 1.3852, + "step": 8260 + }, + { + "epoch": 0.8723628691983122, + "grad_norm": 0.8788682818412781, + "learning_rate": 6.088388302744266e-05, + "loss": 1.385, + "step": 8270 + }, + { + "epoch": 0.8734177215189873, + "grad_norm": 0.7194802761077881, + "learning_rate": 5.9896880368800115e-05, + "loss": 1.368, + "step": 8280 + }, + { + "epoch": 0.8744725738396625, + "grad_norm": 0.8008246421813965, + "learning_rate": 5.891761065258089e-05, + "loss": 1.369, + "step": 8290 + }, + { + "epoch": 0.8755274261603375, + "grad_norm": 0.7183042168617249, + "learning_rate": 5.794608485198008e-05, + "loss": 1.3937, + "step": 8300 + }, + { + "epoch": 0.8765822784810127, + "grad_norm": 0.7317408919334412, + "learning_rate": 5.698231385341887e-05, + "loss": 1.3719, + "step": 8310 + }, + { + "epoch": 0.8776371308016878, + "grad_norm": 0.7183142304420471, + "learning_rate": 5.60263084564217e-05, + "loss": 1.3897, + "step": 8320 + }, + { + "epoch": 
0.8786919831223629, + "grad_norm": 0.6817972660064697, + "learning_rate": 5.507807937349604e-05, + "loss": 1.385, + "step": 8330 + }, + { + "epoch": 0.879746835443038, + "grad_norm": 0.7614268660545349, + "learning_rate": 5.413763723001164e-05, + "loss": 1.3705, + "step": 8340 + }, + { + "epoch": 0.880801687763713, + "grad_norm": 0.7410838603973389, + "learning_rate": 5.320499256408204e-05, + "loss": 1.3757, + "step": 8350 + }, + { + "epoch": 0.8818565400843882, + "grad_norm": 0.7747814059257507, + "learning_rate": 5.228015582644585e-05, + "loss": 1.3697, + "step": 8360 + }, + { + "epoch": 0.8829113924050633, + "grad_norm": 0.7390511631965637, + "learning_rate": 5.136313738035059e-05, + "loss": 1.3805, + "step": 8370 + }, + { + "epoch": 0.8839662447257384, + "grad_norm": 0.7486708164215088, + "learning_rate": 5.045394750143567e-05, + "loss": 1.386, + "step": 8380 + }, + { + "epoch": 0.8850210970464135, + "grad_norm": 0.7123799324035645, + "learning_rate": 4.955259637761761e-05, + "loss": 1.3774, + "step": 8390 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 0.7088447213172913, + "learning_rate": 4.865909410897576e-05, + "loss": 1.3798, + "step": 8400 + }, + { + "epoch": 0.8871308016877637, + "grad_norm": 0.7922322750091553, + "learning_rate": 4.7773450707639414e-05, + "loss": 1.3955, + "step": 8410 + }, + { + "epoch": 0.8881856540084389, + "grad_norm": 0.7206838726997375, + "learning_rate": 4.6895676097675225e-05, + "loss": 1.3832, + "step": 8420 + }, + { + "epoch": 0.8892405063291139, + "grad_norm": 0.7517143487930298, + "learning_rate": 4.6025780114976545e-05, + "loss": 1.3784, + "step": 8430 + }, + { + "epoch": 0.890295358649789, + "grad_norm": 0.7308545708656311, + "learning_rate": 4.5163772507152425e-05, + "loss": 1.3721, + "step": 8440 + }, + { + "epoch": 0.8913502109704642, + "grad_norm": 0.9061422348022461, + "learning_rate": 4.430966293341912e-05, + "loss": 1.3838, + "step": 8450 + }, + { + "epoch": 0.8924050632911392, + "grad_norm": 
0.8228933811187744, + "learning_rate": 4.346346096449136e-05, + "loss": 1.3785, + "step": 8460 + }, + { + "epoch": 0.8934599156118144, + "grad_norm": 0.701607882976532, + "learning_rate": 4.26251760824754e-05, + "loss": 1.3797, + "step": 8470 + }, + { + "epoch": 0.8945147679324894, + "grad_norm": 0.6856865882873535, + "learning_rate": 4.179481768076274e-05, + "loss": 1.372, + "step": 8480 + }, + { + "epoch": 0.8955696202531646, + "grad_norm": 0.7319398522377014, + "learning_rate": 4.0972395063924554e-05, + "loss": 1.3797, + "step": 8490 + }, + { + "epoch": 0.8966244725738397, + "grad_norm": 0.6869888305664062, + "learning_rate": 4.015791744760811e-05, + "loss": 1.367, + "step": 8500 + }, + { + "epoch": 0.8976793248945147, + "grad_norm": 0.7039361596107483, + "learning_rate": 3.93513939584326e-05, + "loss": 1.3805, + "step": 8510 + }, + { + "epoch": 0.8987341772151899, + "grad_norm": 0.773228108882904, + "learning_rate": 3.855283363388762e-05, + "loss": 1.3758, + "step": 8520 + }, + { + "epoch": 0.8997890295358649, + "grad_norm": 0.7001714706420898, + "learning_rate": 3.7762245422231476e-05, + "loss": 1.3899, + "step": 8530 + }, + { + "epoch": 0.9008438818565401, + "grad_norm": 0.7151424884796143, + "learning_rate": 3.697963818239117e-05, + "loss": 1.387, + "step": 8540 + }, + { + "epoch": 0.9018987341772152, + "grad_norm": 0.7504922747612, + "learning_rate": 3.6205020683862836e-05, + "loss": 1.3769, + "step": 8550 + }, + { + "epoch": 0.9029535864978903, + "grad_norm": 0.6846266984939575, + "learning_rate": 3.543840160661396e-05, + "loss": 1.3667, + "step": 8560 + }, + { + "epoch": 0.9040084388185654, + "grad_norm": 0.8188782930374146, + "learning_rate": 3.467978954098549e-05, + "loss": 1.3868, + "step": 8570 + }, + { + "epoch": 0.9050632911392406, + "grad_norm": 0.7538011074066162, + "learning_rate": 3.392919298759623e-05, + "loss": 1.3778, + "step": 8580 + }, + { + "epoch": 0.9061181434599156, + "grad_norm": 0.9018068313598633, + "learning_rate": 
3.318662035724679e-05, + "loss": 1.3819, + "step": 8590 + }, + { + "epoch": 0.9071729957805907, + "grad_norm": 0.7369102239608765, + "learning_rate": 3.2452079970826335e-05, + "loss": 1.3744, + "step": 8600 + }, + { + "epoch": 0.9082278481012658, + "grad_norm": 0.782171905040741, + "learning_rate": 3.172558005921841e-05, + "loss": 1.3774, + "step": 8610 + }, + { + "epoch": 0.9092827004219409, + "grad_norm": 0.7482620477676392, + "learning_rate": 3.100712876320924e-05, + "loss": 1.3733, + "step": 8620 + }, + { + "epoch": 0.9103375527426161, + "grad_norm": 0.7122312784194946, + "learning_rate": 3.029673413339651e-05, + "loss": 1.3834, + "step": 8630 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 0.7540768980979919, + "learning_rate": 2.959440413009895e-05, + "loss": 1.364, + "step": 8640 + }, + { + "epoch": 0.9124472573839663, + "grad_norm": 0.7224364280700684, + "learning_rate": 2.890014662326701e-05, + "loss": 1.3697, + "step": 8650 + }, + { + "epoch": 0.9135021097046413, + "grad_norm": 0.8223809003829956, + "learning_rate": 2.8213969392395233e-05, + "loss": 1.3711, + "step": 8660 + }, + { + "epoch": 0.9145569620253164, + "grad_norm": 0.6913205981254578, + "learning_rate": 2.7535880126434433e-05, + "loss": 1.3777, + "step": 8670 + }, + { + "epoch": 0.9156118143459916, + "grad_norm": 0.6961574554443359, + "learning_rate": 2.686588642370591e-05, + "loss": 1.3856, + "step": 8680 + }, + { + "epoch": 0.9166666666666666, + "grad_norm": 0.7171003818511963, + "learning_rate": 2.6203995791816372e-05, + "loss": 1.3793, + "step": 8690 + }, + { + "epoch": 0.9177215189873418, + "grad_norm": 0.7158347964286804, + "learning_rate": 2.5550215647573482e-05, + "loss": 1.3787, + "step": 8700 + }, + { + "epoch": 0.9187763713080169, + "grad_norm": 0.7596192955970764, + "learning_rate": 2.490455331690303e-05, + "loss": 1.3768, + "step": 8710 + }, + { + "epoch": 0.919831223628692, + "grad_norm": 0.693540632724762, + "learning_rate": 2.4267016034766637e-05, + "loss": 1.3693, + 
"step": 8720 + }, + { + "epoch": 0.9208860759493671, + "grad_norm": 0.7140907645225525, + "learning_rate": 2.363761094508085e-05, + "loss": 1.3818, + "step": 8730 + }, + { + "epoch": 0.9219409282700421, + "grad_norm": 0.7264201641082764, + "learning_rate": 2.301634510063702e-05, + "loss": 1.3706, + "step": 8740 + }, + { + "epoch": 0.9229957805907173, + "grad_norm": 0.7321516275405884, + "learning_rate": 2.2403225463022288e-05, + "loss": 1.3682, + "step": 8750 + }, + { + "epoch": 0.9240506329113924, + "grad_norm": 0.7197241187095642, + "learning_rate": 2.1798258902541723e-05, + "loss": 1.3581, + "step": 8760 + }, + { + "epoch": 0.9251054852320675, + "grad_norm": 0.7272405624389648, + "learning_rate": 2.120145219814082e-05, + "loss": 1.3686, + "step": 8770 + }, + { + "epoch": 0.9261603375527426, + "grad_norm": 0.7011463046073914, + "learning_rate": 2.0612812037330202e-05, + "loss": 1.376, + "step": 8780 + }, + { + "epoch": 0.9272151898734177, + "grad_norm": 0.6905795335769653, + "learning_rate": 2.003234501611037e-05, + "loss": 1.3703, + "step": 8790 + }, + { + "epoch": 0.9282700421940928, + "grad_norm": 0.7261915802955627, + "learning_rate": 1.9460057638897578e-05, + "loss": 1.3693, + "step": 8800 + }, + { + "epoch": 0.929324894514768, + "grad_norm": 0.7011500597000122, + "learning_rate": 1.8895956318451398e-05, + "loss": 1.3874, + "step": 8810 + }, + { + "epoch": 0.930379746835443, + "grad_norm": 0.6646822094917297, + "learning_rate": 1.8340047375802693e-05, + "loss": 1.3745, + "step": 8820 + }, + { + "epoch": 0.9314345991561181, + "grad_norm": 0.7215378880500793, + "learning_rate": 1.7792337040182434e-05, + "loss": 1.3888, + "step": 8830 + }, + { + "epoch": 0.9324894514767933, + "grad_norm": 0.7035871744155884, + "learning_rate": 1.72528314489524e-05, + "loss": 1.3748, + "step": 8840 + }, + { + "epoch": 0.9335443037974683, + "grad_norm": 0.7532442808151245, + "learning_rate": 1.6721536647536255e-05, + "loss": 1.3729, + "step": 8850 + }, + { + "epoch": 
0.9345991561181435, + "grad_norm": 0.7641000151634216, + "learning_rate": 1.6198458589351595e-05, + "loss": 1.374, + "step": 8860 + }, + { + "epoch": 0.9356540084388185, + "grad_norm": 0.7048219442367554, + "learning_rate": 1.568360313574349e-05, + "loss": 1.3752, + "step": 8870 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 0.7231506109237671, + "learning_rate": 1.517697605591864e-05, + "loss": 1.3789, + "step": 8880 + }, + { + "epoch": 0.9377637130801688, + "grad_norm": 0.675595223903656, + "learning_rate": 1.4678583026880993e-05, + "loss": 1.3779, + "step": 8890 + }, + { + "epoch": 0.9388185654008439, + "grad_norm": 0.6852805614471436, + "learning_rate": 1.4188429633367721e-05, + "loss": 1.3692, + "step": 8900 + }, + { + "epoch": 0.939873417721519, + "grad_norm": 0.7769940495491028, + "learning_rate": 1.370652136778694e-05, + "loss": 1.3697, + "step": 8910 + }, + { + "epoch": 0.9409282700421941, + "grad_norm": 0.6934064030647278, + "learning_rate": 1.3232863630156077e-05, + "loss": 1.3753, + "step": 8920 + }, + { + "epoch": 0.9419831223628692, + "grad_norm": 0.7142991423606873, + "learning_rate": 1.2767461728041357e-05, + "loss": 1.3724, + "step": 8930 + }, + { + "epoch": 0.9430379746835443, + "grad_norm": 0.7094256281852722, + "learning_rate": 1.2310320876498333e-05, + "loss": 1.3722, + "step": 8940 + }, + { + "epoch": 0.9440928270042194, + "grad_norm": 0.6710363030433655, + "learning_rate": 1.186144619801352e-05, + "loss": 1.3754, + "step": 8950 + }, + { + "epoch": 0.9451476793248945, + "grad_norm": 0.712020993232727, + "learning_rate": 1.14208427224467e-05, + "loss": 1.3797, + "step": 8960 + }, + { + "epoch": 0.9462025316455697, + "grad_norm": 0.7879590392112732, + "learning_rate": 1.0988515386975206e-05, + "loss": 1.3747, + "step": 8970 + }, + { + "epoch": 0.9472573839662447, + "grad_norm": 0.7470480799674988, + "learning_rate": 1.0564469036037722e-05, + "loss": 1.3713, + "step": 8980 + }, + { + "epoch": 0.9483122362869199, + "grad_norm": 
0.6829283237457275, + "learning_rate": 1.0148708421280822e-05, + "loss": 1.3774, + "step": 8990 + }, + { + "epoch": 0.9493670886075949, + "grad_norm": 0.6993764638900757, + "learning_rate": 9.74123820150502e-06, + "loss": 1.3688, + "step": 9000 + }, + { + "epoch": 0.95042194092827, + "grad_norm": 0.7376212477684021, + "learning_rate": 9.342062942613222e-06, + "loss": 1.3696, + "step": 9010 + }, + { + "epoch": 0.9514767932489452, + "grad_norm": 0.7060539722442627, + "learning_rate": 8.9511871175591e-06, + "loss": 1.3701, + "step": 9020 + }, + { + "epoch": 0.9525316455696202, + "grad_norm": 0.7359106540679932, + "learning_rate": 8.568615106297223e-06, + "loss": 1.3912, + "step": 9030 + }, + { + "epoch": 0.9535864978902954, + "grad_norm": 0.7002447843551636, + "learning_rate": 8.194351195733585e-06, + "loss": 1.3648, + "step": 9040 + }, + { + "epoch": 0.9546413502109705, + "grad_norm": 0.6700918078422546, + "learning_rate": 7.828399579678153e-06, + "loss": 1.3766, + "step": 9050 + }, + { + "epoch": 0.9556962025316456, + "grad_norm": 0.7192947864532471, + "learning_rate": 7.470764358797566e-06, + "loss": 1.379, + "step": 9060 + }, + { + "epoch": 0.9567510548523207, + "grad_norm": 0.6700484752655029, + "learning_rate": 7.121449540568842e-06, + "loss": 1.3785, + "step": 9070 + }, + { + "epoch": 0.9578059071729957, + "grad_norm": 0.6870172619819641, + "learning_rate": 6.780459039235409e-06, + "loss": 1.3779, + "step": 9080 + }, + { + "epoch": 0.9588607594936709, + "grad_norm": 0.6844492554664612, + "learning_rate": 6.447796675762146e-06, + "loss": 1.3802, + "step": 9090 + }, + { + "epoch": 0.959915611814346, + "grad_norm": 0.702568769454956, + "learning_rate": 6.123466177793247e-06, + "loss": 1.3598, + "step": 9100 + }, + { + "epoch": 0.9609704641350211, + "grad_norm": 0.7000457048416138, + "learning_rate": 5.807471179610418e-06, + "loss": 1.3707, + "step": 9110 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 0.7072151899337769, + "learning_rate": 
5.499815222091836e-06, + "loss": 1.3797, + "step": 9120 + }, + { + "epoch": 0.9630801687763713, + "grad_norm": 0.727371096611023, + "learning_rate": 5.200501752672754e-06, + "loss": 1.3644, + "step": 9130 + }, + { + "epoch": 0.9641350210970464, + "grad_norm": 0.7074583172798157, + "learning_rate": 4.909534125306702e-06, + "loss": 1.3861, + "step": 9140 + }, + { + "epoch": 0.9651898734177216, + "grad_norm": 0.6938356757164001, + "learning_rate": 4.626915600428105e-06, + "loss": 1.3702, + "step": 9150 + }, + { + "epoch": 0.9662447257383966, + "grad_norm": 0.6885902881622314, + "learning_rate": 4.352649344915471e-06, + "loss": 1.3796, + "step": 9160 + }, + { + "epoch": 0.9672995780590717, + "grad_norm": 0.684018611907959, + "learning_rate": 4.086738432056092e-06, + "loss": 1.377, + "step": 9170 + }, + { + "epoch": 0.9683544303797469, + "grad_norm": 0.7056172490119934, + "learning_rate": 3.8291858415117344e-06, + "loss": 1.3744, + "step": 9180 + }, + { + "epoch": 0.9694092827004219, + "grad_norm": 0.7018168568611145, + "learning_rate": 3.579994459284752e-06, + "loss": 1.368, + "step": 9190 + }, + { + "epoch": 0.9704641350210971, + "grad_norm": 0.7419307231903076, + "learning_rate": 3.339167077686278e-06, + "loss": 1.3734, + "step": 9200 + }, + { + "epoch": 0.9715189873417721, + "grad_norm": 0.7201178669929504, + "learning_rate": 3.1067063953048313e-06, + "loss": 1.374, + "step": 9210 + }, + { + "epoch": 0.9725738396624473, + "grad_norm": 0.6971818804740906, + "learning_rate": 2.8826150169758425e-06, + "loss": 1.3716, + "step": 9220 + }, + { + "epoch": 0.9736286919831224, + "grad_norm": 0.6903793811798096, + "learning_rate": 2.66689545375251e-06, + "loss": 1.3627, + "step": 9230 + }, + { + "epoch": 0.9746835443037974, + "grad_norm": 0.6866322755813599, + "learning_rate": 2.4595501228779906e-06, + "loss": 1.356, + "step": 9240 + }, + { + "epoch": 0.9757383966244726, + "grad_norm": 0.6669610142707825, + "learning_rate": 2.2605813477579172e-06, + "loss": 1.3697, + "step": 
9250 + }, + { + "epoch": 0.9767932489451476, + "grad_norm": 0.7245898246765137, + "learning_rate": 2.069991357934592e-06, + "loss": 1.3652, + "step": 9260 + }, + { + "epoch": 0.9778481012658228, + "grad_norm": 0.7006564140319824, + "learning_rate": 1.8877822890618346e-06, + "loss": 1.3783, + "step": 9270 + }, + { + "epoch": 0.9789029535864979, + "grad_norm": 0.6645061373710632, + "learning_rate": 1.7139561828813377e-06, + "loss": 1.364, + "step": 9280 + }, + { + "epoch": 0.979957805907173, + "grad_norm": 0.6863107085227966, + "learning_rate": 1.5485149871995175e-06, + "loss": 1.3741, + "step": 9290 + }, + { + "epoch": 0.9810126582278481, + "grad_norm": 0.7346316576004028, + "learning_rate": 1.3914605558656146e-06, + "loss": 1.371, + "step": 9300 + }, + { + "epoch": 0.9820675105485233, + "grad_norm": 0.6920057535171509, + "learning_rate": 1.2427946487512941e-06, + "loss": 1.3683, + "step": 9310 + }, + { + "epoch": 0.9831223628691983, + "grad_norm": 0.7081940770149231, + "learning_rate": 1.1025189317305784e-06, + "loss": 1.3674, + "step": 9320 + }, + { + "epoch": 0.9841772151898734, + "grad_norm": 0.6696018576622009, + "learning_rate": 9.706349766615275e-07, + "loss": 1.3785, + "step": 9330 + }, + { + "epoch": 0.9852320675105485, + "grad_norm": 0.6865301728248596, + "learning_rate": 8.47144261368088e-07, + "loss": 1.3765, + "step": 9340 + }, + { + "epoch": 0.9862869198312236, + "grad_norm": 0.6761831045150757, + "learning_rate": 7.320481696241887e-07, + "loss": 1.3871, + "step": 9350 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 0.7286713719367981, + "learning_rate": 6.253479911375037e-07, + "loss": 1.3807, + "step": 9360 + }, + { + "epoch": 0.9883966244725738, + "grad_norm": 0.7000607252120972, + "learning_rate": 5.270449215358797e-07, + "loss": 1.377, + "step": 9370 + }, + { + "epoch": 0.989451476793249, + "grad_norm": 0.6871421337127686, + "learning_rate": 4.371400623530142e-07, + "loss": 1.3625, + "step": 9380 + }, + { + "epoch": 0.990506329113924, + 
"grad_norm": 0.7131569385528564, + "learning_rate": 3.5563442101696486e-07, + "loss": 1.3673, + "step": 9390 + }, + { + "epoch": 0.9915611814345991, + "grad_norm": 0.6850053668022156, + "learning_rate": 2.825289108379925e-07, + "loss": 1.3795, + "step": 9400 + }, + { + "epoch": 0.9926160337552743, + "grad_norm": 0.7431160807609558, + "learning_rate": 2.1782435099923503e-07, + "loss": 1.3565, + "step": 9410 + }, + { + "epoch": 0.9936708860759493, + "grad_norm": 0.6915649175643921, + "learning_rate": 1.6152146654671573e-07, + "loss": 1.3743, + "step": 9420 + }, + { + "epoch": 0.9947257383966245, + "grad_norm": 0.6918001770973206, + "learning_rate": 1.1362088838193229e-07, + "loss": 1.3759, + "step": 9430 + }, + { + "epoch": 0.9957805907172996, + "grad_norm": 0.7018936276435852, + "learning_rate": 7.412315325411312e-08, + "loss": 1.3702, + "step": 9440 + }, + { + "epoch": 0.9968354430379747, + "grad_norm": 0.6886297464370728, + "learning_rate": 4.302870375472168e-08, + "loss": 1.3826, + "step": 9450 + }, + { + "epoch": 0.9978902953586498, + "grad_norm": 0.694330632686615, + "learning_rate": 2.0337888312210727e-08, + "loss": 1.3656, + "step": 9460 + }, + { + "epoch": 0.9989451476793249, + "grad_norm": 0.6779256463050842, + "learning_rate": 6.050961188358573e-09, + "loss": 1.3773, + "step": 9470 + }, + { + "epoch": 1.0, + "grad_norm": 2.03425931930542, + "learning_rate": 1.6808247493838026e-10, + "loss": 1.3688, + "step": 9480 + } + ], + "logging_steps": 10, + "max_steps": 9480, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.3802653400776704e+16, + "train_batch_size": 1024, + "trial_name": null, + "trial_params": null +} diff --git a/saves-starcoder2-cosine/checkpoint-9480/training_args.bin 
b/saves-starcoder2-cosine/checkpoint-9480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..2130579d88a4c7e7cd57b59ab7416710695867d6 --- /dev/null +++ b/saves-starcoder2-cosine/checkpoint-9480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e67e72323dc00aa0df7867bd6e8b68af9ec41397452f269a28e0823a775eb1e +size 5176 diff --git a/saves-starcoder2-cosine/config.json b/saves-starcoder2-cosine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7255245cb3ad011849e2e9471897dc5fbc33393a --- /dev/null +++ b/saves-starcoder2-cosine/config.json @@ -0,0 +1,27 @@ +{ + "architectures": [ + "Starcoder2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 50256, + "embedding_dropout": 0.0, + "eos_token_id": 50256, + "hidden_act": "gelu_pytorch_tanh", + "hidden_size": 256, + "initializer_range": 0.018042, + "intermediate_size": 768, + "max_position_embeddings": 4096, + "model_type": "starcoder2", + "norm_epsilon": 1e-05, + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_dropout": 0.0, + "rope_theta": 10000.0, + "sliding_window": null, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_bias": true, + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-starcoder2-cosine/generation_config.json b/saves-starcoder2-cosine/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..fb2eba6018c75d5bca061373b0ddaa2abf0a1f68 --- /dev/null +++ b/saves-starcoder2-cosine/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 50256, + "eos_token_id": 50256, + "transformers_version": "4.42.4" +} diff --git a/saves-starcoder2-cosine/model.safetensors b/saves-starcoder2-cosine/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..877fab80e0b7393112819f9f88d25582a2eaeb7c --- /dev/null +++ 
b/saves-starcoder2-cosine/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fde609862b7431958ec806051005c455e4d64887e7d5c4555bf6c9b704a177f +size 6794784 diff --git a/saves-starcoder2-cosine/result.log b/saves-starcoder2-cosine/result.log new file mode 100644 index 0000000000000000000000000000000000000000..95280e2162bc137b74a1101da346da809ba893c5 --- /dev/null +++ b/saves-starcoder2-cosine/result.log @@ -0,0 +1 @@ +{'train_runtime': 1786.4351, 'train_samples_per_second': 5433.505, 'train_steps_per_second': 5.307, 'train_loss': 1.6498495339341304, 'epoch': 1.0} \ No newline at end of file diff --git a/saves-starcoder2-cosine/special_tokens_map.json b/saves-starcoder2-cosine/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-starcoder2-cosine/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-starcoder2-cosine/tokenizer.json b/saves-starcoder2-cosine/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-starcoder2-cosine/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, 
+ "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 
80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + 
"ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + 
"od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, 
+ "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 
685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, 
+ "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, 
+ "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + 
"ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, 
+ "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + 
"åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + 
"åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + 
"ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, 
+ "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 
1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + 
}, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + 
"Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", + "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-starcoder2-cosine/tokenizer_config.json b/saves-starcoder2-cosine/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-starcoder2-cosine/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": 
"replace", + "model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-starcoder2/checkpoint-9480/config.json b/saves-starcoder2/checkpoint-9480/config.json new file mode 100644 index 0000000000000000000000000000000000000000..89b9935265177c72718344a457fefeead4bef178 --- /dev/null +++ b/saves-starcoder2/checkpoint-9480/config.json @@ -0,0 +1,27 @@ +{ + "architectures": [ + "Starcoder2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 50256, + "embedding_dropout": 0.0, + "eos_token_id": 50256, + "hidden_act": "gelu_pytorch_tanh", + "hidden_size": 256, + "initializer_range": 0.018042, + "intermediate_size": 1024, + "max_position_embeddings": 4096, + "model_type": "starcoder2", + "norm_epsilon": 1e-05, + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_dropout": 0.0, + "rope_theta": 10000.0, + "sliding_window": null, + "torch_dtype": "float32", + "transformers_version": "4.42.4", + "use_bias": true, + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-starcoder2/checkpoint-9480/generation_config.json b/saves-starcoder2/checkpoint-9480/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..fb2eba6018c75d5bca061373b0ddaa2abf0a1f68 --- /dev/null +++ b/saves-starcoder2/checkpoint-9480/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 50256, + "eos_token_id": 50256, + "transformers_version": "4.42.4" +} diff --git a/saves-starcoder2/checkpoint-9480/model.safetensors b/saves-starcoder2/checkpoint-9480/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0e41a67626d9677746201bbe3362bceb54d2410d --- /dev/null +++ b/saves-starcoder2/checkpoint-9480/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7806c14b37217c9069cfbd0b643396fd0605501bbc80add41003c3849b8ea504 
+size 7845408 diff --git a/saves-starcoder2/checkpoint-9480/optimizer.pt b/saves-starcoder2/checkpoint-9480/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..220daed0f66365a86eebb7a8b153d406314ac697 --- /dev/null +++ b/saves-starcoder2/checkpoint-9480/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc125b5fb954f48f8757c78dc0d735655de46b8eb8bb0006a521b3eb4f716a6c +size 15713082 diff --git a/saves-starcoder2/checkpoint-9480/rng_state.pth b/saves-starcoder2/checkpoint-9480/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f8291cd6ce87668b786a72f3e93d072fbe54902 --- /dev/null +++ b/saves-starcoder2/checkpoint-9480/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c917636c7a58af68a29056522a757e9f9b99005b776641aa157c536967817d +size 14244 diff --git a/saves-starcoder2/checkpoint-9480/scheduler.pt b/saves-starcoder2/checkpoint-9480/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..75fc58e9b05cc951a82cac092de91cd65804440d --- /dev/null +++ b/saves-starcoder2/checkpoint-9480/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0a00f09d701c4f602dd021702c8cfc44bc37c286d3a858d845780a823871eb9 +size 1064 diff --git a/saves-starcoder2/checkpoint-9480/special_tokens_map.json b/saves-starcoder2/checkpoint-9480/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-starcoder2/checkpoint-9480/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/saves-starcoder2/checkpoint-9480/tokenizer.json b/saves-starcoder2/checkpoint-9480/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-starcoder2/checkpoint-9480/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + 
"+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + "3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 
186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, 
+ "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 
485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, + "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 
631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + "被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + 
"ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + 
"ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + "Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 
1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + "ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + 
"ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 
1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + "çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 
1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + "Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + 
"ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + "Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + 
"è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 
1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 
1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-starcoder2/checkpoint-9480/tokenizer_config.json b/saves-starcoder2/checkpoint-9480/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-starcoder2/checkpoint-9480/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": 
"<|im_end|>", + "errors": "replace", + "model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/saves-starcoder2/checkpoint-9480/trainer_state.json b/saves-starcoder2/checkpoint-9480/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..bc0ef95060ce53b7e6ae0e4ed4f98628bc34be61 --- /dev/null +++ b/saves-starcoder2/checkpoint-9480/trainer_state.json @@ -0,0 +1,6669 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 9480, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0010548523206751054, + "grad_norm": 1.380517840385437, + "learning_rate": 0.00015822784810126583, + "loss": 7.4375, + "step": 10 + }, + { + "epoch": 0.002109704641350211, + "grad_norm": 1.135229229927063, + "learning_rate": 0.00031645569620253165, + "loss": 6.8052, + "step": 20 + }, + { + "epoch": 0.0031645569620253164, + "grad_norm": 0.9208482503890991, + "learning_rate": 0.00047468354430379745, + "loss": 6.2147, + "step": 30 + }, + { + "epoch": 0.004219409282700422, + "grad_norm": 0.9341186881065369, + "learning_rate": 0.0006329113924050633, + "loss": 5.6896, + "step": 40 + }, + { + "epoch": 0.005274261603375527, + "grad_norm": 0.6837510466575623, + "learning_rate": 0.0007911392405063291, + "loss": 5.1997, + "step": 50 + }, + { + "epoch": 0.006329113924050633, + "grad_norm": 0.77073073387146, + "learning_rate": 0.0009493670886075949, + "loss": 4.7401, + "step": 60 + }, + { + "epoch": 0.007383966244725738, + "grad_norm": 1.9307482242584229, + "learning_rate": 0.0011075949367088608, + "loss": 4.3976, + "step": 70 + }, + { + "epoch": 0.008438818565400843, + "grad_norm": 1.3496501445770264, + "learning_rate": 0.0012658227848101266, + "loss": 4.1659, + "step": 80 + }, + { + "epoch": 0.00949367088607595, + "grad_norm": 
1.4544936418533325, + "learning_rate": 0.0014240506329113926, + "loss": 3.9796, + "step": 90 + }, + { + "epoch": 0.010548523206751054, + "grad_norm": 1.0401917695999146, + "learning_rate": 0.0015, + "loss": 3.8571, + "step": 100 + }, + { + "epoch": 0.011603375527426161, + "grad_norm": 1.362817406654358, + "learning_rate": 0.0015, + "loss": 3.7164, + "step": 110 + }, + { + "epoch": 0.012658227848101266, + "grad_norm": 0.9374111294746399, + "learning_rate": 0.0015, + "loss": 3.6159, + "step": 120 + }, + { + "epoch": 0.013713080168776372, + "grad_norm": 1.087600588798523, + "learning_rate": 0.0015, + "loss": 3.5206, + "step": 130 + }, + { + "epoch": 0.014767932489451477, + "grad_norm": 1.0248064994812012, + "learning_rate": 0.0015, + "loss": 3.4246, + "step": 140 + }, + { + "epoch": 0.015822784810126583, + "grad_norm": 1.352480173110962, + "learning_rate": 0.0015, + "loss": 3.3594, + "step": 150 + }, + { + "epoch": 0.016877637130801686, + "grad_norm": 1.035747766494751, + "learning_rate": 0.0015, + "loss": 3.2929, + "step": 160 + }, + { + "epoch": 0.017932489451476793, + "grad_norm": 1.3004776239395142, + "learning_rate": 0.0015, + "loss": 3.2292, + "step": 170 + }, + { + "epoch": 0.0189873417721519, + "grad_norm": 0.9723942279815674, + "learning_rate": 0.0015, + "loss": 3.1859, + "step": 180 + }, + { + "epoch": 0.020042194092827006, + "grad_norm": 0.8211439847946167, + "learning_rate": 0.0015, + "loss": 3.1227, + "step": 190 + }, + { + "epoch": 0.02109704641350211, + "grad_norm": 0.962811291217804, + "learning_rate": 0.0015, + "loss": 3.0747, + "step": 200 + }, + { + "epoch": 0.022151898734177215, + "grad_norm": 1.2249480485916138, + "learning_rate": 0.0015, + "loss": 3.0323, + "step": 210 + }, + { + "epoch": 0.023206751054852322, + "grad_norm": 1.4821194410324097, + "learning_rate": 0.0015, + "loss": 2.9995, + "step": 220 + }, + { + "epoch": 0.024261603375527425, + "grad_norm": 0.994897723197937, + "learning_rate": 0.0015, + "loss": 2.9458, + "step": 230 + }, + { + 
"epoch": 0.02531645569620253, + "grad_norm": 1.2094449996948242, + "learning_rate": 0.0015, + "loss": 2.9039, + "step": 240 + }, + { + "epoch": 0.026371308016877638, + "grad_norm": 1.0856186151504517, + "learning_rate": 0.0015, + "loss": 2.87, + "step": 250 + }, + { + "epoch": 0.027426160337552744, + "grad_norm": 1.3587604761123657, + "learning_rate": 0.0015, + "loss": 2.8315, + "step": 260 + }, + { + "epoch": 0.028481012658227847, + "grad_norm": 1.0936589241027832, + "learning_rate": 0.0015, + "loss": 2.798, + "step": 270 + }, + { + "epoch": 0.029535864978902954, + "grad_norm": 1.2068792581558228, + "learning_rate": 0.0015, + "loss": 2.766, + "step": 280 + }, + { + "epoch": 0.03059071729957806, + "grad_norm": 1.3179926872253418, + "learning_rate": 0.0015, + "loss": 2.7404, + "step": 290 + }, + { + "epoch": 0.03164556962025317, + "grad_norm": 1.551661491394043, + "learning_rate": 0.0015, + "loss": 2.7158, + "step": 300 + }, + { + "epoch": 0.03270042194092827, + "grad_norm": 1.0468871593475342, + "learning_rate": 0.0015, + "loss": 2.6881, + "step": 310 + }, + { + "epoch": 0.03375527426160337, + "grad_norm": 0.875356912612915, + "learning_rate": 0.0015, + "loss": 2.6514, + "step": 320 + }, + { + "epoch": 0.03481012658227848, + "grad_norm": 1.027299404144287, + "learning_rate": 0.0015, + "loss": 2.6441, + "step": 330 + }, + { + "epoch": 0.035864978902953586, + "grad_norm": 0.9613593816757202, + "learning_rate": 0.0015, + "loss": 2.6188, + "step": 340 + }, + { + "epoch": 0.03691983122362869, + "grad_norm": 1.697529911994934, + "learning_rate": 0.0015, + "loss": 2.5769, + "step": 350 + }, + { + "epoch": 0.0379746835443038, + "grad_norm": 2.06827449798584, + "learning_rate": 0.0015, + "loss": 2.5619, + "step": 360 + }, + { + "epoch": 0.039029535864978905, + "grad_norm": 1.0375722646713257, + "learning_rate": 0.0015, + "loss": 2.5499, + "step": 370 + }, + { + "epoch": 0.04008438818565401, + "grad_norm": 1.172898769378662, + "learning_rate": 0.0015, + "loss": 2.5317, + 
"step": 380 + }, + { + "epoch": 0.04113924050632911, + "grad_norm": 1.221989393234253, + "learning_rate": 0.0015, + "loss": 2.5101, + "step": 390 + }, + { + "epoch": 0.04219409282700422, + "grad_norm": 1.3337557315826416, + "learning_rate": 0.0015, + "loss": 2.4868, + "step": 400 + }, + { + "epoch": 0.043248945147679324, + "grad_norm": 1.4051837921142578, + "learning_rate": 0.0015, + "loss": 2.4743, + "step": 410 + }, + { + "epoch": 0.04430379746835443, + "grad_norm": 1.1877429485321045, + "learning_rate": 0.0015, + "loss": 2.4525, + "step": 420 + }, + { + "epoch": 0.04535864978902954, + "grad_norm": 1.0655913352966309, + "learning_rate": 0.0015, + "loss": 2.43, + "step": 430 + }, + { + "epoch": 0.046413502109704644, + "grad_norm": 1.141899824142456, + "learning_rate": 0.0015, + "loss": 2.4043, + "step": 440 + }, + { + "epoch": 0.04746835443037975, + "grad_norm": 1.1890851259231567, + "learning_rate": 0.0015, + "loss": 2.4093, + "step": 450 + }, + { + "epoch": 0.04852320675105485, + "grad_norm": 1.4701390266418457, + "learning_rate": 0.0015, + "loss": 2.3905, + "step": 460 + }, + { + "epoch": 0.049578059071729956, + "grad_norm": 0.9667548537254333, + "learning_rate": 0.0015, + "loss": 2.3679, + "step": 470 + }, + { + "epoch": 0.05063291139240506, + "grad_norm": 1.0652215480804443, + "learning_rate": 0.0015, + "loss": 2.3652, + "step": 480 + }, + { + "epoch": 0.05168776371308017, + "grad_norm": 1.0762356519699097, + "learning_rate": 0.0015, + "loss": 2.3402, + "step": 490 + }, + { + "epoch": 0.052742616033755275, + "grad_norm": 1.7024035453796387, + "learning_rate": 0.0015, + "loss": 2.345, + "step": 500 + }, + { + "epoch": 0.05379746835443038, + "grad_norm": 1.5398619174957275, + "learning_rate": 0.0015, + "loss": 2.324, + "step": 510 + }, + { + "epoch": 0.05485232067510549, + "grad_norm": 1.2019097805023193, + "learning_rate": 0.0015, + "loss": 2.3001, + "step": 520 + }, + { + "epoch": 0.05590717299578059, + "grad_norm": 1.011959433555603, + "learning_rate": 
0.0015, + "loss": 2.2824, + "step": 530 + }, + { + "epoch": 0.056962025316455694, + "grad_norm": 1.2221145629882812, + "learning_rate": 0.0015, + "loss": 2.2679, + "step": 540 + }, + { + "epoch": 0.0580168776371308, + "grad_norm": 0.9527300000190735, + "learning_rate": 0.0015, + "loss": 2.2739, + "step": 550 + }, + { + "epoch": 0.05907172995780591, + "grad_norm": 0.987117350101471, + "learning_rate": 0.0015, + "loss": 2.2353, + "step": 560 + }, + { + "epoch": 0.060126582278481014, + "grad_norm": 1.1012482643127441, + "learning_rate": 0.0015, + "loss": 2.2409, + "step": 570 + }, + { + "epoch": 0.06118143459915612, + "grad_norm": 1.1162430047988892, + "learning_rate": 0.0015, + "loss": 2.2422, + "step": 580 + }, + { + "epoch": 0.06223628691983123, + "grad_norm": 0.878807544708252, + "learning_rate": 0.0015, + "loss": 2.2114, + "step": 590 + }, + { + "epoch": 0.06329113924050633, + "grad_norm": 1.137564778327942, + "learning_rate": 0.0015, + "loss": 2.1939, + "step": 600 + }, + { + "epoch": 0.06434599156118144, + "grad_norm": 0.899512529373169, + "learning_rate": 0.0015, + "loss": 2.1952, + "step": 610 + }, + { + "epoch": 0.06540084388185655, + "grad_norm": 0.8190832138061523, + "learning_rate": 0.0015, + "loss": 2.1789, + "step": 620 + }, + { + "epoch": 0.06645569620253164, + "grad_norm": 0.9665809869766235, + "learning_rate": 0.0015, + "loss": 2.1638, + "step": 630 + }, + { + "epoch": 0.06751054852320675, + "grad_norm": 1.1866236925125122, + "learning_rate": 0.0015, + "loss": 2.1743, + "step": 640 + }, + { + "epoch": 0.06856540084388185, + "grad_norm": 0.8200702667236328, + "learning_rate": 0.0015, + "loss": 2.1562, + "step": 650 + }, + { + "epoch": 0.06962025316455696, + "grad_norm": 0.8939485549926758, + "learning_rate": 0.0015, + "loss": 2.1407, + "step": 660 + }, + { + "epoch": 0.07067510548523206, + "grad_norm": 1.5228382349014282, + "learning_rate": 0.0015, + "loss": 2.1267, + "step": 670 + }, + { + "epoch": 0.07172995780590717, + "grad_norm": 
1.5638633966445923, + "learning_rate": 0.0015, + "loss": 2.1427, + "step": 680 + }, + { + "epoch": 0.07278481012658228, + "grad_norm": 1.4032880067825317, + "learning_rate": 0.0015, + "loss": 2.1301, + "step": 690 + }, + { + "epoch": 0.07383966244725738, + "grad_norm": 1.469533085823059, + "learning_rate": 0.0015, + "loss": 2.1017, + "step": 700 + }, + { + "epoch": 0.07489451476793249, + "grad_norm": 1.485270380973816, + "learning_rate": 0.0015, + "loss": 2.0891, + "step": 710 + }, + { + "epoch": 0.0759493670886076, + "grad_norm": 1.0954358577728271, + "learning_rate": 0.0015, + "loss": 2.0719, + "step": 720 + }, + { + "epoch": 0.0770042194092827, + "grad_norm": 1.1161761283874512, + "learning_rate": 0.0015, + "loss": 2.0604, + "step": 730 + }, + { + "epoch": 0.07805907172995781, + "grad_norm": 0.9234256148338318, + "learning_rate": 0.0015, + "loss": 2.08, + "step": 740 + }, + { + "epoch": 0.07911392405063292, + "grad_norm": 0.9153168201446533, + "learning_rate": 0.0015, + "loss": 2.0704, + "step": 750 + }, + { + "epoch": 0.08016877637130802, + "grad_norm": 0.9943775534629822, + "learning_rate": 0.0015, + "loss": 2.0653, + "step": 760 + }, + { + "epoch": 0.08122362869198312, + "grad_norm": 0.9319306015968323, + "learning_rate": 0.0015, + "loss": 2.0563, + "step": 770 + }, + { + "epoch": 0.08227848101265822, + "grad_norm": 0.8930582404136658, + "learning_rate": 0.0015, + "loss": 2.0547, + "step": 780 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 1.2218706607818604, + "learning_rate": 0.0015, + "loss": 2.0572, + "step": 790 + }, + { + "epoch": 0.08438818565400844, + "grad_norm": 1.0489214658737183, + "learning_rate": 0.0015, + "loss": 2.0394, + "step": 800 + }, + { + "epoch": 0.08544303797468354, + "grad_norm": 0.8977221846580505, + "learning_rate": 0.0015, + "loss": 2.0095, + "step": 810 + }, + { + "epoch": 0.08649789029535865, + "grad_norm": 0.820949137210846, + "learning_rate": 0.0015, + "loss": 2.0168, + "step": 820 + }, + { + "epoch": 
0.08755274261603375, + "grad_norm": 0.9163674712181091, + "learning_rate": 0.0015, + "loss": 2.0237, + "step": 830 + }, + { + "epoch": 0.08860759493670886, + "grad_norm": 0.8781024813652039, + "learning_rate": 0.0015, + "loss": 2.0031, + "step": 840 + }, + { + "epoch": 0.08966244725738397, + "grad_norm": 1.0236303806304932, + "learning_rate": 0.0015, + "loss": 2.0075, + "step": 850 + }, + { + "epoch": 0.09071729957805907, + "grad_norm": 1.1334477663040161, + "learning_rate": 0.0015, + "loss": 2.0097, + "step": 860 + }, + { + "epoch": 0.09177215189873418, + "grad_norm": 0.9466424584388733, + "learning_rate": 0.0015, + "loss": 1.9935, + "step": 870 + }, + { + "epoch": 0.09282700421940929, + "grad_norm": 0.7779718637466431, + "learning_rate": 0.0015, + "loss": 1.9783, + "step": 880 + }, + { + "epoch": 0.0938818565400844, + "grad_norm": 0.9219574332237244, + "learning_rate": 0.0015, + "loss": 1.9767, + "step": 890 + }, + { + "epoch": 0.0949367088607595, + "grad_norm": 0.7957385182380676, + "learning_rate": 0.0015, + "loss": 1.9751, + "step": 900 + }, + { + "epoch": 0.09599156118143459, + "grad_norm": 0.8075884580612183, + "learning_rate": 0.0015, + "loss": 1.985, + "step": 910 + }, + { + "epoch": 0.0970464135021097, + "grad_norm": 0.8959895372390747, + "learning_rate": 0.0015, + "loss": 1.9678, + "step": 920 + }, + { + "epoch": 0.0981012658227848, + "grad_norm": 0.7862400412559509, + "learning_rate": 0.0015, + "loss": 1.9545, + "step": 930 + }, + { + "epoch": 0.09915611814345991, + "grad_norm": 0.7486106157302856, + "learning_rate": 0.0015, + "loss": 1.9585, + "step": 940 + }, + { + "epoch": 0.10021097046413502, + "grad_norm": 0.7733015418052673, + "learning_rate": 0.0015, + "loss": 1.9395, + "step": 950 + }, + { + "epoch": 0.10126582278481013, + "grad_norm": 1.0001468658447266, + "learning_rate": 0.0015, + "loss": 1.9475, + "step": 960 + }, + { + "epoch": 0.10232067510548523, + "grad_norm": 1.235973596572876, + "learning_rate": 0.0015, + "loss": 1.9476, + "step": 970 
+ }, + { + "epoch": 0.10337552742616034, + "grad_norm": 0.8393082618713379, + "learning_rate": 0.0015, + "loss": 1.926, + "step": 980 + }, + { + "epoch": 0.10443037974683544, + "grad_norm": 0.9256130456924438, + "learning_rate": 0.0015, + "loss": 1.9229, + "step": 990 + }, + { + "epoch": 0.10548523206751055, + "grad_norm": 0.9824784398078918, + "learning_rate": 0.0015, + "loss": 1.9333, + "step": 1000 + }, + { + "epoch": 0.10654008438818566, + "grad_norm": 0.8495360016822815, + "learning_rate": 0.0015, + "loss": 1.9224, + "step": 1010 + }, + { + "epoch": 0.10759493670886076, + "grad_norm": 1.0773872137069702, + "learning_rate": 0.0015, + "loss": 1.9162, + "step": 1020 + }, + { + "epoch": 0.10864978902953587, + "grad_norm": 1.3959119319915771, + "learning_rate": 0.0015, + "loss": 1.9236, + "step": 1030 + }, + { + "epoch": 0.10970464135021098, + "grad_norm": 0.9055245518684387, + "learning_rate": 0.0015, + "loss": 1.9032, + "step": 1040 + }, + { + "epoch": 0.11075949367088607, + "grad_norm": 1.3100861310958862, + "learning_rate": 0.0015, + "loss": 1.8989, + "step": 1050 + }, + { + "epoch": 0.11181434599156118, + "grad_norm": 0.8331581950187683, + "learning_rate": 0.0015, + "loss": 1.8923, + "step": 1060 + }, + { + "epoch": 0.11286919831223628, + "grad_norm": 0.7473538517951965, + "learning_rate": 0.0015, + "loss": 1.8811, + "step": 1070 + }, + { + "epoch": 0.11392405063291139, + "grad_norm": 1.0861890316009521, + "learning_rate": 0.0015, + "loss": 1.8849, + "step": 1080 + }, + { + "epoch": 0.1149789029535865, + "grad_norm": 1.1819864511489868, + "learning_rate": 0.0015, + "loss": 1.8956, + "step": 1090 + }, + { + "epoch": 0.1160337552742616, + "grad_norm": 1.0402820110321045, + "learning_rate": 0.0015, + "loss": 1.8816, + "step": 1100 + }, + { + "epoch": 0.11708860759493671, + "grad_norm": 0.7219258546829224, + "learning_rate": 0.0015, + "loss": 1.8826, + "step": 1110 + }, + { + "epoch": 0.11814345991561181, + "grad_norm": 0.7776960134506226, + "learning_rate": 
0.0015, + "loss": 1.8733, + "step": 1120 + }, + { + "epoch": 0.11919831223628692, + "grad_norm": 1.0068633556365967, + "learning_rate": 0.0015, + "loss": 1.8611, + "step": 1130 + }, + { + "epoch": 0.12025316455696203, + "grad_norm": 0.7182141542434692, + "learning_rate": 0.0015, + "loss": 1.8736, + "step": 1140 + }, + { + "epoch": 0.12130801687763713, + "grad_norm": 1.2758862972259521, + "learning_rate": 0.0015, + "loss": 1.8607, + "step": 1150 + }, + { + "epoch": 0.12236286919831224, + "grad_norm": 0.7729710936546326, + "learning_rate": 0.0015, + "loss": 1.8682, + "step": 1160 + }, + { + "epoch": 0.12341772151898735, + "grad_norm": 0.824714720249176, + "learning_rate": 0.0015, + "loss": 1.8553, + "step": 1170 + }, + { + "epoch": 0.12447257383966245, + "grad_norm": 0.7625918984413147, + "learning_rate": 0.0015, + "loss": 1.8436, + "step": 1180 + }, + { + "epoch": 0.12552742616033755, + "grad_norm": 0.7074143886566162, + "learning_rate": 0.0015, + "loss": 1.8445, + "step": 1190 + }, + { + "epoch": 0.12658227848101267, + "grad_norm": 0.8470212817192078, + "learning_rate": 0.0015, + "loss": 1.8557, + "step": 1200 + }, + { + "epoch": 0.12763713080168776, + "grad_norm": 0.8082977533340454, + "learning_rate": 0.0015, + "loss": 1.8484, + "step": 1210 + }, + { + "epoch": 0.12869198312236288, + "grad_norm": 0.7619545459747314, + "learning_rate": 0.0015, + "loss": 1.8302, + "step": 1220 + }, + { + "epoch": 0.12974683544303797, + "grad_norm": 0.780472457408905, + "learning_rate": 0.0015, + "loss": 1.8249, + "step": 1230 + }, + { + "epoch": 0.1308016877637131, + "grad_norm": 0.9205415844917297, + "learning_rate": 0.0015, + "loss": 1.84, + "step": 1240 + }, + { + "epoch": 0.13185654008438819, + "grad_norm": 0.7577593922615051, + "learning_rate": 0.0015, + "loss": 1.8432, + "step": 1250 + }, + { + "epoch": 0.13291139240506328, + "grad_norm": 1.1202126741409302, + "learning_rate": 0.0015, + "loss": 1.8246, + "step": 1260 + }, + { + "epoch": 0.1339662447257384, + "grad_norm": 
0.751420259475708, + "learning_rate": 0.0015, + "loss": 1.8198, + "step": 1270 + }, + { + "epoch": 0.1350210970464135, + "grad_norm": 0.9752739667892456, + "learning_rate": 0.0015, + "loss": 1.8334, + "step": 1280 + }, + { + "epoch": 0.1360759493670886, + "grad_norm": 0.911014199256897, + "learning_rate": 0.0015, + "loss": 1.8245, + "step": 1290 + }, + { + "epoch": 0.1371308016877637, + "grad_norm": 0.7375240325927734, + "learning_rate": 0.0015, + "loss": 1.8096, + "step": 1300 + }, + { + "epoch": 0.13818565400843882, + "grad_norm": 0.7254061102867126, + "learning_rate": 0.0015, + "loss": 1.8084, + "step": 1310 + }, + { + "epoch": 0.13924050632911392, + "grad_norm": 0.7882977724075317, + "learning_rate": 0.0015, + "loss": 1.8112, + "step": 1320 + }, + { + "epoch": 0.14029535864978904, + "grad_norm": 0.9537419080734253, + "learning_rate": 0.0015, + "loss": 1.8067, + "step": 1330 + }, + { + "epoch": 0.14135021097046413, + "grad_norm": 0.800441563129425, + "learning_rate": 0.0015, + "loss": 1.8063, + "step": 1340 + }, + { + "epoch": 0.14240506329113925, + "grad_norm": 0.8895426988601685, + "learning_rate": 0.0015, + "loss": 1.8095, + "step": 1350 + }, + { + "epoch": 0.14345991561181434, + "grad_norm": 0.8667899370193481, + "learning_rate": 0.0015, + "loss": 1.8014, + "step": 1360 + }, + { + "epoch": 0.14451476793248946, + "grad_norm": 0.7451757192611694, + "learning_rate": 0.0015, + "loss": 1.7951, + "step": 1370 + }, + { + "epoch": 0.14556962025316456, + "grad_norm": 0.6888129711151123, + "learning_rate": 0.0015, + "loss": 1.7856, + "step": 1380 + }, + { + "epoch": 0.14662447257383968, + "grad_norm": 1.1507620811462402, + "learning_rate": 0.0015, + "loss": 1.7963, + "step": 1390 + }, + { + "epoch": 0.14767932489451477, + "grad_norm": 0.9323578476905823, + "learning_rate": 0.0015, + "loss": 1.7842, + "step": 1400 + }, + { + "epoch": 0.14873417721518986, + "grad_norm": 0.7493823766708374, + "learning_rate": 0.0015, + "loss": 1.7827, + "step": 1410 + }, + { + "epoch": 
0.14978902953586498, + "grad_norm": 0.8317467570304871, + "learning_rate": 0.0015, + "loss": 1.7905, + "step": 1420 + }, + { + "epoch": 0.15084388185654007, + "grad_norm": 0.814274787902832, + "learning_rate": 0.0015, + "loss": 1.7812, + "step": 1430 + }, + { + "epoch": 0.1518987341772152, + "grad_norm": 0.7995831966400146, + "learning_rate": 0.0015, + "loss": 1.7779, + "step": 1440 + }, + { + "epoch": 0.1529535864978903, + "grad_norm": 1.2135629653930664, + "learning_rate": 0.0015, + "loss": 1.7878, + "step": 1450 + }, + { + "epoch": 0.1540084388185654, + "grad_norm": 1.0643233060836792, + "learning_rate": 0.0015, + "loss": 1.7735, + "step": 1460 + }, + { + "epoch": 0.1550632911392405, + "grad_norm": 1.0490831136703491, + "learning_rate": 0.0015, + "loss": 1.7631, + "step": 1470 + }, + { + "epoch": 0.15611814345991562, + "grad_norm": 0.7500695586204529, + "learning_rate": 0.0015, + "loss": 1.7674, + "step": 1480 + }, + { + "epoch": 0.1571729957805907, + "grad_norm": 0.7511032223701477, + "learning_rate": 0.0015, + "loss": 1.7688, + "step": 1490 + }, + { + "epoch": 0.15822784810126583, + "grad_norm": 0.7045536637306213, + "learning_rate": 0.0015, + "loss": 1.7651, + "step": 1500 + }, + { + "epoch": 0.15928270042194093, + "grad_norm": 0.7500197291374207, + "learning_rate": 0.0015, + "loss": 1.7678, + "step": 1510 + }, + { + "epoch": 0.16033755274261605, + "grad_norm": 0.7548623085021973, + "learning_rate": 0.0015, + "loss": 1.7699, + "step": 1520 + }, + { + "epoch": 0.16139240506329114, + "grad_norm": 0.7176685333251953, + "learning_rate": 0.0015, + "loss": 1.7536, + "step": 1530 + }, + { + "epoch": 0.16244725738396623, + "grad_norm": 0.7291255593299866, + "learning_rate": 0.0015, + "loss": 1.7427, + "step": 1540 + }, + { + "epoch": 0.16350210970464135, + "grad_norm": 0.9021326899528503, + "learning_rate": 0.0015, + "loss": 1.7449, + "step": 1550 + }, + { + "epoch": 0.16455696202531644, + "grad_norm": 0.7925799489021301, + "learning_rate": 0.0015, + "loss": 1.749, + 
"step": 1560 + }, + { + "epoch": 0.16561181434599156, + "grad_norm": 0.8485352396965027, + "learning_rate": 0.0015, + "loss": 1.7378, + "step": 1570 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 0.693575918674469, + "learning_rate": 0.0015, + "loss": 1.7449, + "step": 1580 + }, + { + "epoch": 0.16772151898734178, + "grad_norm": 1.1650432348251343, + "learning_rate": 0.0015, + "loss": 1.7417, + "step": 1590 + }, + { + "epoch": 0.16877637130801687, + "grad_norm": 0.7259058356285095, + "learning_rate": 0.0015, + "loss": 1.7483, + "step": 1600 + }, + { + "epoch": 0.169831223628692, + "grad_norm": 0.7936257123947144, + "learning_rate": 0.0015, + "loss": 1.7474, + "step": 1610 + }, + { + "epoch": 0.17088607594936708, + "grad_norm": 0.8029617071151733, + "learning_rate": 0.0015, + "loss": 1.7153, + "step": 1620 + }, + { + "epoch": 0.1719409282700422, + "grad_norm": 1.1223828792572021, + "learning_rate": 0.0015, + "loss": 1.7305, + "step": 1630 + }, + { + "epoch": 0.1729957805907173, + "grad_norm": 0.7719654440879822, + "learning_rate": 0.0015, + "loss": 1.7343, + "step": 1640 + }, + { + "epoch": 0.17405063291139242, + "grad_norm": 1.1776994466781616, + "learning_rate": 0.0015, + "loss": 1.7241, + "step": 1650 + }, + { + "epoch": 0.1751054852320675, + "grad_norm": 0.718641459941864, + "learning_rate": 0.0015, + "loss": 1.7233, + "step": 1660 + }, + { + "epoch": 0.17616033755274263, + "grad_norm": 0.8557989597320557, + "learning_rate": 0.0015, + "loss": 1.7324, + "step": 1670 + }, + { + "epoch": 0.17721518987341772, + "grad_norm": 2.0041747093200684, + "learning_rate": 0.0015, + "loss": 1.7233, + "step": 1680 + }, + { + "epoch": 0.17827004219409281, + "grad_norm": 1.0094479322433472, + "learning_rate": 0.0015, + "loss": 1.7213, + "step": 1690 + }, + { + "epoch": 0.17932489451476794, + "grad_norm": 0.7915872931480408, + "learning_rate": 0.0015, + "loss": 1.7231, + "step": 1700 + }, + { + "epoch": 0.18037974683544303, + "grad_norm": 0.7157123684883118, + 
"learning_rate": 0.0015, + "loss": 1.7025, + "step": 1710 + }, + { + "epoch": 0.18143459915611815, + "grad_norm": 0.736443817615509, + "learning_rate": 0.0015, + "loss": 1.705, + "step": 1720 + }, + { + "epoch": 0.18248945147679324, + "grad_norm": 0.8941551446914673, + "learning_rate": 0.0015, + "loss": 1.7097, + "step": 1730 + }, + { + "epoch": 0.18354430379746836, + "grad_norm": 0.7566021680831909, + "learning_rate": 0.0015, + "loss": 1.7175, + "step": 1740 + }, + { + "epoch": 0.18459915611814345, + "grad_norm": 0.7393425703048706, + "learning_rate": 0.0015, + "loss": 1.7354, + "step": 1750 + }, + { + "epoch": 0.18565400843881857, + "grad_norm": 0.8968936800956726, + "learning_rate": 0.0015, + "loss": 1.7097, + "step": 1760 + }, + { + "epoch": 0.18670886075949367, + "grad_norm": 0.822433352470398, + "learning_rate": 0.0015, + "loss": 1.7137, + "step": 1770 + }, + { + "epoch": 0.1877637130801688, + "grad_norm": 1.120972990989685, + "learning_rate": 0.0015, + "loss": 1.7135, + "step": 1780 + }, + { + "epoch": 0.18881856540084388, + "grad_norm": 1.3097538948059082, + "learning_rate": 0.0015, + "loss": 1.7165, + "step": 1790 + }, + { + "epoch": 0.189873417721519, + "grad_norm": 0.9212289452552795, + "learning_rate": 0.0015, + "loss": 1.7114, + "step": 1800 + }, + { + "epoch": 0.1909282700421941, + "grad_norm": 0.6948496699333191, + "learning_rate": 0.0015, + "loss": 1.6932, + "step": 1810 + }, + { + "epoch": 0.19198312236286919, + "grad_norm": 0.8515973687171936, + "learning_rate": 0.0015, + "loss": 1.6945, + "step": 1820 + }, + { + "epoch": 0.1930379746835443, + "grad_norm": 0.7402908802032471, + "learning_rate": 0.0015, + "loss": 1.694, + "step": 1830 + }, + { + "epoch": 0.1940928270042194, + "grad_norm": 0.8134198188781738, + "learning_rate": 0.0015, + "loss": 1.6939, + "step": 1840 + }, + { + "epoch": 0.19514767932489452, + "grad_norm": 0.8519315123558044, + "learning_rate": 0.0015, + "loss": 1.6961, + "step": 1850 + }, + { + "epoch": 0.1962025316455696, + 
"grad_norm": 0.775204598903656, + "learning_rate": 0.0015, + "loss": 1.6987, + "step": 1860 + }, + { + "epoch": 0.19725738396624473, + "grad_norm": 0.7700886130332947, + "learning_rate": 0.0015, + "loss": 1.6997, + "step": 1870 + }, + { + "epoch": 0.19831223628691982, + "grad_norm": 0.8577374219894409, + "learning_rate": 0.0015, + "loss": 1.6842, + "step": 1880 + }, + { + "epoch": 0.19936708860759494, + "grad_norm": 0.7702552676200867, + "learning_rate": 0.0015, + "loss": 1.6922, + "step": 1890 + }, + { + "epoch": 0.20042194092827004, + "grad_norm": 0.8590148091316223, + "learning_rate": 0.0015, + "loss": 1.6874, + "step": 1900 + }, + { + "epoch": 0.20147679324894516, + "grad_norm": 0.7567857503890991, + "learning_rate": 0.0015, + "loss": 1.686, + "step": 1910 + }, + { + "epoch": 0.20253164556962025, + "grad_norm": 0.8051232695579529, + "learning_rate": 0.0015, + "loss": 1.6881, + "step": 1920 + }, + { + "epoch": 0.20358649789029537, + "grad_norm": 0.9527768492698669, + "learning_rate": 0.0015, + "loss": 1.6856, + "step": 1930 + }, + { + "epoch": 0.20464135021097046, + "grad_norm": 0.7400715947151184, + "learning_rate": 0.0015, + "loss": 1.6758, + "step": 1940 + }, + { + "epoch": 0.20569620253164558, + "grad_norm": 0.737914502620697, + "learning_rate": 0.0015, + "loss": 1.6774, + "step": 1950 + }, + { + "epoch": 0.20675105485232068, + "grad_norm": 0.7497859597206116, + "learning_rate": 0.0015, + "loss": 1.6792, + "step": 1960 + }, + { + "epoch": 0.20780590717299577, + "grad_norm": 0.6914311051368713, + "learning_rate": 0.0015, + "loss": 1.6907, + "step": 1970 + }, + { + "epoch": 0.2088607594936709, + "grad_norm": 0.9675593972206116, + "learning_rate": 0.0015, + "loss": 1.6724, + "step": 1980 + }, + { + "epoch": 0.20991561181434598, + "grad_norm": 0.8558260798454285, + "learning_rate": 0.0015, + "loss": 1.6682, + "step": 1990 + }, + { + "epoch": 0.2109704641350211, + "grad_norm": 0.8800602555274963, + "learning_rate": 0.0015, + "loss": 1.6666, + "step": 2000 + }, + 
{ + "epoch": 0.2120253164556962, + "grad_norm": 0.9772295951843262, + "learning_rate": 0.0015, + "loss": 1.675, + "step": 2010 + }, + { + "epoch": 0.21308016877637131, + "grad_norm": 1.2774696350097656, + "learning_rate": 0.0015, + "loss": 1.6867, + "step": 2020 + }, + { + "epoch": 0.2141350210970464, + "grad_norm": 0.8352954983711243, + "learning_rate": 0.0015, + "loss": 1.672, + "step": 2030 + }, + { + "epoch": 0.21518987341772153, + "grad_norm": 0.8202356100082397, + "learning_rate": 0.0015, + "loss": 1.6726, + "step": 2040 + }, + { + "epoch": 0.21624472573839662, + "grad_norm": 0.7393966913223267, + "learning_rate": 0.0015, + "loss": 1.6695, + "step": 2050 + }, + { + "epoch": 0.21729957805907174, + "grad_norm": 0.7405022382736206, + "learning_rate": 0.0015, + "loss": 1.6602, + "step": 2060 + }, + { + "epoch": 0.21835443037974683, + "grad_norm": 0.8124085664749146, + "learning_rate": 0.0015, + "loss": 1.662, + "step": 2070 + }, + { + "epoch": 0.21940928270042195, + "grad_norm": 0.7155125141143799, + "learning_rate": 0.0015, + "loss": 1.6661, + "step": 2080 + }, + { + "epoch": 0.22046413502109705, + "grad_norm": 0.8826368451118469, + "learning_rate": 0.0015, + "loss": 1.6566, + "step": 2090 + }, + { + "epoch": 0.22151898734177214, + "grad_norm": 0.7748101353645325, + "learning_rate": 0.0015, + "loss": 1.6652, + "step": 2100 + }, + { + "epoch": 0.22257383966244726, + "grad_norm": 0.8152534365653992, + "learning_rate": 0.0015, + "loss": 1.6518, + "step": 2110 + }, + { + "epoch": 0.22362869198312235, + "grad_norm": 0.8592486381530762, + "learning_rate": 0.0015, + "loss": 1.6524, + "step": 2120 + }, + { + "epoch": 0.22468354430379747, + "grad_norm": 0.746995210647583, + "learning_rate": 0.0015, + "loss": 1.6556, + "step": 2130 + }, + { + "epoch": 0.22573839662447256, + "grad_norm": 0.6672290563583374, + "learning_rate": 0.0015, + "loss": 1.648, + "step": 2140 + }, + { + "epoch": 0.22679324894514769, + "grad_norm": 1.0183080434799194, + "learning_rate": 0.0015, + 
"loss": 1.6564, + "step": 2150 + }, + { + "epoch": 0.22784810126582278, + "grad_norm": 1.185172200202942, + "learning_rate": 0.0015, + "loss": 1.6799, + "step": 2160 + }, + { + "epoch": 0.2289029535864979, + "grad_norm": 0.863520622253418, + "learning_rate": 0.0015, + "loss": 1.6517, + "step": 2170 + }, + { + "epoch": 0.229957805907173, + "grad_norm": 0.7501918077468872, + "learning_rate": 0.0015, + "loss": 1.6319, + "step": 2180 + }, + { + "epoch": 0.2310126582278481, + "grad_norm": 0.7372732162475586, + "learning_rate": 0.0015, + "loss": 1.6514, + "step": 2190 + }, + { + "epoch": 0.2320675105485232, + "grad_norm": 0.7103750109672546, + "learning_rate": 0.0015, + "loss": 1.652, + "step": 2200 + }, + { + "epoch": 0.23312236286919832, + "grad_norm": 0.6133575439453125, + "learning_rate": 0.0015, + "loss": 1.6375, + "step": 2210 + }, + { + "epoch": 0.23417721518987342, + "grad_norm": 0.8095520734786987, + "learning_rate": 0.0015, + "loss": 1.6496, + "step": 2220 + }, + { + "epoch": 0.23523206751054854, + "grad_norm": 0.6962016820907593, + "learning_rate": 0.0015, + "loss": 1.6475, + "step": 2230 + }, + { + "epoch": 0.23628691983122363, + "grad_norm": 0.7668031454086304, + "learning_rate": 0.0015, + "loss": 1.6426, + "step": 2240 + }, + { + "epoch": 0.23734177215189872, + "grad_norm": 1.2006616592407227, + "learning_rate": 0.0015, + "loss": 1.6388, + "step": 2250 + }, + { + "epoch": 0.23839662447257384, + "grad_norm": 0.7320358753204346, + "learning_rate": 0.0015, + "loss": 1.6321, + "step": 2260 + }, + { + "epoch": 0.23945147679324894, + "grad_norm": 0.8369874954223633, + "learning_rate": 0.0015, + "loss": 1.661, + "step": 2270 + }, + { + "epoch": 0.24050632911392406, + "grad_norm": 0.8425046801567078, + "learning_rate": 0.0015, + "loss": 1.6494, + "step": 2280 + }, + { + "epoch": 0.24156118143459915, + "grad_norm": 0.7920575737953186, + "learning_rate": 0.0015, + "loss": 1.6351, + "step": 2290 + }, + { + "epoch": 0.24261603375527427, + "grad_norm": 
0.7762938737869263, + "learning_rate": 0.0015, + "loss": 1.6331, + "step": 2300 + }, + { + "epoch": 0.24367088607594936, + "grad_norm": 0.900653600692749, + "learning_rate": 0.0015, + "loss": 1.6333, + "step": 2310 + }, + { + "epoch": 0.24472573839662448, + "grad_norm": 0.745788037776947, + "learning_rate": 0.0015, + "loss": 1.6424, + "step": 2320 + }, + { + "epoch": 0.24578059071729957, + "grad_norm": 0.6916600465774536, + "learning_rate": 0.0015, + "loss": 1.6251, + "step": 2330 + }, + { + "epoch": 0.2468354430379747, + "grad_norm": 0.8070979118347168, + "learning_rate": 0.0015, + "loss": 1.6207, + "step": 2340 + }, + { + "epoch": 0.2478902953586498, + "grad_norm": 0.990899920463562, + "learning_rate": 0.0015, + "loss": 1.6252, + "step": 2350 + }, + { + "epoch": 0.2489451476793249, + "grad_norm": 0.7636021375656128, + "learning_rate": 0.0015, + "loss": 1.6367, + "step": 2360 + }, + { + "epoch": 0.25, + "grad_norm": 0.7139121294021606, + "learning_rate": 0.0015, + "loss": 1.6285, + "step": 2370 + }, + { + "epoch": 0.2510548523206751, + "grad_norm": 0.6985851526260376, + "learning_rate": 0.0015, + "loss": 1.6261, + "step": 2380 + }, + { + "epoch": 0.2521097046413502, + "grad_norm": 0.7967408299446106, + "learning_rate": 0.0015, + "loss": 1.6272, + "step": 2390 + }, + { + "epoch": 0.25316455696202533, + "grad_norm": 0.6866884231567383, + "learning_rate": 0.0015, + "loss": 1.6245, + "step": 2400 + }, + { + "epoch": 0.2542194092827004, + "grad_norm": 1.020408272743225, + "learning_rate": 0.0015, + "loss": 1.6262, + "step": 2410 + }, + { + "epoch": 0.2552742616033755, + "grad_norm": 0.814422607421875, + "learning_rate": 0.0015, + "loss": 1.6193, + "step": 2420 + }, + { + "epoch": 0.2563291139240506, + "grad_norm": 0.8215078711509705, + "learning_rate": 0.0015, + "loss": 1.6204, + "step": 2430 + }, + { + "epoch": 0.25738396624472576, + "grad_norm": 0.8579127192497253, + "learning_rate": 0.0015, + "loss": 1.6302, + "step": 2440 + }, + { + "epoch": 0.25843881856540085, + 
"grad_norm": 0.6998600363731384, + "learning_rate": 0.0015, + "loss": 1.6138, + "step": 2450 + }, + { + "epoch": 0.25949367088607594, + "grad_norm": 0.7620789408683777, + "learning_rate": 0.0015, + "loss": 1.6301, + "step": 2460 + }, + { + "epoch": 0.26054852320675104, + "grad_norm": 1.034751534461975, + "learning_rate": 0.0015, + "loss": 1.6214, + "step": 2470 + }, + { + "epoch": 0.2616033755274262, + "grad_norm": 0.8016301393508911, + "learning_rate": 0.0015, + "loss": 1.6245, + "step": 2480 + }, + { + "epoch": 0.2626582278481013, + "grad_norm": 0.6942989826202393, + "learning_rate": 0.0015, + "loss": 1.611, + "step": 2490 + }, + { + "epoch": 0.26371308016877637, + "grad_norm": 0.7227789163589478, + "learning_rate": 0.0015, + "loss": 1.6157, + "step": 2500 + }, + { + "epoch": 0.26476793248945146, + "grad_norm": 0.7039353847503662, + "learning_rate": 0.0015, + "loss": 1.6179, + "step": 2510 + }, + { + "epoch": 0.26582278481012656, + "grad_norm": 1.0898023843765259, + "learning_rate": 0.0015, + "loss": 1.619, + "step": 2520 + }, + { + "epoch": 0.2668776371308017, + "grad_norm": 1.2237480878829956, + "learning_rate": 0.0015, + "loss": 1.611, + "step": 2530 + }, + { + "epoch": 0.2679324894514768, + "grad_norm": 1.3692232370376587, + "learning_rate": 0.0015, + "loss": 1.618, + "step": 2540 + }, + { + "epoch": 0.2689873417721519, + "grad_norm": 0.9368470311164856, + "learning_rate": 0.0015, + "loss": 1.6105, + "step": 2550 + }, + { + "epoch": 0.270042194092827, + "grad_norm": 0.7768730521202087, + "learning_rate": 0.0015, + "loss": 1.6174, + "step": 2560 + }, + { + "epoch": 0.27109704641350213, + "grad_norm": 0.7026436924934387, + "learning_rate": 0.0015, + "loss": 1.6115, + "step": 2570 + }, + { + "epoch": 0.2721518987341772, + "grad_norm": 0.7590056657791138, + "learning_rate": 0.0015, + "loss": 1.6108, + "step": 2580 + }, + { + "epoch": 0.2732067510548523, + "grad_norm": 0.7302026152610779, + "learning_rate": 0.0015, + "loss": 1.6188, + "step": 2590 + }, + { + 
"epoch": 0.2742616033755274, + "grad_norm": 0.766941249370575, + "learning_rate": 0.0015, + "loss": 1.6168, + "step": 2600 + }, + { + "epoch": 0.27531645569620256, + "grad_norm": 0.7447951436042786, + "learning_rate": 0.0015, + "loss": 1.6145, + "step": 2610 + }, + { + "epoch": 0.27637130801687765, + "grad_norm": 0.7790787220001221, + "learning_rate": 0.0015, + "loss": 1.6121, + "step": 2620 + }, + { + "epoch": 0.27742616033755274, + "grad_norm": 0.8253964781761169, + "learning_rate": 0.0015, + "loss": 1.6015, + "step": 2630 + }, + { + "epoch": 0.27848101265822783, + "grad_norm": 0.9600082635879517, + "learning_rate": 0.0015, + "loss": 1.6141, + "step": 2640 + }, + { + "epoch": 0.2795358649789029, + "grad_norm": 0.6609300374984741, + "learning_rate": 0.0015, + "loss": 1.6104, + "step": 2650 + }, + { + "epoch": 0.2805907172995781, + "grad_norm": 0.8002963066101074, + "learning_rate": 0.0015, + "loss": 1.5921, + "step": 2660 + }, + { + "epoch": 0.28164556962025317, + "grad_norm": 0.6853958964347839, + "learning_rate": 0.0015, + "loss": 1.5976, + "step": 2670 + }, + { + "epoch": 0.28270042194092826, + "grad_norm": 0.8286063075065613, + "learning_rate": 0.0015, + "loss": 1.6007, + "step": 2680 + }, + { + "epoch": 0.28375527426160335, + "grad_norm": 0.6813147068023682, + "learning_rate": 0.0015, + "loss": 1.5981, + "step": 2690 + }, + { + "epoch": 0.2848101265822785, + "grad_norm": 0.6169975996017456, + "learning_rate": 0.0015, + "loss": 1.5952, + "step": 2700 + }, + { + "epoch": 0.2858649789029536, + "grad_norm": 0.7873396873474121, + "learning_rate": 0.0015, + "loss": 1.6018, + "step": 2710 + }, + { + "epoch": 0.2869198312236287, + "grad_norm": 0.7605020403862, + "learning_rate": 0.0015, + "loss": 1.6041, + "step": 2720 + }, + { + "epoch": 0.2879746835443038, + "grad_norm": 0.7295371890068054, + "learning_rate": 0.0015, + "loss": 1.5811, + "step": 2730 + }, + { + "epoch": 0.2890295358649789, + "grad_norm": 1.0102437734603882, + "learning_rate": 0.0015, + "loss": 
1.5957, + "step": 2740 + }, + { + "epoch": 0.290084388185654, + "grad_norm": 0.7566195130348206, + "learning_rate": 0.0015, + "loss": 1.5982, + "step": 2750 + }, + { + "epoch": 0.2911392405063291, + "grad_norm": 0.9955616593360901, + "learning_rate": 0.0015, + "loss": 1.6064, + "step": 2760 + }, + { + "epoch": 0.2921940928270042, + "grad_norm": 0.845483124256134, + "learning_rate": 0.0015, + "loss": 1.6038, + "step": 2770 + }, + { + "epoch": 0.29324894514767935, + "grad_norm": 0.7566556334495544, + "learning_rate": 0.0015, + "loss": 1.5989, + "step": 2780 + }, + { + "epoch": 0.29430379746835444, + "grad_norm": 0.7398541569709778, + "learning_rate": 0.0015, + "loss": 1.594, + "step": 2790 + }, + { + "epoch": 0.29535864978902954, + "grad_norm": 0.7129564881324768, + "learning_rate": 0.0015, + "loss": 1.5958, + "step": 2800 + }, + { + "epoch": 0.29641350210970463, + "grad_norm": 0.8826091885566711, + "learning_rate": 0.0015, + "loss": 1.5955, + "step": 2810 + }, + { + "epoch": 0.2974683544303797, + "grad_norm": 0.8548098206520081, + "learning_rate": 0.0015, + "loss": 1.6033, + "step": 2820 + }, + { + "epoch": 0.29852320675105487, + "grad_norm": 0.982220470905304, + "learning_rate": 0.0015, + "loss": 1.576, + "step": 2830 + }, + { + "epoch": 0.29957805907172996, + "grad_norm": 1.76087486743927, + "learning_rate": 0.0015, + "loss": 1.5832, + "step": 2840 + }, + { + "epoch": 0.30063291139240506, + "grad_norm": 1.2214303016662598, + "learning_rate": 0.0015, + "loss": 1.5897, + "step": 2850 + }, + { + "epoch": 0.30168776371308015, + "grad_norm": 1.0710519552230835, + "learning_rate": 0.0015, + "loss": 1.5829, + "step": 2860 + }, + { + "epoch": 0.3027426160337553, + "grad_norm": 0.9057137370109558, + "learning_rate": 0.0015, + "loss": 1.5783, + "step": 2870 + }, + { + "epoch": 0.3037974683544304, + "grad_norm": 0.647352397441864, + "learning_rate": 0.0015, + "loss": 1.5708, + "step": 2880 + }, + { + "epoch": 0.3048523206751055, + "grad_norm": 0.6587830781936646, + 
"learning_rate": 0.0015, + "loss": 1.5769, + "step": 2890 + }, + { + "epoch": 0.3059071729957806, + "grad_norm": 0.9226182699203491, + "learning_rate": 0.0015, + "loss": 1.5843, + "step": 2900 + }, + { + "epoch": 0.3069620253164557, + "grad_norm": 0.8765528798103333, + "learning_rate": 0.0015, + "loss": 1.5854, + "step": 2910 + }, + { + "epoch": 0.3080168776371308, + "grad_norm": 0.6805595755577087, + "learning_rate": 0.0015, + "loss": 1.5816, + "step": 2920 + }, + { + "epoch": 0.3090717299578059, + "grad_norm": 0.8840172290802002, + "learning_rate": 0.0015, + "loss": 1.5732, + "step": 2930 + }, + { + "epoch": 0.310126582278481, + "grad_norm": 0.6584175825119019, + "learning_rate": 0.0015, + "loss": 1.5825, + "step": 2940 + }, + { + "epoch": 0.3111814345991561, + "grad_norm": 1.2141950130462646, + "learning_rate": 0.0015, + "loss": 1.5947, + "step": 2950 + }, + { + "epoch": 0.31223628691983124, + "grad_norm": 1.018752098083496, + "learning_rate": 0.0015, + "loss": 1.5834, + "step": 2960 + }, + { + "epoch": 0.31329113924050633, + "grad_norm": 1.063403606414795, + "learning_rate": 0.0015, + "loss": 1.5629, + "step": 2970 + }, + { + "epoch": 0.3143459915611814, + "grad_norm": 0.6287301778793335, + "learning_rate": 0.0015, + "loss": 1.5701, + "step": 2980 + }, + { + "epoch": 0.3154008438818565, + "grad_norm": 0.7948575019836426, + "learning_rate": 0.0015, + "loss": 1.589, + "step": 2990 + }, + { + "epoch": 0.31645569620253167, + "grad_norm": 0.8574248552322388, + "learning_rate": 0.0015, + "loss": 1.569, + "step": 3000 + }, + { + "epoch": 0.31751054852320676, + "grad_norm": 0.9827894568443298, + "learning_rate": 0.0015, + "loss": 1.5763, + "step": 3010 + }, + { + "epoch": 0.31856540084388185, + "grad_norm": 0.9797403812408447, + "learning_rate": 0.0015, + "loss": 1.5819, + "step": 3020 + }, + { + "epoch": 0.31962025316455694, + "grad_norm": 0.6438536047935486, + "learning_rate": 0.0015, + "loss": 1.5635, + "step": 3030 + }, + { + "epoch": 0.3206751054852321, + 
"grad_norm": 0.7498347759246826, + "learning_rate": 0.0015, + "loss": 1.5683, + "step": 3040 + }, + { + "epoch": 0.3217299578059072, + "grad_norm": 0.644600510597229, + "learning_rate": 0.0015, + "loss": 1.5678, + "step": 3050 + }, + { + "epoch": 0.3227848101265823, + "grad_norm": 0.7296001315116882, + "learning_rate": 0.0015, + "loss": 1.5818, + "step": 3060 + }, + { + "epoch": 0.32383966244725737, + "grad_norm": 0.6949995756149292, + "learning_rate": 0.0015, + "loss": 1.5692, + "step": 3070 + }, + { + "epoch": 0.32489451476793246, + "grad_norm": 0.7084774374961853, + "learning_rate": 0.0015, + "loss": 1.5621, + "step": 3080 + }, + { + "epoch": 0.3259493670886076, + "grad_norm": 0.7495141625404358, + "learning_rate": 0.0015, + "loss": 1.5769, + "step": 3090 + }, + { + "epoch": 0.3270042194092827, + "grad_norm": 0.7148309350013733, + "learning_rate": 0.0015, + "loss": 1.5646, + "step": 3100 + }, + { + "epoch": 0.3280590717299578, + "grad_norm": 0.8621363043785095, + "learning_rate": 0.0015, + "loss": 1.5674, + "step": 3110 + }, + { + "epoch": 0.3291139240506329, + "grad_norm": 0.7373954057693481, + "learning_rate": 0.0015, + "loss": 1.5675, + "step": 3120 + }, + { + "epoch": 0.33016877637130804, + "grad_norm": 0.9246774315834045, + "learning_rate": 0.0015, + "loss": 1.5806, + "step": 3130 + }, + { + "epoch": 0.33122362869198313, + "grad_norm": 1.0000355243682861, + "learning_rate": 0.0015, + "loss": 1.5596, + "step": 3140 + }, + { + "epoch": 0.3322784810126582, + "grad_norm": 0.6955458521842957, + "learning_rate": 0.0015, + "loss": 1.5669, + "step": 3150 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.7976107001304626, + "learning_rate": 0.0015, + "loss": 1.5698, + "step": 3160 + }, + { + "epoch": 0.33438818565400846, + "grad_norm": 0.7036485075950623, + "learning_rate": 0.0015, + "loss": 1.5713, + "step": 3170 + }, + { + "epoch": 0.33544303797468356, + "grad_norm": 0.8242287039756775, + "learning_rate": 0.0015, + "loss": 1.5666, + "step": 3180 + }, + { + 
"epoch": 0.33649789029535865, + "grad_norm": 0.8952867984771729, + "learning_rate": 0.0015, + "loss": 1.5667, + "step": 3190 + }, + { + "epoch": 0.33755274261603374, + "grad_norm": 0.8135554194450378, + "learning_rate": 0.0015, + "loss": 1.5574, + "step": 3200 + }, + { + "epoch": 0.33860759493670883, + "grad_norm": 0.8204286098480225, + "learning_rate": 0.0015, + "loss": 1.575, + "step": 3210 + }, + { + "epoch": 0.339662447257384, + "grad_norm": 0.8386204242706299, + "learning_rate": 0.0015, + "loss": 1.5634, + "step": 3220 + }, + { + "epoch": 0.3407172995780591, + "grad_norm": 0.6780179142951965, + "learning_rate": 0.0015, + "loss": 1.5568, + "step": 3230 + }, + { + "epoch": 0.34177215189873417, + "grad_norm": 0.7867633700370789, + "learning_rate": 0.0015, + "loss": 1.5371, + "step": 3240 + }, + { + "epoch": 0.34282700421940926, + "grad_norm": 0.6894981861114502, + "learning_rate": 0.0015, + "loss": 1.562, + "step": 3250 + }, + { + "epoch": 0.3438818565400844, + "grad_norm": 0.8108405470848083, + "learning_rate": 0.0015, + "loss": 1.551, + "step": 3260 + }, + { + "epoch": 0.3449367088607595, + "grad_norm": 0.7118820548057556, + "learning_rate": 0.0015, + "loss": 1.5687, + "step": 3270 + }, + { + "epoch": 0.3459915611814346, + "grad_norm": 0.9861348867416382, + "learning_rate": 0.0015, + "loss": 1.5695, + "step": 3280 + }, + { + "epoch": 0.3470464135021097, + "grad_norm": 0.7879726886749268, + "learning_rate": 0.0015, + "loss": 1.5523, + "step": 3290 + }, + { + "epoch": 0.34810126582278483, + "grad_norm": 0.6835602521896362, + "learning_rate": 0.0015, + "loss": 1.5581, + "step": 3300 + }, + { + "epoch": 0.3491561181434599, + "grad_norm": 0.6511582732200623, + "learning_rate": 0.0015, + "loss": 1.5615, + "step": 3310 + }, + { + "epoch": 0.350210970464135, + "grad_norm": 0.6698130369186401, + "learning_rate": 0.0015, + "loss": 1.5662, + "step": 3320 + }, + { + "epoch": 0.3512658227848101, + "grad_norm": 0.7387862205505371, + "learning_rate": 0.0015, + "loss": 1.5571, 
+ "step": 3330 + }, + { + "epoch": 0.35232067510548526, + "grad_norm": 0.7718158960342407, + "learning_rate": 0.0015, + "loss": 1.5579, + "step": 3340 + }, + { + "epoch": 0.35337552742616035, + "grad_norm": 0.7898529767990112, + "learning_rate": 0.0015, + "loss": 1.5509, + "step": 3350 + }, + { + "epoch": 0.35443037974683544, + "grad_norm": 0.7661499381065369, + "learning_rate": 0.0015, + "loss": 1.5498, + "step": 3360 + }, + { + "epoch": 0.35548523206751054, + "grad_norm": 0.8911453485488892, + "learning_rate": 0.0015, + "loss": 1.5572, + "step": 3370 + }, + { + "epoch": 0.35654008438818563, + "grad_norm": 0.8298493027687073, + "learning_rate": 0.0015, + "loss": 1.5522, + "step": 3380 + }, + { + "epoch": 0.3575949367088608, + "grad_norm": 0.7602770924568176, + "learning_rate": 0.0015, + "loss": 1.5472, + "step": 3390 + }, + { + "epoch": 0.35864978902953587, + "grad_norm": 0.7430171966552734, + "learning_rate": 0.0015, + "loss": 1.5562, + "step": 3400 + }, + { + "epoch": 0.35970464135021096, + "grad_norm": 1.0005077123641968, + "learning_rate": 0.0015, + "loss": 1.5557, + "step": 3410 + }, + { + "epoch": 0.36075949367088606, + "grad_norm": 0.8610879182815552, + "learning_rate": 0.0015, + "loss": 1.5327, + "step": 3420 + }, + { + "epoch": 0.3618143459915612, + "grad_norm": 0.6350323557853699, + "learning_rate": 0.0015, + "loss": 1.5485, + "step": 3430 + }, + { + "epoch": 0.3628691983122363, + "grad_norm": 0.7002079486846924, + "learning_rate": 0.0015, + "loss": 1.5461, + "step": 3440 + }, + { + "epoch": 0.3639240506329114, + "grad_norm": 0.7629624605178833, + "learning_rate": 0.0015, + "loss": 1.5442, + "step": 3450 + }, + { + "epoch": 0.3649789029535865, + "grad_norm": 0.9452305436134338, + "learning_rate": 0.0015, + "loss": 1.5399, + "step": 3460 + }, + { + "epoch": 0.36603375527426163, + "grad_norm": 0.7865879535675049, + "learning_rate": 0.0015, + "loss": 1.5553, + "step": 3470 + }, + { + "epoch": 0.3670886075949367, + "grad_norm": 0.8868057131767273, + 
"learning_rate": 0.0015, + "loss": 1.56, + "step": 3480 + }, + { + "epoch": 0.3681434599156118, + "grad_norm": 0.7033258676528931, + "learning_rate": 0.0015, + "loss": 1.554, + "step": 3490 + }, + { + "epoch": 0.3691983122362869, + "grad_norm": 0.7414070963859558, + "learning_rate": 0.0015, + "loss": 1.5346, + "step": 3500 + }, + { + "epoch": 0.370253164556962, + "grad_norm": 0.8187000751495361, + "learning_rate": 0.0015, + "loss": 1.5529, + "step": 3510 + }, + { + "epoch": 0.37130801687763715, + "grad_norm": 0.6687618494033813, + "learning_rate": 0.0015, + "loss": 1.5493, + "step": 3520 + }, + { + "epoch": 0.37236286919831224, + "grad_norm": 0.7299246788024902, + "learning_rate": 0.0015, + "loss": 1.56, + "step": 3530 + }, + { + "epoch": 0.37341772151898733, + "grad_norm": 1.2000600099563599, + "learning_rate": 0.0015, + "loss": 1.5536, + "step": 3540 + }, + { + "epoch": 0.3744725738396624, + "grad_norm": 0.751514196395874, + "learning_rate": 0.0015, + "loss": 1.5431, + "step": 3550 + }, + { + "epoch": 0.3755274261603376, + "grad_norm": 1.0355634689331055, + "learning_rate": 0.0015, + "loss": 1.5435, + "step": 3560 + }, + { + "epoch": 0.37658227848101267, + "grad_norm": 0.7776429057121277, + "learning_rate": 0.0015, + "loss": 1.5379, + "step": 3570 + }, + { + "epoch": 0.37763713080168776, + "grad_norm": 1.2408852577209473, + "learning_rate": 0.0015, + "loss": 1.5411, + "step": 3580 + }, + { + "epoch": 0.37869198312236285, + "grad_norm": 0.7168144583702087, + "learning_rate": 0.0015, + "loss": 1.552, + "step": 3590 + }, + { + "epoch": 0.379746835443038, + "grad_norm": 0.9008408188819885, + "learning_rate": 0.0015, + "loss": 1.533, + "step": 3600 + }, + { + "epoch": 0.3808016877637131, + "grad_norm": 0.8503849506378174, + "learning_rate": 0.0015, + "loss": 1.5392, + "step": 3610 + }, + { + "epoch": 0.3818565400843882, + "grad_norm": 0.7525566816329956, + "learning_rate": 0.0015, + "loss": 1.5363, + "step": 3620 + }, + { + "epoch": 0.3829113924050633, + "grad_norm": 
0.8973195552825928, + "learning_rate": 0.0015, + "loss": 1.5392, + "step": 3630 + }, + { + "epoch": 0.38396624472573837, + "grad_norm": 0.8579481840133667, + "learning_rate": 0.0015, + "loss": 1.5395, + "step": 3640 + }, + { + "epoch": 0.3850210970464135, + "grad_norm": 0.9837735891342163, + "learning_rate": 0.0015, + "loss": 1.5499, + "step": 3650 + }, + { + "epoch": 0.3860759493670886, + "grad_norm": 0.8938726186752319, + "learning_rate": 0.0015, + "loss": 1.5412, + "step": 3660 + }, + { + "epoch": 0.3871308016877637, + "grad_norm": 0.7313820719718933, + "learning_rate": 0.0015, + "loss": 1.536, + "step": 3670 + }, + { + "epoch": 0.3881856540084388, + "grad_norm": 0.9659876227378845, + "learning_rate": 0.0015, + "loss": 1.5374, + "step": 3680 + }, + { + "epoch": 0.38924050632911394, + "grad_norm": 0.9065628051757812, + "learning_rate": 0.0015, + "loss": 1.5537, + "step": 3690 + }, + { + "epoch": 0.39029535864978904, + "grad_norm": 0.6022276878356934, + "learning_rate": 0.0015, + "loss": 1.547, + "step": 3700 + }, + { + "epoch": 0.39135021097046413, + "grad_norm": 0.6405502557754517, + "learning_rate": 0.0015, + "loss": 1.5322, + "step": 3710 + }, + { + "epoch": 0.3924050632911392, + "grad_norm": 0.6802970767021179, + "learning_rate": 0.0015, + "loss": 1.5128, + "step": 3720 + }, + { + "epoch": 0.39345991561181437, + "grad_norm": 0.6758455634117126, + "learning_rate": 0.0015, + "loss": 1.5336, + "step": 3730 + }, + { + "epoch": 0.39451476793248946, + "grad_norm": 0.684499204158783, + "learning_rate": 0.0015, + "loss": 1.5345, + "step": 3740 + }, + { + "epoch": 0.39556962025316456, + "grad_norm": 0.6756916642189026, + "learning_rate": 0.0015, + "loss": 1.5377, + "step": 3750 + }, + { + "epoch": 0.39662447257383965, + "grad_norm": 0.7286632657051086, + "learning_rate": 0.0015, + "loss": 1.5262, + "step": 3760 + }, + { + "epoch": 0.39767932489451474, + "grad_norm": 0.7097970247268677, + "learning_rate": 0.0015, + "loss": 1.5429, + "step": 3770 + }, + { + "epoch": 
0.3987341772151899, + "grad_norm": 0.8133723139762878, + "learning_rate": 0.0015, + "loss": 1.5294, + "step": 3780 + }, + { + "epoch": 0.399789029535865, + "grad_norm": 1.2429341077804565, + "learning_rate": 0.0015, + "loss": 1.536, + "step": 3790 + }, + { + "epoch": 0.4008438818565401, + "grad_norm": 0.9785285592079163, + "learning_rate": 0.0015, + "loss": 1.5421, + "step": 3800 + }, + { + "epoch": 0.40189873417721517, + "grad_norm": 0.9460749626159668, + "learning_rate": 0.0015, + "loss": 1.5219, + "step": 3810 + }, + { + "epoch": 0.4029535864978903, + "grad_norm": 0.7399861812591553, + "learning_rate": 0.0015, + "loss": 1.5182, + "step": 3820 + }, + { + "epoch": 0.4040084388185654, + "grad_norm": 0.8449380993843079, + "learning_rate": 0.0015, + "loss": 1.5223, + "step": 3830 + }, + { + "epoch": 0.4050632911392405, + "grad_norm": 0.6383181810379028, + "learning_rate": 0.0015, + "loss": 1.5381, + "step": 3840 + }, + { + "epoch": 0.4061181434599156, + "grad_norm": 0.8886164426803589, + "learning_rate": 0.0015, + "loss": 1.523, + "step": 3850 + }, + { + "epoch": 0.40717299578059074, + "grad_norm": 0.8363742828369141, + "learning_rate": 0.0015, + "loss": 1.5375, + "step": 3860 + }, + { + "epoch": 0.40822784810126583, + "grad_norm": 1.1316901445388794, + "learning_rate": 0.0015, + "loss": 1.528, + "step": 3870 + }, + { + "epoch": 0.4092827004219409, + "grad_norm": 0.7917295098304749, + "learning_rate": 0.0015, + "loss": 1.5183, + "step": 3880 + }, + { + "epoch": 0.410337552742616, + "grad_norm": 0.827797532081604, + "learning_rate": 0.0015, + "loss": 1.5166, + "step": 3890 + }, + { + "epoch": 0.41139240506329117, + "grad_norm": 1.3193960189819336, + "learning_rate": 0.0015, + "loss": 1.5185, + "step": 3900 + }, + { + "epoch": 0.41244725738396626, + "grad_norm": 0.7750526070594788, + "learning_rate": 0.0015, + "loss": 1.5224, + "step": 3910 + }, + { + "epoch": 0.41350210970464135, + "grad_norm": 0.7234126925468445, + "learning_rate": 0.0015, + "loss": 1.5166, + "step": 
3920 + }, + { + "epoch": 0.41455696202531644, + "grad_norm": 0.7168633341789246, + "learning_rate": 0.0015, + "loss": 1.5236, + "step": 3930 + }, + { + "epoch": 0.41561181434599154, + "grad_norm": 0.6642080545425415, + "learning_rate": 0.0015, + "loss": 1.5189, + "step": 3940 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 0.7708320021629333, + "learning_rate": 0.0015, + "loss": 1.5248, + "step": 3950 + }, + { + "epoch": 0.4177215189873418, + "grad_norm": 0.9721519351005554, + "learning_rate": 0.0015, + "loss": 1.5226, + "step": 3960 + }, + { + "epoch": 0.41877637130801687, + "grad_norm": 0.7849475741386414, + "learning_rate": 0.0015, + "loss": 1.5168, + "step": 3970 + }, + { + "epoch": 0.41983122362869196, + "grad_norm": 0.6269410848617554, + "learning_rate": 0.0015, + "loss": 1.5193, + "step": 3980 + }, + { + "epoch": 0.4208860759493671, + "grad_norm": 0.7435353398323059, + "learning_rate": 0.0015, + "loss": 1.5129, + "step": 3990 + }, + { + "epoch": 0.4219409282700422, + "grad_norm": 0.7428051233291626, + "learning_rate": 0.0015, + "loss": 1.5312, + "step": 4000 + }, + { + "epoch": 0.4229957805907173, + "grad_norm": 0.7038542032241821, + "learning_rate": 0.0015, + "loss": 1.5183, + "step": 4010 + }, + { + "epoch": 0.4240506329113924, + "grad_norm": 0.8199252486228943, + "learning_rate": 0.0015, + "loss": 1.5225, + "step": 4020 + }, + { + "epoch": 0.42510548523206754, + "grad_norm": 0.6119398474693298, + "learning_rate": 0.0015, + "loss": 1.5098, + "step": 4030 + }, + { + "epoch": 0.42616033755274263, + "grad_norm": 0.9010416269302368, + "learning_rate": 0.0015, + "loss": 1.5203, + "step": 4040 + }, + { + "epoch": 0.4272151898734177, + "grad_norm": 0.7067353129386902, + "learning_rate": 0.0015, + "loss": 1.5237, + "step": 4050 + }, + { + "epoch": 0.4282700421940928, + "grad_norm": 0.8889469504356384, + "learning_rate": 0.0015, + "loss": 1.5271, + "step": 4060 + }, + { + "epoch": 0.4293248945147679, + "grad_norm": 0.8620189428329468, + "learning_rate": 
0.0015, + "loss": 1.5225, + "step": 4070 + }, + { + "epoch": 0.43037974683544306, + "grad_norm": 0.7344073057174683, + "learning_rate": 0.0015, + "loss": 1.5228, + "step": 4080 + }, + { + "epoch": 0.43143459915611815, + "grad_norm": 0.814547598361969, + "learning_rate": 0.0015, + "loss": 1.5183, + "step": 4090 + }, + { + "epoch": 0.43248945147679324, + "grad_norm": 0.6589001417160034, + "learning_rate": 0.0015, + "loss": 1.5175, + "step": 4100 + }, + { + "epoch": 0.43354430379746833, + "grad_norm": 0.7324472665786743, + "learning_rate": 0.0015, + "loss": 1.5121, + "step": 4110 + }, + { + "epoch": 0.4345991561181435, + "grad_norm": 0.8498498797416687, + "learning_rate": 0.0015, + "loss": 1.5157, + "step": 4120 + }, + { + "epoch": 0.4356540084388186, + "grad_norm": 0.7202371954917908, + "learning_rate": 0.0015, + "loss": 1.5067, + "step": 4130 + }, + { + "epoch": 0.43670886075949367, + "grad_norm": 0.857814371585846, + "learning_rate": 0.0015, + "loss": 1.5137, + "step": 4140 + }, + { + "epoch": 0.43776371308016876, + "grad_norm": 0.7166550755500793, + "learning_rate": 0.0015, + "loss": 1.5196, + "step": 4150 + }, + { + "epoch": 0.4388185654008439, + "grad_norm": 0.6573425531387329, + "learning_rate": 0.0015, + "loss": 1.5108, + "step": 4160 + }, + { + "epoch": 0.439873417721519, + "grad_norm": 0.7102767825126648, + "learning_rate": 0.0015, + "loss": 1.5099, + "step": 4170 + }, + { + "epoch": 0.4409282700421941, + "grad_norm": 0.7615636587142944, + "learning_rate": 0.0015, + "loss": 1.5059, + "step": 4180 + }, + { + "epoch": 0.4419831223628692, + "grad_norm": 0.674432098865509, + "learning_rate": 0.0015, + "loss": 1.5149, + "step": 4190 + }, + { + "epoch": 0.4430379746835443, + "grad_norm": 0.6192401647567749, + "learning_rate": 0.0015, + "loss": 1.515, + "step": 4200 + }, + { + "epoch": 0.4440928270042194, + "grad_norm": 1.0639270544052124, + "learning_rate": 0.0015, + "loss": 1.505, + "step": 4210 + }, + { + "epoch": 0.4451476793248945, + "grad_norm": 
0.6360841393470764, + "learning_rate": 0.0015, + "loss": 1.5128, + "step": 4220 + }, + { + "epoch": 0.4462025316455696, + "grad_norm": 1.2504181861877441, + "learning_rate": 0.0015, + "loss": 1.5067, + "step": 4230 + }, + { + "epoch": 0.4472573839662447, + "grad_norm": 0.662340521812439, + "learning_rate": 0.0015, + "loss": 1.5099, + "step": 4240 + }, + { + "epoch": 0.44831223628691985, + "grad_norm": 0.6765600442886353, + "learning_rate": 0.0015, + "loss": 1.5112, + "step": 4250 + }, + { + "epoch": 0.44936708860759494, + "grad_norm": 1.038103699684143, + "learning_rate": 0.0015, + "loss": 1.5187, + "step": 4260 + }, + { + "epoch": 0.45042194092827004, + "grad_norm": 0.8971492052078247, + "learning_rate": 0.0015, + "loss": 1.5081, + "step": 4270 + }, + { + "epoch": 0.45147679324894513, + "grad_norm": 0.8223301768302917, + "learning_rate": 0.0015, + "loss": 1.5006, + "step": 4280 + }, + { + "epoch": 0.4525316455696203, + "grad_norm": 1.0835819244384766, + "learning_rate": 0.0015, + "loss": 1.5128, + "step": 4290 + }, + { + "epoch": 0.45358649789029537, + "grad_norm": 0.817401111125946, + "learning_rate": 0.0015, + "loss": 1.512, + "step": 4300 + }, + { + "epoch": 0.45464135021097046, + "grad_norm": 0.6971426606178284, + "learning_rate": 0.0015, + "loss": 1.5072, + "step": 4310 + }, + { + "epoch": 0.45569620253164556, + "grad_norm": 1.0024466514587402, + "learning_rate": 0.0015, + "loss": 1.5059, + "step": 4320 + }, + { + "epoch": 0.45675105485232065, + "grad_norm": 0.6802116632461548, + "learning_rate": 0.0015, + "loss": 1.511, + "step": 4330 + }, + { + "epoch": 0.4578059071729958, + "grad_norm": 0.7710457444190979, + "learning_rate": 0.0015, + "loss": 1.5041, + "step": 4340 + }, + { + "epoch": 0.4588607594936709, + "grad_norm": 0.6620674133300781, + "learning_rate": 0.0015, + "loss": 1.5056, + "step": 4350 + }, + { + "epoch": 0.459915611814346, + "grad_norm": 0.9944728016853333, + "learning_rate": 0.0015, + "loss": 1.5216, + "step": 4360 + }, + { + "epoch": 
0.4609704641350211, + "grad_norm": 0.8333024382591248, + "learning_rate": 0.0015, + "loss": 1.4978, + "step": 4370 + }, + { + "epoch": 0.4620253164556962, + "grad_norm": 0.8024762868881226, + "learning_rate": 0.0015, + "loss": 1.505, + "step": 4380 + }, + { + "epoch": 0.4630801687763713, + "grad_norm": 0.731507420539856, + "learning_rate": 0.0015, + "loss": 1.5014, + "step": 4390 + }, + { + "epoch": 0.4641350210970464, + "grad_norm": 0.6970316767692566, + "learning_rate": 0.0015, + "loss": 1.5071, + "step": 4400 + }, + { + "epoch": 0.4651898734177215, + "grad_norm": 0.768801748752594, + "learning_rate": 0.0015, + "loss": 1.5234, + "step": 4410 + }, + { + "epoch": 0.46624472573839665, + "grad_norm": 0.8452940583229065, + "learning_rate": 0.0015, + "loss": 1.5037, + "step": 4420 + }, + { + "epoch": 0.46729957805907174, + "grad_norm": 0.7110008597373962, + "learning_rate": 0.0015, + "loss": 1.5044, + "step": 4430 + }, + { + "epoch": 0.46835443037974683, + "grad_norm": 0.8797896504402161, + "learning_rate": 0.0015, + "loss": 1.5044, + "step": 4440 + }, + { + "epoch": 0.4694092827004219, + "grad_norm": 0.7840330600738525, + "learning_rate": 0.0015, + "loss": 1.5089, + "step": 4450 + }, + { + "epoch": 0.4704641350210971, + "grad_norm": 0.9131385684013367, + "learning_rate": 0.0015, + "loss": 1.4977, + "step": 4460 + }, + { + "epoch": 0.47151898734177217, + "grad_norm": 0.6851815581321716, + "learning_rate": 0.0015, + "loss": 1.5104, + "step": 4470 + }, + { + "epoch": 0.47257383966244726, + "grad_norm": 0.8572779297828674, + "learning_rate": 0.0015, + "loss": 1.5112, + "step": 4480 + }, + { + "epoch": 0.47362869198312235, + "grad_norm": 0.6729965806007385, + "learning_rate": 0.0015, + "loss": 1.5007, + "step": 4490 + }, + { + "epoch": 0.47468354430379744, + "grad_norm": 0.7328243851661682, + "learning_rate": 0.0015, + "loss": 1.4995, + "step": 4500 + }, + { + "epoch": 0.4757383966244726, + "grad_norm": 0.6300152540206909, + "learning_rate": 0.0015, + "loss": 1.5042, + 
"step": 4510 + }, + { + "epoch": 0.4767932489451477, + "grad_norm": 0.7402103543281555, + "learning_rate": 0.0015, + "loss": 1.4821, + "step": 4520 + }, + { + "epoch": 0.4778481012658228, + "grad_norm": 0.8757051229476929, + "learning_rate": 0.0015, + "loss": 1.5048, + "step": 4530 + }, + { + "epoch": 0.47890295358649787, + "grad_norm": 0.7290232181549072, + "learning_rate": 0.0015, + "loss": 1.509, + "step": 4540 + }, + { + "epoch": 0.479957805907173, + "grad_norm": 0.7419838905334473, + "learning_rate": 0.0015, + "loss": 1.5069, + "step": 4550 + }, + { + "epoch": 0.4810126582278481, + "grad_norm": 0.8696455955505371, + "learning_rate": 0.0015, + "loss": 1.494, + "step": 4560 + }, + { + "epoch": 0.4820675105485232, + "grad_norm": 0.7075020670890808, + "learning_rate": 0.0015, + "loss": 1.4759, + "step": 4570 + }, + { + "epoch": 0.4831223628691983, + "grad_norm": 0.7586351633071899, + "learning_rate": 0.0015, + "loss": 1.4997, + "step": 4580 + }, + { + "epoch": 0.48417721518987344, + "grad_norm": 0.8951983451843262, + "learning_rate": 0.0015, + "loss": 1.4865, + "step": 4590 + }, + { + "epoch": 0.48523206751054854, + "grad_norm": 0.8391162157058716, + "learning_rate": 0.0015, + "loss": 1.5029, + "step": 4600 + }, + { + "epoch": 0.48628691983122363, + "grad_norm": 0.8178354501724243, + "learning_rate": 0.0015, + "loss": 1.5132, + "step": 4610 + }, + { + "epoch": 0.4873417721518987, + "grad_norm": 0.7619510293006897, + "learning_rate": 0.0015, + "loss": 1.4855, + "step": 4620 + }, + { + "epoch": 0.4883966244725738, + "grad_norm": 0.6087182760238647, + "learning_rate": 0.0015, + "loss": 1.5023, + "step": 4630 + }, + { + "epoch": 0.48945147679324896, + "grad_norm": 0.6939111351966858, + "learning_rate": 0.0015, + "loss": 1.5046, + "step": 4640 + }, + { + "epoch": 0.49050632911392406, + "grad_norm": 0.5982709527015686, + "learning_rate": 0.0015, + "loss": 1.5049, + "step": 4650 + }, + { + "epoch": 0.49156118143459915, + "grad_norm": 0.7674986720085144, + 
"learning_rate": 0.0015, + "loss": 1.4921, + "step": 4660 + }, + { + "epoch": 0.49261603375527424, + "grad_norm": 0.5965104699134827, + "learning_rate": 0.0015, + "loss": 1.4942, + "step": 4670 + }, + { + "epoch": 0.4936708860759494, + "grad_norm": 0.650132417678833, + "learning_rate": 0.0015, + "loss": 1.4914, + "step": 4680 + }, + { + "epoch": 0.4947257383966245, + "grad_norm": 0.7628523111343384, + "learning_rate": 0.0015, + "loss": 1.4836, + "step": 4690 + }, + { + "epoch": 0.4957805907172996, + "grad_norm": 0.8797044157981873, + "learning_rate": 0.0015, + "loss": 1.498, + "step": 4700 + }, + { + "epoch": 0.49683544303797467, + "grad_norm": 0.8594357967376709, + "learning_rate": 0.0015, + "loss": 1.5004, + "step": 4710 + }, + { + "epoch": 0.4978902953586498, + "grad_norm": 1.8891788721084595, + "learning_rate": 0.0015, + "loss": 1.4982, + "step": 4720 + }, + { + "epoch": 0.4989451476793249, + "grad_norm": 1.3719761371612549, + "learning_rate": 0.0015, + "loss": 1.4975, + "step": 4730 + }, + { + "epoch": 0.5, + "grad_norm": 0.7331068515777588, + "learning_rate": 0.0015, + "loss": 1.4856, + "step": 4740 + }, + { + "epoch": 0.5010548523206751, + "grad_norm": 0.7334670424461365, + "learning_rate": 0.0015, + "loss": 1.4986, + "step": 4750 + }, + { + "epoch": 0.5021097046413502, + "grad_norm": 0.6452269554138184, + "learning_rate": 0.0015, + "loss": 1.4867, + "step": 4760 + }, + { + "epoch": 0.5031645569620253, + "grad_norm": 0.636197566986084, + "learning_rate": 0.0015, + "loss": 1.4875, + "step": 4770 + }, + { + "epoch": 0.5042194092827004, + "grad_norm": 0.8560203909873962, + "learning_rate": 0.0015, + "loss": 1.5013, + "step": 4780 + }, + { + "epoch": 0.5052742616033755, + "grad_norm": 0.6279614567756653, + "learning_rate": 0.0015, + "loss": 1.4834, + "step": 4790 + }, + { + "epoch": 0.5063291139240507, + "grad_norm": 0.8693887591362, + "learning_rate": 0.0015, + "loss": 1.4909, + "step": 4800 + }, + { + "epoch": 0.5073839662447257, + "grad_norm": 
0.6598283648490906, + "learning_rate": 0.0015, + "loss": 1.4912, + "step": 4810 + }, + { + "epoch": 0.5084388185654009, + "grad_norm": 0.640850305557251, + "learning_rate": 0.0015, + "loss": 1.5098, + "step": 4820 + }, + { + "epoch": 0.509493670886076, + "grad_norm": 0.6679250597953796, + "learning_rate": 0.0015, + "loss": 1.4943, + "step": 4830 + }, + { + "epoch": 0.510548523206751, + "grad_norm": 0.9556334018707275, + "learning_rate": 0.0015, + "loss": 1.4902, + "step": 4840 + }, + { + "epoch": 0.5116033755274262, + "grad_norm": 0.6685370802879333, + "learning_rate": 0.0015, + "loss": 1.4957, + "step": 4850 + }, + { + "epoch": 0.5126582278481012, + "grad_norm": 0.7409107685089111, + "learning_rate": 0.0015, + "loss": 1.4849, + "step": 4860 + }, + { + "epoch": 0.5137130801687764, + "grad_norm": 0.6716946363449097, + "learning_rate": 0.0015, + "loss": 1.481, + "step": 4870 + }, + { + "epoch": 0.5147679324894515, + "grad_norm": 0.6936619877815247, + "learning_rate": 0.0015, + "loss": 1.5001, + "step": 4880 + }, + { + "epoch": 0.5158227848101266, + "grad_norm": 0.6151592135429382, + "learning_rate": 0.0015, + "loss": 1.4895, + "step": 4890 + }, + { + "epoch": 0.5168776371308017, + "grad_norm": 0.5886061191558838, + "learning_rate": 0.0015, + "loss": 1.4987, + "step": 4900 + }, + { + "epoch": 0.5179324894514767, + "grad_norm": 1.0712522268295288, + "learning_rate": 0.0015, + "loss": 1.4953, + "step": 4910 + }, + { + "epoch": 0.5189873417721519, + "grad_norm": 0.6144120097160339, + "learning_rate": 0.0015, + "loss": 1.4784, + "step": 4920 + }, + { + "epoch": 0.520042194092827, + "grad_norm": 0.6724780797958374, + "learning_rate": 0.0015, + "loss": 1.4895, + "step": 4930 + }, + { + "epoch": 0.5210970464135021, + "grad_norm": 0.7050338983535767, + "learning_rate": 0.0015, + "loss": 1.4887, + "step": 4940 + }, + { + "epoch": 0.5221518987341772, + "grad_norm": 0.6367285251617432, + "learning_rate": 0.0015, + "loss": 1.4956, + "step": 4950 + }, + { + "epoch": 
0.5232067510548524, + "grad_norm": 0.8847823143005371, + "learning_rate": 0.0015, + "loss": 1.4806, + "step": 4960 + }, + { + "epoch": 0.5242616033755274, + "grad_norm": 0.7965973615646362, + "learning_rate": 0.0015, + "loss": 1.4838, + "step": 4970 + }, + { + "epoch": 0.5253164556962026, + "grad_norm": 0.6900337934494019, + "learning_rate": 0.0015, + "loss": 1.4839, + "step": 4980 + }, + { + "epoch": 0.5263713080168776, + "grad_norm": 0.730391800403595, + "learning_rate": 0.0015, + "loss": 1.4842, + "step": 4990 + }, + { + "epoch": 0.5274261603375527, + "grad_norm": 0.8101289868354797, + "learning_rate": 0.0015, + "loss": 1.4961, + "step": 5000 + }, + { + "epoch": 0.5284810126582279, + "grad_norm": 0.7348197102546692, + "learning_rate": 0.0015, + "loss": 1.4868, + "step": 5010 + }, + { + "epoch": 0.5295358649789029, + "grad_norm": 0.7244386076927185, + "learning_rate": 0.0015, + "loss": 1.4812, + "step": 5020 + }, + { + "epoch": 0.5305907172995781, + "grad_norm": 0.8366394639015198, + "learning_rate": 0.0015, + "loss": 1.4672, + "step": 5030 + }, + { + "epoch": 0.5316455696202531, + "grad_norm": 0.7258561253547668, + "learning_rate": 0.0015, + "loss": 1.4932, + "step": 5040 + }, + { + "epoch": 0.5327004219409283, + "grad_norm": 0.8292760252952576, + "learning_rate": 0.0015, + "loss": 1.4836, + "step": 5050 + }, + { + "epoch": 0.5337552742616034, + "grad_norm": 0.7079561352729797, + "learning_rate": 0.0015, + "loss": 1.4952, + "step": 5060 + }, + { + "epoch": 0.5348101265822784, + "grad_norm": 0.7570391893386841, + "learning_rate": 0.0015, + "loss": 1.4802, + "step": 5070 + }, + { + "epoch": 0.5358649789029536, + "grad_norm": 0.5928521156311035, + "learning_rate": 0.0015, + "loss": 1.4947, + "step": 5080 + }, + { + "epoch": 0.5369198312236287, + "grad_norm": 0.647959291934967, + "learning_rate": 0.0015, + "loss": 1.486, + "step": 5090 + }, + { + "epoch": 0.5379746835443038, + "grad_norm": 0.6214932799339294, + "learning_rate": 0.0015, + "loss": 1.469, + "step": 
5100 + }, + { + "epoch": 0.5390295358649789, + "grad_norm": 0.6665555834770203, + "learning_rate": 0.0015, + "loss": 1.4815, + "step": 5110 + }, + { + "epoch": 0.540084388185654, + "grad_norm": 1.064918041229248, + "learning_rate": 0.0015, + "loss": 1.4817, + "step": 5120 + }, + { + "epoch": 0.5411392405063291, + "grad_norm": 0.8128167986869812, + "learning_rate": 0.0015, + "loss": 1.4798, + "step": 5130 + }, + { + "epoch": 0.5421940928270043, + "grad_norm": 0.9505357146263123, + "learning_rate": 0.0015, + "loss": 1.5004, + "step": 5140 + }, + { + "epoch": 0.5432489451476793, + "grad_norm": 0.8126603364944458, + "learning_rate": 0.0015, + "loss": 1.487, + "step": 5150 + }, + { + "epoch": 0.5443037974683544, + "grad_norm": 1.0123765468597412, + "learning_rate": 0.0015, + "loss": 1.4817, + "step": 5160 + }, + { + "epoch": 0.5453586497890295, + "grad_norm": 0.9351062774658203, + "learning_rate": 0.0015, + "loss": 1.4812, + "step": 5170 + }, + { + "epoch": 0.5464135021097046, + "grad_norm": 0.8817696571350098, + "learning_rate": 0.0015, + "loss": 1.4935, + "step": 5180 + }, + { + "epoch": 0.5474683544303798, + "grad_norm": 0.8254026174545288, + "learning_rate": 0.0015, + "loss": 1.4747, + "step": 5190 + }, + { + "epoch": 0.5485232067510548, + "grad_norm": 0.6571090817451477, + "learning_rate": 0.0015, + "loss": 1.4845, + "step": 5200 + }, + { + "epoch": 0.54957805907173, + "grad_norm": 1.3422095775604248, + "learning_rate": 0.0015, + "loss": 1.4716, + "step": 5210 + }, + { + "epoch": 0.5506329113924051, + "grad_norm": 0.9930456280708313, + "learning_rate": 0.0015, + "loss": 1.4826, + "step": 5220 + }, + { + "epoch": 0.5516877637130801, + "grad_norm": 0.8384245038032532, + "learning_rate": 0.0015, + "loss": 1.4756, + "step": 5230 + }, + { + "epoch": 0.5527426160337553, + "grad_norm": 0.6357622742652893, + "learning_rate": 0.0015, + "loss": 1.484, + "step": 5240 + }, + { + "epoch": 0.5537974683544303, + "grad_norm": 0.6911512017250061, + "learning_rate": 0.0015, + 
"loss": 1.4746, + "step": 5250 + }, + { + "epoch": 0.5548523206751055, + "grad_norm": 0.6909387111663818, + "learning_rate": 0.0015, + "loss": 1.476, + "step": 5260 + }, + { + "epoch": 0.5559071729957806, + "grad_norm": 0.571313738822937, + "learning_rate": 0.0015, + "loss": 1.487, + "step": 5270 + }, + { + "epoch": 0.5569620253164557, + "grad_norm": 0.8499085903167725, + "learning_rate": 0.0015, + "loss": 1.4791, + "step": 5280 + }, + { + "epoch": 0.5580168776371308, + "grad_norm": 0.6986479759216309, + "learning_rate": 0.0015, + "loss": 1.4792, + "step": 5290 + }, + { + "epoch": 0.5590717299578059, + "grad_norm": 0.6757096648216248, + "learning_rate": 0.0015, + "loss": 1.4829, + "step": 5300 + }, + { + "epoch": 0.560126582278481, + "grad_norm": 0.7170940041542053, + "learning_rate": 0.0015, + "loss": 1.4721, + "step": 5310 + }, + { + "epoch": 0.5611814345991561, + "grad_norm": 0.7288855314254761, + "learning_rate": 0.0015, + "loss": 1.4693, + "step": 5320 + }, + { + "epoch": 0.5622362869198312, + "grad_norm": 1.109542965888977, + "learning_rate": 0.0015, + "loss": 1.4867, + "step": 5330 + }, + { + "epoch": 0.5632911392405063, + "grad_norm": 0.742298424243927, + "learning_rate": 0.0015, + "loss": 1.4754, + "step": 5340 + }, + { + "epoch": 0.5643459915611815, + "grad_norm": 0.7660397291183472, + "learning_rate": 0.0015, + "loss": 1.4663, + "step": 5350 + }, + { + "epoch": 0.5654008438818565, + "grad_norm": 0.8228219151496887, + "learning_rate": 0.0015, + "loss": 1.478, + "step": 5360 + }, + { + "epoch": 0.5664556962025317, + "grad_norm": 0.6834386587142944, + "learning_rate": 0.0015, + "loss": 1.4783, + "step": 5370 + }, + { + "epoch": 0.5675105485232067, + "grad_norm": 0.6891368627548218, + "learning_rate": 0.0015, + "loss": 1.4695, + "step": 5380 + }, + { + "epoch": 0.5685654008438819, + "grad_norm": 0.8292567729949951, + "learning_rate": 0.0015, + "loss": 1.471, + "step": 5390 + }, + { + "epoch": 0.569620253164557, + "grad_norm": 0.7165568470954895, + 
"learning_rate": 0.0015, + "loss": 1.4741, + "step": 5400 + }, + { + "epoch": 0.570675105485232, + "grad_norm": 0.6814695596694946, + "learning_rate": 0.0015, + "loss": 1.4745, + "step": 5410 + }, + { + "epoch": 0.5717299578059072, + "grad_norm": 0.7345909476280212, + "learning_rate": 0.0015, + "loss": 1.4689, + "step": 5420 + }, + { + "epoch": 0.5727848101265823, + "grad_norm": 0.7443748116493225, + "learning_rate": 0.0015, + "loss": 1.4647, + "step": 5430 + }, + { + "epoch": 0.5738396624472574, + "grad_norm": 1.2326922416687012, + "learning_rate": 0.0015, + "loss": 1.466, + "step": 5440 + }, + { + "epoch": 0.5748945147679325, + "grad_norm": 0.8606684803962708, + "learning_rate": 0.0015, + "loss": 1.4725, + "step": 5450 + }, + { + "epoch": 0.5759493670886076, + "grad_norm": 0.6423730254173279, + "learning_rate": 0.0015, + "loss": 1.4701, + "step": 5460 + }, + { + "epoch": 0.5770042194092827, + "grad_norm": 0.6081930994987488, + "learning_rate": 0.0015, + "loss": 1.4702, + "step": 5470 + }, + { + "epoch": 0.5780590717299579, + "grad_norm": 1.188807725906372, + "learning_rate": 0.0015, + "loss": 1.4749, + "step": 5480 + }, + { + "epoch": 0.5791139240506329, + "grad_norm": 0.6775677800178528, + "learning_rate": 0.0015, + "loss": 1.4821, + "step": 5490 + }, + { + "epoch": 0.580168776371308, + "grad_norm": 0.7700093984603882, + "learning_rate": 0.0015, + "loss": 1.469, + "step": 5500 + }, + { + "epoch": 0.5812236286919831, + "grad_norm": 0.9888231754302979, + "learning_rate": 0.0015, + "loss": 1.4768, + "step": 5510 + }, + { + "epoch": 0.5822784810126582, + "grad_norm": 0.6557987928390503, + "learning_rate": 0.0015, + "loss": 1.4688, + "step": 5520 + }, + { + "epoch": 0.5833333333333334, + "grad_norm": 0.6790832281112671, + "learning_rate": 0.0015, + "loss": 1.462, + "step": 5530 + }, + { + "epoch": 0.5843881856540084, + "grad_norm": 0.6496003270149231, + "learning_rate": 0.0015, + "loss": 1.4598, + "step": 5540 + }, + { + "epoch": 0.5854430379746836, + "grad_norm": 
0.7932361364364624, + "learning_rate": 0.0015, + "loss": 1.4777, + "step": 5550 + }, + { + "epoch": 0.5864978902953587, + "grad_norm": 1.1256890296936035, + "learning_rate": 0.0015, + "loss": 1.4848, + "step": 5560 + }, + { + "epoch": 0.5875527426160337, + "grad_norm": 0.629036545753479, + "learning_rate": 0.0015, + "loss": 1.4703, + "step": 5570 + }, + { + "epoch": 0.5886075949367089, + "grad_norm": 0.9802296161651611, + "learning_rate": 0.0015, + "loss": 1.4657, + "step": 5580 + }, + { + "epoch": 0.5896624472573839, + "grad_norm": 0.9266071915626526, + "learning_rate": 0.0015, + "loss": 1.47, + "step": 5590 + }, + { + "epoch": 0.5907172995780591, + "grad_norm": 0.6785564422607422, + "learning_rate": 0.0015, + "loss": 1.4654, + "step": 5600 + }, + { + "epoch": 0.5917721518987342, + "grad_norm": 0.8482469320297241, + "learning_rate": 0.0015, + "loss": 1.472, + "step": 5610 + }, + { + "epoch": 0.5928270042194093, + "grad_norm": 0.6671830415725708, + "learning_rate": 0.0015, + "loss": 1.4795, + "step": 5620 + }, + { + "epoch": 0.5938818565400844, + "grad_norm": 0.8174152970314026, + "learning_rate": 0.0015, + "loss": 1.4726, + "step": 5630 + }, + { + "epoch": 0.5949367088607594, + "grad_norm": 0.657257616519928, + "learning_rate": 0.0015, + "loss": 1.4817, + "step": 5640 + }, + { + "epoch": 0.5959915611814346, + "grad_norm": 0.6925194263458252, + "learning_rate": 0.0015, + "loss": 1.4736, + "step": 5650 + }, + { + "epoch": 0.5970464135021097, + "grad_norm": 0.8189612030982971, + "learning_rate": 0.0015, + "loss": 1.4729, + "step": 5660 + }, + { + "epoch": 0.5981012658227848, + "grad_norm": 0.5913254022598267, + "learning_rate": 0.0015, + "loss": 1.4785, + "step": 5670 + }, + { + "epoch": 0.5991561181434599, + "grad_norm": 0.7870919704437256, + "learning_rate": 0.0015, + "loss": 1.4781, + "step": 5680 + }, + { + "epoch": 0.6002109704641351, + "grad_norm": 1.8005049228668213, + "learning_rate": 0.0015, + "loss": 1.4582, + "step": 5690 + }, + { + "epoch": 
0.6012658227848101, + "grad_norm": 1.3186218738555908, + "learning_rate": 0.0015, + "loss": 1.4725, + "step": 5700 + }, + { + "epoch": 0.6023206751054853, + "grad_norm": 0.7971305251121521, + "learning_rate": 0.0015, + "loss": 1.4646, + "step": 5710 + }, + { + "epoch": 0.6033755274261603, + "grad_norm": 0.7504110932350159, + "learning_rate": 0.0015, + "loss": 1.4683, + "step": 5720 + }, + { + "epoch": 0.6044303797468354, + "grad_norm": 0.6676053404808044, + "learning_rate": 0.0015, + "loss": 1.4642, + "step": 5730 + }, + { + "epoch": 0.6054852320675106, + "grad_norm": 0.7062017917633057, + "learning_rate": 0.0015, + "loss": 1.4643, + "step": 5740 + }, + { + "epoch": 0.6065400843881856, + "grad_norm": 1.3282155990600586, + "learning_rate": 0.0015, + "loss": 1.4399, + "step": 5750 + }, + { + "epoch": 0.6075949367088608, + "grad_norm": 0.7391160726547241, + "learning_rate": 0.0015, + "loss": 1.4523, + "step": 5760 + }, + { + "epoch": 0.6086497890295358, + "grad_norm": 1.1405787467956543, + "learning_rate": 0.0015, + "loss": 1.4538, + "step": 5770 + }, + { + "epoch": 0.609704641350211, + "grad_norm": 0.6818697452545166, + "learning_rate": 0.0015, + "loss": 1.4609, + "step": 5780 + }, + { + "epoch": 0.6107594936708861, + "grad_norm": 0.7080982327461243, + "learning_rate": 0.0015, + "loss": 1.459, + "step": 5790 + }, + { + "epoch": 0.6118143459915611, + "grad_norm": 0.8231603503227234, + "learning_rate": 0.0015, + "loss": 1.464, + "step": 5800 + }, + { + "epoch": 0.6128691983122363, + "grad_norm": 0.6718677878379822, + "learning_rate": 0.0015, + "loss": 1.4686, + "step": 5810 + }, + { + "epoch": 0.6139240506329114, + "grad_norm": 0.7346148490905762, + "learning_rate": 0.0015, + "loss": 1.4813, + "step": 5820 + }, + { + "epoch": 0.6149789029535865, + "grad_norm": 0.706602931022644, + "learning_rate": 0.0015, + "loss": 1.4639, + "step": 5830 + }, + { + "epoch": 0.6160337552742616, + "grad_norm": 0.6805515289306641, + "learning_rate": 0.0015, + "loss": 1.477, + "step": 5840 
+ }, + { + "epoch": 0.6170886075949367, + "grad_norm": 0.9167012572288513, + "learning_rate": 0.0015, + "loss": 1.4809, + "step": 5850 + }, + { + "epoch": 0.6181434599156118, + "grad_norm": 0.9430621266365051, + "learning_rate": 0.0015, + "loss": 1.4684, + "step": 5860 + }, + { + "epoch": 0.619198312236287, + "grad_norm": 1.0335649251937866, + "learning_rate": 0.0015, + "loss": 1.463, + "step": 5870 + }, + { + "epoch": 0.620253164556962, + "grad_norm": 0.7511252164840698, + "learning_rate": 0.0015, + "loss": 1.4693, + "step": 5880 + }, + { + "epoch": 0.6213080168776371, + "grad_norm": 0.8173877596855164, + "learning_rate": 0.0015, + "loss": 1.4615, + "step": 5890 + }, + { + "epoch": 0.6223628691983122, + "grad_norm": 0.6442335247993469, + "learning_rate": 0.0015, + "loss": 1.4637, + "step": 5900 + }, + { + "epoch": 0.6234177215189873, + "grad_norm": 0.9126557111740112, + "learning_rate": 0.0015, + "loss": 1.458, + "step": 5910 + }, + { + "epoch": 0.6244725738396625, + "grad_norm": 0.7762312293052673, + "learning_rate": 0.0015, + "loss": 1.4525, + "step": 5920 + }, + { + "epoch": 0.6255274261603375, + "grad_norm": 1.5072693824768066, + "learning_rate": 0.0015, + "loss": 1.4624, + "step": 5930 + }, + { + "epoch": 0.6265822784810127, + "grad_norm": 0.7015624642372131, + "learning_rate": 0.0015, + "loss": 1.4675, + "step": 5940 + }, + { + "epoch": 0.6276371308016878, + "grad_norm": 1.0417861938476562, + "learning_rate": 0.0015, + "loss": 1.4561, + "step": 5950 + }, + { + "epoch": 0.6286919831223629, + "grad_norm": 0.6604360342025757, + "learning_rate": 0.0015, + "loss": 1.4633, + "step": 5960 + }, + { + "epoch": 0.629746835443038, + "grad_norm": 0.8248988389968872, + "learning_rate": 0.0015, + "loss": 1.4638, + "step": 5970 + }, + { + "epoch": 0.630801687763713, + "grad_norm": 0.5746728777885437, + "learning_rate": 0.0015, + "loss": 1.447, + "step": 5980 + }, + { + "epoch": 0.6318565400843882, + "grad_norm": 0.8695204854011536, + "learning_rate": 0.0015, + "loss": 
1.4686, + "step": 5990 + }, + { + "epoch": 0.6329113924050633, + "grad_norm": 1.322616696357727, + "learning_rate": 0.0015, + "loss": 1.4759, + "step": 6000 + }, + { + "epoch": 0.6339662447257384, + "grad_norm": 0.7655949592590332, + "learning_rate": 0.0015, + "loss": 1.4603, + "step": 6010 + }, + { + "epoch": 0.6350210970464135, + "grad_norm": 1.1184861660003662, + "learning_rate": 0.0015, + "loss": 1.4601, + "step": 6020 + }, + { + "epoch": 0.6360759493670886, + "grad_norm": 0.5791922807693481, + "learning_rate": 0.0015, + "loss": 1.4454, + "step": 6030 + }, + { + "epoch": 0.6371308016877637, + "grad_norm": 0.9653255939483643, + "learning_rate": 0.0015, + "loss": 1.4647, + "step": 6040 + }, + { + "epoch": 0.6381856540084389, + "grad_norm": 0.5939893126487732, + "learning_rate": 0.0015, + "loss": 1.445, + "step": 6050 + }, + { + "epoch": 0.6392405063291139, + "grad_norm": 0.6907451152801514, + "learning_rate": 0.0015, + "loss": 1.4666, + "step": 6060 + }, + { + "epoch": 0.640295358649789, + "grad_norm": 0.8181938529014587, + "learning_rate": 0.0015, + "loss": 1.4578, + "step": 6070 + }, + { + "epoch": 0.6413502109704642, + "grad_norm": 0.6147717833518982, + "learning_rate": 0.0015, + "loss": 1.4561, + "step": 6080 + }, + { + "epoch": 0.6424050632911392, + "grad_norm": 0.775211751461029, + "learning_rate": 0.0015, + "loss": 1.4567, + "step": 6090 + }, + { + "epoch": 0.6434599156118144, + "grad_norm": 0.6842384934425354, + "learning_rate": 0.0015, + "loss": 1.453, + "step": 6100 + }, + { + "epoch": 0.6445147679324894, + "grad_norm": 0.6017322540283203, + "learning_rate": 0.0015, + "loss": 1.4628, + "step": 6110 + }, + { + "epoch": 0.6455696202531646, + "grad_norm": 0.621711015701294, + "learning_rate": 0.0015, + "loss": 1.4567, + "step": 6120 + }, + { + "epoch": 0.6466244725738397, + "grad_norm": 0.6990188956260681, + "learning_rate": 0.0015, + "loss": 1.4547, + "step": 6130 + }, + { + "epoch": 0.6476793248945147, + "grad_norm": 0.7271523475646973, + 
"learning_rate": 0.0015, + "loss": 1.4616, + "step": 6140 + }, + { + "epoch": 0.6487341772151899, + "grad_norm": 0.8045366406440735, + "learning_rate": 0.0015, + "loss": 1.4651, + "step": 6150 + }, + { + "epoch": 0.6497890295358649, + "grad_norm": 0.758266031742096, + "learning_rate": 0.0015, + "loss": 1.459, + "step": 6160 + }, + { + "epoch": 0.6508438818565401, + "grad_norm": 0.7664952874183655, + "learning_rate": 0.001487560447745699, + "loss": 1.4359, + "step": 6170 + }, + { + "epoch": 0.6518987341772152, + "grad_norm": 0.833990216255188, + "learning_rate": 0.0014670566859713624, + "loss": 1.46, + "step": 6180 + }, + { + "epoch": 0.6529535864978903, + "grad_norm": 0.6131618618965149, + "learning_rate": 0.0014468355374162303, + "loss": 1.4626, + "step": 6190 + }, + { + "epoch": 0.6540084388185654, + "grad_norm": 0.7745141386985779, + "learning_rate": 0.0014268931066862504, + "loss": 1.4511, + "step": 6200 + }, + { + "epoch": 0.6550632911392406, + "grad_norm": 0.582751989364624, + "learning_rate": 0.0014072255520794614, + "loss": 1.4481, + "step": 6210 + }, + { + "epoch": 0.6561181434599156, + "grad_norm": 0.6661667823791504, + "learning_rate": 0.0013878290848459301, + "loss": 1.4547, + "step": 6220 + }, + { + "epoch": 0.6571729957805907, + "grad_norm": 0.6974245309829712, + "learning_rate": 0.0013686999684578874, + "loss": 1.443, + "step": 6230 + }, + { + "epoch": 0.6582278481012658, + "grad_norm": 0.6928418874740601, + "learning_rate": 0.001349834517889925, + "loss": 1.4486, + "step": 6240 + }, + { + "epoch": 0.6592827004219409, + "grad_norm": 0.880823016166687, + "learning_rate": 0.001331229098909114, + "loss": 1.4505, + "step": 6250 + }, + { + "epoch": 0.6603375527426161, + "grad_norm": 0.8757248520851135, + "learning_rate": 0.0013128801273749075, + "loss": 1.4444, + "step": 6260 + }, + { + "epoch": 0.6613924050632911, + "grad_norm": 0.7334885001182556, + "learning_rate": 0.0012947840685486932, + "loss": 1.4489, + "step": 6270 + }, + { + "epoch": 
0.6624472573839663, + "grad_norm": 0.6741816401481628, + "learning_rate": 0.0012769374364128628, + "loss": 1.4433, + "step": 6280 + }, + { + "epoch": 0.6635021097046413, + "grad_norm": 0.6987295746803284, + "learning_rate": 0.0012593367929992667, + "loss": 1.4416, + "step": 6290 + }, + { + "epoch": 0.6645569620253164, + "grad_norm": 0.6985065937042236, + "learning_rate": 0.0012419787477269257, + "loss": 1.4342, + "step": 6300 + }, + { + "epoch": 0.6656118143459916, + "grad_norm": 0.5744985342025757, + "learning_rate": 0.0012248599567488698, + "loss": 1.433, + "step": 6310 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.6331796050071716, + "learning_rate": 0.0012079771223079822, + "loss": 1.4294, + "step": 6320 + }, + { + "epoch": 0.6677215189873418, + "grad_norm": 0.5858669877052307, + "learning_rate": 0.0011913269921017202, + "loss": 1.4347, + "step": 6330 + }, + { + "epoch": 0.6687763713080169, + "grad_norm": 0.6251872777938843, + "learning_rate": 0.0011749063586555919, + "loss": 1.427, + "step": 6340 + }, + { + "epoch": 0.669831223628692, + "grad_norm": 0.6726731657981873, + "learning_rate": 0.001158712058705271, + "loss": 1.4335, + "step": 6350 + }, + { + "epoch": 0.6708860759493671, + "grad_norm": 0.7635205388069153, + "learning_rate": 0.0011427409725872262, + "loss": 1.431, + "step": 6360 + }, + { + "epoch": 0.6719409282700421, + "grad_norm": 0.6536107063293457, + "learning_rate": 0.00112699002363775, + "loss": 1.4124, + "step": 6370 + }, + { + "epoch": 0.6729957805907173, + "grad_norm": 1.0245848894119263, + "learning_rate": 0.0011114561776002726, + "loss": 1.415, + "step": 6380 + }, + { + "epoch": 0.6740506329113924, + "grad_norm": 0.9107499718666077, + "learning_rate": 0.001096136442040843, + "loss": 1.4283, + "step": 6390 + }, + { + "epoch": 0.6751054852320675, + "grad_norm": 0.7017847895622253, + "learning_rate": 0.001081027865771668, + "loss": 1.4284, + "step": 6400 + }, + { + "epoch": 0.6761603375527426, + "grad_norm": 0.6004816293716431, + 
"learning_rate": 0.0010661275382825958, + "loss": 1.423, + "step": 6410 + }, + { + "epoch": 0.6772151898734177, + "grad_norm": 0.5733190774917603, + "learning_rate": 0.0010514325891804379, + "loss": 1.4103, + "step": 6420 + }, + { + "epoch": 0.6782700421940928, + "grad_norm": 0.7946569919586182, + "learning_rate": 0.0010369401876360166, + "loss": 1.4178, + "step": 6430 + }, + { + "epoch": 0.679324894514768, + "grad_norm": 0.5692176818847656, + "learning_rate": 0.001022647541838836, + "loss": 1.4028, + "step": 6440 + }, + { + "epoch": 0.680379746835443, + "grad_norm": 0.7804424166679382, + "learning_rate": 0.0010085518984592678, + "loss": 1.4038, + "step": 6450 + }, + { + "epoch": 0.6814345991561181, + "grad_norm": 0.618582546710968, + "learning_rate": 0.0009946505421181513, + "loss": 1.4174, + "step": 6460 + }, + { + "epoch": 0.6824894514767933, + "grad_norm": 0.7743845582008362, + "learning_rate": 0.0009809407948637044, + "loss": 1.4122, + "step": 6470 + }, + { + "epoch": 0.6835443037974683, + "grad_norm": 0.7738763689994812, + "learning_rate": 0.0009674200156556436, + "loss": 1.4161, + "step": 6480 + }, + { + "epoch": 0.6845991561181435, + "grad_norm": 0.6182332634925842, + "learning_rate": 0.0009540855998564147, + "loss": 1.4097, + "step": 6490 + }, + { + "epoch": 0.6856540084388185, + "grad_norm": 0.7736266851425171, + "learning_rate": 0.000940934978729437, + "loss": 1.4281, + "step": 6500 + }, + { + "epoch": 0.6867088607594937, + "grad_norm": 0.6278747916221619, + "learning_rate": 0.0009279656189442628, + "loss": 1.4247, + "step": 6510 + }, + { + "epoch": 0.6877637130801688, + "grad_norm": 0.5344725251197815, + "learning_rate": 0.0009151750220885573, + "loss": 1.4064, + "step": 6520 + }, + { + "epoch": 0.6888185654008439, + "grad_norm": 0.7462520599365234, + "learning_rate": 0.0009025607241868057, + "loss": 1.3982, + "step": 6530 + }, + { + "epoch": 0.689873417721519, + "grad_norm": 0.8247795104980469, + "learning_rate": 0.0008901202952256545, + "loss": 
1.4066, + "step": 6540 + }, + { + "epoch": 0.6909282700421941, + "grad_norm": 0.6390166282653809, + "learning_rate": 0.0008778513386857928, + "loss": 1.4038, + "step": 6550 + }, + { + "epoch": 0.6919831223628692, + "grad_norm": 0.8160372376441956, + "learning_rate": 0.0008657514910802905, + "loss": 1.4041, + "step": 6560 + }, + { + "epoch": 0.6930379746835443, + "grad_norm": 0.5480459928512573, + "learning_rate": 0.0008538184214992943, + "loss": 1.4, + "step": 6570 + }, + { + "epoch": 0.6940928270042194, + "grad_norm": 0.6410509347915649, + "learning_rate": 0.0008420498311610049, + "loss": 1.3962, + "step": 6580 + }, + { + "epoch": 0.6951476793248945, + "grad_norm": 0.6110230088233948, + "learning_rate": 0.0008304434529688382, + "loss": 1.399, + "step": 6590 + }, + { + "epoch": 0.6962025316455697, + "grad_norm": 0.572085976600647, + "learning_rate": 0.0008189970510746938, + "loss": 1.4039, + "step": 6600 + }, + { + "epoch": 0.6972573839662447, + "grad_norm": 0.580246090888977, + "learning_rate": 0.0008077084204482425, + "loss": 1.3901, + "step": 6610 + }, + { + "epoch": 0.6983122362869199, + "grad_norm": 0.6792917847633362, + "learning_rate": 0.0007965753864521494, + "loss": 1.3976, + "step": 6620 + }, + { + "epoch": 0.6993670886075949, + "grad_norm": 0.6540189385414124, + "learning_rate": 0.0007855958044231527, + "loss": 1.3892, + "step": 6630 + }, + { + "epoch": 0.70042194092827, + "grad_norm": 0.5936343669891357, + "learning_rate": 0.000774767559258917, + "loss": 1.3936, + "step": 6640 + }, + { + "epoch": 0.7014767932489452, + "grad_norm": 0.644698977470398, + "learning_rate": 0.0007640885650105806, + "loss": 1.387, + "step": 6650 + }, + { + "epoch": 0.7025316455696202, + "grad_norm": 0.5821612477302551, + "learning_rate": 0.0007535567644809191, + "loss": 1.3896, + "step": 6660 + }, + { + "epoch": 0.7035864978902954, + "grad_norm": 0.4968838095664978, + "learning_rate": 0.0007431701288280478, + "loss": 1.3893, + "step": 6670 + }, + { + "epoch": 
0.7046413502109705, + "grad_norm": 0.5282427668571472, + "learning_rate": 0.0007329266571745864, + "loss": 1.3914, + "step": 6680 + }, + { + "epoch": 0.7056962025316456, + "grad_norm": 0.701461672782898, + "learning_rate": 0.0007228243762222109, + "loss": 1.3833, + "step": 6690 + }, + { + "epoch": 0.7067510548523207, + "grad_norm": 0.6693341135978699, + "learning_rate": 0.0007128613398715179, + "loss": 1.3882, + "step": 6700 + }, + { + "epoch": 0.7078059071729957, + "grad_norm": 0.5370439291000366, + "learning_rate": 0.0007030356288471288, + "loss": 1.3849, + "step": 6710 + }, + { + "epoch": 0.7088607594936709, + "grad_norm": 0.613661527633667, + "learning_rate": 0.0006933453503279619, + "loss": 1.3753, + "step": 6720 + }, + { + "epoch": 0.709915611814346, + "grad_norm": 0.6006898880004883, + "learning_rate": 0.0006837886375825994, + "loss": 1.3845, + "step": 6730 + }, + { + "epoch": 0.7109704641350211, + "grad_norm": 0.7256669998168945, + "learning_rate": 0.0006743636496096813, + "loss": 1.3858, + "step": 6740 + }, + { + "epoch": 0.7120253164556962, + "grad_norm": 0.588106095790863, + "learning_rate": 0.0006650685707832559, + "loss": 1.3877, + "step": 6750 + }, + { + "epoch": 0.7130801687763713, + "grad_norm": 0.597061812877655, + "learning_rate": 0.0006559016105030176, + "loss": 1.3806, + "step": 6760 + }, + { + "epoch": 0.7141350210970464, + "grad_norm": 0.7154589891433716, + "learning_rate": 0.000646861002849367, + "loss": 1.3821, + "step": 6770 + }, + { + "epoch": 0.7151898734177216, + "grad_norm": 0.6940481662750244, + "learning_rate": 0.0006379450062432248, + "loss": 1.3833, + "step": 6780 + }, + { + "epoch": 0.7162447257383966, + "grad_norm": 0.573932409286499, + "learning_rate": 0.0006291519031105347, + "loss": 1.3861, + "step": 6790 + }, + { + "epoch": 0.7172995780590717, + "grad_norm": 0.5575270652770996, + "learning_rate": 0.00062047999955139, + "loss": 1.3736, + "step": 6800 + }, + { + "epoch": 0.7183544303797469, + "grad_norm": 0.6313186287879944, + 
"learning_rate": 0.000611927625013722, + "loss": 1.3736, + "step": 6810 + }, + { + "epoch": 0.7194092827004219, + "grad_norm": 0.6282409429550171, + "learning_rate": 0.0006034931319714858, + "loss": 1.3756, + "step": 6820 + }, + { + "epoch": 0.7204641350210971, + "grad_norm": 0.7034692168235779, + "learning_rate": 0.0005951748956072806, + "loss": 1.3734, + "step": 6830 + }, + { + "epoch": 0.7215189873417721, + "grad_norm": 0.5762534737586975, + "learning_rate": 0.0005869713134993463, + "loss": 1.3623, + "step": 6840 + }, + { + "epoch": 0.7225738396624473, + "grad_norm": 0.5582824945449829, + "learning_rate": 0.0005788808053128734, + "loss": 1.3699, + "step": 6850 + }, + { + "epoch": 0.7236286919831224, + "grad_norm": 0.6575324535369873, + "learning_rate": 0.0005709018124955674, + "loss": 1.366, + "step": 6860 + }, + { + "epoch": 0.7246835443037974, + "grad_norm": 0.6613243818283081, + "learning_rate": 0.0005630327979774111, + "loss": 1.3649, + "step": 6870 + }, + { + "epoch": 0.7257383966244726, + "grad_norm": 0.6109374165534973, + "learning_rate": 0.0005552722458745627, + "loss": 1.3723, + "step": 6880 + }, + { + "epoch": 0.7267932489451476, + "grad_norm": 0.6053740978240967, + "learning_rate": 0.0005476186611973374, + "loss": 1.3594, + "step": 6890 + }, + { + "epoch": 0.7278481012658228, + "grad_norm": 0.5715690851211548, + "learning_rate": 0.000540070569562213, + "loss": 1.3626, + "step": 6900 + }, + { + "epoch": 0.7289029535864979, + "grad_norm": 0.5404677391052246, + "learning_rate": 0.0005326265169078048, + "loss": 1.3846, + "step": 6910 + }, + { + "epoch": 0.729957805907173, + "grad_norm": 0.6760450005531311, + "learning_rate": 0.0005252850692147567, + "loss": 1.3652, + "step": 6920 + }, + { + "epoch": 0.7310126582278481, + "grad_norm": 0.539077877998352, + "learning_rate": 0.0005180448122294913, + "loss": 1.372, + "step": 6930 + }, + { + "epoch": 0.7320675105485233, + "grad_norm": 0.7224864363670349, + "learning_rate": 0.0005109043511917693, + "loss": 
1.3744, + "step": 6940 + }, + { + "epoch": 0.7331223628691983, + "grad_norm": 0.6780089735984802, + "learning_rate": 0.0005038623105660032, + "loss": 1.3699, + "step": 6950 + }, + { + "epoch": 0.7341772151898734, + "grad_norm": 0.5734032988548279, + "learning_rate": 0.0004969173337762747, + "loss": 1.3619, + "step": 6960 + }, + { + "epoch": 0.7352320675105485, + "grad_norm": 0.5931891202926636, + "learning_rate": 0.0004900680829450042, + "loss": 1.3532, + "step": 6970 + }, + { + "epoch": 0.7362869198312236, + "grad_norm": 0.5751489400863647, + "learning_rate": 0.0004833132386352233, + "loss": 1.3583, + "step": 6980 + }, + { + "epoch": 0.7373417721518988, + "grad_norm": 0.5907680988311768, + "learning_rate": 0.00047665149959639813, + "loss": 1.3592, + "step": 6990 + }, + { + "epoch": 0.7383966244725738, + "grad_norm": 0.5386176109313965, + "learning_rate": 0.0004700815825137577, + "loss": 1.3694, + "step": 7000 + }, + { + "epoch": 0.739451476793249, + "grad_norm": 0.5586495399475098, + "learning_rate": 0.00046360222176107584, + "loss": 1.3399, + "step": 7010 + }, + { + "epoch": 0.740506329113924, + "grad_norm": 0.6473351716995239, + "learning_rate": 0.0004572121691568625, + "loss": 1.3575, + "step": 7020 + }, + { + "epoch": 0.7415611814345991, + "grad_norm": 0.5740780234336853, + "learning_rate": 0.00045091019372391354, + "loss": 1.3591, + "step": 7030 + }, + { + "epoch": 0.7426160337552743, + "grad_norm": 0.6933109760284424, + "learning_rate": 0.0004446950814521764, + "loss": 1.3647, + "step": 7040 + }, + { + "epoch": 0.7436708860759493, + "grad_norm": 0.5194635391235352, + "learning_rate": 0.0004385656350648835, + "loss": 1.3513, + "step": 7050 + }, + { + "epoch": 0.7447257383966245, + "grad_norm": 0.5705422759056091, + "learning_rate": 0.00043252067378790946, + "loss": 1.3554, + "step": 7060 + }, + { + "epoch": 0.7457805907172996, + "grad_norm": 0.6068917512893677, + "learning_rate": 0.00042655903312230673, + "loss": 1.363, + "step": 7070 + }, + { + "epoch": 
0.7468354430379747, + "grad_norm": 0.5380057692527771, + "learning_rate": 0.0004206795646199778, + "loss": 1.3572, + "step": 7080 + }, + { + "epoch": 0.7478902953586498, + "grad_norm": 0.5950965881347656, + "learning_rate": 0.0004148811356624379, + "loss": 1.3568, + "step": 7090 + }, + { + "epoch": 0.7489451476793249, + "grad_norm": 0.5918527245521545, + "learning_rate": 0.0004091626292426282, + "loss": 1.3512, + "step": 7100 + }, + { + "epoch": 0.75, + "grad_norm": 0.5303682088851929, + "learning_rate": 0.0004035229437497357, + "loss": 1.3618, + "step": 7110 + }, + { + "epoch": 0.7510548523206751, + "grad_norm": 0.6381595730781555, + "learning_rate": 0.00039796099275697986, + "loss": 1.3563, + "step": 7120 + }, + { + "epoch": 0.7521097046413502, + "grad_norm": 0.538011908531189, + "learning_rate": 0.0003924757048123232, + "loss": 1.3639, + "step": 7130 + }, + { + "epoch": 0.7531645569620253, + "grad_norm": 0.5151325464248657, + "learning_rate": 0.0003870660232320675, + "loss": 1.3466, + "step": 7140 + }, + { + "epoch": 0.7542194092827004, + "grad_norm": 0.7674492597579956, + "learning_rate": 0.000381730905897295, + "loss": 1.3447, + "step": 7150 + }, + { + "epoch": 0.7552742616033755, + "grad_norm": 0.5814359188079834, + "learning_rate": 0.0003764693250531141, + "loss": 1.3536, + "step": 7160 + }, + { + "epoch": 0.7563291139240507, + "grad_norm": 0.736849844455719, + "learning_rate": 0.0003712802671106742, + "loss": 1.359, + "step": 7170 + }, + { + "epoch": 0.7573839662447257, + "grad_norm": 0.6498019099235535, + "learning_rate": 0.0003661627324519073, + "loss": 1.3378, + "step": 7180 + }, + { + "epoch": 0.7584388185654009, + "grad_norm": 0.596129298210144, + "learning_rate": 0.0003611157352369628, + "loss": 1.3403, + "step": 7190 + }, + { + "epoch": 0.759493670886076, + "grad_norm": 0.5587188601493835, + "learning_rate": 0.00035613830321429534, + "loss": 1.3465, + "step": 7200 + }, + { + "epoch": 0.760548523206751, + "grad_norm": 0.5939928889274597, + 
"learning_rate": 0.00035122947753337037, + "loss": 1.3523, + "step": 7210 + }, + { + "epoch": 0.7616033755274262, + "grad_norm": 0.5640298128128052, + "learning_rate": 0.0003463883125599521, + "loss": 1.3636, + "step": 7220 + }, + { + "epoch": 0.7626582278481012, + "grad_norm": 0.585434079170227, + "learning_rate": 0.00034161387569393647, + "loss": 1.3595, + "step": 7230 + }, + { + "epoch": 0.7637130801687764, + "grad_norm": 0.5625174641609192, + "learning_rate": 0.00033690524718969593, + "loss": 1.3545, + "step": 7240 + }, + { + "epoch": 0.7647679324894515, + "grad_norm": 0.5559488534927368, + "learning_rate": 0.0003322615199788993, + "loss": 1.3493, + "step": 7250 + }, + { + "epoch": 0.7658227848101266, + "grad_norm": 0.5726392269134521, + "learning_rate": 0.00032768179949577516, + "loss": 1.3467, + "step": 7260 + }, + { + "epoch": 0.7668776371308017, + "grad_norm": 0.6350004076957703, + "learning_rate": 0.0003231652035047826, + "loss": 1.3443, + "step": 7270 + }, + { + "epoch": 0.7679324894514767, + "grad_norm": 0.6924746036529541, + "learning_rate": 0.000318710861930658, + "loss": 1.3455, + "step": 7280 + }, + { + "epoch": 0.7689873417721519, + "grad_norm": 0.5796951651573181, + "learning_rate": 0.0003143179166908038, + "loss": 1.3564, + "step": 7290 + }, + { + "epoch": 0.770042194092827, + "grad_norm": 0.6961168646812439, + "learning_rate": 0.00030998552152998834, + "loss": 1.3612, + "step": 7300 + }, + { + "epoch": 0.7710970464135021, + "grad_norm": 0.6730489730834961, + "learning_rate": 0.00030571284185732276, + "loss": 1.3414, + "step": 7310 + }, + { + "epoch": 0.7721518987341772, + "grad_norm": 0.6810585260391235, + "learning_rate": 0.0003014990545854864, + "loss": 1.3344, + "step": 7320 + }, + { + "epoch": 0.7732067510548524, + "grad_norm": 0.680448591709137, + "learning_rate": 0.0002973433479721675, + "loss": 1.3418, + "step": 7330 + }, + { + "epoch": 0.7742616033755274, + "grad_norm": 0.5667837262153625, + "learning_rate": 0.00029324492146368906, + 
"loss": 1.3407, + "step": 7340 + }, + { + "epoch": 0.7753164556962026, + "grad_norm": 0.5182477235794067, + "learning_rate": 0.00028920298554079113, + "loss": 1.3355, + "step": 7350 + }, + { + "epoch": 0.7763713080168776, + "grad_norm": 0.5167650580406189, + "learning_rate": 0.00028521676156653756, + "loss": 1.3417, + "step": 7360 + }, + { + "epoch": 0.7774261603375527, + "grad_norm": 0.5599521994590759, + "learning_rate": 0.00028128548163632006, + "loss": 1.3424, + "step": 7370 + }, + { + "epoch": 0.7784810126582279, + "grad_norm": 0.5846539735794067, + "learning_rate": 0.0002774083884299292, + "loss": 1.33, + "step": 7380 + }, + { + "epoch": 0.7795358649789029, + "grad_norm": 0.5187151432037354, + "learning_rate": 0.0002735847350656645, + "loss": 1.3422, + "step": 7390 + }, + { + "epoch": 0.7805907172995781, + "grad_norm": 0.6782974600791931, + "learning_rate": 0.0002698137849564556, + "loss": 1.3497, + "step": 7400 + }, + { + "epoch": 0.7816455696202531, + "grad_norm": 0.5337135195732117, + "learning_rate": 0.0002660948116679665, + "loss": 1.3283, + "step": 7410 + }, + { + "epoch": 0.7827004219409283, + "grad_norm": 0.5930860042572021, + "learning_rate": 0.00026242709877865493, + "loss": 1.3338, + "step": 7420 + }, + { + "epoch": 0.7837552742616034, + "grad_norm": 0.5113661289215088, + "learning_rate": 0.00025880993974176204, + "loss": 1.3198, + "step": 7430 + }, + { + "epoch": 0.7848101265822784, + "grad_norm": 0.5587288737297058, + "learning_rate": 0.0002552426377492028, + "loss": 1.3256, + "step": 7440 + }, + { + "epoch": 0.7858649789029536, + "grad_norm": 0.525753378868103, + "learning_rate": 0.0002517245055973337, + "loss": 1.3282, + "step": 7450 + }, + { + "epoch": 0.7869198312236287, + "grad_norm": 0.5152617692947388, + "learning_rate": 0.00024825486555456975, + "loss": 1.3378, + "step": 7460 + }, + { + "epoch": 0.7879746835443038, + "grad_norm": 0.5560811758041382, + "learning_rate": 0.00024483304923082663, + "loss": 1.3301, + "step": 7470 + }, + { + 
"epoch": 0.7890295358649789, + "grad_norm": 0.6864498257637024, + "learning_rate": 0.0002414583974487624, + "loss": 1.3368, + "step": 7480 + }, + { + "epoch": 0.790084388185654, + "grad_norm": 0.5324978828430176, + "learning_rate": 0.00023813026011679372, + "loss": 1.3411, + "step": 7490 + }, + { + "epoch": 0.7911392405063291, + "grad_norm": 0.5366637706756592, + "learning_rate": 0.0002348479961038625, + "loss": 1.3375, + "step": 7500 + }, + { + "epoch": 0.7921940928270043, + "grad_norm": 0.559596061706543, + "learning_rate": 0.00023161097311592867, + "loss": 1.3439, + "step": 7510 + }, + { + "epoch": 0.7932489451476793, + "grad_norm": 0.5395399928092957, + "learning_rate": 0.00022841856757416538, + "loss": 1.3272, + "step": 7520 + }, + { + "epoch": 0.7943037974683544, + "grad_norm": 0.5332239866256714, + "learning_rate": 0.0002252701644948328, + "loss": 1.3288, + "step": 7530 + }, + { + "epoch": 0.7953586497890295, + "grad_norm": 0.54463791847229, + "learning_rate": 0.00022216515737080817, + "loss": 1.3252, + "step": 7540 + }, + { + "epoch": 0.7964135021097046, + "grad_norm": 0.534031867980957, + "learning_rate": 0.00021910294805474833, + "loss": 1.3227, + "step": 7550 + }, + { + "epoch": 0.7974683544303798, + "grad_norm": 0.5391992330551147, + "learning_rate": 0.0002160829466438629, + "loss": 1.3255, + "step": 7560 + }, + { + "epoch": 0.7985232067510548, + "grad_norm": 0.5907092094421387, + "learning_rate": 0.00021310457136627562, + "loss": 1.3358, + "step": 7570 + }, + { + "epoch": 0.79957805907173, + "grad_norm": 0.5663644075393677, + "learning_rate": 0.00021016724846895213, + "loss": 1.3327, + "step": 7580 + }, + { + "epoch": 0.8006329113924051, + "grad_norm": 0.5528636574745178, + "learning_rate": 0.00020727041210717235, + "loss": 1.3243, + "step": 7590 + }, + { + "epoch": 0.8016877637130801, + "grad_norm": 0.5246782302856445, + "learning_rate": 0.00020441350423552624, + "loss": 1.328, + "step": 7600 + }, + { + "epoch": 0.8027426160337553, + "grad_norm": 
0.576280415058136, + "learning_rate": 0.00020159597450041257, + "loss": 1.3416, + "step": 7610 + }, + { + "epoch": 0.8037974683544303, + "grad_norm": 0.5015661120414734, + "learning_rate": 0.00019881728013401842, + "loss": 1.3143, + "step": 7620 + }, + { + "epoch": 0.8048523206751055, + "grad_norm": 0.5284491777420044, + "learning_rate": 0.00019607688584976116, + "loss": 1.3361, + "step": 7630 + }, + { + "epoch": 0.8059071729957806, + "grad_norm": 0.5766530632972717, + "learning_rate": 0.00019337426373917076, + "loss": 1.327, + "step": 7640 + }, + { + "epoch": 0.8069620253164557, + "grad_norm": 0.5360243916511536, + "learning_rate": 0.00019070889317019375, + "loss": 1.3419, + "step": 7650 + }, + { + "epoch": 0.8080168776371308, + "grad_norm": 0.5268714427947998, + "learning_rate": 0.00018808026068689883, + "loss": 1.321, + "step": 7660 + }, + { + "epoch": 0.8090717299578059, + "grad_norm": 0.5576908588409424, + "learning_rate": 0.00018548785991056508, + "loss": 1.3288, + "step": 7670 + }, + { + "epoch": 0.810126582278481, + "grad_norm": 0.6194130182266235, + "learning_rate": 0.00018293119144213328, + "loss": 1.3408, + "step": 7680 + }, + { + "epoch": 0.8111814345991561, + "grad_norm": 0.510420024394989, + "learning_rate": 0.00018040976276600176, + "loss": 1.3154, + "step": 7690 + }, + { + "epoch": 0.8122362869198312, + "grad_norm": 0.5545004606246948, + "learning_rate": 0.00017792308815514854, + "loss": 1.3266, + "step": 7700 + }, + { + "epoch": 0.8132911392405063, + "grad_norm": 0.5477995872497559, + "learning_rate": 0.00017547068857756104, + "loss": 1.3209, + "step": 7710 + }, + { + "epoch": 0.8143459915611815, + "grad_norm": 0.6094242334365845, + "learning_rate": 0.00017305209160395547, + "loss": 1.3267, + "step": 7720 + }, + { + "epoch": 0.8154008438818565, + "grad_norm": 0.625672459602356, + "learning_rate": 0.00017066683131676825, + "loss": 1.3327, + "step": 7730 + }, + { + "epoch": 0.8164556962025317, + "grad_norm": 0.5112758874893188, + "learning_rate": 
0.00016831444822040207, + "loss": 1.3328, + "step": 7740 + }, + { + "epoch": 0.8175105485232067, + "grad_norm": 0.5942822098731995, + "learning_rate": 0.00016599448915270845, + "loss": 1.3177, + "step": 7750 + }, + { + "epoch": 0.8185654008438819, + "grad_norm": 0.577560305595398, + "learning_rate": 0.000163706507197691, + "loss": 1.3172, + "step": 7760 + }, + { + "epoch": 0.819620253164557, + "grad_norm": 0.5374029278755188, + "learning_rate": 0.0001614500615994117, + "loss": 1.3273, + "step": 7770 + }, + { + "epoch": 0.820675105485232, + "grad_norm": 0.819732666015625, + "learning_rate": 0.00015922471767708377, + "loss": 1.3202, + "step": 7780 + }, + { + "epoch": 0.8217299578059072, + "grad_norm": 0.7055717706680298, + "learning_rate": 0.00015703004674133498, + "loss": 1.3377, + "step": 7790 + }, + { + "epoch": 0.8227848101265823, + "grad_norm": 0.5231856107711792, + "learning_rate": 0.00015486562601162512, + "loss": 1.3238, + "step": 7800 + }, + { + "epoch": 0.8238396624472574, + "grad_norm": 0.5319185853004456, + "learning_rate": 0.0001527310385348017, + "loss": 1.324, + "step": 7810 + }, + { + "epoch": 0.8248945147679325, + "grad_norm": 0.5366581678390503, + "learning_rate": 0.00015062587310477813, + "loss": 1.331, + "step": 7820 + }, + { + "epoch": 0.8259493670886076, + "grad_norm": 0.6566741466522217, + "learning_rate": 0.00014854972418331948, + "loss": 1.3203, + "step": 7830 + }, + { + "epoch": 0.8270042194092827, + "grad_norm": 0.48517322540283203, + "learning_rate": 0.00014650219182191934, + "loss": 1.3057, + "step": 7840 + }, + { + "epoch": 0.8280590717299579, + "grad_norm": 0.5799199938774109, + "learning_rate": 0.00014448288158475423, + "loss": 1.3156, + "step": 7850 + }, + { + "epoch": 0.8291139240506329, + "grad_norm": 0.5134887099266052, + "learning_rate": 0.0001424914044726995, + "loss": 1.3022, + "step": 7860 + }, + { + "epoch": 0.830168776371308, + "grad_norm": 0.5238925218582153, + "learning_rate": 0.0001405273768483926, + "loss": 1.3158, + 
"step": 7870 + }, + { + "epoch": 0.8312236286919831, + "grad_norm": 0.5995121598243713, + "learning_rate": 0.0001385904203623296, + "loss": 1.3373, + "step": 7880 + }, + { + "epoch": 0.8322784810126582, + "grad_norm": 0.5171149373054504, + "learning_rate": 0.00013668016187997964, + "loss": 1.3342, + "step": 7890 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.5242458581924438, + "learning_rate": 0.0001347962334099052, + "loss": 1.3121, + "step": 7900 + }, + { + "epoch": 0.8343881856540084, + "grad_norm": 0.7036519646644592, + "learning_rate": 0.00013293827203287141, + "loss": 1.3237, + "step": 7910 + }, + { + "epoch": 0.8354430379746836, + "grad_norm": 0.6417196393013, + "learning_rate": 0.00013110591983193424, + "loss": 1.321, + "step": 7920 + }, + { + "epoch": 0.8364978902953587, + "grad_norm": 0.542813777923584, + "learning_rate": 0.00012929882382349103, + "loss": 1.3318, + "step": 7930 + }, + { + "epoch": 0.8375527426160337, + "grad_norm": 0.5496925115585327, + "learning_rate": 0.0001275166358892821, + "loss": 1.3223, + "step": 7940 + }, + { + "epoch": 0.8386075949367089, + "grad_norm": 0.5226621031761169, + "learning_rate": 0.00012575901270932944, + "loss": 1.3279, + "step": 7950 + }, + { + "epoch": 0.8396624472573839, + "grad_norm": 0.5171064138412476, + "learning_rate": 0.00012402561569579935, + "loss": 1.3107, + "step": 7960 + }, + { + "epoch": 0.8407172995780591, + "grad_norm": 0.6075687408447266, + "learning_rate": 0.00012231611092777743, + "loss": 1.314, + "step": 7970 + }, + { + "epoch": 0.8417721518987342, + "grad_norm": 0.5964035391807556, + "learning_rate": 0.00012063016908694192, + "loss": 1.312, + "step": 7980 + }, + { + "epoch": 0.8428270042194093, + "grad_norm": 0.5801118612289429, + "learning_rate": 0.00011896746539412405, + "loss": 1.3228, + "step": 7990 + }, + { + "epoch": 0.8438818565400844, + "grad_norm": 0.537604808807373, + "learning_rate": 0.00011732767954674264, + "loss": 1.3185, + "step": 8000 + }, + { + "epoch": 
0.8449367088607594, + "grad_norm": 0.5109334588050842, + "learning_rate": 0.00011571049565710122, + "loss": 1.3243, + "step": 8010 + }, + { + "epoch": 0.8459915611814346, + "grad_norm": 0.5539976358413696, + "learning_rate": 0.00011411560219153552, + "loss": 1.3181, + "step": 8020 + }, + { + "epoch": 0.8470464135021097, + "grad_norm": 0.5412393808364868, + "learning_rate": 0.0001125426919103997, + "loss": 1.3147, + "step": 8030 + }, + { + "epoch": 0.8481012658227848, + "grad_norm": 0.5257119536399841, + "learning_rate": 0.00011099146180887992, + "loss": 1.3264, + "step": 8040 + }, + { + "epoch": 0.8491561181434599, + "grad_norm": 0.49860504269599915, + "learning_rate": 0.0001094616130586235, + "loss": 1.3087, + "step": 8050 + }, + { + "epoch": 0.8502109704641351, + "grad_norm": 0.5290922522544861, + "learning_rate": 0.00010795285095017282, + "loss": 1.3164, + "step": 8060 + }, + { + "epoch": 0.8512658227848101, + "grad_norm": 0.4936314821243286, + "learning_rate": 0.00010646488483619263, + "loss": 1.3215, + "step": 8070 + }, + { + "epoch": 0.8523206751054853, + "grad_norm": 0.5153605937957764, + "learning_rate": 0.00010499742807547978, + "loss": 1.3228, + "step": 8080 + }, + { + "epoch": 0.8533755274261603, + "grad_norm": 0.5684274435043335, + "learning_rate": 0.0001035501979777448, + "loss": 1.3125, + "step": 8090 + }, + { + "epoch": 0.8544303797468354, + "grad_norm": 0.5810254812240601, + "learning_rate": 0.00010212291574915464, + "loss": 1.3083, + "step": 8100 + }, + { + "epoch": 0.8554852320675106, + "grad_norm": 0.6040735244750977, + "learning_rate": 0.00010071530643862575, + "loss": 1.3258, + "step": 8110 + }, + { + "epoch": 0.8565400843881856, + "grad_norm": 0.5180647373199463, + "learning_rate": 9.932709888485788e-05, + "loss": 1.3139, + "step": 8120 + }, + { + "epoch": 0.8575949367088608, + "grad_norm": 0.5374089479446411, + "learning_rate": 9.79580256640974e-05, + "loss": 1.3137, + "step": 8130 + }, + { + "epoch": 0.8586497890295358, + "grad_norm": 
0.5367872714996338, + "learning_rate": 9.660782303862107e-05, + "loss": 1.3136, + "step": 8140 + }, + { + "epoch": 0.859704641350211, + "grad_norm": 0.5319285988807678, + "learning_rate": 9.527623090592962e-05, + "loss": 1.3078, + "step": 8150 + }, + { + "epoch": 0.8607594936708861, + "grad_norm": 0.5125951766967773, + "learning_rate": 9.396299274864176e-05, + "loss": 1.3207, + "step": 8160 + }, + { + "epoch": 0.8618143459915611, + "grad_norm": 0.5163692235946655, + "learning_rate": 9.266785558507876e-05, + "loss": 1.31, + "step": 8170 + }, + { + "epoch": 0.8628691983122363, + "grad_norm": 0.5440879464149475, + "learning_rate": 9.139056992053017e-05, + "loss": 1.3186, + "step": 8180 + }, + { + "epoch": 0.8639240506329114, + "grad_norm": 0.7431283593177795, + "learning_rate": 9.01308896991912e-05, + "loss": 1.3136, + "step": 8190 + }, + { + "epoch": 0.8649789029535865, + "grad_norm": 0.515559196472168, + "learning_rate": 8.88885722567627e-05, + "loss": 1.3231, + "step": 8200 + }, + { + "epoch": 0.8660337552742616, + "grad_norm": 0.5317827463150024, + "learning_rate": 8.766337827370438e-05, + "loss": 1.3164, + "step": 8210 + }, + { + "epoch": 0.8670886075949367, + "grad_norm": 0.5329760313034058, + "learning_rate": 8.645507172913238e-05, + "loss": 1.3218, + "step": 8220 + }, + { + "epoch": 0.8681434599156118, + "grad_norm": 0.5092115998268127, + "learning_rate": 8.52634198553523e-05, + "loss": 1.3197, + "step": 8230 + }, + { + "epoch": 0.869198312236287, + "grad_norm": 0.4676823914051056, + "learning_rate": 8.408819309301891e-05, + "loss": 1.3149, + "step": 8240 + }, + { + "epoch": 0.870253164556962, + "grad_norm": 0.5172714591026306, + "learning_rate": 8.292916504691398e-05, + "loss": 1.3308, + "step": 8250 + }, + { + "epoch": 0.8713080168776371, + "grad_norm": 0.5405924320220947, + "learning_rate": 8.178611244233354e-05, + "loss": 1.3214, + "step": 8260 + }, + { + "epoch": 0.8723628691983122, + "grad_norm": 0.5424674153327942, + "learning_rate": 
8.065881508207636e-05, + "loss": 1.3174, + "step": 8270 + }, + { + "epoch": 0.8734177215189873, + "grad_norm": 0.4920295774936676, + "learning_rate": 7.954705580402525e-05, + "loss": 1.3005, + "step": 8280 + }, + { + "epoch": 0.8744725738396625, + "grad_norm": 0.5501099228858948, + "learning_rate": 7.845062043931299e-05, + "loss": 1.2997, + "step": 8290 + }, + { + "epoch": 0.8755274261603375, + "grad_norm": 0.5065336227416992, + "learning_rate": 7.736929777106499e-05, + "loss": 1.3278, + "step": 8300 + }, + { + "epoch": 0.8765822784810127, + "grad_norm": 0.5615873336791992, + "learning_rate": 7.630287949371051e-05, + "loss": 1.305, + "step": 8310 + }, + { + "epoch": 0.8776371308016878, + "grad_norm": 0.6621586680412292, + "learning_rate": 7.525116017285479e-05, + "loss": 1.3232, + "step": 8320 + }, + { + "epoch": 0.8786919831223629, + "grad_norm": 0.5354205369949341, + "learning_rate": 7.421393720570416e-05, + "loss": 1.3175, + "step": 8330 + }, + { + "epoch": 0.879746835443038, + "grad_norm": 0.5024200081825256, + "learning_rate": 7.319101078203692e-05, + "loss": 1.3053, + "step": 8340 + }, + { + "epoch": 0.880801687763713, + "grad_norm": 0.48704084753990173, + "learning_rate": 7.218218384571176e-05, + "loss": 1.3092, + "step": 8350 + }, + { + "epoch": 0.8818565400843882, + "grad_norm": 0.5138947367668152, + "learning_rate": 7.118726205670702e-05, + "loss": 1.3054, + "step": 8360 + }, + { + "epoch": 0.8829113924050633, + "grad_norm": 0.5180509686470032, + "learning_rate": 7.020605375368314e-05, + "loss": 1.3132, + "step": 8370 + }, + { + "epoch": 0.8839662447257384, + "grad_norm": 0.5213926434516907, + "learning_rate": 6.923836991706108e-05, + "loss": 1.3206, + "step": 8380 + }, + { + "epoch": 0.8850210970464135, + "grad_norm": 0.5391127467155457, + "learning_rate": 6.828402413260965e-05, + "loss": 1.3082, + "step": 8390 + }, + { + "epoch": 0.8860759493670886, + "grad_norm": 0.4997680187225342, + "learning_rate": 6.73428325555347e-05, + "loss": 1.3098, + "step": 
8400 + }, + { + "epoch": 0.8871308016877637, + "grad_norm": 0.5638720989227295, + "learning_rate": 6.641461387506347e-05, + "loss": 1.3295, + "step": 8410 + }, + { + "epoch": 0.8881856540084389, + "grad_norm": 0.4928487241268158, + "learning_rate": 6.549918927951679e-05, + "loss": 1.3171, + "step": 8420 + }, + { + "epoch": 0.8892405063291139, + "grad_norm": 0.5617793798446655, + "learning_rate": 6.459638242186298e-05, + "loss": 1.3111, + "step": 8430 + }, + { + "epoch": 0.890295358649789, + "grad_norm": 0.5282180309295654, + "learning_rate": 6.370601938574637e-05, + "loss": 1.3056, + "step": 8440 + }, + { + "epoch": 0.8913502109704642, + "grad_norm": 0.5836192965507507, + "learning_rate": 6.282792865198421e-05, + "loss": 1.3137, + "step": 8450 + }, + { + "epoch": 0.8924050632911392, + "grad_norm": 0.5485517978668213, + "learning_rate": 6.196194106552512e-05, + "loss": 1.3098, + "step": 8460 + }, + { + "epoch": 0.8934599156118144, + "grad_norm": 0.5073134303092957, + "learning_rate": 6.110788980286329e-05, + "loss": 1.312, + "step": 8470 + }, + { + "epoch": 0.8945147679324894, + "grad_norm": 0.5078051090240479, + "learning_rate": 6.026561033990159e-05, + "loss": 1.3065, + "step": 8480 + }, + { + "epoch": 0.8955696202531646, + "grad_norm": 0.5380173325538635, + "learning_rate": 5.943494042025771e-05, + "loss": 1.3116, + "step": 8490 + }, + { + "epoch": 0.8966244725738397, + "grad_norm": 0.5105624794960022, + "learning_rate": 5.8615720024007174e-05, + "loss": 1.3026, + "step": 8500 + }, + { + "epoch": 0.8976793248945147, + "grad_norm": 0.5025993585586548, + "learning_rate": 5.780779133685717e-05, + "loss": 1.3118, + "step": 8510 + }, + { + "epoch": 0.8987341772151899, + "grad_norm": 0.5540491342544556, + "learning_rate": 5.701099871974525e-05, + "loss": 1.3095, + "step": 8520 + }, + { + "epoch": 0.8997890295358649, + "grad_norm": 0.5202192068099976, + "learning_rate": 5.6225188678857095e-05, + "loss": 1.3233, + "step": 8530 + }, + { + "epoch": 0.9008438818565401, + 
"grad_norm": 0.5068135857582092, + "learning_rate": 5.545020983605749e-05, + "loss": 1.3204, + "step": 8540 + }, + { + "epoch": 0.9018987341772152, + "grad_norm": 0.5442157983779907, + "learning_rate": 5.4685912899728965e-05, + "loss": 1.3099, + "step": 8550 + }, + { + "epoch": 0.9029535864978903, + "grad_norm": 0.5056540369987488, + "learning_rate": 5.39321506360123e-05, + "loss": 1.3018, + "step": 8560 + }, + { + "epoch": 0.9040084388185654, + "grad_norm": 0.5659096240997314, + "learning_rate": 5.318877784044342e-05, + "loss": 1.3225, + "step": 8570 + }, + { + "epoch": 0.9050632911392406, + "grad_norm": 0.5110923051834106, + "learning_rate": 5.245565130998124e-05, + "loss": 1.3119, + "step": 8580 + }, + { + "epoch": 0.9061181434599156, + "grad_norm": 0.6200442314147949, + "learning_rate": 5.173262981542119e-05, + "loss": 1.3166, + "step": 8590 + }, + { + "epoch": 0.9071729957805907, + "grad_norm": 0.6147569417953491, + "learning_rate": 5.101957407418877e-05, + "loss": 1.3073, + "step": 8600 + }, + { + "epoch": 0.9082278481012658, + "grad_norm": 0.6131770014762878, + "learning_rate": 5.0316346723508287e-05, + "loss": 1.3098, + "step": 8610 + }, + { + "epoch": 0.9092827004219409, + "grad_norm": 0.5320587158203125, + "learning_rate": 4.962281229394129e-05, + "loss": 1.3074, + "step": 8620 + }, + { + "epoch": 0.9103375527426161, + "grad_norm": 0.5317681431770325, + "learning_rate": 4.893883718328984e-05, + "loss": 1.3189, + "step": 8630 + }, + { + "epoch": 0.9113924050632911, + "grad_norm": 0.6486487984657288, + "learning_rate": 4.8264289630859386e-05, + "loss": 1.2977, + "step": 8640 + }, + { + "epoch": 0.9124472573839663, + "grad_norm": 0.5020943284034729, + "learning_rate": 4.759903969207646e-05, + "loss": 1.3015, + "step": 8650 + }, + { + "epoch": 0.9135021097046413, + "grad_norm": 0.6304572820663452, + "learning_rate": 4.694295921345623e-05, + "loss": 1.3028, + "step": 8660 + }, + { + "epoch": 0.9145569620253164, + "grad_norm": 0.5135694742202759, + 
"learning_rate": 4.629592180791501e-05, + "loss": 1.31, + "step": 8670 + }, + { + "epoch": 0.9156118143459916, + "grad_norm": 0.5027759671211243, + "learning_rate": 4.565780283042316e-05, + "loss": 1.3197, + "step": 8680 + }, + { + "epoch": 0.9166666666666666, + "grad_norm": 0.4994494915008545, + "learning_rate": 4.502847935399348e-05, + "loss": 1.3134, + "step": 8690 + }, + { + "epoch": 0.9177215189873418, + "grad_norm": 0.5003330707550049, + "learning_rate": 4.440783014600059e-05, + "loss": 1.3129, + "step": 8700 + }, + { + "epoch": 0.9187763713080169, + "grad_norm": 0.5652446150779724, + "learning_rate": 4.3795735644826776e-05, + "loss": 1.3098, + "step": 8710 + }, + { + "epoch": 0.919831223628692, + "grad_norm": 0.49004751443862915, + "learning_rate": 4.319207793682965e-05, + "loss": 1.3038, + "step": 8720 + }, + { + "epoch": 0.9208860759493671, + "grad_norm": 0.5124107599258423, + "learning_rate": 4.259674073362732e-05, + "loss": 1.3173, + "step": 8730 + }, + { + "epoch": 0.9219409282700421, + "grad_norm": 0.5119485259056091, + "learning_rate": 4.200960934969664e-05, + "loss": 1.3066, + "step": 8740 + }, + { + "epoch": 0.9229957805907173, + "grad_norm": 0.49826186895370483, + "learning_rate": 4.143057068028024e-05, + "loss": 1.3031, + "step": 8750 + }, + { + "epoch": 0.9240506329113924, + "grad_norm": 0.5968794822692871, + "learning_rate": 4.0859513179598096e-05, + "loss": 1.2918, + "step": 8760 + }, + { + "epoch": 0.9251054852320675, + "grad_norm": 0.5147610306739807, + "learning_rate": 4.02963268393593e-05, + "loss": 1.3029, + "step": 8770 + }, + { + "epoch": 0.9261603375527426, + "grad_norm": 0.4845047891139984, + "learning_rate": 3.974090316757029e-05, + "loss": 1.3088, + "step": 8780 + }, + { + "epoch": 0.9272151898734177, + "grad_norm": 0.5033217668533325, + "learning_rate": 3.919313516763478e-05, + "loss": 1.3042, + "step": 8790 + }, + { + "epoch": 0.9282700421940928, + "grad_norm": 0.4895450472831726, + "learning_rate": 3.8652917317742106e-05, + 
"loss": 1.3008, + "step": 8800 + }, + { + "epoch": 0.929324894514768, + "grad_norm": 0.5039649605751038, + "learning_rate": 3.812014555053955e-05, + "loss": 1.3228, + "step": 8810 + }, + { + "epoch": 0.930379746835443, + "grad_norm": 0.49170997738838196, + "learning_rate": 3.759471723308477e-05, + "loss": 1.3063, + "step": 8820 + }, + { + "epoch": 0.9314345991561181, + "grad_norm": 0.492278128862381, + "learning_rate": 3.707653114707471e-05, + "loss": 1.3226, + "step": 8830 + }, + { + "epoch": 0.9324894514767933, + "grad_norm": 0.5184905529022217, + "learning_rate": 3.6565487469346904e-05, + "loss": 1.3087, + "step": 8840 + }, + { + "epoch": 0.9335443037974683, + "grad_norm": 0.5590412616729736, + "learning_rate": 3.606148775264958e-05, + "loss": 1.3074, + "step": 8850 + }, + { + "epoch": 0.9345991561181435, + "grad_norm": 0.5469756722450256, + "learning_rate": 3.5564434906676834e-05, + "loss": 1.3079, + "step": 8860 + }, + { + "epoch": 0.9356540084388185, + "grad_norm": 0.5224888324737549, + "learning_rate": 3.507423317936521e-05, + "loss": 1.3099, + "step": 8870 + }, + { + "epoch": 0.9367088607594937, + "grad_norm": 0.5415185689926147, + "learning_rate": 3.4590788138448004e-05, + "loss": 1.3159, + "step": 8880 + }, + { + "epoch": 0.9377637130801688, + "grad_norm": 0.48988479375839233, + "learning_rate": 3.411400665326393e-05, + "loss": 1.3106, + "step": 8890 + }, + { + "epoch": 0.9388185654008439, + "grad_norm": 0.4790487587451935, + "learning_rate": 3.364379687681642e-05, + "loss": 1.3033, + "step": 8900 + }, + { + "epoch": 0.939873417721519, + "grad_norm": 0.5707380175590515, + "learning_rate": 3.31800682280803e-05, + "loss": 1.3029, + "step": 8910 + }, + { + "epoch": 0.9409282700421941, + "grad_norm": 0.49728935956954956, + "learning_rate": 3.272273137455225e-05, + "loss": 1.3086, + "step": 8920 + }, + { + "epoch": 0.9419831223628692, + "grad_norm": 0.5275545120239258, + "learning_rate": 3.227169821504187e-05, + "loss": 1.3062, + "step": 8930 + }, + { + 
"epoch": 0.9430379746835443, + "grad_norm": 0.4772747755050659, + "learning_rate": 3.182688186269985e-05, + "loss": 1.3091, + "step": 8940 + }, + { + "epoch": 0.9440928270042194, + "grad_norm": 0.49634435772895813, + "learning_rate": 3.138819662828018e-05, + "loss": 1.3075, + "step": 8950 + }, + { + "epoch": 0.9451476793248945, + "grad_norm": 0.5057269930839539, + "learning_rate": 3.095555800363297e-05, + "loss": 1.3098, + "step": 8960 + }, + { + "epoch": 0.9462025316455697, + "grad_norm": 0.5679986476898193, + "learning_rate": 3.052888264542484e-05, + "loss": 1.3086, + "step": 8970 + }, + { + "epoch": 0.9472573839662447, + "grad_norm": 0.5982249975204468, + "learning_rate": 3.0108088359083675e-05, + "loss": 1.3059, + "step": 8980 + }, + { + "epoch": 0.9483122362869199, + "grad_norm": 0.5085403919219971, + "learning_rate": 2.9693094082964775e-05, + "loss": 1.3113, + "step": 8990 + }, + { + "epoch": 0.9493670886075949, + "grad_norm": 0.5180398225784302, + "learning_rate": 2.928381987273507e-05, + "loss": 1.3058, + "step": 9000 + }, + { + "epoch": 0.95042194092827, + "grad_norm": 0.5681703090667725, + "learning_rate": 2.8880186885972716e-05, + "loss": 1.3013, + "step": 9010 + }, + { + "epoch": 0.9514767932489452, + "grad_norm": 0.5079729557037354, + "learning_rate": 2.8482117366978935e-05, + "loss": 1.3023, + "step": 9020 + }, + { + "epoch": 0.9525316455696202, + "grad_norm": 0.5362443327903748, + "learning_rate": 2.808953463179918e-05, + "loss": 1.324, + "step": 9030 + }, + { + "epoch": 0.9535864978902954, + "grad_norm": 0.5186260938644409, + "learning_rate": 2.770236305345076e-05, + "loss": 1.2989, + "step": 9040 + }, + { + "epoch": 0.9546413502109705, + "grad_norm": 0.504482090473175, + "learning_rate": 2.732052804735409e-05, + "loss": 1.3103, + "step": 9050 + }, + { + "epoch": 0.9556962025316456, + "grad_norm": 0.527151346206665, + "learning_rate": 2.6943956056964773e-05, + "loss": 1.3105, + "step": 9060 + }, + { + "epoch": 0.9567510548523207, + "grad_norm": 
0.5054603219032288, + "learning_rate": 2.6572574539603643e-05, + "loss": 1.3101, + "step": 9070 + }, + { + "epoch": 0.9578059071729957, + "grad_norm": 0.5028514266014099, + "learning_rate": 2.6206311952482224e-05, + "loss": 1.3111, + "step": 9080 + }, + { + "epoch": 0.9588607594936709, + "grad_norm": 0.5420445799827576, + "learning_rate": 2.584509773892073e-05, + "loss": 1.3119, + "step": 9090 + }, + { + "epoch": 0.959915611814346, + "grad_norm": 0.49170857667922974, + "learning_rate": 2.5488862314756066e-05, + "loss": 1.2921, + "step": 9100 + }, + { + "epoch": 0.9609704641350211, + "grad_norm": 0.49011969566345215, + "learning_rate": 2.513753705493713e-05, + "loss": 1.304, + "step": 9110 + }, + { + "epoch": 0.9620253164556962, + "grad_norm": 0.5170599818229675, + "learning_rate": 2.4791054280304972e-05, + "loss": 1.3144, + "step": 9120 + }, + { + "epoch": 0.9630801687763713, + "grad_norm": 0.5054581761360168, + "learning_rate": 2.4449347244555043e-05, + "loss": 1.2991, + "step": 9130 + }, + { + "epoch": 0.9641350210970464, + "grad_norm": 0.6427635550498962, + "learning_rate": 2.4112350121379255e-05, + "loss": 1.3186, + "step": 9140 + }, + { + "epoch": 0.9651898734177216, + "grad_norm": 0.5237611532211304, + "learning_rate": 2.3779997991785207e-05, + "loss": 1.3053, + "step": 9150 + }, + { + "epoch": 0.9662447257383966, + "grad_norm": 0.5023574829101562, + "learning_rate": 2.3452226831590232e-05, + "loss": 1.3129, + "step": 9160 + }, + { + "epoch": 0.9672995780590717, + "grad_norm": 0.5252450704574585, + "learning_rate": 2.3128973499087785e-05, + "loss": 1.3111, + "step": 9170 + }, + { + "epoch": 0.9683544303797469, + "grad_norm": 0.49019819498062134, + "learning_rate": 2.2810175722883866e-05, + "loss": 1.3064, + "step": 9180 + }, + { + "epoch": 0.9694092827004219, + "grad_norm": 0.48925381898880005, + "learning_rate": 2.2495772089901067e-05, + "loss": 1.303, + "step": 9190 + }, + { + "epoch": 0.9704641350210971, + "grad_norm": 0.5147925019264221, + 
"learning_rate": 2.218570203354799e-05, + "loss": 1.3047, + "step": 9200 + }, + { + "epoch": 0.9715189873417721, + "grad_norm": 0.5135880708694458, + "learning_rate": 2.187990582205175e-05, + "loss": 1.3082, + "step": 9210 + }, + { + "epoch": 0.9725738396624473, + "grad_norm": 0.5313271284103394, + "learning_rate": 2.157832454695122e-05, + "loss": 1.3035, + "step": 9220 + }, + { + "epoch": 0.9736286919831224, + "grad_norm": 0.4924921691417694, + "learning_rate": 2.1280900111748943e-05, + "loss": 1.2948, + "step": 9230 + }, + { + "epoch": 0.9746835443037974, + "grad_norm": 0.5299881100654602, + "learning_rate": 2.0987575220719476e-05, + "loss": 1.2911, + "step": 9240 + }, + { + "epoch": 0.9757383966244726, + "grad_norm": 0.5006606578826904, + "learning_rate": 2.069829336787193e-05, + "loss": 1.3035, + "step": 9250 + }, + { + "epoch": 0.9767932489451476, + "grad_norm": 0.5131909847259521, + "learning_rate": 2.0412998826064695e-05, + "loss": 1.2981, + "step": 9260 + }, + { + "epoch": 0.9778481012658228, + "grad_norm": 0.4876004457473755, + "learning_rate": 2.0131636636270178e-05, + "loss": 1.3099, + "step": 9270 + }, + { + "epoch": 0.9789029535864979, + "grad_norm": 0.49453970789909363, + "learning_rate": 1.9854152596987523e-05, + "loss": 1.2969, + "step": 9280 + }, + { + "epoch": 0.979957805907173, + "grad_norm": 0.49736711382865906, + "learning_rate": 1.9580493253801253e-05, + "loss": 1.3048, + "step": 9290 + }, + { + "epoch": 0.9810126582278481, + "grad_norm": 0.5143839716911316, + "learning_rate": 1.9310605889083842e-05, + "loss": 1.3018, + "step": 9300 + }, + { + "epoch": 0.9820675105485233, + "grad_norm": 0.5679917335510254, + "learning_rate": 1.904443851184018e-05, + "loss": 1.305, + "step": 9310 + }, + { + "epoch": 0.9831223628691983, + "grad_norm": 0.5228298306465149, + "learning_rate": 1.87819398476921e-05, + "loss": 1.3001, + "step": 9320 + }, + { + "epoch": 0.9841772151898734, + "grad_norm": 0.4962661862373352, + "learning_rate": 1.8523059329000848e-05, + 
"loss": 1.3112, + "step": 9330 + }, + { + "epoch": 0.9852320675105485, + "grad_norm": 0.5303143858909607, + "learning_rate": 1.826774708512579e-05, + "loss": 1.3096, + "step": 9340 + }, + { + "epoch": 0.9862869198312236, + "grad_norm": 0.49372726678848267, + "learning_rate": 1.8015953932817347e-05, + "loss": 1.3194, + "step": 9350 + }, + { + "epoch": 0.9873417721518988, + "grad_norm": 0.5093536376953125, + "learning_rate": 1.7767631366742332e-05, + "loss": 1.3108, + "step": 9360 + }, + { + "epoch": 0.9883966244725738, + "grad_norm": 0.5410741567611694, + "learning_rate": 1.7522731550139926e-05, + "loss": 1.3102, + "step": 9370 + }, + { + "epoch": 0.989451476793249, + "grad_norm": 0.5336592793464661, + "learning_rate": 1.728120730560641e-05, + "loss": 1.2959, + "step": 9380 + }, + { + "epoch": 0.990506329113924, + "grad_norm": 0.5561039447784424, + "learning_rate": 1.704301210600693e-05, + "loss": 1.3, + "step": 9390 + }, + { + "epoch": 0.9915611814345991, + "grad_norm": 0.5306506156921387, + "learning_rate": 1.6808100065512536e-05, + "loss": 1.3138, + "step": 9400 + }, + { + "epoch": 0.9926160337552743, + "grad_norm": 0.5216425061225891, + "learning_rate": 1.657642593076074e-05, + "loss": 1.2874, + "step": 9410 + }, + { + "epoch": 0.9936708860759493, + "grad_norm": 0.4980105459690094, + "learning_rate": 1.634794507213793e-05, + "loss": 1.3082, + "step": 9420 + }, + { + "epoch": 0.9947257383966245, + "grad_norm": 0.4847981035709381, + "learning_rate": 1.6122613475181976e-05, + "loss": 1.3071, + "step": 9430 + }, + { + "epoch": 0.9957805907172996, + "grad_norm": 0.49484384059906006, + "learning_rate": 1.590038773210323e-05, + "loss": 1.3052, + "step": 9440 + }, + { + "epoch": 0.9968354430379747, + "grad_norm": 0.5197098851203918, + "learning_rate": 1.568122503342252e-05, + "loss": 1.315, + "step": 9450 + }, + { + "epoch": 0.9978902953586498, + "grad_norm": 0.5191103219985962, + "learning_rate": 1.5465083159724344e-05, + "loss": 1.2987, + "step": 9460 + }, + { + 
"epoch": 0.9989451476793249, + "grad_norm": 0.49531593918800354, + "learning_rate": 1.5251920473523708e-05, + "loss": 1.3093, + "step": 9470 + }, + { + "epoch": 1.0, + "grad_norm": 1.455296277999878, + "learning_rate": 1.5041695911245136e-05, + "loss": 1.2996, + "step": 9480 + } + ], + "logging_steps": 10, + "max_steps": 9480, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.6859976887218176e+16, + "train_batch_size": 1024, + "trial_name": null, + "trial_params": null +} diff --git a/saves-starcoder2/checkpoint-9480/training_args.bin b/saves-starcoder2/checkpoint-9480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..76a128cdc468799dea7cdffb174fa46484457deb --- /dev/null +++ b/saves-starcoder2/checkpoint-9480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e79df87a2f3b8dc30a7a89c42c51920d32761343252708fb5dae391c7bac69a +size 5176 diff --git a/saves-starcoder2/config.json b/saves-starcoder2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..89b9935265177c72718344a457fefeead4bef178 --- /dev/null +++ b/saves-starcoder2/config.json @@ -0,0 +1,27 @@ +{ + "architectures": [ + "Starcoder2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 50256, + "embedding_dropout": 0.0, + "eos_token_id": 50256, + "hidden_act": "gelu_pytorch_tanh", + "hidden_size": 256, + "initializer_range": 0.018042, + "intermediate_size": 1024, + "max_position_embeddings": 4096, + "model_type": "starcoder2", + "norm_epsilon": 1e-05, + "num_attention_heads": 8, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_dropout": 0.0, + "rope_theta": 10000.0, + "sliding_window": null, + "torch_dtype": 
"float32", + "transformers_version": "4.42.4", + "use_bias": true, + "use_cache": true, + "vocab_size": 2000 +} diff --git a/saves-starcoder2/generation_config.json b/saves-starcoder2/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..fb2eba6018c75d5bca061373b0ddaa2abf0a1f68 --- /dev/null +++ b/saves-starcoder2/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 50256, + "eos_token_id": 50256, + "transformers_version": "4.42.4" +} diff --git a/saves-starcoder2/model.safetensors b/saves-starcoder2/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0e41a67626d9677746201bbe3362bceb54d2410d --- /dev/null +++ b/saves-starcoder2/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7806c14b37217c9069cfbd0b643396fd0605501bbc80add41003c3849b8ea504 +size 7845408 diff --git a/saves-starcoder2/result.log b/saves-starcoder2/result.log new file mode 100644 index 0000000000000000000000000000000000000000..60428aa87b475bc7b1e18eb03c298df18533365b --- /dev/null +++ b/saves-starcoder2/result.log @@ -0,0 +1 @@ +{'train_runtime': 3151.4439, 'train_samples_per_second': 3080.05, 'train_steps_per_second': 3.008, 'train_loss': 1.6124926334694971, 'epoch': 1.0} \ No newline at end of file diff --git a/saves-starcoder2/special_tokens_map.json b/saves-starcoder2/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..f501d252ea85a678bc1b158a986c26c720ab3a1d --- /dev/null +++ b/saves-starcoder2/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, 
+ "single_word": false + } +} diff --git a/saves-starcoder2/tokenizer.json b/saves-starcoder2/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5346396e758a5c1ff62440b455262589af5c8660 --- /dev/null +++ b/saves-starcoder2/tokenizer.json @@ -0,0 +1,3893 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|endoftext|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|im_start|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|im_end|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFC" + }, + "pre_tokenizer": { + "type": "Sequence", + "pretokenizers": [ + { + "type": "Split", + "pattern": { + "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" + }, + "behavior": "Isolated", + "invert": false + }, + { + "type": "ByteLevel", + "add_prefix_space": false, + "trim_offsets": true, + "use_regex": false + } + ] + }, + "post_processor": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": false, + "use_regex": true + }, + "decoder": { + "type": "ByteLevel", + "add_prefix_space": true, + "trim_offsets": true, + "use_regex": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": null, + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|endoftext|>": 0, + "<|im_start|>": 1, + "<|im_end|>": 2, + "!": 3, + "\"": 4, + "#": 5, + "$": 6, + "%": 7, + "&": 8, + "'": 9, + "(": 10, + ")": 11, + "*": 12, + "+": 13, + ",": 14, + "-": 15, + ".": 16, + "/": 17, + "0": 18, + "1": 19, + "2": 20, + 
"3": 21, + "4": 22, + "5": 23, + "6": 24, + "7": 25, + "8": 26, + "9": 27, + ":": 28, + ";": 29, + "<": 30, + "=": 31, + ">": 32, + "?": 33, + "@": 34, + "A": 35, + "B": 36, + "C": 37, + "D": 38, + "E": 39, + "F": 40, + "G": 41, + "H": 42, + "I": 43, + "J": 44, + "K": 45, + "L": 46, + "M": 47, + "N": 48, + "O": 49, + "P": 50, + "Q": 51, + "R": 52, + "S": 53, + "T": 54, + "U": 55, + "V": 56, + "W": 57, + "X": 58, + "Y": 59, + "Z": 60, + "[": 61, + "\\": 62, + "]": 63, + "^": 64, + "_": 65, + "`": 66, + "a": 67, + "b": 68, + "c": 69, + "d": 70, + "e": 71, + "f": 72, + "g": 73, + "h": 74, + "i": 75, + "j": 76, + "k": 77, + "l": 78, + "m": 79, + "n": 80, + "o": 81, + "p": 82, + "q": 83, + "r": 84, + "s": 85, + "t": 86, + "u": 87, + "v": 88, + "w": 89, + "x": 90, + "y": 91, + "z": 92, + "{": 93, + "|": 94, + "}": 95, + "~": 96, + "¡": 97, + "¢": 98, + "£": 99, + "¤": 100, + "¥": 101, + "¦": 102, + "§": 103, + "¨": 104, + "©": 105, + "ª": 106, + "«": 107, + "¬": 108, + "®": 109, + "¯": 110, + "°": 111, + "±": 112, + "²": 113, + "³": 114, + "´": 115, + "µ": 116, + "¶": 117, + "·": 118, + "¸": 119, + "¹": 120, + "º": 121, + "»": 122, + "¼": 123, + "½": 124, + "¾": 125, + "¿": 126, + "Â": 127, + "Ã": 128, + "Ä": 129, + "Å": 130, + "Æ": 131, + "Ç": 132, + "É": 133, + "Ë": 134, + "Î": 135, + "Ï": 136, + "Ð": 137, + "Ñ": 138, + "â": 139, + "ã": 140, + "ä": 141, + "å": 142, + "æ": 143, + "ç": 144, + "è": 145, + "é": 146, + "î": 147, + "ï": 148, + "ð": 149, + "ă": 150, + "ć": 151, + "ĉ": 152, + "Ċ": 153, + "Ġ": 154, + "Ģ": 155, + "ģ": 156, + "Ĥ": 157, + "ĥ": 158, + "Ħ": 159, + "ħ": 160, + "Ĩ": 161, + "ĩ": 162, + "Ī": 163, + "ī": 164, + "Ĭ": 165, + "ĭ": 166, + "Į": 167, + "į": 168, + "İ": 169, + "ı": 170, + "IJ": 171, + "ij": 172, + "Ĵ": 173, + "ĵ": 174, + "Ķ": 175, + "ķ": 176, + "ĸ": 177, + "Ĺ": 178, + "ĺ": 179, + "Ļ": 180, + "ļ": 181, + "Ľ": 182, + "ľ": 183, + "Ŀ": 184, + "ŀ": 185, + "Ł": 186, + "ł": 187, + "Ń": 188, + "Ġt": 189, + "ä¸": 190, + "Ġa": 191, + "he": 192, + "in": 
193, + "re": 194, + "çļ": 195, + "çļĦ": 196, + "on": 197, + "Ġthe": 198, + "er": 199, + "äº": 200, + "Ġs": 201, + "at": 202, + "Ġw": 203, + "Ġo": 204, + "en": 205, + "it": 206, + "æľ": 207, + "Ġc": 208, + "åı": 209, + "or": 210, + "es": 211, + "is": 212, + "an": 213, + "ä»": 214, + "åħ": 215, + "Ġb": 216, + "ing": 217, + "Ġp": 218, + "Ġf": 219, + "Ġan": 220, + "ou": 221, + "ed": 222, + "Ġm": 223, + "ar": 224, + "al": 225, + "Ġto": 226, + "è¿": 227, + "å¤": 228, + "Ġof": 229, + "å®": 230, + "Ġin": 231, + "åĪ": 232, + "Ġd": 233, + "ä½": 234, + "Ġand": 235, + "æĺ": 236, + "Ġth": 237, + "ic": 238, + "åIJ": 239, + "le": 240, + "åľ": 241, + "ro": 242, + "ion": 243, + "çĶ": 244, + "Ġh": 245, + "as": 246, + "æĪ": 247, + "åĬ": 248, + "ä¸Ģ": 249, + "ä¹": 250, + ".Ċ": 251, + "ç»": 252, + ",å": 253, + "æĸ": 254, + "æĹ": 255, + "è¯": 256, + "æĺ¯": 257, + "è®": 258, + "Ġl": 259, + "ent": 260, + "om": 261, + "åĽ": 262, + "ä¼": 263, + "Ġe": 264, + "st": 265, + "Ġre": 266, + "ve": 267, + "Ġn": 268, + "å°": 269, + "ll": 270, + "ĠI": 271, + "å¹": 272, + "ly": 273, + "åľ¨": 274, + "æĿ": 275, + "ct": 276, + "åį": 277, + "人": 278, + "æľī": 279, + "Ġbe": 280, + "æī": 281, + "Ġg": 282, + "ä¸į": 283, + "ut": 284, + "å¼": 285, + "Ġis": 286, + "å·": 287, + "å¸": 288, + "åŃ": 289, + "è¡": 290, + "Ġthat": 291, + "âĢ": 292, + "et": 293, + "éĩ": 294, + "ĠT": 295, + "åº": 296, + "äºĨ": 297, + "ot": 298, + "id": 299, + "se": 300, + "å¾": 301, + "ä¸Ń": 302, + "Ġon": 303, + "åĩ": 304, + "ĥ½": 305, + "am": 306, + "ch": 307, + "æķ": 308, + "ĠA": 309, + "åĮ": 310, + "éĢ": 311, + "ce": 312, + "åĨ": 313, + "Ġfor": 314, + "ĠS": 315, + "im": 316, + "ow": 317, + "Ġy": 318, + "èĢ": 319, + "å¯": 320, + "ĠC": 321, + "Ġit": 322, + "Ġu": 323, + ",æ": 324, + "è§": 325, + "ver": 326, + "ig": 327, + "个": 328, + "为": 329, + "çĽ": 330, + "èµ": 331, + "ation": 332, + "ay": 333, + "è´": 334, + "åij": 335, + "ld": 336, + "åĴ": 337, + "çİ": 338, + "ad": 339, + "大": 340, + "éĻ": 341, + "Ġpro": 342, + "åĴĮ": 343, + "å¥": 
344, + "ĠM": 345, + "Ġst": 346, + "以": 347, + "ur": 348, + "æĢ": 349, + "å¹´": 350, + "æĬ": 351, + "å¿": 352, + "å±": 353, + "è¿Ļ": 354, + "ä¸Ĭ": 355, + "æł": 356, + "æ³": 357, + "åİ": 358, + "æĪij": 359, + "ith": 360, + "èĩ": 361, + "ç§": 362, + "¦ģ": 363, + "æŶ": 364, + "éĹ": 365, + "her": 366, + "çī": 367, + "ers": 368, + "ĠP": 369, + "Ġyou": 370, + "è¦ģ": 371, + "Ġwh": 372, + "æĶ": 373, + "ol": 374, + "èĥ½": 375, + "ä¼ļ": 376, + "Ġas": 377, + "ag": 378, + "çĶ¨": 379, + "çº": 380, + "è¡Į": 381, + "æŀ": 382, + "il": 383, + "å½": 384, + "åı¯": 385, + "æ°": 386, + "ä¿": 387, + "ir": 388, + "åıij": 389, + "Ġwith": 390, + "çIJ": 391, + "æŃ": 392, + "od": 393, + "åĪ°": 394, + "çľ": 395, + "åĽ½": 396, + "if": 397, + "op": 398, + "em": 399, + "éĿ": 400, + "Ġwe": 401, + "æİ": 402, + "æĿ¥": 403, + "è½": 404, + "ĠB": 405, + "ul": 406, + "ĠĠ": 407, + "ç«": 408, + "ri": 409, + "Ġcon": 410, + "æĥ": 411, + "ä¸ļ": 412, + "åĩº": 413, + "Ġcom": 414, + "é¢": 415, + "ä½ľ": 416, + "çĶŁ": 417, + "ĠThe": 418, + "æµ": 419, + "äºİ": 420, + "ke": 421, + "ra": 422, + "ist": 423, + "çŃ": 424, + "ill": 425, + "Ġare": 426, + "Ġ(": 427, + "æĭ": 428, + "对": 429, + "nd": 430, + "æ²": 431, + "ore": 432, + "Ġhe": 433, + "ç¬": 434, + "æĪIJ": 435, + "ter": 436, + "è¾": 437, + "æ¬": 438, + "Ġex": 439, + "Ġde": 440, + "'s": 441, + "âĢĻ": 442, + "th": 443, + "们": 444, + "ate": 445, + "æĸ¹": 446, + "ĠW": 447, + "ç½": 448, + "å¤ļ": 449, + "ä¾": 450, + "Ġat": 451, + "è¿ĩ": 452, + "æĮ": 453, + "Ġwas": 454, + "ess": 455, + "çİ°": 456, + "æ³ķ": 457, + "ew": 458, + "est": 459, + "èĩª": 460, + "pp": 461, + "ĠH": 462, + "åĵ": 463, + "æĦ": 464, + "åī": 465, + "åѦ": 466, + "ä¸ĭ": 467, + "åIJİ": 468, + "ab": 469, + "Ġha": 470, + "å·¥": 471, + "çIJĨ": 472, + "and": 473, + "¦Ĥ": 474, + "Ġor": 475, + "åħ¬": 476, + "ity": 477, + "æ¯": 478, + "ä¹Ł": 479, + "ment": 480, + "å®ļ": 481, + "éĺ": 482, + "ĠD": 483, + "åĪĨ": 484, + "æľ¬": 485, + "åĬ¨": 486, + "æĸ°": 487, + "æ±": 488, + "æĹ¥": 489, + "æķ°": 490, + "å®ŀ": 491, 
+ "res": 492, + "ac": 493, + "åľ°": 494, + "us": 495, + "å¼Ģ": 496, + "el": 497, + "åŁ": 498, + "un": 499, + "ĠG": 500, + "ect": 501, + "家": 502, + "ç¨": 503, + "rom": 504, + "第": 505, + "ĠR": 506, + "Ġr": 507, + "ht": 508, + "Ġv": 509, + "å°±": 510, + "art": 511, + "ĠF": 512, + "åħ¨": 513, + "os": 514, + "è¿Ľ": 515, + "Ġby": 516, + "nt": 517, + "Ġsu": 518, + "åIJĮ": 519, + "ant": 520, + "Ġnot": 521, + "åĢ": 522, + "æľĪ": 523, + "ç®": 524, + "ä»ĸ": 525, + "Ġwor": 526, + "éģ": 527, + "ies": 528, + "æĸĩ": 529, + "åĮĸ": 530, + "ang": 531, + "Ġthis": 532, + "qu": 533, + "ive": 534, + "æı": 535, + "Ġfrom": 536, + "éķ": 537, + "å¸Ĥ": 538, + "Ġal": 539, + "ĠJ": 540, + "ç»ı": 541, + "ort": 542, + "æ¸": 543, + "Ġ\"": 544, + "ä¸Ģ个": 545, + "ally": 546, + "end": 547, + "Ġhave": 548, + "ould": 549, + "um": 550, + "é¡": 551, + "è°": 552, + "产": 553, + "åģ": 554, + "å¿ĥ": 555, + "çĤ": 556, + "åĬĽ": 557, + "ĠN": 558, + "èĤ": 559, + "ain": 560, + "out": 561, + "ä¸İ": 562, + "og": 563, + "Ġab": 564, + "èģ": 565, + "åijĬ": 566, + "æľº": 567, + "ä½ł": 568, + "ĠE": 569, + "æĽ": 570, + "æľĢ": 571, + "ä¹ĭ": 572, + "好": 573, + "æį": 574, + "çŃī": 575, + "éĥ½": 576, + "Ġdo": 577, + "è·": 578, + "é«": 579, + "Ġse": 580, + "è¶": 581, + "ome": 582, + "å¾Ĺ": 583, + "ust": 584, + "éĥ": 585, + "é«ĺ": 586, + "ard": 587, + "Ġcomp": 588, + "åħ³": 589, + "æĻ": 590, + "ä½ĵ": 591, + "主": 592, + "åīį": 593, + "å¦Ĥ": 594, + "..": 595, + "our": 596, + "äºĭ": 597, + "ç¤": 598, + "åIJĪ": 599, + "èĢħ": 600, + "éĹ´": 601, + "iv": 602, + "ight": 603, + "å§": 604, + "æ°ij": 605, + "éĿ¢": 606, + "è¿ĺ": 607, + "æīĢ": 608, + "pt": 609, + "è¢": 610, + "ĠL": 611, + "Ġcan": 612, + "çŁ": 613, + "éĽ": 614, + "åħ¶": 615, + "ç¨ĭ": 616, + "èĢĮ": 617, + "Ġle": 618, + "æĢ§": 619, + "çĤ¹": 620, + "rou": 621, + "all": 622, + "度": 623, + "ear": 624, + "ere": 625, + "oc": 626, + "ost": 627, + "age": 628, + "计": 629, + "åĬ¡": 630, + "ĠâĢ": 631, + "Ġwhe": 632, + "Ġhas": 633, + "ok": 634, + "说": 635, + "代": 636, + "æ´": 637, + 
"被": 638, + "çĿ": 639, + "åŃIJ": 640, + "ure": 641, + "Ġch": 642, + "å°ı": 643, + "ther": 644, + "ç³": 645, + "åĬł": 646, + "Ġus": 647, + "åĿ": 648, + "Ġint": 649, + "éĢļ": 650, + "æıIJ": 651, + "表": 652, + "éĥ¨": 653, + "æĪij们": 654, + "are": 655, + "Ġme": 656, + "Ġsh": 657, + "å»": 658, + "ĠO": 659, + "éķ¿": 660, + "ç¾": 661, + "Ġbut": 662, + "åľº": 663, + "çĦ": 664, + "Ġma": 665, + "Ġj": 666, + "åĮº": 667, + "å±ķ": 668, + "Ġk": 669, + "éĩį": 670, + "ich": 671, + "âĢĻs": 672, + "天": 673, + "éĤ": 674, + "车": 675, + "ç³»": 676, + "Ġgo": 677, + "设": 678, + "æĥħ": 679, + "ram": 680, + "ud": 681, + "åı¯ä»¥": 682, + "çł": 683, + "件": 684, + "Ġwill": 685, + "vel": 686, + "åĨħ": 687, + "ä¸ī": 688, + "ç½ij": 689, + "æŁ": 690, + "åħ¥": 691, + "ook": 692, + "Ġli": 693, + "çľĭ": 694, + "èµĦ": 695, + "Ġmore": 696, + "éľ": 697, + "æ¶": 698, + "èī": 699, + "æŀľ": 700, + "Ġthey": 701, + "pe": 702, + "æĦı": 703, + "Ġall": 704, + "ine": 705, + "åijĺ": 706, + "ions": 707, + "éĻ¢": 708, + "Ġso": 709, + "æ¡": 710, + "æľŁ": 711, + "çĦ¶": 712, + "åĵģ": 713, + "æģ": 714, + "åıĬ": 715, + "çĶµ": 716, + "å¼ı": 717, + "éĩĮ": 718, + "缮": 719, + "Ġne": 720, + "å¹³": 721, + "åºĶ": 722, + "éļ": 723, + "éĶ": 724, + "缸": 725, + "ĠĠĠĠ": 726, + "pl": 727, + "çĿĢ": 728, + "å¾Ī": 729, + "ä¿¡": 730, + "å°Ĩ": 731, + "ie": 732, + "çĻ": 733, + "ated": 734, + "å¤ĸ": 735, + "ä¿Ŀ": 736, + "å½ĵ": 737, + "éĩı": 738, + "Ġpl": 739, + "ä½Ĩ": 740, + "æĺİ": 741, + "èĬ": 742, + "Ġabout": 743, + "éĩij": 744, + "The": 745, + "é¦": 746, + "av": 747, + "ast": 748, + "ide": 749, + "ind": 750, + "èµ·": 751, + "éģĵ": 752, + "Ġone": 753, + "æĽ´": 754, + "gh": 755, + "ä»İ": 756, + "Ġwho": 757, + "ä¹Ī": 758, + ".\"": 759, + "常": 760, + "ç±": 761, + "çķ": 762, + "ĠIn": 763, + "åı¸": 764, + "iz": 765, + "ĠU": 766, + "ä¸ŃåĽ½": 767, + "建": 768, + "Ġ-": 769, + "äºĮ": 770, + "é£": 771, + "one": 772, + "Ġwork": 773, + "Ġte": 774, + "Ġim": 775, + "ff": 776, + "次": 777, + "åĪ©": 778, + "解": 779, + "çī¹": 780, + "äºĽ": 781, + "'t": 
782, + "per": 783, + "so": 784, + "æ¯Ķ": 785, + "æį®": 786, + "æĹł": 787, + "very": 788, + "çī©": 789, + "çĸ": 790, + "åħĥ": 791, + "ç©": 792, + "ary": 793, + "使": 794, + "åİŁ": 795, + "Ġwhich": 796, + "ial": 797, + "orm": 798, + "Ġother": 799, + "able": 800, + "gram": 801, + "ans": 802, + "åİ»": 803, + "ire": 804, + "ime": 805, + "Ġtheir": 806, + "åħ¬åı¸": 807, + "Ġad": 808, + "ä½į": 809, + "å¢": 810, + "ç¥": 811, + "æīĭ": 812, + "å·¥ä½ľ": 813, + "åķ": 814, + "ç¡": 815, + "ge": 816, + "é©": 817, + "Ġen": 818, + "ĠTh": 819, + "è¿IJ": 820, + "åĪ¶": 821, + "Ġout": 822, + "ack": 823, + "rit": 824, + "cc": 825, + "ç§į": 826, + "åįģ": 827, + "ong": 828, + "æľį": 829, + "ations": 830, + "Ġcont": 831, + "ber": 832, + "æѤ": 833, + "éħ": 834, + "åIJį": 835, + "ult": 836, + "éĤ£": 837, + "Ġres": 838, + "du": 839, + "çĪ": 840, + "angu": 841, + "ame": 842, + "æł·": 843, + "ç¾İ": 844, + "æĥ³": 845, + "æİ¥": 846, + "èĭ": 847, + "ous": 848, + "ry": 849, + "交": 850, + "Ġhis": 851, + "没": 852, + "ç´": 853, + "èIJ": 854, + "Ġcl": 855, + "Ġup": 856, + "âĢĿ": 857, + "åı°": 858, + "Ġapp": 859, + "Ġsome": 860, + "éĹ®": 861, + "é¢ĺ": 862, + "ical": 863, + "æĪĸ": 864, + "au": 865, + "åĽŀ": 866, + "Ġlike": 867, + "æŃ£": 868, + "ç»ĵ": 869, + "ç¼": 870, + "åıĺ": 871, + "Ġman": 872, + "ue": 873, + "éĵ": 874, + "ip": 875, + "æĬĢ": 876, + "ap": 877, + "Ġbec": 878, + "),": 879, + "绣": 880, + "å·±": 881, + "ĠSt": 882, + "åıª": 883, + "æĶ¿": 884, + "ĠIt": 885, + "èģĶ": 886, + "ä»»": 887, + "Ġsc": 888, + "å®ī": 889, + ",å°": 890, + "act": 891, + "çĶ±": 892, + "Ġsa": 893, + ",\"": 894, + "Ġmy": 895, + "æĮģ": 896, + "è°ĥ": 897, + "å·²": 898, + "ach": 899, + "Ġthere": 900, + "åŁº": 901, + "Ġlangu": 902, + "ays": 903, + "èĩªå·±": 904, + "ble": 905, + "ice": 906, + "客": 907, + "Ġprogram": 908, + "ç§ij": 909, + "åģļ": 910, + "éľĢ": 911, + "æ±Ĥ": 912, + "çŁ¥": 913, + "举": 914, + "Ġwere": 915, + ",å¹": 916, + "ign": 917, + "ory": 918, + "king": 919, + "éĴ": 920, + "ç²": 921, + "çĥ": 922, + "åŀ": 923, + 
"Ġwould": 924, + "åIJij": 925, + "Ġyour": 926, + "now": 927, + "Ġar": 928, + "èĦ": 929, + "int": 930, + "è®°": 931, + "Ġyear": 932, + "项": 933, + ",è¿Ļ": 934, + "velop": 935, + "åĽł": 936, + "èº": 937, + "æĪ·": 938, + "ä¸ĩ": 939, + "cl": 940, + "ep": 941, + "ject": 942, + "åĪĽ": 943, + "èį": 944, + "两": 945, + "Ġpart": 946, + "让": 947, + "è¯ģ": 948, + "Ġnew": 949, + "æģ¯": 950, + "ft": 951, + "Ġwhat": 952, + "Ġun": 953, + "çĹ": 954, + "æĸ¯": 955, + "Ġits": 956, + "示": 957, + "å¤Ħ": 958, + "Ġdis": 959, + "Ġwhen": 960, + "èĥ": 961, + "书": 962, + "Ġthem": 963, + "åIJĦ": 964, + "强": 965, + "ĠâĢľ": 966, + "åĨ³": 967, + "ub": 968, + "åıĹ": 969, + "eop": 970, + "Ġev": 971, + "åĪ«": 972, + "è§Ħ": 973, + "æ·": 974, + "ĠV": 975, + "åķĨ": 976, + "æ´»": 977, + "æ¡Ī": 978, + "æĤ": 979, + "åıijå±ķ": 980, + "ä¼ģ": 981, + "ings": 982, + "管": 983, + "eople": 984, + "ose": 985, + "Ġif": 986, + "身": 987, + "ace": 988, + "form": 989, + "è·¯": 990, + "å®Į": 991, + "款": 992, + "Ġalso": 993, + "ob": 994, + "ç«ł": 995, + "Ġbeen": 996, + "Ġtime": 997, + "Ġthan": 998, + "ance": 999, + "ass": 1000, + ",ä½Ĩ": 1001, + "vers": 1002, + "ç«ĭ": 1003, + "éª": 1004, + "é¢Ĩ": 1005, + "导": 1006, + "æĿ¡": 1007, + "ree": 1008, + "æľ¯": 1009, + "ode": 1010, + "å£": 1011, + "æ»": 1012, + "æµ·": 1013, + ",åľ¨": 1014, + "Ġhow": 1015, + "çIJĥ": 1016, + "线": 1017, + "æĦŁ": 1018, + "èĩ³": 1019, + "æº": 1020, + "ase": 1021, + "Ġper": 1022, + "èİ": 1023, + "Ġher": 1024, + "æłĩ": 1025, + "ence": 1026, + "åħ·": 1027, + "åŀĭ": 1028, + ",ä¸į": 1029, + "æī§": 1030, + "Ġknow": 1031, + "Ġimp": 1032, + "Ġqu": 1033, + "Ġsy": 1034, + "åħ±": 1035, + "ru": 1036, + "åĢ¼": 1037, + "ä¼ģä¸ļ": 1038, + "åĸ": 1039, + "åĻ": 1040, + "Ġany": 1041, + "认": 1042, + "æµģ": 1043, + "ç±»": 1044, + "Ġhad": 1045, + ".ĊĊ": 1046, + "ite": 1047, + "åħī": 1048, + "ction": 1049, + "ia": 1050, + "Ġpeople": 1051, + "wo": 1052, + "èĻ": 1053, + "ors": 1054, + "åįķ": 1055, + "Ġsp": 1056, + "çľģ": 1057, + "æĶ¶": 1058, + "è£": 1059, + "é»": 1060, + 
"ĠY": 1061, + "åįİ": 1062, + "è´¨": 1063, + "人æ°ij": 1064, + "æķĻ": 1065, + "éĢī": 1066, + "ile": 1067, + "社": 1068, + "con": 1069, + "ink": 1070, + "Ġfe": 1071, + "æķĪ": 1072, + "Ġsaid": 1073, + "èĪ": 1074, + "èµĽ": 1075, + "mer": 1076, + "å½±": 1077, + "è¾¾": 1078, + "æ°´": 1079, + "æݨ": 1080, + "æľįåĬ¡": 1081, + "è¯ī": 1082, + "åĽĽ": 1083, + "olog": 1084, + "ç»Ļ": 1085, + "Ġget": 1086, + "羣": 1087, + "åĻ¨": 1088, + "ition": 1089, + "fere": 1090, + "Ġdevelop": 1091, + "æĮĩ": 1092, + "irst": 1093, + "stem": 1094, + "ces": 1095, + "ric": 1096, + "Ġmat": 1097, + "éĻIJ": 1098, + "ount": 1099, + "ä¼ł": 1100, + "ause": 1101, + "è¿Ľè¡Į": 1102, + "ç¦": 1103, + "ĠCh": 1104, + "è´¹": 1105, + "ĊĊ": 1106, + "æĬ¥": 1107, + "çłĶ": 1108, + "åıĸ": 1109, + "该": 1110, + "ä¾Ľ": 1111, + "æ¼": 1112, + "ress": 1113, + "ç®Ĺ": 1114, + "ç¡®": 1115, + "Ġour": 1116, + "è¿ij": 1117, + "æķ´": 1118, + "没æľī": 1119, + "广": 1120, + "缴": 1121, + "ä¸ĵ": 1122, + "Ġjust": 1123, + "ov": 1124, + "Ġdes": 1125, + "注": 1126, + "æĶ¾": 1127, + "ç«Ļ": 1128, + "èī²": 1129, + "ark": 1130, + "å¤į": 1131, + "ents": 1132, + "éĽĨ": 1133, + "ak": 1134, + "ail": 1135, + "Ġrec": 1136, + "Ġshe": 1137, + "ens": 1138, + "马": 1139, + "æ¨": 1140, + "è¯Ŀ": 1141, + "请": 1142, + "éĹ®é¢ĺ": 1143, + "è¥": 1144, + "ĠK": 1145, + "æŶéĹ´": 1146, + "lic": 1147, + "reat": 1148, + "ä¾ĭ": 1149, + ",并": 1150, + "\",": 1151, + "ä¹ł": 1152, + "ild": 1153, + "ä»Ĭ": 1154, + "ä¼ĺ": 1155, + "é£İ": 1156, + "级": 1157, + "西": 1158, + "çģ": 1159, + "容": 1160, + "å®ĥ": 1161, + ",åĽ": 1162, + "æī§è¡Į": 1163, + "è®®": 1164, + ").": 1165, + "Ġser": 1166, + "åıĤ": 1167, + "å°ij": 1168, + "æīĵ": 1169, + "æ£": 1170, + "Ġag": 1171, + "ä¹ī": 1172, + "å°Ķ": 1173, + "æĶ¯": 1174, + "Ġinto": 1175, + "åı·": 1176, + "Ġdif": 1177, + "éĿŀ": 1178, + "è±": 1179, + "ç»Ħ": 1180, + "éĢł": 1181, + "Ġuse": 1182, + ":Ċ": 1183, + "åĬ©": 1184, + "æºIJ": 1185, + "?Ċ": 1186, + "éŨ": 1187, + "ound": 1188, + "æ®": 1189, + "ä»·": 1190, + "åħĪ": 1191, + "ç»Ń": 1192, + "Ġpre": 
1193, + "被åijĬ": 1194, + "è¯Ĩ": 1195, + "ĠHe": 1196, + "çĬ": 1197, + "女": 1198, + "æĢ»": 1199, + "æŀĦ": 1200, + "çݯ": 1201, + "èĬĤ": 1202, + "ish": 1203, + "ari": 1204, + "æŃ¥": 1205, + "wn": 1206, + "èĮ": 1207, + "å¸ĥ": 1208, + "åĮĹ": 1209, + "å§ĭ": 1210, + "éĺŁ": 1211, + "Ġlanguage": 1212, + "Ġwrit": 1213, + "éĢŁ": 1214, + "ple": 1215, + "Ġoff": 1216, + "æĺĵ": 1217, + "åĩĨ": 1218, + "Ġfirst": 1219, + "å²": 1220, + "ä¸ĸ": 1221, + "Ġover": 1222, + "ç²¾": 1223, + "ove": 1224, + "æľª": 1225, + "Ġmost": 1226, + "æĬķ": 1227, + "Ġtr": 1228, + "èĢģ": 1229, + ",æĪij": 1230, + "ough": 1231, + "çħ": 1232, + "æł¼": 1233, + "使çĶ¨": 1234, + "te": 1235, + "erm": 1236, + "ular": 1237, + "å¢ŀ": 1238, + "ord": 1239, + "å·ŀ": 1240, + "第ä¸Ģ": 1241, + "游": 1242, + "çķĮ": 1243, + "ä½ķ": 1244, + "æĢģ": 1245, + "ä»Ģ": 1246, + "çİĩ": 1247, + "Ġtwo": 1248, + "ck": 1249, + "头": 1250, + "Ġno": 1251, + "Ġsystem": 1252, + "模": 1253, + "Ġacc": 1254, + "æ²»": 1255, + "设计": 1256, + "lect": 1257, + "Ġonly": 1258, + ",ä¹Ł": 1259, + "éŁ": 1260, + "éĢļè¿ĩ": 1261, + "æ¶Ī": 1262, + "éľĢè¦ģ": 1263, + "åĮħ": 1264, + "åı¯èĥ½": 1265, + "åĽ¢": 1266, + "è§Ĩ": 1267, + "å¦": 1268, + "åŁİ": 1269, + "å§Ķ": 1270, + "Ġneed": 1271, + "ne": 1272, + "oll": 1273, + "ics": 1274, + "åĬŀ": 1275, + "æ¢": 1276, + "Ġway": 1277, + "åĬŁ": 1278, + "转": 1279, + "Ġbecause": 1280, + "ç³»ç»Ł": 1281, + "并": 1282, + "ŀį": 1283, + "é¦ĸ": 1284, + "Ġbet": 1285, + "æ°Ķ": 1286, + "ss": 1287, + "çĭ": 1288, + "å¸Ī": 1289, + "Ġprodu": 1290, + "æķ°æį®": 1291, + "åĽ¾": 1292, + "ages": 1293, + "å°±æĺ¯": 1294, + "Ġro": 1295, + "她": 1296, + "æ¯ı": 1297, + "ä»Ģä¹Ī": 1298, + "åĶ": 1299, + "å¿«": 1300, + "å±±": 1301, + "Ġmake": 1302, + "åĮ»": 1303, + "Ġcomm": 1304, + "è¶ħ": 1305, + ",èĢĮ": 1306, + "åĨį": 1307, + "æĶ¹": 1308, + "éħį": 1309, + "è¿Ļ个": 1310, + "æĭī": 1311, + "åĢĻ": 1312, + "Ġbook": 1313, + "å±Ģ": 1314, + "Ġcould": 1315, + "ĠĠĠĠĠĠĠĠ": 1316, + "sel": 1317, + "产åĵģ": 1318, + "äºĶ": 1319, + "åı£": 1320, + "nder": 1321, + "ĠThis": 1322, + 
"çĪ±": 1323, + "Ġevery": 1324, + "å¼ķ": 1325, + "åŃĹ": 1326, + "å½¢": 1327, + "Ġspe": 1328, + "Ġwant": 1329, + "åŃĺ": 1330, + "æĺ¾": 1331, + "è§ģ": 1332, + "è§Ĥ": 1333, + "åįĹ": 1334, + "uch": 1335, + "we": 1336, + "åħļ": 1337, + "è¾ĥ": 1338, + "ĠWe": 1339, + "Ġpo": 1340, + "hed": 1341, + "rib": 1342, + "æĪĺ": 1343, + ",åį": 1344, + "own": 1345, + "åĪĻ": 1346, + "èŀį": 1347, + "ures": 1348, + "åĪĿ": 1349, + "eth": 1350, + "ĠUn": 1351, + "ç¥ŀ": 1352, + "ates": 1353, + "å¼ł": 1354, + "åŁŁ": 1355, + "ä¹IJ": 1356, + "ä¸Ķ": 1357, + "Ġthese": 1358, + "åĪĹ": 1359, + "ä½ı": 1360, + "ä¾Ŀ": 1361, + "å¸Ĥåľº": 1362, + "Ġbu": 1363, + "æĿĥ": 1364, + "iew": 1365, + "åĨµ": 1366, + "æŁIJ": 1367, + "Ġadd": 1368, + "br": 1369, + "æīį": 1370, + "æ·±": 1371, + "éªĮ": 1372, + "Ġdon": 1373, + "ç»Ī": 1374, + "åįı": 1375, + "ä»ĸ们": 1376, + "ning": 1377, + "Ġmay": 1378, + "约": 1379, + "oy": 1380, + "带": 1381, + "ting": 1382, + "Ġact": 1383, + ",å¯": 1384, + "论": 1385, + "æŁ¥": 1386, + "称": 1387, + "Ġcons": 1388, + "æĬĢæľ¯": 1389, + "cess": 1390, + "èĢĥ": 1391, + "èĤ¡": 1392, + "li": 1393, + "审": 1394, + "çīĩ": 1395, + "ments": 1396, + "ward": 1397, + "ware": 1398, + "éļ¾": 1399, + "ava": 1400, + "è§ī": 1401, + "足": 1402, + ",以": 1403, + "ual": 1404, + "èģĮ": 1405, + "round": 1406, + "æĻº": 1407, + "Ġinter": 1408, + "è¯Ń": 1409, + "chn": 1410, + "çħ§": 1411, + "get": 1412, + "æŶåĢĻ": 1413, + "Ġsuch": 1414, + "åıĪ": 1415, + "åįĩ": 1416, + "ĠRe": 1417, + "Ġworld": 1418, + "çĻ¾": 1419, + "Ġmod": 1420, + "è¿ŀ": 1421, + "riend": 1422, + "any": 1423, + "å®ŀçİ°": 1424, + "è¶Ĭ": 1425, + "åºı": 1426, + "çĹħ": 1427, + "ons": 1428, + "ating": 1429, + ",ä¸Ģ": 1430, + "åĥ": 1431, + "åĪ¤": 1432, + "ĠPro": 1433, + "hat": 1434, + "Ġrep": 1435, + "ian": 1436, + "ming": 1437, + "æ¸ħ": 1438, + "ced": 1439, + "ool": 1440, + "ood": 1441, + "å¤ĩ": 1442, + "Ġdoes": 1443, + "ä¿¡æģ¯": 1444, + "èIJ¥": 1445, + "æ£Ģ": 1446, + "Ġta": 1447, + "Ġunder": 1448, + ",æľī": 1449, + "é¥": 1450, + "éĩĩ": 1451, + "ource": 1452, + 
"Ġwhere": 1453, + "aw": 1454, + "pen": 1455, + "Ġdid": 1456, + "ade": 1457, + "clud": 1458, + "fter": 1459, + "ks": 1460, + "Ġind": 1461, + "roup": 1462, + "les": 1463, + "Ġthen": 1464, + "ener": 1465, + "åıį": 1466, + "çĸĹ": 1467, + "ond": 1468, + "Ġprov": 1469, + "究": 1470, + "Ġam": 1471, + "Ġcode": 1472, + "Ġyears": 1473, + "åĩ½": 1474, + "æĬĬ": 1475, + "ific": 1476, + "æ±Ł": 1477, + "rough": 1478, + ",å°±": 1479, + "æ¼Ķ": 1480, + "æľĽ": 1481, + "åĩł": 1482, + "cept": 1483, + "pect": 1484, + "京": 1485, + "iss": 1486, + "Ġmany": 1487, + "ology": 1488, + "Ġdiffere": 1489, + "å¾·": 1490, + "份": 1491, + "æŀģ": 1492, + "空": 1493, + "èİ·": 1494, + "åIJ¬": 1495, + "éĻħ": 1496, + "ious": 1497, + "oss": 1498, + "Ġeven": 1499, + "èĮĥ": 1500, + "å¦Ĥæŀľ": 1501, + "lp": 1502, + "æ¹": 1503, + "Ġpers": 1504, + "èĩ´": 1505, + "Ġvery": 1506, + "hen": 1507, + "row": 1508, + "æĪ¿": 1509, + "è´£": 1510, + "Ġatt": 1511, + "æıIJä¾Ľ": 1512, + "çĬ¶": 1513, + ")Ċ": 1514, + "çļĦ人": 1515, + "ative": 1516, + "è¯ķ": 1517, + "ational": 1518, + "ata": 1519, + "æĸŃ": 1520, + "è´Ń": 1521, + "Ġstart": 1522, + "å¨": 1523, + "ident": 1524, + "éĩįè¦ģ": 1525, + "xt": 1526, + "Ġread": 1527, + "å¾ĭ": 1528, + "Ġused": 1529, + "Ġfriend": 1530, + "ç»´": 1531, + ",å¥": 1532, + "æĢĿ": 1533, + "Ġhelp": 1534, + "Ġsee": 1535, + "象": 1536, + "åĥı": 1537, + "mat": 1538, + "å¤Ł": 1539, + "Ġthink": 1540, + "pport": 1541, + "èĤ²": 1542, + "ph": 1543, + "Ġmuch": 1544, + "åĩ½æķ°": 1545, + "ec": 1546, + "许": 1547, + "Ġlook": 1548, + "积": 1549, + "æŀĹ": 1550, + "ĠBut": 1551, + "âĢĻt": 1552, + "fore": 1553, + "Ħ¿": 1554, + "Ġthrough": 1555, + "æĸĩåĮĸ": 1556, + "æĥħåĨµ": 1557, + "bs": 1558, + "Ġrel": 1559, + "第äºĮ": 1560, + "ng": 1561, + "·": 1562, + "æħ": 1563, + "ä¼Ĺ": 1564, + "Ġem": 1565, + "å·²ç»ı": 1566, + "社ä¼ļ": 1567, + "Ġinc": 1568, + "Ġwell": 1569, + "满": 1570, + "æij": 1571, + "亿": 1572, + "åıĭ": 1573, + "åİĨ": 1574, + "ick": 1575, + "other": 1576, + "åĨĻ": 1577, + "éĻ©": 1578, + "çĥŃ": 1579, + "ible": 1580, + 
"Ġph": 1581, + "ĠCom": 1582, + "ç½®": 1583, + ",æĪij们": 1584, + "çĶ·": 1585, + "Ġreally": 1586, + "èµ°": 1587, + "Ġproject": 1588, + "æĸ¹æ³ķ": 1589, + "Ġown": 1590, + "éĻ¤": 1591, + "æĻ¯": 1592, + "é¡¹çĽ®": 1593, + "æĴ": 1594, + "离": 1595, + "tain": 1596, + "red": 1597, + "Ġsom": 1598, + "åħ´": 1599, + "igh": 1600, + "æİĴ": 1601, + "æĪı": 1602, + "ĠAnd": 1603, + "åĵį": 1604, + "å¢ĥ": 1605, + "!Ċ": 1606, + ",æĺ¯": 1607, + "Ġcall": 1608, + "æł¹": 1609, + "çŃĸ": 1610, + "Ġshould": 1611, + "段": 1612, + "ell": 1613, + "Ġinv": 1614, + "åºĹ": 1615, + "åIJĮæŶ": 1616, + "éĶĢ": 1617, + "åİŁåijĬ": 1618, + "çļĦä¸Ģ": 1619, + "Ġsim": 1620, + "åħĭ": 1621, + "ä½İ": 1622, + "å¼Ģåıij": 1623, + "ç®Ģ": 1624, + "...": 1625, + "Ġinclud": 1626, + "便": 1627, + "ities": 1628, + "åİ¿": 1629, + "Ġdec": 1630, + "ont": 1631, + "ise": 1632, + "old": 1633, + "read": 1634, + "åĬ¿": 1635, + "Ġnow": 1636, + "ock": 1637, + "表示": 1638, + "itt": 1639, + ",æ¯": 1640, + "Ġass": 1641, + "ç½Ĺ": 1642, + "çłĶ究": 1643, + "self": 1644, + ",对": 1645, + "åĦ¿": 1646, + "ited": 1647, + "Ġtechn": 1648, + "æĿij": 1649, + "ange": 1650, + "ient": 1651, + "èij": 1652, + "äºĴ": 1653, + "Ġfu": 1654, + "çľ¼": 1655, + "çĻ½": 1656, + "ful": 1657, + "ten": 1658, + "yth": 1659, + "¨Ģ": 1660, + "è¨Ģ": 1661, + "arch": 1662, + "urn": 1663, + "å¿ħ": 1664, + "com": 1665, + "uth": 1666, + "ife": 1667, + "Ġsame": 1668, + "çİĭ": 1669, + "Ġdifferent": 1670, + "Ġtra": 1671, + "ä¹°": 1672, + "irect": 1673, + "Ġback": 1674, + "Ġfin": 1675, + ",å¦Ĥ": 1676, + "åŃ¦ä¹ł": 1677, + "ä¸ĸçķĮ": 1678, + "ms": 1679, + "Ġeff": 1680, + "Ġret": 1681, + "è§Ĵ": 1682, + "ility": 1683, + "æݧ": 1684, + "ists": 1685, + "éŁ³": 1686, + "ä¸įæĺ¯": 1687, + "æ±½": 1688, + "ç»Ĩ": 1689, + "ts": 1690, + "Ġnum": 1691, + "oth": 1692, + "åij½": 1693, + "é¦Ļ": 1694, + "ght": 1695, + "Ġbl": 1696, + "Ġhim": 1697, + "ĠJava": 1698, + ",åĽł": 1699, + "åĪĴ": 1700, + "æĿİ": 1701, + "è´Ł": 1702, + "å¼Ģå§ĭ": 1703, + "Ġdisc": 1704, + "Ġpoint": 1705, + "Ġperson": 1706, + "Ġco": 
1707, + "ä»ħ": 1708, + "çĶ³": 1709, + "Ġgroup": 1710, + "红": 1711, + "ually": 1712, + "å¡": 1713, + "çı": 1714, + "èħ": 1715, + "éĢĤ": 1716, + "çĶŁæ´»": 1717, + "\"çļĦ": 1718, + "åĭ": 1719, + "Ġac": 1720, + "Ġmem": 1721, + "é¢Ħ": 1722, + "æį¢": 1723, + "主è¦ģ": 1724, + ",åħ¶": 1725, + "ied": 1726, + "Ġpub": 1727, + "ased": 1728, + "Ġlong": 1729, + "å¹³åı°": 1730, + "ç´ł": 1731, + ",åĨ": 1732, + "ork": 1733, + "ä»ĺ": 1734, + "Ġthose": 1735, + "ement": 1736, + "ç±³": 1737, + "Ġent": 1738, + "Ġbeing": 1739, + "ract": 1740, + "Ġgood": 1741, + "ython": 1742, + "tt": 1743, + "åĤ": 1744, + "æIJ": 1745, + "ä¸ŃçļĦ": 1746, + "çİ°åľ¨": 1747, + "ĠNew": 1748, + "çª": 1749, + "its": 1750, + "Ġfind": 1751, + "Ġvari": 1752, + "ved": 1753, + "Ġed": 1754, + "Ġrem": 1755, + "æĬ¤": 1756, + "æµĭ": 1757, + "è¿ľ": 1758, + "Ġear": 1759, + "åŃ£": 1760, + "éļı": 1761, + "iness": 1762, + "Ġsay": 1763, + "ered": 1764, + ".è¿Ļ": 1765, + "Ġke": 1766, + "å¯Į": 1767, + "大çļĦ": 1768, + "åĽ½å®¶": 1769, + "ives": 1770, + "bers": 1771, + "ib": 1772, + "çĮ": 1773, + "æĹı": 1774, + "åįĬ": 1775, + "å¾Ģ": 1776, + "å¾Īå¤ļ": 1777, + "Ġtest": 1778, + "Ġpres": 1779, + "失": 1780, + "Ġeach": 1781, + "æµİ": 1782, + "æĪIJ为": 1783, + "æľ¬éĻ¢": 1784, + "day": 1785, + "åıĮ": 1786, + "æĻºèĥ½": 1787, + "ven": 1788, + "é¾": 1789, + "åį³": 1790, + "åŃ©": 1791, + "è¡Ģ": 1792, + "康": 1793, + "çīĮ": 1794, + "好çļĦ": 1795, + "è¿ĺæĺ¯": 1796, + "ĠYou": 1797, + "Ġproble": 1798, + "Ġstat": 1799, + "产ä¸ļ": 1800, + "èĬ±": 1801, + "建设": 1802, + "举": 1803, + "绾": 1804, + "Ġstr": 1805, + "tern": 1806, + "åİĭ": 1807, + "æĸ¹å¼ı": 1808, + "ues": 1809, + "å©": 1810, + "çĨ": 1811, + "çļĦæĺ¯": 1812, + "Ġset": 1813, + "Ġthings": 1814, + ",å¾": 1815, + "Ġright": 1816, + "Ġart": 1817, + "管çIJĨ": 1818, + "Ġafter": 1819, + "人çļĦ": 1820, + "åij¨": 1821, + "Ġval": 1822, + "++": 1823, + "ever": 1824, + "æĭ©": 1825, + "代表": 1826, + "è£ħ": 1827, + "ience": 1828, + "Ġfun": 1829, + "Ġinst": 1830, + ",å¼": 1831, + "Ġgener": 1832, + "å¾®": 1833, + "eg": 
1834, + "ç¢": 1835, + "ince": 1836, + "Ġexam": 1837, + "缸åħ³": 1838, + "èIJ½": 1839, + "声": 1840, + "éĿŀ常": 1841, + "led": 1842, + "ys": 1843, + "åĪĩ": 1844, + "æĸĻ": 1845, + "ç½ijç«Ļ": 1846, + "hip": 1847, + "太": 1848, + "ution": 1849, + "ows": 1850, + "大家": 1851, + "çłģ": 1852, + ".\"Ċ": 1853, + "ater": 1854, + "åħħ": 1855, + "æī¿": 1856, + "åĪ°äºĨ": 1857, + "æĭħ": 1858, + "formation": 1859, + "hes": 1860, + "äºļ": 1861, + "Ġsur": 1862, + "Ġopen": 1863, + "åħ«": 1864, + "Ġpr": 1865, + "éĢģ": 1866, + "è½»": 1867, + "æĸ¹éĿ¢": 1868, + "ç¨ĭåºı": 1869, + "ins": 1870, + "Ġdist": 1871, + "åĽ´": 1872, + "ĠAn": 1873, + "ä¸ĬçļĦ": 1874, + "usiness": 1875, + "erest": 1876, + "Ġprogramming": 1877, + "éĢīæĭ©": 1878, + "'re": 1879, + "cial": 1880, + "Ġ$": 1881, + "Ġbest": 1882, + "ftware": 1883, + "Ġstud": 1884, + "è¿ĩç¨ĭ": 1885, + "æīĢæľī": 1886, + "æīĢ以": 1887, + "çīĪ": 1888, + "çĶ³è¯·": 1889, + "lo": 1890, + "als": 1891, + "å±Ĥ": 1892, + "èĥ½å¤Ł": 1893, + "Ġdesign": 1894, + ",ä½ł": 1895, + "ix": 1896, + "Ġfound": 1897, + "Ġdown": 1898, + "åĽŃ": 1899, + "æľīéĻIJ": 1900, + "ç¼ĸ": 1901, + "客æĪ·": 1902, + "ily": 1903, + "åħŃ": 1904, + "Ġreal": 1905, + "Ġprocess": 1906, + "å¥ĸ": 1907, + "ript": 1908, + "Ġext": 1909, + "ield": 1910, + "å®ĮæĪIJ": 1911, + "Ġcount": 1912, + "åı²": 1913, + ",å½": 1914, + "æīĺ": 1915, + "ä¸įåIJĮ": 1916, + "ames": 1917, + "è´¢": 1918, + ";Ċ": 1919, + "ner": 1920, + "Ġsm": 1921, + "Ġob": 1922, + "ict": 1923, + "Ġreg": 1924, + "åĢŁ": 1925, + "åħ¶ä»ĸ": 1926, + ",ä»ĸ": 1927, + ",éĤ£": 1928, + "--": 1929, + "ines": 1930, + "åĿļ": 1931, + "æĪĸèĢħ": 1932, + ",为": 1933, + ".åľ¨": 1934, + "Th": 1935, + "ves": 1936, + "å±ŀ": 1937, + "è¦ģæ±Ĥ": 1938, + "æŀIJ": 1939, + "æĮī": 1940, + "éĵ¶": 1941, + "ç§ijæĬĢ": 1942, + "ween": 1943, + "ivers": 1944, + "Ġaround": 1945, + "ä¸ĢäºĽ": 1946, + "æī¾": 1947, + "欢": 1948, + "Ġthree": 1949, + "æĸ½": 1950, + "ower": 1951, + "Ġdef": 1952, + "ateg": 1953, + "Ġresult": 1954, + "Ġimport": 1955, + "åĶ®": 1956, + "室": 1957, + "æł¡": 
1958, + "ĠPl": 1959, + "ä½ľä¸º": 1960, + "Ġexper": 1961, + "çŁ¥éģĵ": 1962, + "æļ": 1963, + "ars": 1964, + "念": 1965, + "æĻ®": 1966, + "Ġlear": 1967, + "éĶĻ": 1968, + "士": 1969, + "ank": 1970, + "Ġeas": 1971, + "éĺ¿": 1972, + "éĥ½æĺ¯": 1973, + "èĩªå·±çļĦ": 1974, + "meric": 1975, + "gg": 1976, + "èı": 1977, + "Ġpol": 1978, + "å·´": 1979, + "端": 1980, + "Ġexpl": 1981, + "èĭ±": 1982, + ",让": 1983, + "ving": 1984, + "yp": 1985, + "ä¸ĥ": 1986, + "äºī": 1987, + "ath": 1988, + "è¾¹": 1989, + "Ġcontro": 1990, + "the": 1991, + "vent": 1992, + "å¼Ĥ": 1993, + "imes": 1994, + "éĻį": 1995, + "éĵ¾": 1996, + "é»Ħ": 1997, + "Ġmight": 1998, + "Ġgreat": 1999 + }, + "merges": [ + "Ġ t", + "ä ¸", + "Ġ a", + "h e", + "i n", + "r e", + "ç ļ", + "çļ Ħ", + "o n", + "Ġt he", + "e r", + "ä º", + "Ġ s", + "a t", + "Ġ w", + "Ġ o", + "e n", + "i t", + "æ ľ", + "Ġ c", + "å ı", + "o r", + "e s", + "i s", + "a n", + "ä »", + "å ħ", + "Ġ b", + "in g", + "Ġ p", + "Ġ f", + "Ġa n", + "o u", + "e d", + "Ġ m", + "a r", + "a l", + "Ġt o", + "è ¿", + "å ¤", + "Ġo f", + "å ®", + "Ġ in", + "å Ī", + "Ġ d", + "ä ½", + "Ġan d", + "æ ĺ", + "Ġt h", + "i c", + "å IJ", + "l e", + "å ľ", + "r o", + "i on", + "ç Ķ", + "Ġ h", + "a s", + "æ Ī", + "å Ĭ", + "ä¸ Ģ", + "ä ¹", + ". 
Ċ", + "ç »", + ", å", + "æ ĸ", + "æ Ĺ", + "è ¯", + "æĺ ¯", + "è ®", + "Ġ l", + "en t", + "o m", + "å Ľ", + "ä ¼", + "Ġ e", + "s t", + "Ġ re", + "v e", + "Ġ n", + "å °", + "l l", + "Ġ I", + "å ¹", + "l y", + "åľ ¨", + "æ Ŀ", + "c t", + "å į", + "äº º", + "æľ ī", + "Ġb e", + "æ ī", + "Ġ g", + "ä¸ į", + "u t", + "å ¼", + "Ġ is", + "å ·", + "å ¸", + "å Ń", + "è ¡", + "Ġth at", + "â Ģ", + "e t", + "é ĩ", + "Ġ T", + "å º", + "äº Ĩ", + "o t", + "i d", + "s e", + "å ¾", + "ä¸ Ń", + "Ġ on", + "å ĩ", + "ĥ ½", + "a m", + "c h", + "æ ķ", + "Ġ A", + "å Į", + "é Ģ", + "c e", + "å Ĩ", + "Ġf or", + "Ġ S", + "i m", + "o w", + "Ġ y", + "è Ģ", + "å ¯", + "Ġ C", + "Ġ it", + "Ġ u", + ", æ", + "è §", + "v er", + "i g", + "ä¸ ª", + "ä¸ º", + "ç Ľ", + "è µ", + "at ion", + "a y", + "è ´", + "å ij", + "l d", + "å Ĵ", + "ç İ", + "a d", + "å¤ §", + "é Ļ", + "Ġp ro", + "åĴ Į", + "å ¥", + "Ġ M", + "Ġs t", + "ä» ¥", + "u r", + "æ Ģ", + "å¹ ´", + "æ Ĭ", + "å ¿", + "å ±", + "è¿ Ļ", + "ä¸ Ĭ", + "æ ł", + "æ ³", + "å İ", + "æĪ ij", + "it h", + "è ĩ", + "ç §", + "¦ ģ", + "æĹ ¶", + "é Ĺ", + "he r", + "ç ī", + "er s", + "Ġ P", + "Ġy ou", + "è ¦ģ", + "Ġw h", + "æ Ķ", + "o l", + "è ĥ½", + "ä¼ ļ", + "Ġa s", + "a g", + "çĶ ¨", + "ç º", + "è¡ Į", + "æ ŀ", + "i l", + "å ½", + "åı ¯", + "æ °", + "ä ¿", + "i r", + "åı ij", + "Ġw ith", + "ç IJ", + "æ Ń", + "o d", + "åĪ °", + "ç ľ", + "åĽ ½", + "i f", + "o p", + "e m", + "é Ŀ", + "Ġw e", + "æ İ", + "æĿ ¥", + "è ½", + "Ġ B", + "u l", + "Ġ Ġ", + "ç «", + "r i", + "Ġc on", + "æ ĥ", + "ä¸ ļ", + "åĩ º", + "Ġc om", + "é ¢", + "ä½ ľ", + "çĶ Ł", + "ĠT he", + "æ µ", + "äº İ", + "k e", + "r a", + "is t", + "ç Ń", + "i ll", + "Ġa re", + "Ġ (", + "æ ĭ", + "å¯ ¹", + "n d", + "æ ²", + "o re", + "Ġ he", + "ç ¬", + "æĪ IJ", + "t er", + "è ¾", + "æ ¬", + "Ġe x", + "Ġd e", + "' s", + "âĢ Ļ", + "t h", + "ä» ¬", + "at e", + "æĸ ¹", + "Ġ W", + "ç ½", + "å¤ ļ", + "ä ¾", + "Ġa t", + "è¿ ĩ", + "æ Į", + "Ġw as", + "es s", + "çİ °", + "æ³ ķ", + "e w", + "es t", + "èĩ ª", + "p p", + "Ġ H", 
+ "å ĵ", + "æ Ħ", + "å ī", + "åŃ ¦", + "ä¸ ĭ", + "åIJ İ", + "a b", + "Ġh a", + "å· ¥", + "çIJ Ĩ", + "an d", + "¦ Ĥ", + "Ġo r", + "åħ ¬", + "it y", + "æ ¯", + "ä¹ Ł", + "m ent", + "å® ļ", + "é ĺ", + "Ġ D", + "åĪ Ĩ", + "æľ ¬", + "åĬ ¨", + "æĸ °", + "æ ±", + "æĹ ¥", + "æķ °", + "å® ŀ", + "re s", + "a c", + "åľ °", + "u s", + "å¼ Ģ", + "e l", + "å Ł", + "u n", + "Ġ G", + "e ct", + "å® ¶", + "ç ¨", + "ro m", + "ç¬ ¬", + "Ġ R", + "Ġ r", + "h t", + "Ġ v", + "å° ±", + "ar t", + "Ġ F", + "åħ ¨", + "o s", + "è¿ Ľ", + "Ġb y", + "n t", + "Ġs u", + "åIJ Į", + "an t", + "Ġn ot", + "å Ģ", + "æľ Ī", + "ç ®", + "ä» ĸ", + "Ġw or", + "é ģ", + "i es", + "æĸ ĩ", + "åĮ ĸ", + "an g", + "Ġth is", + "q u", + "i ve", + "æ ı", + "Ġf rom", + "é ķ", + "å¸ Ĥ", + "Ġa l", + "Ġ J", + "ç» ı", + "or t", + "æ ¸", + "Ġ \"", + "ä¸Ģ 个", + "al ly", + "en d", + "Ġha ve", + "ou ld", + "u m", + "é ¡", + "è °", + "äº §", + "å ģ", + "å¿ ĥ", + "ç Ĥ", + "åĬ Ľ", + "Ġ N", + "è Ĥ", + "a in", + "ou t", + "ä¸ İ", + "o g", + "Ġa b", + "è ģ", + "åij Ĭ", + "æľ º", + "ä½ ł", + "Ġ E", + "æ Ľ", + "æľ Ģ", + "ä¹ ĭ", + "å¥ ½", + "æ į", + "çŃ ī", + "é ĥ½", + "Ġd o", + "è ·", + "é «", + "Ġs e", + "è ¶", + "om e", + "å¾ Ĺ", + "u st", + "é ĥ", + "é« ĺ", + "ar d", + "Ġcom p", + "åħ ³", + "æ Ļ", + "ä½ ĵ", + "ä¸ »", + "åī į", + "å ¦Ĥ", + ". 
.", + "ou r", + "äº ĭ", + "ç ¤", + "åIJ Ī", + "èĢ ħ", + "éĹ ´", + "i v", + "ig ht", + "å §", + "æ° ij", + "éĿ ¢", + "è¿ ĺ", + "æī Ģ", + "p t", + "è ¢", + "Ġ L", + "Ġc an", + "ç Ł", + "é Ľ", + "åħ ¶", + "ç¨ ĭ", + "èĢ Į", + "Ġ le", + "æĢ §", + "çĤ ¹", + "r ou", + "al l", + "åº ¦", + "e ar", + "e re", + "o c", + "o st", + "ag e", + "è® ¡", + "åĬ ¡", + "Ġ âĢ", + "Ġw he", + "Ġh as", + "o k", + "è¯ ´", + "ä» £", + "æ ´", + "è¢ «", + "ç Ŀ", + "åŃ IJ", + "u re", + "Ġc h", + "å° ı", + "t her", + "ç ³", + "åĬ ł", + "Ġu s", + "å Ŀ", + "Ġin t", + "éĢ ļ", + "æı IJ", + "è¡ ¨", + "éĥ ¨", + "æĪij 们", + "a re", + "Ġm e", + "Ġs h", + "å »", + "Ġ O", + "éķ ¿", + "ç ¾", + "Ġb ut", + "åľ º", + "ç Ħ", + "Ġm a", + "Ġ j", + "åĮ º", + "å± ķ", + "Ġ k", + "éĩ į", + "ic h", + "âĢĻ s", + "å¤ ©", + "é Ĥ", + "è½ ¦", + "ç³ »", + "Ġg o", + "è® ¾", + "æĥ ħ", + "r am", + "u d", + "åı¯ 以", + "ç ł", + "ä» ¶", + "Ġw ill", + "ve l", + "åĨ ħ", + "ä¸ ī", + "ç½ ij", + "æ Ł", + "åħ ¥", + "o ok", + "Ġl i", + "çľ ĭ", + "èµ Ħ", + "Ġm ore", + "é ľ", + "æ ¶", + "è ī", + "æŀ ľ", + "Ġthe y", + "p e", + "æĦ ı", + "Ġa ll", + "in e", + "åij ĺ", + "ion s", + "éĻ ¢", + "Ġs o", + "æ ¡", + "æľ Ł", + "çĦ ¶", + "åĵ ģ", + "æ ģ", + "åı Ĭ", + "çĶ µ", + "å¼ ı", + "éĩ Į", + "çĽ ®", + "Ġn e", + "å¹ ³", + "åº Ķ", + "é ļ", + "é Ķ", + "çĽ ¸", + "ĠĠ ĠĠ", + "p l", + "çĿ Ģ", + "å¾ Ī", + "ä¿ ¡", + "å° Ĩ", + "i e", + "ç Ļ", + "at ed", + "å¤ ĸ", + "ä¿ Ŀ", + "å½ ĵ", + "éĩ ı", + "Ġp l", + "ä½ Ĩ", + "æĺ İ", + "è Ĭ", + "Ġab out", + "éĩ ij", + "T he", + "é ¦", + "a v", + "as t", + "id e", + "in d", + "èµ ·", + "éģ ĵ", + "Ġon e", + "æĽ ´", + "g h", + "ä» İ", + "Ġwh o", + "ä¹ Ī", + ". 
\"", + "å¸ ¸", + "ç ±", + "ç ķ", + "ĠI n", + "åı ¸", + "i z", + "Ġ U", + "ä¸Ń åĽ½", + "å» º", + "Ġ -", + "äº Į", + "é £", + "on e", + "Ġwor k", + "Ġt e", + "Ġ im", + "f f", + "æ¬ ¡", + "åĪ ©", + "è§ £", + "çī ¹", + "äº Ľ", + "' t", + "p er", + "s o", + "æ¯ Ķ", + "æį ®", + "æĹ ł", + "ver y", + "çī ©", + "ç ĸ", + "åħ ĥ", + "ç ©", + "ar y", + "ä½ ¿", + "åİ Ł", + "Ġwh ich", + "i al", + "or m", + "Ġo ther", + "ab le", + "g ram", + "an s", + "åİ »", + "i re", + "im e", + "Ġthe ir", + "åħ¬ åı¸", + "Ġa d", + "ä½ į", + "å ¢", + "ç ¥", + "æī ĭ", + "å·¥ ä½ľ", + "å ķ", + "ç ¡", + "g e", + "é ©", + "Ġ en", + "ĠT h", + "è¿ IJ", + "åĪ ¶", + "Ġo ut", + "ac k", + "r it", + "c c", + "ç§ į", + "åį ģ", + "on g", + "æľ į", + "ation s", + "Ġcon t", + "b er", + "æŃ ¤", + "é ħ", + "åIJ į", + "ul t", + "éĤ £", + "Ġre s", + "d u", + "ç Ī", + "ang u", + "am e", + "æł ·", + "ç¾ İ", + "æĥ ³", + "æİ ¥", + "è ĭ", + "ou s", + "r y", + "äº ¤", + "Ġh is", + "æ² ¡", + "ç ´", + "è IJ", + "Ġc l", + "Ġu p", + "âĢ Ŀ", + "åı °", + "Ġa pp", + "Ġs ome", + "éĹ ®", + "é¢ ĺ", + "ic al", + "æĪ ĸ", + "a u", + "åĽ ŀ", + "Ġli ke", + "æŃ £", + "ç» ĵ", + "ç ¼", + "åı ĺ", + "Ġm an", + "u e", + "é ĵ", + "i p", + "æĬ Ģ", + "a p", + "Ġbe c", + ") ,", + "ç» Ł", + "å· ±", + "ĠS t", + "åı ª", + "æĶ ¿", + "ĠI t", + "èģ Ķ", + "ä» »", + "Ġs c", + "å® ī", + ",å °", + "a ct", + "çĶ ±", + "Ġs a", + ", \"", + "Ġm y", + "æĮ ģ", + "è° ĥ", + "å· ²", + "a ch", + "Ġthe re", + "åŁ º", + "Ġl angu", + "ay s", + "èĩª å·±", + "b le", + "ic e", + "å® ¢", + "Ġpro gram", + "ç§ ij", + "åģ ļ", + "éľ Ģ", + "æ± Ĥ", + "çŁ ¥", + "ä¸ ľ", + "Ġwe re", + ",å ¹", + "ig n", + "or y", + "k ing", + "é Ĵ", + "ç ²", + "ç ĥ", + "å ŀ", + "Ġw ould", + "åIJ ij", + "Ġyou r", + "n ow", + "Ġa r", + "è Ħ", + "in t", + "è® °", + "Ġy ear", + "é¡ ¹", + ", è¿Ļ", + "vel op", + "åĽ ł", + "è º", + "æĪ ·", + "ä¸ ĩ", + "c l", + "e p", + "j ect", + "åĪ Ľ", + "è į", + "ä¸ ¤", + "Ġp art", + "è® ©", + "è¯ ģ", + "Ġn ew", + "æģ ¯", + "f t", + "Ġwh at", + "Ġu n", + "ç Ĺ", + "æĸ 
¯", + "Ġit s", + "ç¤ º", + "å¤ Ħ", + "Ġd is", + "Ġwhe n", + "è ĥ", + "ä¹ ¦", + "Ġthe m", + "åIJ Ħ", + "å¼ º", + "ĠâĢ ľ", + "åĨ ³", + "u b", + "åı Ĺ", + "e op", + "Ġe v", + "åĪ «", + "è§ Ħ", + "æ ·", + "Ġ V", + "åķ Ĩ", + "æ´ »", + "æ¡ Ī", + "æ Ĥ", + "åıij å±ķ", + "ä¼ ģ", + "ing s", + "ç® ¡", + "eop le", + "o se", + "Ġ if", + "èº «", + "a ce", + "f orm", + "è· ¯", + "å® Į", + "æ¬ ¾", + "Ġal so", + "o b", + "ç« ł", + "Ġbe en", + "Ġt ime", + "Ġth an", + "an ce", + "as s", + ", ä½Ĩ", + "ver s", + "ç« ĭ", + "é ª", + "é¢ Ĩ", + "å¯ ¼", + "æĿ ¡", + "re e", + "æľ ¯", + "od e", + "å £", + "æ »", + "æµ ·", + ", åľ¨", + "Ġh ow", + "çIJ ĥ", + "çº ¿", + "æĦ Ł", + "èĩ ³", + "æ º", + "as e", + "Ġp er", + "è İ", + "Ġ her", + "æł ĩ", + "en ce", + "åħ ·", + "åŀ ĭ", + ", ä¸į", + "æī §", + "Ġk now", + "Ġim p", + "Ġ qu", + "Ġs y", + "åħ ±", + "r u", + "åĢ ¼", + "ä¼ģ ä¸ļ", + "å ĸ", + "å Ļ", + "Ġan y", + "è® ¤", + "æµ ģ", + "ç± »", + "Ġh ad", + ".Ċ Ċ", + "it e", + "åħ ī", + "ct ion", + "i a", + "Ġp eople", + "w o", + "è Ļ", + "or s", + "åį ķ", + "Ġs p", + "çľ ģ", + "æĶ ¶", + "è £", + "é »", + "Ġ Y", + "åį İ", + "è´ ¨", + "人 æ°ij", + "æķ Ļ", + "éĢ ī", + "i le", + "ç¤ ¾", + "c on", + "in k", + "Ġf e", + "æķ Ī", + "Ġsa id", + "è Ī", + "èµ Ľ", + "m er", + "å½ ±", + "è¾ ¾", + "æ° ´", + "æİ ¨", + "æľį åĬ¡", + "è¯ ī", + "åĽ Ľ", + "ol og", + "ç» Ļ", + "Ġg et", + "çľ Ł", + "åĻ ¨", + "it ion", + "f ere", + "Ġde velop", + "æĮ ĩ", + "ir st", + "st em", + "c es", + "r ic", + "Ġm at", + "éĻ IJ", + "ou nt", + "ä¼ ł", + "au se", + "è¿Ľ è¡Į", + "ç ¦", + "ĠC h", + "è´ ¹", + "Ċ Ċ", + "æĬ ¥", + "çł Ķ", + "åı ĸ", + "è¯ ¥", + "ä¾ Ľ", + "æ ¼", + "res s", + "ç® Ĺ", + "ç¡ ®", + "Ġo ur", + "è¿ ij", + "æķ ´", + "没 æľī", + "å¹ ¿", + "çĽ ´", + "ä¸ ĵ", + "Ġj ust", + "o v", + "Ġd es", + "æ³ ¨", + "æĶ ¾", + "ç« Ļ", + "èī ²", + "ar k", + "å¤ į", + "ent s", + "éĽ Ĩ", + "a k", + "a il", + "Ġre c", + "Ġs he", + "en s", + "é© ¬", + "æ ¨", + "è¯ Ŀ", + "è¯ ·", + "éĹ® é¢ĺ", + "è ¥", + "Ġ K", + "æŶ éĹ´", + "l ic", + "re at", + 
"ä¾ ĭ", + ",å¹ ¶", + "\" ,", + "ä¹ ł", + "i ld", + "ä» Ĭ", + "ä¼ ĺ", + "é£ İ", + "çº §", + "è¥ ¿", + "ç ģ", + "å® ¹", + "å® ĥ", + ",å Ľ", + "æī§ è¡Į", + "è® ®", + ") .", + "Ġs er", + "åı Ĥ", + "å° ij", + "æī ĵ", + "æ £", + "Ġa g", + "ä¹ ī", + "å° Ķ", + "æĶ ¯", + "Ġint o", + "åı ·", + "Ġd if", + "éĿ ŀ", + "è ±", + "ç» Ħ", + "éĢ ł", + "Ġu se", + ": Ċ", + "åĬ ©", + "æº IJ", + "? Ċ", + "éĹ ¨", + "ou nd", + "æ ®", + "ä» ·", + "åħ Ī", + "ç» Ń", + "Ġp re", + "被 åijĬ", + "è¯ Ĩ", + "ĠH e", + "ç Ĭ", + "å¥ ³", + "æĢ »", + "æŀ Ħ", + "çİ ¯", + "èĬ Ĥ", + "is h", + "ar i", + "æŃ ¥", + "w n", + "è Į", + "å¸ ĥ", + "åĮ Ĺ", + "å§ ĭ", + "éĺ Ł", + "Ġlangu age", + "Ġw rit", + "éĢ Ł", + "p le", + "Ġof f", + "æĺ ĵ", + "åĩ Ĩ", + "Ġf irst", + "å ²", + "ä¸ ĸ", + "Ġo ver", + "ç² ¾", + "o ve", + "æľ ª", + "Ġm ost", + "æĬ ķ", + "Ġt r", + "èĢ ģ", + ", æĪij", + "ou gh", + "ç ħ", + "æł ¼", + "使 çĶ¨", + "t e", + "er m", + "ul ar", + "å¢ ŀ", + "or d", + "å· ŀ", + "第 ä¸Ģ", + "æ¸ ¸", + "çķ Į", + "ä½ ķ", + "æĢ ģ", + "ä» Ģ", + "çİ ĩ", + "Ġt wo", + "c k", + "å¤ ´", + "Ġn o", + "Ġsy stem", + "æ¨ ¡", + "Ġa cc", + "æ² »", + "设 计", + "le ct", + "Ġon ly", + ", ä¹Ł", + "é Ł", + "éĢļ è¿ĩ", + "æ¶ Ī", + "éľĢ è¦ģ", + "åĮ ħ", + "åı¯ èĥ½", + "åĽ ¢", + "è§ Ĩ", + "å ¦", + "åŁ İ", + "å§ Ķ", + "Ġne ed", + "n e", + "o ll", + "ic s", + "åĬ ŀ", + "æ ¢", + "Ġw ay", + "åĬ Ł", + "è½ ¬", + "Ġbec ause", + "ç³» 绣", + "å¹ ¶", + "ŀ į", + "é¦ ĸ", + "Ġbe t", + "æ° Ķ", + "s s", + "ç ĭ", + "å¸ Ī", + "Ġpro du", + "æķ° æį®", + "åĽ ¾", + "ag es", + "å°± æĺ¯", + "Ġ ro", + "å¥ ¹", + "æ¯ ı", + "ä»Ģ ä¹Ī", + "å Ķ", + "å¿ «", + "å± ±", + "Ġma ke", + "åĮ »", + "Ġcom m", + "è¶ ħ", + ", èĢĮ", + "åĨ į", + "æĶ ¹", + "éħ į", + "è¿Ļ 个", + "æĭ ī", + "åĢ Ļ", + "Ġb ook", + "å± Ģ", + "Ġc ould", + "ĠĠĠĠ ĠĠĠĠ", + "se l", + "产 åĵģ", + "äº Ķ", + "åı £", + "nd er", + "ĠTh is", + "çĪ ±", + "Ġe very", + "å¼ ķ", + "åŃ Ĺ", + "å½ ¢", + "Ġs pe", + "Ġw ant", + "åŃ ĺ", + "æĺ ¾", + "è§ ģ", + "è§ Ĥ", + "åį Ĺ", + "u ch", + "w e", + "åħ ļ", + "è¾ ĥ", + "ĠW e", + "Ġp o", 
+ "he d", + "ri b", + "æĪ ĺ", + ",å į", + "ow n", + "åĪ Ļ", + "è ŀį", + "u res", + "åĪ Ŀ", + "et h", + "ĠU n", + "ç¥ ŀ", + "at es", + "å¼ ł", + "åŁ Ł", + "ä¹ IJ", + "ä¸ Ķ", + "Ġthe se", + "åĪ Ĺ", + "ä½ ı", + "ä¾ Ŀ", + "å¸Ĥ åľº", + "Ġb u", + "æĿ ĥ", + "i ew", + "åĨ µ", + "æŁ IJ", + "Ġad d", + "b r", + "æī į", + "æ· ±", + "éª Į", + "Ġd on", + "ç» Ī", + "åį ı", + "ä»ĸ 们", + "n ing", + "Ġm ay", + "çº ¦", + "o y", + "å¸ ¦", + "t ing", + "Ġa ct", + ",å ¯", + "è® º", + "æŁ ¥", + "ç§ °", + "Ġcon s", + "æĬĢ æľ¯", + "c ess", + "èĢ ĥ", + "èĤ ¡", + "l i", + "å® ¡", + "çī ĩ", + "ment s", + "w ard", + "w are", + "éļ ¾", + "av a", + "è§ ī", + "è¶ ³", + ", 以", + "u al", + "èģ Į", + "rou nd", + "æĻ º", + "Ġin ter", + "è¯ Ń", + "ch n", + "çħ §", + "g et", + "æŶ åĢĻ", + "Ġsu ch", + "åı Ī", + "åį ĩ", + "ĠR e", + "Ġwor ld", + "çĻ ¾", + "Ġm od", + "è¿ ŀ", + "ri end", + "an y", + "å®ŀ çİ°", + "è¶ Ĭ", + "åº ı", + "çĹ ħ", + "on s", + "at ing", + ", ä¸Ģ", + "å ĥ", + "åĪ ¤", + "ĠP ro", + "h at", + "Ġre p", + "i an", + "m ing", + "æ¸ ħ", + "c ed", + "o ol", + "o od", + "å¤ ĩ", + "Ġdo es", + "ä¿¡ æģ¯", + "èIJ ¥", + "æ£ Ģ", + "Ġt a", + "Ġu nder", + ", æľī", + "é ¥", + "éĩ ĩ", + "our ce", + "Ġwhe re", + "a w", + "p en", + "Ġd id", + "ad e", + "cl ud", + "f ter", + "k s", + "Ġin d", + "rou p", + "l es", + "Ġthe n", + "en er", + "åı į", + "çĸ Ĺ", + "on d", + "Ġpro v", + "ç© ¶", + "Ġa m", + "Ġc ode", + "Ġyear s", + "åĩ ½", + "æĬ Ĭ", + "if ic", + "æ± Ł", + "rou gh", + ",å° ±", + "æ¼ Ķ", + "æľ Ľ", + "åĩ ł", + "ce pt", + "p ect", + "äº ¬", + "is s", + "Ġman y", + "olog y", + "Ġdif fere", + "å¾ ·", + "ä» ½", + "æŀ ģ", + "ç© º", + "èİ ·", + "åIJ ¬", + "éĻ ħ", + "i ous", + "os s", + "Ġev en", + "èĮ ĥ", + "å¦Ĥ æŀľ", + "l p", + "æ ¹", + "Ġp ers", + "èĩ ´", + "Ġ very", + "he n", + "ro w", + "æĪ ¿", + "è´ £", + "Ġat t", + "æıIJ ä¾Ľ", + "çĬ ¶", + ") Ċ", + "çļĦ 人", + "at ive", + "è¯ ķ", + "ation al", + "at a", + "æĸ Ń", + "è´ Ń", + "Ġst art", + "å ¨", + "id ent", + "éĩį è¦ģ", + "x t", + "Ġre ad", + "å¾ ĭ", + 
"Ġus ed", + "Ġf riend", + "ç» ´", + ",å ¥", + "æĢ Ŀ", + "Ġhe lp", + "Ġse e", + "è± ¡", + "åĥ ı", + "m at", + "å¤ Ł", + "Ġth ink", + "pp ort", + "èĤ ²", + "p h", + "Ġm uch", + "åĩ½ æķ°", + "e c", + "è® ¸", + "Ġl ook", + "ç§ ¯", + "æŀ Ĺ", + "ĠB ut", + "âĢĻ t", + "f ore", + "Ħ ¿", + "Ġth rough", + "æĸĩ åĮĸ", + "æĥħ åĨµ", + "b s", + "Ġre l", + "第 äºĮ", + "n g", + " ·", + "æ ħ", + "ä¼ Ĺ", + "Ġe m", + "å·² ç»ı", + "社 ä¼ļ", + "Ġin c", + "Ġwe ll", + "æ» ¡", + "æ ij", + "äº ¿", + "åı ĭ", + "åİ Ĩ", + "ic k", + "ot her", + "åĨ Ļ", + "éĻ ©", + "çĥ Ń", + "i ble", + "Ġp h", + "ĠC om", + "ç½ ®", + ", æĪij们", + "çĶ ·", + "Ġre ally", + "èµ °", + "Ġpro ject", + "æĸ¹ æ³ķ", + "Ġo wn", + "éĻ ¤", + "æĻ ¯", + "项 缮", + "æ Ĵ", + "ç¦ »", + "t ain", + "re d", + "Ġs om", + "åħ ´", + "ig h", + "æİ Ĵ", + "æĪ ı", + "ĠA nd", + "åĵ į", + "å¢ ĥ", + "! Ċ", + ", æĺ¯", + "Ġc all", + "æł ¹", + "çŃ ĸ", + "Ġsh ould", + "æ® µ", + "e ll", + "Ġin v", + "åº Ĺ", + "åIJĮ æŶ", + "éĶ Ģ", + "åİŁ åijĬ", + "çļĦ ä¸Ģ", + "Ġs im", + "åħ ĭ", + "ä½ İ", + "å¼Ģ åıij", + "ç® Ģ", + ".. 
.", + "Ġin clud", + "ä¾ ¿", + "it ies", + "åİ ¿", + "Ġde c", + "on t", + "is e", + "o ld", + "re ad", + "åĬ ¿", + "Ġn ow", + "oc k", + "表 示", + "it t", + ",æ ¯", + "Ġas s", + "ç½ Ĺ", + "çłĶ 究", + "sel f", + ",å¯ ¹", + "å Ħ¿", + "it ed", + "Ġte chn", + "æĿ ij", + "ang e", + "i ent", + "è ij", + "äº Ĵ", + "Ġf u", + "çľ ¼", + "çĻ ½", + "f ul", + "t en", + "y th", + "¨ Ģ", + "è ¨Ģ", + "ar ch", + "ur n", + "å¿ ħ", + "c om", + "ut h", + "if e", + "Ġs ame", + "çİ ĭ", + "Ġdiffere nt", + "Ġt ra", + "ä¹ °", + "ire ct", + "Ġb ack", + "Ġf in", + ",å ¦Ĥ", + "åѦ ä¹ł", + "ä¸ĸ çķĮ", + "m s", + "Ġe ff", + "Ġre t", + "è§ Ĵ", + "il ity", + "æİ §", + "ist s", + "éŁ ³", + "ä¸į æĺ¯", + "æ± ½", + "ç» Ĩ", + "t s", + "Ġn um", + "ot h", + "åij ½", + "é¦ Ļ", + "g ht", + "Ġb l", + "Ġh im", + "ĠJ ava", + ",åĽ ł", + "åĪ Ĵ", + "æĿ İ", + "è´ Ł", + "å¼Ģ å§ĭ", + "Ġdis c", + "Ġpo int", + "Ġpers on", + "Ġc o", + "ä» ħ", + "çĶ ³", + "Ġg roup", + "çº ¢", + "u ally", + "å ¡", + "ç ı", + "è ħ", + "éĢ Ĥ", + "çĶŁ æ´»", + "\" çļĦ", + "å ĭ", + "Ġa c", + "Ġm em", + "é¢ Ħ", + "æį ¢", + "主 è¦ģ", + ", åħ¶", + "i ed", + "Ġp ub", + "as ed", + "Ġl ong", + "å¹³ åı°", + "ç´ ł", + ",å Ĩ", + "or k", + "ä» ĺ", + "Ġth ose", + "em ent", + "ç± ³", + "Ġ ent", + "Ġbe ing", + "ra ct", + "Ġgo od", + "yth on", + "t t", + "å Ĥ", + "æ IJ", + "ä¸Ń çļĦ", + "çİ° åľ¨", + "ĠN ew", + "ç ª", + "it s", + "Ġf ind", + "Ġv ari", + "v ed", + "Ġ ed", + "Ġre m", + "æĬ ¤", + "æµ ĭ", + "è¿ ľ", + "Ġe ar", + "åŃ £", + "éļ ı", + "in ess", + "Ġs ay", + "ere d", + ". 
è¿Ļ", + "Ġ ke", + "å¯ Į", + "大 çļĦ", + "åĽ½ 家", + "iv es", + "b ers", + "i b", + "ç Į", + "æĹ ı", + "åį Ĭ", + "å¾ Ģ", + "å¾Ī å¤ļ", + "Ġt est", + "Ġp res", + "å¤ ±", + "Ġe ach", + "æµ İ", + "æĪIJ 为", + "æľ¬ éĻ¢", + "d ay", + "åı Į", + "æĻº èĥ½", + "v en", + "é ¾", + "åį ³", + "åŃ ©", + "è¡ Ģ", + "åº ·", + "çī Į", + "好 çļĦ", + "è¿ĺ æĺ¯", + "ĠY ou", + "Ġpro ble", + "Ġst at", + "产 ä¸ļ", + "èĬ ±", + "建 设", + "ä¸ ¾", + "ç» ľ", + "Ġst r", + "ter n", + "åİ ĭ", + "æĸ¹ å¼ı", + "u es", + "å ©", + "ç Ĩ", + "çļĦ æĺ¯", + "Ġs et", + "Ġth ings", + ",å ¾", + "Ġr ight", + "Ġar t", + "管 çIJĨ", + "Ġa fter", + "人 çļĦ", + "åij ¨", + "Ġv al", + "+ +", + "e ver", + "æĭ ©", + "代 表", + "è£ ħ", + "i ence", + "Ġf un", + "Ġin st", + ",å ¼", + "Ġg ener", + "å¾ ®", + "e g", + "ç ¢", + "in ce", + "Ġex am", + "缸 åħ³", + "èIJ ½", + "å£ °", + "éĿŀ 常", + "l ed", + "y s", + "åĪ ĩ", + "æĸ Ļ", + "ç½ij ç«Ļ", + "h ip", + "å¤ ª", + "ut ion", + "ow s", + "大 家", + "çł ģ", + ".\" Ċ", + "at er", + "åħ ħ", + "æī ¿", + "åĪ° äºĨ", + "æĭ ħ", + "form ation", + "he s", + "äº ļ", + "Ġs ur", + "Ġo pen", + "åħ «", + "Ġp r", + "éĢ ģ", + "è½ »", + "æĸ¹ éĿ¢", + "ç¨ĭ åºı", + "in s", + "Ġd ist", + "åĽ ´", + "ĠA n", + "ä¸Ĭ çļĦ", + "us iness", + "ere st", + "Ġprogram ming", + "éĢī æĭ©", + "' re", + "c ial", + "Ġ $", + "Ġb est", + "ft ware", + "Ġst ud", + "è¿ĩ ç¨ĭ", + "æīĢ æľī", + "æīĢ 以", + "çī Ī", + "çĶ³ 请", + "l o", + "al s", + "å± Ĥ", + "èĥ½ å¤Ł", + "Ġdes ign", + ", ä½ł", + "i x", + "Ġf ound", + "Ġd own", + "åĽ Ń", + "æľī éĻIJ", + "ç¼ ĸ", + "客 æĪ·", + "i ly", + "åħ Ń", + "Ġre al", + "Ġpro cess", + "å¥ ĸ", + "ri pt", + "Ġex t", + "ie ld", + "å®Į æĪIJ", + "Ġc ount", + "åı ²", + ",å ½", + "æī ĺ", + "ä¸į åIJĮ", + "am es", + "è´ ¢", + "; Ċ", + "n er", + "Ġs m", + "Ġo b", + "ic t", + "Ġre g", + "åĢ Ł", + "åħ¶ ä»ĸ", + ", ä»ĸ", + ", éĤ£", + "- -", + "in es", + "åĿ ļ", + "æĪĸ èĢħ", + ", 为", + ". 
åľ¨", + "T h", + "v es", + "å± ŀ", + "è¦ģ æ±Ĥ", + "æŀ IJ", + "æĮ ī", + "éĵ ¶", + "ç§ij æĬĢ", + "we en", + "i vers", + "Ġa round", + "ä¸Ģ äºĽ", + "æī ¾", + "æ¬ ¢", + "Ġth ree", + "æĸ ½", + "ow er", + "Ġde f", + "ate g", + "Ġres ult", + "Ġimp ort", + "åĶ ®", + "å® ¤", + "æł ¡", + "ĠP l", + "ä½ľ 为", + "Ġex per", + "çŁ¥ éģĵ", + "æ ļ", + "ar s", + "å¿ µ", + "æĻ ®", + "Ġle ar", + "éĶ Ļ", + "å£ «", + "an k", + "Ġe as", + "éĺ ¿", + "éĥ½ æĺ¯", + "èĩªå·± çļĦ", + "mer ic", + "g g", + "è ı", + "Ġp ol", + "å· ´", + "ç« ¯", + "Ġex pl", + "èĭ ±", + ", 让", + "v ing", + "y p", + "ä¸ ĥ", + "äº ī", + "at h", + "è¾ ¹", + "Ġcont ro", + "t he", + "v ent", + "å¼ Ĥ", + "im es", + "éĻ į", + "éĵ ¾", + "é» Ħ", + "Ġm ight", + "Ġg reat" + ] + } +} \ No newline at end of file diff --git a/saves-starcoder2/tokenizer_config.json b/saves-starcoder2/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..10879969180439c31c9055e3301e336e2c5eec3d --- /dev/null +++ b/saves-starcoder2/tokenizer_config.json @@ -0,0 +1,37 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|endoftext|>", + "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + 
"model_max_length": 4096, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +}